From 613312ef82efb8420b58f4e2614476edf5e344c0 Mon Sep 17 00:00:00 2001 From: harisbal Date: Sun, 28 Jan 2018 21:52:23 +0000 Subject: [PATCH 001/217] Rebase Rebase --- .github/CODE_OF_CONDUCT.md | 63 + .travis.yml | 7 + LICENSES/XARRAY_LICENSE | 191 ++ asv_bench/benchmarks/frame_methods.py | 4 +- asv_bench/benchmarks/groupby.py | 131 +- asv_bench/benchmarks/pandas_vb_common.py | 47 +- asv_bench/benchmarks/stat_ops.py | 239 +- asv_bench/benchmarks/strings.py | 135 +- asv_bench/benchmarks/timedelta.py | 97 +- asv_bench/benchmarks/timeseries.py | 507 ++-- asv_bench/benchmarks/timestamp.py | 13 +- ci/install_travis.sh | 3 - ci/lint.sh | 34 +- ci/requirements-2.7.build | 2 +- ci/requirements-3.6.build | 2 +- ci/requirements-3.6_DOC.build | 2 +- ci/requirements-3.6_WIN.run | 1 + conda.recipe/meta.yaml | 4 +- doc/source/api.rst | 15 +- doc/source/basics.rst | 96 +- doc/source/contributing.rst | 34 +- doc/source/developer.rst | 43 + doc/source/indexing.rst | 1 + doc/source/internals.rst | 2 + doc/source/io.rst | 64 +- doc/source/overview.rst | 2 +- doc/source/whatsnew/v0.17.1.txt | 2 +- doc/source/whatsnew/v0.23.0.txt | 227 +- pandas/__init__.py | 2 +- pandas/_libs/algos.pyx | 43 +- pandas/_libs/algos_rank_helper.pxi.in | 2 +- pandas/_libs/groupby.pyx | 2 +- pandas/_libs/groupby_helper.pxi.in | 2 +- pandas/_libs/hashtable.pxd | 9 - pandas/_libs/hashtable.pyx | 38 +- pandas/_libs/hashtable_class_helper.pxi.in | 136 - pandas/_libs/index.pyx | 210 +- pandas/_libs/internals.pyx | 438 ++++ pandas/_libs/interval.pyx | 7 +- pandas/_libs/join.pyx | 9 +- pandas/_libs/lib.pyx | 487 +--- pandas/_libs/missing.pyx | 4 +- pandas/_libs/parsers.pyx | 130 +- .../_libs/{src/reduce.pyx => reduction.pyx} | 30 +- pandas/_libs/reshape.pyx | 9 +- pandas/_libs/skiplist.pyx | 12 +- pandas/_libs/sparse.pyx | 15 +- pandas/_libs/src/inference.pyx | 43 +- pandas/_libs/src/klib/ktypes.h | 6 - pandas/_libs/src/klib/kvec.h | 151 -- pandas/_libs/src/numpy.pxd | 994 ------- pandas/_libs/src/numpy_helper.h | 
16 - pandas/_libs/src/parser/.gitignore | 2 - pandas/_libs/src/parser/Makefile | 13 - pandas/_libs/src/parser/tokenizer.c | 15 - pandas/_libs/src/parser/tokenizer.h | 2 - pandas/_libs/src/ujson/python/objToJSON.c | 3 +- pandas/_libs/tslib.pyx | 4 +- pandas/_libs/tslibs/ccalendar.pyx | 5 +- pandas/_libs/tslibs/conversion.pyx | 34 +- pandas/_libs/tslibs/fields.pyx | 4 +- pandas/_libs/tslibs/frequencies.pyx | 5 +- pandas/_libs/tslibs/nattype.pyx | 6 +- pandas/_libs/tslibs/offsets.pyx | 74 +- pandas/_libs/tslibs/parsing.pyx | 4 +- pandas/_libs/tslibs/resolution.pyx | 4 +- pandas/_libs/tslibs/strptime.pyx | 1 - pandas/_libs/tslibs/timedeltas.pyx | 222 +- pandas/_libs/tslibs/timestamps.pyx | 26 +- pandas/_libs/tslibs/timezones.pxd | 1 + pandas/_libs/tslibs/timezones.pyx | 35 +- pandas/_libs/window.pyx | 31 +- pandas/api/__init__.py | 1 + pandas/api/extensions/__init__.py | 4 + pandas/compat/pickle_compat.py | 6 +- pandas/computation/expressions.py | 4 + pandas/conftest.py | 20 + pandas/core/accessor.py | 168 +- pandas/core/api.py | 2 +- pandas/core/apply.py | 12 +- pandas/core/arrays/__init__.py | 1 + pandas/core/arrays/categorical.py | 2330 ++++++++++++++++ pandas/core/base.py | 23 +- pandas/core/categorical.py | 2336 +---------------- pandas/core/computation/align.py | 4 +- pandas/core/computation/expressions.py | 7 +- pandas/core/config_init.py | 18 +- pandas/core/datetools.py | 6 +- pandas/core/dtypes/api.py | 1 + pandas/core/dtypes/cast.py | 78 +- pandas/core/dtypes/common.py | 20 +- pandas/core/dtypes/concat.py | 13 +- pandas/core/dtypes/dtypes.py | 35 +- pandas/core/dtypes/generic.py | 1 + pandas/core/dtypes/inference.py | 33 + pandas/core/frame.py | 174 +- pandas/core/generic.py | 117 +- pandas/core/groupby.py | 83 +- pandas/core/indexes/accessors.py | 144 +- pandas/core/indexes/base.py | 92 +- pandas/core/indexes/category.py | 17 +- pandas/core/indexes/datetimelike.py | 43 +- pandas/core/indexes/datetimes.py | 52 +- pandas/core/indexes/interval.py | 194 +- 
pandas/core/indexes/multi.py | 206 +- pandas/core/indexes/numeric.py | 15 +- pandas/core/indexes/period.py | 1 - pandas/core/indexes/range.py | 10 +- pandas/core/indexes/timedeltas.py | 83 +- pandas/core/indexing.py | 70 +- pandas/core/internals.py | 400 ++- pandas/core/nanops.py | 26 +- pandas/core/ops.py | 1193 ++++----- pandas/core/panel.py | 111 +- pandas/core/resample.py | 19 +- pandas/core/reshape/concat.py | 4 +- pandas/core/reshape/melt.py | 8 +- pandas/core/reshape/merge.py | 50 +- pandas/core/reshape/pivot.py | 2 +- pandas/core/reshape/reshape.py | 98 +- pandas/core/reshape/tile.py | 3 +- pandas/core/series.py | 172 +- pandas/core/sorting.py | 2 +- pandas/core/sparse/array.py | 44 +- pandas/core/sparse/frame.py | 29 +- pandas/core/sparse/series.py | 97 +- pandas/core/strings.py | 81 +- pandas/core/window.py | 28 +- pandas/errors/__init__.py | 12 + pandas/io/common.py | 30 +- pandas/io/excel.py | 29 +- pandas/io/formats/excel.py | 7 +- pandas/io/formats/format.py | 10 +- pandas/io/formats/style.py | 7 +- pandas/io/html.py | 18 +- pandas/io/json/json.py | 29 +- pandas/io/json/table_schema.py | 156 +- pandas/io/parquet.py | 41 +- pandas/io/parsers.py | 89 +- pandas/io/pytables.py | 11 +- pandas/io/s3.py | 10 +- pandas/io/sas/sas.pyx | 18 +- pandas/io/sql.py | 52 +- pandas/io/stata.py | 2 +- pandas/plotting/_converter.py | 4 +- pandas/plotting/_core.py | 12 +- pandas/plotting/_style.py | 4 +- pandas/tests/api/test_api.py | 19 +- pandas/tests/api/test_types.py | 2 +- pandas/tests/categorical/test_api.py | 11 +- pandas/tests/categorical/test_constructors.py | 4 +- pandas/tests/categorical/test_operators.py | 7 + pandas/tests/dtypes/test_dtypes.py | 100 +- pandas/tests/dtypes/test_generic.py | 2 + pandas/tests/dtypes/test_inference.py | 17 + pandas/tests/dtypes/test_io.py | 73 - pandas/tests/frame/test_apply.py | 10 + pandas/tests/frame/test_arithmetic.py | 37 + .../tests/frame/test_axis_select_reindex.py | 35 +- pandas/tests/frame/test_constructors.py | 11 + 
pandas/tests/frame/test_dtypes.py | 81 + pandas/tests/frame/test_mutate_columns.py | 7 +- pandas/tests/frame/test_operators.py | 47 + pandas/tests/frame/test_reshape.py | 68 + .../frame/test_sort_values_level_as_str.py | 126 + pandas/tests/frame/test_sorting.py | 16 +- pandas/tests/frame/test_subclass.py | 269 +- pandas/tests/frame/test_to_csv.py | 71 +- .../generic/test_label_or_level_utils.py | 11 +- .../tests/groupby/aggregate/test_aggregate.py | 553 ++-- pandas/tests/groupby/aggregate/test_other.py | 20 +- pandas/tests/groupby/test_bin_groupby.py | 23 +- pandas/tests/groupby/test_index_as_string.py | 2 +- pandas/tests/groupby/test_transform.py | 25 + .../indexes/datetimes/test_arithmetic.py | 370 ++- .../tests/indexes/datetimes/test_datetime.py | 102 +- .../indexes/datetimes/test_datetimelike.py | 23 +- pandas/tests/indexes/datetimes/test_ops.py | 76 +- pandas/tests/indexes/interval/test_astype.py | 209 ++ .../indexes/interval/test_construction.py | 342 +++ .../tests/indexes/interval/test_interval.py | 341 +-- pandas/tests/indexes/period/test_ops.py | 128 +- pandas/tests/indexes/test_base.py | 28 +- pandas/tests/indexes/test_category.py | 11 +- pandas/tests/indexes/test_multi.py | 58 + pandas/tests/indexes/test_numeric.py | 99 +- .../indexes/timedeltas/test_arithmetic.py | 345 ++- pandas/tests/indexes/timedeltas/test_ops.py | 59 - .../indexes/timedeltas/test_timedelta.py | 62 +- pandas/tests/indexing/test_coercion.py | 34 + pandas/tests/indexing/test_loc.py | 7 + pandas/tests/indexing/test_multiindex.py | 20 + pandas/tests/internals/test_internals.py | 21 +- pandas/tests/io/formats/test_format.py | 44 + pandas/tests/io/formats/test_to_excel.py | 60 + pandas/tests/io/json/test_compression.py | 68 +- .../tests/io/json/test_json_table_schema.py | 391 +-- pandas/tests/io/parser/na_values.py | 39 + pandas/tests/io/parser/test_network.py | 8 +- pandas/tests/io/parser/test_parsers.py | 4 +- pandas/tests/io/test_excel.py | 52 +- pandas/tests/io/test_feather.py | 5 +- 
pandas/tests/io/test_html.py | 7 + pandas/tests/io/test_parquet.py | 186 +- pandas/tests/io/test_pickle.py | 51 +- pandas/tests/io/test_s3.py | 6 +- pandas/tests/io/test_sql.py | 25 - pandas/tests/plotting/test_converter.py | 28 +- pandas/tests/plotting/test_datetimelike.py | 13 + pandas/tests/reshape/merge/test_merge.py | 50 +- .../merge/test_merge_index_as_string.py | 4 +- .../tests/reshape/merge/test_merge_ordered.py | 18 + pandas/tests/reshape/test_concat.py | 48 + pandas/tests/reshape/test_tile.py | 9 +- .../tests/reshape/test_union_categoricals.py | 9 + pandas/tests/scalar/test_interval.py | 4 +- pandas/tests/scalar/test_nat.py | 39 +- pandas/tests/scalar/test_timedelta.py | 101 + pandas/tests/scalar/test_timestamp.py | 605 ++--- pandas/tests/series/test_api.py | 2 +- pandas/tests/series/test_apply.py | 11 + pandas/tests/series/test_arithmetic.py | 136 + pandas/tests/series/test_constructors.py | 26 +- pandas/tests/series/test_dtypes.py | 14 +- pandas/tests/series/test_indexing.py | 32 +- pandas/tests/series/test_io.py | 24 + pandas/tests/series/test_operators.py | 808 +++--- pandas/tests/series/test_subclass.py | 15 +- pandas/tests/series/test_timeseries.py | 6 +- pandas/tests/sparse/frame/test_frame.py | 23 + pandas/tests/test_algos.py | 5 +- pandas/tests/test_base.py | 6 +- pandas/tests/test_multilevel.py | 32 + pandas/tests/test_panel.py | 2 +- pandas/tests/test_register_accessor.py | 87 + pandas/tests/test_resample.py | 20 +- pandas/tests/test_strings.py | 38 +- .../tests/tseries/offsets/test_liboffsets.py | 16 - pandas/tests/tseries/offsets/test_offsets.py | 7 + pandas/tests/tseries/test_timezones.py | 29 + pandas/tests/util/test_hashing.py | 15 - pandas/tests/util/test_util.py | 4 +- pandas/tools/hashing.py | 18 - pandas/tseries/offsets.py | 143 +- pandas/util/_decorators.py | 33 +- pandas/util/hashing.py | 18 - pandas/util/testing.py | 92 +- scripts/announce.py | 2 +- scripts/api_rst_coverage.py | 75 +- scripts/build_dist_for_release.sh | 0 
scripts/convert_deps.py | 0 scripts/find_commits_touching_func.py | 161 +- scripts/find_undoc_args.py | 161 +- scripts/list_future_warnings.sh | 46 + scripts/merge-pr.py | 14 +- setup.py | 13 +- 256 files changed, 12322 insertions(+), 10254 deletions(-) create mode 100644 .github/CODE_OF_CONDUCT.md create mode 100644 LICENSES/XARRAY_LICENSE create mode 100644 pandas/_libs/internals.pyx rename pandas/_libs/{src/reduce.pyx => reduction.pyx} (96%) delete mode 100644 pandas/_libs/src/klib/ktypes.h delete mode 100644 pandas/_libs/src/klib/kvec.h delete mode 100644 pandas/_libs/src/numpy.pxd delete mode 100644 pandas/_libs/src/parser/.gitignore delete mode 100644 pandas/_libs/src/parser/Makefile create mode 100644 pandas/api/extensions/__init__.py create mode 100644 pandas/core/arrays/__init__.py create mode 100644 pandas/core/arrays/categorical.py delete mode 100644 pandas/tests/dtypes/test_io.py create mode 100644 pandas/tests/frame/test_arithmetic.py create mode 100644 pandas/tests/frame/test_sort_values_level_as_str.py create mode 100644 pandas/tests/indexes/interval/test_astype.py create mode 100644 pandas/tests/indexes/interval/test_construction.py create mode 100644 pandas/tests/series/test_arithmetic.py create mode 100644 pandas/tests/test_register_accessor.py delete mode 100644 pandas/tools/hashing.py delete mode 100644 pandas/util/hashing.py mode change 100644 => 100755 scripts/announce.py mode change 100644 => 100755 scripts/api_rst_coverage.py mode change 100644 => 100755 scripts/build_dist_for_release.sh mode change 100644 => 100755 scripts/convert_deps.py create mode 100755 scripts/list_future_warnings.sh diff --git a/.github/CODE_OF_CONDUCT.md b/.github/CODE_OF_CONDUCT.md new file mode 100644 index 0000000000000..a1fbece3284ec --- /dev/null +++ b/.github/CODE_OF_CONDUCT.md @@ -0,0 +1,63 @@ +# Contributor Code of Conduct + +As contributors and maintainers of this project, and in the interest of +fostering an open and welcoming community, we pledge to 
respect all people who +contribute through reporting issues, posting feature requests, updating +documentation, submitting pull requests or patches, and other activities. + +We are committed to making participation in this project a harassment-free +experience for everyone, regardless of level of experience, gender, gender +identity and expression, sexual orientation, disability, personal appearance, +body size, race, ethnicity, age, religion, or nationality. + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery +* Personal attacks +* Trolling or insulting/derogatory comments +* Public or private harassment +* Publishing other's private information, such as physical or electronic + addresses, without explicit permission +* Other unethical or unprofessional conduct + +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +By adopting this Code of Conduct, project maintainers commit themselves to +fairly and consistently applying these principles to every aspect of managing +this project. Project maintainers who do not follow or enforce the Code of +Conduct may be permanently removed from the project team. + +This Code of Conduct applies both within project spaces and in public spaces +when an individual is representing the project or its community. + +A working group of community members is committed to promptly addressing any +reported issues. The working group is made up of pandas contributors and users. +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the working group by e-mail (pandas-coc@googlegroups.com). 
+Messages sent to this e-mail address will not be publicly visible but only to +the working group members. The working group currently includes + +- Safia Abdalla +- Tom Augspurger +- Joris Van den Bossche +- Camille Scott +- Nathaniel Smith + +All complaints will be reviewed and investigated and will result in a response +that is deemed necessary and appropriate to the circumstances. Maintainers are +obligated to maintain confidentiality with regard to the reporter of an +incident. + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 1.3.0, available at +[http://contributor-covenant.org/version/1/3/0/][version], +and the [Swift Code of Conduct][swift]. + +[homepage]: http://contributor-covenant.org +[version]: http://contributor-covenant.org/version/1/3/0/ +[swift]: https://swift.org/community/#code-of-conduct + diff --git a/.travis.yml b/.travis.yml index 5cc6547968b7d..bd5cac8955c8d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -49,6 +49,7 @@ matrix: apt: packages: - python-gtk2 + # In allow_failures - dist: trusty env: - JOB="3.5_CONDA_BUILD_TEST" TEST_ARGS="--skip-slow --skip-network" CONDA_BUILD_TEST=true @@ -76,6 +77,9 @@ matrix: env: - JOB="3.6_DOC" DOC=true allow_failures: + - dist: trusty + env: + - JOB="3.5_CONDA_BUILD_TEST" TEST_ARGS="--skip-slow --skip-network" CONDA_BUILD_TEST=true - dist: trusty env: - JOB="2.7_SLOW" SLOW=true @@ -95,6 +99,9 @@ matrix: before_install: - echo "before_install" + # set non-blocking IO on travis + # https://github.com/travis-ci/travis-ci/issues/8920#issuecomment-352661024 + - python -c 'import os,sys,fcntl; flags = fcntl.fcntl(sys.stdout, fcntl.F_GETFL); fcntl.fcntl(sys.stdout, fcntl.F_SETFL, flags&~os.O_NONBLOCK);' - source ci/travis_process_gbq_encryption.sh - export PATH="$HOME/miniconda3/bin:$PATH" - df -h diff --git a/LICENSES/XARRAY_LICENSE b/LICENSES/XARRAY_LICENSE new file mode 100644 index 0000000000000..37ec93a14fdcd --- /dev/null +++ b/LICENSES/XARRAY_LICENSE @@ -0,0 +1,191 @@ 
+Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and +distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by the copyright +owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities +that control, are controlled by, or are under common control with that entity. +For the purposes of this definition, "control" means (i) the power, direct or +indirect, to cause the direction or management of such entity, whether by +contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the +outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity exercising +permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, including +but not limited to software source code, documentation source, and configuration +files. + +"Object" form shall mean any form resulting from mechanical transformation or +translation of a Source form, including but not limited to compiled object code, +generated documentation, and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or Object form, made +available under the License, as indicated by a copyright notice that is included +in or attached to the work (an example is provided in the Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object form, that +is based on (or derived from) the Work and for which the editorial revisions, +annotations, elaborations, or other modifications represent, as a whole, an +original work of authorship. 
For the purposes of this License, Derivative Works +shall not include works that remain separable from, or merely link (or bind by +name) to the interfaces of, the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including the original version +of the Work and any modifications or additions to that Work or Derivative Works +thereof, that is intentionally submitted to Licensor for inclusion in the Work +by the copyright owner or by an individual or Legal Entity authorized to submit +on behalf of the copyright owner. For the purposes of this definition, +"submitted" means any form of electronic, verbal, or written communication sent +to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, and +issue tracking systems that are managed by, or on behalf of, the Licensor for +the purpose of discussing and improving the Work, but excluding communication +that is conspicuously marked or otherwise designated in writing by the copyright +owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf +of whom a Contribution has been received by Licensor and subsequently +incorporated within the Work. + +2. Grant of Copyright License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable copyright license to reproduce, prepare Derivative Works of, +publicly display, publicly perform, sublicense, and distribute the Work and such +Derivative Works in Source or Object form. + +3. Grant of Patent License. 
+ +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable (except as stated in this section) patent license to make, have +made, use, offer to sell, sell, import, and otherwise transfer the Work, where +such license applies only to those patent claims licensable by such Contributor +that are necessarily infringed by their Contribution(s) alone or by combination +of their Contribution(s) with the Work to which such Contribution(s) was +submitted. If You institute patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Work or a +Contribution incorporated within the Work constitutes direct or contributory +patent infringement, then any patent licenses granted to You under this License +for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. + +You may reproduce and distribute copies of the Work or Derivative Works thereof +in any medium, with or without modifications, and in Source or Object form, +provided that You meet the following conditions: + +You must give any other recipients of the Work or Derivative Works a copy of +this License; and +You must cause any modified files to carry prominent notices stating that You +changed the files; and +You must retain, in the Source form of any Derivative Works that You distribute, +all copyright, patent, trademark, and attribution notices from the Source form +of the Work, excluding those notices that do not pertain to any part of the +Derivative Works; and +If the Work includes a "NOTICE" text file as part of its distribution, then any +Derivative Works that You distribute must include a readable copy of the +attribution notices contained within such NOTICE file, excluding those notices +that do not pertain to any part of the Derivative Works, in at least one of the +following places: within a NOTICE text file 
distributed as part of the +Derivative Works; within the Source form or documentation, if provided along +with the Derivative Works; or, within a display generated by the Derivative +Works, if and wherever such third-party notices normally appear. The contents of +the NOTICE file are for informational purposes only and do not modify the +License. You may add Your own attribution notices within Derivative Works that +You distribute, alongside or as an addendum to the NOTICE text from the Work, +provided that such additional attribution notices cannot be construed as +modifying the License. +You may add Your own copyright statement to Your modifications and may provide +additional or different license terms and conditions for use, reproduction, or +distribution of Your modifications, or for any such Derivative Works as a whole, +provided Your use, reproduction, and distribution of the Work otherwise complies +with the conditions stated in this License. + +5. Submission of Contributions. + +Unless You explicitly state otherwise, any Contribution intentionally submitted +for inclusion in the Work by You to the Licensor shall be under the terms and +conditions of this License, without any additional terms or conditions. +Notwithstanding the above, nothing herein shall supersede or modify the terms of +any separate license agreement you may have executed with Licensor regarding +such Contributions. + +6. Trademarks. + +This License does not grant permission to use the trade names, trademarks, +service marks, or product names of the Licensor, except as required for +reasonable and customary use in describing the origin of the Work and +reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
+ +Unless required by applicable law or agreed to in writing, Licensor provides the +Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, +including, without limitation, any warranties or conditions of TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are +solely responsible for determining the appropriateness of using or +redistributing the Work and assume any risks associated with Your exercise of +permissions under this License. + +8. Limitation of Liability. + +In no event and under no legal theory, whether in tort (including negligence), +contract, or otherwise, unless required by applicable law (such as deliberate +and grossly negligent acts) or agreed to in writing, shall any Contributor be +liable to You for damages, including any direct, indirect, special, incidental, +or consequential damages of any character arising as a result of this License or +out of the use or inability to use the Work (including but not limited to +damages for loss of goodwill, work stoppage, computer failure or malfunction, or +any and all other commercial damages or losses), even if such Contributor has +been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. + +While redistributing the Work or Derivative Works thereof, You may choose to +offer, and charge a fee for, acceptance of support, warranty, indemnity, or +other liability obligations and/or rights consistent with this License. However, +in accepting such obligations, You may act only on Your own behalf and on Your +sole responsibility, not on behalf of any other Contributor, and only if You +agree to indemnify, defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason of your +accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work + +To apply the Apache License to your work, attach the following boilerplate +notice, with the fields enclosed by brackets "[]" replaced with your own +identifying information. (Don't include the brackets!) The text should be +enclosed in the appropriate comment syntax for the file format. We also +recommend that a file or class name and description of purpose be included on +the same "printed page" as the copyright notice for easier identification within +third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py index 2b48168238ee8..4cecf12a27042 100644 --- a/asv_bench/benchmarks/frame_methods.py +++ b/asv_bench/benchmarks/frame_methods.py @@ -4,7 +4,7 @@ from pandas import (DataFrame, Series, MultiIndex, date_range, period_range, isnull, NaT) -from .pandas_vb_common import setup # noqa +from .pandas_vb_common import setup # noqa class GetNumericData(object): @@ -127,7 +127,7 @@ class ToHTML(object): def setup(self): nrows = 500 self.df2 = DataFrame(np.random.randn(nrows, 10)) - self.df2[0] = period_range('2000', '2010', nrows) + self.df2[0] = period_range('2000', periods=nrows) self.df2[1] = range(nrows) def time_to_html_mixed(self): diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index 1978d240abedd..4dfd215e6dc3a 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -1,4 +1,4 @@ -from string import ascii_letters, digits +from string import ascii_letters from itertools import product from functools import partial @@ -275,18 +275,12 @@ class GroupStrings(object): def setup(self): n = 2 * 10**5 - alpha = list(map(''.join, product((ascii_letters + digits), repeat=4))) - self.df = DataFrame({'a': np.repeat(np.random.choice(alpha, - (n // 11)), 11), - 'b': np.repeat(np.random.choice(alpha, - (n // 7)), 7), - 'c': np.repeat(np.random.choice(alpha, - (n // 5)), 5), - 'd': np.repeat(np.random.choice(alpha, - (n // 1)), 1)}) + alpha = list(map(''.join, product(ascii_letters, repeat=4))) + data = np.random.choice(alpha, (n // 5, 4), replace=False) + data = np.repeat(data, 5, axis=0) + self.df = DataFrame(data, columns=list('abcd')) self.df['joe'] = (np.random.randn(len(self.df)) * 10).round(3) - i = np.random.permutation(len(self.df)) - self.df = self.df.iloc[i].reset_index(drop=True) + self.df = self.df.sample(frac=1).reset_index(drop=True) def time_multi_columns(self): self.df.groupby(list('abcd')).max() @@ -356,10 +350,16 @@ 
class GroupByMethods(object): goal_time = 0.2 - param_names = ['dtype', 'ngroups'] - params = [['int', 'float'], [100, 10000]] + param_names = ['dtype', 'method'] + params = [['int', 'float'], + ['all', 'any', 'count', 'cumcount', 'cummax', 'cummin', + 'cumprod', 'cumsum', 'describe', 'first', 'head', 'last', 'mad', + 'max', 'min', 'median', 'mean', 'nunique', 'pct_change', 'prod', + 'rank', 'sem', 'shift', 'size', 'skew', 'std', 'sum', 'tail', + 'unique', 'value_counts', 'var']] - def setup(self, dtype, ngroups): + def setup(self, dtype, method): + ngroups = 1000 size = ngroups * 2 rng = np.arange(ngroups) values = rng.take(np.random.randint(0, ngroups, size=size)) @@ -369,104 +369,11 @@ def setup(self, dtype, ngroups): key = np.concatenate([np.random.random(ngroups) * 0.1, np.random.random(ngroups) * 10.0]) - self.df = DataFrame({'values': values, - 'key': key}) + df = DataFrame({'values': values, 'key': key}) + self.df_groupby_method = getattr(df.groupby('key')['values'], method) - def time_all(self, dtype, ngroups): - self.df.groupby('key')['values'].all() - - def time_any(self, dtype, ngroups): - self.df.groupby('key')['values'].any() - - def time_count(self, dtype, ngroups): - self.df.groupby('key')['values'].count() - - def time_cumcount(self, dtype, ngroups): - self.df.groupby('key')['values'].cumcount() - - def time_cummax(self, dtype, ngroups): - self.df.groupby('key')['values'].cummax() - - def time_cummin(self, dtype, ngroups): - self.df.groupby('key')['values'].cummin() - - def time_cumprod(self, dtype, ngroups): - self.df.groupby('key')['values'].cumprod() - - def time_cumsum(self, dtype, ngroups): - self.df.groupby('key')['values'].cumsum() - - def time_describe(self, dtype, ngroups): - self.df.groupby('key')['values'].describe() - - def time_diff(self, dtype, ngroups): - self.df.groupby('key')['values'].diff() - - def time_first(self, dtype, ngroups): - self.df.groupby('key')['values'].first() - - def time_head(self, dtype, ngroups): - 
self.df.groupby('key')['values'].head() - - def time_last(self, dtype, ngroups): - self.df.groupby('key')['values'].last() - - def time_mad(self, dtype, ngroups): - self.df.groupby('key')['values'].mad() - - def time_max(self, dtype, ngroups): - self.df.groupby('key')['values'].max() - - def time_mean(self, dtype, ngroups): - self.df.groupby('key')['values'].mean() - - def time_median(self, dtype, ngroups): - self.df.groupby('key')['values'].median() - - def time_min(self, dtype, ngroups): - self.df.groupby('key')['values'].min() - - def time_nunique(self, dtype, ngroups): - self.df.groupby('key')['values'].nunique() - - def time_pct_change(self, dtype, ngroups): - self.df.groupby('key')['values'].pct_change() - - def time_prod(self, dtype, ngroups): - self.df.groupby('key')['values'].prod() - - def time_rank(self, dtype, ngroups): - self.df.groupby('key')['values'].rank() - - def time_sem(self, dtype, ngroups): - self.df.groupby('key')['values'].sem() - - def time_shift(self, dtype, ngroups): - self.df.groupby('key')['values'].shift() - - def time_size(self, dtype, ngroups): - self.df.groupby('key')['values'].size() - - def time_skew(self, dtype, ngroups): - self.df.groupby('key')['values'].skew() - - def time_std(self, dtype, ngroups): - self.df.groupby('key')['values'].std() - - def time_sum(self, dtype, ngroups): - self.df.groupby('key')['values'].sum() - - def time_tail(self, dtype, ngroups): - self.df.groupby('key')['values'].tail() - - def time_unique(self, dtype, ngroups): - self.df.groupby('key')['values'].unique() - - def time_value_counts(self, dtype, ngroups): - self.df.groupby('key')['values'].value_counts() - - def time_var(self, dtype, ngroups): - self.df.groupby('key')['values'].var() + def time_method(self, dtype, method): + self.df_groupby_method() class Float32(object): diff --git a/asv_bench/benchmarks/pandas_vb_common.py b/asv_bench/benchmarks/pandas_vb_common.py index 7b4fec0090701..c0d24afae4219 100644 --- 
a/asv_bench/benchmarks/pandas_vb_common.py +++ b/asv_bench/benchmarks/pandas_vb_common.py @@ -1,27 +1,29 @@ import os -from pandas import * -import pandas as pd -from numpy.random import randn -from numpy.random import randint -import pandas.util.testing as tm -import random -import numpy as np -import threading from importlib import import_module +import numpy as np try: - from pandas.compat import range + from pandas import Panel except ImportError: - pass + from pandas import WidePanel as Panel # noqa + +# Compatibility import for lib +for imp in ['pandas._libs.lib', 'pandas.lib']: + try: + lib = import_module(imp) + break + except: + pass numeric_dtypes = [np.int64, np.int32, np.uint32, np.uint64, np.float32, np.float64, np.int16, np.int8, np.uint16, np.uint8] datetime_dtypes = [np.datetime64, np.timedelta64] -# This function just needs to be imported into each benchmark file in order to -# sets up the random seed before each function. -# http://asv.readthedocs.io/en/latest/writing_benchmarks.html + def setup(*args, **kwargs): + # This function just needs to be imported into each benchmark file to + # set up the random seed before each function. 
+ # http://asv.readthedocs.io/en/latest/writing_benchmarks.html np.random.seed(1234) @@ -42,22 +44,3 @@ def remove(self, f): def teardown(self, *args, **kwargs): self.remove(self.fname) - -# Compatibility import for lib -for imp in ['pandas._libs.lib', 'pandas.lib', 'pandas_tseries']: - try: - lib = import_module(imp) - break - except: - pass - -try: - Panel = Panel -except Exception: - Panel = WidePanel - -# didn't add to namespace until later -try: - from pandas.core.index import MultiIndex -except ImportError: - pass diff --git a/asv_bench/benchmarks/stat_ops.py b/asv_bench/benchmarks/stat_ops.py index 1e1eb167b46bf..c447c78d0d070 100644 --- a/asv_bench/benchmarks/stat_ops.py +++ b/asv_bench/benchmarks/stat_ops.py @@ -1,205 +1,114 @@ -from .pandas_vb_common import * +import numpy as np +import pandas as pd +from .pandas_vb_common import setup # noqa -def _set_use_bottleneck_False(): - try: - pd.options.compute.use_bottleneck = False - except: - from pandas.core import nanops - nanops._USE_BOTTLENECK = False +ops = ['mean', 'sum', 'median', 'std', 'skew', 'kurt', 'mad', 'prod', 'sem', + 'var'] -class FrameOps(object): - goal_time = 0.2 - - param_names = ['op', 'use_bottleneck', 'dtype', 'axis'] - params = [['mean', 'sum', 'median'], - [True, False], - ['float', 'int'], - [0, 1]] - - def setup(self, op, use_bottleneck, dtype, axis): - if dtype == 'float': - self.df = DataFrame(np.random.randn(100000, 4)) - elif dtype == 'int': - self.df = DataFrame(np.random.randint(1000, size=(100000, 4))) - - if not use_bottleneck: - _set_use_bottleneck_False() - - self.func = getattr(self.df, op) - - def time_op(self, op, use_bottleneck, dtype, axis): - self.func(axis=axis) +class FrameOps(object): -class stat_ops_level_frame_sum(object): goal_time = 0.2 + params = [ops, ['float', 'int'], [0, 1], [True, False]] + param_names = ['op', 'dtype', 'axis', 'use_bottleneck'] - def setup(self): - self.index = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)], 
labels=[np.arange(10).repeat(10000), np.tile(np.arange(100).repeat(100), 10), np.tile(np.tile(np.arange(100), 100), 10)]) - random.shuffle(self.index.values) - self.df = DataFrame(np.random.randn(len(self.index), 4), index=self.index) - self.df_level = DataFrame(np.random.randn(100, 4), index=self.index.levels[1]) - - def time_stat_ops_level_frame_sum(self): - self.df.sum(level=1) - - -class stat_ops_level_frame_sum_multiple(object): - goal_time = 0.2 + def setup(self, op, dtype, axis, use_bottleneck): + df = pd.DataFrame(np.random.randn(100000, 4)).astype(dtype) + try: + pd.options.compute.use_bottleneck = use_bottleneck + except: + from pandas.core import nanops + nanops._USE_BOTTLENECK = use_bottleneck + self.df_func = getattr(df, op) - def setup(self): - self.index = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)], labels=[np.arange(10).repeat(10000), np.tile(np.arange(100).repeat(100), 10), np.tile(np.tile(np.arange(100), 100), 10)]) - random.shuffle(self.index.values) - self.df = DataFrame(np.random.randn(len(self.index), 4), index=self.index) - self.df_level = DataFrame(np.random.randn(100, 4), index=self.index.levels[1]) + def time_op(self, op, dtype, axis, use_bottleneck): + self.df_func(axis=axis) - def time_stat_ops_level_frame_sum_multiple(self): - self.df.sum(level=[0, 1]) +class FrameMultiIndexOps(object): -class stat_ops_level_series_sum(object): goal_time = 0.2 + params = ([0, 1, [0, 1]], ops) + param_names = ['level', 'op'] - def setup(self): - self.index = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)], labels=[np.arange(10).repeat(10000), np.tile(np.arange(100).repeat(100), 10), np.tile(np.tile(np.arange(100), 100), 10)]) - random.shuffle(self.index.values) - self.df = DataFrame(np.random.randn(len(self.index), 4), index=self.index) - self.df_level = DataFrame(np.random.randn(100, 4), index=self.index.levels[1]) + def setup(self, level, op): + levels = [np.arange(10), np.arange(100), np.arange(100)] + labels = 
[np.arange(10).repeat(10000), + np.tile(np.arange(100).repeat(100), 10), + np.tile(np.tile(np.arange(100), 100), 10)] + index = pd.MultiIndex(levels=levels, labels=labels) + df = pd.DataFrame(np.random.randn(len(index), 4), index=index) + self.df_func = getattr(df, op) - def time_stat_ops_level_series_sum(self): - self.df[1].sum(level=1) + def time_op(self, level, op): + self.df_func(level=level) -class stat_ops_level_series_sum_multiple(object): - goal_time = 0.2 - - def setup(self): - self.index = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)], labels=[np.arange(10).repeat(10000), np.tile(np.arange(100).repeat(100), 10), np.tile(np.tile(np.arange(100), 100), 10)]) - random.shuffle(self.index.values) - self.df = DataFrame(np.random.randn(len(self.index), 4), index=self.index) - self.df_level = DataFrame(np.random.randn(100, 4), index=self.index.levels[1]) - - def time_stat_ops_level_series_sum_multiple(self): - self.df[1].sum(level=[0, 1]) +class SeriesOps(object): - -class stat_ops_series_std(object): goal_time = 0.2 + params = [ops, ['float', 'int'], [True, False]] + param_names = ['op', 'dtype', 'use_bottleneck'] - def setup(self): - self.s = Series(np.random.randn(100000), index=np.arange(100000)) - self.s[::2] = np.nan - - def time_stat_ops_series_std(self): - self.s.std() + def setup(self, op, dtype, use_bottleneck): + s = pd.Series(np.random.randn(100000)).astype(dtype) + try: + pd.options.compute.use_bottleneck = use_bottleneck + except: + from pandas.core import nanops + nanops._USE_BOTTLENECK = use_bottleneck + self.s_func = getattr(s, op) + def time_op(self, op, dtype, use_bottleneck): + self.s_func() -class stats_corr_spearman(object): - goal_time = 0.2 - def setup(self): - self.df = DataFrame(np.random.randn(1000, 30)) +class SeriesMultiIndexOps(object): - def time_stats_corr_spearman(self): - self.df.corr(method='spearman') - - -class stats_rank2d_axis0_average(object): goal_time = 0.2 + params = ([0, 1, [0, 1]], ops) + param_names 
= ['level', 'op'] - def setup(self): - self.df = DataFrame(np.random.randn(5000, 50)) - - def time_stats_rank2d_axis0_average(self): - self.df.rank() + def setup(self, level, op): + levels = [np.arange(10), np.arange(100), np.arange(100)] + labels = [np.arange(10).repeat(10000), + np.tile(np.arange(100).repeat(100), 10), + np.tile(np.tile(np.arange(100), 100), 10)] + index = pd.MultiIndex(levels=levels, labels=labels) + s = pd.Series(np.random.randn(len(index)), index=index) + self.s_func = getattr(s, op) + def time_op(self, level, op): + self.s_func(level=level) -class stats_rank2d_axis1_average(object): - goal_time = 0.2 - - def setup(self): - self.df = DataFrame(np.random.randn(5000, 50)) - def time_stats_rank2d_axis1_average(self): - self.df.rank(1) +class Rank(object): - -class stats_rank_average(object): - goal_time = 0.2 - - def setup(self): - self.values = np.concatenate([np.arange(100000), np.random.randn(100000), np.arange(100000)]) - self.s = Series(self.values) - - def time_stats_rank_average(self): - self.s.rank() - - -class stats_rank_average_int(object): - goal_time = 0.2 - - def setup(self): - self.values = np.random.randint(0, 100000, size=200000) - self.s = Series(self.values) - - def time_stats_rank_average_int(self): - self.s.rank() - - -class stats_rank_pct_average(object): goal_time = 0.2 + params = [['DataFrame', 'Series'], [True, False]] + param_names = ['constructor', 'pct'] - def setup(self): - self.values = np.concatenate([np.arange(100000), np.random.randn(100000), np.arange(100000)]) - self.s = Series(self.values) + def setup(self, constructor, pct): + values = np.random.randn(10**5) + self.data = getattr(pd, constructor)(values) - def time_stats_rank_pct_average(self): - self.s.rank(pct=True) + def time_rank(self, constructor, pct): + self.data.rank(pct=pct) + def time_average_old(self, constructor, pct): + self.data.rank(pct=pct) / len(self.data) -class stats_rank_pct_average_old(object): - goal_time = 0.2 - - def setup(self): - 
self.values = np.concatenate([np.arange(100000), np.random.randn(100000), np.arange(100000)]) - self.s = Series(self.values) - - def time_stats_rank_pct_average_old(self): - (self.s.rank() / len(self.s)) +class Correlation(object): -class stats_rolling_mean(object): goal_time = 0.2 + params = ['spearman', 'kendall', 'pearson'] + param_names = ['method'] - def setup(self): - self.arr = np.random.randn(100000) - self.win = 100 - - def time_rolling_mean(self): - rolling_mean(self.arr, self.win) - - def time_rolling_median(self): - rolling_median(self.arr, self.win) - - def time_rolling_min(self): - rolling_min(self.arr, self.win) - - def time_rolling_max(self): - rolling_max(self.arr, self.win) - - def time_rolling_sum(self): - rolling_sum(self.arr, self.win) - - def time_rolling_std(self): - rolling_std(self.arr, self.win) - - def time_rolling_var(self): - rolling_var(self.arr, self.win) - - def time_rolling_skew(self): - rolling_skew(self.arr, self.win) + def setup(self, method): + self.df = pd.DataFrame(np.random.randn(1000, 30)) - def time_rolling_kurt(self): - rolling_kurt(self.arr, self.win) + def time_corr(self, method): + self.df.corr(method=method) diff --git a/asv_bench/benchmarks/strings.py b/asv_bench/benchmarks/strings.py index 948d4b92a5a57..4435327e1eb38 100644 --- a/asv_bench/benchmarks/strings.py +++ b/asv_bench/benchmarks/strings.py @@ -1,119 +1,144 @@ -from .pandas_vb_common import * -import string -import itertools as IT -import pandas.util.testing as testing +import numpy as np +from pandas import Series +import pandas.util.testing as tm -class StringMethods(object): - goal_time = 0.2 +class Methods(object): - def make_series(self, letters, strlen, size): - return Series([str(x) for x in np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))]) + goal_time = 0.2 def setup(self): - self.many = self.make_series(('matchthis' + string.ascii_uppercase), strlen=19, size=10000) - self.few = 
self.make_series(('matchthis' + (string.ascii_uppercase * 42)), strlen=19, size=10000) - self.s = self.make_series(string.ascii_uppercase, strlen=10, size=10000).str.join('|') + self.s = Series(tm.makeStringIndex(10**5)) def time_cat(self): - self.many.str.cat(sep=',') + self.s.str.cat(sep=',') def time_center(self): - self.many.str.center(100) - - def time_contains_few(self): - self.few.str.contains('matchthis') - - def time_contains_few_noregex(self): - self.few.str.contains('matchthis', regex=False) - - def time_contains_many(self): - self.many.str.contains('matchthis') - - def time_contains_many_noregex(self): - self.many.str.contains('matchthis', regex=False) + self.s.str.center(100) def time_count(self): - self.many.str.count('matchthis') + self.s.str.count('A') def time_endswith(self): - self.many.str.endswith('matchthis') + self.s.str.endswith('A') def time_extract(self): - self.many.str.extract('(\\w*)matchthis(\\w*)') + self.s.str.extract('(\\w*)A(\\w*)') def time_findall(self): - self.many.str.findall('[A-Z]+') + self.s.str.findall('[A-Z]+') def time_get(self): - self.many.str.get(0) - - def time_join_split(self): - self.many.str.join('--').str.split('--') - - def time_join_split_expand(self): - self.many.str.join('--').str.split('--', expand=True) + self.s.str.get(0) def time_len(self): - self.many.str.len() + self.s.str.len() def time_match(self): - self.many.str.match('mat..this') + self.s.str.match('A') def time_pad(self): - self.many.str.pad(100, side='both') - - def time_repeat(self): - self.many.str.repeat(list(IT.islice(IT.cycle(range(1, 4)), len(self.many)))) + self.s.str.pad(100, side='both') def time_replace(self): - self.many.str.replace('(matchthis)', '\x01\x01') + self.s.str.replace('A', '\x01\x01') def time_slice(self): - self.many.str.slice(5, 15, 2) + self.s.str.slice(5, 15, 2) def time_startswith(self): - self.many.str.startswith('matchthis') + self.s.str.startswith('A') def time_strip(self): - self.many.str.strip('matchthis') + 
self.s.str.strip('A') def time_rstrip(self): - self.many.str.rstrip('matchthis') + self.s.str.rstrip('A') def time_lstrip(self): - self.many.str.lstrip('matchthis') + self.s.str.lstrip('A') def time_title(self): - self.many.str.title() + self.s.str.title() def time_upper(self): - self.many.str.upper() + self.s.str.upper() def time_lower(self): - self.many.str.lower() + self.s.str.lower() + + +class Repeat(object): + + goal_time = 0.2 + params = ['int', 'array'] + param_names = ['repeats'] + + def setup(self, repeats): + N = 10**5 + self.s = Series(tm.makeStringIndex(N)) + repeat = {'int': 1, 'array': np.random.randint(1, 3, N)} + self.repeat = repeat[repeats] + + def time_repeat(self, repeats): + self.s.str.repeat(self.repeat) + + +class Contains(object): + + goal_time = 0.2 + params = [True, False] + param_names = ['regex'] + + def setup(self, regex): + self.s = Series(tm.makeStringIndex(10**5)) + + def time_contains(self, regex): + self.s.str.contains('A', regex=regex) + + +class Split(object): + + goal_time = 0.2 + params = [True, False] + param_names = ['expand'] + + def setup(self, expand): + self.s = Series(tm.makeStringIndex(10**5)).str.join('--') + + def time_split(self, expand): + self.s.str.split('--', expand=expand) + + +class Dummies(object): + + goal_time = 0.2 + + def setup(self): + self.s = Series(tm.makeStringIndex(10**5)).str.join('|') def time_get_dummies(self): self.s.str.get_dummies('|') -class StringEncode(object): +class Encode(object): + goal_time = 0.2 def setup(self): - self.ser = Series(testing.makeUnicodeIndex()) + self.ser = Series(tm.makeUnicodeIndex()) def time_encode_decode(self): self.ser.str.encode('utf-8').str.decode('utf-8') -class StringSlice(object): +class Slice(object): goal_time = 0.2 def setup(self): self.s = Series(['abcdefg', np.nan] * 500000) - def time_series_string_vector_slice(self): + def time_vector_slice(self): # GH 2602 self.s.str[:5] diff --git a/asv_bench/benchmarks/timedelta.py 
b/asv_bench/benchmarks/timedelta.py index 1897b0287ed19..3fe75b3c34299 100644 --- a/asv_bench/benchmarks/timedelta.py +++ b/asv_bench/benchmarks/timedelta.py @@ -1,12 +1,11 @@ import datetime import numpy as np -import pandas as pd - -from pandas import to_timedelta, Timestamp, Timedelta +from pandas import Series, timedelta_range, to_timedelta, Timestamp, Timedelta class TimedeltaConstructor(object): + goal_time = 0.2 def time_from_int(self): @@ -36,35 +35,44 @@ def time_from_missing(self): class ToTimedelta(object): + goal_time = 0.2 def setup(self): - self.arr = np.random.randint(0, 1000, size=10000) - self.arr2 = ['{0} days'.format(i) for i in self.arr] - - self.arr3 = np.random.randint(0, 60, size=10000) - self.arr3 = ['00:00:{0:02d}'.format(i) for i in self.arr3] - - self.arr4 = list(self.arr2) - self.arr4[-1] = 'apple' + self.ints = np.random.randint(0, 60, size=10000) + self.str_days = [] + self.str_seconds = [] + for i in self.ints: + self.str_days.append('{0} days'.format(i)) + self.str_seconds.append('00:00:{0:02d}'.format(i)) def time_convert_int(self): - to_timedelta(self.arr, unit='s') + to_timedelta(self.ints, unit='s') - def time_convert_string(self): - to_timedelta(self.arr2) + def time_convert_string_days(self): + to_timedelta(self.str_days) def time_convert_string_seconds(self): - to_timedelta(self.arr3) + to_timedelta(self.str_seconds) + + +class ToTimedeltaErrors(object): - def time_convert_coerce(self): - to_timedelta(self.arr4, errors='coerce') + goal_time = 0.2 + params = ['coerce', 'ignore'] + param_names = ['errors'] + + def setup(self, errors): + ints = np.random.randint(0, 60, size=10000) + self.arr = ['{0} days'.format(i) for i in ints] + self.arr[-1] = 'apple' - def time_convert_ignore(self): - to_timedelta(self.arr4, errors='ignore') + def time_convert(self, errors): + to_timedelta(self.arr, errors=errors) class TimedeltaOps(object): + goal_time = 0.2 def setup(self): @@ -76,43 +84,46 @@ def time_add_td_ts(self): class 
TimedeltaProperties(object): + goal_time = 0.2 - def setup(self): - self.td = Timedelta(days=365, minutes=35, seconds=25, milliseconds=35) + def setup_cache(self): + td = Timedelta(days=365, minutes=35, seconds=25, milliseconds=35) + return td - def time_timedelta_days(self): - self.td.days + def time_timedelta_days(self, td): + td.days - def time_timedelta_seconds(self): - self.td.seconds + def time_timedelta_seconds(self, td): + td.seconds - def time_timedelta_microseconds(self): - self.td.microseconds + def time_timedelta_microseconds(self, td): + td.microseconds - def time_timedelta_nanoseconds(self): - self.td.nanoseconds + def time_timedelta_nanoseconds(self, td): + td.nanoseconds class DatetimeAccessor(object): + goal_time = 0.2 - def setup(self): - self.N = 100000 - self.series = pd.Series( - pd.timedelta_range('1 days', periods=self.N, freq='h')) + def setup_cache(self): + N = 100000 + series = Series(timedelta_range('1 days', periods=N, freq='h')) + return series - def time_dt_accessor(self): - self.series.dt + def time_dt_accessor(self, series): + series.dt - def time_timedelta_dt_accessor_days(self): - self.series.dt.days + def time_timedelta_days(self, series): + series.dt.days - def time_timedelta_dt_accessor_seconds(self): - self.series.dt.seconds + def time_timedelta_seconds(self, series): + series.dt.seconds - def time_timedelta_dt_accessor_microseconds(self): - self.series.dt.microseconds + def time_timedelta_microseconds(self, series): + series.dt.microseconds - def time_timedelta_dt_accessor_nanoseconds(self): - self.series.dt.nanoseconds + def time_timedelta_nanoseconds(self, series): + series.dt.nanoseconds diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py index fe282df25e9c5..ea2f077f980d0 100644 --- a/asv_bench/benchmarks/timeseries.py +++ b/asv_bench/benchmarks/timeseries.py @@ -1,358 +1,330 @@ +from datetime import timedelta + +import numpy as np +from pandas import to_datetime, date_range, Series, 
DataFrame, period_range +from pandas.tseries.frequencies import infer_freq try: from pandas.plotting._converter import DatetimeConverter except ImportError: from pandas.tseries.converter import DatetimeConverter -import pandas as pd -from pandas import to_datetime, date_range, Series, DataFrame, period_range - -import datetime as dt -from pandas.tseries.frequencies import infer_freq -import numpy as np - -if hasattr(Series, 'convert'): - Series.resample = Series.convert +from .pandas_vb_common import setup # noqa class DatetimeIndex(object): + goal_time = 0.2 + params = ['dst', 'repeated', 'tz_aware', 'tz_naive'] + param_names = ['index_type'] - def setup(self): - self.N = 100000 - self.rng = date_range(start='1/1/2000', periods=self.N, freq='T') + def setup(self, index_type): + N = 100000 + dtidxes = {'dst': date_range(start='10/29/2000 1:00:00', + end='10/29/2000 1:59:59', freq='S'), + 'repeated': date_range(start='2000', + periods=N / 10, + freq='s').repeat(10), + 'tz_aware': date_range(start='2000', + periods=N, + freq='s', + tz='US/Eastern'), + 'tz_naive': date_range(start='2000', + periods=N, + freq='s')} + self.index = dtidxes[index_type] - self.rng2 = date_range(start='1/1/2000 9:30', periods=10000, - freq='S', tz='US/Eastern') + def time_add_timedelta(self, index_type): + self.index + timedelta(minutes=2) - self.index_repeated = date_range(start='1/1/2000', - periods=1000, freq='T').repeat(10) + def time_normalize(self, index_type): + self.index.normalize() - self.rng3 = date_range(start='1/1/2000', periods=1000, freq='H') - self.df = DataFrame(np.random.randn(len(self.rng3), 2), self.rng3) + def time_unique(self, index_type): + self.index.unique() - self.rng4 = date_range(start='1/1/2000', periods=1000, - freq='H', tz='US/Eastern') - self.df2 = DataFrame(np.random.randn(len(self.rng4), 2), - index=self.rng4) + def time_to_time(self, index_type): + self.index.time - N = 100000 - self.dti = pd.date_range('2011-01-01', freq='H', periods=N).repeat(5) - 
self.dti_tz = pd.date_range('2011-01-01', freq='H', periods=N, - tz='Asia/Tokyo').repeat(5) + def time_get(self, index_type): + self.index[0] - self.rng5 = date_range(start='1/1/2000', - end='3/1/2000', tz='US/Eastern') + def time_timeseries_is_month_start(self, index_type): + self.index.is_month_start - self.dst_rng = date_range(start='10/29/2000 1:00:00', - end='10/29/2000 1:59:59', freq='S') + def time_to_date(self, index_type): + self.index.date + + def time_to_pydatetime(self, index_type): + self.index.to_pydatetime() + + +class TzLocalize(object): + + goal_time = 0.2 + + def setup(self): + dst_rng = date_range(start='10/29/2000 1:00:00', + end='10/29/2000 1:59:59', freq='S') self.index = date_range(start='10/29/2000', end='10/29/2000 00:59:59', freq='S') - self.index = self.index.append(self.dst_rng) - self.index = self.index.append(self.dst_rng) + self.index = self.index.append(dst_rng) + self.index = self.index.append(dst_rng) self.index = self.index.append(date_range(start='10/29/2000 2:00:00', end='10/29/2000 3:00:00', freq='S')) - self.N = 10000 - self.rng6 = date_range(start='1/1/1', periods=self.N, freq='B') - - self.rng7 = date_range(start='1/1/1700', freq='D', periods=100000) - self.no_freq = self.rng7[:50000].append(self.rng7[50002:]) - self.d_freq = self.rng7[:50000].append(self.rng7[50000:]) + def time_infer_dst(self): + self.index.tz_localize('US/Eastern', infer_dst=True) - self.rng8 = date_range(start='1/1/1700', freq='B', periods=75000) - self.b_freq = self.rng8[:50000].append(self.rng8[50000:]) - def time_add_timedelta(self): - (self.rng + dt.timedelta(minutes=2)) +class ResetIndex(object): - def time_normalize(self): - self.rng2.normalize() + goal_time = 0.2 + params = [None, 'US/Eastern'] + param_names = 'tz' - def time_unique(self): - self.index_repeated.unique() + def setup(self, tz): + idx = date_range(start='1/1/2000', periods=1000, freq='H', tz=tz) + self.df = DataFrame(np.random.randn(1000, 2), index=idx) - def time_reset_index(self): 
+ def time_reest_datetimeindex(self, tz): self.df.reset_index() - def time_reset_index_tz(self): - self.df2.reset_index() - - def time_dti_factorize(self): - self.dti.factorize() - def time_dti_tz_factorize(self): - self.dti_tz.factorize() +class Factorize(object): - def time_dti_time(self): - self.dst_rng.time - - def time_timestamp_tzinfo_cons(self): - self.rng5[0] + goal_time = 0.2 + params = [None, 'Asia/Tokyo'] + param_names = 'tz' - def time_infer_dst(self): - self.index.tz_localize('US/Eastern', infer_dst=True) + def setup(self, tz): + N = 100000 + self.dti = date_range('2011-01-01', freq='H', periods=N, tz=tz) + self.dti = self.dti.repeat(5) - def time_timeseries_is_month_start(self): - self.rng6.is_month_start + def time_factorize(self, tz): + self.dti.factorize() - def time_infer_freq_none(self): - infer_freq(self.no_freq) - def time_infer_freq_daily(self): - infer_freq(self.d_freq) +class InferFreq(object): - def time_infer_freq_business(self): - infer_freq(self.b_freq) + goal_time = 0.2 + params = [None, 'D', 'B'] + param_names = ['freq'] - def time_to_date(self): - self.rng.date + def setup(self, freq): + if freq is None: + self.idx = date_range(start='1/1/1700', freq='D', periods=10000) + self.idx.freq = None + else: + self.idx = date_range(start='1/1/1700', freq=freq, periods=10000) - def time_to_pydatetime(self): - self.rng.to_pydatetime() + def time_infer_freq(self, freq): + infer_freq(self.idx) class TimeDatetimeConverter(object): + goal_time = 0.2 def setup(self): - self.N = 100000 - self.rng = date_range(start='1/1/2000', periods=self.N, freq='T') + N = 100000 + self.rng = date_range(start='1/1/2000', periods=N, freq='T') def time_convert(self): DatetimeConverter.convert(self.rng, None, None) class Iteration(object): - goal_time = 0.2 - - def setup(self): - self.N = 1000000 - self.M = 10000 - self.idx1 = date_range(start='20140101', freq='T', periods=self.N) - self.idx2 = period_range(start='20140101', freq='T', periods=self.N) - - def 
iter_n(self, iterable, n=None): - self.i = 0 - for _ in iterable: - self.i += 1 - if ((n is not None) and (self.i > n)): - break - def time_iter_datetimeindex(self): - self.iter_n(self.idx1) - - def time_iter_datetimeindex_preexit(self): - self.iter_n(self.idx1, self.M) + goal_time = 0.2 + params = [date_range, period_range] + param_names = ['time_index'] - def time_iter_periodindex(self): - self.iter_n(self.idx2) + def setup(self, time_index): + N = 10**6 + self.idx = time_index(start='20140101', freq='T', periods=N) + self.exit = 10000 - def time_iter_periodindex_preexit(self): - self.iter_n(self.idx2, self.M) + def time_iter(self, time_index): + for _ in self.idx: + pass + def time_iter_preexit(self, time_index): + for i, _ in enumerate(self.idx): + if i > self.exit: + break -# ---------------------------------------------------------------------- -# Resampling class ResampleDataFrame(object): - goal_time = 0.2 - - def setup(self): - self.rng = date_range(start='20130101', periods=100000, freq='50L') - self.df = DataFrame(np.random.randn(100000, 2), index=self.rng) - def time_max_numpy(self): - self.df.resample('1s', how=np.max) - - def time_max_string(self): - self.df.resample('1s', how='max') - - def time_mean_numpy(self): - self.df.resample('1s', how=np.mean) - - def time_mean_string(self): - self.df.resample('1s', how='mean') + goal_time = 0.2 + params = ['max', 'mean', 'min'] + param_names = ['method'] - def time_min_numpy(self): - self.df.resample('1s', how=np.min) + def setup(self, method): + rng = date_range(start='20130101', periods=100000, freq='50L') + df = DataFrame(np.random.randn(100000, 2), index=rng) + self.resample = getattr(df.resample('1s'), method) - def time_min_string(self): - self.df.resample('1s', how='min') + def time_method(self, method): + self.resample() class ResampleSeries(object): + + goal_time = 0.2 + params = (['period', 'datetime'], ['5min', '1D'], ['mean', 'ohlc']) + param_names = ['index', 'freq', 'method'] + + def setup(self, 
index, freq, method): + indexes = {'period': period_range(start='1/1/2000', + end='1/1/2001', + freq='T'), + 'datetime': date_range(start='1/1/2000', + end='1/1/2001', + freq='T')} + idx = indexes[index] + ts = Series(np.random.randn(len(idx)), index=idx) + self.resample = getattr(ts.resample(freq), method) + + def time_resample(self, index, freq, method): + self.resample() + + +class ResampleDatetetime64(object): + # GH 7754 goal_time = 0.2 def setup(self): - self.rng1 = period_range(start='1/1/2000', end='1/1/2001', freq='T') - self.ts1 = Series(np.random.randn(len(self.rng1)), index=self.rng1) + rng3 = date_range(start='2000-01-01 00:00:00', + end='2000-01-01 10:00:00', freq='555000U') + self.dt_ts = Series(5, rng3, dtype='datetime64[ns]') - self.rng2 = date_range(start='1/1/2000', end='1/1/2001', freq='T') - self.ts2 = Series(np.random.randn(len(self.rng2)), index=self.rng2) - - self.rng3 = date_range(start='2000-01-01 00:00:00', - end='2000-01-01 10:00:00', freq='555000U') - self.int_ts = Series(5, self.rng3, dtype='int64') - self.dt_ts = self.int_ts.astype('datetime64[ns]') - - def time_period_downsample_mean(self): - self.ts1.resample('D', how='mean') - - def time_timestamp_downsample_mean(self): - self.ts2.resample('D', how='mean') - - def time_resample_datetime64(self): - # GH 7754 - self.dt_ts.resample('1S', how='last') - - def time_1min_5min_mean(self): - self.ts2[:10000].resample('5min', how='mean') - - def time_1min_5min_ohlc(self): - self.ts2[:10000].resample('5min', how='ohlc') + def time_resample(self): + self.dt_ts.resample('1S').last() class AsOf(object): - goal_time = 0.2 - def setup(self): - self.N = 10000 - self.rng = date_range(start='1/1/1990', periods=self.N, freq='53s') - self.ts = Series(np.random.randn(self.N), index=self.rng) - self.dates = date_range(start='1/1/1990', - periods=(self.N * 10), freq='5s') + goal_time = 0.2 + params = ['DataFrame', 'Series'] + param_names = ['constructor'] + + def setup(self, constructor): + N = 10000 + M 
= 10 + rng = date_range(start='1/1/1990', periods=N, freq='53s') + data = {'DataFrame': DataFrame(np.random.randn(N, M)), + 'Series': Series(np.random.randn(N))} + self.ts = data[constructor] + self.ts.index = rng self.ts2 = self.ts.copy() - self.ts2[250:5000] = np.nan + self.ts2.iloc[250:5000] = np.nan self.ts3 = self.ts.copy() - self.ts3[-5000:] = np.nan + self.ts3.iloc[-5000:] = np.nan + self.dates = date_range(start='1/1/1990', periods=N * 10, freq='5s') + self.date = self.dates[0] + self.date_last = self.dates[-1] + self.date_early = self.date - timedelta(10) # test speed of pre-computing NAs. - def time_asof(self): + def time_asof(self, constructor): self.ts.asof(self.dates) # should be roughly the same as above. - def time_asof_nan(self): + def time_asof_nan(self, constructor): self.ts2.asof(self.dates) # test speed of the code path for a scalar index # without *while* loop - def time_asof_single(self): - self.ts.asof(self.dates[0]) + def time_asof_single(self, constructor): + self.ts.asof(self.date) # test speed of the code path for a scalar index # before the start. should be the same as above. - def time_asof_single_early(self): - self.ts.asof(self.dates[0] - dt.timedelta(10)) + def time_asof_single_early(self, constructor): + self.ts.asof(self.date_early) # test the speed of the code path for a scalar index # with a long *while* loop. should still be much # faster than pre-computing all the NAs. 
- def time_asof_nan_single(self): - self.ts3.asof(self.dates[-1]) + def time_asof_nan_single(self, constructor): + self.ts3.asof(self.date_last) -class AsOfDataFrame(object): - goal_time = 0.2 +class SortIndex(object): - def setup(self): - self.N = 10000 - self.M = 100 - self.rng = date_range(start='1/1/1990', periods=self.N, freq='53s') - self.dates = date_range(start='1/1/1990', - periods=(self.N * 10), freq='5s') - self.ts = DataFrame(np.random.randn(self.N, self.M), index=self.rng) - self.ts2 = self.ts.copy() - self.ts2.iloc[250:5000] = np.nan - self.ts3 = self.ts.copy() - self.ts3.iloc[-5000:] = np.nan - - # test speed of pre-computing NAs. - def time_asof(self): - self.ts.asof(self.dates) + goal_time = 0.2 + params = [True, False] + param_names = ['monotonic'] - # should be roughly the same as above. - def time_asof_nan(self): - self.ts2.asof(self.dates) + def setup(self, monotonic): + N = 10**5 + idx = date_range(start='1/1/2000', periods=N, freq='s') + self.s = Series(np.random.randn(N), index=idx) + if not monotonic: + self.s = self.s.sample(frac=1) - # test speed of the code path for a scalar index - # with pre-computing all NAs. - def time_asof_single(self): - self.ts.asof(self.dates[0]) + def time_sort_index(self, monotonic): + self.s.sort_index() - # should be roughly the same as above. - def time_asof_nan_single(self): - self.ts3.asof(self.dates[-1]) + def time_get_slice(self, monotonic): + self.s[:10000] - # test speed of the code path for a scalar index - # before the start. should be without the cost of - # pre-computing all the NAs. 
- def time_asof_single_early(self): - self.ts.asof(self.dates[0] - dt.timedelta(10)) +class IrregularOps(object): -class TimeSeries(object): goal_time = 0.2 def setup(self): - self.N = 100000 - self.rng = date_range(start='1/1/2000', periods=self.N, freq='s') - self.rng = self.rng.take(np.random.permutation(self.N)) - self.ts = Series(np.random.randn(self.N), index=self.rng) - - self.rng2 = date_range(start='1/1/2000', periods=self.N, freq='T') - self.ts2 = Series(np.random.randn(self.N), index=self.rng2) + N = 10**5 + idx = date_range(start='1/1/2000', periods=N, freq='s') + s = Series(np.random.randn(N), index=idx) + self.left = s.sample(frac=1) + self.right = s.sample(frac=1) - self.lindex = np.random.permutation(self.N)[:(self.N // 2)] - self.rindex = np.random.permutation(self.N)[:(self.N // 2)] - self.left = Series(self.ts2.values.take(self.lindex), - index=self.ts2.index.take(self.lindex)) - self.right = Series(self.ts2.values.take(self.rindex), - index=self.ts2.index.take(self.rindex)) + def time_add(self): + self.left + self.right - self.rng3 = date_range(start='1/1/2000', periods=1500000, freq='S') - self.ts3 = Series(1, index=self.rng3) - def time_sort_index_monotonic(self): - self.ts2.sort_index() +class Lookup(object): - def time_sort_index_non_monotonic(self): - self.ts.sort_index() + goal_time = 0.2 - def time_timeseries_slice_minutely(self): - self.ts2[:10000] + def setup(self): + N = 1500000 + rng = date_range(start='1/1/2000', periods=N, freq='S') + self.ts = Series(1, index=rng) + self.lookup_val = rng[N // 2] - def time_add_irregular(self): - (self.left + self.right) + def time_lookup_and_cleanup(self): + self.ts[self.lookup_val] + self.ts.index._cleanup() - def time_large_lookup_value(self): - self.ts3[self.ts3.index[(len(self.ts3) // 2)]] - self.ts3.index._cleanup() +class ToDatetimeYYYYMMDD(object): -class ToDatetime(object): goal_time = 0.2 def setup(self): - self.rng = date_range(start='1/1/2000', periods=10000, freq='D') - self.stringsD = 
Series(self.rng.strftime('%Y%m%d')) + rng = date_range(start='1/1/2000', periods=10000, freq='D') + self.stringsD = Series(rng.strftime('%Y%m%d')) - self.rng = date_range(start='1/1/2000', periods=20000, freq='H') - self.strings = self.rng.strftime('%Y-%m-%d %H:%M:%S').tolist() - self.strings_nosep = self.rng.strftime('%Y%m%d %H:%M:%S').tolist() - self.strings_tz_space = [x.strftime('%Y-%m-%d %H:%M:%S') + ' -0800' - for x in self.rng] + def time_format_YYYYMMDD(self): + to_datetime(self.stringsD, format='%Y%m%d') - self.s = Series((['19MAY11', '19MAY11:00:00:00'] * 100000)) - self.s2 = self.s.str.replace(':\\S+$', '') - self.unique_numeric_seconds = range(10000) - self.dup_numeric_seconds = [1000] * 10000 - self.dup_string_dates = ['2000-02-11'] * 10000 - self.dup_string_with_tz = ['2000-02-11 15:00:00-0800'] * 10000 +class ToDatetimeISO8601(object): - def time_format_YYYYMMDD(self): - to_datetime(self.stringsD, format='%Y%m%d') + goal_time = 0.2 + + def setup(self): + rng = date_range(start='1/1/2000', periods=20000, freq='H') + self.strings = rng.strftime('%Y-%m-%d %H:%M:%S').tolist() + self.strings_nosep = rng.strftime('%Y%m%d %H:%M:%S').tolist() + self.strings_tz_space = [x.strftime('%Y-%m-%d %H:%M:%S') + ' -0800' + for x in rng] def time_iso8601(self): to_datetime(self.strings) @@ -369,49 +341,56 @@ def time_iso8601_format_no_sep(self): def time_iso8601_tz_spaceformat(self): to_datetime(self.strings_tz_space) - def time_format_exact(self): + +class ToDatetimeFormat(object): + + goal_time = 0.2 + + def setup(self): + self.s = Series(['19MAY11', '19MAY11:00:00:00'] * 100000) + self.s2 = self.s.str.replace(':\\S+$', '') + + def time_exact(self): to_datetime(self.s2, format='%d%b%y') - def time_format_no_exact(self): + def time_no_exact(self): to_datetime(self.s, format='%d%b%y', exact=False) - def time_cache_true_with_unique_seconds_and_unit(self): - to_datetime(self.unique_numeric_seconds, unit='s', cache=True) - - def 
time_cache_false_with_unique_seconds_and_unit(self): - to_datetime(self.unique_numeric_seconds, unit='s', cache=False) - def time_cache_true_with_dup_seconds_and_unit(self): - to_datetime(self.dup_numeric_seconds, unit='s', cache=True) +class ToDatetimeCache(object): - def time_cache_false_with_dup_seconds_and_unit(self): - to_datetime(self.dup_numeric_seconds, unit='s', cache=False) + goal_time = 0.2 + params = [True, False] + param_names = ['cache'] - def time_cache_true_with_dup_string_dates(self): - to_datetime(self.dup_string_dates, cache=True) + def setup(self, cache): + N = 10000 + self.unique_numeric_seconds = range(N) + self.dup_numeric_seconds = [1000] * N + self.dup_string_dates = ['2000-02-11'] * N + self.dup_string_with_tz = ['2000-02-11 15:00:00-0800'] * N - def time_cache_false_with_dup_string_dates(self): - to_datetime(self.dup_string_dates, cache=False) + def time_unique_seconds_and_unit(self, cache): + to_datetime(self.unique_numeric_seconds, unit='s', cache=cache) - def time_cache_true_with_dup_string_dates_and_format(self): - to_datetime(self.dup_string_dates, format='%Y-%m-%d', cache=True) + def time_dup_seconds_and_unit(self, cache): + to_datetime(self.dup_numeric_seconds, unit='s', cache=cache) - def time_cache_false_with_dup_string_dates_and_format(self): - to_datetime(self.dup_string_dates, format='%Y-%m-%d', cache=False) + def time_dup_string_dates(self, cache): + to_datetime(self.dup_string_dates, cache=cache) - def time_cache_true_with_dup_string_tzoffset_dates(self): - to_datetime(self.dup_string_with_tz, cache=True) + def time_dup_string_dates_and_format(self, cache): + to_datetime(self.dup_string_dates, format='%Y-%m-%d', cache=cache) - def time_cache_false_with_dup_string_tzoffset_dates(self): - to_datetime(self.dup_string_with_tz, cache=False) + def time_dup_string_tzoffset_dates(self, cache): + to_datetime(self.dup_string_with_tz, cache=cache) class DatetimeAccessor(object): + def setup(self): - self.N = 100000 - self.series = 
pd.Series( - pd.date_range(start='1/1/2000', periods=self.N, freq='T') - ) + N = 100000 + self.series = Series(date_range(start='1/1/2000', periods=N, freq='T')) def time_dt_accessor(self): self.series.dt diff --git a/asv_bench/benchmarks/timestamp.py b/asv_bench/benchmarks/timestamp.py index 62abaca17d22f..c142a9b59fc43 100644 --- a/asv_bench/benchmarks/timestamp.py +++ b/asv_bench/benchmarks/timestamp.py @@ -1,10 +1,10 @@ +import datetime + from pandas import Timestamp import pytz -import datetime class TimestampConstruction(object): - # TODO: classmethod constructors: fromordinal, fromtimestamp... def time_parse_iso8601_no_tz(self): Timestamp('2017-08-25 08:16:14') @@ -21,6 +21,12 @@ def time_parse_today(self): def time_parse_now(self): Timestamp('now') + def time_fromordinal(self): + Timestamp.fromordinal(730120) + + def time_fromtimestamp(self): + Timestamp.fromtimestamp(1515448538) + class TimestampProperties(object): goal_time = 0.2 @@ -36,9 +42,6 @@ def setup(self, tz, freq): def time_tz(self, tz, freq): self.ts.tz - def time_offset(self, tz, freq): - self.ts.offset - def time_dayofweek(self, tz, freq): self.ts.dayofweek diff --git a/ci/install_travis.sh b/ci/install_travis.sh index 272e7f2e05d14..4ec5b0a9d8820 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -101,9 +101,6 @@ time conda create -n pandas --file=${REQ} || exit 1 source activate pandas -# https://github.com/travis-ci/travis-ci/issues/8920#issuecomment-352661024 -python -c "import fcntl; fcntl.fcntl(1, fcntl.F_SETFL, 0)" - # may have addtl installation instructions for this build echo echo "[build addtl installs]" diff --git a/ci/lint.sh b/ci/lint.sh index b4eafcaf28e39..49bf9a690b990 100755 --- a/ci/lint.sh +++ b/ci/lint.sh @@ -24,12 +24,19 @@ if [ "$LINT" ]; then echo "Linting setup.py DONE" echo "Linting asv_bench/benchmarks/" - flake8 asv_bench/benchmarks/ --exclude=asv_bench/benchmarks/[ps]*.py --ignore=F811 + flake8 asv_bench/benchmarks/ --exclude=asv_bench/benchmarks/*.py 
--ignore=F811 if [ $? -ne "0" ]; then RET=1 fi echo "Linting asv_bench/benchmarks/*.py DONE" + echo "Linting scripts/*.py" + flake8 scripts --filename=*.py + if [ $? -ne "0" ]; then + RET=1 + fi + echo "Linting scripts/*.py DONE" + echo "Linting *.pyx" flake8 pandas --filename=*.pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123,F403 if [ $? -ne "0" ]; then @@ -89,8 +96,25 @@ if [ "$LINT" ]; then if [ $? = "0" ]; then RET=1 fi + + # Check for pytest.warns + grep -r -E --include '*.py' 'pytest\.warns' pandas/tests/ + + if [ $? = "0" ]; then + RET=1 + fi + echo "Check for invalid testing DONE" + # Check for imports from pandas.core.common instead + # of `import pandas.core.common as com` + echo "Check for non-standard imports" + grep -R --include="*.py*" -E "from pandas.core.common import " pandas + if [ $? = "0" ]; then + RET=1 + fi + echo "Check for non-standard imports DONE" + echo "Check for use of lists instead of generators in built-in Python functions" # Example: Avoid `any([i for i in some_iterator])` in favor of `any(i for i in some_iterator)` @@ -117,6 +141,14 @@ if [ "$LINT" ]; then fi done echo "Check for incorrect sphinx directives DONE" + + echo "Check for deprecated messages without sphinx directive" + grep -R --include="*.py" --include="*.pyx" -E "(DEPRECATED|DEPRECATE|Deprecated)(:|,|\.)" pandas + + if [ $? 
= "0" ]; then + RET=1 + fi + echo "Check for deprecated messages without sphinx directive DONE" else echo "NOT Linting" fi diff --git a/ci/requirements-2.7.build b/ci/requirements-2.7.build index e24baa98d956e..17d34f3895c64 100644 --- a/ci/requirements-2.7.build +++ b/ci/requirements-2.7.build @@ -2,5 +2,5 @@ python=2.7* python-dateutil=2.5.0 pytz=2013b nomkl -numpy +numpy=1.13* cython=0.24 diff --git a/ci/requirements-3.6.build b/ci/requirements-3.6.build index 1c4b46aea3865..94e1152450d87 100644 --- a/ci/requirements-3.6.build +++ b/ci/requirements-3.6.build @@ -2,5 +2,5 @@ python=3.6* python-dateutil pytz nomkl -numpy +numpy=1.13.* cython diff --git a/ci/requirements-3.6_DOC.build b/ci/requirements-3.6_DOC.build index bdcfe28105866..bc72eed2a0d4e 100644 --- a/ci/requirements-3.6_DOC.build +++ b/ci/requirements-3.6_DOC.build @@ -1,5 +1,5 @@ python=3.6* python-dateutil pytz -numpy +numpy=1.13* cython diff --git a/ci/requirements-3.6_WIN.run b/ci/requirements-3.6_WIN.run index db2d429a2a4ff..3042888763863 100644 --- a/ci/requirements-3.6_WIN.run +++ b/ci/requirements-3.6_WIN.run @@ -12,5 +12,6 @@ numexpr pytables matplotlib blosc +thrift=0.10* fastparquet pyarrow diff --git a/conda.recipe/meta.yaml b/conda.recipe/meta.yaml index 3510496f0b519..87a79f7e5a987 100644 --- a/conda.recipe/meta.yaml +++ b/conda.recipe/meta.yaml @@ -14,14 +14,14 @@ requirements: build: - python - cython - - {{ pin_compatible('numpy') }} + - {{ pin_compatible('numpy', upper_bound='1.14') }} - setuptools >=3.3 - python-dateutil >=2.5.0 - pytz run: - python - - {{ pin_compatible('numpy') }} + - {{ pin_compatible('numpy', upper_bound='1.14') }} - python-dateutil >=2.5.0 - pytz diff --git a/doc/source/api.rst b/doc/source/api.rst index 02f729c89295b..ddd09327935ce 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1617,7 +1617,6 @@ IntervalIndex Components IntervalIndex.from_arrays IntervalIndex.from_tuples IntervalIndex.from_breaks - IntervalIndex.from_intervals 
IntervalIndex.contains IntervalIndex.left IntervalIndex.right @@ -2498,6 +2497,20 @@ Scalar introspection api.types.is_re_compilable api.types.is_scalar +Extensions +---------- + +These are primarily intended for library authors looking to extend pandas +objects. + +.. currentmodule:: pandas + +.. autosummary:: + :toctree: generated/ + + api.extensions.register_dataframe_accessor + api.extensions.register_series_accessor + api.extensions.register_index_accessor .. This is to prevent warnings in the doc build. We don't want to encourage .. these methods. diff --git a/doc/source/basics.rst b/doc/source/basics.rst index 55c26e2186344..18da53506f018 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -226,11 +226,11 @@ We can also do elementwise :func:`divmod`: Missing data / operations with fill values ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -In Series and DataFrame, the arithmetic functions have the option of inputting -a *fill_value*, namely a value to substitute when at most one of the values at -a location are missing. For example, when adding two DataFrame objects, you may -wish to treat NaN as 0 unless both DataFrames are missing that value, in which -case the result will be NaN (you can later replace NaN with some other value +In Series and DataFrame, the arithmetic functions have the option of inputting +a *fill_value*, namely a value to substitute when at most one of the values at +a location are missing. For example, when adding two DataFrame objects, you may +wish to treat NaN as 0 unless both DataFrames are missing that value, in which +case the result will be NaN (you can later replace NaN with some other value using ``fillna`` if you wish). .. ipython:: python @@ -260,8 +260,8 @@ arithmetic operations described above: df.gt(df2) df2.ne(df) -These operations produce a pandas object of the same type as the left-hand-side -input that is of dtype ``bool``.
These ``boolean`` objects can be used in +These operations produce a pandas object of the same type as the left-hand-side +input that is of dtype ``bool``. These ``boolean`` objects can be used in indexing operations, see the section on :ref:`Boolean indexing`. .. _basics.reductions: @@ -452,7 +452,7 @@ So, for instance, to reproduce :meth:`~DataFrame.combine_first` as above: Descriptive statistics ---------------------- -There exists a large number of methods for computing descriptive statistics and +There exists a large number of methods for computing descriptive statistics and other related operations on :ref:`Series `, :ref:`DataFrame `, and :ref:`Panel `. Most of these are aggregations (hence producing a lower-dimensional result) like @@ -540,7 +540,7 @@ will exclude NAs on Series input by default: np.mean(df['one']) np.mean(df['one'].values) -:meth:`Series.nunique` will return the number of unique non-NA values in a +:meth:`Series.nunique` will return the number of unique non-NA values in a Series: .. ipython:: python @@ -852,7 +852,7 @@ Aggregation API The aggregation API allows one to express possibly multiple aggregation operations in a single concise way. This API is similar across pandas objects, see :ref:`groupby API `, the :ref:`window functions API `, and the :ref:`resample API `. -The entry point for aggregation is :meth:`DataFrame.aggregate`, or the alias +The entry point for aggregation is :meth:`DataFrame.aggregate`, or the alias :meth:`DataFrame.agg`. We will use a similar starting frame from above: @@ -864,8 +864,8 @@ We will use a similar starting frame from above: tsdf.iloc[3:7] = np.nan tsdf -Using a single function is equivalent to :meth:`~DataFrame.apply`. You can also -pass named methods as strings. These will return a ``Series`` of the aggregated +Using a single function is equivalent to :meth:`~DataFrame.apply`. You can also +pass named methods as strings. These will return a ``Series`` of the aggregated output: .. 
ipython:: python @@ -887,7 +887,7 @@ Single aggregations on a ``Series`` this will return a scalar value: Aggregating with multiple functions +++++++++++++++++++++++++++++++++++ -You can pass multiple aggregation arguments as a list. +You can pass multiple aggregation arguments as a list. The results of each of the passed functions will be a row in the resulting ``DataFrame``. These are naturally named from the aggregation function. @@ -1430,7 +1430,7 @@ Series can also be used: df.rename(columns={'one': 'foo', 'two': 'bar'}, index={'a': 'apple', 'b': 'banana', 'd': 'durian'}) -If the mapping doesn't include a column/index label, it isn't renamed. Note that +If the mapping doesn't include a column/index label, it isn't renamed. Note that extra labels in the mapping don't throw an error. .. versionadded:: 0.21.0 @@ -1740,19 +1740,26 @@ description. Sorting ------- -There are two obvious kinds of sorting that you may be interested in: sorting -by label and sorting by actual values. +Pandas supports three kinds of sorting: sorting by index labels, +sorting by column values, and sorting by a combination of both. + +.. _basics.sort_index: By Index ~~~~~~~~ -The primary method for sorting axis -labels (indexes) are the ``Series.sort_index()`` and the ``DataFrame.sort_index()`` methods. +The :meth:`Series.sort_index` and :meth:`DataFrame.sort_index` methods are +used to sort a pandas object by its index levels. .. ipython:: python + df = pd.DataFrame({'one' : pd.Series(np.random.randn(3), index=['a', 'b', 'c']), + 'two' : pd.Series(np.random.randn(4), index=['a', 'b', 'c', 'd']), + 'three' : pd.Series(np.random.randn(3), index=['b', 'c', 'd'])}) + unsorted_df = df.reindex(index=['a', 'd', 'c', 'b'], columns=['three', 'two', 'one']) + unsorted_df # DataFrame unsorted_df.sort_index() @@ -1762,20 +1769,22 @@ labels (indexes) are the ``Series.sort_index()`` and the ``DataFrame.sort_index( # Series unsorted_df['three'].sort_index() +.. 
_basics.sort_values: + By Values ~~~~~~~~~ -The :meth:`Series.sort_values` and :meth:`DataFrame.sort_values` are the entry points for **value** sorting (i.e. the values in a column or row). -:meth:`DataFrame.sort_values` can accept an optional ``by`` argument for ``axis=0`` -which will use an arbitrary vector or a column name of the DataFrame to -determine the sort order: +The :meth:`Series.sort_values` method is used to sort a `Series` by its values. The +:meth:`DataFrame.sort_values` method is used to sort a `DataFrame` by its column or row values. +The optional ``by`` parameter to :meth:`DataFrame.sort_values` may be used to specify one or more columns +to use to determine the sorted order. .. ipython:: python df1 = pd.DataFrame({'one':[2,1,1,1],'two':[1,3,2,4],'three':[5,4,3,2]}) df1.sort_values(by='two') -The ``by`` argument can take a list of column names, e.g.: +The ``by`` parameter can take a list of column names, e.g.: .. ipython:: python @@ -1790,6 +1799,39 @@ argument: s.sort_values() s.sort_values(na_position='first') +.. _basics.sort_indexes_and_values: + +By Indexes and Values +~~~~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 0.23.0 + +Strings passed as the ``by`` parameter to :meth:`DataFrame.sort_values` may +refer to either columns or index level names. + +.. ipython:: python + + # Build MultiIndex + idx = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('a', 2), + ('b', 2), ('b', 1), ('b', 1)]) + idx.names = ['first', 'second'] + + # Build DataFrame + df_multi = pd.DataFrame({'A': np.arange(6, 0, -1)}, + index=idx) + df_multi + +Sort by 'second' (index) and 'A' (column) + +.. ipython:: python + + df_multi.sort_values(by=['second', 'A']) + +.. note:: + + If a string matches both a column name and an index level name then a + warning is issued and the column takes precedence. This will result in an + ambiguity error in a future version. ..
_basics.searchsorted: @@ -1881,7 +1923,7 @@ The main types stored in pandas objects are ``float``, ``int``, ``bool``, ``int64`` and ``int32``. See :ref:`Series with TZ ` for more detail on ``datetime64[ns, tz]`` dtypes. -A convenient :attr:`~DataFrame.dtypes` attribute for DataFrame returns a Series +A convenient :attr:`~DataFrame.dtypes` attribute for DataFrame returns a Series with the data type of each column. .. ipython:: python @@ -1902,8 +1944,8 @@ On a ``Series`` object, use the :attr:`~Series.dtype` attribute. dft['A'].dtype -If a pandas object contains data with multiple dtypes *in a single column*, the -dtype of the column will be chosen to accommodate all of the data types +If a pandas object contains data with multiple dtypes *in a single column*, the +dtype of the column will be chosen to accommodate all of the data types (``object`` is the most general). .. ipython:: python @@ -1941,7 +1983,7 @@ defaults ~~~~~~~~ By default integer types are ``int64`` and float types are ``float64``, -*regardless* of platform (32-bit or 64-bit). +*regardless* of platform (32-bit or 64-bit). The following will all result in ``int64`` dtypes. .. ipython:: python diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index 83437022563d5..258ab874cafcf 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -171,6 +171,9 @@ We'll now kick off a three-step process: # Create and activate the build environment conda env create -f ci/environment-dev.yaml conda activate pandas-dev + + # or with older versions of Anaconda: + source activate pandas-dev # Build and install pandas python setup.py build_ext --inplace -j 4 @@ -456,7 +459,7 @@ Here are *some* of the more common ``cpplint`` issues: - we restrict line-length to 80 characters to promote readability - every header file must include a header guard to avoid name collisions if re-included -:ref:`Continuous Integration `. 
will run the +:ref:`Continuous Integration ` will run the `cpplint `_ tool and report any stylistic errors in your code. Therefore, it is helpful before submitting code to run the check yourself:: @@ -547,7 +550,30 @@ Backwards Compatibility Please try to maintain backward compatibility. *pandas* has lots of users with lots of existing code, so don't break it if at all possible. If you think breakage is required, clearly state why as part of the pull request. Also, be careful when changing method -signatures and add deprecation warnings where needed. +signatures and add deprecation warnings where needed. Also, add the deprecated sphinx +directive to the deprecated functions or methods. + +If a function with the same arguments as the one being deprecated exist, you can use +the ``pandas.util._decorators.deprecate``: + +.. code-block:: python + + from pandas.util._decorators import deprecate + + deprecate('old_func', 'new_func', '0.21.0') + +Otherwise, you need to do it manually: + +.. code-block:: python + + def old_func(): + """Summary of the function. + + .. deprecated:: 0.21.0 + Use new_func instead. + """ + warnings.warn('Use new_func instead.', FutureWarning, stacklevel=2) + new_func() .. _contributing.ci: @@ -835,9 +861,9 @@ takes a regular expression. For example, this will only run tests from a If you want to only run a specific group of tests from a file, you can do it using ``.`` as a separator. For example:: - asv continuous -f 1.1 upstream/master HEAD -b groupby.groupby_agg_builtins + asv continuous -f 1.1 upstream/master HEAD -b groupby.GroupByMethods -will only run the ``groupby_agg_builtins`` benchmark defined in ``groupby.py``. +will only run the ``GroupByMethods`` benchmark defined in ``groupby.py``. You can also run the benchmark suite using the version of ``pandas`` already installed in your current Python environment. 
This can be diff --git a/doc/source/developer.rst b/doc/source/developer.rst index b8bb2b2fcbe2f..0ef097da090f2 100644 --- a/doc/source/developer.rst +++ b/doc/source/developer.rst @@ -140,3 +140,46 @@ As an example of fully-formed metadata: 'metadata': None} ], 'pandas_version': '0.20.0'} + +.. _developer.register-accessors: + +Registering Custom Accessors +---------------------------- + +Libraries can use the decorators +:func:`pandas.api.extensions.register_dataframe_accessor`, +:func:`pandas.api.extensions.register_series_accessor`, and +:func:`pandas.api.extensions.register_index_accessor`, to add additional "namespaces" to +pandas objects. All of these follow a similar convention: you decorate a class, providing the name of the attribute to add. The +class's `__init__` method gets the object being decorated. For example: + +.. code-block:: python + + @pd.api.extensions.register_dataframe_accessor("geo") + class GeoAccessor(object): + def __init__(self, pandas_obj): + self._obj = pandas_obj + + @property + def center(self): + # return the geographic center point of this DataFrame + lat = self._obj.latitude + lon = self._obj.longitude + return (float(lon.mean()), float(lat.mean())) + + def plot(self): + # plot this array's data on a map, e.g., using Cartopy + pass + +Now users can access your methods using the `geo` namespace: + + >>> ds = pd.DataFrame({'longitude': np.linspace(0, 10), + ... 'latitude': np.linspace(0, 20)}) + >>> ds.geo.center + (5.0, 10.0) + >>> ds.geo.plot() + # plots data on a map + +This can be a convenient way to extend pandas objects without subclassing them. +If you write a custom accessor, make a pull request adding it to our +:ref:`ecosystem` page.
diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index 4ebc8b82aaa47..750b260c7f228 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -640,6 +640,7 @@ For getting *multiple* indexers, using ``.get_indexer``: dfd.iloc[[0, 2], dfd.columns.get_indexer(['A', 'B'])] +.. _deprecate_loc_reindex_listlike: .. _indexing.deprecate_loc_reindex_listlike: Indexing with list with missing labels is Deprecated diff --git a/doc/source/internals.rst b/doc/source/internals.rst index a321b4202296f..ee4df879d9478 100644 --- a/doc/source/internals.rst +++ b/doc/source/internals.rst @@ -100,6 +100,8 @@ Subclassing pandas Data Structures 2. Use *composition*. See `here `_. + 3. Extending by :ref:`registering an accessor ` + This section describes how to subclass ``pandas`` data structures to meet more specific needs. There are 2 points which need attention: 1. Override constructor properties. diff --git a/doc/source/io.rst b/doc/source/io.rst index 2ef7e6d3b64f4..ae04996b4fddf 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -149,7 +149,7 @@ squeeze : boolean, default ``False`` prefix : str, default ``None`` Prefix to add to column numbers when no header, e.g. 'X' for X0, X1, ... mangle_dupe_cols : boolean, default ``True`` - Duplicate columns will be specified as 'X.0'...'X.N', rather than 'X'...'X'. + Duplicate columns will be specified as 'X', 'X.1'...'X.N', rather than 'X'...'X'. Passing in False will cause data to be overwritten if there are duplicate names in the columns. @@ -214,8 +214,20 @@ na_values : scalar, str, list-like, or dict, default ``None`` for a list of the values interpreted as NaN by default. keep_default_na : boolean, default ``True`` - If na_values are specified and keep_default_na is ``False`` the default NaN - values are overridden, otherwise they're appended to. + Whether or not to include the default NaN values when parsing the data. 
+ Depending on whether `na_values` is passed in, the behavior is as follows: + + * If `keep_default_na` is True, and `na_values` are specified, `na_values` + is appended to the default NaN values used for parsing. + * If `keep_default_na` is True, and `na_values` are not specified, only + the default NaN values are used for parsing. + * If `keep_default_na` is False, and `na_values` are specified, only + the NaN values specified `na_values` are used for parsing. + * If `keep_default_na` is False, and `na_values` are not specified, no + strings will be parsed as NaN. + + Note that if `na_filter` is passed in as False, the `keep_default_na` and + `na_values` parameters will be ignored. na_filter : boolean, default ``True`` Detect missing value markers (empty strings and the value of na_values). In data without any NAs, passing ``na_filter=False`` can improve the performance @@ -548,7 +560,7 @@ these names so as to prevent data overwrite: pd.read_csv(StringIO(data)) There is no more duplicate data because ``mangle_dupe_cols=True`` by default, which modifies -a series of duplicate columns 'X'...'X' to become 'X.0'...'X.N'. If ``mangle_dupe_cols +a series of duplicate columns 'X'...'X' to become 'X', 'X.1',...'X.N'. If ``mangle_dupe_cols =False``, duplicate data can arise: .. code-block :: python @@ -1648,7 +1660,7 @@ with optional parameters: DataFrame - default is ``columns`` - - allowed values are {``split``, ``records``, ``index``, ``columns``, ``values``} + - allowed values are {``split``, ``records``, ``index``, ``columns``, ``values``, ``table``} The format of the JSON string @@ -1732,6 +1744,9 @@ values, index and columns. Name is also included for ``Series``: dfjo.to_json(orient="split") sjo.to_json(orient="split") +**Table oriented** serializes to the JSON `Table Schema`_, allowing for the +preservation of metadata including but not limited to dtypes and index names. + .. 
note:: Any orient option that encodes to a JSON object will not preserve the ordering of @@ -1833,7 +1848,7 @@ is ``None``. To explicitly force ``Series`` parsing, pass ``typ=series`` DataFrame - default is ``columns`` - - allowed values are {``split``, ``records``, ``index``, ``columns``, ``values``} + - allowed values are {``split``, ``records``, ``index``, ``columns``, ``values``, ``table``} The format of the JSON string @@ -1846,6 +1861,8 @@ is ``None``. To explicitly force ``Series`` parsing, pass ``typ=series`` ``index``; dict like {index -> {column -> value}} ``columns``; dict like {column -> {index -> value}} ``values``; just the values array + ``table``; adhering to the JSON `Table Schema`_ + - ``dtype`` : if True, infer dtypes, if a dict of column to dtype, then use those, if False, then don't infer dtypes at all, default is True, apply only to the data - ``convert_axes`` : boolean, try to convert the axes to the proper dtypes, default is True @@ -2202,7 +2219,40 @@ A few notes on the generated table schema: then ``level_`` is used. -_Table Schema: http://specs.frictionlessdata.io/json-table-schema/ +.. versionadded:: 0.23.0 + +``read_json`` also accepts ``orient='table'`` as an argument. This allows for +the preservation of metadata such as dtypes and index names in a +round-trippable manner. + + .. ipython:: python + + df = pd.DataFrame({'foo': [1, 2, 3, 4], + 'bar': ['a', 'b', 'c', 'd'], + 'baz': pd.date_range('2018-01-01', freq='d', periods=4), + 'qux': pd.Categorical(['a', 'b', 'c', 'c']) + }, index=pd.Index(range(4), name='idx')) + df + df.dtypes + + df.to_json('test.json', orient='table') + new_df = pd.read_json('test.json', orient='table') + new_df + new_df.dtypes + +Please note that the literal string 'index' as the name of an :class:`Index` +is not round-trippable, nor are any names beginning with 'level_' within a +:class:`MultiIndex`.
These are used by default in :func:`DataFrame.to_json` to +indicate missing values and the subsequent read cannot distinguish the intent. + +.. ipython:: python + + df.index.name = 'index' + df.to_json('test.json', orient='table') + new_df = pd.read_json('test.json', orient='table') + print(new_df.index.name) + +.. _Table Schema: http://specs.frictionlessdata.io/json-table-schema/ HTML ---- diff --git a/doc/source/overview.rst b/doc/source/overview.rst index 4443428ca6c9b..f86b1c67e6843 100644 --- a/doc/source/overview.rst +++ b/doc/source/overview.rst @@ -94,7 +94,7 @@ pandas possible. Thanks to `all of our contributors `__. -pandas is a `NUMFocus `__ sponsored project. +pandas is a `NumFOCUS `__ sponsored project. This will help ensure the success of development of pandas as a world-class open-source project, and makes it possible to `donate `__ to the project. diff --git a/doc/source/whatsnew/v0.17.1.txt b/doc/source/whatsnew/v0.17.1.txt index d5ed0503d9ee3..6e5e113e859d7 100644 --- a/doc/source/whatsnew/v0.17.1.txt +++ b/doc/source/whatsnew/v0.17.1.txt @@ -5,7 +5,7 @@ v0.17.1 (November 21, 2015) .. note:: - We are proud to announce that *pandas* has become a sponsored project of the (`NUMFocus organization`_). This will help ensure the success of development of *pandas* as a world-class open-source project. + We are proud to announce that *pandas* has become a sponsored project of the (`NumFOCUS organization`_). This will help ensure the success of development of *pandas* as a world-class open-source project. .. _numfocus organization: http://www.numfocus.org/blog/numfocus-announces-new-fiscally-sponsored-project-pandas diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index a62a737fbba31..1890636bc8e1a 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -62,6 +62,32 @@ levels ` documentation section. left.merge(right, on=['key1', 'key2']) +.. 
_whatsnew_0230.enhancements.sort_by_columns_and_levels: + +Sorting by a combination of columns and index levels +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Strings passed to :meth:`DataFrame.sort_values` as the ``by`` parameter may +now refer to either column names or index level names. This enables sorting +``DataFrame`` instances by a combination of index levels and columns without +resetting indexes. See the :ref:`Sorting by Indexes and Values +` documentation section. +(:issue:`14353`) + +.. ipython:: python + + # Build MultiIndex + idx = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('a', 2), + ('b', 2), ('b', 1), ('b', 1)]) + idx.names = ['first', 'second'] + + # Build DataFrame + df_multi = pd.DataFrame({'A': np.arange(6, 0, -1)}, + index=idx) + df_multi + + # Sort by 'second' (index) and 'A' (column) + df_multi.sort_values(by=['second', 'A']) .. _whatsnew_0230.enhancements.ran_inf: @@ -119,6 +145,37 @@ Current Behavior s.rank(na_option='top') +.. _whatsnew_0230.enhancements.round-trippable_json: + +JSON read/write round-trippable with ``orient='table'`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A ``DataFrame`` can now be written to and subsequently read back via JSON while preserving metadata through usage of the ``orient='table'`` argument (see :issue:`18912` and :issue:`9146`). Previously, none of the available ``orient`` values guaranteed the preservation of dtypes and index names, amongst other metadata. + +.. 
ipython:: python + + df = pd.DataFrame({'foo': [1, 2, 3, 4], + 'bar': ['a', 'b', 'c', 'd'], + 'baz': pd.date_range('2018-01-01', freq='d', periods=4), + 'qux': pd.Categorical(['a', 'b', 'c', 'c']) + }, index=pd.Index(range(4), name='idx')) + df + df.dtypes + df.to_json('test.json', orient='table') + new_df = pd.read_json('test.json', orient='table') + new_df + new_df.dtypes + +Please note that the string ``'index'`` is not supported as an index name with the round trip format, as it is used by default in :func:`DataFrame.to_json` to indicate a missing index name. + +.. ipython:: python + + df.index.name = 'index' + df.to_json('test.json', orient='table') + new_df = pd.read_json('test.json', orient='table') + new_df + print(new_df.index.name) + .. _whatsnew_0230.enhancements.other: Other Enhancements @@ -145,6 +202,13 @@ Other Enhancements - ``Resampler`` objects now have a functioning :attr:`~pandas.core.resample.Resampler.pipe` method. Previously, calls to ``pipe`` were diverted to the ``mean`` method (:issue:`17905`). - :func:`~pandas.api.types.is_scalar` now returns ``True`` for ``DateOffset`` objects (:issue:`18943`). +- Added :func:`pandas.api.extensions.register_dataframe_accessor`, + :func:`pandas.api.extensions.register_series_accessor`, and + :func:`pandas.api.extensions.register_index_accessor`, for libraries downstream of pandas + to register custom accessors like ``.cat`` on pandas objects. See + :ref:`Registering Custom Accessors ` for more (:issue:`14781`). +- ``IntervalIndex.astype`` now supports conversions between subtypes when passed an ``IntervalDtype`` (:issue:`19197`) +- :class:`IntervalIndex` and its associated constructor methods (``from_arrays``, ``from_breaks``, ``from_tuples``) have gained a ``dtype`` parameter (:issue:`19262`) .. _whatsnew_0230.api_breaking: @@ -167,6 +231,34 @@ If installed, we now require: | openpyxl | 2.4.0 | | +-----------------+-----------------+----------+ +..
_whatsnew_0230.api_breaking.deprecate_panel: + +Deprecate Panel +^^^^^^^^^^^^^^^ + +``Panel`` was deprecated in the 0.20.x release, showing as a ``DeprecationWarning``. Using ``Panel`` will now show a ``FutureWarning``. The recommended way to represent 3-D data is +with a ``MultiIndex`` on a ``DataFrame`` via the :meth:`~Panel.to_frame` method or with the `xarray package `__. Pandas +provides a :meth:`~Panel.to_xarray` method to automate this conversion. For more details see :ref:`Deprecate Panel ` documentation. (:issue:`13563`, :issue:`18324`). + +.. ipython:: python + :okwarning: + + p = tm.makePanel() + p + +Convert to a MultiIndex DataFrame + +.. ipython:: python + + p.to_frame() + +Convert to an xarray DataArray + +.. ipython:: python + :okwarning: + + p.to_xarray() + Build Changes ^^^^^^^^^^^^^ @@ -213,6 +305,13 @@ Other API Changes - Subtraction of :class:`Series` with timezone-aware ``dtype='datetime64[ns]'`` with mis-matched timezones will raise ``TypeError`` instead of ``ValueError`` (issue:`18817`) - :class:`IntervalIndex` and ``IntervalDtype`` no longer support categorical, object, and string subtypes (:issue:`19016`) - The default ``Timedelta`` constructor now accepts an ``ISO 8601 Duration`` string as an argument (:issue:`19040`) +- ``IntervalDtype`` now returns ``True`` when compared against ``'interval'`` regardless of subtype, and ``IntervalDtype.name`` now returns ``'interval'`` regardless of subtype (:issue:`18980`) +- ``KeyError`` is now raised instead of ``ValueError`` in :meth:`~DataFrame.drop`, :meth:`~Panel.drop`, :meth:`~Series.drop`, :meth:`~Index.drop` when dropping a non-existent element in an axis with duplicates (:issue:`19186`) +- :func:`Series.to_csv` now accepts a ``compression`` argument that works in the same way as the ``compression`` argument in :func:`DataFrame.to_csv` (:issue:`18958`) +- Addition or subtraction of ``NaT`` from :class:`TimedeltaIndex` will return ``TimedeltaIndex`` instead of ``DatetimeIndex`` (:issue:`19124`) +-
:func:`DatetimeIndex.shift` and :func:`TimedeltaIndex.shift` will now raise ``NullFrequencyError`` (which subclasses ``ValueError``, which was raised in older versions) when the index object frequency is ``None`` (:issue:`19147`) +- Addition and subtraction of ``NaN`` from a :class:`Series` with ``dtype='timedelta64[ns]'`` will raise a ``TypeError`` instead of treating the ``NaN`` as ``NaT`` (:issue:`19274`) +- Set operations (union, difference...) on :class:`IntervalIndex` with incompatible index types will now raise a ``TypeError`` rather than a ``ValueError`` (:issue:`19329`) .. _whatsnew_0230.deprecations: @@ -229,6 +328,8 @@ Deprecations - ``Series.valid`` is deprecated. Use :meth:`Series.dropna` instead (:issue:`18800`). - :func:`read_excel` has deprecated the ``skip_footer`` parameter. Use ``skipfooter`` instead (:issue:`18836`) - The ``is_copy`` attribute is deprecated and will be removed in a future version (:issue:`18801`). +- ``IntervalIndex.from_intervals`` is deprecated in favor of the :class:`IntervalIndex` constructor (:issue:`19263`) + .. _whatsnew_0230.prior_deprecations: @@ -256,6 +357,10 @@ Removal of prior version deprecations/changes - The ``freqstr`` keyword has been removed from ``pandas.tseries.frequencies.to_offset`` in favor of ``freq`` (:issue:`13874`) - The ``Panel4D`` and ``PanelND`` classes have been removed (:issue:`13776`) - The ``Panel``class has dropped the ``to_long``and ``toLong`` methods (:issue:`19077`) +- The options ``display.line_width`` and ``display.height`` are removed in favor of ``display.width`` and ``display.max_rows`` respectively (:issue:`4391`, :issue:`19107`) +- The ``labels`` attribute of the ``Categorical`` class has been removed in favor of :attr:`Categorical.codes` (:issue:`7768`) +- The ``flavor`` parameter has been removed from the :func:`to_sql` method (:issue:`13611`) +- The modules ``pandas.tools.hashing`` and ``pandas.util.hashing`` have been removed (:issue:`16223`) ..
_whatsnew_0230.performance: @@ -274,6 +379,9 @@ Performance Improvements - Improved performance of :func:`IntervalIndex.symmetric_difference()` (:issue:`18475`) - Improved performance of ``DatetimeIndex`` and ``Series`` arithmetic operations with Business-Month and Business-Quarter frequencies (:issue:`18489`) - :func:`Series` / :func:`DataFrame` tab completion limits to 100 values, for better performance. (:issue:`18587`) +- Improved performance of :func:`DataFrame.median` with ``axis=1`` when bottleneck is not installed (:issue:`16468`) +- Improved performance of :func:`MultiIndex.get_loc` for large indexes, at the cost of a reduction in performance for small ones (:issue:`18519`) + .. _whatsnew_0230.docs: @@ -293,46 +401,87 @@ Bug Fixes ~~~~~~~~~ -Conversion -^^^^^^^^^^ +Datetimelike +^^^^^^^^^^^^ -- Bug in :class:`Index` constructor with ``dtype='uint64'`` where int-like floats were not coerced to :class:`UInt64Index` (:issue:`18400`) -- Bug in the :class:`DataFrame` constructor in which data containing very large positive or very large negative numbers was causing ``OverflowError`` (:issue:`18584`) -- Fixed a bug where creating a Series from an array that contains both tz-naive and tz-aware values will result in a Series whose dtype is tz-aware instead of object (:issue:`16406`) +- Bug in :func:`Series.__sub__` subtracting a non-nanosecond ``np.datetime64`` object from a ``Series`` gave incorrect results (:issue:`7996`) +- Bug in :class:`DatetimeIndex`, :class:`TimedeltaIndex` addition and subtraction of zero-dimensional integer arrays gave incorrect results (:issue:`19012`) +- Bug in :func:`Series.__add__` adding Series with dtype ``timedelta64[ns]`` to a timezone-aware ``DatetimeIndex`` incorrectly dropped timezone information (:issue:`13905`) +- Bug in :func:`Timedelta.__floordiv__` and :func:`Timedelta.__rfloordiv__` dividing by many incompatible numpy objects was incorrectly allowed (:issue:`18846`) - Adding a ``Period`` object to a ``datetime`` or 
``Timestamp`` object will now correctly raise a ``TypeError`` (:issue:`17983`) -- Fixed a bug where ``FY5253`` date offsets could incorrectly raise an ``AssertionError`` in arithmetic operatons (:issue:`14774`) -- Bug in :meth:`Index.astype` with a categorical dtype where the resultant index is not converted to a :class:`CategoricalIndex` for all types of index (:issue:`18630`) -- Bug in :meth:`Series.astype` and ``Categorical.astype()`` where an existing categorical data does not get updated (:issue:`10696`, :issue:`18593`) -- Bug in :class:`Series` constructor with an int or float list where specifying ``dtype=str``, ``dtype='str'`` or ``dtype='U'`` failed to convert the data elements to strings (:issue:`16605`) - Bug in :class:`Timestamp` where comparison with an array of ``Timestamp`` objects would result in a ``RecursionError`` (:issue:`15183`) -- Bug in :class:`WeekOfMonth` and class:`Week` where addition and subtraction did not roll correctly (:issue:`18510`,:issue:`18672`,:issue:`18864`) -- Bug in :meth:`DatetimeIndex.astype` when converting between timezone aware dtypes, and converting from timezone aware to naive (:issue:`18951`) -- Bug in :class:`FY5253` where ``datetime`` addition and subtraction incremented incorrectly for dates on the year-end but not normalized to midnight (:issue:`18854`) -- Bug in :class:`DatetimeIndex` where adding or subtracting an array-like of ``DateOffset`` objects either raised (``np.array``, ``pd.Index``) or broadcast incorrectly (``pd.Series``) (:issue:`18849`) +- Bug in :class:`DatetimeIndex` and :class:`TimedeltaIndex` where adding or subtracting an array-like of ``DateOffset`` objects either raised (``np.array``, ``pd.Index``) or broadcast incorrectly (``pd.Series``) (:issue:`18849`) - Bug in :class:`Series` floor-division where operating on a scalar ``timedelta`` raises an exception (:issue:`18846`) +- Bug in :class:`Series`` with ``dtype='timedelta64[ns]`` where addition or subtraction of ``TimedeltaIndex`` had results 
cast to ``dtype='int64'`` (:issue:`17250`) +- Bug in :class:`TimedeltaIndex` where division by a ``Series`` would return a ``TimedeltaIndex`` instead of a ``Series`` (:issue:`19042`) +- Bug in :class:`Series` with ``dtype='timedelta64[ns]'`` where addition or subtraction of ``TimedeltaIndex`` could return a ``Series`` with an incorrect name (:issue:`19043`) +- Bug in :class:`DatetimeIndex` where the repr was not showing high-precision time values at the end of a day (e.g., 23:59:59.999999999) (:issue:`19030`) +- Bug where dividing a scalar timedelta-like object with :class:`TimedeltaIndex` performed the reciprocal operation (:issue:`19125`) +- Bug in ``.astype()`` to non-ns timedelta units would hold the incorrect dtype (:issue:`19176`, :issue:`19223`, :issue:`12425`) +- Bug in subtracting :class:`Series` from ``NaT`` incorrectly returning ``NaT`` (:issue:`19158`) +- Bug in :func:`Series.truncate` which raises ``TypeError`` with a monotonic ``PeriodIndex`` (:issue:`17717`) + +Timezones +^^^^^^^^^ + +- Bug in creating a ``Series`` from an array that contains both tz-naive and tz-aware values will result in a ``Series`` whose dtype is tz-aware instead of object (:issue:`16406`) +- Bug in comparison of timezone-aware :class:`DatetimeIndex` against ``NaT`` incorrectly raising ``TypeError`` (:issue:`19276`) +- Bug in :meth:`DatetimeIndex.astype` when converting between timezone aware dtypes, and converting from timezone aware to naive (:issue:`18951`) +- Bug in comparing :class:`DatetimeIndex`, which failed to raise ``TypeError`` when attempting to compare timezone-aware and timezone-naive datetimelike objects (:issue:`18162`) +- Bug in localization of a naive, datetime string in a ``Series`` constructor with a ``datetime64[ns, tz]`` dtype (:issue:`174151`) +- :func:`Timestamp.replace` will now handle Daylight Savings transitions gracefully (:issue:`18319`) +- Bug in tz-aware :class:`DatetimeIndex` where addition/subtraction with a :class:`TimedeltaIndex` or array with
``dtype='timedelta64[ns]'`` was incorrect (:issue:`17558`) +- Bug in :func:`DatetimeIndex.insert` where inserting ``NaT`` into a timezone-aware index incorrectly raised (:issue:`16357`) +- Bug in the :class:`DataFrame` constructor, where tz-aware ``DatetimeIndex`` and a given column name will result in an empty ``DataFrame`` (:issue:`19157`) + +Offsets +^^^^^^^ + +- Bug in :class:`WeekOfMonth` and :class:`Week` where addition and subtraction did not roll correctly (:issue:`18510`,:issue:`18672`,:issue:`18864`) +- Bug in :class:`WeekOfMonth` and :class:`LastWeekOfMonth` where default keyword arguments for constructor raised ``ValueError`` (:issue:`19142`) - Bug in :class:`FY5253Quarter`, :class:`LastWeekOfMonth` where rollback and rollforward behavior was inconsistent with addition and subtraction behavior (:issue:`18854`) -- Bug in :class:`Index` constructor with ``dtype=CategoricalDtype(...)`` where ``categories`` and ``ordered`` are not maintained (issue:`19032`) +- Bug in :class:`FY5253` where ``datetime`` addition and subtraction incremented incorrectly for dates on the year-end but not normalized to midnight (:issue:`18854`) +- Bug in :class:`FY5253` where date offsets could incorrectly raise an ``AssertionError`` in arithmetic operations (:issue:`14774`) + + +Numeric +^^^^^^^ +- Bug in :class:`Series` constructor with an int or float list where specifying ``dtype=str``, ``dtype='str'`` or ``dtype='U'`` failed to convert the data elements to strings (:issue:`16605`) +- Bug in :class:`Index` multiplication and division methods where operating with a ``Series`` would return an ``Index`` object instead of a ``Series`` object (:issue:`19042`) +- Bug in the :class:`DataFrame` constructor in which data containing very large positive or very large negative numbers was causing ``OverflowError`` (:issue:`18584`) +- Bug in :class:`Index` constructor with ``dtype='uint64'`` where int-like floats were not coerced to :class:`UInt64Index` (:issue:`18400`) + +- Indexing ^^^^^^^^
-- Bug in :func:`Series.truncate` which raises ``TypeError`` with a monotonic ``PeriodIndex`` (:issue:`17717`) -- Bug in :func:`DataFrame.groupby` where tuples were interpreted as lists of keys rather than as keys (:issue:`17979`, :issue:`18249`) -- Bug in :func:`MultiIndex.get_level_values` which would return an invalid index on level of ints with missing values (:issue:`17924`) -- Bug in :func:`MultiIndex.remove_unused_levels` which would fill nan values (:issue:`18417`) -- Bug in :func:`MultiIndex.from_tuples`` which would fail to take zipped tuples in python3 (:issue:`18434`) - Bug in :class:`Index` construction from list of mixed type tuples (:issue:`18505`) - Bug in :func:`Index.drop` when passing a list of both tuples and non-tuples (:issue:`18304`) -- Bug in :class:`IntervalIndex` where empty and purely NA data was constructed inconsistently depending on the construction method (:issue:`18421`) -- Bug in :func:`IntervalIndex.symmetric_difference` where the symmetric difference with a non-``IntervalIndex`` did not raise (:issue:`18475`) +- Bug in :meth:`~DataFrame.drop`, :meth:`~Panel.drop`, :meth:`~Series.drop`, :meth:`~Index.drop` where no ``KeyError`` is raised when dropping a non-existent element from an axis that contains duplicates (:issue:`19186`) - Bug in indexing a datetimelike ``Index`` that raised ``ValueError`` instead of ``IndexError`` (:issue:`18386`). 
-- Bug in tz-aware :class:`DatetimeIndex` where addition/subtraction with a :class:`TimedeltaIndex` or array with ``dtype='timedelta64[ns]'`` was incorrect (:issue:`17558`) - :func:`Index.to_series` now accepts ``index`` and ``name`` kwargs (:issue:`18699`) - :func:`DatetimeIndex.to_series` now accepts ``index`` and ``name`` kwargs (:issue:`18699`) - Bug in indexing non-scalar value from ``Series`` having non-unique ``Index`` will return value flattened (:issue:`17610`) -- Bug in :func:`DatetimeIndex.insert` where inserting ``NaT`` into a timezone-aware index incorrectly raised (:issue:`16357`) - Bug in ``__setitem__`` when indexing a :class:`DataFrame` with a 2-d boolean ndarray (:issue:`18582`) +- Bug in ``str.extractall`` when there were no matches empty :class:`Index` was returned instead of appropriate :class:`MultiIndex` (:issue:`19034`) +- Bug in :class:`IntervalIndex` where empty and purely NA data was constructed inconsistently depending on the construction method (:issue:`18421`) +- Bug in :func:`IntervalIndex.symmetric_difference` where the symmetric difference with a non-``IntervalIndex`` did not raise (:issue:`18475`) +- Bug in :class:`IntervalIndex` where set operations that returned an empty ``IntervalIndex`` had the wrong dtype (:issue:`19101`) + +MultiIndex +^^^^^^^^^^ + +- Bug in :func:`MultiIndex.__contains__` where non-tuple keys would return ``True`` even if they had been dropped (:issue:`19027`) +- Bug in :func:`MultiIndex.set_labels` which would cause casting (and potentially clipping) of the new labels if the ``level`` argument is not 0 or a list like [0, 1, ... 
] (:issue:`19057`) +- Bug in :func:`MultiIndex.get_level_values` which would return an invalid index on level of ints with missing values (:issue:`17924`) +- Bug in :func:`MultiIndex.remove_unused_levels` which would fill nan values (:issue:`18417`) +- Bug in :func:`MultiIndex.from_tuples` which would fail to take zipped tuples in python3 (:issue:`18434`) +- Bug in :func:`MultiIndex.get_loc` which would fail to automatically cast values between float and int (:issue:`18818`, :issue:`15994`) +- Bug in :func:`MultiIndex.get_loc` which would cast boolean to integer labels (:issue:`19086`) +- Bug in :func:`MultiIndex.get_loc` which would fail to locate keys containing ``NaN`` (:issue:`18485`) +- Bug in :func:`MultiIndex.get_loc` in large :class:`MultiIndex`, would fail when levels had different dtypes (:issue:`18520`) I/O @@ -341,9 +490,13 @@ I/O - :func:`read_html` now rewinds seekable IO objects after parse failure, before attempting to parse with a new parser. If a parser errors and the object is non-seekable, an informative error is raised suggesting the use of a different parser (:issue:`17975`) - Bug in :func:`read_msgpack` with a non existent file is passed in Python 2 (:issue:`15296`) - Bug in :func:`read_csv` where a ``MultiIndex`` with duplicate columns was not being mangled appropriately (:issue:`18062`) +- Bug in :func:`read_csv` where missing values were not being handled properly when ``keep_default_na=False`` with dictionary ``na_values`` (:issue:`19227`) - Bug in :func:`read_sas` where a file with 0 variables gave an ``AttributeError`` incorrectly.
Now it gives an ``EmptyDataError`` (:issue:`18184`) - Bug in :func:`DataFrame.to_latex()` where pairs of braces meant to serve as invisible placeholders were escaped (:issue:`18667`) - Bug in :func:`read_json` where large numeric values were causing an ``OverflowError`` (:issue:`18842`) +- Bug in :func:`DataFrame.to_parquet` where an exception was raised if the write destination is S3 (:issue:`19134`) +- :class:`Interval` now supported in :func:`DataFrame.to_excel` for all Excel file types (:issue:`19242`) +- :class:`Timedelta` now supported in :func:`DataFrame.to_excel` for xls file type (:issue:`19242`, :issue:`9155`) - Plotting @@ -359,13 +512,15 @@ Groupby/Resample/Rolling - Bug when grouping by a single column and aggregating with a class like ``list`` or ``tuple`` (:issue:`18079`) - Fixed regression in :func:`DataFrame.groupby` which would not emit an error when called with a tuple key not in the index (:issue:`18798`) -- +- Bug in :func:`DataFrame.resample` which silently ignored unsupported (or mistyped) options for ``label``, ``closed`` and ``convention`` (:issue:`19303`) +- Bug in :func:`DataFrame.groupby` where tuples were interpreted as lists of keys rather than as keys (:issue:`17979`, :issue:`18249`) +- Bug in ``transform`` where particular aggregation functions were being incorrectly cast to match the dtype(s) of the grouped data (:issue:`19200`) - Sparse ^^^^^^ -- +- Bug in which creating a ``SparseDataFrame`` from a dense ``Series`` or an unsupported type raised an uncontrolled exception (:issue:`19374`) - - @@ -373,25 +528,29 @@ Reshaping ^^^^^^^^^ - Bug in :func:`DataFrame.stack` which fails trying to sort mixed type levels under Python 3 (:issue:`18310`) +- Bug in :func:`DataFrame.unstack` which casts int to float if ``columns`` is a ``MultiIndex`` with unused levels (:issue:`17845`) +- Bug in :func:`DataFrame.unstack` which raises an error if ``index`` is a ``MultiIndex`` with unused labels on the unstacked level (:issue:`18562`) - Fixed 
construction of a :class:`Series` from a ``dict`` containing ``NaN`` as key (:issue:`18480`) - Bug in :func:`Series.rank` where ``Series`` containing ``NaT`` modifies the ``Series`` inplace (:issue:`18521`) - Bug in :func:`cut` which fails when using readonly arrays (:issue:`18773`) - Bug in :func:`Dataframe.pivot_table` which fails when the ``aggfunc`` arg is of type string. The behavior is now consistent with other methods like ``agg`` and ``apply`` (:issue:`18713`) - - -Numeric -^^^^^^^ - -- Bug in :func:`Series.__sub__` subtracting a non-nanosecond ``np.datetime64`` object from a ``Series`` gave incorrect results (:issue:`7996`) -- Bug in :class:`DatetimeIndex`, :class:`TimedeltaIndex` addition and subtraction of zero-dimensional integer arrays gave incorrect results (:issue:`19012`) -- Bug in :func:`Series.__add__` adding Series with dtype ``timedelta64[ns]`` to a timezone-aware ``DatetimeIndex`` incorrectly dropped timezone information (:issue:`13905`) +- Bug in :func:`DataFrame.merge` in which merging using ``Index`` objects as vectors raised an Exception (:issue:`19038`) +- Bug in :func:`DataFrame.stack`, :func:`DataFrame.unstack`, :func:`Series.unstack` which were not returning subclasses (:issue:`15563`) +- Bug in timezone comparisons, manifesting as a conversion of the index to UTC in ``.concat()`` (:issue:`18523`) - + Categorical ^^^^^^^^^^^ - -- +- Bug in :func:`pandas.api.types.union_categoricals` returning the wrong result + when all the categoricals had the same categories, but in a different order. + This affected :func:`pandas.concat` with Categorical data (:issue:`19096`). 
+- Bug in ``Categorical.equals`` between two unordered categories with the same categories, but in a different order (:issue:`16603`) +- Bug in :meth:`Index.astype` with a categorical dtype where the resultant index is not converted to a :class:`CategoricalIndex` for all types of index (:issue:`18630`) +- Bug in :meth:`Series.astype` and ``Categorical.astype()`` where an existing categorical data does not get updated (:issue:`10696`, :issue:`18593`) +- Bug in :class:`Index` constructor with ``dtype=CategoricalDtype(...)`` where ``categories`` and ``ordered`` are not maintained (issue:`19032`) - Other diff --git a/pandas/__init__.py b/pandas/__init__.py index 93c5b6484b840..78501620d780b 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -51,7 +51,7 @@ plot_params = pandas.plotting._style._Options(deprecated=True) # do not import deprecate to top namespace scatter_matrix = pandas.util._decorators.deprecate( - 'pandas.scatter_matrix', pandas.plotting.scatter_matrix, + 'pandas.scatter_matrix', pandas.plotting.scatter_matrix, '0.20.0', 'pandas.plotting.scatter_matrix') from pandas.util._print_versions import show_versions diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 7b61cd22f45d1..5d17488963b1c 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -1,20 +1,14 @@ # cython: profile=False -cimport numpy as np -import numpy as np - cimport cython from cython cimport Py_ssize_t -np.import_array() - -cdef float64_t FP_ERR = 1e-13 - -cimport util - from libc.stdlib cimport malloc, free from libc.string cimport memmove +from libc.math cimport fabs, sqrt +import numpy as np +cimport numpy as cnp from numpy cimport (ndarray, NPY_INT64, NPY_UINT64, NPY_INT32, NPY_INT16, NPY_INT8, NPY_FLOAT32, NPY_FLOAT64, @@ -22,18 +16,19 @@ from numpy cimport (ndarray, int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, uint32_t, uint64_t, float32_t, float64_t, double_t) +cnp.import_array() -cdef double NaN = np.NaN -cdef double nan = NaN - -from 
libc.math cimport fabs, sqrt - -# this is our util.pxd +cimport util from util cimport numeric, get_nat import missing +cdef float64_t FP_ERR = 1e-13 + +cdef double NaN = np.NaN +cdef double nan = NaN + cdef int64_t iNaT = get_nat() cdef: @@ -196,24 +191,6 @@ cpdef numeric kth_smallest(numeric[:] a, Py_ssize_t k) nogil: return a[k] -cpdef numeric median(numeric[:] arr): - """ - A faster median - """ - cdef Py_ssize_t n = arr.size - - if n == 0: - return np.NaN - - arr = arr.copy() - - if n % 2: - return kth_smallest(arr, n // 2) - else: - return (kth_smallest(arr, n // 2) + - kth_smallest(arr, n // 2 - 1)) / 2 - - # ---------------------------------------------------------------------- # Pairwise correlation/covariance diff --git a/pandas/_libs/algos_rank_helper.pxi.in b/pandas/_libs/algos_rank_helper.pxi.in index 8ccc6e036da80..2f40bd4349a2e 100644 --- a/pandas/_libs/algos_rank_helper.pxi.in +++ b/pandas/_libs/algos_rank_helper.pxi.in @@ -50,7 +50,7 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', ascending=True, ndarray[float64_t] ranks ndarray[int64_t] argsorted - ndarray[np.uint8_t, cast=True] sorted_mask + ndarray[uint8_t, cast=True] sorted_mask {{if dtype == 'uint64'}} {{ctype}} val diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 9d9ac2ef2f5b1..9cc15fb6692d9 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -118,7 +118,7 @@ def group_last_object(ndarray[object, ndim=2] out, out[i, j] = resx[i, j] -cdef inline float64_t _median_linear(float64_t* a, int n) nogil: +cdef inline float64_t median_linear(float64_t* a, int n) nogil: cdef int i, j, na_count = 0 cdef float64_t result cdef float64_t* tmp diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in index 14d47398ac1df..a751fadaf48cf 100644 --- a/pandas/_libs/groupby_helper.pxi.in +++ b/pandas/_libs/groupby_helper.pxi.in @@ -740,7 +740,7 @@ def group_median_float64(ndarray[float64_t, ndim=2] out, ptr += _counts[0] for j in 
range(ngroups): size = _counts[j + 1] - out[j, i] = _median_linear(ptr, size) + out[j, i] = median_linear(ptr, size) ptr += size diff --git a/pandas/_libs/hashtable.pxd b/pandas/_libs/hashtable.pxd index 014da22df3382..d735b3c0673b2 100644 --- a/pandas/_libs/hashtable.pxd +++ b/pandas/_libs/hashtable.pxd @@ -31,15 +31,6 @@ cdef class PyObjectHashTable(HashTable): cpdef get_item(self, object val) cpdef set_item(self, object key, Py_ssize_t val) -cdef class MultiIndexHashTable(HashTable): - cdef: - kh_uint64_t *table - object mi - - cpdef get_item(self, object val) - cpdef set_item(self, object key, Py_ssize_t val) - cdef inline void _check_for_collision(self, Py_ssize_t loc, object label) - cdef class StringHashTable(HashTable): cdef kh_str_t *table diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx index 72c2834b0bd57..07b4b80603e03 100644 --- a/pandas/_libs/hashtable.pyx +++ b/pandas/_libs/hashtable.pyx @@ -1,6 +1,22 @@ # cython: profile=False -from cpython cimport PyObject, Py_INCREF, PyList_Check, PyTuple_Check +cimport cython + +from cpython cimport (PyObject, Py_INCREF, PyList_Check, PyTuple_Check, + PyMem_Malloc, PyMem_Realloc, PyMem_Free, + PyString_Check, PyBytes_Check, + PyUnicode_Check) + +from libc.stdlib cimport malloc, free + +import numpy as np +cimport numpy as cnp +from numpy cimport ndarray, uint8_t, uint32_t +cnp.import_array() + +cdef extern from "numpy/npy_math.h": + double NAN "NPY_NAN" + from khash cimport ( khiter_t, @@ -23,29 +39,13 @@ from khash cimport ( kh_put_pymap, kh_resize_pymap) -from numpy cimport ndarray, uint8_t, uint32_t - -from libc.stdlib cimport malloc, free -from cpython cimport (PyMem_Malloc, PyMem_Realloc, PyMem_Free, - PyString_Check, PyBytes_Check, - PyUnicode_Check) - from util cimport _checknan cimport util -import numpy as np -nan = np.nan - -cdef extern from "numpy/npy_math.h": - double NAN "NPY_NAN" - -cimport cython -cimport numpy as cnp - from missing cimport checknull -cnp.import_array() 
-cnp.import_ufunc() + +nan = np.nan cdef int64_t iNaT = util.get_nat() _SIZE_HINT_LIMIT = (1 << 20) + 7 diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index bd9dd1f9bae37..bca4e388f3279 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -899,139 +899,3 @@ cdef class PyObjectHashTable(HashTable): count += 1 return np.asarray(labels) - - -cdef class MultiIndexHashTable(HashTable): - - def __init__(self, size_hint=1): - self.table = kh_init_uint64() - self.mi = None - kh_resize_uint64(self.table, size_hint) - - def __dealloc__(self): - if self.table is not NULL: - kh_destroy_uint64(self.table) - self.table = NULL - - def __len__(self): - return self.table.size - - def sizeof(self, deep=False): - """ return the size of my table in bytes """ - return self.table.n_buckets * (sizeof(uint64_t) + # keys - sizeof(size_t) + # vals - sizeof(uint32_t)) # flags - - def _check_for_collisions(self, int64_t[:] locs, object mi): - # validate that the locs map to the actual values - # provided in the mi - # we can only check if we *don't* have any missing values - # :< - cdef: - ndarray[int64_t] alocs - - alocs = np.asarray(locs) - if (alocs != -1).all(): - - result = self.mi.take(locs) - if isinstance(mi, tuple): - from pandas import Index - mi = Index([mi]) - if not result.equals(mi): - raise AssertionError( - "hash collision\nlocs:\n{}\n" - "result:\n{}\nmi:\n{}".format(alocs, result, mi)) - - cdef inline void _check_for_collision(self, Py_ssize_t loc, object label): - # validate that the loc maps to the actual value - # version of _check_for_collisions above for single label (tuple) - - result = self.mi[loc] - - if not all(l == r or (is_null_datetimelike(l) - and is_null_datetimelike(r)) - for l, r in zip(result, label)): - raise AssertionError( - "hash collision\nloc:\n{}\n" - "result:\n{}\nmi:\n{}".format(loc, result, label)) - - def __contains__(self, object key): - 
try: - self.get_item(key) - return True - except (KeyError, ValueError, TypeError): - return False - - cpdef get_item(self, object key): - cdef: - khiter_t k - uint64_t value - int64_t[:] locs - Py_ssize_t loc - - value = self.mi._hashed_indexing_key(key) - k = kh_get_uint64(self.table, value) - if k != self.table.n_buckets: - loc = self.table.vals[k] - self._check_for_collision(loc, key) - return loc - else: - raise KeyError(key) - - cpdef set_item(self, object key, Py_ssize_t val): - raise NotImplementedError - - @cython.boundscheck(False) - def map_locations(self, object mi): - cdef: - Py_ssize_t i, n - ndarray[uint64_t] values - uint64_t val - int ret = 0 - khiter_t k - - self.mi = mi - n = len(mi) - values = mi._hashed_values - - with nogil: - for i in range(n): - val = values[i] - k = kh_put_uint64(self.table, val, &ret) - self.table.vals[k] = i - - @cython.boundscheck(False) - def lookup(self, object mi): - # look up with a target mi - cdef: - Py_ssize_t i, n - ndarray[uint64_t] values - int ret = 0 - uint64_t val - khiter_t k - int64_t[:] locs - - n = len(mi) - values = mi._hashed_values - - locs = np.empty(n, dtype=np.int64) - - with nogil: - for i in range(n): - val = values[i] - k = kh_get_uint64(self.table, val) - if k != self.table.n_buckets: - locs[i] = self.table.vals[k] - else: - locs[i] = -1 - - self._check_for_collisions(locs, mi) - return np.asarray(locs) - - def unique(self, object mi): - raise NotImplementedError - - def get_labels(self, object mi, ObjectVector uniques, - Py_ssize_t count_prior, int64_t na_sentinel, - bint check_null=True): - raise NotImplementedError diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index f8371d4855803..6b23e487aad3a 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -1,17 +1,23 @@ # cython: profile=False +from datetime import datetime, timedelta, date -from numpy cimport (ndarray, float64_t, int32_t, int64_t, uint8_t, uint64_t, - NPY_DATETIME, NPY_TIMEDELTA) cimport cython -cimport 
numpy as cnp +from cpython cimport PyTuple_Check, PyList_Check +from cpython.slice cimport PySlice_Check +import numpy as np +cimport numpy as cnp +from numpy cimport ndarray, float64_t, int32_t, int64_t, uint8_t, uint64_t cnp.import_array() -cnp.import_ufunc() -cimport util +cdef extern from "numpy/arrayobject.h": + # These can be cimported directly from numpy in cython>=0.27.3 + cdef enum NPY_TYPES: + NPY_DATETIME + NPY_TIMEDELTA -import numpy as np +cimport util from tslibs.conversion cimport maybe_datetimelike_to_i8 @@ -20,15 +26,12 @@ from hashtable cimport HashTable from pandas._libs import algos, hashtable as _hash from pandas._libs.tslibs import period as periodlib from pandas._libs.tslib import Timestamp, Timedelta -from datetime import datetime, timedelta, date - -from cpython cimport PyTuple_Check, PyList_Check -from cpython.slice cimport PySlice_Check +from pandas._libs.missing import checknull cdef int64_t iNaT = util.get_nat() -cdef inline is_definitely_invalid_key(object val): +cdef inline bint is_definitely_invalid_key(object val): if PyTuple_Check(val): try: hash(val) @@ -73,10 +76,6 @@ cpdef object get_value_box(ndarray arr, object loc): return util.get_value_1d(arr, i) -def set_value_at(ndarray arr, object loc, object val): - return util.set_value_at(arr, loc, val) - - # Don't populate hash tables in monotonic indexes larger than this _SIZE_CUTOFF = 1000000 @@ -404,18 +403,6 @@ cdef Py_ssize_t _bin_search(ndarray values, object val) except -1: else: return mid + 1 -_pad_functions = { - 'object': algos.pad_object, - 'int64': algos.pad_int64, - 'float64': algos.pad_float64 -} - -_backfill_functions = { - 'object': algos.backfill_object, - 'int64': algos.backfill_int64, - 'float64': algos.backfill_float64 -} - cdef class DatetimeEngine(Int64Engine): @@ -566,7 +553,7 @@ cpdef convert_scalar(ndarray arr, object value): # we don't turn bools into int/float/complex if arr.descr.type_num == NPY_DATETIME: - if isinstance(value, np.ndarray): + if 
util.is_array(value): pass elif isinstance(value, (datetime, np.datetime64, date)): return Timestamp(value).value @@ -577,7 +564,7 @@ cpdef convert_scalar(ndarray arr, object value): raise ValueError("cannot set a Timestamp with a non-timestamp") elif arr.descr.type_num == NPY_TIMEDELTA: - if isinstance(value, np.ndarray): + if util.is_array(value): pass elif isinstance(value, timedelta): return Timedelta(value).value @@ -599,70 +586,137 @@ cpdef convert_scalar(ndarray arr, object value): return value -cdef class MultiIndexObjectEngine(ObjectEngine): +cdef class BaseMultiIndexCodesEngine: """ - provide the same interface as the MultiIndexEngine - but use the IndexEngine for computation - - This provides good performance with samller MI's + Base class for MultiIndexUIntEngine and MultiIndexPyIntEngine, which + represent each label in a MultiIndex as an integer, by juxtaposing the bits + encoding each level, with appropriate offsets. + + For instance: if 3 levels have respectively 3, 6 and 1 possible values, + then their labels can be represented using respectively 2, 3 and 1 bits, + as follows: + _ _ _ _____ _ __ __ __ + |0|0|0| ... |0| 0|a1|a0| -> offset 0 (first level) + — — — ————— — —— —— —— + |0|0|0| ... |0|b2|b1|b0| -> offset 2 (bits required for first level) + — — — ————— — —— —— —— + |0|0|0| ... |0| 0| 0|c0| -> offset 5 (bits required for first two levels) + ‾ ‾ ‾ ‾‾‾‾‾ ‾ ‾‾ ‾‾ ‾‾ + and the resulting unsigned integer representation will be: + _ _ _ _____ _ __ __ __ __ __ __ + |0|0|0| ... |0|c0|b2|b1|b0|a1|a0| + ‾ ‾ ‾ ‾‾‾‾‾ ‾ ‾‾ ‾‾ ‾‾ ‾‾ ‾‾ ‾‾ + + Offsets are calculated at initialization, labels are transformed by method + _codes_to_ints. + + Keys are located by first locating each component against the respective + level, then locating (the integer representation of) codes. 
""" - def get_indexer(self, values): - # convert a MI to an ndarray - if hasattr(values, 'values'): - values = values.values - return super(MultiIndexObjectEngine, self).get_indexer(values) + def __init__(self, object levels, object labels, + ndarray[uint64_t, ndim=1] offsets): + """ + Parameters + ---------- + levels : list-like of numpy arrays + Levels of the MultiIndex + labels : list-like of numpy arrays of integer dtype + Labels of the MultiIndex + offsets : numpy array of uint64 dtype + Pre-calculated offsets, one for each level of the index + """ - cpdef get_loc(self, object val): + self.levels = levels + self.offsets = offsets - # convert a MI to an ndarray - if hasattr(val, 'values'): - val = val.values - return super(MultiIndexObjectEngine, self).get_loc(val) + # Transform labels in a single array, and add 1 so that we are working + # with positive integers (-1 for NaN becomes 0): + codes = (np.array(labels, dtype='int64').T + 1).astype('uint64', + copy=False) + # Map each codes combination in the index to an integer unambiguously + # (no collisions possible), based on the "offsets", which describe the + # number of bits to switch labels for each level: + lab_ints = self._codes_to_ints(codes) -cdef class MultiIndexHashEngine(ObjectEngine): - """ - Use a hashing based MultiIndex impl - but use the IndexEngine for computation + # Initialize underlying index (e.g. libindex.UInt64Engine) with + # integers representing labels: we will use its get_loc and get_indexer + self._base.__init__(self, lambda: lab_ints, len(lab_ints)) - This provides good performance with larger MI's - """ + def _extract_level_codes(self, object target, object method=None): + """ + Map the requested list of (tuple) keys to their integer representations + for searching in the underlying integer index. + + Parameters + ---------- + target : list-like of keys + Each key is a tuple, with a label for each level of the index. 
+ + Returns + ------ + int_keys : 1-dimensional array of dtype uint64 or object + Integers representing one combination each + """ - def _call_monotonic(self, object mi): - # defer these back to the mi iteself - return (mi.is_monotonic_increasing, - mi.is_monotonic_decreasing, - mi.is_unique) + level_codes = [lev.get_indexer(codes) + 1 for lev, codes + in zip(self.levels, zip(*target))] + return self._codes_to_ints(np.array(level_codes, dtype='uint64').T) + + def get_indexer(self, object target, object method=None, + object limit=None): + lab_ints = self._extract_level_codes(target) + + # All methods (exact, backfill, pad) directly map to the respective + # methods of the underlying (integers) index... + if method is not None: + # but underlying backfill and pad methods require index and keys + # to be sorted. The index already is (checked in + # Index._get_fill_indexer), sort (integer representations of) keys: + order = np.argsort(lab_ints) + lab_ints = lab_ints[order] + indexer = (getattr(self._base, 'get_{}_indexer'.format(method)) + (self, lab_ints, limit=limit)) + indexer = indexer[order] + else: + indexer = self._base.get_indexer(self, lab_ints) - def get_backfill_indexer(self, other, limit=None): - # we coerce to ndarray-of-tuples - values = np.array(self._get_index_values()) - return algos.backfill_object(values, other, limit=limit) + return indexer - def get_pad_indexer(self, other, limit=None): - # we coerce to ndarray-of-tuples - values = np.array(self._get_index_values()) - return algos.pad_object(values, other, limit=limit) + def get_loc(self, object key): + if is_definitely_invalid_key(key): + raise TypeError("'{key}' is an invalid key".format(key=key)) + if not PyTuple_Check(key): + raise KeyError(key) + try: + indices = [0 if checknull(v) else lev.get_loc(v) + 1 + for lev, v in zip(self.levels, key)] + except KeyError: + raise KeyError(key) - cpdef get_loc(self, object val): - if is_definitely_invalid_key(val): - raise TypeError("'{val}' is an 
invalid key".format(val=val)) + # Transform indices into single integer: + lab_int = self._codes_to_ints(np.array(indices, dtype='uint64')) - self._ensure_mapping_populated() - if not self.unique: - return self._get_loc_duplicates(val) + return self._base.get_loc(self, lab_int) - try: - return self.mapping.get_item(val) - except TypeError: - raise KeyError(val) + def get_indexer_non_unique(self, object target): + # This needs to be overridden just because the default one works on + # target._values, and target can be itself a MultiIndex. - def get_indexer(self, values): - self._ensure_mapping_populated() - return self.mapping.lookup(values) + lab_ints = self._extract_level_codes(target) + indexer = self._base.get_indexer_non_unique(self, lab_ints) + + return indexer + + def __contains__(self, object val): + # Default __contains__ looks in the underlying mapping, which in this + # case only contains integer representations. + try: + self.get_loc(val) + return True + except (KeyError, TypeError, ValueError): + return False - cdef _make_hash_table(self, n): - return _hash.MultiIndexHashTable(n) # Generated from template. 
include "index_class_helper.pxi" diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx new file mode 100644 index 0000000000000..b46a05a0842c3 --- /dev/null +++ b/pandas/_libs/internals.pyx @@ -0,0 +1,438 @@ +# -*- coding: utf-8 -*- + +cimport cython +from cython cimport Py_ssize_t + +from cpython cimport PyObject +from cpython.slice cimport PySlice_Check + +cdef extern from "Python.h": + Py_ssize_t PY_SSIZE_T_MAX + +import numpy as np +from numpy cimport int64_t + +cdef extern from "compat_helper.h": + cdef int slice_get_indices(PyObject* s, Py_ssize_t length, + Py_ssize_t *start, Py_ssize_t *stop, + Py_ssize_t *step, + Py_ssize_t *slicelength) except -1 + + +cdef class BlockPlacement: + # __slots__ = '_as_slice', '_as_array', '_len' + cdef slice _as_slice + cdef object _as_array + + cdef bint _has_slice, _has_array, _is_known_slice_like + + def __init__(self, val): + cdef slice slc + + self._has_slice = False + self._has_array = False + + if PySlice_Check(val): + slc = slice_canonize(val) + + if slc.start != slc.stop: + self._as_slice = slc + self._has_slice = True + else: + arr = np.empty(0, dtype=np.int64) + self._as_array = arr + self._has_array = True + else: + # Cython memoryview interface requires ndarray to be writeable. 
+ arr = np.require(val, dtype=np.int64, requirements='W') + assert arr.ndim == 1 + self._as_array = arr + self._has_array = True + + def __str__(self): + cdef slice s = self._ensure_has_slice() + if s is not None: + v = self._as_slice + else: + v = self._as_array + + return '%s(%r)' % (self.__class__.__name__, v) + + __repr__ = __str__ + + def __len__(self): + cdef slice s = self._ensure_has_slice() + if s is not None: + return slice_len(s) + else: + return len(self._as_array) + + def __iter__(self): + cdef slice s = self._ensure_has_slice() + cdef Py_ssize_t start, stop, step, _ + if s is not None: + start, stop, step, _ = slice_get_indices_ex(s) + return iter(range(start, stop, step)) + else: + return iter(self._as_array) + + @property + def as_slice(self): + cdef slice s = self._ensure_has_slice() + if s is None: + raise TypeError('Not slice-like') + else: + return s + + @property + def indexer(self): + cdef slice s = self._ensure_has_slice() + if s is not None: + return s + else: + return self._as_array + + def isin(self, arr): + from pandas.core.index import Int64Index + return Int64Index(self.as_array, copy=False).isin(arr) + + @property + def as_array(self): + cdef Py_ssize_t start, stop, end, _ + if not self._has_array: + start, stop, step, _ = slice_get_indices_ex(self._as_slice) + self._as_array = np.arange(start, stop, step, + dtype=np.int64) + self._has_array = True + return self._as_array + + @property + def is_slice_like(self): + cdef slice s = self._ensure_has_slice() + return s is not None + + def __getitem__(self, loc): + cdef slice s = self._ensure_has_slice() + if s is not None: + val = slice_getitem(s, loc) + else: + val = self._as_array[loc] + + if not PySlice_Check(val) and val.ndim == 0: + return val + + return BlockPlacement(val) + + def delete(self, loc): + return BlockPlacement(np.delete(self.as_array, loc, axis=0)) + + def append(self, others): + if len(others) == 0: + return self + + return BlockPlacement(np.concatenate([self.as_array] + 
+ [o.as_array for o in others])) + + cdef iadd(self, other): + cdef slice s = self._ensure_has_slice() + cdef Py_ssize_t other_int, start, stop, step, l + + if isinstance(other, int) and s is not None: + other_int = other + + if other_int == 0: + return self + + start, stop, step, l = slice_get_indices_ex(s) + start += other_int + stop += other_int + + if ((step > 0 and start < 0) or + (step < 0 and stop < step)): + raise ValueError("iadd causes length change") + + if stop < 0: + self._as_slice = slice(start, None, step) + else: + self._as_slice = slice(start, stop, step) + + self._has_array = False + self._as_array = None + else: + newarr = self.as_array + other + if (newarr < 0).any(): + raise ValueError("iadd causes length change") + + self._as_array = newarr + self._has_array = True + self._has_slice = False + self._as_slice = None + + return self + + cdef BlockPlacement copy(self): + cdef slice s = self._ensure_has_slice() + if s is not None: + return BlockPlacement(s) + else: + return BlockPlacement(self._as_array) + + def add(self, other): + return self.copy().iadd(other) + + def sub(self, other): + return self.add(-other) + + cdef slice _ensure_has_slice(self): + if not self._has_slice: + self._as_slice = indexer_as_slice(self._as_array) + self._has_slice = True + return self._as_slice + + +cpdef slice_canonize(slice s): + """ + Convert slice to canonical bounded form. 
+ """ + cdef: + Py_ssize_t start = 0, stop = 0, step = 1, length + + if s.step is None: + step = 1 + else: + step = s.step + if step == 0: + raise ValueError("slice step cannot be zero") + + if step > 0: + if s.stop is None: + raise ValueError("unbounded slice") + + stop = s.stop + if s.start is None: + start = 0 + else: + start = s.start + if start > stop: + start = stop + elif step < 0: + if s.start is None: + raise ValueError("unbounded slice") + + start = s.start + if s.stop is None: + stop = -1 + else: + stop = s.stop + if stop > start: + stop = start + + if start < 0 or (stop < 0 and s.stop is not None): + raise ValueError("unbounded slice") + + if stop < 0: + return slice(start, None, step) + else: + return slice(start, stop, step) + + +cpdef Py_ssize_t slice_len( + slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX) except -1: + """ + Get length of a bounded slice. + + The slice must not have any "open" bounds that would create dependency on + container size, i.e.: + - if ``s.step is None or s.step > 0``, ``s.stop`` is not ``None`` + - if ``s.step < 0``, ``s.start`` is not ``None`` + + Otherwise, the result is unreliable. + + """ + cdef: + Py_ssize_t start, stop, step, length + + if slc is None: + raise TypeError("slc must be slice") + + slice_get_indices(slc, objlen, + &start, &stop, &step, &length) + + return length + + +cpdef slice_get_indices_ex(slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX): + """ + Get (start, stop, step, length) tuple for a slice. + + If `objlen` is not specified, slice must be bounded, otherwise the result + will be wrong. 
+ + """ + cdef: + Py_ssize_t start, stop, step, length + + if slc is None: + raise TypeError("slc should be a slice") + + slice_get_indices(slc, objlen, + &start, &stop, &step, &length) + + return start, stop, step, length + + +def slice_getitem(slice slc not None, ind): + cdef: + Py_ssize_t s_start, s_stop, s_step, s_len + Py_ssize_t ind_start, ind_stop, ind_step, ind_len + + s_start, s_stop, s_step, s_len = slice_get_indices_ex(slc) + + if PySlice_Check(ind): + ind_start, ind_stop, ind_step, ind_len = slice_get_indices_ex(ind, + s_len) + + if ind_step > 0 and ind_len == s_len: + # short-cut for no-op slice + if ind_len == s_len: + return slc + + if ind_step < 0: + s_start = s_stop - s_step + ind_step = -ind_step + + s_step *= ind_step + s_stop = s_start + ind_stop * s_step + s_start = s_start + ind_start * s_step + + if s_step < 0 and s_stop < 0: + return slice(s_start, None, s_step) + else: + return slice(s_start, s_stop, s_step) + + else: + return np.arange(s_start, s_stop, s_step, dtype=np.int64)[ind] + + +@cython.boundscheck(False) +@cython.wraparound(False) +cpdef slice indexer_as_slice(int64_t[:] vals): + cdef: + Py_ssize_t i, n, start, stop + int64_t d + + if vals is None: + raise TypeError("vals must be ndarray") + + n = vals.shape[0] + + if n == 0 or vals[0] < 0: + return None + + if n == 1: + return slice(vals[0], vals[0] + 1, 1) + + if vals[1] < 0: + return None + + # n > 2 + d = vals[1] - vals[0] + + if d == 0: + return None + + for i in range(2, n): + if vals[i] < 0 or vals[i] - vals[i - 1] != d: + return None + + start = vals[0] + stop = start + n * d + if stop < 0 and d < 0: + return slice(start, None, d) + else: + return slice(start, stop, d) + + +@cython.boundscheck(False) +@cython.wraparound(False) +def get_blkno_indexers(int64_t[:] blknos, bint group=True): + """ + Enumerate contiguous runs of integers in ndarray. + + Iterate over elements of `blknos` yielding ``(blkno, slice(start, stop))`` + pairs for each contiguous run found. 
+ + If `group` is True and there is more than one run for a certain blkno, + ``(blkno, array)`` with an array containing positions of all elements equal + to blkno. + + Returns + ------- + iter : iterator of (int, slice or array) + + """ + # There's blkno in this function's name because it's used in block & + # blockno handling. + cdef: + int64_t cur_blkno + Py_ssize_t i, start, stop, n, diff + + object blkno + list group_order + dict group_slices + int64_t[:] res_view + + n = blknos.shape[0] + + if n == 0: + return + + start = 0 + cur_blkno = blknos[start] + + if group == False: + for i in range(1, n): + if blknos[i] != cur_blkno: + yield cur_blkno, slice(start, i) + + start = i + cur_blkno = blknos[i] + + yield cur_blkno, slice(start, n) + else: + group_order = [] + group_dict = {} + + for i in range(1, n): + if blknos[i] != cur_blkno: + if cur_blkno not in group_dict: + group_order.append(cur_blkno) + group_dict[cur_blkno] = [(start, i)] + else: + group_dict[cur_blkno].append((start, i)) + + start = i + cur_blkno = blknos[i] + + if cur_blkno not in group_dict: + group_order.append(cur_blkno) + group_dict[cur_blkno] = [(start, n)] + else: + group_dict[cur_blkno].append((start, n)) + + for blkno in group_order: + slices = group_dict[blkno] + if len(slices) == 1: + yield blkno, slice(slices[0][0], slices[0][1]) + else: + tot_len = sum(stop - start for start, stop in slices) + result = np.empty(tot_len, dtype=np.int64) + res_view = result + + i = 0 + for start, stop in slices: + for diff in range(start, stop): + res_view[i] = diff + i += 1 + + yield blkno, result \ No newline at end of file diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index f1da60057186c..c0b2ca66e30a6 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -1,4 +1,4 @@ -cimport numpy as np +cimport numpy as cnp import numpy as np cimport util @@ -6,7 +6,7 @@ cimport cython import cython from numpy cimport ndarray from tslib import Timestamp -from 
tslibs.timezones cimport get_timezone +from tslibs.timezones cimport tz_compare from cpython.object cimport (Py_EQ, Py_NE, Py_GT, Py_LT, Py_GE, Py_LE, PyObject_RichCompare) @@ -109,6 +109,7 @@ cdef class Interval(IntervalMixin): cut, qcut : Convert arrays of continuous data into Categoricals/Series of Interval. """ + _typ = "interval" cdef readonly object left """Left bound for the interval""" @@ -131,7 +132,7 @@ cdef class Interval(IntervalMixin): if not left <= right: raise ValueError('left side of interval must be <= right side') if (isinstance(left, Timestamp) and - get_timezone(left.tzinfo) != get_timezone(right.tzinfo)): + not tz_compare(left.tzinfo, right.tzinfo)): # GH 18538 msg = ("left and right must have the same time zone, got " "'{left_tz}' and '{right_tz}'") diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx index 344c5d25d0c3d..27d2a639d13e6 100644 --- a/pandas/_libs/join.pyx +++ b/pandas/_libs/join.pyx @@ -1,16 +1,15 @@ # cython: profile=False -cimport numpy as np -import numpy as np - cimport cython from cython cimport Py_ssize_t -np.import_array() - +import numpy as np +cimport numpy as cnp from numpy cimport (ndarray, int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, uint32_t, uint64_t, float32_t, float64_t) +cnp.import_array() + cdef double NaN = np.NaN cdef double nan = NaN diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index f6c70027ae6f1..c3a654b01022c 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -5,19 +5,15 @@ cimport cython from cython cimport Py_ssize_t import numpy as np -cimport numpy as np -from numpy cimport (ndarray, PyArray_NDIM, PyArray_GETITEM, PyArray_SETITEM, +cimport numpy as cnp +from numpy cimport (ndarray, PyArray_NDIM, PyArray_GETITEM, PyArray_ITER_DATA, PyArray_ITER_NEXT, PyArray_IterNew, flatiter, NPY_OBJECT, int64_t, float32_t, float64_t, uint8_t, uint64_t, complex128_t) -# initialize numpy -np.import_array() -np.import_ufunc() - -from libc.stdlib cimport malloc, free 
+cnp.import_array() from cpython cimport (Py_INCREF, PyTuple_SET_ITEM, PyList_Check, PyFloat_Check, @@ -27,39 +23,22 @@ from cpython cimport (Py_INCREF, PyTuple_SET_ITEM, PyTuple_New, PyObject_RichCompareBool, PyBytes_GET_SIZE, - PyUnicode_GET_SIZE, - PyObject) + PyUnicode_GET_SIZE) try: from cpython cimport PyString_GET_SIZE except ImportError: from cpython cimport PyUnicode_GET_SIZE as PyString_GET_SIZE -cdef extern from "Python.h": - Py_ssize_t PY_SSIZE_T_MAX - -cdef extern from "compat_helper.h": - - cdef int slice_get_indices( - PyObject* s, Py_ssize_t length, - Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t *step, - Py_ssize_t *slicelength) except -1 - cimport cpython -isnan = np.isnan -cdef double NaN = np.NaN -cdef double nan = NaN from cpython.datetime cimport (PyDateTime_Check, PyDate_Check, PyTime_Check, PyDelta_Check, PyDateTime_IMPORT) PyDateTime_IMPORT -from tslibs.np_datetime cimport get_timedelta64_value, get_datetime64_value - from tslib import NaT, Timestamp, Timedelta, array_to_datetime -from interval import Interval from missing cimport checknull @@ -67,8 +46,6 @@ cimport util cdef int64_t NPY_NAT = util.get_nat() from util cimport is_array, _checknull -from libc.math cimport fabs, sqrt - def values_from_object(object o): """ return my values or the object if we are say an ndarray """ @@ -116,7 +93,7 @@ cpdef bint is_scalar(object val): """ - return (np.PyArray_IsAnyScalar(val) + return (cnp.PyArray_IsAnyScalar(val) # As of numpy-1.9, PyArray_IsAnyScalar misses bytearrays on Py3. 
or PyBytes_Check(val) # We differ from numpy (as of 1.10), which claims that None is @@ -731,7 +708,7 @@ def clean_index_list(list obj): for i in range(n): v = obj[i] - if not (PyList_Check(v) or np.PyArray_Check(v) or hasattr(v, '_data')): + if not (PyList_Check(v) or util.is_array(v) or hasattr(v, '_data')): all_arrays = 0 break @@ -896,38 +873,6 @@ def write_csv_rows(list data, ndarray data_index, # ------------------------------------------------------------------------------ # Groupby-related functions -@cython.wraparound(False) -@cython.boundscheck(False) -def is_lexsorted(list list_of_arrays): - cdef: - int i - Py_ssize_t n, nlevels - int64_t k, cur, pre - ndarray arr - - nlevels = len(list_of_arrays) - n = len(list_of_arrays[0]) - - cdef int64_t **vecs = malloc(nlevels * sizeof(int64_t*)) - for i from 0 <= i < nlevels: - arr = list_of_arrays[i] - vecs[i] = arr.data - - # Assume uniqueness?? - for i from 1 <= i < n: - for k from 0 <= k < nlevels: - cur = vecs[k][i] - pre = vecs[k][i - 1] - if cur == pre: - continue - elif cur > pre: - break - else: - return False - free(vecs) - return True - - # TODO: could do even better if we know something about the data. eg, index has # 1-min data, binner has 5-min data, then bins are just strides in index. This # is a general, O(max(len(values), len(binner))) method. @@ -1161,424 +1106,4 @@ def indices_fast(object index, ndarray[int64_t] labels, list keys, return result -@cython.boundscheck(False) -@cython.wraparound(False) -def get_blkno_indexers(int64_t[:] blknos, bint group=True): - """ - Enumerate contiguous runs of integers in ndarray. - - Iterate over elements of `blknos` yielding ``(blkno, slice(start, stop))`` - pairs for each contiguous run found. - - If `group` is True and there is more than one run for a certain blkno, - ``(blkno, array)`` with an array containing positions of all elements equal - to blkno. 
- - Returns - ------- - iter : iterator of (int, slice or array) - - """ - # There's blkno in this function's name because it's used in block & - # blockno handling. - cdef: - int64_t cur_blkno - Py_ssize_t i, start, stop, n, diff - - object blkno - list group_order - dict group_slices - int64_t[:] res_view - - n = blknos.shape[0] - - if n == 0: - return - - start = 0 - cur_blkno = blknos[start] - - if group == False: - for i in range(1, n): - if blknos[i] != cur_blkno: - yield cur_blkno, slice(start, i) - - start = i - cur_blkno = blknos[i] - - yield cur_blkno, slice(start, n) - else: - group_order = [] - group_dict = {} - - for i in range(1, n): - if blknos[i] != cur_blkno: - if cur_blkno not in group_dict: - group_order.append(cur_blkno) - group_dict[cur_blkno] = [(start, i)] - else: - group_dict[cur_blkno].append((start, i)) - - start = i - cur_blkno = blknos[i] - - if cur_blkno not in group_dict: - group_order.append(cur_blkno) - group_dict[cur_blkno] = [(start, n)] - else: - group_dict[cur_blkno].append((start, n)) - - for blkno in group_order: - slices = group_dict[blkno] - if len(slices) == 1: - yield blkno, slice(slices[0][0], slices[0][1]) - else: - tot_len = sum(stop - start for start, stop in slices) - result = np.empty(tot_len, dtype=np.int64) - res_view = result - - i = 0 - for start, stop in slices: - for diff in range(start, stop): - res_view[i] = diff - i += 1 - - yield blkno, result - - -@cython.boundscheck(False) -@cython.wraparound(False) -cpdef slice indexer_as_slice(int64_t[:] vals): - cdef: - Py_ssize_t i, n, start, stop - int64_t d - - if vals is None: - raise TypeError("vals must be ndarray") - - n = vals.shape[0] - - if n == 0 or vals[0] < 0: - return None - - if n == 1: - return slice(vals[0], vals[0] + 1, 1) - - if vals[1] < 0: - return None - - # n > 2 - d = vals[1] - vals[0] - - if d == 0: - return None - - for i in range(2, n): - if vals[i] < 0 or vals[i] - vals[i - 1] != d: - return None - - start = vals[0] - stop = start + n * d - 
if stop < 0 and d < 0: - return slice(start, None, d) - else: - return slice(start, stop, d) - - -cpdef slice_canonize(slice s): - """ - Convert slice to canonical bounded form. - """ - cdef: - Py_ssize_t start = 0, stop = 0, step = 1, length - - if s.step is None: - step = 1 - else: - step = s.step - if step == 0: - raise ValueError("slice step cannot be zero") - - if step > 0: - if s.stop is None: - raise ValueError("unbounded slice") - - stop = s.stop - if s.start is None: - start = 0 - else: - start = s.start - if start > stop: - start = stop - elif step < 0: - if s.start is None: - raise ValueError("unbounded slice") - - start = s.start - if s.stop is None: - stop = -1 - else: - stop = s.stop - if stop > start: - stop = start - - if start < 0 or (stop < 0 and s.stop is not None): - raise ValueError("unbounded slice") - - if stop < 0: - return slice(start, None, step) - else: - return slice(start, stop, step) - - -cpdef slice_get_indices_ex(slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX): - """ - Get (start, stop, step, length) tuple for a slice. - - If `objlen` is not specified, slice must be bounded, otherwise the result - will be wrong. - - """ - cdef: - Py_ssize_t start, stop, step, length - - if slc is None: - raise TypeError("slc should be a slice") - - slice_get_indices(slc, objlen, - &start, &stop, &step, &length) - - return start, stop, step, length - - -cpdef Py_ssize_t slice_len( - slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX) except -1: - """ - Get length of a bounded slice. - - The slice must not have any "open" bounds that would create dependency on - container size, i.e.: - - if ``s.step is None or s.step > 0``, ``s.stop`` is not ``None`` - - if ``s.step < 0``, ``s.start`` is not ``None`` - - Otherwise, the result is unreliable. 
- - """ - cdef: - Py_ssize_t start, stop, step, length - - if slc is None: - raise TypeError("slc must be slice") - - slice_get_indices(slc, objlen, - &start, &stop, &step, &length) - - return length - - -def slice_getitem(slice slc not None, ind): - cdef: - Py_ssize_t s_start, s_stop, s_step, s_len - Py_ssize_t ind_start, ind_stop, ind_step, ind_len - - s_start, s_stop, s_step, s_len = slice_get_indices_ex(slc) - - if isinstance(ind, slice): - ind_start, ind_stop, ind_step, ind_len = slice_get_indices_ex(ind, - s_len) - - if ind_step > 0 and ind_len == s_len: - # short-cut for no-op slice - if ind_len == s_len: - return slc - - if ind_step < 0: - s_start = s_stop - s_step - ind_step = -ind_step - - s_step *= ind_step - s_stop = s_start + ind_stop * s_step - s_start = s_start + ind_start * s_step - - if s_step < 0 and s_stop < 0: - return slice(s_start, None, s_step) - else: - return slice(s_start, s_stop, s_step) - - else: - return np.arange(s_start, s_stop, s_step, dtype=np.int64)[ind] - - -cdef class BlockPlacement: - # __slots__ = '_as_slice', '_as_array', '_len' - cdef slice _as_slice - cdef object _as_array - - cdef bint _has_slice, _has_array, _is_known_slice_like - - def __init__(self, val): - cdef slice slc - - self._has_slice = False - self._has_array = False - - if isinstance(val, slice): - slc = slice_canonize(val) - - if slc.start != slc.stop: - self._as_slice = slc - self._has_slice = True - else: - arr = np.empty(0, dtype=np.int64) - self._as_array = arr - self._has_array = True - else: - # Cython memoryview interface requires ndarray to be writeable. 
- arr = np.require(val, dtype=np.int64, requirements='W') - assert arr.ndim == 1 - self._as_array = arr - self._has_array = True - - def __str__(self): - cdef slice s = self._ensure_has_slice() - if s is not None: - v = self._as_slice - else: - v = self._as_array - - return '%s(%r)' % (self.__class__.__name__, v) - - __repr__ = __str__ - - def __len__(self): - cdef slice s = self._ensure_has_slice() - if s is not None: - return slice_len(s) - else: - return len(self._as_array) - - def __iter__(self): - cdef slice s = self._ensure_has_slice() - cdef Py_ssize_t start, stop, step, _ - if s is not None: - start, stop, step, _ = slice_get_indices_ex(s) - return iter(range(start, stop, step)) - else: - return iter(self._as_array) - - @property - def as_slice(self): - cdef slice s = self._ensure_has_slice() - if s is None: - raise TypeError('Not slice-like') - else: - return s - - @property - def indexer(self): - cdef slice s = self._ensure_has_slice() - if s is not None: - return s - else: - return self._as_array - - def isin(self, arr): - from pandas.core.index import Int64Index - return Int64Index(self.as_array, copy=False).isin(arr) - - @property - def as_array(self): - cdef Py_ssize_t start, stop, end, _ - if not self._has_array: - start, stop, step, _ = slice_get_indices_ex(self._as_slice) - self._as_array = np.arange(start, stop, step, - dtype=np.int64) - self._has_array = True - return self._as_array - - @property - def is_slice_like(self): - cdef slice s = self._ensure_has_slice() - return s is not None - - def __getitem__(self, loc): - cdef slice s = self._ensure_has_slice() - if s is not None: - val = slice_getitem(s, loc) - else: - val = self._as_array[loc] - - if not isinstance(val, slice) and val.ndim == 0: - return val - - return BlockPlacement(val) - - def delete(self, loc): - return BlockPlacement(np.delete(self.as_array, loc, axis=0)) - - def append(self, others): - if len(others) == 0: - return self - - return 
BlockPlacement(np.concatenate([self.as_array] + - [o.as_array for o in others])) - - cdef iadd(self, other): - cdef slice s = self._ensure_has_slice() - cdef Py_ssize_t other_int, start, stop, step, l - - if isinstance(other, int) and s is not None: - other_int = other - - if other_int == 0: - return self - - start, stop, step, l = slice_get_indices_ex(s) - start += other_int - stop += other_int - - if ((step > 0 and start < 0) or - (step < 0 and stop < step)): - raise ValueError("iadd causes length change") - - if stop < 0: - self._as_slice = slice(start, None, step) - else: - self._as_slice = slice(start, stop, step) - - self._has_array = False - self._as_array = None - else: - newarr = self.as_array + other - if (newarr < 0).any(): - raise ValueError("iadd causes length change") - - self._as_array = newarr - self._has_array = True - self._has_slice = False - self._as_slice = None - - return self - - cdef BlockPlacement copy(self): - cdef slice s = self._ensure_has_slice() - if s is not None: - return BlockPlacement(s) - else: - return BlockPlacement(self._as_array) - - def add(self, other): - return self.copy().iadd(other) - - def sub(self, other): - return self.add(-other) - - cdef slice _ensure_has_slice(self): - if not self._has_slice: - self._as_slice = indexer_as_slice(self._as_array) - self._has_slice = True - return self._as_slice - - -include "reduce.pyx" include "inference.pyx" diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index 0b60fc2c5b4d1..dfd044131afb4 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -7,9 +7,9 @@ cimport cython from cython cimport Py_ssize_t import numpy as np -cimport numpy as np +cimport numpy as cnp from numpy cimport ndarray, int64_t, uint8_t -np.import_array() +cnp.import_array() cimport util diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index cf63b5083885e..efe61716d0831 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -7,9 +7,8 @@ import 
warnings from csv import QUOTE_MINIMAL, QUOTE_NONNUMERIC, QUOTE_NONE -from libc.stdio cimport fopen, fclose -from libc.stdlib cimport malloc, free -from libc.string cimport strncpy, strlen, strcmp, strcasecmp +from libc.stdlib cimport free +from libc.string cimport strncpy, strlen, strcasecmp cimport cython from cython cimport Py_ssize_t @@ -27,9 +26,6 @@ cdef extern from "Python.h": object PyUnicode_Decode(char *v, Py_ssize_t size, char *encoding, char *errors) -cdef extern from "stdlib.h": - void memcpy(void *dst, void *src, size_t n) - import numpy as np cimport numpy as cnp @@ -50,12 +46,12 @@ from khash cimport ( import pandas.compat as compat from pandas.core.dtypes.common import ( - is_categorical_dtype, CategoricalDtype, + is_categorical_dtype, is_integer_dtype, is_float_dtype, is_bool_dtype, is_object_dtype, is_datetime64_dtype, pandas_dtype) -from pandas.core.categorical import Categorical +from pandas.core.arrays import Categorical from pandas.core.dtypes.concat import union_categoricals import pandas.io.common as com @@ -90,9 +86,6 @@ try: except NameError: basestring = str -cdef extern from "src/numpy_helper.h": - void transfer_object_column(char *dst, char *src, size_t stride, - size_t length) cdef extern from "parser/tokenizer.h": @@ -232,8 +225,6 @@ cdef extern from "parser/tokenizer.h": int parser_trim_buffers(parser_t *self) - void debug_print_parser(parser_t *self) - int tokenize_all_rows(parser_t *self) nogil int tokenize_nrows(parser_t *self, size_t nrows) nogil @@ -249,7 +240,6 @@ cdef extern from "parser/tokenizer.h": double round_trip(const char *p, char **q, char decimal, char sci, char tsep, int skip_trailing) nogil - int to_longlong(char *item, long long *p_value) nogil int to_boolean(const char *item, uint8_t *val) nogil @@ -288,7 +278,7 @@ cdef class TextReader: object file_handle, na_fvalues object true_values, false_values object handle - bint na_filter, verbose, has_usecols, has_mi_columns + bint na_filter, keep_default_na, verbose, 
has_usecols, has_mi_columns int64_t parser_start list clocks char *c_encoding @@ -352,6 +342,8 @@ cdef class TextReader: na_filter=True, na_values=None, na_fvalues=None, + keep_default_na=True, + true_values=None, false_values=None, allow_leading_cols=True, @@ -378,8 +370,8 @@ cdef class TextReader: self.parser = parser_new() self.parser.chunksize = tokenize_chunksize - self.mangle_dupe_cols=mangle_dupe_cols - self.tupleize_cols=tupleize_cols + self.mangle_dupe_cols = mangle_dupe_cols + self.tupleize_cols = tupleize_cols # For timekeeping self.clocks = [] @@ -477,6 +469,7 @@ cdef class TextReader: self.true_set = kset_from_list(self.true_values) self.false_set = kset_from_list(self.false_values) + self.keep_default_na = keep_default_na self.converters = converters self.na_filter = na_filter @@ -872,9 +865,6 @@ cdef class TextReader: return header, field_count - cdef _implicit_index_count(self): - pass - def read(self, rows=None): """ rows=None --> read all rows @@ -994,9 +984,6 @@ cdef class TextReader: return columns - def debug_print(self): - debug_print_parser(self.parser) - cdef _start_clock(self): self.clocks.append(time.time()) @@ -1299,7 +1286,10 @@ cdef class TextReader: elif i in self.na_values: key = i else: # No na_values provided for this column. 
- return _NA_VALUES, set() + if self.keep_default_na: + return _NA_VALUES, set() + + return list(), set() values = self.na_values[key] if values is not None and not isinstance(values, list): @@ -1340,6 +1330,7 @@ cdef class TextReader: else: return None + cdef object _true_values = [b'True', b'TRUE', b'true'] cdef object _false_values = [b'False', b'FALSE', b'false'] @@ -1369,21 +1360,6 @@ cdef asbytes(object o): _NA_VALUES = _ensure_encoded(list(com._NA_VALUES)) -def _is_file_like(obj): - if PY3: - import io - if isinstance(obj, io.TextIOWrapper): - raise ParserError('Cannot handle open unicode files (yet)') - - # BufferedReader is a byte reader for Python 3 - file = io.BufferedReader - else: - import __builtin__ - file = __builtin__.file - - return isinstance(obj, (basestring, file)) - - def _maybe_upcast(arr): """ @@ -1473,6 +1449,7 @@ cdef _string_box_factorize(parser_t *parser, int64_t col, return result, na_count + cdef _string_box_utf8(parser_t *parser, int64_t col, int64_t line_start, int64_t line_end, bint na_filter, kh_str_t *na_hashset): @@ -1526,6 +1503,7 @@ cdef _string_box_utf8(parser_t *parser, int64_t col, return result, na_count + cdef _string_box_decode(parser_t *parser, int64_t col, int64_t line_start, int64_t line_end, bint na_filter, kh_str_t *na_hashset, @@ -1656,6 +1634,7 @@ cdef _categorical_convert(parser_t *parser, int64_t col, kh_destroy_str(table) return np.asarray(codes), result, na_count + cdef _to_fw_string(parser_t *parser, int64_t col, int64_t line_start, int64_t line_end, int64_t width): cdef: @@ -1673,6 +1652,7 @@ cdef _to_fw_string(parser_t *parser, int64_t col, int64_t line_start, return result + cdef inline void _to_fw_string_nogil(parser_t *parser, int64_t col, int64_t line_start, int64_t line_end, size_t width, char *data) nogil: @@ -1688,10 +1668,12 @@ cdef inline void _to_fw_string_nogil(parser_t *parser, int64_t col, strncpy(data, word, width) data += width + cdef char* cinf = b'inf' cdef char* cposinf = b'+inf' cdef char* 
cneginf = b'-inf' + cdef _try_double(parser_t *parser, int64_t col, int64_t line_start, int64_t line_end, bint na_filter, kh_str_t *na_hashset, object na_flist): @@ -1732,6 +1714,7 @@ cdef _try_double(parser_t *parser, int64_t col, return None, None return result, na_count + cdef inline int _try_double_nogil(parser_t *parser, double (*double_converter)( const char *, char **, char, @@ -1802,6 +1785,7 @@ cdef inline int _try_double_nogil(parser_t *parser, return 0 + cdef _try_uint64(parser_t *parser, int64_t col, int64_t line_start, int64_t line_end, bint na_filter, kh_str_t *na_hashset): @@ -1837,6 +1821,7 @@ cdef _try_uint64(parser_t *parser, int64_t col, return result + cdef inline int _try_uint64_nogil(parser_t *parser, int64_t col, int64_t line_start, int64_t line_end, bint na_filter, @@ -1875,6 +1860,7 @@ cdef inline int _try_uint64_nogil(parser_t *parser, int64_t col, return 0 + cdef _try_int64(parser_t *parser, int64_t col, int64_t line_start, int64_t line_end, bint na_filter, kh_str_t *na_hashset): @@ -1903,6 +1889,7 @@ cdef _try_int64(parser_t *parser, int64_t col, return result, na_count + cdef inline int _try_int64_nogil(parser_t *parser, int64_t col, int64_t line_start, int64_t line_end, bint na_filter, @@ -1942,69 +1929,6 @@ cdef inline int _try_int64_nogil(parser_t *parser, int64_t col, return 0 -cdef _try_bool(parser_t *parser, int64_t col, - int64_t line_start, int64_t line_end, - bint na_filter, kh_str_t *na_hashset): - cdef: - int na_count - Py_ssize_t lines = line_end - line_start - uint8_t *data - cnp.ndarray[cnp.uint8_t, ndim=1] result - - uint8_t NA = na_values[np.bool_] - - result = np.empty(lines) - data = result.data - - with nogil: - error = _try_bool_nogil(parser, col, line_start, - line_end, na_filter, - na_hashset, NA, data, - &na_count) - if error != 0: - return None, None - return result.view(np.bool_), na_count - -cdef inline int _try_bool_nogil(parser_t *parser, int64_t col, - int64_t line_start, - int64_t line_end, bint na_filter, 
- const kh_str_t *na_hashset, uint8_t NA, - uint8_t *data, int *na_count) nogil: - cdef: - int error - Py_ssize_t i, lines = line_end - line_start - coliter_t it - const char *word = NULL - khiter_t k - na_count[0] = 0 - - coliter_setup(&it, parser, col, line_start) - - if na_filter: - for i in range(lines): - COLITER_NEXT(it, word) - - k = kh_get_str(na_hashset, word) - # in the hash table - if k != na_hashset.n_buckets: - na_count[0] += 1 - data[0] = NA - data += 1 - continue - - error = to_boolean(word, data) - if error != 0: - return error - data += 1 - else: - for i in range(lines): - COLITER_NEXT(it, word) - - error = to_boolean(word, data) - if error != 0: - return error - data += 1 - return 0 cdef _try_bool_flex(parser_t *parser, int64_t col, int64_t line_start, int64_t line_end, @@ -2033,6 +1957,7 @@ cdef _try_bool_flex(parser_t *parser, int64_t col, return None, None return result.view(np.bool_), na_count + cdef inline int _try_bool_flex_nogil(parser_t *parser, int64_t col, int64_t line_start, int64_t line_end, bint na_filter, @@ -2125,6 +2050,7 @@ cdef kh_str_t* kset_from_list(list values) except NULL: return table + cdef kh_float64_t* kset_float64_from_list(values) except NULL: # caller takes responsibility for freeing the hash table cdef: diff --git a/pandas/_libs/src/reduce.pyx b/pandas/_libs/reduction.pyx similarity index 96% rename from pandas/_libs/src/reduce.pyx rename to pandas/_libs/reduction.pyx index f0ec8d284ef0e..3588ac14c87d1 100644 --- a/pandas/_libs/src/reduce.pyx +++ b/pandas/_libs/reduction.pyx @@ -1,17 +1,32 @@ # -*- coding: utf-8 -*- # cython: profile=False +from distutils.version import LooseVersion + +from cython cimport Py_ssize_t +from cpython cimport Py_INCREF + +from libc.stdlib cimport malloc, free + import numpy as np +cimport numpy as cnp +from numpy cimport (ndarray, + int64_t, + PyArray_SETITEM, + PyArray_ITER_NEXT, PyArray_ITER_DATA, PyArray_IterNew, + flatiter) +cnp.import_array() -from distutils.version import 
LooseVersion +cimport util +from lib import maybe_convert_objects is_numpy_prior_1_6_2 = LooseVersion(np.__version__) < '1.6.2' cdef _get_result_array(object obj, Py_ssize_t size, Py_ssize_t cnt): - if isinstance(obj, np.ndarray) \ - or isinstance(obj, list) and len(obj) == cnt \ - or getattr(obj, 'shape', None) == (cnt,): + if (util.is_array(obj) or + isinstance(obj, list) and len(obj) == cnt or + getattr(obj, 'shape', None) == (cnt,)): raise ValueError('function does not reduce') return np.empty(size, dtype='O') @@ -135,8 +150,7 @@ cdef class Reducer: else: res = self.f(chunk) - if hasattr(res, 'values') and isinstance( - res.values, np.ndarray): + if hasattr(res, 'values') and util.is_array(res.values): res = res.values if i == 0: result = _get_result_array(res, @@ -418,10 +432,10 @@ cdef class SeriesGrouper: cdef inline _extract_result(object res): """ extract the result object, it might be a 0-dim ndarray or a len-1 0-dim, or a scalar """ - if hasattr(res, 'values') and isinstance(res.values, np.ndarray): + if hasattr(res, 'values') and util.is_array(res.values): res = res.values if not np.isscalar(res): - if isinstance(res, np.ndarray): + if util.is_array(res): if res.ndim == 0: res = res.item() elif res.ndim == 1 and len(res) == 1: diff --git a/pandas/_libs/reshape.pyx b/pandas/_libs/reshape.pyx index c4104b66e009f..1d7893f69c31d 100644 --- a/pandas/_libs/reshape.pyx +++ b/pandas/_libs/reshape.pyx @@ -1,16 +1,15 @@ # cython: profile=False -cimport numpy as np -import numpy as np - cimport cython from cython cimport Py_ssize_t -np.import_array() - +import numpy as np +cimport numpy as cnp from numpy cimport (ndarray, int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, uint32_t, uint64_t, float32_t, float64_t) +cnp.import_array() + cdef double NaN = np.NaN cdef double nan = NaN diff --git a/pandas/_libs/skiplist.pyx b/pandas/_libs/skiplist.pyx index c96413edfb0f2..5ede31b24118d 100644 --- a/pandas/_libs/skiplist.pyx +++ b/pandas/_libs/skiplist.pyx @@ -8,20 
+8,20 @@ from libc.math cimport log +import numpy as np +cimport numpy as cnp +from numpy cimport double_t +cnp.import_array() + + # MSVC does not have log2! cdef double Log2(double x): return log(x) / log(2.) -cimport numpy as np -import numpy as np -from numpy cimport double_t from random import random -# initialize numpy -np.import_array() - # TODO: optimize this, make less messy cdef class Node: diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx index bb8b0ed14e1d9..2abd270652433 100644 --- a/pandas/_libs/sparse.pyx +++ b/pandas/_libs/sparse.pyx @@ -1,12 +1,15 @@ -from numpy cimport (ndarray, uint8_t, int64_t, int32_t, int16_t, int8_t, - float64_t, float32_t) -cimport numpy as np +# -*- coding: utf-8 -*- +import operator +import sys cimport cython import numpy as np -import operator -import sys +cimport numpy as cnp +from numpy cimport (ndarray, uint8_t, int64_t, int32_t, int16_t, int8_t, + float64_t, float32_t) +cnp.import_array() + from distutils.version import LooseVersion @@ -15,8 +18,6 @@ _np_version = np.version.short_version _np_version_under1p10 = LooseVersion(_np_version) < LooseVersion('1.10') _np_version_under1p11 = LooseVersion(_np_version) < LooseVersion('1.11') -np.import_array() -np.import_ufunc() # ----------------------------------------------------------------------------- # Preamble stuff diff --git a/pandas/_libs/src/inference.pyx b/pandas/_libs/src/inference.pyx index b74b3a79fd69a..b29a2e519efcd 100644 --- a/pandas/_libs/src/inference.pyx +++ b/pandas/_libs/src/inference.pyx @@ -5,15 +5,30 @@ cimport cython from tslibs.nattype import NaT from tslibs.conversion cimport convert_to_tsobject from tslibs.timedeltas cimport convert_to_timedelta64 -from tslibs.timezones cimport get_timezone +from tslibs.timezones cimport get_timezone, tz_compare from datetime import datetime, timedelta iNaT = util.get_nat() cdef bint PY2 = sys.version_info[0] == 2 - -from util cimport (UINT8_MAX, UINT16_MAX, UINT32_MAX, UINT64_MAX, - INT8_MIN, 
INT8_MAX, INT16_MIN, INT16_MAX, - INT32_MAX, INT32_MIN, INT64_MAX, INT64_MIN) +cdef double nan = np.NaN + +cdef extern from "numpy/arrayobject.h": + # cython's numpy.dtype specification is incorrect, which leads to + # errors in issubclass(self.dtype.type, np.bool_), so we directly + # include the correct version + # https://github.com/cython/cython/issues/2022 + + ctypedef class numpy.dtype [object PyArray_Descr]: + # Use PyDataType_* macros when possible, however there are no macros + # for accessing some of the fields, so some are defined. Please + # ask on cython-dev if you need more. + cdef int type_num + cdef int itemsize "elsize" + cdef char byteorder + cdef object fields + cdef tuple names + +from util cimport UINT8_MAX, UINT64_MAX, INT64_MAX, INT64_MIN # core.common import for fast inference checks @@ -38,7 +53,7 @@ cpdef bint is_decimal(object obj): cpdef bint is_interval(object obj): - return isinstance(obj, Interval) + return getattr(obj, '_typ', '_typ') == 'interval' cpdef bint is_period(object val): @@ -331,7 +346,7 @@ def infer_dtype(object value, bint skipna=False): bint seen_pdnat = False bint seen_val = False - if isinstance(value, np.ndarray): + if util.is_array(value): values = value elif hasattr(value, 'dtype'): @@ -349,7 +364,7 @@ def infer_dtype(object value, bint skipna=False): raise ValueError("cannot infer type for {0}".format(type(value))) else: - if not isinstance(value, list): + if not PyList_Check(value): value = list(value) from pandas.core.dtypes.cast import ( construct_1d_object_array_from_listlike) @@ -610,13 +625,13 @@ cdef class Validator: cdef: Py_ssize_t n - np.dtype dtype + dtype dtype bint skipna def __cinit__( self, Py_ssize_t n, - np.dtype dtype=np.dtype(np.object_), + dtype dtype=np.dtype(np.object_), bint skipna=False ): self.n = n @@ -824,7 +839,7 @@ cdef class TemporalValidator(Validator): def __cinit__( self, Py_ssize_t n, - np.dtype dtype=np.dtype(np.object_), + dtype dtype=np.dtype(np.object_), bint skipna=False ): 
self.n = n @@ -907,7 +922,7 @@ cpdef bint is_datetime_with_singletz_array(ndarray values): val = values[j] if val is not NaT: tz = getattr(val, 'tzinfo', None) - if base_tz != tz and base_tz != get_timezone(tz): + if not tz_compare(base_tz, tz): return False break @@ -1390,10 +1405,6 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, return objects -def convert_sql_column(x): - return maybe_convert_objects(x, try_float=1) - - def sanitize_objects(ndarray[object] values, set na_values, convert_empty=True): cdef: diff --git a/pandas/_libs/src/klib/ktypes.h b/pandas/_libs/src/klib/ktypes.h deleted file mode 100644 index 981f17372a2d5..0000000000000 --- a/pandas/_libs/src/klib/ktypes.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __KTYPES_H -#define __KTYPES_H - -/* compipler specific configuration */ - -#endif /* __KTYPES_H */ diff --git a/pandas/_libs/src/klib/kvec.h b/pandas/_libs/src/klib/kvec.h deleted file mode 100644 index c5e6e6c407dfc..0000000000000 --- a/pandas/_libs/src/klib/kvec.h +++ /dev/null @@ -1,151 +0,0 @@ -/* The MIT License - - Copyright (c) 2008, by Attractive Chaos - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. -*/ - -/* - An example: - -#include "kvec.h" -int main() { - kvec_t(int) array; - kv_init(array); - kv_push(int, array, 10); // append - kv_a(int, array, 20) = 5; // dynamic - kv_A(array, 20) = 4; // static - kv_destroy(array); - return 0; -} -*/ - -/* - 2008-09-22 (0.1.0): - - * The initial version. - -*/ - -#ifndef AC_KVEC_H -#define AC_KVEC_H - -#include -#include -#include - -#ifndef PANDAS_INLINE - #if defined(__GNUC__) - #define PANDAS_INLINE static __inline__ - #elif defined(_MSC_VER) - #define PANDAS_INLINE static __inline - #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L - #define PANDAS_INLINE static inline - #else - #define PANDAS_INLINE - #endif -#endif - -#define kv_roundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) - -#define kvec_t(type) struct { size_t n, m; type *a; } -#define kv_init(v) ((v).n = (v).m = 0, (v).a = 0) -#define kv_destroy(v) free((v).a) -#define kv_A(v, i) ((v).a[(i)]) -#define kv_pop(v) ((v).a[--(v).n]) -#define kv_size(v) ((v).n) -#define kv_max(v) ((v).m) - -#define kv_resize(type, v, s) ((v).m = (s), (v).a = (type*)realloc((v).a, sizeof(type) * (v).m)) - -#define kv_copy(type, v1, v0) do { \ - if ((v1).m < (v0).n) kv_resize(type, v1, (v0).n); \ - (v1).n = (v0).n; \ - memcpy((v1).a, (v0).a, sizeof(type) * (v0).n); \ - } while (0) \ - -#define kv_push(type, v, x) do { \ - if ((v)->n == (v)->m) { \ - (v)->m = (v)->m? (v)->m<<1 : 2; \ - (v)->a = (type*)realloc((v)->a, sizeof(type) * (v)->m); \ - } \ - (v)->a[(v)->n++] = (x); \ - } while (0) - -#define kv_pushp(type, v) (((v).n == (v).m)? \ - ((v).m = ((v).m? 
(v).m<<1 : 2), \ - (v).a = (type*)realloc((v).a, sizeof(type) * (v).m), 0) \ - : 0), ((v).a + ((v).n++)) - -#define kv_a(type, v, i) ((v).m <= (size_t)(i)? \ - ((v).m = (v).n = (i) + 1, kv_roundup32((v).m), \ - (v).a = (type*)realloc((v).a, sizeof(type) * (v).m), 0) \ - : (v).n <= (size_t)(i)? (v).n = (i) \ - : 0), (v).a[(i)] - -// #define kv_int64_push(v, x) (kv_push(int64_t, (v), (x))) - -typedef struct { - size_t n, m; - int64_t* a; -} kv_int64_t; - -typedef struct { - size_t n, m; - double* a; -} kv_double; - -typedef struct { - size_t n, m; - PyObject** a; -} kv_object_t; - -void PANDAS_INLINE kv_object_push(kv_object_t *v, PyObject *x) { - do { - if (v->n == v->m) { - v->m = v->m? v->m<<1 : 2; - v->a = (PyObject**)realloc(v->a, sizeof(PyObject*) * v->m); - } - v->a[v->n++] = x; - } while (0); - // kv_push(PyObject*, v, x); - Py_INCREF(x); -} - -void PANDAS_INLINE kv_int64_push(kv_int64_t *v, int64_t x) { - kv_push(int64_t, v, x); -} - -void PANDAS_INLINE kv_double_push(kv_double *v, double x) { - kv_push(double, v, x); -} - -void PANDAS_INLINE kv_object_destroy(kv_object_t *v) { - int i; - for (i = 0; i < v->n; ++i) - { - Py_XDECREF(v->a[i]); - } - free(v->a); -} - - -#endif diff --git a/pandas/_libs/src/numpy.pxd b/pandas/_libs/src/numpy.pxd deleted file mode 100644 index 8ce398ce218a8..0000000000000 --- a/pandas/_libs/src/numpy.pxd +++ /dev/null @@ -1,994 +0,0 @@ -# NumPy static imports for Cython -# -# If any of the PyArray_* functions are called, import_array must be -# called first. -# -# This also defines backwards-compatability buffer acquisition -# code for use in Python 2.x (or Python <= 2.5 when NumPy starts -# implementing PEP-3118 directly). -# -# Because of laziness, the format string of the buffer is statically -# allocated. Increase the size if this is not enough, or submit a -# patch to do this properly. 
-# -# Author: Dag Sverre Seljebotn -# - -DEF _buffer_format_string_len = 255 - -cimport cpython.buffer as pybuf -from cpython.ref cimport Py_INCREF, Py_XDECREF -from cpython.object cimport PyObject -cimport libc.stdlib as stdlib -cimport libc.stdio as stdio - -cdef extern from "Python.h": - ctypedef int Py_intptr_t - -cdef extern from "numpy/arrayobject.h": - ctypedef Py_intptr_t npy_intp - ctypedef size_t npy_uintp - - cdef enum NPY_TYPES: - NPY_BOOL - NPY_BYTE - NPY_UBYTE - NPY_SHORT - NPY_USHORT - NPY_INT - NPY_UINT - NPY_LONG - NPY_ULONG - NPY_LONGLONG - NPY_ULONGLONG - NPY_FLOAT - NPY_DOUBLE - NPY_LONGDOUBLE - NPY_CFLOAT - NPY_CDOUBLE - NPY_CLONGDOUBLE - NPY_OBJECT - NPY_STRING - NPY_UNICODE - NPY_VOID - NPY_NTYPES - NPY_NOTYPE - - NPY_INT8 - NPY_INT16 - NPY_INT32 - NPY_INT64 - NPY_INT128 - NPY_INT256 - NPY_UINT8 - NPY_UINT16 - NPY_UINT32 - NPY_UINT64 - NPY_UINT128 - NPY_UINT256 - NPY_FLOAT16 - NPY_FLOAT32 - NPY_FLOAT64 - NPY_FLOAT80 - NPY_FLOAT96 - NPY_FLOAT128 - NPY_FLOAT256 - NPY_COMPLEX32 - NPY_COMPLEX64 - NPY_COMPLEX128 - NPY_COMPLEX160 - NPY_COMPLEX192 - NPY_COMPLEX256 - NPY_COMPLEX512 - - NPY_DATETIME - NPY_TIMEDELTA - - NPY_INTP - - ctypedef enum NPY_ORDER: - NPY_ANYORDER - NPY_CORDER - NPY_FORTRANORDER - - ctypedef enum NPY_CLIPMODE: - NPY_CLIP - NPY_WRAP - NPY_RAISE - - ctypedef enum NPY_SCALARKIND: - NPY_NOSCALAR, - NPY_BOOL_SCALAR, - NPY_INTPOS_SCALAR, - NPY_INTNEG_SCALAR, - NPY_FLOAT_SCALAR, - NPY_COMPLEX_SCALAR, - NPY_OBJECT_SCALAR - - ctypedef enum NPY_SORTKIND: - NPY_QUICKSORT - NPY_HEAPSORT - NPY_MERGESORT - - ctypedef enum NPY_SEARCHSIDE: - NPY_SEARCHLEFT - NPY_SEARCHRIGHT - - enum: - NPY_C_CONTIGUOUS - NPY_F_CONTIGUOUS - NPY_CONTIGUOUS - NPY_FORTRAN - NPY_OWNDATA - NPY_FORCECAST - NPY_ENSURECOPY - NPY_ENSUREARRAY - NPY_ELEMENTSTRIDES - NPY_ALIGNED - NPY_NOTSWAPPED - NPY_WRITEABLE - NPY_UPDATEIFCOPY - NPY_ARR_HAS_DESCR - - NPY_BEHAVED - NPY_BEHAVED_NS - NPY_CARRAY - NPY_CARRAY_RO - NPY_FARRAY - NPY_FARRAY_RO - NPY_DEFAULT - - NPY_IN_ARRAY - 
NPY_OUT_ARRAY - NPY_INOUT_ARRAY - NPY_IN_FARRAY - NPY_OUT_FARRAY - NPY_INOUT_FARRAY - - NPY_UPDATE_ALL - - cdef enum: - NPY_MAXDIMS - - npy_intp NPY_MAX_ELSIZE - - ctypedef void (*PyArray_VectorUnaryFunc)( - void *, void *, npy_intp, void *, void *) - - ctypedef class numpy.dtype [object PyArray_Descr]: - # Use PyDataType_* macros when possible, however there are no macros - # for accessing some of the fields, so some are defined. Please - # ask on cython-dev if you need more. - cdef int type_num - cdef int itemsize "elsize" - cdef char byteorder - cdef object fields - cdef tuple names - - ctypedef extern class numpy.flatiter [object PyArrayIterObject]: - # Use through macros - pass - - ctypedef extern class numpy.broadcast [object PyArrayMultiIterObject]: - # Use through macros - pass - - ctypedef struct PyArrayObject: - # For use in situations where ndarray can't replace PyArrayObject*, - # like PyArrayObject**. - pass - - ctypedef class numpy.ndarray [object PyArrayObject]: - cdef __cythonbufferdefaults__ = {"mode": "strided"} - - cdef: - # Only taking a few of the most commonly used and stable fields. - # One should use PyArray_* macros instead to access the C fields. - char *data - int ndim "nd" - npy_intp *shape "dimensions" - npy_intp *strides - dtype descr - PyObject* base - - # Note: This syntax (function definition in pxd files) is an - # experimental exception made for __getbuffer__ and __releasebuffer__ - # -- the details of this may change. - def __getbuffer__(ndarray self, Py_buffer* info, int flags): - # This implementation of getbuffer is geared towards Cython - # requirements, and does not yet fulfill the PEP. 
- # In particular strided access is always provided regardless - # of flags - - if info == NULL: return - - cdef int copy_shape, i, ndim - cdef int endian_detector = 1 - cdef bint little_endian = ((&endian_detector)[0] != 0) - - ndim = PyArray_NDIM(self) - - if sizeof(npy_intp) != sizeof(Py_ssize_t): - copy_shape = 1 - else: - copy_shape = 0 - - if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) - and not PyArray_CHKFLAGS(self, NPY_C_CONTIGUOUS)): - raise ValueError(u"ndarray is not C contiguous") - - if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) - and not PyArray_CHKFLAGS(self, NPY_F_CONTIGUOUS)): - raise ValueError(u"ndarray is not Fortran contiguous") - - info.buf = PyArray_DATA(self) - info.ndim = ndim - if copy_shape: - # Allocate new buffer for strides and shape info. - # This is allocated as one block, strides first. - info.strides = stdlib.malloc( - sizeof(Py_ssize_t) * ndim * 2) - - info.shape = info.strides + ndim - for i in range(ndim): - info.strides[i] = PyArray_STRIDES(self)[i] - info.shape[i] = PyArray_DIMS(self)[i] - else: - info.strides = PyArray_STRIDES(self) - info.shape = PyArray_DIMS(self) - info.suboffsets = NULL - info.itemsize = PyArray_ITEMSIZE(self) - info.readonly = not PyArray_ISWRITEABLE(self) - - cdef int t - cdef char* f = NULL - cdef dtype descr = self.descr - cdef list stack - cdef int offset - - cdef bint hasfields = PyDataType_HASFIELDS(descr) - - if not hasfields and not copy_shape: - # do not call releasebuffer - info.obj = None - else: - # need to call releasebuffer - info.obj = self - - if not hasfields: - t = descr.type_num - if ((descr.byteorder == '>' and little_endian) or - (descr.byteorder == '<' and not little_endian)): - raise ValueError(u"Non-native byte order not supported") - if t == NPY_BYTE: f = "b" - elif t == NPY_UBYTE: f = "B" - elif t == NPY_SHORT: f = "h" - elif t == NPY_USHORT: f = "H" - elif t == NPY_INT: f = "i" - elif t == NPY_UINT: f = "I" - elif t == NPY_LONG: f = "l" 
- elif t == NPY_ULONG: f = "L" - elif t == NPY_LONGLONG: f = "q" - elif t == NPY_ULONGLONG: f = "Q" - elif t == NPY_FLOAT: f = "f" - elif t == NPY_DOUBLE: f = "d" - elif t == NPY_LONGDOUBLE: f = "g" - elif t == NPY_CFLOAT: f = "Zf" - elif t == NPY_CDOUBLE: f = "Zd" - elif t == NPY_CLONGDOUBLE: f = "Zg" - elif t == NPY_OBJECT: f = "O" - else: - raise ValueError( - u"unknown dtype code in numpy.pxd (%d)" % t) - info.format = f - return - else: - info.format = stdlib.malloc(_buffer_format_string_len) - info.format[0] = '^' # Native data types, manual alignment - offset = 0 - f = _util_dtypestring(descr, info.format + 1, - info.format + _buffer_format_string_len, - &offset) - f[0] = 0 # Terminate format string - - def __releasebuffer__(ndarray self, Py_buffer* info): - if PyArray_HASFIELDS(self): - stdlib.free(info.format) - if sizeof(npy_intp) != sizeof(Py_ssize_t): - stdlib.free(info.strides) - # info.shape was stored after info.strides in the same block - - ctypedef signed char npy_bool - - ctypedef signed char npy_byte - ctypedef signed short npy_short - ctypedef signed int npy_int - ctypedef signed long npy_long - ctypedef signed long long npy_longlong - - ctypedef unsigned char npy_ubyte - ctypedef unsigned short npy_ushort - ctypedef unsigned int npy_uint - ctypedef unsigned long npy_ulong - ctypedef unsigned long long npy_ulonglong - - ctypedef float npy_float - ctypedef double npy_double - ctypedef long double npy_longdouble - - ctypedef signed char npy_int8 - ctypedef signed short npy_int16 - ctypedef signed int npy_int32 - ctypedef signed long long npy_int64 - ctypedef signed long long npy_int96 - ctypedef signed long long npy_int128 - - ctypedef unsigned char npy_uint8 - ctypedef unsigned short npy_uint16 - ctypedef unsigned int npy_uint32 - ctypedef unsigned long long npy_uint64 - ctypedef unsigned long long npy_uint96 - ctypedef unsigned long long npy_uint128 - - ctypedef float npy_float16 - ctypedef float npy_float32 - ctypedef double npy_float64 - 
ctypedef long double npy_float80 - ctypedef long double npy_float96 - ctypedef long double npy_float128 - - ctypedef struct npy_cfloat: - double real - double imag - - ctypedef struct npy_cdouble: - double real - double imag - - ctypedef struct npy_clongdouble: - double real - double imag - - ctypedef struct npy_complex64: - double real - double imag - - ctypedef struct npy_complex128: - double real - double imag - - ctypedef struct npy_complex160: - double real - double imag - - ctypedef struct npy_complex192: - double real - double imag - - ctypedef struct npy_complex256: - double real - double imag - - ctypedef struct PyArray_Dims: - npy_intp *ptr - int len - - void import_array() - - # - # Macros from ndarrayobject.h - # - bint PyArray_CHKFLAGS(ndarray m, int flags) - bint PyArray_ISCONTIGUOUS(ndarray m) - bint PyArray_ISWRITEABLE(ndarray m) - bint PyArray_ISALIGNED(ndarray m) - - int PyArray_NDIM(ndarray) - bint PyArray_ISONESEGMENT(ndarray) - bint PyArray_ISFORTRAN(ndarray) - int PyArray_FORTRANIF(ndarray) - - void* PyArray_DATA(ndarray) - char* PyArray_BYTES(ndarray) - npy_intp* PyArray_DIMS(ndarray) - npy_intp* PyArray_STRIDES(ndarray) - npy_intp PyArray_DIM(ndarray, size_t) - npy_intp PyArray_STRIDE(ndarray, size_t) - - # object PyArray_BASE(ndarray) wrong refcount semantics - # dtype PyArray_DESCR(ndarray) wrong refcount semantics - int PyArray_FLAGS(ndarray) - npy_intp PyArray_ITEMSIZE(ndarray) - int PyArray_TYPE(ndarray arr) - - object PyArray_GETITEM(ndarray arr, void *itemptr) - int PyArray_SETITEM(ndarray arr, void *itemptr, object obj) - - bint PyTypeNum_ISBOOL(int) - bint PyTypeNum_ISUNSIGNED(int) - bint PyTypeNum_ISSIGNED(int) - bint PyTypeNum_ISINTEGER(int) - bint PyTypeNum_ISFLOAT(int) - bint PyTypeNum_ISNUMBER(int) - bint PyTypeNum_ISSTRING(int) - bint PyTypeNum_ISCOMPLEX(int) - bint PyTypeNum_ISPYTHON(int) - bint PyTypeNum_ISFLEXIBLE(int) - bint PyTypeNum_ISUSERDEF(int) - bint PyTypeNum_ISEXTENDED(int) - bint PyTypeNum_ISOBJECT(int) - - bint 
PyDataType_ISBOOL(dtype) - bint PyDataType_ISUNSIGNED(dtype) - bint PyDataType_ISSIGNED(dtype) - bint PyDataType_ISINTEGER(dtype) - bint PyDataType_ISFLOAT(dtype) - bint PyDataType_ISNUMBER(dtype) - bint PyDataType_ISSTRING(dtype) - bint PyDataType_ISCOMPLEX(dtype) - bint PyDataType_ISPYTHON(dtype) - bint PyDataType_ISFLEXIBLE(dtype) - bint PyDataType_ISUSERDEF(dtype) - bint PyDataType_ISEXTENDED(dtype) - bint PyDataType_ISOBJECT(dtype) - bint PyDataType_HASFIELDS(dtype) - - bint PyArray_ISBOOL(ndarray) - bint PyArray_ISUNSIGNED(ndarray) - bint PyArray_ISSIGNED(ndarray) - bint PyArray_ISINTEGER(ndarray) - bint PyArray_ISFLOAT(ndarray) - bint PyArray_ISNUMBER(ndarray) - bint PyArray_ISSTRING(ndarray) - bint PyArray_ISCOMPLEX(ndarray) - bint PyArray_ISPYTHON(ndarray) - bint PyArray_ISFLEXIBLE(ndarray) - bint PyArray_ISUSERDEF(ndarray) - bint PyArray_ISEXTENDED(ndarray) - bint PyArray_ISOBJECT(ndarray) - bint PyArray_HASFIELDS(ndarray) - - bint PyArray_ISVARIABLE(ndarray) - - bint PyArray_SAFEALIGNEDCOPY(ndarray) - bint PyArray_ISNBO(ndarray) - bint PyArray_IsNativeByteOrder(ndarray) - bint PyArray_ISNOTSWAPPED(ndarray) - bint PyArray_ISBYTESWAPPED(ndarray) - - bint PyArray_FLAGSWAP(ndarray, int) - - bint PyArray_ISCARRAY(ndarray) - bint PyArray_ISCARRAY_RO(ndarray) - bint PyArray_ISFARRAY(ndarray) - bint PyArray_ISFARRAY_RO(ndarray) - bint PyArray_ISBEHAVED(ndarray) - bint PyArray_ISBEHAVED_RO(ndarray) - - bint PyDataType_ISNOTSWAPPED(dtype) - bint PyDataType_ISBYTESWAPPED(dtype) - - bint PyArray_DescrCheck(object) - - bint PyArray_Check(object) - bint PyArray_CheckExact(object) - - # Cannot be supported due to out arg: - # bint PyArray_HasArrayInterfaceType(object, dtype, object, object&) - # bint PyArray_HasArrayInterface(op, out) - - bint PyArray_IsZeroDim(object) - # Cannot be supported due to ## ## in macro: - # bint PyArray_IsScalar(object, verbatim work) - bint PyArray_CheckScalar(object) - bint PyArray_IsPythonNumber(object) - bint 
PyArray_IsPythonScalar(object) - bint PyArray_IsAnyScalar(object) - bint PyArray_CheckAnyScalar(object) - ndarray PyArray_GETCONTIGUOUS(ndarray) - bint PyArray_SAMESHAPE(ndarray, ndarray) - npy_intp PyArray_SIZE(ndarray) - npy_intp PyArray_NBYTES(ndarray) - - object PyArray_FROM_O(object) - object PyArray_FROM_OF(object m, int flags) - bint PyArray_FROM_OT(object m, int type) - bint PyArray_FROM_OTF(object m, int type, int flags) - object PyArray_FROMANY(object m, int type, int min, int max, int flags) - object PyArray_ZEROS(int nd, npy_intp* dims, int type, int fortran) - object PyArray_EMPTY(int nd, npy_intp* dims, int type, int fortran) - void PyArray_FILLWBYTE(object, int val) - npy_intp PyArray_REFCOUNT(object) - object PyArray_ContiguousFromAny(op, int, int min_depth, int max_depth) - unsigned char PyArray_EquivArrTypes(ndarray a1, ndarray a2) - bint PyArray_EquivByteorders(int b1, int b2) - object PyArray_SimpleNew(int nd, npy_intp* dims, int typenum) - object PyArray_SimpleNewFromData(int nd, npy_intp* dims, - int typenum, void* data) - #object PyArray_SimpleNewFromDescr(int nd, npy_intp* dims, dtype descr) - object PyArray_ToScalar(void* data, ndarray arr) - - void* PyArray_GETPTR1(ndarray m, npy_intp i) - void* PyArray_GETPTR2(ndarray m, npy_intp i, npy_intp j) - void* PyArray_GETPTR3(ndarray m, npy_intp i, npy_intp j, npy_intp k) - void* PyArray_GETPTR4(ndarray m, npy_intp i, - npy_intp j, npy_intp k, npy_intp l) - - void PyArray_XDECREF_ERR(ndarray) - # Cannot be supported due to out arg - # void PyArray_DESCR_REPLACE(descr) - - object PyArray_Copy(ndarray) - object PyArray_FromObject(object op, int type, - int min_depth, int max_depth) - object PyArray_ContiguousFromObject(object op, int type, - int min_depth, int max_depth) - object PyArray_CopyFromObject(object op, int type, - int min_depth, int max_depth) - - object PyArray_Cast(ndarray mp, int type_num) - object PyArray_Take(ndarray ap, object items, int axis) - object PyArray_Put(ndarray ap, 
object items, object values) - - void PyArray_ITER_RESET(flatiter it) nogil - void PyArray_ITER_NEXT(flatiter it) nogil - void PyArray_ITER_GOTO(flatiter it, npy_intp* destination) nogil - void PyArray_ITER_GOTO1D(flatiter it, npy_intp ind) nogil - void* PyArray_ITER_DATA(flatiter it) nogil - bint PyArray_ITER_NOTDONE(flatiter it) nogil - - void PyArray_MultiIter_RESET(broadcast multi) nogil - void PyArray_MultiIter_NEXT(broadcast multi) nogil - void PyArray_MultiIter_GOTO(broadcast multi, npy_intp dest) nogil - void PyArray_MultiIter_GOTO1D(broadcast multi, npy_intp ind) nogil - void* PyArray_MultiIter_DATA(broadcast multi, npy_intp i) nogil - void PyArray_MultiIter_NEXTi(broadcast multi, npy_intp i) nogil - bint PyArray_MultiIter_NOTDONE(broadcast multi) nogil - - # Functions from __multiarray_api.h - - # Functions taking dtype and returning object/ndarray are disabled - # for now as they steal dtype references. I'm conservative and disable - # more than is probably needed until it can be checked further. 
- int PyArray_SetNumericOps (object) - object PyArray_GetNumericOps () - int PyArray_INCREF (ndarray) - int PyArray_XDECREF (ndarray) - void PyArray_SetStringFunction (object, int) - dtype PyArray_DescrFromType (int) - object PyArray_TypeObjectFromType (int) - char * PyArray_Zero (ndarray) - char * PyArray_One (ndarray) - #object PyArray_CastToType (ndarray, dtype, int) - int PyArray_CastTo (ndarray, ndarray) - int PyArray_CastAnyTo (ndarray, ndarray) - int PyArray_CanCastSafely (int, int) - npy_bool PyArray_CanCastTo (dtype, dtype) - int PyArray_ObjectType (object, int) - dtype PyArray_DescrFromObject (object, dtype) - #ndarray* PyArray_ConvertToCommonType (object, int *) - dtype PyArray_DescrFromScalar (object) - dtype PyArray_DescrFromTypeObject (object) - npy_intp PyArray_Size (object) - #object PyArray_Scalar (void *, dtype, object) - #object PyArray_FromScalar (object, dtype) - void PyArray_ScalarAsCtype (object, void *) - #int PyArray_CastScalarToCtype (object, void *, dtype) - #int PyArray_CastScalarDirect (object, dtype, void *, int) - object PyArray_ScalarFromObject (object) - #PyArray_VectorUnaryFunc * PyArray_GetCastFunc (dtype, int) - object PyArray_FromDims (int, int *, int) - #object PyArray_FromDimsAndDataAndDescr (int, int *, dtype, char *) - #object PyArray_FromAny (object, dtype, int, int, int, object) - object PyArray_EnsureArray (object) - object PyArray_EnsureAnyArray (object) - #object PyArray_FromFile (stdio.FILE *, dtype, npy_intp, char *) - #object PyArray_FromString (char *, npy_intp, dtype, npy_intp, char *) - #object PyArray_FromBuffer (object, dtype, npy_intp, npy_intp) - #object PyArray_FromIter (object, dtype, npy_intp) - object PyArray_Return (ndarray) - #object PyArray_GetField (ndarray, dtype, int) - #int PyArray_SetField (ndarray, dtype, int, object) - object PyArray_Byteswap (ndarray, npy_bool) - object PyArray_Resize (ndarray, PyArray_Dims *, int, NPY_ORDER) - int PyArray_MoveInto (ndarray, ndarray) - int PyArray_CopyInto 
(ndarray, ndarray) - int PyArray_CopyAnyInto (ndarray, ndarray) - int PyArray_CopyObject (ndarray, object) - object PyArray_NewCopy (ndarray, NPY_ORDER) - object PyArray_ToList (ndarray) - object PyArray_ToString (ndarray, NPY_ORDER) - int PyArray_ToFile (ndarray, stdio.FILE *, char *, char *) - int PyArray_Dump (object, object, int) - object PyArray_Dumps (object, int) - int PyArray_ValidType (int) - void PyArray_UpdateFlags (ndarray, int) - object PyArray_New (type, int, npy_intp *, int, npy_intp *, - void *, int, int, object) - #dtype PyArray_DescrNew (dtype) - dtype PyArray_DescrNewFromType (int) - double PyArray_GetPriority (object, double) - object PyArray_IterNew (object) - object PyArray_MultiIterNew (int, ...) - - int PyArray_PyIntAsInt (object) - npy_intp PyArray_PyIntAsIntp (object) - int PyArray_Broadcast (broadcast) - void PyArray_FillObjectArray (ndarray, object) - int PyArray_FillWithScalar (ndarray, object) - npy_bool PyArray_CheckStrides ( - int, int, npy_intp, npy_intp, npy_intp *, npy_intp *) - dtype PyArray_DescrNewByteorder (dtype, char) - object PyArray_IterAllButAxis (object, int *) - #object PyArray_CheckFromAny (object, dtype, int, int, int, object) - #object PyArray_FromArray (ndarray, dtype, int) - object PyArray_FromInterface (object) - object PyArray_FromStructInterface (object) - #object PyArray_FromArrayAttr (object, dtype, object) - #NPY_SCALARKIND PyArray_ScalarKind (int, ndarray*) - int PyArray_CanCoerceScalar (int, int, NPY_SCALARKIND) - object PyArray_NewFlagsObject (object) - npy_bool PyArray_CanCastScalar (type, type) - #int PyArray_CompareUCS4 (npy_ucs4 *, npy_ucs4 *, register size_t) - int PyArray_RemoveSmallest (broadcast) - int PyArray_ElementStrides (object) - void PyArray_Item_INCREF (char *, dtype) - void PyArray_Item_XDECREF (char *, dtype) - object PyArray_FieldNames (object) - object PyArray_Transpose (ndarray, PyArray_Dims *) - object PyArray_TakeFrom (ndarray, object, int, ndarray, NPY_CLIPMODE) - object 
PyArray_PutTo (ndarray, object, object, NPY_CLIPMODE) - object PyArray_PutMask (ndarray, object, object) - object PyArray_Repeat (ndarray, object, int) - object PyArray_Choose (ndarray, object, ndarray, NPY_CLIPMODE) - int PyArray_Sort (ndarray, int, NPY_SORTKIND) - object PyArray_ArgSort (ndarray, int, NPY_SORTKIND) - object PyArray_SearchSorted (ndarray, object, NPY_SEARCHSIDE) - object PyArray_ArgMax (ndarray, int, ndarray) - object PyArray_ArgMin (ndarray, int, ndarray) - object PyArray_Reshape (ndarray, object) - object PyArray_Newshape (ndarray, PyArray_Dims *, NPY_ORDER) - object PyArray_Squeeze (ndarray) - #object PyArray_View (ndarray, dtype, type) - object PyArray_SwapAxes (ndarray, int, int) - object PyArray_Max (ndarray, int, ndarray) - object PyArray_Min (ndarray, int, ndarray) - object PyArray_Ptp (ndarray, int, ndarray) - object PyArray_Mean (ndarray, int, int, ndarray) - object PyArray_Trace (ndarray, int, int, int, int, ndarray) - object PyArray_Diagonal (ndarray, int, int, int) - object PyArray_Clip (ndarray, object, object, ndarray) - object PyArray_Conjugate (ndarray, ndarray) - object PyArray_Nonzero (ndarray) - object PyArray_Std (ndarray, int, int, ndarray, int) - object PyArray_Sum (ndarray, int, int, ndarray) - object PyArray_CumSum (ndarray, int, int, ndarray) - object PyArray_Prod (ndarray, int, int, ndarray) - object PyArray_CumProd (ndarray, int, int, ndarray) - object PyArray_All (ndarray, int, ndarray) - object PyArray_Any (ndarray, int, ndarray) - object PyArray_Compress (ndarray, object, int, ndarray) - object PyArray_Flatten (ndarray, NPY_ORDER) - object PyArray_Ravel (ndarray, NPY_ORDER) - npy_intp PyArray_MultiplyList (npy_intp *, int) - int PyArray_MultiplyIntList (int *, int) - void * PyArray_GetPtr (ndarray, npy_intp*) - int PyArray_CompareLists (npy_intp *, npy_intp *, int) - #int PyArray_AsCArray (object*, void *, npy_intp *, int, dtype) - #int PyArray_As1D (object*, char **, int *, int) - #int PyArray_As2D (object*, char 
***, int *, int *, int) - int PyArray_Free (object, void *) - #int PyArray_Converter (object, object*) - int PyArray_IntpFromSequence (object, npy_intp *, int) - object PyArray_Concatenate (object, int) - object PyArray_InnerProduct (object, object) - object PyArray_MatrixProduct (object, object) - object PyArray_CopyAndTranspose (object) - object PyArray_Correlate (object, object, int) - int PyArray_TypestrConvert (int, int) - #int PyArray_DescrConverter (object, dtype*) - #int PyArray_DescrConverter2 (object, dtype*) - int PyArray_IntpConverter (object, PyArray_Dims *) - #int PyArray_BufferConverter (object, chunk) - int PyArray_AxisConverter (object, int *) - int PyArray_BoolConverter (object, npy_bool *) - int PyArray_ByteorderConverter (object, char *) - int PyArray_OrderConverter (object, NPY_ORDER *) - unsigned char PyArray_EquivTypes (dtype, dtype) - #object PyArray_Zeros (int, npy_intp *, dtype, int) - #object PyArray_Empty (int, npy_intp *, dtype, int) - object PyArray_Where (object, object, object) - object PyArray_Arange (double, double, double, int) - #object PyArray_ArangeObj (object, object, object, dtype) - int PyArray_SortkindConverter (object, NPY_SORTKIND *) - object PyArray_LexSort (object, int) - object PyArray_Round (ndarray, int, ndarray) - unsigned char PyArray_EquivTypenums (int, int) - int PyArray_RegisterDataType (dtype) - int PyArray_RegisterCastFunc (dtype, int, PyArray_VectorUnaryFunc *) - int PyArray_RegisterCanCast (dtype, int, NPY_SCALARKIND) - #void PyArray_InitArrFuncs (PyArray_ArrFuncs *) - object PyArray_IntTupleFromIntp (int, npy_intp *) - int PyArray_TypeNumFromName (char *) - int PyArray_ClipmodeConverter (object, NPY_CLIPMODE *) - #int PyArray_OutputConverter (object, ndarray*) - object PyArray_BroadcastToShape (object, npy_intp *, int) - void _PyArray_SigintHandler (int) - void* _PyArray_GetSigintBuf () - #int PyArray_DescrAlignConverter (object, dtype*) - #int PyArray_DescrAlignConverter2 (object, dtype*) - int 
PyArray_SearchsideConverter (object, void *) - object PyArray_CheckAxis (ndarray, int *, int) - npy_intp PyArray_OverflowMultiplyList (npy_intp *, int) - int PyArray_CompareString (char *, char *, size_t) - - -# Typedefs that matches the runtime dtype objects in -# the numpy module. - -# The ones that are commented out needs an IFDEF function -# in Cython to enable them only on the right systems. - -ctypedef npy_int8 int8_t -ctypedef npy_int16 int16_t -ctypedef npy_int32 int32_t -ctypedef npy_int64 int64_t -#ctypedef npy_int96 int96_t -#ctypedef npy_int128 int128_t - -ctypedef npy_uint8 uint8_t -ctypedef npy_uint16 uint16_t -ctypedef npy_uint32 uint32_t -ctypedef npy_uint64 uint64_t -#ctypedef npy_uint96 uint96_t -#ctypedef npy_uint128 uint128_t - -ctypedef npy_float16 float16_t -ctypedef npy_float32 float32_t -ctypedef npy_float64 float64_t -#ctypedef npy_float80 float80_t -#ctypedef npy_float128 float128_t - -ctypedef float complex complex64_t -ctypedef double complex complex128_t - -# The int types are mapped a bit surprising -- -# numpy.int corresponds to 'l' and numpy.long to 'q' -ctypedef npy_long int_t -ctypedef npy_longlong long_t -ctypedef npy_longlong longlong_t - -ctypedef npy_ulong uint_t -ctypedef npy_ulonglong ulong_t -ctypedef npy_ulonglong ulonglong_t - -ctypedef npy_intp intp_t -ctypedef npy_uintp uintp_t - -ctypedef npy_double float_t -ctypedef npy_double double_t -ctypedef npy_longdouble longdouble_t - -ctypedef npy_cfloat cfloat_t -ctypedef npy_cdouble cdouble_t -ctypedef npy_clongdouble clongdouble_t - -ctypedef npy_cdouble complex_t - -cdef inline object PyArray_MultiIterNew1(a): - return PyArray_MultiIterNew(1, a) - -cdef inline object PyArray_MultiIterNew2(a, b): - return PyArray_MultiIterNew(2, a, b) - -cdef inline object PyArray_MultiIterNew3(a, b, c): - return PyArray_MultiIterNew(3, a, b, c) - -cdef inline object PyArray_MultiIterNew4(a, b, c, d): - return PyArray_MultiIterNew(4, a, b, c, d) - -cdef inline object PyArray_MultiIterNew5(a, 
b, c, d, e): - return PyArray_MultiIterNew(5, a, b, - c, d, e) - -cdef inline char* _util_dtypestring(dtype descr, char* f, - char* end, int* offset) except NULL: - # Recursive utility function used in __getbuffer__ to get format - # string. The new location in the format string is returned. - - cdef dtype child - cdef int delta_offset - cdef tuple i - cdef int endian_detector = 1 - cdef bint little_endian = ((&endian_detector)[0] != 0) - cdef tuple fields - - for childname in descr.names: - fields = descr.fields[childname] - child, new_offset = fields - - if (end - f) - (new_offset - offset[0]) < 15: - raise RuntimeError( - u"Format string allocated too short, see comment in numpy.pxd") - - if ((child.byteorder == '>' and little_endian) or - (child.byteorder == '<' and not little_endian)): - raise ValueError(u"Non-native byte order not supported") - # One could encode it in the format string and have Cython - # complain instead, BUT: < and > in format strings also imply - # standardized sizes for datatypes, and we rely on native in - # order to avoid reencoding data types based on their size. - # - # A proper PEP 3118 exporter for other clients than Cython - # must deal properly with this! 
- - # Output padding bytes - while offset[0] < new_offset: - f[0] = 120 # "x"; pad byte - f += 1 - offset[0] += 1 - - offset[0] += child.itemsize - - if not PyDataType_HASFIELDS(child): - t = child.type_num - if end - f < 5: - raise RuntimeError(u"Format string allocated too short.") - - # Until ticket #99 is fixed, use integers to avoid warnings - if t == NPY_BYTE: f[0] = 98 #"b" - elif t == NPY_UBYTE: f[0] = 66 #"B" - elif t == NPY_SHORT: f[0] = 104 #"h" - elif t == NPY_USHORT: f[0] = 72 #"H" - elif t == NPY_INT: f[0] = 105 #"i" - elif t == NPY_UINT: f[0] = 73 #"I" - elif t == NPY_LONG: f[0] = 108 #"l" - elif t == NPY_ULONG: f[0] = 76 #"L" - elif t == NPY_LONGLONG: f[0] = 113 #"q" - elif t == NPY_ULONGLONG: f[0] = 81 #"Q" - elif t == NPY_FLOAT: f[0] = 102 #"f" - elif t == NPY_DOUBLE: f[0] = 100 #"d" - elif t == NPY_LONGDOUBLE: f[0] = 103 #"g" - elif t == NPY_CFLOAT: f[0] = 90; f[1] = 102; f += 1 # Zf - elif t == NPY_CDOUBLE: f[0] = 90; f[1] = 100; f += 1 # Zd - elif t == NPY_CLONGDOUBLE: f[0] = 90; f[1] = 103; f += 1 # Zg - elif t == NPY_OBJECT: f[0] = 79 #"O" - else: - raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) - f += 1 - else: - # Cython ignores struct boundary information ("T{...}"), - # so don't output it - f = _util_dtypestring(child, f, end, offset) - return f - - -# -# ufunc API -# - -cdef extern from "numpy/ufuncobject.h": - - ctypedef void (*PyUFuncGenericFunction) (char **, npy_intp *, - npy_intp *, void *) - - ctypedef extern class numpy.ufunc [object PyUFuncObject]: - cdef: - int nin, nout, nargs - int identity - PyUFuncGenericFunction *functions - void **data - int ntypes - int check_return - char *name - char *types - char *doc - void *ptr - PyObject *obj - PyObject *userloops - - cdef enum: - PyUFunc_Zero - PyUFunc_One - PyUFunc_None - UFUNC_ERR_IGNORE - UFUNC_ERR_WARN - UFUNC_ERR_RAISE - UFUNC_ERR_CALL - UFUNC_ERR_PRINT - UFUNC_ERR_LOG - UFUNC_MASK_DIVIDEBYZERO - UFUNC_MASK_OVERFLOW - UFUNC_MASK_UNDERFLOW - UFUNC_MASK_INVALID - 
UFUNC_SHIFT_DIVIDEBYZERO - UFUNC_SHIFT_OVERFLOW - UFUNC_SHIFT_UNDERFLOW - UFUNC_SHIFT_INVALID - UFUNC_FPE_DIVIDEBYZERO - UFUNC_FPE_OVERFLOW - UFUNC_FPE_UNDERFLOW - UFUNC_FPE_INVALID - UFUNC_ERR_DEFAULT - UFUNC_ERR_DEFAULT2 - - object PyUFunc_FromFuncAndData(PyUFuncGenericFunction *, - void **, char *, int, int, int, int, char *, char *, int) - int PyUFunc_RegisterLoopForType(ufunc, int, - PyUFuncGenericFunction, int *, void *) - int PyUFunc_GenericFunction \ - (ufunc, PyObject *, PyObject *, PyArrayObject **) - void PyUFunc_f_f_As_d_d \ - (char **, npy_intp *, npy_intp *, void *) - void PyUFunc_d_d \ - (char **, npy_intp *, npy_intp *, void *) - void PyUFunc_f_f \ - (char **, npy_intp *, npy_intp *, void *) - void PyUFunc_g_g \ - (char **, npy_intp *, npy_intp *, void *) - void PyUFunc_F_F_As_D_D \ - (char **, npy_intp *, npy_intp *, void *) - void PyUFunc_F_F \ - (char **, npy_intp *, npy_intp *, void *) - void PyUFunc_D_D \ - (char **, npy_intp *, npy_intp *, void *) - void PyUFunc_G_G \ - (char **, npy_intp *, npy_intp *, void *) - void PyUFunc_O_O \ - (char **, npy_intp *, npy_intp *, void *) - void PyUFunc_ff_f_As_dd_d \ - (char **, npy_intp *, npy_intp *, void *) - void PyUFunc_ff_f \ - (char **, npy_intp *, npy_intp *, void *) - void PyUFunc_dd_d \ - (char **, npy_intp *, npy_intp *, void *) - void PyUFunc_gg_g \ - (char **, npy_intp *, npy_intp *, void *) - void PyUFunc_FF_F_As_DD_D \ - (char **, npy_intp *, npy_intp *, void *) - void PyUFunc_DD_D \ - (char **, npy_intp *, npy_intp *, void *) - void PyUFunc_FF_F \ - (char **, npy_intp *, npy_intp *, void *) - void PyUFunc_GG_G \ - (char **, npy_intp *, npy_intp *, void *) - void PyUFunc_OO_O \ - (char **, npy_intp *, npy_intp *, void *) - void PyUFunc_O_O_method \ - (char **, npy_intp *, npy_intp *, void *) - void PyUFunc_OO_O_method \ - (char **, npy_intp *, npy_intp *, void *) - void PyUFunc_On_Om \ - (char **, npy_intp *, npy_intp *, void *) - int PyUFunc_GetPyValues \ - (char *, int *, int *, PyObject 
**) - int PyUFunc_checkfperr \ - (int, PyObject *, int *) - void PyUFunc_clearfperr() - int PyUFunc_getfperr() - int PyUFunc_handlefperr \ - (int, PyObject *, int, int *) - int PyUFunc_ReplaceLoopBySignature \ - (ufunc, PyUFuncGenericFunction, int *, PyUFuncGenericFunction *) - object PyUFunc_FromFuncAndDataAndSignature \ - (PyUFuncGenericFunction *, void **, char *, int, int, int, - int, char *, char *, int, char *) - - void import_ufunc() - - -cdef inline void set_array_base(ndarray arr, object base): - cdef PyObject* baseptr - if base is None: - baseptr = NULL - else: - Py_INCREF(base) # important to do this before decref below! - baseptr = base - Py_XDECREF(arr.base) - arr.base = baseptr - -cdef inline object get_array_base(ndarray arr): - if arr.base is NULL: - return None - else: - return arr.base diff --git a/pandas/_libs/src/numpy_helper.h b/pandas/_libs/src/numpy_helper.h index de3486eca3e9b..6c2029fff8a1a 100644 --- a/pandas/_libs/src/numpy_helper.h +++ b/pandas/_libs/src/numpy_helper.h @@ -75,22 +75,6 @@ PANDAS_INLINE PyObject* char_to_string(char* data) { #endif } -void transfer_object_column(char* dst, char* src, size_t stride, - size_t length) { - size_t i; - size_t sz = sizeof(PyObject*); - - for (i = 0; i < length; ++i) { - // uninitialized data - - // Py_XDECREF(*((PyObject**) dst)); - - memcpy(dst, src, sz); - Py_INCREF(*((PyObject**)dst)); - src += sz; - dst += stride; - } -} void set_array_not_contiguous(PyArrayObject* ao) { ao->flags &= ~(NPY_C_CONTIGUOUS | NPY_F_CONTIGUOUS); diff --git a/pandas/_libs/src/parser/.gitignore b/pandas/_libs/src/parser/.gitignore deleted file mode 100644 index f07e771a35eec..0000000000000 --- a/pandas/_libs/src/parser/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -!*.c -test* \ No newline at end of file diff --git a/pandas/_libs/src/parser/Makefile b/pandas/_libs/src/parser/Makefile deleted file mode 100644 index ec88eaf44ba15..0000000000000 --- a/pandas/_libs/src/parser/Makefile +++ /dev/null @@ -1,13 +0,0 @@ 
-PYTHONBASE = /Library/Frameworks/EPD64.framework/Versions/Current -NUMPY_INC = /Library/Frameworks/EPD64.framework/Versions/7.1/lib/python2.7/site-packages/numpy/core/include -PYTHON_INC = -I$(PYTHONBASE)/include/python2.7 -I$(NUMPY_INC) -PYTHON_LINK = -L$(PYTHONBASE)/lib -lpython - -SOURCES = conversions.c parser.c str_to.c - -check-syntax: - gcc -g $(PYTHON_INC) -o /dev/null -S ${CHK_SOURCES} - -test: $(SOURCES) - gcc $(PYTHON_INC) -o test $(SOURCES) - ./test \ No newline at end of file diff --git a/pandas/_libs/src/parser/tokenizer.c b/pandas/_libs/src/parser/tokenizer.c index 2e4ade209fa38..6e8c220eab6b8 100644 --- a/pandas/_libs/src/parser/tokenizer.c +++ b/pandas/_libs/src/parser/tokenizer.c @@ -1317,21 +1317,6 @@ int parser_trim_buffers(parser_t *self) { return 0; } -void debug_print_parser(parser_t *self) { - int64_t j, line; - char *token; - - for (line = 0; line < self->lines; ++line) { - printf("(Parsed) Line %lld: ", (long long)line); - - for (j = 0; j < self->line_fields[j]; ++j) { - token = self->words[j + self->line_start[line]]; - printf("%s ", token); - } - printf("\n"); - } -} - /* nrows : number of rows to tokenize (or until reach EOF) all : tokenize all the data vs. 
certain number of rows diff --git a/pandas/_libs/src/parser/tokenizer.h b/pandas/_libs/src/parser/tokenizer.h index 9462608a26814..63baf91e3c136 100644 --- a/pandas/_libs/src/parser/tokenizer.h +++ b/pandas/_libs/src/parser/tokenizer.h @@ -247,8 +247,6 @@ void parser_del(parser_t *self); void parser_set_default_options(parser_t *self); -void debug_print_parser(parser_t *self); - int tokenize_nrows(parser_t *self, size_t nrows); int tokenize_all_rows(parser_t *self); diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c index 61e3752a49639..e7f334b267461 100644 --- a/pandas/_libs/src/ujson/python/objToJSON.c +++ b/pandas/_libs/src/ujson/python/objToJSON.c @@ -44,7 +44,6 @@ Numeric decoder derived from from TCL library #include // NOLINT(build/include_order) #include // NOLINT(build/include_order) #include // NOLINT(build/include_order) -#include // NOLINT(build/include_order) #include // NOLINT(build/include_order) #include // NOLINT(build/include_order) #include // NOLINT(build/include_order) @@ -60,6 +59,8 @@ static PyTypeObject *cls_series; static PyTypeObject *cls_index; static PyTypeObject *cls_nat; +npy_int64 get_nat(void) { return NPY_MIN_INT64; } + typedef void *(*PFN_PyTypeToJSON)(JSOBJ obj, JSONTypeContext *ti, void *outValue, size_t *_outLen); diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index c7035df8ac15c..81df7981096ba 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- # cython: profile=False -cimport numpy as np +cimport numpy as cnp from numpy cimport int64_t, ndarray, float64_t import numpy as np -np.import_array() +cnp.import_array() from cpython cimport PyFloat_Check diff --git a/pandas/_libs/tslibs/ccalendar.pyx b/pandas/_libs/tslibs/ccalendar.pyx index ebd5fc12775a4..ae52f7dd30165 100644 --- a/pandas/_libs/tslibs/ccalendar.pyx +++ b/pandas/_libs/tslibs/ccalendar.pyx @@ -8,10 +8,9 @@ Cython implementations of functions resembling 
the stdlib calendar module cimport cython from cython cimport Py_ssize_t -import numpy as np -cimport numpy as np +cimport numpy as cnp from numpy cimport int64_t, int32_t -np.import_array() +cnp.import_array() # ---------------------------------------------------------------------- diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 11e1787cd77da..a32bfc1f6836c 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -5,9 +5,9 @@ cimport cython from cython cimport Py_ssize_t import numpy as np -cimport numpy as np +cimport numpy as cnp from numpy cimport int64_t, int32_t, ndarray -np.import_array() +cnp.import_array() import pytz @@ -29,13 +29,13 @@ from np_datetime cimport (check_dts_bounds, from util cimport (is_string_object, is_datetime64_object, - is_integer_object, is_float_object) + is_integer_object, is_float_object, is_array) from timedeltas cimport cast_from_unit from timezones cimport (is_utc, is_tzlocal, is_fixed_offset, treat_tz_as_dateutil, treat_tz_as_pytz, get_utcoffset, get_dst_info, - get_timezone, maybe_get_tz) + get_timezone, maybe_get_tz, tz_compare) from parsing import parse_datetime_string from nattype import nat_strings, NaT @@ -45,6 +45,8 @@ from nattype cimport NPY_NAT, checknull_with_nat # Constants cdef int64_t DAY_NS = 86400000000000LL +NS_DTYPE = np.dtype('M8[ns]') +TD_DTYPE = np.dtype('m8[ns]') UTC = pytz.UTC @@ -73,13 +75,14 @@ cdef inline int64_t get_datetime64_nanos(object val) except? 
-1: return ival -def ensure_datetime64ns(ndarray arr): +def ensure_datetime64ns(ndarray arr, copy=True): """ Ensure a np.datetime64 array has dtype specifically 'datetime64[ns]' Parameters ---------- arr : ndarray + copy : boolean, default True Returns ------- @@ -104,6 +107,8 @@ def ensure_datetime64ns(ndarray arr): unit = get_datetime64_unit(arr.flat[0]) if unit == PANDAS_FR_ns: + if copy: + arr = arr.copy() result = arr else: for i in range(n): @@ -117,6 +122,23 @@ def ensure_datetime64ns(ndarray arr): return result +def ensure_timedelta64ns(ndarray arr, copy=True): + """ + Ensure a np.timedelta64 array has dtype specifically 'timedelta64[ns]' + + Parameters + ---------- + arr : ndarray + copy : boolean, default True + + Returns + ------- + result : ndarray with dtype timedelta64[ns] + + """ + return arr.astype(TD_DTYPE, copy=copy) + + def datetime_to_datetime64(ndarray[object] values): """ Convert ndarray of datetime-like objects to int64 array representing @@ -147,7 +169,7 @@ def datetime_to_datetime64(ndarray[object] values): elif PyDateTime_Check(val): if val.tzinfo is not None: if inferred_tz is not None: - if get_timezone(val.tzinfo) != inferred_tz: + if not tz_compare(val.tzinfo, inferred_tz): raise ValueError('Array must be all same time zone') else: inferred_tz = get_timezone(val.tzinfo) diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 18101c834c737..a8a865eec38dd 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -9,9 +9,9 @@ cimport cython from cython cimport Py_ssize_t import numpy as np -cimport numpy as np +cimport numpy as cnp from numpy cimport ndarray, int64_t, int32_t, int8_t -np.import_array() +cnp.import_array() from ccalendar cimport (get_days_in_month, is_leapyear, dayofweek, diff --git a/pandas/_libs/tslibs/frequencies.pyx b/pandas/_libs/tslibs/frequencies.pyx index cce3600371300..abaf8cad09bdb 100644 --- a/pandas/_libs/tslibs/frequencies.pyx +++ 
b/pandas/_libs/tslibs/frequencies.pyx @@ -4,10 +4,9 @@ import re cimport cython -import numpy as np -cimport numpy as np +cimport numpy as cnp from numpy cimport int64_t -np.import_array() +cnp.import_array() from util cimport is_integer_object, is_string_object diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 683be4c9aa3a8..9f4ef4e515058 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -13,9 +13,9 @@ from cpython.datetime cimport (datetime, PyDateTime_IMPORT import numpy as np -cimport numpy as np +cimport numpy as cnp from numpy cimport int64_t -np.import_array() +cnp.import_array() from util cimport (get_nat, is_integer_object, is_float_object, @@ -156,7 +156,7 @@ cdef class _NaT(datetime): neg_other = -other return self + neg_other - elif getattr(other, '_typ', None) in ['period', + elif getattr(other, '_typ', None) in ['period', 'series', 'periodindex', 'dateoffset']: return NotImplemented diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 585c904a601ed..e02818dd818df 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -10,9 +10,9 @@ from cpython.datetime cimport datetime, timedelta, time as dt_time from dateutil.relativedelta import relativedelta import numpy as np -cimport numpy as np +cimport numpy as cnp from numpy cimport int64_t -np.import_array() +cnp.import_array() from util cimport is_string_object, is_integer_object @@ -290,27 +290,6 @@ class CacheableOffset(object): _cacheable = True -class EndMixin(object): - # helper for vectorized offsets - - def _end_apply_index(self, i, freq): - """Offsets index to end of Period frequency""" - - off = i.to_perioddelta('D') - - base, mult = get_freq_code(freq) - base_period = i.to_period(base) - if self.n > 0: - # when adding, dates on end roll to next - roll = np.where(base_period.to_timestamp(how='end') == i - off, - self.n, self.n - 1) - else: - roll = self.n - - base = 
(base_period + roll).to_timestamp(how='end') - return base + off - - # --------------------------------------------------------------------- # Base Classes @@ -327,8 +306,8 @@ class _BaseOffset(object): def __call__(self, other): return self.apply(other) - def __mul__(self, someInt): - return self.__class__(n=someInt * self.n, normalize=self.normalize, + def __mul__(self, other): + return self.__class__(n=other * self.n, normalize=self.normalize, **self.kwds) def __neg__(self): @@ -395,8 +374,8 @@ class _BaseOffset(object): class BaseOffset(_BaseOffset): # Here we add __rfoo__ methods that don't play well with cdef classes - def __rmul__(self, someInt): - return self.__mul__(someInt) + def __rmul__(self, other): + return self.__mul__(other) def __radd__(self, other): return self.__add__(other) @@ -675,11 +654,8 @@ def shift_months(int64_t[:] dtindex, int months, object day=None): months_to_roll = months compare_day = get_firstbday(dts.year, dts.month) - if months_to_roll > 0 and dts.day < compare_day: - months_to_roll -= 1 - elif months_to_roll <= 0 and dts.day > compare_day: - # as if rolled forward already - months_to_roll += 1 + months_to_roll = roll_convention(dts.day, months_to_roll, + compare_day) dts.year = year_add_months(dts, months_to_roll) dts.month = month_add_months(dts, months_to_roll) @@ -698,11 +674,8 @@ def shift_months(int64_t[:] dtindex, int months, object day=None): months_to_roll = months compare_day = get_lastbday(dts.year, dts.month) - if months_to_roll > 0 and dts.day < compare_day: - months_to_roll -= 1 - elif months_to_roll <= 0 and dts.day > compare_day: - # as if rolled forward already - months_to_roll += 1 + months_to_roll = roll_convention(dts.day, months_to_roll, + compare_day) dts.year = year_add_months(dts, months_to_roll) dts.month = month_add_months(dts, months_to_roll) @@ -823,7 +796,7 @@ cpdef int get_day_of_month(datetime other, day_opt) except? 
-1: raise ValueError(day_opt) -cpdef int roll_convention(int other, int n, int compare): +cpdef int roll_convention(int other, int n, int compare) nogil: """ Possibly increment or decrement the number of periods to shift based on rollforward/rollbackward conventions. @@ -847,29 +820,6 @@ cpdef int roll_convention(int other, int n, int compare): return n -cpdef int roll_monthday(datetime other, int n, datetime compare): - """ - Possibly increment or decrement the number of periods to shift - based on rollforward/rollbackward conventions. - - Parameters - ---------- - other : datetime - n : number of periods to increment, before adjusting for rolling - compare : datetime - - Returns - ------- - n : int number of periods to increment - """ - if n > 0 and other < compare: - n -= 1 - elif n <= 0 and other > compare: - # as if rolled forward already - n += 1 - return n - - cpdef int roll_qtrday(datetime other, int n, int month, object day_opt, int modby=3) except? -1: """ @@ -890,6 +840,8 @@ cpdef int roll_qtrday(datetime other, int n, int month, object day_opt, ------- n : int number of periods to increment """ + cdef: + int months_since # TODO: Merge this with roll_yearday by setting modby=12 there? # code de-duplication versus perf hit? 
# TODO: with small adjustments this could be used in shift_quarters diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index a9a5500cd7447..09aeff852a0f2 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -18,9 +18,9 @@ from datetime import datetime import time import numpy as np -cimport numpy as np +cimport numpy as cnp from numpy cimport int64_t, ndarray -np.import_array() +cnp.import_array() # Avoid import from outside _libs if sys.version_info.major == 2: diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index 6eb867377bf54..b166babe5992c 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -4,9 +4,9 @@ from cython cimport Py_ssize_t import numpy as np -cimport numpy as np +cimport numpy as cnp from numpy cimport ndarray, int64_t -np.import_array() +cnp.import_array() from util cimport is_string_object, get_nat diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 2921291973373..e7dabb94f8975 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -27,7 +27,6 @@ from cpython cimport PyFloat_Check cimport cython import numpy as np -cimport numpy as np from numpy cimport ndarray, int64_t from datetime import date as datetime_date diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index af3fa738fad14..1e6ea7794dfff 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # cython: profile=False import collections -import re import sys cdef bint PY3 = (sys.version_info[0] >= 3) @@ -11,9 +10,9 @@ from cython cimport Py_ssize_t from cpython cimport PyUnicode_Check, Py_NE, Py_EQ, PyObject_RichCompare import numpy as np -cimport numpy as np +cimport numpy as cnp from numpy cimport int64_t, ndarray -np.import_array() +cnp.import_array() from cpython.datetime cimport 
(datetime, timedelta, PyDateTime_CheckExact, @@ -236,6 +235,14 @@ cpdef inline int64_t cast_from_unit(object ts, object unit) except? -1: return (base *m) + (frac *m) +cdef inline _decode_if_necessary(object ts): + # decode ts if necessary + if not PyUnicode_Check(ts) and not PY3: + ts = str(ts).decode('utf-8') + + return ts + + cdef inline parse_timedelta_string(object ts): """ Parse a regular format timedelta string. Return an int64_t (in ns) @@ -258,9 +265,7 @@ cdef inline parse_timedelta_string(object ts): if len(ts) == 0 or ts in nat_strings: return NPY_NAT - # decode ts if necessary - if not PyUnicode_Check(ts) and not PY3: - ts = str(ts).decode('utf-8') + ts = _decode_if_necessary(ts) for c in ts: @@ -507,26 +512,14 @@ def _binary_op_method_timedeltalike(op, name): # ---------------------------------------------------------------------- # Timedelta Construction -iso_pater = re.compile(r"""P - (?P-?[0-9]*)DT - (?P[0-9]{1,2})H - (?P[0-9]{1,2})M - (?P[0-9]{0,2}) - (\. - (?P[0-9]{1,3}) - (?P[0-9]{0,3}) - (?P[0-9]{0,3}) - )?S""", re.VERBOSE) - - -cdef int64_t parse_iso_format_string(object iso_fmt) except? -1: +cdef inline int64_t parse_iso_format_string(object ts) except? -1: """ Extracts and cleanses the appropriate values from a match object with groups for each component of an ISO 8601 duration Parameters ---------- - iso_fmt: + ts: ISO 8601 Duration formatted string Returns @@ -537,25 +530,93 @@ cdef int64_t parse_iso_format_string(object iso_fmt) except? 
-1: Raises ------ ValueError - If ``iso_fmt`` cannot be parsed + If ``ts`` cannot be parsed """ - cdef int64_t ns = 0 + cdef: + unicode c + int64_t result = 0, r + int p=0 + object dec_unit = 'ms', err_msg + bint have_dot=0, have_value=0, neg=0 + list number=[], unit=[] - match = re.match(iso_pater, iso_fmt) - if match: - match_dict = match.groupdict(default='0') - for comp in ['milliseconds', 'microseconds', 'nanoseconds']: - match_dict[comp] = '{:0<3}'.format(match_dict[comp]) + ts = _decode_if_necessary(ts) - for k, v in match_dict.items(): - ns += timedelta_from_spec(v, '0', k) + err_msg = "Invalid ISO 8601 Duration format - {}".format(ts) - else: - raise ValueError("Invalid ISO 8601 Duration format - " - "{}".format(iso_fmt)) + for c in ts: + # number (ascii codes) + if ord(c) >= 48 and ord(c) <= 57: - return ns + have_value = 1 + if have_dot: + if p == 3 and dec_unit != 'ns': + unit.append(dec_unit) + if dec_unit == 'ms': + dec_unit = 'us' + elif dec_unit == 'us': + dec_unit = 'ns' + p = 0 + p += 1 + + if not len(unit): + number.append(c) + else: + # if in days, pop trailing T + if unit[-1] == 'T': + unit.pop() + elif 'H' in unit or 'M' in unit: + if len(number) > 2: + raise ValueError(err_msg) + r = timedelta_from_spec(number, '0', unit) + result += timedelta_as_neg(r, neg) + + neg = 0 + unit, number = [], [c] + else: + if c == 'P': + pass # ignore leading character + elif c == '-': + if neg or have_value: + raise ValueError(err_msg) + else: + neg = 1 + elif c in ['D', 'T', 'H', 'M']: + unit.append(c) + elif c == '.': + # append any seconds + if len(number): + r = timedelta_from_spec(number, '0', 'S') + result += timedelta_as_neg(r, neg) + unit, number = [], [] + have_dot = 1 + elif c == 'S': + if have_dot: # ms, us, or ns + if not len(number) or p > 3: + raise ValueError(err_msg) + # pad to 3 digits as required + pad = 3 - p + while pad > 0: + number.append('0') + pad -= 1 + + r = timedelta_from_spec(number, '0', dec_unit) + result += timedelta_as_neg(r, 
neg) + else: # seconds + if len(number) <= 2: + r = timedelta_from_spec(number, '0', 'S') + result += timedelta_as_neg(r, neg) + else: + raise ValueError(err_msg) + else: + raise ValueError(err_msg) + + if not have_value: + # Received string only - never parsed any values + raise ValueError(err_msg) + + return result cdef _to_py_int_float(v): @@ -1031,13 +1092,27 @@ class Timedelta(_Timedelta): __rdiv__ = __rtruediv__ def __floordiv__(self, other): + # numpy does not implement floordiv for timedelta64 dtype, so we cannot + # just defer + if hasattr(other, '_typ'): + # Series, DataFrame, ... + return NotImplemented + if hasattr(other, 'dtype'): - # work with i8 - other = other.astype('m8[ns]').astype('i8') - return self.value // other + if other.dtype.kind == 'm': + # also timedelta-like + return _broadcast_floordiv_td64(self.value, other, _floordiv) + elif other.dtype.kind in ['i', 'u', 'f']: + if other.ndim == 0: + return Timedelta(self.value // other) + else: + return self.to_timedelta64() // other - elif is_integer_object(other): - # integers only + raise TypeError('Invalid dtype {dtype} for ' + '{op}'.format(dtype=other.dtype, + op='__floordiv__')) + + elif is_integer_object(other) or is_float_object(other): return Timedelta(self.value // other, unit='ns') elif not _validate_ops_compat(other): @@ -1049,20 +1124,79 @@ class Timedelta(_Timedelta): return self.value // other.value def __rfloordiv__(self, other): - if hasattr(other, 'dtype'): - # work with i8 - other = other.astype('m8[ns]').astype('i8') - return other // self.value + # numpy does not implement floordiv for timedelta64 dtype, so we cannot + # just defer + if hasattr(other, '_typ'): + # Series, DataFrame, ... 
+ return NotImplemented + if hasattr(other, 'dtype'): + if other.dtype.kind == 'm': + # also timedelta-like + return _broadcast_floordiv_td64(self.value, other, _rfloordiv) + raise TypeError('Invalid dtype {dtype} for ' + '{op}'.format(dtype=other.dtype, + op='__floordiv__')) + + if is_float_object(other) and util._checknull(other): + # i.e. np.nan + return NotImplemented elif not _validate_ops_compat(other): return NotImplemented other = Timedelta(other) if other is NaT: - return NaT + return np.nan return other.value // self.value +cdef _floordiv(int64_t value, right): + return value // right + + +cdef _rfloordiv(int64_t value, right): + # analogous to referencing operator.div, but there is no operator.rfloordiv + return right // value + + +cdef _broadcast_floordiv_td64(int64_t value, object other, + object (*operation)(int64_t value, + object right)): + """Boilerplate code shared by Timedelta.__floordiv__ and + Timedelta.__rfloordiv__ because np.timedelta64 does not implement these. + + Parameters + ---------- + value : int64_t; `self.value` from a Timedelta object + other : object + operation : function, either _floordiv or _rfloordiv + + Returns + ------- + result : varies based on `other` + """ + # assumes other.dtype.kind == 'm', i.e. other is timedelta-like + cdef: + int ndim = getattr(other, 'ndim', -1) + + # We need to watch out for np.timedelta64('NaT'). 
+ mask = other.view('i8') == NPY_NAT + + if ndim == 0: + if mask: + return np.nan + + return operation(value, other.astype('m8[ns]').astype('i8')) + + else: + res = operation(value, other.astype('m8[ns]').astype('i8')) + + if mask.any(): + res = res.astype('f8') + res[mask] = np.nan + return res + + # resolution in ns -Timedelta.min = Timedelta(np.iinfo(np.int64).min +1) +Timedelta.min = Timedelta(np.iinfo(np.int64).min + 1) Timedelta.max = Timedelta(np.iinfo(np.int64).max) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index c7744bf9db58e..b9be9c16eb6c3 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -6,9 +6,9 @@ from cpython cimport (PyObject_RichCompareBool, PyObject_RichCompare, Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE) import numpy as np -cimport numpy as np +cimport numpy as cnp from numpy cimport int64_t, int32_t, ndarray -np.import_array() +cnp.import_array() from datetime import time as datetime_time from cpython.datetime cimport (datetime, @@ -33,7 +33,8 @@ from np_datetime cimport (reverse_ops, cmp_scalar, check_dts_bounds, is_leapyear) from timedeltas import Timedelta from timedeltas cimport delta_to_nanoseconds -from timezones cimport get_timezone, is_utc, maybe_get_tz +from timezones cimport ( + get_timezone, is_utc, maybe_get_tz, treat_tz_as_pytz, tz_compare) # ---------------------------------------------------------------------- # Constants @@ -266,7 +267,7 @@ cdef class _Timestamp(datetime): other = Timestamp(other) # validate tz's - if get_timezone(self.tzinfo) != get_timezone(other.tzinfo): + if not tz_compare(self.tzinfo, other.tzinfo): raise TypeError("Timestamp subtraction must have the " "same timezones or no timezones") @@ -389,9 +390,6 @@ class Timestamp(_Timestamp): Unit used for conversion if ts_input is of type int or float. The valid values are 'D', 'h', 'm', 's', 'ms', 'us', and 'ns'. For example, 's' means seconds and 'ms' means milliseconds. 
- offset : str, DateOffset - Deprecated, use freq - year, month, day : int .. versionadded:: 0.19.0 hour, minute, second, microsecond : int, optional, default 0 @@ -922,8 +920,18 @@ class Timestamp(_Timestamp): _tzinfo = tzinfo # reconstruct & check bounds - ts_input = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, - dts.sec, dts.us, tzinfo=_tzinfo) + if _tzinfo is not None and treat_tz_as_pytz(_tzinfo): + # replacing across a DST boundary may induce a new tzinfo object + # see GH#18319 + ts_input = _tzinfo.localize(datetime(dts.year, dts.month, dts.day, + dts.hour, dts.min, dts.sec, + dts.us)) + _tzinfo = ts_input.tzinfo + else: + ts_input = datetime(dts.year, dts.month, dts.day, + dts.hour, dts.min, dts.sec, dts.us, + tzinfo=_tzinfo) + ts = convert_datetime_to_tsobject(ts_input, _tzinfo) value = ts.value + (dts.ps // 1000) if value != NPY_NAT: diff --git a/pandas/_libs/tslibs/timezones.pxd b/pandas/_libs/tslibs/timezones.pxd index 95e0474b3a174..67353f3eec614 100644 --- a/pandas/_libs/tslibs/timezones.pxd +++ b/pandas/_libs/tslibs/timezones.pxd @@ -7,6 +7,7 @@ cdef bint is_tzlocal(object tz) cdef bint treat_tz_as_pytz(object tz) cdef bint treat_tz_as_dateutil(object tz) +cpdef bint tz_compare(object start, object end) cpdef object get_timezone(object tz) cpdef object maybe_get_tz(object tz) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index fdcf40337fab9..c22e0b8e555a3 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -18,9 +18,9 @@ UTC = pytz.utc import numpy as np -cimport numpy as np +cimport numpy as cnp from numpy cimport ndarray, int64_t -np.import_array() +cnp.import_array() # ---------------------------------------------------------------------- from util cimport is_string_object, is_integer_object, get_nat @@ -275,7 +275,7 @@ cdef object get_dst_info(object tz): def infer_tzinfo(start, end): if start is not None and end is not None: tz = start.tzinfo - if not 
(get_timezone(tz) == get_timezone(end.tzinfo)): + if not tz_compare(tz, end.tzinfo): msg = 'Inputs must both have the same timezone, {tz1} != {tz2}' raise AssertionError(msg.format(tz1=tz, tz2=end.tzinfo)) elif start is not None: @@ -285,3 +285,32 @@ def infer_tzinfo(start, end): else: tz = None return tz + + +cpdef bint tz_compare(object start, object end): + """ + Compare string representations of timezones + + The same timezone can be represented as different instances of + timezones. For example + `` and + `` are essentially same + timezones but aren't evaluted such, but the string representation + for both of these is `'Europe/Paris'`. + + This exists only to add a notion of equality to pytz-style zones + that is compatible with the notion of equality expected of tzinfo + subclasses. + + Parameters + ---------- + start : tzinfo + end : tzinfo + + Returns: + ------- + compare : bint + + """ + # GH 18523 + return get_timezone(start) == get_timezone(end) diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index e46bf24c36f18..cacb073da581c 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -1,43 +1,40 @@ # cython: profile=False # cython: boundscheck=False, wraparound=False, cdivision=True +cimport cython from cython cimport Py_ssize_t -cimport numpy as np +from libc.stdlib cimport malloc, free + import numpy as np +cimport numpy as cnp +from numpy cimport ndarray, double_t, int64_t, float64_t +cnp.import_array() -cimport cython -np.import_array() +cdef extern from "../src/headers/math.h": + int signbit(double) nogil + double sqrt(double x) nogil cimport util - -from libc.stdlib cimport malloc, free - -from numpy cimport ndarray, double_t, int64_t, float64_t +from util cimport numeric from skiplist cimport (IndexableSkiplist, node_t, skiplist_t, skiplist_init, skiplist_destroy, skiplist_get, skiplist_insert, skiplist_remove) -cdef np.float32_t MINfloat32 = np.NINF -cdef np.float64_t MINfloat64 = np.NINF +cdef cnp.float32_t MINfloat32 
= np.NINF +cdef cnp.float64_t MINfloat64 = np.NINF -cdef np.float32_t MAXfloat32 = np.inf -cdef np.float64_t MAXfloat64 = np.inf +cdef cnp.float32_t MAXfloat32 = np.inf +cdef cnp.float64_t MAXfloat64 = np.inf cdef double NaN = np.NaN cdef inline int int_max(int a, int b): return a if a >= b else b cdef inline int int_min(int a, int b): return a if a <= b else b -from util cimport numeric - -cdef extern from "../src/headers/math.h": - int signbit(double) nogil - double sqrt(double x) nogil - # Cython implementations of rolling sum, mean, variance, skewness, # other statistical moment functions diff --git a/pandas/api/__init__.py b/pandas/api/__init__.py index fcbf42f6dabc4..afff059e7b601 100644 --- a/pandas/api/__init__.py +++ b/pandas/api/__init__.py @@ -1 +1,2 @@ """ public toolkit API """ +from . import types, extensions # noqa diff --git a/pandas/api/extensions/__init__.py b/pandas/api/extensions/__init__.py new file mode 100644 index 0000000000000..64f5e8fb939a4 --- /dev/null +++ b/pandas/api/extensions/__init__.py @@ -0,0 +1,4 @@ +"""Public API for extending panadas objects.""" +from pandas.core.accessor import (register_dataframe_accessor, # noqa + register_index_accessor, + register_series_accessor) diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index 07b34961ce25d..f651fbbf56316 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -108,7 +108,11 @@ def load_reduce(self): ('pandas.tseries.index', 'DatetimeIndex'): ('pandas.core.indexes.datetimes', 'DatetimeIndex'), ('pandas.tseries.period', 'PeriodIndex'): - ('pandas.core.indexes.period', 'PeriodIndex') + ('pandas.core.indexes.period', 'PeriodIndex'), + + # 19269, arrays moving + ('pandas.core.categorical', 'Categorical'): + ('pandas.core.arrays', 'Categorical'), } diff --git a/pandas/computation/expressions.py b/pandas/computation/expressions.py index f46487cfa1b79..d194cd2404c9d 100644 --- a/pandas/computation/expressions.py +++ 
b/pandas/computation/expressions.py @@ -2,6 +2,10 @@ def set_use_numexpr(v=True): + """ + .. deprecated:: 0.20.0 + Use ``pandas.set_option('compute.use_numexpr', v)`` instead. + """ warnings.warn("pandas.computation.expressions.set_use_numexpr is " "deprecated and will be removed in a future version.\n" "you can toggle usage of numexpr via " diff --git a/pandas/conftest.py b/pandas/conftest.py index 4cf5c9da44697..4fe66d4cf7e1f 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -4,6 +4,7 @@ import numpy import pandas import dateutil +import pandas.util._test_decorators as td def pytest_addoption(parser): @@ -73,3 +74,22 @@ def ip(): is_dateutil_gt_261 = pytest.mark.skipif( LooseVersion(dateutil.__version__) <= LooseVersion('2.6.1'), reason="dateutil stable version") + + +@pytest.fixture(params=[None, 'gzip', 'bz2', 'zip', + pytest.param('xz', marks=td.skip_if_no_lzma)]) +def compression(request): + """ + Fixture for trying common compression types in compression tests + """ + return request.param + + +@pytest.fixture(params=[None, 'gzip', 'bz2', + pytest.param('xz', marks=td.skip_if_no_lzma)]) +def compression_no_zip(request): + """ + Fixture for trying common compression types in compression tests + except zip + """ + return request.param diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py index 73e01fbf17205..96bf628c8d7ff 100644 --- a/pandas/core/accessor.py +++ b/pandas/core/accessor.py @@ -5,7 +5,9 @@ that can be mixed into or pinned onto other pandas classes. 
""" -from pandas.core.common import AbstractMethodError +import warnings + +from pandas.util._decorators import Appender class DirNamesMixin(object): @@ -37,38 +39,9 @@ def __dir__(self): return sorted(rv) -class AccessorProperty(object): - """Descriptor for implementing accessor properties like Series.str - """ - - def __init__(self, accessor_cls, construct_accessor=None): - self.accessor_cls = accessor_cls - self.construct_accessor = (construct_accessor or - accessor_cls._make_accessor) - self.__doc__ = accessor_cls.__doc__ - - def __get__(self, instance, owner=None): - if instance is None: - # this ensures that Series.str. is well defined - return self.accessor_cls - return self.construct_accessor(instance) - - def __set__(self, instance, value): - raise AttributeError("can't set attribute") - - def __delete__(self, instance): - raise AttributeError("can't delete attribute") - - class PandasDelegate(object): """ an abstract base class for delegating methods/properties """ - @classmethod - def _make_accessor(cls, data): - raise AbstractMethodError("_make_accessor should be implemented" - "by subclass and return an instance" - "of `cls`.") - def _delegate_property_get(self, name, *args, **kwargs): raise TypeError("You cannot access the " "property {name}".format(name=name)) @@ -129,3 +102,138 @@ def f(self, *args, **kwargs): # don't overwrite existing methods/properties if overwrite or not hasattr(cls, name): setattr(cls, name, f) + + +# Ported with modifications from xarray +# https://github.com/pydata/xarray/blob/master/xarray/core/extensions.py +# 1. We don't need to catch and re-raise AttributeErrors as RuntimeErrors +# 2. We use a UserWarning instead of a custom Warning + +class CachedAccessor(object): + """Custom property-like object (descriptor) for caching accessors. + + Parameters + ---------- + name : str + The namespace this will be accessed under, e.g. ``df.foo`` + accessor : cls + The class with the extension methods. 
The class' __init__ method + should expect one of a ``Series``, ``DataFrame`` or ``Index`` as + the single argument ``data`` + """ + def __init__(self, name, accessor): + self._name = name + self._accessor = accessor + + def __get__(self, obj, cls): + if obj is None: + # we're accessing the attribute of the class, i.e., Dataset.geo + return self._accessor + accessor_obj = self._accessor(obj) + # Replace the property with the accessor object. Inspired by: + # http://www.pydanny.com/cached-property.html + # We need to use object.__setattr__ because we overwrite __setattr__ on + # NDFrame + object.__setattr__(obj, self._name, accessor_obj) + return accessor_obj + + +def _register_accessor(name, cls): + def decorator(accessor): + if hasattr(cls, name): + warnings.warn( + 'registration of accessor {!r} under name {!r} for type ' + '{!r} is overriding a preexisting attribute with the same ' + 'name.'.format(accessor, name, cls), + UserWarning, + stacklevel=2) + setattr(cls, name, CachedAccessor(name, accessor)) + return accessor + return decorator + + +_doc = """Register a custom accessor on %(klass)s objects. + +Parameters +---------- +name : str + Name under which the accessor should be registered. A warning is issued + if this name conflicts with a preexisting attribute. + +Notes +----- +When accessed, your accessor will be initialized with the pandas object +the user is interacting with. So the signature must be + +.. code-block:: python + + def __init__(self, pandas_object): + +For consistency with pandas methods, you should raise an ``AttributeError`` +if the data passed to your accessor has an incorrect dtype. + +>>> pd.Series(['a', 'b']).dt +Traceback (most recent call last): +... 
+AttributeError: Can only use .dt accessor with datetimelike values + +Examples +-------- + +In your library code:: + + import pandas as pd + + @pd.api.extensions.register_dataframe_accessor("geo") + class GeoAccessor(object): + def __init__(self, pandas_obj): + self._obj = pandas_obj + + @property + def center(self): + # return the geographic center point of this DataFarme + lon = self._obj.latitude + lat = self._obj.longitude + return (float(lon.mean()), float(lat.mean())) + + def plot(self): + # plot this array's data on a map, e.g., using Cartopy + pass + +Back in an interactive IPython session: + + >>> ds = pd.DataFrame({'longitude': np.linspace(0, 10), + ... 'latitude': np.linspace(0, 20)}) + >>> ds.geo.center + (5.0, 10.0) + >>> ds.geo.plot() + # plots data on a map + +See also +-------- +%(others)s +""" + + +@Appender(_doc % dict(klass="DataFrame", + others=("register_series_accessor, " + "register_index_accessor"))) +def register_dataframe_accessor(name): + from pandas import DataFrame + return _register_accessor(name, DataFrame) + + +@Appender(_doc % dict(klass="Series", + others=("register_dataframe_accessor, " + "register_index_accessor"))) +def register_series_accessor(name): + from pandas import Series + return _register_accessor(name, Series) + + +@Appender(_doc % dict(klass="Index", + others=("register_dataframe_accessor, " + "register_series_accessor"))) +def register_index_accessor(name): + from pandas import Index + return _register_accessor(name, Index) diff --git a/pandas/core/api.py b/pandas/core/api.py index b228a97c99074..aa37ddffa1156 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -6,7 +6,7 @@ from pandas.core.algorithms import factorize, unique, value_counts from pandas.core.dtypes.missing import isna, isnull, notna, notnull -from pandas.core.categorical import Categorical +from pandas.core.arrays import Categorical from pandas.core.groupby import Grouper from pandas.io.formats.format import set_eng_float_format from 
pandas.core.index import (Index, CategoricalIndex, Int64Index, diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 2f43087f7dff9..4cdec54b9a07a 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -1,6 +1,6 @@ import numpy as np from pandas import compat -from pandas._libs import lib +from pandas._libs import reduction from pandas.core.dtypes.common import ( is_extension_type, is_sequence) @@ -114,7 +114,7 @@ def apply_empty_result(self): def apply_raw(self): try: - result = lib.reduce(self.values, self.f, axis=self.axis) + result = reduction.reduce(self.values, self.f, axis=self.axis) except Exception: result = np.apply_along_axis(self.f, self.axis, self.values) @@ -150,10 +150,10 @@ def apply_standard(self): try: labels = self.agg_axis - result = lib.reduce(values, self.f, - axis=self.axis, - dummy=dummy, - labels=labels) + result = reduction.reduce(values, self.f, + axis=self.axis, + dummy=dummy, + labels=labels) return Series(result, index=labels) except Exception: pass diff --git a/pandas/core/arrays/__init__.py b/pandas/core/arrays/__init__.py new file mode 100644 index 0000000000000..ee32b12f0e712 --- /dev/null +++ b/pandas/core/arrays/__init__.py @@ -0,0 +1 @@ +from .categorical import Categorical # noqa diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py new file mode 100644 index 0000000000000..b50e01b0fb55a --- /dev/null +++ b/pandas/core/arrays/categorical.py @@ -0,0 +1,2330 @@ +# pylint: disable=E1101,W0232 + +import numpy as np +from warnings import warn +import types + +from pandas import compat +from pandas.compat import u, lzip +from pandas._libs import lib, algos as libalgos + +from pandas.core.dtypes.generic import ( + ABCSeries, ABCIndexClass, ABCCategoricalIndex) +from pandas.core.dtypes.missing import isna, notna +from pandas.core.dtypes.cast import ( + maybe_infer_to_datetimelike, + coerce_indexer_dtype) +from pandas.core.dtypes.dtypes import CategoricalDtype +from pandas.core.dtypes.common 
import ( + _ensure_int64, + _ensure_object, + _ensure_platform_int, + is_dtype_equal, + is_datetimelike, + is_datetime64_dtype, + is_timedelta64_dtype, + is_categorical, + is_categorical_dtype, + is_list_like, is_sequence, + is_scalar, + is_dict_like) + +from pandas.core.algorithms import factorize, take_1d, unique1d +from pandas.core.accessor import PandasDelegate +from pandas.core.base import (PandasObject, + NoNewAttributesMixin, _shared_docs) +import pandas.core.common as com +from pandas.core.missing import interpolate_2d +from pandas.compat.numpy import function as nv +from pandas.util._decorators import ( + Appender, cache_readonly, deprecate_kwarg, Substitution) + +from pandas.io.formats.terminal import get_terminal_size +from pandas.util._validators import validate_bool_kwarg +from pandas.core.config import get_option + + +def _cat_compare_op(op): + def f(self, other): + # On python2, you can usually compare any type to any type, and + # Categoricals can be seen as a custom type, but having different + # results depending whether categories are the same or not is kind of + # insane, so be a bit stricter here and use the python3 idea of + # comparing only things of equal type. 
+ if not self.ordered: + if op in ['__lt__', '__gt__', '__le__', '__ge__']: + raise TypeError("Unordered Categoricals can only compare " + "equality or not") + if isinstance(other, Categorical): + # Two Categoricals can only be be compared if the categories are + # the same (maybe up to ordering, depending on ordered) + + msg = ("Categoricals can only be compared if " + "'categories' are the same.") + if len(self.categories) != len(other.categories): + raise TypeError(msg + " Categories are different lengths") + elif (self.ordered and not (self.categories == + other.categories).all()): + raise TypeError(msg) + elif not set(self.categories) == set(other.categories): + raise TypeError(msg) + + if not (self.ordered == other.ordered): + raise TypeError("Categoricals can only be compared if " + "'ordered' is the same") + if not self.ordered and not self.categories.equals( + other.categories): + # both unordered and different order + other_codes = _get_codes_for_values(other, self.categories) + else: + other_codes = other._codes + + na_mask = (self._codes == -1) | (other_codes == -1) + f = getattr(self._codes, op) + ret = f(other_codes) + if na_mask.any(): + # In other series, the leads to False, so do that here too + ret[na_mask] = False + return ret + + # Numpy-1.9 and earlier may convert a scalar to a zerodim array during + # comparison operation when second arg has higher priority, e.g. + # + # cat[0] < cat + # + # With cat[0], for example, being ``np.int64(1)`` by the time it gets + # into this function would become ``np.array(1)``. 
+ other = lib.item_from_zerodim(other) + if is_scalar(other): + if other in self.categories: + i = self.categories.get_loc(other) + return getattr(self._codes, op)(i) + else: + if op == '__eq__': + return np.repeat(False, len(self)) + elif op == '__ne__': + return np.repeat(True, len(self)) + else: + msg = ("Cannot compare a Categorical for op {op} with a " + "scalar, which is not a category.") + raise TypeError(msg.format(op=op)) + else: + + # allow categorical vs object dtype array comparisons for equality + # these are only positional comparisons + if op in ['__eq__', '__ne__']: + return getattr(np.array(self), op)(np.array(other)) + + msg = ("Cannot compare a Categorical for op {op} with type {typ}." + "\nIf you want to compare values, use 'np.asarray(cat) " + " other'.") + raise TypeError(msg.format(op=op, typ=type(other))) + + f.__name__ = op + + return f + + +def _maybe_to_categorical(array): + """ + Coerce to a categorical if a series is given. + + Internal use ONLY. + """ + if isinstance(array, (ABCSeries, ABCCategoricalIndex)): + return array._values + elif isinstance(array, np.ndarray): + return Categorical(array) + return array + + +_codes_doc = """The category codes of this categorical. + +Level codes are an array if integer which are the positions of the real +values in the categories array. + +There is not setter, use the other categorical methods and the normal item +setter to change values in the categorical. +""" + + +class Categorical(PandasObject): + """ + Represents a categorical variable in classic R / S-plus fashion + + `Categoricals` can only take on only a limited, and usually fixed, number + of possible values (`categories`). In contrast to statistical categorical + variables, a `Categorical` might have an order, but numerical operations + (additions, divisions, ...) are not possible. + + All values of the `Categorical` are either in `categories` or `np.nan`. + Assigning values outside of `categories` will raise a `ValueError`. 
Order + is defined by the order of the `categories`, not lexical order of the + values. + + Parameters + ---------- + values : list-like + The values of the categorical. If categories are given, values not in + categories will be replaced with NaN. + categories : Index-like (unique), optional + The unique categories for this categorical. If not given, the + categories are assumed to be the unique values of values. + ordered : boolean, (default False) + Whether or not this categorical is treated as a ordered categorical. + If not given, the resulting categorical will not be ordered. + dtype : CategoricalDtype + An instance of ``CategoricalDtype`` to use for this categorical + + .. versionadded:: 0.21.0 + + Attributes + ---------- + categories : Index + The categories of this categorical + codes : ndarray + The codes (integer positions, which point to the categories) of this + categorical, read only. + ordered : boolean + Whether or not this Categorical is ordered. + dtype : CategoricalDtype + The instance of ``CategoricalDtype`` storing the ``categories`` + and ``ordered``. + + .. versionadded:: 0.21.0 + + Methods + ------- + from_codes + __array__ + + Raises + ------ + ValueError + If the categories do not validate. + TypeError + If an explicit ``ordered=True`` is given but no `categories` and the + `values` are not sortable. + + Examples + -------- + >>> pd.Categorical([1, 2, 3, 1, 2, 3]) + [1, 2, 3, 1, 2, 3] + Categories (3, int64): [1, 2, 3] + + >>> pd.Categorical(['a', 'b', 'c', 'a', 'b', 'c']) + [a, b, c, a, b, c] + Categories (3, object): [a, b, c] + + Ordered `Categoricals` can be sorted according to the custom order + of the categories and can have a min and max value. + + >>> c = pd.Categorical(['a','b','c','a','b','c'], ordered=True, + ... categories=['c', 'b', 'a']) + >>> c + [a, b, c, a, b, c] + Categories (3, object): [c < b < a] + >>> c.min() + 'c' + + Notes + ----- + See the `user guide + `_ for more. 
+ + See also + -------- + pandas.api.types.CategoricalDtype : Type for categorical data + CategoricalIndex : An Index with an underlying ``Categorical`` + """ + + # For comparisons, so that numpy uses our implementation if the compare + # ops, which raise + __array_priority__ = 1000 + _dtype = CategoricalDtype() + _deprecations = frozenset(['labels']) + _typ = 'categorical' + + def __init__(self, values, categories=None, ordered=None, dtype=None, + fastpath=False): + + # Ways of specifying the dtype (prioritized ordered) + # 1. dtype is a CategoricalDtype + # a.) with known categories, use dtype.categories + # b.) else with Categorical values, use values.dtype + # c.) else, infer from values + # d.) specifying dtype=CategoricalDtype and categories is an error + # 2. dtype is a string 'category' + # a.) use categories, ordered + # b.) use values.dtype + # c.) infer from values + # 3. dtype is None + # a.) use categories, ordered + # b.) use values.dtype + # c.) infer from values + + if dtype is not None: + # The dtype argument takes precedence over values.dtype (if any) + if isinstance(dtype, compat.string_types): + if dtype == 'category': + dtype = CategoricalDtype(categories, ordered) + else: + msg = "Unknown `dtype` {dtype}" + raise ValueError(msg.format(dtype=dtype)) + elif categories is not None or ordered is not None: + raise ValueError("Cannot specify both `dtype` and `categories`" + " or `ordered`.") + + categories = dtype.categories + ordered = dtype.ordered + + elif is_categorical(values): + # If no "dtype" was passed, use the one from "values", but honor + # the "ordered" and "categories" arguments + dtype = values.dtype._from_categorical_dtype(values.dtype, + categories, ordered) + else: + # If dtype=None and values is not categorical, create a new dtype + dtype = CategoricalDtype(categories, ordered) + + # At this point, dtype is always a CategoricalDtype + # if dtype.categories is None, we are inferring + + if fastpath: + self._codes = 
coerce_indexer_dtype(values, categories) + self._dtype = dtype + return + + # null_mask indicates missing values we want to exclude from inference. + # This means: only missing values in list-likes (not arrays/ndframes). + null_mask = np.array(False) + + # sanitize input + if is_categorical_dtype(values): + if dtype.categories is None: + dtype = CategoricalDtype(values.categories, dtype.ordered) + + elif not isinstance(values, (ABCIndexClass, ABCSeries)): + # _sanitize_array coerces np.nan to a string under certain versions + # of numpy + values = maybe_infer_to_datetimelike(values, convert_dates=True) + if not isinstance(values, np.ndarray): + values = _convert_to_list_like(values) + from pandas.core.series import _sanitize_array + # By convention, empty lists result in object dtype: + if len(values) == 0: + sanitize_dtype = 'object' + else: + sanitize_dtype = None + null_mask = isna(values) + if null_mask.any(): + values = [values[idx] for idx in np.where(~null_mask)[0]] + values = _sanitize_array(values, None, dtype=sanitize_dtype) + + if dtype.categories is None: + try: + codes, categories = factorize(values, sort=True) + except TypeError: + codes, categories = factorize(values, sort=False) + if dtype.ordered: + # raise, as we don't have a sortable data structure and so + # the user should give us one by specifying categories + raise TypeError("'values' is not ordered, please " + "explicitly specify the categories order " + "by passing in a categories argument.") + except ValueError: + + # FIXME + raise NotImplementedError("> 1 ndim Categorical are not " + "supported at this time") + + # we're inferring from values + dtype = CategoricalDtype(categories, dtype.ordered) + + elif is_categorical_dtype(values): + old_codes = (values.cat.codes if isinstance(values, ABCSeries) + else values.codes) + codes = _recode_for_categories(old_codes, values.dtype.categories, + dtype.categories) + + else: + codes = _get_codes_for_values(values, dtype.categories) + + if 
null_mask.any(): + # Reinsert -1 placeholders for previously removed missing values + full_codes = - np.ones(null_mask.shape, dtype=codes.dtype) + full_codes[~null_mask] = codes + codes = full_codes + + self._dtype = dtype + self._codes = coerce_indexer_dtype(codes, dtype.categories) + + @property + def categories(self): + """The categories of this categorical. + + Setting assigns new values to each category (effectively a rename of + each individual category). + + The assigned value has to be a list-like object. All items must be + unique and the number of items in the new categories must be the same + as the number of items in the old categories. + + Assigning to `categories` is a inplace operation! + + Raises + ------ + ValueError + If the new categories do not validate as categories or if the + number of new categories is unequal the number of old categories + + See also + -------- + rename_categories + reorder_categories + add_categories + remove_categories + remove_unused_categories + set_categories + """ + return self.dtype.categories + + @categories.setter + def categories(self, categories): + new_dtype = CategoricalDtype(categories, ordered=self.ordered) + if (self.dtype.categories is not None and + len(self.dtype.categories) != len(new_dtype.categories)): + raise ValueError("new categories need to have the same number of " + "items as the old categories!") + self._dtype = new_dtype + + @property + def ordered(self): + """Whether the categories have an ordered relationship""" + return self.dtype.ordered + + @property + def dtype(self): + """The :class:`~pandas.api.types.CategoricalDtype` for this instance""" + return self._dtype + + @property + def _constructor(self): + return Categorical + + def copy(self): + """ Copy constructor. 
""" + return self._constructor(values=self._codes.copy(), + categories=self.categories, + ordered=self.ordered, + fastpath=True) + + def astype(self, dtype, copy=True): + """ + Coerce this type to another dtype + + Parameters + ---------- + dtype : numpy dtype or pandas type + copy : bool, default True + By default, astype always returns a newly allocated object. + If copy is set to False and dtype is categorical, the original + object is returned. + + .. versionadded:: 0.19.0 + + """ + if is_categorical_dtype(dtype): + # GH 10696/18593 + dtype = self.dtype._update_dtype(dtype) + self = self.copy() if copy else self + if dtype == self.dtype: + return self + return self._set_dtype(dtype) + return np.array(self, dtype=dtype, copy=copy) + + @cache_readonly + def ndim(self): + """Number of dimensions of the Categorical """ + return self._codes.ndim + + @cache_readonly + def size(self): + """ return the len of myself """ + return len(self) + + @cache_readonly + def itemsize(self): + """ return the size of a single category """ + return self.categories.itemsize + + def tolist(self): + """ + Return a list of the values. + + These are each a scalar type, which is a Python scalar + (for str, int, float) or a pandas scalar + (for Timestamp/Timedelta/Interval/Period) + """ + if is_datetimelike(self.categories): + return [com._maybe_box_datetimelike(x) for x in self] + return np.array(self).tolist() + + @property + def base(self): + """ compat, we are always our own object """ + return None + + @classmethod + def _from_inferred_categories(cls, inferred_categories, inferred_codes, + dtype): + """Construct a Categorical from inferred values + + For inferred categories (`dtype` is None) the categories are sorted. + For explicit `dtype`, the `inferred_categories` are cast to the + appropriate type. 
+ + Parameters + ---------- + + inferred_categories : Index + inferred_codes : Index + dtype : CategoricalDtype or 'category' + + Returns + ------- + Categorical + """ + from pandas import Index, to_numeric, to_datetime, to_timedelta + + cats = Index(inferred_categories) + + known_categories = (isinstance(dtype, CategoricalDtype) and + dtype.categories is not None) + + if known_categories: + # Convert to a specialzed type with `dtype` if specified + if dtype.categories.is_numeric(): + cats = to_numeric(inferred_categories, errors='coerce') + elif is_datetime64_dtype(dtype.categories): + cats = to_datetime(inferred_categories, errors='coerce') + elif is_timedelta64_dtype(dtype.categories): + cats = to_timedelta(inferred_categories, errors='coerce') + + if known_categories: + # recode from observation oder to dtype.categories order + categories = dtype.categories + codes = _recode_for_categories(inferred_codes, cats, categories) + elif not cats.is_monotonic_increasing: + # sort categories and recode for unknown categories + unsorted = cats.copy() + categories = cats.sort_values() + codes = _recode_for_categories(inferred_codes, unsorted, + categories) + dtype = CategoricalDtype(categories, ordered=False) + else: + dtype = CategoricalDtype(cats, ordered=False) + codes = inferred_codes + + return cls(codes, dtype=dtype, fastpath=True) + + @classmethod + def from_codes(cls, codes, categories, ordered=False): + """ + Make a Categorical type from codes and categories arrays. + + This constructor is useful if you already have codes and categories and + so do not need the (computation intensive) factorization step, which is + usually done on the constructor. + + If your data does not follow this convention, please use the normal + constructor. + + Parameters + ---------- + codes : array-like, integers + An integer array, where each integer points to a category in + categories or -1 for NaN + categories : index-like + The categories for the categorical. 
Items need to be unique. + ordered : boolean, (default False) + Whether or not this categorical is treated as a ordered + categorical. If not given, the resulting categorical will be + unordered. + """ + try: + codes = np.asarray(codes, np.int64) + except (ValueError, TypeError): + raise ValueError( + "codes need to be convertible to an arrays of integers") + + categories = CategoricalDtype._validate_categories(categories) + + if len(codes) and (codes.max() >= len(categories) or codes.min() < -1): + raise ValueError("codes need to be between -1 and " + "len(categories)-1") + + return cls(codes, categories=categories, ordered=ordered, + fastpath=True) + + _codes = None + + def _get_codes(self): + """ Get the codes. + + Returns + ------- + codes : integer array view + A non writable view of the `codes` array. + """ + v = self._codes.view() + v.flags.writeable = False + return v + + def _set_codes(self, codes): + """ + Not settable by the user directly + """ + raise ValueError("cannot set Categorical codes directly") + + codes = property(fget=_get_codes, fset=_set_codes, doc=_codes_doc) + + def _set_categories(self, categories, fastpath=False): + """ Sets new categories inplace + + Parameters + ---------- + fastpath : boolean (default: False) + Don't perform validation of the categories for uniqueness or nulls + + Examples + -------- + >>> c = Categorical(['a', 'b']) + >>> c + [a, b] + Categories (2, object): [a, b] + + >>> c._set_categories(pd.Index(['a', 'c'])) + >>> c + [a, c] + Categories (2, object): [a, c] + """ + + if fastpath: + new_dtype = CategoricalDtype._from_fastpath(categories, + self.ordered) + else: + new_dtype = CategoricalDtype(categories, ordered=self.ordered) + if (not fastpath and self.dtype.categories is not None and + len(new_dtype.categories) != len(self.dtype.categories)): + raise ValueError("new categories need to have the same number of " + "items than the old categories!") + + self._dtype = new_dtype + + def _codes_for_groupby(self, sort): 
+ """ + If sort=False, return a copy of self, coded with categories as + returned by .unique(), followed by any categories not appearing in + the data. If sort=True, return self. + + This method is needed solely to ensure the categorical index of the + GroupBy result has categories in the order of appearance in the data + (GH-8868). + + Parameters + ---------- + sort : boolean + The value of the sort parameter groupby was called with. + + Returns + ------- + Categorical + If sort=False, the new categories are set to the order of + appearance in codes (unless ordered=True, in which case the + original order is preserved), followed by any unrepresented + categories in the original order. + """ + + # Already sorted according to self.categories; all is fine + if sort: + return self + + # sort=False should order groups in as-encountered order (GH-8868) + cat = self.unique() + + # But for groupby to work, all categories should be present, + # including those missing from the data (GH-13179), which .unique() + # above dropped + cat.add_categories( + self.categories[~self.categories.isin(cat.categories)], + inplace=True) + + return self.reorder_categories(cat.categories) + + def _set_dtype(self, dtype): + """Internal method for directly updating the CategoricalDtype + + Parameters + ---------- + dtype : CategoricalDtype + + Notes + ----- + We don't do any validation here. It's assumed that the dtype is + a (valid) instance of `CategoricalDtype`. 
+ """ + codes = _recode_for_categories(self.codes, self.categories, + dtype.categories) + return type(self)(codes, dtype=dtype, fastpath=True) + + def set_ordered(self, value, inplace=False): + """ + Sets the ordered attribute to the boolean value + + Parameters + ---------- + value : boolean to set whether this categorical is ordered (True) or + not (False) + inplace : boolean (default: False) + Whether or not to set the ordered attribute inplace or return a copy + of this categorical with ordered set to the value + """ + inplace = validate_bool_kwarg(inplace, 'inplace') + new_dtype = CategoricalDtype(self.categories, ordered=value) + cat = self if inplace else self.copy() + cat._dtype = new_dtype + if not inplace: + return cat + + def as_ordered(self, inplace=False): + """ + Sets the Categorical to be ordered + + Parameters + ---------- + inplace : boolean (default: False) + Whether or not to set the ordered attribute inplace or return a copy + of this categorical with ordered set to True + """ + inplace = validate_bool_kwarg(inplace, 'inplace') + return self.set_ordered(True, inplace=inplace) + + def as_unordered(self, inplace=False): + """ + Sets the Categorical to be unordered + + Parameters + ---------- + inplace : boolean (default: False) + Whether or not to set the ordered attribute inplace or return a copy + of this categorical with ordered set to False + """ + inplace = validate_bool_kwarg(inplace, 'inplace') + return self.set_ordered(False, inplace=inplace) + + def set_categories(self, new_categories, ordered=None, rename=False, + inplace=False): + """ Sets the categories to the specified new_categories. + + `new_categories` can include new categories (which will result in + unused categories) or remove old categories (which results in values + set to NaN). If `rename==True`, the categories will simple be renamed + (less or more items than in old categories will result in values set to + NaN or in unused categories respectively). 
+ + This method can be used to perform more than one action of adding, + removing, and reordering simultaneously and is therefore faster than + performing the individual steps via the more specialised methods. + + On the other hand this methods does not do checks (e.g., whether the + old categories are included in the new categories on a reorder), which + can result in surprising changes, for example when using special string + dtypes on python3, which does not considers a S1 string equal to a + single char python string. + + Raises + ------ + ValueError + If new_categories does not validate as categories + + Parameters + ---------- + new_categories : Index-like + The categories in new order. + ordered : boolean, (default: False) + Whether or not the categorical is treated as a ordered categorical. + If not given, do not change the ordered information. + rename : boolean (default: False) + Whether or not the new_categories should be considered as a rename + of the old categories or as reordered categories. + inplace : boolean (default: False) + Whether or not to reorder the categories inplace or return a copy of + this categorical with reordered categories. + + Returns + ------- + cat : Categorical with reordered categories or None if inplace. 
+ + See also + -------- + rename_categories + reorder_categories + add_categories + remove_categories + remove_unused_categories + """ + inplace = validate_bool_kwarg(inplace, 'inplace') + if ordered is None: + ordered = self.dtype.ordered + new_dtype = CategoricalDtype(new_categories, ordered=ordered) + + cat = self if inplace else self.copy() + if rename: + if (cat.dtype.categories is not None and + len(new_dtype.categories) < len(cat.dtype.categories)): + # remove all _codes which are larger and set to -1/NaN + self._codes[self._codes >= len(new_dtype.categories)] = -1 + else: + codes = _recode_for_categories(self.codes, self.categories, + new_dtype.categories) + cat._codes = codes + cat._dtype = new_dtype + + if not inplace: + return cat + + def rename_categories(self, new_categories, inplace=False): + """ Renames categories. + + Raises + ------ + ValueError + If new categories are list-like and do not have the same number of + items than the current categories or do not validate as categories + + Parameters + ---------- + new_categories : list-like, dict-like or callable + + * list-like: all items must be unique and the number of items in + the new categories must match the existing number of categories. + + * dict-like: specifies a mapping from + old categories to new. Categories not contained in the mapping + are passed through and extra categories in the mapping are + ignored. + + .. versionadded:: 0.21.0 + + * callable : a callable that is called on all items in the old + categories and whose return values comprise the new categories. + + .. versionadded:: 0.23.0 + + .. warning:: + + Currently, Series are considered list like. In a future version + of pandas they'll be considered dict-like. + + inplace : boolean (default: False) + Whether or not to rename the categories inplace or return a copy of + this categorical with renamed categories. + + Returns + ------- + cat : Categorical or None + With ``inplace=False``, the new categorical is returned. 
+ With ``inplace=True``, there is no return value. + + See also + -------- + reorder_categories + add_categories + remove_categories + remove_unused_categories + set_categories + + Examples + -------- + >>> c = Categorical(['a', 'a', 'b']) + >>> c.rename_categories([0, 1]) + [0, 0, 1] + Categories (2, int64): [0, 1] + + For dict-like ``new_categories``, extra keys are ignored and + categories not in the dictionary are passed through + + >>> c.rename_categories({'a': 'A', 'c': 'C'}) + [A, A, b] + Categories (2, object): [A, b] + + You may also provide a callable to create the new categories + + >>> c.rename_categories(lambda x: x.upper()) + [A, A, B] + Categories (2, object): [A, B] + """ + inplace = validate_bool_kwarg(inplace, 'inplace') + cat = self if inplace else self.copy() + + if isinstance(new_categories, ABCSeries): + msg = ("Treating Series 'new_categories' as a list-like and using " + "the values. In a future version, 'rename_categories' will " + "treat Series like a dictionary.\n" + "For dict-like, use 'new_categories.to_dict()'\n" + "For list-like, use 'new_categories.values'.") + warn(msg, FutureWarning, stacklevel=2) + new_categories = list(new_categories) + + if is_dict_like(new_categories): + cat.categories = [new_categories.get(item, item) + for item in cat.categories] + elif callable(new_categories): + cat.categories = [new_categories(item) for item in cat.categories] + else: + cat.categories = new_categories + if not inplace: + return cat + + def reorder_categories(self, new_categories, ordered=None, inplace=False): + """ Reorders categories as specified in new_categories. + + `new_categories` need to include all old categories and no new category + items. + + Raises + ------ + ValueError + If the new categories do not contain all old category items or any + new ones + + Parameters + ---------- + new_categories : Index-like + The categories in new order. 
+ ordered : boolean, optional + Whether or not the categorical is treated as a ordered categorical. + If not given, do not change the ordered information. + inplace : boolean (default: False) + Whether or not to reorder the categories inplace or return a copy of + this categorical with reordered categories. + + Returns + ------- + cat : Categorical with reordered categories or None if inplace. + + See also + -------- + rename_categories + add_categories + remove_categories + remove_unused_categories + set_categories + """ + inplace = validate_bool_kwarg(inplace, 'inplace') + if set(self.dtype.categories) != set(new_categories): + raise ValueError("items in new_categories are not the same as in " + "old categories") + return self.set_categories(new_categories, ordered=ordered, + inplace=inplace) + + def add_categories(self, new_categories, inplace=False): + """ Add new categories. + + `new_categories` will be included at the last/highest place in the + categories and will be unused directly after this call. + + Raises + ------ + ValueError + If the new categories include old categories or do not validate as + categories + + Parameters + ---------- + new_categories : category or list-like of category + The new categories to be included. + inplace : boolean (default: False) + Whether or not to add the categories inplace or return a copy of + this categorical with added categories. + + Returns + ------- + cat : Categorical with new categories added or None if inplace. 
+ + See also + -------- + rename_categories + reorder_categories + remove_categories + remove_unused_categories + set_categories + """ + inplace = validate_bool_kwarg(inplace, 'inplace') + if not is_list_like(new_categories): + new_categories = [new_categories] + already_included = set(new_categories) & set(self.dtype.categories) + if len(already_included) != 0: + msg = ("new categories must not include old categories: " + "{already_included!s}") + raise ValueError(msg.format(already_included=already_included)) + new_categories = list(self.dtype.categories) + list(new_categories) + new_dtype = CategoricalDtype(new_categories, self.ordered) + + cat = self if inplace else self.copy() + cat._dtype = new_dtype + cat._codes = coerce_indexer_dtype(cat._codes, new_dtype.categories) + if not inplace: + return cat + + def remove_categories(self, removals, inplace=False): + """ Removes the specified categories. + + `removals` must be included in the old categories. Values which were in + the removed categories will be set to NaN + + Raises + ------ + ValueError + If the removals are not contained in the categories + + Parameters + ---------- + removals : category or list of categories + The categories which should be removed. + inplace : boolean (default: False) + Whether or not to remove the categories inplace or return a copy of + this categorical with removed categories. + + Returns + ------- + cat : Categorical with removed categories or None if inplace. 
+ + See also + -------- + rename_categories + reorder_categories + add_categories + remove_unused_categories + set_categories + """ + inplace = validate_bool_kwarg(inplace, 'inplace') + if not is_list_like(removals): + removals = [removals] + + removal_set = set(list(removals)) + not_included = removal_set - set(self.dtype.categories) + new_categories = [c for c in self.dtype.categories + if c not in removal_set] + + # GH 10156 + if any(isna(removals)): + not_included = [x for x in not_included if notna(x)] + new_categories = [x for x in new_categories if notna(x)] + + if len(not_included) != 0: + msg = "removals must all be in old categories: {not_included!s}" + raise ValueError(msg.format(not_included=not_included)) + + return self.set_categories(new_categories, ordered=self.ordered, + rename=False, inplace=inplace) + + def remove_unused_categories(self, inplace=False): + """ Removes categories which are not used. + + Parameters + ---------- + inplace : boolean (default: False) + Whether or not to drop unused categories inplace or return a copy of + this categorical with unused categories dropped. + + Returns + ------- + cat : Categorical with unused categories dropped or None if inplace. + + See also + -------- + rename_categories + reorder_categories + add_categories + remove_categories + set_categories + """ + inplace = validate_bool_kwarg(inplace, 'inplace') + cat = self if inplace else self.copy() + idx, inv = np.unique(cat._codes, return_inverse=True) + + if idx.size != 0 and idx[0] == -1: # na sentinel + idx, inv = idx[1:], inv - 1 + + new_categories = cat.dtype.categories.take(idx) + new_dtype = CategoricalDtype._from_fastpath(new_categories, + ordered=self.ordered) + cat._dtype = new_dtype + cat._codes = coerce_indexer_dtype(inv, new_dtype.categories) + + if not inplace: + return cat + + def map(self, mapper): + """Apply mapper function to its categories (not codes). + + Parameters + ---------- + mapper : callable + Function to be applied. 
When all categories are mapped + to different categories, the result will be Categorical which has + the same order property as the original. Otherwise, the result will + be np.ndarray. + + Returns + ------- + applied : Categorical or Index. + + """ + new_categories = self.categories.map(mapper) + try: + return self.from_codes(self._codes.copy(), + categories=new_categories, + ordered=self.ordered) + except ValueError: + return np.take(new_categories, self._codes) + + __eq__ = _cat_compare_op('__eq__') + __ne__ = _cat_compare_op('__ne__') + __lt__ = _cat_compare_op('__lt__') + __gt__ = _cat_compare_op('__gt__') + __le__ = _cat_compare_op('__le__') + __ge__ = _cat_compare_op('__ge__') + + # for Series/ndarray like compat + @property + def shape(self): + """ Shape of the Categorical. + + For internal compatibility with numpy arrays. + + Returns + ------- + shape : tuple + """ + + return tuple([len(self._codes)]) + + def shift(self, periods): + """ + Shift Categorical by desired number of periods. + + Parameters + ---------- + periods : int + Number of periods to move, can be positive or negative + + Returns + ------- + shifted : Categorical + """ + # since categoricals always have ndim == 1, an axis parameter + # doesn't make any sense here. + codes = self.codes + if codes.ndim > 1: + raise NotImplementedError("Categorical with ndim > 1.") + if np.prod(codes.shape) and (periods != 0): + codes = np.roll(codes, _ensure_platform_int(periods), axis=0) + if periods > 0: + codes[:periods] = -1 + else: + codes[periods:] = -1 + + return self.from_codes(codes, categories=self.categories, + ordered=self.ordered) + + def __array__(self, dtype=None): + """ + The numpy array interface. 
+ + Returns + ------- + values : numpy array + A numpy array of either the specified dtype or, + if dtype==None (default), the same dtype as + categorical.categories.dtype + """ + ret = take_1d(self.categories.values, self._codes) + if dtype and not is_dtype_equal(dtype, self.categories.dtype): + return np.asarray(ret, dtype) + return ret + + def __setstate__(self, state): + """Necessary for making this object picklable""" + if not isinstance(state, dict): + raise Exception('invalid pickle state') + + # Provide compatibility with pre-0.15.0 Categoricals. + if '_categories' not in state and '_levels' in state: + state['_categories'] = self.dtype._validate_categories(state.pop( + '_levels')) + if '_codes' not in state and 'labels' in state: + state['_codes'] = coerce_indexer_dtype( + state.pop('labels'), state['_categories']) + + # 0.16.0 ordered change + if '_ordered' not in state: + + # >=15.0 < 0.16.0 + if 'ordered' in state: + state['_ordered'] = state.pop('ordered') + else: + state['_ordered'] = False + + # 0.21.0 CategoricalDtype change + if '_dtype' not in state: + state['_dtype'] = CategoricalDtype(state['_categories'], + state['_ordered']) + + for k, v in compat.iteritems(state): + setattr(self, k, v) + + @property + def T(self): + return self + + @property + def nbytes(self): + return self._codes.nbytes + self.dtype.categories.values.nbytes + + def memory_usage(self, deep=False): + """ + Memory usage of my values + + Parameters + ---------- + deep : bool + Introspect the data deeply, interrogate + `object` dtypes for system-level memory consumption + + Returns + ------- + bytes used + + Notes + ----- + Memory usage does not include memory consumed by elements that + are not components of the array if deep=False + + See Also + -------- + numpy.ndarray.nbytes + """ + return self._codes.nbytes + self.dtype.categories.memory_usage( + deep=deep) + + @Substitution(klass='Categorical') + @Appender(_shared_docs['searchsorted']) + @deprecate_kwarg(old_arg_name='v', 
new_arg_name='value') + def searchsorted(self, value, side='left', sorter=None): + if not self.ordered: + raise ValueError("Categorical not ordered\nyou can use " + ".as_ordered() to change the Categorical to an " + "ordered one") + + from pandas.core.series import Series + + values_as_codes = _get_codes_for_values(Series(value).values, + self.categories) + + if -1 in values_as_codes: + raise ValueError("Value(s) to be inserted must be in categories.") + + return self.codes.searchsorted(values_as_codes, side=side, + sorter=sorter) + + def isna(self): + """ + Detect missing values + + Both missing values (-1 in .codes) and NA as a category are detected. + + Returns + ------- + a boolean array of whether my values are null + + See also + -------- + isna : top-level isna + isnull : alias of isna + Categorical.notna : boolean inverse of Categorical.isna + + """ + + ret = self._codes == -1 + + # String/object and float categories can hold np.nan + if self.categories.dtype.kind in ['S', 'O', 'f']: + if np.nan in self.categories: + nan_pos = np.where(isna(self.categories))[0] + # we only have one NA in categories + ret = np.logical_or(ret, self._codes == nan_pos) + return ret + isnull = isna + + def notna(self): + """ + Inverse of isna + + Both missing values (-1 in .codes) and NA as a category are detected as + null. + + Returns + ------- + a boolean array of whether my values are not null + + See also + -------- + notna : top-level notna + notnull : alias of notna + Categorical.isna : boolean inverse of Categorical.notna + + """ + return ~self.isna() + notnull = notna + + def put(self, *args, **kwargs): + """ + Replace specific elements in the Categorical with given values. + """ + raise NotImplementedError(("'put' is not yet implemented " + "for Categorical")) + + def dropna(self): + """ + Return the Categorical without null values. + + Both missing values (-1 in .codes) and NA as a category are detected. + NA is removed from the categories if present. 
+ + Returns + ------- + valid : Categorical + """ + result = self[self.notna()] + if isna(result.categories).any(): + result = result.remove_categories([np.nan]) + return result + + def value_counts(self, dropna=True): + """ + Returns a Series containing counts of each category. + + Every category will have an entry, even those with a count of 0. + + Parameters + ---------- + dropna : boolean, default True + Don't include counts of NaN, even if NaN is a category. + + Returns + ------- + counts : Series + + See Also + -------- + Series.value_counts + + """ + from numpy import bincount + from pandas import isna, Series, CategoricalIndex + + obj = (self.remove_categories([np.nan]) if dropna and + isna(self.categories).any() else self) + code, cat = obj._codes, obj.categories + ncat, mask = len(cat), 0 <= code + ix, clean = np.arange(ncat), mask.all() + + if dropna or clean: + obs = code if clean else code[mask] + count = bincount(obs, minlength=ncat or None) + else: + count = bincount(np.where(mask, code, ncat)) + ix = np.append(ix, -1) + + ix = self._constructor(ix, dtype=self.dtype, + fastpath=True) + + return Series(count, index=CategoricalIndex(ix), dtype='int64') + + def get_values(self): + """ Return the values. + + For internal compatibility with pandas formatting. 
+ + Returns + ------- + values : numpy array + A numpy array of the same dtype as categorical.categories.dtype or + Index if datetime / periods + """ + # if we are a datetime and period index, return Index to keep metadata + if is_datetimelike(self.categories): + return self.categories.take(self._codes, fill_value=np.nan) + return np.array(self) + + def check_for_ordered(self, op): + """ assert that we are ordered """ + if not self.ordered: + raise TypeError("Categorical is not ordered for operation {op}\n" + "you can use .as_ordered() to change the " + "Categorical to an ordered one\n".format(op=op)) + + def argsort(self, ascending=True, kind='quicksort', *args, **kwargs): + """ + Returns the indices that would sort the Categorical instance if + 'sort_values' was called. This function is implemented to provide + compatibility with numpy ndarray objects. + + While an ordering is applied to the category values, arg-sorting + in this context refers more to organizing and grouping together + based on matching category values. Thus, this function can be + called on an unordered Categorical instance unlike the functions + 'Categorical.min' and 'Categorical.max'. + + Returns + ------- + argsorted : numpy array + + See also + -------- + numpy.ndarray.argsort + """ + ascending = nv.validate_argsort_with_ascending(ascending, args, kwargs) + result = np.argsort(self._codes.copy(), kind=kind, **kwargs) + if not ascending: + result = result[::-1] + return result + + def sort_values(self, inplace=False, ascending=True, na_position='last'): + """ Sorts the Categorical by category value returning a new + Categorical by default. + + While an ordering is applied to the category values, sorting in this + context refers more to organizing and grouping together based on + matching category values. Thus, this function can be called on an + unordered Categorical instance unlike the functions 'Categorical.min' + and 'Categorical.max'. 
+ + Parameters + ---------- + inplace : boolean, default False + Do operation in place. + ascending : boolean, default True + Order ascending. Passing False orders descending. The + ordering parameter provides the method by which the + category values are organized. + na_position : {'first', 'last'} (optional, default='last') + 'first' puts NaNs at the beginning + 'last' puts NaNs at the end + + Returns + ------- + y : Categorical or None + + See Also + -------- + Categorical.sort + Series.sort_values + + Examples + -------- + >>> c = pd.Categorical([1, 2, 2, 1, 5]) + >>> c + [1, 2, 2, 1, 5] + Categories (3, int64): [1, 2, 5] + >>> c.sort_values() + [1, 1, 2, 2, 5] + Categories (3, int64): [1, 2, 5] + >>> c.sort_values(ascending=False) + [5, 2, 2, 1, 1] + Categories (3, int64): [1, 2, 5] + + Inplace sorting can be done as well: + + >>> c.sort_values(inplace=True) + >>> c + [1, 1, 2, 2, 5] + Categories (3, int64): [1, 2, 5] + >>> + >>> c = pd.Categorical([1, 2, 2, 1, 5]) + + 'sort_values' behaviour with NaNs. 
Note that 'na_position' + is independent of the 'ascending' parameter: + + >>> c = pd.Categorical([np.nan, 2, 2, np.nan, 5]) + >>> c + [NaN, 2.0, 2.0, NaN, 5.0] + Categories (2, int64): [2, 5] + >>> c.sort_values() + [2.0, 2.0, 5.0, NaN, NaN] + Categories (2, int64): [2, 5] + >>> c.sort_values(ascending=False) + [5.0, 2.0, 2.0, NaN, NaN] + Categories (2, int64): [2, 5] + >>> c.sort_values(na_position='first') + [NaN, NaN, 2.0, 2.0, 5.0] + Categories (2, int64): [2, 5] + >>> c.sort_values(ascending=False, na_position='first') + [NaN, NaN, 5.0, 2.0, 2.0] + Categories (2, int64): [2, 5] + """ + inplace = validate_bool_kwarg(inplace, 'inplace') + if na_position not in ['last', 'first']: + msg = 'invalid na_position: {na_position!r}' + raise ValueError(msg.format(na_position=na_position)) + + codes = np.sort(self._codes) + if not ascending: + codes = codes[::-1] + + # NaN handling + na_mask = (codes == -1) + if na_mask.any(): + n_nans = len(codes[na_mask]) + if na_position == "first": + # in this case sort to the front + new_codes = codes.copy() + new_codes[0:n_nans] = -1 + new_codes[n_nans:] = codes[~na_mask] + codes = new_codes + elif na_position == "last": + # ... and to the end + new_codes = codes.copy() + pos = len(codes) - n_nans + new_codes[0:pos] = codes[~na_mask] + new_codes[pos:] = -1 + codes = new_codes + if inplace: + self._codes = codes + return + else: + return self._constructor(values=codes, categories=self.categories, + ordered=self.ordered, fastpath=True) + + def _values_for_rank(self): + """ + For correctly ranking ordered categorical data. See GH#15420 + + Ordered categorical data should be ranked on the basis of + codes with -1 translated to NaN. 
+ + Returns + ------- + numpy array + + """ + from pandas import Series + if self.ordered: + values = self.codes + mask = values == -1 + if mask.any(): + values = values.astype('float64') + values[mask] = np.nan + elif self.categories.is_numeric(): + values = np.array(self) + else: + # reorder the categories (so rank can use the float codes) + # instead of passing an object array to rank + values = np.array( + self.rename_categories(Series(self.categories).rank().values) + ) + return values + + def ravel(self, order='C'): + """ Return a flattened (numpy) array. + + For internal compatibility with numpy arrays. + + Returns + ------- + raveled : numpy array + """ + return np.array(self) + + def view(self): + """Return a view of myself. + + For internal compatibility with numpy arrays. + + Returns + ------- + view : Categorical + Returns `self`! + """ + return self + + def to_dense(self): + """Return my 'dense' representation + + For internal compatibility with numpy arrays. + + Returns + ------- + dense : array + """ + return np.asarray(self) + + @deprecate_kwarg(old_arg_name='fill_value', new_arg_name='value') + def fillna(self, value=None, method=None, limit=None): + """ Fill NA/NaN values using the specified method. + + Parameters + ---------- + method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None + Method to use for filling holes in reindexed Series + pad / ffill: propagate last valid observation forward to next valid + backfill / bfill: use NEXT valid observation to fill gap + value : scalar, dict, Series + If a scalar value is passed it is used to fill all missing values. + Alternatively, a Series or dict can be used to fill in different + values for each index. The value should not be a list. The + value(s) passed should either be in the categories or should be + NaN. + limit : int, default None + (Not implemented yet for Categorical!) + If method is specified, this is the maximum number of consecutive + NaN values to forward/backward fill. 
In other words, if there is + a gap with more than this number of consecutive NaNs, it will only + be partially filled. If method is not specified, this is the + maximum number of entries along the entire axis where NaNs will be + filled. + + Returns + ------- + filled : Categorical with NA/NaN filled + """ + + if value is None: + value = np.nan + if limit is not None: + raise NotImplementedError("specifying a limit for fillna has not " + "been implemented yet") + + values = self._codes + + # Make sure that we also get NA in categories + if self.categories.dtype.kind in ['S', 'O', 'f']: + if np.nan in self.categories: + values = values.copy() + nan_pos = np.where(isna(self.categories))[0] + # we only have one NA in categories + values[values == nan_pos] = -1 + + # pad / bfill + if method is not None: + + values = self.to_dense().reshape(-1, len(self)) + values = interpolate_2d(values, method, 0, None, + value).astype(self.categories.dtype)[0] + values = _get_codes_for_values(values, self.categories) + + else: + + # If value is a dict or a Series (a dict value has already + # been converted to a Series) + if isinstance(value, ABCSeries): + if not value[~value.isin(self.categories)].isna().all(): + raise ValueError("fill value must be in categories") + + values_codes = _get_codes_for_values(value, self.categories) + indexer = np.where(values_codes != -1) + values[indexer] = values_codes[values_codes != -1] + + # If value is not a dict or Series it should be a scalar + elif is_scalar(value): + if not isna(value) and value not in self.categories: + raise ValueError("fill value must be in categories") + + mask = values == -1 + if mask.any(): + values = values.copy() + if isna(value): + values[mask] = -1 + else: + values[mask] = self.categories.get_loc(value) + + else: + raise TypeError('"value" parameter must be a scalar, dict ' + 'or Series, but you passed a ' + '"{0}"'.format(type(value).__name__)) + + return self._constructor(values, categories=self.categories, + 
ordered=self.ordered, fastpath=True) + + def take_nd(self, indexer, allow_fill=True, fill_value=None): + """ Take the codes by the indexer, fill with the fill_value. + + For internal compatibility with numpy arrays. + """ + + # filling must always be None/nan here + # but is passed thru internally + assert isna(fill_value) + + codes = take_1d(self._codes, indexer, allow_fill=True, fill_value=-1) + result = self._constructor(codes, categories=self.categories, + ordered=self.ordered, fastpath=True) + return result + + take = take_nd + + def _slice(self, slicer): + """ Return a slice of myself. + + For internal compatibility with numpy arrays. + """ + + # only allow 1 dimensional slicing, but can + # in a 2-d case be passd (slice(None),....) + if isinstance(slicer, tuple) and len(slicer) == 2: + if not com.is_null_slice(slicer[0]): + raise AssertionError("invalid slicing for a 1-ndim " + "categorical") + slicer = slicer[1] + + _codes = self._codes[slicer] + return self._constructor(values=_codes, categories=self.categories, + ordered=self.ordered, fastpath=True) + + def __len__(self): + """The length of this Categorical.""" + return len(self._codes) + + def __iter__(self): + """Returns an Iterator over the values of this Categorical.""" + return iter(self.get_values()) + + def _tidy_repr(self, max_vals=10, footer=True): + """ a short repr displaying only max_vals and an optional (but default + footer) + """ + num = max_vals // 2 + head = self[:num]._get_repr(length=False, footer=False) + tail = self[-(max_vals - num):]._get_repr(length=False, footer=False) + + result = u('{head}, ..., {tail}').format(head=head[:-1], tail=tail[1:]) + if footer: + result = u('{result}\n{footer}').format(result=result, + footer=self._repr_footer()) + + return compat.text_type(result) + + def _repr_categories(self): + """ return the base repr for the categories """ + max_categories = (10 if get_option("display.max_categories") == 0 else + get_option("display.max_categories")) + from 
pandas.io.formats import format as fmt + if len(self.categories) > max_categories: + num = max_categories // 2 + head = fmt.format_array(self.categories[:num], None) + tail = fmt.format_array(self.categories[-num:], None) + category_strs = head + ["..."] + tail + else: + category_strs = fmt.format_array(self.categories, None) + + # Strip all leading spaces, which format_array adds for columns... + category_strs = [x.strip() for x in category_strs] + return category_strs + + def _repr_categories_info(self): + """ Returns a string representation of the footer.""" + + category_strs = self._repr_categories() + dtype = getattr(self.categories, 'dtype_str', + str(self.categories.dtype)) + + levheader = "Categories ({length}, {dtype}): ".format( + length=len(self.categories), dtype=dtype) + width, height = get_terminal_size() + max_width = get_option("display.width") or width + if com.in_ipython_frontend(): + # 0 = no breaks + max_width = 0 + levstring = "" + start = True + cur_col_len = len(levheader) # header + sep_len, sep = (3, " < ") if self.ordered else (2, ", ") + linesep = sep.rstrip() + "\n" # remove whitespace + for val in category_strs: + if max_width != 0 and cur_col_len + sep_len + len(val) > max_width: + levstring += linesep + (" " * (len(levheader) + 1)) + cur_col_len = len(levheader) + 1 # header + a whitespace + elif not start: + levstring += sep + cur_col_len += len(val) + levstring += val + start = False + # replace to simple save space by + return levheader + "[" + levstring.replace(" < ... < ", " ... 
") + "]" + + def _repr_footer(self): + + return u('Length: {length}\n{info}').format( + length=len(self), info=self._repr_categories_info()) + + def _get_repr(self, length=True, na_rep='NaN', footer=True): + from pandas.io.formats import format as fmt + formatter = fmt.CategoricalFormatter(self, length=length, + na_rep=na_rep, footer=footer) + result = formatter.to_string() + return compat.text_type(result) + + def __unicode__(self): + """ Unicode representation. """ + _maxlen = 10 + if len(self._codes) > _maxlen: + result = self._tidy_repr(_maxlen) + elif len(self._codes) > 0: + result = self._get_repr(length=len(self) > _maxlen) + else: + msg = self._get_repr(length=False, footer=True).replace("\n", ", ") + result = ('[], {repr_msg}'.format(repr_msg=msg)) + + return result + + def _maybe_coerce_indexer(self, indexer): + """ return an indexer coerced to the codes dtype """ + if isinstance(indexer, np.ndarray) and indexer.dtype.kind == 'i': + indexer = indexer.astype(self._codes.dtype) + return indexer + + def __getitem__(self, key): + """ Return an item. """ + if isinstance(key, (int, np.integer)): + i = self._codes[key] + if i == -1: + return np.nan + else: + return self.categories[i] + else: + return self._constructor(values=self._codes[key], + categories=self.categories, + ordered=self.ordered, fastpath=True) + + def __setitem__(self, key, value): + """ Item assignment. 
+ + + Raises + ------ + ValueError + If (one or more) Value is not in categories or if a assigned + `Categorical` does not have the same categories + """ + + # require identical categories set + if isinstance(value, Categorical): + if not value.categories.equals(self.categories): + raise ValueError("Cannot set a Categorical with another, " + "without identical categories") + + rvalue = value if is_list_like(value) else [value] + + from pandas import Index + to_add = Index(rvalue).difference(self.categories) + + # no assignments of values not in categories, but it's always ok to set + # something to np.nan + if len(to_add) and not isna(to_add).all(): + raise ValueError("Cannot setitem on a Categorical with a new " + "category, set the categories first") + + # set by position + if isinstance(key, (int, np.integer)): + pass + + # tuple of indexers (dataframe) + elif isinstance(key, tuple): + # only allow 1 dimensional slicing, but can + # in a 2-d case be passd (slice(None),....) + if len(key) == 2: + if not com.is_null_slice(key[0]): + raise AssertionError("invalid slicing for a 1-ndim " + "categorical") + key = key[1] + elif len(key) == 1: + key = key[0] + else: + raise AssertionError("invalid slicing for a 1-ndim " + "categorical") + + # slicing in Series or Categorical + elif isinstance(key, slice): + pass + + # Array of True/False in Series or Categorical + else: + # There is a bug in numpy, which does not accept a Series as a + # indexer + # https://github.com/pandas-dev/pandas/issues/6168 + # https://github.com/numpy/numpy/issues/4240 -> fixed in numpy 1.9 + # FIXME: remove when numpy 1.9 is the lowest numpy version pandas + # accepts... 
+ key = np.asarray(key) + + lindexer = self.categories.get_indexer(rvalue) + + # FIXME: the following can be removed after GH7820 is fixed: + # https://github.com/pandas-dev/pandas/issues/7820 + # float categories do currently return -1 for np.nan, even if np.nan is + # included in the index -> "repair" this here + if isna(rvalue).any() and isna(self.categories).any(): + nan_pos = np.where(isna(self.categories))[0] + lindexer[lindexer == -1] = nan_pos + + lindexer = self._maybe_coerce_indexer(lindexer) + self._codes[key] = lindexer + + def _reverse_indexer(self): + """ + Compute the inverse of a categorical, returning + a dict of categories -> indexers. + + *This is an internal function* + + Returns + ------- + dict of categories -> indexers + + Example + ------- + In [1]: c = pd.Categorical(list('aabca')) + + In [2]: c + Out[2]: + [a, a, b, c, a] + Categories (3, object): [a, b, c] + + In [3]: c.categories + Out[3]: Index([u'a', u'b', u'c'], dtype='object') + + In [4]: c.codes + Out[4]: array([0, 0, 1, 2, 0], dtype=int8) + + In [5]: c._reverse_indexer() + Out[5]: {'a': array([0, 1, 4]), 'b': array([2]), 'c': array([3])} + + """ + categories = self.categories + r, counts = libalgos.groupsort_indexer(self.codes.astype('int64'), + categories.size) + counts = counts.cumsum() + result = [r[counts[indexer]:counts[indexer + 1]] + for indexer in range(len(counts) - 1)] + result = dict(zip(categories, result)) + return result + + # reduction ops # + def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None, + filter_type=None, **kwds): + """ perform the reduction type operation """ + func = getattr(self, name, None) + if func is None: + msg = 'Categorical cannot perform the operation {op}' + raise TypeError(msg.format(op=name)) + return func(numeric_only=numeric_only, **kwds) + + def min(self, numeric_only=None, **kwargs): + """ The minimum value of the object. + + Only ordered `Categoricals` have a minimum! 
+ + Raises + ------ + TypeError + If the `Categorical` is not `ordered`. + + Returns + ------- + min : the minimum of this `Categorical` + """ + self.check_for_ordered('min') + if numeric_only: + good = self._codes != -1 + pointer = self._codes[good].min(**kwargs) + else: + pointer = self._codes.min(**kwargs) + if pointer == -1: + return np.nan + else: + return self.categories[pointer] + + def max(self, numeric_only=None, **kwargs): + """ The maximum value of the object. + + Only ordered `Categoricals` have a maximum! + + Raises + ------ + TypeError + If the `Categorical` is not `ordered`. + + Returns + ------- + max : the maximum of this `Categorical` + """ + self.check_for_ordered('max') + if numeric_only: + good = self._codes != -1 + pointer = self._codes[good].max(**kwargs) + else: + pointer = self._codes.max(**kwargs) + if pointer == -1: + return np.nan + else: + return self.categories[pointer] + + def mode(self): + """ + Returns the mode(s) of the Categorical. + + Always returns `Categorical` even if only one value. + + Returns + ------- + modes : `Categorical` (sorted) + """ + + import pandas._libs.hashtable as htable + good = self._codes != -1 + values = sorted(htable.mode_int64(_ensure_int64(self._codes[good]))) + result = self._constructor(values=values, categories=self.categories, + ordered=self.ordered, fastpath=True) + return result + + def unique(self): + """ + Return the ``Categorical`` which ``categories`` and ``codes`` are + unique. Unused categories are NOT returned. + + - unordered category: values and categories are sorted by appearance + order. + - ordered category: values are sorted by appearance order, categories + keeps existing order. + + Returns + ------- + unique values : ``Categorical`` + + Examples + -------- + An unordered Categorical will return categories in the + order of appearance. 
+ + >>> pd.Categorical(list('baabc')) + [b, a, c] + Categories (3, object): [b, a, c] + + >>> pd.Categorical(list('baabc'), categories=list('abc')) + [b, a, c] + Categories (3, object): [b, a, c] + + An ordered Categorical preserves the category ordering. + + >>> pd.Categorical(list('baabc'), + ... categories=list('abc'), + ... ordered=True) + [b, a, c] + Categories (3, object): [a < b < c] + + See Also + -------- + unique + CategoricalIndex.unique + Series.unique + + """ + + # unlike np.unique, unique1d does not sort + unique_codes = unique1d(self.codes) + cat = self.copy() + + # keep nan in codes + cat._codes = unique_codes + + # exclude nan from indexer for categories + take_codes = unique_codes[unique_codes != -1] + if self.ordered: + take_codes = sorted(take_codes) + return cat.set_categories(cat.categories.take(take_codes)) + + def equals(self, other): + """ + Returns True if categorical arrays are equal. + + Parameters + ---------- + other : `Categorical` + + Returns + ------- + are_equal : boolean + """ + if self.is_dtype_equal(other): + if self.categories.equals(other.categories): + # fastpath to avoid re-coding + other_codes = other._codes + else: + other_codes = _recode_for_categories(other.codes, + other.categories, + self.categories) + return np.array_equal(self._codes, other_codes) + return False + + def is_dtype_equal(self, other): + """ + Returns True if categoricals are the same dtype + same categories, and same ordered + + Parameters + ---------- + other : Categorical + + Returns + ------- + are_equal : boolean + """ + + try: + return hash(self.dtype) == hash(other.dtype) + except (AttributeError, TypeError): + return False + + def describe(self): + """ Describes this Categorical + + Returns + ------- + description: `DataFrame` + A dataframe with frequency and counts by category. 
+ """ + counts = self.value_counts(dropna=False) + freqs = counts / float(counts.sum()) + + from pandas.core.reshape.concat import concat + result = concat([counts, freqs], axis=1) + result.columns = ['counts', 'freqs'] + result.index.name = 'categories' + + return result + + def repeat(self, repeats, *args, **kwargs): + """ + Repeat elements of a Categorical. + + See also + -------- + numpy.ndarray.repeat + + """ + nv.validate_repeat(args, kwargs) + codes = self._codes.repeat(repeats) + return self._constructor(values=codes, categories=self.categories, + ordered=self.ordered, fastpath=True) + +# The Series.cat accessor + + +class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): + """ + Accessor object for categorical properties of the Series values. + + Be aware that assigning to `categories` is a inplace operation, while all + methods return new categorical data per default (but can be called with + `inplace=True`). + + Parameters + ---------- + data : Series or CategoricalIndex + + Examples + -------- + >>> s.cat.categories + >>> s.cat.categories = list('abc') + >>> s.cat.rename_categories(list('cab')) + >>> s.cat.reorder_categories(list('cab')) + >>> s.cat.add_categories(['d','e']) + >>> s.cat.remove_categories(['d']) + >>> s.cat.remove_unused_categories() + >>> s.cat.set_categories(list('abcde')) + >>> s.cat.as_ordered() + >>> s.cat.as_unordered() + + """ + + def __init__(self, data): + self._validate(data) + self.categorical = data.values + self.index = data.index + self.name = data.name + self._freeze() + + @staticmethod + def _validate(data): + if not is_categorical_dtype(data.dtype): + raise AttributeError("Can only use .cat accessor with a " + "'category' dtype") + + def _delegate_property_get(self, name): + return getattr(self.categorical, name) + + def _delegate_property_set(self, name, new_values): + return setattr(self.categorical, name, new_values) + + @property + def codes(self): + from pandas import Series + return 
Series(self.categorical.codes, index=self.index) + + def _delegate_method(self, name, *args, **kwargs): + from pandas import Series + method = getattr(self.categorical, name) + res = method(*args, **kwargs) + if res is not None: + return Series(res, index=self.index, name=self.name) + + +CategoricalAccessor._add_delegate_accessors(delegate=Categorical, + accessors=["categories", + "ordered"], + typ='property') +CategoricalAccessor._add_delegate_accessors(delegate=Categorical, accessors=[ + "rename_categories", "reorder_categories", "add_categories", + "remove_categories", "remove_unused_categories", "set_categories", + "as_ordered", "as_unordered"], typ='method') + +# utility routines + + +def _get_codes_for_values(values, categories): + """ + utility routine to turn values into codes given the specified categories + """ + + from pandas.core.algorithms import _get_data_algo, _hashtables + if not is_dtype_equal(values.dtype, categories.dtype): + values = _ensure_object(values) + categories = _ensure_object(categories) + + (hash_klass, vec_klass), vals = _get_data_algo(values, _hashtables) + (_, _), cats = _get_data_algo(categories, _hashtables) + t = hash_klass(len(cats)) + t.map_locations(cats) + return coerce_indexer_dtype(t.lookup(vals), cats) + + +def _recode_for_categories(codes, old_categories, new_categories): + """ + Convert a set of codes for to a new set of categories + + Parameters + ---------- + codes : array + old_categories, new_categories : Index + + Returns + ------- + new_codes : array + + Examples + -------- + >>> old_cat = pd.Index(['b', 'a', 'c']) + >>> new_cat = pd.Index(['a', 'b']) + >>> codes = np.array([0, 1, 1, 2]) + >>> _recode_for_categories(codes, old_cat, new_cat) + array([ 1, 0, 0, -1]) + """ + from pandas.core.algorithms import take_1d + + if len(old_categories) == 0: + # All null anyway, so just retain the nulls + return codes.copy() + indexer = coerce_indexer_dtype(new_categories.get_indexer(old_categories), + new_categories) + 
new_codes = take_1d(indexer, codes.copy(), fill_value=-1) + return new_codes + + +def _convert_to_list_like(list_like): + if hasattr(list_like, "dtype"): + return list_like + if isinstance(list_like, list): + return list_like + if (is_sequence(list_like) or isinstance(list_like, tuple) or + isinstance(list_like, types.GeneratorType)): + return list(list_like) + elif is_scalar(list_like): + return [list_like] + else: + # is this reached? + return [list_like] + + +def _factorize_from_iterable(values): + """ + Factorize an input `values` into `categories` and `codes`. Preserves + categorical dtype in `categories`. + + *This is an internal function* + + Parameters + ---------- + values : list-like + + Returns + ------- + codes : ndarray + categories : Index + If `values` has a categorical dtype, then `categories` is + a CategoricalIndex keeping the categories and order of `values`. + """ + from pandas.core.indexes.category import CategoricalIndex + + if not is_list_like(values): + raise TypeError("Input must be list-like") + + if is_categorical(values): + if isinstance(values, (ABCCategoricalIndex, ABCSeries)): + values = values._values + categories = CategoricalIndex(values.categories, + categories=values.categories, + ordered=values.ordered) + codes = values.codes + else: + cat = Categorical(values, ordered=True) + categories = cat.categories + codes = cat.codes + return codes, categories + + +def _factorize_from_iterables(iterables): + """ + A higher-level wrapper over `_factorize_from_iterable`. + + *This is an internal function* + + Parameters + ---------- + iterables : list-like of list-likes + + Returns + ------- + codes_list : list of ndarrays + categories_list : list of Indexes + + Notes + ----- + See `_factorize_from_iterable` for more info. + """ + if len(iterables) == 0: + # For consistency, it should return a list of 2 lists. 
+ return [[], []] + return map(list, lzip(*[_factorize_from_iterable(it) for it in iterables])) diff --git a/pandas/core/base.py b/pandas/core/base.py index e90794c6c2e1a..54d25a16a10a3 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -24,7 +24,6 @@ from pandas.compat import PYPY from pandas.util._decorators import (Appender, cache_readonly, deprecate_kwarg, Substitution) -from pandas.core.common import AbstractMethodError, _maybe_box_datetimelike from pandas.core.accessor import DirNamesMixin @@ -46,7 +45,7 @@ class StringMixin(object): # Formatting def __unicode__(self): - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) def __str__(self): """ @@ -145,10 +144,14 @@ def _freeze(self): # prevent adding any attribute via s.xxx.new_attribute = ... def __setattr__(self, key, value): # _cache is used by a decorator - # dict lookup instead of getattr as getattr is false for getter - # which error - if getattr(self, "__frozen", False) and not \ - (key in type(self).__dict__ or key == "_cache"): + # We need to check both 1.) cls.__dict__ and 2.) getattr(self, key) + # because + # 1.) getattr is false for attributes that raise errors + # 2.) cls.__dict__ doesn't traverse into base classes + if (getattr(self, "__frozen", False) and not + (key == "_cache" or + key in type(self).__dict__ or + getattr(self, key, None) is not None)): raise AttributeError("You cannot add any new attribute '{key}'". 
format(key=key)) object.__setattr__(self, key, value) @@ -274,10 +277,10 @@ def _gotitem(self, key, ndim, subset=None): subset to act on """ - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) def aggregate(self, func, *args, **kwargs): - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) agg = aggregate @@ -811,7 +814,7 @@ def tolist(self): """ if is_datetimelike(self): - return [_maybe_box_datetimelike(x) for x in self._values] + return [com._maybe_box_datetimelike(x) for x in self._values] else: return self._values.tolist() @@ -1234,4 +1237,4 @@ def duplicated(self, keep='first'): # abstracts def _update_inplace(self, result, **kwargs): - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index d47cb0762447b..530a3ecb5f378 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -1,2332 +1,8 @@ -# pylint: disable=E1101,W0232 +import warnings -import numpy as np -from warnings import warn -import types +# TODO: Remove after 0.23.x +warnings.warn("'pandas.core' is private. 
Use 'pandas.Categorical'", + FutureWarning, stacklevel=2) -from pandas import compat -from pandas.compat import u, lzip -from pandas._libs import lib, algos as libalgos - -from pandas.core.dtypes.generic import ( - ABCSeries, ABCIndexClass, ABCCategoricalIndex) -from pandas.core.dtypes.missing import isna, notna -from pandas.core.dtypes.cast import ( - maybe_infer_to_datetimelike, - coerce_indexer_dtype) -from pandas.core.dtypes.dtypes import CategoricalDtype -from pandas.core.dtypes.common import ( - _ensure_int64, - _ensure_object, - _ensure_platform_int, - is_dtype_equal, - is_datetimelike, - is_datetime64_dtype, - is_timedelta64_dtype, - is_categorical, - is_categorical_dtype, - is_list_like, is_sequence, - is_scalar, - is_dict_like) -from pandas.core.common import is_null_slice, _maybe_box_datetimelike - -from pandas.core.algorithms import factorize, take_1d, unique1d -from pandas.core.accessor import PandasDelegate -from pandas.core.base import (PandasObject, - NoNewAttributesMixin, _shared_docs) -import pandas.core.common as com -from pandas.core.missing import interpolate_2d -from pandas.compat.numpy import function as nv -from pandas.util._decorators import ( - Appender, cache_readonly, deprecate_kwarg, Substitution) - -from pandas.io.formats.terminal import get_terminal_size -from pandas.util._validators import validate_bool_kwarg -from pandas.core.config import get_option - - -def _cat_compare_op(op): - def f(self, other): - # On python2, you can usually compare any type to any type, and - # Categoricals can be seen as a custom type, but having different - # results depending whether categories are the same or not is kind of - # insane, so be a bit stricter here and use the python3 idea of - # comparing only things of equal type. 
- if not self.ordered: - if op in ['__lt__', '__gt__', '__le__', '__ge__']: - raise TypeError("Unordered Categoricals can only compare " - "equality or not") - if isinstance(other, Categorical): - # Two Categoricals can only be be compared if the categories are - # the same (maybe up to ordering, depending on ordered) - - msg = ("Categoricals can only be compared if " - "'categories' are the same.") - if len(self.categories) != len(other.categories): - raise TypeError(msg + " Categories are different lengths") - elif (self.ordered and not (self.categories == - other.categories).all()): - raise TypeError(msg) - elif not set(self.categories) == set(other.categories): - raise TypeError(msg) - - if not (self.ordered == other.ordered): - raise TypeError("Categoricals can only be compared if " - "'ordered' is the same") - if not self.ordered and not self.categories.equals( - other.categories): - # both unordered and different order - other_codes = _get_codes_for_values(other, self.categories) - else: - other_codes = other._codes - - na_mask = (self._codes == -1) | (other_codes == -1) - f = getattr(self._codes, op) - ret = f(other_codes) - if na_mask.any(): - # In other series, the leads to False, so do that here too - ret[na_mask] = False - return ret - - # Numpy-1.9 and earlier may convert a scalar to a zerodim array during - # comparison operation when second arg has higher priority, e.g. - # - # cat[0] < cat - # - # With cat[0], for example, being ``np.int64(1)`` by the time it gets - # into this function would become ``np.array(1)``. 
- other = lib.item_from_zerodim(other) - if is_scalar(other): - if other in self.categories: - i = self.categories.get_loc(other) - return getattr(self._codes, op)(i) - else: - if op == '__eq__': - return np.repeat(False, len(self)) - elif op == '__ne__': - return np.repeat(True, len(self)) - else: - msg = ("Cannot compare a Categorical for op {op} with a " - "scalar, which is not a category.") - raise TypeError(msg.format(op=op)) - else: - - # allow categorical vs object dtype array comparisons for equality - # these are only positional comparisons - if op in ['__eq__', '__ne__']: - return getattr(np.array(self), op)(np.array(other)) - - msg = ("Cannot compare a Categorical for op {op} with type {typ}." - "\nIf you want to compare values, use 'np.asarray(cat) " - " other'.") - raise TypeError(msg.format(op=op, typ=type(other))) - - f.__name__ = op - - return f - - -def _maybe_to_categorical(array): - """ - Coerce to a categorical if a series is given. - - Internal use ONLY. - """ - if isinstance(array, (ABCSeries, ABCCategoricalIndex)): - return array._values - elif isinstance(array, np.ndarray): - return Categorical(array) - return array - - -_codes_doc = """The category codes of this categorical. - -Level codes are an array if integer which are the positions of the real -values in the categories array. - -There is not setter, use the other categorical methods and the normal item -setter to change values in the categorical. -""" - - -class Categorical(PandasObject): - """ - Represents a categorical variable in classic R / S-plus fashion - - `Categoricals` can only take on only a limited, and usually fixed, number - of possible values (`categories`). In contrast to statistical categorical - variables, a `Categorical` might have an order, but numerical operations - (additions, divisions, ...) are not possible. - - All values of the `Categorical` are either in `categories` or `np.nan`. - Assigning values outside of `categories` will raise a `ValueError`. 
Order - is defined by the order of the `categories`, not lexical order of the - values. - - Parameters - ---------- - values : list-like - The values of the categorical. If categories are given, values not in - categories will be replaced with NaN. - categories : Index-like (unique), optional - The unique categories for this categorical. If not given, the - categories are assumed to be the unique values of values. - ordered : boolean, (default False) - Whether or not this categorical is treated as a ordered categorical. - If not given, the resulting categorical will not be ordered. - dtype : CategoricalDtype - An instance of ``CategoricalDtype`` to use for this categorical - - .. versionadded:: 0.21.0 - - Attributes - ---------- - categories : Index - The categories of this categorical - codes : ndarray - The codes (integer positions, which point to the categories) of this - categorical, read only. - ordered : boolean - Whether or not this Categorical is ordered. - dtype : CategoricalDtype - The instance of ``CategoricalDtype`` storing the ``categories`` - and ``ordered``. - - .. versionadded:: 0.21.0 - - Methods - ------- - from_codes - __array__ - - Raises - ------ - ValueError - If the categories do not validate. - TypeError - If an explicit ``ordered=True`` is given but no `categories` and the - `values` are not sortable. - - Examples - -------- - >>> pd.Categorical([1, 2, 3, 1, 2, 3]) - [1, 2, 3, 1, 2, 3] - Categories (3, int64): [1, 2, 3] - - >>> pd.Categorical(['a', 'b', 'c', 'a', 'b', 'c']) - [a, b, c, a, b, c] - Categories (3, object): [a, b, c] - - Ordered `Categoricals` can be sorted according to the custom order - of the categories and can have a min and max value. - - >>> c = pd.Categorical(['a','b','c','a','b','c'], ordered=True, - ... categories=['c', 'b', 'a']) - >>> c - [a, b, c, a, b, c] - Categories (3, object): [c < b < a] - >>> c.min() - 'c' - - Notes - ----- - See the `user guide - `_ for more. 
- - See also - -------- - pandas.api.types.CategoricalDtype : Type for categorical data - CategoricalIndex : An Index with an underlying ``Categorical`` - """ - - # For comparisons, so that numpy uses our implementation if the compare - # ops, which raise - __array_priority__ = 1000 - _dtype = CategoricalDtype() - _deprecations = frozenset(['labels']) - _typ = 'categorical' - - def __init__(self, values, categories=None, ordered=None, dtype=None, - fastpath=False): - - # Ways of specifying the dtype (prioritized ordered) - # 1. dtype is a CategoricalDtype - # a.) with known categories, use dtype.categories - # b.) else with Categorical values, use values.dtype - # c.) else, infer from values - # d.) specifying dtype=CategoricalDtype and categories is an error - # 2. dtype is a string 'category' - # a.) use categories, ordered - # b.) use values.dtype - # c.) infer from values - # 3. dtype is None - # a.) use categories, ordered - # b.) use values.dtype - # c.) infer from values - - if dtype is not None: - # The dtype argument takes precedence over values.dtype (if any) - if isinstance(dtype, compat.string_types): - if dtype == 'category': - dtype = CategoricalDtype(categories, ordered) - else: - msg = "Unknown `dtype` {dtype}" - raise ValueError(msg.format(dtype=dtype)) - elif categories is not None or ordered is not None: - raise ValueError("Cannot specify both `dtype` and `categories`" - " or `ordered`.") - - categories = dtype.categories - ordered = dtype.ordered - - elif is_categorical(values): - # If no "dtype" was passed, use the one from "values", but honor - # the "ordered" and "categories" arguments - dtype = values.dtype._from_categorical_dtype(values.dtype, - categories, ordered) - else: - # If dtype=None and values is not categorical, create a new dtype - dtype = CategoricalDtype(categories, ordered) - - # At this point, dtype is always a CategoricalDtype - # if dtype.categories is None, we are inferring - - if fastpath: - self._codes = 
coerce_indexer_dtype(values, categories) - self._dtype = dtype - return - - # null_mask indicates missing values we want to exclude from inference. - # This means: only missing values in list-likes (not arrays/ndframes). - null_mask = np.array(False) - - # sanitize input - if is_categorical_dtype(values): - if dtype.categories is None: - dtype = CategoricalDtype(values.categories, dtype.ordered) - - elif not isinstance(values, (ABCIndexClass, ABCSeries)): - # _sanitize_array coerces np.nan to a string under certain versions - # of numpy - values = maybe_infer_to_datetimelike(values, convert_dates=True) - if not isinstance(values, np.ndarray): - values = _convert_to_list_like(values) - from pandas.core.series import _sanitize_array - # By convention, empty lists result in object dtype: - if len(values) == 0: - sanitize_dtype = 'object' - else: - sanitize_dtype = None - null_mask = isna(values) - if null_mask.any(): - values = [values[idx] for idx in np.where(~null_mask)[0]] - values = _sanitize_array(values, None, dtype=sanitize_dtype) - - if dtype.categories is None: - try: - codes, categories = factorize(values, sort=True) - except TypeError: - codes, categories = factorize(values, sort=False) - if dtype.ordered: - # raise, as we don't have a sortable data structure and so - # the user should give us one by specifying categories - raise TypeError("'values' is not ordered, please " - "explicitly specify the categories order " - "by passing in a categories argument.") - except ValueError: - - # FIXME - raise NotImplementedError("> 1 ndim Categorical are not " - "supported at this time") - - # we're inferring from values - dtype = CategoricalDtype(categories, dtype.ordered) - - elif is_categorical_dtype(values): - old_codes = (values.cat.codes if isinstance(values, ABCSeries) - else values.codes) - codes = _recode_for_categories(old_codes, values.dtype.categories, - dtype.categories) - - else: - codes = _get_codes_for_values(values, dtype.categories) - - if 
null_mask.any(): - # Reinsert -1 placeholders for previously removed missing values - full_codes = - np.ones(null_mask.shape, dtype=codes.dtype) - full_codes[~null_mask] = codes - codes = full_codes - - self._dtype = dtype - self._codes = coerce_indexer_dtype(codes, dtype.categories) - - @property - def categories(self): - """The categories of this categorical. - - Setting assigns new values to each category (effectively a rename of - each individual category). - - The assigned value has to be a list-like object. All items must be - unique and the number of items in the new categories must be the same - as the number of items in the old categories. - - Assigning to `categories` is a inplace operation! - - Raises - ------ - ValueError - If the new categories do not validate as categories or if the - number of new categories is unequal the number of old categories - - See also - -------- - rename_categories - reorder_categories - add_categories - remove_categories - remove_unused_categories - set_categories - """ - return self.dtype.categories - - @categories.setter - def categories(self, categories): - new_dtype = CategoricalDtype(categories, ordered=self.ordered) - if (self.dtype.categories is not None and - len(self.dtype.categories) != len(new_dtype.categories)): - raise ValueError("new categories need to have the same number of " - "items as the old categories!") - self._dtype = new_dtype - - @property - def ordered(self): - """Whether the categories have an ordered relationship""" - return self.dtype.ordered - - @property - def dtype(self): - """The :class:`~pandas.api.types.CategoricalDtype` for this instance""" - return self._dtype - - @property - def _constructor(self): - return Categorical - - def copy(self): - """ Copy constructor. 
""" - return self._constructor(values=self._codes.copy(), - categories=self.categories, - ordered=self.ordered, - fastpath=True) - - def astype(self, dtype, copy=True): - """ - Coerce this type to another dtype - - Parameters - ---------- - dtype : numpy dtype or pandas type - copy : bool, default True - By default, astype always returns a newly allocated object. - If copy is set to False and dtype is categorical, the original - object is returned. - - .. versionadded:: 0.19.0 - - """ - if is_categorical_dtype(dtype): - # GH 10696/18593 - dtype = self.dtype._update_dtype(dtype) - self = self.copy() if copy else self - if dtype == self.dtype: - return self - return self._set_dtype(dtype) - return np.array(self, dtype=dtype, copy=copy) - - @cache_readonly - def ndim(self): - """Number of dimensions of the Categorical """ - return self._codes.ndim - - @cache_readonly - def size(self): - """ return the len of myself """ - return len(self) - - @cache_readonly - def itemsize(self): - """ return the size of a single category """ - return self.categories.itemsize - - def tolist(self): - """ - Return a list of the values. - - These are each a scalar type, which is a Python scalar - (for str, int, float) or a pandas scalar - (for Timestamp/Timedelta/Interval/Period) - """ - if is_datetimelike(self.categories): - return [_maybe_box_datetimelike(x) for x in self] - return np.array(self).tolist() - - @property - def base(self): - """ compat, we are always our own object """ - return None - - @classmethod - def _from_inferred_categories(cls, inferred_categories, inferred_codes, - dtype): - """Construct a Categorical from inferred values - - For inferred categories (`dtype` is None) the categories are sorted. - For explicit `dtype`, the `inferred_categories` are cast to the - appropriate type. 
- - Parameters - ---------- - - inferred_categories : Index - inferred_codes : Index - dtype : CategoricalDtype or 'category' - - Returns - ------- - Categorical - """ - from pandas import Index, to_numeric, to_datetime, to_timedelta - - cats = Index(inferred_categories) - - known_categories = (isinstance(dtype, CategoricalDtype) and - dtype.categories is not None) - - if known_categories: - # Convert to a specialzed type with `dtype` if specified - if dtype.categories.is_numeric(): - cats = to_numeric(inferred_categories, errors='coerce') - elif is_datetime64_dtype(dtype.categories): - cats = to_datetime(inferred_categories, errors='coerce') - elif is_timedelta64_dtype(dtype.categories): - cats = to_timedelta(inferred_categories, errors='coerce') - - if known_categories: - # recode from observation oder to dtype.categories order - categories = dtype.categories - codes = _recode_for_categories(inferred_codes, cats, categories) - elif not cats.is_monotonic_increasing: - # sort categories and recode for unknown categories - unsorted = cats.copy() - categories = cats.sort_values() - codes = _recode_for_categories(inferred_codes, unsorted, - categories) - dtype = CategoricalDtype(categories, ordered=False) - else: - dtype = CategoricalDtype(cats, ordered=False) - codes = inferred_codes - - return cls(codes, dtype=dtype, fastpath=True) - - @classmethod - def from_codes(cls, codes, categories, ordered=False): - """ - Make a Categorical type from codes and categories arrays. - - This constructor is useful if you already have codes and categories and - so do not need the (computation intensive) factorization step, which is - usually done on the constructor. - - If your data does not follow this convention, please use the normal - constructor. - - Parameters - ---------- - codes : array-like, integers - An integer array, where each integer points to a category in - categories or -1 for NaN - categories : index-like - The categories for the categorical. 
Items need to be unique. - ordered : boolean, (default False) - Whether or not this categorical is treated as a ordered - categorical. If not given, the resulting categorical will be - unordered. - """ - try: - codes = np.asarray(codes, np.int64) - except: - raise ValueError( - "codes need to be convertible to an arrays of integers") - - categories = CategoricalDtype._validate_categories(categories) - - if len(codes) and (codes.max() >= len(categories) or codes.min() < -1): - raise ValueError("codes need to be between -1 and " - "len(categories)-1") - - return cls(codes, categories=categories, ordered=ordered, - fastpath=True) - - _codes = None - - def _get_codes(self): - """ Get the codes. - - Returns - ------- - codes : integer array view - A non writable view of the `codes` array. - """ - v = self._codes.view() - v.flags.writeable = False - return v - - def _set_codes(self, codes): - """ - Not settable by the user directly - """ - raise ValueError("cannot set Categorical codes directly") - - codes = property(fget=_get_codes, fset=_set_codes, doc=_codes_doc) - - def _get_labels(self): - """ - Get the category labels (deprecated). - - Deprecated, use .codes! - """ - warn("'labels' is deprecated. 
Use 'codes' instead", FutureWarning, - stacklevel=2) - return self.codes - - labels = property(fget=_get_labels, fset=_set_codes) - - def _set_categories(self, categories, fastpath=False): - """ Sets new categories inplace - - Parameters - ---------- - fastpath : boolean (default: False) - Don't perform validation of the categories for uniqueness or nulls - - Examples - -------- - >>> c = Categorical(['a', 'b']) - >>> c - [a, b] - Categories (2, object): [a, b] - - >>> c._set_categories(pd.Index(['a', 'c'])) - >>> c - [a, c] - Categories (2, object): [a, c] - """ - - if fastpath: - new_dtype = CategoricalDtype._from_fastpath(categories, - self.ordered) - else: - new_dtype = CategoricalDtype(categories, ordered=self.ordered) - if (not fastpath and self.dtype.categories is not None and - len(new_dtype.categories) != len(self.dtype.categories)): - raise ValueError("new categories need to have the same number of " - "items than the old categories!") - - self._dtype = new_dtype - - def _codes_for_groupby(self, sort): - """ - If sort=False, return a copy of self, coded with categories as - returned by .unique(), followed by any categories not appearing in - the data. If sort=True, return self. - - This method is needed solely to ensure the categorical index of the - GroupBy result has categories in the order of appearance in the data - (GH-8868). - - Parameters - ---------- - sort : boolean - The value of the sort parameter groupby was called with. - - Returns - ------- - Categorical - If sort=False, the new categories are set to the order of - appearance in codes (unless ordered=True, in which case the - original order is preserved), followed by any unrepresented - categories in the original order. 
- """ - - # Already sorted according to self.categories; all is fine - if sort: - return self - - # sort=False should order groups in as-encountered order (GH-8868) - cat = self.unique() - - # But for groupby to work, all categories should be present, - # including those missing from the data (GH-13179), which .unique() - # above dropped - cat.add_categories( - self.categories[~self.categories.isin(cat.categories)], - inplace=True) - - return self.reorder_categories(cat.categories) - - def _set_dtype(self, dtype): - """Internal method for directly updating the CategoricalDtype - - Parameters - ---------- - dtype : CategoricalDtype - - Notes - ----- - We don't do any validation here. It's assumed that the dtype is - a (valid) instance of `CategoricalDtype`. - """ - codes = _recode_for_categories(self.codes, self.categories, - dtype.categories) - return type(self)(codes, dtype=dtype, fastpath=True) - - def set_ordered(self, value, inplace=False): - """ - Sets the ordered attribute to the boolean value - - Parameters - ---------- - value : boolean to set whether this categorical is ordered (True) or - not (False) - inplace : boolean (default: False) - Whether or not to set the ordered attribute inplace or return a copy - of this categorical with ordered set to the value - """ - inplace = validate_bool_kwarg(inplace, 'inplace') - new_dtype = CategoricalDtype(self.categories, ordered=value) - cat = self if inplace else self.copy() - cat._dtype = new_dtype - if not inplace: - return cat - - def as_ordered(self, inplace=False): - """ - Sets the Categorical to be ordered - - Parameters - ---------- - inplace : boolean (default: False) - Whether or not to set the ordered attribute inplace or return a copy - of this categorical with ordered set to True - """ - inplace = validate_bool_kwarg(inplace, 'inplace') - return self.set_ordered(True, inplace=inplace) - - def as_unordered(self, inplace=False): - """ - Sets the Categorical to be unordered - - Parameters - ---------- - 
inplace : boolean (default: False) - Whether or not to set the ordered attribute inplace or return a copy - of this categorical with ordered set to False - """ - inplace = validate_bool_kwarg(inplace, 'inplace') - return self.set_ordered(False, inplace=inplace) - - def set_categories(self, new_categories, ordered=None, rename=False, - inplace=False): - """ Sets the categories to the specified new_categories. - - `new_categories` can include new categories (which will result in - unused categories) or remove old categories (which results in values - set to NaN). If `rename==True`, the categories will simple be renamed - (less or more items than in old categories will result in values set to - NaN or in unused categories respectively). - - This method can be used to perform more than one action of adding, - removing, and reordering simultaneously and is therefore faster than - performing the individual steps via the more specialised methods. - - On the other hand this methods does not do checks (e.g., whether the - old categories are included in the new categories on a reorder), which - can result in surprising changes, for example when using special string - dtypes on python3, which does not considers a S1 string equal to a - single char python string. - - Raises - ------ - ValueError - If new_categories does not validate as categories - - Parameters - ---------- - new_categories : Index-like - The categories in new order. - ordered : boolean, (default: False) - Whether or not the categorical is treated as a ordered categorical. - If not given, do not change the ordered information. - rename : boolean (default: False) - Whether or not the new_categories should be considered as a rename - of the old categories or as reordered categories. - inplace : boolean (default: False) - Whether or not to reorder the categories inplace or return a copy of - this categorical with reordered categories. 
- - Returns - ------- - cat : Categorical with reordered categories or None if inplace. - - See also - -------- - rename_categories - reorder_categories - add_categories - remove_categories - remove_unused_categories - """ - inplace = validate_bool_kwarg(inplace, 'inplace') - if ordered is None: - ordered = self.dtype.ordered - new_dtype = CategoricalDtype(new_categories, ordered=ordered) - - cat = self if inplace else self.copy() - if rename: - if (cat.dtype.categories is not None and - len(new_dtype.categories) < len(cat.dtype.categories)): - # remove all _codes which are larger and set to -1/NaN - self._codes[self._codes >= len(new_dtype.categories)] = -1 - else: - codes = _recode_for_categories(self.codes, self.categories, - new_dtype.categories) - cat._codes = codes - cat._dtype = new_dtype - - if not inplace: - return cat - - def rename_categories(self, new_categories, inplace=False): - """ Renames categories. - - Raises - ------ - ValueError - If new categories are list-like and do not have the same number of - items than the current categories or do not validate as categories - - Parameters - ---------- - new_categories : list-like, dict-like or callable - - * list-like: all items must be unique and the number of items in - the new categories must match the existing number of categories. - - * dict-like: specifies a mapping from - old categories to new. Categories not contained in the mapping - are passed through and extra categories in the mapping are - ignored. - - .. versionadded:: 0.21.0 - - * callable : a callable that is called on all items in the old - categories and whose return values comprise the new categories. - - .. versionadded:: 0.23.0 - - .. warning:: - - Currently, Series are considered list like. In a future version - of pandas they'll be considered dict-like. - - inplace : boolean (default: False) - Whether or not to rename the categories inplace or return a copy of - this categorical with renamed categories. 
- - Returns - ------- - cat : Categorical or None - With ``inplace=False``, the new categorical is returned. - With ``inplace=True``, there is no return value. - - See also - -------- - reorder_categories - add_categories - remove_categories - remove_unused_categories - set_categories - - Examples - -------- - >>> c = Categorical(['a', 'a', 'b']) - >>> c.rename_categories([0, 1]) - [0, 0, 1] - Categories (2, int64): [0, 1] - - For dict-like ``new_categories``, extra keys are ignored and - categories not in the dictionary are passed through - - >>> c.rename_categories({'a': 'A', 'c': 'C'}) - [A, A, b] - Categories (2, object): [A, b] - - You may also provide a callable to create the new categories - - >>> c.rename_categories(lambda x: x.upper()) - [A, A, B] - Categories (2, object): [A, B] - """ - inplace = validate_bool_kwarg(inplace, 'inplace') - cat = self if inplace else self.copy() - - if isinstance(new_categories, ABCSeries): - msg = ("Treating Series 'new_categories' as a list-like and using " - "the values. In a future version, 'rename_categories' will " - "treat Series like a dictionary.\n" - "For dict-like, use 'new_categories.to_dict()'\n" - "For list-like, use 'new_categories.values'.") - warn(msg, FutureWarning, stacklevel=2) - new_categories = list(new_categories) - - if is_dict_like(new_categories): - cat.categories = [new_categories.get(item, item) - for item in cat.categories] - elif callable(new_categories): - cat.categories = [new_categories(item) for item in cat.categories] - else: - cat.categories = new_categories - if not inplace: - return cat - - def reorder_categories(self, new_categories, ordered=None, inplace=False): - """ Reorders categories as specified in new_categories. - - `new_categories` need to include all old categories and no new category - items. 
- - Raises - ------ - ValueError - If the new categories do not contain all old category items or any - new ones - - Parameters - ---------- - new_categories : Index-like - The categories in new order. - ordered : boolean, optional - Whether or not the categorical is treated as a ordered categorical. - If not given, do not change the ordered information. - inplace : boolean (default: False) - Whether or not to reorder the categories inplace or return a copy of - this categorical with reordered categories. - - Returns - ------- - cat : Categorical with reordered categories or None if inplace. - - See also - -------- - rename_categories - add_categories - remove_categories - remove_unused_categories - set_categories - """ - inplace = validate_bool_kwarg(inplace, 'inplace') - if set(self.dtype.categories) != set(new_categories): - raise ValueError("items in new_categories are not the same as in " - "old categories") - return self.set_categories(new_categories, ordered=ordered, - inplace=inplace) - - def add_categories(self, new_categories, inplace=False): - """ Add new categories. - - `new_categories` will be included at the last/highest place in the - categories and will be unused directly after this call. - - Raises - ------ - ValueError - If the new categories include old categories or do not validate as - categories - - Parameters - ---------- - new_categories : category or list-like of category - The new categories to be included. - inplace : boolean (default: False) - Whether or not to add the categories inplace or return a copy of - this categorical with added categories. - - Returns - ------- - cat : Categorical with new categories added or None if inplace. 
- - See also - -------- - rename_categories - reorder_categories - remove_categories - remove_unused_categories - set_categories - """ - inplace = validate_bool_kwarg(inplace, 'inplace') - if not is_list_like(new_categories): - new_categories = [new_categories] - already_included = set(new_categories) & set(self.dtype.categories) - if len(already_included) != 0: - msg = ("new categories must not include old categories: " - "{already_included!s}") - raise ValueError(msg.format(already_included=already_included)) - new_categories = list(self.dtype.categories) + list(new_categories) - new_dtype = CategoricalDtype(new_categories, self.ordered) - - cat = self if inplace else self.copy() - cat._dtype = new_dtype - cat._codes = coerce_indexer_dtype(cat._codes, new_dtype.categories) - if not inplace: - return cat - - def remove_categories(self, removals, inplace=False): - """ Removes the specified categories. - - `removals` must be included in the old categories. Values which were in - the removed categories will be set to NaN - - Raises - ------ - ValueError - If the removals are not contained in the categories - - Parameters - ---------- - removals : category or list of categories - The categories which should be removed. - inplace : boolean (default: False) - Whether or not to remove the categories inplace or return a copy of - this categorical with removed categories. - - Returns - ------- - cat : Categorical with removed categories or None if inplace. 
- - See also - -------- - rename_categories - reorder_categories - add_categories - remove_unused_categories - set_categories - """ - inplace = validate_bool_kwarg(inplace, 'inplace') - if not is_list_like(removals): - removals = [removals] - - removal_set = set(list(removals)) - not_included = removal_set - set(self.dtype.categories) - new_categories = [c for c in self.dtype.categories - if c not in removal_set] - - # GH 10156 - if any(isna(removals)): - not_included = [x for x in not_included if notna(x)] - new_categories = [x for x in new_categories if notna(x)] - - if len(not_included) != 0: - msg = "removals must all be in old categories: {not_included!s}" - raise ValueError(msg.format(not_included=not_included)) - - return self.set_categories(new_categories, ordered=self.ordered, - rename=False, inplace=inplace) - - def remove_unused_categories(self, inplace=False): - """ Removes categories which are not used. - - Parameters - ---------- - inplace : boolean (default: False) - Whether or not to drop unused categories inplace or return a copy of - this categorical with unused categories dropped. - - Returns - ------- - cat : Categorical with unused categories dropped or None if inplace. - - See also - -------- - rename_categories - reorder_categories - add_categories - remove_categories - set_categories - """ - inplace = validate_bool_kwarg(inplace, 'inplace') - cat = self if inplace else self.copy() - idx, inv = np.unique(cat._codes, return_inverse=True) - - if idx.size != 0 and idx[0] == -1: # na sentinel - idx, inv = idx[1:], inv - 1 - - new_categories = cat.dtype.categories.take(idx) - new_dtype = CategoricalDtype._from_fastpath(new_categories, - ordered=self.ordered) - cat._dtype = new_dtype - cat._codes = coerce_indexer_dtype(inv, new_dtype.categories) - - if not inplace: - return cat - - def map(self, mapper): - """Apply mapper function to its categories (not codes). - - Parameters - ---------- - mapper : callable - Function to be applied. 
When all categories are mapped - to different categories, the result will be Categorical which has - the same order property as the original. Otherwise, the result will - be np.ndarray. - - Returns - ------- - applied : Categorical or Index. - - """ - new_categories = self.categories.map(mapper) - try: - return self.from_codes(self._codes.copy(), - categories=new_categories, - ordered=self.ordered) - except ValueError: - return np.take(new_categories, self._codes) - - __eq__ = _cat_compare_op('__eq__') - __ne__ = _cat_compare_op('__ne__') - __lt__ = _cat_compare_op('__lt__') - __gt__ = _cat_compare_op('__gt__') - __le__ = _cat_compare_op('__le__') - __ge__ = _cat_compare_op('__ge__') - - # for Series/ndarray like compat - @property - def shape(self): - """ Shape of the Categorical. - - For internal compatibility with numpy arrays. - - Returns - ------- - shape : tuple - """ - - return tuple([len(self._codes)]) - - def shift(self, periods): - """ - Shift Categorical by desired number of periods. - - Parameters - ---------- - periods : int - Number of periods to move, can be positive or negative - - Returns - ------- - shifted : Categorical - """ - # since categoricals always have ndim == 1, an axis parameter - # doesn't make any sense here. - codes = self.codes - if codes.ndim > 1: - raise NotImplementedError("Categorical with ndim > 1.") - if np.prod(codes.shape) and (periods != 0): - codes = np.roll(codes, _ensure_platform_int(periods), axis=0) - if periods > 0: - codes[:periods] = -1 - else: - codes[periods:] = -1 - - return self.from_codes(codes, categories=self.categories, - ordered=self.ordered) - - def __array__(self, dtype=None): - """ - The numpy array interface. 
- - Returns - ------- - values : numpy array - A numpy array of either the specified dtype or, - if dtype==None (default), the same dtype as - categorical.categories.dtype - """ - ret = take_1d(self.categories.values, self._codes) - if dtype and not is_dtype_equal(dtype, self.categories.dtype): - return np.asarray(ret, dtype) - return ret - - def __setstate__(self, state): - """Necessary for making this object picklable""" - if not isinstance(state, dict): - raise Exception('invalid pickle state') - - # Provide compatibility with pre-0.15.0 Categoricals. - if '_categories' not in state and '_levels' in state: - state['_categories'] = self.dtype._validate_categories(state.pop( - '_levels')) - if '_codes' not in state and 'labels' in state: - state['_codes'] = coerce_indexer_dtype( - state.pop('labels'), state['_categories']) - - # 0.16.0 ordered change - if '_ordered' not in state: - - # >=15.0 < 0.16.0 - if 'ordered' in state: - state['_ordered'] = state.pop('ordered') - else: - state['_ordered'] = False - - # 0.21.0 CategoricalDtype change - if '_dtype' not in state: - state['_dtype'] = CategoricalDtype(state['_categories'], - state['_ordered']) - - for k, v in compat.iteritems(state): - setattr(self, k, v) - - @property - def T(self): - return self - - @property - def nbytes(self): - return self._codes.nbytes + self.dtype.categories.values.nbytes - - def memory_usage(self, deep=False): - """ - Memory usage of my values - - Parameters - ---------- - deep : bool - Introspect the data deeply, interrogate - `object` dtypes for system-level memory consumption - - Returns - ------- - bytes used - - Notes - ----- - Memory usage does not include memory consumed by elements that - are not components of the array if deep=False - - See Also - -------- - numpy.ndarray.nbytes - """ - return self._codes.nbytes + self.dtype.categories.memory_usage( - deep=deep) - - @Substitution(klass='Categorical') - @Appender(_shared_docs['searchsorted']) - @deprecate_kwarg(old_arg_name='v', 
new_arg_name='value') - def searchsorted(self, value, side='left', sorter=None): - if not self.ordered: - raise ValueError("Categorical not ordered\nyou can use " - ".as_ordered() to change the Categorical to an " - "ordered one") - - from pandas.core.series import Series - - values_as_codes = _get_codes_for_values(Series(value).values, - self.categories) - - if -1 in values_as_codes: - raise ValueError("Value(s) to be inserted must be in categories.") - - return self.codes.searchsorted(values_as_codes, side=side, - sorter=sorter) - - def isna(self): - """ - Detect missing values - - Both missing values (-1 in .codes) and NA as a category are detected. - - Returns - ------- - a boolean array of whether my values are null - - See also - -------- - isna : top-level isna - isnull : alias of isna - Categorical.notna : boolean inverse of Categorical.isna - - """ - - ret = self._codes == -1 - - # String/object and float categories can hold np.nan - if self.categories.dtype.kind in ['S', 'O', 'f']: - if np.nan in self.categories: - nan_pos = np.where(isna(self.categories))[0] - # we only have one NA in categories - ret = np.logical_or(ret, self._codes == nan_pos) - return ret - isnull = isna - - def notna(self): - """ - Inverse of isna - - Both missing values (-1 in .codes) and NA as a category are detected as - null. - - Returns - ------- - a boolean array of whether my values are not null - - See also - -------- - notna : top-level notna - notnull : alias of notna - Categorical.isna : boolean inverse of Categorical.notna - - """ - return ~self.isna() - notnull = notna - - def put(self, *args, **kwargs): - """ - Replace specific elements in the Categorical with given values. - """ - raise NotImplementedError(("'put' is not yet implemented " - "for Categorical")) - - def dropna(self): - """ - Return the Categorical without null values. - - Both missing values (-1 in .codes) and NA as a category are detected. - NA is removed from the categories if present. 
- - Returns - ------- - valid : Categorical - """ - result = self[self.notna()] - if isna(result.categories).any(): - result = result.remove_categories([np.nan]) - return result - - def value_counts(self, dropna=True): - """ - Returns a Series containing counts of each category. - - Every category will have an entry, even those with a count of 0. - - Parameters - ---------- - dropna : boolean, default True - Don't include counts of NaN, even if NaN is a category. - - Returns - ------- - counts : Series - - See Also - -------- - Series.value_counts - - """ - from numpy import bincount - from pandas import isna, Series, CategoricalIndex - - obj = (self.remove_categories([np.nan]) if dropna and - isna(self.categories).any() else self) - code, cat = obj._codes, obj.categories - ncat, mask = len(cat), 0 <= code - ix, clean = np.arange(ncat), mask.all() - - if dropna or clean: - obs = code if clean else code[mask] - count = bincount(obs, minlength=ncat or None) - else: - count = bincount(np.where(mask, code, ncat)) - ix = np.append(ix, -1) - - ix = self._constructor(ix, dtype=self.dtype, - fastpath=True) - - return Series(count, index=CategoricalIndex(ix), dtype='int64') - - def get_values(self): - """ Return the values. - - For internal compatibility with pandas formatting. 
- - Returns - ------- - values : numpy array - A numpy array of the same dtype as categorical.categories.dtype or - Index if datetime / periods - """ - # if we are a datetime and period index, return Index to keep metadata - if is_datetimelike(self.categories): - return self.categories.take(self._codes, fill_value=np.nan) - return np.array(self) - - def check_for_ordered(self, op): - """ assert that we are ordered """ - if not self.ordered: - raise TypeError("Categorical is not ordered for operation {op}\n" - "you can use .as_ordered() to change the " - "Categorical to an ordered one\n".format(op=op)) - - def argsort(self, ascending=True, kind='quicksort', *args, **kwargs): - """ - Returns the indices that would sort the Categorical instance if - 'sort_values' was called. This function is implemented to provide - compatibility with numpy ndarray objects. - - While an ordering is applied to the category values, arg-sorting - in this context refers more to organizing and grouping together - based on matching category values. Thus, this function can be - called on an unordered Categorical instance unlike the functions - 'Categorical.min' and 'Categorical.max'. - - Returns - ------- - argsorted : numpy array - - See also - -------- - numpy.ndarray.argsort - """ - ascending = nv.validate_argsort_with_ascending(ascending, args, kwargs) - result = np.argsort(self._codes.copy(), kind=kind, **kwargs) - if not ascending: - result = result[::-1] - return result - - def sort_values(self, inplace=False, ascending=True, na_position='last'): - """ Sorts the Categorical by category value returning a new - Categorical by default. - - While an ordering is applied to the category values, sorting in this - context refers more to organizing and grouping together based on - matching category values. Thus, this function can be called on an - unordered Categorical instance unlike the functions 'Categorical.min' - and 'Categorical.max'. 
- - Parameters - ---------- - inplace : boolean, default False - Do operation in place. - ascending : boolean, default True - Order ascending. Passing False orders descending. The - ordering parameter provides the method by which the - category values are organized. - na_position : {'first', 'last'} (optional, default='last') - 'first' puts NaNs at the beginning - 'last' puts NaNs at the end - - Returns - ------- - y : Categorical or None - - See Also - -------- - Categorical.sort - Series.sort_values - - Examples - -------- - >>> c = pd.Categorical([1, 2, 2, 1, 5]) - >>> c - [1, 2, 2, 1, 5] - Categories (3, int64): [1, 2, 5] - >>> c.sort_values() - [1, 1, 2, 2, 5] - Categories (3, int64): [1, 2, 5] - >>> c.sort_values(ascending=False) - [5, 2, 2, 1, 1] - Categories (3, int64): [1, 2, 5] - - Inplace sorting can be done as well: - - >>> c.sort_values(inplace=True) - >>> c - [1, 1, 2, 2, 5] - Categories (3, int64): [1, 2, 5] - >>> - >>> c = pd.Categorical([1, 2, 2, 1, 5]) - - 'sort_values' behaviour with NaNs. 
Note that 'na_position' - is independent of the 'ascending' parameter: - - >>> c = pd.Categorical([np.nan, 2, 2, np.nan, 5]) - >>> c - [NaN, 2.0, 2.0, NaN, 5.0] - Categories (2, int64): [2, 5] - >>> c.sort_values() - [2.0, 2.0, 5.0, NaN, NaN] - Categories (2, int64): [2, 5] - >>> c.sort_values(ascending=False) - [5.0, 2.0, 2.0, NaN, NaN] - Categories (2, int64): [2, 5] - >>> c.sort_values(na_position='first') - [NaN, NaN, 2.0, 2.0, 5.0] - Categories (2, int64): [2, 5] - >>> c.sort_values(ascending=False, na_position='first') - [NaN, NaN, 5.0, 2.0, 2.0] - Categories (2, int64): [2, 5] - """ - inplace = validate_bool_kwarg(inplace, 'inplace') - if na_position not in ['last', 'first']: - msg = 'invalid na_position: {na_position!r}' - raise ValueError(msg.format(na_position=na_position)) - - codes = np.sort(self._codes) - if not ascending: - codes = codes[::-1] - - # NaN handling - na_mask = (codes == -1) - if na_mask.any(): - n_nans = len(codes[na_mask]) - if na_position == "first": - # in this case sort to the front - new_codes = codes.copy() - new_codes[0:n_nans] = -1 - new_codes[n_nans:] = codes[~na_mask] - codes = new_codes - elif na_position == "last": - # ... and to the end - new_codes = codes.copy() - pos = len(codes) - n_nans - new_codes[0:pos] = codes[~na_mask] - new_codes[pos:] = -1 - codes = new_codes - if inplace: - self._codes = codes - return - else: - return self._constructor(values=codes, categories=self.categories, - ordered=self.ordered, fastpath=True) - - def _values_for_rank(self): - """ - For correctly ranking ordered categorical data. See GH#15420 - - Ordered categorical data should be ranked on the basis of - codes with -1 translated to NaN. 
- - Returns - ------- - numpy array - - """ - from pandas import Series - if self.ordered: - values = self.codes - mask = values == -1 - if mask.any(): - values = values.astype('float64') - values[mask] = np.nan - elif self.categories.is_numeric(): - values = np.array(self) - else: - # reorder the categories (so rank can use the float codes) - # instead of passing an object array to rank - values = np.array( - self.rename_categories(Series(self.categories).rank().values) - ) - return values - - def ravel(self, order='C'): - """ Return a flattened (numpy) array. - - For internal compatibility with numpy arrays. - - Returns - ------- - raveled : numpy array - """ - return np.array(self) - - def view(self): - """Return a view of myself. - - For internal compatibility with numpy arrays. - - Returns - ------- - view : Categorical - Returns `self`! - """ - return self - - def to_dense(self): - """Return my 'dense' representation - - For internal compatibility with numpy arrays. - - Returns - ------- - dense : array - """ - return np.asarray(self) - - @deprecate_kwarg(old_arg_name='fill_value', new_arg_name='value') - def fillna(self, value=None, method=None, limit=None): - """ Fill NA/NaN values using the specified method. - - Parameters - ---------- - method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None - Method to use for filling holes in reindexed Series - pad / ffill: propagate last valid observation forward to next valid - backfill / bfill: use NEXT valid observation to fill gap - value : scalar, dict, Series - If a scalar value is passed it is used to fill all missing values. - Alternatively, a Series or dict can be used to fill in different - values for each index. The value should not be a list. The - value(s) passed should either be in the categories or should be - NaN. - limit : int, default None - (Not implemented yet for Categorical!) - If method is specified, this is the maximum number of consecutive - NaN values to forward/backward fill. 
In other words, if there is - a gap with more than this number of consecutive NaNs, it will only - be partially filled. If method is not specified, this is the - maximum number of entries along the entire axis where NaNs will be - filled. - - Returns - ------- - filled : Categorical with NA/NaN filled - """ - - if value is None: - value = np.nan - if limit is not None: - raise NotImplementedError("specifying a limit for fillna has not " - "been implemented yet") - - values = self._codes - - # Make sure that we also get NA in categories - if self.categories.dtype.kind in ['S', 'O', 'f']: - if np.nan in self.categories: - values = values.copy() - nan_pos = np.where(isna(self.categories))[0] - # we only have one NA in categories - values[values == nan_pos] = -1 - - # pad / bfill - if method is not None: - - values = self.to_dense().reshape(-1, len(self)) - values = interpolate_2d(values, method, 0, None, - value).astype(self.categories.dtype)[0] - values = _get_codes_for_values(values, self.categories) - - else: - - # If value is a dict or a Series (a dict value has already - # been converted to a Series) - if isinstance(value, ABCSeries): - if not value[~value.isin(self.categories)].isna().all(): - raise ValueError("fill value must be in categories") - - values_codes = _get_codes_for_values(value, self.categories) - indexer = np.where(values_codes != -1) - values[indexer] = values_codes[values_codes != -1] - - # If value is not a dict or Series it should be a scalar - elif is_scalar(value): - if not isna(value) and value not in self.categories: - raise ValueError("fill value must be in categories") - - mask = values == -1 - if mask.any(): - values = values.copy() - if isna(value): - values[mask] = -1 - else: - values[mask] = self.categories.get_loc(value) - - else: - raise TypeError('"value" parameter must be a scalar, dict ' - 'or Series, but you passed a ' - '"{0}"'.format(type(value).__name__)) - - return self._constructor(values, categories=self.categories, - 
ordered=self.ordered, fastpath=True) - - def take_nd(self, indexer, allow_fill=True, fill_value=None): - """ Take the codes by the indexer, fill with the fill_value. - - For internal compatibility with numpy arrays. - """ - - # filling must always be None/nan here - # but is passed thru internally - assert isna(fill_value) - - codes = take_1d(self._codes, indexer, allow_fill=True, fill_value=-1) - result = self._constructor(codes, categories=self.categories, - ordered=self.ordered, fastpath=True) - return result - - take = take_nd - - def _slice(self, slicer): - """ Return a slice of myself. - - For internal compatibility with numpy arrays. - """ - - # only allow 1 dimensional slicing, but can - # in a 2-d case be passd (slice(None),....) - if isinstance(slicer, tuple) and len(slicer) == 2: - if not is_null_slice(slicer[0]): - raise AssertionError("invalid slicing for a 1-ndim " - "categorical") - slicer = slicer[1] - - _codes = self._codes[slicer] - return self._constructor(values=_codes, categories=self.categories, - ordered=self.ordered, fastpath=True) - - def __len__(self): - """The length of this Categorical.""" - return len(self._codes) - - def __iter__(self): - """Returns an Iterator over the values of this Categorical.""" - return iter(self.get_values()) - - def _tidy_repr(self, max_vals=10, footer=True): - """ a short repr displaying only max_vals and an optional (but default - footer) - """ - num = max_vals // 2 - head = self[:num]._get_repr(length=False, footer=False) - tail = self[-(max_vals - num):]._get_repr(length=False, footer=False) - - result = u('{head}, ..., {tail}').format(head=head[:-1], tail=tail[1:]) - if footer: - result = u('{result}\n{footer}').format(result=result, - footer=self._repr_footer()) - - return compat.text_type(result) - - def _repr_categories(self): - """ return the base repr for the categories """ - max_categories = (10 if get_option("display.max_categories") == 0 else - get_option("display.max_categories")) - from 
pandas.io.formats import format as fmt - if len(self.categories) > max_categories: - num = max_categories // 2 - head = fmt.format_array(self.categories[:num], None) - tail = fmt.format_array(self.categories[-num:], None) - category_strs = head + ["..."] + tail - else: - category_strs = fmt.format_array(self.categories, None) - - # Strip all leading spaces, which format_array adds for columns... - category_strs = [x.strip() for x in category_strs] - return category_strs - - def _repr_categories_info(self): - """ Returns a string representation of the footer.""" - - category_strs = self._repr_categories() - dtype = getattr(self.categories, 'dtype_str', - str(self.categories.dtype)) - - levheader = "Categories ({length}, {dtype}): ".format( - length=len(self.categories), dtype=dtype) - width, height = get_terminal_size() - max_width = get_option("display.width") or width - if com.in_ipython_frontend(): - # 0 = no breaks - max_width = 0 - levstring = "" - start = True - cur_col_len = len(levheader) # header - sep_len, sep = (3, " < ") if self.ordered else (2, ", ") - linesep = sep.rstrip() + "\n" # remove whitespace - for val in category_strs: - if max_width != 0 and cur_col_len + sep_len + len(val) > max_width: - levstring += linesep + (" " * (len(levheader) + 1)) - cur_col_len = len(levheader) + 1 # header + a whitespace - elif not start: - levstring += sep - cur_col_len += len(val) - levstring += val - start = False - # replace to simple save space by - return levheader + "[" + levstring.replace(" < ... < ", " ... 
") + "]" - - def _repr_footer(self): - - return u('Length: {length}\n{info}').format( - length=len(self), info=self._repr_categories_info()) - - def _get_repr(self, length=True, na_rep='NaN', footer=True): - from pandas.io.formats import format as fmt - formatter = fmt.CategoricalFormatter(self, length=length, - na_rep=na_rep, footer=footer) - result = formatter.to_string() - return compat.text_type(result) - - def __unicode__(self): - """ Unicode representation. """ - _maxlen = 10 - if len(self._codes) > _maxlen: - result = self._tidy_repr(_maxlen) - elif len(self._codes) > 0: - result = self._get_repr(length=len(self) > _maxlen) - else: - msg = self._get_repr(length=False, footer=True).replace("\n", ", ") - result = ('[], {repr_msg}'.format(repr_msg=msg)) - - return result - - def _maybe_coerce_indexer(self, indexer): - """ return an indexer coerced to the codes dtype """ - if isinstance(indexer, np.ndarray) and indexer.dtype.kind == 'i': - indexer = indexer.astype(self._codes.dtype) - return indexer - - def __getitem__(self, key): - """ Return an item. """ - if isinstance(key, (int, np.integer)): - i = self._codes[key] - if i == -1: - return np.nan - else: - return self.categories[i] - else: - return self._constructor(values=self._codes[key], - categories=self.categories, - ordered=self.ordered, fastpath=True) - - def __setitem__(self, key, value): - """ Item assignment. 
- - - Raises - ------ - ValueError - If (one or more) Value is not in categories or if a assigned - `Categorical` does not have the same categories - """ - - # require identical categories set - if isinstance(value, Categorical): - if not value.categories.equals(self.categories): - raise ValueError("Cannot set a Categorical with another, " - "without identical categories") - - rvalue = value if is_list_like(value) else [value] - - from pandas import Index - to_add = Index(rvalue).difference(self.categories) - - # no assignments of values not in categories, but it's always ok to set - # something to np.nan - if len(to_add) and not isna(to_add).all(): - raise ValueError("Cannot setitem on a Categorical with a new " - "category, set the categories first") - - # set by position - if isinstance(key, (int, np.integer)): - pass - - # tuple of indexers (dataframe) - elif isinstance(key, tuple): - # only allow 1 dimensional slicing, but can - # in a 2-d case be passd (slice(None),....) - if len(key) == 2: - if not is_null_slice(key[0]): - raise AssertionError("invalid slicing for a 1-ndim " - "categorical") - key = key[1] - elif len(key) == 1: - key = key[0] - else: - raise AssertionError("invalid slicing for a 1-ndim " - "categorical") - - # slicing in Series or Categorical - elif isinstance(key, slice): - pass - - # Array of True/False in Series or Categorical - else: - # There is a bug in numpy, which does not accept a Series as a - # indexer - # https://github.com/pandas-dev/pandas/issues/6168 - # https://github.com/numpy/numpy/issues/4240 -> fixed in numpy 1.9 - # FIXME: remove when numpy 1.9 is the lowest numpy version pandas - # accepts... 
- key = np.asarray(key) - - lindexer = self.categories.get_indexer(rvalue) - - # FIXME: the following can be removed after GH7820 is fixed: - # https://github.com/pandas-dev/pandas/issues/7820 - # float categories do currently return -1 for np.nan, even if np.nan is - # included in the index -> "repair" this here - if isna(rvalue).any() and isna(self.categories).any(): - nan_pos = np.where(isna(self.categories))[0] - lindexer[lindexer == -1] = nan_pos - - lindexer = self._maybe_coerce_indexer(lindexer) - self._codes[key] = lindexer - - def _reverse_indexer(self): - """ - Compute the inverse of a categorical, returning - a dict of categories -> indexers. - - *This is an internal function* - - Returns - ------- - dict of categories -> indexers - - Example - ------- - In [1]: c = pd.Categorical(list('aabca')) - - In [2]: c - Out[2]: - [a, a, b, c, a] - Categories (3, object): [a, b, c] - - In [3]: c.categories - Out[3]: Index([u'a', u'b', u'c'], dtype='object') - - In [4]: c.codes - Out[4]: array([0, 0, 1, 2, 0], dtype=int8) - - In [5]: c._reverse_indexer() - Out[5]: {'a': array([0, 1, 4]), 'b': array([2]), 'c': array([3])} - - """ - categories = self.categories - r, counts = libalgos.groupsort_indexer(self.codes.astype('int64'), - categories.size) - counts = counts.cumsum() - result = [r[counts[indexer]:counts[indexer + 1]] - for indexer in range(len(counts) - 1)] - result = dict(zip(categories, result)) - return result - - # reduction ops # - def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None, - filter_type=None, **kwds): - """ perform the reduction type operation """ - func = getattr(self, name, None) - if func is None: - msg = 'Categorical cannot perform the operation {op}' - raise TypeError(msg.format(op=name)) - return func(numeric_only=numeric_only, **kwds) - - def min(self, numeric_only=None, **kwargs): - """ The minimum value of the object. - - Only ordered `Categoricals` have a minimum! 
- - Raises - ------ - TypeError - If the `Categorical` is not `ordered`. - - Returns - ------- - min : the minimum of this `Categorical` - """ - self.check_for_ordered('min') - if numeric_only: - good = self._codes != -1 - pointer = self._codes[good].min(**kwargs) - else: - pointer = self._codes.min(**kwargs) - if pointer == -1: - return np.nan - else: - return self.categories[pointer] - - def max(self, numeric_only=None, **kwargs): - """ The maximum value of the object. - - Only ordered `Categoricals` have a maximum! - - Raises - ------ - TypeError - If the `Categorical` is not `ordered`. - - Returns - ------- - max : the maximum of this `Categorical` - """ - self.check_for_ordered('max') - if numeric_only: - good = self._codes != -1 - pointer = self._codes[good].max(**kwargs) - else: - pointer = self._codes.max(**kwargs) - if pointer == -1: - return np.nan - else: - return self.categories[pointer] - - def mode(self): - """ - Returns the mode(s) of the Categorical. - - Always returns `Categorical` even if only one value. - - Returns - ------- - modes : `Categorical` (sorted) - """ - - import pandas._libs.hashtable as htable - good = self._codes != -1 - values = sorted(htable.mode_int64(_ensure_int64(self._codes[good]))) - result = self._constructor(values=values, categories=self.categories, - ordered=self.ordered, fastpath=True) - return result - - def unique(self): - """ - Return the ``Categorical`` which ``categories`` and ``codes`` are - unique. Unused categories are NOT returned. - - - unordered category: values and categories are sorted by appearance - order. - - ordered category: values are sorted by appearance order, categories - keeps existing order. - - Returns - ------- - unique values : ``Categorical`` - - Examples - -------- - An unordered Categorical will return categories in the - order of appearance. 
- - >>> pd.Categorical(list('baabc')) - [b, a, c] - Categories (3, object): [b, a, c] - - >>> pd.Categorical(list('baabc'), categories=list('abc')) - [b, a, c] - Categories (3, object): [b, a, c] - - An ordered Categorical preserves the category ordering. - - >>> pd.Categorical(list('baabc'), - ... categories=list('abc'), - ... ordered=True) - [b, a, c] - Categories (3, object): [a < b < c] - - See Also - -------- - unique - CategoricalIndex.unique - Series.unique - - """ - - # unlike np.unique, unique1d does not sort - unique_codes = unique1d(self.codes) - cat = self.copy() - - # keep nan in codes - cat._codes = unique_codes - - # exclude nan from indexer for categories - take_codes = unique_codes[unique_codes != -1] - if self.ordered: - take_codes = sorted(take_codes) - return cat.set_categories(cat.categories.take(take_codes)) - - def equals(self, other): - """ - Returns True if categorical arrays are equal. - - Parameters - ---------- - other : `Categorical` - - Returns - ------- - are_equal : boolean - """ - return (self.is_dtype_equal(other) and - np.array_equal(self._codes, other._codes)) - - def is_dtype_equal(self, other): - """ - Returns True if categoricals are the same dtype - same categories, and same ordered - - Parameters - ---------- - other : Categorical - - Returns - ------- - are_equal : boolean - """ - - try: - return hash(self.dtype) == hash(other.dtype) - except (AttributeError, TypeError): - return False - - def describe(self): - """ Describes this Categorical - - Returns - ------- - description: `DataFrame` - A dataframe with frequency and counts by category. - """ - counts = self.value_counts(dropna=False) - freqs = counts / float(counts.sum()) - - from pandas.core.reshape.concat import concat - result = concat([counts, freqs], axis=1) - result.columns = ['counts', 'freqs'] - result.index.name = 'categories' - - return result - - def repeat(self, repeats, *args, **kwargs): - """ - Repeat elements of a Categorical. 
- - See also - -------- - numpy.ndarray.repeat - - """ - nv.validate_repeat(args, kwargs) - codes = self._codes.repeat(repeats) - return self._constructor(values=codes, categories=self.categories, - ordered=self.ordered, fastpath=True) - -# The Series.cat accessor - - -class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): - """ - Accessor object for categorical properties of the Series values. - - Be aware that assigning to `categories` is a inplace operation, while all - methods return new categorical data per default (but can be called with - `inplace=True`). - - Examples - -------- - >>> s.cat.categories - >>> s.cat.categories = list('abc') - >>> s.cat.rename_categories(list('cab')) - >>> s.cat.reorder_categories(list('cab')) - >>> s.cat.add_categories(['d','e']) - >>> s.cat.remove_categories(['d']) - >>> s.cat.remove_unused_categories() - >>> s.cat.set_categories(list('abcde')) - >>> s.cat.as_ordered() - >>> s.cat.as_unordered() - - """ - - def __init__(self, values, index, name): - self.categorical = values - self.index = index - self.name = name - self._freeze() - - def _delegate_property_get(self, name): - return getattr(self.categorical, name) - - def _delegate_property_set(self, name, new_values): - return setattr(self.categorical, name, new_values) - - @property - def codes(self): - from pandas import Series - return Series(self.categorical.codes, index=self.index) - - def _delegate_method(self, name, *args, **kwargs): - from pandas import Series - method = getattr(self.categorical, name) - res = method(*args, **kwargs) - if res is not None: - return Series(res, index=self.index, name=self.name) - - @classmethod - def _make_accessor(cls, data): - if not is_categorical_dtype(data.dtype): - raise AttributeError("Can only use .cat accessor with a " - "'category' dtype") - return CategoricalAccessor(data.values, data.index, - getattr(data, 'name', None),) - - -CategoricalAccessor._add_delegate_accessors(delegate=Categorical, - 
accessors=["categories", - "ordered"], - typ='property') -CategoricalAccessor._add_delegate_accessors(delegate=Categorical, accessors=[ - "rename_categories", "reorder_categories", "add_categories", - "remove_categories", "remove_unused_categories", "set_categories", - "as_ordered", "as_unordered"], typ='method') - -# utility routines - - -def _get_codes_for_values(values, categories): - """ - utility routine to turn values into codes given the specified categories - """ - - from pandas.core.algorithms import _get_data_algo, _hashtables - if not is_dtype_equal(values.dtype, categories.dtype): - values = _ensure_object(values) - categories = _ensure_object(categories) - - (hash_klass, vec_klass), vals = _get_data_algo(values, _hashtables) - (_, _), cats = _get_data_algo(categories, _hashtables) - t = hash_klass(len(cats)) - t.map_locations(cats) - return coerce_indexer_dtype(t.lookup(vals), cats) - - -def _recode_for_categories(codes, old_categories, new_categories): - """ - Convert a set of codes for to a new set of categories - - Parameters - ---------- - codes : array - old_categories, new_categories : Index - - Returns - ------- - new_codes : array - - Examples - -------- - >>> old_cat = pd.Index(['b', 'a', 'c']) - >>> new_cat = pd.Index(['a', 'b']) - >>> codes = np.array([0, 1, 1, 2]) - >>> _recode_for_categories(codes, old_cat, new_cat) - array([ 1, 0, 0, -1]) - """ - from pandas.core.algorithms import take_1d - - if len(old_categories) == 0: - # All null anyway, so just retain the nulls - return codes.copy() - indexer = coerce_indexer_dtype(new_categories.get_indexer(old_categories), - new_categories) - new_codes = take_1d(indexer, codes.copy(), fill_value=-1) - return new_codes - - -def _convert_to_list_like(list_like): - if hasattr(list_like, "dtype"): - return list_like - if isinstance(list_like, list): - return list_like - if (is_sequence(list_like) or isinstance(list_like, tuple) or - isinstance(list_like, types.GeneratorType)): - return list(list_like) 
- elif is_scalar(list_like): - return [list_like] - else: - # is this reached? - return [list_like] - - -def _factorize_from_iterable(values): - """ - Factorize an input `values` into `categories` and `codes`. Preserves - categorical dtype in `categories`. - - *This is an internal function* - - Parameters - ---------- - values : list-like - - Returns - ------- - codes : ndarray - categories : Index - If `values` has a categorical dtype, then `categories` is - a CategoricalIndex keeping the categories and order of `values`. - """ - from pandas.core.indexes.category import CategoricalIndex - - if not is_list_like(values): - raise TypeError("Input must be list-like") - - if is_categorical(values): - if isinstance(values, (ABCCategoricalIndex, ABCSeries)): - values = values._values - categories = CategoricalIndex(values.categories, - categories=values.categories, - ordered=values.ordered) - codes = values.codes - else: - cat = Categorical(values, ordered=True) - categories = cat.categories - codes = cat.codes - return codes, categories - - -def _factorize_from_iterables(iterables): - """ - A higher-level wrapper over `_factorize_from_iterable`. - - *This is an internal function* - - Parameters - ---------- - iterables : list-like of list-likes - - Returns - ------- - codes_list : list of ndarrays - categories_list : list of Indexes - - Notes - ----- - See `_factorize_from_iterable` for more info. - """ - if len(iterables) == 0: - # For consistency, it should return a list of 2 lists. 
- return [[], []] - return map(list, lzip(*[_factorize_from_iterable(it) for it in iterables])) +from pandas.core.arrays import Categorical # noqa +from pandas.core.dtypes.dtypes import CategoricalDtype # noqa diff --git a/pandas/core/computation/align.py b/pandas/core/computation/align.py index 2e912b0075bfd..22c8b641cf974 100644 --- a/pandas/core/computation/align.py +++ b/pandas/core/computation/align.py @@ -10,7 +10,7 @@ import pandas as pd from pandas import compat from pandas.errors import PerformanceWarning -from pandas.core.common import flatten +import pandas.core.common as com from pandas.core.computation.common import _result_type_many @@ -117,7 +117,7 @@ def _align(terms): """Align a set of terms""" try: # flatten the parse tree (a nested list, really) - terms = list(flatten(terms)) + terms = list(com.flatten(terms)) except TypeError: # can't iterate so it must just be a constant or single variable if isinstance(terms.value, pd.core.generic.NDFrame): diff --git a/pandas/core/computation/expressions.py b/pandas/core/computation/expressions.py index 1dc19d33f3365..781101f5804e6 100644 --- a/pandas/core/computation/expressions.py +++ b/pandas/core/computation/expressions.py @@ -8,7 +8,8 @@ import warnings import numpy as np -from pandas.core.common import _values_from_object + +import pandas.core.common as com from pandas.core.computation.check import _NUMEXPR_INSTALLED from pandas.core.config import get_option @@ -122,8 +123,8 @@ def _evaluate_numexpr(op, op_str, a, b, truediv=True, def _where_standard(cond, a, b): - return np.where(_values_from_object(cond), _values_from_object(a), - _values_from_object(b)) + return np.where(com._values_from_object(cond), com._values_from_object(a), + com._values_from_object(b)) def _where_numexpr(cond, a, b): diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index c3307c60b8ed9..da42cdbf10233 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -11,8 +11,7 @@ """ import 
pandas.core.config as cf from pandas.core.config import (is_int, is_bool, is_text, is_instance_factory, - is_one_of_factory, get_default_val, - is_callable) + is_one_of_factory, is_callable) from pandas.io.formats.console import detect_console_encoding # compute @@ -170,11 +169,6 @@ def use_numexpr_cb(key): frame is truncated (e.g. not display all rows and/or columns) """ -pc_line_width_doc = """ -: int - Deprecated. -""" - pc_east_asian_width_doc = """ : boolean Whether to use the Unicode East Asian Width to calculate the display text @@ -223,11 +217,6 @@ def use_numexpr_cb(key): terminal and hence it is not possible to correctly detect the width. """ -pc_height_doc = """ -: int - Deprecated. -""" - pc_chop_threshold_doc = """ : float or None if set to a float value, all float values smaller then the given threshold @@ -344,13 +333,8 @@ def table_schema_cb(key): validator=is_one_of_factory([True, False, 'truncate'])) cf.register_option('chop_threshold', None, pc_chop_threshold_doc) cf.register_option('max_seq_items', 100, pc_max_seq_items) - cf.register_option('height', 60, pc_height_doc, - validator=is_instance_factory([type(None), int])) cf.register_option('width', 80, pc_width_doc, validator=is_instance_factory([type(None), int])) - # redirected to width, make defval identical - cf.register_option('line_width', get_default_val('display.width'), - pc_line_width_doc) cf.register_option('memory_usage', True, pc_memory_usage_doc, validator=is_one_of_factory([None, True, False, 'deep'])) diff --git a/pandas/core/datetools.py b/pandas/core/datetools.py index 3444d09c6ed1b..83167a45369c4 100644 --- a/pandas/core/datetools.py +++ b/pandas/core/datetools.py @@ -1,4 +1,8 @@ -"""A collection of random tools for dealing with dates in Python""" +"""A collection of random tools for dealing with dates in Python. + +.. deprecated:: 0.19.0 + Use pandas.tseries module instead. 
+""" # flake8: noqa diff --git a/pandas/core/dtypes/api.py b/pandas/core/dtypes/api.py index a2180ecc4632f..738e1ea9062f6 100644 --- a/pandas/core/dtypes/api.py +++ b/pandas/core/dtypes/api.py @@ -55,6 +55,7 @@ is_dict_like, is_iterator, is_file_like, + is_array_like, is_list_like, is_hashable, is_named_tuple) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 5fcb5f09dfae7..5155662d2f97d 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -20,7 +20,7 @@ is_integer_dtype, is_datetime_or_timedelta_dtype, is_bool_dtype, is_scalar, - _string_dtypes, + is_string_dtype, _string_dtypes, pandas_dtype, _ensure_int8, _ensure_int16, _ensure_int32, _ensure_int64, @@ -649,40 +649,48 @@ def astype_nansafe(arr, dtype, copy=True): if issubclass(dtype.type, text_type): # in Py3 that's str, in Py2 that's unicode return lib.astype_unicode(arr.ravel()).reshape(arr.shape) + elif issubclass(dtype.type, string_types): return lib.astype_str(arr.ravel()).reshape(arr.shape) + elif is_datetime64_dtype(arr): - if dtype == object: + if is_object_dtype(dtype): return tslib.ints_to_pydatetime(arr.view(np.int64)) elif dtype == np.int64: return arr.view(dtype) - elif dtype != _NS_DTYPE: - raise TypeError("cannot astype a datetimelike from [{from_dtype}] " - "to [{to_dtype}]".format(from_dtype=arr.dtype, - to_dtype=dtype)) - return arr.astype(_NS_DTYPE) + + # allow frequency conversions + if dtype.kind == 'M': + return arr.astype(dtype) + + raise TypeError("cannot astype a datetimelike from [{from_dtype}] " + "to [{to_dtype}]".format(from_dtype=arr.dtype, + to_dtype=dtype)) + elif is_timedelta64_dtype(arr): - if dtype == np.int64: - return arr.view(dtype) - elif dtype == object: + if is_object_dtype(dtype): return tslib.ints_to_pytimedelta(arr.view(np.int64)) + elif dtype == np.int64: + return arr.view(dtype) # in py3, timedelta64[ns] are int64 - elif ((PY3 and dtype not in [_INT64_DTYPE, _TD_DTYPE]) or - (not PY3 and dtype != _TD_DTYPE)): + if ((PY3 
and dtype not in [_INT64_DTYPE, _TD_DTYPE]) or + (not PY3 and dtype != _TD_DTYPE)): # allow frequency conversions + # we return a float here! if dtype.kind == 'm': mask = isna(arr) result = arr.astype(dtype).astype(np.float64) result[mask] = np.nan return result + elif dtype == _TD_DTYPE: + return arr.astype(_TD_DTYPE, copy=copy) - raise TypeError("cannot astype a timedelta from [{from_dtype}] " - "to [{to_dtype}]".format(from_dtype=arr.dtype, - to_dtype=dtype)) + raise TypeError("cannot astype a timedelta from [{from_dtype}] " + "to [{to_dtype}]".format(from_dtype=arr.dtype, + to_dtype=dtype)) - return arr.astype(_TD_DTYPE) elif (np.issubdtype(arr.dtype, np.floating) and np.issubdtype(dtype, np.integer)): @@ -690,9 +698,21 @@ def astype_nansafe(arr, dtype, copy=True): raise ValueError('Cannot convert non-finite values (NA or inf) to ' 'integer') - elif arr.dtype == np.object_ and np.issubdtype(dtype.type, np.integer): + elif is_object_dtype(arr): + # work around NumPy brokenness, #1987 - return lib.astype_intsafe(arr.ravel(), dtype).reshape(arr.shape) + if np.issubdtype(dtype.type, np.integer): + return lib.astype_intsafe(arr.ravel(), dtype).reshape(arr.shape) + + # if we have a datetime/timedelta array of objects + # then coerce to a proper dtype and recall astype_nansafe + + elif is_datetime64_dtype(dtype): + from pandas import to_datetime + return astype_nansafe(to_datetime(arr).values, dtype, copy=copy) + elif is_timedelta64_dtype(dtype): + from pandas import to_timedelta + return astype_nansafe(to_timedelta(arr).values, dtype, copy=copy) if dtype.name in ("datetime64", "timedelta64"): msg = ("Passing in '{dtype}' dtype with no frequency is " @@ -703,7 +723,7 @@ def astype_nansafe(arr, dtype, copy=True): dtype = np.dtype(dtype.name + "[ns]") if copy: - return arr.astype(dtype) + return arr.astype(dtype, copy=True) return arr.view(dtype) @@ -1003,12 +1023,20 @@ def maybe_cast_to_datetime(value, dtype, errors='raise'): if is_datetime64: value = 
to_datetime(value, errors=errors)._values elif is_datetime64tz: - # input has to be UTC at this point, so just - # localize - value = (to_datetime(value, errors=errors) - .tz_localize('UTC') - .tz_convert(dtype.tz) - ) + # The string check can be removed once issue #13712 + # is solved. String data that is passed with a + # datetime64tz is assumed to be naive which should + # be localized to the timezone. + is_dt_string = is_string_dtype(value) + value = to_datetime(value, errors=errors) + if is_dt_string: + # Strings here are naive, so directly localize + value = value.tz_localize(dtype.tz) + else: + # Numeric values are UTC at this point, + # so localize and convert + value = (value.tz_localize('UTC') + .tz_convert(dtype.tz)) elif is_timedelta64: value = to_timedelta(value, errors=errors)._values except (AttributeError, ValueError, TypeError): diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index e2ee3deb5396e..dca9a5fde0d74 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -4,6 +4,7 @@ from pandas.compat import (string_types, text_type, binary_type, PY3, PY36) from pandas._libs import algos, lib +from pandas._libs.tslibs import conversion from .dtypes import (CategoricalDtype, CategoricalDtypeType, DatetimeTZDtype, DatetimeTZDtypeType, PeriodDtype, PeriodDtypeType, @@ -21,8 +22,8 @@ for t in ['O', 'int8', 'uint8', 'int16', 'uint16', 'int32', 'uint32', 'int64', 'uint64']]) -_NS_DTYPE = np.dtype('M8[ns]') -_TD_DTYPE = np.dtype('m8[ns]') +_NS_DTYPE = conversion.NS_DTYPE +_TD_DTYPE = conversion.TD_DTYPE _INT64_DTYPE = np.dtype(np.int64) # oh the troubles to reduce import time @@ -31,6 +32,9 @@ _ensure_float64 = algos.ensure_float64 _ensure_float32 = algos.ensure_float32 +_ensure_datetime64ns = conversion.ensure_datetime64ns +_ensure_timedelta64ns = conversion.ensure_timedelta64ns + def _ensure_float(arr): """ @@ -758,10 +762,9 @@ def is_dtype_union_equal(source, target): def is_any_int_dtype(arr_or_dtype): - """ 
- DEPRECATED: This function will be removed in a future version. + """Check whether the provided array or dtype is of an integer dtype. - Check whether the provided array or dtype is of an integer dtype. + .. deprecated:: 0.20.0 In this function, timedelta64 instances are also considered "any-integer" type objects and will return True. @@ -1557,12 +1560,11 @@ def is_float_dtype(arr_or_dtype): def is_floating_dtype(arr_or_dtype): - """ - DEPRECATED: This function will be removed in a future version. - - Check whether the provided array or dtype is an instance of + """Check whether the provided array or dtype is an instance of numpy's float dtype. + .. deprecated:: 0.20.0 + Unlike, `is_float_dtype`, this check is a lot stricter, as it requires `isinstance` of `np.floating` and not `issubclass`. """ diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index cd98064dee86e..3e54ce61cd5b2 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -314,7 +314,7 @@ def union_categoricals(to_union, sort_categories=False, ignore_order=False): Categories (3, object): [b, c, a] """ from pandas import Index, Categorical, CategoricalIndex, Series - from pandas.core.categorical import _recode_for_categories + from pandas.core.arrays.categorical import _recode_for_categories if len(to_union) == 0: raise ValueError('No Categoricals to union') @@ -339,7 +339,16 @@ def _maybe_unwrap(x): # identical categories - fastpath categories = first.categories ordered = first.ordered - new_codes = np.concatenate([c.codes for c in to_union]) + + if all(first.categories.equals(other.categories) + for other in to_union[1:]): + new_codes = np.concatenate([c.codes for c in to_union]) + else: + codes = [first.codes] + [_recode_for_categories(other.codes, + other.categories, + first.categories) + for other in to_union[1:]] + new_codes = np.concatenate(codes) if sort_categories and not ignore_order and ordered: raise TypeError("Cannot use sort_categories=True 
with " diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 08773354d44d8..1eb87aa99fd1e 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -626,6 +626,7 @@ class IntervalDtype(ExtensionDtype): THIS IS NOT A REAL NUMPY DTYPE """ + name = 'interval' type = IntervalDtypeType kind = None str = '|O08' @@ -653,8 +654,8 @@ def __new__(cls, subtype=None): u.subtype = None return u elif (isinstance(subtype, compat.string_types) and - subtype == 'interval'): - subtype = '' + subtype.lower() == 'interval'): + subtype = None else: if isinstance(subtype, compat.string_types): m = cls._match.search(subtype) @@ -666,11 +667,6 @@ def __new__(cls, subtype=None): except TypeError: raise ValueError("could not construct IntervalDtype") - if subtype is None: - u = object.__new__(cls) - u.subtype = None - return u - if is_categorical_dtype(subtype) or is_string_dtype(subtype): # GH 19016 msg = ('category, object, and string subtypes are not supported ' @@ -692,31 +688,30 @@ def construct_from_string(cls, string): if its not possible """ if isinstance(string, compat.string_types): - try: - return cls(string) - except ValueError: - pass - raise TypeError("could not construct IntervalDtype") + return cls(string) + msg = "a string needs to be passed, got type {typ}" + raise TypeError(msg.format(typ=type(string))) def __unicode__(self): if self.subtype is None: return "interval" return "interval[{subtype}]".format(subtype=self.subtype) - @property - def name(self): - return str(self) - def __hash__(self): # make myself hashable return hash(str(self)) def __eq__(self, other): if isinstance(other, compat.string_types): - return other == self.name or other == self.name.title() - - return (isinstance(other, IntervalDtype) and - self.subtype == other.subtype) + return other.lower() in (self.name.lower(), str(self).lower()) + elif not isinstance(other, IntervalDtype): + return False + elif self.subtype is None or other.subtype is None: + # 
None should match any subtype + return True + else: + from pandas.core.dtypes.common import is_dtype_equal + return is_dtype_equal(self.subtype, other.subtype) @classmethod def is_dtype(cls, dtype): diff --git a/pandas/core/dtypes/generic.py b/pandas/core/dtypes/generic.py index 629d88aa7f086..6fae09c43d2be 100644 --- a/pandas/core/dtypes/generic.py +++ b/pandas/core/dtypes/generic.py @@ -54,6 +54,7 @@ def _check(cls, inst): ABCPeriod = create_pandas_abc_type("ABCPeriod", "_typ", ("period", )) ABCDateOffset = create_pandas_abc_type("ABCDateOffset", "_typ", ("dateoffset",)) +ABCInterval = create_pandas_abc_type("ABCInterval", "_typ", ("interval", )) class _ABCGeneric(type): diff --git a/pandas/core/dtypes/inference.py b/pandas/core/dtypes/inference.py index 8010a213efaf0..6fed25a0012f2 100644 --- a/pandas/core/dtypes/inference.py +++ b/pandas/core/dtypes/inference.py @@ -267,6 +267,39 @@ def is_list_like(obj): not isinstance(obj, string_and_binary_types)) +def is_array_like(obj): + """ + Check if the object is array-like. + + For an object to be considered array-like, it must be list-like and + have a `dtype` attribute. + + Parameters + ---------- + obj : The object to check. + + Returns + ------- + is_array_like : bool + Whether `obj` has array-like properties. 
+ + Examples + -------- + >>> is_array_like(np.array([1, 2, 3])) + True + >>> is_array_like(pd.Series(["a", "b"])) + True + >>> is_array_like(pd.Index(["2016-01-01"])) + True + >>> is_array_like([1, 2, 3]) + False + >>> is_array_like(("a", "b")) + False + """ + + return is_list_like(obj) and hasattr(obj, "dtype") + + def is_nested_list_like(obj): """ Check if the object is list-like, and that all of its elements diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9acc82b50aabf..7328cd336babf 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -23,6 +23,7 @@ import numpy as np import numpy.ma as ma +from pandas.core.accessor import CachedAccessor from pandas.core.dtypes.cast import ( maybe_upcast, cast_scalar_to_array, @@ -61,12 +62,6 @@ from pandas.core.dtypes.missing import isna, notna -from pandas.core.common import (_try_sort, - _default_index, - _values_from_object, - _maybe_box_datetimelike, - _dict_compat, - standardize_mapping) from pandas.core.generic import NDFrame, _shared_docs from pandas.core.index import (Index, MultiIndex, _ensure_index, _ensure_index_from_sequences) @@ -76,7 +71,7 @@ create_block_manager_from_arrays, create_block_manager_from_blocks) from pandas.core.series import Series -from pandas.core.categorical import Categorical +from pandas.core.arrays import Categorical import pandas.core.algorithms as algorithms from pandas.compat import (range, map, zip, lrange, lmap, lzip, StringIO, u, OrderedDict, raise_with_traceback) @@ -92,7 +87,6 @@ from pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.indexes.timedeltas import TimedeltaIndex -from pandas.core import accessor import pandas.core.common as com import pandas.core.nanops as nanops import pandas.core.ops as ops @@ -113,7 +107,15 @@ axes_single_arg="{0 or 'index', 1 or 'columns'}", optional_by=""" by : str or list of str - Name or list of names which refer to the axis items.""", + Name or list of names to sort by. 
+ + - if `axis` is 0 or `'index'` then `by` may contain index + levels and/or column labels + - if `axis` is 1 or `'columns'` then `by` may contain column + levels and/or index labels + + .. versionmodified:: 0.23.0 + Allow specifying index or column level names.""", versionadded_to_excel='', optional_labels="""labels : array-like, optional New labels / index to conform the axis specified by 'axis' to.""", @@ -242,7 +244,7 @@ class DataFrame(NDFrame): """ Two-dimensional size-mutable, potentially heterogeneous tabular data structure with labeled axes (rows and columns). Arithmetic operations align on both row and column labels. Can be thought of as a dict-like - container for Series objects. The primary pandas data structure + container for Series objects. The primary pandas data structure. Parameters ---------- @@ -379,9 +381,9 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, if isinstance(data[0], Series): index = _get_names_from_index(data) elif isinstance(data[0], Categorical): - index = _default_index(len(data[0])) + index = com._default_index(len(data[0])) else: - index = _default_index(len(data)) + index = com._default_index(len(data)) mgr = _arrays_to_mgr(arrays, columns, index, columns, dtype=dtype) @@ -458,7 +460,7 @@ def _init_dict(self, data, index, columns, dtype=None): else: keys = list(data.keys()) if not isinstance(data, OrderedDict): - keys = _try_sort(keys) + keys = com._try_sort(keys) columns = data_names = Index(keys) arrays = [data[k] for k in keys] @@ -485,12 +487,12 @@ def _get_axes(N, K, index=index, columns=columns): # return axes or defaults if index is None: - index = _default_index(N) + index = com._default_index(N) else: index = _ensure_index(index) if columns is None: - columns = _default_index(K) + columns = com._default_index(K) else: columns = _ensure_index(columns) return index, columns @@ -510,7 +512,11 @@ def _get_axes(N, K, index=index, columns=columns): return _arrays_to_mgr([values], columns, index, 
columns, dtype=dtype) elif is_datetimetz(values): - return self._init_dict({0: values}, index, columns, dtype=dtype) + # GH19157 + if columns is None: + columns = [0] + return _arrays_to_mgr([values], columns, index, columns, + dtype=dtype) # by definition an array here # the dtypes will be coerced to a single dtype @@ -982,7 +988,7 @@ def to_dict(self, orient='dict', into=dict): "columns will be omitted.", UserWarning, stacklevel=2) # GH16122 - into_c = standardize_mapping(into) + into_c = com.standardize_mapping(into) if orient.lower().startswith('d'): return into_c( (k, v.to_dict(into)) for k, v in compat.iteritems(self)) @@ -992,13 +998,13 @@ def to_dict(self, orient='dict', into=dict): return into_c((('index', self.index.tolist()), ('columns', self.columns.tolist()), ('data', lib.map_infer(self.values.ravel(), - _maybe_box_datetimelike) + com._maybe_box_datetimelike) .reshape(self.values.shape).tolist()))) elif orient.lower().startswith('s'): - return into_c((k, _maybe_box_datetimelike(v)) + return into_c((k, com._maybe_box_datetimelike(v)) for k, v in compat.iteritems(self)) elif orient.lower().startswith('r'): - return [into_c((k, _maybe_box_datetimelike(v)) + return [into_c((k, com._maybe_box_datetimelike(v)) for k, v in zip(self.columns, np.atleast_1d(row))) for row in self.values] elif orient.lower().startswith('i'): @@ -1318,9 +1324,10 @@ def _from_arrays(cls, arrays, columns, index, dtype=None): def from_csv(cls, path, header=0, sep=',', index_col=0, parse_dates=True, encoding=None, tupleize_cols=None, infer_datetime_format=False): - """ - Read CSV file (DEPRECATED, please use :func:`pandas.read_csv` - instead). + """Read CSV file. + + .. deprecated:: 0.21.0 + Use :func:`pandas.read_csv` instead. It is preferable to use the more powerful :func:`pandas.read_csv` for most general purposes, but ``from_csv`` makes for an easy @@ -1661,8 +1668,8 @@ def to_parquet(self, fname, engine='auto', compression='snappy', Parquet reader library to use. 
If 'auto', then the option 'io.parquet.engine' is used. If 'auto', then the first library to be installed is used. - compression : str, optional, default 'snappy' - compression method, includes {'gzip', 'snappy', 'brotli'} + compression : {'snappy', 'gzip', 'brotli', None}, default 'snappy' + Name of the compression to use. Use ``None`` for no compression. kwargs Additional keyword arguments passed to the engine """ @@ -1938,30 +1945,28 @@ def transpose(self, *args, **kwargs): # legacy pickle formats def _unpickle_frame_compat(self, state): # pragma: no cover - from pandas.core.common import _unpickle_array if len(state) == 2: # pragma: no cover series, idx = state columns = sorted(series) else: series, cols, idx = state - columns = _unpickle_array(cols) + columns = com._unpickle_array(cols) - index = _unpickle_array(idx) + index = com._unpickle_array(idx) self._data = self._init_dict(series, index, columns, None) def _unpickle_matrix_compat(self, state): # pragma: no cover - from pandas.core.common import _unpickle_array # old unpickling (vals, idx, cols), object_state = state - index = _unpickle_array(idx) - dm = DataFrame(vals, index=index, columns=_unpickle_array(cols), + index = com._unpickle_array(idx) + dm = DataFrame(vals, index=index, columns=com._unpickle_array(cols), copy=False) if object_state is not None: ovals, _, ocols = object_state objects = DataFrame(ovals, index=index, - columns=_unpickle_array(ocols), copy=False) + columns=com._unpickle_array(ocols), copy=False) dm = dm.join(objects) @@ -1971,12 +1976,10 @@ def _unpickle_matrix_compat(self, state): # pragma: no cover # Getting and setting elements def get_value(self, index, col, takeable=False): - """ - Quickly retrieve single value at passed column and index + """Quickly retrieve single value at passed column and index .. deprecated:: 0.21.0 - - Please use .at[] or .iat[] accessors. + Use .at[] or .iat[] accessors instead. 
Parameters ---------- @@ -1999,7 +2002,7 @@ def _get_value(self, index, col, takeable=False): if takeable: series = self._iget_item_cache(col) - return _maybe_box_datetimelike(series._values[index]) + return com._maybe_box_datetimelike(series._values[index]) series = self._get_item_cache(col) engine = self.index._engine @@ -2016,12 +2019,10 @@ def _get_value(self, index, col, takeable=False): _get_value.__doc__ = get_value.__doc__ def set_value(self, index, col, value, takeable=False): - """ - Put single value at passed column and index + """Put single value at passed column and index .. deprecated:: 0.21.0 - - Please use .at[] or .iat[] accessors. + Use .at[] or .iat[] accessors instead. Parameters ---------- @@ -2412,17 +2413,18 @@ def select_dtypes(self, include=None, exclude=None): Notes ----- - * To select all *numeric* types use the numpy dtype ``numpy.number`` + * To select all *numeric* types, use ``np.number`` or ``'number'`` * To select strings you must use the ``object`` dtype, but note that this will return *all* object dtype columns * See the `numpy dtype hierarchy `__ - * To select datetimes, use np.datetime64, 'datetime' or 'datetime64' - * To select timedeltas, use np.timedelta64, 'timedelta' or - 'timedelta64' - * To select Pandas categorical dtypes, use 'category' - * To select Pandas datetimetz dtypes, use 'datetimetz' (new in 0.20.0), - or a 'datetime64[ns, tz]' string + * To select datetimes, use ``np.datetime64``, ``'datetime'`` or + ``'datetime64'`` + * To select timedeltas, use ``np.timedelta64``, ``'timedelta'`` or + ``'timedelta64'`` + * To select Pandas categorical dtypes, use ``'category'`` + * To select Pandas datetimetz dtypes, use ``'datetimetz'`` (new in + 0.20.0) or ``'datetime64[ns, tz]'`` Examples -------- @@ -2431,12 +2433,12 @@ def select_dtypes(self, include=None, exclude=None): ... 
'c': [1.0, 2.0] * 3}) >>> df a b c - 0 0.3962 True 1 - 1 0.1459 False 2 - 2 0.2623 True 1 - 3 0.0764 False 2 - 4 -0.9703 True 1 - 5 -1.2094 False 2 + 0 0.3962 True 1.0 + 1 0.1459 False 2.0 + 2 0.2623 True 1.0 + 3 0.0764 False 2.0 + 4 -0.9703 True 1.0 + 5 -1.2094 False 2.0 >>> df.select_dtypes(include='bool') c 0 True @@ -2447,12 +2449,12 @@ def select_dtypes(self, include=None, exclude=None): 5 False >>> df.select_dtypes(include=['float64']) c - 0 1 - 1 2 - 2 1 - 3 2 - 4 1 - 5 2 + 0 1.0 + 1 2.0 + 2 1.0 + 3 2.0 + 4 1.0 + 5 2.0 >>> df.select_dtypes(exclude=['floating']) b 0 True @@ -3365,7 +3367,7 @@ def _maybe_casted_values(index, labels=None): values, mask, np.nan) return values - new_index = _default_index(len(new_obj)) + new_index = com._default_index(len(new_obj)) if level is not None: if not isinstance(level, (tuple, list)): level = [level] @@ -3623,7 +3625,7 @@ def sort_values(self, by, axis=0, ascending=True, inplace=False, kind='quicksort', na_position='last'): inplace = validate_bool_kwarg(inplace, 'inplace') axis = self._get_axis_number(axis) - other_axis = 0 if axis == 1 else 1 + stacklevel = 2 # Number of stack levels from df.sort_values if not isinstance(by, list): by = [by] @@ -3635,10 +3637,8 @@ def sort_values(self, by, axis=0, ascending=True, inplace=False, keys = [] for x in by: - k = self.xs(x, axis=other_axis).values - if k.ndim == 2: - raise ValueError('Cannot sort by duplicate column %s' % - str(x)) + k = self._get_label_or_level_values(x, axis=axis, + stacklevel=stacklevel) keys.append(k) indexer = lexsort_indexer(keys, orders=ascending, na_position=na_position) @@ -3647,17 +3647,9 @@ def sort_values(self, by, axis=0, ascending=True, inplace=False, from pandas.core.sorting import nargsort by = by[0] - k = self.xs(by, axis=other_axis).values - if k.ndim == 2: - - # try to be helpful - if isinstance(self.columns, MultiIndex): - raise ValueError('Cannot sort by column %s in a ' - 'multi-index you need to explicitly ' - 'provide all the levels' % 
str(by)) + k = self._get_label_or_level_values(by, axis=axis, + stacklevel=stacklevel) - raise ValueError('Cannot sort by duplicate column %s' % - str(by)) if isinstance(ascending, (tuple, list)): ascending = ascending[0] @@ -3739,12 +3731,13 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, def sortlevel(self, level=0, axis=0, ascending=True, inplace=False, sort_remaining=True): - """ - DEPRECATED: use :meth:`DataFrame.sort_index` - - Sort multilevel index by chosen axis and primary level. Data will be + """Sort multilevel index by chosen axis and primary level. Data will be lexicographically sorted by the chosen level followed by the other - levels (in order) + levels (in order). + + .. deprecated:: 0.20.0 + Use :meth:`DataFrame.sort_index` + Parameters ---------- @@ -6006,8 +5999,7 @@ def isin(self, values): # ---------------------------------------------------------------------- # Add plotting methods to DataFrame - plot = accessor.AccessorProperty(gfx.FramePlotMethods, - gfx.FramePlotMethods) + plot = CachedAccessor("plot", gfx.FramePlotMethods) hist = gfx.hist_frame boxplot = gfx.boxplot_frame @@ -6088,7 +6080,7 @@ def extract_index(data): (lengths[0], len(index))) raise ValueError(msg) else: - index = _default_index(lengths[0]) + index = com._default_index(lengths[0]) return _ensure_index(index) @@ -6159,7 +6151,7 @@ def _to_arrays(data, columns, coerce_float=False, dtype=None): dtype=dtype) elif isinstance(data[0], Categorical): if columns is None: - columns = _default_index(len(data)) + columns = com._default_index(len(data)) return data, columns elif (isinstance(data, (np.ndarray, Series, Index)) and data.dtype.names is not None): @@ -6183,7 +6175,7 @@ def _masked_rec_array_to_mgr(data, index, columns, dtype, copy): if index is None: index = _get_names_from_index(fdata) if index is None: - index = _default_index(len(data)) + index = com._default_index(len(data)) index = _ensure_index(index) if columns is not None: @@ -6243,14 
+6235,14 @@ def _list_of_series_to_arrays(data, columns, coerce_float=False, dtype=None): for s in data: index = getattr(s, 'index', None) if index is None: - index = _default_index(len(s)) + index = com._default_index(len(s)) if id(index) in indexer_cache: indexer = indexer_cache[id(index)] else: indexer = indexer_cache[id(index)] = index.get_indexer(columns) - values = _values_from_object(s) + values = com._values_from_object(s) aligned_values.append(algorithms.take_1d(values, indexer)) values = np.vstack(aligned_values) @@ -6280,7 +6272,7 @@ def _list_of_dict_to_arrays(data, columns, coerce_float=False, dtype=None): def _convert_object_array(content, columns, coerce_float=False, dtype=None): if columns is None: - columns = _default_index(len(content)) + columns = com._default_index(len(content)) else: if len(columns) != len(content): # pragma: no cover # caller's responsibility to check for this... @@ -6302,7 +6294,7 @@ def convert(arr): def _get_names_from_index(data): has_some_name = any(getattr(s, 'name', None) is not None for s in data) if not has_some_name: - return _default_index(len(data)) + return com._default_index(len(data)) index = lrange(len(data)) count = 0 @@ -6337,7 +6329,7 @@ def _homogenize(data, index, dtype=None): oindex = index.astype('O') if isinstance(index, (DatetimeIndex, TimedeltaIndex)): - v = _dict_compat(v) + v = com._dict_compat(v) else: v = dict(v) v = lib.fast_multiget(v, oindex.values, default=np.nan) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2b5e4b912247e..6e777281b11e1 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -30,10 +30,6 @@ from pandas.core.dtypes.inference import is_hashable from pandas.core.dtypes.missing import isna, notna from pandas.core.dtypes.generic import ABCSeries, ABCPanel, ABCDataFrame -from pandas.core.common import (_count_not_none, - _maybe_box_datetimelike, _values_from_object, - AbstractMethodError, SettingWithCopyError, - SettingWithCopyWarning) from 
pandas.core.base import PandasObject, SelectionMixin from pandas.core.index import (Index, MultiIndex, _ensure_index, @@ -69,7 +65,7 @@ args_transpose='axes to permute (int or label for object)', optional_by=""" by : str or list of str - Name or list of names which refer to the axis items.""") + Name or list of names to sort by""") def _single_replace(self, to_replace, method, inplace, limit): @@ -198,7 +194,7 @@ def _constructor(self): """Used when a manipulation result has the same dimensions as the original. """ - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) def __unicode__(self): # unicode representation based upon iterating over self @@ -220,7 +216,7 @@ def _constructor_sliced(self): """Used when a manipulation result has one lower dimension(s) as the original, such as DataFrame single columns slicing. """ - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) @property def _constructor_expanddim(self): @@ -1026,7 +1022,7 @@ def _indexed_same(self, other): for a in self._AXIS_ORDERS) def __neg__(self): - values = _values_from_object(self) + values = com._values_from_object(self) if values.dtype == np.bool_: arr = operator.inv(values) else: @@ -1035,7 +1031,7 @@ def __neg__(self): def __invert__(self): try: - arr = operator.inv(_values_from_object(self)) + arr = operator.inv(com._values_from_object(self)) return self.__array_wrap__(arr) except Exception: @@ -1156,7 +1152,7 @@ def _is_label_or_level_reference(self, key, axis=0): return (self._is_level_reference(key, axis=axis) or self._is_label_reference(key, axis=axis)) - def _check_label_or_level_ambiguity(self, key, axis=0): + def _check_label_or_level_ambiguity(self, key, axis=0, stacklevel=1): """ Check whether `key` matches both a level of the input `axis` and a label of the other axis and raise a ``FutureWarning`` if this is the @@ -1169,9 +1165,10 @@ def _check_label_or_level_ambiguity(self, key, axis=0): ---------- key: str or object label or level name - axis: 
int, default 0 Axis that levels are associated with (0 for index, 1 for columns) + stacklevel: int, default 1 + Stack level used when a FutureWarning is raised (see below). Returns ------- @@ -1216,12 +1213,12 @@ def _check_label_or_level_ambiguity(self, key, axis=0): label_article=label_article, label_type=label_type) - warnings.warn(msg, FutureWarning, stacklevel=2) + warnings.warn(msg, FutureWarning, stacklevel=stacklevel + 1) return True else: return False - def _get_label_or_level_values(self, key, axis=0): + def _get_label_or_level_values(self, key, axis=0, stacklevel=1): """ Return a 1-D array of values associated with `key`, a label or level from the given `axis`. @@ -1240,6 +1237,8 @@ def _get_label_or_level_values(self, key, axis=0): Label or level name. axis: int, default 0 Axis that levels are associated with (0 for index, 1 for columns) + stacklevel: int, default 1 + Stack level used when a FutureWarning is raised (see below). Returns ------- @@ -1251,6 +1250,9 @@ def _get_label_or_level_values(self, key, axis=0): if `key` matches neither a label nor a level ValueError if `key` matches multiple labels + FutureWarning + if `key` is ambiguous. 
This will become an ambiguity error in a + future version """ axis = self._get_axis_number(axis) @@ -1262,7 +1264,8 @@ def _get_label_or_level_values(self, key, axis=0): .format(type=type(self))) if self._is_label_reference(key, axis=axis): - self._check_label_or_level_ambiguity(key, axis=axis) + self._check_label_or_level_ambiguity(key, axis=axis, + stacklevel=stacklevel + 1) values = self.xs(key, axis=other_axes[0])._values elif self._is_level_reference(key, axis=axis): values = self.axes[axis].get_level_values(key)._values @@ -1271,11 +1274,22 @@ def _get_label_or_level_values(self, key, axis=0): # Check for duplicates if values.ndim > 1: + + if other_axes and isinstance( + self._get_axis(other_axes[0]), MultiIndex): + multi_message = ('\n' + 'For a multi-index, the label must be a ' + 'tuple with elements corresponding to ' + 'each level.') + else: + multi_message = '' + label_axis_name = 'column' if axis == 0 else 'index' raise ValueError(("The {label_axis_name} label '{key}' " - "is not unique") + "is not unique.{multi_message}") .format(key=key, - label_axis_name=label_axis_name)) + label_axis_name=label_axis_name, + multi_message=multi_message)) return values @@ -1472,7 +1486,7 @@ def __round__(self, decimals=0): # Array Interface def __array__(self, dtype=None): - return _values_from_object(self) + return com._values_from_object(self) def __array_wrap__(self, result, context=None): d = self._construct_axes_dict(self._AXIS_ORDERS, copy=False) @@ -1824,8 +1838,8 @@ def to_msgpack(self, path_or_buf=None, encoding='utf-8', **kwargs): return packers.to_msgpack(path_or_buf, self, encoding=encoding, **kwargs) - def to_sql(self, name, con, flavor=None, schema=None, if_exists='fail', - index=True, index_label=None, chunksize=None, dtype=None): + def to_sql(self, name, con, schema=None, if_exists='fail', index=True, + index_label=None, chunksize=None, dtype=None): """ Write records stored in a DataFrame to a SQL database. 
@@ -1836,10 +1850,6 @@ def to_sql(self, name, con, flavor=None, schema=None, if_exists='fail', con : SQLAlchemy engine or DBAPI2 connection (legacy mode) Using SQLAlchemy makes it possible to use any DB supported by that library. If a DBAPI2 object, only sqlite3 is supported. - flavor : 'sqlite', default None - .. deprecated:: 0.19.0 - 'sqlite' is the only supported option if SQLAlchemy is not - used. schema : string, default None Specify the schema (if database flavor supports this). If None, use default schema. @@ -1862,9 +1872,9 @@ def to_sql(self, name, con, flavor=None, schema=None, if_exists='fail', """ from pandas.io import sql - sql.to_sql(self, name, con, flavor=flavor, schema=schema, - if_exists=if_exists, index=index, index_label=index_label, - chunksize=chunksize, dtype=dtype) + sql.to_sql(self, name, con, schema=schema, if_exists=if_exists, + index=index, index_label=index_label, chunksize=chunksize, + dtype=dtype) def to_pickle(self, path, compression='infer', protocol=pkl.HIGHEST_PROTOCOL): @@ -2190,7 +2200,7 @@ def _iget_item_cache(self, item): return lower def _box_item_values(self, key, values): - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) def _maybe_cache_changed(self, item, value): """The object has called back to us saying maybe it has changed. @@ -2383,9 +2393,10 @@ def _check_setitem_copy(self, stacklevel=4, t='setting', force=False): ) if value == 'raise': - raise SettingWithCopyError(t) + raise com.SettingWithCopyError(t) elif value == 'warn': - warnings.warn(t, SettingWithCopyWarning, stacklevel=stacklevel) + warnings.warn(t, com.SettingWithCopyWarning, + stacklevel=stacklevel) def __delitem__(self, key): """ @@ -2682,7 +2693,7 @@ def xs(self, key, axis=0, level=None, drop_level=True): # that means that their are list/ndarrays inside the Series! 
# so just return them (GH 6394) if not is_list_like(new_values) or self.ndim == 1: - return _maybe_box_datetimelike(new_values) + return com._maybe_box_datetimelike(new_values) result = self._constructor_sliced( new_values, index=self.columns, @@ -2700,10 +2711,10 @@ def xs(self, key, axis=0, level=None, drop_level=True): _xs = xs def select(self, crit, axis=0): - """ - Return data corresponding to axis labels matching criteria + """Return data corresponding to axis labels matching criteria - DEPRECATED: use df.loc[df.index.map(crit)] to select via labels + .. deprecated:: 0.21.0 + Use df.loc[df.index.map(crit)] to select via labels Parameters ---------- @@ -2792,6 +2803,11 @@ def drop(self, labels=None, axis=0, index=None, columns=None, level=None, ------- dropped : type of caller + Raises + ------ + KeyError + If none of the labels are found in the selected axis + Examples -------- >>> df = pd.DataFrame(np.arange(12).reshape(3,4), @@ -2895,6 +2911,9 @@ def _drop_axis(self, labels, axis, level=None, errors='raise'): else: indexer = ~axis.isin(labels) + if errors == 'raise' and indexer.all(): + raise KeyError('{} not found in axis'.format(labels)) + slicer = [slice(None)] * self.ndim slicer[self._get_axis_number(axis_name)] = indexer @@ -2956,7 +2975,7 @@ def add_suffix(self, suffix): Parameters ----------%(optional_by)s axis : %(axes_single_arg)s, default 0 - Axis to direct sorting + Axis to be sorted ascending : bool or list of bool, default True Sort ascending vs. descending. Specify list for multiple sort orders. 
If this is a list of bools, must match the length of @@ -3535,7 +3554,7 @@ def filter(self, items=None, like=None, regex=None, axis=None): """ import re - nkw = _count_not_none(items, like, regex) + nkw = com._count_not_none(items, like, regex) if nkw > 1: raise TypeError('Keyword arguments `items`, `like`, or `regex` ' 'are mutually exclusive') @@ -4090,8 +4109,11 @@ def _consolidate(self, inplace=False): return self._constructor(cons_data).__finalize__(self) def consolidate(self, inplace=False): - """ - DEPRECATED: consolidate will be an internal implementation only. + """Compute NDFrame with "consolidated" internals (data of each dtype + grouped together in a single ndarray). + + .. deprecated:: 0.20.0 + Consolidate will be an internal implementation only. """ # 15483 warnings.warn("consolidate is deprecated and will be removed in a " @@ -4142,11 +4164,10 @@ def _get_bool_data(self): # Internal Interface Methods def as_matrix(self, columns=None): - """ - DEPRECATED: as_matrix will be removed in a future version. - Use :meth:`DataFrame.values` instead. + """Convert the frame to its Numpy-array representation. - Convert the frame to its Numpy-array representation. + .. deprecated:: 0.23.0 + Use :meth:`DataFrame.values` instead. Parameters ---------- @@ -4461,12 +4482,11 @@ def _convert(self, datetime=False, numeric=False, timedelta=False, timedelta=timedelta, coerce=coerce, copy=copy)).__finalize__(self) - # TODO: Remove in 0.18 or 2017, which ever is sooner def convert_objects(self, convert_dates=True, convert_numeric=False, convert_timedeltas=True, copy=True): - """ - Deprecated. - Attempt to infer better dtype for object columns + """Attempt to infer better dtype for object columns. + + .. 
deprecated:: 0.21.0 Parameters ---------- @@ -6334,7 +6354,8 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None, if try_quick: try: - new_other = _values_from_object(self).copy() + new_other = com._values_from_object(self) + new_other = new_other.copy() new_other[icond] = other other = new_other except Exception: @@ -7295,7 +7316,7 @@ def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None, rs = (data.div(data.shift(periods=periods, freq=freq, axis=axis, **kwargs)) - 1) if freq is None: - mask = isna(_values_from_object(self)) + mask = isna(com._values_from_object(self)) np.putmask(rs.values, mask, np.nan) return rs @@ -7755,7 +7776,7 @@ def cum_func(self, axis=None, skipna=True, *args, **kwargs): else: axis = self._get_axis_number(axis) - y = _values_from_object(self).copy() + y = com._values_from_object(self).copy() if (skipna and issubclass(y.dtype.type, (np.datetime64, np.timedelta64))): diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 285a347153a82..2c1deb9db7bba 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -39,15 +39,11 @@ from pandas.core.dtypes.cast import maybe_downcast_to_dtype from pandas.core.dtypes.missing import isna, notna, _maybe_fill -from pandas.core.common import (_values_from_object, AbstractMethodError, - _default_index, _not_none, _get_callable_name, - _asarray_tuplesafe, _pipe) - from pandas.core.base import (PandasObject, SelectionMixin, GroupByError, DataError, SpecificationError) from pandas.core.index import (Index, MultiIndex, CategoricalIndex, _ensure_index) -from pandas.core.categorical import Categorical +from pandas.core.arrays import Categorical from pandas.core.frame import DataFrame from pandas.core.generic import NDFrame, _shared_docs from pandas.core.internals import BlockManager, make_block @@ -61,12 +57,15 @@ from pandas.io.formats.printing import pprint_thing from pandas.util._validators import validate_kwargs +import pandas.core.common as com 
import pandas.core.algorithms as algorithms from pandas.core.config import option_context from pandas.plotting._core import boxplot_frame_groupby -from pandas._libs import lib, groupby as libgroupby, Timestamp, NaT, iNaT +from pandas._libs import (lib, reduction, + groupby as libgroupby, + Timestamp, NaT, iNaT) from pandas._libs.lib import count_level_2d _doc_template = """ @@ -346,6 +345,8 @@ _cython_transforms = frozenset(['cumprod', 'cumsum', 'shift', 'cummin', 'cummax']) +_cython_cast_blacklist = frozenset(['rank', 'count', 'size']) + class Grouper(object): """ @@ -751,7 +752,7 @@ def __getattr__(self, attr): b 2""") @Appender(_pipe_template) def pipe(self, func, *args, **kwargs): - return _pipe(self, func, *args, **kwargs) + return com._pipe(self, func, *args, **kwargs) plot = property(GroupByPlot) @@ -895,7 +896,7 @@ def _iterate_slices(self): yield self._selection_name, self._selected_obj def transform(self, func, *args, **kwargs): - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) def _cumcount_array(self, ascending=True): """ @@ -966,6 +967,21 @@ def _try_cast(self, result, obj, numeric_only=False): return result + def _transform_should_cast(self, func_nm): + """ + Parameters: + ----------- + func_nm: str + The name of the aggregation function being performed + + Returns: + -------- + bool + Whether transform should attempt to cast the result of aggregation + """ + return (self.size().fillna(0) > 0).any() and (func_nm not in + _cython_cast_blacklist) + def _cython_transform(self, how, numeric_only=True): output = collections.OrderedDict() for name, obj in self._iterate_slices(): @@ -1037,7 +1053,7 @@ def _python_agg_general(self, func, *args, **kwargs): return self._wrap_aggregated_output(output) def _wrap_applied_output(self, *args, **kwargs): - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) def _concat_objects(self, keys, values, not_indexed_same=False): from pandas.core.reshape.concat import concat @@ -1045,7 
+1061,7 @@ def _concat_objects(self, keys, values, not_indexed_same=False): def reset_identity(values): # reset the identities of the components # of the values to prevent aliasing - for v in _not_none(*values): + for v in com._not_none(*values): ax = v._get_axis(self.axis) ax._reset_identity() return values @@ -1975,13 +1991,13 @@ def apply(self, f, data, axis=0): group_keys = self._get_group_keys() # oh boy - f_name = _get_callable_name(f) + f_name = com._get_callable_name(f) if (f_name not in _plotting_methods and hasattr(splitter, 'fast_apply') and axis == 0): try: values, mutated = splitter.fast_apply(f, group_keys) return group_keys, values, mutated - except (lib.InvalidApply): + except reduction.InvalidApply: # we detect a mutation of some kind # so take slow path pass @@ -2009,7 +2025,7 @@ def indices(self): return self.groupings[0].indices else: label_list = [ping.labels for ping in self.groupings] - keys = [_values_from_object(ping.group_index) + keys = [com._values_from_object(ping.group_index) for ping in self.groupings] return get_indexer_dict(label_list, keys) @@ -2404,8 +2420,8 @@ def _aggregate_series_fast(self, obj, func): obj = obj._take(indexer, convert=False).to_dense() group_index = algorithms.take_nd( group_index, indexer, allow_fill=False) - grouper = lib.SeriesGrouper(obj, func, group_index, ngroups, - dummy) + grouper = reduction.SeriesGrouper(obj, func, group_index, ngroups, + dummy) result, counts = grouper.get_result() return result, counts @@ -2618,7 +2634,7 @@ def groupings(self): def agg_series(self, obj, func): dummy = obj[:0] - grouper = lib.SeriesBinGrouper(obj, func, self.bins, dummy) + grouper = reduction.SeriesBinGrouper(obj, func, self.bins, dummy) return grouper.get_result() # ---------------------------------------------------------------------- @@ -2707,7 +2723,7 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None, self.grouper = self.obj[self.name] elif isinstance(self.grouper, (list, tuple)): - 
self.grouper = _asarray_tuplesafe(self.grouper) + self.grouper = com._asarray_tuplesafe(self.grouper) # a passed Categorical elif is_categorical_dtype(self.grouper): @@ -2934,7 +2950,7 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True, if not any_callable and not all_in_columns_index and \ not any_arraylike and not any_groupers and \ match_axis_length and level is None: - keys = [_asarray_tuplesafe(keys)] + keys = [com._asarray_tuplesafe(keys)] if isinstance(level, (tuple, list)): if key is None: @@ -2972,7 +2988,9 @@ def is_in_obj(gpr): elif is_in_axis(gpr): # df.groupby('name') if gpr in obj: if validate: - obj._check_label_or_level_ambiguity(gpr) + stacklevel = 5 # Number of stack levels from df.groupby + obj._check_label_or_level_ambiguity( + gpr, stacklevel=stacklevel) in_axis, name, gpr = True, gpr, obj[gpr] exclusions.append(name) elif obj._is_level_reference(gpr): @@ -3227,7 +3245,7 @@ def _aggregate_multiple_funcs(self, arg, _level): columns.append(f) else: # protect against callables without names - columns.append(_get_callable_name(f)) + columns.append(com._get_callable_name(f)) arg = lzip(columns, arg) results = {} @@ -3332,7 +3350,7 @@ def transform(self, func, *args, **kwargs): else: # cythonized aggregation and merge return self._transform_fast( - lambda: getattr(self, func)(*args, **kwargs)) + lambda: getattr(self, func)(*args, **kwargs), func) # reg transform klass = self._selected_obj.__class__ @@ -3363,7 +3381,7 @@ def transform(self, func, *args, **kwargs): result.index = self._selected_obj.index return result - def _transform_fast(self, func): + def _transform_fast(self, func, func_nm): """ fast version of transform, only applicable to builtin/cythonizable functions @@ -3372,7 +3390,7 @@ def _transform_fast(self, func): func = getattr(self, func) ids, _, ngroup = self.grouper.group_info - cast = (self.size().fillna(0) > 0).any() + cast = self._transform_should_cast(func_nm) out = algorithms.take_1d(func().values, ids) if cast: 
out = self._try_cast(out, self.obj) @@ -3827,7 +3845,7 @@ def _aggregate_generic(self, func, *args, **kwargs): return self._wrap_generic_output(result, obj) def _wrap_aggregated_output(self, output, names=None): - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) def _aggregate_item_by_item(self, func, *args, **kwargs): # only for axis==0 @@ -3889,7 +3907,7 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False): # GH12824. def first_not_none(values): try: - return next(_not_none(*values)) + return next(com._not_none(*values)) except StopIteration: return None @@ -4126,15 +4144,15 @@ def transform(self, func, *args, **kwargs): if not result.columns.equals(obj.columns): return self._transform_general(func, *args, **kwargs) - return self._transform_fast(result, obj) + return self._transform_fast(result, obj, func) - def _transform_fast(self, result, obj): + def _transform_fast(self, result, obj, func_nm): """ Fast transform path for aggregations """ # if there were groups with no observations (Categorical only?) 
# try casting data to original dtype - cast = (self.size().fillna(0) > 0).any() + cast = self._transform_should_cast(func_nm) # for each col, reshape to to size of original frame # by take operation @@ -4583,7 +4601,7 @@ def groupby_series(obj, col=None): results = concat(results, axis=1) if not self.as_index: - results.index = _default_index(len(results)) + results.index = com._default_index(len(results)) return results boxplot = boxplot_frame_groupby @@ -4673,7 +4691,7 @@ def _aggregate_item_by_item(self, func, *args, **kwargs): raise ValueError("axis value must be greater than 0") def _wrap_aggregated_output(self, output, names=None): - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) class NDArrayGroupBy(GroupBy): @@ -4729,7 +4747,7 @@ def _chop(self, sdata, slice_obj): return sdata.iloc[slice_obj] def apply(self, f): - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) class ArraySplitter(DataSplitter): @@ -4756,7 +4774,8 @@ def fast_apply(self, f, names): return [], True sdata = self._get_sorted_data() - results, mutated = lib.apply_frame_axis0(sdata, f, names, starts, ends) + results, mutated = reduction.apply_frame_axis0(sdata, f, names, + starts, ends) return results, mutated diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index 116c7eb8c7958..d40230386216c 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -4,6 +4,7 @@ import numpy as np +from pandas.core.dtypes.generic import ABCSeries from pandas.core.dtypes.common import ( is_period_arraylike, is_datetime_arraylike, is_integer_dtype, @@ -20,81 +21,44 @@ from pandas.core.algorithms import take_1d -def is_datetimelike(data): - """ - return a boolean if we can be successfully converted to a datetimelike - """ - try: - maybe_to_datetimelike(data) - return True - except (Exception): - pass - return False +class Properties(PandasDelegate, PandasObject, NoNewAttributesMixin): + def __init__(self, 
data, orig): + if not isinstance(data, ABCSeries): + raise TypeError("cannot convert an object of type {0} to a " + "datetimelike index".format(type(data))) -def maybe_to_datetimelike(data, copy=False): - """ - return a DelegatedClass of a Series that is datetimelike - (e.g. datetime64[ns],timedelta64[ns] dtype or a Series of Periods) - raise TypeError if this is not possible. + self.values = data + self.orig = orig + self.name = getattr(data, 'name', None) + self.index = getattr(data, 'index', None) + self._freeze() - Parameters - ---------- - data : Series - copy : boolean, default False - copy the input data + def _get_values(self): + data = self.values + if is_datetime64_dtype(data.dtype): + return DatetimeIndex(data, copy=False, name=self.name) - Returns - ------- - DelegatedClass + elif is_datetime64tz_dtype(data.dtype): + return DatetimeIndex(data, copy=False, name=self.name) - """ - from pandas import Series + elif is_timedelta64_dtype(data.dtype): + return TimedeltaIndex(data, copy=False, name=self.name) + + else: + if is_period_arraylike(data): + return PeriodIndex(data, copy=False, name=self.name) + if is_datetime_arraylike(data): + return DatetimeIndex(data, copy=False, name=self.name) - if not isinstance(data, Series): raise TypeError("cannot convert an object of type {0} to a " "datetimelike index".format(type(data))) - index = data.index - name = data.name - orig = data if is_categorical_dtype(data) else None - if orig is not None: - data = orig.values.categories - - if is_datetime64_dtype(data.dtype): - return DatetimeProperties(DatetimeIndex(data, copy=copy), - index, name=name, orig=orig) - elif is_datetime64tz_dtype(data.dtype): - return DatetimeProperties(DatetimeIndex(data, copy=copy), - index, data.name, orig=orig) - elif is_timedelta64_dtype(data.dtype): - return TimedeltaProperties(TimedeltaIndex(data, copy=copy), index, - name=name, orig=orig) - else: - if is_period_arraylike(data): - return PeriodProperties(PeriodIndex(data, copy=copy), 
index, - name=name, orig=orig) - if is_datetime_arraylike(data): - return DatetimeProperties(DatetimeIndex(data, copy=copy), index, - name=name, orig=orig) - - raise TypeError("cannot convert an object of type {0} to a " - "datetimelike index".format(type(data))) - - -class Properties(PandasDelegate, PandasObject, NoNewAttributesMixin): - - def __init__(self, values, index, name, orig=None): - self.values = values - self.index = index - self.name = name - self.orig = orig - self._freeze() - def _delegate_property_get(self, name): from pandas import Series + values = self._get_values() - result = getattr(self.values, name) + result = getattr(values, name) # maybe need to upcast (ints) if isinstance(result, np.ndarray): @@ -126,8 +90,9 @@ def _delegate_property_set(self, name, value, *args, **kwargs): def _delegate_method(self, name, *args, **kwargs): from pandas import Series + values = self._get_values() - method = getattr(self.values, name) + method = getattr(values, name) result = method(*args, **kwargs) if not is_list_like(result): @@ -158,11 +123,11 @@ class DatetimeProperties(Properties): """ def to_pydatetime(self): - return self.values.to_pydatetime() + return self._get_values().to_pydatetime() @property def freq(self): - return self.values.inferred_freq + return self._get_values().inferred_freq DatetimeProperties._add_delegate_accessors( @@ -189,7 +154,7 @@ class TimedeltaProperties(Properties): """ def to_pytimedelta(self): - return self.values.to_pytimedelta() + return self._get_values().to_pytimedelta() @property def components(self): @@ -202,11 +167,11 @@ def components(self): a DataFrame """ - return self.values.components.set_index(self.index) + return self._get_values().components.set_index(self.index) @property def freq(self): - return self.values.inferred_freq + return self._get_values().inferred_freq TimedeltaProperties._add_delegate_accessors( @@ -245,15 +210,38 @@ class PeriodProperties(Properties): class 
CombinedDatetimelikeProperties(DatetimeProperties, TimedeltaProperties): - # This class is never instantiated, and exists solely for the benefit of - # the Series.dt class property. For Series objects, .dt will always be one - # of the more specific classes above. - __doc__ = DatetimeProperties.__doc__ - @classmethod - def _make_accessor(cls, data): + def __new__(cls, data): + # CombinedDatetimelikeProperties isn't really instantiated. Instead + # we need to choose which parent (datetime or timedelta) is + # appropriate. Since we're checking the dtypes anyway, we'll just + # do all the validation here. + from pandas import Series + + if not isinstance(data, Series): + raise TypeError("cannot convert an object of type {0} to a " + "datetimelike index".format(type(data))) + + orig = data if is_categorical_dtype(data) else None + if orig is not None: + data = Series(orig.values.categories, + name=orig.name, + copy=False) + try: - return maybe_to_datetimelike(data) + if is_datetime64_dtype(data.dtype): + return DatetimeProperties(data, orig) + elif is_datetime64tz_dtype(data.dtype): + return DatetimeProperties(data, orig) + elif is_timedelta64_dtype(data.dtype): + return TimedeltaProperties(data, orig) + else: + if is_period_arraylike(data): + return PeriodProperties(data, orig) + if is_datetime_arraylike(data): + return DatetimeProperties(data, orig) except Exception: - raise AttributeError("Can only use .dt accessor with " - "datetimelike values") + pass # we raise an attribute error anyway + + raise AttributeError("Can only use .dt accessor with datetimelike " + "values") diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 55a26d57fa1d6..626f3dc86556a 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5,15 +5,16 @@ import numpy as np from pandas._libs import (lib, index as libindex, tslib as libts, algos as libalgos, join as libjoin, - Timestamp, Timedelta, ) + Timestamp) from pandas._libs.lib import 
is_datetime_array from pandas.compat import range, u, set_function_name from pandas.compat.numpy import function as nv from pandas import compat +from pandas.core.accessor import CachedAccessor from pandas.core.dtypes.generic import ( - ABCSeries, + ABCSeries, ABCDataFrame, ABCMultiIndex, ABCPeriodIndex, ABCDateOffset) @@ -40,11 +41,9 @@ needs_i8_conversion, is_iterator, is_list_like, is_scalar) -from pandas.core.common import (is_bool_indexer, _values_from_object, - _asarray_tuplesafe, _not_none, - _index_labels_to_array) from pandas.core.base import PandasObject, IndexOpsMixin +import pandas.core.common as com import pandas.core.base as base from pandas.util._decorators import ( Appender, Substitution, cache_readonly, deprecate_kwarg) @@ -55,8 +54,8 @@ import pandas.core.sorting as sorting from pandas.io.formats.printing import pprint_thing from pandas.core.ops import _comp_method_OBJECT_ARRAY -from pandas.core import strings, accessor from pandas.core.config import get_option +from pandas.core.strings import StringMethods # simplify @@ -142,12 +141,10 @@ class Index(IndexOpsMixin, PandasObject): _join_precedence = 1 # Cython methods - _arrmap = libalgos.arrmap_object _left_indexer_unique = libjoin.left_join_indexer_unique_object _left_indexer = libjoin.left_join_indexer_object _inner_indexer = libjoin.inner_join_indexer_object _outer_indexer = libjoin.outer_join_indexer_object - _box_scalars = False _typ = 'index' _data = None @@ -156,9 +153,6 @@ class Index(IndexOpsMixin, PandasObject): asi8 = None _comparables = ['name'] _attributes = ['name'] - _allow_index_ops = True - _allow_datetime_index_ops = False - _allow_period_index_ops = False _is_numeric_dtype = False _can_hold_na = True @@ -172,9 +166,7 @@ class Index(IndexOpsMixin, PandasObject): _engine_type = libindex.ObjectEngine _accessors = frozenset(['str']) - - # String Methods - str = accessor.AccessorProperty(strings.StringMethods) + str = CachedAccessor("str", StringMethods) def __new__(cls, data=None, 
dtype=None, copy=False, name=None, fastpath=False, tupleize_cols=True, **kwargs): @@ -203,7 +195,9 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, # interval if is_interval_dtype(data) or is_interval_dtype(dtype): from .interval import IntervalIndex - return IntervalIndex(data, dtype=dtype, name=name, copy=copy) + closed = kwargs.get('closed', None) + return IntervalIndex(data, dtype=dtype, name=name, copy=copy, + closed=closed) # index-like elif isinstance(data, (np.ndarray, Index, ABCSeries)): @@ -293,7 +287,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, elif issubclass(data.dtype.type, np.bool) or is_bool_dtype(data): subarr = data.astype('object') else: - subarr = _asarray_tuplesafe(data, dtype=object) + subarr = com._asarray_tuplesafe(data, dtype=object) # _asarray_tuplesafe does not always copy underlying data, # so need to make sure that this happens @@ -316,8 +310,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, return Float64Index(subarr, copy=copy, name=name) elif inferred == 'interval': from .interval import IntervalIndex - return IntervalIndex.from_intervals(subarr, name=name, - copy=copy) + return IntervalIndex(subarr, name=name, copy=copy) elif inferred == 'boolean': # don't support boolean explicitly ATM pass @@ -362,7 +355,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, return MultiIndex.from_tuples( data, names=name or kwargs.get('names')) # other iterable of some kind - subarr = _asarray_tuplesafe(data, dtype=object) + subarr = com._asarray_tuplesafe(data, dtype=object) return Index(subarr, dtype=dtype, copy=copy, name=name, **kwargs) """ @@ -1499,7 +1492,7 @@ def _convert_listlike_indexer(self, keyarr, kind=None): @Appender(_index_shared_docs['_convert_arr_indexer']) def _convert_arr_indexer(self, keyarr): - keyarr = _asarray_tuplesafe(keyarr) + keyarr = com._asarray_tuplesafe(keyarr) return keyarr _index_shared_docs['_convert_index_indexer'] = """ @@ -1737,10 +1730,10 
@@ def __getitem__(self, key): # pessimization of basic indexing. return promote(getitem(key)) - if is_bool_indexer(key): + if com.is_bool_indexer(key): key = np.asarray(key) - key = _values_from_object(key) + key = com._values_from_object(key) result = getitem(key) if not is_scalar(result): return promote(result) @@ -2023,8 +2016,8 @@ def equals(self, other): return other.equals(self) try: - return array_equivalent(_values_from_object(self), - _values_from_object(other)) + return array_equivalent(com._values_from_object(self), + com._values_from_object(other)) except Exception: return False @@ -2540,8 +2533,8 @@ def get_value(self, series, key): # invalid type as an indexer pass - s = _values_from_object(series) - k = _values_from_object(key) + s = com._values_from_object(series) + k = com._values_from_object(key) k = self._convert_scalar_indexer(k, kind='getitem') try: @@ -2574,8 +2567,8 @@ def set_value(self, arr, key, value): Fast lookup of value from 1-dimensional ndarray. Only use this if you know what you're doing """ - self._engine.set_value(_values_from_object(arr), - _values_from_object(key), value) + self._engine.set_value(com._values_from_object(arr), + com._values_from_object(key), value) def _get_level_values(self, level): """ @@ -3194,8 +3187,8 @@ def _join_multi(self, other, how, return_indexers=True): other_is_mi = isinstance(other, MultiIndex) # figure out join names - self_names = _not_none(*self.names) - other_names = _not_none(*other.names) + self_names = com._not_none(*self.names) + other_names = com._not_none(*other.names) overlap = list(set(self_names) & set(other_names)) # need at least 1 in common, but not more than 1 @@ -3760,15 +3753,20 @@ def drop(self, labels, errors='raise'): Returns ------- dropped : Index + + Raises + ------ + KeyError + If none of the labels are found in the selected axis """ arr_dtype = 'object' if self.dtype == 'object' else None - labels = _index_labels_to_array(labels, dtype=arr_dtype) + labels = 
com._index_labels_to_array(labels, dtype=arr_dtype) indexer = self.get_indexer(labels) mask = indexer == -1 if mask.any(): if errors != 'ignore': - raise ValueError('labels %s not contained in axis' % - labels[mask]) + raise KeyError( + 'labels %s not contained in axis' % labels[mask]) indexer = indexer[~mask] return self.delete(indexer) @@ -3861,7 +3859,7 @@ def dropna(self, how='any'): return self._shallow_copy(self.values[~self._isnan]) return self._shallow_copy() - def _evaluate_with_timedelta_like(self, other, op, opstr): + def _evaluate_with_timedelta_like(self, other, op, opstr, reversed=False): raise TypeError("can only perform ops with timedelta like values") def _evaluate_with_datetime_like(self, other, op, opstr): @@ -3975,7 +3973,7 @@ def _validate_for_numeric_binop(self, other, op, opstr): internal method called by ops """ # if we are an inheritor of numeric, - # but not actually numeric (e.g. DatetimeIndex/PeriodInde) + # but not actually numeric (e.g. DatetimeIndex/PeriodIndex) if not self._is_numeric_dtype: raise TypeError("cannot evaluate a numeric op {opstr} " "for type: {typ}".format( @@ -3997,12 +3995,12 @@ def _validate_for_numeric_binop(self, other, op, opstr): if len(self) != len(other): raise ValueError("cannot evaluate a numeric op with " "unequal lengths") - other = _values_from_object(other) + other = com._values_from_object(other) if other.dtype.kind not in ['f', 'i', 'u']: raise TypeError("cannot evaluate a numeric op " "with a non-numeric dtype") elif isinstance(other, (ABCDateOffset, np.timedelta64, - Timedelta, datetime.timedelta)): + datetime.timedelta)): # higher up to handle pass elif isinstance(other, (Timestamp, np.datetime64)): @@ -4020,16 +4018,20 @@ def _add_numeric_methods_binary(cls): def _make_evaluate_binop(op, opstr, reversed=False, constructor=Index): def _evaluate_numeric_binop(self, other): + if isinstance(other, (ABCSeries, ABCDataFrame)): + return NotImplemented + other = self._validate_for_numeric_binop(other, op, 
opstr) # handle time-based others if isinstance(other, (ABCDateOffset, np.timedelta64, - Timedelta, datetime.timedelta)): - return self._evaluate_with_timedelta_like(other, op, opstr) + datetime.timedelta)): + return self._evaluate_with_timedelta_like(other, op, opstr, + reversed) elif isinstance(other, (Timestamp, np.datetime64)): return self._evaluate_with_datetime_like(other, op, opstr) - # if we are a reversed non-communative op + # if we are a reversed non-commutative op values = self.values if reversed: values, other = other, values @@ -4073,11 +4075,8 @@ def _evaluate_numeric_binop(self, other): cls.__divmod__ = _make_evaluate_binop( divmod, '__divmod__', - constructor=lambda result, **attrs: ( - Index(result[0], **attrs), - Index(result[1], **attrs), - ), - ) + constructor=lambda result, **attrs: (Index(result[0], **attrs), + Index(result[1], **attrs))) @classmethod def _add_numeric_methods_unary(cls): @@ -4267,8 +4266,7 @@ def _ensure_index(index_like, copy=False): def _get_na_value(dtype): if is_datetime64_any_dtype(dtype) or is_timedelta64_dtype(dtype): return libts.NaT - return {np.datetime64: libts.NaT, - np.timedelta64: libts.NaT}.get(dtype, np.nan) + return np.nan def _ensure_has_len(seq): diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index ac7cb30fa823d..2c7be2b21f959 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -11,8 +11,6 @@ is_list_like, is_interval_dtype, is_scalar) -from pandas.core.common import (_asarray_tuplesafe, - _values_from_object) from pandas.core.dtypes.missing import array_equivalent, isna from pandas.core.algorithms import take_1d @@ -21,6 +19,7 @@ from pandas.core.config import get_option from pandas.core.indexes.base import Index, _index_shared_docs from pandas.core import accessor +import pandas.core.common as com import pandas.core.base as base import pandas.core.missing as missing import pandas.core.indexes.base as ibase @@ -125,7 +124,7 @@ def 
_create_from_codes(self, codes, categories=None, ordered=None, CategoricalIndex """ - from pandas.core.categorical import Categorical + from pandas.core.arrays import Categorical if categories is None: categories = self.categories if ordered is None: @@ -162,7 +161,7 @@ def _create_categorical(self, data, categories=None, ordered=None, if not isinstance(data, ABCCategorical): if ordered is None and dtype is None: ordered = False - from pandas.core.categorical import Categorical + from pandas.core.arrays import Categorical data = Categorical(data, categories=categories, ordered=ordered, dtype=dtype) else: @@ -342,7 +341,7 @@ def __array__(self, dtype=None): def astype(self, dtype, copy=True): if is_interval_dtype(dtype): from pandas import IntervalIndex - return IntervalIndex.from_intervals(np.array(self)) + return IntervalIndex(np.array(self)) elif is_categorical_dtype(dtype): # GH 18630 dtype = self.dtype._update_dtype(dtype) @@ -442,7 +441,7 @@ def get_value(self, series, key): know what you're doing """ try: - k = _values_from_object(key) + k = com._values_from_object(key) k = self._convert_scalar_indexer(k, kind='getitem') indexer = self.get_loc(k) return series.iloc[indexer] @@ -462,7 +461,7 @@ def where(self, cond, other=None): other = self._na_value values = np.where(cond, self.values, other) - from pandas.core.categorical import Categorical + from pandas.core.arrays import Categorical cat = Categorical(values, categories=self.categories, ordered=self.ordered) @@ -620,7 +619,7 @@ def _convert_list_indexer(self, keyarr, kind=None): @Appender(_index_shared_docs['_convert_arr_indexer']) def _convert_arr_indexer(self, keyarr): - keyarr = _asarray_tuplesafe(keyarr) + keyarr = com._asarray_tuplesafe(keyarr) if self.categories._defer_to_indexing: return keyarr @@ -775,7 +774,7 @@ def _delegate_method(self, name, *args, **kwargs): def _add_accessors(cls): """ add in Categorical accessor methods """ - from pandas.core.categorical import Categorical + from 
pandas.core.arrays import Categorical CategoricalIndex._add_delegate_accessors( delegate=Categorical, accessors=["rename_categories", "reorder_categories", diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index ee2fdd213dd9a..f43c6dc567f69 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -31,8 +31,7 @@ from pandas.core.dtypes.missing import isna from pandas.core import common as com, algorithms from pandas.core.algorithms import checked_add_with_arr -from pandas.core.common import AbstractMethodError - +from pandas.errors import NullFrequencyError import pandas.io.formats.printing as printing from pandas._libs import lib, iNaT, NaT from pandas._libs.tslibs.period import Period @@ -244,7 +243,7 @@ def _box_func(self): """ box function to get object from internal representation """ - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) def _box_values(self, values): """ @@ -441,9 +440,10 @@ def _isnan(self): @property def asobject(self): - """DEPRECATED: Use ``astype(object)`` instead. + """Return object Index which contains boxed values. - return object Index which contains boxed values + .. deprecated:: 0.23.0 + Use ``astype(object)`` instead. 
*this is an internal non-public method* """ @@ -587,7 +587,7 @@ def argmax(self, axis=None, *args, **kwargs): @property def _formatter_func(self): - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) def _format_attrs(self): """ @@ -645,7 +645,7 @@ def _add_datelike(self, other): type(other).__name__)) def _sub_datelike(self, other): - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) def _sub_period(self, other): return NotImplemented @@ -675,22 +675,25 @@ def __add__(self, other): return NotImplemented elif is_timedelta64_dtype(other): return self._add_delta(other) + elif isinstance(other, (DateOffset, timedelta)): + return self._add_delta(other) + elif is_offsetlike(other): + # Array/Index of DateOffset objects + return self._add_offset_array(other) elif isinstance(self, TimedeltaIndex) and isinstance(other, Index): if hasattr(other, '_add_delta'): return other._add_delta(self) raise TypeError("cannot add TimedeltaIndex and {typ}" .format(typ=type(other))) - elif isinstance(other, (DateOffset, timedelta)): - return self._add_delta(other) elif is_integer(other): return self.shift(other) elif isinstance(other, (datetime, np.datetime64)): return self._add_datelike(other) - elif is_offsetlike(other): - # Array/Index of DateOffset objects - return self._add_offset_array(other) elif isinstance(other, Index): return self._add_datelike(other) + elif is_integer_dtype(other) and self.freq is None: + # GH#19123 + raise NullFrequencyError("Cannot shift with no freq") else: # pragma: no cover return NotImplemented @@ -708,6 +711,11 @@ def __sub__(self, other): return NotImplemented elif is_timedelta64_dtype(other): return self._add_delta(-other) + elif isinstance(other, (DateOffset, timedelta)): + return self._add_delta(-other) + elif is_offsetlike(other): + # Array/Index of DateOffset objects + return self._sub_offset_array(other) elif isinstance(self, TimedeltaIndex) and isinstance(other, Index): if not isinstance(other, 
TimedeltaIndex): raise TypeError("cannot subtract TimedeltaIndex and {typ}" @@ -715,22 +723,19 @@ def __sub__(self, other): return self._add_delta(-other) elif isinstance(other, DatetimeIndex): return self._sub_datelike(other) - elif isinstance(other, (DateOffset, timedelta)): - return self._add_delta(-other) elif is_integer(other): return self.shift(-other) elif isinstance(other, (datetime, np.datetime64)): return self._sub_datelike(other) elif isinstance(other, Period): return self._sub_period(other) - elif is_offsetlike(other): - # Array/Index of DateOffset objects - return self._sub_offset_array(other) elif isinstance(other, Index): raise TypeError("cannot subtract {typ1} and {typ2}" .format(typ1=type(self).__name__, typ2=type(other).__name__)) - + elif is_integer_dtype(other) and self.freq is None: + # GH#19123 + raise NullFrequencyError("Cannot shift with no freq") else: # pragma: no cover return NotImplemented @@ -830,7 +835,7 @@ def shift(self, n, freq=None): return self if self.freq is None: - raise ValueError("Cannot shift with no freq") + raise NullFrequencyError("Cannot shift with no freq") start = self[0] + n * self.freq end = self[-1] + n * self.freq diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index ef0406a4b9f9d..8dd41c022d163 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -13,14 +13,14 @@ _INT64_DTYPE, _NS_DTYPE, is_object_dtype, - is_datetime64_dtype, + is_datetime64_dtype, is_datetime64tz_dtype, is_datetimetz, is_dtype_equal, is_timedelta64_dtype, is_integer, is_float, is_integer_dtype, - is_datetime64_ns_dtype, + is_datetime64_ns_dtype, is_datetimelike, is_period_dtype, is_bool_dtype, is_string_like, @@ -34,7 +34,6 @@ import pandas.core.dtypes.concat as _concat from pandas.errors import PerformanceWarning -from pandas.core.common import _values_from_object, _maybe_box from pandas.core.algorithms import checked_add_with_arr from pandas.core.indexes.base import Index, 
_index_shared_docs @@ -106,8 +105,12 @@ def _dt_index_cmp(opname, cls, nat_result=False): def wrapper(self, other): func = getattr(super(DatetimeIndex, self), opname) - if (isinstance(other, datetime) or - isinstance(other, compat.string_types)): + + if isinstance(other, (datetime, compat.string_types)): + if isinstance(other, datetime): + # GH#18435 strings get a pass from tzawareness compat + self._assert_tzawareness_compat(other) + other = _to_m8(other, tz=self.tz) result = func(other) if isna(other): @@ -117,8 +120,12 @@ def wrapper(self, other): other = DatetimeIndex(other) elif not isinstance(other, (np.ndarray, Index, ABCSeries)): other = _ensure_datetime64(other) + + if is_datetimelike(other): + self._assert_tzawareness_compat(other) + result = func(np.asarray(other)) - result = _values_from_object(result) + result = com._values_from_object(result) if isinstance(other, Index): o_mask = other.values.view('i8') == libts.iNaT @@ -275,7 +282,6 @@ def _join_i8_wrapper(joinf, **kwargs): _left_indexer = _join_i8_wrapper(libjoin.left_join_indexer_int64) _left_indexer_unique = _join_i8_wrapper( libjoin.left_join_indexer_unique_int64, with_indexers=False) - _arrmap = None @classmethod def _add_comparison_methods(cls): @@ -513,8 +519,7 @@ def _generate(cls, start, end, periods, name, offset, tz = tz.localize(date.replace(tzinfo=None)).tzinfo if tz is not None and inferred_tz is not None: - if not (timezones.get_timezone(inferred_tz) == - timezones.get_timezone(tz)): + if not timezones.tz_compare(inferred_tz, tz): raise AssertionError("Inferred time zone not equal to passed " "time zone") @@ -652,6 +657,23 @@ def _simple_new(cls, values, name=None, freq=None, tz=None, result._reset_identity() return result + def _assert_tzawareness_compat(self, other): + # adapted from _Timestamp._assert_tzawareness_compat + other_tz = getattr(other, 'tzinfo', None) + if is_datetime64tz_dtype(other): + # Get tzinfo from Series dtype + other_tz = other.dtype.tz + if other is libts.NaT: 
+ # pd.NaT quacks both aware and naive + pass + elif self.tz is None: + if other_tz is not None: + raise TypeError('Cannot compare tz-naive and tz-aware ' + 'datetime-like objects.') + elif other_tz is None: + raise TypeError('Cannot compare tz-naive and tz-aware ' + 'datetime-like objects') + @property def tzinfo(self): """ @@ -1170,7 +1192,7 @@ def _maybe_utc_convert(self, other): raise TypeError('Cannot join tz-naive with tz-aware ' 'DatetimeIndex') - if self.tz != other.tz: + if not timezones.tz_compare(self.tz, other.tz): this = self.tz_convert('UTC') other = other.tz_convert('UTC') return this, other @@ -1274,7 +1296,7 @@ def __iter__(self): def _wrap_union_result(self, other, result): name = self.name if self.name == other.name else None - if self.tz != other.tz: + if not timezones.tz_compare(self.tz, other.tz): raise ValueError('Passed item and index have different timezone') return self._simple_new(result, name=name, freq=None, tz=self.tz) @@ -1464,8 +1486,8 @@ def get_value(self, series, key): return series.take(locs) try: - return _maybe_box(self, Index.get_value(self, series, key), - series, key) + return com._maybe_box(self, Index.get_value(self, series, key), + series, key) except KeyError: try: loc = self._get_string_slice(key) @@ -1484,9 +1506,9 @@ def get_value_maybe_box(self, series, key): key = Timestamp(key, tz=self.tz) elif not isinstance(key, Timestamp): key = Timestamp(key) - values = self._engine.get_value(_values_from_object(series), + values = self._engine.get_value(com._values_from_object(series), key, tz=self.tz) - return _maybe_box(self, values, series, key) + return com._maybe_box(self, values, series, key) def get_loc(self, key, method=None, tolerance=None): """ diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index fd1980f9ab429..3bf783b5a2faa 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -1,11 +1,12 @@ """ define the IntervalIndex """ import numpy as np +import 
warnings from pandas.core.dtypes.missing import notna, isna from pandas.core.dtypes.generic import ABCDatetimeIndex, ABCPeriodIndex from pandas.core.dtypes.dtypes import IntervalDtype -from pandas.core.dtypes.cast import maybe_convert_platform +from pandas.core.dtypes.cast import maybe_convert_platform, find_common_type from pandas.core.dtypes.common import ( _ensure_platform_int, is_list_like, @@ -16,10 +17,12 @@ is_integer_dtype, is_float_dtype, is_interval_dtype, + is_object_dtype, is_scalar, is_float, is_number, - is_integer) + is_integer, + pandas_dtype) from pandas.core.indexes.base import ( Index, _ensure_index, default_pprint, _index_shared_docs) @@ -33,9 +36,7 @@ from pandas.core.indexes.timedeltas import timedelta_range from pandas.core.indexes.multi import MultiIndex from pandas.compat.numpy import function as nv -from pandas.core.common import ( - _all_not_none, _any_none, _asarray_tuplesafe, _count_not_none, - is_bool_indexer, _maybe_box_datetimelike, _not_none) +import pandas.core.common as com from pandas.util._decorators import cache_readonly, Appender from pandas.core.config import get_option from pandas.tseries.frequencies import to_offset @@ -114,7 +115,7 @@ def maybe_convert_platform_interval(values): # GH 19016 # empty lists/tuples get object dtype by default, but this is not # prohibited for IntervalIndex, so coerce to integer instead - return np.array([], dtype=np.intp) + return np.array([], dtype=np.int64) return maybe_convert_platform(values) @@ -151,6 +152,10 @@ class IntervalIndex(IntervalMixin, Index): Name to be stored in the index. 
copy : boolean, default False Copy the meta-data + dtype : dtype or None, default None + If None, dtype will be inferred + + .. versionadded:: 0.23.0 Attributes ---------- @@ -167,7 +172,6 @@ class IntervalIndex(IntervalMixin, Index): from_arrays from_tuples from_breaks - from_intervals contains Examples @@ -181,8 +185,7 @@ class IntervalIndex(IntervalMixin, Index): It may also be constructed using one of the constructor methods: :meth:`IntervalIndex.from_arrays`, - :meth:`IntervalIndex.from_breaks`, :meth:`IntervalIndex.from_intervals` - and :meth:`IntervalIndex.from_tuples`. + :meth:`IntervalIndex.from_breaks`, and :meth:`IntervalIndex.from_tuples`. See further examples in the doc strings of ``interval_range`` and the mentioned constructor methods. @@ -204,15 +207,13 @@ class IntervalIndex(IntervalMixin, Index): _typ = 'intervalindex' _comparables = ['name'] _attributes = ['name', 'closed'] - _allow_index_ops = True # we would like our indexing holder to defer to us _defer_to_indexing = True _mask = None - def __new__(cls, data, closed=None, - name=None, copy=False, dtype=None, + def __new__(cls, data, closed=None, name=None, copy=False, dtype=None, fastpath=False, verify_integrity=True): if fastpath: @@ -235,7 +236,8 @@ def __new__(cls, data, closed=None, data = maybe_convert_platform_interval(data) left, right, infer_closed = intervals_to_interval_bounds(data) - if _all_not_none(closed, infer_closed) and closed != infer_closed: + if (com._all_not_none(closed, infer_closed) and + closed != infer_closed): # GH 18421 msg = ("conflicting values for closed: constructor got " "'{closed}', inferred from data '{infer_closed}'" @@ -244,19 +246,28 @@ def __new__(cls, data, closed=None, closed = closed or infer_closed - return cls._simple_new(left, right, closed, name, - copy=copy, verify_integrity=verify_integrity) + return cls._simple_new(left, right, closed, name, copy=copy, + dtype=dtype, verify_integrity=verify_integrity) @classmethod - def _simple_new(cls, left,
right, closed=None, name=None, - copy=False, verify_integrity=True): + def _simple_new(cls, left, right, closed=None, name=None, copy=False, + dtype=None, verify_integrity=True): result = IntervalMixin.__new__(cls) - if closed is None: - closed = 'right' + closed = closed or 'right' left = _ensure_index(left, copy=copy) right = _ensure_index(right, copy=copy) + if dtype is not None: + # GH 19262: dtype must be an IntervalDtype to override inferred + dtype = pandas_dtype(dtype) + if not is_interval_dtype(dtype): + msg = 'dtype must be an IntervalDtype, got {dtype}' + raise TypeError(msg.format(dtype=dtype)) + elif dtype.subtype is not None: + left = left.astype(dtype.subtype) + right = right.astype(dtype.subtype) + # coerce dtypes to match if needed if is_float_dtype(left) and is_integer_dtype(right): right = right.astype(left.dtype) @@ -303,7 +314,7 @@ def _shallow_copy(self, left=None, right=None, **kwargs): # only single value passed, could be an IntervalIndex # or array of Intervals if not isinstance(left, IntervalIndex): - left = type(self).from_intervals(left) + left = self._constructor(left) left, right = left.left, left.right else: @@ -321,7 +332,7 @@ def _validate(self): Verify that the IntervalIndex is valid. 
""" if self.closed not in _VALID_CLOSED: - raise ValueError("invalid options for 'closed': {closed}" + raise ValueError("invalid option for 'closed': {closed}" .format(closed=self.closed)) if len(self.left) != len(self.right): raise ValueError('left and right must have the same length') @@ -355,7 +366,7 @@ def _engine(self): @property def _constructor(self): - return type(self).from_intervals + return type(self) def __contains__(self, key): """ @@ -401,7 +412,8 @@ def contains(self, key): return False @classmethod - def from_breaks(cls, breaks, closed='right', name=None, copy=False): + def from_breaks(cls, breaks, closed='right', name=None, copy=False, + dtype=None): """ Construct an IntervalIndex from an array of splits @@ -416,6 +428,10 @@ def from_breaks(cls, breaks, closed='right', name=None, copy=False): Name to be stored in the index. copy : boolean, default False copy the data + dtype : dtype or None, default None + If None, dtype will be inferred + + .. versionadded:: 0.23.0 Examples -------- @@ -429,18 +445,17 @@ def from_breaks(cls, breaks, closed='right', name=None, copy=False): interval_range : Function to create a fixed frequency IntervalIndex IntervalIndex.from_arrays : Construct an IntervalIndex from a left and right array - IntervalIndex.from_intervals : Construct an IntervalIndex from an array - of Interval objects IntervalIndex.from_tuples : Construct an IntervalIndex from a list/array of tuples """ breaks = maybe_convert_platform_interval(breaks) return cls.from_arrays(breaks[:-1], breaks[1:], closed, - name=name, copy=copy) + name=name, copy=copy, dtype=dtype) @classmethod - def from_arrays(cls, left, right, closed='right', name=None, copy=False): + def from_arrays(cls, left, right, closed='right', name=None, copy=False, + dtype=None): """ Construct an IntervalIndex from a a left and right array @@ -457,6 +472,10 @@ def from_arrays(cls, left, right, closed='right', name=None, copy=False): Name to be stored in the index.
copy : boolean, default False copy the data + dtype : dtype or None, default None + If None, dtype will be inferred + + .. versionadded:: 0.23.0 Examples -------- @@ -470,22 +489,23 @@ def from_arrays(cls, left, right, closed='right', name=None, copy=False): interval_range : Function to create a fixed frequency IntervalIndex IntervalIndex.from_breaks : Construct an IntervalIndex from an array of splits - IntervalIndex.from_intervals : Construct an IntervalIndex from an array - of Interval objects IntervalIndex.from_tuples : Construct an IntervalIndex from a list/array of tuples """ left = maybe_convert_platform_interval(left) right = maybe_convert_platform_interval(right) - return cls._simple_new(left, right, closed, name=name, - copy=copy, verify_integrity=True) + return cls._simple_new(left, right, closed, name=name, copy=copy, + dtype=dtype, verify_integrity=True) @classmethod - def from_intervals(cls, data, name=None, copy=False): + def from_intervals(cls, data, closed=None, name=None, copy=False, + dtype=None): """ Construct an IntervalIndex from a 1d array of Interval objects + .. deprecated:: 0.23.0 + Parameters ---------- data : array-like (1-dimensional) @@ -495,6 +515,10 @@ def from_intervals(cls, data, name=None, copy=False): Name to be stored in the index.
copy : boolean, default False by-default copy the data, this is compat only and ignored + dtype : dtype or None, default None + If None, dtype will be inferred + + .. versionadded:: 0.23.0 Examples -------- @@ -520,16 +544,14 @@ def from_intervals(cls, data, name=None, copy=False): IntervalIndex.from_tuples : Construct an IntervalIndex from a list/array of tuples """ - if isinstance(data, IntervalIndex): - left, right, closed = data.left, data.right, data.closed - name = name or data.name - else: - data = maybe_convert_platform_interval(data) - left, right, closed = intervals_to_interval_bounds(data) - return cls.from_arrays(left, right, closed, name=name, copy=False) + msg = ('IntervalIndex.from_intervals is deprecated and will be ' + 'removed in a future version; use IntervalIndex(...) instead') + warnings.warn(msg, FutureWarning, stacklevel=2) + return cls(data, closed=closed, name=name, copy=copy, dtype=dtype) @classmethod - def from_tuples(cls, data, closed='right', name=None, copy=False): + def from_tuples(cls, data, closed='right', name=None, copy=False, + dtype=None): """ Construct an IntervalIndex from a list/array of tuples @@ -544,10 +566,14 @@ def from_tuples(cls, data, closed='right', name=None, copy=False): Name to be stored in the index.
copy : boolean, default False by-default copy the data, this is compat only and ignored + dtype : dtype or None, default None + If None, dtype will be inferred + + .. versionadded:: 0.23.0 Examples -------- - >>> pd.IntervalIndex.from_tuples([(0, 1), (1,2)]) + >>> pd.IntervalIndex.from_tuples([(0, 1), (1, 2)]) IntervalIndex([(0, 1], (1, 2]], closed='right', dtype='interval[int64]') @@ -558,8 +584,6 @@ def from_tuples(cls, data, closed='right', name=None, copy=False): right array IntervalIndex.from_breaks : Construct an IntervalIndex from an array of splits - IntervalIndex.from_intervals : Construct an IntervalIndex from an array - of Interval objects """ if len(data): left, right = [], [] @@ -570,15 +594,22 @@ def from_tuples(cls, data, closed='right', name=None, copy=False): if isna(d): lhs = rhs = np.nan else: - lhs, rhs = d + try: + # need list of length 2 tuples, e.g. [(0, 1), (1, 2), ...] + lhs, rhs = d + except ValueError: + msg = ('IntervalIndex.from_tuples requires tuples of ' + 'length 2, got {tpl}').format(tpl=d) + raise ValueError(msg) + except TypeError: + msg = ('IntervalIndex.from_tuples received an invalid ' + 'item, {tpl}').format(tpl=d) + raise TypeError(msg) left.append(lhs) right.append(rhs) - # TODO - # if we have nulls and we previous had *only* - # integer data, then we have changed the dtype - - return cls.from_arrays(left, right, closed, name=name, copy=False) + return cls.from_arrays(left, right, closed, name=name, copy=False, + dtype=dtype) def to_tuples(self, na_tuple=True): """ @@ -600,7 +631,7 @@ def to_tuples(self, na_tuple=True): >>> idx.to_tuples(na_tuple=False) Index([(0.0, 1.0), nan, (2.0, 3.0)], dtype='object') """ - tuples = _asarray_tuplesafe(zip(self.left, self.right)) + tuples = com._asarray_tuplesafe(zip(self.left, self.right)) if not na_tuple: # GH 18756 tuples = np.where(~self._isnan, tuples, np.nan) @@ -698,8 +729,16 @@ def copy(self, deep=False, name=None): @Appender(_index_shared_docs['astype']) def astype(self, dtype,
copy=True): - if is_interval_dtype(dtype): - return self.copy() if copy else self + dtype = pandas_dtype(dtype) + if is_interval_dtype(dtype) and dtype != self.dtype: + try: + new_left = self.left.astype(dtype.subtype) + new_right = self.right.astype(dtype.subtype) + except TypeError: + msg = ('Cannot convert {dtype} to {new_dtype}; subtypes are ' + 'incompatible') + raise TypeError(msg.format(dtype=self.dtype, new_dtype=dtype)) + return self._shallow_copy(new_left, new_right) return super(IntervalIndex, self).astype(dtype, copy=copy) @cache_readonly @@ -912,7 +951,7 @@ def get_loc(self, key, method=None): Examples --------- >>> i1, i2 = pd.Interval(0, 1), pd.Interval(1, 2) - >>> index = pd.IntervalIndex.from_intervals([i1, i2]) + >>> index = pd.IntervalIndex([i1, i2]) >>> index.get_loc(1) 0 @@ -928,7 +967,7 @@ def get_loc(self, key, method=None): relevant intervals. >>> i3 = pd.Interval(0, 2) - >>> overlapping_index = pd.IntervalIndex.from_intervals([i2, i3]) + >>> overlapping_index = pd.IntervalIndex([i2, i3]) >>> overlapping_index.get_loc(1.5) array([0, 1], dtype=int64) """ @@ -965,7 +1004,7 @@ def get_loc(self, key, method=None): return self._engine.get_loc(key) def get_value(self, series, key): - if is_bool_indexer(key): + if com.is_bool_indexer(key): loc = key elif is_list_like(key): loc = self.get_indexer(key) @@ -1142,12 +1181,17 @@ def insert(self, loc, item): new_right = self.right.insert(loc, right_insert) return self._shallow_copy(new_left, new_right) - def _as_like_interval_index(self, other, error_msg): + def _as_like_interval_index(self, other): self._assert_can_do_setop(other) other = _ensure_index(other) - if (not isinstance(other, IntervalIndex) or - self.closed != other.closed): - raise ValueError(error_msg) + if not isinstance(other, IntervalIndex): + msg = ('the other index needs to be an IntervalIndex too, but ' + 'was type {}').format(other.__class__.__name__) + raise TypeError(msg) + elif self.closed != other.closed: + msg = ('can only do 
set operations between two IntervalIndex ' + 'objects that are closed on the same side') + raise ValueError(msg) return other def _concat_same_dtype(self, to_concat, name): @@ -1286,12 +1330,26 @@ def equals(self, other): def _setop(op_name): def func(self, other): - msg = ('can only do set operations between two IntervalIndex ' - 'objects that are closed on the same side') - other = self._as_like_interval_index(other, msg) + other = self._as_like_interval_index(other) + + # GH 19016: ensure set op will not return a prohibited dtype + subtypes = [self.dtype.subtype, other.dtype.subtype] + common_subtype = find_common_type(subtypes) + if is_object_dtype(common_subtype): + msg = ('can only do {op} between two IntervalIndex ' + 'objects that have compatible dtypes') + raise TypeError(msg.format(op=op_name)) + result = getattr(self._multiindex, op_name)(other._multiindex) result_name = self.name if self.name == other.name else None - return type(self).from_tuples(result.values, closed=self.closed, + + # GH 19101: ensure empty results have correct dtype + if result.empty: + result = result.values.astype(self.dtype.subtype) + else: + result = result.values + + return type(self).from_tuples(result, closed=self.closed, name=result_name) return func @@ -1321,7 +1379,7 @@ def _is_type_compatible(a, b): return ((is_number(a) and is_number(b)) or (is_ts_compat(a) and is_ts_compat(b)) or (is_td_compat(a) and is_td_compat(b)) or - _any_none(a, b)) + com._any_none(a, b)) def interval_range(start=None, end=None, periods=None, freq=None, @@ -1400,13 +1458,13 @@ def interval_range(start=None, end=None, periods=None, freq=None, -------- IntervalIndex : an Index of intervals that are all closed on the same side. 
""" - if _count_not_none(start, end, periods) != 2: + if com._count_not_none(start, end, periods) != 2: raise ValueError('Of the three parameters: start, end, and periods, ' 'exactly two must be specified') - start = _maybe_box_datetimelike(start) - end = _maybe_box_datetimelike(end) - endpoint = next(_not_none(start, end)) + start = com._maybe_box_datetimelike(start) + end = com._maybe_box_datetimelike(end) + endpoint = next(com._not_none(start, end)) if not _is_valid_endpoint(start): msg = 'start must be numeric or datetime-like, got {start}' diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 7107378671ba5..510f7245cebd8 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -5,7 +5,7 @@ from sys import getsizeof import numpy as np -from pandas._libs import index as libindex, lib, Timestamp +from pandas._libs import algos as libalgos, index as libindex, lib, Timestamp from pandas.compat import range, zip, lrange, lzip, map from pandas.compat.numpy import function as nv @@ -22,11 +22,6 @@ is_scalar) from pandas.core.dtypes.missing import isna, array_equivalent from pandas.errors import PerformanceWarning, UnsortedIndexError -from pandas.core.common import (_any_not_none, - _values_from_object, - is_bool_indexer, - is_null_slice, - is_true_slices) import pandas.core.base as base from pandas.util._decorators import Appender, cache_readonly, deprecate_kwarg @@ -50,6 +45,87 @@ target_klass='MultiIndex or list of tuples')) +class MultiIndexUIntEngine(libindex.BaseMultiIndexCodesEngine, + libindex.UInt64Engine): + """ + This class manages a MultiIndex by mapping label combinations to positive + integers. + """ + _base = libindex.UInt64Engine + + def _codes_to_ints(self, codes): + """ + Transform combination(s) of uint64 in one uint64 (each), in a strictly + monotonic way (i.e. respecting the lexicographic order of integer + combinations): see BaseMultiIndexCodesEngine documentation. 
+ + Parameters + ---------- + codes : 1- or 2-dimensional array of dtype uint64 + Combinations of integers (one per row) + + Returns + ------ + int_keys : scalar or 1-dimensional array, of dtype uint64 + Integer(s) representing one combination (each) + """ + # Shift the representation of each level by the pre-calculated number + # of bits: + codes <<= self.offsets + + # Now sum and OR are in fact interchangeable. This is a simple + # composition of the (disjunct) significant bits of each level (i.e. + # each column in "codes") in a single positive integer: + if codes.ndim == 1: + # Single key + return np.bitwise_or.reduce(codes) + + # Multiple keys + return np.bitwise_or.reduce(codes, axis=1) + + +class MultiIndexPyIntEngine(libindex.BaseMultiIndexCodesEngine, + libindex.ObjectEngine): + """ + This class manages those (extreme) cases in which the number of possible + label combinations overflows the 64 bits integers, and uses an ObjectEngine + containing Python integers. + """ + _base = libindex.ObjectEngine + + def _codes_to_ints(self, codes): + """ + Transform combination(s) of uint64 in one Python integer (each), in a + strictly monotonic way (i.e. respecting the lexicographic order of + integer combinations): see BaseMultiIndexCodesEngine documentation. + + Parameters + ---------- + codes : 1- or 2-dimensional array of dtype uint64 + Combinations of integers (one per row) + + Returns + ------ + int_keys : int, or 1-dimensional array of dtype object + Integer(s) representing one combination (each) + """ + + # Shift the representation of each level by the pre-calculated number + # of bits. Since this can overflow uint64, first make sure we are + # working with Python integers: + codes = codes.astype('object') << self.offsets + + # Now sum and OR are in fact interchangeable. This is a simple + # composition of the (disjunct) significant bits of each level (i.e. 
+ # each column in "codes") in a single positive integer (per row): + if codes.ndim == 1: + # Single key + return np.bitwise_or.reduce(codes) + + # Multiple keys + return np.bitwise_or.reduce(codes, axis=1) + + class MultiIndex(Index): """ A multi-level, or hierarchical, index object for pandas objects @@ -328,8 +404,9 @@ def _set_labels(self, labels, level=None, copy=False, validate=True, else: level = [self._get_level_number(l) for l in level] new_labels = list(self._labels) - for l, lev, lab in zip(level, self.levels, labels): - new_labels[l] = _ensure_frozen( + for lev_idx, lab in zip(level, labels): + lev = self.levels[lev_idx] + new_labels[lev_idx] = _ensure_frozen( lab, lev, copy=copy)._shallow_copy() new_labels = FrozenList(new_labels) @@ -538,7 +615,7 @@ def _format_attrs(self): max_seq_items=False)), ('labels', ibase.default_pprint(self._labels, max_seq_items=False))] - if _any_not_none(*self.names): + if com._any_not_none(*self.names): attrs.append(('names', ibase.default_pprint(self.names))) if self.sortorder is not None: attrs.append(('sortorder', ibase.default_pprint(self.sortorder))) @@ -691,16 +768,25 @@ def _get_level_number(self, level): @cache_readonly def _engine(self): - - # choose our engine based on our size - # the hashing based MultiIndex for larger - # sizes, and the MultiIndexOjbect for smaller - # xref: https://github.com/pandas-dev/pandas/pull/16324 - l = len(self) - if l > 10000: - return libindex.MultiIndexHashEngine(lambda: self, l) - - return libindex.MultiIndexObjectEngine(lambda: self.values, l) + # Calculate the number of bits needed to represent labels in each + # level, as log2 of their sizes (including -1 for NaN): + sizes = np.ceil(np.log2([len(l) + 1 for l in self.levels])) + + # Sum bit counts, starting from the _right_.... + lev_bits = np.cumsum(sizes[::-1])[::-1] + + # ... 
in order to obtain offsets such that sorting the combination of + # shifted codes (one for each level, resulting in a unique integer) is + # equivalent to sorting lexicographically the codes themselves. Notice + # that each level needs to be shifted by the number of bits needed to + # represent the _previous_ ones: + offsets = np.concatenate([lev_bits[1:], [0]]).astype('uint64') + + # Check the total number of bits needed for our representation: + if lev_bits[0] > 64: + # The levels would overflow a 64 bit uint - use Python integers: + return MultiIndexPyIntEngine(self.levels, self.labels, offsets) + return MultiIndexUIntEngine(self.levels, self.labels, offsets) @property def values(self): @@ -862,8 +948,8 @@ def get_value(self, series, key): from pandas.core.indexing import maybe_droplevels # Label-based - s = _values_from_object(series) - k = _values_from_object(key) + s = com._values_from_object(series) + k = com._values_from_object(key) def _try_mi(k): # TODO: what if a level contains tuples?? 
@@ -1136,7 +1222,7 @@ def lexsort_depth(self): int64_labels = [_ensure_int64(lab) for lab in self.labels] for k in range(self.nlevels, 0, -1): - if lib.is_lexsorted(int64_labels[:k]): + if libalgos.is_lexsorted(int64_labels[:k]): return k return 0 @@ -1181,7 +1267,7 @@ def from_arrays(cls, arrays, sortorder=None, names=None): if len(arrays[i]) != len(arrays[i - 1]): raise ValueError('all arrays must be same length') - from pandas.core.categorical import _factorize_from_iterables + from pandas.core.arrays.categorical import _factorize_from_iterables labels, levels = _factorize_from_iterables(arrays) if names is None: @@ -1275,7 +1361,7 @@ def from_product(cls, iterables, sortorder=None, names=None): MultiIndex.from_arrays : Convert list of arrays to MultiIndex MultiIndex.from_tuples : Convert list of tuples to MultiIndex """ - from pandas.core.categorical import _factorize_from_iterables + from pandas.core.arrays.categorical import _factorize_from_iterables from pandas.core.reshape.util import cartesian_product if not is_list_like(iterables): @@ -1473,7 +1559,7 @@ def __getitem__(self, key): return tuple(retval) else: - if is_bool_indexer(key): + if com.is_bool_indexer(key): key = np.asarray(key) sortorder = self.sortorder else: @@ -1611,7 +1697,7 @@ def drop(self, labels, level=None, errors='raise'): inds.append(loc) elif isinstance(loc, slice): inds.extend(lrange(loc.start, loc.stop)) - elif is_bool_indexer(loc): + elif com.is_bool_indexer(loc): if self.lexsort_depth == 0: warnings.warn('dropping on a non-lexsorted multi-index' ' without a level parameter may impact ' @@ -1748,7 +1834,7 @@ def _get_labels_for_sorting(self): for sorting, where we need to disambiguate that -1 is not a valid valid """ - from pandas.core.categorical import Categorical + from pandas.core.arrays import Categorical def cats(label): return np.arange(np.array(label).max() + 1 if len(label) else 0, @@ -1889,16 +1975,11 @@ def get_indexer(self, target, method=None, limit=None, 
tolerance=None): if tolerance is not None: raise NotImplementedError("tolerance not implemented yet " 'for MultiIndex') - indexer = self._get_fill_indexer(target, method, limit) + indexer = self._engine.get_indexer(target, method, limit) elif method == 'nearest': raise NotImplementedError("method='nearest' not implemented yet " 'for MultiIndex; see GitHub issue 9365') else: - # we may not compare equally because of hashing if we - # don't have the same dtypes - if self._inferred_type_levels != target._inferred_type_levels: - return Index(self.values).get_indexer(target.values) - indexer = self._engine.get_indexer(target) return _ensure_platform_int(indexer) @@ -2122,6 +2203,11 @@ def _maybe_to_slice(loc): if not isinstance(key, tuple): loc = self._get_level_indexer(key, level=0) + + # _get_level_indexer returns an empty slice if the key has + # been dropped from the MultiIndex + if isinstance(loc, slice) and loc.start == loc.stop: + raise KeyError(key) return _maybe_to_slice(loc) keylen = len(key) @@ -2130,17 +2216,6 @@ def _maybe_to_slice(loc): ''.format(keylen, self.nlevels)) if keylen == self.nlevels and self.is_unique: - - def _maybe_str_to_time_stamp(key, lev): - if lev.is_all_dates and not isinstance(key, Timestamp): - try: - return Timestamp(key, tz=getattr(lev, 'tz', None)) - except Exception: - pass - return key - - key = _values_from_object(key) - key = tuple(map(_maybe_str_to_time_stamp, key, self.levels)) return self._engine.get_loc(key) # -- partial selection or non-unique index @@ -2273,34 +2348,9 @@ def partial_selection(key, indexer=None): return indexer, maybe_droplevels(indexer, ilevels, drop_level) - if len(key) == self.nlevels: - - if self.is_unique: - - # here we have a completely specified key, but are - # using some partial string matching here - # GH4758 - all_dates = ((l.is_all_dates and - not isinstance(k, compat.string_types)) - for k, l in zip(key, self.levels)) - can_index_exactly = any(all_dates) - if (any(l.is_all_dates - for k, l in 
zip(key, self.levels)) and - not can_index_exactly): - indexer = self.get_loc(key) - - # we have a multiple selection here - if (not isinstance(indexer, slice) or - indexer.stop - indexer.start != 1): - return partial_selection(key, indexer) - - key = tuple(self[indexer].tolist()[0]) - - return (self._engine.get_loc( - _values_from_object(key)), None) - - else: - return partial_selection(key) + if len(key) == self.nlevels and self.is_unique: + # Complete key in unique index -> standard get_loc + return (self._engine.get_loc(key), None) else: return partial_selection(key) else: @@ -2457,7 +2507,7 @@ def get_locs(self, seq): """ # must be lexsorted to at least as many levels - true_slices = [i for (i, s) in enumerate(is_true_slices(seq)) if s] + true_slices = [i for (i, s) in enumerate(com.is_true_slices(seq)) if s] if true_slices and true_slices[-1] >= self.lexsort_depth: raise UnsortedIndexError('MultiIndex slicing requires the index ' 'to be lexsorted: slicing on levels {0}, ' @@ -2474,7 +2524,7 @@ def _convert_to_indexer(r): m = np.zeros(n, dtype=bool) m[r] = True r = m.nonzero()[0] - elif is_bool_indexer(r): + elif com.is_bool_indexer(r): if len(r) != n: raise ValueError("cannot index with a boolean indexer " "that is not the same length as the " @@ -2492,7 +2542,7 @@ def _update_indexer(idxr, indexer=indexer): for i, k in enumerate(seq): - if is_bool_indexer(k): + if com.is_bool_indexer(k): # a boolean indexer, must be the same length! 
k = np.asarray(k) indexer = _update_indexer(_convert_to_indexer(k), @@ -2521,7 +2571,7 @@ def _update_indexer(idxr, indexer=indexer): # no matches we are done return Int64Index([])._values - elif is_null_slice(k): + elif com.is_null_slice(k): # empty slice indexer = _update_indexer(None, indexer=indexer) @@ -2588,8 +2638,8 @@ def equals(self, other): return False if not isinstance(other, MultiIndex): - return array_equivalent(self._values, - _values_from_object(_ensure_index(other))) + other_vals = com._values_from_object(_ensure_index(other)) + return array_equivalent(self._values, other_vals) if self.nlevels != other.nlevels: return False diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 6337c2f73d5ec..b02aee0495d8c 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -1,6 +1,6 @@ import numpy as np from pandas._libs import (index as libindex, - algos as libalgos, join as libjoin) + join as libjoin) from pandas.core.dtypes.common import ( is_dtype_equal, pandas_dtype, @@ -9,10 +9,10 @@ is_bool, is_bool_dtype, is_scalar) -from pandas.core.common import _asarray_tuplesafe, _values_from_object from pandas import compat from pandas.core import algorithms +import pandas.core.common as com from pandas.core.indexes.base import ( Index, InvalidIndexError, _index_shared_docs) from pandas.util._decorators import Appender, cache_readonly @@ -158,7 +158,6 @@ class Int64Index(NumericIndex): __doc__ = _num_index_shared_docs['class_descr'] % _int64_descr_args _typ = 'int64index' - _arrmap = libalgos.arrmap_int64 _left_indexer_unique = libjoin.left_join_indexer_unique_int64 _left_indexer = libjoin.left_join_indexer_int64 _inner_indexer = libjoin.inner_join_indexer_int64 @@ -217,7 +216,6 @@ class UInt64Index(NumericIndex): __doc__ = _num_index_shared_docs['class_descr'] % _uint64_descr_args _typ = 'uint64index' - _arrmap = libalgos.arrmap_uint64 _left_indexer_unique = libjoin.left_join_indexer_unique_uint64 
_left_indexer = libjoin.left_join_indexer_uint64 _inner_indexer = libjoin.inner_join_indexer_uint64 @@ -251,9 +249,9 @@ def _convert_arr_indexer(self, keyarr): # Cast the indexer to uint64 if possible so # that the values returned from indexing are # also uint64. - keyarr = _asarray_tuplesafe(keyarr) + keyarr = com._asarray_tuplesafe(keyarr) if is_integer_dtype(keyarr): - return _asarray_tuplesafe(keyarr, dtype=np.uint64) + return com._asarray_tuplesafe(keyarr, dtype=np.uint64) return keyarr @Appender(_index_shared_docs['_convert_index_indexer']) @@ -296,7 +294,6 @@ class Float64Index(NumericIndex): _typ = 'float64index' _engine_type = libindex.Float64Engine - _arrmap = libalgos.arrmap_float64 _left_indexer_unique = libjoin.left_join_indexer_unique_float64 _left_indexer = libjoin.left_join_indexer_float64 _inner_indexer = libjoin.inner_join_indexer_float64 @@ -357,9 +354,9 @@ def get_value(self, series, key): if not is_scalar(key): raise InvalidIndexError - k = _values_from_object(key) + k = com._values_from_object(key) loc = self.get_loc(k) - new_values = _values_from_object(series)[loc] + new_values = com._values_from_object(series)[loc] return new_values diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 8b35b1a231551..1f8542ed5ee60 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -204,7 +204,6 @@ class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, Int64Index): DatetimeIndex : Index with datetime64 data TimedeltaIndex : Index of timedelta64 data """ - _box_scalars = True _typ = 'periodindex' _attributes = ['name', 'freq'] diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 741dca6be0630..a82ee6b2b44af 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -8,11 +8,13 @@ is_integer, is_scalar, is_int64_dtype) +from pandas.core.dtypes.generic import ABCSeries from pandas import compat from pandas.compat import lrange, range, get_range_parameters 
from pandas.compat.numpy import function as nv -from pandas.core.common import _all_none + +import pandas.core.common as com from pandas.core.indexes.base import Index, _index_shared_docs from pandas.util._decorators import Appender, cache_readonly import pandas.core.dtypes.concat as _concat @@ -88,7 +90,7 @@ def _ensure_int(value, field): return new_value - if _all_none(start, stop, step): + if com._all_none(start, stop, step): msg = "RangeIndex(...) must be called with integers" raise TypeError(msg) elif start is None: @@ -583,6 +585,8 @@ def _make_evaluate_binop(op, opstr, reversed=False, step=False): """ def _evaluate_numeric_binop(self, other): + if isinstance(other, ABCSeries): + return NotImplemented other = self._validate_for_numeric_binop(other, op, opstr) attrs = self._get_attributes_dict() @@ -592,7 +596,7 @@ def _evaluate_numeric_binop(self, other): self, other = other, self try: - # alppy if we have an override + # apply if we have an override if step: with np.errstate(all='ignore'): rstep = step(self._step, other) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index d28a09225e8b8..4b543262fc485 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -1,6 +1,8 @@ """ implement the TimedeltaIndex """ from datetime import timedelta +import warnings + import numpy as np from pandas.core.dtypes.common import ( _TD_DTYPE, @@ -15,7 +17,6 @@ _ensure_int64) from pandas.core.dtypes.missing import isna from pandas.core.dtypes.generic import ABCSeries -from pandas.core.common import _maybe_box, _values_from_object from pandas.core.indexes.base import Index from pandas.core.indexes.numeric import Int64Index @@ -75,7 +76,7 @@ def wrapper(self, other): other = TimedeltaIndex(other).values result = func(other) - result = _values_from_object(result) + result = com._values_from_object(result) if isinstance(other, Index): o_mask = other.values.view('i8') == iNaT @@ -169,7 +170,6 @@ def 
_join_i8_wrapper(joinf, **kwargs): _left_indexer = _join_i8_wrapper(libjoin.left_join_indexer_int64) _left_indexer_unique = _join_i8_wrapper( libjoin.left_join_indexer_unique_int64, with_indexers=False) - _arrmap = None # define my properties & methods for delegation _other_ops = [] @@ -364,13 +364,16 @@ def _add_delta(self, delta): # update name when delta is index name = com._maybe_match_name(self, delta) else: - raise ValueError("cannot add the type {0} to a TimedeltaIndex" - .format(type(delta))) + raise TypeError("cannot add the type {0} to a TimedeltaIndex" + .format(type(delta))) result = TimedeltaIndex(new_values, freq='infer', name=name) return result - def _evaluate_with_timedelta_like(self, other, op, opstr): + def _evaluate_with_timedelta_like(self, other, op, opstr, reversed=False): + if isinstance(other, ABCSeries): + # GH#19042 + return NotImplemented # allow division by a timedelta if opstr in ['__div__', '__truediv__', '__floordiv__']: @@ -381,10 +384,14 @@ def _evaluate_with_timedelta_like(self, other, op, opstr): "division by pd.NaT not implemented") i8 = self.asi8 + left, right = i8, other.value + if reversed: + left, right = right, left + if opstr in ['__floordiv__']: - result = i8 // other.value + result = left // right else: - result = op(i8, float(other.value)) + result = op(left, np.float64(right)) result = self._maybe_mask_results(result, convert='float64') return Index(result, name=self.name, copy=False) @@ -394,7 +401,8 @@ def _add_datelike(self, other): # adding a timedeltaindex to a datetimelike from pandas import Timestamp, DatetimeIndex if other is NaT: - result = self._nat_new(box=False) + # GH#19124 pd.NaT is treated like a timedelta + return self._nat_new() else: other = Timestamp(other) i8 = self.asi8 @@ -404,12 +412,54 @@ def _add_datelike(self, other): return DatetimeIndex(result, name=self.name, copy=False) def _sub_datelike(self, other): - from pandas import DatetimeIndex + # GH#19124 Timedelta - datetime is not in general 
well-defined. + # We make an exception for pd.NaT, which in this case quacks + # like a timedelta. if other is NaT: - result = self._nat_new(box=False) + return self._nat_new() else: raise TypeError("cannot subtract a datelike from a TimedeltaIndex") - return DatetimeIndex(result, name=self.name, copy=False) + + def _add_offset_array(self, other): + # Array/Index of DateOffset objects + try: + # TimedeltaIndex can only operate with a subset of DateOffset + # subclasses. Incompatible classes will raise AttributeError, + # which we re-raise as TypeError + if isinstance(other, ABCSeries): + return NotImplemented + elif len(other) == 1: + return self + other[0] + else: + from pandas.errors import PerformanceWarning + warnings.warn("Adding/subtracting array of DateOffsets to " + "{} not vectorized".format(type(self)), + PerformanceWarning) + return self.astype('O') + np.array(other) + # TODO: This works for __add__ but loses dtype in __sub__ + except AttributeError: + raise TypeError("Cannot add non-tick DateOffset to TimedeltaIndex") + + def _sub_offset_array(self, other): + # Array/Index of DateOffset objects + try: + # TimedeltaIndex can only operate with a subset of DateOffset + # subclasses. 
Incompatible classes will raise AttributeError, + # which we re-raise as TypeError + if isinstance(other, ABCSeries): + return NotImplemented + elif len(other) == 1: + return self - other[0] + else: + from pandas.errors import PerformanceWarning + warnings.warn("Adding/subtracting array of DateOffsets to " + "{} not vectorized".format(type(self)), + PerformanceWarning) + res_values = self.astype('O').values - np.array(other) + return self.__class__(res_values, freq='infer') + except AttributeError: + raise TypeError("Cannot subtrack non-tick DateOffset from" + " TimedeltaIndex") def _format_native_types(self, na_rep=u('NaT'), date_format=None, **kwargs): @@ -658,8 +708,8 @@ def get_value(self, series, key): return self.get_value_maybe_box(series, key) try: - return _maybe_box(self, Index.get_value(self, series, key), - series, key) + return com._maybe_box(self, Index.get_value(self, series, key), + series, key) except KeyError: try: loc = self._get_string_slice(key) @@ -675,8 +725,8 @@ def get_value(self, series, key): def get_value_maybe_box(self, series, key): if not isinstance(key, Timedelta): key = Timedelta(key) - values = self._engine.get_value(_values_from_object(series), key) - return _maybe_box(self, values, series, key) + values = self._engine.get_value(com._values_from_object(series), key) + return com._maybe_box(self, values, series, key) def get_loc(self, key, method=None, tolerance=None): """ @@ -926,6 +976,7 @@ def _is_convertible_to_index(other): def _is_convertible_to_td(key): + # TODO: Not all DateOffset objects are convertible to Timedelta return isinstance(key, (DateOffset, timedelta, Timedelta, np.timedelta64, compat.string_types)) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index fa6614d27cd19..9463512ac11de 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -20,9 +20,6 @@ from pandas.core.index import Index, MultiIndex import pandas.core.common as com -from pandas.core.common import (is_bool_indexer, 
_asarray_tuplesafe, - is_null_slice, is_full_slice, - _values_from_object) from pandas._libs.indexing import _NDFrameIndexerBase @@ -314,7 +311,7 @@ def _setitem_with_indexer(self, indexer, value): # (not null slices) then we must take the split path, xref # GH 10360 if (isinstance(ax, MultiIndex) and - not (is_integer(i) or is_null_slice(i))): + not (is_integer(i) or com.is_null_slice(i))): take_split_path = True break @@ -519,8 +516,8 @@ def setter(item, v): # multi-dim object # GH6149 (null slice), GH10408 (full bounds) if (isinstance(pi, tuple) and - all(is_null_slice(idx) or - is_full_slice(idx, len(self.obj)) + all(com.is_null_slice(idx) or + com.is_full_slice(idx, len(self.obj)) for idx in pi)): s = v else: @@ -613,8 +610,10 @@ def can_do_equal_len(): # logic here if (len(indexer) > info_axis and is_integer(indexer[info_axis]) and - all(is_null_slice(idx) for i, idx in enumerate(indexer) - if i != info_axis) and item_labels.is_unique): + all(com.is_null_slice(idx) + for i, idx in enumerate(indexer) + if i != info_axis) and + item_labels.is_unique): self.obj[item_labels[indexer[info_axis]]] = value return @@ -667,7 +666,7 @@ def _align_series(self, indexer, ser, multiindex_indexer=False): ravel = lambda i: i.ravel() if isinstance(i, np.ndarray) else i indexer = tuple(map(ravel, indexer)) - aligners = [not is_null_slice(idx) for idx in indexer] + aligners = [not com.is_null_slice(idx) for idx in indexer] sum_aligners = sum(aligners) single_aligner = sum_aligners == 1 is_frame = self.obj.ndim == 2 @@ -706,7 +705,7 @@ def _align_series(self, indexer, ser, multiindex_indexer=False): # multiple aligners (or null slices) if is_sequence(idx) or isinstance(idx, slice): - if single_aligner and is_null_slice(idx): + if single_aligner and com.is_null_slice(idx): continue new_ix = ax[idx] if not is_list_like_indexer(new_ix): @@ -767,7 +766,7 @@ def _align_frame(self, indexer, df): if isinstance(indexer, tuple): - aligners = [not is_null_slice(idx) for idx in indexer] + 
aligners = [not com.is_null_slice(idx) for idx in indexer] sum_aligners = sum(aligners) # TODO: single_aligner is not used single_aligner = sum_aligners == 1 # noqa @@ -869,7 +868,7 @@ def _getitem_tuple(self, tup): if i >= self.obj.ndim: raise IndexingError('Too many indexers') - if is_null_slice(key): + if com.is_null_slice(key): continue retval = getattr(retval, self.name)._getitem_axis(key, axis=i) @@ -890,7 +889,7 @@ def _multi_take_opportunity(self, tup): for indexer, ax in zip(tup, self.obj._data.axes): if isinstance(ax, MultiIndex): return False - elif is_bool_indexer(indexer): + elif com.is_bool_indexer(indexer): return False elif not ax.is_unique: return False @@ -915,7 +914,7 @@ def _convert_for_reindex(self, key, axis=None): axis = self.axis or 0 labels = self.obj._get_axis(axis) - if is_bool_indexer(key): + if com.is_bool_indexer(key): key = check_bool_indexer(labels, key) return labels[key] else: @@ -923,7 +922,7 @@ def _convert_for_reindex(self, key, axis=None): keyarr = labels._convert_index_indexer(key) else: # asarray can be unsafe, NumPy strings are weird - keyarr = _asarray_tuplesafe(key) + keyarr = com._asarray_tuplesafe(key) if is_integer_dtype(keyarr): # Cast the indexer to uint64 if possible so @@ -1011,7 +1010,7 @@ def _getitem_lowerdim(self, tup): # Slices should return views, but calling iloc/loc with a null # slice returns a new object. 
- if is_null_slice(new_key): + if com.is_null_slice(new_key): return section # This is an elided recursive call to iloc/loc/etc' return getattr(section, self.name)[new_key] @@ -1040,7 +1039,7 @@ def _getitem_nested_tuple(self, tup): axis = 0 for i, key in enumerate(tup): - if is_null_slice(key): + if com.is_null_slice(key): axis += 1 continue @@ -1113,7 +1112,7 @@ def _getitem_iterable(self, key, axis=None): labels = self.obj._get_axis(axis) - if is_bool_indexer(key): + if com.is_bool_indexer(key): key = check_bool_indexer(labels, key) inds, = key.nonzero() return self.obj._take(inds, axis=axis, convert=False) @@ -1235,7 +1234,7 @@ def _convert_to_indexer(self, obj, axis=None, is_setter=False): elif is_list_like_indexer(obj): - if is_bool_indexer(obj): + if com.is_bool_indexer(obj): obj = check_bool_indexer(labels, obj) inds, = obj.nonzero() return inds @@ -1265,7 +1264,7 @@ def _convert_to_indexer(self, obj, axis=None, is_setter=False): raise KeyError('{mask} not in index' .format(mask=objarr[mask])) - return _values_from_object(indexer) + return com._values_from_object(indexer) else: try: @@ -1336,7 +1335,7 @@ def _has_valid_type(self, key, axis): if isinstance(key, slice): return True - elif is_bool_indexer(key): + elif com.is_bool_indexer(key): return True elif is_list_like_indexer(key): @@ -1448,7 +1447,7 @@ def _has_valid_type(self, key, axis): if isinstance(key, slice): return True - elif is_bool_indexer(key): + elif com.is_bool_indexer(key): return True elif is_list_like_indexer(key): @@ -1479,7 +1478,7 @@ def _has_valid_type(self, key, axis): KeyError in the future, you can use .reindex() as an alternative. 
See the documentation here: - http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike""") # noqa + https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike""") # noqa if not (ax.is_categorical() or ax.is_interval()): warnings.warn(_missing_key_warning, @@ -1576,7 +1575,7 @@ def _getitem_axis(self, key, axis=None): if isinstance(key, slice): self._has_valid_type(key, axis) return self._get_slice_axis(key, axis=axis) - elif is_bool_indexer(key): + elif com.is_bool_indexer(key): return self._getbool_axis(key, axis=axis) elif is_list_like_indexer(key): @@ -1653,7 +1652,7 @@ class _iLocIndexer(_LocationIndexer): _exception = IndexError def _has_valid_type(self, key, axis): - if is_bool_indexer(key): + if com.is_bool_indexer(key): if hasattr(key, 'index') and isinstance(key.index, Index): if key.index.inferred_type == 'integer': raise NotImplementedError("iLocation based boolean " @@ -1743,7 +1742,7 @@ def _getitem_tuple(self, tup): if i >= self.obj.ndim: raise IndexingError('Too many indexers') - if is_null_slice(key): + if com.is_null_slice(key): axis += 1 continue @@ -1807,7 +1806,7 @@ def _getitem_axis(self, key, axis=None): except TypeError: # pragma: no cover pass - if is_bool_indexer(key): + if com.is_bool_indexer(key): self._has_valid_type(key, axis) return self._getbool_axis(key, axis=axis) @@ -1937,10 +1936,6 @@ def _convert_key(self, key, is_setter=False): return key -# 32-bit floating point machine epsilon -_eps = 1.1920929e-07 - - def length_of_indexer(indexer, target=None): """return the length of a single non-tuple indexer which could be a slice """ @@ -1993,19 +1988,6 @@ def convert_to_index_sliceable(obj, key): return None -def is_index_slice(obj): - def _is_valid_index(x): - return (is_integer(x) or is_float(x) and - np.allclose(x, int(x), rtol=_eps, atol=0)) - - def _crit(v): - return v is None or _is_valid_index(v) - - both_none = obj.start is None and obj.stop is None - - return not 
both_none and (_crit(obj.start) and _crit(obj.stop)) - - def check_bool_indexer(ax, key): # boolean indexing, need to check that the data are aligned, otherwise # disallowed diff --git a/pandas/core/internals.py b/pandas/core/internals.py index ba90503e3bf40..c2d3d0852384c 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -11,6 +11,8 @@ import numpy as np +from pandas._libs import internals as libinternals + from pandas.core.base import PandasObject from pandas.core.dtypes.dtypes import ( @@ -54,12 +56,12 @@ import pandas.core.dtypes.concat as _concat from pandas.core.dtypes.generic import ABCSeries, ABCDatetimeIndex -from pandas.core.common import is_null_slice, _any_not_none +import pandas.core.common as com import pandas.core.algorithms as algos from pandas.core.index import Index, MultiIndex, _ensure_index from pandas.core.indexing import maybe_convert_indices, length_of_indexer -from pandas.core.categorical import Categorical, _maybe_to_categorical +from pandas.core.arrays.categorical import Categorical, _maybe_to_categorical from pandas.core.indexes.datetimes import DatetimeIndex from pandas.io.formats.printing import pprint_thing @@ -67,7 +69,7 @@ from pandas.core.sparse.array import _maybe_to_sparse, SparseArray from pandas._libs import lib, tslib from pandas._libs.tslib import Timedelta -from pandas._libs.lib import BlockPlacement +from pandas._libs.internals import BlockPlacement from pandas._libs.tslibs import conversion from pandas.util._decorators import cache_readonly @@ -97,7 +99,6 @@ class Block(PandasObject): is_sparse = False _box_to_block_values = True _can_hold_na = False - _downcast_dtype = None _can_consolidate = True _verify_integrity = True _validate_ndim = True @@ -105,7 +106,7 @@ class Block(PandasObject): _holder = None _concatenator = staticmethod(np.concatenate) - def __init__(self, values, placement, ndim=None, fastpath=False): + def __init__(self, values, placement, ndim=None): if ndim is None: ndim = values.ndim 
elif values.ndim != ndim: @@ -191,6 +192,13 @@ def fill_value(self): def mgr_locs(self): return self._mgr_locs + @mgr_locs.setter + def mgr_locs(self, new_mgr_locs): + if not isinstance(new_mgr_locs, BlockPlacement): + new_mgr_locs = BlockPlacement(new_mgr_locs) + + self._mgr_locs = new_mgr_locs + @property def array_dtype(self): """ the dtype to return if I want to construct this block as an @@ -198,7 +206,7 @@ def array_dtype(self): """ return self.dtype - def make_block(self, values, placement=None, ndim=None, **kwargs): + def make_block(self, values, placement=None, ndim=None): """ Create a new block, with type inference propagate any values that are not specified @@ -208,28 +216,20 @@ def make_block(self, values, placement=None, ndim=None, **kwargs): if ndim is None: ndim = self.ndim - return make_block(values, placement=placement, ndim=ndim, **kwargs) + return make_block(values, placement=placement, ndim=ndim) - def make_block_scalar(self, values, **kwargs): + def make_block_scalar(self, values): """ Create a ScalarBlock """ return ScalarBlock(values) - def make_block_same_class(self, values, placement=None, fastpath=True, - **kwargs): + def make_block_same_class(self, values, placement=None, ndim=None): """ Wrap given values in a block of same type as self. 
""" if placement is None: placement = self.mgr_locs - return make_block(values, placement=placement, klass=self.__class__, - fastpath=fastpath, **kwargs) - - @mgr_locs.setter - def mgr_locs(self, new_mgr_locs): - if not isinstance(new_mgr_locs, BlockPlacement): - new_mgr_locs = BlockPlacement(new_mgr_locs) - - self._mgr_locs = new_mgr_locs + return make_block(values, placement=placement, ndim=ndim, + klass=self.__class__) def __unicode__(self): @@ -303,10 +303,6 @@ def getitem_block(self, slicer, new_mgr_locs=None): def shape(self): return self.values.shape - @property - def itemsize(self): - return self.values.itemsize - @property def dtype(self): return self.values.dtype @@ -327,21 +323,6 @@ def concat_same_type(self, to_concat, placement=None): return self.make_block_same_class( values, placement=placement or slice(0, len(values), 1)) - def reindex_axis(self, indexer, method=None, axis=1, fill_value=None, - limit=None, mask_info=None): - """ - Reindex using pre-computed indexer information - """ - if axis < 1: - raise AssertionError( - 'axis must be at least 1, got {axis}'.format(axis=axis)) - if fill_value is None: - fill_value = self.fill_value - - new_values = algos.take_nd(self.values, indexer, axis, - fill_value=fill_value, mask_info=mask_info) - return self.make_block(new_values, fastpath=True) - def iget(self, i): return self.values[i] @@ -459,7 +440,7 @@ def make_a_block(nv, ref_loc): except (AttributeError, NotImplementedError): pass block = self.make_block(values=nv, - placement=ref_loc, fastpath=True) + placement=ref_loc) return block # ndim == 1 @@ -518,7 +499,7 @@ def downcast(self, dtypes=None, mgr=None): dtypes = 'infer' nv = maybe_downcast_to_dtype(values, dtypes) - return self.make_block(nv, fastpath=True) + return self.make_block(nv) # ndim > 1 if dtypes is None: @@ -590,7 +571,7 @@ def _astype(self, dtype, copy=False, errors='raise', values=None, categories = kwargs.get('categories', None) ordered = kwargs.get('ordered', None) - if 
_any_not_none(categories, ordered): + if com._any_not_none(categories, ordered): dtype = CategoricalDtype(categories, ordered) if is_categorical_dtype(self.values): @@ -631,7 +612,7 @@ def _astype(self, dtype, copy=False, errors='raise', values=None, values = astype_nansafe(values.ravel(), dtype, copy=True) values = values.reshape(self.shape) - newb = make_block(values, placement=self.mgr_locs, dtype=dtype, + newb = make_block(values, placement=self.mgr_locs, klass=klass) except: if errors == 'raise': @@ -840,7 +821,6 @@ def setitem(self, indexer, value, mgr=None): transf = (lambda x: x.T) if self.ndim == 2 else (lambda x: x) values = transf(values) - l = len(values) # length checking # boolean with truth values == len of the value is ok too @@ -855,7 +835,7 @@ def setitem(self, indexer, value, mgr=None): # slice elif isinstance(indexer, slice): - if is_list_like(value) and l: + if is_list_like(value) and len(values): if len(value) != length_of_indexer(indexer, values): raise ValueError("cannot set using a slice indexer with a " "different length than the value") @@ -910,7 +890,7 @@ def _is_empty_indexer(indexer): # coerce and try to infer the dtypes of the result values = self._try_coerce_and_cast_result(values, dtype) - block = self.make_block(transf(values), fastpath=True) + block = self.make_block(transf(values)) return block def putmask(self, mask, new, align=True, inplace=False, axis=0, @@ -937,11 +917,8 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0, new_values = self.values if inplace else self.values.copy() - if hasattr(new, 'reindex_axis'): - new = new.values - - if hasattr(mask, 'reindex_axis'): - mask = mask.values + new = getattr(new, 'values', new) + mask = getattr(mask, 'values', mask) # if we are passed a scalar None, convert it here if not is_list_like(new) and isna(new) and not self.is_object: @@ -1026,7 +1003,7 @@ def f(m, v, i): if transpose: new_values = new_values.T - return [self.make_block(new_values, fastpath=True)] + 
return [self.make_block(new_values)] def coerce_to_target_dtype(self, other): """ @@ -1161,7 +1138,7 @@ def _interpolate_with_fill(self, method='pad', axis=0, inplace=False, dtype=self.dtype) values = self._try_coerce_result(values) - blocks = [self.make_block(values, klass=self.__class__, fastpath=True)] + blocks = [self.make_block_same_class(values, ndim=self.ndim)] return self._maybe_downcast(blocks, downcast) def _interpolate(self, method=None, index=None, values=None, @@ -1201,8 +1178,7 @@ def func(x): # interp each column independently interp_values = np.apply_along_axis(func, axis, data) - blocks = [self.make_block(interp_values, klass=self.__class__, - fastpath=True)] + blocks = [self.make_block_same_class(interp_values)] return self._maybe_downcast(blocks, downcast) def take_nd(self, indexer, axis, new_mgr_locs=None, fill_tuple=None): @@ -1230,7 +1206,7 @@ def take_nd(self, indexer, axis, new_mgr_locs=None, fill_tuple=None): if new_mgr_locs is None: if axis == 0: - slc = lib.indexer_as_slice(indexer) + slc = libinternals.indexer_as_slice(indexer) if slc is not None: new_mgr_locs = self.mgr_locs[slc] else: @@ -1246,7 +1222,7 @@ def take_nd(self, indexer, axis, new_mgr_locs=None, fill_tuple=None): def diff(self, n, axis=1, mgr=None): """ return block for the diff of the values """ new_values = algos.diff(self.values, n, axis=axis) - return [self.make_block(values=new_values, fastpath=True)] + return [self.make_block(values=new_values)] def shift(self, periods, axis=0, mgr=None): """ shift the block by periods, possibly upcast """ @@ -1276,7 +1252,7 @@ def shift(self, periods, axis=0, mgr=None): if f_ordered: new_values = new_values.T - return [self.make_block(new_values, fastpath=True)] + return [self.make_block(new_values)] def eval(self, func, other, errors='raise', try_cast=False, mgr=None): """ @@ -1299,8 +1275,7 @@ def eval(self, func, other, errors='raise', try_cast=False, mgr=None): orig_other = other values = self.values - if hasattr(other, 
'reindex_axis'): - other = other.values + other = getattr(other, 'values', other) # make sure that we can broadcast is_transposed = False @@ -1416,7 +1391,7 @@ def handle_error(): result = self._try_cast_result(result) result = _block_shape(result, ndim=self.ndim) - return [self.make_block(result, fastpath=True, )] + return [self.make_block(result)] def where(self, other, cond, align=True, errors='raise', try_cast=False, axis=0, transpose=False, mgr=None): @@ -1448,11 +1423,8 @@ def where(self, other, cond, align=True, errors='raise', if transpose: values = values.T - if hasattr(other, 'reindex_axis'): - other = other.values - - if hasattr(cond, 'reindex_axis'): - cond = cond.values + other = getattr(other, 'values', other) + cond = getattr(cond, 'values', cond) # If the default broadcasting would go in the wrong direction, then # explicitly reshape other instead @@ -1696,7 +1668,7 @@ class NonConsolidatableMixIn(object): _validate_ndim = False _holder = None - def __init__(self, values, placement, ndim=None, fastpath=False, **kwargs): + def __init__(self, values, placement, ndim=None): # Placement must be converted to BlockPlacement via property setter # before ndim logic, because placement may be a slice which doesn't @@ -1733,7 +1705,7 @@ def iget(self, col): if self.ndim == 2 and isinstance(col, tuple): col, loc = col - if not is_null_slice(col) and col != 0: + if not com.is_null_slice(col) and col != 0: raise IndexError("{0} only contains one item".format(self)) return self.values[loc] else: @@ -1842,7 +1814,6 @@ def equals(self, other): class FloatBlock(FloatOrComplexBlock): __slots__ = () is_float = True - _downcast_dtype = 'int64' def _can_hold_element(self, element): tipo = maybe_infer_dtype_type(element) @@ -1954,6 +1925,13 @@ class TimeDeltaBlock(DatetimeLikeBlockMixin, IntBlock): _can_hold_na = True is_numeric = False + def __init__(self, values, placement, ndim=None): + if values.dtype != _TD_DTYPE: + values = conversion.ensure_timedelta64ns(values) + 
+ super(TimeDeltaBlock, self).__init__(values, + placement=placement, ndim=ndim) + @property def _box_func(self): return lambda x: tslib.Timedelta(x, unit='ns') @@ -2085,13 +2063,12 @@ class ObjectBlock(Block): is_object = True _can_hold_na = True - def __init__(self, values, ndim=2, fastpath=False, placement=None, - **kwargs): + def __init__(self, values, placement=None, ndim=2): if issubclass(values.dtype.type, compat.string_types): values = np.array(values, dtype=object) - super(ObjectBlock, self).__init__(values, ndim=ndim, fastpath=fastpath, - placement=placement, **kwargs) + super(ObjectBlock, self).__init__(values, ndim=ndim, + placement=placement) @property def is_bool(self): @@ -2338,12 +2315,11 @@ class CategoricalBlock(NonConsolidatableMixIn, ObjectBlock): _holder = Categorical _concatenator = staticmethod(_concat._concat_categorical) - def __init__(self, values, placement, fastpath=False, **kwargs): + def __init__(self, values, placement, ndim=None): # coerce to categorical if we can super(CategoricalBlock, self).__init__(_maybe_to_categorical(values), - fastpath=True, - placement=placement, **kwargs) + placement=placement, ndim=ndim) @property def is_view(self): @@ -2460,12 +2436,12 @@ class DatetimeBlock(DatetimeLikeBlockMixin, Block): is_datetime = True _can_hold_na = True - def __init__(self, values, placement, fastpath=False, **kwargs): + def __init__(self, values, placement, ndim=None): if values.dtype != _NS_DTYPE: values = conversion.ensure_datetime64ns(values) - super(DatetimeBlock, self).__init__(values, fastpath=True, - placement=placement, **kwargs) + super(DatetimeBlock, self).__init__(values, + placement=placement, ndim=ndim) def _astype(self, dtype, mgr=None, **kwargs): """ @@ -2596,13 +2572,11 @@ class DatetimeTZBlock(NonConsolidatableMixIn, DatetimeBlock): _concatenator = staticmethod(_concat._concat_datetime) is_datetimetz = True - def __init__(self, values, placement, ndim=2, **kwargs): + def __init__(self, values, placement, ndim=2, 
dtype=None): if not isinstance(values, self._holder): values = self._holder(values) - dtype = kwargs.pop('dtype', None) - if dtype is not None: if isinstance(dtype, compat.string_types): dtype = DatetimeTZDtype.construct_from_string(dtype) @@ -2612,7 +2586,7 @@ def __init__(self, values, placement, ndim=2, **kwargs): raise ValueError("cannot create a DatetimeTZBlock without a tz") super(DatetimeTZBlock, self).__init__(values, placement=placement, - ndim=ndim, **kwargs) + ndim=ndim) def copy(self, deep=True, mgr=None): """ copy constructor """ @@ -2630,16 +2604,15 @@ def external_values(self): def get_values(self, dtype=None): # return object dtype as Timestamps with the zones if is_object_dtype(dtype): - f = lambda x: lib.Timestamp(x, tz=self.values.tz) return lib.map_infer( - self.values.ravel(), f).reshape(self.values.shape) + self.values.ravel(), self._box_func).reshape(self.values.shape) return self.values def _slice(self, slicer): """ return a slice of my values """ if isinstance(slicer, tuple): col, loc = slicer - if not is_null_slice(col) and col != 0: + if not com.is_null_slice(col) and col != 0: raise IndexError("{0} only contains one item".format(self)) return self.values[loc] return self.values[slicer] @@ -2760,10 +2733,6 @@ class SparseBlock(NonConsolidatableMixIn, Block): def shape(self): return (len(self.mgr_locs), self.sp_index.length) - @property - def itemsize(self): - return self.dtype.itemsize - @property def fill_value(self): # return np.nan @@ -2818,7 +2787,7 @@ def copy(self, deep=True, mgr=None): def make_block_same_class(self, values, placement, sparse_index=None, kind=None, dtype=None, fill_value=None, - copy=False, fastpath=True, **kwargs): + copy=False, ndim=None): """ return a new block """ if dtype is None: dtype = values.dtype @@ -2837,8 +2806,7 @@ def make_block_same_class(self, values, placement, sparse_index=None, # won't take space since there's 0 items, plus it will preserve # the dtype. 
return self.make_block(np.empty(values.shape, dtype=dtype), - placement, - fastpath=True) + placement) elif nitems > 1: raise ValueError("Only 1-item 2d sparse blocks are supported") else: @@ -2847,7 +2815,7 @@ def make_block_same_class(self, values, placement, sparse_index=None, new_values = SparseArray(values, sparse_index=sparse_index, kind=kind or self.kind, dtype=dtype, fill_value=fill_value, copy=copy) - return self.make_block(new_values, fastpath=fastpath, + return self.make_block(new_values, placement=placement) def interpolate(self, method='pad', axis=0, inplace=False, limit=None, @@ -2888,22 +2856,6 @@ def shift(self, periods, axis=0, mgr=None): return [self.make_block_same_class(new_values, placement=self.mgr_locs)] - def reindex_axis(self, indexer, method=None, axis=1, fill_value=None, - limit=None, mask_info=None): - """ - Reindex using pre-computed indexer information - """ - if axis < 1: - raise AssertionError( - 'axis must be at least 1, got {axis}'.format(axis=axis)) - - # taking on the 0th axis always here - if fill_value is None: - fill_value = self.fill_value - return self.make_block_same_class(self.values.take(indexer), - fill_value=fill_value, - placement=self.mgr_locs) - def sparse_reindex(self, new_index): """ sparse reindex and return a new block current reindex only works for float64 dtype! """ @@ -2914,43 +2866,62 @@ def sparse_reindex(self, new_index): placement=self.mgr_locs) +def get_block_type(values, dtype=None): + """ + Find the appropriate Block subclass to use for the given values and dtype. 
+ + Parameters + ---------- + values : ndarray-like + dtype : numpy or pandas dtype + + Returns + ------- + cls : class, subclass of Block + """ + dtype = dtype or values.dtype + vtype = dtype.type + + if is_sparse(values): + cls = SparseBlock + elif issubclass(vtype, np.floating): + cls = FloatBlock + elif issubclass(vtype, np.timedelta64): + assert issubclass(vtype, np.integer) + cls = TimeDeltaBlock + elif issubclass(vtype, np.complexfloating): + cls = ComplexBlock + elif issubclass(vtype, np.datetime64): + assert not is_datetimetz(values) + cls = DatetimeBlock + elif is_datetimetz(values): + cls = DatetimeTZBlock + elif issubclass(vtype, np.integer): + cls = IntBlock + elif dtype == np.bool_: + cls = BoolBlock + elif is_categorical(values): + cls = CategoricalBlock + else: + cls = ObjectBlock + return cls + + def make_block(values, placement, klass=None, ndim=None, dtype=None, - fastpath=False): + fastpath=None): + if fastpath is not None: + # GH#19265 pyarrow is passing this + warnings.warn("fastpath argument is deprecated, will be removed " + "in a future release.", DeprecationWarning) if klass is None: dtype = dtype or values.dtype - vtype = dtype.type - - if isinstance(values, SparseArray): - klass = SparseBlock - elif issubclass(vtype, np.floating): - klass = FloatBlock - elif (issubclass(vtype, np.integer) and - issubclass(vtype, np.timedelta64)): - klass = TimeDeltaBlock - elif (issubclass(vtype, np.integer) and - not issubclass(vtype, np.datetime64)): - klass = IntBlock - elif dtype == np.bool_: - klass = BoolBlock - elif issubclass(vtype, np.datetime64): - if hasattr(values, 'tz'): - klass = DatetimeTZBlock - else: - klass = DatetimeBlock - elif is_datetimetz(values): - klass = DatetimeTZBlock - elif issubclass(vtype, np.complexfloating): - klass = ComplexBlock - elif is_categorical(values): - klass = CategoricalBlock - else: - klass = ObjectBlock + klass = get_block_type(values, dtype) elif klass is DatetimeTZBlock and not is_datetimetz(values): - 
return klass(values, ndim=ndim, fastpath=fastpath, + return klass(values, ndim=ndim, placement=placement, dtype=dtype) - return klass(values, ndim=ndim, fastpath=fastpath, placement=placement) + return klass(values, ndim=ndim, placement=placement) # TODO: flexible with index=None and/or items=None @@ -3010,7 +2981,7 @@ class BlockManager(PandasObject): __slots__ = ['axes', 'blocks', '_ndim', '_shape', '_known_consolidated', '_is_consolidated', '_blknos', '_blklocs'] - def __init__(self, blocks, axes, do_integrity_check=True, fastpath=True): + def __init__(self, blocks, axes, do_integrity_check=True): self.axes = [_ensure_index(ax) for ax in axes] self.blocks = tuple(blocks) @@ -3306,7 +3277,7 @@ def apply(self, f, axes=None, filter=None, do_integrity_check=False, aligned_args = dict((k, kwargs[k]) for k in align_keys - if hasattr(kwargs[k], 'reindex_axis')) + if hasattr(kwargs[k], 'values')) for b in self.blocks: if filter is not None: @@ -3621,8 +3592,7 @@ def get_slice(self, slobj, axis=0): new_axes = list(self.axes) new_axes[axis] = new_axes[axis][slobj] - bm = self.__class__(new_blocks, new_axes, do_integrity_check=False, - fastpath=True) + bm = self.__class__(new_blocks, new_axes, do_integrity_check=False) bm._consolidate_inplace() return bm @@ -3777,7 +3747,7 @@ def xs(self, key, axis=1, copy=True, takeable=False): # we must copy here as we are mixed type for blk in self.blocks: newb = make_block(values=blk.values[slicer], - klass=blk.__class__, fastpath=True, + klass=blk.__class__, placement=blk.mgr_locs) new_blocks.append(newb) elif len(self.blocks) == 1: @@ -3787,8 +3757,7 @@ def xs(self, key, axis=1, copy=True, takeable=False): vals = vals.copy() new_blocks = [make_block(values=vals, placement=block.mgr_locs, - klass=block.__class__, - fastpath=True, )] + klass=block.__class__)] return self.__class__(new_blocks, new_axes) @@ -3891,7 +3860,7 @@ def iget(self, i, fastpath=True): return SingleBlockManager( [block.make_block_same_class(values, 
placement=slice(0, len(values)), - ndim=1, fastpath=True)], + ndim=1)], self.axes[1]) def get_scalar(self, tup): @@ -4413,8 +4382,7 @@ def __init__(self, block, axis, do_integrity_check=False, fastpath=False): block = block[0] if not isinstance(block, Block): - block = make_block(block, placement=slice(0, len(axis)), ndim=1, - fastpath=True) + block = make_block(block, placement=slice(0, len(axis)), ndim=1) self.blocks = [block] @@ -4439,42 +4407,6 @@ def _blklocs(self): """ compat with BlockManager """ return None - def reindex(self, new_axis, indexer=None, method=None, fill_value=None, - limit=None, copy=True): - # if we are the same and don't copy, just return - if self.index.equals(new_axis): - if copy: - return self.copy(deep=True) - else: - return self - - values = self._block.get_values() - - if indexer is None: - indexer = self.items.get_indexer_for(new_axis) - - if fill_value is None: - fill_value = np.nan - - new_values = algos.take_1d(values, indexer, fill_value=fill_value) - - # fill if needed - if method is not None or limit is not None: - new_values = missing.interpolate_2d(new_values, - method=method, - limit=limit, - fill_value=fill_value) - - if self._block.is_sparse: - make_block = self._block.make_block_same_class - - block = make_block(new_values, copy=copy, - placement=slice(0, len(new_axis))) - - mgr = SingleBlockManager(block, new_axis) - mgr._consolidate_inplace() - return mgr - def get_slice(self, slobj, axis=0): if axis >= self.ndim: raise IndexError("Requested axis not found in manager") @@ -4537,10 +4469,6 @@ def asobject(self): """ return self._block.get_values(dtype=object) - @property - def itemsize(self): - return self._block.values.itemsize - @property def _can_hold_na(self): return self._block._can_hold_na @@ -4660,15 +4588,7 @@ def create_block_manager_from_arrays(arrays, names, axes): def form_blocks(arrays, names, axes): # put "leftover" items in float bucket, where else? # generalize? 
- float_items = [] - complex_items = [] - int_items = [] - bool_items = [] - object_items = [] - sparse_items = [] - datetime_items = [] - datetime_tz_items = [] - cat_items = [] + items_dict = defaultdict(list) extra_locs = [] names_idx = Index(names) @@ -4686,72 +4606,53 @@ def form_blocks(arrays, names, axes): k = names[name_idx] v = arrays[name_idx] - if is_sparse(v): - sparse_items.append((i, k, v)) - elif issubclass(v.dtype.type, np.floating): - float_items.append((i, k, v)) - elif issubclass(v.dtype.type, np.complexfloating): - complex_items.append((i, k, v)) - elif issubclass(v.dtype.type, np.datetime64): - if v.dtype != _NS_DTYPE: - v = conversion.ensure_datetime64ns(v) - - if is_datetimetz(v): - datetime_tz_items.append((i, k, v)) - else: - datetime_items.append((i, k, v)) - elif is_datetimetz(v): - datetime_tz_items.append((i, k, v)) - elif issubclass(v.dtype.type, np.integer): - int_items.append((i, k, v)) - elif v.dtype == np.bool_: - bool_items.append((i, k, v)) - elif is_categorical(v): - cat_items.append((i, k, v)) - else: - object_items.append((i, k, v)) + block_type = get_block_type(v) + items_dict[block_type.__name__].append((i, k, v)) blocks = [] - if len(float_items): - float_blocks = _multi_blockify(float_items) + if len(items_dict['FloatBlock']): + float_blocks = _multi_blockify(items_dict['FloatBlock']) blocks.extend(float_blocks) - if len(complex_items): - complex_blocks = _multi_blockify(complex_items) + if len(items_dict['ComplexBlock']): + complex_blocks = _multi_blockify(items_dict['ComplexBlock']) blocks.extend(complex_blocks) - if len(int_items): - int_blocks = _multi_blockify(int_items) + if len(items_dict['TimeDeltaBlock']): + timedelta_blocks = _multi_blockify(items_dict['TimeDeltaBlock']) + blocks.extend(timedelta_blocks) + + if len(items_dict['IntBlock']): + int_blocks = _multi_blockify(items_dict['IntBlock']) blocks.extend(int_blocks) - if len(datetime_items): - datetime_blocks = _simple_blockify(datetime_items, _NS_DTYPE) + if 
len(items_dict['DatetimeBlock']): + datetime_blocks = _simple_blockify(items_dict['DatetimeBlock'], + _NS_DTYPE) blocks.extend(datetime_blocks) - if len(datetime_tz_items): + if len(items_dict['DatetimeTZBlock']): dttz_blocks = [make_block(array, klass=DatetimeTZBlock, - fastpath=True, - placement=[i], ) - for i, _, array in datetime_tz_items] + placement=[i]) + for i, _, array in items_dict['DatetimeTZBlock']] blocks.extend(dttz_blocks) - if len(bool_items): - bool_blocks = _simple_blockify(bool_items, np.bool_) + if len(items_dict['BoolBlock']): + bool_blocks = _simple_blockify(items_dict['BoolBlock'], np.bool_) blocks.extend(bool_blocks) - if len(object_items) > 0: - object_blocks = _simple_blockify(object_items, np.object_) + if len(items_dict['ObjectBlock']) > 0: + object_blocks = _simple_blockify(items_dict['ObjectBlock'], np.object_) blocks.extend(object_blocks) - if len(sparse_items) > 0: - sparse_blocks = _sparse_blockify(sparse_items) + if len(items_dict['SparseBlock']) > 0: + sparse_blocks = _sparse_blockify(items_dict['SparseBlock']) blocks.extend(sparse_blocks) - if len(cat_items) > 0: - cat_blocks = [make_block(array, klass=CategoricalBlock, fastpath=True, - placement=[i]) - for i, _, array in cat_items] + if len(items_dict['CategoricalBlock']) > 0: + cat_blocks = [make_block(array, klass=CategoricalBlock, placement=[i]) + for i, _, array in items_dict['CategoricalBlock']] blocks.extend(cat_blocks) if len(extra_locs): @@ -4806,8 +4707,7 @@ def _sparse_blockify(tuples, dtype=None): new_blocks = [] for i, names, array in tuples: array = _maybe_to_sparse(array) - block = make_block(array, klass=SparseBlock, fastpath=True, - placement=[i]) + block = make_block(array, klass=SparseBlock, placement=[i]) new_blocks.append(block) return new_blocks @@ -4891,7 +4791,7 @@ def _merge_blocks(blocks, dtype=None, _can_consolidate=True): new_values = new_values[argsort] new_mgr_locs = new_mgr_locs[argsort] - return make_block(new_values, fastpath=True, 
placement=new_mgr_locs) + return make_block(new_values, placement=new_mgr_locs) # no merge return blocks @@ -5023,7 +4923,7 @@ def _get_blkno_placements(blknos, blk_count, group=True): blknos = _ensure_int64(blknos) # FIXME: blk_count is unused, but it may avoid the use of dicts in cython - for blkno, indexer in lib.get_blkno_indexers(blknos, group): + for blkno, indexer in libinternals.get_blkno_indexers(blknos, group): yield blkno, BlockPlacement(indexer) @@ -5665,8 +5565,8 @@ def _fast_count_smallints(arr): def _preprocess_slice_or_indexer(slice_or_indexer, length, allow_fill): if isinstance(slice_or_indexer, slice): - return 'slice', slice_or_indexer, lib.slice_len(slice_or_indexer, - length) + return ('slice', slice_or_indexer, + libinternals.slice_len(slice_or_indexer, length)) elif (isinstance(slice_or_indexer, np.ndarray) and slice_or_indexer.dtype == np.bool_): return 'mask', slice_or_indexer, slice_or_indexer.sum() diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index d1a355021f388..eda86f12d501d 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -6,7 +6,7 @@ import numpy as np from pandas import compat -from pandas._libs import tslib, algos, lib +from pandas._libs import tslib, lib from pandas.core.dtypes.common import ( _get_dtype, is_float, is_scalar, @@ -20,7 +20,7 @@ from pandas.core.dtypes.cast import _int64_max, maybe_upcast_putmask from pandas.core.dtypes.missing import isna, notna, na_value_for_dtype from pandas.core.config import get_option -from pandas.core.common import _values_from_object +import pandas.core.common as com _BOTTLENECK_INSTALLED = False _MIN_BOTTLENECK_VERSION = '1.0.0' @@ -205,7 +205,7 @@ def _get_values(values, skipna, fill_value=None, fill_value_typ=None, if necessary copy and mask using the specified fill_value copy = True will force the copy """ - values = _values_from_object(values) + values = com._values_from_object(values) if isfinite: mask = _isfinite(values) else: @@ -370,14 +370,13 @@ def 
nanmean(values, axis=None, skipna=True): @bottleneck_switch() def nanmedian(values, axis=None, skipna=True): - values, mask, dtype, dtype_max = _get_values(values, skipna) - def get_median(x): mask = notna(x) if not skipna and not mask.all(): return np.nan - return algos.median(_values_from_object(x[mask])) + return np.nanmedian(x[mask]) + values, mask, dtype, dtype_max = _get_values(values, skipna) if not is_float_dtype(values): values = values.astype('f8') values[mask] = np.nan @@ -389,10 +388,15 @@ def get_median(x): # an array from a frame if values.ndim > 1: + # there's a non-empty array to apply over otherwise numpy raises if notempty: - return _wrap_results( - np.apply_along_axis(get_median, axis, values), dtype) + if not skipna: + return _wrap_results( + np.apply_along_axis(get_median, axis, values), dtype) + + # fastpath for the skipna case + return _wrap_results(np.nanmedian(values, axis), dtype) # must return the correct shape, but median is not defined for the # empty set so return nans of shape "everything but the passed axis" @@ -437,7 +441,7 @@ def nanstd(values, axis=None, skipna=True, ddof=1): @bottleneck_switch(ddof=1) def nanvar(values, axis=None, skipna=True, ddof=1): - values = _values_from_object(values) + values = com._values_from_object(values) dtype = values.dtype mask = isna(values) if is_any_int_dtype(values): @@ -546,7 +550,7 @@ def nanskew(values, axis=None, skipna=True): """ - values = _values_from_object(values) + values = com._values_from_object(values) mask = isna(values) if not is_float_dtype(values.dtype): values = values.astype('f8') @@ -604,7 +608,7 @@ def nankurt(values, axis=None, skipna=True): central moment. 
""" - values = _values_from_object(values) + values = com._values_from_object(values) mask = isna(values) if not is_float_dtype(values.dtype): values = values.astype('f8') diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 554f0cb3803e9..ba8a15b60ba56 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -6,31 +6,30 @@ # necessary to enforce truediv in Python 2.X from __future__ import division import operator -import warnings + import numpy as np import pandas as pd -import datetime from pandas._libs import (lib, index as libindex, - tslib as libts, algos as libalgos, iNaT) + algos as libalgos) from pandas import compat from pandas.util._decorators import Appender from pandas.compat import bind_method import pandas.core.missing as missing +import pandas.core.common as com -from pandas.errors import PerformanceWarning -from pandas.core.common import _values_from_object, _maybe_match_name +from pandas.errors import NullFrequencyError from pandas.core.dtypes.missing import notna, isna from pandas.core.dtypes.common import ( needs_i8_conversion, is_datetimelike_v_numeric, is_integer_dtype, is_categorical_dtype, is_object_dtype, is_timedelta64_dtype, - is_datetime64_dtype, is_datetime64tz_dtype, is_datetime64_ns_dtype, - is_bool_dtype, is_datetimetz, - is_list_like, is_offsetlike, + is_datetime64_dtype, is_datetime64tz_dtype, + is_bool_dtype, + is_list_like, is_scalar, _ensure_object) from pandas.core.dtypes.cast import ( @@ -39,22 +38,343 @@ from pandas.core.dtypes.generic import ( ABCSeries, ABCDataFrame, - ABCIndex, ABCDatetimeIndex, - ABCPeriodIndex) + ABCIndex, + ABCPeriodIndex, + ABCSparseSeries) + + +def _gen_eval_kwargs(name): + """ + Find the keyword arguments to pass to numexpr for the given operation. 
+ + Parameters + ---------- + name : str + + Returns + ------- + eval_kwargs : dict + + Examples + -------- + >>> _gen_eval_kwargs("__add__") + {} + + >>> _gen_eval_kwargs("rtruediv") + {"reversed": True, "truediv": True} + """ + kwargs = {} + + # Series and Panel appear to only pass __add__, __radd__, ... + # but DataFrame gets both these dunder names _and_ non-dunder names + # add, radd, ... + name = name.replace('__', '') + + if name.startswith('r'): + if name not in ['radd', 'rand', 'ror', 'rxor']: + # Exclude commutative operations + kwargs['reversed'] = True + + if name in ['truediv', 'rtruediv']: + kwargs['truediv'] = True + + if name in ['ne']: + kwargs['masker'] = True + + return kwargs + + +def _gen_fill_zeros(name): + """ + Find the appropriate fill value to use when filling in undefined values + in the results of the given operation caused by operating on + (generally dividing by) zero. + + Parameters + ---------- + name : str + + Returns + ------- + fill_value : {None, np.nan, np.inf} + """ + name = name.strip('__') + if 'div' in name: + # truediv, floordiv, div, and reversed variants + fill_value = np.inf + elif 'mod' in name: + # mod, rmod + fill_value = np.nan + else: + fill_value = None + return fill_value + + +def _get_frame_op_default_axis(name): + """ + Only DataFrame cares about default_axis, specifically: + special methods have default_axis=None and flex methods + have default_axis='columns'. + + Parameters + ---------- + name : str + + Returns + ------- + default_axis: str or None + """ + if name.replace('__r', '__') in ['__and__', '__or__', '__xor__']: + # bool methods + return 'columns' + elif name.startswith('__'): + # __add__, __mul__, ... + return None + else: + # add, mul, ... 
+ return 'columns' + + +# ----------------------------------------------------------------------------- +# Docstring Generation and Templates + +_op_descriptions = { + 'add': {'op': '+', + 'desc': 'Addition', + 'reversed': False, + 'reverse': 'radd'}, + 'sub': {'op': '-', + 'desc': 'Subtraction', + 'reversed': False, + 'reverse': 'rsub'}, + 'mul': {'op': '*', + 'desc': 'Multiplication', + 'reversed': False, + 'reverse': 'rmul'}, + 'mod': {'op': '%', + 'desc': 'Modulo', + 'reversed': False, + 'reverse': 'rmod'}, + 'pow': {'op': '**', + 'desc': 'Exponential power', + 'reversed': False, + 'reverse': 'rpow'}, + 'truediv': {'op': '/', + 'desc': 'Floating division', + 'reversed': False, + 'reverse': 'rtruediv'}, + 'floordiv': {'op': '//', + 'desc': 'Integer division', + 'reversed': False, + 'reverse': 'rfloordiv'}, + 'divmod': {'op': 'divmod', + 'desc': 'Integer division and modulo', + 'reversed': False, + 'reverse': None}, + + 'eq': {'op': '==', + 'desc': 'Equal to', + 'reversed': False, + 'reverse': None}, + 'ne': {'op': '!=', + 'desc': 'Not equal to', + 'reversed': False, + 'reverse': None}, + 'lt': {'op': '<', + 'desc': 'Less than', + 'reversed': False, + 'reverse': None}, + 'le': {'op': '<=', + 'desc': 'Less than or equal to', + 'reversed': False, + 'reverse': None}, + 'gt': {'op': '>', + 'desc': 'Greater than', + 'reversed': False, + 'reverse': None}, + 'ge': {'op': '>=', + 'desc': 'Greater than or equal to', + 'reversed': False, + 'reverse': None}} + +_op_names = list(_op_descriptions.keys()) +for key in _op_names: + reverse_op = _op_descriptions[key]['reverse'] + if reverse_op is not None: + _op_descriptions[reverse_op] = _op_descriptions[key].copy() + _op_descriptions[reverse_op]['reversed'] = True + _op_descriptions[reverse_op]['reverse'] = key + +_flex_doc_SERIES = """ +{desc} of series and other, element-wise (binary operator `{op_name}`). + +Equivalent to ``{equiv}``, but with support to substitute a fill_value for +missing data in one of the inputs. 
+ +Parameters +---------- +other : Series or scalar value +fill_value : None or float value, default None (NaN) + Fill missing (NaN) values with this value. If both Series are + missing, the result will be missing +level : int or name + Broadcast across a level, matching Index values on the + passed MultiIndex level + +Returns +------- +result : Series + +See also +-------- +Series.{reverse} +""" + +_arith_doc_FRAME = """ +Binary operator %s with support to substitute a fill_value for missing data in +one of the inputs + +Parameters +---------- +other : Series, DataFrame, or constant +axis : {0, 1, 'index', 'columns'} + For Series input, axis to match Series index on +fill_value : None or float value, default None + Fill missing (NaN) values with this value. If both DataFrame locations are + missing, the result will be missing +level : int or name + Broadcast across a level, matching Index values on the + passed MultiIndex level + +Notes +----- +Mismatched indices will be unioned together + +Returns +------- +result : DataFrame +""" + +_flex_doc_FRAME = """ +{desc} of dataframe and other, element-wise (binary operator `{op_name}`). + +Equivalent to ``{equiv}``, but with support to substitute a fill_value for +missing data in one of the inputs. + +Parameters +---------- +other : Series, DataFrame, or constant +axis : {{0, 1, 'index', 'columns'}} + For Series input, axis to match Series index on +fill_value : None or float value, default None + Fill missing (NaN) values with this value. If both DataFrame + locations are missing, the result will be missing +level : int or name + Broadcast across a level, matching Index values on the + passed MultiIndex level + +Notes +----- +Mismatched indices will be unioned together + +Returns +------- +result : DataFrame + +See also +-------- +DataFrame.{reverse} +""" + +_flex_doc_PANEL = """ +{desc} of series and other, element-wise (binary operator `{op_name}`). +Equivalent to ``{equiv}``. 
+ +Parameters +---------- +other : DataFrame or Panel +axis : {{items, major_axis, minor_axis}} + Axis to broadcast over + +Returns +------- +Panel + +See also +-------- +Panel.{reverse} +""" + + +_agg_doc_PANEL = """ +Wrapper method for {op_name} + +Parameters +---------- +other : DataFrame or Panel +axis : {{items, major_axis, minor_axis}} + Axis to broadcast over + +Returns +------- +Panel +""" + + +def _make_flex_doc(op_name, typ): + """ + Make the appropriate substitutions for the given operation and class-typ + into either _flex_doc_SERIES or _flex_doc_FRAME to return the docstring + to attach to a generated method. + + Parameters + ---------- + op_name : str {'__add__', '__sub__', ... '__eq__', '__ne__', ...} + typ : str {series, 'dataframe']} + + Returns + ------- + doc : str + """ + op_name = op_name.replace('__', '') + op_desc = _op_descriptions[op_name] + + if op_desc['reversed']: + equiv = 'other ' + op_desc['op'] + ' ' + typ + else: + equiv = typ + ' ' + op_desc['op'] + ' other' + + if typ == 'series': + base_doc = _flex_doc_SERIES + elif typ == 'dataframe': + base_doc = _flex_doc_FRAME + elif typ == 'panel': + base_doc = _flex_doc_PANEL + else: + raise AssertionError('Invalid typ argument.') + + doc = base_doc.format(desc=op_desc['desc'], op_name=op_name, + equiv=equiv, reverse=op_desc['reverse']) + return doc + # ----------------------------------------------------------------------------- # Functions that add arithmetic methods to objects, given arithmetic factory # methods -def _create_methods(arith_method, comp_method, bool_method, - use_numexpr, special=False, default_axis='columns', - have_divmod=False): +def _create_methods(cls, arith_method, comp_method, bool_method, + special=False): # creates actual methods based upon arithmetic, comp and bool method # constructors. 
- # NOTE: Only frame cares about default_axis, specifically: special methods - # have default axis None, whereas flex methods have default axis 'columns' + # numexpr is available for non-sparse classes + subtyp = getattr(cls, '_subtyp', '') + use_numexpr = 'sparse' not in subtyp + + have_divmod = issubclass(cls, ABCSeries) + # divmod is available for Series and SparseSeries + # if we're not using numexpr, then don't pass a str_rep if use_numexpr: op = lambda x: x @@ -70,48 +390,28 @@ def names(x): else: names = lambda x: x - # Inframe, all special methods have default_axis=None, flex methods have - # default_axis set to the default (columns) # yapf: disable new_methods = dict( - add=arith_method(operator.add, names('add'), op('+'), - default_axis=default_axis), - radd=arith_method(lambda x, y: y + x, names('radd'), op('+'), - default_axis=default_axis), - sub=arith_method(operator.sub, names('sub'), op('-'), - default_axis=default_axis), - mul=arith_method(operator.mul, names('mul'), op('*'), - default_axis=default_axis), - truediv=arith_method(operator.truediv, names('truediv'), op('/'), - truediv=True, fill_zeros=np.inf, - default_axis=default_axis), - floordiv=arith_method(operator.floordiv, names('floordiv'), op('//'), - default_axis=default_axis, fill_zeros=np.inf), + add=arith_method(operator.add, names('add'), op('+')), + radd=arith_method(lambda x, y: y + x, names('radd'), op('+')), + sub=arith_method(operator.sub, names('sub'), op('-')), + mul=arith_method(operator.mul, names('mul'), op('*')), + truediv=arith_method(operator.truediv, names('truediv'), op('/')), + floordiv=arith_method(operator.floordiv, names('floordiv'), op('//')), # Causes a floating point exception in the tests when numexpr enabled, # so for now no speedup - mod=arith_method(operator.mod, names('mod'), None, - default_axis=default_axis, fill_zeros=np.nan), - pow=arith_method(operator.pow, names('pow'), op('**'), - default_axis=default_axis), + mod=arith_method(operator.mod, 
names('mod'), None), + pow=arith_method(operator.pow, names('pow'), op('**')), # not entirely sure why this is necessary, but previously was included # so it's here to maintain compatibility - rmul=arith_method(operator.mul, names('rmul'), op('*'), - default_axis=default_axis, reversed=True), - rsub=arith_method(lambda x, y: y - x, names('rsub'), op('-'), - default_axis=default_axis, reversed=True), + rmul=arith_method(operator.mul, names('rmul'), op('*')), + rsub=arith_method(lambda x, y: y - x, names('rsub'), op('-')), rtruediv=arith_method(lambda x, y: operator.truediv(y, x), - names('rtruediv'), op('/'), truediv=True, - fill_zeros=np.inf, default_axis=default_axis, - reversed=True), + names('rtruediv'), op('/')), rfloordiv=arith_method(lambda x, y: operator.floordiv(y, x), - names('rfloordiv'), op('//'), - default_axis=default_axis, fill_zeros=np.inf, - reversed=True), - rpow=arith_method(lambda x, y: y**x, names('rpow'), op('**'), - default_axis=default_axis, reversed=True), - rmod=arith_method(lambda x, y: y % x, names('rmod'), op('%'), - default_axis=default_axis, fill_zeros=np.nan, - reversed=True),) + names('rfloordiv'), op('//')), + rpow=arith_method(lambda x, y: y**x, names('rpow'), op('**')), + rmod=arith_method(lambda x, y: y % x, names('rmod'), op('%'))) # yapf: enable new_methods['div'] = new_methods['truediv'] new_methods['rdiv'] = new_methods['rtruediv'] @@ -120,11 +420,11 @@ def names(x): if comp_method: new_methods.update(dict( eq=comp_method(operator.eq, names('eq'), op('==')), - ne=comp_method(operator.ne, names('ne'), op('!='), masker=True), + ne=comp_method(operator.ne, names('ne'), op('!=')), lt=comp_method(operator.lt, names('lt'), op('<')), gt=comp_method(operator.gt, names('gt'), op('>')), le=comp_method(operator.le, names('le'), op('<=')), - ge=comp_method(operator.ge, names('ge'), op('>=')), )) + ge=comp_method(operator.ge, names('ge'), op('>=')))) if bool_method: new_methods.update( dict(and_=bool_method(operator.and_, names('and_'), 
op('&')), @@ -139,34 +439,13 @@ def names(x): names('rxor'), op('^')))) if have_divmod: # divmod doesn't have an op that is supported by numexpr - new_methods['divmod'] = arith_method( - divmod, - names('divmod'), - None, - default_axis=default_axis, - construct_result=_construct_divmod_result, - ) + new_methods['divmod'] = arith_method(divmod, names('divmod'), None) new_methods = {names(k): v for k, v in new_methods.items()} return new_methods -def add_methods(cls, new_methods, force, select, exclude): - if select and exclude: - raise TypeError("May only pass either select or exclude") - - if select: - select = set(select) - methods = {} - for key, method in new_methods.items(): - if key in select: - methods[key] = method - new_methods = methods - - if exclude: - for k in exclude: - new_methods.pop(k, None) - +def add_methods(cls, new_methods, force): for name, method in new_methods.items(): if force or name not in cls.__dict__: bind_method(cls, name, method) @@ -176,8 +455,7 @@ def add_methods(cls, new_methods, force, select, exclude): # Arithmetic def add_special_arithmetic_methods(cls, arith_method=None, comp_method=None, bool_method=None, - use_numexpr=True, force=False, select=None, - exclude=None, have_divmod=False): + force=False): """ Adds the full suite of special arithmetic methods (``__add__``, ``__sub__``, etc.) to the class. 
@@ -186,31 +464,17 @@ def add_special_arithmetic_methods(cls, arith_method=None, ---------- arith_method : function (optional) factory for special arithmetic methods, with op string: - f(op, name, str_rep, default_axis=None, fill_zeros=None, **eval_kwargs) + f(op, name, str_rep) comp_method : function (optional) factory for rich comparison - signature: f(op, name, str_rep) bool_method : function (optional) factory for boolean methods - signature: f(op, name, str_rep) - use_numexpr : bool, default True - whether to accelerate with numexpr, defaults to True force : bool, default False if False, checks whether function is defined **on ``cls.__dict__``** before defining if True, always defines functions on class base - select : iterable of strings (optional) - if passed, only sets functions with names in select - exclude : iterable of strings (optional) - if passed, will not set functions with names in exclude - have_divmod : bool, (optional) - should a divmod method be added? this method is special because it - returns a tuple of cls instead of a single element of type cls """ - - # in frame, special methods have default_axis = None, comp methods use - # 'columns' - - new_methods = _create_methods(arith_method, comp_method, - bool_method, use_numexpr, default_axis=None, - special=True, have_divmod=have_divmod) + new_methods = _create_methods(cls, arith_method, comp_method, bool_method, + special=True) # inplace operators (I feel like these should get passed an `inplace=True` # or just be removed @@ -248,14 +512,12 @@ def f(self, other): __ior__=_wrap_inplace_method(new_methods["__or__"]), __ixor__=_wrap_inplace_method(new_methods["__xor__"]))) - add_methods(cls, new_methods=new_methods, force=force, select=select, - exclude=exclude) + add_methods(cls, new_methods=new_methods, force=force) def add_flex_arithmetic_methods(cls, flex_arith_method, flex_comp_method=None, flex_bool_method=None, - use_numexpr=True, force=False, select=None, - exclude=None): + force=False): 
""" Adds the full suite of flex arithmetic methods (``pow``, ``mul``, ``add``) to the class. @@ -263,24 +525,16 @@ def add_flex_arithmetic_methods(cls, flex_arith_method, Parameters ---------- flex_arith_method : function - factory for special arithmetic methods, with op string: - f(op, name, str_rep, default_axis=None, fill_zeros=None, **eval_kwargs) + factory for flex arithmetic methods, with op string: + f(op, name, str_rep) flex_comp_method : function, optional, factory for rich comparison - signature: f(op, name, str_rep) - use_numexpr : bool, default True - whether to accelerate with numexpr, defaults to True force : bool, default False if False, checks whether function is defined **on ``cls.__dict__``** before defining if True, always defines functions on class base - select : iterable of strings (optional) - if passed, only sets functions with names in select - exclude : iterable of strings (optional) - if passed, will not set functions with names in exclude """ - # in frame, default axis is 'columns', doesn't matter for series and panel - new_methods = _create_methods(flex_arith_method, + new_methods = _create_methods(cls, flex_arith_method, flex_comp_method, flex_bool_method, - use_numexpr, default_axis='columns', special=False) new_methods.update(dict(multiply=new_methods['mul'], subtract=new_methods['sub'], @@ -290,290 +544,11 @@ def add_flex_arithmetic_methods(cls, flex_arith_method, if k in new_methods: new_methods.pop(k) - add_methods(cls, new_methods=new_methods, force=force, select=select, - exclude=exclude) - - -class _Op(object): - - """ - Wrapper around Series arithmetic operations. - Generally, you should use classmethod ``_Op.get_op`` as an entry point. - - This validates and coerces lhs and rhs depending on its dtype and - based on op. See _TimeOp also. 
- - Parameters - ---------- - left : Series - lhs of op - right : object - rhs of op - name : str - name of op - na_op : callable - a function which wraps op - """ - - fill_value = np.nan - wrap_results = staticmethod(lambda x: x) - dtype = None - - def __init__(self, left, right, name, na_op): - self.left = left - self.right = right - - self.name = name - self.na_op = na_op - - self.lvalues = left - self.rvalues = right - - @classmethod - def get_op(cls, left, right, name, na_op): - """ - Get op dispatcher, returns _Op or _TimeOp. - - If ``left`` and ``right`` are appropriate for datetime arithmetic with - operation ``name``, processes them and returns a ``_TimeOp`` object - that stores all the required values. Otherwise, it will generate - either a ``_Op``, indicating that the operation is performed via - normal numpy path. - """ - is_timedelta_lhs = is_timedelta64_dtype(left) - - if not is_timedelta_lhs: - return _Op(left, right, name, na_op) - else: - return _TimeOp(left, right, name, na_op) - - -class _TimeOp(_Op): - """ - Wrapper around Series datetime/time/timedelta arithmetic operations. - Generally, you should use classmethod ``_Op.get_op`` as an entry point. 
- """ - fill_value = iNaT - - def __init__(self, left, right, name, na_op): - super(_TimeOp, self).__init__(left, right, name, na_op) - - lvalues = self._convert_to_array(left, name=name) - rvalues = self._convert_to_array(right, name=name, other=lvalues) - - # left - self.is_timedelta_lhs = is_timedelta64_dtype(lvalues) - assert self.is_timedelta_lhs - - # right - self.is_offset_rhs = is_offsetlike(right) - self.is_datetime64_rhs = is_datetime64_dtype(rvalues) - self.is_datetime64tz_rhs = is_datetime64tz_dtype(rvalues) - self.is_datetime_rhs = (self.is_datetime64_rhs or - self.is_datetime64tz_rhs) - self.is_timedelta_rhs = is_timedelta64_dtype(rvalues) - self.is_integer_rhs = rvalues.dtype.kind in ('i', 'u') - self.is_floating_rhs = rvalues.dtype.kind == 'f' - - self._validate(lvalues, rvalues, name) - self.lvalues, self.rvalues = self._convert_for_datetime(lvalues, - rvalues) - - def _validate_timedelta(self, name): - # assumes self.is_timedelta_lhs - - if self.is_integer_rhs or self.is_floating_rhs: - # timedelta and integer mul/div - self._check_timedelta_with_numeric(name) - elif self.is_timedelta_rhs or self.is_offset_rhs: - # 2 timedeltas - if name not in ('__div__', '__rdiv__', '__truediv__', - '__rtruediv__', '__add__', '__radd__', '__sub__', - '__rsub__', '__floordiv__', '__rfloordiv__'): - raise TypeError("can only operate on a timedeltas for addition" - ", subtraction, and division, but the operator" - " [{name}] was passed".format(name=name)) - elif self.is_datetime_rhs: - if name not in ('__add__', '__radd__', '__rsub__'): - raise TypeError("can only operate on a timedelta/DateOffset " - "with a rhs of a datetime for addition, " - "but the operator [{name}] was passed" - .format(name=name)) - else: - raise TypeError('cannot operate on a series without a rhs ' - 'of a series/ndarray of type datetime64[ns] ' - 'or a timedelta') - - def _validate(self, lvalues, rvalues, name): - return self._validate_timedelta(name) - - def 
_check_timedelta_with_numeric(self, name): - if name not in ('__div__', '__truediv__', '__mul__', '__rmul__'): - raise TypeError("can only operate on a timedelta and an " - "integer or a float for division and " - "multiplication, but the operator [{name}] " - "was passed".format(name=name)) - - def _convert_to_array(self, values, name=None, other=None): - """converts values to ndarray""" - from pandas.core.tools.timedeltas import to_timedelta - - ovalues = values - supplied_dtype = None - if not is_list_like(values): - values = np.array([values]) - - # if this is a Series that contains relevant dtype info, then use this - # instead of the inferred type; this avoids coercing Series([NaT], - # dtype='datetime64[ns]') to Series([NaT], dtype='timedelta64[ns]') - elif (isinstance(values, (pd.Series, ABCDatetimeIndex)) and - (is_timedelta64_dtype(values) or is_datetime64_dtype(values))): - supplied_dtype = values.dtype - - inferred_type = lib.infer_dtype(values) - if (inferred_type in ('datetime64', 'datetime', 'date', 'time') or - is_datetimetz(inferred_type)): - # if we have a other of timedelta, but use pd.NaT here we - # we are in the wrong path - if (supplied_dtype is None and other is not None and - (other.dtype in ('timedelta64[ns]', 'datetime64[ns]')) and - isna(values).all()): - values = np.empty(values.shape, dtype='timedelta64[ns]') - values[:] = iNaT - - elif isinstance(values, ABCDatetimeIndex): - # a datelike - pass - elif isinstance(ovalues, datetime.datetime): - # datetime scalar - values = pd.DatetimeIndex(values) - # datetime array with tz - elif is_datetimetz(values): - if isinstance(values, ABCSeries): - values = values._values - elif not (isinstance(values, (np.ndarray, ABCSeries)) and - is_datetime64_dtype(values)): - values = libts.array_to_datetime(values) - elif (is_datetime64_dtype(values) and - not is_datetime64_ns_dtype(values)): - # GH#7996 e.g. 
np.datetime64('2013-01-01') is datetime64[D] - values = values.astype('datetime64[ns]') - - elif inferred_type in ('timedelta', 'timedelta64'): - # have a timedelta, convert to to ns here - values = to_timedelta(values, errors='coerce', box=False) - if isinstance(other, ABCDatetimeIndex): - # GH#13905 - # Defer to DatetimeIndex/TimedeltaIndex operations where - # timezones are handled carefully. - values = pd.TimedeltaIndex(values) - elif inferred_type == 'integer': - # py3 compat where dtype is 'm' but is an integer - if values.dtype.kind == 'm': - values = values.astype('timedelta64[ns]') - elif isinstance(values, pd.PeriodIndex): - values = values.to_timestamp().to_series() - elif name not in ('__truediv__', '__div__', '__mul__', '__rmul__'): - raise TypeError("incompatible type for a datetime/timedelta " - "operation [{name}]".format(name=name)) - elif inferred_type == 'floating': - if (isna(values).all() and - name in ('__add__', '__radd__', '__sub__', '__rsub__')): - values = np.empty(values.shape, dtype=other.dtype) - values[:] = iNaT - return values - elif is_offsetlike(values): - return values - else: - raise TypeError("incompatible type [{dtype}] for a " - "datetime/timedelta operation" - .format(dtype=np.array(values).dtype)) - - return values - - def _convert_for_datetime(self, lvalues, rvalues): - from pandas.core.tools.timedeltas import to_timedelta + add_methods(cls, new_methods=new_methods, force=force) - mask = isna(lvalues) | isna(rvalues) - - # datetimes require views - if self.is_datetime_rhs: - - # datetime subtraction means timedelta - if self.is_datetime64tz_rhs: - self.dtype = rvalues.dtype - else: - self.dtype = 'datetime64[ns]' - - # if adding single offset try vectorized path - # in DatetimeIndex; otherwise elementwise apply - def _offset(lvalues, rvalues): - if len(lvalues) == 1: - rvalues = pd.DatetimeIndex(rvalues) - lvalues = lvalues[0] - else: - warnings.warn("Adding/subtracting array of DateOffsets to " - "Series not vectorized", 
PerformanceWarning) - rvalues = rvalues.astype('O') - - # pass thru on the na_op - self.na_op = lambda x, y: getattr(x, self.name)(y) - return lvalues, rvalues - - if self.is_offset_rhs: - rvalues, lvalues = _offset(rvalues, lvalues) - else: - - # with tz, convert to UTC - if self.is_datetime64tz_rhs: - rvalues = rvalues.tz_convert('UTC').tz_localize(None) - - lvalues = lvalues.view(np.int64) - rvalues = rvalues.view(np.int64) - - # otherwise it's a timedelta - else: - - self.dtype = 'timedelta64[ns]' - - # convert Tick DateOffset to underlying delta - if self.is_offset_rhs: - rvalues = to_timedelta(rvalues, box=False) - - lvalues = lvalues.astype(np.int64) - if not self.is_floating_rhs: - rvalues = rvalues.astype(np.int64) - - # time delta division -> unit less - # integer gets converted to timedelta in np < 1.6 - if ((self.is_timedelta_lhs and self.is_timedelta_rhs) and - not self.is_integer_rhs and - self.name in ('__div__', '__rdiv__', - '__truediv__', '__rtruediv__', - '__floordiv__', '__rfloordiv__')): - self.dtype = 'float64' - self.fill_value = np.nan - lvalues = lvalues.astype(np.float64) - rvalues = rvalues.astype(np.float64) - - # if we need to mask the results - if mask.any(): - - def f(x): - - # datetime64[ns]/timedelta64[ns] masking - try: - x = np.array(x, dtype=self.dtype) - except TypeError: - x = np.array(x, dtype='datetime64[ns]') - - np.putmask(x, mask, self.fill_value) - return x - - self.wrap_results = f - - return lvalues, rvalues +# ----------------------------------------------------------------------------- +# Series def _align_method_SERIES(left, right, align_asobject=False): """ align lhs and rhs Series """ @@ -597,7 +572,15 @@ def _align_method_SERIES(left, right, align_asobject=False): def _construct_result(left, result, index, name, dtype): - return left._constructor(result, index=index, name=name, dtype=dtype) + """ + If the raw op result has a non-None name (e.g. 
it is an Index object) and + the name argument is None, then passing name to the constructor will + not be enough; we still need to override the name attribute. + """ + out = left._constructor(result, index=index, dtype=dtype) + + out.name = name + return out def _construct_divmod_result(left, result, index, name, dtype): @@ -610,12 +593,16 @@ def _construct_divmod_result(left, result, index, name, dtype): ) -def _arith_method_SERIES(op, name, str_rep, fill_zeros=None, default_axis=None, - construct_result=_construct_result, **eval_kwargs): +def _arith_method_SERIES(op, name, str_rep): """ Wrapper function for Series arithmetic operations, to avoid code duplication. """ + eval_kwargs = _gen_eval_kwargs(name) + fill_zeros = _gen_fill_zeros(name) + construct_result = (_construct_divmod_result + if op is divmod else _construct_result) + def na_op(x, y): import pandas.core.computation.expressions as expressions @@ -626,7 +613,7 @@ def na_op(x, y): dtype = find_common_type([x.dtype, y.dtype]) result = np.empty(x.size, dtype=dtype) mask = notna(x) & notna(y) - result[mask] = op(x[mask], _values_from_object(y[mask])) + result[mask] = op(x[mask], com._values_from_object(y[mask])) elif isinstance(x, np.ndarray): result = np.empty(len(x), dtype=x.dtype) mask = notna(x) @@ -646,15 +633,9 @@ def safe_na_op(lvalues, rvalues): with np.errstate(all='ignore'): return na_op(lvalues, rvalues) except Exception: - if isinstance(rvalues, ABCSeries): - if is_object_dtype(rvalues): - # if dtype is object, try elementwise op - return libalgos.arrmap_object(rvalues, - lambda x: op(lvalues, x)) - else: - if is_object_dtype(lvalues): - return libalgos.arrmap_object(lvalues, - lambda x: op(x, rvalues)) + if is_object_dtype(lvalues): + return libalgos.arrmap_object(lvalues, + lambda x: op(x, rvalues)) raise def wrapper(left, right, name=name, na_op=na_op): @@ -663,53 +644,70 @@ def wrapper(left, right, name=name, na_op=na_op): return NotImplemented left, right = _align_method_SERIES(left, 
right) + res_name = _get_series_op_result_name(left, right) + if is_datetime64_dtype(left) or is_datetime64tz_dtype(left): - result = op(pd.DatetimeIndex(left), right) - res_name = _get_series_op_result_name(left, right) - result.name = res_name # needs to be overriden if None + result = dispatch_to_index_op(op, left, right, pd.DatetimeIndex) return construct_result(left, result, index=left.index, name=res_name, dtype=result.dtype) - converted = _Op.get_op(left, right, name, na_op) - - lvalues, rvalues = converted.lvalues, converted.rvalues - dtype = converted.dtype - wrap_results = converted.wrap_results - na_op = converted.na_op + elif is_timedelta64_dtype(left): + result = dispatch_to_index_op(op, left, right, pd.TimedeltaIndex) + return construct_result(left, result, + index=left.index, name=res_name, + dtype=result.dtype) + lvalues = left.values + rvalues = right if isinstance(rvalues, ABCSeries): - lvalues = getattr(lvalues, 'values', lvalues) - rvalues = getattr(rvalues, 'values', rvalues) - # _Op aligns left and right - else: - if (hasattr(lvalues, 'values') and - not isinstance(lvalues, ABCDatetimeIndex)): - lvalues = lvalues.values - - if isinstance(right, (ABCSeries, pd.Index)): - # `left` is always a Series object - res_name = _maybe_match_name(left, right) - else: - res_name = left.name + rvalues = rvalues.values - result = wrap_results(safe_na_op(lvalues, rvalues)) - res_name = _get_series_op_result_name(left, right) - return construct_result( - left, - result, - index=left.index, - name=res_name, - dtype=dtype, - ) + result = safe_na_op(lvalues, rvalues) + return construct_result(left, result, + index=left.index, name=res_name, dtype=None) return wrapper +def dispatch_to_index_op(op, left, right, index_class): + """ + Wrap Series left in the given index_class to delegate the operation op + to the index implementation. DatetimeIndex and TimedeltaIndex perform + type checking, timezone handling, overflow checks, etc. 
+ + Parameters + ---------- + op : binary operator (operator.add, operator.sub, ...) + left : Series + right : object + index_class : DatetimeIndex or TimedeltaIndex + + Returns + ------- + result : object, usually DatetimeIndex, TimedeltaIndex, or Series + """ + left_idx = index_class(left) + + # avoid accidentally allowing integer add/sub. For datetime64[tz] dtypes, + # left_idx may inherit a freq from a cached DatetimeIndex. + # See discussion in GH#19147. + if left_idx.freq is not None: + left_idx = left_idx._shallow_copy(freq=None) + try: + result = op(left_idx, right) + except NullFrequencyError: + # DatetimeIndex and TimedeltaIndex with freq == None raise ValueError + # on add/sub of integers (or int-like). We re-raise as a TypeError. + raise TypeError('incompatible type for a datetime/timedelta ' + 'operation [{name}]'.format(name=op.__name__)) + return result + + def _get_series_op_result_name(left, right): # `left` is always a pd.Series if isinstance(right, (ABCSeries, pd.Index)): - name = _maybe_match_name(left, right) + name = com._maybe_match_name(left, right) else: name = left.name return name @@ -731,11 +729,12 @@ def _comp_method_OBJECT_ARRAY(op, x, y): return result -def _comp_method_SERIES(op, name, str_rep, masker=False): +def _comp_method_SERIES(op, name, str_rep): """ Wrapper function for Series arithmetic operations, to avoid code duplication. 
""" + masker = _gen_eval_kwargs(name).get('masker', False) def na_op(x, y): @@ -772,7 +771,7 @@ def na_op(x, y): if is_scalar(y): mask = isna(x) - y = libindex.convert_scalar(x, _values_from_object(y)) + y = libindex.convert_scalar(x, com._values_from_object(y)) else: mask = isna(x) | isna(y) y = y.view('i8') @@ -797,7 +796,7 @@ def wrapper(self, other, axis=None): self._get_axis_number(axis) if isinstance(other, ABCSeries): - name = _maybe_match_name(self, other) + name = com._maybe_match_name(self, other) if not self._indexed_same(other): msg = 'Can only compare identically-labeled Series objects' raise ValueError(msg) @@ -849,7 +848,7 @@ def wrapper(self, other, axis=None): .format(typ=type(other))) # always return a full value series here - res = _values_from_object(res) + res = com._values_from_object(res) res = pd.Series(res, index=self.index, name=self.name, dtype='bool') return res @@ -901,7 +900,7 @@ def wrapper(self, other): self, other = _align_method_SERIES(self, other, align_asobject=True) if isinstance(other, ABCSeries): - name = _maybe_match_name(self, other) + name = com._maybe_match_name(self, other) is_other_int_dtype = is_integer_dtype(other.dtype) other = fill_int(other) if is_other_int_dtype else fill_bool(other) @@ -924,109 +923,8 @@ def wrapper(self, other): return wrapper -_op_descriptions = {'add': {'op': '+', - 'desc': 'Addition', - 'reversed': False, - 'reverse': 'radd'}, - 'sub': {'op': '-', - 'desc': 'Subtraction', - 'reversed': False, - 'reverse': 'rsub'}, - 'mul': {'op': '*', - 'desc': 'Multiplication', - 'reversed': False, - 'reverse': 'rmul'}, - 'mod': {'op': '%', - 'desc': 'Modulo', - 'reversed': False, - 'reverse': 'rmod'}, - 'pow': {'op': '**', - 'desc': 'Exponential power', - 'reversed': False, - 'reverse': 'rpow'}, - 'truediv': {'op': '/', - 'desc': 'Floating division', - 'reversed': False, - 'reverse': 'rtruediv'}, - 'floordiv': {'op': '//', - 'desc': 'Integer division', - 'reversed': False, - 'reverse': 'rfloordiv'}, - 
'divmod': {'op': 'divmod', - 'desc': 'Integer division and modulo', - 'reversed': False, - 'reverse': None}, - - 'eq': {'op': '==', - 'desc': 'Equal to', - 'reversed': False, - 'reverse': None}, - 'ne': {'op': '!=', - 'desc': 'Not equal to', - 'reversed': False, - 'reverse': None}, - 'lt': {'op': '<', - 'desc': 'Less than', - 'reversed': False, - 'reverse': None}, - 'le': {'op': '<=', - 'desc': 'Less than or equal to', - 'reversed': False, - 'reverse': None}, - 'gt': {'op': '>', - 'desc': 'Greater than', - 'reversed': False, - 'reverse': None}, - 'ge': {'op': '>=', - 'desc': 'Greater than or equal to', - 'reversed': False, - 'reverse': None}} - -_op_names = list(_op_descriptions.keys()) -for k in _op_names: - reverse_op = _op_descriptions[k]['reverse'] - _op_descriptions[reverse_op] = _op_descriptions[k].copy() - _op_descriptions[reverse_op]['reversed'] = True - _op_descriptions[reverse_op]['reverse'] = k - - -_flex_doc_SERIES = """ -%s of series and other, element-wise (binary operator `%s`). - -Equivalent to ``%s``, but with support to substitute a fill_value for -missing data in one of the inputs. - -Parameters ----------- -other : Series or scalar value -fill_value : None or float value, default None (NaN) - Fill missing (NaN) values with this value. 
If both Series are - missing, the result will be missing -level : int or name - Broadcast across a level, matching Index values on the - passed MultiIndex level - -Returns -------- -result : Series - -See also --------- -Series.%s -""" - - -def _flex_method_SERIES(op, name, str_rep, default_axis=None, fill_zeros=None, - **eval_kwargs): - op_name = name.replace('__', '') - op_desc = _op_descriptions[op_name] - if op_desc['reversed']: - equiv = 'other ' + op_desc['op'] + ' series' - else: - equiv = 'series ' + op_desc['op'] + ' other' - - doc = _flex_doc_SERIES % (op_desc['desc'], op_name, equiv, - op_desc['reverse']) +def _flex_method_SERIES(op, name, str_rep): + doc = _make_flex_doc(name, 'series') @Appender(doc) def flex_wrapper(self, other, level=None, fill_value=None, axis=0): @@ -1056,65 +954,11 @@ def flex_wrapper(self, other, level=None, fill_value=None, axis=0): series_special_funcs = dict(arith_method=_arith_method_SERIES, comp_method=_comp_method_SERIES, - bool_method=_bool_method_SERIES, - have_divmod=True) - -_arith_doc_FRAME = """ -Binary operator %s with support to substitute a fill_value for missing data in -one of the inputs + bool_method=_bool_method_SERIES) -Parameters ----------- -other : Series, DataFrame, or constant -axis : {0, 1, 'index', 'columns'} - For Series input, axis to match Series index on -fill_value : None or float value, default None - Fill missing (NaN) values with this value. If both DataFrame locations are - missing, the result will be missing -level : int or name - Broadcast across a level, matching Index values on the - passed MultiIndex level - -Notes ------ -Mismatched indices will be unioned together - -Returns -------- -result : DataFrame -""" - -_flex_doc_FRAME = """ -%s of dataframe and other, element-wise (binary operator `%s`). - -Equivalent to ``%s``, but with support to substitute a fill_value for -missing data in one of the inputs. 
- -Parameters ----------- -other : Series, DataFrame, or constant -axis : {0, 1, 'index', 'columns'} - For Series input, axis to match Series index on -fill_value : None or float value, default None - Fill missing (NaN) values with this value. If both DataFrame - locations are missing, the result will be missing -level : int or name - Broadcast across a level, matching Index values on the - passed MultiIndex level - -Notes ------ -Mismatched indices will be unioned together - -Returns -------- -result : DataFrame - -See also --------- -DataFrame.%s -""" +# ----------------------------------------------------------------------------- +# DataFrame def _align_method_FRAME(left, right, axis): """ convert rhs to meet lhs dims if input is list, tuple or np.ndarray """ @@ -1160,8 +1004,11 @@ def to_series(right): return right -def _arith_method_FRAME(op, name, str_rep=None, default_axis='columns', - fill_zeros=None, **eval_kwargs): +def _arith_method_FRAME(op, name, str_rep=None): + eval_kwargs = _gen_eval_kwargs(name) + fill_zeros = _gen_fill_zeros(name) + default_axis = _get_frame_op_default_axis(name) + def na_op(x, y): import pandas.core.computation.expressions as expressions @@ -1206,15 +1053,8 @@ def na_op(x, y): return result if name in _op_descriptions: - op_name = name.replace('__', '') - op_desc = _op_descriptions[op_name] - if op_desc['reversed']: - equiv = 'other ' + op_desc['op'] + ' dataframe' - else: - equiv = 'dataframe ' + op_desc['op'] + ' other' - - doc = _flex_doc_FRAME % (op_desc['desc'], op_name, equiv, - op_desc['reverse']) + # i.e. 
include "add" but not "__add__" + doc = _make_flex_doc(name, 'dataframe') else: doc = _arith_doc_FRAME % name @@ -1238,9 +1078,9 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): return f -# Masker unused for now -def _flex_comp_method_FRAME(op, name, str_rep=None, default_axis='columns', - masker=False): +def _flex_comp_method_FRAME(op, name, str_rep=None): + default_axis = _get_frame_op_default_axis(name) + def na_op(x, y): try: with np.errstate(invalid='ignore'): @@ -1286,7 +1126,7 @@ def f(self, other, axis=default_axis, level=None): return f -def _comp_method_FRAME(func, name, str_rep, masker=False): +def _comp_method_FRAME(func, name, str_rep): @Appender('Wrapper for comparison method {name}'.format(name=name)) def f(self, other): if isinstance(other, ABCDataFrame): # Another DataFrame @@ -1315,9 +1155,10 @@ def f(self, other): bool_method=_arith_method_FRAME) -def _arith_method_PANEL(op, name, str_rep=None, fill_zeros=None, - default_axis=None, **eval_kwargs): +# ----------------------------------------------------------------------------- +# Panel +def _arith_method_PANEL(op, name, str_rep=None): # work only for scalars def f(self, other): if not is_scalar(other): @@ -1331,7 +1172,7 @@ def f(self, other): return f -def _comp_method_PANEL(op, name, str_rep=None, masker=False): +def _comp_method_PANEL(op, name, str_rep=None): def na_op(x, y): import pandas.core.computation.expressions as expressions @@ -1377,6 +1218,122 @@ def f(self, other, axis=None): return f +def _flex_method_PANEL(op, name, str_rep=None): + eval_kwargs = _gen_eval_kwargs(name) + fill_zeros = _gen_fill_zeros(name) + + def na_op(x, y): + import pandas.core.computation.expressions as expressions + + try: + result = expressions.evaluate(op, str_rep, x, y, + errors='raise', + **eval_kwargs) + except TypeError: + result = op(x, y) + + # handles discrepancy between numpy and numexpr on division/mod + # by 0 though, given that these are generally (always?) 
+ # non-scalars, I'm not sure whether it's worth it at the moment + result = missing.fill_zeros(result, x, y, name, fill_zeros) + return result + + if name in _op_descriptions: + doc = _make_flex_doc(name, 'panel') + else: + # doc strings substitors + doc = _agg_doc_PANEL.format(op_name=name) + + @Appender(doc) + def f(self, other, axis=0): + return self._combine(other, na_op, axis=axis) + + f.__name__ = name + return f + + panel_special_funcs = dict(arith_method=_arith_method_PANEL, comp_method=_comp_method_PANEL, bool_method=_arith_method_PANEL) + + +# ----------------------------------------------------------------------------- +# Sparse + + +def _arith_method_SPARSE_SERIES(op, name, str_rep=None): + """ + Wrapper function for Series arithmetic operations, to avoid + code duplication. + + str_rep is not used, but is present for compatibility. + """ + + def wrapper(self, other): + if isinstance(other, ABCDataFrame): + return NotImplemented + elif isinstance(other, ABCSeries): + if not isinstance(other, ABCSparseSeries): + other = other.to_sparse(fill_value=self.fill_value) + return _sparse_series_op(self, other, op, name) + elif is_scalar(other): + with np.errstate(all='ignore'): + new_values = op(self.values, other) + return self._constructor(new_values, + index=self.index, + name=self.name) + else: # pragma: no cover + raise TypeError('operation with {other} not supported' + .format(other=type(other))) + + wrapper.__name__ = name + if name.startswith("__"): + # strip special method names, e.g. 
`__add__` needs to be `add` when + # passed to _sparse_series_op + name = name[2:-2] + return wrapper + + +def _sparse_series_op(left, right, op, name): + left, right = left.align(right, join='outer', copy=False) + new_index = left.index + new_name = com._maybe_match_name(left, right) + + from pandas.core.sparse.array import _sparse_array_op + result = _sparse_array_op(left.values, right.values, op, name, + series=True) + return left._constructor(result, index=new_index, name=new_name) + + +def _arith_method_SPARSE_ARRAY(op, name, str_rep=None): + """ + Wrapper function for Series arithmetic operations, to avoid + code duplication. + """ + + def wrapper(self, other): + from pandas.core.sparse.array import ( + SparseArray, _sparse_array_op, _wrap_result, _get_fill) + if isinstance(other, np.ndarray): + if len(self) != len(other): + raise AssertionError("length mismatch: {self} vs. {other}" + .format(self=len(self), other=len(other))) + if not isinstance(other, SparseArray): + dtype = getattr(other, 'dtype', None) + other = SparseArray(other, fill_value=self.fill_value, + dtype=dtype) + return _sparse_array_op(self, other, op, name) + elif is_scalar(other): + with np.errstate(all='ignore'): + fill = op(_get_fill(self), np.asarray(other)) + result = op(self.sp_values, other) + + return _wrap_result(name, result, self.sp_index, fill) + else: # pragma: no cover + raise TypeError('operation with {other} not supported' + .format(other=type(other))) + + if name.startswith("__"): + name = name[2:-2] + wrapper.__name__ = name + return wrapper diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 1c401c4854306..2cb80e938afb9 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -16,12 +16,10 @@ from pandas.core.dtypes.missing import notna import pandas.core.ops as ops -import pandas.core.missing as missing +import pandas.core.common as com from pandas import compat from pandas.compat import (map, zip, range, u, OrderedDict) from pandas.compat.numpy import 
function as nv -from pandas.core.common import (_try_sort, _default_index, _all_not_none, - _any_not_none, _apply_if_callable) from pandas.core.frame import DataFrame from pandas.core.generic import NDFrame, _shared_docs from pandas.core.index import (Index, MultiIndex, _ensure_index, @@ -31,7 +29,6 @@ from pandas.core.internals import (BlockManager, create_block_manager_from_arrays, create_block_manager_from_blocks) -from pandas.core.ops import _op_descriptions from pandas.core.series import Series from pandas.core.reshape.util import cartesian_product from pandas.util._decorators import Appender @@ -151,7 +148,7 @@ def __init__(self, data=None, items=None, major_axis=None, minor_axis=None, "http://xarray.pydata.org/en/stable/.\n" "Pandas provides a `.to_xarray()` method to help " "automate this conversion.\n", - DeprecationWarning, stacklevel=3) + FutureWarning, stacklevel=3) self._init_data(data=data, items=items, major_axis=major_axis, minor_axis=minor_axis, copy=copy, dtype=dtype) @@ -174,7 +171,7 @@ def _init_data(self, data, copy, dtype, **kwargs): axes = None if isinstance(data, BlockManager): - if _any_not_none(*passed_axes): + if com._any_not_none(*passed_axes): axes = [x if x is not None else y for x, y in zip(passed_axes, data.axes)] mgr = data @@ -186,7 +183,7 @@ def _init_data(self, data, copy, dtype, **kwargs): mgr = self._init_matrix(data, passed_axes, dtype=dtype, copy=copy) copy = False dtype = None - elif is_scalar(data) and _all_not_none(*passed_axes): + elif is_scalar(data) and com._all_not_none(*passed_axes): values = cast_scalar_to_array([len(x) for x in passed_axes], data, dtype=dtype) mgr = self._init_matrix(values, passed_axes, dtype=values.dtype, @@ -209,7 +206,7 @@ def _init_dict(self, data, axes, dtype=None): else: ks = list(data.keys()) if not isinstance(data, OrderedDict): - ks = _try_sort(ks) + ks = com._try_sort(ks) haxis = Index(ks) for k, v in compat.iteritems(data): @@ -287,7 +284,7 @@ def from_dict(cls, data, intersect=False, 
orient='items', dtype=None): return cls(**d) def __getitem__(self, key): - key = _apply_if_callable(key, self) + key = com._apply_if_callable(key, self) if isinstance(self._info_axis, MultiIndex): return self._getitem_multilevel(key) @@ -325,7 +322,7 @@ def _init_matrix(self, data, axes, dtype=None, copy=False): fixed_axes = [] for i, ax in enumerate(axes): if ax is None: - ax = _default_index(shape[i]) + ax = com._default_index(shape[i]) else: ax = _ensure_index(ax) fixed_axes.append(ax) @@ -477,8 +474,7 @@ def as_matrix(self): # Getting and setting elements def get_value(self, *args, **kwargs): - """ - Quickly retrieve single value at (item, major, minor) location + """Quickly retrieve single value at (item, major, minor) location .. deprecated:: 0.21.0 @@ -525,8 +521,7 @@ def _get_value(self, *args, **kwargs): _get_value.__doc__ = get_value.__doc__ def set_value(self, *args, **kwargs): - """ - Quickly set single value at (item, major, minor) location + """Quickly set single value at (item, major, minor) location .. 
deprecated:: 0.21.0 @@ -603,7 +598,7 @@ def _box_item_values(self, key, values): return self._constructor_sliced(values, **d) def __setitem__(self, key, value): - key = _apply_if_callable(key, self) + key = com._apply_if_callable(key, self) shape = tuple(self.shape) if isinstance(value, self._constructor_sliced): value = value.reindex( @@ -1525,89 +1520,6 @@ def _extract_axis(self, data, axis=0, intersect=False): return _ensure_index(index) - @classmethod - def _add_aggregate_operations(cls, use_numexpr=True): - """ add the operations to the cls; evaluate the doc strings again """ - - def _panel_arith_method(op, name, str_rep=None, default_axis=None, - fill_zeros=None, **eval_kwargs): - def na_op(x, y): - import pandas.core.computation.expressions as expressions - - try: - result = expressions.evaluate(op, str_rep, x, y, - errors='raise', - **eval_kwargs) - except TypeError: - result = op(x, y) - - # handles discrepancy between numpy and numexpr on division/mod - # by 0 though, given that these are generally (always?) - # non-scalars, I'm not sure whether it's worth it at the moment - result = missing.fill_zeros(result, x, y, name, fill_zeros) - return result - - if name in _op_descriptions: - op_name = name.replace('__', '') - op_desc = _op_descriptions[op_name] - if op_desc['reversed']: - equiv = 'other ' + op_desc['op'] + ' panel' - else: - equiv = 'panel ' + op_desc['op'] + ' other' - - _op_doc = """ -{desc} of series and other, element-wise (binary operator `{op_name}`). -Equivalent to ``{equiv}``. 
- -Parameters ----------- -other : {construct} or {cls_name} -axis : {{{axis_order}}} - Axis to broadcast over - -Returns -------- -{cls_name} - -See also --------- -{cls_name}.{reverse}\n""" - doc = _op_doc.format( - desc=op_desc['desc'], op_name=op_name, equiv=equiv, - construct=cls._constructor_sliced.__name__, - cls_name=cls.__name__, reverse=op_desc['reverse'], - axis_order=', '.join(cls._AXIS_ORDERS)) - else: - # doc strings substitors - _agg_doc = """ - Wrapper method for {wrp_method} - - Parameters - ---------- - other : {construct} or {cls_name} - axis : {{{axis_order}}} - Axis to broadcast over - - Returns - ------- - {cls_name}\n""" - doc = _agg_doc.format( - construct=cls._constructor_sliced.__name__, - cls_name=cls.__name__, wrp_method=name, - axis_order=', '.join(cls._AXIS_ORDERS)) - - @Appender(doc) - def f(self, other, axis=0): - return self._combine(other, na_op, axis=axis) - - f.__name__ = name - return f - - # add `div`, `mul`, `pow`, etc.. - ops.add_flex_arithmetic_methods( - cls, _panel_arith_method, use_numexpr=use_numexpr, - flex_comp_method=ops._comp_method_PANEL) - Panel._setup_axes(axes=['items', 'major_axis', 'minor_axis'], info_axis=0, stat_axis=1, aliases={'major': 'major_axis', @@ -1616,7 +1528,8 @@ def f(self, other, axis=0): 'minor_axis': 'columns'}) ops.add_special_arithmetic_methods(Panel, **ops.panel_special_funcs) -Panel._add_aggregate_operations() +ops.add_flex_arithmetic_methods(Panel, ops._flex_method_PANEL, + flex_comp_method=ops._comp_method_PANEL) Panel._add_numeric_operations() diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 5447ce7470b9d..706bec9e44892 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -5,7 +5,7 @@ from textwrap import dedent import pandas as pd -from pandas.core.base import AbstractMethodError, GroupByMixin +from pandas.core.base import GroupByMixin from pandas.core.groupby import (BinGrouper, Grouper, _GroupBy, GroupBy, SeriesGroupBy, groupby, PanelGroupBy, @@ 
-233,7 +233,7 @@ def _convert_obj(self, obj): return obj def _get_binner_for_time(self): - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) def _set_binner(self): """ @@ -372,10 +372,10 @@ def transform(self, arg, *args, **kwargs): arg, *args, **kwargs) def _downsample(self, f): - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) def _upsample(self, f, limit=None, fill_value=None): - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) def _gotitem(self, key, ndim, subset=None): """ @@ -1061,6 +1061,17 @@ class TimeGrouper(Grouper): def __init__(self, freq='Min', closed=None, label=None, how='mean', axis=0, fill_method=None, limit=None, loffset=None, kind=None, convention=None, base=0, **kwargs): + # Check for correctness of the keyword arguments which would + # otherwise silently use the default if misspelled + if label not in {None, 'left', 'right'}: + raise ValueError('Unsupported value {} for `label`'.format(label)) + if closed not in {None, 'left', 'right'}: + raise ValueError('Unsupported value {} for `closed`'.format( + closed)) + if convention not in {None, 'start', 'end', 'e', 's'}: + raise ValueError('Unsupported value {} for `convention`' + .format(convention)) + freq = to_offset(freq) end_types = set(['M', 'A', 'Q', 'BM', 'BA', 'BQ', 'W']) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index aaadf6d3ca32f..20f4384a3d698 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -7,8 +7,8 @@ from pandas.core.index import (_get_objs_combined_axis, _ensure_index, _get_consensus_names, _all_indexes_same) -from pandas.core.categorical import (_factorize_from_iterable, - _factorize_from_iterables) +from pandas.core.arrays.categorical import (_factorize_from_iterable, + _factorize_from_iterables) from pandas.core.internals import concatenate_block_managers from pandas.core import common as com from pandas.core.generic import NDFrame diff --git 
a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index b648c426a877f..01445eb30a9e5 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -4,7 +4,7 @@ from pandas.core.dtypes.common import is_list_like from pandas import compat -from pandas.core.categorical import Categorical +from pandas.core.arrays import Categorical from pandas.core.dtypes.generic import ABCMultiIndex @@ -80,8 +80,7 @@ def melt(frame, id_vars=None, value_vars=None, var_name=None, mdata[col] = np.asanyarray(frame.columns ._get_level_values(i)).repeat(N) - from pandas import DataFrame - return DataFrame(mdata, columns=mcolumns) + return frame._constructor(mdata, columns=mcolumns) def lreshape(data, groups, dropna=True, label=None): @@ -152,8 +151,7 @@ def lreshape(data, groups, dropna=True, label=None): if not mask.all(): mdata = {k: v[mask] for k, v in compat.iteritems(mdata)} - from pandas import DataFrame - return DataFrame(mdata, columns=id_cols + pivot_cols) + return data._constructor(mdata, columns=id_cols + pivot_cols) def wide_to_long(df, stubnames, i, j, sep="", suffix=r'\d+'): diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 455c6f42ac74a..99ea2c4fe4688 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -10,7 +10,7 @@ from pandas.compat import range, lzip, zip, map, filter import pandas.compat as compat -from pandas import (Categorical, Series, DataFrame, +from pandas import (Categorical, DataFrame, Index, MultiIndex, Timedelta) from pandas.core.frame import _merge_doc from pandas.core.dtypes.common import ( @@ -18,6 +18,7 @@ is_datetime64_dtype, needs_i8_conversion, is_int64_dtype, + is_array_like, is_categorical_dtype, is_integer_dtype, is_float_dtype, @@ -193,19 +194,17 @@ def merge_ordered(left, right, on=None, 5 e 3 b >>> merge_ordered(A, B, fill_method='ffill', left_by='group') - key lvalue group rvalue - 0 a 1 a NaN - 1 b 1 a 1 - 2 c 2 a 2 - 3 d 2 a 3 - 4 e 3 a 3 - 5 f 3 a 4 - 6 a 1 b NaN 
- 7 b 1 b 1 - 8 c 2 b 2 - 9 d 2 b 3 - 10 e 3 b 3 - 11 f 3 b 4 + group key lvalue rvalue + 0 a a 1 NaN + 1 a b 1 1.0 + 2 a c 2 2.0 + 3 a d 2 3.0 + 4 a e 3 3.0 + 5 b a 1 NaN + 6 b b 1 1.0 + 7 b c 2 2.0 + 8 b d 2 3.0 + 9 b e 3 3.0 Returns ------- @@ -814,12 +813,12 @@ def _get_merge_keys(self): join_names = [] right_drop = [] left_drop = [] + left, right = self.left, self.right + stacklevel = 5 # Number of stack levels from df.merge - is_lkey = lambda x: isinstance( - x, (np.ndarray, Series)) and len(x) == len(left) - is_rkey = lambda x: isinstance( - x, (np.ndarray, Series)) and len(x) == len(right) + is_lkey = lambda x: is_array_like(x) and len(x) == len(left) + is_rkey = lambda x: is_array_like(x) and len(x) == len(right) # Note that pd.merge_asof() has separate 'on' and 'by' parameters. A # user could, for example, request 'left_index' and 'left_by'. In a @@ -842,7 +841,8 @@ def _get_merge_keys(self): else: if rk is not None: right_keys.append( - right._get_label_or_level_values(rk)) + right._get_label_or_level_values( + rk, stacklevel=stacklevel)) join_names.append(rk) else: # work-around for merge_asof(right_index=True) @@ -852,7 +852,8 @@ def _get_merge_keys(self): if not is_rkey(rk): if rk is not None: right_keys.append( - right._get_label_or_level_values(rk)) + right._get_label_or_level_values( + rk, stacklevel=stacklevel)) else: # work-around for merge_asof(right_index=True) right_keys.append(right.index) @@ -865,7 +866,8 @@ def _get_merge_keys(self): else: right_keys.append(rk) if lk is not None: - left_keys.append(left._get_label_or_level_values(lk)) + left_keys.append(left._get_label_or_level_values( + lk, stacklevel=stacklevel)) join_names.append(lk) else: # work-around for merge_asof(left_index=True) @@ -877,7 +879,8 @@ def _get_merge_keys(self): left_keys.append(k) join_names.append(None) else: - left_keys.append(left._get_label_or_level_values(k)) + left_keys.append(left._get_label_or_level_values( + k, stacklevel=stacklevel)) join_names.append(k) if 
isinstance(self.right.index, MultiIndex): right_keys = [lev._values.take(lab) @@ -891,7 +894,8 @@ def _get_merge_keys(self): right_keys.append(k) join_names.append(None) else: - right_keys.append(right._get_label_or_level_values(k)) + right_keys.append(right._get_label_or_level_values( + k, stacklevel=stacklevel)) join_names.append(k) if isinstance(self.left.index, MultiIndex): left_keys = [lev._values.take(lab) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 77babf718d78c..0e92fc4edce85 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -75,7 +75,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', for key in keys: try: values = values.drop(key) - except (TypeError, ValueError): + except (TypeError, ValueError, KeyError): pass values = list(values) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 1ca014baa9ec8..c8bca476c65f2 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -21,7 +21,8 @@ from pandas.core.sparse.array import SparseArray from pandas._libs.sparse import IntIndex -from pandas.core.categorical import Categorical, _factorize_from_iterable +from pandas.core.arrays import Categorical +from pandas.core.arrays.categorical import _factorize_from_iterable from pandas.core.sorting import (get_group_index, get_compressed_ids, compress_group_index, decons_obs_group_ids) @@ -37,8 +38,23 @@ class _Unstacker(object): Parameters ---------- + values : ndarray + Values of DataFrame to "Unstack" + index : object + Pandas ``Index`` level : int or str, default last level Level to "unstack". Accepts a name for the level. + value_columns : Index, optional + Pandas ``Index`` or ``MultiIndex`` object if unstacking a DataFrame + fill_value : scalar, optional + Default value to fill in missing values if subgroups do not have the + same set of labels. 
By default, missing values will be replaced with + the default fill value for that data type, NaN for float, NaT for + datetimelike, etc. For integer types, by default data will converted to + float and missing values will be set to NaN. + constructor : object + Pandas ``DataFrame`` or subclass used to create unstacked + response. If None, DataFrame or SparseDataFrame will be used. Examples -------- @@ -69,7 +85,7 @@ class _Unstacker(object): """ def __init__(self, values, index, level=-1, value_columns=None, - fill_value=None): + fill_value=None, constructor=None): self.is_categorical = None self.is_sparse = is_sparse(values) @@ -86,21 +102,30 @@ def __init__(self, values, index, level=-1, value_columns=None, self.value_columns = value_columns self.fill_value = fill_value + if constructor is None: + if self.is_sparse: + self.constructor = SparseDataFrame + else: + self.constructor = DataFrame + else: + self.constructor = constructor + if value_columns is None and values.shape[1] != 1: # pragma: no cover raise ValueError('must pass column labels for multi-column data') - self.index = index + self.index = index.remove_unused_levels() self.level = self.index._get_level_number(level) # when index includes `nan`, need to lift levels/strides by 1 self.lift = 1 if -1 in self.index.labels[self.level] else 0 - self.new_index_levels = list(index.levels) - self.new_index_names = list(index.names) + self.new_index_levels = list(self.index.levels) + self.new_index_names = list(self.index.names) self.removed_name = self.new_index_names.pop(self.level) self.removed_level = self.new_index_levels.pop(self.level) + self.removed_level_full = index.levels[self.level] self._make_sorted_values_labels() self._make_selectors() @@ -150,21 +175,10 @@ def _make_selectors(self): self.compressor = comp_index.searchsorted(np.arange(ngroups)) def get_result(self): - # TODO: find a better way than this masking business - - values, value_mask = self.get_new_values() + values, _ = 
self.get_new_values() columns = self.get_new_columns() index = self.get_new_index() - # filter out missing levels - if values.shape[1] > 0: - col_inds, obs_ids = compress_group_index(self.sorted_labels[-1]) - # rare case, level values not observed - if len(obs_ids) < self.full_shape[1]: - inds = (value_mask.sum(0) > 0).nonzero()[0] - values = algos.take_nd(values, inds, axis=1) - columns = columns[inds] - # may need to coerce categoricals here if self.is_categorical is not None: categories = self.is_categorical.categories @@ -173,8 +187,7 @@ def get_result(self): ordered=ordered) for i in range(values.shape[-1])] - klass = SparseDataFrame if self.is_sparse else DataFrame - return klass(values, index=index, columns=columns) + return self.constructor(values, index=index, columns=columns) def get_new_values(self): values = self.values @@ -253,17 +266,28 @@ def get_new_columns(self): width = len(self.value_columns) propagator = np.repeat(np.arange(width), stride) if isinstance(self.value_columns, MultiIndex): - new_levels = self.value_columns.levels + (self.removed_level,) + new_levels = self.value_columns.levels + (self.removed_level_full,) new_names = self.value_columns.names + (self.removed_name,) new_labels = [lab.take(propagator) for lab in self.value_columns.labels] else: - new_levels = [self.value_columns, self.removed_level] + new_levels = [self.value_columns, self.removed_level_full] new_names = [self.value_columns.name, self.removed_name] new_labels = [propagator] - new_labels.append(np.tile(np.arange(stride) - self.lift, width)) + # The two indices differ only if the unstacked level had unused items: + if len(self.removed_level_full) != len(self.removed_level): + # In this case, we remap the new labels to the original level: + repeater = self.removed_level_full.get_indexer(self.removed_level) + if self.lift: + repeater = np.insert(repeater, 0, -1) + else: + # Otherwise, we just use each level item exactly once: + repeater = np.arange(stride) - self.lift + + 
# The entire level is then just a repetition of the single chunk: + new_labels.append(np.tile(repeater, width)) return MultiIndex(levels=new_levels, labels=new_labels, names=new_names, verify_integrity=False) @@ -374,8 +398,9 @@ def pivot(self, index=None, columns=None, values=None): index = self.index else: index = self[index] - indexed = Series(self[values].values, - index=MultiIndex.from_arrays([index, self[columns]])) + indexed = self._constructor_sliced( + self[values].values, + index=MultiIndex.from_arrays([index, self[columns]])) return indexed.unstack(columns) @@ -461,7 +486,8 @@ def unstack(obj, level, fill_value=None): return obj.T.stack(dropna=False) else: unstacker = _Unstacker(obj.values, obj.index, level=level, - fill_value=fill_value) + fill_value=fill_value, + constructor=obj._constructor_expanddim) return unstacker.get_result() @@ -470,12 +496,12 @@ def _unstack_frame(obj, level, fill_value=None): unstacker = partial(_Unstacker, index=obj.index, level=level, fill_value=fill_value) blocks = obj._data.unstack(unstacker) - klass = type(obj) - return klass(blocks) + return obj._constructor(blocks) else: unstacker = _Unstacker(obj.values, obj.index, level=level, value_columns=obj.columns, - fill_value=fill_value) + fill_value=fill_value, + constructor=obj._constructor) return unstacker.get_result() @@ -528,8 +554,7 @@ def factorize(index): new_values = new_values[mask] new_index = new_index[mask] - klass = type(frame)._constructor_sliced - return klass(new_values, index=new_index) + return frame._constructor_sliced(new_values, index=new_index) def stack_multiple(frame, level, dropna=True): @@ -628,7 +653,11 @@ def _convert_level_number(level_num, columns): levsize = len(level_labels) drop_cols = [] for key in unique_groups: - loc = this.columns.get_loc(key) + try: + loc = this.columns.get_loc(key) + except KeyError: + drop_cols.append(key) + continue # can make more efficient? 
# we almost always return a slice @@ -639,10 +668,7 @@ def _convert_level_number(level_num, columns): else: slice_len = loc.stop - loc.start - if slice_len == 0: - drop_cols.append(key) - continue - elif slice_len != levsize: + if slice_len != levsize: chunk = this.loc[:, this.columns[loc]] chunk.columns = level_vals.take(chunk.columns.labels[-1]) value_slice = chunk.reindex(columns=level_vals_used).values @@ -675,7 +701,7 @@ def _convert_level_number(level_num, columns): new_index = MultiIndex(levels=new_levels, labels=new_labels, names=new_names, verify_integrity=False) - result = DataFrame(new_data, index=new_index, columns=new_columns) + result = frame._constructor(new_data, index=new_index, columns=new_columns) # more efficient way to go about this? can do the whole masking biz but # will only save a small amount of time... diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 2adf17a227a59..777f08bd9db2b 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -348,8 +348,7 @@ def _format_labels(bins, precision, right=True, # account that we are all right closed v = adjust(labels[0].left) - i = IntervalIndex.from_intervals( - [Interval(v, labels[0].right, closed='right')]) + i = IntervalIndex([Interval(v, labels[0].right, closed='right')]) labels = i.append(labels[1:]) return labels diff --git a/pandas/core/series.py b/pandas/core/series.py index 5d8092fd30496..78b4c3a70a519 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -13,6 +13,7 @@ import numpy as np import numpy.ma as ma +from pandas.core.accessor import CachedAccessor from pandas.core.dtypes.common import ( is_categorical_dtype, is_bool, @@ -38,22 +39,12 @@ construct_1d_arraylike_from_scalar) from pandas.core.dtypes.missing import isna, notna, remove_na_arraylike -from pandas.core.common import (is_bool_indexer, - _default_index, - _asarray_tuplesafe, - _values_from_object, - _maybe_match_name, - SettingWithCopyError, - 
_maybe_box_datetimelike, - standardize_mapping, - _any_none) from pandas.core.index import (Index, MultiIndex, InvalidIndexError, Float64Index, _ensure_index) from pandas.core.indexing import check_bool_indexer, maybe_convert_indices from pandas.core import generic, base from pandas.core.internals import SingleBlockManager -from pandas.core.categorical import Categorical, CategoricalAccessor -import pandas.core.strings as strings +from pandas.core.arrays.categorical import Categorical, CategoricalAccessor from pandas.core.indexes.accessors import CombinedDatetimelikeProperties from pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.indexes.timedeltas import TimedeltaIndex @@ -64,7 +55,6 @@ zip, u, OrderedDict, StringIO, range, get_range_parameters) from pandas.compat.numpy import function as nv -from pandas.core import accessor import pandas.core.ops as ops import pandas.core.algorithms as algorithms @@ -77,6 +67,7 @@ from pandas._libs import index as libindex, tslib as libts, lib, iNaT from pandas.core.config import get_option +from pandas.core.strings import StringMethods import pandas.plotting._core as gfx @@ -93,8 +84,10 @@ # see gh-16971 def remove_na(arr): - """ - DEPRECATED : this function will be removed in a future version. + """Remove null values from array like structure. + + .. deprecated:: 0.21.0 + Use s[s.notnull()] instead. 
""" warnings.warn("remove_na is deprecated and is a private " @@ -151,7 +144,6 @@ class Series(base.IndexOpsMixin, generic.NDFrame): _deprecations = generic.NDFrame._deprecations | frozenset( ['asobject', 'sortlevel', 'reshape', 'get_value', 'set_value', 'from_csv', 'valid']) - _allow_index_ops = True def __init__(self, data=None, index=None, dtype=None, name=None, copy=False, fastpath=False): @@ -204,8 +196,13 @@ def __init__(self, data=None, index=None, dtype=None, name=None, elif isinstance(data, SingleBlockManager): if index is None: index = data.index - else: - data = data.reindex(index, copy=copy) + elif not data.index.equals(index) or copy: + # GH#19275 SingleBlockManager input should only be called + # internally + raise AssertionError('Cannot pass both SingleBlockManager ' + '`data` argument and a different ' + '`index` argument. `copy` must ' + 'be False.') elif isinstance(data, Categorical): # GH12574: Allow dtype=category only, otherwise error if ((dtype is not None) and @@ -228,7 +225,7 @@ def __init__(self, data=None, index=None, dtype=None, name=None, if index is None: if not is_list_like(data): data = [data] - index = _default_index(len(data)) + index = com._default_index(len(data)) # create/copy the manager if isinstance(data, SingleBlockManager): @@ -290,8 +287,10 @@ def _init_dict(self, data, index=None, dtype=None): @classmethod def from_array(cls, arr, index=None, name=None, dtype=None, copy=False, fastpath=False): - """ - DEPRECATED: use the pd.Series(..) constructor instead. + """Construct Series from array. + + .. deprecated :: 0.23.0 + Use pd.Series(..) constructor instead. """ warnings.warn("'from_array' is deprecated and will be removed in a " @@ -450,9 +449,11 @@ def get_values(self): @property def asobject(self): - """DEPRECATED: Use ``astype(object)`` instead. + """Return object Series which contains boxed values. + + .. deprecated :: 0.23.0 + Use ``astype(object) instead. 
- return object Series which contains boxed values *this is an internal non-public method* """ @@ -484,7 +485,7 @@ def compress(self, condition, *args, **kwargs): def nonzero(self): """ - Return the indices of the elements that are non-zero + Return the *integer* indices of the elements that are non-zero This method is equivalent to calling `numpy.nonzero` on the series data. For compatibility with NumPy, the return value is @@ -502,6 +503,15 @@ def nonzero(self): 3 4 dtype: int64 + >>> s = pd.Series([0, 3, 0, 4], index=['a', 'b', 'c', 'd']) + # same return although index of s is different + >>> s.nonzero() + (array([1, 3]),) + >>> s.iloc[s.nonzero()[0]] + b 3 + d 4 + dtype: int64 + See Also -------- numpy.nonzero @@ -682,7 +692,7 @@ def __getitem__(self, key): pass elif key is Ellipsis: return self - elif is_bool_indexer(key): + elif com.is_bool_indexer(key): pass else: @@ -756,7 +766,7 @@ def _get_with(self, key): def _get_values_tuple(self, key): # mpl hackaround - if _any_none(*key): + if com._any_none(*key): return self._get_values(key) if not isinstance(self.index, MultiIndex): @@ -781,7 +791,7 @@ def setitem(key, value): try: self._set_with_engine(key, value) return - except (SettingWithCopyError): + except com.SettingWithCopyError: raise except (KeyError, ValueError): values = self._values @@ -881,7 +891,7 @@ def _set_labels(self, key, value): if isinstance(key, Index): key = key.values else: - key = _asarray_tuplesafe(key) + key = com._asarray_tuplesafe(key) indexer = self.index.get_indexer(key) mask = indexer == -1 if mask.any(): @@ -911,12 +921,10 @@ def repeat(self, repeats, *args, **kwargs): index=new_index).__finalize__(self) def get_value(self, label, takeable=False): - """ - Quickly retrieve single value at passed index label + """Quickly retrieve single value at passed index label .. deprecated:: 0.21.0 - - Please use .at[] or .iat[] accessors. + Please use .at[] or .iat[] accessors. 
Parameters ---------- @@ -935,19 +943,17 @@ def get_value(self, label, takeable=False): def _get_value(self, label, takeable=False): if takeable is True: - return _maybe_box_datetimelike(self._values[label]) + return com._maybe_box_datetimelike(self._values[label]) return self.index.get_value(self._values, label) _get_value.__doc__ = get_value.__doc__ def set_value(self, label, value, takeable=False): - """ - Quickly set single value at passed label. If label is not contained, a - new object is created with the label placed at the end of the result - index + """Quickly set single value at passed label. If label is not contained, + a new object is created with the label placed at the end of the result + index. .. deprecated:: 0.21.0 - - Please use .at[] or .iat[] accessors. + Please use .at[] or .iat[] accessors. Parameters ---------- @@ -1037,7 +1043,7 @@ def reset_index(self, level=None, drop=False, name=None, inplace=False): """ inplace = validate_bool_kwarg(inplace, 'inplace') if drop: - new_index = _default_index(len(self)) + new_index = com._default_index(len(self)) if level is not None and isinstance(self.index, MultiIndex): if not isinstance(level, (tuple, list)): level = [level] @@ -1180,7 +1186,7 @@ def to_dict(self, into=dict): defaultdict(, {0: 1, 1: 2, 2: 3, 3: 4}) """ # GH16122 - into_c = standardize_mapping(into) + into_c = com.standardize_mapping(into) return into_c(compat.iteritems(self)) def to_frame(self, name=None): @@ -1258,7 +1264,7 @@ def count(self, level=None): from pandas.core.index import _get_na_value if level is None: - return notna(_values_from_object(self)).sum() + return notna(com._values_from_object(self)).sum() if isinstance(level, compat.string_types): level = self.index._get_level_number(level) @@ -1340,7 +1346,7 @@ def idxmin(self, axis=None, skipna=True, *args, **kwargs): numpy.ndarray.argmin """ skipna = nv.validate_argmin_with_skipna(skipna, args, kwargs) - i = nanops.nanargmin(_values_from_object(self), skipna=skipna) + i = 
nanops.nanargmin(com._values_from_object(self), skipna=skipna) if i == -1: return np.nan return self.index[i] @@ -1376,19 +1382,19 @@ def idxmax(self, axis=None, skipna=True, *args, **kwargs): numpy.ndarray.argmax """ skipna = nv.validate_argmax_with_skipna(skipna, args, kwargs) - i = nanops.nanargmax(_values_from_object(self), skipna=skipna) + i = nanops.nanargmax(com._values_from_object(self), skipna=skipna) if i == -1: return np.nan return self.index[i] # ndarray compat - argmin = deprecate('argmin', idxmin, + argmin = deprecate('argmin', idxmin, '0.21.0', msg="'argmin' is deprecated, use 'idxmin' instead. " "The behavior of 'argmin' will be corrected to " "return the positional minimum in the future. " "Use 'series.values.argmin' to get the position of " "the minimum now.") - argmax = deprecate('argmax', idxmax, + argmax = deprecate('argmax', idxmax, '0.21.0', msg="'argmax' is deprecated, use 'idxmax' instead. " "The behavior of 'argmax' will be corrected to " "return the positional maximum in the future. 
" @@ -1417,7 +1423,7 @@ def round(self, decimals=0, *args, **kwargs): """ nv.validate_round(args, kwargs) - result = _values_from_object(self).round(decimals) + result = com._values_from_object(self).round(decimals) result = self._constructor(result, index=self.index).__finalize__(self) return result @@ -1534,7 +1540,7 @@ def diff(self, periods=1): ------- diffed : Series """ - result = algorithms.diff(_values_from_object(self), periods) + result = algorithms.diff(com._values_from_object(self), periods) return self._constructor(result, index=self.index).__finalize__(self) def autocorr(self, lag=1): @@ -1735,7 +1741,7 @@ def _binop(self, other, func, level=None, fill_value=None): with np.errstate(all='ignore'): result = func(this_vals, other_vals) - name = _maybe_match_name(self, other) + name = com._maybe_match_name(self, other) result = self._constructor(result, index=new_index, name=name) result = result.__finalize__(self) if name is None: @@ -1776,7 +1782,7 @@ def combine(self, other, func, fill_value=np.nan): """ if isinstance(other, Series): new_index = self.index.union(other.index) - new_name = _maybe_match_name(self, other) + new_name = com._maybe_match_name(self, other) new_values = np.empty(len(new_index), dtype=self.dtype) for i, idx in enumerate(new_index): lv = self.get(idx, fill_value) @@ -1821,7 +1827,7 @@ def combine_first(self, other): this = self.reindex(new_index, copy=False) other = other.reindex(new_index, copy=False) # TODO: do we need name? 
- name = _maybe_match_name(self, other) # noqa + name = com._maybe_match_name(self, other) # noqa rs_vals = com._where_compat(isna(this), other._values, this._values) return self._constructor(rs_vals, index=new_index).__finalize__(self) @@ -1909,7 +1915,7 @@ def _try_kind_sort(arr): bad = isna(arr) good = ~bad - idx = _default_index(len(self)) + idx = com._default_index(len(self)) argsorted = _try_kind_sort(arr[good]) @@ -2120,12 +2126,12 @@ def nsmallest(self, n=5, keep='first'): return algorithms.SelectNSeries(self, n=n, keep=keep).nsmallest() def sortlevel(self, level=0, ascending=True, sort_remaining=True): - """ - DEPRECATED: use :meth:`Series.sort_index` - - Sort Series with MultiIndex by chosen level. Data will be + """Sort Series with MultiIndex by chosen level. Data will be lexicographically sorted by the chosen level followed by the other - levels (in order) + levels (in order), + + .. deprecated:: 0.20.0 + Use :meth:`Series.sort_index` Parameters ---------- @@ -2670,7 +2676,12 @@ def shift(self, periods=1, freq=None, axis=0): return super(Series, self).shift(periods=periods, freq=freq, axis=axis) def reindex_axis(self, labels, axis=0, **kwargs): - """ for compatibility with higher dims """ + """Conform Series to new index with optional filling logic. + + .. deprecated:: 0.21.0 + Use ``Series.reindex`` instead. 
+ """ + # for compatibility with higher dims if axis != 0: raise ValueError("cannot reindex series on non-zero axis!") msg = ("'.reindex_axis' is deprecated and will be removed in a future " @@ -2777,7 +2788,7 @@ def isin(self, values): dtype: bool """ - result = algorithms.isin(_values_from_object(self), values) + result = algorithms.isin(com._values_from_object(self), values) return self._constructor(result, index=self.index).__finalize__(self) def between(self, left, right, inclusive=True): @@ -2808,9 +2819,10 @@ def between(self, left, right, inclusive=True): @classmethod def from_csv(cls, path, sep=',', parse_dates=True, header=None, index_col=0, encoding=None, infer_datetime_format=False): - """ - Read CSV file (DEPRECATED, please use :func:`pandas.read_csv` - instead). + """Read CSV file. + + .. deprecated:: 0.21.0 + Use :func:`pandas.read_csv` instead. It is preferable to use the more powerful :func:`pandas.read_csv` for most general purposes, but ``from_csv`` makes for an easy @@ -2873,7 +2885,8 @@ def from_csv(cls, path, sep=',', parse_dates=True, header=None, def to_csv(self, path=None, index=True, sep=",", na_rep='', float_format=None, header=False, index_label=None, - mode='w', encoding=None, date_format=None, decimal='.'): + mode='w', encoding=None, compression=None, date_format=None, + decimal='.'): """ Write Series to a comma-separated values (csv) file @@ -2900,6 +2913,10 @@ def to_csv(self, path=None, index=True, sep=",", na_rep='', encoding : string, optional a string representing the encoding to use if the contents are non-ascii, for python versions prior to 3 + compression : string, optional + a string representing the compression to use in the output file, + allowed values are 'gzip', 'bz2', 'xz', only used when the first + argument is a filename date_format: string, default None Format string for datetime objects. decimal: string, default '.' 
@@ -2912,8 +2929,8 @@ def to_csv(self, path=None, index=True, sep=",", na_rep='', result = df.to_csv(path, index=index, sep=sep, na_rep=na_rep, float_format=float_format, header=header, index_label=index_label, mode=mode, - encoding=encoding, date_format=date_format, - decimal=decimal) + encoding=encoding, compression=compression, + date_format=date_format, decimal=decimal) if path is None: return result @@ -2978,8 +2995,10 @@ def dropna(self, axis=0, inplace=False, **kwargs): return self.copy() def valid(self, inplace=False, **kwargs): - """DEPRECATED. Series.valid will be removed in a future version. - Use :meth:`Series.dropna` instead. + """Return Series without null values. + + .. deprecated:: 0.23.0 + Use :meth:`Series.dropna` instead. """ warnings.warn("Method .valid will be removed in a future version. " "Use .dropna instead.", FutureWarning, stacklevel=2) @@ -3059,21 +3078,16 @@ def to_period(self, freq=None, copy=True): return self._constructor(new_values, index=new_index).__finalize__(self) - # ------------------------------------------------------------------------- - # Datetimelike delegation methods - dt = accessor.AccessorProperty(CombinedDatetimelikeProperties) - - # ------------------------------------------------------------------------- - # Categorical methods - cat = accessor.AccessorProperty(CategoricalAccessor) - - # String Methods - str = accessor.AccessorProperty(strings.StringMethods) + # ---------------------------------------------------------------------- + # Accessor Methods + # ---------------------------------------------------------------------- + str = CachedAccessor("str", StringMethods) + dt = CachedAccessor("dt", CombinedDatetimelikeProperties) + cat = CachedAccessor("cat", CategoricalAccessor) + plot = CachedAccessor("plot", gfx.SeriesPlotMethods) # ---------------------------------------------------------------------- # Add plotting methods to Series - plot = accessor.AccessorProperty(gfx.SeriesPlotMethods, - 
gfx.SeriesPlotMethods) hist = gfx.hist_series @@ -3243,7 +3257,7 @@ def _try_cast(arr, take_fast_path): if isinstance(data, np.ndarray): raise Exception('Data must be 1-dimensional') else: - subarr = _asarray_tuplesafe(data, dtype=dtype) + subarr = com._asarray_tuplesafe(data, dtype=dtype) # This is to prevent mixed-type Series getting all casted to # NumPy string type, e.g. NaN --> '-1#IND'. diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 27252b9616a44..e550976d1deeb 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -182,7 +182,7 @@ def indexer_from_factorized(labels, shape, compress=True): def lexsort_indexer(keys, orders=None, na_position='last'): - from pandas.core.categorical import Categorical + from pandas.core.arrays import Categorical labels = [] shape = [] diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py index 9b2650359bf68..fa07400a0706e 100644 --- a/pandas/core/sparse/array.py +++ b/pandas/core/sparse/array.py @@ -14,8 +14,7 @@ from pandas.compat import range from pandas.compat.numpy import function as nv -from pandas.core.dtypes.generic import ( - ABCSparseArray, ABCSparseSeries) +from pandas.core.dtypes.generic import ABCSparseSeries from pandas.core.dtypes.common import ( _ensure_platform_int, is_float, is_integer, @@ -43,39 +42,6 @@ _sparray_doc_kwargs = dict(klass='SparseArray') -def _arith_method(op, name, str_rep=None, default_axis=None, fill_zeros=None, - **eval_kwargs): - """ - Wrapper function for Series arithmetic operations, to avoid - code duplication. - """ - - def wrapper(self, other): - if isinstance(other, np.ndarray): - if len(self) != len(other): - raise AssertionError("length mismatch: {self} vs. 
{other}" - .format(self=len(self), other=len(other))) - if not isinstance(other, ABCSparseArray): - dtype = getattr(other, 'dtype', None) - other = SparseArray(other, fill_value=self.fill_value, - dtype=dtype) - return _sparse_array_op(self, other, op, name) - elif is_scalar(other): - with np.errstate(all='ignore'): - fill = op(_get_fill(self), np.asarray(other)) - result = op(self.sp_values, other) - - return _wrap_result(name, result, self.sp_index, fill) - else: # pragma: no cover - raise TypeError('operation with {other} not supported' - .format(other=type(other))) - - if name.startswith("__"): - name = name[2:-2] - wrapper.__name__ = name - return wrapper - - def _get_fill(arr): # coerce fill_value to arr dtype if possible # int64 SparseArray can have NaN as fill_value if there is no missing @@ -864,7 +830,7 @@ def _make_index(length, indices, kind): return index -ops.add_special_arithmetic_methods(SparseArray, arith_method=_arith_method, - comp_method=_arith_method, - bool_method=_arith_method, - use_numexpr=False) +ops.add_special_arithmetic_methods(SparseArray, + arith_method=ops._arith_method_SPARSE_ARRAY, + comp_method=ops._arith_method_SPARSE_ARRAY, + bool_method=ops._arith_method_SPARSE_ARRAY) diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index 05f39a8caa6f6..99bf0d5b7ac51 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -14,12 +14,10 @@ from pandas.core.dtypes.cast import maybe_upcast, find_common_type from pandas.core.dtypes.common import _ensure_platform_int, is_scipy_sparse -from pandas.core.common import _try_sort from pandas.compat.numpy import function as nv from pandas.core.index import Index, MultiIndex, _ensure_index from pandas.core.series import Series -from pandas.core.frame import (DataFrame, extract_index, _prep_ndarray, - _default_index) +from pandas.core.frame import DataFrame, extract_index, _prep_ndarray import pandas.core.algorithms as algos from pandas.core.internals import 
(BlockManager, create_block_manager_from_arrays) @@ -28,7 +26,7 @@ from pandas._libs.sparse import BlockIndex, get_blocks from pandas.util._decorators import Appender import pandas.core.ops as ops - +import pandas.core.common as com _shared_doc_kwargs = dict(klass='SparseDataFrame') @@ -97,6 +95,9 @@ def __init__(self, data=None, index=None, columns=None, default_kind=None, dtype=dtype, copy=copy) elif isinstance(data, DataFrame): mgr = self._init_dict(data, data.index, data.columns, dtype=dtype) + elif isinstance(data, Series): + mgr = self._init_dict(data.to_frame(), data.index, + columns=None, dtype=dtype) elif isinstance(data, BlockManager): mgr = self._init_mgr(data, axes=dict(index=index, columns=columns), dtype=dtype, copy=copy) @@ -118,6 +119,10 @@ def __init__(self, data=None, index=None, columns=None, default_kind=None, mgr = to_manager(data, columns, index) if dtype is not None: mgr = mgr.astype(dtype) + else: + msg = ('SparseDataFrame called with unkown type "{data_type}" ' + 'for data argument') + raise TypeError(msg.format(data_type=type(data).__name__)) generic.NDFrame.__init__(self, mgr) @@ -133,7 +138,7 @@ def _init_dict(self, data, index, columns, dtype=None): columns = _ensure_index(columns) data = {k: v for k, v in compat.iteritems(data) if k in columns} else: - columns = Index(_try_sort(list(data.keys()))) + columns = Index(com._try_sort(list(data.keys()))) if index is None: index = extract_index(list(data.values())) @@ -208,9 +213,9 @@ def _init_spmatrix(self, data, index, columns, dtype=None, def _prep_index(self, data, index, columns): N, K = data.shape if index is None: - index = _default_index(N) + index = com._default_index(N) if columns is None: - columns = _default_index(K) + columns = com._default_index(K) if len(columns) != K: raise ValueError('Column length mismatch: {columns} vs. 
{K}' @@ -820,12 +825,12 @@ def cumsum(self, axis=0, *args, **kwargs): return self.apply(lambda x: x.cumsum(), axis=axis) - @Appender(generic._shared_docs['isna']) + @Appender(generic._shared_docs['isna'] % _shared_doc_kwargs) def isna(self): return self._apply_columns(lambda x: x.isna()) isnull = isna - @Appender(generic._shared_docs['notna']) + @Appender(generic._shared_docs['notna'] % _shared_doc_kwargs) def notna(self): return self._apply_columns(lambda x: x.notna()) notnull = notna @@ -983,7 +988,5 @@ def homogenize(series_dict): # use unaccelerated ops for sparse objects -ops.add_flex_arithmetic_methods(SparseDataFrame, use_numexpr=False, - **ops.frame_flex_funcs) -ops.add_special_arithmetic_methods(SparseDataFrame, use_numexpr=False, - **ops.frame_special_funcs) +ops.add_flex_arithmetic_methods(SparseDataFrame, **ops.frame_flex_funcs) +ops.add_special_arithmetic_methods(SparseDataFrame, **ops.frame_special_funcs) diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index 8a38b1054a1f5..4e207f9d1838c 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -9,22 +9,19 @@ import warnings from pandas.core.dtypes.missing import isna, notna -from pandas.core.dtypes.common import is_scalar -from pandas.core.common import _values_from_object, _maybe_match_name from pandas.compat.numpy import function as nv from pandas.core.index import Index, _ensure_index, InvalidIndexError from pandas.core.series import Series -from pandas.core.frame import DataFrame from pandas.core.internals import SingleBlockManager from pandas.core import generic import pandas.core.common as com import pandas.core.ops as ops -import pandas._libs.index as _index +import pandas._libs.index as libindex from pandas.util._decorators import Appender from pandas.core.sparse.array import ( - make_sparse, _sparse_array_op, SparseArray, + make_sparse, SparseArray, _make_index) from pandas._libs.sparse import BlockIndex, IntIndex import pandas._libs.sparse as 
splib @@ -38,54 +35,6 @@ axes_single_arg="{0, 'index'}", optional_labels='', optional_axis='') -# ----------------------------------------------------------------------------- -# Wrapper function for Series arithmetic methods - - -def _arith_method(op, name, str_rep=None, default_axis=None, fill_zeros=None, - **eval_kwargs): - """ - Wrapper function for Series arithmetic operations, to avoid - code duplication. - - str_rep, default_axis, fill_zeros and eval_kwargs are not used, but are - present for compatibility. - """ - - def wrapper(self, other): - if isinstance(other, Series): - if not isinstance(other, SparseSeries): - other = other.to_sparse(fill_value=self.fill_value) - return _sparse_series_op(self, other, op, name) - elif isinstance(other, DataFrame): - return NotImplemented - elif is_scalar(other): - with np.errstate(all='ignore'): - new_values = op(self.values, other) - return self._constructor(new_values, - index=self.index, - name=self.name) - else: # pragma: no cover - raise TypeError('operation with {other} not supported' - .format(other=type(other))) - - wrapper.__name__ = name - if name.startswith("__"): - # strip special method names, e.g. 
`__add__` needs to be `add` when - # passed to _sparse_series_op - name = name[2:-2] - return wrapper - - -def _sparse_series_op(left, right, op, name): - left, right = left.align(right, join='outer', copy=False) - new_index = left.index - new_name = _maybe_match_name(left, right) - - result = _sparse_array_op(left.values, right.values, op, name, - series=True) - return left._constructor(result, index=new_index, name=new_name) - class SparseSeries(Series): """Data structure for labeled, sparse floating point data @@ -167,9 +116,13 @@ def __init__(self, data=None, index=None, sparse_index=None, kind='block', data = data.astype(dtype) if index is None: index = data.index.view() - else: - - data = data.reindex(index, copy=False) + elif not data.index.equals(index) or copy: # pragma: no cover + # GH#19275 SingleBlockManager input should only be called + # internally + raise AssertionError('Cannot pass both SingleBlockManager ' + '`data` argument and a different ' + '`index` argument. `copy` must ' + 'be False.') else: length = len(index) @@ -255,9 +208,10 @@ def npoints(self): @classmethod def from_array(cls, arr, index=None, name=None, copy=False, fill_value=None, fastpath=False): - """ - DEPRECATED: use the pd.SparseSeries(..) constructor instead. + """Construct SparseSeries from array. + .. deprecated:: 0.23.0 + Use the pd.SparseSeries(..) constructor instead. """ warnings.warn("'from_array' is deprecated and will be removed in a " "future version. Please use the pd.SparseSeries(..) " @@ -422,7 +376,7 @@ def __getitem__(self, key): # Could not hash item, must be array-like? 
pass - key = _values_from_object(key) + key = com._values_from_object(key) if self.index.nlevels > 1 and isinstance(key, tuple): # to handle MultiIndex labels key = self.index.get_loc(key) @@ -560,7 +514,7 @@ def _set_values(self, key, value): key = key.values values = self.values.to_dense() - values[key] = _index.convert_scalar(values, value) + values[key] = libindex.convert_scalar(values, value) values = SparseArray(values, fill_value=self.fill_value, kind=self.kind) self._data = SingleBlockManager(values, self.index) @@ -571,8 +525,9 @@ def to_dense(self, sparse_only=False): Parameters ---------- - sparse_only: bool, default False - DEPRECATED: this argument will be removed in a future version. + sparse_only : bool, default False + .. deprecated:: 0.20.0 + This argument will be removed in a future version. If True, return just the non-sparse values, or the dense version of `self.values` if False. @@ -679,7 +634,7 @@ def cumsum(self, axis=0, *args, **kwargs): new_array, index=self.index, sparse_index=new_array.sp_index).__finalize__(self) - @Appender(generic._shared_docs['isna']) + @Appender(generic._shared_docs['isna'] % _shared_doc_kwargs) def isna(self): arr = SparseArray(isna(self.values.sp_values), sparse_index=self.values.sp_index, @@ -687,7 +642,7 @@ def isna(self): return self._constructor(arr, index=self.index).__finalize__(self) isnull = isna - @Appender(generic._shared_docs['notna']) + @Appender(generic._shared_docs['notna'] % _shared_doc_kwargs) def notna(self): arr = SparseArray(notna(self.values.sp_values), sparse_index=self.values.sp_index, @@ -857,13 +812,11 @@ def from_coo(cls, A, dense_index=False): # overwrite series methods with unaccelerated versions -ops.add_special_arithmetic_methods(SparseSeries, use_numexpr=False, - **ops.series_special_funcs) -ops.add_flex_arithmetic_methods(SparseSeries, use_numexpr=False, - **ops.series_flex_funcs) +ops.add_special_arithmetic_methods(SparseSeries, **ops.series_special_funcs) 
+ops.add_flex_arithmetic_methods(SparseSeries, **ops.series_flex_funcs) # overwrite basic arithmetic to use SparseSeries version # force methods to overwrite previous definitions. -ops.add_special_arithmetic_methods(SparseSeries, _arith_method, - comp_method=_arith_method, - bool_method=None, use_numexpr=False, - force=True) +ops.add_special_arithmetic_methods(SparseSeries, + ops._arith_method_SPARSE_SERIES, + comp_method=ops._arith_method_SPARSE_SERIES, + bool_method=None, force=True) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index fab4e77ce4467..5c31b9a5668ff 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -12,8 +12,8 @@ is_scalar, is_integer, is_re) -from pandas.core.common import _values_from_object +import pandas.core.common as com from pandas.core.algorithms import take_1d import pandas.compat as compat from pandas.core.base import NoNewAttributesMixin @@ -37,7 +37,7 @@ def _get_array_list(arr, others): from pandas.core.series import Series - if len(others) and isinstance(_values_from_object(others)[0], + if len(others) and isinstance(com._values_from_object(others)[0], (list, np.ndarray, Series)): arrays = [arr] + list(others) else: @@ -461,7 +461,7 @@ def rep(x, r): return compat.text_type.__mul__(x, r) repeats = np.asarray(repeats, dtype=object) - result = lib.vec_binop(_values_from_object(arr), repeats, rep) + result = lib.vec_binop(com._values_from_object(arr), repeats, rep) return result @@ -478,7 +478,8 @@ def str_match(arr, pat, case=True, flags=0, na=np.nan, as_indexer=None): flags : int, default 0 (no flags) re module flags, e.g. re.IGNORECASE na : default NaN, fill value for missing values. - as_indexer : DEPRECATED - Keyword is ignored. + as_indexer + .. 
deprecated:: 0.21.0 Returns ------- @@ -794,12 +795,10 @@ def str_extractall(arr, pat, flags=0): result_key = tuple(subject_key + (match_i, )) index_list.append(result_key) - if 0 < len(index_list): - from pandas import MultiIndex - index = MultiIndex.from_tuples( - index_list, names=arr.index.names + ["match"]) - else: - index = None + from pandas import MultiIndex + index = MultiIndex.from_tuples( + index_list, names=arr.index.names + ["match"]) + result = arr._constructor_expanddim(match_list, index=index, columns=columns) return result @@ -1236,7 +1235,6 @@ def str_translate(arr, table, deletechars=None): if deletechars is None: f = lambda x: x.translate(table) else: - from pandas import compat if compat.PY3: raise ValueError("deletechars is not a valid argument for " "str.translate in python 3. You should simply " @@ -1371,12 +1369,44 @@ class StringMethods(NoNewAttributesMixin): """ def __init__(self, data): + self._validate(data) self._is_categorical = is_categorical_dtype(data) self._data = data.cat.categories if self._is_categorical else data # save orig to blow up categoricals to the right type self._orig = data self._freeze() + @staticmethod + def _validate(data): + from pandas.core.index import Index + + if (isinstance(data, ABCSeries) and + not ((is_categorical_dtype(data.dtype) and + is_object_dtype(data.values.categories)) or + (is_object_dtype(data.dtype)))): + # it's neither a string series not a categorical series with + # strings inside the categories. 
+ # this really should exclude all series with any non-string values + # (instead of test for object dtype), but that isn't practical for + # performance reasons until we have a str dtype (GH 9343) + raise AttributeError("Can only use .str accessor with string " + "values, which use np.object_ dtype in " + "pandas") + elif isinstance(data, Index): + # can't use ABCIndex to exclude non-str + + # see scc/inferrence.pyx which can contain string values + allowed_types = ('string', 'unicode', 'mixed', 'mixed-integer') + if data.inferred_type not in allowed_types: + message = ("Can only use .str accessor with string values " + "(i.e. inferred_type is 'string', 'unicode' or " + "'mixed')") + raise AttributeError(message) + if data.nlevels > 1: + message = ("Can only use .str accessor with Index, not " + "MultiIndex") + raise AttributeError(message) + def __getitem__(self, key): if isinstance(key, slice): return self.slice(start=key.start, stop=key.stop, step=key.step) @@ -1896,32 +1926,5 @@ def rindex(self, sub, start=0, end=None): @classmethod def _make_accessor(cls, data): - from pandas.core.index import Index - - if (isinstance(data, ABCSeries) and - not ((is_categorical_dtype(data.dtype) and - is_object_dtype(data.values.categories)) or - (is_object_dtype(data.dtype)))): - # it's neither a string series not a categorical series with - # strings inside the categories. 
- # this really should exclude all series with any non-string values - # (instead of test for object dtype), but that isn't practical for - # performance reasons until we have a str dtype (GH 9343) - raise AttributeError("Can only use .str accessor with string " - "values, which use np.object_ dtype in " - "pandas") - elif isinstance(data, Index): - # can't use ABCIndex to exclude non-str - - # see scc/inferrence.pyx which can contain string values - allowed_types = ('string', 'unicode', 'mixed', 'mixed-integer') - if data.inferred_type not in allowed_types: - message = ("Can only use .str accessor with string values " - "(i.e. inferred_type is 'string', 'unicode' or " - "'mixed')") - raise AttributeError(message) - if data.nlevels > 1: - message = ("Can only use .str accessor with Index, not " - "MultiIndex") - raise AttributeError(message) + cls._validate(data) return cls(data) diff --git a/pandas/core/window.py b/pandas/core/window.py index 76ba76b7a9da9..4d6a1de60f59b 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -32,7 +32,7 @@ from pandas.core.base import (PandasObject, SelectionMixin, GroupByMixin) -from pandas.core.common import _asarray_tuplesafe, _count_not_none +import pandas.core.common as com import pandas._libs.window as _window from pandas import compat @@ -508,7 +508,7 @@ def _prep_window(self, **kwargs): window = self._get_window() if isinstance(window, (list, tuple, np.ndarray)): - return _asarray_tuplesafe(window).astype(float) + return com._asarray_tuplesafe(window).astype(float) elif is_integer(window): import scipy.signal as sig @@ -1286,7 +1286,7 @@ class Expanding(_Rolling_and_Expanding): Parameters ---------- - min_periods : int, default None + min_periods : int, default 1 Minimum number of observations in window required to have a value (otherwise result is NA). 
center : boolean, default False @@ -1908,33 +1908,33 @@ def dataframe_from_int_dict(data, frame_template): return _flex_binary_moment(arg2, arg1, f) -def _get_center_of_mass(com, span, halflife, alpha): - valid_count = _count_not_none(com, span, halflife, alpha) +def _get_center_of_mass(comass, span, halflife, alpha): + valid_count = com._count_not_none(comass, span, halflife, alpha) if valid_count > 1: - raise ValueError("com, span, halflife, and alpha " + raise ValueError("comass, span, halflife, and alpha " "are mutually exclusive") # Convert to center of mass; domain checks ensure 0 < alpha <= 1 - if com is not None: - if com < 0: - raise ValueError("com must satisfy: com >= 0") + if comass is not None: + if comass < 0: + raise ValueError("comass must satisfy: comass >= 0") elif span is not None: if span < 1: raise ValueError("span must satisfy: span >= 1") - com = (span - 1) / 2. + comass = (span - 1) / 2. elif halflife is not None: if halflife <= 0: raise ValueError("halflife must satisfy: halflife > 0") decay = 1 - np.exp(np.log(0.5) / halflife) - com = 1 / decay - 1 + comass = 1 / decay - 1 elif alpha is not None: if alpha <= 0 or alpha > 1: raise ValueError("alpha must satisfy: 0 < alpha <= 1") - com = (1.0 - alpha) / alpha + comass = (1.0 - alpha) / alpha else: - raise ValueError("Must pass one of com, span, halflife, or alpha") + raise ValueError("Must pass one of comass, span, halflife, or alpha") - return float(com) + return float(comass) def _offset(window, center): diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index b3d1ce31d66ae..22b6d33be9d38 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -65,3 +65,15 @@ class MergeError(ValueError): Error raised when problems arise during merging due to problems with input data. Subclass of `ValueError`. 
""" + + +class NullFrequencyError(ValueError): + """ + Error raised when a null `freq` attribute is used in an operation + that needs a non-null frequency, particularly `DatetimeIndex.shift`, + `TimedeltaIndex.shift`, `PeriodIndex.shift`. + """ + + +class AccessorRegistrationWarning(Warning): + """Warning for attribute conflicts in accessor registration.""" diff --git a/pandas/io/common.py b/pandas/io/common.py index da60698fe529f..4ba969f0abac4 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -9,7 +9,7 @@ from pandas.compat import StringIO, BytesIO, string_types, text_type from pandas import compat from pandas.io.formats.printing import pprint_thing -from pandas.core.common import AbstractMethodError +import pandas.core.common as com from pandas.core.dtypes.common import is_number, is_file_like # compat @@ -66,7 +66,7 @@ def __iter__(self): return self def __next__(self): - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) if not compat.PY3: @@ -91,14 +91,6 @@ def _is_url(url): return False -def _is_s3_url(url): - """Check for an s3, s3n, or s3a url""" - try: - return parse_url(url).scheme in ['s3', 's3n', 's3a'] - except: - return False - - def _expand_user(filepath_or_buffer): """Return the argument with an initial component of ~ or ~user replaced by that user's home directory. @@ -168,8 +160,16 @@ def _stringify_path(filepath_or_buffer): return filepath_or_buffer +def is_s3_url(url): + """Check for an s3, s3n, or s3a url""" + try: + return parse_url(url).scheme in ['s3', 's3n', 's3a'] + except: # noqa + return False + + def get_filepath_or_buffer(filepath_or_buffer, encoding=None, - compression=None): + compression=None, mode=None): """ If the filepath_or_buffer is a url, translate and return the buffer. Otherwise passthrough. 
@@ -179,10 +179,11 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None, filepath_or_buffer : a url, filepath (str, py.path.local or pathlib.Path), or buffer encoding : the encoding to use to decode py3 bytes, default is 'utf-8' + mode : str, optional Returns ------- - a filepath_or_buffer, the encoding, the compression + a filepath_ or buffer or S3File instance, the encoding, the compression """ filepath_or_buffer = _stringify_path(filepath_or_buffer) @@ -195,11 +196,12 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None, reader = BytesIO(req.read()) return reader, encoding, compression - if _is_s3_url(filepath_or_buffer): + if is_s3_url(filepath_or_buffer): from pandas.io import s3 return s3.get_filepath_or_buffer(filepath_or_buffer, encoding=encoding, - compression=compression) + compression=compression, + mode=mode) if isinstance(filepath_or_buffer, (compat.string_types, compat.binary_type, diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 92b29c8da7e3f..b03987e933bff 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -4,7 +4,7 @@ # --------------------------------------------------------------------- # ExcelFile class -from datetime import datetime, date, time, MINYEAR +from datetime import datetime, date, time, MINYEAR, timedelta import os import abc @@ -21,7 +21,6 @@ from pandas.io.common import (_is_url, _urlopen, _validate_header_arg, get_filepath_or_buffer, _NA_VALUES, _stringify_path) -from pandas.core.indexes.period import Period import pandas._libs.json as json from pandas.compat import (map, zip, reduce, range, lrange, u, add_metaclass, string_types, OrderedDict) @@ -777,17 +776,30 @@ def _pop_header_name(row, index_col): def _conv_value(val): - # Convert numpy types to Python types for the Excel writers. + """ Convert numpy types to Python types for the Excel writers. 
+ + Parameters + ---------- + val : object + Value to be written into cells + + Returns + ------- + If val is a numpy int, float, or bool, then the equivalent Python + types are returned. :obj:`datetime`, :obj:`date`, and :obj:`timedelta` + are passed and formatting must be handled in the writer. :obj:`str` + representation is returned for all other types. + """ if is_integer(val): val = int(val) elif is_float(val): val = float(val) elif is_bool(val): val = bool(val) - elif isinstance(val, Period): - val = "{val}".format(val=val) - elif is_list_like(val): - val = str(val) + elif isinstance(val, (datetime, date, timedelta)): + pass + else: + val = compat.to_str(val) return val @@ -1460,6 +1472,9 @@ def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0, num_format_str = self.datetime_format elif isinstance(cell.val, date): num_format_str = self.date_format + elif isinstance(cell.val, timedelta): + delta = cell.val + val = delta.total_seconds() / float(86400) stylekey = json.dumps(cell.style) if num_format_str: diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index aff3e35861434..81e8881f3f06b 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -10,7 +10,7 @@ from pandas.compat import reduce from pandas.io.formats.css import CSSResolver, CSSWarning from pandas.io.formats.printing import pprint_thing -from pandas.core.common import _any_not_none +import pandas.core.common as com from pandas.core.dtypes.common import is_float, is_scalar from pandas.core.dtypes import missing from pandas import Index, MultiIndex, PeriodIndex @@ -277,7 +277,9 @@ def build_font(self, props): NAMED_COLORS = { 'maroon': '800000', + 'brown': 'A52A2A', 'red': 'FF0000', + 'pink': 'FFC0CB', 'orange': 'FFA500', 'yellow': 'FFFF00', 'olive': '808000', @@ -291,6 +293,7 @@ def build_font(self, props): 'navy': '000080', 'black': '000000', 'gray': '808080', + 'grey': '808080', 'silver': 'C0C0C0', 'white': 'FFFFFF', } @@ -549,7 +552,7 @@ def 
_format_hierarchical_rows(self): self.rowcounter += 1 # if index labels are not empty go ahead and dump - if _any_not_none(*index_labels) and self.header is not False: + if com._any_not_none(*index_labels) and self.header is not False: for cidx, name in enumerate(index_labels): yield ExcelCell(self.rowcounter - 1, cidx, name, diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index a4678e5b40849..2293032ebb8a1 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -27,7 +27,7 @@ is_list_like) from pandas.core.dtypes.generic import ABCSparseArray from pandas.core.base import PandasObject -from pandas.core.common import _any_not_none, sentinel_factory +import pandas.core.common as com from pandas.core.index import Index, MultiIndex, _ensure_index from pandas import compat from pandas.compat import (StringIO, lzip, range, map, zip, u, @@ -1277,7 +1277,7 @@ def _column_header(): if self.fmt.sparsify: # GH3547 - sentinel = sentinel_factory() + sentinel = com.sentinel_factory() else: sentinel = None levels = self.columns.format(sparsify=sentinel, adjoin=False, @@ -1446,7 +1446,7 @@ def _write_hierarchical_rows(self, fmt_values, indent): if self.fmt.sparsify: # GH3547 - sentinel = sentinel_factory() + sentinel = com.sentinel_factory() levels = frame.index.format(sparsify=sentinel, adjoin=False, names=False) @@ -2188,7 +2188,7 @@ def _is_dates_only(values): consider_values = values_int != iNaT one_day_nanos = (86400 * 1e9) even_days = np.logical_and(consider_values, - values_int % one_day_nanos != 0).sum() == 0 + values_int % int(one_day_nanos) != 0).sum() == 0 if even_days: return True return False @@ -2372,7 +2372,7 @@ def single_row_table(row): # pragma: no cover def _has_names(index): if isinstance(index, MultiIndex): - return _any_not_none(*index.names) + return com._any_not_none(*index.names) else: return index.name is not None diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 
2c3d92cea0ad8..58796aa30f0bf 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -27,7 +27,7 @@ from pandas.compat import range from pandas.core.config import get_option from pandas.core.generic import _shared_docs -from pandas.core.common import _any_not_none, sentinel_factory +import pandas.core.common as com from pandas.core.indexing import _maybe_numeric_slice, _non_reducing_slice from pandas.util._decorators import Appender try: @@ -257,7 +257,8 @@ def format_attr(pair): row_es.append(es) head.append(row_es) - if (self.data.index.names and _any_not_none(*self.data.index.names) and + if (self.data.index.names and + com._any_not_none(*self.data.index.names) and not hidden_index): index_header_row = [] @@ -1207,7 +1208,7 @@ def _get_level_lengths(index, hidden_elements=None): Result is a dictionary of (level, inital_position): span """ - sentinel = sentinel_factory() + sentinel = com.sentinel_factory() levels = index.format(sparsify=sentinel, adjoin=False, names=False) if hidden_elements is None: diff --git a/pandas/io/html.py b/pandas/io/html.py index e7794864ccb3e..be4854bc19cc6 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -20,7 +20,7 @@ from pandas.compat import (lrange, lmap, u, string_types, iteritems, raise_with_traceback, binary_type) from pandas import Series -from pandas.core.common import AbstractMethodError +import pandas.core.common as com from pandas.io.formats.printing import pprint_thing _IMPORTS = False @@ -234,7 +234,7 @@ def _text_getter(self, obj): text : str or unicode The text from an individual DOM node. """ - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) def _parse_td(self, obj): """Return the td elements from a row element. @@ -248,7 +248,7 @@ def _parse_td(self, obj): columns : list of node-like These are the elements of each row, i.e., the columns. 
""" - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) def _parse_tables(self, doc, match, attrs): """Return all tables from the parsed DOM. @@ -275,7 +275,7 @@ def _parse_tables(self, doc, match, attrs): tables : list of node-like A list of elements to be parsed into raw data. """ - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) def _parse_tr(self, table): """Return the list of row elements from the parsed table element. @@ -290,7 +290,7 @@ def _parse_tr(self, table): rows : list of node-like A list row elements of a table, usually or ... element. """ - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) def _parse_tbody(self, table): """Return the body of the table. @@ -320,7 +320,7 @@ def _parse_tbody(self, table): tbody : node-like A ... element. """ - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) def _parse_tfoot(self, table): """Return the footer of the table if any. @@ -335,7 +335,7 @@ def _parse_tfoot(self, table): tfoot : node-like A ... element. """ - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) def _build_doc(self): """Return a tree-like object that can be used to iterate over the DOM. 
@@ -344,7 +344,7 @@ def _build_doc(self): ------- obj : tree-like """ - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) def _build_table(self, table): header = self._parse_raw_thead(table) diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py index 72ec5c59c90af..e3a1321336fb3 100644 --- a/pandas/io/json/json.py +++ b/pandas/io/json/json.py @@ -12,11 +12,11 @@ _infer_compression, _stringify_path, BaseIterator) from pandas.io.parsers import _validate_integer -from pandas.core.common import AbstractMethodError +import pandas.core.common as com from pandas.core.reshape.concat import concat from pandas.io.formats.printing import pprint_thing from .normalize import _convert_to_line_delimits -from .table_schema import build_table_schema +from .table_schema import build_table_schema, parse_table_schema from pandas.core.dtypes.common import is_period_dtype loads = json.loads @@ -93,7 +93,7 @@ def __init__(self, obj, orient, date_format, double_precision, self._format_axes() def _format_axes(self): - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) def write(self): return self._write(self.obj, self.orient, self.double_precision, @@ -261,13 +261,16 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True, * when ``typ == 'frame'``, - allowed orients are ``{'split','records','index', - 'columns','values'}`` + 'columns','values', 'table'}`` - default is ``'columns'`` - The DataFrame index must be unique for orients ``'index'`` and ``'columns'``. - The DataFrame columns must be unique for orients ``'index'``, ``'columns'``, and ``'records'``. + .. 
versionadded:: 0.23.0 + 'table' as an allowed value for the ``orient`` argument + typ : type of object to recover (series or frame), default 'frame' dtype : boolean or dict, default True If True, infer dtypes, if a dict of column to dtype, then use those, @@ -336,6 +339,17 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True, ------- result : Series or DataFrame, depending on the value of `typ`. + Notes + ----- + Specific to ``orient='table'``, if a :class:`DataFrame` with a literal + :class:`Index` name of `index` gets written with :func:`to_json`, the + subsequent read operation will incorrectly set the :class:`Index` name to + ``None``. This is because `index` is also used by :func:`DataFrame.to_json` + to denote a missing :class:`Index` name, and the subsequent + :func:`read_json` operation cannot distinguish between the two. The same + limitation is encountered with a :class:`MultiIndex` and any names + beginning with 'level_'. + See Also -------- DataFrame.to_json @@ -634,7 +648,7 @@ def _convert_axes(self): setattr(self.obj, axis, new_axis) def _try_convert_types(self): - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) def _try_convert_data(self, name, data, use_dtypes=True, convert_dates=True): @@ -747,7 +761,7 @@ def _try_convert_to_date(self, data): return data, False def _try_convert_dates(self): - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) class SeriesParser(Parser): @@ -839,6 +853,9 @@ def _parse_no_numpy(self): elif orient == "index": self.obj = DataFrame( loads(json, precise_float=self.precise_float), dtype=None).T + elif orient == 'table': + self.obj = parse_table_schema(json, + precise_float=self.precise_float) else: self.obj = DataFrame( loads(json, precise_float=self.precise_float), dtype=None) diff --git a/pandas/io/json/table_schema.py b/pandas/io/json/table_schema.py index 9cec5b3d6ba49..01f7db7d68664 100644 --- a/pandas/io/json/table_schema.py +++ 
b/pandas/io/json/table_schema.py @@ -3,13 +3,20 @@ http://specs.frictionlessdata.io/json-table-schema/ """ -from pandas.core.common import _all_not_none +import warnings + +import pandas._libs.json as json +from pandas import DataFrame +from pandas.api.types import CategoricalDtype +import pandas.core.common as com from pandas.core.dtypes.common import ( is_integer_dtype, is_timedelta64_dtype, is_numeric_dtype, is_bool_dtype, is_datetime64_dtype, is_datetime64tz_dtype, is_categorical_dtype, is_period_dtype, is_string_dtype ) +loads = json.loads + def as_json_table_type(x): """ @@ -62,7 +69,13 @@ def as_json_table_type(x): def set_default_names(data): """Sets index names to 'index' for regular, or 'level_x' for Multi""" - if _all_not_none(*data.index.names): + if com._all_not_none(*data.index.names): + nms = data.index.names + if len(nms) == 1 and data.index.name == 'index': + warnings.warn("Index name of 'index' is not round-trippable") + elif len(nms) > 1 and any(x.startswith('level_') for x in nms): + warnings.warn("Index names beginning with 'level_' are not " + "round-trippable") return data data = data.copy() @@ -75,7 +88,7 @@ def set_default_names(data): return data -def make_field(arr, dtype=None): +def convert_pandas_type_to_json_field(arr, dtype=None): dtype = dtype or arr.dtype if arr.name is None: name = 'values' @@ -103,6 +116,69 @@ def make_field(arr, dtype=None): return field +def convert_json_field_to_pandas_type(field): + """ + Converts a JSON field descriptor into its corresponding NumPy / pandas type + + Parameters + ---------- + field + A JSON field descriptor + + Returns + ------- + dtype + + Raises + ----- + ValueError + If the type of the provided field is unknown or currently unsupported + + Examples + -------- + >>> convert_json_field_to_pandas_type({'name': 'an_int', + 'type': 'integer'}) + 'int64' + >>> convert_json_field_to_pandas_type({'name': 'a_categorical', + 'type': 'any', + 'contraints': {'enum': [ + 'a', 'b', 'c']}, + 'ordered': 
True}) + 'CategoricalDtype(categories=['a', 'b', 'c'], ordered=True)' + >>> convert_json_field_to_pandas_type({'name': 'a_datetime', + 'type': 'datetime'}) + 'datetime64[ns]' + >>> convert_json_field_to_pandas_type({'name': 'a_datetime_with_tz', + 'type': 'datetime', + 'tz': 'US/Central'}) + 'datetime64[ns, US/Central]' + """ + typ = field['type'] + if typ == 'string': + return 'object' + elif typ == 'integer': + return 'int64' + elif typ == 'number': + return 'float64' + elif typ == 'boolean': + return 'bool' + elif typ == 'duration': + return 'timedelta64' + elif typ == 'datetime': + if field.get('tz'): + return 'datetime64[ns, {tz}]'.format(tz=field['tz']) + else: + return 'datetime64[ns]' + elif typ == 'any': + if 'constraints' in field and 'ordered' in field: + return CategoricalDtype(categories=field['constraints']['enum'], + ordered=field['ordered']) + else: + return 'object' + + raise ValueError("Unsupported or invalid field type: {}".format(typ)) + + def build_table_schema(data, index=True, primary_key=None, version=True): """ Create a Table schema from ``data``. 
@@ -158,15 +234,15 @@ def build_table_schema(data, index=True, primary_key=None, version=True): if index: if data.index.nlevels > 1: for level in data.index.levels: - fields.append(make_field(level)) + fields.append(convert_pandas_type_to_json_field(level)) else: - fields.append(make_field(data.index)) + fields.append(convert_pandas_type_to_json_field(data.index)) if data.ndim > 1: for column, s in data.iteritems(): - fields.append(make_field(s)) + fields.append(convert_pandas_type_to_json_field(s)) else: - fields.append(make_field(data)) + fields.append(convert_pandas_type_to_json_field(data)) schema['fields'] = fields if index and data.index.is_unique and primary_key is None: @@ -180,3 +256,69 @@ def build_table_schema(data, index=True, primary_key=None, version=True): if version: schema['pandas_version'] = '0.20.0' return schema + + +def parse_table_schema(json, precise_float): + """ + Builds a DataFrame from a given schema + + Parameters + ---------- + json : + A JSON table schema + precise_float : boolean + Flag controlling precision when decoding string to double values, as + dictated by ``read_json`` + + Returns + ------- + df : DataFrame + + Raises + ------ + NotImplementedError + If the JSON table schema contains either timezone or timedelta data + + Notes + ----- + Because :func:`DataFrame.to_json` uses the string 'index' to denote a + name-less :class:`Index`, this function sets the name of the returned + :class:`DataFrame` to ``None`` when said string is encountered with a + normal :class:`Index`. For a :class:`MultiIndex`, the same limitation + applies to any strings beginning with 'level_'. Therefore, an + :class:`Index` name of 'index' and :class:`MultiIndex` names starting + with 'level_' are not supported. 
+ + See also + -------- + build_table_schema : inverse function + pandas.read_json + """ + table = loads(json, precise_float=precise_float) + col_order = [field['name'] for field in table['schema']['fields']] + df = DataFrame(table['data'])[col_order] + + dtypes = {field['name']: convert_json_field_to_pandas_type(field) + for field in table['schema']['fields']} + + # Cannot directly use as_type with timezone data on object; raise for now + if any(str(x).startswith('datetime64[ns, ') for x in dtypes.values()): + raise NotImplementedError('table="orient" can not yet read timezone ' + 'data') + + # No ISO constructor for Timedelta as of yet, so need to raise + if 'timedelta64' in dtypes.values(): + raise NotImplementedError('table="orient" can not yet read ' + 'ISO-formatted Timedelta data') + + df = df.astype(dtypes) + + df = df.set_index(table['schema']['primaryKey']) + if len(df.index.names) == 1: + if df.index.name == 'index': + df.index.name = None + else: + df.index.names = [None if x.startswith('level_') else x for x in + df.index.names] + + return df diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index e431c9447e8f8..6e1b6e14861c3 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -4,8 +4,8 @@ from distutils.version import LooseVersion from pandas import DataFrame, RangeIndex, Int64Index, get_option from pandas.compat import string_types -from pandas.core.common import AbstractMethodError -from pandas.io.common import get_filepath_or_buffer +import pandas.core.common as com +from pandas.io.common import get_filepath_or_buffer, is_s3_url def get_engine(engine): @@ -64,10 +64,10 @@ def validate_dataframe(df): raise ValueError("Index level names must be strings") def write(self, df, path, compression, **kwargs): - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) def read(self, path, columns=None, **kwargs): - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) class PyArrowImpl(BaseImpl): @@ -107,7 
+107,7 @@ def write(self, df, path, compression='snappy', self.validate_dataframe(df) if self._pyarrow_lt_070: self._validate_write_lt_070(df) - path, _, _ = get_filepath_or_buffer(path) + path, _, _ = get_filepath_or_buffer(path, mode='wb') if self._pyarrow_lt_060: table = self.api.Table.from_pandas(df, timestamps_to_ms=True) @@ -194,14 +194,35 @@ def write(self, df, path, compression='snappy', **kwargs): # thriftpy/protocol/compact.py:339: # DeprecationWarning: tostring() is deprecated. # Use tobytes() instead. - path, _, _ = get_filepath_or_buffer(path) + + if is_s3_url(path): + # path is s3:// so we need to open the s3file in 'wb' mode. + # TODO: Support 'ab' + + path, _, _ = get_filepath_or_buffer(path, mode='wb') + # And pass the opened s3file to the fastparquet internal impl. + kwargs['open_with'] = lambda path, _: path + else: + path, _, _ = get_filepath_or_buffer(path) + with catch_warnings(record=True): self.api.write(path, df, compression=compression, **kwargs) def read(self, path, columns=None, **kwargs): - path, _, _ = get_filepath_or_buffer(path) - parquet_file = self.api.ParquetFile(path) + if is_s3_url(path): + # When path is s3:// an S3File is returned. + # We need to retain the original path(str) while also + # pass the S3File().open function to fsatparquet impl. + s3, _, _ = get_filepath_or_buffer(path) + try: + parquet_file = self.api.ParquetFile(path, open_with=s3.s3.open) + finally: + s3.close() + else: + path, _, _ = get_filepath_or_buffer(path) + parquet_file = self.api.ParquetFile(path) + return parquet_file.to_pandas(columns=columns, **kwargs) @@ -218,8 +239,8 @@ def to_parquet(df, path, engine='auto', compression='snappy', **kwargs): Parquet reader library to use. If 'auto', then the option 'io.parquet.engine' is used. If 'auto', then the first library to be installed is used. 
- compression : str, optional, default 'snappy' - compression method, includes {'gzip', 'snappy', 'brotli'} + compression : {'snappy', 'gzip', 'brotli', None}, default 'snappy' + Name of the compression to use. Use ``None`` for no compression. kwargs Additional keyword arguments passed to the engine """ diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index acb7d00284693..5135bb01fb378 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -28,9 +28,9 @@ _ensure_index_from_sequences) from pandas.core.series import Series from pandas.core.frame import DataFrame -from pandas.core.categorical import Categorical +from pandas.core.arrays import Categorical from pandas.core import algorithms -from pandas.core.common import AbstractMethodError +import pandas.core.common as com from pandas.io.date_converters import generic_parser from pandas.errors import ParserWarning, ParserError, EmptyDataError from pandas.io.common import (get_filepath_or_buffer, is_file_like, @@ -114,7 +114,7 @@ prefix : str, default None Prefix to add to column numbers when no header, e.g. 'X' for X0, X1, ... mangle_dupe_cols : boolean, default True - Duplicate columns will be specified as 'X.0'...'X.N', rather than + Duplicate columns will be specified as 'X', 'X.1', ...'X.N', rather than 'X'...'X'. Passing in False will cause data to be overwritten if there are duplicate names in the columns. dtype : Type name or dict of column -> type, default None @@ -149,8 +149,20 @@ NaN: '""" + fill("', '".join(sorted(_NA_VALUES)), 70, subsequent_indent=" ") + """'. keep_default_na : bool, default True - If na_values are specified and keep_default_na is False the default NaN - values are overridden, otherwise they're appended to. + Whether or not to include the default NaN values when parsing the data. 
+ Depending on whether `na_values` is passed in, the behavior is as follows: + + * If `keep_default_na` is True, and `na_values` are specified, `na_values` + is appended to the default NaN values used for parsing. + * If `keep_default_na` is True, and `na_values` are not specified, only + the default NaN values are used for parsing. + * If `keep_default_na` is False, and `na_values` are specified, only + the NaN values specified `na_values` are used for parsing. + * If `keep_default_na` is False, and `na_values` are not specified, no + strings will be parsed as NaN. + + Note that if `na_filter` is passed in as False, the `keep_default_na` and + `na_values` parameters will be ignored. na_filter : boolean, default True Detect missing value markers (empty strings and the value of na_values). In data without any NAs, passing na_filter=False can improve the performance @@ -910,9 +922,6 @@ def _clean_options(self, options, engine): na_values = options['na_values'] skiprows = options['skiprows'] - # really delete this one - keep_default_na = result.pop('keep_default_na') - _validate_header_arg(options['header']) depr_warning = '' @@ -957,6 +966,7 @@ def _clean_options(self, options, engine): converters = {} # Converting values to NA + keep_default_na = options['keep_default_na'] na_values, na_fvalues = _clean_na_values(na_values, keep_default_na) # handle skiprows; this is internally handled by the @@ -1000,7 +1010,7 @@ def _make_engine(self, engine='c'): self._engine = klass(self.f, **self.options) def _failover_to_python(self): - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) def read(self, nrows=None): nrows = _validate_integer('nrows', nrows) @@ -1225,6 +1235,7 @@ def __init__(self, kwds): self.na_values = kwds.get('na_values') self.na_fvalues = kwds.get('na_fvalues') self.na_filter = kwds.get('na_filter', False) + self.keep_default_na = kwds.get('keep_default_na', True) self.true_values = kwds.get('true_values') self.false_values = 
kwds.get('false_values') @@ -1487,7 +1498,8 @@ def _agg_index(self, index, try_parse_dates=True): col_name = self.index_names[i] if col_name is not None: col_na_values, col_na_fvalues = _get_na_values( - col_name, self.na_values, self.na_fvalues) + col_name, self.na_values, self.na_fvalues, + self.keep_default_na) arr, _ = self._infer_types(arr, col_na_values | col_na_fvalues) arrays.append(arr) @@ -1510,7 +1522,7 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False, if self.na_filter: col_na_values, col_na_fvalues = _get_na_values( - c, na_values, na_fvalues) + c, na_values, na_fvalues, self.keep_default_na) else: col_na_values, col_na_fvalues = set(), set() @@ -3097,16 +3109,23 @@ def _clean_na_values(na_values, keep_default_na=True): na_values = set() na_fvalues = set() elif isinstance(na_values, dict): - na_values = na_values.copy() # Prevent aliasing. - if keep_default_na: - for k, v in compat.iteritems(na_values): - if not is_list_like(v): - v = [v] + old_na_values = na_values.copy() + na_values = {} # Prevent aliasing. + + # Convert the values in the na_values dictionary + # into array-likes for further use. This is also + # where we append the default NaN values, provided + # that `keep_default_na=True`. + for k, v in compat.iteritems(old_na_values): + if not is_list_like(v): + v = [v] + + if keep_default_na: v = set(v) | _NA_VALUES - na_values[k] = v - na_fvalues = dict( - (k, _floatify_na_values(v)) for k, v in na_values.items() # noqa - ) + + na_values[k] = v + na_fvalues = dict((k, _floatify_na_values(v)) + for k, v in na_values.items()) else: if not is_list_like(na_values): na_values = [na_values] @@ -3225,12 +3244,38 @@ def _stringify_na_values(na_values): return set(result) -def _get_na_values(col, na_values, na_fvalues): +def _get_na_values(col, na_values, na_fvalues, keep_default_na): + """ + Get the NaN values for a given column. + + Parameters + ---------- + col : str + The name of the column. 
+ na_values : array-like, dict + The object listing the NaN values as strings. + na_fvalues : array-like, dict + The object listing the NaN values as floats. + keep_default_na : bool + If `na_values` is a dict, and the column is not mapped in the + dictionary, whether to return the default NaN values or the empty set. + + Returns + ------- + nan_tuple : A length-two tuple composed of + + 1) na_values : the string NaN values for that column. + 2) na_fvalues : the float NaN values for that column. + """ + if isinstance(na_values, dict): if col in na_values: return na_values[col], na_fvalues[col] else: - return _NA_VALUES, set() + if keep_default_na: + return _NA_VALUES, set() + + return set(), set() else: return na_values, na_fvalues diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 72543bb6f825e..106823199ee93 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -34,9 +34,10 @@ from pandas.core.base import StringMixin from pandas.io.formats.printing import adjoin, pprint_thing from pandas.errors import PerformanceWarning -from pandas.core.common import _asarray_tuplesafe, _all_none +import pandas.core.common as com from pandas.core.algorithms import match, unique -from pandas.core.categorical import Categorical, _factorize_from_iterables +from pandas.core.arrays.categorical import (Categorical, + _factorize_from_iterables) from pandas.core.internals import (BlockManager, make_block, _block2d_to_blocknd, _factor_indexer, _block_shape) @@ -902,7 +903,7 @@ def remove(self, key, where=None, start=None, stop=None): raise KeyError('No object named %s in the file' % key) # remove the node - if _all_none(where, start, stop): + if com._all_none(where, start, stop): s.group._f_remove(recursive=True) # delete from the table @@ -2367,7 +2368,7 @@ def delete(self, where=None, start=None, stop=None, **kwargs): support fully deleting the node in its entirety (only) - where specification must be None """ - if _all_none(where, start, stop): + if 
com._all_none(where, start, stop): self._handle.remove_node(self.group, recursive=True) return None @@ -3843,7 +3844,7 @@ def read(self, where=None, columns=None, **kwargs): tuple_index = long_index.values unique_tuples = lib.fast_unique(tuple_index) - unique_tuples = _asarray_tuplesafe(unique_tuples) + unique_tuples = com._asarray_tuplesafe(unique_tuples) indexer = match(unique_tuples, tuple_index) indexer = _ensure_platform_int(indexer) diff --git a/pandas/io/s3.py b/pandas/io/s3.py index 5e48de757d00e..e2650e29c0db3 100644 --- a/pandas/io/s3.py +++ b/pandas/io/s3.py @@ -19,10 +19,14 @@ def _strip_schema(url): def get_filepath_or_buffer(filepath_or_buffer, encoding=None, - compression=None): + compression=None, mode=None): + + if mode is None: + mode = 'rb' + fs = s3fs.S3FileSystem(anon=False) try: - filepath_or_buffer = fs.open(_strip_schema(filepath_or_buffer)) + filepath_or_buffer = fs.open(_strip_schema(filepath_or_buffer), mode) except (OSError, NoCredentialsError): # boto3 has troubles when trying to access a public file # when credentialed... @@ -31,5 +35,5 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None, # A NoCredentialsError is raised if you don't have creds # for that bucket. fs = s3fs.S3FileSystem(anon=True) - filepath_or_buffer = fs.open(_strip_schema(filepath_or_buffer)) + filepath_or_buffer = fs.open(_strip_schema(filepath_or_buffer), mode) return filepath_or_buffer, None, compression diff --git a/pandas/io/sas/sas.pyx b/pandas/io/sas/sas.pyx index 41c03cb2799a3..e2a1107969990 100644 --- a/pandas/io/sas/sas.pyx +++ b/pandas/io/sas/sas.pyx @@ -2,16 +2,16 @@ # cython: boundscheck=False, initializedcheck=False import numpy as np -cimport numpy as np -from numpy cimport uint8_t, uint16_t, int8_t, int64_t +cimport numpy as cnp +from numpy cimport uint8_t, uint16_t, int8_t, int64_t, ndarray import sas_constants as const # rle_decompress decompresses data using a Run Length Encoding # algorithm. 
It is partially documented here: # # https://cran.r-project.org/web/packages/sas7bdat/vignettes/sas7bdat.pdf -cdef np.ndarray[uint8_t, ndim=1] rle_decompress( - int result_length, np.ndarray[uint8_t, ndim=1] inbuff): +cdef ndarray[uint8_t, ndim=1] rle_decompress( + int result_length, ndarray[uint8_t, ndim=1] inbuff): cdef: uint8_t control_byte, x @@ -114,8 +114,8 @@ cdef np.ndarray[uint8_t, ndim=1] rle_decompress( # rdc_decompress decompresses data using the Ross Data Compression algorithm: # # http://collaboration.cmc.ec.gc.ca/science/rpn/biblio/ddj/Website/articles/CUJ/1992/9210/ross/ross.htm -cdef np.ndarray[uint8_t, ndim=1] rdc_decompress( - int result_length, np.ndarray[uint8_t, ndim=1] inbuff): +cdef ndarray[uint8_t, ndim=1] rdc_decompress( + int result_length, ndarray[uint8_t, ndim=1] inbuff): cdef: uint8_t cmd @@ -226,8 +226,8 @@ cdef class Parser(object): int subheader_pointer_length int current_page_type bint is_little_endian - np.ndarray[uint8_t, ndim=1] (*decompress)( - int result_length, np.ndarray[uint8_t, ndim=1] inbuff) + ndarray[uint8_t, ndim=1] (*decompress)( + int result_length, ndarray[uint8_t, ndim=1] inbuff) object parser def __init__(self, object parser): @@ -391,7 +391,7 @@ cdef class Parser(object): Py_ssize_t j int s, k, m, jb, js, current_row int64_t lngt, start, ct - np.ndarray[uint8_t, ndim=1] source + ndarray[uint8_t, ndim=1] source int64_t[:] column_types int64_t[:] lengths int64_t[:] offsets diff --git a/pandas/io/sql.py b/pandas/io/sql.py index e2f3033c580a5..437e279e90979 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -41,24 +41,6 @@ class DatabaseError(IOError): _SQLALCHEMY_INSTALLED = None -def _validate_flavor_parameter(flavor): - """ - Checks whether a database 'flavor' was specified. - If not None, produces FutureWarning if 'sqlite' and - raises a ValueError if anything else. 
- """ - if flavor is not None: - if flavor == 'sqlite': - warnings.warn("the 'flavor' parameter is deprecated " - "and will be removed in a future version, " - "as 'sqlite' is the only supported option " - "when SQLAlchemy is not installed.", - FutureWarning, stacklevel=2) - else: - raise ValueError("database flavor {flavor} is not " - "supported".format(flavor=flavor)) - - def _is_sqlalchemy_connectable(con): global _SQLALCHEMY_INSTALLED if _SQLALCHEMY_INSTALLED is None: @@ -415,8 +397,8 @@ def read_sql(sql, con, index_col=None, coerce_float=True, params=None, chunksize=chunksize) -def to_sql(frame, name, con, flavor=None, schema=None, if_exists='fail', - index=True, index_label=None, chunksize=None, dtype=None): +def to_sql(frame, name, con, schema=None, if_exists='fail', index=True, + index_label=None, chunksize=None, dtype=None): """ Write records stored in a DataFrame to a SQL database. @@ -430,10 +412,6 @@ def to_sql(frame, name, con, flavor=None, schema=None, if_exists='fail', Using SQLAlchemy makes it possible to use any DB supported by that library. If a DBAPI2 object, only sqlite3 is supported. - flavor : 'sqlite', default None - .. deprecated:: 0.19.0 - 'sqlite' is the only supported option if SQLAlchemy is not - used. schema : string, default None Name of SQL schema in database to write to (if database flavor supports this). If None, use default schema (default). 
@@ -459,7 +437,7 @@ def to_sql(frame, name, con, flavor=None, schema=None, if_exists='fail', if if_exists not in ('fail', 'replace', 'append'): raise ValueError("'{0}' is not valid for if_exists".format(if_exists)) - pandas_sql = pandasSQL_builder(con, schema=schema, flavor=flavor) + pandas_sql = pandasSQL_builder(con, schema=schema) if isinstance(frame, Series): frame = frame.to_frame() @@ -472,7 +450,7 @@ def to_sql(frame, name, con, flavor=None, schema=None, if_exists='fail', chunksize=chunksize, dtype=dtype) -def has_table(table_name, con, flavor=None, schema=None): +def has_table(table_name, con, schema=None): """ Check if DataBase has named table. @@ -484,10 +462,6 @@ def has_table(table_name, con, flavor=None, schema=None): Using SQLAlchemy makes it possible to use any DB supported by that library. If a DBAPI2 object, only sqlite3 is supported. - flavor : 'sqlite', default None - .. deprecated:: 0.19.0 - 'sqlite' is the only supported option if SQLAlchemy is not - installed. schema : string, default None Name of SQL schema in database to write to (if database flavor supports this). If None, use default schema (default). @@ -496,7 +470,7 @@ def has_table(table_name, con, flavor=None, schema=None): ------- boolean """ - pandas_sql = pandasSQL_builder(con, flavor=flavor, schema=schema) + pandas_sql = pandasSQL_builder(con, schema=schema) return pandas_sql.has_table(table_name) @@ -521,14 +495,12 @@ def _engine_builder(con): return con -def pandasSQL_builder(con, flavor=None, schema=None, meta=None, +def pandasSQL_builder(con, schema=None, meta=None, is_cursor=False): """ Convenience function to return the correct PandasSQL subclass based on the provided parameters. """ - _validate_flavor_parameter(flavor) - # When support for DBAPI connections is removed, # is_cursor should not be necessary. 
con = _engine_builder(con) @@ -1378,9 +1350,7 @@ class SQLiteDatabase(PandasSQL): """ - def __init__(self, con, flavor=None, is_cursor=False): - _validate_flavor_parameter(flavor) - + def __init__(self, con, is_cursor=False): self.is_cursor = is_cursor self.con = con @@ -1534,7 +1504,7 @@ def _create_sql_schema(self, frame, table_name, keys=None, dtype=None): return str(table.sql_schema()) -def get_schema(frame, name, flavor=None, keys=None, con=None, dtype=None): +def get_schema(frame, name, keys=None, con=None, dtype=None): """ Get the SQL db table schema for the given frame. @@ -1549,15 +1519,11 @@ def get_schema(frame, name, flavor=None, keys=None, con=None, dtype=None): Using SQLAlchemy makes it possible to use any DB supported by that library, default: None If a DBAPI2 object, only sqlite3 is supported. - flavor : 'sqlite', default None - .. deprecated:: 0.19.0 - 'sqlite' is the only supported option if SQLAlchemy is not - installed. dtype : dict of column name to SQL type, default None Optional specifying the datatype for columns. The SQL type should be a SQLAlchemy type, or a string for sqlite3 fallback connection. 
""" - pandas_sql = pandasSQL_builder(con=con, flavor=flavor) + pandas_sql = pandasSQL_builder(con=con) return pandas_sql._create_sql_schema(frame, name, keys=keys, dtype=dtype) diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 2b97b447921bb..b409cf20e9a09 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -24,7 +24,7 @@ from pandas.compat import (lrange, lmap, lzip, text_type, string_types, range, zip, BytesIO) from pandas.core.base import StringMixin -from pandas.core.categorical import Categorical +from pandas.core.arrays import Categorical from pandas.core.dtypes.common import (is_categorical_dtype, _ensure_object, is_datetime64_dtype) from pandas.core.frame import DataFrame diff --git a/pandas/plotting/_converter.py b/pandas/plotting/_converter.py index 2a45e20dda4cc..66ee7fa98491f 100644 --- a/pandas/plotting/_converter.py +++ b/pandas/plotting/_converter.py @@ -244,7 +244,7 @@ def _convert_1d(values, units, axis): if not hasattr(axis, 'freq'): raise TypeError('Axis must have `freq` set to convert to Periods') valid_types = (compat.string_types, datetime, - Period, pydt.date, pydt.time) + Period, pydt.date, pydt.time, np.datetime64) if (isinstance(values, valid_types) or is_integer(values) or is_float(values)): return get_datevalue(values, axis.freq) @@ -263,7 +263,7 @@ def get_datevalue(date, freq): if isinstance(date, Period): return date.asfreq(freq).ordinal elif isinstance(date, (compat.string_types, datetime, - pydt.date, pydt.time)): + pydt.date, pydt.time, np.datetime64)): return Period(date, freq).ordinal elif (is_integer(date) or is_float(date) or (isinstance(date, (np.ndarray, Index)) and (date.size == 1))): diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 3094d7d0ab1c6..8b03d6ddde4ec 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -10,6 +10,7 @@ import numpy as np from pandas.util._decorators import cache_readonly +import pandas.core.common as com from pandas.core.base import 
PandasObject from pandas.core.config import get_option from pandas.core.dtypes.missing import isna, notna, remove_na_arraylike @@ -21,7 +22,6 @@ is_iterator) from pandas.core.dtypes.generic import ABCSeries, ABCDataFrame -from pandas.core.common import AbstractMethodError, _try_sort, _any_not_none from pandas.core.generic import _shared_docs, _shared_doc_kwargs from pandas.core.index import Index, MultiIndex @@ -225,7 +225,7 @@ def _iter_data(self, data=None, keep_index=False, fillna=None): # TODO: unused? # if self.sort_columns: - # columns = _try_sort(data.columns) + # columns = com._try_sort(data.columns) # else: # columns = data.columns @@ -367,7 +367,7 @@ def _compute_plot_data(self): self.data = numeric_data def _make_plot(self): - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) def _add_table(self): if self.table is False: @@ -609,7 +609,7 @@ def _plot(cls, ax, x, y, style=None, is_errorbar=False, **kwds): def _get_index_name(self): if isinstance(self.data.index, MultiIndex): name = self.data.index.names - if _any_not_none(*name): + if com._any_not_none(*name): name = ','.join(pprint_thing(x) for x in name) else: name = None @@ -957,7 +957,7 @@ def _make_plot(self): it = self._iter_data() stacking_id = self._get_stacking_id() - is_errorbar = _any_not_none(*self.errors.values()) + is_errorbar = com._any_not_none(*self.errors.values()) colors = self._get_colors() for i, (label, y) in enumerate(it): @@ -2182,7 +2182,7 @@ def hist_frame(data, column=None, by=None, grid=True, xlabelsize=None, layout=layout) _axes = _flatten(axes) - for i, col in enumerate(_try_sort(data.columns)): + for i, col in enumerate(com._try_sort(data.columns)): ax = _axes[i] ax.hist(data[col].dropna().values, bins=bins, **kwds) ax.set_title(col) diff --git a/pandas/plotting/_style.py b/pandas/plotting/_style.py index 887202e22b4e0..426b29a8840f4 100644 --- a/pandas/plotting/_style.py +++ b/pandas/plotting/_style.py @@ -44,12 +44,12 @@ def 
_get_standard_colors(num_colors=None, colormap=None, color_type='default', if isinstance(colors, compat.string_types): colors = list(colors) elif color_type == 'random': - from pandas.core.common import _random_state + import pandas.core.common as com def random_color(column): """ Returns a random color represented as a list of length 3""" # GH17525 use common._random_state to avoid resetting the seed - rs = _random_state(column) + rs = com._random_state(column) return rs.rand(3).tolist() colors = lmap(random_color, lrange(num_colors)) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 8962eb90be828..c20767b09178c 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- - +import sys from warnings import catch_warnings import pytest @@ -122,7 +122,7 @@ def test_api(self): class TestApi(Base): - allowed = ['types'] + allowed = ['types', 'extensions'] def test_api(self): @@ -249,3 +249,18 @@ def test_deprecation_cdaterange(self): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): cdate_range('2017-01-01', '2017-12-31') + + +class TestCategoricalMove(object): + + def test_categorical_move(self): + # May have been cached by another import, e.g. pickle tests. 
+ sys.modules.pop("pandas.core.categorical", None) + + with tm.assert_produces_warning(FutureWarning): + from pandas.core.categorical import Categorical # noqa + + sys.modules.pop("pandas.core.categorical", None) + + with tm.assert_produces_warning(FutureWarning): + from pandas.core.categorical import CategoricalDtype # noqa diff --git a/pandas/tests/api/test_types.py b/pandas/tests/api/test_types.py index 1cbcf3f9109a4..7e6430accc546 100644 --- a/pandas/tests/api/test_types.py +++ b/pandas/tests/api/test_types.py @@ -30,7 +30,7 @@ class TestTypes(Base): 'is_period_dtype', 'is_interval', 'is_interval_dtype', 'is_re', 'is_re_compilable', 'is_dict_like', 'is_iterator', 'is_file_like', - 'is_list_like', 'is_hashable', + 'is_list_like', 'is_hashable', 'is_array_like', 'is_named_tuple', 'pandas_dtype', 'union_categoricals', 'infer_dtype'] deprecated = ['is_any_int_dtype', 'is_floating_dtype', 'is_sequence'] diff --git a/pandas/tests/categorical/test_api.py b/pandas/tests/categorical/test_api.py index 12db4a9bea28b..ad5b78b36438b 100644 --- a/pandas/tests/categorical/test_api.py +++ b/pandas/tests/categorical/test_api.py @@ -7,7 +7,7 @@ import pandas.util.testing as tm from pandas import Categorical, CategoricalIndex, Index, Series, DataFrame -from pandas.core.categorical import _recode_for_categories +from pandas.core.arrays.categorical import _recode_for_categories from pandas.tests.categorical.common import TestCategorical @@ -400,15 +400,6 @@ def test_remove_unused_categories(self): out = cat.remove_unused_categories() assert out.get_values().tolist() == val.tolist() - def test_deprecated_labels(self): - # TODO: labels is deprecated and should be removed in 0.18 or 2017, - # whatever is earlier - cat = Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) - exp = cat.codes - with tm.assert_produces_warning(FutureWarning): - res = cat.labels - tm.assert_numpy_array_equal(res, exp) - class TestCategoricalAPIWithFactor(TestCategorical): diff --git 
a/pandas/tests/categorical/test_constructors.py b/pandas/tests/categorical/test_constructors.py index abea7e9a0e0b4..b29d75bed5c6f 100644 --- a/pandas/tests/categorical/test_constructors.py +++ b/pandas/tests/categorical/test_constructors.py @@ -76,9 +76,7 @@ def test_constructor_unsortable(self): def test_constructor_interval(self): result = Categorical([Interval(1, 2), Interval(2, 3), Interval(3, 6)], ordered=True) - ii = IntervalIndex.from_intervals([Interval(1, 2), - Interval(2, 3), - Interval(3, 6)]) + ii = IntervalIndex([Interval(1, 2), Interval(2, 3), Interval(3, 6)]) exp = Categorical(ii, ordered=True) tm.assert_categorical_equal(result, exp) tm.assert_index_equal(result.categories, ii) diff --git a/pandas/tests/categorical/test_operators.py b/pandas/tests/categorical/test_operators.py index 09a0607b67a88..fa8bb817616e4 100644 --- a/pandas/tests/categorical/test_operators.py +++ b/pandas/tests/categorical/test_operators.py @@ -250,6 +250,13 @@ def test_compare_different_lengths(self): with tm.assert_raises_regex(TypeError, msg): c1 == c2 + def test_compare_unordered_different_order(self): + # https://github.com/pandas-dev/pandas/issues/16603#issuecomment- + # 349290078 + a = pd.Categorical(['a'], categories=['a', 'b']) + b = pd.Categorical(['b'], categories=['b', 'a']) + assert not a.equals(b) + def test_numeric_like_ops(self): df = DataFrame({'value': np.random.randint(0, 10000, 100)}) diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 6a3715fd66159..d800a7b92b559 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -433,7 +433,7 @@ def test_hash_vs_equality(self): assert dtype2 == dtype assert dtype3 == dtype assert dtype is dtype2 - assert dtype2 is dtype + assert dtype2 is dtype3 assert dtype3 is dtype assert hash(dtype) == hash(dtype2) assert hash(dtype) == hash(dtype3) @@ -451,14 +451,19 @@ def test_hash_vs_equality(self): assert hash(dtype2) == hash(dtype2) assert 
hash(dtype2) == hash(dtype3) - def test_construction(self): - with pytest.raises(ValueError): - IntervalDtype('xx') + @pytest.mark.parametrize('subtype', [ + 'interval[int64]', 'Interval[int64]', 'int64', np.dtype('int64')]) + def test_construction(self, subtype): + i = IntervalDtype(subtype) + assert i.subtype == np.dtype('int64') + assert is_interval_dtype(i) - for s in ['interval[int64]', 'Interval[int64]', 'int64']: - i = IntervalDtype(s) - assert i.subtype == np.dtype('int64') - assert is_interval_dtype(i) + @pytest.mark.parametrize('subtype', [None, 'interval', 'Interval']) + def test_construction_generic(self, subtype): + # generic + i = IntervalDtype(subtype) + assert i.subtype is None + assert is_interval_dtype(i) @pytest.mark.parametrize('subtype', [ CategoricalDtype(list('abc'), False), @@ -471,17 +476,27 @@ def test_construction_not_supported(self, subtype): with tm.assert_raises_regex(TypeError, msg): IntervalDtype(subtype) - def test_construction_generic(self): - # generic - i = IntervalDtype('interval') - assert i.subtype == '' - assert is_interval_dtype(i) - assert str(i) == 'interval[]' + def test_construction_errors(self): + msg = 'could not construct IntervalDtype' + with tm.assert_raises_regex(ValueError, msg): + IntervalDtype('xx') - i = IntervalDtype() - assert i.subtype is None - assert is_interval_dtype(i) - assert str(i) == 'interval' + def test_construction_from_string(self): + result = IntervalDtype('interval[int64]') + assert is_dtype_equal(self.dtype, result) + result = IntervalDtype.construct_from_string('interval[int64]') + assert is_dtype_equal(self.dtype, result) + + @pytest.mark.parametrize('string', [ + 'foo', 'interval[foo]', 'foo[int64]', 0, 3.14, ('a', 'b'), None]) + def test_construction_from_string_errors(self, string): + if isinstance(string, string_types): + error, msg = ValueError, 'could not construct IntervalDtype' + else: + error, msg = TypeError, 'a string needs to be passed, got type' + + with 
tm.assert_raises_regex(error, msg): + IntervalDtype.construct_from_string(string) def test_subclass(self): a = IntervalDtype('interval[int64]') @@ -506,36 +521,51 @@ def test_is_dtype(self): assert not IntervalDtype.is_dtype(np.int64) assert not IntervalDtype.is_dtype(np.float64) - def test_identity(self): - assert (IntervalDtype('interval[int64]') == - IntervalDtype('interval[int64]')) - def test_coerce_to_dtype(self): assert (_coerce_to_dtype('interval[int64]') == IntervalDtype('interval[int64]')) - def test_construction_from_string(self): - result = IntervalDtype('interval[int64]') - assert is_dtype_equal(self.dtype, result) - result = IntervalDtype.construct_from_string('interval[int64]') - assert is_dtype_equal(self.dtype, result) - with pytest.raises(TypeError): - IntervalDtype.construct_from_string('foo') - with pytest.raises(TypeError): - IntervalDtype.construct_from_string('interval[foo]') - with pytest.raises(TypeError): - IntervalDtype.construct_from_string('foo[int64]') - def test_equality(self): assert is_dtype_equal(self.dtype, 'interval[int64]') assert is_dtype_equal(self.dtype, IntervalDtype('int64')) - assert is_dtype_equal(self.dtype, IntervalDtype('int64')) assert is_dtype_equal(IntervalDtype('int64'), IntervalDtype('int64')) assert not is_dtype_equal(self.dtype, 'int64') assert not is_dtype_equal(IntervalDtype('int64'), IntervalDtype('float64')) + # invalid subtype comparisons do not raise when directly compared + dtype1 = IntervalDtype('float64') + dtype2 = IntervalDtype('datetime64[ns, US/Eastern]') + assert dtype1 != dtype2 + assert dtype2 != dtype1 + + @pytest.mark.parametrize('subtype', [ + None, 'interval', 'Interval', 'int64', 'uint64', 'float64', + 'complex128', 'datetime64', 'timedelta64', PeriodDtype('Q')]) + def test_equality_generic(self, subtype): + # GH 18980 + dtype = IntervalDtype(subtype) + assert is_dtype_equal(dtype, 'interval') + assert is_dtype_equal(dtype, IntervalDtype()) + + @pytest.mark.parametrize('subtype', [ + 
'int64', 'uint64', 'float64', 'complex128', 'datetime64', + 'timedelta64', PeriodDtype('Q')]) + def test_name_repr(self, subtype): + # GH 18980 + dtype = IntervalDtype(subtype) + expected = 'interval[{subtype}]'.format(subtype=subtype) + assert str(dtype) == expected + assert dtype.name == 'interval' + + @pytest.mark.parametrize('subtype', [None, 'interval', 'Interval']) + def test_name_repr_generic(self, subtype): + # GH 18980 + dtype = IntervalDtype(subtype) + assert str(dtype) == 'interval' + assert dtype.name == 'interval' + def test_basic(self): assert is_interval_dtype(self.dtype) diff --git a/pandas/tests/dtypes/test_generic.py b/pandas/tests/dtypes/test_generic.py index bd365f9c3281f..58cb182e7d403 100644 --- a/pandas/tests/dtypes/test_generic.py +++ b/pandas/tests/dtypes/test_generic.py @@ -45,6 +45,8 @@ def test_abc_types(self): gt.ABCDateOffset) assert not isinstance(pd.Period('2012', freq='A-DEC'), gt.ABCDateOffset) + assert isinstance(pd.Interval(0, 1.5), gt.ABCInterval) + assert not isinstance(pd.Period('2012', freq='A-DEC'), gt.ABCInterval) def test_setattr_warnings(): diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 33c570a814e7d..b4f5d67530fbd 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -78,6 +78,23 @@ def test_is_list_like_fails(ll): assert not inference.is_list_like(ll) +def test_is_array_like(): + assert inference.is_array_like(Series([])) + assert inference.is_array_like(Series([1, 2])) + assert inference.is_array_like(np.array(["a", "b"])) + assert inference.is_array_like(Index(["2016-01-01"])) + + class DtypeList(list): + dtype = "special" + + assert inference.is_array_like(DtypeList()) + + assert not inference.is_array_like([1, 2, 3]) + assert not inference.is_array_like(tuple()) + assert not inference.is_array_like("foo") + assert not inference.is_array_like(123) + + @pytest.mark.parametrize('inner', [ [], [1], (1, ), (1, 2), {'a': 1}, 
set([1, 'a']), Series([1]), Series([]), Series(['a']).str, (x for x in range(5)) diff --git a/pandas/tests/dtypes/test_io.py b/pandas/tests/dtypes/test_io.py deleted file mode 100644 index 06b61371c9a0b..0000000000000 --- a/pandas/tests/dtypes/test_io.py +++ /dev/null @@ -1,73 +0,0 @@ -# -*- coding: utf-8 -*- - -import numpy as np -import pandas._libs.lib as lib -import pandas.util.testing as tm - -from pandas.compat import long, u - - -class TestParseSQL(object): - - def test_convert_sql_column_floats(self): - arr = np.array([1.5, None, 3, 4.2], dtype=object) - result = lib.convert_sql_column(arr) - expected = np.array([1.5, np.nan, 3, 4.2], dtype='f8') - tm.assert_numpy_array_equal(result, expected) - - def test_convert_sql_column_strings(self): - arr = np.array(['1.5', None, '3', '4.2'], dtype=object) - result = lib.convert_sql_column(arr) - expected = np.array(['1.5', np.nan, '3', '4.2'], dtype=object) - tm.assert_numpy_array_equal(result, expected) - - def test_convert_sql_column_unicode(self): - arr = np.array([u('1.5'), None, u('3'), u('4.2')], - dtype=object) - result = lib.convert_sql_column(arr) - expected = np.array([u('1.5'), np.nan, u('3'), u('4.2')], - dtype=object) - tm.assert_numpy_array_equal(result, expected) - - def test_convert_sql_column_ints(self): - arr = np.array([1, 2, 3, 4], dtype='O') - arr2 = np.array([1, 2, 3, 4], dtype='i4').astype('O') - result = lib.convert_sql_column(arr) - result2 = lib.convert_sql_column(arr2) - expected = np.array([1, 2, 3, 4], dtype='i8') - tm.assert_numpy_array_equal(result, expected) - tm.assert_numpy_array_equal(result2, expected) - - arr = np.array([1, 2, 3, None, 4], dtype='O') - result = lib.convert_sql_column(arr) - expected = np.array([1, 2, 3, np.nan, 4], dtype='f8') - tm.assert_numpy_array_equal(result, expected) - - def test_convert_sql_column_longs(self): - arr = np.array([long(1), long(2), long(3), long(4)], dtype='O') - result = lib.convert_sql_column(arr) - expected = np.array([1, 2, 3, 4], 
dtype='i8') - tm.assert_numpy_array_equal(result, expected) - - arr = np.array([long(1), long(2), long(3), None, long(4)], dtype='O') - result = lib.convert_sql_column(arr) - expected = np.array([1, 2, 3, np.nan, 4], dtype='f8') - tm.assert_numpy_array_equal(result, expected) - - def test_convert_sql_column_bools(self): - arr = np.array([True, False, True, False], dtype='O') - result = lib.convert_sql_column(arr) - expected = np.array([True, False, True, False], dtype=bool) - tm.assert_numpy_array_equal(result, expected) - - arr = np.array([True, False, None, False], dtype='O') - result = lib.convert_sql_column(arr) - expected = np.array([True, False, np.nan, False], dtype=object) - tm.assert_numpy_array_equal(result, expected) - - def test_convert_sql_column_decimals(self): - from decimal import Decimal - arr = np.array([Decimal('1.5'), None, Decimal('3'), Decimal('4.2')]) - result = lib.convert_sql_column(arr) - expected = np.array([1.5, np.nan, 3, 4.2], dtype='f8') - tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index 65dd166e1f6a8..e0fc6c470fe57 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -428,6 +428,16 @@ def test_applymap(self): result = frame.applymap(func) tm.assert_frame_equal(result, frame) + def test_applymap_box_timestamps(self): + # #2689, #2627 + ser = pd.Series(date_range('1/1/2000', periods=10)) + + def func(x): + return (x.hour, x.day, x.month) + + # it works! + pd.DataFrame(ser).applymap(func) + def test_applymap_box(self): # ufunc will not be boxed. 
Same test cases as the test_map_box df = pd.DataFrame({'a': [pd.Timestamp('2011-01-01'), diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py new file mode 100644 index 0000000000000..3f4e3877a276a --- /dev/null +++ b/pandas/tests/frame/test_arithmetic.py @@ -0,0 +1,37 @@ +# -*- coding: utf-8 -*- + +import numpy as np + +import pandas as pd +import pandas.util.testing as tm + + +class TestPeriodFrameArithmetic(object): + + def test_ops_frame_period(self): + # GH 13043 + df = pd.DataFrame({'A': [pd.Period('2015-01', freq='M'), + pd.Period('2015-02', freq='M')], + 'B': [pd.Period('2014-01', freq='M'), + pd.Period('2014-02', freq='M')]}) + assert df['A'].dtype == object + assert df['B'].dtype == object + + p = pd.Period('2015-03', freq='M') + # dtype will be object because of original dtype + exp = pd.DataFrame({'A': np.array([2, 1], dtype=object), + 'B': np.array([14, 13], dtype=object)}) + tm.assert_frame_equal(p - df, exp) + tm.assert_frame_equal(df - p, -exp) + + df2 = pd.DataFrame({'A': [pd.Period('2015-05', freq='M'), + pd.Period('2015-06', freq='M')], + 'B': [pd.Period('2015-05', freq='M'), + pd.Period('2015-06', freq='M')]}) + assert df2['A'].dtype == object + assert df2['B'].dtype == object + + exp = pd.DataFrame({'A': np.array([4, 4], dtype=object), + 'B': np.array([16, 16], dtype=object)}) + tm.assert_frame_equal(df2 - df, exp) + tm.assert_frame_equal(df - df2, -exp) diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index 343e235fb741c..28e82f7585850 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -41,8 +41,8 @@ def test_drop_names(self): assert obj.columns.name == 'second' assert list(df.columns) == ['d', 'e', 'f'] - pytest.raises(ValueError, df.drop, ['g']) - pytest.raises(ValueError, df.drop, ['g'], 1) + pytest.raises(KeyError, df.drop, ['g']) + pytest.raises(KeyError, df.drop, ['g'], 1) # 
errors = 'ignore' dropped = df.drop(['g'], errors='ignore') @@ -87,10 +87,10 @@ def test_drop(self): assert_frame_equal(simple.drop( [0, 3], axis='index'), simple.loc[[1, 2], :]) - pytest.raises(ValueError, simple.drop, 5) - pytest.raises(ValueError, simple.drop, 'C', 1) - pytest.raises(ValueError, simple.drop, [1, 5]) - pytest.raises(ValueError, simple.drop, ['A', 'C'], 1) + pytest.raises(KeyError, simple.drop, 5) + pytest.raises(KeyError, simple.drop, 'C', 1) + pytest.raises(KeyError, simple.drop, [1, 5]) + pytest.raises(KeyError, simple.drop, ['A', 'C'], 1) # errors = 'ignore' assert_frame_equal(simple.drop(5, errors='ignore'), simple) @@ -1128,3 +1128,26 @@ def test_reindex_multi(self): expected = df.reindex([0, 1]).reindex(columns=['a', 'b']) assert_frame_equal(result, expected) + + data = [[1, 2, 3], [1, 2, 3]] + + @pytest.mark.parametrize('actual', [ + DataFrame(data=data, index=['a', 'a']), + DataFrame(data=data, index=['a', 'b']), + DataFrame(data=data, index=['a', 'b']).set_index([0, 1]), + DataFrame(data=data, index=['a', 'a']).set_index([0, 1]) + ]) + def test_raise_on_drop_duplicate_index(self, actual): + + # issue 19186 + level = 0 if isinstance(actual.index, MultiIndex) else None + with pytest.raises(KeyError): + actual.drop('c', level=level, axis=0) + with pytest.raises(KeyError): + actual.T.drop('c', level=level, axis=1) + expected_no_err = actual.drop('c', axis=0, level=level, + errors='ignore') + assert_frame_equal(expected_no_err, actual) + expected_no_err = actual.T.drop('c', axis=1, level=level, + errors='ignore') + assert_frame_equal(expected_no_err.T, actual) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index b7d3a60ecf6e4..8b57e96e6fa06 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2092,3 +2092,14 @@ def test_frame_timeseries_to_records(self): result['index'].dtype == 'M8[ns]' result = df.to_records(index=False) + + def 
test_frame_timeseries_column(self): + # GH19157 + dr = date_range(start='20130101T10:00:00', periods=3, freq='T', + tz='US/Eastern') + result = DataFrame(dr, columns=['timestamps']) + expected = DataFrame({'timestamps': [ + Timestamp('20130101T10:00:00', tz='US/Eastern'), + Timestamp('20130101T10:01:00', tz='US/Eastern'), + Timestamp('20130101T10:02:00', tz='US/Eastern')]}) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 21c028e634bc0..38bdecc9eb88f 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -640,6 +640,87 @@ def test_astype_categoricaldtype_class_raises(self, cls): with tm.assert_raises_regex(TypeError, xpr): df['A'].astype(cls) + @pytest.mark.parametrize("dtype", ["M8", "m8"]) + @pytest.mark.parametrize("unit", ['ns', 'us', 'ms', 's', 'h', 'm', 'D']) + def test_astype_from_datetimelike_to_objectt(self, dtype, unit): + # tests astype to object dtype + # gh-19223 / gh-12425 + dtype = "{}[{}]".format(dtype, unit) + arr = np.array([[1, 2, 3]], dtype=dtype) + df = DataFrame(arr) + result = df.astype(object) + assert (result.dtypes == object).all() + + if dtype.startswith('M8'): + assert result.iloc[0, 0] == pd.to_datetime(1, unit=unit) + else: + assert result.iloc[0, 0] == pd.to_timedelta(1, unit=unit) + + @pytest.mark.parametrize("arr_dtype", [np.int64, np.float64]) + @pytest.mark.parametrize("dtype", ["M8", "m8"]) + @pytest.mark.parametrize("unit", ['ns', 'us', 'ms', 's', 'h', 'm', 'D']) + def test_astype_to_datetimelike_unit(self, arr_dtype, dtype, unit): + # tests all units from numeric origination + # gh-19223 / gh-12425 + dtype = "{}[{}]".format(dtype, unit) + arr = np.array([[1, 2, 3]], dtype=arr_dtype) + df = DataFrame(arr) + result = df.astype(dtype) + expected = DataFrame(arr.astype(dtype)) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("unit", ['ns', 'us', 'ms', 's', 'h', 'm', 'D']) + def 
test_astype_to_datetime_unit(self, unit): + # tests all units from datetime origination + # gh-19223 + dtype = "M8[{}]".format(unit) + arr = np.array([[1, 2, 3]], dtype=dtype) + df = DataFrame(arr) + result = df.astype(dtype) + expected = DataFrame(arr.astype(dtype)) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("unit", ['ns']) + def test_astype_to_timedelta_unit_ns(self, unit): + # preserver the timedelta conversion + # gh-19223 + dtype = "m8[{}]".format(unit) + arr = np.array([[1, 2, 3]], dtype=dtype) + df = DataFrame(arr) + result = df.astype(dtype) + expected = DataFrame(arr.astype(dtype)) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("unit", ['us', 'ms', 's', 'h', 'm', 'D']) + def test_astype_to_timedelta_unit(self, unit): + # coerce to float + # gh-19223 + dtype = "m8[{}]".format(unit) + arr = np.array([[1, 2, 3]], dtype=dtype) + df = DataFrame(arr) + result = df.astype(dtype) + expected = DataFrame(df.values.astype(dtype).astype(float)) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("unit", ['ns', 'us', 'ms', 's', 'h', 'm', 'D']) + def test_astype_to_incorrect_datetimelike(self, unit): + # trying to astype a m to a M, or vice-versa + # gh-19224 + dtype = "M8[{}]".format(unit) + other = "m8[{}]".format(unit) + + df = DataFrame(np.array([[1, 2, 3]], dtype=dtype)) + with pytest.raises(TypeError): + df.astype(other) + + df = DataFrame(np.array([[1, 2, 3]], dtype=other)) + with pytest.raises(TypeError): + df.astype(dtype) + def test_timedeltas(self): df = DataFrame(dict(A=Series(date_range('2012-1-1', periods=3, freq='D')), diff --git a/pandas/tests/frame/test_mutate_columns.py b/pandas/tests/frame/test_mutate_columns.py index 26e2b801f6460..9acdf2f17d86a 100644 --- a/pandas/tests/frame/test_mutate_columns.py +++ b/pandas/tests/frame/test_mutate_columns.py @@ -193,9 +193,10 @@ def test_delitem_multiindex(self): with pytest.raises(KeyError): del df[('A',)] - # xref: 
https://github.com/pandas-dev/pandas/issues/2770 - # the 'A' is STILL in the columns! - assert 'A' in df.columns + # behavior of dropped/deleted MultiIndex levels changed from + # GH 2770 to GH 19027: MultiIndex no longer '.__contains__' + # levels which are dropped/deleted + assert 'A' not in df.columns with pytest.raises(KeyError): del df['A'] diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index fd1eb23643c2b..0bc4a7df6a55b 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -28,6 +28,53 @@ _check_mixed_int) +class TestDataFrameArithmetic(object): + + @pytest.mark.xfail(reason='GH#7996 datetime64 units not converted to nano') + def test_frame_sub_datetime64_not_ns(self): + df = pd.DataFrame(date_range('20130101', periods=3)) + dt64 = np.datetime64('2013-01-01') + assert dt64.dtype == 'datetime64[D]' + res = df - dt64 + expected = pd.DataFrame([pd.Timedelta(days=0), pd.Timedelta(days=1), + pd.Timedelta(days=2)]) + tm.assert_frame_equal(res, expected) + + @pytest.mark.parametrize('data', [ + [1, 2, 3], + [1.1, 2.2, 3.3], + [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02'), pd.NaT], + ['x', 'y', 1]]) + @pytest.mark.parametrize('dtype', [None, object]) + def test_frame_radd_str_invalid(self, dtype, data): + df = DataFrame(data, dtype=dtype) + with pytest.raises(TypeError): + 'foo_' + df + + @pytest.mark.parametrize('dtype', [None, object]) + def test_frame_with_dtype_radd_int(self, dtype): + df = pd.DataFrame([1, 2, 3], dtype=dtype) + expected = pd.DataFrame([2, 3, 4], dtype=dtype) + result = 1 + df + assert_frame_equal(result, expected) + result = df + 1 + assert_frame_equal(result, expected) + + @pytest.mark.parametrize('dtype', [None, object]) + def test_frame_with_dtype_radd_nan(self, dtype): + df = pd.DataFrame([1, 2, 3], dtype=dtype) + expected = pd.DataFrame([np.nan, np.nan, np.nan], dtype=dtype) + result = np.nan + df + assert_frame_equal(result, expected) + result = 
df + np.nan + assert_frame_equal(result, expected) + + def test_frame_radd_str(self): + df = pd.DataFrame(['x', np.nan, 'x']) + assert_frame_equal('a' + df, pd.DataFrame(['ax', np.nan, 'ax'])) + assert_frame_equal(df + 'a', pd.DataFrame(['xa', np.nan, 'xa'])) + + class TestDataFrameOperators(TestData): def test_operators(self): diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index 5ff4f58774322..7907486c7c98d 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -560,6 +560,74 @@ def test_unstack_dtypes(self): assert left.shape == (3, 2) tm.assert_frame_equal(left, right) + def test_unstack_unused_levels(self): + # GH 17845: unused labels in index make unstack() cast int to float + idx = pd.MultiIndex.from_product([['a'], ['A', 'B', 'C', 'D']])[:-1] + df = pd.DataFrame([[1, 0]] * 3, index=idx) + + result = df.unstack() + exp_col = pd.MultiIndex.from_product([[0, 1], ['A', 'B', 'C']]) + expected = pd.DataFrame([[1, 1, 1, 0, 0, 0]], index=['a'], + columns=exp_col) + tm.assert_frame_equal(result, expected) + assert((result.columns.levels[1] == idx.levels[1]).all()) + + # Unused items on both levels + levels = [[0, 1, 7], [0, 1, 2, 3]] + labels = [[0, 0, 1, 1], [0, 2, 0, 2]] + idx = pd.MultiIndex(levels, labels) + block = np.arange(4).reshape(2, 2) + df = pd.DataFrame(np.concatenate([block, block + 4]), index=idx) + result = df.unstack() + expected = pd.DataFrame(np.concatenate([block * 2, block * 2 + 1], + axis=1), + columns=idx) + tm.assert_frame_equal(result, expected) + assert((result.columns.levels[1] == idx.levels[1]).all()) + + # With mixed dtype and NaN + levels = [['a', 2, 'c'], [1, 3, 5, 7]] + labels = [[0, -1, 1, 1], [0, 2, -1, 2]] + idx = pd.MultiIndex(levels, labels) + data = np.arange(8) + df = pd.DataFrame(data.reshape(4, 2), index=idx) + + cases = ((0, [13, 16, 6, 9, 2, 5, 8, 11], + [np.nan, 'a', 2], [np.nan, 5, 1]), + (1, [8, 11, 1, 4, 12, 15, 13, 16], + [np.nan, 5, 1], [np.nan, 
'a', 2])) + for level, idces, col_level, idx_level in cases: + result = df.unstack(level=level) + exp_data = np.zeros(18) * np.nan + exp_data[idces] = data + cols = pd.MultiIndex.from_product([[0, 1], col_level]) + expected = pd.DataFrame(exp_data.reshape(3, 6), + index=idx_level, columns=cols) + # Broken (GH 18455): + # tm.assert_frame_equal(result, expected) + diff = result - expected + assert(diff.sum().sum() == 0) + assert((diff + 1).sum().sum() == 8) + + assert((result.columns.levels[1] == idx.levels[level]).all()) + + @pytest.mark.parametrize("cols", [['A', 'C'], slice(None)]) + def test_unstack_unused_level(self, cols): + # GH 18562 : unused labels on the unstacked level + df = pd.DataFrame([[2010, 'a', 'I'], + [2011, 'b', 'II']], + columns=['A', 'B', 'C']) + + ind = df.set_index(['A', 'B', 'C'], drop=False) + selection = ind.loc[(slice(None), slice(None), 'I'), cols] + result = selection.unstack() + + expected = ind.iloc[[0]][cols] + expected.columns = MultiIndex.from_product([expected.columns, ['I']], + names=[None, 'C']) + expected.index = expected.index.droplevel('C') + tm.assert_frame_equal(result, expected) + def test_unstack_nan_index(self): # GH7466 cast = lambda val: '{0:1}'.format('' if val != val else val) nan = np.nan diff --git a/pandas/tests/frame/test_sort_values_level_as_str.py b/pandas/tests/frame/test_sort_values_level_as_str.py new file mode 100644 index 0000000000000..3b4eadfce81cd --- /dev/null +++ b/pandas/tests/frame/test_sort_values_level_as_str.py @@ -0,0 +1,126 @@ +import numpy as np +import pytest + +from pandas import DataFrame, Index +from pandas.errors import PerformanceWarning +from pandas.util import testing as tm +from pandas.util.testing import assert_frame_equal + + +@pytest.fixture +def df_none(): + return DataFrame({ + 'outer': ['a', 'a', 'a', 'b', 'b', 'b'], + 'inner': [1, 2, 2, 2, 1, 1], + 'A': np.arange(6, 0, -1), + ('B', 5): ['one', 'one', 'two', 'two', 'one', 'one']}) + + +@pytest.fixture(params=[ + ['outer'], + 
['outer', 'inner'] +]) +def df_idx(request, df_none): + levels = request.param + return df_none.set_index(levels) + + +@pytest.fixture(params=[ + 'inner', # index level + ['outer'], # list of index level + 'A', # column + [('B', 5)], # list of column + ['inner', 'outer'], # two index levels + [('B', 5), 'outer'], # index level and column + ['A', ('B', 5)], # Two columns + ['inner', 'outer'] # two index levels and column +]) +def sort_names(request): + return request.param + + +@pytest.fixture(params=[True, False]) +def ascending(request): + return request.param + + +def test_sort_index_level_and_column_label( + df_none, df_idx, sort_names, ascending): + + # GH 14353 + + # Get index levels from df_idx + levels = df_idx.index.names + + # Compute expected by sorting on columns and the setting index + expected = df_none.sort_values(by=sort_names, + ascending=ascending, + axis=0).set_index(levels) + + # Compute result sorting on mix on columns and index levels + result = df_idx.sort_values(by=sort_names, + ascending=ascending, + axis=0) + + assert_frame_equal(result, expected) + + +def test_sort_column_level_and_index_label( + df_none, df_idx, sort_names, ascending): + + # GH 14353 + + # Get levels from df_idx + levels = df_idx.index.names + + # Compute expected by sorting on axis=0, setting index levels, and then + # transposing. For some cases this will result in a frame with + # multiple column levels + expected = df_none.sort_values(by=sort_names, + ascending=ascending, + axis=0).set_index(levels).T + + # Compute result by transposing and sorting on axis=1. 
+ result = df_idx.T.sort_values(by=sort_names, + ascending=ascending, + axis=1) + + if len(levels) > 1: + # Accessing multi-level columns that are not lexsorted raises a + # performance warning + with tm.assert_produces_warning(PerformanceWarning, + check_stacklevel=False): + assert_frame_equal(result, expected) + else: + assert_frame_equal(result, expected) + + +def test_sort_values_column_index_level_precedence(): + # GH 14353, when a string passed as the `by` parameter + # matches a column and an index level the column takes + # precedence + + # Construct DataFrame with index and column named 'idx' + idx = Index(np.arange(1, 7), name='idx') + df = DataFrame({'A': np.arange(11, 17), + 'idx': np.arange(6, 0, -1)}, + index=idx) + + # Sorting by 'idx' should sort by the idx column and raise a + # FutureWarning + with tm.assert_produces_warning(FutureWarning): + result = df.sort_values(by='idx') + + # This should be equivalent to sorting by the 'idx' index level in + # descending order + expected = df.sort_index(level='idx', ascending=False) + assert_frame_equal(result, expected) + + # Perform same test with MultiIndex + df_multi = df.set_index('A', append=True) + + with tm.assert_produces_warning(FutureWarning): + result = df_multi.sort_values(by='idx') + + expected = df_multi.sort_index(level='idx', ascending=False) + assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_sorting.py b/pandas/tests/frame/test_sorting.py index a98439797dc28..5bd239f8a3034 100644 --- a/pandas/tests/frame/test_sorting.py +++ b/pandas/tests/frame/test_sorting.py @@ -455,26 +455,26 @@ def test_sort_index_duplicates(self): df = DataFrame([lrange(5, 9), lrange(4)], columns=['a', 'a', 'b', 'b']) - with tm.assert_raises_regex(ValueError, 'duplicate'): + with tm.assert_raises_regex(ValueError, 'not unique'): # use .sort_values #9816 with tm.assert_produces_warning(FutureWarning): df.sort_index(by='a') - with tm.assert_raises_regex(ValueError, 'duplicate'): + with 
tm.assert_raises_regex(ValueError, 'not unique'): df.sort_values(by='a') - with tm.assert_raises_regex(ValueError, 'duplicate'): + with tm.assert_raises_regex(ValueError, 'not unique'): # use .sort_values #9816 with tm.assert_produces_warning(FutureWarning): df.sort_index(by=['a']) - with tm.assert_raises_regex(ValueError, 'duplicate'): + with tm.assert_raises_regex(ValueError, 'not unique'): df.sort_values(by=['a']) - with tm.assert_raises_regex(ValueError, 'duplicate'): + with tm.assert_raises_regex(ValueError, 'not unique'): # use .sort_values #9816 with tm.assert_produces_warning(FutureWarning): # multi-column 'by' is separate codepath df.sort_index(by=['a', 'b']) - with tm.assert_raises_regex(ValueError, 'duplicate'): + with tm.assert_raises_regex(ValueError, 'not unique'): # multi-column 'by' is separate codepath df.sort_values(by=['a', 'b']) @@ -482,11 +482,11 @@ def test_sort_index_duplicates(self): # GH4370 df = DataFrame(np.random.randn(4, 2), columns=MultiIndex.from_tuples([('a', 0), ('a', 1)])) - with tm.assert_raises_regex(ValueError, 'levels'): + with tm.assert_raises_regex(ValueError, 'level'): # use .sort_values #9816 with tm.assert_produces_warning(FutureWarning): df.sort_index(by='a') - with tm.assert_raises_regex(ValueError, 'levels'): + with tm.assert_raises_regex(ValueError, 'level'): df.sort_values(by='a') # convert tuples to a list of tuples diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index 52c591e4dcbb0..c52b512c2930a 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -5,7 +5,7 @@ from warnings import catch_warnings import numpy as np -from pandas import DataFrame, Series, MultiIndex, Panel +from pandas import DataFrame, Series, MultiIndex, Panel, Index import pandas as pd import pandas.util.testing as tm @@ -247,3 +247,270 @@ def test_subclass_sparse_transpose(self): [2, 5], [3, 6]]) tm.assert_sp_frame_equal(ossdf.T, essdf) + + def 
test_subclass_stack(self): + # GH 15564 + df = tm.SubclassedDataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], + index=['a', 'b', 'c'], + columns=['X', 'Y', 'Z']) + + res = df.stack() + exp = tm.SubclassedSeries( + [1, 2, 3, 4, 5, 6, 7, 8, 9], + index=[list('aaabbbccc'), list('XYZXYZXYZ')]) + + tm.assert_series_equal(res, exp) + + def test_subclass_stack_multi(self): + # GH 15564 + df = tm.SubclassedDataFrame([ + [10, 11, 12, 13], + [20, 21, 22, 23], + [30, 31, 32, 33], + [40, 41, 42, 43]], + index=MultiIndex.from_tuples( + list(zip(list('AABB'), list('cdcd'))), + names=['aaa', 'ccc']), + columns=MultiIndex.from_tuples( + list(zip(list('WWXX'), list('yzyz'))), + names=['www', 'yyy'])) + + exp = tm.SubclassedDataFrame([ + [10, 12], + [11, 13], + [20, 22], + [21, 23], + [30, 32], + [31, 33], + [40, 42], + [41, 43]], + index=MultiIndex.from_tuples(list(zip( + list('AAAABBBB'), list('ccddccdd'), list('yzyzyzyz'))), + names=['aaa', 'ccc', 'yyy']), + columns=Index(['W', 'X'], name='www')) + + res = df.stack() + tm.assert_frame_equal(res, exp) + + res = df.stack('yyy') + tm.assert_frame_equal(res, exp) + + exp = tm.SubclassedDataFrame([ + [10, 11], + [12, 13], + [20, 21], + [22, 23], + [30, 31], + [32, 33], + [40, 41], + [42, 43]], + index=MultiIndex.from_tuples(list(zip( + list('AAAABBBB'), list('ccddccdd'), list('WXWXWXWX'))), + names=['aaa', 'ccc', 'www']), + columns=Index(['y', 'z'], name='yyy')) + + res = df.stack('www') + tm.assert_frame_equal(res, exp) + + def test_subclass_stack_multi_mixed(self): + # GH 15564 + df = tm.SubclassedDataFrame([ + [10, 11, 12.0, 13.0], + [20, 21, 22.0, 23.0], + [30, 31, 32.0, 33.0], + [40, 41, 42.0, 43.0]], + index=MultiIndex.from_tuples( + list(zip(list('AABB'), list('cdcd'))), + names=['aaa', 'ccc']), + columns=MultiIndex.from_tuples( + list(zip(list('WWXX'), list('yzyz'))), + names=['www', 'yyy'])) + + exp = tm.SubclassedDataFrame([ + [10, 12.0], + [11, 13.0], + [20, 22.0], + [21, 23.0], + [30, 32.0], + [31, 33.0], + [40, 42.0], + [41, 
43.0]], + index=MultiIndex.from_tuples(list(zip( + list('AAAABBBB'), list('ccddccdd'), list('yzyzyzyz'))), + names=['aaa', 'ccc', 'yyy']), + columns=Index(['W', 'X'], name='www')) + + res = df.stack() + tm.assert_frame_equal(res, exp) + + res = df.stack('yyy') + tm.assert_frame_equal(res, exp) + + exp = tm.SubclassedDataFrame([ + [10.0, 11.0], + [12.0, 13.0], + [20.0, 21.0], + [22.0, 23.0], + [30.0, 31.0], + [32.0, 33.0], + [40.0, 41.0], + [42.0, 43.0]], + index=MultiIndex.from_tuples(list(zip( + list('AAAABBBB'), list('ccddccdd'), list('WXWXWXWX'))), + names=['aaa', 'ccc', 'www']), + columns=Index(['y', 'z'], name='yyy')) + + res = df.stack('www') + tm.assert_frame_equal(res, exp) + + def test_subclass_unstack(self): + # GH 15564 + df = tm.SubclassedDataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], + index=['a', 'b', 'c'], + columns=['X', 'Y', 'Z']) + + res = df.unstack() + exp = tm.SubclassedSeries( + [1, 4, 7, 2, 5, 8, 3, 6, 9], + index=[list('XXXYYYZZZ'), list('abcabcabc')]) + + tm.assert_series_equal(res, exp) + + def test_subclass_unstack_multi(self): + # GH 15564 + df = tm.SubclassedDataFrame([ + [10, 11, 12, 13], + [20, 21, 22, 23], + [30, 31, 32, 33], + [40, 41, 42, 43]], + index=MultiIndex.from_tuples( + list(zip(list('AABB'), list('cdcd'))), + names=['aaa', 'ccc']), + columns=MultiIndex.from_tuples( + list(zip(list('WWXX'), list('yzyz'))), + names=['www', 'yyy'])) + + exp = tm.SubclassedDataFrame([ + [10, 20, 11, 21, 12, 22, 13, 23], + [30, 40, 31, 41, 32, 42, 33, 43]], + index=Index(['A', 'B'], name='aaa'), + columns=MultiIndex.from_tuples(list(zip( + list('WWWWXXXX'), list('yyzzyyzz'), list('cdcdcdcd'))), + names=['www', 'yyy', 'ccc'])) + + res = df.unstack() + tm.assert_frame_equal(res, exp) + + res = df.unstack('ccc') + tm.assert_frame_equal(res, exp) + + exp = tm.SubclassedDataFrame([ + [10, 30, 11, 31, 12, 32, 13, 33], + [20, 40, 21, 41, 22, 42, 23, 43]], + index=Index(['c', 'd'], name='ccc'), + columns=MultiIndex.from_tuples(list(zip( + 
list('WWWWXXXX'), list('yyzzyyzz'), list('ABABABAB'))), + names=['www', 'yyy', 'aaa'])) + + res = df.unstack('aaa') + tm.assert_frame_equal(res, exp) + + def test_subclass_unstack_multi_mixed(self): + # GH 15564 + df = tm.SubclassedDataFrame([ + [10, 11, 12.0, 13.0], + [20, 21, 22.0, 23.0], + [30, 31, 32.0, 33.0], + [40, 41, 42.0, 43.0]], + index=MultiIndex.from_tuples( + list(zip(list('AABB'), list('cdcd'))), + names=['aaa', 'ccc']), + columns=MultiIndex.from_tuples( + list(zip(list('WWXX'), list('yzyz'))), + names=['www', 'yyy'])) + + exp = tm.SubclassedDataFrame([ + [10, 20, 11, 21, 12.0, 22.0, 13.0, 23.0], + [30, 40, 31, 41, 32.0, 42.0, 33.0, 43.0]], + index=Index(['A', 'B'], name='aaa'), + columns=MultiIndex.from_tuples(list(zip( + list('WWWWXXXX'), list('yyzzyyzz'), list('cdcdcdcd'))), + names=['www', 'yyy', 'ccc'])) + + res = df.unstack() + tm.assert_frame_equal(res, exp) + + res = df.unstack('ccc') + tm.assert_frame_equal(res, exp) + + exp = tm.SubclassedDataFrame([ + [10, 30, 11, 31, 12.0, 32.0, 13.0, 33.0], + [20, 40, 21, 41, 22.0, 42.0, 23.0, 43.0]], + index=Index(['c', 'd'], name='ccc'), + columns=MultiIndex.from_tuples(list(zip( + list('WWWWXXXX'), list('yyzzyyzz'), list('ABABABAB'))), + names=['www', 'yyy', 'aaa'])) + + res = df.unstack('aaa') + tm.assert_frame_equal(res, exp) + + def test_subclass_pivot(self): + # GH 15564 + df = tm.SubclassedDataFrame({ + 'index': ['A', 'B', 'C', 'C', 'B', 'A'], + 'columns': ['One', 'One', 'One', 'Two', 'Two', 'Two'], + 'values': [1., 2., 3., 3., 2., 1.]}) + + pivoted = df.pivot( + index='index', columns='columns', values='values') + + expected = tm.SubclassedDataFrame({ + 'One': {'A': 1., 'B': 2., 'C': 3.}, + 'Two': {'A': 1., 'B': 2., 'C': 3.}}) + + expected.index.name, expected.columns.name = 'index', 'columns' + + tm.assert_frame_equal(pivoted, expected) + + def test_subclassed_melt(self): + # GH 15564 + cheese = tm.SubclassedDataFrame({ + 'first': ['John', 'Mary'], + 'last': ['Doe', 'Bo'], + 'height': [5.5, 
6.0], + 'weight': [130, 150]}) + + melted = pd.melt(cheese, id_vars=['first', 'last']) + + expected = tm.SubclassedDataFrame([ + ['John', 'Doe', 'height', 5.5], + ['Mary', 'Bo', 'height', 6.0], + ['John', 'Doe', 'weight', 130], + ['Mary', 'Bo', 'weight', 150]], + columns=['first', 'last', 'variable', 'value']) + + tm.assert_frame_equal(melted, expected) + + def test_subclassed_wide_to_long(self): + # GH 9762 + + np.random.seed(123) + x = np.random.randn(3) + df = tm.SubclassedDataFrame({ + "A1970": {0: "a", 1: "b", 2: "c"}, + "A1980": {0: "d", 1: "e", 2: "f"}, + "B1970": {0: 2.5, 1: 1.2, 2: .7}, + "B1980": {0: 3.2, 1: 1.3, 2: .1}, + "X": dict(zip(range(3), x))}) + + df["id"] = df.index + exp_data = {"X": x.tolist() + x.tolist(), + "A": ['a', 'b', 'c', 'd', 'e', 'f'], + "B": [2.5, 1.2, 0.7, 3.2, 1.3, 0.1], + "year": [1970, 1970, 1970, 1980, 1980, 1980], + "id": [0, 1, 2, 0, 1, 2]} + expected = tm.SubclassedDataFrame(exp_data) + expected = expected.set_index(['id', 'year'])[["X", "A", "B"]] + long_frame = pd.wide_to_long(df, ["A", "B"], i="id", j="year") + + tm.assert_frame_equal(long_frame, expected) diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index 0ca25735fc03f..a3ba34ae92283 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -9,7 +9,7 @@ import numpy as np from pandas.compat import (lmap, range, lrange, StringIO, u) -from pandas.core.common import _all_none +import pandas.core.common as com from pandas.errors import ParserError from pandas import (DataFrame, Index, Series, MultiIndex, Timestamp, date_range, read_csv, compat, to_datetime) @@ -21,7 +21,6 @@ ensure_clean, makeCustomDataframe as mkdf) import pandas.util.testing as tm -import pandas.util._test_decorators as td from pandas.tests.frame.common import TestData @@ -572,7 +571,7 @@ def _make_frame(names=None): df = _make_frame(True) df.to_csv(path, index=False) result = read_csv(path, header=[0, 1]) - assert 
_all_none(*result.columns.names) + assert com._all_none(*result.columns.names) result.columns.names = df.columns.names assert_frame_equal(df, result) @@ -920,73 +919,29 @@ def test_to_csv_path_is_none(self): recons = pd.read_csv(StringIO(csv_str), index_col=0) assert_frame_equal(self.frame, recons) - def test_to_csv_compression_gzip(self): - # GH7615 - # use the compression kw in to_csv - df = DataFrame([[0.123456, 0.234567, 0.567567], - [12.32112, 123123.2, 321321.2]], - index=['A', 'B'], columns=['X', 'Y', 'Z']) - - with ensure_clean() as filename: - - df.to_csv(filename, compression="gzip") - - # test the round trip - to_csv -> read_csv - rs = read_csv(filename, compression="gzip", index_col=0) - assert_frame_equal(df, rs) - - # explicitly make sure file is gziped - import gzip - f = gzip.open(filename, 'rb') - text = f.read().decode('utf8') - f.close() - for col in df.columns: - assert col in text + def test_to_csv_compression(self, compression_no_zip): - def test_to_csv_compression_bz2(self): - # GH7615 - # use the compression kw in to_csv df = DataFrame([[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]], index=['A', 'B'], columns=['X', 'Y', 'Z']) with ensure_clean() as filename: - df.to_csv(filename, compression="bz2") + df.to_csv(filename, compression=compression_no_zip) # test the round trip - to_csv -> read_csv - rs = read_csv(filename, compression="bz2", index_col=0) + rs = read_csv(filename, compression=compression_no_zip, + index_col=0) assert_frame_equal(df, rs) - # explicitly make sure file is bz2ed - import bz2 - f = bz2.BZ2File(filename, 'rb') - text = f.read().decode('utf8') - f.close() - for col in df.columns: - assert col in text - - @td.skip_if_no_lzma - def test_to_csv_compression_xz(self): - # GH11852 - # use the compression kw in to_csv - df = DataFrame([[0.123456, 0.234567, 0.567567], - [12.32112, 123123.2, 321321.2]], - index=['A', 'B'], columns=['X', 'Y', 'Z']) - - with ensure_clean() as filename: - - df.to_csv(filename, 
compression="xz") - - # test the round trip - to_csv -> read_csv - rs = read_csv(filename, compression="xz", index_col=0) - assert_frame_equal(df, rs) + # explicitly make sure file is compressed + with tm.decompress_file(filename, compression_no_zip) as fh: + text = fh.read().decode('utf8') + for col in df.columns: + assert col in text - # explicitly make sure file is xzipped - lzma = compat.import_lzma() - f = lzma.open(filename, 'rb') - assert_frame_equal(df, read_csv(f, index_col=0)) - f.close() + with tm.decompress_file(filename, compression_no_zip) as fh: + assert_frame_equal(df, read_csv(fh, index_col=0)) def test_to_csv_compression_value_error(self): # GH7615 diff --git a/pandas/tests/generic/test_label_or_level_utils.py b/pandas/tests/generic/test_label_or_level_utils.py index 456cb48020500..8b133e654a869 100644 --- a/pandas/tests/generic/test_label_or_level_utils.py +++ b/pandas/tests/generic/test_label_or_level_utils.py @@ -46,7 +46,7 @@ def df_duplabels(df): @pytest.fixture def panel(): - with tm.assert_produces_warning(DeprecationWarning, + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): return pd.Panel() @@ -175,8 +175,7 @@ def test_check_label_or_level_ambiguity_df(df_ambig, axis): # df_ambig has both an on-axis level and off-axis label named L1 # Therefore L1 is ambiguous with tm.assert_produces_warning(FutureWarning, - clear=True, - check_stacklevel=False) as w: + clear=True) as w: assert df_ambig._check_label_or_level_ambiguity('L1', axis=axis) warning_msg = w[0].message.args[0] @@ -245,7 +244,8 @@ def assert_label_values(frame, labels, axis): else: expected = frame.loc[label]._values - result = frame._get_label_or_level_values(label, axis=axis) + result = frame._get_label_or_level_values(label, axis=axis, + stacklevel=2) assert array_equivalent(expected, result) @@ -288,8 +288,7 @@ def test_get_label_or_level_values_df_ambig(df_ambig, axis): # df has both an on-axis level and off-axis label named L1 # Therefore L1 is 
ambiguous but will default to label - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): + with tm.assert_produces_warning(FutureWarning): assert_label_values(df_ambig, ['L1'], axis=axis) # df has an on-axis level named L2 and it is not ambiguous diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index caf2365a54ec8..7cc6c2fa7b88c 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -10,285 +10,298 @@ import pandas as pd from pandas import concat, DataFrame, Index, MultiIndex, Series -from pandas.core.groupby import SpecificationError +from pandas.core.groupby import Grouping, SpecificationError from pandas.compat import OrderedDict import pandas.util.testing as tm -class TestGroupByAggregate(object): - - def setup_method(self, method): - self.ts = tm.makeTimeSeries() - - self.seriesd = tm.getSeriesData() - self.tsd = tm.getTimeSeriesData() - self.frame = DataFrame(self.seriesd) - self.tsframe = DataFrame(self.tsd) - - self.df = DataFrame( - {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], - 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], - 'C': np.random.randn(8), - 'D': np.random.randn(8)}) - - self.df_mixed_floats = DataFrame( - {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], - 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], - 'C': np.random.randn(8), - 'D': np.array(np.random.randn(8), dtype='float32')}) - - index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], - ['one', 'two', 'three']], - labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], - [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], - names=['first', 'second']) - self.mframe = DataFrame(np.random.randn(10, 3), index=index, - columns=['A', 'B', 'C']) - - self.three_group = DataFrame( - {'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar', - 'foo', 'foo', 'foo'], - 'B': ['one', 'one', 'one', 'two', 
'one', 'one', 'one', 'two', - 'two', 'two', 'one'], - 'C': ['dull', 'dull', 'shiny', 'dull', 'dull', 'shiny', 'shiny', - 'dull', 'shiny', 'shiny', 'shiny'], - 'D': np.random.randn(11), - 'E': np.random.randn(11), - 'F': np.random.randn(11)}) - - def test_agg_regression1(self): - grouped = self.tsframe.groupby([lambda x: x.year, lambda x: x.month]) - result = grouped.agg(np.mean) - expected = grouped.mean() - tm.assert_frame_equal(result, expected) - - def test_agg_must_agg(self): - grouped = self.df.groupby('A')['C'] - - msg = "Must produce aggregated value" - with tm.assert_raises_regex(Exception, msg): - grouped.agg(lambda x: x.describe()) - with tm.assert_raises_regex(Exception, msg): - grouped.agg(lambda x: x.index[:2]) - - def test_agg_ser_multi_key(self): - # TODO(wesm): unused - ser = self.df.C # noqa - - f = lambda x: x.sum() - results = self.df.C.groupby([self.df.A, self.df.B]).aggregate(f) - expected = self.df.groupby(['A', 'B']).sum()['C'] - tm.assert_series_equal(results, expected) - - def test_agg_apply_corner(self): - # nothing to group, all NA - grouped = self.ts.groupby(self.ts * np.nan) - assert self.ts.dtype == np.float64 - - # groupby float64 values results in Float64Index - exp = Series([], dtype=np.float64, - index=pd.Index([], dtype=np.float64)) - tm.assert_series_equal(grouped.sum(), exp) - tm.assert_series_equal(grouped.agg(np.sum), exp) - tm.assert_series_equal(grouped.apply(np.sum), exp, - check_index_type=False) - - # DataFrame - grouped = self.tsframe.groupby(self.tsframe['A'] * np.nan) - exp_df = DataFrame(columns=self.tsframe.columns, dtype=float, - index=pd.Index([], dtype=np.float64)) - tm.assert_frame_equal(grouped.sum(), exp_df, check_names=False) - tm.assert_frame_equal(grouped.agg(np.sum), exp_df, check_names=False) - tm.assert_frame_equal(grouped.apply(np.sum), exp_df.iloc[:, :0], - check_names=False) - - def test_agg_grouping_is_list_tuple(self): - from pandas.core.groupby import Grouping - - df = tm.makeTimeDataFrame() - - 
grouped = df.groupby(lambda x: x.year) - grouper = grouped.grouper.groupings[0].grouper - grouped.grouper.groupings[0] = Grouping(self.ts.index, list(grouper)) - - result = grouped.agg(np.mean) - expected = grouped.mean() - tm.assert_frame_equal(result, expected) - - grouped.grouper.groupings[0] = Grouping(self.ts.index, tuple(grouper)) - - result = grouped.agg(np.mean) - expected = grouped.mean() - tm.assert_frame_equal(result, expected) - - def test_agg_python_multiindex(self): - grouped = self.mframe.groupby(['A', 'B']) - - result = grouped.agg(np.mean) - expected = grouped.mean() - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize('groupbyfunc', [ - lambda x: x.weekday(), - [lambda x: x.month, lambda x: x.weekday()], - ]) - def test_aggregate_str_func(self, groupbyfunc): - grouped = self.tsframe.groupby(groupbyfunc) - - # single series - result = grouped['A'].agg('std') - expected = grouped['A'].std() - tm.assert_series_equal(result, expected) - - # group frame by function name - result = grouped.aggregate('var') - expected = grouped.var() - tm.assert_frame_equal(result, expected) - - # group frame by function dict - result = grouped.agg(OrderedDict([['A', 'var'], - ['B', 'std'], - ['C', 'mean'], - ['D', 'sem']])) - expected = DataFrame(OrderedDict([['A', grouped['A'].var()], - ['B', grouped['B'].std()], - ['C', grouped['C'].mean()], - ['D', grouped['D'].sem()]])) - tm.assert_frame_equal(result, expected) - - def test_aggregate_item_by_item(self): - df = self.df.copy() - df['E'] = ['a'] * len(self.df) - grouped = self.df.groupby('A') - - aggfun = lambda ser: ser.size - result = grouped.agg(aggfun) - foo = (self.df.A == 'foo').sum() - bar = (self.df.A == 'bar').sum() - K = len(result.columns) - - # GH5782 - # odd comparisons can result here, so cast to make easy - exp = pd.Series(np.array([foo] * K), index=list('BCD'), - dtype=np.float64, name='foo') - tm.assert_series_equal(result.xs('foo'), exp) - - exp = pd.Series(np.array([bar] * K), 
index=list('BCD'), - dtype=np.float64, name='bar') - tm.assert_almost_equal(result.xs('bar'), exp) - - def aggfun(ser): - return ser.size - - result = DataFrame().groupby(self.df.A).agg(aggfun) - assert isinstance(result, DataFrame) - assert len(result) == 0 - - def test_wrap_agg_out(self): - grouped = self.three_group.groupby(['A', 'B']) - - def func(ser): - if ser.dtype == np.object: - raise TypeError - else: - return ser.sum() - - result = grouped.aggregate(func) - exp_grouped = self.three_group.loc[:, self.three_group.columns != 'C'] - expected = exp_grouped.groupby(['A', 'B']).aggregate(func) - tm.assert_frame_equal(result, expected) - - def test_agg_multiple_functions_maintain_order(self): - # GH #610 - funcs = [('mean', np.mean), ('max', np.max), ('min', np.min)] - result = self.df.groupby('A')['C'].agg(funcs) - exp_cols = Index(['mean', 'max', 'min']) - - tm.assert_index_equal(result.columns, exp_cols) - - def test_multiple_functions_tuples_and_non_tuples(self): - # #1359 - funcs = [('foo', 'mean'), 'std'] - ex_funcs = [('foo', 'mean'), ('std', 'std')] - - result = self.df.groupby('A')['C'].agg(funcs) - expected = self.df.groupby('A')['C'].agg(ex_funcs) - tm.assert_frame_equal(result, expected) - - result = self.df.groupby('A').agg(funcs) - expected = self.df.groupby('A').agg(ex_funcs) - tm.assert_frame_equal(result, expected) - - def test_agg_multiple_functions_too_many_lambdas(self): - grouped = self.df.groupby('A') - funcs = ['mean', lambda x: x.mean(), lambda x: x.std()] - - msg = 'Function names must be unique, found multiple named ' - with tm.assert_raises_regex(SpecificationError, msg): - grouped.agg(funcs) - - def test_more_flexible_frame_multi_function(self): - grouped = self.df.groupby('A') - - exmean = grouped.agg(OrderedDict([['C', np.mean], ['D', np.mean]])) - exstd = grouped.agg(OrderedDict([['C', np.std], ['D', np.std]])) - - expected = concat([exmean, exstd], keys=['mean', 'std'], axis=1) - expected = expected.swaplevel(0, 1, 
axis=1).sort_index(level=0, axis=1) - - d = OrderedDict([['C', [np.mean, np.std]], ['D', [np.mean, np.std]]]) - result = grouped.aggregate(d) +@pytest.fixture +def ts(): + return tm.makeTimeSeries() + + +@pytest.fixture +def tsframe(): + return DataFrame(tm.getTimeSeriesData()) + + +@pytest.fixture +def df(): + return DataFrame( + {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], + 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], + 'C': np.random.randn(8), + 'D': np.random.randn(8)}) + + +@pytest.fixture +def mframe(): + index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], + ['one', 'two', 'three']], + labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=['first', 'second']) + return DataFrame(np.random.randn(10, 3), + index=index, + columns=['A', 'B', 'C']) + + +@pytest.fixture +def three_group(): + return DataFrame( + {'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', + 'bar', 'bar', 'foo', 'foo', 'foo'], + 'B': ['one', 'one', 'one', 'two', 'one', 'one', + 'one', 'two', 'two', 'two', 'one'], + 'C': ['dull', 'dull', 'shiny', 'dull', 'dull', 'shiny', + 'shiny', 'dull', 'shiny', 'shiny', 'shiny'], + 'D': np.random.randn(11), + 'E': np.random.randn(11), + 'F': np.random.randn(11)}) + + +def test_agg_regression1(tsframe): + grouped = tsframe.groupby([lambda x: x.year, lambda x: x.month]) + result = grouped.agg(np.mean) + expected = grouped.mean() + tm.assert_frame_equal(result, expected) + + +def test_agg_must_agg(df): + grouped = df.groupby('A')['C'] + + msg = "Must produce aggregated value" + with tm.assert_raises_regex(Exception, msg): + grouped.agg(lambda x: x.describe()) + with tm.assert_raises_regex(Exception, msg): + grouped.agg(lambda x: x.index[:2]) + + +def test_agg_ser_multi_key(df): + # TODO(wesm): unused + ser = df.C # noqa + + f = lambda x: x.sum() + results = df.C.groupby([df.A, df.B]).aggregate(f) + expected = df.groupby(['A', 'B']).sum()['C'] + tm.assert_series_equal(results, expected) + 
+ +def test_agg_apply_corner(ts, tsframe): + # nothing to group, all NA + grouped = ts.groupby(ts * np.nan) + assert ts.dtype == np.float64 + + # groupby float64 values results in Float64Index + exp = Series([], dtype=np.float64, + index=pd.Index([], dtype=np.float64)) + tm.assert_series_equal(grouped.sum(), exp) + tm.assert_series_equal(grouped.agg(np.sum), exp) + tm.assert_series_equal(grouped.apply(np.sum), exp, + check_index_type=False) + + # DataFrame + grouped = tsframe.groupby(tsframe['A'] * np.nan) + exp_df = DataFrame(columns=tsframe.columns, dtype=float, + index=pd.Index([], dtype=np.float64)) + tm.assert_frame_equal(grouped.sum(), exp_df, check_names=False) + tm.assert_frame_equal(grouped.agg(np.sum), exp_df, check_names=False) + tm.assert_frame_equal(grouped.apply(np.sum), exp_df.iloc[:, :0], + check_names=False) + + +def test_agg_grouping_is_list_tuple(ts): + df = tm.makeTimeDataFrame() + + grouped = df.groupby(lambda x: x.year) + grouper = grouped.grouper.groupings[0].grouper + grouped.grouper.groupings[0] = Grouping(ts.index, list(grouper)) + + result = grouped.agg(np.mean) + expected = grouped.mean() + tm.assert_frame_equal(result, expected) + + grouped.grouper.groupings[0] = Grouping(ts.index, tuple(grouper)) + + result = grouped.agg(np.mean) + expected = grouped.mean() + tm.assert_frame_equal(result, expected) + + +def test_agg_python_multiindex(mframe): + grouped = mframe.groupby(['A', 'B']) + + result = grouped.agg(np.mean) + expected = grouped.mean() + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize('groupbyfunc', [ + lambda x: x.weekday(), + [lambda x: x.month, lambda x: x.weekday()], +]) +def test_aggregate_str_func(tsframe, groupbyfunc): + grouped = tsframe.groupby(groupbyfunc) + + # single series + result = grouped['A'].agg('std') + expected = grouped['A'].std() + tm.assert_series_equal(result, expected) + + # group frame by function name + result = grouped.aggregate('var') + expected = grouped.var() + 
tm.assert_frame_equal(result, expected) + + # group frame by function dict + result = grouped.agg(OrderedDict([['A', 'var'], + ['B', 'std'], + ['C', 'mean'], + ['D', 'sem']])) + expected = DataFrame(OrderedDict([['A', grouped['A'].var()], + ['B', grouped['B'].std()], + ['C', grouped['C'].mean()], + ['D', grouped['D'].sem()]])) + tm.assert_frame_equal(result, expected) + + +def test_aggregate_item_by_item(df): + grouped = df.groupby('A') + + aggfun = lambda ser: ser.size + result = grouped.agg(aggfun) + foo = (df.A == 'foo').sum() + bar = (df.A == 'bar').sum() + K = len(result.columns) + + # GH5782 + # odd comparisons can result here, so cast to make easy + exp = pd.Series(np.array([foo] * K), index=list('BCD'), + dtype=np.float64, name='foo') + tm.assert_series_equal(result.xs('foo'), exp) - tm.assert_frame_equal(result, expected) + exp = pd.Series(np.array([bar] * K), index=list('BCD'), + dtype=np.float64, name='bar') + tm.assert_almost_equal(result.xs('bar'), exp) - # be careful - result = grouped.aggregate(OrderedDict([['C', np.mean], - ['D', [np.mean, np.std]]])) - expected = grouped.aggregate(OrderedDict([['C', np.mean], - ['D', [np.mean, np.std]]])) - tm.assert_frame_equal(result, expected) + def aggfun(ser): + return ser.size - def foo(x): - return np.mean(x) + result = DataFrame().groupby(df.A).agg(aggfun) + assert isinstance(result, DataFrame) + assert len(result) == 0 - def bar(x): - return np.std(x, ddof=1) - # this uses column selection & renaming - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - d = OrderedDict([['C', np.mean], - ['D', OrderedDict([['foo', np.mean], - ['bar', np.std]])]]) - result = grouped.aggregate(d) +def test_wrap_agg_out(three_group): + grouped = three_group.groupby(['A', 'B']) - d = OrderedDict([['C', [np.mean]], ['D', [foo, bar]]]) + def func(ser): + if ser.dtype == np.object: + raise TypeError + else: + return ser.sum() + + result = grouped.aggregate(func) + exp_grouped = three_group.loc[:, 
three_group.columns != 'C'] + expected = exp_grouped.groupby(['A', 'B']).aggregate(func) + tm.assert_frame_equal(result, expected) + + +def test_agg_multiple_functions_maintain_order(df): + # GH #610 + funcs = [('mean', np.mean), ('max', np.max), ('min', np.min)] + result = df.groupby('A')['C'].agg(funcs) + exp_cols = Index(['mean', 'max', 'min']) + + tm.assert_index_equal(result.columns, exp_cols) + + +def test_multiple_functions_tuples_and_non_tuples(df): + # #1359 + funcs = [('foo', 'mean'), 'std'] + ex_funcs = [('foo', 'mean'), ('std', 'std')] + + result = df.groupby('A')['C'].agg(funcs) + expected = df.groupby('A')['C'].agg(ex_funcs) + tm.assert_frame_equal(result, expected) + + result = df.groupby('A').agg(funcs) + expected = df.groupby('A').agg(ex_funcs) + tm.assert_frame_equal(result, expected) + + +def test_agg_multiple_functions_too_many_lambdas(df): + grouped = df.groupby('A') + funcs = ['mean', lambda x: x.mean(), lambda x: x.std()] + + msg = 'Function names must be unique, found multiple named ' + with tm.assert_raises_regex(SpecificationError, msg): + grouped.agg(funcs) + + +def test_more_flexible_frame_multi_function(df): + grouped = df.groupby('A') + + exmean = grouped.agg(OrderedDict([['C', np.mean], ['D', np.mean]])) + exstd = grouped.agg(OrderedDict([['C', np.std], ['D', np.std]])) + + expected = concat([exmean, exstd], keys=['mean', 'std'], axis=1) + expected = expected.swaplevel(0, 1, axis=1).sort_index(level=0, axis=1) + + d = OrderedDict([['C', [np.mean, np.std]], ['D', [np.mean, np.std]]]) + result = grouped.aggregate(d) + + tm.assert_frame_equal(result, expected) + + # be careful + result = grouped.aggregate(OrderedDict([['C', np.mean], + ['D', [np.mean, np.std]]])) + expected = grouped.aggregate(OrderedDict([['C', np.mean], + ['D', [np.mean, np.std]]])) + tm.assert_frame_equal(result, expected) + + def foo(x): + return np.mean(x) + + def bar(x): + return np.std(x, ddof=1) + + # this uses column selection & renaming + with 
tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + d = OrderedDict([['C', np.mean], + ['D', OrderedDict([['foo', np.mean], + ['bar', np.std]])]]) + result = grouped.aggregate(d) + + d = OrderedDict([['C', [np.mean]], ['D', [foo, bar]]]) + expected = grouped.aggregate(d) + + tm.assert_frame_equal(result, expected) + + +def test_multi_function_flexible_mix(df): + # GH #1268 + grouped = df.groupby('A') + + # Expected + d = OrderedDict([['C', OrderedDict([['foo', 'mean'], ['bar', 'std']])], + ['D', {'sum': 'sum'}]]) + # this uses column selection & renaming + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): expected = grouped.aggregate(d) - tm.assert_frame_equal(result, expected) - - def test_multi_function_flexible_mix(self): - # GH #1268 - grouped = self.df.groupby('A') - - # Expected - d = OrderedDict([['C', OrderedDict([['foo', 'mean'], ['bar', 'std']])], - ['D', {'sum': 'sum'}]]) - # this uses column selection & renaming - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - expected = grouped.aggregate(d) - - # Test 1 - d = OrderedDict([['C', OrderedDict([['foo', 'mean'], ['bar', 'std']])], - ['D', 'sum']]) - # this uses column selection & renaming - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = grouped.aggregate(d) - tm.assert_frame_equal(result, expected) - - # Test 2 - d = OrderedDict([['C', OrderedDict([['foo', 'mean'], ['bar', 'std']])], - ['D', ['sum']]]) - # this uses column selection & renaming - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = grouped.aggregate(d) - tm.assert_frame_equal(result, expected) + # Test 1 + d = OrderedDict([['C', OrderedDict([['foo', 'mean'], ['bar', 'std']])], + ['D', 'sum']]) + # this uses column selection & renaming + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = grouped.aggregate(d) + tm.assert_frame_equal(result, expected) + + # Test 2 + d = 
OrderedDict([['C', OrderedDict([['foo', 'mean'], ['bar', 'std']])], + ['D', ['sum']]]) + # this uses column selection & renaming + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = grouped.aggregate(d) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py index f8e44b1548819..575eae1916f4c 100644 --- a/pandas/tests/groupby/aggregate/test_other.py +++ b/pandas/tests/groupby/aggregate/test_other.py @@ -8,13 +8,15 @@ import pytest -from datetime import datetime, timedelta +import datetime as dt from functools import partial import numpy as np import pandas as pd -from pandas import date_range, DataFrame, Index, MultiIndex, Series +from pandas import ( + date_range, DataFrame, Index, MultiIndex, PeriodIndex, period_range, Series +) from pandas.core.groupby import SpecificationError from pandas.io.formats.printing import pprint_thing import pandas.util.testing as tm @@ -50,7 +52,8 @@ def test_agg_datetimes_mixed(): 'value': [x[2] for x in data]}) data = [[row[0], - datetime.strptime(row[1], '%Y-%m-%d').date() if row[1] else None, + (dt.datetime.strptime(row[1], '%Y-%m-%d').date() + if row[1] else None), row[2]] for row in data] @@ -68,7 +71,6 @@ def test_agg_datetimes_mixed(): def test_agg_period_index(): - from pandas import period_range, PeriodIndex prng = period_range('2012-1-1', freq='M', periods=3) df = DataFrame(np.random.randn(3, 2), index=prng) rs = df.groupby(level=0).sum() @@ -125,7 +127,7 @@ def test_agg_dict_parameter_cast_result_dtypes(): def test_agg_cast_results_dtypes(): # similar to GH12821 # xref #11444 - u = [datetime(2015, x + 1, 1) for x in range(12)] + u = [dt.datetime(2015, x + 1, 1) for x in range(12)] v = list('aaabbbbbbccd') df = pd.DataFrame({'X': v, 'Y': u}) @@ -292,9 +294,7 @@ def test_agg_nested_dicts(): def test_agg_item_by_item_raise_typeerror(): - from numpy.random import randint - - df = DataFrame(randint(10, 
size=(20, 10))) + df = DataFrame(np.random.randint(10, size=(20, 10))) def raiseException(df): pprint_thing('----------------------------------------') @@ -344,7 +344,6 @@ def P1(a): except Exception: return np.nan - import datetime as dt df = DataFrame({'col1': [1, 2, 3, 4], 'col2': [10, 25, 26, 31], 'date': [dt.date(2013, 2, 10), dt.date(2013, 2, 10), @@ -403,7 +402,8 @@ def test_agg_timezone_round_trip(): # GH 15426 ts = pd.Timestamp("2016-01-01 12:00:00", tz='US/Pacific') df = pd.DataFrame({'a': 1, - 'b': [ts + timedelta(minutes=nn) for nn in range(10)]}) + 'b': [ts + dt.timedelta(minutes=nn) + for nn in range(10)]}) result1 = df.groupby('a')['b'].agg(np.min).iloc[0] result2 = df.groupby('a')['b'].agg(lambda x: np.min(x)).iloc[0] diff --git a/pandas/tests/groupby/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py index 8b95455b53d22..979b2f7a539af 100644 --- a/pandas/tests/groupby/test_bin_groupby.py +++ b/pandas/tests/groupby/test_bin_groupby.py @@ -9,7 +9,7 @@ from pandas import Index, isna from pandas.util.testing import assert_almost_equal import pandas.util.testing as tm -from pandas._libs import lib, groupby +from pandas._libs import lib, groupby, reduction def test_series_grouper(): @@ -19,7 +19,7 @@ def test_series_grouper(): labels = np.array([-1, -1, -1, 0, 0, 0, 1, 1, 1, 1], dtype=np.int64) - grouper = lib.SeriesGrouper(obj, np.mean, labels, 2, dummy) + grouper = reduction.SeriesGrouper(obj, np.mean, labels, 2, dummy) result, counts = grouper.get_result() expected = np.array([obj[3:6].mean(), obj[6:].mean()]) @@ -36,7 +36,7 @@ def test_series_bin_grouper(): bins = np.array([3, 6]) - grouper = lib.SeriesBinGrouper(obj, np.mean, bins, dummy) + grouper = reduction.SeriesBinGrouper(obj, np.mean, bins, dummy) result, counts = grouper.get_result() expected = np.array([obj[:3].mean(), obj[3:6].mean(), obj[6:].mean()]) @@ -127,26 +127,27 @@ def test_int_index(self): from pandas.core.series import Series arr = np.random.randn(100, 4) - result = 
lib.reduce(arr, np.sum, labels=Index(np.arange(4))) + result = reduction.reduce(arr, np.sum, labels=Index(np.arange(4))) expected = arr.sum(0) assert_almost_equal(result, expected) - result = lib.reduce(arr, np.sum, axis=1, labels=Index(np.arange(100))) + result = reduction.reduce(arr, np.sum, axis=1, + labels=Index(np.arange(100))) expected = arr.sum(1) assert_almost_equal(result, expected) dummy = Series(0., index=np.arange(100)) - result = lib.reduce(arr, np.sum, dummy=dummy, - labels=Index(np.arange(4))) + result = reduction.reduce(arr, np.sum, dummy=dummy, + labels=Index(np.arange(4))) expected = arr.sum(0) assert_almost_equal(result, expected) dummy = Series(0., index=np.arange(4)) - result = lib.reduce(arr, np.sum, axis=1, dummy=dummy, - labels=Index(np.arange(100))) + result = reduction.reduce(arr, np.sum, axis=1, dummy=dummy, + labels=Index(np.arange(100))) expected = arr.sum(1) assert_almost_equal(result, expected) - result = lib.reduce(arr, np.sum, axis=1, dummy=dummy, - labels=Index(np.arange(100))) + result = reduction.reduce(arr, np.sum, axis=1, dummy=dummy, + labels=Index(np.arange(100))) assert_almost_equal(result, expected) diff --git a/pandas/tests/groupby/test_index_as_string.py b/pandas/tests/groupby/test_index_as_string.py index cee78eab3a636..9fe677664049e 100644 --- a/pandas/tests/groupby/test_index_as_string.py +++ b/pandas/tests/groupby/test_index_as_string.py @@ -99,7 +99,7 @@ def test_grouper_column_index_level_precedence(frame, frame['inner'] = [1, 1, 1, 1, 1, 1] # Performing a groupby with strings should produce warning - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with tm.assert_produces_warning(FutureWarning): result = frame.groupby(key_strs).mean() # Grouping with key Grouper should produce the same result and no warning diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index 8f72da293a50c..4159d0f709a13 100644 --- a/pandas/tests/groupby/test_transform.py +++ 
b/pandas/tests/groupby/test_transform.py @@ -582,3 +582,28 @@ def test_transform_with_non_scalar_group(self): 'group.*', df.groupby(axis=1, level=1).transform, lambda z: z.div(z.sum(axis=1), axis=0)) + + @pytest.mark.parametrize('cols,exp,comp_func', [ + ('a', pd.Series([1, 1, 1], name='a'), tm.assert_series_equal), + (['a', 'c'], pd.DataFrame({'a': [1, 1, 1], 'c': [1, 1, 1]}), + tm.assert_frame_equal) + ]) + @pytest.mark.parametrize('agg_func', [ + 'count', 'rank', 'size']) + def test_transform_numeric_ret(self, cols, exp, comp_func, agg_func): + if agg_func == 'size' and isinstance(cols, list): + pytest.xfail("'size' transformation not supported with " + "NDFrameGroupy") + + # GH 19200 + df = pd.DataFrame( + {'a': pd.date_range('2018-01-01', periods=3), + 'b': range(3), + 'c': range(7, 10)}) + + result = df.groupby('b')[cols].transform(agg_func) + + if agg_func == 'rank': + exp = exp.astype('float') + + comp_func(result, exp) diff --git a/pandas/tests/indexes/datetimes/test_arithmetic.py b/pandas/tests/indexes/datetimes/test_arithmetic.py index 381e2ef3041e7..480f025db17ca 100644 --- a/pandas/tests/indexes/datetimes/test_arithmetic.py +++ b/pandas/tests/indexes/datetimes/test_arithmetic.py @@ -1,12 +1,14 @@ # -*- coding: utf-8 -*- import warnings from datetime import datetime, timedelta +import operator import pytest import numpy as np import pandas as pd +from pandas.compat.numpy import np_datetime64_compat import pandas.util.testing as tm from pandas.errors import PerformanceWarning from pandas import (Timestamp, Timedelta, Series, @@ -41,6 +43,187 @@ def addend(request): return request.param +class TestDatetimeIndexComparisons(object): + # TODO: De-duplicate with test_comparisons_nat below + def test_dti_cmp_nat(self): + left = pd.DatetimeIndex([pd.Timestamp('2011-01-01'), pd.NaT, + pd.Timestamp('2011-01-03')]) + right = pd.DatetimeIndex([pd.NaT, pd.NaT, pd.Timestamp('2011-01-03')]) + + for lhs, rhs in [(left, right), + (left.astype(object), 
right.astype(object))]: + result = rhs == lhs + expected = np.array([False, False, True]) + tm.assert_numpy_array_equal(result, expected) + + result = lhs != rhs + expected = np.array([True, True, False]) + tm.assert_numpy_array_equal(result, expected) + + expected = np.array([False, False, False]) + tm.assert_numpy_array_equal(lhs == pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT == rhs, expected) + + expected = np.array([True, True, True]) + tm.assert_numpy_array_equal(lhs != pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT != lhs, expected) + + expected = np.array([False, False, False]) + tm.assert_numpy_array_equal(lhs < pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT > lhs, expected) + + @pytest.mark.parametrize('op', [operator.eq, operator.ne, + operator.gt, operator.ge, + operator.lt, operator.le]) + def test_comparison_tzawareness_compat(self, op): + # GH#18162 + dr = pd.date_range('2016-01-01', periods=6) + dz = dr.tz_localize('US/Pacific') + + with pytest.raises(TypeError): + op(dr, dz) + with pytest.raises(TypeError): + op(dr, list(dz)) + with pytest.raises(TypeError): + op(dz, dr) + with pytest.raises(TypeError): + op(dz, list(dr)) + + # Check that there isn't a problem aware-aware and naive-naive do not + # raise + assert (dr == dr).all() + assert (dr == list(dr)).all() + assert (dz == dz).all() + assert (dz == list(dz)).all() + + # Check comparisons against scalar Timestamps + ts = pd.Timestamp('2000-03-14 01:59') + ts_tz = pd.Timestamp('2000-03-14 01:59', tz='Europe/Amsterdam') + + assert (dr > ts).all() + with pytest.raises(TypeError): + op(dr, ts_tz) + + assert (dz > ts_tz).all() + with pytest.raises(TypeError): + op(dz, ts) + + @pytest.mark.parametrize('op', [operator.eq, operator.ne, + operator.gt, operator.ge, + operator.lt, operator.le]) + def test_nat_comparison_tzawareness(self, op): + # GH#19276 + # tzaware DatetimeIndex should not raise when compared to NaT + dti = pd.DatetimeIndex(['2014-01-01', pd.NaT, '2014-03-01', 
pd.NaT, + '2014-05-01', '2014-07-01']) + expected = np.array([op == operator.ne] * len(dti)) + result = op(dti, pd.NaT) + tm.assert_numpy_array_equal(result, expected) + + result = op(dti.tz_localize('US/Pacific'), pd.NaT) + tm.assert_numpy_array_equal(result, expected) + + def test_comparisons_coverage(self): + rng = date_range('1/1/2000', periods=10) + + # raise TypeError for now + pytest.raises(TypeError, rng.__lt__, rng[3].value) + + result = rng == list(rng) + exp = rng == rng + tm.assert_numpy_array_equal(result, exp) + + def test_comparisons_nat(self): + + fidx1 = pd.Index([1.0, np.nan, 3.0, np.nan, 5.0, 7.0]) + fidx2 = pd.Index([2.0, 3.0, np.nan, np.nan, 6.0, 7.0]) + + didx1 = pd.DatetimeIndex(['2014-01-01', pd.NaT, '2014-03-01', pd.NaT, + '2014-05-01', '2014-07-01']) + didx2 = pd.DatetimeIndex(['2014-02-01', '2014-03-01', pd.NaT, pd.NaT, + '2014-06-01', '2014-07-01']) + darr = np.array([np_datetime64_compat('2014-02-01 00:00Z'), + np_datetime64_compat('2014-03-01 00:00Z'), + np_datetime64_compat('nat'), np.datetime64('nat'), + np_datetime64_compat('2014-06-01 00:00Z'), + np_datetime64_compat('2014-07-01 00:00Z')]) + + cases = [(fidx1, fidx2), (didx1, didx2), (didx1, darr)] + + # Check pd.NaT is handles as the same as np.nan + with tm.assert_produces_warning(None): + for idx1, idx2 in cases: + + result = idx1 < idx2 + expected = np.array([True, False, False, False, True, False]) + tm.assert_numpy_array_equal(result, expected) + + result = idx2 > idx1 + expected = np.array([True, False, False, False, True, False]) + tm.assert_numpy_array_equal(result, expected) + + result = idx1 <= idx2 + expected = np.array([True, False, False, False, True, True]) + tm.assert_numpy_array_equal(result, expected) + + result = idx2 >= idx1 + expected = np.array([True, False, False, False, True, True]) + tm.assert_numpy_array_equal(result, expected) + + result = idx1 == idx2 + expected = np.array([False, False, False, False, False, True]) + tm.assert_numpy_array_equal(result, 
expected) + + result = idx1 != idx2 + expected = np.array([True, True, True, True, True, False]) + tm.assert_numpy_array_equal(result, expected) + + with tm.assert_produces_warning(None): + for idx1, val in [(fidx1, np.nan), (didx1, pd.NaT)]: + result = idx1 < val + expected = np.array([False, False, False, False, False, False]) + tm.assert_numpy_array_equal(result, expected) + result = idx1 > val + tm.assert_numpy_array_equal(result, expected) + + result = idx1 <= val + tm.assert_numpy_array_equal(result, expected) + result = idx1 >= val + tm.assert_numpy_array_equal(result, expected) + + result = idx1 == val + tm.assert_numpy_array_equal(result, expected) + + result = idx1 != val + expected = np.array([True, True, True, True, True, True]) + tm.assert_numpy_array_equal(result, expected) + + # Check pd.NaT is handles as the same as np.nan + with tm.assert_produces_warning(None): + for idx1, val in [(fidx1, 3), (didx1, datetime(2014, 3, 1))]: + result = idx1 < val + expected = np.array([True, False, False, False, False, False]) + tm.assert_numpy_array_equal(result, expected) + result = idx1 > val + expected = np.array([False, False, False, False, True, True]) + tm.assert_numpy_array_equal(result, expected) + + result = idx1 <= val + expected = np.array([True, False, True, False, False, False]) + tm.assert_numpy_array_equal(result, expected) + result = idx1 >= val + expected = np.array([False, False, True, False, True, True]) + tm.assert_numpy_array_equal(result, expected) + + result = idx1 == val + expected = np.array([False, False, True, False, False, False]) + tm.assert_numpy_array_equal(result, expected) + + result = idx1 != val + expected = np.array([True, True, False, True, True, True]) + tm.assert_numpy_array_equal(result, expected) + + class TestDatetimeIndexArithmetic(object): def test_dti_add_timestamp_raises(self): @@ -447,6 +630,112 @@ def test_dti_with_offset_series(self, tz, names): tm.assert_series_equal(res3, expected_sub) 
+@pytest.mark.parametrize('klass,assert_func', [ + (Series, tm.assert_series_equal), + (DatetimeIndex, tm.assert_index_equal)]) +def test_dt64_with_offset_array(klass, assert_func): + # GH#10699 + # array of offsets + box = Series if klass is Series else pd.Index + with tm.assert_produces_warning(PerformanceWarning): + s = klass([Timestamp('2000-1-1'), Timestamp('2000-2-1')]) + result = s + box([pd.offsets.DateOffset(years=1), + pd.offsets.MonthEnd()]) + exp = klass([Timestamp('2001-1-1'), Timestamp('2000-2-29')]) + assert_func(result, exp) + + # same offset + result = s + box([pd.offsets.DateOffset(years=1), + pd.offsets.DateOffset(years=1)]) + exp = klass([Timestamp('2001-1-1'), Timestamp('2001-2-1')]) + assert_func(result, exp) + + +@pytest.mark.parametrize('klass,assert_func', [ + (Series, tm.assert_series_equal), + (DatetimeIndex, tm.assert_index_equal)]) +def test_dt64_with_DateOffsets_relativedelta(klass, assert_func): + # GH#10699 + vec = klass([Timestamp('2000-01-05 00:15:00'), + Timestamp('2000-01-31 00:23:00'), + Timestamp('2000-01-01'), + Timestamp('2000-03-31'), + Timestamp('2000-02-29'), + Timestamp('2000-12-31'), + Timestamp('2000-05-15'), + Timestamp('2001-06-15')]) + + # DateOffset relativedelta fastpath + relative_kwargs = [('years', 2), ('months', 5), ('days', 3), + ('hours', 5), ('minutes', 10), ('seconds', 2), + ('microseconds', 5)] + for i, kwd in enumerate(relative_kwargs): + op = pd.DateOffset(**dict([kwd])) + assert_func(klass([x + op for x in vec]), vec + op) + assert_func(klass([x - op for x in vec]), vec - op) + op = pd.DateOffset(**dict(relative_kwargs[:i + 1])) + assert_func(klass([x + op for x in vec]), vec + op) + assert_func(klass([x - op for x in vec]), vec - op) + + +@pytest.mark.parametrize('cls_and_kwargs', [ + 'YearBegin', ('YearBegin', {'month': 5}), + 'YearEnd', ('YearEnd', {'month': 5}), + 'MonthBegin', 'MonthEnd', + 'SemiMonthEnd', 'SemiMonthBegin', + 'Week', ('Week', {'weekday': 3}), + 'BusinessDay', 'BDay', 'QuarterEnd', 
'QuarterBegin', + 'CustomBusinessDay', 'CDay', 'CBMonthEnd', + 'CBMonthBegin', 'BMonthBegin', 'BMonthEnd', + 'BusinessHour', 'BYearBegin', 'BYearEnd', + 'BQuarterBegin', ('LastWeekOfMonth', {'weekday': 2}), + ('FY5253Quarter', {'qtr_with_extra_week': 1, + 'startingMonth': 1, + 'weekday': 2, + 'variation': 'nearest'}), + ('FY5253', {'weekday': 0, 'startingMonth': 2, 'variation': 'nearest'}), + ('WeekOfMonth', {'weekday': 2, 'week': 2}), + 'Easter', ('DateOffset', {'day': 4}), + ('DateOffset', {'month': 5})]) +@pytest.mark.parametrize('normalize', [True, False]) +@pytest.mark.parametrize('klass,assert_func', [ + (Series, tm.assert_series_equal), + (DatetimeIndex, tm.assert_index_equal)]) +def test_dt64_with_DateOffsets(klass, assert_func, normalize, cls_and_kwargs): + # GH#10699 + # assert these are equal on a piecewise basis + vec = klass([Timestamp('2000-01-05 00:15:00'), + Timestamp('2000-01-31 00:23:00'), + Timestamp('2000-01-01'), + Timestamp('2000-03-31'), + Timestamp('2000-02-29'), + Timestamp('2000-12-31'), + Timestamp('2000-05-15'), + Timestamp('2001-06-15')]) + + if isinstance(cls_and_kwargs, tuple): + # If cls_name param is a tuple, then 2nd entry is kwargs for + # the offset constructor + cls_name, kwargs = cls_and_kwargs + else: + cls_name = cls_and_kwargs + kwargs = {} + + offset_cls = getattr(pd.offsets, cls_name) + + with warnings.catch_warnings(record=True): + for n in [0, 5]: + if (cls_name in ['WeekOfMonth', 'LastWeekOfMonth', + 'FY5253Quarter', 'FY5253'] and n == 0): + # passing n = 0 is invalid for these offset classes + continue + + offset = offset_cls(n, normalize=normalize, **kwargs) + assert_func(klass([x + offset for x in vec]), vec + offset) + assert_func(klass([x - offset for x in vec]), vec - offset) + assert_func(klass([offset + x for x in vec]), offset + vec) + + # GH 10699 @pytest.mark.parametrize('klass,assert_func', zip([Series, DatetimeIndex], [tm.assert_series_equal, @@ -480,84 +769,3 @@ def test_datetime64_with_DateOffset(klass, 
assert_func): Timestamp('2000-02-29', tz='US/Central')], name='a') assert_func(result, exp) assert_func(result2, exp) - - # array of offsets - valid for Series only - if klass is Series: - with tm.assert_produces_warning(PerformanceWarning): - s = klass([Timestamp('2000-1-1'), Timestamp('2000-2-1')]) - result = s + Series([pd.offsets.DateOffset(years=1), - pd.offsets.MonthEnd()]) - exp = klass([Timestamp('2001-1-1'), Timestamp('2000-2-29') - ]) - assert_func(result, exp) - - # same offset - result = s + Series([pd.offsets.DateOffset(years=1), - pd.offsets.DateOffset(years=1)]) - exp = klass([Timestamp('2001-1-1'), Timestamp('2001-2-1')]) - assert_func(result, exp) - - s = klass([Timestamp('2000-01-05 00:15:00'), - Timestamp('2000-01-31 00:23:00'), - Timestamp('2000-01-01'), - Timestamp('2000-03-31'), - Timestamp('2000-02-29'), - Timestamp('2000-12-31'), - Timestamp('2000-05-15'), - Timestamp('2001-06-15')]) - - # DateOffset relativedelta fastpath - relative_kwargs = [('years', 2), ('months', 5), ('days', 3), - ('hours', 5), ('minutes', 10), ('seconds', 2), - ('microseconds', 5)] - for i, kwd in enumerate(relative_kwargs): - op = pd.DateOffset(**dict([kwd])) - assert_func(klass([x + op for x in s]), s + op) - assert_func(klass([x - op for x in s]), s - op) - op = pd.DateOffset(**dict(relative_kwargs[:i + 1])) - assert_func(klass([x + op for x in s]), s + op) - assert_func(klass([x - op for x in s]), s - op) - - # assert these are equal on a piecewise basis - offsets = ['YearBegin', ('YearBegin', {'month': 5}), - 'YearEnd', ('YearEnd', {'month': 5}), - 'MonthBegin', 'MonthEnd', - 'SemiMonthEnd', 'SemiMonthBegin', - 'Week', ('Week', {'weekday': 3}), - 'BusinessDay', 'BDay', 'QuarterEnd', 'QuarterBegin', - 'CustomBusinessDay', 'CDay', 'CBMonthEnd', - 'CBMonthBegin', 'BMonthBegin', 'BMonthEnd', - 'BusinessHour', 'BYearBegin', 'BYearEnd', - 'BQuarterBegin', ('LastWeekOfMonth', {'weekday': 2}), - ('FY5253Quarter', {'qtr_with_extra_week': 1, - 'startingMonth': 1, - 
'weekday': 2, - 'variation': 'nearest'}), - ('FY5253', {'weekday': 0, - 'startingMonth': 2, - 'variation': - 'nearest'}), - ('WeekOfMonth', {'weekday': 2, - 'week': 2}), - 'Easter', ('DateOffset', {'day': 4}), - ('DateOffset', {'month': 5})] - - with warnings.catch_warnings(record=True): - for normalize in (True, False): - for do in offsets: - if isinstance(do, tuple): - do, kwargs = do - else: - do = do - kwargs = {} - - for n in [0, 5]: - if (do in ['WeekOfMonth', 'LastWeekOfMonth', - 'FY5253Quarter', 'FY5253'] and n == 0): - continue - op = getattr(pd.offsets, do)(n, - normalize=normalize, - **kwargs) - assert_func(klass([x + op for x in s]), s + op) - assert_func(klass([x - op for x in s]), s - op) - assert_func(klass([op + x for x in s]), op + s) diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index 076c3d6f25a89..49f94bfa65543 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -1,3 +1,4 @@ + import pytest import numpy as np @@ -7,7 +8,6 @@ import pandas as pd import pandas.util.testing as tm from pandas.compat import lrange -from pandas.compat.numpy import np_datetime64_compat from pandas import (DatetimeIndex, Index, date_range, DataFrame, Timestamp, offsets) @@ -248,106 +248,6 @@ def test_append_join_nondatetimeindex(self): # it works rng.join(idx, how='outer') - def test_comparisons_coverage(self): - rng = date_range('1/1/2000', periods=10) - - # raise TypeError for now - pytest.raises(TypeError, rng.__lt__, rng[3].value) - - result = rng == list(rng) - exp = rng == rng - tm.assert_numpy_array_equal(result, exp) - - def test_comparisons_nat(self): - - fidx1 = pd.Index([1.0, np.nan, 3.0, np.nan, 5.0, 7.0]) - fidx2 = pd.Index([2.0, 3.0, np.nan, np.nan, 6.0, 7.0]) - - didx1 = pd.DatetimeIndex(['2014-01-01', pd.NaT, '2014-03-01', pd.NaT, - '2014-05-01', '2014-07-01']) - didx2 = pd.DatetimeIndex(['2014-02-01', '2014-03-01', pd.NaT, 
pd.NaT, - '2014-06-01', '2014-07-01']) - darr = np.array([np_datetime64_compat('2014-02-01 00:00Z'), - np_datetime64_compat('2014-03-01 00:00Z'), - np_datetime64_compat('nat'), np.datetime64('nat'), - np_datetime64_compat('2014-06-01 00:00Z'), - np_datetime64_compat('2014-07-01 00:00Z')]) - - cases = [(fidx1, fidx2), (didx1, didx2), (didx1, darr)] - - # Check pd.NaT is handles as the same as np.nan - with tm.assert_produces_warning(None): - for idx1, idx2 in cases: - - result = idx1 < idx2 - expected = np.array([True, False, False, False, True, False]) - tm.assert_numpy_array_equal(result, expected) - - result = idx2 > idx1 - expected = np.array([True, False, False, False, True, False]) - tm.assert_numpy_array_equal(result, expected) - - result = idx1 <= idx2 - expected = np.array([True, False, False, False, True, True]) - tm.assert_numpy_array_equal(result, expected) - - result = idx2 >= idx1 - expected = np.array([True, False, False, False, True, True]) - tm.assert_numpy_array_equal(result, expected) - - result = idx1 == idx2 - expected = np.array([False, False, False, False, False, True]) - tm.assert_numpy_array_equal(result, expected) - - result = idx1 != idx2 - expected = np.array([True, True, True, True, True, False]) - tm.assert_numpy_array_equal(result, expected) - - with tm.assert_produces_warning(None): - for idx1, val in [(fidx1, np.nan), (didx1, pd.NaT)]: - result = idx1 < val - expected = np.array([False, False, False, False, False, False]) - tm.assert_numpy_array_equal(result, expected) - result = idx1 > val - tm.assert_numpy_array_equal(result, expected) - - result = idx1 <= val - tm.assert_numpy_array_equal(result, expected) - result = idx1 >= val - tm.assert_numpy_array_equal(result, expected) - - result = idx1 == val - tm.assert_numpy_array_equal(result, expected) - - result = idx1 != val - expected = np.array([True, True, True, True, True, True]) - tm.assert_numpy_array_equal(result, expected) - - # Check pd.NaT is handles as the same as np.nan - 
with tm.assert_produces_warning(None): - for idx1, val in [(fidx1, 3), (didx1, datetime(2014, 3, 1))]: - result = idx1 < val - expected = np.array([True, False, False, False, False, False]) - tm.assert_numpy_array_equal(result, expected) - result = idx1 > val - expected = np.array([False, False, False, False, True, True]) - tm.assert_numpy_array_equal(result, expected) - - result = idx1 <= val - expected = np.array([True, False, True, False, False, False]) - tm.assert_numpy_array_equal(result, expected) - result = idx1 >= val - expected = np.array([False, False, True, False, True, True]) - tm.assert_numpy_array_equal(result, expected) - - result = idx1 == val - expected = np.array([False, False, True, False, False, False]) - tm.assert_numpy_array_equal(result, expected) - - result = idx1 != val - expected = np.array([True, True, False, True, True, True]) - tm.assert_numpy_array_equal(result, expected) - def test_map(self): rng = date_range('1/1/2000', periods=10) diff --git a/pandas/tests/indexes/datetimes/test_datetimelike.py b/pandas/tests/indexes/datetimes/test_datetimelike.py index 538e10e6011ec..9d6d27ecb4b6f 100644 --- a/pandas/tests/indexes/datetimes/test_datetimelike.py +++ b/pandas/tests/indexes/datetimes/test_datetimelike.py @@ -21,28 +21,7 @@ def create_index(self): return date_range('20130101', periods=5) def test_shift(self): - - # test shift for datetimeIndex and non datetimeIndex - # GH8083 - - drange = self.create_index() - result = drange.shift(1) - expected = DatetimeIndex(['2013-01-02', '2013-01-03', '2013-01-04', - '2013-01-05', - '2013-01-06'], freq='D') - tm.assert_index_equal(result, expected) - - result = drange.shift(-1) - expected = DatetimeIndex(['2012-12-31', '2013-01-01', '2013-01-02', - '2013-01-03', '2013-01-04'], - freq='D') - tm.assert_index_equal(result, expected) - - result = drange.shift(3, freq='2D') - expected = DatetimeIndex(['2013-01-07', '2013-01-08', '2013-01-09', - '2013-01-10', - '2013-01-11'], freq='D') - 
tm.assert_index_equal(result, expected) + pass # handled in test_ops def test_pickle_compat_construction(self): pass diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index a7a6e3caab727..fb8dd1a43aa7f 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -7,6 +7,7 @@ from itertools import product import pandas as pd +from pandas.errors import NullFrequencyError import pandas._libs.tslib as tslib from pandas._libs.tslibs.offsets import shift_months import pandas.util.testing as tm @@ -143,6 +144,25 @@ def test_numpy_minmax(self): tm.assert_raises_regex( ValueError, errmsg, np.argmax, dr, out=0) + def test_round_daily(self): + dti = pd.date_range('20130101 09:10:11', periods=5) + result = dti.round('D') + expected = pd.date_range('20130101', periods=5) + tm.assert_index_equal(result, expected) + + dti = dti.tz_localize('UTC').tz_convert('US/Eastern') + result = dti.round('D') + expected = pd.date_range('20130101', + periods=5).tz_localize('US/Eastern') + tm.assert_index_equal(result, expected) + + result = dti.round('s') + tm.assert_index_equal(result, dti) + + # invalid + for freq in ['Y', 'M', 'foobar']: + pytest.raises(ValueError, lambda: dti.round(freq)) + def test_round(self): for tz in self.tz: rng = pd.date_range(start='2016-01-01', periods=5, @@ -384,33 +404,6 @@ def test_resolution(self): tz=tz) assert idx.resolution == expected - def test_comp_nat(self): - left = pd.DatetimeIndex([pd.Timestamp('2011-01-01'), pd.NaT, - pd.Timestamp('2011-01-03')]) - right = pd.DatetimeIndex([pd.NaT, pd.NaT, pd.Timestamp('2011-01-03')]) - - for lhs, rhs in [(left, right), - (left.astype(object), right.astype(object))]: - result = rhs == lhs - expected = np.array([False, False, True]) - tm.assert_numpy_array_equal(result, expected) - - result = lhs != rhs - expected = np.array([True, True, False]) - tm.assert_numpy_array_equal(result, expected) - - expected = 
np.array([False, False, False]) - tm.assert_numpy_array_equal(lhs == pd.NaT, expected) - tm.assert_numpy_array_equal(pd.NaT == rhs, expected) - - expected = np.array([True, True, True]) - tm.assert_numpy_array_equal(lhs != pd.NaT, expected) - tm.assert_numpy_array_equal(pd.NaT != lhs, expected) - - expected = np.array([False, False, False]) - tm.assert_numpy_array_equal(lhs < pd.NaT, expected) - tm.assert_numpy_array_equal(pd.NaT > lhs, expected) - def test_value_counts_unique(self): # GH 7735 for tz in self.tz: @@ -593,6 +586,12 @@ def test_nat_new(self): exp = np.array([tslib.iNaT] * 5, dtype=np.int64) tm.assert_numpy_array_equal(result, exp) + def test_shift_no_freq(self): + # GH#19147 + dti = pd.DatetimeIndex(['2011-01-01 10:00', '2011-01-01'], freq=None) + with pytest.raises(NullFrequencyError): + dti.shift(2) + def test_shift(self): # GH 9903 for tz in self.tz: @@ -610,6 +609,29 @@ def test_shift(self): '2011-01-01 09:00'], name='xxx', tz=tz) tm.assert_index_equal(idx.shift(-3, freq='H'), exp) + # TODO: moved from test_datetimelike; de-duplicate with test_shift above + def test_shift2(self): + # test shift for datetimeIndex and non datetimeIndex + # GH8083 + drange = pd.date_range('20130101', periods=5) + result = drange.shift(1) + expected = pd.DatetimeIndex(['2013-01-02', '2013-01-03', '2013-01-04', + '2013-01-05', + '2013-01-06'], freq='D') + tm.assert_index_equal(result, expected) + + result = drange.shift(-1) + expected = pd.DatetimeIndex(['2012-12-31', '2013-01-01', '2013-01-02', + '2013-01-03', '2013-01-04'], + freq='D') + tm.assert_index_equal(result, expected) + + result = drange.shift(3, freq='2D') + expected = pd.DatetimeIndex(['2013-01-07', '2013-01-08', '2013-01-09', + '2013-01-10', + '2013-01-11'], freq='D') + tm.assert_index_equal(result, expected) + def test_nat(self): assert pd.DatetimeIndex._na_value is pd.NaT assert pd.DatetimeIndex([])._na_value is pd.NaT diff --git a/pandas/tests/indexes/interval/test_astype.py 
b/pandas/tests/indexes/interval/test_astype.py new file mode 100644 index 0000000000000..b3a4bfa878c3f --- /dev/null +++ b/pandas/tests/indexes/interval/test_astype.py @@ -0,0 +1,209 @@ +from __future__ import division + +import pytest +import numpy as np +from pandas import ( + Index, + IntervalIndex, + interval_range, + CategoricalIndex, + Timestamp, + Timedelta, + NaT) +from pandas.core.dtypes.dtypes import CategoricalDtype, IntervalDtype +import pandas.util.testing as tm + + +class Base(object): + """Tests common to IntervalIndex with any subtype""" + + def test_astype_idempotent(self, index): + result = index.astype('interval') + tm.assert_index_equal(result, index) + + result = index.astype(index.dtype) + tm.assert_index_equal(result, index) + + def test_astype_object(self, index): + result = index.astype(object) + expected = Index(index.values, dtype='object') + tm.assert_index_equal(result, expected) + assert not result.equals(index) + + def test_astype_category(self, index): + result = index.astype('category') + expected = CategoricalIndex(index.values) + tm.assert_index_equal(result, expected) + + result = index.astype(CategoricalDtype()) + tm.assert_index_equal(result, expected) + + # non-default params + categories = index.dropna().unique().values[:-1] + dtype = CategoricalDtype(categories=categories, ordered=True) + result = index.astype(dtype) + expected = CategoricalIndex( + index.values, categories=categories, ordered=True) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize('dtype', [ + 'int64', 'uint64', 'float64', 'complex128', 'period[M]', + 'timedelta64', 'timedelta64[ns]', 'datetime64', 'datetime64[ns]', + 'datetime64[ns, US/Eastern]']) + def test_astype_cannot_cast(self, index, dtype): + msg = 'Cannot cast IntervalIndex to dtype' + with tm.assert_raises_regex(TypeError, msg): + index.astype(dtype) + + def test_astype_invalid_dtype(self, index): + msg = 'data type "fake_dtype" not understood' + with 
tm.assert_raises_regex(TypeError, msg): + index.astype('fake_dtype') + + +class TestIntSubtype(Base): + """Tests specific to IntervalIndex with integer-like subtype""" + + indexes = [ + IntervalIndex.from_breaks(np.arange(-10, 11, dtype='int64')), + IntervalIndex.from_breaks( + np.arange(100, dtype='uint64'), closed='left'), + ] + + @pytest.fixture(params=indexes) + def index(self, request): + return request.param + + @pytest.mark.parametrize('subtype', [ + 'float64', 'datetime64[ns]', 'timedelta64[ns]']) + def test_subtype_conversion(self, index, subtype): + dtype = IntervalDtype(subtype) + result = index.astype(dtype) + expected = IntervalIndex.from_arrays(index.left.astype(subtype), + index.right.astype(subtype), + closed=index.closed) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize('subtype_start, subtype_end', [ + ('int64', 'uint64'), ('uint64', 'int64')]) + def test_subtype_integer(self, subtype_start, subtype_end): + index = IntervalIndex.from_breaks(np.arange(100, dtype=subtype_start)) + dtype = IntervalDtype(subtype_end) + result = index.astype(dtype) + expected = IntervalIndex.from_arrays(index.left.astype(subtype_end), + index.right.astype(subtype_end), + closed=index.closed) + tm.assert_index_equal(result, expected) + + @pytest.mark.xfail(reason='GH 15832') + def test_subtype_integer_errors(self): + # int64 -> uint64 fails with negative values + index = interval_range(-10, 10) + dtype = IntervalDtype('uint64') + with pytest.raises(ValueError): + index.astype(dtype) + + +class TestFloatSubtype(Base): + """Tests specific to IntervalIndex with float subtype""" + + indexes = [ + interval_range(-10.0, 10.0, closed='neither'), + IntervalIndex.from_arrays([-1.5, np.nan, 0., 0., 1.5], + [-0.5, np.nan, 1., 1., 3.], + closed='both'), + ] + + @pytest.fixture(params=indexes) + def index(self, request): + return request.param + + @pytest.mark.parametrize('subtype', ['int64', 'uint64']) + def test_subtype_integer(self, subtype): + index = 
interval_range(0.0, 10.0) + dtype = IntervalDtype(subtype) + result = index.astype(dtype) + expected = IntervalIndex.from_arrays(index.left.astype(subtype), + index.right.astype(subtype), + closed=index.closed) + tm.assert_index_equal(result, expected) + + # raises with NA + msg = 'Cannot convert NA to integer' + with tm.assert_raises_regex(ValueError, msg): + index.insert(0, np.nan).astype(dtype) + + @pytest.mark.xfail(reason='GH 15832') + def test_subtype_integer_errors(self): + # float64 -> uint64 fails with negative values + index = interval_range(-10.0, 10.0) + dtype = IntervalDtype('uint64') + with pytest.raises(ValueError): + index.astype(dtype) + + # float64 -> integer-like fails with non-integer valued floats + index = interval_range(0.0, 10.0, freq=0.25) + dtype = IntervalDtype('int64') + with pytest.raises(ValueError): + index.astype(dtype) + + dtype = IntervalDtype('uint64') + with pytest.raises(ValueError): + index.astype(dtype) + + @pytest.mark.parametrize('subtype', ['datetime64[ns]', 'timedelta64[ns]']) + def test_subtype_datetimelike(self, index, subtype): + dtype = IntervalDtype(subtype) + msg = 'Cannot convert .* to .*; subtypes are incompatible' + with tm.assert_raises_regex(TypeError, msg): + index.astype(dtype) + + +class TestDatetimelikeSubtype(Base): + """Tests specific to IntervalIndex with datetime-like subtype""" + + indexes = [ + interval_range(Timestamp('2018-01-01'), periods=10, closed='neither'), + interval_range(Timestamp('2018-01-01'), periods=10).insert(2, NaT), + interval_range(Timestamp('2018-01-01', tz='US/Eastern'), periods=10), + interval_range(Timedelta('0 days'), periods=10, closed='both'), + interval_range(Timedelta('0 days'), periods=10).insert(2, NaT), + ] + + @pytest.fixture(params=indexes) + def index(self, request): + return request.param + + @pytest.mark.parametrize('subtype', ['int64', 'uint64']) + def test_subtype_integer(self, index, subtype): + dtype = IntervalDtype(subtype) + result = index.astype(dtype) + 
expected = IntervalIndex.from_arrays(index.left.astype(subtype), + index.right.astype(subtype), + closed=index.closed) + tm.assert_index_equal(result, expected) + + def test_subtype_float(self, index): + dtype = IntervalDtype('float64') + msg = 'Cannot convert .* to .*; subtypes are incompatible' + with tm.assert_raises_regex(TypeError, msg): + index.astype(dtype) + + def test_subtype_datetimelike(self): + # datetime -> timedelta raises + dtype = IntervalDtype('timedelta64[ns]') + msg = 'Cannot convert .* to .*; subtypes are incompatible' + + index = interval_range(Timestamp('2018-01-01'), periods=10) + with tm.assert_raises_regex(TypeError, msg): + index.astype(dtype) + + index = interval_range(Timestamp('2018-01-01', tz='CET'), periods=10) + with tm.assert_raises_regex(TypeError, msg): + index.astype(dtype) + + # timedelta -> datetime raises + dtype = IntervalDtype('datetime64[ns]') + index = interval_range(Timedelta('0 days'), periods=10) + with tm.assert_raises_regex(TypeError, msg): + index.astype(dtype) diff --git a/pandas/tests/indexes/interval/test_construction.py b/pandas/tests/indexes/interval/test_construction.py new file mode 100644 index 0000000000000..5fdf92dcb2044 --- /dev/null +++ b/pandas/tests/indexes/interval/test_construction.py @@ -0,0 +1,342 @@ +from __future__ import division + +import pytest +import numpy as np +from functools import partial + +from pandas import ( + Interval, IntervalIndex, Index, Int64Index, Float64Index, Categorical, + date_range, timedelta_range, period_range, notna) +from pandas.compat import lzip +from pandas.core.dtypes.dtypes import IntervalDtype +import pandas.core.common as com +import pandas.util.testing as tm + + +@pytest.fixture(params=['left', 'right', 'both', 'neither']) +def closed(request): + return request.param + + +@pytest.fixture(params=[None, 'foo']) +def name(request): + return request.param + + +class Base(object): + """ + Common tests for all variations of IntervalIndex construction. 
Input data + to be supplied in breaks format, then converted by the subclass method + get_kwargs_from_breaks to the expected format. + """ + + @pytest.mark.parametrize('breaks', [ + [3, 14, 15, 92, 653], + np.arange(10, dtype='int64'), + Int64Index(range(-10, 11)), + Float64Index(np.arange(20, 30, 0.5)), + date_range('20180101', periods=10), + date_range('20180101', periods=10, tz='US/Eastern'), + timedelta_range('1 day', periods=10)]) + def test_constructor(self, constructor, breaks, closed, name): + result_kwargs = self.get_kwargs_from_breaks(breaks, closed) + result = constructor(closed=closed, name=name, **result_kwargs) + + assert result.closed == closed + assert result.name == name + assert result.dtype.subtype == getattr(breaks, 'dtype', 'int64') + tm.assert_index_equal(result.left, Index(breaks[:-1])) + tm.assert_index_equal(result.right, Index(breaks[1:])) + + @pytest.mark.parametrize('breaks, subtype', [ + (Int64Index([0, 1, 2, 3, 4]), 'float64'), + (Int64Index([0, 1, 2, 3, 4]), 'datetime64[ns]'), + (Int64Index([0, 1, 2, 3, 4]), 'timedelta64[ns]'), + (Float64Index([0, 1, 2, 3, 4]), 'int64'), + (date_range('2017-01-01', periods=5), 'int64'), + (timedelta_range('1 day', periods=5), 'int64')]) + def test_constructor_dtype(self, constructor, breaks, subtype): + # GH 19262: conversion via dtype parameter + expected_kwargs = self.get_kwargs_from_breaks(breaks.astype(subtype)) + expected = constructor(**expected_kwargs) + + result_kwargs = self.get_kwargs_from_breaks(breaks) + iv_dtype = IntervalDtype(subtype) + for dtype in (iv_dtype, str(iv_dtype)): + result = constructor(dtype=dtype, **result_kwargs) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize('breaks', [ + [np.nan] * 2, [np.nan] * 4, [np.nan] * 50]) + def test_constructor_nan(self, constructor, breaks, closed): + # GH 18421 + result_kwargs = self.get_kwargs_from_breaks(breaks) + result = constructor(closed=closed, **result_kwargs) + + expected_subtype = np.float64 + expected_values 
= np.array(breaks[:-1], dtype=object) + + assert result.closed == closed + assert result.dtype.subtype == expected_subtype + tm.assert_numpy_array_equal(result.values, expected_values) + + @pytest.mark.parametrize('breaks', [ + [], + np.array([], dtype='int64'), + np.array([], dtype='float64'), + np.array([], dtype='datetime64[ns]'), + np.array([], dtype='timedelta64[ns]')]) + def test_constructor_empty(self, constructor, breaks, closed): + # GH 18421 + result_kwargs = self.get_kwargs_from_breaks(breaks) + result = constructor(closed=closed, **result_kwargs) + + expected_values = np.array([], dtype=object) + expected_subtype = getattr(breaks, 'dtype', np.int64) + + assert result.empty + assert result.closed == closed + assert result.dtype.subtype == expected_subtype + tm.assert_numpy_array_equal(result.values, expected_values) + + @pytest.mark.parametrize('breaks', [ + tuple('0123456789'), + list('abcdefghij'), + np.array(list('abcdefghij'), dtype=object), + np.array(list('abcdefghij'), dtype=' with value 0 " + "is not an interval") + with tm.assert_raises_regex(TypeError, msg): + constructor([0, 1]) + + +class TestFromIntervals(TestClassConstructors): + """ + Tests for IntervalIndex.from_intervals, which is deprecated in favor of the + IntervalIndex constructor. Same tests as the IntervalIndex constructor, + plus deprecation test. Should only need to delete this class when removed. 
+ """ + + @pytest.fixture + def constructor(self): + def from_intervals_ignore_warnings(*args, **kwargs): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + return IntervalIndex.from_intervals(*args, **kwargs) + return from_intervals_ignore_warnings + + def test_deprecated(self): + ivs = [Interval(0, 1), Interval(1, 2)] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + IntervalIndex.from_intervals(ivs) diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index dd673294b128f..71a6f78125004 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -4,9 +4,9 @@ import numpy as np from pandas import ( Interval, IntervalIndex, Index, isna, notna, interval_range, Timestamp, - Timedelta, date_range, timedelta_range, Categorical) + Timedelta, date_range, timedelta_range) from pandas.compat import lzip -from pandas.core.common import _asarray_tuplesafe +import pandas.core.common as com from pandas.tests.indexes.common import Base import pandas.util.testing as tm import pandas as pd @@ -40,249 +40,6 @@ def create_index_with_nan(self, closed='right'): np.where(mask, np.arange(10), np.nan), np.where(mask, np.arange(1, 11), np.nan), closed=closed) - @pytest.mark.parametrize('data', [ - Index([0, 1, 2, 3, 4]), - date_range('2017-01-01', periods=5), - date_range('2017-01-01', periods=5, tz='US/Eastern'), - timedelta_range('1 day', periods=5)]) - def test_constructors(self, data, closed, name): - left, right = data[:-1], data[1:] - ivs = [Interval(l, r, closed=closed) for l, r in lzip(left, right)] - expected = IntervalIndex._simple_new( - left=left, right=right, closed=closed, name=name) - - # validate expected - assert expected.closed == closed - assert expected.name == name - assert expected.dtype.subtype == data.dtype - tm.assert_index_equal(expected.left, data[:-1]) - tm.assert_index_equal(expected.right, 
data[1:]) - - # validated constructors - result = IntervalIndex(ivs, name=name) - tm.assert_index_equal(result, expected) - - result = IntervalIndex.from_intervals(ivs, name=name) - tm.assert_index_equal(result, expected) - - result = IntervalIndex.from_breaks(data, closed=closed, name=name) - tm.assert_index_equal(result, expected) - - result = IntervalIndex.from_arrays( - left, right, closed=closed, name=name) - tm.assert_index_equal(result, expected) - - result = IntervalIndex.from_tuples( - lzip(left, right), closed=closed, name=name) - tm.assert_index_equal(result, expected) - - result = Index(ivs, name=name) - assert isinstance(result, IntervalIndex) - tm.assert_index_equal(result, expected) - - # idempotent - tm.assert_index_equal(Index(expected), expected) - tm.assert_index_equal(IntervalIndex(expected), expected) - - result = IntervalIndex.from_intervals(expected) - tm.assert_index_equal(result, expected) - - result = IntervalIndex.from_intervals( - expected.values, name=expected.name) - tm.assert_index_equal(result, expected) - - left, right = expected.left, expected.right - result = IntervalIndex.from_arrays( - left, right, closed=expected.closed, name=expected.name) - tm.assert_index_equal(result, expected) - - result = IntervalIndex.from_tuples( - expected.to_tuples(), closed=expected.closed, name=expected.name) - tm.assert_index_equal(result, expected) - - breaks = expected.left.tolist() + [expected.right[-1]] - result = IntervalIndex.from_breaks( - breaks, closed=expected.closed, name=expected.name) - tm.assert_index_equal(result, expected) - - @pytest.mark.parametrize('data', [[np.nan], [np.nan] * 2, [np.nan] * 50]) - def test_constructors_nan(self, closed, data): - # GH 18421 - expected_values = np.array(data, dtype=object) - expected_idx = IntervalIndex(data, closed=closed) - - # validate the expected index - assert expected_idx.closed == closed - tm.assert_numpy_array_equal(expected_idx.values, expected_values) - - result = 
IntervalIndex.from_tuples(data, closed=closed) - tm.assert_index_equal(result, expected_idx) - tm.assert_numpy_array_equal(result.values, expected_values) - - result = IntervalIndex.from_breaks([np.nan] + data, closed=closed) - tm.assert_index_equal(result, expected_idx) - tm.assert_numpy_array_equal(result.values, expected_values) - - result = IntervalIndex.from_arrays(data, data, closed=closed) - tm.assert_index_equal(result, expected_idx) - tm.assert_numpy_array_equal(result.values, expected_values) - - if closed == 'right': - # Can't specify closed for IntervalIndex.from_intervals - result = IntervalIndex.from_intervals(data) - tm.assert_index_equal(result, expected_idx) - tm.assert_numpy_array_equal(result.values, expected_values) - - @pytest.mark.parametrize('data', [ - [], - np.array([], dtype='int64'), - np.array([], dtype='float64'), - np.array([], dtype='datetime64[ns]')]) - def test_constructors_empty(self, data, closed): - # GH 18421 - expected_dtype = getattr(data, 'dtype', np.intp) - expected_values = np.array([], dtype=object) - expected_index = IntervalIndex(data, closed=closed) - - # validate the expected index - assert expected_index.empty - assert expected_index.closed == closed - assert expected_index.dtype.subtype == expected_dtype - tm.assert_numpy_array_equal(expected_index.values, expected_values) - - result = IntervalIndex.from_tuples(data, closed=closed) - tm.assert_index_equal(result, expected_index) - tm.assert_numpy_array_equal(result.values, expected_values) - - result = IntervalIndex.from_breaks(data, closed=closed) - tm.assert_index_equal(result, expected_index) - tm.assert_numpy_array_equal(result.values, expected_values) - - result = IntervalIndex.from_arrays(data, data, closed=closed) - tm.assert_index_equal(result, expected_index) - tm.assert_numpy_array_equal(result.values, expected_values) - - if closed == 'right': - # Can't specify closed for IntervalIndex.from_intervals - result = IntervalIndex.from_intervals(data) - 
tm.assert_index_equal(result, expected_index) - tm.assert_numpy_array_equal(result.values, expected_values) - - def test_constructors_errors(self): - - # scalar - msg = (r'IntervalIndex\(...\) must be called with a collection of ' - 'some kind, 5 was passed') - with tm.assert_raises_regex(TypeError, msg): - IntervalIndex(5) - - # not an interval - msg = ("type <(class|type) 'numpy.int64'> with value 0 " - "is not an interval") - with tm.assert_raises_regex(TypeError, msg): - IntervalIndex([0, 1]) - - with tm.assert_raises_regex(TypeError, msg): - IntervalIndex.from_intervals([0, 1]) - - # invalid closed - msg = "invalid options for 'closed': invalid" - with tm.assert_raises_regex(ValueError, msg): - IntervalIndex.from_arrays([0, 1], [1, 2], closed='invalid') - - # mismatched closed within intervals - msg = 'intervals must all be closed on the same side' - with tm.assert_raises_regex(ValueError, msg): - IntervalIndex.from_intervals([Interval(0, 1), - Interval(1, 2, closed='left')]) - - with tm.assert_raises_regex(ValueError, msg): - IntervalIndex([Interval(0, 1), Interval(2, 3, closed='left')]) - - with tm.assert_raises_regex(ValueError, msg): - Index([Interval(0, 1), Interval(2, 3, closed='left')]) - - # mismatched closed inferred from intervals vs constructor. 
- msg = 'conflicting values for closed' - with tm.assert_raises_regex(ValueError, msg): - iv = [Interval(0, 1, closed='both'), Interval(1, 2, closed='both')] - IntervalIndex(iv, closed='neither') - - # no point in nesting periods in an IntervalIndex - msg = 'Period dtypes are not supported, use a PeriodIndex instead' - with tm.assert_raises_regex(ValueError, msg): - IntervalIndex.from_breaks( - pd.period_range('2000-01-01', periods=3)) - - # decreasing breaks/arrays - msg = 'left side of interval must be <= right side' - with tm.assert_raises_regex(ValueError, msg): - IntervalIndex.from_breaks(range(10, -1, -1)) - - with tm.assert_raises_regex(ValueError, msg): - IntervalIndex.from_arrays(range(10, -1, -1), range(9, -2, -1)) - - # GH 19016: categorical data - data = Categorical(list('01234abcde'), ordered=True) - msg = ('category, object, and string subtypes are not supported ' - 'for IntervalIndex') - - with tm.assert_raises_regex(TypeError, msg): - IntervalIndex.from_breaks(data) - - with tm.assert_raises_regex(TypeError, msg): - IntervalIndex.from_arrays(data[:-1], data[1:]) - - @pytest.mark.parametrize('data', [ - tuple('0123456789'), - list('abcdefghij'), - np.array(list('abcdefghij'), dtype=object), - np.array(list('abcdefghij'), dtype=' lhs, expected) - def test_value_counts_unique(self): # GH 7735 idx = pd.period_range('2011-01-01 09:00', freq='H', periods=10) @@ -732,77 +705,6 @@ def test_pi_comp_period_nat(self): self._check(idx, f, exp) -class TestSeriesPeriod(object): - - def setup_method(self, method): - self.series = Series(period_range('2000-01-01', periods=10, freq='D')) - - def test_ops_series_timedelta(self): - # GH 13043 - s = pd.Series([pd.Period('2015-01-01', freq='D'), - pd.Period('2015-01-02', freq='D')], name='xxx') - assert s.dtype == object - - exp = pd.Series([pd.Period('2015-01-02', freq='D'), - pd.Period('2015-01-03', freq='D')], name='xxx') - tm.assert_series_equal(s + pd.Timedelta('1 days'), exp) - 
tm.assert_series_equal(pd.Timedelta('1 days') + s, exp) - - tm.assert_series_equal(s + pd.tseries.offsets.Day(), exp) - tm.assert_series_equal(pd.tseries.offsets.Day() + s, exp) - - def test_ops_series_period(self): - # GH 13043 - s = pd.Series([pd.Period('2015-01-01', freq='D'), - pd.Period('2015-01-02', freq='D')], name='xxx') - assert s.dtype == object - - p = pd.Period('2015-01-10', freq='D') - # dtype will be object because of original dtype - exp = pd.Series([9, 8], name='xxx', dtype=object) - tm.assert_series_equal(p - s, exp) - tm.assert_series_equal(s - p, -exp) - - s2 = pd.Series([pd.Period('2015-01-05', freq='D'), - pd.Period('2015-01-04', freq='D')], name='xxx') - assert s2.dtype == object - - exp = pd.Series([4, 2], name='xxx', dtype=object) - tm.assert_series_equal(s2 - s, exp) - tm.assert_series_equal(s - s2, -exp) - - -class TestFramePeriod(object): - - def test_ops_frame_period(self): - # GH 13043 - df = pd.DataFrame({'A': [pd.Period('2015-01', freq='M'), - pd.Period('2015-02', freq='M')], - 'B': [pd.Period('2014-01', freq='M'), - pd.Period('2014-02', freq='M')]}) - assert df['A'].dtype == object - assert df['B'].dtype == object - - p = pd.Period('2015-03', freq='M') - # dtype will be object because of original dtype - exp = pd.DataFrame({'A': np.array([2, 1], dtype=object), - 'B': np.array([14, 13], dtype=object)}) - tm.assert_frame_equal(p - df, exp) - tm.assert_frame_equal(df - p, -exp) - - df2 = pd.DataFrame({'A': [pd.Period('2015-05', freq='M'), - pd.Period('2015-06', freq='M')], - 'B': [pd.Period('2015-05', freq='M'), - pd.Period('2015-06', freq='M')]}) - assert df2['A'].dtype == object - assert df2['B'].dtype == object - - exp = pd.DataFrame({'A': np.array([4, 4], dtype=object), - 'B': np.array([16, 16], dtype=object)}) - tm.assert_frame_equal(df2 - df, exp) - tm.assert_frame_equal(df - df2, -exp) - - class TestPeriodIndexComparisons(object): def test_pi_pi_comp(self): @@ -942,3 +844,31 @@ def test_pi_nat_comp(self): with 
tm.assert_raises_regex( period.IncompatibleFrequency, msg): idx1 == diff + + # TODO: De-duplicate with test_pi_nat_comp + def test_comp_nat(self): + left = pd.PeriodIndex([pd.Period('2011-01-01'), pd.NaT, + pd.Period('2011-01-03')]) + right = pd.PeriodIndex([pd.NaT, pd.NaT, pd.Period('2011-01-03')]) + + for lhs, rhs in [(left, right), + (left.astype(object), right.astype(object))]: + result = lhs == rhs + expected = np.array([False, False, True]) + tm.assert_numpy_array_equal(result, expected) + + result = lhs != rhs + expected = np.array([True, True, False]) + tm.assert_numpy_array_equal(result, expected) + + expected = np.array([False, False, False]) + tm.assert_numpy_array_equal(lhs == pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT == rhs, expected) + + expected = np.array([True, True, True]) + tm.assert_numpy_array_equal(lhs != pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT != lhs, expected) + + expected = np.array([False, False, False]) + tm.assert_numpy_array_equal(lhs < pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT > lhs, expected) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 5109542403b43..508c3a73f48c7 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1396,8 +1396,8 @@ def test_drop(self): expected = self.strIndex[lrange(5) + lrange(10, n)] tm.assert_index_equal(dropped, expected) - pytest.raises(ValueError, self.strIndex.drop, ['foo', 'bar']) - pytest.raises(ValueError, self.strIndex.drop, ['1', 'bar']) + pytest.raises(KeyError, self.strIndex.drop, ['foo', 'bar']) + pytest.raises(KeyError, self.strIndex.drop, ['1', 'bar']) # errors='ignore' mixed = drop.tolist() + ['foo'] @@ -1419,7 +1419,7 @@ def test_drop(self): tm.assert_index_equal(dropped, expected) # errors='ignore' - pytest.raises(ValueError, ser.drop, [3, 4]) + pytest.raises(KeyError, ser.drop, [3, 4]) dropped = ser.drop(4, errors='ignore') expected = Index([1, 2, 3]) @@ -1448,7 +1448,7 @@ 
def test_drop_tuple(self, values, to_drop): removed = index.drop(to_drop[1]) for drop_me in to_drop[1], [to_drop[1]]: - pytest.raises(ValueError, removed.drop, drop_me) + pytest.raises(KeyError, removed.drop, drop_me) def test_tuple_union_bug(self): import pandas @@ -2262,6 +2262,26 @@ def test_intersect_str_dates(self): assert len(res) == 0 + @pytest.mark.parametrize('op', [operator.eq, operator.ne, + operator.gt, operator.ge, + operator.lt, operator.le]) + def test_comparison_tzawareness_compat(self, op): + # GH#18162 + dr = pd.date_range('2016-01-01', periods=6) + dz = dr.tz_localize('US/Pacific') + + # Check that there isn't a problem aware-aware and naive-naive do not + # raise + naive_series = Series(dr) + aware_series = Series(dz) + with pytest.raises(TypeError): + op(dz, naive_series) + with pytest.raises(TypeError): + op(dr, aware_series) + + # TODO: implement _assert_tzawareness_compat for the reverse + # comparison with the Series on the left-hand side + class TestIndexUtils(object): diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index dc4f60ce5f0f1..c2e40c79f8914 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -422,7 +422,7 @@ def test_astype(self): expected = ii.take([0, 1, -1]) tm.assert_index_equal(result, expected) - result = IntervalIndex.from_intervals(result.values) + result = IntervalIndex(result.values) tm.assert_index_equal(result, expected) @pytest.mark.parametrize('name', [None, 'foo']) @@ -756,6 +756,15 @@ def test_equals_categorical(self): ordered=True)) assert ci.equals(ci.copy()) + def test_equals_categoridcal_unordered(self): + # https://github.com/pandas-dev/pandas/issues/16603 + a = pd.CategoricalIndex(['A'], categories=['A', 'B']) + b = pd.CategoricalIndex(['A'], categories=['B', 'A']) + c = pd.CategoricalIndex(['C'], categories=['B', 'A']) + assert a.equals(b) + assert not a.equals(c) + assert not b.equals(c) + def 
test_string_categorical_index_repr(self): # short idx = pd.CategoricalIndex(['a', 'bb', 'ccc']) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 2a7c020f4c9e9..aedc957ec67da 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -327,6 +327,21 @@ def assert_matching(actual, expected): assert_matching(ind2.labels, new_labels) assert_matching(self.index.labels, labels) + # label changing for levels of different magnitude of categories + ind = pd.MultiIndex.from_tuples([(0, i) for i in range(130)]) + new_labels = range(129, -1, -1) + expected = pd.MultiIndex.from_tuples( + [(0, i) for i in new_labels]) + + # [w/o mutation] + result = ind.set_labels(labels=new_labels, level=1) + assert result.equals(expected) + + # [w/ mutation] + result = ind.copy() + result.set_labels(labels=new_labels, level=1, inplace=True) + assert result.equals(expected) + def test_set_levels_labels_names_bad_input(self): levels, labels = self.index.levels, self.index.labels names = self.index.names @@ -1243,6 +1258,17 @@ def test_get_loc_level(self): assert result == expected assert new_index.equals(index.droplevel(0)) + @pytest.mark.parametrize('level', [0, 1]) + @pytest.mark.parametrize('null_val', [np.nan, pd.NaT, None]) + def test_get_loc_nan(self, level, null_val): + # GH 18485 : NaN in MultiIndex + levels = [['a', 'b'], ['c', 'd']] + key = ['b', 'd'] + levels[level] = np.array([0, null_val], dtype=type(null_val)) + key[level] = null_val + idx = MultiIndex.from_product(levels) + assert idx.get_loc(tuple(key)) == 3 + def test_get_loc_missing_nan(self): # GH 8569 idx = MultiIndex.from_arrays([[1.0, 2.0], [3.0, 4.0]]) @@ -1251,6 +1277,38 @@ def test_get_loc_missing_nan(self): pytest.raises(KeyError, idx.get_loc, np.nan) pytest.raises(KeyError, idx.get_loc, [np.nan]) + @pytest.mark.parametrize('dtype1', [int, float, bool, str]) + @pytest.mark.parametrize('dtype2', [int, float, bool, str]) + def 
test_get_loc_multiple_dtypes(self, dtype1, dtype2): + # GH 18520 + levels = [np.array([0, 1]).astype(dtype1), + np.array([0, 1]).astype(dtype2)] + idx = pd.MultiIndex.from_product(levels) + assert idx.get_loc(idx[2]) == 2 + + @pytest.mark.parametrize('level', [0, 1]) + @pytest.mark.parametrize('dtypes', [[int, float], [float, int]]) + def test_get_loc_implicit_cast(self, level, dtypes): + # GH 18818, GH 15994 : as flat index, cast int to float and vice-versa + levels = [['a', 'b'], ['c', 'd']] + key = ['b', 'd'] + lev_dtype, key_dtype = dtypes + levels[level] = np.array([0, 1], dtype=lev_dtype) + key[level] = key_dtype(1) + idx = MultiIndex.from_product(levels) + assert idx.get_loc(tuple(key)) == 3 + + def test_get_loc_cast_bool(self): + # GH 19086 : int is casted to bool, but not vice-versa + levels = [[False, True], np.arange(2, dtype='int64')] + idx = MultiIndex.from_product(levels) + + assert idx.get_loc((0, 1)) == 1 + assert idx.get_loc((1, 0)) == 2 + + pytest.raises(KeyError, idx.get_loc, (False, True)) + pytest.raises(KeyError, idx.get_loc, (True, False)) + def test_slice_locs(self): df = tm.makeTimeDataFrame() stacked = df.stack() diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index dcd592345b91c..3de1c4c982654 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -29,20 +29,21 @@ def full_like(array, value): class Numeric(Base): def test_numeric_compat(self): + pass # override Base method + def test_mul_int(self): idx = self.create_index() - didx = idx * idx - result = idx * 1 tm.assert_index_equal(result, idx) + def test_rmul_int(self): + idx = self.create_index() + result = 1 * idx tm.assert_index_equal(result, idx) - # in general not true for RangeIndex - if not isinstance(idx, RangeIndex): - result = idx * idx - tm.assert_index_equal(result, idx ** 2) + def test_div_int(self): + idx = self.create_index() # truediv under PY3 result = idx / 1 @@ -57,9 +58,16 @@ def 
test_numeric_compat(self): expected = Index(idx.values / 2) tm.assert_index_equal(result, expected) + def test_floordiv_int(self): + idx = self.create_index() + result = idx // 1 tm.assert_index_equal(result, idx) + def test_mul_int_array(self): + idx = self.create_index() + didx = idx * idx + result = idx * np.array(5, dtype='int64') tm.assert_index_equal(result, idx * 5) @@ -67,19 +75,45 @@ def test_numeric_compat(self): result = idx * np.arange(5, dtype=arr_dtype) tm.assert_index_equal(result, didx) + def test_mul_int_series(self): + idx = self.create_index() + didx = idx * idx + + arr_dtype = 'uint64' if isinstance(idx, UInt64Index) else 'int64' result = idx * Series(np.arange(5, dtype=arr_dtype)) - tm.assert_index_equal(result, didx) + tm.assert_series_equal(result, Series(didx)) - result = idx * Series(np.arange(5, dtype='float64') + 0.1) - expected = Float64Index(np.arange(5, dtype='float64') * - (np.arange(5, dtype='float64') + 0.1)) - tm.assert_index_equal(result, expected) + def test_mul_float_series(self): + idx = self.create_index() + rng5 = np.arange(5, dtype='float64') - # invalid - pytest.raises(TypeError, - lambda: idx * date_range('20130101', periods=5)) - pytest.raises(ValueError, lambda: idx * idx[0:3]) - pytest.raises(ValueError, lambda: idx * np.array([1, 2])) + result = idx * Series(rng5 + 0.1) + expected = Series(rng5 * (rng5 + 0.1)) + tm.assert_series_equal(result, expected) + + def test_mul_index(self): + idx = self.create_index() + + # in general not true for RangeIndex + if not isinstance(idx, RangeIndex): + result = idx * idx + tm.assert_index_equal(result, idx ** 2) + + def test_mul_datelike_raises(self): + idx = self.create_index() + with pytest.raises(TypeError): + idx * date_range('20130101', periods=5) + + def test_mul_size_mismatch_raises(self): + idx = self.create_index() + + with pytest.raises(ValueError): + idx * idx[0:3] + with pytest.raises(ValueError): + idx * np.array([1, 2]) + + def test_divmod(self): + idx = 
self.create_index() result = divmod(idx, 2) with np.errstate(all='ignore'): @@ -95,24 +129,33 @@ def test_numeric_compat(self): for r, e in zip(result, expected): tm.assert_index_equal(r, e) - result = divmod(idx, Series(full_like(idx.values, 2))) - with np.errstate(all='ignore'): - div, mod = divmod( - idx.values, - full_like(idx.values, 2), - ) - expected = Index(div), Index(mod) - for r, e in zip(result, expected): - tm.assert_index_equal(r, e) + def test_pow_float(self): + # test power calculations both ways, GH 14973 + idx = self.create_index() + expected = pd.Float64Index(idx.values**2.0) + result = idx**2.0 + tm.assert_index_equal(result, expected) + + def test_rpow_float(self): # test power calculations both ways, GH 14973 + idx = self.create_index() + expected = pd.Float64Index(2.0**idx.values) result = 2.0**idx tm.assert_index_equal(result, expected) - expected = pd.Float64Index(idx.values**2.0) - result = idx**2.0 - tm.assert_index_equal(result, expected) + @pytest.mark.xfail(reason='GH#19252 Series has no __rdivmod__') + def test_divmod_series(self): + idx = self.create_index() + + result = divmod(idx, Series(full_like(idx.values, 2))) + with np.errstate(all='ignore'): + div, mod = divmod(idx.values, full_like(idx.values, 2)) + expected = Series(div), Series(mod) + + for r, e in zip(result, expected): + tm.assert_series_equal(r, e) def test_explicit_conversions(self): diff --git a/pandas/tests/indexes/timedeltas/test_arithmetic.py b/pandas/tests/indexes/timedeltas/test_arithmetic.py index 3ecfcaff63bc5..ef6523a9eb270 100644 --- a/pandas/tests/indexes/timedeltas/test_arithmetic.py +++ b/pandas/tests/indexes/timedeltas/test_arithmetic.py @@ -10,6 +10,7 @@ to_timedelta, timedelta_range, date_range, Series, Timestamp, Timedelta) +from pandas.errors import PerformanceWarning @pytest.fixture(params=[pd.offsets.Hour(2), timedelta(hours=2), @@ -25,61 +26,281 @@ def freq(request): return request.param +class TestTimedeltaIndexComparisons(object): + def 
test_tdi_cmp_str_invalid(self): + # GH 13624 + tdi = TimedeltaIndex(['1 day', '2 days']) + + for left, right in [(tdi, 'a'), ('a', tdi)]: + with pytest.raises(TypeError): + left > right + + with pytest.raises(TypeError): + left == right + + with pytest.raises(TypeError): + left != right + + def test_comparisons_coverage(self): + rng = timedelta_range('1 days', periods=10) + + result = rng < rng[3] + exp = np.array([True, True, True] + [False] * 7) + tm.assert_numpy_array_equal(result, exp) + + # raise TypeError for now + pytest.raises(TypeError, rng.__lt__, rng[3].value) + + result = rng == list(rng) + exp = rng == rng + tm.assert_numpy_array_equal(result, exp) + + def test_comp_nat(self): + left = pd.TimedeltaIndex([pd.Timedelta('1 days'), pd.NaT, + pd.Timedelta('3 days')]) + right = pd.TimedeltaIndex([pd.NaT, pd.NaT, pd.Timedelta('3 days')]) + + for lhs, rhs in [(left, right), + (left.astype(object), right.astype(object))]: + result = rhs == lhs + expected = np.array([False, False, True]) + tm.assert_numpy_array_equal(result, expected) + + result = rhs != lhs + expected = np.array([True, True, False]) + tm.assert_numpy_array_equal(result, expected) + + expected = np.array([False, False, False]) + tm.assert_numpy_array_equal(lhs == pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT == rhs, expected) + + expected = np.array([True, True, True]) + tm.assert_numpy_array_equal(lhs != pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT != lhs, expected) + + expected = np.array([False, False, False]) + tm.assert_numpy_array_equal(lhs < pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT > lhs, expected) + + def test_comparisons_nat(self): + tdidx1 = pd.TimedeltaIndex(['1 day', pd.NaT, '1 day 00:00:01', pd.NaT, + '1 day 00:00:01', '5 day 00:00:03']) + tdidx2 = pd.TimedeltaIndex(['2 day', '2 day', pd.NaT, pd.NaT, + '1 day 00:00:02', '5 days 00:00:03']) + tdarr = np.array([np.timedelta64(2, 'D'), + np.timedelta64(2, 'D'), np.timedelta64('nat'), + 
np.timedelta64('nat'), + np.timedelta64(1, 'D') + np.timedelta64(2, 's'), + np.timedelta64(5, 'D') + np.timedelta64(3, 's')]) + + cases = [(tdidx1, tdidx2), (tdidx1, tdarr)] + + # Check pd.NaT is handles as the same as np.nan + for idx1, idx2 in cases: + + result = idx1 < idx2 + expected = np.array([True, False, False, False, True, False]) + tm.assert_numpy_array_equal(result, expected) + + result = idx2 > idx1 + expected = np.array([True, False, False, False, True, False]) + tm.assert_numpy_array_equal(result, expected) + + result = idx1 <= idx2 + expected = np.array([True, False, False, False, True, True]) + tm.assert_numpy_array_equal(result, expected) + + result = idx2 >= idx1 + expected = np.array([True, False, False, False, True, True]) + tm.assert_numpy_array_equal(result, expected) + + result = idx1 == idx2 + expected = np.array([False, False, False, False, False, True]) + tm.assert_numpy_array_equal(result, expected) + + result = idx1 != idx2 + expected = np.array([True, True, True, True, True, False]) + tm.assert_numpy_array_equal(result, expected) + + class TestTimedeltaIndexArithmetic(object): _holder = TimedeltaIndex - @pytest.mark.xfail(reason='GH#18824 ufunc add cannot use operands...') - def test_tdi_with_offset_array(self): + # ------------------------------------------------------------- + # Invalid Operations + + def test_tdi_add_str_invalid(self): + # GH 13624 + tdi = TimedeltaIndex(['1 day', '2 days']) + + with pytest.raises(TypeError): + tdi + 'a' + with pytest.raises(TypeError): + 'a' + tdi + + # ------------------------------------------------------------- + + @pytest.mark.parametrize('box', [np.array, pd.Index]) + def test_tdi_add_offset_array(self, box): # GH#18849 - tdi = pd.TimedeltaIndex(['1 days 00:00:00', '3 days 04:00:00']) - offs = np.array([pd.offsets.Hour(n=1), pd.offsets.Minute(n=-2)]) - expected = pd.TimedeltaIndex(['1 days 01:00:00', '3 days 04:02:00']) + tdi = TimedeltaIndex(['1 days 00:00:00', '3 days 04:00:00']) + other = 
box([pd.offsets.Hour(n=1), pd.offsets.Minute(n=-2)]) - res = tdi + offs + expected = TimedeltaIndex([tdi[n] + other[n] for n in range(len(tdi))], + freq='infer') + + with tm.assert_produces_warning(PerformanceWarning): + res = tdi + other tm.assert_index_equal(res, expected) - res2 = offs + tdi + with tm.assert_produces_warning(PerformanceWarning): + res2 = other + tdi tm.assert_index_equal(res2, expected) - anchored = np.array([pd.offsets.QuarterEnd(), - pd.offsets.Week(weekday=2)]) + anchored = box([pd.offsets.QuarterEnd(), + pd.offsets.Week(weekday=2)]) + + # addition/subtraction ops with anchored offsets should issue + # a PerformanceWarning and _then_ raise a TypeError. + with pytest.raises(TypeError): + with tm.assert_produces_warning(PerformanceWarning): + tdi + anchored + with pytest.raises(TypeError): + with tm.assert_produces_warning(PerformanceWarning): + anchored + tdi + + @pytest.mark.parametrize('box', [np.array, pd.Index]) + def test_tdi_sub_offset_array(self, box): + # GH#18824 + tdi = TimedeltaIndex(['1 days 00:00:00', '3 days 04:00:00']) + other = box([pd.offsets.Hour(n=1), pd.offsets.Minute(n=-2)]) + + expected = TimedeltaIndex([tdi[n] - other[n] for n in range(len(tdi))], + freq='infer') + + with tm.assert_produces_warning(PerformanceWarning): + res = tdi - other + tm.assert_index_equal(res, expected) + + anchored = box([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)]) + + # addition/subtraction ops with anchored offsets should issue + # a PerformanceWarning and _then_ raise a TypeError. 
+ with pytest.raises(TypeError): + with tm.assert_produces_warning(PerformanceWarning): + tdi - anchored + with pytest.raises(TypeError): + with tm.assert_produces_warning(PerformanceWarning): + anchored - tdi + + @pytest.mark.parametrize('names', [(None, None, None), + ('foo', 'bar', None), + ('foo', 'foo', 'foo')]) + def test_tdi_with_offset_series(self, names): + # GH#18849 + tdi = TimedeltaIndex(['1 days 00:00:00', '3 days 04:00:00'], + name=names[0]) + other = Series([pd.offsets.Hour(n=1), pd.offsets.Minute(n=-2)], + name=names[1]) + + expected_add = Series([tdi[n] + other[n] for n in range(len(tdi))], + name=names[2]) + + with tm.assert_produces_warning(PerformanceWarning): + res = tdi + other + tm.assert_series_equal(res, expected_add) + + with tm.assert_produces_warning(PerformanceWarning): + res2 = other + tdi + tm.assert_series_equal(res2, expected_add) + + expected_sub = Series([tdi[n] - other[n] for n in range(len(tdi))], + name=names[2]) + + with tm.assert_produces_warning(PerformanceWarning): + res3 = tdi - other + tm.assert_series_equal(res3, expected_sub) + + anchored = Series([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)], + name=names[1]) + + # addition/subtraction ops with anchored offsets should issue + # a PerformanceWarning and _then_ raise a TypeError. 
+ with pytest.raises(TypeError): + with tm.assert_produces_warning(PerformanceWarning): + tdi + anchored with pytest.raises(TypeError): - tdi + anchored + with tm.assert_produces_warning(PerformanceWarning): + anchored + tdi + with pytest.raises(TypeError): + with tm.assert_produces_warning(PerformanceWarning): + tdi - anchored + with pytest.raises(TypeError): + with tm.assert_produces_warning(PerformanceWarning): + anchored - tdi - # TODO: Split by ops, better name - def test_numeric_compat(self): + def test_mul_int(self): idx = self._holder(np.arange(5, dtype='int64')) - didx = self._holder(np.arange(5, dtype='int64') ** 2) result = idx * 1 tm.assert_index_equal(result, idx) + def test_rmul_int(self): + idx = self._holder(np.arange(5, dtype='int64')) result = 1 * idx tm.assert_index_equal(result, idx) + def test_div_int(self): + idx = self._holder(np.arange(5, dtype='int64')) result = idx / 1 tm.assert_index_equal(result, idx) + def test_floordiv_int(self): + idx = self._holder(np.arange(5, dtype='int64')) result = idx // 1 tm.assert_index_equal(result, idx) + def test_mul_int_array_zerodim(self): + rng5 = np.arange(5, dtype='int64') + idx = self._holder(rng5) + expected = self._holder(rng5 * 5) result = idx * np.array(5, dtype='int64') - tm.assert_index_equal(result, - self._holder(np.arange(5, dtype='int64') * 5)) + tm.assert_index_equal(result, expected) + + def test_mul_int_array(self): + rng5 = np.arange(5, dtype='int64') + idx = self._holder(rng5) + didx = self._holder(rng5 ** 2) - result = idx * np.arange(5, dtype='int64') + result = idx * rng5 tm.assert_index_equal(result, didx) + def test_mul_int_series(self): + idx = self._holder(np.arange(5, dtype='int64')) + didx = self._holder(np.arange(5, dtype='int64') ** 2) + result = idx * Series(np.arange(5, dtype='int64')) - tm.assert_index_equal(result, didx) - result = idx * Series(np.arange(5, dtype='float64') + 0.1) - tm.assert_index_equal(result, self._holder(np.arange( - 5, dtype='float64') * 
(np.arange(5, dtype='float64') + 0.1))) + tm.assert_series_equal(result, Series(didx)) + + def test_mul_float_series(self): + idx = self._holder(np.arange(5, dtype='int64')) + + rng5f = np.arange(5, dtype='float64') + result = idx * Series(rng5f + 0.1) + expected = Series(self._holder(rng5f * (rng5f + 0.1))) + tm.assert_series_equal(result, expected) + + def test_dti_mul_dti_raises(self): + idx = self._holder(np.arange(5, dtype='int64')) + with pytest.raises(TypeError): + idx * idx - # invalid - pytest.raises(TypeError, lambda: idx * idx) - pytest.raises(ValueError, lambda: idx * self._holder(np.arange(3))) - pytest.raises(ValueError, lambda: idx * np.array([1, 2])) + def test_dti_mul_too_short_raises(self): + idx = self._holder(np.arange(5, dtype='int64')) + with pytest.raises(ValueError): + idx * self._holder(np.arange(3)) + with pytest.raises(ValueError): + idx * np.array([1, 2]) def test_ufunc_coercions(self): # normal ops are also tested in tseries/test_timedeltas.py @@ -204,39 +425,53 @@ def test_tdi_radd_timestamp(self): # ------------------------------------------------------------- - # TODO: Split by operation, better name - def test_ops_compat(self): + @pytest.mark.parametrize('scalar_td', [ + timedelta(minutes=10, seconds=7), + Timedelta('10m7s'), + Timedelta('10m7s').to_timedelta64()]) + def test_tdi_floordiv_timedelta_scalar(self, scalar_td): + # GH#19125 + tdi = TimedeltaIndex(['00:05:03', '00:05:03', pd.NaT], freq=None) + expected = pd.Index([2.0, 2.0, np.nan]) - offsets = [pd.offsets.Hour(2), timedelta(hours=2), - np.timedelta64(2, 'h'), Timedelta(hours=2)] + res = tdi.__rfloordiv__(scalar_td) + tm.assert_index_equal(res, expected) - rng = timedelta_range('1 days', '10 days', name='foo') + expected = pd.Index([0.0, 0.0, np.nan]) - # multiply - for offset in offsets: - pytest.raises(TypeError, lambda: rng * offset) + res = tdi // (scalar_td) + tm.assert_index_equal(res, expected) - # divide + def test_tdi_floordiv_tdlike_scalar(self, delta): + tdi = 
timedelta_range('1 days', '10 days', name='foo') expected = Int64Index((np.arange(10) + 1) * 12, name='foo') - for offset in offsets: - result = rng / offset - tm.assert_index_equal(result, expected, exact=False) - # floor divide + result = tdi // delta + tm.assert_index_equal(result, expected, exact=False) + + def test_tdi_mul_tdlike_scalar_raises(self, delta): + rng = timedelta_range('1 days', '10 days', name='foo') + with pytest.raises(TypeError): + rng * delta + + def test_tdi_div_nat_raises(self): + # don't allow division by NaT (make could in the future) + rng = timedelta_range('1 days', '10 days', name='foo') + with pytest.raises(TypeError): + rng / pd.NaT + + def test_tdi_div_tdlike_scalar(self, delta): + rng = timedelta_range('1 days', '10 days', name='foo') expected = Int64Index((np.arange(10) + 1) * 12, name='foo') - for offset in offsets: - result = rng // offset - tm.assert_index_equal(result, expected, exact=False) - # divide with nats + result = rng / delta + tm.assert_index_equal(result, expected, exact=False) + + def test_tdi_div_tdlike_scalar_with_nat(self, delta): rng = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo') expected = Float64Index([12, np.nan, 24], name='foo') - for offset in offsets: - result = rng / offset - tm.assert_index_equal(result, expected) - - # don't allow division by NaT (make could in the future) - pytest.raises(TypeError, lambda: rng / pd.NaT) + result = rng / delta + tm.assert_index_equal(result, expected) def test_subtraction_ops(self): # with datetimes/timedelta and tdi/dti @@ -552,14 +787,14 @@ def test_timedelta_ops_with_missing_values(self): actual = -timedelta_NaT + s1 tm.assert_series_equal(actual, sn) - actual = s1 + NA - tm.assert_series_equal(actual, sn) - actual = NA + s1 - tm.assert_series_equal(actual, sn) - actual = s1 - NA - tm.assert_series_equal(actual, sn) - actual = -NA + s1 - tm.assert_series_equal(actual, sn) + with pytest.raises(TypeError): + s1 + np.nan + with pytest.raises(TypeError): + 
np.nan + s1 + with pytest.raises(TypeError): + s1 - np.nan + with pytest.raises(TypeError): + -np.nan + s1 actual = s1 + pd.NaT tm.assert_series_equal(actual, sn) diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index 081e299caa876..112c62b7e2f8d 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -212,33 +212,6 @@ def test_summary(self): result = idx.summary() assert result == expected - def test_comp_nat(self): - left = pd.TimedeltaIndex([pd.Timedelta('1 days'), pd.NaT, - pd.Timedelta('3 days')]) - right = pd.TimedeltaIndex([pd.NaT, pd.NaT, pd.Timedelta('3 days')]) - - for lhs, rhs in [(left, right), - (left.astype(object), right.astype(object))]: - result = rhs == lhs - expected = np.array([False, False, True]) - tm.assert_numpy_array_equal(result, expected) - - result = rhs != lhs - expected = np.array([True, True, False]) - tm.assert_numpy_array_equal(result, expected) - - expected = np.array([False, False, False]) - tm.assert_numpy_array_equal(lhs == pd.NaT, expected) - tm.assert_numpy_array_equal(pd.NaT == rhs, expected) - - expected = np.array([True, True, True]) - tm.assert_numpy_array_equal(lhs != pd.NaT, expected) - tm.assert_numpy_array_equal(pd.NaT != lhs, expected) - - expected = np.array([False, False, False]) - tm.assert_numpy_array_equal(lhs < pd.NaT, expected) - tm.assert_numpy_array_equal(pd.NaT > lhs, expected) - def test_value_counts_unique(self): # GH 7735 @@ -493,23 +466,6 @@ def test_equals(self): class TestTimedeltas(object): _multiprocess_can_split_ = True - def test_ops_error_str(self): - # GH 13624 - tdi = TimedeltaIndex(['1 day', '2 days']) - - for l, r in [(tdi, 'a'), ('a', tdi)]: - with pytest.raises(TypeError): - l + r - - with pytest.raises(TypeError): - l > r - - with pytest.raises(TypeError): - l == r - - with pytest.raises(TypeError): - l != r - def test_timedelta_ops(self): # GH4984 # make sure ops return Timedelta @@ 
-564,18 +520,3 @@ def test_timedelta_ops(self): s = Series([Timestamp('2015-02-03'), Timestamp('2015-02-07'), Timestamp('2015-02-15')]) assert s.diff().median() == timedelta(days=6) - - def test_compare_timedelta_series(self): - # regresssion test for GH5963 - s = pd.Series([timedelta(days=1), timedelta(days=2)]) - actual = s > timedelta(days=1) - expected = pd.Series([False, True]) - tm.assert_series_equal(actual, expected) - - def test_compare_timedelta_ndarray(self): - # GH11835 - periods = [Timedelta('0 days 01:00:00'), Timedelta('0 days 01:00:00')] - arr = np.array(periods) - result = arr[0] > arr - expected = np.array([False, False]) - tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index e25384ebf7d62..1af971e8a4326 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -4,6 +4,7 @@ from datetime import timedelta import pandas as pd +from pandas.errors import NullFrequencyError import pandas.util.testing as tm from pandas import (timedelta_range, date_range, Series, Timedelta, TimedeltaIndex, Index, DataFrame, @@ -50,6 +51,12 @@ def test_shift(self): '10 days 01:00:03'], freq='D') tm.assert_index_equal(result, expected) + def test_shift_no_freq(self): + # GH#19147 + tdi = TimedeltaIndex(['1 days 01:00:00', '2 days 01:00:00'], freq=None) + with pytest.raises(NullFrequencyError): + tdi.shift(2) + def test_pickle_compat_construction(self): pass @@ -196,61 +203,6 @@ def test_map(self): exp = Int64Index([f(x) for x in rng]) tm.assert_index_equal(result, exp) - def test_comparisons_nat(self): - - tdidx1 = pd.TimedeltaIndex(['1 day', pd.NaT, '1 day 00:00:01', pd.NaT, - '1 day 00:00:01', '5 day 00:00:03']) - tdidx2 = pd.TimedeltaIndex(['2 day', '2 day', pd.NaT, pd.NaT, - '1 day 00:00:02', '5 days 00:00:03']) - tdarr = np.array([np.timedelta64(2, 'D'), - np.timedelta64(2, 'D'), 
np.timedelta64('nat'), - np.timedelta64('nat'), - np.timedelta64(1, 'D') + np.timedelta64(2, 's'), - np.timedelta64(5, 'D') + np.timedelta64(3, 's')]) - - cases = [(tdidx1, tdidx2), (tdidx1, tdarr)] - - # Check pd.NaT is handles as the same as np.nan - for idx1, idx2 in cases: - - result = idx1 < idx2 - expected = np.array([True, False, False, False, True, False]) - tm.assert_numpy_array_equal(result, expected) - - result = idx2 > idx1 - expected = np.array([True, False, False, False, True, False]) - tm.assert_numpy_array_equal(result, expected) - - result = idx1 <= idx2 - expected = np.array([True, False, False, False, True, True]) - tm.assert_numpy_array_equal(result, expected) - - result = idx2 >= idx1 - expected = np.array([True, False, False, False, True, True]) - tm.assert_numpy_array_equal(result, expected) - - result = idx1 == idx2 - expected = np.array([False, False, False, False, False, True]) - tm.assert_numpy_array_equal(result, expected) - - result = idx1 != idx2 - expected = np.array([True, True, True, True, True, False]) - tm.assert_numpy_array_equal(result, expected) - - def test_comparisons_coverage(self): - rng = timedelta_range('1 days', periods=10) - - result = rng < rng[3] - exp = np.array([True, True, True] + [False] * 7) - tm.assert_numpy_array_equal(result, exp) - - # raise TypeError for now - pytest.raises(TypeError, rng.__lt__, rng[3].value) - - result = rng == list(rng) - exp = rng == rng - tm.assert_numpy_array_equal(result, exp) - def test_total_seconds(self): # GH 10939 # test index diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index 52b2d7205c849..de756375db8cb 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -821,6 +821,9 @@ def test_replace_series(self, how, to_key, from_key): if (from_key.startswith('datetime') and to_key.startswith('datetime')): # tested below return + elif from_key in ['datetime64[ns, US/Eastern]', 'datetime64[ns, 
UTC]']: + # tested below + return if how == 'dict': replacer = dict(zip(self.rep[from_key], self.rep[to_key])) @@ -849,6 +852,37 @@ def test_replace_series(self, how, to_key, from_key): tm.assert_series_equal(result, exp) + # TODO(jbrockmendel) commented out to only have a single xfail printed + @pytest.mark.xfail(reason='GH #18376, tzawareness-compat bug ' + 'in BlockManager.replace_list') + # @pytest.mark.parametrize('how', ['dict', 'series']) + # @pytest.mark.parametrize('to_key', ['timedelta64[ns]', 'bool', 'object', + # 'complex128', 'float64', 'int64']) + # @pytest.mark.parametrize('from_key', ['datetime64[ns, UTC]', + # 'datetime64[ns, US/Eastern]']) + # def test_replace_series_datetime_tz(self, how, to_key, from_key): + def test_replace_series_datetime_tz(self): + how = 'series' + from_key = 'datetime64[ns, US/Eastern]' + to_key = 'timedelta64[ns]' + + index = pd.Index([3, 4], name='xxx') + obj = pd.Series(self.rep[from_key], index=index, name='yyy') + assert obj.dtype == from_key + + if how == 'dict': + replacer = dict(zip(self.rep[from_key], self.rep[to_key])) + elif how == 'series': + replacer = pd.Series(self.rep[to_key], index=self.rep[from_key]) + else: + raise ValueError + + result = obj.replace(replacer) + exp = pd.Series(self.rep[to_key], index=index, name='yyy') + assert exp.dtype == to_key + + tm.assert_series_equal(result, exp) + # TODO(jreback) commented out to only have a single xfail printed @pytest.mark.xfail(reason="different tz, " "currently mask_missing raises SystemError") diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index fb5f094f9462b..433b0d87ac005 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -420,6 +420,13 @@ def test_loc_setitem_consistency(self): df.loc[:, 'date'] = 1.0 tm.assert_frame_equal(df, expected) + # GH 15494 + # setting on frame with single row + df = DataFrame({'date': Series([Timestamp('20180101')])}) + df.loc[:, 'date'] = 'string' + 
expected = DataFrame({'date': Series(['string'])}) + tm.assert_frame_equal(df, expected) + def test_loc_setitem_consistency_empty(self): # empty (essentially noops) expected = DataFrame(columns=['x', 'y']) diff --git a/pandas/tests/indexing/test_multiindex.py b/pandas/tests/indexing/test_multiindex.py index f69b9d98143b0..43656a392e582 100644 --- a/pandas/tests/indexing/test_multiindex.py +++ b/pandas/tests/indexing/test_multiindex.py @@ -705,6 +705,26 @@ def test_multiindex_symmetric_difference(self): result = idx ^ idx2 assert result.names == [None, None] + def test_multiindex_contains_dropped(self): + # GH 19027 + # test that dropped MultiIndex levels are not in the MultiIndex + # despite continuing to be in the MultiIndex's levels + idx = MultiIndex.from_product([[1, 2], [3, 4]]) + assert 2 in idx + idx = idx.drop(2) + + # drop implementation keeps 2 in the levels + assert 2 in idx.levels[0] + # but it should no longer be in the index itself + assert 2 not in idx + + # also applies to strings + idx = MultiIndex.from_product([['a', 'b'], ['c', 'd']]) + assert 'a' in idx + idx = idx.drop('a') + assert 'a' in idx.levels[0] + assert 'a' not in idx + class TestMultiIndexSlicers(object): diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 623d2d39607c2..57884e9816ed3 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -19,7 +19,6 @@ import pandas.core.algorithms as algos import pandas.util.testing as tm import pandas as pd -from pandas._libs import lib from pandas.util.testing import (assert_almost_equal, assert_frame_equal, randn, assert_series_equal) from pandas.compat import zip, u @@ -39,8 +38,8 @@ def mgr(): def assert_block_equal(left, right): tm.assert_numpy_array_equal(left.values, right.values) assert left.dtype == right.dtype - assert isinstance(left.mgr_locs, lib.BlockPlacement) - assert isinstance(right.mgr_locs, lib.BlockPlacement) + assert 
isinstance(left.mgr_locs, BlockPlacement) + assert isinstance(right.mgr_locs, BlockPlacement) tm.assert_numpy_array_equal(left.mgr_locs.as_array, right.mgr_locs.as_array) @@ -222,7 +221,7 @@ def _check(blk): _check(self.bool_block) def test_mgr_locs(self): - assert isinstance(self.fblock.mgr_locs, lib.BlockPlacement) + assert isinstance(self.fblock.mgr_locs, BlockPlacement) tm.assert_numpy_array_equal(self.fblock.mgr_locs.as_array, np.array([0, 2, 4], dtype=np.int64)) @@ -264,14 +263,14 @@ def test_insert(self): def test_delete(self): newb = self.fblock.copy() newb.delete(0) - assert isinstance(newb.mgr_locs, lib.BlockPlacement) + assert isinstance(newb.mgr_locs, BlockPlacement) tm.assert_numpy_array_equal(newb.mgr_locs.as_array, np.array([2, 4], dtype=np.int64)) assert (newb.values[0] == 1).all() newb = self.fblock.copy() newb.delete(1) - assert isinstance(newb.mgr_locs, lib.BlockPlacement) + assert isinstance(newb.mgr_locs, BlockPlacement) tm.assert_numpy_array_equal(newb.mgr_locs.as_array, np.array([0, 4], dtype=np.int64)) assert (newb.values[1] == 2).all() @@ -679,7 +678,7 @@ def test_consolidate_ordering_issues(self, mgr): assert cons.nblocks == 4 cons = mgr.consolidate().get_numeric_data() assert cons.nblocks == 1 - assert isinstance(cons.blocks[0].mgr_locs, lib.BlockPlacement) + assert isinstance(cons.blocks[0].mgr_locs, BlockPlacement) tm.assert_numpy_array_equal(cons.blocks[0].mgr_locs.as_array, np.arange(len(cons.items), dtype=np.int64)) @@ -1254,3 +1253,11 @@ def test_binop_other(self, op, value, dtype): result = op(s, e).dtypes expected = op(s, value).dtypes assert_series_equal(result, expected) + + +def test_deprecated_fastpath(): + # GH#19265 + values = np.random.rand(3, 3) + with tm.assert_produces_warning(DeprecationWarning, + check_stacklevel=False): + make_block(values, placement=np.arange(3), fastpath=True) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 23b42b612dace..b277d8256e612 100644 --- 
a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -883,6 +883,29 @@ def test_datetimelike_frame(self): '[10 rows x 2 columns]') assert repr(df) == expected + @pytest.mark.parametrize('start_date', [ + '2017-01-01 23:59:59.999999999', + '2017-01-01 23:59:59.99999999', + '2017-01-01 23:59:59.9999999', + '2017-01-01 23:59:59.999999', + '2017-01-01 23:59:59.99999', + '2017-01-01 23:59:59.9999', + ]) + def test_datetimeindex_highprecision(self, start_date): + # GH19030 + # Check that high-precision time values for the end of day are + # included in repr for DatetimeIndex + df = DataFrame({'A': date_range(start=start_date, + freq='D', periods=5)}) + result = str(df) + assert start_date in result + + dti = date_range(start=start_date, + freq='D', periods=5) + df = DataFrame({'A': range(5)}, index=dti) + result = str(df.index) + assert start_date in result + def test_nonunicode_nonascii_alignment(self): df = DataFrame([["aa\xc3\xa4\xc3\xa4", 1], ["bbbb", 2]]) rep_str = df.to_string() @@ -1914,6 +1937,27 @@ def test_datetimeindex(self): result = str(s2.index) assert 'NaT' in result + @pytest.mark.parametrize('start_date', [ + '2017-01-01 23:59:59.999999999', + '2017-01-01 23:59:59.99999999', + '2017-01-01 23:59:59.9999999', + '2017-01-01 23:59:59.999999', + '2017-01-01 23:59:59.99999', + '2017-01-01 23:59:59.9999' + ]) + def test_datetimeindex_highprecision(self, start_date): + # GH19030 + # Check that high-precision time values for the end of day are + # included in repr for DatetimeIndex + s1 = Series(date_range(start=start_date, freq='D', periods=5)) + result = str(s1) + assert start_date in result + + dti = date_range(start=start_date, freq='D', periods=5) + s2 = Series(3, index=dti) + result = str(s2.index) + assert start_date in result + def test_timedelta64(self): from datetime import datetime, timedelta diff --git a/pandas/tests/io/formats/test_to_excel.py b/pandas/tests/io/formats/test_to_excel.py index 
26a9bb018f30a..2d691bf2c5d8e 100644 --- a/pandas/tests/io/formats/test_to_excel.py +++ b/pandas/tests/io/formats/test_to_excel.py @@ -4,7 +4,9 @@ """ import pytest +import pandas.util.testing as tm +from warnings import catch_warnings from pandas.io.formats.excel import CSSToExcelConverter @@ -212,3 +214,61 @@ def test_css_to_excel_multiple(): def test_css_to_excel_inherited(css, inherited, expected): convert = CSSToExcelConverter(inherited) assert expected == convert(css) + + +@pytest.mark.parametrize("input_color,output_color", ( + [(name, rgb) for name, rgb in CSSToExcelConverter.NAMED_COLORS.items()] + + [("#" + rgb, rgb) for rgb in CSSToExcelConverter.NAMED_COLORS.values()] + + [("#F0F", "FF00FF"), ("#ABC", "AABBCC")]) +) +def test_css_to_excel_good_colors(input_color, output_color): + # see gh-18392 + css = ("border-top-color: {color}; " + "border-right-color: {color}; " + "border-bottom-color: {color}; " + "border-left-color: {color}; " + "background-color: {color}; " + "color: {color}").format(color=input_color) + + expected = dict() + + expected["fill"] = { + "patternType": "solid", + "fgColor": output_color + } + + expected["font"] = { + "color": output_color + } + + expected["border"] = { + k: { + "color": output_color, + } for k in ("top", "right", "bottom", "left") + } + + with tm.assert_produces_warning(None): + convert = CSSToExcelConverter() + assert expected == convert(css) + + +@pytest.mark.parametrize("input_color", [None, "not-a-color"]) +def test_css_to_excel_bad_colors(input_color): + # see gh-18392 + css = ("border-top-color: {color}; " + "border-right-color: {color}; " + "border-bottom-color: {color}; " + "border-left-color: {color}; " + "background-color: {color}; " + "color: {color}").format(color=input_color) + + expected = dict() + + if input_color is not None: + expected["fill"] = { + "patternType": "solid" + } + + with catch_warnings(record=True): + convert = CSSToExcelConverter() + assert expected == convert(css) diff --git 
a/pandas/tests/io/json/test_compression.py b/pandas/tests/io/json/test_compression.py index a83ec53904b28..08335293f9292 100644 --- a/pandas/tests/io/json/test_compression.py +++ b/pandas/tests/io/json/test_compression.py @@ -1,50 +1,24 @@ import pytest import pandas as pd -from pandas import compat import pandas.util.testing as tm -import pandas.util._test_decorators as td from pandas.util.testing import assert_frame_equal, assert_raises_regex -COMPRESSION_TYPES = [None, 'bz2', 'gzip', - pytest.param('xz', marks=td.skip_if_no_lzma)] - - -def decompress_file(path, compression): - if compression is None: - f = open(path, 'rb') - elif compression == 'gzip': - import gzip - f = gzip.GzipFile(path, 'rb') - elif compression == 'bz2': - import bz2 - f = bz2.BZ2File(path, 'rb') - elif compression == 'xz': - lzma = compat.import_lzma() - f = lzma.open(path, 'rb') - else: - msg = 'Unrecognized compression type: {}'.format(compression) - raise ValueError(msg) - - result = f.read().decode('utf8') - f.close() - return result - - -@pytest.mark.parametrize('compression', COMPRESSION_TYPES) -def test_compression_roundtrip(compression): +def test_compression_roundtrip(compression_no_zip): df = pd.DataFrame([[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]], index=['A', 'B'], columns=['X', 'Y', 'Z']) with tm.ensure_clean() as path: - df.to_json(path, compression=compression) - assert_frame_equal(df, pd.read_json(path, compression=compression)) + df.to_json(path, compression=compression_no_zip) + assert_frame_equal(df, pd.read_json(path, + compression=compression_no_zip)) # explicitly ensure file was compressed. 
- uncompressed_content = decompress_file(path, compression) - assert_frame_equal(df, pd.read_json(uncompressed_content)) + with tm.decompress_file(path, compression_no_zip) as fh: + result = fh.read().decode('utf8') + assert_frame_equal(df, pd.read_json(result)) def test_compress_zip_value_error(): @@ -67,8 +41,7 @@ def test_read_zipped_json(): assert_frame_equal(uncompressed_df, compressed_df) -@pytest.mark.parametrize('compression', COMPRESSION_TYPES) -def test_with_s3_url(compression): +def test_with_s3_url(compression_no_zip): boto3 = pytest.importorskip('boto3') pytest.importorskip('s3fs') moto = pytest.importorskip('moto') @@ -79,33 +52,36 @@ def test_with_s3_url(compression): bucket = conn.create_bucket(Bucket="pandas-test") with tm.ensure_clean() as path: - df.to_json(path, compression=compression) + df.to_json(path, compression=compression_no_zip) with open(path, 'rb') as f: bucket.put_object(Key='test-1', Body=f) roundtripped_df = pd.read_json('s3://pandas-test/test-1', - compression=compression) + compression=compression_no_zip) assert_frame_equal(df, roundtripped_df) -@pytest.mark.parametrize('compression', COMPRESSION_TYPES) -def test_lines_with_compression(compression): +def test_lines_with_compression(compression_no_zip): + with tm.ensure_clean() as path: df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}') - df.to_json(path, orient='records', lines=True, compression=compression) + df.to_json(path, orient='records', lines=True, + compression=compression_no_zip) roundtripped_df = pd.read_json(path, lines=True, - compression=compression) + compression=compression_no_zip) assert_frame_equal(df, roundtripped_df) -@pytest.mark.parametrize('compression', COMPRESSION_TYPES) -def test_chunksize_with_compression(compression): +def test_chunksize_with_compression(compression_no_zip): + with tm.ensure_clean() as path: df = pd.read_json('{"a": ["foo", "bar", "baz"], "b": [4, 5, 6]}') - df.to_json(path, orient='records', lines=True, compression=compression) + 
df.to_json(path, orient='records', lines=True, + compression=compression_no_zip) - roundtripped_df = pd.concat(pd.read_json(path, lines=True, chunksize=1, - compression=compression)) + res = pd.read_json(path, lines=True, chunksize=1, + compression=compression_no_zip) + roundtripped_df = pd.concat(res) assert_frame_equal(df, roundtripped_df) diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index dab56e264b955..49b39c17238ae 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -12,8 +12,10 @@ from pandas.io.json.table_schema import ( as_json_table_type, build_table_schema, - make_field, + convert_pandas_type_to_json_field, + convert_json_field_to_pandas_type, set_default_names) +import pandas.util.testing as tm class TestBuildSchema(object): @@ -86,82 +88,82 @@ def test_multiindex(self): class TestTableSchemaType(object): - def test_as_json_table_type_int_data(self): + @pytest.mark.parametrize('int_type', [ + np.int, np.int16, np.int32, np.int64]) + def test_as_json_table_type_int_data(self, int_type): int_data = [1, 2, 3] - int_types = [np.int, np.int16, np.int32, np.int64] - for t in int_types: - assert as_json_table_type(np.array( - int_data, dtype=t)) == 'integer' + assert as_json_table_type(np.array( + int_data, dtype=int_type)) == 'integer' - def test_as_json_table_type_float_data(self): + @pytest.mark.parametrize('float_type', [ + np.float, np.float16, np.float32, np.float64]) + def test_as_json_table_type_float_data(self, float_type): float_data = [1., 2., 3.] 
- float_types = [np.float, np.float16, np.float32, np.float64] - for t in float_types: - assert as_json_table_type(np.array( - float_data, dtype=t)) == 'number' + assert as_json_table_type(np.array( + float_data, dtype=float_type)) == 'number' - def test_as_json_table_type_bool_data(self): + @pytest.mark.parametrize('bool_type', [bool, np.bool]) + def test_as_json_table_type_bool_data(self, bool_type): bool_data = [True, False] - bool_types = [bool, np.bool] - for t in bool_types: - assert as_json_table_type(np.array( - bool_data, dtype=t)) == 'boolean' - - def test_as_json_table_type_date_data(self): - date_data = [pd.to_datetime(['2016']), - pd.to_datetime(['2016'], utc=True), - pd.Series(pd.to_datetime(['2016'])), - pd.Series(pd.to_datetime(['2016'], utc=True)), - pd.period_range('2016', freq='A', periods=3)] - for arr in date_data: - assert as_json_table_type(arr) == 'datetime' - - def test_as_json_table_type_string_data(self): - strings = [pd.Series(['a', 'b']), pd.Index(['a', 'b'])] - for t in strings: - assert as_json_table_type(t) == 'string' - - def test_as_json_table_type_categorical_data(self): - assert as_json_table_type(pd.Categorical(['a'])) == 'any' - assert as_json_table_type(pd.Categorical([1])) == 'any' - assert as_json_table_type(pd.Series(pd.Categorical([1]))) == 'any' - assert as_json_table_type(pd.CategoricalIndex([1])) == 'any' - assert as_json_table_type(pd.Categorical([1])) == 'any' + assert as_json_table_type(np.array( + bool_data, dtype=bool_type)) == 'boolean' + + @pytest.mark.parametrize('date_data', [ + pd.to_datetime(['2016']), + pd.to_datetime(['2016'], utc=True), + pd.Series(pd.to_datetime(['2016'])), + pd.Series(pd.to_datetime(['2016'], utc=True)), + pd.period_range('2016', freq='A', periods=3) + ]) + def test_as_json_table_type_date_data(self, date_data): + assert as_json_table_type(date_data) == 'datetime' + + @pytest.mark.parametrize('str_data', [ + pd.Series(['a', 'b']), pd.Index(['a', 'b'])]) + def 
test_as_json_table_type_string_data(self, str_data): + assert as_json_table_type(str_data) == 'string' + + @pytest.mark.parametrize('cat_data', [ + pd.Categorical(['a']), + pd.Categorical([1]), + pd.Series(pd.Categorical([1])), + pd.CategoricalIndex([1]), + pd.Categorical([1])]) + def test_as_json_table_type_categorical_data(self, cat_data): + assert as_json_table_type(cat_data) == 'any' # ------ # dtypes # ------ - def test_as_json_table_type_int_dtypes(self): - integers = [np.int, np.int16, np.int32, np.int64] - for t in integers: - assert as_json_table_type(t) == 'integer' - - def test_as_json_table_type_float_dtypes(self): - floats = [np.float, np.float16, np.float32, np.float64] - for t in floats: - assert as_json_table_type(t) == 'number' - - def test_as_json_table_type_bool_dtypes(self): - bools = [bool, np.bool] - for t in bools: - assert as_json_table_type(t) == 'boolean' - - def test_as_json_table_type_date_dtypes(self): + @pytest.mark.parametrize('int_dtype', [ + np.int, np.int16, np.int32, np.int64]) + def test_as_json_table_type_int_dtypes(self, int_dtype): + assert as_json_table_type(int_dtype) == 'integer' + + @pytest.mark.parametrize('float_dtype', [ + np.float, np.float16, np.float32, np.float64]) + def test_as_json_table_type_float_dtypes(self, float_dtype): + assert as_json_table_type(float_dtype) == 'number' + + @pytest.mark.parametrize('bool_dtype', [bool, np.bool]) + def test_as_json_table_type_bool_dtypes(self, bool_dtype): + assert as_json_table_type(bool_dtype) == 'boolean' + + @pytest.mark.parametrize('date_dtype', [ + np.datetime64, np.dtype(" arr + expected = np.array([False, False]) + tm.assert_numpy_array_equal(result, expected) + class TestTimedeltas(object): _multiprocess_can_split_ = True diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index a3e9a0442ea0b..2b72eef2c6712 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -19,17 +19,13 @@ from 
pandas.tseries import offsets -from pandas._libs.tslibs import conversion, period +from pandas._libs.tslibs import conversion from pandas._libs.tslibs.timezones import get_timezone, dateutil_gettz as gettz from pandas._libs.tslibs.frequencies import _INVALID_FREQ_ERROR from pandas.compat import long, PY3 -from pandas.util.testing import assert_series_equal from pandas.compat.numpy import np_datetime64_compat -from pandas import (Timestamp, date_range, Period, Timedelta, compat, - Series, NaT, DataFrame) -from pandas.tseries.frequencies import (RESO_DAY, RESO_HR, RESO_MIN, RESO_US, - RESO_MS, RESO_SEC) +from pandas import Timestamp, Period, Timedelta, NaT class TestTimestampArithmetic(object): @@ -54,6 +50,50 @@ def test_delta_preserve_nanos(self): result = val + timedelta(1) assert result.nanosecond == val.nanosecond + def test_timestamp_sub_datetime(self): + dt = datetime(2013, 10, 12) + ts = Timestamp(datetime(2013, 10, 13)) + assert (ts - dt).days == 1 + assert (dt - ts).days == -1 + + def test_addition_subtraction_types(self): + # Assert on the types resulting from Timestamp +/- various date/time + # objects + dt = datetime(2014, 3, 4) + td = timedelta(seconds=1) + # build a timestamp with a frequency, since then it supports + # addition/subtraction of integers + ts = Timestamp(dt, freq='D') + + assert type(ts + 1) == Timestamp + assert type(ts - 1) == Timestamp + + # Timestamp + datetime not supported, though subtraction is supported + # and yields timedelta more tests in tseries/base/tests/test_base.py + assert type(ts - dt) == Timedelta + assert type(ts + td) == Timestamp + assert type(ts - td) == Timestamp + + # Timestamp +/- datetime64 not supported, so not tested (could possibly + # assert error raised?) 
+ td64 = np.timedelta64(1, 'D') + assert type(ts + td64) == Timestamp + assert type(ts - td64) == Timestamp + + def test_addition_subtraction_preserve_frequency(self): + ts = Timestamp('2014-03-05', freq='D') + td = timedelta(days=1) + original_freq = ts.freq + + assert (ts + 1).freq == original_freq + assert (ts - 1).freq == original_freq + assert (ts + td).freq == original_freq + assert (ts - td).freq == original_freq + + td64 = np.timedelta64(1, 'D') + assert (ts + td64).freq == original_freq + assert (ts - td64).freq == original_freq + class TestTimestampProperties(object): @@ -76,6 +116,112 @@ def test_properties_business(self): assert control.is_month_end assert control.is_quarter_end + def test_fields(self): + def check(value, equal): + # that we are int/long like + assert isinstance(value, (int, long)) + assert value == equal + + # GH 10050 + ts = Timestamp('2015-05-10 09:06:03.000100001') + check(ts.year, 2015) + check(ts.month, 5) + check(ts.day, 10) + check(ts.hour, 9) + check(ts.minute, 6) + check(ts.second, 3) + pytest.raises(AttributeError, lambda: ts.millisecond) + check(ts.microsecond, 100) + check(ts.nanosecond, 1) + check(ts.dayofweek, 6) + check(ts.quarter, 2) + check(ts.dayofyear, 130) + check(ts.week, 19) + check(ts.daysinmonth, 31) + check(ts.daysinmonth, 31) + + # GH 13303 + ts = Timestamp('2014-12-31 23:59:00-05:00', tz='US/Eastern') + check(ts.year, 2014) + check(ts.month, 12) + check(ts.day, 31) + check(ts.hour, 23) + check(ts.minute, 59) + check(ts.second, 0) + pytest.raises(AttributeError, lambda: ts.millisecond) + check(ts.microsecond, 0) + check(ts.nanosecond, 0) + check(ts.dayofweek, 2) + check(ts.quarter, 4) + check(ts.dayofyear, 365) + check(ts.week, 1) + check(ts.daysinmonth, 31) + + ts = Timestamp('2014-01-01 00:00:00+01:00') + starts = ['is_month_start', 'is_quarter_start', 'is_year_start'] + for start in starts: + assert getattr(ts, start) + ts = Timestamp('2014-12-31 23:59:59+01:00') + ends = ['is_month_end', 'is_year_end', 
'is_quarter_end'] + for end in ends: + assert getattr(ts, end) + + @pytest.mark.parametrize('data, expected', + [(Timestamp('2017-08-28 23:00:00'), 'Monday'), + (Timestamp('2017-08-28 23:00:00', tz='EST'), + 'Monday')]) + def test_weekday_name(self, data, expected): + # GH 17354 + assert data.weekday_name == expected + + @pytest.mark.parametrize('tz', [None, 'UTC', 'US/Eastern', 'Asia/Tokyo']) + def test_is_leap_year(self, tz): + # GH 13727 + dt = Timestamp('2000-01-01 00:00:00', tz=tz) + assert dt.is_leap_year + assert isinstance(dt.is_leap_year, bool) + + dt = Timestamp('1999-01-01 00:00:00', tz=tz) + assert not dt.is_leap_year + + dt = Timestamp('2004-01-01 00:00:00', tz=tz) + assert dt.is_leap_year + + dt = Timestamp('2100-01-01 00:00:00', tz=tz) + assert not dt.is_leap_year + + def test_woy_boundary(self): + # make sure weeks at year boundaries are correct + d = datetime(2013, 12, 31) + result = Timestamp(d).week + expected = 1 # ISO standard + assert result == expected + + d = datetime(2008, 12, 28) + result = Timestamp(d).week + expected = 52 # ISO standard + assert result == expected + + d = datetime(2009, 12, 31) + result = Timestamp(d).week + expected = 53 # ISO standard + assert result == expected + + d = datetime(2010, 1, 1) + result = Timestamp(d).week + expected = 53 # ISO standard + assert result == expected + + d = datetime(2010, 1, 3) + result = Timestamp(d).week + expected = 53 # ISO standard + assert result == expected + + result = np.array([Timestamp(datetime(*args)).week + for args in [(2000, 1, 1), (2000, 1, 2), ( + 2005, 1, 1), (2005, 1, 2)]]) + assert (result == [52, 52, 53, 53]).all() + class TestTimestampConstructors(object): @@ -310,24 +456,60 @@ def test_constructor_fromordinal(self): ts = Timestamp.fromordinal(dt_tz.toordinal(), tz='US/Eastern') assert ts.to_pydatetime() == dt_tz + def test_out_of_bounds_value(self): + one_us = np.timedelta64(1).astype('timedelta64[us]') -class TestTimestamp(object): + # By definition we can't go out of 
bounds in [ns], so we + # convert the datetime64s to [us] so we can go out of bounds + min_ts_us = np.datetime64(Timestamp.min).astype('M8[us]') + max_ts_us = np.datetime64(Timestamp.max).astype('M8[us]') - def test_conversion(self): - # GH 9255 - ts = Timestamp('2000-01-01') + # No error for the min/max datetimes + Timestamp(min_ts_us) + Timestamp(max_ts_us) - result = ts.to_pydatetime() - expected = datetime(2000, 1, 1) - assert result == expected - assert type(result) == type(expected) + # One us less than the minimum is an error + with pytest.raises(ValueError): + Timestamp(min_ts_us - one_us) - result = ts.to_datetime64() - expected = np.datetime64(ts.value, 'ns') - assert result == expected - assert type(result) == type(expected) - assert result.dtype == expected.dtype + # One us more than the maximum is an error + with pytest.raises(ValueError): + Timestamp(max_ts_us + one_us) + + def test_out_of_bounds_string(self): + with pytest.raises(ValueError): + Timestamp('1676-01-01') + with pytest.raises(ValueError): + Timestamp('2263-01-01') + def test_bounds_with_different_units(self): + out_of_bounds_dates = ('1677-09-21', '2262-04-12') + + time_units = ('D', 'h', 'm', 's', 'ms', 'us') + + for date_string in out_of_bounds_dates: + for unit in time_units: + dt64 = np.datetime64(date_string, dtype='M8[%s]' % unit) + with pytest.raises(ValueError): + Timestamp(dt64) + + in_bounds_dates = ('1677-09-23', '2262-04-11') + + for date_string in in_bounds_dates: + for unit in time_units: + dt64 = np.datetime64(date_string, dtype='M8[%s]' % unit) + Timestamp(dt64) + + def test_min_valid(self): + # Ensure that Timestamp.min is a valid Timestamp + Timestamp(Timestamp.min) + + def test_max_valid(self): + # Ensure that Timestamp.max is a valid Timestamp + Timestamp(Timestamp.max) + + +class TestTimestamp(object): @pytest.mark.parametrize('freq', ['D', 'M', 'S', 'N']) @pytest.mark.parametrize('date', ['2014-03-07', '2014-01-01 09:00', '2014-01-01 00:00:00.000000001']) @@ -394,22 
+576,6 @@ def test_timestamp_repr_pre1900(self): result = repr(stamp) assert iso8601 in result - def test_bounds_with_different_units(self): - out_of_bounds_dates = ('1677-09-21', '2262-04-12', ) - - time_units = ('D', 'h', 'm', 's', 'ms', 'us') - - for date_string in out_of_bounds_dates: - for unit in time_units: - pytest.raises(ValueError, Timestamp, np.datetime64( - date_string, dtype='M8[%s]' % unit)) - - in_bounds_dates = ('1677-09-23', '2262-04-11', ) - - for date_string in in_bounds_dates: - for unit in time_units: - Timestamp(np.datetime64(date_string, dtype='M8[%s]' % unit)) - def test_tz(self): t = '2014-02-01 09:00' ts = Timestamp(t) @@ -435,11 +601,9 @@ def test_tz_localize_ambiguous(self): ts_dst = ts.tz_localize('US/Eastern', ambiguous=True) ts_no_dst = ts.tz_localize('US/Eastern', ambiguous=False) - rng = date_range('2014-11-02', periods=3, freq='H', tz='US/Eastern') - assert rng[1] == ts_dst - assert rng[2] == ts_no_dst - pytest.raises(ValueError, ts.tz_localize, 'US/Eastern', - ambiguous='infer') + assert (ts_no_dst.value - ts_dst.value) / 1e9 == 3600 + with pytest.raises(ValueError): + ts.tz_localize('US/Eastern', ambiguous='infer') # GH 8025 with tm.assert_raises_regex(TypeError, @@ -501,24 +665,6 @@ def test_tz_convert_roundtrip(self, tz): assert reset.tzinfo is None assert reset == converted.tz_convert('UTC').tz_localize(None) - def test_barely_oob_dts(self): - one_us = np.timedelta64(1).astype('timedelta64[us]') - - # By definition we can't go out of bounds in [ns], so we - # convert the datetime64s to [us] so we can go out of bounds - min_ts_us = np.datetime64(Timestamp.min).astype('M8[us]') - max_ts_us = np.datetime64(Timestamp.max).astype('M8[us]') - - # No error for the min/max datetimes - Timestamp(min_ts_us) - Timestamp(max_ts_us) - - # One us less than the minimum is an error - pytest.raises(ValueError, Timestamp, min_ts_us - one_us) - - # One us more than the maximum is an error - pytest.raises(ValueError, Timestamp, max_ts_us + 
one_us) - def test_utc_z_designator(self): assert get_timezone(Timestamp('2014-11-02 01:00Z').tzinfo) == 'UTC' @@ -569,64 +715,6 @@ def test_asm8(self): assert (Timestamp('nat').asm8.view('i8') == np.datetime64('nat', 'ns').view('i8')) - def test_fields(self): - def check(value, equal): - # that we are int/long like - assert isinstance(value, (int, compat.long)) - assert value == equal - - # GH 10050 - ts = Timestamp('2015-05-10 09:06:03.000100001') - check(ts.year, 2015) - check(ts.month, 5) - check(ts.day, 10) - check(ts.hour, 9) - check(ts.minute, 6) - check(ts.second, 3) - pytest.raises(AttributeError, lambda: ts.millisecond) - check(ts.microsecond, 100) - check(ts.nanosecond, 1) - check(ts.dayofweek, 6) - check(ts.quarter, 2) - check(ts.dayofyear, 130) - check(ts.week, 19) - check(ts.daysinmonth, 31) - check(ts.daysinmonth, 31) - - # GH 13303 - ts = Timestamp('2014-12-31 23:59:00-05:00', tz='US/Eastern') - check(ts.year, 2014) - check(ts.month, 12) - check(ts.day, 31) - check(ts.hour, 23) - check(ts.minute, 59) - check(ts.second, 0) - pytest.raises(AttributeError, lambda: ts.millisecond) - check(ts.microsecond, 0) - check(ts.nanosecond, 0) - check(ts.dayofweek, 2) - check(ts.quarter, 4) - check(ts.dayofyear, 365) - check(ts.week, 1) - check(ts.daysinmonth, 31) - - ts = Timestamp('2014-01-01 00:00:00+01:00') - starts = ['is_month_start', 'is_quarter_start', 'is_year_start'] - for start in starts: - assert getattr(ts, start) - ts = Timestamp('2014-12-31 23:59:59+01:00') - ends = ['is_month_end', 'is_year_end', 'is_quarter_end'] - for end in ends: - assert getattr(ts, end) - - @pytest.mark.parametrize('data, expected', - [(Timestamp('2017-08-28 23:00:00'), 'Monday'), - (Timestamp('2017-08-28 23:00:00', tz='EST'), - 'Monday')]) - def test_weekday_name(self, data, expected): - # GH 17354 - assert data.weekday_name == expected - def test_pprint(self): # GH12622 import pprint @@ -646,16 +734,6 @@ def test_pprint(self): 'foo': 1}""" assert result == expected - def 
test_to_pydatetime_nonzero_nano(self): - ts = Timestamp('2011-01-01 9:00:00.123456789') - - # Warn the user of data loss (nanoseconds). - with tm.assert_produces_warning(UserWarning, - check_stacklevel=False): - expected = datetime(2011, 1, 1, 9, 0, 0, 123456) - result = ts.to_pydatetime() - assert result == expected - def test_round(self): # round @@ -684,11 +762,6 @@ def test_round(self): expected = Timestamp('20130104 12:30:00') assert result == expected - dti = date_range('20130101 09:10:11', periods=5) - result = dti.round('D') - expected = date_range('20130101', periods=5) - tm.assert_index_equal(result, expected) - # floor dt = Timestamp('20130101 09:10:11') result = dt.floor('D') @@ -711,19 +784,6 @@ def test_round(self): result = dt.round('s') assert result == dt - dti = date_range('20130101 09:10:11', - periods=5).tz_localize('UTC').tz_convert('US/Eastern') - result = dti.round('D') - expected = date_range('20130101', periods=5).tz_localize('US/Eastern') - tm.assert_index_equal(result, expected) - - result = dti.round('s') - tm.assert_index_equal(result, dti) - - # invalid - for freq in ['Y', 'M', 'foobar']: - pytest.raises(ValueError, lambda: dti.round(freq)) - # GH 14440 & 15578 result = Timestamp('2016-10-17 12:00:00.0015').round('ms') expected = Timestamp('2016-10-17 12:00:00.002000') @@ -845,7 +905,7 @@ def check(val, unit=None, h=1, s=1, us=0): check(days, unit='D', h=0) # using truediv, so these are like floats - if compat.PY3: + if PY3: check((val + 500000) / long(1000000000), unit='s', us=500) check((val + 500000000) / long(1000000000), unit='s', us=500000) check((val + 500000) / long(1000000), unit='ms', us=500) @@ -900,22 +960,6 @@ def test_hash_equivalent(self): stamp = Timestamp(datetime(2011, 1, 1)) assert d[stamp] == 5 - @pytest.mark.parametrize('tz', [None, 'UTC', 'US/Eastern', 'Asia/Tokyo']) - def test_is_leap_year(self, tz): - # GH 13727 - dt = Timestamp('2000-01-01 00:00:00', tz=tz) - assert dt.is_leap_year - assert 
isinstance(dt.is_leap_year, bool) - - dt = Timestamp('1999-01-01 00:00:00', tz=tz) - assert not dt.is_leap_year - - dt = Timestamp('2004-01-01 00:00:00', tz=tz) - assert dt.is_leap_year - - dt = Timestamp('2100-01-01 00:00:00', tz=tz) - assert not dt.is_leap_year - @td.skip_if_windows def test_timestamp(self): # GH#17329 @@ -1017,13 +1061,6 @@ def test_compare_invalid(self): assert val != np.float64(1) assert val != np.int64(1) - # ops testing - df = DataFrame(np.random.randn(5, 2)) - a = df[0] - b = Series(np.random.randn(5)) - b.name = Timestamp('2000-01-01') - tm.assert_series_equal(a / b, 1 / (b / a)) - def test_cant_compare_tz_naive_w_aware(self): # see gh-1404 a = Timestamp('3/12/2012') @@ -1112,41 +1149,6 @@ def test_timestamp_compare_scalars(self): result = right_f(nat, rhs) assert result == expected - def test_timestamp_compare_series(self): - # make sure we can compare Timestamps on the right AND left hand side - # GH4982 - s = Series(date_range('20010101', periods=10), name='dates') - s_nat = s.copy(deep=True) - - s[0] = Timestamp('nat') - s[3] = Timestamp('nat') - - ops = {'lt': 'gt', 'le': 'ge', 'eq': 'eq', 'ne': 'ne'} - - for left, right in ops.items(): - left_f = getattr(operator, left) - right_f = getattr(operator, right) - - # no nats - expected = left_f(s, Timestamp('20010109')) - result = right_f(Timestamp('20010109'), s) - tm.assert_series_equal(result, expected) - - # nats - expected = left_f(s, Timestamp('nat')) - result = right_f(Timestamp('nat'), s) - tm.assert_series_equal(result, expected) - - # compare to timestamp with series containing nats - expected = left_f(s_nat, Timestamp('20010109')) - result = right_f(Timestamp('20010109'), s_nat) - tm.assert_series_equal(result, expected) - - # compare to nat with series containing nats - expected = left_f(s_nat, Timestamp('nat')) - result = right_f(Timestamp('nat'), s_nat) - tm.assert_series_equal(result, expected) - def test_timestamp_compare_with_early_datetime(self): # e.g. 
datetime.min stamp = Timestamp('2012-01-01') @@ -1250,79 +1252,6 @@ def test_nanosecond_timestamp(self): assert t.nanosecond == 10 -class TestTimestampOps(object): - - def test_timestamp_and_datetime(self): - assert ((Timestamp(datetime(2013, 10, 13)) - - datetime(2013, 10, 12)).days == 1) - assert ((datetime(2013, 10, 12) - - Timestamp(datetime(2013, 10, 13))).days == -1) - - def test_timestamp_and_series(self): - timestamp_series = Series(date_range('2014-03-17', periods=2, freq='D', - tz='US/Eastern')) - first_timestamp = timestamp_series[0] - - delta_series = Series([np.timedelta64(0, 'D'), np.timedelta64(1, 'D')]) - assert_series_equal(timestamp_series - first_timestamp, delta_series) - assert_series_equal(first_timestamp - timestamp_series, -delta_series) - - def test_addition_subtraction_types(self): - # Assert on the types resulting from Timestamp +/- various date/time - # objects - datetime_instance = datetime(2014, 3, 4) - timedelta_instance = timedelta(seconds=1) - # build a timestamp with a frequency, since then it supports - # addition/subtraction of integers - timestamp_instance = Timestamp(datetime_instance, freq='D') - - assert type(timestamp_instance + 1) == Timestamp - assert type(timestamp_instance - 1) == Timestamp - - # Timestamp + datetime not supported, though subtraction is supported - # and yields timedelta more tests in tseries/base/tests/test_base.py - assert type(timestamp_instance - datetime_instance) == Timedelta - assert type(timestamp_instance + timedelta_instance) == Timestamp - assert type(timestamp_instance - timedelta_instance) == Timestamp - - # Timestamp +/- datetime64 not supported, so not tested (could possibly - # assert error raised?) 
- timedelta64_instance = np.timedelta64(1, 'D') - assert type(timestamp_instance + timedelta64_instance) == Timestamp - assert type(timestamp_instance - timedelta64_instance) == Timestamp - - def test_addition_subtraction_preserve_frequency(self): - timestamp_instance = Timestamp('2014-03-05', freq='D') - timedelta_instance = timedelta(days=1) - original_freq = timestamp_instance.freq - - assert (timestamp_instance + 1).freq == original_freq - assert (timestamp_instance - 1).freq == original_freq - assert (timestamp_instance + timedelta_instance).freq == original_freq - assert (timestamp_instance - timedelta_instance).freq == original_freq - - timedelta64_instance = np.timedelta64(1, 'D') - assert (timestamp_instance + - timedelta64_instance).freq == original_freq - assert (timestamp_instance - - timedelta64_instance).freq == original_freq - - @pytest.mark.parametrize('tz', [None, 'Asia/Tokyo', 'US/Eastern', - 'dateutil/US/Eastern']) - def test_resolution(self, tz): - - for freq, expected in zip(['A', 'Q', 'M', 'D', 'H', 'T', - 'S', 'L', 'U'], - [RESO_DAY, RESO_DAY, - RESO_DAY, RESO_DAY, - RESO_HR, RESO_MIN, - RESO_SEC, RESO_MS, - RESO_US]): - idx = date_range(start='2013-04-01', periods=30, freq=freq, tz=tz) - result = period.resolution(idx.asi8, idx.tz) - assert result == expected - - class TestTimestampToJulianDate(object): def test_compare_1700(self): @@ -1347,6 +1276,31 @@ def test_compare_hour13(self): class TestTimestampConversion(object): + def test_conversion(self): + # GH#9255 + ts = Timestamp('2000-01-01') + + result = ts.to_pydatetime() + expected = datetime(2000, 1, 1) + assert result == expected + assert type(result) == type(expected) + + result = ts.to_datetime64() + expected = np.datetime64(ts.value, 'ns') + assert result == expected + assert type(result) == type(expected) + assert result.dtype == expected.dtype + + def test_to_pydatetime_nonzero_nano(self): + ts = Timestamp('2011-01-01 9:00:00.123456789') + + # Warn the user of data loss 
(nanoseconds). + with tm.assert_produces_warning(UserWarning, + check_stacklevel=False): + expected = datetime(2011, 1, 1, 9, 0, 0, 123456) + result = ts.to_pydatetime() + assert result == expected + def test_timestamp_to_datetime(self): stamp = Timestamp('20090415', tz='US/Eastern', freq='D') dtval = stamp.to_pydatetime() @@ -1384,102 +1338,3 @@ def test_to_datetime_bijective(self): with tm.assert_produces_warning(exp_warning, check_stacklevel=False): assert (Timestamp(Timestamp.min.to_pydatetime()).value / 1000 == Timestamp.min.value / 1000) - - -class TestTimeSeries(object): - - def test_timestamp_date_out_of_range(self): - pytest.raises(ValueError, Timestamp, '1676-01-01') - pytest.raises(ValueError, Timestamp, '2263-01-01') - - def test_timestamp_equality(self): - - # GH 11034 - s = Series([Timestamp('2000-01-29 01:59:00'), 'NaT']) - result = s != s - assert_series_equal(result, Series([False, True])) - result = s != s[0] - assert_series_equal(result, Series([False, True])) - result = s != s[1] - assert_series_equal(result, Series([True, True])) - - result = s == s - assert_series_equal(result, Series([True, False])) - result = s == s[0] - assert_series_equal(result, Series([True, False])) - result = s == s[1] - assert_series_equal(result, Series([False, False])) - - def test_series_box_timestamp(self): - rng = date_range('20090415', '20090519', freq='B') - s = Series(rng) - - assert isinstance(s[5], Timestamp) - - rng = date_range('20090415', '20090519', freq='B') - s = Series(rng, index=rng) - assert isinstance(s[5], Timestamp) - - assert isinstance(s.iat[5], Timestamp) - - def test_to_html_timestamp(self): - rng = date_range('2000-01-01', periods=10) - df = DataFrame(np.random.randn(10, 4), index=rng) - - result = df.to_html() - assert '2000-01-01' in result - - def test_series_map_box_timestamps(self): - # #2689, #2627 - s = Series(date_range('1/1/2000', periods=10)) - - def f(x): - return (x.hour, x.day, x.month) - - # it works! 
- s.map(f) - s.apply(f) - DataFrame(s).applymap(f) - - def test_woy_boundary(self): - # make sure weeks at year boundaries are correct - d = datetime(2013, 12, 31) - result = Timestamp(d).week - expected = 1 # ISO standard - assert result == expected - - d = datetime(2008, 12, 28) - result = Timestamp(d).week - expected = 52 # ISO standard - assert result == expected - - d = datetime(2009, 12, 31) - result = Timestamp(d).week - expected = 53 # ISO standard - assert result == expected - - d = datetime(2010, 1, 1) - result = Timestamp(d).week - expected = 53 # ISO standard - assert result == expected - - d = datetime(2010, 1, 3) - result = Timestamp(d).week - expected = 53 # ISO standard - assert result == expected - - result = np.array([Timestamp(datetime(*args)).week - for args in [(2000, 1, 1), (2000, 1, 2), ( - 2005, 1, 1), (2005, 1, 2)]]) - assert (result == [52, 52, 53, 53]).all() - - -class TestTsUtil(object): - - def test_min_valid(self): - # Ensure that Timestamp.min is a valid Timestamp - Timestamp(Timestamp.min) - - def test_max_valid(self): - # Ensure that Timestamp.max is a valid Timestamp - Timestamp(Timestamp.max) diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index 8ae7feab451f9..cf8698bc5ed5e 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -511,7 +511,7 @@ def test_cat_accessor(self): def test_cat_accessor_api(self): # GH 9322 - from pandas.core.categorical import CategoricalAccessor + from pandas.core.arrays.categorical import CategoricalAccessor assert Series.cat is CategoricalAccessor s = Series(list('aabbcde')).astype('category') assert isinstance(s.cat, CategoricalAccessor) diff --git a/pandas/tests/series/test_apply.py b/pandas/tests/series/test_apply.py index 8899ab585d6cb..3822ecd0a1b0e 100644 --- a/pandas/tests/series/test_apply.py +++ b/pandas/tests/series/test_apply.py @@ -77,6 +77,17 @@ def test_apply_args(self): assert result[0] == ['foo', 'bar'] assert 
isinstance(result[0], list) + def test_series_map_box_timestamps(self): + # GH#2689, GH#2627 + ser = Series(pd.date_range('1/1/2000', periods=10)) + + def func(x): + return (x.hour, x.day, x.month) + + # it works! + ser.map(func) + ser.apply(func) + def test_apply_box(self): # ufunc will not be boxed. Same test cases as the test_map_box vals = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')] diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py new file mode 100644 index 0000000000000..ca558dd6b7cd5 --- /dev/null +++ b/pandas/tests/series/test_arithmetic.py @@ -0,0 +1,136 @@ +# -*- coding: utf-8 -*- +from datetime import timedelta +import operator + +import numpy as np + +import pandas as pd +import pandas.util.testing as tm + + +class TestSeriesComparison(object): + def test_compare_invalid(self): + # GH#8058 + # ops testing + a = pd.Series(np.random.randn(5), name=0) + b = pd.Series(np.random.randn(5)) + b.name = pd.Timestamp('2000-01-01') + tm.assert_series_equal(a / b, 1 / (b / a)) + + +class TestTimestampSeriesComparison(object): + def test_timestamp_compare_series(self): + # make sure we can compare Timestamps on the right AND left hand side + # GH#4982 + ser = pd.Series(pd.date_range('20010101', periods=10), name='dates') + s_nat = ser.copy(deep=True) + + ser[0] = pd.Timestamp('nat') + ser[3] = pd.Timestamp('nat') + + ops = {'lt': 'gt', 'le': 'ge', 'eq': 'eq', 'ne': 'ne'} + + for left, right in ops.items(): + left_f = getattr(operator, left) + right_f = getattr(operator, right) + + # no nats + expected = left_f(ser, pd.Timestamp('20010109')) + result = right_f(pd.Timestamp('20010109'), ser) + tm.assert_series_equal(result, expected) + + # nats + expected = left_f(ser, pd.Timestamp('nat')) + result = right_f(pd.Timestamp('nat'), ser) + tm.assert_series_equal(result, expected) + + # compare to timestamp with series containing nats + expected = left_f(s_nat, pd.Timestamp('20010109')) + result = 
right_f(pd.Timestamp('20010109'), s_nat) + tm.assert_series_equal(result, expected) + + # compare to nat with series containing nats + expected = left_f(s_nat, pd.Timestamp('nat')) + result = right_f(pd.Timestamp('nat'), s_nat) + tm.assert_series_equal(result, expected) + + def test_timestamp_equality(self): + # GH#11034 + ser = pd.Series([pd.Timestamp('2000-01-29 01:59:00'), 'NaT']) + result = ser != ser + tm.assert_series_equal(result, pd.Series([False, True])) + result = ser != ser[0] + tm.assert_series_equal(result, pd.Series([False, True])) + result = ser != ser[1] + tm.assert_series_equal(result, pd.Series([True, True])) + + result = ser == ser + tm.assert_series_equal(result, pd.Series([True, False])) + result = ser == ser[0] + tm.assert_series_equal(result, pd.Series([True, False])) + result = ser == ser[1] + tm.assert_series_equal(result, pd.Series([False, False])) + + +class TestTimedeltaSeriesComparisons(object): + def test_compare_timedelta_series(self): + # regresssion test for GH5963 + s = pd.Series([timedelta(days=1), timedelta(days=2)]) + actual = s > timedelta(days=1) + expected = pd.Series([False, True]) + tm.assert_series_equal(actual, expected) + + +class TestPeriodSeriesArithmetic(object): + def test_ops_series_timedelta(self): + # GH 13043 + ser = pd.Series([pd.Period('2015-01-01', freq='D'), + pd.Period('2015-01-02', freq='D')], name='xxx') + assert ser.dtype == object + + expected = pd.Series([pd.Period('2015-01-02', freq='D'), + pd.Period('2015-01-03', freq='D')], name='xxx') + + result = ser + pd.Timedelta('1 days') + tm.assert_series_equal(result, expected) + + result = pd.Timedelta('1 days') + ser + tm.assert_series_equal(result, expected) + + result = ser + pd.tseries.offsets.Day() + tm.assert_series_equal(result, expected) + + result = pd.tseries.offsets.Day() + ser + tm.assert_series_equal(result, expected) + + def test_ops_series_period(self): + # GH 13043 + ser = pd.Series([pd.Period('2015-01-01', freq='D'), + 
pd.Period('2015-01-02', freq='D')], name='xxx') + assert ser.dtype == object + + per = pd.Period('2015-01-10', freq='D') + # dtype will be object because of original dtype + expected = pd.Series([9, 8], name='xxx', dtype=object) + tm.assert_series_equal(per - ser, expected) + tm.assert_series_equal(ser - per, -expected) + + s2 = pd.Series([pd.Period('2015-01-05', freq='D'), + pd.Period('2015-01-04', freq='D')], name='xxx') + assert s2.dtype == object + + expected = pd.Series([4, 2], name='xxx', dtype=object) + tm.assert_series_equal(s2 - ser, expected) + tm.assert_series_equal(ser - s2, -expected) + + +class TestTimestampSeriesArithmetic(object): + def test_timestamp_sub_series(self): + ser = pd.Series(pd.date_range('2014-03-17', periods=2, freq='D', + tz='US/Eastern')) + ts = ser[0] + + delta_series = pd.Series([np.timedelta64(0, 'D'), + np.timedelta64(1, 'D')]) + tm.assert_series_equal(ser - ts, delta_series) + tm.assert_series_equal(ts - ser, -delta_series) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 5de5f1f0584f4..33737387edffa 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -552,10 +552,6 @@ def test_constructor_dtype_datetime64(self): s.iloc[0] = np.nan assert s.dtype == 'M8[ns]' - # invalid astypes - for t in ['s', 'D', 'us', 'ms']: - pytest.raises(TypeError, s.astype, 'M8[%s]' % t) - # GH3414 related pytest.raises(TypeError, lambda x: Series( Series(dates).astype('int') / 1000000, dtype='M8[ms]')) @@ -707,6 +703,28 @@ def test_constructor_with_datetime_tz(self): expected = Series(pd.DatetimeIndex(['NaT', 'NaT'], tz='US/Eastern')) assert_series_equal(s, expected) + @pytest.mark.parametrize("arr_dtype", [np.int64, np.float64]) + @pytest.mark.parametrize("dtype", ["M8", "m8"]) + @pytest.mark.parametrize("unit", ['ns', 'us', 'ms', 's', 'h', 'm', 'D']) + def test_construction_to_datetimelike_unit(self, arr_dtype, dtype, unit): + # tests all units + 
# gh-19223 + dtype = "{}[{}]".format(dtype, unit) + arr = np.array([1, 2, 3], dtype=arr_dtype) + s = Series(arr) + result = s.astype(dtype) + expected = Series(arr.astype(dtype)) + + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('arg', + ['2013-01-01 00:00:00', pd.NaT, np.nan, None]) + def test_constructor_with_naive_string_and_datetimetz_dtype(self, arg): + # GH 17415: With naive string + result = Series([arg], dtype='datetime64[ns, CET]') + expected = Series(pd.Timestamp(arg)).dt.tz_localize('CET') + assert_series_equal(result, expected) + def test_construction_interval(self): # construction from interval & array of intervals index = IntervalIndex.from_breaks(np.arange(3), closed='right') diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index 441e811706487..56ff092dd0a27 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -3,7 +3,7 @@ import pytest -from datetime import datetime +from datetime import datetime, timedelta import sys import string @@ -29,6 +29,18 @@ class TestSeriesDtypes(TestData): + def test_dt64_series_astype_object(self): + dt64ser = Series(date_range('20130101', periods=3)) + result = dt64ser.astype(object) + assert isinstance(result.iloc[0], datetime) + assert result.dtype == np.object_ + + def test_td64_series_astype_object(self): + tdser = Series(['59 Days', '59 Days', 'NaT'], dtype='timedelta64[ns]') + result = tdser.astype(object) + assert isinstance(result.iloc[0], timedelta) + assert result.dtype == np.object_ + @pytest.mark.parametrize("dtype", ["float32", "float64", "int64", "int32"]) def test_astype(self, dtype): diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 0503a7b30e91c..fbfbad547ce1b 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -450,6 +450,13 @@ def test_getitem_setitem_datetimeindex(self): lb = "1990-01-01 04:00:00" rb = "1990-01-01 
07:00:00" + # GH#18435 strings get a pass from tzawareness compat + result = ts[(ts.index >= lb) & (ts.index <= rb)] + expected = ts[4:8] + assert_series_equal(result, expected) + + lb = "1990-01-01 04:00:00-0500" + rb = "1990-01-01 07:00:00-0500" result = ts[(ts.index >= lb) & (ts.index <= rb)] expected = ts[4:8] assert_series_equal(result, expected) @@ -475,6 +482,13 @@ def test_getitem_setitem_datetimeindex(self): lb = datetime(1990, 1, 1, 4) rb = datetime(1990, 1, 1, 7) + with pytest.raises(TypeError): + # tznaive vs tzaware comparison is invalid + # see GH#18376, GH#18162 + ts[(ts.index >= lb) & (ts.index <= rb)] + + lb = pd.Timestamp(datetime(1990, 1, 1, 4)).tz_localize(rng.tzinfo) + rb = pd.Timestamp(datetime(1990, 1, 1, 7)).tz_localize(rng.tzinfo) result = ts[(ts.index >= lb) & (ts.index <= rb)] expected = ts[4:8] assert_series_equal(result, expected) @@ -596,6 +610,18 @@ def test_getitem_box_float64(self): value = self.ts[5] assert isinstance(value, np.float64) + def test_series_box_timestamp(self): + rng = pd.date_range('20090415', '20090519', freq='B') + ser = Series(rng) + + assert isinstance(ser[5], pd.Timestamp) + + rng = pd.date_range('20090415', '20090519', freq='B') + ser = Series(rng, index=rng) + assert isinstance(ser[5], pd.Timestamp) + + assert isinstance(ser.iat[5], pd.Timestamp) + def test_getitem_ambiguous_keyerror(self): s = Series(lrange(10), index=lrange(0, 20, 2)) pytest.raises(KeyError, s.__getitem__, 1) @@ -1824,8 +1850,8 @@ def test_drop(self): # single string/tuple-like s = Series(range(3), index=list('abc')) - pytest.raises(ValueError, s.drop, 'bc') - pytest.raises(ValueError, s.drop, ('a', )) + pytest.raises(KeyError, s.drop, 'bc') + pytest.raises(KeyError, s.drop, ('a', )) # errors='ignore' s = Series(range(3), index=list('abc')) @@ -1847,7 +1873,7 @@ def test_drop(self): # GH 16877 s = Series([2, 3], index=[0, 1]) - with tm.assert_raises_regex(ValueError, 'not contained in axis'): + with tm.assert_raises_regex(KeyError, 'not 
contained in axis'): s.drop([False, True]) def test_align(self): diff --git a/pandas/tests/series/test_io.py b/pandas/tests/series/test_io.py index ad51261a47c5c..62d1372525cc8 100644 --- a/pandas/tests/series/test_io.py +++ b/pandas/tests/series/test_io.py @@ -138,6 +138,30 @@ def test_to_csv_path_is_none(self): csv_str = s.to_csv(path=None) assert isinstance(csv_str, str) + def test_to_csv_compression(self, compression_no_zip): + + s = Series([0.123456, 0.234567, 0.567567], index=['A', 'B', 'C'], + name='X') + + with ensure_clean() as filename: + + s.to_csv(filename, compression=compression_no_zip, header=True) + + # test the round trip - to_csv -> read_csv + rs = pd.read_csv(filename, compression=compression_no_zip, + index_col=0, squeeze=True) + assert_series_equal(s, rs) + + # explicitly ensure file was compressed + with tm.decompress_file(filename, compression_no_zip) as fh: + text = fh.read().decode('utf8') + assert s.name in text + + with tm.decompress_file(filename, compression_no_zip) as fh: + assert_series_equal(s, pd.read_csv(fh, + index_col=0, + squeeze=True)) + class TestSeriesIO(TestData): diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 2350477c2302a..7505e6b0cec3b 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -28,6 +28,11 @@ from .common import TestData +@pytest.fixture +def tdser(): + return Series(['59 Days', '59 Days', 'NaT'], dtype='timedelta64[ns]') + + class TestSeriesComparisons(object): def test_series_comparison_scalars(self): series = Series(date_range('1/1/2000', periods=10)) @@ -178,18 +183,18 @@ def test_comparison_tuples(self): assert_series_equal(result, expected) def test_comparison_operators_with_nas(self): - s = Series(bdate_range('1/1/2000', periods=10), dtype=object) - s[::2] = np.nan + ser = Series(bdate_range('1/1/2000', periods=10), dtype=object) + ser[::2] = np.nan # test that comparisons work ops = ['lt', 'le', 'gt', 'ge', 
'eq', 'ne'] for op in ops: - val = s[5] + val = ser[5] f = getattr(operator, op) - result = f(s, val) + result = f(ser, val) - expected = f(s.dropna(), val).reindex(s.index) + expected = f(ser.dropna(), val).reindex(ser.index) if op == 'ne': expected = expected.fillna(True).astype(bool) @@ -206,28 +211,28 @@ def test_comparison_operators_with_nas(self): # boolean &, |, ^ should work with object arrays and propagate NAs ops = ['and_', 'or_', 'xor'] - mask = s.isna() + mask = ser.isna() for bool_op in ops: - f = getattr(operator, bool_op) + func = getattr(operator, bool_op) - filled = s.fillna(s[0]) + filled = ser.fillna(ser[0]) - result = f(s < s[9], s > s[3]) + result = func(ser < ser[9], ser > ser[3]) - expected = f(filled < filled[9], filled > filled[3]) + expected = func(filled < filled[9], filled > filled[3]) expected[mask] = False assert_series_equal(result, expected) def test_comparison_object_numeric_nas(self): - s = Series(np.random.randn(10), dtype=object) - shifted = s.shift(2) + ser = Series(np.random.randn(10), dtype=object) + shifted = ser.shift(2) ops = ['lt', 'le', 'gt', 'ge', 'eq', 'ne'] for op in ops: - f = getattr(operator, op) + func = getattr(operator, op) - result = f(s, shifted) - expected = f(s.astype(float), shifted.astype(float)) + result = func(ser, shifted) + expected = func(ser.astype(float), shifted.astype(float)) assert_series_equal(result, expected) def test_comparison_invalid(self): @@ -272,98 +277,94 @@ def f(): tm.assert_series_equal(cat == "d", Series([False, False, False])) tm.assert_series_equal(cat != "d", Series([True, True, True])) - def test_more_na_comparisons(self): - for dtype in [None, object]: - left = Series(['a', np.nan, 'c'], dtype=dtype) - right = Series(['a', np.nan, 'd'], dtype=dtype) - - result = left == right - expected = Series([True, False, False]) - assert_series_equal(result, expected) - - result = left != right - expected = Series([False, True, True]) - assert_series_equal(result, expected) - - result = 
left == np.nan - expected = Series([False, False, False]) - assert_series_equal(result, expected) - - result = left != np.nan - expected = Series([True, True, True]) - assert_series_equal(result, expected) - - def test_nat_comparisons(self): - data = [([pd.Timestamp('2011-01-01'), pd.NaT, - pd.Timestamp('2011-01-03')], - [pd.NaT, pd.NaT, pd.Timestamp('2011-01-03')]), - - ([pd.Timedelta('1 days'), pd.NaT, - pd.Timedelta('3 days')], - [pd.NaT, pd.NaT, pd.Timedelta('3 days')]), - - ([pd.Period('2011-01', freq='M'), pd.NaT, - pd.Period('2011-03', freq='M')], - [pd.NaT, pd.NaT, pd.Period('2011-03', freq='M')])] - - # add lhs / rhs switched data - data = data + [(r, l) for l, r in data] - - for l, r in data: - for dtype in [None, object]: - left = Series(l, dtype=dtype) - - # Series, Index - for right in [Series(r, dtype=dtype), Index(r, dtype=dtype)]: - expected = Series([False, False, True]) - assert_series_equal(left == right, expected) - - expected = Series([True, True, False]) - assert_series_equal(left != right, expected) - - expected = Series([False, False, False]) - assert_series_equal(left < right, expected) - - expected = Series([False, False, False]) - assert_series_equal(left > right, expected) - - expected = Series([False, False, True]) - assert_series_equal(left >= right, expected) + @pytest.mark.parametrize('dtype', [None, object]) + def test_more_na_comparisons(self, dtype): + left = Series(['a', np.nan, 'c'], dtype=dtype) + right = Series(['a', np.nan, 'd'], dtype=dtype) - expected = Series([False, False, True]) - assert_series_equal(left <= right, expected) - - def test_nat_comparisons_scalar(self): - data = [[pd.Timestamp('2011-01-01'), pd.NaT, - pd.Timestamp('2011-01-03')], - - [pd.Timedelta('1 days'), pd.NaT, pd.Timedelta('3 days')], - - [pd.Period('2011-01', freq='M'), pd.NaT, - pd.Period('2011-03', freq='M')]] - - for l in data: - for dtype in [None, object]: - left = Series(l, dtype=dtype) + result = left == right + expected = Series([True, False, 
False]) + assert_series_equal(result, expected) - expected = Series([False, False, False]) - assert_series_equal(left == pd.NaT, expected) - assert_series_equal(pd.NaT == left, expected) + result = left != right + expected = Series([False, True, True]) + assert_series_equal(result, expected) - expected = Series([True, True, True]) - assert_series_equal(left != pd.NaT, expected) - assert_series_equal(pd.NaT != left, expected) + result = left == np.nan + expected = Series([False, False, False]) + assert_series_equal(result, expected) - expected = Series([False, False, False]) - assert_series_equal(left < pd.NaT, expected) - assert_series_equal(pd.NaT > left, expected) - assert_series_equal(left <= pd.NaT, expected) - assert_series_equal(pd.NaT >= left, expected) + result = left != np.nan + expected = Series([True, True, True]) + assert_series_equal(result, expected) - assert_series_equal(left > pd.NaT, expected) - assert_series_equal(pd.NaT < left, expected) - assert_series_equal(left >= pd.NaT, expected) - assert_series_equal(pd.NaT <= left, expected) + @pytest.mark.parametrize('pair', [ + ([pd.Timestamp('2011-01-01'), NaT, pd.Timestamp('2011-01-03')], + [NaT, NaT, pd.Timestamp('2011-01-03')]), + + ([pd.Timedelta('1 days'), NaT, pd.Timedelta('3 days')], + [NaT, NaT, pd.Timedelta('3 days')]), + + ([pd.Period('2011-01', freq='M'), NaT, pd.Period('2011-03', freq='M')], + [NaT, NaT, pd.Period('2011-03', freq='M')])]) + @pytest.mark.parametrize('reverse', [True, False]) + @pytest.mark.parametrize('box', [Series, Index]) + @pytest.mark.parametrize('dtype', [None, object]) + def test_nat_comparisons(self, dtype, box, reverse, pair): + l, r = pair + if reverse: + # add lhs / rhs switched data + l, r = r, l + + left = Series(l, dtype=dtype) + right = box(r, dtype=dtype) + # Series, Index + + expected = Series([False, False, True]) + assert_series_equal(left == right, expected) + + expected = Series([True, True, False]) + assert_series_equal(left != right, expected) + + 
expected = Series([False, False, False]) + assert_series_equal(left < right, expected) + + expected = Series([False, False, False]) + assert_series_equal(left > right, expected) + + expected = Series([False, False, True]) + assert_series_equal(left >= right, expected) + + expected = Series([False, False, True]) + assert_series_equal(left <= right, expected) + + @pytest.mark.parametrize('data', [ + [pd.Timestamp('2011-01-01'), NaT, pd.Timestamp('2011-01-03')], + [pd.Timedelta('1 days'), NaT, pd.Timedelta('3 days')], + [pd.Period('2011-01', freq='M'), NaT, pd.Period('2011-03', freq='M')] + ]) + @pytest.mark.parametrize('dtype', [None, object]) + def test_nat_comparisons_scalar(self, dtype, data): + left = Series(data, dtype=dtype) + + expected = Series([False, False, False]) + assert_series_equal(left == pd.NaT, expected) + assert_series_equal(pd.NaT == left, expected) + + expected = Series([True, True, True]) + assert_series_equal(left != pd.NaT, expected) + assert_series_equal(pd.NaT != left, expected) + + expected = Series([False, False, False]) + assert_series_equal(left < pd.NaT, expected) + assert_series_equal(pd.NaT > left, expected) + assert_series_equal(left <= pd.NaT, expected) + assert_series_equal(pd.NaT >= left, expected) + + assert_series_equal(left > pd.NaT, expected) + assert_series_equal(pd.NaT < left, expected) + assert_series_equal(left >= pd.NaT, expected) + assert_series_equal(pd.NaT <= left, expected) def test_comparison_different_length(self): a = Series(['a', 'b', 'c']) @@ -554,27 +555,27 @@ def test_comp_ops_df_compat(self): s3 = pd.Series([1, 2, 3], index=list('ABC'), name='x') s4 = pd.Series([2, 2, 2, 2], index=list('ABCD'), name='x') - for l, r in [(s1, s2), (s2, s1), (s3, s4), (s4, s3)]: + for left, right in [(s1, s2), (s2, s1), (s3, s4), (s4, s3)]: msg = "Can only compare identically-labeled Series objects" with tm.assert_raises_regex(ValueError, msg): - l == r + left == right with tm.assert_raises_regex(ValueError, msg): - l != r + left 
!= right with tm.assert_raises_regex(ValueError, msg): - l < r + left < right msg = "Can only compare identically-labeled DataFrame objects" with tm.assert_raises_regex(ValueError, msg): - l.to_frame() == r.to_frame() + left.to_frame() == right.to_frame() with tm.assert_raises_regex(ValueError, msg): - l.to_frame() != r.to_frame() + left.to_frame() != right.to_frame() with tm.assert_raises_regex(ValueError, msg): - l.to_frame() < r.to_frame() + left.to_frame() < right.to_frame() class TestSeriesArithmetic(object): @@ -667,83 +668,197 @@ def test_div(self): assert_series_equal(result, expected) -class TestTimedeltaSeriesArithmetic(object): - def test_timedelta_series_ops(self): - # GH11925 - s = Series(timedelta_range('1 day', periods=3)) - ts = Timestamp('2012-01-01') - expected = Series(date_range('2012-01-02', periods=3)) - assert_series_equal(ts + s, expected) - assert_series_equal(s + ts, expected) +class TestTimedeltaSeriesArithmeticWithIntegers(object): + # Tests for Series with dtype 'timedelta64[ns]' arithmetic operations + # with integer and int-like others - expected2 = Series(date_range('2011-12-31', periods=3, freq='-1D')) - assert_series_equal(ts - s, expected2) - assert_series_equal(ts + (-s), expected2) + # ------------------------------------------------------------------ + # Addition and Subtraction + + def test_td64series_add_int_series_invalid(self, tdser): + with pytest.raises(TypeError): + tdser + Series([2, 3, 4]) + + @pytest.mark.xfail(reason='GH#19123 integer interpreted as nanoseconds') + def test_td64series_radd_int_series_invalid(self, tdser): + with pytest.raises(TypeError): + Series([2, 3, 4]) + tdser + + def test_td64series_sub_int_series_invalid(self, tdser): + with pytest.raises(TypeError): + tdser - Series([2, 3, 4]) + + @pytest.mark.xfail(reason='GH#19123 integer interpreted as nanoseconds') + def test_td64series_rsub_int_series_invalid(self, tdser): + with pytest.raises(TypeError): + Series([2, 3, 4]) - tdser - def 
test_timedelta64_operations_with_integers(self): + def test_td64_series_add_intlike(self): + # GH#19123 + tdi = pd.TimedeltaIndex(['59 days', '59 days', 'NaT']) + ser = Series(tdi) + + other = Series([20, 30, 40], dtype='uint8') + + pytest.raises(TypeError, ser.__add__, 1) + pytest.raises(TypeError, ser.__sub__, 1) + + pytest.raises(TypeError, ser.__add__, other) + pytest.raises(TypeError, ser.__sub__, other) + + pytest.raises(TypeError, ser.__add__, other.values) + pytest.raises(TypeError, ser.__sub__, other.values) + + pytest.raises(TypeError, ser.__add__, pd.Index(other)) + pytest.raises(TypeError, ser.__sub__, pd.Index(other)) + + @pytest.mark.parametrize('scalar', [1, 1.5, np.array(2)]) + def test_td64series_add_sub_numeric_scalar_invalid(self, scalar, tdser): + with pytest.raises(TypeError): + tdser + scalar + with pytest.raises(TypeError): + scalar + tdser + with pytest.raises(TypeError): + tdser - scalar + with pytest.raises(TypeError): + scalar - tdser + + @pytest.mark.parametrize('dtype', ['int64', 'int32', 'int16', + 'uint64', 'uint32', 'uint16', 'uint8', + 'float64', 'float32', 'float16']) + @pytest.mark.parametrize('vector', [ + np.array([1, 2, 3]), + pd.Index([1, 2, 3]), + pytest.param(Series([1, 2, 3]), + marks=pytest.mark.xfail(reason='GH#19123 integer ' + 'interpreted as nanos')) + ]) + def test_td64series_add_sub_numeric_array_invalid(self, vector, + dtype, tdser): + vector = vector.astype(dtype) + with pytest.raises(TypeError): + tdser + vector + with pytest.raises(TypeError): + vector + tdser + with pytest.raises(TypeError): + tdser - vector + with pytest.raises(TypeError): + vector - tdser + + # ------------------------------------------------------------------ + # Multiplicaton and Division + + @pytest.mark.parametrize('dtype', ['int64', 'int32', 'int16', + 'uint64', 'uint32', 'uint16', 'uint8', + 'float64', 'float32', 'float16']) + @pytest.mark.parametrize('vector', [np.array([20, 30, 40]), + pd.Index([20, 30, 40]), + Series([20, 30, 40])]) + 
def test_td64series_div_numeric_array(self, vector, dtype, tdser): # GH 4521 # divide/multiply by integers - startdate = Series(date_range('2013-01-01', '2013-01-03')) - enddate = Series(date_range('2013-03-01', '2013-03-03')) + vector = vector.astype(dtype) + expected = Series(['2.95D', '1D 23H 12m', 'NaT'], + dtype='timedelta64[ns]') - s1 = enddate - startdate - s1[2] = np.nan - s2 = Series([2, 3, 4]) - expected = Series(s1.values.astype(np.int64) / s2, dtype='m8[ns]') - expected[2] = np.nan - result = s1 / s2 + result = tdser / vector assert_series_equal(result, expected) - s2 = Series([20, 30, 40]) - expected = Series(s1.values.astype(np.int64) / s2, dtype='m8[ns]') - expected[2] = np.nan - result = s1 / s2 + with pytest.raises(TypeError): + vector / tdser + + @pytest.mark.parametrize('dtype', ['int64', 'int32', 'int16', + 'uint64', 'uint32', 'uint16', 'uint8', + 'float64', 'float32', 'float16']) + @pytest.mark.parametrize('vector', [np.array([20, 30, 40]), + pd.Index([20, 30, 40]), + Series([20, 30, 40])]) + def test_td64series_mul_numeric_array(self, vector, dtype, tdser): + # GH 4521 + # divide/multiply by integers + vector = vector.astype(dtype) + + expected = Series(['1180 Days', '1770 Days', 'NaT'], + dtype='timedelta64[ns]') + + result = tdser * vector assert_series_equal(result, expected) - result = s1 / 2 - expected = Series(s1.values.astype(np.int64) / 2, dtype='m8[ns]') - expected[2] = np.nan + @pytest.mark.parametrize('dtype', ['int64', 'int32', 'int16', + 'uint64', 'uint32', 'uint16', 'uint8', + 'float64', 'float32', 'float16']) + @pytest.mark.parametrize('vector', [ + np.array([20, 30, 40]), + pytest.param(pd.Index([20, 30, 40]), + marks=pytest.mark.xfail(reason='__mul__ raises ' + 'instead of returning ' + 'NotImplemented')), + Series([20, 30, 40]) + ]) + def test_td64series_rmul_numeric_array(self, vector, dtype, tdser): + # GH 4521 + # divide/multiply by integers + vector = vector.astype(dtype) + + expected = Series(['1180 Days', '1770 Days', 
'NaT'], + dtype='timedelta64[ns]') + + result = vector * tdser assert_series_equal(result, expected) - s2 = Series([20, 30, 40]) - expected = Series(s1.values.astype(np.int64) * s2, dtype='m8[ns]') - expected[2] = np.nan - result = s1 * s2 + @pytest.mark.parametrize('one', [1, np.array(1), 1.0, np.array(1.0)]) + def test_td64series_mul_numeric_scalar(self, one, tdser): + # GH 4521 + # divide/multiply by integers + expected = Series(['-59 Days', '-59 Days', 'NaT'], + dtype='timedelta64[ns]') + + result = tdser * (-one) + assert_series_equal(result, expected) + result = (-one) * tdser assert_series_equal(result, expected) - for dtype in ['int32', 'int16', 'uint32', 'uint64', 'uint32', 'uint16', - 'uint8']: - s2 = Series([20, 30, 40], dtype=dtype) - expected = Series( - s1.values.astype(np.int64) * s2.astype(np.int64), - dtype='m8[ns]') - expected[2] = np.nan - result = s1 * s2 - assert_series_equal(result, expected) + expected = Series(['118 Days', '118 Days', 'NaT'], + dtype='timedelta64[ns]') - result = s1 * 2 - expected = Series(s1.values.astype(np.int64) * 2, dtype='m8[ns]') - expected[2] = np.nan + result = tdser * (2 * one) + assert_series_equal(result, expected) + result = (2 * one) * tdser assert_series_equal(result, expected) - result = s1 * -1 - expected = Series(s1.values.astype(np.int64) * -1, dtype='m8[ns]') - expected[2] = np.nan + @pytest.mark.parametrize('two', [ + 2, 2.0, + pytest.param(np.array(2), + marks=pytest.mark.xfail(reason='GH#19011 is_list_like ' + 'incorrectly True.')), + pytest.param(np.array(2.0), + marks=pytest.mark.xfail(reason='GH#19011 is_list_like ' + 'incorrectly True.')), + ]) + def test_td64series_div_numeric_scalar(self, two, tdser): + # GH 4521 + # divide/multiply by integers + expected = Series(['29.5D', '29.5D', 'NaT'], dtype='timedelta64[ns]') + + result = tdser / two assert_series_equal(result, expected) - # invalid ops - assert_series_equal(s1 / s2.astype(float), - Series([Timedelta('2 days 22:48:00'), Timedelta( - '1 days 
23:12:00'), Timedelta('NaT')])) - assert_series_equal(s1 / 2.0, - Series([Timedelta('29 days 12:00:00'), Timedelta( - '29 days 12:00:00'), Timedelta('NaT')])) - - for op in ['__add__', '__sub__']: - sop = getattr(s1, op, None) - if sop is not None: - pytest.raises(TypeError, sop, 1) - pytest.raises(TypeError, sop, s2.values) + +class TestTimedeltaSeriesArithmetic(object): + def test_td64series_add_sub_timestamp(self): + # GH11925 + tdser = Series(timedelta_range('1 day', periods=3)) + ts = Timestamp('2012-01-01') + expected = Series(date_range('2012-01-02', periods=3)) + assert_series_equal(ts + tdser, expected) + assert_series_equal(tdser + ts, expected) + + expected2 = Series(date_range('2011-12-31', periods=3, freq='-1D')) + assert_series_equal(ts - tdser, expected2) + assert_series_equal(ts + (-tdser), expected2) + + with pytest.raises(TypeError): + tdser - ts def test_timedelta64_operations_with_DateOffset(self): # GH 10699 @@ -993,7 +1108,7 @@ def test_operators_timedelta64_with_timedelta_invalid(self, scalar_td): # check that we are getting a TypeError # with 'operate' (from core/ops.py) for the ops that are not # defined - pattern = 'operate|unsupported|cannot' + pattern = 'operate|unsupported|cannot|not supported' with tm.assert_raises_regex(TypeError, pattern): td1 * scalar_td with tm.assert_raises_regex(TypeError, pattern): @@ -1005,9 +1120,7 @@ def test_operators_timedelta64_with_timedelta_invalid(self, scalar_td): @pytest.mark.parametrize('scalar_td', [ timedelta(minutes=5, seconds=4), - pytest.param(Timedelta('5m4s'), - marks=pytest.mark.xfail(reason="Timedelta.__floordiv__ " - "bug GH#18846")), + Timedelta('5m4s'), Timedelta('5m4s').to_timedelta64()]) def test_timedelta_rfloordiv(self, scalar_td): # GH#18831 @@ -1045,6 +1158,74 @@ def test_timedelta_floordiv(self, scalar_td): expected = Series([0, 0, np.nan]) tm.assert_series_equal(result, expected) + @pytest.mark.parametrize('names', [(None, None, None), + ('Egon', 'Venkman', None), + ('NCC1701D', 
'NCC1701D', 'NCC1701D')]) + def test_td64_series_with_tdi(self, names): + # GH#17250 make sure result dtype is correct + # GH#19043 make sure names are propogated correctly + tdi = pd.TimedeltaIndex(['0 days', '1 day'], name=names[0]) + ser = Series([Timedelta(hours=3), Timedelta(hours=4)], name=names[1]) + expected = Series([Timedelta(hours=3), Timedelta(days=1, hours=4)], + name=names[2]) + + result = tdi + ser + tm.assert_series_equal(result, expected) + assert result.dtype == 'timedelta64[ns]' + + result = ser + tdi + tm.assert_series_equal(result, expected) + assert result.dtype == 'timedelta64[ns]' + + expected = Series([Timedelta(hours=-3), Timedelta(days=1, hours=-4)], + name=names[2]) + + result = tdi - ser + tm.assert_series_equal(result, expected) + assert result.dtype == 'timedelta64[ns]' + + result = ser - tdi + tm.assert_series_equal(result, -expected) + assert result.dtype == 'timedelta64[ns]' + + @pytest.mark.parametrize('names', [(None, None, None), + ('Egon', 'Venkman', None), + ('NCC1701D', 'NCC1701D', 'NCC1701D')]) + def test_tdi_mul_int_series(self, names): + # GH#19042 + tdi = pd.TimedeltaIndex(['0days', '1day', '2days', '3days', '4days'], + name=names[0]) + ser = Series([0, 1, 2, 3, 4], dtype=np.int64, name=names[1]) + + expected = Series(['0days', '1day', '4days', '9days', '16days'], + dtype='timedelta64[ns]', + name=names[2]) + + result = ser * tdi + tm.assert_series_equal(result, expected) + + # The direct operation tdi * ser still needs to be fixed. + result = ser.__rmul__(tdi) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('names', [(None, None, None), + ('Egon', 'Venkman', None), + ('NCC1701D', 'NCC1701D', 'NCC1701D')]) + def test_float_series_rdiv_tdi(self, names): + # GH#19042 + # TODO: the direct operation TimedeltaIndex / Series still + # needs to be fixed. 
+ tdi = pd.TimedeltaIndex(['0days', '1day', '2days', '3days', '4days'], + name=names[0]) + ser = Series([1.5, 3, 4.5, 6, 7.5], dtype=np.float64, name=names[1]) + + expected = Series([tdi[n] / ser[n] for n in range(len(ser))], + dtype='timedelta64[ns]', + name=names[2]) + + result = ser.__rdiv__(tdi) + tm.assert_series_equal(result, expected) + class TestDatetimeSeriesArithmetic(object): @pytest.mark.parametrize( @@ -1065,16 +1246,6 @@ def test_sub_datetime64_not_ns(self, box, assert_func): res = dt64 - obj assert_func(res, -expected) - @pytest.mark.xfail(reason='GH#7996 datetime64 units not converted to nano') - def test_frame_sub_datetime64_not_ns(self): - df = pd.DataFrame(date_range('20130101', periods=3)) - dt64 = np.datetime64('2013-01-01') - assert dt64.dtype == 'datetime64[D]' - res = df - dt64 - expected = pd.DataFrame([Timedelta(days=0), Timedelta(days=1), - Timedelta(days=2)]) - tm.assert_frame_equal(res, expected) - def test_operators_datetimelike(self): def run_ops(ops, get_ser, test_ser): @@ -1215,7 +1386,7 @@ def test_sub_datetime_compat(self): assert_series_equal(s - dt, exp) assert_series_equal(s - Timestamp(dt), exp) - def test_datetime_series_with_timedelta(self): + def test_dt64_series_with_timedelta(self): # scalar timedeltas/np.timedelta64 objects # operate with np.timedelta64 correctly s = Series([Timestamp('20130101 9:01'), Timestamp('20130101 9:02')]) @@ -1234,25 +1405,51 @@ def test_datetime_series_with_timedelta(self): assert_series_equal(result, expected) assert_series_equal(result2, expected) - def test_datetime_series_with_DateOffset(self): + def test_dt64_series_add_tick_DateOffset(self): # GH 4532 # operate with pd.offsets - s = Series([Timestamp('20130101 9:01'), Timestamp('20130101 9:02')]) - - result = s + pd.offsets.Second(5) - result2 = pd.offsets.Second(5) + s + ser = Series([Timestamp('20130101 9:01'), Timestamp('20130101 9:02')]) expected = Series([Timestamp('20130101 9:01:05'), Timestamp('20130101 9:02:05')]) + + result = ser 
+ pd.offsets.Second(5) assert_series_equal(result, expected) + + result2 = pd.offsets.Second(5) + ser assert_series_equal(result2, expected) - result = s - pd.offsets.Second(5) - result2 = -pd.offsets.Second(5) + s + def test_dt64_series_sub_tick_DateOffset(self): + # GH 4532 + # operate with pd.offsets + ser = Series([Timestamp('20130101 9:01'), Timestamp('20130101 9:02')]) expected = Series([Timestamp('20130101 9:00:55'), Timestamp('20130101 9:01:55')]) + + result = ser - pd.offsets.Second(5) assert_series_equal(result, expected) + + result2 = -pd.offsets.Second(5) + ser assert_series_equal(result2, expected) + with pytest.raises(TypeError): + pd.offsets.Second(5) - ser + + @pytest.mark.parametrize('cls_name', ['Day', 'Hour', 'Minute', 'Second', + 'Milli', 'Micro', 'Nano']) + def test_dt64_series_with_tick_DateOffset_smoke(self, cls_name): + # GH 4532 + # smoke tests for valid DateOffsets + ser = Series([Timestamp('20130101 9:01'), Timestamp('20130101 9:02')]) + + offset_cls = getattr(pd.offsets, cls_name) + ser + offset_cls(5) + offset_cls(5) + ser + + def test_dt64_series_add_mixed_tick_DateOffset(self): + # GH 4532 + # operate with pd.offsets + s = Series([Timestamp('20130101 9:01'), Timestamp('20130101 9:02')]) + result = s + pd.offsets.Milli(5) result2 = pd.offsets.Milli(5) + s expected = Series([Timestamp('20130101 9:01:00.005'), @@ -1265,14 +1462,7 @@ def test_datetime_series_with_DateOffset(self): Timestamp('20130101 9:07:00.005')]) assert_series_equal(result, expected) - # valid DateOffsets - for do in ['Hour', 'Minute', 'Second', 'Day', 'Micro', 'Milli', - 'Nano']: - op = getattr(pd.offsets, do) - s + op(5) - op(5) + s - - def test_dt64_sub_NaT(self): + def test_dt64_series_sub_NaT(self): # GH#18808 dti = pd.DatetimeIndex([pd.NaT, pd.Timestamp('19900315')]) ser = pd.Series(dti) @@ -1313,23 +1503,25 @@ def test_datetime64_ops_nat(self): assert_series_equal(NaT + nat_series_dtype_timestamp, nat_series_dtype_timestamp) + 
@pytest.mark.parametrize('dt64_series', [ + Series([Timestamp('19900315'), Timestamp('19900315')]), + Series([NaT, Timestamp('19900315')]), + Series([NaT, NaT], dtype='datetime64[ns]')]) + @pytest.mark.parametrize('one', [1, 1.0, np.array(1)]) + def test_dt64_mul_div_numeric_invalid(self, one, dt64_series): # multiplication with pytest.raises(TypeError): - datetime_series * 1 - with pytest.raises(TypeError): - nat_series_dtype_timestamp * 1 + dt64_series * one with pytest.raises(TypeError): - datetime_series * 1.0 - with pytest.raises(TypeError): - nat_series_dtype_timestamp * 1.0 + one * dt64_series # division with pytest.raises(TypeError): - nat_series_dtype_timestamp / 1.0 + dt64_series / one with pytest.raises(TypeError): - nat_series_dtype_timestamp / 1 + one / dt64_series - def test_dt64series_arith_overflow(self): + def test_dt64_series_arith_overflow(self): # GH#12534, fixed by #19024 dt = pd.Timestamp('1700-01-31') td = pd.Timedelta('20000 Days') @@ -1360,6 +1552,26 @@ def test_dt64series_arith_overflow(self): res = dt - ser tm.assert_series_equal(res, -expected) + @pytest.mark.parametrize('tz', [None, 'Asia/Tokyo']) + def test_dt64_series_add_intlike(self, tz): + # GH#19123 + dti = pd.DatetimeIndex(['2016-01-02', '2016-02-03', 'NaT'], tz=tz) + ser = Series(dti) + + other = Series([20, 30, 40], dtype='uint8') + + pytest.raises(TypeError, ser.__add__, 1) + pytest.raises(TypeError, ser.__sub__, 1) + + pytest.raises(TypeError, ser.__add__, other) + pytest.raises(TypeError, ser.__sub__, other) + + pytest.raises(TypeError, ser.__add__, other.values) + pytest.raises(TypeError, ser.__sub__, other.values) + + pytest.raises(TypeError, ser.__add__, pd.Index(other)) + pytest.raises(TypeError, ser.__sub__, pd.Index(other)) + class TestSeriesOperators(TestData): def test_op_method(self): @@ -1481,41 +1693,26 @@ def test_invalid_ops(self): pytest.raises(Exception, self.objSeries.__sub__, np.array(1, dtype=np.int64)) - def test_timedelta64_conversions(self): + 
@pytest.mark.parametrize("m", [1, 3, 10]) + @pytest.mark.parametrize("unit", ['D', 'h', 'm', 's', 'ms', 'us', 'ns']) + def test_timedelta64_conversions(self, m, unit): + startdate = Series(date_range('2013-01-01', '2013-01-03')) enddate = Series(date_range('2013-03-01', '2013-03-03')) s1 = enddate - startdate s1[2] = np.nan - for m in [1, 3, 10]: - for unit in ['D', 'h', 'm', 's', 'ms', 'us', 'ns']: - - # op - expected = s1.apply(lambda x: x / np.timedelta64(m, unit)) - result = s1 / np.timedelta64(m, unit) - assert_series_equal(result, expected) - - if m == 1 and unit != 'ns': - - # astype - result = s1.astype("timedelta64[{0}]".format(unit)) - assert_series_equal(result, expected) - - # reverse op - expected = s1.apply( - lambda x: Timedelta(np.timedelta64(m, unit)) / x) - result = np.timedelta64(m, unit) / s1 - - # astype - s = Series(date_range('20130101', periods=3)) - result = s.astype(object) - assert isinstance(result.iloc[0], datetime) - assert result.dtype == np.object_ + # op + expected = s1.apply(lambda x: x / np.timedelta64(m, unit)) + result = s1 / np.timedelta64(m, unit) + assert_series_equal(result, expected) - result = s1.astype(object) - assert isinstance(result.iloc[0], timedelta) - assert result.dtype == np.object_ + # reverse op + expected = s1.apply( + lambda x: Timedelta(np.timedelta64(m, unit)) / x) + result = np.timedelta64(m, unit) / s1 + assert_series_equal(result, expected) @pytest.mark.parametrize('op', [operator.add, operator.sub]) def test_timedelta64_equal_timedelta_supported_ops(self, op): @@ -1542,13 +1739,7 @@ def timedelta64(*args): lhs = op(ser, nptd) rhs = op(ser, pytd) - try: - assert_series_equal(lhs, rhs) - except: - raise AssertionError( - "invalid comparison [op->{0},d->{1},h->{2},m->{3}," - "s->{4},us->{5}]\n{6}\n{7}\n".format(op, d, h, m, s, - us, lhs, rhs)) + assert_series_equal(lhs, rhs) def test_ops_nat_mixed_datetime64_timedelta64(self): # GH 11349 @@ -1922,72 +2113,56 @@ def test_series_frame_radd_bug(self): with 
pytest.raises(TypeError): self.ts + datetime.now() - def test_series_radd_more(self): - data = [[1, 2, 3], - [1.1, 2.2, 3.3], - [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02'), - pd.NaT], - ['x', 'y', 1]] - - for d in data: - for dtype in [None, object]: - s = Series(d, dtype=dtype) - with pytest.raises(TypeError): - 'foo_' + s - - for dtype in [None, object]: - res = 1 + pd.Series([1, 2, 3], dtype=dtype) - exp = pd.Series([2, 3, 4], dtype=dtype) - assert_series_equal(res, exp) - res = pd.Series([1, 2, 3], dtype=dtype) + 1 - assert_series_equal(res, exp) - - res = np.nan + pd.Series([1, 2, 3], dtype=dtype) - exp = pd.Series([np.nan, np.nan, np.nan], dtype=dtype) - assert_series_equal(res, exp) - res = pd.Series([1, 2, 3], dtype=dtype) + np.nan - assert_series_equal(res, exp) - - s = pd.Series([pd.Timedelta('1 days'), pd.Timedelta('2 days'), - pd.Timedelta('3 days')], dtype=dtype) - exp = pd.Series([pd.Timedelta('4 days'), pd.Timedelta('5 days'), - pd.Timedelta('6 days')]) - assert_series_equal(pd.Timedelta('3 days') + s, exp) - assert_series_equal(s + pd.Timedelta('3 days'), exp) - - s = pd.Series(['x', np.nan, 'x']) - assert_series_equal('a' + s, pd.Series(['ax', np.nan, 'ax'])) - assert_series_equal(s + 'a', pd.Series(['xa', np.nan, 'xa'])) - - def test_frame_radd_more(self): - data = [[1, 2, 3], - [1.1, 2.2, 3.3], - [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02'), - pd.NaT], - ['x', 'y', 1]] - - for d in data: - for dtype in [None, object]: - s = DataFrame(d, dtype=dtype) - with pytest.raises(TypeError): - 'foo_' + s - - for dtype in [None, object]: - res = 1 + pd.DataFrame([1, 2, 3], dtype=dtype) - exp = pd.DataFrame([2, 3, 4], dtype=dtype) - assert_frame_equal(res, exp) - res = pd.DataFrame([1, 2, 3], dtype=dtype) + 1 - assert_frame_equal(res, exp) - - res = np.nan + pd.DataFrame([1, 2, 3], dtype=dtype) - exp = pd.DataFrame([np.nan, np.nan, np.nan], dtype=dtype) - assert_frame_equal(res, exp) - res = pd.DataFrame([1, 2, 3], dtype=dtype) + 
np.nan - assert_frame_equal(res, exp) - - df = pd.DataFrame(['x', np.nan, 'x']) - assert_frame_equal('a' + df, pd.DataFrame(['ax', np.nan, 'ax'])) - assert_frame_equal(df + 'a', pd.DataFrame(['xa', np.nan, 'xa'])) + def test_series_radd_str(self): + ser = pd.Series(['x', np.nan, 'x']) + assert_series_equal('a' + ser, pd.Series(['ax', np.nan, 'ax'])) + assert_series_equal(ser + 'a', pd.Series(['xa', np.nan, 'xa'])) + + @pytest.mark.parametrize('dtype', [None, object]) + def test_series_with_dtype_radd_timedelta(self, dtype): + ser = pd.Series([pd.Timedelta('1 days'), pd.Timedelta('2 days'), + pd.Timedelta('3 days')], dtype=dtype) + expected = pd.Series([pd.Timedelta('4 days'), pd.Timedelta('5 days'), + pd.Timedelta('6 days')]) + + result = pd.Timedelta('3 days') + ser + assert_series_equal(result, expected) + + result = ser + pd.Timedelta('3 days') + assert_series_equal(result, expected) + + @pytest.mark.parametrize('dtype', [None, object]) + def test_series_with_dtype_radd_int(self, dtype): + ser = pd.Series([1, 2, 3], dtype=dtype) + expected = pd.Series([2, 3, 4], dtype=dtype) + + result = 1 + ser + assert_series_equal(result, expected) + + result = ser + 1 + assert_series_equal(result, expected) + + @pytest.mark.parametrize('dtype', [None, object]) + def test_series_with_dtype_radd_nan(self, dtype): + ser = pd.Series([1, 2, 3], dtype=dtype) + expected = pd.Series([np.nan, np.nan, np.nan], dtype=dtype) + + result = np.nan + ser + assert_series_equal(result, expected) + + result = ser + np.nan + assert_series_equal(result, expected) + + @pytest.mark.parametrize('data', [ + [1, 2, 3], + [1.1, 2.2, 3.3], + [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02'), pd.NaT], + ['x', 'y', 1]]) + @pytest.mark.parametrize('dtype', [None, object]) + def test_series_radd_str_invalid(self, dtype, data): + ser = Series(data, dtype=dtype) + with pytest.raises(TypeError): + 'foo_' + ser def test_operators_frame(self): # rpow does not work with DataFrame @@ -2082,24 +2257,23 @@ 
def test_operators_na_handling(self): assert_series_equal(result, expected) def test_datetime64_with_index(self): - # arithmetic integer ops with an index - s = Series(np.random.randn(5)) - expected = s - s.index.to_series() - result = s - s.index + ser = Series(np.random.randn(5)) + expected = ser - ser.index.to_series() + result = ser - ser.index assert_series_equal(result, expected) # GH 4629 # arithmetic datetime64 ops with an index - s = Series(date_range('20130101', periods=5), - index=date_range('20130101', periods=5)) - expected = s - s.index.to_series() - result = s - s.index + ser = Series(date_range('20130101', periods=5), + index=date_range('20130101', periods=5)) + expected = ser - ser.index.to_series() + result = ser - ser.index assert_series_equal(result, expected) with pytest.raises(TypeError): # GH#18850 - result = s - s.index.to_period() + result = ser - ser.index.to_period() df = DataFrame(np.random.randn(5, 2), index=date_range('20130101', periods=5)) diff --git a/pandas/tests/series/test_subclass.py b/pandas/tests/series/test_subclass.py index 37c8d7343f7f1..60afaa3b821e1 100644 --- a/pandas/tests/series/test_subclass.py +++ b/pandas/tests/series/test_subclass.py @@ -13,24 +13,31 @@ def test_indexing_sliced(self): res = s.loc[['a', 'b']] exp = tm.SubclassedSeries([1, 2], index=list('ab')) tm.assert_series_equal(res, exp) - assert isinstance(res, tm.SubclassedSeries) res = s.iloc[[2, 3]] exp = tm.SubclassedSeries([3, 4], index=list('cd')) tm.assert_series_equal(res, exp) - assert isinstance(res, tm.SubclassedSeries) res = s.loc[['a', 'b']] exp = tm.SubclassedSeries([1, 2], index=list('ab')) tm.assert_series_equal(res, exp) - assert isinstance(res, tm.SubclassedSeries) def test_to_frame(self): s = tm.SubclassedSeries([1, 2, 3, 4], index=list('abcd'), name='xxx') res = s.to_frame() exp = tm.SubclassedDataFrame({'xxx': [1, 2, 3, 4]}, index=list('abcd')) tm.assert_frame_equal(res, exp) - assert isinstance(res, tm.SubclassedDataFrame) + + def 
test_subclass_unstack(self): + # GH 15564 + s = tm.SubclassedSeries( + [1, 2, 3, 4], index=[list('aabb'), list('xyxy')]) + + res = s.unstack() + exp = tm.SubclassedDataFrame( + {'x': [1, 3], 'y': [2, 4]}, index=['a', 'b']) + + tm.assert_frame_equal(res, exp) class TestSparseSeriesSubclassing(object): diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index 6e711abf4491b..7be801629e387 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -11,6 +11,8 @@ import pandas.util._test_decorators as td from pandas._libs.tslib import iNaT from pandas.compat import lrange, StringIO, product +from pandas.errors import NullFrequencyError + from pandas.core.indexes.timedeltas import TimedeltaIndex from pandas.core.indexes.datetimes import DatetimeIndex from pandas.tseries.offsets import BDay, BMonthEnd @@ -123,7 +125,7 @@ def test_shift2(self): tm.assert_index_equal(result.index, exp_index) idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-04']) - pytest.raises(ValueError, idx.shift, 1) + pytest.raises(NullFrequencyError, idx.shift, 1) def test_shift_dst(self): # GH 13926 @@ -935,7 +937,7 @@ def test_from_M8_structured(self): assert isinstance(s[0], Timestamp) assert s[0] == dates[0][0] - with pytest.warns(FutureWarning): + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): s = Series.from_array(arr['Date'], Index([0])) assert s[0] == dates[0][0] diff --git a/pandas/tests/sparse/frame/test_frame.py b/pandas/tests/sparse/frame/test_frame.py index 058892e3b85ff..2b589ebd4735e 100644 --- a/pandas/tests/sparse/frame/test_frame.py +++ b/pandas/tests/sparse/frame/test_frame.py @@ -199,6 +199,29 @@ def test_constructor_from_series(self): # without sparse value raises error # df2 = SparseDataFrame([x2_sparse, y]) + def test_constructor_from_dense_series(self): + # GH 19393 + # series with name + x = Series(np.random.randn(10000), name='a') + result = SparseDataFrame(x) + 
expected = x.to_frame().to_sparse() + tm.assert_sp_frame_equal(result, expected) + + # series with no name + x = Series(np.random.randn(10000)) + result = SparseDataFrame(x) + expected = x.to_frame().to_sparse() + tm.assert_sp_frame_equal(result, expected) + + def test_constructor_from_unknown_type(self): + # GH 19393 + class Unknown: + pass + with pytest.raises(TypeError, + message='SparseDataFrame called with unkown type ' + '"Unknown" for data argument'): + SparseDataFrame(Unknown()) + def test_constructor_preserve_attr(self): # GH 13866 arr = pd.SparseArray([1, 0, 3, 0], dtype=np.int64, fill_value=0) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 6b3b519d49f7f..b1e3177547ac6 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -17,7 +17,7 @@ from pandas._libs.hashtable import unique_label_indices from pandas.compat import lrange, range import pandas.core.algorithms as algos -from pandas.core.common import _asarray_tuplesafe +import pandas.core.common as com import pandas.util.testing as tm import pandas.util._test_decorators as td from pandas.core.dtypes.dtypes import CategoricalDtype as CDT @@ -217,7 +217,8 @@ def test_factorize_tuple_list(self, data, expected_label, expected_level): tm.assert_numpy_array_equal(result[0], np.array(expected_label, dtype=np.intp)) - expected_level_array = _asarray_tuplesafe(expected_level, dtype=object) + expected_level_array = com._asarray_tuplesafe(expected_level, + dtype=object) tm.assert_numpy_array_equal(result[1], expected_level_array) def test_complex_sorting(self): diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index cb905d8186ea9..df2547fc7b0da 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -114,7 +114,7 @@ def __init__(self, obj): def setup_method(self, method): pass - def test_invalida_delgation(self): + def test_invalid_delegation(self): # these show that in order for the delegation to work # the _delegate_* methods need 
to be overridden to not raise # a TypeError @@ -265,8 +265,8 @@ class TestIndexOps(Ops): def setup_method(self, method): super(TestIndexOps, self).setup_method(method) - self.is_valid_objs = [o for o in self.objs if o._allow_index_ops] - self.not_valid_objs = [o for o in self.objs if not o._allow_index_ops] + self.is_valid_objs = self.objs + self.not_valid_objs = [] def test_none_comparison(self): diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 424ba6aab9a56..9582264a8c716 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -1590,6 +1590,38 @@ def test_unstack_group_index_overflow(self): result = s.unstack(4) assert result.shape == (500, 2) + def test_pyint_engine(self): + # GH 18519 : when combinations of codes cannot be represented in 64 + # bits, the index underlying the MultiIndex engine works with Python + # integers, rather than uint64. + N = 5 + keys = [tuple(l) for l in [[0] * 10 * N, + [1] * 10 * N, + [2] * 10 * N, + [np.nan] * N + [2] * 9 * N, + [0] * N + [2] * 9 * N, + [np.nan] * N + [2] * 8 * N + [0] * N]] + # Each level contains 4 elements (including NaN), so it is represented + # in 2 bits, for a total of 2*N*10 = 100 > 64 bits. If we were using a + # 64 bit engine and truncating the first levels, the fourth and fifth + # keys would collide; if truncating the last levels, the fifth and + # sixth; if rotating bits rather than shifting, the third and fifth. 
+ + for idx in range(len(keys)): + index = MultiIndex.from_tuples(keys) + assert index.get_loc(keys[idx]) == idx + + expected = np.arange(idx + 1, dtype='int64') + result = index.get_indexer([keys[i] for i in expected]) + tm.assert_numpy_array_equal(result, expected) + + # With missing key: + idces = range(len(keys)) + expected = np.array([-1] + list(idces), dtype='int64') + missing = tuple([0, 1] * 5 * N) + result = index.get_indexer([missing] + [keys[i] for i in idces]) + tm.assert_numpy_array_equal(result, expected) + def test_getitem_lowerdim_corner(self): pytest.raises(KeyError, self.frame.loc.__getitem__, (('bar', 'three'), 'B')) diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 770560134d8d6..1955fc301be9b 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -2302,7 +2302,7 @@ def check_drop(drop_val, axis_number, aliases, expected): expected = Panel({"One": df}) check_drop('Two', 0, ['items'], expected) - pytest.raises(ValueError, panel.drop, 'Three') + pytest.raises(KeyError, panel.drop, 'Three') # errors = 'ignore' dropped = panel.drop('Three', errors='ignore') diff --git a/pandas/tests/test_register_accessor.py b/pandas/tests/test_register_accessor.py new file mode 100644 index 0000000000000..fe0cf4c9b38af --- /dev/null +++ b/pandas/tests/test_register_accessor.py @@ -0,0 +1,87 @@ +import contextlib + +import pytest + +import pandas as pd +import pandas.util.testing as tm + + +@contextlib.contextmanager +def ensure_removed(obj, attr): + """Ensure that an attribute added to 'obj' during the test is + removed when we're done""" + try: + yield + finally: + try: + delattr(obj, attr) + except AttributeError: + pass + + +class MyAccessor(object): + + def __init__(self, obj): + self.obj = obj + self.item = 'item' + + @property + def prop(self): + return self.item + + def method(self): + return self.item + + +@pytest.mark.parametrize('obj, registrar', [ + (pd.Series, pd.api.extensions.register_series_accessor), + 
(pd.DataFrame, pd.api.extensions.register_dataframe_accessor), + (pd.Index, pd.api.extensions.register_index_accessor) +]) +def test_series_register(obj, registrar): + with ensure_removed(obj, 'mine'): + before = set(dir(obj)) + registrar('mine')(MyAccessor) + assert obj([]).mine.prop == 'item' + after = set(dir(obj)) + assert (before ^ after) == {'mine'} + + +def test_accessor_works(): + with ensure_removed(pd.Series, 'mine'): + pd.api.extensions.register_series_accessor('mine')(MyAccessor) + + s = pd.Series([1, 2]) + assert s.mine.obj is s + + assert s.mine.prop == 'item' + assert s.mine.method() == 'item' + + +def test_overwrite_warns(): + # Need to restore mean + mean = pd.Series.mean + try: + with tm.assert_produces_warning(UserWarning) as w: + pd.api.extensions.register_series_accessor('mean')(MyAccessor) + s = pd.Series([1, 2]) + assert s.mean.prop == 'item' + msg = str(w[0].message) + assert 'mean' in msg + assert 'MyAccessor' in msg + assert 'Series' in msg + finally: + pd.Series.mean = mean + + +def test_raises_attribute_error(): + + with ensure_removed(pd.Series, 'bad'): + + @pd.api.extensions.register_series_accessor("bad") + class Bad(object): + def __init__(self, data): + raise AttributeError("whoops") + + with tm.assert_raises_regex(AttributeError, "whoops"): + pd.Series([]).bad diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index e9a517605020a..a5aaa328a8e06 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -20,9 +20,10 @@ from pandas.core.dtypes.generic import ABCSeries, ABCDataFrame from pandas.compat import range, lrange, zip, product, OrderedDict -from pandas.core.base import SpecificationError, AbstractMethodError +from pandas.core.base import SpecificationError from pandas.errors import UnsupportedFunctionCall from pandas.core.groupby import DataError +import pandas.core.common as com from pandas.tseries.frequencies import to_offset from pandas.core.indexes.datetimes import date_range 
@@ -726,7 +727,7 @@ def index(self, _index_start, _index_end, _index_freq): @pytest.fixture def _series_name(self): - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) @pytest.fixture def _static_values(self, index): @@ -963,6 +964,7 @@ def test_resample_basic(self): rng = date_range('1/1/2000 00:00:00', '1/1/2000 00:13:00', freq='min', name='index') s = Series(np.random.randn(14), index=rng) + result = s.resample('5min', closed='right', label='right').mean() exp_idx = date_range('1/1/2000', periods=4, freq='5min', name='index') @@ -985,6 +987,20 @@ def test_resample_basic(self): expect = s.groupby(grouper).agg(lambda x: x[-1]) assert_series_equal(result, expect) + def test_resample_string_kwargs(self): + # Test for issue #19303 + rng = date_range('1/1/2000 00:00:00', '1/1/2000 00:13:00', freq='min', + name='index') + s = Series(np.random.randn(14), index=rng) + + # Check that wrong keyword argument strings raise an error + with pytest.raises(ValueError): + s.resample('5min', label='righttt').mean() + with pytest.raises(ValueError): + s.resample('5min', closed='righttt').mean() + with pytest.raises(ValueError): + s.resample('5min', convention='starttt').mean() + def test_resample_how(self): rng = date_range('1/1/2000 00:00:00', '1/1/2000 00:13:00', freq='min', name='index') diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 8aa69bcbfdf7f..973fe74429551 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -1072,28 +1072,50 @@ def test_extractall_single_group_with_quantifier(self): e = DataFrame(['ab', 'abc', 'd', 'cd'], i) tm.assert_frame_equal(r, e) - def test_extractall_no_matches(self): - s = Series(['a3', 'b3', 'd4c2'], name='series_name') + @pytest.mark.parametrize('data, names', [ + ([], (None, )), + ([], ('i1', )), + ([], (None, 'i2')), + ([], ('i1', 'i2')), + (['a3', 'b3', 'd4c2'], (None, )), + (['a3', 'b3', 'd4c2'], ('i1', 'i2')), + (['a3', 'b3', 'd4c2'], (None, 'i2')), + (['a3', 
'b3', 'd4c2'], ('i1', 'i2')), + ]) + def test_extractall_no_matches(self, data, names): + # GH19075 extractall with no matches should return a valid MultiIndex + n = len(data) + if len(names) == 1: + i = Index(range(n), name=names[0]) + else: + a = (tuple([i] * (n - 1)) for i in range(n)) + i = MultiIndex.from_tuples(a, names=names) + s = Series(data, name='series_name', index=i, dtype='object') + ei = MultiIndex.from_tuples([], names=(names + ('match',))) + # one un-named group. r = s.str.extractall('(z)') - e = DataFrame(columns=[0]) + e = DataFrame(columns=[0], index=ei) tm.assert_frame_equal(r, e) + # two un-named groups. r = s.str.extractall('(z)(z)') - e = DataFrame(columns=[0, 1]) + e = DataFrame(columns=[0, 1], index=ei) tm.assert_frame_equal(r, e) + # one named group. r = s.str.extractall('(?Pz)') - e = DataFrame(columns=["first"]) + e = DataFrame(columns=["first"], index=ei) tm.assert_frame_equal(r, e) + # two named groups. r = s.str.extractall('(?Pz)(?Pz)') - e = DataFrame(columns=["first", "second"]) + e = DataFrame(columns=["first", "second"], index=ei) tm.assert_frame_equal(r, e) + # one named, one un-named. 
r = s.str.extractall('(z)(?Pz)') - e = DataFrame(columns=[0, - "second"]) + e = DataFrame(columns=[0, "second"], index=ei) tm.assert_frame_equal(r, e) def test_extractall_stringindex(self): diff --git a/pandas/tests/tseries/offsets/test_liboffsets.py b/pandas/tests/tseries/offsets/test_liboffsets.py index 1e0ecc39084eb..a31a79d2f68ed 100644 --- a/pandas/tests/tseries/offsets/test_liboffsets.py +++ b/pandas/tests/tseries/offsets/test_liboffsets.py @@ -156,22 +156,6 @@ def test_roll_qtrday(): assert roll_qtrday(other, n, month, 'business_end', modby=3) == n -def test_roll_monthday(): - other = Timestamp('2017-12-29', tz='US/Pacific') - before = Timestamp('2017-12-01', tz='US/Pacific') - after = Timestamp('2017-12-31', tz='US/Pacific') - - n = 42 - assert liboffsets.roll_monthday(other, n, other) == n - assert liboffsets.roll_monthday(other, n, before) == n - assert liboffsets.roll_monthday(other, n, after) == n - 1 - - n = -4 - assert liboffsets.roll_monthday(other, n, other) == n - assert liboffsets.roll_monthday(other, n, before) == n + 1 - assert liboffsets.roll_monthday(other, n, after) == n - - def test_roll_convention(): other = 29 before = 1 diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index 23e627aeba017..b086884ecd250 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -3087,6 +3087,13 @@ def test_get_offset_day_error(): DateOffset()._get_offset_day(datetime.now()) +def test_valid_default_arguments(offset_types): + # GH#19142 check that the calling the constructors without passing + # any keyword arguments produce valid offsets + cls = offset_types + cls() + + @pytest.mark.parametrize('kwd', sorted(list(liboffsets.relativedelta_kwds))) def test_valid_month_attributes(kwd, month_classes): # GH#18226 diff --git a/pandas/tests/tseries/test_timezones.py b/pandas/tests/tseries/test_timezones.py index b3813d03532fb..7ae63d7d080cc 100644 --- 
a/pandas/tests/tseries/test_timezones.py +++ b/pandas/tests/tseries/test_timezones.py @@ -61,6 +61,10 @@ def tzstr(self, tz): def localize(self, tz, x): return tz.localize(x) + def normalize(self, ts): + tzinfo = ts.tzinfo + return tzinfo.normalize(ts) + def cmptz(self, tz1, tz2): # Compare two timezones. Overridden in subclass to parameterize # tests. @@ -935,6 +939,27 @@ def test_datetimeindex_tz_nat(self): assert isna(idx[1]) assert idx[0].tzinfo is not None + def test_replace_across_dst(self): + # GH#18319 check that 1) timezone is correctly normalized and + # 2) that hour is not incorrectly changed by this normalization + tz = self.tz('US/Eastern') + + ts_naive = Timestamp('2017-12-03 16:03:30') + ts_aware = self.localize(tz, ts_naive) + + # Preliminary sanity-check + assert ts_aware == self.normalize(ts_aware) + + # Replace across DST boundary + ts2 = ts_aware.replace(month=6) + + # Check that `replace` preserves hour literal + assert (ts2.hour, ts2.minute) == (ts_aware.hour, ts_aware.minute) + + # Check that post-replace object is appropriately normalized + ts2b = self.normalize(ts2) + assert ts2 == ts2b + class TestTimeZoneSupportDateutil(TestTimeZoneSupportPytz): @@ -959,6 +984,10 @@ def cmptz(self, tz1, tz2): def localize(self, tz, x): return x.replace(tzinfo=tz) + def normalize(self, ts): + # no-op for dateutil + return ts + @td.skip_if_windows def test_utc_with_system_utc(self): from pandas._libs.tslibs.timezones import maybe_get_tz diff --git a/pandas/tests/util/test_hashing.py b/pandas/tests/util/test_hashing.py index 289592939e3da..fe8d75539879e 100644 --- a/pandas/tests/util/test_hashing.py +++ b/pandas/tests/util/test_hashing.py @@ -290,18 +290,3 @@ def test_hash_collisions(self): result = hash_array(np.asarray(L, dtype=object), 'utf8') tm.assert_numpy_array_equal( result, np.concatenate([expected1, expected2], axis=0)) - - -def test_deprecation(): - - with tm.assert_produces_warning(DeprecationWarning, - check_stacklevel=False): - from 
pandas.tools.hashing import hash_pandas_object - obj = Series(list('abc')) - hash_pandas_object(obj, hash_key='9876543210123456') - - with tm.assert_produces_warning(DeprecationWarning, - check_stacklevel=False): - from pandas.tools.hashing import hash_array - obj = np.array([1, 2, 3]) - hash_array(obj, hash_key='9876543210123456') diff --git a/pandas/tests/util/test_util.py b/pandas/tests/util/test_util.py index 8da2b401fc848..3b0a428218771 100644 --- a/pandas/tests/util/test_util.py +++ b/pandas/tests/util/test_util.py @@ -8,7 +8,7 @@ import pytest from pandas.compat import intern -from pandas.core.common import _all_none +import pandas.core.common as com from pandas.util._move import move_into_mutable_buffer, BadMove, stolenbuf from pandas.util._decorators import deprecate_kwarg, make_signature from pandas.util._validators import (validate_args, validate_kwargs, @@ -438,7 +438,7 @@ def test_set_locale(self): pytest.skip("Only a single locale found, no point in " "trying to test setting another locale") - if _all_none(*self.current_locale): + if com._all_none(*self.current_locale): # Not sure why, but on some travis runs with pytest, # getlocale() returned (None, None). 
pytest.skip("Current locale is not set.") diff --git a/pandas/tools/hashing.py b/pandas/tools/hashing.py deleted file mode 100644 index ba38710b607af..0000000000000 --- a/pandas/tools/hashing.py +++ /dev/null @@ -1,18 +0,0 @@ -import warnings -import sys - -m = sys.modules['pandas.tools.hashing'] -for t in ['hash_pandas_object', 'hash_array']: - - def outer(t=t): - - def wrapper(*args, **kwargs): - from pandas import util - warnings.warn("pandas.tools.hashing is deprecated and will be " - "removed in a future version, import " - "from pandas.util", - DeprecationWarning, stacklevel=3) - return getattr(util, t)(*args, **kwargs) - return wrapper - - setattr(m, t, outer(t)) diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 7c5fe2f0314e4..ec206e0997d0b 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -9,14 +9,14 @@ from pandas.core.dtypes.generic import ABCSeries, ABCDatetimeIndex, ABCPeriod from pandas.core.tools.datetimes import to_datetime -from pandas.core.common import AbstractMethodError +import pandas.core.common as com # import after tools, dateutil check from dateutil.easter import easter from pandas._libs import tslib, Timestamp, OutOfBoundsDatetime, Timedelta from pandas.util._decorators import cache_readonly -from pandas._libs.tslibs import ccalendar +from pandas._libs.tslibs import ccalendar, frequencies as libfrequencies from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds import pandas._libs.tslibs.offsets as liboffsets from pandas._libs.tslibs.offsets import ( @@ -27,7 +27,6 @@ apply_index_wraps, roll_yearday, shift_month, - EndMixin, BaseOffset) @@ -1039,55 +1038,62 @@ def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri', _CustomMixin.__init__(self, weekmask, holidays, calendar) @cache_readonly - def cbday(self): - kwds = self.kwds - return CustomBusinessDay(n=self.n, normalize=self.normalize, **kwds) + def cbday_roll(self): + """Define default roll function to be called in 
apply method""" + cbday = CustomBusinessDay(n=self.n, normalize=False, **self.kwds) + + if self._prefix.endswith('S'): + # MonthBegin + roll_func = cbday.rollforward + else: + # MonthEnd + roll_func = cbday.rollback + return roll_func @cache_readonly def m_offset(self): if self._prefix.endswith('S'): - # MonthBegin: - return MonthBegin(n=1, normalize=self.normalize) + # MonthBegin + moff = MonthBegin(n=1, normalize=False) else: # MonthEnd - return MonthEnd(n=1, normalize=self.normalize) + moff = MonthEnd(n=1, normalize=False) + return moff - -class CustomBusinessMonthEnd(_CustomBusinessMonth): - __doc__ = _CustomBusinessMonth.__doc__.replace('[BEGIN/END]', 'end') - _prefix = 'CBM' + @cache_readonly + def month_roll(self): + """Define default roll function to be called in apply method""" + if self._prefix.endswith('S'): + # MonthBegin + roll_func = self.m_offset.rollback + else: + # MonthEnd + roll_func = self.m_offset.rollforward + return roll_func @apply_wraps def apply(self, other): # First move to month offset - cur_mend = self.m_offset.rollforward(other) + cur_month_offset_date = self.month_roll(other) # Find this custom month offset - compare_date = self.cbday.rollback(cur_mend) - n = liboffsets.roll_monthday(other, self.n, compare_date) + compare_date = self.cbday_roll(cur_month_offset_date) + n = liboffsets.roll_convention(other.day, self.n, compare_date.day) - new = cur_mend + n * self.m_offset - result = self.cbday.rollback(new) + new = cur_month_offset_date + n * self.m_offset + result = self.cbday_roll(new) return result +class CustomBusinessMonthEnd(_CustomBusinessMonth): + __doc__ = _CustomBusinessMonth.__doc__.replace('[BEGIN/END]', 'end') + _prefix = 'CBM' + + class CustomBusinessMonthBegin(_CustomBusinessMonth): __doc__ = _CustomBusinessMonth.__doc__.replace('[BEGIN/END]', 'beginning') _prefix = 'CBMS' - @apply_wraps - def apply(self, other): - # First move to month offset - cur_mbegin = self.m_offset.rollback(other) - - # Find this custom month 
offset - compare_date = self.cbday.rollforward(cur_mbegin) - n = liboffsets.roll_monthday(other, self.n, compare_date) - - new = cur_mbegin + n * self.m_offset - result = self.cbday.rollforward(new) - return result - # --------------------------------------------------------------------- # Semi-Month Based Offset Classes @@ -1142,7 +1148,7 @@ def apply(self, other): def _apply(self, n, other): """Handle specific apply logic for child classes""" - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) @apply_index_wraps def apply_index(self, i): @@ -1176,11 +1182,11 @@ def _get_roll(self, i, before_day_of_month, after_day_of_month): The roll array is based on the fact that i gets rolled back to the first day of the month. """ - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) def _apply_index_days(self, i, roll): """Apply the correct day for each date in i""" - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) class SemiMonthEnd(SemiMonthOffset): @@ -1226,7 +1232,19 @@ def _get_roll(self, i, before_day_of_month, after_day_of_month): return roll def _apply_index_days(self, i, roll): - i += (roll % 2) * Timedelta(days=self.day_of_month).value + """Add days portion of offset to DatetimeIndex i + + Parameters + ---------- + i : DatetimeIndex + roll : ndarray[int64_t] + + Returns + ------- + result : DatetimeIndex + """ + nanos = (roll % 2) * Timedelta(days=self.day_of_month).value + i += nanos.astype('timedelta64[ns]') return i + Timedelta(days=-1) @@ -1271,13 +1289,25 @@ def _get_roll(self, i, before_day_of_month, after_day_of_month): return roll def _apply_index_days(self, i, roll): - return i + (roll % 2) * Timedelta(days=self.day_of_month - 1).value + """Add days portion of offset to DatetimeIndex i + + Parameters + ---------- + i : DatetimeIndex + roll : ndarray[int64_t] + + Returns + ------- + result : DatetimeIndex + """ + nanos = (roll % 2) * Timedelta(days=self.day_of_month - 1).value + return i + 
nanos.astype('timedelta64[ns]') # --------------------------------------------------------------------- # Week-Based Offset Classes -class Week(EndMixin, DateOffset): +class Week(DateOffset): """ Weekly offset @@ -1325,7 +1355,34 @@ def apply_index(self, i): return ((i.to_period('W') + self.n).to_timestamp() + i.to_perioddelta('W')) else: - return self._end_apply_index(i, self.freqstr) + return self._end_apply_index(i) + + def _end_apply_index(self, dtindex): + """Add self to the given DatetimeIndex, specialized for case where + self.weekday is non-null. + + Parameters + ---------- + dtindex : DatetimeIndex + + Returns + ------- + result : DatetimeIndex + """ + off = dtindex.to_perioddelta('D') + + base, mult = libfrequencies.get_freq_code(self.freqstr) + base_period = dtindex.to_period(base) + if self.n > 0: + # when adding, dates on end roll to next + normed = dtindex - off + roll = np.where(base_period.to_timestamp(how='end') == normed, + self.n, self.n - 1) + else: + roll = self.n + + base = (base_period + roll).to_timestamp(how='end') + return base + off def onOffset(self, dt): if self.normalize and not _is_normalized(dt): @@ -1380,9 +1437,9 @@ class WeekOfMonth(_WeekOfMonthMixin, DateOffset): Parameters ---------- n : int - week : {0, 1, 2, 3, ...}, default None + week : {0, 1, 2, 3, ...}, default 0 0 is 1st week of month, 1 2nd week, etc. 
- weekday : {0, 1, ..., 6}, default None + weekday : {0, 1, ..., 6}, default 0 0: Mondays 1: Tuesdays 2: Wednesdays @@ -1394,7 +1451,7 @@ class WeekOfMonth(_WeekOfMonthMixin, DateOffset): _prefix = 'WOM' _adjust_dst = True - def __init__(self, n=1, normalize=False, week=None, weekday=None): + def __init__(self, n=1, normalize=False, week=0, weekday=0): self.n = self._validate_n(n) self.normalize = normalize self.weekday = weekday @@ -1457,7 +1514,7 @@ class LastWeekOfMonth(_WeekOfMonthMixin, DateOffset): Parameters ---------- n : int, default 1 - weekday : {0, 1, ..., 6}, default None + weekday : {0, 1, ..., 6}, default 0 0: Mondays 1: Tuesdays 2: Wednesdays @@ -1470,7 +1527,7 @@ class LastWeekOfMonth(_WeekOfMonthMixin, DateOffset): _prefix = 'LWOM' _adjust_dst = True - def __init__(self, n=1, normalize=False, weekday=None): + def __init__(self, n=1, normalize=False, weekday=0): self.n = self._validate_n(n) self.normalize = normalize self.weekday = weekday diff --git a/pandas/util/_decorators.py b/pandas/util/_decorators.py index 6be6152b09fc8..eed9cee54efb3 100644 --- a/pandas/util/_decorators.py +++ b/pandas/util/_decorators.py @@ -7,10 +7,15 @@ from functools import wraps, update_wrapper -def deprecate(name, alternative, alt_name=None, klass=None, - stacklevel=2, msg=None): - """ - Return a new function that emits a deprecation warning on use. +def deprecate(name, alternative, version, alt_name=None, + klass=None, stacklevel=2, msg=None): + """Return a new function that emits a deprecation warning on use. + + To use this method for a deprecated function, another function + `alternative` with the same signature must exist. The deprecated + function will emit a deprecation warning, and in the docstring + it will contain the deprecation directive with the provided version + so it can be detected for future removal. 
Parameters ---------- @@ -18,6 +23,8 @@ def deprecate(name, alternative, alt_name=None, klass=None, Name of function to deprecate alternative : str Name of function to use instead + version : str + Version of pandas in which the method has been deprecated alt_name : str, optional Name to use in preference of alternative.__name__ klass : Warning, default FutureWarning @@ -29,16 +36,24 @@ def deprecate(name, alternative, alt_name=None, klass=None, alt_name = alt_name or alternative.__name__ klass = klass or FutureWarning - msg = msg or "{} is deprecated, use {} instead".format(name, alt_name) + warning_msg = msg or '{} is deprecated, use {} instead'.format(name, + alt_name) @wraps(alternative) def wrapper(*args, **kwargs): - warnings.warn(msg, klass, stacklevel=stacklevel) + warnings.warn(warning_msg, klass, stacklevel=stacklevel) return alternative(*args, **kwargs) - if getattr(wrapper, '__doc__', None) is not None: - wrapper.__doc__ = ('\n'.join(wrap(msg, 70)) + '\n' - + dedent(wrapper.__doc__)) + # adding deprecated directive to the docstring + msg = msg or 'Use `{alt_name}` instead.' + docstring = '.. 
deprecated:: {}\n'.format(version) + docstring += dedent(' ' + ('\n'.join(wrap(msg, 70)))) + + if getattr(wrapper, '__doc__') is not None: + docstring += dedent(wrapper.__doc__) + + wrapper.__doc__ = docstring + return wrapper diff --git a/pandas/util/hashing.py b/pandas/util/hashing.py deleted file mode 100644 index f97a7ac507407..0000000000000 --- a/pandas/util/hashing.py +++ /dev/null @@ -1,18 +0,0 @@ -import warnings -import sys - -m = sys.modules['pandas.util.hashing'] -for t in ['hash_pandas_object', 'hash_array']: - - def outer(t=t): - - def wrapper(*args, **kwargs): - from pandas import util - warnings.warn("pandas.util.hashing is deprecated and will be " - "removed in a future version, import " - "from pandas.util", - DeprecationWarning, stacklevel=3) - return getattr(util, t)(*args, **kwargs) - return wrapper - - setattr(m, t, outer(t)) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index b99f019a8e98f..941bdcbc8b064 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -32,7 +32,7 @@ is_list_like) from pandas.io.formats.printing import pprint_thing from pandas.core.algorithms import take_1d -from pandas.core.common import _all_not_none +import pandas.core.common as com import pandas.compat as compat from pandas.compat import ( @@ -162,6 +162,52 @@ def round_trip_localpath(writer, reader, path=None): return obj +@contextmanager +def decompress_file(path, compression): + """ + Open a compressed file and return a file object + + Parameters + ---------- + path : str + The path where the file is read from + + compression : {'gzip', 'bz2', 'xz', None} + Name of the decompression to use + + Returns + ------- + f : file object + """ + + if compression is None: + f = open(path, 'rb') + elif compression == 'gzip': + import gzip + f = gzip.open(path, 'rb') + elif compression == 'bz2': + import bz2 + f = bz2.BZ2File(path, 'rb') + elif compression == 'xz': + lzma = compat.import_lzma() + f = lzma.LZMAFile(path, 'rb') + elif compression == 
'zip': + import zipfile + zip_file = zipfile.ZipFile(path) + zip_names = zip_file.namelist() + if len(zip_names) == 1: + f = zip_file.open(zip_names.pop()) + else: + raise ValueError('ZIP file {} error. Only one file per ZIP.' + .format(path)) + else: + msg = 'Unrecognized compression type: {}'.format(compression) + raise ValueError(msg) + + yield f + f.close() + + def assert_almost_equal(left, right, check_exact=False, check_dtype='equiv', check_less_precise=False, **kwargs): @@ -449,7 +495,7 @@ def set_locale(new_locale, lc_var=locale.LC_ALL): except ValueError: yield new_locale else: - if _all_not_none(*normalized_locale): + if com._all_not_none(*normalized_locale): yield '.'.join(normalized_locale) else: yield new_locale @@ -2339,12 +2385,44 @@ def exception_matches(self, exc_type, exc_value, trace_back): def assert_produces_warning(expected_warning=Warning, filter_level="always", clear=None, check_stacklevel=True): """ - Context manager for running code that expects to raise (or not raise) - warnings. Checks that code raises the expected warning and only the - expected warning. Pass ``False`` or ``None`` to check that it does *not* - raise a warning. Defaults to ``exception.Warning``, baseclass of all - Warnings. (basically a wrapper around ``warnings.catch_warnings``). + Context manager for running code expected to either raise a specific + warning, or not raise any warnings. Verifies that the code raises the + expected warning, and that it does not raise any other unexpected + warnings. It is basically a wrapper around ``warnings.catch_warnings``. + Parameters + ---------- + expected_warning : {Warning, False, None}, default Warning + The type of Exception raised. ``exception.Warning`` is the base + class for all warnings. To check that no warning is returned, + specify ``False`` or ``None``. + filter_level : str, default "always" + Specifies whether warnings are ignored, displayed, or turned + into errors. 
+ Valid values are: + + * "error" - turns matching warnings into exceptions + * "ignore" - discard the warning + * "always" - always emit a warning + * "default" - print the warning the first time it is generated + from each location + * "module" - print the warning the first time it is generated + from each module + * "once" - print the warning the first time it is generated + + clear : str, default None + If not ``None`` then remove any previously raised warnings from + the ``__warningregistry__`` to ensure that no warning messages are + suppressed by this context manager. If ``None`` is specified, + the ``__warningregistry__`` keeps track of which warnings have been + shown, and does not show them again. + check_stacklevel : bool, default True + If True, displays the line that called the function containing + the warning to show where the function is called. Otherwise, the + line that implements the function is displayed. + + Examples + -------- >>> import warnings >>> with assert_produces_warning(): ... warnings.warn(UserWarning()) diff --git a/scripts/announce.py b/scripts/announce.py old mode 100644 new mode 100755 index 1459d2fc18d2a..7b7933eba54dd --- a/scripts/announce.py +++ b/scripts/announce.py @@ -30,7 +30,7 @@ From the bash command line with $GITHUB token. - $ ./scripts/announce $GITHUB v1.11.0..v1.11.1 > announce.rst + $ ./scripts/announce.py $GITHUB v1.11.0..v1.11.1 > announce.rst """ from __future__ import print_function, division diff --git a/scripts/api_rst_coverage.py b/scripts/api_rst_coverage.py old mode 100644 new mode 100755 index 45340ba0923c4..4800e80d82891 --- a/scripts/api_rst_coverage.py +++ b/scripts/api_rst_coverage.py @@ -1,6 +1,27 @@ -import pandas as pd -import inspect +#!/usr/bin/env python +# -*- encoding: utf-8 -*- +""" +Script to generate a report with the coverage of the API in the docs. 
+ +The output of this script shows the existing methods that are not +included in the API documentation, as well as the methods documented +that do not exist. Ideally, no method should be listed. Currently it +considers the methods of Series, DataFrame and Panel. + +Deprecated methods are usually removed from the documentation, while +still available for three minor versions. They are listed with the +word deprecated and the version number next to them. + +Usage:: + + $ PYTHONPATH=.. ./api_rst_coverage.py + +""" +import os import re +import inspect +import pandas as pd + def main(): # classes whose members to check @@ -13,16 +34,46 @@ def class_name_sort_key(x): else: return x + def get_docstring(x): + class_name, method = x.split('.') + obj = getattr(getattr(pd, class_name), method) + return obj.__doc__ + + def deprecation_version(x): + pattern = re.compile('\.\. deprecated:: ([0-9]+\.[0-9]+\.[0-9]+)') + doc = get_docstring(x) + match = pattern.search(doc) + if match: + return match.groups()[0] + + def add_notes(x): + # Some methods are not documented in api.rst because they + # have been deprecated. Adding a comment to detect them easier. + doc = get_docstring(x) + note = None + if not doc: + note = 'no docstring' + else: + version = deprecation_version(x) + if version: + note = 'deprecated in {}'.format(version) + + return '{} ({})'.format(x, note) if note else x + # class members class_members = set() for cls in classes: - class_members.update([cls.__name__ + '.' 
+ x[0] for x in inspect.getmembers(cls)]) + for member in inspect.getmembers(cls): + class_members.add('{cls}.{member}'.format(cls=cls.__name__, + member=member[0])) # class members referenced in api.rst api_rst_members = set() - file_name = '../doc/source/api.rst' - with open(file_name, 'r') as f: - pattern = re.compile('({})\.(\w+)'.format('|'.join(cls.__name__ for cls in classes))) + base_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + api_rst_fname = os.path.join(base_path, 'doc', 'source', 'api.rst') + class_names = (cls.__name__ for cls in classes) + pattern = re.compile('({})\.(\w+)'.format('|'.join(class_names))) + with open(api_rst_fname, 'r') as f: for line in f: match = pattern.search(line) if match: @@ -30,14 +81,18 @@ def class_name_sort_key(x): print() print("Documented members in api.rst that aren't actual class members:") - for x in sorted(api_rst_members.difference(class_members), key=class_name_sort_key): + for x in sorted(api_rst_members.difference(class_members), + key=class_name_sort_key): print(x) print() - print("Class members (other than those beginning with '_') missing from api.rst:") - for x in sorted(class_members.difference(api_rst_members), key=class_name_sort_key): + print("Class members (other than those beginning with '_') " + "missing from api.rst:") + for x in sorted(class_members.difference(api_rst_members), + key=class_name_sort_key): if '._' not in x: - print(x) + print(add_notes(x)) + if __name__ == "__main__": main() diff --git a/scripts/build_dist_for_release.sh b/scripts/build_dist_for_release.sh old mode 100644 new mode 100755 diff --git a/scripts/convert_deps.py b/scripts/convert_deps.py old mode 100644 new mode 100755 diff --git a/scripts/find_commits_touching_func.py b/scripts/find_commits_touching_func.py index 0dd609417d7ba..29eb4161718ff 100755 --- a/scripts/find_commits_touching_func.py +++ b/scripts/find_commits_touching_func.py @@ -1,135 +1,148 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- 
- # copyright 2013, y-p @ github - -from __future__ import print_function -from pandas.compat import range, lrange, map, string_types, text_type - -"""Search the git history for all commits touching a named method +""" +Search the git history for all commits touching a named method You need the sh module to run this -WARNING: this script uses git clean -f, running it on a repo with untracked files -will probably erase them. +WARNING: this script uses git clean -f, running it on a repo with untracked +files will probably erase them. + +Usage:: + $ ./find_commits_touching_func.py (see arguments below) """ +from __future__ import print_function import logging import re import os +import argparse from collections import namedtuple -from pandas.compat import parse_date - +from pandas.compat import lrange, map, string_types, text_type, parse_date try: import sh except ImportError: - raise ImportError("The 'sh' package is required in order to run this script. ") + raise ImportError("The 'sh' package is required to run this script.") -import argparse desc = """ Find all commits touching a specified function across the codebase. """.strip() argparser = argparse.ArgumentParser(description=desc) argparser.add_argument('funcname', metavar='FUNCNAME', - help='Name of function/method to search for changes on.') + help='Name of function/method to search for changes on') argparser.add_argument('-f', '--file-masks', metavar='f_re(,f_re)*', default=["\.py.?$"], - help='comma separated list of regexes to match filenames against\n'+ - 'defaults all .py? files') + help='comma separated list of regexes to match ' + 'filenames against\ndefaults all .py? 
files') argparser.add_argument('-d', '--dir-masks', metavar='d_re(,d_re)*', default=[], - help='comma separated list of regexes to match base path against') + help='comma separated list of regexes to match base ' + 'path against') argparser.add_argument('-p', '--path-masks', metavar='p_re(,p_re)*', default=[], - help='comma separated list of regexes to match full file path against') + help='comma separated list of regexes to match full ' + 'file path against') argparser.add_argument('-y', '--saw-the-warning', - action='store_true',default=False, - help='must specify this to run, acknowledge you realize this will erase untracked files') + action='store_true', default=False, + help='must specify this to run, acknowledge you ' + 'realize this will erase untracked files') argparser.add_argument('--debug-level', default="CRITICAL", - help='debug level of messages (DEBUG,INFO,etc...)') - + help='debug level of messages (DEBUG, INFO, etc...)') args = argparser.parse_args() lfmt = logging.Formatter(fmt='%(levelname)-8s %(message)s', - datefmt='%m-%d %H:%M:%S' -) - + datefmt='%m-%d %H:%M:%S') shh = logging.StreamHandler() shh.setFormatter(lfmt) - -logger=logging.getLogger("findit") +logger = logging.getLogger("findit") logger.addHandler(shh) +Hit = namedtuple("Hit", "commit path") +HASH_LEN = 8 -Hit=namedtuple("Hit","commit path") -HASH_LEN=8 def clean_checkout(comm): - h,s,d = get_commit_vitals(comm) + h, s, d = get_commit_vitals(comm) if len(s) > 60: s = s[:60] + "..." 
- s=s.split("\n")[0] - logger.info("CO: %s %s" % (comm,s )) + s = s.split("\n")[0] + logger.info("CO: %s %s" % (comm, s)) - sh.git('checkout', comm ,_tty_out=False) + sh.git('checkout', comm, _tty_out=False) sh.git('clean', '-f') -def get_hits(defname,files=()): - cs=set() + +def get_hits(defname, files=()): + cs = set() for f in files: try: - r=sh.git('blame', '-L', '/def\s*{start}/,/def/'.format(start=defname),f,_tty_out=False) + r = sh.git('blame', + '-L', + '/def\s*{start}/,/def/'.format(start=defname), + f, + _tty_out=False) except sh.ErrorReturnCode_128: logger.debug("no matches in %s" % f) continue lines = r.strip().splitlines()[:-1] # remove comment lines - lines = [x for x in lines if not re.search("^\w+\s*\(.+\)\s*#",x)] - hits = set(map(lambda x: x.split(" ")[0],lines)) - cs.update(set(Hit(commit=c,path=f) for c in hits)) + lines = [x for x in lines if not re.search("^\w+\s*\(.+\)\s*#", x)] + hits = set(map(lambda x: x.split(" ")[0], lines)) + cs.update(set(Hit(commit=c, path=f) for c in hits)) return cs -def get_commit_info(c,fmt,sep='\t'): - r=sh.git('log', "--format={}".format(fmt), '{}^..{}'.format(c,c),"-n","1",_tty_out=False) + +def get_commit_info(c, fmt, sep='\t'): + r = sh.git('log', + "--format={}".format(fmt), + '{}^..{}'.format(c, c), + "-n", + "1", + _tty_out=False) return text_type(r).split(sep) -def get_commit_vitals(c,hlen=HASH_LEN): - h,s,d= get_commit_info(c,'%H\t%s\t%ci',"\t") - return h[:hlen],s,parse_date(d) -def file_filter(state,dirname,fnames): - if args.dir_masks and not any(re.search(x,dirname) for x in args.dir_masks): +def get_commit_vitals(c, hlen=HASH_LEN): + h, s, d = get_commit_info(c, '%H\t%s\t%ci', "\t") + return h[:hlen], s, parse_date(d) + + +def file_filter(state, dirname, fnames): + if (args.dir_masks and + not any(re.search(x, dirname) for x in args.dir_masks)): return for f in fnames: - p = os.path.abspath(os.path.join(os.path.realpath(dirname),f)) - if any(re.search(x,f) for x in args.file_masks)\ - or 
any(re.search(x,p) for x in args.path_masks): + p = os.path.abspath(os.path.join(os.path.realpath(dirname), f)) + if (any(re.search(x, f) for x in args.file_masks) or + any(re.search(x, p) for x in args.path_masks)): if os.path.isfile(p): state['files'].append(p) -def search(defname,head_commit="HEAD"): - HEAD,s = get_commit_vitals("HEAD")[:2] - logger.info("HEAD at %s: %s" % (HEAD,s)) + +def search(defname, head_commit="HEAD"): + HEAD, s = get_commit_vitals("HEAD")[:2] + logger.info("HEAD at %s: %s" % (HEAD, s)) done_commits = set() # allhits = set() files = [] state = dict(files=files) - os.path.walk('.',file_filter,state) + os.walk('.', file_filter, state) # files now holds a list of paths to files # seed with hits from q - allhits= set(get_hits(defname, files = files)) + allhits = set(get_hits(defname, files=files)) q = set([HEAD]) try: while q: - h=q.pop() + h = q.pop() clean_checkout(h) - hits = get_hits(defname, files = files) + hits = get_hits(defname, files=files) for x in hits: - prevc = get_commit_vitals(x.commit+"^")[0] + prevc = get_commit_vitals(x.commit + "^")[0] if prevc not in done_commits: q.add(prevc) allhits.update(hits) @@ -141,43 +154,46 @@ def search(defname,head_commit="HEAD"): clean_checkout(HEAD) return allhits + def pprint_hits(hits): - SUBJ_LEN=50 + SUBJ_LEN = 50 PATH_LEN = 20 - hits=list(hits) + hits = list(hits) max_p = 0 for hit in hits: - p=hit.path.split(os.path.realpath(os.curdir)+os.path.sep)[-1] - max_p=max(max_p,len(p)) + p = hit.path.split(os.path.realpath(os.curdir) + os.path.sep)[-1] + max_p = max(max_p, len(p)) if max_p < PATH_LEN: SUBJ_LEN += PATH_LEN - max_p PATH_LEN = max_p def sorter(i): - h,s,d=get_commit_vitals(hits[i].commit) - return hits[i].path,d + h, s, d = get_commit_vitals(hits[i].commit) + return hits[i].path, d - print("\nThese commits touched the %s method in these files on these dates:\n" \ - % args.funcname) - for i in sorted(lrange(len(hits)),key=sorter): + print(('\nThese commits touched the %s method in 
these files ' + 'on these dates:\n') % args.funcname) + for i in sorted(lrange(len(hits)), key=sorter): hit = hits[i] - h,s,d=get_commit_vitals(hit.commit) - p=hit.path.split(os.path.realpath(os.curdir)+os.path.sep)[-1] + h, s, d = get_commit_vitals(hit.commit) + p = hit.path.split(os.path.realpath(os.curdir) + os.path.sep)[-1] fmt = "{:%d} {:10} {:<%d} {:<%d}" % (HASH_LEN, SUBJ_LEN, PATH_LEN) if len(s) > SUBJ_LEN: - s = s[:SUBJ_LEN-5] + " ..." - print(fmt.format(h[:HASH_LEN],d.isoformat()[:10],s,p[-20:]) ) + s = s[:SUBJ_LEN - 5] + " ..." + print(fmt.format(h[:HASH_LEN], d.isoformat()[:10], s, p[-20:])) print("\n") + def main(): if not args.saw_the_warning: argparser.print_help() print(""" !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -WARNING: this script uses git clean -f, running it on a repo with untracked files. +WARNING: +this script uses git clean -f, running it on a repo with untracked files. It's recommended that you make a fresh clone and run from its root directory. You must specify the -y argument to ignore this warning. !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! @@ -190,12 +206,11 @@ def main(): if isinstance(args.dir_masks, string_types): args.dir_masks = args.dir_masks.split(',') - logger.setLevel(getattr(logging,args.debug_level)) + logger.setLevel(getattr(logging, args.debug_level)) - hits=search(args.funcname) + hits = search(args.funcname) pprint_hits(hits) - pass if __name__ == "__main__": import sys diff --git a/scripts/find_undoc_args.py b/scripts/find_undoc_args.py index 32b23a67b187f..a135c8e5171a1 100755 --- a/scripts/find_undoc_args.py +++ b/scripts/find_undoc_args.py @@ -1,126 +1,135 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +""" +Script that compares the signature arguments with the ones in the docstring +and returns the differences in plain text or GitHub task list format. 
+Usage:: + $ ./find_undoc_args.py (see arguments below) +""" from __future__ import print_function - +import sys from collections import namedtuple -from itertools import islice import types import os import re import argparse -#http://docs.python.org/2/library/argparse.html -# arg name is positional is not prefixed with - or -- +import inspect + parser = argparse.ArgumentParser(description='Program description.') parser.add_argument('-p', '--path', metavar='PATH', type=str, required=False, - default=None, - help='full path relative to which paths wills be reported',action='store') -parser.add_argument('-m', '--module', metavar='MODULE', type=str,required=True, - help='name of package to import and examine',action='store') -parser.add_argument('-G', '--github_repo', metavar='REPO', type=str,required=False, - help='github project where the code lives, e.g. "pandas-dev/pandas"', - default=None,action='store') - + default=None, action='store', + help='full path relative to which paths wills be reported') +parser.add_argument('-m', '--module', metavar='MODULE', type=str, + required=True, action='store', + help='name of package to import and examine') +parser.add_argument('-G', '--github_repo', metavar='REPO', type=str, + required=False, default=None, action='store', + help='github project where the code lives, ' + 'e.g. 
"pandas-dev/pandas"') args = parser.parse_args() -Entry=namedtuple("Entry","func path lnum undoc_names missing_args nsig_names ndoc_names") +Entry = namedtuple('Entry', + 'func path lnum undoc_names missing_args ' + 'nsig_names ndoc_names') -def entry_gen(root_ns,module_name): - q=[root_ns] - seen=set() +def entry_gen(root_ns, module_name): + """Walk and yield all methods and functions in the module root_ns and + submodules.""" + q = [root_ns] + seen = set() while q: ns = q.pop() for x in dir(ns): - cand = getattr(ns,x) - if (isinstance(cand,types.ModuleType) - and cand.__name__ not in seen - and cand.__name__.startswith(module_name)): - # print(cand.__name__) + cand = getattr(ns, x) + if (isinstance(cand, types.ModuleType) and + cand.__name__ not in seen and + cand.__name__.startswith(module_name)): seen.add(cand.__name__) - q.insert(0,cand) - elif (isinstance(cand,(types.MethodType,types.FunctionType)) and + q.insert(0, cand) + elif (isinstance(cand, (types.MethodType, types.FunctionType)) and cand not in seen and cand.__doc__): seen.add(cand) yield cand + def cmp_docstring_sig(f): + """Return an `Entry` object describing the differences between the + arguments in the signature and the documented ones.""" def build_loc(f): - path=f.__code__.co_filename.split(args.path,1)[-1][1:] - return dict(path=path,lnum=f.__code__.co_firstlineno) + path = f.__code__.co_filename.split(args.path, 1)[-1][1:] + return dict(path=path, lnum=f.__code__.co_firstlineno) - import inspect - sig_names=set(inspect.getargspec(f).args) + sig_names = set(inspect.getargspec(f).args) + # XXX numpydoc can be used to get the list of parameters doc = f.__doc__.lower() - doc = re.split("^\s*parameters\s*",doc,1,re.M)[-1] - doc = re.split("^\s*returns*",doc,1,re.M)[0] - doc_names={x.split(":")[0].strip() for x in doc.split("\n") - if re.match("\s+[\w_]+\s*:",x)} - sig_names.discard("self") - doc_names.discard("kwds") - doc_names.discard("kwargs") - doc_names.discard("args") - return 
Entry(func=f,path=build_loc(f)['path'],lnum=build_loc(f)['lnum'], + doc = re.split('^\s*parameters\s*', doc, 1, re.M)[-1] + doc = re.split('^\s*returns*', doc, 1, re.M)[0] + doc_names = {x.split(":")[0].strip() for x in doc.split('\n') + if re.match('\s+[\w_]+\s*:', x)} + sig_names.discard('self') + doc_names.discard('kwds') + doc_names.discard('kwargs') + doc_names.discard('args') + return Entry(func=f, path=build_loc(f)['path'], lnum=build_loc(f)['lnum'], undoc_names=sig_names.difference(doc_names), - missing_args=doc_names.difference(sig_names),nsig_names=len(sig_names), - ndoc_names=len(doc_names)) + missing_args=doc_names.difference(sig_names), + nsig_names=len(sig_names), ndoc_names=len(doc_names)) + def format_id(i): return i -def format_item_as_github_task_list( i,item,repo): - tmpl = "- [ ] {id}) [{file}:{lnum} ({func_name}())]({link}) - __Missing__[{nmissing}/{total_args}]: {undoc_names}" +def format_item_as_github_task_list(i, item, repo): + tmpl = ('- [ ] {id_}) [{fname}:{lnum} ({func_name}())]({link}) - ' + '__Missing__[{nmissing}/{total_args}]: {undoc_names}') link_tmpl = "https://github.com/{repo}/blob/master/{file}#L{lnum}" - - link = link_tmpl.format(repo=repo,file=item.path ,lnum=item.lnum ) - - s = tmpl.format(id=i,file=item.path , - lnum=item.lnum, - func_name=item.func.__name__, - link=link, - nmissing=len(item.undoc_names), - total_args=item.nsig_names, - undoc_names=list(item.undoc_names)) - + link = link_tmpl.format(repo=repo, file=item.path, lnum=item.lnum) + s = tmpl.format(id_=i, fname=item.path, lnum=item.lnum, + func_name=item.func.__name__, link=link, + nmissing=len(item.undoc_names), + total_args=item.nsig_names, + undoc_names=list(item.undoc_names)) if item.missing_args: - s+= " __Extra__(?): {missing_args}".format(missing_args=list(item.missing_args)) - + s += ' __Extra__(?): %s' % list(item.missing_args) return s -def format_item_as_plain(i,item): - tmpl = "+{lnum} {path} {func_name}(): 
Missing[{nmissing}/{total_args}]={undoc_names}" - - s = tmpl.format(path=item.path , - lnum=item.lnum, - func_name=item.func.__name__, - nmissing=len(item.undoc_names), - total_args=item.nsig_names, - undoc_names=list(item.undoc_names)) +def format_item_as_plain(i, item): + tmpl = ('+{lnum} {path} {func_name}(): ' + 'Missing[{nmissing}/{total_args}]={undoc_names}') + s = tmpl.format(path=item.path, lnum=item.lnum, + func_name=item.func.__name__, + nmissing=len(item.undoc_names), + total_args=item.nsig_names, + undoc_names=list(item.undoc_names)) if item.missing_args: - s+= " Extra(?)={missing_args}".format(missing_args=list(item.missing_args)) - + s += ' Extra(?)=%s' % list(item.missing_args) return s + def main(): module = __import__(args.module) if not args.path: - args.path=os.path.dirname(module.__file__) - collect=[cmp_docstring_sig(e) for e in entry_gen(module,module.__name__)] - # only include if there are missing arguments in the docstring (fewer false positives) - # and there are at least some documented arguments - collect = [e for e in collect if e.undoc_names and len(e.undoc_names) != e.nsig_names] - collect.sort(key=lambda x:x.path) + args.path = os.path.dirname(module.__file__) + collect = [cmp_docstring_sig(e) + for e in entry_gen(module, module.__name__)] + # only include if there are missing arguments in the docstring + # (fewer false positives) and there are at least some documented arguments + collect = [e for e in collect + if e.undoc_names and len(e.undoc_names) != e.nsig_names] + collect.sort(key=lambda x: x.path) if args.github_repo: - for i,item in enumerate(collect,1): - print( format_item_as_github_task_list(i,item,args.github_repo)) + for i, item in enumerate(collect, 1): + print(format_item_as_github_task_list(i, item, args.github_repo)) else: - for i,item in enumerate(collect,1): - print( format_item_as_plain(i, item)) + for i, item in enumerate(collect, 1): + print(format_item_as_plain(i, item)) + -if __name__ == "__main__": - import 
sys +if __name__ == '__main__': sys.exit(main()) diff --git a/scripts/list_future_warnings.sh b/scripts/list_future_warnings.sh new file mode 100755 index 0000000000000..0c4046bbb5f49 --- /dev/null +++ b/scripts/list_future_warnings.sh @@ -0,0 +1,46 @@ +#!/bin/bash + +# Check all future warnings in Python files, and report them with the version +# where the FutureWarning was added. +# +# This is useful to detect features that have been deprecated, and should be +# removed from the code. For example, if a line of code contains: +# +# warnings.warn('Method deprecated', FutureWarning, stacklevel=2) +# +# Which is released in Pandas 0.20.0, then it is expected that the method +# is removed before releasing Pandas 0.24.0, including the warning. If it +# is not, this script will list this line, with the version 0.20.0, which +# will make it easy to detect that it had to be removed. +# +# In some cases this script can return false positives, for example in files +# where FutureWarning is used to detect deprecations, or similar. The EXCLUDE +# variable can be used to ignore files that use FutureWarning, but do not +# deprecate functionality. +# +# Usage: +# +# $ ./list_future_warnings.sh + +EXCLUDE="^pandas/tests/|" # tests validate that FutureWarnings are raised +EXCLUDE+="^pandas/util/_decorators.py$|" # generic deprecate function that raises warning +EXCLUDE+="^pandas/util/_depr_module.py$|" # generic deprecate module that raises warnings +EXCLUDE+="^pandas/util/testing.py$|" # contains function to evaluate if warning is raised +EXCLUDE+="^pandas/io/parsers.py$" # implements generic deprecation system in io reading + +BASE_DIR="$(dirname $0)/.." 
+cd $BASE_DIR +FILES=`grep -RIl "FutureWarning" pandas/* | grep -vE "$EXCLUDE"` +OUTPUT=() +IFS=$'\n' + +for FILE in $FILES; do + FILE_LINES=`git blame -sf $FILE | grep FutureWarning | tr -s " " | cut -d " " -f1,3` + for FILE_LINE in $FILE_LINES; do + TAG=$(git tag --contains $(echo $FILE_LINE | cut -d" " -f1) | head -n1) + OUTPUT_ROW=`printf "%-14s %-16s %s" ${TAG:-"(not released)"} $FILE_LINE $FILE` + OUTPUT+=($OUTPUT_ROW) + done +done + +printf "%s\n" "${OUTPUT[@]}" | sort -V diff --git a/scripts/merge-pr.py b/scripts/merge-pr.py index 4062a96d8e08d..31264cad52e4f 100755 --- a/scripts/merge-pr.py +++ b/scripts/merge-pr.py @@ -22,7 +22,6 @@ # usage: ./apache-pr-merge.py (see config env vars below) # # Lightly modified from version of this script in incubator-parquet-format - from __future__ import print_function from subprocess import check_output @@ -223,7 +222,7 @@ def update_pr(pr_num, user_login, base_ref): try: run_cmd( 'git push -f %s %s:%s' % (push_user_remote, pr_branch_name, - base_ref)) + base_ref)) except Exception as e: fail("Exception while pushing: %s" % e) clean_up() @@ -275,6 +274,7 @@ def fix_version_from_branch(branch, versions): branch_ver = branch.replace("branch-", "") return filter(lambda x: x.name.startswith(branch_ver), versions)[-1] + pr_num = input("Which pull request would you like to merge? (e.g. 
34): ") pr = get_json("%s/pulls/%s" % (GITHUB_API_BASE, pr_num)) @@ -297,9 +297,15 @@ def fix_version_from_branch(branch, versions): continue_maybe(msg) print("\n=== Pull Request #%s ===" % pr_num) -print("title\t%s\nsource\t%s\ntarget\t%s\nurl\t%s" - % (title, pr_repo_desc, target_ref, url)) +# we may have un-printable unicode in our title +try: + title = title.encode('raw_unicode_escape') +except Exception: + pass + +print("title\t{title}\nsource\t{source}\ntarget\t{target}\nurl\t{url}".format( + title=title, source=pr_repo_desc, target=target_ref, url=url)) merged_refs = [target_ref] diff --git a/setup.py b/setup.py index 7dbf6c84a0451..859d50303ecb1 100755 --- a/setup.py +++ b/setup.py @@ -302,12 +302,14 @@ class CheckSDist(sdist_class): 'pandas/_libs/hashtable.pyx', 'pandas/_libs/tslib.pyx', 'pandas/_libs/index.pyx', + 'pandas/_libs/internals.pyx', 'pandas/_libs/algos.pyx', 'pandas/_libs/join.pyx', 'pandas/_libs/indexing.pyx', 'pandas/_libs/interval.pyx', 'pandas/_libs/hashing.pyx', 'pandas/_libs/missing.pyx', + 'pandas/_libs/reduction.pyx', 'pandas/_libs/testing.pyx', 'pandas/_libs/window.pyx', 'pandas/_libs/skiplist.pyx', @@ -478,6 +480,8 @@ def pxd(name): 'sources': np_datetime_sources}, '_libs.indexing': { 'pyxfile': '_libs/indexing'}, + '_libs.internals': { + 'pyxfile': '_libs/internals'}, '_libs.interval': { 'pyxfile': '_libs/interval', 'pxdfiles': ['_libs/hashtable'], @@ -503,6 +507,8 @@ def pxd(name): 'pandas/_libs/src/numpy_helper.h'], 'sources': ['pandas/_libs/src/parser/tokenizer.c', 'pandas/_libs/src/parser/io.c']}, + '_libs.reduction': { + 'pyxfile': '_libs/reduction'}, '_libs.tslibs.period': { 'pyxfile': '_libs/tslibs/period', 'pxdfiles': ['_libs/src/util', @@ -688,10 +694,9 @@ def pxd(name): 'pandas/_libs/src/ujson/lib/ultrajsonenc.c', 'pandas/_libs/src/ujson/lib/ultrajsondec.c'] + np_datetime_sources), - include_dirs=(['pandas/_libs/src/ujson/python', - 'pandas/_libs/src/ujson/lib', - 'pandas/_libs/src/datetime'] + - common_include), + 
include_dirs=['pandas/_libs/src/ujson/python', + 'pandas/_libs/src/ujson/lib', + 'pandas/_libs/src/datetime'], extra_compile_args=(['-D_GNU_SOURCE'] + extra_compile_args)) From e58292f4a1ded59ed65bd282908f6b4209720824 Mon Sep 17 00:00:00 2001 From: Ming Li <14131823+minggli@users.noreply.github.com> Date: Mon, 29 Jan 2018 12:42:59 +0000 Subject: [PATCH 002/217] Regression in make_block_same_class (tests failing for new fastparquet release) (#19434) --- doc/source/io.rst | 2 +- pandas/core/internals.py | 9 +++++++-- pandas/tests/internals/test_internals.py | 7 +++++++ pandas/tests/io/test_parquet.py | 22 +++++++++++++++++----- 4 files changed, 32 insertions(+), 8 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index ae04996b4fddf..4199f161501ec 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -4537,7 +4537,7 @@ See the documentation for `pyarrow `__ and .. note:: These engines are very similar and should read/write nearly identical parquet format files. - Currently ``pyarrow`` does not support timedelta data, and ``fastparquet`` does not support timezone aware datetimes (they are coerced to UTC). + Currently ``pyarrow`` does not support timedelta data, ``fastparquet>=0.1.4`` supports timezone aware datetimes. These libraries differ by having different underlying dependencies (``fastparquet`` by using ``numba``, while ``pyarrow`` uses a c-library). .. ipython:: python diff --git a/pandas/core/internals.py b/pandas/core/internals.py index c2d3d0852384c..ec884035fe0c4 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -224,12 +224,17 @@ def make_block_scalar(self, values): """ return ScalarBlock(values) - def make_block_same_class(self, values, placement=None, ndim=None): + def make_block_same_class(self, values, placement=None, ndim=None, + dtype=None): """ Wrap given values in a block of same type as self. 
""" + if dtype is not None: + # issue 19431 fastparquet is passing this + warnings.warn("dtype argument is deprecated, will be removed " + "in a future release.", FutureWarning) if placement is None: placement = self.mgr_locs return make_block(values, placement=placement, ndim=ndim, - klass=self.__class__) + klass=self.__class__, dtype=dtype) def __unicode__(self): diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 57884e9816ed3..f17306b8b52f9 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -285,6 +285,13 @@ def test_delete(self): with pytest.raises(Exception): newb.delete(3) + def test_make_block_same_class(self): + # issue 19431 + block = create_block('M8[ns, US/Eastern]', [3]) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + block.make_block_same_class(block.values, dtype=block.values.dtype) + class TestDatetimeBlock(object): diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 6c172c80514e7..11cbea8ce6331 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -71,6 +71,15 @@ def fp(): return 'fastparquet' +@pytest.fixture +def fp_lt_014(): + if not _HAVE_FASTPARQUET: + pytest.skip("fastparquet is not installed") + if LooseVersion(fastparquet.__version__) >= LooseVersion('0.1.4'): + pytest.skip("fastparquet is >= 0.1.4") + return 'fastparquet' + + @pytest.fixture def df_compat(): return pd.DataFrame({'A': [1, 2, 3], 'B': 'foo'}) @@ -435,8 +444,10 @@ def test_basic(self, fp, df_full): df = df_full # additional supported types for fastparquet + if LooseVersion(fastparquet.__version__) >= LooseVersion('0.1.4'): + df['datetime_tz'] = pd.date_range('20130101', periods=3, + tz='US/Eastern') df['timedelta'] = pd.timedelta_range('1 day', periods=3) - check_round_trip(df, fp) @pytest.mark.skip(reason="not supported") @@ -468,14 +479,15 @@ def test_categorical(self, fp): df = 
pd.DataFrame({'a': pd.Categorical(list('abc'))}) check_round_trip(df, fp) - def test_datetime_tz(self, fp): - # doesn't preserve tz + def test_datetime_tz(self, fp_lt_014): + + # fastparquet<0.1.4 doesn't preserve tz df = pd.DataFrame({'a': pd.date_range('20130101', periods=3, tz='US/Eastern')}) - # warns on the coercion with catch_warnings(record=True): - check_round_trip(df, fp, expected=df.astype('datetime64[ns]')) + check_round_trip(df, fp_lt_014, + expected=df.astype('datetime64[ns]')) def test_filter_row_groups(self, fp): d = {'a': list(range(0, 3))} From b00eeb3ad4cf555c8211409284ec9313d707d205 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Mon, 29 Jan 2018 15:07:02 +0100 Subject: [PATCH 003/217] TST: fix test for MultiIndexPyIntEngine on 32 bit (#19440) closes #19439 --- pandas/tests/test_multilevel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 9582264a8c716..65332ae7153e2 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -1611,7 +1611,7 @@ def test_pyint_engine(self): index = MultiIndex.from_tuples(keys) assert index.get_loc(keys[idx]) == idx - expected = np.arange(idx + 1, dtype='int64') + expected = np.arange(idx + 1, dtype=np.intp) result = index.get_indexer([keys[i] for i in expected]) tm.assert_numpy_array_equal(result, expected) From ab51851422567a55e98d0a5c472897277e12d4c6 Mon Sep 17 00:00:00 2001 From: luzpaz Date: Mon, 29 Jan 2018 09:14:33 -0500 Subject: [PATCH 004/217] Misc typos (#19430) Found via `codespell -q 3` --- asv_bench/benchmarks/replace.py | 8 ++++---- asv_bench/benchmarks/rolling.py | 16 ++++++++-------- doc/source/api.rst | 2 +- doc/source/io.rst | 2 +- doc/sphinxext/numpydoc/tests/test_docscrape.py | 6 +++--- pandas/_libs/tslibs/timedeltas.pyx | 2 +- pandas/_libs/tslibs/timezones.pyx | 2 +- pandas/core/frame.py | 2 +- pandas/core/indexes/datetimelike.py | 2 +- pandas/core/sparse/frame.py | 2 +- 
pandas/core/sparse/series.py | 2 +- pandas/core/strings.py | 2 +- pandas/core/util/hashing.py | 2 +- pandas/io/formats/format.py | 2 +- pandas/io/pytables.py | 2 +- pandas/tests/categorical/test_constructors.py | 2 +- pandas/tests/frame/test_constructors.py | 2 +- pandas/tests/io/formats/test_format.py | 2 +- pandas/tests/series/test_analytics.py | 2 +- pandas/tests/series/test_operators.py | 2 +- pandas/tests/sparse/frame/test_frame.py | 2 +- pandas/util/testing.py | 2 +- 22 files changed, 34 insertions(+), 34 deletions(-) diff --git a/asv_bench/benchmarks/replace.py b/asv_bench/benchmarks/replace.py index 6330a2b36c516..41208125e8f32 100644 --- a/asv_bench/benchmarks/replace.py +++ b/asv_bench/benchmarks/replace.py @@ -44,15 +44,15 @@ class Convert(object): goal_time = 0.5 params = (['DataFrame', 'Series'], ['Timestamp', 'Timedelta']) - param_names = ['contructor', 'replace_data'] + param_names = ['constructor', 'replace_data'] - def setup(self, contructor, replace_data): + def setup(self, constructor, replace_data): N = 10**3 data = {'Series': pd.Series(np.random.randint(N, size=N)), 'DataFrame': pd.DataFrame({'A': np.random.randint(N, size=N), 'B': np.random.randint(N, size=N)})} self.to_replace = {i: getattr(pd, replace_data) for i in range(N)} - self.data = data[contructor] + self.data = data[constructor] - def time_replace(self, contructor, replace_data): + def time_replace(self, constructor, replace_data): self.data.replace(self.to_replace) diff --git a/asv_bench/benchmarks/rolling.py b/asv_bench/benchmarks/rolling.py index 45142c53dcd01..59cf7d090a622 100644 --- a/asv_bench/benchmarks/rolling.py +++ b/asv_bench/benchmarks/rolling.py @@ -12,14 +12,14 @@ class Methods(object): ['int', 'float'], ['median', 'mean', 'max', 'min', 'std', 'count', 'skew', 'kurt', 'sum', 'corr', 'cov']) - param_names = ['contructor', 'window', 'dtype', 'method'] + param_names = ['constructor', 'window', 'dtype', 'method'] - def setup(self, contructor, window, dtype, method): + def 
setup(self, constructor, window, dtype, method): N = 10**5 arr = np.random.random(N).astype(dtype) - self.roll = getattr(pd, contructor)(arr).rolling(window) + self.roll = getattr(pd, constructor)(arr).rolling(window) - def time_rolling(self, contructor, window, dtype, method): + def time_rolling(self, constructor, window, dtype, method): getattr(self.roll, method)() @@ -30,12 +30,12 @@ class Quantile(object): [10, 1000], ['int', 'float'], [0, 0.5, 1]) - param_names = ['contructor', 'window', 'dtype', 'percentile'] + param_names = ['constructor', 'window', 'dtype', 'percentile'] - def setup(self, contructor, window, dtype, percentile): + def setup(self, constructor, window, dtype, percentile): N = 10**5 arr = np.random.random(N).astype(dtype) - self.roll = getattr(pd, contructor)(arr).rolling(window) + self.roll = getattr(pd, constructor)(arr).rolling(window) - def time_quantile(self, contructor, window, dtype, percentile): + def time_quantile(self, constructor, window, dtype, percentile): self.roll.quantile(percentile) diff --git a/doc/source/api.rst b/doc/source/api.rst index ddd09327935ce..44f87aa3e1cec 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -2500,7 +2500,7 @@ Scalar introspection Extensions ---------- -These are primarily intented for library authors looking to extend pandas +These are primarily intended for library authors looking to extend pandas objects. .. currentmodule:: pandas diff --git a/doc/source/io.rst b/doc/source/io.rst index 4199f161501ec..60dc89f8fd495 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -2675,7 +2675,7 @@ file, and the ``sheet_name`` indicating which sheet to parse. 
+++++++++++++++++++ To facilitate working with multiple sheets from the same file, the ``ExcelFile`` -class can be used to wrap the file and can be be passed into ``read_excel`` +class can be used to wrap the file and can be passed into ``read_excel`` There will be a performance benefit for reading multiple sheets as the file is read into memory only once. diff --git a/doc/sphinxext/numpydoc/tests/test_docscrape.py b/doc/sphinxext/numpydoc/tests/test_docscrape.py index b682504e1618f..b412124d774bb 100755 --- a/doc/sphinxext/numpydoc/tests/test_docscrape.py +++ b/doc/sphinxext/numpydoc/tests/test_docscrape.py @@ -42,7 +42,7 @@ ------- out : ndarray The drawn samples, arranged according to `shape`. If the - shape given is (m,n,...), then the shape of `out` is is + shape given is (m,n,...), then the shape of `out` is (m,n,...,N). In other words, each entry ``out[i,j,...,:]`` is an N-dimensional @@ -222,7 +222,7 @@ def test_str(): ------- out : ndarray The drawn samples, arranged according to `shape`. If the - shape given is (m,n,...), then the shape of `out` is is + shape given is (m,n,...), then the shape of `out` is (m,n,...,N). In other words, each entry ``out[i,j,...,:]`` is an N-dimensional @@ -340,7 +340,7 @@ def test_sphinx_str(): **out** : ndarray The drawn samples, arranged according to `shape`. If the - shape given is (m,n,...), then the shape of `out` is is + shape given is (m,n,...), then the shape of `out` is (m,n,...,N). In other words, each entry ``out[i,j,...,:]`` is an N-dimensional diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 1e6ea7794dfff..37693068e0974 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -897,7 +897,7 @@ class Timedelta(_Timedelta): Represents a duration, the difference between two dates or times. Timedelta is the pandas equivalent of python's ``datetime.timedelta`` - and is interchangable with it in most cases. 
+ and is interchangeable with it in most cases. Parameters ---------- diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index c22e0b8e555a3..215ae9ce087ee 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -295,7 +295,7 @@ cpdef bint tz_compare(object start, object end): timezones. For example `` and `` are essentially same - timezones but aren't evaluted such, but the string representation + timezones but aren't evaluated such, but the string representation for both of these is `'Europe/Paris'`. This exists only to add a notion of equality to pytz-style zones diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7328cd336babf..788b236b0ec59 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4115,7 +4115,7 @@ def combine(self, other, func, fill_value=None, overwrite=True): series[this_mask] = fill_value otherSeries[other_mask] = fill_value - # if we have different dtypes, possibily promote + # if we have different dtypes, possibly promote new_dtype = this_dtype if not is_dtype_equal(this_dtype, other_dtype): new_dtype = find_common_type([this_dtype, other_dtype]) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index f43c6dc567f69..8e77c7a7fa48c 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -332,7 +332,7 @@ def freqstr(self): @cache_readonly def inferred_freq(self): """ - Trys to return a string representing a frequency guess, + Tries to return a string representing a frequency guess, generated by infer_freq. Returns None if it can't autodetect the frequency. 
""" diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index 99bf0d5b7ac51..91dc44e3f185e 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -120,7 +120,7 @@ def __init__(self, data=None, index=None, columns=None, default_kind=None, if dtype is not None: mgr = mgr.astype(dtype) else: - msg = ('SparseDataFrame called with unkown type "{data_type}" ' + msg = ('SparseDataFrame called with unknown type "{data_type}" ' 'for data argument') raise TypeError(msg.format(data_type=type(data).__name__)) diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index 4e207f9d1838c..1c23527cf57c4 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -493,7 +493,7 @@ def _set_value(self, label, value, takeable=False): values = self.to_dense() # if the label doesn't exist, we will create a new object here - # and possibily change the index + # and possibly change the index new_values = values._set_value(label, value, takeable=takeable) if new_values is not None: values = new_values diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 5c31b9a5668ff..12c7feb5f2b15 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -1395,7 +1395,7 @@ def _validate(data): elif isinstance(data, Index): # can't use ABCIndex to exclude non-str - # see scc/inferrence.pyx which can contain string values + # see src/inference.pyx which can contain string values allowed_types = ('string', 'unicode', 'mixed', 'mixed-integer') if data.inferred_type not in allowed_types: message = ("Can only use .str accessor with string values " diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py index 0c82773b75c28..7edb5b16ce77a 100644 --- a/pandas/core/util/hashing.py +++ b/pandas/core/util/hashing.py @@ -210,7 +210,7 @@ def _hash_categorical(c, encoding, hash_key): # we have uint64, as we don't directly support missing values # we don't want to use take_nd which will coerce to float - # 
instead, directly construt the result with a + # instead, directly construct the result with a # max(np.uint64) as the missing value indicator # # TODO: GH 15362 diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 2293032ebb8a1..bca0b64cb53fe 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1961,7 +1961,7 @@ def formatter(value): def get_result_as_array(self): """ Returns the float values converted into strings using - the parameters given at initalisation, as a numpy array + the parameters given at initialisation, as a numpy array """ if self.formatter is not None: diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 106823199ee93..5376473f83f22 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -3763,7 +3763,7 @@ def write(self, **kwargs): class LegacyTable(Table): """ an appendable table: allow append/query/delete operations to a - (possibily) already existing appendable table this table ALLOWS + (possibly) already existing appendable table this table ALLOWS append (but doesn't require them), and stores the data in a format that can be easily searched diff --git a/pandas/tests/categorical/test_constructors.py b/pandas/tests/categorical/test_constructors.py index b29d75bed5c6f..6cc34770a65e0 100644 --- a/pandas/tests/categorical/test_constructors.py +++ b/pandas/tests/categorical/test_constructors.py @@ -382,7 +382,7 @@ def test_constructor_from_categorical_with_unknown_dtype(self): ordered=True) tm.assert_categorical_equal(result, expected) - def test_contructor_from_categorical_string(self): + def test_constructor_from_categorical_string(self): values = Categorical(['a', 'b', 'd']) # use categories, ordered result = Categorical(values, categories=['a', 'b', 'c'], ordered=True, diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 8b57e96e6fa06..b24ae22162a34 100644 --- a/pandas/tests/frame/test_constructors.py +++ 
b/pandas/tests/frame/test_constructors.py @@ -543,7 +543,7 @@ def test_nested_dict_frame_constructor(self): tm.assert_frame_equal(result, df) def _check_basic_constructor(self, empty): - # mat: 2d matrix with shpae (3, 2) to input. empty - makes sized + # mat: 2d matrix with shape (3, 2) to input. empty - makes sized # objects mat = empty((2, 3), dtype=float) # 2-D input diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index b277d8256e612..e0ce27de5c31f 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -2531,7 +2531,7 @@ def test_date_tz(self): [datetime(2013, 1, 1), pd.NaT], utc=True).format() assert formatted[0] == "2013-01-01 00:00:00+00:00" - def test_date_explict_date_format(self): + def test_date_explicit_date_format(self): formatted = pd.to_datetime([datetime(2003, 2, 1), pd.NaT]).format( date_format="%m-%d-%Y", na_rep="UT") assert formatted[0] == "02-01-2003" diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index f2b7c20b774b0..0e6e44e839464 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -43,7 +43,7 @@ def test_empty(self, method, unit, use_bottleneck): result = getattr(s, method)() assert result == unit - # Explict + # Explicit result = getattr(s, method)(min_count=0) assert result == unit diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 7505e6b0cec3b..38e5753d1752d 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -1163,7 +1163,7 @@ def test_timedelta_floordiv(self, scalar_td): ('NCC1701D', 'NCC1701D', 'NCC1701D')]) def test_td64_series_with_tdi(self, names): # GH#17250 make sure result dtype is correct - # GH#19043 make sure names are propogated correctly + # GH#19043 make sure names are propagated correctly tdi = pd.TimedeltaIndex(['0 days', '1 day'], name=names[0]) ser = 
Series([Timedelta(hours=3), Timedelta(hours=4)], name=names[1]) expected = Series([Timedelta(hours=3), Timedelta(days=1, hours=4)], diff --git a/pandas/tests/sparse/frame/test_frame.py b/pandas/tests/sparse/frame/test_frame.py index 2b589ebd4735e..0b7948cc32d24 100644 --- a/pandas/tests/sparse/frame/test_frame.py +++ b/pandas/tests/sparse/frame/test_frame.py @@ -218,7 +218,7 @@ def test_constructor_from_unknown_type(self): class Unknown: pass with pytest.raises(TypeError, - message='SparseDataFrame called with unkown type ' + message='SparseDataFrame called with unknown type ' '"Unknown" for data argument'): SparseDataFrame(Unknown()) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 941bdcbc8b064..0009e26f8b100 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -2401,7 +2401,7 @@ class for all warnings. To check that no warning is returned, into errors. Valid values are: - * "error" - turns matching warnings into exeptions + * "error" - turns matching warnings into exceptions * "ignore" - discard the warning * "always" - always emit a warning * "default" - print the warning the first time it is generated From e4e725596dbbe8b2197500ce1181a7f32d8e3913 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 29 Jan 2018 22:39:09 +0100 Subject: [PATCH 005/217] Change Future to DeprecationWarning for make_block_same_class (#19442) --- pandas/core/internals.py | 2 +- pandas/tests/internals/test_internals.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index ec884035fe0c4..f3e5e4c99a899 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -230,7 +230,7 @@ def make_block_same_class(self, values, placement=None, ndim=None, if dtype is not None: # issue 19431 fastparquet is passing this warnings.warn("dtype argument is deprecated, will be removed " - "in a future release.", FutureWarning) + "in a future release.", DeprecationWarning) if 
placement is None: placement = self.mgr_locs return make_block(values, placement=placement, ndim=ndim, diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index f17306b8b52f9..e3490f465b24a 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -288,9 +288,10 @@ def test_delete(self): def test_make_block_same_class(self): # issue 19431 block = create_block('M8[ns, US/Eastern]', [3]) - with tm.assert_produces_warning(FutureWarning, + with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): - block.make_block_same_class(block.values, dtype=block.values.dtype) + block.make_block_same_class(block.values.values, + dtype=block.values.dtype) class TestDatetimeBlock(object): From 22228c7a6309fc82a1569265eaecbf19b781045f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 29 Jan 2018 15:59:32 -0800 Subject: [PATCH 006/217] catch PerformanceWarning (#19446) --- pandas/tests/series/test_operators.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 38e5753d1752d..8feee6e6cff68 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -19,6 +19,7 @@ from pandas.core.indexes.timedeltas import Timedelta import pandas.core.nanops as nanops +from pandas.errors import PerformanceWarning from pandas.compat import range, zip from pandas import compat from pandas.util.testing import (assert_series_equal, assert_almost_equal, @@ -871,8 +872,9 @@ def test_timedelta64_operations_with_DateOffset(self): expected = Series([timedelta(minutes=4, seconds=3)] * 3) assert_series_equal(result, expected) - result = td + Series([pd.offsets.Minute(1), pd.offsets.Second(3), - pd.offsets.Hour(2)]) + with tm.assert_produces_warning(PerformanceWarning): + result = td + Series([pd.offsets.Minute(1), pd.offsets.Second(3), + pd.offsets.Hour(2)]) 
expected = Series([timedelta(minutes=6, seconds=3), timedelta( minutes=5, seconds=6), timedelta(hours=2, minutes=5, seconds=3)]) assert_series_equal(result, expected) From 60a8218f55ce4114d58fdf471424334b6cfa76b4 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 30 Jan 2018 06:36:16 -0500 Subject: [PATCH 007/217] CI: pin pymysql<0.8.0 (#19461) --- ci/requirements-3.6.run | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/requirements-3.6.run b/ci/requirements-3.6.run index 822144a80bc9a..e30461d06b8ea 100644 --- a/ci/requirements-3.6.run +++ b/ci/requirements-3.6.run @@ -13,7 +13,7 @@ lxml html5lib jinja2 sqlalchemy -pymysql +pymysql<0.8.0 feather-format pyarrow psycopg2 From f6c492e4dc78f84695cb84fb83c7be711dd86141 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Tue, 30 Jan 2018 12:36:36 +0100 Subject: [PATCH 008/217] TST: fix (other check of) test for MultiIndexPyIntEngine on 32 bit (#19455) --- pandas/tests/test_multilevel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 65332ae7153e2..79e05c90a21b0 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -1617,7 +1617,7 @@ def test_pyint_engine(self): # With missing key: idces = range(len(keys)) - expected = np.array([-1] + list(idces), dtype='int64') + expected = np.array([-1] + list(idces), dtype=np.intp) missing = tuple([0, 1] * 5 * N) result = index.get_indexer([missing] + [keys[i] for i in idces]) tm.assert_numpy_array_equal(result, expected) From ae64e59af3afb27aae71fc6166144f20498eea08 Mon Sep 17 00:00:00 2001 From: Tim Hoffmann <2836374+timhoffm@users.noreply.github.com> Date: Tue, 30 Jan 2018 12:37:16 +0100 Subject: [PATCH 009/217] remove reference to deprecated .ix from 10min.rst (#19452) --- doc/source/10min.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/10min.rst b/doc/source/10min.rst index da7679d8a3f54..fbbe94a72c71e 
100644 --- a/doc/source/10min.rst +++ b/doc/source/10min.rst @@ -154,7 +154,7 @@ Selection While standard Python / Numpy expressions for selecting and setting are intuitive and come in handy for interactive work, for production code, we recommend the optimized pandas data access methods, ``.at``, ``.iat``, - ``.loc``, ``.iloc`` and ``.ix``. + ``.loc`` and ``.iloc``. See the indexing documentation :ref:`Indexing and Selecting Data ` and :ref:`MultiIndex / Advanced Indexing `. From d4d3b3379fb787a700ec9a6936ebec91e5ef7f18 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 30 Jan 2018 15:55:19 -0800 Subject: [PATCH 010/217] remove unused (#19466) --- pandas/_libs/src/period_helper.c | 32 ------------------------------ pandas/_libs/src/period_helper.h | 10 ---------- pandas/_libs/tslibs/period.pyx | 9 --------- pandas/tests/scalar/test_period.py | 3 --- 4 files changed, 54 deletions(-) diff --git a/pandas/_libs/src/period_helper.c b/pandas/_libs/src/period_helper.c index 01fc46481d5b4..f1367978bd6c9 100644 --- a/pandas/_libs/src/period_helper.c +++ b/pandas/_libs/src/period_helper.c @@ -1275,38 +1275,6 @@ npy_int64 get_python_ordinal(npy_int64 period_ordinal, int freq) { return toDaily(period_ordinal, 'E', &af_info) + ORD_OFFSET; } -char *str_replace(const char *s, const char *old, const char *new) { - char *ret; - int i, count = 0; - size_t newlen = strlen(new); - size_t oldlen = strlen(old); - - for (i = 0; s[i] != '\0'; i++) { - if (strstr(&s[i], old) == &s[i]) { - count++; - i += oldlen - 1; - } - } - - ret = PyArray_malloc(i + 1 + count * (newlen - oldlen)); - if (ret == NULL) { - return (char *)PyErr_NoMemory(); - } - - i = 0; - while (*s) { - if (strstr(s, old) == s) { - strncpy(&ret[i], new, sizeof(char) * newlen); - i += newlen; - s += oldlen; - } else { - ret[i++] = *s++; - } - } - ret[i] = '\0'; - - return ret; -} // function to generate a nice string representation of the period // object, originally from DateObject_strftime diff --git 
a/pandas/_libs/src/period_helper.h b/pandas/_libs/src/period_helper.h index 45afc074cab72..35dd20848a2ec 100644 --- a/pandas/_libs/src/period_helper.h +++ b/pandas/_libs/src/period_helper.h @@ -112,15 +112,6 @@ frequency conversion routines. #define INT_ERR_CODE INT32_MIN -#define MEM_CHECK(item) \ - if (item == NULL) { \ - return PyErr_NoMemory(); \ - } -#define ERR_CHECK(item) \ - if (item == NULL) { \ - return NULL; \ - } - typedef struct asfreq_info { int from_week_end; // day the week ends on in the "from" frequency int to_week_end; // day the week ends on in the "to" frequency @@ -182,7 +173,6 @@ int pminute(npy_int64 ordinal, int freq); int psecond(npy_int64 ordinal, int freq); int pdays_in_month(npy_int64 ordinal, int freq); -double getAbsTime(int freq, npy_int64 dailyDate, npy_int64 originalDate); char *c_strftime(struct date_info *dinfo, char *fmt); int get_yq(npy_int64 ordinal, int freq, int *quarter, int *year); diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 46365035a0b9a..e2caebe4c4afc 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -372,15 +372,6 @@ cdef object _period_strftime(int64_t value, int freq, object fmt): ctypedef int (*accessor)(int64_t ordinal, int freq) except INT32_MIN -def get_period_field(int code, int64_t value, int freq): - cdef accessor f = _get_accessor_func(code) - if f is NULL: - raise ValueError('Unrecognized period code: %d' % code) - if value == iNaT: - return np.nan - return f(value, freq) - - def get_period_field_arr(int code, ndarray[int64_t] arr, int freq): cdef: Py_ssize_t i, sz diff --git a/pandas/tests/scalar/test_period.py b/pandas/tests/scalar/test_period.py index ce733829c2315..41b3bb55bfff1 100644 --- a/pandas/tests/scalar/test_period.py +++ b/pandas/tests/scalar/test_period.py @@ -914,9 +914,6 @@ def test_round_trip(self): class TestPeriodField(object): - def test_get_period_field_raises_on_out_of_range(self): - pytest.raises(ValueError, 
libperiod.get_period_field, -1, 0, 0) - def test_get_period_field_array_raises_on_out_of_range(self): pytest.raises(ValueError, libperiod.get_period_field_arr, -1, np.empty(1), 0) From e25a47586a3f577989cc5592ba0a48cccf48d595 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 31 Jan 2018 02:48:53 -0800 Subject: [PATCH 011/217] setup.py fixup, closes #19467 (#19472) --- setup.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 859d50303ecb1..721e6f62bd3e4 100755 --- a/setup.py +++ b/setup.py @@ -416,7 +416,7 @@ def get_tag(self): cmdclass['build_src'] = DummyBuildSrc cmdclass['build_ext'] = CheckingBuildExt -lib_depends = ['reduce', 'inference'] +lib_depends = ['inference'] def srcpath(name=None, suffix='.pyx', subdir='src'): @@ -508,11 +508,12 @@ def pxd(name): 'sources': ['pandas/_libs/src/parser/tokenizer.c', 'pandas/_libs/src/parser/io.c']}, '_libs.reduction': { - 'pyxfile': '_libs/reduction'}, + 'pyxfile': '_libs/reduction', + 'pxdfiles': ['_libs/src/util']}, '_libs.tslibs.period': { 'pyxfile': '_libs/tslibs/period', 'pxdfiles': ['_libs/src/util', - '_libs/lib', + '_libs/missing', '_libs/tslibs/timedeltas', '_libs/tslibs/timezones', '_libs/tslibs/nattype'], From 74cf2dd8089fa086b883efb9a4673f7e01b5cb7c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 31 Jan 2018 03:17:05 -0800 Subject: [PATCH 012/217] Centralize Arithmetic Tests (#19471) --- pandas/tests/frame/test_arithmetic.py | 103 ++++++++++ pandas/tests/frame/test_operators.py | 91 +-------- pandas/tests/series/test_arithmetic.py | 211 +++++++++++++++++++- pandas/tests/series/test_datetime_values.py | 11 +- pandas/tests/series/test_operators.py | 61 ------ pandas/tests/series/test_period.py | 116 +---------- 6 files changed, 316 insertions(+), 277 deletions(-) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 3f4e3877a276a..9b99a7b73b82b 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ 
b/pandas/tests/frame/test_arithmetic.py @@ -1,11 +1,114 @@ # -*- coding: utf-8 -*- +import pytest import numpy as np import pandas as pd import pandas.util.testing as tm +# ------------------------------------------------------------------- +# Comparisons + +class TestFrameComparisons(object): + def test_df_boolean_comparison_error(self): + # GH#4576 + # boolean comparisons with a tuple/list give unexpected results + df = pd.DataFrame(np.arange(6).reshape((3, 2))) + + # not shape compatible + with pytest.raises(ValueError): + df == (2, 2) + with pytest.raises(ValueError): + df == [2, 2] + + def test_df_float_none_comparison(self): + df = pd.DataFrame(np.random.randn(8, 3), index=range(8), + columns=['A', 'B', 'C']) + + with pytest.raises(TypeError): + df.__eq__(None) + + def test_df_string_comparison(self): + df = pd.DataFrame([{"a": 1, "b": "foo"}, {"a": 2, "b": "bar"}]) + mask_a = df.a > 1 + tm.assert_frame_equal(df[mask_a], df.loc[1:1, :]) + tm.assert_frame_equal(df[-mask_a], df.loc[0:0, :]) + + mask_b = df.b == "foo" + tm.assert_frame_equal(df[mask_b], df.loc[0:0, :]) + tm.assert_frame_equal(df[-mask_b], df.loc[1:1, :]) + + @pytest.mark.parametrize('opname', ['eq', 'ne', 'gt', 'lt', 'ge', 'le']) + def test_df_flex_cmp_constant_return_types(self, opname): + # GH#15077, non-empty DataFrame + df = pd.DataFrame({'x': [1, 2, 3], 'y': [1., 2., 3.]}) + const = 2 + + result = getattr(df, opname)(const).get_dtype_counts() + tm.assert_series_equal(result, pd.Series([2], ['bool'])) + + @pytest.mark.parametrize('opname', ['eq', 'ne', 'gt', 'lt', 'ge', 'le']) + def test_df_flex_cmp_constant_return_types_empty(self, opname): + # GH#15077 empty DataFrame + df = pd.DataFrame({'x': [1, 2, 3], 'y': [1., 2., 3.]}) + const = 2 + + empty = df.iloc[:0] + result = getattr(empty, opname)(const).get_dtype_counts() + tm.assert_series_equal(result, pd.Series([2], ['bool'])) + + +# ------------------------------------------------------------------- +# Arithmetic + +class 
TestFrameArithmetic(object): + + @pytest.mark.xfail(reason='GH#7996 datetime64 units not converted to nano') + def test_df_sub_datetime64_not_ns(self): + df = pd.DataFrame(pd.date_range('20130101', periods=3)) + dt64 = np.datetime64('2013-01-01') + assert dt64.dtype == 'datetime64[D]' + res = df - dt64 + expected = pd.DataFrame([pd.Timedelta(days=0), pd.Timedelta(days=1), + pd.Timedelta(days=2)]) + tm.assert_frame_equal(res, expected) + + @pytest.mark.parametrize('data', [ + [1, 2, 3], + [1.1, 2.2, 3.3], + [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02'), pd.NaT], + ['x', 'y', 1]]) + @pytest.mark.parametrize('dtype', [None, object]) + def test_df_radd_str_invalid(self, dtype, data): + df = pd.DataFrame(data, dtype=dtype) + with pytest.raises(TypeError): + 'foo_' + df + + @pytest.mark.parametrize('dtype', [None, object]) + def test_df_with_dtype_radd_int(self, dtype): + df = pd.DataFrame([1, 2, 3], dtype=dtype) + expected = pd.DataFrame([2, 3, 4], dtype=dtype) + result = 1 + df + tm.assert_frame_equal(result, expected) + result = df + 1 + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize('dtype', [None, object]) + def test_df_with_dtype_radd_nan(self, dtype): + df = pd.DataFrame([1, 2, 3], dtype=dtype) + expected = pd.DataFrame([np.nan, np.nan, np.nan], dtype=dtype) + result = np.nan + df + tm.assert_frame_equal(result, expected) + result = df + np.nan + tm.assert_frame_equal(result, expected) + + def test_df_radd_str(self): + df = pd.DataFrame(['x', np.nan, 'x']) + tm.assert_frame_equal('a' + df, pd.DataFrame(['ax', np.nan, 'ax'])) + tm.assert_frame_equal(df + 'a', pd.DataFrame(['xa', np.nan, 'xa'])) + + class TestPeriodFrameArithmetic(object): def test_ops_frame_period(self): diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index 0bc4a7df6a55b..bdccbec6111d3 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -10,7 +10,7 @@ from numpy import nan, random 
import numpy as np -from pandas.compat import lrange, range +from pandas.compat import range from pandas import compat from pandas import (DataFrame, Series, MultiIndex, Timestamp, date_range) @@ -28,53 +28,6 @@ _check_mixed_int) -class TestDataFrameArithmetic(object): - - @pytest.mark.xfail(reason='GH#7996 datetime64 units not converted to nano') - def test_frame_sub_datetime64_not_ns(self): - df = pd.DataFrame(date_range('20130101', periods=3)) - dt64 = np.datetime64('2013-01-01') - assert dt64.dtype == 'datetime64[D]' - res = df - dt64 - expected = pd.DataFrame([pd.Timedelta(days=0), pd.Timedelta(days=1), - pd.Timedelta(days=2)]) - tm.assert_frame_equal(res, expected) - - @pytest.mark.parametrize('data', [ - [1, 2, 3], - [1.1, 2.2, 3.3], - [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02'), pd.NaT], - ['x', 'y', 1]]) - @pytest.mark.parametrize('dtype', [None, object]) - def test_frame_radd_str_invalid(self, dtype, data): - df = DataFrame(data, dtype=dtype) - with pytest.raises(TypeError): - 'foo_' + df - - @pytest.mark.parametrize('dtype', [None, object]) - def test_frame_with_dtype_radd_int(self, dtype): - df = pd.DataFrame([1, 2, 3], dtype=dtype) - expected = pd.DataFrame([2, 3, 4], dtype=dtype) - result = 1 + df - assert_frame_equal(result, expected) - result = df + 1 - assert_frame_equal(result, expected) - - @pytest.mark.parametrize('dtype', [None, object]) - def test_frame_with_dtype_radd_nan(self, dtype): - df = pd.DataFrame([1, 2, 3], dtype=dtype) - expected = pd.DataFrame([np.nan, np.nan, np.nan], dtype=dtype) - result = np.nan + df - assert_frame_equal(result, expected) - result = df + np.nan - assert_frame_equal(result, expected) - - def test_frame_radd_str(self): - df = pd.DataFrame(['x', np.nan, 'x']) - assert_frame_equal('a' + df, pd.DataFrame(['ax', np.nan, 'ax'])) - assert_frame_equal(df + 'a', pd.DataFrame(['xa', np.nan, 'xa'])) - - class TestDataFrameOperators(TestData): def test_operators(self): @@ -714,22 +667,6 @@ def _test_seq(df, 
idx_ser, col_ser): exp = DataFrame({'col': [False, True, False]}) assert_frame_equal(result, exp) - def test_return_dtypes_bool_op_costant(self): - # GH15077 - df = DataFrame({'x': [1, 2, 3], 'y': [1., 2., 3.]}) - const = 2 - - # not empty DataFrame - for op in ['eq', 'ne', 'gt', 'lt', 'ge', 'le']: - result = getattr(df, op)(const).get_dtype_counts() - tm.assert_series_equal(result, Series([2], ['bool'])) - - # empty DataFrame - empty = df.iloc[:0] - for op in ['eq', 'ne', 'gt', 'lt', 'ge', 'le']: - result = getattr(empty, op)(const).get_dtype_counts() - tm.assert_series_equal(result, Series([2], ['bool'])) - def test_dti_tz_convert_to_utc(self): base = pd.DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], tz='UTC') @@ -1009,22 +946,6 @@ def test_comparison_protected_from_errstate(self): result = (missing_df < 0).values tm.assert_numpy_array_equal(result, expected) - def test_string_comparison(self): - df = DataFrame([{"a": 1, "b": "foo"}, {"a": 2, "b": "bar"}]) - mask_a = df.a > 1 - assert_frame_equal(df[mask_a], df.loc[1:1, :]) - assert_frame_equal(df[-mask_a], df.loc[0:0, :]) - - mask_b = df.b == "foo" - assert_frame_equal(df[mask_b], df.loc[0:0, :]) - assert_frame_equal(df[-mask_b], df.loc[1:1, :]) - - def test_float_none_comparison(self): - df = DataFrame(np.random.randn(8, 3), index=lrange(8), - columns=['A', 'B', 'C']) - - pytest.raises(TypeError, df.__eq__, None) - def test_boolean_comparison(self): # GH 4576 @@ -1091,16 +1012,6 @@ def test_boolean_comparison(self): result = df == tup assert_frame_equal(result, expected) - def test_boolean_comparison_error(self): - - # GH 4576 - # boolean comparisons with a tuple/list give unexpected results - df = DataFrame(np.arange(6).reshape((3, 2))) - - # not shape compatible - pytest.raises(ValueError, lambda: df == (2, 2)) - pytest.raises(ValueError, lambda: df == [2, 2]) - def test_combine_generic(self): df1 = self.frame df2 = self.frame.loc[self.frame.index[:-5], ['A', 'B', 'C']] diff --git 
a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index ca558dd6b7cd5..1d9fa9dc15531 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -1,13 +1,20 @@ # -*- coding: utf-8 -*- -from datetime import timedelta +from datetime import datetime, timedelta import operator import numpy as np +import pytest + +from pandas import Series, Timestamp, Period +from pandas._libs.tslibs.period import IncompatibleFrequency import pandas as pd import pandas.util.testing as tm +# ------------------------------------------------------------------ +# Comparisons + class TestSeriesComparison(object): def test_compare_invalid(self): # GH#8058 @@ -17,8 +24,39 @@ def test_compare_invalid(self): b.name = pd.Timestamp('2000-01-01') tm.assert_series_equal(a / b, 1 / (b / a)) + @pytest.mark.parametrize('opname', ['eq', 'ne', 'gt', 'lt', 'ge', 'le']) + def test_ser_flex_cmp_return_dtypes(self, opname): + # GH#15115 + ser = Series([1, 3, 2], index=range(3)) + const = 2 + + result = getattr(ser, opname)(const).get_dtype_counts() + tm.assert_series_equal(result, Series([1], ['bool'])) + + @pytest.mark.parametrize('opname', ['eq', 'ne', 'gt', 'lt', 'ge', 'le']) + def test_ser_flex_cmp_return_dtypes_empty(self, opname): + # GH#15115 empty Series case + ser = Series([1, 3, 2], index=range(3)) + empty = ser.iloc[:0] + const = 2 + + result = getattr(empty, opname)(const).get_dtype_counts() + tm.assert_series_equal(result, Series([1], ['bool'])) + class TestTimestampSeriesComparison(object): + def test_dt64ser_cmp_period_scalar(self): + ser = Series(pd.period_range('2000-01-01', periods=10, freq='D')) + val = Period('2000-01-04', freq='D') + result = ser > val + expected = Series([x > val for x in ser]) + tm.assert_series_equal(result, expected) + + val = ser[5] + result = ser > val + expected = Series([x > val for x in ser]) + tm.assert_series_equal(result, expected) + def test_timestamp_compare_series(self): # make sure we 
can compare Timestamps on the right AND left hand side # GH#4982 @@ -81,6 +119,170 @@ def test_compare_timedelta_series(self): tm.assert_series_equal(actual, expected) +class TestPeriodSeriesComparisons(object): + @pytest.mark.parametrize('freq', ['M', '2M', '3M']) + def test_cmp_series_period_scalar(self, freq): + # GH 13200 + base = Series([Period(x, freq=freq) for x in + ['2011-01', '2011-02', '2011-03', '2011-04']]) + p = Period('2011-02', freq=freq) + + exp = Series([False, True, False, False]) + tm.assert_series_equal(base == p, exp) + tm.assert_series_equal(p == base, exp) + + exp = Series([True, False, True, True]) + tm.assert_series_equal(base != p, exp) + tm.assert_series_equal(p != base, exp) + + exp = Series([False, False, True, True]) + tm.assert_series_equal(base > p, exp) + tm.assert_series_equal(p < base, exp) + + exp = Series([True, False, False, False]) + tm.assert_series_equal(base < p, exp) + tm.assert_series_equal(p > base, exp) + + exp = Series([False, True, True, True]) + tm.assert_series_equal(base >= p, exp) + tm.assert_series_equal(p <= base, exp) + + exp = Series([True, True, False, False]) + tm.assert_series_equal(base <= p, exp) + tm.assert_series_equal(p >= base, exp) + + # different base freq + msg = "Input has different freq=A-DEC from Period" + with tm.assert_raises_regex(IncompatibleFrequency, msg): + base <= Period('2011', freq='A') + + with tm.assert_raises_regex(IncompatibleFrequency, msg): + Period('2011', freq='A') >= base + + @pytest.mark.parametrize('freq', ['M', '2M', '3M']) + def test_cmp_series_period_series(self, freq): + # GH#13200 + base = Series([Period(x, freq=freq) for x in + ['2011-01', '2011-02', '2011-03', '2011-04']]) + + ser = Series([Period(x, freq=freq) for x in + ['2011-02', '2011-01', '2011-03', '2011-05']]) + + exp = Series([False, False, True, False]) + tm.assert_series_equal(base == ser, exp) + + exp = Series([True, True, False, True]) + tm.assert_series_equal(base != ser, exp) + + exp = Series([False, 
True, False, False]) + tm.assert_series_equal(base > ser, exp) + + exp = Series([True, False, False, True]) + tm.assert_series_equal(base < ser, exp) + + exp = Series([False, True, True, False]) + tm.assert_series_equal(base >= ser, exp) + + exp = Series([True, False, True, True]) + tm.assert_series_equal(base <= ser, exp) + + ser2 = Series([Period(x, freq='A') for x in + ['2011', '2011', '2011', '2011']]) + + # different base freq + msg = "Input has different freq=A-DEC from Period" + with tm.assert_raises_regex(IncompatibleFrequency, msg): + base <= ser2 + + def test_cmp_series_period_series_mixed_freq(self): + # GH#13200 + base = Series([Period('2011', freq='A'), + Period('2011-02', freq='M'), + Period('2013', freq='A'), + Period('2011-04', freq='M')]) + + ser = Series([Period('2012', freq='A'), + Period('2011-01', freq='M'), + Period('2013', freq='A'), + Period('2011-05', freq='M')]) + + exp = Series([False, False, True, False]) + tm.assert_series_equal(base == ser, exp) + + exp = Series([True, True, False, True]) + tm.assert_series_equal(base != ser, exp) + + exp = Series([False, True, False, False]) + tm.assert_series_equal(base > ser, exp) + + exp = Series([True, False, False, True]) + tm.assert_series_equal(base < ser, exp) + + exp = Series([False, True, True, False]) + tm.assert_series_equal(base >= ser, exp) + + exp = Series([True, False, True, True]) + tm.assert_series_equal(base <= ser, exp) + + +# ------------------------------------------------------------------ +# Arithmetic + +class TestSeriesArithmetic(object): + # Standard, numeric, or otherwise not-Timestamp/Timedelta/Period dtypes + @pytest.mark.parametrize('data', [ + [1, 2, 3], + [1.1, 2.2, 3.3], + [Timestamp('2011-01-01'), Timestamp('2011-01-02'), pd.NaT], + ['x', 'y', 1]]) + @pytest.mark.parametrize('dtype', [None, object]) + def test_series_radd_str_invalid(self, dtype, data): + ser = Series(data, dtype=dtype) + with pytest.raises(TypeError): + 'foo_' + ser + + # TODO: parametrize, better 
name + def test_object_ser_add_invalid(self): + # invalid ops + obj_ser = tm.makeObjectSeries() + obj_ser.name = 'objects' + with pytest.raises(Exception): + obj_ser + 1 + with pytest.raises(Exception): + obj_ser + np.array(1, dtype=np.int64) + with pytest.raises(Exception): + obj_ser - 1 + with pytest.raises(Exception): + obj_ser - np.array(1, dtype=np.int64) + + @pytest.mark.parametrize('dtype', [None, object]) + def test_series_with_dtype_radd_nan(self, dtype): + ser = pd.Series([1, 2, 3], dtype=dtype) + expected = pd.Series([np.nan, np.nan, np.nan], dtype=dtype) + + result = np.nan + ser + tm.assert_series_equal(result, expected) + + result = ser + np.nan + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('dtype', [None, object]) + def test_series_with_dtype_radd_int(self, dtype): + ser = pd.Series([1, 2, 3], dtype=dtype) + expected = pd.Series([2, 3, 4], dtype=dtype) + + result = 1 + ser + tm.assert_series_equal(result, expected) + + result = ser + 1 + tm.assert_series_equal(result, expected) + + def test_series_radd_str(self): + ser = pd.Series(['x', np.nan, 'x']) + tm.assert_series_equal('a' + ser, pd.Series(['ax', np.nan, 'ax'])) + tm.assert_series_equal(ser + 'a', pd.Series(['xa', np.nan, 'xa'])) + + class TestPeriodSeriesArithmetic(object): def test_ops_series_timedelta(self): # GH 13043 @@ -134,3 +336,10 @@ def test_timestamp_sub_series(self): np.timedelta64(1, 'D')]) tm.assert_series_equal(ser - ts, delta_series) tm.assert_series_equal(ts - ser, -delta_series) + + def test_dt64ser_sub_datetime_dtype(self): + ts = Timestamp(datetime(1993, 1, 7, 13, 30, 00)) + dt = datetime(1993, 6, 22, 13, 30) + ser = Series([ts]) + result = pd.to_timedelta(np.abs(ser - dt)) + assert result.dtype == 'timedelta64[ns]' diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index b79d8def905af..49b4600b10738 100644 --- a/pandas/tests/series/test_datetime_values.py +++ 
b/pandas/tests/series/test_datetime_values.py @@ -11,7 +11,7 @@ from pandas.core.dtypes.common import is_integer_dtype, is_list_like from pandas import (Index, Series, DataFrame, bdate_range, date_range, period_range, timedelta_range, - PeriodIndex, Timestamp, DatetimeIndex, TimedeltaIndex) + PeriodIndex, DatetimeIndex, TimedeltaIndex) import pandas.core.common as com from pandas.util.testing import assert_series_equal @@ -377,15 +377,6 @@ def test_dt_accessor_api(self): s.dt assert not hasattr(s, 'dt') - def test_sub_of_datetime_from_TimeSeries(self): - from pandas.core.tools.timedeltas import to_timedelta - from datetime import datetime - a = Timestamp(datetime(1993, 0o1, 0o7, 13, 30, 00)) - b = datetime(1993, 6, 22, 13, 30) - a = Series([a]) - result = to_timedelta(np.abs(a - b)) - assert result.dtype == 'timedelta64[ns]' - def test_between(self): s = Series(bdate_range('1/1/2000', periods=20).astype(object)) s[::2] = np.nan diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 8feee6e6cff68..05ccb25960b1f 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -1686,15 +1686,6 @@ def test_operators_empty_int_corner(self): s2 = Series({'x': 0.}) assert_series_equal(s1 * s2, Series([np.nan], index=['x'])) - def test_invalid_ops(self): - # invalid ops - pytest.raises(Exception, self.objSeries.__add__, 1) - pytest.raises(Exception, self.objSeries.__add__, - np.array(1, dtype=np.int64)) - pytest.raises(Exception, self.objSeries.__sub__, 1) - pytest.raises(Exception, self.objSeries.__sub__, - np.array(1, dtype=np.int64)) - @pytest.mark.parametrize("m", [1, 3, 10]) @pytest.mark.parametrize("unit", ['D', 'h', 'm', 's', 'ms', 'us', 'ns']) def test_timedelta64_conversions(self, m, unit): @@ -1817,20 +1808,6 @@ def test_ops_datetimelike_align(self): result = (dt2.to_frame() - dt.to_frame())[0] assert_series_equal(result, expected) - def test_return_dtypes_bool_op_costant(self): - # 
gh15115 - s = pd.Series([1, 3, 2], index=range(3)) - const = 2 - for op in ['eq', 'ne', 'gt', 'lt', 'ge', 'le']: - result = getattr(s, op)(const).get_dtype_counts() - tm.assert_series_equal(result, Series([1], ['bool'])) - - # empty Series - empty = s.iloc[:0] - for op in ['eq', 'ne', 'gt', 'lt', 'ge', 'le']: - result = getattr(empty, op)(const).get_dtype_counts() - tm.assert_series_equal(result, Series([1], ['bool'])) - def test_operators_bitwise(self): # GH 9016: support bitwise op for integer types index = list('bca') @@ -2115,11 +2092,6 @@ def test_series_frame_radd_bug(self): with pytest.raises(TypeError): self.ts + datetime.now() - def test_series_radd_str(self): - ser = pd.Series(['x', np.nan, 'x']) - assert_series_equal('a' + ser, pd.Series(['ax', np.nan, 'ax'])) - assert_series_equal(ser + 'a', pd.Series(['xa', np.nan, 'xa'])) - @pytest.mark.parametrize('dtype', [None, object]) def test_series_with_dtype_radd_timedelta(self, dtype): ser = pd.Series([pd.Timedelta('1 days'), pd.Timedelta('2 days'), @@ -2133,39 +2105,6 @@ def test_series_with_dtype_radd_timedelta(self, dtype): result = ser + pd.Timedelta('3 days') assert_series_equal(result, expected) - @pytest.mark.parametrize('dtype', [None, object]) - def test_series_with_dtype_radd_int(self, dtype): - ser = pd.Series([1, 2, 3], dtype=dtype) - expected = pd.Series([2, 3, 4], dtype=dtype) - - result = 1 + ser - assert_series_equal(result, expected) - - result = ser + 1 - assert_series_equal(result, expected) - - @pytest.mark.parametrize('dtype', [None, object]) - def test_series_with_dtype_radd_nan(self, dtype): - ser = pd.Series([1, 2, 3], dtype=dtype) - expected = pd.Series([np.nan, np.nan, np.nan], dtype=dtype) - - result = np.nan + ser - assert_series_equal(result, expected) - - result = ser + np.nan - assert_series_equal(result, expected) - - @pytest.mark.parametrize('data', [ - [1, 2, 3], - [1.1, 2.2, 3.3], - [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02'), pd.NaT], - ['x', 'y', 1]]) - 
@pytest.mark.parametrize('dtype', [None, object]) - def test_series_radd_str_invalid(self, dtype, data): - ser = Series(data, dtype=dtype) - with pytest.raises(TypeError): - 'foo_' + ser - def test_operators_frame(self): # rpow does not work with DataFrame df = DataFrame({'A': self.ts}) diff --git a/pandas/tests/series/test_period.py b/pandas/tests/series/test_period.py index 9d5ef5e51ff20..8ff2071e351d0 100644 --- a/pandas/tests/series/test_period.py +++ b/pandas/tests/series/test_period.py @@ -3,7 +3,7 @@ import pandas as pd import pandas.util.testing as tm import pandas.core.indexes.period as period -from pandas import Series, period_range, DataFrame, Period +from pandas import Series, period_range, DataFrame def _permute(obj): @@ -63,17 +63,6 @@ def test_dropna(self): tm.assert_series_equal(s.dropna(), Series([pd.Period('2011-01', freq='M')])) - def test_series_comparison_scalars(self): - val = pd.Period('2000-01-04', freq='D') - result = self.series > val - expected = pd.Series([x > val for x in self.series]) - tm.assert_series_equal(result, expected) - - val = self.series[5] - result = self.series > val - expected = pd.Series([x > val for x in self.series]) - tm.assert_series_equal(result, expected) - def test_between(self): left, right = self.series[[2, 7]] result = self.series.between(left, right) @@ -128,109 +117,6 @@ def test_intercept_astype_object(self): result = df.values.squeeze() assert (result[:, 0] == expected.values).all() - def test_comp_series_period_scalar(self): - # GH 13200 - for freq in ['M', '2M', '3M']: - base = Series([Period(x, freq=freq) for x in - ['2011-01', '2011-02', '2011-03', '2011-04']]) - p = Period('2011-02', freq=freq) - - exp = pd.Series([False, True, False, False]) - tm.assert_series_equal(base == p, exp) - tm.assert_series_equal(p == base, exp) - - exp = pd.Series([True, False, True, True]) - tm.assert_series_equal(base != p, exp) - tm.assert_series_equal(p != base, exp) - - exp = pd.Series([False, False, True, True]) - 
tm.assert_series_equal(base > p, exp) - tm.assert_series_equal(p < base, exp) - - exp = pd.Series([True, False, False, False]) - tm.assert_series_equal(base < p, exp) - tm.assert_series_equal(p > base, exp) - - exp = pd.Series([False, True, True, True]) - tm.assert_series_equal(base >= p, exp) - tm.assert_series_equal(p <= base, exp) - - exp = pd.Series([True, True, False, False]) - tm.assert_series_equal(base <= p, exp) - tm.assert_series_equal(p >= base, exp) - - # different base freq - msg = "Input has different freq=A-DEC from Period" - with tm.assert_raises_regex( - period.IncompatibleFrequency, msg): - base <= Period('2011', freq='A') - - with tm.assert_raises_regex( - period.IncompatibleFrequency, msg): - Period('2011', freq='A') >= base - - def test_comp_series_period_series(self): - # GH 13200 - for freq in ['M', '2M', '3M']: - base = Series([Period(x, freq=freq) for x in - ['2011-01', '2011-02', '2011-03', '2011-04']]) - - s = Series([Period(x, freq=freq) for x in - ['2011-02', '2011-01', '2011-03', '2011-05']]) - - exp = Series([False, False, True, False]) - tm.assert_series_equal(base == s, exp) - - exp = Series([True, True, False, True]) - tm.assert_series_equal(base != s, exp) - - exp = Series([False, True, False, False]) - tm.assert_series_equal(base > s, exp) - - exp = Series([True, False, False, True]) - tm.assert_series_equal(base < s, exp) - - exp = Series([False, True, True, False]) - tm.assert_series_equal(base >= s, exp) - - exp = Series([True, False, True, True]) - tm.assert_series_equal(base <= s, exp) - - s2 = Series([Period(x, freq='A') for x in - ['2011', '2011', '2011', '2011']]) - - # different base freq - msg = "Input has different freq=A-DEC from Period" - with tm.assert_raises_regex( - period.IncompatibleFrequency, msg): - base <= s2 - - def test_comp_series_period_object(self): - # GH 13200 - base = Series([Period('2011', freq='A'), Period('2011-02', freq='M'), - Period('2013', freq='A'), Period('2011-04', freq='M')]) - - s = 
Series([Period('2012', freq='A'), Period('2011-01', freq='M'), - Period('2013', freq='A'), Period('2011-05', freq='M')]) - - exp = Series([False, False, True, False]) - tm.assert_series_equal(base == s, exp) - - exp = Series([True, True, False, True]) - tm.assert_series_equal(base != s, exp) - - exp = Series([False, True, False, False]) - tm.assert_series_equal(base > s, exp) - - exp = Series([True, False, False, True]) - tm.assert_series_equal(base < s, exp) - - exp = Series([False, True, True, False]) - tm.assert_series_equal(base >= s, exp) - - exp = Series([True, False, True, True]) - tm.assert_series_equal(base <= s, exp) - def test_align_series(self): rng = period_range('1/1/2000', '1/1/2010', freq='A') ts = Series(np.random.randn(len(rng)), index=rng) From 69f1bdc18026aa024acedcfbe284293baa711e26 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 31 Jan 2018 03:30:21 -0800 Subject: [PATCH 013/217] implement bits of numpy_helper in cython where possible (#19450) --- pandas/_libs/src/numpy_helper.h | 40 --------------- pandas/_libs/src/util.pxd | 89 +++++++++++++++++++++++++++++---- setup.py | 3 +- 3 files changed, 81 insertions(+), 51 deletions(-) diff --git a/pandas/_libs/src/numpy_helper.h b/pandas/_libs/src/numpy_helper.h index 6c2029fff8a1a..844be9b292be3 100644 --- a/pandas/_libs/src/numpy_helper.h +++ b/pandas/_libs/src/numpy_helper.h @@ -18,33 +18,6 @@ The full license is in the LICENSE file, distributed with this software. 
PANDAS_INLINE npy_int64 get_nat(void) { return NPY_MIN_INT64; } -PANDAS_INLINE int is_integer_object(PyObject* obj) { - return (!PyBool_Check(obj)) && PyArray_IsIntegerScalar(obj); -} - -PANDAS_INLINE int is_float_object(PyObject* obj) { - return (PyFloat_Check(obj) || PyArray_IsScalar(obj, Floating)); -} -PANDAS_INLINE int is_complex_object(PyObject* obj) { - return (PyComplex_Check(obj) || PyArray_IsScalar(obj, ComplexFloating)); -} - -PANDAS_INLINE int is_bool_object(PyObject* obj) { - return (PyBool_Check(obj) || PyArray_IsScalar(obj, Bool)); -} - -PANDAS_INLINE int is_string_object(PyObject* obj) { - return (PyString_Check(obj) || PyUnicode_Check(obj)); -} - -PANDAS_INLINE int is_datetime64_object(PyObject* obj) { - return PyArray_IsScalar(obj, Datetime); -} - -PANDAS_INLINE int is_timedelta64_object(PyObject* obj) { - return PyArray_IsScalar(obj, Timedelta); -} - PANDAS_INLINE int assign_value_1d(PyArrayObject* ap, Py_ssize_t _i, PyObject* v) { npy_intp i = (npy_intp)_i; @@ -80,17 +53,4 @@ void set_array_not_contiguous(PyArrayObject* ao) { ao->flags &= ~(NPY_C_CONTIGUOUS | NPY_F_CONTIGUOUS); } -// If arr is zerodim array, return a proper array scalar (e.g. np.int64). -// Otherwise, return arr as is. 
-PANDAS_INLINE PyObject* unbox_if_zerodim(PyObject* arr) { - if (PyArray_IsZeroDim(arr)) { - PyObject* ret; - ret = PyArray_ToScalar(PyArray_DATA(arr), arr); - return ret; - } else { - Py_INCREF(arr); - return arr; - } -} - #endif // PANDAS__LIBS_SRC_NUMPY_HELPER_H_ diff --git a/pandas/_libs/src/util.pxd b/pandas/_libs/src/util.pxd index be6591a118dc5..cf23df1279f34 100644 --- a/pandas/_libs/src/util.pxd +++ b/pandas/_libs/src/util.pxd @@ -1,24 +1,76 @@ -from numpy cimport ndarray +from numpy cimport ndarray, NPY_C_CONTIGUOUS, NPY_F_CONTIGUOUS cimport numpy as cnp +cnp.import_array() + cimport cpython +from cpython cimport PyTypeObject + +cdef extern from "Python.h": + # Note: importing extern-style allows us to declare these as nogil + # functions, whereas `from cpython cimport` does not. + bint PyUnicode_Check(object obj) nogil + bint PyString_Check(object obj) nogil + bint PyBool_Check(object obj) nogil + bint PyFloat_Check(object obj) nogil + bint PyComplex_Check(object obj) nogil + bint PyObject_TypeCheck(object obj, PyTypeObject* type) nogil + + +cdef extern from "numpy/arrayobject.h": + PyTypeObject PyFloatingArrType_Type + +cdef extern from "numpy/ndarrayobject.h": + PyTypeObject PyTimedeltaArrType_Type + PyTypeObject PyDatetimeArrType_Type + PyTypeObject PyComplexFloatingArrType_Type + PyTypeObject PyBoolArrType_Type + + bint PyArray_IsIntegerScalar(obj) nogil + bint PyArray_Check(obj) nogil + +# -------------------------------------------------------------------- +# Type Checking + +cdef inline bint is_string_object(object obj) nogil: + return PyString_Check(obj) or PyUnicode_Check(obj) + + +cdef inline bint is_integer_object(object obj) nogil: + return not PyBool_Check(obj) and PyArray_IsIntegerScalar(obj) + + +cdef inline bint is_float_object(object obj) nogil: + return (PyFloat_Check(obj) or + (PyObject_TypeCheck(obj, &PyFloatingArrType_Type))) + +cdef inline bint is_complex_object(object obj) nogil: + return (PyComplex_Check(obj) or + 
PyObject_TypeCheck(obj, &PyComplexFloatingArrType_Type)) + + +cdef inline bint is_bool_object(object obj) nogil: + return (PyBool_Check(obj) or + PyObject_TypeCheck(obj, &PyBoolArrType_Type)) + + +cdef inline bint is_timedelta64_object(object obj) nogil: + return PyObject_TypeCheck(obj, &PyTimedeltaArrType_Type) + + +cdef inline bint is_datetime64_object(object obj) nogil: + return PyObject_TypeCheck(obj, &PyDatetimeArrType_Type) + +# -------------------------------------------------------------------- cdef extern from "numpy_helper.h": void set_array_not_contiguous(ndarray ao) - int is_integer_object(object) - int is_float_object(object) - int is_complex_object(object) - int is_bool_object(object) - int is_string_object(object) - int is_datetime64_object(object) - int is_timedelta64_object(object) int assign_value_1d(ndarray, Py_ssize_t, object) except -1 cnp.int64_t get_nat() object get_value_1d(ndarray, Py_ssize_t) char *get_c_string(object) except NULL object char_to_string(char*) - object unbox_if_zerodim(object arr) ctypedef fused numeric: cnp.int8_t @@ -112,3 +164,22 @@ cdef inline bint _checknan(object val): cdef inline bint is_period_object(object val): return getattr(val, '_typ', '_typ') == 'period' + + +cdef inline object unbox_if_zerodim(object arr): + """ + If arr is zerodim array, return a proper array scalar (e.g. np.int64). + Otherwise, return arr as is. 
+ + Parameters + ---------- + arr : object + + Returns + ------- + result : object + """ + if cnp.PyArray_IsZeroDim(arr): + return cnp.PyArray_ToScalar(cnp.PyArray_DATA(arr), arr) + else: + return arr diff --git a/setup.py b/setup.py index 721e6f62bd3e4..27943a776c414 100755 --- a/setup.py +++ b/setup.py @@ -687,8 +687,7 @@ def pxd(name): ext.sources[0] = root + suffix ujson_ext = Extension('pandas._libs.json', - depends=['pandas/_libs/src/ujson/lib/ultrajson.h', - 'pandas/_libs/src/numpy_helper.h'], + depends=['pandas/_libs/src/ujson/lib/ultrajson.h'], sources=(['pandas/_libs/src/ujson/python/ujson.c', 'pandas/_libs/src/ujson/python/objToJSON.c', 'pandas/_libs/src/ujson/python/JSONtoObj.c', From 1c64d7d789d9dd5c120bdbd6dfb99ff05f49fa1b Mon Sep 17 00:00:00 2001 From: Ming Li <14131823+minggli@users.noreply.github.com> Date: Wed, 31 Jan 2018 11:34:12 +0000 Subject: [PATCH 014/217] [#7292] BUG: asfreq / pct_change strange behavior (#19410) --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/core/generic.py | 1 + pandas/tests/frame/test_timeseries.py | 36 +++++++++++++++++++++++++- pandas/tests/series/test_timeseries.py | 31 +++++++++++++++++++++- 4 files changed, 67 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 1890636bc8e1a..4a5f0bda8c692 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -420,6 +420,7 @@ Datetimelike - Bug in ``.astype()`` to non-ns timedelta units would hold the incorrect dtype (:issue:`19176`, :issue:`19223`, :issue:`12425`) - Bug in subtracting :class:`Series` from ``NaT`` incorrectly returning ``NaT`` (:issue:`19158`) - Bug in :func:`Series.truncate` which raises ``TypeError`` with a monotonic ``PeriodIndex`` (:issue:`17717`) +- Bug in :func:`~DataFrame.pct_change` using ``periods`` and ``freq`` returned different length outputs (:issue:`7292`) Timezones ^^^^^^^^^ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 
6e777281b11e1..bee954aa9bba8 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7315,6 +7315,7 @@ def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None, rs = (data.div(data.shift(periods=periods, freq=freq, axis=axis, **kwargs)) - 1) + rs = rs.reindex_like(data) if freq is None: mask = isna(com._values_from_object(self)) np.putmask(rs.values, mask, np.nan) diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index 3af798acdede5..e6b47fd69cb05 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -108,7 +108,9 @@ def test_pct_change(self): rs = self.tsframe.pct_change(freq='5D') filled = self.tsframe.fillna(method='pad') - assert_frame_equal(rs, filled / filled.shift(freq='5D') - 1) + assert_frame_equal(rs, + (filled / filled.shift(freq='5D') - 1) + .reindex_like(filled)) def test_pct_change_shift_over_nas(self): s = Series([1., 1.5, np.nan, 2.5, 3.]) @@ -120,6 +122,38 @@ def test_pct_change_shift_over_nas(self): edf = DataFrame({'a': expected, 'b': expected}) assert_frame_equal(chg, edf) + def test_pct_change_periods_freq(self): + # GH 7292 + rs_freq = self.tsframe.pct_change(freq='5B') + rs_periods = self.tsframe.pct_change(5) + assert_frame_equal(rs_freq, rs_periods) + + rs_freq = self.tsframe.pct_change(freq='3B', fill_method=None) + rs_periods = self.tsframe.pct_change(3, fill_method=None) + assert_frame_equal(rs_freq, rs_periods) + + rs_freq = self.tsframe.pct_change(freq='3B', fill_method='bfill') + rs_periods = self.tsframe.pct_change(3, fill_method='bfill') + assert_frame_equal(rs_freq, rs_periods) + + rs_freq = self.tsframe.pct_change(freq='7B', + fill_method='pad', + limit=1) + rs_periods = self.tsframe.pct_change(7, fill_method='pad', limit=1) + assert_frame_equal(rs_freq, rs_periods) + + rs_freq = self.tsframe.pct_change(freq='7B', + fill_method='bfill', + limit=3) + rs_periods = self.tsframe.pct_change(7, fill_method='bfill', limit=3) 
+ assert_frame_equal(rs_freq, rs_periods) + + empty_ts = DataFrame(index=self.tsframe.index, + columns=self.tsframe.columns) + rs_freq = empty_ts.pct_change(freq='14B') + rs_periods = empty_ts.pct_change(14) + assert_frame_equal(rs_freq, rs_periods) + def test_frame_ctor_datetime64_column(self): rng = date_range('1/1/2000 00:00:00', '1/1/2000 1:59:50', freq='10s') dates = np.asarray(rng) diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index 7be801629e387..7a1aff1cc223c 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -344,7 +344,9 @@ def test_pct_change(self): rs = self.ts.pct_change(freq='5D') filled = self.ts.fillna(method='pad') - assert_series_equal(rs, filled / filled.shift(freq='5D') - 1) + assert_series_equal(rs, + (filled / filled.shift(freq='5D') - 1) + .reindex_like(filled)) def test_pct_change_shift_over_nas(self): s = Series([1., 1.5, np.nan, 2.5, 3.]) @@ -353,6 +355,33 @@ def test_pct_change_shift_over_nas(self): expected = Series([np.nan, 0.5, np.nan, 2.5 / 1.5 - 1, .2]) assert_series_equal(chg, expected) + def test_pct_change_periods_freq(self): + # GH 7292 + rs_freq = self.ts.pct_change(freq='5B') + rs_periods = self.ts.pct_change(5) + assert_series_equal(rs_freq, rs_periods) + + rs_freq = self.ts.pct_change(freq='3B', fill_method=None) + rs_periods = self.ts.pct_change(3, fill_method=None) + assert_series_equal(rs_freq, rs_periods) + + rs_freq = self.ts.pct_change(freq='3B', fill_method='bfill') + rs_periods = self.ts.pct_change(3, fill_method='bfill') + assert_series_equal(rs_freq, rs_periods) + + rs_freq = self.ts.pct_change(freq='7B', fill_method='pad', limit=1) + rs_periods = self.ts.pct_change(7, fill_method='pad', limit=1) + assert_series_equal(rs_freq, rs_periods) + + rs_freq = self.ts.pct_change(freq='7B', fill_method='bfill', limit=3) + rs_periods = self.ts.pct_change(7, fill_method='bfill', limit=3) + assert_series_equal(rs_freq, rs_periods) + 
+ empty_ts = Series(index=self.ts.index) + rs_freq = empty_ts.pct_change(freq='14B') + rs_periods = empty_ts.pct_change(14) + assert_series_equal(rs_freq, rs_periods) + def test_autocorr(self): # Just run the function corr1 = self.ts.autocorr() From 73c8d237cb5dfa197a00b8dc6a935e9b973d2b63 Mon Sep 17 00:00:00 2001 From: Paul Reidy Date: Wed, 31 Jan 2018 12:15:14 +0000 Subject: [PATCH 015/217] DEPR: Deprecate from_items (#18529) --- doc/source/whatsnew/v0.23.0.txt | 2 +- pandas/core/frame.py | 15 ++++- pandas/io/stata.py | 9 +-- pandas/tests/frame/test_constructors.py | 69 ++++++++++++++------ pandas/tests/frame/test_nonunique_indexes.py | 7 +- pandas/tests/io/parser/common.py | 6 +- pandas/tests/io/test_excel.py | 58 ++++++++-------- pandas/tests/io/test_stata.py | 3 +- 8 files changed, 106 insertions(+), 63 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 4a5f0bda8c692..592c0788070a1 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -329,7 +329,7 @@ Deprecations - :func:`read_excel` has deprecated the ``skip_footer`` parameter. Use ``skipfooter`` instead (:issue:`18836`) - The ``is_copy`` attribute is deprecated and will be removed in a future version (:issue:`18801`). - ``IntervalIndex.from_intervals`` is deprecated in favor of the :class:`IntervalIndex` constructor (:issue:`19263`) - +- :func:``DataFrame.from_items`` is deprecated. Use :func:``DataFrame.from_dict()`` instead, or :func:``DataFrame.from_dict(OrderedDict())`` if you wish to preserve the key order (:issue:`17320`) .. 
_whatsnew_0230.prior_deprecations: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 788b236b0ec59..96d28581cfdd9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -313,7 +313,7 @@ def _constructor(self): _constructor_sliced = Series _deprecations = NDFrame._deprecations | frozenset( - ['sortlevel', 'get_value', 'set_value', 'from_csv']) + ['sortlevel', 'get_value', 'set_value', 'from_csv', 'from_items']) @property def _constructor_expanddim(self): @@ -1246,6 +1246,12 @@ def to_records(self, index=True, convert_datetime64=True): @classmethod def from_items(cls, items, columns=None, orient='columns'): """ + .. deprecated:: 0.23.0 + from_items is deprecated and will be removed in a + future version. Use :meth:`DataFrame.from_dict(dict())` + instead. :meth:`DataFrame.from_dict(OrderedDict(...))` may be used + to preserve the key order. + Convert (key, value) pairs to DataFrame. The keys will be the axis index (usually the columns, but depends on the specified orientation). The values should be arrays or Series. @@ -1266,6 +1272,13 @@ def from_items(cls, items, columns=None, orient='columns'): ------- frame : DataFrame """ + + warnings.warn("from_items is deprecated. Please use " + "DataFrame.from_dict(dict()) instead. 
" + "DataFrame.from_dict(OrderedDict()) may be used to " + "preserve the key order.", + FutureWarning, stacklevel=2) + keys, values = lzip(*items) if orient == 'columns': diff --git a/pandas/io/stata.py b/pandas/io/stata.py index b409cf20e9a09..0922a4a9c3e9b 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -13,6 +13,7 @@ import datetime import struct import sys +from collections import OrderedDict import numpy as np from dateutil.relativedelta import relativedelta @@ -1571,7 +1572,7 @@ def read(self, nrows=None, convert_dates=None, else: data_formatted.append((col, data[col])) if requires_type_conversion: - data = DataFrame.from_items(data_formatted) + data = DataFrame.from_dict(OrderedDict(data_formatted)) del data_formatted self._do_convert_missing(data, convert_missing) @@ -1609,7 +1610,7 @@ def read(self, nrows=None, convert_dates=None, convert = True retyped_data.append((col, data[col].astype(dtype))) if convert: - data = DataFrame.from_items(retyped_data) + data = DataFrame.from_dict(OrderedDict(retyped_data)) if index_col is not None: data = data.set_index(data.pop(index_col)) @@ -1722,7 +1723,7 @@ def _do_convert_categoricals(self, data, value_label_dict, lbllist, cat_converted_data.append((col, cat_data)) else: cat_converted_data.append((col, data[col])) - data = DataFrame.from_items(cat_converted_data) + data = DataFrame.from_dict(OrderedDict(cat_converted_data)) return data def data_label(self): @@ -1997,7 +1998,7 @@ def _prepare_categoricals(self, data): data_formatted.append((col, values)) else: data_formatted.append((col, data[col])) - return DataFrame.from_items(data_formatted) + return DataFrame.from_dict(OrderedDict(data_formatted)) def _replace_nans(self, data): # return data diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index b24ae22162a34..8abd88d8a379c 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -871,7 +871,7 @@ def 
__len__(self, n): # GH 4297 # support Array import array - result = DataFrame.from_items([('A', array.array('i', range(10)))]) + result = DataFrame({'A': array.array('i', range(10))}) expected = DataFrame({'A': list(range(10))}) tm.assert_frame_equal(result, expected, check_dtype=False) @@ -1175,28 +1175,35 @@ def test_constructor_manager_resize(self): def test_constructor_from_items(self): items = [(c, self.frame[c]) for c in self.frame.columns] - recons = DataFrame.from_items(items) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + recons = DataFrame.from_items(items) tm.assert_frame_equal(recons, self.frame) # pass some columns - recons = DataFrame.from_items(items, columns=['C', 'B', 'A']) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + recons = DataFrame.from_items(items, columns=['C', 'B', 'A']) tm.assert_frame_equal(recons, self.frame.loc[:, ['C', 'B', 'A']]) # orient='index' row_items = [(idx, self.mixed_frame.xs(idx)) for idx in self.mixed_frame.index] - - recons = DataFrame.from_items(row_items, - columns=self.mixed_frame.columns, - orient='index') + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + recons = DataFrame.from_items(row_items, + columns=self.mixed_frame.columns, + orient='index') tm.assert_frame_equal(recons, self.mixed_frame) assert recons['A'].dtype == np.float64 with tm.assert_raises_regex(TypeError, "Must pass columns with " "orient='index'"): - DataFrame.from_items(row_items, orient='index') + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + DataFrame.from_items(row_items, orient='index') # orient='index', but thar be tuples arr = construct_1d_object_array_from_listlike( @@ -1204,15 +1211,19 @@ def test_constructor_from_items(self): self.mixed_frame['foo'] = arr row_items = [(idx, list(self.mixed_frame.xs(idx))) for idx in self.mixed_frame.index] - recons = DataFrame.from_items(row_items, - columns=self.mixed_frame.columns, - 
orient='index') + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + recons = DataFrame.from_items(row_items, + columns=self.mixed_frame.columns, + orient='index') tm.assert_frame_equal(recons, self.mixed_frame) assert isinstance(recons['foo'][0], tuple) - rs = DataFrame.from_items([('A', [1, 2, 3]), ('B', [4, 5, 6])], - orient='index', - columns=['one', 'two', 'three']) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + rs = DataFrame.from_items([('A', [1, 2, 3]), ('B', [4, 5, 6])], + orient='index', + columns=['one', 'two', 'three']) xp = DataFrame([[1, 2, 3], [4, 5, 6]], index=['A', 'B'], columns=['one', 'two', 'three']) tm.assert_frame_equal(rs, xp) @@ -1222,12 +1233,28 @@ def test_constructor_from_items_scalars(self): with tm.assert_raises_regex(ValueError, r'The value in each \(key, value\) ' 'pair must be an array, Series, or dict'): - DataFrame.from_items([('A', 1), ('B', 4)]) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + DataFrame.from_items([('A', 1), ('B', 4)]) with tm.assert_raises_regex(ValueError, r'The value in each \(key, value\) ' 'pair must be an array, Series, or dict'): - DataFrame.from_items([('A', 1), ('B', 2)], columns=['col1'], + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + DataFrame.from_items([('A', 1), ('B', 2)], columns=['col1'], + orient='index') + + def test_from_items_deprecation(self): + # GH 17320 + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + DataFrame.from_items([('A', [1, 2, 3]), ('B', [4, 5, 6])]) + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + DataFrame.from_items([('A', [1, 2, 3]), ('B', [4, 5, 6])], + columns=['col1', 'col2', 'col3'], orient='index') def test_constructor_mix_series_nonseries(self): @@ -1256,13 +1283,13 @@ def test_constructor_column_duplicates(self): tm.assert_frame_equal(df, edf) - idf = DataFrame.from_items( - [('a', [8]), ('a', 
[5])], columns=['a', 'a']) + idf = DataFrame.from_records([(8, 5)], + columns=['a', 'a']) + tm.assert_frame_equal(idf, edf) - pytest.raises(ValueError, DataFrame.from_items, - [('a', [8]), ('a', [5]), ('b', [6])], - columns=['b', 'a', 'a']) + pytest.raises(ValueError, DataFrame.from_dict, + OrderedDict([('b', 8), ('a', 5), ('a', 6)])) def test_constructor_empty_with_string_dtype(self): # GH 9428 diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index f0a21cde4fbd9..36465db78361f 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -214,9 +214,10 @@ def check(result, expected=None): for index in [df.index, pd.Index(list('edcba'))]: this_df = df.copy() expected_ser = pd.Series(index.values, index=this_df.index) - expected_df = DataFrame.from_items([('A', expected_ser), - ('B', this_df['B']), - ('A', expected_ser)]) + expected_df = DataFrame({'A': expected_ser, + 'B': this_df['B'], + 'A': expected_ser}, + columns=['A', 'B', 'A']) this_df['A'] = index check(this_df, expected_df) diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index 8525cb42c2455..f677b356a77a5 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -8,6 +8,7 @@ import re import sys from datetime import datetime +from collections import OrderedDict import pytest import numpy as np @@ -924,8 +925,9 @@ def test_float_parser(self): def test_scientific_no_exponent(self): # see gh-12215 - df = DataFrame.from_items([('w', ['2e']), ('x', ['3E']), - ('y', ['42e']), ('z', ['632E'])]) + df = DataFrame.from_dict(OrderedDict([('w', ['2e']), ('x', ['3E']), + ('y', ['42e']), + ('z', ['632E'])])) data = df.to_csv(index=False) for prec in self.float_precision_choices: df_roundtrip = self.read_csv( diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index efbabcfd8fc4c..ebb8424b78ed4 100644 --- 
a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -6,6 +6,7 @@ from distutils.version import LooseVersion from functools import partial from warnings import catch_warnings +from collections import OrderedDict import numpy as np import pytest @@ -315,7 +316,7 @@ def test_excel_table(self): def test_reader_special_dtypes(self): - expected = DataFrame.from_items([ + expected = DataFrame.from_dict(OrderedDict([ ("IntCol", [1, 2, -3, 4, 0]), ("FloatCol", [1.25, 2.25, 1.83, 1.92, 0.0000000005]), ("BoolCol", [True, False, True, True, False]), @@ -325,8 +326,7 @@ def test_reader_special_dtypes(self): ("DateCol", [datetime(2013, 10, 30), datetime(2013, 10, 31), datetime(1905, 1, 1), datetime(2013, 12, 14), datetime(2015, 3, 14)]) - ]) - + ])) basename = 'test_types' # should read in correctly and infer types @@ -363,12 +363,12 @@ def test_reader_converters(self): basename = 'test_converters' - expected = DataFrame.from_items([ + expected = DataFrame.from_dict(OrderedDict([ ("IntCol", [1, 2, -3, -1000, 0]), ("FloatCol", [12.5, np.nan, 18.3, 19.2, 0.000000005]), ("BoolCol", ['Found', 'Found', 'Found', 'Not found', 'Found']), ("StrCol", ['1', np.nan, '3', '4', '5']), - ]) + ])) converters = {'IntCol': lambda x: int(x) if x != '' else -1000, 'FloatCol': lambda x: 10 * x if x else np.nan, @@ -718,32 +718,30 @@ def test_reader_seconds(self): if LooseVersion(xlrd.__VERSION__) >= LooseVersion("0.9.3"): # Xlrd >= 0.9.3 can handle Excel milliseconds. 
- expected = DataFrame.from_items([("Time", - [time(1, 2, 3), - time(2, 45, 56, 100000), - time(4, 29, 49, 200000), - time(6, 13, 42, 300000), - time(7, 57, 35, 400000), - time(9, 41, 28, 500000), - time(11, 25, 21, 600000), - time(13, 9, 14, 700000), - time(14, 53, 7, 800000), - time(16, 37, 0, 900000), - time(18, 20, 54)])]) + expected = DataFrame.from_dict({"Time": [time(1, 2, 3), + time(2, 45, 56, 100000), + time(4, 29, 49, 200000), + time(6, 13, 42, 300000), + time(7, 57, 35, 400000), + time(9, 41, 28, 500000), + time(11, 25, 21, 600000), + time(13, 9, 14, 700000), + time(14, 53, 7, 800000), + time(16, 37, 0, 900000), + time(18, 20, 54)]}) else: # Xlrd < 0.9.3 rounds Excel milliseconds. - expected = DataFrame.from_items([("Time", - [time(1, 2, 3), - time(2, 45, 56), - time(4, 29, 49), - time(6, 13, 42), - time(7, 57, 35), - time(9, 41, 29), - time(11, 25, 22), - time(13, 9, 15), - time(14, 53, 8), - time(16, 37, 1), - time(18, 20, 54)])]) + expected = DataFrame.from_dict({"Time": [time(1, 2, 3), + time(2, 45, 56), + time(4, 29, 49), + time(6, 13, 42), + time(7, 57, 35), + time(9, 41, 29), + time(11, 25, 22), + time(13, 9, 15), + time(14, 53, 8), + time(16, 37, 1), + time(18, 20, 54)]}) actual = self.get_exceldf('times_1900', 'Sheet1') tm.assert_frame_equal(actual, expected) @@ -1988,7 +1986,7 @@ def test_datetimes(self): datetime(2013, 1, 13, 18, 20, 52)] with ensure_clean(self.ext) as path: - write_frame = DataFrame.from_items([('A', datetimes)]) + write_frame = DataFrame({'A': datetimes}) write_frame.to_excel(path, 'Sheet1') read_frame = read_excel(path, 'Sheet1', header=0) diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index d0d7f881b37d0..89d76061329a3 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -8,6 +8,7 @@ import warnings from datetime import datetime from distutils.version import LooseVersion +from collections import OrderedDict import numpy as np import pandas as pd @@ -945,7 +946,7 @@ 
def test_categorical_order(self, file): cols.append((col, pd.Categorical.from_codes(codes, labels))) else: cols.append((col, pd.Series(labels, dtype=np.float32))) - expected = DataFrame.from_items(cols) + expected = DataFrame.from_dict(OrderedDict(cols)) # Read with and with out categoricals, ensure order is identical file = getattr(self, file) From d32f0c28a85e4f81031e19028bc0fc9b3786c44d Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 31 Jan 2018 11:00:26 -0600 Subject: [PATCH 016/217] BUG: Fixed accessor for Categorical[Datetime] (#19469) * BUG: Fixed accessor for Categorical[Datetime] * Fixup --- pandas/core/indexes/accessors.py | 5 ++++- pandas/tests/series/test_datetime_values.py | 8 ++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index d40230386216c..c5b300848876e 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -72,9 +72,12 @@ def _delegate_property_get(self, name): # blow up if we operate on categories if self.orig is not None: result = take_1d(result, self.orig.cat.codes) + index = self.orig.index + else: + index = self.index # return the result as a Series, which is by definition a copy - result = Series(result, index=self.index, name=self.name) + result = Series(result, index=index, name=self.name) # setting this object will show a SettingWithCopyWarning/Error result._is_copy = ("modifications to a property of a datetimelike " diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index 49b4600b10738..93c8ebc5f05df 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -259,6 +259,14 @@ def f(): pytest.raises(com.SettingWithCopyError, f) + def test_dt_namespace_accessor_categorical(self): + # GH 19468 + dti = DatetimeIndex(['20171111', '20181212']).repeat(2) + s = Series(pd.Categorical(dti), name='foo') + result = 
s.dt.year + expected = Series([2017, 2017, 2018, 2018], name='foo') + tm.assert_series_equal(result, expected) + def test_dt_accessor_no_new_attributes(self): # https://github.com/pandas-dev/pandas/issues/10673 s = Series(date_range('20130101', periods=5, freq='D')) From 178501039433f0ae0299aeb04ec8e855f025a182 Mon Sep 17 00:00:00 2001 From: Tommy <10076072+tommyod@users.noreply.github.com> Date: Thu, 1 Feb 2018 00:54:15 +0100 Subject: [PATCH 017/217] DOC: Spellcheck of categorical.rst and visualization.rst (#19428) --- doc/source/categorical.rst | 187 ++++++++++++++++++----------------- doc/source/visualization.rst | 130 ++++++++++++++---------- 2 files changed, 176 insertions(+), 141 deletions(-) diff --git a/doc/source/categorical.rst b/doc/source/categorical.rst index 7364167611730..efcc04d688334 100644 --- a/doc/source/categorical.rst +++ b/doc/source/categorical.rst @@ -19,10 +19,11 @@ Categorical Data This is an introduction to pandas categorical data type, including a short comparison with R's ``factor``. -`Categoricals` are a pandas data type, which correspond to categorical variables in -statistics: a variable, which can take on only a limited, and usually fixed, -number of possible values (`categories`; `levels` in R). Examples are gender, social class, -blood types, country affiliations, observation time or ratings via Likert scales. +`Categoricals` are a pandas data type corresponding to categorical variables in +statistics. A categorical variable takes on a limited, and usually fixed, +number of possible values (`categories`; `levels` in R). Examples are gender, +social class, blood type, country affiliation, observation time or rating via +Likert scales. In contrast to statistical categorical variables, categorical data might have an order (e.g. 'strongly agree' vs 'agree' or 'first observation' vs. 'second observation'), but numerical @@ -48,16 +49,16 @@ See also the :ref:`API docs on categoricals`. 
Object Creation --------------- -Categorical `Series` or columns in a `DataFrame` can be created in several ways: +Categorical ``Series`` or columns in a ``DataFrame`` can be created in several ways: -By specifying ``dtype="category"`` when constructing a `Series`: +By specifying ``dtype="category"`` when constructing a ``Series``: .. ipython:: python s = pd.Series(["a","b","c","a"], dtype="category") s -By converting an existing `Series` or column to a ``category`` dtype: +By converting an existing ``Series`` or column to a ``category`` dtype: .. ipython:: python @@ -65,18 +66,17 @@ By converting an existing `Series` or column to a ``category`` dtype: df["B"] = df["A"].astype('category') df -By using some special functions: +By using special functions, such as :func:`~pandas.cut`, which groups data into +discrete bins. See the :ref:`example on tiling ` in the docs. .. ipython:: python df = pd.DataFrame({'value': np.random.randint(0, 100, 20)}) - labels = [ "{0} - {1}".format(i, i + 9) for i in range(0, 100, 10) ] + labels = ["{0} - {1}".format(i, i + 9) for i in range(0, 100, 10)] df['group'] = pd.cut(df.value, range(0, 105, 10), right=False, labels=labels) df.head(10) -See :ref:`documentation ` for :func:`~pandas.cut`. - By passing a :class:`pandas.Categorical` object to a `Series` or assigning it to a `DataFrame`. .. ipython:: python @@ -89,10 +89,11 @@ By passing a :class:`pandas.Categorical` object to a `Series` or assigning it to df["B"] = raw_cat df -Anywhere above we passed a keyword ``dtype='category'``, we used the default behavior of +In the examples above where we passed ``dtype='category'``, we used the default +behavior: -1. categories are inferred from the data -2. categories are unordered. +1. Categories are inferred from the data. +2. Categories are unordered. To control those behaviors, instead of passing ``'category'``, use an instance of :class:`~pandas.api.types.CategoricalDtype`. 
@@ -123,8 +124,8 @@ Categorical data has a specific ``category`` :ref:`dtype `: In contrast to R's `factor` function, there is currently no way to assign/change labels at creation time. Use `categories` to change the categories after creation time. -To get back to the original Series or `numpy` array, use ``Series.astype(original_dtype)`` or -``np.asarray(categorical)``: +To get back to the original ``Series`` or NumPy array, use +``Series.astype(original_dtype)`` or ``np.asarray(categorical)``: .. ipython:: python @@ -135,8 +136,9 @@ To get back to the original Series or `numpy` array, use ``Series.astype(origina s2.astype(str) np.asarray(s2) -If you have already `codes` and `categories`, you can use the :func:`~pandas.Categorical.from_codes` -constructor to save the factorize step during normal constructor mode: +If you already have `codes` and `categories`, you can use the +:func:`~pandas.Categorical.from_codes` constructor to save the factorize step +during normal constructor mode: .. ipython:: python @@ -171,7 +173,7 @@ by default. A :class:`~pandas.api.types.CategoricalDtype` can be used in any place pandas expects a `dtype`. For example :func:`pandas.read_csv`, -:func:`pandas.DataFrame.astype`, or in the Series constructor. +:func:`pandas.DataFrame.astype`, or in the ``Series`` constructor. .. note:: @@ -185,8 +187,8 @@ Equality Semantics ~~~~~~~~~~~~~~~~~~ Two instances of :class:`~pandas.api.types.CategoricalDtype` compare equal -whenever they have the same categories and orderedness. When comparing two -unordered categoricals, the order of the ``categories`` is not considered +whenever they have the same categories and order. When comparing two +unordered categoricals, the order of the ``categories`` is not considered. .. 
ipython:: python @@ -198,7 +200,7 @@ unordered categoricals, the order of the ``categories`` is not considered # Unequal, since the second CategoricalDtype is ordered c1 == CategoricalDtype(['a', 'b', 'c'], ordered=True) -All instances of ``CategoricalDtype`` compare equal to the string ``'category'`` +All instances of ``CategoricalDtype`` compare equal to the string ``'category'``. .. ipython:: python @@ -215,8 +217,8 @@ All instances of ``CategoricalDtype`` compare equal to the string ``'category'`` Description ----------- -Using ``.describe()`` on categorical data will produce similar output to a `Series` or -`DataFrame` of type ``string``. +Using :meth:`~DataFrame.describe` on categorical data will produce similar +output to a ``Series`` or ``DataFrame`` of type ``string``. .. ipython:: python @@ -230,10 +232,10 @@ Using ``.describe()`` on categorical data will produce similar output to a `Seri Working with categories ----------------------- -Categorical data has a `categories` and a `ordered` property, which list their possible values and -whether the ordering matters or not. These properties are exposed as ``s.cat.categories`` and -``s.cat.ordered``. If you don't manually specify categories and ordering, they are inferred from the -passed in values. +Categorical data has a `categories` and a `ordered` property, which list their +possible values and whether the ordering matters or not. These properties are +exposed as ``s.cat.categories`` and ``s.cat.ordered``. If you don't manually +specify categories and ordering, they are inferred from the passed arguments. .. ipython:: python @@ -251,13 +253,13 @@ It's also possible to pass in the categories in a specific order: .. note:: - New categorical data are NOT automatically ordered. You must explicitly pass ``ordered=True`` to - indicate an ordered ``Categorical``. + New categorical data are **not** automatically ordered. You must explicitly + pass ``ordered=True`` to indicate an ordered ``Categorical``. .. 
note:: - The result of ``Series.unique()`` is not always the same as ``Series.cat.categories``, + The result of :meth:`~Series.unique` is not always the same as ``Series.cat.categories``, because ``Series.unique()`` has a couple of guarantees, namely that it returns categories in the order of appearance, and it only includes values that are actually present. @@ -275,8 +277,10 @@ It's also possible to pass in the categories in a specific order: Renaming categories ~~~~~~~~~~~~~~~~~~~ -Renaming categories is done by assigning new values to the ``Series.cat.categories`` property or -by using the :func:`Categorical.rename_categories` method: +Renaming categories is done by assigning new values to the +``Series.cat.categories`` property or by using the +:meth:`~pandas.Categorical.rename_categories` method: + .. ipython:: python @@ -296,8 +300,8 @@ by using the :func:`Categorical.rename_categories` method: .. note:: - Be aware that assigning new categories is an inplace operations, while most other operation - under ``Series.cat`` per default return a new Series of dtype `category`. + Be aware that assigning new categories is an inplace operation, while most other operations + under ``Series.cat`` per default return a new ``Series`` of dtype `category`. Categories must be unique or a `ValueError` is raised: @@ -320,7 +324,8 @@ Categories must also not be ``NaN`` or a `ValueError` is raised: Appending new categories ~~~~~~~~~~~~~~~~~~~~~~~~ -Appending categories can be done by using the :func:`Categorical.add_categories` method: +Appending categories can be done by using the +:meth:`~pandas.Categorical.add_categories` method: .. ipython:: python @@ -331,8 +336,9 @@ Appending categories can be done by using the :func:`Categorical.add_categories` Removing categories ~~~~~~~~~~~~~~~~~~~ -Removing categories can be done by using the :func:`Categorical.remove_categories` method. 
Values -which are removed are replaced by ``np.nan``.: +Removing categories can be done by using the +:meth:`~pandas.Categorical.remove_categories` method. Values which are removed +are replaced by ``np.nan``: .. ipython:: python @@ -353,8 +359,10 @@ Removing unused categories can also be done: Setting categories ~~~~~~~~~~~~~~~~~~ -If you want to do remove and add new categories in one step (which has some speed advantage), -or simply set the categories to a predefined scale, use :func:`Categorical.set_categories`. +If you want to remove and add new categories in one step (which has some +speed advantage), or simply set the categories to a predefined scale, +use :meth:`~pandas.Categorical.set_categories`. + .. ipython:: python @@ -366,7 +374,7 @@ or simply set the categories to a predefined scale, use :func:`Categorical.set_c .. note:: Be aware that :func:`Categorical.set_categories` cannot know whether some category is omitted intentionally or because it is misspelled or (under Python3) due to a type difference (e.g., - numpys S1 dtype and Python strings). This can result in surprising behaviour! + NumPy S1 dtype and Python strings). This can result in surprising behaviour! Sorting and Order ----------------- @@ -374,7 +382,7 @@ Sorting and Order .. _categorical.sort: If categorical data is ordered (``s.cat.ordered == True``), then the order of the categories has a -meaning and certain operations are possible. If the categorical is unordered, ``.min()/.max()`` will raise a `TypeError`. +meaning and certain operations are possible. If the categorical is unordered, ``.min()/.max()`` will raise a ``TypeError``. .. ipython:: python @@ -411,8 +419,8 @@ This is even true for strings and numeric data: Reordering ~~~~~~~~~~ -Reordering the categories is possible via the :func:`Categorical.reorder_categories` and -the :func:`Categorical.set_categories` methods. 
For :func:`Categorical.reorder_categories`, all +Reordering the categories is possible via the :meth:`Categorical.reorder_categories` and +the :meth:`Categorical.set_categories` methods. For :meth:`Categorical.reorder_categories`, all old categories must be included in the new categories and no new categories are allowed. This will necessarily make the sort order the same as the categories order. @@ -428,16 +436,16 @@ necessarily make the sort order the same as the categories order. .. note:: Note the difference between assigning new categories and reordering the categories: the first - renames categories and therefore the individual values in the `Series`, but if the first + renames categories and therefore the individual values in the ``Series``, but if the first position was sorted last, the renamed value will still be sorted last. Reordering means that the way values are sorted is different afterwards, but not that individual values in the - `Series` are changed. + ``Series`` are changed. .. note:: - If the `Categorical` is not ordered, ``Series.min()`` and ``Series.max()`` will raise + If the ``Categorical`` is not ordered, :meth:`Series.min` and :meth:`Series.max` will raise ``TypeError``. Numeric operations like ``+``, ``-``, ``*``, ``/`` and operations based on them - (e.g. ``Series.median()``, which would need to compute the mean between two values if the length + (e.g. :meth:`Series.median`, which would need to compute the mean between two values if the length of an array is even) do not work and raise a ``TypeError``. Multi Column Sorting @@ -464,19 +472,19 @@ Comparisons Comparing categorical data with other objects is possible in three cases: - * comparing equality (``==`` and ``!=``) to a list-like object (list, Series, array, + * Comparing equality (``==`` and ``!=``) to a list-like object (list, Series, array, ...) of the same length as the categorical data. 
- * all comparisons (``==``, ``!=``, ``>``, ``>=``, ``<``, and ``<=``) of categorical data to + * All comparisons (``==``, ``!=``, ``>``, ``>=``, ``<``, and ``<=``) of categorical data to another categorical Series, when ``ordered==True`` and the `categories` are the same. - * all comparisons of a categorical data to a scalar. + * All comparisons of a categorical data to a scalar. All other comparisons, especially "non-equality" comparisons of two categoricals with different -categories or a categorical with any list-like object, will raise a TypeError. +categories or a categorical with any list-like object, will raise a ``TypeError``. .. note:: - Any "non-equality" comparisons of categorical data with a `Series`, `np.array`, `list` or - categorical data with different categories or ordering will raise an `TypeError` because custom + Any "non-equality" comparisons of categorical data with a ``Series``, ``np.array``, ``list`` or + categorical data with different categories or ordering will raise a ``TypeError`` because custom categories ordering could be interpreted in two ways: one with taking into account the ordering and one without. @@ -546,11 +554,11 @@ When you compare two unordered categoricals with the same categories, the order Operations ---------- -Apart from ``Series.min()``, ``Series.max()`` and ``Series.mode()``, the following operations are -possible with categorical data: +Apart from :meth:`Series.min`, :meth:`Series.max` and :meth:`Series.mode`, the +following operations are possible with categorical data: -`Series` methods like `Series.value_counts()` will use all categories, even if some categories are not -present in the data: +``Series`` methods like :meth:`Series.value_counts` will use all categories, +even if some categories are not present in the data: .. ipython:: python @@ -588,8 +596,8 @@ that only values already in `categories` can be assigned. 
Getting ~~~~~~~ -If the slicing operation returns either a `DataFrame` or a column of type `Series`, -the ``category`` dtype is preserved. +If the slicing operation returns either a ``DataFrame`` or a column of type +``Series``, the ``category`` dtype is preserved. .. ipython:: python @@ -602,8 +610,8 @@ the ``category`` dtype is preserved. df.loc["h":"j","cats"] df[df["cats"] == "b"] -An example where the category type is not preserved is if you take one single row: the -resulting `Series` is of dtype ``object``: +An example where the category type is not preserved is if you take one single +row: the resulting ``Series`` is of dtype ``object``: .. ipython:: python @@ -620,10 +628,11 @@ of length "1". df.at["h","cats"] # returns a string .. note:: - This is a difference to R's `factor` function, where ``factor(c(1,2,3))[1]`` + This is in contrast to R's `factor` function, where ``factor(c(1,2,3))[1]`` returns a single value `factor`. -To get a single value `Series` of type ``category`` pass in a list with a single value: +To get a single value ``Series`` of type ``category``, you pass in a list with +a single value: .. ipython:: python @@ -632,8 +641,8 @@ To get a single value `Series` of type ``category`` pass in a list with a single String and datetime accessors ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The accessors ``.dt`` and ``.str`` will work if the ``s.cat.categories`` are of an appropriate -type: +The accessors ``.dt`` and ``.str`` will work if the ``s.cat.categories`` are of +an appropriate type: .. ipython:: python @@ -676,8 +685,8 @@ That means, that the returned values from methods and properties on the accessor Setting ~~~~~~~ -Setting values in a categorical column (or `Series`) works as long as the value is included in the -`categories`: +Setting values in a categorical column (or ``Series``) works as long as the +value is included in the `categories`: .. 
ipython:: python @@ -704,7 +713,7 @@ Setting values by assigning categorical data will also check that the `categorie except ValueError as e: print("ValueError: " + str(e)) -Assigning a `Categorical` to parts of a column of other types will use the values: +Assigning a ``Categorical`` to parts of a column of other types will use the values: .. ipython:: python @@ -719,7 +728,7 @@ Assigning a `Categorical` to parts of a column of other types will use the value Merging ~~~~~~~ -You can concat two `DataFrames` containing categorical data together, +You can concat two ``DataFrames`` containing categorical data together, but the categories of these categoricals need to be the same: .. ipython:: python @@ -731,7 +740,7 @@ but the categories of these categoricals need to be the same: res res.dtypes -In this case the categories are not the same and so an error is raised: +In this case the categories are not the same, and therefore an error is raised: .. ipython:: python @@ -754,10 +763,10 @@ Unioning .. versionadded:: 0.19.0 -If you want to combine categoricals that do not necessarily have -the same categories, the ``union_categoricals`` function will -combine a list-like of categoricals. The new categories -will be the union of the categories being combined. +If you want to combine categoricals that do not necessarily have the same +categories, the :func:`~pandas.api.types.union_categoricals` function will +combine a list-like of categoricals. The new categories will be the union of +the categories being combined. .. ipython:: python @@ -805,8 +814,9 @@ using the ``ignore_ordered=True`` argument. 
b = pd.Categorical(["c", "b", "a"], ordered=True) union_categoricals([a, b], ignore_order=True) -``union_categoricals`` also works with a ``CategoricalIndex``, or ``Series`` containing -categorical data, but note that the resulting array will always be a plain ``Categorical`` +:func:`~pandas.api.types.union_categoricals` also works with a +``CategoricalIndex``, or ``Series`` containing categorical data, but note that +the resulting array will always be a plain ``Categorical``: .. ipython:: python @@ -956,7 +966,7 @@ Differences to R's `factor` The following differences to R's factor functions can be observed: -* R's `levels` are named `categories` +* R's `levels` are named `categories`. * R's `levels` are always of type string, while `categories` in pandas can be of any dtype. * It's not possible to specify labels at creation time. Use ``s.cat.rename_categories(new_labels)`` afterwards. @@ -1009,10 +1019,10 @@ an ``object`` dtype is a constant times the length of the data. `Categorical` is not a `numpy` array ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Currently, categorical data and the underlying `Categorical` is implemented as a python -object and not as a low-level `numpy` array dtype. This leads to some problems. +Currently, categorical data and the underlying ``Categorical`` is implemented as a Python +object and not as a low-level NumPy array dtype. This leads to some problems. -`numpy` itself doesn't know about the new `dtype`: +NumPy itself doesn't know about the new `dtype`: .. ipython:: python @@ -1041,7 +1051,7 @@ To check if a Series contains Categorical data, use ``hasattr(s, 'cat')``: hasattr(pd.Series(['a'], dtype='category'), 'cat') hasattr(pd.Series(['a']), 'cat') -Using `numpy` functions on a `Series` of type ``category`` should not work as `Categoricals` +Using NumPy functions on a ``Series`` of type ``category`` should not work as `Categoricals` are not numeric data (even in the case that ``.categories`` is numeric). .. 
ipython:: python @@ -1080,7 +1090,7 @@ and allows efficient indexing and storage of an index with a large number of dup See the :ref:`advanced indexing docs ` for a more detailed explanation. -Setting the index will create a ``CategoricalIndex`` +Setting the index will create a ``CategoricalIndex``: .. ipython:: python @@ -1095,8 +1105,9 @@ Setting the index will create a ``CategoricalIndex`` Side Effects ~~~~~~~~~~~~ -Constructing a `Series` from a `Categorical` will not copy the input `Categorical`. This -means that changes to the `Series` will in most cases change the original `Categorical`: +Constructing a ``Series`` from a ``Categorical`` will not copy the input +``Categorical``. This means that changes to the ``Series`` will in most cases +change the original ``Categorical``: .. ipython:: python @@ -1109,7 +1120,7 @@ means that changes to the `Series` will in most cases change the original `Categ df["cat"].cat.categories = [1,2,3,4,5] cat -Use ``copy=True`` to prevent such a behaviour or simply don't reuse `Categoricals`: +Use ``copy=True`` to prevent such a behaviour or simply don't reuse ``Categoricals``: .. ipython:: python @@ -1120,6 +1131,6 @@ Use ``copy=True`` to prevent such a behaviour or simply don't reuse `Categorical cat .. note:: - This also happens in some cases when you supply a `numpy` array instead of a `Categorical`: - using an int array (e.g. ``np.array([1,2,3,4])``) will exhibit the same behaviour, while using + This also happens in some cases when you supply a NumPy array instead of a ``Categorical``: + using an int array (e.g. ``np.array([1,2,3,4])``) will exhibit the same behavior, while using a string array (e.g. ``np.array(["a","b","c","a"])``) will not. diff --git a/doc/source/visualization.rst b/doc/source/visualization.rst index cbd17493beb7e..ee93f06fbc958 100644 --- a/doc/source/visualization.rst +++ b/doc/source/visualization.rst @@ -37,7 +37,8 @@ libraries that go beyond the basics documented here. 
Basic Plotting: ``plot`` ------------------------ -See the :ref:`cookbook` for some advanced strategies +We will demonstrate the basics, see the :ref:`cookbook` for +some advanced strategies. The ``plot`` method on Series and DataFrame is just a simple wrapper around :meth:`plt.plot() `: @@ -94,7 +95,8 @@ You can plot one column versus another using the `x` and `y` keywords in .. note:: - For more formatting and styling options, see :ref:`below `. + For more formatting and styling options, see + :ref:`formatting ` below. .. ipython:: python :suppress: @@ -107,14 +109,13 @@ Other Plots ----------- Plotting methods allow for a handful of plot styles other than the -default Line plot. These methods can be provided as the ``kind`` -keyword argument to :meth:`~DataFrame.plot`. -These include: +default line plot. These methods can be provided as the ``kind`` +keyword argument to :meth:`~DataFrame.plot`, and include: * :ref:`'bar' ` or :ref:`'barh' ` for bar plots * :ref:`'hist' ` for histogram * :ref:`'box' ` for boxplot -* :ref:`'kde' ` or ``'density'`` for density plots +* :ref:`'kde' ` or :ref:`'density' ` for density plots * :ref:`'area' ` for area plots * :ref:`'scatter' ` for scatter plots * :ref:`'hexbin' ` for hexagonal bin plots @@ -220,7 +221,7 @@ To get horizontal bar plots, use the ``barh`` method: Histograms ~~~~~~~~~~ -Histogram can be drawn by using the :meth:`DataFrame.plot.hist` and :meth:`Series.plot.hist` methods. +Histograms can be drawn by using the :meth:`DataFrame.plot.hist` and :meth:`Series.plot.hist` methods. .. ipython:: python @@ -238,7 +239,8 @@ Histogram can be drawn by using the :meth:`DataFrame.plot.hist` and :meth:`Serie plt.close('all') -Histogram can be stacked by ``stacked=True``. Bin size can be changed by ``bins`` keyword. +A histogram can be stacked using ``stacked=True``. Bin size can be changed +using the ``bins`` keyword. .. ipython:: python @@ -252,7 +254,9 @@ Histogram can be stacked by ``stacked=True``. 
Bin size can be changed by ``bins` plt.close('all') -You can pass other keywords supported by matplotlib ``hist``. For example, horizontal and cumulative histogram can be drawn by ``orientation='horizontal'`` and ``cumulative=True``. +You can pass other keywords supported by matplotlib ``hist``. For example, +horizontal and cumulative histograms can be drawn by +``orientation='horizontal'`` and ``cumulative=True``. .. ipython:: python @@ -463,7 +467,7 @@ keyword, will affect the output type as well: ``'both'`` Yes Series of namedtuples ================ ======= ========================== -``Groupby.boxplot`` always returns a Series of ``return_type``. +``Groupby.boxplot`` always returns a ``Series`` of ``return_type``. .. ipython:: python :okwarning: @@ -481,7 +485,9 @@ keyword, will affect the output type as well: plt.close('all') -Compare to: +The subplots above are split by the numeric columns first, then the value of +the ``g`` column. Below the subplots are first split by the value of ``g``, +then by the numeric columns. .. ipython:: python :okwarning: @@ -536,8 +542,8 @@ Scatter Plot ~~~~~~~~~~~~ Scatter plot can be drawn by using the :meth:`DataFrame.plot.scatter` method. -Scatter plot requires numeric columns for x and y axis. -These can be specified by ``x`` and ``y`` keywords each. +Scatter plot requires numeric columns for the x and y axes. +These can be specified by the ``x`` and ``y`` keywords. .. ipython:: python :suppress: @@ -581,8 +587,9 @@ each point: plt.close('all') -You can pass other keywords supported by matplotlib ``scatter``. -Below example shows a bubble chart using a dataframe column values as bubble size. +You can pass other keywords supported by matplotlib +:meth:`scatter `. The example below shows a +bubble chart using a column of the ``DataFrame`` as the bubble size. .. 
ipython:: python @@ -631,7 +638,7 @@ You can specify alternative aggregations by passing values to the ``C`` and and ``reduce_C_function`` is a function of one argument that reduces all the values in a bin to a single number (e.g. ``mean``, ``max``, ``sum``, ``std``). In this example the positions are given by columns ``a`` and ``b``, while the value is -given by column ``z``. The bins are aggregated with numpy's ``max`` function. +given by column ``z``. The bins are aggregated with NumPy's ``max`` function. .. ipython:: python :suppress: @@ -685,14 +692,16 @@ A ``ValueError`` will be raised if there are any negative values in your data. plt.close('all') -For pie plots it's best to use square figures, one's with an equal aspect ratio. You can create the -figure with equal width and height, or force the aspect ratio to be equal after plotting by -calling ``ax.set_aspect('equal')`` on the returned ``axes`` object. +For pie plots it's best to use square figures, i.e. a figure with an aspect ratio of 1. +You can create the figure with equal width and height, or force the aspect ratio +to be equal after plotting by calling ``ax.set_aspect('equal')`` on the returned +``axes`` object. -Note that pie plot with :class:`DataFrame` requires that you either specify a target column by the ``y`` -argument or ``subplots=True``. When ``y`` is specified, pie plot of selected column -will be drawn. If ``subplots=True`` is specified, pie plots for each column are drawn as subplots. -A legend will be drawn in each pie plots by default; specify ``legend=False`` to hide it. +Note that pie plot with :class:`DataFrame` requires that you either specify a +target column by the ``y`` argument or ``subplots=True``. When ``y`` is +specified, a pie plot of the selected column will be drawn. If ``subplots=True`` is +specified, pie plots for each column are drawn as subplots. A legend will be +drawn in each pie plot by default; specify ``legend=False`` to hide it. .. 
ipython:: python :suppress: @@ -762,7 +771,7 @@ See the `matplotlib pie documentation `_ +for more information. By coloring these curves differently for each class it is possible to visualize data clustering. Curves belonging to samples of the same class will usually be closer together and form larger structures. @@ -883,8 +893,10 @@ of the same class will usually be closer together and form larger structures. Parallel Coordinates ~~~~~~~~~~~~~~~~~~~~ -Parallel coordinates is a plotting technique for plotting multivariate data. -It allows one to see clusters in data and to estimate other statistics visually. +Parallel coordinates is a plotting technique for plotting multivariate data, +see the `Wikipedia entry`_ +for an introduction. +Parallel coordinates allows one to see clusters in data and to estimate other statistics visually. Using parallel coordinates points are represented as connected line segments. Each vertical line represents one attribute. One set of connected line segments represents one data point. Points that tend to cluster will appear closer together. @@ -912,7 +924,9 @@ Lag Plot Lag plots are used to check if a data set or time series is random. Random data should not exhibit any structure in the lag plot. Non-random structure -implies that the underlying data are not random. +implies that the underlying data are not random. The ``lag`` argument may +be passed, and when ``lag=1`` the plot is essentially ``data[:-1]`` vs. +``data[1:]``. .. ipython:: python :suppress: @@ -947,7 +961,9 @@ If time series is random, such autocorrelations should be near zero for any and all time-lag separations. If time series is non-random then one or more of the autocorrelations will be significantly non-zero. The horizontal lines displayed in the plot correspond to 95% and 99% confidence bands. The dashed line is 99% -confidence band. +confidence band. See the +`Wikipedia entry`_ for more about +autocorrelation plots. .. 
ipython:: python :suppress: @@ -1016,6 +1032,8 @@ unit interval). The point in the plane, where our sample settles to (where the forces acting on our sample are at an equilibrium) is where a dot representing our sample will be drawn. Depending on which class that sample belongs it will be colored differently. +See the R package `Radviz`_ +for more information. **Note**: The "Iris" dataset is available `here `__. @@ -1046,7 +1064,7 @@ Setting the plot style From version 1.5 and up, matplotlib offers a range of preconfigured plotting styles. Setting the style can be used to easily give plots the general look that you want. Setting the style is as easy as calling ``matplotlib.style.use(my_plot_style)`` before -creating your plot. For example you could do ``matplotlib.style.use('ggplot')`` for ggplot-style +creating your plot. For example you could write ``matplotlib.style.use('ggplot')`` for ggplot-style plots. You can see the various available style names at ``matplotlib.style.available`` and it's very @@ -1147,7 +1165,7 @@ To plot data on a secondary y-axis, use the ``secondary_y`` keyword: plt.close('all') -To plot some columns in a DataFrame, give the column names to the ``secondary_y`` +To plot some columns in a ``DataFrame``, give the column names to the ``secondary_y`` keyword: .. ipython:: python @@ -1248,7 +1266,7 @@ See the :meth:`autofmt_xdate ` method and the Subplots ~~~~~~~~ -Each Series in a DataFrame can be plotted on a different axis +Each ``Series`` in a ``DataFrame`` can be plotted on a different axis with the ``subplots`` keyword: .. ipython:: python @@ -1264,9 +1282,9 @@ with the ``subplots`` keyword: Using Layout and Targeting Multiple Axes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The layout of subplots can be specified by ``layout`` keyword. It can accept +The layout of subplots can be specified by the ``layout`` keyword. It can accept ``(rows, columns)``. The ``layout`` keyword can be used in -``hist`` and ``boxplot`` also. 
If input is invalid, ``ValueError`` will be raised. +``hist`` and ``boxplot`` also. If the input is invalid, a ``ValueError`` will be raised. The number of axes which can be contained by rows x columns specified by ``layout`` must be larger than the number of required subplots. If layout can contain more axes than required, @@ -1284,7 +1302,7 @@ or columns needed, given the other. plt.close('all') -The above example is identical to using +The above example is identical to using: .. ipython:: python @@ -1298,11 +1316,11 @@ The above example is identical to using The required number of columns (3) is inferred from the number of series to plot and the given number of rows (2). -Also, you can pass multiple axes created beforehand as list-like via ``ax`` keyword. -This allows to use more complicated layout. +You can pass multiple axes created beforehand as list-like via ``ax`` keyword. +This allows more complicated layouts. The passed axes must be the same number as the subplots being drawn. -When multiple axes are passed via ``ax`` keyword, ``layout``, ``sharex`` and ``sharey`` keywords +When multiple axes are passed via the ``ax`` keyword, ``layout``, ``sharex`` and ``sharey`` keywords don't affect to the output. You should explicitly pass ``sharex=False`` and ``sharey=False``, otherwise you will see a warning. @@ -1359,13 +1377,13 @@ Another option is passing an ``ax`` argument to :meth:`Series.plot` to plot on a Plotting With Error Bars ~~~~~~~~~~~~~~~~~~~~~~~~ -Plotting with error bars is now supported in the :meth:`DataFrame.plot` and :meth:`Series.plot` +Plotting with error bars is supported in :meth:`DataFrame.plot` and :meth:`Series.plot`. -Horizontal and vertical errorbars can be supplied to the ``xerr`` and ``yerr`` keyword arguments to :meth:`~DataFrame.plot()`. The error values can be specified using a variety of formats. +Horizontal and vertical error bars can be supplied to the ``xerr`` and ``yerr`` keyword arguments to :meth:`~DataFrame.plot()`. 
The error values can be specified using a variety of formats: -- As a :class:`DataFrame` or ``dict`` of errors with column names matching the ``columns`` attribute of the plotting :class:`DataFrame` or matching the ``name`` attribute of the :class:`Series` -- As a ``str`` indicating which of the columns of plotting :class:`DataFrame` contain the error values -- As raw values (``list``, ``tuple``, or ``np.ndarray``). Must be the same length as the plotting :class:`DataFrame`/:class:`Series` +- As a :class:`DataFrame` or ``dict`` of errors with column names matching the ``columns`` attribute of the plotting :class:`DataFrame` or matching the ``name`` attribute of the :class:`Series`. +- As a ``str`` indicating which of the columns of plotting :class:`DataFrame` contain the error values. +- As raw values (``list``, ``tuple``, or ``np.ndarray``). Must be the same length as the plotting :class:`DataFrame`/:class:`Series`. Asymmetrical error bars are also supported, however raw error values must be provided in this case. For a ``M`` length :class:`Series`, a ``Mx2`` array should be provided indicating lower and upper (or left and right) errors. For a ``MxN`` :class:`DataFrame`, asymmetrical errors should be in a ``Mx2xN`` array. @@ -1420,7 +1438,10 @@ Plotting with matplotlib table is now supported in :meth:`DataFrame.plot` and : plt.close('all') -Also, you can pass different :class:`DataFrame` or :class:`Series` for ``table`` keyword. The data will be drawn as displayed in print method (not transposed automatically). If required, it should be transposed manually as below example. +Also, you can pass a different :class:`DataFrame` or :class:`Series` to the +``table`` keyword. The data will be drawn as displayed in print method +(not transposed automatically). If required, it should be transposed manually +as seen in the example below. .. 
ipython:: python @@ -1434,7 +1455,10 @@ Also, you can pass different :class:`DataFrame` or :class:`Series` for ``table`` plt.close('all') -Finally, there is a helper function ``pandas.plotting.table`` to create a table from :class:`DataFrame` and :class:`Series`, and add it to an ``matplotlib.Axes``. This function can accept keywords which matplotlib table has. +There also exists a helper function ``pandas.plotting.table``, which creates a +table from :class:`DataFrame` or :class:`Series`, and adds it to a +``matplotlib.Axes`` instance. This function can accept keywords which the +matplotlib `table `__ has. .. ipython:: python @@ -1461,18 +1485,18 @@ Colormaps A potential issue when plotting a large number of columns is that it can be difficult to distinguish some series due to repetition in the default colors. To -remedy this, DataFrame plotting supports the use of the ``colormap=`` argument, +remedy this, ``DataFrame`` plotting supports the use of the ``colormap`` argument, which accepts either a Matplotlib `colormap `__ or a string that is a name of a colormap registered with Matplotlib. A visualization of the default matplotlib colormaps is available `here -`__. +`__. As matplotlib does not directly support colormaps for line-based plots, the colors are selected based on an even spacing determined by the number of columns -in the DataFrame. There is no consideration made for background color, so some +in the ``DataFrame``. There is no consideration made for background color, so some colormaps will produce lines that are not easily visible. -To use the cubehelix colormap, we can simply pass ``'cubehelix'`` to ``colormap=`` +To use the cubehelix colormap, we can pass ``colormap='cubehelix'``. .. ipython:: python :suppress: @@ -1494,7 +1518,7 @@ To use the cubehelix colormap, we can simply pass ``'cubehelix'`` to ``colormap= plt.close('all') -or we can pass the colormap itself +Alternatively, we can pass the colormap itself: .. 
ipython:: python @@ -1565,9 +1589,9 @@ Plotting directly with matplotlib In some situations it may still be preferable or necessary to prepare plots directly with matplotlib, for instance when a certain type of plot or -customization is not (yet) supported by pandas. Series and DataFrame objects -behave like arrays and can therefore be passed directly to matplotlib functions -without explicit casts. +customization is not (yet) supported by pandas. ``Series`` and ``DataFrame`` +objects behave like arrays and can therefore be passed directly to +matplotlib functions without explicit casts. pandas also automatically registers formatters and locators that recognize date indices, thereby extending date and time support to practically all plot types From c4bf26ce218873255d8b26ae298ca64c39cb15d0 Mon Sep 17 00:00:00 2001 From: topper-123 Date: Thu, 1 Feb 2018 09:12:00 +0000 Subject: [PATCH 018/217] DEPR/CLN: Remove pd.rolling_*, pd.expanding* and pd.ewm* (#18723) * remove pd.rolling_*, pd.expanding_* and pd.ewm* and related code * added test_expanding_func and test_expanding_apply * recreate _check_ndarray inline in _check_moment_func --- doc/source/computation.rst | 11 +- doc/source/whatsnew/v0.23.0.txt | 2 + pandas/__init__.py | 1 - pandas/stats/__init__.py | 0 pandas/stats/api.py | 7 - pandas/stats/moments.py | 855 -------------------------------- pandas/tests/api/test_api.py | 17 +- pandas/tests/test_window.py | 725 +++++++++++---------------- 8 files changed, 282 insertions(+), 1336 deletions(-) delete mode 100644 pandas/stats/__init__.py delete mode 100644 pandas/stats/api.py delete mode 100644 pandas/stats/moments.py diff --git a/doc/source/computation.rst b/doc/source/computation.rst index 06afa440aa26c..a64542fa71705 100644 --- a/doc/source/computation.rst +++ b/doc/source/computation.rst @@ -209,19 +209,12 @@ Window Functions .. currentmodule:: pandas.core.window -.. 
warning:: - - Prior to version 0.18.0, ``pd.rolling_*``, ``pd.expanding_*``, and ``pd.ewm*`` were module level - functions and are now deprecated. These are replaced by using the :class:`~pandas.core.window.Rolling`, :class:`~pandas.core.window.Expanding` and :class:`~pandas.core.window.EWM`. objects and a corresponding method call. - - The deprecation warning will show the new syntax, see an example :ref:`here `. - -For working with data, a number of windows functions are provided for +For working with data, a number of window functions are provided for computing common *window* or *rolling* statistics. Among these are count, sum, mean, median, correlation, variance, covariance, standard deviation, skewness, and kurtosis. -Starting in version 0.18.1, the ``rolling()`` and ``expanding()`` +The ``rolling()`` and ``expanding()`` functions can be used directly from DataFrameGroupBy objects, see the :ref:`groupby docs `. diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 592c0788070a1..2bd2bb199bf1f 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -361,6 +361,8 @@ Removal of prior version deprecations/changes - The ``labels`` attribute of the ``Categorical`` class has been removed in favor of :attribute:`Categorical.codes` (:issue:`7768`) - The ``flavor`` parameter have been removed from func:`to_sql` method (:issue:`13611`) - The modules `pandas.tools.hashing` and `pandas.util.hashing` have been removed (:issue:`16223`) +- The top-level functions ``pd.rolling_*``, ``pd.expanding_*`` and ``pd.ewm*`` have been removed (Deprecated since v0.18). + Instead, use the DataFrame/Series methods :attr:`~DataFrame.rolling`, :attr:`~DataFrame.expanding` and :attr:`~DataFrame.ewm` (:issue:`18723`) .. 
_whatsnew_0230.performance: diff --git a/pandas/__init__.py b/pandas/__init__.py index 78501620d780b..97ae73174c09c 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -41,7 +41,6 @@ from pandas.core.api import * from pandas.core.sparse.api import * -from pandas.stats.api import * from pandas.tseries.api import * from pandas.core.computation.api import * from pandas.core.reshape.api import * diff --git a/pandas/stats/__init__.py b/pandas/stats/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/pandas/stats/api.py b/pandas/stats/api.py deleted file mode 100644 index 2a11456d4f9e5..0000000000000 --- a/pandas/stats/api.py +++ /dev/null @@ -1,7 +0,0 @@ -""" -Common namespace of statistical functions -""" - -# flake8: noqa - -from pandas.stats.moments import * diff --git a/pandas/stats/moments.py b/pandas/stats/moments.py deleted file mode 100644 index 1cd98feb05ea0..0000000000000 --- a/pandas/stats/moments.py +++ /dev/null @@ -1,855 +0,0 @@ -""" -Provides rolling statistical moments and related descriptive -statistics implemented in Cython -""" -from __future__ import division - -import warnings -import numpy as np -from pandas.core.dtypes.common import is_scalar -from pandas.core.api import DataFrame, Series -from pandas.util._decorators import Substitution, Appender - -__all__ = ['rolling_count', 'rolling_max', 'rolling_min', - 'rolling_sum', 'rolling_mean', 'rolling_std', 'rolling_cov', - 'rolling_corr', 'rolling_var', 'rolling_skew', 'rolling_kurt', - 'rolling_quantile', 'rolling_median', 'rolling_apply', - 'rolling_window', - 'ewma', 'ewmvar', 'ewmstd', 'ewmvol', 'ewmcorr', 'ewmcov', - 'expanding_count', 'expanding_max', 'expanding_min', - 'expanding_sum', 'expanding_mean', 'expanding_std', - 'expanding_cov', 'expanding_corr', 'expanding_var', - 'expanding_skew', 'expanding_kurt', 'expanding_quantile', - 'expanding_median', 'expanding_apply'] - -# 
----------------------------------------------------------------------------- -# Docs - -# The order of arguments for the _doc_template is: -# (header, args, kwargs, returns, notes) - -_doc_template = """ -%s - -Parameters ----------- -%s%s -Returns -------- -%s -%s -""" - -_roll_kw = """window : int - Size of the moving window. This is the number of observations used for - calculating the statistic. -min_periods : int, default None - Minimum number of observations in window required to have a value - (otherwise result is NA). -freq : string or DateOffset object, optional (default None) - Frequency to conform the data to before computing the statistic. Specified - as a frequency string or DateOffset object. -center : boolean, default False - Set the labels at the center of the window. -how : string, default '%s' - Method for down- or re-sampling -""" - -_roll_notes = r""" -Notes ------ -By default, the result is set to the right edge of the window. This can be -changed to the center of the window by setting ``center=True``. - -The `freq` keyword is used to conform time series data to a specified -frequency by resampling the data. This is done with the default parameters -of :meth:`~pandas.Series.resample` (i.e. using the `mean`). -""" - - -_ewm_kw = r"""com : float, optional - Specify decay in terms of center of mass, - :math:`\alpha = 1 / (1 + com),\text{ for } com \geq 0` -span : float, optional - Specify decay in terms of span, - :math:`\alpha = 2 / (span + 1),\text{ for } span \geq 1` -halflife : float, optional - Specify decay in terms of half-life, - :math:`\alpha = 1 - exp(log(0.5) / halflife),\text{ for } halflife > 0` -alpha : float, optional - Specify smoothing factor :math:`\alpha` directly, - :math:`0 < \alpha \leq 1` - - .. versionadded:: 0.18.0 - -min_periods : int, default 0 - Minimum number of observations in window required to have a value - (otherwise result is NA). 
-freq : None or string alias / date offset object, default=None - Frequency to conform to before computing statistic -adjust : boolean, default True - Divide by decaying adjustment factor in beginning periods to account for - imbalance in relative weightings (viewing EWMA as a moving average) -how : string, default 'mean' - Method for down- or re-sampling -ignore_na : boolean, default False - Ignore missing values when calculating weights; - specify True to reproduce pre-0.15.0 behavior -""" - -_ewm_notes = r""" -Notes ------ -Exactly one of center of mass, span, half-life, and alpha must be provided. -Allowed values and relationship between the parameters are specified in the -parameter descriptions above; see the link at the end of this section for -a detailed explanation. - -When adjust is True (default), weighted averages are calculated using weights - (1-alpha)**(n-1), (1-alpha)**(n-2), ..., 1-alpha, 1. - -When adjust is False, weighted averages are calculated recursively as: - weighted_average[0] = arg[0]; - weighted_average[i] = (1-alpha)*weighted_average[i-1] + alpha*arg[i]. - -When ignore_na is False (default), weights are based on absolute positions. -For example, the weights of x and y used in calculating the final weighted -average of [x, None, y] are (1-alpha)**2 and 1 (if adjust is True), and -(1-alpha)**2 and alpha (if adjust is False). - -When ignore_na is True (reproducing pre-0.15.0 behavior), weights are based on -relative positions. For example, the weights of x and y used in calculating -the final weighted average of [x, None, y] are 1-alpha and 1 (if adjust is -True), and 1-alpha and alpha (if adjust is False). - -More details can be found at -http://pandas.pydata.org/pandas-docs/stable/computation.html#exponentially-weighted-windows -""" - -_expanding_kw = """min_periods : int, default None - Minimum number of observations in window required to have a value - (otherwise result is NA). 
-freq : string or DateOffset object, optional (default None) - Frequency to conform the data to before computing the statistic. Specified - as a frequency string or DateOffset object. -""" - - -_type_of_input_retval = "y : type of input argument" - -_flex_retval = """y : type depends on inputs - DataFrame / DataFrame -> DataFrame (matches on columns) or Panel (pairwise) - DataFrame / Series -> Computes result for each column - Series / Series -> Series""" - -_pairwise_retval = "y : Panel whose items are df1.index values" - -_unary_arg = "arg : Series, DataFrame\n" - -_binary_arg_flex = """arg1 : Series, DataFrame, or ndarray -arg2 : Series, DataFrame, or ndarray, optional - if not supplied then will default to arg1 and produce pairwise output -""" - -_binary_arg = """arg1 : Series, DataFrame, or ndarray -arg2 : Series, DataFrame, or ndarray -""" - -_pairwise_arg = """df1 : DataFrame -df2 : DataFrame -""" - -_pairwise_kw = """pairwise : bool, default False - If False then only matching columns between arg1 and arg2 will be used and - the output will be a DataFrame. - If True then all pairwise combinations will be calculated and the output - will be a Panel in the case of DataFrame inputs. In the case of missing - elements, only complete pairwise observations will be used. -""" - -_ddof_kw = """ddof : int, default 1 - Delta Degrees of Freedom. The divisor used in calculations - is ``N - ddof``, where ``N`` represents the number of elements. 
-""" - -_bias_kw = r"""bias : boolean, default False - Use a standard estimation bias correction -""" - - -def ensure_compat(dispatch, name, arg, func_kw=None, *args, **kwargs): - """ - wrapper function to dispatch to the appropriate window functions - wraps/unwraps ndarrays for compat - - can be removed when ndarray support is removed - """ - is_ndarray = isinstance(arg, np.ndarray) - if is_ndarray: - if arg.ndim == 1: - arg = Series(arg) - elif arg.ndim == 2: - arg = DataFrame(arg) - else: - raise AssertionError("cannot support ndim > 2 for ndarray compat") - - warnings.warn("pd.{dispatch}_{name} is deprecated for ndarrays and " - "will be removed " - "in a future version" - .format(dispatch=dispatch, name=name), - FutureWarning, stacklevel=3) - - # get the functional keywords here - if func_kw is None: - func_kw = [] - kwds = {} - for k in func_kw: - value = kwargs.pop(k, None) - if value is not None: - kwds[k] = value - - # TODO: the below is only in place temporary until this module is removed. 
- kwargs.pop('freq', None) # freq removed in 0.23 - # how is a keyword that if not-None should be in kwds - how = kwargs.pop('how', None) - if how is not None: - kwds['how'] = how - - r = getattr(arg, dispatch)(**kwargs) - - if not is_ndarray: - - # give a helpful deprecation message - # with copy-pastable arguments - pargs = ','.join("{a}={b}".format(a=a, b=b) - for a, b in kwargs.items() if b is not None) - aargs = ','.join(args) - if len(aargs): - aargs += ',' - - def f(a, b): - if is_scalar(b): - return "{a}={b}".format(a=a, b=b) - return "{a}=<{b}>".format(a=a, b=type(b).__name__) - aargs = ','.join(f(a, b) for a, b in kwds.items() if b is not None) - warnings.warn("pd.{dispatch}_{name} is deprecated for {klass} " - "and will be removed in a future version, replace with " - "\n\t{klass}.{dispatch}({pargs}).{name}({aargs})" - .format(klass=type(arg).__name__, pargs=pargs, - aargs=aargs, dispatch=dispatch, name=name), - FutureWarning, stacklevel=3) - - result = getattr(r, name)(*args, **kwds) - - if is_ndarray: - result = result.values - return result - - -def rolling_count(arg, window, **kwargs): - """ - Rolling count of number of non-NaN observations inside provided window. - - Parameters - ---------- - arg : DataFrame or numpy ndarray-like - window : int - Size of the moving window. This is the number of observations used for - calculating the statistic. - freq : string or DateOffset object, optional (default None) - Frequency to conform the data to before computing the - statistic. Specified as a frequency string or DateOffset object. - center : boolean, default False - Whether the label should correspond with center of window - how : string, default 'mean' - Method for down- or re-sampling - - Returns - ------- - rolling_count : type of caller - - Notes - ----- - The `freq` keyword is used to conform time series data to a specified - frequency by resampling the data. This is done with the default parameters - of :meth:`~pandas.Series.resample` (i.e. 
using the `mean`). - - To learn more about the frequency strings, please see `this link - `__. - """ - return ensure_compat('rolling', 'count', arg, window=window, **kwargs) - - -@Substitution("Unbiased moving covariance.", _binary_arg_flex, - _roll_kw % 'None' + _pairwise_kw + _ddof_kw, _flex_retval, - _roll_notes) -@Appender(_doc_template) -def rolling_cov(arg1, arg2=None, window=None, pairwise=None, **kwargs): - if window is None and isinstance(arg2, (int, float)): - window = arg2 - arg2 = arg1 - pairwise = True if pairwise is None else pairwise # only default unset - elif arg2 is None: - arg2 = arg1 - pairwise = True if pairwise is None else pairwise # only default unset - return ensure_compat('rolling', - 'cov', - arg1, - other=arg2, - window=window, - pairwise=pairwise, - func_kw=['other', 'pairwise', 'ddof'], - **kwargs) - - -@Substitution("Moving sample correlation.", _binary_arg_flex, - _roll_kw % 'None' + _pairwise_kw, _flex_retval, _roll_notes) -@Appender(_doc_template) -def rolling_corr(arg1, arg2=None, window=None, pairwise=None, **kwargs): - if window is None and isinstance(arg2, (int, float)): - window = arg2 - arg2 = arg1 - pairwise = True if pairwise is None else pairwise # only default unset - elif arg2 is None: - arg2 = arg1 - pairwise = True if pairwise is None else pairwise # only default unset - return ensure_compat('rolling', - 'corr', - arg1, - other=arg2, - window=window, - pairwise=pairwise, - func_kw=['other', 'pairwise'], - **kwargs) - - -# ----------------------------------------------------------------------------- -# Exponential moving moments - - -@Substitution("Exponentially-weighted moving average", _unary_arg, _ewm_kw, - _type_of_input_retval, _ewm_notes) -@Appender(_doc_template) -def ewma(arg, com=None, span=None, halflife=None, alpha=None, min_periods=0, - freq=None, adjust=True, how=None, ignore_na=False): - return ensure_compat('ewm', - 'mean', - arg, - com=com, - span=span, - halflife=halflife, - alpha=alpha, - 
min_periods=min_periods, - freq=freq, - adjust=adjust, - how=how, - ignore_na=ignore_na) - - -@Substitution("Exponentially-weighted moving variance", _unary_arg, - _ewm_kw + _bias_kw, _type_of_input_retval, _ewm_notes) -@Appender(_doc_template) -def ewmvar(arg, com=None, span=None, halflife=None, alpha=None, min_periods=0, - bias=False, freq=None, how=None, ignore_na=False, adjust=True): - return ensure_compat('ewm', - 'var', - arg, - com=com, - span=span, - halflife=halflife, - alpha=alpha, - min_periods=min_periods, - freq=freq, - adjust=adjust, - how=how, - ignore_na=ignore_na, - bias=bias, - func_kw=['bias']) - - -@Substitution("Exponentially-weighted moving std", _unary_arg, - _ewm_kw + _bias_kw, _type_of_input_retval, _ewm_notes) -@Appender(_doc_template) -def ewmstd(arg, com=None, span=None, halflife=None, alpha=None, min_periods=0, - bias=False, freq=None, how=None, ignore_na=False, adjust=True): - return ensure_compat('ewm', - 'std', - arg, - com=com, - span=span, - halflife=halflife, - alpha=alpha, - min_periods=min_periods, - freq=freq, - adjust=adjust, - how=how, - ignore_na=ignore_na, - bias=bias, - func_kw=['bias']) - - -ewmvol = ewmstd - - -@Substitution("Exponentially-weighted moving covariance", _binary_arg_flex, - _ewm_kw + _pairwise_kw, _type_of_input_retval, _ewm_notes) -@Appender(_doc_template) -def ewmcov(arg1, arg2=None, com=None, span=None, halflife=None, alpha=None, - min_periods=0, bias=False, freq=None, pairwise=None, how=None, - ignore_na=False, adjust=True): - if arg2 is None: - arg2 = arg1 - pairwise = True if pairwise is None else pairwise - elif isinstance(arg2, (int, float)) and com is None: - com = arg2 - arg2 = arg1 - pairwise = True if pairwise is None else pairwise - - return ensure_compat('ewm', - 'cov', - arg1, - other=arg2, - com=com, - span=span, - halflife=halflife, - alpha=alpha, - min_periods=min_periods, - bias=bias, - freq=freq, - how=how, - ignore_na=ignore_na, - adjust=adjust, - pairwise=pairwise, - func_kw=['other', 
'pairwise', 'bias']) - - -@Substitution("Exponentially-weighted moving correlation", _binary_arg_flex, - _ewm_kw + _pairwise_kw, _type_of_input_retval, _ewm_notes) -@Appender(_doc_template) -def ewmcorr(arg1, arg2=None, com=None, span=None, halflife=None, alpha=None, - min_periods=0, freq=None, pairwise=None, how=None, ignore_na=False, - adjust=True): - if arg2 is None: - arg2 = arg1 - pairwise = True if pairwise is None else pairwise - elif isinstance(arg2, (int, float)) and com is None: - com = arg2 - arg2 = arg1 - pairwise = True if pairwise is None else pairwise - return ensure_compat('ewm', - 'corr', - arg1, - other=arg2, - com=com, - span=span, - halflife=halflife, - alpha=alpha, - min_periods=min_periods, - freq=freq, - how=how, - ignore_na=ignore_na, - adjust=adjust, - pairwise=pairwise, - func_kw=['other', 'pairwise']) - -# --------------------------------------------------------------------- -# Python interface to Cython functions - - -def _rolling_func(name, desc, how=None, func_kw=None, additional_kw=''): - if how is None: - how_arg_str = 'None' - else: - how_arg_str = "'{how}".format(how=how) - - @Substitution(desc, _unary_arg, _roll_kw % how_arg_str + additional_kw, - _type_of_input_retval, _roll_notes) - @Appender(_doc_template) - def f(arg, window, min_periods=None, freq=None, center=False, - **kwargs): - - return ensure_compat('rolling', - name, - arg, - window=window, - min_periods=min_periods, - freq=freq, - center=center, - func_kw=func_kw, - **kwargs) - return f - - -rolling_max = _rolling_func('max', 'Moving maximum.', how='max') -rolling_min = _rolling_func('min', 'Moving minimum.', how='min') -rolling_sum = _rolling_func('sum', 'Moving sum.') -rolling_mean = _rolling_func('mean', 'Moving mean.') -rolling_median = _rolling_func('median', 'Moving median.', how='median') -rolling_std = _rolling_func('std', 'Moving standard deviation.', - func_kw=['ddof'], - additional_kw=_ddof_kw) -rolling_var = _rolling_func('var', 'Moving variance.', - 
func_kw=['ddof'], - additional_kw=_ddof_kw) -rolling_skew = _rolling_func('skew', 'Unbiased moving skewness.') -rolling_kurt = _rolling_func('kurt', 'Unbiased moving kurtosis.') - - -def rolling_quantile(arg, window, quantile, min_periods=None, freq=None, - center=False): - """Moving quantile. - - Parameters - ---------- - arg : Series, DataFrame - window : int - Size of the moving window. This is the number of observations used for - calculating the statistic. - quantile : float - 0 <= quantile <= 1 - min_periods : int, default None - Minimum number of observations in window required to have a value - (otherwise result is NA). - freq : string or DateOffset object, optional (default None) - Frequency to conform the data to before computing the - statistic. Specified as a frequency string or DateOffset object. - center : boolean, default False - Whether the label should correspond with center of window - - Returns - ------- - y : type of input argument - - Notes - ----- - By default, the result is set to the right edge of the window. This can be - changed to the center of the window by setting ``center=True``. - - The `freq` keyword is used to conform time series data to a specified - frequency by resampling the data. This is done with the default parameters - of :meth:`~pandas.Series.resample` (i.e. using the `mean`). - - To learn more about the frequency strings, please see `this link - `__. - """ - return ensure_compat('rolling', - 'quantile', - arg, - window=window, - freq=freq, - center=center, - min_periods=min_periods, - func_kw=['quantile'], - quantile=quantile) - - -def rolling_apply(arg, window, func, min_periods=None, freq=None, - center=False, args=(), kwargs={}): - """Generic moving function application. - - Parameters - ---------- - arg : Series, DataFrame - window : int - Size of the moving window. This is the number of observations used for - calculating the statistic. 
- func : function - Must produce a single value from an ndarray input - min_periods : int, default None - Minimum number of observations in window required to have a value - (otherwise result is NA). - freq : string or DateOffset object, optional (default None) - Frequency to conform the data to before computing the - statistic. Specified as a frequency string or DateOffset object. - center : boolean, default False - Whether the label should correspond with center of window - args : tuple - Passed on to func - kwargs : dict - Passed on to func - - Returns - ------- - y : type of input argument - - Notes - ----- - By default, the result is set to the right edge of the window. This can be - changed to the center of the window by setting ``center=True``. - - The `freq` keyword is used to conform time series data to a specified - frequency by resampling the data. This is done with the default parameters - of :meth:`~pandas.Series.resample` (i.e. using the `mean`). - - To learn more about the frequency strings, please see `this link - `__. - """ - return ensure_compat('rolling', - 'apply', - arg, - window=window, - freq=freq, - center=center, - min_periods=min_periods, - func_kw=['func', 'args', 'kwargs'], - func=func, - args=args, - kwargs=kwargs) - - -def rolling_window(arg, window=None, win_type=None, min_periods=None, - freq=None, center=False, mean=True, - axis=0, how=None, **kwargs): - """ - Applies a moving window of type ``window_type`` and size ``window`` - on the data. - - Parameters - ---------- - arg : Series, DataFrame - window : int or ndarray - Weighting window specification. If the window is an integer, then it is - treated as the window length and win_type is required - win_type : str, default None - Window type (see Notes) - min_periods : int, default None - Minimum number of observations in window required to have a value - (otherwise result is NA). 
- freq : string or DateOffset object, optional (default None) - Frequency to conform the data to before computing the - statistic. Specified as a frequency string or DateOffset object. - center : boolean, default False - Whether the label should correspond with center of window - mean : boolean, default True - If True computes weighted mean, else weighted sum - axis : {0, 1}, default 0 - how : string, default 'mean' - Method for down- or re-sampling - - Returns - ------- - y : type of input argument - - Notes - ----- - The recognized window types are: - - * ``boxcar`` - * ``triang`` - * ``blackman`` - * ``hamming`` - * ``bartlett`` - * ``parzen`` - * ``bohman`` - * ``blackmanharris`` - * ``nuttall`` - * ``barthann`` - * ``kaiser`` (needs beta) - * ``gaussian`` (needs std) - * ``general_gaussian`` (needs power, width) - * ``slepian`` (needs width). - - By default, the result is set to the right edge of the window. This can be - changed to the center of the window by setting ``center=True``. - - The `freq` keyword is used to conform time series data to a specified - frequency by resampling the data. This is done with the default parameters - of :meth:`~pandas.Series.resample` (i.e. using the `mean`). - - To learn more about the frequency strings, please see `this link - `__. 
- """ - func = 'mean' if mean else 'sum' - return ensure_compat('rolling', - func, - arg, - window=window, - win_type=win_type, - freq=freq, - center=center, - min_periods=min_periods, - axis=axis, - func_kw=kwargs.keys(), - **kwargs) - - -def _expanding_func(name, desc, func_kw=None, additional_kw=''): - @Substitution(desc, _unary_arg, _expanding_kw + additional_kw, - _type_of_input_retval, "") - @Appender(_doc_template) - def f(arg, min_periods=1, freq=None, **kwargs): - return ensure_compat('expanding', - name, - arg, - min_periods=min_periods, - func_kw=func_kw, - **kwargs) - return f - - -expanding_max = _expanding_func('max', 'Expanding maximum.') -expanding_min = _expanding_func('min', 'Expanding minimum.') -expanding_sum = _expanding_func('sum', 'Expanding sum.') -expanding_mean = _expanding_func('mean', 'Expanding mean.') -expanding_median = _expanding_func('median', 'Expanding median.') - -expanding_std = _expanding_func('std', 'Expanding standard deviation.', - func_kw=['ddof'], - additional_kw=_ddof_kw) -expanding_var = _expanding_func('var', 'Expanding variance.', - func_kw=['ddof'], - additional_kw=_ddof_kw) -expanding_skew = _expanding_func('skew', 'Unbiased expanding skewness.') -expanding_kurt = _expanding_func('kurt', 'Unbiased expanding kurtosis.') - - -def expanding_count(arg, freq=None): - """ - Expanding count of number of non-NaN observations. - - Parameters - ---------- - arg : DataFrame or numpy ndarray-like - freq : string or DateOffset object, optional (default None) - Frequency to conform the data to before computing the - statistic. Specified as a frequency string or DateOffset object. - - Returns - ------- - expanding_count : type of caller - - Notes - ----- - The `freq` keyword is used to conform time series data to a specified - frequency by resampling the data. This is done with the default parameters - of :meth:`~pandas.Series.resample` (i.e. using the `mean`). 
- - To learn more about the frequency strings, please see `this link - `__. - """ - return ensure_compat('expanding', 'count', arg, freq=freq) - - -def expanding_quantile(arg, quantile, min_periods=1, freq=None): - """Expanding quantile. - - Parameters - ---------- - arg : Series, DataFrame - quantile : float - 0 <= quantile <= 1 - min_periods : int, default None - Minimum number of observations in window required to have a value - (otherwise result is NA). - freq : string or DateOffset object, optional (default None) - Frequency to conform the data to before computing the - statistic. Specified as a frequency string or DateOffset object. - - Returns - ------- - y : type of input argument - - Notes - ----- - The `freq` keyword is used to conform time series data to a specified - frequency by resampling the data. This is done with the default parameters - of :meth:`~pandas.Series.resample` (i.e. using the `mean`). - - To learn more about the frequency strings, please see `this link - `__. 
- """ - return ensure_compat('expanding', - 'quantile', - arg, - freq=freq, - min_periods=min_periods, - func_kw=['quantile'], - quantile=quantile) - - -@Substitution("Unbiased expanding covariance.", _binary_arg_flex, - _expanding_kw + _pairwise_kw + _ddof_kw, _flex_retval, "") -@Appender(_doc_template) -def expanding_cov(arg1, arg2=None, min_periods=1, freq=None, - pairwise=None, ddof=1): - if arg2 is None: - arg2 = arg1 - pairwise = True if pairwise is None else pairwise - elif isinstance(arg2, (int, float)) and min_periods is None: - min_periods = arg2 - arg2 = arg1 - pairwise = True if pairwise is None else pairwise - return ensure_compat('expanding', - 'cov', - arg1, - other=arg2, - min_periods=min_periods, - pairwise=pairwise, - freq=freq, - ddof=ddof, - func_kw=['other', 'pairwise', 'ddof']) - - -@Substitution("Expanding sample correlation.", _binary_arg_flex, - _expanding_kw + _pairwise_kw, _flex_retval, "") -@Appender(_doc_template) -def expanding_corr(arg1, arg2=None, min_periods=1, freq=None, pairwise=None): - if arg2 is None: - arg2 = arg1 - pairwise = True if pairwise is None else pairwise - elif isinstance(arg2, (int, float)) and min_periods is None: - min_periods = arg2 - arg2 = arg1 - pairwise = True if pairwise is None else pairwise - return ensure_compat('expanding', - 'corr', - arg1, - other=arg2, - min_periods=min_periods, - pairwise=pairwise, - freq=freq, - func_kw=['other', 'pairwise', 'ddof']) - - -def expanding_apply(arg, func, min_periods=1, freq=None, - args=(), kwargs={}): - """Generic expanding function application. - - Parameters - ---------- - arg : Series, DataFrame - func : function - Must produce a single value from an ndarray input - min_periods : int, default None - Minimum number of observations in window required to have a value - (otherwise result is NA). - freq : string or DateOffset object, optional (default None) - Frequency to conform the data to before computing the - statistic. 
Specified as a frequency string or DateOffset object. - args : tuple - Passed on to func - kwargs : dict - Passed on to func - - Returns - ------- - y : type of input argument - - Notes - ----- - The `freq` keyword is used to conform time series data to a specified - frequency by resampling the data. This is done with the default parameters - of :meth:`~pandas.Series.resample` (i.e. using the `mean`). - - To learn more about the frequency strings, please see `this link - `__. - """ - return ensure_compat('expanding', - 'apply', - arg, - freq=freq, - min_periods=min_periods, - func_kw=['func', 'args', 'kwargs'], - func=func, - args=args, - kwargs=kwargs) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index c20767b09178c..ea6c250420b13 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -35,8 +35,7 @@ class TestPDApi(Base): 'util', 'options', 'io'] # these are already deprecated; awaiting removal - deprecated_modules = ['stats', 'datetools', 'parser', - 'json', 'lib', 'tslib'] + deprecated_modules = ['datetools', 'parser', 'json', 'lib', 'tslib'] # misc misc = ['IndexSlice', 'NaT'] @@ -91,19 +90,7 @@ class TestPDApi(Base): deprecated_funcs_in_future = [] # these are already deprecated; awaiting removal - deprecated_funcs = ['ewma', 'ewmcorr', 'ewmcov', 'ewmstd', 'ewmvar', - 'ewmvol', 'expanding_apply', 'expanding_corr', - 'expanding_count', 'expanding_cov', 'expanding_kurt', - 'expanding_max', 'expanding_mean', 'expanding_median', - 'expanding_min', 'expanding_quantile', - 'expanding_skew', 'expanding_std', 'expanding_sum', - 'expanding_var', 'rolling_apply', - 'rolling_corr', 'rolling_count', 'rolling_cov', - 'rolling_kurt', 'rolling_max', 'rolling_mean', - 'rolling_median', 'rolling_min', 'rolling_quantile', - 'rolling_skew', 'rolling_std', 'rolling_sum', - 'rolling_var', 'rolling_window', - 'pnow', 'match', 'groupby', 'get_store', + deprecated_funcs = ['pnow', 'match', 'groupby', 'get_store', 'plot_params', 
'scatter_matrix'] def test_api(self): diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 6f9e872526d0a..22526d14a7168 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -1,6 +1,5 @@ from itertools import product import pytest -import sys import warnings from warnings import catch_warnings @@ -9,16 +8,15 @@ import numpy as np import pandas as pd -from pandas import (Series, DataFrame, bdate_range, isna, - notna, concat, Timestamp, Index) -import pandas.stats.moments as mom +from pandas import (Series, DataFrame, bdate_range, + isna, notna, concat, Timestamp, Index) import pandas.core.window as rwindow import pandas.tseries.offsets as offsets from pandas.core.base import SpecificationError from pandas.errors import UnsupportedFunctionCall import pandas.util.testing as tm import pandas.util._test_decorators as td -from pandas.compat import range, zip, PY3 +from pandas.compat import range, zip N, K = 100, 10 @@ -610,19 +608,6 @@ def test_numpy_compat(self): getattr(e, func), dtype=np.float64) -class TestDeprecations(Base): - """ test that we are catching deprecation warnings """ - - def setup_method(self, method): - self._create_data() - - def test_deprecations(self): - - with catch_warnings(record=True): - mom.rolling_mean(np.ones(10), 3, center=True, axis=0) - mom.rolling_mean(Series(np.ones(10)), 3, center=True, axis=0) - - # gh-12373 : rolling functions error on float32 data # make sure rolling functions works for different dtypes # @@ -863,72 +848,55 @@ def test_centered_axis_validation(self): .rolling(window=3, center=True, axis=2).mean()) def test_rolling_sum(self): - self._check_moment_func(mom.rolling_sum, np.nansum, name='sum', + self._check_moment_func(np.nansum, name='sum', zero_min_periods_equal=False) def test_rolling_count(self): counter = lambda x: np.isfinite(x).astype(float).sum() - self._check_moment_func(mom.rolling_count, counter, name='count', - has_min_periods=False, preserve_nan=False, + 
self._check_moment_func(counter, name='count', has_min_periods=False, fill_value=0) def test_rolling_mean(self): - self._check_moment_func(mom.rolling_mean, np.mean, name='mean') + self._check_moment_func(np.mean, name='mean') @td.skip_if_no_scipy def test_cmov_mean(self): # GH 8238 vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48]) - xp = np.array([np.nan, np.nan, 9.962, 11.27, 11.564, 12.516, 12.818, - 12.952, np.nan, np.nan]) - - with catch_warnings(record=True): - rs = mom.rolling_mean(vals, 5, center=True) - tm.assert_almost_equal(xp, rs) - - xp = Series(rs) - rs = Series(vals).rolling(5, center=True).mean() - tm.assert_series_equal(xp, rs) + result = Series(vals).rolling(5, center=True).mean() + expected = Series([np.nan, np.nan, 9.962, 11.27, 11.564, 12.516, + 12.818, 12.952, np.nan, np.nan]) + tm.assert_series_equal(expected, result) @td.skip_if_no_scipy def test_cmov_window(self): # GH 8238 vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48]) - xp = np.array([np.nan, np.nan, 9.962, 11.27, 11.564, 12.516, 12.818, - 12.952, np.nan, np.nan]) - - with catch_warnings(record=True): - rs = mom.rolling_window(vals, 5, 'boxcar', center=True) - tm.assert_almost_equal(xp, rs) - - xp = Series(rs) - rs = Series(vals).rolling(5, win_type='boxcar', center=True).mean() - tm.assert_series_equal(xp, rs) + result = Series(vals).rolling(5, win_type='boxcar', center=True).mean() + expected = Series([np.nan, np.nan, 9.962, 11.27, 11.564, 12.516, + 12.818, 12.952, np.nan, np.nan]) + tm.assert_series_equal(expected, result) @td.skip_if_no_scipy def test_cmov_window_corner(self): # GH 8238 # all nan - vals = np.empty(10, dtype=float) - vals.fill(np.nan) - with catch_warnings(record=True): - rs = mom.rolling_window(vals, 5, 'boxcar', center=True) - assert np.isnan(rs).all() + vals = pd.Series([np.nan] * 10) + result = vals.rolling(5, center=True, win_type='boxcar').mean() + assert np.isnan(result).all() # empty - vals 
= np.array([]) - with catch_warnings(record=True): - rs = mom.rolling_window(vals, 5, 'boxcar', center=True) - assert len(rs) == 0 + vals = pd.Series([]) + result = vals.rolling(5, center=True, win_type='boxcar').mean() + assert len(result) == 0 # shorter than window - vals = np.random.randn(5) - with catch_warnings(record=True): - rs = mom.rolling_window(vals, 10, 'boxcar') - assert np.isnan(rs).all() - assert len(rs) == 5 + vals = pd.Series(np.random.randn(5)) + result = vals.rolling(10, win_type='boxcar').mean() + assert np.isnan(result).all() + assert len(result) == 5 @td.skip_if_no_scipy def test_cmov_window_frame(self): @@ -1097,38 +1065,31 @@ def test_cmov_window_special_linear_range(self): tm.assert_series_equal(xp, rs) def test_rolling_median(self): - with catch_warnings(record=True): - self._check_moment_func(mom.rolling_median, np.median, - name='median') + self._check_moment_func(np.median, name='median') def test_rolling_min(self): + self._check_moment_func(np.min, name='min') - with catch_warnings(record=True): - self._check_moment_func(mom.rolling_min, np.min, name='min') - - with catch_warnings(record=True): - a = np.array([1, 2, 3, 4, 5]) - b = mom.rolling_min(a, window=100, min_periods=1) - tm.assert_almost_equal(b, np.ones(len(a))) + a = pd.Series([1, 2, 3, 4, 5]) + result = a.rolling(window=100, min_periods=1).min() + expected = pd.Series(np.ones(len(a))) + tm.assert_series_equal(result, expected) - pytest.raises(ValueError, mom.rolling_min, np.array([1, 2, 3]), - window=3, min_periods=5) + with pytest.raises(ValueError): + pd.Series([1, 2, 3]).rolling(window=3, min_periods=5).min() def test_rolling_max(self): + self._check_moment_func(np.max, name='max') - with catch_warnings(record=True): - self._check_moment_func(mom.rolling_max, np.max, name='max') + a = pd.Series([1, 2, 3, 4, 5], dtype=np.float64) + b = a.rolling(window=100, min_periods=1).max() + tm.assert_almost_equal(a, b) - with catch_warnings(record=True): - a = np.array([1, 2, 3, 4, 
5], dtype=np.float64) - b = mom.rolling_max(a, window=100, min_periods=1) - tm.assert_almost_equal(a, b) - - pytest.raises(ValueError, mom.rolling_max, np.array([1, 2, 3]), - window=3, min_periods=5) + with pytest.raises(ValueError): + pd.Series([1, 2, 3]).rolling(window=3, min_periods=5).max() - def test_rolling_quantile(self): - qs = [0.0, .1, .5, .9, 1.0] + @pytest.mark.parametrize('q', [0.0, .1, .5, .9, 1.0]) + def test_rolling_quantile(self, q): def scoreatpercentile(a, per): values = np.sort(a, axis=0) @@ -1147,18 +1108,11 @@ def scoreatpercentile(a, per): return retval - for q in qs: - - def f(x, window, quantile, min_periods=None, freq=None, - center=False): - return mom.rolling_quantile(x, window, quantile, - min_periods=min_periods, freq=freq, - center=center) + def quantile_func(x): + return scoreatpercentile(x, q) - def alt(x): - return scoreatpercentile(x, q) - - self._check_moment_func(f, alt, name='quantile', quantile=q) + self._check_moment_func(quantile_func, name='quantile', + quantile=q) def test_rolling_quantile_np_percentile(self): # #9413: Tests that rolling window's quantile default behavior @@ -1207,15 +1161,10 @@ def test_rolling_apply(self): tm.assert_series_equal(ser, ser.rolling(10).apply(lambda x: x.mean())) - f = lambda x: x[np.isfinite(x)].mean() - - def roll_mean(x, window, min_periods=None, freq=None, center=False, - **kwargs): - return mom.rolling_apply(x, window, func=f, - min_periods=min_periods, freq=freq, - center=center) + def f(x): + return x[np.isfinite(x)].mean() - self._check_moment_func(roll_mean, np.mean, name='apply', func=f) + self._check_moment_func(np.mean, name='apply', func=f) # GH 8080 s = Series([None, None, None]) @@ -1228,39 +1177,34 @@ def roll_mean(x, window, min_periods=None, freq=None, center=False, def test_rolling_apply_out_of_bounds(self): # #1850 - arr = np.arange(4) + vals = pd.Series([1, 2, 3, 4]) - # it works! 
- with catch_warnings(record=True): - result = mom.rolling_apply(arr, 10, np.sum) - assert isna(result).all() + result = vals.rolling(10).apply(np.sum) + assert result.isna().all() - with catch_warnings(record=True): - result = mom.rolling_apply(arr, 10, np.sum, min_periods=1) - tm.assert_almost_equal(result, result) + result = vals.rolling(10, min_periods=1).apply(np.sum) + expected = pd.Series([1, 3, 6, 10], dtype=float) + tm.assert_almost_equal(result, expected) def test_rolling_std(self): - self._check_moment_func(mom.rolling_std, lambda x: np.std(x, ddof=1), + self._check_moment_func(lambda x: np.std(x, ddof=1), name='std') - self._check_moment_func(mom.rolling_std, lambda x: np.std(x, ddof=0), + self._check_moment_func(lambda x: np.std(x, ddof=0), name='std', ddof=0) def test_rolling_std_1obs(self): - with catch_warnings(record=True): - result = mom.rolling_std(np.array([1., 2., 3., 4., 5.]), - 1, min_periods=1) - expected = np.array([np.nan] * 5) - tm.assert_almost_equal(result, expected) + vals = pd.Series([1., 2., 3., 4., 5.]) - with catch_warnings(record=True): - result = mom.rolling_std(np.array([1., 2., 3., 4., 5.]), - 1, min_periods=1, ddof=0) - expected = np.zeros(5) - tm.assert_almost_equal(result, expected) + result = vals.rolling(1, min_periods=1).std() + expected = pd.Series([np.nan] * 5) + tm.assert_series_equal(result, expected) - with catch_warnings(record=True): - result = mom.rolling_std(np.array([np.nan, np.nan, 3., 4., 5.]), - 3, min_periods=2) + result = vals.rolling(1, min_periods=1).std(ddof=0) + expected = pd.Series([0.] * 5) + tm.assert_series_equal(result, expected) + + result = (pd.Series([np.nan, np.nan, 3, 4, 5]) + .rolling(3, min_periods=2).std()) assert np.isnan(result[2]) def test_rolling_std_neg_sqrt(self): @@ -1268,208 +1212,53 @@ def test_rolling_std_neg_sqrt(self): # Test move_nanstd for neg sqrt. 
- a = np.array([0.0011448196318903589, 0.00028718669878572767, - 0.00028718669878572767, 0.00028718669878572767, - 0.00028718669878572767]) - with catch_warnings(record=True): - b = mom.rolling_std(a, window=3) + a = pd.Series([0.0011448196318903589, 0.00028718669878572767, + 0.00028718669878572767, 0.00028718669878572767, + 0.00028718669878572767]) + b = a.rolling(window=3).std() assert np.isfinite(b[2:]).all() - with catch_warnings(record=True): - b = mom.ewmstd(a, span=3) + b = a.ewm(span=3).std() assert np.isfinite(b[2:]).all() def test_rolling_var(self): - self._check_moment_func(mom.rolling_var, lambda x: np.var(x, ddof=1), - test_stable=True, name='var') - self._check_moment_func(mom.rolling_var, lambda x: np.var(x, ddof=0), + self._check_moment_func(lambda x: np.var(x, ddof=1), + name='var') + self._check_moment_func(lambda x: np.var(x, ddof=0), name='var', ddof=0) @td.skip_if_no_scipy def test_rolling_skew(self): from scipy.stats import skew - self._check_moment_func(mom.rolling_skew, - lambda x: skew(x, bias=False), name='skew') + self._check_moment_func(lambda x: skew(x, bias=False), name='skew') @td.skip_if_no_scipy def test_rolling_kurt(self): from scipy.stats import kurtosis - self._check_moment_func(mom.rolling_kurt, - lambda x: kurtosis(x, bias=False), name='kurt') - - def test_fperr_robustness(self): - # TODO: remove this once python 2.5 out of picture - if PY3: - pytest.skip("doesn't work on python 3") - - # #2114 - data = 
'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1a@\xaa\xaa\xaa\xaa\xaa\xaa\x02@8\x8e\xe38\x8e\xe3\xe8?z\t\xed%\xb4\x97\xd0?\xa2\x0c<\xdd\x9a\x1f\xb6?\x82\xbb\xfa&y\x7f\x9d?\xac\'\xa7\xc4P\xaa\x83?\x90\xdf\xde\xb0k8j?`\xea\xe9u\xf2zQ?*\xe37\x9d\x98N7?\xe2.\xf5&v\x13\x1f?\xec\xc9\xf8\x19\xa4\xb7\x04?\x90b\xf6w\x85\x9f\xeb>\xb5A\xa4\xfaXj\xd2>F\x02\xdb\xf8\xcb\x8d\xb8>.\xac<\xfb\x87^\xa0>\xe8:\xa6\xf9_\xd3\x85>\xfb?\xe2cUU\xfd?\xfc\x7fA\xed8\x8e\xe3?\xa5\xaa\xac\x91\xf6\x12\xca?n\x1cs\xb6\xf9a\xb1?\xe8%D\xf3L-\x97?5\xddZD\x11\xe7~?#>\xe7\x82\x0b\x9ad?\xd9R4Y\x0fxK?;7x;\nP2?N\xf4JO\xb8j\x18?4\xf81\x8a%G\x00?\x9a\xf5\x97\r2\xb4\xe5>\xcd\x9c\xca\xbcB\xf0\xcc>3\x13\x87(\xd7J\xb3>\x99\x19\xb4\xe0\x1e\xb9\x99>ff\xcd\x95\x14&\x81>\x88\x88\xbc\xc7p\xddf>`\x0b\xa6_\x96|N>@\xb2n\xea\x0eS4>U\x98\x938i\x19\x1b>\x8eeb\xd0\xf0\x10\x02>\xbd\xdc-k\x96\x16\xe8=(\x93\x1e\xf2\x0e\x0f\xd0=\xe0n\xd3Bii\xb5=*\xe9\x19Y\x8c\x8c\x9c=\xc6\xf0\xbb\x90]\x08\x83=]\x96\xfa\xc0|`i=>d\xfc\xd5\xfd\xeaP=R0\xfb\xc7\xa7\x8e6=\xc2\x95\xf9_\x8a\x13\x1e=\xd6c\xa6\xea\x06\r\x04=r\xda\xdd8\t\xbc\xea<\xf6\xe6\x93\xd0\xb0\xd2\xd1<\x9d\xdeok\x96\xc3\xb7<&~\xea9s\xaf\x9f\xb8\x02@\xc6\xd2&\xfd\xa8\xf5\xe8?\xd9\xe1\x19\xfe\xc5\xa3\xd0?v\x82"\xa8\xb2/\xb6?\x9dX\x835\xee\x94\x9d?h\x90W\xce\x9e\xb8\x83?\x8a\xc0th~Kj?\\\x80\xf8\x9a\xa9\x87Q?%\xab\xa0\xce\x8c_7?1\xe4\x80\x13\x11*\x1f? 
\x98\x00\r\xb6\xc6\x04?\x80u\xabf\x9d\xb3\xeb>UNrD\xbew\xd2>\x1c\x13C[\xa8\x9f\xb8>\x12b\xd7m-\x1fQ@\xe3\x85>\xe6\x91)l\x00/m>Da\xc6\xf2\xaatS>\x05\xd7]\xee\xe3\xf09>' # noqa - - arr = np.frombuffer(data, dtype='= 0).all() - - with catch_warnings(record=True): - result = mom.rolling_mean(arr, 2) - assert (result[1:] >= 0).all() - - with catch_warnings(record=True): - result = mom.rolling_var(arr, 2) - assert (result[1:] >= 0).all() + self._check_moment_func(lambda x: kurtosis(x, bias=False), + name='kurt') - # #2527, ugh - arr = np.array([0.00012456, 0.0003, 0]) - with catch_warnings(record=True): - result = mom.rolling_mean(arr, 1) - assert result[-1] >= 0 - - with catch_warnings(record=True): - result = mom.rolling_mean(-arr, 1) - assert result[-1] <= 0 - - def _check_moment_func(self, f, static_comp, name=None, window=50, - has_min_periods=True, has_center=True, - has_time_rule=True, preserve_nan=True, - fill_value=None, test_stable=False, - zero_min_periods_equal=True, + def _check_moment_func(self, static_comp, name, has_min_periods=True, + has_center=True, has_time_rule=True, + fill_value=None, zero_min_periods_equal=True, **kwargs): - with warnings.catch_warnings(record=True): - self._check_ndarray(f, static_comp, window=window, - has_min_periods=has_min_periods, - preserve_nan=preserve_nan, - has_center=has_center, fill_value=fill_value, - test_stable=test_stable, - zero_min_periods_equal=zero_min_periods_equal, - **kwargs) - - with warnings.catch_warnings(record=True): - self._check_structures(f, static_comp, - has_min_periods=has_min_periods, - has_time_rule=has_time_rule, - fill_value=fill_value, - has_center=has_center, **kwargs) - - # new API - if name is not None: - self._check_structures(f, static_comp, name=name, - has_min_periods=has_min_periods, - has_time_rule=has_time_rule, - fill_value=fill_value, - has_center=has_center, **kwargs) - - def _check_ndarray(self, f, static_comp, window=50, has_min_periods=True, - preserve_nan=True, 
has_center=True, fill_value=None, - test_stable=False, test_window=True, - zero_min_periods_equal=True, **kwargs): - def get_result(arr, window, min_periods=None, center=False): - return f(arr, window, min_periods=min_periods, center=center, ** - kwargs) - - result = get_result(self.arr, window) - tm.assert_almost_equal(result[-1], static_comp(self.arr[-50:])) - - if preserve_nan: - assert (np.isnan(result[self._nan_locs]).all()) - - # excluding NaNs correctly - arr = randn(50) - arr[:10] = np.NaN - arr[-10:] = np.NaN - - if has_min_periods: - result = get_result(arr, 50, min_periods=30) - tm.assert_almost_equal(result[-1], static_comp(arr[10:-10])) - - # min_periods is working correctly - result = get_result(arr, 20, min_periods=15) - assert np.isnan(result[23]) - assert not np.isnan(result[24]) - - assert not np.isnan(result[-6]) - assert np.isnan(result[-5]) - - arr2 = randn(20) - result = get_result(arr2, 10, min_periods=5) - assert isna(result[3]) - assert notna(result[4]) - - if zero_min_periods_equal: - # min_periods=0 may be equivalent to min_periods=1 - result0 = get_result(arr, 20, min_periods=0) - result1 = get_result(arr, 20, min_periods=1) - tm.assert_almost_equal(result0, result1) - else: - result = get_result(arr, 50) - tm.assert_almost_equal(result[-1], static_comp(arr[10:-10])) - - # GH 7925 - if has_center: - if has_min_periods: - result = get_result(arr, 20, min_periods=15, center=True) - expected = get_result( - np.concatenate((arr, np.array([np.NaN] * 9))), 20, - min_periods=15)[9:] - else: - result = get_result(arr, 20, center=True) - expected = get_result( - np.concatenate((arr, np.array([np.NaN] * 9))), 20)[9:] - - tm.assert_numpy_array_equal(result, expected) - - if test_stable: - result = get_result(self.arr + 1e9, window) - tm.assert_almost_equal(result[-1], - static_comp(self.arr[-50:] + 1e9)) - - # Test window larger than array, #7297 - if test_window: - if has_min_periods: - for minp in (0, len(self.arr) - 1, len(self.arr)): - result = 
get_result(self.arr, len(self.arr) + 1, - min_periods=minp) - expected = get_result(self.arr, len(self.arr), - min_periods=minp) - nan_mask = np.isnan(result) - tm.assert_numpy_array_equal(nan_mask, np.isnan(expected)) - - nan_mask = ~nan_mask - tm.assert_almost_equal(result[nan_mask], - expected[nan_mask]) - else: - result = get_result(self.arr, len(self.arr) + 1) - expected = get_result(self.arr, len(self.arr)) - nan_mask = np.isnan(result) - tm.assert_numpy_array_equal(nan_mask, np.isnan(expected)) - - nan_mask = ~nan_mask - tm.assert_almost_equal(result[nan_mask], expected[nan_mask]) - - def _check_structures(self, f, static_comp, name=None, - has_min_periods=True, has_time_rule=True, - has_center=True, fill_value=None, **kwargs): def get_result(obj, window, min_periods=None, center=False): - - # check via the API calls if name is provided - if name is not None: - r = obj.rolling(window=window, min_periods=min_periods, - center=center) - return getattr(r, name)(**kwargs) - - # check via the moments API - with catch_warnings(record=True): - return f(obj, window=window, min_periods=min_periods, - center=center, **kwargs) + r = obj.rolling(window=window, min_periods=min_periods, + center=center) + return getattr(r, name)(**kwargs) series_result = get_result(self.series, window=50) - frame_result = get_result(self.frame, window=50) - assert isinstance(series_result, Series) - assert type(frame_result) == DataFrame + tm.assert_almost_equal(series_result.iloc[-1], + static_comp(self.series[-50:])) + + frame_result = get_result(self.frame, window=50) + assert isinstance(frame_result, DataFrame) + tm.assert_series_equal(frame_result.iloc[-1, :], + self.frame.iloc[-50:, :].apply(static_comp, + axis=0), + check_names=False) # check time_rule works if has_time_rule: @@ -1500,8 +1289,72 @@ def get_result(obj, window, min_periods=None, center=False): trunc_frame.apply(static_comp), check_names=False) - # GH 7925 + # excluding NaNs correctly + obj = Series(randn(50)) + 
obj[:10] = np.NaN + obj[-10:] = np.NaN + if has_min_periods: + result = get_result(obj, 50, min_periods=30) + tm.assert_almost_equal(result.iloc[-1], static_comp(obj[10:-10])) + + # min_periods is working correctly + result = get_result(obj, 20, min_periods=15) + assert isna(result.iloc[23]) + assert not isna(result.iloc[24]) + + assert not isna(result.iloc[-6]) + assert isna(result.iloc[-5]) + + obj2 = Series(randn(20)) + result = get_result(obj2, 10, min_periods=5) + assert isna(result.iloc[3]) + assert notna(result.iloc[4]) + + if zero_min_periods_equal: + # min_periods=0 may be equivalent to min_periods=1 + result0 = get_result(obj, 20, min_periods=0) + result1 = get_result(obj, 20, min_periods=1) + tm.assert_almost_equal(result0, result1) + else: + result = get_result(obj, 50) + tm.assert_almost_equal(result.iloc[-1], static_comp(obj[10:-10])) + + # window larger than series length (#7297) + if has_min_periods: + for minp in (0, len(self.series) - 1, len(self.series)): + result = get_result(self.series, len(self.series) + 1, + min_periods=minp) + expected = get_result(self.series, len(self.series), + min_periods=minp) + nan_mask = isna(result) + tm.assert_series_equal(nan_mask, isna(expected)) + + nan_mask = ~nan_mask + tm.assert_almost_equal(result[nan_mask], + expected[nan_mask]) + else: + result = get_result(self.series, len(self.series) + 1) + expected = get_result(self.series, len(self.series)) + nan_mask = isna(result) + tm.assert_series_equal(nan_mask, isna(expected)) + + nan_mask = ~nan_mask + tm.assert_almost_equal(result[nan_mask], expected[nan_mask]) + + # check center=True if has_center: + if has_min_periods: + result = get_result(obj, 20, min_periods=15, center=True) + expected = get_result( + pd.concat([obj, Series([np.NaN] * 9)]), 20, + min_periods=15)[9:].reset_index(drop=True) + else: + result = get_result(obj, 20, center=True) + expected = get_result( + pd.concat([obj, Series([np.NaN] * 9)]), + 20)[9:].reset_index(drop=True) + + 
tm.assert_series_equal(result, expected) # shifter index s = ['x%d' % x for x in range(12)] @@ -1541,12 +1394,11 @@ def get_result(obj, window, min_periods=None, center=False): tm.assert_frame_equal(frame_xp, frame_rs) def test_ewma(self): - self._check_ew(mom.ewma, name='mean') + self._check_ew(name='mean') - arr = np.zeros(1000) - arr[5] = 1 - with catch_warnings(record=True): - result = mom.ewma(arr, span=100, adjust=False).sum() + vals = pd.Series(np.zeros(1000)) + vals[5] = 1 + result = vals.ewm(span=100, adjust=False).mean().sum() assert np.abs(result - 1) < 1e-2 s = Series([1.0, 2.0, 4.0, 8.0]) @@ -1626,55 +1478,34 @@ def simple_wma(s, w): tm.assert_series_equal(result, expected) def test_ewmvar(self): - self._check_ew(mom.ewmvar, name='var') + self._check_ew(name='var') def test_ewmvol(self): - self._check_ew(mom.ewmvol, name='vol') + self._check_ew(name='vol') def test_ewma_span_com_args(self): - with catch_warnings(record=True): - A = mom.ewma(self.arr, com=9.5) - B = mom.ewma(self.arr, span=20) - tm.assert_almost_equal(A, B) + A = self.series.ewm(com=9.5).mean() + B = self.series.ewm(span=20).mean() + tm.assert_almost_equal(A, B) - pytest.raises(ValueError, mom.ewma, self.arr, com=9.5, span=20) - pytest.raises(ValueError, mom.ewma, self.arr) + with pytest.raises(ValueError): + self.series.ewm(com=9.5, span=20) + with pytest.raises(ValueError): + self.series.ewm().mean() def test_ewma_halflife_arg(self): - with catch_warnings(record=True): - A = mom.ewma(self.arr, com=13.932726172912965) - B = mom.ewma(self.arr, halflife=10.0) - tm.assert_almost_equal(A, B) - - pytest.raises(ValueError, mom.ewma, self.arr, span=20, - halflife=50) - pytest.raises(ValueError, mom.ewma, self.arr, com=9.5, - halflife=50) - pytest.raises(ValueError, mom.ewma, self.arr, com=9.5, span=20, - halflife=50) - pytest.raises(ValueError, mom.ewma, self.arr) - - def test_ewma_alpha_old_api(self): - # GH 10789 - with catch_warnings(record=True): - a = mom.ewma(self.arr, 
alpha=0.61722699889169674) - b = mom.ewma(self.arr, com=0.62014947789973052) - c = mom.ewma(self.arr, span=2.240298955799461) - d = mom.ewma(self.arr, halflife=0.721792864318) - tm.assert_numpy_array_equal(a, b) - tm.assert_numpy_array_equal(a, c) - tm.assert_numpy_array_equal(a, d) - - def test_ewma_alpha_arg_old_api(self): - # GH 10789 - with catch_warnings(record=True): - pytest.raises(ValueError, mom.ewma, self.arr) - pytest.raises(ValueError, mom.ewma, self.arr, - com=10.0, alpha=0.5) - pytest.raises(ValueError, mom.ewma, self.arr, - span=10.0, alpha=0.5) - pytest.raises(ValueError, mom.ewma, self.arr, - halflife=10.0, alpha=0.5) + A = self.series.ewm(com=13.932726172912965).mean() + B = self.series.ewm(halflife=10.0).mean() + tm.assert_almost_equal(A, B) + + with pytest.raises(ValueError): + self.series.ewm(span=20, halflife=50) + with pytest.raises(ValueError): + self.series.ewm(com=9.5, halflife=50) + with pytest.raises(ValueError): + self.series.ewm(com=9.5, span=20, halflife=50) + with pytest.raises(ValueError): + self.series.ewm() def test_ewm_alpha(self): # GH 10789 @@ -1689,11 +1520,15 @@ def test_ewm_alpha(self): def test_ewm_alpha_arg(self): # GH 10789 - s = Series(self.arr) - pytest.raises(ValueError, s.ewm) - pytest.raises(ValueError, s.ewm, com=10.0, alpha=0.5) - pytest.raises(ValueError, s.ewm, span=10.0, alpha=0.5) - pytest.raises(ValueError, s.ewm, halflife=10.0, alpha=0.5) + s = self.series + with pytest.raises(ValueError): + s.ewm() + with pytest.raises(ValueError): + s.ewm(com=10.0, alpha=0.5) + with pytest.raises(ValueError): + s.ewm(span=10.0, alpha=0.5) + with pytest.raises(ValueError): + s.ewm(halflife=10.0, alpha=0.5) def test_ewm_domain_checks(self): # GH 12492 @@ -1719,24 +1554,25 @@ def test_ewm_domain_checks(self): s.ewm(alpha=1.0) pytest.raises(ValueError, s.ewm, alpha=1.1) - def test_ew_empty_arrays(self): - arr = np.array([], dtype=np.float64) + def test_ew_empty_series(self): + vals = pd.Series([], dtype=np.float64) - funcs = 
[mom.ewma, mom.ewmvol, mom.ewmvar] + ewm = vals.ewm(3) + funcs = ['mean', 'vol', 'var'] for f in funcs: - with catch_warnings(record=True): - result = f(arr, 3) - tm.assert_almost_equal(result, arr) + result = getattr(ewm, f)() + tm.assert_almost_equal(result, vals) - def _check_ew(self, func, name=None): - with catch_warnings(record=True): - self._check_ew_ndarray(func, name=name) - self._check_ew_structures(func, name=name) + def _check_ew(self, name=None, preserve_nan=False): + series_result = getattr(self.series.ewm(com=10), name)() + assert isinstance(series_result, Series) + + frame_result = getattr(self.frame.ewm(com=10), name)() + assert type(frame_result) == DataFrame - def _check_ew_ndarray(self, func, preserve_nan=False, name=None): - result = func(self.arr, com=10) + result = getattr(self.series.ewm(com=10), name)() if preserve_nan: - assert (np.isnan(result[self._nan_locs]).all()) + assert result[self._nan_locs].isna().all() # excluding NaNs correctly arr = randn(50) @@ -1746,45 +1582,40 @@ def _check_ew_ndarray(self, func, preserve_nan=False, name=None): # check min_periods # GH 7898 - result = func(s, 50, min_periods=2) - assert np.isnan(result.values[:11]).all() - assert not np.isnan(result.values[11:]).any() + result = getattr(s.ewm(com=50, min_periods=2), name)() + assert result[:11].isna().all() + assert not result[11:].isna().any() for min_periods in (0, 1): - result = func(s, 50, min_periods=min_periods) - if func == mom.ewma: - assert np.isnan(result.values[:10]).all() - assert not np.isnan(result.values[10:]).any() + result = getattr(s.ewm(com=50, min_periods=min_periods), name)() + if name == 'mean': + assert result[:10].isna().all() + assert not result[10:].isna().any() else: - # ewmstd, ewmvol, ewmvar (with bias=False) require at least two - # values - assert np.isnan(result.values[:11]).all() - assert not np.isnan(result.values[11:]).any() + # ewm.std, ewm.vol, ewm.var (with bias=False) require at least + # two values + assert 
result[:11].isna().all() + assert not result[11:].isna().any() # check series of length 0 - result = func(Series([]), 50, min_periods=min_periods) - tm.assert_series_equal(result, Series([])) + result = getattr(Series().ewm(com=50, min_periods=min_periods), + name)() + tm.assert_series_equal(result, Series()) # check series of length 1 - result = func(Series([1.]), 50, min_periods=min_periods) - if func == mom.ewma: + result = getattr(Series([1.]).ewm(50, min_periods=min_periods), + name)() + if name == 'mean': tm.assert_series_equal(result, Series([1.])) else: - # ewmstd, ewmvol, ewmvar with bias=False require at least two - # values + # ewm.std, ewm.vol, ewm.var with bias=False require at least + # two values tm.assert_series_equal(result, Series([np.NaN])) # pass in ints - result2 = func(np.arange(50), span=10) + result2 = getattr(Series(np.arange(50)).ewm(span=10), name)() assert result2.dtype == np.float_ - def _check_ew_structures(self, func, name): - series_result = getattr(self.series.ewm(com=10), name)() - assert isinstance(series_result, Series) - - frame_result = getattr(self.frame.ewm(com=10), name)() - assert type(frame_result) == DataFrame - class TestPairwise(object): @@ -2021,9 +1852,6 @@ class TestMomentsConsistency(Base): # lambda v: Series(v).skew(), 3, 'skew'), # (lambda v: Series(v).kurt(), 4, 'kurt'), - # (lambda x, min_periods: mom.expanding_quantile(x, 0.3, - # min_periods=min_periods, 'quantile'), - # restore once GH 8084 is fixed # lambda v: Series(v).quantile(0.3), None, 'quantile'), @@ -2585,22 +2413,6 @@ def func(A, B, com, **kwargs): pytest.raises(Exception, func, A, randn(50), 20, min_periods=5) - def test_expanding_apply(self): - ser = Series([]) - tm.assert_series_equal(ser, ser.expanding().apply(lambda x: x.mean())) - - def expanding_mean(x, min_periods=1): - return mom.expanding_apply(x, lambda x: x.mean(), - min_periods=min_periods) - - self._check_expanding(expanding_mean, np.mean) - - # GH 8080 - s = Series([None, None, None]) 
- result = s.expanding(min_periods=0).apply(lambda x: len(x)) - expected = Series([1., 2., 3.]) - tm.assert_series_equal(result, expected) - def test_expanding_apply_args_kwargs(self): def mean_w_arg(x, const): return np.mean(x) + const @@ -2648,9 +2460,6 @@ def test_expanding_cov(self): tm.assert_almost_equal(rolling_result, result) - def test_expanding_max(self): - self._check_expanding(mom.expanding_max, np.max, preserve_nan=False) - def test_expanding_cov_pairwise(self): result = self.frame.expanding().corr() @@ -2980,55 +2789,73 @@ def test_rolling_kurt_eq_value_fperr(self): a = Series([1.1] * 15).rolling(window=10).kurt() assert np.isnan(a).all() - def _check_expanding_ndarray(self, func, static_comp, has_min_periods=True, - has_time_rule=True, preserve_nan=True): - result = func(self.arr) + @pytest.mark.parametrize('func,static_comp', [('sum', np.sum), + ('mean', np.mean), + ('max', np.max), + ('min', np.min)], + ids=['sum', 'mean', 'max', 'min']) + def test_expanding_func(self, func, static_comp): + def expanding_func(x, min_periods=1, center=False, axis=0): + exp = x.expanding(min_periods=min_periods, + center=center, axis=axis) + return getattr(exp, func)() + self._check_expanding(expanding_func, static_comp, preserve_nan=False) + + def test_expanding_apply(self): + + def expanding_mean(x, min_periods=1): + exp = x.expanding(min_periods=min_periods) + return exp.apply(lambda x: x.mean()) + + self._check_expanding(expanding_mean, np.mean) + + ser = Series([]) + tm.assert_series_equal(ser, ser.expanding().apply(lambda x: x.mean())) - tm.assert_almost_equal(result[10], static_comp(self.arr[:11])) + # GH 8080 + s = Series([None, None, None]) + result = s.expanding(min_periods=0).apply(lambda x: len(x)) + expected = Series([1., 2., 3.]) + tm.assert_series_equal(result, expected) + + def _check_expanding(self, func, static_comp, has_min_periods=True, + has_time_rule=True, preserve_nan=True): + + series_result = func(self.series) + assert 
isinstance(series_result, Series) + frame_result = func(self.frame) + assert isinstance(frame_result, DataFrame) + + result = func(self.series) + tm.assert_almost_equal(result[10], static_comp(self.series[:11])) if preserve_nan: - assert (np.isnan(result[self._nan_locs]).all()) + assert result.iloc[self._nan_locs].isna().all() - arr = randn(50) + ser = Series(randn(50)) if has_min_periods: - result = func(arr, min_periods=30) - assert (np.isnan(result[:29]).all()) - tm.assert_almost_equal(result[-1], static_comp(arr[:50])) + result = func(ser, min_periods=30) + assert result[:29].isna().all() + tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50])) # min_periods is working correctly - result = func(arr, min_periods=15) - assert np.isnan(result[13]) - assert not np.isnan(result[14]) + result = func(ser, min_periods=15) + assert isna(result.iloc[13]) + assert notna(result.iloc[14]) - arr2 = randn(20) - result = func(arr2, min_periods=5) + ser2 = Series(randn(20)) + result = func(ser2, min_periods=5) assert isna(result[3]) assert notna(result[4]) # min_periods=0 - result0 = func(arr, min_periods=0) - result1 = func(arr, min_periods=1) + result0 = func(ser, min_periods=0) + result1 = func(ser, min_periods=1) tm.assert_almost_equal(result0, result1) else: - result = func(arr) - tm.assert_almost_equal(result[-1], static_comp(arr[:50])) - - def _check_expanding_structures(self, func): - series_result = func(self.series) - assert isinstance(series_result, Series) - frame_result = func(self.frame) - assert type(frame_result) == DataFrame - - def _check_expanding(self, func, static_comp, has_min_periods=True, - has_time_rule=True, preserve_nan=True): - with warnings.catch_warnings(record=True): - self._check_expanding_ndarray(func, static_comp, - has_min_periods=has_min_periods, - has_time_rule=has_time_rule, - preserve_nan=preserve_nan) - with warnings.catch_warnings(record=True): - self._check_expanding_structures(func) + result = func(ser) + 
tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50])) def test_rolling_max_gh6297(self): """Replicate result expected in GH #6297""" From 9bd1bc5d91f5147cd5033282827448392c9a8d6a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 1 Feb 2018 03:23:03 -0800 Subject: [PATCH 019/217] Organize, Split, Parametrize timezones/timestamps tests (#19473) --- pandas/tests/scalar/test_timestamp.py | 570 +----------------- pandas/tests/scalar/timestamp/__init__.py | 0 .../tests/scalar/timestamp/test_arithmetic.py | 76 +++ .../scalar/timestamp/test_comparisons.py | 194 ++++++ .../tests/scalar/timestamp/test_rendering.py | 96 +++ .../tests/scalar/timestamp/test_timezones.py | 87 +++ .../tests/scalar/timestamp/test_unary_ops.py | 217 +++++++ pandas/tests/tseries/test_timezones.py | 96 +-- 8 files changed, 710 insertions(+), 626 deletions(-) create mode 100644 pandas/tests/scalar/timestamp/__init__.py create mode 100644 pandas/tests/scalar/timestamp/test_arithmetic.py create mode 100644 pandas/tests/scalar/timestamp/test_comparisons.py create mode 100644 pandas/tests/scalar/timestamp/test_rendering.py create mode 100644 pandas/tests/scalar/timestamp/test_timezones.py create mode 100644 pandas/tests/scalar/timestamp/test_unary_ops.py diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index 2b72eef2c6712..301f6da140866 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -1,18 +1,14 @@ """ test the scalar Timestamp """ -import sys import pytz import pytest import dateutil -import operator import calendar import numpy as np from dateutil.tz import tzutc from pytz import timezone, utc from datetime import datetime, timedelta -from distutils.version import LooseVersion -from pytz.exceptions import AmbiguousTimeError, NonExistentTimeError import pandas.util.testing as tm import pandas.util._test_decorators as td @@ -21,78 +17,10 @@ from pandas._libs.tslibs import conversion from 
pandas._libs.tslibs.timezones import get_timezone, dateutil_gettz as gettz -from pandas._libs.tslibs.frequencies import _INVALID_FREQ_ERROR from pandas.compat import long, PY3 from pandas.compat.numpy import np_datetime64_compat -from pandas import Timestamp, Period, Timedelta, NaT - - -class TestTimestampArithmetic(object): - def test_overflow_offset(self): - # xref https://github.com/statsmodels/statsmodels/issues/3374 - # ends up multiplying really large numbers which overflow - - stamp = Timestamp('2017-01-13 00:00:00', freq='D') - offset = 20169940 * offsets.Day(1) - - with pytest.raises(OverflowError): - stamp + offset - - with pytest.raises(OverflowError): - offset + stamp - - with pytest.raises(OverflowError): - stamp - offset - - def test_delta_preserve_nanos(self): - val = Timestamp(long(1337299200000000123)) - result = val + timedelta(1) - assert result.nanosecond == val.nanosecond - - def test_timestamp_sub_datetime(self): - dt = datetime(2013, 10, 12) - ts = Timestamp(datetime(2013, 10, 13)) - assert (ts - dt).days == 1 - assert (dt - ts).days == -1 - - def test_addition_subtraction_types(self): - # Assert on the types resulting from Timestamp +/- various date/time - # objects - dt = datetime(2014, 3, 4) - td = timedelta(seconds=1) - # build a timestamp with a frequency, since then it supports - # addition/subtraction of integers - ts = Timestamp(dt, freq='D') - - assert type(ts + 1) == Timestamp - assert type(ts - 1) == Timestamp - - # Timestamp + datetime not supported, though subtraction is supported - # and yields timedelta more tests in tseries/base/tests/test_base.py - assert type(ts - dt) == Timedelta - assert type(ts + td) == Timestamp - assert type(ts - td) == Timestamp - - # Timestamp +/- datetime64 not supported, so not tested (could possibly - # assert error raised?) 
- td64 = np.timedelta64(1, 'D') - assert type(ts + td64) == Timestamp - assert type(ts - td64) == Timestamp - - def test_addition_subtraction_preserve_frequency(self): - ts = Timestamp('2014-03-05', freq='D') - td = timedelta(days=1) - original_freq = ts.freq - - assert (ts + 1).freq == original_freq - assert (ts - 1).freq == original_freq - assert (ts + td).freq == original_freq - assert (ts - td).freq == original_freq - - td64 = np.timedelta64(1, 'D') - assert (ts + td64).freq == original_freq - assert (ts - td64).freq == original_freq +from pandas import Timestamp, Period, Timedelta class TestTimestampProperties(object): @@ -508,168 +436,8 @@ def test_max_valid(self): # Ensure that Timestamp.max is a valid Timestamp Timestamp(Timestamp.max) - -class TestTimestamp(object): - @pytest.mark.parametrize('freq', ['D', 'M', 'S', 'N']) - @pytest.mark.parametrize('date', ['2014-03-07', '2014-01-01 09:00', - '2014-01-01 00:00:00.000000001']) - def test_repr(self, date, freq): - # dateutil zone change (only matters for repr) - if LooseVersion(dateutil.__version__) >= LooseVersion('2.6.0'): - timezones = ['UTC', 'Asia/Tokyo', 'US/Eastern', - 'dateutil/US/Pacific'] - else: - timezones = ['UTC', 'Asia/Tokyo', 'US/Eastern', - 'dateutil/America/Los_Angeles'] - - for tz in timezones: - - # avoid to match with timezone name - freq_repr = "'{0}'".format(freq) - if tz.startswith('dateutil'): - tz_repr = tz.replace('dateutil', '') - else: - tz_repr = tz - - date_only = Timestamp(date) - assert date in repr(date_only) - assert tz_repr not in repr(date_only) - assert freq_repr not in repr(date_only) - assert date_only == eval(repr(date_only)) - - date_tz = Timestamp(date, tz=tz) - assert date in repr(date_tz) - assert tz_repr in repr(date_tz) - assert freq_repr not in repr(date_tz) - assert date_tz == eval(repr(date_tz)) - - date_freq = Timestamp(date, freq=freq) - assert date in repr(date_freq) - assert tz_repr not in repr(date_freq) - assert freq_repr in repr(date_freq) - assert 
date_freq == eval(repr(date_freq)) - - date_tz_freq = Timestamp(date, tz=tz, freq=freq) - assert date in repr(date_tz_freq) - assert tz_repr in repr(date_tz_freq) - assert freq_repr in repr(date_tz_freq) - assert date_tz_freq == eval(repr(date_tz_freq)) - - def test_repr_utcoffset(self): - # This can cause the tz field to be populated, but it's redundant to - # include this information in the date-string. - date_with_utc_offset = Timestamp('2014-03-13 00:00:00-0400', tz=None) - assert '2014-03-13 00:00:00-0400' in repr(date_with_utc_offset) - assert 'tzoffset' not in repr(date_with_utc_offset) - assert 'pytz.FixedOffset(-240)' in repr(date_with_utc_offset) - expr = repr(date_with_utc_offset).replace("'pytz.FixedOffset(-240)'", - 'pytz.FixedOffset(-240)') - assert date_with_utc_offset == eval(expr) - - def test_timestamp_repr_pre1900(self): - # pre-1900 - stamp = Timestamp('1850-01-01', tz='US/Eastern') - repr(stamp) - - iso8601 = '1850-01-01 01:23:45.012345' - stamp = Timestamp(iso8601, tz='US/Eastern') - result = repr(stamp) - assert iso8601 in result - - def test_tz(self): - t = '2014-02-01 09:00' - ts = Timestamp(t) - local = ts.tz_localize('Asia/Tokyo') - assert local.hour == 9 - assert local == Timestamp(t, tz='Asia/Tokyo') - conv = local.tz_convert('US/Eastern') - assert conv == Timestamp('2014-01-31 19:00', tz='US/Eastern') - assert conv.hour == 19 - - # preserves nanosecond - ts = Timestamp(t) + offsets.Nano(5) - local = ts.tz_localize('Asia/Tokyo') - assert local.hour == 9 - assert local.nanosecond == 5 - conv = local.tz_convert('US/Eastern') - assert conv.nanosecond == 5 - assert conv.hour == 19 - - def test_tz_localize_ambiguous(self): - - ts = Timestamp('2014-11-02 01:00') - ts_dst = ts.tz_localize('US/Eastern', ambiguous=True) - ts_no_dst = ts.tz_localize('US/Eastern', ambiguous=False) - - assert (ts_no_dst.value - ts_dst.value) / 1e9 == 3600 - with pytest.raises(ValueError): - ts.tz_localize('US/Eastern', ambiguous='infer') - - # GH 8025 - with 
tm.assert_raises_regex(TypeError, - 'Cannot localize tz-aware Timestamp, ' - 'use tz_convert for conversions'): - Timestamp('2011-01-01', tz='US/Eastern').tz_localize('Asia/Tokyo') - - with tm.assert_raises_regex(TypeError, - 'Cannot convert tz-naive Timestamp, ' - 'use tz_localize to localize'): - Timestamp('2011-01-01').tz_convert('Asia/Tokyo') - - def test_tz_localize_nonexistent(self): - # see gh-13057 - times = ['2015-03-08 02:00', '2015-03-08 02:30', - '2015-03-29 02:00', '2015-03-29 02:30'] - timezones = ['US/Eastern', 'US/Pacific', - 'Europe/Paris', 'Europe/Belgrade'] - for t, tz in zip(times, timezones): - ts = Timestamp(t) - pytest.raises(NonExistentTimeError, ts.tz_localize, - tz) - pytest.raises(NonExistentTimeError, ts.tz_localize, - tz, errors='raise') - assert ts.tz_localize(tz, errors='coerce') is NaT - - def test_tz_localize_errors_ambiguous(self): - # see gh-13057 - ts = Timestamp('2015-11-1 01:00') - pytest.raises(AmbiguousTimeError, - ts.tz_localize, 'US/Pacific', errors='coerce') - - @pytest.mark.parametrize('tz', ['UTC', 'Asia/Tokyo', - 'US/Eastern', 'dateutil/US/Pacific']) - def test_tz_localize_roundtrip(self, tz): - for t in ['2014-02-01 09:00', '2014-07-08 09:00', - '2014-11-01 17:00', '2014-11-05 00:00']: - ts = Timestamp(t) - localized = ts.tz_localize(tz) - assert localized == Timestamp(t, tz=tz) - - with pytest.raises(TypeError): - localized.tz_localize(tz) - - reset = localized.tz_localize(None) - assert reset == ts - assert reset.tzinfo is None - - @pytest.mark.parametrize('tz', ['UTC', 'Asia/Tokyo', - 'US/Eastern', 'dateutil/US/Pacific']) - def test_tz_convert_roundtrip(self, tz): - for t in ['2014-02-01 09:00', '2014-07-08 09:00', - '2014-11-01 17:00', '2014-11-05 00:00']: - ts = Timestamp(t, tz='UTC') - converted = ts.tz_convert(tz) - - reset = converted.tz_convert(None) - assert reset == Timestamp(t) - assert reset.tzinfo is None - assert reset == converted.tz_convert('UTC').tz_localize(None) - - def test_utc_z_designator(self): 
- assert get_timezone(Timestamp('2014-11-02 01:00Z').tzinfo) == 'UTC' - def test_now(self): - # #9000 + # GH#9000 ts_from_string = Timestamp('now') ts_from_method = Timestamp.now() ts_datetime = datetime.now() @@ -687,7 +455,6 @@ def test_now(self): ts_from_method_tz.tz_localize(None)) < delta) def test_today(self): - ts_from_string = Timestamp('today') ts_from_method = Timestamp.today() ts_datetime = datetime.today() @@ -704,6 +471,31 @@ def test_today(self): assert (abs(ts_from_string_tz.tz_localize(None) - ts_from_method_tz.tz_localize(None)) < delta) + +class TestTimestamp(object): + + def test_tz(self): + tstr = '2014-02-01 09:00' + ts = Timestamp(tstr) + local = ts.tz_localize('Asia/Tokyo') + assert local.hour == 9 + assert local == Timestamp(tstr, tz='Asia/Tokyo') + conv = local.tz_convert('US/Eastern') + assert conv == Timestamp('2014-01-31 19:00', tz='US/Eastern') + assert conv.hour == 19 + + # preserves nanosecond + ts = Timestamp(tstr) + offsets.Nano(5) + local = ts.tz_localize('Asia/Tokyo') + assert local.hour == 9 + assert local.nanosecond == 5 + conv = local.tz_convert('US/Eastern') + assert conv.nanosecond == 5 + assert conv.hour == 19 + + def test_utc_z_designator(self): + assert get_timezone(Timestamp('2014-11-02 01:00Z').tzinfo) == 'UTC' + def test_asm8(self): np.random.seed(7960929) ns = [Timestamp.min.value, Timestamp.max.value, 1000] @@ -715,110 +507,6 @@ def test_asm8(self): assert (Timestamp('nat').asm8.view('i8') == np.datetime64('nat', 'ns').view('i8')) - def test_pprint(self): - # GH12622 - import pprint - nested_obj = {'foo': 1, - 'bar': [{'w': {'a': Timestamp('2011-01-01')}}] * 10} - result = pprint.pformat(nested_obj, width=50) - expected = r"""{'bar': [{'w': {'a': Timestamp('2011-01-01 00:00:00')}}, - {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, - {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, - {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, - {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, - {'w': {'a': Timestamp('2011-01-01 
00:00:00')}}, - {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, - {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, - {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, - {'w': {'a': Timestamp('2011-01-01 00:00:00')}}], - 'foo': 1}""" - assert result == expected - - def test_round(self): - - # round - dt = Timestamp('20130101 09:10:11') - result = dt.round('D') - expected = Timestamp('20130101') - assert result == expected - - dt = Timestamp('20130101 19:10:11') - result = dt.round('D') - expected = Timestamp('20130102') - assert result == expected - - dt = Timestamp('20130201 12:00:00') - result = dt.round('D') - expected = Timestamp('20130202') - assert result == expected - - dt = Timestamp('20130104 12:00:00') - result = dt.round('D') - expected = Timestamp('20130105') - assert result == expected - - dt = Timestamp('20130104 12:32:00') - result = dt.round('30Min') - expected = Timestamp('20130104 12:30:00') - assert result == expected - - # floor - dt = Timestamp('20130101 09:10:11') - result = dt.floor('D') - expected = Timestamp('20130101') - assert result == expected - - # ceil - dt = Timestamp('20130101 09:10:11') - result = dt.ceil('D') - expected = Timestamp('20130102') - assert result == expected - - # round with tz - dt = Timestamp('20130101 09:10:11', tz='US/Eastern') - result = dt.round('D') - expected = Timestamp('20130101', tz='US/Eastern') - assert result == expected - - dt = Timestamp('20130101 09:10:11', tz='US/Eastern') - result = dt.round('s') - assert result == dt - - # GH 14440 & 15578 - result = Timestamp('2016-10-17 12:00:00.0015').round('ms') - expected = Timestamp('2016-10-17 12:00:00.002000') - assert result == expected - - result = Timestamp('2016-10-17 12:00:00.00149').round('ms') - expected = Timestamp('2016-10-17 12:00:00.001000') - assert result == expected - - ts = Timestamp('2016-10-17 12:00:00.0015') - for freq in ['us', 'ns']: - assert ts == ts.round(freq) - - result = Timestamp('2016-10-17 12:00:00.001501031').round('10ns') - 
expected = Timestamp('2016-10-17 12:00:00.001501030') - assert result == expected - - with tm.assert_produces_warning(): - Timestamp('2016-10-17 12:00:00.001501031').round('1010ns') - - def test_round_misc(self): - stamp = Timestamp('2000-01-05 05:09:15.13') - - def _check_round(freq, expected): - result = stamp.round(freq=freq) - assert result == expected - - for freq, expected in [('D', Timestamp('2000-01-05 00:00:00')), - ('H', Timestamp('2000-01-05 05:00:00')), - ('S', Timestamp('2000-01-05 05:09:15'))]: - _check_round(freq, expected) - - with tm.assert_raises_regex(ValueError, _INVALID_FREQ_ERROR): - stamp.round('foo') - def test_class_ops_pytz(self): def compare(x, y): assert (int(Timestamp(x).value / 1e9) == @@ -960,210 +648,6 @@ def test_hash_equivalent(self): stamp = Timestamp(datetime(2011, 1, 1)) assert d[stamp] == 5 - @td.skip_if_windows - def test_timestamp(self): - # GH#17329 - # tz-naive --> treat it as if it were UTC for purposes of timestamp() - ts = Timestamp.now() - uts = ts.replace(tzinfo=utc) - assert ts.timestamp() == uts.timestamp() - - tsc = Timestamp('2014-10-11 11:00:01.12345678', tz='US/Central') - utsc = tsc.tz_convert('UTC') - - # utsc is a different representation of the same time - assert tsc.timestamp() == utsc.timestamp() - - if PY3: - - # datetime.timestamp() converts in the local timezone - with tm.set_timezone('UTC'): - - # should agree with datetime.timestamp method - dt = ts.to_pydatetime() - assert dt.timestamp() == ts.timestamp() - - -class TestTimestampComparison(object): - def test_comparison_object_array(self): - # GH#15183 - ts = Timestamp('2011-01-03 00:00:00-0500', tz='US/Eastern') - other = Timestamp('2011-01-01 00:00:00-0500', tz='US/Eastern') - naive = Timestamp('2011-01-01 00:00:00') - - arr = np.array([other, ts], dtype=object) - res = arr == ts - expected = np.array([False, True], dtype=bool) - assert (res == expected).all() - - # 2D case - arr = np.array([[other, ts], - [ts, other]], - dtype=object) - res = arr 
!= ts - expected = np.array([[True, False], [False, True]], dtype=bool) - assert res.shape == expected.shape - assert (res == expected).all() - - # tzaware mismatch - arr = np.array([naive], dtype=object) - with pytest.raises(TypeError): - arr < ts - - def test_comparison(self): - # 5-18-2012 00:00:00.000 - stamp = long(1337299200000000000) - - val = Timestamp(stamp) - - assert val == val - assert not val != val - assert not val < val - assert val <= val - assert not val > val - assert val >= val - - other = datetime(2012, 5, 18) - assert val == other - assert not val != other - assert not val < other - assert val <= other - assert not val > other - assert val >= other - - other = Timestamp(stamp + 100) - - assert val != other - assert val != other - assert val < other - assert val <= other - assert other > val - assert other >= val - - def test_compare_invalid(self): - # GH 8058 - val = Timestamp('20130101 12:01:02') - assert not val == 'foo' - assert not val == 10.0 - assert not val == 1 - assert not val == long(1) - assert not val == [] - assert not val == {'foo': 1} - assert not val == np.float64(1) - assert not val == np.int64(1) - - assert val != 'foo' - assert val != 10.0 - assert val != 1 - assert val != long(1) - assert val != [] - assert val != {'foo': 1} - assert val != np.float64(1) - assert val != np.int64(1) - - def test_cant_compare_tz_naive_w_aware(self): - # see gh-1404 - a = Timestamp('3/12/2012') - b = Timestamp('3/12/2012', tz='utc') - - pytest.raises(Exception, a.__eq__, b) - pytest.raises(Exception, a.__ne__, b) - pytest.raises(Exception, a.__lt__, b) - pytest.raises(Exception, a.__gt__, b) - pytest.raises(Exception, b.__eq__, a) - pytest.raises(Exception, b.__ne__, a) - pytest.raises(Exception, b.__lt__, a) - pytest.raises(Exception, b.__gt__, a) - - if sys.version_info < (3, 3): - pytest.raises(Exception, a.__eq__, b.to_pydatetime()) - pytest.raises(Exception, a.to_pydatetime().__eq__, b) - else: - assert not a == b.to_pydatetime() - assert 
not a.to_pydatetime() == b - - def test_cant_compare_tz_naive_w_aware_explicit_pytz(self): - # see gh-1404 - a = Timestamp('3/12/2012') - b = Timestamp('3/12/2012', tz=utc) - - pytest.raises(Exception, a.__eq__, b) - pytest.raises(Exception, a.__ne__, b) - pytest.raises(Exception, a.__lt__, b) - pytest.raises(Exception, a.__gt__, b) - pytest.raises(Exception, b.__eq__, a) - pytest.raises(Exception, b.__ne__, a) - pytest.raises(Exception, b.__lt__, a) - pytest.raises(Exception, b.__gt__, a) - - if sys.version_info < (3, 3): - pytest.raises(Exception, a.__eq__, b.to_pydatetime()) - pytest.raises(Exception, a.to_pydatetime().__eq__, b) - else: - assert not a == b.to_pydatetime() - assert not a.to_pydatetime() == b - - def test_cant_compare_tz_naive_w_aware_dateutil(self): - # see gh-1404 - a = Timestamp('3/12/2012') - b = Timestamp('3/12/2012', tz=tzutc()) - - pytest.raises(Exception, a.__eq__, b) - pytest.raises(Exception, a.__ne__, b) - pytest.raises(Exception, a.__lt__, b) - pytest.raises(Exception, a.__gt__, b) - pytest.raises(Exception, b.__eq__, a) - pytest.raises(Exception, b.__ne__, a) - pytest.raises(Exception, b.__lt__, a) - pytest.raises(Exception, b.__gt__, a) - - if sys.version_info < (3, 3): - pytest.raises(Exception, a.__eq__, b.to_pydatetime()) - pytest.raises(Exception, a.to_pydatetime().__eq__, b) - else: - assert not a == b.to_pydatetime() - assert not a.to_pydatetime() == b - - def test_timestamp_compare_scalars(self): - # case where ndim == 0 - lhs = np.datetime64(datetime(2013, 12, 6)) - rhs = Timestamp('now') - nat = Timestamp('nat') - - ops = {'gt': 'lt', - 'lt': 'gt', - 'ge': 'le', - 'le': 'ge', - 'eq': 'eq', - 'ne': 'ne'} - - for left, right in ops.items(): - left_f = getattr(operator, left) - right_f = getattr(operator, right) - expected = left_f(lhs, rhs) - - result = right_f(rhs, lhs) - assert result == expected - - expected = left_f(rhs, nat) - result = right_f(nat, rhs) - assert result == expected - - def 
test_timestamp_compare_with_early_datetime(self): - # e.g. datetime.min - stamp = Timestamp('2012-01-01') - - assert not stamp == datetime.min - assert not stamp == datetime(1600, 1, 1) - assert not stamp == datetime(2700, 1, 1) - assert stamp != datetime.min - assert stamp != datetime(1600, 1, 1) - assert stamp != datetime(2700, 1, 1) - assert stamp > datetime(1600, 1, 1) - assert stamp >= datetime(1600, 1, 1) - assert stamp < datetime(2700, 1, 1) - assert stamp <= datetime(2700, 1, 1) - class TestTimestampNsOperations(object): diff --git a/pandas/tests/scalar/timestamp/__init__.py b/pandas/tests/scalar/timestamp/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/scalar/timestamp/test_arithmetic.py b/pandas/tests/scalar/timestamp/test_arithmetic.py new file mode 100644 index 0000000000000..8f4809c93e28b --- /dev/null +++ b/pandas/tests/scalar/timestamp/test_arithmetic.py @@ -0,0 +1,76 @@ +# -*- coding: utf-8 -*- +from datetime import datetime, timedelta + +import pytest +import numpy as np + +from pandas.compat import long +from pandas.tseries import offsets +from pandas import Timestamp, Timedelta + + +class TestTimestampArithmetic(object): + def test_overflow_offset(self): + # xref https://github.com/statsmodels/statsmodels/issues/3374 + # ends up multiplying really large numbers which overflow + + stamp = Timestamp('2017-01-13 00:00:00', freq='D') + offset = 20169940 * offsets.Day(1) + + with pytest.raises(OverflowError): + stamp + offset + + with pytest.raises(OverflowError): + offset + stamp + + with pytest.raises(OverflowError): + stamp - offset + + def test_delta_preserve_nanos(self): + val = Timestamp(long(1337299200000000123)) + result = val + timedelta(1) + assert result.nanosecond == val.nanosecond + + def test_timestamp_sub_datetime(self): + dt = datetime(2013, 10, 12) + ts = Timestamp(datetime(2013, 10, 13)) + assert (ts - dt).days == 1 + assert (dt - ts).days == -1 + + def 
test_addition_subtraction_types(self): + # Assert on the types resulting from Timestamp +/- various date/time + # objects + dt = datetime(2014, 3, 4) + td = timedelta(seconds=1) + # build a timestamp with a frequency, since then it supports + # addition/subtraction of integers + ts = Timestamp(dt, freq='D') + + assert type(ts + 1) == Timestamp + assert type(ts - 1) == Timestamp + + # Timestamp + datetime not supported, though subtraction is supported + # and yields timedelta more tests in tseries/base/tests/test_base.py + assert type(ts - dt) == Timedelta + assert type(ts + td) == Timestamp + assert type(ts - td) == Timestamp + + # Timestamp +/- datetime64 not supported, so not tested (could possibly + # assert error raised?) + td64 = np.timedelta64(1, 'D') + assert type(ts + td64) == Timestamp + assert type(ts - td64) == Timestamp + + def test_addition_subtraction_preserve_frequency(self): + ts = Timestamp('2014-03-05', freq='D') + td = timedelta(days=1) + original_freq = ts.freq + + assert (ts + 1).freq == original_freq + assert (ts - 1).freq == original_freq + assert (ts + td).freq == original_freq + assert (ts - td).freq == original_freq + + td64 = np.timedelta64(1, 'D') + assert (ts + td64).freq == original_freq + assert (ts - td64).freq == original_freq diff --git a/pandas/tests/scalar/timestamp/test_comparisons.py b/pandas/tests/scalar/timestamp/test_comparisons.py new file mode 100644 index 0000000000000..72d87be619917 --- /dev/null +++ b/pandas/tests/scalar/timestamp/test_comparisons.py @@ -0,0 +1,194 @@ +# -*- coding: utf-8 -*- +import sys +from datetime import datetime +import operator + +import pytest +import numpy as np + +from dateutil.tz import tzutc +from pytz import utc + +from pandas.compat import long +from pandas import Timestamp + + +class TestTimestampComparison(object): + def test_comparison_object_array(self): + # GH#15183 + ts = Timestamp('2011-01-03 00:00:00-0500', tz='US/Eastern') + other = Timestamp('2011-01-01 00:00:00-0500', 
tz='US/Eastern') + naive = Timestamp('2011-01-01 00:00:00') + + arr = np.array([other, ts], dtype=object) + res = arr == ts + expected = np.array([False, True], dtype=bool) + assert (res == expected).all() + + # 2D case + arr = np.array([[other, ts], + [ts, other]], + dtype=object) + res = arr != ts + expected = np.array([[True, False], [False, True]], dtype=bool) + assert res.shape == expected.shape + assert (res == expected).all() + + # tzaware mismatch + arr = np.array([naive], dtype=object) + with pytest.raises(TypeError): + arr < ts + + def test_comparison(self): + # 5-18-2012 00:00:00.000 + stamp = long(1337299200000000000) + + val = Timestamp(stamp) + + assert val == val + assert not val != val + assert not val < val + assert val <= val + assert not val > val + assert val >= val + + other = datetime(2012, 5, 18) + assert val == other + assert not val != other + assert not val < other + assert val <= other + assert not val > other + assert val >= other + + other = Timestamp(stamp + 100) + + assert val != other + assert val != other + assert val < other + assert val <= other + assert other > val + assert other >= val + + def test_compare_invalid(self): + # GH 8058 + val = Timestamp('20130101 12:01:02') + assert not val == 'foo' + assert not val == 10.0 + assert not val == 1 + assert not val == long(1) + assert not val == [] + assert not val == {'foo': 1} + assert not val == np.float64(1) + assert not val == np.int64(1) + + assert val != 'foo' + assert val != 10.0 + assert val != 1 + assert val != long(1) + assert val != [] + assert val != {'foo': 1} + assert val != np.float64(1) + assert val != np.int64(1) + + def test_cant_compare_tz_naive_w_aware(self): + # see gh-1404 + a = Timestamp('3/12/2012') + b = Timestamp('3/12/2012', tz='utc') + + pytest.raises(Exception, a.__eq__, b) + pytest.raises(Exception, a.__ne__, b) + pytest.raises(Exception, a.__lt__, b) + pytest.raises(Exception, a.__gt__, b) + pytest.raises(Exception, b.__eq__, a) + 
pytest.raises(Exception, b.__ne__, a) + pytest.raises(Exception, b.__lt__, a) + pytest.raises(Exception, b.__gt__, a) + + if sys.version_info < (3, 3): + pytest.raises(Exception, a.__eq__, b.to_pydatetime()) + pytest.raises(Exception, a.to_pydatetime().__eq__, b) + else: + assert not a == b.to_pydatetime() + assert not a.to_pydatetime() == b + + def test_cant_compare_tz_naive_w_aware_explicit_pytz(self): + # see gh-1404 + a = Timestamp('3/12/2012') + b = Timestamp('3/12/2012', tz=utc) + + pytest.raises(Exception, a.__eq__, b) + pytest.raises(Exception, a.__ne__, b) + pytest.raises(Exception, a.__lt__, b) + pytest.raises(Exception, a.__gt__, b) + pytest.raises(Exception, b.__eq__, a) + pytest.raises(Exception, b.__ne__, a) + pytest.raises(Exception, b.__lt__, a) + pytest.raises(Exception, b.__gt__, a) + + if sys.version_info < (3, 3): + pytest.raises(Exception, a.__eq__, b.to_pydatetime()) + pytest.raises(Exception, a.to_pydatetime().__eq__, b) + else: + assert not a == b.to_pydatetime() + assert not a.to_pydatetime() == b + + def test_cant_compare_tz_naive_w_aware_dateutil(self): + # see gh-1404 + a = Timestamp('3/12/2012') + b = Timestamp('3/12/2012', tz=tzutc()) + + pytest.raises(Exception, a.__eq__, b) + pytest.raises(Exception, a.__ne__, b) + pytest.raises(Exception, a.__lt__, b) + pytest.raises(Exception, a.__gt__, b) + pytest.raises(Exception, b.__eq__, a) + pytest.raises(Exception, b.__ne__, a) + pytest.raises(Exception, b.__lt__, a) + pytest.raises(Exception, b.__gt__, a) + + if sys.version_info < (3, 3): + pytest.raises(Exception, a.__eq__, b.to_pydatetime()) + pytest.raises(Exception, a.to_pydatetime().__eq__, b) + else: + assert not a == b.to_pydatetime() + assert not a.to_pydatetime() == b + + def test_timestamp_compare_scalars(self): + # case where ndim == 0 + lhs = np.datetime64(datetime(2013, 12, 6)) + rhs = Timestamp('now') + nat = Timestamp('nat') + + ops = {'gt': 'lt', + 'lt': 'gt', + 'ge': 'le', + 'le': 'ge', + 'eq': 'eq', + 'ne': 'ne'} + + for 
left, right in ops.items(): + left_f = getattr(operator, left) + right_f = getattr(operator, right) + expected = left_f(lhs, rhs) + + result = right_f(rhs, lhs) + assert result == expected + + expected = left_f(rhs, nat) + result = right_f(nat, rhs) + assert result == expected + + def test_timestamp_compare_with_early_datetime(self): + # e.g. datetime.min + stamp = Timestamp('2012-01-01') + + assert not stamp == datetime.min + assert not stamp == datetime(1600, 1, 1) + assert not stamp == datetime(2700, 1, 1) + assert stamp != datetime.min + assert stamp != datetime(1600, 1, 1) + assert stamp != datetime(2700, 1, 1) + assert stamp > datetime(1600, 1, 1) + assert stamp >= datetime(1600, 1, 1) + assert stamp < datetime(2700, 1, 1) + assert stamp <= datetime(2700, 1, 1) diff --git a/pandas/tests/scalar/timestamp/test_rendering.py b/pandas/tests/scalar/timestamp/test_rendering.py new file mode 100644 index 0000000000000..c404b60567daf --- /dev/null +++ b/pandas/tests/scalar/timestamp/test_rendering.py @@ -0,0 +1,96 @@ +# -*- coding: utf-8 -*- + +import pytest +import dateutil +import pytz # noqa # a test below uses pytz but only inside a `eval` call + +import pprint +from distutils.version import LooseVersion + +from pandas import Timestamp + + +class TestTimestampRendering(object): + + # dateutil zone change (only matters for repr) + if LooseVersion(dateutil.__version__) >= LooseVersion('2.6.0'): + timezones = ['UTC', 'Asia/Tokyo', 'US/Eastern', + 'dateutil/US/Pacific'] + else: + timezones = ['UTC', 'Asia/Tokyo', 'US/Eastern', + 'dateutil/America/Los_Angeles'] + + @pytest.mark.parametrize('tz', timezones) + @pytest.mark.parametrize('freq', ['D', 'M', 'S', 'N']) + @pytest.mark.parametrize('date', ['2014-03-07', '2014-01-01 09:00', + '2014-01-01 00:00:00.000000001']) + def test_repr(self, date, freq, tz): + # avoid to match with timezone name + freq_repr = "'{0}'".format(freq) + if tz.startswith('dateutil'): + tz_repr = tz.replace('dateutil', '') + else: + tz_repr = tz 
+ + date_only = Timestamp(date) + assert date in repr(date_only) + assert tz_repr not in repr(date_only) + assert freq_repr not in repr(date_only) + assert date_only == eval(repr(date_only)) + + date_tz = Timestamp(date, tz=tz) + assert date in repr(date_tz) + assert tz_repr in repr(date_tz) + assert freq_repr not in repr(date_tz) + assert date_tz == eval(repr(date_tz)) + + date_freq = Timestamp(date, freq=freq) + assert date in repr(date_freq) + assert tz_repr not in repr(date_freq) + assert freq_repr in repr(date_freq) + assert date_freq == eval(repr(date_freq)) + + date_tz_freq = Timestamp(date, tz=tz, freq=freq) + assert date in repr(date_tz_freq) + assert tz_repr in repr(date_tz_freq) + assert freq_repr in repr(date_tz_freq) + assert date_tz_freq == eval(repr(date_tz_freq)) + + def test_repr_utcoffset(self): + # This can cause the tz field to be populated, but it's redundant to + # include this information in the date-string. + date_with_utc_offset = Timestamp('2014-03-13 00:00:00-0400', tz=None) + assert '2014-03-13 00:00:00-0400' in repr(date_with_utc_offset) + assert 'tzoffset' not in repr(date_with_utc_offset) + assert 'pytz.FixedOffset(-240)' in repr(date_with_utc_offset) + expr = repr(date_with_utc_offset).replace("'pytz.FixedOffset(-240)'", + 'pytz.FixedOffset(-240)') + assert date_with_utc_offset == eval(expr) + + def test_timestamp_repr_pre1900(self): + # pre-1900 + stamp = Timestamp('1850-01-01', tz='US/Eastern') + repr(stamp) + + iso8601 = '1850-01-01 01:23:45.012345' + stamp = Timestamp(iso8601, tz='US/Eastern') + result = repr(stamp) + assert iso8601 in result + + def test_pprint(self): + # GH#12622 + nested_obj = {'foo': 1, + 'bar': [{'w': {'a': Timestamp('2011-01-01')}}] * 10} + result = pprint.pformat(nested_obj, width=50) + expected = r"""{'bar': [{'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + 
{'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}], + 'foo': 1}""" + assert result == expected diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py new file mode 100644 index 0000000000000..eeec70cc234f5 --- /dev/null +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -0,0 +1,87 @@ +# -*- coding: utf-8 -*- +""" +Tests for Timestamp timezone-related methods +""" + +import pytest +from pytz.exceptions import AmbiguousTimeError, NonExistentTimeError + +import pandas.util.testing as tm +from pandas import Timestamp, NaT + + +class TestTimestampTZOperations(object): + # -------------------------------------------------------------- + # Timestamp.tz_localize + + def test_tz_localize_ambiguous(self): + ts = Timestamp('2014-11-02 01:00') + ts_dst = ts.tz_localize('US/Eastern', ambiguous=True) + ts_no_dst = ts.tz_localize('US/Eastern', ambiguous=False) + + assert (ts_no_dst.value - ts_dst.value) / 1e9 == 3600 + with pytest.raises(ValueError): + ts.tz_localize('US/Eastern', ambiguous='infer') + + # GH#8025 + with tm.assert_raises_regex(TypeError, + 'Cannot localize tz-aware Timestamp, ' + 'use tz_convert for conversions'): + Timestamp('2011-01-01', tz='US/Eastern').tz_localize('Asia/Tokyo') + + with tm.assert_raises_regex(TypeError, + 'Cannot convert tz-naive Timestamp, ' + 'use tz_localize to localize'): + Timestamp('2011-01-01').tz_convert('Asia/Tokyo') + + @pytest.mark.parametrize('stamp, tz', [ + ('2015-03-08 02:00', 'US/Eastern'), + ('2015-03-08 02:30', 'US/Pacific'), + ('2015-03-29 02:00', 'Europe/Paris'), + ('2015-03-29 02:30', 'Europe/Belgrade')]) + def test_tz_localize_nonexistent(self, stamp, tz): + # GH#13057 + ts = Timestamp(stamp) + with 
pytest.raises(NonExistentTimeError): + ts.tz_localize(tz) + with pytest.raises(NonExistentTimeError): + ts.tz_localize(tz, errors='raise') + assert ts.tz_localize(tz, errors='coerce') is NaT + + def test_tz_localize_errors_ambiguous(self): + # GH#13057 + ts = Timestamp('2015-11-1 01:00') + with pytest.raises(AmbiguousTimeError): + ts.tz_localize('US/Pacific', errors='coerce') + + @pytest.mark.parametrize('tz', ['UTC', 'Asia/Tokyo', + 'US/Eastern', 'dateutil/US/Pacific']) + @pytest.mark.parametrize('stamp', ['2014-02-01 09:00', '2014-07-08 09:00', + '2014-11-01 17:00', '2014-11-05 00:00']) + def test_tz_localize_roundtrip(self, stamp, tz): + ts = Timestamp(stamp) + localized = ts.tz_localize(tz) + assert localized == Timestamp(stamp, tz=tz) + + with pytest.raises(TypeError): + localized.tz_localize(tz) + + reset = localized.tz_localize(None) + assert reset == ts + assert reset.tzinfo is None + + # ------------------------------------------------------------------ + # Timestamp.tz_convert + + @pytest.mark.parametrize('tz', ['UTC', 'Asia/Tokyo', + 'US/Eastern', 'dateutil/US/Pacific']) + @pytest.mark.parametrize('stamp', ['2014-02-01 09:00', '2014-07-08 09:00', + '2014-11-01 17:00', '2014-11-05 00:00']) + def test_tz_convert_roundtrip(self, stamp, tz): + ts = Timestamp(stamp, tz='UTC') + converted = ts.tz_convert(tz) + + reset = converted.tz_convert(None) + assert reset == Timestamp(stamp) + assert reset.tzinfo is None + assert reset == converted.tz_convert('UTC').tz_localize(None) diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py new file mode 100644 index 0000000000000..70c7308dd3991 --- /dev/null +++ b/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -0,0 +1,217 @@ +# -*- coding: utf-8 -*- +from datetime import datetime + +import pytest +import pytz +from pytz import utc + +import pandas.util.testing as tm +import pandas.util._test_decorators as td + +from pandas.compat import PY3 +from 
pandas._libs.tslibs.frequencies import _INVALID_FREQ_ERROR +from pandas import Timestamp + + +class TestTimestampUnaryOps(object): + + # -------------------------------------------------------------- + # Timestamp.round + + def test_round_day_naive(self): + dt = Timestamp('20130101 09:10:11') + result = dt.round('D') + expected = Timestamp('20130101') + assert result == expected + + dt = Timestamp('20130101 19:10:11') + result = dt.round('D') + expected = Timestamp('20130102') + assert result == expected + + dt = Timestamp('20130201 12:00:00') + result = dt.round('D') + expected = Timestamp('20130202') + assert result == expected + + dt = Timestamp('20130104 12:00:00') + result = dt.round('D') + expected = Timestamp('20130105') + assert result == expected + + def test_round_tzaware(self): + dt = Timestamp('20130101 09:10:11', tz='US/Eastern') + result = dt.round('D') + expected = Timestamp('20130101', tz='US/Eastern') + assert result == expected + + dt = Timestamp('20130101 09:10:11', tz='US/Eastern') + result = dt.round('s') + assert result == dt + + def test_round_30min(self): + # round + dt = Timestamp('20130104 12:32:00') + result = dt.round('30Min') + expected = Timestamp('20130104 12:30:00') + assert result == expected + + def test_round_subsecond(self): + # GH#14440 & GH#15578 + result = Timestamp('2016-10-17 12:00:00.0015').round('ms') + expected = Timestamp('2016-10-17 12:00:00.002000') + assert result == expected + + result = Timestamp('2016-10-17 12:00:00.00149').round('ms') + expected = Timestamp('2016-10-17 12:00:00.001000') + assert result == expected + + ts = Timestamp('2016-10-17 12:00:00.0015') + for freq in ['us', 'ns']: + assert ts == ts.round(freq) + + result = Timestamp('2016-10-17 12:00:00.001501031').round('10ns') + expected = Timestamp('2016-10-17 12:00:00.001501030') + assert result == expected + + def test_round_nonstandard_freq(self): + with tm.assert_produces_warning(): + Timestamp('2016-10-17 12:00:00.001501031').round('1010ns') + + def 
test_round_invalid_arg(self): + stamp = Timestamp('2000-01-05 05:09:15.13') + with tm.assert_raises_regex(ValueError, _INVALID_FREQ_ERROR): + stamp.round('foo') + + @pytest.mark.parametrize('freq, expected', [ + ('D', Timestamp('2000-01-05 00:00:00')), + ('H', Timestamp('2000-01-05 05:00:00')), + ('S', Timestamp('2000-01-05 05:09:15'))]) + def test_round_frequencies(self, freq, expected): + stamp = Timestamp('2000-01-05 05:09:15.13') + + result = stamp.round(freq=freq) + assert result == expected + + def test_ceil(self): + dt = Timestamp('20130101 09:10:11') + result = dt.ceil('D') + expected = Timestamp('20130102') + assert result == expected + + def test_floor(self): + dt = Timestamp('20130101 09:10:11') + result = dt.floor('D') + expected = Timestamp('20130101') + assert result == expected + + # -------------------------------------------------------------- + # Timestamp.replace + timezones = ['UTC', 'Asia/Tokyo', 'US/Eastern', 'dateutil/US/Pacific'] + + def test_replace_naive(self): + # GH#14621, GH#7825 + ts = Timestamp('2016-01-01 09:00:00') + result = ts.replace(hour=0) + expected = Timestamp('2016-01-01 00:00:00') + assert result == expected + + @pytest.mark.parametrize('tz', timezones) + def test_replace_aware(self, tz): + # GH#14621, GH#7825 + # replacing datetime components with and w/o presence of a timezone + ts = Timestamp('2016-01-01 09:00:00', tz=tz) + result = ts.replace(hour=0) + expected = Timestamp('2016-01-01 00:00:00', tz=tz) + assert result == expected + + @pytest.mark.parametrize('tz', timezones) + def test_replace_preserves_nanos(self, tz): + # GH#14621, GH#7825 + ts = Timestamp('2016-01-01 09:00:00.000000123', tz=tz) + result = ts.replace(hour=0) + expected = Timestamp('2016-01-01 00:00:00.000000123', tz=tz) + assert result == expected + + @pytest.mark.parametrize('tz', timezones) + def test_replace_multiple(self, tz): + # GH#14621, GH#7825 + # replacing datetime components with and w/o presence of a timezone + # test all + ts = 
Timestamp('2016-01-01 09:00:00.000000123', tz=tz) + result = ts.replace(year=2015, month=2, day=2, hour=0, minute=5, + second=5, microsecond=5, nanosecond=5) + expected = Timestamp('2015-02-02 00:05:05.000005005', tz=tz) + assert result == expected + + @pytest.mark.parametrize('tz', timezones) + def test_replace_invalid_kwarg(self, tz): + # GH#14621, GH#7825 + ts = Timestamp('2016-01-01 09:00:00.000000123', tz=tz) + with pytest.raises(TypeError): + ts.replace(foo=5) + + @pytest.mark.parametrize('tz', timezones) + def test_replace_integer_args(self, tz): + # GH#14621, GH#7825 + ts = Timestamp('2016-01-01 09:00:00.000000123', tz=tz) + with pytest.raises(ValueError): + ts.replace(hour=0.1) + + def test_replace_tzinfo_equiv_tz_localize_none(self): + # GH#14621, GH#7825 + # assert conversion to naive is the same as replacing tzinfo with None + ts = Timestamp('2013-11-03 01:59:59.999999-0400', tz='US/Eastern') + assert ts.tz_localize(None) == ts.replace(tzinfo=None) + + @td.skip_if_windows + def test_replace_tzinfo(self): + # GH#15683 + dt = datetime(2016, 3, 27, 1) + tzinfo = pytz.timezone('CET').localize(dt, is_dst=False).tzinfo + + result_dt = dt.replace(tzinfo=tzinfo) + result_pd = Timestamp(dt).replace(tzinfo=tzinfo) + + if PY3: + # datetime.timestamp() converts in the local timezone + with tm.set_timezone('UTC'): + assert result_dt.timestamp() == result_pd.timestamp() + + assert result_dt == result_pd + assert result_dt == result_pd.to_pydatetime() + + result_dt = dt.replace(tzinfo=tzinfo).replace(tzinfo=None) + result_pd = Timestamp(dt).replace(tzinfo=tzinfo).replace(tzinfo=None) + + if PY3: + # datetime.timestamp() converts in the local timezone + with tm.set_timezone('UTC'): + assert result_dt.timestamp() == result_pd.timestamp() + + assert result_dt == result_pd + assert result_dt == result_pd.to_pydatetime() + + # -------------------------------------------------------------- + + @td.skip_if_windows + def test_timestamp(self): + # GH#17329 + # tz-naive --> 
treat it as if it were UTC for purposes of timestamp() + ts = Timestamp.now() + uts = ts.replace(tzinfo=utc) + assert ts.timestamp() == uts.timestamp() + + tsc = Timestamp('2014-10-11 11:00:01.12345678', tz='US/Central') + utsc = tsc.tz_convert('UTC') + + # utsc is a different representation of the same time + assert tsc.timestamp() == utsc.timestamp() + + if PY3: + # datetime.timestamp() converts in the local timezone + with tm.set_timezone('UTC'): + + # should agree with datetime.timestamp method + dt = ts.to_pydatetime() + assert dt.timestamp() == ts.timestamp() diff --git a/pandas/tests/tseries/test_timezones.py b/pandas/tests/tseries/test_timezones.py index 7ae63d7d080cc..cc5f4d30f9aaf 100644 --- a/pandas/tests/tseries/test_timezones.py +++ b/pandas/tests/tseries/test_timezones.py @@ -14,7 +14,7 @@ import pandas.util.testing as tm import pandas.util._test_decorators as td import pandas.tseries.offsets as offsets -from pandas.compat import lrange, zip, PY3 +from pandas.compat import lrange, zip from pandas.core.indexes.datetimes import bdate_range, date_range from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas._libs import tslib @@ -1198,65 +1198,23 @@ def test_tz_convert_tzlocal(self): class TestTimeZoneCacheKey(object): - def test_cache_keys_are_distinct_for_pytz_vs_dateutil(self): - tzs = pytz.common_timezones - for tz_name in tzs: - if tz_name == 'UTC': - # skip utc as it's a special case in dateutil - continue - tz_p = timezones.maybe_get_tz(tz_name) - tz_d = timezones.maybe_get_tz('dateutil/' + tz_name) - if tz_d is None: - # skip timezones that dateutil doesn't know about. 
- continue - assert (timezones._p_tz_cache_key(tz_p) != - timezones._p_tz_cache_key(tz_d)) + @pytest.mark.parametrize('tz_name', list(pytz.common_timezones)) + def test_cache_keys_are_distinct_for_pytz_vs_dateutil(self, tz_name): + if tz_name == 'UTC': + # skip utc as it's a special case in dateutil + return + tz_p = timezones.maybe_get_tz(tz_name) + tz_d = timezones.maybe_get_tz('dateutil/' + tz_name) + if tz_d is None: + # skip timezones that dateutil doesn't know about. + return + assert (timezones._p_tz_cache_key(tz_p) != + timezones._p_tz_cache_key(tz_d)) class TestTimeZones(object): timezones = ['UTC', 'Asia/Tokyo', 'US/Eastern', 'dateutil/US/Pacific'] - def test_replace(self): - # GH 14621 - # GH 7825 - # replacing datetime components with and w/o presence of a timezone - dt = Timestamp('2016-01-01 09:00:00') - result = dt.replace(hour=0) - expected = Timestamp('2016-01-01 00:00:00') - assert result == expected - - for tz in self.timezones: - dt = Timestamp('2016-01-01 09:00:00', tz=tz) - result = dt.replace(hour=0) - expected = Timestamp('2016-01-01 00:00:00', tz=tz) - assert result == expected - - # we preserve nanoseconds - dt = Timestamp('2016-01-01 09:00:00.000000123', tz=tz) - result = dt.replace(hour=0) - expected = Timestamp('2016-01-01 00:00:00.000000123', tz=tz) - assert result == expected - - # test all - dt = Timestamp('2016-01-01 09:00:00.000000123', tz=tz) - result = dt.replace(year=2015, month=2, day=2, hour=0, minute=5, - second=5, microsecond=5, nanosecond=5) - expected = Timestamp('2015-02-02 00:05:05.000005005', tz=tz) - assert result == expected - - # error - def f(): - dt.replace(foo=5) - pytest.raises(TypeError, f) - - def f(): - dt.replace(hour=0.1) - pytest.raises(ValueError, f) - - # assert conversion to naive is the same as replacing tzinfo with None - dt = Timestamp('2013-11-03 01:59:59.999999-0400', tz='US/Eastern') - assert dt.tz_localize(None) == dt.replace(tzinfo=None) - def test_ambiguous_compat(self): # validate that pytz and 
dateutil are compat for dst # when the transition happens @@ -1298,34 +1256,6 @@ def test_ambiguous_compat(self): assert (result_pytz.to_pydatetime().tzname() == result_dateutil.to_pydatetime().tzname()) - @td.skip_if_windows - def test_replace_tzinfo(self): - # GH 15683 - dt = datetime(2016, 3, 27, 1) - tzinfo = pytz.timezone('CET').localize(dt, is_dst=False).tzinfo - - result_dt = dt.replace(tzinfo=tzinfo) - result_pd = Timestamp(dt).replace(tzinfo=tzinfo) - - if PY3: - # datetime.timestamp() converts in the local timezone - with tm.set_timezone('UTC'): - assert result_dt.timestamp() == result_pd.timestamp() - - assert result_dt == result_pd - assert result_dt == result_pd.to_pydatetime() - - result_dt = dt.replace(tzinfo=tzinfo).replace(tzinfo=None) - result_pd = Timestamp(dt).replace(tzinfo=tzinfo).replace(tzinfo=None) - - if PY3: - # datetime.timestamp() converts in the local timezone - with tm.set_timezone('UTC'): - assert result_dt.timestamp() == result_pd.timestamp() - - assert result_dt == result_pd - assert result_dt == result_pd.to_pydatetime() - def test_index_equals_with_tz(self): left = date_range('1/1/2011', periods=100, freq='H', tz='utc') right = date_range('1/1/2011', periods=100, freq='H', tz='US/Eastern') From c5da136f7c843fa146533f53038259ec7d74658c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 1 Feb 2018 03:26:45 -0800 Subject: [PATCH 020/217] implement test_scalar_compat (#19479) --- .../indexes/datetimes/test_arithmetic.py | 60 +++++- .../tests/indexes/datetimes/test_datetime.py | 39 +--- .../indexes/datetimes/test_datetimelike.py | 32 +-- pandas/tests/indexes/datetimes/test_misc.py | 78 +------- pandas/tests/indexes/datetimes/test_ops.py | 117 ----------- .../indexes/datetimes/test_scalar_compat.py | 188 ++++++++++++++++++ pandas/tests/indexes/datetimes/test_setops.py | 32 +++ 7 files changed, 284 insertions(+), 262 deletions(-) create mode 100644 pandas/tests/indexes/datetimes/test_scalar_compat.py diff --git 
a/pandas/tests/indexes/datetimes/test_arithmetic.py b/pandas/tests/indexes/datetimes/test_arithmetic.py index 480f025db17ca..671071b5e4945 100644 --- a/pandas/tests/indexes/datetimes/test_arithmetic.py +++ b/pandas/tests/indexes/datetimes/test_arithmetic.py @@ -10,7 +10,7 @@ import pandas as pd from pandas.compat.numpy import np_datetime64_compat import pandas.util.testing as tm -from pandas.errors import PerformanceWarning +from pandas.errors import PerformanceWarning, NullFrequencyError from pandas import (Timestamp, Timedelta, Series, DatetimeIndex, TimedeltaIndex, date_range) @@ -274,6 +274,64 @@ def test_dti_isub_int(self, tz, one): rng -= one tm.assert_index_equal(rng, expected) + # ------------------------------------------------------------- + # DatetimeIndex.shift is used in integer addition + + def test_dti_shift_tzaware(self, tz): + # GH#9903 + idx = pd.DatetimeIndex([], name='xxx', tz=tz) + tm.assert_index_equal(idx.shift(0, freq='H'), idx) + tm.assert_index_equal(idx.shift(3, freq='H'), idx) + + idx = pd.DatetimeIndex(['2011-01-01 10:00', '2011-01-01 11:00' + '2011-01-01 12:00'], name='xxx', tz=tz) + tm.assert_index_equal(idx.shift(0, freq='H'), idx) + exp = pd.DatetimeIndex(['2011-01-01 13:00', '2011-01-01 14:00' + '2011-01-01 15:00'], name='xxx', tz=tz) + tm.assert_index_equal(idx.shift(3, freq='H'), exp) + exp = pd.DatetimeIndex(['2011-01-01 07:00', '2011-01-01 08:00' + '2011-01-01 09:00'], name='xxx', tz=tz) + tm.assert_index_equal(idx.shift(-3, freq='H'), exp) + + def test_dti_shift_freqs(self): + # test shift for DatetimeIndex and non DatetimeIndex + # GH#8083 + drange = pd.date_range('20130101', periods=5) + result = drange.shift(1) + expected = pd.DatetimeIndex(['2013-01-02', '2013-01-03', '2013-01-04', + '2013-01-05', + '2013-01-06'], freq='D') + tm.assert_index_equal(result, expected) + + result = drange.shift(-1) + expected = pd.DatetimeIndex(['2012-12-31', '2013-01-01', '2013-01-02', + '2013-01-03', '2013-01-04'], + freq='D') + 
tm.assert_index_equal(result, expected) + + result = drange.shift(3, freq='2D') + expected = pd.DatetimeIndex(['2013-01-07', '2013-01-08', '2013-01-09', + '2013-01-10', + '2013-01-11'], freq='D') + tm.assert_index_equal(result, expected) + + def test_dti_shift_int(self): + rng = date_range('1/1/2000', periods=20) + + result = rng + 5 + expected = rng.shift(5) + tm.assert_index_equal(result, expected) + + result = rng - 5 + expected = rng.shift(-5) + tm.assert_index_equal(result, expected) + + def test_dti_shift_no_freq(self): + # GH#19147 + dti = pd.DatetimeIndex(['2011-01-01 10:00', '2011-01-01'], freq=None) + with pytest.raises(NullFrequencyError): + dti.shift(2) + # ------------------------------------------------------------- # Binary operations DatetimeIndex and timedelta-like diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index 49f94bfa65543..a75ace2933b71 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -2,7 +2,7 @@ import pytest import numpy as np -from datetime import date, timedelta, time, datetime +from datetime import date, timedelta, time import dateutil import pandas as pd @@ -16,31 +16,6 @@ randn = np.random.randn -class TestDatetimeIndexLikeTimestamp(object): - # Tests for DatetimeIndex behaving like a vectorized Timestamp - - def test_dti_date_out_of_range(self): - # see gh-1475 - pytest.raises(ValueError, DatetimeIndex, ['1400-01-01']) - pytest.raises(ValueError, DatetimeIndex, [datetime(1400, 1, 1)]) - - def test_timestamp_fields(self): - # extra fields from DatetimeIndex like quarter and week - idx = tm.makeDateIndex(100) - - fields = ['dayofweek', 'dayofyear', 'week', 'weekofyear', 'quarter', - 'days_in_month', 'is_month_start', 'is_month_end', - 'is_quarter_start', 'is_quarter_end', 'is_year_start', - 'is_year_end', 'weekday_name'] - for f in fields: - expected = getattr(idx, f)[-1] - result = 
getattr(Timestamp(idx[-1]), f) - assert result == expected - - assert idx.freq == Timestamp(idx[-1], idx.freq).freq - assert idx.freqstr == Timestamp(idx[-1], idx.freq).freqstr - - class TestDatetimeIndex(object): def test_get_loc(self): @@ -371,18 +346,6 @@ def test_isin(self): assert_almost_equal(index.isin([index[2], 5]), np.array([False, False, True, False])) - def test_time(self): - rng = pd.date_range('1/1/2000', freq='12min', periods=10) - result = pd.Index(rng).time - expected = [t.time() for t in rng] - assert (result == expected).all() - - def test_date(self): - rng = pd.date_range('1/1/2000', freq='12H', periods=10) - result = pd.Index(rng).date - expected = [t.date() for t in rng] - assert (result == expected).all() - def test_does_not_convert_mixed_integer(self): df = tm.makeCustomDataframe(10, 10, data_gen_f=lambda *args, **kwargs: randn(), diff --git a/pandas/tests/indexes/datetimes/test_datetimelike.py b/pandas/tests/indexes/datetimes/test_datetimelike.py index 9d6d27ecb4b6f..c6b3a77773dc7 100644 --- a/pandas/tests/indexes/datetimes/test_datetimelike.py +++ b/pandas/tests/indexes/datetimes/test_datetimelike.py @@ -1,9 +1,7 @@ """ generic tests from the Datetimelike class """ -import numpy as np -import pandas as pd from pandas.util import testing as tm -from pandas import Series, Index, DatetimeIndex, date_range +from pandas import DatetimeIndex, date_range from ..datetimelike import DatetimeLike @@ -27,31 +25,7 @@ def test_pickle_compat_construction(self): pass def test_intersection(self): - first = self.index - second = self.index[5:] - intersect = first.intersection(second) - assert tm.equalContents(intersect, second) - - # GH 10149 - cases = [klass(second.values) for klass in [np.array, Series, list]] - for case in cases: - result = first.intersection(case) - assert tm.equalContents(result, second) - - third = Index(['a', 'b', 'c']) - result = first.intersection(third) - expected = pd.Index([], dtype=object) - tm.assert_index_equal(result, 
expected) + pass # handled in test_setops def test_union(self): - first = self.index[:5] - second = self.index[5:] - everything = self.index - union = first.union(second) - assert tm.equalContents(union, everything) - - # GH 10149 - cases = [klass(second.values) for klass in [np.array, Series, list]] - for case in cases: - result = first.union(case) - assert tm.equalContents(result, everything) + pass # handled in test_setops diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py index 951aa2c520d0f..4a46c3b04bbad 100644 --- a/pandas/tests/indexes/datetimes/test_misc.py +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -4,53 +4,7 @@ import pandas as pd import pandas.util.testing as tm from pandas import (Index, DatetimeIndex, datetime, offsets, - Float64Index, date_range, Timestamp) - - -class TestDateTimeIndexToJulianDate(object): - - def test_1700(self): - r1 = Float64Index([2345897.5, 2345898.5, 2345899.5, 2345900.5, - 2345901.5]) - r2 = date_range(start=Timestamp('1710-10-01'), periods=5, - freq='D').to_julian_date() - assert isinstance(r2, Float64Index) - tm.assert_index_equal(r1, r2) - - def test_2000(self): - r1 = Float64Index([2451601.5, 2451602.5, 2451603.5, 2451604.5, - 2451605.5]) - r2 = date_range(start=Timestamp('2000-02-27'), periods=5, - freq='D').to_julian_date() - assert isinstance(r2, Float64Index) - tm.assert_index_equal(r1, r2) - - def test_hour(self): - r1 = Float64Index( - [2451601.5, 2451601.5416666666666666, 2451601.5833333333333333, - 2451601.625, 2451601.6666666666666666]) - r2 = date_range(start=Timestamp('2000-02-27'), periods=5, - freq='H').to_julian_date() - assert isinstance(r2, Float64Index) - tm.assert_index_equal(r1, r2) - - def test_minute(self): - r1 = Float64Index( - [2451601.5, 2451601.5006944444444444, 2451601.5013888888888888, - 2451601.5020833333333333, 2451601.5027777777777777]) - r2 = date_range(start=Timestamp('2000-02-27'), periods=5, - freq='T').to_julian_date() - 
assert isinstance(r2, Float64Index) - tm.assert_index_equal(r1, r2) - - def test_second(self): - r1 = Float64Index( - [2451601.5, 2451601.500011574074074, 2451601.5000231481481481, - 2451601.5000347222222222, 2451601.5000462962962962]) - r2 = date_range(start=Timestamp('2000-02-27'), periods=5, - freq='S').to_julian_date() - assert isinstance(r2, Float64Index) - tm.assert_index_equal(r1, r2) + date_range, Timestamp) class TestTimeSeries(object): @@ -129,17 +83,6 @@ def test_range_edges(self): '1970-01-03', '1970-01-04']) tm.assert_index_equal(idx, exp) - def test_datetimeindex_integers_shift(self): - rng = date_range('1/1/2000', periods=20) - - result = rng + 5 - expected = rng.shift(5) - tm.assert_index_equal(result, expected) - - result = rng - 5 - expected = rng.shift(-5) - tm.assert_index_equal(result, expected) - def test_datetimeindex_repr_short(self): dr = date_range(start='1/1/2012', periods=1) repr(dr) @@ -150,25 +93,6 @@ def test_datetimeindex_repr_short(self): dr = date_range(start='1/1/2012', periods=3) repr(dr) - def test_normalize(self): - rng = date_range('1/1/2000 9:30', periods=10, freq='D') - - result = rng.normalize() - expected = date_range('1/1/2000', periods=10, freq='D') - tm.assert_index_equal(result, expected) - - rng_ns = pd.DatetimeIndex(np.array([1380585623454345752, - 1380585612343234312]).astype( - "datetime64[ns]")) - rng_ns_normalized = rng_ns.normalize() - expected = pd.DatetimeIndex(np.array([1380585600000000000, - 1380585600000000000]).astype( - "datetime64[ns]")) - tm.assert_index_equal(rng_ns_normalized, expected) - - assert result.is_normalized - assert not rng.is_normalized - class TestDatetime64(object): diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index fb8dd1a43aa7f..4f386eb28cc0f 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -7,7 +7,6 @@ from itertools import product import pandas as pd -from pandas.errors 
import NullFrequencyError import pandas._libs.tslib as tslib from pandas._libs.tslibs.offsets import shift_months import pandas.util.testing as tm @@ -144,76 +143,6 @@ def test_numpy_minmax(self): tm.assert_raises_regex( ValueError, errmsg, np.argmax, dr, out=0) - def test_round_daily(self): - dti = pd.date_range('20130101 09:10:11', periods=5) - result = dti.round('D') - expected = pd.date_range('20130101', periods=5) - tm.assert_index_equal(result, expected) - - dti = dti.tz_localize('UTC').tz_convert('US/Eastern') - result = dti.round('D') - expected = pd.date_range('20130101', - periods=5).tz_localize('US/Eastern') - tm.assert_index_equal(result, expected) - - result = dti.round('s') - tm.assert_index_equal(result, dti) - - # invalid - for freq in ['Y', 'M', 'foobar']: - pytest.raises(ValueError, lambda: dti.round(freq)) - - def test_round(self): - for tz in self.tz: - rng = pd.date_range(start='2016-01-01', periods=5, - freq='30Min', tz=tz) - elt = rng[1] - - expected_rng = DatetimeIndex([ - Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'), - Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'), - Timestamp('2016-01-01 01:00:00', tz=tz, freq='30T'), - Timestamp('2016-01-01 02:00:00', tz=tz, freq='30T'), - Timestamp('2016-01-01 02:00:00', tz=tz, freq='30T'), - ]) - expected_elt = expected_rng[1] - - tm.assert_index_equal(rng.round(freq='H'), expected_rng) - assert elt.round(freq='H') == expected_elt - - msg = pd._libs.tslibs.frequencies._INVALID_FREQ_ERROR - with tm.assert_raises_regex(ValueError, msg): - rng.round(freq='foo') - with tm.assert_raises_regex(ValueError, msg): - elt.round(freq='foo') - - msg = " is a non-fixed frequency" - tm.assert_raises_regex(ValueError, msg, rng.round, freq='M') - tm.assert_raises_regex(ValueError, msg, elt.round, freq='M') - - # GH 14440 & 15578 - index = pd.DatetimeIndex(['2016-10-17 12:00:00.0015'], tz=tz) - result = index.round('ms') - expected = pd.DatetimeIndex(['2016-10-17 12:00:00.002000'], tz=tz) - 
tm.assert_index_equal(result, expected) - - for freq in ['us', 'ns']: - tm.assert_index_equal(index, index.round(freq)) - - index = pd.DatetimeIndex(['2016-10-17 12:00:00.00149'], tz=tz) - result = index.round('ms') - expected = pd.DatetimeIndex(['2016-10-17 12:00:00.001000'], tz=tz) - tm.assert_index_equal(result, expected) - - index = pd.DatetimeIndex(['2016-10-17 12:00:00.001501031']) - result = index.round('10ns') - expected = pd.DatetimeIndex(['2016-10-17 12:00:00.001501030']) - tm.assert_index_equal(result, expected) - - with tm.assert_produces_warning(): - ts = '2016-10-17 12:00:00.001501031' - pd.DatetimeIndex([ts]).round('1010ns') - def test_repeat_range(self): rng = date_range('1/1/2000', '1/1/2001') @@ -586,52 +515,6 @@ def test_nat_new(self): exp = np.array([tslib.iNaT] * 5, dtype=np.int64) tm.assert_numpy_array_equal(result, exp) - def test_shift_no_freq(self): - # GH#19147 - dti = pd.DatetimeIndex(['2011-01-01 10:00', '2011-01-01'], freq=None) - with pytest.raises(NullFrequencyError): - dti.shift(2) - - def test_shift(self): - # GH 9903 - for tz in self.tz: - idx = pd.DatetimeIndex([], name='xxx', tz=tz) - tm.assert_index_equal(idx.shift(0, freq='H'), idx) - tm.assert_index_equal(idx.shift(3, freq='H'), idx) - - idx = pd.DatetimeIndex(['2011-01-01 10:00', '2011-01-01 11:00' - '2011-01-01 12:00'], name='xxx', tz=tz) - tm.assert_index_equal(idx.shift(0, freq='H'), idx) - exp = pd.DatetimeIndex(['2011-01-01 13:00', '2011-01-01 14:00' - '2011-01-01 15:00'], name='xxx', tz=tz) - tm.assert_index_equal(idx.shift(3, freq='H'), exp) - exp = pd.DatetimeIndex(['2011-01-01 07:00', '2011-01-01 08:00' - '2011-01-01 09:00'], name='xxx', tz=tz) - tm.assert_index_equal(idx.shift(-3, freq='H'), exp) - - # TODO: moved from test_datetimelike; de-duplicate with test_shift above - def test_shift2(self): - # test shift for datetimeIndex and non datetimeIndex - # GH8083 - drange = pd.date_range('20130101', periods=5) - result = drange.shift(1) - expected = 
pd.DatetimeIndex(['2013-01-02', '2013-01-03', '2013-01-04', - '2013-01-05', - '2013-01-06'], freq='D') - tm.assert_index_equal(result, expected) - - result = drange.shift(-1) - expected = pd.DatetimeIndex(['2012-12-31', '2013-01-01', '2013-01-02', - '2013-01-03', '2013-01-04'], - freq='D') - tm.assert_index_equal(result, expected) - - result = drange.shift(3, freq='2D') - expected = pd.DatetimeIndex(['2013-01-07', '2013-01-08', '2013-01-09', - '2013-01-10', - '2013-01-11'], freq='D') - tm.assert_index_equal(result, expected) - def test_nat(self): assert pd.DatetimeIndex._na_value is pd.NaT assert pd.DatetimeIndex([])._na_value is pd.NaT diff --git a/pandas/tests/indexes/datetimes/test_scalar_compat.py b/pandas/tests/indexes/datetimes/test_scalar_compat.py new file mode 100644 index 0000000000000..111f68ba14775 --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_scalar_compat.py @@ -0,0 +1,188 @@ +# -*- coding: utf-8 -*- +""" +Tests for DatetimeIndex methods behaving like their Timestamp counterparts +""" +from datetime import datetime + +import numpy as np +import pytest + +import pandas.util.testing as tm +import pandas as pd + +from pandas import date_range, Timestamp, DatetimeIndex + + +@pytest.fixture(params=[None, 'UTC', 'Asia/Tokyo', + 'US/Eastern', 'dateutil/Asia/Singapore', + 'dateutil/US/Pacific']) +def tz(request): + return request.param + + +class TestDatetimeIndexOps(object): + def test_dti_time(self): + rng = date_range('1/1/2000', freq='12min', periods=10) + result = pd.Index(rng).time + expected = [t.time() for t in rng] + assert (result == expected).all() + + def test_dti_date(self): + rng = date_range('1/1/2000', freq='12H', periods=10) + result = pd.Index(rng).date + expected = [t.date() for t in rng] + assert (result == expected).all() + + def test_dti_date_out_of_range(self): + # GH#1475 + pytest.raises(ValueError, DatetimeIndex, ['1400-01-01']) + pytest.raises(ValueError, DatetimeIndex, [datetime(1400, 1, 1)]) + + def 
test_dti_timestamp_fields(self): + # extra fields from DatetimeIndex like quarter and week + idx = tm.makeDateIndex(100) + + fields = ['dayofweek', 'dayofyear', 'week', 'weekofyear', 'quarter', + 'days_in_month', 'is_month_start', 'is_month_end', + 'is_quarter_start', 'is_quarter_end', 'is_year_start', + 'is_year_end', 'weekday_name'] + for f in fields: + expected = getattr(idx, f)[-1] + result = getattr(Timestamp(idx[-1]), f) + assert result == expected + + assert idx.freq == Timestamp(idx[-1], idx.freq).freq + assert idx.freqstr == Timestamp(idx[-1], idx.freq).freqstr + + # ---------------------------------------------------------------- + # DatetimeIndex.round + + def test_round_daily(self): + dti = date_range('20130101 09:10:11', periods=5) + result = dti.round('D') + expected = date_range('20130101', periods=5) + tm.assert_index_equal(result, expected) + + dti = dti.tz_localize('UTC').tz_convert('US/Eastern') + result = dti.round('D') + expected = date_range('20130101', + periods=5).tz_localize('US/Eastern') + tm.assert_index_equal(result, expected) + + result = dti.round('s') + tm.assert_index_equal(result, dti) + + # invalid + for freq in ['Y', 'M', 'foobar']: + pytest.raises(ValueError, lambda: dti.round(freq)) + + def test_round(self, tz): + rng = date_range(start='2016-01-01', periods=5, + freq='30Min', tz=tz) + elt = rng[1] + + expected_rng = DatetimeIndex([ + Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 01:00:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 02:00:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 02:00:00', tz=tz, freq='30T'), + ]) + expected_elt = expected_rng[1] + + tm.assert_index_equal(rng.round(freq='H'), expected_rng) + assert elt.round(freq='H') == expected_elt + + msg = pd._libs.tslibs.frequencies._INVALID_FREQ_ERROR + with tm.assert_raises_regex(ValueError, msg): + rng.round(freq='foo') + with tm.assert_raises_regex(ValueError, msg): + 
elt.round(freq='foo') + + msg = " is a non-fixed frequency" + tm.assert_raises_regex(ValueError, msg, rng.round, freq='M') + tm.assert_raises_regex(ValueError, msg, elt.round, freq='M') + + # GH#14440 & GH#15578 + index = DatetimeIndex(['2016-10-17 12:00:00.0015'], tz=tz) + result = index.round('ms') + expected = DatetimeIndex(['2016-10-17 12:00:00.002000'], tz=tz) + tm.assert_index_equal(result, expected) + + for freq in ['us', 'ns']: + tm.assert_index_equal(index, index.round(freq)) + + index = DatetimeIndex(['2016-10-17 12:00:00.00149'], tz=tz) + result = index.round('ms') + expected = DatetimeIndex(['2016-10-17 12:00:00.001000'], tz=tz) + tm.assert_index_equal(result, expected) + + index = DatetimeIndex(['2016-10-17 12:00:00.001501031']) + result = index.round('10ns') + expected = DatetimeIndex(['2016-10-17 12:00:00.001501030']) + tm.assert_index_equal(result, expected) + + with tm.assert_produces_warning(): + ts = '2016-10-17 12:00:00.001501031' + DatetimeIndex([ts]).round('1010ns') + + # ---------------------------------------------------------------- + # DatetimeIndex.normalize + + def test_normalize(self): + rng = date_range('1/1/2000 9:30', periods=10, freq='D') + + result = rng.normalize() + expected = date_range('1/1/2000', periods=10, freq='D') + tm.assert_index_equal(result, expected) + + arr_ns = np.array([1380585623454345752, + 1380585612343234312]).astype("datetime64[ns]") + rng_ns = DatetimeIndex(arr_ns) + rng_ns_normalized = rng_ns.normalize() + + arr_ns = np.array([1380585600000000000, + 1380585600000000000]).astype("datetime64[ns]") + expected = DatetimeIndex(arr_ns) + tm.assert_index_equal(rng_ns_normalized, expected) + + assert result.is_normalized + assert not rng.is_normalized + + +class TestDateTimeIndexToJulianDate(object): + + def test_1700(self): + dr = date_range(start=Timestamp('1710-10-01'), periods=5, freq='D') + r1 = pd.Index([x.to_julian_date() for x in dr]) + r2 = dr.to_julian_date() + assert isinstance(r2, pd.Float64Index) + 
tm.assert_index_equal(r1, r2) + + def test_2000(self): + dr = date_range(start=Timestamp('2000-02-27'), periods=5, freq='D') + r1 = pd.Index([x.to_julian_date() for x in dr]) + r2 = dr.to_julian_date() + assert isinstance(r2, pd.Float64Index) + tm.assert_index_equal(r1, r2) + + def test_hour(self): + dr = date_range(start=Timestamp('2000-02-27'), periods=5, freq='H') + r1 = pd.Index([x.to_julian_date() for x in dr]) + r2 = dr.to_julian_date() + assert isinstance(r2, pd.Float64Index) + tm.assert_index_equal(r1, r2) + + def test_minute(self): + dr = date_range(start=Timestamp('2000-02-27'), periods=5, freq='T') + r1 = pd.Index([x.to_julian_date() for x in dr]) + r2 = dr.to_julian_date() + assert isinstance(r2, pd.Float64Index) + tm.assert_index_equal(r1, r2) + + def test_second(self): + dr = date_range(start=Timestamp('2000-02-27'), periods=5, freq='S') + r1 = pd.Index([x.to_julian_date() for x in dr]) + r2 = dr.to_julian_date() + assert isinstance(r2, pd.Float64Index) + tm.assert_index_equal(r1, r2) diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index b74da4922429d..84632e59e2bfb 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -17,6 +17,20 @@ class TestDatetimeIndexSetOps(object): tz = [None, 'UTC', 'Asia/Tokyo', 'US/Eastern', 'dateutil/Asia/Singapore', 'dateutil/US/Pacific'] + # TODO: moved from test_datetimelike; dedup with version below + def test_union2(self): + everything = tm.makeDateIndex(10) + first = everything[:5] + second = everything[5:] + union = first.union(second) + assert tm.equalContents(union, everything) + + # GH 10149 + cases = [klass(second.values) for klass in [np.array, Series, list]] + for case in cases: + result = first.union(case) + assert tm.equalContents(result, everything) + @pytest.mark.parametrize("tz", tz) def test_union(self, tz): rng1 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) @@ -101,6 +115,24 
@@ def test_union_with_DatetimeIndex(self): i1.union(i2) # Works i2.union(i1) # Fails with "AttributeError: can't set attribute" + # TODO: moved from test_datetimelike; de-duplicate with version below + def test_intersection2(self): + first = tm.makeDateIndex(10) + second = first[5:] + intersect = first.intersection(second) + assert tm.equalContents(intersect, second) + + # GH 10149 + cases = [klass(second.values) for klass in [np.array, Series, list]] + for case in cases: + result = first.intersection(case) + assert tm.equalContents(result, second) + + third = Index(['a', 'b', 'c']) + result = first.intersection(third) + expected = pd.Index([], dtype=object) + tm.assert_index_equal(result, expected) + @pytest.mark.parametrize("tz", [None, 'Asia/Tokyo', 'US/Eastern', 'dateutil/US/Pacific']) def test_intersection(self, tz): From 72329323056b3b2fe82a1fe170c618c3429b4e60 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 1 Feb 2018 03:33:00 -0800 Subject: [PATCH 021/217] Refactor out libwriters, fix references to Timestamp, Timedelta (#19413) --- pandas/_libs/lib.pyx | 196 +----------------- pandas/_libs/parsers.pyx | 34 +++ pandas/_libs/src/inference.pyx | 26 +-- pandas/_libs/writers.pyx | 174 ++++++++++++++++ pandas/core/computation/scope.py | 2 +- pandas/core/dtypes/cast.py | 2 +- pandas/core/generic.py | 8 +- pandas/core/internals.py | 4 +- pandas/core/nanops.py | 4 +- pandas/core/resample.py | 2 +- pandas/io/formats/format.py | 5 +- pandas/io/json/normalize.py | 2 +- pandas/io/parsers.py | 5 +- pandas/io/pytables.py | 13 +- pandas/io/stata.py | 3 +- pandas/plotting/_converter.py | 6 +- .../indexes/datetimes/test_construction.py | 3 +- pandas/tests/indexes/test_base.py | 2 +- pandas/tests/indexes/test_multi.py | 2 +- pandas/tests/indexes/test_numeric.py | 2 +- pandas/tests/io/json/test_ujson.py | 2 +- pandas/tests/io/parser/common.py | 2 +- pandas/tests/io/parser/converters.py | 2 +- pandas/tests/io/parser/parse_dates.py | 2 +- 
pandas/tests/io/parser/test_parsers.py | 2 +- pandas/tests/io/parser/usecols.py | 2 +- pandas/tests/series/test_indexing.py | 4 +- pandas/tests/test_lib.py | 10 +- setup.py | 4 + 29 files changed, 262 insertions(+), 263 deletions(-) create mode 100644 pandas/_libs/writers.pyx diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index c3a654b01022c..e1d59f807a7fd 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -21,14 +21,7 @@ from cpython cimport (Py_INCREF, PyTuple_SET_ITEM, PyBytes_Check, PyUnicode_Check, PyTuple_New, - PyObject_RichCompareBool, - PyBytes_GET_SIZE, - PyUnicode_GET_SIZE) - -try: - from cpython cimport PyString_GET_SIZE -except ImportError: - from cpython cimport PyUnicode_GET_SIZE as PyString_GET_SIZE + PyObject_RichCompareBool) cimport cpython @@ -38,7 +31,7 @@ from cpython.datetime cimport (PyDateTime_Check, PyDate_Check, PyDateTime_IMPORT) PyDateTime_IMPORT -from tslib import NaT, Timestamp, Timedelta, array_to_datetime +from tslib import NaT, array_to_datetime from missing cimport checknull @@ -127,28 +120,6 @@ def item_from_zerodim(object val): return util.unbox_if_zerodim(val) -@cython.wraparound(False) -@cython.boundscheck(False) -def fast_unique(ndarray[object] values): - cdef: - Py_ssize_t i, n = len(values) - list uniques = [] - dict table = {} - object val, stub = 0 - - for i from 0 <= i < n: - val = values[i] - if val not in table: - table[val] = stub - uniques.append(val) - try: - uniques.sort() - except Exception: - pass - - return uniques - - @cython.wraparound(False) @cython.boundscheck(False) def fast_unique_multiple(list arrays): @@ -368,30 +339,6 @@ def has_infs_f8(ndarray[float64_t] arr): return False -def convert_timestamps(ndarray values): - cdef: - object val, f, result - dict cache = {} - Py_ssize_t i, n = len(values) - ndarray[object] out - - # for HDFStore, a bit temporary but... 
- - from datetime import datetime - f = datetime.fromtimestamp - - out = np.empty(n, dtype='O') - - for i in range(n): - val = util.get_value_1d(values, i) - if val in cache: - out[i] = cache[val] - else: - cache[val] = out[i] = f(val) - - return out - - def maybe_indices_to_slice(ndarray[int64_t] indices, int max_len): cdef: Py_ssize_t i, n = len(indices) @@ -731,145 +678,6 @@ def clean_index_list(list obj): return np.asarray(obj), 0 -ctypedef fused pandas_string: - str - unicode - bytes - - -@cython.boundscheck(False) -@cython.wraparound(False) -cpdef Py_ssize_t max_len_string_array(pandas_string[:] arr): - """ return the maximum size of elements in a 1-dim string array """ - cdef: - Py_ssize_t i, m = 0, l = 0, length = arr.shape[0] - pandas_string v - - for i in range(length): - v = arr[i] - if PyString_Check(v): - l = PyString_GET_SIZE(v) - elif PyBytes_Check(v): - l = PyBytes_GET_SIZE(v) - elif PyUnicode_Check(v): - l = PyUnicode_GET_SIZE(v) - - if l > m: - m = l - - return m - - -@cython.boundscheck(False) -@cython.wraparound(False) -def string_array_replace_from_nan_rep( - ndarray[object, ndim=1] arr, object nan_rep, - object replace=None): - """ - Replace the values in the array with 'replacement' if - they are 'nan_rep'. Return the same array. 
- """ - - cdef int length = arr.shape[0], i = 0 - if replace is None: - replace = np.nan - - for i from 0 <= i < length: - if arr[i] == nan_rep: - arr[i] = replace - - return arr - - -@cython.boundscheck(False) -@cython.wraparound(False) -def convert_json_to_lines(object arr): - """ - replace comma separated json with line feeds, paying special attention - to quotes & brackets - """ - cdef: - Py_ssize_t i = 0, num_open_brackets_seen = 0, length - bint in_quotes = 0, is_escaping = 0 - ndarray[uint8_t] narr - unsigned char v, comma, left_bracket, right_brack, newline - - newline = ord('\n') - comma = ord(',') - left_bracket = ord('{') - right_bracket = ord('}') - quote = ord('"') - backslash = ord('\\') - - narr = np.frombuffer(arr.encode('utf-8'), dtype='u1').copy() - length = narr.shape[0] - for i in range(length): - v = narr[i] - if v == quote and i > 0 and not is_escaping: - in_quotes = ~in_quotes - if v == backslash or is_escaping: - is_escaping = ~is_escaping - if v == comma: # commas that should be \n - if num_open_brackets_seen == 0 and not in_quotes: - narr[i] = newline - elif v == left_bracket: - if not in_quotes: - num_open_brackets_seen += 1 - elif v == right_bracket: - if not in_quotes: - num_open_brackets_seen -= 1 - - return narr.tostring().decode('utf-8') - - -@cython.boundscheck(False) -@cython.wraparound(False) -def write_csv_rows(list data, ndarray data_index, - int nlevels, ndarray cols, object writer): - - cdef int N, j, i, ncols - cdef list rows - cdef object val - - # In crude testing, N>100 yields little marginal improvement - N=100 - - # pre-allocate rows - ncols = len(cols) - rows = [[None] * (nlevels + ncols) for x in range(N)] - - j = -1 - if nlevels == 1: - for j in range(len(data_index)): - row = rows[j % N] - row[0] = data_index[j] - for i in range(ncols): - row[1 + i] = data[i][j] - - if j >= N - 1 and j % N == N - 1: - writer.writerows(rows) - elif nlevels > 1: - for j in range(len(data_index)): - row = rows[j % N] - row[:nlevels] = 
list(data_index[j]) - for i in range(ncols): - row[nlevels + i] = data[i][j] - - if j >= N - 1 and j % N == N - 1: - writer.writerows(rows) - else: - for j in range(len(data_index)): - row = rows[j % N] - for i in range(ncols): - row[i] = data[i][j] - - if j >= N - 1 and j % N == N - 1: - writer.writerows(rows) - - if j >= 0 and (j < N - 1 or (j % N) != N - 1): - writer.writerows(rows[:((j + 1) % N)]) - - # ------------------------------------------------------------------------------ # Groupby-related functions diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index efe61716d0831..89d2de6de213a 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -2225,3 +2225,37 @@ def _maybe_encode(values): if values is None: return [] return [x.encode('utf-8') if isinstance(x, unicode) else x for x in values] + + +def sanitize_objects(ndarray[object] values, set na_values, + convert_empty=True): + """ + Convert specified values, including the given set na_values and empty + strings if convert_empty is True, to np.nan. 
+ + Parameters + ---------- + values : ndarray[object] + na_values : set + convert_empty : bool (default True) + """ + cdef: + Py_ssize_t i, n + object val, onan + Py_ssize_t na_count = 0 + dict memo = {} + + n = len(values) + onan = np.nan + + for i from 0 <= i < n: + val = values[i] + if (convert_empty and val == '') or (val in na_values): + values[i] = onan + na_count += 1 + elif val in memo: + values[i] = memo[val] + else: + memo[val] = val + + return na_count diff --git a/pandas/_libs/src/inference.pyx b/pandas/_libs/src/inference.pyx index b29a2e519efcd..75bff34e4a391 100644 --- a/pandas/_libs/src/inference.pyx +++ b/pandas/_libs/src/inference.pyx @@ -6,7 +6,7 @@ from tslibs.nattype import NaT from tslibs.conversion cimport convert_to_tsobject from tslibs.timedeltas cimport convert_to_timedelta64 from tslibs.timezones cimport get_timezone, tz_compare -from datetime import datetime, timedelta + iNaT = util.get_nat() cdef bint PY2 = sys.version_info[0] == 2 @@ -1405,30 +1405,6 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, return objects -def sanitize_objects(ndarray[object] values, set na_values, - convert_empty=True): - cdef: - Py_ssize_t i, n - object val, onan - Py_ssize_t na_count = 0 - dict memo = {} - - n = len(values) - onan = np.nan - - for i from 0 <= i < n: - val = values[i] - if (convert_empty and val == '') or (val in na_values): - values[i] = onan - na_count += 1 - elif val in memo: - values[i] = memo[val] - else: - memo[val] = val - - return na_count - - def maybe_convert_bool(ndarray[object] arr, true_values=None, false_values=None): cdef: diff --git a/pandas/_libs/writers.pyx b/pandas/_libs/writers.pyx new file mode 100644 index 0000000000000..6f07d04b3fad3 --- /dev/null +++ b/pandas/_libs/writers.pyx @@ -0,0 +1,174 @@ +# -*- coding: utf-8 -*- + +cimport cython +from cython cimport Py_ssize_t + +from cpython cimport (PyString_Check, PyBytes_Check, PyUnicode_Check, + PyBytes_GET_SIZE, PyUnicode_GET_SIZE) + +try: + from 
cpython cimport PyString_GET_SIZE +except ImportError: + from cpython cimport PyUnicode_GET_SIZE as PyString_GET_SIZE + +import numpy as np +cimport numpy as cnp +from numpy cimport ndarray, uint8_t +cnp.import_array() + +cimport util + + +ctypedef fused pandas_string: + str + unicode + bytes + + +@cython.boundscheck(False) +@cython.wraparound(False) +def write_csv_rows(list data, ndarray data_index, + int nlevels, ndarray cols, object writer): + """ + Write the given data to the writer object, pre-allocating where possible + for performance improvements. + + Parameters + ---------- + data : list + data_index : ndarray + nlevels : int + cols : ndarray + writer : object + """ + cdef int N, j, i, ncols + cdef list rows + cdef object val + + # In crude testing, N>100 yields little marginal improvement + N = 100 + + # pre-allocate rows + ncols = len(cols) + rows = [[None] * (nlevels + ncols) for x in range(N)] + + j = -1 + if nlevels == 1: + for j in range(len(data_index)): + row = rows[j % N] + row[0] = data_index[j] + for i in range(ncols): + row[1 + i] = data[i][j] + + if j >= N - 1 and j % N == N - 1: + writer.writerows(rows) + elif nlevels > 1: + for j in range(len(data_index)): + row = rows[j % N] + row[:nlevels] = list(data_index[j]) + for i in range(ncols): + row[nlevels + i] = data[i][j] + + if j >= N - 1 and j % N == N - 1: + writer.writerows(rows) + else: + for j in range(len(data_index)): + row = rows[j % N] + for i in range(ncols): + row[i] = data[i][j] + + if j >= N - 1 and j % N == N - 1: + writer.writerows(rows) + + if j >= 0 and (j < N - 1 or (j % N) != N - 1): + writer.writerows(rows[:((j + 1) % N)]) + + +@cython.boundscheck(False) +@cython.wraparound(False) +def convert_json_to_lines(object arr): + """ + replace comma separated json with line feeds, paying special attention + to quotes & brackets + """ + cdef: + Py_ssize_t i = 0, num_open_brackets_seen = 0, length + bint in_quotes = 0, is_escaping = 0 + ndarray[uint8_t] narr + unsigned char v, comma, 
left_bracket, right_brack, newline + + newline = ord('\n') + comma = ord(',') + left_bracket = ord('{') + right_bracket = ord('}') + quote = ord('"') + backslash = ord('\\') + + narr = np.frombuffer(arr.encode('utf-8'), dtype='u1').copy() + length = narr.shape[0] + for i in range(length): + v = narr[i] + if v == quote and i > 0 and not is_escaping: + in_quotes = ~in_quotes + if v == backslash or is_escaping: + is_escaping = ~is_escaping + if v == comma: # commas that should be \n + if num_open_brackets_seen == 0 and not in_quotes: + narr[i] = newline + elif v == left_bracket: + if not in_quotes: + num_open_brackets_seen += 1 + elif v == right_bracket: + if not in_quotes: + num_open_brackets_seen -= 1 + + return narr.tostring().decode('utf-8') + + +# stata, pytables +@cython.boundscheck(False) +@cython.wraparound(False) +cpdef Py_ssize_t max_len_string_array(pandas_string[:] arr): + """ return the maximum size of elements in a 1-dim string array """ + cdef: + Py_ssize_t i, m = 0, l = 0, length = arr.shape[0] + pandas_string v + + for i in range(length): + v = arr[i] + if PyString_Check(v): + l = PyString_GET_SIZE(v) + elif PyBytes_Check(v): + l = PyBytes_GET_SIZE(v) + elif PyUnicode_Check(v): + l = PyUnicode_GET_SIZE(v) + + if l > m: + m = l + + return m + + +# ------------------------------------------------------------------ +# PyTables Helpers + + +@cython.boundscheck(False) +@cython.wraparound(False) +def string_array_replace_from_nan_rep( + ndarray[object, ndim=1] arr, object nan_rep, + object replace=None): + """ + Replace the values in the array with 'replacement' if + they are 'nan_rep'. Return the same array. 
+ """ + + cdef int length = arr.shape[0], i = 0 + if replace is None: + replace = np.nan + + for i from 0 <= i < length: + if arr[i] == nan_rep: + arr[i] = replace + + return arr diff --git a/pandas/core/computation/scope.py b/pandas/core/computation/scope.py index 6a298f5137eb1..c3128be0f5599 100644 --- a/pandas/core/computation/scope.py +++ b/pandas/core/computation/scope.py @@ -48,7 +48,7 @@ def _raw_hex_id(obj): _DEFAULT_GLOBALS = { - 'Timestamp': pandas._libs.lib.Timestamp, + 'Timestamp': pandas._libs.tslib.Timestamp, 'datetime': datetime.datetime, 'True': True, 'False': False, diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 5155662d2f97d..b2816343fc8eb 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -282,7 +282,7 @@ def maybe_promote(dtype, fill_value=np.nan): fill_value = iNaT elif issubclass(dtype.type, np.timedelta64): try: - fill_value = lib.Timedelta(fill_value).value + fill_value = tslib.Timedelta(fill_value).value except Exception: # as for datetimes, cannot upcast to object fill_value = iNaT diff --git a/pandas/core/generic.py b/pandas/core/generic.py index bee954aa9bba8..5a15d720c5790 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10,7 +10,7 @@ import numpy as np import pandas as pd -from pandas._libs import tslib, lib, properties +from pandas._libs import tslib, properties from pandas.core.dtypes.common import ( _ensure_int64, _ensure_object, @@ -7216,9 +7216,9 @@ def describe_categorical_1d(data): if is_datetime64_dtype(data): asint = data.dropna().values.view('i8') names += ['top', 'freq', 'first', 'last'] - result += [lib.Timestamp(top), freq, - lib.Timestamp(asint.min()), - lib.Timestamp(asint.max())] + result += [tslib.Timestamp(top), freq, + tslib.Timestamp(asint.min()), + tslib.Timestamp(asint.max())] else: names += ['top', 'freq'] result += [top, freq] diff --git a/pandas/core/internals.py b/pandas/core/internals.py index f3e5e4c99a899..22d38d3df071e 100644 --- 
a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -2656,7 +2656,7 @@ def _try_coerce_args(self, values, other): other = other.asi8 other_mask = isna(other) elif isinstance(other, (np.datetime64, datetime, date)): - other = lib.Timestamp(other) + other = tslib.Timestamp(other) tz = getattr(other, 'tz', None) # test we can have an equal time zone @@ -2675,7 +2675,7 @@ def _try_coerce_result(self, result): if result.dtype.kind in ['i', 'f', 'O']: result = result.astype('M8[ns]') elif isinstance(result, (np.integer, np.float, np.datetime64)): - result = lib.Timestamp(result, tz=self.values.tz) + result = tslib.Timestamp(result, tz=self.values.tz) if isinstance(result, np.ndarray): # allow passing of > 1dim if its trivial if result.ndim > 1: diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index eda86f12d501d..d4851f579dda4 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -268,7 +268,7 @@ def _wrap_results(result, dtype): if is_datetime64_dtype(dtype): if not isinstance(result, np.ndarray): - result = lib.Timestamp(result) + result = tslib.Timestamp(result) else: result = result.view(dtype) elif is_timedelta64_dtype(dtype): @@ -278,7 +278,7 @@ def _wrap_results(result, dtype): if np.fabs(result) > _int64_max: raise ValueError("overflow in timedelta operation") - result = lib.Timedelta(result, unit='ns') + result = tslib.Timedelta(result, unit='ns') else: result = result.astype('i8').view(dtype) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 706bec9e44892..961c8c004e9e3 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -24,7 +24,7 @@ from pandas.compat.numpy import function as nv from pandas._libs import lib, tslib -from pandas._libs.lib import Timestamp +from pandas._libs.tslib import Timestamp from pandas._libs.tslibs.period import IncompatibleFrequency from pandas.util._decorators import Appender, Substitution diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 
bca0b64cb53fe..269c81b380b5e 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -38,7 +38,7 @@ _stringify_path) from pandas.io.formats.printing import adjoin, justify, pprint_thing from pandas.io.formats.common import get_level_lengths -from pandas._libs import lib +from pandas._libs import lib, writers as libwriters from pandas._libs.tslib import (iNaT, Timestamp, Timedelta, format_array_from_datetime) from pandas.core.indexes.datetimes import DatetimeIndex @@ -1789,7 +1789,8 @@ def _save_chunk(self, start_i, end_i): date_format=self.date_format, quoting=self.quoting) - lib.write_csv_rows(self.data, ix, self.nlevels, self.cols, self.writer) + libwriters.write_csv_rows(self.data, ix, self.nlevels, + self.cols, self.writer) # ---------------------------------------------------------------------- diff --git a/pandas/io/json/normalize.py b/pandas/io/json/normalize.py index 595031b04e367..c7901f4352d00 100644 --- a/pandas/io/json/normalize.py +++ b/pandas/io/json/normalize.py @@ -5,7 +5,7 @@ from collections import defaultdict import numpy as np -from pandas._libs.lib import convert_json_to_lines +from pandas._libs.writers import convert_json_to_lines from pandas import compat, DataFrame diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 5135bb01fb378..af1441f4a0fc9 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1596,11 +1596,12 @@ def _infer_types(self, values, na_values, try_num_bool=True): except Exception: result = values if values.dtype == np.object_: - na_count = lib.sanitize_objects(result, na_values, False) + na_count = parsers.sanitize_objects(result, na_values, + False) else: result = values if values.dtype == np.object_: - na_count = lib.sanitize_objects(values, na_values, False) + na_count = parsers.sanitize_objects(values, na_values, False) if result.dtype == np.object_ and try_num_bool: result = lib.maybe_convert_bool(values, diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 
5376473f83f22..0d833807602e1 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -47,7 +47,7 @@ from pandas.core.config import get_option from pandas.core.computation.pytables import Expr, maybe_expression -from pandas._libs import algos, lib +from pandas._libs import algos, lib, writers as libwriters from pandas._libs.tslibs import timezones from distutils.version import LooseVersion @@ -3843,7 +3843,7 @@ def read(self, where=None, columns=None, **kwargs): # need a better algorithm tuple_index = long_index.values - unique_tuples = lib.fast_unique(tuple_index) + unique_tuples = unique(tuple_index) unique_tuples = com._asarray_tuplesafe(unique_tuples) indexer = match(unique_tuples, tuple_index) @@ -4561,7 +4561,8 @@ def _convert_string_array(data, encoding, itemsize=None): # create the sized dtype if itemsize is None: - itemsize = lib.max_len_string_array(_ensure_object(data.ravel())) + ensured = _ensure_object(data.ravel()) + itemsize = libwriters.max_len_string_array(ensured) data = np.asarray(data, dtype="S%d" % itemsize) return data @@ -4590,7 +4591,7 @@ def _unconvert_string_array(data, nan_rep=None, encoding=None): encoding = _ensure_encoding(encoding) if encoding is not None and len(data): - itemsize = lib.max_len_string_array(_ensure_object(data)) + itemsize = libwriters.max_len_string_array(_ensure_object(data)) if compat.PY3: dtype = "U{0}".format(itemsize) else: @@ -4604,7 +4605,7 @@ def _unconvert_string_array(data, nan_rep=None, encoding=None): if nan_rep is None: nan_rep = 'nan' - data = lib.string_array_replace_from_nan_rep(data, nan_rep) + data = libwriters.string_array_replace_from_nan_rep(data, nan_rep) return data.reshape(shape) @@ -4621,7 +4622,7 @@ def _get_converter(kind, encoding): if kind == 'datetime64': return lambda x: np.asarray(x, dtype='M8[ns]') elif kind == 'datetime': - return lib.convert_timestamps + return lambda x: to_datetime(x, cache=True).to_pydatetime() elif kind == 'string': return lambda x: 
_unconvert_string_array(x, encoding=encoding) else: # pragma: no cover diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 0922a4a9c3e9b..adbff06364dbe 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -17,8 +17,9 @@ import numpy as np from dateutil.relativedelta import relativedelta -from pandas._libs.lib import max_len_string_array, infer_dtype +from pandas._libs.lib import infer_dtype from pandas._libs.tslib import NaT, Timestamp +from pandas._libs.writers import max_len_string_array import pandas as pd from pandas import compat, to_timedelta, to_datetime, isna, DatetimeIndex diff --git a/pandas/plotting/_converter.py b/pandas/plotting/_converter.py index 66ee7fa98491f..07163615c6ba4 100644 --- a/pandas/plotting/_converter.py +++ b/pandas/plotting/_converter.py @@ -23,7 +23,7 @@ from pandas.compat import lrange import pandas.compat as compat -import pandas._libs.lib as lib +from pandas._libs import tslib import pandas.core.common as com from pandas.core.index import Index @@ -52,7 +52,7 @@ def get_pairs(): pairs = [ - (lib.Timestamp, DatetimeConverter), + (tslib.Timestamp, DatetimeConverter), (Period, PeriodConverter), (pydt.datetime, DatetimeConverter), (pydt.date, DatetimeConverter), @@ -312,7 +312,7 @@ def try_parse(values): if isinstance(values, (datetime, pydt.date)): return _dt_to_float_ordinal(values) elif isinstance(values, np.datetime64): - return _dt_to_float_ordinal(lib.Timestamp(values)) + return _dt_to_float_ordinal(tslib.Timestamp(values)) elif isinstance(values, pydt.time): return dates.date2num(values) elif (is_integer(values) or is_float(values)): diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index b59dd25ead57f..197a42bdaacbb 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -7,7 +7,6 @@ import pandas as pd from pandas import offsets import pandas.util.testing as tm -from 
pandas._libs import lib from pandas._libs.tslib import OutOfBoundsDatetime from pandas._libs.tslibs import conversion from pandas import (DatetimeIndex, Index, Timestamp, datetime, date_range, @@ -537,7 +536,7 @@ def test_datetimeindex_constructor_misc(self): arr = [datetime(2005, 1, 1), '1/2/2005', '1/3/2005', '2005-01-04'] idx2 = DatetimeIndex(arr) - arr = [lib.Timestamp(datetime(2005, 1, 1)), '1/2/2005', '1/3/2005', + arr = [Timestamp(datetime(2005, 1, 1)), '1/2/2005', '1/3/2005', '2005-01-04'] idx3 = DatetimeIndex(arr) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 508c3a73f48c7..974099f1fbbe9 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -29,7 +29,7 @@ from pandas.core.indexes.datetimes import _to_m8 import pandas as pd -from pandas._libs.lib import Timestamp +from pandas._libs.tslib import Timestamp class TestIndex(Base): diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index aedc957ec67da..e59456b8a2d5e 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -19,7 +19,7 @@ from pandas.core.dtypes.dtypes import CategoricalDtype from pandas.core.indexes.base import InvalidIndexError from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike -from pandas._libs.lib import Timestamp +from pandas._libs.tslib import Timestamp import pandas.util.testing as tm diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 3de1c4c982654..0c1bec7a6f1a9 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -13,7 +13,7 @@ import pandas.util.testing as tm import pandas as pd -from pandas._libs.lib import Timestamp +from pandas._libs.tslib import Timestamp from pandas.tests.indexes.common import Base diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index cd1685f282bd2..e949772981eb7 100644 --- 
a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -425,7 +425,7 @@ def test_npy_nat(self): assert ujson.encode(input) == 'null', "Expected null" def test_datetime_units(self): - from pandas._libs.lib import Timestamp + from pandas._libs.tslib import Timestamp val = datetime.datetime(2013, 8, 17, 21, 17, 12, 215504) stamp = Timestamp(val) diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index f677b356a77a5..cf7ec9e2f2652 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -12,7 +12,7 @@ import pytest import numpy as np -from pandas._libs.lib import Timestamp +from pandas._libs.tslib import Timestamp import pandas as pd import pandas.util.testing as tm diff --git a/pandas/tests/io/parser/converters.py b/pandas/tests/io/parser/converters.py index 1176b1e84e29b..ae35d45591dc5 100644 --- a/pandas/tests/io/parser/converters.py +++ b/pandas/tests/io/parser/converters.py @@ -13,7 +13,7 @@ import pandas as pd import pandas.util.testing as tm -from pandas._libs.lib import Timestamp +from pandas._libs.tslib import Timestamp from pandas import DataFrame, Index from pandas.compat import parse_date, StringIO, lmap diff --git a/pandas/tests/io/parser/parse_dates.py b/pandas/tests/io/parser/parse_dates.py index b7d0dd1a3484f..919b357f14236 100644 --- a/pandas/tests/io/parser/parse_dates.py +++ b/pandas/tests/io/parser/parse_dates.py @@ -11,7 +11,7 @@ import pytest import numpy as np from pandas._libs.tslibs import parsing -from pandas._libs.lib import Timestamp +from pandas._libs.tslib import Timestamp import pandas as pd import pandas.io.parsers as parsers diff --git a/pandas/tests/io/parser/test_parsers.py b/pandas/tests/io/parser/test_parsers.py index ec240531925e3..7717102b64fc5 100644 --- a/pandas/tests/io/parser/test_parsers.py +++ b/pandas/tests/io/parser/test_parsers.py @@ -5,7 +5,7 @@ from pandas import read_csv, read_table, DataFrame import pandas.core.common as com -from 
pandas._libs.lib import Timestamp +from pandas._libs.tslib import Timestamp from pandas.compat import StringIO from .common import ParserTests diff --git a/pandas/tests/io/parser/usecols.py b/pandas/tests/io/parser/usecols.py index 8767055239cd5..195fb4cba2aed 100644 --- a/pandas/tests/io/parser/usecols.py +++ b/pandas/tests/io/parser/usecols.py @@ -11,7 +11,7 @@ import pandas.util.testing as tm from pandas import DataFrame, Index -from pandas._libs.lib import Timestamp +from pandas._libs.tslib import Timestamp from pandas.compat import StringIO diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index fbfbad547ce1b..e5c3d6f7d3ee1 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -17,7 +17,7 @@ Categorical) from pandas.core.indexing import IndexingError from pandas.tseries.offsets import BDay -from pandas._libs import tslib, lib +from pandas._libs import tslib from pandas.compat import lrange, range from pandas import compat @@ -2707,7 +2707,7 @@ def test_fancy_getitem(self): assert s['1/2/2009'] == 48 assert s['2009-1-2'] == 48 assert s[datetime(2009, 1, 2)] == 48 - assert s[lib.Timestamp(datetime(2009, 1, 2))] == 48 + assert s[Timestamp(datetime(2009, 1, 2))] == 48 pytest.raises(KeyError, s.__getitem__, '2009-1-3') assert_series_equal(s['3/6/2009':'2009-06-05'], diff --git a/pandas/tests/test_lib.py b/pandas/tests/test_lib.py index 10061204df42a..502f0c3bced61 100644 --- a/pandas/tests/test_lib.py +++ b/pandas/tests/test_lib.py @@ -3,7 +3,7 @@ import pytest import numpy as np -from pandas._libs import lib +from pandas._libs import lib, writers as libwriters import pandas.util.testing as tm @@ -12,19 +12,19 @@ class TestMisc(object): def test_max_len_string_array(self): arr = a = np.array(['foo', 'b', np.nan], dtype='object') - assert lib.max_len_string_array(arr) == 3 + assert libwriters.max_len_string_array(arr) == 3 # unicode arr = a.astype('U').astype(object) - assert 
lib.max_len_string_array(arr) == 3 + assert libwriters.max_len_string_array(arr) == 3 # bytes for python3 arr = a.astype('S').astype(object) - assert lib.max_len_string_array(arr) == 3 + assert libwriters.max_len_string_array(arr) == 3 # raises pytest.raises(TypeError, - lambda: lib.max_len_string_array(arr.astype('U'))) + lambda: libwriters.max_len_string_array(arr.astype('U'))) def test_fast_unique_multiple_list_gen_sort(self): keys = [['p', 'a'], ['n', 'd'], ['a', 's']] diff --git a/setup.py b/setup.py index 27943a776c414..5397a1b84dc4d 100755 --- a/setup.py +++ b/setup.py @@ -328,6 +328,7 @@ class CheckSDist(sdist_class): 'pandas/_libs/tslibs/frequencies.pyx', 'pandas/_libs/tslibs/resolution.pyx', 'pandas/_libs/tslibs/parsing.pyx', + 'pandas/_libs/writers.pyx', 'pandas/io/sas/sas.pyx'] def initialize_options(self): @@ -616,6 +617,9 @@ def pxd(name): '_libs.window': { 'pyxfile': '_libs/window', 'pxdfiles': ['_libs/skiplist', '_libs/src/util']}, + '_libs.writers': { + 'pyxfile': '_libs/writers', + 'pxdfiles': ['_libs/src/util']}, 'io.sas._sas': { 'pyxfile': 'io/sas/sas'}} From 23cfb3867e4a2584b9bd392e9bcbe0e0c2cc4eaa Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 1 Feb 2018 07:45:15 -0500 Subject: [PATCH 022/217] PERF: remove use of Panel & perf in rolling corr/cov (#19257) * PERF: remove use of Panel & perf in rolling corr/cov closes #17917 --- asv_bench/benchmarks/rolling.py | 25 ++++++++++++++++-- doc/source/whatsnew/v0.23.0.txt | 2 +- pandas/core/reshape/pivot.py | 8 ++---- pandas/core/window.py | 47 +++++++++++++++++++++------------ pandas/tests/test_window.py | 22 ++++++++------- 5 files changed, 69 insertions(+), 35 deletions(-) diff --git a/asv_bench/benchmarks/rolling.py b/asv_bench/benchmarks/rolling.py index 59cf7d090a622..75990d83f8212 100644 --- a/asv_bench/benchmarks/rolling.py +++ b/asv_bench/benchmarks/rolling.py @@ -11,8 +11,8 @@ class Methods(object): [10, 1000], ['int', 'float'], ['median', 'mean', 'max', 'min', 'std', 'count', 'skew', 
'kurt', - 'sum', 'corr', 'cov']) - param_names = ['constructor', 'window', 'dtype', 'method'] + 'sum']) + param_names = ['contructor', 'window', 'dtype', 'method'] def setup(self, constructor, window, dtype, method): N = 10**5 @@ -23,6 +23,27 @@ def time_rolling(self, constructor, window, dtype, method): getattr(self.roll, method)() +class Pairwise(object): + + sample_time = 0.2 + params = ([10, 1000, None], + ['corr', 'cov'], + [True, False]) + param_names = ['window', 'method', 'pairwise'] + + def setup(self, window, method, pairwise): + N = 10**4 + arr = np.random.random(N) + self.df = pd.DataFrame(arr) + + def time_pairwise(self, window, method, pairwise): + if window is None: + r = self.df.expanding() + else: + r = self.df.rolling(window=window) + getattr(r, method)(self.df, pairwise=pairwise) + + class Quantile(object): sample_time = 0.2 diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 2bd2bb199bf1f..5db29cb76b106 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -383,7 +383,7 @@ Performance Improvements - :func:`Series` / :func:`DataFrame` tab completion limits to 100 values, for better performance. (:issue:`18587`) - Improved performance of :func:`DataFrame.median` with ``axis=1`` when bottleneck is not installed (:issue:`16468`) - Improved performance of :func:`MultiIndex.get_loc` for large indexes, at the cost of a reduction in performance for small ones (:issue:`18519`) - +- Improved performance of pairwise ``.rolling()`` and ``.expanding()`` with ``.cov()`` and ``.corr()`` operations (:issue:`17917`) .. 
_whatsnew_0230.docs: diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 0e92fc4edce85..a4c9848dca900 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -99,19 +99,15 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', if not dropna: from pandas import MultiIndex - try: + if table.index.nlevels > 1: m = MultiIndex.from_arrays(cartesian_product(table.index.levels), names=table.index.names) table = table.reindex(m, axis=0) - except AttributeError: - pass # it's a single level - try: + if table.columns.nlevels > 1: m = MultiIndex.from_arrays(cartesian_product(table.columns.levels), names=table.columns.names) table = table.reindex(m, axis=1) - except AttributeError: - pass # it's a single level or a series if isinstance(table, ABCDataFrame): table = table.sort_index(axis=1) diff --git a/pandas/core/window.py b/pandas/core/window.py index 4d6a1de60f59b..a3f19ef50459d 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -1863,25 +1863,38 @@ def dataframe_from_int_dict(data, frame_template): results[i][j] = f(*_prep_binary(arg1.iloc[:, i], arg2.iloc[:, j])) - # TODO: not the most efficient (perf-wise) - # though not bad code-wise - from pandas import Panel, MultiIndex, concat - - with warnings.catch_warnings(record=True): - p = Panel.from_dict(results).swapaxes('items', 'major') - if len(p.major_axis) > 0: - p.major_axis = arg1.columns[p.major_axis] - if len(p.minor_axis) > 0: - p.minor_axis = arg2.columns[p.minor_axis] - - if len(p.items): + from pandas import MultiIndex, concat + + result_index = arg1.index.union(arg2.index) + if len(result_index): + + # construct result frame result = concat( - [p.iloc[i].T for i in range(len(p.items))], - keys=p.items) + [concat([results[i][j] + for j, c in enumerate(arg2.columns)], + ignore_index=True) + for i, c in enumerate(arg1.columns)], + ignore_index=True, + axis=1) + result.columns = arg1.columns + + # set the index and reorder + 
if arg2.columns.nlevels > 1: + result.index = MultiIndex.from_product( + arg2.columns.levels + [result_index]) + result = result.reorder_levels([2, 0, 1]).sort_index() + else: + result.index = MultiIndex.from_product( + [range(len(arg2.columns)), + range(len(result_index))]) + result = result.swaplevel(1, 0).sort_index() + result.index = MultiIndex.from_product( + [result_index] + [arg2.columns]) else: + # empty result result = DataFrame( - index=MultiIndex(levels=[arg1.index, arg1.columns], + index=MultiIndex(levels=[arg1.index, arg2.columns], labels=[[], []]), columns=arg2.columns, dtype='float64') @@ -1890,9 +1903,9 @@ def dataframe_from_int_dict(data, frame_template): # reset our column names to arg2 names # careful not to mutate the original names result.columns = result.columns.set_names( - arg2.columns.names) + arg1.columns.names) result.index = result.index.set_names( - arg1.index.names + arg1.columns.names) + result_index.names + arg2.columns.names) return result diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 22526d14a7168..dabdb1e8e689c 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -14,6 +14,7 @@ import pandas.tseries.offsets as offsets from pandas.core.base import SpecificationError from pandas.errors import UnsupportedFunctionCall +from pandas.core.sorting import safe_sort import pandas.util.testing as tm import pandas.util._test_decorators as td from pandas.compat import range, zip @@ -1645,7 +1646,7 @@ def compare(self, result, expected): result = result.dropna().values expected = expected.dropna().values - tm.assert_numpy_array_equal(result, expected) + tm.assert_numpy_array_equal(result, expected, check_dtype=False) @pytest.mark.parametrize('f', [lambda x: x.cov(), lambda x: x.corr()]) def test_no_flex(self, f): @@ -1670,15 +1671,19 @@ def test_no_flex(self, f): def test_pairwise_with_self(self, f): # DataFrame with itself, pairwise=True - results = [f(df) for df in self.df1s] - for (df, 
result) in zip(self.df1s, results): + # note that we may construct the 1st level of the MI + # in a non-motononic way, so compare accordingly + results = [] + for i, df in enumerate(self.df1s): + result = f(df) tm.assert_index_equal(result.index.levels[0], df.index, check_names=False) - tm.assert_index_equal(result.index.levels[1], - df.columns, - check_names=False) + tm.assert_numpy_array_equal(safe_sort(result.index.levels[1]), + safe_sort(df.columns.unique())) tm.assert_index_equal(result.columns, df.columns) + results.append(df) + for i, result in enumerate(results): if i > 0: self.compare(result, results[0]) @@ -1716,9 +1721,8 @@ def test_pairwise_with_other(self, f): tm.assert_index_equal(result.index.levels[0], df.index, check_names=False) - tm.assert_index_equal(result.index.levels[1], - self.df2.columns, - check_names=False) + tm.assert_numpy_array_equal(safe_sort(result.index.levels[1]), + safe_sort(self.df2.columns.unique())) for i, result in enumerate(results): if i > 0: self.compare(result, results[0]) From 05400a1e81259941d5f0b5c5475e1de91e1e538e Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 1 Feb 2018 07:54:56 -0500 Subject: [PATCH 023/217] TST: fix up pandas_datareader downstream tests (#19490) closes #18935 --- pandas/tests/test_downstream.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py index 0f0abd8cd3400..b438d6a6137b0 100644 --- a/pandas/tests/test_downstream.py +++ b/pandas/tests/test_downstream.py @@ -89,7 +89,8 @@ def test_pandas_gbq(df): def test_pandas_datareader(): pandas_datareader = import_module('pandas_datareader') # noqa - pandas_datareader.get_data_google('AAPL') + pandas_datareader.DataReader( + 'F', 'quandl', '2017-01-01', '2017-02-01') def test_geopandas(): From e2954354fb895e8e3be2c2bc7d14ecc4c551912e Mon Sep 17 00:00:00 2001 From: Matt Kirk Date: Thu, 1 Feb 2018 20:09:17 +0700 Subject: [PATCH 024/217] BUG: fix issue with concat 
creating SparseFrame if not all series are sparse. (#18924) --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/core/dtypes/concat.py | 10 ++- pandas/core/dtypes/generic.py | 2 + pandas/tests/dtypes/test_generic.py | 2 + pandas/tests/reshape/test_reshape.py | 9 +++ pandas/tests/sparse/test_combine_concat.py | 85 +++++++++++++--------- 6 files changed, 71 insertions(+), 38 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 5db29cb76b106..6cbdc3be07f13 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -540,6 +540,7 @@ Reshaping - Bug in :func:`DataFrame.merge` in which merging using ``Index`` objects as vectors raised an Exception (:issue:`19038`) - Bug in :func:`DataFrame.stack`, :func:`DataFrame.unstack`, :func:`Series.unstack` which were not returning subclasses (:issue:`15563`) - Bug in timezone comparisons, manifesting as a conversion of the index to UTC in ``.concat()`` (:issue:`18523`) +- Bug in :func:`concat` when concatting sparse and dense series it returns only a ``SparseDataFrame``. Should be a ``DataFrame``. 
(:issue:`18914`, :issue:`18686`, and :issue:`16874`) - diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 3e54ce61cd5b2..ddecbe85087d8 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -19,7 +19,7 @@ _TD_DTYPE) from pandas.core.dtypes.generic import ( ABCDatetimeIndex, ABCTimedeltaIndex, - ABCPeriodIndex, ABCRangeIndex) + ABCPeriodIndex, ABCRangeIndex, ABCSparseDataFrame) def get_dtype_kinds(l): @@ -89,14 +89,16 @@ def _get_series_result_type(result, objs=None): def _get_frame_result_type(result, objs): """ return appropriate class of DataFrame-like concat - if any block is SparseBlock, return SparseDataFrame + if all blocks are SparseBlock, return SparseDataFrame otherwise, return 1st obj """ - if any(b.is_sparse for b in result.blocks): + + if result.blocks and all(b.is_sparse for b in result.blocks): from pandas.core.sparse.api import SparseDataFrame return SparseDataFrame else: - return objs[0] + return next(obj for obj in objs if not isinstance(obj, + ABCSparseDataFrame)) def _concat_compat(to_concat, axis=0): diff --git a/pandas/core/dtypes/generic.py b/pandas/core/dtypes/generic.py index 6fae09c43d2be..b032cb6f14d4c 100644 --- a/pandas/core/dtypes/generic.py +++ b/pandas/core/dtypes/generic.py @@ -43,6 +43,8 @@ def _check(cls, inst): ABCSeries = create_pandas_abc_type("ABCSeries", "_typ", ("series", )) ABCDataFrame = create_pandas_abc_type("ABCDataFrame", "_typ", ("dataframe", )) +ABCSparseDataFrame = create_pandas_abc_type("ABCSparseDataFrame", "_subtyp", + ("sparse_frame", )) ABCPanel = create_pandas_abc_type("ABCPanel", "_typ", ("panel",)) ABCSparseSeries = create_pandas_abc_type("ABCSparseSeries", "_subtyp", ('sparse_series', diff --git a/pandas/tests/dtypes/test_generic.py b/pandas/tests/dtypes/test_generic.py index 58cb182e7d403..53f92b98f022e 100644 --- a/pandas/tests/dtypes/test_generic.py +++ b/pandas/tests/dtypes/test_generic.py @@ -18,6 +18,7 @@ class TestABCClasses(object): df = 
pd.DataFrame({'names': ['a', 'b', 'c']}, index=multi_index) sparse_series = pd.Series([1, 2, 3]).to_sparse() sparse_array = pd.SparseArray(np.random.randn(10)) + sparse_frame = pd.SparseDataFrame({'a': [1, -1, None]}) def test_abc_types(self): assert isinstance(pd.Index(['a', 'b', 'c']), gt.ABCIndex) @@ -37,6 +38,7 @@ def test_abc_types(self): assert isinstance(self.df.to_panel(), gt.ABCPanel) assert isinstance(self.sparse_series, gt.ABCSparseSeries) assert isinstance(self.sparse_array, gt.ABCSparseArray) + assert isinstance(self.sparse_frame, gt.ABCSparseDataFrame) assert isinstance(self.categorical, gt.ABCCategorical) assert isinstance(pd.Period('2012', freq='A-DEC'), gt.ABCPeriod) diff --git a/pandas/tests/reshape/test_reshape.py b/pandas/tests/reshape/test_reshape.py index 22925cceb30d1..c9d079421532f 100644 --- a/pandas/tests/reshape/test_reshape.py +++ b/pandas/tests/reshape/test_reshape.py @@ -454,6 +454,15 @@ def test_dataframe_dummies_preserve_categorical_dtype(self, dtype): tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize('sparse', [True, False]) + def test_get_dummies_dont_sparsify_all_columns(self, sparse): + # GH18914 + df = DataFrame.from_items([('GDP', [1, 2]), ('Nation', ['AB', 'CD'])]) + df = get_dummies(df, columns=['Nation'], sparse=sparse) + df2 = df.reindex(columns=['GDP']) + + tm.assert_frame_equal(df[['GDP']], df2) + class TestCategoricalReshape(object): diff --git a/pandas/tests/sparse/test_combine_concat.py b/pandas/tests/sparse/test_combine_concat.py index 15639fbe156c6..70fd1da529d46 100644 --- a/pandas/tests/sparse/test_combine_concat.py +++ b/pandas/tests/sparse/test_combine_concat.py @@ -1,8 +1,10 @@ # pylint: disable-msg=E1101,W0612 +import pytest import numpy as np import pandas as pd import pandas.util.testing as tm +import itertools class TestSparseSeriesConcat(object): @@ -317,37 +319,52 @@ def test_concat_axis1(self): assert isinstance(res, pd.SparseDataFrame) tm.assert_frame_equal(res.to_dense(), exp) - def 
test_concat_sparse_dense(self): - sparse = self.dense1.to_sparse() - - res = pd.concat([sparse, self.dense2]) - exp = pd.concat([self.dense1, self.dense2]) - assert isinstance(res, pd.SparseDataFrame) - tm.assert_frame_equal(res.to_dense(), exp) - - res = pd.concat([self.dense2, sparse]) - exp = pd.concat([self.dense2, self.dense1]) - assert isinstance(res, pd.SparseDataFrame) - tm.assert_frame_equal(res.to_dense(), exp) - - sparse = self.dense1.to_sparse(fill_value=0) - - res = pd.concat([sparse, self.dense2]) - exp = pd.concat([self.dense1, self.dense2]) - assert isinstance(res, pd.SparseDataFrame) - tm.assert_frame_equal(res.to_dense(), exp) - - res = pd.concat([self.dense2, sparse]) - exp = pd.concat([self.dense2, self.dense1]) - assert isinstance(res, pd.SparseDataFrame) - tm.assert_frame_equal(res.to_dense(), exp) - - res = pd.concat([self.dense3, sparse], axis=1) - exp = pd.concat([self.dense3, self.dense1], axis=1) - assert isinstance(res, pd.SparseDataFrame) - tm.assert_frame_equal(res, exp) - - res = pd.concat([sparse, self.dense3], axis=1) - exp = pd.concat([self.dense1, self.dense3], axis=1) - assert isinstance(res, pd.SparseDataFrame) - tm.assert_frame_equal(res, exp) + @pytest.mark.parametrize('fill_value,sparse_idx,dense_idx', + itertools.product([None, 0, 1, np.nan], + [0, 1], + [1, 0])) + def test_concat_sparse_dense_rows(self, fill_value, sparse_idx, dense_idx): + frames = [self.dense1, self.dense2] + sparse_frame = [frames[dense_idx], + frames[sparse_idx].to_sparse(fill_value=fill_value)] + dense_frame = [frames[dense_idx], frames[sparse_idx]] + + # This will try both directions sparse + dense and dense + sparse + for _ in range(2): + res = pd.concat(sparse_frame) + exp = pd.concat(dense_frame) + + assert isinstance(res, pd.SparseDataFrame) + tm.assert_frame_equal(res.to_dense(), exp) + + sparse_frame = sparse_frame[::-1] + dense_frame = dense_frame[::-1] + + @pytest.mark.parametrize('fill_value,sparse_idx,dense_idx', + itertools.product([None, 
0, 1, np.nan], + [0, 1], + [1, 0])) + def test_concat_sparse_dense_cols(self, fill_value, sparse_idx, dense_idx): + # See GH16874, GH18914 and #18686 for why this should be a DataFrame + + frames = [self.dense1, self.dense3] + + sparse_frame = [frames[dense_idx], + frames[sparse_idx].to_sparse(fill_value=fill_value)] + dense_frame = [frames[dense_idx], frames[sparse_idx]] + + # This will try both directions sparse + dense and dense + sparse + for _ in range(2): + res = pd.concat(sparse_frame, axis=1) + exp = pd.concat(dense_frame, axis=1) + + for column in frames[dense_idx].columns: + if dense_idx == sparse_idx: + tm.assert_frame_equal(res[column], exp[column]) + else: + tm.assert_series_equal(res[column], exp[column]) + + tm.assert_frame_equal(res, exp) + + sparse_frame = sparse_frame[::-1] + dense_frame = dense_frame[::-1] From fba0737f2b55f9ea50c14888b2a16ed391270262 Mon Sep 17 00:00:00 2001 From: Mitch Negus <21086604+mitchnegus@users.noreply.github.com> Date: Thu, 1 Feb 2018 05:15:42 -0800 Subject: [PATCH 025/217] updated hist documentation (#19366) --- pandas/plotting/_core.py | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 8b03d6ddde4ec..88b899ad60313 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -2156,10 +2156,18 @@ def hist_frame(data, column=None, by=None, grid=True, xlabelsize=None, The size of the figure to create in inches by default layout : tuple, optional Tuple of (rows, columns) for the layout of the histograms - bins : integer, default 10 - Number of histogram bins to be used + bins : integer or sequence, default 10 + Number of histogram bins to be used. If an integer is given, bins + 1 + bin edges are calculated and returned. If bins is a sequence, gives + bin edges, including left edge of first bin and right edge of last + bin. In this case, bins is returned unmodified. 
`**kwds` : other plotting keyword arguments To be passed to hist function + + See Also + -------- + matplotlib.axes.Axes.hist : Plot a histogram using matplotlib. + """ _converter._WARN = False if by is not None: @@ -2219,14 +2227,19 @@ def hist_series(self, by=None, ax=None, grid=True, xlabelsize=None, rotation of y axis labels figsize : tuple, default None figure size in inches by default + bins : integer or sequence, default 10 + Number of histogram bins to be used. If an integer is given, bins + 1 + bin edges are calculated and returned. If bins is a sequence, gives + bin edges, including left edge of first bin and right edge of last + bin. In this case, bins is returned unmodified. bins: integer, default 10 Number of histogram bins to be used `**kwds` : keywords To be passed to the actual plotting function - Notes - ----- - See matplotlib documentation online for more on this + See Also + -------- + matplotlib.axes.Axes.hist : Plot a histogram using matplotlib. """ import matplotlib.pyplot as plt From 826390ef4bf57ae248e1249ecab525408a830b7b Mon Sep 17 00:00:00 2001 From: Upkar Lidder Date: Thu, 1 Feb 2018 05:26:35 -0800 Subject: [PATCH 026/217] CLN: GH19404 Changing function signature to match logic (#19425) --- pandas/core/generic.py | 2 +- pandas/io/clipboards.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 5a15d720c5790..48981a27f3c7e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1906,7 +1906,7 @@ def to_pickle(self, path, compression='infer', return to_pickle(self, path, compression=compression, protocol=protocol) - def to_clipboard(self, excel=None, sep=None, **kwargs): + def to_clipboard(self, excel=True, sep=None, **kwargs): """ Attempt to write text representation of object to the system clipboard This can be pasted into Excel, for example. 
diff --git a/pandas/io/clipboards.py b/pandas/io/clipboards.py index 347ec41baf0e1..dcc221ce978b3 100644 --- a/pandas/io/clipboards.py +++ b/pandas/io/clipboards.py @@ -63,7 +63,7 @@ def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover return read_table(StringIO(text), sep=sep, **kwargs) -def to_clipboard(obj, excel=None, sep=None, **kwargs): # pragma: no cover +def to_clipboard(obj, excel=True, sep=None, **kwargs): # pragma: no cover """ Attempt to write text representation of object to the system clipboard The clipboard can be then pasted into Excel for example. From a11f48df833542ffab1f959098b94191a8764581 Mon Sep 17 00:00:00 2001 From: WBare Date: Thu, 1 Feb 2018 08:37:19 -0500 Subject: [PATCH 027/217] ENH limit_area added to interpolate1d closes #16284 --- doc/source/missing_data.rst | 53 +++++++++--- doc/source/whatsnew/v0.23.0.txt | 35 +++++++- pandas/core/generic.py | 10 ++- pandas/core/internals.py | 10 ++- pandas/core/missing.py | 130 +++++++++++++++++----------- pandas/core/resample.py | 4 +- pandas/tests/series/test_missing.py | 39 +++++++++ 7 files changed, 208 insertions(+), 73 deletions(-) diff --git a/doc/source/missing_data.rst b/doc/source/missing_data.rst index f56378b533909..ee0e2c7462f66 100644 --- a/doc/source/missing_data.rst +++ b/doc/source/missing_data.rst @@ -190,7 +190,7 @@ Sum/Prod of Empties/Nans .. warning:: This behavior is now standard as of v0.21.0; previously sum/prod would give different - results if the ``bottleneck`` package was installed. + results if the ``bottleneck`` package was installed. See the :ref:`v0.21.0 whatsnew `. With ``sum`` or ``prod`` on an empty or all-``NaN`` ``Series``, or columns of a ``DataFrame``, the result will be all-``NaN``. @@ -353,7 +353,11 @@ examined :ref:`in the API `. Interpolation ~~~~~~~~~~~~~ -Both Series and DataFrame objects have an :meth:`~DataFrame.interpolate` method +.. versionadded:: 0.23.0 + + The ``limit_area`` keyword argument was added. 
+ +Both Series and DataFrame objects have an :meth:`~DataFrame.interpolate` method that, by default, performs linear interpolation at missing datapoints. .. ipython:: python @@ -477,33 +481,54 @@ at the new values. .. _documentation: http://docs.scipy.org/doc/scipy/reference/interpolate.html#univariate-interpolation .. _guide: http://docs.scipy.org/doc/scipy/reference/tutorial/interpolate.html +.. _missing_data.interp_limits: + Interpolation Limits ^^^^^^^^^^^^^^^^^^^^ Like other pandas fill methods, ``interpolate`` accepts a ``limit`` keyword -argument. Use this argument to limit the number of consecutive interpolations, -keeping ``NaN`` values for interpolations that are too far from the last valid -observation: +argument. Use this argument to limit the number of consecutive ``NaN`` values +filled since the last valid observation: .. ipython:: python - ser = pd.Series([np.nan, np.nan, 5, np.nan, np.nan, np.nan, 13]) - ser.interpolate(limit=2) + ser = pd.Series([np.nan, np.nan, 5, np.nan, np.nan, np.nan, 13, np.nan, np.nan]) -By default, ``limit`` applies in a forward direction, so that only ``NaN`` -values after a non-``NaN`` value can be filled. If you provide ``'backward'`` or -``'both'`` for the ``limit_direction`` keyword argument, you can fill ``NaN`` -values before non-``NaN`` values, or both before and after non-``NaN`` values, -respectively: + # fill all consecutive values in a forward direction + ser.interpolate() -.. ipython:: python + # fill one consecutive value in a forward direction + ser.interpolate(limit=1) + +By default, ``NaN`` values are filled in a ``forward`` direction. Use +``limit_direction`` parameter to fill ``backward`` or from ``both`` directions. - ser.interpolate(limit=1) # limit_direction == 'forward' +.. 
ipython:: python + # fill one consecutive value backwards ser.interpolate(limit=1, limit_direction='backward') + # fill one consecutive value in both directions ser.interpolate(limit=1, limit_direction='both') + # fill all consecutive values in both directions + ser.interpolate(limit_direction='both') + +By default, ``NaN`` values are filled whether they are inside (surrounded by) +existing valid values, or outside existing valid values. Introduced in v0.23, +the ``limit_area`` parameter restricts filling to either inside or outside values. + +.. ipython:: python + + # fill one consecutive inside value in both directions + ser.interpolate(limit_direction='both', limit_area='inside', limit=1) + + # fill all consecutive outside values backward + ser.interpolate(limit_direction='backward', limit_area='outside') + + # fill all consecutive outside values in both directions + ser.interpolate(limit_direction='both', limit_area='outside') + .. _missing_data.replace: Replacing Generic Values diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 6cbdc3be07f13..66e88e181ac0f 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -13,10 +13,38 @@ version. New features ~~~~~~~~~~~~ -- -- -- +.. _whatsnew_0210.enhancements.limit_area: + +``DataFrame.interpolate`` has gained the ``limit_area`` kwarg +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:meth:`DataFrame.interpolate` has gained a ``limit_area`` parameter to allow further control of which ``NaN`` s are replaced. +Use ``limit_area='inside'`` to fill only NaNs surrounded by valid values or use ``limit_area='outside'`` to fill only ``NaN`` s +outside the existing valid values while preserving those inside. (:issue:`16284`) See the :ref:`full documentation here `. + +.. ipython:: python + + ser = pd.Series([np.nan, np.nan, 5, np.nan, np.nan, np.nan, 13, np.nan, np.nan]) + ser + +Fill one consecutive inside value in both directions + +.. 
ipython:: python + + ser.interpolate(limit_direction='both', limit_area='inside', limit=1) + +Fill all consecutive outside values backward + +.. ipython:: python + + ser.interpolate(limit_direction='backward', limit_area='outside') + +Fill all consecutive outside values in both directions + +.. ipython:: python + + ser.interpolate(limit_direction='both', limit_area='outside') .. _whatsnew_0210.enhancements.get_dummies_dtype: @@ -207,6 +235,7 @@ Other Enhancements :func:`pandas.api.extensions.register_index_accessor`, accessor for libraries downstream of pandas to register custom accessors like ``.cat`` on pandas objects. See :ref:`Registering Custom Accessors ` for more (:issue:`14781`). + - ``IntervalIndex.astype`` now supports conversions between subtypes when passed an ``IntervalDtype`` (:issue:`19197`) - :class:`IntervalIndex` and its associated constructor methods (``from_arrays``, ``from_breaks``, ``from_tuples``) have gained a ``dtype`` parameter (:issue:`19262`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 48981a27f3c7e..d34a85b5b4388 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5085,6 +5085,12 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, limit : int, default None. Maximum number of consecutive NaNs to fill. Must be greater than 0. limit_direction : {'forward', 'backward', 'both'}, default 'forward' + limit_area : {'inside', 'outside'}, default None + * None: (default) no fill restriction + * 'inside' Only fill NaNs surrounded by valid values (interpolate). + * 'outside' Only fill NaNs outside valid values (extrapolate). + .. versionadded:: 0.21.0 + If limit is specified, consecutive NaNs will be filled in this direction. 
inplace : bool, default False @@ -5118,7 +5124,8 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, @Appender(_shared_docs['interpolate'] % _shared_doc_kwargs) def interpolate(self, method='linear', axis=0, limit=None, inplace=False, - limit_direction='forward', downcast=None, **kwargs): + limit_direction='forward', limit_area=None, + downcast=None, **kwargs): """ Interpolate values according to different methods. """ @@ -5167,6 +5174,7 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False, new_data = data.interpolate(method=method, axis=ax, index=index, values=_maybe_transposed_self, limit=limit, limit_direction=limit_direction, + limit_area=limit_area, inplace=inplace, downcast=downcast, **kwargs) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 22d38d3df071e..4b12d931ade35 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1073,8 +1073,8 @@ def coerce_to_target_dtype(self, other): def interpolate(self, method='pad', axis=0, index=None, values=None, inplace=False, limit=None, limit_direction='forward', - fill_value=None, coerce=False, downcast=None, mgr=None, - **kwargs): + limit_area=None, fill_value=None, coerce=False, + downcast=None, mgr=None, **kwargs): inplace = validate_bool_kwarg(inplace, 'inplace') @@ -1115,6 +1115,7 @@ def check_int_bool(self, inplace): return self._interpolate(method=m, index=index, values=values, axis=axis, limit=limit, limit_direction=limit_direction, + limit_area=limit_area, fill_value=fill_value, inplace=inplace, downcast=downcast, mgr=mgr, **kwargs) @@ -1148,8 +1149,8 @@ def _interpolate_with_fill(self, method='pad', axis=0, inplace=False, def _interpolate(self, method=None, index=None, values=None, fill_value=None, axis=0, limit=None, - limit_direction='forward', inplace=False, downcast=None, - mgr=None, **kwargs): + limit_direction='forward', limit_area=None, + inplace=False, downcast=None, mgr=None, **kwargs): """ interpolate using scipy 
wrappers """ inplace = validate_bool_kwarg(inplace, 'inplace') @@ -1177,6 +1178,7 @@ def func(x): # i.e. not an arg to missing.interpolate_1d return missing.interpolate_1d(index, x, method=method, limit=limit, limit_direction=limit_direction, + limit_area=limit_area, fill_value=fill_value, bounds_error=False, **kwargs) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 74fa21fa4b53d..2eccc5777bca6 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -111,7 +111,7 @@ def clean_interp_method(method, **kwargs): def interpolate_1d(xvalues, yvalues, method='linear', limit=None, - limit_direction='forward', fill_value=None, + limit_direction='forward', limit_area=None, fill_value=None, bounds_error=False, order=None, **kwargs): """ Logic for the 1-d interpolation. The result should be 1-d, inputs @@ -151,28 +151,12 @@ def interpolate_1d(xvalues, yvalues, method='linear', limit=None, raise ValueError(msg.format(valid=valid_limit_directions, invalid=limit_direction)) - from pandas import Series - ys = Series(yvalues) - start_nans = set(range(ys.first_valid_index())) - end_nans = set(range(1 + ys.last_valid_index(), len(valid))) - - # violate_limit is a list of the indexes in the series whose yvalue is - # currently NaN, and should still be NaN after the interpolation. - # Specifically: - # - # If limit_direction='forward' or None then the list will contain NaNs at - # the beginning of the series, and NaNs that are more than 'limit' away - # from the prior non-NaN. - # - # If limit_direction='backward' then the list will contain NaNs at - # the end of the series, and NaNs that are more than 'limit' away - # from the subsequent non-NaN. - # - # If limit_direction='both' then the list will contain NaNs that - # are more than 'limit' away from any non-NaN. 
- # - # If limit=None, then use default behavior of filling an unlimited number - # of NaNs in the direction specified by limit_direction + if limit_area is not None: + valid_limit_areas = ['inside', 'outside'] + limit_area = limit_area.lower() + if limit_area not in valid_limit_areas: + raise ValueError('Invalid limit_area: expecting one of {}, got ' + '{}.'.format(valid_limit_areas, limit_area)) # default limit is unlimited GH #16282 if limit is None: @@ -183,22 +167,43 @@ def interpolate_1d(xvalues, yvalues, method='linear', limit=None, elif limit < 1: raise ValueError('Limit must be greater than 0') - # each possible limit_direction - # TODO: do we need sorted? - if limit_direction == 'forward' and limit is not None: - violate_limit = sorted(start_nans | - set(_interp_limit(invalid, limit, 0))) - elif limit_direction == 'forward': - violate_limit = sorted(start_nans) - elif limit_direction == 'backward' and limit is not None: - violate_limit = sorted(end_nans | - set(_interp_limit(invalid, 0, limit))) + from pandas import Series + ys = Series(yvalues) + + # These are sets of index pointers to invalid values... i.e. {0, 1, etc... + all_nans = set(np.flatnonzero(invalid)) + start_nans = set(range(ys.first_valid_index())) + end_nans = set(range(1 + ys.last_valid_index(), len(valid))) + mid_nans = all_nans - start_nans - end_nans + + # Like the sets above, preserve_nans contains indices of invalid values, + # but in this case, it is the final set of indices that need to be + # preserved as NaN after the interpolation. + + # For example if limit_direction='forward' then preserve_nans will + # contain indices of NaNs at the beginning of the series, and NaNs that + # are more than 'limit' away from the prior non-NaN. 
+ + # set preserve_nans based on direction using _interp_limit + if limit_direction == 'forward': + preserve_nans = start_nans | set(_interp_limit(invalid, limit, 0)) elif limit_direction == 'backward': - violate_limit = sorted(end_nans) - elif limit_direction == 'both' and limit is not None: - violate_limit = sorted(_interp_limit(invalid, limit, limit)) + preserve_nans = end_nans | set(_interp_limit(invalid, 0, limit)) else: - violate_limit = [] + # both directions... just use _interp_limit + preserve_nans = set(_interp_limit(invalid, limit, limit)) + + # if limit_area is set, add either mid or outside indices + # to preserve_nans GH #16284 + if limit_area == 'inside': + # preserve NaNs on the outside + preserve_nans |= start_nans | end_nans + elif limit_area == 'outside': + # preserve NaNs on the inside + preserve_nans |= mid_nans + + # sort preserve_nans and convert to list + preserve_nans = sorted(preserve_nans) xvalues = getattr(xvalues, 'values', xvalues) yvalues = getattr(yvalues, 'values', yvalues) @@ -215,7 +220,7 @@ def interpolate_1d(xvalues, yvalues, method='linear', limit=None, else: inds = xvalues result[invalid] = np.interp(inds[invalid], inds[valid], yvalues[valid]) - result[violate_limit] = np.nan + result[preserve_nans] = np.nan return result sp_methods = ['nearest', 'zero', 'slinear', 'quadratic', 'cubic', @@ -234,7 +239,7 @@ def interpolate_1d(xvalues, yvalues, method='linear', limit=None, fill_value=fill_value, bounds_error=bounds_error, order=order, **kwargs) - result[violate_limit] = np.nan + result[preserve_nans] = np.nan return result @@ -646,8 +651,24 @@ def fill_zeros(result, x, y, name, fill): def _interp_limit(invalid, fw_limit, bw_limit): - """Get idx of values that won't be filled b/c they exceed the limits. + """ + Get indexers of values that won't be filled + because they exceed the limits. 
+ + Parameters + ---------- + invalid : boolean ndarray + fw_limit : int or None + forward limit to index + bw_limit : int or None + backward limit to index + + Returns + ------- + set of indexers + Notes + ----- This is equivalent to the more readable, but slower .. code-block:: python @@ -660,6 +681,8 @@ def _interp_limit(invalid, fw_limit, bw_limit): # 1. operate on the reversed array # 2. subtract the returned indicies from N - 1 N = len(invalid) + f_idx = set() + b_idx = set() def inner(invalid, limit): limit = min(limit, N) @@ -668,18 +691,25 @@ def inner(invalid, limit): set(np.where((~invalid[:limit + 1]).cumsum() == 0)[0])) return idx - if fw_limit == 0: - f_idx = set(np.where(invalid)[0]) - else: - f_idx = inner(invalid, fw_limit) + if fw_limit is not None: - if bw_limit == 0: - # then we don't even need to care about backwards, just use forwards - return f_idx - else: - b_idx = set(N - 1 - np.asarray(list(inner(invalid[::-1], bw_limit)))) if fw_limit == 0: - return b_idx + f_idx = set(np.where(invalid)[0]) + else: + f_idx = inner(invalid, fw_limit) + + if bw_limit is not None: + + if bw_limit == 0: + # then we don't even need to care about backwards + # just use forwards + return f_idx + else: + b_idx = list(inner(invalid[::-1], bw_limit)) + b_idx = set(N - 1 - np.asarray(b_idx)) + if fw_limit == 0: + return b_idx + return f_idx & b_idx diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 961c8c004e9e3..df656092f476e 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -557,7 +557,8 @@ def fillna(self, method, limit=None): @Appender(_shared_docs['interpolate'] % _shared_docs_kwargs) def interpolate(self, method='linear', axis=0, limit=None, inplace=False, - limit_direction='forward', downcast=None, **kwargs): + limit_direction='forward', limit_area=None, + downcast=None, **kwargs): """ Interpolate values according to different methods. 
@@ -567,6 +568,7 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False, return result.interpolate(method=method, axis=axis, limit=limit, inplace=inplace, limit_direction=limit_direction, + limit_area=limit_area, downcast=downcast, **kwargs) def asfreq(self, fill_value=None): diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 0dc5e23184af7..2bc44cb1c683f 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -1079,6 +1079,45 @@ def test_interp_limit_bad_direction(self): pytest.raises(ValueError, s.interpolate, method='linear', limit_direction='abc') + # limit_area introduced GH #16284 + def test_interp_limit_area(self): + # These tests are for issue #9218 -- fill NaNs in both directions. + s = Series([nan, nan, 3, nan, nan, nan, 7, nan, nan]) + + expected = Series([nan, nan, 3., 4., 5., 6., 7., nan, nan]) + result = s.interpolate(method='linear', limit_area='inside') + assert_series_equal(result, expected) + + expected = Series([nan, nan, 3., 4., nan, nan, 7., nan, nan]) + result = s.interpolate(method='linear', limit_area='inside', + limit=1) + + expected = Series([nan, nan, 3., 4., nan, 6., 7., nan, nan]) + result = s.interpolate(method='linear', limit_area='inside', + limit_direction='both', limit=1) + assert_series_equal(result, expected) + + expected = Series([nan, nan, 3., nan, nan, nan, 7., 7., 7.]) + result = s.interpolate(method='linear', limit_area='outside') + assert_series_equal(result, expected) + + expected = Series([nan, nan, 3., nan, nan, nan, 7., 7., nan]) + result = s.interpolate(method='linear', limit_area='outside', + limit=1) + + expected = Series([nan, 3., 3., nan, nan, nan, 7., 7., nan]) + result = s.interpolate(method='linear', limit_area='outside', + limit_direction='both', limit=1) + assert_series_equal(result, expected) + + expected = Series([3., 3., 3., nan, nan, nan, 7., nan, nan]) + result = s.interpolate(method='linear', 
limit_area='outside', + direction='backward') + + # raises an error even if limit type is wrong. + pytest.raises(ValueError, s.interpolate, method='linear', + limit_area='abc') + def test_interp_limit_direction(self): # These tests are for issue #9218 -- fill NaNs in both directions. s = Series([1, 3, np.nan, np.nan, np.nan, 11]) From c753b3f1013220722a825a0267b857f777ab3ff0 Mon Sep 17 00:00:00 2001 From: Matt Kirk Date: Fri, 2 Feb 2018 02:26:15 +0700 Subject: [PATCH 028/217] BUG: Fix problem with SparseDataFrame not persisting to csv (#19441) * BUG: Fix problem with SparseDataFrame not persisting to csv * FIX: Remove comment and move test with more coverage * FIX: Flake8 issues cleanup * Fix failing test due to blank lines * FIX: linting errors on whitespace * Use parametrize on test * Move bug description to sparse header * Add GH issue to test * Fix linting error --- doc/source/whatsnew/v0.23.0.txt | 3 +-- pandas/core/internals.py | 3 ++- pandas/tests/sparse/frame/test_to_csv.py | 20 ++++++++++++++++++++ 3 files changed, 23 insertions(+), 3 deletions(-) create mode 100644 pandas/tests/sparse/frame/test_to_csv.py diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 66e88e181ac0f..91362c7640575 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -529,7 +529,6 @@ I/O - Bug in :func:`DataFrame.to_parquet` where an exception was raised if the write destination is S3 (:issue:`19134`) - :class:`Interval` now supported in :func:`DataFrame.to_excel` for all Excel file types (:issue:`19242`) - :class:`Timedelta` now supported in :func:`DataFrame.to_excel` for xls file type (:issue:`19242`, :issue:`9155`) -- Plotting ^^^^^^^^ @@ -553,7 +552,7 @@ Sparse ^^^^^^ - Bug in which creating a ``SparseDataFrame`` from a dense ``Series`` or an unsupported type raised an uncontrolled exception (:issue:`19374`) -- +- Bug in :class:`SparseDataFrame.to_csv` causing exception (:issue:`19384`) - Reshaping diff --git 
a/pandas/core/internals.py b/pandas/core/internals.py index 4b12d931ade35..52e8317f5209a 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -709,7 +709,8 @@ def to_native_types(self, slicer=None, na_rep='nan', quoting=None, **kwargs): """ convert to our native types format, slicing if desired """ - values = self.values + values = self.get_values() + if slicer is not None: values = values[:, slicer] mask = isna(values) diff --git a/pandas/tests/sparse/frame/test_to_csv.py b/pandas/tests/sparse/frame/test_to_csv.py new file mode 100644 index 0000000000000..b0243dfde8d3f --- /dev/null +++ b/pandas/tests/sparse/frame/test_to_csv.py @@ -0,0 +1,20 @@ +import numpy as np +import pytest +from pandas import SparseDataFrame, read_csv +from pandas.util import testing as tm + + +class TestSparseDataFrameToCsv(object): + fill_values = [np.nan, 0, None, 1] + + @pytest.mark.parametrize('fill_value', fill_values) + def test_to_csv_sparse_dataframe(self, fill_value): + # GH19384 + sdf = SparseDataFrame({'a': type(self).fill_values}, + default_fill_value=fill_value) + + with tm.ensure_clean('sparse_df.csv') as path: + sdf.to_csv(path, index=False) + df = read_csv(path, skip_blank_lines=False) + + tm.assert_sp_frame_equal(df.to_sparse(fill_value=fill_value), sdf) From 073188b7eb36dd33e00efb0ce044b4b6c30b1217 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 2 Feb 2018 05:03:29 -0600 Subject: [PATCH 029/217] Added E741 to flake8 config (#19496) --- setup.cfg | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 828ef80971f7b..942b2b0a1a0bf 100644 --- a/setup.cfg +++ b/setup.cfg @@ -12,7 +12,11 @@ tag_prefix = v parentdir_prefix = pandas- [flake8] -ignore = E731,E402,W503 +ignore = + E402, # module level import not at top of file + E731, # do not assign a lambda expression, use a def + E741, # do not use variables named 'l', 'O', or 'I' + W503 # line break before binary operator max-line-length = 79 [yapf] From 
129a6b84b08e5172b31f6634c88214ece4d2416a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 2 Feb 2018 03:05:30 -0800 Subject: [PATCH 030/217] implement timedeltas.test_scalar_compat (#19503) --- .../indexes/timedeltas/test_arithmetic.py | 56 ++++++++++++++++- pandas/tests/indexes/timedeltas/test_ops.py | 46 +------------- .../indexes/timedeltas/test_scalar_compat.py | 63 +++++++++++++++++++ .../indexes/timedeltas/test_timedelta.py | 49 +-------------- 4 files changed, 120 insertions(+), 94 deletions(-) create mode 100644 pandas/tests/indexes/timedeltas/test_scalar_compat.py diff --git a/pandas/tests/indexes/timedeltas/test_arithmetic.py b/pandas/tests/indexes/timedeltas/test_arithmetic.py index ef6523a9eb270..3dc60ed33b958 100644 --- a/pandas/tests/indexes/timedeltas/test_arithmetic.py +++ b/pandas/tests/indexes/timedeltas/test_arithmetic.py @@ -10,7 +10,7 @@ to_timedelta, timedelta_range, date_range, Series, Timestamp, Timedelta) -from pandas.errors import PerformanceWarning +from pandas.errors import PerformanceWarning, NullFrequencyError @pytest.fixture(params=[pd.offsets.Hour(2), timedelta(hours=2), @@ -138,6 +138,60 @@ def test_tdi_add_str_invalid(self): with pytest.raises(TypeError): 'a' + tdi + # ------------------------------------------------------------- + # TimedeltaIndex.shift is used by __add__/__sub__ + + def test_tdi_shift_empty(self): + # GH#9903 + idx = pd.TimedeltaIndex([], name='xxx') + tm.assert_index_equal(idx.shift(0, freq='H'), idx) + tm.assert_index_equal(idx.shift(3, freq='H'), idx) + + def test_tdi_shift_hours(self): + # GH#9903 + idx = pd.TimedeltaIndex(['5 hours', '6 hours', '9 hours'], name='xxx') + tm.assert_index_equal(idx.shift(0, freq='H'), idx) + exp = pd.TimedeltaIndex(['8 hours', '9 hours', '12 hours'], name='xxx') + tm.assert_index_equal(idx.shift(3, freq='H'), exp) + exp = pd.TimedeltaIndex(['2 hours', '3 hours', '6 hours'], name='xxx') + tm.assert_index_equal(idx.shift(-3, freq='H'), exp) + + def 
test_tdi_shift_minutes(self): + # GH#9903 + idx = pd.TimedeltaIndex(['5 hours', '6 hours', '9 hours'], name='xxx') + tm.assert_index_equal(idx.shift(0, freq='T'), idx) + exp = pd.TimedeltaIndex(['05:03:00', '06:03:00', '9:03:00'], + name='xxx') + tm.assert_index_equal(idx.shift(3, freq='T'), exp) + exp = pd.TimedeltaIndex(['04:57:00', '05:57:00', '8:57:00'], + name='xxx') + tm.assert_index_equal(idx.shift(-3, freq='T'), exp) + + def test_tdi_shift_int(self): + # GH#8083 + trange = pd.to_timedelta(range(5), unit='d') + pd.offsets.Hour(1) + result = trange.shift(1) + expected = TimedeltaIndex(['1 days 01:00:00', '2 days 01:00:00', + '3 days 01:00:00', + '4 days 01:00:00', '5 days 01:00:00'], + freq='D') + tm.assert_index_equal(result, expected) + + def test_tdi_shift_nonstandard_freq(self): + # GH#8083 + trange = pd.to_timedelta(range(5), unit='d') + pd.offsets.Hour(1) + result = trange.shift(3, freq='2D 1s') + expected = TimedeltaIndex(['6 days 01:00:03', '7 days 01:00:03', + '8 days 01:00:03', '9 days 01:00:03', + '10 days 01:00:03'], freq='D') + tm.assert_index_equal(result, expected) + + def test_shift_no_freq(self): + # GH#19147 + tdi = TimedeltaIndex(['1 days 01:00:00', '2 days 01:00:00'], freq=None) + with pytest.raises(NullFrequencyError): + tdi.shift(2) + # ------------------------------------------------------------- @pytest.mark.parametrize('box', [np.array, pd.Index]) diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index 112c62b7e2f8d..e944aad13f8d5 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -98,32 +98,6 @@ def test_numpy_minmax(self): tm.assert_raises_regex( ValueError, errmsg, np.argmax, td, out=0) - def test_round(self): - td = pd.timedelta_range(start='16801 days', periods=5, freq='30Min') - elt = td[1] - - expected_rng = TimedeltaIndex([ - Timedelta('16801 days 00:00:00'), - Timedelta('16801 days 00:00:00'), - Timedelta('16801 
days 01:00:00'), - Timedelta('16801 days 02:00:00'), - Timedelta('16801 days 02:00:00'), - ]) - expected_elt = expected_rng[1] - - tm.assert_index_equal(td.round(freq='H'), expected_rng) - assert elt.round(freq='H') == expected_elt - - msg = pd._libs.tslibs.frequencies._INVALID_FREQ_ERROR - with tm.assert_raises_regex(ValueError, msg): - td.round(freq='foo') - with tm.assert_raises_regex(ValueError, msg): - elt.round(freq='foo') - - msg = " is a non-fixed frequency" - tm.assert_raises_regex(ValueError, msg, td.round, freq='M') - tm.assert_raises_regex(ValueError, msg, elt.round, freq='M') - def test_representation(self): idx1 = TimedeltaIndex([], freq='D') idx2 = TimedeltaIndex(['1 days'], freq='D') @@ -387,25 +361,7 @@ def test_nat_new(self): tm.assert_numpy_array_equal(result, exp) def test_shift(self): - # GH 9903 - idx = pd.TimedeltaIndex([], name='xxx') - tm.assert_index_equal(idx.shift(0, freq='H'), idx) - tm.assert_index_equal(idx.shift(3, freq='H'), idx) - - idx = pd.TimedeltaIndex(['5 hours', '6 hours', '9 hours'], name='xxx') - tm.assert_index_equal(idx.shift(0, freq='H'), idx) - exp = pd.TimedeltaIndex(['8 hours', '9 hours', '12 hours'], name='xxx') - tm.assert_index_equal(idx.shift(3, freq='H'), exp) - exp = pd.TimedeltaIndex(['2 hours', '3 hours', '6 hours'], name='xxx') - tm.assert_index_equal(idx.shift(-3, freq='H'), exp) - - tm.assert_index_equal(idx.shift(0, freq='T'), idx) - exp = pd.TimedeltaIndex(['05:03:00', '06:03:00', '9:03:00'], - name='xxx') - tm.assert_index_equal(idx.shift(3, freq='T'), exp) - exp = pd.TimedeltaIndex(['04:57:00', '05:57:00', '8:57:00'], - name='xxx') - tm.assert_index_equal(idx.shift(-3, freq='T'), exp) + pass # handled in test_arithmetic.py def test_repeat(self): index = pd.timedelta_range('1 days', periods=2, freq='D') diff --git a/pandas/tests/indexes/timedeltas/test_scalar_compat.py b/pandas/tests/indexes/timedeltas/test_scalar_compat.py new file mode 100644 index 0000000000000..7d97e1fadea30 --- /dev/null +++ 
b/pandas/tests/indexes/timedeltas/test_scalar_compat.py @@ -0,0 +1,63 @@ +# -*- coding: utf-8 -*- +""" +Tests for TimedeltaIndex methods behaving like their Timedelta counterparts +""" + +import numpy as np + +import pandas as pd +import pandas.util.testing as tm +from pandas import timedelta_range, Timedelta, TimedeltaIndex, Index, Series + + +class TestVectorizedTimedelta(object): + def test_tdi_total_seconds(self): + # GH#10939 + # test index + rng = timedelta_range('1 days, 10:11:12.100123456', periods=2, + freq='s') + expt = [1 * 86400 + 10 * 3600 + 11 * 60 + 12 + 100123456. / 1e9, + 1 * 86400 + 10 * 3600 + 11 * 60 + 13 + 100123456. / 1e9] + tm.assert_almost_equal(rng.total_seconds(), Index(expt)) + + # test Series + ser = Series(rng) + s_expt = Series(expt, index=[0, 1]) + tm.assert_series_equal(ser.dt.total_seconds(), s_expt) + + # with nat + ser[1] = np.nan + s_expt = Series([1 * 86400 + 10 * 3600 + 11 * 60 + + 12 + 100123456. / 1e9, np.nan], index=[0, 1]) + tm.assert_series_equal(ser.dt.total_seconds(), s_expt) + + # with both nat + ser = Series([np.nan, np.nan], dtype='timedelta64[ns]') + tm.assert_series_equal(ser.dt.total_seconds(), + Series([np.nan, np.nan], index=[0, 1])) + + def test_tdi_round(self): + td = pd.timedelta_range(start='16801 days', periods=5, freq='30Min') + elt = td[1] + + expected_rng = TimedeltaIndex([Timedelta('16801 days 00:00:00'), + Timedelta('16801 days 00:00:00'), + Timedelta('16801 days 01:00:00'), + Timedelta('16801 days 02:00:00'), + Timedelta('16801 days 02:00:00')]) + expected_elt = expected_rng[1] + + tm.assert_index_equal(td.round(freq='H'), expected_rng) + assert elt.round(freq='H') == expected_elt + + msg = pd._libs.tslibs.frequencies._INVALID_FREQ_ERROR + with tm.assert_raises_regex(ValueError, msg): + td.round(freq='foo') + with tm.assert_raises_regex(ValueError, msg): + elt.round(freq='foo') + + msg = " is a non-fixed frequency" + with tm.assert_raises_regex(ValueError, msg): + td.round(freq='M') + with 
tm.assert_raises_regex(ValueError, msg): + elt.round(freq='M') diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index 1af971e8a4326..32157a9a44e04 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -4,7 +4,6 @@ from datetime import timedelta import pandas as pd -from pandas.errors import NullFrequencyError import pandas.util.testing as tm from pandas import (timedelta_range, date_range, Series, Timedelta, TimedeltaIndex, Index, DataFrame, @@ -34,28 +33,7 @@ def test_numeric_compat(self): pass def test_shift(self): - # test shift for TimedeltaIndex - # err8083 - - drange = self.create_index() - result = drange.shift(1) - expected = TimedeltaIndex(['1 days 01:00:00', '2 days 01:00:00', - '3 days 01:00:00', - '4 days 01:00:00', '5 days 01:00:00'], - freq='D') - tm.assert_index_equal(result, expected) - - result = drange.shift(3, freq='2D 1s') - expected = TimedeltaIndex(['6 days 01:00:03', '7 days 01:00:03', - '8 days 01:00:03', '9 days 01:00:03', - '10 days 01:00:03'], freq='D') - tm.assert_index_equal(result, expected) - - def test_shift_no_freq(self): - # GH#19147 - tdi = TimedeltaIndex(['1 days 01:00:00', '2 days 01:00:00'], freq=None) - with pytest.raises(NullFrequencyError): - tdi.shift(2) + pass # this is handled in test_arithmetic.py def test_pickle_compat_construction(self): pass @@ -203,31 +181,6 @@ def test_map(self): exp = Int64Index([f(x) for x in rng]) tm.assert_index_equal(result, exp) - def test_total_seconds(self): - # GH 10939 - # test index - rng = timedelta_range('1 days, 10:11:12.100123456', periods=2, - freq='s') - expt = [1 * 86400 + 10 * 3600 + 11 * 60 + 12 + 100123456. / 1e9, - 1 * 86400 + 10 * 3600 + 11 * 60 + 13 + 100123456. 
/ 1e9] - tm.assert_almost_equal(rng.total_seconds(), Index(expt)) - - # test Series - s = Series(rng) - s_expt = Series(expt, index=[0, 1]) - tm.assert_series_equal(s.dt.total_seconds(), s_expt) - - # with nat - s[1] = np.nan - s_expt = Series([1 * 86400 + 10 * 3600 + 11 * 60 + - 12 + 100123456. / 1e9, np.nan], index=[0, 1]) - tm.assert_series_equal(s.dt.total_seconds(), s_expt) - - # with both nat - s = Series([np.nan, np.nan], dtype='timedelta64[ns]') - tm.assert_series_equal(s.dt.total_seconds(), - Series([np.nan, np.nan], index=[0, 1])) - def test_pass_TimedeltaIndex_to_index(self): rng = timedelta_range('1 days', '10 days') From 4e0a32d4eb544bf480e31399093443a66349cb94 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 2 Feb 2018 03:29:53 -0800 Subject: [PATCH 031/217] Continue de-nesting core.ops (#19448) --- pandas/core/ops.py | 142 ++++++++++++++++++----------------- pandas/core/sparse/series.py | 2 +- 2 files changed, 74 insertions(+), 70 deletions(-) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index ba8a15b60ba56..6ea4a81cb52a1 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -39,8 +39,7 @@ ABCSeries, ABCDataFrame, ABCIndex, - ABCPeriodIndex, - ABCSparseSeries) + ABCSparseSeries, ABCSparseArray) def _gen_eval_kwargs(name): @@ -445,8 +444,14 @@ def names(x): return new_methods -def add_methods(cls, new_methods, force): +def add_methods(cls, new_methods): for name, method in new_methods.items(): + # For most methods, if we find that the class already has a method + # of the same name, it is OK to over-write it. The exception is + # inplace methods (__iadd__, __isub__, ...) for SparseArray, which + # retain the np.ndarray versions. 
+ force = not (issubclass(cls, ABCSparseArray) and + name.startswith('__i')) if force or name not in cls.__dict__: bind_method(cls, name, method) @@ -454,8 +459,7 @@ def add_methods(cls, new_methods, force): # ---------------------------------------------------------------------- # Arithmetic def add_special_arithmetic_methods(cls, arith_method=None, - comp_method=None, bool_method=None, - force=False): + comp_method=None, bool_method=None): """ Adds the full suite of special arithmetic methods (``__add__``, ``__sub__``, etc.) to the class. @@ -469,9 +473,6 @@ def add_special_arithmetic_methods(cls, arith_method=None, factory for rich comparison - signature: f(op, name, str_rep) bool_method : function (optional) factory for boolean methods - signature: f(op, name, str_rep) - force : bool, default False - if False, checks whether function is defined **on ``cls.__dict__``** - before defining if True, always defines functions on class base """ new_methods = _create_methods(cls, arith_method, comp_method, bool_method, special=True) @@ -512,12 +513,11 @@ def f(self, other): __ior__=_wrap_inplace_method(new_methods["__or__"]), __ixor__=_wrap_inplace_method(new_methods["__xor__"]))) - add_methods(cls, new_methods=new_methods, force=force) + add_methods(cls, new_methods=new_methods) def add_flex_arithmetic_methods(cls, flex_arith_method, - flex_comp_method=None, flex_bool_method=None, - force=False): + flex_comp_method=None, flex_bool_method=None): """ Adds the full suite of flex arithmetic methods (``pow``, ``mul``, ``add``) to the class. 
@@ -529,9 +529,6 @@ def add_flex_arithmetic_methods(cls, flex_arith_method, f(op, name, str_rep) flex_comp_method : function, optional, factory for rich comparison - signature: f(op, name, str_rep) - force : bool, default False - if False, checks whether function is defined **on ``cls.__dict__``** - before defining if True, always defines functions on class base """ new_methods = _create_methods(cls, flex_arith_method, flex_comp_method, flex_bool_method, @@ -544,7 +541,7 @@ def add_flex_arithmetic_methods(cls, flex_arith_method, if k in new_methods: new_methods.pop(k) - add_methods(cls, new_methods=new_methods, force=force) + add_methods(cls, new_methods=new_methods) # ----------------------------------------------------------------------------- @@ -614,14 +611,11 @@ def na_op(x, y): result = np.empty(x.size, dtype=dtype) mask = notna(x) & notna(y) result[mask] = op(x[mask], com._values_from_object(y[mask])) - elif isinstance(x, np.ndarray): + else: + assert isinstance(x, np.ndarray) result = np.empty(len(x), dtype=x.dtype) mask = notna(x) result[mask] = op(x[mask], y) - else: - raise TypeError("{typ} cannot perform the operation " - "{op}".format(typ=type(x).__name__, - op=str_rep)) result, changed = maybe_upcast_putmask(result, ~mask, np.nan) @@ -658,6 +652,10 @@ def wrapper(left, right, name=name, na_op=na_op): index=left.index, name=res_name, dtype=result.dtype) + elif is_categorical_dtype(left): + raise TypeError("{typ} cannot perform the operation " + "{op}".format(typ=type(left).__name__, op=str_rep)) + lvalues = left.values rvalues = right if isinstance(rvalues, ABCSeries): @@ -745,8 +743,12 @@ def na_op(x, y): elif is_categorical_dtype(y) and not is_scalar(y): return op(y, x) - if is_object_dtype(x.dtype): + elif is_object_dtype(x.dtype): result = _comp_method_OBJECT_ARRAY(op, x, y) + + elif is_datetimelike_v_numeric(x, y): + raise TypeError("invalid type comparison") + else: # we want to compare like types @@ -754,15 +756,6 @@ def na_op(x, y): # we are 
not NotImplemented, otherwise # we would allow datetime64 (but viewed as i8) against # integer comparisons - if is_datetimelike_v_numeric(x, y): - raise TypeError("invalid type comparison") - - # numpy does not like comparisons vs None - if is_scalar(y) and isna(y): - if name == '__ne__': - return np.ones(len(x), dtype=bool) - else: - return np.zeros(len(x), dtype=bool) # we have a datetime/timedelta and may need to convert mask = None @@ -795,15 +788,18 @@ def wrapper(self, other, axis=None): if axis is not None: self._get_axis_number(axis) - if isinstance(other, ABCSeries): + if isinstance(other, ABCDataFrame): # pragma: no cover + # Defer to DataFrame implementation; fail early + return NotImplemented + + elif isinstance(other, ABCSeries): name = com._maybe_match_name(self, other) if not self._indexed_same(other): msg = 'Can only compare identically-labeled Series objects' raise ValueError(msg) - return self._constructor(na_op(self.values, other.values), - index=self.index, name=name) - elif isinstance(other, ABCDataFrame): # pragma: no cover - return NotImplemented + res_values = na_op(self.values, other.values) + return self._constructor(res_values, index=self.index, name=name) + elif isinstance(other, (np.ndarray, pd.Index)): # do not check length of zerodim array # as it will broadcast @@ -811,23 +807,25 @@ def wrapper(self, other, axis=None): len(self) != len(other)): raise ValueError('Lengths must match to compare') - if isinstance(other, ABCPeriodIndex): - # temp workaround until fixing GH 13637 - # tested in test_nat_comparisons - # (pandas.tests.series.test_operators.TestSeriesOperators) - return self._constructor(na_op(self.values, - other.astype(object).values), - index=self.index) - - return self._constructor(na_op(self.values, np.asarray(other)), + res_values = na_op(self.values, np.asarray(other)) + return self._constructor(res_values, index=self.index).__finalize__(self) - elif isinstance(other, pd.Categorical): - if not 
is_categorical_dtype(self): - msg = ("Cannot compare a Categorical for op {op} with Series " - "of dtype {typ}.\nIf you want to compare values, use " - "'series np.asarray(other)'.") - raise TypeError(msg.format(op=op, typ=self.dtype)) + elif (isinstance(other, pd.Categorical) and + not is_categorical_dtype(self)): + raise TypeError("Cannot compare a Categorical for op {op} with " + "Series of dtype {typ}.\nIf you want to compare " + "values, use 'series np.asarray(other)'." + .format(op=op, typ=self.dtype)) + + elif is_scalar(other) and isna(other): + # numpy does not like comparisons vs None + if op is operator.ne: + res_values = np.ones(len(self), dtype=bool) + else: + res_values = np.zeros(len(self), dtype=bool) + return self._constructor(res_values, index=self.index, + name=self.name, dtype='bool') if is_categorical_dtype(self): # cats are a special case as get_values() would return an ndarray, @@ -877,11 +875,10 @@ def na_op(x, y): y = _ensure_object(y) result = lib.vec_binop(x, y, op) else: + # let null fall thru + if not isna(y): + y = bool(y) try: - - # let null fall thru - if not isna(y): - y = bool(y) result = lib.scalar_binop(x, y, op) except: msg = ("cannot compare a dtyped [{dtype}] array " @@ -899,26 +896,31 @@ def wrapper(self, other): self, other = _align_method_SERIES(self, other, align_asobject=True) - if isinstance(other, ABCSeries): + if isinstance(other, ABCDataFrame): + # Defer to DataFrame implementation; fail early + return NotImplemented + + elif isinstance(other, ABCSeries): name = com._maybe_match_name(self, other) is_other_int_dtype = is_integer_dtype(other.dtype) other = fill_int(other) if is_other_int_dtype else fill_bool(other) filler = (fill_int if is_self_int_dtype and is_other_int_dtype else fill_bool) - return filler(self._constructor(na_op(self.values, other.values), - index=self.index, name=name)) - elif isinstance(other, ABCDataFrame): - return NotImplemented + res_values = na_op(self.values, other.values) + unfilled = 
self._constructor(res_values, + index=self.index, name=name) + return filler(unfilled) else: # scalars, list, tuple, np.array filler = (fill_int if is_self_int_dtype and is_integer_dtype(np.asarray(other)) else fill_bool) - return filler(self._constructor( - na_op(self.values, other), - index=self.index)).__finalize__(self) + + res_values = na_op(self.values, other) + unfilled = self._constructor(res_values, index=self.index) + return filler(unfilled).__finalize__(self) return wrapper @@ -1023,21 +1025,23 @@ def na_op(x, y): mask = notna(xrav) & notna(yrav) xrav = xrav[mask] - # we may need to manually - # broadcast a 1 element array if yrav.shape != mask.shape: - yrav = np.empty(mask.shape, dtype=yrav.dtype) - yrav.fill(yrav.item()) + # FIXME: GH#5284, GH#5035, GH#19448 + # Without specifically raising here we get mismatched + # errors in Py3 (TypeError) vs Py2 (ValueError) + raise ValueError('Cannot broadcast operands together.') yrav = yrav[mask] - if np.prod(xrav.shape) and np.prod(yrav.shape): + if xrav.size: with np.errstate(all='ignore'): result[mask] = op(xrav, yrav) - elif hasattr(x, 'size'): + + elif isinstance(x, np.ndarray): + # mask is only meaningful for x result = np.empty(x.size, dtype=x.dtype) mask = notna(xrav) xrav = xrav[mask] - if np.prod(xrav.shape): + if xrav.size: with np.errstate(all='ignore'): result[mask] = op(xrav, y) else: diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index 1c23527cf57c4..62a467bec2683 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -819,4 +819,4 @@ def from_coo(cls, A, dense_index=False): ops.add_special_arithmetic_methods(SparseSeries, ops._arith_method_SPARSE_SERIES, comp_method=ops._arith_method_SPARSE_SERIES, - bool_method=None, force=True) + bool_method=None) From 14ad618f7a5059d874e7ccbbd1911310dd79be56 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 2 Feb 2018 03:32:49 -0800 Subject: [PATCH 032/217] Make DateOffset.kwds a property (#19403) --- 
doc/source/whatsnew/v0.23.0.txt | 1 + pandas/_libs/tslibs/offsets.pyx | 8 ++ pandas/core/indexes/datetimes.py | 8 +- pandas/tests/tseries/offsets/test_offsets.py | 2 +- pandas/tseries/offsets.py | 131 +++++++++---------- 5 files changed, 78 insertions(+), 72 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 91362c7640575..818b17baa38aa 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -341,6 +341,7 @@ Other API Changes - :func:`DatetimeIndex.shift` and :func:`TimedeltaIndex.shift` will now raise ``NullFrequencyError`` (which subclasses ``ValueError``, which was raised in older versions) when the index object frequency is ``None`` (:issue:`19147`) - Addition and subtraction of ``NaN`` from a :class:`Series` with ``dtype='timedelta64[ns]'`` will raise a ``TypeError` instead of treating the ``NaN`` as ``NaT`` (:issue:`19274`) - Set operations (union, difference...) on :class:`IntervalIndex` with incompatible index types will now raise a ``TypeError`` rather than a ``ValueError`` (:issue:`19329`) +- :class:`DateOffset` objects render more simply, e.g. "<DateOffset: days=1>" instead of "<DateOffset: kwds={'days': 1}>" (:issue:`19403`) .. 
_whatsnew_0230.deprecations: diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index e02818dd818df..8caf9ea0e0389 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -302,6 +302,14 @@ class _BaseOffset(object): _normalize_cache = True _cacheable = False _day_opt = None + _attributes = frozenset(['n', 'normalize']) + + @property + def kwds(self): + # for backwards-compatibility + kwds = {name: getattr(self, name, None) for name in self._attributes + if name not in ['n', 'normalize']} + return {name: kwds[name] for name in kwds if kwds[name] is not None} def __call__(self, other): return self.apply(other) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 8dd41c022d163..76219a07f4943 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -71,9 +71,11 @@ def f(self): if field in ['is_month_start', 'is_month_end', 'is_quarter_start', 'is_quarter_end', 'is_year_start', 'is_year_end']: - month_kw = (self.freq.kwds.get('startingMonth', - self.freq.kwds.get('month', 12)) - if self.freq else 12) + freq = self.freq + month_kw = 12 + if freq: + kwds = freq.kwds + month_kw = kwds.get('startingMonth', kwds.get('month', 12)) result = fields.get_start_end_field(values, field, self.freqstr, month_kw) diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index b086884ecd250..d96ebab615d12 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -218,7 +218,7 @@ def test_offset_freqstr(self, offset_types): freqstr = offset.freqstr if freqstr not in ('<Easter>', - "<DateOffset: kwds={'days': 1}>", + "<DateOffset: days=1>", 'LWOM-SAT', ): code = get_offset(freqstr) assert offset.rule_code == code diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index ec206e0997d0b..2e4be7fbdeebf 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -185,6 +185,8 @@ def __add__(date): """ 
_use_relativedelta = False _adjust_dst = False + _attributes = frozenset(['n', 'normalize'] + + list(liboffsets.relativedelta_kwds)) # default for prior pickles normalize = False @@ -192,9 +194,9 @@ def __add__(date): def __init__(self, n=1, normalize=False, **kwds): self.n = self._validate_n(n) self.normalize = normalize - self.kwds = kwds self._offset, self._use_relativedelta = _determine_offset(kwds) + self.__dict__.update(kwds) @apply_wraps def apply(self, other): @@ -238,30 +240,31 @@ def apply_index(self, i): y : DatetimeIndex """ - if not type(self) is DateOffset: + if type(self) is not DateOffset: raise NotImplementedError("DateOffset subclass {name} " "does not have a vectorized " "implementation".format( name=self.__class__.__name__)) + kwds = self.kwds relativedelta_fast = set(['years', 'months', 'weeks', 'days', 'hours', 'minutes', 'seconds', 'microseconds']) # relativedelta/_offset path only valid for base DateOffset if (self._use_relativedelta and - set(self.kwds).issubset(relativedelta_fast)): + set(kwds).issubset(relativedelta_fast)): - months = ((self.kwds.get('years', 0) * 12 + - self.kwds.get('months', 0)) * self.n) + months = ((kwds.get('years', 0) * 12 + + kwds.get('months', 0)) * self.n) if months: shifted = liboffsets.shift_months(i.asi8, months) i = i._shallow_copy(shifted) - weeks = (self.kwds.get('weeks', 0)) * self.n + weeks = (kwds.get('weeks', 0)) * self.n if weeks: i = (i.to_period('W') + weeks).to_timestamp() + \ i.to_perioddelta('W') - timedelta_kwds = {k: v for k, v in self.kwds.items() + timedelta_kwds = {k: v for k, v in kwds.items() if k in ['days', 'hours', 'minutes', 'seconds', 'microseconds']} if timedelta_kwds: @@ -273,7 +276,7 @@ def apply_index(self, i): return i + (self._offset * self.n) else: # relativedelta with other keywords - kwd = set(self.kwds) - relativedelta_fast + kwd = set(kwds) - relativedelta_fast raise NotImplementedError("DateOffset with relativedelta " "keyword(s) {kwd} not able to be " "applied 
vectorized".format(kwd=kwd)) @@ -284,7 +287,7 @@ def isAnchored(self): return (self.n == 1) def _params(self): - all_paras = dict(list(vars(self).items()) + list(self.kwds.items())) + all_paras = self.__dict__.copy() if 'holidays' in all_paras and not all_paras['holidays']: all_paras.pop('holidays') exclude = ['kwds', 'name', 'normalize', 'calendar'] @@ -301,15 +304,8 @@ def _repr_attrs(self): exclude = set(['n', 'inc', 'normalize']) attrs = [] for attr in sorted(self.__dict__): - if attr.startswith('_'): + if attr.startswith('_') or attr == 'kwds': continue - elif attr == 'kwds': # TODO: get rid of this - kwds_new = {} - for key in self.kwds: - if not hasattr(self, key): - kwds_new[key] = self.kwds[key] - if len(kwds_new) > 0: - attrs.append('kwds={kwds_new}'.format(kwds_new=kwds_new)) elif attr not in exclude: value = getattr(self, attr) attrs.append('{attr}={value}'.format(attr=attr, value=value)) @@ -427,6 +423,30 @@ def _offset_str(self): def nanos(self): raise ValueError("{name} is a non-fixed frequency".format(name=self)) + def __setstate__(self, state): + """Reconstruct an instance from a pickled state""" + if 'offset' in state: + # Older (<0.22.0) versions have offset attribute instead of _offset + if '_offset' in state: # pragma: no cover + raise AssertionError('Unexpected key `_offset`') + state['_offset'] = state.pop('offset') + state['kwds']['offset'] = state['_offset'] + + if '_offset' in state and not isinstance(state['_offset'], timedelta): + # relativedelta, we need to populate using its kwds + offset = state['_offset'] + odict = offset.__dict__ + kwds = {key: odict[key] for key in odict if odict[key]} + state.update(kwds) + + self.__dict__ = state + if 'weekmask' in state and 'holidays' in state: + calendar, holidays = _get_calendar(weekmask=self.weekmask, + holidays=self.holidays, + calendar=None) + self.calendar = calendar + self.holidays = holidays + class SingleConstructorOffset(DateOffset): @classmethod @@ -450,10 +470,9 @@ def __init__(self, 
weekmask, holidays, calendar): # following two attributes. See DateOffset._params() # holidays, weekmask - # assumes self.kwds already exists - self.kwds['weekmask'] = self.weekmask = weekmask - self.kwds['holidays'] = self.holidays = holidays - self.kwds['calendar'] = self.calendar = calendar + self.weekmask = weekmask + self.holidays = holidays + self.calendar = calendar class BusinessMixin(object): @@ -490,23 +509,6 @@ def __getstate__(self): return state - def __setstate__(self, state): - """Reconstruct an instance from a pickled state""" - if 'offset' in state: - # Older versions have offset attribute instead of _offset - if '_offset' in state: # pragma: no cover - raise ValueError('Unexpected key `_offset`') - state['_offset'] = state.pop('offset') - state['kwds']['offset'] = state['_offset'] - self.__dict__ = state - if 'weekmask' in state and 'holidays' in state: - calendar, holidays = _get_calendar(weekmask=self.weekmask, - holidays=self.holidays, - calendar=None) - self.kwds['calendar'] = self.calendar = calendar - self.kwds['holidays'] = self.holidays = holidays - self.kwds['weekmask'] = state['weekmask'] - class BusinessDay(BusinessMixin, SingleConstructorOffset): """ @@ -514,11 +516,11 @@ class BusinessDay(BusinessMixin, SingleConstructorOffset): """ _prefix = 'B' _adjust_dst = True + _attributes = frozenset(['n', 'normalize', 'offset']) def __init__(self, n=1, normalize=False, offset=timedelta(0)): self.n = self._validate_n(n) self.normalize = normalize - self.kwds = {'offset': offset} self._offset = offset def _offset_str(self): @@ -615,10 +617,8 @@ class BusinessHourMixin(BusinessMixin): def __init__(self, start='09:00', end='17:00', offset=timedelta(0)): # must be validated here to equality check - kwds = {'offset': offset} - self.start = kwds['start'] = liboffsets._validate_business_time(start) - self.end = kwds['end'] = liboffsets._validate_business_time(end) - self.kwds.update(kwds) + self.start = liboffsets._validate_business_time(start) + 
self.end = liboffsets._validate_business_time(end) self._offset = offset @cache_readonly @@ -843,12 +843,12 @@ class BusinessHour(BusinessHourMixin, SingleConstructorOffset): """ _prefix = 'BH' _anchor = 0 + _attributes = frozenset(['n', 'normalize', 'start', 'end', 'offset']) def __init__(self, n=1, normalize=False, start='09:00', end='17:00', offset=timedelta(0)): self.n = self._validate_n(n) self.normalize = normalize - self.kwds = {} super(BusinessHour, self).__init__(start=start, end=end, offset=offset) @@ -872,13 +872,14 @@ class CustomBusinessDay(_CustomMixin, BusinessDay): """ _cacheable = False _prefix = 'C' + _attributes = frozenset(['n', 'normalize', + 'weekmask', 'holidays', 'calendar', 'offset']) def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri', holidays=None, calendar=None, offset=timedelta(0)): self.n = self._validate_n(n) self.normalize = normalize self._offset = offset - self.kwds = {'offset': offset} _CustomMixin.__init__(self, weekmask, holidays, calendar) @@ -930,6 +931,9 @@ class CustomBusinessHour(_CustomMixin, BusinessHourMixin, """ _prefix = 'CBH' _anchor = 0 + _attributes = frozenset(['n', 'normalize', + 'weekmask', 'holidays', 'calendar', + 'start', 'end', 'offset']) def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri', holidays=None, calendar=None, @@ -937,7 +941,6 @@ def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri', self.n = self._validate_n(n) self.normalize = normalize self._offset = offset - self.kwds = {'offset': offset} _CustomMixin.__init__(self, weekmask, holidays, calendar) BusinessHourMixin.__init__(self, start=start, end=end, offset=offset) @@ -949,11 +952,11 @@ def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri', class MonthOffset(SingleConstructorOffset): _adjust_dst = True + _attributes = frozenset(['n', 'normalize']) def __init__(self, n=1, normalize=False): self.n = self._validate_n(n) self.normalize = normalize - self.kwds = {} @property 
def name(self): @@ -1024,6 +1027,8 @@ class _CustomBusinessMonth(_CustomMixin, BusinessMixin, MonthOffset): calendar : pd.HolidayCalendar or np.busdaycalendar """ _cacheable = False + _attributes = frozenset(['n', 'normalize', + 'weekmask', 'holidays', 'calendar', 'offset']) onOffset = DateOffset.onOffset # override MonthOffset method apply_index = DateOffset.apply_index # override MonthOffset method @@ -1033,7 +1038,6 @@ def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri', self.n = self._validate_n(n) self.normalize = normalize self._offset = offset - self.kwds = {'offset': offset} _CustomMixin.__init__(self, weekmask, holidays, calendar) @@ -1102,6 +1106,7 @@ class SemiMonthOffset(DateOffset): _adjust_dst = True _default_day_of_month = 15 _min_day_of_month = 2 + _attributes = frozenset(['n', 'normalize', 'day_of_month']) def __init__(self, n=1, normalize=False, day_of_month=None): if day_of_month is None: @@ -1115,7 +1120,6 @@ def __init__(self, n=1, normalize=False, day_of_month=None): self.n = self._validate_n(n) self.normalize = normalize - self.kwds = {'day_of_month': self.day_of_month} @classmethod def _from_name(cls, suffix=None): @@ -1319,6 +1323,7 @@ class Week(DateOffset): _adjust_dst = True _inc = timedelta(weeks=1) _prefix = 'W' + _attributes = frozenset(['n', 'normalize', 'weekday']) def __init__(self, n=1, normalize=False, weekday=None): self.n = self._validate_n(n) @@ -1330,8 +1335,6 @@ def __init__(self, n=1, normalize=False, weekday=None): raise ValueError('Day must be 0<=day<=6, got {day}' .format(day=self.weekday)) - self.kwds = {'weekday': weekday} - def isAnchored(self): return (self.n == 1 and self.weekday is not None) @@ -1450,6 +1453,7 @@ class WeekOfMonth(_WeekOfMonthMixin, DateOffset): """ _prefix = 'WOM' _adjust_dst = True + _attributes = frozenset(['n', 'normalize', 'week', 'weekday']) def __init__(self, n=1, normalize=False, week=0, weekday=0): self.n = self._validate_n(n) @@ -1467,8 +1471,6 @@ def __init__(self, 
n=1, normalize=False, week=0, weekday=0): raise ValueError('Week must be 0<=week<=3, got {week}' .format(week=self.week)) - self.kwds = {'weekday': weekday, 'week': week} - def _get_offset_day(self, other): """ Find the day in the same month as other that has the same @@ -1526,6 +1528,7 @@ class LastWeekOfMonth(_WeekOfMonthMixin, DateOffset): """ _prefix = 'LWOM' _adjust_dst = True + _attributes = frozenset(['n', 'normalize', 'weekday']) def __init__(self, n=1, normalize=False, weekday=0): self.n = self._validate_n(n) @@ -1539,8 +1542,6 @@ def __init__(self, n=1, normalize=False, weekday=0): raise ValueError('Day must be 0<=day<=6, got {day}' .format(day=self.weekday)) - self.kwds = {'weekday': weekday} - def _get_offset_day(self, other): """ Find the day in the same month as other that has the same @@ -1584,6 +1585,7 @@ class QuarterOffset(DateOffset): _default_startingMonth = None _from_name_startingMonth = None _adjust_dst = True + _attributes = frozenset(['n', 'normalize', 'startingMonth']) # TODO: Consider combining QuarterOffset and YearOffset __init__ at some # point. 
Also apply_index, onOffset, rule_code if # startingMonth vs month attr names are resolved @@ -1595,8 +1597,6 @@ def __init__(self, n=1, normalize=False, startingMonth=None): startingMonth = self._default_startingMonth self.startingMonth = startingMonth - self.kwds = {'startingMonth': startingMonth} - def isAnchored(self): return (self.n == 1 and self.startingMonth is not None) @@ -1690,6 +1690,7 @@ class QuarterBegin(QuarterOffset): class YearOffset(DateOffset): """DateOffset that just needs a month""" _adjust_dst = True + _attributes = frozenset(['n', 'normalize', 'month']) def _get_offset_day(self, other): # override BaseOffset method to use self.month instead of other.month @@ -1725,8 +1726,6 @@ def __init__(self, n=1, normalize=False, month=None): if self.month < 1 or self.month > 12: raise ValueError('Month must go from 1 to 12') - self.kwds = {'month': month} - @classmethod def _from_name(cls, suffix=None): kwargs = {} @@ -1811,6 +1810,7 @@ class FY5253(DateOffset): """ _prefix = 'RE' _adjust_dst = True + _attributes = frozenset(['weekday', 'startingMonth', 'variation']) def __init__(self, n=1, normalize=False, weekday=0, startingMonth=1, variation="nearest"): @@ -1821,9 +1821,6 @@ def __init__(self, n=1, normalize=False, weekday=0, startingMonth=1, self.variation = variation - self.kwds = {'weekday': weekday, 'startingMonth': startingMonth, - 'variation': variation} - if self.n == 0: raise ValueError('N cannot be 0') @@ -2012,6 +2009,8 @@ class FY5253Quarter(DateOffset): _prefix = 'REQ' _adjust_dst = True + _attributes = frozenset(['weekday', 'startingMonth', 'qtr_with_extra_week', + 'variation']) def __init__(self, n=1, normalize=False, weekday=0, startingMonth=1, qtr_with_extra_week=1, variation="nearest"): @@ -2023,10 +2022,6 @@ def __init__(self, n=1, normalize=False, weekday=0, startingMonth=1, self.qtr_with_extra_week = qtr_with_extra_week self.variation = variation - self.kwds = {'weekday': weekday, 'startingMonth': startingMonth, - 
'qtr_with_extra_week': qtr_with_extra_week, - 'variation': variation} - if self.n == 0: raise ValueError('N cannot be 0') @@ -2170,11 +2165,11 @@ class Easter(DateOffset): 1583-4099. """ _adjust_dst = True + _attributes = frozenset(['n', 'normalize']) def __init__(self, n=1, normalize=False): self.n = self._validate_n(n) self.normalize = normalize - self.kwds = {} @apply_wraps def apply(self, other): @@ -2217,12 +2212,12 @@ def f(self, other): class Tick(SingleConstructorOffset): _inc = Timedelta(microseconds=1000) _prefix = 'undefined' + _attributes = frozenset(['n', 'normalize']) def __init__(self, n=1, normalize=False): # TODO: do Tick classes with normalize=True make sense? self.n = self._validate_n(n) self.normalize = normalize - self.kwds = {} __gt__ = _tick_comp(operator.gt) __ge__ = _tick_comp(operator.ge) From 01609274d9eaf3dcdc73add03f44408f08972e8a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 2 Feb 2018 03:38:05 -0800 Subject: [PATCH 033/217] Fix DTI comparison with None, datetime.date (#19301) --- doc/source/whatsnew/v0.23.0.txt | 4 +- pandas/core/indexes/datetimes.py | 18 +- .../indexes/datetimes/test_arithmetic.py | 208 ++++++++++++------ 3 files changed, 156 insertions(+), 74 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 818b17baa38aa..b28378f13057b 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -453,6 +453,8 @@ Datetimelike - Bug in subtracting :class:`Series` from ``NaT`` incorrectly returning ``NaT`` (:issue:`19158`) - Bug in :func:`Series.truncate` which raises ``TypeError`` with a monotonic ``PeriodIndex`` (:issue:`17717`) - Bug in :func:`~DataFrame.pct_change` using ``periods`` and ``freq`` returned different length outputs (:issue:`7292`) +- Bug in comparison of :class:`DatetimeIndex` against ``None`` or ``datetime.date`` objects raising ``TypeError`` for ``==`` and ``!=`` comparisons instead of all-``False`` and all-``True``, respectively 
(:issue:`19301`) +- Timezones ^^^^^^^^^ @@ -484,8 +486,6 @@ Numeric - Bug in the :class:`DataFrame` constructor in which data containing very large positive or very large negative numbers was causing ``OverflowError`` (:issue:`18584`) - Bug in :class:`Index` constructor with ``dtype='uint64'`` where int-like floats were not coerced to :class:`UInt64Index` (:issue:`18400`) -- - Indexing ^^^^^^^^ diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 76219a07f4943..e09fa87477122 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -120,8 +120,16 @@ def wrapper(self, other): else: if isinstance(other, list): other = DatetimeIndex(other) - elif not isinstance(other, (np.ndarray, Index, ABCSeries)): - other = _ensure_datetime64(other) + elif not isinstance(other, (np.datetime64, np.ndarray, + Index, ABCSeries)): + # Following Timestamp convention, __eq__ is all-False + # and __ne__ is all True, others raise TypeError. + if opname == '__eq__': + return np.zeros(shape=self.shape, dtype=bool) + elif opname == '__ne__': + return np.ones(shape=self.shape, dtype=bool) + raise TypeError('%s type object %s' % + (type(other), str(other))) if is_datetimelike(other): self._assert_tzawareness_compat(other) @@ -148,12 +156,6 @@ def wrapper(self, other): return compat.set_function_name(wrapper, opname, cls) -def _ensure_datetime64(other): - if isinstance(other, np.datetime64): - return other - raise TypeError('%s type object %s' % (type(other), str(other))) - - _midnight = time(0, 0) diff --git a/pandas/tests/indexes/datetimes/test_arithmetic.py b/pandas/tests/indexes/datetimes/test_arithmetic.py index 671071b5e4945..09a6b35a0ff0e 100644 --- a/pandas/tests/indexes/datetimes/test_arithmetic.py +++ b/pandas/tests/indexes/datetimes/test_arithmetic.py @@ -14,6 +14,7 @@ from pandas import (Timestamp, Timedelta, Series, DatetimeIndex, TimedeltaIndex, date_range) +from pandas._libs import tslib @pytest.fixture(params=[None, 
'UTC', 'Asia/Tokyo', @@ -44,7 +45,83 @@ def addend(request): class TestDatetimeIndexComparisons(object): - # TODO: De-duplicate with test_comparisons_nat below + @pytest.mark.parametrize('other', [datetime(2016, 1, 1), + Timestamp('2016-01-01'), + np.datetime64('2016-01-01')]) + def test_dti_cmp_datetimelike(self, other, tz): + dti = pd.date_range('2016-01-01', periods=2, tz=tz) + if tz is not None: + if isinstance(other, np.datetime64): + # no tzaware version available + return + elif isinstance(other, Timestamp): + other = other.tz_localize(dti.tzinfo) + else: + other = tslib._localize_pydatetime(other, dti.tzinfo) + + result = dti == other + expected = np.array([True, False]) + tm.assert_numpy_array_equal(result, expected) + + result = dti > other + expected = np.array([False, True]) + tm.assert_numpy_array_equal(result, expected) + + result = dti >= other + expected = np.array([True, True]) + tm.assert_numpy_array_equal(result, expected) + + result = dti < other + expected = np.array([False, False]) + tm.assert_numpy_array_equal(result, expected) + + result = dti <= other + expected = np.array([True, False]) + tm.assert_numpy_array_equal(result, expected) + + def dti_cmp_non_datetime(self, tz): + # GH#19301 by convention datetime.date is not considered comparable + # to Timestamp or DatetimeIndex. This may change in the future. 
+ dti = pd.date_range('2016-01-01', periods=2, tz=tz) + + other = datetime(2016, 1, 1).date() + assert not (dti == other).any() + assert (dti != other).all() + with pytest.raises(TypeError): + dti < other + with pytest.raises(TypeError): + dti <= other + with pytest.raises(TypeError): + dti > other + with pytest.raises(TypeError): + dti >= other + + @pytest.mark.parametrize('other', [None, np.nan, pd.NaT]) + def test_dti_eq_null_scalar(self, other, tz): + # GH#19301 + dti = pd.date_range('2016-01-01', periods=2, tz=tz) + assert not (dti == other).any() + + @pytest.mark.parametrize('other', [None, np.nan, pd.NaT]) + def test_dti_ne_null_scalar(self, other, tz): + # GH#19301 + dti = pd.date_range('2016-01-01', periods=2, tz=tz) + assert (dti != other).all() + + @pytest.mark.parametrize('other', [None, np.nan]) + def test_dti_cmp_null_scalar_inequality(self, tz, other): + # GH#19301 + dti = pd.date_range('2016-01-01', periods=2, tz=tz) + + with pytest.raises(TypeError): + dti < other + with pytest.raises(TypeError): + dti <= other + with pytest.raises(TypeError): + dti > other + with pytest.raises(TypeError): + dti >= other + def test_dti_cmp_nat(self): left = pd.DatetimeIndex([pd.Timestamp('2011-01-01'), pd.NaT, pd.Timestamp('2011-01-03')]) @@ -72,69 +149,7 @@ def test_dti_cmp_nat(self): tm.assert_numpy_array_equal(lhs < pd.NaT, expected) tm.assert_numpy_array_equal(pd.NaT > lhs, expected) - @pytest.mark.parametrize('op', [operator.eq, operator.ne, - operator.gt, operator.ge, - operator.lt, operator.le]) - def test_comparison_tzawareness_compat(self, op): - # GH#18162 - dr = pd.date_range('2016-01-01', periods=6) - dz = dr.tz_localize('US/Pacific') - - with pytest.raises(TypeError): - op(dr, dz) - with pytest.raises(TypeError): - op(dr, list(dz)) - with pytest.raises(TypeError): - op(dz, dr) - with pytest.raises(TypeError): - op(dz, list(dr)) - - # Check that there isn't a problem aware-aware and naive-naive do not - # raise - assert (dr == dr).all() - assert (dr == 
list(dr)).all() - assert (dz == dz).all() - assert (dz == list(dz)).all() - - # Check comparisons against scalar Timestamps - ts = pd.Timestamp('2000-03-14 01:59') - ts_tz = pd.Timestamp('2000-03-14 01:59', tz='Europe/Amsterdam') - - assert (dr > ts).all() - with pytest.raises(TypeError): - op(dr, ts_tz) - - assert (dz > ts_tz).all() - with pytest.raises(TypeError): - op(dz, ts) - - @pytest.mark.parametrize('op', [operator.eq, operator.ne, - operator.gt, operator.ge, - operator.lt, operator.le]) - def test_nat_comparison_tzawareness(self, op): - # GH#19276 - # tzaware DatetimeIndex should not raise when compared to NaT - dti = pd.DatetimeIndex(['2014-01-01', pd.NaT, '2014-03-01', pd.NaT, - '2014-05-01', '2014-07-01']) - expected = np.array([op == operator.ne] * len(dti)) - result = op(dti, pd.NaT) - tm.assert_numpy_array_equal(result, expected) - - result = op(dti.tz_localize('US/Pacific'), pd.NaT) - tm.assert_numpy_array_equal(result, expected) - - def test_comparisons_coverage(self): - rng = date_range('1/1/2000', periods=10) - - # raise TypeError for now - pytest.raises(TypeError, rng.__lt__, rng[3].value) - - result = rng == list(rng) - exp = rng == rng - tm.assert_numpy_array_equal(result, exp) - - def test_comparisons_nat(self): - + def test_dti_cmp_nat_behaves_like_float_cmp_nan(self): fidx1 = pd.Index([1.0, np.nan, 3.0, np.nan, 5.0, 7.0]) fidx2 = pd.Index([2.0, 3.0, np.nan, np.nan, 6.0, 7.0]) @@ -223,6 +238,71 @@ def test_comparisons_nat(self): expected = np.array([True, True, False, True, True, True]) tm.assert_numpy_array_equal(result, expected) + @pytest.mark.parametrize('op', [operator.eq, operator.ne, + operator.gt, operator.ge, + operator.lt, operator.le]) + def test_comparison_tzawareness_compat(self, op): + # GH#18162 + dr = pd.date_range('2016-01-01', periods=6) + dz = dr.tz_localize('US/Pacific') + + with pytest.raises(TypeError): + op(dr, dz) + with pytest.raises(TypeError): + op(dr, list(dz)) + with pytest.raises(TypeError): + op(dz, dr) + with 
pytest.raises(TypeError): + op(dz, list(dr)) + + # Check that there isn't a problem aware-aware and naive-naive do not + # raise + assert (dr == dr).all() + assert (dr == list(dr)).all() + assert (dz == dz).all() + assert (dz == list(dz)).all() + + # Check comparisons against scalar Timestamps + ts = pd.Timestamp('2000-03-14 01:59') + ts_tz = pd.Timestamp('2000-03-14 01:59', tz='Europe/Amsterdam') + + assert (dr > ts).all() + with pytest.raises(TypeError): + op(dr, ts_tz) + + assert (dz > ts_tz).all() + with pytest.raises(TypeError): + op(dz, ts) + + @pytest.mark.parametrize('op', [operator.eq, operator.ne, + operator.gt, operator.ge, + operator.lt, operator.le]) + def test_nat_comparison_tzawareness(self, op): + # GH#19276 + # tzaware DatetimeIndex should not raise when compared to NaT + dti = pd.DatetimeIndex(['2014-01-01', pd.NaT, '2014-03-01', pd.NaT, + '2014-05-01', '2014-07-01']) + expected = np.array([op == operator.ne] * len(dti)) + result = op(dti, pd.NaT) + tm.assert_numpy_array_equal(result, expected) + + result = op(dti.tz_localize('US/Pacific'), pd.NaT) + tm.assert_numpy_array_equal(result, expected) + + def test_dti_cmp_int_raises(self): + rng = date_range('1/1/2000', periods=10) + + # raise TypeError for now + with pytest.raises(TypeError): + rng < rng[3].value + + def test_dti_cmp_list(self): + rng = date_range('1/1/2000', periods=10) + + result = rng == list(rng) + expected = rng == rng + tm.assert_numpy_array_equal(result, expected) + class TestDatetimeIndexArithmetic(object): From 5c29123b316164f934a307dfd6c0e5bb6a1439a9 Mon Sep 17 00:00:00 2001 From: Tommy <10076072+tommyod@users.noreply.github.com> Date: Fri, 2 Feb 2018 13:50:46 +0100 Subject: [PATCH 034/217] DOC: Exposed arguments in plot.kde (#19229) * Exposed arguments in plot.kde, added number of sample points as option * Added a test for plot.kde with as an integer * Added whatsnew. Fixed flake8 errors. Used is_integer to infer type. 
* Updated scipy reference * Added test, rewrote whatsnew, removed import * Changed from Series to DataFrame in doc * Fixed PEP8 errors in test file * Fixed typo which made tests crash --- doc/source/whatsnew/v0.23.0.txt | 2 +- pandas/plotting/_core.py | 32 ++++++++++++++++++++++++---- pandas/tests/plotting/test_series.py | 14 ++++++------ 3 files changed, 37 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index b28378f13057b..26a7a78bb5c55 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -536,7 +536,7 @@ Plotting - :func: `DataFrame.plot` now raises a ``ValueError`` when the ``x`` or ``y`` argument is improperly formed (:issue:`18671`) - Bug in formatting tick labels with ``datetime.time()`` and fractional seconds (:issue:`18478`). -- +- :meth:`Series.plot.kde` has exposed the args ``ind`` and ``bw_method`` in the docstring (:issue:`18461`). The argument ``ind`` may now also be an integer (number of sample points). - Groupby/Resample/Rolling diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 88b899ad60313..b15c5271ae321 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -1398,6 +1398,10 @@ def _get_ind(self, y): sample_range = np.nanmax(y) - np.nanmin(y) ind = np.linspace(np.nanmin(y) - 0.5 * sample_range, np.nanmax(y) + 0.5 * sample_range, 1000) + elif is_integer(self.ind): + sample_range = np.nanmax(y) - np.nanmin(y) + ind = np.linspace(np.nanmin(y) - 0.5 * sample_range, + np.nanmax(y) + 0.5 * sample_range, self.ind) else: ind = self.ind return ind @@ -2598,12 +2602,22 @@ def hist(self, bins=10, **kwds): """ return self(kind='hist', bins=bins, **kwds) - def kde(self, **kwds): + def kde(self, bw_method=None, ind=None, **kwds): """ Kernel Density Estimate plot Parameters ---------- + bw_method: str, scalar or callable, optional + The method used to calculate the estimator bandwidth. 
This can be + 'scott', 'silverman', a scalar constant or a callable. + If None (default), 'scott' is used. + See :class:`scipy.stats.gaussian_kde` for more information. + ind : NumPy array or integer, optional + Evaluation points. If None (default), 1000 equally spaced points + are used. If `ind` is a NumPy array, the kde is evaluated at the + points passed. If `ind` is an integer, `ind` number of equally + spaced points are used. `**kwds` : optional Keyword arguments to pass on to :py:meth:`pandas.Series.plot`. @@ -2611,7 +2625,7 @@ def kde(self, **kwds): ------- axes : matplotlib.AxesSubplot or np.array of them """ - return self(kind='kde', **kwds) + return self(kind='kde', bw_method=bw_method, ind=ind, **kwds) density = kde @@ -2766,12 +2780,22 @@ def hist(self, by=None, bins=10, **kwds): """ return self(kind='hist', by=by, bins=bins, **kwds) - def kde(self, **kwds): + def kde(self, bw_method=None, ind=None, **kwds): """ Kernel Density Estimate plot Parameters ---------- + bw_method: str, scalar or callable, optional + The method used to calculate the estimator bandwidth. This can be + 'scott', 'silverman', a scalar constant or a callable. + If None (default), 'scott' is used. + See :class:`scipy.stats.gaussian_kde` for more information. + ind : NumPy array or integer, optional + Evaluation points. If None (default), 1000 equally spaced points + are used. If `ind` is a NumPy array, the kde is evaluated at the + points passed. If `ind` is an integer, `ind` number of equally + spaced points are used. `**kwds` : optional Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. 
@@ -2779,7 +2803,7 @@ def kde(self, **kwds): ------- axes : matplotlib.AxesSubplot or np.array of them """ - return self(kind='kde', **kwds) + return self(kind='kde', bw_method=bw_method, ind=ind, **kwds) density = kde diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index 2458fc0dc992c..278be433183fa 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -621,14 +621,16 @@ def test_kde_kwargs(self): if not self.mpl_ge_1_5_0: pytest.skip("mpl is not supported") - from numpy import linspace - _check_plot_works(self.ts.plot.kde, bw_method=.5, - ind=linspace(-100, 100, 20)) + sample_points = np.linspace(-100, 100, 20) + _check_plot_works(self.ts.plot.kde, bw_method='scott', ind=20) + _check_plot_works(self.ts.plot.kde, bw_method=None, ind=20) + _check_plot_works(self.ts.plot.kde, bw_method=None, ind=np.int(20)) + _check_plot_works(self.ts.plot.kde, bw_method=.5, ind=sample_points) _check_plot_works(self.ts.plot.density, bw_method=.5, - ind=linspace(-100, 100, 20)) + ind=sample_points) _, ax = self.plt.subplots() - ax = self.ts.plot.kde(logy=True, bw_method=.5, - ind=linspace(-100, 100, 20), ax=ax) + ax = self.ts.plot.kde(logy=True, bw_method=.5, ind=sample_points, + ax=ax) self._check_ax_scales(ax, yaxis='log') self._check_text_labels(ax.yaxis.get_label(), 'Density') From f2873e9b5e8f63dd98ae4c791c55a7492d19b1da Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 2 Feb 2018 15:34:20 -0600 Subject: [PATCH 035/217] ENH: Array Interface and Categorical internals Refactor (#19268) * REF: Define extension base classes * Updated for comments * removed take_nd * Changed to_dense to return get_values * Fixed docstrings, types * Removed is_sparse * Remove metaclasses from PeriodDtype and IntervalDtype * Fixup form_blocks rebase * Restore concat casting cat -> object * Remove _slice, clarify semantics around __getitem__ * Document and use take. 
* Clarify type, kind, init * Remove base * API: Remove unused __iter__ and get_values * API: Implement repr and str * Remove default value_counts for now * Fixed merge conflicts * Remove implementation of construct_from_string * Example implementation of take * Cleanup ExtensionBlock * Pass through ndim * Use series._values * Removed repr, updated take doc * Various cleanups * Handle get_values, to_dense, is_view * Docs * Remove is_extension, is_bool Remove inherited convert * Sparse formatter * Revert "Sparse formatter" This reverts commit ab2f0457839fece3b3ef067f29994b42908bd037. * Unbox SparseSeries * Added test for sparse consolidation * Docs * Moved to errors * Handle classmethods, properties * Use our AbstractMethodError * Lint * Cleanup * Move ndim validation to a method. * Try this * Make ExtensionBlock._holder a property Removed ExtensionBlock.__init__ * Make _holder a property for all * Refactored validate_ndim * fixup! Refactored validate_ndim * lint --- pandas/core/arrays/__init__.py | 1 + pandas/core/arrays/base.py | 247 +++++++++++++ pandas/core/arrays/categorical.py | 18 +- pandas/core/common.py | 16 +- pandas/core/dtypes/base.py | 129 +++++++ pandas/core/dtypes/common.py | 29 ++ pandas/core/dtypes/dtypes.py | 16 +- pandas/core/internals.py | 329 ++++++++++++++---- pandas/errors/__init__.py | 23 ++ pandas/tests/dtypes/test_dtypes.py | 32 +- pandas/tests/internals/test_external_block.py | 4 +- pandas/tests/internals/test_internals.py | 24 +- pandas/tests/sparse/frame/test_frame.py | 9 + pandas/tests/test_errors.py | 29 ++ 14 files changed, 803 insertions(+), 103 deletions(-) create mode 100644 pandas/core/arrays/base.py create mode 100644 pandas/core/dtypes/base.py diff --git a/pandas/core/arrays/__init__.py b/pandas/core/arrays/__init__.py index ee32b12f0e712..f8adcf520c15b 100644 --- a/pandas/core/arrays/__init__.py +++ b/pandas/core/arrays/__init__.py @@ -1 +1,2 @@ +from .base import ExtensionArray # noqa from .categorical import Categorical # noqa 
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py new file mode 100644 index 0000000000000..1556b653819a6 --- /dev/null +++ b/pandas/core/arrays/base.py @@ -0,0 +1,247 @@ +"""An interface for extending pandas with custom arrays.""" +from pandas.errors import AbstractMethodError + +_not_implemented_message = "{} does not implement {}." + + +class ExtensionArray(object): + """Abstract base class for custom 1-D array types. + + pandas will recognize instances of this class as proper arrays + with a custom type and will not attempt to coerce them to objects. They + may be stored directly inside a :class:`DataFrame` or :class:`Series`. + + Notes + ----- + The interface includes the following abstract methods that must be + implemented by subclasses: + + * __getitem__ + * __len__ + * dtype + * nbytes + * isna + * take + * copy + * _formatting_values + * _concat_same_type + + Some additional methods are required to satisfy pandas' internal, private + block API. + + * _concat_same_type + * _can_hold_na + + This class does not inherit from 'abc.ABCMeta' for performance reasons. + Methods and properties required by the interface raise + ``pandas.errors.AbstractMethodError`` and no ``register`` method is + provided for registering virtual subclasses. + + ExtensionArrays are limited to 1 dimension. + + They may be backed by none, one, or many NumPy arrays. For example, + ``pandas.Categorical`` is an extension array backed by two arrays, + one for codes and one for categories. An array of IPv6 addresses may + be backed by a NumPy structured array with two fields, one for the + lower 64 bits and one for the upper 64 bits. Or they may be backed + by some other storage type, like Python lists. Pandas makes no + assumptions on how the data are stored, just that it can be converted + to a NumPy array. + + Extension arrays should be able to be constructed with instances of + the class, i.e. ``ExtensionArray(extension_array)`` should return + an instance, not error. 
+ + Additionally, certain methods and interfaces are required for + this array to be properly stored inside a ``DataFrame`` or ``Series``. + """ + # ------------------------------------------------------------------------ + # Must be a Sequence + # ------------------------------------------------------------------------ + def __getitem__(self, item): + # type: (Any) -> Any + """Select a subset of self. + + Parameters + ---------- + item : int, slice, or ndarray + * int: The position in 'self' to get. + + * slice: A slice object, where 'start', 'stop', and 'step' are + integers or None + + * ndarray: A 1-d boolean NumPy ndarray the same length as 'self' + + Returns + ------- + item : scalar or ExtensionArray + + Notes + ----- + For scalar ``item``, return a scalar value suitable for the array's + type. This should be an instance of ``self.dtype.type``. + + For slice ``item``, return an instance of ``ExtensionArray``, even + if the slice is length 0 or 1. + + For a boolean mask, return an instance of ``ExtensionArray``, filtered + to the values where ``item`` is True. + """ + raise AbstractMethodError(self) + + def __setitem__(self, key, value): + # type: (Any, Any) -> None + raise NotImplementedError(_not_implemented_message.format( + type(self), '__setitem__') + ) + + def __len__(self): + """Length of this array + + Returns + ------- + length : int + """ + # type: () -> int + raise AbstractMethodError(self) + + # ------------------------------------------------------------------------ + # Required attributes + # ------------------------------------------------------------------------ + @property + def dtype(self): + # type: () -> ExtensionDtype + """An instance of 'ExtensionDtype'.""" + raise AbstractMethodError(self) + + @property + def shape(self): + # type: () -> Tuple[int, ...] 
+ return (len(self),) + + @property + def ndim(self): + # type: () -> int + """Extension Arrays are only allowed to be 1-dimensional.""" + return 1 + + @property + def nbytes(self): + # type: () -> int + """The number of bytes needed to store this object in memory. + + If this is expensive to compute, return an approximate lower bound + on the number of bytes needed. + """ + raise AbstractMethodError(self) + + # ------------------------------------------------------------------------ + # Additional Methods + # ------------------------------------------------------------------------ + def isna(self): + # type: () -> np.ndarray + """Boolean NumPy array indicating if each value is missing. + + This should return a 1-D array the same length as 'self'. + """ + raise AbstractMethodError(self) + + # ------------------------------------------------------------------------ + # Indexing methods + # ------------------------------------------------------------------------ + def take(self, indexer, allow_fill=True, fill_value=None): + # type: (Sequence[int], bool, Optional[Any]) -> ExtensionArray + """Take elements from an array. + + Parameters + ---------- + indexer : sequence of integers + indices to be taken. -1 is used to indicate values + that are missing. + allow_fill : bool, default True + If False, indexer is assumed to contain no -1 values so no filling + will be done. This short-circuits computation of a mask. Result is + undefined if allow_fill == False and -1 is present in indexer. + fill_value : any, default None + Fill value to replace -1 values with. By default, this uses + the missing value sentinel for this type, ``self._fill_value``. + + Notes + ----- + This should follow pandas' semantics where -1 indicates missing values. + Positions where indexer is ``-1`` should be filled with the missing + value for this type. + + This is called by ``Series.__getitem__``, ``.loc``, ``iloc``, when the + indexer is a sequence of values. 
+ + Examples + -------- + Suppose the extension array is somehow backed by a NumPy structured array + and that the underlying structured array is stored as ``self.data``. + Then ``take`` may be written as + + .. code-block:: python + + def take(self, indexer, allow_fill=True, fill_value=None): + mask = indexer == -1 + result = self.data.take(indexer) + result[mask] = self._fill_value + return type(self)(result) + """ + raise AbstractMethodError(self) + + def copy(self, deep=False): + # type: (bool) -> ExtensionArray + """Return a copy of the array. + + Parameters + ---------- + deep : bool, default False + Also copy the underlying data backing this array. + + Returns + ------- + ExtensionArray + """ + raise AbstractMethodError(self) + + # ------------------------------------------------------------------------ + # Block-related methods + # ------------------------------------------------------------------------ + @property + def _fill_value(self): + # type: () -> Any + """The missing value for this type, e.g. np.nan""" + return None + + def _formatting_values(self): + # type: () -> np.ndarray + # At the moment, this has to be an array since we use result.dtype + """An array of values to be printed in, e.g. the Series repr""" + raise AbstractMethodError(self) + + @classmethod + def _concat_same_type(cls, to_concat): + # type: (Sequence[ExtensionArray]) -> ExtensionArray + """Concatenate multiple arrays + + Parameters + ---------- + to_concat : sequence of this type + + Returns + ------- + ExtensionArray + """ + raise AbstractMethodError(cls) + + def _can_hold_na(self): + # type: () -> bool + """Whether your array can hold missing values. True by default. + + Notes + ----- + Setting this to false will optimize some operations like fillna.
+ """ + return True diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index b50e01b0fb55a..62c6a6b16cbe9 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -43,6 +43,8 @@ from pandas.util._validators import validate_bool_kwarg from pandas.core.config import get_option +from .base import ExtensionArray + def _cat_compare_op(op): def f(self, other): @@ -148,7 +150,7 @@ def _maybe_to_categorical(array): """ -class Categorical(PandasObject): +class Categorical(ExtensionArray, PandasObject): """ Represents a categorical variable in classic R / S-plus fashion @@ -2130,6 +2132,20 @@ def repeat(self, repeats, *args, **kwargs): return self._constructor(values=codes, categories=self.categories, ordered=self.ordered, fastpath=True) + # Implement the ExtensionArray interface + @property + def _can_hold_na(self): + return True + + @classmethod + def _concat_same_type(self, to_concat): + from pandas.core.dtypes.concat import _concat_categorical + + return _concat_categorical(to_concat) + + def _formatting_values(self): + return self + # The Series.cat accessor diff --git a/pandas/core/common.py b/pandas/core/common.py index e606be3cc2a23..6748db825acf0 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -25,7 +25,8 @@ # compat from pandas.errors import ( # noqa - PerformanceWarning, UnsupportedFunctionCall, UnsortedIndexError) + PerformanceWarning, UnsupportedFunctionCall, UnsortedIndexError, + AbstractMethodError) # back-compat of public API # deprecate these functions @@ -88,19 +89,6 @@ class SettingWithCopyWarning(Warning): pass -class AbstractMethodError(NotImplementedError): - """Raise this error instead of NotImplementedError for abstract methods - while keeping compatibility with Python 2 and Python 3. 
- """ - - def __init__(self, class_instance): - self.class_instance = class_instance - - def __str__(self): - msg = "This method must be defined in the concrete class of {name}" - return (msg.format(name=self.class_instance.__class__.__name__)) - - def flatten(l): """Flatten an arbitrarily nested sequence. diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py new file mode 100644 index 0000000000000..c7c5378801f02 --- /dev/null +++ b/pandas/core/dtypes/base.py @@ -0,0 +1,129 @@ +"""Extend pandas with custom array types""" +from pandas.errors import AbstractMethodError + + +class ExtensionDtype(object): + """A custom data type, to be paired with an ExtensionArray. + + Notes + ----- + The interface includes the following abstract methods that must + be implemented by subclasses: + + * type + * name + * construct_from_string + + This class does not inherit from 'abc.ABCMeta' for performance reasons. + Methods and properties required by the interface raise + ``pandas.errors.AbstractMethodError`` and no ``register`` method is + provided for registering virtual subclasses. + """ + + def __str__(self): + return self.name + + @property + def type(self): + # type: () -> type + """The scalar type for the array, e.g. ``int`` + + It's expected ``ExtensionArray[item]`` returns an instance + of ``ExtensionDtype.type`` for scalar ``item``. + """ + raise AbstractMethodError(self) + + @property + def kind(self): + # type () -> str + """A character code (one of 'biufcmMOSUV'), default 'O' + + This should match the NumPy dtype used when the array is + converted to an ndarray, which is probably 'O' for object if + the extension type cannot be represented as a built-in NumPy + type. + + See Also + -------- + numpy.dtype.kind + """ + return 'O' + + @property + def name(self): + # type: () -> str + """A string identifying the data type. + + Will be used for display in, e.g. 
``Series.dtype`` + """ + raise AbstractMethodError(self) + + @property + def names(self): + # type: () -> Optional[List[str]] + """Ordered list of field names, or None if there are no fields. + + This is for compatibility with NumPy arrays, and may be removed in the + future. + """ + return None + + @classmethod + def construct_from_string(cls, string): + """Attempt to construct this type from a string. + + Parameters + ---------- + string : str + + Returns + ------- + self : instance of 'cls' + + Raises + ------ + TypeError + If a class cannot be constructed from this 'string'. + + Examples + -------- + If the extension dtype can be constructed without any arguments, + the following may be an adequate implementation. + + >>> @classmethod + ... def construct_from_string(cls, string): + ... if string == cls.name: + ... return cls() + ... else: + ... raise TypeError("Cannot construct a '{}' from " + ... "'{}'".format(cls, string)) + """ + raise AbstractMethodError(cls) + + @classmethod + def is_dtype(cls, dtype): + """Check if we match 'dtype' + + Parameters + ---------- + dtype : str or dtype + + Returns + ------- + is_dtype : bool + + Notes + ----- + The default implementation is True if + + 1. ``cls.construct_from_string(dtype)`` is an instance + of ``cls``. + 2. 'dtype' is ``cls`` or a subclass of ``cls``. + """ + if isinstance(dtype, str): + try: + return isinstance(cls.construct_from_string(dtype), cls) + except TypeError: + return False + else: + return issubclass(dtype, cls) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index dca9a5fde0d74..c66e7fcfc6978 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1685,6 +1685,35 @@ def is_extension_type(arr): return False +def is_extension_array_dtype(arr_or_dtype): + """Check if an object is a pandas extension array type.
+ + Parameters + ---------- + arr_or_dtype : object + + Returns + ------- + bool + + Notes + ----- + This checks whether an object implements the pandas extension + array interface. In pandas, this includes: + + * Categorical + + Third-party libraries may implement arrays or types satisfying + this interface as well. + """ + from pandas.core.arrays import ExtensionArray + + # we want to unpack series, anything else? + if isinstance(arr_or_dtype, ABCSeries): + arr_or_dtype = arr_or_dtype._values + return isinstance(arr_or_dtype, (ExtensionDtype, ExtensionArray)) + + def is_complex_dtype(arr_or_dtype): """ Check whether the provided array or dtype is of a complex dtype. diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 1eb87aa99fd1e..d8d3a96992757 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -5,15 +5,15 @@ from pandas import compat from pandas.core.dtypes.generic import ABCIndexClass, ABCCategoricalIndex +from .base import ExtensionDtype -class ExtensionDtype(object): + +class PandasExtensionDtype(ExtensionDtype): """ A np.dtype duck-typed class, suitable for holding a custom dtype. THIS IS NOT A REAL NUMPY DTYPE """ - name = None - names = None type = None subdtype = None kind = None @@ -108,7 +108,7 @@ class CategoricalDtypeType(type): pass -class CategoricalDtype(ExtensionDtype): +class CategoricalDtype(PandasExtensionDtype): """ Type for categorical data with the categories and orderedness @@ -387,7 +387,7 @@ class DatetimeTZDtypeType(type): pass -class DatetimeTZDtype(ExtensionDtype): +class DatetimeTZDtype(PandasExtensionDtype): """ A np.dtype duck-typed class, suitable for holding a custom datetime with tz @@ -501,8 +501,7 @@ class PeriodDtypeType(type): pass -class PeriodDtype(ExtensionDtype): - __metaclass__ = PeriodDtypeType +class PeriodDtype(PandasExtensionDtype): """ A Period duck-typed class, suitable for holding a period with freq dtype. 
@@ -619,8 +618,7 @@ class IntervalDtypeType(type): pass -class IntervalDtype(ExtensionDtype): - __metaclass__ = IntervalDtypeType +class IntervalDtype(PandasExtensionDtype): """ A Interval duck-typed class, suitable for holding an interval diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 52e8317f5209a..f553e1a02c9d6 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -33,6 +33,7 @@ is_datetimelike_v_numeric, is_float_dtype, is_numeric_dtype, is_numeric_v_string_like, is_extension_type, + is_extension_array_dtype, is_list_like, is_re, is_re_compilable, @@ -61,8 +62,9 @@ from pandas.core.index import Index, MultiIndex, _ensure_index from pandas.core.indexing import maybe_convert_indices, length_of_indexer -from pandas.core.arrays.categorical import Categorical, _maybe_to_categorical +from pandas.core.arrays import Categorical from pandas.core.indexes.datetimes import DatetimeIndex +from pandas.core.indexes.timedeltas import TimedeltaIndex from pandas.io.formats.printing import pprint_thing import pandas.core.missing as missing @@ -103,24 +105,58 @@ class Block(PandasObject): _verify_integrity = True _validate_ndim = True _ftype = 'dense' - _holder = None _concatenator = staticmethod(np.concatenate) def __init__(self, values, placement, ndim=None): - if ndim is None: - ndim = values.ndim - elif values.ndim != ndim: - raise ValueError('Wrong number of dimensions') - self.ndim = ndim - + self.ndim = self._check_ndim(values, ndim) self.mgr_locs = placement self.values = values - if ndim and len(self.mgr_locs) != len(self.values): + if (self._validate_ndim and self.ndim and + len(self.mgr_locs) != len(self.values)): raise ValueError( 'Wrong number of items passed {val}, placement implies ' '{mgr}'.format(val=len(self.values), mgr=len(self.mgr_locs))) + def _check_ndim(self, values, ndim): + """ndim inference and validation. + + Infers ndim from 'values' if not provided to __init__. 
+ Validates that values.ndim and ndim are consistent if and only if + the class variable '_validate_ndim' is True. + + Parameters + ---------- + values : array-like + ndim : int or None + + Returns + ------- + ndim : int + + Raises + ------ + ValueError : the number of dimensions do not match + """ + if ndim is None: + ndim = values.ndim + + if self._validate_ndim and values.ndim != ndim: + msg = ("Wrong number of dimensions. values.ndim != ndim " + "[{} != {}]") + raise ValueError(msg.format(values.ndim, ndim)) + + return ndim + + @property + def _holder(self): + """The array-like that can hold the underlying values. + + None for 'Block', overridden by subclasses that don't + use an ndarray. + """ + return None + @property def _consolidate_key(self): return (self._can_consolidate, self.dtype.name) @@ -279,7 +315,6 @@ def reshape_nd(self, labels, shape, ref_items, mgr=None): return a new block that is transformed to a nd block """ - return _block2d_to_blocknd(values=self.get_values().T, placement=self.mgr_locs, shape=shape, labels=labels, ref_items=ref_items) @@ -535,15 +570,20 @@ def astype(self, dtype, copy=False, errors='raise', values=None, **kwargs): def _astype(self, dtype, copy=False, errors='raise', values=None, klass=None, mgr=None, **kwargs): - """ - Coerce to the new type + """Coerce to the new type + Parameters + ---------- dtype : str, dtype convertible copy : boolean, default False copy if indicated errors : str, {'raise', 'ignore'}, default 'ignore' - ``raise`` : allow exceptions to be raised - ``ignore`` : suppress exceptions. On error return original object + + Returns + ------- + Block """ errors_legal_values = ('raise', 'ignore') @@ -1674,27 +1714,28 @@ class NonConsolidatableMixIn(object): _can_consolidate = False _verify_integrity = False _validate_ndim = False - _holder = None def __init__(self, values, placement, ndim=None): + """Initialize a non-consolidatable block. 
+ - # Placement must be converted to BlockPlacement via property setter - # before ndim logic, because placement may be a slice which doesn't - # have a length. - self.mgr_locs = placement + 'ndim' may be inferred from 'placement'. - # kludgetastic + This will continue to call __init__ for the other base + classes mixed in with this Mixin. + """ + # Placement must be converted to BlockPlacement so that we can check + # its length + if not isinstance(placement, BlockPlacement): + placement = BlockPlacement(placement) + + # Maybe infer ndim from placement if ndim is None: - if len(self.mgr_locs) != 1: + if len(placement) != 1: ndim = 1 else: ndim = 2 - self.ndim = ndim - - if not isinstance(values, self._holder): - raise TypeError("values must be {0}".format(self._holder.__name__)) - - self.values = values + super(NonConsolidatableMixIn, self).__init__(values, placement, + ndim=ndim) @property def shape(self): @@ -1745,7 +1786,7 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0, Returns ------- - a new block(s), the result of the putmask + a new block, the result of the putmask """ inplace = validate_bool_kwarg(inplace, 'inplace') @@ -1803,6 +1844,92 @@ def _unstack(self, unstacker_func, new_columns): return blocks, mask +class ExtensionBlock(NonConsolidatableMixIn, Block): + """Block for holding extension types. + + Notes + ----- + This holds all 3rd-party extension array types. It's also the immediate + parent class for our internal extension types' blocks, CategoricalBlock. + + ExtensionArrays are limited to 1-D. + """ + @property + def _holder(self): + # For extension blocks, the holder is values-dependent. + return type(self.values) + + @property + def is_view(self): + """Extension arrays are never treated as views.""" + return False + + def get_values(self, dtype=None): + # ExtensionArrays must be iterable, so this works.
+ values = np.asarray(self.values) + if values.ndim == self.ndim - 1: + values = values.reshape((1,) + values.shape) + return values + + def to_dense(self): + return np.asarray(self.values) + + def take_nd(self, indexer, axis=0, new_mgr_locs=None, fill_tuple=None): + """ + Take values according to indexer and return them as a block. + """ + if fill_tuple is None: + fill_value = None + else: + fill_value = fill_tuple[0] + + # axis doesn't matter; we are really a single-dim object + # but are passed the axis depending on the calling routing + # if its REALLY axis 0, then this will be a reindex and not a take + new_values = self.values.take(indexer, fill_value=fill_value) + + # if we are a 1-dim object, then always place at 0 + if self.ndim == 1: + new_mgr_locs = [0] + else: + if new_mgr_locs is None: + new_mgr_locs = self.mgr_locs + + return self.make_block_same_class(new_values, new_mgr_locs) + + def _can_hold_element(self, element): + # XXX: We may need to think about pushing this onto the array. + # We're doing the same as CategoricalBlock here. + return True + + def _slice(self, slicer): + """ return a slice of my values """ + + # slice the category + # return same dims as we currently have + + if isinstance(slicer, tuple) and len(slicer) == 2: + if not com.is_null_slice(slicer[0]): + raise AssertionError("invalid slicing for a 1-ndim " + "categorical") + slicer = slicer[1] + + return self.values[slicer] + + def formatting_values(self): + return self.values._formatting_values() + + def concat_same_type(self, to_concat, placement=None): + """ + Concatenate list of single blocks of the same type. 
+ """ + values = self._holder._concat_same_type( + [blk.values for blk in to_concat]) + placement = placement or slice(0, len(values), 1) + return self.make_block_same_class(values, ndim=self.ndim, + placement=placement) + + class NumericBlock(Block): __slots__ = () is_numeric = True @@ -1908,6 +2035,11 @@ def should_store(self, value): class DatetimeLikeBlockMixin(object): + """Mixin class for DatetimeBlock and DatetimeTZBlock.""" + + @property + def _holder(self): + return DatetimeIndex @property def _na_value(self): @@ -1940,6 +2072,10 @@ def __init__(self, values, placement, ndim=None): super(TimeDeltaBlock, self).__init__(values, placement=placement, ndim=ndim) + @property + def _holder(self): + return TimedeltaIndex + @property def _box_func(self): return lambda x: tslib.Timedelta(x, unit='ns') @@ -2315,30 +2451,24 @@ def re_replacer(s): return block -class CategoricalBlock(NonConsolidatableMixIn, ObjectBlock): +class CategoricalBlock(ExtensionBlock): __slots__ = () is_categorical = True _verify_integrity = True _can_hold_na = True - _holder = Categorical _concatenator = staticmethod(_concat._concat_categorical) def __init__(self, values, placement, ndim=None): + from pandas.core.arrays.categorical import _maybe_to_categorical # coerce to categorical if we can super(CategoricalBlock, self).__init__(_maybe_to_categorical(values), - placement=placement, ndim=ndim) + placement=placement, + ndim=ndim) @property - def is_view(self): - """ I am never a view """ - return False - - def to_dense(self): - return self.values.to_dense().view() - - def convert(self, copy=True, **kwargs): - return self.copy() if copy else self + def _holder(self): + return Categorical @property def array_dtype(self): @@ -2347,13 +2477,6 @@ def array_dtype(self): """ return np.object_ - def _slice(self, slicer): - """ return a slice of my values """ - - # slice the category - # return same dims as we currently have - return self.values._slice(slicer) - def _try_coerce_result(self, result): 
""" reverse of try_coerce_args """ @@ -2390,28 +2513,11 @@ def shift(self, periods, axis=0, mgr=None): return self.make_block_same_class(values=self.values.shift(periods), placement=self.mgr_locs) - def take_nd(self, indexer, axis=0, new_mgr_locs=None, fill_tuple=None): - """ - Take values according to indexer and return them as a block.bb - """ - if fill_tuple is None: - fill_value = None - else: - fill_value = fill_tuple[0] - - # axis doesn't matter; we are really a single-dim object - # but are passed the axis depending on the calling routing - # if its REALLY axis 0, then this will be a reindex and not a take - new_values = self.values.take_nd(indexer, fill_value=fill_value) - - # if we are a 1-dim object, then always place at 0 - if self.ndim == 1: - new_mgr_locs = [0] - else: - if new_mgr_locs is None: - new_mgr_locs = self.mgr_locs - - return self.make_block_same_class(new_values, new_mgr_locs) + def to_dense(self): + # Categorical.get_values returns a DatetimeIndex for datetime + # categories, so we can't simply use `np.asarray(self.values)` like + # other types. + return self.values.get_values() def to_native_types(self, slicer=None, na_rep='', quoting=None, **kwargs): """ convert to our native types format, slicing if desired """ @@ -2430,6 +2536,15 @@ def to_native_types(self, slicer=None, na_rep='', quoting=None, **kwargs): def concat_same_type(self, to_concat, placement=None): """ Concatenate list of single blocks of the same type. + + Note that this CategoricalBlock._concat_same_type *may* not + return a CategoricalBlock. When the categories in `to_concat` + differ, this will return an object ndarray. + + If / when we decide we don't like that behavior: + + 1. Change Categorical._concat_same_type to use union_categoricals + 2. Delete this method. 
""" values = self._concatenator([blk.values for blk in to_concat], axis=self.ndim - 1) @@ -2445,12 +2560,29 @@ class DatetimeBlock(DatetimeLikeBlockMixin, Block): _can_hold_na = True def __init__(self, values, placement, ndim=None): - if values.dtype != _NS_DTYPE: - values = conversion.ensure_datetime64ns(values) - + values = self._maybe_coerce_values(values) super(DatetimeBlock, self).__init__(values, placement=placement, ndim=ndim) + def _maybe_coerce_values(self, values): + """Input validation for values passed to __init__. Ensure that + we have datetime64ns, coercing if nescessary. + + Parametetrs + ----------- + values : array-like + Must be convertable to datetime64 + + Returns + ------- + values : ndarray[datetime64ns] + + Overridden by DatetimeTZBlock. + """ + if values.dtype != _NS_DTYPE: + values = conversion.ensure_datetime64ns(values) + return values + def _astype(self, dtype, mgr=None, **kwargs): """ these automatically copy, so copy=True has no effect @@ -2576,12 +2708,37 @@ def set(self, locs, values, check=False): class DatetimeTZBlock(NonConsolidatableMixIn, DatetimeBlock): """ implement a datetime64 block with a tz attribute """ __slots__ = () - _holder = DatetimeIndex _concatenator = staticmethod(_concat._concat_datetime) is_datetimetz = True def __init__(self, values, placement, ndim=2, dtype=None): + # XXX: This will end up calling _maybe_coerce_values twice + # when dtype is not None. It's relatively cheap (just an isinstance) + # but it'd nice to avoid. + # + # If we can remove dtype from __init__, and push that conversion + # push onto the callers, then we can remove this entire __init__ + # and just use DatetimeBlock's. + if dtype is not None: + values = self._maybe_coerce_values(values, dtype=dtype) + super(DatetimeTZBlock, self).__init__(values, placement=placement, + ndim=ndim) + + def _maybe_coerce_values(self, values, dtype=None): + """Input validation for values passed to __init__. 
Ensure that + we have datetime64TZ, coercing if necessary. + Parameters + ---------- + values : array-like + Must be convertible to datetime64 + dtype : string or DatetimeTZDtype, optional + Does a shallow copy to this tz + + Returns + ------- + values : ndarray[datetime64ns] + """ if not isinstance(values, self._holder): values = self._holder(values) @@ -2593,8 +2750,7 @@ def __init__(self, values, placement, ndim=2, dtype=None): if values.tz is None: raise ValueError("cannot create a DatetimeTZBlock without a tz") - super(DatetimeTZBlock, self).__init__(values, placement=placement, - ndim=ndim) + return values def copy(self, deep=True, mgr=None): """ copy constructor """ @@ -2734,9 +2890,19 @@ class SparseBlock(NonConsolidatableMixIn, Block): _box_to_block_values = False _can_hold_na = True _ftype = 'sparse' - _holder = SparseArray _concatenator = staticmethod(_concat._concat_sparse) + def __init__(self, values, placement, ndim=None): + # Ensure that we have the underlying SparseArray here... + if isinstance(values, ABCSeries): + values = values.values + assert isinstance(values, SparseArray) + super(SparseBlock, self).__init__(values, placement, ndim=ndim) + + @property + def _holder(self): + return SparseArray + @property def shape(self): return (len(self.mgr_locs), self.sp_index.length) @@ -2910,6 +3076,8 @@ def get_block_type(values, dtype=None): cls = BoolBlock elif is_categorical(values): cls = CategoricalBlock + elif is_extension_array_dtype(values): + cls = ExtensionBlock else: cls = ObjectBlock return cls @@ -4663,6 +4831,19 @@ def form_blocks(arrays, names, axes): for i, _, array in items_dict['CategoricalBlock']] blocks.extend(cat_blocks) + if len(items_dict['ExtensionBlock']): + + external_blocks = [] + for i, _, array in items_dict['ExtensionBlock']: + if isinstance(array, ABCSeries): + array = array.values + # Allow our internal arrays to choose their block type.
+ block_type = getattr(array, '_block_type', ExtensionBlock) + external_blocks.append( + make_block(array, klass=block_type, + fastpath=True, placement=[i])) + blocks.extend(external_blocks) + if len(extra_locs): shape = (len(extra_locs),) + tuple(len(x) for x in axes[1:]) diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 22b6d33be9d38..af4e83f506257 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -77,3 +77,26 @@ class NullFrequencyError(ValueError): class AccessorRegistrationWarning(Warning): """Warning for attribute conflicts in accessor registration.""" + + +class AbstractMethodError(NotImplementedError): + """Raise this error instead of NotImplementedError for abstract methods + while keeping compatibility with Python 2 and Python 3. + """ + + def __init__(self, class_instance, methodtype='method'): + types = {'method', 'classmethod', 'staticmethod', 'property'} + if methodtype not in types: + msg = 'methodtype must be one of {}, got {} instead.'.format( + methodtype, types) + raise ValueError(msg) + self.methodtype = methodtype + self.class_instance = class_instance + + def __str__(self): + if self.methodtype == 'classmethod': + name = self.class_instance.__name__ + else: + name = self.class_instance.__class__.__name__ + msg = "This {methodtype} must be defined in the concrete class {name}" + return (msg.format(methodtype=self.methodtype, name=name)) diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index d800a7b92b559..eca4dd4cf2106 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -10,12 +10,14 @@ Series, Categorical, CategoricalIndex, IntervalIndex, date_range) from pandas.compat import string_types +from pandas.core.arrays import ExtensionArray from pandas.core.dtypes.dtypes import ( DatetimeTZDtype, PeriodDtype, - IntervalDtype, CategoricalDtype) + IntervalDtype, CategoricalDtype, ExtensionDtype) from pandas.core.dtypes.common 
import ( is_categorical_dtype, is_categorical, is_datetime64tz_dtype, is_datetimetz, + is_extension_array_dtype, is_period_dtype, is_period, is_dtype_equal, is_datetime64_ns_dtype, is_datetime64_dtype, is_interval_dtype, @@ -742,3 +744,31 @@ def test_categorical_categories(self): tm.assert_index_equal(c1.categories, pd.Index(['a', 'b'])) c1 = CategoricalDtype(CategoricalIndex(['a', 'b'])) tm.assert_index_equal(c1.categories, pd.Index(['a', 'b'])) + + +class DummyArray(ExtensionArray): + pass + + +class DummyDtype(ExtensionDtype): + pass + + +class TestExtensionArrayDtype(object): + + @pytest.mark.parametrize('values', [ + pd.Categorical([]), + pd.Categorical([]).dtype, + pd.Series(pd.Categorical([])), + DummyDtype(), + DummyArray(), + ]) + def test_is_extension_array_dtype(self, values): + assert is_extension_array_dtype(values) + + @pytest.mark.parametrize('values', [ + np.array([]), + pd.Series(np.array([])), + ]) + def test_is_not_extension_array_dtype(self, values): + assert not is_extension_array_dtype(values) diff --git a/pandas/tests/internals/test_external_block.py b/pandas/tests/internals/test_external_block.py index 729ee0093b6dc..2487363df8f99 100644 --- a/pandas/tests/internals/test_external_block.py +++ b/pandas/tests/internals/test_external_block.py @@ -5,12 +5,12 @@ import pandas as pd from pandas.core.internals import ( - Block, BlockManager, SingleBlockManager, NonConsolidatableMixIn) + BlockManager, SingleBlockManager, ExtensionBlock) import pytest -class CustomBlock(NonConsolidatableMixIn, Block): +class CustomBlock(ExtensionBlock): _holder = np.ndarray diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index e3490f465b24a..9338aba90d7cb 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -11,9 +11,8 @@ from distutils.version import LooseVersion import itertools from pandas import (Index, MultiIndex, DataFrame, DatetimeIndex, - Series, Categorical) + 
Series, Categorical, TimedeltaIndex, SparseArray) from pandas.compat import OrderedDict, lrange -from pandas.core.sparse.array import SparseArray from pandas.core.internals import (BlockPlacement, SingleBlockManager, make_block, BlockManager) import pandas.core.algorithms as algos @@ -1263,9 +1262,30 @@ def test_binop_other(self, op, value, dtype): assert_series_equal(result, expected) +@pytest.mark.parametrize('typestr, holder', [ + ('category', Categorical), + ('M8[ns]', DatetimeIndex), + ('M8[ns, US/Central]', DatetimeIndex), + ('m8[ns]', TimedeltaIndex), + ('sparse', SparseArray), +]) +def test_holder(typestr, holder): + blk = create_block(typestr, [1]) + assert blk._holder is holder + + def test_deprecated_fastpath(): # GH#19265 values = np.random.rand(3, 3) with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): make_block(values, placement=np.arange(3), fastpath=True) + + +def test_validate_ndim(): + values = np.array([1.0, 2.0]) + placement = slice(2) + msg = "Wrong number of dimensions. 
values.ndim != ndim \[1 != 2\]" + + with tm.assert_raises_regex(ValueError, msg): + make_block(values, placement, ndim=2) diff --git a/pandas/tests/sparse/frame/test_frame.py b/pandas/tests/sparse/frame/test_frame.py index 0b7948cc32d24..54f567bcd2a8c 100644 --- a/pandas/tests/sparse/frame/test_frame.py +++ b/pandas/tests/sparse/frame/test_frame.py @@ -574,6 +574,15 @@ def test_setitem_array(self): self.frame['F'].reindex(index), check_names=False) + def test_setitem_chained_no_consolidate(self): + # https://github.com/pandas-dev/pandas/pull/19268 + # issuecomment-361696418 + # chained setitem used to cause consolidation + sdf = pd.SparseDataFrame([[np.nan, 1], [2, np.nan]]) + with pd.option_context('mode.chained_assignment', None): + sdf[0][1] = 2 + assert len(sdf._data.blocks) == 2 + def test_delitem(self): A = self.frame['A'] C = self.frame['C'] diff --git a/pandas/tests/test_errors.py b/pandas/tests/test_errors.py index babf88ef1df8d..e2a142366a89e 100644 --- a/pandas/tests/test_errors.py +++ b/pandas/tests/test_errors.py @@ -4,6 +4,8 @@ from warnings import catch_warnings import pandas # noqa import pandas as pd +from pandas.errors import AbstractMethodError +import pandas.util.testing as tm @pytest.mark.parametrize( @@ -50,3 +52,30 @@ def test_error_rename(): raise ParserError() except pd.parser.CParserError: pass + + +class Foo: + @classmethod + def classmethod(cls): + raise AbstractMethodError(cls, methodtype='classmethod') + + @property + def property(self): + raise AbstractMethodError(self, methodtype='property') + + def method(self): + raise AbstractMethodError(self) + + +def test_AbstractMethodError_classmethod(): + xpr = "This classmethod must be defined in the concrete class Foo" + with tm.assert_raises_regex(AbstractMethodError, xpr): + Foo.classmethod() + + xpr = "This property must be defined in the concrete class Foo" + with tm.assert_raises_regex(AbstractMethodError, xpr): + Foo().property + + xpr = "This method must be defined in the concrete 
class Foo" + with tm.assert_raises_regex(AbstractMethodError, xpr): + Foo().method() From 14d5bd1084c40238aa8ea41690e20cc0ad257992 Mon Sep 17 00:00:00 2001 From: Paul Reidy Date: Sat, 3 Feb 2018 20:30:29 +0000 Subject: [PATCH 036/217] ERR: Better error msg when merging on tz-aware and tz-naive columns (#19525) --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/core/reshape/merge.py | 27 +++++++++--------------- pandas/tests/reshape/merge/test_merge.py | 4 +++- 3 files changed, 14 insertions(+), 18 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 26a7a78bb5c55..69965f44d87a8 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -342,6 +342,7 @@ Other API Changes - Addition and subtraction of ``NaN`` from a :class:`Series` with ``dtype='timedelta64[ns]'`` will raise a ``TypeError` instead of treating the ``NaN`` as ``NaT`` (:issue:`19274`) - Set operations (union, difference...) on :class:`IntervalIndex` with incompatible index types will now raise a ``TypeError`` rather than a ``ValueError`` (:issue:`19329`) - :class:`DateOffset` objects render more simply, e.g. "" instead of "" (:issue:`19403`) +- :func:`pandas.merge` provides a more informative error message when trying to merge on timezone-aware and timezone-naive columns (:issue:`15800`) .. _whatsnew_0230.deprecations: diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 99ea2c4fe4688..3ec78ce52c6e5 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -940,6 +940,11 @@ def _maybe_coerce_merge_keys(self): elif is_dtype_equal(lk.dtype, rk.dtype): continue + msg = ("You are trying to merge on {lk_dtype} and " + "{rk_dtype} columns. If you wish to proceed " + "you should use pd.concat".format(lk_dtype=lk.dtype, + rk_dtype=rk.dtype)) + # if we are numeric, then allow differing # kinds to proceed, eg. 
int64 and int8, int and float # further if we are object, but we infer to @@ -968,30 +973,18 @@ def _maybe_coerce_merge_keys(self): pass # Check if we are trying to merge on obviously - # incompatible dtypes GH 9780 + # incompatible dtypes GH 9780, GH 15800 elif is_numeric_dtype(lk) and not is_numeric_dtype(rk): - msg = ("You are trying to merge on {lk_dtype} and " - "{rk_dtype} columns. If you wish to proceed " - "you should use pd.concat".format(lk_dtype=lk.dtype, - rk_dtype=rk.dtype)) raise ValueError(msg) elif not is_numeric_dtype(lk) and is_numeric_dtype(rk): - msg = ("You are trying to merge on {lk_dtype} and " - "{rk_dtype} columns. If you wish to proceed " - "you should use pd.concat".format(lk_dtype=lk.dtype, - rk_dtype=rk.dtype)) raise ValueError(msg) elif is_datetimelike(lk) and not is_datetimelike(rk): - msg = ("You are trying to merge on {lk_dtype} and " - "{rk_dtype} columns. If you wish to proceed " - "you should use pd.concat".format(lk_dtype=lk.dtype, - rk_dtype=rk.dtype)) raise ValueError(msg) elif not is_datetimelike(lk) and is_datetimelike(rk): - msg = ("You are trying to merge on {lk_dtype} and " - "{rk_dtype} columns. If you wish to proceed " - "you should use pd.concat".format(lk_dtype=lk.dtype, - rk_dtype=rk.dtype)) + raise ValueError(msg) + elif is_datetime64tz_dtype(lk) and not is_datetime64tz_dtype(rk): + raise ValueError(msg) + elif not is_datetime64tz_dtype(lk) and is_datetime64tz_dtype(rk): raise ValueError(msg) # Houston, we have a problem! 
diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index a8319339c6435..f63c206c0c407 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -1512,11 +1512,13 @@ def test_merge_on_ints_floats_warning(self): '2011-01-02']), (pd.date_range('1/1/2011', periods=2, freq='D'), [0, 1]), (pd.date_range('1/1/2011', periods=2, freq='D'), [0.0, 1.0]), + (pd.date_range('20130101', periods=3), + pd.date_range('20130101', periods=3, tz='US/Eastern')), ([0, 1, 2], Series(['a', 'b', 'a']).astype('category')), ([0.0, 1.0, 2.0], Series(['a', 'b', 'a']).astype('category')), ]) def test_merge_incompat_dtypes(self, df1_vals, df2_vals): - # GH 9780 + # GH 9780, GH 15800 # Raise a ValueError when a user tries to merge on # dtypes that are incompatible (e.g., obj and int/float) From da6f51ed135fbc01f570499bf4ce1ab90e85c558 Mon Sep 17 00:00:00 2001 From: Tommy <10076072+tommyod@users.noreply.github.com> Date: Sat, 3 Feb 2018 21:32:56 +0100 Subject: [PATCH 037/217] DOC: Spellcheck of enhancingperf.rst (#19516) * Spellchecked enhancingperf, sparse * Uppercased 'cython' to 'Cython' * Typeset variants of numba as 'Numba', as on their page * Updated reference to Numba docs to latest version --- doc/source/enhancingperf.rst | 81 +++++++++++++++++++++++------------- doc/source/sparse.rst | 8 ++-- 2 files changed, 55 insertions(+), 34 deletions(-) diff --git a/doc/source/enhancingperf.rst b/doc/source/enhancingperf.rst index 7afa852262a38..b786b1d0c134a 100644 --- a/doc/source/enhancingperf.rst +++ b/doc/source/enhancingperf.rst @@ -19,6 +19,13 @@ Enhancing Performance ********************* +In this part of the tutorial, we will investigate how to speed up certain +functions operating on pandas ``DataFrames`` using three different techniques: +Cython, Numba and :func:`pandas.eval`. 
We will see a speed improvement of ~200 +when we use Cython and Numba on a test function operating row-wise on the +``DataFrame``. Using :func:`pandas.eval` we will speed up a sum by an order of +~2. + .. _enhancingperf.cython: Cython (Writing C extensions for pandas) @@ -29,20 +36,20 @@ computationally heavy applications however, it can be possible to achieve sizeab speed-ups by offloading work to `cython `__. This tutorial assumes you have refactored as much as possible in Python, for example -trying to remove for loops and making use of NumPy vectorization, it's always worth +by trying to remove for-loops and making use of NumPy vectorization. It's always worth optimising in Python first. This tutorial walks through a "typical" process of cythonizing a slow computation. -We use an `example from the cython documentation `__ +We use an `example from the Cython documentation `__ but in the context of pandas. Our final cythonized solution is around 100 times -faster than the pure Python. +faster than the pure Python solution. .. _enhancingperf.pure: Pure python ~~~~~~~~~~~ -We have a DataFrame to which we want to apply a function row-wise. +We have a ``DataFrame`` to which we want to apply a function row-wise. .. ipython:: python @@ -91,10 +98,10 @@ hence we'll concentrate our efforts cythonizing these two functions. .. _enhancingperf.plain: -Plain cython +Plain Cython ~~~~~~~~~~~~ -First we're going to need to import the cython magic function to ipython: +First we're going to need to import the Cython magic function to ipython: .. ipython:: python :okwarning: @@ -102,7 +109,7 @@ First we're going to need to import the cython magic function to ipython: %load_ext Cython -Now, let's simply copy our functions over to cython as is (the suffix +Now, let's simply copy our functions over to Cython as is (the suffix is here to distinguish between function versions): .. ipython:: @@ -177,8 +184,8 @@ in Python, so maybe we could minimize these by cythonizing the apply part. 
.. note:: - We are now passing ndarrays into the cython function, fortunately cython plays - very nicely with numpy. + We are now passing ndarrays into the Cython function, fortunately Cython plays + very nicely with NumPy. .. ipython:: @@ -213,9 +220,9 @@ the rows, applying our ``integrate_f_typed``, and putting this in the zeros arra .. warning:: You can **not pass** a ``Series`` directly as a ``ndarray`` typed parameter - to a cython function. Instead pass the actual ``ndarray`` using the - ``.values`` attribute of the Series. The reason is that the cython - definition is specific to an ndarray and not the passed Series. + to a Cython function. Instead pass the actual ``ndarray`` using the + ``.values`` attribute of the ``Series``. The reason is that the Cython + definition is specific to an ndarray and not the passed ``Series``. So, do not do this: @@ -223,7 +230,7 @@ the rows, applying our ``integrate_f_typed``, and putting this in the zeros arra apply_integrate_f(df['a'], df['b'], df['N']) - But rather, use ``.values`` to get the underlying ``ndarray`` + But rather, use ``.values`` to get the underlying ``ndarray``: .. code-block:: python @@ -255,7 +262,7 @@ More advanced techniques ~~~~~~~~~~~~~~~~~~~~~~~~ There is still hope for improvement. Here's an example of using some more -advanced cython techniques: +advanced Cython techniques: .. ipython:: @@ -289,16 +296,17 @@ advanced cython techniques: In [4]: %timeit apply_integrate_f_wrap(df['a'].values, df['b'].values, df['N'].values) 1000 loops, best of 3: 987 us per loop -Even faster, with the caveat that a bug in our cython code (an off-by-one error, +Even faster, with the caveat that a bug in our Cython code (an off-by-one error, for example) might cause a segfault because memory access isn't checked. - +For more about ``boundscheck`` and ``wraparound``, see the Cython docs on +`compiler directives `__. .. 
_enhancingperf.numba: -Using numba +Using Numba ----------- -A recent alternative to statically compiling cython code, is to use a *dynamic jit-compiler*, ``numba``. +A recent alternative to statically compiling Cython code, is to use a *dynamic jit-compiler*, Numba. Numba gives you the power to speed up your applications with high performance functions written directly in Python. With a few annotations, array-oriented and math-heavy Python code can be just-in-time compiled to native machine instructions, similar in performance to C, C++ and Fortran, without having to switch languages or Python interpreters. @@ -306,16 +314,17 @@ Numba works by generating optimized machine code using the LLVM compiler infrast .. note:: - You will need to install ``numba``. This is easy with ``conda``, by using: ``conda install numba``, see :ref:`installing using miniconda`. + You will need to install Numba. This is easy with ``conda``, by using: ``conda install numba``, see :ref:`installing using miniconda`. .. note:: - As of ``numba`` version 0.20, pandas objects cannot be passed directly to numba-compiled functions. Instead, one must pass the ``numpy`` array underlying the ``pandas`` object to the numba-compiled function as demonstrated below. + As of Numba version 0.20, pandas objects cannot be passed directly to Numba-compiled functions. Instead, one must pass the NumPy array underlying the pandas object to the Numba-compiled function as demonstrated below. Jit ~~~ -Using ``numba`` to just-in-time compile your code. We simply take the plain Python code from above and annotate with the ``@jit`` decorator. +We demonstrate how to use Numba to just-in-time compile our code. We simply +take the plain Python code from above and annotate with the ``@jit`` decorator. .. code-block:: python @@ -346,17 +355,19 @@ Using ``numba`` to just-in-time compile your code. 
We simply take the plain Pyth result = apply_integrate_f_numba(df['a'].values, df['b'].values, df['N'].values) return pd.Series(result, index=df.index, name='result') -Note that we directly pass ``numpy`` arrays to the numba function. ``compute_numba`` is just a wrapper that provides a nicer interface by passing/returning pandas objects. +Note that we directly pass NumPy arrays to the Numba function. ``compute_numba`` is just a wrapper that provides a nicer interface by passing/returning pandas objects. .. code-block:: ipython In [4]: %timeit compute_numba(df) 1000 loops, best of 3: 798 us per loop +In this example, using Numba was faster than Cython. + Vectorize ~~~~~~~~~ -``numba`` can also be used to write vectorized functions that do not require the user to explicitly +Numba can also be used to write vectorized functions that do not require the user to explicitly loop over the observations of a vector; a vectorized function will be applied to each row automatically. Consider the following toy example of doubling each observation: @@ -389,13 +400,23 @@ Caveats .. note:: - ``numba`` will execute on any function, but can only accelerate certain classes of functions. + Numba will execute on any function, but can only accelerate certain classes of functions. -``numba`` is best at accelerating functions that apply numerical functions to NumPy arrays. When passed a function that only uses operations it knows how to accelerate, it will execute in ``nopython`` mode. +Numba is best at accelerating functions that apply numerical functions to NumPy +arrays. When passed a function that only uses operations it knows how to +accelerate, it will execute in ``nopython`` mode. -If ``numba`` is passed a function that includes something it doesn't know how to work with -- a category that currently includes sets, lists, dictionaries, or string functions -- it will revert to ``object mode``. In ``object mode``, numba will execute but your code will not speed up significantly. 
If you would prefer that ``numba`` throw an error if it cannot compile a function in a way that speeds up your code, pass numba the argument ``nopython=True`` (e.g. ``@numba.jit(nopython=True)``). For more on troubleshooting ``numba`` modes, see the `numba troubleshooting page `__. +If Numba is passed a function that includes something it doesn't know how to +work with -- a category that currently includes sets, lists, dictionaries, or +string functions -- it will revert to ``object mode``. In ``object mode``, +Numba will execute but your code will not speed up significantly. If you would +prefer that Numba throw an error if it cannot compile a function in a way that +speeds up your code, pass Numba the argument +``nopython=True`` (e.g. ``@numba.jit(nopython=True)``). For more on +troubleshooting Numba modes, see the `Numba troubleshooting page +`__. -Read more in the `numba docs `__. +Read more in the `Numba docs `__. .. _enhancingperf.eval: @@ -448,7 +469,7 @@ These operations are supported by :func:`pandas.eval`: - Attribute access, e.g., ``df.a`` - Subscript expressions, e.g., ``df[0]`` - Simple variable evaluation, e.g., ``pd.eval('df')`` (this is not very useful) -- Math functions, `sin`, `cos`, `exp`, `log`, `expm1`, `log1p`, +- Math functions: `sin`, `cos`, `exp`, `log`, `expm1`, `log1p`, `sqrt`, `sinh`, `cosh`, `tanh`, `arcsin`, `arccos`, `arctan`, `arccosh`, `arcsinh`, `arctanh`, `abs` and `arctan2`. @@ -581,7 +602,7 @@ on the original ``DataFrame`` or return a copy with the new column. For backwards compatibility, ``inplace`` defaults to ``True`` if not specified. This will change in a future version of pandas - if your code depends on an inplace assignment you should update to explicitly - set ``inplace=True`` + set ``inplace=True``. .. 
ipython:: python @@ -780,7 +801,7 @@ Technical Minutia Regarding Expression Evaluation Expressions that would result in an object dtype or involve datetime operations (because of ``NaT``) must be evaluated in Python space. The main reason for this behavior is to maintain backwards compatibility with versions of NumPy < -1.7. In those versions of ``numpy`` a call to ``ndarray.astype(str)`` will +1.7. In those versions of NumPy a call to ``ndarray.astype(str)`` will truncate any strings that are more than 60 characters in length. Second, we can't pass ``object`` arrays to ``numexpr`` thus string comparisons must be evaluated in Python space. diff --git a/doc/source/sparse.rst b/doc/source/sparse.rst index 2e224f103a95e..260d8aa32ef52 100644 --- a/doc/source/sparse.rst +++ b/doc/source/sparse.rst @@ -17,11 +17,11 @@ Sparse data structures .. note:: The ``SparsePanel`` class has been removed in 0.19.0 -We have implemented "sparse" versions of Series and DataFrame. These are not sparse +We have implemented "sparse" versions of ``Series`` and ``DataFrame``. These are not sparse in the typical "mostly 0". Rather, you can view these objects as being "compressed" where any data matching a specific value (``NaN`` / missing value, though any value can be chosen) is omitted. A special ``SparseIndex`` object tracks where data has been -"sparsified". This will make much more sense in an example. All of the standard pandas +"sparsified". This will make much more sense with an example. All of the standard pandas data structures have a ``to_sparse`` method: .. ipython:: python @@ -32,7 +32,7 @@ data structures have a ``to_sparse`` method: sts The ``to_sparse`` method takes a ``kind`` argument (for the sparse index, see -below) and a ``fill_value``. So if we had a mostly zero Series, we could +below) and a ``fill_value``. So if we had a mostly zero ``Series``, we could convert it to sparse with ``fill_value=0``: .. 
ipython:: python @@ -40,7 +40,7 @@ convert it to sparse with ``fill_value=0``: ts.fillna(0).to_sparse(fill_value=0) The sparse objects exist for memory efficiency reasons. Suppose you had a -large, mostly NA DataFrame: +large, mostly NA ``DataFrame``: .. ipython:: python From c2adaf78de0975de115fca60dfe1a0c4b54a2eed Mon Sep 17 00:00:00 2001 From: jschendel Date: Sun, 4 Feb 2018 08:54:14 -0700 Subject: [PATCH 038/217] TST: Remove duplicate TimdeltaIndex tests (#19509) --- .../tests/indexes/timedeltas/test_astype.py | 49 +++---------------- 1 file changed, 6 insertions(+), 43 deletions(-) diff --git a/pandas/tests/indexes/timedeltas/test_astype.py b/pandas/tests/indexes/timedeltas/test_astype.py index af16fe71edcf3..c3bd857036efc 100644 --- a/pandas/tests/indexes/timedeltas/test_astype.py +++ b/pandas/tests/indexes/timedeltas/test_astype.py @@ -2,36 +2,20 @@ import numpy as np -import pandas as pd import pandas.util.testing as tm from pandas import (TimedeltaIndex, timedelta_range, Int64Index, Float64Index, - Index, Timedelta) + Index, Timedelta, NaT) -from ..datetimelike import DatetimeLike - -class TestTimedeltaIndex(DatetimeLike): - _holder = TimedeltaIndex +class TestTimedeltaIndex(object): _multiprocess_can_split_ = True - def test_numeric_compat(self): - # Dummy method to override super's version; this test is now done - # in test_arithmetic.py - pass - - def setup_method(self, method): - self.indices = dict(index=tm.makeTimedeltaIndex(10)) - self.setup_indices() - - def create_index(self): - return pd.to_timedelta(range(5), unit='d') + pd.offsets.Hour(1) - def test_astype(self): # GH 13149, GH 13209 - idx = TimedeltaIndex([1e14, 'NaT', pd.NaT, np.NaN]) + idx = TimedeltaIndex([1e14, 'NaT', NaT, np.NaN]) result = idx.astype(object) - expected = Index([Timedelta('1 days 03:46:40')] + [pd.NaT] * 3, + expected = Index([Timedelta('1 days 03:46:40')] + [NaT] * 3, dtype=object) tm.assert_index_equal(result, expected) @@ -51,7 +35,7 @@ def test_astype(self): def 
test_astype_timedelta64(self): # GH 13149, GH 13209 - idx = TimedeltaIndex([1e14, 'NaT', pd.NaT, np.NaN]) + idx = TimedeltaIndex([1e14, 'NaT', NaT, np.NaN]) result = idx.astype('timedelta64') expected = Float64Index([1e+14] + [np.NaN] * 3, dtype='float64') @@ -69,28 +53,7 @@ def test_astype_timedelta64(self): float, 'datetime64', 'datetime64[ns]']) def test_astype_raises(self, dtype): # GH 13149, GH 13209 - idx = TimedeltaIndex([1e14, 'NaT', pd.NaT, np.NaN]) + idx = TimedeltaIndex([1e14, 'NaT', NaT, np.NaN]) msg = 'Cannot cast TimedeltaIndex to dtype' with tm.assert_raises_regex(TypeError, msg): idx.astype(dtype) - - def test_pickle_compat_construction(self): - pass - - def test_shift(self): - # test shift for TimedeltaIndex - # err8083 - - drange = self.create_index() - result = drange.shift(1) - expected = TimedeltaIndex(['1 days 01:00:00', '2 days 01:00:00', - '3 days 01:00:00', - '4 days 01:00:00', '5 days 01:00:00'], - freq='D') - tm.assert_index_equal(result, expected) - - result = drange.shift(3, freq='2D 1s') - expected = TimedeltaIndex(['6 days 01:00:03', '7 days 01:00:03', - '8 days 01:00:03', '9 days 01:00:03', - '10 days 01:00:03'], freq='D') - tm.assert_index_equal(result, expected) From 90f59e9f1ae24e94d671c5305294f25db3e790da Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 4 Feb 2018 08:05:30 -0800 Subject: [PATCH 039/217] Frame specific parts of #19504 (#19512) --- pandas/tests/frame/test_timezones.py | 135 +++++++++++++++++++++++++ pandas/tests/tseries/test_timezones.py | 123 ++-------------------- 2 files changed, 144 insertions(+), 114 deletions(-) create mode 100644 pandas/tests/frame/test_timezones.py diff --git a/pandas/tests/frame/test_timezones.py b/pandas/tests/frame/test_timezones.py new file mode 100644 index 0000000000000..fa589a0aa4817 --- /dev/null +++ b/pandas/tests/frame/test_timezones.py @@ -0,0 +1,135 @@ +# -*- coding: utf-8 -*- +""" +Tests for DataFrame timezone-related methods +""" +from datetime import datetime + +import 
pytest +import pytz +import numpy as np + +import pandas.util.testing as tm +from pandas.compat import lrange +from pandas.core.indexes.datetimes import date_range +from pandas.core.dtypes.dtypes import DatetimeTZDtype +from pandas import Series, DataFrame + + +class TestDataFrameTimezones(object): + def test_frame_from_records_utc(self): + rec = {'datum': 1.5, + 'begin_time': datetime(2006, 4, 27, tzinfo=pytz.utc)} + + # it works + DataFrame.from_records([rec], index='begin_time') + + def test_frame_tz_localize(self): + rng = date_range('1/1/2011', periods=100, freq='H') + + df = DataFrame({'a': 1}, index=rng) + result = df.tz_localize('utc') + expected = DataFrame({'a': 1}, rng.tz_localize('UTC')) + assert result.index.tz.zone == 'UTC' + tm.assert_frame_equal(result, expected) + + df = df.T + result = df.tz_localize('utc', axis=1) + assert result.columns.tz.zone == 'UTC' + tm.assert_frame_equal(result, expected.T) + + def test_frame_tz_convert(self): + rng = date_range('1/1/2011', periods=200, freq='D', tz='US/Eastern') + + df = DataFrame({'a': 1}, index=rng) + result = df.tz_convert('Europe/Berlin') + expected = DataFrame({'a': 1}, rng.tz_convert('Europe/Berlin')) + assert result.index.tz.zone == 'Europe/Berlin' + tm.assert_frame_equal(result, expected) + + df = df.T + result = df.tz_convert('Europe/Berlin', axis=1) + assert result.columns.tz.zone == 'Europe/Berlin' + tm.assert_frame_equal(result, expected.T) + + def test_frame_join_tzaware(self): + test1 = DataFrame(np.zeros((6, 3)), + index=date_range("2012-11-15 00:00:00", periods=6, + freq="100L", tz="US/Central")) + test2 = DataFrame(np.zeros((3, 3)), + index=date_range("2012-11-15 00:00:00", periods=3, + freq="250L", tz="US/Central"), + columns=lrange(3, 6)) + + result = test1.join(test2, how='outer') + ex_index = test1.index.union(test2.index) + + tm.assert_index_equal(result.index, ex_index) + assert result.index.tz.zone == 'US/Central' + + def test_frame_add_tz_mismatch_converts_to_utc(self): + rng = 
date_range('1/1/2011', periods=10, freq='H', tz='US/Eastern') + df = DataFrame(np.random.randn(len(rng)), index=rng, columns=['a']) + + df_moscow = df.tz_convert('Europe/Moscow') + result = df + df_moscow + assert result.index.tz is pytz.utc + + result = df_moscow + df + assert result.index.tz is pytz.utc + + def test_frame_align_aware(self): + idx1 = date_range('2001', periods=5, freq='H', tz='US/Eastern') + idx2 = date_range('2001', periods=5, freq='2H', tz='US/Eastern') + df1 = DataFrame(np.random.randn(len(idx1), 3), idx1) + df2 = DataFrame(np.random.randn(len(idx2), 3), idx2) + new1, new2 = df1.align(df2) + assert df1.index.tz == new1.index.tz + assert df2.index.tz == new2.index.tz + + # different timezones convert to UTC + + # frame with frame + df1_central = df1.tz_convert('US/Central') + new1, new2 = df1.align(df1_central) + assert new1.index.tz == pytz.UTC + assert new2.index.tz == pytz.UTC + + # frame with Series + new1, new2 = df1.align(df1_central[0], axis=0) + assert new1.index.tz == pytz.UTC + assert new2.index.tz == pytz.UTC + + df1[0].align(df1_central, axis=0) + assert new1.index.tz == pytz.UTC + assert new2.index.tz == pytz.UTC + + @pytest.mark.parametrize('tz', ['US/Eastern', 'dateutil/US/Eastern']) + def test_frame_no_datetime64_dtype(self, tz): + # after GH#7822 + # these retain the timezones on dict construction + dr = date_range('2011/1/1', '2012/1/1', freq='W-FRI') + dr_tz = dr.tz_localize(tz) + df = DataFrame({'A': 'foo', 'B': dr_tz}, index=dr) + tz_expected = DatetimeTZDtype('ns', dr_tz.tzinfo) + assert df['B'].dtype == tz_expected + + # GH#2810 (with timezones) + datetimes_naive = [ts.to_pydatetime() for ts in dr] + datetimes_with_tz = [ts.to_pydatetime() for ts in dr_tz] + df = DataFrame({'dr': dr, + 'dr_tz': dr_tz, + 'datetimes_naive': datetimes_naive, + 'datetimes_with_tz': datetimes_with_tz}) + result = df.get_dtype_counts().sort_index() + expected = Series({'datetime64[ns]': 2, + str(tz_expected): 2}).sort_index() + 
tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('tz', ['US/Eastern', 'dateutil/US/Eastern']) + def test_frame_reset_index(self, tz): + dr = date_range('2012-06-02', periods=10, tz=tz) + df = DataFrame(np.random.randn(len(dr)), dr) + roundtripped = df.reset_index().set_index('index') + xp = df.index.tz + rs = roundtripped.index.tz + assert xp == rs diff --git a/pandas/tests/tseries/test_timezones.py b/pandas/tests/tseries/test_timezones.py index cc5f4d30f9aaf..e47be69b79feb 100644 --- a/pandas/tests/tseries/test_timezones.py +++ b/pandas/tests/tseries/test_timezones.py @@ -16,13 +16,11 @@ import pandas.tseries.offsets as offsets from pandas.compat import lrange, zip from pandas.core.indexes.datetimes import bdate_range, date_range -from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas._libs import tslib from pandas._libs.tslibs import timezones, conversion -from pandas import (Index, Series, DataFrame, isna, Timestamp, NaT, +from pandas import (Index, Series, isna, Timestamp, NaT, DatetimeIndex, to_datetime) -from pandas.util.testing import (assert_frame_equal, assert_series_equal, - set_timezone) +from pandas.util.testing import assert_series_equal, set_timezone class FixedOffset(tzinfo): @@ -786,29 +784,6 @@ def test_to_datetime_tzlocal(self): result = to_datetime(arr, utc=True) assert result.tz is pytz.utc - def test_frame_no_datetime64_dtype(self): - - # after 7822 - # these retain the timezones on dict construction - - dr = date_range('2011/1/1', '2012/1/1', freq='W-FRI') - dr_tz = dr.tz_localize(self.tzstr('US/Eastern')) - e = DataFrame({'A': 'foo', 'B': dr_tz}, index=dr) - tz_expected = DatetimeTZDtype('ns', dr_tz.tzinfo) - assert e['B'].dtype == tz_expected - - # GH 2810 (with timezones) - datetimes_naive = [ts.to_pydatetime() for ts in dr] - datetimes_with_tz = [ts.to_pydatetime() for ts in dr_tz] - df = DataFrame({'dr': dr, - 'dr_tz': dr_tz, - 'datetimes_naive': datetimes_naive, - 'datetimes_with_tz': datetimes_with_tz}) - 
result = df.get_dtype_counts().sort_index() - expected = Series({'datetime64[ns]': 2, - str(tz_expected): 2}).sort_index() - assert_series_equal(result, expected) - def test_hongkong_tz_convert(self): # #1673 dr = date_range('2012-01-01', '2012-01-10', freq='D', tz='Hongkong') @@ -872,21 +847,6 @@ def test_convert_datetime_list(self): assert dr.tz == dr2.tz assert dr2.name == 'foo' - def test_frame_from_records_utc(self): - rec = {'datum': 1.5, - 'begin_time': datetime(2006, 4, 27, tzinfo=pytz.utc)} - - # it works - DataFrame.from_records([rec], index='begin_time') - - def test_frame_reset_index(self): - dr = date_range('2012-06-02', periods=10, tz=self.tzstr('US/Eastern')) - df = DataFrame(np.random.randn(len(dr)), dr) - roundtripped = df.reset_index().set_index('index') - xp = df.index.tz - rs = roundtripped.index.tz - assert xp == rs - def test_dateutil_tzoffset_support(self): values = [188.5, 328.25] tzinfo = tzoffset(None, 7200) @@ -1289,7 +1249,7 @@ def test_tz_localize_roundtrip(self): tm.assert_index_equal(reset, idx) assert reset.tzinfo is None - def test_series_frame_tz_localize(self): + def test_series_tz_localize(self): rng = date_range('1/1/2011', periods=100, freq='H') ts = Series(1, index=rng) @@ -1297,41 +1257,19 @@ def test_series_frame_tz_localize(self): result = ts.tz_localize('utc') assert result.index.tz.zone == 'UTC' - df = DataFrame({'a': 1}, index=rng) - result = df.tz_localize('utc') - expected = DataFrame({'a': 1}, rng.tz_localize('UTC')) - assert result.index.tz.zone == 'UTC' - assert_frame_equal(result, expected) - - df = df.T - result = df.tz_localize('utc', axis=1) - assert result.columns.tz.zone == 'UTC' - assert_frame_equal(result, expected.T) - # Can't localize if already tz-aware rng = date_range('1/1/2011', periods=100, freq='H', tz='utc') ts = Series(1, index=rng) tm.assert_raises_regex(TypeError, 'Already tz-aware', ts.tz_localize, 'US/Eastern') - def test_series_frame_tz_convert(self): + def test_series_tz_convert(self): rng = 
date_range('1/1/2011', periods=200, freq='D', tz='US/Eastern') ts = Series(1, index=rng) result = ts.tz_convert('Europe/Berlin') assert result.index.tz.zone == 'Europe/Berlin' - df = DataFrame({'a': 1}, index=rng) - result = df.tz_convert('Europe/Berlin') - expected = DataFrame({'a': 1}, rng.tz_convert('Europe/Berlin')) - assert result.index.tz.zone == 'Europe/Berlin' - assert_frame_equal(result, expected) - - df = df.T - result = df.tz_convert('Europe/Berlin', axis=1) - assert result.columns.tz.zone == 'Europe/Berlin' - assert_frame_equal(result, expected.T) - # can't convert tz-naive rng = date_range('1/1/2011', periods=200, freq='D') ts = Series(1, index=rng) @@ -1389,20 +1327,6 @@ def test_join_aware(self): pytest.raises(Exception, ts.__add__, ts_utc) pytest.raises(Exception, ts_utc.__add__, ts) - test1 = DataFrame(np.zeros((6, 3)), - index=date_range("2012-11-15 00:00:00", periods=6, - freq="100L", tz="US/Central")) - test2 = DataFrame(np.zeros((3, 3)), - index=date_range("2012-11-15 00:00:00", periods=3, - freq="250L", tz="US/Central"), - columns=lrange(3, 6)) - - result = test1.join(test2, how='outer') - ex_index = test1.index.union(test2.index) - - tm.assert_index_equal(result.index, ex_index) - assert result.index.tz.zone == 'US/Central' - # non-overlapping rng = date_range("2012-11-15 00:00:00", periods=6, freq="H", tz="US/Central") @@ -1413,34 +1337,13 @@ def test_join_aware(self): result = rng.union(rng2) assert result.tz.zone == 'UTC' - def test_align_aware(self): + def test_series_align_aware(self): idx1 = date_range('2001', periods=5, freq='H', tz='US/Eastern') - idx2 = date_range('2001', periods=5, freq='2H', tz='US/Eastern') - df1 = DataFrame(np.random.randn(len(idx1), 3), idx1) - df2 = DataFrame(np.random.randn(len(idx2), 3), idx2) - new1, new2 = df1.align(df2) - assert df1.index.tz == new1.index.tz - assert df2.index.tz == new2.index.tz - + ser = Series(np.random.randn(len(idx1)), index=idx1) + ser_central = ser.tz_convert('US/Central') # # 
different timezones convert to UTC - # frame - df1_central = df1.tz_convert('US/Central') - new1, new2 = df1.align(df1_central) - assert new1.index.tz == pytz.UTC - assert new2.index.tz == pytz.UTC - - # series - new1, new2 = df1[0].align(df1_central[0]) - assert new1.index.tz == pytz.UTC - assert new2.index.tz == pytz.UTC - - # combination - new1, new2 = df1.align(df1_central[0], axis=0) - assert new1.index.tz == pytz.UTC - assert new2.index.tz == pytz.UTC - - df1[0].align(df1_central, axis=0) + new1, new2 = ser.align(ser_central) assert new1.index.tz == pytz.UTC assert new2.index.tz == pytz.UTC @@ -1523,7 +1426,7 @@ def test_append_aware_naive(self): assert ts_result.index.equals(ts1.index.astype(object).append( ts2.index)) - def test_equal_join_ensure_utc(self): + def test_series_add_tz_mismatch_converts_to_utc(self): rng = date_range('1/1/2011', periods=10, freq='H', tz='US/Eastern') ts = Series(np.random.randn(len(rng)), index=rng) @@ -1535,14 +1438,6 @@ def test_equal_join_ensure_utc(self): result = ts_moscow + ts assert result.index.tz is pytz.utc - df = DataFrame({'a': ts}) - df_moscow = df.tz_convert('Europe/Moscow') - result = df + df_moscow - assert result.index.tz is pytz.utc - - result = df_moscow + df - assert result.index.tz is pytz.utc - def test_arith_utc_convert(self): rng = date_range('1/1/2011', periods=100, freq='H', tz='utc') From c5c59fa294204359d2c4a710452ba21449229da4 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 4 Feb 2018 08:06:51 -0800 Subject: [PATCH 040/217] split Timestamp tests off of 19504 (#19511) --- .../tests/scalar/timestamp/test_timezones.py | 189 +++++++++++++++++ pandas/tests/tseries/test_timezones.py | 195 +----------------- 2 files changed, 190 insertions(+), 194 deletions(-) diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py index eeec70cc234f5..7a5c6feb8b651 100644 --- a/pandas/tests/scalar/timestamp/test_timezones.py +++ 
b/pandas/tests/scalar/timestamp/test_timezones.py @@ -2,11 +2,18 @@ """ Tests for Timestamp timezone-related methods """ +from datetime import date, timedelta +from distutils.version import LooseVersion import pytest +import pytz from pytz.exceptions import AmbiguousTimeError, NonExistentTimeError +import dateutil +from dateutil.tz import gettz, tzoffset import pandas.util.testing as tm +import pandas.util._test_decorators as td + from pandas import Timestamp, NaT @@ -14,6 +21,22 @@ class TestTimestampTZOperations(object): # -------------------------------------------------------------- # Timestamp.tz_localize + def test_tz_localize_ambiguous_bool(self): + # make sure that we are correctly accepting bool values as ambiguous + # GH#14402 + ts = Timestamp('2015-11-01 01:00:03') + expected0 = Timestamp('2015-11-01 01:00:03-0500', tz='US/Central') + expected1 = Timestamp('2015-11-01 01:00:03-0600', tz='US/Central') + + with pytest.raises(pytz.AmbiguousTimeError): + ts.tz_localize('US/Central') + + result = ts.tz_localize('US/Central', ambiguous=True) + assert result == expected0 + + result = ts.tz_localize('US/Central', ambiguous=False) + assert result == expected1 + def test_tz_localize_ambiguous(self): ts = Timestamp('2014-11-02 01:00') ts_dst = ts.tz_localize('US/Eastern', ambiguous=True) @@ -70,6 +93,55 @@ def test_tz_localize_roundtrip(self, stamp, tz): assert reset == ts assert reset.tzinfo is None + def test_tz_localize_ambiguous_compat(self): + # validate that pytz and dateutil are compat for dst + # when the transition happens + naive = Timestamp('2013-10-27 01:00:00') + + pytz_zone = 'Europe/London' + dateutil_zone = 'dateutil/Europe/London' + result_pytz = naive.tz_localize(pytz_zone, ambiguous=0) + result_dateutil = naive.tz_localize(dateutil_zone, ambiguous=0) + assert result_pytz.value == result_dateutil.value + assert result_pytz.value == 1382835600000000000 + + if LooseVersion(dateutil.__version__) < LooseVersion('2.6.0'): + # dateutil 2.6 buggy w.r.t. 
ambiguous=0 + # see gh-14621 + # see https://github.com/dateutil/dateutil/issues/321 + assert (result_pytz.to_pydatetime().tzname() == + result_dateutil.to_pydatetime().tzname()) + assert str(result_pytz) == str(result_dateutil) + elif LooseVersion(dateutil.__version__) > LooseVersion('2.6.0'): + # fixed ambiguous behavior + assert result_pytz.to_pydatetime().tzname() == 'GMT' + assert result_dateutil.to_pydatetime().tzname() == 'BST' + assert str(result_pytz) != str(result_dateutil) + + # 1 hour difference + result_pytz = naive.tz_localize(pytz_zone, ambiguous=1) + result_dateutil = naive.tz_localize(dateutil_zone, ambiguous=1) + assert result_pytz.value == result_dateutil.value + assert result_pytz.value == 1382832000000000000 + + # dateutil < 2.6 is buggy w.r.t. ambiguous timezones + if LooseVersion(dateutil.__version__) > LooseVersion('2.5.3'): + # see gh-14621 + assert str(result_pytz) == str(result_dateutil) + assert (result_pytz.to_pydatetime().tzname() == + result_dateutil.to_pydatetime().tzname()) + + @pytest.mark.parametrize('tz', [pytz.timezone('US/Eastern'), + gettz('US/Eastern'), + 'US/Eastern', 'dateutil/US/Eastern']) + def test_timestamp_tz_localize(self, tz): + stamp = Timestamp('3/11/2012 04:00') + + result = stamp.tz_localize(tz) + expected = Timestamp('3/11/2012 04:00', tz=tz) + assert result.hour == expected.hour + assert result == expected + # ------------------------------------------------------------------ # Timestamp.tz_convert @@ -85,3 +157,120 @@ def test_tz_convert_roundtrip(self, stamp, tz): assert reset == Timestamp(stamp) assert reset.tzinfo is None assert reset == converted.tz_convert('UTC').tz_localize(None) + + @pytest.mark.parametrize('tzstr', ['US/Eastern', 'dateutil/US/Eastern']) + def test_astimezone(self, tzstr): + # astimezone is an alias for tz_convert, so keep it with + # the tz_convert tests + utcdate = Timestamp('3/11/2012 22:00', tz='UTC') + expected = utcdate.tz_convert(tzstr) + result = utcdate.astimezone(tzstr) + 
assert expected == result + assert isinstance(result, Timestamp) + + @td.skip_if_windows + def test_tz_convert_utc_with_system_utc(self): + from pandas._libs.tslibs.timezones import maybe_get_tz + + # from system utc to real utc + ts = Timestamp('2001-01-05 11:56', tz=maybe_get_tz('dateutil/UTC')) + # check that the time hasn't changed. + assert ts == ts.tz_convert(dateutil.tz.tzutc()) + + # from system utc to real utc + ts = Timestamp('2001-01-05 11:56', tz=maybe_get_tz('dateutil/UTC')) + # check that the time hasn't changed. + assert ts == ts.tz_convert(dateutil.tz.tzutc()) + + # ------------------------------------------------------------------ + # Timestamp.__init__ with tz str or tzinfo + + def test_timestamp_constructor_tz_utc(self): + utc_stamp = Timestamp('3/11/2012 05:00', tz='utc') + assert utc_stamp.tzinfo is pytz.utc + assert utc_stamp.hour == 5 + + utc_stamp = Timestamp('3/11/2012 05:00').tz_localize('utc') + assert utc_stamp.hour == 5 + + def test_timestamp_to_datetime_tzoffset(self): + tzinfo = tzoffset(None, 7200) + expected = Timestamp('3/11/2012 04:00', tz=tzinfo) + result = Timestamp(expected.to_pydatetime()) + assert expected == result + + def test_timestamp_constructor_near_dst_boundary(self): + # GH#11481 & GH#15777 + # Naive string timestamps were being localized incorrectly + # with tz_convert_single instead of tz_localize_to_utc + + for tz in ['Europe/Brussels', 'Europe/Prague']: + result = Timestamp('2015-10-25 01:00', tz=tz) + expected = Timestamp('2015-10-25 01:00').tz_localize(tz) + assert result == expected + + with pytest.raises(pytz.AmbiguousTimeError): + Timestamp('2015-10-25 02:00', tz=tz) + + result = Timestamp('2017-03-26 01:00', tz='Europe/Paris') + expected = Timestamp('2017-03-26 01:00').tz_localize('Europe/Paris') + assert result == expected + + with pytest.raises(pytz.NonExistentTimeError): + Timestamp('2017-03-26 02:00', tz='Europe/Paris') + + # GH#11708 + naive = Timestamp('2015-11-18 10:00:00') + result = 
naive.tz_localize('UTC').tz_convert('Asia/Kolkata') + expected = Timestamp('2015-11-18 15:30:00+0530', tz='Asia/Kolkata') + assert result == expected + + # GH#15823 + result = Timestamp('2017-03-26 00:00', tz='Europe/Paris') + expected = Timestamp('2017-03-26 00:00:00+0100', tz='Europe/Paris') + assert result == expected + + result = Timestamp('2017-03-26 01:00', tz='Europe/Paris') + expected = Timestamp('2017-03-26 01:00:00+0100', tz='Europe/Paris') + assert result == expected + + with pytest.raises(pytz.NonExistentTimeError): + Timestamp('2017-03-26 02:00', tz='Europe/Paris') + + result = Timestamp('2017-03-26 02:00:00+0100', tz='Europe/Paris') + naive = Timestamp(result.value) + expected = naive.tz_localize('UTC').tz_convert('Europe/Paris') + assert result == expected + + result = Timestamp('2017-03-26 03:00', tz='Europe/Paris') + expected = Timestamp('2017-03-26 03:00:00+0200', tz='Europe/Paris') + assert result == expected + + @pytest.mark.parametrize('tz', [pytz.timezone('US/Eastern'), + gettz('US/Eastern'), + 'US/Eastern', 'dateutil/US/Eastern']) + def test_timestamp_constructed_by_date_and_tz(self, tz): + # GH#2993, Timestamp cannot be constructed by datetime.date + # and tz correctly + + result = Timestamp(date(2012, 3, 11), tz=tz) + + expected = Timestamp('3/11/2012', tz=tz) + assert result.hour == expected.hour + assert result == expected + + @pytest.mark.parametrize('tz', [pytz.timezone('US/Eastern'), + gettz('US/Eastern'), + 'US/Eastern', 'dateutil/US/Eastern']) + def test_timestamp_add_timedelta_push_over_dst_boundary(self, tz): + # GH#1389 + + # 4 hours before DST transition + stamp = Timestamp('3/10/2012 22:00', tz=tz) + + result = stamp + timedelta(hours=6) + + # spring forward, + "7" hours + expected = Timestamp('3/11/2012 05:00', tz=tz) + + assert result == expected diff --git a/pandas/tests/tseries/test_timezones.py b/pandas/tests/tseries/test_timezones.py index e47be69b79feb..2630984a70807 100644 --- a/pandas/tests/tseries/test_timezones.py +++ 
b/pandas/tests/tseries/test_timezones.py @@ -9,7 +9,7 @@ from pytz import NonExistentTimeError from distutils.version import LooseVersion from dateutil.tz import tzlocal, tzoffset -from datetime import datetime, timedelta, tzinfo, date +from datetime import datetime, timedelta, tzinfo import pandas.util.testing as tm import pandas.util._test_decorators as td @@ -119,120 +119,6 @@ def test_localize_utc_conversion_explicit(self): pytest.raises(NonExistentTimeError, rng.tz_localize, self.tz('US/Eastern')) - def test_timestamp_tz_localize(self): - stamp = Timestamp('3/11/2012 04:00') - - result = stamp.tz_localize(self.tzstr('US/Eastern')) - expected = Timestamp('3/11/2012 04:00', tz=self.tzstr('US/Eastern')) - assert result.hour == expected.hour - assert result == expected - - def test_timestamp_tz_localize_explicit(self): - stamp = Timestamp('3/11/2012 04:00') - - result = stamp.tz_localize(self.tz('US/Eastern')) - expected = Timestamp('3/11/2012 04:00', tz=self.tz('US/Eastern')) - assert result.hour == expected.hour - assert result == expected - - def test_timestamp_constructed_by_date_and_tz(self): - # Fix Issue 2993, Timestamp cannot be constructed by datetime.date - # and tz correctly - - result = Timestamp(date(2012, 3, 11), tz=self.tzstr('US/Eastern')) - - expected = Timestamp('3/11/2012', tz=self.tzstr('US/Eastern')) - assert result.hour == expected.hour - assert result == expected - - def test_timestamp_constructed_by_date_and_tz_explicit(self): - # Fix Issue 2993, Timestamp cannot be constructed by datetime.date - # and tz correctly - - result = Timestamp(date(2012, 3, 11), tz=self.tz('US/Eastern')) - - expected = Timestamp('3/11/2012', tz=self.tz('US/Eastern')) - assert result.hour == expected.hour - assert result == expected - - def test_timestamp_constructor_near_dst_boundary(self): - # GH 11481 & 15777 - # Naive string timestamps were being localized incorrectly - # with tz_convert_single instead of tz_localize_to_utc - - for tz in ['Europe/Brussels', 
'Europe/Prague']: - result = Timestamp('2015-10-25 01:00', tz=tz) - expected = Timestamp('2015-10-25 01:00').tz_localize(tz) - assert result == expected - - with pytest.raises(pytz.AmbiguousTimeError): - Timestamp('2015-10-25 02:00', tz=tz) - - result = Timestamp('2017-03-26 01:00', tz='Europe/Paris') - expected = Timestamp('2017-03-26 01:00').tz_localize('Europe/Paris') - assert result == expected - - with pytest.raises(pytz.NonExistentTimeError): - Timestamp('2017-03-26 02:00', tz='Europe/Paris') - - # GH 11708 - result = to_datetime("2015-11-18 15:30:00+05:30").tz_localize( - 'UTC').tz_convert('Asia/Kolkata') - expected = Timestamp('2015-11-18 15:30:00+0530', tz='Asia/Kolkata') - assert result == expected - - # GH 15823 - result = Timestamp('2017-03-26 00:00', tz='Europe/Paris') - expected = Timestamp('2017-03-26 00:00:00+0100', tz='Europe/Paris') - assert result == expected - - result = Timestamp('2017-03-26 01:00', tz='Europe/Paris') - expected = Timestamp('2017-03-26 01:00:00+0100', tz='Europe/Paris') - assert result == expected - - with pytest.raises(pytz.NonExistentTimeError): - Timestamp('2017-03-26 02:00', tz='Europe/Paris') - result = Timestamp('2017-03-26 02:00:00+0100', tz='Europe/Paris') - expected = Timestamp(result.value).tz_localize( - 'UTC').tz_convert('Europe/Paris') - assert result == expected - - result = Timestamp('2017-03-26 03:00', tz='Europe/Paris') - expected = Timestamp('2017-03-26 03:00:00+0200', tz='Europe/Paris') - assert result == expected - - def test_timestamp_to_datetime_tzoffset(self): - tzinfo = tzoffset(None, 7200) - expected = Timestamp('3/11/2012 04:00', tz=tzinfo) - result = Timestamp(expected.to_pydatetime()) - assert expected == result - - def test_timedelta_push_over_dst_boundary(self): - # #1389 - - # 4 hours before DST transition - stamp = Timestamp('3/10/2012 22:00', tz=self.tzstr('US/Eastern')) - - result = stamp + timedelta(hours=6) - - # spring forward, + "7" hours - expected = Timestamp('3/11/2012 05:00', 
tz=self.tzstr('US/Eastern')) - - assert result == expected - - def test_timedelta_push_over_dst_boundary_explicit(self): - # #1389 - - # 4 hours before DST transition - stamp = Timestamp('3/10/2012 22:00', tz=self.tz('US/Eastern')) - - result = stamp + timedelta(hours=6) - - # spring forward, + "7" hours - expected = Timestamp('3/11/2012 05:00', tz=self.tz('US/Eastern')) - - assert result == expected - def test_tz_localize_dti(self): dti = DatetimeIndex(start='1/1/2005', end='1/1/2005 0:00:30.256', freq='L') @@ -267,13 +153,6 @@ def test_tz_localize_empty_series(self): ts2 = ts.tz_localize(self.tzstr('US/Eastern')) assert self.cmptz(ts2.index.tz, self.tz('US/Eastern')) - def test_astimezone(self): - utc = Timestamp('3/11/2012 22:00', tz='UTC') - expected = utc.tz_convert(self.tzstr('US/Eastern')) - result = utc.astimezone(self.tzstr('US/Eastern')) - assert expected == result - assert isinstance(result, Timestamp) - def test_create_with_tz(self): stamp = Timestamp('3/11/2012 05:00', tz=self.tzstr('US/Eastern')) assert stamp.hour == 5 @@ -283,13 +162,6 @@ def test_create_with_tz(self): assert stamp == rng[1] - utc_stamp = Timestamp('3/11/2012 05:00', tz='utc') - assert utc_stamp.tzinfo is pytz.utc - assert utc_stamp.hour == 5 - - utc_stamp = Timestamp('3/11/2012 05:00').tz_localize('utc') - assert utc_stamp.hour == 5 - def test_create_with_fixed_tz(self): off = FixedOffset(420, '+07:00') start = datetime(2012, 3, 11, 5, 0, 0, tzinfo=off) @@ -591,16 +463,6 @@ def test_ambiguous_bool(self): expected0 = Timestamp('2015-11-01 01:00:03-0500', tz='US/Central') expected1 = Timestamp('2015-11-01 01:00:03-0600', tz='US/Central') - def f(): - t.tz_localize('US/Central') - pytest.raises(pytz.AmbiguousTimeError, f) - - result = t.tz_localize('US/Central', ambiguous=True) - assert result == expected0 - - result = t.tz_localize('US/Central', ambiguous=False) - assert result == expected1 - s = Series([t]) expected0 = Series([expected0]) expected1 = Series([expected1]) @@ -948,20 
+810,6 @@ def normalize(self, ts): # no-op for dateutil return ts - @td.skip_if_windows - def test_utc_with_system_utc(self): - from pandas._libs.tslibs.timezones import maybe_get_tz - - # from system utc to real utc - ts = Timestamp('2001-01-05 11:56', tz=maybe_get_tz('dateutil/UTC')) - # check that the time hasn't changed. - assert ts == ts.tz_convert(dateutil.tz.tzutc()) - - # from system utc to real utc - ts = Timestamp('2001-01-05 11:56', tz=maybe_get_tz('dateutil/UTC')) - # check that the time hasn't changed. - assert ts == ts.tz_convert(dateutil.tz.tzutc()) - def test_tz_convert_hour_overflow_dst(self): # Regression test for: # https://github.com/pandas-dev/pandas/issues/13306 @@ -1175,47 +1023,6 @@ def test_cache_keys_are_distinct_for_pytz_vs_dateutil(self, tz_name): class TestTimeZones(object): timezones = ['UTC', 'Asia/Tokyo', 'US/Eastern', 'dateutil/US/Pacific'] - def test_ambiguous_compat(self): - # validate that pytz and dateutil are compat for dst - # when the transition happens - - pytz_zone = 'Europe/London' - dateutil_zone = 'dateutil/Europe/London' - result_pytz = (Timestamp('2013-10-27 01:00:00') - .tz_localize(pytz_zone, ambiguous=0)) - result_dateutil = (Timestamp('2013-10-27 01:00:00') - .tz_localize(dateutil_zone, ambiguous=0)) - assert result_pytz.value == result_dateutil.value - assert result_pytz.value == 1382835600000000000 - - if LooseVersion(dateutil.__version__) < LooseVersion('2.6.0'): - # dateutil 2.6 buggy w.r.t. 
ambiguous=0 - # see gh-14621 - # see https://github.com/dateutil/dateutil/issues/321 - assert (result_pytz.to_pydatetime().tzname() == - result_dateutil.to_pydatetime().tzname()) - assert str(result_pytz) == str(result_dateutil) - elif LooseVersion(dateutil.__version__) > LooseVersion('2.6.0'): - # fixed ambiguous behavior - assert result_pytz.to_pydatetime().tzname() == 'GMT' - assert result_dateutil.to_pydatetime().tzname() == 'BST' - assert str(result_pytz) != str(result_dateutil) - - # 1 hour difference - result_pytz = (Timestamp('2013-10-27 01:00:00') - .tz_localize(pytz_zone, ambiguous=1)) - result_dateutil = (Timestamp('2013-10-27 01:00:00') - .tz_localize(dateutil_zone, ambiguous=1)) - assert result_pytz.value == result_dateutil.value - assert result_pytz.value == 1382832000000000000 - - # dateutil < 2.6 is buggy w.r.t. ambiguous timezones - if LooseVersion(dateutil.__version__) > LooseVersion('2.5.3'): - # see gh-14621 - assert str(result_pytz) == str(result_dateutil) - assert (result_pytz.to_pydatetime().tzname() == - result_dateutil.to_pydatetime().tzname()) - def test_index_equals_with_tz(self): left = date_range('1/1/2011', periods=100, freq='H', tz='utc') right = date_range('1/1/2011', periods=100, freq='H', tz='US/Eastern') From 98c5fea7b4de322b8e704c52728714467a58e554 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 4 Feb 2018 08:18:10 -0800 Subject: [PATCH 041/217] ops cleanup, named functions instead of lambdas (#19515) --- pandas/core/ops.py | 92 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 64 insertions(+), 28 deletions(-) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 6ea4a81cb52a1..6db84aedce7e7 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -42,6 +42,60 @@ ABCSparseSeries, ABCSparseArray) +# ----------------------------------------------------------------------------- +# Reversed Operations not available in the stdlib operator module. 
+# Defining these instead of using lambdas allows us to reference them by name. + +def radd(left, right): + return right + left + + +def rsub(left, right): + return right - left + + +def rmul(left, right): + return right * left + + +def rdiv(left, right): + return right / left + + +def rtruediv(left, right): + return right / left + + +def rfloordiv(left, right): + return right // left + + +def rmod(left, right): + return right % left + + +def rdivmod(left, right): + return divmod(right, left) + + +def rpow(left, right): + return right ** left + + +def rand_(left, right): + return operator.and_(right, left) + + +def ror_(left, right): + return operator.or_(right, left) + + +def rxor(left, right): + return operator.xor(right, left) + + +# ----------------------------------------------------------------------------- + def _gen_eval_kwargs(name): """ Find the keyword arguments to pass to numexpr for the given operation. @@ -140,64 +194,51 @@ def _get_frame_op_default_axis(name): _op_descriptions = { 'add': {'op': '+', 'desc': 'Addition', - 'reversed': False, 'reverse': 'radd'}, 'sub': {'op': '-', 'desc': 'Subtraction', - 'reversed': False, 'reverse': 'rsub'}, 'mul': {'op': '*', 'desc': 'Multiplication', - 'reversed': False, 'reverse': 'rmul'}, 'mod': {'op': '%', 'desc': 'Modulo', - 'reversed': False, 'reverse': 'rmod'}, 'pow': {'op': '**', 'desc': 'Exponential power', - 'reversed': False, 'reverse': 'rpow'}, 'truediv': {'op': '/', 'desc': 'Floating division', - 'reversed': False, 'reverse': 'rtruediv'}, 'floordiv': {'op': '//', 'desc': 'Integer division', - 'reversed': False, 'reverse': 'rfloordiv'}, 'divmod': {'op': 'divmod', 'desc': 'Integer division and modulo', - 'reversed': False, 'reverse': None}, 'eq': {'op': '==', 'desc': 'Equal to', - 'reversed': False, 'reverse': None}, 'ne': {'op': '!=', 'desc': 'Not equal to', - 'reversed': False, 'reverse': None}, 'lt': {'op': '<', 'desc': 'Less than', - 'reversed': False, 'reverse': None}, 'le': {'op': '<=', 'desc': 'Less 
than or equal to', - 'reversed': False, 'reverse': None}, 'gt': {'op': '>', 'desc': 'Greater than', - 'reversed': False, 'reverse': None}, 'ge': {'op': '>=', 'desc': 'Greater than or equal to', - 'reversed': False, 'reverse': None}} _op_names = list(_op_descriptions.keys()) for key in _op_names: + _op_descriptions[key]['reversed'] = False reverse_op = _op_descriptions[key]['reverse'] if reverse_op is not None: _op_descriptions[reverse_op] = _op_descriptions[key].copy() @@ -392,7 +433,7 @@ def names(x): # yapf: disable new_methods = dict( add=arith_method(operator.add, names('add'), op('+')), - radd=arith_method(lambda x, y: y + x, names('radd'), op('+')), + radd=arith_method(radd, names('radd'), op('+')), sub=arith_method(operator.sub, names('sub'), op('-')), mul=arith_method(operator.mul, names('mul'), op('*')), truediv=arith_method(operator.truediv, names('truediv'), op('/')), @@ -404,13 +445,11 @@ def names(x): # not entirely sure why this is necessary, but previously was included # so it's here to maintain compatibility rmul=arith_method(operator.mul, names('rmul'), op('*')), - rsub=arith_method(lambda x, y: y - x, names('rsub'), op('-')), - rtruediv=arith_method(lambda x, y: operator.truediv(y, x), - names('rtruediv'), op('/')), - rfloordiv=arith_method(lambda x, y: operator.floordiv(y, x), - names('rfloordiv'), op('//')), - rpow=arith_method(lambda x, y: y**x, names('rpow'), op('**')), - rmod=arith_method(lambda x, y: y % x, names('rmod'), op('%'))) + rsub=arith_method(rsub, names('rsub'), op('-')), + rtruediv=arith_method(rtruediv, names('rtruediv'), op('/')), + rfloordiv=arith_method(rfloordiv, names('rfloordiv'), op('//')), + rpow=arith_method(rpow, names('rpow'), op('**')), + rmod=arith_method(rmod, names('rmod'), op('%'))) # yapf: enable new_methods['div'] = new_methods['truediv'] new_methods['rdiv'] = new_methods['rtruediv'] @@ -430,12 +469,9 @@ def names(x): or_=bool_method(operator.or_, names('or_'), op('|')), # For some reason ``^`` wasn't used in 
original. xor=bool_method(operator.xor, names('xor'), op('^')), - rand_=bool_method(lambda x, y: operator.and_(y, x), - names('rand_'), op('&')), - ror_=bool_method(lambda x, y: operator.or_(y, x), - names('ror_'), op('|')), - rxor=bool_method(lambda x, y: operator.xor(y, x), - names('rxor'), op('^')))) + rand_=bool_method(rand_, names('rand_'), op('&')), + ror_=bool_method(ror_, names('ror_'), op('|')), + rxor=bool_method(rxor, names('rxor'), op('^')))) if have_divmod: # divmod doesn't have an op that is supported by numexpr new_methods['divmod'] = arith_method(divmod, names('divmod'), None) From ac941cc9edd6f23d9431ff15b38d647d70507b20 Mon Sep 17 00:00:00 2001 From: Paul Reidy Date: Sun, 4 Feb 2018 16:32:52 +0000 Subject: [PATCH 042/217] DOC: Improve replace docstring (#18100) --- pandas/core/frame.py | 8 ++ pandas/core/generic.py | 212 +++++++++++++++++++++++++++++++++-------- pandas/core/series.py | 8 ++ 3 files changed, 187 insertions(+), 41 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 96d28581cfdd9..201d8ba427c8a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3080,6 +3080,14 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, inplace=inplace, limit=limit, downcast=downcast, **kwargs) + @Appender(_shared_docs['replace'] % _shared_doc_kwargs) + def replace(self, to_replace=None, value=None, inplace=False, limit=None, + regex=False, method='pad', axis=None): + return super(DataFrame, self).replace(to_replace=to_replace, + value=value, inplace=inplace, + limit=limit, regex=regex, + method=method, axis=axis) + @Appender(_shared_docs['shift'] % _shared_doc_kwargs) def shift(self, periods=1, freq=None, axis=0): return super(DataFrame, self).shift(periods=periods, freq=freq, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d34a85b5b4388..0f038cd687dfd 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -69,6 +69,10 @@ def _single_replace(self, to_replace, method, 
inplace, limit): + """ + Replaces values in a Series using the fill method specified when no + replacement value is given in the replace method + """ if self.ndim != 1: raise TypeError('cannot replace {0} with method {1} on a {2}' .format(to_replace, method, type(self).__name__)) @@ -4787,94 +4791,111 @@ def bfill(self, axis=None, inplace=False, limit=None, downcast=None): return self.fillna(method='bfill', axis=axis, inplace=inplace, limit=limit, downcast=downcast) - def replace(self, to_replace=None, value=None, inplace=False, limit=None, - regex=False, method='pad', axis=None): - """ + _shared_docs['replace'] = (""" Replace values given in 'to_replace' with 'value'. Parameters ---------- to_replace : str, regex, list, dict, Series, numeric, or None - * str or regex: + * numeric, str or regex: - - str: string exactly matching `to_replace` will be replaced - with `value` - - regex: regexs matching `to_replace` will be replaced with - `value` + - numeric: numeric values equal to ``to_replace`` will be + replaced with ``value`` + - str: string exactly matching ``to_replace`` will be replaced + with ``value`` + - regex: regexs matching ``to_replace`` will be replaced with + ``value`` * list of str, regex, or numeric: - - First, if `to_replace` and `value` are both lists, they + - First, if ``to_replace`` and ``value`` are both lists, they **must** be the same length. - Second, if ``regex=True`` then all of the strings in **both** lists will be interpreted as regexs otherwise they will match - directly. This doesn't matter much for `value` since there + directly. This doesn't matter much for ``value`` since there are only a few possible substitution regexes you can use. - - str and regex rules apply as above. + - str, regex and numeric rules apply as above. * dict: - - Nested dictionaries, e.g., {'a': {'b': nan}}, are read as - follows: look in column 'a' for the value 'b' and replace it - with nan. You can nest regular expressions as well. 
Note that + - Dicts can be used to specify different replacement values + for different existing values. For example, + {'a': 'b', 'y': 'z'} replaces the value 'a' with 'b' and + 'y' with 'z'. To use a dict in this way the ``value`` + parameter should be ``None``. + - For a DataFrame a dict can specify that different values + should be replaced in different columns. For example, + {'a': 1, 'b': 'z'} looks for the value 1 in column 'a' and + the value 'z' in column 'b' and replaces these values with + whatever is specified in ``value``. The ``value`` parameter + should not be ``None`` in this case. You can treat this as a + special case of passing two lists except that you are + specifying the column to search in. + - For a DataFrame nested dictionaries, e.g., + {'a': {'b': np.nan}}, are read as follows: look in column 'a' + for the value 'b' and replace it with NaN. The ``value`` + parameter should be ``None`` to use a nested dict in this + way. You can nest regular expressions as well. Note that column names (the top-level dictionary keys in a nested dictionary) **cannot** be regular expressions. - - Keys map to column names and values map to substitution - values. You can treat this as a special case of passing two - lists except that you are specifying the column to search in. * None: - This means that the ``regex`` argument must be a string, compiled regular expression, or list, dict, ndarray or Series - of such elements. If `value` is also ``None`` then this + of such elements. If ``value`` is also ``None`` then this **must** be a nested dictionary or ``Series``. See the examples section for examples of each of these. value : scalar, dict, list, str, regex, default None - Value to use to fill holes (e.g. 0), alternately a dict of values - specifying which value to use for each column (columns not in the - dict will not be filled). Regular expressions, strings and lists or - dicts of such objects are also allowed. 
+ Value to replace any values matching ``to_replace`` with. + For a DataFrame a dict of values can be used to specify which + value to use for each column (columns not in the dict will not be + filled). Regular expressions, strings and lists or dicts of such + objects are also allowed. inplace : boolean, default False If True, in place. Note: this will modify any other views on this object (e.g. a column from a DataFrame). Returns the caller if this is True. limit : int, default None Maximum size gap to forward or backward fill - regex : bool or same types as `to_replace`, default False - Whether to interpret `to_replace` and/or `value` as regular - expressions. If this is ``True`` then `to_replace` *must* be a - string. Otherwise, `to_replace` must be ``None`` because this - parameter will be interpreted as a regular expression or a list, - dict, or array of regular expressions. + regex : bool or same types as ``to_replace``, default False + Whether to interpret ``to_replace`` and/or ``value`` as regular + expressions. If this is ``True`` then ``to_replace`` *must* be a + string. Alternatively, this could be a regular expression or a + list, dict, or array of regular expressions in which case + ``to_replace`` must be ``None``. method : string, optional, {'pad', 'ffill', 'bfill'} The method to use when for replacement, when ``to_replace`` is a ``list``. See Also -------- - NDFrame.reindex - NDFrame.asfreq - NDFrame.fillna + %(klass)s.fillna : Fill NA/NaN values + %(klass)s.where : Replace values based on boolean condition Returns ------- - filled : NDFrame + filled : %(klass)s Raises ------ AssertionError - * If `regex` is not a ``bool`` and `to_replace` is not ``None``. + * If ``regex`` is not a ``bool`` and ``to_replace`` is not + ``None``. 
TypeError - * If `to_replace` is a ``dict`` and `value` is not a ``list``, + * If ``to_replace`` is a ``dict`` and ``value`` is not a ``list``, ``dict``, ``ndarray``, or ``Series`` - * If `to_replace` is ``None`` and `regex` is not compilable into a - regular expression or is a list, dict, ndarray, or Series. + * If ``to_replace`` is ``None`` and ``regex`` is not compilable + into a regular expression or is a list, dict, ndarray, or + Series. + * When replacing multiple ``bool`` or ``datetime64`` objects and + the arguments to ``to_replace`` does not match the type of the + value being replaced ValueError - * If `to_replace` and `value` are ``list`` s or ``ndarray`` s, but - they are not the same length. + * If a ``list`` or an ``ndarray`` is passed to ``to_replace`` and + `value` but they are not the same length. Notes ----- @@ -4883,12 +4904,121 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, * Regular expressions will only substitute on strings, meaning you cannot provide, for example, a regular expression matching floating point numbers and expect the columns in your frame that have a - numeric dtype to be matched. However, if those floating point numbers - *are* strings, then you can do this. + numeric dtype to be matched. However, if those floating point + numbers *are* strings, then you can do this. * This method has *a lot* of options. You are encouraged to experiment and play with this method to gain intuition about how it works. - """ + Examples + -------- + + >>> s = pd.Series([0, 1, 2, 3, 4]) + >>> s.replace(0, 5) + 0 5 + 1 1 + 2 2 + 3 3 + 4 4 + dtype: int64 + >>> df = pd.DataFrame({'A': [0, 1, 2, 3, 4], + ... 'B': [5, 6, 7, 8, 9], + ... 
'C': ['a', 'b', 'c', 'd', 'e']}) + >>> df.replace(0, 5) + A B C + 0 5 5 a + 1 1 6 b + 2 2 7 c + 3 3 8 d + 4 4 9 e + + >>> df.replace([0, 1, 2, 3], 4) + A B C + 0 4 5 a + 1 4 6 b + 2 4 7 c + 3 4 8 d + 4 4 9 e + >>> df.replace([0, 1, 2, 3], [4, 3, 2, 1]) + A B C + 0 4 5 a + 1 3 6 b + 2 2 7 c + 3 1 8 d + 4 4 9 e + >>> s.replace([1, 2], method='bfill') + 0 0 + 1 3 + 2 3 + 3 3 + 4 4 + dtype: int64 + + >>> df.replace({0: 10, 1: 100}) + A B C + 0 10 5 a + 1 100 6 b + 2 2 7 c + 3 3 8 d + 4 4 9 e + >>> df.replace({'A': 0, 'B': 5}, 100) + A B C + 0 100 100 a + 1 1 6 b + 2 2 7 c + 3 3 8 d + 4 4 9 e + >>> df.replace({'A': {0: 100, 4: 400}}) + A B C + 0 100 5 a + 1 1 6 b + 2 2 7 c + 3 3 8 d + 4 400 9 e + + >>> df = pd.DataFrame({'A': ['bat', 'foo', 'bait'], + ... 'B': ['abc', 'bar', 'xyz']}) + >>> df.replace(to_replace=r'^ba.$', value='new', regex=True) + A B + 0 new abc + 1 foo new + 2 bait xyz + >>> df.replace({'A': r'^ba.$'}, {'A': 'new'}, regex=True) + A B + 0 new abc + 1 foo bar + 2 bait xyz + >>> df.replace(regex=r'^ba.$', value='new') + A B + 0 new abc + 1 foo new + 2 bait xyz + >>> df.replace(regex={r'^ba.$':'new', 'foo':'xyz'}) + A B + 0 new abc + 1 xyz new + 2 bait xyz + >>> df.replace(regex=[r'^ba.$', 'foo'], value='new') + A B + 0 new abc + 1 new new + 2 bait xyz + + Note that when replacing multiple ``bool`` or ``datetime64`` objects, + the data types in the ``to_replace`` parameter must match the data + type of the value being replaced: + + >>> df = pd.DataFrame({'A': [True, False, True], + ... 'B': [False, True, False]}) + >>> df.replace({'a string': 'new value', True: False}) # raises + TypeError: Cannot compare types 'ndarray(dtype=bool)' and 'str' + + This raises a ``TypeError`` because one of the ``dict`` keys is not of + the correct type for replacement. 
+ """) + + @Appender(_shared_docs['replace'] % _shared_doc_kwargs) + def replace(self, to_replace=None, value=None, inplace=False, limit=None, + regex=False, method='pad', axis=None): inplace = validate_bool_kwarg(inplace, 'inplace') if not is_bool(regex) and to_replace is not None: raise AssertionError("'to_replace' must be 'None' if 'regex' is " diff --git a/pandas/core/series.py b/pandas/core/series.py index 78b4c3a70a519..e4b8979d6393a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2671,6 +2671,14 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, limit=limit, downcast=downcast, **kwargs) + @Appender(generic._shared_docs['replace'] % _shared_doc_kwargs) + def replace(self, to_replace=None, value=None, inplace=False, limit=None, + regex=False, method='pad', axis=None): + return super(Series, self).replace(to_replace=to_replace, value=value, + inplace=inplace, limit=limit, + regex=regex, method=method, + axis=axis) + @Appender(generic._shared_docs['shift'] % _shared_doc_kwargs) def shift(self, periods=1, freq=None, axis=0): return super(Series, self).shift(periods=periods, freq=freq, axis=axis) From 6f302c67ed881de3c31336c801293e02b3af51fd Mon Sep 17 00:00:00 2001 From: topper-123 Date: Sun, 4 Feb 2018 21:44:39 +0000 Subject: [PATCH 043/217] DOC: minor groupby and resampler improvements (#19514) --- doc/source/groupby.rst | 7 ++++--- pandas/core/generic.py | 27 ++++++++++++++++++++++++--- pandas/core/groupby.py | 2 +- 3 files changed, 29 insertions(+), 7 deletions(-) diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst index 413138b1e52fc..407fad39ba232 100644 --- a/doc/source/groupby.rst +++ b/doc/source/groupby.rst @@ -1219,8 +1219,8 @@ see :ref:`here `. Combining ``.groupby`` and ``.pipe`` is often useful when you need to reuse GroupBy objects. -For an example, imagine having a DataFrame with columns for stores, products, -revenue and sold quantity. 
We'd like to do a groupwise calculation of *prices* +As an example, imagine having a DataFrame with columns for stores, products, +revenue and quantity sold. We'd like to do a groupwise calculation of *prices* (i.e. revenue/quantity) per store and per product. We could do this in a multi-step operation, but expressing it in terms of piping can make the code more readable. First we set the data: @@ -1230,7 +1230,8 @@ code more readable. First we set the data: import numpy as np n = 1000 df = pd.DataFrame({'Store': np.random.choice(['Store_1', 'Store_2'], n), - 'Product': np.random.choice(['Product_1', 'Product_2', 'Product_3'], n), + 'Product': np.random.choice(['Product_1', + 'Product_2'], n), 'Revenue': (np.random.random(n)*50+10).round(2), 'Quantity': np.random.randint(1, 10, size=n)}) df.head(2) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0f038cd687dfd..cb4bbb7b27c42 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5691,6 +5691,10 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, reduce the dimensionality of the return type if possible, otherwise return a consistent type + Returns + ------- + GroupBy object + Examples -------- DataFrame results @@ -5702,10 +5706,15 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, >>> data.groupby(['col1', 'col2']).mean() - Returns - ------- - GroupBy object + Notes + ----- + See the `user guide + `_ for more. + See also + -------- + resample : Convenience method for frequency conversion and resampling + of time series. """ from pandas.core.groupby import groupby @@ -5904,8 +5913,16 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None, .. versionadded:: 0.19.0 + Returns + ------- + Resampler object + Notes ----- + See the `user guide + `_ + for more. + To learn more about the offset strings, please see `this link `__. 
@@ -6071,6 +6088,10 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None, a b c d 2000-01-01 00:00:00 0 6 12 18 2000-01-01 00:03:00 0 4 8 12 + + See also + -------- + groupby : Group by mapping, function, label, or list of labels. """ from pandas.core.resample import (resample, _maybe_process_deprecations) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 2c1deb9db7bba..88af80e295d74 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -230,7 +230,7 @@ Notes ----- See more `here -`_ +`_ Examples -------- From d45afd992ddd06e2c00d50021cda618ce68f8b0a Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Mon, 5 Feb 2018 08:39:43 +0000 Subject: [PATCH 044/217] DEPR: Changing default of str.extract(expand=False) to str.extract(expand=True) (#19118) --- doc/source/text.rst | 3 ++- doc/source/whatsnew/v0.23.0.txt | 47 +++++++++++++++++++++++++++++++++ pandas/core/strings.py | 15 +++-------- pandas/tests/test_strings.py | 9 ++++--- 4 files changed, 58 insertions(+), 16 deletions(-) diff --git a/doc/source/text.rst b/doc/source/text.rst index 2b6459b581c1e..1e620acb1f88a 100644 --- a/doc/source/text.rst +++ b/doc/source/text.rst @@ -218,7 +218,8 @@ Extract first match in each subject (extract) ``DataFrame``, depending on the subject and regular expression pattern (same behavior as pre-0.18.0). When ``expand=True`` it always returns a ``DataFrame``, which is more consistent and less - confusing from the perspective of a user. + confusing from the perspective of a user. ``expand=True`` is the + default since version 0.23.0. 
The ``extract`` method accepts a `regular expression `__ with at least one diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 69965f44d87a8..0ac27a2f23386 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -296,6 +296,53 @@ Build Changes - Building from source now explicitly requires ``setuptools`` in ``setup.py`` (:issue:`18113`) - Updated conda recipe to be in compliance with conda-build 3.0+ (:issue:`18002`) +Extraction of matching patterns from strings +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +By default, extracting matching patterns from strings with :func:`str.extract` used to return a +``Series`` if a single group was being extracted (a ``DataFrame`` if more than one group was +extracted``). As of Pandas 0.23.0 :func:`str.extract` always returns a ``DataFrame``, unless +``expand`` is set to ``False`` (:issue:`11386`). + +Also, ``None`` was an accepted value for the ``expand`` parameter (which was equivalent to +``False``), but now raises a ``ValueError``. + +Previous Behavior: + +.. code-block:: ipython + + In [1]: s = pd.Series(['number 10', '12 eggs']) + + In [2]: extracted = s.str.extract('.*(\d\d).*') + + In [3]: extracted + Out [3]: + 0 10 + 1 12 + dtype: object + + In [4]: type(extracted) + Out [4]: + pandas.core.series.Series + +New Behavior: + +.. ipython:: python + + s = pd.Series(['number 10', '12 eggs']) + extracted = s.str.extract('.*(\d\d).*') + extracted + type(extracted) + +To restore previous behavior, simply set ``expand`` to ``False``: + +.. ipython:: python + + s = pd.Series(['number 10', '12 eggs']) + extracted = s.str.extract('.*(\d\d).*', expand=False) + extracted + type(extracted) + .. 
_whatsnew_0230.api: Other API Changes diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 12c7feb5f2b15..b1c1ede66236c 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -598,7 +598,7 @@ def _str_extract_frame(arr, pat, flags=0): dtype=object) -def str_extract(arr, pat, flags=0, expand=None): +def str_extract(arr, pat, flags=0, expand=True): r""" For each subject string in the Series, extract groups from the first match of regular expression pat. @@ -610,7 +610,7 @@ def str_extract(arr, pat, flags=0, expand=None): flags : int, default 0 (no flags) re module flags, e.g. re.IGNORECASE - expand : bool, default False + expand : bool, default True * If True, return DataFrame. * If False, return Series/Index/DataFrame. @@ -676,15 +676,6 @@ def str_extract(arr, pat, flags=0, expand=None): dtype: object """ - if expand is None: - warnings.warn( - "currently extract(expand=None) " + - "means expand=False (return Index/Series/DataFrame) " + - "but in a future version of pandas this will be changed " + - "to expand=True (return DataFrame)", - FutureWarning, - stacklevel=3) - expand = False if not isinstance(expand, bool): raise ValueError("expand must be True or False") if expand: @@ -1739,7 +1730,7 @@ def translate(self, table, deletechars=None): findall = _pat_wrapper(str_findall, flags=True) @copy(str_extract) - def extract(self, pat, flags=0, expand=None): + def extract(self, pat, flags=0, expand=True): return str_extract(self, pat, flags=flags, expand=expand) @copy(str_extractall) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 973fe74429551..178c5ff655b04 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -612,13 +612,16 @@ def test_match(self): def test_extract_expand_None(self): values = Series(['fooBAD__barBAD', NA, 'foo']) - with tm.assert_produces_warning(FutureWarning): + with tm.assert_raises_regex(ValueError, + 'expand must be True or False'): 
values.str.extract('.*(BAD[_]+).*(BAD)', expand=None) def test_extract_expand_unspecified(self): values = Series(['fooBAD__barBAD', NA, 'foo']) - with tm.assert_produces_warning(FutureWarning): - values.str.extract('.*(BAD[_]+).*(BAD)') + result_unspecified = values.str.extract('.*(BAD[_]+).*') + assert isinstance(result_unspecified, DataFrame) + result_true = values.str.extract('.*(BAD[_]+).*', expand=True) + tm.assert_frame_equal(result_unspecified, result_true) def test_extract_expand_False(self): # Contains tests like those in test_match and some others. From 68d6c0be7a490e16db3b698a3db558bc8fc72d9f Mon Sep 17 00:00:00 2001 From: jschendel Date: Mon, 5 Feb 2018 04:05:20 -0700 Subject: [PATCH 045/217] TST: Remove legacy instances of _multiprocess_can_split_ (#19536) --- pandas/tests/frame/test_apply.py | 2 -- pandas/tests/indexes/period/test_period.py | 1 - pandas/tests/indexes/timedeltas/test_astype.py | 1 - pandas/tests/indexes/timedeltas/test_construction.py | 1 - pandas/tests/indexes/timedeltas/test_indexing.py | 1 - pandas/tests/indexes/timedeltas/test_ops.py | 1 - pandas/tests/indexes/timedeltas/test_setops.py | 1 - pandas/tests/indexes/timedeltas/test_timedelta.py | 2 -- pandas/tests/indexes/timedeltas/test_timedelta_range.py | 1 - pandas/tests/indexes/timedeltas/test_tools.py | 1 - pandas/tests/scalar/test_timedelta.py | 2 -- pandas/tests/series/test_apply.py | 2 -- 12 files changed, 16 deletions(-) diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index e0fc6c470fe57..d69ddcd8f14d4 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -496,8 +496,6 @@ def zip_frames(*frames): class TestDataFrameAggregate(TestData): - _multiprocess_can_split_ = True - def test_agg_transform(self): with np.errstate(all='ignore'): diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index ab341b70dfe91..6fc7fa5486f82 100644 --- 
a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -14,7 +14,6 @@ class TestPeriodIndex(DatetimeLike): _holder = PeriodIndex - _multiprocess_can_split_ = True def setup_method(self, method): self.indices = dict(index=tm.makePeriodIndex(10), diff --git a/pandas/tests/indexes/timedeltas/test_astype.py b/pandas/tests/indexes/timedeltas/test_astype.py index c3bd857036efc..6c644d239069a 100644 --- a/pandas/tests/indexes/timedeltas/test_astype.py +++ b/pandas/tests/indexes/timedeltas/test_astype.py @@ -8,7 +8,6 @@ class TestTimedeltaIndex(object): - _multiprocess_can_split_ = True def test_astype(self): # GH 13149, GH 13209 diff --git a/pandas/tests/indexes/timedeltas/test_construction.py b/pandas/tests/indexes/timedeltas/test_construction.py index 70aadd9f57174..68dc0003e2312 100644 --- a/pandas/tests/indexes/timedeltas/test_construction.py +++ b/pandas/tests/indexes/timedeltas/test_construction.py @@ -9,7 +9,6 @@ class TestTimedeltaIndex(object): - _multiprocess_can_split_ = True def test_construction_base_constructor(self): arr = [pd.Timedelta('1 days'), pd.NaT, pd.Timedelta('3 days')] diff --git a/pandas/tests/indexes/timedeltas/test_indexing.py b/pandas/tests/indexes/timedeltas/test_indexing.py index e64c4e6ac54a5..59e38c2e738b0 100644 --- a/pandas/tests/indexes/timedeltas/test_indexing.py +++ b/pandas/tests/indexes/timedeltas/test_indexing.py @@ -9,7 +9,6 @@ class TestTimedeltaIndex(object): - _multiprocess_can_split_ = True def test_insert(self): diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index e944aad13f8d5..86d7dd4e1b117 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -420,7 +420,6 @@ def test_equals(self): class TestTimedeltas(object): - _multiprocess_can_split_ = True def test_timedelta_ops(self): # GH4984 diff --git a/pandas/tests/indexes/timedeltas/test_setops.py 
b/pandas/tests/indexes/timedeltas/test_setops.py index 22546d25273a7..020e9079b3436 100644 --- a/pandas/tests/indexes/timedeltas/test_setops.py +++ b/pandas/tests/indexes/timedeltas/test_setops.py @@ -6,7 +6,6 @@ class TestTimedeltaIndex(object): - _multiprocess_can_split_ = True def test_union(self): diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index 32157a9a44e04..ce0f3b89b753e 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -18,7 +18,6 @@ class TestTimedeltaIndex(DatetimeLike): _holder = TimedeltaIndex - _multiprocess_can_split_ = True def setup_method(self, method): self.indices = dict(index=tm.makeTimedeltaIndex(10)) @@ -300,7 +299,6 @@ def test_freq_conversion(self): class TestTimeSeries(object): - _multiprocess_can_split_ = True def test_series_box_timedelta(self): rng = timedelta_range('1 day 1 s', periods=5, freq='h') diff --git a/pandas/tests/indexes/timedeltas/test_timedelta_range.py b/pandas/tests/indexes/timedeltas/test_timedelta_range.py index 7624e1f79af15..784ef845fea10 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta_range.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta_range.py @@ -7,7 +7,6 @@ class TestTimedeltas(object): - _multiprocess_can_split_ = True def test_timedelta_range(self): diff --git a/pandas/tests/indexes/timedeltas/test_tools.py b/pandas/tests/indexes/timedeltas/test_tools.py index b4ad28eeacb69..daa9739132d9e 100644 --- a/pandas/tests/indexes/timedeltas/test_tools.py +++ b/pandas/tests/indexes/timedeltas/test_tools.py @@ -11,7 +11,6 @@ class TestTimedeltas(object): - _multiprocess_can_split_ = True def test_to_timedelta(self): def conv(v): diff --git a/pandas/tests/scalar/test_timedelta.py b/pandas/tests/scalar/test_timedelta.py index 64d4940082978..667266be2a89b 100644 --- a/pandas/tests/scalar/test_timedelta.py +++ b/pandas/tests/scalar/test_timedelta.py @@ -13,7 
+13,6 @@ class TestTimedeltaArithmetic(object): - _multiprocess_can_split_ = True def test_arithmetic_overflow(self): with pytest.raises(OverflowError): @@ -286,7 +285,6 @@ def test_compare_timedelta_ndarray(self): class TestTimedeltas(object): - _multiprocess_can_split_ = True def setup_method(self, method): pass diff --git a/pandas/tests/series/test_apply.py b/pandas/tests/series/test_apply.py index 3822ecd0a1b0e..0780c846a6c19 100644 --- a/pandas/tests/series/test_apply.py +++ b/pandas/tests/series/test_apply.py @@ -164,8 +164,6 @@ def test_apply_dict_depr(self): class TestSeriesAggregate(TestData): - _multiprocess_can_split_ = True - def test_transform(self): # transforming functions From 9c25d3cd234d4e95147626f0f0474872f69a67b6 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 5 Feb 2018 03:06:42 -0800 Subject: [PATCH 046/217] remove unused calendar options from period_helper (#19534) --- pandas/_libs/src/period_helper.c | 119 +++++++++++-------------------- pandas/_libs/src/period_helper.h | 4 -- pandas/_libs/tslibs/period.pyx | 1 - 3 files changed, 43 insertions(+), 81 deletions(-) diff --git a/pandas/_libs/src/period_helper.c b/pandas/_libs/src/period_helper.c index f1367978bd6c9..8f1c527a68455 100644 --- a/pandas/_libs/src/period_helper.c +++ b/pandas/_libs/src/period_helper.c @@ -47,13 +47,10 @@ static int days_in_month[2][12] = { {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}, {31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}}; -/* Return 1/0 iff year points to a leap year in calendar. */ -static int dInfoCalc_Leapyear(npy_int64 year, int calendar) { - if (calendar == GREGORIAN_CALENDAR) { - return (year % 4 == 0) && ((year % 100 != 0) || (year % 400 == 0)); - } else { - return (year % 4 == 0); - } +/* Return 1/0 iff year points to a leap year. 
+ * Assumes GREGORIAN_CALENDAR */ +static int dInfoCalc_Leapyear(npy_int64 year) { + return (year % 4 == 0) && ((year % 100 != 0) || (year % 400 == 0)); } /* Return the day of the week for the given absolute date. */ @@ -71,40 +68,33 @@ static int dInfoCalc_DayOfWeek(npy_int64 absdate) { static int monthToQuarter(int month) { return ((month - 1) / 3) + 1; } /* Return the year offset, that is the absolute date of the day - 31.12.(year-1) in the given calendar. + 31.12.(year-1) + + Assumes GREGORIAN_CALENDAR + + This is equivalent to: + + (datetime(year, 1, 1) - datetime(1970, 1, 1)).days Note: For the Julian calendar we shift the absdate (which is measured using the Gregorian Epoch) value by two days because the Epoch (0001-01-01) in the Julian calendar lies 2 days before the Epoch in the Gregorian calendar. */ -static int dInfoCalc_YearOffset(npy_int64 year, int calendar) { +static int dInfoCalc_YearOffset(npy_int64 year) { year--; - if (calendar == GREGORIAN_CALENDAR) { - if (year >= 0 || -1 / 4 == -1) - return year * 365 + year / 4 - year / 100 + year / 400; - else - return year * 365 + (year - 3) / 4 - (year - 99) / 100 + + if (year >= 0 || -1 / 4 == -1) + return year * 365 + year / 4 - year / 100 + year / 400; + else + return year * 365 + (year - 3) / 4 - (year - 99) / 100 + (year - 399) / 400; - } else if (calendar == JULIAN_CALENDAR) { - if (year >= 0 || -1 / 4 == -1) - return year * 365 + year / 4 - 2; - else - return year * 365 + (year - 3) / 4 - 2; - } - Py_Error(PyExc_ValueError, "unknown calendar"); -onError: - return INT_ERR_CODE; } -/* Set the instance's value using the given date and time. calendar may be set - * to the flags: GREGORIAN_CALENDAR, JULIAN_CALENDAR to indicate the calendar - * to be used. */ - +/* Set the instance's value using the given date and time. 
+ * Assumes GREGORIAN_CALENDAR */ static int dInfoCalc_SetFromDateAndTime(struct date_info *dinfo, int year, int month, int day, int hour, - int minute, double second, - int calendar) { + int minute, double second) { /* Calculate the absolute date */ { int leap; @@ -116,7 +106,7 @@ static int dInfoCalc_SetFromDateAndTime(struct date_info *dinfo, int year, PyExc_ValueError, "year out of range: %i", year); /* Is it a leap year ? */ - leap = dInfoCalc_Leapyear(year, calendar); + leap = dInfoCalc_Leapyear(year); /* Negative month values indicate months relative to the years end */ if (month < 0) month += 13; @@ -128,7 +118,7 @@ static int dInfoCalc_SetFromDateAndTime(struct date_info *dinfo, int year, Py_AssertWithArg(day >= 1 && day <= days_in_month[leap][month - 1], PyExc_ValueError, "day out of range: %i", day); - yearoffset = dInfoCalc_YearOffset(year, calendar); + yearoffset = dInfoCalc_YearOffset(year); if (yearoffset == INT_ERR_CODE) goto onError; absdate = day + month_offset[leap][month - 1] + yearoffset; @@ -142,8 +132,6 @@ static int dInfoCalc_SetFromDateAndTime(struct date_info *dinfo, int year, dinfo->day_of_week = dInfoCalc_DayOfWeek(absdate); dinfo->day_of_year = (short)(absdate - yearoffset); - - dinfo->calendar = calendar; } /* Calculate the absolute time */ @@ -171,33 +159,27 @@ static int dInfoCalc_SetFromDateAndTime(struct date_info *dinfo, int year, return INT_ERR_CODE; } -/* Sets the date part of the date_info struct using the indicated - calendar. +/* Sets the date part of the date_info struct + Assumes GREGORIAN_CALENDAR XXX This could also be done using some integer arithmetics rather than with this iterative approach... 
*/ static int dInfoCalc_SetFromAbsDate(register struct date_info *dinfo, - npy_int64 absdate, int calendar) { + npy_int64 absdate) { register npy_int64 year; npy_int64 yearoffset; int leap, dayoffset; int *monthoffset; /* Approximate year */ - if (calendar == GREGORIAN_CALENDAR) { - year = (npy_int64)(((double)absdate) / 365.2425); - } else if (calendar == JULIAN_CALENDAR) { - year = (npy_int64)(((double)absdate) / 365.25); - } else { - Py_Error(PyExc_ValueError, "unknown calendar"); - } + year = (npy_int64)(((double)absdate) / 365.2425); if (absdate > 0) year++; /* Apply corrections to reach the correct year */ while (1) { /* Calculate the year offset */ - yearoffset = dInfoCalc_YearOffset(year, calendar); + yearoffset = dInfoCalc_YearOffset(year); if (yearoffset == INT_ERR_CODE) goto onError; /* Backward correction: absdate must be greater than the @@ -208,7 +190,7 @@ static int dInfoCalc_SetFromAbsDate(register struct date_info *dinfo, } dayoffset = absdate - yearoffset; - leap = dInfoCalc_Leapyear(year, calendar); + leap = dInfoCalc_Leapyear(year); /* Forward correction: non leap years only have 365 days */ if (dayoffset > 365 && !leap) { @@ -219,7 +201,6 @@ static int dInfoCalc_SetFromAbsDate(register struct date_info *dinfo, } dinfo->year = year; - dinfo->calendar = calendar; /* Now iterate to find the month */ monthoffset = month_offset[leap]; @@ -410,8 +391,7 @@ static npy_int64 DtoB_WeekendToFriday(npy_int64 absdate, int day_of_week) { static npy_int64 absdate_from_ymd(int y, int m, int d) { struct date_info tempDate; - if (dInfoCalc_SetFromDateAndTime(&tempDate, y, m, d, 0, 0, 0, - GREGORIAN_CALENDAR)) { + if (dInfoCalc_SetFromDateAndTime(&tempDate, y, m, d, 0, 0, 0)) { return INT_ERR_CODE; } return tempDate.absdate; @@ -423,8 +403,7 @@ static npy_int64 asfreq_DTtoA(npy_int64 ordinal, char relation, asfreq_info *af_info) { struct date_info dinfo; ordinal = downsample_daytime(ordinal, af_info, 0); - if (dInfoCalc_SetFromAbsDate(&dinfo, ordinal + 
ORD_OFFSET, - GREGORIAN_CALENDAR)) + if (dInfoCalc_SetFromAbsDate(&dinfo, ordinal + ORD_OFFSET)) return INT_ERR_CODE; if (dinfo.month > af_info->to_a_year_end) { return (npy_int64)(dinfo.year + 1 - BASE_YEAR); @@ -436,8 +415,7 @@ static npy_int64 asfreq_DTtoA(npy_int64 ordinal, char relation, static npy_int64 DtoQ_yq(npy_int64 ordinal, asfreq_info *af_info, int *year, int *quarter) { struct date_info dinfo; - if (dInfoCalc_SetFromAbsDate(&dinfo, ordinal + ORD_OFFSET, - GREGORIAN_CALENDAR)) + if (dInfoCalc_SetFromAbsDate(&dinfo, ordinal + ORD_OFFSET)) return INT_ERR_CODE; if (af_info->to_q_year_end != 12) { dinfo.month -= af_info->to_q_year_end; @@ -474,8 +452,7 @@ static npy_int64 asfreq_DTtoM(npy_int64 ordinal, char relation, ordinal = downsample_daytime(ordinal, af_info, 0); - if (dInfoCalc_SetFromAbsDate(&dinfo, ordinal + ORD_OFFSET, - GREGORIAN_CALENDAR)) + if (dInfoCalc_SetFromAbsDate(&dinfo, ordinal + ORD_OFFSET)) return INT_ERR_CODE; return (npy_int64)((dinfo.year - BASE_YEAR) * 12 + dinfo.month - 1); } @@ -493,8 +470,7 @@ static npy_int64 asfreq_DTtoB(npy_int64 ordinal, char relation, ordinal = downsample_daytime(ordinal, af_info, 0); - if (dInfoCalc_SetFromAbsDate(&dinfo, ordinal + ORD_OFFSET, - GREGORIAN_CALENDAR)) + if (dInfoCalc_SetFromAbsDate(&dinfo, ordinal + ORD_OFFSET)) return INT_ERR_CODE; if (relation == 'S') { @@ -595,8 +571,7 @@ static npy_int64 asfreq_WtoB(npy_int64 ordinal, char relation, asfreq_info *af_info) { struct date_info dinfo; if (dInfoCalc_SetFromAbsDate( - &dinfo, asfreq_WtoDT(ordinal, relation, af_info) + ORD_OFFSET, - GREGORIAN_CALENDAR)) + &dinfo, asfreq_WtoDT(ordinal, relation, af_info) + ORD_OFFSET)) return INT_ERR_CODE; if (relation == 'S') { @@ -655,8 +630,7 @@ static npy_int64 asfreq_MtoB(npy_int64 ordinal, char relation, struct date_info dinfo; if (dInfoCalc_SetFromAbsDate( - &dinfo, asfreq_MtoDT(ordinal, relation, af_info) + ORD_OFFSET, - GREGORIAN_CALENDAR)) + &dinfo, asfreq_MtoDT(ordinal, relation, af_info) + 
ORD_OFFSET)) return INT_ERR_CODE; if (relation == 'S') { @@ -731,8 +705,7 @@ static npy_int64 asfreq_QtoB(npy_int64 ordinal, char relation, asfreq_info *af_info) { struct date_info dinfo; if (dInfoCalc_SetFromAbsDate( - &dinfo, asfreq_QtoDT(ordinal, relation, af_info) + ORD_OFFSET, - GREGORIAN_CALENDAR)) + &dinfo, asfreq_QtoDT(ordinal, relation, af_info) + ORD_OFFSET)) return INT_ERR_CODE; if (relation == 'S') { @@ -803,8 +776,7 @@ static npy_int64 asfreq_AtoB(npy_int64 ordinal, char relation, asfreq_info *af_info) { struct date_info dinfo; if (dInfoCalc_SetFromAbsDate( - &dinfo, asfreq_AtoDT(ordinal, relation, af_info) + ORD_OFFSET, - GREGORIAN_CALENDAR)) + &dinfo, asfreq_AtoDT(ordinal, relation, af_info) + ORD_OFFSET)) return INT_ERR_CODE; if (relation == 'S') { @@ -1096,19 +1068,17 @@ static int dInfoCalc_SetFromAbsTime(struct date_info *dinfo, double abstime) { return 0; } -/* Set the instance's value using the given date and time. calendar - may be set to the flags: GREGORIAN_CALENDAR, JULIAN_CALENDAR to - indicate the calendar to be used. */ +/* Set the instance's value using the given date and time. + Assumes GREGORIAN_CALENDAR. 
*/ static int dInfoCalc_SetFromAbsDateTime(struct date_info *dinfo, - npy_int64 absdate, double abstime, - int calendar) { + npy_int64 absdate, double abstime) { /* Bounds check */ Py_AssertWithArg(abstime >= 0.0 && abstime <= SECONDS_PER_DAY, PyExc_ValueError, "abstime out of range (0.0 - 86400.0): %f", abstime); /* Calculate the date */ - if (dInfoCalc_SetFromAbsDate(dinfo, absdate, calendar)) goto onError; + if (dInfoCalc_SetFromAbsDate(dinfo, absdate)) goto onError; /* Calculate the time */ if (dInfoCalc_SetFromAbsTime(dinfo, abstime)) goto onError; @@ -1356,8 +1326,7 @@ static int _ISOWeek(struct date_info *dinfo) { /* Verify */ if (week < 0) { /* The day lies in last week of the previous year */ - if ((week > -2) || (week == -2 && dInfoCalc_Leapyear(dinfo->year - 1, - dinfo->calendar))) + if ((week > -2) || (week == -2 && dInfoCalc_Leapyear(dinfo->year - 1))) week = 53; else week = 52; @@ -1384,8 +1353,7 @@ int get_date_info(npy_int64 ordinal, int freq, struct date_info *dinfo) { absdate += 1; } - if (dInfoCalc_SetFromAbsDateTime(dinfo, absdate, abstime, - GREGORIAN_CALENDAR)) + if (dInfoCalc_SetFromAbsDateTime(dinfo, absdate, abstime)) return INT_ERR_CODE; return 0; @@ -1480,7 +1448,6 @@ int pdays_in_month(npy_int64 ordinal, int freq) { if (get_date_info(ordinal, freq, &dinfo) == INT_ERR_CODE) return INT_ERR_CODE; - days = days_in_month[dInfoCalc_Leapyear(dinfo.year, dinfo.calendar)] - [dinfo.month - 1]; + days = days_in_month[dInfoCalc_Leapyear(dinfo.year)][dinfo.month - 1]; return days; } diff --git a/pandas/_libs/src/period_helper.h b/pandas/_libs/src/period_helper.h index 35dd20848a2ec..d3d32f81d1f66 100644 --- a/pandas/_libs/src/period_helper.h +++ b/pandas/_libs/src/period_helper.h @@ -24,9 +24,6 @@ frequency conversion routines. 
* declarations from period here */ -#define GREGORIAN_CALENDAR 0 -#define JULIAN_CALENDAR 1 - #define SECONDS_PER_DAY ((double)86400.0) #define Py_AssertWithArg(x, errortype, errorstr, a1) \ @@ -138,7 +135,6 @@ typedef struct date_info { int year; int day_of_week; int day_of_year; - int calendar; } date_info; typedef npy_int64 (*freq_conv_func)(npy_int64, char, asfreq_info *); diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index e2caebe4c4afc..5098e5c9100ff 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -59,7 +59,6 @@ cdef extern from "period_helper.h": int year int day_of_week int day_of_year - int calendar ctypedef struct asfreq_info: int from_week_end From 4ee165cdd9280ec5bdfbe7fea00f83f2b526bf04 Mon Sep 17 00:00:00 2001 From: discort Date: Mon, 5 Feb 2018 06:12:02 -0500 Subject: [PATCH 047/217] BUG: groupby with resample using on parameter errors when selecting column to apply function closes #17813 Author: discort Closes #19433 from discort/fix_17813 and squashes the following commits: 2f25d40a0 [discort] Fixed bug in df.resample using 'on' parameter --- doc/source/whatsnew/v0.23.0.txt | 8 ++++++-- pandas/core/groupby.py | 18 +++++++++++++++--- pandas/tests/test_resample.py | 9 +++++++++ 3 files changed, 30 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 0ac27a2f23386..b3905824f7e44 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -289,6 +289,8 @@ Convert to an xarray DataArray p.to_xarray() +.. _whatsnew_0230.api_breaking.build_changes: + Build Changes ^^^^^^^^^^^^^ @@ -296,6 +298,8 @@ Build Changes - Building from source now explicitly requires ``setuptools`` in ``setup.py`` (:issue:`18113`) - Updated conda recipe to be in compliance with conda-build 3.0+ (:issue:`18002`) +.. 
_whatsnew_0230.api_breaking.extract: + Extraction of matching patterns from strings ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -594,8 +598,8 @@ Groupby/Resample/Rolling - Fixed regression in :func:`DataFrame.groupby` which would not emit an error when called with a tuple key not in the index (:issue:`18798`) - Bug in :func:`DataFrame.resample` which silently ignored unsupported (or mistyped) options for ``label``, ``closed`` and ``convention`` (:issue:`19303`) - Bug in :func:`DataFrame.groupby` where tuples were interpreted as lists of keys rather than as keys (:issue:`17979`, :issue:`18249`) -- Bug in ``transform`` where particular aggregation functions were being incorrectly cast to match the dtype(s) of the grouped data (:issue:`19200`) -- +- Bug in :func:`DataFrame.transform` where particular aggregation functions were being incorrectly cast to match the dtype(s) of the grouped data (:issue:`19200`) +- Bug in :func:`DataFrame.groupby` passing the `on=` kwarg, and subsequently using ``.apply()`` (:issue:`17813`) Sparse ^^^^^^ diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 88af80e295d74..ab0070777c190 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -37,6 +37,7 @@ _ensure_categorical, _ensure_float) from pandas.core.dtypes.cast import maybe_downcast_to_dtype +from pandas.core.dtypes.generic import ABCSeries from pandas.core.dtypes.missing import isna, notna, _maybe_fill from pandas.core.base import (PandasObject, SelectionMixin, GroupByError, @@ -423,6 +424,7 @@ def __init__(self, key=None, level=None, freq=None, axis=0, sort=False): self.obj = None self.indexer = None self.binner = None + self._grouper = None @property def ax(self): @@ -465,12 +467,22 @@ def _set_grouper(self, obj, sort=False): raise ValueError( "The Grouper cannot specify both a key and a level!") + # Keep self.grouper value before overriding + if self._grouper is None: + self._grouper = self.grouper + # the key must be a valid info item if self.key 
is not None: key = self.key - if key not in obj._info_axis: - raise KeyError("The grouper name {0} is not found".format(key)) - ax = Index(obj[key], name=key) + # The 'on' is already defined + if getattr(self.grouper, 'name', None) == key and \ + isinstance(obj, ABCSeries): + ax = self._grouper.take(obj.index) + else: + if key not in obj._info_axis: + raise KeyError( + "The grouper name {0} is not found".format(key)) + ax = Index(obj[key], name=key) else: ax = obj._get_axis(self.axis) diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index a5aaa328a8e06..2de890ea459f0 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -3077,6 +3077,15 @@ def test_getitem_multiple(self): result = r['buyer'].count() assert_series_equal(result, expected) + def test_groupby_resample_on_api_with_getitem(self): + # GH 17813 + df = pd.DataFrame({'id': list('aabbb'), + 'date': pd.date_range('1-1-2016', periods=5), + 'data': 1}) + exp = df.set_index('date').groupby('id').resample('2D')['data'].sum() + result = df.groupby('id').resample('2D', on='date')['data'].sum() + assert_series_equal(result, exp) + def test_nearest(self): # GH 17496 From 181fea420ba2c5ff32ca0e97ae76f51bc31e40b6 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Mon, 5 Feb 2018 06:35:03 -0500 Subject: [PATCH 048/217] TST: Fix makeIntIndex, benchmark get loc Author: Pietro Battiston Closes #19483 from toobaz/test_get_loc and squashes the following commits: 51d691106 [Pietro Battiston] TST: benchmark get_loc in various cases d424f63df [Pietro Battiston] TST: produce unsorted integer index (consistently with other types) --- asv_bench/benchmarks/index_object.py | 17 +++++++++++++++++ pandas/tests/indexes/test_base.py | 16 +++++++++------- pandas/tests/indexing/test_floats.py | 15 +++++++-------- 3 files changed, 33 insertions(+), 15 deletions(-) diff --git a/asv_bench/benchmarks/index_object.py b/asv_bench/benchmarks/index_object.py index 
970760373632a..f1703e163917a 100644 --- a/asv_bench/benchmarks/index_object.py +++ b/asv_bench/benchmarks/index_object.py @@ -147,6 +147,11 @@ def setup(self, dtype): self.idx = getattr(tm, 'make{}Index'.format(dtype))(N) self.array_mask = (np.arange(N) % 3) == 0 self.series_mask = Series(self.array_mask) + self.sorted = self.idx.sort_values() + half = N // 2 + self.non_unique = self.idx[:half].append(self.idx[:half]) + self.non_unique_sorted = self.sorted[:half].append(self.sorted[:half]) + self.key = self.sorted[N // 4] def time_boolean_array(self, dtype): self.idx[self.array_mask] @@ -163,6 +168,18 @@ def time_slice(self, dtype): def time_slice_step(self, dtype): self.idx[::2] + def time_get_loc(self, dtype): + self.idx.get_loc(self.key) + + def time_get_loc_sorted(self, dtype): + self.sorted.get_loc(self.key) + + def time_get_loc_non_unique(self, dtype): + self.non_unique.get_loc(self.key) + + def time_get_loc_non_unique_sorted(self, dtype): + self.non_unique_sorted.get_loc(self.key) + class Float64IndexMethod(object): # GH 13166 diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 974099f1fbbe9..90edcb526bb2e 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -830,15 +830,16 @@ def test_map_with_tuples(self): # Test that returning a single tuple from an Index # returns an Index. - boolean_index = tm.makeIntIndex(3).map(lambda x: (x,)) - expected = Index([(0,), (1,), (2,)]) - tm.assert_index_equal(boolean_index, expected) + idx = tm.makeIntIndex(3) + result = tm.makeIntIndex(3).map(lambda x: (x,)) + expected = Index([(i,) for i in idx]) + tm.assert_index_equal(result, expected) # Test that returning a tuple from a map of a single index # returns a MultiIndex object. 
- boolean_index = tm.makeIntIndex(3).map(lambda x: (x, x == 1)) - expected = MultiIndex.from_tuples([(0, False), (1, True), (2, False)]) - tm.assert_index_equal(boolean_index, expected) + result = idx.map(lambda x: (x, x == 1)) + expected = MultiIndex.from_tuples([(i, i == 1) for i in idx]) + tm.assert_index_equal(result, expected) # Test that returning a single object from a MultiIndex # returns an Index. @@ -870,7 +871,8 @@ def test_map_tseries_indices_return_index(self): def test_map_dictlike(self, mapper): # GH 12756 expected = Index(['foo', 'bar', 'baz']) - result = tm.makeIntIndex(3).map(mapper(expected.values, [0, 1, 2])) + idx = tm.makeIntIndex(3) + result = idx.map(mapper(expected.values, idx)) tm.assert_index_equal(result, expected) for name in self.indices.keys(): diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index d2692c7dc302e..e3f93924aca0d 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -4,7 +4,8 @@ from warnings import catch_warnings import numpy as np -from pandas import Series, DataFrame, Index, Float64Index +from pandas import (Series, DataFrame, Index, Float64Index, Int64Index, + RangeIndex) from pandas.util.testing import assert_series_equal, assert_almost_equal import pandas.util.testing as tm @@ -206,9 +207,8 @@ def test_scalar_integer(self): # test how scalar float indexers work on int indexes # integer index - for index in [tm.makeIntIndex, tm.makeRangeIndex]: + for i in [Int64Index(range(5)), RangeIndex(5)]: - i = index(5) for s in [Series(np.arange(len(i))), DataFrame(np.random.randn(len(i), len(i)), index=i, columns=i)]: @@ -362,9 +362,9 @@ def test_slice_integer(self): # these coerce to a like integer # oob indicates if we are out of bounds # of positional indexing - for index, oob in [(tm.makeIntIndex(5), False), - (tm.makeRangeIndex(5), False), - (tm.makeIntIndex(5) + 10, True)]: + for index, oob in [(Int64Index(range(5)), False), + 
(RangeIndex(5), False), + (Int64Index(range(5)) + 10, True)]: # s is an in-range index s = Series(range(5), index=index) @@ -486,9 +486,8 @@ def f(): def test_slice_integer_frame_getitem(self): # similar to above, but on the getitem dim (of a DataFrame) - for index in [tm.makeIntIndex, tm.makeRangeIndex]: + for index in [Int64Index(range(5)), RangeIndex(5)]: - index = index(5) s = DataFrame(np.random.randn(5, 2), index=index) def f(idxr): From ad9e20596d7ba87d72b8bc49b001abf8bb05a309 Mon Sep 17 00:00:00 2001 From: Pepe Flores Date: Mon, 5 Feb 2018 20:43:02 +0200 Subject: [PATCH 049/217] DOC: Fix typo in example (#19537) Fix typo in the example for pandas.io.formats.style.Styler.format --- pandas/io/formats/style.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 58796aa30f0bf..20e72dd6bde91 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -364,7 +364,7 @@ def format(self, formatter, subset=None): >>> df = pd.DataFrame(np.random.randn(4, 2), columns=['a', 'b']) >>> df.style.format("{:.2%}") >>> df['c'] = ['a', 'b', 'c', 'd'] - >>> df.style.format({'C': str.upper}) + >>> df.style.format({'c': str.upper}) """ if subset is None: row_locs = range(len(self.data)) From e4ddbaf8a4799e3296513551de39e2735b1b05c9 Mon Sep 17 00:00:00 2001 From: Matthew Kirk Date: Mon, 5 Feb 2018 20:24:00 -0500 Subject: [PATCH 050/217] BUG: don't assume series is length > 0 closes #19368 Author: Matthew Kirk Closes #19438 from hexgnu/segfault_memory_usage and squashes the following commits: f9433d844 [Matthew Kirk] Use shared docstring and get rid of if condition 4ead141c0 [Matthew Kirk] Move whatsnew doc to Sparse ae9f74d58 [Matthew Kirk] Revert base.py cdd4141e4 [Matthew Kirk] Fix linting error 93a0c3daa [Matthew Kirk] Merge remote-tracking branch 'upstream/master' into segfault_memory_usage 207bc74d2 [Matthew Kirk] Define memory_usage on SparseArray 21ae14707 [Matthew Kirk] FIX: revert 
change to lib.pyx 3f52a44f6 [Matthew Kirk] Ah ha I think I got it 5e59e9cbc [Matthew Kirk] Use range over 0 <= for loops e25158713 [Matthew Kirk] Fix failing test with indexing 27df317be [Matthew Kirk] Merge remote-tracking branch 'upstream/master' into segfault_memory_usage 7fdd03e94 [Matthew Kirk] Take out comment and use product 6bd6ddd02 [Matthew Kirk] BUG: don't assume series is length > 0 --- doc/source/whatsnew/v0.23.0.txt | 2 +- pandas/core/base.py | 2 +- pandas/core/sparse/array.py | 16 ++++++++++++++-- pandas/tests/sparse/series/test_series.py | 13 +++++++++++++ 4 files changed, 29 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index b3905824f7e44..e4f00990d28c0 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -606,7 +606,7 @@ Sparse - Bug in which creating a ``SparseDataFrame`` from a dense ``Series`` or an unsupported type raised an uncontrolled exception (:issue:`19374`) - Bug in :class:`SparseDataFrame.to_csv` causing exception (:issue:`19384`) -- +- Bug in :class:`SparseSeries.memory_usage` which caused segfault by accessing non sparse elements (:issue:`19368`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/base.py b/pandas/core/base.py index 54d25a16a10a3..d5b204dba063e 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1048,7 +1048,7 @@ def is_monotonic_decreasing(self): def memory_usage(self, deep=False): """ - Memory usage of my values + Memory usage of the values Parameters ---------- diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py index fa07400a0706e..65aefd9fb8c0a 100644 --- a/pandas/core/sparse/array.py +++ b/pandas/core/sparse/array.py @@ -8,10 +8,10 @@ import warnings import pandas as pd -from pandas.core.base import PandasObject +from pandas.core.base import PandasObject, IndexOpsMixin from pandas import compat -from pandas.compat import range +from pandas.compat import range, PYPY from pandas.compat.numpy 
import function as nv from pandas.core.dtypes.generic import ABCSparseSeries @@ -30,6 +30,7 @@ from pandas.core.dtypes.missing import isna, notna, na_value_for_dtype import pandas._libs.sparse as splib +import pandas._libs.lib as lib from pandas._libs.sparse import SparseIndex, BlockIndex, IntIndex from pandas._libs import index as libindex import pandas.core.algorithms as algos @@ -238,6 +239,17 @@ def kind(self): elif isinstance(self.sp_index, IntIndex): return 'integer' + @Appender(IndexOpsMixin.memory_usage.__doc__) + def memory_usage(self, deep=False): + values = self.sp_values + + v = values.nbytes + + if deep and is_object_dtype(self) and not PYPY: + v += lib.memory_usage_of_objects(values) + + return v + def __array_wrap__(self, out_arr, context=None): """ NumPy calls this method when ufunc is applied diff --git a/pandas/tests/sparse/series/test_series.py b/pandas/tests/sparse/series/test_series.py index 2ea1e63433520..3f5d5a59cc540 100644 --- a/pandas/tests/sparse/series/test_series.py +++ b/pandas/tests/sparse/series/test_series.py @@ -23,6 +23,8 @@ from pandas.core.sparse.api import SparseSeries from pandas.tests.series.test_api import SharedWithSparse +from itertools import product + def _test_data1(): # nan-based @@ -971,6 +973,17 @@ def test_combine_first(self): tm.assert_sp_series_equal(result, result2) tm.assert_sp_series_equal(result, expected) + @pytest.mark.parametrize('deep,fill_values', [([True, False], + [0, 1, np.nan, None])]) + def test_memory_usage_deep(self, deep, fill_values): + for deep, fill_value in product(deep, fill_values): + sparse_series = SparseSeries(fill_values, fill_value=fill_value) + dense_series = Series(fill_values) + sparse_usage = sparse_series.memory_usage(deep=deep) + dense_usage = dense_series.memory_usage(deep=deep) + + assert sparse_usage < dense_usage + class TestSparseHandlingMultiIndexes(object): From d7dcac2e46ffac97b15ca2d68e598650736081ed Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 5 Feb 2018 
20:29:15 -0500 Subject: [PATCH 051/217] TST: fix and test index division by zero Related: #19336 Author: Brock Mendel Closes #19347 from jbrockmendel/div_zero2 and squashes the following commits: be1e2e1b8 [Brock Mendel] move fixture to conftest 64b0c0853 [Brock Mendel] Merge branch 'master' of https://github.com/pandas-dev/pandas into div_zero2 aa969f8d2 [Brock Mendel] Merge branch 'master' of https://github.com/pandas-dev/pandas into div_zero2 000aefde0 [Brock Mendel] fix long again 9de356ab0 [Brock Mendel] revert fixture to fix test_range failures b8cf21d3e [Brock Mendel] flake8 remove unused import afedba98b [Brock Mendel] whatsnew clarification b51c2e14c [Brock Mendel] fixturize 37efd5108 [Brock Mendel] make zero a fixture 965f7214e [Brock Mendel] Merge branch 'master' of https://github.com/pandas-dev/pandas into div_zero2 d648ef698 [Brock Mendel] requested edits 1ef3a6c74 [Brock Mendel] Merge branch 'master' of https://github.com/pandas-dev/pandas into div_zero2 78de1a4df [Brock Mendel] Merge branch 'master' of https://github.com/pandas-dev/pandas into div_zero2 0277d9fca [Brock Mendel] add ipython output to whatsnew 5d7e3ea0c [Brock Mendel] Merge branch 'master' of https://github.com/pandas-dev/pandas into div_zero2 ea75c3ca0 [Brock Mendel] ipython block 6fc61bd99 [Brock Mendel] elaborate docstring ca3bf4241 [Brock Mendel] Whatsnew section cd543497c [Brock Mendel] move dispatch_missing to core.missing 06df02a89 [Brock Mendel] py3 fix 84c74c54a [Brock Mendel] remove operator.div for py3 6acc2f78a [Brock Mendel] fix missing import e0e89b978 [Brock Mendel] fix and and tests for divmod 969f342e1 [Brock Mendel] fix and test index division by zero --- doc/source/whatsnew/v0.23.0.txt | 44 +++++++++++++++ pandas/core/indexes/base.py | 2 + pandas/core/indexes/range.py | 31 +++++------ pandas/core/missing.py | 82 ++++++++++++++++++++++++++++ pandas/tests/indexes/conftest.py | 18 +++++- pandas/tests/indexes/test_numeric.py | 42 ++++++++++++++ 6 files changed, 200 
insertions(+), 19 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index e4f00990d28c0..ea56ebad7d782 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -204,6 +204,50 @@ Please note that the string `index` is not supported with the round trip format, new_df print(new_df.index.name) +.. _whatsnew_0230.enhancements.index_division_by_zero: + +Index Division By Zero Fills Correctly +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Division operations on ``Index`` and subclasses will now fill division of positive numbers by zero with ``np.inf``, division of negative numbers by zero with ``-np.inf`` and `0 / 0` with ``np.nan``. This matches existing ``Series`` behavior. (:issue:`19322`, :issue:`19347`) + +Previous Behavior: + +.. code-block:: ipython + + In [6]: index = pd.Int64Index([-1, 0, 1]) + + In [7]: index / 0 + Out[7]: Int64Index([0, 0, 0], dtype='int64') + + # Previous behavior yielded different results depending on the type of zero in the divisor + In [8]: index / 0.0 + Out[8]: Float64Index([-inf, nan, inf], dtype='float64') + + In [9]: index = pd.UInt64Index([0, 1]) + + In [10]: index / np.array([0, 0], dtype=np.uint64) + Out[10]: UInt64Index([0, 0], dtype='uint64') + + In [11]: pd.RangeIndex(1, 5) / 0 + ZeroDivisionError: integer division or modulo by zero + +Current Behavior: + +.. ipython:: python + + index = pd.Int64Index([-1, 0, 1]) + # division by zero gives -infinity where negative, +infinity where positive, and NaN for 0 / 0 + index / 0 + + # The result of division by zero should not depend on whether the zero is int or float + index / 0.0 + + index = pd.UInt64Index([0, 1]) + index / np.array([0, 0], dtype=np.uint64) + + pd.RangeIndex(1, 5) / 0 + .. 
_whatsnew_0230.enhancements.other: Other Enhancements diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 626f3dc86556a..1e1bb0d49b3df 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4040,6 +4040,8 @@ def _evaluate_numeric_binop(self, other): attrs = self._maybe_update_attributes(attrs) with np.errstate(all='ignore'): result = op(values, other) + + result = missing.dispatch_missing(op, values, other, result) return constructor(result, **attrs) return _evaluate_numeric_binop diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index a82ee6b2b44af..0ed92a67c7e14 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -550,7 +550,7 @@ def __getitem__(self, key): return super_getitem(key) def __floordiv__(self, other): - if is_integer(other): + if is_integer(other) and other != 0: if (len(self) == 0 or self._start % other == 0 and self._step % other == 0): @@ -592,14 +592,15 @@ def _evaluate_numeric_binop(self, other): attrs = self._get_attributes_dict() attrs = self._maybe_update_attributes(attrs) + left, right = self, other if reversed: - self, other = other, self + left, right = right, left try: # apply if we have an override if step: with np.errstate(all='ignore'): - rstep = step(self._step, other) + rstep = step(left._step, right) # we don't have a representable op # so return a base index @@ -607,11 +608,11 @@ def _evaluate_numeric_binop(self, other): raise ValueError else: - rstep = self._step + rstep = left._step with np.errstate(all='ignore'): - rstart = op(self._start, other) - rstop = op(self._stop, other) + rstart = op(left._start, right) + rstop = op(left._stop, right) result = RangeIndex(rstart, rstop, @@ -627,18 +628,12 @@ def _evaluate_numeric_binop(self, other): return result - except (ValueError, TypeError, AttributeError): - pass - - # convert to Int64Index ops - if isinstance(self, RangeIndex): - self = self.values - if isinstance(other, RangeIndex): 
- other = other.values - - with np.errstate(all='ignore'): - results = op(self, other) - return Index(results, **attrs) + except (ValueError, TypeError, AttributeError, + ZeroDivisionError): + # Defer to Int64Index implementation + if reversed: + return op(other, self._int64index) + return op(self._int64index, other) return _evaluate_numeric_binop diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 2eccc5777bca6..31c489e2f8941 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -1,6 +1,7 @@ """ Routines for filling missing data """ +import operator import numpy as np from distutils.version import LooseVersion @@ -650,6 +651,87 @@ def fill_zeros(result, x, y, name, fill): return result +def mask_zero_div_zero(x, y, result, copy=False): + """ + Set results of 0 / 0 or 0 // 0 to np.nan, regardless of the dtypes + of the numerator or the denominator. + + Parameters + ---------- + x : ndarray + y : ndarray + result : ndarray + copy : bool (default False) + Whether to always create a new array or try to fill in the existing + array if possible. 
+ + Returns + ------- + filled_result : ndarray + + Examples + -------- + >>> x = np.array([1, 0, -1], dtype=np.int64) + >>> y = 0 # int 0; numpy behavior is different with float + >>> result = x / y + >>> result # raw numpy result does not fill division by zero + array([0, 0, 0]) + >>> mask_zero_div_zero(x, y, result) + array([ inf, nan, -inf]) + """ + if is_scalar(y): + y = np.array(y) + + zmask = y == 0 + if zmask.any(): + shape = result.shape + + nan_mask = (zmask & (x == 0)).ravel() + neginf_mask = (zmask & (x < 0)).ravel() + posinf_mask = (zmask & (x > 0)).ravel() + + if nan_mask.any() or neginf_mask.any() or posinf_mask.any(): + # Fill negative/0 with -inf, positive/0 with +inf, 0/0 with NaN + result = result.astype('float64', copy=copy).ravel() + + np.putmask(result, nan_mask, np.nan) + np.putmask(result, posinf_mask, np.inf) + np.putmask(result, neginf_mask, -np.inf) + + result = result.reshape(shape) + + return result + + +def dispatch_missing(op, left, right, result): + """ + Fill nulls caused by division by zero, casting to a different dtype + if necessary. + + Parameters + ---------- + op : function (operator.add, operator.div, ...) 
+ left : object (Index for non-reversed ops) + right : object (Index for reversed ops) + result : ndarray + + Returns + ------- + result : ndarray + """ + opstr = '__{opname}__'.format(opname=op.__name__).replace('____', '__') + if op in [operator.truediv, operator.floordiv, + getattr(operator, 'div', None)]: + result = mask_zero_div_zero(left, right, result) + elif op is operator.mod: + result = fill_zeros(result, left, right, opstr, np.nan) + elif op is divmod: + res0 = mask_zero_div_zero(left, right, result[0]) + res1 = fill_zeros(result[1], left, right, opstr, np.nan) + result = (res0, res1) + return result + + def _interp_limit(invalid, fw_limit, bw_limit): """ Get indexers of values that won't be filled diff --git a/pandas/tests/indexes/conftest.py b/pandas/tests/indexes/conftest.py index 217ee07affa84..6d88ef0cfa6c5 100644 --- a/pandas/tests/indexes/conftest.py +++ b/pandas/tests/indexes/conftest.py @@ -1,9 +1,10 @@ import pytest import numpy as np +import pandas as pd import pandas.util.testing as tm from pandas.core.indexes.api import Index, MultiIndex -from pandas.compat import lzip +from pandas.compat import lzip, long @pytest.fixture(params=[tm.makeUnicodeIndex(100), @@ -29,3 +30,18 @@ def indices(request): def one(request): # zero-dim integer array behaves like an integer return request.param + + +zeros = [box([0] * 5, dtype=dtype) + for box in [pd.Index, np.array] + for dtype in [np.int64, np.uint64, np.float64]] +zeros.extend([np.array(0, dtype=dtype) + for dtype in [np.int64, np.uint64, np.float64]]) +zeros.extend([0, 0.0, long(0)]) + + +@pytest.fixture(params=zeros) +def zero(request): + # For testing division by (or of) zero for Index with length 5, this + # gives several scalar-zeros and length-5 vector-zeros + return request.param diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 0c1bec7a6f1a9..c6883df7ee91a 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py 
@@ -157,6 +157,48 @@ def test_divmod_series(self): for r, e in zip(result, expected): tm.assert_series_equal(r, e) + def test_div_zero(self, zero): + idx = self.create_index() + + expected = Index([np.nan, np.inf, np.inf, np.inf, np.inf], + dtype=np.float64) + result = idx / zero + tm.assert_index_equal(result, expected) + ser_compat = Series(idx).astype('i8') / np.array(zero).astype('i8') + tm.assert_series_equal(ser_compat, Series(result)) + + def test_floordiv_zero(self, zero): + idx = self.create_index() + expected = Index([np.nan, np.inf, np.inf, np.inf, np.inf], + dtype=np.float64) + + result = idx // zero + tm.assert_index_equal(result, expected) + ser_compat = Series(idx).astype('i8') // np.array(zero).astype('i8') + tm.assert_series_equal(ser_compat, Series(result)) + + def test_mod_zero(self, zero): + idx = self.create_index() + + expected = Index([np.nan, np.nan, np.nan, np.nan, np.nan], + dtype=np.float64) + result = idx % zero + tm.assert_index_equal(result, expected) + ser_compat = Series(idx).astype('i8') % np.array(zero).astype('i8') + tm.assert_series_equal(ser_compat, Series(result)) + + def test_divmod_zero(self, zero): + idx = self.create_index() + + exleft = Index([np.nan, np.inf, np.inf, np.inf, np.inf], + dtype=np.float64) + exright = Index([np.nan, np.nan, np.nan, np.nan, np.nan], + dtype=np.float64) + + result = divmod(idx, zero) + tm.assert_index_equal(result[0], exleft) + tm.assert_index_equal(result[1], exright) + def test_explicit_conversions(self): # GH 8608 From cc1b1e7107b05984a3a038dd7395173e3d25b20e Mon Sep 17 00:00:00 2001 From: Sam Foo Date: Tue, 6 Feb 2018 05:15:50 -0500 Subject: [PATCH 052/217] DOC: Remove repeated duplicated word (#19546) --- doc/source/advanced.rst | 2 +- doc/source/comparison_with_sas.rst | 4 ++-- doc/source/computation.rst | 2 +- doc/source/io.rst | 2 +- doc/source/release.rst | 10 +++++----- doc/source/tutorials.rst | 2 +- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git 
a/doc/source/advanced.rst b/doc/source/advanced.rst index 25f7c5a3ad948..ca903dadc6eb1 100644 --- a/doc/source/advanced.rst +++ b/doc/source/advanced.rst @@ -672,7 +672,7 @@ The ``CategoricalIndex`` is **preserved** after indexing: df2.loc['a'].index Sorting the index will sort by the order of the categories (Recall that we -created the index with with ``CategoricalDtype(list('cab'))``, so the sorted +created the index with ``CategoricalDtype(list('cab'))``, so the sorted order is ``cab``.). .. ipython:: python diff --git a/doc/source/comparison_with_sas.rst b/doc/source/comparison_with_sas.rst index e9e0d7716af3a..214667119f7e0 100644 --- a/doc/source/comparison_with_sas.rst +++ b/doc/source/comparison_with_sas.rst @@ -279,7 +279,7 @@ date/datetime columns. The equivalent pandas operations are shown below. In addition to these functions pandas supports other Time Series features -not available in Base SAS (such as resampling and and custom offsets) - +not available in Base SAS (such as resampling and custom offsets) - see the :ref:`timeseries documentation` for more details. .. ipython:: python @@ -584,7 +584,7 @@ For example, in SAS you could do this to filter missing values. if value_x ^= .; run; -Which doesn't work in in pandas. Instead, the ``pd.isna`` or ``pd.notna`` functions +Which doesn't work in pandas. Instead, the ``pd.isna`` or ``pd.notna`` functions should be used for comparisons. .. ipython:: python diff --git a/doc/source/computation.rst b/doc/source/computation.rst index a64542fa71705..4285767654e25 100644 --- a/doc/source/computation.rst +++ b/doc/source/computation.rst @@ -512,7 +512,7 @@ a same sized result as the input. When using ``.resample()`` with an offset. Construct a new index that is the frequency of the offset. For each frequency bin, aggregate points from the input within a backwards-in-time looking window that fall in that bin. The result of this -aggregation is the output for that frequency point. 
The windows are fixed size size in the frequency space. Your result +aggregation is the output for that frequency point. The windows are fixed size in the frequency space. Your result will have the shape of a regular frequency between the min and the max of the original input object. To summarize, ``.rolling()`` is a time-based window operation, while ``.resample()`` is a frequency-based window operation. diff --git a/doc/source/io.rst b/doc/source/io.rst index 60dc89f8fd495..1785de54b7dd6 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -4529,7 +4529,7 @@ Several caveats. on an attempt at serialization. You can specify an ``engine`` to direct the serialization. This can be one of ``pyarrow``, or ``fastparquet``, or ``auto``. -If the engine is NOT specified, then the ``pd.options.io.parquet.engine`` option is checked; if this is also ``auto``, then +If the engine is NOT specified, then the ``pd.options.io.parquet.engine`` option is checked; if this is also ``auto``, then ``pyarrow`` is tried, and falling back to ``fastparquet``. See the documentation for `pyarrow `__ and `fastparquet `__ diff --git a/doc/source/release.rst b/doc/source/release.rst index cd763de42d162..8e063116cbf07 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -406,7 +406,7 @@ of all enhancements and bugs that have been fixed in 0.20.1. .. note:: - This is a combined release for 0.20.0 and and 0.20.1. + This is a combined release for 0.20.0 and 0.20.1. Version 0.20.1 contains one additional change for backwards-compatibility with downstream projects using pandas' ``utils`` routines. (:issue:`16250`) Thanks @@ -2918,7 +2918,7 @@ Improvements to existing features - clipboard functions use pyperclip (no dependencies on Windows, alternative dependencies offered for Linux) (:issue:`3837`). 
- Plotting functions now raise a ``TypeError`` before trying to plot anything - if the associated objects have have a dtype of ``object`` (:issue:`1818`, + if the associated objects have a dtype of ``object`` (:issue:`1818`, :issue:`3572`, :issue:`3911`, :issue:`3912`), but they will try to convert object arrays to numeric arrays if possible so that you can still plot, for example, an object array with floats. This happens before any drawing takes place which @@ -4082,7 +4082,7 @@ Bug Fixes columns (:issue:`1943`) - Fix time zone localization bug causing improper fields (e.g. hours) in time zones that have not had a UTC transition in a long time (:issue:`1946`) -- Fix errors when parsing and working with with fixed offset timezones +- Fix errors when parsing and working with fixed offset timezones (:issue:`1922`, :issue:`1928`) - Fix text parser bug when handling UTC datetime objects generated by dateutil (:issue:`1693`) @@ -4383,7 +4383,7 @@ Bug Fixes error (:issue:`1090`) - Consistently set name on groupby pieces (:issue:`184`) - Treat dict return values as Series in GroupBy.apply (:issue:`823`) -- Respect column selection for DataFrame in in GroupBy.transform (:issue:`1365`) +- Respect column selection for DataFrame in GroupBy.transform (:issue:`1365`) - Fix MultiIndex partial indexing bug (:issue:`1352`) - Enable assignment of rows in mixed-type DataFrame via .ix (:issue:`1432`) - Reset index mapping when grouping Series in Cython (:issue:`1423`) @@ -5040,7 +5040,7 @@ New Features - Add `melt` function to `pandas.core.reshape` - Add `level` parameter to group by level in Series and DataFrame descriptive statistics (:issue:`313`) -- Add `head` and `tail` methods to Series, analogous to to DataFrame (PR +- Add `head` and `tail` methods to Series, analogous to DataFrame (PR :issue:`296`) - Add `Series.isin` function which checks if each value is contained in a passed sequence (:issue:`289`) diff --git a/doc/source/tutorials.rst b/doc/source/tutorials.rst index 
43ccd372d9d5b..710212bc237cd 100644 --- a/doc/source/tutorials.rst +++ b/doc/source/tutorials.rst @@ -19,7 +19,7 @@ pandas Cookbook The goal of this cookbook (by `Julia Evans `_) is to give you some concrete examples for getting started with pandas. These are examples with real-world data, and all the bugs and weirdness that -that entails. +entails. Here are links to the v0.1 release. For an up-to-date table of contents, see the `pandas-cookbook GitHub repository `_. To run the examples in this tutorial, you'll need to From 09c6317e6458bd82a29c754b747330fbd18c212e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 6 Feb 2018 03:20:35 -0800 Subject: [PATCH 053/217] centralize and split frame division tests (#19527) --- pandas/tests/frame/test_arithmetic.py | 122 +++++++++++++++++++++++++- pandas/tests/frame/test_operators.py | 70 --------------- pandas/tests/frame/test_timeseries.py | 9 -- 3 files changed, 121 insertions(+), 80 deletions(-) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 9b99a7b73b82b..1bb8e8edffc6e 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -1,8 +1,9 @@ # -*- coding: utf-8 -*- - import pytest import numpy as np +from pandas.compat import range + import pandas as pd import pandas.util.testing as tm @@ -58,10 +59,129 @@ def test_df_flex_cmp_constant_return_types_empty(self, opname): result = getattr(empty, opname)(const).get_dtype_counts() tm.assert_series_equal(result, pd.Series([2], ['bool'])) + @pytest.mark.parametrize('timestamps', [ + [pd.Timestamp('2012-01-01 13:00:00+00:00')] * 2, + [pd.Timestamp('2012-01-01 13:00:00')] * 2]) + def test_tz_aware_scalar_comparison(self, timestamps): + # Test for issue #15966 + df = pd.DataFrame({'test': timestamps}) + expected = pd.DataFrame({'test': [False, False]}) + tm.assert_frame_equal(df == -1, expected) + # ------------------------------------------------------------------- # Arithmetic +class 
TestFrameMulDiv(object): + """Tests for DataFrame multiplication and division""" + # ------------------------------------------------------------------ + # Mod By Zero + + def test_df_mod_zero_df(self): + # GH#3590, modulo as ints + df = pd.DataFrame({'first': [3, 4, 5, 8], 'second': [0, 0, 0, 3]}) + + # this is technically wrong, as the integer portion is coerced to float + # ### + first = pd.Series([0, 0, 0, 0], dtype='float64') + second = pd.Series([np.nan, np.nan, np.nan, 0]) + expected = pd.DataFrame({'first': first, 'second': second}) + result = df % df + tm.assert_frame_equal(result, expected) + + def test_df_mod_zero_array(self): + # GH#3590, modulo as ints + df = pd.DataFrame({'first': [3, 4, 5, 8], 'second': [0, 0, 0, 3]}) + + # this is technically wrong, as the integer portion is coerced to float + # ### + first = pd.Series([0, 0, 0, 0], dtype='float64') + second = pd.Series([np.nan, np.nan, np.nan, 0]) + expected = pd.DataFrame({'first': first, 'second': second}) + + # numpy has a slightly different (wrong) treatment + with np.errstate(all='ignore'): + arr = df.values % df.values + result2 = pd.DataFrame(arr, index=df.index, + columns=df.columns, dtype='float64') + result2.iloc[0:3, 1] = np.nan + tm.assert_frame_equal(result2, expected) + + def test_df_mod_zero_int(self): + # GH#3590, modulo as ints + df = pd.DataFrame({'first': [3, 4, 5, 8], 'second': [0, 0, 0, 3]}) + + result = df % 0 + expected = pd.DataFrame(np.nan, index=df.index, columns=df.columns) + tm.assert_frame_equal(result, expected) + + # numpy has a slightly different (wrong) treatment + with np.errstate(all='ignore'): + arr = df.values.astype('float64') % 0 + result2 = pd.DataFrame(arr, index=df.index, columns=df.columns) + tm.assert_frame_equal(result2, expected) + + def test_df_mod_zero_series_does_not_commute(self): + # GH#3590, modulo as ints + # not commutative with series + df = pd.DataFrame(np.random.randn(10, 5)) + ser = df[0] + res = ser % df + res2 = df % ser + assert not 
res.fillna(0).equals(res2.fillna(0)) + + # ------------------------------------------------------------------ + # Division By Zero + + def test_df_div_zero_df(self): + # integer div, but deal with the 0's (GH#9144) + df = pd.DataFrame({'first': [3, 4, 5, 8], 'second': [0, 0, 0, 3]}) + result = df / df + + first = pd.Series([1.0, 1.0, 1.0, 1.0]) + second = pd.Series([np.nan, np.nan, np.nan, 1]) + expected = pd.DataFrame({'first': first, 'second': second}) + tm.assert_frame_equal(result, expected) + + def test_df_div_zero_array(self): + # integer div, but deal with the 0's (GH#9144) + df = pd.DataFrame({'first': [3, 4, 5, 8], 'second': [0, 0, 0, 3]}) + + first = pd.Series([1.0, 1.0, 1.0, 1.0]) + second = pd.Series([np.nan, np.nan, np.nan, 1]) + expected = pd.DataFrame({'first': first, 'second': second}) + + with np.errstate(all='ignore'): + arr = df.values.astype('float') / df.values + result = pd.DataFrame(arr, index=df.index, + columns=df.columns) + tm.assert_frame_equal(result, expected) + + def test_df_div_zero_int(self): + # integer div, but deal with the 0's (GH#9144) + df = pd.DataFrame({'first': [3, 4, 5, 8], 'second': [0, 0, 0, 3]}) + + result = df / 0 + expected = pd.DataFrame(np.inf, index=df.index, columns=df.columns) + expected.iloc[0:3, 1] = np.nan + tm.assert_frame_equal(result, expected) + + # numpy has a slightly different (wrong) treatment + with np.errstate(all='ignore'): + arr = df.values.astype('float64') / 0 + result2 = pd.DataFrame(arr, index=df.index, + columns=df.columns) + tm.assert_frame_equal(result2, expected) + + def test_df_div_zero_series_does_not_commute(self): + # integer div, but deal with the 0's (GH#9144) + df = pd.DataFrame(np.random.randn(10, 5)) + ser = df[0] + res = ser / df + res2 = df / ser + assert not res.fillna(0).equals(res2.fillna(0)) + + class TestFrameArithmetic(object): @pytest.mark.xfail(reason='GH#7996 datetime64 units not converted to nano') diff --git a/pandas/tests/frame/test_operators.py 
b/pandas/tests/frame/test_operators.py index bdccbec6111d3..bf895be8bc813 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -203,76 +203,6 @@ def test_timestamp_compare(self): result = right_f(Timestamp('nat'), df) assert_frame_equal(result, expected) - def test_modulo(self): - # GH3590, modulo as ints - p = DataFrame({'first': [3, 4, 5, 8], 'second': [0, 0, 0, 3]}) - - # this is technically wrong as the integer portion is coerced to float - # ### - expected = DataFrame({'first': Series([0, 0, 0, 0], dtype='float64'), - 'second': Series([np.nan, np.nan, np.nan, 0])}) - result = p % p - assert_frame_equal(result, expected) - - # numpy has a slightly different (wrong) treatement - with np.errstate(all='ignore'): - arr = p.values % p.values - result2 = DataFrame(arr, index=p.index, - columns=p.columns, dtype='float64') - result2.iloc[0:3, 1] = np.nan - assert_frame_equal(result2, expected) - - result = p % 0 - expected = DataFrame(np.nan, index=p.index, columns=p.columns) - assert_frame_equal(result, expected) - - # numpy has a slightly different (wrong) treatement - with np.errstate(all='ignore'): - arr = p.values.astype('float64') % 0 - result2 = DataFrame(arr, index=p.index, columns=p.columns) - assert_frame_equal(result2, expected) - - # not commutative with series - p = DataFrame(np.random.randn(10, 5)) - s = p[0] - res = s % p - res2 = p % s - assert not res.fillna(0).equals(res2.fillna(0)) - - def test_div(self): - - # integer div, but deal with the 0's (GH 9144) - p = DataFrame({'first': [3, 4, 5, 8], 'second': [0, 0, 0, 3]}) - result = p / p - - expected = DataFrame({'first': Series([1.0, 1.0, 1.0, 1.0]), - 'second': Series([nan, nan, nan, 1])}) - assert_frame_equal(result, expected) - - with np.errstate(all='ignore'): - arr = p.values.astype('float') / p.values - result2 = DataFrame(arr, index=p.index, - columns=p.columns) - assert_frame_equal(result2, expected) - - result = p / 0 - expected = DataFrame(np.inf, 
index=p.index, columns=p.columns) - expected.iloc[0:3, 1] = nan - assert_frame_equal(result, expected) - - # numpy has a slightly different (wrong) treatement - with np.errstate(all='ignore'): - arr = p.values.astype('float64') / 0 - result2 = DataFrame(arr, index=p.index, - columns=p.columns) - assert_frame_equal(result2, expected) - - p = DataFrame(np.random.randn(10, 5)) - s = p[0] - res = s / p - res2 = p / s - assert not res.fillna(0).equals(res2.fillna(0)) - def test_logical_operators(self): def _check_bin_op(op): diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index e6b47fd69cb05..25dd285e883a0 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -738,12 +738,3 @@ def test_tz_convert_and_localize(self, fn): with assert_raises_regex(ValueError, 'not valid'): df = DataFrame(index=l0) df = getattr(df, fn)('US/Pacific', level=1) - - @pytest.mark.parametrize('timestamps', [ - [Timestamp('2012-01-01 13:00:00+00:00')] * 2, - [Timestamp('2012-01-01 13:00:00')] * 2]) - def test_tz_aware_scalar_comparison(self, timestamps): - # Test for issue #15966 - df = DataFrame({'test': timestamps}) - expected = DataFrame({'test': [False, False]}) - assert_frame_equal(df == -1, expected) From 99850775cc9b5bbff3465e4f25cee6b3df08e1c4 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 6 Feb 2018 03:27:16 -0800 Subject: [PATCH 054/217] Fix parsing corner case closes #19382 (#19529) --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/_libs/tslib.pyx | 30 ++++++++++++++++---- pandas/_libs/tslibs/conversion.pyx | 8 ++++++ pandas/tests/indexes/datetimes/test_tools.py | 16 ++++++++++- pandas/tests/scalar/test_timestamp.py | 8 ++++++ 5 files changed, 56 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index ea56ebad7d782..ca625f492b61f 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -550,6 +550,7 @@ 
Datetimelike - Bug in :func:`Series.truncate` which raises ``TypeError`` with a monotonic ``PeriodIndex`` (:issue:`17717`) - Bug in :func:`~DataFrame.pct_change` using ``periods`` and ``freq`` returned different length outputs (:issue:`7292`) - Bug in comparison of :class:`DatetimeIndex` against ``None`` or ``datetime.date`` objects raising ``TypeError`` for ``==`` and ``!=`` comparisons instead of all-``False`` and all-``True``, respectively (:issue:`19301`) +- Bug in :class:`Timestamp` and :func:`to_datetime` where a string representing a barely out-of-bounds timestamp would be incorrectly rounded down instead of raising ``OutOfBoundsDatetime`` (:issue:`19382`) - Timezones diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 81df7981096ba..877d7deff6ff4 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -609,20 +609,38 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', value = tz_convert_single(value, tz, 'UTC') iresult[i] = value check_dts_bounds(&dts) + except OutOfBoundsDatetime: + # GH#19382 for just-barely-OutOfBounds falling back to + # dateutil parser will return incorrect result because + # it will ignore nanoseconds + if require_iso8601: + if _parse_today_now(val, &iresult[i]): + continue + elif is_coerce: + iresult[i] = NPY_NAT + continue + elif is_raise: + raise ValueError("time data {val} doesn't match " + "format specified" + .format(val=val)) + return values + elif is_coerce: + iresult[i] = NPY_NAT + continue + raise except ValueError: # if requiring iso8601 strings, skip trying other formats if require_iso8601: if _parse_today_now(val, &iresult[i]): continue - if is_coerce: + elif is_coerce: iresult[i] = NPY_NAT continue elif is_raise: - raise ValueError( - "time data %r doesn't match format " - "specified" % (val,)) - else: - return values + raise ValueError("time data {val} doesn't match " + "format specified" + .format(val=val)) + return values try: py_dt = parse_datetime_string(val, 
dayfirst=dayfirst, diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index a32bfc1f6836c..4f1a053da6f1d 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -26,6 +26,7 @@ from np_datetime cimport (check_dts_bounds, dt64_to_dtstruct, dtstruct_to_dt64, get_datetime64_unit, get_datetime64_value, pydatetime_to_dt64) +from np_datetime import OutOfBoundsDatetime from util cimport (is_string_object, is_datetime64_object, @@ -472,6 +473,13 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit, ts = tz_localize_to_utc(np.array([ts], dtype='i8'), tz, ambiguous='raise', errors='raise')[0] + + except OutOfBoundsDatetime: + # GH#19382 for just-barely-OutOfBounds falling back to dateutil + # parser will return incorrect result because it will ignore + # nanoseconds + raise + except ValueError: try: ts = parse_datetime_string(ts, dayfirst=dayfirst, diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 44f3c21d23e62..f8b1f68ba33ce 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -17,6 +17,7 @@ from pandas._libs.tslibs import parsing from pandas.core.tools import datetimes as tools +from pandas.errors import OutOfBoundsDatetime from pandas.compat import lmap from pandas.compat.numpy import np_array_datetime64_compat from pandas.core.dtypes.common import is_datetime64_ns_dtype @@ -783,7 +784,6 @@ def test_dataframe_dtypes(self, cache): class TestToDatetimeMisc(object): - @pytest.mark.parametrize('cache', [True, False]) def test_to_datetime_iso8601(self, cache): result = to_datetime(["2012-01-01 00:00:00"], cache=cache) @@ -1596,6 +1596,20 @@ def test_coerce_of_invalid_datetimes(self): ) ) + def test_to_datetime_barely_out_of_bounds(self): + # GH#19529 + # GH#19382 close enough to bounds that dropping nanos would result + # in an in-bounds datetime + arr = 
np.array(['2262-04-11 23:47:16.854775808'], dtype=object) + + with pytest.raises(OutOfBoundsDatetime): + to_datetime(arr) + + with pytest.raises(OutOfBoundsDatetime): + # Essentially the same as above, but more directly calling + # the relevant function + tslib.array_to_datetime(arr) + def test_normalize_date(): value = date(2012, 9, 7) diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index 301f6da140866..7695c94409232 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -18,6 +18,7 @@ from pandas._libs.tslibs import conversion from pandas._libs.tslibs.timezones import get_timezone, dateutil_gettz as gettz +from pandas.errors import OutOfBoundsDatetime from pandas.compat import long, PY3 from pandas.compat.numpy import np_datetime64_compat from pandas import Timestamp, Period, Timedelta @@ -410,6 +411,13 @@ def test_out_of_bounds_string(self): with pytest.raises(ValueError): Timestamp('2263-01-01') + def test_barely_out_of_bounds(self): + # GH#19529 + # GH#19382 close enough to bounds that dropping nanos would result + # in an in-bounds datetime + with pytest.raises(OutOfBoundsDatetime): + Timestamp('2262-04-11 23:47:16.854775808') + def test_bounds_with_different_units(self): out_of_bounds_dates = ('1677-09-21', '2262-04-12') From 88628124080288668c44b5db4e87998b284a4257 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 6 Feb 2018 03:34:32 -0800 Subject: [PATCH 055/217] Collect Series timezone tests (#19541) --- pandas/tests/series/test_timezones.py | 293 +++++++++++++++++++++++++ pandas/tests/tseries/test_timezones.py | 258 +--------------------- 2 files changed, 296 insertions(+), 255 deletions(-) create mode 100644 pandas/tests/series/test_timezones.py diff --git a/pandas/tests/series/test_timezones.py b/pandas/tests/series/test_timezones.py new file mode 100644 index 0000000000000..2e15c964e4e93 --- /dev/null +++ b/pandas/tests/series/test_timezones.py @@ -0,0 +1,293 @@ 
+# -*- coding: utf-8 -*- +""" +Tests for Series timezone-related methods +""" +from datetime import datetime + +import pytest +import pytz +import numpy as np +from dateutil.tz import tzoffset + +import pandas.util.testing as tm +from pandas._libs import tslib +from pandas._libs.tslibs import timezones +from pandas.compat import lrange +from pandas.core.indexes.datetimes import date_range +from pandas import Series, Timestamp, DatetimeIndex, Index + + +class TestSeriesTimezones(object): + # ----------------------------------------------------------------- + # Series.tz_localize + def test_series_tz_localize(self): + + rng = date_range('1/1/2011', periods=100, freq='H') + ts = Series(1, index=rng) + + result = ts.tz_localize('utc') + assert result.index.tz.zone == 'UTC' + + # Can't localize if already tz-aware + rng = date_range('1/1/2011', periods=100, freq='H', tz='utc') + ts = Series(1, index=rng) + tm.assert_raises_regex(TypeError, 'Already tz-aware', + ts.tz_localize, 'US/Eastern') + + def test_series_tz_localize_ambiguous_bool(self): + # make sure that we are correctly accepting bool values as ambiguous + + # GH#14402 + ts = Timestamp('2015-11-01 01:00:03') + expected0 = Timestamp('2015-11-01 01:00:03-0500', tz='US/Central') + expected1 = Timestamp('2015-11-01 01:00:03-0600', tz='US/Central') + + ser = Series([ts]) + expected0 = Series([expected0]) + expected1 = Series([expected1]) + + with pytest.raises(pytz.AmbiguousTimeError): + ser.dt.tz_localize('US/Central') + + result = ser.dt.tz_localize('US/Central', ambiguous=True) + tm.assert_series_equal(result, expected0) + + result = ser.dt.tz_localize('US/Central', ambiguous=[True]) + tm.assert_series_equal(result, expected0) + + result = ser.dt.tz_localize('US/Central', ambiguous=False) + tm.assert_series_equal(result, expected1) + + result = ser.dt.tz_localize('US/Central', ambiguous=[False]) + tm.assert_series_equal(result, expected1) + + @pytest.mark.parametrize('tzstr', ['US/Eastern', 
'dateutil/US/Eastern']) + def test_series_tz_localize_empty(self, tzstr): + # GH#2248 + ser = Series() + + ser2 = ser.tz_localize('utc') + assert ser2.index.tz == pytz.utc + + ser2 = ser.tz_localize(tzstr) + timezones.tz_compare(ser2.index.tz, timezones.maybe_get_tz(tzstr)) + + # ----------------------------------------------------------------- + # Series.tz_convert + + def test_series_tz_convert(self): + rng = date_range('1/1/2011', periods=200, freq='D', tz='US/Eastern') + ts = Series(1, index=rng) + + result = ts.tz_convert('Europe/Berlin') + assert result.index.tz.zone == 'Europe/Berlin' + + # can't convert tz-naive + rng = date_range('1/1/2011', periods=200, freq='D') + ts = Series(1, index=rng) + tm.assert_raises_regex(TypeError, "Cannot convert tz-naive", + ts.tz_convert, 'US/Eastern') + + # ----------------------------------------------------------------- + # Series.append + + def test_series_append_aware(self): + rng1 = date_range('1/1/2011 01:00', periods=1, freq='H', + tz='US/Eastern') + rng2 = date_range('1/1/2011 02:00', periods=1, freq='H', + tz='US/Eastern') + ser1 = Series([1], index=rng1) + ser2 = Series([2], index=rng2) + ts_result = ser1.append(ser2) + + exp_index = DatetimeIndex(['2011-01-01 01:00', '2011-01-01 02:00'], + tz='US/Eastern') + exp = Series([1, 2], index=exp_index) + tm.assert_series_equal(ts_result, exp) + assert ts_result.index.tz == rng1.tz + + rng1 = date_range('1/1/2011 01:00', periods=1, freq='H', tz='UTC') + rng2 = date_range('1/1/2011 02:00', periods=1, freq='H', tz='UTC') + ser1 = Series([1], index=rng1) + ser2 = Series([2], index=rng2) + ts_result = ser1.append(ser2) + + exp_index = DatetimeIndex(['2011-01-01 01:00', '2011-01-01 02:00'], + tz='UTC') + exp = Series([1, 2], index=exp_index) + tm.assert_series_equal(ts_result, exp) + utc = rng1.tz + assert utc == ts_result.index.tz + + # GH#7795 + # different tz coerces to object dtype, not UTC + rng1 = date_range('1/1/2011 01:00', periods=1, freq='H', + tz='US/Eastern') + 
rng2 = date_range('1/1/2011 02:00', periods=1, freq='H', + tz='US/Central') + ser1 = Series([1], index=rng1) + ser2 = Series([2], index=rng2) + ts_result = ser1.append(ser2) + exp_index = Index([Timestamp('1/1/2011 01:00', tz='US/Eastern'), + Timestamp('1/1/2011 02:00', tz='US/Central')]) + exp = Series([1, 2], index=exp_index) + tm.assert_series_equal(ts_result, exp) + + def test_series_append_aware_naive(self): + rng1 = date_range('1/1/2011 01:00', periods=1, freq='H') + rng2 = date_range('1/1/2011 02:00', periods=1, freq='H', + tz='US/Eastern') + ser1 = Series(np.random.randn(len(rng1)), index=rng1) + ser2 = Series(np.random.randn(len(rng2)), index=rng2) + ts_result = ser1.append(ser2) + + expected = ser1.index.astype(object).append(ser2.index.astype(object)) + assert ts_result.index.equals(expected) + + # mixed + rng1 = date_range('1/1/2011 01:00', periods=1, freq='H') + rng2 = lrange(100) + ser1 = Series(np.random.randn(len(rng1)), index=rng1) + ser2 = Series(np.random.randn(len(rng2)), index=rng2) + ts_result = ser1.append(ser2) + + expected = ser1.index.astype(object).append(ser2.index) + assert ts_result.index.equals(expected) + + def test_series_append_dst(self): + rng1 = date_range('1/1/2016 01:00', periods=3, freq='H', + tz='US/Eastern') + rng2 = date_range('8/1/2016 01:00', periods=3, freq='H', + tz='US/Eastern') + ser1 = Series([1, 2, 3], index=rng1) + ser2 = Series([10, 11, 12], index=rng2) + ts_result = ser1.append(ser2) + + exp_index = DatetimeIndex(['2016-01-01 01:00', '2016-01-01 02:00', + '2016-01-01 03:00', '2016-08-01 01:00', + '2016-08-01 02:00', '2016-08-01 03:00'], + tz='US/Eastern') + exp = Series([1, 2, 3, 10, 11, 12], index=exp_index) + tm.assert_series_equal(ts_result, exp) + assert ts_result.index.tz == rng1.tz + + # ----------------------------------------------------------------- + + def test_dateutil_tzoffset_support(self): + values = [188.5, 328.25] + tzinfo = tzoffset(None, 7200) + index = [datetime(2012, 5, 11, 11, tzinfo=tzinfo), 
+ datetime(2012, 5, 11, 12, tzinfo=tzinfo)] + series = Series(data=values, index=index) + + assert series.index.tz == tzinfo + + # it works! #2443 + repr(series.index[0]) + + @pytest.mark.parametrize('tz', ['US/Eastern', 'dateutil/US/Eastern']) + def test_tz_aware_asfreq(self, tz): + dr = date_range('2011-12-01', '2012-07-20', freq='D', tz=tz) + + ser = Series(np.random.randn(len(dr)), index=dr) + + # it works! + ser.asfreq('T') + + @pytest.mark.parametrize('tz', ['US/Eastern', 'dateutil/US/Eastern']) + def test_string_index_alias_tz_aware(self, tz): + rng = date_range('1/1/2000', periods=10, tz=tz) + ser = Series(np.random.randn(len(rng)), index=rng) + + result = ser['1/3/2000'] + tm.assert_almost_equal(result, ser[2]) + + # TODO: De-duplicate with test below + def test_series_add_tz_mismatch_converts_to_utc_duplicate(self): + rng = date_range('1/1/2011', periods=10, freq='H', tz='US/Eastern') + ser = Series(np.random.randn(len(rng)), index=rng) + + ts_moscow = ser.tz_convert('Europe/Moscow') + + result = ser + ts_moscow + assert result.index.tz is pytz.utc + + result = ts_moscow + ser + assert result.index.tz is pytz.utc + + def test_series_add_tz_mismatch_converts_to_utc(self): + rng = date_range('1/1/2011', periods=100, freq='H', tz='utc') + + perm = np.random.permutation(100)[:90] + ser1 = Series(np.random.randn(90), + index=rng.take(perm).tz_convert('US/Eastern')) + + perm = np.random.permutation(100)[:90] + ser2 = Series(np.random.randn(90), + index=rng.take(perm).tz_convert('Europe/Berlin')) + + result = ser1 + ser2 + + uts1 = ser1.tz_convert('utc') + uts2 = ser2.tz_convert('utc') + expected = uts1 + uts2 + + assert result.index.tz == pytz.UTC + tm.assert_series_equal(result, expected) + + def test_series_add_aware_naive_raises(self): + rng = date_range('1/1/2011', periods=10, freq='H') + ser = Series(np.random.randn(len(rng)), index=rng) + + ser_utc = ser.tz_localize('utc') + + with pytest.raises(Exception): + ser + ser_utc + + with 
pytest.raises(Exception): + ser_utc + ser + + def test_series_align_aware(self): + idx1 = date_range('2001', periods=5, freq='H', tz='US/Eastern') + ser = Series(np.random.randn(len(idx1)), index=idx1) + ser_central = ser.tz_convert('US/Central') + # # different timezones convert to UTC + + new1, new2 = ser.align(ser_central) + assert new1.index.tz == pytz.UTC + assert new2.index.tz == pytz.UTC + + @pytest.mark.parametrize('tzstr', ['US/Eastern', 'dateutil/US/Eastern']) + def test_localized_at_time_between_time(self, tzstr): + from datetime import time + tz = timezones.maybe_get_tz(tzstr) + + rng = date_range('4/16/2012', '5/1/2012', freq='H') + ts = Series(np.random.randn(len(rng)), index=rng) + + ts_local = ts.tz_localize(tzstr) + + result = ts_local.at_time(time(10, 0)) + expected = ts.at_time(time(10, 0)).tz_localize(tzstr) + tm.assert_series_equal(result, expected) + assert timezones.tz_compare(result.index.tz, tz) + + t1, t2 = time(10, 0), time(11, 0) + result = ts_local.between_time(t1, t2) + expected = ts.between_time(t1, t2).tz_localize(tzstr) + tm.assert_series_equal(result, expected) + assert timezones.tz_compare(result.index.tz, tz) + + @pytest.mark.parametrize('tzstr', ['Europe/Berlin', + 'dateutil/Europe/Berlin']) + def test_getitem_pydatetime_tz(self, tzstr): + tz = timezones.maybe_get_tz(tzstr) + + index = date_range(start='2012-12-24 16:00', end='2012-12-24 18:00', + freq='H', tz=tzstr) + ts = Series(index=index, data=index.hour) + time_pandas = Timestamp('2012-12-24 17:00', tz=tzstr) + + dt = datetime(2012, 12, 24, 17, 0) + time_datetime = tslib._localize_pydatetime(dt, tz) + assert ts[time_pandas] == ts[time_datetime] diff --git a/pandas/tests/tseries/test_timezones.py b/pandas/tests/tseries/test_timezones.py index 2630984a70807..8f46e0a58580e 100644 --- a/pandas/tests/tseries/test_timezones.py +++ b/pandas/tests/tseries/test_timezones.py @@ -8,7 +8,7 @@ from dateutil.parser import parse from pytz import NonExistentTimeError from 
distutils.version import LooseVersion -from dateutil.tz import tzlocal, tzoffset +from dateutil.tz import tzlocal from datetime import datetime, timedelta, tzinfo import pandas.util.testing as tm @@ -18,9 +18,9 @@ from pandas.core.indexes.datetimes import bdate_range, date_range from pandas._libs import tslib from pandas._libs.tslibs import timezones, conversion -from pandas import (Index, Series, isna, Timestamp, NaT, +from pandas import (Index, isna, Timestamp, NaT, DatetimeIndex, to_datetime) -from pandas.util.testing import assert_series_equal, set_timezone +from pandas.util.testing import set_timezone class FixedOffset(tzinfo): @@ -142,17 +142,6 @@ def test_tz_localize_dti(self): pytest.raises(pytz.NonExistentTimeError, dti.tz_localize, self.tzstr('US/Eastern')) - def test_tz_localize_empty_series(self): - # #2248 - - ts = Series() - - ts2 = ts.tz_localize('utc') - assert ts2.index.tz == pytz.utc - - ts2 = ts.tz_localize(self.tzstr('US/Eastern')) - assert self.cmptz(ts2.index.tz, self.tz('US/Eastern')) - def test_create_with_tz(self): stamp = Timestamp('3/11/2012 05:00', tz=self.tzstr('US/Eastern')) assert stamp.hour == 5 @@ -455,34 +444,6 @@ def test_ambiguous_nat(self): # right is datetime64[ns, tzfile('/usr/share/zoneinfo/US/Eastern')] tm.assert_numpy_array_equal(di_test.values, localized.values) - def test_ambiguous_bool(self): - # make sure that we are correctly accepting bool values as ambiguous - - # gh-14402 - t = Timestamp('2015-11-01 01:00:03') - expected0 = Timestamp('2015-11-01 01:00:03-0500', tz='US/Central') - expected1 = Timestamp('2015-11-01 01:00:03-0600', tz='US/Central') - - s = Series([t]) - expected0 = Series([expected0]) - expected1 = Series([expected1]) - - def f(): - s.dt.tz_localize('US/Central') - pytest.raises(pytz.AmbiguousTimeError, f) - - result = s.dt.tz_localize('US/Central', ambiguous=True) - assert_series_equal(result, expected0) - - result = s.dt.tz_localize('US/Central', ambiguous=[True]) - assert_series_equal(result, 
expected0) - - result = s.dt.tz_localize('US/Central', ambiguous=False) - assert_series_equal(result, expected1) - - result = s.dt.tz_localize('US/Central', ambiguous=[False]) - assert_series_equal(result, expected1) - def test_nonexistent_raise_coerce(self): # See issue 13057 from pytz.exceptions import NonExistentTimeError @@ -565,34 +526,6 @@ def test_index_astype_asobject_tzinfos(self): assert x == exval assert x.tzinfo == exval.tzinfo - def test_localized_at_time_between_time(self): - from datetime import time - - rng = date_range('4/16/2012', '5/1/2012', freq='H') - ts = Series(np.random.randn(len(rng)), index=rng) - - ts_local = ts.tz_localize(self.tzstr('US/Eastern')) - - result = ts_local.at_time(time(10, 0)) - expected = ts.at_time(time(10, 0)).tz_localize(self.tzstr( - 'US/Eastern')) - assert_series_equal(result, expected) - assert self.cmptz(result.index.tz, self.tz('US/Eastern')) - - t1, t2 = time(10, 0), time(11, 0) - result = ts_local.between_time(t1, t2) - expected = ts.between_time(t1, - t2).tz_localize(self.tzstr('US/Eastern')) - assert_series_equal(result, expected) - assert self.cmptz(result.index.tz, self.tz('US/Eastern')) - - def test_string_index_alias_tz_aware(self): - rng = date_range('1/1/2000', periods=10, tz=self.tzstr('US/Eastern')) - ts = Series(np.random.randn(len(rng)), index=rng) - - result = ts['1/3/2000'] - tm.assert_almost_equal(result, ts[2]) - def test_fixed_offset(self): dates = [datetime(2000, 1, 1, tzinfo=fixed_off), datetime(2000, 1, 2, tzinfo=fixed_off), @@ -668,15 +601,6 @@ def test_shift_localized(self): result = dr_tz.shift(1, '10T') assert result.tz == dr_tz.tz - def test_tz_aware_asfreq(self): - dr = date_range('2011-12-01', '2012-07-20', freq='D', - tz=self.tzstr('US/Eastern')) - - s = Series(np.random.randn(len(dr)), index=dr) - - # it works! - s.asfreq('T') - def test_static_tzinfo(self): # it works! 
index = DatetimeIndex([datetime(2012, 1, 1)], tz=self.tzstr('EST')) @@ -709,28 +633,6 @@ def test_convert_datetime_list(self): assert dr.tz == dr2.tz assert dr2.name == 'foo' - def test_dateutil_tzoffset_support(self): - values = [188.5, 328.25] - tzinfo = tzoffset(None, 7200) - index = [datetime(2012, 5, 11, 11, tzinfo=tzinfo), - datetime(2012, 5, 11, 12, tzinfo=tzinfo)] - series = Series(data=values, index=index) - - assert series.index.tz == tzinfo - - # it works! #2443 - repr(series.index[0]) - - def test_getitem_pydatetime_tz(self): - index = date_range(start='2012-12-24 16:00', end='2012-12-24 18:00', - freq='H', tz=self.tzstr('Europe/Berlin')) - ts = Series(index=index, data=index.hour) - time_pandas = Timestamp('2012-12-24 17:00', - tz=self.tzstr('Europe/Berlin')) - time_datetime = self.localize( - self.tz('Europe/Berlin'), datetime(2012, 12, 24, 17, 0)) - assert ts[time_pandas] == ts[time_datetime] - def test_index_drop_dont_lose_tz(self): # #2621 ind = date_range("2012-12-01", periods=10, tz="utc") @@ -1056,33 +958,6 @@ def test_tz_localize_roundtrip(self): tm.assert_index_equal(reset, idx) assert reset.tzinfo is None - def test_series_tz_localize(self): - - rng = date_range('1/1/2011', periods=100, freq='H') - ts = Series(1, index=rng) - - result = ts.tz_localize('utc') - assert result.index.tz.zone == 'UTC' - - # Can't localize if already tz-aware - rng = date_range('1/1/2011', periods=100, freq='H', tz='utc') - ts = Series(1, index=rng) - tm.assert_raises_regex(TypeError, 'Already tz-aware', - ts.tz_localize, 'US/Eastern') - - def test_series_tz_convert(self): - rng = date_range('1/1/2011', periods=200, freq='D', tz='US/Eastern') - ts = Series(1, index=rng) - - result = ts.tz_convert('Europe/Berlin') - assert result.index.tz.zone == 'Europe/Berlin' - - # can't convert tz-naive - rng = date_range('1/1/2011', periods=200, freq='D') - ts = Series(1, index=rng) - tm.assert_raises_regex(TypeError, "Cannot convert tz-naive", - ts.tz_convert, 'US/Eastern') - 
def test_tz_convert_roundtrip(self): for tz in self.timezones: idx1 = date_range(start='2014-01-01', end='2014-12-31', freq='M', @@ -1127,12 +1002,6 @@ def test_join_utc_convert(self): def test_join_aware(self): rng = date_range('1/1/2011', periods=10, freq='H') - ts = Series(np.random.randn(len(rng)), index=rng) - - ts_utc = ts.tz_localize('utc') - - pytest.raises(Exception, ts.__add__, ts_utc) - pytest.raises(Exception, ts_utc.__add__, ts) # non-overlapping rng = date_range("2012-11-15 00:00:00", periods=6, freq="H", @@ -1144,127 +1013,6 @@ def test_join_aware(self): result = rng.union(rng2) assert result.tz.zone == 'UTC' - def test_series_align_aware(self): - idx1 = date_range('2001', periods=5, freq='H', tz='US/Eastern') - ser = Series(np.random.randn(len(idx1)), index=idx1) - ser_central = ser.tz_convert('US/Central') - # # different timezones convert to UTC - - new1, new2 = ser.align(ser_central) - assert new1.index.tz == pytz.UTC - assert new2.index.tz == pytz.UTC - - def test_append_aware(self): - rng1 = date_range('1/1/2011 01:00', periods=1, freq='H', - tz='US/Eastern') - rng2 = date_range('1/1/2011 02:00', periods=1, freq='H', - tz='US/Eastern') - ts1 = Series([1], index=rng1) - ts2 = Series([2], index=rng2) - ts_result = ts1.append(ts2) - - exp_index = DatetimeIndex(['2011-01-01 01:00', '2011-01-01 02:00'], - tz='US/Eastern') - exp = Series([1, 2], index=exp_index) - assert_series_equal(ts_result, exp) - assert ts_result.index.tz == rng1.tz - - rng1 = date_range('1/1/2011 01:00', periods=1, freq='H', tz='UTC') - rng2 = date_range('1/1/2011 02:00', periods=1, freq='H', tz='UTC') - ts1 = Series([1], index=rng1) - ts2 = Series([2], index=rng2) - ts_result = ts1.append(ts2) - - exp_index = DatetimeIndex(['2011-01-01 01:00', '2011-01-01 02:00'], - tz='UTC') - exp = Series([1, 2], index=exp_index) - assert_series_equal(ts_result, exp) - utc = rng1.tz - assert utc == ts_result.index.tz - - # GH 7795 - # different tz coerces to object dtype, not UTC - rng1 = 
date_range('1/1/2011 01:00', periods=1, freq='H', - tz='US/Eastern') - rng2 = date_range('1/1/2011 02:00', periods=1, freq='H', - tz='US/Central') - ts1 = Series([1], index=rng1) - ts2 = Series([2], index=rng2) - ts_result = ts1.append(ts2) - exp_index = Index([Timestamp('1/1/2011 01:00', tz='US/Eastern'), - Timestamp('1/1/2011 02:00', tz='US/Central')]) - exp = Series([1, 2], index=exp_index) - assert_series_equal(ts_result, exp) - - def test_append_dst(self): - rng1 = date_range('1/1/2016 01:00', periods=3, freq='H', - tz='US/Eastern') - rng2 = date_range('8/1/2016 01:00', periods=3, freq='H', - tz='US/Eastern') - ts1 = Series([1, 2, 3], index=rng1) - ts2 = Series([10, 11, 12], index=rng2) - ts_result = ts1.append(ts2) - - exp_index = DatetimeIndex(['2016-01-01 01:00', '2016-01-01 02:00', - '2016-01-01 03:00', '2016-08-01 01:00', - '2016-08-01 02:00', '2016-08-01 03:00'], - tz='US/Eastern') - exp = Series([1, 2, 3, 10, 11, 12], index=exp_index) - assert_series_equal(ts_result, exp) - assert ts_result.index.tz == rng1.tz - - def test_append_aware_naive(self): - rng1 = date_range('1/1/2011 01:00', periods=1, freq='H') - rng2 = date_range('1/1/2011 02:00', periods=1, freq='H', - tz='US/Eastern') - ts1 = Series(np.random.randn(len(rng1)), index=rng1) - ts2 = Series(np.random.randn(len(rng2)), index=rng2) - ts_result = ts1.append(ts2) - - assert ts_result.index.equals(ts1.index.astype(object).append( - ts2.index.astype(object))) - - # mixed - rng1 = date_range('1/1/2011 01:00', periods=1, freq='H') - rng2 = lrange(100) - ts1 = Series(np.random.randn(len(rng1)), index=rng1) - ts2 = Series(np.random.randn(len(rng2)), index=rng2) - ts_result = ts1.append(ts2) - assert ts_result.index.equals(ts1.index.astype(object).append( - ts2.index)) - - def test_series_add_tz_mismatch_converts_to_utc(self): - rng = date_range('1/1/2011', periods=10, freq='H', tz='US/Eastern') - ts = Series(np.random.randn(len(rng)), index=rng) - - ts_moscow = ts.tz_convert('Europe/Moscow') - - result 
= ts + ts_moscow - assert result.index.tz is pytz.utc - - result = ts_moscow + ts - assert result.index.tz is pytz.utc - - def test_arith_utc_convert(self): - rng = date_range('1/1/2011', periods=100, freq='H', tz='utc') - - perm = np.random.permutation(100)[:90] - ts1 = Series(np.random.randn(90), - index=rng.take(perm).tz_convert('US/Eastern')) - - perm = np.random.permutation(100)[:90] - ts2 = Series(np.random.randn(90), - index=rng.take(perm).tz_convert('Europe/Berlin')) - - result = ts1 + ts2 - - uts1 = ts1.tz_convert('utc') - uts2 = ts2.tz_convert('utc') - expected = uts1 + uts2 - - assert result.index.tz == pytz.UTC - assert_series_equal(result, expected) - def test_intersection(self): rng = date_range('1/1/2011', periods=100, freq='H', tz='utc') From bc1cd6df14fa0f18c78595bf52bb7c8b4be7d3e0 Mon Sep 17 00:00:00 2001 From: Sangwoong Yoon Date: Tue, 6 Feb 2018 23:16:13 +0900 Subject: [PATCH 056/217] DOC/ERR: better error message on no common merge keys (#19427) --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/core/frame.py | 2 +- pandas/core/reshape/merge.py | 7 ++++++- pandas/tests/reshape/merge/test_merge.py | 8 ++++++++ 4 files changed, 16 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index ca625f492b61f..54dba831f7216 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -667,6 +667,7 @@ Reshaping - Bug in :func:`DataFrame.stack`, :func:`DataFrame.unstack`, :func:`Series.unstack` which were not returning subclasses (:issue:`15563`) - Bug in timezone comparisons, manifesting as a conversion of the index to UTC in ``.concat()`` (:issue:`18523`) - Bug in :func:`concat` when concatting sparse and dense series it returns only a ``SparseDataFrame``. Should be a ``DataFrame``. 
(:issue:`18914`, :issue:`18686`, and :issue:`16874`) +- Improved error message for :func:`DataFrame.merge` when there is no common merge key (:issue:`19427`) - diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 201d8ba427c8a..3d1983f65d70d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -233,7 +233,7 @@ -------- merge_ordered merge_asof - +DataFrame.join """ # ----------------------------------------------------------------------- diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 3ec78ce52c6e5..9dbb327e3d956 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1021,7 +1021,12 @@ def _validate_specification(self): common_cols = self.left.columns.intersection( self.right.columns) if len(common_cols) == 0: - raise MergeError('No common columns to perform merge on') + raise MergeError( + 'No common columns to perform merge on. ' + 'Merge options: left_on={lon}, right_on={ron}, ' + 'left_index={lidx}, right_index={ridx}' + .format(lon=self.left_on, ron=self.right_on, + lidx=self.left_index, ridx=self.right_index)) if not common_cols.is_unique: raise MergeError("Data columns not unique: {common!r}" .format(common=common_cols)) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index f63c206c0c407..32f83ab972be5 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -270,6 +270,14 @@ def test_no_overlap_more_informative_error(self): df2 = DataFrame({'y': ['b', 'c']}, index=[dt, dt]) pytest.raises(MergeError, merge, df1, df2) + msg = ('No common columns to perform merge on. 
' + 'Merge options: left_on={lon}, right_on={ron}, ' + 'left_index={lidx}, right_index={ridx}' + .format(lon=None, ron=None, lidx=False, ridx=False)) + + with tm.assert_raises_regex(MergeError, msg): + merge(df1, df2) + def test_merge_non_unique_indexes(self): dt = datetime(2012, 5, 1) From 325df9f0fa8bd32811f50e2abf4b024e97e3af57 Mon Sep 17 00:00:00 2001 From: miker985 Date: Tue, 6 Feb 2018 06:17:14 -0800 Subject: [PATCH 057/217] BUGFIX - AttributeError raised in StataReader.value_labels() (#19510) --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/io/stata.py | 8 +++++--- pandas/tests/io/test_stata.py | 10 ++++++++++ 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 54dba831f7216..b5bf7ccbda0b6 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -627,6 +627,7 @@ I/O - Bug in :func:`DataFrame.to_parquet` where an exception was raised if the write destination is S3 (:issue:`19134`) - :class:`Interval` now supported in :func:`DataFrame.to_excel` for all Excel file types (:issue:`19242`) - :class:`Timedelta` now supported in :func:`DataFrame.to_excel` for xls file type (:issue:`19242`, :issue:`9155`) +- Bug in :meth:`pandas.io.stata.StataReader.value_labels` raising an ``AttributeError`` when called on very old files. Now returns an empty dict (:issue:`19417`) Plotting ^^^^^^^^ diff --git a/pandas/io/stata.py b/pandas/io/stata.py index adbff06364dbe..ee6975ea1d938 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -1341,12 +1341,14 @@ def _null_terminate(self, s): return s def _read_value_labels(self): - if self.format_version <= 108: - # Value labels are not supported in version 108 and earlier. - return if self._value_labels_read: # Don't read twice return + if self.format_version <= 108: + # Value labels are not supported in version 108 and earlier. 
+ self._value_labels_read = True + self.value_label_dict = dict() + return if self.format_version >= 117: self.path_or_buf.seek(self.seek_value_labels) diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 89d76061329a3..4e259d0994bdb 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -589,6 +589,16 @@ def test_105(self): df0['psch_dis'] = df0["psch_dis"].astype(np.float32) tm.assert_frame_equal(df.head(3), df0) + def test_value_labels_old_format(self): + # GH 19417 + # + # Test that value_labels() returns an empty dict if the file format + # predates supporting value labels. + dpath = os.path.join(self.dirpath, 'S4_EDUC1.dta') + reader = StataReader(dpath) + assert reader.value_labels() == {} + reader.close() + def test_date_export_formats(self): columns = ['tc', 'td', 'tw', 'tm', 'tq', 'th', 'ty'] conversions = {c: c for c in columns} From 25c2f08423b764817ec03f4595d4a8f163ae29ab Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 6 Feb 2018 15:41:47 -0800 Subject: [PATCH 058/217] separate DatetimeIndex timezone tests (#19545) --- .../indexes/datetimes/test_arithmetic.py | 26 + .../tests/indexes/datetimes/test_timezones.py | 1018 +++++++++++++++++ pandas/tests/tseries/test_timezones.py | 1007 +--------------- 3 files changed, 1047 insertions(+), 1004 deletions(-) create mode 100644 pandas/tests/indexes/datetimes/test_timezones.py diff --git a/pandas/tests/indexes/datetimes/test_arithmetic.py b/pandas/tests/indexes/datetimes/test_arithmetic.py index 09a6b35a0ff0e..f6f8eccf4e30c 100644 --- a/pandas/tests/indexes/datetimes/test_arithmetic.py +++ b/pandas/tests/indexes/datetimes/test_arithmetic.py @@ -412,6 +412,14 @@ def test_dti_shift_no_freq(self): with pytest.raises(NullFrequencyError): dti.shift(2) + @pytest.mark.parametrize('tzstr', ['US/Eastern', 'dateutil/US/Eastern']) + def test_dti_shift_localized(self, tzstr): + dr = date_range('2011/1/1', '2012/1/1', freq='W-FRI') + dr_tz = dr.tz_localize(tzstr) 
+ + result = dr_tz.shift(1, '10T') + assert result.tz == dr_tz.tz + # ------------------------------------------------------------- # Binary operations DatetimeIndex and timedelta-like @@ -767,6 +775,24 @@ def test_dti_with_offset_series(self, tz, names): res3 = dti - other tm.assert_series_equal(res3, expected_sub) + def test_dti_add_offset_tzaware(self): + dates = date_range('2012-11-01', periods=3, tz='US/Pacific') + offset = dates + pd.offsets.Hour(5) + assert dates[0] + pd.offsets.Hour(5) == offset[0] + + # GH#6818 + for tz in ['UTC', 'US/Pacific', 'Asia/Tokyo']: + dates = date_range('2010-11-01 00:00', periods=3, tz=tz, freq='H') + expected = DatetimeIndex(['2010-11-01 05:00', '2010-11-01 06:00', + '2010-11-01 07:00'], freq='H', tz=tz) + + offset = dates + pd.offsets.Hour(5) + tm.assert_index_equal(offset, expected) + offset = dates + np.timedelta64(5, 'h') + tm.assert_index_equal(offset, expected) + offset = dates + timedelta(hours=5) + tm.assert_index_equal(offset, expected) + @pytest.mark.parametrize('klass,assert_func', [ (Series, tm.assert_series_equal), diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py new file mode 100644 index 0000000000000..075d239df5f7a --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -0,0 +1,1018 @@ +# -*- coding: utf-8 -*- +""" +Tests for DatetimeIndex timezone-related methods +""" +from datetime import datetime, timedelta, tzinfo +from distutils.version import LooseVersion + +import pytest +import pytz +import dateutil +from dateutil.tz import gettz, tzlocal +import numpy as np + +import pandas.util.testing as tm +import pandas.util._test_decorators as td + +import pandas as pd +from pandas._libs import tslib +from pandas._libs.tslibs import timezones +from pandas.compat import lrange, zip +from pandas import (DatetimeIndex, date_range, bdate_range, + Timestamp, isna, to_datetime, Index) + + +class FixedOffset(tzinfo): + """Fixed offset in 
minutes east from UTC.""" + + def __init__(self, offset, name): + self.__offset = timedelta(minutes=offset) + self.__name = name + + def utcoffset(self, dt): + return self.__offset + + def tzname(self, dt): + return self.__name + + def dst(self, dt): + return timedelta(0) + + +fixed_off = FixedOffset(-420, '-07:00') +fixed_off_no_name = FixedOffset(-330, None) + + +class TestDatetimeIndexTimezones(object): + # ------------------------------------------------------------- + # DatetimeIndex.tz_convert + def test_tz_convert_nat(self): + # GH#5546 + dates = [pd.NaT] + idx = DatetimeIndex(dates) + idx = idx.tz_localize('US/Pacific') + tm.assert_index_equal(idx, DatetimeIndex(dates, tz='US/Pacific')) + idx = idx.tz_convert('US/Eastern') + tm.assert_index_equal(idx, DatetimeIndex(dates, tz='US/Eastern')) + idx = idx.tz_convert('UTC') + tm.assert_index_equal(idx, DatetimeIndex(dates, tz='UTC')) + + dates = ['2010-12-01 00:00', '2010-12-02 00:00', pd.NaT] + idx = DatetimeIndex(dates) + idx = idx.tz_localize('US/Pacific') + tm.assert_index_equal(idx, DatetimeIndex(dates, tz='US/Pacific')) + idx = idx.tz_convert('US/Eastern') + expected = ['2010-12-01 03:00', '2010-12-02 03:00', pd.NaT] + tm.assert_index_equal(idx, DatetimeIndex(expected, tz='US/Eastern')) + + idx = idx + pd.offsets.Hour(5) + expected = ['2010-12-01 08:00', '2010-12-02 08:00', pd.NaT] + tm.assert_index_equal(idx, DatetimeIndex(expected, tz='US/Eastern')) + idx = idx.tz_convert('US/Pacific') + expected = ['2010-12-01 05:00', '2010-12-02 05:00', pd.NaT] + tm.assert_index_equal(idx, DatetimeIndex(expected, tz='US/Pacific')) + + idx = idx + np.timedelta64(3, 'h') + expected = ['2010-12-01 08:00', '2010-12-02 08:00', pd.NaT] + tm.assert_index_equal(idx, DatetimeIndex(expected, tz='US/Pacific')) + + idx = idx.tz_convert('US/Eastern') + expected = ['2010-12-01 11:00', '2010-12-02 11:00', pd.NaT] + tm.assert_index_equal(idx, DatetimeIndex(expected, tz='US/Eastern')) + + @pytest.mark.parametrize('prefix', ['', 
'dateutil/']) + def test_dti_tz_convert_compat_timestamp(self, prefix): + strdates = ['1/1/2012', '3/1/2012', '4/1/2012'] + idx = DatetimeIndex(strdates, tz=prefix + 'US/Eastern') + + conv = idx[0].tz_convert(prefix + 'US/Pacific') + expected = idx.tz_convert(prefix + 'US/Pacific')[0] + + assert conv == expected + + def test_dti_tz_convert_hour_overflow_dst(self): + # Regression test for: + # https://github.com/pandas-dev/pandas/issues/13306 + + # sorted case US/Eastern -> UTC + ts = ['2008-05-12 09:50:00', + '2008-12-12 09:50:35', + '2009-05-12 09:50:32'] + tt = DatetimeIndex(ts).tz_localize('US/Eastern') + ut = tt.tz_convert('UTC') + expected = Index([13, 14, 13]) + tm.assert_index_equal(ut.hour, expected) + + # sorted case UTC -> US/Eastern + ts = ['2008-05-12 13:50:00', + '2008-12-12 14:50:35', + '2009-05-12 13:50:32'] + tt = DatetimeIndex(ts).tz_localize('UTC') + ut = tt.tz_convert('US/Eastern') + expected = Index([9, 9, 9]) + tm.assert_index_equal(ut.hour, expected) + + # unsorted case US/Eastern -> UTC + ts = ['2008-05-12 09:50:00', + '2008-12-12 09:50:35', + '2008-05-12 09:50:32'] + tt = DatetimeIndex(ts).tz_localize('US/Eastern') + ut = tt.tz_convert('UTC') + expected = Index([13, 14, 13]) + tm.assert_index_equal(ut.hour, expected) + + # unsorted case UTC -> US/Eastern + ts = ['2008-05-12 13:50:00', + '2008-12-12 14:50:35', + '2008-05-12 13:50:32'] + tt = DatetimeIndex(ts).tz_localize('UTC') + ut = tt.tz_convert('US/Eastern') + expected = Index([9, 9, 9]) + tm.assert_index_equal(ut.hour, expected) + + @pytest.mark.parametrize('tz', ['US/Eastern', 'dateutil/US/Eastern']) + def test_dti_tz_convert_hour_overflow_dst_timestamps(self, tz): + # Regression test for GH#13306 + + # sorted case US/Eastern -> UTC + ts = [Timestamp('2008-05-12 09:50:00', tz=tz), + Timestamp('2008-12-12 09:50:35', tz=tz), + Timestamp('2009-05-12 09:50:32', tz=tz)] + tt = DatetimeIndex(ts) + ut = tt.tz_convert('UTC') + expected = Index([13, 14, 13]) + tm.assert_index_equal(ut.hour, 
expected) + + # sorted case UTC -> US/Eastern + ts = [Timestamp('2008-05-12 13:50:00', tz='UTC'), + Timestamp('2008-12-12 14:50:35', tz='UTC'), + Timestamp('2009-05-12 13:50:32', tz='UTC')] + tt = DatetimeIndex(ts) + ut = tt.tz_convert('US/Eastern') + expected = Index([9, 9, 9]) + tm.assert_index_equal(ut.hour, expected) + + # unsorted case US/Eastern -> UTC + ts = [Timestamp('2008-05-12 09:50:00', tz=tz), + Timestamp('2008-12-12 09:50:35', tz=tz), + Timestamp('2008-05-12 09:50:32', tz=tz)] + tt = DatetimeIndex(ts) + ut = tt.tz_convert('UTC') + expected = Index([13, 14, 13]) + tm.assert_index_equal(ut.hour, expected) + + # unsorted case UTC -> US/Eastern + ts = [Timestamp('2008-05-12 13:50:00', tz='UTC'), + Timestamp('2008-12-12 14:50:35', tz='UTC'), + Timestamp('2008-05-12 13:50:32', tz='UTC')] + tt = DatetimeIndex(ts) + ut = tt.tz_convert('US/Eastern') + expected = Index([9, 9, 9]) + tm.assert_index_equal(ut.hour, expected) + + def test_dti_tz_convert_trans_pos_plus_1__bug(self): + # Regression test for tslib.tz_convert(vals, tz1, tz2). + # See https://github.com/pandas-dev/pandas/issues/4496 for details. 
+ for freq, n in [('H', 1), ('T', 60), ('S', 3600)]: + idx = date_range(datetime(2011, 3, 26, 23), + datetime(2011, 3, 27, 1), freq=freq) + idx = idx.tz_localize('UTC') + idx = idx.tz_convert('Europe/Moscow') + + expected = np.repeat(np.array([3, 4, 5]), np.array([n, n, 1])) + tm.assert_index_equal(idx.hour, Index(expected)) + + def test_dti_tz_convert_dst(self): + for freq, n in [('H', 1), ('T', 60), ('S', 3600)]: + # Start DST + idx = date_range('2014-03-08 23:00', '2014-03-09 09:00', freq=freq, + tz='UTC') + idx = idx.tz_convert('US/Eastern') + expected = np.repeat(np.array([18, 19, 20, 21, 22, 23, + 0, 1, 3, 4, 5]), + np.array([n, n, n, n, n, n, n, n, n, n, 1])) + tm.assert_index_equal(idx.hour, Index(expected)) + + idx = date_range('2014-03-08 18:00', '2014-03-09 05:00', freq=freq, + tz='US/Eastern') + idx = idx.tz_convert('UTC') + expected = np.repeat(np.array([23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), + np.array([n, n, n, n, n, n, n, n, n, n, 1])) + tm.assert_index_equal(idx.hour, Index(expected)) + + # End DST + idx = date_range('2014-11-01 23:00', '2014-11-02 09:00', freq=freq, + tz='UTC') + idx = idx.tz_convert('US/Eastern') + expected = np.repeat(np.array([19, 20, 21, 22, 23, + 0, 1, 1, 2, 3, 4]), + np.array([n, n, n, n, n, n, n, n, n, n, 1])) + tm.assert_index_equal(idx.hour, Index(expected)) + + idx = date_range('2014-11-01 18:00', '2014-11-02 05:00', freq=freq, + tz='US/Eastern') + idx = idx.tz_convert('UTC') + expected = np.repeat(np.array([22, 23, 0, 1, 2, 3, 4, 5, 6, + 7, 8, 9, 10]), + np.array([n, n, n, n, n, n, n, n, n, + n, n, n, 1])) + tm.assert_index_equal(idx.hour, Index(expected)) + + # daily + # Start DST + idx = date_range('2014-03-08 00:00', '2014-03-09 00:00', freq='D', + tz='UTC') + idx = idx.tz_convert('US/Eastern') + tm.assert_index_equal(idx.hour, Index([19, 19])) + + idx = date_range('2014-03-08 00:00', '2014-03-09 00:00', freq='D', + tz='US/Eastern') + idx = idx.tz_convert('UTC') + tm.assert_index_equal(idx.hour, Index([5, 5])) + + # End 
DST + idx = date_range('2014-11-01 00:00', '2014-11-02 00:00', freq='D', + tz='UTC') + idx = idx.tz_convert('US/Eastern') + tm.assert_index_equal(idx.hour, Index([20, 20])) + + idx = date_range('2014-11-01 00:00', '2014-11-02 000:00', freq='D', + tz='US/Eastern') + idx = idx.tz_convert('UTC') + tm.assert_index_equal(idx.hour, Index([4, 4])) + + @pytest.mark.parametrize('tz', ['UTC', 'Asia/Tokyo', 'US/Eastern', + 'dateutil/US/Pacific']) + def test_tz_convert_roundtrip(self, tz): + idx1 = date_range(start='2014-01-01', end='2014-12-31', freq='M', + tz='UTC') + exp1 = date_range(start='2014-01-01', end='2014-12-31', freq='M') + + idx2 = date_range(start='2014-01-01', end='2014-12-31', freq='D', + tz='UTC') + exp2 = date_range(start='2014-01-01', end='2014-12-31', freq='D') + + idx3 = date_range(start='2014-01-01', end='2014-03-01', freq='H', + tz='UTC') + exp3 = date_range(start='2014-01-01', end='2014-03-01', freq='H') + + idx4 = date_range(start='2014-08-01', end='2014-10-31', freq='T', + tz='UTC') + exp4 = date_range(start='2014-08-01', end='2014-10-31', freq='T') + + for idx, expected in [(idx1, exp1), (idx2, exp2), (idx3, exp3), + (idx4, exp4)]: + converted = idx.tz_convert(tz) + reset = converted.tz_convert(None) + tm.assert_index_equal(reset, expected) + assert reset.tzinfo is None + expected = converted.tz_convert('UTC').tz_localize(None) + tm.assert_index_equal(reset, expected) + + def test_dti_tz_convert_tzlocal(self): + # GH#13583 + # tz_convert doesn't affect to internal + dti = date_range(start='2001-01-01', end='2001-03-01', tz='UTC') + dti2 = dti.tz_convert(dateutil.tz.tzlocal()) + tm.assert_numpy_array_equal(dti2.asi8, dti.asi8) + + dti = date_range(start='2001-01-01', end='2001-03-01', + tz=dateutil.tz.tzlocal()) + dti2 = dti.tz_convert(None) + tm.assert_numpy_array_equal(dti2.asi8, dti.asi8) + + @pytest.mark.parametrize('tz', ['US/Eastern', 'dateutil/US/Eastern', + pytz.timezone('US/Eastern'), + gettz('US/Eastern')]) + def 
test_dti_tz_convert_utc_to_local_no_modify(self, tz): + rng = date_range('3/11/2012', '3/12/2012', freq='H', tz='utc') + rng_eastern = rng.tz_convert(tz) + + # Values are unmodified + tm.assert_numpy_array_equal(rng.asi8, rng_eastern.asi8) + + assert timezones.tz_compare(rng_eastern.tz, timezones.maybe_get_tz(tz)) + + @pytest.mark.parametrize('tzstr', ['US/Eastern', 'dateutil/US/Eastern']) + def test_tz_convert_unsorted(self, tzstr): + dr = date_range('2012-03-09', freq='H', periods=100, tz='utc') + dr = dr.tz_convert(tzstr) + + result = dr[::-1].hour + exp = dr.hour[::-1] + tm.assert_almost_equal(result, exp) + + # ------------------------------------------------------------- + # DatetimeIndex.tz_localize + + def test_dti_tz_localize_nonexistent_raise_coerce(self): + # GH#13057 + times = ['2015-03-08 01:00', '2015-03-08 02:00', '2015-03-08 03:00'] + index = DatetimeIndex(times) + tz = 'US/Eastern' + with pytest.raises(pytz.NonExistentTimeError): + index.tz_localize(tz=tz) + + with pytest.raises(pytz.NonExistentTimeError): + index.tz_localize(tz=tz, errors='raise') + + result = index.tz_localize(tz=tz, errors='coerce') + test_times = ['2015-03-08 01:00-05:00', 'NaT', + '2015-03-08 03:00-04:00'] + dti = DatetimeIndex(test_times) + expected = dti.tz_localize('UTC').tz_convert('US/Eastern') + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize('tz', [pytz.timezone('US/Eastern'), + gettz('US/Eastern')]) + def test_dti_tz_localize_ambiguous_infer(self, tz): + # November 6, 2011, fall back, repeat 2 AM hour + # With no repeated hours, we cannot infer the transition + dr = date_range(datetime(2011, 11, 6, 0), periods=5, + freq=pd.offsets.Hour()) + with pytest.raises(pytz.AmbiguousTimeError): + dr.tz_localize(tz) + + # With repeated hours, we can infer the transition + dr = date_range(datetime(2011, 11, 6, 0), periods=5, + freq=pd.offsets.Hour(), tz=tz) + times = ['11/06/2011 00:00', '11/06/2011 01:00', '11/06/2011 01:00', + '11/06/2011 02:00', '11/06/2011 
03:00'] + di = DatetimeIndex(times) + localized = di.tz_localize(tz, ambiguous='infer') + tm.assert_index_equal(dr, localized) + with tm.assert_produces_warning(FutureWarning): + localized_old = di.tz_localize(tz, infer_dst=True) + tm.assert_index_equal(dr, localized_old) + tm.assert_index_equal(dr, DatetimeIndex(times, tz=tz, + ambiguous='infer')) + + # When there is no dst transition, nothing special happens + dr = date_range(datetime(2011, 6, 1, 0), periods=10, + freq=pd.offsets.Hour()) + localized = dr.tz_localize(tz) + localized_infer = dr.tz_localize(tz, ambiguous='infer') + tm.assert_index_equal(localized, localized_infer) + with tm.assert_produces_warning(FutureWarning): + localized_infer_old = dr.tz_localize(tz, infer_dst=True) + tm.assert_index_equal(localized, localized_infer_old) + + @pytest.mark.parametrize('tz', [pytz.timezone('US/Eastern'), + gettz('US/Eastern')]) + def test_dti_tz_localize_ambiguous_times(self, tz): + # March 13, 2011, spring forward, skip from 2 AM to 3 AM + dr = date_range(datetime(2011, 3, 13, 1, 30), periods=3, + freq=pd.offsets.Hour()) + with pytest.raises(pytz.NonExistentTimeError): + dr.tz_localize(tz) + + # after dst transition, it works + dr = date_range(datetime(2011, 3, 13, 3, 30), periods=3, + freq=pd.offsets.Hour(), tz=tz) + + # November 6, 2011, fall back, repeat 2 AM hour + dr = date_range(datetime(2011, 11, 6, 1, 30), periods=3, + freq=pd.offsets.Hour()) + with pytest.raises(pytz.AmbiguousTimeError): + dr.tz_localize(tz) + + # UTC is OK + dr = date_range(datetime(2011, 3, 13), periods=48, + freq=pd.offsets.Minute(30), tz=pytz.utc) + + @pytest.mark.parametrize('tzstr', ['US/Eastern', 'dateutil/US/Eastern']) + def test_dti_tz_localize_pass_dates_to_utc(self, tzstr): + strdates = ['1/1/2012', '3/1/2012', '4/1/2012'] + + idx = DatetimeIndex(strdates) + conv = idx.tz_localize(tzstr) + + fromdates = DatetimeIndex(strdates, tz=tzstr) + + assert conv.tz == fromdates.tz + tm.assert_numpy_array_equal(conv.values, 
fromdates.values) + + @pytest.mark.parametrize('prefix', ['', 'dateutil/']) + def test_dti_tz_localize(self, prefix): + tzstr = prefix + 'US/Eastern' + dti = DatetimeIndex(start='1/1/2005', end='1/1/2005 0:00:30.256', + freq='L') + dti2 = dti.tz_localize(tzstr) + + dti_utc = DatetimeIndex(start='1/1/2005 05:00', + end='1/1/2005 5:00:30.256', freq='L', tz='utc') + + tm.assert_numpy_array_equal(dti2.values, dti_utc.values) + + dti3 = dti2.tz_convert(prefix + 'US/Pacific') + tm.assert_numpy_array_equal(dti3.values, dti_utc.values) + + dti = DatetimeIndex(start='11/6/2011 1:59', end='11/6/2011 2:00', + freq='L') + with pytest.raises(pytz.AmbiguousTimeError): + dti.tz_localize(tzstr) + + dti = DatetimeIndex(start='3/13/2011 1:59', end='3/13/2011 2:00', + freq='L') + with pytest.raises(pytz.NonExistentTimeError): + dti.tz_localize(tzstr) + + @pytest.mark.parametrize('tz', ['US/Eastern', 'dateutil/US/Eastern', + pytz.timezone('US/Eastern'), + gettz('US/Eastern')]) + def test_dti_tz_localize_utc_conversion(self, tz): + # Localizing to time zone should: + # 1) check for DST ambiguities + # 2) convert to UTC + + rng = date_range('3/10/2012', '3/11/2012', freq='30T') + + converted = rng.tz_localize(tz) + expected_naive = rng + pd.offsets.Hour(5) + tm.assert_numpy_array_equal(converted.asi8, expected_naive.asi8) + + # DST ambiguity, this should fail + rng = date_range('3/11/2012', '3/12/2012', freq='30T') + # Is this really how it should fail?? 
+ with pytest.raises(pytz.NonExistentTimeError): + rng.tz_localize(tz) + + @pytest.mark.parametrize('tz', ['UTC', 'Asia/Tokyo', 'US/Eastern', + 'dateutil/US/Pacific']) + def test_dti_tz_localize_roundtrip(self, tz): + idx1 = date_range(start='2014-01-01', end='2014-12-31', freq='M') + idx2 = date_range(start='2014-01-01', end='2014-12-31', freq='D') + idx3 = date_range(start='2014-01-01', end='2014-03-01', freq='H') + idx4 = date_range(start='2014-08-01', end='2014-10-31', freq='T') + for idx in [idx1, idx2, idx3, idx4]: + localized = idx.tz_localize(tz) + expected = date_range(start=idx[0], end=idx[-1], freq=idx.freq, + tz=tz) + tm.assert_index_equal(localized, expected) + + with pytest.raises(TypeError): + localized.tz_localize(tz) + + reset = localized.tz_localize(None) + tm.assert_index_equal(reset, idx) + assert reset.tzinfo is None + + def test_dti_tz_localize_naive(self): + rng = date_range('1/1/2011', periods=100, freq='H') + + conv = rng.tz_localize('US/Pacific') + exp = date_range('1/1/2011', periods=100, freq='H', tz='US/Pacific') + + tm.assert_index_equal(conv, exp) + + def test_dti_tz_localize_tzlocal(self): + # GH#13583 + offset = dateutil.tz.tzlocal().utcoffset(datetime(2011, 1, 1)) + offset = int(offset.total_seconds() * 1000000000) + + dti = date_range(start='2001-01-01', end='2001-03-01') + dti2 = dti.tz_localize(dateutil.tz.tzlocal()) + tm.assert_numpy_array_equal(dti2.asi8 + offset, dti.asi8) + + dti = date_range(start='2001-01-01', end='2001-03-01', + tz=dateutil.tz.tzlocal()) + dti2 = dti.tz_localize(None) + tm.assert_numpy_array_equal(dti2.asi8 - offset, dti.asi8) + + @pytest.mark.parametrize('tz', [pytz.timezone('US/Eastern'), + gettz('US/Eastern')]) + def test_dti_tz_localize_ambiguous_nat(self, tz): + times = ['11/06/2011 00:00', '11/06/2011 01:00', '11/06/2011 01:00', + '11/06/2011 02:00', '11/06/2011 03:00'] + di = DatetimeIndex(times) + localized = di.tz_localize(tz, ambiguous='NaT') + + times = ['11/06/2011 00:00', np.NaN, np.NaN, 
'11/06/2011 02:00', + '11/06/2011 03:00'] + di_test = DatetimeIndex(times, tz='US/Eastern') + + # left dtype is datetime64[ns, US/Eastern] + # right is datetime64[ns, tzfile('/usr/share/zoneinfo/US/Eastern')] + tm.assert_numpy_array_equal(di_test.values, localized.values) + + @pytest.mark.parametrize('tz', [pytz.timezone('US/Eastern'), + gettz('US/Eastern')]) + def test_dti_tz_localize_ambiguous_flags(self, tz): + # November 6, 2011, fall back, repeat 2 AM hour + + # Pass in flags to determine right dst transition + dr = date_range(datetime(2011, 11, 6, 0), periods=5, + freq=pd.offsets.Hour(), tz=tz) + times = ['11/06/2011 00:00', '11/06/2011 01:00', '11/06/2011 01:00', + '11/06/2011 02:00', '11/06/2011 03:00'] + + # Test tz_localize + di = DatetimeIndex(times) + is_dst = [1, 1, 0, 0, 0] + localized = di.tz_localize(tz, ambiguous=is_dst) + tm.assert_index_equal(dr, localized) + tm.assert_index_equal(dr, DatetimeIndex(times, tz=tz, + ambiguous=is_dst)) + + localized = di.tz_localize(tz, ambiguous=np.array(is_dst)) + tm.assert_index_equal(dr, localized) + + localized = di.tz_localize(tz, + ambiguous=np.array(is_dst).astype('bool')) + tm.assert_index_equal(dr, localized) + + # Test constructor + localized = DatetimeIndex(times, tz=tz, ambiguous=is_dst) + tm.assert_index_equal(dr, localized) + + # Test duplicate times where infer_dst fails + times += times + di = DatetimeIndex(times) + + # When the sizes are incompatible, make sure error is raised + with pytest.raises(Exception): + di.tz_localize(tz, ambiguous=is_dst) + + # When sizes are compatible and there are repeats ('infer' won't work) + is_dst = np.hstack((is_dst, is_dst)) + localized = di.tz_localize(tz, ambiguous=is_dst) + dr = dr.append(dr) + tm.assert_index_equal(dr, localized) + + # When there is no dst transition, nothing special happens + dr = date_range(datetime(2011, 6, 1, 0), periods=10, + freq=pd.offsets.Hour()) + is_dst = np.array([1] * 10) + localized = dr.tz_localize(tz) + localized_is_dst = 
dr.tz_localize(tz, ambiguous=is_dst) + tm.assert_index_equal(localized, localized_is_dst) + + # TODO: belongs outside tz_localize tests? + @pytest.mark.parametrize('tz', ['Europe/London', 'dateutil/Europe/London']) + def test_dti_construction_ambiguous_endpoint(self, tz): + # construction with an ambiguous end-point + # GH#11626 + + # FIXME: This next block fails to raise; it was taken from an older + # version of this test that had an indention mistake that caused it + # to not get executed. + # with pytest.raises(pytz.AmbiguousTimeError): + # date_range("2013-10-26 23:00", "2013-10-27 01:00", + # tz="Europe/London", freq="H") + + times = date_range("2013-10-26 23:00", "2013-10-27 01:00", freq="H", + tz=tz, ambiguous='infer') + assert times[0] == Timestamp('2013-10-26 23:00', tz=tz, freq="H") + + if str(tz).startswith('dateutil'): + if LooseVersion(dateutil.__version__) < LooseVersion('2.6.0'): + # see GH#14621 + assert times[-1] == Timestamp('2013-10-27 01:00:00+0000', + tz=tz, freq="H") + elif LooseVersion(dateutil.__version__) > LooseVersion('2.6.0'): + # fixed ambiguous behavior + assert times[-1] == Timestamp('2013-10-27 01:00:00+0100', + tz=tz, freq="H") + else: + assert times[-1] == Timestamp('2013-10-27 01:00:00+0000', + tz=tz, freq="H") + + def test_dti_tz_localize_bdate_range(self): + dr = pd.bdate_range('1/1/2009', '1/1/2010') + dr_utc = pd.bdate_range('1/1/2009', '1/1/2010', tz=pytz.utc) + localized = dr.tz_localize(pytz.utc) + tm.assert_index_equal(dr_utc, localized) + + # ------------------------------------------------------------- + # DatetimeIndex.normalize + + def test_normalize_tz(self): + rng = date_range('1/1/2000 9:30', periods=10, freq='D', + tz='US/Eastern') + + result = rng.normalize() + expected = date_range('1/1/2000', periods=10, freq='D', + tz='US/Eastern') + tm.assert_index_equal(result, expected) + + assert result.is_normalized + assert not rng.is_normalized + + rng = date_range('1/1/2000 9:30', periods=10, freq='D', tz='UTC') + + 
result = rng.normalize() + expected = date_range('1/1/2000', periods=10, freq='D', tz='UTC') + tm.assert_index_equal(result, expected) + + assert result.is_normalized + assert not rng.is_normalized + + rng = date_range('1/1/2000 9:30', periods=10, freq='D', tz=tzlocal()) + result = rng.normalize() + expected = date_range('1/1/2000', periods=10, freq='D', tz=tzlocal()) + tm.assert_index_equal(result, expected) + + assert result.is_normalized + assert not rng.is_normalized + + @td.skip_if_windows + @pytest.mark.parametrize('timezone', ['US/Pacific', 'US/Eastern', 'UTC', + 'Asia/Kolkata', 'Asia/Shanghai', + 'Australia/Canberra']) + def test_normalize_tz_local(self, timezone): + # GH#13459 + with tm.set_timezone(timezone): + rng = date_range('1/1/2000 9:30', periods=10, freq='D', + tz=tzlocal()) + + result = rng.normalize() + expected = date_range('1/1/2000', periods=10, freq='D', + tz=tzlocal()) + tm.assert_index_equal(result, expected) + + assert result.is_normalized + assert not rng.is_normalized + + # ------------------------------------------------------------ + # DatetimeIndex.__new__ + + @pytest.mark.parametrize('prefix', ['', 'dateutil/']) + def test_dti_constructor_static_tzinfo(self, prefix): + # it works! 
+ index = DatetimeIndex([datetime(2012, 1, 1)], tz=prefix + 'EST') + index.hour + index[0] + + def test_dti_constructor_with_fixed_tz(self): + off = FixedOffset(420, '+07:00') + start = datetime(2012, 3, 11, 5, 0, 0, tzinfo=off) + end = datetime(2012, 6, 11, 5, 0, 0, tzinfo=off) + rng = date_range(start=start, end=end) + assert off == rng.tz + + rng2 = date_range(start, periods=len(rng), tz=off) + tm.assert_index_equal(rng, rng2) + + rng3 = date_range('3/11/2012 05:00:00+07:00', + '6/11/2012 05:00:00+07:00') + assert (rng.values == rng3.values).all() + + @pytest.mark.parametrize('tzstr', ['US/Eastern', 'dateutil/US/Eastern']) + def test_dti_convert_datetime_list(self, tzstr): + dr = date_range('2012-06-02', periods=10, + tz=tzstr, name='foo') + dr2 = DatetimeIndex(list(dr), name='foo') + tm.assert_index_equal(dr, dr2) + assert dr.tz == dr2.tz + assert dr2.name == 'foo' + + def test_dti_construction_univalent(self): + rng = date_range('03/12/2012 00:00', periods=10, freq='W-FRI', + tz='US/Eastern') + rng2 = DatetimeIndex(data=rng, tz='US/Eastern') + tm.assert_index_equal(rng, rng2) + + @pytest.mark.parametrize('tz', [pytz.timezone('US/Eastern'), + gettz('US/Eastern')]) + def test_dti_from_tzaware_datetime(self, tz): + d = [datetime(2012, 8, 19, tzinfo=tz)] + + index = DatetimeIndex(d) + assert timezones.tz_compare(index.tz, tz) + + @pytest.mark.parametrize('tzstr', ['US/Eastern', 'dateutil/US/Eastern']) + def test_dti_tz_constructors(self, tzstr): + """ Test different DatetimeIndex constructions with timezone + Follow-up of GH#4229 + """ + + arr = ['11/10/2005 08:00:00', '11/10/2005 09:00:00'] + + idx1 = to_datetime(arr).tz_localize(tzstr) + idx2 = DatetimeIndex(start="2005-11-10 08:00:00", freq='H', periods=2, + tz=tzstr) + idx3 = DatetimeIndex(arr, tz=tzstr) + idx4 = DatetimeIndex(np.array(arr), tz=tzstr) + + for other in [idx2, idx3, idx4]: + tm.assert_index_equal(idx1, other) + + # ------------------------------------------------------------- + # Unsorted + + 
def test_join_utc_convert(self): + rng = date_range('1/1/2011', periods=100, freq='H', tz='utc') + + left = rng.tz_convert('US/Eastern') + right = rng.tz_convert('Europe/Berlin') + + for how in ['inner', 'outer', 'left', 'right']: + result = left.join(left[:-5], how=how) + assert isinstance(result, DatetimeIndex) + assert result.tz == left.tz + + result = left.join(right[:-5], how=how) + assert isinstance(result, DatetimeIndex) + assert result.tz.zone == 'UTC' + + def test_dti_drop_dont_lose_tz(self): + # GH#2621 + ind = date_range("2012-12-01", periods=10, tz="utc") + ind = ind.drop(ind[-1]) + + assert ind.tz is not None + + def test_date_range_localize(self): + rng = date_range('3/11/2012 03:00', periods=15, freq='H', + tz='US/Eastern') + rng2 = DatetimeIndex(['3/11/2012 03:00', '3/11/2012 04:00'], + tz='US/Eastern') + rng3 = date_range('3/11/2012 03:00', periods=15, freq='H') + rng3 = rng3.tz_localize('US/Eastern') + + tm.assert_index_equal(rng, rng3) + + # DST transition time + val = rng[0] + exp = Timestamp('3/11/2012 03:00', tz='US/Eastern') + + assert val.hour == 3 + assert exp.hour == 3 + assert val == exp # same UTC value + tm.assert_index_equal(rng[:2], rng2) + + # Right before the DST transition + rng = date_range('3/11/2012 00:00', periods=2, freq='H', + tz='US/Eastern') + rng2 = DatetimeIndex(['3/11/2012 00:00', '3/11/2012 01:00'], + tz='US/Eastern') + tm.assert_index_equal(rng, rng2) + exp = Timestamp('3/11/2012 00:00', tz='US/Eastern') + assert exp.hour == 0 + assert rng[0] == exp + exp = Timestamp('3/11/2012 01:00', tz='US/Eastern') + assert exp.hour == 1 + assert rng[1] == exp + + rng = date_range('3/11/2012 00:00', periods=10, freq='H', + tz='US/Eastern') + assert rng[2].hour == 3 + + def test_timestamp_equality_different_timezones(self): + utc_range = date_range('1/1/2000', periods=20, tz='UTC') + eastern_range = utc_range.tz_convert('US/Eastern') + berlin_range = utc_range.tz_convert('Europe/Berlin') + + for a, b, c in zip(utc_range, 
eastern_range, berlin_range): + assert a == b + assert b == c + assert a == c + + assert (utc_range == eastern_range).all() + assert (utc_range == berlin_range).all() + assert (berlin_range == eastern_range).all() + + def test_dti_intersection(self): + rng = date_range('1/1/2011', periods=100, freq='H', tz='utc') + + left = rng[10:90][::-1] + right = rng[20:80][::-1] + + assert left.tz == rng.tz + result = left.intersection(right) + assert result.tz == left.tz + + def test_dti_equals_with_tz(self): + left = date_range('1/1/2011', periods=100, freq='H', tz='utc') + right = date_range('1/1/2011', periods=100, freq='H', tz='US/Eastern') + + assert not left.equals(right) + + @pytest.mark.parametrize('tzstr', ['US/Eastern', 'dateutil/US/Eastern']) + def test_dti_tz_nat(self, tzstr): + idx = DatetimeIndex([Timestamp("2013-1-1", tz=tzstr), pd.NaT]) + + assert isna(idx[1]) + assert idx[0].tzinfo is not None + + @pytest.mark.parametrize('tzstr', ['US/Eastern', 'dateutil/US/Eastern']) + def test_dti_astype_asobject_tzinfos(self, tzstr): + # GH#1345 + + # dates around a dst transition + rng = date_range('2/13/2010', '5/6/2010', tz=tzstr) + + objs = rng.astype(object) + for i, x in enumerate(objs): + exval = rng[i] + assert x == exval + assert x.tzinfo == exval.tzinfo + + objs = rng.astype(object) + for i, x in enumerate(objs): + exval = rng[i] + assert x == exval + assert x.tzinfo == exval.tzinfo + + @pytest.mark.parametrize('tzstr', ['US/Eastern', 'dateutil/US/Eastern']) + def test_dti_with_timezone_repr(self, tzstr): + rng = date_range('4/13/2010', '5/6/2010') + + rng_eastern = rng.tz_localize(tzstr) + + rng_repr = repr(rng_eastern) + assert '2010-04-13 00:00:00' in rng_repr + + @pytest.mark.parametrize('tzstr', ['US/Eastern', 'dateutil/US/Eastern']) + def test_dti_take_dont_lose_meta(self, tzstr): + rng = date_range('1/1/2000', periods=20, tz=tzstr) + + result = rng.take(lrange(5)) + assert result.tz == rng.tz + assert result.freq == rng.freq + + 
@pytest.mark.parametrize('tzstr', ['US/Eastern', 'dateutil/US/Eastern']) + def test_utc_box_timestamp_and_localize(self, tzstr): + tz = timezones.maybe_get_tz(tzstr) + + rng = date_range('3/11/2012', '3/12/2012', freq='H', tz='utc') + rng_eastern = rng.tz_convert(tzstr) + + expected = rng[-1].astimezone(tz) + + stamp = rng_eastern[-1] + assert stamp == expected + assert stamp.tzinfo == expected.tzinfo + + # right tzinfo + rng = date_range('3/13/2012', '3/14/2012', freq='H', tz='utc') + rng_eastern = rng.tz_convert(tzstr) + # test not valid for dateutil timezones. + # assert 'EDT' in repr(rng_eastern[0].tzinfo) + assert ('EDT' in repr(rng_eastern[0].tzinfo) or + 'tzfile' in repr(rng_eastern[0].tzinfo)) + + def test_dti_to_pydatetime(self): + dt = dateutil.parser.parse('2012-06-13T01:39:00Z') + dt = dt.replace(tzinfo=tzlocal()) + + arr = np.array([dt], dtype=object) + + result = to_datetime(arr, utc=True) + assert result.tz is pytz.utc + + rng = date_range('2012-11-03 03:00', '2012-11-05 03:00', tz=tzlocal()) + arr = rng.to_pydatetime() + result = to_datetime(arr, utc=True) + assert result.tz is pytz.utc + + def test_dti_to_pydatetime_fizedtz(self): + dates = np.array([datetime(2000, 1, 1, tzinfo=fixed_off), + datetime(2000, 1, 2, tzinfo=fixed_off), + datetime(2000, 1, 3, tzinfo=fixed_off)]) + dti = DatetimeIndex(dates) + + result = dti.to_pydatetime() + tm.assert_numpy_array_equal(dates, result) + + result = dti._mpl_repr() + tm.assert_numpy_array_equal(dates, result) + + @pytest.mark.parametrize('tz', [pytz.timezone('US/Central'), + gettz('US/Central')]) + def test_with_tz(self, tz): + # just want it to work + start = datetime(2011, 3, 12, tzinfo=pytz.utc) + dr = bdate_range(start, periods=50, freq=pd.offsets.Hour()) + assert dr.tz is pytz.utc + + # DateRange with naive datetimes + dr = bdate_range('1/1/2005', '1/1/2009', tz=pytz.utc) + dr = bdate_range('1/1/2005', '1/1/2009', tz=tz) + + # normalized + central = dr.tz_convert(tz) + assert central.tz is tz + naive = 
central[0].to_pydatetime().replace(tzinfo=None) + comp = tslib._localize_pydatetime(naive, tz).tzinfo + assert central[0].tz is comp + + # compare vs a localized tz + naive = dr[0].to_pydatetime().replace(tzinfo=None) + comp = tslib._localize_pydatetime(naive, tz).tzinfo + assert central[0].tz is comp + + # datetimes with tzinfo set + dr = bdate_range(datetime(2005, 1, 1, tzinfo=pytz.utc), + datetime(2009, 1, 1, tzinfo=pytz.utc)) + with pytest.raises(Exception): + bdate_range(datetime(2005, 1, 1, tzinfo=pytz.utc), '1/1/2009', + tz=tz) + + @pytest.mark.parametrize('prefix', ['', 'dateutil/']) + def test_field_access_localize(self, prefix): + strdates = ['1/1/2012', '3/1/2012', '4/1/2012'] + rng = DatetimeIndex(strdates, tz=prefix + 'US/Eastern') + assert (rng.hour == 0).all() + + # a more unusual time zone, #1946 + dr = date_range('2011-10-02 00:00', freq='h', periods=10, + tz=prefix + 'America/Atikokan') + + expected = Index(np.arange(10, dtype=np.int64)) + tm.assert_index_equal(dr.hour, expected) + + @pytest.mark.parametrize('tz', [pytz.timezone('US/Eastern'), + gettz('US/Eastern')]) + def test_dti_convert_tz_aware_datetime_datetime(self, tz): + # GH#1581 + dates = [datetime(2000, 1, 1), datetime(2000, 1, 2), + datetime(2000, 1, 3)] + + dates_aware = [tslib._localize_pydatetime(x, tz) for x in dates] + result = DatetimeIndex(dates_aware) + assert timezones.tz_compare(result.tz, tz) + + converted = to_datetime(dates_aware, utc=True) + ex_vals = np.array([Timestamp(x).value for x in dates_aware]) + tm.assert_numpy_array_equal(converted.asi8, ex_vals) + assert converted.tz is pytz.utc + + def test_dti_union_aware(self): + # non-overlapping + rng = date_range("2012-11-15 00:00:00", periods=6, freq="H", + tz="US/Central") + + rng2 = date_range("2012-11-15 12:00:00", periods=6, freq="H", + tz="US/Eastern") + + result = rng.union(rng2) + assert result.tz.zone == 'UTC' + + +class TestDateRange(object): + """Tests for date_range with timezones""" + def 
test_hongkong_tz_convert(self): + # GH#1673 smoke test + dr = date_range('2012-01-01', '2012-01-10', freq='D', tz='Hongkong') + + # it works! + dr.hour + + @pytest.mark.parametrize('tzstr', ['US/Eastern', 'dateutil/US/Eastern']) + def test_date_range_span_dst_transition(self, tzstr): + # GH#1778 + + # Standard -> Daylight Savings Time + dr = date_range('03/06/2012 00:00', periods=200, freq='W-FRI', + tz='US/Eastern') + + assert (dr.hour == 0).all() + + dr = date_range('2012-11-02', periods=10, tz=tzstr) + assert (dr.hour == 0).all() + + @pytest.mark.parametrize('tzstr', ['US/Eastern', 'dateutil/US/Eastern']) + def test_date_range_timezone_str_argument(self, tzstr): + tz = timezones.maybe_get_tz(tzstr) + result = date_range('1/1/2000', periods=10, tz=tzstr) + expected = date_range('1/1/2000', periods=10, tz=tz) + + tm.assert_index_equal(result, expected) + + def test_date_range_with_fixedoffset_noname(self): + off = fixed_off_no_name + start = datetime(2012, 3, 11, 5, 0, 0, tzinfo=off) + end = datetime(2012, 6, 11, 5, 0, 0, tzinfo=off) + rng = date_range(start=start, end=end) + assert off == rng.tz + + idx = Index([start, end]) + assert off == idx.tz + + @pytest.mark.parametrize('tzstr', ['US/Eastern', 'dateutil/US/Eastern']) + def test_date_range_with_tz(self, tzstr): + stamp = Timestamp('3/11/2012 05:00', tz=tzstr) + assert stamp.hour == 5 + + rng = date_range('3/11/2012 04:00', periods=10, freq='H', + tz=tzstr) + + assert stamp == rng[1] + + +class TestToDatetime(object): + """Tests for the to_datetime constructor with timezones""" + def test_to_datetime_utc(self): + arr = np.array([dateutil.parser.parse('2012-06-13T01:39:00Z')], + dtype=object) + + result = to_datetime(arr, utc=True) + assert result.tz is pytz.utc + + def test_to_datetime_fixed_offset(self): + dates = [datetime(2000, 1, 1, tzinfo=fixed_off), + datetime(2000, 1, 2, tzinfo=fixed_off), + datetime(2000, 1, 3, tzinfo=fixed_off)] + result = to_datetime(dates) + assert result.tz == fixed_off diff --git 
a/pandas/tests/tseries/test_timezones.py b/pandas/tests/tseries/test_timezones.py index 8f46e0a58580e..565e735c14c80 100644 --- a/pandas/tests/tseries/test_timezones.py +++ b/pandas/tests/tseries/test_timezones.py @@ -5,43 +5,13 @@ import dateutil import numpy as np -from dateutil.parser import parse -from pytz import NonExistentTimeError -from distutils.version import LooseVersion -from dateutil.tz import tzlocal -from datetime import datetime, timedelta, tzinfo +from datetime import datetime import pandas.util.testing as tm -import pandas.util._test_decorators as td -import pandas.tseries.offsets as offsets -from pandas.compat import lrange, zip -from pandas.core.indexes.datetimes import bdate_range, date_range +from pandas.core.indexes.datetimes import date_range from pandas._libs import tslib from pandas._libs.tslibs import timezones, conversion -from pandas import (Index, isna, Timestamp, NaT, - DatetimeIndex, to_datetime) -from pandas.util.testing import set_timezone - - -class FixedOffset(tzinfo): - """Fixed offset in minutes east from UTC.""" - - def __init__(self, offset, name): - self.__offset = timedelta(minutes=offset) - self.__name = name - - def utcoffset(self, dt): - return self.__offset - - def tzname(self, dt): - return self.__name - - def dst(self, dt): - return timedelta(0) - - -fixed_off = FixedOffset(-420, '-07:00') -fixed_off_no_name = FixedOffset(-330, None) +from pandas import Timestamp class TestTimeZoneSupportPytz(object): @@ -68,399 +38,6 @@ def cmptz(self, tz1, tz2): # tests. 
return tz1.zone == tz2.zone - def test_utc_to_local_no_modify(self): - rng = date_range('3/11/2012', '3/12/2012', freq='H', tz='utc') - rng_eastern = rng.tz_convert(self.tzstr('US/Eastern')) - - # Values are unmodified - tm.assert_numpy_array_equal(rng.asi8, rng_eastern.asi8) - - assert self.cmptz(rng_eastern.tz, self.tz('US/Eastern')) - - def test_utc_to_local_no_modify_explicit(self): - rng = date_range('3/11/2012', '3/12/2012', freq='H', tz='utc') - rng_eastern = rng.tz_convert(self.tz('US/Eastern')) - - # Values are unmodified - tm.assert_numpy_array_equal(rng.asi8, rng_eastern.asi8) - - assert rng_eastern.tz == self.tz('US/Eastern') - - def test_localize_utc_conversion(self): - # Localizing to time zone should: - # 1) check for DST ambiguities - # 2) convert to UTC - - rng = date_range('3/10/2012', '3/11/2012', freq='30T') - - converted = rng.tz_localize(self.tzstr('US/Eastern')) - expected_naive = rng + offsets.Hour(5) - tm.assert_numpy_array_equal(converted.asi8, expected_naive.asi8) - - # DST ambiguity, this should fail - rng = date_range('3/11/2012', '3/12/2012', freq='30T') - # Is this really how it should fail?? - pytest.raises(NonExistentTimeError, rng.tz_localize, - self.tzstr('US/Eastern')) - - def test_localize_utc_conversion_explicit(self): - # Localizing to time zone should: - # 1) check for DST ambiguities - # 2) convert to UTC - - rng = date_range('3/10/2012', '3/11/2012', freq='30T') - converted = rng.tz_localize(self.tz('US/Eastern')) - expected_naive = rng + offsets.Hour(5) - tm.assert_numpy_array_equal(converted.asi8, expected_naive.asi8) - - # DST ambiguity, this should fail - rng = date_range('3/11/2012', '3/12/2012', freq='30T') - # Is this really how it should fail?? 
- pytest.raises(NonExistentTimeError, rng.tz_localize, - self.tz('US/Eastern')) - - def test_tz_localize_dti(self): - dti = DatetimeIndex(start='1/1/2005', end='1/1/2005 0:00:30.256', - freq='L') - dti2 = dti.tz_localize(self.tzstr('US/Eastern')) - - dti_utc = DatetimeIndex(start='1/1/2005 05:00', - end='1/1/2005 5:00:30.256', freq='L', tz='utc') - - tm.assert_numpy_array_equal(dti2.values, dti_utc.values) - - dti3 = dti2.tz_convert(self.tzstr('US/Pacific')) - tm.assert_numpy_array_equal(dti3.values, dti_utc.values) - - dti = DatetimeIndex(start='11/6/2011 1:59', end='11/6/2011 2:00', - freq='L') - pytest.raises(pytz.AmbiguousTimeError, dti.tz_localize, - self.tzstr('US/Eastern')) - - dti = DatetimeIndex(start='3/13/2011 1:59', end='3/13/2011 2:00', - freq='L') - pytest.raises(pytz.NonExistentTimeError, dti.tz_localize, - self.tzstr('US/Eastern')) - - def test_create_with_tz(self): - stamp = Timestamp('3/11/2012 05:00', tz=self.tzstr('US/Eastern')) - assert stamp.hour == 5 - - rng = date_range('3/11/2012 04:00', periods=10, freq='H', - tz=self.tzstr('US/Eastern')) - - assert stamp == rng[1] - - def test_create_with_fixed_tz(self): - off = FixedOffset(420, '+07:00') - start = datetime(2012, 3, 11, 5, 0, 0, tzinfo=off) - end = datetime(2012, 6, 11, 5, 0, 0, tzinfo=off) - rng = date_range(start=start, end=end) - assert off == rng.tz - - rng2 = date_range(start, periods=len(rng), tz=off) - tm.assert_index_equal(rng, rng2) - - rng3 = date_range('3/11/2012 05:00:00+07:00', - '6/11/2012 05:00:00+07:00') - assert (rng.values == rng3.values).all() - - def test_create_with_fixedoffset_noname(self): - off = fixed_off_no_name - start = datetime(2012, 3, 11, 5, 0, 0, tzinfo=off) - end = datetime(2012, 6, 11, 5, 0, 0, tzinfo=off) - rng = date_range(start=start, end=end) - assert off == rng.tz - - idx = Index([start, end]) - assert off == idx.tz - - def test_date_range_localize(self): - rng = date_range('3/11/2012 03:00', periods=15, freq='H', - tz='US/Eastern') - rng2 = 
DatetimeIndex(['3/11/2012 03:00', '3/11/2012 04:00'], - tz='US/Eastern') - rng3 = date_range('3/11/2012 03:00', periods=15, freq='H') - rng3 = rng3.tz_localize('US/Eastern') - - tm.assert_index_equal(rng, rng3) - - # DST transition time - val = rng[0] - exp = Timestamp('3/11/2012 03:00', tz='US/Eastern') - - assert val.hour == 3 - assert exp.hour == 3 - assert val == exp # same UTC value - tm.assert_index_equal(rng[:2], rng2) - - # Right before the DST transition - rng = date_range('3/11/2012 00:00', periods=2, freq='H', - tz='US/Eastern') - rng2 = DatetimeIndex(['3/11/2012 00:00', '3/11/2012 01:00'], - tz='US/Eastern') - tm.assert_index_equal(rng, rng2) - exp = Timestamp('3/11/2012 00:00', tz='US/Eastern') - assert exp.hour == 0 - assert rng[0] == exp - exp = Timestamp('3/11/2012 01:00', tz='US/Eastern') - assert exp.hour == 1 - assert rng[1] == exp - - rng = date_range('3/11/2012 00:00', periods=10, freq='H', - tz='US/Eastern') - assert rng[2].hour == 3 - - def test_utc_box_timestamp_and_localize(self): - rng = date_range('3/11/2012', '3/12/2012', freq='H', tz='utc') - rng_eastern = rng.tz_convert(self.tzstr('US/Eastern')) - - tz = self.tz('US/Eastern') - expected = rng[-1].astimezone(tz) - - stamp = rng_eastern[-1] - assert stamp == expected - assert stamp.tzinfo == expected.tzinfo - - # right tzinfo - rng = date_range('3/13/2012', '3/14/2012', freq='H', tz='utc') - rng_eastern = rng.tz_convert(self.tzstr('US/Eastern')) - # test not valid for dateutil timezones. 
- # assert 'EDT' in repr(rng_eastern[0].tzinfo) - assert ('EDT' in repr(rng_eastern[0].tzinfo) or - 'tzfile' in repr(rng_eastern[0].tzinfo)) - - def test_timestamp_tz_convert(self): - strdates = ['1/1/2012', '3/1/2012', '4/1/2012'] - idx = DatetimeIndex(strdates, tz=self.tzstr('US/Eastern')) - - conv = idx[0].tz_convert(self.tzstr('US/Pacific')) - expected = idx.tz_convert(self.tzstr('US/Pacific'))[0] - - assert conv == expected - - def test_pass_dates_localize_to_utc(self): - strdates = ['1/1/2012', '3/1/2012', '4/1/2012'] - - idx = DatetimeIndex(strdates) - conv = idx.tz_localize(self.tzstr('US/Eastern')) - - fromdates = DatetimeIndex(strdates, tz=self.tzstr('US/Eastern')) - - assert conv.tz == fromdates.tz - tm.assert_numpy_array_equal(conv.values, fromdates.values) - - def test_field_access_localize(self): - strdates = ['1/1/2012', '3/1/2012', '4/1/2012'] - rng = DatetimeIndex(strdates, tz=self.tzstr('US/Eastern')) - assert (rng.hour == 0).all() - - # a more unusual time zone, #1946 - dr = date_range('2011-10-02 00:00', freq='h', periods=10, - tz=self.tzstr('America/Atikokan')) - - expected = Index(np.arange(10, dtype=np.int64)) - tm.assert_index_equal(dr.hour, expected) - - def test_with_tz(self): - tz = self.tz('US/Central') - - # just want it to work - start = datetime(2011, 3, 12, tzinfo=pytz.utc) - dr = bdate_range(start, periods=50, freq=offsets.Hour()) - assert dr.tz is pytz.utc - - # DateRange with naive datetimes - dr = bdate_range('1/1/2005', '1/1/2009', tz=pytz.utc) - dr = bdate_range('1/1/2005', '1/1/2009', tz=tz) - - # normalized - central = dr.tz_convert(tz) - assert central.tz is tz - comp = self.localize(tz, central[0].to_pydatetime().replace( - tzinfo=None)).tzinfo - assert central[0].tz is comp - - # compare vs a localized tz - comp = self.localize(tz, - dr[0].to_pydatetime().replace(tzinfo=None)).tzinfo - assert central[0].tz is comp - - # datetimes with tzinfo set - dr = bdate_range(datetime(2005, 1, 1, tzinfo=pytz.utc), - datetime(2009, 1, 
1, tzinfo=pytz.utc)) - - pytest.raises(Exception, bdate_range, - datetime(2005, 1, 1, tzinfo=pytz.utc), '1/1/2009', - tz=tz) - - def test_tz_localize(self): - dr = bdate_range('1/1/2009', '1/1/2010') - dr_utc = bdate_range('1/1/2009', '1/1/2010', tz=pytz.utc) - localized = dr.tz_localize(pytz.utc) - tm.assert_index_equal(dr_utc, localized) - - def test_with_tz_ambiguous_times(self): - tz = self.tz('US/Eastern') - - # March 13, 2011, spring forward, skip from 2 AM to 3 AM - dr = date_range(datetime(2011, 3, 13, 1, 30), periods=3, - freq=offsets.Hour()) - pytest.raises(pytz.NonExistentTimeError, dr.tz_localize, tz) - - # after dst transition, it works - dr = date_range(datetime(2011, 3, 13, 3, 30), periods=3, - freq=offsets.Hour(), tz=tz) - - # November 6, 2011, fall back, repeat 2 AM hour - dr = date_range(datetime(2011, 11, 6, 1, 30), periods=3, - freq=offsets.Hour()) - pytest.raises(pytz.AmbiguousTimeError, dr.tz_localize, tz) - - # UTC is OK - dr = date_range(datetime(2011, 3, 13), periods=48, - freq=offsets.Minute(30), tz=pytz.utc) - - def test_ambiguous_infer(self): - # November 6, 2011, fall back, repeat 2 AM hour - # With no repeated hours, we cannot infer the transition - tz = self.tz('US/Eastern') - dr = date_range(datetime(2011, 11, 6, 0), periods=5, - freq=offsets.Hour()) - pytest.raises(pytz.AmbiguousTimeError, dr.tz_localize, tz) - - # With repeated hours, we can infer the transition - dr = date_range(datetime(2011, 11, 6, 0), periods=5, - freq=offsets.Hour(), tz=tz) - times = ['11/06/2011 00:00', '11/06/2011 01:00', '11/06/2011 01:00', - '11/06/2011 02:00', '11/06/2011 03:00'] - di = DatetimeIndex(times) - localized = di.tz_localize(tz, ambiguous='infer') - tm.assert_index_equal(dr, localized) - with tm.assert_produces_warning(FutureWarning): - localized_old = di.tz_localize(tz, infer_dst=True) - tm.assert_index_equal(dr, localized_old) - tm.assert_index_equal(dr, DatetimeIndex(times, tz=tz, - ambiguous='infer')) - - # When there is no dst transition, 
nothing special happens - dr = date_range(datetime(2011, 6, 1, 0), periods=10, - freq=offsets.Hour()) - localized = dr.tz_localize(tz) - localized_infer = dr.tz_localize(tz, ambiguous='infer') - tm.assert_index_equal(localized, localized_infer) - with tm.assert_produces_warning(FutureWarning): - localized_infer_old = dr.tz_localize(tz, infer_dst=True) - tm.assert_index_equal(localized, localized_infer_old) - - def test_ambiguous_flags(self): - # November 6, 2011, fall back, repeat 2 AM hour - tz = self.tz('US/Eastern') - - # Pass in flags to determine right dst transition - dr = date_range(datetime(2011, 11, 6, 0), periods=5, - freq=offsets.Hour(), tz=tz) - times = ['11/06/2011 00:00', '11/06/2011 01:00', '11/06/2011 01:00', - '11/06/2011 02:00', '11/06/2011 03:00'] - - # Test tz_localize - di = DatetimeIndex(times) - is_dst = [1, 1, 0, 0, 0] - localized = di.tz_localize(tz, ambiguous=is_dst) - tm.assert_index_equal(dr, localized) - tm.assert_index_equal(dr, DatetimeIndex(times, tz=tz, - ambiguous=is_dst)) - - localized = di.tz_localize(tz, ambiguous=np.array(is_dst)) - tm.assert_index_equal(dr, localized) - - localized = di.tz_localize(tz, - ambiguous=np.array(is_dst).astype('bool')) - tm.assert_index_equal(dr, localized) - - # Test constructor - localized = DatetimeIndex(times, tz=tz, ambiguous=is_dst) - tm.assert_index_equal(dr, localized) - - # Test duplicate times where infer_dst fails - times += times - di = DatetimeIndex(times) - - # When the sizes are incompatible, make sure error is raised - pytest.raises(Exception, di.tz_localize, tz, ambiguous=is_dst) - - # When sizes are compatible and there are repeats ('infer' won't work) - is_dst = np.hstack((is_dst, is_dst)) - localized = di.tz_localize(tz, ambiguous=is_dst) - dr = dr.append(dr) - tm.assert_index_equal(dr, localized) - - # When there is no dst transition, nothing special happens - dr = date_range(datetime(2011, 6, 1, 0), periods=10, - freq=offsets.Hour()) - is_dst = np.array([1] * 10) - localized = 
dr.tz_localize(tz) - localized_is_dst = dr.tz_localize(tz, ambiguous=is_dst) - tm.assert_index_equal(localized, localized_is_dst) - - # construction with an ambiguous end-point - # GH 11626 - tz = self.tzstr("Europe/London") - - def f(): - date_range("2013-10-26 23:00", "2013-10-27 01:00", - tz="Europe/London", freq="H") - pytest.raises(pytz.AmbiguousTimeError, f) - - times = date_range("2013-10-26 23:00", "2013-10-27 01:00", freq="H", - tz=tz, ambiguous='infer') - assert times[0] == Timestamp('2013-10-26 23:00', tz=tz, freq="H") - - if str(tz).startswith('dateutil'): - if LooseVersion(dateutil.__version__) < LooseVersion('2.6.0'): - # see gh-14621 - assert times[-1] == Timestamp('2013-10-27 01:00:00+0000', - tz=tz, freq="H") - elif LooseVersion(dateutil.__version__) > LooseVersion('2.6.0'): - # fixed ambiguous behavior - assert times[-1] == Timestamp('2013-10-27 01:00:00+0100', - tz=tz, freq="H") - else: - assert times[-1] == Timestamp('2013-10-27 01:00:00+0000', - tz=tz, freq="H") - - def test_ambiguous_nat(self): - tz = self.tz('US/Eastern') - times = ['11/06/2011 00:00', '11/06/2011 01:00', '11/06/2011 01:00', - '11/06/2011 02:00', '11/06/2011 03:00'] - di = DatetimeIndex(times) - localized = di.tz_localize(tz, ambiguous='NaT') - - times = ['11/06/2011 00:00', np.NaN, np.NaN, '11/06/2011 02:00', - '11/06/2011 03:00'] - di_test = DatetimeIndex(times, tz='US/Eastern') - - # left dtype is datetime64[ns, US/Eastern] - # right is datetime64[ns, tzfile('/usr/share/zoneinfo/US/Eastern')] - tm.assert_numpy_array_equal(di_test.values, localized.values) - - def test_nonexistent_raise_coerce(self): - # See issue 13057 - from pytz.exceptions import NonExistentTimeError - times = ['2015-03-08 01:00', '2015-03-08 02:00', '2015-03-08 03:00'] - index = DatetimeIndex(times) - tz = 'US/Eastern' - pytest.raises(NonExistentTimeError, - index.tz_localize, tz=tz) - pytest.raises(NonExistentTimeError, - index.tz_localize, tz=tz, errors='raise') - result = index.tz_localize(tz=tz, 
errors='coerce') - test_times = ['2015-03-08 01:00-05:00', 'NaT', - '2015-03-08 03:00-04:00'] - expected = DatetimeIndex(test_times)\ - .tz_localize('UTC').tz_convert('US/Eastern') - tm.assert_index_equal(result, expected) - # test utility methods def test_infer_tz(self): eastern = self.tz('US/Eastern') @@ -486,183 +63,6 @@ def test_infer_tz(self): pytest.raises(Exception, timezones.infer_tzinfo, start, end) pytest.raises(Exception, timezones.infer_tzinfo, end, start) - def test_tz_string(self): - result = date_range('1/1/2000', periods=10, - tz=self.tzstr('US/Eastern')) - expected = date_range('1/1/2000', periods=10, tz=self.tz('US/Eastern')) - - tm.assert_index_equal(result, expected) - - def test_take_dont_lose_meta(self): - rng = date_range('1/1/2000', periods=20, tz=self.tzstr('US/Eastern')) - - result = rng.take(lrange(5)) - assert result.tz == rng.tz - assert result.freq == rng.freq - - def test_index_with_timezone_repr(self): - rng = date_range('4/13/2010', '5/6/2010') - - rng_eastern = rng.tz_localize(self.tzstr('US/Eastern')) - - rng_repr = repr(rng_eastern) - assert '2010-04-13 00:00:00' in rng_repr - - def test_index_astype_asobject_tzinfos(self): - # #1345 - - # dates around a dst transition - rng = date_range('2/13/2010', '5/6/2010', tz=self.tzstr('US/Eastern')) - - objs = rng.astype(object) - for i, x in enumerate(objs): - exval = rng[i] - assert x == exval - assert x.tzinfo == exval.tzinfo - - objs = rng.astype(object) - for i, x in enumerate(objs): - exval = rng[i] - assert x == exval - assert x.tzinfo == exval.tzinfo - - def test_fixed_offset(self): - dates = [datetime(2000, 1, 1, tzinfo=fixed_off), - datetime(2000, 1, 2, tzinfo=fixed_off), - datetime(2000, 1, 3, tzinfo=fixed_off)] - result = to_datetime(dates) - assert result.tz == fixed_off - - def test_fixedtz_topydatetime(self): - dates = np.array([datetime(2000, 1, 1, tzinfo=fixed_off), - datetime(2000, 1, 2, tzinfo=fixed_off), - datetime(2000, 1, 3, tzinfo=fixed_off)]) - result = 
to_datetime(dates).to_pydatetime() - tm.assert_numpy_array_equal(dates, result) - result = to_datetime(dates)._mpl_repr() - tm.assert_numpy_array_equal(dates, result) - - def test_convert_tz_aware_datetime_datetime(self): - # #1581 - - tz = self.tz('US/Eastern') - - dates = [datetime(2000, 1, 1), datetime(2000, 1, 2), - datetime(2000, 1, 3)] - - dates_aware = [self.localize(tz, x) for x in dates] - result = to_datetime(dates_aware) - assert self.cmptz(result.tz, self.tz('US/Eastern')) - - converted = to_datetime(dates_aware, utc=True) - ex_vals = np.array([Timestamp(x).value for x in dates_aware]) - tm.assert_numpy_array_equal(converted.asi8, ex_vals) - assert converted.tz is pytz.utc - - def test_to_datetime_utc(self): - arr = np.array([parse('2012-06-13T01:39:00Z')], dtype=object) - - result = to_datetime(arr, utc=True) - assert result.tz is pytz.utc - - def test_to_datetime_tzlocal(self): - dt = parse('2012-06-13T01:39:00Z') - dt = dt.replace(tzinfo=tzlocal()) - - arr = np.array([dt], dtype=object) - - result = to_datetime(arr, utc=True) - assert result.tz is pytz.utc - - rng = date_range('2012-11-03 03:00', '2012-11-05 03:00', tz=tzlocal()) - arr = rng.to_pydatetime() - result = to_datetime(arr, utc=True) - assert result.tz is pytz.utc - - def test_hongkong_tz_convert(self): - # #1673 - dr = date_range('2012-01-01', '2012-01-10', freq='D', tz='Hongkong') - - # it works! - dr.hour - - def test_tz_convert_unsorted(self): - dr = date_range('2012-03-09', freq='H', periods=100, tz='utc') - dr = dr.tz_convert(self.tzstr('US/Eastern')) - - result = dr[::-1].hour - exp = dr.hour[::-1] - tm.assert_almost_equal(result, exp) - - def test_shift_localized(self): - dr = date_range('2011/1/1', '2012/1/1', freq='W-FRI') - dr_tz = dr.tz_localize(self.tzstr('US/Eastern')) - - result = dr_tz.shift(1, '10T') - assert result.tz == dr_tz.tz - - def test_static_tzinfo(self): - # it works! 
- index = DatetimeIndex([datetime(2012, 1, 1)], tz=self.tzstr('EST')) - index.hour - index[0] - - def test_tzaware_datetime_to_index(self): - d = [datetime(2012, 8, 19, tzinfo=self.tz('US/Eastern'))] - - index = DatetimeIndex(d) - assert self.cmptz(index.tz, self.tz('US/Eastern')) - - def test_date_range_span_dst_transition(self): - # #1778 - - # Standard -> Daylight Savings Time - dr = date_range('03/06/2012 00:00', periods=200, freq='W-FRI', - tz='US/Eastern') - - assert (dr.hour == 0).all() - - dr = date_range('2012-11-02', periods=10, tz=self.tzstr('US/Eastern')) - assert (dr.hour == 0).all() - - def test_convert_datetime_list(self): - dr = date_range('2012-06-02', periods=10, - tz=self.tzstr('US/Eastern'), name='foo') - dr2 = DatetimeIndex(list(dr), name='foo') - tm.assert_index_equal(dr, dr2) - assert dr.tz == dr2.tz - assert dr2.name == 'foo' - - def test_index_drop_dont_lose_tz(self): - # #2621 - ind = date_range("2012-12-01", periods=10, tz="utc") - ind = ind.drop(ind[-1]) - - assert ind.tz is not None - - def test_datetimeindex_tz(self): - """ Test different DatetimeIndex constructions with timezone - Follow-up of #4229 - """ - - arr = ['11/10/2005 08:00:00', '11/10/2005 09:00:00'] - - idx1 = to_datetime(arr).tz_localize(self.tzstr('US/Eastern')) - idx2 = DatetimeIndex(start="2005-11-10 08:00:00", freq='H', periods=2, - tz=self.tzstr('US/Eastern')) - idx3 = DatetimeIndex(arr, tz=self.tzstr('US/Eastern')) - idx4 = DatetimeIndex(np.array(arr), tz=self.tzstr('US/Eastern')) - - for other in [idx2, idx3, idx4]: - tm.assert_index_equal(idx1, other) - - def test_datetimeindex_tz_nat(self): - idx = to_datetime([Timestamp("2013-1-1", tz=self.tzstr('US/Eastern')), - NaT]) - - assert isna(idx[1]) - assert idx[0].tzinfo is not None - def test_replace_across_dst(self): # GH#18319 check that 1) timezone is correctly normalized and # 2) that hour is not incorrectly changed by this normalization @@ -712,159 +112,6 @@ def normalize(self, ts): # no-op for dateutil return 
ts - def test_tz_convert_hour_overflow_dst(self): - # Regression test for: - # https://github.com/pandas-dev/pandas/issues/13306 - - # sorted case US/Eastern -> UTC - ts = ['2008-05-12 09:50:00', - '2008-12-12 09:50:35', - '2009-05-12 09:50:32'] - tt = to_datetime(ts).tz_localize('US/Eastern') - ut = tt.tz_convert('UTC') - expected = Index([13, 14, 13]) - tm.assert_index_equal(ut.hour, expected) - - # sorted case UTC -> US/Eastern - ts = ['2008-05-12 13:50:00', - '2008-12-12 14:50:35', - '2009-05-12 13:50:32'] - tt = to_datetime(ts).tz_localize('UTC') - ut = tt.tz_convert('US/Eastern') - expected = Index([9, 9, 9]) - tm.assert_index_equal(ut.hour, expected) - - # unsorted case US/Eastern -> UTC - ts = ['2008-05-12 09:50:00', - '2008-12-12 09:50:35', - '2008-05-12 09:50:32'] - tt = to_datetime(ts).tz_localize('US/Eastern') - ut = tt.tz_convert('UTC') - expected = Index([13, 14, 13]) - tm.assert_index_equal(ut.hour, expected) - - # unsorted case UTC -> US/Eastern - ts = ['2008-05-12 13:50:00', - '2008-12-12 14:50:35', - '2008-05-12 13:50:32'] - tt = to_datetime(ts).tz_localize('UTC') - ut = tt.tz_convert('US/Eastern') - expected = Index([9, 9, 9]) - tm.assert_index_equal(ut.hour, expected) - - def test_tz_convert_hour_overflow_dst_timestamps(self): - # Regression test for: - # https://github.com/pandas-dev/pandas/issues/13306 - - tz = self.tzstr('US/Eastern') - - # sorted case US/Eastern -> UTC - ts = [Timestamp('2008-05-12 09:50:00', tz=tz), - Timestamp('2008-12-12 09:50:35', tz=tz), - Timestamp('2009-05-12 09:50:32', tz=tz)] - tt = to_datetime(ts) - ut = tt.tz_convert('UTC') - expected = Index([13, 14, 13]) - tm.assert_index_equal(ut.hour, expected) - - # sorted case UTC -> US/Eastern - ts = [Timestamp('2008-05-12 13:50:00', tz='UTC'), - Timestamp('2008-12-12 14:50:35', tz='UTC'), - Timestamp('2009-05-12 13:50:32', tz='UTC')] - tt = to_datetime(ts) - ut = tt.tz_convert('US/Eastern') - expected = Index([9, 9, 9]) - tm.assert_index_equal(ut.hour, expected) - - # 
unsorted case US/Eastern -> UTC - ts = [Timestamp('2008-05-12 09:50:00', tz=tz), - Timestamp('2008-12-12 09:50:35', tz=tz), - Timestamp('2008-05-12 09:50:32', tz=tz)] - tt = to_datetime(ts) - ut = tt.tz_convert('UTC') - expected = Index([13, 14, 13]) - tm.assert_index_equal(ut.hour, expected) - - # unsorted case UTC -> US/Eastern - ts = [Timestamp('2008-05-12 13:50:00', tz='UTC'), - Timestamp('2008-12-12 14:50:35', tz='UTC'), - Timestamp('2008-05-12 13:50:32', tz='UTC')] - tt = to_datetime(ts) - ut = tt.tz_convert('US/Eastern') - expected = Index([9, 9, 9]) - tm.assert_index_equal(ut.hour, expected) - - def test_tslib_tz_convert_trans_pos_plus_1__bug(self): - # Regression test for tslib.tz_convert(vals, tz1, tz2). - # See https://github.com/pandas-dev/pandas/issues/4496 for details. - for freq, n in [('H', 1), ('T', 60), ('S', 3600)]: - idx = date_range(datetime(2011, 3, 26, 23), - datetime(2011, 3, 27, 1), freq=freq) - idx = idx.tz_localize('UTC') - idx = idx.tz_convert('Europe/Moscow') - - expected = np.repeat(np.array([3, 4, 5]), np.array([n, n, 1])) - tm.assert_index_equal(idx.hour, Index(expected)) - - def test_tslib_tz_convert_dst(self): - for freq, n in [('H', 1), ('T', 60), ('S', 3600)]: - # Start DST - idx = date_range('2014-03-08 23:00', '2014-03-09 09:00', freq=freq, - tz='UTC') - idx = idx.tz_convert('US/Eastern') - expected = np.repeat(np.array([18, 19, 20, 21, 22, 23, - 0, 1, 3, 4, 5]), - np.array([n, n, n, n, n, n, n, n, n, n, 1])) - tm.assert_index_equal(idx.hour, Index(expected)) - - idx = date_range('2014-03-08 18:00', '2014-03-09 05:00', freq=freq, - tz='US/Eastern') - idx = idx.tz_convert('UTC') - expected = np.repeat(np.array([23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), - np.array([n, n, n, n, n, n, n, n, n, n, 1])) - tm.assert_index_equal(idx.hour, Index(expected)) - - # End DST - idx = date_range('2014-11-01 23:00', '2014-11-02 09:00', freq=freq, - tz='UTC') - idx = idx.tz_convert('US/Eastern') - expected = np.repeat(np.array([19, 20, 21, 22, 23, - 
0, 1, 1, 2, 3, 4]), - np.array([n, n, n, n, n, n, n, n, n, n, 1])) - tm.assert_index_equal(idx.hour, Index(expected)) - - idx = date_range('2014-11-01 18:00', '2014-11-02 05:00', freq=freq, - tz='US/Eastern') - idx = idx.tz_convert('UTC') - expected = np.repeat(np.array([22, 23, 0, 1, 2, 3, 4, 5, 6, - 7, 8, 9, 10]), - np.array([n, n, n, n, n, n, n, n, n, - n, n, n, 1])) - tm.assert_index_equal(idx.hour, Index(expected)) - - # daily - # Start DST - idx = date_range('2014-03-08 00:00', '2014-03-09 00:00', freq='D', - tz='UTC') - idx = idx.tz_convert('US/Eastern') - tm.assert_index_equal(idx.hour, Index([19, 19])) - - idx = date_range('2014-03-08 00:00', '2014-03-09 00:00', freq='D', - tz='US/Eastern') - idx = idx.tz_convert('UTC') - tm.assert_index_equal(idx.hour, Index([5, 5])) - - # End DST - idx = date_range('2014-11-01 00:00', '2014-11-02 00:00', freq='D', - tz='UTC') - idx = idx.tz_convert('US/Eastern') - tm.assert_index_equal(idx.hour, Index([20, 20])) - - idx = date_range('2014-11-01 00:00', '2014-11-02 000:00', freq='D', - tz='US/Eastern') - idx = idx.tz_convert('UTC') - tm.assert_index_equal(idx.hour, Index([4, 4])) - def test_tzlocal(self): # GH 13583 ts = Timestamp('2011-01-01', tz=dateutil.tz.tzlocal()) @@ -879,32 +126,6 @@ def test_tzlocal(self): offset = offset.total_seconds() * 1000000000 assert ts.value + offset == Timestamp('2011-01-01').value - def test_tz_localize_tzlocal(self): - # GH 13583 - offset = dateutil.tz.tzlocal().utcoffset(datetime(2011, 1, 1)) - offset = int(offset.total_seconds() * 1000000000) - - dti = date_range(start='2001-01-01', end='2001-03-01') - dti2 = dti.tz_localize(dateutil.tz.tzlocal()) - tm.assert_numpy_array_equal(dti2.asi8 + offset, dti.asi8) - - dti = date_range(start='2001-01-01', end='2001-03-01', - tz=dateutil.tz.tzlocal()) - dti2 = dti.tz_localize(None) - tm.assert_numpy_array_equal(dti2.asi8 - offset, dti.asi8) - - def test_tz_convert_tzlocal(self): - # GH 13583 - # tz_convert doesn't affect to internal - dti = 
date_range(start='2001-01-01', end='2001-03-01', tz='UTC') - dti2 = dti.tz_convert(dateutil.tz.tzlocal()) - tm.assert_numpy_array_equal(dti2.asi8, dti.asi8) - - dti = date_range(start='2001-01-01', end='2001-03-01', - tz=dateutil.tz.tzlocal()) - dti2 = dti.tz_convert(None) - tm.assert_numpy_array_equal(dti2.asi8, dti.asi8) - class TestTimeZoneCacheKey(object): @@ -922,228 +143,6 @@ def test_cache_keys_are_distinct_for_pytz_vs_dateutil(self, tz_name): timezones._p_tz_cache_key(tz_d)) -class TestTimeZones(object): - timezones = ['UTC', 'Asia/Tokyo', 'US/Eastern', 'dateutil/US/Pacific'] - - def test_index_equals_with_tz(self): - left = date_range('1/1/2011', periods=100, freq='H', tz='utc') - right = date_range('1/1/2011', periods=100, freq='H', tz='US/Eastern') - - assert not left.equals(right) - - def test_tz_localize_naive(self): - rng = date_range('1/1/2011', periods=100, freq='H') - - conv = rng.tz_localize('US/Pacific') - exp = date_range('1/1/2011', periods=100, freq='H', tz='US/Pacific') - - tm.assert_index_equal(conv, exp) - - def test_tz_localize_roundtrip(self): - for tz in self.timezones: - idx1 = date_range(start='2014-01-01', end='2014-12-31', freq='M') - idx2 = date_range(start='2014-01-01', end='2014-12-31', freq='D') - idx3 = date_range(start='2014-01-01', end='2014-03-01', freq='H') - idx4 = date_range(start='2014-08-01', end='2014-10-31', freq='T') - for idx in [idx1, idx2, idx3, idx4]: - localized = idx.tz_localize(tz) - expected = date_range(start=idx[0], end=idx[-1], freq=idx.freq, - tz=tz) - tm.assert_index_equal(localized, expected) - - with pytest.raises(TypeError): - localized.tz_localize(tz) - - reset = localized.tz_localize(None) - tm.assert_index_equal(reset, idx) - assert reset.tzinfo is None - - def test_tz_convert_roundtrip(self): - for tz in self.timezones: - idx1 = date_range(start='2014-01-01', end='2014-12-31', freq='M', - tz='UTC') - exp1 = date_range(start='2014-01-01', end='2014-12-31', freq='M') - - idx2 = 
date_range(start='2014-01-01', end='2014-12-31', freq='D', - tz='UTC') - exp2 = date_range(start='2014-01-01', end='2014-12-31', freq='D') - - idx3 = date_range(start='2014-01-01', end='2014-03-01', freq='H', - tz='UTC') - exp3 = date_range(start='2014-01-01', end='2014-03-01', freq='H') - - idx4 = date_range(start='2014-08-01', end='2014-10-31', freq='T', - tz='UTC') - exp4 = date_range(start='2014-08-01', end='2014-10-31', freq='T') - - for idx, expected in [(idx1, exp1), (idx2, exp2), (idx3, exp3), - (idx4, exp4)]: - converted = idx.tz_convert(tz) - reset = converted.tz_convert(None) - tm.assert_index_equal(reset, expected) - assert reset.tzinfo is None - tm.assert_index_equal(reset, converted.tz_convert( - 'UTC').tz_localize(None)) - - def test_join_utc_convert(self): - rng = date_range('1/1/2011', periods=100, freq='H', tz='utc') - - left = rng.tz_convert('US/Eastern') - right = rng.tz_convert('Europe/Berlin') - - for how in ['inner', 'outer', 'left', 'right']: - result = left.join(left[:-5], how=how) - assert isinstance(result, DatetimeIndex) - assert result.tz == left.tz - - result = left.join(right[:-5], how=how) - assert isinstance(result, DatetimeIndex) - assert result.tz.zone == 'UTC' - - def test_join_aware(self): - rng = date_range('1/1/2011', periods=10, freq='H') - - # non-overlapping - rng = date_range("2012-11-15 00:00:00", periods=6, freq="H", - tz="US/Central") - - rng2 = date_range("2012-11-15 12:00:00", periods=6, freq="H", - tz="US/Eastern") - - result = rng.union(rng2) - assert result.tz.zone == 'UTC' - - def test_intersection(self): - rng = date_range('1/1/2011', periods=100, freq='H', tz='utc') - - left = rng[10:90][::-1] - right = rng[20:80][::-1] - - assert left.tz == rng.tz - result = left.intersection(right) - assert result.tz == left.tz - - def test_timestamp_equality_different_timezones(self): - utc_range = date_range('1/1/2000', periods=20, tz='UTC') - eastern_range = utc_range.tz_convert('US/Eastern') - berlin_range = 
utc_range.tz_convert('Europe/Berlin') - - for a, b, c in zip(utc_range, eastern_range, berlin_range): - assert a == b - assert b == c - assert a == c - - assert (utc_range == eastern_range).all() - assert (utc_range == berlin_range).all() - assert (berlin_range == eastern_range).all() - - def test_datetimeindex_tz(self): - rng = date_range('03/12/2012 00:00', periods=10, freq='W-FRI', - tz='US/Eastern') - rng2 = DatetimeIndex(data=rng, tz='US/Eastern') - tm.assert_index_equal(rng, rng2) - - def test_normalize_tz(self): - rng = date_range('1/1/2000 9:30', periods=10, freq='D', - tz='US/Eastern') - - result = rng.normalize() - expected = date_range('1/1/2000', periods=10, freq='D', - tz='US/Eastern') - tm.assert_index_equal(result, expected) - - assert result.is_normalized - assert not rng.is_normalized - - rng = date_range('1/1/2000 9:30', periods=10, freq='D', tz='UTC') - - result = rng.normalize() - expected = date_range('1/1/2000', periods=10, freq='D', tz='UTC') - tm.assert_index_equal(result, expected) - - assert result.is_normalized - assert not rng.is_normalized - - rng = date_range('1/1/2000 9:30', periods=10, freq='D', tz=tzlocal()) - result = rng.normalize() - expected = date_range('1/1/2000', periods=10, freq='D', tz=tzlocal()) - tm.assert_index_equal(result, expected) - - assert result.is_normalized - assert not rng.is_normalized - - @td.skip_if_windows - def test_normalize_tz_local(self): - # see gh-13459 - timezones = ['US/Pacific', 'US/Eastern', 'UTC', 'Asia/Kolkata', - 'Asia/Shanghai', 'Australia/Canberra'] - - for timezone in timezones: - with set_timezone(timezone): - rng = date_range('1/1/2000 9:30', periods=10, freq='D', - tz=tzlocal()) - - result = rng.normalize() - expected = date_range('1/1/2000', periods=10, freq='D', - tz=tzlocal()) - tm.assert_index_equal(result, expected) - - assert result.is_normalized - assert not rng.is_normalized - - def test_tzaware_offset(self): - dates = date_range('2012-11-01', periods=3, tz='US/Pacific') - offset 
= dates + offsets.Hour(5) - assert dates[0] + offsets.Hour(5) == offset[0] - - # GH 6818 - for tz in ['UTC', 'US/Pacific', 'Asia/Tokyo']: - dates = date_range('2010-11-01 00:00', periods=3, tz=tz, freq='H') - expected = DatetimeIndex(['2010-11-01 05:00', '2010-11-01 06:00', - '2010-11-01 07:00'], freq='H', tz=tz) - - offset = dates + offsets.Hour(5) - tm.assert_index_equal(offset, expected) - offset = dates + np.timedelta64(5, 'h') - tm.assert_index_equal(offset, expected) - offset = dates + timedelta(hours=5) - tm.assert_index_equal(offset, expected) - - def test_nat(self): - # GH 5546 - dates = [NaT] - idx = DatetimeIndex(dates) - idx = idx.tz_localize('US/Pacific') - tm.assert_index_equal(idx, DatetimeIndex(dates, tz='US/Pacific')) - idx = idx.tz_convert('US/Eastern') - tm.assert_index_equal(idx, DatetimeIndex(dates, tz='US/Eastern')) - idx = idx.tz_convert('UTC') - tm.assert_index_equal(idx, DatetimeIndex(dates, tz='UTC')) - - dates = ['2010-12-01 00:00', '2010-12-02 00:00', NaT] - idx = DatetimeIndex(dates) - idx = idx.tz_localize('US/Pacific') - tm.assert_index_equal(idx, DatetimeIndex(dates, tz='US/Pacific')) - idx = idx.tz_convert('US/Eastern') - expected = ['2010-12-01 03:00', '2010-12-02 03:00', NaT] - tm.assert_index_equal(idx, DatetimeIndex(expected, tz='US/Eastern')) - - idx = idx + offsets.Hour(5) - expected = ['2010-12-01 08:00', '2010-12-02 08:00', NaT] - tm.assert_index_equal(idx, DatetimeIndex(expected, tz='US/Eastern')) - idx = idx.tz_convert('US/Pacific') - expected = ['2010-12-01 05:00', '2010-12-02 05:00', NaT] - tm.assert_index_equal(idx, DatetimeIndex(expected, tz='US/Pacific')) - - idx = idx + np.timedelta64(3, 'h') - expected = ['2010-12-01 08:00', '2010-12-02 08:00', NaT] - tm.assert_index_equal(idx, DatetimeIndex(expected, tz='US/Pacific')) - - idx = idx.tz_convert('US/Eastern') - expected = ['2010-12-01 11:00', '2010-12-02 11:00', NaT] - tm.assert_index_equal(idx, DatetimeIndex(expected, tz='US/Eastern')) - - class TestTslib(object): 
def test_tslib_tz_convert(self): From 5fa85e9fcc3216982b3ec04255f9fdc4aff9818b Mon Sep 17 00:00:00 2001 From: Jason Bandlow Date: Tue, 6 Feb 2018 18:48:55 -0500 Subject: [PATCH 059/217] BUG: Fix ts precision issue with groupby and NaT (#19526) closes #19526 Author: Jason Bandlow Closes #19530 from jbandlow/timestamp_float_conversion and squashes the following commits: 2fb23d673 [Jason Bandlow] merge af37225d4 [Jason Bandlow] BUG: Fix ts precision issue with groupby and NaT (#19526) --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/core/groupby.py | 2 +- pandas/tests/groupby/aggregate/test_cython.py | 19 ++++++++++++++++++- 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index b5bf7ccbda0b6..7322bd9fe3327 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -644,6 +644,7 @@ Groupby/Resample/Rolling - Fixed regression in :func:`DataFrame.groupby` which would not emit an error when called with a tuple key not in the index (:issue:`18798`) - Bug in :func:`DataFrame.resample` which silently ignored unsupported (or mistyped) options for ``label``, ``closed`` and ``convention`` (:issue:`19303`) - Bug in :func:`DataFrame.groupby` where tuples were interpreted as lists of keys rather than as keys (:issue:`17979`, :issue:`18249`) +- Bug in :func:`DataFrame.groupby` where aggregation by ``first``/``last``/``min``/``max`` was causing timestamps to lose precision (:issue:`19526`) - Bug in :func:`DataFrame.transform` where particular aggregation functions were being incorrectly cast to match the dtype(s) of the grouped data (:issue:`19200`) - Bug in :func:`DataFrame.groupby` passing the `on=` kwarg, and subsequently using ``.apply()`` (:issue:`17813`) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index ab0070777c190..f352b80ba3069 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2336,7 +2336,7 @@ def _cython_operation(self, kind, 
values, how, axis, min_count=-1): result = self._transform( result, values, labels, func, is_numeric, is_datetimelike) - if is_integer_dtype(result): + if is_integer_dtype(result) and not is_datetimelike: mask = result == iNaT if mask.any(): result = result.astype('float64') diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py index c8ee05ddbb74f..cef3a699ed24b 100644 --- a/pandas/tests/groupby/aggregate/test_cython.py +++ b/pandas/tests/groupby/aggregate/test_cython.py @@ -12,7 +12,8 @@ from numpy import nan import pandas as pd -from pandas import bdate_range, DataFrame, Index, Series +from pandas import (bdate_range, DataFrame, Index, Series, Timestamp, + Timedelta, NaT) from pandas.core.groupby import DataError import pandas.util.testing as tm @@ -187,3 +188,19 @@ def test_cython_agg_empty_buckets_nanops(): {"a": [1, 1, 1716, 1]}, index=pd.CategoricalIndex(intervals, name='a', ordered=True)) tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize('op', ['first', 'last', 'max', 'min']) +@pytest.mark.parametrize('data', [ + Timestamp('2016-10-14 21:00:44.557'), + Timedelta('17088 days 21:00:44.557'), ]) +def test_cython_with_timestamp_and_nat(op, data): + # https://github.com/pandas-dev/pandas/issues/19526 + df = DataFrame({'a': [0, 1], 'b': [data, NaT]}) + index = Index([0, 1], name='a') + + # We will group by a and test the cython aggregations + expected = DataFrame({'b': [data, NaT]}, index=index) + + result = df.groupby('a').aggregate(op) + tm.assert_frame_equal(expected, result) From 390aa9d7038e8ad845707018f85381c4710510f2 Mon Sep 17 00:00:00 2001 From: William Ayd Date: Wed, 7 Feb 2018 03:01:53 -0800 Subject: [PATCH 060/217] Cleaned up return of _get_cython_function (#19561) --- pandas/core/groupby.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index f352b80ba3069..01241db7c0c42 100644 --- a/pandas/core/groupby.py 
+++ b/pandas/core/groupby.py @@ -2240,7 +2240,7 @@ def wrapper(*args, **kwargs): raise NotImplementedError("function is not implemented for this" "dtype: [how->%s,dtype->%s]" % (how, dtype_str)) - return func, dtype_str + return func def _cython_operation(self, kind, values, how, axis, min_count=-1): assert kind in ['transform', 'aggregate'] @@ -2304,12 +2304,12 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1): values = values.astype(object) try: - func, dtype_str = self._get_cython_function( + func = self._get_cython_function( kind, how, values, is_numeric) except NotImplementedError: if is_numeric: values = _ensure_float64(values) - func, dtype_str = self._get_cython_function( + func = self._get_cython_function( kind, how, values, is_numeric) else: raise From 1c824e63721e521058d0421d951071b24d1d400d Mon Sep 17 00:00:00 2001 From: Paul Reidy Date: Wed, 7 Feb 2018 11:04:28 +0000 Subject: [PATCH 061/217] DEPR/CLN: fix from_items deprecation warnings (#19559) --- pandas/tests/groupby/aggregate/test_other.py | 3 ++- pandas/tests/reshape/test_reshape.py | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py index 575eae1916f4c..4c407ad8a0d93 100644 --- a/pandas/tests/groupby/aggregate/test_other.py +++ b/pandas/tests/groupby/aggregate/test_other.py @@ -7,6 +7,7 @@ from __future__ import print_function import pytest +from collections import OrderedDict import datetime as dt from functools import partial @@ -81,7 +82,7 @@ def test_agg_period_index(): s1 = Series(np.random.rand(len(index)), index=index) s2 = Series(np.random.rand(len(index)), index=index) series = [('s1', s1), ('s2', s2)] - df = DataFrame.from_items(series) + df = DataFrame.from_dict(OrderedDict(series)) grouped = df.groupby(df.index.month) list(grouped) diff --git a/pandas/tests/reshape/test_reshape.py b/pandas/tests/reshape/test_reshape.py index c9d079421532f..a57c3c41b3637 
100644 --- a/pandas/tests/reshape/test_reshape.py +++ b/pandas/tests/reshape/test_reshape.py @@ -3,6 +3,7 @@ from warnings import catch_warnings import pytest +from collections import OrderedDict from pandas import DataFrame, Series import pandas as pd @@ -457,7 +458,8 @@ def test_dataframe_dummies_preserve_categorical_dtype(self, dtype): @pytest.mark.parametrize('sparse', [True, False]) def test_get_dummies_dont_sparsify_all_columns(self, sparse): # GH18914 - df = DataFrame.from_items([('GDP', [1, 2]), ('Nation', ['AB', 'CD'])]) + df = DataFrame.from_dict(OrderedDict([('GDP', [1, 2]), + ('Nation', ['AB', 'CD'])])) df = get_dummies(df, columns=['Nation'], sparse=sparse) df2 = df.reindex(columns=['GDP']) From 88455cbda59a52faebdfb7eb55da1a6e2b8c8d3b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 7 Feb 2018 03:09:33 -0800 Subject: [PATCH 062/217] Implement get_day_of_year, tests (#19555) --- pandas/_libs/tslibs/ccalendar.pxd | 1 + pandas/_libs/tslibs/ccalendar.pyx | 43 ++++++++++++++++++++++----- pandas/_libs/tslibs/fields.pyx | 13 ++------ pandas/_libs/tslibs/period.pyx | 5 ++-- pandas/_libs/tslibs/timestamps.pyx | 20 +++---------- pandas/core/indexes/datetimes.py | 4 +-- pandas/tests/tslibs/__init__.py | 0 pandas/tests/tslibs/test_ccalendar.py | 18 +++++++++++ setup.py | 1 + 9 files changed, 67 insertions(+), 38 deletions(-) create mode 100644 pandas/tests/tslibs/__init__.py create mode 100644 pandas/tests/tslibs/test_ccalendar.py diff --git a/pandas/_libs/tslibs/ccalendar.pxd b/pandas/_libs/tslibs/ccalendar.pxd index a1bbeea1cb69a..42473a97a7150 100644 --- a/pandas/_libs/tslibs/ccalendar.pxd +++ b/pandas/_libs/tslibs/ccalendar.pxd @@ -10,3 +10,4 @@ cdef int dayofweek(int y, int m, int m) nogil cdef bint is_leapyear(int64_t year) nogil cpdef int32_t get_days_in_month(int year, Py_ssize_t month) nogil cpdef int32_t get_week_of_year(int year, int month, int day) nogil +cpdef int32_t get_day_of_year(int year, int month, int day) nogil diff --git 
a/pandas/_libs/tslibs/ccalendar.pyx b/pandas/_libs/tslibs/ccalendar.pyx index ae52f7dd30165..613e111443636 100644 --- a/pandas/_libs/tslibs/ccalendar.pyx +++ b/pandas/_libs/tslibs/ccalendar.pyx @@ -142,17 +142,13 @@ cpdef int32_t get_week_of_year(int year, int month, int day) nogil: Assumes the inputs describe a valid date. """ cdef: - bint isleap, isleap_prev - int32_t mo_off + bint isleap int32_t doy, dow int woy isleap = is_leapyear(year) - isleap_prev = is_leapyear(year - 1) - - mo_off = _month_offset[isleap * 13 + month - 1] - doy = mo_off + day + doy = get_day_of_year(year, month, day) dow = dayofweek(year, month, day) # estimate @@ -162,7 +158,7 @@ cpdef int32_t get_week_of_year(int year, int month, int day) nogil: # verify if woy < 0: - if (woy > -2) or (woy == -2 and isleap_prev): + if (woy > -2) or (woy == -2 and is_leapyear(year - 1)): woy = 53 else: woy = 52 @@ -171,3 +167,36 @@ cpdef int32_t get_week_of_year(int year, int month, int day) nogil: woy = 1 return woy + + +@cython.wraparound(False) +@cython.boundscheck(False) +cpdef int32_t get_day_of_year(int year, int month, int day) nogil: + """Return the ordinal day-of-year for the given day. + + Parameters + ---------- + year : int + month : int + day : int + + Returns + ------- + day_of_year : int32_t + + Notes + ----- + Assumes the inputs describe a valid date. 
+ """ + cdef: + bint isleap + int32_t mo_off + int32_t doy, dow + int woy + + isleap = is_leapyear(year) + + mo_off = _month_offset[isleap * 13 + month - 1] + + day_of_year = mo_off + day + return day_of_year diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index a8a865eec38dd..7a4b9775bd56e 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -15,7 +15,7 @@ cnp.import_array() from ccalendar cimport (get_days_in_month, is_leapyear, dayofweek, - get_week_of_year) + get_week_of_year, get_day_of_year) from np_datetime cimport (pandas_datetimestruct, pandas_timedeltastruct, dt64_to_dtstruct, td64_to_tdstruct) from nattype cimport NPY_NAT @@ -374,15 +374,7 @@ def get_date_field(ndarray[int64_t] dtindex, object field): cdef: Py_ssize_t i, count = 0 ndarray[int32_t] out - ndarray[int32_t, ndim=2] _month_offset - int isleap, isleap_prev pandas_datetimestruct dts - int mo_off, doy, dow - - _month_offset = np.array( - [[0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365], - [0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366]], - dtype=np.int32 ) count = len(dtindex) out = np.empty(count, dtype='i4') @@ -482,8 +474,7 @@ def get_date_field(ndarray[int64_t] dtindex, object field): continue dt64_to_dtstruct(dtindex[i], &dts) - isleap = is_leapyear(dts.year) - out[i] = _month_offset[isleap, dts.month -1] + dts.day + out[i] = get_day_of_year(dts.year, dts.month, dts.day) return out elif field == 'dow': diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 5098e5c9100ff..e82c9c613c62a 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -22,7 +22,7 @@ from cpython.datetime cimport PyDateTime_Check, PyDateTime_IMPORT PyDateTime_IMPORT from np_datetime cimport (pandas_datetimestruct, dtstruct_to_dt64, - dt64_to_dtstruct, is_leapyear) + dt64_to_dtstruct) cimport util from util cimport is_period_object, is_string_object, INT32_MIN @@ -34,11 +34,12 @@ 
from timezones cimport is_utc, is_tzlocal, get_utcoffset, get_dst_info from timedeltas cimport delta_to_nanoseconds from ccalendar import MONTH_NUMBERS +from ccalendar cimport is_leapyear from frequencies cimport (get_freq_code, get_base_alias, get_to_timestamp_base, get_freq_str, get_rule_month) from parsing import parse_time_string, NAT_SENTINEL -from resolution import resolution, Resolution +from resolution import Resolution from nattype import nat_strings, NaT, iNaT from nattype cimport _nat_scalar_rules, NPY_NAT diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index b9be9c16eb6c3..47179a4e1d761 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -29,8 +29,7 @@ from nattype import NaT from nattype cimport NPY_NAT from np_datetime import OutOfBoundsDatetime from np_datetime cimport (reverse_ops, cmp_scalar, check_dts_bounds, - pandas_datetimestruct, dt64_to_dtstruct, - is_leapyear) + pandas_datetimestruct, dt64_to_dtstruct) from timedeltas import Timedelta from timedeltas cimport delta_to_nanoseconds from timezones cimport ( @@ -291,14 +290,6 @@ cdef class _Timestamp(datetime): val = tz_convert_single(self.value, 'UTC', self.tz) return val - cpdef int _get_field(self, field): - cdef: - int64_t val - ndarray[int32_t] out - val = self._maybe_convert_value_to_local() - out = get_date_field(np.array([val], dtype=np.int64), field) - return int(out[0]) - cpdef bint _get_start_end_field(self, str field): cdef: int64_t val @@ -695,14 +686,11 @@ class Timestamp(_Timestamp): @property def dayofyear(self): - return self._get_field('doy') + return ccalendar.get_day_of_year(self.year, self.month, self.day) @property def week(self): - if self.freq is None: - # fastpath for non-business - return ccalendar.get_week_of_year(self.year, self.month, self.day) - return self._get_field('woy') + return ccalendar.get_week_of_year(self.year, self.month, self.day) weekofyear = week @@ -764,7 +752,7 @@ class 
Timestamp(_Timestamp): @property def is_leap_year(self): - return bool(is_leapyear(self.year)) + return bool(ccalendar.is_leapyear(self.year)) def tz_localize(self, tz, ambiguous='raise', errors='raise'): """ diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index e09fa87477122..61c941c3d2333 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -55,7 +55,7 @@ from pandas._libs import (lib, index as libindex, tslib as libts, join as libjoin, Timestamp) from pandas._libs.tslibs import (timezones, conversion, fields, parsing, - period as libperiod) + resolution as libresolution) # -------- some conversion wrapper functions @@ -1795,7 +1795,7 @@ def is_normalized(self): @cache_readonly def _resolution(self): - return libperiod.resolution(self.asi8, self.tz) + return libresolution.resolution(self.asi8, self.tz) def insert(self, loc, item): """ diff --git a/pandas/tests/tslibs/__init__.py b/pandas/tests/tslibs/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/tslibs/test_ccalendar.py b/pandas/tests/tslibs/test_ccalendar.py new file mode 100644 index 0000000000000..b5d562a7b5a9c --- /dev/null +++ b/pandas/tests/tslibs/test_ccalendar.py @@ -0,0 +1,18 @@ +# -*- coding: utf-8 -*- +from datetime import datetime + +import numpy as np + +from pandas._libs.tslibs import ccalendar + + +def test_get_day_of_year(): + assert ccalendar.get_day_of_year(2001, 3, 1) == 60 + assert ccalendar.get_day_of_year(2004, 3, 1) == 61 + assert ccalendar.get_day_of_year(1907, 12, 31) == 365 + assert ccalendar.get_day_of_year(2004, 12, 31) == 366 + + dt = datetime.fromordinal(1 + np.random.randint(365 * 4000)) + result = ccalendar.get_day_of_year(dt.year, dt.month, dt.day) + expected = (dt - dt.replace(month=1, day=1)).days + 1 + assert result == expected diff --git a/setup.py b/setup.py index 5397a1b84dc4d..2332503e558ed 100755 --- a/setup.py +++ b/setup.py @@ -515,6 +515,7 @@ def 
pxd(name): 'pyxfile': '_libs/tslibs/period', 'pxdfiles': ['_libs/src/util', '_libs/missing', + '_libs/tslibs/ccalendar', '_libs/tslibs/timedeltas', '_libs/tslibs/timezones', '_libs/tslibs/nattype'], From 0359bd655ca10ba233a95c3c7462b2121193bf08 Mon Sep 17 00:00:00 2001 From: Alex Rychyk Date: Wed, 7 Feb 2018 13:12:58 +0200 Subject: [PATCH 063/217] fixed bug in df.aggregate passing non-existent columns (#19552) --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/core/base.py | 4 ++++ pandas/tests/test_resample.py | 18 ++++++++++++++++-- 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 7322bd9fe3327..c48f6d19e3b10 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -647,6 +647,7 @@ Groupby/Resample/Rolling - Bug in :func:`DataFrame.groupby` where aggregation by ``first``/``last``/``min``/``max`` was causing timestamps to lose precision (:issue:`19526`) - Bug in :func:`DataFrame.transform` where particular aggregation functions were being incorrectly cast to match the dtype(s) of the grouped data (:issue:`19200`) - Bug in :func:`DataFrame.groupby` passing the `on=` kwarg, and subsequently using ``.apply()`` (:issue:`17813`) +- Bug in :func:`DataFrame.resample().aggregate` not raising a `ValueError` when aggregating a non-existent column (:issue:`16766`) Sparse ^^^^^^ diff --git a/pandas/core/base.py b/pandas/core/base.py index d5b204dba063e..0969717d85e4f 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -392,6 +392,10 @@ def nested_renaming_depr(level=4): elif isinstance(obj, ABCSeries): nested_renaming_depr() + elif isinstance(obj, ABCDataFrame) and \ + k not in obj.columns: + raise ValueError( + "Column '{col}' does not exist!".format(col=k)) arg = new_arg diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index 2de890ea459f0..9feba3fd042dd 100644 --- a/pandas/tests/test_resample.py +++ 
b/pandas/tests/test_resample.py @@ -20,7 +20,6 @@ from pandas.core.dtypes.generic import ABCSeries, ABCDataFrame from pandas.compat import range, lrange, zip, product, OrderedDict -from pandas.core.base import SpecificationError from pandas.errors import UnsupportedFunctionCall from pandas.core.groupby import DataError import pandas.core.common as com @@ -614,7 +613,7 @@ def f(): t[['A']].agg({'A': ['sum', 'std'], 'B': ['mean', 'std']}) - pytest.raises(SpecificationError, f) + pytest.raises(ValueError, f) def test_agg_nested_dicts(self): @@ -659,6 +658,21 @@ def f(): 'B': {'rb': ['mean', 'std']}}) assert_frame_equal(result, expected, check_like=True) + def test_try_aggregate_non_existing_column(self): + # GH 16766 + data = [ + {'dt': datetime(2017, 6, 1, 0), 'x': 1.0, 'y': 2.0}, + {'dt': datetime(2017, 6, 1, 1), 'x': 2.0, 'y': 2.0}, + {'dt': datetime(2017, 6, 1, 2), 'x': 3.0, 'y': 1.5} + ] + df = DataFrame(data).set_index('dt') + + # Error as we don't have 'z' column + with pytest.raises(ValueError): + df.resample('30T').agg({'x': ['mean'], + 'y': ['median'], + 'z': ['sum']}) + def test_selection_api_validation(self): # GH 13500 index = date_range(datetime(2005, 1, 1), From e5fa17c2839db8f61db4c7bf14fa0d29bcf4c8ed Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 7 Feb 2018 07:30:16 -0500 Subject: [PATCH 064/217] ERR: raise KeyError on invalid column name in aggregate (#19566) xref #19552 --- doc/source/whatsnew/v0.23.0.txt | 2 +- pandas/core/base.py | 2 +- pandas/tests/test_resample.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index c48f6d19e3b10..eaa8841b79a78 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -647,7 +647,7 @@ Groupby/Resample/Rolling - Bug in :func:`DataFrame.groupby` where aggregation by ``first``/``last``/``min``/``max`` was causing timestamps to lose precision (:issue:`19526`) - Bug in 
:func:`DataFrame.transform` where particular aggregation functions were being incorrectly cast to match the dtype(s) of the grouped data (:issue:`19200`) - Bug in :func:`DataFrame.groupby` passing the `on=` kwarg, and subsequently using ``.apply()`` (:issue:`17813`) -- Bug in :func:`DataFrame.resample().aggregate` not raising a `ValueError` when aggregating a non-existent column (:issue:`16766`) +- Bug in :func:`DataFrame.resample().aggregate` not raising a ``KeyError`` when aggregating a non-existent column (:issue:`16766`, :issue:`19566`) Sparse ^^^^^^ diff --git a/pandas/core/base.py b/pandas/core/base.py index 0969717d85e4f..3d8f5f265e3db 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -394,7 +394,7 @@ def nested_renaming_depr(level=4): nested_renaming_depr() elif isinstance(obj, ABCDataFrame) and \ k not in obj.columns: - raise ValueError( + raise KeyError( "Column '{col}' does not exist!".format(col=k)) arg = new_arg diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index 9feba3fd042dd..23cc18de34778 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -613,7 +613,7 @@ def f(): t[['A']].agg({'A': ['sum', 'std'], 'B': ['mean', 'std']}) - pytest.raises(ValueError, f) + pytest.raises(KeyError, f) def test_agg_nested_dicts(self): @@ -668,7 +668,7 @@ def test_try_aggregate_non_existing_column(self): df = DataFrame(data).set_index('dt') # Error as we don't have 'z' column - with pytest.raises(ValueError): + with pytest.raises(KeyError): df.resample('30T').agg({'x': ['mean'], 'y': ['median'], 'z': ['sum']}) From 7a1978199b55c28ab6160c0d0a4914897325fbd5 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 7 Feb 2018 04:32:37 -0800 Subject: [PATCH 065/217] Frame ops prelims - de-duplicate, remove unused kwargs (#19522) --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/core/frame.py | 49 ++++++++++------------------ pandas/core/indexes/base.py | 47 ++++++++++++-------------- pandas/core/ops.py | 8 
+++-- pandas/core/sparse/frame.py | 14 +++----- pandas/tests/frame/test_operators.py | 13 ++++++++ 6 files changed, 61 insertions(+), 71 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index eaa8841b79a78..80c5352701540 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -582,6 +582,7 @@ Numeric - Bug in :class:`Index` multiplication and division methods where operating with a ``Series`` would return an ``Index`` object instead of a ``Series`` object (:issue:`19042`) - Bug in the :class:`DataFrame` constructor in which data containing very large positive or very large negative numbers was causing ``OverflowError`` (:issue:`18584`) - Bug in :class:`Index` constructor with ``dtype='uint64'`` where int-like floats were not coerced to :class:`UInt64Index` (:issue:`18400`) +- Bug in :class:`DataFrame` flex arithmetic (e.g. `df.add(other, fill_value=foo)`) with a `fill_value` other than ``None`` failed to raise ``NotImplementedError`` in corner cases where either the frame or ``other`` has length zero (:issue:`19522`) Indexing diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3d1983f65d70d..b0ead3f0c7f00 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3915,8 +3915,7 @@ def reorder_levels(self, order, axis=0): # ---------------------------------------------------------------------- # Arithmetic / combination related - def _combine_frame(self, other, func, fill_value=None, level=None, - try_cast=True): + def _combine_frame(self, other, func, fill_value=None, level=None): this, other = self.align(other, join='outer', level=level, copy=False) new_index, new_columns = this.index, this.columns @@ -3968,52 +3967,40 @@ def f(i): def _combine_series(self, other, func, fill_value=None, axis=None, level=None, try_cast=True): + if fill_value is not None: + raise NotImplementedError("fill_value {fill} not supported." 
+ .format(fill=fill_value)) + if axis is not None: axis = self._get_axis_name(axis) if axis == 'index': - return self._combine_match_index(other, func, level=level, - fill_value=fill_value, - try_cast=try_cast) + return self._combine_match_index(other, func, level=level) else: return self._combine_match_columns(other, func, level=level, - fill_value=fill_value, try_cast=try_cast) - return self._combine_series_infer(other, func, level=level, - fill_value=fill_value, - try_cast=try_cast) - - def _combine_series_infer(self, other, func, level=None, - fill_value=None, try_cast=True): - if len(other) == 0: - return self * np.nan + else: + if not len(other): + return self * np.nan - if len(self) == 0: - # Ambiguous case, use _series so works with DataFrame - return self._constructor(data=self._series, index=self.index, - columns=self.columns) + if not len(self): + # Ambiguous case, use _series so works with DataFrame + return self._constructor(data=self._series, index=self.index, + columns=self.columns) - return self._combine_match_columns(other, func, level=level, - fill_value=fill_value, - try_cast=try_cast) + # default axis is columns + return self._combine_match_columns(other, func, level=level, + try_cast=try_cast) - def _combine_match_index(self, other, func, level=None, - fill_value=None, try_cast=True): + def _combine_match_index(self, other, func, level=None): left, right = self.align(other, join='outer', axis=0, level=level, copy=False) - if fill_value is not None: - raise NotImplementedError("fill_value %r not supported." 
% - fill_value) return self._constructor(func(left.values.T, right.values).T, index=left.index, columns=self.columns, copy=False) - def _combine_match_columns(self, other, func, level=None, - fill_value=None, try_cast=True): + def _combine_match_columns(self, other, func, level=None, try_cast=True): left, right = self.align(other, join='outer', axis=1, level=level, copy=False) - if fill_value is not None: - raise NotImplementedError("fill_value %r not supported" % - fill_value) new_data = left._data.eval(func=func, other=right, axes=[left.columns, self.index], diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 1e1bb0d49b3df..15df77bf772dc 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -80,6 +80,26 @@ def _try_get_item(x): return x +def _make_invalid_op(name): + """ + Return a binary method that always raises a TypeError. + + Parameters + ---------- + name : str + + Returns + ------- + invalid_op : function + """ + def invalid_op(self, other=None): + raise TypeError("cannot perform {name} with this index type: " + "{typ}".format(name=name, typ=type(self))) + + invalid_op.__name__ = name + return invalid_op + + class InvalidIndexError(Exception): pass @@ -3916,30 +3936,12 @@ def _evaluate_compare(self, other): @classmethod def _add_numeric_methods_add_sub_disabled(cls): """ add in the numeric add/sub methods to disable """ - - def _make_invalid_op(name): - def invalid_op(self, other=None): - raise TypeError("cannot perform {name} with this index type: " - "{typ}".format(name=name, typ=type(self))) - - invalid_op.__name__ = name - return invalid_op - cls.__add__ = cls.__radd__ = __iadd__ = _make_invalid_op('__add__') # noqa cls.__sub__ = __isub__ = _make_invalid_op('__sub__') # noqa @classmethod def _add_numeric_methods_disabled(cls): """ add in numeric methods to disable other than add/sub """ - - def _make_invalid_op(name): - def invalid_op(self, other=None): - raise TypeError("cannot perform {name} with this 
index type: " - "{typ}".format(name=name, typ=type(self))) - - invalid_op.__name__ = name - return invalid_op - cls.__pow__ = cls.__rpow__ = _make_invalid_op('__pow__') cls.__mul__ = cls.__rmul__ = _make_invalid_op('__mul__') cls.__floordiv__ = cls.__rfloordiv__ = _make_invalid_op('__floordiv__') @@ -4147,15 +4149,6 @@ def logical_func(self, *args, **kwargs): @classmethod def _add_logical_methods_disabled(cls): """ add in logical methods to disable """ - - def _make_invalid_op(name): - def invalid_op(self, other=None): - raise TypeError("cannot perform {name} with this index type: " - "{typ}".format(name=name, typ=type(self))) - - invalid_op.__name__ = name - return invalid_op - cls.all = _make_invalid_op('all') cls.any = _make_invalid_op('any') diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 6db84aedce7e7..effa35695fcd1 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -1106,12 +1106,13 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): if isinstance(other, ABCDataFrame): # Another DataFrame return self._combine_frame(other, na_op, fill_value, level) elif isinstance(other, ABCSeries): - return self._combine_series(other, na_op, fill_value, axis, level) + return self._combine_series(other, na_op, fill_value, axis, level, + try_cast=True) else: if fill_value is not None: self = self.fillna(fill_value) - return self._combine_const(other, na_op) + return self._combine_const(other, na_op, try_cast=True) f.__name__ = name @@ -1172,7 +1173,8 @@ def f(self, other): if isinstance(other, ABCDataFrame): # Another DataFrame return self._compare_frame(other, func, str_rep) elif isinstance(other, ABCSeries): - return self._combine_series_infer(other, func, try_cast=False) + return self._combine_series(other, func, + axis=None, try_cast=False) else: # straight boolean comparisons we want to allow all columns diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index 91dc44e3f185e..122c2b11f25f9 100644 --- 
a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -540,8 +540,7 @@ def xs(self, key, axis=0, copy=False): # ---------------------------------------------------------------------- # Arithmetic-related methods - def _combine_frame(self, other, func, fill_value=None, level=None, - try_cast=True): + def _combine_frame(self, other, func, fill_value=None, level=None): this, other = self.align(other, join='outer', level=level, copy=False) new_index, new_columns = this.index, this.columns @@ -584,12 +583,9 @@ def _combine_frame(self, other, func, fill_value=None, level=None, default_fill_value=new_fill_value ).__finalize__(self) - def _combine_match_index(self, other, func, level=None, fill_value=None, - try_cast=True): + def _combine_match_index(self, other, func, level=None): new_data = {} - if fill_value is not None: - raise NotImplementedError("'fill_value' argument is not supported") if level is not None: raise NotImplementedError("'level' argument is not supported") @@ -605,6 +601,7 @@ def _combine_match_index(self, other, func, level=None, fill_value=None, new_data[col] = func(series.values, other.values) # fill_value is a function of our operator + fill_value = None if isna(other.fill_value) or isna(self.default_fill_value): fill_value = np.nan else: @@ -615,15 +612,12 @@ def _combine_match_index(self, other, func, level=None, fill_value=None, new_data, index=new_index, columns=self.columns, default_fill_value=fill_value).__finalize__(self) - def _combine_match_columns(self, other, func, level=None, fill_value=None, - try_cast=True): + def _combine_match_columns(self, other, func, level=None, try_cast=True): # patched version of DataFrame._combine_match_columns to account for # NumPy circumventing __rsub__ with float64 types, e.g.: 3.0 - series, # where 3.0 is numpy.float64 and series is a SparseSeries. 
Still # possible for this to happen, which is bothersome - if fill_value is not None: - raise NotImplementedError("'fill_value' argument is not supported") if level is not None: raise NotImplementedError("'level' argument is not supported") diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index bf895be8bc813..26974b6398694 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -381,6 +381,19 @@ def test_arith_flex_frame(self): with tm.assert_raises_regex(NotImplementedError, 'fill_value'): self.frame.add(self.frame.iloc[0], axis='index', fill_value=3) + def test_arith_flex_zero_len_raises(self): + # GH#19522 passing fill_value to frame flex arith methods should + # raise even in the zero-length special cases + ser_len0 = pd.Series([]) + df_len0 = pd.DataFrame([], columns=['A', 'B']) + df = pd.DataFrame([[1, 2], [3, 4]], columns=['A', 'B']) + + with tm.assert_raises_regex(NotImplementedError, 'fill_value'): + df.add(ser_len0, fill_value='E') + + with tm.assert_raises_regex(NotImplementedError, 'fill_value'): + df_len0.sub(df['A'], axis=None, fill_value=3) + def test_binary_ops_align(self): # test aligning binary ops From 2e45a27d7142cf52ce57924926b957a0a68187c3 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 7 Feb 2018 08:06:46 -0500 Subject: [PATCH 066/217] API/BUG: .apply will correctly infer output shape when axis=1 (#18577) closes #16353 closes #17348 closes #17437 closes #18573 closes #17970 closes #17892 closes #17602 closes #18775 closes #18901 closes #18919 --- doc/source/basics.rst | 10 +- doc/source/whatsnew/v0.23.0.txt | 73 ++++- pandas/core/apply.py | 288 ++++++++++++------ pandas/core/frame.py | 136 ++++++++- pandas/core/sparse/frame.py | 42 ++- pandas/io/formats/style.py | 4 +- pandas/tests/frame/test_apply.py | 386 ++++++++++++++++++++++-- pandas/tests/sparse/frame/test_apply.py | 92 ++++++ pandas/tests/sparse/frame/test_frame.py | 46 --- 9 files changed, 885 
insertions(+), 192 deletions(-) create mode 100644 pandas/tests/sparse/frame/test_apply.py diff --git a/doc/source/basics.rst b/doc/source/basics.rst index 18da53506f018..fb9e5a6cc75cb 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -793,8 +793,14 @@ The :meth:`~DataFrame.apply` method will also dispatch on a string method name. df.apply('mean') df.apply('mean', axis=1) -Depending on the return type of the function passed to :meth:`~DataFrame.apply`, -the result will either be of lower dimension or the same dimension. +The return type of the function passed to :meth:`~DataFrame.apply` affects the +type of the ultimate output from DataFrame.apply + +* If the applied function returns a ``Series``, the ultimate output is a ``DataFrame``. + The columns match the index of the ``Series`` returned by the applied function. +* If the applied function returns any other type, the ultimate output is a ``Series``. +* A ``result_type`` kwarg is accepted with the options: ``reduce``, ``broadcast``, and ``expand``. + These will determine how list-likes return results expand (or not) to a ``DataFrame``. :meth:`~DataFrame.apply` combined with some cleverness can be used to answer many questions about a data set. For example, suppose we wanted to extract the date where the diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 80c5352701540..1c6b698605521 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -142,7 +142,7 @@ Previous Behavior: 4 NaN dtype: float64 -Current Behavior +Current Behavior: .. ipython:: python @@ -167,7 +167,7 @@ Previous Behavior: 3 2.5 dtype: float64 -Current Behavior +Current Behavior: .. ipython:: python @@ -332,6 +332,73 @@ Convert to an xarray DataArray p.to_xarray() +.. _whatsnew_0230.api_breaking.apply: + +Apply Changes +~~~~~~~~~~~~~ + +:func:`DataFrame.apply` was inconsistent when applying an arbitrary user-defined-function that returned a list-like with ``axis=1``. 
Several bugs and inconsistencies +are resolved. If the applied function returns a Series, then pandas will return a DataFrame; otherwise a Series will be returned. This includes the case +where a list-like (e.g. ``tuple`` or ``list``) is returned (:issue:`16353`, :issue:`17437`, :issue:`17970`, :issue:`17348`, :issue:`17892`, :issue:`18573`, +:issue:`17602`, :issue:`18775`, :issue:`18901`, :issue:`18919`) + +.. ipython:: python + + df = pd.DataFrame(np.tile(np.arange(3), 6).reshape(6, -1) + 1, columns=['A', 'B', 'C']) + df + +Previous Behavior. If the returned shape happened to match the original columns, this would return a ``DataFrame``. +If the return shape did not match, a ``Series`` with lists was returned. + +.. code-block:: python + + In [3]: df.apply(lambda x: [1, 2, 3], axis=1) + Out[3]: + A B C + 0 1 2 3 + 1 1 2 3 + 2 1 2 3 + 3 1 2 3 + 4 1 2 3 + 5 1 2 3 + + In [4]: df.apply(lambda x: [1, 2], axis=1) + Out[4]: + 0 [1, 2] + 1 [1, 2] + 2 [1, 2] + 3 [1, 2] + 4 [1, 2] + 5 [1, 2] + dtype: object + + +New Behavior. The behavior is consistent. These will *always* return a ``Series``. + +.. ipython:: python + + df.apply(lambda x: [1, 2, 3], axis=1) + df.apply(lambda x: [1, 2], axis=1) + +To have expanded columns, you can use ``result_type='expand'`` + +.. ipython:: python + + df.apply(lambda x: [1, 2, 3], axis=1, result_type='expand') + +To broadcast the result across the original shape, you can use ``result_type='broadcast'``. The shape +must match the original columns. + +.. ipython:: python + + df.apply(lambda x: [1, 2, 3], axis=1, result_type='broadcast') + +Returning a ``Series`` allows one to control the exact return structure and column names: + +.. ipython:: python + + df.apply(lambda x: Series([1, 2, 3], index=x.index), axis=1) + .. _whatsnew_0230.api_breaking.build_changes: @@ -456,6 +523,8 @@ Deprecations - The ``is_copy`` attribute is deprecated and will be removed in a future version (:issue:`18801`). 
- ``IntervalIndex.from_intervals`` is deprecated in favor of the :class:`IntervalIndex` constructor (:issue:`19263`) - :func:``DataFrame.from_items`` is deprecated. Use :func:``DataFrame.from_dict()`` instead, or :func:``DataFrame.from_dict(OrderedDict())`` if you wish to preserve the key order (:issue:`17320`) +- The ``broadcast`` parameter of ``.apply()`` is removed in favor of ``result_type='broadcast'`` (:issue:`18577`) +- The ``reduce`` parameter of ``.apply()`` is removed in favor of ``result_type='reduce'`` (:issue:`18577`) .. _whatsnew_0230.prior_deprecations: diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 4cdec54b9a07a..c65943fbbb201 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -1,15 +1,20 @@ +import warnings import numpy as np from pandas import compat from pandas._libs import reduction +from pandas.core.dtypes.generic import ABCSeries from pandas.core.dtypes.common import ( is_extension_type, is_sequence) +from pandas.util._decorators import cache_readonly from pandas.io.formats.printing import pprint_thing -def frame_apply(obj, func, axis=0, broadcast=False, - raw=False, reduce=None, args=(), **kwds): +def frame_apply(obj, func, axis=0, broadcast=None, + raw=False, reduce=None, result_type=None, + ignore_failures=False, + args=None, kwds=None): """ construct and return a row or column based frame apply object """ axis = obj._get_axis_number(axis) @@ -19,20 +24,49 @@ def frame_apply(obj, func, axis=0, broadcast=False, klass = FrameColumnApply return klass(obj, func, broadcast=broadcast, - raw=raw, reduce=reduce, args=args, kwds=kwds) + raw=raw, reduce=reduce, result_type=result_type, + ignore_failures=ignore_failures, + args=args, kwds=kwds) class FrameApply(object): - def __init__(self, obj, func, broadcast, raw, reduce, args, kwds): + def __init__(self, obj, func, broadcast, raw, reduce, result_type, + ignore_failures, args, kwds): self.obj = obj - self.broadcast = broadcast self.raw = raw - self.reduce = reduce - 
self.args = args - - self.ignore_failures = kwds.pop('ignore_failures', False) - self.kwds = kwds + self.ignore_failures = ignore_failures + self.args = args or () + self.kwds = kwds or {} + + if result_type not in [None, 'reduce', 'broadcast', 'expand']: + raise ValueError("invalid value for result_type, must be one " + "of {None, 'reduce', 'broadcast', 'expand'}") + + if broadcast is not None: + warnings.warn("The broadcast argument is deprecated and will " + "be removed in a future version. You can specify " + "result_type='broadcast' to broadcast the result " + "to the original dimensions", + FutureWarning, stacklevel=4) + if broadcast: + result_type = 'broadcast' + + if reduce is not None: + warnings.warn("The reduce argument is deprecated and will " + "be removed in a future version. You can specify " + "result_type='reduce' to try to reduce the result " + "to the original dimensions", + FutureWarning, stacklevel=4) + if reduce: + + if result_type is not None: + raise ValueError( + "cannot pass both reduce=True and result_type") + + result_type = 'reduce' + + self.result_type = result_type # curry if needed if kwds or args and not isinstance(func, np.ufunc): @@ -43,6 +77,11 @@ def f(x): self.f = f + # results + self.result = None + self.res_index = None + self.res_columns = None + @property def columns(self): return self.obj.columns @@ -51,10 +90,14 @@ def columns(self): def index(self): return self.obj.index - @property + @cache_readonly def values(self): return self.obj.values + @cache_readonly + def dtypes(self): + return self.obj.dtypes + @property def agg_axis(self): return self.obj._get_agg_axis(self.axis) @@ -68,8 +111,7 @@ def get_result(self): # string dispatch if isinstance(self.f, compat.string_types): - if self.axis: - self.kwds['axis'] = self.axis + self.kwds['axis'] = self.axis return getattr(self.obj, self.f)(*self.args, **self.kwds) # ufunc @@ -80,25 +122,37 @@ def get_result(self): columns=self.columns, copy=False) # broadcasting - if 
self.broadcast: + if self.result_type == 'broadcast': return self.apply_broadcast() # one axis empty - if not all(self.obj.shape): + elif not all(self.obj.shape): return self.apply_empty_result() # raw - if self.raw and not self.obj._is_mixed_type: + elif self.raw and not self.obj._is_mixed_type: return self.apply_raw() return self.apply_standard() def apply_empty_result(self): - from pandas import Series - reduce = self.reduce + """ + we have an empty result; at least 1 axis is 0 + + we will try to apply the function to an empty + series in order to see if this is a reduction function + """ + + # we are not asked to reduce or infer reduction + # so just return a copy of the existing object + if self.result_type not in ['reduce', None]: + return self.obj.copy() + + # we may need to infer + reduce = self.result_type == 'reduce' - if reduce is None: - reduce = False + from pandas import Series + if not reduce: EMPTY_SERIES = Series([]) try: @@ -113,6 +167,8 @@ def apply_empty_result(self): return self.obj.copy() def apply_raw(self): + """ apply to the values as a numpy array """ + try: result = reduction.reduce(self.values, self.f, axis=self.axis) except Exception: @@ -125,49 +181,70 @@ def apply_raw(self): else: return Series(result, index=self.agg_axis) - def apply_standard(self): - from pandas import Series + def apply_broadcast(self, target): + result_values = np.empty_like(target.values) + + # axis which we want to compare compliance + result_compare = target.shape[0] + + for i, col in enumerate(target.columns): + res = self.f(target[col]) + ares = np. 
asarray(res).ndim + + # must be a scalar or 1d + if ares > 1: + raise ValueError("too many dims to broadcast") + elif ares == 1: + + # must match return dim + if result_compare != len(res): + raise ValueError("cannot broadcast result") - reduce = self.reduce - if reduce is None: - reduce = True + result_values[:, i] = res + + # we *always* preserve the original index / columns + result = self.obj._constructor(result_values, + index=target.index, + columns=target.columns) + return result + + def apply_standard(self): # try to reduce first (by default) # this only matters if the reduction in values is of different dtype # e.g. if we want to apply to a SparseFrame, then can't directly reduce - if reduce: - values = self.values - # we cannot reduce using non-numpy dtypes, - # as demonstrated in gh-12244 - if not is_extension_type(values): + # we cannot reduce using non-numpy dtypes, + # as demonstrated in gh-12244 + if (self.result_type in ['reduce', None] and + not self.dtypes.apply(is_extension_type).any()): - # Create a dummy Series from an empty array - index = self.obj._get_axis(self.axis) - empty_arr = np.empty(len(index), dtype=values.dtype) - - dummy = Series(empty_arr, index=index, dtype=values.dtype) + # Create a dummy Series from an empty array + from pandas import Series + values = self.values + index = self.obj._get_axis(self.axis) + labels = self.agg_axis + empty_arr = np.empty(len(index), dtype=values.dtype) + dummy = Series(empty_arr, index=index, dtype=values.dtype) - try: - labels = self.agg_axis - result = reduction.reduce(values, self.f, - axis=self.axis, - dummy=dummy, - labels=labels) - return Series(result, index=labels) - except Exception: - pass + try: + result = reduction.reduce(values, self.f, + axis=self.axis, + dummy=dummy, + labels=labels) + return Series(result, index=labels) + except Exception: + pass # compute the result using the series generator - results, res_index, res_columns = self._apply_series_generator() + 
self.apply_series_generator() # wrap results - return self.wrap_results(results, res_index, res_columns) + return self.wrap_results() - def _apply_series_generator(self): + def apply_series_generator(self): series_gen = self.series_generator res_index = self.result_index - res_columns = self.result_columns i = None keys = [] @@ -201,40 +278,23 @@ def _apply_series_generator(self): pprint_thing(k), ) raise - return results, res_index, res_columns + self.results = results + self.res_index = res_index + self.res_columns = self.result_columns - def wrap_results(self, results, res_index, res_columns): - from pandas import Series + def wrap_results(self): + results = self.results + # see if we can infer the results if len(results) > 0 and is_sequence(results[0]): - if not isinstance(results[0], Series): - index = res_columns - else: - index = None - result = self.obj._constructor(data=results, index=index) - result.columns = res_index + return self.wrap_results_for_axis() - if self.axis == 1: - result = result.T - result = result._convert( - datetime=True, timedelta=True, copy=False) - - else: - - result = Series(results) - result.index = res_index - - return result - - def _apply_broadcast(self, target): - result_values = np.empty_like(target.values) - columns = target.columns - for i, col in enumerate(columns): - result_values[:, i] = self.f(target[col]) + # dict of scalars + from pandas import Series + result = Series(results) + result.index = self.res_index - result = self.obj._constructor(result_values, index=target.index, - columns=target.columns) return result @@ -251,7 +311,7 @@ def get_result(self): return super(FrameRowApply, self).get_result() def apply_broadcast(self): - return self._apply_broadcast(self.obj) + return super(FrameRowApply, self).apply_broadcast(self.obj) @property def series_generator(self): @@ -266,29 +326,37 @@ def result_index(self): def result_columns(self): return self.index + def wrap_results_for_axis(self): + """ return the results for 
the rows """ -class FrameColumnApply(FrameApply): - axis = 1 + results = self.results + result = self.obj._constructor(data=results) - def __init__(self, obj, func, broadcast, raw, reduce, args, kwds): - super(FrameColumnApply, self).__init__(obj, func, broadcast, - raw, reduce, args, kwds) + if not isinstance(results[0], ABCSeries): + try: + result.index = self.res_columns + except ValueError: + pass - # skip if we are mixed datelike and trying reduce across axes - # GH6125 - if self.reduce: - if self.obj._is_mixed_type and self.obj._is_datelike_mixed_type: - self.reduce = False + try: + result.columns = self.res_index + except ValueError: + pass + + return result + + +class FrameColumnApply(FrameApply): + axis = 1 def apply_broadcast(self): - return self._apply_broadcast(self.obj.T).T + result = super(FrameColumnApply, self).apply_broadcast(self.obj.T) + return result.T @property def series_generator(self): - from pandas import Series - dtype = object if self.obj._is_mixed_type else None - return (Series._from_array(arr, index=self.columns, name=name, - dtype=dtype) + constructor = self.obj._constructor_sliced + return (constructor(arr, index=self.columns, name=name) for i, (arr, name) in enumerate(zip(self.values, self.index))) @@ -299,3 +367,39 @@ def result_index(self): @property def result_columns(self): return self.columns + + def wrap_results_for_axis(self): + """ return the results for the columns """ + results = self.results + + # we have requested to expand + if self.result_type == 'expand': + result = self.infer_to_same_shape() + + # we have a non-series and don't want inference + elif not isinstance(results[0], ABCSeries): + from pandas import Series + + result = Series(results) + result.index = self.res_index + + # we may want to infer results + else: + result = self.infer_to_same_shape() + + return result + + def infer_to_same_shape(self): + """ infer the results to the same shape as the input object """ + results = self.results + + result = 
self.obj._constructor(data=results) + result = result.T + + # set the index + result.index = self.res_index + + # infer dtypes + result = result.infer_objects() + + return result diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b0ead3f0c7f00..9487f51919108 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4820,8 +4820,8 @@ def aggregate(self, func, axis=0, *args, **kwargs): agg = aggregate - def apply(self, func, axis=0, broadcast=False, raw=False, reduce=None, - args=(), **kwds): + def apply(self, func, axis=0, broadcast=None, raw=False, reduce=None, + result_type=None, args=(), **kwds): """Applies function along input axis of DataFrame. Objects passed to functions are Series objects having index @@ -4836,9 +4836,14 @@ def apply(self, func, axis=0, broadcast=False, raw=False, reduce=None, axis : {0 or 'index', 1 or 'columns'}, default 0 * 0 or 'index': apply function to each column * 1 or 'columns': apply function to each row - broadcast : boolean, default False + broadcast : boolean, optional For aggregation functions, return object of same size with values propagated + + .. deprecated:: 0.23.0 + This argument will be removed in a future version, replaced + by result_type='broadcast'. + raw : boolean, default False If False, convert each row or column into a Series. If raw=True the passed function will receive ndarray objects instead. If you are @@ -4852,6 +4857,24 @@ def apply(self, func, axis=0, broadcast=False, raw=False, reduce=None, while guessing, exceptions raised by func will be ignored). If reduce is True a Series will always be returned, and if False a DataFrame will always be returned. + + .. deprecated:: 0.23.0 + This argument will be removed in a future version, replaced + by result_type='reduce'. + + result_type : {'expand', 'reduce', 'broadcast, None} + These only act when axis=1 {columns} + * 'expand' : list-like results will be turned into columns. 
+ * 'reduce' : return a Series if possible rather than expanding + list-like results. This is the opposite to 'expand'. + * 'broadcast' : results will be broadcast to the original shape + of the frame, the original index & columns will be retained. + * None : list-like results will be returned as a list + in a single column. However if the apply function + returns a Series these are expanded to columns. + + .. versionadded:: 0.23.0 + args : tuple Positional arguments to pass to function in addition to the array/series @@ -4867,9 +4890,96 @@ def apply(self, func, axis=0, broadcast=False, raw=False, reduce=None, Examples -------- - >>> df.apply(numpy.sqrt) # returns DataFrame - >>> df.apply(numpy.sum, axis=0) # equiv to df.sum(0) - >>> df.apply(numpy.sum, axis=1) # equiv to df.sum(1) + + We use this DataFrame to illustrate + + >>> df = DataFrame(np.tile(np.arange(3), 6).reshape(6, -1) + 1, + ... columns=['A', 'B', 'C']) + >>> df + A B C + 0 1 2 3 + 1 1 2 3 + 2 1 2 3 + 3 1 2 3 + 4 1 2 3 + 5 1 2 3 + + Using a ufunc + + >>> df.apply(np.sqrt) + A B C + 0 1.0 1.414214 1.732051 + 1 1.0 1.414214 1.732051 + 2 1.0 1.414214 1.732051 + 3 1.0 1.414214 1.732051 + 4 1.0 1.414214 1.732051 + 5 1.0 1.414214 1.732051 + + Using a reducing function on either axis + + >>> df.apply(np.sum, axis=0) + A 6 + B 12 + C 18 + dtype: int64 + + >>> df.apply(np.sum, axis=1) + 0 6 + 1 6 + 2 6 + 3 6 + 4 6 + 5 6 + dtype: int64 + + Returning a list-like will result in a Series + + >>> df.apply(lambda x: [1, 2], axis=1) + 0 [1, 2] + 1 [1, 2] + 2 [1, 2] + 3 [1, 2] + 4 [1, 2] + 5 [1, 2] + + Passing result_type='expand' will expand list-like results + to columns of a DataFrame + + >>> df.apply(lambda x: [1, 2], axis=1, result_type='expand') + 0 1 + 0 1 2 + 1 1 2 + 2 1 2 + 3 1 2 + 4 1 2 + 5 1 2 + + Returning a Series inside the function is similar to + passing result_type='expand'. The resulting column names + will be the Series index. 
+ + >>> df.apply(lambda x: Series([1, 2], index=['foo', 'bar']), axis=1) + foo bar + 0 1 2 + 1 1 2 + 2 1 2 + 3 1 2 + 4 1 2 + 5 1 2 + + + Passing result_type='broadcast' will take a same shape + result, whether list-like or scalar and broadcast it + along the axis. The resulting column names will be the originals. + + >>> df.apply(lambda x: [1, 2, 3], axis=1, result_type='broadcast') + A B C + 0 1 2 3 + 1 1 2 3 + 2 1 2 3 + 3 1 2 3 + 4 1 2 3 + 5 1 2 3 See also -------- @@ -4888,7 +4998,9 @@ def apply(self, func, axis=0, broadcast=False, raw=False, reduce=None, broadcast=broadcast, raw=raw, reduce=reduce, - args=args, **kwds) + result_type=result_type, + args=args, + kwds=kwds) return op.get_result() def applymap(self, func): @@ -5592,12 +5704,16 @@ def f(x): # numeric_only and yet we have tried a # column-by-column reduction, where we have mixed type. # So let's just do what we can - result = self.apply(f, reduce=False, - ignore_failures=True) + from pandas.core.apply import frame_apply + opa = frame_apply(self, + func=f, + result_type='expand', + ignore_failures=True) + result = opa.get_result() if result.ndim == self.ndim: result = result.iloc[0] return result - except: + except Exception: pass if filter_type is None or filter_type == 'numeric': diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index 122c2b11f25f9..371377ce2899c 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -829,7 +829,8 @@ def notna(self): return self._apply_columns(lambda x: x.notna()) notnull = notna - def apply(self, func, axis=0, broadcast=False, reduce=False): + def apply(self, func, axis=0, broadcast=None, reduce=None, + result_type=None): """ Analogous to DataFrame.apply, for SparseDataFrame @@ -842,6 +843,35 @@ def apply(self, func, axis=0, broadcast=False, reduce=False): For aggregation functions, return object of same size with values propagated + .. 
deprecated:: 0.23.0 + This argument will be removed in a future version, replaced + by result_type='broadcast'. + + reduce : boolean or None, default None + Try to apply reduction procedures. If the DataFrame is empty, + apply will use reduce to determine whether the result should be a + Series or a DataFrame. If reduce is None (the default), apply's + return value will be guessed by calling func on an empty Series (note: + while guessing, exceptions raised by func will be ignored). If + reduce is True a Series will always be returned, and if False a + DataFrame will always be returned. + + .. deprecated:: 0.23.0 + This argument will be removed in a future version, replaced + by result_type='reduce'. + + result_type : {'expand', 'reduce', 'broadcast', None} + These only act when axis=1 (columns) + * 'expand' : list-like results will be turned into columns + * 'reduce' : return a Series if possible rather than expanding + list-like results. This is the opposite to 'expand' + * 'broadcast' : scalar results will be broadcast to all columns + * None : list-like results will be returned as a list + in a single column. However if the apply function + returns a Series these are expanded to columns. + + .. 
versionadded:: 0.23.0 + Returns ------- applied : Series or SparseDataFrame @@ -865,12 +895,10 @@ def apply(self, func, axis=0, broadcast=False, reduce=False): op = frame_apply(self, func=func, axis=axis, - reduce=reduce) - - if broadcast: - return op.apply_broadcast() - - return op.apply_standard() + reduce=reduce, + broadcast=broadcast, + result_type=result_type) + return op.get_result() def applymap(self, func): """ diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 20e72dd6bde91..525f487d8aa39 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -509,7 +509,9 @@ def _apply(self, func, axis=0, subset=None, **kwargs): subset = _non_reducing_slice(subset) data = self.data.loc[subset] if axis is not None: - result = data.apply(func, axis=axis, **kwargs) + result = data.apply(func, axis=axis, + result_type='expand', **kwargs) + result.columns = data.columns else: result = func(data, **kwargs) if not isinstance(result, pd.DataFrame): diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index d69ddcd8f14d4..d1ad9f71e6350 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -82,24 +82,30 @@ def test_apply_empty(self): rs = xp.apply(lambda x: x['a'], axis=1) assert_frame_equal(xp, rs) + def test_apply_with_reduce_empty(self): # reduce with an empty DataFrame x = [] - result = self.empty.apply(x.append, axis=1, reduce=False) + result = self.empty.apply(x.append, axis=1, result_type='expand') assert_frame_equal(result, self.empty) - result = self.empty.apply(x.append, axis=1, reduce=True) + result = self.empty.apply(x.append, axis=1, result_type='reduce') assert_series_equal(result, Series( [], index=pd.Index([], dtype=object))) empty_with_cols = DataFrame(columns=['a', 'b', 'c']) - result = empty_with_cols.apply(x.append, axis=1, reduce=False) + result = empty_with_cols.apply(x.append, axis=1, result_type='expand') assert_frame_equal(result, empty_with_cols) - 
result = empty_with_cols.apply(x.append, axis=1, reduce=True) + result = empty_with_cols.apply(x.append, axis=1, result_type='reduce') assert_series_equal(result, Series( [], index=pd.Index([], dtype=object))) # Ensure that x.append hasn't been called assert x == [] + def test_apply_deprecate_reduce(self): + with warnings.catch_warnings(record=True): + x = [] + self.empty.apply(x.append, axis=1, result_type='reduce') + def test_apply_standard_nonunique(self): df = DataFrame( [[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=['a', 'a', 'c']) @@ -121,17 +127,79 @@ def test_with_string_args(self): expected = getattr(self.frame, arg)(axis=1) tm.assert_series_equal(result, expected) + def test_apply_broadcast_deprecated(self): + with tm.assert_produces_warning(FutureWarning): + self.frame.apply(np.mean, broadcast=True) + def test_apply_broadcast(self): - broadcasted = self.frame.apply(np.mean, broadcast=True) - agged = self.frame.apply(np.mean) - for col, ts in compat.iteritems(broadcasted): - assert (ts == agged[col]).all() + # scalars + result = self.frame.apply(np.mean, result_type='broadcast') + expected = DataFrame([self.frame.mean()], index=self.frame.index) + tm.assert_frame_equal(result, expected) + + result = self.frame.apply(np.mean, axis=1, result_type='broadcast') + m = self.frame.mean(axis=1) + expected = DataFrame({c: m for c in self.frame.columns}) + tm.assert_frame_equal(result, expected) + + # lists + result = self.frame.apply( + lambda x: list(range(len(self.frame.columns))), + axis=1, + result_type='broadcast') + m = list(range(len(self.frame.columns))) + expected = DataFrame([m] * len(self.frame.index), + dtype='float64', + index=self.frame.index, + columns=self.frame.columns) + tm.assert_frame_equal(result, expected) - broadcasted = self.frame.apply(np.mean, axis=1, broadcast=True) - agged = self.frame.apply(np.mean, axis=1) - for idx in broadcasted.index: - assert (broadcasted.xs(idx) == agged[idx]).all() + result = self.frame.apply(lambda x: 
list(range(len(self.frame.index))), + result_type='broadcast') + m = list(range(len(self.frame.index))) + expected = DataFrame({c: m for c in self.frame.columns}, + dtype='float64', + index=self.frame.index) + tm.assert_frame_equal(result, expected) + + # preserve columns + df = DataFrame(np.tile(np.arange(3), 6).reshape(6, -1) + 1, + columns=list('ABC')) + result = df.apply(lambda x: [1, 2, 3], + axis=1, + result_type='broadcast') + tm.assert_frame_equal(result, df) + + df = DataFrame(np.tile(np.arange(3), 6).reshape(6, -1) + 1, + columns=list('ABC')) + result = df.apply(lambda x: Series([1, 2, 3], index=list('abc')), + axis=1, + result_type='broadcast') + expected = df.copy() + tm.assert_frame_equal(result, expected) + + def test_apply_broadcast_error(self): + df = DataFrame( + np.tile(np.arange(3, dtype='int64'), 6).reshape(6, -1) + 1, + columns=['A', 'B', 'C']) + + # > 1 ndim + with pytest.raises(ValueError): + df.apply(lambda x: np.array([1, 2]).reshape(-1, 2), + axis=1, + result_type='broadcast') + + # cannot broadcast + with pytest.raises(ValueError): + df.apply(lambda x: [1, 2], + axis=1, + result_type='broadcast') + + with pytest.raises(ValueError): + df.apply(lambda x: Series([1, 2]), + axis=1, + result_type='broadcast') def test_apply_raw(self): result0 = self.frame.apply(np.mean, raw=True) @@ -208,7 +276,7 @@ def _checkit(axis=0, raw=False): _check(no_index, lambda x: x) _check(no_index, lambda x: x.mean()) - result = no_cols.apply(lambda x: x.mean(), broadcast=True) + result = no_cols.apply(lambda x: x.mean(), result_type='broadcast') assert isinstance(result, DataFrame) def test_apply_with_args_kwds(self): @@ -350,33 +418,37 @@ def test_apply_attach_name(self): result = self.frame.apply(lambda x: np.repeat(x.name, len(x)), axis=1) - expected = DataFrame(np.tile(self.frame.index, - (len(self.frame.columns), 1)).T, - index=self.frame.index, - columns=self.frame.columns) - assert_frame_equal(result, expected) + expected = Series(np.repeat(t[0], 
len(self.frame.columns)) + for t in self.frame.itertuples()) + expected.index = self.frame.index + assert_series_equal(result, expected) def test_apply_multi_index(self): - s = DataFrame([[1, 2], [3, 4], [5, 6]]) - s.index = MultiIndex.from_arrays([['a', 'a', 'b'], ['c', 'd', 'd']]) - s.columns = ['col1', 'col2'] - res = s.apply(lambda x: Series({'min': min(x), 'max': max(x)}), 1) - assert isinstance(res.index, MultiIndex) + index = MultiIndex.from_arrays([['a', 'a', 'b'], ['c', 'd', 'd']]) + s = DataFrame([[1, 2], [3, 4], [5, 6]], + index=index, + columns=['col1', 'col2']) + result = s.apply( + lambda x: Series({'min': min(x), 'max': max(x)}), 1) + expected = DataFrame([[1, 2], [3, 4], [5, 6]], + index=index, + columns=['min', 'max']) + assert_frame_equal(result, expected, check_like=True) def test_apply_dict(self): # GH 8735 A = DataFrame([['foo', 'bar'], ['spam', 'eggs']]) - A_dicts = pd.Series([dict([(0, 'foo'), (1, 'spam')]), - dict([(0, 'bar'), (1, 'eggs')])]) + A_dicts = Series([dict([(0, 'foo'), (1, 'spam')]), + dict([(0, 'bar'), (1, 'eggs')])]) B = DataFrame([[0, 1], [2, 3]]) - B_dicts = pd.Series([dict([(0, 0), (1, 2)]), dict([(0, 1), (1, 3)])]) + B_dicts = Series([dict([(0, 0), (1, 2)]), dict([(0, 1), (1, 3)])]) fn = lambda x: x.to_dict() for df, dicts in [(A, A_dicts), (B, B_dicts)]: - reduce_true = df.apply(fn, reduce=True) - reduce_false = df.apply(fn, reduce=False) - reduce_none = df.apply(fn, reduce=None) + reduce_true = df.apply(fn, result_type='reduce') + reduce_false = df.apply(fn, result_type='expand') + reduce_none = df.apply(fn) assert_series_equal(reduce_true, dicts) assert_frame_equal(reduce_false, df) @@ -465,8 +537,8 @@ def test_frame_apply_dont_convert_datetime64(self): assert df.x1.dtype == 'M8[ns]' - # See gh-12244 def test_apply_non_numpy_dtype(self): + # See gh-12244 df = DataFrame({'dt': pd.date_range( "2015-01-01", periods=3, tz='Europe/Brussels')}) result = df.apply(lambda x: x) @@ -482,6 +554,256 @@ def 
test_apply_non_numpy_dtype(self): assert_frame_equal(result, df) +class TestInferOutputShape(object): + # the user has supplied an opaque UDF where + # they are transforming the input that requires + # us to infer the output + + def test_infer_row_shape(self): + # gh-17437 + # if row shape is changing, infer it + df = pd.DataFrame(np.random.rand(10, 2)) + result = df.apply(np.fft.fft, axis=0) + assert result.shape == (10, 2) + + result = df.apply(np.fft.rfft, axis=0) + assert result.shape == (6, 2) + + def test_with_dictlike_columns(self): + # gh 17602 + df = DataFrame([[1, 2], [1, 2]], columns=['a', 'b']) + result = df.apply(lambda x: {'s': x['a'] + x['b']}, + axis=1) + expected = Series([{'s': 3} for t in df.itertuples()]) + assert_series_equal(result, expected) + + df['tm'] = [pd.Timestamp('2017-05-01 00:00:00'), + pd.Timestamp('2017-05-02 00:00:00')] + result = df.apply(lambda x: {'s': x['a'] + x['b']}, + axis=1) + assert_series_equal(result, expected) + + # compose a series + result = (df['a'] + df['b']).apply(lambda x: {'s': x}) + expected = Series([{'s': 3}, {'s': 3}]) + assert_series_equal(result, expected) + + # gh-18775 + df = DataFrame() + df["author"] = ["X", "Y", "Z"] + df["publisher"] = ["BBC", "NBC", "N24"] + df["date"] = pd.to_datetime(['17-10-2010 07:15:30', + '13-05-2011 08:20:35', + '15-01-2013 09:09:09']) + result = df.apply(lambda x: {}, axis=1) + expected = Series([{}, {}, {}]) + assert_series_equal(result, expected) + + def test_with_dictlike_columns_with_infer(self): + # gh 17602 + df = DataFrame([[1, 2], [1, 2]], columns=['a', 'b']) + result = df.apply(lambda x: {'s': x['a'] + x['b']}, + axis=1, result_type='expand') + expected = DataFrame({'s': [3, 3]}) + assert_frame_equal(result, expected) + + df['tm'] = [pd.Timestamp('2017-05-01 00:00:00'), + pd.Timestamp('2017-05-02 00:00:00')] + result = df.apply(lambda x: {'s': x['a'] + x['b']}, + axis=1, result_type='expand') + assert_frame_equal(result, expected) + + def 
test_with_listlike_columns(self): + # gh-17348 + df = DataFrame({'a': Series(np.random.randn(4)), + 'b': ['a', 'list', 'of', 'words'], + 'ts': date_range('2016-10-01', periods=4, freq='H')}) + + result = df[['a', 'b']].apply(tuple, axis=1) + expected = Series([t[1:] for t in df[['a', 'b']].itertuples()]) + assert_series_equal(result, expected) + + result = df[['a', 'ts']].apply(tuple, axis=1) + expected = Series([t[1:] for t in df[['a', 'ts']].itertuples()]) + assert_series_equal(result, expected) + + # gh-18919 + df = DataFrame({'x': Series([['a', 'b'], ['q']]), + 'y': Series([['z'], ['q', 't']])}) + df.index = MultiIndex.from_tuples([('i0', 'j0'), ('i1', 'j1')]) + + result = df.apply( + lambda row: [el for el in row['x'] if el in row['y']], + axis=1) + expected = Series([[], ['q']], index=df.index) + assert_series_equal(result, expected) + + def test_infer_output_shape_columns(self): + # gh-18573 + + df = DataFrame({'number': [1., 2.], + 'string': ['foo', 'bar'], + 'datetime': [pd.Timestamp('2017-11-29 03:30:00'), + pd.Timestamp('2017-11-29 03:45:00')]}) + result = df.apply(lambda row: (row.number, row.string), axis=1) + expected = Series([t[2:] for t in df.itertuples()]) + assert_series_equal(result, expected) + + def test_infer_output_shape_listlike_columns(self): + # gh-16353 + + df = DataFrame(np.random.randn(6, 3), columns=['A', 'B', 'C']) + + result = df.apply(lambda x: [1, 2, 3], axis=1) + expected = Series([[1, 2, 3] for t in df.itertuples()]) + assert_series_equal(result, expected) + + result = df.apply(lambda x: [1, 2], axis=1) + expected = Series([[1, 2] for t in df.itertuples()]) + assert_series_equal(result, expected) + + # gh-17970 + df = DataFrame({"a": [1, 2, 3]}, index=list('abc')) + + result = df.apply(lambda row: np.ones(1), axis=1) + expected = Series([np.ones(1) for t in df.itertuples()], + index=df.index) + assert_series_equal(result, expected) + + result = df.apply(lambda row: np.ones(2), axis=1) + expected = Series([np.ones(2) for t in 
df.itertuples()], + index=df.index) + assert_series_equal(result, expected) + + # gh-17892 + df = pd.DataFrame({'a': [pd.Timestamp('2010-02-01'), + pd.Timestamp('2010-02-04'), + pd.Timestamp('2010-02-05'), + pd.Timestamp('2010-02-06')], + 'b': [9, 5, 4, 3], + 'c': [5, 3, 4, 2], + 'd': [1, 2, 3, 4]}) + + def fun(x): + return (1, 2) + + result = df.apply(fun, axis=1) + expected = Series([(1, 2) for t in df.itertuples()]) + assert_series_equal(result, expected) + + def test_consistent_coerce_for_shapes(self): + # we want column names to NOT be propagated + # just because the shape matches the input shape + df = DataFrame(np.random.randn(4, 3), columns=['A', 'B', 'C']) + + result = df.apply(lambda x: [1, 2, 3], axis=1) + expected = Series([[1, 2, 3] for t in df.itertuples()]) + assert_series_equal(result, expected) + + result = df.apply(lambda x: [1, 2], axis=1) + expected = Series([[1, 2] for t in df.itertuples()]) + assert_series_equal(result, expected) + + def test_consistent_names(self): + # if a Series is returned, we should use the resulting index names + df = DataFrame( + np.tile(np.arange(3, dtype='int64'), 6).reshape(6, -1) + 1, + columns=['A', 'B', 'C']) + + result = df.apply(lambda x: Series([1, 2, 3], + index=['test', 'other', 'cols']), + axis=1) + expected = DataFrame( + np.tile(np.arange(3, dtype='int64'), 6).reshape(6, -1) + 1, + columns=['test', 'other', 'cols']) + assert_frame_equal(result, expected) + + result = df.apply( + lambda x: pd.Series([1, 2], index=['test', 'other']), axis=1) + expected = DataFrame( + np.tile(np.arange(2, dtype='int64'), 6).reshape(6, -1) + 1, + columns=['test', 'other']) + assert_frame_equal(result, expected) + + def test_result_type(self): + # result_type should be consistent no matter which + # path we take in the code + df = DataFrame( + np.tile(np.arange(3, dtype='int64'), 6).reshape(6, -1) + 1, + columns=['A', 'B', 'C']) + + result = df.apply(lambda x: [1, 2, 3], axis=1, result_type='expand') + expected = df.copy() + 
expected.columns = [0, 1, 2] + assert_frame_equal(result, expected) + + result = df.apply(lambda x: [1, 2], axis=1, result_type='expand') + expected = df[['A', 'B']].copy() + expected.columns = [0, 1] + assert_frame_equal(result, expected) + + # broadcast result + result = df.apply(lambda x: [1, 2, 3], axis=1, result_type='broadcast') + expected = df.copy() + assert_frame_equal(result, expected) + + columns = ['other', 'col', 'names'] + result = df.apply( + lambda x: pd.Series([1, 2, 3], + index=columns), + axis=1, + result_type='broadcast') + expected = df.copy() + assert_frame_equal(result, expected) + + # series result + result = df.apply(lambda x: Series([1, 2, 3], index=x.index), axis=1) + expected = df.copy() + assert_frame_equal(result, expected) + + # series result with other index + columns = ['other', 'col', 'names'] + result = df.apply( + lambda x: pd.Series([1, 2, 3], index=columns), + axis=1) + expected = df.copy() + expected.columns = columns + assert_frame_equal(result, expected) + + @pytest.mark.parametrize("result_type", ['foo', 1]) + def test_result_type_error(self, result_type): + # allowed result_type + df = DataFrame( + np.tile(np.arange(3, dtype='int64'), 6).reshape(6, -1) + 1, + columns=['A', 'B', 'C']) + + with pytest.raises(ValueError): + df.apply(lambda x: [1, 2, 3], + axis=1, + result_type=result_type) + + @pytest.mark.parametrize( + "box", + [lambda x: list(x), + lambda x: tuple(x), + lambda x: np.array(x, dtype='int64')], + ids=['list', 'tuple', 'array']) + def test_consistency_for_boxed(self, box): + # passing an array or list should not affect the output shape + df = DataFrame( + np.tile(np.arange(3, dtype='int64'), 6).reshape(6, -1) + 1, + columns=['A', 'B', 'C']) + + result = df.apply(lambda x: box([1, 2]), axis=1) + expected = Series([box([1, 2]) for t in df.itertuples()]) + assert_series_equal(result, expected) + + result = df.apply(lambda x: box([1, 2]), axis=1, result_type='expand') + expected = DataFrame( + np.tile(np.arange(2, 
dtype='int64'), 6).reshape(6, -1) + 1) + assert_frame_equal(result, expected) + + def zip_frames(*frames): """ take a list of frames, zip the columns together for each @@ -657,13 +979,13 @@ def test_non_callable_aggregates(self): # Function aggregate result = df.agg({'A': 'count'}) - expected = pd.Series({'A': 2}) + expected = Series({'A': 2}) assert_series_equal(result, expected) # Non-function aggregate result = df.agg({'A': 'size'}) - expected = pd.Series({'A': 3}) + expected = Series({'A': 3}) assert_series_equal(result, expected) diff --git a/pandas/tests/sparse/frame/test_apply.py b/pandas/tests/sparse/frame/test_apply.py new file mode 100644 index 0000000000000..07e4b1bf7c913 --- /dev/null +++ b/pandas/tests/sparse/frame/test_apply.py @@ -0,0 +1,92 @@ +import pytest +import numpy as np +from pandas import SparseDataFrame, DataFrame, Series, bdate_range +from pandas.core import nanops +from pandas.util import testing as tm + + +@pytest.fixture +def dates(): + return bdate_range('1/1/2011', periods=10) + + +@pytest.fixture +def empty(): + return SparseDataFrame() + + +@pytest.fixture +def frame(dates): + data = {'A': [np.nan, np.nan, np.nan, 0, 1, 2, 3, 4, 5, 6], + 'B': [0, 1, 2, np.nan, np.nan, np.nan, 3, 4, 5, 6], + 'C': np.arange(10, dtype=np.float64), + 'D': [0, 1, 2, 3, 4, 5, np.nan, np.nan, np.nan, np.nan]} + + return SparseDataFrame(data, index=dates) + + +@pytest.fixture +def fill_frame(frame): + values = frame.values.copy() + values[np.isnan(values)] = 2 + + return SparseDataFrame(values, columns=['A', 'B', 'C', 'D'], + default_fill_value=2, + index=frame.index) + + +def test_apply(frame): + applied = frame.apply(np.sqrt) + assert isinstance(applied, SparseDataFrame) + tm.assert_almost_equal(applied.values, np.sqrt(frame.values)) + + # agg / broadcast + with tm.assert_produces_warning(FutureWarning): + broadcasted = frame.apply(np.sum, broadcast=True) + assert isinstance(broadcasted, SparseDataFrame) + + with tm.assert_produces_warning(FutureWarning): 
+ exp = frame.to_dense().apply(np.sum, broadcast=True) + tm.assert_frame_equal(broadcasted.to_dense(), exp) + + applied = frame.apply(np.sum) + tm.assert_series_equal(applied, + frame.to_dense().apply(nanops.nansum)) + + +def test_apply_fill(fill_frame): + applied = fill_frame.apply(np.sqrt) + assert applied['A'].fill_value == np.sqrt(2) + + +def test_apply_empty(empty): + assert empty.apply(np.sqrt) is empty + + +def test_apply_nonuq(): + orig = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], + index=['a', 'a', 'c']) + sparse = orig.to_sparse() + res = sparse.apply(lambda s: s[0], axis=1) + exp = orig.apply(lambda s: s[0], axis=1) + + # dtype must be kept + assert res.dtype == np.int64 + + # ToDo: apply must return subclassed dtype + assert isinstance(res, Series) + tm.assert_series_equal(res.to_dense(), exp) + + # df.T breaks + sparse = orig.T.to_sparse() + res = sparse.apply(lambda s: s[0], axis=0) # noqa + exp = orig.T.apply(lambda s: s[0], axis=0) + + # TODO: no non-unique columns supported in sparse yet + # tm.assert_series_equal(res.to_dense(), exp) + + +def test_applymap(frame): + # just test that it works + result = frame.applymap(lambda x: x * 2) + assert isinstance(result, SparseDataFrame) diff --git a/pandas/tests/sparse/frame/test_frame.py b/pandas/tests/sparse/frame/test_frame.py index 54f567bcd2a8c..29fad3c8eefaf 100644 --- a/pandas/tests/sparse/frame/test_frame.py +++ b/pandas/tests/sparse/frame/test_frame.py @@ -621,52 +621,6 @@ def test_append(self): tm.assert_sp_frame_equal(appended.iloc[:, :3], self.frame.iloc[:, :3], exact_indices=False) - def test_apply(self): - applied = self.frame.apply(np.sqrt) - assert isinstance(applied, SparseDataFrame) - tm.assert_almost_equal(applied.values, np.sqrt(self.frame.values)) - - applied = self.fill_frame.apply(np.sqrt) - assert applied['A'].fill_value == np.sqrt(2) - - # agg / broadcast - broadcasted = self.frame.apply(np.sum, broadcast=True) - assert isinstance(broadcasted, SparseDataFrame) - - exp = 
self.frame.to_dense().apply(np.sum, broadcast=True) - tm.assert_frame_equal(broadcasted.to_dense(), exp) - - assert self.empty.apply(np.sqrt) is self.empty - - from pandas.core import nanops - applied = self.frame.apply(np.sum) - tm.assert_series_equal(applied, - self.frame.to_dense().apply(nanops.nansum)) - - def test_apply_nonuq(self): - orig = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], - index=['a', 'a', 'c']) - sparse = orig.to_sparse() - res = sparse.apply(lambda s: s[0], axis=1) - exp = orig.apply(lambda s: s[0], axis=1) - # dtype must be kept - assert res.dtype == np.int64 - # ToDo: apply must return subclassed dtype - assert isinstance(res, pd.Series) - tm.assert_series_equal(res.to_dense(), exp) - - # df.T breaks - sparse = orig.T.to_sparse() - res = sparse.apply(lambda s: s[0], axis=0) # noqa - exp = orig.T.apply(lambda s: s[0], axis=0) - # TODO: no non-unique columns supported in sparse yet - # tm.assert_series_equal(res.to_dense(), exp) - - def test_applymap(self): - # just test that it works - result = self.frame.applymap(lambda x: x * 2) - assert isinstance(result, SparseDataFrame) - def test_astype(self): sparse = pd.SparseDataFrame({'A': SparseArray([1, 2, 3, 4], dtype=np.int64), From a44efdbefdedb65902da3f0c84db507e70f1679a Mon Sep 17 00:00:00 2001 From: cbertinato Date: Wed, 7 Feb 2018 10:25:38 -0500 Subject: [PATCH 067/217] BUG: Fixes rounding error in Timestamp.floor() (#19240) --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/_libs/tslibs/timestamps.pyx | 60 +++++++++++++------ pandas/core/indexes/datetimelike.py | 17 +----- .../indexes/datetimes/test_scalar_compat.py | 21 +++++++ .../tests/scalar/timestamp/test_unary_ops.py | 25 +++++++- 5 files changed, 90 insertions(+), 34 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 1c6b698605521..a7300f7d1ceb0 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -620,6 +620,7 @@ Datetimelike - Bug in 
:func:`~DataFrame.pct_change` using ``periods`` and ``freq`` returned different length outputs (:issue:`7292`) - Bug in comparison of :class:`DatetimeIndex` against ``None`` or ``datetime.date`` objects raising ``TypeError`` for ``==`` and ``!=`` comparisons instead of all-``False`` and all-``True``, respectively (:issue:`19301`) - Bug in :class:`Timestamp` and :func:`to_datetime` where a string representing a barely out-of-bounds timestamp would be incorrectly rounded down instead of raising ``OutOfBoundsDatetime`` (:issue:`19382`) +- Bug in :func:`Timestamp.floor` :func:`DatetimeIndex.floor` where time stamps far in the future and past were not rounded correctly (:issue:`19206`) - Timezones diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 47179a4e1d761..ed77916a1d887 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -58,6 +58,46 @@ cdef inline object create_timestamp_from_ts(int64_t value, return ts_base +def round_ns(values, rounder, freq): + """ + Applies rounding function at given frequency + + Parameters + ---------- + values : int, :obj:`ndarray` + rounder : function + freq : str, obj + + Returns + ------- + int or :obj:`ndarray` + """ + from pandas.tseries.frequencies import to_offset + unit = to_offset(freq).nanos + if unit < 1000: + # for nano rounding, work with the last 6 digits separately + # due to float precision + buff = 1000000 + r = (buff * (values // buff) + unit * + (rounder((values % buff) * (1 / float(unit)))).astype('i8')) + else: + if unit % 1000 != 0: + msg = 'Precision will be lost using frequency: {}' + warnings.warn(msg.format(freq)) + + # GH19206 + # to deal with round-off when unit is large + if unit >= 1e9: + divisor = 10 ** int(np.log10(unit / 1e7)) + else: + divisor = 10 + + r = (unit * rounder((values * (divisor / float(unit))) / divisor) + .astype('i8')) + + return r + + # This is PITA. 
Because we inherit from datetime, which has very specific # construction requirements, we need to do object instantiation in python # (see Timestamp class above). This will serve as a C extension type that @@ -581,28 +621,12 @@ class Timestamp(_Timestamp): return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq) def _round(self, freq, rounder): - - cdef: - int64_t unit, r, value, buff = 1000000 - object result - - from pandas.tseries.frequencies import to_offset - unit = to_offset(freq).nanos if self.tz is not None: value = self.tz_localize(None).value else: value = self.value - if unit < 1000 and unit % 1000 != 0: - # for nano rounding, work with the last 6 digits separately - # due to float precision - r = (buff * (value // buff) + unit * - (rounder((value % buff) / float(unit))).astype('i8')) - elif unit >= 1000 and unit % 1000 != 0: - msg = 'Precision will be lost using frequency: {}' - warnings.warn(msg.format(freq)) - r = (unit * rounder(value / float(unit)).astype('i8')) - else: - r = (unit * rounder(value / float(unit)).astype('i8')) + + r = round_ns(value, rounder, freq) result = Timestamp(r, unit='ns') if self.tz is not None: result = result.tz_localize(self.tz) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 8e77c7a7fa48c..4a526955d9bf4 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -36,6 +36,7 @@ from pandas._libs import lib, iNaT, NaT from pandas._libs.tslibs.period import Period from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds +from pandas._libs.tslibs.timestamps import round_ns from pandas.core.indexes.base import Index, _index_shared_docs from pandas.util._decorators import Appender, cache_readonly @@ -90,23 +91,9 @@ class TimelikeOps(object): """) def _round(self, freq, rounder): - - from pandas.tseries.frequencies import to_offset - unit = to_offset(freq).nanos # round the local times values = _ensure_datetimelike_to_i8(self) - if 
unit < 1000 and unit % 1000 != 0: - # for nano rounding, work with the last 6 digits separately - # due to float precision - buff = 1000000 - result = (buff * (values // buff) + unit * - (rounder((values % buff) / float(unit))).astype('i8')) - elif unit >= 1000 and unit % 1000 != 0: - msg = 'Precision will be lost using frequency: {}' - warnings.warn(msg.format(freq)) - result = (unit * rounder(values / float(unit)).astype('i8')) - else: - result = (unit * rounder(values / float(unit)).astype('i8')) + result = round_ns(values, rounder, freq) result = self._maybe_mask_results(result, fill_value=NaT) attribs = self._get_attributes_dict() diff --git a/pandas/tests/indexes/datetimes/test_scalar_compat.py b/pandas/tests/indexes/datetimes/test_scalar_compat.py index 111f68ba14775..83e7a0cd68d63 100644 --- a/pandas/tests/indexes/datetimes/test_scalar_compat.py +++ b/pandas/tests/indexes/datetimes/test_scalar_compat.py @@ -126,6 +126,27 @@ def test_round(self, tz): ts = '2016-10-17 12:00:00.001501031' DatetimeIndex([ts]).round('1010ns') + @pytest.mark.parametrize('test_input, rounder, freq, expected', [ + (['2117-01-01 00:00:45'], 'floor', '15s', ['2117-01-01 00:00:45']), + (['2117-01-01 00:00:45'], 'ceil', '15s', ['2117-01-01 00:00:45']), + (['2117-01-01 00:00:45.000000012'], 'floor', '10ns', + ['2117-01-01 00:00:45.000000010']), + (['1823-01-01 00:00:01.000000012'], 'ceil', '10ns', + ['1823-01-01 00:00:01.000000020']), + (['1823-01-01 00:00:01'], 'floor', '1s', ['1823-01-01 00:00:01']), + (['1823-01-01 00:00:01'], 'ceil', '1s', ['1823-01-01 00:00:01']), + (('NaT', '1823-01-01 00:00:01'), 'floor', '1s', + ('NaT', '1823-01-01 00:00:01')), + (('NaT', '1823-01-01 00:00:01'), 'ceil', '1s', + ('NaT', '1823-01-01 00:00:01')) + ]) + def test_ceil_floor_edge(self, tz, test_input, rounder, freq, expected): + dt = DatetimeIndex(list(test_input)) + func = getattr(dt, rounder) + result = func(freq) + expected = DatetimeIndex(list(expected)) + assert expected.equals(result) + # 
---------------------------------------------------------------- # DatetimeIndex.normalize diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py index 70c7308dd3991..8a6989c909cb2 100644 --- a/pandas/tests/scalar/timestamp/test_unary_ops.py +++ b/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -10,7 +10,7 @@ from pandas.compat import PY3 from pandas._libs.tslibs.frequencies import _INVALID_FREQ_ERROR -from pandas import Timestamp +from pandas import Timestamp, NaT class TestTimestampUnaryOps(object): @@ -93,6 +93,29 @@ def test_round_frequencies(self, freq, expected): result = stamp.round(freq=freq) assert result == expected + @pytest.mark.parametrize('test_input, rounder, freq, expected', [ + ('2117-01-01 00:00:45', 'floor', '15s', '2117-01-01 00:00:45'), + ('2117-01-01 00:00:45', 'ceil', '15s', '2117-01-01 00:00:45'), + ('2117-01-01 00:00:45.000000012', 'floor', '10ns', + '2117-01-01 00:00:45.000000010'), + ('1823-01-01 00:00:01.000000012', 'ceil', '10ns', + '1823-01-01 00:00:01.000000020'), + ('1823-01-01 00:00:01', 'floor', '1s', '1823-01-01 00:00:01'), + ('1823-01-01 00:00:01', 'ceil', '1s', '1823-01-01 00:00:01'), + ('NaT', 'floor', '1s', 'NaT'), + ('NaT', 'ceil', '1s', 'NaT') + ]) + def test_ceil_floor_edge(self, test_input, rounder, freq, expected): + dt = Timestamp(test_input) + func = getattr(dt, rounder) + result = func(freq) + + if dt is NaT: + assert result is NaT + else: + expected = Timestamp(expected) + assert result == expected + def test_ceil(self): dt = Timestamp('20130101 09:10:11') result = dt.ceil('D') From a10f2e084568dac30b63dba8a565fa2adde1dfb1 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 8 Feb 2018 01:57:19 +0100 Subject: [PATCH 068/217] DOC: some clean-up of the apply docs (follow-up #18577) (#19573) --- doc/source/basics.rst | 16 ++++++++------- doc/source/whatsnew/v0.23.0.txt | 23 +++++++++++---------- pandas/core/frame.py | 36 
++++++++++++++++++--------------- pandas/core/sparse/frame.py | 18 ++++++++++------- 4 files changed, 52 insertions(+), 41 deletions(-) diff --git a/doc/source/basics.rst b/doc/source/basics.rst index fb9e5a6cc75cb..749d4be11ad45 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -774,9 +774,9 @@ We encourage you to view the source code of :meth:`~DataFrame.pipe`. Row or Column-wise Function Application ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Arbitrary functions can be applied along the axes of a DataFrame or Panel +Arbitrary functions can be applied along the axes of a DataFrame using the :meth:`~DataFrame.apply` method, which, like the descriptive -statistics methods, take an optional ``axis`` argument: +statistics methods, takes an optional ``axis`` argument: .. ipython:: python @@ -794,13 +794,15 @@ The :meth:`~DataFrame.apply` method will also dispatch on a string method name. df.apply('mean', axis=1) The return type of the function passed to :meth:`~DataFrame.apply` affects the -type of the ultimate output from DataFrame.apply +type of the final output from ``DataFrame.apply`` for the default behaviour: -* If the applied function returns a ``Series``, the ultimate output is a ``DataFrame``. +* If the applied function returns a ``Series``, the final output is a ``DataFrame``. The columns match the index of the ``Series`` returned by the applied function. -* If the applied function returns any other type, the ultimate output is a ``Series``. -* A ``result_type`` kwarg is accepted with the options: ``reduce``, ``broadcast``, and ``expand``. - These will determine how list-likes return results expand (or not) to a ``DataFrame``. +* If the applied function returns any other type, the final output is a ``Series``. + +This default behaviour can be overridden using the ``result_type``, which +accepts three options: ``reduce``, ``broadcast``, and ``expand``. +These will determine how list-likes return values expand (or not) to a ``DataFrame``. 
:meth:`~DataFrame.apply` combined with some cleverness can be used to answer many questions about a data set. For example, suppose we wanted to extract the date where the diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index a7300f7d1ceb0..7782e5f1ffa56 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -334,20 +334,20 @@ Convert to an xarray DataArray .. _whatsnew_0230.api_breaking.apply: -Apply Changes -~~~~~~~~~~~~~ +Changes to make output of ``DataFrame.apply`` consistent +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ :func:`DataFrame.apply` was inconsistent when applying an arbitrary user-defined-function that returned a list-like with ``axis=1``. Several bugs and inconsistencies are resolved. If the applied function returns a Series, then pandas will return a DataFrame; otherwise a Series will be returned, this includes the case -where a list-like (e.g. ``tuple`` or ``list`` is returned), (:issue:`16353`, :issue:`17437`, :issue:`17970`, :issue:`17348`, :issue:`17892`, :issue:`18573`, -:issue:`17602`, :issue:`18775`, :issue:`18901`, :issue:`18919`) +where a list-like (e.g. ``tuple`` or ``list`` is returned) (:issue:`16353`, :issue:`17437`, :issue:`17970`, :issue:`17348`, :issue:`17892`, :issue:`18573`, +:issue:`17602`, :issue:`18775`, :issue:`18901`, :issue:`18919`). .. ipython:: python df = pd.DataFrame(np.tile(np.arange(3), 6).reshape(6, -1) + 1, columns=['A', 'B', 'C']) df -Previous Behavior. If the returned shape happened to match the original columns, this would return a ``DataFrame``. +Previous Behavior: if the returned shape happened to match the length of original columns, this would return a ``DataFrame``. If the return shape did not match, a ``Series`` with lists was returned. .. code-block:: python @@ -373,7 +373,7 @@ If the return shape did not match, a ``Series`` with lists was returned. dtype: object -New Behavior. The behavior is consistent. 
These will *always* return a ``Series``. +New Behavior: When the applied function returns a list-like, this will now *always* return a ``Series``. .. ipython:: python @@ -386,8 +386,9 @@ To have expanded columns, you can use ``result_type='expand'`` df.apply(lambda x: [1, 2, 3], axis=1, result_type='expand') -To have broadcast the result across, you can use ``result_type='broadcast'``. The shape -must match the original columns. +To broadcast the result across the original columns (the old behaviour for +list-likes of the correct length), you can use ``result_type='broadcast'``. +The shape must match the original columns. .. ipython:: python @@ -397,7 +398,7 @@ Returning a ``Series`` allows one to control the exact return structure and colu .. ipython:: python - df.apply(lambda x: Series([1, 2, 3], index=x.index), axis=1) + df.apply(lambda x: Series([1, 2, 3], index=['D', 'E', 'F']]), axis=1) .. _whatsnew_0230.api_breaking.build_changes: @@ -523,8 +524,8 @@ Deprecations - The ``is_copy`` attribute is deprecated and will be removed in a future version (:issue:`18801`). - ``IntervalIndex.from_intervals`` is deprecated in favor of the :class:`IntervalIndex` constructor (:issue:`19263`) - :func:``DataFrame.from_items`` is deprecated. Use :func:``DataFrame.from_dict()`` instead, or :func:``DataFrame.from_dict(OrderedDict())`` if you wish to preserve the key order (:issue:`17320`) -- The ``broadcast`` parameter of ``.apply()`` is removed in favor of ``result_type='broadcast'`` (:issue:`18577`) -- The ``reduce`` parameter of ``.apply()`` is removed in favor of ``result_type='reduce'`` (:issue:`18577`) +- The ``broadcast`` parameter of ``.apply()`` is deprecated in favor of ``result_type='broadcast'`` (:issue:`18577`) +- The ``reduce`` parameter of ``.apply()`` is deprecated in favor of ``result_type='reduce'`` (:issue:`18577`) .. 
_whatsnew_0230.prior_deprecations: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9487f51919108..28923f0fbf240 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4822,12 +4822,12 @@ def aggregate(self, func, axis=0, *args, **kwargs): def apply(self, func, axis=0, broadcast=None, raw=False, reduce=None, result_type=None, args=(), **kwds): - """Applies function along input axis of DataFrame. + """Applies function along an axis of the DataFrame. Objects passed to functions are Series objects having index either the DataFrame's index (axis=0) or the columns (axis=1). - Return type depends on whether passed function aggregates, or the - reduce argument if the DataFrame is empty. + Final return type depends on the return type of the applied function, + or on the `result_type` argument. Parameters ---------- @@ -4863,15 +4863,18 @@ def apply(self, func, axis=0, broadcast=None, raw=False, reduce=None, by result_type='reduce'. result_type : {'expand', 'reduce', 'broadcast, None} - These only act when axis=1 {columns} + These only act when axis=1 {columns}: + * 'expand' : list-like results will be turned into columns. * 'reduce' : return a Series if possible rather than expanding list-like results. This is the opposite to 'expand'. * 'broadcast' : results will be broadcast to the original shape of the frame, the original index & columns will be retained. - * None : list-like results will be returned as a list - in a single column. However if the apply function - returns a Series these are expanded to columns. + + The default behaviour (None) depends on the return value of the + applied function: list-like results will be returned as a Series + of those. However if the apply function returns a Series these + are expanded to columns. .. 
versionadded:: 0.23.0 @@ -4893,8 +4896,8 @@ def apply(self, func, axis=0, broadcast=None, raw=False, reduce=None, We use this DataFrame to illustrate - >>> df = DataFrame(np.tile(np.arange(3), 6).reshape(6, -1) + 1, - ... columns=['A', 'B', 'C']) + >>> df = pd.DataFrame(np.tile(np.arange(3), 6).reshape(6, -1) + 1, + ... columns=['A', 'B', 'C']) >>> df A B C 0 1 2 3 @@ -4904,7 +4907,8 @@ def apply(self, func, axis=0, broadcast=None, raw=False, reduce=None, 4 1 2 3 5 1 2 3 - Using a ufunc + Using a numpy universal function (in this case the same as + ``np.sqrt(df)``): >>> df.apply(np.sqrt) A B C @@ -4954,8 +4958,8 @@ def apply(self, func, axis=0, broadcast=None, raw=False, reduce=None, 4 1 2 5 1 2 - Return a Series inside the function is similar to passing - Passing result_type='expand'. The resulting column names + Returning a Series inside the function is similar to passing + ``result_type='expand'``. The resulting column names will be the Series index. >>> df.apply(lambda x: Series([1, 2], index=['foo', 'bar']), axis=1) @@ -4967,10 +4971,10 @@ def apply(self, func, axis=0, broadcast=None, raw=False, reduce=None, 4 1 2 5 1 2 - - Passing result_type='broadcast' will take a same shape - result, whether list-like or scalar and broadcast it - along the axis. The resulting column names will be the originals. + Passing ``result_type='broadcast'`` will ensure the same shape + result, whether list-like or scalar is returned by the function, + and broadcast it along the axis. The resulting column names will + be the originals. >>> df.apply(lambda x: [1, 2, 3], axis=1, result_type='broadcast') A B C diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index 371377ce2899c..19b126216db81 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -861,14 +861,18 @@ def apply(self, func, axis=0, broadcast=None, reduce=None, by result_type='reduce'. 
result_type : {'expand', 'reduce', 'broadcast, None} - These only act when axis=1 {columns} - * 'expand' : list-like results will be turned into columns + These only act when axis=1 {columns}: + + * 'expand' : list-like results will be turned into columns. * 'reduce' : return a Series if possible rather than expanding - list-like results. This is the opposite to 'expand' - * 'broadcast' : scalar results will be broadcast to all columns - * None : list-like results will be returned as a list - in a single column. However if the apply function - returns a Series these are expanded to columns. + list-like results. This is the opposite to 'expand'. + * 'broadcast' : results will be broadcast to the original shape + of the frame, the original index & columns will be retained. + + The default behaviour (None) depends on the return value of the + applied function: list-like results will be returned as a Series + of those. However if the apply function returns a Series these + are expanded to columns. .. versionadded:: 0.23.0 From b1e3422c9476a57181989f160616e352e5f90022 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 7 Feb 2018 17:09:48 -0800 Subject: [PATCH 069/217] Remove duplicated logic from period_helper (#19540) --- pandas/_libs/src/period_helper.c | 519 +++++------------------------- pandas/_libs/src/period_helper.h | 29 +- pandas/_libs/tslibs/ccalendar.pyx | 3 +- pandas/_libs/tslibs/period.pyx | 178 +++++++--- 4 files changed, 216 insertions(+), 513 deletions(-) diff --git a/pandas/_libs/src/period_helper.c b/pandas/_libs/src/period_helper.c index 8f1c527a68455..570f20b790750 100644 --- a/pandas/_libs/src/period_helper.c +++ b/pandas/_libs/src/period_helper.c @@ -14,6 +14,7 @@ See end of file for stuff pandas uses (search for 'pandas'). 
*/ #include "period_helper.h" +#include "../datetime/np_datetime.h" /* ------------------------------------------------------------------ * Code derived from scikits.timeseries @@ -37,193 +38,39 @@ static int floordiv(int x, int divisor) { } } -/* Table with day offsets for each month (0-based, without and with leap) */ -static int month_offset[2][13] = { - {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365}, - {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}}; - -/* Table of number of days in a month (0-based, without and with leap) */ -static int days_in_month[2][12] = { - {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}, - {31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}}; - -/* Return 1/0 iff year points to a leap year. - * Assumes GREGORIAN_CALENDAR */ -static int dInfoCalc_Leapyear(npy_int64 year) { - return (year % 4 == 0) && ((year % 100 != 0) || (year % 400 == 0)); -} - -/* Return the day of the week for the given absolute date. */ -static int dInfoCalc_DayOfWeek(npy_int64 absdate) { - int day_of_week; - - if (absdate >= 1) { - day_of_week = (absdate - 1) % 7; - } else { - day_of_week = 6 - ((-absdate) % 7); - } - return day_of_week; -} static int monthToQuarter(int month) { return ((month - 1) / 3) + 1; } -/* Return the year offset, that is the absolute date of the day - 31.12.(year-1) - Assumes GREGORIAN_CALENDAR - - This is equivalent to: - - (datetime(year, 1, 1) - datetime(1970, 1, 1)).days - - Note: - For the Julian calendar we shift the absdate (which is measured - using the Gregorian Epoch) value by two days because the Epoch - (0001-01-01) in the Julian calendar lies 2 days before the Epoch in - the Gregorian calendar. */ -static int dInfoCalc_YearOffset(npy_int64 year) { - year--; - if (year >= 0 || -1 / 4 == -1) - return year * 365 + year / 4 - year / 100 + year / 400; - else - return year * 365 + (year - 3) / 4 - (year - 99) / 100 + - (year - 399) / 400; -} - -/* Set the instance's value using the given date and time. 
+/* Find the absdate (days elapsed since datetime(1, 1, 1) + * for the given year/month/day. * Assumes GREGORIAN_CALENDAR */ -static int dInfoCalc_SetFromDateAndTime(struct date_info *dinfo, int year, - int month, int day, int hour, - int minute, double second) { +static npy_int64 dInfoCalc_SetFromDateAndTime(int year, int month, int day) { /* Calculate the absolute date */ - { - int leap; - npy_int64 absdate; - int yearoffset; - - /* Range check */ - Py_AssertWithArg(year > -(INT_MAX / 366) && year < (INT_MAX / 366), - PyExc_ValueError, "year out of range: %i", year); - - /* Is it a leap year ? */ - leap = dInfoCalc_Leapyear(year); + pandas_datetimestruct dts; + npy_int64 unix_date; - /* Negative month values indicate months relative to the years end */ - if (month < 0) month += 13; - Py_AssertWithArg(month >= 1 && month <= 12, PyExc_ValueError, - "month out of range (1-12): %i", month); - - /* Negative values indicate days relative to the months end */ - if (day < 0) day += days_in_month[leap][month - 1] + 1; - Py_AssertWithArg(day >= 1 && day <= days_in_month[leap][month - 1], - PyExc_ValueError, "day out of range: %i", day); - - yearoffset = dInfoCalc_YearOffset(year); - if (yearoffset == INT_ERR_CODE) goto onError; - - absdate = day + month_offset[leap][month - 1] + yearoffset; - - dinfo->absdate = absdate; - - dinfo->year = year; - dinfo->month = month; - dinfo->quarter = ((month - 1) / 3) + 1; - dinfo->day = day; - - dinfo->day_of_week = dInfoCalc_DayOfWeek(absdate); - dinfo->day_of_year = (short)(absdate - yearoffset); - } - - /* Calculate the absolute time */ - { - Py_AssertWithArg(hour >= 0 && hour <= 23, PyExc_ValueError, - "hour out of range (0-23): %i", hour); - Py_AssertWithArg(minute >= 0 && minute <= 59, PyExc_ValueError, - "minute out of range (0-59): %i", minute); - Py_AssertWithArg( - second >= (double)0.0 && - (second < (double)60.0 || - (hour == 23 && minute == 59 && second < (double)61.0)), - PyExc_ValueError, - "second out of range (0.0 - 
<60.0; <61.0 for 23:59): %f", second); - - dinfo->abstime = (double)(hour * 3600 + minute * 60) + second; - - dinfo->hour = hour; - dinfo->minute = minute; - dinfo->second = second; - } - return 0; - -onError: - return INT_ERR_CODE; + memset(&dts, 0, sizeof(pandas_datetimestruct)); + dts.year = year; + dts.month = month; + dts.day = day; + unix_date = pandas_datetimestruct_to_datetime(PANDAS_FR_D, &dts); + return ORD_OFFSET + unix_date; } /* Sets the date part of the date_info struct - Assumes GREGORIAN_CALENDAR - - XXX This could also be done using some integer arithmetics rather - than with this iterative approach... */ + Assumes GREGORIAN_CALENDAR */ static int dInfoCalc_SetFromAbsDate(register struct date_info *dinfo, npy_int64 absdate) { - register npy_int64 year; - npy_int64 yearoffset; - int leap, dayoffset; - int *monthoffset; - - /* Approximate year */ - year = (npy_int64)(((double)absdate) / 365.2425); - - if (absdate > 0) year++; - - /* Apply corrections to reach the correct year */ - while (1) { - /* Calculate the year offset */ - yearoffset = dInfoCalc_YearOffset(year); - if (yearoffset == INT_ERR_CODE) goto onError; - - /* Backward correction: absdate must be greater than the - yearoffset */ - if (yearoffset >= absdate) { - year--; - continue; - } + pandas_datetimestruct dts; - dayoffset = absdate - yearoffset; - leap = dInfoCalc_Leapyear(year); + pandas_datetime_to_datetimestruct(absdate - ORD_OFFSET, PANDAS_FR_D, &dts); + dinfo->year = dts.year; + dinfo->month = dts.month; + dinfo->day = dts.day; - /* Forward correction: non leap years only have 365 days */ - if (dayoffset > 365 && !leap) { - year++; - continue; - } - break; - } - - dinfo->year = year; - - /* Now iterate to find the month */ - monthoffset = month_offset[leap]; - { - register int month; - - for (month = 1; month < 13; month++) { - if (monthoffset[month] >= dayoffset) break; - } - - dinfo->month = month; - dinfo->quarter = monthToQuarter(month); - dinfo->day = dayoffset - 
month_offset[leap][month - 1]; - } - - dinfo->day_of_week = dInfoCalc_DayOfWeek(absdate); - dinfo->day_of_year = dayoffset; dinfo->absdate = absdate; - return 0; - -onError: - return INT_ERR_CODE; } /////////////////////////////////////////////// @@ -358,9 +205,6 @@ PANDAS_INLINE npy_int64 transform_via_day(npy_int64 ordinal, char relation, asfreq_info *af_info, freq_conv_func first_func, freq_conv_func second_func) { - // printf("transform_via_day(%ld, %ld, %d)\n", ordinal, - // af_info->intraday_conversion_factor, - // af_info->intraday_conversion_upsample); npy_int64 result; result = (*first_func)(ordinal, relation, af_info); @@ -373,28 +217,26 @@ static npy_int64 DtoB_weekday(npy_int64 absdate) { return (((absdate) / 7) * 5) + (absdate) % 7 - BDAY_OFFSET; } -static npy_int64 DtoB_WeekendToMonday(npy_int64 absdate, int day_of_week) { - if (day_of_week > 4) { - // change to Monday after weekend - absdate += (7 - day_of_week); - } - return DtoB_weekday(absdate); -} +static npy_int64 DtoB(struct date_info *dinfo, int roll_back) { + int day_of_week = dayofweek(dinfo->year, dinfo->month, dinfo->day); + npy_int64 absdate = dinfo->absdate; -static npy_int64 DtoB_WeekendToFriday(npy_int64 absdate, int day_of_week) { - if (day_of_week > 4) { - // change to friday before weekend - absdate -= (day_of_week - 4); + if (roll_back == 1) { + if (day_of_week > 4) { + // change to friday before weekend + absdate -= (day_of_week - 4); + } + } else { + if (day_of_week > 4) { + // change to Monday after weekend + absdate += (7 - day_of_week); + } } return DtoB_weekday(absdate); } static npy_int64 absdate_from_ymd(int y, int m, int d) { - struct date_info tempDate; - if (dInfoCalc_SetFromDateAndTime(&tempDate, y, m, d, 0, 0, 0)) { - return INT_ERR_CODE; - } - return tempDate.absdate; + return dInfoCalc_SetFromDateAndTime(y, m, d); } //************ FROM DAILY *************** @@ -403,8 +245,7 @@ static npy_int64 asfreq_DTtoA(npy_int64 ordinal, char relation, asfreq_info *af_info) { 
struct date_info dinfo; ordinal = downsample_daytime(ordinal, af_info, 0); - if (dInfoCalc_SetFromAbsDate(&dinfo, ordinal + ORD_OFFSET)) - return INT_ERR_CODE; + dInfoCalc_SetFromAbsDate(&dinfo, ordinal + ORD_OFFSET); if (dinfo.month > af_info->to_a_year_end) { return (npy_int64)(dinfo.year + 1 - BASE_YEAR); } else { @@ -415,8 +256,7 @@ static npy_int64 asfreq_DTtoA(npy_int64 ordinal, char relation, static npy_int64 DtoQ_yq(npy_int64 ordinal, asfreq_info *af_info, int *year, int *quarter) { struct date_info dinfo; - if (dInfoCalc_SetFromAbsDate(&dinfo, ordinal + ORD_OFFSET)) - return INT_ERR_CODE; + dInfoCalc_SetFromAbsDate(&dinfo, ordinal + ORD_OFFSET); if (af_info->to_q_year_end != 12) { dinfo.month -= af_info->to_q_year_end; if (dinfo.month <= 0) { @@ -424,11 +264,10 @@ static npy_int64 DtoQ_yq(npy_int64 ordinal, asfreq_info *af_info, int *year, } else { dinfo.year += 1; } - dinfo.quarter = monthToQuarter(dinfo.month); } *year = dinfo.year; - *quarter = dinfo.quarter; + *quarter = monthToQuarter(dinfo.month); return 0; } @@ -439,10 +278,7 @@ static npy_int64 asfreq_DTtoQ(npy_int64 ordinal, char relation, ordinal = downsample_daytime(ordinal, af_info, 0); - if (DtoQ_yq(ordinal, af_info, &year, &quarter) == INT_ERR_CODE) { - return INT_ERR_CODE; - } - + DtoQ_yq(ordinal, af_info, &year, &quarter); return (npy_int64)((year - BASE_YEAR) * 4 + quarter - 1); } @@ -452,8 +288,7 @@ static npy_int64 asfreq_DTtoM(npy_int64 ordinal, char relation, ordinal = downsample_daytime(ordinal, af_info, 0); - if (dInfoCalc_SetFromAbsDate(&dinfo, ordinal + ORD_OFFSET)) - return INT_ERR_CODE; + dInfoCalc_SetFromAbsDate(&dinfo, ordinal + ORD_OFFSET); return (npy_int64)((dinfo.year - BASE_YEAR) * 12 + dinfo.month - 1); } @@ -467,17 +302,15 @@ static npy_int64 asfreq_DTtoW(npy_int64 ordinal, char relation, static npy_int64 asfreq_DTtoB(npy_int64 ordinal, char relation, asfreq_info *af_info) { struct date_info dinfo; + int roll_back; ordinal = downsample_daytime(ordinal, af_info, 0); - if 
(dInfoCalc_SetFromAbsDate(&dinfo, ordinal + ORD_OFFSET)) - return INT_ERR_CODE; + dInfoCalc_SetFromAbsDate(&dinfo, ordinal + ORD_OFFSET); - if (relation == 'S') { - return DtoB_WeekendToFriday(dinfo.absdate, dinfo.day_of_week); - } else { - return DtoB_WeekendToMonday(dinfo.absdate, dinfo.day_of_week); - } + // This usage defines roll_back the opposite way from the others + roll_back = (relation == 'S') ? 1 : 0; + return DtoB(&dinfo, roll_back); } // all intra day calculations are now done within one function @@ -570,15 +403,12 @@ static npy_int64 asfreq_WtoW(npy_int64 ordinal, char relation, static npy_int64 asfreq_WtoB(npy_int64 ordinal, char relation, asfreq_info *af_info) { struct date_info dinfo; - if (dInfoCalc_SetFromAbsDate( - &dinfo, asfreq_WtoDT(ordinal, relation, af_info) + ORD_OFFSET)) - return INT_ERR_CODE; + int roll_back; + dInfoCalc_SetFromAbsDate( + &dinfo, asfreq_WtoDT(ordinal, relation, af_info) + ORD_OFFSET); - if (relation == 'S') { - return DtoB_WeekendToMonday(dinfo.absdate, dinfo.day_of_week); - } else { - return DtoB_WeekendToFriday(dinfo.absdate, dinfo.day_of_week); - } + roll_back = (relation == 'S') ? 
0 : 1; + return DtoB(&dinfo, roll_back); } //************ FROM MONTHLY *************** @@ -596,8 +426,7 @@ static npy_int64 asfreq_MtoDT(npy_int64 ordinal, char relation, ordinal += 1; } MtoD_ym(ordinal, &y, &m); - if ((absdate = absdate_from_ymd(y, m, 1)) == INT_ERR_CODE) - return INT_ERR_CODE; + absdate = absdate_from_ymd(y, m, 1); ordinal = absdate - ORD_OFFSET; if (relation == 'E') { @@ -628,16 +457,13 @@ static npy_int64 asfreq_MtoW(npy_int64 ordinal, char relation, static npy_int64 asfreq_MtoB(npy_int64 ordinal, char relation, asfreq_info *af_info) { struct date_info dinfo; + int roll_back; - if (dInfoCalc_SetFromAbsDate( - &dinfo, asfreq_MtoDT(ordinal, relation, af_info) + ORD_OFFSET)) - return INT_ERR_CODE; + dInfoCalc_SetFromAbsDate( + &dinfo, asfreq_MtoDT(ordinal, relation, af_info) + ORD_OFFSET); - if (relation == 'S') { - return DtoB_WeekendToMonday(dinfo.absdate, dinfo.day_of_week); - } else { - return DtoB_WeekendToFriday(dinfo.absdate, dinfo.day_of_week); - } + roll_back = (relation == 'S') ? 
0 : 1; + return DtoB(&dinfo, roll_back); } //************ FROM QUARTERLY *************** @@ -667,8 +493,7 @@ static npy_int64 asfreq_QtoDT(npy_int64 ordinal, char relation, QtoD_ym(ordinal, &y, &m, af_info); - if ((absdate = absdate_from_ymd(y, m, 1)) == INT_ERR_CODE) - return INT_ERR_CODE; + absdate = absdate_from_ymd(y, m, 1); if (relation == 'E') { absdate -= 1; @@ -704,15 +529,12 @@ static npy_int64 asfreq_QtoW(npy_int64 ordinal, char relation, static npy_int64 asfreq_QtoB(npy_int64 ordinal, char relation, asfreq_info *af_info) { struct date_info dinfo; - if (dInfoCalc_SetFromAbsDate( - &dinfo, asfreq_QtoDT(ordinal, relation, af_info) + ORD_OFFSET)) - return INT_ERR_CODE; + int roll_back; + dInfoCalc_SetFromAbsDate( + &dinfo, asfreq_QtoDT(ordinal, relation, af_info) + ORD_OFFSET); - if (relation == 'S') { - return DtoB_WeekendToMonday(dinfo.absdate, dinfo.day_of_week); - } else { - return DtoB_WeekendToFriday(dinfo.absdate, dinfo.day_of_week); - } + roll_back = (relation == 'S') ? 0 : 1; + return DtoB(&dinfo, roll_back); } //************ FROM ANNUAL *************** @@ -737,10 +559,6 @@ static npy_int64 asfreq_AtoDT(npy_int64 year, char relation, absdate = absdate_from_ymd(year, month, 1); - if (absdate == INT_ERR_CODE) { - return INT_ERR_CODE; - } - if (relation == 'E') { absdate -= 1; } @@ -775,15 +593,12 @@ static npy_int64 asfreq_AtoW(npy_int64 ordinal, char relation, static npy_int64 asfreq_AtoB(npy_int64 ordinal, char relation, asfreq_info *af_info) { struct date_info dinfo; - if (dInfoCalc_SetFromAbsDate( - &dinfo, asfreq_AtoDT(ordinal, relation, af_info) + ORD_OFFSET)) - return INT_ERR_CODE; + int roll_back; + dInfoCalc_SetFromAbsDate( + &dinfo, asfreq_AtoDT(ordinal, relation, af_info) + ORD_OFFSET); - if (relation == 'S') { - return DtoB_WeekendToMonday(dinfo.absdate, dinfo.day_of_week); - } else { - return DtoB_WeekendToFriday(dinfo.absdate, dinfo.day_of_week); - } + roll_back = (relation == 'S') ? 
0 : 1; + return DtoB(&dinfo, roll_back); } static npy_int64 nofunc(npy_int64 ordinal, char relation, @@ -815,10 +630,6 @@ void get_asfreq_info(int fromFreq, int toFreq, asfreq_info *af_info) { get_freq_group_index(max_value(fromGroup, FR_DAY)), get_freq_group_index(max_value(toGroup, FR_DAY))); - // printf("get_asfreq_info(%d, %d) %ld, %d\n", fromFreq, toFreq, - // af_info->intraday_conversion_factor, - // af_info->intraday_conversion_upsample); - switch (fromGroup) { case FR_WK: af_info->from_week_end = calc_week_end(fromFreq, fromGroup); @@ -1014,8 +825,6 @@ freq_conv_func get_asfreq_func(int fromFreq, int toFreq) { } double get_abs_time(int freq, npy_int64 date_ordinal, npy_int64 ordinal) { - // printf("get_abs_time %d %lld %lld\n", freq, date_ordinal, ordinal); - int freq_index, day_index, base_index; npy_int64 per_day, start_ord; double unit, result; @@ -1028,23 +837,15 @@ double get_abs_time(int freq, npy_int64 date_ordinal, npy_int64 ordinal) { day_index = get_freq_group_index(FR_DAY); base_index = get_freq_group_index(FR_SEC); - // printf(" indices: day %d, freq %d, base %d\n", day_index, freq_index, - // base_index); - per_day = get_daytime_conversion_factor(day_index, freq_index); unit = get_daytime_conversion_factor(freq_index, base_index); - // printf(" per_day: %lld, unit: %f\n", per_day, unit); - if (base_index < freq_index) { unit = 1 / unit; - // printf(" corrected unit: %f\n", unit); } start_ord = date_ordinal * per_day; - // printf("start_ord: %lld\n", start_ord); result = (double)(unit * (ordinal - start_ord)); - // printf(" result: %f\n", result); return result; } @@ -1062,9 +863,6 @@ static int dInfoCalc_SetFromAbsTime(struct date_info *dinfo, double abstime) { dinfo->hour = hour; dinfo->minute = minute; dinfo->second = second; - - dinfo->abstime = abstime; - return 0; } @@ -1073,19 +871,16 @@ static int dInfoCalc_SetFromAbsTime(struct date_info *dinfo, double abstime) { static int dInfoCalc_SetFromAbsDateTime(struct date_info *dinfo, npy_int64 
absdate, double abstime) { /* Bounds check */ - Py_AssertWithArg(abstime >= 0.0 && abstime <= SECONDS_PER_DAY, - PyExc_ValueError, - "abstime out of range (0.0 - 86400.0): %f", abstime); + // The calling function is responsible for ensuring that + // abstime >= 0.0 && abstime <= 86400 /* Calculate the date */ - if (dInfoCalc_SetFromAbsDate(dinfo, absdate)) goto onError; + dInfoCalc_SetFromAbsDate(dinfo, absdate); /* Calculate the time */ - if (dInfoCalc_SetFromAbsTime(dinfo, abstime)) goto onError; + dInfoCalc_SetFromAbsTime(dinfo, abstime); return 0; -onError: - return INT_ERR_CODE; } /* ------------------------------------------------------------------ @@ -1102,19 +897,8 @@ npy_int64 asfreq(npy_int64 period_ordinal, int freq1, int freq2, get_asfreq_info(freq1, freq2, &finfo); - // printf("\n%x %d %d %ld %ld\n", func, freq1, freq2, - // finfo.intraday_conversion_factor, -finfo.intraday_conversion_factor); - val = (*func)(period_ordinal, relation, &finfo); - - if (val == INT_ERR_CODE) { - // Py_Error(PyExc_ValueError, "Unable to convert to desired - // frequency."); - goto onError; - } return val; -onError: - return INT_ERR_CODE; } /* generate an ordinal in period space */ @@ -1155,9 +939,7 @@ npy_int64 get_period_ordinal(int year, int month, int day, int hour, int minute, } if (freq == FR_HR) { - if ((absdays = absdate_from_ymd(year, month, day)) == INT_ERR_CODE) { - goto onError; - } + absdays = absdate_from_ymd(year, month, day); delta = (absdays - ORD_OFFSET); return (npy_int64)(delta * 24 + hour); } @@ -1171,9 +953,7 @@ npy_int64 get_period_ordinal(int year, int month, int day, int hour, int minute, } if (freq == FR_BUS) { - if ((days = absdate_from_ymd(year, month, day)) == INT_ERR_CODE) { - goto onError; - } + days = absdate_from_ymd(year, month, day); // calculate the current week assuming sunday as last day of a week weeks = (days - BASE_WEEK_TO_DAY_OFFSET) / DAYS_PER_WEEK; // calculate the current weekday (in range 1 .. 
7) @@ -1187,10 +967,7 @@ npy_int64 get_period_ordinal(int year, int month, int day, int hour, int minute, } if (freq_group == FR_WK) { - if ((ordinal = (npy_int64)absdate_from_ymd(year, month, day)) == - INT_ERR_CODE) { - goto onError; - } + ordinal = (npy_int64)absdate_from_ymd(year, month, day); day_adj = freq - FR_WK; return (ordinal - (1 + day_adj)) / 7 + 1 - WEEK_OFFSET; } @@ -1246,32 +1023,6 @@ npy_int64 get_python_ordinal(npy_int64 period_ordinal, int freq) { } -// function to generate a nice string representation of the period -// object, originally from DateObject_strftime - -char *c_strftime(struct date_info *tmp, char *fmt) { - struct tm c_date; - char *result; - struct date_info dinfo = *tmp; - int result_len = strlen(fmt) + 50; - - c_date.tm_sec = (int)dinfo.second; - c_date.tm_min = dinfo.minute; - c_date.tm_hour = dinfo.hour; - c_date.tm_mday = dinfo.day; - c_date.tm_mon = dinfo.month - 1; - c_date.tm_year = dinfo.year - 1900; - c_date.tm_wday = (dinfo.day_of_week + 1) % 7; - c_date.tm_yday = dinfo.day_of_year - 1; - c_date.tm_isdst = -1; - - result = malloc(result_len * sizeof(char)); - - strftime(result, result_len, fmt, &c_date); - - return result; -} - int get_yq(npy_int64 ordinal, int freq, int *quarter, int *year) { asfreq_info af_info; int qtr_freq; @@ -1290,12 +1041,11 @@ int get_yq(npy_int64 ordinal, int freq, int *quarter, int *year) { } get_asfreq_info(FR_DAY, qtr_freq, &af_info); - if (DtoQ_yq(daily_ord, &af_info, year, quarter) == INT_ERR_CODE) return -1; - + DtoQ_yq(daily_ord, &af_info, year, quarter); return 0; } -static int _quarter_year(npy_int64 ordinal, int freq, int *year, int *quarter) { +int _quarter_year(npy_int64 ordinal, int freq, int *year, int *quarter) { asfreq_info af_info; int qtr_freq; @@ -1308,37 +1058,13 @@ static int _quarter_year(npy_int64 ordinal, int freq, int *year, int *quarter) { get_asfreq_info(FR_DAY, qtr_freq, &af_info); - if (DtoQ_yq(ordinal, &af_info, year, quarter) == INT_ERR_CODE) - return INT_ERR_CODE; 
+ DtoQ_yq(ordinal, &af_info, year, quarter); if ((qtr_freq % 1000) > 12) *year -= 1; return 0; } -static int _ISOWeek(struct date_info *dinfo) { - int week; - - /* Estimate */ - week = (dinfo->day_of_year - 1) - dinfo->day_of_week + 3; - if (week >= 0) week = week / 7 + 1; - - /* Verify */ - if (week < 0) { - /* The day lies in last week of the previous year */ - if ((week > -2) || (week == -2 && dInfoCalc_Leapyear(dinfo->year - 1))) - week = 53; - else - week = 52; - } else if (week == 53) { - /* Check if the week belongs to year or year+1 */ - if (31 - dinfo->day + dinfo->day_of_week < 3) { - week = 1; - } - } - - return week; -} int get_date_info(npy_int64 ordinal, int freq, struct date_info *dinfo) { npy_int64 absdate = get_python_ordinal(ordinal, freq); @@ -1353,101 +1079,6 @@ int get_date_info(npy_int64 ordinal, int freq, struct date_info *dinfo) { absdate += 1; } - if (dInfoCalc_SetFromAbsDateTime(dinfo, absdate, abstime)) - return INT_ERR_CODE; - + dInfoCalc_SetFromAbsDateTime(dinfo, absdate, abstime); return 0; } - -int pyear(npy_int64 ordinal, int freq) { - struct date_info dinfo; - get_date_info(ordinal, freq, &dinfo); - return dinfo.year; -} - -int pqyear(npy_int64 ordinal, int freq) { - int year, quarter; - if (_quarter_year(ordinal, freq, &year, &quarter) == INT_ERR_CODE) - return INT_ERR_CODE; - return year; -} - -int pquarter(npy_int64 ordinal, int freq) { - int year, quarter; - if (_quarter_year(ordinal, freq, &year, &quarter) == INT_ERR_CODE) - return INT_ERR_CODE; - return quarter; -} - -int pmonth(npy_int64 ordinal, int freq) { - struct date_info dinfo; - if (get_date_info(ordinal, freq, &dinfo) == INT_ERR_CODE) - return INT_ERR_CODE; - return dinfo.month; -} - -int pday(npy_int64 ordinal, int freq) { - struct date_info dinfo; - if (get_date_info(ordinal, freq, &dinfo) == INT_ERR_CODE) - return INT_ERR_CODE; - return dinfo.day; -} - -int pweekday(npy_int64 ordinal, int freq) { - struct date_info dinfo; - if (get_date_info(ordinal, freq, &dinfo) 
== INT_ERR_CODE) - return INT_ERR_CODE; - return dinfo.day_of_week; -} - -int pday_of_week(npy_int64 ordinal, int freq) { - struct date_info dinfo; - if (get_date_info(ordinal, freq, &dinfo) == INT_ERR_CODE) - return INT_ERR_CODE; - return dinfo.day_of_week; -} - -int pday_of_year(npy_int64 ordinal, int freq) { - struct date_info dinfo; - if (get_date_info(ordinal, freq, &dinfo) == INT_ERR_CODE) - return INT_ERR_CODE; - return dinfo.day_of_year; -} - -int pweek(npy_int64 ordinal, int freq) { - struct date_info dinfo; - if (get_date_info(ordinal, freq, &dinfo) == INT_ERR_CODE) - return INT_ERR_CODE; - return _ISOWeek(&dinfo); -} - -int phour(npy_int64 ordinal, int freq) { - struct date_info dinfo; - if (get_date_info(ordinal, freq, &dinfo) == INT_ERR_CODE) - return INT_ERR_CODE; - return dinfo.hour; -} - -int pminute(npy_int64 ordinal, int freq) { - struct date_info dinfo; - if (get_date_info(ordinal, freq, &dinfo) == INT_ERR_CODE) - return INT_ERR_CODE; - return dinfo.minute; -} - -int psecond(npy_int64 ordinal, int freq) { - struct date_info dinfo; - if (get_date_info(ordinal, freq, &dinfo) == INT_ERR_CODE) - return INT_ERR_CODE; - return (int)dinfo.second; -} - -int pdays_in_month(npy_int64 ordinal, int freq) { - int days; - struct date_info dinfo; - if (get_date_info(ordinal, freq, &dinfo) == INT_ERR_CODE) - return INT_ERR_CODE; - - days = days_in_month[dInfoCalc_Leapyear(dinfo.year)][dinfo.month - 1]; - return days; -} diff --git a/pandas/_libs/src/period_helper.h b/pandas/_libs/src/period_helper.h index d3d32f81d1f66..2c74659346b15 100644 --- a/pandas/_libs/src/period_helper.h +++ b/pandas/_libs/src/period_helper.h @@ -24,15 +24,6 @@ frequency conversion routines. 
* declarations from period here */ -#define SECONDS_PER_DAY ((double)86400.0) - -#define Py_AssertWithArg(x, errortype, errorstr, a1) \ - { \ - if (!(x)) { \ - PyErr_Format(errortype, errorstr, a1); \ - goto onError; \ - } \ - } #define Py_Error(errortype, errorstr) \ { \ PyErr_SetString(errortype, errorstr); \ @@ -124,17 +115,13 @@ typedef struct asfreq_info { typedef struct date_info { npy_int64 absdate; - double abstime; double second; int minute; int hour; int day; int month; - int quarter; int year; - int day_of_week; - int day_of_year; } date_info; typedef npy_int64 (*freq_conv_func)(npy_int64, char, asfreq_info *); @@ -155,22 +142,8 @@ int get_date_info(npy_int64 ordinal, int freq, struct date_info *dinfo); freq_conv_func get_asfreq_func(int fromFreq, int toFreq); void get_asfreq_info(int fromFreq, int toFreq, asfreq_info *af_info); -int pyear(npy_int64 ordinal, int freq); -int pqyear(npy_int64 ordinal, int freq); -int pquarter(npy_int64 ordinal, int freq); -int pmonth(npy_int64 ordinal, int freq); -int pday(npy_int64 ordinal, int freq); -int pweekday(npy_int64 ordinal, int freq); -int pday_of_week(npy_int64 ordinal, int freq); -int pday_of_year(npy_int64 ordinal, int freq); -int pweek(npy_int64 ordinal, int freq); -int phour(npy_int64 ordinal, int freq); -int pminute(npy_int64 ordinal, int freq); -int psecond(npy_int64 ordinal, int freq); -int pdays_in_month(npy_int64 ordinal, int freq); - -char *c_strftime(struct date_info *dinfo, char *fmt); int get_yq(npy_int64 ordinal, int freq, int *quarter, int *year); +int _quarter_year(npy_int64 ordinal, int freq, int *year, int *quarter); void initialize_daytime_conversion_factor_matrix(void); diff --git a/pandas/_libs/tslibs/ccalendar.pyx b/pandas/_libs/tslibs/ccalendar.pyx index 613e111443636..9bd315b43ea9e 100644 --- a/pandas/_libs/tslibs/ccalendar.pyx +++ b/pandas/_libs/tslibs/ccalendar.pyx @@ -191,8 +191,7 @@ cpdef int32_t get_day_of_year(int year, int month, int day) nogil: cdef: bint isleap int32_t mo_off - 
int32_t doy, dow - int woy + int day_of_year isleap = is_leapyear(year) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index e82c9c613c62a..ba17b3d345ac8 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -11,7 +11,9 @@ from numpy cimport int64_t, import_array, ndarray import numpy as np import_array() -from libc.stdlib cimport free +from libc.stdlib cimport free, malloc +from libc.time cimport strftime, tm +from libc.string cimport strlen from pandas.compat import PY2 @@ -33,6 +35,8 @@ from timestamps import Timestamp from timezones cimport is_utc, is_tzlocal, get_utcoffset, get_dst_info from timedeltas cimport delta_to_nanoseconds +cimport ccalendar +from ccalendar cimport dayofweek, get_day_of_year from ccalendar import MONTH_NUMBERS from ccalendar cimport is_leapyear from frequencies cimport (get_freq_code, get_base_alias, @@ -49,17 +53,12 @@ from pandas.tseries import frequencies cdef extern from "period_helper.h": ctypedef struct date_info: - int64_t absdate - double abstime double second int minute int hour int day int month - int quarter int year - int day_of_week - int day_of_year ctypedef struct asfreq_info: int from_week_end @@ -85,28 +84,43 @@ cdef extern from "period_helper.h": int freq) nogil except INT32_MIN int get_date_info(int64_t ordinal, int freq, - date_info *dinfo) nogil except INT32_MIN - - int pyear(int64_t ordinal, int freq) except INT32_MIN - int pqyear(int64_t ordinal, int freq) except INT32_MIN - int pquarter(int64_t ordinal, int freq) except INT32_MIN - int pmonth(int64_t ordinal, int freq) except INT32_MIN - int pday(int64_t ordinal, int freq) except INT32_MIN - int pweekday(int64_t ordinal, int freq) except INT32_MIN - int pday_of_week(int64_t ordinal, int freq) except INT32_MIN - # TODO: pday_of_week and pweekday are identical. Make one an alias instead - # of importing them separately. 
- int pday_of_year(int64_t ordinal, int freq) except INT32_MIN - int pweek(int64_t ordinal, int freq) except INT32_MIN - int phour(int64_t ordinal, int freq) except INT32_MIN - int pminute(int64_t ordinal, int freq) except INT32_MIN - int psecond(int64_t ordinal, int freq) except INT32_MIN - int pdays_in_month(int64_t ordinal, int freq) except INT32_MIN - char *c_strftime(date_info *dinfo, char *fmt) + date_info *dinfo) nogil + int get_yq(int64_t ordinal, int freq, int *quarter, int *year) + int _quarter_year(int64_t ordinal, int freq, int *year, int *quarter) + initialize_daytime_conversion_factor_matrix() + +@cython.cdivision +cdef char* c_strftime(date_info *dinfo, char *fmt): + """ + function to generate a nice string representation of the period + object, originally from DateObject_strftime + """ + cdef: + tm c_date + char *result + int result_len = strlen(fmt) + 50 + + c_date.tm_sec = dinfo.second + c_date.tm_min = dinfo.minute + c_date.tm_hour = dinfo.hour + c_date.tm_mday = dinfo.day + c_date.tm_mon = dinfo.month - 1 + c_date.tm_year = dinfo.year - 1900 + c_date.tm_wday = (dayofweek(dinfo.year, dinfo.month, dinfo.day) + 1) % 7 + c_date.tm_yday = get_day_of_year(dinfo.year, dinfo.month, dinfo.day) - 1 + c_date.tm_isdst = -1 + + result = malloc(result_len * sizeof(char)) + + strftime(result, result_len, fmt, &c_date) + + return result + + # ---------------------------------------------------------------------- # Period logic @@ -367,19 +381,105 @@ cdef object _period_strftime(int64_t value, int freq, object fmt): return result + +# ---------------------------------------------------------------------- # period accessors ctypedef int (*accessor)(int64_t ordinal, int freq) except INT32_MIN +cdef int pyear(int64_t ordinal, int freq): + cdef: + date_info dinfo + get_date_info(ordinal, freq, &dinfo) + return dinfo.year + + +cdef int pqyear(int64_t ordinal, int freq): + cdef: + int year, quarter + _quarter_year(ordinal, freq, &year, &quarter) + return year + + 
+cdef int pquarter(int64_t ordinal, int freq): + cdef: + int year, quarter + _quarter_year(ordinal, freq, &year, &quarter) + return quarter + + +cdef int pmonth(int64_t ordinal, int freq): + cdef: + date_info dinfo + get_date_info(ordinal, freq, &dinfo) + return dinfo.month + + +cdef int pday(int64_t ordinal, int freq): + cdef: + date_info dinfo + get_date_info(ordinal, freq, &dinfo) + return dinfo.day + + +cdef int pweekday(int64_t ordinal, int freq): + cdef: + date_info dinfo + get_date_info(ordinal, freq, &dinfo) + return dayofweek(dinfo.year, dinfo.month, dinfo.day) + + +cdef int pday_of_year(int64_t ordinal, int freq): + cdef: + date_info dinfo + get_date_info(ordinal, freq, &dinfo) + return get_day_of_year(dinfo.year, dinfo.month, dinfo.day) + + +cdef int pweek(int64_t ordinal, int freq): + cdef: + date_info dinfo + get_date_info(ordinal, freq, &dinfo) + return ccalendar.get_week_of_year(dinfo.year, dinfo.month, dinfo.day) + + +cdef int phour(int64_t ordinal, int freq): + cdef: + date_info dinfo + get_date_info(ordinal, freq, &dinfo) + return dinfo.hour + + +cdef int pminute(int64_t ordinal, int freq): + cdef: + date_info dinfo + get_date_info(ordinal, freq, &dinfo) + return dinfo.minute + + +cdef int psecond(int64_t ordinal, int freq): + cdef: + date_info dinfo + get_date_info(ordinal, freq, &dinfo) + return dinfo.second + + +cdef int pdays_in_month(int64_t ordinal, int freq): + cdef: + date_info dinfo + get_date_info(ordinal, freq, &dinfo) + return ccalendar.get_days_in_month(dinfo.year, dinfo.month) + + def get_period_field_arr(int code, ndarray[int64_t] arr, int freq): cdef: Py_ssize_t i, sz ndarray[int64_t] out accessor f - f = _get_accessor_func(code) - if f is NULL: + func = _get_accessor_func(code) + if func is NULL: raise ValueError('Unrecognized period code: %d' % code) sz = len(arr) @@ -389,36 +489,36 @@ def get_period_field_arr(int code, ndarray[int64_t] arr, int freq): if arr[i] == iNaT: out[i] = -1 continue - out[i] = f(arr[i], freq) + out[i] = 
func(arr[i], freq) return out cdef accessor _get_accessor_func(int code): if code == 0: - return &pyear + return pyear elif code == 1: - return &pqyear + return pqyear elif code == 2: - return &pquarter + return pquarter elif code == 3: - return &pmonth + return pmonth elif code == 4: - return &pday + return pday elif code == 5: - return &phour + return phour elif code == 6: - return &pminute + return pminute elif code == 7: - return &psecond + return psecond elif code == 8: - return &pweek + return pweek elif code == 9: - return &pday_of_year + return pday_of_year elif code == 10: - return &pweekday + return pweekday elif code == 11: - return &pdays_in_month + return pdays_in_month return NULL From f9db3b5ace38b5eb2be0053c752b8a096c90e41d Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Wed, 7 Feb 2018 17:17:24 -0800 Subject: [PATCH 070/217] CI: Run ASV on Travis for failed benchmarks (#19236) --- .travis.yml | 8 +++++ asv_bench/benchmarks/algorithms.py | 4 ++- asv_bench/benchmarks/categoricals.py | 10 +++++-- asv_bench/benchmarks/frame_methods.py | 9 ++++-- asv_bench/benchmarks/gil.py | 43 +++++++++++++-------------- asv_bench/benchmarks/groupby.py | 6 ++-- asv_bench/benchmarks/indexing.py | 19 ++++++++---- asv_bench/benchmarks/io/hdf.py | 21 ++++++++----- asv_bench/benchmarks/join_merge.py | 23 +++++++++----- asv_bench/benchmarks/offset.py | 7 +++-- asv_bench/benchmarks/panel_ctor.py | 10 +++++-- asv_bench/benchmarks/panel_methods.py | 11 +++++-- asv_bench/benchmarks/reindex.py | 4 --- asv_bench/benchmarks/reshape.py | 4 +-- asv_bench/benchmarks/strings.py | 5 +++- asv_bench/benchmarks/timeseries.py | 6 ++-- ci/asv.sh | 35 ++++++++++++++++++++++ ci/requirements-3.6_ASV.build | 5 ++++ ci/requirements-3.6_ASV.run | 25 ++++++++++++++++ ci/requirements-3.6_ASV.sh | 7 +++++ ci/script_multi.sh | 3 ++ ci/script_single.sh | 3 ++ 22 files changed, 199 insertions(+), 69 deletions(-) create mode 100755 ci/asv.sh create mode 100644 ci/requirements-3.6_ASV.build create 
mode 100644 ci/requirements-3.6_ASV.run create mode 100755 ci/requirements-3.6_ASV.sh diff --git a/.travis.yml b/.travis.yml index bd5cac8955c8d..4cbe7f86bd2fa 100644 --- a/.travis.yml +++ b/.travis.yml @@ -73,6 +73,10 @@ matrix: env: - JOB="3.6_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" # In allow_failures + - dist: trusty + env: + - JOB="3.6_ASV" ASV=true + # In allow_failures - dist: trusty env: - JOB="3.6_DOC" DOC=true @@ -93,6 +97,9 @@ matrix: - dist: trusty env: - JOB="3.6_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" + - dist: trusty + env: + - JOB="3.6_ASV" ASV=true - dist: trusty env: - JOB="3.6_DOC" DOC=true @@ -128,6 +135,7 @@ script: - ci/script_single.sh - ci/script_multi.sh - ci/lint.sh + - ci/asv.sh - echo "checking imports" - source activate pandas && python ci/check_imports.py - echo "script done" diff --git a/asv_bench/benchmarks/algorithms.py b/asv_bench/benchmarks/algorithms.py index 45d62163ae80b..cccd38ef11251 100644 --- a/asv_bench/benchmarks/algorithms.py +++ b/asv_bench/benchmarks/algorithms.py @@ -1,3 +1,4 @@ +import warnings from importlib import import_module import numpy as np @@ -83,7 +84,8 @@ def setup(self): self.all = self.uniques.repeat(10) def time_match_string(self): - pd.match(self.all, self.uniques) + with warnings.catch_warnings(record=True): + pd.match(self.all, self.uniques) class Hashing(object): diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py index 1613ca1b97f4b..7743921003353 100644 --- a/asv_bench/benchmarks/categoricals.py +++ b/asv_bench/benchmarks/categoricals.py @@ -1,3 +1,5 @@ +import warnings + import numpy as np import pandas as pd import pandas.util.testing as tm @@ -119,11 +121,15 @@ def setup(self): self.s_str = pd.Series(tm.makeCategoricalIndex(N, ncats)).astype(str) self.s_str_cat = self.s_str.astype('category') - self.s_str_cat_ordered = self.s_str.astype('category', ordered=True) + with 
warnings.catch_warnings(record=True): + self.s_str_cat_ordered = self.s_str.astype('category', + ordered=True) self.s_int = pd.Series(np.random.randint(0, ncats, size=N)) self.s_int_cat = self.s_int.astype('category') - self.s_int_cat_ordered = self.s_int.astype('category', ordered=True) + with warnings.catch_warnings(record=True): + self.s_int_cat_ordered = self.s_int.astype('category', + ordered=True) def time_rank_string(self): self.s_str.rank() diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py index 4cecf12a27042..4ff71c706cd34 100644 --- a/asv_bench/benchmarks/frame_methods.py +++ b/asv_bench/benchmarks/frame_methods.py @@ -1,4 +1,6 @@ import string +import warnings + import numpy as np import pandas.util.testing as tm from pandas import (DataFrame, Series, MultiIndex, date_range, period_range, @@ -15,7 +17,8 @@ def setup(self): self.df = DataFrame(np.random.randn(10000, 25)) self.df['foo'] = 'bar' self.df['bar'] = 'baz' - self.df = self.df.consolidate() + with warnings.catch_warnings(record=True): + self.df = self.df.consolidate() def time_frame_get_numeric_data(self): self.df._get_numeric_data() @@ -141,8 +144,8 @@ class Repr(object): def setup(self): nrows = 10000 data = np.random.randn(nrows, 10) - idx = MultiIndex.from_arrays(np.tile(np.random.randn(3, nrows / 100), - 100)) + arrays = np.tile(np.random.randn(3, int(nrows / 100)), 100) + idx = MultiIndex.from_arrays(arrays) self.df3 = DataFrame(data, index=idx) self.df4 = DataFrame(data, index=np.random.randn(nrows)) self.df_tall = DataFrame(np.random.randn(nrows, 10)) diff --git a/asv_bench/benchmarks/gil.py b/asv_bench/benchmarks/gil.py index 7d63d78084270..21c1ccf46e1c4 100644 --- a/asv_bench/benchmarks/gil.py +++ b/asv_bench/benchmarks/gil.py @@ -1,9 +1,13 @@ import numpy as np import pandas.util.testing as tm -from pandas import (DataFrame, Series, rolling_median, rolling_mean, - rolling_min, rolling_max, rolling_var, rolling_skew, - rolling_kurt, rolling_std, 
read_csv, factorize, date_range) +from pandas import DataFrame, Series, read_csv, factorize, date_range from pandas.core.algorithms import take_1d +try: + from pandas import (rolling_median, rolling_mean, rolling_min, rolling_max, + rolling_var, rolling_skew, rolling_kurt, rolling_std) + have_rolling_methods = True +except ImportError: + have_rolling_methods = False try: from pandas._libs import algos except ImportError: @@ -171,8 +175,7 @@ def run(period): class ParallelRolling(object): goal_time = 0.2 - params = ['rolling_median', 'rolling_mean', 'rolling_min', 'rolling_max', - 'rolling_var', 'rolling_skew', 'rolling_kurt', 'rolling_std'] + params = ['median', 'mean', 'min', 'max', 'var', 'skew', 'kurt', 'std'] param_names = ['method'] def setup(self, method): @@ -181,34 +184,28 @@ def setup(self, method): win = 100 arr = np.random.rand(100000) if hasattr(DataFrame, 'rolling'): - rolling = {'rolling_median': 'median', - 'rolling_mean': 'mean', - 'rolling_min': 'min', - 'rolling_max': 'max', - 'rolling_var': 'var', - 'rolling_skew': 'skew', - 'rolling_kurt': 'kurt', - 'rolling_std': 'std'} df = DataFrame(arr).rolling(win) @test_parallel(num_threads=2) def parallel_rolling(): - getattr(df, rolling[method])() + getattr(df, method)() self.parallel_rolling = parallel_rolling - else: - rolling = {'rolling_median': rolling_median, - 'rolling_mean': rolling_mean, - 'rolling_min': rolling_min, - 'rolling_max': rolling_max, - 'rolling_var': rolling_var, - 'rolling_skew': rolling_skew, - 'rolling_kurt': rolling_kurt, - 'rolling_std': rolling_std} + elif have_rolling_methods: + rolling = {'median': rolling_median, + 'mean': rolling_mean, + 'min': rolling_min, + 'max': rolling_max, + 'var': rolling_var, + 'skew': rolling_skew, + 'kurt': rolling_kurt, + 'std': rolling_std} @test_parallel(num_threads=2) def parallel_rolling(): rolling[method](arr, win) self.parallel_rolling = parallel_rolling + else: + raise NotImplementedError def time_rolling(self, method): 
self.parallel_rolling() diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index 4dfd215e6dc3a..8aa67d8bc6a6a 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -1,3 +1,4 @@ +import warnings from string import ascii_letters from itertools import product from functools import partial @@ -340,7 +341,8 @@ def time_dt_size(self): self.df.groupby(['dates']).size() def time_dt_timegrouper_size(self): - self.df.groupby(TimeGrouper(key='dates', freq='M')).size() + with warnings.catch_warnings(record=True): + self.df.groupby(TimeGrouper(key='dates', freq='M')).size() def time_category_size(self): self.draws.groupby(self.cats).size() @@ -467,7 +469,7 @@ class SumMultiLevel(object): def setup(self): N = 50 - self.df = DataFrame({'A': range(N) * 2, + self.df = DataFrame({'A': list(range(N)) * 2, 'B': range(N * 2), 'C': 1}).set_index(['A', 'B']) diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py index b35f00db2b054..77e013e1e4fb0 100644 --- a/asv_bench/benchmarks/indexing.py +++ b/asv_bench/benchmarks/indexing.py @@ -1,3 +1,5 @@ +import warnings + import numpy as np import pandas.util.testing as tm from pandas import (Series, DataFrame, MultiIndex, Int64Index, Float64Index, @@ -91,7 +93,8 @@ def time_getitem_pos_slice(self, index): self.s[:80000] def time_get_value(self, index): - self.s.get_value(self.lbl) + with warnings.catch_warnings(record=True): + self.s.get_value(self.lbl) def time_getitem_scalar(self, index): self.s[self.lbl] @@ -112,7 +115,8 @@ def setup(self): self.bool_obj_indexer = self.bool_indexer.astype(object) def time_get_value(self): - self.df.get_value(self.idx_scalar, self.col_scalar) + with warnings.catch_warnings(record=True): + self.df.get_value(self.idx_scalar, self.col_scalar) def time_ix(self): self.df.ix[self.idx_scalar, self.col_scalar] @@ -231,11 +235,13 @@ class PanelIndexing(object): goal_time = 0.2 def setup(self): - self.p = Panel(np.random.randn(100, 
100, 100)) - self.inds = range(0, 100, 10) + with warnings.catch_warnings(record=True): + self.p = Panel(np.random.randn(100, 100, 100)) + self.inds = range(0, 100, 10) def time_subset(self): - self.p.ix[(self.inds, self.inds, self.inds)] + with warnings.catch_warnings(record=True): + self.p.ix[(self.inds, self.inds, self.inds)] class MethodLookup(object): @@ -295,7 +301,8 @@ def setup(self): def time_insert(self): np.random.seed(1234) for i in range(100): - self.df.insert(0, i, np.random.randn(self.N)) + self.df.insert(0, i, np.random.randn(self.N), + allow_duplicates=True) def time_assign_with_setitem(self): np.random.seed(1234) diff --git a/asv_bench/benchmarks/io/hdf.py b/asv_bench/benchmarks/io/hdf.py index 5c0e9586c1cb5..4b6e1d69af92d 100644 --- a/asv_bench/benchmarks/io/hdf.py +++ b/asv_bench/benchmarks/io/hdf.py @@ -1,3 +1,5 @@ +import warnings + import numpy as np from pandas import DataFrame, Panel, date_range, HDFStore, read_hdf import pandas.util.testing as tm @@ -105,22 +107,25 @@ class HDFStorePanel(BaseIO): def setup(self): self.fname = '__test__.h5' - self.p = Panel(np.random.randn(20, 1000, 25), - items=['Item%03d' % i for i in range(20)], - major_axis=date_range('1/1/2000', periods=1000), - minor_axis=['E%03d' % i for i in range(25)]) - self.store = HDFStore(self.fname) - self.store.append('p1', self.p) + with warnings.catch_warnings(record=True): + self.p = Panel(np.random.randn(20, 1000, 25), + items=['Item%03d' % i for i in range(20)], + major_axis=date_range('1/1/2000', periods=1000), + minor_axis=['E%03d' % i for i in range(25)]) + self.store = HDFStore(self.fname) + self.store.append('p1', self.p) def teardown(self): self.store.close() self.remove(self.fname) def time_read_store_table_panel(self): - self.store.select('p1') + with warnings.catch_warnings(record=True): + self.store.select('p1') def time_write_store_table_panel(self): - self.store.append('p2', self.p) + with warnings.catch_warnings(record=True): + self.store.append('p2', 
self.p) class HDF(BaseIO): diff --git a/asv_bench/benchmarks/join_merge.py b/asv_bench/benchmarks/join_merge.py index 5b40a29d54683..de0a3b33da147 100644 --- a/asv_bench/benchmarks/join_merge.py +++ b/asv_bench/benchmarks/join_merge.py @@ -1,3 +1,4 @@ +import warnings import string import numpy as np @@ -26,7 +27,8 @@ def setup(self): self.mdf1['obj2'] = 'bar' self.mdf1['int1'] = 5 try: - self.mdf1.consolidate(inplace=True) + with warnings.catch_warnings(record=True): + self.mdf1.consolidate(inplace=True) except: pass self.mdf2 = self.mdf1.copy() @@ -75,16 +77,23 @@ class ConcatPanels(object): param_names = ['axis', 'ignore_index'] def setup(self, axis, ignore_index): - panel_c = Panel(np.zeros((10000, 200, 2), dtype=np.float32, order='C')) - self.panels_c = [panel_c] * 20 - panel_f = Panel(np.zeros((10000, 200, 2), dtype=np.float32, order='F')) - self.panels_f = [panel_f] * 20 + with warnings.catch_warnings(record=True): + panel_c = Panel(np.zeros((10000, 200, 2), + dtype=np.float32, + order='C')) + self.panels_c = [panel_c] * 20 + panel_f = Panel(np.zeros((10000, 200, 2), + dtype=np.float32, + order='F')) + self.panels_f = [panel_f] * 20 def time_c_ordered(self, axis, ignore_index): - concat(self.panels_c, axis=axis, ignore_index=ignore_index) + with warnings.catch_warnings(record=True): + concat(self.panels_c, axis=axis, ignore_index=ignore_index) def time_f_ordered(self, axis, ignore_index): - concat(self.panels_f, axis=axis, ignore_index=ignore_index) + with warnings.catch_warnings(record=True): + concat(self.panels_f, axis=axis, ignore_index=ignore_index) class ConcatDataFrames(object): diff --git a/asv_bench/benchmarks/offset.py b/asv_bench/benchmarks/offset.py index 034e861e7fc01..e161b887ee86f 100644 --- a/asv_bench/benchmarks/offset.py +++ b/asv_bench/benchmarks/offset.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +import warnings from datetime import datetime import numpy as np @@ -76,7 +77,8 @@ def setup(self, offset): self.data = pd.Series(rng) def 
time_add_offset(self, offset): - self.data + offset + with warnings.catch_warnings(record=True): + self.data + offset class OffsetDatetimeIndexArithmetic(object): @@ -90,7 +92,8 @@ def setup(self, offset): self.data = pd.date_range(start='1/1/2000', periods=N, freq='T') def time_add_offset(self, offset): - self.data + offset + with warnings.catch_warnings(record=True): + self.data + offset class OffestDatetimeArithmetic(object): diff --git a/asv_bench/benchmarks/panel_ctor.py b/asv_bench/benchmarks/panel_ctor.py index 456fe959c5aa3..ce946c76ed199 100644 --- a/asv_bench/benchmarks/panel_ctor.py +++ b/asv_bench/benchmarks/panel_ctor.py @@ -1,3 +1,4 @@ +import warnings from datetime import datetime, timedelta from pandas import DataFrame, DatetimeIndex, date_range @@ -19,7 +20,8 @@ def setup(self): self.data_frames[x] = df def time_from_dict(self): - Panel.from_dict(self.data_frames) + with warnings.catch_warnings(record=True): + Panel.from_dict(self.data_frames) class SameIndexes(object): @@ -34,7 +36,8 @@ def setup(self): self.data_frames = dict(enumerate([df] * 100)) def time_from_dict(self): - Panel.from_dict(self.data_frames) + with warnings.catch_warnings(record=True): + Panel.from_dict(self.data_frames) class TwoIndexes(object): @@ -53,4 +56,5 @@ def setup(self): self.data_frames = dict(enumerate(dfs)) def time_from_dict(self): - Panel.from_dict(self.data_frames) + with warnings.catch_warnings(record=True): + Panel.from_dict(self.data_frames) diff --git a/asv_bench/benchmarks/panel_methods.py b/asv_bench/benchmarks/panel_methods.py index 9ee1949b311db..a5b1a92e9cf67 100644 --- a/asv_bench/benchmarks/panel_methods.py +++ b/asv_bench/benchmarks/panel_methods.py @@ -1,3 +1,5 @@ +import warnings + import numpy as np from .pandas_vb_common import Panel, setup # noqa @@ -10,10 +12,13 @@ class PanelMethods(object): param_names = ['axis'] def setup(self, axis): - self.panel = Panel(np.random.randn(100, 1000, 100)) + with warnings.catch_warnings(record=True): + 
self.panel = Panel(np.random.randn(100, 1000, 100)) def time_pct_change(self, axis): - self.panel.pct_change(1, axis=axis) + with warnings.catch_warnings(record=True): + self.panel.pct_change(1, axis=axis) def time_shift(self, axis): - self.panel.shift(1, axis=axis) + with warnings.catch_warnings(record=True): + self.panel.shift(1, axis=axis) diff --git a/asv_bench/benchmarks/reindex.py b/asv_bench/benchmarks/reindex.py index 69a1a604b1ccc..413427a16f40b 100644 --- a/asv_bench/benchmarks/reindex.py +++ b/asv_bench/benchmarks/reindex.py @@ -167,10 +167,6 @@ def setup(self): col_array2 = col_array.copy() col_array2[:, :10000] = np.nan self.col_array_list = list(col_array) - self.col_array_list2 = list(col_array2) def time_lib_fast_zip(self): lib.fast_zip(self.col_array_list) - - def time_lib_fast_zip_fillna(self): - lib.fast_zip_fillna(self.col_array_list2) diff --git a/asv_bench/benchmarks/reshape.py b/asv_bench/benchmarks/reshape.py index bd3b580d9d130..9044b080c45f9 100644 --- a/asv_bench/benchmarks/reshape.py +++ b/asv_bench/benchmarks/reshape.py @@ -104,9 +104,9 @@ def setup(self): self.letters = list('ABCD') yrvars = [l + str(num) for l, num in product(self.letters, range(1, nyrs + 1))] - + columns = [str(i) for i in range(nidvars)] + yrvars self.df = DataFrame(np.random.randn(N, nidvars + len(yrvars)), - columns=list(range(nidvars)) + yrvars) + columns=columns) self.df['id'] = self.df.index def time_wide_to_long_big(self): diff --git a/asv_bench/benchmarks/strings.py b/asv_bench/benchmarks/strings.py index 4435327e1eb38..b203c8b0fa5c9 100644 --- a/asv_bench/benchmarks/strings.py +++ b/asv_bench/benchmarks/strings.py @@ -1,3 +1,5 @@ +import warnings + import numpy as np from pandas import Series import pandas.util.testing as tm @@ -23,7 +25,8 @@ def time_endswith(self): self.s.str.endswith('A') def time_extract(self): - self.s.str.extract('(\\w*)A(\\w*)') + with warnings.catch_warnings(record=True): + self.s.str.extract('(\\w*)A(\\w*)') def time_findall(self): 
self.s.str.findall('[A-Z]+') diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py index ea2f077f980d0..e1a6bc7a68e9d 100644 --- a/asv_bench/benchmarks/timeseries.py +++ b/asv_bench/benchmarks/timeseries.py @@ -1,3 +1,4 @@ +import warnings from datetime import timedelta import numpy as np @@ -74,7 +75,8 @@ def setup(self): freq='S')) def time_infer_dst(self): - self.index.tz_localize('US/Eastern', infer_dst=True) + with warnings.catch_warnings(record=True): + self.index.tz_localize('US/Eastern', infer_dst=True) class ResetIndex(object): @@ -365,7 +367,7 @@ class ToDatetimeCache(object): def setup(self, cache): N = 10000 - self.unique_numeric_seconds = range(N) + self.unique_numeric_seconds = list(range(N)) self.dup_numeric_seconds = [1000] * N self.dup_string_dates = ['2000-02-11'] * N self.dup_string_with_tz = ['2000-02-11 15:00:00-0800'] * N diff --git a/ci/asv.sh b/ci/asv.sh new file mode 100755 index 0000000000000..1e9a8d6380eb5 --- /dev/null +++ b/ci/asv.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +echo "inside $0" + +source activate pandas + +RET=0 + +if [ "$ASV" ]; then + echo "Check for failed asv benchmarks" + + cd asv_bench + + asv machine --yes + + time asv dev | tee failed_asv.txt + + echo "The following asvs benchmarks (if any) failed." + + cat failed_asv.txt | grep "failed" failed_asv.txt + + if [ $? = "0" ]; then + RET=1 + fi + + echo "DONE displaying failed asvs benchmarks." 
+ + rm failed_asv.txt + + echo "Check for failed asv benchmarks DONE" +else + echo "NOT checking for failed asv benchmarks" +fi + +exit $RET diff --git a/ci/requirements-3.6_ASV.build b/ci/requirements-3.6_ASV.build new file mode 100644 index 0000000000000..bc72eed2a0d4e --- /dev/null +++ b/ci/requirements-3.6_ASV.build @@ -0,0 +1,5 @@ +python=3.6* +python-dateutil +pytz +numpy=1.13* +cython diff --git a/ci/requirements-3.6_ASV.run b/ci/requirements-3.6_ASV.run new file mode 100644 index 0000000000000..6c45e3371e9cf --- /dev/null +++ b/ci/requirements-3.6_ASV.run @@ -0,0 +1,25 @@ +ipython +ipykernel +ipywidgets +sphinx=1.5* +nbconvert +nbformat +notebook +matplotlib +seaborn +scipy +lxml +beautifulsoup4 +html5lib +pytables +python-snappy +openpyxl +xlrd +xlwt +xlsxwriter +sqlalchemy +numexpr +bottleneck +statsmodels +xarray +pyqt diff --git a/ci/requirements-3.6_ASV.sh b/ci/requirements-3.6_ASV.sh new file mode 100755 index 0000000000000..8a46f85dbb6bc --- /dev/null +++ b/ci/requirements-3.6_ASV.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +source activate pandas + +echo "[install ASV_BUILD deps]" + +pip install git+https://github.com/spacetelescope/asv diff --git a/ci/script_multi.sh b/ci/script_multi.sh index c1fa756ece965..766e51625fbe6 100755 --- a/ci/script_multi.sh +++ b/ci/script_multi.sh @@ -37,6 +37,9 @@ if [ "$PIP_BUILD_TEST" ] || [ "$CONDA_BUILD_TEST" ]; then elif [ "$DOC" ]; then echo "We are not running pytest as this is a doc-build" +elif [ "$ASV" ]; then + echo "We are not running pytest as this is an asv-build" + elif [ "$COVERAGE" ]; then echo pytest -s -n 2 -m "not single" --cov=pandas --cov-report xml:/tmp/cov-multiple.xml --junitxml=/tmp/multiple.xml --strict $TEST_ARGS pandas pytest -s -n 2 -m "not single" --cov=pandas --cov-report xml:/tmp/cov-multiple.xml --junitxml=/tmp/multiple.xml --strict $TEST_ARGS pandas diff --git a/ci/script_single.sh b/ci/script_single.sh index 005c648ee025f..153847ab2e8c9 100755 --- a/ci/script_single.sh +++ 
b/ci/script_single.sh @@ -22,6 +22,9 @@ if [ "$PIP_BUILD_TEST" ] || [ "$CONDA_BUILD_TEST" ]; then elif [ "$DOC" ]; then echo "We are not running pytest as this is a doc-build" +elif [ "$ASV" ]; then + echo "We are not running pytest as this is an asv-build" + elif [ "$COVERAGE" ]; then echo pytest -s -m "single" --strict --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas pytest -s -m "single" --strict --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas From 6c88f53ecbb3b1b20d2e6e9165ac7f45335f25f9 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 7 Feb 2018 19:20:28 -0600 Subject: [PATCH 071/217] BUG: Fixed merge on dtype equal categories (#19553) --- doc/source/whatsnew/v0.23.0.txt | 40 +++++++++++++++-------- pandas/core/indexes/category.py | 11 +++++-- pandas/core/reshape/merge.py | 10 +++++- pandas/tests/indexing/test_categorical.py | 17 ++++++++++ pandas/tests/reshape/merge/test_merge.py | 19 +++++++++++ 5 files changed, 80 insertions(+), 17 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 7782e5f1ffa56..bed0c077c1348 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -598,6 +598,32 @@ Documentation Changes Bug Fixes ~~~~~~~~~ +Categorical +^^^^^^^^^^^ + +.. warning:: + + A class of bugs were introduced in pandas 0.21 with ``CategoricalDtype`` that + affects the correctness of operations like ``merge``, ``concat``, and + indexing when comparing multiple unordered ``Categorical`` arrays that have + the same categories, but in a different order. We highly recommend upgrading + or manually aligning your categories before doing these operations. 
+ +- Bug in ``Categorical.equals`` returning the wrong result when comparing two + unordered ``Categorical`` arrays with the same categories, but in a different + order (:issue:`16603`) +- Bug in :func:`pandas.api.types.union_categoricals` returning the wrong result + when for unordered categoricals with the categories in a different order. + This affected :func:`pandas.concat` with Categorical data (:issue:`19096`). +- Bug in :func:`pandas.merge` returning the wrong result when joining on an + unordered ``Categorical`` that had the same categories but in a different + order (:issue:`19551`) +- Bug in :meth:`CategoricalIndex.get_indexer` returning the wrong result when + ``target`` was an unordered ``Categorical`` that had the same categories as + ``self`` but in a different order (:issue:`19551`) +- Bug in :meth:`Index.astype` with a categorical dtype where the resultant index is not converted to a :class:`CategoricalIndex` for all types of index (:issue:`18630`) +- Bug in :meth:`Series.astype` and ``Categorical.astype()`` where an existing categorical data does not get updated (:issue:`10696`, :issue:`18593`) +- Bug in :class:`Index` constructor with ``dtype=CategoricalDtype(...)`` where ``categories`` and ``ordered`` are not maintained (issue:`19032`) Datetimelike ^^^^^^^^^^^^ @@ -745,20 +771,6 @@ Reshaping - Improved error message for :func:`DataFrame.merge` when there is no common merge key (:issue:`19427`) - - -Categorical -^^^^^^^^^^^ - -- -- Bug in :func:`pandas.api.types.union_categoricals` returning the wrong result - when all the categoricals had the same categories, but in a different order. - This affected :func:`pandas.concat` with Categorical data (:issue:`19096`). 
-- Bug in ``Categorical.equals`` between two unordered categories with the same categories, but in a different order (:issue:`16603`) -- Bug in :meth:`Index.astype` with a categorical dtype where the resultant index is not converted to a :class:`CategoricalIndex` for all types of index (:issue:`18630`) -- Bug in :meth:`Series.astype` and ``Categorical.astype()`` where an existing categorical data does not get updated (:issue:`10696`, :issue:`18593`) -- Bug in :class:`Index` constructor with ``dtype=CategoricalDtype(...)`` where ``categories`` and ``ordered`` are not maintained (issue:`19032`) -- - Other ^^^^^ diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 2c7be2b21f959..b36bc1df23247 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -553,6 +553,8 @@ def _reindex_non_unique(self, target): @Appender(_index_shared_docs['get_indexer'] % _index_doc_kwargs) def get_indexer(self, target, method=None, limit=None, tolerance=None): + from pandas.core.arrays.categorical import _recode_for_categories + method = missing.clean_reindex_fill_method(method) target = ibase._ensure_index(target) @@ -568,8 +570,13 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): if (isinstance(target, CategoricalIndex) and self.values.is_dtype_equal(target)): - # we have the same codes - codes = target.codes + if self.values.equals(target.values): + # we have the same codes + codes = target.codes + else: + codes = _recode_for_categories(target.codes, + target.categories, + self.values.categories) else: if isinstance(target, CategoricalIndex): code_indexer = self.categories.get_indexer(target.categories) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 9dbb327e3d956..4b99b0407cfcc 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -12,6 +12,7 @@ from pandas import (Categorical, DataFrame, Index, MultiIndex, Timedelta) +from 
pandas.core.arrays.categorical import _recode_for_categories from pandas.core.frame import _merge_doc from pandas.core.dtypes.common import ( is_datetime64tz_dtype, @@ -1540,8 +1541,15 @@ def _factorize_keys(lk, rk, sort=True): is_categorical_dtype(rk) and lk.is_dtype_equal(rk)): klass = libhashtable.Int64Factorizer + + if lk.categories.equals(rk.categories): + rk = rk.codes + else: + # Same categories in different orders -> recode + rk = _recode_for_categories(rk.codes, rk.categories, lk.categories) + lk = _ensure_int64(lk.codes) - rk = _ensure_int64(rk.codes) + rk = _ensure_int64(rk) elif is_int_or_datetime_dtype(lk) and is_int_or_datetime_dtype(rk): klass = libhashtable.Int64Factorizer lk = _ensure_int64(com._values_from_object(lk)) diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py index f2182687d047f..634ad0d8160ed 100644 --- a/pandas/tests/indexing/test_categorical.py +++ b/pandas/tests/indexing/test_categorical.py @@ -432,6 +432,23 @@ def test_get_indexer_array(self): expected = np.array([0, 1], dtype='intp') tm.assert_numpy_array_equal(result, expected) + def test_get_indexer_same_categories_same_order(self): + ci = CategoricalIndex(['a', 'b'], categories=['a', 'b']) + + result = ci.get_indexer(CategoricalIndex(['b', 'b'], + categories=['a', 'b'])) + expected = np.array([1, 1], dtype='intp') + tm.assert_numpy_array_equal(result, expected) + + def test_get_indexer_same_categories_different_order(self): + # https://github.com/pandas-dev/pandas/issues/19551 + ci = CategoricalIndex(['a', 'b'], categories=['a', 'b']) + + result = ci.get_indexer(CategoricalIndex(['b', 'b'], + categories=['b', 'a'])) + expected = np.array([1, 1], dtype='intp') + tm.assert_numpy_array_equal(result, expected) + def test_getitem_with_listlike(self): # GH 16115 cats = Categorical([Timestamp('12-31-1999'), diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 32f83ab972be5..101d34ebdb89f 
100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -1643,6 +1643,25 @@ def test_merge_categorical(self): result = pd.merge(cleft, cright, how='left', left_on='b', right_on='c') tm.assert_frame_equal(result, expected) + def tests_merge_categorical_unordered_equal(self): + # GH-19551 + df1 = DataFrame({ + 'Foo': Categorical(['A', 'B', 'C'], categories=['A', 'B', 'C']), + 'Left': ['A0', 'B0', 'C0'], + }) + + df2 = DataFrame({ + 'Foo': Categorical(['C', 'B', 'A'], categories=['C', 'B', 'A']), + 'Right': ['C1', 'B1', 'A1'], + }) + result = pd.merge(df1, df2, on=['Foo']) + expected = DataFrame({ + 'Foo': pd.Categorical(['A', 'B', 'C']), + 'Left': ['A0', 'B0', 'C0'], + 'Right': ['A1', 'B1', 'C1'], + }) + assert_frame_equal(result, expected) + def test_other_columns(self, left, right): # non-merge columns should preserve if possible right = right.assign(Z=right.Z.astype('category')) From e30498ac869d4fd38bc26e10479b230c2e355f0d Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 7 Feb 2018 19:32:02 -0600 Subject: [PATCH 072/217] PERF: Correct signature for group_nth / group_object (#19579) --- asv_bench/benchmarks/groupby.py | 16 ++++++++++++++++ doc/source/whatsnew/v0.23.0.txt | 1 + pandas/_libs/groupby.pyx | 10 ++++++++-- 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index 8aa67d8bc6a6a..61db39528a5fb 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -160,6 +160,22 @@ def time_series_nth(self, df): df[1].groupby(df[0]).nth(0) +class NthObject(object): + + goal_time = 0.2 + + def setup_cache(self): + df = DataFrame(np.random.randint(1, 100, (10000,)), columns=['g']) + df['obj'] = ['a'] * 5000 + ['b'] * 5000 + return df + + def time_nth(self, df): + df.groupby('g').nth(5) + + def time_nth_last(self, df): + df.groupby('g').last() + + class DateAttributes(object): goal_time = 0.2 diff --git 
a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index bed0c077c1348..6c4fce35529ad 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -746,6 +746,7 @@ Groupby/Resample/Rolling - Bug in :func:`DataFrame.transform` where particular aggregation functions were being incorrectly cast to match the dtype(s) of the grouped data (:issue:`19200`) - Bug in :func:`DataFrame.groupby` passing the `on=` kwarg, and subsequently using ``.apply()`` (:issue:`17813`) - Bug in :func:`DataFrame.resample().aggregate` not raising a ``KeyError`` when aggregating a non-existent column (:issue:`16766`, :issue:`19566`) +- Fixed a performance regression for ``GroupBy.nth`` and ``GroupBy.last`` with some object columns (:issue:`19283`) Sparse ^^^^^^ diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 9cc15fb6692d9..55de700c9af52 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -36,7 +36,8 @@ def group_nth_object(ndarray[object, ndim=2] out, ndarray[int64_t] counts, ndarray[object, ndim=2] values, ndarray[int64_t] labels, - int64_t rank): + int64_t rank, + Py_ssize_t min_count=-1): """ Only aggregates on axis=0 """ @@ -47,6 +48,8 @@ def group_nth_object(ndarray[object, ndim=2] out, ndarray[int64_t, ndim=2] nobs ndarray[object, ndim=2] resx + assert min_count == -1, "'min_count' only used in add and prod" + nobs = np.zeros(( out).shape, dtype=np.int64) resx = np.empty(( out).shape, dtype=object) @@ -80,7 +83,8 @@ def group_nth_object(ndarray[object, ndim=2] out, def group_last_object(ndarray[object, ndim=2] out, ndarray[int64_t] counts, ndarray[object, ndim=2] values, - ndarray[int64_t] labels): + ndarray[int64_t] labels, + Py_ssize_t min_count=-1): """ Only aggregates on axis=0 """ @@ -91,6 +95,8 @@ def group_last_object(ndarray[object, ndim=2] out, ndarray[object, ndim=2] resx ndarray[int64_t, ndim=2] nobs + assert min_count == -1, "'min_count' only used in add and prod" + nobs = np.zeros(( 
out).shape, dtype=np.int64) resx = np.empty(( out).shape, dtype=object) From c259dad64b037a745fd4c1223db1683bb3726435 Mon Sep 17 00:00:00 2001 From: xpvpc <32843902+xpvpc@users.noreply.github.com> Date: Thu, 8 Feb 2018 12:17:12 +0100 Subject: [PATCH 073/217] DOC: doc/source/indexing.rst says pd.df.ix is deprecated, show warning in generated doc. (#19596) --- pandas/core/indexing.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 9463512ac11de..352ce921d1d44 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1300,6 +1300,9 @@ class _IXIndexer(_NDFrameIndexer): """A primarily label-location based indexer, with integer position fallback. + Warning: Starting in 0.20.0, the .ix indexer is deprecated, in + favor of the more strict .iloc and .loc indexers. + ``.ix[]`` supports mixed integer and label based access. It is primarily label based, but will fall back to integer positional access unless the corresponding axis is of integer type. 
From 845a74a4f50963af72c0731ce74fc8d888cd4d22 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 8 Feb 2018 03:20:39 -0800 Subject: [PATCH 074/217] Simplify argument passing in period_helper (#19550) --- pandas/_libs/src/period_helper.c | 410 ++++++++++--------------------- pandas/_libs/src/period_helper.h | 11 +- pandas/_libs/tslibs/period.pyx | 32 ++- 3 files changed, 157 insertions(+), 296 deletions(-) diff --git a/pandas/_libs/src/period_helper.c b/pandas/_libs/src/period_helper.c index 570f20b790750..f0e24fec685d0 100644 --- a/pandas/_libs/src/period_helper.c +++ b/pandas/_libs/src/period_helper.c @@ -82,11 +82,14 @@ static int dInfoCalc_SetFromAbsDate(register struct date_info *dinfo, // helpers for frequency conversion routines // -static int daytime_conversion_factors[][2] = { - {FR_DAY, 1}, {FR_HR, 24}, {FR_MIN, 60}, {FR_SEC, 60}, - {FR_MS, 1000}, {FR_US, 1000}, {FR_NS, 1000}, {0, 0}}; - -static npy_int64 **daytime_conversion_factor_matrix = NULL; +static npy_int64 daytime_conversion_factor_matrix[7][7] = { + {1, 24, 1440, 86400, 86400000, 86400000000, 86400000000000}, + {0, 1, 60, 3600, 3600000, 3600000000, 3600000000000}, + {0, 0, 1, 60, 60000, 60000000, 60000000000}, + {0, 0, 0, 1, 1000, 1000000, 1000000000}, + {0, 0, 0, 0, 1, 1000, 1000000}, + {0, 0, 0, 0, 0, 1, 1000}, + {0, 0, 0, 0, 0, 0, 1}}; PANDAS_INLINE int max_value(int a, int b) { return a > b ? 
a : b; } @@ -96,100 +99,24 @@ PANDAS_INLINE int get_freq_group(int freq) { return (freq / 1000) * 1000; } PANDAS_INLINE int get_freq_group_index(int freq) { return freq / 1000; } -static int calc_conversion_factors_matrix_size(void) { - int matrix_size = 0; - int index; - for (index = 0;; index++) { - int period_value = - get_freq_group_index(daytime_conversion_factors[index][0]); - if (period_value == 0) { - break; - } - matrix_size = max_value(matrix_size, period_value); - } - return matrix_size + 1; -} - -static void alloc_conversion_factors_matrix(int matrix_size) { - int row_index; - int column_index; - daytime_conversion_factor_matrix = - malloc(matrix_size * sizeof(**daytime_conversion_factor_matrix)); - for (row_index = 0; row_index < matrix_size; row_index++) { - daytime_conversion_factor_matrix[row_index] = - malloc(matrix_size * sizeof(**daytime_conversion_factor_matrix)); - for (column_index = 0; column_index < matrix_size; column_index++) { - daytime_conversion_factor_matrix[row_index][column_index] = 0; - } - } -} - -static npy_int64 calculate_conversion_factor(int start_value, int end_value) { - npy_int64 conversion_factor = 0; - int index; - for (index = 0;; index++) { - int freq_group = daytime_conversion_factors[index][0]; - - if (freq_group == 0) { - conversion_factor = 0; - break; - } - - if (freq_group == start_value) { - conversion_factor = 1; - } else { - conversion_factor *= daytime_conversion_factors[index][1]; - } - - if (freq_group == end_value) { - break; - } - } - return conversion_factor; -} - -static void populate_conversion_factors_matrix(void) { - int row_index_index; - int row_value, row_index; - int column_index_index; - int column_value, column_index; - - for (row_index_index = 0;; row_index_index++) { - row_value = daytime_conversion_factors[row_index_index][0]; - if (row_value == 0) { - break; - } - row_index = get_freq_group_index(row_value); - for (column_index_index = row_index_index;; column_index_index++) { - column_value 
= daytime_conversion_factors[column_index_index][0]; - if (column_value == 0) { - break; - } - column_index = get_freq_group_index(column_value); - - daytime_conversion_factor_matrix[row_index][column_index] = - calculate_conversion_factor(row_value, column_value); - } - } -} - -void initialize_daytime_conversion_factor_matrix() { - if (daytime_conversion_factor_matrix == NULL) { - int matrix_size = calc_conversion_factors_matrix_size(); - alloc_conversion_factors_matrix(matrix_size); - populate_conversion_factors_matrix(); - } -} PANDAS_INLINE npy_int64 get_daytime_conversion_factor(int from_index, int to_index) { - return daytime_conversion_factor_matrix[min_value(from_index, to_index)] - [max_value(from_index, to_index)]; + int row = min_value(from_index, to_index); + int col = max_value(from_index, to_index); + // row or col < 6 means frequency strictly lower than Daily, which + // do not use daytime_conversion_factors + if (row < 6) { + return 0; + } else if (col < 6) { + return 0; + } + return daytime_conversion_factor_matrix[row - 6][col - 6]; } PANDAS_INLINE npy_int64 upsample_daytime(npy_int64 ordinal, - asfreq_info *af_info, int atEnd) { - if (atEnd) { + asfreq_info *af_info) { + if (af_info->is_end) { return (ordinal + 1) * af_info->intraday_conversion_factor - 1; } else { return ordinal * af_info->intraday_conversion_factor; @@ -197,18 +124,18 @@ PANDAS_INLINE npy_int64 upsample_daytime(npy_int64 ordinal, } PANDAS_INLINE npy_int64 downsample_daytime(npy_int64 ordinal, - asfreq_info *af_info, int atEnd) { + asfreq_info *af_info) { return ordinal / (af_info->intraday_conversion_factor); } -PANDAS_INLINE npy_int64 transform_via_day(npy_int64 ordinal, char relation, +PANDAS_INLINE npy_int64 transform_via_day(npy_int64 ordinal, asfreq_info *af_info, freq_conv_func first_func, freq_conv_func second_func) { npy_int64 result; - result = (*first_func)(ordinal, relation, af_info); - result = (*second_func)(result, relation, af_info); + result = 
(*first_func)(ordinal, af_info); + result = (*second_func)(result, af_info); return result; } @@ -241,10 +168,9 @@ static npy_int64 absdate_from_ymd(int y, int m, int d) { //************ FROM DAILY *************** -static npy_int64 asfreq_DTtoA(npy_int64 ordinal, char relation, - asfreq_info *af_info) { +static npy_int64 asfreq_DTtoA(npy_int64 ordinal, asfreq_info *af_info) { struct date_info dinfo; - ordinal = downsample_daytime(ordinal, af_info, 0); + ordinal = downsample_daytime(ordinal, af_info); dInfoCalc_SetFromAbsDate(&dinfo, ordinal + ORD_OFFSET); if (dinfo.month > af_info->to_a_year_end) { return (npy_int64)(dinfo.year + 1 - BASE_YEAR); @@ -272,142 +198,110 @@ static npy_int64 DtoQ_yq(npy_int64 ordinal, asfreq_info *af_info, int *year, return 0; } -static npy_int64 asfreq_DTtoQ(npy_int64 ordinal, char relation, - asfreq_info *af_info) { +static npy_int64 asfreq_DTtoQ(npy_int64 ordinal, asfreq_info *af_info) { int year, quarter; - ordinal = downsample_daytime(ordinal, af_info, 0); + ordinal = downsample_daytime(ordinal, af_info); DtoQ_yq(ordinal, af_info, &year, &quarter); return (npy_int64)((year - BASE_YEAR) * 4 + quarter - 1); } -static npy_int64 asfreq_DTtoM(npy_int64 ordinal, char relation, - asfreq_info *af_info) { +static npy_int64 asfreq_DTtoM(npy_int64 ordinal, asfreq_info *af_info) { struct date_info dinfo; - ordinal = downsample_daytime(ordinal, af_info, 0); + ordinal = downsample_daytime(ordinal, af_info); dInfoCalc_SetFromAbsDate(&dinfo, ordinal + ORD_OFFSET); return (npy_int64)((dinfo.year - BASE_YEAR) * 12 + dinfo.month - 1); } -static npy_int64 asfreq_DTtoW(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - ordinal = downsample_daytime(ordinal, af_info, 0); +static npy_int64 asfreq_DTtoW(npy_int64 ordinal, asfreq_info *af_info) { + ordinal = downsample_daytime(ordinal, af_info); return (ordinal + ORD_OFFSET - (1 + af_info->to_week_end)) / 7 + 1 - WEEK_OFFSET; } -static npy_int64 asfreq_DTtoB(npy_int64 ordinal, char relation, - 
asfreq_info *af_info) { +static npy_int64 asfreq_DTtoB(npy_int64 ordinal, asfreq_info *af_info) { struct date_info dinfo; int roll_back; - ordinal = downsample_daytime(ordinal, af_info, 0); + ordinal = downsample_daytime(ordinal, af_info); dInfoCalc_SetFromAbsDate(&dinfo, ordinal + ORD_OFFSET); // This usage defines roll_back the opposite way from the others - roll_back = (relation == 'S') ? 1 : 0; + roll_back = 1 - af_info->is_end; return DtoB(&dinfo, roll_back); } // all intra day calculations are now done within one function -static npy_int64 asfreq_DownsampleWithinDay(npy_int64 ordinal, char relation, +static npy_int64 asfreq_DownsampleWithinDay(npy_int64 ordinal, asfreq_info *af_info) { - return downsample_daytime(ordinal, af_info, relation == 'E'); + return downsample_daytime(ordinal, af_info); } -static npy_int64 asfreq_UpsampleWithinDay(npy_int64 ordinal, char relation, +static npy_int64 asfreq_UpsampleWithinDay(npy_int64 ordinal, asfreq_info *af_info) { - return upsample_daytime(ordinal, af_info, relation == 'E'); + return upsample_daytime(ordinal, af_info); } //************ FROM BUSINESS *************** -static npy_int64 asfreq_BtoDT(npy_int64 ordinal, char relation, - asfreq_info *af_info) { +static npy_int64 asfreq_BtoDT(npy_int64 ordinal, asfreq_info *af_info) { ordinal += BDAY_OFFSET; ordinal = (((ordinal - 1) / 5) * 7 + mod_compat(ordinal - 1, 5) + 1 - ORD_OFFSET); - return upsample_daytime(ordinal, af_info, relation != 'S'); + return upsample_daytime(ordinal, af_info); } -static npy_int64 asfreq_BtoA(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_BtoDT, - asfreq_DTtoA); +static npy_int64 asfreq_BtoA(npy_int64 ordinal, asfreq_info *af_info) { + return transform_via_day(ordinal, af_info, asfreq_BtoDT, asfreq_DTtoA); } -static npy_int64 asfreq_BtoQ(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_BtoDT, - 
asfreq_DTtoQ); +static npy_int64 asfreq_BtoQ(npy_int64 ordinal, asfreq_info *af_info) { + return transform_via_day(ordinal, af_info, asfreq_BtoDT, asfreq_DTtoQ); } -static npy_int64 asfreq_BtoM(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_BtoDT, - asfreq_DTtoM); +static npy_int64 asfreq_BtoM(npy_int64 ordinal, asfreq_info *af_info) { + return transform_via_day(ordinal, af_info, asfreq_BtoDT, asfreq_DTtoM); } -static npy_int64 asfreq_BtoW(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_BtoDT, - asfreq_DTtoW); +static npy_int64 asfreq_BtoW(npy_int64 ordinal, asfreq_info *af_info) { + return transform_via_day(ordinal, af_info, asfreq_BtoDT, asfreq_DTtoW); } //************ FROM WEEKLY *************** -static npy_int64 asfreq_WtoDT(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - ordinal += WEEK_OFFSET; - if (relation != 'S') { - ordinal += 1; - } - - ordinal = ordinal * 7 - 6 + af_info->from_week_end - ORD_OFFSET; - - if (relation != 'S') { - ordinal -= 1; - } - - return upsample_daytime(ordinal, af_info, relation != 'S'); +static npy_int64 asfreq_WtoDT(npy_int64 ordinal, asfreq_info *af_info) { + ordinal = (ordinal + WEEK_OFFSET) * 7 + + af_info->from_week_end - ORD_OFFSET + + (7 - 1) * (af_info->is_end - 1); + return upsample_daytime(ordinal, af_info); } -static npy_int64 asfreq_WtoA(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_WtoDT, - asfreq_DTtoA); +static npy_int64 asfreq_WtoA(npy_int64 ordinal, asfreq_info *af_info) { + return transform_via_day(ordinal, af_info, asfreq_WtoDT, asfreq_DTtoA); } -static npy_int64 asfreq_WtoQ(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_WtoDT, - asfreq_DTtoQ); +static npy_int64 asfreq_WtoQ(npy_int64 ordinal, asfreq_info 
*af_info) { + return transform_via_day(ordinal, af_info, asfreq_WtoDT, asfreq_DTtoQ); } -static npy_int64 asfreq_WtoM(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_WtoDT, - asfreq_DTtoM); +static npy_int64 asfreq_WtoM(npy_int64 ordinal, asfreq_info *af_info) { + return transform_via_day(ordinal, af_info, asfreq_WtoDT, asfreq_DTtoM); } -static npy_int64 asfreq_WtoW(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_WtoDT, - asfreq_DTtoW); +static npy_int64 asfreq_WtoW(npy_int64 ordinal, asfreq_info *af_info) { + return transform_via_day(ordinal, af_info, asfreq_WtoDT, asfreq_DTtoW); } -static npy_int64 asfreq_WtoB(npy_int64 ordinal, char relation, - asfreq_info *af_info) { +static npy_int64 asfreq_WtoB(npy_int64 ordinal, asfreq_info *af_info) { struct date_info dinfo; - int roll_back; + int roll_back = af_info->is_end; dInfoCalc_SetFromAbsDate( - &dinfo, asfreq_WtoDT(ordinal, relation, af_info) + ORD_OFFSET); + &dinfo, asfreq_WtoDT(ordinal, af_info) + ORD_OFFSET); - roll_back = (relation == 'S') ? 
0 : 1; return DtoB(&dinfo, roll_back); } @@ -417,52 +311,38 @@ static void MtoD_ym(npy_int64 ordinal, int *y, int *m) { *m = mod_compat(ordinal, 12) + 1; } -static npy_int64 asfreq_MtoDT(npy_int64 ordinal, char relation, - asfreq_info *af_info) { +static npy_int64 asfreq_MtoDT(npy_int64 ordinal, asfreq_info *af_info) { npy_int64 absdate; int y, m; - if (relation == 'E') { - ordinal += 1; - } + ordinal += af_info->is_end; MtoD_ym(ordinal, &y, &m); absdate = absdate_from_ymd(y, m, 1); ordinal = absdate - ORD_OFFSET; - if (relation == 'E') { - ordinal -= 1; - } - - return upsample_daytime(ordinal, af_info, relation != 'S'); + ordinal -= af_info->is_end; + return upsample_daytime(ordinal, af_info); } -static npy_int64 asfreq_MtoA(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_MtoDT, - asfreq_DTtoA); +static npy_int64 asfreq_MtoA(npy_int64 ordinal, asfreq_info *af_info) { + return transform_via_day(ordinal, af_info, asfreq_MtoDT, asfreq_DTtoA); } -static npy_int64 asfreq_MtoQ(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_MtoDT, - asfreq_DTtoQ); +static npy_int64 asfreq_MtoQ(npy_int64 ordinal, asfreq_info *af_info) { + return transform_via_day(ordinal, af_info, asfreq_MtoDT, asfreq_DTtoQ); } -static npy_int64 asfreq_MtoW(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_MtoDT, - asfreq_DTtoW); +static npy_int64 asfreq_MtoW(npy_int64 ordinal, asfreq_info *af_info) { + return transform_via_day(ordinal, af_info, asfreq_MtoDT, asfreq_DTtoW); } -static npy_int64 asfreq_MtoB(npy_int64 ordinal, char relation, - asfreq_info *af_info) { +static npy_int64 asfreq_MtoB(npy_int64 ordinal, asfreq_info *af_info) { struct date_info dinfo; - int roll_back; + int roll_back = af_info->is_end; dInfoCalc_SetFromAbsDate( - &dinfo, asfreq_MtoDT(ordinal, relation, af_info) + 
ORD_OFFSET); + &dinfo, asfreq_MtoDT(ordinal, af_info) + ORD_OFFSET); - roll_back = (relation == 'S') ? 0 : 1; return DtoB(&dinfo, roll_back); } @@ -482,130 +362,94 @@ static void QtoD_ym(npy_int64 ordinal, int *y, int *m, asfreq_info *af_info) { } } -static npy_int64 asfreq_QtoDT(npy_int64 ordinal, char relation, - asfreq_info *af_info) { +static npy_int64 asfreq_QtoDT(npy_int64 ordinal, asfreq_info *af_info) { npy_int64 absdate; int y, m; - if (relation == 'E') { - ordinal += 1; - } - + ordinal += af_info->is_end; QtoD_ym(ordinal, &y, &m, af_info); absdate = absdate_from_ymd(y, m, 1); - if (relation == 'E') { - absdate -= 1; - } - - return upsample_daytime(absdate - ORD_OFFSET, af_info, relation != 'S'); + absdate -= af_info->is_end; + return upsample_daytime(absdate - ORD_OFFSET, af_info); } -static npy_int64 asfreq_QtoQ(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_QtoDT, - asfreq_DTtoQ); +static npy_int64 asfreq_QtoQ(npy_int64 ordinal, asfreq_info *af_info) { + return transform_via_day(ordinal, af_info, asfreq_QtoDT, asfreq_DTtoQ); } -static npy_int64 asfreq_QtoA(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_QtoDT, - asfreq_DTtoA); +static npy_int64 asfreq_QtoA(npy_int64 ordinal, asfreq_info *af_info) { + return transform_via_day(ordinal, af_info, asfreq_QtoDT, asfreq_DTtoA); } -static npy_int64 asfreq_QtoM(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_QtoDT, - asfreq_DTtoM); +static npy_int64 asfreq_QtoM(npy_int64 ordinal, asfreq_info *af_info) { + return transform_via_day(ordinal, af_info, asfreq_QtoDT, asfreq_DTtoM); } -static npy_int64 asfreq_QtoW(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_QtoDT, - asfreq_DTtoW); +static npy_int64 asfreq_QtoW(npy_int64 
ordinal, asfreq_info *af_info) { + return transform_via_day(ordinal, af_info, asfreq_QtoDT, asfreq_DTtoW); } -static npy_int64 asfreq_QtoB(npy_int64 ordinal, char relation, - asfreq_info *af_info) { +static npy_int64 asfreq_QtoB(npy_int64 ordinal, asfreq_info *af_info) { struct date_info dinfo; - int roll_back; + int roll_back = af_info->is_end; + dInfoCalc_SetFromAbsDate( - &dinfo, asfreq_QtoDT(ordinal, relation, af_info) + ORD_OFFSET); + &dinfo, asfreq_QtoDT(ordinal, af_info) + ORD_OFFSET); - roll_back = (relation == 'S') ? 0 : 1; return DtoB(&dinfo, roll_back); } //************ FROM ANNUAL *************** -static npy_int64 asfreq_AtoDT(npy_int64 year, char relation, - asfreq_info *af_info) { +static npy_int64 asfreq_AtoDT(npy_int64 ordinal, asfreq_info *af_info) { npy_int64 absdate; - int month = (af_info->from_a_year_end) % 12; // start from 1970 - year += BASE_YEAR; - - month += 1; + npy_int64 year = ordinal + BASE_YEAR; + int month = (af_info->from_a_year_end % 12) + 1; if (af_info->from_a_year_end != 12) { year -= 1; } - if (relation == 'E') { - year += 1; - } - + year += af_info->is_end; absdate = absdate_from_ymd(year, month, 1); - if (relation == 'E') { - absdate -= 1; - } - - return upsample_daytime(absdate - ORD_OFFSET, af_info, relation != 'S'); + absdate -= af_info->is_end; + return upsample_daytime(absdate - ORD_OFFSET, af_info); } -static npy_int64 asfreq_AtoA(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_AtoDT, - asfreq_DTtoA); +static npy_int64 asfreq_AtoA(npy_int64 ordinal, asfreq_info *af_info) { + return transform_via_day(ordinal, af_info, asfreq_AtoDT, asfreq_DTtoA); } -static npy_int64 asfreq_AtoQ(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_AtoDT, - asfreq_DTtoQ); +static npy_int64 asfreq_AtoQ(npy_int64 ordinal, asfreq_info *af_info) { + return transform_via_day(ordinal, af_info, 
asfreq_AtoDT, asfreq_DTtoQ); } -static npy_int64 asfreq_AtoM(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_AtoDT, - asfreq_DTtoM); +static npy_int64 asfreq_AtoM(npy_int64 ordinal, asfreq_info *af_info) { + return transform_via_day(ordinal, af_info, asfreq_AtoDT, asfreq_DTtoM); } -static npy_int64 asfreq_AtoW(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_AtoDT, - asfreq_DTtoW); +static npy_int64 asfreq_AtoW(npy_int64 ordinal, asfreq_info *af_info) { + return transform_via_day(ordinal, af_info, asfreq_AtoDT, asfreq_DTtoW); } -static npy_int64 asfreq_AtoB(npy_int64 ordinal, char relation, - asfreq_info *af_info) { +static npy_int64 asfreq_AtoB(npy_int64 ordinal, asfreq_info *af_info) { struct date_info dinfo; - int roll_back; + int roll_back = af_info->is_end; dInfoCalc_SetFromAbsDate( - &dinfo, asfreq_AtoDT(ordinal, relation, af_info) + ORD_OFFSET); + &dinfo, asfreq_AtoDT(ordinal, af_info) + ORD_OFFSET); - roll_back = (relation == 'S') ? 
0 : 1; return DtoB(&dinfo, roll_back); } -static npy_int64 nofunc(npy_int64 ordinal, char relation, - asfreq_info *af_info) { +static npy_int64 nofunc(npy_int64 ordinal, asfreq_info *af_info) { return INT_ERR_CODE; } -static npy_int64 no_op(npy_int64 ordinal, char relation, asfreq_info *af_info) { +static npy_int64 no_op(npy_int64 ordinal, asfreq_info *af_info) { return ordinal; } @@ -622,10 +466,17 @@ static int calc_a_year_end(int freq, int group) { static int calc_week_end(int freq, int group) { return freq - group; } -void get_asfreq_info(int fromFreq, int toFreq, asfreq_info *af_info) { +void get_asfreq_info(int fromFreq, int toFreq, char relation, + asfreq_info *af_info) { int fromGroup = get_freq_group(fromFreq); int toGroup = get_freq_group(toFreq); + if (relation == 'E') { + af_info->is_end = 1; + } else { + af_info->is_end = 0; + } + af_info->intraday_conversion_factor = get_daytime_conversion_factor( get_freq_group_index(max_value(fromGroup, FR_DAY)), get_freq_group_index(max_value(toGroup, FR_DAY))); @@ -895,9 +746,8 @@ npy_int64 asfreq(npy_int64 period_ordinal, int freq1, int freq2, func = get_asfreq_func(freq1, freq2); - get_asfreq_info(freq1, freq2, &finfo); - - val = (*func)(period_ordinal, relation, &finfo); + get_asfreq_info(freq1, freq2, relation, &finfo); + val = (*func)(period_ordinal, &finfo); return val; } @@ -1017,9 +867,9 @@ npy_int64 get_python_ordinal(npy_int64 period_ordinal, int freq) { if (freq == FR_DAY) return period_ordinal + ORD_OFFSET; toDaily = get_asfreq_func(freq, FR_DAY); - get_asfreq_info(freq, FR_DAY, &af_info); + get_asfreq_info(freq, FR_DAY, 'E', &af_info); - return toDaily(period_ordinal, 'E', &af_info) + ORD_OFFSET; + return toDaily(period_ordinal, &af_info) + ORD_OFFSET; } @@ -1027,19 +877,19 @@ int get_yq(npy_int64 ordinal, int freq, int *quarter, int *year) { asfreq_info af_info; int qtr_freq; npy_int64 daily_ord; - npy_int64 (*toDaily)(npy_int64, char, asfreq_info *) = NULL; + freq_conv_func toDaily = NULL; toDaily = 
get_asfreq_func(freq, FR_DAY); - get_asfreq_info(freq, FR_DAY, &af_info); + get_asfreq_info(freq, FR_DAY, 'E', &af_info); - daily_ord = toDaily(ordinal, 'E', &af_info); + daily_ord = toDaily(ordinal, &af_info); if (get_freq_group(freq) == FR_QTR) { qtr_freq = freq; } else { qtr_freq = FR_QTR; } - get_asfreq_info(FR_DAY, qtr_freq, &af_info); + get_asfreq_info(FR_DAY, qtr_freq, 'E', &af_info); DtoQ_yq(daily_ord, &af_info, year, quarter); return 0; @@ -1056,7 +906,7 @@ int _quarter_year(npy_int64 ordinal, int freq, int *year, int *quarter) { else qtr_freq = FR_QTR; - get_asfreq_info(FR_DAY, qtr_freq, &af_info); + get_asfreq_info(FR_DAY, qtr_freq, 'E', &af_info); DtoQ_yq(ordinal, &af_info, year, quarter); diff --git a/pandas/_libs/src/period_helper.h b/pandas/_libs/src/period_helper.h index 2c74659346b15..f14aec268a1fb 100644 --- a/pandas/_libs/src/period_helper.h +++ b/pandas/_libs/src/period_helper.h @@ -101,6 +101,10 @@ frequency conversion routines. #define INT_ERR_CODE INT32_MIN typedef struct asfreq_info { + int is_end; + // char relation == 'S' (for START) --> is_end = 0 + // char relation == 'E' (for END) --> is_end = 1 + int from_week_end; // day the week ends on in the "from" frequency int to_week_end; // day the week ends on in the "to" frequency @@ -124,7 +128,7 @@ typedef struct date_info { int year; } date_info; -typedef npy_int64 (*freq_conv_func)(npy_int64, char, asfreq_info *); +typedef npy_int64 (*freq_conv_func)(npy_int64, asfreq_info *af_info); /* * new pandas API helper functions here @@ -140,11 +144,10 @@ npy_int64 get_python_ordinal(npy_int64 period_ordinal, int freq); int get_date_info(npy_int64 ordinal, int freq, struct date_info *dinfo); freq_conv_func get_asfreq_func(int fromFreq, int toFreq); -void get_asfreq_info(int fromFreq, int toFreq, asfreq_info *af_info); +void get_asfreq_info(int fromFreq, int toFreq, char relation, + asfreq_info *af_info); int get_yq(npy_int64 ordinal, int freq, int *quarter, int *year); int _quarter_year(npy_int64 
ordinal, int freq, int *year, int *quarter); -void initialize_daytime_conversion_factor_matrix(void); - #endif // PANDAS__LIBS_SRC_PERIOD_HELPER_H_ diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index ba17b3d345ac8..3c396a9ff4f3c 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -61,6 +61,8 @@ cdef extern from "period_helper.h": int year ctypedef struct asfreq_info: + int is_end + int from_week_end int to_week_end @@ -70,13 +72,13 @@ cdef extern from "period_helper.h": int from_q_year_end int to_q_year_end - ctypedef int64_t (*freq_conv_func)(int64_t, char, asfreq_info*) + ctypedef int64_t (*freq_conv_func)(int64_t, asfreq_info*) - void initialize_daytime_conversion_factor_matrix() int64_t asfreq(int64_t dtordinal, int freq1, int freq2, char relation) except INT32_MIN freq_conv_func get_asfreq_func(int fromFreq, int toFreq) - void get_asfreq_info(int fromFreq, int toFreq, asfreq_info *af_info) + void get_asfreq_info(int fromFreq, int toFreq, char relation, + asfreq_info *af_info) int64_t get_period_ordinal(int year, int month, int day, int hour, int minute, int second, @@ -90,14 +92,20 @@ cdef extern from "period_helper.h": int _quarter_year(int64_t ordinal, int freq, int *year, int *quarter) -initialize_daytime_conversion_factor_matrix() - - @cython.cdivision cdef char* c_strftime(date_info *dinfo, char *fmt): """ - function to generate a nice string representation of the period + Generate a nice string representation of the period object, originally from DateObject_strftime + + Parameters + ---------- + dinfo : date_info* + fmt : char* + + Returns + ------- + result : char* """ cdef: tm c_date @@ -224,26 +232,26 @@ def period_asfreq_arr(ndarray[int64_t] arr, int freq1, int freq2, bint end): n = len(arr) result = np.empty(n, dtype=np.int64) - func = get_asfreq_func(freq1, freq2) - get_asfreq_info(freq1, freq2, &finfo) - if end: relation = END else: relation = START + func = get_asfreq_func(freq1, 
freq2) + get_asfreq_info(freq1, freq2, relation, &finfo) + mask = arr == iNaT if mask.any(): # NaT process for i in range(n): val = arr[i] if val != iNaT: - val = func(val, relation, &finfo) + val = func(val, &finfo) if val == INT32_MIN: raise ValueError("Unable to convert to desired frequency.") result[i] = val else: for i in range(n): - val = func(arr[i], relation, &finfo) + val = func(arr[i], &finfo) if val == INT32_MIN: raise ValueError("Unable to convert to desired frequency.") result[i] = val From e0c6c2567c8c62c7632b8701e9f0870625e2f70e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 8 Feb 2018 03:28:54 -0800 Subject: [PATCH 075/217] separate numeric tests so we can isolate division by zero (#19336) --- pandas/tests/series/test_operators.py | 196 ++++++++++++++------------ 1 file changed, 102 insertions(+), 94 deletions(-) diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 05ccb25960b1f..554b3e15d8f10 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -596,77 +596,81 @@ def test_divide_decimal(self): assert_series_equal(expected, s) - def test_div(self): + @pytest.mark.parametrize( + 'dtype2', + [ + np.int64, np.int32, np.int16, np.int8, + np.float64, np.float32, np.float16, + np.uint64, np.uint32, + np.uint16, np.uint8 + ]) + @pytest.mark.parametrize('dtype1', [np.int64, np.float64, np.uint64]) + def test_ser_div_ser(self, dtype1, dtype2): + # no longer do integer div for any ops, but deal with the 0's + first = Series([3, 4, 5, 8], name='first').astype(dtype1) + second = Series([0, 0, 0, 3], name='second').astype(dtype2) + with np.errstate(all='ignore'): - # no longer do integer div for any ops, but deal with the 0's - p = DataFrame({'first': [3, 4, 5, 8], 'second': [0, 0, 0, 3]}) - result = p['first'] / p['second'] - expected = Series( - p['first'].values.astype(float) / p['second'].values, - dtype='float64') - expected.iloc[0:3] = np.inf - 
assert_series_equal(result, expected) + expected = Series(first.values.astype(np.float64) / second.values, + dtype='float64', name=None) + expected.iloc[0:3] = np.inf - result = p['first'] / 0 - expected = Series(np.inf, index=p.index, name='first') - assert_series_equal(result, expected) + result = first / second + assert_series_equal(result, expected) + assert not result.equals(second / first) - p = p.astype('float64') - result = p['first'] / p['second'] - expected = Series(p['first'].values / p['second'].values) - assert_series_equal(result, expected) + def test_div_equiv_binop(self): + # Test Series.div as well as Series.__div__ + # float/integer issue + # GH#7785 + first = pd.Series([1, 0], name='first') + second = pd.Series([-0.01, -0.02], name='second') + expected = Series([-0.01, -np.inf]) - p = DataFrame({'first': [3, 4, 5, 8], 'second': [1, 1, 1, 1]}) - result = p['first'] / p['second'] - assert_series_equal(result, p['first'].astype('float64'), - check_names=False) - assert result.name is None - assert not result.equals(p['second'] / p['first']) - - # inf signing - s = Series([np.nan, 1., -1.]) - result = s / 0 - expected = Series([np.nan, np.inf, -np.inf]) - assert_series_equal(result, expected) + result = second.div(first) + assert_series_equal(result, expected, check_names=False) - # float/integer issue - # GH 7785 - p = DataFrame({'first': (1, 0), 'second': (-0.01, -0.02)}) - expected = Series([-0.01, -np.inf]) + result = second / first + assert_series_equal(result, expected) - result = p['second'].div(p['first']) - assert_series_equal(result, expected, check_names=False) + def test_rdiv_zero_compat(self): + # GH#8674 + zero_array = np.array([0] * 5) + data = np.random.randn(5) + expected = pd.Series([0.] 
* 5) - result = p['second'] / p['first'] - assert_series_equal(result, expected) + result = zero_array / pd.Series(data) + assert_series_equal(result, expected) - # GH 9144 - s = Series([-1, 0, 1]) + result = pd.Series(zero_array) / data + assert_series_equal(result, expected) - result = 0 / s - expected = Series([0.0, nan, 0.0]) - assert_series_equal(result, expected) + result = pd.Series(zero_array) / pd.Series(data) + assert_series_equal(result, expected) - result = s / 0 - expected = Series([-inf, nan, inf]) - assert_series_equal(result, expected) + def test_div_zero_inf_signs(self): + # GH#9144, inf signing + ser = Series([-1, 0, 1], name='first') + expected = Series([-np.inf, np.nan, np.inf], name='first') - result = s // 0 - expected = Series([-inf, nan, inf]) - assert_series_equal(result, expected) + result = ser / 0 + assert_series_equal(result, expected) - # GH 8674 - zero_array = np.array([0] * 5) - data = np.random.randn(5) - expected = pd.Series([0.] * 5) - result = zero_array / pd.Series(data) - assert_series_equal(result, expected) + def test_rdiv_zero(self): + # GH#9144 + ser = Series([-1, 0, 1], name='first') + expected = Series([0.0, np.nan, 0.0], name='first') - result = pd.Series(zero_array) / data - assert_series_equal(result, expected) + result = 0 / ser + assert_series_equal(result, expected) - result = pd.Series(zero_array) / pd.Series(data) - assert_series_equal(result, expected) + def test_floordiv_div(self): + # GH#9144 + ser = Series([-1, 0, 1], name='first') + + result = ser // 0 + expected = Series([-inf, nan, inf], name='first') + assert_series_equal(result, expected) class TestTimedeltaSeriesArithmeticWithIntegers(object): @@ -1576,33 +1580,42 @@ def test_dt64_series_add_intlike(self, tz): class TestSeriesOperators(TestData): - def test_op_method(self): - def check(series, other, check_reverse=False): - simple_ops = ['add', 'sub', 'mul', 'floordiv', 'truediv', 'pow'] - if not compat.PY3: - simple_ops.append('div') - - for opname in 
simple_ops: - op = getattr(Series, opname) - - if op == 'div': - alt = operator.truediv - else: - alt = getattr(operator, opname) - - result = op(series, other) - expected = alt(series, other) - assert_almost_equal(result, expected) - if check_reverse: - rop = getattr(Series, "r" + opname) - result = rop(series, other) - expected = alt(other, series) - assert_almost_equal(result, expected) + @pytest.mark.parametrize( + 'ts', + [ + (lambda x: x, lambda x: x * 2, False), + (lambda x: x, lambda x: x[::2], False), + (lambda x: x, lambda x: 5, True), + (lambda x: tm.makeFloatSeries(), + lambda x: tm.makeFloatSeries(), + True) + ]) + @pytest.mark.parametrize('opname', ['add', 'sub', 'mul', 'floordiv', + 'truediv', 'div', 'pow']) + def test_op_method(self, opname, ts): + # check that Series.{opname} behaves like Series.__{opname}__, + series = ts[0](self.ts) + other = ts[1](self.ts) + check_reverse = ts[2] + + if opname == 'div' and compat.PY3: + pytest.skip('div test only for Py3') + + op = getattr(Series, opname) + + if op == 'div': + alt = operator.truediv + else: + alt = getattr(operator, opname) - check(self.ts, self.ts * 2) - check(self.ts, self.ts[::2]) - check(self.ts, 5, check_reverse=True) - check(tm.makeFloatSeries(), tm.makeFloatSeries(), check_reverse=True) + result = op(series, other) + expected = alt(series, other) + assert_almost_equal(result, expected) + if check_reverse: + rop = getattr(Series, "r" + opname) + result = rop(series, other) + expected = alt(other, series) + assert_almost_equal(result, expected) def test_neg(self): assert_series_equal(-self.series, -1 * self.series) @@ -1971,20 +1984,15 @@ def test_operators_corner(self): index=self.ts.index[:-5], name='ts') tm.assert_series_equal(added[:-5], expected) - def test_operators_reverse_object(self): + @pytest.mark.parametrize('op', [operator.add, operator.sub, operator.mul, + operator.truediv, operator.floordiv]) + def test_operators_reverse_object(self, op): # GH 56 arr = 
Series(np.random.randn(10), index=np.arange(10), dtype=object) - def _check_op(arr, op): - result = op(1., arr) - expected = op(1., arr.astype(float)) - assert_series_equal(result.astype(float), expected) - - _check_op(arr, operator.add) - _check_op(arr, operator.sub) - _check_op(arr, operator.mul) - _check_op(arr, operator.truediv) - _check_op(arr, operator.floordiv) + result = op(1., arr) + expected = op(1., arr.astype(float)) + assert_series_equal(result.astype(float), expected) def test_arith_ops_df_compat(self): # GH 1134 From 91c76ccb0ca148fd9fd57fc7b82108f69e589b4a Mon Sep 17 00:00:00 2001 From: Dillon Niederhut Date: Thu, 8 Feb 2018 05:32:04 -0600 Subject: [PATCH 076/217] Bug: adds support for unary plus (#19297) --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/core/generic.py | 19 +++++++- pandas/tests/computation/test_eval.py | 60 ++++++++------------------ pandas/tests/frame/test_arithmetic.py | 4 +- pandas/tests/frame/test_operators.py | 43 ++++++++++++++++-- pandas/tests/series/test_arithmetic.py | 4 +- 6 files changed, 80 insertions(+), 51 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 6c4fce35529ad..5e94b9c15fa57 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -253,6 +253,7 @@ Current Behavior: Other Enhancements ^^^^^^^^^^^^^^^^^^ +- Unary ``+`` now permitted for ``Series`` and ``DataFrame`` as numeric operator (:issue:`16073`) - Better support for :func:`Dataframe.style.to_excel` output with the ``xlsxwriter`` engine. (:issue:`16149`) - :func:`pandas.tseries.frequencies.to_offset` now accepts leading '+' signs e.g. '+1h'. 
(:issue:`18171`) - :func:`MultiIndex.unique` now supports the ``level=`` argument, to get unique values from a specific index level (:issue:`17896`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index cb4bbb7b27c42..35f866c9e7d58 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -25,6 +25,7 @@ is_list_like, is_dict_like, is_re_compilable, + is_period_arraylike, pandas_dtype) from pandas.core.dtypes.cast import maybe_promote, maybe_upcast_putmask from pandas.core.dtypes.inference import is_hashable @@ -1027,10 +1028,24 @@ def _indexed_same(self, other): def __neg__(self): values = com._values_from_object(self) - if values.dtype == np.bool_: + if is_bool_dtype(values): arr = operator.inv(values) - else: + elif (is_numeric_dtype(values) or is_timedelta64_dtype(values)): arr = operator.neg(values) + else: + raise TypeError("Unary negative expects numeric dtype, not {}" + .format(values.dtype)) + return self.__array_wrap__(arr) + + def __pos__(self): + values = com._values_from_object(self) + if (is_bool_dtype(values) or is_period_arraylike(values)): + arr = values + elif (is_numeric_dtype(values) or is_timedelta64_dtype(values)): + arr = operator.pos(values) + else: + raise TypeError("Unary plus expects numeric dtype, not {}" + .format(values.dtype)) return self.__array_wrap__(arr) def __invert__(self): diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index 9c3572f9ffe72..07ba0b681418e 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -542,66 +542,42 @@ def test_frame_pos(self): # float lhs = DataFrame(randn(5, 2)) - if self.engine == 'python': - with pytest.raises(TypeError): - result = pd.eval(expr, engine=self.engine, parser=self.parser) - else: - expect = lhs - result = pd.eval(expr, engine=self.engine, parser=self.parser) - assert_frame_equal(expect, result) + expect = lhs + result = pd.eval(expr, engine=self.engine, parser=self.parser) + 
assert_frame_equal(expect, result) # int lhs = DataFrame(randint(5, size=(5, 2))) - if self.engine == 'python': - with pytest.raises(TypeError): - result = pd.eval(expr, engine=self.engine, parser=self.parser) - else: - expect = lhs - result = pd.eval(expr, engine=self.engine, parser=self.parser) - assert_frame_equal(expect, result) + expect = lhs + result = pd.eval(expr, engine=self.engine, parser=self.parser) + assert_frame_equal(expect, result) # bool doesn't work with numexpr but works elsewhere lhs = DataFrame(rand(5, 2) > 0.5) - if self.engine == 'python': - with pytest.raises(TypeError): - result = pd.eval(expr, engine=self.engine, parser=self.parser) - else: - expect = lhs - result = pd.eval(expr, engine=self.engine, parser=self.parser) - assert_frame_equal(expect, result) + expect = lhs + result = pd.eval(expr, engine=self.engine, parser=self.parser) + assert_frame_equal(expect, result) def test_series_pos(self): expr = self.ex('+') # float lhs = Series(randn(5)) - if self.engine == 'python': - with pytest.raises(TypeError): - result = pd.eval(expr, engine=self.engine, parser=self.parser) - else: - expect = lhs - result = pd.eval(expr, engine=self.engine, parser=self.parser) - assert_series_equal(expect, result) + expect = lhs + result = pd.eval(expr, engine=self.engine, parser=self.parser) + assert_series_equal(expect, result) # int lhs = Series(randint(5, size=5)) - if self.engine == 'python': - with pytest.raises(TypeError): - result = pd.eval(expr, engine=self.engine, parser=self.parser) - else: - expect = lhs - result = pd.eval(expr, engine=self.engine, parser=self.parser) - assert_series_equal(expect, result) + expect = lhs + result = pd.eval(expr, engine=self.engine, parser=self.parser) + assert_series_equal(expect, result) # bool doesn't work with numexpr but works elsewhere lhs = Series(rand(5) > 0.5) - if self.engine == 'python': - with pytest.raises(TypeError): - result = pd.eval(expr, engine=self.engine, parser=self.parser) - else: - expect = 
lhs - result = pd.eval(expr, engine=self.engine, parser=self.parser) - assert_series_equal(expect, result) + expect = lhs + result = pd.eval(expr, engine=self.engine, parser=self.parser) + assert_series_equal(expect, result) def test_scalar_unary(self): with pytest.raises(TypeError): diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 1bb8e8edffc6e..a3a799aed1c55 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -245,7 +245,7 @@ def test_ops_frame_period(self): exp = pd.DataFrame({'A': np.array([2, 1], dtype=object), 'B': np.array([14, 13], dtype=object)}) tm.assert_frame_equal(p - df, exp) - tm.assert_frame_equal(df - p, -exp) + tm.assert_frame_equal(df - p, -1 * exp) df2 = pd.DataFrame({'A': [pd.Period('2015-05', freq='M'), pd.Period('2015-06', freq='M')], @@ -257,4 +257,4 @@ def test_ops_frame_period(self): exp = pd.DataFrame({'A': np.array([4, 4], dtype=object), 'B': np.array([16, 16], dtype=object)}) tm.assert_frame_equal(df2 - df, exp) - tm.assert_frame_equal(df - df2, -exp) + tm.assert_frame_equal(df - df2, -1 * exp) diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index 26974b6398694..5df50f3d7835b 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -271,13 +271,50 @@ def test_logical_with_nas(self): expected = Series([True, True]) assert_series_equal(result, expected) - def test_neg(self): - # what to do? 
- assert_frame_equal(-self.frame, -1 * self.frame) + @pytest.mark.parametrize('df,expected', [ + (pd.DataFrame({'a': [-1, 1]}), pd.DataFrame({'a': [1, -1]})), + (pd.DataFrame({'a': [False, True]}), + pd.DataFrame({'a': [True, False]})), + (pd.DataFrame({'a': pd.Series(pd.to_timedelta([-1, 1]))}), + pd.DataFrame({'a': pd.Series(pd.to_timedelta([1, -1]))})) + ]) + def test_neg_numeric(self, df, expected): + assert_frame_equal(-df, expected) + assert_series_equal(-df['a'], expected['a']) + + @pytest.mark.parametrize('df', [ + pd.DataFrame({'a': ['a', 'b']}), + pd.DataFrame({'a': pd.to_datetime(['2017-01-22', '1970-01-01'])}), + ]) + def test_neg_raises(self, df): + with pytest.raises(TypeError): + (- df) + with pytest.raises(TypeError): + (- df['a']) def test_invert(self): assert_frame_equal(-(self.frame < 0), ~(self.frame < 0)) + @pytest.mark.parametrize('df', [ + pd.DataFrame({'a': [-1, 1]}), + pd.DataFrame({'a': [False, True]}), + pd.DataFrame({'a': pd.Series(pd.to_timedelta([-1, 1]))}), + ]) + def test_pos_numeric(self, df): + # GH 16073 + assert_frame_equal(+df, df) + assert_series_equal(+df['a'], df['a']) + + @pytest.mark.parametrize('df', [ + pd.DataFrame({'a': ['a', 'b']}), + pd.DataFrame({'a': pd.to_datetime(['2017-01-22', '1970-01-01'])}), + ]) + def test_pos_raises(self, df): + with pytest.raises(TypeError): + (+ df) + with pytest.raises(TypeError): + (+ df['a']) + def test_arith_flex_frame(self): ops = ['add', 'sub', 'mul', 'div', 'truediv', 'pow', 'floordiv', 'mod'] if not compat.PY3: diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index 1d9fa9dc15531..94da97ef45301 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -315,7 +315,7 @@ def test_ops_series_period(self): # dtype will be object because of original dtype expected = pd.Series([9, 8], name='xxx', dtype=object) tm.assert_series_equal(per - ser, expected) - tm.assert_series_equal(ser - per, -expected) + 
tm.assert_series_equal(ser - per, -1 * expected) s2 = pd.Series([pd.Period('2015-01-05', freq='D'), pd.Period('2015-01-04', freq='D')], name='xxx') @@ -323,7 +323,7 @@ def test_ops_series_period(self): expected = pd.Series([4, 2], name='xxx', dtype=object) tm.assert_series_equal(s2 - ser, expected) - tm.assert_series_equal(ser - s2, -expected) + tm.assert_series_equal(ser - s2, -1 * expected) class TestTimestampSeriesArithmetic(object): From 51c976cc2eac282275cbe189d2e34e4d2e1636ed Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 9 Feb 2018 06:25:32 -0600 Subject: [PATCH 077/217] Ignore warnings when reading pickle files (#19580) --- pandas/io/pickle.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py index fa953f7d876cc..756096dd0c9ce 100644 --- a/pandas/io/pickle.py +++ b/pandas/io/pickle.py @@ -1,4 +1,5 @@ """ pickle compat """ +import warnings import numpy as np from numpy.lib.format import read_array, write_array @@ -96,7 +97,9 @@ def try_read(path, encoding=None): # cpickle # GH 6899 try: - return read_wrapper(lambda f: pkl.load(f)) + with warnings.catch_warnings(record=True): + # We want to silencce any warnings about, e.g. moved modules. + return read_wrapper(lambda f: pkl.load(f)) except Exception: # reg/patched pickle try: From f523886d4f0fa1d970207830b9a8968c204a5ab6 Mon Sep 17 00:00:00 2001 From: samghelms Date: Fri, 9 Feb 2018 07:29:02 -0500 Subject: [PATCH 078/217] ENH: added an optional css id to `
elements. """ - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) def _parse_thead(self, table): """Return the header of a table. @@ -305,7 +305,7 @@ def _parse_thead(self, table): thead : node-like A
` tags created by `frame.to_html()` (#19594) --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/core/frame.py | 10 ++++++++-- pandas/io/formats/format.py | 25 +++++++++++++++++++------ pandas/tests/io/formats/test_format.py | 4 ++-- pandas/tests/io/formats/test_to_html.py | 7 +++++++ 5 files changed, 37 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 5e94b9c15fa57..083242cd69b74 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -717,6 +717,7 @@ I/O ^^^ - :func:`read_html` now rewinds seekable IO objects after parse failure, before attempting to parse with a new parser. If a parser errors and the object is non-seekable, an informative error is raised suggesting the use of a different parser (:issue:`17975`) +- :meth:`DataFrame.to_html` now has an option to add an id to the leading `<table>` tag (:issue:`8496`) - Bug in :func:`read_msgpack` with a non existent file is passed in Python 2 (:issue:`15296`) - Bug in :func:`read_csv` where a ``MultiIndex`` with duplicate columns was not being mangled appropriately (:issue:`18062`) - Bug in :func:`read_csv` where missing values were not being handled properly when ``keep_default_na=False`` with dictionary ``na_values`` (:issue:`19227`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 28923f0fbf240..6d8dcb8a1ca89 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1727,7 +1727,7 @@ def to_html(self, buf=None, columns=None, col_space=None, header=True, sparsify=None, index_names=True, justify=None, bold_rows=True, classes=None, escape=True, max_rows=None, max_cols=None, show_dimensions=False, notebook=False, decimal='.', - border=None): + border=None, table_id=None): """ Render a DataFrame as an HTML table. @@ -1755,6 +1755,12 @@ def to_html(self, buf=None, columns=None, col_space=None, header=True, `<table>` tag. Default ``pd.options.html.border``. .. 
versionadded:: 0.19.0 + + table_id : str, optional + A css id is included in the opening `<table>` tag if specified. + + .. versionadded:: 0.23.0 + + """ if (justify is not None and @@ -1772,7 +1778,7 @@ def to_html(self, buf=None, columns=None, col_space=None, header=True, max_rows=max_rows, max_cols=max_cols, show_dimensions=show_dimensions, - decimal=decimal) + decimal=decimal, table_id=table_id) # TODO: a generic formatter wld b in DataFrameFormatter formatter.to_html(classes=classes, notebook=notebook, border=border) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 269c81b380b5e..621641747f376 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -77,7 +77,11 @@ index_names : bool, optional Prints the names of the indexes, default True line_width : int, optional - Width to wrap a line in characters, default no wrap""" + Width to wrap a line in characters, default no wrap + table_id : str, optional + id for the <table> element create by to_html + + .. 
versionadded:: 0.23.0""" _VALID_JUSTIFY_PARAMETERS = ("left", "right", "center", "justify", "justify-all", "start", "end", "inherit", @@ -387,7 +391,8 @@ def __init__(self, frame, buf=None, columns=None, col_space=None, header=True, index=True, na_rep='NaN', formatters=None, justify=None, float_format=None, sparsify=None, index_names=True, line_width=None, max_rows=None, - max_cols=None, show_dimensions=False, decimal='.', **kwds): + max_cols=None, show_dimensions=False, decimal='.', + table_id=None, **kwds): self.frame = frame if buf is not None: self.buf = _expand_user(_stringify_path(buf)) @@ -413,6 +418,7 @@ def __init__(self, frame, buf=None, columns=None, col_space=None, self.max_rows_displayed = min(max_rows or len(self.frame), len(self.frame)) self.show_dimensions = show_dimensions + self.table_id = table_id if justify is None: self.justify = get_option("display.colheader_justify") @@ -740,7 +746,8 @@ def to_html(self, classes=None, notebook=False, border=None): max_rows=self.max_rows, max_cols=self.max_cols, notebook=notebook, - border=border) + border=border, + table_id=self.table_id) if hasattr(self.buf, 'write'): html_renderer.write_result(self.buf) elif isinstance(self.buf, compat.string_types): @@ -1082,7 +1089,7 @@ class HTMLFormatter(TableFormatter): indent_delta = 2 def __init__(self, formatter, classes=None, max_rows=None, max_cols=None, - notebook=False, border=None): + notebook=False, border=None, table_id=None): self.fmt = formatter self.classes = classes @@ -1101,6 +1108,7 @@ def __init__(self, formatter, classes=None, max_rows=None, max_cols=None, if border is None: border = get_option('display.html.border') self.border = border + self.table_id = table_id def write(self, s, indent=0): rs = pprint_thing(s) @@ -1197,6 +1205,7 @@ def write_result(self, buf): indent = 0 + id_section = "" frame = self.frame _classes = ['dataframe'] # Default class.
@@ -1220,8 +1229,12 @@ def write_result(self, buf): self.write(''.format(style=div_style)) self.write_style() - self.write('
' - .format(border=self.border, cls=' '.join(_classes)), indent) + + if self.table_id is not None: + id_section = ' id="{table_id}"'.format(table_id=self.table_id) + self.write('
' + .format(border=self.border, cls=' '.join(_classes), + id_section=id_section), indent) indent += self.indent_delta indent = self._write_header(indent) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index e0ce27de5c31f..dddba5b425c3b 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -1492,7 +1492,7 @@ def test_repr_html_float(self): 'B': np.arange(41, 41 + h)}).set_index('idx') reg_repr = df._repr_html_() assert '..' not in reg_repr - assert str(40 + h) in reg_repr + assert ''.format(val=str(40 + h)) in reg_repr h = max_rows + 1 df = DataFrame({'idx': np.linspace(-10, 10, h), @@ -1500,7 +1500,7 @@ def test_repr_html_float(self): 'B': np.arange(41, 41 + h)}).set_index('idx') long_repr = df._repr_html_() assert '..' in long_repr - assert '31' not in long_repr + assert ''.format(val='31') not in long_repr assert u('{h} rows ').format(h=h) in long_repr assert u('2 columns') in long_repr diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index 9e063c2d176e1..f69cac62513d4 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -1864,3 +1864,10 @@ def test_to_html_with_index_names_false(self): name='myindexname')) result = df.to_html(index_names=False) assert 'myindexname' not in result + + def test_to_html_with_id(self): + # gh-8496 + df = pd.DataFrame({"A": [1, 2]}, index=pd.Index(['a', 'b'], + name='myindexname')) + result = df.to_html(index_names=False, table_id="TEST_ID") + assert ' id="TEST_ID"' in result From b5d4128dac32923225e16c0e63fa7656a746dff2 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 9 Feb 2018 10:11:17 -0600 Subject: [PATCH 079/217] CI: Fixed NumPy pinning in conda-build (#19575) * CI: Fixed NumPy pinning in conda-build * Unpin NumPy Quite install * Pin numpy * Unpin everywhere else * Build vs. 
1.11 * remove one more pin * Remove one more pin * bump pyarrow --- ci/install_travis.sh | 6 +++--- ci/requirements-3.5_CONDA_BUILD_TEST.build | 2 +- ci/requirements-3.5_CONDA_BUILD_TEST.run | 2 +- ci/requirements-3.5_CONDA_BUILD_TEST.sh | 2 +- ci/requirements-3.6.build | 2 +- conda.recipe/meta.yaml | 4 ++-- 6 files changed, 9 insertions(+), 9 deletions(-) diff --git a/ci/install_travis.sh b/ci/install_travis.sh index 4ec5b0a9d8820..6e270519e60c3 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -110,7 +110,7 @@ if [ -e ${REQ} ]; then fi time conda install -n pandas pytest>=3.1.0 -time pip install pytest-xdist moto +time pip install -q pytest-xdist moto if [ "$LINT" ]; then conda install flake8=3.4.1 @@ -181,10 +181,10 @@ elif [ "$CONDA_BUILD_TEST" ]; then # build & install testing echo "[building conda recipe]" - time conda build ./conda.recipe --numpy 1.13 --python 3.5 -q --no-test + time conda build ./conda.recipe --python 3.5 -q --no-test || exit 1 echo "[installing]" - conda install pandas --use-local + conda install pandas --use-local || exit 1 else diff --git a/ci/requirements-3.5_CONDA_BUILD_TEST.build b/ci/requirements-3.5_CONDA_BUILD_TEST.build index 6648e3778777c..f7befe3b31865 100644 --- a/ci/requirements-3.5_CONDA_BUILD_TEST.build +++ b/ci/requirements-3.5_CONDA_BUILD_TEST.build @@ -2,5 +2,5 @@ python=3.5* python-dateutil pytz nomkl -numpy=1.13* +numpy cython diff --git a/ci/requirements-3.5_CONDA_BUILD_TEST.run b/ci/requirements-3.5_CONDA_BUILD_TEST.run index 19d9a91e86585..669cf437f2164 100644 --- a/ci/requirements-3.5_CONDA_BUILD_TEST.run +++ b/ci/requirements-3.5_CONDA_BUILD_TEST.run @@ -1,5 +1,5 @@ pytz -numpy=1.13* +numpy openpyxl xlsxwriter xlrd diff --git a/ci/requirements-3.5_CONDA_BUILD_TEST.sh b/ci/requirements-3.5_CONDA_BUILD_TEST.sh index 09d6775cfc894..093fdbcf21d78 100644 --- a/ci/requirements-3.5_CONDA_BUILD_TEST.sh +++ b/ci/requirements-3.5_CONDA_BUILD_TEST.sh @@ -8,4 +8,4 @@ echo "install 35 CONDA_BUILD_TEST" conda 
remove -n pandas python-dateutil --force pip install python-dateutil -conda install -n pandas -c conda-forge feather-format pyarrow=0.5.0 +conda install -n pandas -c conda-forge feather-format pyarrow=0.7.1 diff --git a/ci/requirements-3.6.build b/ci/requirements-3.6.build index 94e1152450d87..1c4b46aea3865 100644 --- a/ci/requirements-3.6.build +++ b/ci/requirements-3.6.build @@ -2,5 +2,5 @@ python=3.6* python-dateutil pytz nomkl -numpy=1.13.* +numpy cython diff --git a/conda.recipe/meta.yaml b/conda.recipe/meta.yaml index 87a79f7e5a987..86bed996c8aab 100644 --- a/conda.recipe/meta.yaml +++ b/conda.recipe/meta.yaml @@ -14,14 +14,14 @@ requirements: build: - python - cython - - {{ pin_compatible('numpy', upper_bound='1.14') }} + - numpy 1.11.* - setuptools >=3.3 - python-dateutil >=2.5.0 - pytz run: - python - - {{ pin_compatible('numpy', upper_bound='1.14') }} + - numpy >=1.11.* - python-dateutil >=2.5.0 - pytz From 724638115754af34572430f5a8c2c9f55661b1ab Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 9 Feb 2018 14:53:41 -0600 Subject: [PATCH 080/217] API: Default ExtensionArray.astype (#19604) * API: Default ExtensionArray.astype (cherry picked from commit 943a915562b72bed147c857de927afa0daf31c1a) * Py2 compat * Moved * Moved dtypes --- pandas/core/arrays/base.py | 21 +++++++++ pandas/tests/dtypes/test_dtypes.py | 32 +------------ pandas/tests/extension/__init__.py | 0 pandas/tests/extension/test_common.py | 67 +++++++++++++++++++++++++++ 4 files changed, 89 insertions(+), 31 deletions(-) create mode 100644 pandas/tests/extension/__init__.py create mode 100644 pandas/tests/extension/test_common.py diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 1556b653819a6..553e1e0ac2066 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -1,4 +1,6 @@ """An interface for extending pandas with custom arrays.""" +import numpy as np + from pandas.errors import AbstractMethodError _not_implemented_message = "{} does not 
implement {}." @@ -138,6 +140,25 @@ def nbytes(self): # ------------------------------------------------------------------------ # Additional Methods # ------------------------------------------------------------------------ + def astype(self, dtype, copy=True): + """Cast to a NumPy array with 'dtype'. + + Parameters + ---------- + dtype : str or dtype + Typecode or data-type to which the array is cast. + copy : bool, default True + Whether to copy the data, even if not necessary. If False, + a copy is made only if the old dtype does not match the + new dtype. + + Returns + ------- + array : ndarray + NumPy ndarray with 'dtype' for its dtype. + """ + return np.array(self, dtype=dtype, copy=copy) + def isna(self): # type: () -> np.ndarray """Boolean NumPy array indicating if each value is missing. diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index eca4dd4cf2106..d800a7b92b559 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -10,14 +10,12 @@ Series, Categorical, CategoricalIndex, IntervalIndex, date_range) from pandas.compat import string_types -from pandas.core.arrays import ExtensionArray from pandas.core.dtypes.dtypes import ( DatetimeTZDtype, PeriodDtype, - IntervalDtype, CategoricalDtype, ExtensionDtype) + IntervalDtype, CategoricalDtype) from pandas.core.dtypes.common import ( is_categorical_dtype, is_categorical, is_datetime64tz_dtype, is_datetimetz, - is_extension_array_dtype, is_period_dtype, is_period, is_dtype_equal, is_datetime64_ns_dtype, is_datetime64_dtype, is_interval_dtype, @@ -744,31 +742,3 @@ def test_categorical_categories(self): tm.assert_index_equal(c1.categories, pd.Index(['a', 'b'])) c1 = CategoricalDtype(CategoricalIndex(['a', 'b'])) tm.assert_index_equal(c1.categories, pd.Index(['a', 'b'])) - - -class DummyArray(ExtensionArray): - pass - - -class DummyDtype(ExtensionDtype): - pass - - -class TestExtensionArrayDtype(object): - - 
@pytest.mark.parametrize('values', [ - pd.Categorical([]), - pd.Categorical([]).dtype, - pd.Series(pd.Categorical([])), - DummyDtype(), - DummyArray(), - ]) - def test_is_extension_array_dtype(self, values): - assert is_extension_array_dtype(values) - - @pytest.mark.parametrize('values', [ - np.array([]), - pd.Series(np.array([])), - ]) - def test_is_not_extension_array_dtype(self, values): - assert not is_extension_array_dtype(values) diff --git a/pandas/tests/extension/__init__.py b/pandas/tests/extension/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/extension/test_common.py b/pandas/tests/extension/test_common.py new file mode 100644 index 0000000000000..1f4582f687415 --- /dev/null +++ b/pandas/tests/extension/test_common.py @@ -0,0 +1,67 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas.util.testing as tm +from pandas.core.arrays import ExtensionArray +from pandas.core.dtypes.common import is_extension_array_dtype +from pandas.core.dtypes.dtypes import ExtensionDtype + + +class DummyDtype(ExtensionDtype): + pass + + +class DummyArray(ExtensionArray): + + def __init__(self, data): + self.data = data + + def __array__(self, dtype): + return self.data + + @property + def dtype(self): + return self.data.dtype + + +class TestExtensionArrayDtype(object): + + @pytest.mark.parametrize('values', [ + pd.Categorical([]), + pd.Categorical([]).dtype, + pd.Series(pd.Categorical([])), + DummyDtype(), + DummyArray(np.array([1, 2])), + ]) + def test_is_extension_array_dtype(self, values): + assert is_extension_array_dtype(values) + + @pytest.mark.parametrize('values', [ + np.array([]), + pd.Series(np.array([])), + ]) + def test_is_not_extension_array_dtype(self, values): + assert not is_extension_array_dtype(values) + + +def test_astype(): + + arr = DummyArray(np.array([1, 2, 3])) + expected = np.array([1, 2, 3], dtype=object) + + result = arr.astype(object) + tm.assert_numpy_array_equal(result, expected) 
+ + result = arr.astype('object') + tm.assert_numpy_array_equal(result, expected) + + +def test_astype_no_copy(): + arr = DummyArray(np.array([1, 2, 3], dtype=np.int64)) + result = arr.astype(arr.dtype, copy=False) + + assert arr.data is result + + result = arr.astype(arr.dtype) + assert arr.data is not result From 5ea49ef045837ddc9da7e63b466219d007ce01ef Mon Sep 17 00:00:00 2001 From: William Ayd Date: Sat, 10 Feb 2018 08:05:51 -0800 Subject: [PATCH 081/217] PERF: Cythonize Groupby Rank (#19481) --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/_libs/algos.pxd | 8 ++ pandas/_libs/algos.pyx | 8 -- pandas/_libs/groupby.pyx | 5 +- pandas/_libs/groupby_helper.pxi.in | 165 ++++++++++++++++++++++++++ pandas/core/groupby.py | 76 +++++++++--- pandas/tests/groupby/test_groupby.py | 166 +++++++++++++++++++++++++++ 7 files changed, 406 insertions(+), 23 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 083242cd69b74..cf5a44442045b 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -581,6 +581,7 @@ Performance Improvements - Improved performance of :func:`DataFrame.median` with ``axis=1`` when bottleneck is not installed (:issue:`16468`) - Improved performance of :func:`MultiIndex.get_loc` for large indexes, at the cost of a reduction in performance for small ones (:issue:`18519`) - Improved performance of pairwise ``.rolling()`` and ``.expanding()`` with ``.cov()`` and ``.corr()`` operations (:issue:`17917`) +- Improved performance of :func:`DataFrameGroupBy.rank` (:issue:`15779`) .. 
_whatsnew_0230.docs: diff --git a/pandas/_libs/algos.pxd b/pandas/_libs/algos.pxd index 6d80e6f0073eb..a535872ff7279 100644 --- a/pandas/_libs/algos.pxd +++ b/pandas/_libs/algos.pxd @@ -11,3 +11,11 @@ cdef inline Py_ssize_t swap(numeric *a, numeric *b) nogil: a[0] = b[0] b[0] = t return 0 + +cdef enum TiebreakEnumType: + TIEBREAK_AVERAGE + TIEBREAK_MIN, + TIEBREAK_MAX + TIEBREAK_FIRST + TIEBREAK_FIRST_DESCENDING + TIEBREAK_DENSE diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 5d17488963b1c..a418e54e4da9b 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -31,14 +31,6 @@ cdef double nan = NaN cdef int64_t iNaT = get_nat() -cdef: - int TIEBREAK_AVERAGE = 0 - int TIEBREAK_MIN = 1 - int TIEBREAK_MAX = 2 - int TIEBREAK_FIRST = 3 - int TIEBREAK_FIRST_DESCENDING = 4 - int TIEBREAK_DENSE = 5 - tiebreakers = { 'average': TIEBREAK_AVERAGE, 'min': TIEBREAK_MIN, diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 55de700c9af52..d75c3a71896e3 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -16,8 +16,9 @@ from numpy cimport (ndarray, from libc.stdlib cimport malloc, free from util cimport numeric, get_nat -from algos cimport swap -from algos import take_2d_axis1_float64_float64, groupsort_indexer +from algos cimport (swap, TiebreakEnumType, TIEBREAK_AVERAGE, TIEBREAK_MIN, + TIEBREAK_MAX, TIEBREAK_FIRST, TIEBREAK_DENSE) +from algos import take_2d_axis1_float64_float64, groupsort_indexer, tiebreakers cdef int64_t iNaT = get_nat() diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in index a751fadaf48cf..b24444c422efa 100644 --- a/pandas/_libs/groupby_helper.pxi.in +++ b/pandas/_libs/groupby_helper.pxi.in @@ -444,8 +444,173 @@ def group_nth_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, else: out[i, j] = resx[i, j] + +@cython.boundscheck(False) +@cython.wraparound(False) +def group_rank_{{name}}(ndarray[float64_t, ndim=2] out, + ndarray[{{c_type}}, ndim=2] values, + 
ndarray[int64_t] labels, + bint is_datetimelike, object ties_method, + bint ascending, bint pct, object na_option): + """Provides the rank of values within each group + + Parameters + ---------- + out : array of float64_t values which this method will write its results to + values : array of {{c_type}} values to be ranked + labels : array containing unique label for each group, with its ordering + matching up to the corresponding record in `values` + is_datetimelike : bool + unused in this method but provided for call compatability with other + Cython transformations + ties_method : {'keep', 'top', 'bottom'} + * keep: leave NA values where they are + * top: smallest rank if ascending + * bottom: smallest rank if descending + ascending : boolean + False for ranks by high (1) to low (N) + pct : boolean + Compute percentage rank of data within each group + + Notes + ----- + This method modifies the `out` parameter rather than returning an object + """ + cdef: + TiebreakEnumType tiebreak + Py_ssize_t i, j, N, K, val_start=0, grp_start=0, dups=0, sum_ranks=0 + Py_ssize_t grp_vals_seen=1, grp_na_count=0 + ndarray[int64_t] _as + ndarray[float64_t, ndim=2] grp_sizes + ndarray[{{c_type}}] masked_vals + ndarray[uint8_t] mask + bint keep_na + {{c_type}} nan_fill_val + + tiebreak = tiebreakers[ties_method] + keep_na = na_option == 'keep' + N, K = ( values).shape + grp_sizes = np.ones_like(out) + + # Copy values into new array in order to fill missing data + # with mask, without obfuscating location of missing data + # in values array + masked_vals = np.array(values[:, 0], copy=True) + {{if name=='int64'}} + mask = (masked_vals == {{nan_val}}).astype(np.uint8) + {{else}} + mask = np.isnan(masked_vals).astype(np.uint8) + {{endif}} + + if ascending ^ (na_option == 'top'): + {{if name == 'int64'}} + nan_fill_val = np.iinfo(np.int64).max + {{else}} + nan_fill_val = np.inf + {{endif}} + order = (masked_vals, mask, labels) + else: + {{if name == 'int64'}} + nan_fill_val = 
np.iinfo(np.int64).min + {{else}} + nan_fill_val = -np.inf + {{endif}} + order = (masked_vals, ~mask, labels) + np.putmask(masked_vals, mask, nan_fill_val) + + # lexsort using labels, then mask, then actual values + # each label corresponds to a different group value, + # the mask helps you differentiate missing values before + # performing sort on the actual values + _as = np.lexsort(order) + + if not ascending: + _as = _as[::-1] + + with nogil: + # Loop over the length of the value array + # each incremental i value can be looked up in the _as array + # that we sorted previously, which gives us the location of + # that sorted value for retrieval back from the original + # values / masked_vals arrays + for i in range(N): + # dups and sum_ranks will be incremented each loop where + # the value / group remains the same, and should be reset + # when either of those change + # Used to calculate tiebreakers + dups += 1 + sum_ranks += i - grp_start + 1 + + # if keep_na, check for missing values and assign back + # to the result where appropriate + if keep_na and masked_vals[_as[i]] == nan_fill_val: + grp_na_count += 1 + out[_as[i], 0] = nan + else: + # this implementation is inefficient because it will + # continue overwriting previously encountered dups + # i.e. 
if 5 duplicated values are encountered it will + # write to the result as follows (assumes avg tiebreaker): + # 1 + # .5 .5 + # .33 .33 .33 + # .25 .25 .25 .25 + # .2 .2 .2 .2 .2 + # + # could potentially be optimized to only write to the + # result once the last duplicate value is encountered + if tiebreak == TIEBREAK_AVERAGE: + for j in range(i - dups + 1, i + 1): + out[_as[j], 0] = sum_ranks / dups + elif tiebreak == TIEBREAK_MIN: + for j in range(i - dups + 1, i + 1): + out[_as[j], 0] = i - grp_start - dups + 2 + elif tiebreak == TIEBREAK_MAX: + for j in range(i - dups + 1, i + 1): + out[_as[j], 0] = i - grp_start + 1 + elif tiebreak == TIEBREAK_FIRST: + for j in range(i - dups + 1, i + 1): + if ascending: + out[_as[j], 0] = j + 1 - grp_start + else: + out[_as[j], 0] = 2 * i - j - dups + 2 - grp_start + elif tiebreak == TIEBREAK_DENSE: + for j in range(i - dups + 1, i + 1): + out[_as[j], 0] = grp_vals_seen + + # look forward to the next value (using the sorting in _as) + # if the value does not equal the current value then we need to + # reset the dups and sum_ranks, knowing that a new value is coming + # up. the conditional also needs to handle nan equality and the + # end of iteration + if (i == N - 1 or ( + (masked_vals[_as[i]] != masked_vals[_as[i+1]]) and not + (mask[_as[i]] and mask[_as[i+1]]))): + dups = sum_ranks = 0 + val_start = i + grp_vals_seen += 1 + + # Similar to the previous conditional, check now if we are moving + # to a new group. If so, keep track of the index where the new + # group occurs, so the tiebreaker calculations can decrement that + # from their position. fill in the size of each group encountered + # (used by pct calculations later). 
also be sure to reset any of + # the items helping to calculate dups + if i == N - 1 or labels[_as[i]] != labels[_as[i+1]]: + for j in range(grp_start, i + 1): + grp_sizes[_as[j], 0] = i - grp_start + 1 - grp_na_count + dups = sum_ranks = 0 + grp_na_count = 0 + val_start = i + 1 + grp_start = i + 1 + grp_vals_seen = 1 + + if pct: + for i in range(N): + out[i, 0] = out[i, 0] / grp_sizes[i, 0] {{endfor}} + #---------------------------------------------------------------------- # group_min, group_max #---------------------------------------------------------------------- diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 01241db7c0c42..0363bcd02aa16 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -994,7 +994,7 @@ def _transform_should_cast(self, func_nm): return (self.size().fillna(0) > 0).any() and (func_nm not in _cython_cast_blacklist) - def _cython_transform(self, how, numeric_only=True): + def _cython_transform(self, how, numeric_only=True, **kwargs): output = collections.OrderedDict() for name, obj in self._iterate_slices(): is_numeric = is_numeric_dtype(obj.dtype) @@ -1002,12 +1002,16 @@ def _cython_transform(self, how, numeric_only=True): continue try: - result, names = self.grouper.transform(obj.values, how) + result, names = self.grouper.transform(obj.values, how, + **kwargs) except NotImplementedError: continue except AssertionError as e: raise GroupByError(str(e)) - output[name] = self._try_cast(result, obj) + if self._transform_should_cast(how): + output[name] = self._try_cast(result, obj) + else: + output[name] = result if len(output) == 0: raise DataError('No numeric types to aggregate') @@ -1768,6 +1772,37 @@ def cumcount(self, ascending=True): cumcounts = self._cumcount_array(ascending=ascending) return Series(cumcounts, index) + @Substitution(name='groupby') + @Appender(_doc_template) + def rank(self, method='average', ascending=True, na_option='keep', + pct=False, axis=0): + """Provides the rank of values within 
each group + + Parameters + ---------- + method : {'average', 'min', 'max', 'first', 'dense'}, efault 'average' + * average: average rank of group + * min: lowest rank in group + * max: highest rank in group + * first: ranks assigned in order they appear in the array + * dense: like 'min', but rank always increases by 1 between groups + method : {'keep', 'top', 'bottom'}, default 'keep' + * keep: leave NA values where they are + * top: smallest rank if ascending + * bottom: smallest rank if descending + ascending : boolean, default True + False for ranks by high (1) to low (N) + pct : boolean, default False + Compute percentage rank of data within each group + + Returns + ----- + DataFrame with ranking of values within each group + """ + return self._cython_transform('rank', numeric_only=False, + ties_method=method, ascending=ascending, + na_option=na_option, pct=pct, axis=axis) + @Substitution(name='groupby') @Appender(_doc_template) def cumprod(self, axis=0, *args, **kwargs): @@ -2183,6 +2218,16 @@ def get_group_levels(self): 'cumsum': 'group_cumsum', 'cummin': 'group_cummin', 'cummax': 'group_cummax', + 'rank': { + 'name': 'group_rank', + 'f': lambda func, a, b, c, d, **kwargs: func( + a, b, c, d, + kwargs.get('ties_method', 'average'), + kwargs.get('ascending', True), + kwargs.get('pct', False), + kwargs.get('na_option', 'keep') + ) + } } } @@ -2242,7 +2287,8 @@ def wrapper(*args, **kwargs): (how, dtype_str)) return func - def _cython_operation(self, kind, values, how, axis, min_count=-1): + def _cython_operation(self, kind, values, how, axis, min_count=-1, + **kwargs): assert kind in ['transform', 'aggregate'] # can we do this operation with our cython functions @@ -2314,10 +2360,13 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1): else: raise - if is_numeric: - out_dtype = '%s%d' % (values.dtype.kind, values.dtype.itemsize) + if how == 'rank': + out_dtype = 'float' else: - out_dtype = 'object' + if is_numeric: + out_dtype = '%s%d' % 
(values.dtype.kind, values.dtype.itemsize) + else: + out_dtype = 'object' labels, _, _ = self.group_info @@ -2334,7 +2383,8 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1): # TODO: min_count result = self._transform( - result, values, labels, func, is_numeric, is_datetimelike) + result, values, labels, func, is_numeric, is_datetimelike, + **kwargs) if is_integer_dtype(result) and not is_datetimelike: mask = result == iNaT @@ -2373,8 +2423,8 @@ def aggregate(self, values, how, axis=0, min_count=-1): return self._cython_operation('aggregate', values, how, axis, min_count=min_count) - def transform(self, values, how, axis=0): - return self._cython_operation('transform', values, how, axis) + def transform(self, values, how, axis=0, **kwargs): + return self._cython_operation('transform', values, how, axis, **kwargs) def _aggregate(self, result, counts, values, comp_ids, agg_func, is_numeric, is_datetimelike, min_count=-1): @@ -2394,7 +2444,7 @@ def _aggregate(self, result, counts, values, comp_ids, agg_func, return result def _transform(self, result, values, comp_ids, transform_func, - is_numeric, is_datetimelike): + is_numeric, is_datetimelike, **kwargs): comp_ids, _, ngroups = self.group_info if values.ndim > 3: @@ -2406,9 +2456,9 @@ def _transform(self, result, values, comp_ids, transform_func, chunk = chunk.squeeze() transform_func(result[:, :, i], values, - comp_ids, is_datetimelike) + comp_ids, is_datetimelike, **kwargs) else: - transform_func(result, values, comp_ids, is_datetimelike) + transform_func(result, values, comp_ids, is_datetimelike, **kwargs) return result diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 5172efe25d697..2db772ac54369 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1895,6 +1895,172 @@ def test_rank_apply(self): expected = expected.reindex(result.index) assert_series_equal(result, expected) + 
@pytest.mark.parametrize("grps", [ + ['qux'], ['qux', 'quux']]) + @pytest.mark.parametrize("vals", [ + [2, 2, 8, 2, 6], + [pd.Timestamp('2018-01-02'), pd.Timestamp('2018-01-02'), + pd.Timestamp('2018-01-08'), pd.Timestamp('2018-01-02'), + pd.Timestamp('2018-01-06')]]) + @pytest.mark.parametrize("ties_method,ascending,pct,exp", [ + ('average', True, False, [2., 2., 5., 2., 4.]), + ('average', True, True, [0.4, 0.4, 1.0, 0.4, 0.8]), + ('average', False, False, [4., 4., 1., 4., 2.]), + ('average', False, True, [.8, .8, .2, .8, .4]), + ('min', True, False, [1., 1., 5., 1., 4.]), + ('min', True, True, [0.2, 0.2, 1.0, 0.2, 0.8]), + ('min', False, False, [3., 3., 1., 3., 2.]), + ('min', False, True, [.6, .6, .2, .6, .4]), + ('max', True, False, [3., 3., 5., 3., 4.]), + ('max', True, True, [0.6, 0.6, 1.0, 0.6, 0.8]), + ('max', False, False, [5., 5., 1., 5., 2.]), + ('max', False, True, [1., 1., .2, 1., .4]), + ('first', True, False, [1., 2., 5., 3., 4.]), + ('first', True, True, [0.2, 0.4, 1.0, 0.6, 0.8]), + ('first', False, False, [3., 4., 1., 5., 2.]), + ('first', False, True, [.6, .8, .2, 1., .4]), + ('dense', True, False, [1., 1., 3., 1., 2.]), + ('dense', True, True, [0.2, 0.2, 0.6, 0.2, 0.4]), + ('dense', False, False, [3., 3., 1., 3., 2.]), + ('dense', False, True, [.6, .6, .2, .6, .4]), + ]) + def test_rank_args(self, grps, vals, ties_method, ascending, pct, exp): + key = np.repeat(grps, len(vals)) + vals = vals * len(grps) + df = DataFrame({'key': key, 'val': vals}) + result = df.groupby('key').rank(method=ties_method, + ascending=ascending, pct=pct) + + exp_df = DataFrame(exp * len(grps), columns=['val']) + assert_frame_equal(result, exp_df) + + @pytest.mark.parametrize("grps", [ + ['qux'], ['qux', 'quux']]) + @pytest.mark.parametrize("vals", [ + [2, 2, np.nan, 8, 2, 6, np.nan, np.nan], # floats + [pd.Timestamp('2018-01-02'), pd.Timestamp('2018-01-02'), np.nan, + pd.Timestamp('2018-01-08'), pd.Timestamp('2018-01-02'), + pd.Timestamp('2018-01-06'), np.nan, np.nan] 
+ ]) + @pytest.mark.parametrize("ties_method,ascending,na_option,pct,exp", [ + ('average', True, 'keep', False, + [2., 2., np.nan, 5., 2., 4., np.nan, np.nan]), + ('average', True, 'keep', True, + [0.4, 0.4, np.nan, 1.0, 0.4, 0.8, np.nan, np.nan]), + ('average', False, 'keep', False, + [4., 4., np.nan, 1., 4., 2., np.nan, np.nan]), + ('average', False, 'keep', True, + [.8, 0.8, np.nan, 0.2, 0.8, 0.4, np.nan, np.nan]), + ('min', True, 'keep', False, + [1., 1., np.nan, 5., 1., 4., np.nan, np.nan]), + ('min', True, 'keep', True, + [0.2, 0.2, np.nan, 1.0, 0.2, 0.8, np.nan, np.nan]), + ('min', False, 'keep', False, + [3., 3., np.nan, 1., 3., 2., np.nan, np.nan]), + ('min', False, 'keep', True, + [.6, 0.6, np.nan, 0.2, 0.6, 0.4, np.nan, np.nan]), + ('max', True, 'keep', False, + [3., 3., np.nan, 5., 3., 4., np.nan, np.nan]), + ('max', True, 'keep', True, + [0.6, 0.6, np.nan, 1.0, 0.6, 0.8, np.nan, np.nan]), + ('max', False, 'keep', False, + [5., 5., np.nan, 1., 5., 2., np.nan, np.nan]), + ('max', False, 'keep', True, + [1., 1., np.nan, 0.2, 1., 0.4, np.nan, np.nan]), + ('first', True, 'keep', False, + [1., 2., np.nan, 5., 3., 4., np.nan, np.nan]), + ('first', True, 'keep', True, + [0.2, 0.4, np.nan, 1.0, 0.6, 0.8, np.nan, np.nan]), + ('first', False, 'keep', False, + [3., 4., np.nan, 1., 5., 2., np.nan, np.nan]), + ('first', False, 'keep', True, + [.6, 0.8, np.nan, 0.2, 1., 0.4, np.nan, np.nan]), + ('dense', True, 'keep', False, + [1., 1., np.nan, 3., 1., 2., np.nan, np.nan]), + ('dense', True, 'keep', True, + [0.2, 0.2, np.nan, 0.6, 0.2, 0.4, np.nan, np.nan]), + ('dense', False, 'keep', False, + [3., 3., np.nan, 1., 3., 2., np.nan, np.nan]), + ('dense', False, 'keep', True, + [.6, 0.6, np.nan, 0.2, 0.6, 0.4, np.nan, np.nan]), + ('average', True, 'no_na', False, [2., 2., 7., 5., 2., 4., 7., 7.]), + ('average', True, 'no_na', True, + [0.25, 0.25, 0.875, 0.625, 0.25, 0.5, 0.875, 0.875]), + ('average', False, 'no_na', False, [4., 4., 7., 1., 4., 2., 7., 7.]), + ('average', 
False, 'no_na', True, + [0.5, 0.5, 0.875, 0.125, 0.5, 0.25, 0.875, 0.875]), + ('min', True, 'no_na', False, [1., 1., 6., 5., 1., 4., 6., 6.]), + ('min', True, 'no_na', True, + [0.125, 0.125, 0.75, 0.625, 0.125, 0.5, 0.75, 0.75]), + ('min', False, 'no_na', False, [3., 3., 6., 1., 3., 2., 6., 6.]), + ('min', False, 'no_na', True, + [0.375, 0.375, 0.75, 0.125, 0.375, 0.25, 0.75, 0.75]), + ('max', True, 'no_na', False, [3., 3., 8., 5., 3., 4., 8., 8.]), + ('max', True, 'no_na', True, + [0.375, 0.375, 1., 0.625, 0.375, 0.5, 1., 1.]), + ('max', False, 'no_na', False, [5., 5., 8., 1., 5., 2., 8., 8.]), + ('max', False, 'no_na', True, + [0.625, 0.625, 1., 0.125, 0.625, 0.25, 1., 1.]), + ('first', True, 'no_na', False, [1., 2., 6., 5., 3., 4., 7., 8.]), + ('first', True, 'no_na', True, + [0.125, 0.25, 0.75, 0.625, 0.375, 0.5, 0.875, 1.]), + ('first', False, 'no_na', False, [3., 4., 6., 1., 5., 2., 7., 8.]), + ('first', False, 'no_na', True, + [0.375, 0.5, 0.75, 0.125, 0.625, 0.25, 0.875, 1.]), + ('dense', True, 'no_na', False, [1., 1., 4., 3., 1., 2., 4., 4.]), + ('dense', True, 'no_na', True, + [0.125, 0.125, 0.5, 0.375, 0.125, 0.25, 0.5, 0.5]), + ('dense', False, 'no_na', False, [3., 3., 4., 1., 3., 2., 4., 4.]), + ('dense', False, 'no_na', True, + [0.375, 0.375, 0.5, 0.125, 0.375, 0.25, 0.5, 0.5]) + ]) + def test_rank_args_missing(self, grps, vals, ties_method, ascending, + na_option, pct, exp): + key = np.repeat(grps, len(vals)) + vals = vals * len(grps) + df = DataFrame({'key': key, 'val': vals}) + result = df.groupby('key').rank(method=ties_method, + ascending=ascending, + na_option=na_option, pct=pct) + + exp_df = DataFrame(exp * len(grps), columns=['val']) + assert_frame_equal(result, exp_df) + + @pytest.mark.parametrize("pct,exp", [ + (False, [3., 3., 3., 3., 3.]), + (True, [.6, .6, .6, .6, .6])]) + def test_rank_resets_each_group(self, pct, exp): + df = DataFrame( + {'key': ['a', 'a', 'a', 'a', 'a', 'b', 'b', 'b', 'b', 'b'], + 'val': [1] * 10} + ) + result = 
df.groupby('key').rank(pct=pct) + exp_df = DataFrame(exp * 2, columns=['val']) + assert_frame_equal(result, exp_df) + + def test_rank_avg_even_vals(self): + df = DataFrame({'key': ['a'] * 4, 'val': [1] * 4}) + result = df.groupby('key').rank() + exp_df = DataFrame([2.5, 2.5, 2.5, 2.5], columns=['val']) + assert_frame_equal(result, exp_df) + + @pytest.mark.parametrize("ties_method", [ + 'average', 'min', 'max', 'first', 'dense']) + @pytest.mark.parametrize("ascending", [True, False]) + @pytest.mark.parametrize("na_option", ["keep", "top", "bottom"]) + @pytest.mark.parametrize("pct", [True, False]) + @pytest.mark.parametrize("vals", [ + ['bar', 'bar', 'foo', 'bar', 'baz'], + ['bar', np.nan, 'foo', np.nan, 'baz'] + ]) + def test_rank_object_raises(self, ties_method, ascending, na_option, + pct, vals): + df = DataFrame({'key': ['foo'] * 5, 'val': vals}) + with tm.assert_raises_regex(TypeError, "not callable"): + df.groupby('key').rank(method=ties_method, + ascending=ascending, + na_option=na_option, pct=pct) + def test_dont_clobber_name_column(self): df = DataFrame({'key': ['a', 'a', 'a', 'b', 'b', 'b'], 'name': ['foo', 'bar', 'baz'] * 2}) From 94a696a7b4b043bc2d988b0fc476ec856bc88457 Mon Sep 17 00:00:00 2001 From: William Ayd Date: Sat, 10 Feb 2018 08:08:58 -0800 Subject: [PATCH 082/217] Consolidate nth / last object Groupby Implementations (#19610) --- pandas/_libs/groupby.pyx | 99 ---------------------------- pandas/_libs/groupby_helper.pxi.in | 32 +++++---- pandas/tests/groupby/test_groupby.py | 56 ++++++++-------- 3 files changed, 47 insertions(+), 140 deletions(-) diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index d75c3a71896e3..866683ce378ab 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -26,105 +26,6 @@ cdef double NaN = np.NaN cdef double nan = NaN -# TODO: aggregate multiple columns in single pass -# ---------------------------------------------------------------------- -# first, nth, last - - 
-@cython.boundscheck(False) -@cython.wraparound(False) -def group_nth_object(ndarray[object, ndim=2] out, - ndarray[int64_t] counts, - ndarray[object, ndim=2] values, - ndarray[int64_t] labels, - int64_t rank, - Py_ssize_t min_count=-1): - """ - Only aggregates on axis=0 - """ - cdef: - Py_ssize_t i, j, N, K, lab - object val - float64_t count - ndarray[int64_t, ndim=2] nobs - ndarray[object, ndim=2] resx - - assert min_count == -1, "'min_count' only used in add and prod" - - nobs = np.zeros(( out).shape, dtype=np.int64) - resx = np.empty(( out).shape, dtype=object) - - N, K = ( values).shape - - for i in range(N): - lab = labels[i] - if lab < 0: - continue - - counts[lab] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[lab, j] += 1 - if nobs[lab, j] == rank: - resx[lab, j] = val - - for i in range(len(counts)): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = nan - else: - out[i, j] = resx[i, j] - - -@cython.boundscheck(False) -@cython.wraparound(False) -def group_last_object(ndarray[object, ndim=2] out, - ndarray[int64_t] counts, - ndarray[object, ndim=2] values, - ndarray[int64_t] labels, - Py_ssize_t min_count=-1): - """ - Only aggregates on axis=0 - """ - cdef: - Py_ssize_t i, j, N, K, lab - object val - float64_t count - ndarray[object, ndim=2] resx - ndarray[int64_t, ndim=2] nobs - - assert min_count == -1, "'min_count' only used in add and prod" - - nobs = np.zeros(( out).shape, dtype=np.int64) - resx = np.empty(( out).shape, dtype=object) - - N, K = ( values).shape - - for i in range(N): - lab = labels[i] - if lab < 0: - continue - - counts[lab] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[lab, j] += 1 - resx[lab, j] = val - - for i in range(len(counts)): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = nan - else: - out[i, j] = resx[i, j] - - cdef inline float64_t median_linear(float64_t* a, int n) nogil: cdef int i, j, na_count = 0 cdef float64_t result diff 
--git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in index b24444c422efa..58a944a8241dd 100644 --- a/pandas/_libs/groupby_helper.pxi.in +++ b/pandas/_libs/groupby_helper.pxi.in @@ -325,7 +325,8 @@ def group_ohlc_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, # name, c_type, dest_type2, nan_val dtypes = [('float64', 'float64_t', 'float64_t', 'NAN'), ('float32', 'float32_t', 'float32_t', 'NAN'), - ('int64', 'int64_t', 'int64_t', 'iNaT')] + ('int64', 'int64_t', 'int64_t', 'iNaT'), + ('object', 'object', 'object', 'NAN')] def get_dispatch(dtypes): @@ -350,7 +351,7 @@ def group_last_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, """ cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) - {{dest_type2}} val, count + {{dest_type2}} val ndarray[{{dest_type2}}, ndim=2] resx ndarray[int64_t, ndim=2] nobs @@ -360,11 +361,19 @@ def group_last_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, raise AssertionError("len(index) != len(labels)") nobs = np.zeros(( out).shape, dtype=np.int64) + {{if name=='object'}} + resx = np.empty(( out).shape, dtype=object) + {{else}} resx = np.empty_like(out) + {{endif}} N, K = ( values).shape + {{if name == "object"}} + if True: # make templating happy + {{else}} with nogil: + {{endif}} for i in range(N): lab = labels[i] if lab < 0: @@ -375,11 +384,7 @@ def group_last_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, val = values[i, j] # not nan - {{if name == 'int64'}} - if val != {{nan_val}}: - {{else}} if val == val and val != {{nan_val}}: - {{endif}} nobs[lab, j] += 1 resx[lab, j] = val @@ -390,7 +395,6 @@ def group_last_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, else: out[i, j] = resx[i, j] - @cython.wraparound(False) @cython.boundscheck(False) def group_nth_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, @@ -403,7 +407,7 @@ def group_nth_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, """ cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) - {{dest_type2}} val, count + {{dest_type2}} val ndarray[{{dest_type2}}, 
ndim=2] resx ndarray[int64_t, ndim=2] nobs @@ -413,11 +417,19 @@ def group_nth_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, raise AssertionError("len(index) != len(labels)") nobs = np.zeros(( out).shape, dtype=np.int64) + {{if name=='object'}} + resx = np.empty(( out).shape, dtype=object) + {{else}} resx = np.empty_like(out) + {{endif}} N, K = ( values).shape + {{if name == "object"}} + if True: # make templating happy + {{else}} with nogil: + {{endif}} for i in range(N): lab = labels[i] if lab < 0: @@ -428,11 +440,7 @@ def group_nth_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, val = values[i, j] # not nan - {{if name == 'int64'}} - if val != {{nan_val}}: - {{else}} if val == val and val != {{nan_val}}: - {{endif}} nobs[lab, j] += 1 if nobs[lab, j] == rank: resx[lab, j] = val diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 2db772ac54369..6eacd45deb7bc 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2252,7 +2252,19 @@ def test_median_empty_bins(self): expected = df.groupby(bins).agg(lambda x: x.median()) assert_frame_equal(result, expected) - def test_groupby_non_arithmetic_agg_types(self): + @pytest.mark.parametrize("dtype", [ + 'int8', 'int16', 'int32', 'int64', 'float32', 'float64']) + @pytest.mark.parametrize("method,data", [ + ('first', {'df': [{'a': 1, 'b': 1}, {'a': 2, 'b': 3}]}), + ('last', {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}]}), + ('min', {'df': [{'a': 1, 'b': 1}, {'a': 2, 'b': 3}]}), + ('max', {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}]}), + ('nth', {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}], + 'args': [1]}), + ('count', {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 2}], + 'out_type': 'int64'}) + ]) + def test_groupby_non_arithmetic_agg_types(self, dtype, method, data): # GH9311, GH6620 df = pd.DataFrame( [{'a': 1, 'b': 1}, @@ -2260,39 +2272,25 @@ def test_groupby_non_arithmetic_agg_types(self): {'a': 2, 'b': 3}, {'a': 2, 'b': 4}]) - dtypes = ['int8', 
'int16', 'int32', 'int64', 'float32', 'float64'] - - grp_exp = {'first': {'df': [{'a': 1, 'b': 1}, {'a': 2, 'b': 3}]}, - 'last': {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}]}, - 'min': {'df': [{'a': 1, 'b': 1}, {'a': 2, 'b': 3}]}, - 'max': {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}]}, - 'nth': {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}], - 'args': [1]}, - 'count': {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 2}], - 'out_type': 'int64'}} + df['b'] = df.b.astype(dtype) - for dtype in dtypes: - df_in = df.copy() - df_in['b'] = df_in.b.astype(dtype) + if 'args' not in data: + data['args'] = [] - for method, data in compat.iteritems(grp_exp): - if 'args' not in data: - data['args'] = [] - - if 'out_type' in data: - out_type = data['out_type'] - else: - out_type = dtype + if 'out_type' in data: + out_type = data['out_type'] + else: + out_type = dtype - exp = data['df'] - df_out = pd.DataFrame(exp) + exp = data['df'] + df_out = pd.DataFrame(exp) - df_out['b'] = df_out.b.astype(out_type) - df_out.set_index('a', inplace=True) + df_out['b'] = df_out.b.astype(out_type) + df_out.set_index('a', inplace=True) - grpd = df_in.groupby('a') - t = getattr(grpd, method)(*data['args']) - assert_frame_equal(t, df_out) + grpd = df.groupby('a') + t = getattr(grpd, method)(*data['args']) + assert_frame_equal(t, df_out) def test_groupby_non_arithmetic_agg_intlike_precision(self): # GH9311, GH6620 From 58c80096c4e7e952a24feb54e435da1ecea7d899 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 10 Feb 2018 11:09:58 -0500 Subject: [PATCH 083/217] Revert "Consolidate nth / last object Groupby Implementations (#19610)" This reverts commit d4730e65fd2dd6235158930f756e1f1afd298488. 
--- pandas/_libs/groupby.pyx | 99 ++++++++++++++++++++++++++++ pandas/_libs/groupby_helper.pxi.in | 32 ++++----- pandas/tests/groupby/test_groupby.py | 56 ++++++++-------- 3 files changed, 140 insertions(+), 47 deletions(-) diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 866683ce378ab..d75c3a71896e3 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -26,6 +26,105 @@ cdef double NaN = np.NaN cdef double nan = NaN +# TODO: aggregate multiple columns in single pass +# ---------------------------------------------------------------------- +# first, nth, last + + +@cython.boundscheck(False) +@cython.wraparound(False) +def group_nth_object(ndarray[object, ndim=2] out, + ndarray[int64_t] counts, + ndarray[object, ndim=2] values, + ndarray[int64_t] labels, + int64_t rank, + Py_ssize_t min_count=-1): + """ + Only aggregates on axis=0 + """ + cdef: + Py_ssize_t i, j, N, K, lab + object val + float64_t count + ndarray[int64_t, ndim=2] nobs + ndarray[object, ndim=2] resx + + assert min_count == -1, "'min_count' only used in add and prod" + + nobs = np.zeros(( out).shape, dtype=np.int64) + resx = np.empty(( out).shape, dtype=object) + + N, K = ( values).shape + + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + for j in range(K): + val = values[i, j] + + # not nan + if val == val: + nobs[lab, j] += 1 + if nobs[lab, j] == rank: + resx[lab, j] = val + + for i in range(len(counts)): + for j in range(K): + if nobs[i, j] == 0: + out[i, j] = nan + else: + out[i, j] = resx[i, j] + + +@cython.boundscheck(False) +@cython.wraparound(False) +def group_last_object(ndarray[object, ndim=2] out, + ndarray[int64_t] counts, + ndarray[object, ndim=2] values, + ndarray[int64_t] labels, + Py_ssize_t min_count=-1): + """ + Only aggregates on axis=0 + """ + cdef: + Py_ssize_t i, j, N, K, lab + object val + float64_t count + ndarray[object, ndim=2] resx + ndarray[int64_t, ndim=2] nobs + + assert min_count == -1, 
"'min_count' only used in add and prod" + + nobs = np.zeros(( out).shape, dtype=np.int64) + resx = np.empty(( out).shape, dtype=object) + + N, K = ( values).shape + + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + for j in range(K): + val = values[i, j] + + # not nan + if val == val: + nobs[lab, j] += 1 + resx[lab, j] = val + + for i in range(len(counts)): + for j in range(K): + if nobs[i, j] == 0: + out[i, j] = nan + else: + out[i, j] = resx[i, j] + + cdef inline float64_t median_linear(float64_t* a, int n) nogil: cdef int i, j, na_count = 0 cdef float64_t result diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in index 58a944a8241dd..b24444c422efa 100644 --- a/pandas/_libs/groupby_helper.pxi.in +++ b/pandas/_libs/groupby_helper.pxi.in @@ -325,8 +325,7 @@ def group_ohlc_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, # name, c_type, dest_type2, nan_val dtypes = [('float64', 'float64_t', 'float64_t', 'NAN'), ('float32', 'float32_t', 'float32_t', 'NAN'), - ('int64', 'int64_t', 'int64_t', 'iNaT'), - ('object', 'object', 'object', 'NAN')] + ('int64', 'int64_t', 'int64_t', 'iNaT')] def get_dispatch(dtypes): @@ -351,7 +350,7 @@ def group_last_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, """ cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) - {{dest_type2}} val + {{dest_type2}} val, count ndarray[{{dest_type2}}, ndim=2] resx ndarray[int64_t, ndim=2] nobs @@ -361,19 +360,11 @@ def group_last_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, raise AssertionError("len(index) != len(labels)") nobs = np.zeros(( out).shape, dtype=np.int64) - {{if name=='object'}} - resx = np.empty(( out).shape, dtype=object) - {{else}} resx = np.empty_like(out) - {{endif}} N, K = ( values).shape - {{if name == "object"}} - if True: # make templating happy - {{else}} with nogil: - {{endif}} for i in range(N): lab = labels[i] if lab < 0: @@ -384,7 +375,11 @@ def group_last_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, val = 
values[i, j] # not nan + {{if name == 'int64'}} + if val != {{nan_val}}: + {{else}} if val == val and val != {{nan_val}}: + {{endif}} nobs[lab, j] += 1 resx[lab, j] = val @@ -395,6 +390,7 @@ def group_last_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, else: out[i, j] = resx[i, j] + @cython.wraparound(False) @cython.boundscheck(False) def group_nth_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, @@ -407,7 +403,7 @@ def group_nth_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, """ cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) - {{dest_type2}} val + {{dest_type2}} val, count ndarray[{{dest_type2}}, ndim=2] resx ndarray[int64_t, ndim=2] nobs @@ -417,19 +413,11 @@ def group_nth_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, raise AssertionError("len(index) != len(labels)") nobs = np.zeros(( out).shape, dtype=np.int64) - {{if name=='object'}} - resx = np.empty(( out).shape, dtype=object) - {{else}} resx = np.empty_like(out) - {{endif}} N, K = ( values).shape - {{if name == "object"}} - if True: # make templating happy - {{else}} with nogil: - {{endif}} for i in range(N): lab = labels[i] if lab < 0: @@ -440,7 +428,11 @@ def group_nth_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, val = values[i, j] # not nan + {{if name == 'int64'}} + if val != {{nan_val}}: + {{else}} if val == val and val != {{nan_val}}: + {{endif}} nobs[lab, j] += 1 if nobs[lab, j] == rank: resx[lab, j] = val diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 6eacd45deb7bc..2db772ac54369 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2252,19 +2252,7 @@ def test_median_empty_bins(self): expected = df.groupby(bins).agg(lambda x: x.median()) assert_frame_equal(result, expected) - @pytest.mark.parametrize("dtype", [ - 'int8', 'int16', 'int32', 'int64', 'float32', 'float64']) - @pytest.mark.parametrize("method,data", [ - ('first', {'df': [{'a': 1, 'b': 1}, {'a': 2, 'b': 3}]}), - ('last', {'df': [{'a': 1, 'b': 
2}, {'a': 2, 'b': 4}]}), - ('min', {'df': [{'a': 1, 'b': 1}, {'a': 2, 'b': 3}]}), - ('max', {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}]}), - ('nth', {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}], - 'args': [1]}), - ('count', {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 2}], - 'out_type': 'int64'}) - ]) - def test_groupby_non_arithmetic_agg_types(self, dtype, method, data): + def test_groupby_non_arithmetic_agg_types(self): # GH9311, GH6620 df = pd.DataFrame( [{'a': 1, 'b': 1}, @@ -2272,25 +2260,39 @@ def test_groupby_non_arithmetic_agg_types(self, dtype, method, data): {'a': 2, 'b': 3}, {'a': 2, 'b': 4}]) - df['b'] = df.b.astype(dtype) + dtypes = ['int8', 'int16', 'int32', 'int64', 'float32', 'float64'] - if 'args' not in data: - data['args'] = [] + grp_exp = {'first': {'df': [{'a': 1, 'b': 1}, {'a': 2, 'b': 3}]}, + 'last': {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}]}, + 'min': {'df': [{'a': 1, 'b': 1}, {'a': 2, 'b': 3}]}, + 'max': {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}]}, + 'nth': {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}], + 'args': [1]}, + 'count': {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 2}], + 'out_type': 'int64'}} - if 'out_type' in data: - out_type = data['out_type'] - else: - out_type = dtype + for dtype in dtypes: + df_in = df.copy() + df_in['b'] = df_in.b.astype(dtype) + + for method, data in compat.iteritems(grp_exp): + if 'args' not in data: + data['args'] = [] + + if 'out_type' in data: + out_type = data['out_type'] + else: + out_type = dtype - exp = data['df'] - df_out = pd.DataFrame(exp) + exp = data['df'] + df_out = pd.DataFrame(exp) - df_out['b'] = df_out.b.astype(out_type) - df_out.set_index('a', inplace=True) + df_out['b'] = df_out.b.astype(out_type) + df_out.set_index('a', inplace=True) - grpd = df.groupby('a') - t = getattr(grpd, method)(*data['args']) - assert_frame_equal(t, df_out) + grpd = df_in.groupby('a') + t = getattr(grpd, method)(*data['args']) + assert_frame_equal(t, df_out) def test_groupby_non_arithmetic_agg_intlike_precision(self): # 
GH9311, GH6620 From b98e59544a287f0d16933496146e22e8ef25a67f Mon Sep 17 00:00:00 2001 From: Jan Koch Date: Sat, 10 Feb 2018 17:20:17 +0100 Subject: [PATCH 084/217] ENH: df.assign accepting dependent **kwargs (#14207) (#18852) --- doc/source/dsintro.rst | 85 +++++++++++++++++------ doc/source/whatsnew/v0.23.0.txt | 40 +++++++++++ pandas/core/frame.py | 49 ++++++++----- pandas/tests/frame/test_mutate_columns.py | 26 ++++++- 4 files changed, 163 insertions(+), 37 deletions(-) diff --git a/doc/source/dsintro.rst b/doc/source/dsintro.rst index d7650b6b0938f..78e2fdb46f659 100644 --- a/doc/source/dsintro.rst +++ b/doc/source/dsintro.rst @@ -95,7 +95,7 @@ constructed from the sorted keys of the dict, if possible. NaN (not a number) is the standard missing data marker used in pandas. -**From scalar value** +**From scalar value** If ``data`` is a scalar value, an index must be provided. The value will be repeated to match the length of **index**. @@ -154,7 +154,7 @@ See also the :ref:`section on attribute access`. Vectorized operations and label alignment with Series ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -When working with raw NumPy arrays, looping through value-by-value is usually +When working with raw NumPy arrays, looping through value-by-value is usually not necessary. The same is true when working with Series in pandas. Series can also be passed into most NumPy methods expecting an ndarray. @@ -324,7 +324,7 @@ From a list of dicts From a dict of tuples ~~~~~~~~~~~~~~~~~~~~~ -You can automatically create a multi-indexed frame by passing a tuples +You can automatically create a multi-indexed frame by passing a tuples dictionary. .. ipython:: python @@ -347,7 +347,7 @@ column name provided). **Missing Data** Much more will be said on this topic in the :ref:`Missing data ` -section. To construct a DataFrame with missing data, we use ``np.nan`` to +section. To construct a DataFrame with missing data, we use ``np.nan`` to represent missing values. 
Alternatively, you may pass a ``numpy.MaskedArray`` as the data argument to the DataFrame constructor, and its masked entries will be considered missing. @@ -370,7 +370,7 @@ set to ``'index'`` in order to use the dict keys as row labels. ``DataFrame.from_records`` takes a list of tuples or an ndarray with structured dtype. It works analogously to the normal ``DataFrame`` constructor, except that -the resulting DataFrame index may be a specific field of the structured +the resulting DataFrame index may be a specific field of the structured dtype. For example: .. ipython:: python @@ -506,25 +506,70 @@ to be inserted (for example, a ``Series`` or NumPy array), or a function of one argument to be called on the ``DataFrame``. A *copy* of the original DataFrame is returned, with the new values inserted. +.. versionchanged:: 0.23.0 + +Starting with Python 3.6 the order of ``**kwargs`` is preserved. This allows +for *dependent* assignment, where an expression later in ``**kwargs`` can refer +to a column created earlier in the same :meth:`~DataFrame.assign`. + +.. ipython:: python + + dfa = pd.DataFrame({"A": [1, 2, 3], + "B": [4, 5, 6]}) + dfa.assign(C=lambda x: x['A'] + x['B'], + D=lambda x: x['A'] + x['C']) + +In the second expression, ``x['C']`` will refer to the newly created column, +that's equal to ``dfa['A'] + dfa['B']``. + +To write code compatible with all versions of Python, split the assignment in two. + +.. ipython:: python + + dependent = pd.DataFrame({"A": [1, 1, 1]}) + (dependent.assign(A=lambda x: x['A'] + 1) + .assign(B=lambda x: x['A'] + 2)) + .. warning:: - Since the function signature of ``assign`` is ``**kwargs``, a dictionary, - the order of the new columns in the resulting DataFrame cannot be guaranteed - to match the order you pass in. To make things predictable, items are inserted - alphabetically (by key) at the end of the DataFrame. 
Dependent assignment may subtly change the behavior of your code between + Python 3.6 and older versions of Python. + + If you wish to write code that supports versions of Python before and after 3.6, + you'll need to take care when passing ``assign`` expressions that + + * Update an existing column + * Refer to the newly updated column in the same ``assign`` + + For example, we'll update column "A" and then refer to it when creating "B". + + .. code-block:: python + + >>> dependent = pd.DataFrame({"A": [1, 1, 1]}) + >>> dependent.assign(A=lambda x: x["A"] + 1, + B=lambda x: x["A"] + 2) + + For Python 3.5 and earlier the expression creating ``B`` refers to the + "old" value of ``A``, ``[1, 1, 1]``. The output is then + + .. code-block:: python + + A B + 0 2 3 + 1 2 3 + 2 2 3 + + For Python 3.6 and later, the expression creating ``B`` refers to the + "new" value of ``A``, ``[2, 2, 2]``, which results in + + .. code-block:: python - All expressions are computed first, and then assigned. So you can't refer - to another column being assigned in the same call to ``assign``. For example: + A B + 0 2 4 + 1 2 4 + 2 2 4 - .. ipython:: - :verbatim: - In [1]: # Don't do this, bad reference to `C` - df.assign(C = lambda x: x['A'] + x['B'], - D = lambda x: x['A'] + x['C']) - In [2]: # Instead, break it into two assigns - (df.assign(C = lambda x: x['A'] + x['B']) - .assign(D = lambda x: x['A'] + x['C'])) Indexing / Selection ~~~~~~~~~~~~~~~~~~~~ @@ -914,7 +959,7 @@ For example, using the earlier example data, we could do: Squeezing ~~~~~~~~~ -Another way to change the dimensionality of an object is to ``squeeze`` a 1-len +Another way to change the dimensionality of an object is to ``squeeze`` a 1-len object, similar to ``wp['Item1']``. .. 
ipython:: python diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index cf5a44442045b..db5c79dcb3c42 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -248,6 +248,46 @@ Current Behavior: pd.RangeIndex(1, 5) / 0 +.. _whatsnew_0230.enhancements.assign_dependent: + +``.assign()`` accepts dependent arguments +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The :func:`DataFrame.assign` now accepts dependent keyword arguments for python version later than 3.6 (see also `PEP 468 +`_). Later keyword arguments may now refer to earlier ones if the argument is a callable. See the +:ref:`documentation here ` (:issue:`14207`) + +.. ipython:: python + + df = pd.DataFrame({'A': [1, 2, 3]}) + df + df.assign(B=df.A, C=lambda x:x['A']+ x['B']) + +.. warning:: + + This may subtly change the behavior of your code when you're + using ``.assign()`` to update an existing column. Previously, callables + referring to other variables being updated would get the "old" values + + Previous Behaviour: + + .. code-block:: ipython + + In [2]: df = pd.DataFrame({"A": [1, 2, 3]}) + + In [3]: df.assign(A=lambda df: df.A + 1, C=lambda df: df.A * -1) + Out[3]: + A C + 0 2 -1 + 1 3 -2 + 2 4 -3 + + New Behaviour: + + .. ipython:: python + + df.assign(A=df.A+1, C= lambda df: df.A* -1) + .. _whatsnew_0230.enhancements.other: Other Enhancements diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6d8dcb8a1ca89..c99c59db1d8cb 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2687,12 +2687,17 @@ def assign(self, **kwargs): Notes ----- - For python 3.6 and above, the columns are inserted in the order of - \*\*kwargs. For python 3.5 and earlier, since \*\*kwargs is unordered, - the columns are inserted in alphabetical order at the end of your - DataFrame. Assigning multiple columns within the same ``assign`` - is possible, but you cannot reference other columns created within - the same ``assign`` call. 
+ Assigning multiple columns within the same ``assign`` is possible. + For Python 3.6 and above, later items in '\*\*kwargs' may refer to + newly created or modified columns in 'df'; items are computed and + assigned into 'df' in order. For Python 3.5 and below, the order of + keyword arguments is not specified, you cannot refer to newly created + or modified columns. All items are computed first, and then assigned + in alphabetical order. + + .. versionmodified :: 0.23.0 + + Keyword argument order is maintained for Python 3.6 and later. Examples -------- @@ -2728,22 +2733,34 @@ def assign(self, **kwargs): 7 8 -1.495604 2.079442 8 9 0.549296 2.197225 9 10 -0.758542 2.302585 + + Where the keyword arguments depend on each other + + >>> df = pd.DataFrame({'A': [1, 2, 3]}) + + >>> df.assign(B=df.A, C=lambda x:x['A']+ x['B']) + A B C + 0 1 1 2 + 1 2 2 4 + 2 3 3 6 """ data = self.copy() - # do all calculations first... - results = OrderedDict() - for k, v in kwargs.items(): - results[k] = com._apply_if_callable(v, data) - - # preserve order for 3.6 and later, but sort by key for 3.5 and earlier + # >= 3.6 preserve order of kwargs if PY36: - results = results.items() + for k, v in kwargs.items(): + data[k] = com._apply_if_callable(v, data) else: + # <= 3.5: do all calculations first... + results = OrderedDict() + for k, v in kwargs.items(): + results[k] = com._apply_if_callable(v, data) + + # <= 3.5 and earlier results = sorted(results.items()) - # ... and then assign - for k, v in results: - data[k] = v + # ... 
and then assign + for k, v in results: + data[k] = v return data def _sanitize_column(self, key, value, broadcast=True): diff --git a/pandas/tests/frame/test_mutate_columns.py b/pandas/tests/frame/test_mutate_columns.py index 9acdf2f17d86a..8236a41d00243 100644 --- a/pandas/tests/frame/test_mutate_columns.py +++ b/pandas/tests/frame/test_mutate_columns.py @@ -89,11 +89,35 @@ def test_assign_bad(self): df.assign(lambda x: x.A) with pytest.raises(AttributeError): df.assign(C=df.A, D=df.A + df.C) + + @pytest.mark.skipif(PY36, reason="""Issue #14207: valid for python + 3.6 and above""") + def test_assign_dependent_old_python(self): + df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}) + + # Key C does not exist at defition time of df with pytest.raises(KeyError): - df.assign(C=lambda df: df.A, D=lambda df: df['A'] + df['C']) + df.assign(C=lambda df: df.A, + D=lambda df: df['A'] + df['C']) with pytest.raises(KeyError): df.assign(C=df.A, D=lambda x: x['A'] + x['C']) + @pytest.mark.skipif(not PY36, reason="""Issue #14207: not valid for + python 3.5 and below""") + def test_assign_dependent(self): + df = DataFrame({'A': [1, 2], 'B': [3, 4]}) + + result = df.assign(C=df.A, D=lambda x: x['A'] + x['C']) + expected = DataFrame([[1, 3, 1, 2], [2, 4, 2, 4]], + columns=list('ABCD')) + assert_frame_equal(result, expected) + + result = df.assign(C=lambda df: df.A, + D=lambda df: df['A'] + df['C']) + expected = DataFrame([[1, 3, 1, 2], [2, 4, 2, 4]], + columns=list('ABCD')) + assert_frame_equal(result, expected) + def test_insert_error_msmgs(self): # GH 7432 From fbdd61351a0c4a215bc6728a8c623ff123021662 Mon Sep 17 00:00:00 2001 From: elrubio <1485187+elrubio@users.noreply.github.com> Date: Sat, 10 Feb 2018 17:53:38 +0100 Subject: [PATCH 085/217] Fix left join turning into outer join (#19624) --- doc/source/whatsnew/v0.23.0.txt | 2 +- pandas/core/frame.py | 13 ++++++------- pandas/tests/frame/test_join.py | 17 +++++++++++++++++ 3 files changed, 24 insertions(+), 8 deletions(-) diff 
--git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index db5c79dcb3c42..03e8bce7e5102 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -813,7 +813,7 @@ Reshaping - Bug in timezone comparisons, manifesting as a conversion of the index to UTC in ``.concat()`` (:issue:`18523`) - Bug in :func:`concat` when concatting sparse and dense series it returns only a ``SparseDataFrame``. Should be a ``DataFrame``. (:issue:`18914`, :issue:`18686`, and :issue:`16874`) - Improved error message for :func:`DataFrame.merge` when there is no common merge key (:issue:`19427`) -- +- Bug in :func:`DataFrame.join` which does an *outer* instead of a *left* join when being called with multiple DataFrames and some have non-unique indices (:issue:`19624`) Other ^^^^^ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c99c59db1d8cb..23579d84a3964 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5345,18 +5345,17 @@ def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix='', raise ValueError('Joining multiple DataFrames only supported' ' for joining on index') - # join indexes only using concat - if how == 'left': - how = 'outer' - join_axes = [self.index] - else: - join_axes = None - frames = [self] + list(other) can_concat = all(df.index.is_unique for df in frames) + # join indexes only using concat if can_concat: + if how == 'left': + how = 'outer' + join_axes = [self.index] + else: + join_axes = None return concat(frames, axis=1, join=how, join_axes=join_axes, verify_integrity=True) diff --git a/pandas/tests/frame/test_join.py b/pandas/tests/frame/test_join.py index afecba2026dd7..ccdba6df2521a 100644 --- a/pandas/tests/frame/test_join.py +++ b/pandas/tests/frame/test_join.py @@ -165,3 +165,20 @@ def test_join_period_index(frame_with_period_index): index=frame_with_period_index.index) tm.assert_frame_equal(joined, expected) + + +def test_join_left_sequence_non_unique_index(): + # 
https://github.com/pandas-dev/pandas/issues/19607 + df1 = DataFrame({'a': [0, 10, 20]}, index=[1, 2, 3]) + df2 = DataFrame({'b': [100, 200, 300]}, index=[4, 3, 2]) + df3 = DataFrame({'c': [400, 500, 600]}, index=[2, 2, 4]) + + joined = df1.join([df2, df3], how='left') + + expected = DataFrame({ + 'a': [0, 10, 10, 20], + 'b': [np.nan, 300, 300, 200], + 'c': [np.nan, 400, 500, np.nan] + }, index=[1, 2, 2, 3]) + + tm.assert_frame_equal(joined, expected) From 12b9c0cd15ea438e489fbad41844225e5e065483 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 10 Feb 2018 08:59:40 -0800 Subject: [PATCH 086/217] function for frequently repeated tzconversion code (#19625) --- pandas/_libs/tslibs/conversion.pxd | 4 +- pandas/_libs/tslibs/conversion.pyx | 118 ++++++++++++++++++----------- pandas/_libs/tslibs/period.pyx | 9 +-- pandas/_libs/tslibs/resolution.pyx | 9 +-- 4 files changed, 85 insertions(+), 55 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pxd b/pandas/_libs/tslibs/conversion.pxd index 6e7df10e7c424..0d5e9e3fc5152 100644 --- a/pandas/_libs/tslibs/conversion.pxd +++ b/pandas/_libs/tslibs/conversion.pxd @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # cython: profile=False -from cpython.datetime cimport datetime +from cpython.datetime cimport datetime, tzinfo from numpy cimport int64_t, int32_t @@ -30,3 +30,5 @@ cdef int64_t get_datetime64_nanos(object val) except? -1 cpdef int64_t pydt_to_i8(object pydt) except? 
-1 cdef maybe_datetimelike_to_i8(object val) + +cdef int64_t tz_convert_utc_to_tzlocal(int64_t utc_val, tzinfo tz) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 4f1a053da6f1d..cfbcb922cb47d 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -499,7 +499,7 @@ cdef inline void _localize_tso(_TSObject obj, object tz): """ cdef: ndarray[int64_t] trans, deltas - int64_t delta + int64_t delta, local_val Py_ssize_t posn datetime dt @@ -510,11 +510,8 @@ cdef inline void _localize_tso(_TSObject obj, object tz): elif obj.value == NPY_NAT: pass elif is_tzlocal(tz): - dt64_to_dtstruct(obj.value, &obj.dts) - dt = datetime(obj.dts.year, obj.dts.month, obj.dts.day, obj.dts.hour, - obj.dts.min, obj.dts.sec, obj.dts.us, tz) - delta = int(get_utcoffset(tz, dt).total_seconds()) * 1000000000 - dt64_to_dtstruct(obj.value + delta, &obj.dts) + local_val = tz_convert_utc_to_tzlocal(obj.value, tz) + dt64_to_dtstruct(local_val, &obj.dts) else: # Adjust datetime64 timestamp, recompute datetimestruct trans, deltas, typ = get_dst_info(tz) @@ -556,6 +553,66 @@ cdef inline datetime _localize_pydatetime(datetime dt, tzinfo tz): # ---------------------------------------------------------------------- # Timezone Conversion +cdef inline int64_t tz_convert_tzlocal_to_utc(int64_t val, tzinfo tz): + """ + Parameters + ---------- + val : int64_t + tz : tzinfo + + Returns + ------- + utc_date : int64_t + + See Also + -------- + tz_convert_utc_to_tzlocal + """ + cdef: + pandas_datetimestruct dts + int64_t utc_date, delta + datetime dt + + dt64_to_dtstruct(val, &dts) + dt = datetime(dts.year, dts.month, dts.day, dts.hour, + dts.min, dts.sec, dts.us, tz) + delta = int(get_utcoffset(tz, dt).total_seconds()) * 1000000000 + utc_date = val - delta + return utc_date + + +cdef inline int64_t tz_convert_utc_to_tzlocal(int64_t utc_val, tzinfo tz): + """ + Parameters + ---------- + utc_val : int64_t + tz : tzinfo + + Returns + 
------- + local_val : int64_t + + See Also + -------- + tz_convert_tzlocal_to_utc + + Notes + ----- + The key difference between this and tz_convert_tzlocal_to_utc is a + an addition flipped to a subtraction in the last line. + """ + cdef: + pandas_datetimestruct dts + int64_t local_val, delta + datetime dt + + dt64_to_dtstruct(utc_val, &dts) + dt = datetime(dts.year, dts.month, dts.day, dts.hour, + dts.min, dts.sec, dts.us, tz) + delta = int(get_utcoffset(tz, dt).total_seconds()) * 1000000000 + local_val = utc_val + delta + return local_val + cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2): """ @@ -590,11 +647,7 @@ cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2): # Convert to UTC if is_tzlocal(tz1): - dt64_to_dtstruct(val, &dts) - dt = datetime(dts.year, dts.month, dts.day, dts.hour, - dts.min, dts.sec, dts.us, tz1) - delta = int(get_utcoffset(tz1, dt).total_seconds()) * 1000000000 - utc_date = val - delta + utc_date = tz_convert_tzlocal_to_utc(val, tz1) elif get_timezone(tz1) != 'UTC': trans, deltas, typ = get_dst_info(tz1) pos = trans.searchsorted(val, side='right') - 1 @@ -608,11 +661,7 @@ cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2): if get_timezone(tz2) == 'UTC': return utc_date elif is_tzlocal(tz2): - dt64_to_dtstruct(utc_date, &dts) - dt = datetime(dts.year, dts.month, dts.day, dts.hour, - dts.min, dts.sec, dts.us, tz2) - delta = int(get_utcoffset(tz2, dt).total_seconds()) * 1000000000 - return utc_date + delta + return tz_convert_utc_to_tzlocal(utc_date, tz2) # Convert UTC to other timezone trans, deltas, typ = get_dst_info(tz2) @@ -662,12 +711,7 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2): if v == NPY_NAT: utc_dates[i] = NPY_NAT else: - dt64_to_dtstruct(v, &dts) - dt = datetime(dts.year, dts.month, dts.day, dts.hour, - dts.min, dts.sec, dts.us, tz1) - delta = (int(get_utcoffset(tz1, dt).total_seconds()) * - 1000000000) - utc_dates[i] = v - delta + utc_dates[i] = 
tz_convert_tzlocal_to_utc(v, tz1) else: trans, deltas, typ = get_dst_info(tz1) @@ -702,12 +746,7 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2): if v == NPY_NAT: result[i] = NPY_NAT else: - dt64_to_dtstruct(v, &dts) - dt = datetime(dts.year, dts.month, dts.day, dts.hour, - dts.min, dts.sec, dts.us, tz2) - delta = (int(get_utcoffset(tz2, dt).total_seconds()) * - 1000000000) - result[i] = v + delta + result[i] = tz_convert_utc_to_tzlocal(v, tz2) return result # Convert UTC to other timezone @@ -777,11 +816,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, if is_tzlocal(tz): for i in range(n): v = vals[i] - dt64_to_dtstruct(v, &dts) - dt = datetime(dts.year, dts.month, dts.day, dts.hour, - dts.min, dts.sec, dts.us, tz) - delta = int(get_utcoffset(tz, dt).total_seconds()) * 1000000000 - result[i] = v - delta + result[i] = tz_convert_tzlocal_to_utc(v, tz) return result if is_string_object(ambiguous): @@ -1024,11 +1059,8 @@ cdef ndarray[int64_t] _normalize_local(ndarray[int64_t] stamps, object tz): if stamps[i] == NPY_NAT: result[i] = NPY_NAT continue - dt64_to_dtstruct(stamps[i], &dts) - dt = datetime(dts.year, dts.month, dts.day, dts.hour, - dts.min, dts.sec, dts.us, tz) - delta = int(get_utcoffset(tz, dt).total_seconds()) * 1000000000 - dt64_to_dtstruct(stamps[i] + delta, &dts) + local_val = tz_convert_utc_to_tzlocal(stamps[i], tz) + dt64_to_dtstruct(local_val, &dts) result[i] = _normalized_stamp(&dts) else: # Adjust datetime64 timestamp, recompute datetimestruct @@ -1097,7 +1129,7 @@ def is_date_array_normalized(ndarray[int64_t] stamps, tz=None): Py_ssize_t i, n = len(stamps) ndarray[int64_t] trans, deltas pandas_datetimestruct dts - datetime dt + int64_t local_val if tz is None or is_utc(tz): for i in range(n): @@ -1106,11 +1138,9 @@ def is_date_array_normalized(ndarray[int64_t] stamps, tz=None): return False elif is_tzlocal(tz): for i in range(n): - dt64_to_dtstruct(stamps[i], &dts) - dt = datetime(dts.year, 
dts.month, dts.day, dts.hour, dts.min, - dts.sec, dts.us, tz) - dt = dt + tz.utcoffset(dt) - if (dt.hour + dt.minute + dt.second + dt.microsecond) > 0: + local_val = tz_convert_utc_to_tzlocal(stamps[i], tz) + dt64_to_dtstruct(local_val, &dts) + if (dts.hour + dts.min + dts.sec + dts.us) > 0: return False else: trans, deltas, typ = get_dst_info(tz) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 3c396a9ff4f3c..dc5d058f41d11 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -39,6 +39,7 @@ cimport ccalendar from ccalendar cimport dayofweek, get_day_of_year from ccalendar import MONTH_NUMBERS from ccalendar cimport is_leapyear +from conversion cimport tz_convert_utc_to_tzlocal from frequencies cimport (get_freq_code, get_base_alias, get_to_timestamp_base, get_freq_str, get_rule_month) @@ -591,6 +592,7 @@ cdef ndarray[int64_t] localize_dt64arr_to_period(ndarray[int64_t] stamps, ndarray[int64_t] result = np.empty(n, dtype=np.int64) ndarray[int64_t] trans, deltas, pos pandas_datetimestruct dts + int64_t local_val if is_utc(tz): for i in range(n): @@ -607,11 +609,8 @@ cdef ndarray[int64_t] localize_dt64arr_to_period(ndarray[int64_t] stamps, if stamps[i] == NPY_NAT: result[i] = NPY_NAT continue - dt64_to_dtstruct(stamps[i], &dts) - dt = datetime(dts.year, dts.month, dts.day, dts.hour, - dts.min, dts.sec, dts.us, tz) - delta = int(get_utcoffset(tz, dt).total_seconds()) * 1000000000 - dt64_to_dtstruct(stamps[i] + delta, &dts) + local_val = tz_convert_utc_to_tzlocal(stamps[i], tz) + dt64_to_dtstruct(local_val, &dts) result[i] = get_period_ordinal(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, dts.ps, freq) diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index b166babe5992c..d0a9501afe566 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -23,6 +23,7 @@ from timezones cimport (is_utc, is_tzlocal, maybe_get_tz, 
get_dst_info, get_utcoffset) from fields import build_field_sarray from conversion import tz_convert +from conversion cimport tz_convert_utc_to_tzlocal from ccalendar import MONTH_ALIASES, int_to_weekday from pandas._libs.properties import cache_readonly @@ -78,6 +79,7 @@ cdef _reso_local(ndarray[int64_t] stamps, object tz): int reso = RESO_DAY, curr_reso ndarray[int64_t] trans, deltas, pos pandas_datetimestruct dts + int64_t local_val if is_utc(tz): for i in range(n): @@ -91,11 +93,8 @@ cdef _reso_local(ndarray[int64_t] stamps, object tz): for i in range(n): if stamps[i] == NPY_NAT: continue - dt64_to_dtstruct(stamps[i], &dts) - dt = datetime(dts.year, dts.month, dts.day, dts.hour, - dts.min, dts.sec, dts.us, tz) - delta = int(get_utcoffset(tz, dt).total_seconds()) * 1000000000 - dt64_to_dtstruct(stamps[i] + delta, &dts) + local_val = tz_convert_utc_to_tzlocal(stamps[i], tz) + dt64_to_dtstruct(local_val, &dts) curr_reso = _reso_stamp(&dts) if curr_reso < reso: reso = curr_reso From 848857280cc340fa03ed2027c1ae8c28a91595de Mon Sep 17 00:00:00 2001 From: jschendel Date: Sat, 10 Feb 2018 10:02:28 -0700 Subject: [PATCH 087/217] API: Allow ordered=None in CategoricalDtype (#18889) --- doc/source/whatsnew/v0.23.0.txt | 23 ++++ pandas/core/arrays/categorical.py | 12 +- pandas/core/dtypes/dtypes.py | 54 ++++++--- pandas/core/indexes/category.py | 2 +- pandas/tests/dtypes/test_dtypes.py | 179 ++++++++++++++++------------- 5 files changed, 168 insertions(+), 102 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 03e8bce7e5102..6f48d9a6c63c9 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -500,6 +500,29 @@ To restore previous behavior, simply set ``expand`` to ``False``: extracted type(extracted) +.. 
_whatsnew_0230.api_breaking.cdt_ordered: + +Default value for the ``ordered`` parameter of ``CategoricalDtype`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The default value of the ``ordered`` parameter for :class:`~pandas.api.types.CategoricalDtype` has changed from ``False`` to ``None`` to allow updating of ``categories`` without impacting ``ordered``. Behavior should remain consistent for downstream objects, such as :class:`Categorical` (:issue:`18790`) + +In previous versions, the default value for the ``ordered`` parameter was ``False``. This could potentially lead to the ``ordered`` parameter unintentionally being changed from ``True`` to ``False`` when users attempt to update ``categories`` if ``ordered`` is not explicitly specified, as it would silently default to ``False``. The new behavior for ``ordered=None`` is to retain the existing value of ``ordered``. + +New Behavior: + +.. ipython:: python + + from pandas.api.types import CategoricalDtype + cat = pd.Categorical(list('abcaba'), ordered=True, categories=list('cba')) + cat + cdt = CategoricalDtype(categories=list('cbad')) + cat.astype(cdt) + +Notice in the example above that the converted ``Categorical`` has retained ``ordered=True``. Had the default value for ``ordered`` remained as ``False``, the converted ``Categorical`` would have become unordered, despite ``ordered=False`` never being explicitly specified. To change the value of ``ordered``, explicitly pass it to the new dtype, e.g. ``CategoricalDtype(categories=list('cbad'), ordered=False)``. + +Note that the unintentional conversion of ``ordered`` discussed above did not arise in previous versions due to separate bugs that prevented ``astype`` from doing any type of category to category conversion (:issue:`10696`, :issue:`18593`). These bugs have been fixed in this release, and motivated changing the default value of ``ordered``. + .. 
_whatsnew_0230.api: Other API Changes diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 62c6a6b16cbe9..93250bdbb5054 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -243,7 +243,7 @@ class Categorical(ExtensionArray, PandasObject): # For comparisons, so that numpy uses our implementation if the compare # ops, which raise __array_priority__ = 1000 - _dtype = CategoricalDtype() + _dtype = CategoricalDtype(ordered=False) _deprecations = frozenset(['labels']) _typ = 'categorical' @@ -294,7 +294,7 @@ def __init__(self, values, categories=None, ordered=None, dtype=None, if fastpath: self._codes = coerce_indexer_dtype(values, categories) - self._dtype = dtype + self._dtype = self._dtype.update_dtype(dtype) return # null_mask indicates missing values we want to exclude from inference. @@ -358,7 +358,7 @@ def __init__(self, values, categories=None, ordered=None, dtype=None, full_codes[~null_mask] = codes codes = full_codes - self._dtype = dtype + self._dtype = self._dtype.update_dtype(dtype) self._codes = coerce_indexer_dtype(codes, dtype.categories) @property @@ -438,7 +438,7 @@ def astype(self, dtype, copy=True): """ if is_categorical_dtype(dtype): # GH 10696/18593 - dtype = self.dtype._update_dtype(dtype) + dtype = self.dtype.update_dtype(dtype) self = self.copy() if copy else self if dtype == self.dtype: return self @@ -560,7 +560,7 @@ def from_codes(cls, codes, categories, ordered=False): raise ValueError( "codes need to be convertible to an arrays of integers") - categories = CategoricalDtype._validate_categories(categories) + categories = CategoricalDtype.validate_categories(categories) if len(codes) and (codes.max() >= len(categories) or codes.min() < -1): raise ValueError("codes need to be between -1 and " @@ -1165,7 +1165,7 @@ def __setstate__(self, state): # Provide compatibility with pre-0.15.0 Categoricals. 
if '_categories' not in state and '_levels' in state: - state['_categories'] = self.dtype._validate_categories(state.pop( + state['_categories'] = self.dtype.validate_categories(state.pop( '_levels')) if '_codes' not in state and 'labels' in state: state['_codes'] = coerce_indexer_dtype( diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index d8d3a96992757..99e4033f104db 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -159,11 +159,11 @@ class CategoricalDtype(PandasExtensionDtype): _metadata = ['categories', 'ordered'] _cache = {} - def __init__(self, categories=None, ordered=False): + def __init__(self, categories=None, ordered=None): self._finalize(categories, ordered, fastpath=False) @classmethod - def _from_fastpath(cls, categories=None, ordered=False): + def _from_fastpath(cls, categories=None, ordered=None): self = cls.__new__(cls) self._finalize(categories, ordered, fastpath=True) return self @@ -180,14 +180,12 @@ def _from_categorical_dtype(cls, dtype, categories=None, ordered=None): def _finalize(self, categories, ordered, fastpath=False): - if ordered is None: - ordered = False - else: - self._validate_ordered(ordered) + if ordered is not None: + self.validate_ordered(ordered) if categories is not None: - categories = self._validate_categories(categories, - fastpath=fastpath) + categories = self.validate_categories(categories, + fastpath=fastpath) self._categories = categories self._ordered = ordered @@ -208,6 +206,17 @@ def __hash__(self): return int(self._hash_categories(self.categories, self.ordered)) def __eq__(self, other): + """ + Rules for CDT equality: + 1) Any CDT is equal to the string 'category' + 2) Any CDT is equal to a CDT with categories=None regardless of ordered + 3) A CDT with ordered=True is only equal to another CDT with + ordered=True and identical categories in the same order + 4) A CDT with ordered={False, None} is only equal to another CDT with + ordered={False, None} and 
identical categories, but same order is + not required. There is no distinction between False/None. + 5) Any other comparison returns False + """ if isinstance(other, compat.string_types): return other == self.name @@ -220,12 +229,16 @@ def __eq__(self, other): # CDT(., .) = CDT(None, False) and *all* # CDT(., .) = CDT(None, True). return True - elif self.ordered: - return other.ordered and self.categories.equals(other.categories) - elif other.ordered: - return False + elif self.ordered or other.ordered: + # At least one has ordered=True; equal if both have ordered=True + # and the same values for categories in the same order. + return ((self.ordered == other.ordered) and + self.categories.equals(other.categories)) else: - # both unordered; this could probably be optimized / cached + # Neither has ordered=True; equal if both have the same categories, + # but same order is not necessary. There is no distinction between + # ordered=False and ordered=None: CDT(., False) and CDT(., None) + # will be equal if they have the same categories. return hash(self) == hash(other) def __repr__(self): @@ -288,7 +301,7 @@ def construct_from_string(cls, string): raise TypeError("cannot construct a CategoricalDtype") @staticmethod - def _validate_ordered(ordered): + def validate_ordered(ordered): """ Validates that we have a valid ordered parameter. If it is not a boolean, a TypeError will be raised. 
@@ -308,7 +321,7 @@ def _validate_ordered(ordered): raise TypeError("'ordered' must either be 'True' or 'False'") @staticmethod - def _validate_categories(categories, fastpath=False): + def validate_categories(categories, fastpath=False): """ Validates that we have good categories @@ -340,7 +353,7 @@ def _validate_categories(categories, fastpath=False): return categories - def _update_dtype(self, dtype): + def update_dtype(self, dtype): """ Returns a CategoricalDtype with categories and ordered taken from dtype if specified, otherwise falling back to self if unspecified @@ -361,11 +374,16 @@ def _update_dtype(self, dtype): 'got {dtype!r}').format(dtype=dtype) raise ValueError(msg) - # dtype is CDT: keep current categories if None (ordered can't be None) + # dtype is CDT: keep current categories/ordered if None new_categories = dtype.categories if new_categories is None: new_categories = self.categories - return CategoricalDtype(new_categories, dtype.ordered) + + new_ordered = dtype.ordered + if new_ordered is None: + new_ordered = self.ordered + + return CategoricalDtype(new_categories, new_ordered) @property def categories(self): diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index b36bc1df23247..60f5552576ea1 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -344,7 +344,7 @@ def astype(self, dtype, copy=True): return IntervalIndex(np.array(self)) elif is_categorical_dtype(dtype): # GH 18630 - dtype = self.dtype._update_dtype(dtype) + dtype = self.dtype.update_dtype(dtype) if dtype == self.dtype: return self.copy() if copy else self diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index d800a7b92b559..cc833af03ae66 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -24,6 +24,11 @@ import pandas.util.testing as tm +@pytest.fixture(params=[True, False, None]) +def ordered(request): + return request.param + + class 
Base(object): def setup_method(self, method): @@ -124,41 +129,6 @@ def test_tuple_categories(self): result = CategoricalDtype(categories) assert all(result.categories == categories) - @pytest.mark.parametrize('dtype', [ - CategoricalDtype(list('abc'), False), - CategoricalDtype(list('abc'), True)]) - @pytest.mark.parametrize('new_dtype', [ - 'category', - CategoricalDtype(None, False), - CategoricalDtype(None, True), - CategoricalDtype(list('abc'), False), - CategoricalDtype(list('abc'), True), - CategoricalDtype(list('cba'), False), - CategoricalDtype(list('cba'), True), - CategoricalDtype(list('wxyz'), False), - CategoricalDtype(list('wxyz'), True)]) - def test_update_dtype(self, dtype, new_dtype): - if isinstance(new_dtype, string_types) and new_dtype == 'category': - expected_categories = dtype.categories - expected_ordered = dtype.ordered - else: - expected_categories = new_dtype.categories - if expected_categories is None: - expected_categories = dtype.categories - expected_ordered = new_dtype.ordered - - result = dtype._update_dtype(new_dtype) - tm.assert_index_equal(result.categories, expected_categories) - assert result.ordered is expected_ordered - - @pytest.mark.parametrize('bad_dtype', [ - 'foo', object, np.int64, PeriodDtype('Q')]) - def test_update_dtype_errors(self, bad_dtype): - dtype = CategoricalDtype(list('abc'), False) - msg = 'a CategoricalDtype must be passed to perform an update, ' - with tm.assert_raises_regex(ValueError, msg): - dtype._update_dtype(bad_dtype) - class TestDatetimeTZDtype(Base): @@ -609,17 +579,12 @@ def test_caching(self): class TestCategoricalDtypeParametrized(object): - @pytest.mark.parametrize('categories, ordered', [ - (['a', 'b', 'c', 'd'], False), - (['a', 'b', 'c', 'd'], True), - (np.arange(1000), False), - (np.arange(1000), True), - (['a', 'b', 10, 2, 1.3, True], False), - ([True, False], True), - ([True, False], False), - (pd.date_range('2017', periods=4), True), - (pd.date_range('2017', periods=4), False), - ]) + 
@pytest.mark.parametrize('categories', [ + list('abcd'), + np.arange(1000), + ['a', 'b', 10, 2, 1.3, True], + [True, False], + pd.date_range('2017', periods=4)]) def test_basic(self, categories, ordered): c1 = CategoricalDtype(categories, ordered=ordered) tm.assert_index_equal(c1.categories, pd.Index(categories)) @@ -627,21 +592,24 @@ def test_basic(self, categories, ordered): def test_order_matters(self): categories = ['a', 'b'] - c1 = CategoricalDtype(categories, ordered=False) - c2 = CategoricalDtype(categories, ordered=True) + c1 = CategoricalDtype(categories, ordered=True) + c2 = CategoricalDtype(categories, ordered=False) + c3 = CategoricalDtype(categories, ordered=None) assert c1 is not c2 + assert c1 is not c3 - def test_unordered_same(self): - c1 = CategoricalDtype(['a', 'b']) - c2 = CategoricalDtype(['b', 'a']) + @pytest.mark.parametrize('ordered', [False, None]) + def test_unordered_same(self, ordered): + c1 = CategoricalDtype(['a', 'b'], ordered=ordered) + c2 = CategoricalDtype(['b', 'a'], ordered=ordered) assert hash(c1) == hash(c2) def test_categories(self): result = CategoricalDtype(['a', 'b', 'c']) tm.assert_index_equal(result.categories, pd.Index(['a', 'b', 'c'])) - assert result.ordered is False + assert result.ordered is None - def test_equal_but_different(self): + def test_equal_but_different(self, ordered): c1 = CategoricalDtype([1, 2, 3]) c2 = CategoricalDtype([1., 2., 3.]) assert c1 is not c2 @@ -652,9 +620,11 @@ def test_equal_but_different(self): ([1, 2, 3], [3, 2, 1]), ]) def test_order_hashes_different(self, v1, v2): - c1 = CategoricalDtype(v1) + c1 = CategoricalDtype(v1, ordered=False) c2 = CategoricalDtype(v2, ordered=True) + c3 = CategoricalDtype(v1, ordered=None) assert c1 is not c2 + assert c1 is not c3 def test_nan_invalid(self): with pytest.raises(ValueError): @@ -669,26 +639,46 @@ def test_same_categories_different_order(self): c2 = CategoricalDtype(['b', 'a'], ordered=True) assert c1 is not c2 - @pytest.mark.parametrize('ordered, 
other, expected', [ - (True, CategoricalDtype(['a', 'b'], True), True), - (False, CategoricalDtype(['a', 'b'], False), True), - (True, CategoricalDtype(['a', 'b'], False), False), - (False, CategoricalDtype(['a', 'b'], True), False), - (True, CategoricalDtype([1, 2], False), False), - (False, CategoricalDtype([1, 2], True), False), - (False, CategoricalDtype(None, True), True), - (True, CategoricalDtype(None, True), True), - (False, CategoricalDtype(None, False), True), - (True, CategoricalDtype(None, False), True), - (True, 'category', True), - (False, 'category', True), - (True, 'not a category', False), - (False, 'not a category', False), - ]) - def test_categorical_equality(self, ordered, other, expected): - c1 = CategoricalDtype(['a', 'b'], ordered) + @pytest.mark.parametrize('ordered1', [True, False, None]) + @pytest.mark.parametrize('ordered2', [True, False, None]) + def test_categorical_equality(self, ordered1, ordered2): + # same categories, same order + # any combination of None/False are equal + # True/True is the only combination with True that are equal + c1 = CategoricalDtype(list('abc'), ordered1) + c2 = CategoricalDtype(list('abc'), ordered2) + result = c1 == c2 + expected = bool(ordered1) is bool(ordered2) + assert result is expected + + # same categories, different order + # any combination of None/False are equal (order doesn't matter) + # any combination with True are not equal (different order of cats) + c1 = CategoricalDtype(list('abc'), ordered1) + c2 = CategoricalDtype(list('cab'), ordered2) + result = c1 == c2 + expected = (bool(ordered1) is False) and (bool(ordered2) is False) + assert result is expected + + # different categories + c2 = CategoricalDtype([1, 2, 3], ordered2) + assert c1 != c2 + + # none categories + c1 = CategoricalDtype(list('abc'), ordered1) + c2 = CategoricalDtype(None, ordered2) + c3 = CategoricalDtype(None, ordered1) + assert c1 == c2 + assert c2 == c1 + assert c2 == c3 + + @pytest.mark.parametrize('categories', 
[list('abc'), None]) + @pytest.mark.parametrize('other', ['category', 'not a category']) + def test_categorical_equality_strings(self, categories, ordered, other): + c1 = CategoricalDtype(categories, ordered) result = c1 == other - assert result == expected + expected = other == 'category' + assert result is expected def test_invalid_raises(self): with tm.assert_raises_regex(TypeError, 'ordered'): @@ -729,12 +719,12 @@ def test_from_categorical_dtype_both(self): c1, categories=[1, 2], ordered=False) assert result == CategoricalDtype([1, 2], ordered=False) - def test_str_vs_repr(self): - c1 = CategoricalDtype(['a', 'b']) + def test_str_vs_repr(self, ordered): + c1 = CategoricalDtype(['a', 'b'], ordered=ordered) assert str(c1) == 'category' # Py2 will have unicode prefixes - pat = r"CategoricalDtype\(categories=\[.*\], ordered=False\)" - assert re.match(pat, repr(c1)) + pat = r"CategoricalDtype\(categories=\[.*\], ordered={ordered}\)" + assert re.match(pat.format(ordered=ordered), repr(c1)) def test_categorical_categories(self): # GH17884 @@ -742,3 +732,38 @@ def test_categorical_categories(self): tm.assert_index_equal(c1.categories, pd.Index(['a', 'b'])) c1 = CategoricalDtype(CategoricalIndex(['a', 'b'])) tm.assert_index_equal(c1.categories, pd.Index(['a', 'b'])) + + @pytest.mark.parametrize('new_categories', [ + list('abc'), list('cba'), list('wxyz'), None]) + @pytest.mark.parametrize('new_ordered', [True, False, None]) + def test_update_dtype(self, ordered, new_categories, new_ordered): + dtype = CategoricalDtype(list('abc'), ordered) + new_dtype = CategoricalDtype(new_categories, new_ordered) + + expected_categories = new_dtype.categories + if expected_categories is None: + expected_categories = dtype.categories + + expected_ordered = new_dtype.ordered + if expected_ordered is None: + expected_ordered = dtype.ordered + + result = dtype.update_dtype(new_dtype) + tm.assert_index_equal(result.categories, expected_categories) + assert result.ordered is 
expected_ordered + + def test_update_dtype_string(self, ordered): + dtype = CategoricalDtype(list('abc'), ordered) + expected_categories = dtype.categories + expected_ordered = dtype.ordered + result = dtype.update_dtype('category') + tm.assert_index_equal(result.categories, expected_categories) + assert result.ordered is expected_ordered + + @pytest.mark.parametrize('bad_dtype', [ + 'foo', object, np.int64, PeriodDtype('Q')]) + def test_update_dtype_errors(self, bad_dtype): + dtype = CategoricalDtype(list('abc'), False) + msg = 'a CategoricalDtype must be passed to perform an update, ' + with tm.assert_raises_regex(ValueError, msg): + dtype.update_dtype(bad_dtype) From 0a10bbe2d5a39411eea834e86aec178ec8c29243 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 10 Feb 2018 09:08:40 -0800 Subject: [PATCH 088/217] order of exceptions in array_to_datetime (#19621) --- pandas/_libs/tslib.pyx | 84 ++++++++++---------- pandas/tests/indexes/datetimes/test_tools.py | 9 ++- 2 files changed, 52 insertions(+), 41 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 877d7deff6ff4..a035bab2a7049 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -7,7 +7,7 @@ import numpy as np cnp.import_array() -from cpython cimport PyFloat_Check +from cpython cimport PyFloat_Check, PyUnicode_Check from util cimport (is_integer_object, is_float_object, is_string_object, is_datetime64_object) @@ -56,6 +56,8 @@ from tslibs.timestamps cimport (create_timestamp_from_ts, _NS_UPPER_BOUND, _NS_LOWER_BOUND) from tslibs.timestamps import Timestamp +cdef bint PY2 = str == bytes + cdef inline object create_datetime_from_ts( int64_t value, pandas_datetimestruct dts, @@ -549,10 +551,10 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', raise elif PyDate_Check(val): + seen_datetime = 1 iresult[i] = pydate_to_dt64(val, &dts) try: check_dts_bounds(&dts) - seen_datetime = 1 except ValueError: if is_coerce: iresult[i] = NPY_NAT @@ -560,12 +562,12 @@ 
cpdef array_to_datetime(ndarray[object] values, errors='raise', raise elif is_datetime64_object(val): + seen_datetime = 1 if get_datetime64_value(val) == NPY_NAT: iresult[i] = NPY_NAT else: try: iresult[i] = get_datetime64_nanos(val) - seen_datetime = 1 except ValueError: if is_coerce: iresult[i] = NPY_NAT @@ -574,19 +576,18 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', elif is_integer_object(val) or is_float_object(val): # these must be ns unit by-definition + seen_integer = 1 if val != val or val == NPY_NAT: iresult[i] = NPY_NAT elif is_raise or is_ignore: iresult[i] = val - seen_integer = 1 else: # coerce # we now need to parse this as if unit='ns' # we can ONLY accept integers at this point # if we have previously (or in future accept # datetimes/strings, then we must coerce) - seen_integer = 1 try: iresult[i] = cast_from_unit(val, 'ns') except: @@ -594,46 +595,25 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', elif is_string_object(val): # string + seen_string = 1 if len(val) == 0 or val in nat_strings: iresult[i] = NPY_NAT continue - - seen_string = 1 + if PyUnicode_Check(val) and PY2: + val = val.encode('utf-8') try: _string_to_dts(val, &dts, &out_local, &out_tzoffset) - value = dtstruct_to_dt64(&dts) - if out_local == 1: - tz = pytz.FixedOffset(out_tzoffset) - value = tz_convert_single(value, tz, 'UTC') - iresult[i] = value - check_dts_bounds(&dts) - except OutOfBoundsDatetime: - # GH#19382 for just-barely-OutOfBounds falling back to - # dateutil parser will return incorrect result because - # it will ignore nanoseconds - if require_iso8601: - if _parse_today_now(val, &iresult[i]): - continue - elif is_coerce: - iresult[i] = NPY_NAT - continue - elif is_raise: - raise ValueError("time data {val} doesn't match " - "format specified" - .format(val=val)) - return values - elif is_coerce: - iresult[i] = NPY_NAT - continue - raise except ValueError: - # if requiring iso8601 strings, skip trying other formats - if 
require_iso8601: - if _parse_today_now(val, &iresult[i]): - continue - elif is_coerce: + # A ValueError at this point is a _parsing_ error + # specifically _not_ OutOfBoundsDatetime + if _parse_today_now(val, &iresult[i]): + continue + elif require_iso8601: + # if requiring iso8601 strings, skip trying + # other formats + if is_coerce: iresult[i] = NPY_NAT continue elif is_raise: @@ -646,8 +626,6 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', py_dt = parse_datetime_string(val, dayfirst=dayfirst, yearfirst=yearfirst) except Exception: - if _parse_today_now(val, &iresult[i]): - continue if is_coerce: iresult[i] = NPY_NAT continue @@ -656,16 +634,42 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', try: _ts = convert_datetime_to_tsobject(py_dt, None) iresult[i] = _ts.value - except ValueError: + except OutOfBoundsDatetime: if is_coerce: iresult[i] = NPY_NAT continue raise except: + # TODO: What exception are we concerned with here? if is_coerce: iresult[i] = NPY_NAT continue raise + else: + # No error raised by string_to_dts, pick back up + # where we left off + value = dtstruct_to_dt64(&dts) + if out_local == 1: + tz = pytz.FixedOffset(out_tzoffset) + value = tz_convert_single(value, tz, 'UTC') + iresult[i] = value + try: + check_dts_bounds(&dts) + except OutOfBoundsDatetime: + # GH#19382 for just-barely-OutOfBounds falling back to + # dateutil parser will return incorrect result because + # it will ignore nanoseconds + if is_coerce: + iresult[i] = NPY_NAT + continue + elif require_iso8601: + if is_raise: + raise ValueError("time data {val} doesn't " + "match format specified" + .format(val=val)) + return values + raise + else: if is_coerce: iresult[i] = NPY_NAT diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index f8b1f68ba33ce..b95ae07052ecb 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -18,7 +18,7 @@ from 
pandas.core.tools import datetimes as tools from pandas.errors import OutOfBoundsDatetime -from pandas.compat import lmap +from pandas.compat import lmap, PY3 from pandas.compat.numpy import np_array_datetime64_compat from pandas.core.dtypes.common import is_datetime64_ns_dtype from pandas.util import testing as tm @@ -238,6 +238,13 @@ def test_to_datetime_today(self): assert pdtoday.tzinfo is None assert pdtoday2.tzinfo is None + def test_to_datetime_today_now_unicode_bytes(self): + to_datetime([u'now']) + to_datetime([u'today']) + if not PY3: + to_datetime(['now']) + to_datetime(['today']) + @pytest.mark.parametrize('cache', [True, False]) def test_to_datetime_dt64s(self, cache): in_bound_dts = [ From 308558ca00398e2ad1ad70c4756ae2f93040fd8b Mon Sep 17 00:00:00 2001 From: William Ayd Date: Sat, 10 Feb 2018 10:48:15 -0800 Subject: [PATCH 089/217] Consolidated Groupby nth / last object Templates (#19635) --- pandas/_libs/groupby.pyx | 99 ---------------------------- pandas/_libs/groupby_helper.pxi.in | 36 ++++++---- pandas/tests/groupby/test_groupby.py | 56 ++++++++-------- 3 files changed, 50 insertions(+), 141 deletions(-) diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index d75c3a71896e3..866683ce378ab 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -26,105 +26,6 @@ cdef double NaN = np.NaN cdef double nan = NaN -# TODO: aggregate multiple columns in single pass -# ---------------------------------------------------------------------- -# first, nth, last - - -@cython.boundscheck(False) -@cython.wraparound(False) -def group_nth_object(ndarray[object, ndim=2] out, - ndarray[int64_t] counts, - ndarray[object, ndim=2] values, - ndarray[int64_t] labels, - int64_t rank, - Py_ssize_t min_count=-1): - """ - Only aggregates on axis=0 - """ - cdef: - Py_ssize_t i, j, N, K, lab - object val - float64_t count - ndarray[int64_t, ndim=2] nobs - ndarray[object, ndim=2] resx - - assert min_count == -1, "'min_count' only used in add and 
prod" - - nobs = np.zeros(( out).shape, dtype=np.int64) - resx = np.empty(( out).shape, dtype=object) - - N, K = ( values).shape - - for i in range(N): - lab = labels[i] - if lab < 0: - continue - - counts[lab] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[lab, j] += 1 - if nobs[lab, j] == rank: - resx[lab, j] = val - - for i in range(len(counts)): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = nan - else: - out[i, j] = resx[i, j] - - -@cython.boundscheck(False) -@cython.wraparound(False) -def group_last_object(ndarray[object, ndim=2] out, - ndarray[int64_t] counts, - ndarray[object, ndim=2] values, - ndarray[int64_t] labels, - Py_ssize_t min_count=-1): - """ - Only aggregates on axis=0 - """ - cdef: - Py_ssize_t i, j, N, K, lab - object val - float64_t count - ndarray[object, ndim=2] resx - ndarray[int64_t, ndim=2] nobs - - assert min_count == -1, "'min_count' only used in add and prod" - - nobs = np.zeros(( out).shape, dtype=np.int64) - resx = np.empty(( out).shape, dtype=object) - - N, K = ( values).shape - - for i in range(N): - lab = labels[i] - if lab < 0: - continue - - counts[lab] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[lab, j] += 1 - resx[lab, j] = val - - for i in range(len(counts)): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = nan - else: - out[i, j] = resx[i, j] - - cdef inline float64_t median_linear(float64_t* a, int n) nogil: cdef int i, j, na_count = 0 cdef float64_t result diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in index b24444c422efa..48dac7bf10362 100644 --- a/pandas/_libs/groupby_helper.pxi.in +++ b/pandas/_libs/groupby_helper.pxi.in @@ -317,7 +317,7 @@ def group_ohlc_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, {{endfor}} #---------------------------------------------------------------------- -# group_nth, group_last +# group_nth, group_last, group_rank 
#---------------------------------------------------------------------- {{py: @@ -325,7 +325,8 @@ def group_ohlc_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, # name, c_type, dest_type2, nan_val dtypes = [('float64', 'float64_t', 'float64_t', 'NAN'), ('float32', 'float32_t', 'float32_t', 'NAN'), - ('int64', 'int64_t', 'int64_t', 'iNaT')] + ('int64', 'int64_t', 'int64_t', 'iNaT'), + ('object', 'object', 'object', 'NAN')] def get_dispatch(dtypes): @@ -350,7 +351,7 @@ def group_last_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, """ cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) - {{dest_type2}} val, count + {{dest_type2}} val ndarray[{{dest_type2}}, ndim=2] resx ndarray[int64_t, ndim=2] nobs @@ -360,11 +361,19 @@ def group_last_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, raise AssertionError("len(index) != len(labels)") nobs = np.zeros(( out).shape, dtype=np.int64) + {{if name=='object'}} + resx = np.empty(( out).shape, dtype=object) + {{else}} resx = np.empty_like(out) + {{endif}} N, K = ( values).shape + {{if name == "object"}} + if True: # make templating happy + {{else}} with nogil: + {{endif}} for i in range(N): lab = labels[i] if lab < 0: @@ -375,11 +384,7 @@ def group_last_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, val = values[i, j] # not nan - {{if name == 'int64'}} - if val != {{nan_val}}: - {{else}} if val == val and val != {{nan_val}}: - {{endif}} nobs[lab, j] += 1 resx[lab, j] = val @@ -390,7 +395,6 @@ def group_last_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, else: out[i, j] = resx[i, j] - @cython.wraparound(False) @cython.boundscheck(False) def group_nth_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, @@ -403,7 +407,7 @@ def group_nth_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, """ cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) - {{dest_type2}} val, count + {{dest_type2}} val ndarray[{{dest_type2}}, ndim=2] resx ndarray[int64_t, ndim=2] nobs @@ -413,11 +417,19 @@ def group_nth_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, raise 
AssertionError("len(index) != len(labels)") nobs = np.zeros(( out).shape, dtype=np.int64) + {{if name=='object'}} + resx = np.empty(( out).shape, dtype=object) + {{else}} resx = np.empty_like(out) + {{endif}} N, K = ( values).shape + {{if name == "object"}} + if True: # make templating happy + {{else}} with nogil: + {{endif}} for i in range(N): lab = labels[i] if lab < 0: @@ -428,11 +440,7 @@ def group_nth_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, val = values[i, j] # not nan - {{if name == 'int64'}} - if val != {{nan_val}}: - {{else}} if val == val and val != {{nan_val}}: - {{endif}} nobs[lab, j] += 1 if nobs[lab, j] == rank: resx[lab, j] = val @@ -445,6 +453,7 @@ def group_nth_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, out[i, j] = resx[i, j] +{{if name != 'object'}} @cython.boundscheck(False) @cython.wraparound(False) def group_rank_{{name}}(ndarray[float64_t, ndim=2] out, @@ -608,6 +617,7 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out, if pct: for i in range(N): out[i, 0] = out[i, 0] / grp_sizes[i, 0] +{{endif}} {{endfor}} diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 2db772ac54369..6eacd45deb7bc 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2252,7 +2252,19 @@ def test_median_empty_bins(self): expected = df.groupby(bins).agg(lambda x: x.median()) assert_frame_equal(result, expected) - def test_groupby_non_arithmetic_agg_types(self): + @pytest.mark.parametrize("dtype", [ + 'int8', 'int16', 'int32', 'int64', 'float32', 'float64']) + @pytest.mark.parametrize("method,data", [ + ('first', {'df': [{'a': 1, 'b': 1}, {'a': 2, 'b': 3}]}), + ('last', {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}]}), + ('min', {'df': [{'a': 1, 'b': 1}, {'a': 2, 'b': 3}]}), + ('max', {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}]}), + ('nth', {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}], + 'args': [1]}), + ('count', {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 2}], + 'out_type': 
'int64'}) + ]) + def test_groupby_non_arithmetic_agg_types(self, dtype, method, data): # GH9311, GH6620 df = pd.DataFrame( [{'a': 1, 'b': 1}, @@ -2260,39 +2272,25 @@ def test_groupby_non_arithmetic_agg_types(self): {'a': 2, 'b': 3}, {'a': 2, 'b': 4}]) - dtypes = ['int8', 'int16', 'int32', 'int64', 'float32', 'float64'] - - grp_exp = {'first': {'df': [{'a': 1, 'b': 1}, {'a': 2, 'b': 3}]}, - 'last': {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}]}, - 'min': {'df': [{'a': 1, 'b': 1}, {'a': 2, 'b': 3}]}, - 'max': {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}]}, - 'nth': {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}], - 'args': [1]}, - 'count': {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 2}], - 'out_type': 'int64'}} + df['b'] = df.b.astype(dtype) - for dtype in dtypes: - df_in = df.copy() - df_in['b'] = df_in.b.astype(dtype) + if 'args' not in data: + data['args'] = [] - for method, data in compat.iteritems(grp_exp): - if 'args' not in data: - data['args'] = [] - - if 'out_type' in data: - out_type = data['out_type'] - else: - out_type = dtype + if 'out_type' in data: + out_type = data['out_type'] + else: + out_type = dtype - exp = data['df'] - df_out = pd.DataFrame(exp) + exp = data['df'] + df_out = pd.DataFrame(exp) - df_out['b'] = df_out.b.astype(out_type) - df_out.set_index('a', inplace=True) + df_out['b'] = df_out.b.astype(out_type) + df_out.set_index('a', inplace=True) - grpd = df_in.groupby('a') - t = getattr(grpd, method)(*data['args']) - assert_frame_equal(t, df_out) + grpd = df.groupby('a') + t = getattr(grpd, method)(*data['args']) + assert_frame_equal(t, df_out) def test_groupby_non_arithmetic_agg_intlike_precision(self): # GH9311, GH6620 From d07884d010960ae4754231c3b08299c005735de5 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 10 Feb 2018 13:12:37 -0800 Subject: [PATCH 090/217] Continue porting period_helper to cython (#19608) --- pandas/_libs/src/period_helper.c | 286 ++----------------------- pandas/_libs/src/period_helper.h | 12 +- 
pandas/_libs/tslibs/period.pyx | 356 ++++++++++++++++++++++++++++--- 3 files changed, 349 insertions(+), 305 deletions(-) diff --git a/pandas/_libs/src/period_helper.c b/pandas/_libs/src/period_helper.c index f0e24fec685d0..7c4de8e42e73b 100644 --- a/pandas/_libs/src/period_helper.c +++ b/pandas/_libs/src/period_helper.c @@ -45,7 +45,7 @@ static int monthToQuarter(int month) { return ((month - 1) / 3) + 1; } /* Find the absdate (days elapsed since datetime(1, 1, 1) * for the given year/month/day. * Assumes GREGORIAN_CALENDAR */ -static npy_int64 dInfoCalc_SetFromDateAndTime(int year, int month, int day) { +npy_int64 absdate_from_ymd(int year, int month, int day) { /* Calculate the absolute date */ pandas_datetimestruct dts; npy_int64 unix_date; @@ -68,8 +68,6 @@ static int dInfoCalc_SetFromAbsDate(register struct date_info *dinfo, dinfo->year = dts.year; dinfo->month = dts.month; dinfo->day = dts.day; - - dinfo->absdate = absdate; return 0; } @@ -100,8 +98,7 @@ PANDAS_INLINE int get_freq_group(int freq) { return (freq / 1000) * 1000; } PANDAS_INLINE int get_freq_group_index(int freq) { return freq / 1000; } -PANDAS_INLINE npy_int64 get_daytime_conversion_factor(int from_index, - int to_index) { +npy_int64 get_daytime_conversion_factor(int from_index, int to_index) { int row = min_value(from_index, to_index); int col = max_value(from_index, to_index); // row or col < 6 means frequency strictly lower than Daily, which @@ -144,9 +141,9 @@ static npy_int64 DtoB_weekday(npy_int64 absdate) { return (((absdate) / 7) * 5) + (absdate) % 7 - BDAY_OFFSET; } -static npy_int64 DtoB(struct date_info *dinfo, int roll_back) { +static npy_int64 DtoB(struct date_info *dinfo, + int roll_back, npy_int64 absdate) { int day_of_week = dayofweek(dinfo->year, dinfo->month, dinfo->day); - npy_int64 absdate = dinfo->absdate; if (roll_back == 1) { if (day_of_week > 4) { @@ -162,9 +159,6 @@ static npy_int64 DtoB(struct date_info *dinfo, int roll_back) { return DtoB_weekday(absdate); } -static 
npy_int64 absdate_from_ymd(int y, int m, int d) { - return dInfoCalc_SetFromDateAndTime(y, m, d); -} //************ FROM DAILY *************** @@ -224,15 +218,16 @@ static npy_int64 asfreq_DTtoW(npy_int64 ordinal, asfreq_info *af_info) { static npy_int64 asfreq_DTtoB(npy_int64 ordinal, asfreq_info *af_info) { struct date_info dinfo; + npy_int64 absdate; int roll_back; ordinal = downsample_daytime(ordinal, af_info); - - dInfoCalc_SetFromAbsDate(&dinfo, ordinal + ORD_OFFSET); + absdate = ordinal + ORD_OFFSET; + dInfoCalc_SetFromAbsDate(&dinfo, absdate); // This usage defines roll_back the opposite way from the others roll_back = 1 - af_info->is_end; - return DtoB(&dinfo, roll_back); + return DtoB(&dinfo, roll_back, absdate); } // all intra day calculations are now done within one function @@ -298,11 +293,11 @@ static npy_int64 asfreq_WtoW(npy_int64 ordinal, asfreq_info *af_info) { static npy_int64 asfreq_WtoB(npy_int64 ordinal, asfreq_info *af_info) { struct date_info dinfo; + npy_int64 absdate = asfreq_WtoDT(ordinal, af_info) + ORD_OFFSET; int roll_back = af_info->is_end; - dInfoCalc_SetFromAbsDate( - &dinfo, asfreq_WtoDT(ordinal, af_info) + ORD_OFFSET); + dInfoCalc_SetFromAbsDate(&dinfo, absdate); - return DtoB(&dinfo, roll_back); + return DtoB(&dinfo, roll_back, absdate); } //************ FROM MONTHLY *************** @@ -338,12 +333,12 @@ static npy_int64 asfreq_MtoW(npy_int64 ordinal, asfreq_info *af_info) { static npy_int64 asfreq_MtoB(npy_int64 ordinal, asfreq_info *af_info) { struct date_info dinfo; + npy_int64 absdate = asfreq_MtoDT(ordinal, af_info) + ORD_OFFSET; int roll_back = af_info->is_end; - dInfoCalc_SetFromAbsDate( - &dinfo, asfreq_MtoDT(ordinal, af_info) + ORD_OFFSET); + dInfoCalc_SetFromAbsDate(&dinfo, absdate); - return DtoB(&dinfo, roll_back); + return DtoB(&dinfo, roll_back, absdate); } //************ FROM QUARTERLY *************** @@ -393,12 +388,12 @@ static npy_int64 asfreq_QtoW(npy_int64 ordinal, asfreq_info *af_info) { static npy_int64 
asfreq_QtoB(npy_int64 ordinal, asfreq_info *af_info) { struct date_info dinfo; + npy_int64 absdate = asfreq_QtoDT(ordinal, af_info) + ORD_OFFSET; int roll_back = af_info->is_end; - dInfoCalc_SetFromAbsDate( - &dinfo, asfreq_QtoDT(ordinal, af_info) + ORD_OFFSET); + dInfoCalc_SetFromAbsDate(&dinfo, absdate); - return DtoB(&dinfo, roll_back); + return DtoB(&dinfo, roll_back, absdate); } //************ FROM ANNUAL *************** @@ -439,11 +434,11 @@ static npy_int64 asfreq_AtoW(npy_int64 ordinal, asfreq_info *af_info) { static npy_int64 asfreq_AtoB(npy_int64 ordinal, asfreq_info *af_info) { struct date_info dinfo; + npy_int64 absdate = asfreq_AtoDT(ordinal, af_info) + ORD_OFFSET; int roll_back = af_info->is_end; - dInfoCalc_SetFromAbsDate( - &dinfo, asfreq_AtoDT(ordinal, af_info) + ORD_OFFSET); + dInfoCalc_SetFromAbsDate(&dinfo, absdate); - return DtoB(&dinfo, roll_back); + return DtoB(&dinfo, roll_back, absdate); } static npy_int64 nofunc(npy_int64 ordinal, asfreq_info *af_info) { @@ -675,65 +670,6 @@ freq_conv_func get_asfreq_func(int fromFreq, int toFreq) { } } -double get_abs_time(int freq, npy_int64 date_ordinal, npy_int64 ordinal) { - int freq_index, day_index, base_index; - npy_int64 per_day, start_ord; - double unit, result; - - if (freq <= FR_DAY) { - return 0; - } - - freq_index = get_freq_group_index(freq); - day_index = get_freq_group_index(FR_DAY); - base_index = get_freq_group_index(FR_SEC); - - per_day = get_daytime_conversion_factor(day_index, freq_index); - unit = get_daytime_conversion_factor(freq_index, base_index); - - if (base_index < freq_index) { - unit = 1 / unit; - } - - start_ord = date_ordinal * per_day; - result = (double)(unit * (ordinal - start_ord)); - return result; -} - -/* Sets the time part of the DateTime object. 
*/ -static int dInfoCalc_SetFromAbsTime(struct date_info *dinfo, double abstime) { - int inttime; - int hour, minute; - double second; - - inttime = (int)abstime; - hour = inttime / 3600; - minute = (inttime % 3600) / 60; - second = abstime - (double)(hour * 3600 + minute * 60); - - dinfo->hour = hour; - dinfo->minute = minute; - dinfo->second = second; - return 0; -} - -/* Set the instance's value using the given date and time. - Assumes GREGORIAN_CALENDAR. */ -static int dInfoCalc_SetFromAbsDateTime(struct date_info *dinfo, - npy_int64 absdate, double abstime) { - /* Bounds check */ - // The calling function is responsible for ensuring that - // abstime >= 0.0 && abstime <= 86400 - - /* Calculate the date */ - dInfoCalc_SetFromAbsDate(dinfo, absdate); - - /* Calculate the time */ - dInfoCalc_SetFromAbsTime(dinfo, abstime); - - return 0; -} - /* ------------------------------------------------------------------ * New pandas API-helper code, to expose to cython * ------------------------------------------------------------------*/ @@ -750,185 +686,3 @@ npy_int64 asfreq(npy_int64 period_ordinal, int freq1, int freq2, val = (*func)(period_ordinal, &finfo); return val; } - -/* generate an ordinal in period space */ -npy_int64 get_period_ordinal(int year, int month, int day, int hour, int minute, - int second, int microseconds, int picoseconds, - int freq) { - npy_int64 absdays, delta, seconds; - npy_int64 weeks, days; - npy_int64 ordinal, day_adj; - int freq_group, fmonth, mdiff; - freq_group = get_freq_group(freq); - - if (freq == FR_SEC || freq == FR_MS || freq == FR_US || freq == FR_NS) { - absdays = absdate_from_ymd(year, month, day); - delta = (absdays - ORD_OFFSET); - seconds = - (npy_int64)(delta * 86400 + hour * 3600 + minute * 60 + second); - - switch (freq) { - case FR_MS: - return seconds * 1000 + microseconds / 1000; - - case FR_US: - return seconds * 1000000 + microseconds; - - case FR_NS: - return seconds * 1000000000 + microseconds * 1000 + - 
picoseconds / 1000; - } - - return seconds; - } - - if (freq == FR_MIN) { - absdays = absdate_from_ymd(year, month, day); - delta = (absdays - ORD_OFFSET); - return (npy_int64)(delta * 1440 + hour * 60 + minute); - } - - if (freq == FR_HR) { - absdays = absdate_from_ymd(year, month, day); - delta = (absdays - ORD_OFFSET); - return (npy_int64)(delta * 24 + hour); - } - - if (freq == FR_DAY) { - return (npy_int64)(absdate_from_ymd(year, month, day) - ORD_OFFSET); - } - - if (freq == FR_UND) { - return (npy_int64)(absdate_from_ymd(year, month, day) - ORD_OFFSET); - } - - if (freq == FR_BUS) { - days = absdate_from_ymd(year, month, day); - // calculate the current week assuming sunday as last day of a week - weeks = (days - BASE_WEEK_TO_DAY_OFFSET) / DAYS_PER_WEEK; - // calculate the current weekday (in range 1 .. 7) - delta = (days - BASE_WEEK_TO_DAY_OFFSET) % DAYS_PER_WEEK + 1; - // return the number of business days in full weeks plus the business - // days in the last - possible partial - week - return (npy_int64)(weeks * BUSINESS_DAYS_PER_WEEK) + - (delta <= BUSINESS_DAYS_PER_WEEK ? 
delta - : BUSINESS_DAYS_PER_WEEK + 1) - - BDAY_OFFSET; - } - - if (freq_group == FR_WK) { - ordinal = (npy_int64)absdate_from_ymd(year, month, day); - day_adj = freq - FR_WK; - return (ordinal - (1 + day_adj)) / 7 + 1 - WEEK_OFFSET; - } - - if (freq == FR_MTH) { - return (year - BASE_YEAR) * 12 + month - 1; - } - - if (freq_group == FR_QTR) { - fmonth = freq - FR_QTR; - if (fmonth == 0) fmonth = 12; - - mdiff = month - fmonth; - if (mdiff < 0) mdiff += 12; - if (month >= fmonth) mdiff += 12; - - return (year - BASE_YEAR) * 4 + (mdiff - 1) / 3; - } - - if (freq_group == FR_ANN) { - fmonth = freq - FR_ANN; - if (fmonth == 0) fmonth = 12; - if (month <= fmonth) { - return year - BASE_YEAR; - } else { - return year - BASE_YEAR + 1; - } - } - - Py_Error(PyExc_RuntimeError, "Unable to generate frequency ordinal"); - -onError: - return INT_ERR_CODE; -} - -/* - Returns the proleptic Gregorian ordinal of the date, as an integer. - This corresponds to the number of days since Jan., 1st, 1AD. - When the instance has a frequency less than daily, the proleptic date - is calculated for the last day of the period. 
- */ - -npy_int64 get_python_ordinal(npy_int64 period_ordinal, int freq) { - asfreq_info af_info; - freq_conv_func toDaily = NULL; - - if (freq == FR_DAY) return period_ordinal + ORD_OFFSET; - - toDaily = get_asfreq_func(freq, FR_DAY); - get_asfreq_info(freq, FR_DAY, 'E', &af_info); - - return toDaily(period_ordinal, &af_info) + ORD_OFFSET; -} - - -int get_yq(npy_int64 ordinal, int freq, int *quarter, int *year) { - asfreq_info af_info; - int qtr_freq; - npy_int64 daily_ord; - freq_conv_func toDaily = NULL; - - toDaily = get_asfreq_func(freq, FR_DAY); - get_asfreq_info(freq, FR_DAY, 'E', &af_info); - - daily_ord = toDaily(ordinal, &af_info); - - if (get_freq_group(freq) == FR_QTR) { - qtr_freq = freq; - } else { - qtr_freq = FR_QTR; - } - get_asfreq_info(FR_DAY, qtr_freq, 'E', &af_info); - - DtoQ_yq(daily_ord, &af_info, year, quarter); - return 0; -} - -int _quarter_year(npy_int64 ordinal, int freq, int *year, int *quarter) { - asfreq_info af_info; - int qtr_freq; - - ordinal = get_python_ordinal(ordinal, freq) - ORD_OFFSET; - - if (get_freq_group(freq) == FR_QTR) - qtr_freq = freq; - else - qtr_freq = FR_QTR; - - get_asfreq_info(FR_DAY, qtr_freq, 'E', &af_info); - - DtoQ_yq(ordinal, &af_info, year, quarter); - - if ((qtr_freq % 1000) > 12) *year -= 1; - - return 0; -} - - -int get_date_info(npy_int64 ordinal, int freq, struct date_info *dinfo) { - npy_int64 absdate = get_python_ordinal(ordinal, freq); - double abstime = get_abs_time(freq, absdate - ORD_OFFSET, ordinal); - - while (abstime < 0) { - abstime += 86400; - absdate -= 1; - } - while (abstime >= 86400) { - abstime -= 86400; - absdate += 1; - } - - dInfoCalc_SetFromAbsDateTime(dinfo, absdate, abstime); - return 0; -} diff --git a/pandas/_libs/src/period_helper.h b/pandas/_libs/src/period_helper.h index f14aec268a1fb..1573b1eeec74b 100644 --- a/pandas/_libs/src/period_helper.h +++ b/pandas/_libs/src/period_helper.h @@ -118,8 +118,6 @@ typedef struct asfreq_info { } asfreq_info; typedef struct date_info { - 
npy_int64 absdate; - double second; int minute; int hour; @@ -136,18 +134,10 @@ typedef npy_int64 (*freq_conv_func)(npy_int64, asfreq_info *af_info); npy_int64 asfreq(npy_int64 period_ordinal, int freq1, int freq2, char relation); -npy_int64 get_period_ordinal(int year, int month, int day, int hour, int minute, - int second, int microseconds, int picoseconds, - int freq); - -npy_int64 get_python_ordinal(npy_int64 period_ordinal, int freq); - -int get_date_info(npy_int64 ordinal, int freq, struct date_info *dinfo); freq_conv_func get_asfreq_func(int fromFreq, int toFreq); void get_asfreq_info(int fromFreq, int toFreq, char relation, asfreq_info *af_info); -int get_yq(npy_int64 ordinal, int freq, int *quarter, int *year); -int _quarter_year(npy_int64 ordinal, int freq, int *year, int *quarter); +npy_int64 get_daytime_conversion_factor(int from_index, int to_index); #endif // PANDAS__LIBS_SRC_PERIOD_HELPER_H_ diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index dc5d058f41d11..c11a8b149bc13 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -13,7 +13,7 @@ import_array() from libc.stdlib cimport free, malloc from libc.time cimport strftime, tm -from libc.string cimport strlen +from libc.string cimport strlen, memset from pandas.compat import PY2 @@ -24,7 +24,15 @@ from cpython.datetime cimport PyDateTime_Check, PyDateTime_IMPORT PyDateTime_IMPORT from np_datetime cimport (pandas_datetimestruct, dtstruct_to_dt64, - dt64_to_dtstruct) + dt64_to_dtstruct, + PANDAS_FR_D, + pandas_datetime_to_datetimestruct, + PANDAS_DATETIMEUNIT) + +cdef extern from "../src/datetime/np_datetime.h": + int64_t pandas_datetimestruct_to_datetime(PANDAS_DATETIMEUNIT fr, + pandas_datetimestruct *d + ) nogil cimport util from util cimport is_period_object, is_string_object, INT32_MIN @@ -53,6 +61,24 @@ from pandas.tseries import frequencies cdef extern from "period_helper.h": + int FR_ANN + int FR_QTR + int FR_MTH + int FR_WK + int 
FR_DAY + int FR_HR + int FR_MIN + int FR_SEC + int FR_MS + int FR_US + int FR_NS + int FR_BUS + int FR_UND + + int ORD_OFFSET + int WEEK_OFFSET + int BDAY_OFFSET + ctypedef struct date_info: double second int minute @@ -73,24 +99,15 @@ cdef extern from "period_helper.h": int from_q_year_end int to_q_year_end - ctypedef int64_t (*freq_conv_func)(int64_t, asfreq_info*) + ctypedef int64_t (*freq_conv_func)(int64_t, asfreq_info*) nogil int64_t asfreq(int64_t dtordinal, int freq1, int freq2, char relation) except INT32_MIN - freq_conv_func get_asfreq_func(int fromFreq, int toFreq) + freq_conv_func get_asfreq_func(int fromFreq, int toFreq) nogil void get_asfreq_info(int fromFreq, int toFreq, char relation, - asfreq_info *af_info) - - int64_t get_period_ordinal(int year, int month, int day, - int hour, int minute, int second, - int microseconds, int picoseconds, - int freq) nogil except INT32_MIN - - int get_date_info(int64_t ordinal, int freq, - date_info *dinfo) nogil + asfreq_info *af_info) nogil - int get_yq(int64_t ordinal, int freq, int *quarter, int *year) - int _quarter_year(int64_t ordinal, int freq, int *year, int *quarter) + int64_t get_daytime_conversion_factor(int from_index, int to_index) nogil @cython.cdivision @@ -130,6 +147,285 @@ cdef char* c_strftime(date_info *dinfo, char *fmt): return result +# ---------------------------------------------------------------------- +# Conversion between date_info and pandas_datetimestruct + +cdef inline int get_freq_group(int freq) nogil: + return (freq // 1000) * 1000 + + +@cython.cdivision +cdef int64_t get_period_ordinal(int year, int month, int day, + int hour, int minute, int second, + int microseconds, int picoseconds, + int freq) nogil: + """generate an ordinal in period space""" + cdef: + int64_t absdays, unix_date, seconds, delta + int64_t weeks + int64_t day_adj + int freq_group, fmonth, mdiff + + freq_group = get_freq_group(freq) + + if freq_group == FR_ANN: + fmonth = freq - FR_ANN + if fmonth == 0: + 
fmonth = 12 + if month <= fmonth: + return year - 1970 + else: + return year - 1970 + 1 + + elif freq_group == FR_QTR: + fmonth = freq - FR_QTR + if fmonth == 0: + fmonth = 12 + + mdiff = month - fmonth + # TODO: Aren't the next two conditions equivalent to + # unconditional incrementing? + if mdiff < 0: + mdiff += 12 + if month >= fmonth: + mdiff += 12 + + return (year - 1970) * 4 + (mdiff - 1) / 3 + + elif freq == FR_MTH: + return (year - 1970) * 12 + month - 1 + + absdays = absdate_from_ymd(year, month, day) + unix_date = absdays - ORD_OFFSET + + if freq >= FR_SEC: + seconds = unix_date * 86400 + hour * 3600 + minute * 60 + second + + if freq == FR_MS: + return seconds * 1000 + microseconds / 1000 + + elif freq == FR_US: + return seconds * 1000000 + microseconds + + elif freq == FR_NS: + return (seconds * 1000000000 + + microseconds * 1000 + picoseconds / 1000) + + else: + return seconds + + elif freq == FR_MIN: + return unix_date * 1440 + hour * 60 + minute + + elif freq == FR_HR: + return unix_date * 24 + hour + + elif freq == FR_DAY: + return unix_date + + elif freq == FR_UND: + return unix_date + + elif freq == FR_BUS: + # calculate the current week assuming sunday as last day of a week + # Jan 1 0001 is a Monday, so subtract 1 to get to end-of-week + weeks = (unix_date + ORD_OFFSET - 1) / 7 + # calculate the current weekday (in range 1 .. 
7) + delta = (unix_date + ORD_OFFSET - 1) % 7 + 1 + # return the number of business days in full weeks plus the business + # days in the last - possible partial - week + if delta <= 5: + return (weeks * 5) + delta - BDAY_OFFSET + else: + return (weeks * 5) + (5 + 1) - BDAY_OFFSET + + elif freq_group == FR_WK: + day_adj = freq - FR_WK + return (unix_date + ORD_OFFSET - (1 + day_adj)) / 7 + 1 - WEEK_OFFSET + + # raise ValueError + + +cdef int get_date_info(int64_t ordinal, int freq, date_info *dinfo) nogil: + cdef: + int64_t absdate + double abstime + + absdate = get_python_ordinal(ordinal, freq); + abstime = get_abs_time(freq, absdate - ORD_OFFSET, ordinal) + + while abstime < 0: + abstime += 86400 + absdate -= 1 + + while abstime >= 86400: + abstime -= 86400 + absdate += 1 + + dInfoCalc_SetFromAbsDateTime(dinfo, absdate, abstime) + return 0 + + +cdef int64_t get_python_ordinal(int64_t period_ordinal, int freq) nogil: + """ + Returns the proleptic Gregorian ordinal of the date, as an integer. + This corresponds to the number of days since Jan., 1st, 1AD. + When the instance has a frequency less than daily, the proleptic date + is calculated for the last day of the period. + """ + cdef: + asfreq_info af_info + freq_conv_func toDaily = NULL + + if freq == FR_DAY: + return period_ordinal + ORD_OFFSET + + toDaily = get_asfreq_func(freq, FR_DAY) + get_asfreq_info(freq, FR_DAY, 'E', &af_info) + return toDaily(period_ordinal, &af_info) + ORD_OFFSET + + +cdef int dInfoCalc_SetFromAbsDateTime(date_info *dinfo, + int64_t absdate, double abstime) nogil: + """ + Set the instance's value using the given date and time. + Assumes GREGORIAN_CALENDAR. 
+ """ + # Bounds check + # The calling function is responsible for ensuring that + # abstime >= 0.0 and abstime <= 86400 + + # Calculate the date + dInfoCalc_SetFromAbsDate(dinfo, absdate) + + # Calculate the time + dInfoCalc_SetFromAbsTime(dinfo, abstime) + return 0 + + +cdef int dInfoCalc_SetFromAbsDate(date_info *dinfo, int64_t absdate) nogil: + """ + Sets the date part of the date_info struct + Assumes GREGORIAN_CALENDAR + """ + cdef: + pandas_datetimestruct dts + + pandas_datetime_to_datetimestruct(absdate - ORD_OFFSET, PANDAS_FR_D, &dts) + dinfo.year = dts.year + dinfo.month = dts.month + dinfo.day = dts.day + return 0 + + +@cython.cdivision +cdef int dInfoCalc_SetFromAbsTime(date_info *dinfo, double abstime) nogil: + """ + Sets the time part of the DateTime object. + """ + cdef: + int inttime + int hour, minute + double second + + inttime = abstime + hour = inttime / 3600 + minute = (inttime % 3600) / 60 + second = abstime - (hour * 3600 + minute * 60) + + dinfo.hour = hour + dinfo.minute = minute + dinfo.second = second + return 0 + + +@cython.cdivision +cdef double get_abs_time(int freq, int64_t date_ordinal, + int64_t ordinal) nogil: + cdef: + int freq_index, day_index, base_index + int64_t per_day, start_ord + double unit, result + + if freq <= FR_DAY: + return 0 + + freq_index = freq // 1000 + day_index = FR_DAY // 1000 + base_index = FR_SEC // 1000 + + per_day = get_daytime_conversion_factor(day_index, freq_index) + unit = get_daytime_conversion_factor(freq_index, base_index) + + if base_index < freq_index: + unit = 1 / unit + + start_ord = date_ordinal * per_day + result = (unit * (ordinal - start_ord)) + return result + + +cdef int64_t absdate_from_ymd(int year, int month, int day) nogil: + """ + Find the absdate (days elapsed since datetime(1, 1, 1) + for the given year/month/day. 
+ Assumes GREGORIAN_CALENDAR + """ + # /* Calculate the absolute date + cdef: + pandas_datetimestruct dts + int64_t unix_date + + memset(&dts, 0, sizeof(pandas_datetimestruct)) + dts.year = year + dts.month = month + dts.day = day + unix_date = pandas_datetimestruct_to_datetime(PANDAS_FR_D, &dts) + return ORD_OFFSET + unix_date + + +cdef int get_yq(int64_t ordinal, int freq, int *quarter, int *year): + cdef: + asfreq_info af_info + int qtr_freq + int64_t daily_ord + + daily_ord = get_python_ordinal(ordinal, freq) - ORD_OFFSET + + if get_freq_group(freq) == FR_QTR: + qtr_freq = freq + else: + qtr_freq = FR_QTR + + get_asfreq_info(FR_DAY, qtr_freq, 'E', &af_info) + + DtoQ_yq(daily_ord, &af_info, year, quarter) + return qtr_freq + + +cdef int64_t DtoQ_yq(int64_t ordinal, asfreq_info *af_info, + int *year, int *quarter): + cdef: + date_info dinfo + + dInfoCalc_SetFromAbsDate(&dinfo, ordinal + ORD_OFFSET) + + if af_info.to_q_year_end != 12: + dinfo.month -= af_info.to_q_year_end + if dinfo.month <= 0: + dinfo.month += 12 + else: + dinfo.year += 1 + + year[0] = dinfo.year + quarter[0] = monthToQuarter(dinfo.month) + return 0 + + +cdef inline int monthToQuarter(int month): + return (month - 1) // 3 + 1 + + # ---------------------------------------------------------------------- # Period logic @@ -194,8 +490,7 @@ cdef char START = 'S' cdef char END = 'E' -cpdef int64_t period_asfreq(int64_t period_ordinal, int freq1, int freq2, - bint end): +cpdef int64_t period_asfreq(int64_t ordinal, int freq1, int freq2, bint end): """ Convert period ordinal from one frequency to another, and if upsampling, choose to use start ('S') or end ('E') of period. 
@@ -203,13 +498,13 @@ cpdef int64_t period_asfreq(int64_t period_ordinal, int freq1, int freq2, cdef: int64_t retval - if period_ordinal == iNaT: + if ordinal == iNaT: return iNaT if end: - retval = asfreq(period_ordinal, freq1, freq2, END) + retval = asfreq(ordinal, freq1, freq2, END) else: - retval = asfreq(period_ordinal, freq1, freq2, START) + retval = asfreq(ordinal, freq1, freq2, START) if retval == INT32_MIN: raise ValueError('Frequency conversion failed') @@ -226,7 +521,7 @@ def period_asfreq_arr(ndarray[int64_t] arr, int freq1, int freq2, bint end): ndarray[int64_t] result Py_ssize_t i, n freq_conv_func func - asfreq_info finfo + asfreq_info af_info int64_t val char relation @@ -239,20 +534,20 @@ def period_asfreq_arr(ndarray[int64_t] arr, int freq1, int freq2, bint end): relation = START func = get_asfreq_func(freq1, freq2) - get_asfreq_info(freq1, freq2, relation, &finfo) + get_asfreq_info(freq1, freq2, relation, &af_info) mask = arr == iNaT if mask.any(): # NaT process for i in range(n): val = arr[i] if val != iNaT: - val = func(val, &finfo) + val = func(val, &af_info) if val == INT32_MIN: raise ValueError("Unable to convert to desired frequency.") result[i] = val else: for i in range(n): - val = func(arr[i], &finfo) + val = func(arr[i], &af_info) if val == INT32_MIN: raise ValueError("Unable to convert to desired frequency.") result[i] = val @@ -404,17 +699,22 @@ cdef int pyear(int64_t ordinal, int freq): return dinfo.year +@cython.cdivision cdef int pqyear(int64_t ordinal, int freq): cdef: - int year, quarter - _quarter_year(ordinal, freq, &year, &quarter) + int year, quarter, qtr_freq + qtr_freq = get_yq(ordinal, freq, &quarter, &year) + if (qtr_freq % 1000) > 12: + year -= 1 return year cdef int pquarter(int64_t ordinal, int freq): cdef: - int year, quarter - _quarter_year(ordinal, freq, &year, &quarter) + int year, quarter, qtr_freq + qtr_freq = get_yq(ordinal, freq, &quarter, &year) + if (qtr_freq % 1000) > 12: + year -= 1 return quarter From 
5d17b20817252b78c6f2b0779aa7e384e8758c42 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 10 Feb 2018 16:04:50 -0800 Subject: [PATCH 091/217] fix overflows in Timestamp.tz_localize near boundaries (#19626) --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/_libs/tslibs/conversion.pxd | 2 - pandas/_libs/tslibs/conversion.pyx | 56 ++++++++++++++++--- .../tests/scalar/timestamp/test_timezones.py | 17 ++++++ 4 files changed, 67 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 6f48d9a6c63c9..6fdd551accbf1 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -727,6 +727,7 @@ Timezones - Bug in tz-aware :class:`DatetimeIndex` where addition/subtraction with a :class:`TimedeltaIndex` or array with ``dtype='timedelta64[ns]'`` was incorrect (:issue:`17558`) - Bug in :func:`DatetimeIndex.insert` where inserting ``NaT`` into a timezone-aware index incorrectly raised (:issue:`16357`) - Bug in the :class:`DataFrame` constructor, where tz-aware Datetimeindex and a given column name will result in an empty ``DataFrame`` (:issue:`19157`) +- Bug in :func:`Timestamp.tz_localize` where localizing a timestamp near the minimum or maximum valid values could overflow and return a timestamp with an incorrect nanosecond value (:issue:`12677`) Offsets ^^^^^^^ diff --git a/pandas/_libs/tslibs/conversion.pxd b/pandas/_libs/tslibs/conversion.pxd index 0d5e9e3fc5152..868c2641b34db 100644 --- a/pandas/_libs/tslibs/conversion.pxd +++ b/pandas/_libs/tslibs/conversion.pxd @@ -21,8 +21,6 @@ cdef convert_to_tsobject(object ts, object tz, object unit, cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, int32_t nanos=*) -cdef void _localize_tso(_TSObject obj, object tz) - cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2) cdef int64_t get_datetime64_nanos(object val) except? 
-1 diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index cfbcb922cb47d..beaca1a8483c7 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -309,12 +309,13 @@ cdef convert_to_tsobject(object ts, object tz, object unit, raise TypeError('Cannot convert input [{}] of type {} to ' 'Timestamp'.format(ts, type(ts))) - if obj.value != NPY_NAT: - check_dts_bounds(&obj.dts) - if tz is not None: - _localize_tso(obj, tz) + localize_tso(obj, tz) + if obj.value != NPY_NAT: + # check_overflows needs to run after localize_tso + check_dts_bounds(&obj.dts) + check_overflows(obj) return obj @@ -391,6 +392,7 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, obj.dts.ps = nanos * 1000 check_dts_bounds(&obj.dts) + check_overflows(obj) return obj @@ -454,6 +456,7 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit, obj.value = tz_convert_single(obj.value, obj.tzinfo, 'UTC') if tz is None: check_dts_bounds(&obj.dts) + check_overflows(obj) return obj else: # Keep the converter same as PyDateTime's @@ -469,7 +472,7 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit, else: ts = obj.value if tz is not None: - # shift for _localize_tso + # shift for localize_tso ts = tz_localize_to_utc(np.array([ts], dtype='i8'), tz, ambiguous='raise', errors='raise')[0] @@ -490,12 +493,51 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit, return convert_to_tsobject(ts, tz, unit, dayfirst, yearfirst) +cdef inline check_overflows(_TSObject obj): + """ + Check that we haven't silently overflowed in timezone conversion + + Parameters + ---------- + obj : _TSObject + + Returns + ------- + None + + Raises + ------ + OutOfBoundsDatetime + """ + # GH#12677 + if obj.dts.year == 1677: + if not (obj.value < 0): + raise OutOfBoundsDatetime + elif obj.dts.year == 2262: + if not (obj.value > 0): + raise OutOfBoundsDatetime + + # 
---------------------------------------------------------------------- # Localization -cdef inline void _localize_tso(_TSObject obj, object tz): +cdef inline void localize_tso(_TSObject obj, tzinfo tz): """ - Take a TSObject in UTC and localizes to timezone tz. + Given the UTC nanosecond timestamp in obj.value, find the wall-clock + representation of that timestamp in the given timezone. + + Parameters + ---------- + obj : _TSObject + tz : tzinfo + + Returns + ------- + None + + Notes + ----- + Sets obj.tzinfo inplace, alters obj.dts inplace. """ cdef: ndarray[int64_t] trans, deltas diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py index 7a5c6feb8b651..f43651dc6f0db 100644 --- a/pandas/tests/scalar/timestamp/test_timezones.py +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -15,12 +15,29 @@ import pandas.util._test_decorators as td from pandas import Timestamp, NaT +from pandas.errors import OutOfBoundsDatetime class TestTimestampTZOperations(object): # -------------------------------------------------------------- # Timestamp.tz_localize + def test_tz_localize_pushes_out_of_bounds(self): + # GH#12677 + # tz_localize that pushes away from the boundary is OK + pac = Timestamp.min.tz_localize('US/Pacific') + assert pac.value > Timestamp.min.value + pac.tz_convert('Asia/Tokyo') # tz_convert doesn't change value + with pytest.raises(OutOfBoundsDatetime): + Timestamp.min.tz_localize('Asia/Tokyo') + + # tz_localize that pushes away from the boundary is OK + tokyo = Timestamp.max.tz_localize('Asia/Tokyo') + assert tokyo.value < Timestamp.max.value + tokyo.tz_convert('US/Pacific') # tz_convert doesn't change value + with pytest.raises(OutOfBoundsDatetime): + Timestamp.max.tz_localize('US/Pacific') + def test_tz_localize_ambiguous_bool(self): # make sure that we are correctly accepting bool values as ambiguous # GH#14402 From 8433c0e4fb5bb5b6d0c95160e845056a72155f09 Mon Sep 17 00:00:00 2001 From: 
jbrockmendel Date: Sat, 10 Feb 2018 16:49:22 -0800 Subject: [PATCH 092/217] move shift_months test to test_arithmetic (#19636) --- .../tests/indexes/datetimes/test_arithmetic.py | 17 +++++++++++++++++ pandas/tests/indexes/datetimes/test_ops.py | 15 --------------- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_arithmetic.py b/pandas/tests/indexes/datetimes/test_arithmetic.py index f6f8eccf4e30c..ddc97636ae0a8 100644 --- a/pandas/tests/indexes/datetimes/test_arithmetic.py +++ b/pandas/tests/indexes/datetimes/test_arithmetic.py @@ -15,6 +15,7 @@ DatetimeIndex, TimedeltaIndex, date_range) from pandas._libs import tslib +from pandas._libs.tslibs.offsets import shift_months @pytest.fixture(params=[None, 'UTC', 'Asia/Tokyo', @@ -933,3 +934,19 @@ def test_datetime64_with_DateOffset(klass, assert_func): Timestamp('2000-02-29', tz='US/Central')], name='a') assert_func(result, exp) assert_func(result2, exp) + + +@pytest.mark.parametrize('years', [-1, 0, 1]) +@pytest.mark.parametrize('months', [-2, 0, 2]) +def test_shift_months(years, months): + s = DatetimeIndex([Timestamp('2000-01-05 00:15:00'), + Timestamp('2000-01-31 00:23:00'), + Timestamp('2000-01-01'), + Timestamp('2000-02-29'), + Timestamp('2000-12-31')]) + actual = DatetimeIndex(shift_months(s.asi8, years * 12 + months)) + + raw = [x + pd.offsets.DateOffset(years=years, months=months) + for x in s] + expected = DatetimeIndex(raw) + tm.assert_index_equal(actual, expected) diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 4f386eb28cc0f..440478100ddd5 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -5,10 +5,8 @@ import numpy as np from datetime import datetime -from itertools import product import pandas as pd import pandas._libs.tslib as tslib -from pandas._libs.tslibs.offsets import shift_months import pandas.util.testing as tm from pandas import 
(DatetimeIndex, PeriodIndex, Series, Timestamp, date_range, _np_version_under1p10, Index, @@ -568,19 +566,6 @@ def test_equals(self): assert not idx.equals(pd.Series(idx3)) -@pytest.mark.parametrize('years,months', product([-1, 0, 1], [-2, 0, 2])) -def test_shift_months(years, months): - s = DatetimeIndex([Timestamp('2000-01-05 00:15:00'), - Timestamp('2000-01-31 00:23:00'), - Timestamp('2000-01-01'), - Timestamp('2000-02-29'), - Timestamp('2000-12-31')]) - actual = DatetimeIndex(shift_months(s.asi8, years * 12 + months)) - expected = DatetimeIndex([x + pd.offsets.DateOffset( - years=years, months=months) for x in s]) - tm.assert_index_equal(actual, expected) - - class TestBusinessDatetimeIndex(object): def setup_method(self, method): From 8dffb15b131fb6d19c37279ebe7ebe2cc6243689 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 11 Feb 2018 06:44:07 -0800 Subject: [PATCH 093/217] move libfreqs and liboffsets tests to test_tslibs, move parsing tests, with to_datetime test moved to test_tools (#19638) --- pandas/tests/indexes/datetimes/test_tools.py | 12 ++++++++++++ .../{tseries => tslibs}/test_libfrequencies.py | 0 .../{tseries/offsets => tslibs}/test_liboffsets.py | 0 pandas/tests/{scalar => tslibs}/test_parsing.py | 13 ------------- 4 files changed, 12 insertions(+), 13 deletions(-) rename pandas/tests/{tseries => tslibs}/test_libfrequencies.py (100%) rename pandas/tests/{tseries/offsets => tslibs}/test_liboffsets.py (100%) rename pandas/tests/{scalar => tslibs}/test_parsing.py (96%) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index b95ae07052ecb..35f34dc3a4974 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -187,6 +187,18 @@ def test_to_datetime_format_weeks(self, cache): class TestToDatetime(object): + def test_to_datetime_pydatetime(self): + actual = pd.to_datetime(datetime(2008, 1, 15)) + assert actual == datetime(2008, 1, 15) + + 
def test_to_datetime_YYYYMMDD(self): + actual = pd.to_datetime('20080115') + assert actual == datetime(2008, 1, 15) + + def test_to_datetime_unparseable_ignore(self): + # unparseable + s = 'Month 1, 1999' + assert pd.to_datetime(s, errors='ignore') == s @td.skip_if_windows # `tm.set_timezone` does not work in windows def test_to_datetime_now(self): diff --git a/pandas/tests/tseries/test_libfrequencies.py b/pandas/tests/tslibs/test_libfrequencies.py similarity index 100% rename from pandas/tests/tseries/test_libfrequencies.py rename to pandas/tests/tslibs/test_libfrequencies.py diff --git a/pandas/tests/tseries/offsets/test_liboffsets.py b/pandas/tests/tslibs/test_liboffsets.py similarity index 100% rename from pandas/tests/tseries/offsets/test_liboffsets.py rename to pandas/tests/tslibs/test_liboffsets.py diff --git a/pandas/tests/scalar/test_parsing.py b/pandas/tests/tslibs/test_parsing.py similarity index 96% rename from pandas/tests/scalar/test_parsing.py rename to pandas/tests/tslibs/test_parsing.py index bff0de649ac5e..34cce088a8b42 100644 --- a/pandas/tests/scalar/test_parsing.py +++ b/pandas/tests/tslibs/test_parsing.py @@ -7,7 +7,6 @@ import pytest from dateutil.parser import parse -import pandas as pd import pandas.util._test_decorators as td from pandas.conftest import is_dateutil_le_261, is_dateutil_gt_261 from pandas import compat @@ -16,18 +15,6 @@ from pandas._libs.tslibs.parsing import parse_time_string -def test_to_datetime1(): - actual = pd.to_datetime(datetime(2008, 1, 15)) - assert actual == datetime(2008, 1, 15) - - actual = pd.to_datetime('20080115') - assert actual == datetime(2008, 1, 15) - - # unparseable - s = 'Month 1, 1999' - assert pd.to_datetime(s, errors='ignore') == s - - class TestParseQuarters(object): def test_parse_time_string(self): From b4cdff86a02e48cc1fca298a0f3e3e9baea50623 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 11 Feb 2018 06:48:36 -0800 Subject: [PATCH 094/217] Fix uncaught OutOfBounds in array_to_datetime 
(#19612) --- doc/source/whatsnew/v0.23.0.txt | 3 ++- pandas/_libs/tslib.pyx | 13 ++++++------- pandas/tests/indexes/datetimes/test_tools.py | 11 ++++++++++- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 6fdd551accbf1..acab9d0bbebf8 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -703,7 +703,7 @@ Datetimelike - Bug in :class:`Series` floor-division where operating on a scalar ``timedelta`` raises an exception (:issue:`18846`) - Bug in :class:`Series`` with ``dtype='timedelta64[ns]`` where addition or subtraction of ``TimedeltaIndex`` had results cast to ``dtype='int64'`` (:issue:`17250`) - Bug in :class:`TimedeltaIndex` where division by a ``Series`` would return a ``TimedeltaIndex`` instead of a ``Series`` (issue:`19042`) -- Bug in :class:`Series` with ``dtype='timedelta64[ns]`` where addition or subtraction of ``TimedeltaIndex`` could return a ``Series`` with an incorrect name (issue:`19043`) +- Bug in :class:`Series` with ``dtype='timedelta64[ns]`` where addition or subtraction of ``TimedeltaIndex`` could return a ``Series`` with an incorrect name (:issue:`19043`) - Bug in :class:`DatetimeIndex` where the repr was not showing high-precision time values at the end of a day (e.g., 23:59:59.999999999) (:issue:`19030`) - Bug where dividing a scalar timedelta-like object with :class:`TimedeltaIndex` performed the reciprocal operation (:issue:`19125`) - Bug in ``.astype()`` to non-ns timedelta units would hold the incorrect dtype (:issue:`19176`, :issue:`19223`, :issue:`12425`) @@ -713,6 +713,7 @@ Datetimelike - Bug in comparison of :class:`DatetimeIndex` against ``None`` or ``datetime.date`` objects raising ``TypeError`` for ``==`` and ``!=`` comparisons instead of all-``False`` and all-``True``, respectively (:issue:`19301`) - Bug in :class:`Timestamp` and :func:`to_datetime` where a string representing a barely out-of-bounds timestamp would 
be incorrectly rounded down instead of raising ``OutOfBoundsDatetime`` (:issue:`19382`) - Bug in :func:`Timestamp.floor` :func:`DatetimeIndex.floor` where time stamps far in the future and past were not rounded correctly (:issue:`19206`) +- Bug in :func:`to_datetime` where passing an out-of-bounds datetime with ``errors='coerce'`` and ``utc=True`` would raise ``OutOfBoundsDatetime`` instead of parsing to ``NaT`` (:issue:`19612`) - Timezones diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index a035bab2a7049..85e667521e5f2 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -524,11 +524,10 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', seen_datetime = 1 if val.tzinfo is not None: if utc_convert: - _ts = convert_datetime_to_tsobject(val, None) - iresult[i] = _ts.value try: - check_dts_bounds(&_ts.dts) - except ValueError: + _ts = convert_datetime_to_tsobject(val, None) + iresult[i] = _ts.value + except OutOfBoundsDatetime: if is_coerce: iresult[i] = NPY_NAT continue @@ -544,7 +543,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', iresult[i] += val.nanosecond try: check_dts_bounds(&dts) - except ValueError: + except OutOfBoundsDatetime: if is_coerce: iresult[i] = NPY_NAT continue @@ -555,7 +554,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', iresult[i] = pydate_to_dt64(val, &dts) try: check_dts_bounds(&dts) - except ValueError: + except OutOfBoundsDatetime: if is_coerce: iresult[i] = NPY_NAT continue @@ -568,7 +567,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', else: try: iresult[i] = get_datetime64_nanos(val) - except ValueError: + except OutOfBoundsDatetime: if is_coerce: iresult[i] = NPY_NAT continue diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 35f34dc3a4974..bd3fa5e73cd11 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -8,7 
+8,7 @@ import dateutil import numpy as np from dateutil.parser import parse -from datetime import datetime, date, time +from datetime import datetime, date, time, timedelta from distutils.version import LooseVersion import pandas as pd @@ -1503,6 +1503,15 @@ def test_parsers_iso8601(self): class TestArrayToDatetime(object): + def test_coerce_out_of_bounds_utc(self): + # GH#19612 + ts = Timestamp('1900-01-01', tz='US/Pacific') + dt = ts.to_pydatetime() - timedelta(days=365 * 300) # ~1600AD + arr = np.array([dt]) + result = tslib.array_to_datetime(arr, utc=True, errors='coerce') + expected = np.array(['NaT'], dtype='datetime64[ns]') + tm.assert_numpy_array_equal(result, expected) + def test_parsing_valid_dates(self): arr = np.array(['01-01-2013', '01-02-2013'], dtype=object) tm.assert_numpy_array_equal( From cb480abdfc5b7ee16a70703e64bf7f13b37a4c51 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 11 Feb 2018 06:50:46 -0800 Subject: [PATCH 095/217] test_astype portion of #19627 (#19637) --- pandas/tests/indexes/datetimes/test_astype.py | 24 +++++++++++ pandas/tests/indexes/datetimes/test_ops.py | 43 ------------------- 2 files changed, 24 insertions(+), 43 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_astype.py b/pandas/tests/indexes/datetimes/test_astype.py index 4b989eb35e900..8acdd301f241a 100644 --- a/pandas/tests/indexes/datetimes/test_astype.py +++ b/pandas/tests/indexes/datetimes/test_astype.py @@ -138,6 +138,30 @@ def test_astype_object(self): tm.assert_index_equal(casted, Index(exp_values, dtype=np.object_)) assert casted.tolist() == exp_values + @pytest.mark.parametrize('tz', [None, 'Asia/Tokyo']) + def test_astype_object_tz(self, tz): + idx = pd.date_range(start='2013-01-01', periods=4, freq='M', + name='idx', tz=tz) + expected_list = [Timestamp('2013-01-31', tz=tz), + Timestamp('2013-02-28', tz=tz), + Timestamp('2013-03-31', tz=tz), + Timestamp('2013-04-30', tz=tz)] + expected = pd.Index(expected_list, dtype=object, name='idx') + 
result = idx.astype(object) + tm.assert_index_equal(result, expected) + assert idx.tolist() == expected_list + + def test_astype_object_with_nat(self): + idx = DatetimeIndex([datetime(2013, 1, 1), datetime(2013, 1, 2), + pd.NaT, datetime(2013, 1, 4)], name='idx') + expected_list = [Timestamp('2013-01-01'), + Timestamp('2013-01-02'), pd.NaT, + Timestamp('2013-01-04')] + expected = pd.Index(expected_list, dtype=object, name='idx') + result = idx.astype(object) + tm.assert_index_equal(result, expected) + assert idx.tolist() == expected_list + @pytest.mark.parametrize('dtype', [ float, 'timedelta64', 'timedelta64[ns]', 'datetime64', 'datetime64[D]']) diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 440478100ddd5..bc43b427fe0aa 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -49,49 +49,6 @@ def test_ops_properties_basic(self): assert s.day == 10 pytest.raises(AttributeError, lambda: s.weekday) - def test_astype_object(self): - idx = pd.date_range(start='2013-01-01', periods=4, freq='M', - name='idx') - expected_list = [Timestamp('2013-01-31'), - Timestamp('2013-02-28'), - Timestamp('2013-03-31'), - Timestamp('2013-04-30')] - expected = pd.Index(expected_list, dtype=object, name='idx') - result = idx.astype(object) - assert isinstance(result, Index) - - assert result.dtype == object - tm.assert_index_equal(result, expected) - assert result.name == expected.name - assert idx.tolist() == expected_list - - idx = pd.date_range(start='2013-01-01', periods=4, freq='M', - name='idx', tz='Asia/Tokyo') - expected_list = [Timestamp('2013-01-31', tz='Asia/Tokyo'), - Timestamp('2013-02-28', tz='Asia/Tokyo'), - Timestamp('2013-03-31', tz='Asia/Tokyo'), - Timestamp('2013-04-30', tz='Asia/Tokyo')] - expected = pd.Index(expected_list, dtype=object, name='idx') - result = idx.astype(object) - assert isinstance(result, Index) - assert result.dtype == object - 
tm.assert_index_equal(result, expected) - assert result.name == expected.name - assert idx.tolist() == expected_list - - idx = DatetimeIndex([datetime(2013, 1, 1), datetime(2013, 1, 2), - pd.NaT, datetime(2013, 1, 4)], name='idx') - expected_list = [Timestamp('2013-01-01'), - Timestamp('2013-01-02'), pd.NaT, - Timestamp('2013-01-04')] - expected = pd.Index(expected_list, dtype=object, name='idx') - result = idx.astype(object) - assert isinstance(result, Index) - assert result.dtype == object - tm.assert_index_equal(result, expected) - assert result.name == expected.name - assert idx.tolist() == expected_list - def test_minmax(self): for tz in self.tz: # monotonic From 8bbb469587e6d479cdabfa8e04b7ce70aee91f43 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 11 Feb 2018 06:51:47 -0800 Subject: [PATCH 096/217] move timedelta test_astype test (#19639) --- .../tests/indexes/timedeltas/test_astype.py | 20 ++++++++++++++ pandas/tests/indexes/timedeltas/test_ops.py | 27 +------------------ 2 files changed, 21 insertions(+), 26 deletions(-) diff --git a/pandas/tests/indexes/timedeltas/test_astype.py b/pandas/tests/indexes/timedeltas/test_astype.py index 6c644d239069a..329f0c2467e8b 100644 --- a/pandas/tests/indexes/timedeltas/test_astype.py +++ b/pandas/tests/indexes/timedeltas/test_astype.py @@ -1,3 +1,5 @@ +from datetime import timedelta + import pytest import numpy as np @@ -8,6 +10,24 @@ class TestTimedeltaIndex(object): + def test_astype_object(self): + idx = timedelta_range(start='1 days', periods=4, freq='D', name='idx') + expected_list = [Timedelta('1 days'), Timedelta('2 days'), + Timedelta('3 days'), Timedelta('4 days')] + result = idx.astype(object) + expected = Index(expected_list, dtype=object, name='idx') + tm.assert_index_equal(result, expected) + assert idx.tolist() == expected_list + + def test_astype_object_with_nat(self): + idx = TimedeltaIndex([timedelta(days=1), timedelta(days=2), NaT, + timedelta(days=4)], name='idx') + expected_list = 
[Timedelta('1 days'), Timedelta('2 days'), NaT, + Timedelta('4 days')] + result = idx.astype(object) + expected = Index(expected_list, dtype=object, name='idx') + tm.assert_index_equal(result, expected) + assert idx.tolist() == expected_list def test_astype(self): # GH 13149, GH 13209 diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index 86d7dd4e1b117..d154aa2172ef7 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -8,7 +8,7 @@ from pandas import to_timedelta from pandas import (Series, Timedelta, Timestamp, TimedeltaIndex, timedelta_range, - _np_version_under1p10, Index) + _np_version_under1p10) from pandas._libs.tslib import iNaT from pandas.tests.test_base import Ops @@ -25,31 +25,6 @@ def test_ops_properties(self): self.check_ops_properties(TimedeltaIndex._field_ops, f) self.check_ops_properties(TimedeltaIndex._object_ops, f) - def test_astype_object(self): - idx = timedelta_range(start='1 days', periods=4, freq='D', name='idx') - expected_list = [Timedelta('1 days'), Timedelta('2 days'), - Timedelta('3 days'), Timedelta('4 days')] - expected = pd.Index(expected_list, dtype=object, name='idx') - result = idx.astype(object) - assert isinstance(result, Index) - - assert result.dtype == object - tm.assert_index_equal(result, expected) - assert result.name == expected.name - assert idx.tolist() == expected_list - - idx = TimedeltaIndex([timedelta(days=1), timedelta(days=2), pd.NaT, - timedelta(days=4)], name='idx') - expected_list = [Timedelta('1 days'), Timedelta('2 days'), pd.NaT, - Timedelta('4 days')] - expected = pd.Index(expected_list, dtype=object, name='idx') - result = idx.astype(object) - assert isinstance(result, Index) - assert result.dtype == object - tm.assert_index_equal(result, expected) - assert result.name == expected.name - assert idx.tolist() == expected_list - def test_minmax(self): # monotonic From 
c9334fed8e6593ef5e5648336a0e241a01c4e2d9 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 11 Feb 2018 06:56:28 -0800 Subject: [PATCH 097/217] Organize PeriodIndex tests (#19641) --- .../tests/indexes/period/test_arithmetic.py | 256 ++++++++++++++++++ pandas/tests/indexes/period/test_ops.py | 192 +------------ pandas/tests/indexes/period/test_period.py | 78 +----- .../indexes/period/test_scalar_compat.py | 17 ++ pandas/tests/indexes/period/test_tools.py | 79 ------ pandas/tests/tslibs/test_period_asfreq.py | 81 ++++++ 6 files changed, 358 insertions(+), 345 deletions(-) create mode 100644 pandas/tests/indexes/period/test_scalar_compat.py create mode 100644 pandas/tests/tslibs/test_period_asfreq.py diff --git a/pandas/tests/indexes/period/test_arithmetic.py b/pandas/tests/indexes/period/test_arithmetic.py index 356ea5fc656de..81171920f635f 100644 --- a/pandas/tests/indexes/period/test_arithmetic.py +++ b/pandas/tests/indexes/period/test_arithmetic.py @@ -11,6 +11,171 @@ import pandas.core.indexes.period as period +class TestPeriodIndexComparisons(object): + @pytest.mark.parametrize('freq', ['M', '2M', '3M']) + def test_pi_cmp_pi(self, freq): + base = PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04'], + freq=freq) + per = Period('2011-02', freq=freq) + + exp = np.array([False, True, False, False]) + tm.assert_numpy_array_equal(base == per, exp) + tm.assert_numpy_array_equal(per == base, exp) + + exp = np.array([True, False, True, True]) + tm.assert_numpy_array_equal(base != per, exp) + tm.assert_numpy_array_equal(per != base, exp) + + exp = np.array([False, False, True, True]) + tm.assert_numpy_array_equal(base > per, exp) + tm.assert_numpy_array_equal(per < base, exp) + + exp = np.array([True, False, False, False]) + tm.assert_numpy_array_equal(base < per, exp) + tm.assert_numpy_array_equal(per > base, exp) + + exp = np.array([False, True, True, True]) + tm.assert_numpy_array_equal(base >= per, exp) + tm.assert_numpy_array_equal(per <= base, exp) + + exp 
= np.array([True, True, False, False]) + tm.assert_numpy_array_equal(base <= per, exp) + tm.assert_numpy_array_equal(per >= base, exp) + + idx = PeriodIndex(['2011-02', '2011-01', '2011-03', '2011-05'], + freq=freq) + + exp = np.array([False, False, True, False]) + tm.assert_numpy_array_equal(base == idx, exp) + + exp = np.array([True, True, False, True]) + tm.assert_numpy_array_equal(base != idx, exp) + + exp = np.array([False, True, False, False]) + tm.assert_numpy_array_equal(base > idx, exp) + + exp = np.array([True, False, False, True]) + tm.assert_numpy_array_equal(base < idx, exp) + + exp = np.array([False, True, True, False]) + tm.assert_numpy_array_equal(base >= idx, exp) + + exp = np.array([True, False, True, True]) + tm.assert_numpy_array_equal(base <= idx, exp) + + @pytest.mark.parametrize('freq', ['M', '2M', '3M']) + def test_pi_cmp_pi_mismatched_freq_raises(self, freq): + # different base freq + base = PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04'], + freq=freq) + + msg = "Input has different freq=A-DEC from PeriodIndex" + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + base <= Period('2011', freq='A') + + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + Period('2011', freq='A') >= base + + idx = PeriodIndex(['2011', '2012', '2013', '2014'], freq='A') + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + base <= idx + + # Different frequency + msg = "Input has different freq=4M from PeriodIndex" + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + base <= Period('2011', freq='4M') + + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + Period('2011', freq='4M') >= base + + idx = PeriodIndex(['2011', '2012', '2013', '2014'], freq='4M') + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + base <= idx + + @pytest.mark.parametrize('freq', ['M', '2M', '3M']) + def test_pi_cmp_nat(self, freq): + idx1 = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-05'], 
freq=freq) + + result = idx1 > Period('2011-02', freq=freq) + exp = np.array([False, False, False, True]) + tm.assert_numpy_array_equal(result, exp) + result = Period('2011-02', freq=freq) < idx1 + tm.assert_numpy_array_equal(result, exp) + + result = idx1 == Period('NaT', freq=freq) + exp = np.array([False, False, False, False]) + tm.assert_numpy_array_equal(result, exp) + result = Period('NaT', freq=freq) == idx1 + tm.assert_numpy_array_equal(result, exp) + + result = idx1 != Period('NaT', freq=freq) + exp = np.array([True, True, True, True]) + tm.assert_numpy_array_equal(result, exp) + result = Period('NaT', freq=freq) != idx1 + tm.assert_numpy_array_equal(result, exp) + + idx2 = PeriodIndex(['2011-02', '2011-01', '2011-04', 'NaT'], freq=freq) + result = idx1 < idx2 + exp = np.array([True, False, False, False]) + tm.assert_numpy_array_equal(result, exp) + + result = idx1 == idx2 + exp = np.array([False, False, False, False]) + tm.assert_numpy_array_equal(result, exp) + + result = idx1 != idx2 + exp = np.array([True, True, True, True]) + tm.assert_numpy_array_equal(result, exp) + + result = idx1 == idx1 + exp = np.array([True, True, False, True]) + tm.assert_numpy_array_equal(result, exp) + + result = idx1 != idx1 + exp = np.array([False, False, True, False]) + tm.assert_numpy_array_equal(result, exp) + + @pytest.mark.parametrize('freq', ['M', '2M', '3M']) + def test_pi_cmp_nat_mismatched_freq_raises(self, freq): + idx1 = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-05'], freq=freq) + + diff = PeriodIndex(['2011-02', '2011-01', '2011-04', 'NaT'], freq='4M') + msg = "Input has different freq=4M from PeriodIndex" + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + idx1 > diff + + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + idx1 == diff + + # TODO: De-duplicate with test_pi_cmp_nat + def test_comp_nat(self): + left = pd.PeriodIndex([pd.Period('2011-01-01'), pd.NaT, + pd.Period('2011-01-03')]) + right = pd.PeriodIndex([pd.NaT, 
pd.NaT, pd.Period('2011-01-03')]) + + for lhs, rhs in [(left, right), + (left.astype(object), right.astype(object))]: + result = lhs == rhs + expected = np.array([False, False, True]) + tm.assert_numpy_array_equal(result, expected) + + result = lhs != rhs + expected = np.array([True, True, False]) + tm.assert_numpy_array_equal(result, expected) + + expected = np.array([False, False, False]) + tm.assert_numpy_array_equal(lhs == pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT == rhs, expected) + + expected = np.array([True, True, True]) + tm.assert_numpy_array_equal(lhs != pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT != lhs, expected) + + expected = np.array([False, False, False]) + tm.assert_numpy_array_equal(lhs < pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT > lhs, expected) + + class TestPeriodIndexArithmetic(object): def test_pi_add_offset_array(self): # GH#18849 @@ -250,6 +415,97 @@ def test_sub_isub(self): rng -= 1 tm.assert_index_equal(rng, expected) + # --------------------------------------------------------------- + # PeriodIndex.shift is used by __add__ and __sub__ + + def test_pi_shift_ndarray(self): + idx = PeriodIndex(['2011-01', '2011-02', 'NaT', + '2011-04'], freq='M', name='idx') + result = idx.shift(np.array([1, 2, 3, 4])) + expected = PeriodIndex(['2011-02', '2011-04', 'NaT', + '2011-08'], freq='M', name='idx') + tm.assert_index_equal(result, expected) + + idx = PeriodIndex(['2011-01', '2011-02', 'NaT', + '2011-04'], freq='M', name='idx') + result = idx.shift(np.array([1, -2, 3, -4])) + expected = PeriodIndex(['2011-02', '2010-12', 'NaT', + '2010-12'], freq='M', name='idx') + tm.assert_index_equal(result, expected) + + def test_shift(self): + pi1 = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') + pi2 = PeriodIndex(freq='A', start='1/1/2002', end='12/1/2010') + + tm.assert_index_equal(pi1.shift(0), pi1) + + assert len(pi1) == len(pi2) + tm.assert_index_equal(pi1.shift(1), pi2) + + pi1 = PeriodIndex(freq='A', 
start='1/1/2001', end='12/1/2009') + pi2 = PeriodIndex(freq='A', start='1/1/2000', end='12/1/2008') + assert len(pi1) == len(pi2) + tm.assert_index_equal(pi1.shift(-1), pi2) + + pi1 = PeriodIndex(freq='M', start='1/1/2001', end='12/1/2009') + pi2 = PeriodIndex(freq='M', start='2/1/2001', end='1/1/2010') + assert len(pi1) == len(pi2) + tm.assert_index_equal(pi1.shift(1), pi2) + + pi1 = PeriodIndex(freq='M', start='1/1/2001', end='12/1/2009') + pi2 = PeriodIndex(freq='M', start='12/1/2000', end='11/1/2009') + assert len(pi1) == len(pi2) + tm.assert_index_equal(pi1.shift(-1), pi2) + + pi1 = PeriodIndex(freq='D', start='1/1/2001', end='12/1/2009') + pi2 = PeriodIndex(freq='D', start='1/2/2001', end='12/2/2009') + assert len(pi1) == len(pi2) + tm.assert_index_equal(pi1.shift(1), pi2) + + pi1 = PeriodIndex(freq='D', start='1/1/2001', end='12/1/2009') + pi2 = PeriodIndex(freq='D', start='12/31/2000', end='11/30/2009') + assert len(pi1) == len(pi2) + tm.assert_index_equal(pi1.shift(-1), pi2) + + def test_shift_corner_cases(self): + # GH#9903 + idx = pd.PeriodIndex([], name='xxx', freq='H') + + with pytest.raises(TypeError): + # period shift doesn't accept freq + idx.shift(1, freq='H') + + tm.assert_index_equal(idx.shift(0), idx) + tm.assert_index_equal(idx.shift(3), idx) + + idx = pd.PeriodIndex(['2011-01-01 10:00', '2011-01-01 11:00' + '2011-01-01 12:00'], name='xxx', freq='H') + tm.assert_index_equal(idx.shift(0), idx) + exp = pd.PeriodIndex(['2011-01-01 13:00', '2011-01-01 14:00' + '2011-01-01 15:00'], name='xxx', freq='H') + tm.assert_index_equal(idx.shift(3), exp) + exp = pd.PeriodIndex(['2011-01-01 07:00', '2011-01-01 08:00' + '2011-01-01 09:00'], name='xxx', freq='H') + tm.assert_index_equal(idx.shift(-3), exp) + + def test_shift_nat(self): + idx = PeriodIndex(['2011-01', '2011-02', 'NaT', + '2011-04'], freq='M', name='idx') + result = idx.shift(1) + expected = PeriodIndex(['2011-02', '2011-03', 'NaT', + '2011-05'], freq='M', name='idx') + 
tm.assert_index_equal(result, expected) + assert result.name == expected.name + + def test_shift_gh8083(self): + # test shift for PeriodIndex + # GH#8083 + drange = pd.period_range('20130101', periods=5, freq='D') + result = drange.shift(1) + expected = PeriodIndex(['2013-01-02', '2013-01-03', '2013-01-04', + '2013-01-05', '2013-01-06'], freq='D') + tm.assert_index_equal(result, expected) + class TestPeriodIndexSeriesMethods(object): """ Test PeriodIndex and Period Series Ops consistency """ diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py index 21a9ffdde3444..8745de0c2a7aa 100644 --- a/pandas/tests/indexes/period/test_ops.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -1,11 +1,9 @@ -import pytest import numpy as np import pandas as pd import pandas._libs.tslib as tslib import pandas.util.testing as tm -import pandas.core.indexes.period as period from pandas import (DatetimeIndex, PeriodIndex, Series, Period, _np_version_under1p10, Index) @@ -521,25 +519,8 @@ def test_nat_new(self): tm.assert_numpy_array_equal(result, exp) def test_shift(self): - # GH 9903 - idx = pd.PeriodIndex([], name='xxx', freq='H') - - with pytest.raises(TypeError): - # period shift doesn't accept freq - idx.shift(1, freq='H') - - tm.assert_index_equal(idx.shift(0), idx) - tm.assert_index_equal(idx.shift(3), idx) - - idx = pd.PeriodIndex(['2011-01-01 10:00', '2011-01-01 11:00' - '2011-01-01 12:00'], name='xxx', freq='H') - tm.assert_index_equal(idx.shift(0), idx) - exp = pd.PeriodIndex(['2011-01-01 13:00', '2011-01-01 14:00' - '2011-01-01 15:00'], name='xxx', freq='H') - tm.assert_index_equal(idx.shift(3), exp) - exp = pd.PeriodIndex(['2011-01-01 07:00', '2011-01-01 08:00' - '2011-01-01 09:00'], name='xxx', freq='H') - tm.assert_index_equal(idx.shift(-3), exp) + # This is tested in test_arithmetic + pass def test_repeat(self): index = pd.period_range('2001-01-01', periods=2, freq='D') @@ -703,172 +684,3 @@ def test_pi_comp_period_nat(self): f 
= lambda x: tslib.NaT >= x exp = np.array([False, False, False, False], dtype=np.bool) self._check(idx, f, exp) - - -class TestPeriodIndexComparisons(object): - - def test_pi_pi_comp(self): - - for freq in ['M', '2M', '3M']: - base = PeriodIndex(['2011-01', '2011-02', - '2011-03', '2011-04'], freq=freq) - p = Period('2011-02', freq=freq) - - exp = np.array([False, True, False, False]) - tm.assert_numpy_array_equal(base == p, exp) - tm.assert_numpy_array_equal(p == base, exp) - - exp = np.array([True, False, True, True]) - tm.assert_numpy_array_equal(base != p, exp) - tm.assert_numpy_array_equal(p != base, exp) - - exp = np.array([False, False, True, True]) - tm.assert_numpy_array_equal(base > p, exp) - tm.assert_numpy_array_equal(p < base, exp) - - exp = np.array([True, False, False, False]) - tm.assert_numpy_array_equal(base < p, exp) - tm.assert_numpy_array_equal(p > base, exp) - - exp = np.array([False, True, True, True]) - tm.assert_numpy_array_equal(base >= p, exp) - tm.assert_numpy_array_equal(p <= base, exp) - - exp = np.array([True, True, False, False]) - tm.assert_numpy_array_equal(base <= p, exp) - tm.assert_numpy_array_equal(p >= base, exp) - - idx = PeriodIndex(['2011-02', '2011-01', '2011-03', - '2011-05'], freq=freq) - - exp = np.array([False, False, True, False]) - tm.assert_numpy_array_equal(base == idx, exp) - - exp = np.array([True, True, False, True]) - tm.assert_numpy_array_equal(base != idx, exp) - - exp = np.array([False, True, False, False]) - tm.assert_numpy_array_equal(base > idx, exp) - - exp = np.array([True, False, False, True]) - tm.assert_numpy_array_equal(base < idx, exp) - - exp = np.array([False, True, True, False]) - tm.assert_numpy_array_equal(base >= idx, exp) - - exp = np.array([True, False, True, True]) - tm.assert_numpy_array_equal(base <= idx, exp) - - # different base freq - msg = "Input has different freq=A-DEC from PeriodIndex" - with tm.assert_raises_regex( - period.IncompatibleFrequency, msg): - base <= Period('2011', 
freq='A') - - with tm.assert_raises_regex( - period.IncompatibleFrequency, msg): - Period('2011', freq='A') >= base - - with tm.assert_raises_regex( - period.IncompatibleFrequency, msg): - idx = PeriodIndex(['2011', '2012', '2013', '2014'], freq='A') - base <= idx - - # Different frequency - msg = "Input has different freq=4M from PeriodIndex" - with tm.assert_raises_regex( - period.IncompatibleFrequency, msg): - base <= Period('2011', freq='4M') - - with tm.assert_raises_regex( - period.IncompatibleFrequency, msg): - Period('2011', freq='4M') >= base - - with tm.assert_raises_regex( - period.IncompatibleFrequency, msg): - idx = PeriodIndex(['2011', '2012', '2013', '2014'], freq='4M') - base <= idx - - def test_pi_nat_comp(self): - for freq in ['M', '2M', '3M']: - idx1 = PeriodIndex( - ['2011-01', '2011-02', 'NaT', '2011-05'], freq=freq) - - result = idx1 > Period('2011-02', freq=freq) - exp = np.array([False, False, False, True]) - tm.assert_numpy_array_equal(result, exp) - result = Period('2011-02', freq=freq) < idx1 - tm.assert_numpy_array_equal(result, exp) - - result = idx1 == Period('NaT', freq=freq) - exp = np.array([False, False, False, False]) - tm.assert_numpy_array_equal(result, exp) - result = Period('NaT', freq=freq) == idx1 - tm.assert_numpy_array_equal(result, exp) - - result = idx1 != Period('NaT', freq=freq) - exp = np.array([True, True, True, True]) - tm.assert_numpy_array_equal(result, exp) - result = Period('NaT', freq=freq) != idx1 - tm.assert_numpy_array_equal(result, exp) - - idx2 = PeriodIndex(['2011-02', '2011-01', '2011-04', - 'NaT'], freq=freq) - result = idx1 < idx2 - exp = np.array([True, False, False, False]) - tm.assert_numpy_array_equal(result, exp) - - result = idx1 == idx2 - exp = np.array([False, False, False, False]) - tm.assert_numpy_array_equal(result, exp) - - result = idx1 != idx2 - exp = np.array([True, True, True, True]) - tm.assert_numpy_array_equal(result, exp) - - result = idx1 == idx1 - exp = np.array([True, True, 
False, True]) - tm.assert_numpy_array_equal(result, exp) - - result = idx1 != idx1 - exp = np.array([False, False, True, False]) - tm.assert_numpy_array_equal(result, exp) - - diff = PeriodIndex(['2011-02', '2011-01', '2011-04', - 'NaT'], freq='4M') - msg = "Input has different freq=4M from PeriodIndex" - with tm.assert_raises_regex( - period.IncompatibleFrequency, msg): - idx1 > diff - - with tm.assert_raises_regex( - period.IncompatibleFrequency, msg): - idx1 == diff - - # TODO: De-duplicate with test_pi_nat_comp - def test_comp_nat(self): - left = pd.PeriodIndex([pd.Period('2011-01-01'), pd.NaT, - pd.Period('2011-01-03')]) - right = pd.PeriodIndex([pd.NaT, pd.NaT, pd.Period('2011-01-03')]) - - for lhs, rhs in [(left, right), - (left.astype(object), right.astype(object))]: - result = lhs == rhs - expected = np.array([False, False, True]) - tm.assert_numpy_array_equal(result, expected) - - result = lhs != rhs - expected = np.array([True, True, False]) - tm.assert_numpy_array_equal(result, expected) - - expected = np.array([False, False, False]) - tm.assert_numpy_array_equal(lhs == pd.NaT, expected) - tm.assert_numpy_array_equal(pd.NaT == rhs, expected) - - expected = np.array([True, True, True]) - tm.assert_numpy_array_equal(lhs != pd.NaT, expected) - tm.assert_numpy_array_equal(pd.NaT != lhs, expected) - - expected = np.array([False, False, False]) - tm.assert_numpy_array_equal(lhs < pd.NaT, expected) - tm.assert_numpy_array_equal(pd.NaT > lhs, expected) diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 6fc7fa5486f82..f3469b829f8a3 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -453,16 +453,6 @@ def test_periods_number_check(self): with pytest.raises(ValueError): period_range('2011-1-1', '2012-1-1', 'B') - def test_start_time(self): - index = PeriodIndex(freq='M', start='2016-01-01', end='2016-05-31') - expected_index = date_range('2016-01-01', 
end='2016-05-31', freq='MS') - tm.assert_index_equal(index.start_time, expected_index) - - def test_end_time(self): - index = PeriodIndex(freq='M', start='2016-01-01', end='2016-05-31') - expected_index = date_range('2016-01-01', end='2016-05-31', freq='M') - tm.assert_index_equal(index.end_time, expected_index) - def test_index_duplicate_periods(self): # monotonic idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq='A-JUN') @@ -495,78 +485,14 @@ def test_index_unique(self): tm.assert_index_equal(idx.unique(), expected) assert idx.nunique() == 3 - def test_shift_gh8083(self): - - # test shift for PeriodIndex - # GH8083 - drange = self.create_index() - result = drange.shift(1) - expected = PeriodIndex(['2013-01-02', '2013-01-03', '2013-01-04', - '2013-01-05', '2013-01-06'], freq='D') - tm.assert_index_equal(result, expected) - def test_shift(self): - pi1 = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') - pi2 = PeriodIndex(freq='A', start='1/1/2002', end='12/1/2010') - - tm.assert_index_equal(pi1.shift(0), pi1) - - assert len(pi1) == len(pi2) - tm.assert_index_equal(pi1.shift(1), pi2) - - pi1 = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') - pi2 = PeriodIndex(freq='A', start='1/1/2000', end='12/1/2008') - assert len(pi1) == len(pi2) - tm.assert_index_equal(pi1.shift(-1), pi2) - - pi1 = PeriodIndex(freq='M', start='1/1/2001', end='12/1/2009') - pi2 = PeriodIndex(freq='M', start='2/1/2001', end='1/1/2010') - assert len(pi1) == len(pi2) - tm.assert_index_equal(pi1.shift(1), pi2) - - pi1 = PeriodIndex(freq='M', start='1/1/2001', end='12/1/2009') - pi2 = PeriodIndex(freq='M', start='12/1/2000', end='11/1/2009') - assert len(pi1) == len(pi2) - tm.assert_index_equal(pi1.shift(-1), pi2) - - pi1 = PeriodIndex(freq='D', start='1/1/2001', end='12/1/2009') - pi2 = PeriodIndex(freq='D', start='1/2/2001', end='12/2/2009') - assert len(pi1) == len(pi2) - tm.assert_index_equal(pi1.shift(1), pi2) - - pi1 = PeriodIndex(freq='D', start='1/1/2001', 
end='12/1/2009') - pi2 = PeriodIndex(freq='D', start='12/31/2000', end='11/30/2009') - assert len(pi1) == len(pi2) - tm.assert_index_equal(pi1.shift(-1), pi2) - - def test_shift_nat(self): - idx = PeriodIndex(['2011-01', '2011-02', 'NaT', - '2011-04'], freq='M', name='idx') - result = idx.shift(1) - expected = PeriodIndex(['2011-02', '2011-03', 'NaT', - '2011-05'], freq='M', name='idx') - tm.assert_index_equal(result, expected) - assert result.name == expected.name + # This is tested in test_arithmetic + pass @td.skip_if_32bit def test_ndarray_compat_properties(self): super(TestPeriodIndex, self).test_ndarray_compat_properties() - def test_shift_ndarray(self): - idx = PeriodIndex(['2011-01', '2011-02', 'NaT', - '2011-04'], freq='M', name='idx') - result = idx.shift(np.array([1, 2, 3, 4])) - expected = PeriodIndex(['2011-02', '2011-04', 'NaT', - '2011-08'], freq='M', name='idx') - tm.assert_index_equal(result, expected) - - idx = PeriodIndex(['2011-01', '2011-02', 'NaT', - '2011-04'], freq='M', name='idx') - result = idx.shift(np.array([1, -2, 3, -4])) - expected = PeriodIndex(['2011-02', '2010-12', 'NaT', - '2010-12'], freq='M', name='idx') - tm.assert_index_equal(result, expected) - def test_negative_ordinals(self): Period(ordinal=-1000, freq='A') Period(ordinal=0, freq='A') diff --git a/pandas/tests/indexes/period/test_scalar_compat.py b/pandas/tests/indexes/period/test_scalar_compat.py new file mode 100644 index 0000000000000..56bd2adf58719 --- /dev/null +++ b/pandas/tests/indexes/period/test_scalar_compat.py @@ -0,0 +1,17 @@ +# -*- coding: utf-8 -*- +"""Tests for PeriodIndex behaving like a vectorized Period scalar""" + +from pandas import PeriodIndex, date_range +import pandas.util.testing as tm + + +class TestPeriodIndexOps(object): + def test_start_time(self): + index = PeriodIndex(freq='M', start='2016-01-01', end='2016-05-31') + expected_index = date_range('2016-01-01', end='2016-05-31', freq='MS') + tm.assert_index_equal(index.start_time, expected_index) 
+ + def test_end_time(self): + index = PeriodIndex(freq='M', start='2016-01-01', end='2016-05-31') + expected_index = date_range('2016-01-01', end='2016-05-31', freq='M') + tm.assert_index_equal(index.end_time, expected_index) diff --git a/pandas/tests/indexes/period/test_tools.py b/pandas/tests/indexes/period/test_tools.py index 0e72cadb5d494..f5e7c8269dc4f 100644 --- a/pandas/tests/indexes/period/test_tools.py +++ b/pandas/tests/indexes/period/test_tools.py @@ -6,8 +6,6 @@ import pandas.core.indexes.period as period from pandas.compat import lrange -from pandas._libs.tslibs.frequencies import get_freq -from pandas._libs.tslibs.period import period_ordinal, period_asfreq from pandas._libs.tslibs.ccalendar import MONTHS from pandas import (PeriodIndex, Period, DatetimeIndex, Timestamp, Series, @@ -76,83 +74,6 @@ def test_negone_ordinals(self): repr(period) -class TestTslib(object): - def test_intraday_conversion_factors(self): - assert period_asfreq(1, get_freq('D'), get_freq('H'), False) == 24 - assert period_asfreq(1, get_freq('D'), get_freq('T'), False) == 1440 - assert period_asfreq(1, get_freq('D'), get_freq('S'), False) == 86400 - assert period_asfreq(1, get_freq('D'), - get_freq('L'), False) == 86400000 - assert period_asfreq(1, get_freq('D'), - get_freq('U'), False) == 86400000000 - assert period_asfreq(1, get_freq('D'), - get_freq('N'), False) == 86400000000000 - - assert period_asfreq(1, get_freq('H'), get_freq('T'), False) == 60 - assert period_asfreq(1, get_freq('H'), get_freq('S'), False) == 3600 - assert period_asfreq(1, get_freq('H'), - get_freq('L'), False) == 3600000 - assert period_asfreq(1, get_freq('H'), - get_freq('U'), False) == 3600000000 - assert period_asfreq(1, get_freq('H'), - get_freq('N'), False) == 3600000000000 - - assert period_asfreq(1, get_freq('T'), get_freq('S'), False) == 60 - assert period_asfreq(1, get_freq('T'), get_freq('L'), False) == 60000 - assert period_asfreq(1, get_freq('T'), - get_freq('U'), False) == 60000000 - 
assert period_asfreq(1, get_freq('T'), - get_freq('N'), False) == 60000000000 - - assert period_asfreq(1, get_freq('S'), get_freq('L'), False) == 1000 - assert period_asfreq(1, get_freq('S'), - get_freq('U'), False) == 1000000 - assert period_asfreq(1, get_freq('S'), - get_freq('N'), False) == 1000000000 - - assert period_asfreq(1, get_freq('L'), get_freq('U'), False) == 1000 - assert period_asfreq(1, get_freq('L'), - get_freq('N'), False) == 1000000 - - assert period_asfreq(1, get_freq('U'), get_freq('N'), False) == 1000 - - def test_period_ordinal_start_values(self): - # information for 1.1.1970 - assert period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, get_freq('A')) == 0 - assert period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, get_freq('M')) == 0 - assert period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, get_freq('W')) == 1 - assert period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, get_freq('D')) == 0 - assert period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, get_freq('B')) == 0 - - def test_period_ordinal_week(self): - assert period_ordinal(1970, 1, 4, 0, 0, 0, 0, 0, get_freq('W')) == 1 - assert period_ordinal(1970, 1, 5, 0, 0, 0, 0, 0, get_freq('W')) == 2 - assert period_ordinal(2013, 10, 6, 0, - 0, 0, 0, 0, get_freq('W')) == 2284 - assert period_ordinal(2013, 10, 7, 0, - 0, 0, 0, 0, get_freq('W')) == 2285 - - def test_period_ordinal_business_day(self): - # Thursday - assert period_ordinal(2013, 10, 3, 0, - 0, 0, 0, 0, get_freq('B')) == 11415 - # Friday - assert period_ordinal(2013, 10, 4, 0, - 0, 0, 0, 0, get_freq('B')) == 11416 - # Saturday - assert period_ordinal(2013, 10, 5, 0, - 0, 0, 0, 0, get_freq('B')) == 11417 - # Sunday - assert period_ordinal(2013, 10, 6, 0, - 0, 0, 0, 0, get_freq('B')) == 11417 - # Monday - assert period_ordinal(2013, 10, 7, 0, - 0, 0, 0, 0, get_freq('B')) == 11417 - # Tuesday - assert period_ordinal(2013, 10, 8, 0, - 0, 0, 0, 0, get_freq('B')) == 11418 - - class TestPeriodIndex(object): def setup_method(self, method): diff --git 
a/pandas/tests/tslibs/test_period_asfreq.py b/pandas/tests/tslibs/test_period_asfreq.py new file mode 100644 index 0000000000000..98959adf6fda4 --- /dev/null +++ b/pandas/tests/tslibs/test_period_asfreq.py @@ -0,0 +1,81 @@ +# -*- coding: utf-8 -*- + +from pandas._libs.tslibs.frequencies import get_freq +from pandas._libs.tslibs.period import period_ordinal, period_asfreq + + +class TestPeriodFreqConversion(object): + def test_intraday_conversion_factors(self): + assert period_asfreq(1, get_freq('D'), get_freq('H'), False) == 24 + assert period_asfreq(1, get_freq('D'), get_freq('T'), False) == 1440 + assert period_asfreq(1, get_freq('D'), get_freq('S'), False) == 86400 + assert period_asfreq(1, get_freq('D'), + get_freq('L'), False) == 86400000 + assert period_asfreq(1, get_freq('D'), + get_freq('U'), False) == 86400000000 + assert period_asfreq(1, get_freq('D'), + get_freq('N'), False) == 86400000000000 + + assert period_asfreq(1, get_freq('H'), get_freq('T'), False) == 60 + assert period_asfreq(1, get_freq('H'), get_freq('S'), False) == 3600 + assert period_asfreq(1, get_freq('H'), + get_freq('L'), False) == 3600000 + assert period_asfreq(1, get_freq('H'), + get_freq('U'), False) == 3600000000 + assert period_asfreq(1, get_freq('H'), + get_freq('N'), False) == 3600000000000 + + assert period_asfreq(1, get_freq('T'), get_freq('S'), False) == 60 + assert period_asfreq(1, get_freq('T'), get_freq('L'), False) == 60000 + assert period_asfreq(1, get_freq('T'), + get_freq('U'), False) == 60000000 + assert period_asfreq(1, get_freq('T'), + get_freq('N'), False) == 60000000000 + + assert period_asfreq(1, get_freq('S'), get_freq('L'), False) == 1000 + assert period_asfreq(1, get_freq('S'), + get_freq('U'), False) == 1000000 + assert period_asfreq(1, get_freq('S'), + get_freq('N'), False) == 1000000000 + + assert period_asfreq(1, get_freq('L'), get_freq('U'), False) == 1000 + assert period_asfreq(1, get_freq('L'), + get_freq('N'), False) == 1000000 + + assert 
period_asfreq(1, get_freq('U'), get_freq('N'), False) == 1000 + + def test_period_ordinal_start_values(self): + # information for 1.1.1970 + assert period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, get_freq('A')) == 0 + assert period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, get_freq('M')) == 0 + assert period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, get_freq('W')) == 1 + assert period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, get_freq('D')) == 0 + assert period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, get_freq('B')) == 0 + + def test_period_ordinal_week(self): + assert period_ordinal(1970, 1, 4, 0, 0, 0, 0, 0, get_freq('W')) == 1 + assert period_ordinal(1970, 1, 5, 0, 0, 0, 0, 0, get_freq('W')) == 2 + assert period_ordinal(2013, 10, 6, 0, + 0, 0, 0, 0, get_freq('W')) == 2284 + assert period_ordinal(2013, 10, 7, 0, + 0, 0, 0, 0, get_freq('W')) == 2285 + + def test_period_ordinal_business_day(self): + # Thursday + assert period_ordinal(2013, 10, 3, 0, + 0, 0, 0, 0, get_freq('B')) == 11415 + # Friday + assert period_ordinal(2013, 10, 4, 0, + 0, 0, 0, 0, get_freq('B')) == 11416 + # Saturday + assert period_ordinal(2013, 10, 5, 0, + 0, 0, 0, 0, get_freq('B')) == 11417 + # Sunday + assert period_ordinal(2013, 10, 6, 0, + 0, 0, 0, 0, get_freq('B')) == 11417 + # Monday + assert period_ordinal(2013, 10, 7, 0, + 0, 0, 0, 0, get_freq('B')) == 11417 + # Tuesday + assert period_ordinal(2013, 10, 8, 0, + 0, 0, 0, 0, get_freq('B')) == 11418 From c416fea965f4f32e21d7ddb0b1da4d8111bed7cf Mon Sep 17 00:00:00 2001 From: Licht Takeuchi Date: Mon, 12 Feb 2018 06:11:10 +0900 Subject: [PATCH 098/217] TST: Add to_csv test when writing the single column CSV (#19091) Closes gh-18676 --- pandas/tests/io/formats/test_to_csv.py | 32 ++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index e12a7196dce6b..dfa3751bff57a 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -1,5 +1,6 
@@ # -*- coding: utf-8 -*- +import sys import numpy as np import pandas as pd import pytest @@ -9,6 +10,37 @@ class TestToCSV(object): + @pytest.mark.xfail((3, 6, 5) > sys.version_info >= (3, 5), + reason=("Python csv library bug " + "(see https://bugs.python.org/issue32255)")) + def test_to_csv_with_single_column(self): + # see gh-18676, https://bugs.python.org/issue32255 + # + # Python's CSV library adds an extraneous '""' + # before the newline when the NaN-value is in + # the first row. Otherwise, only the newline + # character is added. This behavior is inconsistent + # and was patched in https://bugs.python.org/pull_request4672. + df1 = DataFrame([None, 1]) + expected1 = """\ +"" +1.0 +""" + with tm.ensure_clean('test.csv') as path: + df1.to_csv(path, header=None, index=None) + with open(path, 'r') as f: + assert f.read() == expected1 + + df2 = DataFrame([1, None]) + expected2 = """\ +1.0 +"" +""" + with tm.ensure_clean('test.csv') as path: + df2.to_csv(path, header=None, index=None) + with open(path, 'r') as f: + assert f.read() == expected2 + def test_to_csv_defualt_encoding(self): # GH17097 df = DataFrame({'col': [u"AAAAA", u"ÄÄÄÄÄ", u"ßßßßß", u"聞聞聞聞聞"]}) From 82f011bb34a6e2efb56313fe4491645f4600d840 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 11 Feb 2018 23:24:34 +0100 Subject: [PATCH 099/217] TST: set multi_statement flag for pymysql tests (#19619) * Revert "CI: pin pymysql<0.8.0 (#19461)" This reverts commit 44bbd5a4d33643c9270bbefd7419f45aecaa4667. 
* Enable multi-statements for pymysql connection --- ci/requirements-3.6.run | 2 +- pandas/tests/io/test_sql.py | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/ci/requirements-3.6.run b/ci/requirements-3.6.run index e30461d06b8ea..822144a80bc9a 100644 --- a/ci/requirements-3.6.run +++ b/ci/requirements-3.6.run @@ -13,7 +13,7 @@ lxml html5lib jinja2 sqlalchemy -pymysql<0.8.0 +pymysql feather-format pyarrow psycopg2 diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 0cc4101cd6304..f3ab74d37a2bc 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1731,13 +1731,16 @@ class _TestMySQLAlchemy(object): @classmethod def connect(cls): url = 'mysql+{driver}://root@localhost/pandas_nosetest' - return sqlalchemy.create_engine(url.format(driver=cls.driver)) + return sqlalchemy.create_engine(url.format(driver=cls.driver), + connect_args=cls.connect_args) @classmethod def setup_driver(cls): try: import pymysql # noqa cls.driver = 'pymysql' + from pymysql.constants import CLIENT + cls.connect_args = {'client_flag': CLIENT.MULTI_STATEMENTS} except ImportError: pytest.skip('pymysql not installed') From 067984a6e7d7fbe1422ab5bb9ad6698578aacf81 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 12 Feb 2018 03:33:33 -0800 Subject: [PATCH 100/217] move array_to_datetime timests (#19640) --- pandas/tests/indexes/datetimes/test_tools.py | 186 +----------------- pandas/tests/tslibs/test_array_to_datetime.py | 145 ++++++++++++++ 2 files changed, 155 insertions(+), 176 deletions(-) create mode 100644 pandas/tests/tslibs/test_array_to_datetime.py diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index bd3fa5e73cd11..b5926933544e8 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -8,7 +8,7 @@ import dateutil import numpy as np from dateutil.parser import parse -from datetime import datetime, date, 
time, timedelta +from datetime import datetime, date, time from distutils.version import LooseVersion import pandas as pd @@ -19,7 +19,6 @@ from pandas.errors import OutOfBoundsDatetime from pandas.compat import lmap, PY3 -from pandas.compat.numpy import np_array_datetime64_compat from pandas.core.dtypes.common import is_datetime64_ns_dtype from pandas.util import testing as tm import pandas.util._test_decorators as td @@ -803,6 +802,15 @@ def test_dataframe_dtypes(self, cache): class TestToDatetimeMisc(object): + def test_to_datetime_barely_out_of_bounds(self): + # GH#19529 + # GH#19382 close enough to bounds that dropping nanos would result + # in an in-bounds datetime + arr = np.array(['2262-04-11 23:47:16.854775808'], dtype=object) + + with pytest.raises(OutOfBoundsDatetime): + to_datetime(arr) + @pytest.mark.parametrize('cache', [True, False]) def test_to_datetime_iso8601(self, cache): result = to_datetime(["2012-01-01 00:00:00"], cache=cache) @@ -1464,180 +1472,6 @@ def test_parsers_timezone_minute_offsets_roundtrip(self, cache): converted_time = dt_time.tz_localize('UTC').tz_convert(tz) assert dt_string_repr == repr(converted_time) - def test_parsers_iso8601(self): - # GH 12060 - # test only the iso parser - flexibility to different - # separators and leadings 0s - # Timestamp construction falls back to dateutil - cases = {'2011-01-02': datetime(2011, 1, 2), - '2011-1-2': datetime(2011, 1, 2), - '2011-01': datetime(2011, 1, 1), - '2011-1': datetime(2011, 1, 1), - '2011 01 02': datetime(2011, 1, 2), - '2011.01.02': datetime(2011, 1, 2), - '2011/01/02': datetime(2011, 1, 2), - '2011\\01\\02': datetime(2011, 1, 2), - '2013-01-01 05:30:00': datetime(2013, 1, 1, 5, 30), - '2013-1-1 5:30:00': datetime(2013, 1, 1, 5, 30)} - for date_str, exp in compat.iteritems(cases): - actual = tslib._test_parse_iso8601(date_str) - assert actual == exp - - # separators must all match - YYYYMM not valid - invalid_cases = ['2011-01/02', '2011^11^11', - '201401', '201111', '200101', 
- # mixed separated and unseparated - '2005-0101', '200501-01', - '20010101 12:3456', '20010101 1234:56', - # HHMMSS must have two digits in each component - # if unseparated - '20010101 1', '20010101 123', '20010101 12345', - '20010101 12345Z', - # wrong separator for HHMMSS - '2001-01-01 12-34-56'] - for date_str in invalid_cases: - with pytest.raises(ValueError): - tslib._test_parse_iso8601(date_str) - # If no ValueError raised, let me know which case failed. - raise Exception(date_str) - - -class TestArrayToDatetime(object): - def test_coerce_out_of_bounds_utc(self): - # GH#19612 - ts = Timestamp('1900-01-01', tz='US/Pacific') - dt = ts.to_pydatetime() - timedelta(days=365 * 300) # ~1600AD - arr = np.array([dt]) - result = tslib.array_to_datetime(arr, utc=True, errors='coerce') - expected = np.array(['NaT'], dtype='datetime64[ns]') - tm.assert_numpy_array_equal(result, expected) - - def test_parsing_valid_dates(self): - arr = np.array(['01-01-2013', '01-02-2013'], dtype=object) - tm.assert_numpy_array_equal( - tslib.array_to_datetime(arr), - np_array_datetime64_compat( - [ - '2013-01-01T00:00:00.000000000-0000', - '2013-01-02T00:00:00.000000000-0000' - ], - dtype='M8[ns]' - ) - ) - - arr = np.array(['Mon Sep 16 2013', 'Tue Sep 17 2013'], dtype=object) - tm.assert_numpy_array_equal( - tslib.array_to_datetime(arr), - np_array_datetime64_compat( - [ - '2013-09-16T00:00:00.000000000-0000', - '2013-09-17T00:00:00.000000000-0000' - ], - dtype='M8[ns]' - ) - ) - - def test_parsing_timezone_offsets(self): - # All of these datetime strings with offsets are equivalent - # to the same datetime after the timezone offset is added - dt_strings = [ - '01-01-2013 08:00:00+08:00', - '2013-01-01T08:00:00.000000000+0800', - '2012-12-31T16:00:00.000000000-0800', - '12-31-2012 23:00:00-01:00' - ] - - expected_output = tslib.array_to_datetime(np.array( - ['01-01-2013 00:00:00'], dtype=object)) - - for dt_string in dt_strings: - tm.assert_numpy_array_equal( - tslib.array_to_datetime( 
- np.array([dt_string], dtype=object) - ), - expected_output - ) - - def test_number_looking_strings_not_into_datetime(self): - # #4601 - # These strings don't look like datetimes so they shouldn't be - # attempted to be converted - arr = np.array(['-352.737091', '183.575577'], dtype=object) - tm.assert_numpy_array_equal( - tslib.array_to_datetime(arr, errors='ignore'), arr) - - arr = np.array(['1', '2', '3', '4', '5'], dtype=object) - tm.assert_numpy_array_equal( - tslib.array_to_datetime(arr, errors='ignore'), arr) - - def test_coercing_dates_outside_of_datetime64_ns_bounds(self): - invalid_dates = [ - date(1000, 1, 1), - datetime(1000, 1, 1), - '1000-01-01', - 'Jan 1, 1000', - np.datetime64('1000-01-01'), - ] - - for invalid_date in invalid_dates: - pytest.raises(ValueError, - tslib.array_to_datetime, - np.array([invalid_date], dtype='object'), - errors='raise', ) - tm.assert_numpy_array_equal( - tslib.array_to_datetime( - np.array([invalid_date], dtype='object'), - errors='coerce'), - np.array([tslib.iNaT], dtype='M8[ns]') - ) - - arr = np.array(['1/1/1000', '1/1/2000'], dtype=object) - tm.assert_numpy_array_equal( - tslib.array_to_datetime(arr, errors='coerce'), - np_array_datetime64_compat( - [ - tslib.iNaT, - '2000-01-01T00:00:00.000000000-0000' - ], - dtype='M8[ns]' - ) - ) - - def test_coerce_of_invalid_datetimes(self): - arr = np.array(['01-01-2013', 'not_a_date', '1'], dtype=object) - - # Without coercing, the presence of any invalid dates prevents - # any values from being converted - tm.assert_numpy_array_equal( - tslib.array_to_datetime(arr, errors='ignore'), arr) - - # With coercing, the invalid dates becomes iNaT - tm.assert_numpy_array_equal( - tslib.array_to_datetime(arr, errors='coerce'), - np_array_datetime64_compat( - [ - '2013-01-01T00:00:00.000000000-0000', - tslib.iNaT, - tslib.iNaT - ], - dtype='M8[ns]' - ) - ) - - def test_to_datetime_barely_out_of_bounds(self): - # GH#19529 - # GH#19382 close enough to bounds that dropping nanos would 
result - # in an in-bounds datetime - arr = np.array(['2262-04-11 23:47:16.854775808'], dtype=object) - - with pytest.raises(OutOfBoundsDatetime): - to_datetime(arr) - - with pytest.raises(OutOfBoundsDatetime): - # Essentially the same as above, but more directly calling - # the relevant function - tslib.array_to_datetime(arr) - def test_normalize_date(): value = date(2012, 9, 7) diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py new file mode 100644 index 0000000000000..eb77e52e7c91d --- /dev/null +++ b/pandas/tests/tslibs/test_array_to_datetime.py @@ -0,0 +1,145 @@ +# -*- coding: utf-8 -*- +from datetime import datetime, date + +import numpy as np +import pytest + +from pandas._libs import tslib +from pandas.compat.numpy import np_array_datetime64_compat +import pandas.util.testing as tm + + +class TestParseISO8601(object): + @pytest.mark.parametrize('date_str, exp', [ + ('2011-01-02', datetime(2011, 1, 2)), + ('2011-1-2', datetime(2011, 1, 2)), + ('2011-01', datetime(2011, 1, 1)), + ('2011-1', datetime(2011, 1, 1)), + ('2011 01 02', datetime(2011, 1, 2)), + ('2011.01.02', datetime(2011, 1, 2)), + ('2011/01/02', datetime(2011, 1, 2)), + ('2011\\01\\02', datetime(2011, 1, 2)), + ('2013-01-01 05:30:00', datetime(2013, 1, 1, 5, 30)), + ('2013-1-1 5:30:00', datetime(2013, 1, 1, 5, 30))]) + def test_parsers_iso8601(self, date_str, exp): + # GH#12060 + # test only the iso parser - flexibility to different + # separators and leadings 0s + # Timestamp construction falls back to dateutil + actual = tslib._test_parse_iso8601(date_str) + assert actual == exp + + @pytest.mark.parametrize( + 'date_str', + ['2011-01/02', '2011^11^11', + '201401', '201111', '200101', + # mixed separated and unseparated + '2005-0101', '200501-01', + '20010101 12:3456', + '20010101 1234:56', + # HHMMSS must have two digits in + # each component if unseparated + '20010101 1', '20010101 123', + '20010101 12345', '20010101 12345Z', + # wrong 
separator for HHMMSS + '2001-01-01 12-34-56']) + def test_parsers_iso8601_invalid(self, date_str): + # separators must all match - YYYYMM not valid + with pytest.raises(ValueError): + tslib._test_parse_iso8601(date_str) + + +class TestArrayToDatetime(object): + def test_parsing_valid_dates(self): + arr = np.array(['01-01-2013', '01-02-2013'], dtype=object) + result = tslib.array_to_datetime(arr) + expected = ['2013-01-01T00:00:00.000000000-0000', + '2013-01-02T00:00:00.000000000-0000'] + tm.assert_numpy_array_equal( + result, + np_array_datetime64_compat(expected, dtype='M8[ns]')) + + arr = np.array(['Mon Sep 16 2013', 'Tue Sep 17 2013'], dtype=object) + result = tslib.array_to_datetime(arr) + expected = ['2013-09-16T00:00:00.000000000-0000', + '2013-09-17T00:00:00.000000000-0000'] + tm.assert_numpy_array_equal( + result, + np_array_datetime64_compat(expected, dtype='M8[ns]')) + + @pytest.mark.parametrize('dt_string', [ + '01-01-2013 08:00:00+08:00', + '2013-01-01T08:00:00.000000000+0800', + '2012-12-31T16:00:00.000000000-0800', + '12-31-2012 23:00:00-01:00']) + def test_parsing_timezone_offsets(self, dt_string): + # All of these datetime strings with offsets are equivalent + # to the same datetime after the timezone offset is added + arr = np.array(['01-01-2013 00:00:00'], dtype=object) + expected = tslib.array_to_datetime(arr) + + arr = np.array([dt_string], dtype=object) + result = tslib.array_to_datetime(arr) + tm.assert_numpy_array_equal(result, expected) + + def test_number_looking_strings_not_into_datetime(self): + # GH#4601 + # These strings don't look like datetimes so they shouldn't be + # attempted to be converted + arr = np.array(['-352.737091', '183.575577'], dtype=object) + result = tslib.array_to_datetime(arr, errors='ignore') + tm.assert_numpy_array_equal(result, arr) + + arr = np.array(['1', '2', '3', '4', '5'], dtype=object) + result = tslib.array_to_datetime(arr, errors='ignore') + tm.assert_numpy_array_equal(result, arr) + + 
@pytest.mark.parametrize('invalid_date', [ + date(1000, 1, 1), + datetime(1000, 1, 1), + '1000-01-01', + 'Jan 1, 1000', + np.datetime64('1000-01-01')]) + def test_coerce_outside_ns_bounds(self, invalid_date): + arr = np.array([invalid_date], dtype='object') + with pytest.raises(ValueError): + tslib.array_to_datetime(arr, errors='raise') + + result = tslib.array_to_datetime(arr, errors='coerce') + expected = np.array([tslib.iNaT], dtype='M8[ns]') + tm.assert_numpy_array_equal(result, expected) + + def test_coerce_outside_ns_bounds_one_valid(self): + arr = np.array(['1/1/1000', '1/1/2000'], dtype=object) + result = tslib.array_to_datetime(arr, errors='coerce') + expected = [tslib.iNaT, + '2000-01-01T00:00:00.000000000-0000'] + tm.assert_numpy_array_equal( + result, + np_array_datetime64_compat(expected, dtype='M8[ns]')) + + def test_coerce_of_invalid_datetimes(self): + arr = np.array(['01-01-2013', 'not_a_date', '1'], dtype=object) + + # Without coercing, the presence of any invalid dates prevents + # any values from being converted + result = tslib.array_to_datetime(arr, errors='ignore') + tm.assert_numpy_array_equal(result, arr) + + # With coercing, the invalid dates becomes iNaT + result = tslib.array_to_datetime(arr, errors='coerce') + expected = ['2013-01-01T00:00:00.000000000-0000', + tslib.iNaT, + tslib.iNaT] + + tm.assert_numpy_array_equal( + result, + np_array_datetime64_compat(expected, dtype='M8[ns]')) + + def test_to_datetime_barely_out_of_bounds(self): + # GH#19529 + # GH#19382 close enough to bounds that dropping nanos would result + # in an in-bounds datetime + arr = np.array(['2262-04-11 23:47:16.854775808'], dtype=object) + with pytest.raises(tslib.OutOfBoundsDatetime): + tslib.array_to_datetime(arr) From aa976482b444458800ae017e623788f6d6050005 Mon Sep 17 00:00:00 2001 From: Matt Kirk Date: Mon, 12 Feb 2018 19:06:11 +0700 Subject: [PATCH 101/217] BUG: assign doesnt cast SparseDataFrame to DataFrame (#19178) --- doc/source/whatsnew/v0.23.0.txt | 1 + 
pandas/core/sparse/array.py | 9 +++++---- pandas/tests/sparse/frame/test_frame.py | 11 +++++++++++ pandas/tests/sparse/test_array.py | 15 +++++++++++++++ 4 files changed, 32 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index acab9d0bbebf8..72f63a4da0f4d 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -822,6 +822,7 @@ Sparse - Bug in which creating a ``SparseDataFrame`` from a dense ``Series`` or an unsupported type raised an uncontrolled exception (:issue:`19374`) - Bug in :class:`SparseDataFrame.to_csv` causing exception (:issue:`19384`) - Bug in :class:`SparseSeries.memory_usage` which caused segfault by accessing non sparse elements (:issue:`19368`) +- Bug in constructing a ``SparseArray``: if ``data`` is a scalar and ``index`` is defined it will coerce to ``float64`` regardless of scalar's dtype. (:issue:`19163`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py index 65aefd9fb8c0a..3cbae717d0e07 100644 --- a/pandas/core/sparse/array.py +++ b/pandas/core/sparse/array.py @@ -26,7 +26,8 @@ is_scalar, is_dtype_equal) from pandas.core.dtypes.cast import ( maybe_convert_platform, maybe_promote, - astype_nansafe, find_common_type) + astype_nansafe, find_common_type, infer_dtype_from_scalar, + construct_1d_arraylike_from_scalar) from pandas.core.dtypes.missing import isna, notna, na_value_for_dtype import pandas._libs.sparse as splib @@ -162,9 +163,9 @@ def __new__(cls, data, sparse_index=None, index=None, kind='integer', data = np.nan if not is_scalar(data): raise Exception("must only pass scalars with an index ") - values = np.empty(len(index), dtype='float64') - values.fill(data) - data = values + dtype = infer_dtype_from_scalar(data)[0] + data = construct_1d_arraylike_from_scalar( + data, len(index), dtype) if isinstance(data, ABCSparseSeries): data = data.values diff --git a/pandas/tests/sparse/frame/test_frame.py 
b/pandas/tests/sparse/frame/test_frame.py index 29fad3c8eefaf..0e8b2161cafc4 100644 --- a/pandas/tests/sparse/frame/test_frame.py +++ b/pandas/tests/sparse/frame/test_frame.py @@ -1257,3 +1257,14 @@ def test_quantile_multi(self): tm.assert_frame_equal(result, dense_expected) tm.assert_sp_frame_equal(result, sparse_expected) + + def test_assign_with_sparse_frame(self): + # GH 19163 + df = pd.DataFrame({"a": [1, 2, 3]}) + res = df.to_sparse(fill_value=False).assign(newcol=False) + exp = df.assign(newcol=False).to_sparse(fill_value=False) + + tm.assert_sp_frame_equal(res, exp) + + for column in res.columns: + assert type(res[column]) is SparseSeries diff --git a/pandas/tests/sparse/test_array.py b/pandas/tests/sparse/test_array.py index 8de93ff320961..6c0c83cf65ff7 100644 --- a/pandas/tests/sparse/test_array.py +++ b/pandas/tests/sparse/test_array.py @@ -113,6 +113,21 @@ def test_constructor_spindex_dtype(self): assert arr.dtype == np.int64 assert arr.fill_value == 0 + @pytest.mark.parametrize('scalar,dtype', [ + (False, bool), + (0.0, 'float64'), + (1, 'int64'), + ('z', 'object')]) + def test_scalar_with_index_infer_dtype(self, scalar, dtype): + # GH 19163 + arr = SparseArray(scalar, index=[1, 2, 3], fill_value=scalar) + exp = SparseArray([scalar, scalar, scalar], fill_value=scalar) + + tm.assert_sp_array_equal(arr, exp) + + assert arr.dtype == dtype + assert exp.dtype == dtype + def test_sparseseries_roundtrip(self): # GH 13999 for kind in ['integer', 'block']: From 49a016b13f415774c4cdd7e9528590d2d6690fbb Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 12 Feb 2018 19:12:55 -0500 Subject: [PATCH 102/217] TST: placement of network error catching in s3 tests (#19645) --- pandas/io/common.py | 13 +++++-- pandas/io/excel.py | 2 +- pandas/io/json/json.py | 10 ++++- pandas/io/packers.py | 8 +++- pandas/io/parquet.py | 30 +++++++++------ pandas/io/parsers.py | 9 ++++- pandas/io/s3.py | 4 +- pandas/io/sas/sas7bdat.py | 2 +- pandas/io/sas/sas_xport.py | 3 +- 
pandas/io/stata.py | 2 +- pandas/tests/io/conftest.py | 53 +++++++++++++++----------- pandas/tests/io/json/test_pandas.py | 1 - pandas/tests/io/parser/test_network.py | 28 +++++++++----- pandas/tests/io/test_common.py | 8 +++- 14 files changed, 114 insertions(+), 59 deletions(-) diff --git a/pandas/io/common.py b/pandas/io/common.py index 4ba969f0abac4..e312181f08512 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -183,7 +183,10 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None, Returns ------- - a filepath_ or buffer or S3File instance, the encoding, the compression + tuple of ({a filepath_ or buffer or S3File instance}, + encoding, str, + compression, str, + should_close, bool) """ filepath_or_buffer = _stringify_path(filepath_or_buffer) @@ -194,7 +197,8 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None, # Override compression based on Content-Encoding header compression = 'gzip' reader = BytesIO(req.read()) - return reader, encoding, compression + req.close() + return reader, encoding, compression, True if is_s3_url(filepath_or_buffer): from pandas.io import s3 @@ -206,13 +210,13 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None, if isinstance(filepath_or_buffer, (compat.string_types, compat.binary_type, mmap.mmap)): - return _expand_user(filepath_or_buffer), None, compression + return _expand_user(filepath_or_buffer), None, compression, False if not is_file_like(filepath_or_buffer): msg = "Invalid file path or buffer object type: {_type}" raise ValueError(msg.format(_type=type(filepath_or_buffer))) - return filepath_or_buffer, None, compression + return filepath_or_buffer, None, compression, False def file_path_to_url(path): @@ -309,6 +313,7 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None, is_text : boolean, default True whether file/buffer is in text format (csv, json, etc.), or in binary mode (pickle, etc.) 
+ Returns ------- f : file-like diff --git a/pandas/io/excel.py b/pandas/io/excel.py index b03987e933bff..0d3d4286f5a3c 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -381,7 +381,7 @@ def __init__(self, io, **kwds): if _is_url(self._io): io = _urlopen(self._io) elif not isinstance(self.io, (ExcelFile, xlrd.Book)): - io, _, _ = get_filepath_or_buffer(self._io) + io, _, _, _ = get_filepath_or_buffer(self._io) if engine == 'xlrd' and isinstance(io, xlrd.Book): self.book = io diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py index e3a1321336fb3..24364fe07405e 100644 --- a/pandas/io/json/json.py +++ b/pandas/io/json/json.py @@ -404,7 +404,7 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True, """ compression = _infer_compression(path_or_buf, compression) - filepath_or_buffer, _, compression = get_filepath_or_buffer( + filepath_or_buffer, _, compression, should_close = get_filepath_or_buffer( path_or_buf, encoding=encoding, compression=compression, ) @@ -419,7 +419,13 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True, if chunksize: return json_reader - return json_reader.read() + result = json_reader.read() + if should_close: + try: + filepath_or_buffer.close() + except: # noqa: flake8 + pass + return result class JsonReader(BaseIterator): diff --git a/pandas/io/packers.py b/pandas/io/packers.py index 9289853a1bbfd..d3e6f0cf4a1bc 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -180,7 +180,7 @@ def read_msgpack(path_or_buf, encoding='utf-8', iterator=False, **kwargs): obj : type of object stored in file """ - path_or_buf, _, _ = get_filepath_or_buffer(path_or_buf) + path_or_buf, _, _, should_close = get_filepath_or_buffer(path_or_buf) if iterator: return Iterator(path_or_buf) @@ -188,6 +188,12 @@ def read(fh): l = list(unpack(fh, encoding=encoding, **kwargs)) if len(l) == 1: return l[0] + + if should_close: + try: + path_or_buf.close() + except: # noqa: flake8 + pass return l # see if we 
have an actual file diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 6e1b6e14861c3..1c22a305c089d 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -107,7 +107,7 @@ def write(self, df, path, compression='snappy', self.validate_dataframe(df) if self._pyarrow_lt_070: self._validate_write_lt_070(df) - path, _, _ = get_filepath_or_buffer(path, mode='wb') + path, _, _, _ = get_filepath_or_buffer(path, mode='wb') if self._pyarrow_lt_060: table = self.api.Table.from_pandas(df, timestamps_to_ms=True) @@ -121,13 +121,21 @@ def write(self, df, path, compression='snappy', coerce_timestamps=coerce_timestamps, **kwargs) def read(self, path, columns=None, **kwargs): - path, _, _ = get_filepath_or_buffer(path) + path, _, _, should_close = get_filepath_or_buffer(path) if self._pyarrow_lt_070: - return self.api.parquet.read_pandas(path, columns=columns, - **kwargs).to_pandas() - kwargs['use_pandas_metadata'] = True - return self.api.parquet.read_table(path, columns=columns, - **kwargs).to_pandas() + result = self.api.parquet.read_pandas(path, columns=columns, + **kwargs).to_pandas() + else: + kwargs['use_pandas_metadata'] = True + result = self.api.parquet.read_table(path, columns=columns, + **kwargs).to_pandas() + if should_close: + try: + path.close() + except: # noqa: flake8 + pass + + return result def _validate_write_lt_070(self, df): # Compatibility shim for pyarrow < 0.7.0 @@ -199,11 +207,11 @@ def write(self, df, path, compression='snappy', **kwargs): # path is s3:// so we need to open the s3file in 'wb' mode. # TODO: Support 'ab' - path, _, _ = get_filepath_or_buffer(path, mode='wb') + path, _, _, _ = get_filepath_or_buffer(path, mode='wb') # And pass the opened s3file to the fastparquet internal impl. 
kwargs['open_with'] = lambda path, _: path else: - path, _, _ = get_filepath_or_buffer(path) + path, _, _, _ = get_filepath_or_buffer(path) with catch_warnings(record=True): self.api.write(path, df, @@ -214,13 +222,13 @@ def read(self, path, columns=None, **kwargs): # When path is s3:// an S3File is returned. # We need to retain the original path(str) while also # pass the S3File().open function to fsatparquet impl. - s3, _, _ = get_filepath_or_buffer(path) + s3, _, _, should_close = get_filepath_or_buffer(path) try: parquet_file = self.api.ParquetFile(path, open_with=s3.s3.open) finally: s3.close() else: - path, _, _ = get_filepath_or_buffer(path) + path, _, _, _ = get_filepath_or_buffer(path) parquet_file = self.api.ParquetFile(path) return parquet_file.to_pandas(columns=columns, **kwargs) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index af1441f4a0fc9..7ea6d321e0fdd 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -413,7 +413,7 @@ def _read(filepath_or_buffer, kwds): compression = kwds.get('compression') compression = _infer_compression(filepath_or_buffer, compression) - filepath_or_buffer, _, compression = get_filepath_or_buffer( + filepath_or_buffer, _, compression, should_close = get_filepath_or_buffer( filepath_or_buffer, encoding, compression) kwds['compression'] = compression @@ -439,6 +439,13 @@ def _read(filepath_or_buffer, kwds): data = parser.read(nrows) finally: parser.close() + + if should_close: + try: + filepath_or_buffer.close() + except: # noqa: flake8 + pass + return data diff --git a/pandas/io/s3.py b/pandas/io/s3.py index e2650e29c0db3..bd2286c5c8569 100644 --- a/pandas/io/s3.py +++ b/pandas/io/s3.py @@ -27,7 +27,7 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None, fs = s3fs.S3FileSystem(anon=False) try: filepath_or_buffer = fs.open(_strip_schema(filepath_or_buffer), mode) - except (OSError, NoCredentialsError): + except (compat.FileNotFoundError, NoCredentialsError): # boto3 has troubles when trying 
to access a public file # when credentialed... # An OSError is raised if you have credentials, but they @@ -36,4 +36,4 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None, # for that bucket. fs = s3fs.S3FileSystem(anon=True) filepath_or_buffer = fs.open(_strip_schema(filepath_or_buffer), mode) - return filepath_or_buffer, None, compression + return filepath_or_buffer, None, compression, True diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py index 26e39f0df8b29..806cbddaa2ee2 100644 --- a/pandas/io/sas/sas7bdat.py +++ b/pandas/io/sas/sas7bdat.py @@ -90,7 +90,7 @@ def __init__(self, path_or_buf, index=None, convert_dates=True, self._current_row_on_page_index = 0 self._current_row_in_file_index = 0 - self._path_or_buf, _, _ = get_filepath_or_buffer(path_or_buf) + self._path_or_buf, _, _, _ = get_filepath_or_buffer(path_or_buf) if isinstance(self._path_or_buf, compat.string_types): self._path_or_buf = open(self._path_or_buf, 'rb') self.handle = self._path_or_buf diff --git a/pandas/io/sas/sas_xport.py b/pandas/io/sas/sas_xport.py index c14524f7d7cd6..7994517b9f303 100644 --- a/pandas/io/sas/sas_xport.py +++ b/pandas/io/sas/sas_xport.py @@ -236,7 +236,8 @@ def __init__(self, filepath_or_buffer, index=None, encoding='ISO-8859-1', self._chunksize = chunksize if isinstance(filepath_or_buffer, str): - filepath_or_buffer, encoding, compression = get_filepath_or_buffer( + (filepath_or_buffer, encoding, + compression, should_close) = get_filepath_or_buffer( filepath_or_buffer, encoding=encoding) if isinstance(filepath_or_buffer, (str, compat.text_type, bytes)): diff --git a/pandas/io/stata.py b/pandas/io/stata.py index ee6975ea1d938..9646831cb612c 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -988,7 +988,7 @@ def __init__(self, path_or_buf, convert_dates=True, self._native_byteorder = _set_endianness(sys.byteorder) path_or_buf = _stringify_path(path_or_buf) if isinstance(path_or_buf, str): - path_or_buf, encoding, _ = 
get_filepath_or_buffer( + path_or_buf, encoding, _, should_close = get_filepath_or_buffer( path_or_buf, encoding=self._default_encoding ) diff --git a/pandas/tests/io/conftest.py b/pandas/tests/io/conftest.py index 57e72da2fd3f4..8deb51e190bab 100644 --- a/pandas/tests/io/conftest.py +++ b/pandas/tests/io/conftest.py @@ -2,30 +2,34 @@ import pytest from pandas.io.parsers import read_table +from pandas.util import testing as tm -HERE = os.path.dirname(__file__) +@pytest.fixture +def parser_data(request): + return os.path.join(tm.get_data_path(), '..', 'parser', 'data') -@pytest.fixture(scope='module') -def tips_file(): + +@pytest.fixture +def tips_file(parser_data): """Path to the tips dataset""" - return os.path.join(HERE, 'parser', 'data', 'tips.csv') + return os.path.join(parser_data, 'tips.csv') -@pytest.fixture(scope='module') -def jsonl_file(): +@pytest.fixture +def jsonl_file(parser_data): """Path a JSONL dataset""" - return os.path.join(HERE, 'parser', 'data', 'items.jsonl') + return os.path.join(parser_data, 'items.jsonl') -@pytest.fixture(scope='module') -def salaries_table(): +@pytest.fixture +def salaries_table(parser_data): """DataFrame with the salaries dataset""" - path = os.path.join(HERE, 'parser', 'data', 'salaries.csv') + path = os.path.join(parser_data, 'salaries.csv') return read_table(path) -@pytest.fixture(scope='module') +@pytest.fixture def s3_resource(tips_file, jsonl_file): """Fixture for mocking S3 interaction. @@ -41,8 +45,8 @@ def s3_resource(tips_file, jsonl_file): is yielded by the fixture. 
""" pytest.importorskip('s3fs') + boto3 = pytest.importorskip('boto3') moto = pytest.importorskip('moto') - moto.mock_s3().start() test_s3_files = [ ('tips.csv', tips_file), @@ -58,17 +62,22 @@ def add_tips_files(bucket_name): Key=s3_key, Body=f) - boto3 = pytest.importorskip('boto3') - # see gh-16135 - bucket = 'pandas-test' + try: - conn = boto3.resource("s3", region_name="us-east-1") - conn.create_bucket(Bucket=bucket) - add_tips_files(bucket) + s3 = moto.mock_s3() + s3.start() - conn.create_bucket(Bucket='cant_get_it', ACL='private') - add_tips_files('cant_get_it') + # see gh-16135 + bucket = 'pandas-test' + conn = boto3.resource("s3", region_name="us-east-1") - yield conn + conn.create_bucket(Bucket=bucket) + add_tips_files(bucket) - moto.mock_s3().stop() + conn.create_bucket(Bucket='cant_get_it', ACL='private') + add_tips_files('cant_get_it') + yield conn + except: # noqa: flake8 + pytest.skip("failure to use s3 resource") + finally: + s3.stop() diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 10139eb07a925..a72744e08fa7c 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1039,7 +1039,6 @@ def test_read_inline_jsonl(self): assert_frame_equal(result, expected) def test_read_s3_jsonl(self, s3_resource): - pytest.importorskip('s3fs') # GH17200 result = read_json('s3n://pandas-test/items.jsonl', lines=True) diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py index 10f6cef04b593..f16338fda6245 100644 --- a/pandas/tests/io/parser/test_network.py +++ b/pandas/tests/io/parser/test_network.py @@ -46,6 +46,7 @@ def check_compressed_urls(salaries_table, compression, extension, mode, class TestS3(object): + @tm.network def test_parse_public_s3_bucket(self): pytest.importorskip('s3fs') @@ -65,7 +66,8 @@ def test_parse_public_s3_bucket(self): assert not df.empty tm.assert_frame_equal(read_csv(tm.get_data_path('tips.csv')), df) - def 
test_parse_public_s3n_bucket(self, s3_resource): + @tm.network + def test_parse_public_s3n_bucket(self): # Read from AWS s3 as "s3n" URL df = read_csv('s3n://pandas-test/tips.csv', nrows=10) @@ -74,7 +76,8 @@ def test_parse_public_s3n_bucket(self, s3_resource): tm.assert_frame_equal(read_csv( tm.get_data_path('tips.csv')).iloc[:10], df) - def test_parse_public_s3a_bucket(self, s3_resource): + @tm.network + def test_parse_public_s3a_bucket(self): # Read from AWS s3 as "s3a" URL df = read_csv('s3a://pandas-test/tips.csv', nrows=10) assert isinstance(df, DataFrame) @@ -82,7 +85,8 @@ def test_parse_public_s3a_bucket(self, s3_resource): tm.assert_frame_equal(read_csv( tm.get_data_path('tips.csv')).iloc[:10], df) - def test_parse_public_s3_bucket_nrows(self, s3_resource): + @tm.network + def test_parse_public_s3_bucket_nrows(self): for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]: df = read_csv('s3://pandas-test/tips.csv' + ext, nrows=10, compression=comp) @@ -91,7 +95,8 @@ def test_parse_public_s3_bucket_nrows(self, s3_resource): tm.assert_frame_equal(read_csv( tm.get_data_path('tips.csv')).iloc[:10], df) - def test_parse_public_s3_bucket_chunked(self, s3_resource): + @tm.network + def test_parse_public_s3_bucket_chunked(self): # Read with a chunksize chunksize = 5 local_tips = read_csv(tm.get_data_path('tips.csv')) @@ -109,7 +114,8 @@ def test_parse_public_s3_bucket_chunked(self, s3_resource): chunksize * i_chunk: chunksize * (i_chunk + 1)] tm.assert_frame_equal(true_df, df) - def test_parse_public_s3_bucket_chunked_python(self, s3_resource): + @tm.network + def test_parse_public_s3_bucket_chunked_python(self): # Read with a chunksize using the Python parser chunksize = 5 local_tips = read_csv(tm.get_data_path('tips.csv')) @@ -127,7 +133,8 @@ def test_parse_public_s3_bucket_chunked_python(self, s3_resource): chunksize * i_chunk: chunksize * (i_chunk + 1)] tm.assert_frame_equal(true_df, df) - def test_parse_public_s3_bucket_python(self, s3_resource): + 
@tm.network + def test_parse_public_s3_bucket_python(self): for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]: df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python', compression=comp) @@ -136,7 +143,8 @@ def test_parse_public_s3_bucket_python(self, s3_resource): tm.assert_frame_equal(read_csv( tm.get_data_path('tips.csv')), df) - def test_infer_s3_compression(self, s3_resource): + @tm.network + def test_infer_s3_compression(self): for ext in ['', '.gz', '.bz2']: df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python', compression='infer') @@ -145,7 +153,8 @@ def test_infer_s3_compression(self, s3_resource): tm.assert_frame_equal(read_csv( tm.get_data_path('tips.csv')), df) - def test_parse_public_s3_bucket_nrows_python(self, s3_resource): + @tm.network + def test_parse_public_s3_bucket_nrows_python(self): for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]: df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python', nrows=10, compression=comp) @@ -154,7 +163,8 @@ def test_parse_public_s3_bucket_nrows_python(self, s3_resource): tm.assert_frame_equal(read_csv( tm.get_data_path('tips.csv')).iloc[:10], df) - def test_s3_fails(self, s3_resource): + @tm.network + def test_s3_fails(self): with pytest.raises(IOError): read_csv('s3://nyqpug/asdf.csv') diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index a0070dce6a7f1..a89156db38ae3 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -102,15 +102,19 @@ def test_infer_compression_from_path(self, extension, expected, path_type): def test_get_filepath_or_buffer_with_path(self): filename = '~/sometest' - filepath_or_buffer, _, _ = common.get_filepath_or_buffer(filename) + filepath_or_buffer, _, _, should_close = common.get_filepath_or_buffer( + filename) assert filepath_or_buffer != filename assert isabs(filepath_or_buffer) assert os.path.expanduser(filename) == filepath_or_buffer + assert not should_close def 
test_get_filepath_or_buffer_with_buffer(self): input_buffer = StringIO() - filepath_or_buffer, _, _ = common.get_filepath_or_buffer(input_buffer) + filepath_or_buffer, _, _, should_close = common.get_filepath_or_buffer( + input_buffer) assert filepath_or_buffer == input_buffer + assert not should_close def test_iterator(self): reader = read_csv(StringIO(self.data1), chunksize=1) From 89a5df27e89ab6fad370d4d02a772a8d28faa5fb Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 12 Feb 2018 16:19:23 -0800 Subject: [PATCH 103/217] De-duplicate masking/fallback logic in ops (#19613) --- pandas/core/frame.py | 12 +---- pandas/core/ops.py | 109 +++++++++++++++++++++++++++++------------- pandas/core/series.py | 15 +----- 3 files changed, 78 insertions(+), 58 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 23579d84a3964..2782ee7b9d201 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3943,17 +3943,7 @@ def _combine_frame(self, other, func, fill_value=None, level=None): new_index, new_columns = this.index, this.columns def _arith_op(left, right): - if fill_value is not None: - left_mask = isna(left) - right_mask = isna(right) - left = left.copy() - right = right.copy() - - # one but not both - mask = left_mask ^ right_mask - left[left_mask & mask] = fill_value - right[right_mask & mask] = fill_value - + left, right = ops.fill_binop(left, right, fill_value) return func(left, right) if this._is_mixed_type or other._is_mixed_type: diff --git a/pandas/core/ops.py b/pandas/core/ops.py index effa35695fcd1..4c234ccb4dd47 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -398,6 +398,79 @@ def _make_flex_doc(op_name, typ): return doc +# ----------------------------------------------------------------------------- +# Masking NA values and fallbacks for operations numpy does not support + +def fill_binop(left, right, fill_value): + """ + If a non-None fill_value is given, replace null entries in left and right + with this value, 
but only in positions where _one_ of left/right is null, + not both. + + Parameters + ---------- + left : array-like + right : array-like + fill_value : object + + Returns + ------- + left : array-like + right : array-like + + Notes + ----- + Makes copies if fill_value is not None + """ + # TODO: can we make a no-copy implementation? + if fill_value is not None: + left_mask = isna(left) + right_mask = isna(right) + left = left.copy() + right = right.copy() + + # one but not both + mask = left_mask ^ right_mask + left[left_mask & mask] = fill_value + right[right_mask & mask] = fill_value + return left, right + + +def mask_cmp_op(x, y, op, allowed_types): + """ + Apply the function `op` to only non-null points in x and y. + + Parameters + ---------- + x : array-like + y : array-like + op : binary operation + allowed_types : class or tuple of classes + + Returns + ------- + result : ndarray[bool] + """ + # TODO: Can we make the allowed_types arg unnecessary? + xrav = x.ravel() + result = np.empty(x.size, dtype=bool) + if isinstance(y, allowed_types): + yrav = y.ravel() + mask = notna(xrav) & notna(yrav) + result[mask] = op(np.array(list(xrav[mask])), + np.array(list(yrav[mask]))) + else: + mask = notna(xrav) + result[mask] = op(np.array(list(xrav[mask])), y) + + if op == operator.ne: # pragma: no cover + np.putmask(result, ~mask, True) + else: + np.putmask(result, ~mask, False) + result = result.reshape(x.shape) + return result + + # ----------------------------------------------------------------------------- # Functions that add arithmetic methods to objects, given arithmetic factory # methods @@ -1127,23 +1200,7 @@ def na_op(x, y): with np.errstate(invalid='ignore'): result = op(x, y) except TypeError: - xrav = x.ravel() - result = np.empty(x.size, dtype=bool) - if isinstance(y, (np.ndarray, ABCSeries)): - yrav = y.ravel() - mask = notna(xrav) & notna(yrav) - result[mask] = op(np.array(list(xrav[mask])), - np.array(list(yrav[mask]))) - else: - mask = notna(xrav) - 
result[mask] = op(np.array(list(xrav[mask])), y) - - if op == operator.ne: # pragma: no cover - np.putmask(result, ~mask, True) - else: - np.putmask(result, ~mask, False) - result = result.reshape(x.shape) - + result = mask_cmp_op(x, y, op, (np.ndarray, ABCSeries)) return result @Appender('Wrapper for flexible comparison methods {name}' @@ -1221,23 +1278,7 @@ def na_op(x, y): try: result = expressions.evaluate(op, str_rep, x, y) except TypeError: - xrav = x.ravel() - result = np.empty(x.size, dtype=bool) - if isinstance(y, np.ndarray): - yrav = y.ravel() - mask = notna(xrav) & notna(yrav) - result[mask] = op(np.array(list(xrav[mask])), - np.array(list(yrav[mask]))) - else: - mask = notna(xrav) - result[mask] = op(np.array(list(xrav[mask])), y) - - if op == operator.ne: # pragma: no cover - np.putmask(result, ~mask, True) - else: - np.putmask(result, ~mask, False) - result = result.reshape(x.shape) - + result = mask_cmp_op(x, y, op, np.ndarray) return result @Appender('Wrapper for comparison method {name}'.format(name=name)) diff --git a/pandas/core/series.py b/pandas/core/series.py index e4b8979d6393a..655eaa5373f5a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1725,19 +1725,8 @@ def _binop(self, other, func, level=None, fill_value=None): copy=False) new_index = this.index - this_vals = this.values - other_vals = other.values - - if fill_value is not None: - this_mask = isna(this_vals) - other_mask = isna(other_vals) - this_vals = this_vals.copy() - other_vals = other_vals.copy() - - # one but not both - mask = this_mask ^ other_mask - this_vals[this_mask & mask] = fill_value - other_vals[other_mask & mask] = fill_value + this_vals, other_vals = ops.fill_binop(this.values, other.values, + fill_value) with np.errstate(all='ignore'): result = func(this_vals, other_vals) From 541b5e5936322ad9a2a0a46331ecc48b7f16f8ba Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 13 Feb 2018 08:50:54 -0600 Subject: [PATCH 104/217] REF: Internal / External 
values (#19558) * REF/Clean: Internal / External values * Move to index base * Cleanup unique handling * Simplify object concat * Use values for intersection I think eventually we'll want to ndarray_values for this, but it'll require a bit more work to support. Currently, using ndarary_values causes occasional failures on categorical. * hmm * Additional testing * More tests * ndarray_values * API: Default ExtensionArray.astype (cherry picked from commit 943a915562b72bed147c857de927afa0daf31c1a) (cherry picked from commit fbf0a0672380e210d3cb3c527fa8045a204d81be) * Simplify concat_as_object * Py2 compat (cherry picked from commit b20e12cae68dd86ff51597464045656763d369f7) * Set-ops ugliness * better docstrings * tolist * linting * Moved dtypes (cherry picked from commit d1362271bca8a7b183f3241e5c2f040c422118b8) * clean * cleanup * NumPy compat * Use base _values for CategoricalIndex * Update dev docs * cleanup * Linting * Precision in tests * Push _ndarray_values to ExtensionArray Now IndexOpsMixin._ndarray_values will dispatch all the way down to the EA. Subclasses like Categorical can override it as they see fit. 
* Clean up tolist * Move test locations * Fixed test * REF: Update per comments * lint * REF: Use _values for size and shape * PERF: Implement size, shape for IntervalIndex * PERF: Avoid materializing values for PeriodIndex shape, size * Cleanup * Override nbytes --- doc/source/internals.rst | 19 +++ pandas/core/arrays/base.py | 12 ++ pandas/core/arrays/categorical.py | 4 + pandas/core/base.py | 21 +++- pandas/core/dtypes/cast.py | 2 +- pandas/core/dtypes/common.py | 2 +- pandas/core/dtypes/concat.py | 6 +- pandas/core/indexes/base.py | 108 ++++++++++++++---- pandas/core/indexes/category.py | 9 +- pandas/core/indexes/datetimelike.py | 2 +- pandas/core/indexes/datetimes.py | 43 +++++++ pandas/core/indexes/interval.py | 10 ++ pandas/core/indexes/multi.py | 34 +++--- pandas/core/indexes/numeric.py | 2 +- pandas/core/indexes/period.py | 49 +++++--- pandas/io/pytables.py | 2 +- pandas/plotting/_converter.py | 6 +- pandas/tests/indexes/common.py | 6 +- .../tests/indexes/datetimes/test_datetime.py | 9 ++ .../tests/indexes/period/test_construction.py | 4 +- pandas/tests/indexes/period/test_period.py | 6 +- pandas/tests/indexes/period/test_tools.py | 2 +- pandas/tests/indexes/test_category.py | 8 ++ pandas/tests/indexes/test_multi.py | 47 ++++++++ pandas/tests/test_base.py | 58 +++++++++- 25 files changed, 386 insertions(+), 85 deletions(-) diff --git a/doc/source/internals.rst b/doc/source/internals.rst index ee4df879d9478..957f82fd9eba7 100644 --- a/doc/source/internals.rst +++ b/doc/source/internals.rst @@ -89,6 +89,25 @@ not check (or care) whether the levels themselves are sorted. Fortunately, the constructors ``from_tuples`` and ``from_arrays`` ensure that this is true, but if you compute the levels and labels yourself, please be careful. +Values +~~~~~~ + +Pandas extends NumPy's type system with custom types, like ``Categorical`` or +datetimes with a timezone, so we have multiple notions of "values". 
For 1-D +containers (``Index`` classes and ``Series``) we have the following convention: + +* ``cls._ndarray_values`` is *always* a NumPy ``ndarray``. Ideally, + ``_ndarray_values`` is cheap to compute. For example, for a ``Categorical``, + this returns the codes, not the array of objects. +* ``cls._values`` refers to the "best possible" array. This could be an + ``ndarray``, ``ExtensionArray``, or an ``Index`` subclass (note: we're in the + process of removing the index subclasses here so that it's always an + ``ndarray`` or ``ExtensionArray``). + +So, for example, ``Series[category]._values`` is a ``Categorical``, while +``Series[category]._ndarray_values`` is the underlying codes. + + .. _ref-subclassing-pandas: Subclassing pandas Data Structures diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 553e1e0ac2066..e618dc6b69b2d 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -266,3 +266,15 @@ def _can_hold_na(self): Setting this to false will optimize some operations like fillna. """ return True + + @property + def _ndarray_values(self): + # type: () -> np.ndarray + """Internal pandas method for lossy conversion to a NumPy ndarray. + + This method is not part of the pandas interface. + + The expectation is that this is cheap to compute, and is primarily + used for interacting with our indexers. 
+ """ + return np.array(self) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 93250bdbb5054..bcf9cb7646704 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -410,6 +410,10 @@ def dtype(self): """The :class:`~pandas.api.types.CategoricalDtype` for this instance""" return self._dtype + @property + def _ndarray_values(self): + return self.codes + @property def _constructor(self): return Categorical diff --git a/pandas/core/base.py b/pandas/core/base.py index 3d8f5f265e3db..0ca029ffd4c25 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -13,7 +13,8 @@ is_list_like, is_scalar, is_datetimelike, - is_extension_type) + is_extension_type, + is_extension_array_dtype) from pandas.util._validators import validate_bool_kwarg @@ -738,7 +739,7 @@ def data(self): @property def itemsize(self): """ return the size of the dtype of the item of the underlying data """ - return self._values.itemsize + return self._ndarray_values.itemsize @property def nbytes(self): @@ -748,7 +749,7 @@ def nbytes(self): @property def strides(self): """ return the strides of the underlying data """ - return self._values.strides + return self._ndarray_values.strides @property def size(self): @@ -768,8 +769,17 @@ def base(self): return self.values.base @property - def _values(self): - """ the internal implementation """ + def _ndarray_values(self): + """The data as an ndarray, possibly losing information. + + The expectation is that this is cheap to compute, and is primarily + used for interacting with our indexers. 
+ + - categorical -> codes + """ + # type: () -> np.ndarray + if is_extension_array_dtype(self): + return self.values._ndarray_values return self.values @property @@ -979,6 +989,7 @@ def unique(self): values = self._values if hasattr(values, 'unique'): + result = values.unique() else: from pandas.core.algorithms import unique1d diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index b2816343fc8eb..55919fb2bea0d 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -927,7 +927,7 @@ def try_timedelta(v): # will try first with a string & object conversion from pandas import to_timedelta try: - return to_timedelta(v)._values.reshape(shape) + return to_timedelta(v)._ndarray_values.reshape(shape) except Exception: return v.reshape(shape) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index c66e7fcfc6978..c2b71bc316fe8 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1709,7 +1709,7 @@ def is_extension_array_dtype(arr_or_dtype): from pandas.core.arrays import ExtensionArray # we want to unpack series, anything else? - if isinstance(arr_or_dtype, ABCSeries): + if isinstance(arr_or_dtype, (ABCIndexClass, ABCSeries)): arr_or_dtype = arr_or_dtype._values return isinstance(arr_or_dtype, (ExtensionDtype, ExtensionArray)) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index ddecbe85087d8..d306d0d78f1f4 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -488,12 +488,14 @@ def _concat_index_asobject(to_concat, name=None): concat all inputs as object. 
DatetimeIndex, TimedeltaIndex and PeriodIndex are converted to object dtype before concatenation """ + from pandas import Index + from pandas.core.arrays import ExtensionArray - klasses = ABCDatetimeIndex, ABCTimedeltaIndex, ABCPeriodIndex + klasses = (ABCDatetimeIndex, ABCTimedeltaIndex, ABCPeriodIndex, + ExtensionArray) to_concat = [x.astype(object) if isinstance(x, klasses) else x for x in to_concat] - from pandas import Index self = to_concat[0] attribs = self._get_attributes_dict() attribs['name'] = name diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 15df77bf772dc..be7c1624936bf 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -31,12 +31,14 @@ is_object_dtype, is_categorical_dtype, is_interval_dtype, + is_period_dtype, is_bool, is_bool_dtype, is_signed_integer_dtype, is_unsigned_integer_dtype, is_integer_dtype, is_float_dtype, is_datetime64_any_dtype, + is_datetime64tz_dtype, is_timedelta64_dtype, needs_i8_conversion, is_iterator, is_list_like, @@ -412,7 +414,7 @@ def _simple_new(cls, values, name=None, dtype=None, **kwargs): values = np.array(values, copy=False) if is_object_dtype(values): values = cls(values, name=name, dtype=dtype, - **kwargs)._values + **kwargs)._ndarray_values result = object.__new__(cls) result._data = values @@ -594,6 +596,40 @@ def values(self): """ return the underlying data as an ndarray """ return self._data.view(np.ndarray) + @property + def _values(self): + # type: () -> Union[ExtensionArray, Index] + # TODO(EA): remove index types as they become extension arrays + """The best array representation. + + This is an ndarray, ExtensionArray, or Index subclass. This differs + from ``_ndarray_values``, which always returns an ndarray. + + Both ``_values`` and ``_ndarray_values`` are consistent between + ``Series`` and ``Index``. + + It may differ from the public '.values' method. 
+ + index | values | _values | _ndarray_values | + ----------------- | -------------- -| ----------- | --------------- | + CategoricalIndex | Categorical | Categorical | codes | + DatetimeIndex[tz] | ndarray[M8ns] | DTI[tz] | ndarray[M8ns] | + + For the following, the ``._values`` is currently ``ndarray[object]``, + but will soon be an ``ExtensionArray`` + + index | values | _values | _ndarray_values | + ----------------- | --------------- | ------------ | --------------- | + PeriodIndex | ndarray[object] | ndarray[obj] | ndarray[int] | + IntervalIndex | ndarray[object] | ndarray[obj] | ndarray[object] | + + See Also + -------- + values + _ndarray_values + """ + return self.values + def get_values(self): """ return the underlying data as an ndarray """ return self.values @@ -664,7 +700,7 @@ def ravel(self, order='C'): -------- numpy.ndarray.ravel """ - return self._values.ravel(order=order) + return self._ndarray_values.ravel(order=order) # construction helpers @classmethod @@ -1597,7 +1633,7 @@ def _constructor(self): @cache_readonly def _engine(self): # property, for now, slow to look up - return self._engine_type(lambda: self._values, len(self)) + return self._engine_type(lambda: self._ndarray_values, len(self)) def _validate_index_level(self, level): """ @@ -2228,27 +2264,37 @@ def union(self, other): other = other.astype('O') return this.union(other) + # TODO(EA): setops-refactor, clean all this up + if is_period_dtype(self) or is_datetime64tz_dtype(self): + lvals = self._ndarray_values + else: + lvals = self._values + if is_period_dtype(other) or is_datetime64tz_dtype(other): + rvals = other._ndarray_values + else: + rvals = other._values + if self.is_monotonic and other.is_monotonic: try: - result = self._outer_indexer(self._values, other._values)[0] + result = self._outer_indexer(lvals, rvals)[0] except TypeError: # incomparable objects - result = list(self._values) + result = list(lvals) # worth making this faster? 
a very unusual case - value_set = set(self._values) - result.extend([x for x in other._values if x not in value_set]) + value_set = set(lvals) + result.extend([x for x in rvals if x not in value_set]) else: indexer = self.get_indexer(other) indexer, = (indexer == -1).nonzero() if len(indexer) > 0: - other_diff = algos.take_nd(other._values, indexer, + other_diff = algos.take_nd(rvals, indexer, allow_fill=False) - result = _concat._concat_compat((self._values, other_diff)) + result = _concat._concat_compat((lvals, other_diff)) try: - self._values[0] < other_diff[0] + lvals[0] < other_diff[0] except TypeError as e: warnings.warn("%s, sort order is undefined for " "incomparable objects" % e, RuntimeWarning, @@ -2260,7 +2306,7 @@ def union(self, other): result.sort() else: - result = self._values + result = lvals try: result = np.sort(result) @@ -2311,20 +2357,30 @@ def intersection(self, other): other = other.astype('O') return this.intersection(other) + # TODO(EA): setops-refactor, clean all this up + if is_period_dtype(self): + lvals = self._ndarray_values + else: + lvals = self._values + if is_period_dtype(other): + rvals = other._ndarray_values + else: + rvals = other._values + if self.is_monotonic and other.is_monotonic: try: - result = self._inner_indexer(self._values, other._values)[0] + result = self._inner_indexer(lvals, rvals)[0] return self._wrap_union_result(other, result) except TypeError: pass try: - indexer = Index(other._values).get_indexer(self._values) + indexer = Index(rvals).get_indexer(lvals) indexer = indexer.take((indexer != -1).nonzero()[0]) except Exception: # duplicates indexer = algos.unique1d( - Index(other._values).get_indexer_non_unique(self._values)[0]) + Index(rvals).get_indexer_non_unique(lvals)[0]) indexer = indexer[indexer != -1] taken = other.take(indexer) @@ -2700,7 +2756,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): raise ValueError('limit argument only valid if doing pad, ' 'backfill or nearest 
reindexing') - indexer = self._engine.get_indexer(target._values) + indexer = self._engine.get_indexer(target._ndarray_values) return _ensure_platform_int(indexer) @@ -2716,12 +2772,13 @@ def _get_fill_indexer(self, target, method, limit=None, tolerance=None): if self.is_monotonic_increasing and target.is_monotonic_increasing: method = (self._engine.get_pad_indexer if method == 'pad' else self._engine.get_backfill_indexer) - indexer = method(target._values, limit) + indexer = method(target._ndarray_values, limit) else: indexer = self._get_fill_indexer_searchsorted(target, method, limit) if tolerance is not None: - indexer = self._filter_indexer_tolerance(target._values, indexer, + indexer = self._filter_indexer_tolerance(target._ndarray_values, + indexer, tolerance) return indexer @@ -2812,7 +2869,7 @@ def get_indexer_non_unique(self, target): self = Index(self.asi8) tgt_values = target.asi8 else: - tgt_values = target._values + tgt_values = target._ndarray_values indexer, missing = self._engine.get_indexer_non_unique(tgt_values) return _ensure_platform_int(indexer), missing @@ -3247,16 +3304,17 @@ def _join_multi(self, other, how, return_indexers=True): def _join_non_unique(self, other, how='left', return_indexers=False): from pandas.core.reshape.merge import _get_join_indexers - left_idx, right_idx = _get_join_indexers([self._values], - [other._values], how=how, + left_idx, right_idx = _get_join_indexers([self._ndarray_values], + [other._ndarray_values], + how=how, sort=True) left_idx = _ensure_platform_int(left_idx) right_idx = _ensure_platform_int(right_idx) - join_index = np.asarray(self._values.take(left_idx)) + join_index = np.asarray(self._ndarray_values.take(left_idx)) mask = left_idx == -1 - np.putmask(join_index, mask, other._values.take(right_idx)) + np.putmask(join_index, mask, other._ndarray_values.take(right_idx)) join_index = self._wrap_joined_index(join_index, other) @@ -3403,8 +3461,8 @@ def _join_monotonic(self, other, how='left', 
return_indexers=False): else: return ret_index - sv = self._values - ov = other._values + sv = self._ndarray_values + ov = other._ndarray_values if self.is_unique and other.is_unique: # We can perform much better than the general case @@ -3756,7 +3814,7 @@ def insert(self, loc, item): item = self._na_value _self = np.asarray(self) - item = self._coerce_scalar_to_index(item)._values + item = self._coerce_scalar_to_index(item)._ndarray_values idx = np.concatenate((_self[:loc], item, _self[loc:])) return self._shallow_copy_with_infer(idx) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 60f5552576ea1..a4d0f787cc6ec 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -293,6 +293,11 @@ def values(self): """ return the underlying data, which is a Categorical """ return self._data + @property + def itemsize(self): + # Size of the items in categories, not codes. + return self.values.itemsize + def get_values(self): """ return the underlying data as an ndarray """ return self._data.get_values() @@ -386,8 +391,8 @@ def is_monotonic_decreasing(self): def unique(self, level=None): if level is not None: self._validate_index_level(level) - result = base.IndexOpsMixin.unique(self) - # CategoricalIndex._shallow_copy uses keeps original categories + result = self.values.unique() + # CategoricalIndex._shallow_copy keeps original categories # and ordered if not otherwise specified return self._shallow_copy(result, categories=result.categories, ordered=result.ordered) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 4a526955d9bf4..c98f8ceea0ffa 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -376,7 +376,7 @@ def sort_values(self, return_indexer=False, ascending=True): sorted_index = self.take(_as) return sorted_index, _as else: - sorted_values = np.sort(self._values) + sorted_values = np.sort(self._ndarray_values) attribs = 
self._get_attributes_dict() freq = attribs['freq'] diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 61c941c3d2333..cc9ce1f3fd5eb 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -678,6 +678,15 @@ def _assert_tzawareness_compat(self, other): raise TypeError('Cannot compare tz-naive and tz-aware ' 'datetime-like objects') + @property + def _values(self): + # tz-naive -> ndarray + # tz-aware -> DatetimeIndex + if self.tz is not None: + return self + else: + return self.values + @property def tzinfo(self): """ @@ -685,6 +694,27 @@ def tzinfo(self): """ return self.tz + @property + def size(self): + # TODO: Remove this when we have a DatetimeTZArray + # Necessary to avoid recursion error since DTI._values is a DTI + # for TZ-aware + return self._ndarray_values.size + + @property + def shape(self): + # TODO: Remove this when we have a DatetimeTZArray + # Necessary to avoid recursion error since DTI._values is a DTI + # for TZ-aware + return self._ndarray_values.shape + + @property + def nbytes(self): + # TODO: Remove this when we have a DatetimeTZArray + # Necessary to avoid recursion error since DTI._values is a DTI + # for TZ-aware + return self._ndarray_values.nbytes + @cache_readonly def _timezone(self): """ Comparable timezone both for pytz / dateutil""" @@ -1086,6 +1116,19 @@ def snap(self, freq='S'): # we know it conforms; skip check return DatetimeIndex(snapped, freq=freq, verify_integrity=False) + def unique(self, level=None): + # Override here since IndexOpsMixin.unique uses self._values.unique + # For DatetimeIndex with TZ, that's a DatetimeIndex -> recursion error + # So we extract the tz-naive DatetimeIndex, unique that, and wrap the + # result with out TZ. 
+ if self.tz is not None: + naive = type(self)(self._ndarray_values, copy=False) + else: + naive = self + result = super(DatetimeIndex, naive).unique(level=level) + return self._simple_new(result, name=self.name, tz=self.tz, + freq=self.freq) + def union(self, other): """ Specialized union for DatetimeIndex objects. If combine diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 3bf783b5a2faa..d431ea1e51e31 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -680,6 +680,16 @@ def length(self): 'e.g. Intervals with string endpoints') raise TypeError(msg) + @property + def size(self): + # Avoid materializing self.values + return self.left.size + + @property + def shape(self): + # Avoid materializing self.values + return self.left.shape + def __len__(self): return len(self.left) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 510f7245cebd8..94dbd8b884e47 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -799,9 +799,11 @@ def values(self): box = hasattr(lev, '_box_values') # Try to minimize boxing. 
if box and len(lev) > len(lab): - taken = lev._box_values(algos.take_1d(lev._values, lab)) + taken = lev._box_values(algos.take_1d(lev._ndarray_values, + lab)) elif box: - taken = algos.take_1d(lev._box_values(lev._values), lab, + taken = algos.take_1d(lev._box_values(lev._ndarray_values), + lab, fill_value=_get_na_value(lev.dtype.type)) else: taken = algos.take_1d(np.asarray(lev._values), lab) @@ -2410,7 +2412,7 @@ def convert_indexer(start, stop, step, indexer=indexer, labels=labels): mapper = Series(indexer) indexer = labels.take(_ensure_platform_int(indexer)) result = Series(Index(indexer).isin(r).nonzero()[0]) - m = result.map(mapper)._values + m = result.map(mapper)._ndarray_values else: m = np.zeros(len(labels), dtype=bool) @@ -2505,6 +2507,7 @@ def get_locs(self, seq): MultiIndex.slice_locs : Get slice location given start label(s) and end label(s). """ + from .numeric import Int64Index # must be lexsorted to at least as many levels true_slices = [i for (i, s) in enumerate(com.is_true_slices(seq)) if s] @@ -2530,7 +2533,6 @@ def _convert_to_indexer(r): "that is not the same length as the " "index") r = r.nonzero()[0] - from .numeric import Int64Index return Int64Index(r) def _update_indexer(idxr, indexer=indexer): @@ -2567,9 +2569,8 @@ def _update_indexer(idxr, indexer=indexer): if indexers is not None: indexer = _update_indexer(indexers, indexer=indexer) else: - from .numeric import Int64Index # no matches we are done - return Int64Index([])._values + return Int64Index([])._ndarray_values elif com.is_null_slice(k): # empty slice @@ -2589,8 +2590,8 @@ def _update_indexer(idxr, indexer=indexer): # empty indexer if indexer is None: - return Int64Index([])._values - return indexer._values + return Int64Index([])._ndarray_values + return indexer._ndarray_values def truncate(self, before=None, after=None): """ @@ -2639,7 +2640,7 @@ def equals(self, other): if not isinstance(other, MultiIndex): other_vals = com._values_from_object(_ensure_index(other)) - return 
array_equivalent(self._values, other_vals) + return array_equivalent(self._ndarray_values, other_vals) if self.nlevels != other.nlevels: return False @@ -2655,8 +2656,9 @@ def equals(self, other): olabels = other.labels[i] olabels = olabels[olabels != -1] - ovalues = algos.take_nd(np.asarray(other.levels[i]._values), - olabels, allow_fill=False) + ovalues = algos.take_nd( + np.asarray(other.levels[i]._values), + olabels, allow_fill=False) # since we use NaT both datetime64 and timedelta64 # we can have a situation where a level is typed say @@ -2704,7 +2706,8 @@ def union(self, other): if len(other) == 0 or self.equals(other): return self - uniq_tuples = lib.fast_unique_multiple([self._values, other._values]) + uniq_tuples = lib.fast_unique_multiple([self._ndarray_values, + other._ndarray_values]) return MultiIndex.from_arrays(lzip(*uniq_tuples), sortorder=0, names=result_names) @@ -2726,8 +2729,8 @@ def intersection(self, other): if self.equals(other): return self - self_tuples = self._values - other_tuples = other._values + self_tuples = self._ndarray_values + other_tuples = other._ndarray_values uniq_tuples = sorted(set(self_tuples) & set(other_tuples)) if len(uniq_tuples) == 0: return MultiIndex(levels=[[]] * self.nlevels, @@ -2756,7 +2759,8 @@ def difference(self, other): labels=[[]] * self.nlevels, names=result_names, verify_integrity=False) - difference = sorted(set(self._values) - set(other._values)) + difference = sorted(set(self._ndarray_values) - + set(other._ndarray_values)) if len(difference) == 0: return MultiIndex(levels=[[]] * self.nlevels, diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index b02aee0495d8c..a4558116bfa63 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -378,7 +378,7 @@ def equals(self, other): if (not is_dtype_equal(self.dtype, other.dtype) or self.shape != other.shape): return False - left, right = self._values, other._values + left, right = self._ndarray_values, 
other._ndarray_values return ((left == right) | (self._isnan & other._isnan)).all() except (TypeError, ValueError): return False diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 1f8542ed5ee60..8f2d7d382a16e 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -54,7 +54,7 @@ def _field_accessor(name, alias, docstring=None): def f(self): base, mult = _gfc(self.freq) - result = get_period_field_arr(alias, self._values, base) + result = get_period_field_arr(alias, self._ndarray_values, base) return Index(result, name=self.name) f.__name__ = name f.__doc__ = docstring @@ -82,7 +82,7 @@ def _period_index_cmp(opname, cls, nat_result=False): def wrapper(self, other): if isinstance(other, Period): - func = getattr(self._values, opname) + func = getattr(self._ndarray_values, opname) other_base, _ = _gfc(other.freq) if other.freq != self.freq: msg = _DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) @@ -94,7 +94,8 @@ def wrapper(self, other): msg = _DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) raise IncompatibleFrequency(msg) - result = getattr(self._values, opname)(other._values) + op = getattr(self._ndarray_values, opname) + result = op(other._ndarray_values) mask = self._isnan | other._isnan if mask.any(): @@ -102,11 +103,11 @@ def wrapper(self, other): return result elif other is tslib.NaT: - result = np.empty(len(self._values), dtype=bool) + result = np.empty(len(self._ndarray_values), dtype=bool) result.fill(nat_result) else: other = Period(other, freq=self.freq) - func = getattr(self._values, opname) + func = getattr(self._ndarray_values, opname) result = func(other.ordinal) if self.hasnans: @@ -275,11 +276,11 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, if isinstance(data, PeriodIndex): if freq is None or freq == data.freq: # no freq change freq = data.freq - data = data._values + data = data._ndarray_values else: base1, _ = _gfc(data.freq) base2, _ = 
_gfc(freq) - data = period.period_asfreq_arr(data._values, + data = period.period_asfreq_arr(data._ndarray_values, base1, base2, 1) return cls._simple_new(data, name=name, freq=freq) @@ -374,7 +375,7 @@ def _shallow_copy(self, values=None, freq=None, **kwargs): if freq is None: freq = self.freq if values is None: - values = self._values + values = self._ndarray_values return super(PeriodIndex, self)._shallow_copy(values=values, freq=freq, **kwargs) @@ -407,7 +408,7 @@ def __contains__(self, key): @property def asi8(self): - return self._values.view('i8') + return self._ndarray_values.view('i8') @cache_readonly def _int64index(self): @@ -418,7 +419,8 @@ def values(self): return self.astype(object).values @property - def _values(self): + def _ndarray_values(self): + # Ordinals return self._data def __array__(self, dtype=None): @@ -475,6 +477,16 @@ def _to_embed(self, keep_tz=False, dtype=None): return self.astype(object).values + @property + def size(self): + # Avoid materializing self._values + return self._ndarray_values.size + + @property + def shape(self): + # Avoid materializing self._values + return self._ndarray_values.shape + @property def _formatter_func(self): return lambda x: "'%s'" % x @@ -489,13 +501,15 @@ def asof_locs(self, where, mask): if isinstance(where_idx, DatetimeIndex): where_idx = PeriodIndex(where_idx.values, freq=self.freq) - locs = self._values[mask].searchsorted(where_idx._values, side='right') + locs = self._ndarray_values[mask].searchsorted( + where_idx._ndarray_values, side='right') locs = np.where(locs > 0, locs - 1, 0) result = np.arange(len(self))[mask].take(locs) first = mask.argmax() - result[(locs == 0) & (where_idx._values < self._values[first])] = -1 + result[(locs == 0) & (where_idx._ndarray_values < + self._ndarray_values[first])] = -1 return result @@ -523,7 +537,8 @@ def searchsorted(self, value, side='left', sorter=None): elif isinstance(value, compat.string_types): value = Period(value, freq=self.freq).ordinal - return 
self._values.searchsorted(value, side=side, sorter=sorter) + return self._ndarray_values.searchsorted(value, side=side, + sorter=sorter) @property def is_all_dates(self): @@ -664,7 +679,7 @@ def to_timestamp(self, freq=None, how='start'): base, mult = _gfc(freq) new_data = self.asfreq(freq, how) - new_data = period.periodarr_to_dt64arr(new_data._values, base) + new_data = period.periodarr_to_dt64arr(new_data._ndarray_values, base) return DatetimeIndex(new_data, freq='infer', name=self.name) def _maybe_convert_timedelta(self, other): @@ -744,7 +759,7 @@ def shift(self, n): ------- shifted : PeriodIndex """ - values = self._values + n * self.freq.n + values = self._ndarray_values + n * self.freq.n if self.hasnans: values[self._isnan] = tslib.iNaT return self._shallow_copy(values=values) @@ -775,7 +790,7 @@ def get_value(self, series, key): grp = resolution.Resolution.get_freq_group(reso) freqn = resolution.get_freq_group(self.freq) - vals = self._values + vals = self._ndarray_values # if our data is higher resolution than requested key, slice if grp < freqn: @@ -786,7 +801,7 @@ def get_value(self, series, key): if ord2 < vals[0] or ord1 > vals[-1]: raise KeyError(key) - pos = np.searchsorted(self._values, [ord1, ord2]) + pos = np.searchsorted(self._ndarray_values, [ord1, ord2]) key = slice(pos[0], pos[1] + 1) return series[key] elif grp == freqn: diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 0d833807602e1..2437b7d396e84 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -4430,7 +4430,7 @@ def _convert_index(index, encoding=None, format_type=None): elif isinstance(index, (Int64Index, PeriodIndex)): atom = _tables().Int64Col() # avoid to store ndarray of Period objects - return IndexCol(index._values, 'integer', atom, + return IndexCol(index._ndarray_values, 'integer', atom, freq=getattr(index, 'freq', None), index_name=index_name) diff --git a/pandas/plotting/_converter.py b/pandas/plotting/_converter.py index 
07163615c6ba4..9ca06475290e4 100644 --- a/pandas/plotting/_converter.py +++ b/pandas/plotting/_converter.py @@ -249,11 +249,11 @@ def _convert_1d(values, units, axis): is_float(values)): return get_datevalue(values, axis.freq) if isinstance(values, PeriodIndex): - return values.asfreq(axis.freq)._values + return values.asfreq(axis.freq)._ndarray_values if isinstance(values, Index): return values.map(lambda x: get_datevalue(x, axis.freq)) if is_period_arraylike(values): - return PeriodIndex(values, freq=axis.freq)._values + return PeriodIndex(values, freq=axis.freq)._ndarray_values if isinstance(values, (list, tuple, np.ndarray, Index)): return [get_datevalue(x, axis.freq) for x in values] return values @@ -642,7 +642,7 @@ def _daily_finder(vmin, vmax, freq): info = np.zeros(span, dtype=[('val', np.int64), ('maj', bool), ('min', bool), ('fmt', '|S20')]) - info['val'][:] = dates_._values + info['val'][:] = dates_._ndarray_values info['fmt'][:] = '' info['maj'][[0, -1]] = True # .. and set some shortcuts diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 8948c5f79900d..2d8d70aa2ac84 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -314,7 +314,8 @@ def test_ensure_copied_data(self): # .values an object array of Period, thus copied result = index_type(ordinal=index.asi8, copy=False, **init_kwargs) - tm.assert_numpy_array_equal(index._values, result._values, + tm.assert_numpy_array_equal(index._ndarray_values, + result._ndarray_values, check_same='same') elif isinstance(index, IntervalIndex): # checked in test_interval.py @@ -323,7 +324,8 @@ def test_ensure_copied_data(self): result = index_type(index.values, copy=False, **init_kwargs) tm.assert_numpy_array_equal(index.values, result.values, check_same='same') - tm.assert_numpy_array_equal(index._values, result._values, + tm.assert_numpy_array_equal(index._ndarray_values, + result._ndarray_values, check_same='same') def test_copy_and_deepcopy(self, 
indices): diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index a75ace2933b71..05678b0c8dd45 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -469,3 +469,12 @@ def test_factorize_dst(self): arr, res = obj.factorize() tm.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp)) tm.assert_index_equal(res, idx) + + @pytest.mark.parametrize('arr, expected', [ + (pd.DatetimeIndex(['2017', '2017']), pd.DatetimeIndex(['2017'])), + (pd.DatetimeIndex(['2017', '2017'], tz='US/Eastern'), + pd.DatetimeIndex(['2017'], tz='US/Eastern')), + ]) + def test_unique(self, arr, expected): + result = arr.unique() + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index 639a9272c3808..eca80d17b1dc3 100644 --- a/pandas/tests/indexes/period/test_construction.py +++ b/pandas/tests/indexes/period/test_construction.py @@ -119,8 +119,8 @@ def test_constructor_fromarraylike(self): tm.assert_index_equal(PeriodIndex(idx.values), idx) tm.assert_index_equal(PeriodIndex(list(idx.values)), idx) - pytest.raises(ValueError, PeriodIndex, idx._values) - pytest.raises(ValueError, PeriodIndex, list(idx._values)) + pytest.raises(ValueError, PeriodIndex, idx._ndarray_values) + pytest.raises(ValueError, PeriodIndex, list(idx._ndarray_values)) pytest.raises(TypeError, PeriodIndex, data=Period('2007', freq='A')) diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index f3469b829f8a3..b3f059018493c 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -205,7 +205,7 @@ def test_values(self): tm.assert_numpy_array_equal(idx.values, exp) tm.assert_numpy_array_equal(idx.get_values(), exp) exp = np.array([], dtype=np.int64) - tm.assert_numpy_array_equal(idx._values, exp) + 
tm.assert_numpy_array_equal(idx._ndarray_values, exp) idx = pd.PeriodIndex(['2011-01', pd.NaT], freq='M') @@ -213,7 +213,7 @@ def test_values(self): tm.assert_numpy_array_equal(idx.values, exp) tm.assert_numpy_array_equal(idx.get_values(), exp) exp = np.array([492, -9223372036854775808], dtype=np.int64) - tm.assert_numpy_array_equal(idx._values, exp) + tm.assert_numpy_array_equal(idx._ndarray_values, exp) idx = pd.PeriodIndex(['2011-01-01', pd.NaT], freq='D') @@ -222,7 +222,7 @@ def test_values(self): tm.assert_numpy_array_equal(idx.values, exp) tm.assert_numpy_array_equal(idx.get_values(), exp) exp = np.array([14975, -9223372036854775808], dtype=np.int64) - tm.assert_numpy_array_equal(idx._values, exp) + tm.assert_numpy_array_equal(idx._ndarray_values, exp) def test_period_index_length(self): pi = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') diff --git a/pandas/tests/indexes/period/test_tools.py b/pandas/tests/indexes/period/test_tools.py index f5e7c8269dc4f..97500f2f5ed95 100644 --- a/pandas/tests/indexes/period/test_tools.py +++ b/pandas/tests/indexes/period/test_tools.py @@ -20,7 +20,7 @@ class TestPeriodRepresentation(object): def _check_freq(self, freq, base_date): rng = PeriodIndex(start=base_date, periods=10, freq=freq) exp = np.arange(10, dtype=np.int64) - tm.assert_numpy_array_equal(rng._values, exp) + tm.assert_numpy_array_equal(rng.asi8, exp) def test_annual(self): diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index c2e40c79f8914..e9fddfde90348 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -353,6 +353,14 @@ def test_append(self): expected = Index(list('caaabbca')) tm.assert_index_equal(result, expected, exact=True) + def test_append_to_another(self): + # hits _concat_index_asobject + fst = Index(['a', 'b']) + snd = CategoricalIndex(['d', 'e']) + result = fst.append(snd) + expected = Index(['a', 'b', 'd', 'e']) + tm.assert_index_equal(result, 
expected) + def test_insert(self): ci = self.create_index() diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index e59456b8a2d5e..cd6a5c761d0c2 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -962,6 +962,53 @@ def test_values_boxed(self): # Check that code branches for boxed values produce identical results tm.assert_numpy_array_equal(result.values[:4], result[:4].values) + def test_values_multiindex_datetimeindex(self): + # Test to ensure we hit the boxing / nobox part of MI.values + ints = np.arange(10**18, 10**18 + 5) + naive = pd.DatetimeIndex(ints) + aware = pd.DatetimeIndex(ints, tz='US/Central') + + idx = pd.MultiIndex.from_arrays([naive, aware]) + result = idx.values + + outer = pd.DatetimeIndex([x[0] for x in result]) + tm.assert_index_equal(outer, naive) + + inner = pd.DatetimeIndex([x[1] for x in result]) + tm.assert_index_equal(inner, aware) + + # n_lev > n_lab + result = idx[:2].values + + outer = pd.DatetimeIndex([x[0] for x in result]) + tm.assert_index_equal(outer, naive[:2]) + + inner = pd.DatetimeIndex([x[1] for x in result]) + tm.assert_index_equal(inner, aware[:2]) + + def test_values_multiindex_periodindex(self): + # Test to ensure we hit the boxing / nobox part of MI.values + ints = np.arange(2007, 2012) + pidx = pd.PeriodIndex(ints, freq='D') + + idx = pd.MultiIndex.from_arrays([ints, pidx]) + result = idx.values + + outer = pd.Int64Index([x[0] for x in result]) + tm.assert_index_equal(outer, pd.Int64Index(ints)) + + inner = pd.PeriodIndex([x[1] for x in result]) + tm.assert_index_equal(inner, pidx) + + # n_lev > n_lab + result = idx[:2].values + + outer = pd.Int64Index([x[0] for x in result]) + tm.assert_index_equal(outer, pd.Int64Index(ints[:2])) + + inner = pd.PeriodIndex([x[1] for x in result]) + tm.assert_index_equal(inner, pidx[:2]) + def test_append(self): result = self.index[:3].append(self.index[3:]) assert result.equals(self.index) diff --git 
a/pandas/tests/test_base.py b/pandas/tests/test_base.py index df2547fc7b0da..4b5ad336139b0 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -338,8 +338,9 @@ def test_ops(self): if not isinstance(o, PeriodIndex): expected = getattr(o.values, op)() else: - expected = pd.Period(ordinal=getattr(o._values, op)(), - freq=o.freq) + expected = pd.Period( + ordinal=getattr(o._ndarray_values, op)(), + freq=o.freq) try: assert result == expected except TypeError: @@ -450,7 +451,7 @@ def test_value_counts_unique_nunique_null(self): for orig in self.objs: o = orig.copy() klass = type(o) - values = o._values + values = o._ndarray_values if not self._allow_na_ops(o): continue @@ -1175,3 +1176,54 @@ def test_iter_box(self): assert isinstance(res, pd.Period) assert res.freq == 'M' assert res == exp + + +@pytest.mark.parametrize('array, expected_type, dtype', [ + (np.array([0, 1], dtype=np.int64), np.ndarray, 'int64'), + (np.array(['a', 'b']), np.ndarray, 'object'), + (pd.Categorical(['a', 'b']), pd.Categorical, 'category'), + (pd.DatetimeIndex(['2017', '2018']), np.ndarray, 'datetime64[ns]'), + (pd.DatetimeIndex(['2017', '2018'], tz="US/Central"), pd.DatetimeIndex, + 'datetime64[ns, US/Central]'), + (pd.TimedeltaIndex([10**10]), np.ndarray, 'm8[ns]'), + (pd.PeriodIndex([2018, 2019], freq='A'), np.ndarray, 'object'), + (pd.IntervalIndex.from_breaks([0, 1, 2]), np.ndarray, 'object'), +]) +def test_values_consistent(array, expected_type, dtype): + l_values = pd.Series(array)._values + r_values = pd.Index(array)._values + assert type(l_values) is expected_type + assert type(l_values) is type(r_values) + + if isinstance(l_values, np.ndarray): + tm.assert_numpy_array_equal(l_values, r_values) + elif isinstance(l_values, pd.Index): + tm.assert_index_equal(l_values, r_values) + elif pd.api.types.is_categorical(l_values): + tm.assert_categorical_equal(l_values, r_values) + else: + raise TypeError("Unexpected type {}".format(type(l_values))) + + assert l_values.dtype 
== dtype + assert r_values.dtype == dtype + + +@pytest.mark.parametrize('array, expected', [ + (np.array([0, 1], dtype=np.int64), np.array([0, 1], dtype=np.int64)), + (np.array(['0', '1']), np.array(['0', '1'], dtype=object)), + (pd.Categorical(['a', 'a']), np.array([0, 0], dtype='int8')), + (pd.DatetimeIndex(['2017-01-01T00:00:00']), + np.array(['2017-01-01T00:00:00'], dtype='M8[ns]')), + (pd.DatetimeIndex(['2017-01-01T00:00:00'], tz="US/Eastern"), + np.array(['2017-01-01T05:00:00'], dtype='M8[ns]')), + (pd.TimedeltaIndex([10**10]), np.array([10**10], dtype='m8[ns]')), + pytest.mark.xfail(reason='PeriodArray not implemented')(( + pd.PeriodIndex(['2017', '2018'], freq='D'), + np.array([17167, 17532]), + )), +]) +def test_ndarray_values(array, expected): + l_values = pd.Series(array)._ndarray_values + r_values = pd.Index(array)._ndarray_values + tm.assert_numpy_array_equal(l_values, r_values) + tm.assert_numpy_array_equal(l_values, expected) From eb52d993df0dcf1644cd6043d9fe314236673e1d Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 13 Feb 2018 20:01:42 +0100 Subject: [PATCH 105/217] DOC: ignore Panel deprecation warnings during doc build (#19663) --- doc/source/conf.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/doc/source/conf.py b/doc/source/conf.py index c188f83f80250..7c4edd0486636 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -15,6 +15,8 @@ import re import inspect import importlib +import warnings + from pandas.compat import u, PY3 try: @@ -375,6 +377,13 @@ 'wiki': ('https://github.com/pandas-dev/pandas/wiki/%s', 'wiki ')} + +# ignore all deprecation warnings from Panel during doc build +# (to avoid the need to add :okwarning: in many places) +warnings.filterwarnings("ignore", message="\nPanel is deprecated", + category=FutureWarning) + + ipython_exec_lines = [ 'import numpy as np', 'import pandas as pd', From 2134e52cacc4ba0675c950021a9efc61ce5ab040 Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Tue, 13 
Feb 2018 13:41:14 -0800 Subject: [PATCH 106/217] DOC: fix IPython spelling (#19683) It's upper case I and P (or full lower case), --- pandas/core/frame.py | 2 +- pandas/io/gbq.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2782ee7b9d201..bc045d74cee52 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1059,7 +1059,7 @@ def to_gbq(self, destination_table, project_id, chunksize=10000, private_key : str (optional) Service account private key in JSON format. Can be file path or string contents. This is useful for remote server - authentication (eg. jupyter iPython notebook on remote host) + authentication (eg. Jupyter/IPython notebook on remote host) """ from pandas.io import gbq diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index b452b0cf5ddd4..f9bc6ae1a5451 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -65,7 +65,7 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, private_key : str (optional) Service account private key in JSON format. Can be file path or string contents. This is useful for remote server - authentication (eg. jupyter iPython notebook on remote host) + authentication (eg. Jupyter/IPython notebook on remote host) dialect : {'legacy', 'standard'}, default 'legacy' 'legacy' : Use BigQuery's legacy SQL dialect. 
From 16fc7515c985f86245f95d47c40107702e57ce7e Mon Sep 17 00:00:00 2001 From: William Ayd Date: Tue, 13 Feb 2018 15:56:31 -0800 Subject: [PATCH 107/217] Explicitly set dtype of np.lexsort in group_rank (#19679) --- pandas/_libs/groupby_helper.pxi.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in index 48dac7bf10362..1d77a373bb7dd 100644 --- a/pandas/_libs/groupby_helper.pxi.in +++ b/pandas/_libs/groupby_helper.pxi.in @@ -531,7 +531,7 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out, # each label corresponds to a different group value, # the mask helps you differentiate missing values before # performing sort on the actual values - _as = np.lexsort(order) + _as = np.lexsort(order).view(dtype=np.int64) if not ascending: _as = _as[::-1] From e42b61fb7f1297e979f141431a820b34ad355d31 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Wed, 14 Feb 2018 03:05:46 -0800 Subject: [PATCH 108/217] BUG: Do not round DatetimeIndex nanosecond precision when iterating (#19628) --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/_libs/tslib.pyx | 15 ++++++++------- pandas/conftest.py | 6 ++++++ pandas/tests/indexes/datetimes/test_timezones.py | 13 ++++++++++++- 4 files changed, 27 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 72f63a4da0f4d..b6316bd39f396 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -729,6 +729,7 @@ Timezones - Bug in :func:`DatetimeIndex.insert` where inserting ``NaT`` into a timezone-aware index incorrectly raised (:issue:`16357`) - Bug in the :class:`DataFrame` constructor, where tz-aware Datetimeindex and a given column name will result in an empty ``DataFrame`` (:issue:`19157`) - Bug in :func:`Timestamp.tz_localize` where localizing a timestamp near the minimum or maximum valid values could overflow and return a timestamp with an incorrect nanosecond value 
(:issue:`12677`) +- Bug when iterating over :class:`DatetimeIndex` that was localized with fixed timezone offset that rounded nanosecond precision to microseconds (:issue:`19603`) Offsets ^^^^^^^ diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 85e667521e5f2..fec7f21d6e6eb 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -46,7 +46,8 @@ from tslibs.timezones cimport (is_utc, is_tzlocal, is_fixed_offset, treat_tz_as_pytz, get_dst_info) from tslibs.conversion cimport (tz_convert_single, _TSObject, convert_datetime_to_tsobject, - get_datetime64_nanos) + get_datetime64_nanos, + tz_convert_utc_to_tzlocal) from tslibs.conversion import tz_convert_single from tslibs.nattype import NaT, nat_strings, iNaT @@ -144,12 +145,12 @@ def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None, if value == NPY_NAT: result[i] = NaT else: - dt64_to_dtstruct(value, &dts) - dt = create_datetime_from_ts(value, dts, tz, freq) - dt = dt + tz.utcoffset(dt) - if box: - dt = Timestamp(dt) - result[i] = dt + # Python datetime objects do not support nanosecond + # resolution (yet, PEP 564). Need to compute new value + # using the i8 representation. 
+ local_value = tz_convert_utc_to_tzlocal(value, tz) + dt64_to_dtstruct(local_value, &dts) + result[i] = func_create(value, dts, tz, freq) else: trans, deltas, typ = get_dst_info(tz) diff --git a/pandas/conftest.py b/pandas/conftest.py index 4fe66d4cf7e1f..37f0a2f818a3b 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -93,3 +93,9 @@ def compression_no_zip(request): except zip """ return request.param + + +@pytest.fixture(scope='module') +def datetime_tz_utc(): + from datetime import timezone + return timezone.utc diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index 075d239df5f7a..62854676d43be 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -17,7 +17,7 @@ import pandas as pd from pandas._libs import tslib from pandas._libs.tslibs import timezones -from pandas.compat import lrange, zip +from pandas.compat import lrange, zip, PY3 from pandas import (DatetimeIndex, date_range, bdate_range, Timestamp, isna, to_datetime, Index) @@ -949,6 +949,17 @@ def test_dti_union_aware(self): result = rng.union(rng2) assert result.tz.zone == 'UTC' + @pytest.mark.parametrize('tz', [None, 'UTC', "US/Central", + dateutil.tz.tzoffset(None, -28800)]) + @pytest.mark.usefixtures("datetime_tz_utc") + @pytest.mark.skipif(not PY3, reason="datetime.timezone not in PY2") + def test_iteration_preserves_nanoseconds(self, tz): + # GH 19603 + index = DatetimeIndex(["2018-02-08 15:00:00.168456358", + "2018-02-08 15:00:00.168456359"], tz=tz) + for i, ts in enumerate(index): + assert ts == index[i] + class TestDateRange(object): """Tests for date_range with timezones""" From 491801eee43db48468b3caded53126ab4d8008bf Mon Sep 17 00:00:00 2001 From: Aaron Critchley Date: Wed, 14 Feb 2018 11:12:07 +0000 Subject: [PATCH 109/217] COMPAT-18589: Supporting axis in Series.rename (#18923) --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/core/generic.py | 3 +++ 
pandas/tests/series/test_alter_axes.py | 8 ++++++++ 3 files changed, 12 insertions(+) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index b6316bd39f396..dddd370780ab6 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -841,6 +841,7 @@ Reshaping - Bug in :func:`concat` when concatting sparse and dense series it returns only a ``SparseDataFrame``. Should be a ``DataFrame``. (:issue:`18914`, :issue:`18686`, and :issue:`16874`) - Improved error message for :func:`DataFrame.merge` when there is no common merge key (:issue:`19427`) - Bug in :func:`DataFrame.join` which does an *outer* instead of a *left* join when being called with multiple DataFrames and some have non-unique indices (:issue:`19624`) +- :func:`Series.rename` now accepts ``axis`` as a kwarg (:issue:`18589`) Other ^^^^^ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 35f866c9e7d58..297450417e3cf 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -863,6 +863,9 @@ def rename(self, *args, **kwargs): copy = kwargs.pop('copy', True) inplace = kwargs.pop('inplace', False) level = kwargs.pop('level', None) + axis = kwargs.pop('axis', None) + if axis is not None: + axis = self._get_axis_number(axis) if kwargs: raise TypeError('rename() got an unexpected keyword ' diff --git a/pandas/tests/series/test_alter_axes.py b/pandas/tests/series/test_alter_axes.py index 714e43a4af1f8..dce4e82cbdcf1 100644 --- a/pandas/tests/series/test_alter_axes.py +++ b/pandas/tests/series/test_alter_axes.py @@ -81,6 +81,14 @@ def test_rename_set_name_inplace(self): exp = np.array(['a', 'b', 'c'], dtype=np.object_) tm.assert_numpy_array_equal(s.index.values, exp) + def test_rename_axis_supported(self): + # Supporting axis for compatibility, detailed in GH-18589 + s = Series(range(5)) + s.rename({}, axis=0) + s.rename({}, axis='index') + with tm.assert_raises_regex(ValueError, 'No axis named 5'): + s.rename({}, axis=5) + def 
test_set_name_attribute(self): s = Series([1, 2, 3]) s2 = Series([1, 2, 3], name='bar') From 93bfedeaf2c4794ba091d483ff58489b55d2cba1 Mon Sep 17 00:00:00 2001 From: Matt Kirk Date: Wed, 14 Feb 2018 18:13:19 +0700 Subject: [PATCH 110/217] Performance increase rolling min max (#19549) --- asv_bench/benchmarks/rolling.py | 16 +++++++++- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/_libs/src/headers/cmath | 15 +++++++++ pandas/_libs/src/headers/math.h | 11 ------- pandas/_libs/window.pyx | 54 ++++++++++++++++++++++----------- setup.py | 5 +-- 6 files changed, 70 insertions(+), 32 deletions(-) create mode 100644 pandas/_libs/src/headers/cmath delete mode 100644 pandas/_libs/src/headers/math.h diff --git a/asv_bench/benchmarks/rolling.py b/asv_bench/benchmarks/rolling.py index 75990d83f8212..ba25ad6c5eda6 100644 --- a/asv_bench/benchmarks/rolling.py +++ b/asv_bench/benchmarks/rolling.py @@ -16,12 +16,26 @@ class Methods(object): def setup(self, constructor, window, dtype, method): N = 10**5 - arr = np.random.random(N).astype(dtype) + arr = (100 * np.random.random(N)).astype(dtype) self.roll = getattr(pd, constructor)(arr).rolling(window) def time_rolling(self, constructor, window, dtype, method): getattr(self.roll, method)() +class VariableWindowMethods(Methods): + sample_time = 0.2 + params = (['DataFrame', 'Series'], + ['50s', '1h', '1d'], + ['int', 'float'], + ['median', 'mean', 'max', 'min', 'std', 'count', 'skew', 'kurt', + 'sum']) + param_names = ['contructor', 'window', 'dtype', 'method'] + + def setup(self, constructor, window, dtype, method): + N = 10**5 + arr = (100 * np.random.random(N)).astype(dtype) + index = pd.date_range('2017-01-01', periods=N, freq='5s') + self.roll = getattr(pd, constructor)(arr, index=index).rolling(window) class Pairwise(object): diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index dddd370780ab6..932618ba1df21 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -645,6 
+645,7 @@ Performance Improvements - Improved performance of :func:`MultiIndex.get_loc` for large indexes, at the cost of a reduction in performance for small ones (:issue:`18519`) - Improved performance of pairwise ``.rolling()`` and ``.expanding()`` with ``.cov()`` and ``.corr()`` operations (:issue:`17917`) - Improved performance of :func:`DataFrameGroupBy.rank` (:issue:`15779`) +- Improved performance of variable ``.rolling()`` on ``.min()`` and ``.max()`` (:issue:`19521`) .. _whatsnew_0230.docs: diff --git a/pandas/_libs/src/headers/cmath b/pandas/_libs/src/headers/cmath new file mode 100644 index 0000000000000..d8e2239406cae --- /dev/null +++ b/pandas/_libs/src/headers/cmath @@ -0,0 +1,15 @@ +#ifndef _PANDAS_MATH_H_ +#define _PANDAS_MATH_H_ + +// In older versions of Visual Studio there wasn't a std::signbit defined +// This defines it using _copysign +#if defined(_MSC_VER) && (_MSC_VER < 1800) +#include <cmath> +namespace std { + __inline int signbit(double num) { return _copysign(1.0, num) < 0; } +} +#else +#include <cmath> +#endif + +#endif diff --git a/pandas/_libs/src/headers/math.h b/pandas/_libs/src/headers/math.h deleted file mode 100644 index 34ad9f24a58f9..0000000000000 --- a/pandas/_libs/src/headers/math.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef _PANDAS_MATH_H_ -#define _PANDAS_MATH_H_ - -#if defined(_MSC_VER) && (_MSC_VER < 1800) -#include <math.h> -__inline int signbit(double num) { return _copysign(1.0, num) < 0; } -#else -#include <math.h> -#endif - -#endif diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index cacb073da581c..aa13f03d8e9e4 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -3,6 +3,7 @@ cimport cython from cython cimport Py_ssize_t +from libcpp.deque cimport deque from libc.stdlib cimport malloc, free @@ -12,7 +13,7 @@ from numpy cimport ndarray, double_t, int64_t, float64_t cnp.import_array() -cdef extern from "../src/headers/math.h": +cdef extern from "../src/headers/cmath" namespace "std": int signbit(double) nogil double 
sqrt(double x) nogil @@ -1222,8 +1223,9 @@ cdef _roll_min_max(ndarray[numeric] input, int64_t win, int64_t minp, cdef: numeric ai bint is_variable, should_replace - int64_t s, e, N, i, j, removed + int64_t N, i, removed, window_i Py_ssize_t nobs = 0 + deque Q[int64_t] ndarray[int64_t] starti, endi ndarray[numeric, ndim=1] output cdef: @@ -1242,32 +1244,48 @@ cdef _roll_min_max(ndarray[numeric] input, int64_t win, int64_t minp, output = np.empty(N, dtype=input.dtype) + Q = deque[int64_t]() + if is_variable: with nogil: - for i in range(N): - s = starti[i] - e = endi[i] + # This is using a modified version of the C++ code in this + # SO post: http://bit.ly/2nOoHlY + # The original impl didn't deal with variable window sizes + # So the code was optimized for that - r = input[s] - nobs = 0 - for j in range(s, e): + for i from starti[0] <= i < endi[0]: + ai = init_mm(input[i], &nobs, is_max) - # adds, death at the i offset - ai = init_mm(input[j], &nobs, is_max) + if is_max: + while not Q.empty() and ai >= input[Q.back()]: + Q.pop_back() + else: + while not Q.empty() and ai <= input[Q.back()]: + Q.pop_back() + Q.push_back(i) - if is_max: - if ai > r: - r = ai - else: - if ai < r: - r = ai + for i from endi[0] <= i < N: + output[i-1] = calc_mm(minp, nobs, input[Q.front()]) - output[i] = calc_mm(minp, nobs, r) + ai = init_mm(input[i], &nobs, is_max) - else: + if is_max: + while not Q.empty() and ai >= input[Q.back()]: + Q.pop_back() + else: + while not Q.empty() and ai <= input[Q.back()]: + Q.pop_back() + while not Q.empty() and Q.front() <= i - (endi[i] - starti[i]): + Q.pop_front() + + Q.push_back(i) + + output[N-1] = calc_mm(minp, nobs, input[Q.front()]) + + else: # setup the rings of death! 
ring = malloc(win * sizeof(numeric)) death = malloc(win * sizeof(int64_t)) diff --git a/setup.py b/setup.py index 2332503e558ed..c66979dd19ef0 100755 --- a/setup.py +++ b/setup.py @@ -617,7 +617,8 @@ def pxd(name): 'pyxfile': '_libs/testing'}, '_libs.window': { 'pyxfile': '_libs/window', - 'pxdfiles': ['_libs/skiplist', '_libs/src/util']}, + 'pxdfiles': ['_libs/skiplist', '_libs/src/util'], + 'language': 'c++'}, '_libs.writers': { 'pyxfile': '_libs/writers', 'pxdfiles': ['_libs/src/util']}, @@ -640,11 +641,11 @@ def pxd(name): sources=sources, depends=data.get('depends', []), include_dirs=include, + language=data.get('language', 'c'), extra_compile_args=extra_compile_args) extensions.append(obj) - # ---------------------------------------------------------------------- # msgpack From 984d06827ea85e725a34e8c2977fcffe5ce10cb1 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 14 Feb 2018 03:26:51 -0800 Subject: [PATCH 111/217] tests for tslibs.conversion and tslibs.timezones (#19642) --- pandas/tests/tseries/test_timezones.py | 86 +------------------------- pandas/tests/tslibs/test_conversion.py | 57 +++++++++++++++++ pandas/tests/tslibs/test_timezones.py | 37 +++++++++++ 3 files changed, 95 insertions(+), 85 deletions(-) create mode 100644 pandas/tests/tslibs/test_conversion.py create mode 100644 pandas/tests/tslibs/test_timezones.py diff --git a/pandas/tests/tseries/test_timezones.py b/pandas/tests/tseries/test_timezones.py index 565e735c14c80..97326dc04a522 100644 --- a/pandas/tests/tseries/test_timezones.py +++ b/pandas/tests/tseries/test_timezones.py @@ -2,15 +2,10 @@ import pytest import pytz -import dateutil -import numpy as np from datetime import datetime -import pandas.util.testing as tm -from pandas.core.indexes.datetimes import date_range -from pandas._libs import tslib -from pandas._libs.tslibs import timezones, conversion +from pandas._libs.tslibs import timezones from pandas import Timestamp @@ -111,82 +106,3 @@ def localize(self, tz, x): def 
normalize(self, ts): # no-op for dateutil return ts - - def test_tzlocal(self): - # GH 13583 - ts = Timestamp('2011-01-01', tz=dateutil.tz.tzlocal()) - assert ts.tz == dateutil.tz.tzlocal() - assert "tz='tzlocal()')" in repr(ts) - - tz = timezones.maybe_get_tz('tzlocal()') - assert tz == dateutil.tz.tzlocal() - - # get offset using normal datetime for test - offset = dateutil.tz.tzlocal().utcoffset(datetime(2011, 1, 1)) - offset = offset.total_seconds() * 1000000000 - assert ts.value + offset == Timestamp('2011-01-01').value - - -class TestTimeZoneCacheKey(object): - - @pytest.mark.parametrize('tz_name', list(pytz.common_timezones)) - def test_cache_keys_are_distinct_for_pytz_vs_dateutil(self, tz_name): - if tz_name == 'UTC': - # skip utc as it's a special case in dateutil - return - tz_p = timezones.maybe_get_tz(tz_name) - tz_d = timezones.maybe_get_tz('dateutil/' + tz_name) - if tz_d is None: - # skip timezones that dateutil doesn't know about. - return - assert (timezones._p_tz_cache_key(tz_p) != - timezones._p_tz_cache_key(tz_d)) - - -class TestTslib(object): - - def test_tslib_tz_convert(self): - def compare_utc_to_local(tz_didx, utc_didx): - f = lambda x: conversion.tz_convert_single(x, 'UTC', tz_didx.tz) - result = conversion.tz_convert(tz_didx.asi8, 'UTC', tz_didx.tz) - result_single = np.vectorize(f)(tz_didx.asi8) - tm.assert_numpy_array_equal(result, result_single) - - def compare_local_to_utc(tz_didx, utc_didx): - f = lambda x: conversion.tz_convert_single(x, tz_didx.tz, 'UTC') - result = conversion.tz_convert(utc_didx.asi8, tz_didx.tz, 'UTC') - result_single = np.vectorize(f)(utc_didx.asi8) - tm.assert_numpy_array_equal(result, result_single) - - for tz in ['UTC', 'Asia/Tokyo', 'US/Eastern', 'Europe/Moscow']: - # US: 2014-03-09 - 2014-11-11 - # MOSCOW: 2014-10-26 / 2014-12-31 - tz_didx = date_range('2014-03-01', '2015-01-10', freq='H', tz=tz) - utc_didx = date_range('2014-03-01', '2015-01-10', freq='H') - compare_utc_to_local(tz_didx, utc_didx) - # 
local tz to UTC can be differ in hourly (or higher) freqs because - # of DST - compare_local_to_utc(tz_didx, utc_didx) - - tz_didx = date_range('2000-01-01', '2020-01-01', freq='D', tz=tz) - utc_didx = date_range('2000-01-01', '2020-01-01', freq='D') - compare_utc_to_local(tz_didx, utc_didx) - compare_local_to_utc(tz_didx, utc_didx) - - tz_didx = date_range('2000-01-01', '2100-01-01', freq='A', tz=tz) - utc_didx = date_range('2000-01-01', '2100-01-01', freq='A') - compare_utc_to_local(tz_didx, utc_didx) - compare_local_to_utc(tz_didx, utc_didx) - - # Check empty array - result = conversion.tz_convert(np.array([], dtype=np.int64), - timezones.maybe_get_tz('US/Eastern'), - timezones.maybe_get_tz('Asia/Tokyo')) - tm.assert_numpy_array_equal(result, np.array([], dtype=np.int64)) - - # Check all-NaT array - result = conversion.tz_convert(np.array([tslib.iNaT], dtype=np.int64), - timezones.maybe_get_tz('US/Eastern'), - timezones.maybe_get_tz('Asia/Tokyo')) - tm.assert_numpy_array_equal(result, np.array( - [tslib.iNaT], dtype=np.int64)) diff --git a/pandas/tests/tslibs/test_conversion.py b/pandas/tests/tslibs/test_conversion.py new file mode 100644 index 0000000000000..76038136c26cb --- /dev/null +++ b/pandas/tests/tslibs/test_conversion.py @@ -0,0 +1,57 @@ +# -*- coding: utf-8 -*- + +import numpy as np +import pytest + +import pandas.util.testing as tm +from pandas import date_range +from pandas._libs.tslib import iNaT +from pandas._libs.tslibs import conversion, timezones + + +def compare_utc_to_local(tz_didx, utc_didx): + f = lambda x: conversion.tz_convert_single(x, 'UTC', tz_didx.tz) + result = conversion.tz_convert(tz_didx.asi8, 'UTC', tz_didx.tz) + result_single = np.vectorize(f)(tz_didx.asi8) + tm.assert_numpy_array_equal(result, result_single) + + +def compare_local_to_utc(tz_didx, utc_didx): + f = lambda x: conversion.tz_convert_single(x, tz_didx.tz, 'UTC') + result = conversion.tz_convert(utc_didx.asi8, tz_didx.tz, 'UTC') + result_single = 
np.vectorize(f)(utc_didx.asi8) + tm.assert_numpy_array_equal(result, result_single) + + +class TestTZConvert(object): + + @pytest.mark.parametrize('tz', ['UTC', 'Asia/Tokyo', + 'US/Eastern', 'Europe/Moscow']) + def test_tz_convert_single_matches_tz_convert_hourly(self, tz): + # US: 2014-03-09 - 2014-11-11 + # MOSCOW: 2014-10-26 / 2014-12-31 + tz_didx = date_range('2014-03-01', '2015-01-10', freq='H', tz=tz) + utc_didx = date_range('2014-03-01', '2015-01-10', freq='H') + compare_utc_to_local(tz_didx, utc_didx) + + # local tz to UTC can be differ in hourly (or higher) freqs because + # of DST + compare_local_to_utc(tz_didx, utc_didx) + + @pytest.mark.parametrize('tz', ['UTC', 'Asia/Tokyo', + 'US/Eastern', 'Europe/Moscow']) + @pytest.mark.parametrize('freq', ['D', 'A']) + def test_tz_convert_single_matches_tz_convert(self, tz, freq): + tz_didx = date_range('2000-01-01', '2020-01-01', freq=freq, tz=tz) + utc_didx = date_range('2000-01-01', '2020-01-01', freq=freq) + compare_utc_to_local(tz_didx, utc_didx) + compare_local_to_utc(tz_didx, utc_didx) + + @pytest.mark.parametrize('arr', [ + pytest.param(np.array([], dtype=np.int64), id='empty'), + pytest.param(np.array([iNaT], dtype=np.int64), id='all_nat')]) + def test_tz_convert_corner(self, arr): + result = conversion.tz_convert(arr, + timezones.maybe_get_tz('US/Eastern'), + timezones.maybe_get_tz('Asia/Tokyo')) + tm.assert_numpy_array_equal(result, arr) diff --git a/pandas/tests/tslibs/test_timezones.py b/pandas/tests/tslibs/test_timezones.py new file mode 100644 index 0000000000000..603c5e3fea26f --- /dev/null +++ b/pandas/tests/tslibs/test_timezones.py @@ -0,0 +1,37 @@ +# -*- coding: utf-8 -*- +from datetime import datetime + +import pytest +import pytz +import dateutil.tz + +from pandas._libs.tslibs import timezones +from pandas import Timestamp + + +@pytest.mark.parametrize('tz_name', list(pytz.common_timezones)) +def test_cache_keys_are_distinct_for_pytz_vs_dateutil(tz_name): + if tz_name == 'UTC': + # skip utc as 
it's a special case in dateutil + return + tz_p = timezones.maybe_get_tz(tz_name) + tz_d = timezones.maybe_get_tz('dateutil/' + tz_name) + if tz_d is None: + # skip timezones that dateutil doesn't know about. + return + assert timezones._p_tz_cache_key(tz_p) != timezones._p_tz_cache_key(tz_d) + + +def test_tzlocal(): + # GH#13583 + ts = Timestamp('2011-01-01', tz=dateutil.tz.tzlocal()) + assert ts.tz == dateutil.tz.tzlocal() + assert "tz='tzlocal()')" in repr(ts) + + tz = timezones.maybe_get_tz('tzlocal()') + assert tz == dateutil.tz.tzlocal() + + # get offset using normal datetime for test + offset = dateutil.tz.tzlocal().utcoffset(datetime(2011, 1, 1)) + offset = offset.total_seconds() * 1000000000 + assert ts.value + offset == Timestamp('2011-01-01').value From eed6647751c725f94736153e9ec793b645126c83 Mon Sep 17 00:00:00 2001 From: Tommy <10076072+tommyod@users.noreply.github.com> Date: Wed, 14 Feb 2018 12:31:20 +0100 Subject: [PATCH 112/217] Spellchecked io.rst (#19660) --- doc/source/io.rst | 418 ++++++++++++++++++++++++---------------------- 1 file changed, 217 insertions(+), 201 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 1785de54b7dd6..7bb34e4d232dd 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -28,8 +28,11 @@ IO Tools (Text, CSV, HDF5, ...) =============================== -The pandas I/O API is a set of top level ``reader`` functions accessed like ``pd.read_csv()`` that generally return a ``pandas`` -object. The corresponding ``writer`` functions are object methods that are accessed like ``df.to_csv()`` +The pandas I/O API is a set of top level ``reader`` functions accessed like +:func:`pandas.read_csv` that generally return a pandas object. The corresponding +``writer`` functions are object methods that are accessed like +:meth:`DataFrame.to_csv`. Below is a table containing available ``readers`` and +``writers``. .. 
csv-table:: :header: "Format Type", "Data Description", "Reader", "Writer" @@ -65,13 +68,14 @@ CSV & Text files The two workhorse functions for reading text files (a.k.a. flat files) are :func:`read_csv` and :func:`read_table`. They both use the same parsing code to -intelligently convert tabular data into a DataFrame object. See the +intelligently convert tabular data into a ``DataFrame`` object. See the :ref:`cookbook` for some advanced strategies. Parsing options ''''''''''''''' -:func:`read_csv` and :func:`read_table` accept the following arguments: +The functions :func:`read_csv` and :func:`read_table` accept the following +common arguments: Basic +++++ @@ -94,7 +98,7 @@ delimiter : str, default ``None`` delim_whitespace : boolean, default False Specifies whether or not whitespace (e.g. ``' '`` or ``'\t'``) will be used as the delimiter. Equivalent to setting ``sep='\s+'``. - If this option is set to True, nothing should be passed in for the + If this option is set to ``True``, nothing should be passed in for the ``delimiter`` parameter. .. versionadded:: 0.18.1 support for the Python parser. @@ -122,7 +126,7 @@ names : array-like, default ``None`` explicitly pass ``header=None``. Duplicates in this list will cause a ``UserWarning`` to be issued. index_col : int or sequence or ``False``, default ``None`` - Column to use as the row labels of the DataFrame. If a sequence is given, a + Column to use as the row labels of the ``DataFrame``. If a sequence is given, a MultiIndex is used. If you have a malformed file with delimiters at the end of each line, you might consider ``index_col=False`` to force pandas to *not* use the first column as the index (row names). @@ -131,8 +135,8 @@ usecols : array-like or callable, default ``None`` be positional (i.e. integer indices into the document columns) or strings that correspond to column names provided either by the user in `names` or inferred from the document header row(s). 
For example, a valid array-like - `usecols` parameter would be [0, 1, 2] or ['foo', 'bar', 'baz']. Element - order is ignored, so usecols=[0,1] is the same as [1, 0]. + `usecols` parameter would be ``[0, 1, 2]`` or ``['foo', 'bar', 'baz']``. + Element order is ignored, so ``usecols=[0, 1]`` is the same as ``[1, 0]``. If callable, the callable function will be evaluated against the column names, returning names where the callable function evaluates to True: @@ -145,12 +149,12 @@ usecols : array-like or callable, default ``None`` Using this parameter results in much faster parsing time and lower memory usage. squeeze : boolean, default ``False`` - If the parsed data only contains one column then return a Series. + If the parsed data only contains one column then return a ``Series``. prefix : str, default ``None`` Prefix to add to column numbers when no header, e.g. 'X' for X0, X1, ... mangle_dupe_cols : boolean, default ``True`` Duplicate columns will be specified as 'X', 'X.1'...'X.N', rather than 'X'...'X'. - Passing in False will cause data to be overwritten if there are duplicate + Passing in ``False`` will cause data to be overwritten if there are duplicate names in the columns. General Parsing Configuration @@ -197,7 +201,7 @@ low_memory : boolean, default ``True`` Internally process the file in chunks, resulting in lower memory use while parsing, but possibly mixed type inference. To ensure no mixed types either set ``False``, or specify the type with the ``dtype`` parameter. - Note that the entire file is read into a single DataFrame regardless, + Note that the entire file is read into a single ``DataFrame`` regardless, use the ``chunksize`` or ``iterator`` parameter to return the data in chunks. (Only valid with C parser) memory_map : boolean, default False @@ -217,16 +221,16 @@ keep_default_na : boolean, default ``True`` Whether or not to include the default NaN values when parsing the data. 
Depending on whether `na_values` is passed in, the behavior is as follows: - * If `keep_default_na` is True, and `na_values` are specified, `na_values` + * If `keep_default_na` is ``True``, and `na_values` are specified, `na_values` is appended to the default NaN values used for parsing. - * If `keep_default_na` is True, and `na_values` are not specified, only + * If `keep_default_na` is ``True``, and `na_values` are not specified, only the default NaN values are used for parsing. - * If `keep_default_na` is False, and `na_values` are specified, only + * If `keep_default_na` is ``False``, and `na_values` are specified, only the NaN values specified `na_values` are used for parsing. - * If `keep_default_na` is False, and `na_values` are not specified, no + * If `keep_default_na` is ``False``, and `na_values` are not specified, no strings will be parsed as NaN. - Note that if `na_filter` is passed in as False, the `keep_default_na` and + Note that if `na_filter` is passed in as ``False``, the `keep_default_na` and `na_values` parameters will be ignored. na_filter : boolean, default ``True`` Detect missing value markers (empty strings and the value of na_values). In @@ -341,9 +345,9 @@ Error Handling error_bad_lines : boolean, default ``True`` Lines with too many fields (e.g. a csv line with too many commas) will by - default cause an exception to be raised, and no DataFrame will be returned. If - ``False``, then these "bad lines" will dropped from the DataFrame that is - returned. See :ref:`bad lines <io.bad_lines>` + default cause an exception to be raised, and no ``DataFrame`` will be + returned. If ``False``, then these "bad lines" will dropped from the + ``DataFrame`` that is returned. See :ref:`bad lines <io.bad_lines>` below. 
warn_bad_lines : boolean, default ``True`` If error_bad_lines is ``False``, and warn_bad_lines is ``True``, a warning for @@ -354,8 +358,8 @@ warn_bad_lines : boolean, default ``True`` Specifying column data types '''''''''''''''''''''''''''' -You can indicate the data type for the whole DataFrame or -individual columns: +You can indicate the data type for the whole ``DataFrame`` or individual +columns: .. ipython:: python @@ -368,11 +372,11 @@ individual columns: df = pd.read_csv(StringIO(data), dtype={'b': object, 'c': np.float64}) df.dtypes -Fortunately, ``pandas`` offers more than one way to ensure that your column(s) +Fortunately, pandas offers more than one way to ensure that your column(s) contain only one ``dtype``. If you're unfamiliar with these concepts, you can see :ref:`here<basics.dtypes>` to learn more about dtypes, and :ref:`here<basics.object_conversion>` to learn more about ``object`` conversion in -``pandas``. +pandas. For instance, you can use the ``converters`` argument @@ -395,7 +399,7 @@ dtypes after reading in the data, df2 df2['col_1'].apply(type).value_counts() -which would convert all valid parsing to floats, leaving the invalid parsing +which will convert all valid parsing to floats, leaving the invalid parsing as ``NaN``. Ultimately, how you deal with reading in columns containing mixed dtypes @@ -407,7 +411,7 @@ worth trying. .. versionadded:: 0.20.0 support for the Python parser. - The ``dtype`` option is supported by the 'python' engine + The ``dtype`` option is supported by the 'python' engine. .. note:: In some cases, reading in abnormal data with columns containing mixed dtypes @@ -453,7 +457,8 @@ Specifying Categorical dtype pd.read_csv(StringIO(data)).dtypes pd.read_csv(StringIO(data), dtype='category').dtypes -Individual columns can be parsed as a ``Categorical`` using a dict specification +Individual columns can be parsed as a ``Categorical`` using a dict +specification: .. 
ipython:: python @@ -551,17 +556,18 @@ If the header is in a row other than the first, pass the row number to Duplicate names parsing ''''''''''''''''''''''' -If the file or header contains duplicate names, pandas by default will deduplicate -these names so as to prevent data overwrite: +If the file or header contains duplicate names, pandas will by default +distinguish between them so as to prevent overwriting data: .. ipython :: python data = 'a,b,a\n0,1,2\n3,4,5' pd.read_csv(StringIO(data)) -There is no more duplicate data because ``mangle_dupe_cols=True`` by default, which modifies -a series of duplicate columns 'X'...'X' to become 'X', 'X.1',...'X.N'. If ``mangle_dupe_cols -=False``, duplicate data can arise: +There is no more duplicate data because ``mangle_dupe_cols=True`` by default, +which modifies a series of duplicate columns 'X', ..., 'X' to become +'X', 'X.1', ..., 'X.N'. If ``mangle_dupe_cols=False``, duplicate data can +arise: .. code-block :: python @@ -716,7 +722,7 @@ result in byte strings being decoded to unicode in the result: Some formats which encode all characters as multiple bytes, like UTF-16, won't parse correctly at all without specifying the encoding. `Full list of Python standard encodings -<https://docs.python.org/3/library/codecs.html#standard-encodings>`_ +<https://docs.python.org/3/library/codecs.html#standard-encodings>`_. .. _io.index_col: @@ -724,7 +730,7 @@ Index columns and trailing delimiters ''''''''''''''''''''''''''''''''''''' If a file has one more column of data than the number of column names, the -first column will be used as the DataFrame's row names: +first column will be used as the ``DataFrame``'s row names: .. ipython:: python @@ -894,30 +900,31 @@ Pandas will try to call the ``date_parser`` function in three different ways. If an exception is raised, the next one is tried: 1. ``date_parser`` is first called with one or more arrays as arguments, - as defined using `parse_dates` (e.g., ``date_parser(['2013', '2013'], ['1', '2'])``) + as defined using `parse_dates` (e.g., ``date_parser(['2013', '2013'], ['1', '2'])``). 2. 
If #1 fails, ``date_parser`` is called with all the columns - concatenated row-wise into a single array (e.g., ``date_parser(['2013 1', '2013 2'])``) + concatenated row-wise into a single array (e.g., ``date_parser(['2013 1', '2013 2'])``). 3. If #2 fails, ``date_parser`` is called once for every row with one or more string arguments from the columns indicated with `parse_dates` (e.g., ``date_parser('2013', '1')`` for the first row, ``date_parser('2013', '2')`` - for the second, etc.) + for the second, etc.). Note that performance-wise, you should try these methods of parsing dates in order: -1. Try to infer the format using ``infer_datetime_format=True`` (see section below) +1. Try to infer the format using ``infer_datetime_format=True`` (see section below). 2. If you know the format, use ``pd.to_datetime()``: - ``date_parser=lambda x: pd.to_datetime(x, format=...)`` + ``date_parser=lambda x: pd.to_datetime(x, format=...)``. 3. If you have a really non-standard format, use a custom ``date_parser`` function. For optimal performance, this should be vectorized, i.e., it should accept arrays as arguments. -You can explore the date parsing functionality in ``date_converters.py`` and -add your own. We would love to turn this module into a community supported set -of date/time parsers. To get you started, ``date_converters.py`` contains +You can explore the date parsing functionality in +`date_converters.py `__ +and add your own. We would love to turn this module into a community supported +set of date/time parsers. To get you started, ``date_converters.py`` contains functions to parse dual date and time columns, year/month/day columns, and year/month/day/hour/minute/second columns. It also contains a ``generic_parser`` function so you can curry it with a function that deals with @@ -945,7 +952,7 @@ of strings. So in general, ``infer_datetime_format`` should not have any negative consequences if enabled. 
Here are some examples of datetime strings that can be guessed (All -representing December 30th, 2011 at 00:00:00) +representing December 30th, 2011 at 00:00:00): - "20111230" - "2011/12/30" @@ -954,7 +961,7 @@ representing December 30th, 2011 at 00:00:00) - "30/Dec/2011 00:00:00" - "30/December/2011 00:00:00" -``infer_datetime_format`` is sensitive to ``dayfirst``. With +Note that ``infer_datetime_format`` is sensitive to ``dayfirst``. With ``dayfirst=True``, it will guess "01/12/2011" to be December 1st. With ``dayfirst=False`` (default) it will guess "01/12/2011" to be January 12th. @@ -1030,7 +1037,7 @@ correctly: with open('tmp.csv', 'w') as fh: fh.write(data) -By default, numbers with a thousands separator will be parsed as strings +By default, numbers with a thousands separator will be parsed as strings: .. ipython:: python @@ -1040,7 +1047,7 @@ By default, numbers with a thousands separator will be parsed as strings df.level.dtype -The ``thousands`` keyword allows integers to be parsed correctly +The ``thousands`` keyword allows integers to be parsed correctly: .. ipython:: python @@ -1060,11 +1067,12 @@ The ``thousands`` keyword allows integers to be parsed correctly NA Values ''''''''' -To control which values are parsed as missing values (which are signified by ``NaN``), specify a -string in ``na_values``. If you specify a list of strings, then all values in -it are considered to be missing values. If you specify a number (a ``float``, like ``5.0`` or an ``integer`` like ``5``), -the corresponding equivalent values will also imply a missing value (in this case effectively -``[5.0,5]`` are recognized as ``NaN``. +To control which values are parsed as missing values (which are signified by +``NaN``), specify a string in ``na_values``. If you specify a list of strings, +then all values in it are considered to be missing values. 
If you specify a +number (a ``float``, like ``5.0`` or an ``integer`` like ``5``), the +corresponding equivalent values will also imply a missing value (in this case +effectively ``[5.0, 5]`` are recognized as ``NaN``). To completely override the default values that are recognized as missing, specify ``keep_default_na=False``. @@ -1073,29 +1081,34 @@ To completely override the default values that are recognized as missing, specif The default ``NaN`` recognized values are ``['-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A N/A', '#N/A', 'N/A', 'n/a', 'NA', '#NA', 'NULL', 'null', 'NaN', '-NaN', 'nan', '-nan', '']``. +Let us consider some examples: + .. code-block:: python read_csv(path, na_values=[5]) -the default values, in addition to ``5`` , ``5.0`` when interpreted as numbers are recognized as ``NaN`` +In the example above ``5`` and ``5.0`` will be recognized as ``NaN``, in +addition to the defaults. A string will first be interpreted as a numerical +``5``, then as a ``NaN``. .. code-block:: python read_csv(path, keep_default_na=False, na_values=[""]) -only an empty field will be ``NaN`` +Above, only an empty field will be recognized as ``NaN``. .. code-block:: python read_csv(path, keep_default_na=False, na_values=["NA", "0"]) -only ``NA`` and ``0`` as strings are ``NaN`` +Above, both ``NA`` and ``0`` as strings are ``NaN``. .. code-block:: python read_csv(path, na_values=["Nope"]) -the default values, in addition to the string ``"Nope"`` are recognized as ``NaN`` +The default values, in addition to the string ``"Nope"`` are recognized as +``NaN``. .. _io.infinity: @@ -1143,9 +1156,9 @@ Boolean values '''''''''''''' The common values ``True``, ``False``, ``TRUE``, and ``FALSE`` are all -recognized as boolean. Sometime you would want to recognize some other values -as being boolean. To do this use the ``true_values`` and ``false_values`` -options: +recognized as boolean. Occasionally you might want to recognize other values +as being boolean. 
To do this, use the ``true_values`` and ``false_values`` +options as follows: .. ipython:: python @@ -1161,7 +1174,7 @@ Handling "bad" lines Some files may have malformed lines with too few fields or too many. Lines with too few fields will have NA values filled in the trailing fields. Lines with -too many will cause an error by default: +too many fields will raise an error by default: .. ipython:: python :suppress: @@ -1228,7 +1241,7 @@ By default, ``read_csv`` uses the Excel dialect and treats the double quote as the quote character, which causes it to fail when it finds a newline before it finds the closing double quote. -We can get around this using ``dialect`` +We can get around this using ``dialect``: .. ipython:: python :okwarning: @@ -1253,9 +1266,9 @@ after a delimiter: print(data) pd.read_csv(StringIO(data), skipinitialspace=True) -The parsers make every attempt to "do the right thing" and not be very -fragile. Type inference is a pretty big deal. So if a column can be coerced to -integer dtype without altering the contents, it will do so. Any non-numeric +The parsers make every attempt to "do the right thing" and not be fragile. Type +inference is a pretty big deal. If a column can be coerced to integer dtype +without altering the contents, the parser will do so. Any non-numeric columns will come through as object dtype as with the rest of pandas objects. .. _io.quoting: @@ -1278,7 +1291,7 @@ should pass the ``escapechar`` option: Files with Fixed Width Columns '''''''''''''''''''''''''''''' -While ``read_csv`` reads delimited data, the :func:`read_fwf` function works +While :func:`read_csv` reads delimited data, the :func:`read_fwf` function works with data files that have known and fixed column widths. 
The function parameters to ``read_fwf`` are largely the same as `read_csv` with two extra parameters, and a different usage of the ``delimiter`` parameter: @@ -1287,7 +1300,7 @@ a different usage of the ``delimiter`` parameter: fixed-width fields of each line as half-open intervals (i.e., [from, to[ ). String value 'infer' can be used to instruct the parser to try detecting the column specifications from the first 100 rows of the data. Default - behaviour, if not specified, is to infer. + behavior, if not specified, is to infer. - ``widths``: A list of field widths which can be used instead of 'colspecs' if the intervals are contiguous. - ``delimiter``: Characters to consider as filler characters in the fixed-width file. @@ -1312,7 +1325,7 @@ Consider a typical fixed-width data file: print(open('bar.csv').read()) -In order to parse this file into a DataFrame, we simply need to supply the +In order to parse this file into a ``DataFrame``, we simply need to supply the column specifications to the `read_fwf` function along with the file name: .. ipython:: python @@ -1383,7 +1396,7 @@ column: print(open('foo.csv').read()) In this special case, ``read_csv`` assumes that the first column is to be used -as the index of the DataFrame: +as the index of the ``DataFrame``: .. ipython:: python @@ -1436,10 +1449,10 @@ rows will skip the intervening rows. .. ipython:: python from pandas.util.testing import makeCustomDataframe as mkdf - df = mkdf(5,3,r_idx_nlevels=2,c_idx_nlevels=4) + df = mkdf(5, 3, r_idx_nlevels=2, c_idx_nlevels=4) df.to_csv('mi.csv') print(open('mi.csv').read()) - pd.read_csv('mi.csv',header=[0,1,2,3],index_col=[0,1]) + pd.read_csv('mi.csv', header=[0, 1, 2, 3], index_col=[0, 1]) ``read_csv`` is also able to interpret a more common format of multi-columns indices. @@ -1448,17 +1461,17 @@ of multi-columns indices. 
:suppress: data = ",a,a,a,b,c,c\n,q,r,s,t,u,v\none,1,2,3,4,5,6\ntwo,7,8,9,10,11,12" - fh = open('mi2.csv','w') + fh = open('mi2.csv', 'w') fh.write(data) fh.close() .. ipython:: python print(open('mi2.csv').read()) - pd.read_csv('mi2.csv',header=[0,1],index_col=0) + pd.read_csv('mi2.csv', header=[0, 1], index_col=0) Note: If an ``index_col`` is not specified (e.g. you don't have an index, or wrote it -with ``df.to_csv(..., index=False``), then any ``names`` on the columns index will be *lost*. +with ``df.to_csv(..., index=False)``), then any ``names`` on the columns index will be *lost*. .. ipython:: python :suppress: @@ -1578,7 +1591,7 @@ Writing out Data Writing to CSV format +++++++++++++++++++++ -The Series and DataFrame objects have an instance method ``to_csv`` which +The ``Series`` and ``DataFrame`` objects have an instance method ``to_csv`` which allows storing the contents of the object as a comma-separated-values file. The function takes a number of arguments. Only the first is required. @@ -1591,7 +1604,7 @@ function takes a number of arguments. Only the first is required. - ``index``: whether to write row (index) names (default True) - ``index_label``: Column label(s) for index column(s) if desired. If None (default), and `header` and `index` are True, then the index names are - used. (A sequence should be given if the DataFrame uses MultiIndex). + used. (A sequence should be given if the ``DataFrame`` uses MultiIndex). - ``mode`` : Python write mode, default 'w' - ``encoding``: a string representing the encoding to use if the contents are non-ASCII, for Python versions prior to 3 @@ -1611,7 +1624,7 @@ Writing a formatted string .. _io.formatting: -The DataFrame object has an instance method ``to_string`` which allows control +The ``DataFrame`` object has an instance method ``to_string`` which allows control over the string representation of the object.
All arguments are optional: - ``buf`` default None, for example a StringIO object @@ -1622,8 +1635,8 @@ over the string representation of the object. All arguments are optional: which takes a single argument and returns a formatted string - ``float_format`` default None, a function which takes a single (float) argument and returns a formatted string; to be applied to floats in the - DataFrame. - - ``sparsify`` default True, set to False for a DataFrame with a hierarchical + ``DataFrame``. + - ``sparsify`` default True, set to False for a ``DataFrame`` with a hierarchical index to print every multiindex key at each row. - ``index_names`` default True, will print the names of the indices - ``index`` default True, will print the index (ie, row labels) @@ -1631,7 +1644,7 @@ over the string representation of the object. All arguments are optional: - ``justify`` default ``left``, will print column headers left- or right-justified -The Series object also has a ``to_string`` method, but with only the ``buf``, +The ``Series`` object also has a ``to_string`` method, but with only the ``buf``, ``na_rep``, ``float_format`` arguments. There is also a ``length`` argument which, if set to ``True``, will additionally output the length of the Series. @@ -1654,11 +1667,11 @@ with optional parameters: This can be ``None`` in which case a JSON string is returned - ``orient`` : - Series : + ``Series``: - default is ``index`` - allowed values are {``split``, ``records``, ``index``} - DataFrame + ``DataFrame``: - default is ``columns`` - allowed values are {``split``, ``records``, ``index``, ``columns``, ``values``, ``table``} @@ -1693,7 +1706,7 @@ Orient Options ++++++++++++++ There are a number of different options for the format of the resulting JSON -file / string. Consider the following DataFrame and Series: +file / string. Consider the following ``DataFrame`` and ``Series``: .. 
ipython:: python @@ -1720,8 +1733,8 @@ but the index labels are now primary: sjo.to_json(orient="index") **Record oriented** serializes the data to a JSON array of column -> value records, -index labels are not included. This is useful for passing DataFrame data to plotting -libraries, for example the JavaScript library d3.js: +index labels are not included. This is useful for passing ``DataFrame`` data to plotting +libraries, for example the JavaScript library ``d3.js``: .. ipython:: python @@ -1756,7 +1769,7 @@ preservation of metadata including but not limited to dtypes and index names. Date Handling +++++++++++++ -Writing in ISO date format +Writing in ISO date format: .. ipython:: python @@ -1766,21 +1779,21 @@ Writing in ISO date format json = dfd.to_json(date_format='iso') json -Writing in ISO date format, with microseconds +Writing in ISO date format, with microseconds: .. ipython:: python json = dfd.to_json(date_format='iso', date_unit='us') json -Epoch timestamps, in seconds +Epoch timestamps, in seconds: .. ipython:: python json = dfd.to_json(date_format='epoch', date_unit='s') json -Writing to a file, with a date index and a date column +Writing to a file, with a date index and a date column: .. ipython:: python @@ -1795,7 +1808,8 @@ Writing to a file, with a date index and a date column Fallback Behavior +++++++++++++++++ -If the JSON serializer cannot handle the container contents directly it will fallback in the following manner: +If the JSON serializer cannot handle the container contents directly it will +fall back in the following manner: - if the dtype is unsupported (e.g. ``np.complex``) then the ``default_handler``, if provided, will be called for each value, otherwise an exception is raised. @@ -1864,13 +1878,13 @@ is ``None``. 
To explicitly force ``Series`` parsing, pass ``typ=series`` ``table``; adhering to the JSON `Table Schema`_ -- ``dtype`` : if True, infer dtypes, if a dict of column to dtype, then use those, if False, then don't infer dtypes at all, default is True, apply only to the data -- ``convert_axes`` : boolean, try to convert the axes to the proper dtypes, default is True -- ``convert_dates`` : a list of columns to parse for dates; If True, then try to parse date-like columns, default is True -- ``keep_default_dates`` : boolean, default True. If parsing dates, then parse the default date-like columns -- ``numpy`` : direct decoding to NumPy arrays. default is False; - Supports numeric data only, although labels may be non-numeric. Also note that the JSON ordering **MUST** be the same for each term if ``numpy=True`` -- ``precise_float`` : boolean, default ``False``. Set to enable usage of higher precision (strtod) function when decoding string to double values. Default (``False``) is to use fast but less precise builtin functionality +- ``dtype`` : if True, infer dtypes, if a dict of column to dtype, then use those, if ``False``, then don't infer dtypes at all, default is True, apply only to the data. +- ``convert_axes`` : boolean, try to convert the axes to the proper dtypes, default is ``True`` +- ``convert_dates`` : a list of columns to parse for dates; If ``True``, then try to parse date-like columns, default is ``True``. +- ``keep_default_dates`` : boolean, default ``True``. If parsing dates, then parse the default date-like columns. +- ``numpy`` : direct decoding to NumPy arrays. default is ``False``; + Supports numeric data only, although labels may be non-numeric. Also note that the JSON ordering **MUST** be the same for each term if ``numpy=True``. +- ``precise_float`` : boolean, default ``False``. Set to enable usage of higher precision (strtod) function when decoding string to double values. 
Default (``False``) is to use fast but less precise builtin functionality. - ``date_unit`` : string, the timestamp unit to detect if converting dates. Default None. By default the timestamp precision will be detected, if this is not desired then pass one of 's', 'ms', 'us' or 'ns' to force timestamp precision to @@ -1888,9 +1902,11 @@ overview. Data Conversion +++++++++++++++ -The default of ``convert_axes=True``, ``dtype=True``, and ``convert_dates=True`` will try to parse the axes, and all of the data -into appropriate types, including dates. If you need to override specific dtypes, pass a dict to ``dtype``. ``convert_axes`` should only -be set to ``False`` if you need to preserve string-like numbers (e.g. '1', '2') in an axes. +The default of ``convert_axes=True``, ``dtype=True``, and ``convert_dates=True`` +will try to parse the axes, and all of the data into appropriate types, +including dates. If you need to override specific dtypes, pass a dict to +``dtype``. ``convert_axes`` should only be set to ``False`` if you need to +preserve string-like numbers (e.g. '1', '2') in an axis. .. note:: @@ -2175,7 +2191,7 @@ A few notes on the generated table schema: - Periods are converted to timestamps before serialization, and so have the same behavior of being converted to UTC. In addition, periods will contain - and additional field ``freq`` with the period's frequency, e.g. ``'A-DEC'`` + an additional field ``freq`` with the period's frequency, e.g. ``'A-DEC'``. .. ipython:: python @@ -2184,7 +2200,7 @@ A few notes on the generated table schema: build_table_schema(s_per) - Categoricals use the ``any`` type and an ``enum`` constraint listing - the set of possible values. Additionally, an ``ordered`` field is included + the set of possible values. Additionally, an ``ordered`` field is included: .. ipython:: python @@ -2212,7 +2228,7 @@ A few notes on the generated table schema: + For series, the ``object.name`` is used.
If that's none, then the name is ``values`` - + For DataFrames, the stringified version of the column name is used + + For ``DataFrames``, the stringified version of the column name is used + For ``Index`` (not ``MultiIndex``), ``index.name`` is used, with a fallback to ``index`` if that is None. + For ``MultiIndex``, ``mi.names`` is used. If any level has no name, @@ -2268,15 +2284,15 @@ Reading HTML Content below regarding the issues surrounding the BeautifulSoup4/html5lib/lxml parsers. The top-level :func:`~pandas.io.html.read_html` function can accept an HTML -string/file/URL and will parse HTML tables into list of pandas DataFrames. +string/file/URL and will parse HTML tables into list of pandas ``DataFrames``. Let's look at a few examples. .. note:: ``read_html`` returns a ``list`` of ``DataFrame`` objects, even if there is - only a single table contained in the HTML content + only a single table contained in the HTML content. -Read a URL with no options +Read a URL with no options: .. ipython:: python @@ -2290,7 +2306,7 @@ Read a URL with no options and the data below may be slightly different. Read in the content of the file from the above URL and pass it to ``read_html`` -as a string +as a string: .. ipython:: python :suppress: @@ -2304,7 +2320,7 @@ as a string dfs = pd.read_html(f.read()) dfs -You can even pass in an instance of ``StringIO`` if you so desire +You can even pass in an instance of ``StringIO`` if you so desire: .. ipython:: python @@ -2323,7 +2339,7 @@ You can even pass in an instance of ``StringIO`` if you so desire `__. -Read a URL and match a table that contains specific text +Read a URL and match a table that contains specific text: .. code-block:: python @@ -2339,26 +2355,26 @@ from the data minus the parsed header elements (`` - + @@ -30,10 +30,19 @@ + + + + + @@ -44,8 +53,16 @@ + + + + @@ -106,31 +123,31 @@ Here are just a few of the things that pandas does well: moving window linear regressions, date shifting and lagging, etc. 
- [missing-data]: http://pandas.pydata.org/pandas-docs/stable/missing_data.html#working-with-missing-data - [insertion-deletion]: http://pandas.pydata.org/pandas-docs/stable/dsintro.html#column-selection-addition-deletion - [alignment]: http://pandas.pydata.org/pandas-docs/stable/dsintro.html?highlight=alignment#intro-to-data-structures - [groupby]: http://pandas.pydata.org/pandas-docs/stable/groupby.html#group-by-split-apply-combine - [conversion]: http://pandas.pydata.org/pandas-docs/stable/dsintro.html#dataframe - [slicing]: http://pandas.pydata.org/pandas-docs/stable/indexing.html#slicing-ranges - [fancy-indexing]: http://pandas.pydata.org/pandas-docs/stable/indexing.html#advanced-indexing-with-ix - [subsetting]: http://pandas.pydata.org/pandas-docs/stable/indexing.html#boolean-indexing - [merging]: http://pandas.pydata.org/pandas-docs/stable/merging.html#database-style-dataframe-joining-merging - [joining]: http://pandas.pydata.org/pandas-docs/stable/merging.html#joining-on-index - [reshape]: http://pandas.pydata.org/pandas-docs/stable/reshaping.html#reshaping-and-pivot-tables - [pivot-table]: http://pandas.pydata.org/pandas-docs/stable/reshaping.html#pivot-tables-and-cross-tabulations - [mi]: http://pandas.pydata.org/pandas-docs/stable/indexing.html#hierarchical-indexing-multiindex - [flat-files]: http://pandas.pydata.org/pandas-docs/stable/io.html#csv-text-files - [excel]: http://pandas.pydata.org/pandas-docs/stable/io.html#excel-files - [db]: http://pandas.pydata.org/pandas-docs/stable/io.html#sql-queries - [hdfstore]: http://pandas.pydata.org/pandas-docs/stable/io.html#hdf5-pytables - [timeseries]: http://pandas.pydata.org/pandas-docs/stable/timeseries.html#time-series-date-functionality + [missing-data]: https://pandas.pydata.org/pandas-docs/stable/missing_data.html#working-with-missing-data + [insertion-deletion]: https://pandas.pydata.org/pandas-docs/stable/dsintro.html#column-selection-addition-deletion + [alignment]: 
https://pandas.pydata.org/pandas-docs/stable/dsintro.html?highlight=alignment#intro-to-data-structures + [groupby]: https://pandas.pydata.org/pandas-docs/stable/groupby.html#group-by-split-apply-combine + [conversion]: https://pandas.pydata.org/pandas-docs/stable/dsintro.html#dataframe + [slicing]: https://pandas.pydata.org/pandas-docs/stable/indexing.html#slicing-ranges + [fancy-indexing]: https://pandas.pydata.org/pandas-docs/stable/indexing.html#advanced-indexing-with-ix + [subsetting]: https://pandas.pydata.org/pandas-docs/stable/indexing.html#boolean-indexing + [merging]: https://pandas.pydata.org/pandas-docs/stable/merging.html#database-style-dataframe-joining-merging + [joining]: https://pandas.pydata.org/pandas-docs/stable/merging.html#joining-on-index + [reshape]: https://pandas.pydata.org/pandas-docs/stable/reshaping.html#reshaping-and-pivot-tables + [pivot-table]: https://pandas.pydata.org/pandas-docs/stable/reshaping.html#pivot-tables-and-cross-tabulations + [mi]: https://pandas.pydata.org/pandas-docs/stable/indexing.html#hierarchical-indexing-multiindex + [flat-files]: https://pandas.pydata.org/pandas-docs/stable/io.html#csv-text-files + [excel]: https://pandas.pydata.org/pandas-docs/stable/io.html#excel-files + [db]: https://pandas.pydata.org/pandas-docs/stable/io.html#sql-queries + [hdfstore]: https://pandas.pydata.org/pandas-docs/stable/io.html#hdf5-pytables + [timeseries]: https://pandas.pydata.org/pandas-docs/stable/timeseries.html#time-series-date-functionality ## Where to get it The source code is currently hosted on GitHub at: -http://github.com/pandas-dev/pandas +https://github.com/pandas-dev/pandas Binary installers for the latest released version are available at the [Python -package index](http://pypi.python.org/pypi/pandas/) and on conda. +package index](https://pypi.python.org/pypi/pandas) and on conda. 
```sh # conda @@ -143,12 +160,11 @@ pip install pandas ``` ## Dependencies -- [NumPy](http://www.numpy.org): 1.7.0 or higher -- [python-dateutil](http://labix.org/python-dateutil): 1.5 or higher -- [pytz](http://pytz.sourceforge.net) - - Needed for time zone support with ``pandas.date_range`` +- [NumPy](http://www.numpy.org): 1.9.0 or higher +- [python-dateutil](https://labix.org/python-dateutil): 2.5.0 or higher +- [pytz](https://pythonhosted.org/pytz): 2011k or higher -See the [full installation instructions](http://pandas.pydata.org/pandas-docs/stable/install.html#dependencies) +See the [full installation instructions](https://pandas.pydata.org/pandas-docs/stable/install.html#dependencies) for recommended and optional dependencies. ## Installation from sources @@ -180,32 +196,36 @@ mode](https://pip.pypa.io/en/latest/reference/pip_install.html#editable-installs pip install -e . ``` -On Windows, you will need to install MinGW and execute: - -```sh -python setup.py build --compiler=mingw32 -python setup.py install -``` - -See http://pandas.pydata.org/ for more information. +See the full instructions for [installing from source](https://pandas.pydata.org/pandas-docs/stable/install.html#installing-from-source). ## License -BSD +[BSD 3](LICENSE) ## Documentation -The official documentation is hosted on PyData.org: http://pandas.pydata.org/ - -The Sphinx documentation should provide a good starting point for learning how -to use the library. Expect the docs to continue to expand as time goes on. +The official documentation is hosted on PyData.org: https://pandas.pydata.org/pandas-docs/stable ## Background Work on ``pandas`` started at AQR (a quantitative hedge fund) in 2008 and has been under active development since then. +## Getting Help + +For usage questions, the best place to go to is [StackOverflow](https://stackoverflow.com/questions/tagged/pandas). 
+Further, general questions and discussions can also take place on the [pydata mailing list](https://groups.google.com/forum/?fromgroups#!forum/pydata). + ## Discussion and Development -Since pandas development is related to a number of other scientific -Python projects, questions are welcome on the scipy-user mailing -list. Specialized discussions or design issues should take place on -the PyData mailing list / Google group: +Most development discussion is taking place on GitHub in this repo. Further, the [pandas-dev mailing list](https://mail.python.org/mailman/listinfo/pandas-dev) can also be used for specialized discussions or design issues, and a [Gitter channel](https://gitter.im/pydata/pandas) is available for quick development-related questions. + +## Contributing to pandas [![Open Source Helpers](https://www.codetriage.com/pandas-dev/pandas/badges/users.svg)](https://www.codetriage.com/pandas-dev/pandas) + +All contributions, bug reports, bug fixes, documentation improvements, enhancements and ideas are welcome. + +A detailed overview on how to contribute can be found in the **[contributing guide.](https://pandas.pydata.org/pandas-docs/stable/contributing.html)** + +If you are simply looking to start working with the pandas codebase, navigate to the [GitHub “issues” tab](https://github.com/pandas-dev/pandas/issues) and start looking through interesting issues. There are a number of issues listed under [Docs](https://github.com/pandas-dev/pandas/issues?labels=Docs&sort=updated&state=open) and [Difficulty Novice](https://github.com/pandas-dev/pandas/issues?q=is%3Aopen+is%3Aissue+label%3A%22Difficulty+Novice%22) where you could start out. + +You can also triage issues which may include reproducing bug reports, or asking for vital information such as version numbers or reproduction instructions. If you would like to start triaging issues, one easy way to get started is to [subscribe to pandas on CodeTriage](https://www.codetriage.com/pandas-dev/pandas).
+ +Or maybe through using pandas you have an idea of your own or are looking for something in the documentation and thinking ‘this can be improved’...you can do something about it! -https://groups.google.com/forum/#!forum/pydata +Feel free to ask questions on the [mailing list](https://groups.google.com/forum/?fromgroups#!forum/pydata) or on [Gitter](https://gitter.im/pydata/pandas). diff --git a/RELEASE.md b/RELEASE.md index a181412be2719..efd075dabcba9 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,6 +1,6 @@ Release Notes ============= -The list of changes to pandas between each release can be found +The list of changes to Pandas between each release can be found [here](http://pandas.pydata.org/pandas-docs/stable/whatsnew.html). For full details, see the commit logs at http://github.com/pandas-dev/pandas. diff --git a/appveyor.yml b/appveyor.yml index 42c3be13af809..ba001208864a8 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -11,32 +11,26 @@ matrix: environment: global: # SDK v7.0 MSVC Express 2008's SetEnv.cmd script will fail if the - # /E:ON and /V:ON options are not enabled in the batch script intepreter + # /E:ON and /V:ON options are not enabled in the batch script interpreter # See: http://stackoverflow.com/a/13751649/163740 CMD_IN_ENV: "cmd /E:ON /V:ON /C .\\ci\\run_with_env.cmd" clone_folder: C:\projects\pandas + PANDAS_TESTING_MODE: "deprecate" matrix: - - CONDA_ROOT: "C:\\Miniconda3.5_64" + - CONDA_ROOT: "C:\\Miniconda3_64" PYTHON_VERSION: "3.6" PYTHON_ARCH: "64" CONDA_PY: "36" - CONDA_NPY: "111" + CONDA_NPY: "113" - - CONDA_ROOT: "C:\\Miniconda3.5_64" + - CONDA_ROOT: "C:\\Miniconda3_64" PYTHON_VERSION: "2.7" PYTHON_ARCH: "64" CONDA_PY: "27" CONDA_NPY: "110" - - CONDA_ROOT: "C:\\Miniconda3.5_64" - PYTHON_VERSION: "3.5" - PYTHON_ARCH: "64" - CONDA_PY: "35" - CONDA_NPY: "111" - - # We always use a 64-bit machine, but can build x86 distributions # with the PYTHON_ARCH variable (which is used by CMD_IN_ENV). 
platform: @@ -66,8 +60,7 @@ install: # install our build environment - cmd: conda config --set show_channel_urls true --set always_yes true --set changeps1 false - #- cmd: conda update -q conda - - cmd: conda install conda=4.2.15 + - cmd: conda update -q conda - cmd: conda config --set ssl_verify false # add the pandas channel *before* defaults to have defaults take priority @@ -79,23 +72,25 @@ install: # this is now the downloaded conda... - cmd: conda info -a - # build em using the local source checkout in the correct windows env - - cmd: '%CMD_IN_ENV% conda build ci\appveyor.recipe -q' - # create our env - - cmd: conda create -q -n pandas python=%PYTHON_VERSION% nose pytest + - cmd: conda create -n pandas python=%PYTHON_VERSION% cython pytest>=3.1.0 pytest-xdist - cmd: activate pandas - - SET REQ=ci\requirements-%PYTHON_VERSION%-%PYTHON_ARCH%.run + - cmd: pip install moto + - SET REQ=ci\requirements-%PYTHON_VERSION%_WIN.run - cmd: echo "installing requirements from %REQ%" - - cmd: conda install -n pandas -q --file=%REQ% + - cmd: conda install -n pandas --file=%REQ% - cmd: conda list -n pandas - cmd: echo "installing requirements from %REQ% - done" - - ps: conda install -n pandas (conda build ci\appveyor.recipe -q --output) + + # add some pip only reqs to the env + - SET REQ=ci\requirements-%PYTHON_VERSION%_WIN.pip + - cmd: echo "installing requirements from %REQ%" + - cmd: pip install -Ur %REQ% + + # build em using the local source checkout in the correct windows env + - cmd: '%CMD_IN_ENV% python setup.py build_ext --inplace' test_script: # tests - cmd: activate pandas - - cmd: conda list - - cmd: cd \ - - cmd: python -c "import pandas; pandas.test(['--skip-slow', '--skip-network'])" - + - cmd: test.bat diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json index 155deb5bdbd1f..9c333f62810f4 100644 --- a/asv_bench/asv.conf.json +++ b/asv_bench/asv.conf.json @@ -26,7 +26,7 @@ // The Pythons you'd like to test against. 
If not provided, defaults // to the current version of Python used to run `asv`. // "pythons": ["2.7", "3.4"], - "pythons": ["2.7"], + "pythons": ["3.6"], // The matrix of dependencies to test. Each key is the name of a // package (in PyPI) and the values are version numbers. An empty @@ -46,11 +46,14 @@ "numexpr": [], "pytables": [null, ""], // platform dependent, see excludes below "tables": [null, ""], - "libpython": [null, ""], "openpyxl": [], "xlsxwriter": [], "xlrd": [], - "xlwt": [] + "xlwt": [], + "pytest": [], + // If using Windows with python 2.7 and want to build using the + // mingw toolchain (rather than MSVC), uncomment the following line. + // "libpython": [], }, // Combinations of libraries/python versions can be excluded/included @@ -79,10 +82,6 @@ {"environment_type": "conda", "pytables": null}, {"environment_type": "(?!conda).*", "tables": null}, {"environment_type": "(?!conda).*", "pytables": ""}, - // On conda&win32, install libpython - {"sys_platform": "(?!win32).*", "libpython": ""}, - {"environment_type": "conda", "sys_platform": "win32", "libpython": null}, - {"environment_type": "(?!conda).*", "libpython": ""} ], "include": [], @@ -118,8 +117,10 @@ // with results. If the commit is `null`, regression detection is // skipped for the matching benchmark. 
// - // "regressions_first_commits": { - // "some_benchmark": "352cdf", // Consider regressions only after this commit - // "another_benchmark": null, // Skip regression detection altogether - // } + "regressions_first_commits": { + ".*": "v0.20.0" + }, + "regression_thresholds": { + ".*": 0.05 + } } diff --git a/asv_bench/benchmarks/algorithms.py b/asv_bench/benchmarks/algorithms.py index fe657936c403e..cccd38ef11251 100644 --- a/asv_bench/benchmarks/algorithms.py +++ b/asv_bench/benchmarks/algorithms.py @@ -1,115 +1,128 @@ +import warnings +from importlib import import_module + import numpy as np import pandas as pd from pandas.util import testing as tm +for imp in ['pandas.util', 'pandas.tools.hashing']: + try: + hashing = import_module(imp) + break + except: + pass + +from .pandas_vb_common import setup # noqa + + +class Factorize(object): -class Algorithms(object): goal_time = 0.2 - def setup(self): - N = 100000 - np.random.seed(1234) + params = [True, False] + param_names = ['sort'] - self.int_unique = pd.Int64Index(np.arange(N * 5)) - # cache is_unique - self.int_unique.is_unique + def setup(self, sort): + N = 10**5 + self.int_idx = pd.Int64Index(np.arange(N).repeat(5)) + self.float_idx = pd.Float64Index(np.random.randn(N).repeat(5)) + self.string_idx = tm.makeStringIndex(N) - self.int = pd.Int64Index(np.arange(N).repeat(5)) - self.float = pd.Float64Index(np.random.randn(N).repeat(5)) + def time_factorize_int(self, sort): + self.int_idx.factorize(sort=sort) - # Convenience naming. 
- self.checked_add = pd.core.algorithms.checked_add_with_arr + def time_factorize_float(self, sort): + self.float_idx.factorize(sort=sort) - self.arr = np.arange(1000000) - self.arrpos = np.arange(1000000) - self.arrneg = np.arange(-1000000, 0) - self.arrmixed = np.array([1, -1]).repeat(500000) - self.strings = tm.makeStringIndex(100000) + def time_factorize_string(self, sort): + self.string_idx.factorize(sort=sort) - self.arr_nan = np.random.choice([True, False], size=1000000) - self.arrmixed_nan = np.random.choice([True, False], size=1000000) - # match - self.uniques = tm.makeStringIndex(1000).values - self.all = self.uniques.repeat(10) +class Duplicated(object): - def time_factorize_string(self): - self.strings.factorize() + goal_time = 0.2 - def time_factorize_int(self): - self.int.factorize() + params = ['first', 'last', False] + param_names = ['keep'] - def time_factorize_float(self): - self.int.factorize() + def setup(self, keep): + N = 10**5 + self.int_idx = pd.Int64Index(np.arange(N).repeat(5)) + self.float_idx = pd.Float64Index(np.random.randn(N).repeat(5)) + self.string_idx = tm.makeStringIndex(N) - def time_duplicated_int_unique(self): - self.int_unique.duplicated() + def time_duplicated_int(self, keep): + self.int_idx.duplicated(keep=keep) - def time_duplicated_int(self): - self.int.duplicated() + def time_duplicated_float(self, keep): + self.float_idx.duplicated(keep=keep) - def time_duplicated_float(self): - self.float.duplicated() + def time_duplicated_string(self, keep): + self.string_idx.duplicated(keep=keep) - def time_match_strings(self): - pd.match(self.all, self.uniques) - def time_add_overflow_pos_scalar(self): - self.checked_add(self.arr, 1) +class DuplicatedUniqueIndex(object): - def time_add_overflow_neg_scalar(self): - self.checked_add(self.arr, -1) + goal_time = 0.2 - def time_add_overflow_zero_scalar(self): - self.checked_add(self.arr, 0) + def setup(self): + N = 10**5 + self.idx_int_dup = pd.Int64Index(np.arange(N * 5)) + # cache 
is_unique + self.idx_int_dup.is_unique - def time_add_overflow_pos_arr(self): - self.checked_add(self.arr, self.arrpos) + def time_duplicated_unique_int(self): + self.idx_int_dup.duplicated() - def time_add_overflow_neg_arr(self): - self.checked_add(self.arr, self.arrneg) - def time_add_overflow_mixed_arr(self): - self.checked_add(self.arr, self.arrmixed) +class Match(object): - def time_add_overflow_first_arg_nan(self): - self.checked_add(self.arr, self.arrmixed, arr_mask=self.arr_nan) + goal_time = 0.2 - def time_add_overflow_second_arg_nan(self): - self.checked_add(self.arr, self.arrmixed, b_mask=self.arrmixed_nan) + def setup(self): + self.uniques = tm.makeStringIndex(1000).values + self.all = self.uniques.repeat(10) - def time_add_overflow_both_arg_nan(self): - self.checked_add(self.arr, self.arrmixed, arr_mask=self.arr_nan, - b_mask=self.arrmixed_nan) + def time_match_string(self): + with warnings.catch_warnings(record=True): + pd.match(self.all, self.uniques) class Hashing(object): + goal_time = 0.2 - def setup(self): - N = 100000 - - self.df = pd.DataFrame( - {'A': pd.Series(tm.makeStringIndex(100).take( - np.random.randint(0, 100, size=N))), - 'B': pd.Series(tm.makeStringIndex(10000).take( - np.random.randint(0, 10000, size=N))), - 'D': np.random.randn(N), - 'E': np.arange(N), - 'F': pd.date_range('20110101', freq='s', periods=N), - 'G': pd.timedelta_range('1 day', freq='s', periods=N), - }) - self.df['C'] = self.df['B'].astype('category') - self.df.iloc[10:20] = np.nan - - def time_frame(self): - self.df.hash() - - def time_series_int(self): - self.df.E.hash() - - def time_series_string(self): - self.df.B.hash() - - def time_series_categorical(self): - self.df.C.hash() + def setup_cache(self): + N = 10**5 + + df = pd.DataFrame( + {'strings': pd.Series(tm.makeStringIndex(10000).take( + np.random.randint(0, 10000, size=N))), + 'floats': np.random.randn(N), + 'ints': np.arange(N), + 'dates': pd.date_range('20110101', freq='s', periods=N), + 'timedeltas': 
pd.timedelta_range('1 day', freq='s', periods=N)}) + df['categories'] = df['strings'].astype('category') + df.iloc[10:20] = np.nan + return df + + def time_frame(self, df): + hashing.hash_pandas_object(df) + + def time_series_int(self, df): + hashing.hash_pandas_object(df['ints']) + + def time_series_string(self, df): + hashing.hash_pandas_object(df['strings']) + + def time_series_float(self, df): + hashing.hash_pandas_object(df['floats']) + + def time_series_categorical(self, df): + hashing.hash_pandas_object(df['categories']) + + def time_series_timedeltas(self, df): + hashing.hash_pandas_object(df['timedeltas']) + + def time_series_dates(self, df): + hashing.hash_pandas_object(df['dates']) diff --git a/asv_bench/benchmarks/attrs_caching.py b/asv_bench/benchmarks/attrs_caching.py index 9210f1f2878d4..48f0b7d71144c 100644 --- a/asv_bench/benchmarks/attrs_caching.py +++ b/asv_bench/benchmarks/attrs_caching.py @@ -1,8 +1,15 @@ -from .pandas_vb_common import * -from pandas.util.decorators import cache_readonly +import numpy as np +from pandas import DataFrame +try: + from pandas.util import cache_readonly +except ImportError: + from pandas.util.decorators import cache_readonly + +from .pandas_vb_common import setup # noqa class DataFrameAttributes(object): + goal_time = 0.2 def setup(self): @@ -17,6 +24,7 @@ def time_set_index(self): class CacheReadonly(object): + goal_time = 0.2 def setup(self): diff --git a/asv_bench/benchmarks/binary_ops.py b/asv_bench/benchmarks/binary_ops.py index 53cb1cf465698..cc8766e1fa39c 100644 --- a/asv_bench/benchmarks/binary_ops.py +++ b/asv_bench/benchmarks/binary_ops.py @@ -1,8 +1,16 @@ -from .pandas_vb_common import * -import pandas.computation.expressions as expr +import numpy as np +from pandas import DataFrame, Series, date_range +from pandas.core.algorithms import checked_add_with_arr +try: + import pandas.core.computation.expressions as expr +except ImportError: + import pandas.computation.expressions as expr + +from 
.pandas_vb_common import setup # noqa class Ops(object): + goal_time = 0.2 params = [[True, False], ['default', 1]] @@ -17,18 +25,17 @@ def setup(self, use_numexpr, threads): if not use_numexpr: expr.set_use_numexpr(False) - def time_frame_add(self, use_numexpr, threads): - (self.df + self.df2) + self.df + self.df2 def time_frame_mult(self, use_numexpr, threads): - (self.df * self.df2) + self.df * self.df2 def time_frame_multi_and(self, use_numexpr, threads): - self.df[((self.df > 0) & (self.df2 > 0))] + self.df[(self.df > 0) & (self.df2 > 0)] def time_frame_comparison(self, use_numexpr, threads): - (self.df > self.df2) + self.df > self.df2 def teardown(self, use_numexpr, threads): expr.set_use_numexpr(True) @@ -36,75 +43,109 @@ def teardown(self, use_numexpr, threads): class Ops2(object): + goal_time = 0.2 def setup(self): - self.df = DataFrame(np.random.randn(1000, 1000)) - self.df2 = DataFrame(np.random.randn(1000, 1000)) + N = 10**3 + self.df = DataFrame(np.random.randn(N, N)) + self.df2 = DataFrame(np.random.randn(N, N)) - self.df_int = DataFrame( - np.random.random_integers(np.iinfo(np.int16).min, - np.iinfo(np.int16).max, - size=(1000, 1000))) - self.df2_int = DataFrame( - np.random.random_integers(np.iinfo(np.int16).min, - np.iinfo(np.int16).max, - size=(1000, 1000))) + self.df_int = DataFrame(np.random.randint(np.iinfo(np.int16).min, + np.iinfo(np.int16).max, + size=(N, N))) + self.df2_int = DataFrame(np.random.randint(np.iinfo(np.int16).min, + np.iinfo(np.int16).max, + size=(N, N))) - ## Division + # Division def time_frame_float_div(self): - (self.df // self.df2) + self.df // self.df2 def time_frame_float_div_by_zero(self): - (self.df / 0) + self.df / 0 def time_frame_float_floor_by_zero(self): - (self.df // 0) + self.df // 0 def time_frame_int_div_by_zero(self): - (self.df_int / 0) + self.df_int / 0 - ## Modulo + # Modulo def time_frame_int_mod(self): - (self.df / self.df2) + self.df_int % self.df2_int def time_frame_float_mod(self): - (self.df / 
self.df2) + self.df % self.df2 class Timeseries(object): + goal_time = 0.2 - def setup(self): - self.N = 1000000 - self.halfway = ((self.N // 2) - 1) - self.s = Series(date_range('20010101', periods=self.N, freq='T')) - self.ts = self.s[self.halfway] + params = [None, 'US/Eastern'] + param_names = ['tz'] - self.s2 = Series(date_range('20010101', periods=self.N, freq='s')) + def setup(self, tz): + N = 10**6 + halfway = (N // 2) - 1 + self.s = Series(date_range('20010101', periods=N, freq='T', tz=tz)) + self.ts = self.s[halfway] - def time_series_timestamp_compare(self): - (self.s <= self.ts) + self.s2 = Series(date_range('20010101', periods=N, freq='s', tz=tz)) - def time_timestamp_series_compare(self): - (self.ts >= self.s) + def time_series_timestamp_compare(self, tz): + self.s <= self.ts - def time_timestamp_ops_diff1(self): + def time_timestamp_series_compare(self, tz): + self.ts >= self.s + + def time_timestamp_ops_diff(self, tz): self.s2.diff() - def time_timestamp_ops_diff2(self): - (self.s - self.s.shift()) + def time_timestamp_ops_diff_with_shift(self, tz): + self.s - self.s.shift() + +class AddOverflowScalar(object): + goal_time = 0.2 -class TimeseriesTZ(Timeseries): + params = [1, -1, 0] + param_names = ['scalar'] + + def setup(self, scalar): + N = 10**6 + self.arr = np.arange(N) + + def time_add_overflow_scalar(self, scalar): + checked_add_with_arr(self.arr, scalar) - def setup(self): - self.N = 1000000 - self.halfway = ((self.N // 2) - 1) - self.s = Series(date_range('20010101', periods=self.N, freq='T', tz='US/Eastern')) - self.ts = self.s[self.halfway] - self.s2 = Series(date_range('20010101', periods=self.N, freq='s', tz='US/Eastern')) \ No newline at end of file +class AddOverflowArray(object): + + goal_time = 0.2 + + def setup(self): + N = 10**6 + self.arr = np.arange(N) + self.arr_rev = np.arange(-N, 0) + self.arr_mixed = np.array([1, -1]).repeat(N / 2) + self.arr_nan_1 = np.random.choice([True, False], size=N) + self.arr_nan_2 = 
np.random.choice([True, False], size=N) + + def time_add_overflow_arr_rev(self): + checked_add_with_arr(self.arr, self.arr_rev) + + def time_add_overflow_arr_mask_nan(self): + checked_add_with_arr(self.arr, self.arr_mixed, arr_mask=self.arr_nan_1) + + def time_add_overflow_b_mask_nan(self): + checked_add_with_arr(self.arr, self.arr_mixed, + b_mask=self.arr_nan_1) + + def time_add_overflow_both_arg_nan(self): + checked_add_with_arr(self.arr, self.arr_mixed, arr_mask=self.arr_nan_1, + b_mask=self.arr_nan_2) diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py index cca652c68cf15..7743921003353 100644 --- a/asv_bench/benchmarks/categoricals.py +++ b/asv_bench/benchmarks/categoricals.py @@ -1,65 +1,150 @@ -from .pandas_vb_common import * +import warnings + +import numpy as np +import pandas as pd +import pandas.util.testing as tm try: - from pandas.types.concat import union_categoricals + from pandas.api.types import union_categoricals except ImportError: - pass + try: + from pandas.types.concat import union_categoricals + except ImportError: + pass + +from .pandas_vb_common import setup # noqa + +class Concat(object): -class Categoricals(object): goal_time = 0.2 def setup(self): - N = 100000 - self.s = pd.Series((list('aabbcd') * N)).astype('category') + N = 10**5 + self.s = pd.Series(list('aabbcd') * N).astype('category') + + self.a = pd.Categorical(list('aabbcd') * N) + self.b = pd.Categorical(list('bbcdjk') * N) + + def time_concat(self): + pd.concat([self.s, self.s]) - self.a = pd.Categorical((list('aabbcd') * N)) - self.b = pd.Categorical((list('bbcdjk') * N)) + def time_union(self): + union_categoricals([self.a, self.b]) + + +class Constructor(object): + + goal_time = 0.2 + def setup(self): + N = 10**5 self.categories = list('abcde') - self.cat_idx = Index(self.categories) + self.cat_idx = pd.Index(self.categories) self.values = np.tile(self.categories, N) self.codes = np.tile(range(len(self.categories)), N) - self.datetimes = 
pd.Series(pd.date_range( - '1995-01-01 00:00:00', periods=10000, freq='s')) + self.datetimes = pd.Series(pd.date_range('1995-01-01 00:00:00', + periods=N / 10, + freq='s')) + self.datetimes_with_nat = self.datetimes.copy() + self.datetimes_with_nat.iloc[-1] = pd.NaT - def time_concat(self): - concat([self.s, self.s]) + self.values_some_nan = list(np.tile(self.categories + [np.nan], N)) + self.values_all_nan = [np.nan] * len(self.values) - def time_union(self): - union_categoricals([self.a, self.b]) + def time_regular(self): + pd.Categorical(self.values, self.categories) + + def time_fastpath(self): + pd.Categorical(self.codes, self.cat_idx, fastpath=True) + + def time_datetimes(self): + pd.Categorical(self.datetimes) - def time_constructor_regular(self): - Categorical(self.values, self.categories) + def time_datetimes_with_nat(self): + pd.Categorical(self.datetimes_with_nat) - def time_constructor_fastpath(self): - Categorical(self.codes, self.cat_idx, fastpath=True) + def time_with_nan(self): + pd.Categorical(self.values_some_nan) - def time_constructor_datetimes(self): - Categorical(self.datetimes) + def time_all_nan(self): + pd.Categorical(self.values_all_nan) - def time_constructor_datetimes_with_nat(self): - t = self.datetimes - t.iloc[-1] = pd.NaT - Categorical(t) +class ValueCounts(object): -class Categoricals2(object): goal_time = 0.2 - def setup(self): - n = 500000 - np.random.seed(2718281) + params = [True, False] + param_names = ['dropna'] + + def setup(self, dropna): + n = 5 * 10**5 arr = ['s%04d' % i for i in np.random.randint(0, n // 10, size=n)] - self.ts = Series(arr).astype('category') + self.ts = pd.Series(arr).astype('category') + + def time_value_counts(self, dropna): + self.ts.value_counts(dropna=dropna) - self.sel = self.ts.loc[[0]] - def time_value_counts(self): - self.ts.value_counts(dropna=False) +class Repr(object): + + goal_time = 0.2 - def time_value_counts_dropna(self): - self.ts.value_counts(dropna=True) + def setup(self): + self.sel = 
pd.Series(['s1234']).astype('category') def time_rendering(self): str(self.sel) + + +class SetCategories(object): + + goal_time = 0.2 + + def setup(self): + n = 5 * 10**5 + arr = ['s%04d' % i for i in np.random.randint(0, n // 10, size=n)] + self.ts = pd.Series(arr).astype('category') + + def time_set_categories(self): + self.ts.cat.set_categories(self.ts.cat.categories[::2]) + + +class Rank(object): + + goal_time = 0.2 + + def setup(self): + N = 10**5 + ncats = 100 + + self.s_str = pd.Series(tm.makeCategoricalIndex(N, ncats)).astype(str) + self.s_str_cat = self.s_str.astype('category') + with warnings.catch_warnings(record=True): + self.s_str_cat_ordered = self.s_str.astype('category', + ordered=True) + + self.s_int = pd.Series(np.random.randint(0, ncats, size=N)) + self.s_int_cat = self.s_int.astype('category') + with warnings.catch_warnings(record=True): + self.s_int_cat_ordered = self.s_int.astype('category', + ordered=True) + + def time_rank_string(self): + self.s_str.rank() + + def time_rank_string_cat(self): + self.s_str_cat.rank() + + def time_rank_string_cat_ordered(self): + self.s_str_cat_ordered.rank() + + def time_rank_int(self): + self.s_int.rank() + + def time_rank_int_cat(self): + self.s_int_cat.rank() + + def time_rank_int_cat_ordered(self): + self.s_int_cat_ordered.rank() diff --git a/asv_bench/benchmarks/ctors.py b/asv_bench/benchmarks/ctors.py index b5694a3a21502..3f9016787aab4 100644 --- a/asv_bench/benchmarks/ctors.py +++ b/asv_bench/benchmarks/ctors.py @@ -1,30 +1,66 @@ -from .pandas_vb_common import * +import numpy as np +import pandas.util.testing as tm +from pandas import Series, Index, DatetimeIndex, Timestamp, MultiIndex +from .pandas_vb_common import setup # noqa + + +class SeriesConstructors(object): -class Constructors(object): goal_time = 0.2 - def setup(self): - self.arr = np.random.randn(100, 100) - self.arr_str = np.array(['foo', 'bar', 'baz'], dtype=object) + param_names = ["data_fmt", "with_index"] + params = [[lambda x: x, + 
list, + lambda arr: list(arr.astype(str)), + lambda arr: dict(zip(range(len(arr)), arr)), + lambda arr: [(i, -i) for i in arr], + lambda arr: [[i, -i] for i in arr], + lambda arr: ([(i, -i) for i in arr][:-1] + [None]), + lambda arr: ([[i, -i] for i in arr][:-1] + [None])], + [False, True]] + + def setup(self, data_fmt, with_index): + N = 10**4 + arr = np.random.randn(N) + self.data = data_fmt(arr) + self.index = np.arange(N) if with_index else None - self.data = np.random.randn(100) - self.index = Index(np.arange(100)) + def time_series_constructor(self, data_fmt, with_index): + Series(self.data, index=self.index) - self.s = Series(([Timestamp('20110101'), Timestamp('20120101'), - Timestamp('20130101')] * 1000)) - def time_frame_from_ndarray(self): - DataFrame(self.arr) +class SeriesDtypesConstructors(object): - def time_series_from_ndarray(self): - pd.Series(self.data, index=self.index) + goal_time = 0.2 + + def setup(self): + N = 10**4 + self.arr = np.random.randn(N, N) + self.arr_str = np.array(['foo', 'bar', 'baz'], dtype=object) + self.s = Series([Timestamp('20110101'), Timestamp('20120101'), + Timestamp('20130101')] * N * 10) def time_index_from_array_string(self): Index(self.arr_str) + def time_index_from_array_floats(self): + Index(self.arr) + def time_dtindex_from_series(self): DatetimeIndex(self.s) - def time_dtindex_from_series2(self): + def time_dtindex_from_index_with_series(self): Index(self.s) + + +class MultiIndexConstructor(object): + + goal_time = 0.2 + + def setup(self): + N = 10**4 + self.iterables = [tm.makeStringIndex(N), range(20)] + + def time_multiindex_from_iterables(self): + MultiIndex.from_product(self.iterables) diff --git a/asv_bench/benchmarks/eval.py b/asv_bench/benchmarks/eval.py index a0819e33dc254..8e581dcf22b4c 100644 --- a/asv_bench/benchmarks/eval.py +++ b/asv_bench/benchmarks/eval.py @@ -1,67 +1,67 @@ -from .pandas_vb_common import * +import numpy as np import pandas as pd -import pandas.computation.expressions as expr +try: 
+ import pandas.core.computation.expressions as expr +except ImportError: + import pandas.computation.expressions as expr + +from .pandas_vb_common import setup # noqa class Eval(object): + goal_time = 0.2 params = [['numexpr', 'python'], [1, 'all']] param_names = ['engine', 'threads'] def setup(self, engine, threads): - self.df = DataFrame(np.random.randn(20000, 100)) - self.df2 = DataFrame(np.random.randn(20000, 100)) - self.df3 = DataFrame(np.random.randn(20000, 100)) - self.df4 = DataFrame(np.random.randn(20000, 100)) + self.df = pd.DataFrame(np.random.randn(20000, 100)) + self.df2 = pd.DataFrame(np.random.randn(20000, 100)) + self.df3 = pd.DataFrame(np.random.randn(20000, 100)) + self.df4 = pd.DataFrame(np.random.randn(20000, 100)) if threads == 1: expr.set_numexpr_threads(1) def time_add(self, engine, threads): - df, df2, df3, df4 = self.df, self.df2, self.df3, self.df4 - pd.eval('df + df2 + df3 + df4', engine=engine) + pd.eval('self.df + self.df2 + self.df3 + self.df4', engine=engine) def time_and(self, engine, threads): - df, df2, df3, df4 = self.df, self.df2, self.df3, self.df4 - pd.eval('(df > 0) & (df2 > 0) & (df3 > 0) & (df4 > 0)', engine=engine) + pd.eval('(self.df > 0) & (self.df2 > 0) & ' + '(self.df3 > 0) & (self.df4 > 0)', engine=engine) def time_chained_cmp(self, engine, threads): - df, df2, df3, df4 = self.df, self.df2, self.df3, self.df4 - pd.eval('df < df2 < df3 < df4', engine=engine) + pd.eval('self.df < self.df2 < self.df3 < self.df4', engine=engine) def time_mult(self, engine, threads): - df, df2, df3, df4 = self.df, self.df2, self.df3, self.df4 - pd.eval('df * df2 * df3 * df4', engine=engine) + pd.eval('self.df * self.df2 * self.df3 * self.df4', engine=engine) def teardown(self, engine, threads): expr.set_numexpr_threads() class Query(object): + goal_time = 0.2 def setup(self): - self.N = 1000000 - self.halfway = ((self.N // 2) - 1) - self.index = date_range('20010101', periods=self.N, freq='T') - self.s = Series(self.index) - self.ts = 
self.s.iloc[self.halfway] - self.df = DataFrame({'a': np.random.randn(self.N), }, index=self.index) - self.df2 = DataFrame({'dates': self.s.values,}) - - self.df3 = DataFrame({'a': np.random.randn(self.N),}) - self.min_val = self.df3['a'].min() - self.max_val = self.df3['a'].max() + N = 10**6 + halfway = (N // 2) - 1 + index = pd.date_range('20010101', periods=N, freq='T') + s = pd.Series(index) + self.ts = s.iloc[halfway] + self.df = pd.DataFrame({'a': np.random.randn(N), 'dates': s}, + index=index) + data = np.random.randn(N) + self.min_val = data.min() + self.max_val = data.max() def time_query_datetime_index(self): - ts = self.ts - self.df.query('index < @ts') + self.df.query('index < @self.ts') - def time_query_datetime_series(self): - ts = self.ts - self.df2.query('dates < @ts') + def time_query_datetime_column(self): + self.df.query('dates < @self.ts') def time_query_with_boolean_selection(self): - min_val, max_val = self.min_val, self.max_val - self.df.query('(a >= @min_val) & (a <= @max_val)') + self.df.query('(a >= @self.min_val) & (a <= @self.max_val)') diff --git a/asv_bench/benchmarks/frame_ctor.py b/asv_bench/benchmarks/frame_ctor.py index 05c1a27fdf8ca..21b20cb123ed6 100644 --- a/asv_bench/benchmarks/frame_ctor.py +++ b/asv_bench/benchmarks/frame_ctor.py @@ -1,138 +1,95 @@ -from .pandas_vb_common import * +import numpy as np +import pandas.util.testing as tm +from pandas import DataFrame, Series, MultiIndex, Timestamp, date_range try: - from pandas.tseries.offsets import * -except: - from pandas.core.datetools import * + from pandas.tseries.offsets import Nano, Hour +except ImportError: + # For compatibility with older versions + from pandas.core.datetools import * # noqa +from .pandas_vb_common import setup # noqa -#---------------------------------------------------------------------- -# Creation from nested dict class FromDicts(object): + goal_time = 0.2 def setup(self): - (N, K) = (5000, 50) - self.index = tm.makeStringIndex(N) - self.columns = 
tm.makeStringIndex(K) - self.frame = DataFrame(np.random.randn(N, K), index=self.index, columns=self.columns) - try: - self.data = self.frame.to_dict() - except: - self.data = self.frame.toDict() - self.some_dict = self.data.values()[0] - self.dict_list = [dict(zip(self.columns, row)) for row in self.frame.values] - - self.data2 = dict( - ((i, dict(((j, float(j)) for j in range(100)))) for i in - xrange(2000))) - - def time_frame_ctor_list_of_dict(self): + N, K = 5000, 50 + index = tm.makeStringIndex(N) + columns = tm.makeStringIndex(K) + frame = DataFrame(np.random.randn(N, K), index=index, columns=columns) + self.data = frame.to_dict() + self.some_dict = list(self.data.values())[0] + self.dict_list = frame.to_dict(orient='records') + self.data2 = {i: {j: float(j) for j in range(100)} + for i in range(2000)} + + def time_list_of_dict(self): DataFrame(self.dict_list) - def time_frame_ctor_nested_dict(self): + def time_nested_dict(self): DataFrame(self.data) - def time_series_ctor_from_dict(self): + def time_dict(self): Series(self.some_dict) - def time_frame_ctor_nested_dict_int64(self): + def time_nested_dict_int64(self): # nested dict, integer indexes, regression described in #621 - DataFrame(self.data) + DataFrame(self.data2) -# from a mi-series +class FromSeries(object): -class frame_from_series(object): goal_time = 0.2 def setup(self): - self.mi = MultiIndex.from_tuples([(x, y) for x in range(100) for y in range(100)]) - self.s = Series(randn(10000), index=self.mi) + mi = MultiIndex.from_product([range(100), range(100)]) + self.s = Series(np.random.randn(10000), index=mi) - def time_frame_from_mi_series(self): + def time_mi_series(self): DataFrame(self.s) -#---------------------------------------------------------------------- -# get_numeric_data +class FromDictwithTimestamp(object): -class frame_get_numeric_data(object): goal_time = 0.2 + params = [Nano(1), Hour(1)] + param_names = ['offset'] - def setup(self): - self.df = DataFrame(randn(10000, 25)) - 
self.df['foo'] = 'bar' - self.df['bar'] = 'baz' - self.df = self.df.consolidate() - - def time_frame_get_numeric_data(self): - self.df._get_numeric_data() - - -# ---------------------------------------------------------------------- -# From dict with DatetimeIndex with all offsets - -# dynamically generate benchmarks for every offset -# -# get_period_count & get_index_for_offset are there because blindly taking each -# offset times 1000 can easily go out of Timestamp bounds and raise errors. - - -def get_period_count(start_date, off): - ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days - if (ten_offsets_in_days == 0): - return 1000 - else: - return min((9 * ((Timestamp.max - start_date).days // ten_offsets_in_days)), 1000) - - -def get_index_for_offset(off): - start_date = Timestamp('1/1/1900') - return date_range(start_date, periods=min(1000, get_period_count( - start_date, off)), freq=off) - - -all_offsets = offsets.__all__ -# extra cases -for off in ['FY5253', 'FY5253Quarter']: - all_offsets.pop(all_offsets.index(off)) - all_offsets.extend([off + '_1', off + '_2']) + def setup(self, offset): + N = 10**3 + np.random.seed(1234) + idx = date_range(Timestamp('1/1/1900'), freq=offset, periods=N) + df = DataFrame(np.random.randn(N, 10), index=idx) + self.d = df.to_dict() + def time_dict_with_timestamp_offsets(self, offset): + DataFrame(self.d) -class FrameConstructorDTIndexFromOffsets(object): - params = [all_offsets, [1, 2]] - param_names = ['offset', 'n_steps'] +class FromRecords(object): - offset_kwargs = {'WeekOfMonth': {'weekday': 1, 'week': 1}, - 'LastWeekOfMonth': {'weekday': 1, 'week': 1}, - 'FY5253': {'startingMonth': 1, 'weekday': 1}, - 'FY5253Quarter': {'qtr_with_extra_week': 1, 'startingMonth': 1, 'weekday': 1}} + goal_time = 0.2 + params = [None, 1000] + param_names = ['nrows'] - offset_extra_cases = {'FY5253': {'variation': ['nearest', 'last']}, - 'FY5253Quarter': {'variation': ['nearest', 'last']}} + def setup(self, nrows): + N = 
100000 + self.gen = ((x, (x * 20), (x * 100)) for x in range(N)) - def setup(self, offset, n_steps): + def time_frame_from_records_generator(self, nrows): + # issue-6700 + self.df = DataFrame.from_records(self.gen, nrows=nrows) - extra = False - if offset.endswith("_", None, -1): - extra = int(offset[-1]) - offset = offset[:-2] - kwargs = {} - if offset in self.offset_kwargs: - kwargs = self.offset_kwargs[offset] +class FromNDArray(object): - if extra: - extras = self.offset_extra_cases[offset] - for extra_arg in extras: - kwargs[extra_arg] = extras[extra_arg][extra -1] + goal_time = 0.2 - offset = getattr(offsets, offset) - self.idx = get_index_for_offset(offset(n_steps, **kwargs)) - self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx) - self.d = dict([(col, self.df[col]) for col in self.df.columns]) + def setup(self): + N = 100000 + self.data = np.random.randn(N) - def time_frame_ctor(self, offset, n_steps): - DataFrame(self.d) + def time_frame_from_ndarray(self): + self.df = DataFrame(self.data) diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py index 9f491302a4d6f..4ff71c706cd34 100644 --- a/asv_bench/benchmarks/frame_methods.py +++ b/asv_bench/benchmarks/frame_methods.py @@ -1,20 +1,43 @@ -from .pandas_vb_common import * import string +import warnings +import numpy as np +import pandas.util.testing as tm +from pandas import (DataFrame, Series, MultiIndex, date_range, period_range, + isnull, NaT) -#---------------------------------------------------------------------- -# lookup +from .pandas_vb_common import setup # noqa + + +class GetNumericData(object): + + goal_time = 0.2 + + def setup(self): + self.df = DataFrame(np.random.randn(10000, 25)) + self.df['foo'] = 'bar' + self.df['bar'] = 'baz' + with warnings.catch_warnings(record=True): + self.df = self.df.consolidate() + + def time_frame_get_numeric_data(self): + self.df._get_numeric_data() + + +class Lookup(object): -class frame_fancy_lookup(object): 
goal_time = 0.2 def setup(self): - self.df = DataFrame(np.random.randn(10000, 8), columns=list('abcdefgh')) + self.df = DataFrame(np.random.randn(10000, 8), + columns=list('abcdefgh')) self.df['foo'] = 'bar' self.row_labels = list(self.df.index[::10])[:900] - self.col_labels = (list(self.df.columns) * 100) - self.row_labels_all = np.array((list(self.df.index) * len(self.df.columns)), dtype='object') - self.col_labels_all = np.array((list(self.df.columns) * len(self.df.index)), dtype='object') + self.col_labels = list(self.df.columns) * 100 + self.row_labels_all = np.array( + list(self.df.index) * len(self.df.columns), dtype='object') + self.col_labels_all = np.array( + list(self.df.columns) * len(self.df.index), dtype='object') def time_frame_fancy_lookup(self): self.df.lookup(self.row_labels, self.col_labels) @@ -23,25 +46,20 @@ def time_frame_fancy_lookup_all(self): self.df.lookup(self.row_labels_all, self.col_labels_all) -#---------------------------------------------------------------------- -# reindex - class Reindex(object): + goal_time = 0.2 def setup(self): - self.df = DataFrame(randn(10000, 1000)) - self.idx = np.arange(4000, 7000) - + N = 10**3 + self.df = DataFrame(np.random.randn(N * 10, N)) + self.idx = np.arange(4 * N, 7 * N) self.df2 = DataFrame( - dict([(c, {0: randint(0, 2, 1000).astype(np.bool_), - 1: randint(0, 1000, 1000).astype( - np.int16), - 2: randint(0, 1000, 1000).astype( - np.int32), - 3: randint(0, 1000, 1000).astype( - np.int64),}[randint(0, 4)]) for c in - range(1000)])) + {c: {0: np.random.randint(0, 2, N).astype(np.bool_), + 1: np.random.randint(0, N, N).astype(np.int16), + 2: np.random.randint(0, N, N).astype(np.int32), + 3: np.random.randint(0, N, N).astype(np.int64)} + [np.random.randint(0, 4)] for c in range(N)}) def time_reindex_axis0(self): self.df.reindex(self.idx) @@ -53,81 +71,86 @@ def time_reindex_both_axes(self): self.df.reindex(index=self.idx, columns=self.idx) def time_reindex_both_axes_ix(self): - self.df.ix[(self.idx, 
self.idx)] + self.df.ix[self.idx, self.idx] def time_reindex_upcast(self): - self.df2.reindex(permutation(range(1200))) + self.df2.reindex(np.random.permutation(range(1200))) -#---------------------------------------------------------------------- -# iteritems (monitor no-copying behaviour) - class Iteration(object): + goal_time = 0.2 def setup(self): - self.df = DataFrame(randn(10000, 1000)) - self.df2 = DataFrame(np.random.randn(50000, 10)) - self.df3 = pd.DataFrame(np.random.randn(1000,5000), - columns=['C'+str(c) for c in range(5000)]) + N = 1000 + self.df = DataFrame(np.random.randn(N * 10, N)) + self.df2 = DataFrame(np.random.randn(N * 50, 10)) + self.df3 = DataFrame(np.random.randn(N, 5 * N), + columns=['C' + str(c) for c in range(N * 5)]) - def f(self): + def time_iteritems(self): + # (monitor no-copying behaviour) if hasattr(self.df, '_item_cache'): self.df._item_cache.clear() - for (name, col) in self.df.iteritems(): - pass - - def g(self): - for (name, col) in self.df.iteritems(): + for name, col in self.df.iteritems(): pass - def time_iteritems(self): - self.f() - def time_iteritems_cached(self): - self.g() + for name, col in self.df.iteritems(): + pass def time_iteritems_indexing(self): - df = self.df3 - for col in df: - df[col] + for col in self.df3: + self.df3[col] def time_itertuples(self): for row in self.df2.itertuples(): pass + def time_iterrows(self): + for row in self.df.iterrows(): + pass -#---------------------------------------------------------------------- -# to_string, to_html, repr -class Formatting(object): +class ToString(object): + goal_time = 0.2 def setup(self): - self.df = DataFrame(randn(100, 10)) + self.df = DataFrame(np.random.randn(100, 10)) - self.nrows = 500 - self.df2 = DataFrame(randn(self.nrows, 10)) - self.df2[0] = period_range('2000', '2010', self.nrows) - self.df2[1] = range(self.nrows) + def time_to_string_floats(self): + self.df.to_string() - self.nrows = 10000 - self.data = randn(self.nrows, 10) - self.idx = 
MultiIndex.from_arrays(np.tile(randn(3, int(self.nrows / 100)), 100)) - self.df3 = DataFrame(self.data, index=self.idx) - self.idx = randn(self.nrows) - self.df4 = DataFrame(self.data, index=self.idx) - self.df_tall = pandas.DataFrame(np.random.randn(10000, 10)) +class ToHTML(object): - self.df_wide = pandas.DataFrame(np.random.randn(10, 10000)) + goal_time = 0.2 - def time_to_string_floats(self): - self.df.to_string() + def setup(self): + nrows = 500 + self.df2 = DataFrame(np.random.randn(nrows, 10)) + self.df2[0] = period_range('2000', periods=nrows) + self.df2[1] = range(nrows) def time_to_html_mixed(self): self.df2.to_html() + +class Repr(object): + + goal_time = 0.2 + + def setup(self): + nrows = 10000 + data = np.random.randn(nrows, 10) + arrays = np.tile(np.random.randn(3, int(nrows / 100)), 100) + idx = MultiIndex.from_arrays(arrays) + self.df3 = DataFrame(data, index=idx) + self.df4 = DataFrame(data, index=np.random.randn(nrows)) + self.df_tall = DataFrame(np.random.randn(nrows, 10)) + self.df_wide = DataFrame(np.random.randn(10, nrows)) + def time_html_repr_trunc_mi(self): self.df3._repr_html_() @@ -141,21 +164,16 @@ def time_frame_repr_wide(self): repr(self.df_wide) -#---------------------------------------------------------------------- -# nulls/masking - +class MaskBool(object): -## masking - -class frame_mask_bools(object): goal_time = 0.2 def setup(self): - self.data = np.random.randn(1000, 500) - self.df = DataFrame(self.data) - self.df = self.df.where((self.df > 0)) - self.bools = (self.df > 0) - self.mask = isnull(self.df) + data = np.random.randn(1000, 500) + df = DataFrame(data) + df = df.where(df > 0) + self.bools = df > 0 + self.mask = isnull(df) def time_frame_mask_bools(self): self.bools.mask(self.mask) @@ -164,31 +182,26 @@ def time_frame_mask_floats(self): self.bools.astype(float).mask(self.mask) -## isnull +class Isnull(object): -class FrameIsnull(object): goal_time = 0.2 def setup(self): - self.df_no_null = 
DataFrame(np.random.randn(1000, 1000)) - - np.random.seed(1234) - self.sample = np.array([np.nan, 1.0]) - self.data = np.random.choice(self.sample, (1000, 1000)) - self.df = DataFrame(self.data) - - np.random.seed(1234) - self.sample = np.array(list(string.ascii_lowercase) + - list(string.ascii_uppercase) + - list(string.whitespace)) - self.data = np.random.choice(self.sample, (1000, 1000)) - self.df_strings= DataFrame(self.data) - - np.random.seed(1234) - self.sample = np.array([NaT, np.nan, None, np.datetime64('NaT'), - np.timedelta64('NaT'), 0, 1, 2.0, '', 'abcd']) - self.data = np.random.choice(self.sample, (1000, 1000)) - self.df_obj = DataFrame(self.data) + N = 10**3 + self.df_no_null = DataFrame(np.random.randn(N, N)) + + sample = np.array([np.nan, 1.0]) + data = np.random.choice(sample, (N, N)) + self.df = DataFrame(data) + + sample = np.array(list(string.ascii_letters + string.whitespace)) + data = np.random.choice(sample, (N, N)) + self.df_strings = DataFrame(data) + + sample = np.array([NaT, np.nan, None, np.datetime64('NaT'), + np.timedelta64('NaT'), 0, 1, 2.0, '', 'abcd']) + data = np.random.choice(sample, (N, N)) + self.df_obj = DataFrame(data) def time_isnull_floats_no_null(self): isnull(self.df_no_null) @@ -203,92 +216,74 @@ def time_isnull_obj(self): isnull(self.df_obj) -# ---------------------------------------------------------------------- -# fillna in place +class Fillna(object): -class frame_fillna_inplace(object): goal_time = 0.2 + params = ([True, False], ['pad', 'bfill']) + param_names = ['inplace', 'method'] - def setup(self): - self.df = DataFrame(randn(10000, 100)) - self.df.values[::2] = np.nan - - def time_frame_fillna_inplace(self): - self.df.fillna(0, inplace=True) - - - -class frame_fillna_many_columns_pad(object): - goal_time = 0.2 - - def setup(self): - self.values = np.random.randn(1000, 1000) - self.values[::2] = np.nan - self.df = DataFrame(self.values) - - def time_frame_fillna_many_columns_pad(self): - 
self.df.fillna(method='pad') + def setup(self, inplace, method): + values = np.random.randn(10000, 100) + values[::2] = np.nan + self.df = DataFrame(values) + def time_frame_fillna(self, inplace, method): + self.df.fillna(inplace=inplace, method=method) class Dropna(object): + goal_time = 0.2 + params = (['all', 'any'], [0, 1]) + param_names = ['how', 'axis'] - def setup(self): - self.data = np.random.randn(10000, 1000) - self.df = DataFrame(self.data) + def setup(self, how, axis): + self.df = DataFrame(np.random.randn(10000, 1000)) self.df.ix[50:1000, 20:50] = np.nan self.df.ix[2000:3000] = np.nan self.df.ix[:, 60:70] = np.nan self.df_mixed = self.df.copy() self.df_mixed['foo'] = 'bar' - self.df_mi = self.df.copy() - self.df_mi.index = MultiIndex.from_tuples(self.df_mi.index.map((lambda x: (x, x)))) - self.df_mi.columns = MultiIndex.from_tuples(self.df_mi.columns.map((lambda x: (x, x)))) - - self.df_mixed_mi = self.df_mixed.copy() - self.df_mixed_mi.index = MultiIndex.from_tuples(self.df_mixed_mi.index.map((lambda x: (x, x)))) - self.df_mixed_mi.columns = MultiIndex.from_tuples(self.df_mixed_mi.columns.map((lambda x: (x, x)))) + def time_dropna(self, how, axis): + self.df.dropna(how=how, axis=axis) - def time_dropna_axis0_all(self): - self.df.dropna(how='all', axis=0) + def time_dropna_axis_mixed_dtypes(self, how, axis): + self.df_mixed.dropna(how=how, axis=axis) - def time_dropna_axis0_any(self): - self.df.dropna(how='any', axis=0) - def time_dropna_axis1_all(self): - self.df.dropna(how='all', axis=1) +class Count(object): - def time_dropna_axis1_any(self): - self.df.dropna(how='any', axis=1) - - def time_dropna_axis0_all_mixed_dtypes(self): - self.df_mixed.dropna(how='all', axis=0) - - def time_dropna_axis0_any_mixed_dtypes(self): - self.df_mixed.dropna(how='any', axis=0) - - def time_dropna_axis1_all_mixed_dtypes(self): - self.df_mixed.dropna(how='all', axis=1) + goal_time = 0.2 - def time_dropna_axis1_any_mixed_dtypes(self): - self.df_mixed.dropna(how='any', 
axis=1) + params = [0, 1] + param_names = ['axis'] - def time_count_level_axis0_multi(self): - self.df_mi.count(axis=0, level=1) + def setup(self, axis): + self.df = DataFrame(np.random.randn(10000, 1000)) + self.df.ix[50:1000, 20:50] = np.nan + self.df.ix[2000:3000] = np.nan + self.df.ix[:, 60:70] = np.nan + self.df_mixed = self.df.copy() + self.df_mixed['foo'] = 'bar' - def time_count_level_axis1_multi(self): - self.df_mi.count(axis=1, level=1) + self.df.index = MultiIndex.from_arrays([self.df.index, self.df.index]) + self.df.columns = MultiIndex.from_arrays([self.df.columns, + self.df.columns]) + self.df_mixed.index = MultiIndex.from_arrays([self.df_mixed.index, + self.df_mixed.index]) + self.df_mixed.columns = MultiIndex.from_arrays([self.df_mixed.columns, + self.df_mixed.columns]) - def time_count_level_axis0_mixed_dtypes_multi(self): - self.df_mixed_mi.count(axis=0, level=1) + def time_count_level_multi(self, axis): + self.df.count(axis=axis, level=1) - def time_count_level_axis1_mixed_dtypes_multi(self): - self.df_mixed_mi.count(axis=1, level=1) + def time_count_level_mixed_dtypes_multi(self, axis): + self.df_mixed.count(axis=axis, level=1) class Apply(object): + goal_time = 0.2 def setup(self): @@ -296,32 +291,29 @@ def setup(self): self.s = Series(np.arange(1028.0)) self.df2 = DataFrame({i: self.s for i in range(1028)}) - self.df3 = DataFrame(np.random.randn(1000, 3), columns=list('ABC')) def time_apply_user_func(self): - self.df2.apply((lambda x: np.corrcoef(x, self.s)[(0, 1)])) + self.df2.apply(lambda x: np.corrcoef(x, self.s)[(0, 1)]) def time_apply_axis_1(self): - self.df.apply((lambda x: (x + 1)), axis=1) + self.df.apply(lambda x: x + 1, axis=1) def time_apply_lambda_mean(self): - self.df.apply((lambda x: x.mean())) + self.df.apply(lambda x: x.mean()) def time_apply_np_mean(self): self.df.apply(np.mean) def time_apply_pass_thru(self): - self.df.apply((lambda x: x)) + self.df.apply(lambda x: x) def time_apply_ref_by_name(self): - self.df3.apply((lambda 
x: (x['A'] + x['B'])), axis=1) + self.df3.apply(lambda x: x['A'] + x['B'], axis=1) -#---------------------------------------------------------------------- -# dtypes +class Dtypes(object): -class frame_dtypes(object): goal_time = 0.2 def setup(self): @@ -330,316 +322,170 @@ def setup(self): def time_frame_dtypes(self): self.df.dtypes -#---------------------------------------------------------------------- -# equals class Equals(object): + goal_time = 0.2 def setup(self): - self.float_df = DataFrame(np.random.randn(1000, 1000)) - self.object_df = DataFrame(([(['foo'] * 1000)] * 1000)) - self.nonunique_cols = self.object_df.copy() - self.nonunique_cols.columns = (['A'] * len(self.nonunique_cols.columns)) - self.pairs = dict([(name, self.make_pair(frame)) for (name, frame) in ( - ('float_df', self.float_df), ('object_df', self.object_df), - ('nonunique_cols', self.nonunique_cols))]) - - def make_pair(self, frame): - self.df = frame - self.df2 = self.df.copy() - self.df2.ix[((-1), (-1))] = np.nan - return (self.df, self.df2) + N = 10**3 + self.float_df = DataFrame(np.random.randn(N, N)) + self.float_df_nan = self.float_df.copy() + self.float_df_nan.iloc[-1, -1] = np.nan - def test_equal(self, name): - (self.df, self.df2) = self.pairs[name] - return self.df.equals(self.df) + self.object_df = DataFrame('foo', index=range(N), columns=range(N)) + self.object_df_nan = self.object_df.copy() + self.object_df_nan.iloc[-1, -1] = np.nan - def test_unequal(self, name): - (self.df, self.df2) = self.pairs[name] - return self.df.equals(self.df2) + self.nonunique_cols = self.object_df.copy() + self.nonunique_cols.columns = ['A'] * len(self.nonunique_cols.columns) + self.nonunique_cols_nan = self.nonunique_cols.copy() + self.nonunique_cols_nan.iloc[-1, -1] = np.nan def time_frame_float_equal(self): - self.test_equal('float_df') + self.float_df.equals(self.float_df) def time_frame_float_unequal(self): - self.test_unequal('float_df') + self.float_df.equals(self.float_df_nan) def 
time_frame_nonunique_equal(self): - self.test_equal('nonunique_cols') + self.nonunique_cols.equals(self.nonunique_cols) def time_frame_nonunique_unequal(self): - self.test_unequal('nonunique_cols') + self.nonunique_cols.equals(self.nonunique_cols_nan) def time_frame_object_equal(self): - self.test_equal('object_df') + self.object_df.equals(self.object_df) def time_frame_object_unequal(self): - self.test_unequal('object_df') + self.object_df.equals(self.object_df_nan) class Interpolate(object): + goal_time = 0.2 + params = [None, 'infer'] + param_names = ['downcast'] - def setup(self): + def setup(self, downcast): + N = 10000 # this is the worst case, where every column has NaNs. - self.df = DataFrame(randn(10000, 100)) + self.df = DataFrame(np.random.randn(N, 100)) self.df.values[::2] = np.nan - self.df2 = DataFrame( - {'A': np.arange(0, 10000), 'B': np.random.randint(0, 100, 10000), - 'C': randn(10000), 'D': randn(10000),}) + self.df2 = DataFrame({'A': np.arange(0, N), + 'B': np.random.randint(0, 100, N), + 'C': np.random.randn(N), + 'D': np.random.randn(N)}) self.df2.loc[1::5, 'A'] = np.nan self.df2.loc[1::5, 'C'] = np.nan - def time_interpolate(self): - self.df.interpolate() - - def time_interpolate_some_good(self): - self.df2.interpolate() + def time_interpolate(self, downcast): + self.df.interpolate(downcast=downcast) - def time_interpolate_some_good_infer(self): - self.df2.interpolate(downcast='infer') + def time_interpolate_some_good(self, downcast): + self.df2.interpolate(downcast=downcast) class Shift(object): # frame shift speedup issue-5609 goal_time = 0.2 + params = [0, 1] + param_names = ['axis'] - def setup(self): + def setup(self, axis): self.df = DataFrame(np.random.rand(10000, 500)) - def time_shift_axis0(self): - self.df.shift(1, axis=0) - - def time_shift_axis_1(self): - self.df.shift(1, axis=1) - - -#----------------------------------------------------------------------------- -# from_records issue-6700 - -class 
frame_from_records_generator(object): - goal_time = 0.2 - - def get_data(self, n=100000): - return ((x, (x * 20), (x * 100)) for x in range(n)) - - def time_frame_from_records_generator(self): - self.df = DataFrame.from_records(self.get_data()) - - def time_frame_from_records_generator_nrows(self): - self.df = DataFrame.from_records(self.get_data(), nrows=1000) + def time_shift(self, axis): + self.df.shift(1, axis=axis) - -#----------------------------------------------------------------------------- -# nunique - -class frame_nunique(object): +class Nunique(object): def setup(self): - self.data = np.random.randn(10000, 1000) - self.df = DataFrame(self.data) + self.df = DataFrame(np.random.randn(10000, 1000)) def time_frame_nunique(self): self.df.nunique() +class Duplicated(object): -#----------------------------------------------------------------------------- -# duplicated - -class frame_duplicated(object): goal_time = 0.2 def setup(self): - self.n = (1 << 20) - self.t = date_range('2015-01-01', freq='S', periods=(self.n // 64)) - self.xs = np.random.randn((self.n // 64)).round(2) - self.df = DataFrame({'a': np.random.randint(((-1) << 8), (1 << 8), self.n), 'b': np.random.choice(self.t, self.n), 'c': np.random.choice(self.xs, self.n), }) - - self.df2 = DataFrame(np.random.randn(1000, 100).astype(str)) + n = (1 << 20) + t = date_range('2015-01-01', freq='S', periods=(n // 64)) + xs = np.random.randn(n // 64).round(2) + self.df = DataFrame({'a': np.random.randint(-1 << 8, 1 << 8, n), + 'b': np.random.choice(t, n), + 'c': np.random.choice(xs, n)}) + self.df2 = DataFrame(np.random.randn(1000, 100).astype(str)).T def time_frame_duplicated(self): self.df.duplicated() def time_frame_duplicated_wide(self): - self.df2.T.duplicated() - - - - - - - - - - - - - - - - - -class frame_xs_col(object): - goal_time = 0.2 - - def setup(self): - self.df = DataFrame(randn(1, 100000)) - - def time_frame_xs_col(self): - self.df.xs(50000, axis=1) - - -class frame_xs_row(object): - 
goal_time = 0.2 - - def setup(self): - self.df = DataFrame(randn(100000, 1)) - - def time_frame_xs_row(self): - self.df.xs(50000) - - -class frame_sort_index(object): - goal_time = 0.2 - - def setup(self): - self.df = DataFrame(randn(1000000, 2), columns=list('AB')) - - def time_frame_sort_index(self): - self.df.sort_index() - - -class frame_sort_index_by_columns(object): - goal_time = 0.2 - - def setup(self): - self.N = 10000 - self.K = 10 - self.key1 = tm.makeStringIndex(self.N).values.repeat(self.K) - self.key2 = tm.makeStringIndex(self.N).values.repeat(self.K) - self.df = DataFrame({'key1': self.key1, 'key2': self.key2, 'value': np.random.randn((self.N * self.K)), }) - self.col_array_list = list(self.df.values.T) - - def time_frame_sort_index_by_columns(self): - self.df.sort_index(by=['key1', 'key2']) - - -class frame_quantile_axis1(object): - goal_time = 0.2 - - def setup(self): - self.df = DataFrame(np.random.randn(1000, 3), - columns=list('ABC')) - - def time_frame_quantile_axis1(self): - self.df.quantile([0.1, 0.5], axis=1) - - -#---------------------------------------------------------------------- -# boolean indexing - -class frame_boolean_row_select(object): - goal_time = 0.2 + self.df2.duplicated() - def setup(self): - self.df = DataFrame(randn(10000, 100)) - self.bool_arr = np.zeros(10000, dtype=bool) - self.bool_arr[:1000] = True - def time_frame_boolean_row_select(self): - self.df[self.bool_arr] +class XS(object): -class frame_getitem_single_column(object): goal_time = 0.2 + params = [0, 1] + param_names = ['axis'] - def setup(self): - self.df = DataFrame(randn(10000, 1000)) - self.df2 = DataFrame(randn(3000, 1), columns=['A']) - self.df3 = DataFrame(randn(3000, 1)) - - def h(self): - for i in range(10000): - self.df2['A'] - - def j(self): - for i in range(10000): - self.df3[0] + def setup(self, axis): + self.N = 10**4 + self.df = DataFrame(np.random.randn(self.N, self.N)) - def time_frame_getitem_single_column(self): - self.h() + def 
time_frame_xs(self, axis): + self.df.xs(self.N / 2, axis=axis) - def time_frame_getitem_single_column2(self): - self.j() +class SortValues(object): -#---------------------------------------------------------------------- -# assignment - -class frame_assign_timeseries_index(object): goal_time = 0.2 + params = [True, False] + param_names = ['ascending'] - def setup(self): - self.idx = date_range('1/1/2000', periods=100000, freq='D') - self.df = DataFrame(randn(100000, 1), columns=['A'], index=self.idx) - - def time_frame_assign_timeseries_index(self): - self.f(self.df) + def setup(self, ascending): + self.df = DataFrame(np.random.randn(1000000, 2), columns=list('AB')) - def f(self, df): - self.x = self.df.copy() - self.x['date'] = self.x.index + def time_frame_sort_values(self, ascending): + self.df.sort_values(by='A', ascending=ascending) +class SortIndexByColumns(object): -# insert many columns - -class frame_insert_100_columns_begin(object): goal_time = 0.2 def setup(self): - self.N = 1000 - - def f(self, K=100): - self.df = DataFrame(index=range(self.N)) - self.new_col = np.random.randn(self.N) - for i in range(K): - self.df.insert(0, i, self.new_col) + N = 10000 + K = 10 + self.df = DataFrame({'key1': tm.makeStringIndex(N).values.repeat(K), + 'key2': tm.makeStringIndex(N).values.repeat(K), + 'value': np.random.randn(N * K)}) - def g(self, K=500): - self.df = DataFrame(index=range(self.N)) - self.new_col = np.random.randn(self.N) - for i in range(K): - self.df[i] = self.new_col + def time_frame_sort_values_by_columns(self): + self.df.sort_values(by=['key1', 'key2']) - def time_frame_insert_100_columns_begin(self): - self.f() - def time_frame_insert_500_columns_end(self): - self.g() +class Quantile(object): - - -#---------------------------------------------------------------------- -# strings methods, #2602 - -class series_string_vector_slice(object): goal_time = 0.2 + params = [0, 1] + param_names = ['axis'] - def setup(self): - self.s = Series((['abcdefg', 
np.nan] * 500000)) - - def time_series_string_vector_slice(self): - self.s.str[:5] + def setup(self, axis): + self.df = DataFrame(np.random.randn(1000, 3), columns=list('ABC')) + def time_frame_quantile(self, axis): + self.df.quantile([0.1, 0.5], axis=axis) -#---------------------------------------------------------------------- -# df.info() and get_dtype_counts() # 2807 -class frame_get_dtype_counts(object): +class GetDtypeCounts(object): + # 2807 goal_time = 0.2 def setup(self): @@ -648,13 +494,21 @@ def setup(self): def time_frame_get_dtype_counts(self): self.df.get_dtype_counts() + def time_info(self): + self.df.info() + + +class NSort(object): -class frame_nlargest(object): goal_time = 0.2 + params = ['first', 'last'] + param_names = ['keep'] - def setup(self): - self.df = DataFrame(np.random.randn(1000, 3), - columns=list('ABC')) + def setup(self, keep): + self.df = DataFrame(np.random.randn(1000, 3), columns=list('ABC')) + + def time_nlargest(self, keep): + self.df.nlargest(100, 'A', keep=keep) - def time_frame_nlargest(self): - self.df.nlargest(100, 'A') + def time_nsmallest(self, keep): + self.df.nsmallest(100, 'A', keep=keep) diff --git a/asv_bench/benchmarks/gil.py b/asv_bench/benchmarks/gil.py index 1c5e59672cb57..21c1ccf46e1c4 100644 --- a/asv_bench/benchmarks/gil.py +++ b/asv_bench/benchmarks/gil.py @@ -1,235 +1,139 @@ -from .pandas_vb_common import * -from pandas.core import common as com - +import numpy as np +import pandas.util.testing as tm +from pandas import DataFrame, Series, read_csv, factorize, date_range +from pandas.core.algorithms import take_1d try: - from cStringIO import StringIO + from pandas import (rolling_median, rolling_mean, rolling_min, rolling_max, + rolling_var, rolling_skew, rolling_kurt, rolling_std) + have_rolling_methods = True except ImportError: - from io import StringIO - + have_rolling_methods = False +try: + from pandas._libs import algos +except ImportError: + from pandas import algos try: from pandas.util.testing 
import test_parallel - have_real_test_parallel = True except ImportError: have_real_test_parallel = False - def test_parallel(num_threads=1): - def wrapper(fname): return fname - return wrapper +from .pandas_vb_common import BaseIO, setup # noqa -class NoGilGroupby(object): - goal_time = 0.2 - def setup(self): - self.N = 1000000 - self.ngroups = 1000 - np.random.seed(1234) - self.df = DataFrame({'key': np.random.randint(0, self.ngroups, size=self.N), 'data': np.random.randn(self.N), }) +class ParallelGroupbyMethods(object): - np.random.seed(1234) - self.size = 2 ** 22 - self.ngroups = 100 - self.data = Series(np.random.randint(0, self.ngroups, size=self.size)) + goal_time = 0.2 + params = ([2, 4, 8], ['count', 'last', 'max', 'mean', 'min', 'prod', + 'sum', 'var']) + param_names = ['threads', 'method'] - if (not have_real_test_parallel): + def setup(self, threads, method): + if not have_real_test_parallel: raise NotImplementedError + N = 10**6 + ngroups = 10**3 + df = DataFrame({'key': np.random.randint(0, ngroups, size=N), + 'data': np.random.randn(N)}) - @test_parallel(num_threads=2) - def _pg2_count(self): - self.df.groupby('key')['data'].count() - - def time_count_2(self): - self._pg2_count() - - @test_parallel(num_threads=2) - def _pg2_last(self): - self.df.groupby('key')['data'].last() - - def time_last_2(self): - self._pg2_last() - - @test_parallel(num_threads=2) - def _pg2_max(self): - self.df.groupby('key')['data'].max() - - def time_max_2(self): - self._pg2_max() - - @test_parallel(num_threads=2) - def _pg2_mean(self): - self.df.groupby('key')['data'].mean() - - def time_mean_2(self): - self._pg2_mean() - - @test_parallel(num_threads=2) - def _pg2_min(self): - self.df.groupby('key')['data'].min() - - def time_min_2(self): - self._pg2_min() + @test_parallel(num_threads=threads) + def parallel(): + getattr(df.groupby('key')['data'], method)() + self.parallel = parallel - @test_parallel(num_threads=2) - def _pg2_prod(self): - 
self.df.groupby('key')['data'].prod() + def loop(): + getattr(df.groupby('key')['data'], method)() + self.loop = loop - def time_prod_2(self): - self._pg2_prod() + def time_parallel(self, threads, method): + self.parallel() - @test_parallel(num_threads=2) - def _pg2_sum(self): - self.df.groupby('key')['data'].sum() + def time_loop(self, threads, method): + for i in range(threads): + self.loop() - def time_sum_2(self): - self._pg2_sum() - @test_parallel(num_threads=4) - def _pg4_sum(self): - self.df.groupby('key')['data'].sum() +class ParallelGroups(object): - def time_sum_4(self): - self._pg4_sum() - - def time_sum_4_notp(self): - for i in range(4): - self.df.groupby('key')['data'].sum() - - def _f_sum(self): - self.df.groupby('key')['data'].sum() - - @test_parallel(num_threads=8) - def _pg8_sum(self): - self._f_sum() - - def time_sum_8(self): - self._pg8_sum() - - def time_sum_8_notp(self): - for i in range(8): - self._f_sum() - - @test_parallel(num_threads=2) - def _pg2_var(self): - self.df.groupby('key')['data'].var() - - def time_var_2(self): - self._pg2_var() - - # get groups - - def _groups(self): - self.data.groupby(self.data).groups - - @test_parallel(num_threads=2) - def _pg2_groups(self): - self._groups() - - def time_groups_2(self): - self._pg2_groups() - - @test_parallel(num_threads=4) - def _pg4_groups(self): - self._groups() - - def time_groups_4(self): - self._pg4_groups() - - @test_parallel(num_threads=8) - def _pg8_groups(self): - self._groups() - - def time_groups_8(self): - self._pg8_groups() - - - -class nogil_take1d_float64(object): goal_time = 0.2 + params = [2, 4, 8] + param_names = ['threads'] - def setup(self): - self.N = 1000000 - self.ngroups = 1000 - np.random.seed(1234) - self.df = DataFrame({'key': np.random.randint(0, self.ngroups, size=self.N), 'data': np.random.randn(self.N), }) - if (not have_real_test_parallel): + def setup(self, threads): + if not have_real_test_parallel: raise NotImplementedError - self.N = 10000000.0 - self.df 
= DataFrame({'int64': np.arange(self.N, dtype='int64'), 'float64': np.arange(self.N, dtype='float64'), }) - self.indexer = np.arange(100, (len(self.df) - 100)) + size = 2**22 + ngroups = 10**3 + data = Series(np.random.randint(0, ngroups, size=size)) - def time_nogil_take1d_float64(self): - self.take_1d_pg2_int64() + @test_parallel(num_threads=threads) + def get_groups(): + data.groupby(data).groups + self.get_groups = get_groups - @test_parallel(num_threads=2) - def take_1d_pg2_int64(self): - com.take_1d(self.df.int64.values, self.indexer) + def time_get_groups(self, threads): + self.get_groups() - @test_parallel(num_threads=2) - def take_1d_pg2_float64(self): - com.take_1d(self.df.float64.values, self.indexer) +class ParallelTake1D(object): -class nogil_take1d_int64(object): goal_time = 0.2 + params = ['int64', 'float64'] + param_names = ['dtype'] - def setup(self): - self.N = 1000000 - self.ngroups = 1000 - np.random.seed(1234) - self.df = DataFrame({'key': np.random.randint(0, self.ngroups, size=self.N), 'data': np.random.randn(self.N), }) - if (not have_real_test_parallel): + def setup(self, dtype): + if not have_real_test_parallel: raise NotImplementedError - self.N = 10000000.0 - self.df = DataFrame({'int64': np.arange(self.N, dtype='int64'), 'float64': np.arange(self.N, dtype='float64'), }) - self.indexer = np.arange(100, (len(self.df) - 100)) + N = 10**6 + df = DataFrame({'col': np.arange(N, dtype=dtype)}) + indexer = np.arange(100, len(df) - 100) - def time_nogil_take1d_int64(self): - self.take_1d_pg2_float64() + @test_parallel(num_threads=2) + def parallel_take1d(): + take_1d(df['col'].values, indexer) + self.parallel_take1d = parallel_take1d - @test_parallel(num_threads=2) - def take_1d_pg2_int64(self): - com.take_1d(self.df.int64.values, self.indexer) + def time_take1d(self, dtype): + self.parallel_take1d() - @test_parallel(num_threads=2) - def take_1d_pg2_float64(self): - com.take_1d(self.df.float64.values, self.indexer) +class ParallelKth(object): 
-class nogil_kth_smallest(object): number = 1 repeat = 5 def setup(self): - if (not have_real_test_parallel): + if not have_real_test_parallel: raise NotImplementedError - np.random.seed(1234) - self.N = 10000000 - self.k = 500000 - self.a = np.random.randn(self.N) - self.b = self.a.copy() - self.kwargs_list = [{'arr': self.a}, {'arr': self.b}] + N = 10**7 + k = 5 * 10**5 + kwargs_list = [{'arr': np.random.randn(N)}, + {'arr': np.random.randn(N)}] + + @test_parallel(num_threads=2, kwargs_list=kwargs_list) + def parallel_kth_smallest(arr): + algos.kth_smallest(arr, k) + self.parallel_kth_smallest = parallel_kth_smallest - def time_nogil_kth_smallest(self): - @test_parallel(num_threads=2, kwargs_list=self.kwargs_list) - def run(arr): - algos.kth_smallest(arr, self.k) - run() + def time_kth_smallest(self): + self.parallel_kth_smallest() -class nogil_datetime_fields(object): +class ParallelDatetimeFields(object): + goal_time = 0.2 def setup(self): - self.N = 100000000 - self.dti = pd.date_range('1900-01-01', periods=self.N, freq='D') - self.period = self.dti.to_period('D') - if (not have_real_test_parallel): + if not have_real_test_parallel: raise NotImplementedError + N = 10**6 + self.dti = date_range('1900-01-01', periods=N, freq='T') + self.period = self.dti.to_period('D') def time_datetime_field_year(self): @test_parallel(num_threads=2) @@ -268,149 +172,104 @@ def run(period): run(self.period) -class nogil_rolling_algos_slow(object): - goal_time = 0.2 - - def setup(self): - self.win = 100 - np.random.seed(1234) - self.arr = np.random.rand(100000) - if (not have_real_test_parallel): - raise NotImplementedError +class ParallelRolling(object): - def time_nogil_rolling_median(self): - @test_parallel(num_threads=2) - def run(arr, win): - rolling_median(arr, win) - run(self.arr, self.win) - - -class nogil_rolling_algos_fast(object): goal_time = 0.2 + params = ['median', 'mean', 'min', 'max', 'var', 'skew', 'kurt', 'std'] + param_names = ['method'] - def setup(self): - 
self.win = 100 - np.random.seed(1234) - self.arr = np.random.rand(1000000) - if (not have_real_test_parallel): + def setup(self, method): + if not have_real_test_parallel: + raise NotImplementedError + win = 100 + arr = np.random.rand(100000) + if hasattr(DataFrame, 'rolling'): + df = DataFrame(arr).rolling(win) + + @test_parallel(num_threads=2) + def parallel_rolling(): + getattr(df, method)() + self.parallel_rolling = parallel_rolling + elif have_rolling_methods: + rolling = {'median': rolling_median, + 'mean': rolling_mean, + 'min': rolling_min, + 'max': rolling_max, + 'var': rolling_var, + 'skew': rolling_skew, + 'kurt': rolling_kurt, + 'std': rolling_std} + + @test_parallel(num_threads=2) + def parallel_rolling(): + rolling[method](arr, win) + self.parallel_rolling = parallel_rolling + else: raise NotImplementedError - def time_nogil_rolling_mean(self): - @test_parallel(num_threads=2) - def run(arr, win): - rolling_mean(arr, win) - run(self.arr, self.win) - - def time_nogil_rolling_min(self): - @test_parallel(num_threads=2) - def run(arr, win): - rolling_min(arr, win) - run(self.arr, self.win) - - def time_nogil_rolling_max(self): - @test_parallel(num_threads=2) - def run(arr, win): - rolling_max(arr, win) - run(self.arr, self.win) - - def time_nogil_rolling_var(self): - @test_parallel(num_threads=2) - def run(arr, win): - rolling_var(arr, win) - run(self.arr, self.win) - - def time_nogil_rolling_skew(self): - @test_parallel(num_threads=2) - def run(arr, win): - rolling_skew(arr, win) - run(self.arr, self.win) - - def time_nogil_rolling_kurt(self): - @test_parallel(num_threads=2) - def run(arr, win): - rolling_kurt(arr, win) - run(self.arr, self.win) + def time_rolling(self, method): + self.parallel_rolling() - def time_nogil_rolling_std(self): - @test_parallel(num_threads=2) - def run(arr, win): - rolling_std(arr, win) - run(self.arr, self.win) +class ParallelReadCSV(BaseIO): -class nogil_read_csv(object): number = 1 repeat = 5 + params = ['float', 'object', 
'datetime'] + param_names = ['dtype'] - def setup(self): - if (not have_real_test_parallel): + def setup(self, dtype): + if not have_real_test_parallel: raise NotImplementedError - # Using the values - self.df = DataFrame(np.random.randn(10000, 50)) - self.df.to_csv('__test__.csv') - - self.rng = date_range('1/1/2000', periods=10000) - self.df_date_time = DataFrame(np.random.randn(10000, 50), index=self.rng) - self.df_date_time.to_csv('__test_datetime__.csv') + rows = 10000 + cols = 50 + data = {'float': DataFrame(np.random.randn(rows, cols)), + 'datetime': DataFrame(np.random.randn(rows, cols), + index=date_range('1/1/2000', + periods=rows)), + 'object': DataFrame('foo', + index=range(rows), + columns=['object%03d'.format(i) + for i in range(5)])} + + self.fname = '__test_{}__.csv'.format(dtype) + df = data[dtype] + df.to_csv(self.fname) - self.df_object = DataFrame('foo', index=self.df.index, columns=self.create_cols('object')) - self.df_object.to_csv('__test_object__.csv') - - def create_cols(self, name): - return [('%s%03d' % (name, i)) for i in range(5)] - - @test_parallel(num_threads=2) - def pg_read_csv(self): - read_csv('__test__.csv', sep=',', header=None, float_precision=None) - - def time_read_csv(self): - self.pg_read_csv() - - @test_parallel(num_threads=2) - def pg_read_csv_object(self): - read_csv('__test_object__.csv', sep=',') - - def time_read_csv_object(self): - self.pg_read_csv_object() + @test_parallel(num_threads=2) + def parallel_read_csv(): + read_csv(self.fname) + self.parallel_read_csv = parallel_read_csv - @test_parallel(num_threads=2) - def pg_read_csv_datetime(self): - read_csv('__test_datetime__.csv', sep=',', header=None) + def time_read_csv(self, dtype): + self.parallel_read_csv() - def time_read_csv_datetime(self): - self.pg_read_csv_datetime() +class ParallelFactorize(object): -class nogil_factorize(object): number = 1 repeat = 5 + params = [2, 4, 8] + param_names = ['threads'] - def setup(self): - if (not have_real_test_parallel): 
+ def setup(self, threads): + if not have_real_test_parallel: raise NotImplementedError - np.random.seed(1234) - self.strings = tm.makeStringIndex(100000) - - def factorize_strings(self): - pd.factorize(self.strings) - - @test_parallel(num_threads=4) - def _pg_factorize_strings_4(self): - self.factorize_strings() + strings = tm.makeStringIndex(100000) - def time_factorize_strings_4(self): - for i in range(2): - self._pg_factorize_strings_4() + @test_parallel(num_threads=threads) + def parallel(): + factorize(strings) + self.parallel = parallel - @test_parallel(num_threads=2) - def _pg_factorize_strings_2(self): - self.factorize_strings() + def loop(): + factorize(strings) + self.loop = loop - def time_factorize_strings_2(self): - for i in range(4): - self._pg_factorize_strings_2() + def time_parallel(self, threads): + self.parallel() - def time_factorize_strings(self): - for i in range(8): - self.factorize_strings() + def time_loop(self, threads): + for i in range(threads): + self.loop() diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index 03ff62568b405..7777322071957 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -1,483 +1,391 @@ -from .pandas_vb_common import * -from string import ascii_letters, digits +import warnings +from string import ascii_letters from itertools import product +from functools import partial +import numpy as np +from pandas import (DataFrame, Series, MultiIndex, date_range, period_range, + TimeGrouper, Categorical) +import pandas.util.testing as tm -class groupby_agg_builtins(object): - goal_time = 0.2 - - def setup(self): - np.random.seed(27182) - self.n = 100000 - self.df = DataFrame(np.random.randint(1, (self.n / 100), (self.n, 3)), columns=['jim', 'joe', 'jolie']) +from .pandas_vb_common import setup # noqa - def time_groupby_agg_builtins1(self): - self.df.groupby('jim').agg([sum, min, max]) - def time_groupby_agg_builtins2(self): - self.df.groupby(['jim', 
'joe']).agg([sum, min, max]) +method_blacklist = { + 'object': {'median', 'prod', 'sem', 'cumsum', 'sum', 'cummin', 'mean', + 'max', 'skew', 'cumprod', 'cummax', 'rank', 'pct_change', 'min', + 'var', 'mad', 'describe', 'std'}, + 'datetime': {'median', 'prod', 'sem', 'cumsum', 'sum', 'mean', 'skew', + 'cumprod', 'cummax', 'pct_change', 'var', 'mad', 'describe', + 'std'} +} -#---------------------------------------------------------------------- -# dict return values -class groupby_apply_dict_return(object): +class ApplyDictReturn(object): goal_time = 0.2 def setup(self): self.labels = np.arange(1000).repeat(10) - self.data = Series(randn(len(self.labels))) - self.f = (lambda x: {'first': x.values[0], 'last': x.values[(-1)], }) + self.data = Series(np.random.randn(len(self.labels))) def time_groupby_apply_dict_return(self): - self.data.groupby(self.labels).apply(self.f) - - -#---------------------------------------------------------------------- -# groups - -class Groups(object): - goal_time = 0.1 - - size = 2 ** 22 - data = { - 'int64_small': Series(np.random.randint(0, 100, size=size)), - 'int64_large' : Series(np.random.randint(0, 10000, size=size)), - 'object_small': Series(tm.makeStringIndex(100).take(np.random.randint(0, 100, size=size))), - 'object_large': Series(tm.makeStringIndex(10000).take(np.random.randint(0, 10000, size=size))) - } - - param_names = ['df'] - params = ['int64_small', 'int64_large', 'object_small', 'object_large'] - - def setup(self, df): - self.df = self.data[df] - - def time_groupby_groups(self, df): - self.df.groupby(self.df).groups + self.data.groupby(self.labels).apply(lambda x: {'first': x.values[0], + 'last': x.values[-1]}) -#---------------------------------------------------------------------- -# First / last functions +class Apply(object): -class FirstLast(object): goal_time = 0.2 - param_names = ['dtype'] - params = ['float32', 'float64', 'datetime', 'object'] + def setup_cache(self): + N = 10**4 + labels = np.random.randint(0, 
2000, size=N) + labels2 = np.random.randint(0, 3, size=N) + df = DataFrame({'key': labels, + 'key2': labels2, + 'value1': np.random.randn(N), + 'value2': ['foo', 'bar', 'baz', 'qux'] * (N // 4) + }) + return df - # with datetimes (GH7555) + def time_scalar_function_multi_col(self, df): + df.groupby(['key', 'key2']).apply(lambda x: 1) - def setup(self, dtype): + def time_scalar_function_single_col(self, df): + df.groupby('key').apply(lambda x: 1) - if dtype == 'datetime': - self.df = DataFrame( - {'values': date_range('1/1/2011', periods=100000, freq='s'), - 'key': range(100000),}) - elif dtype == 'object': - self.df = DataFrame( - {'values': (['foo'] * 100000), - 'key': range(100000)}) - else: - labels = np.arange(10000).repeat(10) - data = Series(randn(len(labels)), dtype=dtype) - data[::3] = np.nan - data[1::3] = np.nan - labels = labels.take(np.random.permutation(len(labels))) - self.df = DataFrame({'values': data, 'key': labels}) - - def time_groupby_first(self, dtype): - self.df.groupby('key').first() - - def time_groupby_last(self, dtype): - self.df.groupby('key').last() + @staticmethod + def df_copy_function(g): + # ensure that the group name is available (see GH #15062) + g.name + return g.copy() - def time_groupby_nth_any(self, dtype): - self.df.groupby('key').nth(0, dropna='all') + def time_copy_function_multi_col(self, df): + df.groupby(['key', 'key2']).apply(self.df_copy_function) - def time_groupby_nth_none(self, dtype): - self.df.groupby('key').nth(0) + def time_copy_overhead_single_col(self, df): + df.groupby('key').apply(self.df_copy_function) -#---------------------------------------------------------------------- -# DataFrame Apply overhead +class Groups(object): -class groupby_frame_apply(object): goal_time = 0.2 - def setup(self): - self.N = 10000 - self.labels = np.random.randint(0, 2000, size=self.N) - self.labels2 = np.random.randint(0, 3, size=self.N) - self.df = DataFrame({'key': self.labels, 'key2': self.labels2, 'value1': randn(self.N), 
'value2': (['foo', 'bar', 'baz', 'qux'] * (self.N / 4)), }) + param_names = ['key'] + params = ['int64_small', 'int64_large', 'object_small', 'object_large'] - def f(self, g): - return 1 + def setup_cache(self): + size = 10**6 + data = {'int64_small': Series(np.random.randint(0, 100, size=size)), + 'int64_large': Series(np.random.randint(0, 10000, size=size)), + 'object_small': Series( + tm.makeStringIndex(100).take( + np.random.randint(0, 100, size=size))), + 'object_large': Series( + tm.makeStringIndex(10000).take( + np.random.randint(0, 10000, size=size)))} + return data - def time_groupby_frame_apply(self): - self.df.groupby(['key', 'key2']).apply(self.f) + def setup(self, data, key): + self.ser = data[key] - def time_groupby_frame_apply_overhead(self): - self.df.groupby('key').apply(self.f) + def time_series_groups(self, data, key): + self.ser.groupby(self.ser).groups -#---------------------------------------------------------------------- -# 2d grouping, aggregate many columns +class GroupManyLabels(object): -class groupby_frame_cython_many_columns(object): goal_time = 0.2 + params = [1, 1000] + param_names = ['ncols'] - def setup(self): - self.labels = np.random.randint(0, 100, size=1000) - self.df = DataFrame(randn(1000, 1000)) + def setup(self, ncols): + N = 1000 + data = np.random.randn(N, ncols) + self.labels = np.random.randint(0, 100, size=N) + self.df = DataFrame(data) - def time_sum(self): + def time_sum(self, ncols): self.df.groupby(self.labels).sum() -#---------------------------------------------------------------------- -# single key, long, integer key +class Nth(object): -class groupby_frame_singlekey_integer(object): goal_time = 0.2 - def setup(self): - self.data = np.random.randn(100000, 1) - self.labels = np.random.randint(0, 1000, size=100000) - self.df = DataFrame(self.data) - - def time_sum(self): - self.df.groupby(self.labels).sum() + param_names = ['dtype'] + params = ['float32', 'float64', 'datetime', 'object'] + def setup(self, dtype): 
+ N = 10**5 + # with datetimes (GH7555) + if dtype == 'datetime': + values = date_range('1/1/2011', periods=N, freq='s') + elif dtype == 'object': + values = ['foo'] * N + else: + values = np.arange(N).astype(dtype) -#---------------------------------------------------------------------- -# DataFrame nth + key = np.arange(N) + self.df = DataFrame({'key': key, 'values': values}) + self.df.iloc[1, 1] = np.nan # insert missing data -class groupby_nth(object): - goal_time = 0.2 + def time_frame_nth_any(self, dtype): + self.df.groupby('key').nth(0, dropna='any') - def setup(self): - self.df = DataFrame(np.random.randint(1, 100, (10000, 2))) + def time_groupby_nth_all(self, dtype): + self.df.groupby('key').nth(0, dropna='all') - def time_groupby_frame_nth_any(self): - self.df.groupby(0).nth(0, dropna='any') + def time_frame_nth(self, dtype): + self.df.groupby('key').nth(0) - def time_groupby_frame_nth_none(self): - self.df.groupby(0).nth(0) + def time_series_nth_any(self, dtype): + self.df['values'].groupby(self.df['key']).nth(0, dropna='any') - def time_groupby_series_nth_any(self): - self.df[1].groupby(self.df[0]).nth(0, dropna='any') + def time_groupby_nth_all(self, dtype): + self.df['values'].groupby(self.df['key']).nth(0, dropna='all') - def time_groupby_series_nth_none(self): - self.df[1].groupby(self.df[0]).nth(0) + def time_series_nth(self, dtype): + self.df['values'].groupby(self.df['key']).nth(0) -#---------------------------------------------------------------------- -# groupby_indices replacement, chop up Series +class DateAttributes(object): -class groupby_indices(object): goal_time = 0.2 def setup(self): - try: - self.rng = date_range('1/1/2000', '12/31/2005', freq='H') - (self.year, self.month, self.day) = (self.rng.year, self.rng.month, self.rng.day) - except: - self.rng = date_range('1/1/2000', '12/31/2000', offset=datetools.Hour()) - self.year = self.rng.map((lambda x: x.year)) - self.month = self.rng.map((lambda x: x.month)) - self.day = 
self.rng.map((lambda x: x.day)) - self.ts = Series(np.random.randn(len(self.rng)), index=self.rng) - - def time_groupby_indices(self): + rng = date_range('1/1/2000', '12/31/2005', freq='H') + self.year, self.month, self.day = rng.year, rng.month, rng.day + self.ts = Series(np.random.randn(len(rng)), index=rng) + + def time_len_groupby_object(self): len(self.ts.groupby([self.year, self.month, self.day])) -class groupby_int64_overflow(object): +class Int64(object): + goal_time = 0.2 def setup(self): - self.arr = np.random.randint(((-1) << 12), (1 << 12), ((1 << 17), 5)) - self.i = np.random.choice(len(self.arr), (len(self.arr) * 5)) - self.arr = np.vstack((self.arr, self.arr[self.i])) - self.i = np.random.permutation(len(self.arr)) - self.arr = self.arr[self.i] - self.df = DataFrame(self.arr, columns=list('abcde')) - (self.df['jim'], self.df['joe']) = (np.random.randn(2, len(self.df)) * 10) + arr = np.random.randint(-1 << 12, 1 << 12, (1 << 17, 5)) + i = np.random.choice(len(arr), len(arr) * 5) + arr = np.vstack((arr, arr[i])) + i = np.random.permutation(len(arr)) + arr = arr[i] + self.cols = list('abcde') + self.df = DataFrame(arr, columns=self.cols) + self.df['jim'], self.df['joe'] = np.random.randn(2, len(self.df)) * 10 - def time_groupby_int64_overflow(self): - self.df.groupby(list('abcde')).max() + def time_overflow(self): + self.df.groupby(self.cols).max() -#---------------------------------------------------------------------- -# count() speed +class CountMultiDtype(object): -class groupby_multi_count(object): goal_time = 0.2 - def setup(self): - self.n = 10000 - self.offsets = np.random.randint(self.n, size=self.n).astype('timedelta64[ns]') - self.dates = (np.datetime64('now') + self.offsets) - self.dates[(np.random.rand(self.n) > 0.5)] = np.datetime64('nat') - self.offsets[(np.random.rand(self.n) > 0.5)] = np.timedelta64('nat') - self.value2 = np.random.randn(self.n) - self.value2[(np.random.rand(self.n) > 0.5)] = np.nan - self.obj = 
np.random.choice(list('ab'), size=self.n).astype(object) - self.obj[(np.random.randn(self.n) > 0.5)] = np.nan - self.df = DataFrame({'key1': np.random.randint(0, 500, size=self.n), - 'key2': np.random.randint(0, 100, size=self.n), - 'dates': self.dates, - 'value2': self.value2, - 'value3': np.random.randn(self.n), - 'ints': np.random.randint(0, 1000, size=self.n), - 'obj': self.obj, - 'offsets': self.offsets, }) - - def time_groupby_multi_count(self): - self.df.groupby(['key1', 'key2']).count() - - -class groupby_int_count(object): + def setup_cache(self): + n = 10000 + offsets = np.random.randint(n, size=n).astype('timedelta64[ns]') + dates = np.datetime64('now') + offsets + dates[np.random.rand(n) > 0.5] = np.datetime64('nat') + offsets[np.random.rand(n) > 0.5] = np.timedelta64('nat') + value2 = np.random.randn(n) + value2[np.random.rand(n) > 0.5] = np.nan + obj = np.random.choice(list('ab'), size=n).astype(object) + obj[np.random.randn(n) > 0.5] = np.nan + df = DataFrame({'key1': np.random.randint(0, 500, size=n), + 'key2': np.random.randint(0, 100, size=n), + 'dates': dates, + 'value2': value2, + 'value3': np.random.randn(n), + 'ints': np.random.randint(0, 1000, size=n), + 'obj': obj, + 'offsets': offsets}) + return df + + def time_multi_count(self, df): + df.groupby(['key1', 'key2']).count() + + +class CountMultiInt(object): + goal_time = 0.2 - def setup(self): - self.n = 10000 - self.df = DataFrame({'key1': randint(0, 500, size=self.n), - 'key2': randint(0, 100, size=self.n), - 'ints': randint(0, 1000, size=self.n), - 'ints2': randint(0, 1000, size=self.n), }) + def setup_cache(self): + n = 10000 + df = DataFrame({'key1': np.random.randint(0, 500, size=n), + 'key2': np.random.randint(0, 100, size=n), + 'ints': np.random.randint(0, 1000, size=n), + 'ints2': np.random.randint(0, 1000, size=n)}) + return df - def time_groupby_int_count(self): - self.df.groupby(['key1', 'key2']).count() + def time_multi_int_count(self, df): + df.groupby(['key1', 'key2']).count() 
+ def time_multi_int_nunique(self, df): + df.groupby(['key1', 'key2']).nunique() -#---------------------------------------------------------------------- -# nunique() speed -class groupby_nunique(object): +class AggFunctions(object): - def setup(self): - self.n = 10000 - self.df = DataFrame({'key1': randint(0, 500, size=self.n), - 'key2': randint(0, 100, size=self.n), - 'ints': randint(0, 1000, size=self.n), - 'ints2': randint(0, 1000, size=self.n), }) - - def time_groupby_nunique(self): - self.df.groupby(['key1', 'key2']).nunique() + goal_time = 0.2 + def setup_cache(): + N = 10**5 + fac1 = np.array(['A', 'B', 'C'], dtype='O') + fac2 = np.array(['one', 'two'], dtype='O') + df = DataFrame({'key1': fac1.take(np.random.randint(0, 3, size=N)), + 'key2': fac2.take(np.random.randint(0, 2, size=N)), + 'value1': np.random.randn(N), + 'value2': np.random.randn(N), + 'value3': np.random.randn(N)}) + return df -#---------------------------------------------------------------------- -# group with different functions per column + def time_different_str_functions(self, df): + df.groupby(['key1', 'key2']).agg({'value1': 'mean', + 'value2': 'var', + 'value3': 'sum'}) -class groupby_agg_multi(object): - goal_time = 0.2 + def time_different_numpy_functions(self, df): + df.groupby(['key1', 'key2']).agg({'value1': np.mean, + 'value2': np.var, + 'value3': np.sum}) - def setup(self): - self.fac1 = np.array(['A', 'B', 'C'], dtype='O') - self.fac2 = np.array(['one', 'two'], dtype='O') - self.df = DataFrame({'key1': self.fac1.take(np.random.randint(0, 3, size=100000)), 'key2': self.fac2.take(np.random.randint(0, 2, size=100000)), 'value1': np.random.randn(100000), 'value2': np.random.randn(100000), 'value3': np.random.randn(100000), }) + def time_different_python_functions_multicol(self, df): + df.groupby(['key1', 'key2']).agg([sum, min, max]) - def time_groupby_multi_different_functions(self): - self.df.groupby(['key1', 'key2']).agg({'value1': 'mean', 'value2': 'var', 'value3': 'sum'}) + 
def time_different_python_functions_singlecol(self, df): + df.groupby('key1').agg([sum, min, max]) - def time_groupby_multi_different_numpy_functions(self): - self.df.groupby(['key1', 'key2']).agg({'value1': np.mean, 'value2': np.var, 'value3': np.sum}) +class GroupStrings(object): -class groupby_multi_index(object): goal_time = 0.2 def setup(self): - self.n = (((5 * 7) * 11) * (1 << 9)) - self.alpha = list(map(''.join, product((ascii_letters + digits), repeat=4))) - self.f = (lambda k: np.repeat(np.random.choice(self.alpha, (self.n // k)), k)) - self.df = DataFrame({'a': self.f(11), 'b': self.f(7), 'c': self.f(5), 'd': self.f(1), }) + n = 2 * 10**5 + alpha = list(map(''.join, product(ascii_letters, repeat=4))) + data = np.random.choice(alpha, (n // 5, 4), replace=False) + data = np.repeat(data, 5, axis=0) + self.df = DataFrame(data, columns=list('abcd')) self.df['joe'] = (np.random.randn(len(self.df)) * 10).round(3) - self.i = np.random.permutation(len(self.df)) - self.df = self.df.iloc[self.i].reset_index(drop=True).copy() + self.df = self.df.sample(frac=1).reset_index(drop=True) - def time_groupby_multi_index(self): + def time_multi_columns(self): self.df.groupby(list('abcd')).max() -class groupby_multi(object): - goal_time = 0.2 - - def setup(self): - self.N = 100000 - self.ngroups = 100 - self.df = DataFrame({'key1': self.get_test_data(ngroups=self.ngroups), 'key2': self.get_test_data(ngroups=self.ngroups), 'data1': np.random.randn(self.N), 'data2': np.random.randn(self.N), }) - self.simple_series = Series(np.random.randn(self.N)) - self.key1 = self.df['key1'] - - def get_test_data(self, ngroups=100, n=100000): - self.unique_groups = range(self.ngroups) - self.arr = np.asarray(np.tile(self.unique_groups, (n / self.ngroups)), dtype=object) - if (len(self.arr) < n): - self.arr = np.asarray((list(self.arr) + self.unique_groups[:(n - len(self.arr))]), dtype=object) - random.shuffle(self.arr) - return self.arr +class MultiColumn(object): - def f(self): - 
self.df.groupby(['key1', 'key2']).agg((lambda x: x.values.sum())) + goal_time = 0.2 - def time_groupby_multi_cython(self): - self.df.groupby(['key1', 'key2']).sum() + def setup_cache(self): + N = 10**5 + key1 = np.tile(np.arange(100, dtype=object), 1000) + key2 = key1.copy() + np.random.shuffle(key1) + np.random.shuffle(key2) + df = DataFrame({'key1': key1, + 'key2': key2, + 'data1': np.random.randn(N), + 'data2': np.random.randn(N)}) + return df - def time_groupby_multi_python(self): - self.df.groupby(['key1', 'key2'])['data1'].agg((lambda x: x.values.sum())) + def time_lambda_sum(self, df): + df.groupby(['key1', 'key2']).agg(lambda x: x.values.sum()) - def time_groupby_multi_series_op(self): - self.df.groupby(['key1', 'key2'])['data1'].agg(np.std) + def time_cython_sum(self, df): + df.groupby(['key1', 'key2']).sum() - def time_groupby_series_simple_cython(self): - self.simple_series.groupby(self.key1).sum() + def time_col_select_lambda_sum(self, df): + df.groupby(['key1', 'key2'])['data1'].agg(lambda x: x.values.sum()) - def time_groupby_series_simple_rank(self): - self.df.groupby('key1').rank(pct=True) + def time_col_select_numpy_sum(self, df): + df.groupby(['key1', 'key2'])['data1'].agg(np.sum) -#---------------------------------------------------------------------- -# size() speed +class Size(object): -class groupby_size(object): goal_time = 0.2 def setup(self): - self.n = 100000 - self.offsets = np.random.randint(self.n, size=self.n).astype('timedelta64[ns]') - self.dates = (np.datetime64('now') + self.offsets) - self.df = DataFrame({'key1': np.random.randint(0, 500, size=self.n), 'key2': np.random.randint(0, 100, size=self.n), 'value1': np.random.randn(self.n), 'value2': np.random.randn(self.n), 'value3': np.random.randn(self.n), 'dates': self.dates, }) - - def time_groupby_multi_size(self): + n = 10**5 + offsets = np.random.randint(n, size=n).astype('timedelta64[ns]') + dates = np.datetime64('now') + offsets + self.df = DataFrame({'key1': 
np.random.randint(0, 500, size=n), + 'key2': np.random.randint(0, 100, size=n), + 'value1': np.random.randn(n), + 'value2': np.random.randn(n), + 'value3': np.random.randn(n), + 'dates': dates}) + self.draws = Series(np.random.randn(n)) + labels = Series(['foo', 'bar', 'baz', 'qux'] * (n // 4)) + self.cats = labels.astype('category') + + def time_multi_size(self): self.df.groupby(['key1', 'key2']).size() - def time_groupby_dt_size(self): - self.df.groupby(['dates']).size() + def time_dt_timegrouper_size(self): + with warnings.catch_warnings(record=True): + self.df.groupby(TimeGrouper(key='dates', freq='M')).size() - def time_groupby_dt_timegrouper_size(self): - self.df.groupby(TimeGrouper(key='dates', freq='M')).size() + def time_category_size(self): + self.draws.groupby(self.cats).size() -#---------------------------------------------------------------------- -# groupby with a variable value for ngroups +class GroupByMethods(object): -class GroupBySuite(object): goal_time = 0.2 - param_names = ['dtype', 'ngroups'] - params = [['int', 'float'], [100, 10000]] - - def setup(self, dtype, ngroups): - np.random.seed(1234) + param_names = ['dtype', 'method', 'application'] + params = [['int', 'float', 'object', 'datetime'], + ['all', 'any', 'bfill', 'count', 'cumcount', 'cummax', 'cummin', + 'cumprod', 'cumsum', 'describe', 'ffill', 'first', 'head', + 'last', 'mad', 'max', 'min', 'median', 'mean', 'nunique', + 'pct_change', 'prod', 'rank', 'sem', 'shift', 'size', 'skew', + 'std', 'sum', 'tail', 'unique', 'value_counts', 'var'], + ['direct', 'transformation']] + + def setup(self, dtype, method, application): + if method in method_blacklist.get(dtype, {}): + raise NotImplementedError # skip benchmark + ngroups = 1000 size = ngroups * 2 rng = np.arange(ngroups) values = rng.take(np.random.randint(0, ngroups, size=size)) if dtype == 'int': key = np.random.randint(0, size, size=size) - else: + elif dtype == 'float': key = np.concatenate([np.random.random(ngroups) * 0.1, 
np.random.random(ngroups) * 10.0]) + elif dtype == 'object': + key = ['foo'] * size + elif dtype == 'datetime': + key = date_range('1/1/2011', periods=size, freq='s') - self.df = DataFrame({'values': values, - 'key': key}) - - def time_all(self, dtype, ngroups): - self.df.groupby('key')['values'].all() - - def time_any(self, dtype, ngroups): - self.df.groupby('key')['values'].any() - - def time_count(self, dtype, ngroups): - self.df.groupby('key')['values'].count() - - def time_cumcount(self, dtype, ngroups): - self.df.groupby('key')['values'].cumcount() - - def time_cummax(self, dtype, ngroups): - self.df.groupby('key')['values'].cummax() - - def time_cummin(self, dtype, ngroups): - self.df.groupby('key')['values'].cummin() - - def time_cumprod(self, dtype, ngroups): - self.df.groupby('key')['values'].cumprod() - - def time_cumsum(self, dtype, ngroups): - self.df.groupby('key')['values'].cumsum() - - def time_describe(self, dtype, ngroups): - self.df.groupby('key')['values'].describe() - - def time_diff(self, dtype, ngroups): - self.df.groupby('key')['values'].diff() - - def time_first(self, dtype, ngroups): - self.df.groupby('key')['values'].first() - - def time_head(self, dtype, ngroups): - self.df.groupby('key')['values'].head() - - def time_last(self, dtype, ngroups): - self.df.groupby('key')['values'].last() - - def time_mad(self, dtype, ngroups): - self.df.groupby('key')['values'].mad() - - def time_max(self, dtype, ngroups): - self.df.groupby('key')['values'].max() - - def time_mean(self, dtype, ngroups): - self.df.groupby('key')['values'].mean() - - def time_median(self, dtype, ngroups): - self.df.groupby('key')['values'].median() - - def time_min(self, dtype, ngroups): - self.df.groupby('key')['values'].min() - - def time_nunique(self, dtype, ngroups): - self.df.groupby('key')['values'].nunique() - - def time_pct_change(self, dtype, ngroups): - self.df.groupby('key')['values'].pct_change() - - def time_prod(self, dtype, ngroups): - 
self.df.groupby('key')['values'].prod() - - def time_rank(self, dtype, ngroups): - self.df.groupby('key')['values'].rank() - - def time_sem(self, dtype, ngroups): - self.df.groupby('key')['values'].sem() - - def time_size(self, dtype, ngroups): - self.df.groupby('key')['values'].size() - - def time_skew(self, dtype, ngroups): - self.df.groupby('key')['values'].skew() - - def time_std(self, dtype, ngroups): - self.df.groupby('key')['values'].std() - - def time_sum(self, dtype, ngroups): - self.df.groupby('key')['values'].sum() + df = DataFrame({'values': values, 'key': key}) - def time_tail(self, dtype, ngroups): - self.df.groupby('key')['values'].tail() + if application == 'transform': + if method == 'describe': + raise NotImplementedError - def time_unique(self, dtype, ngroups): - self.df.groupby('key')['values'].unique() + self.as_group_method = lambda: df.groupby( + 'key')['values'].transform(method) + self.as_field_method = lambda: df.groupby( + 'values')['key'].transform(method) + else: + self.as_group_method = getattr(df.groupby('key')['values'], method) + self.as_field_method = getattr(df.groupby('values')['key'], method) - def time_value_counts(self, dtype, ngroups): - self.df.groupby('key')['values'].value_counts() + def time_dtype_as_group(self, dtype, method, application): + self.as_group_method() - def time_var(self, dtype, ngroups): - self.df.groupby('key')['values'].var() + def time_dtype_as_field(self, dtype, method, application): + self.as_field_method() -class groupby_float32(object): +class Float32(object): # GH 13335 goal_time = 0.2 @@ -488,134 +396,112 @@ def setup(self): arr = np.repeat(tmp, 10) self.df = DataFrame(dict(a=arr, b=arr)) - def time_groupby_sum(self): + def time_sum(self): self.df.groupby(['a'])['b'].sum() -class groupby_period(object): - # GH 14338 - goal_time = 0.2 - - def make_grouper(self, N): - return pd.period_range('1900-01-01', freq='D', periods=N) +class Categories(object): - def setup(self): - N = 10000 - self.grouper = 
self.make_grouper(N) - self.df = pd.DataFrame(np.random.randn(N, 2)) - - def time_groupby_sum(self): - self.df.groupby(self.grouper).sum() - - -class groupby_datetime(groupby_period): - def make_grouper(self, N): - return pd.date_range('1900-01-01', freq='D', periods=N) - - -class groupby_datetimetz(groupby_period): - def make_grouper(self, N): - return pd.date_range('1900-01-01', freq='D', periods=N, - tz='US/Central') - -#---------------------------------------------------------------------- -# Series.value_counts - -class series_value_counts(object): goal_time = 0.2 def setup(self): - self.s = Series(np.random.randint(0, 1000, size=100000)) - self.s2 = self.s.astype(float) + N = 10**5 + arr = np.random.random(N) + data = {'a': Categorical(np.random.randint(10000, size=N)), + 'b': arr} + self.df = DataFrame(data) + data = {'a': Categorical(np.random.randint(10000, size=N), + ordered=True), + 'b': arr} + self.df_ordered = DataFrame(data) + data = {'a': Categorical(np.random.randint(100, size=N), + categories=np.arange(10000)), + 'b': arr} + self.df_extra_cat = DataFrame(data) - self.K = 1000 - self.N = 100000 - self.uniques = tm.makeStringIndex(self.K).values - self.s3 = Series(np.tile(self.uniques, (self.N // self.K))) + def time_groupby_sort(self): + self.df.groupby('a')['b'].count() - def time_value_counts_int64(self): - self.s.value_counts() + def time_groupby_nosort(self): + self.df.groupby('a', sort=False)['b'].count() - def time_value_counts_float64(self): - self.s2.value_counts() + def time_groupby_ordered_sort(self): + self.df_ordered.groupby('a')['b'].count() - def time_value_counts_strings(self): - self.s.value_counts() + def time_groupby_ordered_nosort(self): + self.df_ordered.groupby('a', sort=False)['b'].count() + def time_groupby_extra_cat_sort(self): + self.df_extra_cat.groupby('a')['b'].count() -#---------------------------------------------------------------------- -# pivot_table + def time_groupby_extra_cat_nosort(self): + 
self.df_extra_cat.groupby('a', sort=False)['b'].count() -class groupby_pivot_table(object): - goal_time = 0.2 - - def setup(self): - self.fac1 = np.array(['A', 'B', 'C'], dtype='O') - self.fac2 = np.array(['one', 'two'], dtype='O') - self.ind1 = np.random.randint(0, 3, size=100000) - self.ind2 = np.random.randint(0, 2, size=100000) - self.df = DataFrame({'key1': self.fac1.take(self.ind1), 'key2': self.fac2.take(self.ind2), 'key3': self.fac2.take(self.ind2), 'value1': np.random.randn(100000), 'value2': np.random.randn(100000), 'value3': np.random.randn(100000), }) - - def time_groupby_pivot_table(self): - self.df.pivot_table(index='key1', columns=['key2', 'key3']) +class Datelike(object): + # GH 14338 + goal_time = 0.2 + params = ['period_range', 'date_range', 'date_range_tz'] + param_names = ['grouper'] + + def setup(self, grouper): + N = 10**4 + rng_map = {'period_range': period_range, + 'date_range': date_range, + 'date_range_tz': partial(date_range, tz='US/Central')} + self.grouper = rng_map[grouper]('1900-01-01', freq='D', periods=N) + self.df = DataFrame(np.random.randn(10**4, 2)) + + def time_sum(self, grouper): + self.df.groupby(self.grouper).sum() -#---------------------------------------------------------------------- -# Sum booleans #2692 -class groupby_sum_booleans(object): +class SumBools(object): + # GH 2692 goal_time = 0.2 def setup(self): - self.N = 500 - self.df = DataFrame({'ii': range(self.N), 'bb': [True for x in range(self.N)], }) + N = 500 + self.df = DataFrame({'ii': range(N), + 'bb': [True] * N}) def time_groupby_sum_booleans(self): self.df.groupby('ii').sum() -#---------------------------------------------------------------------- -# multi-indexed group sum #9049 - -class groupby_sum_multiindex(object): +class SumMultiLevel(object): + # GH 9049 goal_time = 0.2 + timeout = 120.0 def setup(self): - self.N = 50 - self.df = DataFrame({'A': (list(range(self.N)) * 2), 'B': list(range((self.N * 2))), 'C': 1, }).set_index(['A', 'B']) + N = 50 + 
self.df = DataFrame({'A': list(range(N)) * 2, + 'B': range(N * 2), + 'C': 1}).set_index(['A', 'B']) def time_groupby_sum_multiindex(self): self.df.groupby(level=[0, 1]).sum() -#------------------------------------------------------------------------------- -# Transform testing - class Transform(object): + goal_time = 0.2 def setup(self): n1 = 400 n2 = 250 - - index = MultiIndex( - levels=[np.arange(n1), pd.util.testing.makeStringIndex(n2)], - labels=[[i for i in range(n1) for _ in range(n2)], - (list(range(n2)) * n1)], - names=['lev1', 'lev2']) - - data = DataFrame(np.random.randn(n1 * n2, 3), - index=index, columns=['col1', 'col20', 'col3']) - step = int((n1 * n2 * 0.1)) - for col in range(len(data.columns)): - idx = col - while (idx < len(data)): - data.set_value(data.index[idx], data.columns[col], np.nan) - idx += step + index = MultiIndex(levels=[np.arange(n1), tm.makeStringIndex(n2)], + labels=[np.repeat(range(n1), n2).tolist(), + list(range(n2)) * n1], + names=['lev1', 'lev2']) + arr = np.random.randn(n1 * n2, 3) + arr[::10000, 0] = np.nan + arr[1::10000, 1] = np.nan + arr[2::10000, 2] = np.nan + data = DataFrame(arr, index=index, columns=['col1', 'col20', 'col3']) self.df = data - self.f_fillna = (lambda x: x.fillna(method='pad')) - np.random.seed(2718281) n = 20000 self.df1 = DataFrame(np.random.randint(1, n, (n, 3)), columns=['jim', 'joe', 'jolie']) @@ -627,10 +513,10 @@ def setup(self): self.df4 = self.df3.copy() self.df4['jim'] = self.df4['joe'] - def time_transform_func(self): - self.df.groupby(level='lev2').transform(self.f_fillna) + def time_transform_lambda_max(self): + self.df.groupby(level='lev1').transform(lambda x: max(x)) - def time_transform_ufunc(self): + def time_transform_ufunc_max(self): self.df.groupby(level='lev1').transform(np.max) def time_transform_multi_key1(self): @@ -646,63 +532,31 @@ def time_transform_multi_key4(self): self.df4.groupby(['jim', 'joe'])['jolie'].transform('max') +class TransformBools(object): - -np.random.seed(0) -N 
= 120000 -N_TRANSITIONS = 1400 -transition_points = np.random.permutation(np.arange(N))[:N_TRANSITIONS] -transition_points.sort() -transitions = np.zeros((N,), dtype=np.bool) -transitions[transition_points] = True -g = transitions.cumsum() -df = DataFrame({'signal': np.random.rand(N), }) - - - - - -class groupby_transform_series(object): goal_time = 0.2 def setup(self): - np.random.seed(0) N = 120000 transition_points = np.sort(np.random.choice(np.arange(N), 1400)) - transitions = np.zeros((N,), dtype=np.bool) + transitions = np.zeros(N, dtype=np.bool) transitions[transition_points] = True self.g = transitions.cumsum() self.df = DataFrame({'signal': np.random.rand(N)}) - def time_groupby_transform_series(self): + def time_transform_mean(self): self.df['signal'].groupby(self.g).transform(np.mean) -class groupby_transform_series2(object): +class TransformNaN(object): + # GH 12737 goal_time = 0.2 def setup(self): - np.random.seed(0) - self.df = DataFrame({'key': (np.arange(100000) // 3), - 'val': np.random.randn(100000)}) - - self.df_nans = pd.DataFrame({'key': np.repeat(np.arange(1000), 10), - 'B': np.nan, - 'C': np.nan}) - self.df_nans.ix[4::10, 'B':'C'] = 5 - - def time_transform_series2(self): - self.df.groupby('key')['val'].transform(np.mean) - - def time_cumprod(self): - self.df.groupby('key').cumprod() - - def time_cumsum(self): - self.df.groupby('key').cumsum() - - def time_shift(self): - self.df.groupby('key').shift() + self.df_nans = DataFrame({'key': np.repeat(np.arange(1000), 10), + 'B': np.nan, + 'C': np.nan}) + self.df_nans.loc[4::10, 'B':'C'] = 5 - def time_transform_dataframe(self): - # GH 12737 + def time_first(self): self.df_nans.groupby('key').transform('first') diff --git a/asv_bench/benchmarks/hdfstore_bench.py b/asv_bench/benchmarks/hdfstore_bench.py deleted file mode 100644 index 78de5267a2969..0000000000000 --- a/asv_bench/benchmarks/hdfstore_bench.py +++ /dev/null @@ -1,122 +0,0 @@ -from .pandas_vb_common import * -import os - - -class 
HDF5(object): - goal_time = 0.2 - - def setup(self): - self.index = tm.makeStringIndex(25000) - self.df = DataFrame({'float1': randn(25000), 'float2': randn(25000),}, - index=self.index) - - self.df_mixed = DataFrame( - {'float1': randn(25000), 'float2': randn(25000), - 'string1': (['foo'] * 25000), - 'bool1': ([True] * 25000), - 'int1': np.random.randint(0, 250000, size=25000),}, - index=self.index) - - self.df_wide = DataFrame(np.random.randn(25000, 100)) - - self.df2 = DataFrame({'float1': randn(25000), 'float2': randn(25000)}, - index=date_range('1/1/2000', periods=25000)) - self.df_wide2 = DataFrame(np.random.randn(25000, 100), - index=date_range('1/1/2000', periods=25000)) - - self.df_dc = DataFrame(np.random.randn(10000, 10), - columns=[('C%03d' % i) for i in range(10)]) - - self.f = '__test__.h5' - self.remove(self.f) - - self.store = HDFStore(self.f) - self.store.put('df1', self.df) - self.store.put('df_mixed', self.df_mixed) - - self.store.append('df5', self.df_mixed) - self.store.append('df7', self.df) - - self.store.append('df9', self.df_wide) - - self.store.append('df11', self.df_wide2) - self.store.append('df12', self.df2) - - def teardown(self): - self.store.close() - - def remove(self, f): - try: - os.remove(self.f) - except: - pass - - def time_read_store(self): - self.store.get('df1') - - def time_read_store_mixed(self): - self.store.get('df_mixed') - - def time_write_store(self): - self.store.put('df2', self.df) - - def time_write_store_mixed(self): - self.store.put('df_mixed2', self.df_mixed) - - def time_read_store_table_mixed(self): - self.store.select('df5') - - def time_write_store_table_mixed(self): - self.store.append('df6', self.df_mixed) - - def time_read_store_table(self): - self.store.select('df7') - - def time_write_store_table(self): - self.store.append('df8', self.df) - - def time_read_store_table_wide(self): - self.store.select('df9') - - def time_write_store_table_wide(self): - self.store.append('df10', self.df_wide) - - def 
time_write_store_table_dc(self): - self.store.append('df15', self.df, data_columns=True) - - def time_query_store_table_wide(self): - self.store.select('df11', [('index', '>', self.df_wide2.index[10000]), - ('index', '<', self.df_wide2.index[15000])]) - - def time_query_store_table(self): - self.store.select('df12', [('index', '>', self.df2.index[10000]), - ('index', '<', self.df2.index[15000])]) - - -class HDF5Panel(object): - goal_time = 0.2 - - def setup(self): - self.f = '__test__.h5' - self.p = Panel(randn(20, 1000, 25), - items=[('Item%03d' % i) for i in range(20)], - major_axis=date_range('1/1/2000', periods=1000), - minor_axis=[('E%03d' % i) for i in range(25)]) - self.remove(self.f) - self.store = HDFStore(self.f) - self.store.append('p1', self.p) - - def teardown(self): - self.store.close() - - def remove(self, f): - try: - os.remove(self.f) - except: - pass - - def time_read_store_table_panel(self): - self.store.select('p1') - - def time_write_store_table_panel(self): - self.store.append('p2', self.p) diff --git a/asv_bench/benchmarks/index_object.py b/asv_bench/benchmarks/index_object.py index 3fb53ce9b3c98..f1703e163917a 100644 --- a/asv_bench/benchmarks/index_object.py +++ b/asv_bench/benchmarks/index_object.py @@ -1,201 +1,194 @@ -from .pandas_vb_common import * +import numpy as np +import pandas.util.testing as tm +from pandas import (Series, date_range, DatetimeIndex, Index, RangeIndex, + Float64Index) + +from .pandas_vb_common import setup # noqa class SetOperations(object): - goal_time = 0.2 - def setup(self): - self.rng = date_range('1/1/2000', periods=10000, freq='T') - self.rng2 = self.rng[:(-1)] + goal_time = 0.2 + params = (['datetime', 'date_string', 'int', 'strings'], + ['intersection', 'union', 'symmetric_difference']) + param_names = ['dtype', 'method'] + + def setup(self, dtype, method): + N = 10**5 + dates_left = date_range('1/1/2000', periods=N, freq='T') + fmt = '%Y-%m-%d %H:%M:%S' + date_str_left = Index(dates_left.strftime(fmt)) + 
int_left = Index(np.arange(N)) + str_left = tm.makeStringIndex(N) + data = {'datetime': {'left': dates_left, 'right': dates_left[:-1]}, + 'date_string': {'left': date_str_left, + 'right': date_str_left[:-1]}, + 'int': {'left': int_left, 'right': int_left[:-1]}, + 'strings': {'left': str_left, 'right': str_left[:-1]}} + self.left = data[dtype]['left'] + self.right = data[dtype]['right'] + + def time_operation(self, dtype, method): + getattr(self.left, method)(self.right) + + +class SetDisjoint(object): - # object index with datetime values - if (self.rng.dtype == object): - self.idx_rng = self.rng.view(Index) - else: - self.idx_rng = self.rng.asobject - self.idx_rng2 = self.idx_rng[:(-1)] + goal_time = 0.2 - # other datetime - N = 100000 - A = N - 20000 + def setup(self): + N = 10**5 B = N + 20000 - self.dtidx1 = DatetimeIndex(range(N)) - self.dtidx2 = DatetimeIndex(range(A, B)) - self.dtidx3 = DatetimeIndex(range(N, B)) - - # integer - self.N = 1000000 - self.options = np.arange(self.N) - self.left = Index( - self.options.take(np.random.permutation(self.N)[:(self.N // 2)])) - self.right = Index( - self.options.take(np.random.permutation(self.N)[:(self.N // 2)])) - - # strings - N = 10000 - strs = tm.rands_array(10, N) - self.leftstr = Index(strs[:N * 2 // 3]) - self.rightstr = Index(strs[N // 3:]) - - def time_datetime_intersection(self): - self.rng.intersection(self.rng2) - - def time_datetime_union(self): - self.rng.union(self.rng2) - - def time_datetime_difference(self): - self.dtidx1.difference(self.dtidx2) + self.datetime_left = DatetimeIndex(range(N)) + self.datetime_right = DatetimeIndex(range(N, B)) def time_datetime_difference_disjoint(self): - self.dtidx1.difference(self.dtidx3) - - def time_datetime_symmetric_difference(self): - self.dtidx1.symmetric_difference(self.dtidx2) - - def time_index_datetime_intersection(self): - self.idx_rng.intersection(self.idx_rng2) - - def time_index_datetime_union(self): - self.idx_rng.union(self.idx_rng2) - - def 
time_int64_intersection(self): - self.left.intersection(self.right) - - def time_int64_union(self): - self.left.union(self.right) - - def time_int64_difference(self): - self.left.difference(self.right) - - def time_int64_symmetric_difference(self): - self.left.symmetric_difference(self.right) - - def time_str_difference(self): - self.leftstr.difference(self.rightstr) - - def time_str_symmetric_difference(self): - self.leftstr.symmetric_difference(self.rightstr) + self.datetime_left.difference(self.datetime_right) class Datetime(object): + goal_time = 0.2 def setup(self): - self.dr = pd.date_range('20000101', freq='D', periods=10000) + self.dr = date_range('20000101', freq='D', periods=10000) def time_is_dates_only(self): self.dr._is_dates_only -class Float64(object): - goal_time = 0.2 - - def setup(self): - self.idx = tm.makeFloatIndex(1000000) - self.mask = ((np.arange(self.idx.size) % 3) == 0) - self.series_mask = Series(self.mask) +class Ops(object): - self.baseidx = np.arange(1000000.0) + sample_time = 0.2 + params = ['float', 'int'] + param_names = ['dtype'] - def time_boolean_indexer(self): - self.idx[self.mask] + def setup(self, dtype): + N = 10**6 + indexes = {'int': 'makeIntIndex', 'float': 'makeFloatIndex'} + self.index = getattr(tm, indexes[dtype])(N) - def time_boolean_series_indexer(self): - self.idx[self.series_mask] + def time_add(self, dtype): + self.index + 2 - def time_construct(self): - Index(self.baseidx) + def time_subtract(self, dtype): + self.index - 2 - def time_div(self): - (self.idx / 2) + def time_multiply(self, dtype): + self.index * 2 - def time_get(self): - self.idx[1] + def time_divide(self, dtype): + self.index / 2 - def time_mul(self): - (self.idx * 2) + def time_modulo(self, dtype): + self.index % 2 - def time_slice_indexer_basic(self): - self.idx[:(-1)] - def time_slice_indexer_even(self): - self.idx[::2] +class Range(object): - -class StringIndex(object): goal_time = 0.2 def setup(self): - self.idx = tm.makeStringIndex(1000000) - 
self.mask = ((np.arange(1000000) % 3) == 0) - self.series_mask = Series(self.mask) + self.idx_inc = RangeIndex(start=0, stop=10**7, step=3) + self.idx_dec = RangeIndex(start=10**7, stop=-1, step=-3) - def time_boolean_indexer(self): - self.idx[self.mask] + def time_max(self): + self.idx_inc.max() - def time_boolean_series_indexer(self): - self.idx[self.series_mask] + def time_max_trivial(self): + self.idx_dec.max() - def time_slice_indexer_basic(self): - self.idx[:(-1)] + def time_min(self): + self.idx_dec.min() - def time_slice_indexer_even(self): - self.idx[::2] + def time_min_trivial(self): + self.idx_inc.min() -class Multi1(object): +class IndexAppend(object): + goal_time = 0.2 def setup(self): - (n, k) = (200, 5000) - self.levels = [np.arange(n), tm.makeStringIndex(n).values, (1000 + np.arange(n))] - self.labels = [np.random.choice(n, (k * n)) for lev in self.levels] - self.mi = MultiIndex(levels=self.levels, labels=self.labels) - self.iterables = [tm.makeStringIndex(10000), range(20)] + N = 10000 + self.range_idx = RangeIndex(0, 100) + self.int_idx = self.range_idx.astype(int) + self.obj_idx = self.int_idx.astype(str) + self.range_idxs = [] + self.int_idxs = [] + self.object_idxs = [] + for i in range(1, N): + r_idx = RangeIndex(i * 100, (i + 1) * 100) + self.range_idxs.append(r_idx) + i_idx = r_idx.astype(int) + self.int_idxs.append(i_idx) + o_idx = i_idx.astype(str) + self.object_idxs.append(o_idx) - def time_duplicated(self): - self.mi.duplicated() + def time_append_range_list(self): + self.range_idx.append(self.range_idxs) - def time_from_product(self): - MultiIndex.from_product(self.iterables) + def time_append_int_list(self): + self.int_idx.append(self.int_idxs) + def time_append_obj_list(self): + self.obj_idx.append(self.object_idxs) + + +class Indexing(object): -class Multi2(object): goal_time = 0.2 + params = ['String', 'Float', 'Int'] + param_names = ['dtype'] + + def setup(self, dtype): + N = 10**6 + self.idx = getattr(tm, 
'make{}Index'.format(dtype))(N) + self.array_mask = (np.arange(N) % 3) == 0 + self.series_mask = Series(self.array_mask) + self.sorted = self.idx.sort_values() + half = N // 2 + self.non_unique = self.idx[:half].append(self.idx[:half]) + self.non_unique_sorted = self.sorted[:half].append(self.sorted[:half]) + self.key = self.sorted[N // 4] + + def time_boolean_array(self, dtype): + self.idx[self.array_mask] + + def time_boolean_series(self, dtype): + self.idx[self.series_mask] - def setup(self): - self.n = ((((3 * 5) * 7) * 11) * (1 << 10)) - (low, high) = (((-1) << 12), (1 << 12)) - self.f = (lambda k: np.repeat(np.random.randint(low, high, (self.n // k)), k)) - self.i = np.random.permutation(self.n) - self.mi = MultiIndex.from_arrays([self.f(11), self.f(7), self.f(5), self.f(3), self.f(1)])[self.i] + def time_get(self, dtype): + self.idx[1] - self.a = np.repeat(np.arange(100), 1000) - self.b = np.tile(np.arange(1000), 100) - self.midx2 = MultiIndex.from_arrays([self.a, self.b]) - self.midx2 = self.midx2.take(np.random.permutation(np.arange(100000))) + def time_slice(self, dtype): + self.idx[:-1] - def time_sortlevel_int64(self): - self.mi.sortlevel() + def time_slice_step(self, dtype): + self.idx[::2] + + def time_get_loc(self, dtype): + self.idx.get_loc(self.key) + + def time_get_loc_sorted(self, dtype): + self.sorted.get_loc(self.key) - def time_sortlevel_zero(self): - self.midx2.sortlevel(0) + def time_get_loc_non_unique(self, dtype): + self.non_unique.get_loc(self.key) - def time_sortlevel_one(self): - self.midx2.sortlevel(1) + def time_get_loc_non_unique_sorted(self, dtype): + self.non_unique_sorted.get_loc(self.key) -class Multi3(object): +class Float64IndexMethod(object): + # GH 13166 goal_time = 0.2 def setup(self): - self.level1 = range(1000) - self.level2 = date_range(start='1/1/2012', periods=100) - self.mi = MultiIndex.from_product([self.level1, self.level2]) - - def time_datetime_level_values_full(self): - self.mi.copy().values + N = 100000 + a = 
np.arange(N) + self.ind = Float64Index(a * 4.8000000418824129e-08) - def time_datetime_level_values_sliced(self): - self.mi[:10].values + def time_get_loc(self): + self.ind.get_loc(0) diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py index 27cd320c661e0..77e013e1e4fb0 100644 --- a/asv_bench/benchmarks/indexing.py +++ b/asv_bench/benchmarks/indexing.py @@ -1,211 +1,310 @@ -from .pandas_vb_common import * -try: - import pandas.computation.expressions as expr -except: - expr = None +import warnings +import numpy as np +import pandas.util.testing as tm +from pandas import (Series, DataFrame, MultiIndex, Int64Index, Float64Index, + IntervalIndex, IndexSlice, concat, date_range) +from .pandas_vb_common import setup, Panel # noqa + + +class NumericSeriesIndexing(object): -class Int64Indexing(object): goal_time = 0.2 + params = [Int64Index, Float64Index] + param = ['index'] - def setup(self): - self.s = Series(np.random.rand(1000000)) + def setup(self, index): + N = 10**6 + idx = index(range(N)) + self.data = Series(np.random.rand(N), index=idx) + self.array = np.arange(10000) + self.array_list = self.array.tolist() - def time_getitem_scalar(self): - self.s[800000] + def time_getitem_scalar(self, index): + self.data[800000] - def time_getitem_slice(self): - self.s[:800000] + def time_getitem_slice(self, index): + self.data[:800000] - def time_getitem_list_like(self): - self.s[[800000]] + def time_getitem_list_like(self, index): + self.data[[800000]] - def time_getitem_array(self): - self.s[np.arange(10000)] + def time_getitem_array(self, index): + self.data[self.array] - def time_iloc_array(self): - self.s.iloc[np.arange(10000)] + def time_getitem_lists(self, index): + self.data[self.array_list] - def time_iloc_list_like(self): - self.s.iloc[[800000]] + def time_iloc_array(self, index): + self.data.iloc[self.array] - def time_iloc_scalar(self): - self.s.iloc[800000] + def time_iloc_list_like(self, index): + self.data.iloc[[800000]] - def 
time_iloc_slice(self): - self.s.iloc[:800000] + def time_iloc_scalar(self, index): + self.data.iloc[800000] - def time_ix_array(self): - self.s.ix[np.arange(10000)] + def time_iloc_slice(self, index): + self.data.iloc[:800000] - def time_ix_list_like(self): - self.s.ix[[800000]] + def time_ix_array(self, index): + self.data.ix[self.array] - def time_ix_scalar(self): - self.s.ix[800000] + def time_ix_list_like(self, index): + self.data.ix[[800000]] - def time_ix_slice(self): - self.s.ix[:800000] + def time_ix_scalar(self, index): + self.data.ix[800000] - def time_loc_array(self): - self.s.loc[np.arange(10000)] + def time_ix_slice(self, index): + self.data.ix[:800000] - def time_loc_list_like(self): - self.s.loc[[800000]] + def time_loc_array(self, index): + self.data.loc[self.array] - def time_loc_scalar(self): - self.s.loc[800000] + def time_loc_list_like(self, index): + self.data.loc[[800000]] - def time_loc_slice(self): - self.s.loc[:800000] + def time_loc_scalar(self, index): + self.data.loc[800000] + def time_loc_slice(self, index): + self.data.loc[:800000] -class StringIndexing(object): - goal_time = 0.2 - def setup(self): - self.index = tm.makeStringIndex(1000000) - self.s = Series(np.random.rand(1000000), index=self.index) - self.lbl = self.s.index[800000] +class NonNumericSeriesIndexing(object): - def time_getitem_label_slice(self): + goal_time = 0.2 + params = ['string', 'datetime'] + param_names = ['index'] + + def setup(self, index): + N = 10**5 + indexes = {'string': tm.makeStringIndex(N), + 'datetime': date_range('1900', periods=N, freq='s')} + index = indexes[index] + self.s = Series(np.random.rand(N), index=index) + self.lbl = index[80000] + + def time_getitem_label_slice(self, index): self.s[:self.lbl] - def time_getitem_pos_slice(self): - self.s[:800000] - - def time_get_value(self): - self.s.get_value(self.lbl) + def time_getitem_pos_slice(self, index): + self.s[:80000] + def time_get_value(self, index): + with 
warnings.catch_warnings(record=True): + self.s.get_value(self.lbl) -class DatetimeIndexing(object): - goal_time = 0.2 + def time_getitem_scalar(self, index): + self.s[self.lbl] - def setup(self): - tm.N = 1000 - self.ts = tm.makeTimeSeries() - self.dt = self.ts.index[500] - def time_getitem_scalar(self): - self.ts[self.dt] - +class DataFrameStringIndexing(object): -class DataFrameIndexing(object): goal_time = 0.2 def setup(self): - self.index = tm.makeStringIndex(1000) - self.columns = tm.makeStringIndex(30) - self.df = DataFrame(np.random.randn(1000, 30), index=self.index, - columns=self.columns) - self.idx = self.index[100] - self.col = self.columns[10] - - self.df2 = DataFrame(np.random.randn(10000, 4), - columns=['A', 'B', 'C', 'D']) - self.indexer = (self.df2['B'] > 0) - self.obj_indexer = self.indexer.astype('O') - - # duptes - self.idx_dupe = (np.array(range(30)) * 99) - self.df3 = DataFrame({'A': ([0.1] * 1000), 'B': ([1] * 1000),}) - self.df3 = concat([self.df3, (2 * self.df3), (3 * self.df3)]) - - self.df_big = DataFrame(dict(A=(['foo'] * 1000000))) + index = tm.makeStringIndex(1000) + columns = tm.makeStringIndex(30) + self.df = DataFrame(np.random.randn(1000, 30), index=index, + columns=columns) + self.idx_scalar = index[100] + self.col_scalar = columns[10] + self.bool_indexer = self.df[self.col_scalar] > 0 + self.bool_obj_indexer = self.bool_indexer.astype(object) def time_get_value(self): - self.df.get_value(self.idx, self.col) + with warnings.catch_warnings(record=True): + self.df.get_value(self.idx_scalar, self.col_scalar) + + def time_ix(self): + self.df.ix[self.idx_scalar, self.col_scalar] - def time_get_value_ix(self): - self.df.ix[(self.idx, self.col)] + def time_loc(self): + self.df.loc[self.idx_scalar, self.col_scalar] def time_getitem_scalar(self): - self.df[self.col][self.idx] + self.df[self.col_scalar][self.idx_scalar] def time_boolean_rows(self): - self.df2[self.indexer] + self.df[self.bool_indexer] def time_boolean_rows_object(self): - 
self.df2[self.obj_indexer] - - def time_iloc_dups(self): - self.df3.iloc[self.idx_dupe] + self.df[self.bool_obj_indexer] - def time_loc_dups(self): - self.df3.loc[self.idx_dupe] - - def time_iloc_big(self): - self.df_big.iloc[:100, 0] +class DataFrameNumericIndexing(object): -class IndexingMethods(object): - # GH 13166 goal_time = 0.2 def setup(self): - a = np.arange(100000) - self.ind = pd.Float64Index(a * 4.8000000418824129e-08) + self.idx_dupe = np.array(range(30)) * 99 + self.df = DataFrame(np.random.randn(10000, 5)) + self.df_dup = concat([self.df, 2 * self.df, 3 * self.df]) + self.bool_indexer = [True] * 5000 + [False] * 5000 - self.s = Series(np.random.rand(100000)) - self.ts = Series(np.random.rand(100000), - index=date_range('2011-01-01', freq='S', periods=100000)) - self.indexer = ([True, False, True, True, False] * 20000) + def time_iloc_dups(self): + self.df_dup.iloc[self.idx_dupe] + + def time_loc_dups(self): + self.df_dup.loc[self.idx_dupe] - def time_get_loc_float(self): - self.ind.get_loc(0) + def time_iloc(self): + self.df.iloc[:100, 0] - def time_take_dtindex(self): - self.ts.take(self.indexer) + def time_loc(self): + self.df.loc[:100, 0] - def time_take_intindex(self): + def time_bool_indexer(self): + self.df[self.bool_indexer] + + +class Take(object): + + goal_time = 0.2 + params = ['int', 'datetime'] + param_names = ['index'] + + def setup(self, index): + N = 100000 + indexes = {'int': Int64Index(np.arange(N)), + 'datetime': date_range('2011-01-01', freq='S', periods=N)} + index = indexes[index] + self.s = Series(np.random.rand(N), index=index) + self.indexer = [True, False, True, True, False] * 20000 + + def time_take(self, index): self.s.take(self.indexer) class MultiIndexing(object): + goal_time = 0.2 def setup(self): - self.mi = MultiIndex.from_tuples([(x, y) for x in range(1000) for y in range(1000)]) - self.s = Series(np.random.randn(1000000), index=self.mi) + mi = MultiIndex.from_product([range(1000), range(1000)]) + self.s = 
Series(np.random.randn(1000000), index=mi) self.df = DataFrame(self.s) - # slicers - np.random.seed(1234) - self.idx = pd.IndexSlice - self.n = 100000 - self.mdt = pandas.DataFrame() - self.mdt['A'] = np.random.choice(range(10000, 45000, 1000), self.n) - self.mdt['B'] = np.random.choice(range(10, 400), self.n) - self.mdt['C'] = np.random.choice(range(1, 150), self.n) - self.mdt['D'] = np.random.choice(range(10000, 45000), self.n) - self.mdt['x'] = np.random.choice(range(400), self.n) - self.mdt['y'] = np.random.choice(range(25), self.n) - self.test_A = 25000 - self.test_B = 25 - self.test_C = 40 - self.test_D = 35000 - self.eps_A = 5000 - self.eps_B = 5 - self.eps_C = 5 - self.eps_D = 5000 - self.mdt2 = self.mdt.set_index(['A', 'B', 'C', 'D']).sortlevel() - - def time_series_xs_mi_ix(self): + n = 100000 + self.mdt = DataFrame({'A': np.random.choice(range(10000, 45000, 1000), + n), + 'B': np.random.choice(range(10, 400), n), + 'C': np.random.choice(range(1, 150), n), + 'D': np.random.choice(range(10000, 45000), n), + 'x': np.random.choice(range(400), n), + 'y': np.random.choice(range(25), n)}) + self.idx = IndexSlice[20000:30000, 20:30, 35:45, 30000:40000] + self.mdt = self.mdt.set_index(['A', 'B', 'C', 'D']).sort_index() + + def time_series_ix(self): self.s.ix[999] - def time_frame_xs_mi_ix(self): + def time_frame_ix(self): self.df.ix[999] - def time_multiindex_slicers(self): - self.mdt2.loc[self.idx[(self.test_A - self.eps_A):(self.test_A + self.eps_A), (self.test_B - self.eps_B):(self.test_B + self.eps_B), (self.test_C - self.eps_C):(self.test_C + self.eps_C), (self.test_D - self.eps_D):(self.test_D + self.eps_D)], :] + def time_index_slice(self): + self.mdt.loc[self.idx, :] + + +class IntervalIndexing(object): + + goal_time = 0.2 + + def setup_cache(self): + idx = IntervalIndex.from_breaks(np.arange(1000001)) + monotonic = Series(np.arange(1000000), index=idx) + return monotonic + + def time_getitem_scalar(self, monotonic): + monotonic[80000] + + def 
time_loc_scalar(self, monotonic): + monotonic.loc[80000] + + def time_getitem_list(self, monotonic): + monotonic[80000:] + + def time_loc_list(self, monotonic): + monotonic.loc[80000:] class PanelIndexing(object): + goal_time = 0.2 def setup(self): - self.p = Panel(np.random.randn(100, 100, 100)) - self.inds = range(0, 100, 10) + with warnings.catch_warnings(record=True): + self.p = Panel(np.random.randn(100, 100, 100)) + self.inds = range(0, 100, 10) def time_subset(self): - self.p.ix[(self.inds, self.inds, self.inds)] + with warnings.catch_warnings(record=True): + self.p.ix[(self.inds, self.inds, self.inds)] + + +class MethodLookup(object): + + goal_time = 0.2 + + def setup_cache(self): + s = Series() + return s + + def time_lookup_iloc(self, s): + s.iloc + + def time_lookup_ix(self, s): + s.ix + + def time_lookup_loc(self, s): + s.loc + + +class GetItemSingleColumn(object): + + goal_time = 0.2 + + def setup(self): + self.df_string_col = DataFrame(np.random.randn(3000, 1), columns=['A']) + self.df_int_col = DataFrame(np.random.randn(3000, 1)) + + def time_frame_getitem_single_column_label(self): + self.df_string_col['A'] + + def time_frame_getitem_single_column_int(self): + self.df_int_col[0] + + +class AssignTimeseriesIndex(object): + + goal_time = 0.2 + + def setup(self): + N = 100000 + idx = date_range('1/1/2000', periods=N, freq='H') + self.df = DataFrame(np.random.randn(N, 1), columns=['A'], index=idx) + + def time_frame_assign_timeseries_index(self): + self.df['date'] = self.df.index + + +class InsertColumns(object): + + goal_time = 0.2 + + def setup(self): + self.N = 10**3 + self.df = DataFrame(index=range(self.N)) + + def time_insert(self): + np.random.seed(1234) + for i in range(100): + self.df.insert(0, i, np.random.randn(self.N), + allow_duplicates=True) + + def time_assign_with_setitem(self): + np.random.seed(1234) + for i in range(100): + self.df[i] = np.random.randn(self.N) diff --git a/asv_bench/benchmarks/inference.py 
b/asv_bench/benchmarks/inference.py index 3635438a7f76b..16d9e7cd73cbb 100644 --- a/asv_bench/benchmarks/inference.py +++ b/asv_bench/benchmarks/inference.py @@ -1,77 +1,80 @@ -from .pandas_vb_common import * -import pandas as pd +import numpy as np +import pandas.util.testing as tm +from pandas import DataFrame, Series, to_numeric +from .pandas_vb_common import numeric_dtypes, lib, setup # noqa -class DtypeInfer(object): - goal_time = 0.2 +class NumericInferOps(object): # from GH 7332 + goal_time = 0.2 + params = numeric_dtypes + param_names = ['dtype'] + + def setup(self, dtype): + N = 5 * 10**5 + self.df = DataFrame({'A': np.arange(N).astype(dtype), + 'B': np.arange(N).astype(dtype)}) + + def time_add(self, dtype): + self.df['A'] + self.df['B'] + + def time_subtract(self, dtype): + self.df['A'] - self.df['B'] - def setup(self): - self.N = 500000 - self.df_int64 = DataFrame(dict(A=np.arange(self.N, dtype='int64'), - B=np.arange(self.N, dtype='int64'))) - self.df_int32 = DataFrame(dict(A=np.arange(self.N, dtype='int32'), - B=np.arange(self.N, dtype='int32'))) - self.df_uint32 = DataFrame(dict(A=np.arange(self.N, dtype='uint32'), - B=np.arange(self.N, dtype='uint32'))) - self.df_float64 = DataFrame(dict(A=np.arange(self.N, dtype='float64'), - B=np.arange(self.N, dtype='float64'))) - self.df_float32 = DataFrame(dict(A=np.arange(self.N, dtype='float32'), - B=np.arange(self.N, dtype='float32'))) - self.df_datetime64 = DataFrame(dict(A=pd.to_datetime(np.arange(self.N, dtype='int64'), unit='ms'), - B=pd.to_datetime(np.arange(self.N, dtype='int64'), unit='ms'))) - self.df_timedelta64 = DataFrame(dict(A=(self.df_datetime64['A'] - self.df_datetime64['B']), - B=self.df_datetime64['B'])) + def time_multiply(self, dtype): + self.df['A'] * self.df['B'] - def time_int64(self): - (self.df_int64['A'] + self.df_int64['B']) + def time_divide(self, dtype): + self.df['A'] / self.df['B'] - def time_int32(self): - (self.df_int32['A'] + self.df_int32['B']) + def time_modulo(self, 
dtype): + self.df['A'] % self.df['B'] - def time_uint32(self): - (self.df_uint32['A'] + self.df_uint32['B']) - def time_float64(self): - (self.df_float64['A'] + self.df_float64['B']) +class DateInferOps(object): + # from GH 7332 + goal_time = 0.2 + + def setup_cache(self): + N = 5 * 10**5 + df = DataFrame({'datetime64': np.arange(N).astype('datetime64[ms]')}) + df['timedelta'] = df['datetime64'] - df['datetime64'] + return df - def time_float32(self): - (self.df_float32['A'] + self.df_float32['B']) + def time_subtract_datetimes(self, df): + df['datetime64'] - df['datetime64'] - def time_datetime64(self): - (self.df_datetime64['A'] - self.df_datetime64['B']) + def time_timedelta_plus_datetime(self, df): + df['timedelta'] + df['datetime64'] - def time_timedelta64_1(self): - (self.df_timedelta64['A'] + self.df_timedelta64['B']) + def time_add_timedeltas(self, df): + df['timedelta'] + df['timedelta'] - def time_timedelta64_2(self): - (self.df_timedelta64['A'] + self.df_timedelta64['A']) +class ToNumeric(object): -class to_numeric(object): goal_time = 0.2 + params = ['ignore', 'coerce'] + param_names = ['errors'] - def setup(self): - self.n = 10000 - self.float = Series(np.random.randn(self.n * 100)) + def setup(self, errors): + N = 10000 + self.float = Series(np.random.randn(N)) self.numstr = self.float.astype('str') - self.str = Series(tm.makeStringIndex(self.n)) + self.str = Series(tm.makeStringIndex(N)) - def time_from_float(self): - pd.to_numeric(self.float) + def time_from_float(self, errors): + to_numeric(self.float, errors=errors) - def time_from_numeric_str(self): - pd.to_numeric(self.numstr) + def time_from_numeric_str(self, errors): + to_numeric(self.numstr, errors=errors) - def time_from_str_ignore(self): - pd.to_numeric(self.str, errors='ignore') + def time_from_str(self, errors): + to_numeric(self.str, errors=errors) - def time_from_str_coerce(self): - pd.to_numeric(self.str, errors='coerce') - -class to_numeric_downcast(object): +class 
ToNumericDowncast(object): param_names = ['dtype', 'downcast'] params = [['string-float', 'string-int', 'string-nint', 'datetime64', @@ -81,37 +84,30 @@ class to_numeric_downcast(object): N = 500000 N2 = int(N / 2) - data_dict = { - 'string-int': (['1'] * N2) + ([2] * N2), - 'string-nint': (['-1'] * N2) + ([2] * N2), - 'datetime64': np.repeat(np.array(['1970-01-01', '1970-01-02'], - dtype='datetime64[D]'), N), - 'string-float': (['1.1'] * N2) + ([2] * N2), - 'int-list': ([1] * N2) + ([2] * N2), - 'int32': np.repeat(np.int32(1), N) - } + data_dict = {'string-int': ['1'] * N2 + [2] * N2, + 'string-nint': ['-1'] * N2 + [2] * N2, + 'datetime64': np.repeat(np.array(['1970-01-01', '1970-01-02'], + dtype='datetime64[D]'), N), + 'string-float': ['1.1'] * N2 + [2] * N2, + 'int-list': [1] * N2 + [2] * N2, + 'int32': np.repeat(np.int32(1), N)} def setup(self, dtype, downcast): self.data = self.data_dict[dtype] def time_downcast(self, dtype, downcast): - pd.to_numeric(self.data, downcast=downcast) + to_numeric(self.data, downcast=downcast) class MaybeConvertNumeric(object): - def setup(self): - n = 1000000 - arr = np.repeat([2**63], n) - arr = arr + np.arange(n).astype('uint64') - arr = np.array([arr[i] if i%2 == 0 else - str(arr[i]) for i in range(n)], - dtype=object) - - arr[-1] = -1 - self.data = arr - self.na_values = set() - - def time_convert(self): - pd.lib.maybe_convert_numeric(self.data, self.na_values, - coerce_numeric=False) + def setup_cache(self): + N = 10**6 + arr = np.repeat([2**63], N) + np.arange(N).astype('uint64') + data = arr.astype(object) + data[1::2] = arr[1::2].astype(str) + data[-1] = -1 + return data + + def time_convert(self, data): + lib.maybe_convert_numeric(data, set(), coerce_numeric=False) diff --git a/pandas/api/tests/__init__.py b/asv_bench/benchmarks/io/__init__.py similarity index 100% rename from pandas/api/tests/__init__.py rename to asv_bench/benchmarks/io/__init__.py diff --git a/asv_bench/benchmarks/io/csv.py 
b/asv_bench/benchmarks/io/csv.py new file mode 100644 index 0000000000000..3b7fdc6e2d78c --- /dev/null +++ b/asv_bench/benchmarks/io/csv.py @@ -0,0 +1,249 @@ +import random +import timeit +import string + +import numpy as np +import pandas.util.testing as tm +from pandas import DataFrame, Categorical, date_range, read_csv +from pandas.compat import PY2 +from pandas.compat import cStringIO as StringIO + +from ..pandas_vb_common import setup, BaseIO # noqa + + +class ToCSV(BaseIO): + + goal_time = 0.2 + fname = '__test__.csv' + params = ['wide', 'long', 'mixed'] + param_names = ['kind'] + + def setup(self, kind): + wide_frame = DataFrame(np.random.randn(3000, 30)) + long_frame = DataFrame({'A': np.arange(50000), + 'B': np.arange(50000) + 1., + 'C': np.arange(50000) + 2., + 'D': np.arange(50000) + 3.}) + mixed_frame = DataFrame({'float': np.random.randn(5000), + 'int': np.random.randn(5000).astype(int), + 'bool': (np.arange(5000) % 2) == 0, + 'datetime': date_range('2001', + freq='s', + periods=5000), + 'object': ['foo'] * 5000}) + mixed_frame.loc[30:500, 'float'] = np.nan + data = {'wide': wide_frame, + 'long': long_frame, + 'mixed': mixed_frame} + self.df = data[kind] + + def time_frame(self, kind): + self.df.to_csv(self.fname) + + +class ToCSVDatetime(BaseIO): + + goal_time = 0.2 + fname = '__test__.csv' + + def setup(self): + rng = date_range('1/1/2000', periods=1000) + self.data = DataFrame(rng, index=rng) + + def time_frame_date_formatting(self): + self.data.to_csv(self.fname, date_format='%Y%m%d') + + +class ReadCSVDInferDatetimeFormat(object): + + goal_time = 0.2 + params = ([True, False], ['custom', 'iso8601', 'ymd']) + param_names = ['infer_datetime_format', 'format'] + + def setup(self, infer_datetime_format, format): + rng = date_range('1/1/2000', periods=1000) + formats = {'custom': '%m/%d/%Y %H:%M:%S.%f', + 'iso8601': '%Y-%m-%d %H:%M:%S', + 'ymd': '%Y%m%d'} + dt_format = formats[format] + self.data = StringIO('\n'.join(rng.strftime(dt_format).tolist())) 
+ + def time_read_csv(self, infer_datetime_format, format): + read_csv(self.data, header=None, names=['foo'], parse_dates=['foo'], + infer_datetime_format=infer_datetime_format) + + +class ReadCSVSkipRows(BaseIO): + + goal_time = 0.2 + fname = '__test__.csv' + params = [None, 10000] + param_names = ['skiprows'] + + def setup(self, skiprows): + N = 20000 + index = tm.makeStringIndex(N) + df = DataFrame({'float1': np.random.randn(N), + 'float2': np.random.randn(N), + 'string1': ['foo'] * N, + 'bool1': [True] * N, + 'int1': np.random.randint(0, N, size=N)}, + index=index) + df.to_csv(self.fname) + + def time_skipprows(self, skiprows): + read_csv(self.fname, skiprows=skiprows) + + +class ReadUint64Integers(object): + + goal_time = 0.2 + + def setup(self): + self.na_values = [2**63 + 500] + arr = np.arange(10000).astype('uint64') + 2**63 + self.data1 = StringIO('\n'.join(arr.astype(str).tolist())) + arr = arr.astype(object) + arr[500] = -1 + self.data2 = StringIO('\n'.join(arr.astype(str).tolist())) + + def time_read_uint64(self): + read_csv(self.data1, header=None, names=['foo']) + + def time_read_uint64_neg_values(self): + read_csv(self.data2, header=None, names=['foo']) + + def time_read_uint64_na_values(self): + read_csv(self.data1, header=None, names=['foo'], + na_values=self.na_values) + + +class S3(object): + # Make sure that we can read part of a file from S3 without + # needing to download the entire thing. Use the timeit.default_timer + # to measure wall time instead of CPU time -- we want to see + # how long it takes to download the data. + timer = timeit.default_timer + params = ([None, "gzip", "bz2"], ["python", "c"]) + param_names = ["compression", "engine"] + + def setup(self, compression, engine): + if compression == "bz2" and engine == "c" and PY2: + # The Python 2 C parser can't read bz2 from open files. + raise NotImplementedError + try: + import s3fs # noqa + except ImportError: + # Skip these benchmarks if `boto` is not installed. 
+ raise NotImplementedError + + ext = "" + if compression == "gzip": + ext = ".gz" + elif compression == "bz2": + ext = ".bz2" + self.big_fname = "s3://pandas-test/large_random.csv" + ext + + def time_read_csv_10_rows(self, compression, engine): + # Read a small number of rows from a huge (100,000 x 50) table. + read_csv(self.big_fname, nrows=10, compression=compression, + engine=engine) + + +class ReadCSVThousands(BaseIO): + + goal_time = 0.2 + fname = '__test__.csv' + params = ([',', '|'], [None, ',']) + param_names = ['sep', 'thousands'] + + def setup(self, sep, thousands): + N = 10000 + K = 8 + data = np.random.randn(N, K) * np.random.randint(100, 10000, (N, K)) + df = DataFrame(data) + if thousands is not None: + fmt = ':{}'.format(thousands) + fmt = '{' + fmt + '}' + df = df.applymap(lambda x: fmt.format(x)) + df.to_csv(self.fname, sep=sep) + + def time_thousands(self, sep, thousands): + read_csv(self.fname, sep=sep, thousands=thousands) + + +class ReadCSVComment(object): + + goal_time = 0.2 + + def setup(self): + data = ['A,B,C'] + (['1,2,3 # comment'] * 100000) + self.s_data = StringIO('\n'.join(data)) + + def time_comment(self): + read_csv(self.s_data, comment='#', header=None, names=list('abc')) + + +class ReadCSVFloatPrecision(object): + + goal_time = 0.2 + params = ([',', ';'], ['.', '_'], [None, 'high', 'round_trip']) + param_names = ['sep', 'decimal', 'float_precision'] + + def setup(self, sep, decimal, float_precision): + floats = [''.join(random.choice(string.digits) for _ in range(28)) + for _ in range(15)] + rows = sep.join(['0{}'.format(decimal) + '{}'] * 3) + '\n' + data = rows * 5 + data = data.format(*floats) * 200 # 1000 x 3 strings csv + self.s_data = StringIO(data) + + def time_read_csv(self, sep, decimal, float_precision): + read_csv(self.s_data, sep=sep, header=None, names=list('abc'), + float_precision=float_precision) + + def time_read_csv_python_engine(self, sep, decimal, float_precision): + read_csv(self.s_data, sep=sep, header=None, 
engine='python', + float_precision=None, names=list('abc')) + + +class ReadCSVCategorical(BaseIO): + + goal_time = 0.2 + fname = '__test__.csv' + + def setup(self): + N = 100000 + group1 = ['aaaaaaaa', 'bbbbbbb', 'cccccccc', 'dddddddd', 'eeeeeeee'] + df = DataFrame(np.random.choice(group1, (N, 3)), columns=list('abc')) + df.to_csv(self.fname, index=False) + + def time_convert_post(self): + read_csv(self.fname).apply(Categorical) + + def time_convert_direct(self): + read_csv(self.fname, dtype='category') + + +class ReadCSVParseDates(object): + + goal_time = 0.2 + + def setup(self): + data = """{},19:00:00,18:56:00,0.8100,2.8100,7.2000,0.0000,280.0000\n + {},20:00:00,19:56:00,0.0100,2.2100,7.2000,0.0000,260.0000\n + {},21:00:00,20:56:00,-0.5900,2.2100,5.7000,0.0000,280.0000\n + {},21:00:00,21:18:00,-0.9900,2.0100,3.6000,0.0000,270.0000\n + {},22:00:00,21:56:00,-0.5900,1.7100,5.1000,0.0000,290.0000\n + """ + two_cols = ['KORD,19990127'] * 5 + data = data.format(*two_cols) + self.s_data = StringIO(data) + + def time_multiple_date(self): + read_csv(self.s_data, sep=',', header=None, + names=list(string.digits[:9]), parse_dates=[[1, 2], [1, 3]]) + + def time_baseline(self): + read_csv(self.s_data, sep=',', header=None, parse_dates=[1], + names=list(string.digits[:9])) diff --git a/asv_bench/benchmarks/io/excel.py b/asv_bench/benchmarks/io/excel.py new file mode 100644 index 0000000000000..58ab6bb8046c5 --- /dev/null +++ b/asv_bench/benchmarks/io/excel.py @@ -0,0 +1,36 @@ +import numpy as np +from pandas import DataFrame, date_range, ExcelWriter, read_excel +from pandas.compat import BytesIO +import pandas.util.testing as tm + +from ..pandas_vb_common import BaseIO, setup # noqa + + +class Excel(object): + + goal_time = 0.2 + params = ['openpyxl', 'xlsxwriter', 'xlwt'] + param_names = ['engine'] + + def setup(self, engine): + N = 2000 + C = 5 + self.df = DataFrame(np.random.randn(N, C), + columns=['float{}'.format(i) for i in range(C)], + index=date_range('20000101', 
periods=N, freq='H')) + self.df['object'] = tm.makeStringIndex(N) + self.bio_read = BytesIO() + self.writer_read = ExcelWriter(self.bio_read, engine=engine) + self.df.to_excel(self.writer_read, sheet_name='Sheet1') + self.writer_read.save() + self.bio_read.seek(0) + + def time_read_excel(self, engine): + read_excel(self.bio_read) + + def time_write_excel(self, engine): + bio_write = BytesIO() + bio_write.seek(0) + writer_write = ExcelWriter(bio_write, engine=engine) + self.df.to_excel(writer_write, sheet_name='Sheet1') + writer_write.save() diff --git a/asv_bench/benchmarks/io/hdf.py b/asv_bench/benchmarks/io/hdf.py new file mode 100644 index 0000000000000..4b6e1d69af92d --- /dev/null +++ b/asv_bench/benchmarks/io/hdf.py @@ -0,0 +1,151 @@ +import warnings + +import numpy as np +from pandas import DataFrame, Panel, date_range, HDFStore, read_hdf +import pandas.util.testing as tm + +from ..pandas_vb_common import BaseIO, setup # noqa + + +class HDFStoreDataFrame(BaseIO): + + goal_time = 0.2 + + def setup(self): + N = 25000 + index = tm.makeStringIndex(N) + self.df = DataFrame({'float1': np.random.randn(N), + 'float2': np.random.randn(N)}, + index=index) + self.df_mixed = DataFrame({'float1': np.random.randn(N), + 'float2': np.random.randn(N), + 'string1': ['foo'] * N, + 'bool1': [True] * N, + 'int1': np.random.randint(0, N, size=N)}, + index=index) + self.df_wide = DataFrame(np.random.randn(N, 100)) + self.start_wide = self.df_wide.index[10000] + self.stop_wide = self.df_wide.index[15000] + self.df2 = DataFrame({'float1': np.random.randn(N), + 'float2': np.random.randn(N)}, + index=date_range('1/1/2000', periods=N)) + self.start = self.df2.index[10000] + self.stop = self.df2.index[15000] + self.df_wide2 = DataFrame(np.random.randn(N, 100), + index=date_range('1/1/2000', periods=N)) + self.df_dc = DataFrame(np.random.randn(N, 10), + columns=['C%03d' % i for i in range(10)]) + + self.fname = '__test__.h5' + + self.store = HDFStore(self.fname) + self.store.put('fixed', 
self.df) + self.store.put('fixed_mixed', self.df_mixed) + self.store.append('table', self.df2) + self.store.append('table_mixed', self.df_mixed) + self.store.append('table_wide', self.df_wide) + self.store.append('table_wide2', self.df_wide2) + + def teardown(self): + self.store.close() + self.remove(self.fname) + + def time_read_store(self): + self.store.get('fixed') + + def time_read_store_mixed(self): + self.store.get('fixed_mixed') + + def time_write_store(self): + self.store.put('fixed_write', self.df) + + def time_write_store_mixed(self): + self.store.put('fixed_mixed_write', self.df_mixed) + + def time_read_store_table_mixed(self): + self.store.select('table_mixed') + + def time_write_store_table_mixed(self): + self.store.append('table_mixed_write', self.df_mixed) + + def time_read_store_table(self): + self.store.select('table') + + def time_write_store_table(self): + self.store.append('table_write', self.df) + + def time_read_store_table_wide(self): + self.store.select('table_wide') + + def time_write_store_table_wide(self): + self.store.append('table_wide_write', self.df_wide) + + def time_write_store_table_dc(self): + self.store.append('table_dc_write', self.df_dc, data_columns=True) + + def time_query_store_table_wide(self): + self.store.select('table_wide', where="index > self.start_wide and " + "index < self.stop_wide") + + def time_query_store_table(self): + self.store.select('table', where="index > self.start and " + "index < self.stop") + + def time_store_repr(self): + repr(self.store) + + def time_store_str(self): + str(self.store) + + def time_store_info(self): + self.store.info() + + +class HDFStorePanel(BaseIO): + + goal_time = 0.2 + + def setup(self): + self.fname = '__test__.h5' + with warnings.catch_warnings(record=True): + self.p = Panel(np.random.randn(20, 1000, 25), + items=['Item%03d' % i for i in range(20)], + major_axis=date_range('1/1/2000', periods=1000), + minor_axis=['E%03d' % i for i in range(25)]) + self.store = 
HDFStore(self.fname) + self.store.append('p1', self.p) + + def teardown(self): + self.store.close() + self.remove(self.fname) + + def time_read_store_table_panel(self): + with warnings.catch_warnings(record=True): + self.store.select('p1') + + def time_write_store_table_panel(self): + with warnings.catch_warnings(record=True): + self.store.append('p2', self.p) + + +class HDF(BaseIO): + + goal_time = 0.2 + params = ['table', 'fixed'] + param_names = ['format'] + + def setup(self, format): + self.fname = '__test__.h5' + N = 100000 + C = 5 + self.df = DataFrame(np.random.randn(N, C), + columns=['float{}'.format(i) for i in range(C)], + index=date_range('20000101', periods=N, freq='H')) + self.df['object'] = tm.makeStringIndex(N) + self.df.to_hdf(self.fname, 'df', format=format) + + def time_read_hdf(self, format): + read_hdf(self.fname, 'df') + + def time_write_hdf(self, format): + self.df.to_hdf(self.fname, 'df', format=format) diff --git a/asv_bench/benchmarks/io/json.py b/asv_bench/benchmarks/io/json.py new file mode 100644 index 0000000000000..acfdd327c3b51 --- /dev/null +++ b/asv_bench/benchmarks/io/json.py @@ -0,0 +1,127 @@ +import numpy as np +import pandas.util.testing as tm +from pandas import DataFrame, date_range, timedelta_range, concat, read_json + +from ..pandas_vb_common import setup, BaseIO # noqa + + +class ReadJSON(BaseIO): + + goal_time = 0.2 + fname = "__test__.json" + params = (['split', 'index', 'records'], ['int', 'datetime']) + param_names = ['orient', 'index'] + + def setup(self, orient, index): + N = 100000 + indexes = {'int': np.arange(N), + 'datetime': date_range('20000101', periods=N, freq='H')} + df = DataFrame(np.random.randn(N, 5), + columns=['float_{}'.format(i) for i in range(5)], + index=indexes[index]) + df.to_json(self.fname, orient=orient) + + def time_read_json(self, orient, index): + read_json(self.fname, orient=orient) + + +class ReadJSONLines(BaseIO): + + goal_time = 0.2 + fname = "__test_lines__.json" + params = ['int', 
'datetime'] + param_names = ['index'] + + def setup(self, index): + N = 100000 + indexes = {'int': np.arange(N), + 'datetime': date_range('20000101', periods=N, freq='H')} + df = DataFrame(np.random.randn(N, 5), + columns=['float_{}'.format(i) for i in range(5)], + index=indexes[index]) + df.to_json(self.fname, orient='records', lines=True) + + def time_read_json_lines(self, index): + read_json(self.fname, orient='records', lines=True) + + def time_read_json_lines_concat(self, index): + concat(read_json(self.fname, orient='records', lines=True, + chunksize=25000)) + + def peakmem_read_json_lines(self, index): + read_json(self.fname, orient='records', lines=True) + + def peakmem_read_json_lines_concat(self, index): + concat(read_json(self.fname, orient='records', lines=True, + chunksize=25000)) + + +class ToJSON(BaseIO): + + goal_time = 0.2 + fname = "__test__.json" + params = ['split', 'columns', 'index'] + param_names = ['orient'] + + def setup(self, lines_orient): + N = 10**5 + ncols = 5 + index = date_range('20000101', periods=N, freq='H') + timedeltas = timedelta_range(start=1, periods=N, freq='s') + datetimes = date_range(start=1, periods=N, freq='s') + ints = np.random.randint(100000000, size=N) + floats = np.random.randn(N) + strings = tm.makeStringIndex(N) + self.df = DataFrame(np.random.randn(N, ncols), index=np.arange(N)) + self.df_date_idx = DataFrame(np.random.randn(N, ncols), index=index) + self.df_td_int_ts = DataFrame({'td_1': timedeltas, + 'td_2': timedeltas, + 'int_1': ints, + 'int_2': ints, + 'ts_1': datetimes, + 'ts_2': datetimes}, + index=index) + self.df_int_floats = DataFrame({'int_1': ints, + 'int_2': ints, + 'int_3': ints, + 'float_1': floats, + 'float_2': floats, + 'float_3': floats}, + index=index) + self.df_int_float_str = DataFrame({'int_1': ints, + 'int_2': ints, + 'float_1': floats, + 'float_2': floats, + 'str_1': strings, + 'str_2': strings}, + index=index) + + def time_floats_with_int_index(self, orient): + 
self.df.to_json(self.fname, orient=orient) + + def time_floats_with_dt_index(self, orient): + self.df_date_idx.to_json(self.fname, orient=orient) + + def time_delta_int_tstamp(self, orient): + self.df_td_int_ts.to_json(self.fname, orient=orient) + + def time_float_int(self, orient): + self.df_int_floats.to_json(self.fname, orient=orient) + + def time_float_int_str(self, orient): + self.df_int_float_str.to_json(self.fname, orient=orient) + + def time_floats_with_int_idex_lines(self, orient): + self.df.to_json(self.fname, orient='records', lines=True) + + def time_floats_with_dt_index_lines(self, orient): + self.df_date_idx.to_json(self.fname, orient='records', lines=True) + + def time_delta_int_tstamp_lines(self, orient): + self.df_td_int_ts.to_json(self.fname, orient='records', lines=True) + + def time_float_int_lines(self, orient): + self.df_int_floats.to_json(self.fname, orient='records', lines=True) + + def time_float_int_str_lines(self, orient): + self.df_int_float_str.to_json(self.fname, orient='records', lines=True) diff --git a/asv_bench/benchmarks/io/msgpack.py b/asv_bench/benchmarks/io/msgpack.py new file mode 100644 index 0000000000000..8ccce01117ca4 --- /dev/null +++ b/asv_bench/benchmarks/io/msgpack.py @@ -0,0 +1,26 @@ +import numpy as np +from pandas import DataFrame, date_range, read_msgpack +import pandas.util.testing as tm + +from ..pandas_vb_common import BaseIO, setup # noqa + + +class MSGPack(BaseIO): + + goal_time = 0.2 + + def setup(self): + self.fname = '__test__.msg' + N = 100000 + C = 5 + self.df = DataFrame(np.random.randn(N, C), + columns=['float{}'.format(i) for i in range(C)], + index=date_range('20000101', periods=N, freq='H')) + self.df['object'] = tm.makeStringIndex(N) + self.df.to_msgpack(self.fname) + + def time_read_msgpack(self): + read_msgpack(self.fname) + + def time_write_msgpack(self): + self.df.to_msgpack(self.fname) diff --git a/asv_bench/benchmarks/io/pickle.py b/asv_bench/benchmarks/io/pickle.py new file mode 100644 index 
0000000000000..2ad0fcca6eb26 --- /dev/null +++ b/asv_bench/benchmarks/io/pickle.py @@ -0,0 +1,26 @@ +import numpy as np +from pandas import DataFrame, date_range, read_pickle +import pandas.util.testing as tm + +from ..pandas_vb_common import BaseIO, setup # noqa + + +class Pickle(BaseIO): + + goal_time = 0.2 + + def setup(self): + self.fname = '__test__.pkl' + N = 100000 + C = 5 + self.df = DataFrame(np.random.randn(N, C), + columns=['float{}'.format(i) for i in range(C)], + index=date_range('20000101', periods=N, freq='H')) + self.df['object'] = tm.makeStringIndex(N) + self.df.to_pickle(self.fname) + + def time_read_pickle(self): + read_pickle(self.fname) + + def time_write_pickle(self): + self.df.to_pickle(self.fname) diff --git a/asv_bench/benchmarks/io/sas.py b/asv_bench/benchmarks/io/sas.py new file mode 100644 index 0000000000000..526c524de7fff --- /dev/null +++ b/asv_bench/benchmarks/io/sas.py @@ -0,0 +1,21 @@ +import os + +from pandas import read_sas + + +class SAS(object): + + goal_time = 0.2 + params = ['sas7bdat', 'xport'] + param_names = ['format'] + + def setup(self, format): + # Read files that are located in 'pandas/io/tests/sas/data' + files = {'sas7bdat': 'test1.sas7bdat', 'xport': 'paxraw_d_short.xpt'} + file = files[format] + paths = [os.path.dirname(__file__), '..', '..', '..', 'pandas', + 'tests', 'io', 'sas', 'data', file] + self.f = os.path.join(*paths) + + def time_read_msgpack(self, format): + read_sas(self.f, format=format) diff --git a/asv_bench/benchmarks/io/sql.py b/asv_bench/benchmarks/io/sql.py new file mode 100644 index 0000000000000..ef4e501e5f3b9 --- /dev/null +++ b/asv_bench/benchmarks/io/sql.py @@ -0,0 +1,132 @@ +import sqlite3 + +import numpy as np +import pandas.util.testing as tm +from pandas import DataFrame, date_range, read_sql_query, read_sql_table +from sqlalchemy import create_engine + +from ..pandas_vb_common import setup # noqa + + +class SQL(object): + + goal_time = 0.2 + params = ['sqlalchemy', 'sqlite'] + 
param_names = ['connection'] + + def setup(self, connection): + N = 10000 + con = {'sqlalchemy': create_engine('sqlite:///:memory:'), + 'sqlite': sqlite3.connect(':memory:')} + self.table_name = 'test_type' + self.query_all = 'SELECT * FROM {}'.format(self.table_name) + self.con = con[connection] + self.df = DataFrame({'float': np.random.randn(N), + 'float_with_nan': np.random.randn(N), + 'string': ['foo'] * N, + 'bool': [True] * N, + 'int': np.random.randint(0, N, size=N), + 'datetime': date_range('2000-01-01', + periods=N, + freq='s')}, + index=tm.makeStringIndex(N)) + self.df.loc[1000:3000, 'float_with_nan'] = np.nan + self.df['datetime_string'] = self.df['datetime'].astype(str) + self.df.to_sql(self.table_name, self.con, if_exists='replace') + + def time_to_sql_dataframe(self, connection): + self.df.to_sql('test1', self.con, if_exists='replace') + + def time_read_sql_query(self, connection): + read_sql_query(self.query_all, self.con) + + +class WriteSQLDtypes(object): + + goal_time = 0.2 + params = (['sqlalchemy', 'sqlite'], + ['float', 'float_with_nan', 'string', 'bool', 'int', 'datetime']) + param_names = ['connection', 'dtype'] + + def setup(self, connection, dtype): + N = 10000 + con = {'sqlalchemy': create_engine('sqlite:///:memory:'), + 'sqlite': sqlite3.connect(':memory:')} + self.table_name = 'test_type' + self.query_col = 'SELECT {} FROM {}'.format(dtype, self.table_name) + self.con = con[connection] + self.df = DataFrame({'float': np.random.randn(N), + 'float_with_nan': np.random.randn(N), + 'string': ['foo'] * N, + 'bool': [True] * N, + 'int': np.random.randint(0, N, size=N), + 'datetime': date_range('2000-01-01', + periods=N, + freq='s')}, + index=tm.makeStringIndex(N)) + self.df.loc[1000:3000, 'float_with_nan'] = np.nan + self.df['datetime_string'] = self.df['datetime'].astype(str) + self.df.to_sql(self.table_name, self.con, if_exists='replace') + + def time_to_sql_dataframe_column(self, connection, dtype): + self.df[[dtype]].to_sql('test1', 
self.con, if_exists='replace') + + def time_read_sql_query_select_column(self, connection, dtype): + read_sql_query(self.query_col, self.con) + + +class ReadSQLTable(object): + + goal_time = 0.2 + + def setup(self): + N = 10000 + self.table_name = 'test' + self.con = create_engine('sqlite:///:memory:') + self.df = DataFrame({'float': np.random.randn(N), + 'float_with_nan': np.random.randn(N), + 'string': ['foo'] * N, + 'bool': [True] * N, + 'int': np.random.randint(0, N, size=N), + 'datetime': date_range('2000-01-01', + periods=N, + freq='s')}, + index=tm.makeStringIndex(N)) + self.df.loc[1000:3000, 'float_with_nan'] = np.nan + self.df['datetime_string'] = self.df['datetime'].astype(str) + self.df.to_sql(self.table_name, self.con, if_exists='replace') + + def time_read_sql_table_all(self): + read_sql_table(self.table_name, self.con) + + def time_read_sql_table_parse_dates(self): + read_sql_table(self.table_name, self.con, columns=['datetime_string'], + parse_dates=['datetime_string']) + + +class ReadSQLTableDtypes(object): + + goal_time = 0.2 + + params = ['float', 'float_with_nan', 'string', 'bool', 'int', 'datetime'] + param_names = ['dtype'] + + def setup(self, dtype): + N = 10000 + self.table_name = 'test' + self.con = create_engine('sqlite:///:memory:') + self.df = DataFrame({'float': np.random.randn(N), + 'float_with_nan': np.random.randn(N), + 'string': ['foo'] * N, + 'bool': [True] * N, + 'int': np.random.randint(0, N, size=N), + 'datetime': date_range('2000-01-01', + periods=N, + freq='s')}, + index=tm.makeStringIndex(N)) + self.df.loc[1000:3000, 'float_with_nan'] = np.nan + self.df['datetime_string'] = self.df['datetime'].astype(str) + self.df.to_sql(self.table_name, self.con, if_exists='replace') + + def time_read_sql_table_column(self, dtype): + read_sql_table(self.table_name, self.con, columns=[dtype]) diff --git a/asv_bench/benchmarks/io/stata.py b/asv_bench/benchmarks/io/stata.py new file mode 100644 index 0000000000000..e0f5752ca930f --- /dev/null 
+++ b/asv_bench/benchmarks/io/stata.py @@ -0,0 +1,37 @@ +import numpy as np +from pandas import DataFrame, date_range, read_stata +import pandas.util.testing as tm + +from ..pandas_vb_common import BaseIO, setup # noqa + + +class Stata(BaseIO): + + goal_time = 0.2 + params = ['tc', 'td', 'tm', 'tw', 'th', 'tq', 'ty'] + param_names = ['convert_dates'] + + def setup(self, convert_dates): + self.fname = '__test__.dta' + N = 100000 + C = 5 + self.df = DataFrame(np.random.randn(N, C), + columns=['float{}'.format(i) for i in range(C)], + index=date_range('20000101', periods=N, freq='H')) + self.df['object'] = tm.makeStringIndex(N) + self.df['int8_'] = np.random.randint(np.iinfo(np.int8).min, + np.iinfo(np.int8).max - 27, N) + self.df['int16_'] = np.random.randint(np.iinfo(np.int16).min, + np.iinfo(np.int16).max - 27, N) + self.df['int32_'] = np.random.randint(np.iinfo(np.int32).min, + np.iinfo(np.int32).max - 27, N) + self.df['float32_'] = np.array(np.random.randn(N), + dtype=np.float32) + self.convert_dates = {'index': convert_dates} + self.df.to_stata(self.fname, self.convert_dates) + + def time_read_stata(self, convert_dates): + read_stata(self.fname) + + def time_write_stata(self, convert_dates): + self.df.to_stata(self.fname, self.convert_dates) diff --git a/asv_bench/benchmarks/io_bench.py b/asv_bench/benchmarks/io_bench.py deleted file mode 100644 index 52064d2cdb8a2..0000000000000 --- a/asv_bench/benchmarks/io_bench.py +++ /dev/null @@ -1,194 +0,0 @@ -from .pandas_vb_common import * -from pandas import concat, Timestamp, compat -try: - from StringIO import StringIO -except ImportError: - from io import StringIO -import timeit - - -class frame_to_csv(object): - goal_time = 0.2 - - def setup(self): - self.df = DataFrame(np.random.randn(3000, 30)) - - def time_frame_to_csv(self): - self.df.to_csv('__test__.csv') - - -class frame_to_csv2(object): - goal_time = 0.2 - - def setup(self): - self.df = DataFrame({'A': range(50000), }) - self.df['B'] = (self.df.A + 1.0) - 
self.df['C'] = (self.df.A + 2.0) - self.df['D'] = (self.df.A + 3.0) - - def time_frame_to_csv2(self): - self.df.to_csv('__test__.csv') - - -class frame_to_csv_date_formatting(object): - goal_time = 0.2 - - def setup(self): - self.rng = date_range('1/1/2000', periods=1000) - self.data = DataFrame(self.rng, index=self.rng) - - def time_frame_to_csv_date_formatting(self): - self.data.to_csv('__test__.csv', date_format='%Y%m%d') - - -class frame_to_csv_mixed(object): - goal_time = 0.2 - - def setup(self): - self.df_float = DataFrame(np.random.randn(5000, 5), dtype='float64', columns=self.create_cols('float')) - self.df_int = DataFrame(np.random.randn(5000, 5), dtype='int64', columns=self.create_cols('int')) - self.df_bool = DataFrame(True, index=self.df_float.index, columns=self.create_cols('bool')) - self.df_object = DataFrame('foo', index=self.df_float.index, columns=self.create_cols('object')) - self.df_dt = DataFrame(Timestamp('20010101'), index=self.df_float.index, columns=self.create_cols('date')) - self.df_float.ix[30:500, 1:3] = np.nan - self.df = concat([self.df_float, self.df_int, self.df_bool, self.df_object, self.df_dt], axis=1) - - def time_frame_to_csv_mixed(self): - self.df.to_csv('__test__.csv') - - def create_cols(self, name): - return [('%s%03d' % (name, i)) for i in range(5)] - - -class read_csv_infer_datetime_format_custom(object): - goal_time = 0.2 - - def setup(self): - self.rng = date_range('1/1/2000', periods=1000) - self.data = '\n'.join(self.rng.map((lambda x: x.strftime('%m/%d/%Y %H:%M:%S.%f')))) - - def time_read_csv_infer_datetime_format_custom(self): - read_csv(StringIO(self.data), header=None, names=['foo'], parse_dates=['foo'], infer_datetime_format=True) - - -class read_csv_infer_datetime_format_iso8601(object): - goal_time = 0.2 - - def setup(self): - self.rng = date_range('1/1/2000', periods=1000) - self.data = '\n'.join(self.rng.map((lambda x: x.strftime('%Y-%m-%d %H:%M:%S')))) - - def 
time_read_csv_infer_datetime_format_iso8601(self): - read_csv(StringIO(self.data), header=None, names=['foo'], parse_dates=['foo'], infer_datetime_format=True) - - -class read_csv_infer_datetime_format_ymd(object): - goal_time = 0.2 - - def setup(self): - self.rng = date_range('1/1/2000', periods=1000) - self.data = '\n'.join(self.rng.map((lambda x: x.strftime('%Y%m%d')))) - - def time_read_csv_infer_datetime_format_ymd(self): - read_csv(StringIO(self.data), header=None, names=['foo'], parse_dates=['foo'], infer_datetime_format=True) - - -class read_csv_skiprows(object): - goal_time = 0.2 - - def setup(self): - self.index = tm.makeStringIndex(20000) - self.df = DataFrame({'float1': randn(20000), 'float2': randn(20000), 'string1': (['foo'] * 20000), 'bool1': ([True] * 20000), 'int1': np.random.randint(0, 200000, size=20000), }, index=self.index) - self.df.to_csv('__test__.csv') - - def time_read_csv_skiprows(self): - read_csv('__test__.csv', skiprows=10000) - - -class read_csv_standard(object): - goal_time = 0.2 - - def setup(self): - self.index = tm.makeStringIndex(10000) - self.df = DataFrame({'float1': randn(10000), 'float2': randn(10000), 'string1': (['foo'] * 10000), 'bool1': ([True] * 10000), 'int1': np.random.randint(0, 100000, size=10000), }, index=self.index) - self.df.to_csv('__test__.csv') - - def time_read_csv_standard(self): - read_csv('__test__.csv') - - -class read_parse_dates_iso8601(object): - goal_time = 0.2 - - def setup(self): - self.rng = date_range('1/1/2000', periods=1000) - self.data = '\n'.join(self.rng.map((lambda x: x.strftime('%Y-%m-%d %H:%M:%S')))) - - def time_read_parse_dates_iso8601(self): - read_csv(StringIO(self.data), header=None, names=['foo'], parse_dates=['foo']) - - -class read_uint64_integers(object): - goal_time = 0.2 - - def setup(self): - self.na_values = [2**63 + 500] - - self.arr1 = np.arange(10000).astype('uint64') + 2**63 - self.data1 = '\n'.join(map(lambda x: str(x), self.arr1)) - - self.arr2 = 
self.arr1.copy().astype(object) - self.arr2[500] = -1 - self.data2 = '\n'.join(map(lambda x: str(x), self.arr2)) - - def time_read_uint64(self): - read_csv(StringIO(self.data1), header=None) - - def time_read_uint64_neg_values(self): - read_csv(StringIO(self.data2), header=None) - - def time_read_uint64_na_values(self): - read_csv(StringIO(self.data1), header=None, na_values=self.na_values) - - -class write_csv_standard(object): - goal_time = 0.2 - - def setup(self): - self.index = tm.makeStringIndex(10000) - self.df = DataFrame({'float1': randn(10000), 'float2': randn(10000), 'string1': (['foo'] * 10000), 'bool1': ([True] * 10000), 'int1': np.random.randint(0, 100000, size=10000), }, index=self.index) - - def time_write_csv_standard(self): - self.df.to_csv('__test__.csv') - - -class read_csv_from_s3(object): - # Make sure that we can read part of a file from S3 without - # needing to download the entire thing. Use the timeit.default_timer - # to measure wall time instead of CPU time -- we want to see - # how long it takes to download the data. - timer = timeit.default_timer - params = ([None, "gzip", "bz2"], ["python", "c"]) - param_names = ["compression", "engine"] - - def setup(self, compression, engine): - if compression == "bz2" and engine == "c" and compat.PY2: - # The Python 2 C parser can't read bz2 from open files. - raise NotImplementedError - try: - import s3fs - except ImportError: - # Skip these benchmarks if `boto` is not installed. - raise NotImplementedError - - self.big_fname = "s3://pandas-test/large_random.csv" - - def time_read_nrows(self, compression, engine): - # Read a small number of rows from a huge (100,000 x 50) table. 
- ext = "" - if compression == "gzip": - ext = ".gz" - elif compression == "bz2": - ext = ".bz2" - pd.read_csv(self.big_fname + ext, nrows=10, - compression=compression, engine=engine) diff --git a/asv_bench/benchmarks/io_sql.py b/asv_bench/benchmarks/io_sql.py deleted file mode 100644 index ec855e5d33525..0000000000000 --- a/asv_bench/benchmarks/io_sql.py +++ /dev/null @@ -1,105 +0,0 @@ -import sqlalchemy -from .pandas_vb_common import * -import sqlite3 -from sqlalchemy import create_engine - - -#------------------------------------------------------------------------------- -# to_sql - -class WriteSQL(object): - goal_time = 0.2 - - def setup(self): - self.engine = create_engine('sqlite:///:memory:') - self.con = sqlite3.connect(':memory:') - self.index = tm.makeStringIndex(10000) - self.df = DataFrame({'float1': randn(10000), 'float2': randn(10000), 'string1': (['foo'] * 10000), 'bool1': ([True] * 10000), 'int1': np.random.randint(0, 100000, size=10000), }, index=self.index) - - def time_fallback(self): - self.df.to_sql('test1', self.con, if_exists='replace') - - def time_sqlalchemy(self): - self.df.to_sql('test1', self.engine, if_exists='replace') - - -#------------------------------------------------------------------------------- -# read_sql - -class ReadSQL(object): - goal_time = 0.2 - - def setup(self): - self.engine = create_engine('sqlite:///:memory:') - self.con = sqlite3.connect(':memory:') - self.index = tm.makeStringIndex(10000) - self.df = DataFrame({'float1': randn(10000), 'float2': randn(10000), 'string1': (['foo'] * 10000), 'bool1': ([True] * 10000), 'int1': np.random.randint(0, 100000, size=10000), }, index=self.index) - self.df.to_sql('test2', self.engine, if_exists='replace') - self.df.to_sql('test2', self.con, if_exists='replace') - - def time_read_query_fallback(self): - read_sql_query('SELECT * FROM test2', self.con) - - def time_read_query_sqlalchemy(self): - read_sql_query('SELECT * FROM test2', self.engine) - - def 
time_read_table_sqlalchemy(self): - read_sql_table('test2', self.engine) - - -#------------------------------------------------------------------------------- -# type specific write - -class WriteSQLTypes(object): - goal_time = 0.2 - - def setup(self): - self.engine = create_engine('sqlite:///:memory:') - self.con = sqlite3.connect(':memory:') - self.df = DataFrame({'float': randn(10000), 'string': (['foo'] * 10000), 'bool': ([True] * 10000), 'datetime': date_range('2000-01-01', periods=10000, freq='s'), }) - self.df.loc[1000:3000, 'float'] = np.nan - - def time_string_fallback(self): - self.df[['string']].to_sql('test_string', self.con, if_exists='replace') - - def time_string_sqlalchemy(self): - self.df[['string']].to_sql('test_string', self.engine, if_exists='replace') - - def time_float_fallback(self): - self.df[['float']].to_sql('test_float', self.con, if_exists='replace') - - def time_float_sqlalchemy(self): - self.df[['float']].to_sql('test_float', self.engine, if_exists='replace') - - def time_datetime_sqlalchemy(self): - self.df[['datetime']].to_sql('test_datetime', self.engine, if_exists='replace') - - -#------------------------------------------------------------------------------- -# type specific read - -class ReadSQLTypes(object): - goal_time = 0.2 - - def setup(self): - self.engine = create_engine('sqlite:///:memory:') - self.con = sqlite3.connect(':memory:') - self.df = DataFrame({'float': randn(10000), 'datetime': date_range('2000-01-01', periods=10000, freq='s'), }) - self.df['datetime_string'] = self.df['datetime'].map(str) - self.df.to_sql('test_type', self.engine, if_exists='replace') - self.df[['float', 'datetime_string']].to_sql('test_type', self.con, if_exists='replace') - - def time_datetime_read_and_parse_sqlalchemy(self): - read_sql_table('test_type', self.engine, columns=['datetime_string'], parse_dates=['datetime_string']) - - def time_datetime_read_as_native_sqlalchemy(self): - read_sql_table('test_type', self.engine, 
columns=['datetime']) - - def time_float_read_query_fallback(self): - read_sql_query('SELECT float FROM test_type', self.con) - - def time_float_read_query_sqlalchemy(self): - read_sql_query('SELECT float FROM test_type', self.engine) - - def time_float_read_table_sqlalchemy(self): - read_sql_table('test_type', self.engine, columns=['float']) diff --git a/asv_bench/benchmarks/join_merge.py b/asv_bench/benchmarks/join_merge.py index d9c631fa92efd..de0a3b33da147 100644 --- a/asv_bench/benchmarks/join_merge.py +++ b/asv_bench/benchmarks/join_merge.py @@ -1,20 +1,25 @@ -from .pandas_vb_common import * +import warnings +import string +import numpy as np +import pandas.util.testing as tm +from pandas import (DataFrame, Series, MultiIndex, date_range, concat, merge, + merge_asof) try: from pandas import merge_ordered except ImportError: from pandas import ordered_merge as merge_ordered +from .pandas_vb_common import Panel, setup # noqa -#---------------------------------------------------------------------- -# Append class Append(object): + goal_time = 0.2 def setup(self): - self.df1 = pd.DataFrame(np.random.randn(10000, 4), - columns=['A', 'B', 'C', 'D']) + self.df1 = DataFrame(np.random.randn(10000, 4), + columns=['A', 'B', 'C', 'D']) self.df2 = self.df1.copy() self.df2.index = np.arange(10000, 20000) self.mdf1 = self.df1.copy() @@ -22,7 +27,8 @@ def setup(self): self.mdf1['obj2'] = 'bar' self.mdf1['int1'] = 5 try: - self.mdf1.consolidate(inplace=True) + with warnings.catch_warnings(record=True): + self.mdf1.consolidate(inplace=True) except: pass self.mdf2 = self.mdf1.copy() @@ -35,325 +41,322 @@ def time_append_mixed(self): self.mdf1.append(self.mdf2) -#---------------------------------------------------------------------- -# Concat - class Concat(object): - goal_time = 0.2 - def setup(self): - self.n = 1000 - self.indices = tm.makeStringIndex(1000) - self.s = Series(self.n, index=self.indices) - self.pieces = [self.s[i:(- i)] for i in range(1, 10)] - self.pieces = 
(self.pieces * 50) - - self.df_small = pd.DataFrame(randn(5, 4)) + goal_time = 0.2 + params = [0, 1] + param_names = ['axis'] - # empty - self.df = pd.DataFrame(dict(A=range(10000)), index=date_range('20130101', periods=10000, freq='s')) - self.empty = pd.DataFrame() + def setup(self, axis): + N = 1000 + s = Series(N, index=tm.makeStringIndex(N)) + self.series = [s[i:- i] for i in range(1, 10)] * 50 + self.small_frames = [DataFrame(np.random.randn(5, 4))] * 1000 + df = DataFrame({'A': range(N)}, + index=date_range('20130101', periods=N, freq='s')) + self.empty_left = [DataFrame(), df] + self.empty_right = [df, DataFrame()] - def time_concat_series_axis1(self): - concat(self.pieces, axis=1) + def time_concat_series(self, axis): + concat(self.series, axis=axis) - def time_concat_small_frames(self): - concat(([self.df_small] * 1000)) + def time_concat_small_frames(self, axis): + concat(self.small_frames, axis=axis) - def time_concat_empty_frames1(self): - concat([self.df, self.empty]) + def time_concat_empty_right(self, axis): + concat(self.empty_right, axis=axis) - def time_concat_empty_frames2(self): - concat([self.empty, self.df]) + def time_concat_empty_left(self, axis): + concat(self.empty_left, axis=axis) class ConcatPanels(object): - goal_time = 0.2 - def setup(self): - dataset = np.zeros((10000, 200, 2), dtype=np.float32) - self.panels_f = [pd.Panel(np.copy(dataset, order='F')) - for i in range(20)] - self.panels_c = [pd.Panel(np.copy(dataset, order='C')) - for i in range(20)] - - def time_c_ordered_axis0(self): - concat(self.panels_c, axis=0, ignore_index=True) - - def time_f_ordered_axis0(self): - concat(self.panels_f, axis=0, ignore_index=True) + goal_time = 0.2 + params = ([0, 1, 2], [True, False]) + param_names = ['axis', 'ignore_index'] - def time_c_ordered_axis1(self): - concat(self.panels_c, axis=1, ignore_index=True) + def setup(self, axis, ignore_index): + with warnings.catch_warnings(record=True): + panel_c = Panel(np.zeros((10000, 200, 2), + 
dtype=np.float32, + order='C')) + self.panels_c = [panel_c] * 20 + panel_f = Panel(np.zeros((10000, 200, 2), + dtype=np.float32, + order='F')) + self.panels_f = [panel_f] * 20 - def time_f_ordered_axis1(self): - concat(self.panels_f, axis=1, ignore_index=True) + def time_c_ordered(self, axis, ignore_index): + with warnings.catch_warnings(record=True): + concat(self.panels_c, axis=axis, ignore_index=ignore_index) - def time_c_ordered_axis2(self): - concat(self.panels_c, axis=2, ignore_index=True) + def time_f_ordered(self, axis, ignore_index): + with warnings.catch_warnings(record=True): + concat(self.panels_f, axis=axis, ignore_index=ignore_index) - def time_f_ordered_axis2(self): - concat(self.panels_f, axis=2, ignore_index=True) +class ConcatDataFrames(object): -class ConcatFrames(object): goal_time = 0.2 + params = ([0, 1], [True, False]) + param_names = ['axis', 'ignore_index'] - def setup(self): - dataset = np.zeros((10000, 200), dtype=np.float32) - - self.frames_f = [pd.DataFrame(np.copy(dataset, order='F')) - for i in range(20)] - self.frames_c = [pd.DataFrame(np.copy(dataset, order='C')) - for i in range(20)] + def setup(self, axis, ignore_index): + frame_c = DataFrame(np.zeros((10000, 200), + dtype=np.float32, order='C')) + self.frame_c = [frame_c] * 20 + frame_f = DataFrame(np.zeros((10000, 200), + dtype=np.float32, order='F')) + self.frame_f = [frame_f] * 20 - def time_c_ordered_axis0(self): - concat(self.frames_c, axis=0, ignore_index=True) + def time_c_ordered(self, axis, ignore_index): + concat(self.frame_c, axis=axis, ignore_index=ignore_index) - def time_f_ordered_axis0(self): - concat(self.frames_f, axis=0, ignore_index=True) + def time_f_ordered(self, axis, ignore_index): + concat(self.frame_f, axis=axis, ignore_index=ignore_index) - def time_c_ordered_axis1(self): - concat(self.frames_c, axis=1, ignore_index=True) - - def time_f_ordered_axis1(self): - concat(self.frames_f, axis=1, ignore_index=True) - - 
-#---------------------------------------------------------------------- -# Joins class Join(object): - goal_time = 0.2 - def setup(self): - self.level1 = tm.makeStringIndex(10).values - self.level2 = tm.makeStringIndex(1000).values - self.label1 = np.arange(10).repeat(1000) - self.label2 = np.tile(np.arange(1000), 10) - self.key1 = np.tile(self.level1.take(self.label1), 10) - self.key2 = np.tile(self.level2.take(self.label2), 10) - self.shuf = np.arange(100000) - random.shuffle(self.shuf) - try: - self.index2 = MultiIndex(levels=[self.level1, self.level2], - labels=[self.label1, self.label2]) - self.index3 = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)], - labels=[np.arange(10).repeat(10000), np.tile(np.arange(100).repeat(100), 10), np.tile(np.tile(np.arange(100), 100), 10)]) - self.df_multi = DataFrame(np.random.randn(len(self.index2), 4), - index=self.index2, - columns=['A', 'B', 'C', 'D']) - except: - pass - self.df = pd.DataFrame({'data1': np.random.randn(100000), - 'data2': np.random.randn(100000), - 'key1': self.key1, - 'key2': self.key2}) - self.df_key1 = pd.DataFrame(np.random.randn(len(self.level1), 4), - index=self.level1, - columns=['A', 'B', 'C', 'D']) - self.df_key2 = pd.DataFrame(np.random.randn(len(self.level2), 4), - index=self.level2, - columns=['A', 'B', 'C', 'D']) - self.df_shuf = self.df.reindex(self.df.index[self.shuf]) - - def time_join_dataframe_index_multi(self): - self.df.join(self.df_multi, on=['key1', 'key2']) - - def time_join_dataframe_index_single_key_bigger(self): - self.df.join(self.df_key2, on='key2') - - def time_join_dataframe_index_single_key_bigger_sort(self): - self.df_shuf.join(self.df_key2, on='key2', sort=True) - - def time_join_dataframe_index_single_key_small(self): - self.df.join(self.df_key1, on='key1') + goal_time = 0.2 + params = [True, False] + param_names = ['sort'] + + def setup(self, sort): + level1 = tm.makeStringIndex(10).values + level2 = tm.makeStringIndex(1000).values + label1 = 
np.arange(10).repeat(1000) + label2 = np.tile(np.arange(1000), 10) + index2 = MultiIndex(levels=[level1, level2], + labels=[label1, label2]) + self.df_multi = DataFrame(np.random.randn(len(index2), 4), + index=index2, + columns=['A', 'B', 'C', 'D']) + + self.key1 = np.tile(level1.take(label1), 10) + self.key2 = np.tile(level2.take(label2), 10) + self.df = DataFrame({'data1': np.random.randn(100000), + 'data2': np.random.randn(100000), + 'key1': self.key1, + 'key2': self.key2}) + + self.df_key1 = DataFrame(np.random.randn(len(level1), 4), + index=level1, + columns=['A', 'B', 'C', 'D']) + self.df_key2 = DataFrame(np.random.randn(len(level2), 4), + index=level2, + columns=['A', 'B', 'C', 'D']) + + shuf = np.arange(100000) + np.random.shuffle(shuf) + self.df_shuf = self.df.reindex(self.df.index[shuf]) + + def time_join_dataframe_index_multi(self, sort): + self.df.join(self.df_multi, on=['key1', 'key2'], sort=sort) + + def time_join_dataframe_index_single_key_bigger(self, sort): + self.df.join(self.df_key2, on='key2', sort=sort) + + def time_join_dataframe_index_single_key_small(self, sort): + self.df.join(self.df_key1, on='key1', sort=sort) + + def time_join_dataframe_index_shuffle_key_bigger_sort(self, sort): + self.df_shuf.join(self.df_key2, on='key2', sort=sort) class JoinIndex(object): + goal_time = 0.2 def setup(self): - np.random.seed(2718281) - self.n = 50000 - self.left = pd.DataFrame(np.random.randint(1, (self.n / 500), (self.n, 2)), columns=['jim', 'joe']) - self.right = pd.DataFrame(np.random.randint(1, (self.n / 500), (self.n, 2)), columns=['jolie', 'jolia']).set_index('jolie') + N = 50000 + self.left = DataFrame(np.random.randint(1, N / 500, (N, 2)), + columns=['jim', 'joe']) + self.right = DataFrame(np.random.randint(1, N / 500, (N, 2)), + columns=['jolie', 'jolia']).set_index('jolie') def time_left_outer_join_index(self): self.left.join(self.right, on='jim') -class join_non_unique_equal(object): +class JoinNonUnique(object): # outer join of non-unique # 
GH 6329 - goal_time = 0.2 def setup(self): - self.date_index = date_range('01-Jan-2013', '23-Jan-2013', freq='T') - self.daily_dates = self.date_index.to_period('D').to_timestamp('S', 'S') - self.fracofday = (self.date_index.view(np.ndarray) - self.daily_dates.view(np.ndarray)) - self.fracofday = (self.fracofday.astype('timedelta64[ns]').astype(np.float64) / 86400000000000.0) - self.fracofday = Series(self.fracofday, self.daily_dates) - self.index = date_range(self.date_index.min().to_period('A').to_timestamp('D', 'S'), self.date_index.max().to_period('A').to_timestamp('D', 'E'), freq='D') - self.temp = Series(1.0, self.index) + date_index = date_range('01-Jan-2013', '23-Jan-2013', freq='T') + daily_dates = date_index.to_period('D').to_timestamp('S', 'S') + self.fracofday = date_index.values - daily_dates.values + self.fracofday = self.fracofday.astype('timedelta64[ns]') + self.fracofday = self.fracofday.astype(np.float64) / 86400000000000.0 + self.fracofday = Series(self.fracofday, daily_dates) + index = date_range(date_index.min(), date_index.max(), freq='D') + self.temp = Series(1.0, index)[self.fracofday.index] def time_join_non_unique_equal(self): - (self.fracofday * self.temp[self.fracofday.index]) + self.fracofday * self.temp -#---------------------------------------------------------------------- -# Merges - class Merge(object): - goal_time = 0.2 - def setup(self): - self.N = 10000 - self.indices = tm.makeStringIndex(self.N).values - self.indices2 = tm.makeStringIndex(self.N).values - self.key = np.tile(self.indices[:8000], 10) - self.key2 = np.tile(self.indices2[:8000], 10) - self.left = pd.DataFrame({'key': self.key, 'key2': self.key2, - 'value': np.random.randn(80000)}) - self.right = pd.DataFrame({'key': self.indices[2000:], - 'key2': self.indices2[2000:], - 'value2': np.random.randn(8000)}) - - self.df = pd.DataFrame({'key1': np.tile(np.arange(500).repeat(10), 2), - 'key2': np.tile(np.arange(250).repeat(10), 4), - 'value': np.random.randn(10000)}) - 
self.df2 = pd.DataFrame({'key1': np.arange(500), 'value2': randn(500)}) + goal_time = 0.2 + params = [True, False] + param_names = ['sort'] + + def setup(self, sort): + N = 10000 + indices = tm.makeStringIndex(N).values + indices2 = tm.makeStringIndex(N).values + key = np.tile(indices[:8000], 10) + key2 = np.tile(indices2[:8000], 10) + self.left = DataFrame({'key': key, 'key2': key2, + 'value': np.random.randn(80000)}) + self.right = DataFrame({'key': indices[2000:], + 'key2': indices2[2000:], + 'value2': np.random.randn(8000)}) + + self.df = DataFrame({'key1': np.tile(np.arange(500).repeat(10), 2), + 'key2': np.tile(np.arange(250).repeat(10), 4), + 'value': np.random.randn(10000)}) + self.df2 = DataFrame({'key1': np.arange(500), + 'value2': np.random.randn(500)}) self.df3 = self.df[:5000] - def time_merge_2intkey_nosort(self): - merge(self.left, self.right, sort=False) + def time_merge_2intkey(self, sort): + merge(self.left, self.right, sort=sort) - def time_merge_2intkey_sort(self): - merge(self.left, self.right, sort=True) + def time_merge_dataframe_integer_2key(self, sort): + merge(self.df, self.df3, sort=sort) - def time_merge_dataframe_integer_2key(self): - merge(self.df, self.df3) + def time_merge_dataframe_integer_key(self, sort): + merge(self.df, self.df2, on='key1', sort=sort) - def time_merge_dataframe_integer_key(self): - merge(self.df, self.df2, on='key1') +class I8Merge(object): -class i8merge(object): goal_time = 0.2 + params = ['inner', 'outer', 'left', 'right'] + param_names = ['how'] - def setup(self): - (low, high, n) = (((-1) << 10), (1 << 10), (1 << 20)) - self.left = pd.DataFrame(np.random.randint(low, high, (n, 7)), - columns=list('ABCDEFG')) + def setup(self, how): + low, high, n = -1000, 1000, 10**6 + self.left = DataFrame(np.random.randint(low, high, (n, 7)), + columns=list('ABCDEFG')) self.left['left'] = self.left.sum(axis=1) - self.i = np.random.permutation(len(self.left)) - self.right = self.left.iloc[self.i].copy() - self.right.columns 
= (self.right.columns[:(-1)].tolist() + ['right']) - self.right.index = np.arange(len(self.right)) - self.right['right'] *= (-1) + self.right = self.left.sample(frac=1).rename({'left': 'right'}, axis=1) + self.right = self.right.reset_index(drop=True) + self.right['right'] *= -1 - def time_i8merge(self): - merge(self.left, self.right, how='outer') + def time_i8merge(self, how): + merge(self.left, self.right, how=how) -#---------------------------------------------------------------------- -# Ordered merge +class MergeCategoricals(object): -class MergeOrdered(object): + goal_time = 0.2 def setup(self): + self.left_object = DataFrame( + {'X': np.random.choice(range(0, 10), size=(10000,)), + 'Y': np.random.choice(['one', 'two', 'three'], size=(10000,))}) - groups = tm.makeStringIndex(10).values + self.right_object = DataFrame( + {'X': np.random.choice(range(0, 10), size=(10000,)), + 'Z': np.random.choice(['jjj', 'kkk', 'sss'], size=(10000,))}) + + self.left_cat = self.left_object.assign( + Y=self.left_object['Y'].astype('category')) + self.right_cat = self.right_object.assign( + Z=self.right_object['Z'].astype('category')) + + def time_merge_object(self): + merge(self.left_object, self.right_object, on='X') + + def time_merge_cat(self): + merge(self.left_cat, self.right_cat, on='X') - self.left = pd.DataFrame({'group': groups.repeat(5000), - 'key' : np.tile(np.arange(0, 10000, 2), 10), - 'lvalue': np.random.randn(50000)}) - self.right = pd.DataFrame({'key' : np.arange(10000), - 'rvalue' : np.random.randn(10000)}) +class MergeOrdered(object): + + def setup(self): + groups = tm.makeStringIndex(10).values + self.left = DataFrame({'group': groups.repeat(5000), + 'key': np.tile(np.arange(0, 10000, 2), 10), + 'lvalue': np.random.randn(50000)}) + self.right = DataFrame({'key': np.arange(10000), + 'rvalue': np.random.randn(10000)}) def time_merge_ordered(self): merge_ordered(self.left, self.right, on='key', left_by='group') -# 
---------------------------------------------------------------------- -# asof merge - class MergeAsof(object): def setup(self): - import string - np.random.seed(0) one_count = 200000 two_count = 1000000 - self.df1 = pd.DataFrame( + df1 = DataFrame( {'time': np.random.randint(0, one_count / 20, one_count), - 'key': np.random.choice(list(string.uppercase), one_count), + 'key': np.random.choice(list(string.ascii_uppercase), one_count), 'key2': np.random.randint(0, 25, one_count), 'value1': np.random.randn(one_count)}) - self.df2 = pd.DataFrame( + df2 = DataFrame( {'time': np.random.randint(0, two_count / 20, two_count), - 'key': np.random.choice(list(string.uppercase), two_count), + 'key': np.random.choice(list(string.ascii_uppercase), two_count), 'key2': np.random.randint(0, 25, two_count), 'value2': np.random.randn(two_count)}) - self.df1 = self.df1.sort_values('time') - self.df2 = self.df2.sort_values('time') + df1 = df1.sort_values('time') + df2 = df2.sort_values('time') - self.df1['time32'] = np.int32(self.df1.time) - self.df2['time32'] = np.int32(self.df2.time) + df1['time32'] = np.int32(df1.time) + df2['time32'] = np.int32(df2.time) - self.df1a = self.df1[['time', 'value1']] - self.df2a = self.df2[['time', 'value2']] - self.df1b = self.df1[['time', 'key', 'value1']] - self.df2b = self.df2[['time', 'key', 'value2']] - self.df1c = self.df1[['time', 'key2', 'value1']] - self.df2c = self.df2[['time', 'key2', 'value2']] - self.df1d = self.df1[['time32', 'value1']] - self.df2d = self.df2[['time32', 'value2']] - self.df1e = self.df1[['time', 'key', 'key2', 'value1']] - self.df2e = self.df2[['time', 'key', 'key2', 'value2']] + self.df1a = df1[['time', 'value1']] + self.df2a = df2[['time', 'value2']] + self.df1b = df1[['time', 'key', 'value1']] + self.df2b = df2[['time', 'key', 'value2']] + self.df1c = df1[['time', 'key2', 'value1']] + self.df2c = df2[['time', 'key2', 'value2']] + self.df1d = df1[['time32', 'value1']] + self.df2d = df2[['time32', 'value2']] + self.df1e 
= df1[['time', 'key', 'key2', 'value1']] + self.df2e = df2[['time', 'key', 'key2', 'value2']] - def time_noby(self): + def time_on_int(self): merge_asof(self.df1a, self.df2a, on='time') + def time_on_int32(self): + merge_asof(self.df1d, self.df2d, on='time32') + def time_by_object(self): merge_asof(self.df1b, self.df2b, on='time', by='key') def time_by_int(self): merge_asof(self.df1c, self.df2c, on='time', by='key2') - def time_on_int32(self): - merge_asof(self.df1d, self.df2d, on='time32') - def time_multiby(self): merge_asof(self.df1e, self.df2e, on='time', by=['key', 'key2']) -#---------------------------------------------------------------------- -# data alignment - class Align(object): + goal_time = 0.2 def setup(self): - self.n = 1000000 - self.sz = 500000 - self.rng = np.arange(0, 10000000000000, 10000000) - self.stamps = (np.datetime64(datetime.now()).view('i8') + self.rng) - self.idx1 = np.sort(self.sample(self.stamps, self.sz)) - self.idx2 = np.sort(self.sample(self.stamps, self.sz)) - self.ts1 = Series(np.random.randn(self.sz), self.idx1) - self.ts2 = Series(np.random.randn(self.sz), self.idx2) - - def sample(self, values, k): - self.sampler = np.random.permutation(len(values)) - return values.take(self.sampler[:k]) + size = 5 * 10**5 + rng = np.arange(0, 10**13, 10**7) + stamps = np.datetime64('now').view('i8') + rng + idx1 = np.sort(np.random.choice(stamps, size, replace=False)) + idx2 = np.sort(np.random.choice(stamps, size, replace=False)) + self.ts1 = Series(np.random.randn(size), idx1) + self.ts2 = Series(np.random.randn(size), idx2) def time_series_align_int64_index(self): - (self.ts1 + self.ts2) + self.ts1 + self.ts2 def time_series_align_left_monotonic(self): self.ts1.align(self.ts2, join='left') diff --git a/asv_bench/benchmarks/multiindex_object.py b/asv_bench/benchmarks/multiindex_object.py new file mode 100644 index 0000000000000..0c92214795557 --- /dev/null +++ b/asv_bench/benchmarks/multiindex_object.py @@ -0,0 +1,140 @@ +import string + 
+import numpy as np +import pandas.util.testing as tm +from pandas import date_range, MultiIndex + +from .pandas_vb_common import setup # noqa + + +class GetLoc(object): + + goal_time = 0.2 + + def setup(self): + self.mi_large = MultiIndex.from_product( + [np.arange(1000), np.arange(20), list(string.ascii_letters)], + names=['one', 'two', 'three']) + self.mi_med = MultiIndex.from_product( + [np.arange(1000), np.arange(10), list('A')], + names=['one', 'two', 'three']) + self.mi_small = MultiIndex.from_product( + [np.arange(100), list('A'), list('A')], + names=['one', 'two', 'three']) + + def time_large_get_loc(self): + self.mi_large.get_loc((999, 19, 'Z')) + + def time_large_get_loc_warm(self): + for _ in range(1000): + self.mi_large.get_loc((999, 19, 'Z')) + + def time_med_get_loc(self): + self.mi_med.get_loc((999, 9, 'A')) + + def time_med_get_loc_warm(self): + for _ in range(1000): + self.mi_med.get_loc((999, 9, 'A')) + + def time_string_get_loc(self): + self.mi_small.get_loc((99, 'A', 'A')) + + def time_small_get_loc_warm(self): + for _ in range(1000): + self.mi_small.get_loc((99, 'A', 'A')) + + +class Duplicates(object): + + goal_time = 0.2 + + def setup(self): + size = 65536 + arrays = [np.random.randint(0, 8192, size), + np.random.randint(0, 1024, size)] + mask = np.random.rand(size) < 0.1 + self.mi_unused_levels = MultiIndex.from_arrays(arrays) + self.mi_unused_levels = self.mi_unused_levels[mask] + + def time_remove_unused_levels(self): + self.mi_unused_levels.remove_unused_levels() + + +class Integer(object): + + goal_time = 0.2 + + def setup(self): + self.mi_int = MultiIndex.from_product([np.arange(1000), + np.arange(1000)], + names=['one', 'two']) + self.obj_index = np.array([(0, 10), (0, 11), (0, 12), + (0, 13), (0, 14), (0, 15), + (0, 16), (0, 17), (0, 18), + (0, 19)], dtype=object) + + def time_get_indexer(self): + self.mi_int.get_indexer(self.obj_index) + + def time_is_monotonic(self): + self.mi_int.is_monotonic + + +class Duplicated(object): + + 
goal_time = 0.2 + + def setup(self): + n, k = 200, 5000 + levels = [np.arange(n), + tm.makeStringIndex(n).values, + 1000 + np.arange(n)] + labels = [np.random.choice(n, (k * n)) for lev in levels] + self.mi = MultiIndex(levels=levels, labels=labels) + + def time_duplicated(self): + self.mi.duplicated() + + +class Sortlevel(object): + + goal_time = 0.2 + + def setup(self): + n = 1182720 + low, high = -4096, 4096 + arrs = [np.repeat(np.random.randint(low, high, (n // k)), k) + for k in [11, 7, 5, 3, 1]] + self.mi_int = MultiIndex.from_arrays(arrs)[np.random.permutation(n)] + + a = np.repeat(np.arange(100), 1000) + b = np.tile(np.arange(1000), 100) + self.mi = MultiIndex.from_arrays([a, b]) + self.mi = self.mi.take(np.random.permutation(np.arange(100000))) + + def time_sortlevel_int64(self): + self.mi_int.sortlevel() + + def time_sortlevel_zero(self): + self.mi.sortlevel(0) + + def time_sortlevel_one(self): + self.mi.sortlevel(1) + + +class Values(object): + + goal_time = 0.2 + + def setup_cache(self): + + level1 = range(1000) + level2 = date_range(start='1/1/2012', periods=100) + mi = MultiIndex.from_product([level1, level2]) + return mi + + def time_datetime_level_values_copy(self, mi): + mi.copy().values + + def time_datetime_level_values_sliced(self, mi): + mi[:10].values diff --git a/asv_bench/benchmarks/offset.py b/asv_bench/benchmarks/offset.py new file mode 100644 index 0000000000000..e161b887ee86f --- /dev/null +++ b/asv_bench/benchmarks/offset.py @@ -0,0 +1,125 @@ +# -*- coding: utf-8 -*- +import warnings +from datetime import datetime + +import numpy as np +import pandas as pd +try: + import pandas.tseries.holiday # noqa +except ImportError: + pass + +hcal = pd.tseries.holiday.USFederalHolidayCalendar() +# These offests currently raise a NotImplimentedError with .apply_index() +non_apply = [pd.offsets.Day(), + pd.offsets.BYearEnd(), + pd.offsets.BYearBegin(), + pd.offsets.BQuarterEnd(), + pd.offsets.BQuarterBegin(), + pd.offsets.BMonthEnd(), + 
pd.offsets.BMonthBegin(), + pd.offsets.CustomBusinessDay(), + pd.offsets.CustomBusinessDay(calendar=hcal), + pd.offsets.CustomBusinessMonthBegin(calendar=hcal), + pd.offsets.CustomBusinessMonthEnd(calendar=hcal), + pd.offsets.CustomBusinessMonthEnd(calendar=hcal)] +other_offsets = [pd.offsets.YearEnd(), pd.offsets.YearBegin(), + pd.offsets.QuarterEnd(), pd.offsets.QuarterBegin(), + pd.offsets.MonthEnd(), pd.offsets.MonthBegin(), + pd.offsets.DateOffset(months=2, days=2), + pd.offsets.BusinessDay(), pd.offsets.SemiMonthEnd(), + pd.offsets.SemiMonthBegin()] +offsets = non_apply + other_offsets + + +class ApplyIndex(object): + + goal_time = 0.2 + + params = other_offsets + param_names = ['offset'] + + def setup(self, offset): + N = 10000 + self.rng = pd.date_range(start='1/1/2000', periods=N, freq='T') + + def time_apply_index(self, offset): + offset.apply_index(self.rng) + + +class OnOffset(object): + + goal_time = 0.2 + + params = offsets + param_names = ['offset'] + + def setup(self, offset): + self.dates = [datetime(2016, m, d) + for m in [10, 11, 12] + for d in [1, 2, 3, 28, 29, 30, 31] + if not (m == 11 and d == 31)] + + def time_on_offset(self, offset): + for date in self.dates: + offset.onOffset(date) + + +class OffsetSeriesArithmetic(object): + + goal_time = 0.2 + params = offsets + param_names = ['offset'] + + def setup(self, offset): + N = 1000 + rng = pd.date_range(start='1/1/2000', periods=N, freq='T') + self.data = pd.Series(rng) + + def time_add_offset(self, offset): + with warnings.catch_warnings(record=True): + self.data + offset + + +class OffsetDatetimeIndexArithmetic(object): + + goal_time = 0.2 + params = offsets + param_names = ['offset'] + + def setup(self, offset): + N = 1000 + self.data = pd.date_range(start='1/1/2000', periods=N, freq='T') + + def time_add_offset(self, offset): + with warnings.catch_warnings(record=True): + self.data + offset + + +class OffestDatetimeArithmetic(object): + + goal_time = 0.2 + params = offsets + param_names = 
['offset'] + + def setup(self, offset): + self.date = datetime(2011, 1, 1) + self.dt64 = np.datetime64('2011-01-01 09:00Z') + + def time_apply(self, offset): + offset.apply(self.date) + + def time_apply_np_dt64(self, offset): + offset.apply(self.dt64) + + def time_add(self, offset): + self.date + offset + + def time_add_10(self, offset): + self.date + (10 * offset) + + def time_subtract(self, offset): + self.date - offset + + def time_subtract_10(self, offset): + self.date - (10 * offset) diff --git a/asv_bench/benchmarks/packers.py b/asv_bench/benchmarks/packers.py deleted file mode 100644 index cd43e305ead8f..0000000000000 --- a/asv_bench/benchmarks/packers.py +++ /dev/null @@ -1,316 +0,0 @@ -from .pandas_vb_common import * -from numpy.random import randint -import pandas as pd -from collections import OrderedDict -from pandas.compat import BytesIO -import sqlite3 -import os -from sqlalchemy import create_engine -import numpy as np -from random import randrange - -class _Packers(object): - goal_time = 0.2 - - def _setup(self): - self.f = '__test__.msg' - self.N = 100000 - self.C = 5 - self.index = date_range('20000101', periods=self.N, freq='H') - self.df = DataFrame(dict([('float{0}'.format(i), randn(self.N)) for i in range(self.C)]), index=self.index) - self.df2 = self.df.copy() - self.df2['object'] = [('%08x' % randrange((16 ** 8))) for _ in range(self.N)] - self.remove(self.f) - - def remove(self, f): - try: - os.remove(self.f) - except: - pass - -class Packers(_Packers): - goal_time = 0.2 - - def setup(self): - self._setup() - self.df.to_csv(self.f) - - def time_packers_read_csv(self): - pd.read_csv(self.f) - -class packers_read_excel(_Packers): - goal_time = 0.2 - - def setup(self): - self._setup() - self.bio = BytesIO() - self.writer = pd.io.excel.ExcelWriter(self.bio, engine='xlsxwriter') - self.df[:2000].to_excel(self.writer) - self.writer.save() - - def time_packers_read_excel(self): - self.bio.seek(0) - pd.read_excel(self.bio) - - -class 
packers_read_hdf_store(_Packers): - goal_time = 0.2 - - def setup(self): - self._setup() - self.df2.to_hdf(self.f, 'df') - - def time_packers_read_hdf_store(self): - pd.read_hdf(self.f, 'df') - - -class packers_read_hdf_table(_Packers): - - def setup(self): - self._setup() - self.df2.to_hdf(self.f, 'df', format='table') - - def time_packers_read_hdf_table(self): - pd.read_hdf(self.f, 'df') - - -class packers_read_json(_Packers): - - def setup(self): - self._setup() - self.df.to_json(self.f, orient='split') - self.df.index = np.arange(self.N) - - def time_packers_read_json(self): - pd.read_json(self.f, orient='split') - - -class packers_read_json_date_index(_Packers): - - def setup(self): - self._setup() - self.remove(self.f) - self.df.to_json(self.f, orient='split') - - def time_packers_read_json_date_index(self): - pd.read_json(self.f, orient='split') - - -class packers_read_pack(_Packers): - - def setup(self): - self._setup() - self.df2.to_msgpack(self.f) - - def time_packers_read_pack(self): - pd.read_msgpack(self.f) - - -class packers_read_pickle(_Packers): - - def setup(self): - self._setup() - self.df2.to_pickle(self.f) - - def time_packers_read_pickle(self): - pd.read_pickle(self.f) - -class packers_read_sql(_Packers): - - def setup(self): - self._setup() - self.engine = create_engine('sqlite:///:memory:') - self.df2.to_sql('table', self.engine, if_exists='replace') - - def time_packers_read_sql(self): - pd.read_sql_table('table', self.engine) - - -class packers_read_stata(_Packers): - - def setup(self): - self._setup() - self.df.to_stata(self.f, {'index': 'tc', }) - - def time_packers_read_stata(self): - pd.read_stata(self.f) - - -class packers_read_stata_with_validation(_Packers): - - def setup(self): - self._setup() - self.df['int8_'] = [randint(np.iinfo(np.int8).min, (np.iinfo(np.int8).max - 27)) for _ in range(self.N)] - self.df['int16_'] = [randint(np.iinfo(np.int16).min, (np.iinfo(np.int16).max - 27)) for _ in range(self.N)] - self.df['int32_'] = 
[randint(np.iinfo(np.int32).min, (np.iinfo(np.int32).max - 27)) for _ in range(self.N)] - self.df['float32_'] = np.array(randn(self.N), dtype=np.float32) - self.df.to_stata(self.f, {'index': 'tc', }) - - def time_packers_read_stata_with_validation(self): - pd.read_stata(self.f) - - -class packers_read_sas(_Packers): - - def setup(self): - self.f = os.path.join(os.path.dirname(__file__), '..', '..', - 'pandas', 'io', 'tests', 'sas', 'data', - 'test1.sas7bdat') - self.f2 = os.path.join(os.path.dirname(__file__), '..', '..', - 'pandas', 'io', 'tests', 'sas', 'data', - 'paxraw_d_short.xpt') - - def time_read_sas7bdat(self): - pd.read_sas(self.f, format='sas7bdat') - - def time_read_xport(self): - pd.read_sas(self.f, format='xport') - - -class CSV(_Packers): - - def setup(self): - self._setup() - - def time_write_csv(self): - self.df.to_csv(self.f) - - def teardown(self): - self.remove(self.f) - - -class Excel(_Packers): - - def setup(self): - self._setup() - self.bio = BytesIO() - - def time_write_excel_openpyxl(self): - self.bio.seek(0) - self.writer = pd.io.excel.ExcelWriter(self.bio, engine='openpyxl') - self.df[:2000].to_excel(self.writer) - self.writer.save() - - def time_write_excel_xlsxwriter(self): - self.bio.seek(0) - self.writer = pd.io.excel.ExcelWriter(self.bio, engine='xlsxwriter') - self.df[:2000].to_excel(self.writer) - self.writer.save() - - def time_write_excel_xlwt(self): - self.bio.seek(0) - self.writer = pd.io.excel.ExcelWriter(self.bio, engine='xlwt') - self.df[:2000].to_excel(self.writer) - self.writer.save() - - -class HDF(_Packers): - - def setup(self): - self._setup() - - def time_write_hdf_store(self): - self.df2.to_hdf(self.f, 'df') - - def time_write_hdf_table(self): - self.df2.to_hdf(self.f, 'df', table=True) - - def teardown(self): - self.remove(self.f) - -class JSON(_Packers): - - def setup(self): - self._setup() - self.df_date = self.df.copy() - self.df.index = np.arange(self.N) - self.cols = [(lambda i: ('{0}_timedelta'.format(i), 
[pd.Timedelta(('%d seconds' % randrange(1000000.0))) for _ in range(self.N)])), (lambda i: ('{0}_int'.format(i), randint(100000000.0, size=self.N))), (lambda i: ('{0}_timestamp'.format(i), [pd.Timestamp((1418842918083256000 + randrange(1000000000.0, 1e+18, 200))) for _ in range(self.N)]))] - self.df_mixed = DataFrame(OrderedDict([self.cols[(i % len(self.cols))](i) for i in range(self.C)]), index=self.index) - - self.cols = [(lambda i: ('{0}_float'.format(i), randn(self.N))), (lambda i: ('{0}_int'.format(i), randint(100000000.0, size=self.N)))] - self.df_mixed2 = DataFrame(OrderedDict([self.cols[(i % len(self.cols))](i) for i in range(self.C)]), index=self.index) - - self.cols = [(lambda i: ('{0}_float'.format(i), randn(self.N))), (lambda i: ('{0}_int'.format(i), randint(100000000.0, size=self.N))), (lambda i: ('{0}_str'.format(i), [('%08x' % randrange((16 ** 8))) for _ in range(self.N)]))] - self.df_mixed3 = DataFrame(OrderedDict([self.cols[(i % len(self.cols))](i) for i in range(self.C)]), index=self.index) - - def time_write_json(self): - self.df.to_json(self.f, orient='split') - - def time_write_json_T(self): - self.df.to_json(self.f, orient='columns') - - def time_write_json_date_index(self): - self.df_date.to_json(self.f, orient='split') - - def time_write_json_mixed_delta_int_tstamp(self): - self.df_mixed.to_json(self.f, orient='split') - - def time_write_json_mixed_float_int(self): - self.df_mixed2.to_json(self.f, orient='index') - - def time_write_json_mixed_float_int_T(self): - self.df_mixed2.to_json(self.f, orient='columns') - - def time_write_json_mixed_float_int_str(self): - self.df_mixed3.to_json(self.f, orient='split') - - def time_write_json_lines(self): - self.df.to_json(self.f, orient="records", lines=True) - - def teardown(self): - self.remove(self.f) - - -class MsgPack(_Packers): - - def setup(self): - self._setup() - - def time_write_msgpack(self): - self.df2.to_msgpack(self.f) - - def teardown(self): - self.remove(self.f) - - -class 
Pickle(_Packers): - - def setup(self): - self._setup() - - def time_write_pickle(self): - self.df2.to_pickle(self.f) - - def teardown(self): - self.remove(self.f) - - -class SQL(_Packers): - - def setup(self): - self._setup() - self.engine = create_engine('sqlite:///:memory:') - - def time_write_sql(self): - self.df2.to_sql('table', self.engine, if_exists='replace') - - -class STATA(_Packers): - - def setup(self): - self._setup() - - self.df3=self.df.copy() - self.df3['int8_'] = [randint(np.iinfo(np.int8).min, (np.iinfo(np.int8).max - 27)) for _ in range(self.N)] - self.df3['int16_'] = [randint(np.iinfo(np.int16).min, (np.iinfo(np.int16).max - 27)) for _ in range(self.N)] - self.df3['int32_'] = [randint(np.iinfo(np.int32).min, (np.iinfo(np.int32).max - 27)) for _ in range(self.N)] - self.df3['float32_'] = np.array(randn(self.N), dtype=np.float32) - - def time_write_stata(self): - self.df.to_stata(self.f, {'index': 'tc', }) - - def time_write_stata_with_validation(self): - self.df3.to_stata(self.f, {'index': 'tc', }) - - def teardown(self): - self.remove(self.f) diff --git a/asv_bench/benchmarks/pandas_vb_common.py b/asv_bench/benchmarks/pandas_vb_common.py index 25b0b5dd4d1b0..c0d24afae4219 100644 --- a/asv_bench/benchmarks/pandas_vb_common.py +++ b/asv_bench/benchmarks/pandas_vb_common.py @@ -1,31 +1,46 @@ -from pandas import * -import pandas as pd -from datetime import timedelta -from numpy.random import randn -from numpy.random import randint -from numpy.random import permutation -import pandas.util.testing as tm -import random +import os +from importlib import import_module + import numpy as np -import threading try: - from pandas.compat import range + from pandas import Panel except ImportError: - pass + from pandas import WidePanel as Panel # noqa -np.random.seed(1234) -try: - import pandas._tseries as lib -except: - import pandas.lib as lib +# Compatibility import for lib +for imp in ['pandas._libs.lib', 'pandas.lib']: + try: + lib = import_module(imp) + 
break + except: + pass -try: - Panel = Panel -except Exception: - Panel = WidePanel +numeric_dtypes = [np.int64, np.int32, np.uint32, np.uint64, np.float32, + np.float64, np.int16, np.int8, np.uint16, np.uint8] +datetime_dtypes = [np.datetime64, np.timedelta64] -# didn't add to namespace until later -try: - from pandas.core.index import MultiIndex -except ImportError: - pass + +def setup(*args, **kwargs): + # This function just needs to be imported into each benchmark file to + # set up the random seed before each function. + # http://asv.readthedocs.io/en/latest/writing_benchmarks.html + np.random.seed(1234) + + +class BaseIO(object): + """ + Base class for IO benchmarks + """ + fname = None + + def remove(self, f): + """Remove created files""" + try: + os.remove(f) + except: + # On Windows, attempting to remove a file that is in use + # causes an exception to be raised + pass + + def teardown(self, *args, **kwargs): + self.remove(self.fname) diff --git a/asv_bench/benchmarks/panel_ctor.py b/asv_bench/benchmarks/panel_ctor.py index faedce6c574ec..ce946c76ed199 100644 --- a/asv_bench/benchmarks/panel_ctor.py +++ b/asv_bench/benchmarks/panel_ctor.py @@ -1,64 +1,60 @@ -from .pandas_vb_common import * +import warnings +from datetime import datetime, timedelta +from pandas import DataFrame, DatetimeIndex, date_range -class Constructors1(object): - goal_time = 0.2 - - def setup(self): - self.data_frames = {} - self.start = datetime(1990, 1, 1) - self.end = datetime(2012, 1, 1) - for x in range(100): - self.end += timedelta(days=1) - self.dr = np.asarray(date_range(self.start, self.end)) - self.df = DataFrame({'a': ([0] * len(self.dr)), 'b': ([1] * len(self.dr)), 'c': ([2] * len(self.dr)), }, index=self.dr) - self.data_frames[x] = self.df - - def time_panel_from_dict_all_different_indexes(self): - Panel.from_dict(self.data_frames) +from .pandas_vb_common import Panel, setup # noqa -class Constructors2(object): +class DifferentIndexes(object): goal_time = 0.2 def 
setup(self): self.data_frames = {} + start = datetime(1990, 1, 1) + end = datetime(2012, 1, 1) for x in range(100): - self.dr = np.asarray(DatetimeIndex(start=datetime(1990, 1, 1), end=datetime(2012, 1, 1), freq=datetools.Day(1))) - self.df = DataFrame({'a': ([0] * len(self.dr)), 'b': ([1] * len(self.dr)), 'c': ([2] * len(self.dr)), }, index=self.dr) - self.data_frames[x] = self.df + end += timedelta(days=1) + idx = date_range(start, end) + df = DataFrame({'a': 0, 'b': 1, 'c': 2}, index=idx) + self.data_frames[x] = df - def time_panel_from_dict_equiv_indexes(self): - Panel.from_dict(self.data_frames) + def time_from_dict(self): + with warnings.catch_warnings(record=True): + Panel.from_dict(self.data_frames) -class Constructors3(object): +class SameIndexes(object): + goal_time = 0.2 def setup(self): - self.dr = np.asarray(DatetimeIndex(start=datetime(1990, 1, 1), end=datetime(2012, 1, 1), freq=datetools.Day(1))) - self.data_frames = {} - for x in range(100): - self.df = DataFrame({'a': ([0] * len(self.dr)), 'b': ([1] * len(self.dr)), 'c': ([2] * len(self.dr)), }, index=self.dr) - self.data_frames[x] = self.df + idx = DatetimeIndex(start=datetime(1990, 1, 1), + end=datetime(2012, 1, 1), + freq='D') + df = DataFrame({'a': 0, 'b': 1, 'c': 2}, index=idx) + self.data_frames = dict(enumerate([df] * 100)) - def time_panel_from_dict_same_index(self): - Panel.from_dict(self.data_frames) + def time_from_dict(self): + with warnings.catch_warnings(record=True): + Panel.from_dict(self.data_frames) -class Constructors4(object): +class TwoIndexes(object): + goal_time = 0.2 def setup(self): - self.data_frames = {} - self.start = datetime(1990, 1, 1) - self.end = datetime(2012, 1, 1) - for x in range(100): - if (x == 50): - self.end += timedelta(days=1) - self.dr = np.asarray(date_range(self.start, self.end)) - self.df = DataFrame({'a': ([0] * len(self.dr)), 'b': ([1] * len(self.dr)), 'c': ([2] * len(self.dr)), }, index=self.dr) - self.data_frames[x] = self.df - - def 
time_panel_from_dict_two_different_indexes(self): - Panel.from_dict(self.data_frames) + start = datetime(1990, 1, 1) + end = datetime(2012, 1, 1) + df1 = DataFrame({'a': 0, 'b': 1, 'c': 2}, + index=DatetimeIndex(start=start, end=end, freq='D')) + end += timedelta(days=1) + df2 = DataFrame({'a': 0, 'b': 1, 'c': 2}, + index=DatetimeIndex(start=start, end=end, freq='D')) + dfs = [df1] * 50 + [df2] * 50 + self.data_frames = dict(enumerate(dfs)) + + def time_from_dict(self): + with warnings.catch_warnings(record=True): + Panel.from_dict(self.data_frames) diff --git a/asv_bench/benchmarks/panel_methods.py b/asv_bench/benchmarks/panel_methods.py index ebe278f6e68b5..a5b1a92e9cf67 100644 --- a/asv_bench/benchmarks/panel_methods.py +++ b/asv_bench/benchmarks/panel_methods.py @@ -1,24 +1,24 @@ -from .pandas_vb_common import * +import warnings +import numpy as np -class PanelMethods(object): - goal_time = 0.2 +from .pandas_vb_common import Panel, setup # noqa - def setup(self): - self.index = date_range(start='2000', freq='D', periods=1000) - self.panel = Panel(np.random.randn(100, len(self.index), 1000)) - def time_pct_change_items(self): - self.panel.pct_change(1, axis='items') +class PanelMethods(object): - def time_pct_change_major(self): - self.panel.pct_change(1, axis='major') + goal_time = 0.2 + params = ['items', 'major', 'minor'] + param_names = ['axis'] - def time_pct_change_minor(self): - self.panel.pct_change(1, axis='minor') + def setup(self, axis): + with warnings.catch_warnings(record=True): + self.panel = Panel(np.random.randn(100, 1000, 100)) - def time_shift(self): - self.panel.shift(1) + def time_pct_change(self, axis): + with warnings.catch_warnings(record=True): + self.panel.pct_change(1, axis=axis) - def time_shift_minor(self): - self.panel.shift(1, axis='minor') \ No newline at end of file + def time_shift(self, axis): + with warnings.catch_warnings(record=True): + self.panel.shift(1, axis=axis) diff --git a/asv_bench/benchmarks/parser_vb.py 
b/asv_bench/benchmarks/parser_vb.py deleted file mode 100644 index 32bf7e50d1a89..0000000000000 --- a/asv_bench/benchmarks/parser_vb.py +++ /dev/null @@ -1,121 +0,0 @@ -from .pandas_vb_common import * -import os -from pandas import read_csv -try: - from cStringIO import StringIO -except ImportError: - from io import StringIO - - -class read_csv1(object): - goal_time = 0.2 - - def setup(self): - self.N = 10000 - self.K = 8 - self.df = DataFrame((np.random.randn(self.N, self.K) * np.random.randint(100, 10000, (self.N, self.K)))) - self.df.to_csv('test.csv', sep='|') - - self.format = (lambda x: '{:,}'.format(x)) - self.df2 = self.df.applymap(self.format) - self.df2.to_csv('test2.csv', sep='|') - - def time_sep(self): - read_csv('test.csv', sep='|') - - def time_thousands(self): - read_csv('test.csv', sep='|', thousands=',') - - def teardown(self): - os.remove('test.csv') - os.remove('test2.csv') - - -class read_csv2(object): - goal_time = 0.2 - - def setup(self): - self.data = ['A,B,C'] - self.data = (self.data + (['1,2,3 # comment'] * 100000)) - self.data = '\n'.join(self.data) - - def time_comment(self): - read_csv(StringIO(self.data), comment='#') - - -class read_csv3(object): - goal_time = 0.2 - - def setup(self): - self.data = """0.1213700904466425978256438611,0.0525708283766902484401839501,0.4174092731488769913994474336\n -0.4096341697147408700274695547,0.1587830198973579909349496119,0.1292545832485494372576795285\n -0.8323255650024565799327547210,0.9694902427379478160318626578,0.6295047811546814475747169126\n -0.4679375305798131323697930383,0.2963942381834381301075609371,0.5268936082160610157032465394\n -0.6685382761849776311890991564,0.6721207066140679753374342908,0.6519975277021627935170045020\n""" - self.data2 = self.data.replace(',', ';').replace('.', ',') - self.data = (self.data * 200) - self.data2 = (self.data2 * 200) - - def time_default_converter(self): - read_csv(StringIO(self.data), sep=',', header=None, - float_precision=None) - - def 
time_default_converter_with_decimal(self): - read_csv(StringIO(self.data2), sep=';', header=None, - float_precision=None, decimal=',') - - def time_default_converter_python_engine(self): - read_csv(StringIO(self.data), sep=',', header=None, - float_precision=None, engine='python') - - def time_default_converter_with_decimal_python_engine(self): - read_csv(StringIO(self.data2), sep=';', header=None, - float_precision=None, decimal=',', engine='python') - - def time_precise_converter(self): - read_csv(StringIO(self.data), sep=',', header=None, - float_precision='high') - - def time_roundtrip_converter(self): - read_csv(StringIO(self.data), sep=',', header=None, - float_precision='round_trip') - - -class read_csv_categorical(object): - goal_time = 0.2 - - def setup(self): - N = 100000 - group1 = ['aaaaaaaa', 'bbbbbbb', 'cccccccc', 'dddddddd', 'eeeeeeee'] - df = DataFrame({'a': np.random.choice(group1, N).astype('object'), - 'b': np.random.choice(group1, N).astype('object'), - 'c': np.random.choice(group1, N).astype('object')}) - df.to_csv('strings.csv', index=False) - - def time_convert_post(self): - read_csv('strings.csv').apply(pd.Categorical) - - def time_convert_direct(self): - read_csv('strings.csv', dtype='category') - - def teardown(self): - os.remove('strings.csv') - - -class read_csv_dateparsing(object): - goal_time = 0.2 - - def setup(self): - self.N = 10000 - self.K = 8 - self.data = 'KORD,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000\n KORD,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000\n KORD,19990127, 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000\n KORD,19990127, 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000\n KORD,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000\n ' - self.data = (self.data * 200) - self.data2 = 'KORD,19990127 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000\n KORD,19990127 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 
0.0000, 260.0000\n KORD,19990127 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000\n KORD,19990127 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000\n KORD,19990127 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000\n ' - self.data2 = (self.data2 * 200) - - def time_multiple_date(self): - read_csv(StringIO(self.data), sep=',', header=None, - parse_dates=[[1, 2], [1, 3]]) - - def time_baseline(self): - read_csv(StringIO(self.data2), sep=',', header=None, parse_dates=[1]) diff --git a/asv_bench/benchmarks/period.py b/asv_bench/benchmarks/period.py index f9837191a7bae..897a3338c164c 100644 --- a/asv_bench/benchmarks/period.py +++ b/asv_bench/benchmarks/period.py @@ -1,55 +1,95 @@ -import pandas as pd -from pandas import Series, Period, PeriodIndex, date_range +from pandas import (DataFrame, Series, Period, PeriodIndex, date_range, + period_range) -class Constructor(object): +class PeriodProperties(object): + + params = (['M', 'min'], + ['year', 'month', 'day', 'hour', 'minute', 'second', + 'is_leap_year', 'quarter', 'qyear', 'week', 'daysinmonth', + 'dayofweek', 'dayofyear', 'start_time', 'end_time']) + param_names = ['freq', 'attr'] + + def setup(self, freq, attr): + self.per = Period('2012-06-01', freq=freq) + + def time_property(self, freq, attr): + getattr(self.per, attr) + + +class PeriodUnaryMethods(object): + + params = ['M', 'min'] + param_names = ['freq'] + + def setup(self, freq): + self.per = Period('2012-06-01', freq=freq) + + def time_to_timestamp(self, freq): + self.per.to_timestamp() + + def time_now(self, freq): + self.per.now(freq) + + def time_asfreq(self, freq): + self.per.asfreq('A') + + +class PeriodIndexConstructor(object): + goal_time = 0.2 - def setup(self): + params = ['D'] + param_names = ['freq'] + + def setup(self, freq): self.rng = date_range('1985', periods=1000) self.rng2 = date_range('1985', periods=1000).to_pydatetime() - def time_from_date_range(self): - PeriodIndex(self.rng, freq='D') + def 
time_from_date_range(self, freq): + PeriodIndex(self.rng, freq=freq) - def time_from_pydatetime(self): - PeriodIndex(self.rng2, freq='D') + def time_from_pydatetime(self, freq): + PeriodIndex(self.rng2, freq=freq) -class DataFrame(object): +class DataFramePeriodColumn(object): + goal_time = 0.2 def setup(self): - self.rng = pd.period_range(start='1/1/1990', freq='S', periods=20000) - self.df = pd.DataFrame(index=range(len(self.rng))) + self.rng = period_range(start='1/1/1990', freq='S', periods=20000) + self.df = DataFrame(index=range(len(self.rng))) def time_setitem_period_column(self): self.df['col'] = self.rng class Algorithms(object): + goal_time = 0.2 - def setup(self): + params = ['index', 'series'] + param_names = ['typ'] + + def setup(self, typ): data = [Period('2011-01', freq='M'), Period('2011-02', freq='M'), Period('2011-03', freq='M'), Period('2011-04', freq='M')] - self.s = Series(data * 1000) - self.i = PeriodIndex(data, freq='M') - def time_drop_duplicates_pseries(self): - self.s.drop_duplicates() + if typ == 'index': + self.vector = PeriodIndex(data * 1000, freq='M') + elif typ == 'series': + self.vector = Series(data * 1000) - def time_drop_duplicates_pindex(self): - self.i.drop_duplicates() + def time_drop_duplicates(self, typ): + self.vector.drop_duplicates() - def time_value_counts_pseries(self): - self.s.value_counts() + def time_value_counts(self, typ): + self.vector.value_counts() - def time_value_counts_pindex(self): - self.i.value_counts() +class Indexing(object): -class period_standard_indexing(object): goal_time = 0.2 def setup(self): @@ -70,7 +110,7 @@ def time_series_loc(self): self.series.loc[self.period] def time_align(self): - pd.DataFrame({'a': self.series, 'b': self.series[:500]}) + DataFrame({'a': self.series, 'b': self.series[:500]}) def time_intersection(self): self.index[:750].intersection(self.index[250:]) diff --git a/asv_bench/benchmarks/plotting.py b/asv_bench/benchmarks/plotting.py index 757c3e27dd333..5b49112b0e07d 100644 
--- a/asv_bench/benchmarks/plotting.py +++ b/asv_bench/benchmarks/plotting.py @@ -1,21 +1,44 @@ -from .pandas_vb_common import * +import numpy as np +from pandas import DataFrame, Series, DatetimeIndex, date_range try: - from pandas import date_range + from pandas.plotting import andrews_curves except ImportError: - def date_range(start=None, end=None, periods=None, freq=None): - return DatetimeIndex(start, end, periods=periods, offset=freq) -from pandas.tools.plotting import andrews_curves + from pandas.tools.plotting import andrews_curves +import matplotlib +matplotlib.use('Agg') + +from .pandas_vb_common import setup # noqa + + +class Plotting(object): + + goal_time = 0.2 + + def setup(self): + self.s = Series(np.random.randn(1000000)) + self.df = DataFrame({'col': self.s}) + + def time_series_plot(self): + self.s.plot() + + def time_frame_plot(self): + self.df.plot() class TimeseriesPlotting(object): + goal_time = 0.2 def setup(self): - import matplotlib - matplotlib.use('Agg') - self.N = 2000 - self.M = 5 - self.df = DataFrame(np.random.randn(self.N, self.M), index=date_range('1/1/1975', periods=self.N)) + N = 2000 + M = 5 + idx = date_range('1/1/1975', periods=N) + self.df = DataFrame(np.random.randn(N, M), index=idx) + + idx_irregular = DatetimeIndex(np.concatenate((idx.values[0:10], + idx.values[12:]))) + self.df2 = DataFrame(np.random.randn(len(idx_irregular), M), + index=idx_irregular) def time_plot_regular(self): self.df.plot() @@ -23,18 +46,19 @@ def time_plot_regular(self): def time_plot_regular_compat(self): self.df.plot(x_compat=True) + def time_plot_irregular(self): + self.df2.plot() + class Misc(object): + goal_time = 0.6 def setup(self): - import matplotlib - matplotlib.use('Agg') - self.N = 500 - self.M = 10 - data_dict = {x: np.random.randn(self.N) for x in range(self.M)} - data_dict["Name"] = ["A"] * self.N - self.df = DataFrame(data_dict) + N = 500 + M = 10 + self.df = DataFrame(np.random.randn(N, M)) + self.df['Name'] = ["A"] * N def 
time_plot_andrews_curves(self): andrews_curves(self.df, "Name") diff --git a/asv_bench/benchmarks/reindex.py b/asv_bench/benchmarks/reindex.py index 8db0cd7629332..413427a16f40b 100644 --- a/asv_bench/benchmarks/reindex.py +++ b/asv_bench/benchmarks/reindex.py @@ -1,89 +1,77 @@ -from .pandas_vb_common import * -from random import shuffle +import numpy as np +import pandas.util.testing as tm +from pandas import (DataFrame, Series, DatetimeIndex, MultiIndex, Index, + date_range) +from .pandas_vb_common import setup, lib # noqa -class Reindexing(object): +class Reindex(object): + goal_time = 0.2 def setup(self): - self.rng = DatetimeIndex(start='1/1/1970', periods=10000, freq='1min') - self.df = DataFrame(np.random.rand(10000, 10), index=self.rng, + rng = DatetimeIndex(start='1/1/1970', periods=10000, freq='1min') + self.df = DataFrame(np.random.rand(10000, 10), index=rng, columns=range(10)) self.df['foo'] = 'bar' - self.rng2 = Index(self.rng[::2]) - + self.rng_subset = Index(rng[::2]) self.df2 = DataFrame(index=range(10000), data=np.random.rand(10000, 30), columns=range(30)) - - # multi-index - N = 1000 - K = 20 + N = 5000 + K = 200 level1 = tm.makeStringIndex(N).values.repeat(K) level2 = np.tile(tm.makeStringIndex(K).values, N) index = MultiIndex.from_arrays([level1, level2]) - self.s1 = Series(np.random.randn((N * K)), index=index) - self.s2 = self.s1[::2] + self.s = Series(np.random.randn(N * K), index=index) + self.s_subset = self.s[::2] def time_reindex_dates(self): - self.df.reindex(self.rng2) + self.df.reindex(self.rng_subset) def time_reindex_columns(self): self.df2.reindex(columns=self.df.columns[1:5]) def time_reindex_multiindex(self): - self.s1.reindex(self.s2.index) + self.s.reindex(self.s_subset.index) -#---------------------------------------------------------------------- -# Pad / backfill +class ReindexMethod(object): - -class FillMethod(object): goal_time = 0.2 + params = ['pad', 'backfill'] + param_names = ['method'] - def setup(self): - self.rng = 
date_range('1/1/2000', periods=100000, freq='1min') - self.ts = Series(np.random.randn(len(self.rng)), index=self.rng) - self.ts2 = self.ts[::2] - self.ts3 = self.ts2.reindex(self.ts.index) - self.ts4 = self.ts3.astype('float32') - - def pad(self, source_series, target_index): - try: - source_series.reindex(target_index, method='pad') - except: - source_series.reindex(target_index, fillMethod='pad') + def setup(self, method): + N = 100000 + self.idx = date_range('1/1/2000', periods=N, freq='1min') + self.ts = Series(np.random.randn(N), index=self.idx)[::2] - def backfill(self, source_series, target_index): - try: - source_series.reindex(target_index, method='backfill') - except: - source_series.reindex(target_index, fillMethod='backfill') + def time_reindex_method(self, method): + self.ts.reindex(self.idx, method=method) - def time_backfill_dates(self): - self.backfill(self.ts2, self.ts.index) - def time_pad_daterange(self): - self.pad(self.ts2, self.ts.index) +class Fillna(object): - def time_backfill(self): - self.ts3.fillna(method='backfill') - - def time_backfill_float32(self): - self.ts4.fillna(method='backfill') - - def time_pad(self): - self.ts3.fillna(method='pad') + goal_time = 0.2 + params = ['pad', 'backfill'] + param_names = ['method'] - def time_pad_float32(self): - self.ts4.fillna(method='pad') + def setup(self, method): + N = 100000 + self.idx = date_range('1/1/2000', periods=N, freq='1min') + ts = Series(np.random.randn(N), index=self.idx)[::2] + self.ts_reindexed = ts.reindex(self.idx) + self.ts_float32 = self.ts_reindexed.astype('float32') + def time_reindexed(self, method): + self.ts_reindexed.fillna(method=method) -#---------------------------------------------------------------------- -# align on level + def time_float_32(self, method): + self.ts_float32.fillna(method=method) class LevelAlign(object): + goal_time = 0.2 def setup(self): @@ -92,7 +80,6 @@ def setup(self): labels=[np.arange(10).repeat(10000), np.tile(np.arange(100).repeat(100), 
10), np.tile(np.tile(np.arange(100), 100), 10)]) - random.shuffle(self.index.values) self.df = DataFrame(np.random.randn(len(self.index), 4), index=self.index) self.df_level = DataFrame(np.random.randn(100, 4), @@ -102,101 +89,84 @@ def time_align_level(self): self.df.align(self.df_level, level=1, copy=False) def time_reindex_level(self): - self.df_level.reindex(self.df.index, level=1) - + self.df_level.reindex(self.index, level=1) -#---------------------------------------------------------------------- -# drop_duplicates +class DropDuplicates(object): -class Duplicates(object): goal_time = 0.2 - - def setup(self): - self.N = 10000 - self.K = 10 - self.key1 = tm.makeStringIndex(self.N).values.repeat(self.K) - self.key2 = tm.makeStringIndex(self.N).values.repeat(self.K) - self.df = DataFrame({'key1': self.key1, 'key2': self.key2, - 'value': np.random.randn((self.N * self.K)),}) - self.col_array_list = list(self.df.values.T) - - self.df2 = self.df.copy() - self.df2.ix[:10000, :] = np.nan + params = [True, False] + param_names = ['inplace'] + + def setup(self, inplace): + N = 10000 + K = 10 + key1 = tm.makeStringIndex(N).values.repeat(K) + key2 = tm.makeStringIndex(N).values.repeat(K) + self.df = DataFrame({'key1': key1, 'key2': key2, + 'value': np.random.randn(N * K)}) + self.df_nan = self.df.copy() + self.df_nan.iloc[:10000, :] = np.nan self.s = Series(np.random.randint(0, 1000, size=10000)) - self.s2 = Series(np.tile(tm.makeStringIndex(1000).values, 10)) - - np.random.seed(1234) - self.N = 1000000 - self.K = 10000 - self.key1 = np.random.randint(0, self.K, size=self.N) - self.df_int = DataFrame({'key1': self.key1}) - - def time_frame_drop_dups(self): - self.df.drop_duplicates(['key1', 'key2']) - - def time_frame_drop_dups_inplace(self): - self.df.drop_duplicates(['key1', 'key2'], inplace=True) + self.s_str = Series(np.tile(tm.makeStringIndex(1000).values, 10)) - def time_frame_drop_dups_na(self): - self.df2.drop_duplicates(['key1', 'key2']) + N = 1000000 + K = 
10000 + key1 = np.random.randint(0, K, size=N) + self.df_int = DataFrame({'key1': key1}) + self.df_bool = DataFrame(np.random.randint(0, 2, size=(K, 10), + dtype=bool)) - def time_frame_drop_dups_na_inplace(self): - self.df2.drop_duplicates(['key1', 'key2'], inplace=True) + def time_frame_drop_dups(self, inplace): + self.df.drop_duplicates(['key1', 'key2'], inplace=inplace) - def time_series_drop_dups_int(self): - self.s.drop_duplicates() + def time_frame_drop_dups_na(self, inplace): + self.df_nan.drop_duplicates(['key1', 'key2'], inplace=inplace) - def time_series_drop_dups_string(self): - self.s2.drop_duplicates() + def time_series_drop_dups_int(self, inplace): + self.s.drop_duplicates(inplace=inplace) - def time_frame_drop_dups_int(self): - self.df_int.drop_duplicates() + def time_series_drop_dups_string(self, inplace): + self.s_str.drop_duplicates(inplace=inplace) + def time_frame_drop_dups_int(self, inplace): + self.df_int.drop_duplicates(inplace=inplace) -#---------------------------------------------------------------------- -# blog "pandas escaped the zoo" + def time_frame_drop_dups_bool(self, inplace): + self.df_bool.drop_duplicates(inplace=inplace) class Align(object): + # blog "pandas escaped the zoo" goal_time = 0.2 def setup(self): n = 50000 indices = tm.makeStringIndex(n) subsample_size = 40000 - - def sample(values, k): - sampler = np.arange(len(values)) - shuffle(sampler) - return values.take(sampler[:k]) - - self.x = Series(np.random.randn(50000), indices) + self.x = Series(np.random.randn(n), indices) self.y = Series(np.random.randn(subsample_size), - index=sample(indices, subsample_size)) + index=np.random.choice(indices, subsample_size, + replace=False)) def time_align_series_irregular_string(self): - (self.x + self.y) + self.x + self.y class LibFastZip(object): + goal_time = 0.2 def setup(self): - self.N = 10000 - self.K = 10 - self.key1 = tm.makeStringIndex(self.N).values.repeat(self.K) - self.key2 = 
tm.makeStringIndex(self.N).values.repeat(self.K) - self.df = DataFrame({'key1': self.key1, 'key2': self.key2, 'value': np.random.randn((self.N * self.K)), }) - self.col_array_list = list(self.df.values.T) - - self.df2 = self.df.copy() - self.df2.ix[:10000, :] = np.nan - self.col_array_list2 = list(self.df2.values.T) + N = 10000 + K = 10 + key1 = tm.makeStringIndex(N).values.repeat(K) + key2 = tm.makeStringIndex(N).values.repeat(K) + col_array = np.vstack([key1, key2, np.random.randn(N * K)]) + col_array2 = col_array.copy() + col_array2[:, :10000] = np.nan + self.col_array_list = list(col_array) def time_lib_fast_zip(self): lib.fast_zip(self.col_array_list) - - def time_lib_fast_zip_fillna(self): - lib.fast_zip_fillna(self.col_array_list2) diff --git a/asv_bench/benchmarks/replace.py b/asv_bench/benchmarks/replace.py index 66b8af53801ac..41208125e8f32 100644 --- a/asv_bench/benchmarks/replace.py +++ b/asv_bench/benchmarks/replace.py @@ -1,72 +1,58 @@ -from .pandas_vb_common import * -from pandas.compat import range -from datetime import timedelta +import numpy as np +import pandas as pd +from .pandas_vb_common import setup # noqa -class replace_fillna(object): - goal_time = 0.2 - - def setup(self): - self.N = 1000000 - try: - self.rng = date_range('1/1/2000', periods=self.N, freq='min') - except NameError: - self.rng = DatetimeIndex('1/1/2000', periods=self.N, offset=datetools.Minute()) - self.date_range = DateRange - self.ts = Series(np.random.randn(self.N), index=self.rng) - def time_replace_fillna(self): - self.ts.fillna(0.0, inplace=True) +class FillNa(object): - -class replace_large_dict(object): goal_time = 0.2 + params = [True, False] + param_names = ['inplace'] - def setup(self): - self.n = (10 ** 6) - self.start_value = (10 ** 5) - self.to_rep = dict(((i, (self.start_value + i)) for i in range(self.n))) - self.s = Series(np.random.randint(self.n, size=(10 ** 3))) - - def time_replace_large_dict(self): - self.s.replace(self.to_rep, inplace=True) + def 
setup(self, inplace): + N = 10**6 + rng = pd.date_range('1/1/2000', periods=N, freq='min') + data = np.random.randn(N) + data[::2] = np.nan + self.ts = pd.Series(data, index=rng) + def time_fillna(self, inplace): + self.ts.fillna(0.0, inplace=inplace) -class replace_convert(object): - goal_time = 0.5 + def time_replace(self, inplace): + self.ts.replace(np.nan, 0.0, inplace=inplace) - def setup(self): - self.n = (10 ** 3) - self.to_ts = dict(((i, pd.Timestamp(i)) for i in range(self.n))) - self.to_td = dict(((i, pd.Timedelta(i)) for i in range(self.n))) - self.s = Series(np.random.randint(self.n, size=(10 ** 3))) - self.df = DataFrame({'A': np.random.randint(self.n, size=(10 ** 3)), - 'B': np.random.randint(self.n, size=(10 ** 3))}) - def time_replace_series_timestamp(self): - self.s.replace(self.to_ts) +class ReplaceDict(object): - def time_replace_series_timedelta(self): - self.s.replace(self.to_td) + goal_time = 0.2 + params = [True, False] + param_names = ['inplace'] - def time_replace_frame_timestamp(self): - self.df.replace(self.to_ts) + def setup(self, inplace): + N = 10**5 + start_value = 10**5 + self.to_rep = dict(enumerate(np.arange(N) + start_value)) + self.s = pd.Series(np.random.randint(N, size=10**3)) - def time_replace_frame_timedelta(self): - self.df.replace(self.to_td) + def time_replace_series(self, inplace): + self.s.replace(self.to_rep, inplace=inplace) -class replace_replacena(object): - goal_time = 0.2 +class Convert(object): - def setup(self): - self.N = 1000000 - try: - self.rng = date_range('1/1/2000', periods=self.N, freq='min') - except NameError: - self.rng = DatetimeIndex('1/1/2000', periods=self.N, offset=datetools.Minute()) - self.date_range = DateRange - self.ts = Series(np.random.randn(self.N), index=self.rng) - - def time_replace_replacena(self): - self.ts.replace(np.nan, 0.0, inplace=True) + goal_time = 0.5 + params = (['DataFrame', 'Series'], ['Timestamp', 'Timedelta']) + param_names = ['constructor', 'replace_data'] + + def 
setup(self, constructor, replace_data): + N = 10**3 + data = {'Series': pd.Series(np.random.randint(N, size=N)), + 'DataFrame': pd.DataFrame({'A': np.random.randint(N, size=N), + 'B': np.random.randint(N, size=N)})} + self.to_replace = {i: getattr(pd, replace_data) for i in range(N)} + self.data = data[constructor] + + def time_replace(self, constructor, replace_data): + self.data.replace(self.to_replace) diff --git a/asv_bench/benchmarks/reshape.py b/asv_bench/benchmarks/reshape.py index a3ecfff52c794..9044b080c45f9 100644 --- a/asv_bench/benchmarks/reshape.py +++ b/asv_bench/benchmarks/reshape.py @@ -1,13 +1,16 @@ -from .pandas_vb_common import * -from pandas.core.reshape import melt, wide_to_long +from itertools import product +import numpy as np +from pandas import DataFrame, MultiIndex, date_range, melt, wide_to_long + +from .pandas_vb_common import setup # noqa + + +class Melt(object): -class melt_dataframe(object): goal_time = 0.2 def setup(self): - self.index = MultiIndex.from_arrays([np.arange(100).repeat(100), np.roll(np.tile(np.arange(100), 100), 25)]) - self.df = DataFrame(np.random.randn(10000, 4), index=self.index) self.df = DataFrame(np.random.randn(10000, 3), columns=['A', 'B', 'C']) self.df['id1'] = np.random.randint(0, 10, 10000) self.df['id2'] = np.random.randint(100, 1000, 10000) @@ -16,83 +19,116 @@ def time_melt_dataframe(self): melt(self.df, id_vars=['id1', 'id2']) -class reshape_pivot_time_series(object): +class Pivot(object): + goal_time = 0.2 def setup(self): - self.index = MultiIndex.from_arrays([np.arange(100).repeat(100), np.roll(np.tile(np.arange(100), 100), 25)]) - self.df = DataFrame(np.random.randn(10000, 4), index=self.index) - self.index = date_range('1/1/2000', periods=10000, freq='h') - self.df = DataFrame(randn(10000, 50), index=self.index, columns=range(50)) - self.pdf = self.unpivot(self.df) - self.f = (lambda : self.pdf.pivot('date', 'variable', 'value')) + N = 10000 + index = date_range('1/1/2000', periods=N, freq='h') + 
data = {'value': np.random.randn(N * 50), + 'variable': np.arange(50).repeat(N), + 'date': np.tile(index.values, 50)} + self.df = DataFrame(data) def time_reshape_pivot_time_series(self): - self.f() + self.df.pivot('date', 'variable', 'value') - def unpivot(self, frame): - (N, K) = frame.shape - self.data = {'value': frame.values.ravel('F'), 'variable': np.asarray(frame.columns).repeat(N), 'date': np.tile(np.asarray(frame.index), K), } - return DataFrame(self.data, columns=['date', 'variable', 'value']) +class SimpleReshape(object): -class reshape_stack_simple(object): goal_time = 0.2 def setup(self): - self.index = MultiIndex.from_arrays([np.arange(100).repeat(100), np.roll(np.tile(np.arange(100), 100), 25)]) - self.df = DataFrame(np.random.randn(10000, 4), index=self.index) + arrays = [np.arange(100).repeat(100), + np.roll(np.tile(np.arange(100), 100), 25)] + index = MultiIndex.from_arrays(arrays) + self.df = DataFrame(np.random.randn(10000, 4), index=index) self.udf = self.df.unstack(1) - def time_reshape_stack_simple(self): + def time_stack(self): self.udf.stack() + def time_unstack(self): + self.df.unstack(1) + + +class Unstack(object): -class reshape_unstack_simple(object): goal_time = 0.2 def setup(self): - self.index = MultiIndex.from_arrays([np.arange(100).repeat(100), np.roll(np.tile(np.arange(100), 100), 25)]) - self.df = DataFrame(np.random.randn(10000, 4), index=self.index) + m = 100 + n = 1000 + + levels = np.arange(m) + index = MultiIndex.from_product([levels] * 2) + columns = np.arange(n) + values = np.arange(m * m * n).reshape(m * m, n) + self.df = DataFrame(values, index, columns) + self.df2 = self.df.iloc[:-1] + + def time_full_product(self): + self.df.unstack() + + def time_without_last_row(self): + self.df2.unstack() - def time_reshape_unstack_simple(self): - self.df.unstack(1) +class SparseIndex(object): -class unstack_sparse_keyspace(object): goal_time = 0.2 def setup(self): - self.index = MultiIndex.from_arrays([np.arange(100).repeat(100), 
np.roll(np.tile(np.arange(100), 100), 25)]) - self.df = DataFrame(np.random.randn(10000, 4), index=self.index) - self.NUM_ROWS = 1000 - for iter in range(10): - self.df = DataFrame({'A': np.random.randint(50, size=self.NUM_ROWS), 'B': np.random.randint(50, size=self.NUM_ROWS), 'C': np.random.randint((-10), 10, size=self.NUM_ROWS), 'D': np.random.randint((-10), 10, size=self.NUM_ROWS), 'E': np.random.randint(10, size=self.NUM_ROWS), 'F': np.random.randn(self.NUM_ROWS), }) - self.idf = self.df.set_index(['A', 'B', 'C', 'D', 'E']) - if (len(self.idf.index.unique()) == self.NUM_ROWS): - break + NUM_ROWS = 1000 + self.df = DataFrame({'A': np.random.randint(50, size=NUM_ROWS), + 'B': np.random.randint(50, size=NUM_ROWS), + 'C': np.random.randint(-10, 10, size=NUM_ROWS), + 'D': np.random.randint(-10, 10, size=NUM_ROWS), + 'E': np.random.randint(10, size=NUM_ROWS), + 'F': np.random.randn(NUM_ROWS)}) + self.df = self.df.set_index(['A', 'B', 'C', 'D', 'E']) - def time_unstack_sparse_keyspace(self): - self.idf.unstack() + def time_unstack(self): + self.df.unstack() -class wide_to_long_big(object): +class WideToLong(object): + goal_time = 0.2 def setup(self): - vars = 'ABCD' nyrs = 20 nidvars = 20 N = 5000 - yrvars = [] - for var in vars: - for yr in range(1, nyrs + 1): - yrvars.append(var + str(yr)) - - self.df = pd.DataFrame(np.random.randn(N, nidvars + len(yrvars)), - columns=list(range(nidvars)) + yrvars) - self.vars = vars + self.letters = list('ABCD') + yrvars = [l + str(num) + for l, num in product(self.letters, range(1, nyrs + 1))] + columns = [str(i) for i in range(nidvars)] + yrvars + self.df = DataFrame(np.random.randn(N, nidvars + len(yrvars)), + columns=columns) + self.df['id'] = self.df.index def time_wide_to_long_big(self): - self.df['id'] = self.df.index - wide_to_long(self.df, list(self.vars), i='id', j='year') + wide_to_long(self.df, self.letters, i='id', j='year') + + +class PivotTable(object): + + goal_time = 0.2 + + def setup(self): + N = 100000 + fac1 = 
np.array(['A', 'B', 'C'], dtype='O') + fac2 = np.array(['one', 'two'], dtype='O') + ind1 = np.random.randint(0, 3, size=N) + ind2 = np.random.randint(0, 2, size=N) + self.df = DataFrame({'key1': fac1.take(ind1), + 'key2': fac2.take(ind2), + 'key3': fac2.take(ind2), + 'value1': np.random.randn(N), + 'value2': np.random.randn(N), + 'value3': np.random.randn(N)}) + + def time_pivot_table(self): + self.df.pivot_table(index='key1', columns=['key2', 'key3']) diff --git a/asv_bench/benchmarks/rolling.py b/asv_bench/benchmarks/rolling.py new file mode 100644 index 0000000000000..ba25ad6c5eda6 --- /dev/null +++ b/asv_bench/benchmarks/rolling.py @@ -0,0 +1,76 @@ +import pandas as pd +import numpy as np + +from .pandas_vb_common import setup # noqa + + +class Methods(object): + + sample_time = 0.2 + params = (['DataFrame', 'Series'], + [10, 1000], + ['int', 'float'], + ['median', 'mean', 'max', 'min', 'std', 'count', 'skew', 'kurt', + 'sum']) + param_names = ['contructor', 'window', 'dtype', 'method'] + + def setup(self, constructor, window, dtype, method): + N = 10**5 + arr = (100 * np.random.random(N)).astype(dtype) + self.roll = getattr(pd, constructor)(arr).rolling(window) + + def time_rolling(self, constructor, window, dtype, method): + getattr(self.roll, method)() + +class VariableWindowMethods(Methods): + sample_time = 0.2 + params = (['DataFrame', 'Series'], + ['50s', '1h', '1d'], + ['int', 'float'], + ['median', 'mean', 'max', 'min', 'std', 'count', 'skew', 'kurt', + 'sum']) + param_names = ['contructor', 'window', 'dtype', 'method'] + + def setup(self, constructor, window, dtype, method): + N = 10**5 + arr = (100 * np.random.random(N)).astype(dtype) + index = pd.date_range('2017-01-01', periods=N, freq='5s') + self.roll = getattr(pd, constructor)(arr, index=index).rolling(window) + +class Pairwise(object): + + sample_time = 0.2 + params = ([10, 1000, None], + ['corr', 'cov'], + [True, False]) + param_names = ['window', 'method', 'pairwise'] + + def setup(self, 
window, method, pairwise): + N = 10**4 + arr = np.random.random(N) + self.df = pd.DataFrame(arr) + + def time_pairwise(self, window, method, pairwise): + if window is None: + r = self.df.expanding() + else: + r = self.df.rolling(window=window) + getattr(r, method)(self.df, pairwise=pairwise) + + +class Quantile(object): + + sample_time = 0.2 + params = (['DataFrame', 'Series'], + [10, 1000], + ['int', 'float'], + [0, 0.5, 1]) + param_names = ['constructor', 'window', 'dtype', 'percentile'] + + def setup(self, constructor, window, dtype, percentile): + N = 10**5 + arr = np.random.random(N).astype(dtype) + self.roll = getattr(pd, constructor)(arr).rolling(window) + + def time_quantile(self, constructor, window, dtype, percentile): + self.roll.quantile(percentile) diff --git a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py index 413c4e044fd3a..478aba278029c 100644 --- a/asv_bench/benchmarks/series_methods.py +++ b/asv_bench/benchmarks/series_methods.py @@ -1,122 +1,123 @@ -from .pandas_vb_common import * +from datetime import datetime +import numpy as np +import pandas.util.testing as tm +from pandas import Series, date_range, NaT -class series_constructor_no_data_datetime_index(object): - goal_time = 0.2 - - def setup(self): - self.dr = pd.date_range( - start=datetime(2015,10,26), - end=datetime(2016,1,1), - freq='50s' - ) # ~100k long +from .pandas_vb_common import setup # noqa - def time_series_constructor_no_data_datetime_index(self): - Series(data=None, index=self.dr) +class SeriesConstructor(object): -class series_constructor_dict_data_datetime_index(object): goal_time = 0.2 + params = [None, 'dict'] + param_names = ['data'] - def setup(self): - self.dr = pd.date_range( - start=datetime(2015, 10, 26), - end=datetime(2016, 1, 1), - freq='50s' - ) # ~100k long - self.data = {d: v for d, v in zip(self.dr, range(len(self.dr)))} + def setup(self, data): + self.idx = date_range(start=datetime(2015, 10, 26), + end=datetime(2016, 1, 1), 
+ freq='50s') + dict_data = dict(zip(self.idx, range(len(self.idx)))) + self.data = None if data is None else dict_data + + def time_constructor(self, data): + Series(data=self.data, index=self.idx) - def time_series_constructor_no_data_datetime_index(self): - Series(data=self.data, index=self.dr) +class IsIn(object): -class series_isin_int64(object): goal_time = 0.2 + params = ['int64', 'object'] + param_names = ['dtype'] - def setup(self): - self.s3 = Series(np.random.randint(1, 10, 100000)).astype('int64') - self.s4 = Series(np.random.randint(1, 100, 10000000)).astype('int64') + def setup(self, dtype): + self.s = Series(np.random.randint(1, 10, 100000)).astype(dtype) self.values = [1, 2] - def time_series_isin_int64(self): - self.s3.isin(self.values) + def time_isin(self, dtypes): + self.s.isin(self.values) - def time_series_isin_int64_large(self): - self.s4.isin(self.values) +class NSort(object): -class series_isin_object(object): goal_time = 0.2 + params = ['last', 'first'] + param_names = ['keep'] - def setup(self): - self.s3 = Series(np.random.randint(1, 10, 100000)).astype('int64') - self.values = [1, 2] - self.s4 = self.s3.astype('object') + def setup(self, keep): + self.s = Series(np.random.randint(1, 10, 100000)) - def time_series_isin_object(self): - self.s4.isin(self.values) + def time_nlargest(self, keep): + self.s.nlargest(3, keep=keep) + def time_nsmallest(self, keep): + self.s.nsmallest(3, keep=keep) -class series_nlargest1(object): - goal_time = 0.2 - def setup(self): - self.s1 = Series(np.random.randn(10000)) - self.s2 = Series(np.random.randint(1, 10, 10000)) - self.s3 = Series(np.random.randint(1, 10, 100000)).astype('int64') - self.values = [1, 2] - self.s4 = self.s3.astype('object') +class Dropna(object): + + goal_time = 0.2 + params = ['int', 'datetime'] + param_names = ['dtype'] + + def setup(self, dtype): + N = 10**6 + data = {'int': np.random.randint(1, 10, N), + 'datetime': date_range('2000-01-01', freq='S', periods=N)} + self.s = 
Series(data[dtype]) + if dtype == 'datetime': + self.s[np.random.randint(1, N, 100)] = NaT + + def time_dropna(self, dtype): + self.s.dropna() - def time_series_nlargest1(self): - self.s1.nlargest(3, take_last=True) - self.s1.nlargest(3, take_last=False) +class Map(object): -class series_nlargest2(object): goal_time = 0.2 + params = ['dict', 'Series'] + param_names = 'mapper' - def setup(self): - self.s1 = Series(np.random.randn(10000)) - self.s2 = Series(np.random.randint(1, 10, 10000)) - self.s3 = Series(np.random.randint(1, 10, 100000)).astype('int64') - self.values = [1, 2] - self.s4 = self.s3.astype('object') + def setup(self, mapper): + map_size = 1000 + map_data = Series(map_size - np.arange(map_size)) + self.map_data = map_data if mapper == 'Series' else map_data.to_dict() + self.s = Series(np.random.randint(0, map_size, 10000)) + + def time_map(self, mapper): + self.s.map(self.map_data) - def time_series_nlargest2(self): - self.s2.nlargest(3, take_last=True) - self.s2.nlargest(3, take_last=False) +class Clip(object): -class series_nsmallest2(object): goal_time = 0.2 def setup(self): - self.s1 = Series(np.random.randn(10000)) - self.s2 = Series(np.random.randint(1, 10, 10000)) - self.s3 = Series(np.random.randint(1, 10, 100000)).astype('int64') - self.values = [1, 2] - self.s4 = self.s3.astype('object') + self.s = Series(np.random.randn(50)) - def time_series_nsmallest2(self): - self.s2.nsmallest(3, take_last=True) - self.s2.nsmallest(3, take_last=False) + def time_clip(self): + self.s.clip(0, 1) -class series_dropna_int64(object): +class ValueCounts(object): + goal_time = 0.2 + params = ['int', 'float', 'object'] + param_names = ['dtype'] - def setup(self): - self.s = Series(np.random.randint(1, 10, 1000000)) + def setup(self, dtype): + self.s = Series(np.random.randint(0, 1000, size=100000)).astype(dtype) - def time_series_dropna_int64(self): - self.s.dropna() + def time_value_counts(self, dtype): + self.s.value_counts() + + +class Dir(object): -class 
series_dropna_datetime(object): goal_time = 0.2 def setup(self): - self.s = Series(pd.date_range('2000-01-01', freq='S', periods=1000000)) - self.s[np.random.randint(1, 1000000, 100)] = pd.NaT + self.s = Series(index=tm.makeStringIndex(10000)) - def time_series_dropna_datetime(self): - self.s.dropna() + def time_dir_strings(self): + dir(self.s) diff --git a/asv_bench/benchmarks/sparse.py b/asv_bench/benchmarks/sparse.py index 717fe7218ceda..dcb7694abc2ad 100644 --- a/asv_bench/benchmarks/sparse.py +++ b/asv_bench/benchmarks/sparse.py @@ -1,142 +1,162 @@ -from .pandas_vb_common import * -import pandas.sparse.series +import itertools + +import numpy as np import scipy.sparse -from pandas.core.sparse import SparseSeries, SparseDataFrame -from pandas.core.sparse import SparseDataFrame +from pandas import (SparseSeries, SparseDataFrame, SparseArray, Series, + date_range, MultiIndex) + +from .pandas_vb_common import setup # noqa + + +def make_array(size, dense_proportion, fill_value, dtype): + dense_size = int(size * dense_proportion) + arr = np.full(size, fill_value, dtype) + indexer = np.random.choice(np.arange(size), dense_size, replace=False) + arr[indexer] = np.random.choice(np.arange(100, dtype=dtype), dense_size) + return arr -class sparse_series_to_frame(object): +class SparseSeriesToFrame(object): + goal_time = 0.2 def setup(self): - self.K = 50 - self.N = 50000 - self.rng = np.asarray(date_range('1/1/2000', periods=self.N, freq='T')) + K = 50 + N = 50001 + rng = date_range('1/1/2000', periods=N, freq='T') self.series = {} - for i in range(1, (self.K + 1)): - self.data = np.random.randn(self.N)[:(- i)] - self.this_rng = self.rng[:(- i)] - self.data[100:] = np.nan - self.series[i] = SparseSeries(self.data, index=self.this_rng) + for i in range(1, K): + data = np.random.randn(N)[:-i] + idx = rng[:-i] + data[100:] = np.nan + self.series[i] = SparseSeries(data, index=idx) - def time_sparse_series_to_frame(self): + def time_series_to_frame(self): 
SparseDataFrame(self.series) -class sparse_frame_constructor(object): - goal_time = 0.2 - - def time_sparse_frame_constructor(self): - SparseDataFrame(columns=np.arange(100), index=np.arange(1000)) +class SparseArrayConstructor(object): - -class sparse_series_from_coo(object): goal_time = 0.2 + params = ([0.1, 0.01], [0, np.nan], + [np.int64, np.float64, np.object]) + param_names = ['dense_proportion', 'fill_value', 'dtype'] - def setup(self): - self.A = scipy.sparse.coo_matrix(([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])), shape=(100, 100)) + def setup(self, dense_proportion, fill_value, dtype): + N = 10**6 + self.array = make_array(N, dense_proportion, fill_value, dtype) - def time_sparse_series_from_coo(self): - self.ss = pandas.sparse.series.SparseSeries.from_coo(self.A) + def time_sparse_array(self, dense_proportion, fill_value, dtype): + SparseArray(self.array, fill_value=fill_value, dtype=dtype) -class sparse_series_to_coo(object): +class SparseDataFrameConstructor(object): + goal_time = 0.2 def setup(self): - self.s = pd.Series(([np.nan] * 10000)) - self.s[0] = 3.0 - self.s[100] = (-1.0) - self.s[999] = 12.1 - self.s.index = pd.MultiIndex.from_product((range(10), range(10), range(10), range(10))) - self.ss = self.s.to_sparse() + N = 1000 + self.arr = np.arange(N) + self.sparse = scipy.sparse.rand(N, N, 0.005) + self.dict = dict(zip(range(N), itertools.repeat([0]))) - def time_sparse_series_to_coo(self): - self.ss.to_coo(row_levels=[0, 1], column_levels=[2, 3], sort_labels=True) + def time_constructor(self): + SparseDataFrame(columns=self.arr, index=self.arr) + def time_from_scipy(self): + SparseDataFrame(self.sparse) -class sparse_arithmetic_int(object): - goal_time = 0.2 + def time_from_dict(self): + SparseDataFrame(self.dict) - def setup(self): - np.random.seed(1) - self.a_10percent = self.make_sparse_array(length=1000000, dense_size=100000, fill_value=np.nan) - self.b_10percent = self.make_sparse_array(length=1000000, dense_size=100000, fill_value=np.nan) - 
- self.a_10percent_zero = self.make_sparse_array(length=1000000, dense_size=100000, fill_value=0) - self.b_10percent_zero = self.make_sparse_array(length=1000000, dense_size=100000, fill_value=0) - - self.a_1percent = self.make_sparse_array(length=1000000, dense_size=10000, fill_value=np.nan) - self.b_1percent = self.make_sparse_array(length=1000000, dense_size=10000, fill_value=np.nan) - def make_sparse_array(self, length, dense_size, fill_value): - arr = np.array([fill_value] * length, dtype=np.float64) - indexer = np.unique(np.random.randint(0, length, dense_size)) - arr[indexer] = np.random.randint(0, 100, len(indexer)) - return pd.SparseArray(arr, fill_value=fill_value) +class FromCoo(object): - def time_sparse_make_union(self): - self.a_10percent.sp_index.make_union(self.b_10percent.sp_index) + goal_time = 0.2 - def time_sparse_intersect(self): - self.a_10percent.sp_index.intersect(self.b_10percent.sp_index) + def setup(self): + self.matrix = scipy.sparse.coo_matrix(([3.0, 1.0, 2.0], + ([1, 0, 0], [0, 2, 3])), + shape=(100, 100)) - def time_sparse_addition_10percent(self): - self.a_10percent + self.b_10percent + def time_sparse_series_from_coo(self): + SparseSeries.from_coo(self.matrix) - def time_sparse_addition_10percent_zero(self): - self.a_10percent_zero + self.b_10percent_zero - def time_sparse_addition_1percent(self): - self.a_1percent + self.b_1percent +class ToCoo(object): - def time_sparse_division_10percent(self): - self.a_10percent / self.b_10percent + goal_time = 0.2 - def time_sparse_division_10percent_zero(self): - self.a_10percent_zero / self.b_10percent_zero + def setup(self): + s = Series([np.nan] * 10000) + s[0] = 3.0 + s[100] = -1.0 + s[999] = 12.1 + s.index = MultiIndex.from_product([range(10)] * 4) + self.ss = s.to_sparse() - def time_sparse_division_1percent(self): - self.a_1percent / self.b_1percent + def time_sparse_series_to_coo(self): + self.ss.to_coo(row_levels=[0, 1], + column_levels=[2, 3], + sort_labels=True) +class 
Arithmetic(object): -class sparse_arithmetic_block(object): goal_time = 0.2 + params = ([0.1, 0.01], [0, np.nan]) + param_names = ['dense_proportion', 'fill_value'] - def setup(self): - np.random.seed(1) - self.a = self.make_sparse_array(length=1000000, num_blocks=1000, - block_size=10, fill_value=np.nan) - self.b = self.make_sparse_array(length=1000000, num_blocks=1000, - block_size=10, fill_value=np.nan) - - self.a_zero = self.make_sparse_array(length=1000000, num_blocks=1000, - block_size=10, fill_value=0) - self.b_zero = self.make_sparse_array(length=1000000, num_blocks=1000, - block_size=10, fill_value=np.nan) + def setup(self, dense_proportion, fill_value): + N = 10**6 + arr1 = make_array(N, dense_proportion, fill_value, np.int64) + self.array1 = SparseArray(arr1, fill_value=fill_value) + arr2 = make_array(N, dense_proportion, fill_value, np.int64) + self.array2 = SparseArray(arr2, fill_value=fill_value) - def make_sparse_array(self, length, num_blocks, block_size, fill_value): - a = np.array([fill_value] * length) - for block in range(num_blocks): - i = np.random.randint(0, length) - a[i:i + block_size] = np.random.randint(0, 100, len(a[i:i + block_size])) - return pd.SparseArray(a, fill_value=fill_value) + def time_make_union(self, dense_proportion, fill_value): + self.array1.sp_index.make_union(self.array2.sp_index) - def time_sparse_make_union(self): - self.a.sp_index.make_union(self.b.sp_index) + def time_intersect(self, dense_proportion, fill_value): + self.array1.sp_index.intersect(self.array2.sp_index) - def time_sparse_intersect(self): - self.a.sp_index.intersect(self.b.sp_index) + def time_add(self, dense_proportion, fill_value): + self.array1 + self.array2 - def time_sparse_addition(self): - self.a + self.b + def time_divide(self, dense_proportion, fill_value): + self.array1 / self.array2 - def time_sparse_addition_zero(self): - self.a_zero + self.b_zero - def time_sparse_division(self): - self.a / self.b +class ArithmeticBlock(object): - def 
time_sparse_division_zero(self): - self.a_zero / self.b_zero + goal_time = 0.2 + params = [np.nan, 0] + param_names = ['fill_value'] + + def setup(self, fill_value): + N = 10**6 + self.arr1 = self.make_block_array(length=N, num_blocks=1000, + block_size=10, fill_value=fill_value) + self.arr2 = self.make_block_array(length=N, num_blocks=1000, + block_size=10, fill_value=fill_value) + + def make_block_array(self, length, num_blocks, block_size, fill_value): + arr = np.full(length, fill_value) + indicies = np.random.choice(np.arange(0, length, block_size), + num_blocks, + replace=False) + for ind in indicies: + arr[ind:ind + block_size] = np.random.randint(0, 100, block_size) + return SparseArray(arr, fill_value=fill_value) + + def time_make_union(self, fill_value): + self.arr1.sp_index.make_union(self.arr2.sp_index) + + def time_intersect(self, fill_value): + self.arr2.sp_index.intersect(self.arr2.sp_index) + + def time_addition(self, fill_value): + self.arr1 + self.arr2 + + def time_division(self, fill_value): + self.arr1 / self.arr2 diff --git a/asv_bench/benchmarks/stat_ops.py b/asv_bench/benchmarks/stat_ops.py index 12fbb2478c2a5..c447c78d0d070 100644 --- a/asv_bench/benchmarks/stat_ops.py +++ b/asv_bench/benchmarks/stat_ops.py @@ -1,261 +1,114 @@ -from .pandas_vb_common import * +import numpy as np +import pandas as pd +from .pandas_vb_common import setup # noqa -class stat_ops_frame_mean_float_axis_0(object): - goal_time = 0.2 - def setup(self): - self.df = DataFrame(np.random.randn(100000, 4)) - self.dfi = DataFrame(np.random.randint(1000, size=self.df.shape)) +ops = ['mean', 'sum', 'median', 'std', 'skew', 'kurt', 'mad', 'prod', 'sem', + 'var'] - def time_stat_ops_frame_mean_float_axis_0(self): - self.df.mean() +class FrameOps(object): -class stat_ops_frame_mean_float_axis_1(object): goal_time = 0.2 + params = [ops, ['float', 'int'], [0, 1], [True, False]] + param_names = ['op', 'dtype', 'axis', 'use_bottleneck'] - def setup(self): - self.df = 
DataFrame(np.random.randn(100000, 4)) - self.dfi = DataFrame(np.random.randint(1000, size=self.df.shape)) - - def time_stat_ops_frame_mean_float_axis_1(self): - self.df.mean(1) - - -class stat_ops_frame_mean_int_axis_0(object): - goal_time = 0.2 + def setup(self, op, dtype, axis, use_bottleneck): + df = pd.DataFrame(np.random.randn(100000, 4)).astype(dtype) + try: + pd.options.compute.use_bottleneck = use_bottleneck + except: + from pandas.core import nanops + nanops._USE_BOTTLENECK = use_bottleneck + self.df_func = getattr(df, op) - def setup(self): - self.df = DataFrame(np.random.randn(100000, 4)) - self.dfi = DataFrame(np.random.randint(1000, size=self.df.shape)) + def time_op(self, op, dtype, axis, use_bottleneck): + self.df_func(axis=axis) - def time_stat_ops_frame_mean_int_axis_0(self): - self.dfi.mean() +class FrameMultiIndexOps(object): -class stat_ops_frame_mean_int_axis_1(object): goal_time = 0.2 + params = ([0, 1, [0, 1]], ops) + param_names = ['level', 'op'] - def setup(self): - self.df = DataFrame(np.random.randn(100000, 4)) - self.dfi = DataFrame(np.random.randint(1000, size=self.df.shape)) + def setup(self, level, op): + levels = [np.arange(10), np.arange(100), np.arange(100)] + labels = [np.arange(10).repeat(10000), + np.tile(np.arange(100).repeat(100), 10), + np.tile(np.tile(np.arange(100), 100), 10)] + index = pd.MultiIndex(levels=levels, labels=labels) + df = pd.DataFrame(np.random.randn(len(index), 4), index=index) + self.df_func = getattr(df, op) - def time_stat_ops_frame_mean_int_axis_1(self): - self.dfi.mean(1) + def time_op(self, level, op): + self.df_func(level=level) -class stat_ops_frame_sum_float_axis_0(object): - goal_time = 0.2 - - def setup(self): - self.df = DataFrame(np.random.randn(100000, 4)) - self.dfi = DataFrame(np.random.randint(1000, size=self.df.shape)) - - def time_stat_ops_frame_sum_float_axis_0(self): - self.df.sum() +class SeriesOps(object): - -class stat_ops_frame_sum_float_axis_1(object): goal_time = 0.2 + params = 
[ops, ['float', 'int'], [True, False]] + param_names = ['op', 'dtype', 'use_bottleneck'] - def setup(self): - self.df = DataFrame(np.random.randn(100000, 4)) - self.dfi = DataFrame(np.random.randint(1000, size=self.df.shape)) + def setup(self, op, dtype, use_bottleneck): + s = pd.Series(np.random.randn(100000)).astype(dtype) + try: + pd.options.compute.use_bottleneck = use_bottleneck + except: + from pandas.core import nanops + nanops._USE_BOTTLENECK = use_bottleneck + self.s_func = getattr(s, op) - def time_stat_ops_frame_sum_float_axis_1(self): - self.df.sum(1) + def time_op(self, op, dtype, use_bottleneck): + self.s_func() -class stat_ops_frame_sum_int_axis_0(object): - goal_time = 0.2 +class SeriesMultiIndexOps(object): - def setup(self): - self.df = DataFrame(np.random.randn(100000, 4)) - self.dfi = DataFrame(np.random.randint(1000, size=self.df.shape)) - - def time_stat_ops_frame_sum_int_axis_0(self): - self.dfi.sum() - - -class stat_ops_frame_sum_int_axis_1(object): goal_time = 0.2 + params = ([0, 1, [0, 1]], ops) + param_names = ['level', 'op'] - def setup(self): - self.df = DataFrame(np.random.randn(100000, 4)) - self.dfi = DataFrame(np.random.randint(1000, size=self.df.shape)) + def setup(self, level, op): + levels = [np.arange(10), np.arange(100), np.arange(100)] + labels = [np.arange(10).repeat(10000), + np.tile(np.arange(100).repeat(100), 10), + np.tile(np.tile(np.arange(100), 100), 10)] + index = pd.MultiIndex(levels=levels, labels=labels) + s = pd.Series(np.random.randn(len(index)), index=index) + self.s_func = getattr(s, op) - def time_stat_ops_frame_sum_int_axis_1(self): - self.dfi.sum(1) + def time_op(self, level, op): + self.s_func(level=level) -class stat_ops_level_frame_sum(object): - goal_time = 0.2 - - def setup(self): - self.index = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)], labels=[np.arange(10).repeat(10000), np.tile(np.arange(100).repeat(100), 10), np.tile(np.tile(np.arange(100), 100), 10)]) - 
random.shuffle(self.index.values) - self.df = DataFrame(np.random.randn(len(self.index), 4), index=self.index) - self.df_level = DataFrame(np.random.randn(100, 4), index=self.index.levels[1]) +class Rank(object): - def time_stat_ops_level_frame_sum(self): - self.df.sum(level=1) - - -class stat_ops_level_frame_sum_multiple(object): goal_time = 0.2 + params = [['DataFrame', 'Series'], [True, False]] + param_names = ['constructor', 'pct'] - def setup(self): - self.index = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)], labels=[np.arange(10).repeat(10000), np.tile(np.arange(100).repeat(100), 10), np.tile(np.tile(np.arange(100), 100), 10)]) - random.shuffle(self.index.values) - self.df = DataFrame(np.random.randn(len(self.index), 4), index=self.index) - self.df_level = DataFrame(np.random.randn(100, 4), index=self.index.levels[1]) + def setup(self, constructor, pct): + values = np.random.randn(10**5) + self.data = getattr(pd, constructor)(values) - def time_stat_ops_level_frame_sum_multiple(self): - self.df.sum(level=[0, 1]) + def time_rank(self, constructor, pct): + self.data.rank(pct=pct) + def time_average_old(self, constructor, pct): + self.data.rank(pct=pct) / len(self.data) -class stat_ops_level_series_sum(object): - goal_time = 0.2 - - def setup(self): - self.index = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)], labels=[np.arange(10).repeat(10000), np.tile(np.arange(100).repeat(100), 10), np.tile(np.tile(np.arange(100), 100), 10)]) - random.shuffle(self.index.values) - self.df = DataFrame(np.random.randn(len(self.index), 4), index=self.index) - self.df_level = DataFrame(np.random.randn(100, 4), index=self.index.levels[1]) - def time_stat_ops_level_series_sum(self): - self.df[1].sum(level=1) +class Correlation(object): - -class stat_ops_level_series_sum_multiple(object): - goal_time = 0.2 - - def setup(self): - self.index = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)], 
labels=[np.arange(10).repeat(10000), np.tile(np.arange(100).repeat(100), 10), np.tile(np.tile(np.arange(100), 100), 10)]) - random.shuffle(self.index.values) - self.df = DataFrame(np.random.randn(len(self.index), 4), index=self.index) - self.df_level = DataFrame(np.random.randn(100, 4), index=self.index.levels[1]) - - def time_stat_ops_level_series_sum_multiple(self): - self.df[1].sum(level=[0, 1]) - - -class stat_ops_series_std(object): - goal_time = 0.2 - - def setup(self): - self.s = Series(np.random.randn(100000), index=np.arange(100000)) - self.s[::2] = np.nan - - def time_stat_ops_series_std(self): - self.s.std() - - -class stats_corr_spearman(object): goal_time = 0.2 + params = ['spearman', 'kendall', 'pearson'] + param_names = ['method'] - def setup(self): - self.df = DataFrame(np.random.randn(1000, 30)) - - def time_stats_corr_spearman(self): - self.df.corr(method='spearman') - - -class stats_rank2d_axis0_average(object): - goal_time = 0.2 - - def setup(self): - self.df = DataFrame(np.random.randn(5000, 50)) - - def time_stats_rank2d_axis0_average(self): - self.df.rank() - - -class stats_rank2d_axis1_average(object): - goal_time = 0.2 - - def setup(self): - self.df = DataFrame(np.random.randn(5000, 50)) - - def time_stats_rank2d_axis1_average(self): - self.df.rank(1) - - -class stats_rank_average(object): - goal_time = 0.2 - - def setup(self): - self.values = np.concatenate([np.arange(100000), np.random.randn(100000), np.arange(100000)]) - self.s = Series(self.values) - - def time_stats_rank_average(self): - self.s.rank() - - -class stats_rank_average_int(object): - goal_time = 0.2 - - def setup(self): - self.values = np.random.randint(0, 100000, size=200000) - self.s = Series(self.values) - - def time_stats_rank_average_int(self): - self.s.rank() - - -class stats_rank_pct_average(object): - goal_time = 0.2 - - def setup(self): - self.values = np.concatenate([np.arange(100000), np.random.randn(100000), np.arange(100000)]) - self.s = Series(self.values) - - 
def time_stats_rank_pct_average(self): - self.s.rank(pct=True) - - -class stats_rank_pct_average_old(object): - goal_time = 0.2 - - def setup(self): - self.values = np.concatenate([np.arange(100000), np.random.randn(100000), np.arange(100000)]) - self.s = Series(self.values) - - def time_stats_rank_pct_average_old(self): - (self.s.rank() / len(self.s)) - - -class stats_rolling_mean(object): - goal_time = 0.2 - - def setup(self): - self.arr = np.random.randn(100000) - self.win = 100 - - def time_rolling_mean(self): - rolling_mean(self.arr, self.win) - - def time_rolling_median(self): - rolling_median(self.arr, self.win) - - def time_rolling_min(self): - rolling_min(self.arr, self.win) - - def time_rolling_max(self): - rolling_max(self.arr, self.win) - - def time_rolling_sum(self): - rolling_sum(self.arr, self.win) - - def time_rolling_std(self): - rolling_std(self.arr, self.win) - - def time_rolling_var(self): - rolling_var(self.arr, self.win) - - def time_rolling_skew(self): - rolling_skew(self.arr, self.win) + def setup(self, method): + self.df = pd.DataFrame(np.random.randn(1000, 30)) - def time_rolling_kurt(self): - rolling_kurt(self.arr, self.win) + def time_corr(self, method): + self.df.corr(method=method) diff --git a/asv_bench/benchmarks/strings.py b/asv_bench/benchmarks/strings.py index c1600d4e07f58..b203c8b0fa5c9 100644 --- a/asv_bench/benchmarks/strings.py +++ b/asv_bench/benchmarks/strings.py @@ -1,107 +1,147 @@ -from .pandas_vb_common import * -import string -import itertools as IT -import pandas.util.testing as testing +import warnings +import numpy as np +from pandas import Series +import pandas.util.testing as tm -class StringMethods(object): - goal_time = 0.2 - def make_series(self, letters, strlen, size): - return Series([str(x) for x in np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))]) +class Methods(object): + + goal_time = 0.2 def setup(self): - self.many = self.make_series(('matchthis' + 
string.ascii_uppercase), strlen=19, size=10000) - self.few = self.make_series(('matchthis' + (string.ascii_uppercase * 42)), strlen=19, size=10000) - self.s = self.make_series(string.ascii_uppercase, strlen=10, size=10000).str.join('|') + self.s = Series(tm.makeStringIndex(10**5)) def time_cat(self): - self.many.str.cat(sep=',') + self.s.str.cat(sep=',') def time_center(self): - self.many.str.center(100) - - def time_contains_few(self): - self.few.str.contains('matchthis') - - def time_contains_few_noregex(self): - self.few.str.contains('matchthis', regex=False) - - def time_contains_many(self): - self.many.str.contains('matchthis') - - def time_contains_many_noregex(self): - self.many.str.contains('matchthis', regex=False) + self.s.str.center(100) def time_count(self): - self.many.str.count('matchthis') + self.s.str.count('A') def time_endswith(self): - self.many.str.endswith('matchthis') + self.s.str.endswith('A') def time_extract(self): - self.many.str.extract('(\\w*)matchthis(\\w*)') + with warnings.catch_warnings(record=True): + self.s.str.extract('(\\w*)A(\\w*)') def time_findall(self): - self.many.str.findall('[A-Z]+') + self.s.str.findall('[A-Z]+') def time_get(self): - self.many.str.get(0) - - def time_join_split(self): - self.many.str.join('--').str.split('--') - - def time_join_split_expand(self): - self.many.str.join('--').str.split('--', expand=True) + self.s.str.get(0) def time_len(self): - self.many.str.len() + self.s.str.len() def time_match(self): - self.many.str.match('mat..this') + self.s.str.match('A') def time_pad(self): - self.many.str.pad(100, side='both') - - def time_repeat(self): - self.many.str.repeat(list(IT.islice(IT.cycle(range(1, 4)), len(self.many)))) + self.s.str.pad(100, side='both') def time_replace(self): - self.many.str.replace('(matchthis)', '\x01\x01') + self.s.str.replace('A', '\x01\x01') def time_slice(self): - self.many.str.slice(5, 15, 2) + self.s.str.slice(5, 15, 2) def time_startswith(self): - 
self.many.str.startswith('matchthis') + self.s.str.startswith('A') def time_strip(self): - self.many.str.strip('matchthis') + self.s.str.strip('A') def time_rstrip(self): - self.many.str.rstrip('matchthis') + self.s.str.rstrip('A') def time_lstrip(self): - self.many.str.lstrip('matchthis') + self.s.str.lstrip('A') def time_title(self): - self.many.str.title() + self.s.str.title() def time_upper(self): - self.many.str.upper() + self.s.str.upper() def time_lower(self): - self.many.str.lower() + self.s.str.lower() + + +class Repeat(object): + + goal_time = 0.2 + params = ['int', 'array'] + param_names = ['repeats'] + + def setup(self, repeats): + N = 10**5 + self.s = Series(tm.makeStringIndex(N)) + repeat = {'int': 1, 'array': np.random.randint(1, 3, N)} + self.repeat = repeat[repeats] + + def time_repeat(self, repeats): + self.s.str.repeat(self.repeat) + + +class Contains(object): + + goal_time = 0.2 + params = [True, False] + param_names = ['regex'] + + def setup(self, regex): + self.s = Series(tm.makeStringIndex(10**5)) + + def time_contains(self, regex): + self.s.str.contains('A', regex=regex) + + +class Split(object): + + goal_time = 0.2 + params = [True, False] + param_names = ['expand'] + + def setup(self, expand): + self.s = Series(tm.makeStringIndex(10**5)).str.join('--') + + def time_split(self, expand): + self.s.str.split('--', expand=expand) + + +class Dummies(object): + + goal_time = 0.2 + + def setup(self): + self.s = Series(tm.makeStringIndex(10**5)).str.join('|') def time_get_dummies(self): self.s.str.get_dummies('|') -class StringEncode(object): +class Encode(object): + goal_time = 0.2 def setup(self): - self.ser = Series(testing.makeUnicodeIndex()) + self.ser = Series(tm.makeUnicodeIndex()) def time_encode_decode(self): self.ser.str.encode('utf-8').str.decode('utf-8') + + +class Slice(object): + + goal_time = 0.2 + + def setup(self): + self.s = Series(['abcdefg', np.nan] * 500000) + + def time_vector_slice(self): + # GH 2602 + self.s.str[:5] diff 
--git a/asv_bench/benchmarks/timedelta.py b/asv_bench/benchmarks/timedelta.py index c112d1ef72eb8..3fe75b3c34299 100644 --- a/asv_bench/benchmarks/timedelta.py +++ b/asv_bench/benchmarks/timedelta.py @@ -1,42 +1,129 @@ -from .pandas_vb_common import * -from pandas import to_timedelta, Timestamp +import datetime +import numpy as np +from pandas import Series, timedelta_range, to_timedelta, Timestamp, Timedelta + + +class TimedeltaConstructor(object): -class ToTimedelta(object): goal_time = 0.2 - def setup(self): - self.arr = np.random.randint(0, 1000, size=10000) - self.arr2 = ['{0} days'.format(i) for i in self.arr] + def time_from_int(self): + Timedelta(123456789) + + def time_from_unit(self): + Timedelta(1, unit='d') + + def time_from_components(self): + Timedelta(days=1, hours=2, minutes=3, seconds=4, milliseconds=5, + microseconds=6, nanoseconds=7) + + def time_from_datetime_timedelta(self): + Timedelta(datetime.timedelta(days=1, seconds=1)) + + def time_from_np_timedelta(self): + Timedelta(np.timedelta64(1, 'ms')) + + def time_from_string(self): + Timedelta('1 days') + + def time_from_iso_format(self): + Timedelta('P4DT12H30M5S') - self.arr3 = np.random.randint(0, 60, size=10000) - self.arr3 = ['00:00:{0:02d}'.format(i) for i in self.arr3] + def time_from_missing(self): + Timedelta('nat') - self.arr4 = list(self.arr2) - self.arr4[-1] = 'apple' + +class ToTimedelta(object): + + goal_time = 0.2 + + def setup(self): + self.ints = np.random.randint(0, 60, size=10000) + self.str_days = [] + self.str_seconds = [] + for i in self.ints: + self.str_days.append('{0} days'.format(i)) + self.str_seconds.append('00:00:{0:02d}'.format(i)) def time_convert_int(self): - to_timedelta(self.arr, unit='s') + to_timedelta(self.ints, unit='s') - def time_convert_string(self): - to_timedelta(self.arr2) + def time_convert_string_days(self): + to_timedelta(self.str_days) def time_convert_string_seconds(self): - to_timedelta(self.arr3) + to_timedelta(self.str_seconds) + + +class 
ToTimedeltaErrors(object): + + goal_time = 0.2 + params = ['coerce', 'ignore'] + param_names = ['errors'] - def time_convert_coerce(self): - to_timedelta(self.arr4, errors='coerce') + def setup(self, errors): + ints = np.random.randint(0, 60, size=10000) + self.arr = ['{0} days'.format(i) for i in ints] + self.arr[-1] = 'apple' - def time_convert_ignore(self): - to_timedelta(self.arr4, errors='ignore') + def time_convert(self, errors): + to_timedelta(self.arr, errors=errors) -class Ops(object): +class TimedeltaOps(object): + goal_time = 0.2 def setup(self): self.td = to_timedelta(np.arange(1000000)) self.ts = Timestamp('2000') - def test_add_td_ts(self): + def time_add_td_ts(self): self.td + self.ts + + +class TimedeltaProperties(object): + + goal_time = 0.2 + + def setup_cache(self): + td = Timedelta(days=365, minutes=35, seconds=25, milliseconds=35) + return td + + def time_timedelta_days(self, td): + td.days + + def time_timedelta_seconds(self, td): + td.seconds + + def time_timedelta_microseconds(self, td): + td.microseconds + + def time_timedelta_nanoseconds(self, td): + td.nanoseconds + + +class DatetimeAccessor(object): + + goal_time = 0.2 + + def setup_cache(self): + N = 100000 + series = Series(timedelta_range('1 days', periods=N, freq='h')) + return series + + def time_dt_accessor(self, series): + series.dt + + def time_timedelta_days(self, series): + series.dt.days + + def time_timedelta_seconds(self, series): + series.dt.seconds + + def time_timedelta_microseconds(self, series): + series.dt.microseconds + + def time_timedelta_nanoseconds(self, series): + series.dt.nanoseconds diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py index 6e9ef4b10273c..e1a6bc7a68e9d 100644 --- a/asv_bench/benchmarks/timeseries.py +++ b/asv_bench/benchmarks/timeseries.py @@ -1,349 +1,332 @@ -from pandas.tseries.converter import DatetimeConverter -from .pandas_vb_common import * -import pandas as pd +import warnings from datetime import 
timedelta -import datetime as dt + +import numpy as np +from pandas import to_datetime, date_range, Series, DataFrame, period_range +from pandas.tseries.frequencies import infer_freq try: - import pandas.tseries.holiday + from pandas.plotting._converter import DatetimeConverter except ImportError: - pass -from pandas.tseries.frequencies import infer_freq -import numpy as np + from pandas.tseries.converter import DatetimeConverter -if hasattr(Series, 'convert'): - Series.resample = Series.convert +from .pandas_vb_common import setup # noqa class DatetimeIndex(object): + goal_time = 0.2 + params = ['dst', 'repeated', 'tz_aware', 'tz_naive'] + param_names = ['index_type'] - def setup(self): - self.N = 100000 - self.rng = date_range(start='1/1/2000', periods=self.N, freq='T') - self.delta_offset = pd.offsets.Day() - self.fast_offset = pd.offsets.DateOffset(months=2, days=2) - self.slow_offset = pd.offsets.BusinessDay() + def setup(self, index_type): + N = 100000 + dtidxes = {'dst': date_range(start='10/29/2000 1:00:00', + end='10/29/2000 1:59:59', freq='S'), + 'repeated': date_range(start='2000', + periods=N / 10, + freq='s').repeat(10), + 'tz_aware': date_range(start='2000', + periods=N, + freq='s', + tz='US/Eastern'), + 'tz_naive': date_range(start='2000', + periods=N, + freq='s')} + self.index = dtidxes[index_type] - self.rng2 = date_range(start='1/1/2000 9:30', periods=10000, freq='S', tz='US/Eastern') + def time_add_timedelta(self, index_type): + self.index + timedelta(minutes=2) - self.index_repeated = date_range(start='1/1/2000', periods=1000, freq='T').repeat(10) + def time_normalize(self, index_type): + self.index.normalize() - self.rng3 = date_range(start='1/1/2000', periods=1000, freq='H') - self.df = DataFrame(np.random.randn(len(self.rng3), 2), self.rng3) + def time_unique(self, index_type): + self.index.unique() - self.rng4 = date_range(start='1/1/2000', periods=1000, freq='H', tz='US/Eastern') - self.df2 = DataFrame(np.random.randn(len(self.rng4), 2), 
index=self.rng4) + def time_to_time(self, index_type): + self.index.time - N = 100000 - self.dti = pd.date_range('2011-01-01', freq='H', periods=N).repeat(5) - self.dti_tz = pd.date_range('2011-01-01', freq='H', periods=N, - tz='Asia/Tokyo').repeat(5) + def time_get(self, index_type): + self.index[0] - self.rng5 = date_range(start='1/1/2000', end='3/1/2000', tz='US/Eastern') + def time_timeseries_is_month_start(self, index_type): + self.index.is_month_start - self.dst_rng = date_range(start='10/29/2000 1:00:00', end='10/29/2000 1:59:59', freq='S') - self.index = date_range(start='10/29/2000', end='10/29/2000 00:59:59', freq='S') - self.index = self.index.append(self.dst_rng) - self.index = self.index.append(self.dst_rng) - self.index = self.index.append(date_range(start='10/29/2000 2:00:00', end='10/29/2000 3:00:00', freq='S')) + def time_to_date(self, index_type): + self.index.date - self.N = 10000 - self.rng6 = date_range(start='1/1/1', periods=self.N, freq='B') + def time_to_pydatetime(self, index_type): + self.index.to_pydatetime() - self.rng7 = date_range(start='1/1/1700', freq='D', periods=100000) - self.a = self.rng7[:50000].append(self.rng7[50002:]) - def time_add_timedelta(self): - (self.rng + timedelta(minutes=2)) +class TzLocalize(object): - def time_add_offset_delta(self): - (self.rng + self.delta_offset) + goal_time = 0.2 - def time_add_offset_fast(self): - (self.rng + self.fast_offset) + def setup(self): + dst_rng = date_range(start='10/29/2000 1:00:00', + end='10/29/2000 1:59:59', freq='S') + self.index = date_range(start='10/29/2000', + end='10/29/2000 00:59:59', freq='S') + self.index = self.index.append(dst_rng) + self.index = self.index.append(dst_rng) + self.index = self.index.append(date_range(start='10/29/2000 2:00:00', + end='10/29/2000 3:00:00', + freq='S')) + + def time_infer_dst(self): + with warnings.catch_warnings(record=True): + self.index.tz_localize('US/Eastern', infer_dst=True) - def time_add_offset_slow(self): - (self.rng + 
self.slow_offset) - def time_normalize(self): - self.rng2.normalize() +class ResetIndex(object): - def time_unique(self): - self.index_repeated.unique() + goal_time = 0.2 + params = [None, 'US/Eastern'] + param_names = 'tz' - def time_reset_index(self): + def setup(self, tz): + idx = date_range(start='1/1/2000', periods=1000, freq='H', tz=tz) + self.df = DataFrame(np.random.randn(1000, 2), index=idx) + + def time_reest_datetimeindex(self, tz): self.df.reset_index() - def time_reset_index_tz(self): - self.df2.reset_index() - def time_dti_factorize(self): +class Factorize(object): + + goal_time = 0.2 + params = [None, 'Asia/Tokyo'] + param_names = 'tz' + + def setup(self, tz): + N = 100000 + self.dti = date_range('2011-01-01', freq='H', periods=N, tz=tz) + self.dti = self.dti.repeat(5) + + def time_factorize(self, tz): self.dti.factorize() - def time_dti_tz_factorize(self): - self.dti_tz.factorize() - def time_timestamp_tzinfo_cons(self): - self.rng5[0] +class InferFreq(object): - def time_infer_dst(self): - self.index.tz_localize('US/Eastern', infer_dst=True) + goal_time = 0.2 + params = [None, 'D', 'B'] + param_names = ['freq'] - def time_timeseries_is_month_start(self): - self.rng6.is_month_start + def setup(self, freq): + if freq is None: + self.idx = date_range(start='1/1/1700', freq='D', periods=10000) + self.idx.freq = None + else: + self.idx = date_range(start='1/1/1700', freq=freq, periods=10000) - def time_infer_freq(self): - infer_freq(self.a) + def time_infer_freq(self, freq): + infer_freq(self.idx) class TimeDatetimeConverter(object): + goal_time = 0.2 def setup(self): - self.N = 100000 - self.rng = date_range(start='1/1/2000', periods=self.N, freq='T') + N = 100000 + self.rng = date_range(start='1/1/2000', periods=N, freq='T') def time_convert(self): DatetimeConverter.convert(self.rng, None, None) class Iteration(object): - goal_time = 0.2 - - def setup(self): - self.N = 1000000 - self.M = 10000 - self.idx1 = date_range(start='20140101', freq='T', 
periods=self.N) - self.idx2 = period_range(start='20140101', freq='T', periods=self.N) - - def iter_n(self, iterable, n=None): - self.i = 0 - for _ in iterable: - self.i += 1 - if ((n is not None) and (self.i > n)): - break - - def time_iter_datetimeindex(self): - self.iter_n(self.idx1) - def time_iter_datetimeindex_preexit(self): - self.iter_n(self.idx1, self.M) + goal_time = 0.2 + params = [date_range, period_range] + param_names = ['time_index'] - def time_iter_periodindex(self): - self.iter_n(self.idx2) + def setup(self, time_index): + N = 10**6 + self.idx = time_index(start='20140101', freq='T', periods=N) + self.exit = 10000 - def time_iter_periodindex_preexit(self): - self.iter_n(self.idx2, self.M) + def time_iter(self, time_index): + for _ in self.idx: + pass + def time_iter_preexit(self, time_index): + for i, _ in enumerate(self.idx): + if i > self.exit: + break -#---------------------------------------------------------------------- -# Resampling class ResampleDataFrame(object): - goal_time = 0.2 - - def setup(self): - self.rng = date_range(start='20130101', periods=100000, freq='50L') - self.df = DataFrame(np.random.randn(100000, 2), index=self.rng) - def time_max_numpy(self): - self.df.resample('1s', how=np.max) - - def time_max_string(self): - self.df.resample('1s', how='max') - - def time_mean_numpy(self): - self.df.resample('1s', how=np.mean) - - def time_mean_string(self): - self.df.resample('1s', how='mean') + goal_time = 0.2 + params = ['max', 'mean', 'min'] + param_names = ['method'] - def time_min_numpy(self): - self.df.resample('1s', how=np.min) + def setup(self, method): + rng = date_range(start='20130101', periods=100000, freq='50L') + df = DataFrame(np.random.randn(100000, 2), index=rng) + self.resample = getattr(df.resample('1s'), method) - def time_min_string(self): - self.df.resample('1s', how='min') + def time_method(self, method): + self.resample() class ResampleSeries(object): + + goal_time = 0.2 + params = (['period', 'datetime'], 
['5min', '1D'], ['mean', 'ohlc']) + param_names = ['index', 'freq', 'method'] + + def setup(self, index, freq, method): + indexes = {'period': period_range(start='1/1/2000', + end='1/1/2001', + freq='T'), + 'datetime': date_range(start='1/1/2000', + end='1/1/2001', + freq='T')} + idx = indexes[index] + ts = Series(np.random.randn(len(idx)), index=idx) + self.resample = getattr(ts.resample(freq), method) + + def time_resample(self, index, freq, method): + self.resample() + + +class ResampleDatetetime64(object): + # GH 7754 goal_time = 0.2 def setup(self): - self.rng1 = period_range(start='1/1/2000', end='1/1/2001', freq='T') - self.ts1 = Series(np.random.randn(len(self.rng1)), index=self.rng1) - - self.rng2 = date_range(start='1/1/2000', end='1/1/2001', freq='T') - self.ts2 = Series(np.random.randn(len(self.rng2)), index=self.rng2) + rng3 = date_range(start='2000-01-01 00:00:00', + end='2000-01-01 10:00:00', freq='555000U') + self.dt_ts = Series(5, rng3, dtype='datetime64[ns]') - self.rng3 = date_range(start='2000-01-01 00:00:00', end='2000-01-01 10:00:00', freq='555000U') - self.int_ts = Series(5, self.rng3, dtype='int64') - self.dt_ts = self.int_ts.astype('datetime64[ns]') - - def time_period_downsample_mean(self): - self.ts1.resample('D', how='mean') - - def time_timestamp_downsample_mean(self): - self.ts2.resample('D', how='mean') - - def time_resample_datetime64(self): - # GH 7754 - self.dt_ts.resample('1S', how='last') - - def time_1min_5min_mean(self): - self.ts2[:10000].resample('5min', how='mean') - - def time_1min_5min_ohlc(self): - self.ts2[:10000].resample('5min', how='ohlc') + def time_resample(self): + self.dt_ts.resample('1S').last() class AsOf(object): - goal_time = 0.2 - def setup(self): - self.N = 10000 - self.rng = date_range(start='1/1/1990', periods=self.N, freq='53s') - self.ts = Series(np.random.randn(self.N), index=self.rng) - self.dates = date_range(start='1/1/1990', periods=(self.N * 10), freq='5s') + goal_time = 0.2 + params = 
['DataFrame', 'Series'] + param_names = ['constructor'] + + def setup(self, constructor): + N = 10000 + M = 10 + rng = date_range(start='1/1/1990', periods=N, freq='53s') + data = {'DataFrame': DataFrame(np.random.randn(N, M)), + 'Series': Series(np.random.randn(N))} + self.ts = data[constructor] + self.ts.index = rng self.ts2 = self.ts.copy() - self.ts2[250:5000] = np.nan + self.ts2.iloc[250:5000] = np.nan self.ts3 = self.ts.copy() - self.ts3[-5000:] = np.nan + self.ts3.iloc[-5000:] = np.nan + self.dates = date_range(start='1/1/1990', periods=N * 10, freq='5s') + self.date = self.dates[0] + self.date_last = self.dates[-1] + self.date_early = self.date - timedelta(10) # test speed of pre-computing NAs. - def time_asof(self): + def time_asof(self, constructor): self.ts.asof(self.dates) # should be roughly the same as above. - def time_asof_nan(self): + def time_asof_nan(self, constructor): self.ts2.asof(self.dates) # test speed of the code path for a scalar index # without *while* loop - def time_asof_single(self): - self.ts.asof(self.dates[0]) + def time_asof_single(self, constructor): + self.ts.asof(self.date) # test speed of the code path for a scalar index # before the start. should be the same as above. - def time_asof_single_early(self): - self.ts.asof(self.dates[0] - dt.timedelta(10)) + def time_asof_single_early(self, constructor): + self.ts.asof(self.date_early) # test the speed of the code path for a scalar index # with a long *while* loop. should still be much # faster than pre-computing all the NAs. 
- def time_asof_nan_single(self): - self.ts3.asof(self.dates[-1]) + def time_asof_nan_single(self, constructor): + self.ts3.asof(self.date_last) -class AsOfDataFrame(object): - goal_time = 0.2 - - def setup(self): - self.N = 10000 - self.M = 100 - self.rng = date_range(start='1/1/1990', periods=self.N, freq='53s') - self.dates = date_range(start='1/1/1990', periods=(self.N * 10), freq='5s') - self.ts = DataFrame(np.random.randn(self.N, self.M), index=self.rng) - self.ts2 = self.ts.copy() - self.ts2.iloc[250:5000] = np.nan - self.ts3 = self.ts.copy() - self.ts3.iloc[-5000:] = np.nan +class SortIndex(object): - # test speed of pre-computing NAs. - def time_asof(self): - self.ts.asof(self.dates) + goal_time = 0.2 + params = [True, False] + param_names = ['monotonic'] - # should be roughly the same as above. - def time_asof_nan(self): - self.ts2.asof(self.dates) + def setup(self, monotonic): + N = 10**5 + idx = date_range(start='1/1/2000', periods=N, freq='s') + self.s = Series(np.random.randn(N), index=idx) + if not monotonic: + self.s = self.s.sample(frac=1) - # test speed of the code path for a scalar index - # with pre-computing all NAs. - def time_asof_single(self): - self.ts.asof(self.dates[0]) + def time_sort_index(self, monotonic): + self.s.sort_index() - # should be roughly the same as above. - def time_asof_nan_single(self): - self.ts3.asof(self.dates[-1]) + def time_get_slice(self, monotonic): + self.s[:10000] - # test speed of the code path for a scalar index - # before the start. should be without the cost of - # pre-computing all the NAs. 
- def time_asof_single_early(self): - self.ts.asof(self.dates[0] - dt.timedelta(10)) +class IrregularOps(object): -class TimeSeries(object): goal_time = 0.2 def setup(self): - self.N = 100000 - self.rng = date_range(start='1/1/2000', periods=self.N, freq='s') - self.rng = self.rng.take(np.random.permutation(self.N)) - self.ts = Series(np.random.randn(self.N), index=self.rng) + N = 10**5 + idx = date_range(start='1/1/2000', periods=N, freq='s') + s = Series(np.random.randn(N), index=idx) + self.left = s.sample(frac=1) + self.right = s.sample(frac=1) - self.rng2 = date_range(start='1/1/2000', periods=self.N, freq='T') - self.ts2 = Series(np.random.randn(self.N), index=self.rng2) + def time_add(self): + self.left + self.right - self.lindex = np.random.permutation(self.N)[:(self.N // 2)] - self.rindex = np.random.permutation(self.N)[:(self.N // 2)] - self.left = Series(self.ts2.values.take(self.lindex), index=self.ts2.index.take(self.lindex)) - self.right = Series(self.ts2.values.take(self.rindex), index=self.ts2.index.take(self.rindex)) - self.rng3 = date_range(start='1/1/2000', periods=1500000, freq='S') - self.ts3 = Series(1, index=self.rng3) +class Lookup(object): - def time_sort_index(self): - self.ts.sort_index() + goal_time = 0.2 - def time_timeseries_slice_minutely(self): - self.ts2[:10000] + def setup(self): + N = 1500000 + rng = date_range(start='1/1/2000', periods=N, freq='S') + self.ts = Series(1, index=rng) + self.lookup_val = rng[N // 2] - def time_add_irregular(self): - (self.left + self.right) + def time_lookup_and_cleanup(self): + self.ts[self.lookup_val] + self.ts.index._cleanup() - def time_large_lookup_value(self): - self.ts3[self.ts3.index[(len(self.ts3) // 2)]] - self.ts3.index._cleanup() +class ToDatetimeYYYYMMDD(object): -class SeriesArithmetic(object): goal_time = 0.2 def setup(self): - self.N = 100000 - self.s = Series(date_range(start='20140101', freq='T', periods=self.N)) - self.delta_offset = pd.offsets.Day() - self.fast_offset = 
pd.offsets.DateOffset(months=2, days=2) - self.slow_offset = pd.offsets.BusinessDay() + rng = date_range(start='1/1/2000', periods=10000, freq='D') + self.stringsD = Series(rng.strftime('%Y%m%d')) - def time_add_offset_delta(self): - (self.s + self.delta_offset) - - def time_add_offset_fast(self): - (self.s + self.fast_offset) + def time_format_YYYYMMDD(self): + to_datetime(self.stringsD, format='%Y%m%d') - def time_add_offset_slow(self): - (self.s + self.slow_offset) +class ToDatetimeISO8601(object): -class ToDatetime(object): goal_time = 0.2 def setup(self): - self.rng = date_range(start='1/1/2000', periods=10000, freq='D') - self.stringsD = Series((((self.rng.year * 10000) + (self.rng.month * 100)) + self.rng.day), dtype=np.int64).apply(str) - - self.rng = date_range(start='1/1/2000', periods=20000, freq='H') - self.strings = [x.strftime('%Y-%m-%d %H:%M:%S') for x in self.rng] - self.strings_nosep = [x.strftime('%Y%m%d %H:%M:%S') for x in self.rng] + rng = date_range(start='1/1/2000', periods=20000, freq='H') + self.strings = rng.strftime('%Y-%m-%d %H:%M:%S').tolist() + self.strings_nosep = rng.strftime('%Y%m%d %H:%M:%S').tolist() self.strings_tz_space = [x.strftime('%Y-%m-%d %H:%M:%S') + ' -0800' - for x in self.rng] - - self.s = Series((['19MAY11', '19MAY11:00:00:00'] * 100000)) - self.s2 = self.s.str.replace(':\\S+$', '') - - def time_format_YYYYMMDD(self): - to_datetime(self.stringsD, format='%Y%m%d') + for x in rng] def time_iso8601(self): to_datetime(self.strings) @@ -360,138 +343,59 @@ def time_iso8601_format_no_sep(self): def time_iso8601_tz_spaceformat(self): to_datetime(self.strings_tz_space) - def time_format_exact(self): - to_datetime(self.s2, format='%d%b%y') - - def time_format_no_exact(self): - to_datetime(self.s, format='%d%b%y', exact=False) +class ToDatetimeFormat(object): -class Offsets(object): goal_time = 0.2 def setup(self): - self.date = dt.datetime(2011, 1, 1) - self.dt64 = np.datetime64('2011-01-01 09:00Z') - self.hcal = 
pd.tseries.holiday.USFederalHolidayCalendar() - self.day = pd.offsets.Day() - self.year = pd.offsets.YearBegin() - self.cday = pd.offsets.CustomBusinessDay() - self.cmb = pd.offsets.CustomBusinessMonthBegin(calendar=self.hcal) - self.cme = pd.offsets.CustomBusinessMonthEnd(calendar=self.hcal) - self.cdayh = pd.offsets.CustomBusinessDay(calendar=self.hcal) - - def time_timeseries_day_apply(self): - self.day.apply(self.date) - - def time_timeseries_day_incr(self): - (self.date + self.day) - - def time_timeseries_year_apply(self): - self.year.apply(self.date) - - def time_timeseries_year_incr(self): - (self.date + self.year) - - # custom business offsets - - def time_custom_bday_decr(self): - (self.date - self.cday) - - def time_custom_bday_incr(self): - (self.date + self.cday) - - def time_custom_bday_apply(self): - self.cday.apply(self.date) - - def time_custom_bday_apply_dt64(self): - self.cday.apply(self.dt64) - - def time_custom_bday_cal_incr(self): - self.date + 1 * self.cdayh - - def time_custom_bday_cal_decr(self): - self.date - 1 * self.cdayh - - def time_custom_bday_cal_incr_n(self): - self.date + 10 * self.cdayh - - def time_custom_bday_cal_incr_neg_n(self): - self.date - 10 * self.cdayh - - # Increment custom business month - - def time_custom_bmonthend_incr(self): - (self.date + self.cme) - - def time_custom_bmonthend_incr_n(self): - (self.date + (10 * self.cme)) + self.s = Series(['19MAY11', '19MAY11:00:00:00'] * 100000) + self.s2 = self.s.str.replace(':\\S+$', '') - def time_custom_bmonthend_decr_n(self): - (self.date - (10 * self.cme)) + def time_exact(self): + to_datetime(self.s2, format='%d%b%y') - def time_custom_bmonthbegin_decr_n(self): - (self.date - (10 * self.cmb)) + def time_no_exact(self): + to_datetime(self.s, format='%d%b%y', exact=False) - def time_custom_bmonthbegin_incr_n(self): - (self.date + (10 * self.cmb)) +class ToDatetimeCache(object): -class SemiMonthOffset(object): goal_time = 0.2 + params = [True, False] + param_names = 
['cache'] - def setup(self): - self.N = 100000 - self.rng = date_range(start='1/1/2000', periods=self.N, freq='T') - # date is not on an offset which will be slowest case - self.date = dt.datetime(2011, 1, 2) - self.semi_month_end = pd.offsets.SemiMonthEnd() - self.semi_month_begin = pd.offsets.SemiMonthBegin() - - def time_end_apply(self): - self.semi_month_end.apply(self.date) - - def time_end_incr(self): - self.date + self.semi_month_end + def setup(self, cache): + N = 10000 + self.unique_numeric_seconds = list(range(N)) + self.dup_numeric_seconds = [1000] * N + self.dup_string_dates = ['2000-02-11'] * N + self.dup_string_with_tz = ['2000-02-11 15:00:00-0800'] * N - def time_end_incr_n(self): - self.date + 10 * self.semi_month_end + def time_unique_seconds_and_unit(self, cache): + to_datetime(self.unique_numeric_seconds, unit='s', cache=cache) - def time_end_decr(self): - self.date - self.semi_month_end + def time_dup_seconds_and_unit(self, cache): + to_datetime(self.dup_numeric_seconds, unit='s', cache=cache) - def time_end_decr_n(self): - self.date - 10 * self.semi_month_end + def time_dup_string_dates(self, cache): + to_datetime(self.dup_string_dates, cache=cache) - def time_end_apply_index(self): - self.semi_month_end.apply_index(self.rng) + def time_dup_string_dates_and_format(self, cache): + to_datetime(self.dup_string_dates, format='%Y-%m-%d', cache=cache) - def time_end_incr_rng(self): - self.rng + self.semi_month_end + def time_dup_string_tzoffset_dates(self, cache): + to_datetime(self.dup_string_with_tz, cache=cache) - def time_end_decr_rng(self): - self.rng - self.semi_month_end - def time_begin_apply(self): - self.semi_month_begin.apply(self.date) +class DatetimeAccessor(object): - def time_begin_incr(self): - self.date + self.semi_month_begin - - def time_begin_incr_n(self): - self.date + 10 * self.semi_month_begin - - def time_begin_decr(self): - self.date - self.semi_month_begin - - def time_begin_decr_n(self): - self.date - 10 * 
self.semi_month_begin - - def time_begin_apply_index(self): - self.semi_month_begin.apply_index(self.rng) + def setup(self): + N = 100000 + self.series = Series(date_range(start='1/1/2000', periods=N, freq='T')) - def time_begin_incr_rng(self): - self.rng + self.semi_month_begin + def time_dt_accessor(self): + self.series.dt - def time_begin_decr_rng(self): - self.rng - self.semi_month_begin + def time_dt_accessor_normalize(self): + self.series.dt.normalize() diff --git a/asv_bench/benchmarks/timestamp.py b/asv_bench/benchmarks/timestamp.py new file mode 100644 index 0000000000000..c142a9b59fc43 --- /dev/null +++ b/asv_bench/benchmarks/timestamp.py @@ -0,0 +1,119 @@ +import datetime + +from pandas import Timestamp +import pytz + + +class TimestampConstruction(object): + + def time_parse_iso8601_no_tz(self): + Timestamp('2017-08-25 08:16:14') + + def time_parse_iso8601_tz(self): + Timestamp('2017-08-25 08:16:14-0500') + + def time_parse_dateutil(self): + Timestamp('2017/08/25 08:16:14 AM') + + def time_parse_today(self): + Timestamp('today') + + def time_parse_now(self): + Timestamp('now') + + def time_fromordinal(self): + Timestamp.fromordinal(730120) + + def time_fromtimestamp(self): + Timestamp.fromtimestamp(1515448538) + + +class TimestampProperties(object): + goal_time = 0.2 + + _tzs = [None, pytz.timezone('Europe/Amsterdam')] + _freqs = [None, 'B'] + params = [_tzs, _freqs] + param_names = ['tz', 'freq'] + + def setup(self, tz, freq): + self.ts = Timestamp('2017-08-25 08:16:14', tzinfo=tz, freq=freq) + + def time_tz(self, tz, freq): + self.ts.tz + + def time_dayofweek(self, tz, freq): + self.ts.dayofweek + + def time_weekday_name(self, tz, freq): + self.ts.weekday_name + + def time_dayofyear(self, tz, freq): + self.ts.dayofyear + + def time_week(self, tz, freq): + self.ts.week + + def time_quarter(self, tz, freq): + self.ts.quarter + + def time_days_in_month(self, tz, freq): + self.ts.days_in_month + + def time_freqstr(self, tz, freq): + self.ts.freqstr + + 
def time_is_month_start(self, tz, freq): + self.ts.is_month_start + + def time_is_month_end(self, tz, freq): + self.ts.is_month_end + + def time_is_quarter_start(self, tz, freq): + self.ts.is_quarter_start + + def time_is_quarter_end(self, tz, freq): + self.ts.is_quarter_end + + def time_is_year_start(self, tz, freq): + self.ts.is_quarter_end + + def time_is_year_end(self, tz, freq): + self.ts.is_quarter_end + + def time_is_leap_year(self, tz, freq): + self.ts.is_quarter_end + + def time_microsecond(self, tz, freq): + self.ts.microsecond + + +class TimestampOps(object): + goal_time = 0.2 + + params = [None, 'US/Eastern'] + param_names = ['tz'] + + def setup(self, tz): + self.ts = Timestamp('2017-08-25 08:16:14', tz=tz) + + def time_replace_tz(self, tz): + self.ts.replace(tzinfo=pytz.timezone('US/Eastern')) + + def time_replace_None(self, tz): + self.ts.replace(tzinfo=None) + + def time_to_pydatetime(self, tz): + self.ts.to_pydatetime() + + +class TimestampAcrossDst(object): + goal_time = 0.2 + + def setup(self): + dt = datetime.datetime(2016, 3, 27, 1) + self.tzinfo = pytz.timezone('CET').localize(dt, is_dst=False).tzinfo + self.ts2 = Timestamp(dt) + + def time_replace_across_dst(self): + self.ts2.replace(tzinfo=self.tzinfo) diff --git a/asv_bench/vbench_to_asv.py b/asv_bench/vbench_to_asv.py index c3041ec2b1ba1..b1179387e65d5 100644 --- a/asv_bench/vbench_to_asv.py +++ b/asv_bench/vbench_to_asv.py @@ -69,7 +69,7 @@ def visit_ClassDef(self, node): return node def visit_TryExcept(self, node): - if any([isinstance(x, (ast.Import, ast.ImportFrom)) for x in node.body]): + if any(isinstance(x, (ast.Import, ast.ImportFrom)) for x in node.body): self.imports.append(node) else: self.generic_visit(node) @@ -114,7 +114,7 @@ def translate_module(target_module): l_vars = {} exec('import ' + target_module) in g_vars - print target_module + print(target_module) module = eval(target_module, g_vars) benchmarks = [] @@ -157,7 +157,7 @@ def translate_module(target_module): mod = 
os.path.basename(module) if mod in ['make.py', 'measure_memory_consumption.py', 'perf_HEAD.py', 'run_suite.py', 'test_perf.py', 'generate_rst_files.py', 'test.py', 'suite.py']: continue - print - print mod + print('') + print(mod) translate_module(mod.replace('.py', '')) diff --git a/bench/alignment.py b/bench/alignment.py deleted file mode 100644 index bc3134f597ee0..0000000000000 --- a/bench/alignment.py +++ /dev/null @@ -1,22 +0,0 @@ -# Setup -from pandas.compat import range, lrange -import numpy as np -import pandas -import la -N = 1000 -K = 50 -arr1 = np.random.randn(N, K) -arr2 = np.random.randn(N, K) -idx1 = lrange(N) -idx2 = lrange(K) - -# pandas -dma1 = pandas.DataFrame(arr1, idx1, idx2) -dma2 = pandas.DataFrame(arr2, idx1[::-1], idx2[::-1]) - -# larry -lar1 = la.larry(arr1, [idx1, idx2]) -lar2 = la.larry(arr2, [idx1[::-1], idx2[::-1]]) - -for i in range(100): - result = lar1 + lar2 diff --git a/bench/bench_dense_to_sparse.py b/bench/bench_dense_to_sparse.py deleted file mode 100644 index e1dcd3456e88d..0000000000000 --- a/bench/bench_dense_to_sparse.py +++ /dev/null @@ -1,14 +0,0 @@ -from pandas import * - -K = 100 -N = 100000 -rng = DatetimeIndex('1/1/2000', periods=N, offset=datetools.Minute()) - -rng2 = np.asarray(rng).astype('M8[us]').astype('i8') - -series = {} -for i in range(1, K + 1): - data = np.random.randn(N)[:-i] - this_rng = rng2[:-i] - data[100:] = np.nan - series[i] = SparseSeries(data, index=this_rng) diff --git a/bench/bench_get_put_value.py b/bench/bench_get_put_value.py deleted file mode 100644 index 427e0b1b10a22..0000000000000 --- a/bench/bench_get_put_value.py +++ /dev/null @@ -1,56 +0,0 @@ -from pandas import * -from pandas.util.testing import rands -from pandas.compat import range - -N = 1000 -K = 50 - - -def _random_index(howmany): - return Index([rands(10) for _ in range(howmany)]) - -df = DataFrame(np.random.randn(N, K), index=_random_index(N), - columns=_random_index(K)) - - -def get1(): - for col in df.columns: - for row in 
df.index: - _ = df[col][row] - - -def get2(): - for col in df.columns: - for row in df.index: - _ = df.get_value(row, col) - - -def put1(): - for col in df.columns: - for row in df.index: - df[col][row] = 0 - - -def put2(): - for col in df.columns: - for row in df.index: - df.set_value(row, col, 0) - - -def resize1(): - buf = DataFrame() - for col in df.columns: - for row in df.index: - buf = buf.set_value(row, col, 5.) - return buf - - -def resize2(): - from collections import defaultdict - - buf = defaultdict(dict) - for col in df.columns: - for row in df.index: - buf[col][row] = 5. - - return DataFrame(buf) diff --git a/bench/bench_groupby.py b/bench/bench_groupby.py deleted file mode 100644 index d7a2853e1e7b2..0000000000000 --- a/bench/bench_groupby.py +++ /dev/null @@ -1,66 +0,0 @@ -from pandas import * -from pandas.util.testing import rands -from pandas.compat import range - -import string -import random - -k = 20000 -n = 10 - -foo = np.tile(np.array([rands(10) for _ in range(k)], dtype='O'), n) -foo2 = list(foo) -random.shuffle(foo) -random.shuffle(foo2) - -df = DataFrame({'A': foo, - 'B': foo2, - 'C': np.random.randn(n * k)}) - -import pandas._sandbox as sbx - - -def f(): - table = sbx.StringHashTable(len(df)) - ret = table.factorize(df['A']) - return ret - - -def g(): - table = sbx.PyObjectHashTable(len(df)) - ret = table.factorize(df['A']) - return ret - -ret = f() - -""" -import pandas._tseries as lib - -f = np.std - - -grouped = df.groupby(['A', 'B']) - -label_list = [ping.labels for ping in grouped.groupings] -shape = [len(ping.ids) for ping in grouped.groupings] - -from pandas.core.groupby import get_group_index - - -group_index = get_group_index(label_list, shape, - sort=True, xnull=True).astype('i4') - -ngroups = np.prod(shape) - -indexer = lib.groupsort_indexer(group_index, ngroups) - -values = df['C'].values.take(indexer) -group_index = group_index.take(indexer) - -f = lambda x: x.std(ddof=1) - -grouper = lib.Grouper(df['C'], np.ndarray.std, 
group_index, ngroups) -result = grouper.get_result() - -expected = grouped.std() -""" diff --git a/bench/bench_join_panel.py b/bench/bench_join_panel.py deleted file mode 100644 index f3c3f8ba15f70..0000000000000 --- a/bench/bench_join_panel.py +++ /dev/null @@ -1,85 +0,0 @@ -# reasonably efficient - - -def create_panels_append(cls, panels): - """ return an append list of panels """ - panels = [a for a in panels if a is not None] - # corner cases - if len(panels) == 0: - return None - elif len(panels) == 1: - return panels[0] - elif len(panels) == 2 and panels[0] == panels[1]: - return panels[0] - # import pdb; pdb.set_trace() - # create a joint index for the axis - - def joint_index_for_axis(panels, axis): - s = set() - for p in panels: - s.update(list(getattr(p, axis))) - return sorted(list(s)) - - def reindex_on_axis(panels, axis, axis_reindex): - new_axis = joint_index_for_axis(panels, axis) - new_panels = [p.reindex(**{axis_reindex: new_axis, - 'copy': False}) for p in panels] - return new_panels, new_axis - # create the joint major index, dont' reindex the sub-panels - we are - # appending - major = joint_index_for_axis(panels, 'major_axis') - # reindex on minor axis - panels, minor = reindex_on_axis(panels, 'minor_axis', 'minor') - # reindex on items - panels, items = reindex_on_axis(panels, 'items', 'items') - # concatenate values - try: - values = np.concatenate([p.values for p in panels], axis=1) - except Exception as detail: - raise Exception("cannot append values that dont' match dimensions! 
-> [%s] %s" - % (','.join(["%s" % p for p in panels]), str(detail))) - # pm('append - create_panel') - p = Panel(values, items=items, major_axis=major, - minor_axis=minor) - # pm('append - done') - return p - - -# does the job but inefficient (better to handle like you read a table in -# pytables...e.g create a LongPanel then convert to Wide) -def create_panels_join(cls, panels): - """ given an array of panels's, create a single panel """ - panels = [a for a in panels if a is not None] - # corner cases - if len(panels) == 0: - return None - elif len(panels) == 1: - return panels[0] - elif len(panels) == 2 and panels[0] == panels[1]: - return panels[0] - d = dict() - minor, major, items = set(), set(), set() - for panel in panels: - items.update(panel.items) - major.update(panel.major_axis) - minor.update(panel.minor_axis) - values = panel.values - for item, item_index in panel.items.indexMap.items(): - for minor_i, minor_index in panel.minor_axis.indexMap.items(): - for major_i, major_index in panel.major_axis.indexMap.items(): - try: - d[(minor_i, major_i, item)] = values[item_index, major_index, minor_index] - except: - pass - # stack the values - minor = sorted(list(minor)) - major = sorted(list(major)) - items = sorted(list(items)) - # create the 3d stack (items x columns x indicies) - data = np.dstack([np.asarray([np.asarray([d.get((minor_i, major_i, item), np.nan) - for item in items]) - for major_i in major]).transpose() - for minor_i in minor]) - # construct the panel - return Panel(data, items, major, minor) -add_class_method(Panel, create_panels_join, 'join_many') diff --git a/bench/bench_khash_dict.py b/bench/bench_khash_dict.py deleted file mode 100644 index 054fc36131b65..0000000000000 --- a/bench/bench_khash_dict.py +++ /dev/null @@ -1,89 +0,0 @@ -""" -Some comparisons of khash.h to Python dict -""" -from __future__ import print_function - -import numpy as np -import os - -from vbench.api import Benchmark -from pandas.util.testing import rands -from 
pandas.compat import range -import pandas._tseries as lib -import pandas._sandbox as sbx -import time - -import psutil - -pid = os.getpid() -proc = psutil.Process(pid) - - -def object_test_data(n): - pass - - -def string_test_data(n): - return np.array([rands(10) for _ in range(n)], dtype='O') - - -def int_test_data(n): - return np.arange(n, dtype='i8') - -N = 1000000 - -#---------------------------------------------------------------------- -# Benchmark 1: map_locations - - -def map_locations_python_object(): - arr = string_test_data(N) - return _timeit(lambda: lib.map_indices_object(arr)) - - -def map_locations_khash_object(): - arr = string_test_data(N) - - def f(): - table = sbx.PyObjectHashTable(len(arr)) - table.map_locations(arr) - return _timeit(f) - - -def _timeit(f, iterations=10): - start = time.time() - for _ in range(iterations): - foo = f() - elapsed = time.time() - start - return elapsed - -#---------------------------------------------------------------------- -# Benchmark 2: lookup_locations - - -def lookup_python(values): - table = lib.map_indices_object(values) - return _timeit(lambda: lib.merge_indexer_object(values, table)) - - -def lookup_khash(values): - table = sbx.PyObjectHashTable(len(values)) - table.map_locations(values) - locs = table.lookup_locations(values) - # elapsed = _timeit(lambda: table.lookup_locations2(values)) - return table - - -def leak(values): - for _ in range(100): - print(proc.get_memory_info()) - table = lookup_khash(values) - # table.destroy() - -arr = string_test_data(N) - -#---------------------------------------------------------------------- -# Benchmark 3: unique - -#---------------------------------------------------------------------- -# Benchmark 4: factorize diff --git a/bench/bench_merge.R b/bench/bench_merge.R deleted file mode 100644 index 3ed4618494857..0000000000000 --- a/bench/bench_merge.R +++ /dev/null @@ -1,161 +0,0 @@ -library(plyr) -library(data.table) -N <- 10000 -indices = rep(NA, N) -indices2 = 
rep(NA, N) -for (i in 1:N) { - indices[i] <- paste(sample(letters, 10), collapse="") - indices2[i] <- paste(sample(letters, 10), collapse="") -} -left <- data.frame(key=rep(indices[1:8000], 10), - key2=rep(indices2[1:8000], 10), - value=rnorm(80000)) -right <- data.frame(key=indices[2001:10000], - key2=indices2[2001:10000], - value2=rnorm(8000)) - -right2 <- data.frame(key=rep(right$key, 2), - key2=rep(right$key2, 2), - value2=rnorm(16000)) - -left.dt <- data.table(left, key=c("key", "key2")) -right.dt <- data.table(right, key=c("key", "key2")) -right2.dt <- data.table(right2, key=c("key", "key2")) - -# left.dt2 <- data.table(left) -# right.dt2 <- data.table(right) - -## left <- data.frame(key=rep(indices[1:1000], 10), -## key2=rep(indices2[1:1000], 10), -## value=rnorm(100000)) -## right <- data.frame(key=indices[1:1000], -## key2=indices2[1:1000], -## value2=rnorm(10000)) - -timeit <- function(func, niter=10) { - timing = rep(NA, niter) - for (i in 1:niter) { - gc() - timing[i] <- system.time(func())[3] - } - mean(timing) -} - -left.join <- function(sort=FALSE) { - result <- base::merge(left, right, all.x=TRUE, sort=sort) -} - -right.join <- function(sort=FALSE) { - result <- base::merge(left, right, all.y=TRUE, sort=sort) -} - -outer.join <- function(sort=FALSE) { - result <- base::merge(left, right, all=TRUE, sort=sort) -} - -inner.join <- function(sort=FALSE) { - result <- base::merge(left, right, all=FALSE, sort=sort) -} - -left.join.dt <- function(sort=FALSE) { - result <- right.dt[left.dt] -} - -right.join.dt <- function(sort=FALSE) { - result <- left.dt[right.dt] -} - -outer.join.dt <- function(sort=FALSE) { - result <- merge(left.dt, right.dt, all=TRUE, sort=sort) -} - -inner.join.dt <- function(sort=FALSE) { - result <- merge(left.dt, right.dt, all=FALSE, sort=sort) -} - -plyr.join <- function(type) { - result <- plyr::join(left, right, by=c("key", "key2"), - type=type, match="first") -} - -sort.options <- c(FALSE, TRUE) - -# many-to-one - -results <- 
matrix(nrow=4, ncol=3) -colnames(results) <- c("base::merge", "plyr", "data.table") -rownames(results) <- c("inner", "outer", "left", "right") - -base.functions <- c(inner.join, outer.join, left.join, right.join) -plyr.functions <- c(function() plyr.join("inner"), - function() plyr.join("full"), - function() plyr.join("left"), - function() plyr.join("right")) -dt.functions <- c(inner.join.dt, outer.join.dt, left.join.dt, right.join.dt) -for (i in 1:4) { - base.func <- base.functions[[i]] - plyr.func <- plyr.functions[[i]] - dt.func <- dt.functions[[i]] - results[i, 1] <- timeit(base.func) - results[i, 2] <- timeit(plyr.func) - results[i, 3] <- timeit(dt.func) -} - - -# many-to-many - -left.join <- function(sort=FALSE) { - result <- base::merge(left, right2, all.x=TRUE, sort=sort) -} - -right.join <- function(sort=FALSE) { - result <- base::merge(left, right2, all.y=TRUE, sort=sort) -} - -outer.join <- function(sort=FALSE) { - result <- base::merge(left, right2, all=TRUE, sort=sort) -} - -inner.join <- function(sort=FALSE) { - result <- base::merge(left, right2, all=FALSE, sort=sort) -} - -left.join.dt <- function(sort=FALSE) { - result <- right2.dt[left.dt] -} - -right.join.dt <- function(sort=FALSE) { - result <- left.dt[right2.dt] -} - -outer.join.dt <- function(sort=FALSE) { - result <- merge(left.dt, right2.dt, all=TRUE, sort=sort) -} - -inner.join.dt <- function(sort=FALSE) { - result <- merge(left.dt, right2.dt, all=FALSE, sort=sort) -} - -sort.options <- c(FALSE, TRUE) - -# many-to-one - -results <- matrix(nrow=4, ncol=3) -colnames(results) <- c("base::merge", "plyr", "data.table") -rownames(results) <- c("inner", "outer", "left", "right") - -base.functions <- c(inner.join, outer.join, left.join, right.join) -plyr.functions <- c(function() plyr.join("inner"), - function() plyr.join("full"), - function() plyr.join("left"), - function() plyr.join("right")) -dt.functions <- c(inner.join.dt, outer.join.dt, left.join.dt, right.join.dt) -for (i in 1:4) { - 
base.func <- base.functions[[i]] - plyr.func <- plyr.functions[[i]] - dt.func <- dt.functions[[i]] - results[i, 1] <- timeit(base.func) - results[i, 2] <- timeit(plyr.func) - results[i, 3] <- timeit(dt.func) -} - diff --git a/bench/bench_merge.py b/bench/bench_merge.py deleted file mode 100644 index 330dba7b9af69..0000000000000 --- a/bench/bench_merge.py +++ /dev/null @@ -1,105 +0,0 @@ -import random -import gc -import time -from pandas import * -from pandas.compat import range, lrange, StringIO -from pandas.util.testing import rands - -N = 10000 -ngroups = 10 - - -def get_test_data(ngroups=100, n=N): - unique_groups = lrange(ngroups) - arr = np.asarray(np.tile(unique_groups, n / ngroups), dtype=object) - - if len(arr) < n: - arr = np.asarray(list(arr) + unique_groups[:n - len(arr)], - dtype=object) - - random.shuffle(arr) - return arr - -# aggregate multiple columns -# df = DataFrame({'key1' : get_test_data(ngroups=ngroups), -# 'key2' : get_test_data(ngroups=ngroups), -# 'data1' : np.random.randn(N), -# 'data2' : np.random.randn(N)}) - -# df2 = DataFrame({'key1' : get_test_data(ngroups=ngroups, n=N//10), -# 'key2' : get_test_data(ngroups=ngroups//2, n=N//10), -# 'value' : np.random.randn(N // 10)}) -# result = merge.merge(df, df2, on='key2') - -N = 10000 - -indices = np.array([rands(10) for _ in range(N)], dtype='O') -indices2 = np.array([rands(10) for _ in range(N)], dtype='O') -key = np.tile(indices[:8000], 10) -key2 = np.tile(indices2[:8000], 10) - -left = DataFrame({'key': key, 'key2': key2, - 'value': np.random.randn(80000)}) -right = DataFrame({'key': indices[2000:], 'key2': indices2[2000:], - 'value2': np.random.randn(8000)}) - -right2 = right.append(right, ignore_index=True) - - -join_methods = ['inner', 'outer', 'left', 'right'] -results = DataFrame(index=join_methods, columns=[False, True]) -niter = 10 -for sort in [False, True]: - for join_method in join_methods: - f = lambda: merge(left, right, how=join_method, sort=sort) - gc.disable() - start = 
time.time() - for _ in range(niter): - f() - elapsed = (time.time() - start) / niter - gc.enable() - results[sort][join_method] = elapsed -# results.columns = ['pandas'] -results.columns = ['dont_sort', 'sort'] - - -# R results -# many to one -r_results = read_table(StringIO(""" base::merge plyr data.table -inner 0.2475 0.1183 0.1100 -outer 0.4213 0.1916 0.2090 -left 0.2998 0.1188 0.0572 -right 0.3102 0.0536 0.0376 -"""), sep='\s+') - -presults = results[['dont_sort']].rename(columns={'dont_sort': 'pandas'}) -all_results = presults.join(r_results) - -all_results = all_results.div(all_results['pandas'], axis=0) - -all_results = all_results.ix[:, ['pandas', 'data.table', 'plyr', - 'base::merge']] - -sort_results = DataFrame.from_items([('pandas', results['sort']), - ('R', r_results['base::merge'])]) -sort_results['Ratio'] = sort_results['R'] / sort_results['pandas'] - - -nosort_results = DataFrame.from_items([('pandas', results['dont_sort']), - ('R', r_results['base::merge'])]) -nosort_results['Ratio'] = nosort_results['R'] / nosort_results['pandas'] - -# many to many - -# many to one -r_results = read_table(StringIO("""base::merge plyr data.table -inner 0.4610 0.1276 0.1269 -outer 0.9195 0.1881 0.2725 -left 0.6559 0.1257 0.0678 -right 0.6425 0.0522 0.0428 -"""), sep='\s+') - -all_results = presults.join(r_results) -all_results = all_results.div(all_results['pandas'], axis=0) -all_results = all_results.ix[:, ['pandas', 'data.table', 'plyr', - 'base::merge']] diff --git a/bench/bench_merge_sqlite.py b/bench/bench_merge_sqlite.py deleted file mode 100644 index 3ad4b810119c3..0000000000000 --- a/bench/bench_merge_sqlite.py +++ /dev/null @@ -1,87 +0,0 @@ -import numpy as np -from collections import defaultdict -import gc -import time -from pandas import DataFrame -from pandas.util.testing import rands -from pandas.compat import range, zip -import random - -N = 10000 - -indices = np.array([rands(10) for _ in range(N)], dtype='O') -indices2 = np.array([rands(10) for _ in 
range(N)], dtype='O') -key = np.tile(indices[:8000], 10) -key2 = np.tile(indices2[:8000], 10) - -left = DataFrame({'key': key, 'key2': key2, - 'value': np.random.randn(80000)}) -right = DataFrame({'key': indices[2000:], 'key2': indices2[2000:], - 'value2': np.random.randn(8000)}) - -# right2 = right.append(right, ignore_index=True) -# right = right2 - -# random.shuffle(key2) -# indices2 = indices.copy() -# random.shuffle(indices2) - -# Prepare Database -import sqlite3 -create_sql_indexes = True - -conn = sqlite3.connect(':memory:') -conn.execute( - 'create table left( key varchar(10), key2 varchar(10), value int);') -conn.execute( - 'create table right( key varchar(10), key2 varchar(10), value2 int);') -conn.executemany('insert into left values (?, ?, ?)', - zip(key, key2, left['value'])) -conn.executemany('insert into right values (?, ?, ?)', - zip(right['key'], right['key2'], right['value2'])) - -# Create Indices -if create_sql_indexes: - conn.execute('create index left_ix on left(key, key2)') - conn.execute('create index right_ix on right(key, key2)') - - -join_methods = ['inner', 'left outer', 'left'] # others not supported -sql_results = DataFrame(index=join_methods, columns=[False]) -niter = 5 -for sort in [False]: - for join_method in join_methods: - sql = """CREATE TABLE test as select * - from left - %s join right - on left.key=right.key - and left.key2 = right.key2;""" % join_method - sql = """select * - from left - %s join right - on left.key=right.key - and left.key2 = right.key2;""" % join_method - - if sort: - sql = '%s order by key, key2' % sql - f = lambda: list(conn.execute(sql)) # list fetches results - g = lambda: conn.execute(sql) # list fetches results - gc.disable() - start = time.time() - # for _ in range(niter): - g() - elapsed = (time.time() - start) / niter - gc.enable() - - cur = conn.execute("DROP TABLE test") - conn.commit() - - sql_results[sort][join_method] = elapsed - sql_results.columns = ['sqlite3'] # ['dont_sort', 'sort'] - 
sql_results.index = ['inner', 'outer', 'left'] - - sql = """select * - from left - inner join right - on left.key=right.key - and left.key2 = right.key2;""" diff --git a/bench/bench_pivot.R b/bench/bench_pivot.R deleted file mode 100644 index 06dc6a105bc43..0000000000000 --- a/bench/bench_pivot.R +++ /dev/null @@ -1,27 +0,0 @@ -library(reshape2) - - -n <- 100000 -a.size <- 5 -b.size <- 5 - -data <- data.frame(a=sample(letters[1:a.size], n, replace=T), - b=sample(letters[1:b.size], n, replace=T), - c=rnorm(n), - d=rnorm(n)) - -timings <- numeric() - -# acast(melt(data, id=c("a", "b")), a ~ b, mean) -# acast(melt(data, id=c("a", "b")), a + b ~ variable, mean) - -for (i in 1:10) { - gc() - tim <- system.time(acast(melt(data, id=c("a", "b")), a ~ b, mean, - subset=.(variable=="c"))) - timings[i] = tim[3] -} - -mean(timings) - -acast(melt(data, id=c("a", "b")), a ~ b, mean, subset=.(variable="c")) diff --git a/bench/bench_pivot.py b/bench/bench_pivot.py deleted file mode 100644 index 007bd0aaebc2f..0000000000000 --- a/bench/bench_pivot.py +++ /dev/null @@ -1,16 +0,0 @@ -from pandas import * -import string - - -n = 100000 -asize = 5 -bsize = 5 - -letters = np.asarray(list(string.letters), dtype=object) - -data = DataFrame(dict(foo=letters[:asize][np.random.randint(0, asize, n)], - bar=letters[:bsize][np.random.randint(0, bsize, n)], - baz=np.random.randn(n), - qux=np.random.randn(n))) - -table = pivot_table(data, xby=['foo', 'bar']) diff --git a/bench/bench_take_indexing.py b/bench/bench_take_indexing.py deleted file mode 100644 index 5fb584bcfe45f..0000000000000 --- a/bench/bench_take_indexing.py +++ /dev/null @@ -1,55 +0,0 @@ -from __future__ import print_function -import numpy as np - -from pandas import * -import pandas._tseries as lib - -from pandas import DataFrame -import timeit -from pandas.compat import zip - -setup = """ -from pandas import Series -import pandas._tseries as lib -import random -import numpy as np - -import random -n = %d -k = %d -arr = 
np.random.randn(n, k) -indexer = np.arange(n, dtype=np.int32) -indexer = indexer[::-1] -""" - -sizes = [100, 1000, 10000, 100000] -iters = [1000, 1000, 100, 1] - -fancy_2d = [] -take_2d = [] -cython_2d = [] - -n = 1000 - - -def _timeit(stmt, size, k=5, iters=1000): - timer = timeit.Timer(stmt=stmt, setup=setup % (sz, k)) - return timer.timeit(n) / n - -for sz, its in zip(sizes, iters): - print(sz) - fancy_2d.append(_timeit('arr[indexer]', sz, iters=its)) - take_2d.append(_timeit('arr.take(indexer, axis=0)', sz, iters=its)) - cython_2d.append(_timeit('lib.take_axis0(arr, indexer)', sz, iters=its)) - -df = DataFrame({'fancy': fancy_2d, - 'take': take_2d, - 'cython': cython_2d}) - -print(df) - -from pandas.rpy.common import r -r('mat <- matrix(rnorm(50000), nrow=10000, ncol=5)') -r('set.seed(12345') -r('indexer <- sample(1:10000)') -r('mat[indexer,]') diff --git a/bench/bench_unique.py b/bench/bench_unique.py deleted file mode 100644 index 87bd2f2df586c..0000000000000 --- a/bench/bench_unique.py +++ /dev/null @@ -1,278 +0,0 @@ -from __future__ import print_function -from pandas import * -from pandas.util.testing import rands -from pandas.compat import range, zip -import pandas._tseries as lib -import numpy as np -import matplotlib.pyplot as plt - -N = 50000 -K = 10000 - -groups = np.array([rands(10) for _ in range(K)], dtype='O') -groups2 = np.array([rands(10) for _ in range(K)], dtype='O') - -labels = np.tile(groups, N // K) -labels2 = np.tile(groups2, N // K) -data = np.random.randn(N) - - -def timeit(f, niter): - import gc - import time - gc.disable() - start = time.time() - for _ in range(niter): - f() - elapsed = (time.time() - start) / niter - gc.enable() - return elapsed - - -def algo1(): - unique_labels = np.unique(labels) - result = np.empty(len(unique_labels)) - for i, label in enumerate(unique_labels): - result[i] = data[labels == label].sum() - - -def algo2(): - unique_labels = np.unique(labels) - indices = lib.groupby_indices(labels) - result = 
np.empty(len(unique_labels)) - - for i, label in enumerate(unique_labels): - result[i] = data.take(indices[label]).sum() - - -def algo3_nosort(): - rizer = lib.DictFactorizer() - labs, counts = rizer.factorize(labels, sort=False) - k = len(rizer.uniques) - out = np.empty(k) - lib.group_add(out, counts, data, labs) - - -def algo3_sort(): - rizer = lib.DictFactorizer() - labs, counts = rizer.factorize(labels, sort=True) - k = len(rizer.uniques) - out = np.empty(k) - lib.group_add(out, counts, data, labs) - -import numpy as np -import random - - -# dict to hold results -counts = {} - -# a hack to generate random key, value pairs. -# 5k keys, 100k values -x = np.tile(np.arange(5000, dtype='O'), 20) -random.shuffle(x) -xarr = x -x = [int(y) for y in x] -data = np.random.uniform(0, 1, 100000) - - -def f(): - # groupby sum - for k, v in zip(x, data): - try: - counts[k] += v - except KeyError: - counts[k] = v - - -def f2(): - rizer = lib.DictFactorizer() - labs, counts = rizer.factorize(xarr, sort=False) - k = len(rizer.uniques) - out = np.empty(k) - lib.group_add(out, counts, data, labs) - - -def algo4(): - rizer = lib.DictFactorizer() - labs1, _ = rizer.factorize(labels, sort=False) - k1 = len(rizer.uniques) - - rizer = lib.DictFactorizer() - labs2, _ = rizer.factorize(labels2, sort=False) - k2 = len(rizer.uniques) - - group_id = labs1 * k2 + labs2 - max_group = k1 * k2 - - if max_group > 1e6: - rizer = lib.Int64Factorizer(len(group_id)) - group_id, _ = rizer.factorize(group_id.astype('i8'), sort=True) - max_group = len(rizer.uniques) - - out = np.empty(max_group) - counts = np.zeros(max_group, dtype='i4') - lib.group_add(out, counts, data, group_id) - -# cumtime percall filename:lineno(function) -# 0.592 0.592 :1() - # 0.584 0.006 groupby_ex.py:37(algo3_nosort) - # 0.535 0.005 {method 'factorize' of DictFactorizer' objects} - # 0.047 0.000 {pandas._tseries.group_add} - # 0.002 0.000 numeric.py:65(zeros_like) - # 0.001 0.000 {method 'fill' of 'numpy.ndarray' objects} - # 
0.000 0.000 {numpy.core.multiarray.empty_like} - # 0.000 0.000 {numpy.core.multiarray.empty} - -# UNIQUE timings - -# N = 10000000 -# K = 500000 - -# groups = np.array([rands(10) for _ in range(K)], dtype='O') - -# labels = np.tile(groups, N // K) -data = np.random.randn(N) - -data = np.random.randn(N) - -Ks = [100, 1000, 5000, 10000, 25000, 50000, 100000] - -# Ks = [500000, 1000000, 2500000, 5000000, 10000000] - -import psutil -import os -import gc - -pid = os.getpid() -proc = psutil.Process(pid) - - -def dict_unique(values, expected_K, sort=False, memory=False): - if memory: - gc.collect() - before_mem = proc.get_memory_info().rss - - rizer = lib.DictFactorizer() - result = rizer.unique_int64(values) - - if memory: - result = proc.get_memory_info().rss - before_mem - return result - - if sort: - result.sort() - assert(len(result) == expected_K) - return result - - -def khash_unique(values, expected_K, size_hint=False, sort=False, - memory=False): - if memory: - gc.collect() - before_mem = proc.get_memory_info().rss - - if size_hint: - rizer = lib.Factorizer(len(values)) - else: - rizer = lib.Factorizer(100) - - result = [] - result = rizer.unique(values) - - if memory: - result = proc.get_memory_info().rss - before_mem - return result - - if sort: - result.sort() - assert(len(result) == expected_K) - - -def khash_unique_str(values, expected_K, size_hint=False, sort=False, - memory=False): - if memory: - gc.collect() - before_mem = proc.get_memory_info().rss - - if size_hint: - rizer = lib.StringHashTable(len(values)) - else: - rizer = lib.StringHashTable(100) - - result = [] - result = rizer.unique(values) - - if memory: - result = proc.get_memory_info().rss - before_mem - return result - - if sort: - result.sort() - assert(len(result) == expected_K) - - -def khash_unique_int64(values, expected_K, size_hint=False, sort=False): - if size_hint: - rizer = lib.Int64HashTable(len(values)) - else: - rizer = lib.Int64HashTable(100) - - result = [] - result = 
rizer.unique(values) - - if sort: - result.sort() - assert(len(result) == expected_K) - - -def hash_bench(): - numpy = [] - dict_based = [] - dict_based_sort = [] - khash_hint = [] - khash_nohint = [] - for K in Ks: - print(K) - # groups = np.array([rands(10) for _ in range(K)]) - # labels = np.tile(groups, N // K).astype('O') - - groups = np.random.randint(0, long(100000000000), size=K) - labels = np.tile(groups, N // K) - dict_based.append(timeit(lambda: dict_unique(labels, K), 20)) - khash_nohint.append(timeit(lambda: khash_unique_int64(labels, K), 20)) - khash_hint.append(timeit(lambda: khash_unique_int64(labels, K, - size_hint=True), 20)) - - # memory, hard to get - # dict_based.append(np.mean([dict_unique(labels, K, memory=True) - # for _ in range(10)])) - # khash_nohint.append(np.mean([khash_unique(labels, K, memory=True) - # for _ in range(10)])) - # khash_hint.append(np.mean([khash_unique(labels, K, size_hint=True, memory=True) - # for _ in range(10)])) - - # dict_based_sort.append(timeit(lambda: dict_unique(labels, K, - # sort=True), 10)) - # numpy.append(timeit(lambda: np.unique(labels), 10)) - - # unique_timings = DataFrame({'numpy.unique' : numpy, - # 'dict, no sort' : dict_based, - # 'dict, sort' : dict_based_sort}, - # columns=['dict, no sort', - # 'dict, sort', 'numpy.unique'], - # index=Ks) - - unique_timings = DataFrame({'dict': dict_based, - 'khash, preallocate': khash_hint, - 'khash': khash_nohint}, - columns=['khash, preallocate', 'khash', 'dict'], - index=Ks) - - unique_timings.plot(kind='bar', legend=False) - plt.legend(loc='best') - plt.title('Unique on 100,000 values, int64') - plt.xlabel('Number of unique labels') - plt.ylabel('Mean execution time') - - plt.show() diff --git a/bench/bench_with_subset.R b/bench/bench_with_subset.R deleted file mode 100644 index 69d0f7a9eec63..0000000000000 --- a/bench/bench_with_subset.R +++ /dev/null @@ -1,53 +0,0 @@ -library(microbenchmark) -library(data.table) - - -data.frame.subset.bench <- function 
(n=1e7, times=30) { - df <- data.frame(a=rnorm(n), b=rnorm(n), c=rnorm(n)) - print(microbenchmark(subset(df, a <= b & b <= (c ^ 2 + b ^ 2 - a) & b > c), - times=times)) -} - - -# data.table allows something very similar to query with an expression -# but we have chained comparisons AND we're faster BOO YAH! -data.table.subset.expression.bench <- function (n=1e7, times=30) { - dt <- data.table(a=rnorm(n), b=rnorm(n), c=rnorm(n)) - print(microbenchmark(dt[, a <= b & b <= (c ^ 2 + b ^ 2 - a) & b > c], - times=times)) -} - - -# compare against subset with data.table for good measure -data.table.subset.bench <- function (n=1e7, times=30) { - dt <- data.table(a=rnorm(n), b=rnorm(n), c=rnorm(n)) - print(microbenchmark(subset(dt, a <= b & b <= (c ^ 2 + b ^ 2 - a) & b > c), - times=times)) -} - - -data.frame.with.bench <- function (n=1e7, times=30) { - df <- data.frame(a=rnorm(n), b=rnorm(n), c=rnorm(n)) - - print(microbenchmark(with(df, a + b * (c ^ 2 + b ^ 2 - a) / (a * c) ^ 3), - times=times)) -} - - -data.table.with.bench <- function (n=1e7, times=30) { - dt <- data.table(a=rnorm(n), b=rnorm(n), c=rnorm(n)) - print(microbenchmark(with(dt, a + b * (c ^ 2 + b ^ 2 - a) / (a * c) ^ 3), - times=times)) -} - - -bench <- function () { - data.frame.subset.bench() - data.table.subset.expression.bench() - data.table.subset.bench() - data.frame.with.bench() - data.table.with.bench() -} - - -bench() diff --git a/bench/bench_with_subset.py b/bench/bench_with_subset.py deleted file mode 100644 index 017401df3f7f3..0000000000000 --- a/bench/bench_with_subset.py +++ /dev/null @@ -1,116 +0,0 @@ -#!/usr/bin/env python - -""" -Microbenchmarks for comparison with R's "with" and "subset" functions -""" - -from __future__ import print_function -import numpy as np -from numpy import array -from timeit import repeat as timeit -from pandas.compat import range, zip -from pandas import DataFrame - - -setup_common = """from pandas import DataFrame -from numpy.random import randn -df = 
DataFrame(randn(%d, 3), columns=list('abc')) -%s""" - - -setup_with = "s = 'a + b * (c ** 2 + b ** 2 - a) / (a * c) ** 3'" - - -def bench_with(n, times=10, repeat=3, engine='numexpr'): - return np.array(timeit('df.eval(s, engine=%r)' % engine, - setup=setup_common % (n, setup_with), - repeat=repeat, number=times)) / times - - -setup_subset = "s = 'a <= b <= c ** 2 + b ** 2 - a and b > c'" - - -def bench_subset(n, times=10, repeat=3, engine='numexpr'): - return np.array(timeit('df.query(s, engine=%r)' % engine, - setup=setup_common % (n, setup_subset), - repeat=repeat, number=times)) / times - - -def bench(mn=1, mx=7, num=100, engines=('python', 'numexpr'), verbose=False): - r = np.logspace(mn, mx, num=num).round().astype(int) - - ev = DataFrame(np.empty((num, len(engines))), columns=engines) - qu = ev.copy(deep=True) - - ev['size'] = qu['size'] = r - - for engine in engines: - for i, n in enumerate(r): - if verbose: - print('engine: %r, i == %d' % (engine, i)) - ev.loc[i, engine] = bench_with(n, times=1, repeat=1, engine=engine) - qu.loc[i, engine] = bench_subset(n, times=1, repeat=1, - engine=engine) - - return ev, qu - - -def plot_perf(df, engines, title, filename=None): - from matplotlib.pyplot import figure, rc - - try: - from mpltools import style - except ImportError: - pass - else: - style.use('ggplot') - - rc('text', usetex=True) - - fig = figure(figsize=(4, 3), dpi=100) - ax = fig.add_subplot(111) - - for engine in engines: - ax.plot(df.size, df[engine], label=engine, lw=2) - - ax.set_xlabel('Number of Rows') - ax.set_ylabel('Time (s)') - ax.set_title(title) - ax.legend(loc='best') - ax.tick_params(top=False, right=False) - - fig.tight_layout() - - if filename is not None: - fig.savefig(filename) - - -if __name__ == '__main__': - import os - import pandas as pd - - pandas_dir = os.path.dirname(os.path.abspath(os.path.dirname(__file__))) - static_path = os.path.join(pandas_dir, 'doc', 'source', '_static') - - join = lambda p: os.path.join(static_path, p) - 
- fn = join('eval-query-perf-data.h5') - - engines = 'python', 'numexpr' - - if not os.path.exists(fn): - ev, qu = bench(verbose=True) - ev.to_hdf(fn, 'eval') - qu.to_hdf(fn, 'query') - else: - ev = pd.read_hdf(fn, 'eval') - qu = pd.read_hdf(fn, 'query') - - plot_perf(ev, engines, 'DataFrame.eval()', filename=join('eval-perf.png')) - plot_perf(qu, engines, 'DataFrame.query()', - filename=join('query-perf.png')) - - plot_perf(ev[ev.size <= 50000], engines, 'DataFrame.eval()', - filename=join('eval-perf-small.png')) - plot_perf(qu[qu.size <= 500000], engines, 'DataFrame.query()', - filename=join('query-perf-small.png')) diff --git a/bench/better_unique.py b/bench/better_unique.py deleted file mode 100644 index e03a4f433ce66..0000000000000 --- a/bench/better_unique.py +++ /dev/null @@ -1,80 +0,0 @@ -from __future__ import print_function -from pandas import DataFrame -from pandas.compat import range, zip -import timeit - -setup = """ -from pandas import Series -import pandas._tseries as _tseries -from pandas.compat import range -import random -import numpy as np - -def better_unique(values): - uniques = _tseries.fast_unique(values) - id_map = _tseries.map_indices_buf(uniques) - labels = _tseries.get_unique_labels(values, id_map) - return uniques, labels - -tot = 100000 - -def get_test_data(ngroups=100, n=tot): - unique_groups = range(ngroups) - random.shuffle(unique_groups) - arr = np.asarray(np.tile(unique_groups, n / ngroups), dtype=object) - - if len(arr) < n: - arr = np.asarray(list(arr) + unique_groups[:n - len(arr)], - dtype=object) - - return arr - -arr = get_test_data(ngroups=%d) -""" - -group_sizes = [10, 100, 1000, 10000, - 20000, 30000, 40000, - 50000, 60000, 70000, - 80000, 90000, 100000] - -numbers = [100, 100, 50] + [10] * 10 - -numpy = [] -wes = [] - -for sz, n in zip(group_sizes, numbers): - # wes_timer = timeit.Timer(stmt='better_unique(arr)', - # setup=setup % sz) - wes_timer = timeit.Timer(stmt='_tseries.fast_unique(arr)', - setup=setup % sz) - - 
numpy_timer = timeit.Timer(stmt='np.unique(arr)', - setup=setup % sz) - - print(n) - numpy_result = numpy_timer.timeit(number=n) / n - wes_result = wes_timer.timeit(number=n) / n - - print('Groups: %d, NumPy: %s, Wes: %s' % (sz, numpy_result, wes_result)) - - wes.append(wes_result) - numpy.append(numpy_result) - -result = DataFrame({'wes': wes, 'numpy': numpy}, index=group_sizes) - - -def make_plot(numpy, wes): - pass - -# def get_test_data(ngroups=100, n=100000): -# unique_groups = range(ngroups) -# random.shuffle(unique_groups) -# arr = np.asarray(np.tile(unique_groups, n / ngroups), dtype=object) - -# if len(arr) < n: -# arr = np.asarray(list(arr) + unique_groups[:n - len(arr)], -# dtype=object) - -# return arr - -# arr = get_test_data(ngroups=1000) diff --git a/bench/duplicated.R b/bench/duplicated.R deleted file mode 100644 index eb2376df2932a..0000000000000 --- a/bench/duplicated.R +++ /dev/null @@ -1,22 +0,0 @@ -N <- 100000 - -k1 = rep(NA, N) -k2 = rep(NA, N) -for (i in 1:N){ - k1[i] <- paste(sample(letters, 1), collapse="") - k2[i] <- paste(sample(letters, 1), collapse="") -} -df <- data.frame(a=k1, b=k2, c=rep(1:100, N / 100)) -df2 <- data.frame(a=k1, b=k2) - -timings <- numeric() -timings2 <- numeric() -for (i in 1:50) { - gc() - timings[i] = system.time(deduped <- df[!duplicated(df),])[3] - gc() - timings2[i] = system.time(deduped <- df[!duplicated(df[,c("a", "b")]),])[3] -} - -mean(timings) -mean(timings2) diff --git a/bench/io_roundtrip.py b/bench/io_roundtrip.py deleted file mode 100644 index d87da0ec6321a..0000000000000 --- a/bench/io_roundtrip.py +++ /dev/null @@ -1,116 +0,0 @@ -from __future__ import print_function -import time -import os -import numpy as np - -import la -import pandas -from pandas.compat import range -from pandas import datetools, DatetimeIndex - - -def timeit(f, iterations): - start = time.clock() - - for i in range(iterations): - f() - - return time.clock() - start - - -def rountrip_archive(N, K=50, iterations=10): - # Create 
data - arr = np.random.randn(N, K) - # lar = la.larry(arr) - dma = pandas.DataFrame(arr, - DatetimeIndex('1/1/2000', periods=N, - offset=datetools.Minute())) - dma[201] = 'bar' - - # filenames - filename_numpy = '/Users/wesm/tmp/numpy.npz' - filename_larry = '/Users/wesm/tmp/archive.hdf5' - filename_pandas = '/Users/wesm/tmp/pandas_tmp' - - # Delete old files - try: - os.unlink(filename_numpy) - except: - pass - try: - os.unlink(filename_larry) - except: - pass - - try: - os.unlink(filename_pandas) - except: - pass - - # Time a round trip save and load - # numpy_f = lambda: numpy_roundtrip(filename_numpy, arr, arr) - # numpy_time = timeit(numpy_f, iterations) / iterations - - # larry_f = lambda: larry_roundtrip(filename_larry, lar, lar) - # larry_time = timeit(larry_f, iterations) / iterations - - pandas_f = lambda: pandas_roundtrip(filename_pandas, dma, dma) - pandas_time = timeit(pandas_f, iterations) / iterations - print('pandas (HDF5) %7.4f seconds' % pandas_time) - - pickle_f = lambda: pandas_roundtrip(filename_pandas, dma, dma) - pickle_time = timeit(pickle_f, iterations) / iterations - print('pandas (pickle) %7.4f seconds' % pickle_time) - - # print('Numpy (npz) %7.4f seconds' % numpy_time) - # print('larry (HDF5) %7.4f seconds' % larry_time) - - # Delete old files - try: - os.unlink(filename_numpy) - except: - pass - try: - os.unlink(filename_larry) - except: - pass - - try: - os.unlink(filename_pandas) - except: - pass - - -def numpy_roundtrip(filename, arr1, arr2): - np.savez(filename, arr1=arr1, arr2=arr2) - npz = np.load(filename) - arr1 = npz['arr1'] - arr2 = npz['arr2'] - - -def larry_roundtrip(filename, lar1, lar2): - io = la.IO(filename) - io['lar1'] = lar1 - io['lar2'] = lar2 - lar1 = io['lar1'] - lar2 = io['lar2'] - - -def pandas_roundtrip(filename, dma1, dma2): - # What's the best way to code this? 
- from pandas.io.pytables import HDFStore - store = HDFStore(filename) - store['dma1'] = dma1 - store['dma2'] = dma2 - dma1 = store['dma1'] - dma2 = store['dma2'] - - -def pandas_roundtrip_pickle(filename, dma1, dma2): - dma1.save(filename) - dma1 = pandas.DataFrame.load(filename) - dma2.save(filename) - dma2 = pandas.DataFrame.load(filename) - -if __name__ == '__main__': - rountrip_archive(10000, K=200) diff --git a/bench/serialize.py b/bench/serialize.py deleted file mode 100644 index b0edd6a5752d2..0000000000000 --- a/bench/serialize.py +++ /dev/null @@ -1,89 +0,0 @@ -from __future__ import print_function -from pandas.compat import range, lrange -import time -import os -import numpy as np - -import la -import pandas - - -def timeit(f, iterations): - start = time.clock() - - for i in range(iterations): - f() - - return time.clock() - start - - -def roundtrip_archive(N, iterations=10): - - # Create data - arr = np.random.randn(N, N) - lar = la.larry(arr) - dma = pandas.DataFrame(arr, lrange(N), lrange(N)) - - # filenames - filename_numpy = '/Users/wesm/tmp/numpy.npz' - filename_larry = '/Users/wesm/tmp/archive.hdf5' - filename_pandas = '/Users/wesm/tmp/pandas_tmp' - - # Delete old files - try: - os.unlink(filename_numpy) - except: - pass - try: - os.unlink(filename_larry) - except: - pass - try: - os.unlink(filename_pandas) - except: - pass - - # Time a round trip save and load - numpy_f = lambda: numpy_roundtrip(filename_numpy, arr, arr) - numpy_time = timeit(numpy_f, iterations) / iterations - - larry_f = lambda: larry_roundtrip(filename_larry, lar, lar) - larry_time = timeit(larry_f, iterations) / iterations - - pandas_f = lambda: pandas_roundtrip(filename_pandas, dma, dma) - pandas_time = timeit(pandas_f, iterations) / iterations - - print('Numpy (npz) %7.4f seconds' % numpy_time) - print('larry (HDF5) %7.4f seconds' % larry_time) - print('pandas (HDF5) %7.4f seconds' % pandas_time) - - -def numpy_roundtrip(filename, arr1, arr2): - np.savez(filename, 
arr1=arr1, arr2=arr2) - npz = np.load(filename) - arr1 = npz['arr1'] - arr2 = npz['arr2'] - - -def larry_roundtrip(filename, lar1, lar2): - io = la.IO(filename) - io['lar1'] = lar1 - io['lar2'] = lar2 - lar1 = io['lar1'] - lar2 = io['lar2'] - - -def pandas_roundtrip(filename, dma1, dma2): - from pandas.io.pytables import HDFStore - store = HDFStore(filename) - store['dma1'] = dma1 - store['dma2'] = dma2 - dma1 = store['dma1'] - dma2 = store['dma2'] - - -def pandas_roundtrip_pickle(filename, dma1, dma2): - dma1.save(filename) - dma1 = pandas.DataFrame.load(filename) - dma2.save(filename) - dma2 = pandas.DataFrame.load(filename) diff --git a/bench/test.py b/bench/test.py deleted file mode 100644 index 2339deab313a1..0000000000000 --- a/bench/test.py +++ /dev/null @@ -1,70 +0,0 @@ -import numpy as np -import itertools -import collections -import scipy.ndimage as ndi -from pandas.compat import zip, range - -N = 10000 - -lat = np.random.randint(0, 360, N) -lon = np.random.randint(0, 360, N) -data = np.random.randn(N) - - -def groupby1(lat, lon, data): - indexer = np.lexsort((lon, lat)) - lat = lat.take(indexer) - lon = lon.take(indexer) - sorted_data = data.take(indexer) - - keys = 1000. 
* lat + lon - unique_keys = np.unique(keys) - bounds = keys.searchsorted(unique_keys) - - result = group_agg(sorted_data, bounds, lambda x: x.mean()) - - decoder = keys.searchsorted(unique_keys) - - return dict(zip(zip(lat.take(decoder), lon.take(decoder)), result)) - - -def group_mean(lat, lon, data): - indexer = np.lexsort((lon, lat)) - lat = lat.take(indexer) - lon = lon.take(indexer) - sorted_data = data.take(indexer) - - keys = 1000 * lat + lon - unique_keys = np.unique(keys) - - result = ndi.mean(sorted_data, labels=keys, index=unique_keys) - decoder = keys.searchsorted(unique_keys) - - return dict(zip(zip(lat.take(decoder), lon.take(decoder)), result)) - - -def group_mean_naive(lat, lon, data): - grouped = collections.defaultdict(list) - for lt, ln, da in zip(lat, lon, data): - grouped[(lt, ln)].append(da) - - averaged = dict((ltln, np.mean(da)) for ltln, da in grouped.items()) - - return averaged - - -def group_agg(values, bounds, f): - N = len(values) - result = np.empty(len(bounds), dtype=float) - for i, left_bound in enumerate(bounds): - if i == len(bounds) - 1: - right_bound = N - else: - right_bound = bounds[i + 1] - - result[i] = f(values[left_bound: right_bound]) - - return result - -# for i in range(10): -# groupby1(lat, lon, data) diff --git a/bench/zoo_bench.R b/bench/zoo_bench.R deleted file mode 100644 index 294d55f51a9ab..0000000000000 --- a/bench/zoo_bench.R +++ /dev/null @@ -1,71 +0,0 @@ -library(zoo) -library(xts) -library(fts) -library(tseries) -library(its) -library(xtable) - -## indices = rep(NA, 100000) -## for (i in 1:100000) -## indices[i] <- paste(sample(letters, 10), collapse="") - - - -## x <- zoo(rnorm(100000), indices) -## y <- zoo(rnorm(90000), indices[sample(1:100000, 90000)]) - -## indices <- as.POSIXct(1:100000) - -indices <- as.POSIXct(Sys.Date()) + seq(1, 100000000, 100) - -sz <- 500000 - -## x <- xts(rnorm(sz), sample(indices, sz)) -## y <- xts(rnorm(sz), sample(indices, sz)) - -zoo.bench <- function(){ - x <- 
zoo(rnorm(sz), sample(indices, sz)) - y <- zoo(rnorm(sz), sample(indices, sz)) - timeit(function() {x + y}) -} - -xts.bench <- function(){ - x <- xts(rnorm(sz), sample(indices, sz)) - y <- xts(rnorm(sz), sample(indices, sz)) - timeit(function() {x + y}) -} - -fts.bench <- function(){ - x <- fts(rnorm(sz), sort(sample(indices, sz))) - y <- fts(rnorm(sz), sort(sample(indices, sz)) - timeit(function() {x + y}) -} - -its.bench <- function(){ - x <- its(rnorm(sz), sort(sample(indices, sz))) - y <- its(rnorm(sz), sort(sample(indices, sz))) - timeit(function() {x + y}) -} - -irts.bench <- function(){ - x <- irts(sort(sample(indices, sz)), rnorm(sz)) - y <- irts(sort(sample(indices, sz)), rnorm(sz)) - timeit(function() {x + y}) -} - -timeit <- function(f){ - timings <- numeric() - for (i in 1:10) { - gc() - timings[i] = system.time(f())[3] - } - mean(timings) -} - -bench <- function(){ - results <- c(xts.bench(), fts.bench(), its.bench(), zoo.bench()) - names <- c("xts", "fts", "its", "zoo") - data.frame(results, names) -} - -result <- bench() diff --git a/bench/zoo_bench.py b/bench/zoo_bench.py deleted file mode 100644 index 74cb1952a5a2a..0000000000000 --- a/bench/zoo_bench.py +++ /dev/null @@ -1,36 +0,0 @@ -from pandas import * -from pandas.util.testing import rands - -n = 1000000 -# indices = Index([rands(10) for _ in xrange(n)]) - - -def sample(values, k): - sampler = np.random.permutation(len(values)) - return values.take(sampler[:k]) -sz = 500000 -rng = np.arange(0, 10000000000000, 10000000) -stamps = np.datetime64(datetime.now()).view('i8') + rng -idx1 = np.sort(sample(stamps, sz)) -idx2 = np.sort(sample(stamps, sz)) -ts1 = Series(np.random.randn(sz), idx1) -ts2 = Series(np.random.randn(sz), idx2) - - -# subsample_size = 90000 - -# x = Series(np.random.randn(100000), indices) -# y = Series(np.random.randn(subsample_size), -# index=sample(indices, subsample_size)) - - -# lx = larry(np.random.randn(100000), [list(indices)]) -# ly = 
larry(np.random.randn(subsample_size), [list(y.index)]) - -# Benchmark 1: Two 1-million length time series (int64-based index) with -# randomly chosen timestamps - -# Benchmark 2: Join two 5-variate time series DataFrames (outer and inner join) - -# df1 = DataFrame(np.random.randn(1000000, 5), idx1, columns=range(5)) -# df2 = DataFrame(np.random.randn(1000000, 5), idx2, columns=range(5, 10)) diff --git a/ci/appveyor.recipe/bld.bat b/ci/appveyor.recipe/bld.bat deleted file mode 100644 index 284926fae8c04..0000000000000 --- a/ci/appveyor.recipe/bld.bat +++ /dev/null @@ -1,2 +0,0 @@ -@echo off -%PYTHON% setup.py install diff --git a/ci/appveyor.recipe/build.sh b/ci/appveyor.recipe/build.sh deleted file mode 100644 index f341bce6fcf96..0000000000000 --- a/ci/appveyor.recipe/build.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/sh -$PYTHON setup.py install diff --git a/ci/appveyor.recipe/meta.yaml b/ci/appveyor.recipe/meta.yaml deleted file mode 100644 index 777fd9d682d48..0000000000000 --- a/ci/appveyor.recipe/meta.yaml +++ /dev/null @@ -1,37 +0,0 @@ -package: - name: pandas - version: 0.20.0 - -build: - number: {{environ.get('APPVEYOR_BUILD_NUMBER', 0)}} # [win] - string: np{{ environ.get('CONDA_NPY') }}py{{ environ.get('CONDA_PY') }}_{{ environ.get('APPVEYOR_BUILD_NUMBER', 0) }} # [win] - -source: - - # conda-build needs a full clone - # rather than a shallow git_url type clone - # https://github.com/conda/conda-build/issues/780 - path: ../../ - -requirements: - build: - - python - - cython - - numpy x.x - - setuptools - - pytz - - python-dateutil - - run: - - python - - numpy x.x - - python-dateutil - - pytz - -test: - imports: - - pandas - -about: - home: http://pandas.pydata.org - license: BSD diff --git a/ci/before_install_travis.sh b/ci/before_install_travis.sh deleted file mode 100755 index f90427f97d3b7..0000000000000 --- a/ci/before_install_travis.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash - -# If envars.sh determined we're running in an authorized fork -# and 
the user opted in to the network cache,and that cached versions -# are available on the cache server, download and deploy the cached -# files to the local filesystem - -echo "inside $0" - -# overview -if [ "${TRAVIS_OS_NAME}" == "linux" ]; then - sh -e /etc/init.d/xvfb start -fi - -true # never fail because bad things happened here diff --git a/ci/before_script_travis.sh b/ci/before_script_travis.sh new file mode 100755 index 0000000000000..0b3939b1906a2 --- /dev/null +++ b/ci/before_script_travis.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +echo "inside $0" + +if [ "${TRAVIS_OS_NAME}" == "linux" ]; then + sh -e /etc/init.d/xvfb start + sleep 3 +fi + +# Never fail because bad things happened here. +true diff --git a/ci/build_docs.sh b/ci/build_docs.sh index 5dc649a91c4f7..a038304fe0f7a 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -17,15 +17,12 @@ if [ "$?" != "0" ]; then fi -if [ x"$DOC_BUILD" != x"" ]; then +if [ "$DOC" ]; then echo "Will build docs" source activate pandas - # install sudo deps - time sudo apt-get $APT_ARGS install dvipng texlive-latex-base texlive-latex-extra - mv "$TRAVIS_BUILD_DIR"/doc /tmp cd /tmp/doc @@ -43,10 +40,10 @@ if [ x"$DOC_BUILD" != x"" ]; then cd /tmp/doc/build/html git config --global user.email "pandas-docs-bot@localhost.foo" git config --global user.name "pandas-docs-bot" - git config --global credential.helper cache # create the repo git init + touch README git add README git commit -m "Initial commit" --allow-empty @@ -55,9 +52,22 @@ if [ x"$DOC_BUILD" != x"" ]; then touch .nojekyll git add --all . 
git commit -m "Version" --allow-empty + git remote remove origin - git remote add origin "https://${PANDAS_GH_TOKEN}@github.com/pandas-docs/pandas-docs-travis.git" + git remote add origin "https://${PANDAS_GH_TOKEN}@github.com/pandas-dev/pandas-docs-travis.git" + git fetch origin + git remote -v + git push origin gh-pages -f + + echo "Running doctests" + cd "$TRAVIS_BUILD_DIR" + pytest --doctest-modules \ + pandas/core/reshape/concat.py \ + pandas/core/reshape/pivot.py \ + pandas/core/reshape/reshape.py \ + pandas/core/reshape/tile.py + fi exit 0 diff --git a/ci/check_cache.sh b/ci/check_cache.sh index cd7a6e8f6b6f9..b83144fc45ef4 100755 --- a/ci/check_cache.sh +++ b/ci/check_cache.sh @@ -1,5 +1,9 @@ #!/bin/bash +# currently not used +# script to make sure that cache is clean +# Travis CI now handles this + if [ "$TRAVIS_PULL_REQUEST" == "false" ] then echo "Not a PR: checking for changes in ci/ from last 2 commits" @@ -12,14 +16,12 @@ else ci_changes=$(git diff PR_HEAD~2 --numstat | grep -E "ci/"| wc -l) fi -MINICONDA_DIR="$HOME/miniconda/" CACHE_DIR="$HOME/.cache/" CCACHE_DIR="$HOME/.ccache/" if [ $ci_changes -ne 0 ] then echo "Files have changed in ci/ deleting all caches" - rm -rf "$MINICONDA_DIR" rm -rf "$CACHE_DIR" rm -rf "$CCACHE_DIR" -fi \ No newline at end of file +fi diff --git a/ci/check_imports.py b/ci/check_imports.py new file mode 100644 index 0000000000000..d6f24ebcc4d3e --- /dev/null +++ b/ci/check_imports.py @@ -0,0 +1,35 @@ +""" +Check that certain modules are not loaded by `import pandas` +""" +import sys + +blacklist = { + 'bs4', + 'html5lib', + 'ipython', + 'jinja2' + 'lxml', + 'numexpr', + 'openpyxl', + 'py', + 'pytest', + 's3fs', + 'scipy', + 'tables', + 'xlrd', + 'xlsxwriter', + 'xlwt', +} + + +def main(): + import pandas # noqa + + modules = set(x.split('.')[0] for x in sys.modules) + imported = modules & blacklist + if modules & blacklist: + sys.exit("Imported {}".format(imported)) + + +if __name__ == '__main__': + main() diff --git 
a/ci/environment-dev.yaml b/ci/environment-dev.yaml new file mode 100644 index 0000000000000..1337fc54e9aac --- /dev/null +++ b/ci/environment-dev.yaml @@ -0,0 +1,15 @@ +name: pandas-dev +channels: + - defaults + - conda-forge +dependencies: + - Cython + - NumPy + - flake8 + - moto + - pytest>=3.1 + - python-dateutil>=2.5.0 + - python=3 + - pytz + - setuptools>=3.3 + - sphinx diff --git a/ci/install_circle.sh b/ci/install_circle.sh new file mode 100755 index 0000000000000..fd79f907625e9 --- /dev/null +++ b/ci/install_circle.sh @@ -0,0 +1,86 @@ +#!/usr/bin/env bash + +home_dir=$(pwd) +echo "[home_dir: $home_dir]" + +echo "[ls -ltr]" +ls -ltr + +echo "[Using clean Miniconda install]" +rm -rf "$MINICONDA_DIR" + +# install miniconda +wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -q -O miniconda.sh || exit 1 +bash miniconda.sh -b -p "$MINICONDA_DIR" || exit 1 + +export PATH="$MINICONDA_DIR/bin:$PATH" + +echo "[update conda]" +conda config --set ssl_verify false || exit 1 +conda config --set always_yes true --set changeps1 false || exit 1 +conda update -q conda + +# add the pandas channel to take priority +# to add extra packages +echo "[add channels]" +conda config --add channels pandas || exit 1 +conda config --remove channels defaults || exit 1 +conda config --add channels defaults || exit 1 + +# Useful for debugging any issues with conda +conda info -a || exit 1 + +# support env variables passed +export ENVS_FILE=".envs" + +# make sure that the .envs file exists. 
it is ok if it is empty +touch $ENVS_FILE + +# assume all command line arguments are environmental variables +for var in "$@" +do + echo "export $var" >> $ENVS_FILE +done + +echo "[environmental variable file]" +cat $ENVS_FILE +source $ENVS_FILE + +export REQ_BUILD=ci/requirements-${JOB}.build +export REQ_RUN=ci/requirements-${JOB}.run +export REQ_PIP=ci/requirements-${JOB}.pip + +# edit the locale override if needed +if [ -n "$LOCALE_OVERRIDE" ]; then + echo "[Adding locale to the first line of pandas/__init__.py]" + rm -f pandas/__init__.pyc + sedc="3iimport locale\nlocale.setlocale(locale.LC_ALL, '$LOCALE_OVERRIDE')\n" + sed -i "$sedc" pandas/__init__.py + echo "[head -4 pandas/__init__.py]" + head -4 pandas/__init__.py + echo +fi + +# create envbuild deps +echo "[create env: ${REQ_BUILD}]" +time conda create -n pandas -q --file=${REQ_BUILD} || exit 1 +time conda install -n pandas pytest>=3.1.0 || exit 1 + +source activate pandas +time pip install moto || exit 1 + +# build but don't install +echo "[build em]" +time python setup.py build_ext --inplace || exit 1 + +# we may have run installations +echo "[conda installs: ${REQ_RUN}]" +if [ -e ${REQ_RUN} ]; then + time conda install -q --file=${REQ_RUN} || exit 1 +fi + +# we may have additional pip installs +echo "[pip installs: ${REQ_PIP}]" +if [ -e ${REQ_PIP} ]; then + pip install -r $REQ_PIP +fi diff --git a/ci/install_db_circle.sh b/ci/install_db_circle.sh new file mode 100755 index 0000000000000..a00f74f009f54 --- /dev/null +++ b/ci/install_db_circle.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +echo "installing dbs" +mysql -e 'create database pandas_nosetest;' +psql -c 'create database pandas_nosetest;' -U postgres + +echo "done" +exit 0 diff --git a/ci/install_db.sh b/ci/install_db_travis.sh similarity index 100% rename from ci/install_db.sh rename to ci/install_db_travis.sh diff --git a/ci/install_test.sh b/ci/install_test.sh deleted file mode 100755 index 9ace633d7f39d..0000000000000 --- a/ci/install_test.sh +++ 
/dev/null @@ -1,17 +0,0 @@ -#!/bin/bash - -echo "inside $0" - -if [ "$INSTALL_TEST" ]; then - source activate pandas - echo "Starting installation test." - conda uninstall cython || exit 1 - python "$TRAVIS_BUILD_DIR"/setup.py sdist --formats=zip,gztar || exit 1 - pip install "$TRAVIS_BUILD_DIR"/dist/*tar.gz || exit 1 - pytest pandas/tests/test_series.py --junitxml=/tmp/pytest_install.xml -else - echo "Skipping installation test." -fi -RET="$?" - -exit "$RET" diff --git a/ci/install_travis.sh b/ci/install_travis.sh index ad804b96a0d82..9ccb4baf25505 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -1,18 +1,6 @@ #!/bin/bash -# There are 2 distinct pieces that get zipped and cached -# - The venv site-packages dir including the installed dependencies -# - The pandas build artifacts, using the build cache support via -# scripts/use_build_cache.py -# -# if the user opted in to use the cache and we're on a whitelisted fork -# - if the server doesn't hold a cached version of venv/pandas build, -# do things the slow way, and put the results on the cache server -# for the next time. -# - if the cache files are available, instal some necessaries via apt -# (no compiling needed), then directly goto script and collect 200$. 
-# - +# edit the locale file if needed function edit_init() { if [ -n "$LOCALE_OVERRIDE" ]; then @@ -26,94 +14,100 @@ function edit_init() fi } +echo echo "[install_travis]" edit_init home_dir=$(pwd) -echo "[home_dir: $home_dir]" +echo +echo "[home_dir]: $home_dir" +# install miniconda MINICONDA_DIR="$HOME/miniconda3" -if [ -d "$MINICONDA_DIR" ] && [ -e "$MINICONDA_DIR/bin/conda" ] && [ "$USE_CACHE" ]; then - echo "[Miniconda install already present from cache: $MINICONDA_DIR]" - - conda config --set always_yes yes --set changeps1 no || exit 1 - echo "[update conda]" - conda update -q conda || exit 1 - - # Useful for debugging any issues with conda - conda info -a || exit 1 - - # set the compiler cache to work - if [ "${TRAVIS_OS_NAME}" == "linux" ]; then - echo "[Using ccache]" - export PATH=/usr/lib/ccache:/usr/lib64/ccache:$PATH - gcc=$(which gcc) - echo "[gcc: $gcc]" - ccache=$(which ccache) - echo "[ccache: $ccache]" - export CC='ccache gcc' - fi +echo +echo "[Using clean Miniconda install]" -else - echo "[Using clean Miniconda install]" - echo "[Not using ccache]" +if [ -d "$MINICONDA_DIR" ]; then rm -rf "$MINICONDA_DIR" - # install miniconda - if [ "${TRAVIS_OS_NAME}" == "osx" ]; then - wget http://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh || exit 1 - else - wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh || exit 1 - fi - bash miniconda.sh -b -p "$MINICONDA_DIR" || exit 1 - - echo "[update conda]" - conda config --set ssl_verify false || exit 1 - conda config --set always_yes true --set changeps1 false || exit 1 - conda update -q conda - - # add the pandas channel to take priority - # to add extra packages - echo "[add channels]" - conda config --add channels pandas || exit 1 - conda config --remove channels defaults || exit 1 - conda config --add channels defaults || exit 1 - - conda install anaconda-client - - # Useful for debugging any issues with conda - conda info -a || exit 
1 - fi -# may have installation instructions for this build -INSTALL="ci/install-${PYTHON_VERSION}${JOB_TAG}.sh" -if [ -e ${INSTALL} ]; then - time bash $INSTALL || exit 1 +# install miniconda +if [ "${TRAVIS_OS_NAME}" == "osx" ]; then + time wget http://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -q -O miniconda.sh || exit 1 else - # create new env - time conda create -n pandas python=$PYTHON_VERSION pytest || exit 1 + time wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -q -O miniconda.sh || exit 1 +fi +time bash miniconda.sh -b -p "$MINICONDA_DIR" || exit 1 + +echo +echo "[show conda]" +which conda + +echo +echo "[update conda]" +conda config --set ssl_verify false || exit 1 +conda config --set quiet true --set always_yes true --set changeps1 false || exit 1 +conda update -q conda + +echo +echo "[add channels]" +conda config --remove channels defaults || exit 1 +conda config --add channels defaults || exit 1 + +if [ "$CONDA_FORGE" ]; then + # add conda-forge channel as priority + conda config --add channels conda-forge || exit 1 fi -# build deps -echo "[build installs]" -REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.build" -if [ -e ${REQ} ]; then - time conda install -n pandas --file=${REQ} || exit 1 +# Useful for debugging any issues with conda +conda info -a || exit 1 + +# set the compiler cache to work +echo +if [ -z "$NOCACHE" ] && [ "${TRAVIS_OS_NAME}" == "linux" ]; then + echo "[Using ccache]" + export PATH=/usr/lib/ccache:/usr/lib64/ccache:$PATH + gcc=$(which gcc) + echo "[gcc]: $gcc" + ccache=$(which ccache) + echo "[ccache]: $ccache" + export CC='ccache gcc' +elif [ -z "$NOCACHE" ] && [ "${TRAVIS_OS_NAME}" == "osx" ]; then + echo "[Install ccache]" + brew install ccache > /dev/null 2>&1 + echo "[Using ccache]" + export PATH=/usr/local/opt/ccache/libexec:$PATH + gcc=$(which gcc) + echo "[gcc]: $gcc" + ccache=$(which ccache) + echo "[ccache]: $ccache" +else + echo "[Not using ccache]" fi +echo +echo "[create 
env]" + +# create our environment +REQ="ci/requirements-${JOB}.build" +time conda create -n pandas --file=${REQ} || exit 1 + +source activate pandas + # may have addtl installation instructions for this build +echo echo "[build addtl installs]" -REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.build.sh" +REQ="ci/requirements-${JOB}.build.sh" if [ -e ${REQ} ]; then time bash $REQ || exit 1 fi -source activate pandas +time conda install -n pandas pytest>=3.1.0 +time pip install -q pytest-xdist moto -pip install pytest-xdist if [ "$LINT" ]; then - conda install flake8 + conda install flake8=3.4.1 pip install cpplint fi @@ -121,50 +115,50 @@ if [ "$COVERAGE" ]; then pip install coverage pytest-cov fi -if [ "$BUILD_TEST" ]; then - - # build testing - pip uninstall --yes cython - pip install cython==0.23 - ( python setup.py build_ext --inplace && python setup.py develop ) || true - -else - - # build but don't install - echo "[build em]" - time python setup.py build_ext --inplace || exit 1 +# we may have run installations +echo +echo "[conda installs]" +REQ="ci/requirements-${JOB}.run" +if [ -e ${REQ} ]; then + time conda install -n pandas --file=${REQ} || exit 1 +fi - # we may have run installations - echo "[conda installs]" - REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.run" - if [ -e ${REQ} ]; then - time conda install -n pandas --file=${REQ} || exit 1 - fi +# we may have additional pip installs +echo +echo "[pip installs]" +REQ="ci/requirements-${JOB}.pip" +if [ -e ${REQ} ]; then + pip install -r $REQ +fi - # we may have additional pip installs - echo "[pip installs]" - REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.pip" - if [ -e ${REQ} ]; then - pip install -r $REQ - fi +# may have addtl installation instructions for this build +echo +echo "[addtl installs]" +REQ="ci/requirements-${JOB}.sh" +if [ -e ${REQ} ]; then + time bash $REQ || exit 1 +fi - # may have addtl installation instructions for this build - echo "[addtl installs]" - 
REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.sh" - if [ -e ${REQ} ]; then - time bash $REQ || exit 1 - fi +# remove any installed pandas package +# w/o removing anything else +echo +echo "[removing installed pandas]" +conda remove pandas -y --force +pip uninstall -y pandas - # remove any installed pandas package - # w/o removing anything else - echo "[removing installed pandas]" - conda remove pandas --force +echo +echo "[no installed pandas]" +conda list pandas +pip list --format columns |grep pandas - # install our pandas - echo "[running setup.py develop]" - python setup.py develop || exit 1 +# build and install +echo "[running setup.py develop]" +python setup.py develop || exit 1 -fi +echo +echo "[show pandas]" +conda list pandas +echo echo "[done]" exit 0 diff --git a/ci/lint.sh b/ci/lint.sh index 2ffc68e5eb139..545ac9c90c5c1 100755 --- a/ci/lint.sh +++ b/ci/lint.sh @@ -8,16 +8,44 @@ RET=0 if [ "$LINT" ]; then - # pandas/src is C code, so no need to search there. - echo "Linting *.py" - flake8 pandas --filename=*.py --exclude pandas/src + # pandas/_libs/src is C code, so no need to search there. + echo "Linting *.py" + flake8 pandas --filename=*.py --exclude pandas/_libs/src if [ $? -ne "0" ]; then RET=1 fi echo "Linting *.py DONE" + echo "Linting setup.py" + flake8 setup.py + if [ $? -ne "0" ]; then + RET=1 + fi + echo "Linting setup.py DONE" + + echo "Linting asv_bench/benchmarks/" + flake8 asv_bench/benchmarks/ --exclude=asv_bench/benchmarks/*.py --ignore=F811 + if [ $? -ne "0" ]; then + RET=1 + fi + echo "Linting asv_bench/benchmarks/*.py DONE" + + echo "Linting scripts/*.py" + flake8 scripts --filename=*.py + if [ $? -ne "0" ]; then + RET=1 + fi + echo "Linting scripts/*.py DONE" + + echo "Linting doc scripts" + flake8 doc/make.py doc/source/conf.py + if [ $? 
-ne "0" ]; then + RET=1 + fi + echo "Linting doc scripts DONE" + echo "Linting *.pyx" - flake8 pandas --filename=*.pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126 + flake8 pandas --filename=*.pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123,F403 if [ $? -ne "0" ]; then RET=1 fi @@ -27,14 +55,24 @@ if [ "$LINT" ]; then for path in 'src' do echo "linting -> pandas/$path" - flake8 pandas/$path --filename=*.pxi.in --select=E501,E302,E203,E111,E114,E221,E303,E231,E126 + flake8 pandas/$path --filename=*.pxi.in --select=E501,E302,E203,E111,E114,E221,E303,E231,E126,F403 if [ $? -ne "0" ]; then RET=1 fi - done echo "Linting *.pxi.in DONE" + echo "Linting *.pxd" + for path in '_libs' + do + echo "linting -> pandas/$path" + flake8 pandas/$path --filename=*.pxd --select=E501,E302,E203,E111,E114,E221,E303,E231,E126,F403 + if [ $? -ne "0" ]; then + RET=1 + fi + done + echo "Linting *.pxd DONE" + # readability/casting: Warnings about C casting instead of C++ casting # runtime/int: Warnings about using C number types instead of C++ ones # build/include_subdir: Warnings about prefacing included header files with directory @@ -46,8 +84,8 @@ if [ "$LINT" ]; then echo "Linting *.c and *.h" for path in '*.h' 'period_helper.c' 'datetime' 'parser' 'ujson' do - echo "linting -> pandas/src/$path" - cpplint --quiet --extensions=c,h --headers=h --filter=-readability/casting,-runtime/int,-build/include_subdir --recursive pandas/src/$path + echo "linting -> pandas/_libs/src/$path" + cpplint --quiet --extensions=c,h --headers=h --filter=-readability/casting,-runtime/int,-build/include_subdir --recursive pandas/_libs/src/$path if [ $? 
-ne "0" ]; then RET=1 fi @@ -55,12 +93,79 @@ if [ "$LINT" ]; then echo "Linting *.c and *.h DONE" echo "Check for invalid testing" - grep -r -E --include '*.py' --exclude testing.py '(numpy|np)\.testing' pandas + + # Check for the following code in testing: + # + # np.testing + # np.array_equal + grep -r -E --include '*.py' --exclude testing.py '(numpy|np)(\.testing|\.array_equal)' pandas/tests/ + + if [ $? = "0" ]; then + RET=1 + fi + + # Check for pytest.warns + grep -r -E --include '*.py' 'pytest\.warns' pandas/tests/ + + if [ $? = "0" ]; then + RET=1 + fi + + # Check for the following code in the extension array base tests + # tm.assert_frame_equal + # tm.assert_series_equal + grep -r -E --include '*.py' --exclude base.py 'tm.assert_(series|frame)_equal' pandas/tests/extension/base + if [ $? = "0" ]; then RET=1 fi + echo "Check for invalid testing DONE" + # Check for imports from pandas.core.common instead + # of `import pandas.core.common as com` + echo "Check for non-standard imports" + grep -R --include="*.py*" -E "from pandas.core.common import " pandas + if [ $? = "0" ]; then + RET=1 + fi + echo "Check for non-standard imports DONE" + + echo "Check for use of lists instead of generators in built-in Python functions" + + # Example: Avoid `any([i for i in some_iterator])` in favor of `any(i for i in some_iterator)` + # + # Check the following functions: + # any(), all(), sum(), max(), min(), list(), dict(), set(), frozenset(), tuple(), str.join() + grep -R --include="*.py*" -E "[^_](any|all|sum|max|min|list|dict|set|frozenset|tuple|join)\(\[.* for .* in .*\]\)" pandas + + if [ $? 
= "0" ]; then + RET=1 + fi + echo "Check for use of lists instead of generators in built-in Python functions DONE" + + echo "Check for incorrect sphinx directives" + SPHINX_DIRECTIVES=$(echo \ + "autosummary|contents|currentmodule|deprecated|function|image|"\ + "important|include|ipython|literalinclude|math|module|note|raw|"\ + "seealso|toctree|versionadded|versionchanged|warning" | tr -d "[:space:]") + for path in './pandas' './doc/source' + do + grep -R --include="*.py" --include="*.pyx" --include="*.rst" -E "\.\. ($SPHINX_DIRECTIVES):[^:]" $path + if [ $? = "0" ]; then + RET=1 + fi + done + echo "Check for incorrect sphinx directives DONE" + + echo "Check for deprecated messages without sphinx directive" + grep -R --include="*.py" --include="*.pyx" -E "(DEPRECATED|DEPRECATE|Deprecated)(:|,|\.)" pandas + + if [ $? = "0" ]; then + RET=1 + fi + echo "Check for deprecated messages without sphinx directive DONE" + else echo "NOT Linting" fi diff --git a/ci/prep_cython_cache.sh b/ci/prep_cython_cache.sh index e091bb00ccedc..18d9388327ddc 100755 --- a/ci/prep_cython_cache.sh +++ b/ci/prep_cython_cache.sh @@ -22,7 +22,7 @@ fi home_dir=$(pwd) -if [ -f "$CACHE_File" ] && [ "$USE_CACHE" ] && [ -d "$PYX_CACHE_DIR" ]; then +if [ -f "$CACHE_File" ] && [ -z "$NOCACHE" ] && [ -d "$PYX_CACHE_DIR" ]; then echo "Cache available - checking pyx diff" @@ -57,16 +57,16 @@ if [ -f "$CACHE_File" ] && [ "$USE_CACHE" ] && [ -d "$PYX_CACHE_DIR" ]; then fi -if [ $clear_cache -eq 0 ] && [ "$USE_CACHE" ] +if [ $clear_cache -eq 0 ] && [ -z "$NOCACHE" ] then - # No and use_cache is set + # No and nocache is not set echo "Will reuse cached cython file" cd / tar xvmf $CACHE_File cd $home_dir else echo "Rebuilding cythonized files" - echo "Use cache (Blank if not set) = $USE_CACHE" + echo "No cache = $NOCACHE" echo "Clear cache (1=YES) = $clear_cache" fi diff --git a/ci/print_skipped.py b/ci/print_skipped.py index 9fb05df64bcea..dd2180f6eeb19 100755 --- a/ci/print_skipped.py +++ 
b/ci/print_skipped.py @@ -30,20 +30,21 @@ def parse_results(filename): i += 1 assert i - 1 == len(skipped) assert i - 1 == len(skipped) - assert len(skipped) == int(root.attrib['skip']) + # assert len(skipped) == int(root.attrib['skip']) return '\n'.join(skipped) def main(args): print('SKIPPED TESTS:') - print(parse_results(args.filename)) + for fn in args.filename: + print(parse_results(fn)) return 0 def parse_args(): import argparse parser = argparse.ArgumentParser() - parser.add_argument('filename', help='XUnit file to parse') + parser.add_argument('filename', nargs='+', help='XUnit file to parse') return parser.parse_args() diff --git a/ci/requirements-2.7.build b/ci/requirements-2.7.build index 836385671d603..17d34f3895c64 100644 --- a/ci/requirements-2.7.build +++ b/ci/requirements-2.7.build @@ -1,4 +1,6 @@ -python-dateutil=2.4.1 +python=2.7* +python-dateutil=2.5.0 pytz=2013b -numpy -cython=0.23 +nomkl +numpy=1.13* +cython=0.24 diff --git a/ci/requirements-2.7.pip b/ci/requirements-2.7.pip index d16b932c8be4f..876d9e978fa84 100644 --- a/ci/requirements-2.7.pip +++ b/ci/requirements-2.7.pip @@ -1,9 +1,10 @@ blosc -httplib2 -google-api-python-client==1.2 -python-gflags==2.0 -oauth2client==1.5.0 +pandas-gbq +html5lib +beautifulsoup4 pathlib backports.lzma py PyCrypto +mock +ipython diff --git a/ci/requirements-2.7.run b/ci/requirements-2.7.run index 62e31e4ae24e3..7c10b98fb6e14 100644 --- a/ci/requirements-2.7.run +++ b/ci/requirements-2.7.run @@ -1,22 +1,20 @@ -python-dateutil=2.4.1 +python-dateutil=2.5.0 pytz=2013b numpy xlwt=0.7.5 numexpr pytables matplotlib -openpyxl=1.6.2 +openpyxl=2.4.0 xlrd=0.9.2 sqlalchemy=0.9.6 -lxml=3.2.1 +lxml scipy -xlsxwriter=0.4.6 +xlsxwriter=0.5.2 s3fs bottleneck -psycopg2=2.5.2 +psycopg2 patsy pymysql=0.6.3 -html5lib=1.0b2 -beautiful-soup=4.2.1 jinja2=2.8 xarray=0.8.0 diff --git a/ci/requirements-2.7.sh b/ci/requirements-2.7.sh index 64d470e5c6e0e..95169e5dcce57 100644 --- a/ci/requirements-2.7.sh +++ b/ci/requirements-2.7.sh @@ 
-4,4 +4,4 @@ source activate pandas echo "install 27" -conda install -n pandas -c conda-forge feather-format +conda install -n pandas -c conda-forge feather-format pyarrow=0.4.1 jemalloc=4.5.0.post fastparquet diff --git a/ci/requirements-2.7_BUILD_TEST.build b/ci/requirements-2.7_BUILD_TEST.build deleted file mode 100644 index faf1e3559f7f1..0000000000000 --- a/ci/requirements-2.7_BUILD_TEST.build +++ /dev/null @@ -1,4 +0,0 @@ -dateutil -pytz -numpy -cython diff --git a/ci/requirements-2.7_COMPAT.build b/ci/requirements-2.7_COMPAT.build index 95e3da03f161b..0a83a7346e8b5 100644 --- a/ci/requirements-2.7_COMPAT.build +++ b/ci/requirements-2.7_COMPAT.build @@ -1,4 +1,5 @@ -numpy=1.7.1 -cython=0.23 -dateutil=1.5 +python=2.7* +numpy=1.9.2 +cython=0.24 +python-dateutil=2.5.0 pytz=2013b diff --git a/ci/requirements-2.7_COMPAT.pip b/ci/requirements-2.7_COMPAT.pip index 9533a630d06a4..13cd35a923124 100644 --- a/ci/requirements-2.7_COMPAT.pip +++ b/ci/requirements-2.7_COMPAT.pip @@ -1,2 +1,4 @@ +html5lib==1.0b2 +beautifulsoup4==4.2.0 openpyxl argparse diff --git a/ci/requirements-2.7_COMPAT.run b/ci/requirements-2.7_COMPAT.run index d27b6a72c2d15..c3daed6e6e1da 100644 --- a/ci/requirements-2.7_COMPAT.run +++ b/ci/requirements-2.7_COMPAT.run @@ -1,16 +1,14 @@ -numpy=1.7.1 -dateutil=1.5 +numpy=1.9.2 +python-dateutil=2.5.0 pytz=2013b -scipy=0.11.0 +scipy=0.14.0 xlwt=0.7.5 xlrd=0.9.2 -bottleneck=0.8.0 -numexpr=2.2.2 -pytables=3.0.0 -html5lib=1.0b2 -beautiful-soup=4.2.0 -psycopg2=2.5.1 +bottleneck=1.0.0 +numexpr=2.4.4 # we test that we correctly don't use an unsupported numexpr +pytables=3.2.2 +psycopg2 pymysql=0.6.0 sqlalchemy=0.7.8 -xlsxwriter=0.4.6 +xlsxwriter=0.5.2 jinja2=2.8 diff --git a/ci/requirements-2.7_LOCALE.build b/ci/requirements-2.7_LOCALE.build index c17730b912651..a6f2e25387910 100644 --- a/ci/requirements-2.7_LOCALE.build +++ b/ci/requirements-2.7_LOCALE.build @@ -1,4 +1,5 @@ +python=2.7* python-dateutil pytz=2013b -numpy=1.7.1 -cython=0.23 +numpy=1.9.2 
+cython=0.24 diff --git a/ci/requirements-2.7_LOCALE.pip b/ci/requirements-2.7_LOCALE.pip index cf8e6b8b3d3a6..1b825bbf492ca 100644 --- a/ci/requirements-2.7_LOCALE.pip +++ b/ci/requirements-2.7_LOCALE.pip @@ -1 +1,3 @@ +html5lib==1.0b2 +beautifulsoup4==4.2.1 blosc diff --git a/ci/requirements-2.7_LOCALE.run b/ci/requirements-2.7_LOCALE.run index 1a9b42d832b0b..0a809a7dd6e5d 100644 --- a/ci/requirements-2.7_LOCALE.run +++ b/ci/requirements-2.7_LOCALE.run @@ -1,16 +1,12 @@ python-dateutil -pytz=2013b -numpy=1.7.1 +pytz +numpy=1.9.2 xlwt=0.7.5 -openpyxl=1.6.2 -xlsxwriter=0.4.6 +openpyxl=2.4.0 +xlsxwriter=0.5.2 xlrd=0.9.2 -bottleneck=0.8.0 -matplotlib=1.2.1 -patsy=0.1.0 +bottleneck=1.0.0 +matplotlib=1.4.3 sqlalchemy=0.8.1 -html5lib=1.0b2 -lxml=3.2.1 -scipy=0.11.0 -beautiful-soup=4.2.1 -bigquery=2.0.17 +lxml +scipy diff --git a/ci/requirements-2.7_SLOW.build b/ci/requirements-2.7_SLOW.build index 664e8b418def7..a665ab9edd585 100644 --- a/ci/requirements-2.7_SLOW.build +++ b/ci/requirements-2.7_SLOW.build @@ -1,4 +1,5 @@ +python=2.7* python-dateutil pytz -numpy=1.8.2 +numpy=1.10* cython diff --git a/ci/requirements-2.7_SLOW.run b/ci/requirements-2.7_SLOW.run index c2d2a14285ad6..db95a6ccb2314 100644 --- a/ci/requirements-2.7_SLOW.run +++ b/ci/requirements-2.7_SLOW.run @@ -1,7 +1,7 @@ python-dateutil pytz -numpy=1.8.2 -matplotlib=1.3.1 +numpy=1.10* +matplotlib=1.4.3 scipy patsy xlwt @@ -13,8 +13,7 @@ pytables sqlalchemy lxml s3fs -bottleneck psycopg2 pymysql html5lib -beautiful-soup +beautifulsoup4 diff --git a/bench/larry.py b/ci/requirements-2.7_WIN.pip similarity index 100% rename from bench/larry.py rename to ci/requirements-2.7_WIN.pip diff --git a/ci/requirements-2.7-64.run b/ci/requirements-2.7_WIN.run similarity index 84% rename from ci/requirements-2.7-64.run rename to ci/requirements-2.7_WIN.run index f953682f52d45..c4ca7fc736bb1 100644 --- a/ci/requirements-2.7-64.run +++ b/ci/requirements-2.7_WIN.run @@ -8,11 +8,11 @@ matplotlib openpyxl xlrd sqlalchemy 
-lxml=3.2.1 +lxml scipy xlsxwriter s3fs bottleneck html5lib -beautiful-soup +beautifulsoup4 jinja2=2.8 diff --git a/ci/requirements-3.4-64.run b/ci/requirements-3.4-64.run deleted file mode 100644 index 106cc5b7168ba..0000000000000 --- a/ci/requirements-3.4-64.run +++ /dev/null @@ -1,12 +0,0 @@ -python-dateutil -pytz -numpy=1.9* -openpyxl -xlsxwriter -xlrd -xlwt -scipy -numexpr -pytables -bottleneck -jinja2=2.8 diff --git a/ci/requirements-3.4.build b/ci/requirements-3.4.build deleted file mode 100644 index e6e59dcba63fe..0000000000000 --- a/ci/requirements-3.4.build +++ /dev/null @@ -1,3 +0,0 @@ -numpy=1.8.1 -cython=0.24.1 -libgfortran=1.0 diff --git a/ci/requirements-3.4.pip b/ci/requirements-3.4.pip deleted file mode 100644 index 55986a0220bf0..0000000000000 --- a/ci/requirements-3.4.pip +++ /dev/null @@ -1,5 +0,0 @@ -python-dateutil==2.2 -blosc -httplib2 -google-api-python-client -oauth2client diff --git a/ci/requirements-3.4.run b/ci/requirements-3.4.run deleted file mode 100644 index 3e12adae7dd9f..0000000000000 --- a/ci/requirements-3.4.run +++ /dev/null @@ -1,18 +0,0 @@ -pytz=2015.7 -numpy=1.8.1 -openpyxl -xlsxwriter -xlrd -xlwt -html5lib -patsy -beautiful-soup -scipy -numexpr -pytables -lxml -sqlalchemy -bottleneck -pymysql=0.6.3 -psycopg2 -jinja2=2.8 diff --git a/ci/requirements-3.4_SLOW.pip b/ci/requirements-3.4_SLOW.pip deleted file mode 100644 index 05c938abcbab6..0000000000000 --- a/ci/requirements-3.4_SLOW.pip +++ /dev/null @@ -1,3 +0,0 @@ -httplib2 -google-api-python-client -oauth2client diff --git a/ci/requirements-3.4_SLOW.sh b/ci/requirements-3.4_SLOW.sh deleted file mode 100644 index 24f1e042ed69e..0000000000000 --- a/ci/requirements-3.4_SLOW.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash - -source activate pandas - -echo "install 34_slow" - -conda install -n pandas -c conda-forge matplotlib diff --git a/ci/requirements-3.5.build b/ci/requirements-3.5.build index 2fc2053e64fe9..f7befe3b31865 100644 --- a/ci/requirements-3.5.build +++ 
b/ci/requirements-3.5.build @@ -1,4 +1,6 @@ +python=3.5* python-dateutil pytz -numpy=1.11.3 +nomkl +numpy cython diff --git a/ci/requirements-3.5.pip b/ci/requirements-3.5.pip index 0d9e44cf39fa4..c9565f2173070 100644 --- a/ci/requirements-3.5.pip +++ b/ci/requirements-3.5.pip @@ -1 +1,2 @@ xarray==0.9.1 +pandas_gbq diff --git a/ci/requirements-3.5.run b/ci/requirements-3.5.run index b07ce611c79a2..669cf437f2164 100644 --- a/ci/requirements-3.5.run +++ b/ci/requirements-3.5.run @@ -1,6 +1,5 @@ -python-dateutil pytz -numpy=1.11.3 +numpy openpyxl xlsxwriter xlrd @@ -18,3 +17,4 @@ pymysql psycopg2 s3fs beautifulsoup4 +ipython diff --git a/ci/requirements-3.5.sh b/ci/requirements-3.5.sh index d0f0b81802dc6..529e1e8742722 100644 --- a/ci/requirements-3.5.sh +++ b/ci/requirements-3.5.sh @@ -4,4 +4,8 @@ source activate pandas echo "install 35" -conda install -n pandas -c conda-forge feather-format +# pip install python-dateutil to get latest +conda remove -n pandas python-dateutil --force +pip install python-dateutil + +conda install -n pandas -c conda-forge feather-format pyarrow=0.7.1 diff --git a/ci/requirements-3.5_ASCII.build b/ci/requirements-3.5_ASCII.build index 9558cf00ddf5c..f7befe3b31865 100644 --- a/ci/requirements-3.5_ASCII.build +++ b/ci/requirements-3.5_ASCII.build @@ -1,4 +1,6 @@ +python=3.5* python-dateutil pytz +nomkl numpy cython diff --git a/ci/requirements-3.5_DOC_BUILD.sh b/ci/requirements-3.5_DOC_BUILD.sh deleted file mode 100644 index 25bc63acc96d1..0000000000000 --- a/ci/requirements-3.5_DOC_BUILD.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash - -source activate pandas - -echo "[install DOC_BUILD deps]" - -conda install -n pandas -c conda-forge feather-format - -conda install -n pandas -c r r rpy2 --yes diff --git a/ci/requirements-3.5_NUMPY_DEV.build.sh b/ci/requirements-3.5_NUMPY_DEV.build.sh deleted file mode 100644 index 91fa15491bbf7..0000000000000 --- a/ci/requirements-3.5_NUMPY_DEV.build.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash - 
-source activate pandas - -echo "install numpy master wheel" - -# remove the system installed numpy -pip uninstall numpy -y - -# install numpy wheel from master -pip install --pre --upgrade --no-index --timeout=60 --trusted-host travis-dev-wheels.scipy.org -f http://travis-dev-wheels.scipy.org/ numpy scipy - -true diff --git a/ci/requirements-3.5_NUMPY_DEV.run b/ci/requirements-3.5_NUMPY_DEV.run deleted file mode 100644 index 0aa987baefb1d..0000000000000 --- a/ci/requirements-3.5_NUMPY_DEV.run +++ /dev/null @@ -1,2 +0,0 @@ -python-dateutil -pytz diff --git a/ci/requirements-3.5_OSX.build b/ci/requirements-3.5_OSX.build index a201be352b8e4..f5bc01b67a20a 100644 --- a/ci/requirements-3.5_OSX.build +++ b/ci/requirements-3.5_OSX.build @@ -1,2 +1,4 @@ +python=3.5* +nomkl numpy=1.10.4 cython diff --git a/ci/requirements-3.5_OSX.sh b/ci/requirements-3.5_OSX.sh index cfbd2882a8a2d..c2978b175968c 100644 --- a/ci/requirements-3.5_OSX.sh +++ b/ci/requirements-3.5_OSX.sh @@ -4,4 +4,4 @@ source activate pandas echo "install 35_OSX" -conda install -n pandas -c conda-forge feather-format +conda install -n pandas -c conda-forge feather-format==0.3.1 fastparquet diff --git a/ci/requirements-3.6-64.run b/ci/requirements-3.6-64.run deleted file mode 100644 index 58ba103504b2c..0000000000000 --- a/ci/requirements-3.6-64.run +++ /dev/null @@ -1,13 +0,0 @@ -python-dateutil -pytz -numpy -openpyxl -xlsxwriter -xlrd -#xlwt -scipy -feather-format -numexpr -pytables -matplotlib -blosc diff --git a/ci/requirements-3.6.build b/ci/requirements-3.6.build index 9558cf00ddf5c..1c4b46aea3865 100644 --- a/ci/requirements-3.6.build +++ b/ci/requirements-3.6.build @@ -1,4 +1,6 @@ +python=3.6* python-dateutil pytz +nomkl numpy cython diff --git a/ci/requirements-3.6.pip b/ci/requirements-3.6.pip index e69de29bb2d1d..753a60d6c119a 100644 --- a/ci/requirements-3.6.pip +++ b/ci/requirements-3.6.pip @@ -0,0 +1 @@ +brotlipy diff --git a/ci/requirements-3.6.run b/ci/requirements-3.6.run index 
5d9cb05a7b402..822144a80bc9a 100644 --- a/ci/requirements-3.6.run +++ b/ci/requirements-3.6.run @@ -14,7 +14,12 @@ html5lib jinja2 sqlalchemy pymysql -# psycopg2 (not avail on defaults ATM) +feather-format +pyarrow +psycopg2 +python-snappy +fastparquet beautifulsoup4 s3fs xarray +ipython diff --git a/ci/requirements-3.6.sh b/ci/requirements-3.6.sh index 7d88ede751ec8..f5c3dbf59a29d 100644 --- a/ci/requirements-3.6.sh +++ b/ci/requirements-3.6.sh @@ -2,6 +2,6 @@ source activate pandas -echo "install 36" +echo "[install 3.6 downstream deps]" -conda install -n pandas -c conda-forge feather-format +conda install -n pandas -c conda-forge pandas-datareader xarray geopandas seaborn statsmodels scikit-learn dask diff --git a/ci/requirements-3.4_SLOW.build b/ci/requirements-3.6_DOC.build similarity index 53% rename from ci/requirements-3.4_SLOW.build rename to ci/requirements-3.6_DOC.build index c05a68a14b402..bc72eed2a0d4e 100644 --- a/ci/requirements-3.4_SLOW.build +++ b/ci/requirements-3.6_DOC.build @@ -1,4 +1,5 @@ +python=3.6* python-dateutil pytz -numpy=1.10* +numpy=1.13* cython diff --git a/ci/requirements-3.5_DOC_BUILD.run b/ci/requirements-3.6_DOC.run similarity index 69% rename from ci/requirements-3.5_DOC_BUILD.run rename to ci/requirements-3.6_DOC.run index 644a16f51f4b6..fa9cab32c0ac2 100644 --- a/ci/requirements-3.5_DOC_BUILD.run +++ b/ci/requirements-3.6_DOC.run @@ -1,16 +1,19 @@ ipython ipykernel +ipywidgets sphinx nbconvert nbformat notebook -matplotlib +matplotlib=2.1* +seaborn scipy lxml beautifulsoup4 html5lib pytables -openpyxl=1.8.5 +python-snappy +openpyxl xlrd xlwt xlsxwriter @@ -18,4 +21,5 @@ sqlalchemy numexpr bottleneck statsmodels -pyqt=4.11.4 +xarray +pyqt diff --git a/ci/requirements-3.6_DOC.sh b/ci/requirements-3.6_DOC.sh new file mode 100644 index 0000000000000..aec0f62148622 --- /dev/null +++ b/ci/requirements-3.6_DOC.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +source activate pandas + +echo "[install DOC_BUILD deps]" + +pip install pandas-gbq + 
+conda install -n pandas -c conda-forge feather-format pyarrow nbsphinx pandoc fastparquet + +conda install -n pandas -c r r rpy2 --yes diff --git a/ci/requirements-3.5_DOC_BUILD.build b/ci/requirements-3.6_LOCALE.build similarity index 65% rename from ci/requirements-3.5_DOC_BUILD.build rename to ci/requirements-3.6_LOCALE.build index 9558cf00ddf5c..1c4b46aea3865 100644 --- a/ci/requirements-3.5_DOC_BUILD.build +++ b/ci/requirements-3.6_LOCALE.build @@ -1,4 +1,6 @@ +python=3.6* python-dateutil pytz +nomkl numpy cython diff --git a/pandas/computation/tests/__init__.py b/ci/requirements-3.6_LOCALE.pip similarity index 100% rename from pandas/computation/tests/__init__.py rename to ci/requirements-3.6_LOCALE.pip diff --git a/ci/requirements_all.txt b/ci/requirements-3.6_LOCALE.run similarity index 58% rename from ci/requirements_all.txt rename to ci/requirements-3.6_LOCALE.run index 4ff80a478f247..ad54284c6f7e3 100644 --- a/ci/requirements_all.txt +++ b/ci/requirements-3.6_LOCALE.run @@ -1,26 +1,22 @@ -pytest -pytest-cov -pytest-xdist -flake8 -sphinx -ipython python-dateutil pytz +numpy +scipy openpyxl xlsxwriter xlrd xlwt -html5lib -patsy -beautiful-soup -numpy -cython -scipy numexpr pytables matplotlib lxml +html5lib +jinja2 sqlalchemy -bottleneck pymysql -Jinja2 +# feather-format (not available on defaults ATM) +psycopg2 +beautifulsoup4 +s3fs +xarray +ipython diff --git a/ci/requirements-3.5_NUMPY_DEV.build b/ci/requirements-3.6_LOCALE_SLOW.build similarity index 53% rename from ci/requirements-3.5_NUMPY_DEV.build rename to ci/requirements-3.6_LOCALE_SLOW.build index d15edbfa3d2c1..1c4b46aea3865 100644 --- a/ci/requirements-3.5_NUMPY_DEV.build +++ b/ci/requirements-3.6_LOCALE_SLOW.build @@ -1,3 +1,6 @@ +python=3.6* python-dateutil pytz +nomkl +numpy cython diff --git a/pandas/indexes/__init__.py b/ci/requirements-3.6_LOCALE_SLOW.pip similarity index 100% rename from pandas/indexes/__init__.py rename to ci/requirements-3.6_LOCALE_SLOW.pip diff --git 
a/ci/requirements-3.4_SLOW.run b/ci/requirements-3.6_LOCALE_SLOW.run similarity index 53% rename from ci/requirements-3.4_SLOW.run rename to ci/requirements-3.6_LOCALE_SLOW.run index 39018439a1223..ad54284c6f7e3 100644 --- a/ci/requirements-3.4_SLOW.run +++ b/ci/requirements-3.6_LOCALE_SLOW.run @@ -1,20 +1,22 @@ python-dateutil pytz -numpy=1.10* +numpy +scipy openpyxl xlsxwriter xlrd xlwt -html5lib -patsy -beautiful-soup -scipy -numexpr=2.4.4 +numexpr pytables matplotlib lxml +html5lib +jinja2 sqlalchemy -bottleneck pymysql +# feather-format (not available on defaults ATM) psycopg2 -jinja2=2.8 +beautifulsoup4 +s3fs +xarray +ipython diff --git a/ci/requirements-3.6_NUMPY_DEV.build b/ci/requirements-3.6_NUMPY_DEV.build new file mode 100644 index 0000000000000..336fbe86b57d8 --- /dev/null +++ b/ci/requirements-3.6_NUMPY_DEV.build @@ -0,0 +1,2 @@ +python=3.6* +pytz diff --git a/ci/requirements-3.6_NUMPY_DEV.build.sh b/ci/requirements-3.6_NUMPY_DEV.build.sh new file mode 100644 index 0000000000000..9145bf1d3481c --- /dev/null +++ b/ci/requirements-3.6_NUMPY_DEV.build.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +source activate pandas + +echo "install numpy master wheel" + +# remove the system installed numpy +pip uninstall numpy -y + +# install numpy wheel from master +PRE_WHEELS="https://7933911d6844c6c53a7d-47bd50c35cd79bd838daf386af554a83.ssl.cf2.rackcdn.com" +pip install --pre --upgrade --timeout=60 -f $PRE_WHEELS numpy scipy + +# install dateutil from master +# pip install -U git+git://github.com/dateutil/dateutil.git +pip install dateutil + +# cython via pip +pip install cython + +true diff --git a/pandas/io/tests/__init__.py b/ci/requirements-3.6_NUMPY_DEV.pip similarity index 100% rename from pandas/io/tests/__init__.py rename to ci/requirements-3.6_NUMPY_DEV.pip diff --git a/ci/requirements-3.6_NUMPY_DEV.run b/ci/requirements-3.6_NUMPY_DEV.run new file mode 100644 index 0000000000000..af44f198c687e --- /dev/null +++ b/ci/requirements-3.6_NUMPY_DEV.run @@ -0,0 +1 @@ 
+pytz diff --git a/pandas/io/tests/json/__init__.py b/ci/requirements-3.6_WIN.pip similarity index 100% rename from pandas/io/tests/json/__init__.py rename to ci/requirements-3.6_WIN.pip diff --git a/ci/requirements-3.5-64.run b/ci/requirements-3.6_WIN.run similarity index 65% rename from ci/requirements-3.5-64.run rename to ci/requirements-3.6_WIN.run index 905c2ff3625bd..3042888763863 100644 --- a/ci/requirements-3.5-64.run +++ b/ci/requirements-3.6_WIN.run @@ -1,6 +1,7 @@ python-dateutil pytz -numpy +numpy=1.13* +bottleneck openpyxl xlsxwriter xlrd @@ -11,3 +12,6 @@ numexpr pytables matplotlib blosc +thrift=0.10* +fastparquet +pyarrow diff --git a/ci/requirements-optional-conda.txt b/ci/requirements-optional-conda.txt new file mode 100644 index 0000000000000..6edb8d17337e4 --- /dev/null +++ b/ci/requirements-optional-conda.txt @@ -0,0 +1,27 @@ +beautifulsoup4 +blosc +bottleneck +fastparquet +feather-format +html5lib +ipython +ipykernel +jinja2 +lxml +matplotlib +nbsphinx +numexpr +openpyxl +pyarrow +pymysql +pytables +pytest-cov +pytest-xdist +s3fs +scipy +seaborn +sqlalchemy +xarray +xlrd +xlsxwriter +xlwt diff --git a/ci/requirements-optional-pip.txt b/ci/requirements-optional-pip.txt new file mode 100644 index 0000000000000..8d4421ba2b681 --- /dev/null +++ b/ci/requirements-optional-pip.txt @@ -0,0 +1,29 @@ +# This file was autogenerated by scripts/convert_deps.py +# Do not modify directly +beautifulsoup4 +blosc +bottleneck +fastparquet +feather-format +html5lib +ipython +ipykernel +jinja2 +lxml +matplotlib +nbsphinx +numexpr +openpyxl +pyarrow +pymysql +tables +pytest-cov +pytest-xdist +s3fs +scipy +seaborn +sqlalchemy +xarray +xlrd +xlsxwriter +xlwt \ No newline at end of file diff --git a/ci/requirements_dev.txt b/ci/requirements_dev.txt index b0a8adc8df5cb..fcbe0da5de305 100644 --- a/ci/requirements_dev.txt +++ b/ci/requirements_dev.txt @@ -1,8 +1,11 @@ -python-dateutil -pytz -numpy -cython -pytest -pytest-cov -pytest-xdist +# This file was autogenerated 
by scripts/convert_deps.py +# Do not modify directly +Cython +NumPy flake8 +moto +pytest>=3.1 +python-dateutil>=2.5.0 +pytz +setuptools>=3.3 +sphinx \ No newline at end of file diff --git a/ci/run_circle.sh b/ci/run_circle.sh new file mode 100755 index 0000000000000..435985bd42148 --- /dev/null +++ b/ci/run_circle.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +echo "[running tests]" +export PATH="$MINICONDA_DIR/bin:$PATH" + +source activate pandas + +echo "pytest --strict --junitxml=$CIRCLE_TEST_REPORTS/reports/junit.xml $@ pandas" +pytest --strict --junitxml=$CIRCLE_TEST_REPORTS/reports/junit.xml $@ pandas diff --git a/ci/script.sh b/ci/script.sh deleted file mode 100755 index c52fa0fdb33a3..0000000000000 --- a/ci/script.sh +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/bash - -echo "inside $0" - -source activate pandas - -# don't run the tests for the doc build -if [ x"$DOC_BUILD" != x"" ]; then - exit 0 -fi - -if [ -n "$LOCALE_OVERRIDE" ]; then - export LC_ALL="$LOCALE_OVERRIDE"; - echo "Setting LC_ALL to $LOCALE_OVERRIDE" - - pycmd='import pandas; print("pandas detected console encoding: %s" % pandas.get_option("display.encoding"))' - python -c "$pycmd" -fi - -if [ "$BUILD_TEST" ]; then - echo "We are not running pytest as this is simply a build test." -elif [ "$COVERAGE" ]; then - echo pytest -s --cov=pandas --cov-report xml:/tmp/pytest.xml $TEST_ARGS pandas - pytest -s --cov=pandas --cov-report xml:/tmp/pytest.xml $TEST_ARGS pandas -else - echo pytest $TEST_ARGS pandas - pytest $TEST_ARGS pandas # TODO: doctest -fi - -RET="$?" 
- -exit "$RET" diff --git a/ci/script_multi.sh b/ci/script_multi.sh new file mode 100755 index 0000000000000..2b2d4d5488b91 --- /dev/null +++ b/ci/script_multi.sh @@ -0,0 +1,46 @@ +#!/bin/bash -e + +echo "[script multi]" + +source activate pandas + +if [ -n "$LOCALE_OVERRIDE" ]; then + export LC_ALL="$LOCALE_OVERRIDE"; + echo "Setting LC_ALL to $LOCALE_OVERRIDE" + + pycmd='import pandas; print("pandas detected console encoding: %s" % pandas.get_option("display.encoding"))' + python -c "$pycmd" +fi + +# Enforce absent network during testing by faking a proxy +if echo "$TEST_ARGS" | grep -e --skip-network -q; then + export http_proxy=http://1.2.3.4 https_proxy=http://1.2.3.4; +fi + +# Workaround for pytest-xdist flaky collection order +# https://github.com/pytest-dev/pytest/issues/920 +# https://github.com/pytest-dev/pytest/issues/1075 +export PYTHONHASHSEED=$(python -c 'import random; print(random.randint(1, 4294967295))') +echo PYTHONHASHSEED=$PYTHONHASHSEED + +if [ "$DOC" ]; then + echo "We are not running pytest as this is a doc-build" + +elif [ "$COVERAGE" ]; then + echo pytest -s -n 2 -m "not single" --cov=pandas --cov-report xml:/tmp/cov-multiple.xml --junitxml=/tmp/multiple.xml --strict $TEST_ARGS pandas + pytest -s -n 2 -m "not single" --cov=pandas --cov-report xml:/tmp/cov-multiple.xml --junitxml=/tmp/multiple.xml --strict $TEST_ARGS pandas + +elif [ "$SLOW" ]; then + TEST_ARGS="--only-slow --skip-network" + echo pytest -r xX -m "not single and slow" -v --junitxml=/tmp/multiple.xml --strict $TEST_ARGS pandas + pytest -r xX -m "not single and slow" -v --junitxml=/tmp/multiple.xml --strict $TEST_ARGS pandas + +else + echo pytest -n 2 -r xX -m "not single" --junitxml=/tmp/multiple.xml --strict $TEST_ARGS pandas + pytest -n 2 -r xX -m "not single" --junitxml=/tmp/multiple.xml --strict $TEST_ARGS pandas # TODO: doctest + +fi + +RET="$?" 
+ +exit "$RET" diff --git a/ci/script_single.sh b/ci/script_single.sh new file mode 100755 index 0000000000000..f376c920ac71b --- /dev/null +++ b/ci/script_single.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +echo "[script_single]" + +source activate pandas + +if [ -n "$LOCALE_OVERRIDE" ]; then + export LC_ALL="$LOCALE_OVERRIDE"; + echo "Setting LC_ALL to $LOCALE_OVERRIDE" + + pycmd='import pandas; print("pandas detected console encoding: %s" % pandas.get_option("display.encoding"))' + python -c "$pycmd" +fi + +if [ "$SLOW" ]; then + TEST_ARGS="--only-slow --skip-network" +fi + +# Enforce absent network during testing by faking a proxy +if echo "$TEST_ARGS" | grep -e --skip-network -q; then + export http_proxy=http://1.2.3.4 https_proxy=http://1.2.3.4; +fi + +if [ "$DOC" ]; then + echo "We are not running pytest as this is a doc-build" + +elif [ "$COVERAGE" ]; then + echo pytest -s -m "single" --strict --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas + pytest -s -m "single" --strict --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas + +else + echo pytest -m "single" -r xX --junitxml=/tmp/single.xml --strict $TEST_ARGS pandas + pytest -m "single" -r xX --junitxml=/tmp/single.xml --strict $TEST_ARGS pandas # TODO: doctest + +fi + +RET="$?" 
+ +exit "$RET" diff --git a/ci/show_circle.sh b/ci/show_circle.sh new file mode 100755 index 0000000000000..bfaa65c1d84f2 --- /dev/null +++ b/ci/show_circle.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +echo "[installed versions]" + +export PATH="$MINICONDA_DIR/bin:$PATH" +source activate pandas + +python -c "import pandas; pandas.show_versions();" diff --git a/ci/submit_cython_cache.sh b/ci/submit_cython_cache.sh index cfbced4988357..b87acef0ba11c 100755 --- a/ci/submit_cython_cache.sh +++ b/ci/submit_cython_cache.sh @@ -9,7 +9,7 @@ rm -rf $PYX_CACHE_DIR home_dir=$(pwd) -mkdir $PYX_CACHE_DIR +mkdir -p $PYX_CACHE_DIR rsync -Rv $pyx_file_list $PYX_CACHE_DIR echo "pyx files:" diff --git a/ci/upload_coverage.sh b/ci/upload_coverage.sh new file mode 100755 index 0000000000000..a7ef2fa908079 --- /dev/null +++ b/ci/upload_coverage.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +if [ -z "$COVERAGE" ]; then + echo "coverage is not selected for this build" + exit 0 +fi + +source activate pandas + +echo "uploading coverage" +bash <(curl -s https://codecov.io/bash) -Z -c -F single -f /tmp/cov-single.xml +bash <(curl -s https://codecov.io/bash) -Z -c -F multiple -f /tmp/cov-multiple.xml diff --git a/circle.yml b/circle.yml new file mode 100644 index 0000000000000..9d49145af54e3 --- /dev/null +++ b/circle.yml @@ -0,0 +1,38 @@ +machine: + environment: + # these are globally set + MINICONDA_DIR: /home/ubuntu/miniconda3 + + +database: + override: + - ./ci/install_db_circle.sh + + +checkout: + post: + # since circleci does a shallow fetch + # we need to populate our tags + - git fetch --depth=1000 + + +dependencies: + override: + - > + case $CIRCLE_NODE_INDEX in + 0) + sudo apt-get install language-pack-it && ./ci/install_circle.sh JOB="2.7_COMPAT" LOCALE_OVERRIDE="it_IT.UTF-8" ;; + 1) + sudo apt-get install language-pack-zh-hans && ./ci/install_circle.sh JOB="3.6_LOCALE" LOCALE_OVERRIDE="zh_CN.UTF-8" ;; + 2) + sudo apt-get install language-pack-zh-hans && ./ci/install_circle.sh 
JOB="3.6_LOCALE_SLOW" LOCALE_OVERRIDE="zh_CN.UTF-8" ;; + 3) + ./ci/install_circle.sh JOB="3.5_ASCII" LOCALE_OVERRIDE="C" ;; + esac + - ./ci/show_circle.sh + + +test: + override: + - case $CIRCLE_NODE_INDEX in 0) ./ci/run_circle.sh --skip-slow --skip-network ;; 1) ./ci/run_circle.sh --only-slow --skip-network ;; 2) ./ci/run_circle.sh --skip-slow --skip-network ;; 3) ./ci/run_circle.sh --skip-slow --skip-network ;; esac: + parallel: true diff --git a/codecov.yml b/codecov.yml index 45a6040c6a50d..512bc2e82a736 100644 --- a/codecov.yml +++ b/codecov.yml @@ -1,9 +1,13 @@ +codecov: + branch: master + coverage: status: project: default: + enabled: no target: '82' patch: default: + enabled: no target: '50' - branches: null diff --git a/conda.recipe/meta.yaml b/conda.recipe/meta.yaml index 2aee11772896f..86bed996c8aab 100644 --- a/conda.recipe/meta.yaml +++ b/conda.recipe/meta.yaml @@ -1,9 +1,9 @@ package: name: pandas - version: {{ GIT_DESCRIBE_TAG|replace("v","") }} + version: {{ environ.get('GIT_DESCRIBE_TAG','').replace('v', '', 1) }} build: - number: {{ GIT_DESCRIBE_NUMBER|int }} + number: {{ environ.get('GIT_DESCRIBE_NUMBER', 0) }} {% if GIT_DESCRIBE_NUMBER|int == 0 %}string: np{{ CONDA_NPY }}py{{ CONDA_PY }}_0 {% else %}string: np{{ CONDA_NPY }}py{{ CONDA_PY }}_{{ GIT_BUILD_STR }}{% endif %} @@ -14,15 +14,15 @@ requirements: build: - python - cython - - numpy x.x - - setuptools + - numpy 1.11.* + - setuptools >=3.3 + - python-dateutil >=2.5.0 - pytz - - python-dateutil run: - python - - numpy x.x - - python-dateutil + - numpy >=1.11.* + - python-dateutil >=2.5.0 - pytz test: diff --git a/doc/README.rst b/doc/README.rst index a3733846d9ed1..efa21fdd3a2d9 100644 --- a/doc/README.rst +++ b/doc/README.rst @@ -3,9 +3,11 @@ Contributing to the documentation ================================= -If you're not the developer type, contributing to the documentation is still -of huge value. You don't even have to be an expert on -*pandas* to do so! 
Something as simple as rewriting small passages for clarity +Whether you are someone who loves writing, teaching, or development, +contributing to the documentation is a huge value. If you don't see yourself +as a developer type, please don't stress and know that we want you to +contribute. You don't even have to be an expert on *pandas* to do so! +Something as simple as rewriting small passages for clarity as you reference the docs is a simple but effective way to contribute. The next person to read that passage will be in your debt! @@ -81,7 +83,9 @@ have ``sphinx`` and ``ipython`` installed. `numpydoc `_ is used to parse the docstrings that follow the Numpy Docstring Standard (see above), but you don't need to install this because a local copy of ``numpydoc`` is included in the pandas source -code. +code. `nbsphinx `_ is used to convert +Jupyter notebooks. You will need to install it if you intend to modify any of +the notebooks included in the documentation. Furthermore, it is recommended to have all `optional dependencies `_ @@ -156,8 +160,8 @@ Where to start? There are a number of issues listed under `Docs `_ -and `Good as first PR -`_ +and `good first issue +`_ where you could start out. Or maybe you have an idea of your own, by using pandas, looking for something diff --git a/doc/_templates/api_redirect.html b/doc/_templates/api_redirect.html index 24bdd8363830f..c04a8b58ce544 100644 --- a/doc/_templates/api_redirect.html +++ b/doc/_templates/api_redirect.html @@ -1,15 +1,10 @@ -{% set pgn = pagename.split('.') -%} -{% if pgn[-2][0].isupper() -%} - {% set redirect = ["pandas", pgn[-2], pgn[-1], 'html']|join('.') -%} -{% else -%} - {% set redirect = ["pandas", pgn[-1], 'html']|join('.') -%} -{% endif -%} +{% set redirect = redirects[pagename.split("/")[-1]] %} - + This API page has moved -

This API page has moved here.

+

This API page has moved here.

- \ No newline at end of file + diff --git a/doc/cheatsheet/Pandas_Cheat_Sheet.pdf b/doc/cheatsheet/Pandas_Cheat_Sheet.pdf index d504926d225809d21b3f900c7956a2f9cea421cc..0492805a1408b1cd5b398eaad3bb0df2852c4232 100644 GIT binary patch literal 175124 zcmY&Xo(ZsfG+qR7fzdY~xb$-?AUDefH*REc*)#M5y zVswo3%rNAGNB2h;B@cgQ2Zv!;h!}|MjI3aIc!++>nA(~zlq zHipioBBsW6CZCg8EF#qu04$@x>_59K9izy*OVnDz|NZen>-xr^Lmx%8J z3;8wg!ai2dOMBkFTx=tsWxV`37v0<6|8|~xHok8@NA!L5uX;Xx^+Ue9zf)_vj=TUn zD<{u*Ju7+dpJlLBBltV-1HT0F-h|z_mb|%ERNZ*+k5Ku~yEj1payH7#ZY{nWyLp#) zn0=RYo;@xS%q&%N;^mc}dv6oQg_UIHK63K1P9@Y>PP8yV*qh7lNGgA-Fymh}7Z&?) zfq5ghKF+0b1OidBv7Rg_yKr5Bk^#PPUMjk+0YJ+{3(^OT%{La(4RdVA zY2mZkr1m8NvAY|egdw@;7tX8Sd7qZw6|H3YUl^Xtx?X&X_3?lW71^Cj1o@Hj^Eg1y z_f=Z`usr0~O5WS~XyQC0t)2~6wtUrrve3`Ul}{I%KJOOs-SD>% zu9W>`2s1HMCgT`Yyt?P}@_?>ocdlnnl$TWKKo@O=HbAZIc}(#wt*l&ANoz51BPx|I zo2)b6yuk$g(R)@|bZ1ua+!wMDMxKjES1)dM-UO5vuDSDKpT{Yy{)7P>!0P3Cj&yuJ zP^Txp-}EpF;3@sbWceL{+K*ososp{fRpnK^lAGMi&mZ_;64S%MH}Ea82q{drjf1vB||!0T4F4(?7AQA$>kbixtfKLt0;TCy#%vR z8|O*$OwXW%P-{&1q;d(-u>Q@Rr9S;nzPn~k%5nRG_k_28H_>V9-!&ILj_XSPmP2(G z>ST+@`LkUdJj>~!f_By|)LvOO`PxWUr!la&-zC?c6DNW-?w*Alo=N(;lf#guif&8I zJ2Tv^B*oy*U5??6)YIbFr%$cx-B)`)BXDV~y5ihHuXkSbyQCrAfJJ9wVhoss>XI%l z{XZUKqn2t_q3qE?G7UBm>psh&Hjvkj7l}L0f`dUB^X)xcg-xbVNp#$TEoI_;a*2gq zkp%1c#;$07#d3@UvfzUVlM5Xg<_rU-j&C+Fh|_SN6qaV)N8} zc<;@BGi^W%)>pMV73ggZbc@DH6K;p*IXLIUU6UTG+z?`89bRKmS{zAS65jA9KA9!| zZaUigEc8uA9e0Vec!x1bI{j=kdECI z^KVL=mc!Vu)tJFS#clx7EY-zm+S!xT>7?Y5E}tcs??rc{%mR1tG)Pfv1he$2hZb=& z^d1dj1u7`9-TIa=I+B0g9AjgdHYGoHQoLyiu0j(x zx8`F8ujUfAp?>h-L77-g&~JO@;%0nvl+OztQr^PEMSytCg0g!4($c8QQU>n2R`*r| z=Pbhc_(pt(d}$7(3f~1J50>8RU!j*&76=1P+cWB$}>loJaunc z`9#(;dbq5cAO6i6$@^cA#7@lrW_DI=<7!O@EirK8b-~9wYW16tWo_jk(C_hR^#Z<` zpU|4g(^RL&>Zb_`!EfQstc@nt-8u6_f{4$@gt50A^P4FELWz>eBDATg=2Qo;0eTW* zhudPhei85Avi{*_f#LwGB!T%gi!5C^gpJbzQmAO4C7opi zVD~8*vz;Z)i9KBVwCZZ)AqLTyb!#}uQS{Q%J>jVcu#L99Wf=ApyBwsKAh_z7e)eCJ zxi<5a6}8hCzOf>?O=7O&jx?EzwGDSvm}Wi-1htrsqUSBKVbvpdZUKxW$Ow#4R8mtTlrZ_N|?U|^z5t^mD;;UqG3X^ 
z8n9~WMoIP-AP=-e-K>!di97y*aW^&_Y*poYVzoB6!VQ&3fw^1< zO)7c!r-vxkqH92Vl7mdt%?$scqm7GE-OwP)QnWITm5Ko4Eewh!h9(4mU&|+tJo8P} zz7E!M8I0E8B`zdyu(PV3!B2HSYkJ)a9uD2q&s$9kyTzxUWuw9>qg%B?)L`y6Th8s% z*0GU_tQB($xIOcNXG7_PrW~E*L_W2F;jF7w=G=)k zGN;>FckfEGXU`ydsQ0Z8NTCTOvL1xMAI|skGy>D;#>oUVa~v$MeP=Mvn@>Ryo8%WP zweJE=k@qV@lqnNl#lIk_&g1KXDW~#;e4%E?`1XD6i|W8UrE;~D_2EuKY!YI#Q|W5Z z$EnlZ<4cE|z0tyi;*(v^k%Y=SW3}O$L==M6ijjuMST=SPMs+3|1ixtpQTgq9aWB}u zdqC(Kmtj;nzJRDo!!Wh3Xh|59_9E%|=X0vD6;6_^Sf^P7Svrj^{wg`(di`m0Q(Wcl zt~n!(?Hs?p4v3>tI@Ad5;@1ZsZ<=eILOzIGkPt?7j4nX6EK$Jf6V#evmJ_50 zE|dGh3Y5espYezS(t4mVl?P!hgk%2Rh4ys@501uo?lk_(p5u(tNIat5;hZ}2eb}O2k+}BUYG1ppE`4#6IncA zwh-(Qq=Mp&izH*Hxl^N&S=!h`Fr3}mX5?5{JmH4cbO=MZ@X%{;NAd9od|}+yN!Y{I z%Cc4Hx`2UBf!Kk}>VW{-*w4#wyfgra9bVP!-{=T@@qZfK3xSwWBgUb^S+>D_D+I(> zY_a)k85{%;ApbHo7M@~>3ozr$>Ws<>MeS;6wyW6f;qn0}(7}uRBFM?}8{DDk6UsiN z_}Yn{K{B5_BMN?%#?F)UQL)g6)OV?c^trVn&8vi-;akop5I8rtdU9a?7lPAPTV(oi}!c+nQQ=8b-5`7wlDz^ zptg z$`v*U_%PvRR8*Hp<8|uPBJ@qa-=c5iFF8Q*hn~&`qS8j+ z;jX|5eryq`UPKn;6@h5mad0)2MC36PSiCqXow9V6!6QPJz5wfE-wPzHLJCb) zJCG%f8GwY`NKlg~YI$ZqP`zS!PUWBh8#Yiin1&5eHEv8U(YQiOMDBEtw>aRr;!#n@Mx z+9}_XIh$C`^-A2kq;%~kIc_zSqd-ME(1?&sHn^>VuixUIfz8a}+)-e+^q3^HAQP%~cRHm^ET}_IjE2jVKEc zVXza;+FH%|Jvj!99dj5pyE!{66KN&{Gp0l%X@}Q!yNSuAkZA=53#JcpFP}_Jckj(m z1FN4-z`ez#X?KY{e(AWjS3rO$Sq(}o8oqPAdu9Y$Yq;HCdJ#QFkOh0D$NqqjQcXEm%sZBPU zon2jAyDX{wRZo^{;D*~P>BVc^%|1!&*P`J=Kk0TP?l1z9d|^)Ac_ewQ{6^%OCN$u-~|=VtcB-xQwYKFsm{rCtlTL*_1ae z|Ch>8@U~?rE2g#s=hj^PEbjDpUVn;a1MTdDWq?oM@RFUQnEn3RYx-^)%U_#)rV+8= zYGkE_mYCg8JfzKG&WZs>U%%EtesXE#k06QQcrZoIDPewiHH%QgKaR4d5qAp0J-cG7PZm)wY+5b*u$1maUWV)7|gkpzE20HjmrbWGbmjwhx3hBShJ7Gs+4Ibc8Z~xGrUW@Obw^aR8@uA z4W5(Obo59>z#-;g9>X2oK#b$fhvcPOY8@U%YQ$q}4LvA#Un8zx=xy{lY;%|}LqF?H zaSsQ!$=?qMWa$+}_6-XG8`@Pix5>vx)qBm*z`O@l!XhdlLN@H{?{^!6kQx`oNGqr; zA{zE&Y9k~2cPN3Cs<4M-%-ad>n6JrNe)31fr@tTxHx~5?h=ogYewxR5q_CB?k~0bTvT{k2?LgF1hs zCfJgTom*VFlW{Y*t$9NaC6a9ZU+~zpV_I0mmQAW83fmtRd@mpv?CP@FG`Qs9^&%=1 zahfMwEwmoV88p#-Ufk?}Rk2}SI#bO;2D1Y>S4;{r+;vn3PVs1cN7-R2f1DR}sBxl9 
zXk2}#A0NwnFkK3wrcu;s7aY42)ug@xevMDWR6@lfbaI`fUN*_@@{9glPDSL7c?a8)WMJjajwG$OnNC<6~ih_P<$fFML_HFmxci$8v}r5=?u-JmJ~z#ts47&UJ`26?)MUI@$`$MU!@s(ngjaAe2i1xJ$_f@K)O`u!1|A2#uB6usljB#vi=4~z`&j|s2qAZ`SYG+ zOpKBux%U}FDwM}g_(O0}b;&lWl~w*zw79NRMM3^il%C^;3NH|nA0B$2;eCf5PngW< zk++m(z|4jWNs%nfB-8PI9Hq!Xf)?_7gOY=Hxug&WysQe#q3lKs>rQHeZT@|$3Sw{& zvu9p z=!JrztJJHiT8vG^r^_3Vx?e_*90aQdDG4NA*Zh^Q!zA;7#`KoON5*3RHtp$bnqe#t zxojumlC+H7rRa-o=@*5DKQs#p*_-|g5tTW@JlN30hJveV;udMsbRU_lK#GT!V(%~0 zXqLm$fD{h?tQ0g{nKs@ z*JyK-#_V;f=kwo&-+Rw@-uH|CPssM)Hx2aLq#2GGQRByJiyxWhS6pT5JP0UmW3Pgc z2hSYMKxbU=sg`!8g3W+QSU zPqMzFevu-z6Gf?9{8wf!O2Lau*fHmzh-OKXzzdNZ4kYRTVEAT)z5MmA)OGuA=-wnVNON?^hY zCYmuPX>b+-N(~u)IPowjs_3B$rb>PBgMvU*Hm(sW0UzGd0@!8Ef;pg6L{DgHU>X`d zP)-760ExlIz4nwUBK@#of0AhJFH8v2WSqeEP$frXWtdxxy{#}>w=W|hoR&GXV!eLMqYq$3Ag*{PPz7gj zA#1?Yl4=4{hWR82XP09BQf6sWP7K^M8SKFf(em4CWS3+U5aEk$6pW&B?Y;_SjKvY99^Dq1fSWkPK)T`UYZ?KTho--J`<2uMlU5 z|G>I!FsWW5?=xybNc^zK(%<=V72`gF62OY=U;6t0 z#>3>Oxlt7#fAp*E5tMA%^%oh5gS5RIAF6c+0<^*yT}|PEd|U+kwgqBIABxuD$>7gZ z23i=N6bAQM!k#gKLfPBgD820P&75?KqTEMB@n5T z5b;6kgK_**nfy@v{4wO4aYKM4ILS1XnF>UUq+=omB0kvy?CffeI3gX9&s%Xi*GT|0=;1XWaFI#K9SW)#1FY8qJu; z4F=*`K5(nPc=F;azueGvDYWdkg^i%oNQt%*j9QfbngojP1Leh$H1u(^sfBi`MAT*( zco7p@vx|i>IS&_+aZ(tQbdyBhpxC6BB7eR$s_~JIQ7fzx&{`mSJnxyKFi4Ue1q}mx zrUpS2YwOF_*GlliA~o6zz#FVEJmi?uVqxh}(SqdfuSZDjLCkwnyS`6Aq7X>ksZVH( z7JjP-fv>18#12u$Nm2(zKE+xR40~#dwGaMjs-zj(QrfGb4IreolgyoiYxTzj zE&9aF^A8kPUj*3&n_6cOod9;h>EDMx(Nk43^xAE73dcOHW4Ll>wtpP?ni8451FTbt zHQ35(mcASlG_l=7`_Y!jk|EKS-BHnynIKG7lx49G_L`$yRx&g>U>1~^(U#R{?`sMe z@L7co0WE@tI98$k>$XAtj|*@iOZ`lm@f!fU^iPb;n$^My}6mT@KlDk{9jTBZ{DbZHmUZ7H93>&5RQy>1(Fp)5h<{j0tidRx?(SIkP-VcNdbbi62QTd zm92!AQ!K&Pq)BWk=5T?Nm8@{*nTiMHu9DY78->b%#;KaX&p`@X7HB&bl7bDyZggyj zvXW*&Cx2&}s~TwLi6)u^6QfvnBBONBk>H|IR@m2=Ky@J}LTMAgvS|+L&MvD=2Ah{1 zamZjf;4M4jBUJi?DTfey+M9b^#EcdTLi|g#*oaZ!XsXwqJxgKL6K5PTzRxnQ`x|aN z7m9nXk+NnVXY_oyW-?g@hCD8<)E6qvI)!?4K8QYgfs(FPWUIrlUr^w6uo@nS zRdpVXp*_d*0y)Job6z){p2|7bX343U-;h~a(R@xaz#$C}6Xw)~j~kxxfta=5N1r%P 
zMMA=Hs&nWF{5Wm}Z3@K&_Z z5aoy@5eFYIMCj8sN65nm9>e#`Ss?0MJs=7p4;#TO*6tg;4|mS`);tyW+t`|!Uu5d0{fxKU30n%&z0qV*>%S1#EK27m+9mg*ENS1 zt!^4`q!n3OebtOCR%6<;*lGZkCvT5mP`aPB-ye%DcN+7t-Ik-gFV=p=eX_Qq9Z3{uDHP_+YcA4 z5Z}H3)`zp}nzLF(PdBa`#wET(7o6bD3ul9!Isa=RD@j?qd=Y*eZnI+c%NT$RZKvhB z5_)-e9CKK1Kq6Sn^VDgLJH@N&!;@nz$$J=~2XlUx4wIT?Ottn~jL4e2`c|ZwLaLhw zUWha5)l1coR%j`KCOSW)+(*Meh|l42E|;2prUQm2Y;x^n4m?I#~CAgiyNtj8vTLGhbIx)p;TlfIxe9WCli2qZByRKvmP8c*0u@J zgd97xL2VBwl8d6C7;U6-p8qKCzH1Yh{N!g92a9b1s!#rhf`B7pqz)RxKhg*-o??0& zNb*D-q|DiOFN8fe2s`Q@DxD>Pr5E9ke@;VZU+wrw8gY6bm(>T3gP)3lmzl_R9Zib8UUrU=k9sA7fps72-0X8RJ= z>aJ#zM^K4Iq~r)PjxEg>%L55TqFAeql~-yKP=N!iQ96lJgX`78fge6X9mK4>(-Grc z26xN2S9A-rju#E8!q0rlIKIMkc0F+>@gxd{ULydqWSyXLXJAZGdsIm{$v^N! zyosW@El|nWDUyB_XD)z$CYXQ&cNtOW(Apc)$X1gt7LS=g@Ur&EB=`!28=M7NGFw0p~mBH*nmvY{_Vjs8J{;E zdx=UCGGdcHks(L}P|3_BELgaRCfx`Tw24f1ygF%%M+G9yaGqKI&2XFkx+aSlvMdOZ z|1Ecr*oqW8504&>REa7P#;Vj|yOlpz`xSVaNPFxy%1$BH<2onVd6)Eb&_HS!-l1utH{4HtnsMp>gjoJ;2#6WNlIE&5xPXAlJ+O=-@2 z1R0X*xPU?%J_#TwbC3*_W{4~-eHnBJ^a%I_L;*Dwq11UUSaA)^<6kmUrS*!w3W5n0 zSQKJp7W_#Iyd^oGD@PANVd%lwFUK9$`roitZtmda+dTk8D9>p4C8E7b$uN*BEx>D=(3UVbkWK%a)ozm&W`8Bm52Bp>qIUxZWxh9z*Jc+h7h{3Xd^Sm2KgI zq8kELXsRX4bM2T#CXMBGIA_rWZ5$QiL@y4Z5@f|Y@c9NXy+N6kN|SYy4&YW(L^{Y;r|^7 zWA*`^nNx#5aU&7K5wZ;xZmMOqXx2g@Wk$^0;?x6)r~?=7IVK~dBk@(FKf8dGcOaCV3ncI+`@uEtW-*XJ23mg9zduZ55;)p0b60Y*-oi)O36!rx z@)if%hX!(35#5>-vB@dmBh6vEW+yxbb|rSd*72gehXHfyi^r@iB3L;J8ueq7Cm0Fw625&I(JcGnv9to~?I0@p z5)G{}oPUWejd+glZ-dh3l1;H9?!4LZECZQyHZ-%%L41RtgLS#2EIhwgf70NH8}%ZE zsCSIw5i*H1&*DOr6y_{>Ibow8Iy;z4GQ{#E^PMD9%vu%dSFkFO7EAiqBpc-H;A^Lg5r9YR=}-gCCyO@E1!OgYgT z*AKC1MrOD0UF@p7`E3G$wr(mIb1^j`{>{(4Y6juzURAJjsPWq=WDDkZTQ0WqE4#hy zPjrLXY*rFrR1U@$hK+YqaV-p0Pj|R}M>m*1|(g1_hYL~ZV_4k^8;M>KZyd0ex;-Z&K3hwYhWeBk4O{u%lu=3&rs$0Gk) zOxY33OWaq9Gc~6EP)D1MA-AF{=uA(GE{cTBJ#zPjhZGWCEP^ogpNQ_;_fKO7vb%=i zMAzH~^WcpVW2f<(fWcy?t>Rs=lO;H_sLEJ~`=O4lt8~!R>RihDIlP=pZVAh^(>`k9u!-bY>ho#V*YqD)jgKf&vGkEwkb7qmvCY)rj?-n3JL+_H#Z=lFAPd!%l=a;6WuRUc`=-cM-v6QjQv@`3PLk{9Z#m)aEO$r 
zVamb~t>!8Enj1o220rW!Pcxx5SHG(A3`_#Jeds}6^0=5Ns4Jac{I~Qf>$*ng z*|xs?Sik$*uofk^w?B&#gCAxu2_Lq;<2u~#!T?Yoh;ImCn;; zXI7ae{Yo`8qshA@_`UqJl)GRgJeZwiYk&SCov5|=6qeK#|1Alj{Tl^v=qJ;>N%Xw` zIw;q^d#z?-t>e%H_a!O%w$#arS|fd1rSkW?;{7}fQ6KsA@#!;*&=)=NsaffmqI9Lz zft>ZH5NO&0^{ONzX_o{AHt7_n%e~+6UL(LnDwGb@cakaI>x=R+WwSHw<(S|V?wVLO zFF99%wdvl<8f)w@c>i4(D+k%iZdz$XIHO)g&ELHqJ51C!dK4~X#v3uMP!FQ`iPQC- zZvNCt@{-s)UNxDF5d_i$4fOBgZuO0IE;8BI+(yx$g*<4%L_kXjF7zz=yPxow!Zz}? z>oSQd8*cdxs<}Bpgy6msHD7Lxo$(y0c_CK-E5I6*F~SlIM2cInFMw9BtnCy)wLs^X zJU~Ht3x}b#59KPu)jSGzAmNeD(&UpiCBET34&BOjiG$zQa#x3-sJqKPo7m29hy|-0 zMJtgx+GH+kOfl$)>mf; zi&9(Y2GIESv%r(gV<4=sp`uu>Ez zysV$)Au*oPp{dG5s+Tdp&D@~xYC872;mB<#kHO{bx1?)AvnWCz z!H9kszY6w5bxG=Wz>80n=>54NDqe0w%NXve`FJxD$lsl03?`zGAkk;x#4c8>_d7P; z4`7uyqkSX6?syHW|BPxE+xQwEcCf|<5US=0Bo29%Pewo1L3!=5r2KWBgpM4 z8zoPq^XOU}TihyQVAcatH_2tmltv5#JdLFzst5ZjqGA+Onr@5i(t8|=-T15AVe;Ox-?YMl zB_b)js01^D$eb#OZLQ9##M0lFs1<4oPzqxu)-024_*MbJ6B<$s(isY3XKXLE%2Aid zN-8l`wJQ;sp^RUQtr?Ot{znE_BLP#^x7kh1)VR;nLsBfI{^bmJ@s4P`%bZk>N@0I! zU|61$#uFI&CpoKBx2V_Q%n{%DF zF)b+0Ku^M&mCx+b)>z)ji2tD|yw8aBte$jUbOLGc{+n}1s$#;g&%eM}I#z1)_aJwI z8UH1_m&eV$3*$+O5T_h5lx^uJP6!n_KIZtZFuI^4VwTJyaIM^Yid_pWTt4TF7z5CO45N=K z5z+~~V-F_Eln`ASs^mRZTYK^02VS|F${0XJ6`Q6Kp8*N_33rla)JPZm?_VfX z3=9!HIPxb+5tf+F@IzRoo#$*{o!~Kyadvw~u-knm(2uxtqwphbL$M(!ZMFlByOS7(b8MMN6G?xIpye zZ2p~8E?{C-DI@>==knH3K)uGL8*t`kn(mxa-TRo;^IJ$1ivvSDx#v3gaM@+#OlXQ5 zmjxo|G%iv4fQ09AIx-v)Owt}v-;x6D^7s@BR|W&#H!}7kG0z!sD=wDctr^FkE#tU+ zLL%6dU<0FbY_eR%=i7ac#X}uM!F-l3wD-8$d+1^P>*XNNmEY%c=we|;?}h(H{~IJt zRZ%EP9;?j1B%3v#yn{ph-JC@kxA$U&|I-w!ujXyVZ>#4kiy;uiVNl%v1y%TQ=nUu| z6dwX9hu=aE1^n*Qza;sdV1$nvvX&9Zz=q26GGwH!HOwRuk*M0aJD$$3BEQcIl|83l zK>c?INe533@P{~aepJ#1vtjZKB9rFNefznWx{2)fo~+w0F6mjY+XXB<*hGR0908I{ z{`_-*Q1=0w-ARiYWSC|0Q2N`$mlGz6&?&mUSL|$;IYk_#sP{Y(5sTWvqit`}7PN#) zIS$YRM{~ylE_oGOWB-l)rh%bb(JjQ72kmV!QVb7j@nJo@+e(KoT*pHc=h@w3Jk7i6 zobhv4yf$+19E+V3hr@+4^H1y7-G8{P2F;cjS0mk)R5OZ%v+21*UUId*?l=EV_sWdN?-q4(be?h#m=2ae6?0Ip7tNbD}AwKR3^_guc<3`JOuPOD_-m? 
zqnAod>LxUJjWZ3O2zt#b^>R8N`ZUkc6!}@E)r|zGB&NsaM@m1Gukn!IvC)SeVhPKX z<`n%bFH4Yiig_e?uQ}@;>rFaOC0tyIA2%lsR+;l^-FFA^iF1C)=V&=tH2Q5SJzsHP z8Z0o{OO6Of6l)Vg)ZSSc$RdStWlfr01z?KYo z>or>f?{nangDYi`Jw&~5RS7Ap-E!*5K&l~ZGW|g5&Nx%F=QNz{K-$p*o}vka${kOO zz!{JDr`j>+IVW>#jK}(T64F$I<^A;JD^P69Mr#Ra(^az&@|vwH5W66GWb`|ysIxto zYA`6rH7E8aIJ#^!Tc;wOJSG@BA~0imN)Rgnbg;ij|FY2nb`FlOLth1515_-p<<>o4 zkVZZFe;G_km(uX1=wsiZ{GUt~ahfm;Jbv?$_S+v0nNT1jX)L%vd86zD=l2`_ubMRB zU(tO|7HgcE?!_?xg3Wd9GdV|u8X>@1z9MfJK6M=lx6H=UoRQP7X2?Wg00jK7kdqfP z=y&`oj63uJ9u0XPl{`-=j>^{}7Ur4Nx-9Vf*jOl1ovtKb9-fOUwEiBs)6U*9nxgK0 zqII}@k=m%99x{NtX+%VnZ;;2dIq*WqF_=%eH%Q|MTc+h{6q!?;1)(k&ROZ_Qp)It4 zx3imVwhqSpQQ?%I2%)ZDi+vEy3@z}g1@lZHSVEWNR;gUxQo(N%K7{$L6-tpjA;nM( z%w0gPF1M*~`EuYCoOSYsZCQn7RoS z@+-RvH7BKUPFuHiXDbl0qvlg0xbHojOO%$9b$&`MNRhJFd%a8bE1XG$v=*8lJm?;* zJgtQm6jxs8a)n*JZzN>3Et22{ZVy)rx?s_PCSBNL_JK*;Z3~d!Lve0-q>jF;*37fp zRc3I$L@wu7(7pS8b<~8sYks}@VvUO6`jzG70RZE{UUErU7Mj31d_$nWy~ohp2H;t& z2TPZ(*m>ZQN|idb1e3qk$&N0^(uwYR6{tQ$J3if`4&7!AOoXxbq=4YN-SBhq`Md^& z4M6_BxyX`lqmWHHA-fM&z!$ZY2~upOIF^@k<@IXGqiT5pvF@+n^L-`rZnb-t*NX_M z7@RW->MNPHisH7bn_n3o_WRU=p;|h0(rtc*epp{m>KZhQPJtK8)}FL@eSrO#J}9du zIs78!2G9#Z5-wNNpQAQ+{c200;!j|hCOkY8$$K)|YdHW?+LbQ^XVKf+lN$iP?MhoD zOOK|$;v+<@Tj#(p8$X>B;%m%WM$5#Vs4WQL8ST)R>+dICBsUYp)!Q0-lPD7{oU!FB zb@q}$uZE{C#2;%Q?OYvuK*aDfM8@tC%lnWJX2dAxOkM<{#==*pnyESbu%zG`*a}J_ zbJ#S}O#RD4CPC%gK&l@JN{lFGfr<_4K8s-Tksv6b2WG*LfmA?lsz==fqaEENeM|=| z45`N-M|TKej7OFUBi!fE9BfESv*{{QZ5#d2h zDI5y!Fl@E--x4qMW3KsV9$H}JgtS~bBUle{Jin{PRx(w5XP?vE#tMoN_q36Micghk zPV-@?BiAM7dCDAla(GXr=*dY>uy+Kl7nH(sRNZLn1narUjnS|pZ;JU6wl@KQNHj*OZ!n=LCn{hH$t_7uN10${qoa#!O*=#DkqF2@ZWH`dR=b9L@6 zj=s(#Jud^5vdrf+B?weM)HS;ol*O9#Rt<&zvD~3O6I+6Qua?C{Al^y5C*bjp6VFJ3 z0rzO8vNO|KbRs$=^(j59Nhainu0Yrs!#!kSC##iSZh20mL3}lhGg7SOghd%F3jk`0S|b1I1M#3W8kLUJ1k2^}J zhi-$w7J%wwPhn|Mj|%qAwT8X_a#YDsoGrChaTwy&|sGmw9!p4b_offn7d)KNFz%Tkd zB95_ru^3>1ULW-QOpROb=2w3U@-c`m z2i~lm&#ka`Y)L7vUEan!Rz@5ufu38uTThs^o+1+MpA$WDF%`+OoK-9RMtmqpC=pj$ 
z_o#wVLHe#Ht70VuGW5ZAbA(PX5@!^dp9!1!iq%^`(97QEgo3l}uP9^aVyCX9z*hD= zRXtN@6D=AU7L82g6P!N4p?7KO#bX+F3S1UXlGNZ4d_3!N zBE4zgySNc^9u6b;d6@n+7kH8%_wlFN257mv^7U7jD9VNYl+(o;Gh}0NC+748HFE4* zEf1%uX$_6NowA5ypU!_ww%4CEc?E9}UPfjZyEbUm;i6D0D@3MR2S@T0D6A+Q|uwX-&V;y))L(n8XxG~`cI>SWwV(3dGZZ0$F*z^F(j6TjlrZ7bNSDnb@a{ka1u|7T*2n0w>YaE6=~DBr3QVXGRnC~cxhc_7-y@nq;JJ( z-DK|8lnB!v!vW#h-v*+{S$ouIcWC&!{P}<0Y>MZ-V?dbG>0BdO*z&HF#PPGHT+0qj znx*m43x1GsAQeF=gXA$5ii8k^g?qvc&}o`p`V)G zA6X5$=cUFCKa*uoy8buh2gx8|T5A}33gOe%olE1SFw#^Wjlw$;FD_b(P7fmpZmF#* zq3Yqt14dxZaLW)cO!lm+Q1SZXI;w~Nr0(^W83Wk+1-BGPSnf2=d3p{1yRtk+a00{% z{rtrvxI-{Bu5K(PtN6Zn&l{eil~t_`Kcd*usopn4#Mc}=kZBtN$|h_p7X2;vSL@0O^xok@a_aTx&PlXN2b5~izZ$ah}zjMl>AU`Oh4H|$Z`PwU9jWXi?F z*S1QXugR|a%Nza|zbX?n-Qw`z>g31v{&x*mLMI9f&D-97pDi4S_Z~nt=j6t<6J3gz zIeTnf*2>VDJ)5+MD*WUwj%2cor!BLP22+iaonXz5C^MctE=X97z&8IKxg@C#5p9AtyyEp47#LZKF8$KlQ4F|A%1a{-1v5 z=_Ki3{-;gi^?ADgIy3(3{82BX;9_r}X$ELSp~~^CY>qQQ0%YPOGJ%@-@t`@$C znrWm{?a5FxMY21P1;gD^S#)=gy=1$Wx_n!wwA9lqguh>EErl=u+T_-IqD$JD_tg$Mesj_k1>>=ojLT$~ zg3+61%MR;S9G#PU-OAzuF>omvKQ~Sa3ojQZ`DfCkmM76bWku2niR!|rlV}Ud#CZUa z#aD;5lMb)9Qkk$c$yTKC&@NBCw%rM}C6q$Lc1iLgxQ3l5Ch!rM*li zjQnoa5a^Pn^f^<#e!sw-?wJvxGSgr82%1aT#I z1Aiq8ENgY|^Rs-^TQO)zXcGE+S9}PuO8cv8=x7#`M|av9N1o@D>xpR8 zB4lJ>P|AsRujQ;CNP+ilfwad$+%UiwZWGo2+663IBy)^aFi-a(Ur9nWRcDl6={;05 zJ?m-RjiF&U5INRDrhb|88bGJnIt3k6&Qve8PwcYqjj&o1rxrSib@m}qEHHt;n^d2l6Jh1PK)Y{ zAA9K2U%t>|5xo*ac3X$KB`gu!>0nk#`C+ z?;4geZ|9IS@8FQM?qHFmxS9j7yN|EXFh)SpSguw4h8kF?RIYp$nx8?y6^P=Sjhz)8 zAcc~GBrJ8UVGbRlPg##TP)$9blgmo?e81e>N7Gpffswz5hwJ;Go~o-Xy9l{nY^$-PcW;tvHl!E zoo{nvIZod;Cks~aFSP;|ZFyCCn(!H>#!?%8dy)yV!^(H+O36#~W(AT~G1|2Hvv|n6 z7H9@~;rm%!HvcNhRg&1ERT1GbI4(s^P^_h$O@!0o@_f8YZNaOB!+YQD>lwiF9_FEW z&P3ckBdlzSP(}vW+D;-yNa!X0k!}i~S!-Z>RVVTEaORem^`VuG16Ck48n`72^Z>1^ z66u3i3##9yluc{Z#Z83byJ-gnjn}`(LHrW}{{s7lpHom8B*`3@VMlTA+{GvgY4hkS zTVNe7L9*{D@?%I+NhL_~@>zG?^zNI7Qtue{X5WI8A7&m<{G_0GMW7VH9O?5;k-gGn zw6SmnLGm@Mtl-sbO(L>*^YO=WyA_&5h5OP(vv4Hv7f>pYkn<>uYrH8%o$eIs()H0 
z!HDbKB|*)TS@*nldJP^Di?uM7tS38@H#ZaqgXNw|$G;YmEFw4Hhrtxm?fFVn8po`` zUPfYcGG3XY;tP|2HD{T5A{$uRZg*0-whm8lT|EfEz{k|=xtm;7AoNskRqjd2BM(X{ zT3FrcC|E*J9i3%XbP8f;6LKdnb15}RmlgP+46%&C5C%0dVrpFavM2I%rfI*hSghH} zPc_)wzrIs*iB!z83yf#&8-@tJ=$iMXN}h5H(ctQSd9f~p0M6IdbY{Jo>ixW1fU~!% znWO+(w0cUS*v%!}_R_Zp5eJdpVDC6ON`ibhV8N$&?ZIJ*H zP|4>B;7mv?do?nTuZHAq_6ahf6F`F#WfQwcY=HU z#5D!yQlov4ht?0N>5=k8UuBiOyaDW+QFbk&mT=<>VWNECP)r&|2|n(%dWybRrylTz zhEvpRF_E7l)j|jo_xL7)OJ#&#heXj0K%Apyxc9|s;oujFyRrT7NEFox*26#97CyGE z9MbeiL*}*GJu#;+Wek5JfCWgUh;vSLCM2lOAD1Io5Z zr3jefQ*?8yq9xnwN3>h4ikP39zx?i$D47R z9!tLplg{wiJ;hK+QbXs0V#zV3ib|J-v5w3UwW$PGr&%)N?1Maw6eM@8im?MKpuTMyClSdDaBS~HA<}9T+;~; z13#77&+gQKUREwOK3o-d`Z7#^dOQ!$xw9WikMp>;!*g^k(|%&v{e!fI#4jmwOOp=| zOITUAzrYyaT{{x;<)Y!}z-HI;@}=dr#c3Ay8T7@E;d@XESgLGVi9>Z40*KKT0;4PD zvylhyL}yRBl=(Hw*-pUs@M~^!ZTMROMdzI%)WRGGLxgKkR5UTwWTTtE|3lT5g5Q z9q(&~ohwJw`jDu;p!fpf`M_!CAgoJwyv@{240HpM4dydy0vH$>Bl*&IF^ZNq)O?Ul zq>q7Zw&k3rJ_k%6H2$-}M`Bfy_y%Yc&6&cBjx~hPhu#)L`6}5QpWcfQ6)X+M)Wo!C zMZTX=*Ci+c7NE!r7+R0w&qP_lg%-hk-vF6HAVK0qK+gx|e!{E#@C?i))F=4nPmtTa zJsOr7zlC)-+MoGT-{MttK0G_0LLac`?&@L*a!~aee#Z9IA$QqTe+n%aT1px{JID|8 zngSy{Lp;-;pI1UZYZ|C8rJy{`QYXtw7ORia(>E~7(brS4owp}L9CBh@6Q{#8vYmG& zOSvD9cM0gzD_jj9a1Tnb_K+{s3>IxL&WeTf5ULi&+~SAxkAG)v=Dn>&s;%ns^sVPB z57)Ub$^3l%Hz?%#wkp>6mC{NG|5RcQ>@u}5UbBZt3kyLA3!nhM^Oj$^Hfkx zU4Q(}>GS78Ie7_^oexsarj|7g@%#y5IVFo~5?-;M*X$3_B_J*De`3B)@&rl=?h4iH!GIYtMwq0H{)VDbj)jZB{2zK*MWur24ipYD&*p zDQW8lrPV&UH^C15X|P1=HhDpZ+w#p%O_BQ|nnP8PUOEhFESy@M3vRCb1Hj&8-A z%#tqs5%AbePV%Y-*|C#MWI|ssF+Ml7Na+*mn z0C@d|cj2>#P*b+pvYek==EzE$Kf+v`Yqsh%RSH56k~9Mjld>NcAvx`RO+CyilxD=! 
zx)O9F_GJO?cTa+vO}#Vs%+biQ%;||_mrK<2%IV2qpYn9?$tSY}0FmK)BY6#Y1XJ!f zRbBZNJDOF^>C55LhEYN~K&tAM;NPxuy&$#NfA)Pf(||{b=0vQbnPh+yDznWm?|{#% z#E!%STMEpp1QOOEZ@AZ-Y=P&PA{ip2e(tw`6pOX~l2=`S5{vs-%pIf-^I zGVN|o|FaoE2Spltf-%M@=~wGtj;*_?Bm;T^?$==m zjAT_WMlOh<2zPlc4f%92OmzJlW|8@JNPMKlfF7QxTdrujYm^I$PK8FoCbMf}v!lRw zlWj0#?%&^1(LOzBimtEQpY7mh;IJ@a*k2KP`G&*OV=&9cQ7fpQlIt*lNC`PBI^ zlImPH8Y7?`GuQAH^NevCDpwA^>W7;4yBO={jIenO3eD73k5b($`*vV-3|sF@+W zN$|&eJmETOLiTNHC<1KQZ)gpXZ<`74qL)7~wB&*>7>(^{NH=Ff;I*CBewx8K%B|RW zGnXx(T66{;&}och%!gvx6p%iR}0t#H?k>-b-JX!j(@Nh~9Q_m7_4Q3U)__TL_+&r&2aYB68Sv zXB1bpI8fX)P5YZi1xm{=w;stwM{h-H?s7BmxRJnWB9Ki`GauJI(dYtwN@_pf4#l!O4So`i#ShRkT2*R@2KaG)bIXkIT% zq-SVnCOj5WT72^wxdz>o)bq;rfZIm&JWLbOm18>BdPFEm-di!@?($j`mt}G@3eXAn z^$Z5vT|&@@7HzS2uIoZScYCIJpvQ>@z?qGMwEQfs9Mu@{?Y+f8sydb*+Qo8$-U5oO zc#$S7IJ>6$F(&NP4UF!~Y2-Z%Vc!!WS!NtUubD&4LrA~(*5Z1MnGu>As#sHV?vrkg z<%{sfl#5{)x?DfApO+r(4I^|C-iUoTEG&$`0;sjcZ*0%~_#gq(HIDlQUT>DVWL>Fc zw9?HI_cnG;&LQf|x3n zMu?NcVKPQ)&}eo1W$FEPg|X}^rkU4SMnt@uKMG~O!esC~F@vm+DF$!BIyS7L93fDR z&tPv9bwxn9SHl3|Tj*fO@kOS5;Onc@$1-%Xkt9DUyIOrX5+YWQQs^4`J|l(Vj9r&D z3A`b=i!uY{Z0sc#W{(}@b_O`ep7MyGdf5hYCAX<@%hxBGX{OtF((|2SzL_4j`O>?0 z2H1TQF7bzCoK8dN9s3dhbQaly?H?gkpi-_?G#n%oT6Db38Oi59U&*?Ug8jDtU=Clegs9VKmMBT`EKe5GaMKzW@J!dkpv@2lD~UA z@wu(x{^P`>K|NLl|Kr3c)+$(JU^88)ane7WSNa<+;d&|l_dUT!PCH3NH(N0XT`;1M zIYa$Z;sh;oqo|WR`EHe1bW>@Z;<>(WIoI^+6sM{DM%acy_yTrQR?*9LSVTQS zXN@q3QlXwUqD@S8zwceJAP5>;MDU|9-%Dy|BMqVl1SQIwn;$XZ7d%(VPos{c5mdfv zn11c;LJhB0kS`-5icxV5_Bdu8sIEK&5p?Y$DXy`@|uYvtztMH#L6ZqWM_6=J-bxtgwYC%sI z>7MBVs@J?6@nzVFK4rq70~H$#&~1i}Z;#d!z!-(D`Gq7NkiV1;B(va`WKaMy;wk!- zNu}tQO$uACJR9?l%;AgDMn_dvlq3=@lqQ%}l0sgLMh#XfRD(Wsr_Cx`)v;aMuy6@(|0(d`~5t))Kvo+q-6iS?%d(5 z`6YiMlo84KYnnB#QQj0Y`ArCtdwW0b7$7GFbo7Cdg7DPk_BWbhOFszv4?+!6B5adV zt?I!>j~3u$eFK~&@gL(<_MTJ6nbcZG1Qj3L-*7;$IHUE1^DpP{%1b(`^dy2XqZgyg z7s0A|D`7O0v}{C-yqxG1rW;qp3k_@o1Uf*Fc`k@KvT^~gwiI>hfxSN_9`GD#Xx0D( z6hFL}I~T6Lvt>JlSP7+HznkqxJ#&q`EEqMkx9irguIFK9zma=$c_bC;E-d!tO@_~9 
zDUa0eF1y|v6C}3n*lAiad{r14)n87(e3_6w@OgSFM7s&|r#u_Qfc_9->G3&po~mP@ z31ZIE>Y>{4a13lYtWP>ITv7aJ^th#>N`$vp9Quj7Qj}}9JQDk6u^njMqbw5f{!ab& zrf;k)hx)U@joQHY>|%rG{p8}Et)Jt4rnKcF`Km6#l@Lu2ao(^>bG3+rf(a#*He{Y8 zAzUcwwR#$*dc2?8vDm8AliJY;<_<1|Nmi$)y{0 zlPcKNUe&kvTmuzD9G#RIO8!xwv8u%DY9#+L78f2G%A7E^5)MRY+bxB>juayo5)-qc zP&M(X=pU%Y9P9OU=OQO1FwJ~-@u#@Ou^`0J2e6TBuWGT?EqpN%U+ zXi4>n&*7Mq>~?v|*J~cgVK)Sv1izAwoUh^Cy8*VP71-evQb}+~`+sw!b&6eNcAt+l z2&FY&vm4jTtWgT}DwJ-&9X;-aNEG_mjaO8Y0kRkGK_D5NzaI#-xN&^n50h!HQpIKi z0(}9t7T90l_peyc^csJ!OqG5TS%#?43C9S%iPscdoj$wO?# zbU;Zgp7$$jcD1vTeGY0LPP<*JBysI3=z*!fQlg^E-_~m6_PSbH<3L!j@8!MfSf|0J zB{9e&3R%lZW<~K1Wx&z!5G$?eWerHtr8}4v-+A)& zxNH>cFkYWOfRiB^E+CDp4gaQV{=ECm^8B0H`S+Eco{f?9_v1f-|3U$+=w@q#Pa~^m z^5<-CWbKI0@E0Alype;Alf8kF13vR#5dt>Wj-SsR@PAWCfAU3385x@C@!PoKYtVj1 z(9zM-;=EhdJ) z`p&=G?oa3Y2h(4J=wCe)pN5Z5R+62M@2~p)FPVO~?Ejj{SkKbo5A*upqln>m_xvZ< z-<0hP^q+B`eOdZ*w9@eX=54qB92)pEf=13}21fEC{GV<7ztFj-|8!AQSV9e&Xk%%c zL?px~zQuHhz@O$2MFEiu5&JF+6zEUlEC?!$j6kkzsMrZ85=3Yqgbd~PLw^C{8eX=) zOHNQ|Z!O#c;-Z}CWhFdvE0i6wBo6Z*J~4zW0z_W`Acf|b zIlW2H_JHCQYWgtfKR||W6UgZU>F80jA#%zHcY1$W-WTqFYZ*#PB-zczR2#==Wy}a~ zkzG$A{v>k*$<|CF-ZaVe1T$(={3Nv5dmx*cc}$Z8phZsB89gA$whW;cLdG}`xds9N zkMCt`wTBIRVl&MEa4j&|4Fw4i(=R|+NHW_y95jBus!@2-r~4M)BefOmDmigm3mr-O z=|f`kQwWe)%_`Y6=Byn1>`ZNb-}I?tX#>|=;f-t1{YB}DM3-8`mnal+~z%?j9?Rwe`ZlI-hZ0|NSDOA|$sigy8rUmu4Lzb7! 
zA2!1sfou=SBj+#590h1RzF);q7>tDW3JMD(1V=}qVG}0|-yYyUb~@S2#)f(*(=t2J zt^y8~0oQ%az<*rAqvY_FM5E@xCG*e2W2u9?3Gm6X zA_Mmcgha!~@;*(H6rof2m1GOZa@~G)#B2uF@_Wt#o5W}Q@&@STkC61~g#fb%Q2%|Y zk^>uDq#%>q*OHj665;|%>#!ir0_?Eu1-n_mz?Oc*UlCmV)KC>}A zgYVOieEqqq0#(v79%l$%J)F-csT;yvN!zPiALlz4pLX#&VPp{Dvv9Tu}C;c^dOdrUr2OG)S=EK)gkZ^e=m4#1d{9G?Nac~ ztyIny-m2~93J?=Y$w|-;)=$w7(f8YC>XRdGjrEOfK$I!wJ*u20&L_a+pUHQdZmgB6 zWnVU0ro|Cq$$OcJJLIv;4n%!V{h&^pHX-?ZRMD8YCX6k+yyC2oYKl8=t(2?Wpr}UGBv-m-97!vdT7!kJ zL9;=@gDu~@N)At{B$*>y$T=84(~Xp4PQ62?-dmVungRW-FT z;rT4K9?n6}T({JxZ%|xes$uD!_k=GbFLYpBL9Rh1yNT<1pj7P|#?XXtPx4_EUCSjo9`v)P~gII z0_#23dH{^;2gjS!33>^^6Y?4P03hBa_ZKoU9$lqs(EsQH1^5-IQJH5uBl$p%-r^V>NfFT<|1>2$9c!Tm{BSFZO0xBXX(oWD+?VP zRz5R;5x{+5+_=cttv-VfvKHlhR(*P_TV*{HCppK>9VR|HK7wbWS1}VJ0V4SW-BC_+ z9J327MJ(~m-4U_(o)$nUA(XP z`vb1RLePTPRUEmSYj>y9D=I72M|=~1X&R>U zZ3?UyPMc@co%PN1&ERw=hZItVBBO+-{B7=W!MM2X_B3ljccLcPBr32`faW9N>6c6o z=tS|vH{n|0W07)^iSVcJHTSC9zRQvD#F?Fd4)XW+EW*)l$}JV%u$(B??qB2kaBmS3Bq5m#%0S-mue%2yK? zXE=IFG%h&}XgB7C;QV80cM^YCS{Q_SM1n0qBsAy zink68o@g0b71DFd3l*CtEJ}L`;_EfAcIDbx;!Pw$-x~9^+K?}ST{3tLCqQA?fl@@^ zuQ{E2%z+VsXe*CQB5@OaS!__0r3w59uAr`-RY5clo6fMZoiedltFwwiw}ck+;wj@e z0v=64O(JeAjd?7xH8(51PJ<=mIQ>r0bsh4Qy`UK&+Hz1KY?~tAe=*esNP=xttW05F z8VJ6pM;v>CxU$%m7~-aZ1>$w6ndLPBe^J+bK1yZ}^*AOhJsYX&JM<%26sX4peoh?K~&?i8o^vvjR;exu4SS}wH|ef)TGz!?;` zV2hzA3fbuAn47~NJB8^Z5s{ggSbBQ(Xdf?&lq(YjOVH$}Mt1U;(zWx2EAFUTN+1cxV>MD%D5#Mh`ZbLEu{3rVVM6gG)? 
zKXu!WLP8 zZw6I!cP`CxHuTq_*&(bh;uea70~V1bY?9B-=x($&x&&!)!=n*banc~9LE#`)y?*+& z=%#k19FD=@o8Yw38y2&dH>xH|c-#=-wht0O$776vH~+ZIqvM>Z4N&9ip;k zcC(A^bx}C|+MMjr9@K87!a438X!@b!`rvh8H-IVl4Rva82bU$#We@lQG2MgTDru~q z=lZGrYqhw?1M5S3HrcukPlGzk6)u)*nCZ=!@=4}{M)?@rrjOs@k9WMROi!i%hgkb9 ziGE9`Poh}1e?-N- z*;*QX+Pr8K%-oGW4K;KO%uK&6H-A}jAb;C?{#bJ4^{hXQbbkeZNBtfBzfJS!y1{gi zQ~&@jqg3*gO3HK1apsiE`>G$-JuXmqnTh-*tC3=(g-DhlqWuZ5c=&H3InLMDJn*tU zHhAv1Vt)k2ZD81Ri@B4z2}rN{Z+7*<+-{P%+In%=+D5edoQZT?U+A0_Tx7r8R}5Z# z!^>sy-iy$w9-wZkl7MUHa2IQ}6 zu(ZuRSsO51s9JaGEpx!B;)rq-EWZi9kv-v@J;ye`oZ|?}@?*+(&Qlz{j}MhIlOb4^ z7uOKili0Rak88BIkhhllj_RCD47ese7(9Ijvg7NRYR(0o03g}nU>>z;N82v`XZt@G)bRT1q#VZo73(8^K@OQTO`s$ZG$NbcN7D!wd| zx+if~&fHK;N>S*iVvNu$sF6QIYZl1C1x5GGC7Td>Ge!omz>$vKUbfmRhMTQ$AU73M zY<`y+s|@C@7_h9^cagMMkjXw@v1T86=ydoOK7{SYL79z3924x7MIq$#NWy2~fKAb7 z_UFsyD9TG1fi?jk>2{Z3*1?CIUv6OYoQ-c5xV9)&Hgf`w59Q4-%2{pStb2`IE~;CC z&X3gsooSkETYhS8Zflt=5H>kXL1@`J%X6TnrE%cg=yDjZyuz{uEwUeE`8d_m*LynQ zkF%>A5nBay^4dceFsd|uTa z$MvG7D-?-(v1x#Pl4yur&3^%!IG>iQThi*4wqm}@l%{eIx@SI?T^HVAN^+lciYGR#zWuV%9^%u`Bbt(@&FKsG; zvRJNPvG0K&W#Z3eh;L^3{+a}CkUsJRdNhfg1l-h#@O@DuTSnVDS3cxOe)&g)`y%;- zrXhz2cX}bg4m)~uj||?*SHj8&4YZ}AkDJ!nmw>O{+0kvhu6UXs5G+1#BCn&bccY$s z#<<&UoPb;^wo!CnRd{zzcI6!&F?#XrA3rWW0F^k(ApRfH?Vm*fI$C<>f214zZ=>t~ zlw5z?um7pVG0XeX{9$;@-A>%OFsvAyj)(-uA&B1cwVwu+uycS{Xe{2 zM;5rZJe*d3YOUREShU=`ngQKB|FXHaPzKFh#`+=`WrCL67<%2^rVDlXCwA3J5XL)?CIBY-fq@-x& z#LATxmczAhcD0f@B+iresN-Q4#sDSJQ|ZqI{$nBbb86!F9tc%FR3hI;`OkNr-2eYFo zTQXiS2iK8@XJ#XX$}wrmp904U3i0y}ON>??aolOCc#8B0ZqXY;dwlUiB+@185y$f z*O+$x)Sf0*rAqKY!-&x5-lzIVwKiDo(F?#AIUk>s-0-X(j?D0S{WA6)hf`O81qY10yJ-i>L;B=!g3C!6P%c#kW^6q{IS`zMFRtIHY;5^btBR(9jykw<4Pw6-l*TV;US7*``zZ3OEckAmQ5#i zg$5OHhE)c)IW)p66~?x2qa$}WT9$p;VLc3oCRuIDyeq!ox;z07F&NzFSx=P@=c0VQ zUF}lL;^L3XhHMedPAeKozVSNc@t^wvrM1lL?Kgkr0*lNKV2!iOXZf`yal1CZUn0W6@BX1Q5kaa!h8S*5n{p0D0w!%j=th|~(X1#lCUzSs{?wVD>MK(DgP zvhD&JaysUlDrA%4uLD`x)(*UnN_5 zNzX}x$YPae5MFSNLs${3cQ?(i4xV=Rx0G~m&$pHx9^4Ba>v23>9`eYCtAQD`w_kW~ 
zLLW7+m-s$!n_?d)GbS=TobNMNcfTTa+RuYHXY-k%pC;kdA`ONpU#5;}jQp1{OZ%b9K1xl%UYa#XtYQ$EA53c4!E3`Z`%<%{y6-4hV^#T5$+l$;t@e zT*%L=NyULv4hM*?5%PW0F~KOHBCoPYwT}0BTuBT`6#GZ9VJGX414yGDMS4|Mps2BN zyt)Sm($_5R85L|let(Q3RhWM3a3uQZF*Od8P;m?#{`cgE3yLoHCa2hRRZVGoPTA%a zZEFn0aZ$f>Y&eNublD}j%Kid0?$1Jo@!hYRq`W*&5RTjShBk#o zK3*rGquOgRu)P3s$o!kK9fRW5U1l@ANepTG_iujA(5%6Iw?pUCT6BFbMJ13Oiad@M zIFRspg!A4$`6(eB_WgQhhxFYfO&50j>RpA{L|ygf4QvZ|;B{e3CdlJOqR^*`ga;13 zU0Xt<{dK?RF*}4OAP(Ok^1%gCa}GoiR>{i>Jo}gS zjF1xolU~-5wmTnn18$P6Yawj zB!y=Kqh31hJ~tEk4lO2P@M&;4!mwl!kMTK`&+x?)m9apDLLJXEm9Z%>z3In^DV58U z4N!@?L-25u4-5P<&=ZGz9#+tcE~&B<`s$GiikZzvM}`hb0%i+arFG0=bDYV0afbd? z$%IaSx_h>>B3b`6&&|&LeMV(b^c)@wkm~+NIT8$(mvK(OB&VdK;5^F%UR4wGDB{z1 zvwVXmSkvKLrZB~DMBp}bDh1WC;h;`;*&KnLYge6O+gGzk{OK!g9%(rw=%TL@WtknAlt@{ z7tQ9FnKHi`5@zqr*t^I52kxpY6U}$CAG8(#Ck}kdqR>v^7>YWaoY(KJzOi`j!k)#Dz>}sVM*90pSp-7 zOKLXw_9obZ19c=Wju{1`mGLNG^lc}%7;SQ-^yeGv|4|{QwLCLw8OJq5F`^8ze?UeQibeoQ^pRcCZK{&IPB9 z*}~VPyD^#c39{4%QbeAp{saShglc61J3xUW9`zk-eV+qV$QZgyFf4l#>EI0?3-)^Wo998)kDt zmwM-V!)A(!9`}*+N~%1>jrKgN1OatY{a3}ffYYj;w)3xNJ@tp!M$;W0~RZPks2%#Q3Q^FA2rt6zKLB38-+Nv zZCdrIMM70}RD5IOq|~V8>FB$BO+pZCVso*em{7GD=NzQ{?wTyCmXHyn((#zh`u+}f zgS5#fw*Kfww~aL40cErEeiYOI#+arsW^r`o*>9_DhE#31hMJ3WSFt;4^OIRP|>PFDkOj@Xkyd$b!H!KuVN$W_FQ#e zW%X2&M`1+q5mN9f=!~ONAQ7Sa!!#lxs$yvB`uBe4An+A$HPLca@)tusK)NBjOky=? 
zi2S}nw)ZyDt&0tQZj9jPd*2N>)AI|grEa2|w_!_{lxZ)Yx^F%U3yKoLwGh;IsDaof zjvO`M%)fg{gl)WUZHN$y_NGDCX&1QTX= z@(Ql^bYTHxB(@~jgiTXgr*xyz=GwPY9JFHb@D*Dw(0kyR#DUI$FsO0!Ygw~40(bQ^ zlNk6_xvC{b(y0kZJM+(9)ztx`uHal%7U1j`xU`{_w#8BW^f6IX@tl4nvA)n`MF6W* z@glcFayIYS+xI%;njGD?KbWU)SZmB5C_}vmWl~=BNW`=l&DeP>qUw`moKMHRk9?SN z?hZ@5$JSJL&78JbNi4&93)8?I`*~fguJWiHNe=9@rv@Z!5z2bUMn#ubDR(*NWD(l( zSgVrutnGbShpqs|upU0fCxD7rs9=A%q)sn`+Mq?ZA((P_Z<^#y#VdD#2 z;z1@Y0GsI6Mj?$KY52R<&gA9ufD)K_UL;9wRfMXU<9>Qgj`i2D?F>o;>Npd9Xxma> zusdo}@3L&Z#kYEd4{?{NjiNOVH;ZMy>#td53(xUfRB#{Lih2cJFDEShV530Rc*Y)R zM=7ivm5E2G5r!+8Q5EluiaG%W&;lR&GLp=Jq*r}+7H4zZsx&nsv$!j`iyDz(upEp z32`r+zRpjj&biY)ujntWi|M>9D$@q~gpP7QPd3toU$|~y&S!fiog3khz7S+UzPvMF z<@gE?Etx1uT+JtWT?Kt>$I~w7uq4bB@a)GZcfQW)Q6-TybB<_004;oqgl`=!uP{+b z9Ig*K!`{KJKv|Tlo39VvdCySedDDR%3TxbUbk58LJ;j=NNqRaa_3gWA zuRwQ-wNC-r^R0XCGa-oh5@%XhE9(TsA{Hy7h7S&^FGy!fa^ zD74AHhU&~Oq>)BrC^hfuMpWQ6^m9m==Y7a!__2#4X6F9vmm;JO8aFMZ7SbB|#5B{o z^SB##vtuvfZF|mADPo##!!M{V0vs9OWKH6pCB`^(dw!=&%o~Hmy9oQ?GZD#Hx zJX5T`Fe_jV?fb5IXQf6`_7Jb3{simbv^xqXlbs1zPzNUXHPWyaNs_S)^<^VjXp*Z( zE#ZE^?FsYMr?|y7wy5ctum`Y-%y!Gru@z_zcw-EH5f4X;fe*ZR$?-QS#$-!TPC4a7 z?{j$4Z`2?`X`xPbcK-R4l#wl_Yn7~s5z5T&WHoA?7CG7%+U>6I`|sDOCO%iC_be0b zPjfQvH1kX{1{>obJMa*mYu}ym{bw-lE_O%x**UW6ISFn{3QN(p!wzl?-BDT!1G0xc2tibsOSYerzqE zoL~08M}RH9J3ZxmW?s@ZLWl<%RD0J`#b16r z+kSAPQkZ+Es+2r2s^GtfTg|+@Z=KuxkpEKHz%+9gEOKD`3*hT%Jt$)LJrwf9kGq>- zQ1)Y>CjDOrZY`Sgag2(0=+^6AM}7xSm~m8-qI@IsNr+Dtw{6WzuW)JeXc}0huQxh zOV=2rNz-h1cXn*s&)BwY?AW$#+qP}nwr$%s?z~^zKh@dQ9nl?KRi_fCGpXG^dm{T7 z#yLo94Rf)h1B|EkRvmML(+<}7)9DUUE6v}a;FEEIHXaQ7_}ae4;ytS;&RFzOK0-G2 z&O4o+Z-C&CyIg9D9niP9EF_~>{6A9`_$C?eke%NN$#!ql`!}&+uY&Icod9}_X)(c*mP9hXpbU zpF+m321Rjwps-uFoP4ferQmcU;lp4V1TJ@=%3s_j_<-&^R~Ys?_(y%77fR4elp|J? 
zaT5(5R9F|MP}pH2Lesf(Qb>#785`{oF7ZhTV;8YEJvi><*RR8|i?^I^ zpo%^5pLo)FFX|eAh@>m6cl-7ws`^YTVmgR;*TB;E<_k@7psH< zZairV-kQH|n4?(m*GqergvMJ-gF0LYi;+s1&zSs}gQkq6*aridvH7vH_3ZmPJk)VG z7tR1sLb*5uzYw>^732sZerIj0kZBh7s3glpeP$7-;_SY zHRy!Iens9i35Lc!zS22SiqU})!RWefe5uXu=NaU)8rF{gJI)EjNHsK8F$bKm>ibj0 zGHON`%PlI1MQ#XRq0x%VjY=e=cd-Q%mL(RHfb)@X!2n(P|D2RTJ+MNxz($p5l#Ffc z)9!Gy=Jg4(2WVv)GC?2!iJG|n-Dp7C@>Eb0Qdjy5vIZ`spgR6H(AS9{9(jp7qmCrr z*NWelTd-^}3@zPZp4}mPG3YK%$iqiK_g&`(c-TQL18iW|>RKY}mo-6herQ>l$!J?O z)Ixp>27zOO@^c3qnVvtLsoa5&Eb0VtLAPJ>=ourn5c!MXo4%cYu*w5BKlyKAE2HXj z=w2xU^Q2$TE9keHc`^_odT<+hyFQ@&s=s657t%WKjS7s0jf-1xa0gS4v~MUjJVp7W zd@M)69M>)ec#j&5rBu57f-z+RkObPVAAD-%~@qa^u5aA)QnP{iGIS z#vDP?DUYVJQL{HaO~cXU84UC?Q;t__EF?3N(M}zIf0yH{h>o#|2+EMkv}N$=Kw-Av zqrN4YdOm&}8f(WPIURF}&w*snv$@T4r9F=v+XE2Yhaver6dop1x>N{)^%WwyTBy8Y zmNBx9inmhV0M=EsPd6OUM)kvmtH}i%=Y8BK@p0`6FZTbitPqrsD<9mMiQGq0U3AW0 zsK(^gCErq4byGFIjq11<{kbs6}tSzrx2s<$j z(0)@!Fp?lEVL+63$h2I9U~~DUP2q_{sT%a=9>CNrZ*TV)884ep82wA4HsqKy{oH$v z7j)WaP`d+}MO_E07pHrrp6KoI2Of^#7x<)uoMOwTv{6>Mu>T6a$Afy947TR>_?n$( zd7d<;Mbk@Unv3ht-p|w1&;n(EPXzW-CMgv=loJqNiJVVNn8S9R?Q;ZvS0M2z(%SfIFCB)zifrR_$bu+vOwt`&!sG0H!3tx+bV|fl#RbKaD(+_q1+M3XxCoEgaXyQ(PvC4g zPIU8fAo+sJmH2Pv20}?Lt%= z?b}RZq}Rft;#&0DChvP2#mc}695IJ6!{xB%>&eg7^|bHA{U-V@v>hlA2TL2`IItwo z&AV-=S8xOZ(*BbUW8y_YZ^Qww@XPPI55)ZrEqJ8Wgk5<3>~i#X@bBkdzGY733^5}M zCzUc36wZl0aKC)ME-4Qyn5}MA&FVLgi1yw!jjqB zD28q}=MA^jkSl@DY^P(x-Ajql-YfLz-c=e7Z`I)|fw#dd`uX2#%G_VN64>42-_Q@* zsCAJ0xO3i>6u93tg;6|;#7Ykc`QK$JLhhYe1#Z0Gi&YjM`AMTM`7kxRL@P#q$Zmkv zaJJD&j&Y+=uAy6Qz%y1+UyqwZ;ZeIN!Pn(x0@q>CfO5aW4dR~&%X5wtqhF=E z75Qq{W__F0k!GYP4hmrGhzqMfJ>>2R0=I?v;)BXhB}?sN*o&g1oAhOiRm>wS+q#_| zh^vU!MQ0J*EvX3I^{QQyyC-ac3fnd6`cfQZJykIlIJFzyqZq9)B2Yi08Z4InH@MfS zPxp?v_rIaF+1|;L#v9~iWOc|!W?^Ykm_WC|#p)_YP>r9V@ZA#puLw19uU-v~ND~x7 zG%xvt(P&O6wn|GUaz-yGcD$X0Rx^}sQ(v!#!nvQmo<349eD#fG5*c;3B zpN~kSk(%|V*;2eJQTjn0(8ok%jY~6ss5|VZFM}DZD3`EFZAP{}b?D^q&GZH_I5f(1 zSgJf)-S`~Z6>d_=B>*Xu6Cy-}753?4h`pqczd-YN5SyBioIZ6yDHYcB;H26l7RE-&jeFyjr 
zEJCl%OdD{?{zvdlSC;t!rR#Gk_QX03aotvb3KUX=Y@AV75vlf#B2iP6#ZEGU_JIsF z9bvZd#`k#Lnfb;(Rxsi?WS!$nyc>v>Q*lQvBonf8Nke&y?F<=>f_bxYh{k+3G<>}tB+I|EnqOji9M(KyL2@KBxCKWp?|eUX_xuNlXN}`4 zK~1eZ`gBSGqx!M9X)ZA%|8{F?P>REQ4)7L_B?8Sk4PZdQGnsGz)cIobyK-?f!T$&| zsQD~vY@!Olh_^I&YU-7y;S+QuY>ARQ9rCKg9?EL{1v8Os2XV0qPW0{X`Roqvc4kef z_I%u*{SAHtFT*yuLvdweT$F?y68>wbU_nK{-g67>>4 zTW@M28D=rk)=7r^ixG!TA|?uR)VSwxNQg+*P0Xp+Syxw<|HfIC%HyU$i6(GKpL1d@h~2Y_U#x zjiM_pc^ZqDLK)2pnJjeu$wp>SYyRc7-6t#oeW?Wz79$9M_bP*p8NDVYZqb#6Gz#jzW4ykE_<+aO~WiW(<6E!^p9 z9D+z;k((@2PPr=YP3Gu;9Z`FRONsum%$~}qle<041MBTa)o<+sP!zP__H-80`?sIz zl#=1oMqiIUKmdZ7T-g$?m^fq%36#@)nY*7yg=1#|tp z>)66%v_WtimMQDS)n?F)zVftIo01wIv0yvk{3#?t5;I%^4l%r#Da|GD_OWl1`HFSs z0GKpEyUXJqAHyUWT}9+WG_?P?Hi3~e0d`_zS5Td{;9&won_M6J&%Ay1fGE8>=Qd1} zytZKEGV1J7qjsJ_@OpB@$WRRQAg-QwTUVF1g^D&A>ho_A?2GGtpZU(1;+LU83U!E6 zU6}PYm*b+BtI1E$>FX{J7qvy1Y}CX1Htpd?KulsK;jVfz1A2^`eSkUTSZRnjtt&bBhhfm}=wVXm#xArA*05wKgRGDeabW}$Nn=Tpa7ca(gMY~gMTrroshipADU9Sbf<%>S7Yjyir<=v^y#v6V1TH@kxQ*|67 zvF^FjG_IG>htpxTv&P#iJMv8=Y@F3tj;4K_0TXJ8jen7n-+4|aMNsElV$;Ki%+m!Y zPlem+3@k=oiOfqXJ;*|?TNKgXrc>8s1;`4w@OU?8^wAeMh;Daqy*-;Y&N><)ylsS}U0 zP;Dd_B7cC?&d8mC9b5Mj9#Pi{O@& zuZ_R?y4iW@05Fv#Y^gF!2C+a~A*yY=BiCl9$_+uF6zwEopUX@+AT0U*Y>o9ze}DZ8RMC23U~pWk3Sm9K+w%*o zMkvj~7Ze+y5@x;c75{N}z$tg79C5zYG>iCO2IQC07$=a1dN5;ww1b0YjT&s418!At zTmAi$UpUEr^c?UnPpb1Y0=gd?++%Ujyr42Ij2;R6|Fu|4p*@LG-58x8{XyTqSgAXn z2FuTYqP$&!LDESvHa7~1CaqL#!t~1E4*bZ)L1n?EJ(02?Ff3!drpSv5QiY_7D!b=e zM8#iV!gQ5q(inU7O|aU!07p8UB2yb@Pvg3Kw5@CfDj3bE65}O0mK^(djI6GEOaNu2 zy+RM^FVLipplm6(SZ#1?>8BJ;Y=$=FhX@3BOT)w@N z5zn+h5d-GlTAPVBU?gt{e=9y^z}sh3>JL-Et!s{OaJQKhaFnA^9$_p+D_L1!l3dM{ z%dJ1-kglj?BcbLz0PA!yN$$!$+q^8`*|qVoV46v@@BL>~o?7v#dy5$rsn^gxBJF;g zM`4J(gB|Mm4KzZ&EJZlsuW2l>))`JfAp9ee99f8#rBRAWp^n9)Td`!&?T`0v>scjF z{Dj%0{xCj9AOu6`q!&Gu#Xg~PJEV-$dKjN08588M`*9u3V#S^!f@AnwF465(v|xOe zunQmjqMbXaF)CsHM+DTrL6`uQ{Ui%~GaQ(n2$j7D2duv5fZ^&O&zy>3I+9o#cN;TO ze(n{)osBCndQ%Gh69~Tv819nr%{mSEa@r3F+T)g`abe}44|Dz}Sdl956LnLisaTC< 
z7{O^)D3XtP<09<kYYJW#ZXt`rs16IQMB+kEWrLE6<(iZpE*#j=oGMleq9>(mODT|S9>AlX)bU*aCZf*(x>Vzkc zzJ!B+RyQ%`NzLJp7(Fb&{}{f?++LpZJ&}g3qz%#CD6b-Yd!t{eGv8(sgc#Nk1tEh*o zsjP7(B=F&|*1lW_{+EG(H9jrvFA$bJ3$bX#w z#hyD?6Xsu3)x`CG6lLxh8Ne1HZ$G%upi|EvIti1V zJVk!NjecI&GRQOF3nKQw2fAEPqrV5(YdS}2VEKBSTNDeY*-&Y@g4>yuBV9WhcfW!r zvTuC^$4`2I+l|4%6G zM_Kvy{BQoET;+v>3s%h6lRNLK3^6beKzrfn?WV1(bbGy-cRPQb8^Uo=^*)2Pw6$O^2W_SZrUv!NEX7|^&04<3g<@J zubISI@1I%R9f?WVX>b~DqoaZe>5hh7lOMzLQ{s}}DKV_g8WSq^cFuABlblaaxfCh% zx3ydiBA`m=*?Owf3L8IKw%izgg^OHn8@dSCqR>v(CY_X^_BgM&33eSv3x zK4CeFQP*z(FIJc4CeHzZhW+Pij^9muiy5gQgrO|q2dyKVS}cD-q?80p21f_m&ju1% z@soj^^OR+u4`V5QC@7gWtgSJUEv)~t8t*Xif@FiQ8mVvTlQ{QcOq*dP5sr&@Xy5_` zepY@9H$Xwk=|0*P7))cX=1Yaerza+cs#*G}mc_?r53fnQ_t^$0Bmi3{rTZ6U2iO63 zB`FtS({uSZ#cdv=$Ml{~bQ@J2Fn;XYh?d35U9wx`?C$A^9S_km$T8u&evRWE_T}ASvmMgMvNq&_EjV$Ito*_#L15qDi1g>)E zo8-x~%ZDS@{~+31@~llPXY%K87WdCiZlW0}J7XTUr@qH}n>N)d1w(7Ug^H7b=hk}Z6f+SOEY zcK-%zzW#MoFiV-X^d>Gm=pdmt?EyEx2(l5A(zjSz$we8*2p+PsE}$U* zT?G_m^Dc|8c!qq&mF$;b??0B*K&!1xNMePqZXyCe)*(Yn94&5pC4VA&2(}ZsOA2|% z`&C6h^q?Ic2`boKh|?##6KM%+anG>99|F~Fp-5r1j7uxYl~f#+fNyi`#H?&{!H`dq z1_?%SOtH#VG~;am{7*83lgu3Um1zWHo)K^r&3)nP!_3;ruI%w1Rji ze%+RAwoGR!qGACeysFl_biy~aII2nFnLTJ*>x;Apd&k}3sQv8zPCSM``hs|lz z@+G&Mv|P}V36Ns^&FJ1qd`gTB^YrAXocS$+ew97kcdjIYcxvr2+IRhVy<1K8n>{hYKWFYPIry4wy##QVWa|BvK!(y}u9S1Id z(qJfqW9)*d;V_9+E^aj_Fdt3nb;q80y&rNlyp59+w%m%bo|CajDrS8PxVBtEfGOf8 zgJyeWeg2-#FgKbB5uq4MGb+#0x9984k(6S$i}b61+b0^y&D*^69Rn^sbBqRPA?HqN z&O?;wK#9yd4s=R>am2CHG89RH;tjB>5lE#W4SI!X=lw91Ix(foh~xYT$P z5qu^=y73`rWO?)#CnS0F6{r7~7TOFcrVP39ekFhR1>O3aad?pyOwsVX-`(s5N<@Wa@&$L1lhOX1WY8 zz)McH89DJ`arGxoi1N^GI#9;Hl4;ENxa;Gx#$^+(hr_-ozf_yEMLHn?Ao;!lY z5S1ZBOC>?uQCubbb%$s>!}qII&D=)`$=7e;aW&5N^j0NRhHV40&V-SA(;&9e^@~WVD@+tJYRB@N3!pVL!pC}5>ibTr#bPs{5<@ke`X;N@XuzMKP2W(A@Y{kY5eEfx*FLI| ztJax0jXRW&U;Yv}Y@F@|RIu0ybt!mEJi-EQjfF7NZ-7tzdAk`BM7eFz^Lw-L6=RGN zxeUPQ35}@k?6D>(i}BXnZv-ac!tgiBa?JYxwN03J)j; z-b&<^HQ0&bA9Me)CQQu_?Q+=DLq2a6T~WPhtzQVoqRdrQgzE 
z8z^E9ws1TGz#IAncw}1AW3U`TR`mh5)IddC#z+Hw_G^KJnF>M(4DJ{`2A6)^ zaYA=*G(^%P4;7qLQ=qW1W_q}(gJe@4_7e#JYL!Rq{U_Ih>D!!n|F_vpb|MVhwvA~D z(K7*;a9tiS^-h=%iD-L5%|N%5y+^x4O`0J!K1)bJ-32E`M7BYIaHDljAg?MvSbY8y zBl@Xcw0_GS-T|e#8&-P9&{u;9lg`tue0y*mjU7U2tHYncdL`o8<< zo|aNNG80%bm@q(OfQKQx!QNS0x@#A{E=`R?p52(A5Eynv@RH@Z#^tWsMY**Rt`6Pp z`4Dd*gUFsV4&4+Mhji2#*id{iQb-`_q(}D>x*=T4ID~80f7GmI5%%Fhux#%oT%)1; zN`=N2?$7(_)LKLE;yDpH_e0I*+uCrm=uU7McR zds*tNmimF$xQ9oR{K(O0RYKO^I-gKA{N6>Tn(+jMImnM%aIqdAG?5<1b@sz5niDo3=^lirm6H4h17wKL?rGmr;D$ z(~lT(4gQo;RqC@R7-!$$NirNvyCxft(?Mz3HA-UAh|LPc0tti{6Y_s0apYFSsT%~Y zF;?wAOwm_WaPllEd>Rx4Kw<6AQ0fZ}pqYgoI>nLks6aNB2nsFQn9+u-Sp*W3i(N=} z$>2e&o~G#sHvRG87gh{sYNYRpm5&8%?MEk!MMk#l{@Nd#s_fO_eId3*Tzkg%9e@w$ zdQ&oRC*#2S{oX|ofgvPMGv+QbK2*2vIH$}55=cvpUFN&B>HuZHFtkYaF!@)%78&njA>WRN7C{{AN;kS*d^`Ak-;~l;@JHY6(A+|qY(d**=%6G+ ztXbX|q?{wxhoAsxBK{=X5LGqK|k~C!!bM~ZGyCHtni;}0lB|qsiWxv z6w`!J4pQkh18itx8!WS;Nj(2~{Rv!nuW-dF<54hn+sS3z=Pp06r*xV<~8_P$Ug-fof7647uNf zdCNaTi~Zdr5?}vDc!tJ($p)=soqJKA+27PG`_u}p=##T?L6zufCScGkdU*a8Bx6|u z6i^z81L9O7iUVjlz(6-)Q7?Xa7Q6c}nZF-1m)C7iK+q=&Oq5CK(fNnUX$v`F4w47a z;#U(X*Q3#aPLpy-`|c2I2Z8C4$ZmYXm|-)BH#m$8;ua?wYB}|u-iPOV;3MKjERgz} zXglJbUDi^GZeo3l|3%k|?G8a5rf38K2=KeNH=MufNrmxt{#+tjqH z{kghltoJR;TGuOXar$*A9=x=QI-l%AG$vuh5FWKi}*zOQHR zzJa%t--yszCdg#w#?T;wjYiM-{?5*x*Zd42 zHmI!8Jb|u~tvU)rcWLqz+`AMT^+dhQ;9>ZT#mo^o3S~UNO>3ouZTvo=(?!q(EFz0X z9G@VxqkOit-c2%oP`-k#Ur7txy>%ZuZd*_kG4rKMrq!+iRh@J|Ph!%oRcf|$%HFA< zL5~EnsJ?R4gP8<&mehd;Tq|#3{UdjElfn-yR{-6ja3J?YB~ZJv4G8nl&Xr?&QOGY{ z&35)XYpCryn!_ZE0|~`$HJ9&r0)LfV+T&B1Job;uudV<?NRUuRT=U|GkSzZ}uWpX7_l#+@JgYs^YqJ_A{}4W--ai0fh)xK9o307~$Eevrt^j(dXz?tBXFd_*g7kSv~4~fCd>f1CJZ0z>d^B z=p!y+d!M_y*Agmac^fu$sLk9R$ z`TewfJ1PGzCql)BDdIQD+~~gMs^DQ;L2wME&0G#D_XP^dEgBuV9cfVyp@4QJP5)~7 zJFRUI7gAR3O_eR(0@-TP5H1x=e@kVSqZq@HPj!ltCrJ;ES}ldiMa41a|V+ z&z~@MxF?bY`7%m<4bvP1;bLe=5)pEMi4fL66;lMp zNFLXK13w?x)4ku#Irx-Z+Y{_}pqYX2 z$dnf$gGOYm_UWr<;dv?U9OIBpS;gza^tP4&wDr^zlR+cCl+|S?zY^=U$b0^l2MRa 
zv1A8l7B|uh7SNGT|LclF^9*3CQy}5Mm+e@T$G4+~TLE)8hBe1QWN}e#uZkr&R8H#Z zA9(6JVhvYQFG-LTj^RuLN zKF-)fm(K((4AZD@6{^zgClO0O)VwgNRoaPET|T?1AFB40@BaD{=%5i*zH1<;4+CU{ z(3Ad?hz$=rm`F+FOJgqJiB64EGn@h5iX`H4v9&3V;B-g(>Pm-_H@KDqiInYdMo;j! z=J{zXWqukOb&y6}&_@$Ru+yakGuLB8r2((yY3joYgd*2yZ4?>pF0(6~sm1H5;^S0Q z`oX9?zh$HLRo0t}$J5u>dzR-%83D4^=<+jtk5~Sq+rxft;SNU2vawzFMT#>*WJV;2 z_fvt}KaZ>=+pY_nc104tWuS8+KY1n zUkIb>-vyT3o}p!b-YWb|ZXp11d6u@yX@@1T1ykMDvo?F!9{Y~#Oc@7F1nAuC6Gbof z!5Zmzn7J#(`I|%{_h+N*ODqM_j8TF1EbFP^4p~DbAtV)@q@s-CZCj-`LhW?GQx)Jz zNtje}q^Jc&Zf$8}#N-D2sJ2q0`Qr{Q2;MV~&mPDHA!k~(oqVY1vxFrO`EJQ8gEy7B zRK^PkRs-n4%GK+H234lWEr|(8Qz>~qFaNv^SjbWcK53!>4xB2TVT=kSfb=QDrWMU< zjMD+G-pK)qRDWJ3fzUx@XUqjLk3`y-`13G&H^3SGK)>z)L&KhkL{eqb(Fh{S4PGf+ z$P!)wub&C@-4p2-g;XF%eDBZybCjD7djZwk2E2m%vVkRS16#=a+O|-0$xQ@uPo)BJ zV%iY?`*$%w1m~GN@MgTknXEznj4*IVXu}xnV6^;77Q$;&#+98E2ePMVIH?E3f5|A> zukHp;1h2&?hozc&Qb#C7JqJV>UC^O(;}W<6P9AV+L%_ECn9BR^g}Z=)CGdh{j)O)@ zej}~L#T6ySoE1>fn&ri{4%?98!4Ns1q%#sxpY?)@W5%9=JeLM9wN;5_MfwR{d5$rS zfkFAex}lZ)Ryn7oDblY=Kv{bJg*Mz%qc@iBVXP!y0Y9u5AEwp8-Y(%mvR6@}Du(s` z8jUM&R!KCwaEs`5YCgXQjj_7bOw)qU?^3!r+Wo`t^Sb$))MGu}XN?w=`FX~6_!<9= z(yvM^5b=SwPF-sW&;jqC*A6+{J53|p^N%24>pU=t`I3~!d&(LH`|Hg^d2vukP>)%}8--)YkIU)$YlrL$wGeXL%lF0Dg# z_Q}{$^H%&nGiosMR9p;!c}4sH{Tj}+dei=oAI@y%wwug&D}LPT?!%@$f=l$H`Zc+; z*E!eWRJ8uD;asKr%XfzS@`+c&hlF+>iLl9J0Mwek*U!JpQ`Viu8eatHVrO~P2K21n zX@*?iABGNz9NiIoY~PuqlY7V20If4gg?pUy<{yIV$CCTlR(47B$@0Ii(yS}YCF{=e zjn~TOgh}m@J;}~-I9r?ZFOB~mZq79cvKpuCH+N>Zz@ z?d{!zM}QXxlDlM4B0zhp{ZNmgD+J+F;0=5^?|qT-j{G@lUoNQKY$(9kAyAjuYgD{L zDtk*&1r&t$)usGQ>T^)Fn>r4MWN1gPF#p)aFX6ul!V3P~2>J!0wxcSq!5>G|8V9+~ zThDUCEut>3W^;_G_aBYs!=ip^;7DR4^HLpjQ%TAAJ@18rX5e z3~JVnjUqkcgG=7e)?bjRXd&xFp}?w%h3LCpwaBTQ4ai~=AuA;J6Ybm_b{*FKw)oU? z8@;-LfIxU>o2um#b*EiEdz_8t-8(%Ufqb0ihxFFKtyUC9=WXI4;KN2kZDWVV3b^+d zeEL@@)o7R8+uQY$B$^HGURz}bL=7cY4?lEGw#p-`jnTiZ$N4GsniOTd_Tl(KXXm0B^gP_VaI*SwtQ@n z00OaLeT9;%#cdSP~QaiOd$YDr$1YUOp;9kOzJj3SAKoVK&A2{?hPH! 
z4`=WXb~~1T?HT1dohtuOf}F>y6cy|AXfgZhm$W*#xPyC=$9)SUwurunjrp)mW0sCT zpht_!bWOSK(lqV#@233yGjQ+{50ID10>06Fd%HhNuEx>&nA3Vg{X{1f?BP5Ntw6A6 z@|@ysJWns@KrHLuVsM900_AdsX>!&lvpJxFU(T!~$w!yu7oBwZT4y%yaY{(YH! zW7z#%#rPwZ9dadkZUtB_`+ba-i@Y!zdI3UrI12VzjmEs@_=il`r&fNIp`N)8ze#45 zxdB!CMGEjWSvJn2LKt@DA&i*SAVdQ_I3w((!CHOz z;xm!U(L#6(hP~UwSybG(oY2Xkto`sz2g5fch6Aotmc+LFA^d#jSd~@dH>sg1GJm~@ zCT2^a3_EN%scy?3Giv*Kzk5c60h*Y}=(VsJ(5L#|on1Ly<6)RdfXP+|=GzV4hp-e{ zure?$VMiS6qcA~d*CV{}kE>`oX$DcU;ABdftf+e!WVC4-?sw=o7SxyAezYWQNqWA5 z_5jnp6~{4@^Y_DK=Zw5(Jv}2;;j+(Z)YJhmlfRF|6(FYGS->{yRs>$tKTu7H^_H~I z04<2R)gZIhXu^ypYzRsEL%naP2Dl!!NFpVRw*b2@EFklV1h#I$v5`l$Oln?9q2^o$ z(#|%KTK)ZxJ6d`8)61TQ@f^fK}Sf3$0pzLJ;=Yh8Z@vqA?DAsVVt)F#vZs}%-Wp!6p z8IiUWJggF}6pagdreq<15ID4tDyi1$7)bpd>`!Nzo?i66@|w@e;@tiBoX+X&MM!t} zw>SFyf^t0DA}^aF^l`{+K^_4!HevNNmt$g+ONjKruk1S}fK_d}OXZe6f9=r)HszrY4rS+VNw@hI?vS3Jm?4SNq%vQZ1m)YY|* zw$vBT+o@RJ=yx*?B4HJh$gqg2)SuPig4ecyZNtqU#zc!3oPnxo+9Ok2nF2kVC5K7d zq$CO5;O?1iD&3;s8So-NWNJ*XgqQ;4AAO-t@0Q!hWzUIR-7I+znhs+p;C2L&*utS! zE5cwc)3X=?6mdM|Cx$i^S5{DQjWBe*hCTfkKj151966qSsSB7C+tUVpAB-uayY|bXu$%u87{8w%EC+gv5zZoXMru zi(zEfimheSD=k2pPFIv<>Ll}RHG9(677QqYKTNxws;+~Cg5bJywJ1JM9WiB|8lcBx z227ExzEcAnOPIm2>%VlDHju=sEs+H3BD+FRj4ksxW~POsORhB~U@d#ti!Jnfp2~WrF0?De~nKOfEYmORU5V|cE0d4F~OUQ|q;JAVId~dEu zhWYeoanbzL12^9oHa=(*?_Vqpa#28Zk*2G??8*>54RYvnNQ%3;Pb>>A1}W7sXK(SsU#ZPi;Yr4^veQ$+uxH0}APn zDbKgJQ_y@VXOs6NA3-FWMH?hbcNV_PUyHaHT(WsCXX9e7XBHzK&mdhccGcJ2 zS^3-PSAqFF=r+}?HG=>m$3)^u^Nyu3J_W8uLnO>kQ3a127t|~zziS)x4(|uw? 
zJi@R!zwUz&qn~Z_6m=mCQs@uEmc5l?pW_~)8%7UG#2+ns^(DuOU^T=-w&n^?V$B%6 zl6=;ja$&c#`Ux1Y)vV?nKEX0I1}Cj!s@bbjm4N09T&<3kF;%-8DmNx%S106kW)o7R z?T(VbiN^7TuG450Ox18p>uFX!b1PiCscLLD4w7xDRFP`|`e{xB(7fO3JmnLFZ0Sxu z`&&-VX%0^g15o z`7Sn~9Q3fIdu2?8XUV{=upYXYQb~Cauk`mHA>*r%wG)@@G#|SXid4N2TOXnu2RqaC z$*S1H@ok>zC1lbyidH|e&4KXc!U?7XwIw8u!0QA~aH;%;v^j+4YGFRVxBt&GV;yz*KXad>EHGb8kj4F`( zu;xqET|!rgwtv}Bv}*Tg=raGu+*?3Z)pc#dfHX=N=r)hxtEM;)U7dbZ7^o{a?q@0X&w>stYCr4hnAW6h@AXPkRqn0h4W^ncuQ=zD zZP(_%_BxM#J!!h}ovXNvjQ>#&(?Y5rHRP8<2e8=6b0;w(fr zq+n_|>MKWehK(?n7C%Gs*aqC0Tj9m05|Z?*PpxqBKPNZ6>C9k@bo4agN1pZ0kQ_Tt2^ab2MYyL3C-60Z3g zSurkLaCDhIqVAiEg{WIh$OvtIoT}mH(TlD0FLY~=2RYLc#g+2L!5>0_)2X&RtYFd? z1Q9b&)V+OK6!qg>EMmyH!?YjKh2MMfX6cidFdJWDo#u1ICL(ME)tyuvM3K?Zg~ZkA zu1R_&+gA^>9k>GmzfuTr_nBjzPss2z3Bxu$!mi6@3HMLUaP!9?KHO0zvT@2}8)hTU4ESBkfMAE%=Ge-YCcI^U`Zc!!zw=!&WRgFd04HB!25UqW>oIFuvSyQYPV`=3s|4ciQ?w7uz|3 z@HIE<&PwG3b?P@dqL9_0`#6VB7h1Xz?>UNUJ2dP}b~k+)z9pXj7zU^M1kF?$2lrQQ zANJy7QepHEimKV4NTOR+$j`-$3|BMQb4sd@-@2W;2e{LGqqDm=sr>@OWqMyZl_U8ZM15P+_@(TC>aSnRfd(K5<^+2K^;z}x@0@IxuU!`WvCh^Q4bRI3R%n6 zX+G6tBZMUy+BVN)-DCVr(ZdT{!%vuX&^E>H&KD7XM=Z@}Z(01M%!^Jbd^tSD>KgwC z*=a$%$jP*Z=Ay4C3`*bB;INiuSKr!_h=VAFjb7V!i0fE!55OoN)}OO>R8JGHALP$x-j;(m3qdgJzQ}}_67c=-6cBGyQb!ACC3G~%4kaBSmo%Zcm2@g zEK<~RH5{rBI;pZp3iTZ>wQR;;dw3 zWRsT1XNdqZW%CGH6#Z4{fx2md$IBls;a3ON>^-)6T!gwb3j~YES5BSA{aIu;1YId! 
z?dMo&gw$j0r3gEfaKK5bmKI#Hu@~0zd`@A2f5@NoC3$(GM&3wzSm%BY_cJo(daG`} z0L#&o;hayZn1~Ld>3@KwwMfcTR1ZzAZH?>Gk8z^C99&5d4c(Zqk5fA;lybf$KQdNf5$@_DVCOi|qA<0+%0S+z;QZd0 znn%%2hG%&-9#Q6z`-!0Tcr@@Qd~&n%i;Q36DIAuQr$dRB1DaN^Gq&tgKPDF=Nib*Y zIZ4foUA}*&z%)Rg6{_^^O?QNvt6g;Xvt$nc41GI7G;s`(VxwviL#>rcwQPICi>Jm5 z97H*p4Y@Nq>EL1V%-6g7FfSrl zPEm>gFUFL5@_>bquq)WVyVd)O8WHzNrlAbFJk=bE?Mv?C_}Bdvr8weJ!EbXBFE!ck zwyF8udW&Z=Asl-yhtQ*c%5CX~a;WGZDoy&3ogzja<2`j0mDh>G?sn?c7$;%F(UiKe zV+*^=Kyx=^O~1)EVM~MO2HDLpB(xV|Tu66EQrr0)tJHPP6YJF6H_pDX1y>cM_kFI; z;3&TAtvQd{=Bus3uZK4jkZ7ZOyIdM#+4^~(%1pTe{n8+-y=r%P`umADU&;LT4+WwX z{0)pLe#yT3L-L%}TgNMhE~8KIL(>kqt9MoiOYPb013q@MYf)v^h28V$!%$555*3JI z_4v_g`qk$znR^UK$$>&RuULtunJBHl+$Abkp6T+JK7UW=Sya19PsQex0_BjY=bnxiSI-)PS8bb&rXN_(rih9o=BKk}PhLIn zlM)M1{B~57qByR)>)(ytOHfN9y8Try9gB_>;l(_TA`RROD}Pk3c1j!FmpOiARD6B5 z*WHC|pRRf=7|_K~LRlS^x0f5wraR`#?)`h8n(J4gLt4_Xv2$~=|87*n{?~R7(8k;U zefN3}NUJQ+D*I3S`s>%OgD-yfuSflR|N6f)XutmH@1{8av4K6al%>A$OVDlm`d`tT zf2IDr#`dV7$Nhh7Y=7Of{(r5p{h!|SkPh~sx%|x(|K8>PPcwVabe>sV{)O2~1BdHp zNLjLybKEprl(J+a=eX|kg9^Frrk(xuZOSH`(E@xQgOzrOyPL;pK1 z?AbWk+1PkN4|{G79(F+gpa1{w_ps-HXoS*V#Reht|7)WD*~I>u;lGKx>Eh0D(;5z{ z#=k|xtnB#0;gt>O=6rUZb7I0fis2 zUI+&MGs=YL*(MtPLopkh-dm?g;MqPV$mBd3@$|RvLT+dw_ntvEAZ0;?5cMWN2#>g= z;dJc|_k_(_er}50H#+klA9&r5Z8+dq6t3y^nZiz+NyF%Pe09?6by%o-F)nDp z4Sek2kVA z7{xV3Fvyrb^=?N4Y9_j_tJ%Sb2pt*XK`Trj&>nu)o2+qc+m?aFzqyf^*Uo>cU~PN5 zSJ47oI{UTB+;BYi$#VQ*%}R#!!)pJv1-FV2Ac*z!KvLJ!)m-OUSP0GJGx0F-Od1af ztvlaUrxfedI*pdUw65EgScLhtIWD>VLSXj`PIf(Yx*rmhV;CsmyD=Z)7FT7tCmnu` z8n0WAh*qTlu;3=$nt4p6srAWXm$+h=Q+~v1_s-bW8=ve*)>0U08raJ4t{BGH;3~tm zdatMXwm&qC>wk@zClu>?T{xZ|`%P8T4^!x5`Cc+d&#yK$!I0;{dyBsmp=p9+jD{a- z5n_xLJj@~Mo2a&5PV68Z?KL6gb6APdvU&w*INF|WH0THo0#j~5V?N($#gG<(Q&haC ziY&i6&Ps8c>LV+R^kOg!TB!=Q0@|%I?b8L{hcxfPkVsv;C8T$4+dv|!QND0IAC}uI zxPlWtCr_|o2d5Yj_8=)FTRJv?J1Oz&(WHccypl>5Fw0V5Pi+ZPYcm59~hMDTj8`x39un- z&4V{oFTRbtw(&Ez?~c|6QN##bsY(wWly3?Fi{BK|dqO=>rYegKks&msm17o)w^6p* zz06ZEMxuep5tAGi@(6M7?^L4{{T|jPiigMPjiYkq_OP?C*lEh5P2GHkdB`@evUsW4 
zb{ZORR_j)F{rJS4vZ@y{l$jty?UcFfO}Fm#yVO~(l^K42yOkhy^mu)?G1ECDvzRb{ zt&9adxXN5l@#OpOc{ayBXCPcFBSu4i8WQGu9CkIP8viG|9vjz|lem9no|hym}s#Sv(JQ_lz=c?TVDY=uJtj*U?*;2pt8*jH969x*uv zFExNJC|PydeX(bhr}TySLYGu>okMCL(raER%O#-@O&|PVP6m zI#*WmJgD|G`wXPGcn;JTm~M%RH)Z%nLZ#=2jR)6>h=gY8aE}Vgx!`rlg*Yr?%3RzG zV(f6pT|7z8I9`gvH0z6EbmRyLK?a^RsF#(MJu%_>#dW9-=oX0i`Jn#r*hgIa0dzhc z4e-v=d4_q=$*M%kq+_ZNX~g_dncAXfY(r3r=%sQLkQvt)Z`dh}g63oB=l7b6FC^l6 zj+E_(RCZm+=n;dcis(>{A(mrED5vnADNPSFP$s`-&Q95sgoK1U2a#K%D`O{OAgT05 z)RgOaQ$|MA?}xHIdSo&ErVj7mnqs_H-kf8wk}*^juuoQA{D5pM9-mbePx7Iaar3XJ;2o2&tD9h%xjN4Ws!bWi8;T zr`X^5M?EI7>4AD&izk{|nHC<;G3MoOMaLw(Q3|G9Ib*fleMO8bMQ4(WCU<#w>+Oc& zN-HvT6^Bt?Y<@zBO+lDr!AnCx%=oe2G#E-0YcW}bfrQPQq-SsJpp?(rX!gIq1Kju> z0pdisYq7o#JN)!&--xWceJY1ky1Y5P1N$tiiu7@5-pU`F`=Hx6)qwSiX$4B^UW`vV zrJkbj%{=~xa(SX(FP@IojMc`WAAUoyU<~Y5jDnPE7|rNRpFuu#ya+{UUIpWC#W8&G zqUHZM89+VSJ32i6JM}1cJv93t8z|ZUDJD1sxsT&XL5f z@y1@h;;cDZG-b@)>9bqy&U=l9-*7M>%`xV2_Dk_i-A)VI$yesujq*h6241fOOZdDD z4w2$q0|NtXoxq8b8kSyICsE8FGuF+qxLhP}z<{-G{(9W0KTzzM(=dx(ap*_q}xl=5c zUBTrk%{o7c8AXZwro!uZkxrXjb|n`+0kv6Y$=Zp^pcy@##pSy_&BO>Lx)IS4)e-qd zT+$_*`-=%-;pX!>iB^1P8x@_Y`>-=b1psTgO2DWQ3y6k?VaEY)7`y550=^6LQtDPF+O#DgsrFOgY=?${YMf1;gGWP)`S$9 zv0+$iAck>f{eBO>JpblLNivW3mrAjh#7F1*y&b9Z1^eo1B5JXYwMq|1pw2Z?hovb) z&V#&J*o#k`%om>wWOO1|9xj9%W~>(-4^|0|g?;=TF9xbVk#Uvg-To62`sBIBxhYE!pmd2( zv9YMYGt$%khEDnv;TATfeG<8>(D3}_v!_@nA*kBXr1(Yw6-=wCxH1*Obi;4wHF|=j*SStKC$yOEc2?QJ`XrLc#sh z=p&B6@F%h!t_}YRVfi$}#Pf`bXUH_fqcZPZ(tDVocZ$C)tqvjZ5l`_6;I^i)JmGV2b~WD5T2*)F3qYF`R46 z^)XevT9T4yp@-?cxC|7l?XvB0Ilu>Ue>5TZ*IjL=R1Y3JSa+5v?a;E3nPW$HJT=f| zDxiEo)0FYzPx^v=X5Ht477E}}P0fpczs$zZZ$6D|fHvi8fY+QZXZ~+9P?%%l*-=wf z*Cy{vfjYqp#%}`H%@od7>5MPnX4SWPlKEHbzkn3#p?ay__$NzV<K3| zlg|HIS)n!9{-xuF7ODsz$j4t+f~Oi01tJYAqDi%x4Ws5qlij&Cd~KWN8pri1urxxe zlIJHTg$6g}dy6czD(((39Nd*O`T5LGa2;gEy6(H;HMgr*GJ+mfFGPZv|9**C2sayR z&XgZ6AE5(S!xKAZvcbW@%^XC@Y#Hwulib1dt5zR{nmJl@i|GcBk4JAGq|o9GIOV+P zjHEf(b-h$Cdm+B7w)EiseG|3aro;6S>!~_k=5&d?maZ)n4*1}CI+!|enuW`E)H5KKt9&{)bt-3UKP&s7 
zKmykYm8Lbh=gI8qf%fZs5tMpk)$=;PhG}6Y3tES#NBty>L%Gtpr}g0d0B;KwFUkis zct)({uh!T3S-+!y_;p?vz`j6h*GS1gWlyEtZSkqGF*U}Nv02y-vX0}nUC%udZE4ty z_5K=Y;TeRQ-{!h7Vs~TYIoM|A9MkGTKNO0YChq0SzM{Ky{k3h9d-4{@t_&P-GVl?9 zjgz)$R?PDpvLukm;JREuO$}kHoj^7|inb2_>WG$2-^E<4EYlX)Rh-)r+IXsgsRfSD_^Q55ejP z8gm_3DtOF@f+8tys<$C;;Ds~}9^m%uo|YU353q<7Ic`10<^Y3gdBGEEaYP-3g)mO@ zAH;(0$L|pV&DndrcDD9f;(4Tr&6ftvM@%Y!*G3PFHVh4~7d1w^dM25; zoDWGYsP7T0Ta)WSQ#^g1mr|;tHYO#(8|-(zRHKJZ7G$oc*)p1iYKGkGGlTgtY4OSR z(~i|u+fsQB`Wv>n>JcP&oo@|~Y`$$!#f8L&dw-1&{+{rI4Os1XbqGioOCNxKN!>&d zg@4_JL^I_})%)gKh?w84^*ivlq1fkkE@QqKM68N0*Es3F1kOt(%lCZw2FZYvWDC*) zFo}`Tg}-Zr-o2JrlIo%yitnxFSu4(T8e^#JL+(2Q2(RMoad33xTx9UvhtHDZ$=NrK zPd)vCmqAk@Q%Rll7$$dk`+Iq8tV6!xqA}9PY<$VO8rH#z{;Hslo^-peWcv%Rfl{#$L$Q+2q?TxhC?yQ_a0*MRB29Gyy;&_8isnEG9i<=t;Epv_X#%i(5k@)nVf(nHvmgc=PfsQ2VG%}K(FCi`_iW*DLTjlzOm$5sUd3`P+Tu%vFO6c zIDOM&lX2(?>-i^v5H2vSuEjsu1ziPj- zvhshhk!DtfmxzZ{>< zQ|MUA1PU9fQr|kgCBT3fWt#CZg7#lx2@XD1FD|PB(!31U_u*aF&+zD?!78*q&PKH#bk3{w>Waqpx3+x7N`;Pxe( zXC==)CLKo>fuyXqfrM^nzTs!y~zbfsNm-kkccAeVe3ocgGxTK^mEdnAUB4XmM)B!|<)Qh}^ z!y;?kjrR@Tx%yV`Zl?<$-R||WevvbnK0T6y8ilY;vhIM0Cg16(=FB($I`VC`=sT;r zhQSl3+zOlG>8UHp!uN>EyanB_^_4l5o<{CR3>p)*^)}l-dzDPdz(;RTneJUhQizvX zQ`l#^n73i9Kr$@KXg@aM{33=#G0BhLY2!^B;rR)!2B9$dZ_H_4+U!$R@Gd>d+Su0m z7hh5^hqL1MkPSzk3;D~53Q!vNGk@_oC~&bZ&TlX2ysKb>y!8X4 zd^Jl&TH5_Gmo{PLOi!Lm00m=n7$I=8#v@@hnZF~*NWEHCP(foVLm@JGt?Y?XLF!7R zk^r8E@{(JAY@9C@t((I-b&(cYhaCn9-vL;NdVDltf0sCs%Q9p077}`RG0?*OkAre< z2mO*@T~pR8)a~+QdA`>cG>14@ov1YGG_3jItOT3P*Q`E*j5hVvH?y)9RgfiV#$~Vb z0Ug^)=FWMUQFlV{d3y*^1vAWZu&;OwN6sp2^SL&-z(EC%A1DI6x=KG~XA&i`pH^Ck@J#JIoVZQ3(@&?73eb z0l<+pbGN+GDCf0LSYjxWvgu3hPe|=QO`zaO^z>hHJ5;Bp8)m1Y8#d_Ye%ZiXK9kl+ z$n}^TC!Y?IWV7hn$@;dQJ8WrI7_#ZC^XGfZCF^}K_eZccY}WUDk)x~(iH!OUYna4a z4>RdWqTA#KSMP#L#Ymbrm&71YNfKUt0QH|=hgwwZ+4-SrY;YvYVG%DayQbZcDV-%{ zbNlnYx>Qe22c?v$4v*0yO|~oJX-?OX1Rn9{#W!ZMe_VJ1JklgV-+Azl5ytr7hVsJt z6uw~tz={+pWFXv?4(sbVon`(=e%l#y9WT1@^`mB39w*oSxZONjgFYaIKAijO_pd<|YD+aP 
zcW?Xpg6%gMUVkBj_Hr-bM2f!l<`LPPDCnoAkN4R%b=l#K2gCZ@OGtyLvt951WwHl_ z$xF*dl)o8MEM*gy+M{D0Ba>t9@c_3kA1Bx`q;w{Pk*7<(%fSq)7iuKFt&=ZRvNOwn zn(_fG_g9rc{?D3(?Asb)`Ruus0!2`A&LAD&gf_;<8#3PlU?A*F%FsD_0Cc4Yt>Z&o z7ErM1_a{KJ4Qy=s!13X%n|nJs!OW1N zGF@6UtpaB(Vz6&x1S*LtY!-;)5idP3;=7q|A$qf-BUXRU7y2*}8hmCOmmZh2Nno3xka;_^n zg+oC{23g}ZQ=FVWZOO=d(uNub@^8O@-1EaRT#jDoT54kdoayWqTfx8tCULa7Htv5G zn%LjJ2Ify-x4?2>7S(3>$g!YfE?@bA;TMGA{bAGYz(OHV$AtQZT*X@r>Wq*2%oQ*QxUIil1E5+{ubt ztsz@O?e{Us+~>R%5RJp`^p#*EWUA`tIgGAWfn?87sdY9zt!}5&VB?5;gv_@!D*r|W z4?OXjpFjNW7!)Agre$5vX*@H%>fkv(?uCX)4BnJ7uxo<{(vYglj=p=bi#5^Tvgg!o zg3mb0zTppd5F&NFuPrN!3kJST{gI<4Z}cmDh*2$wR!+QO2%d!06#MZF+%*jAl zWUEoE<*H2&@rA}~GKi$lCEKNV(VoKxYF!zagnYM~e7dOQGb3d4nW_nbbt!o!PRT^g zwPV(w(Z{|c@HK$%d3K#D$w73rt&3Th(? zC08K<%#1naJ(%WQjE|$Cj64B>fs=pZNMJY0bG}c8Y}xfGDH+Q0dKE?*JHU}zbw8DW zJUxaq78O3Q_Io^^o1rb2gC;kL+&3&@tBbI7P9QRdXlVq9(+CyfGkrZ{v+i~Knw0!& zQV4mpAhz!C2;vgjIu{Wk6nAxX{exnkYl>;2Oh=v!X5Of3{a-cp-z4J@`QHX9a7qac+TA}1Icwi4+{>`sB(8QHJZF3&O%W-;)QpX+tROH4>C)=)*P& zEibE=7U*~ckE(lD*X{FxHq|EOvlF_wkPtptWOTtIc6}VA#~izukeNQivziqZ^H9eX z?5*l}T^xe-GKH{zsGtf)&<6+|tOA$E^NnYd+rKtO_X8iF&kjg43jo{$Adm`Q5>SEG zWXm7*o<{#t5Sm*-+jOyupQsLd__ve5$uY1YtGl`*(+1h(T!OjY;dk7ie(%68lR^9VE_@~{j1uUGL(cF9UWLKZ<4_H0B9vkx~`^XvZ{Z!`r z4%yLcQKr|>$&;6Vu!K&R5_SOlayLwCg)x&Wh#M<|Ojey|gn2XLR_-#r46ZBTrb9re zK(>KS$XX+6^0Ux_V|Jla#s1Pel8*N2gH9jlCB@a*mc*Z(}mSMGUTlahpKao!qLt?n4#6m)x00l1I%%}qF6Q5AyjUv|8yPxchTh$hE)G_ju zXRD7b9-7`(BR2dLFFJO`yz2NF67f%8G2h^?qbuLmb#QLn*0p=JfR1(3M#GBV8=RDz z0tduAFZWYp(U+C5ME@%c@YKpLSIip-b9T^iMOj{;((CFnH`UX1@mn-F}q$u11;E3UI(- z;2}XYl`}m>~<5kK}1<8DgC=Ehr;Q9hRuKXTTwoNBN4VrJ-+zOvp=o~ zH0OZ{D^;tI^8ij5noXv`h&&4T?MofI4o~QZUB%IsZ>7< z2_P$pY||h}zTZ-x`%z123GwA~a}Y`N6~%P*ns@-$v%*k2ww|vnfG3~47VqB&u(;>> zk{cBodW~$9RwMRxU=#0vaN<8>&mxV^>liUG4`PIkGq18S+s+Ojo7^{zoxcT&8*xAu zd+z>iwjeaMrAn8&hkX05&}U7c*8ICFh*OsimKML=%<3Kc1>)dDc5Lj)KILmD6?D89 zRNVdf)z{Cn>FbS;o6MNMo+!p9Zm&k-W@P72j+dSvZp5cX@>lVxA^aomTxUc7$HfN` zkZ*-5WD%}x5E80w?LtID0G>KnAGW%9Dg=pXcqG>m_2W0Lab6)nfz`sAqAO$M`+~6` 
z8i<;<-Lm{dg%;Z1uUO?tlLlc)P$+~Y`u4uDp-k~IrX{(#D?>OxKWO~NIFi#ts*!Xj z`G;&^l-+*_@8NPFQ~O77N4*a2G$a@hTer2V7KTs^7KHvmaoRP-orYta0|&G~mC9V! zR7wQ=0rWYT=Z5IuObk_lj=e@PA9f=4HQeF&uOOX_y!`m*sILaq02#?ZZk62iubg-o zANw5}xwENt{VNZhadXh$PzogdbcTm49xA8Iq?H(i zD;^wE9P*R`P^m+J)piGy1^C~BNk!!Ta4|$v)UTHV)^G5)dwdq`b z`VBp^e?w2^!vS-C0X*VO_)q-ryjf@+&5IlX@Uoh88H={he*+c*lrGv%I)*dbb)r?iEJcoz> zaFz9`;-#^`#bwMwEZ-vF-)#+_xTx$u(DXxX)QBGmxp{OfL|fn~#C@u|m1O~FnC+6* z&E=PDgvT1I!6Y$t56y4D32*Fx_ThI+J~>N_H*9w)J++(b4_l>=t z5q$SKou5*@YR%a9m(|7XsgNZASHeph6vfnUHP~<>Q|FkJBsY6Q#IiJo}IQk{IsDL<#6E4+O|gaj##3PYp1$l$It-@{C5!!Mvgk*kTnA z;+Z9cE6@n&gx6QTp*g6J;IM|7Gj?~UK!!M`bS8UuQ0m>x3MnIUXi6OKcMWejlGC`q zT&3cv$weH7FeC|Ek9WF1U$o;{;!`AM-&R^qOk&Mm;*l-YUS`?1S<3PdQFZ$d6#rfa zlTJ<`Gf^JQ8Ty=SQ|x;Ms!B?>Bc&gn?5AF7zr%qCgWMk4Hgnigco|dCbmIYT{qX<+X+!QG zRVp~~@Ul5(bdGJ3*ygGlFE)2O$gY}hVSod1lP>ciL3m6F@$qyx16-Vp99q5D z?|4!59K7_L(b(rEuu(F_Kw`MG?MJS03R+8!%OC3M?;}6zActarV{V{$0GxWzO1TTl zCR#a1w4(sJ)wZ2^mzg4zExuCZz32{^Pt4OuCR9;{F_WLy@-;;c4VwbQgB9~f;85Kw zCHJG+X&rDl;PO3Jisz~6)1Rc%7XbrnVP5A{hvTas)*UXrtgo2j)?*uPOEV~6ZGW`F zJqW}YLJ@t?jmK@g%SocTFj--3S9`II z@H`^7QEqBCpilXae{16W1bg}14Z9&=557Ne6>Rxmf`!u6l2=+)_g9<9E>I^BDZ(5)|;$Q6uy!lz&UT!TqH3L28}L z?u#1LHgNFd#b{b^tJleQUC&TcL4VsT~eY*ZGGXk(Q6Zq1cpD)H;m`mI@AGa5TlxDQAH}tjYW<*u|HZmO)0J zOsf^=Z$a;D(3ka+5uc9Cj9r0K68#A;46S;CmQwxy{%;lRV=^1@CEfo}HVY{#gcKUG za}qQMq=WA?@o#f%dpA|?Js>uG;ziz;#IhAk$TvEaKDZcePLr9QF@_~covv&b zFA@0d>-OaHAM>N;Mcb#v-;F1yr&(Y9s7P6^NaJrBgLCPiW2v9K`=J&&p^ zFr4@IBX7Y5U5x=>3rzGBfQXrank&uEHWZYcF08}UUtWN9t5iNiU-&29kk1^o-mY%c z19?(5!#DjflU~CkBO^mYD|h(80mcV1acRp0t5+Z}fJGV7%gWRj=qTYZDB<3WK9q^t zlU_yYWf`J@=#U%k)HR=vj9WMA&_|#C+SJOun8BMJI;Ogw2C@{)k_vmVBg-e2YL>ck zD^{EVhpX4SO<+@yliQ2ubt@>U5XIV_4}|O^>+M0Mib(`ZFeT$cJSwqvxekgCuN2Nm zTxNVg8N&X9RCiwnSKUVaFS_hTUC1ZF5iX98W~rwTBM=uvBiv1V(7OXEheQ^vmBo;Q znd%JF=Snumt4cI0&A`&ojxH$jV+QYaw)*HI;cOfqE4*CrY+OliS6wx&ir@ehp8iJOBCXG65J(Q;yvqr$Hvc7DK zdKTLeLS($5D+tySzi;aH-05;Zf1NSEm;OZXqn@*RuuOU2$>)4K1G}ht!r;PXL?gj1 
zONXa|cRK^|HbOam(hdi>PpDM0Ru|}E%Pd@9oj^HJpeEBS<;Uib{UyCTpYtuGv(j1N zoVVKNEtB=`V9?yV>l@O%_S2Co-^|H##sT^!^wz%rpp?-_c`pGmkcaRlb~-c10^)I= zZ_6G*WC(v>D2j+6xt>ygES@D0q6w6gLQxFJF)RrBN_Qc5e8x%ZbX=ZaXVgD!X={Nf z7iIvAtQCQ4OpU)OY>OyFLEnVJ$QF}pwcPcWQiR+wBjj^NmBWeT8v`*YQ9ON4^ykUr+RTx!f4+JZjQeBGQ z&phL)= zLiDFYj>-V6CGe`AzQs|!fb0#NreC7z5f~d6>9;3lBBQ4OjZolldGA3JMbVor`_uVs$;MciV}= zc;?=C98KcqLXLyVCUVg|1!oNi_?;|joi>5ebGjezfa4c=%B~sSERKiq_krBrz~EWf zT*W3^!pd&>BB0%uOX4x^Nm^SoTWkc?wp?I&9^g>YqfStJnu z0mV`0muK6lHZ3T(5N|8w9*Du}udT7mLd0T=O03H93T8^bvTSn1i9&u80r`#Nu5L;FF91`zJ&%fLjW$VX5y=#9Z z_B8G5{gHkzeF~&!&cyaztP(p2#|OF*Q$Ba)AQUts_j#>O%+&P&{0?063WLf;DA|wU z;pSTfWD}{LC^-J6q>^v=xFcxbA9u5g_Mh z3Nec(C_ZrQkOgJkO0(f_(G0VVTlGiJv$(A$ke3}nrLXhqKz4e%e#OZ+sF&)OHE0iB z{ZTXy7QLT+Q15T_@+wZ1@0|Q@6fUF$e=J;_^>uxODebC->!Hg-Mmm*7#Y_pypQXaR z90S%3Cppci416G8xbZpFUK)+B*2V9g*^4-8Hs9O(8pKj>Qdsll<`_qDS^`w zXSpZx+h_a_T-Pfa%%%ET{k5gA2eQR_so2-v!{;0ed4Fn>#c^M9pzjS}oH5in3SA<{ zorU_OuB@o8B_iO(>z043{wRPe?Ot3Lr`uC_V>%f|$dk52U(7`5-3@CgJ}Cis_3q zEAN2~Z?K9qL?@sq6jyU91AO~u9G2x$UkG(NN6%g!SwLOMOzu)wyt0l?(OC(da?VG~ zai`1k6Y!P=LM{vSFj4`htqIeEjg|gP;G1e(cIQCecjb>GN5G=zZ4ic!nL~wS6bXaE zgAJVo2~m_^7l0bl=dZt-k9{kaxK`w+=a4$mFmGS=c#VReD~y5*I(O*F0Qm^10O!Mi$n}J>_yM;$B@G-)r@a zhO;f@Kzc>PI8-_RTj*Px@M6qAy+gj-UHe@3lmA)Sp~r*1|>tkddnwaDiA z*=ANGluTj-?Y#_~_iY70HSa52?5_f_5q(nNk&5_zUiw7%kMINFud$EBv}^)L=g-DR zE<1YPFBfg++X>HN!RtV@w{&~vih8)Bg!oY6l+}c}I2l49eAJM-+vg=d!s$;)qV@E? zPaTw2YI}cEJA~R`DEzql&iiY-Ar=K;iyUw`8aEC{11?SlwK1JX1J};W5%{Ku_o2Q? 
z3X68>`An8cVD-bl;*+l+#4?HH-D%TMd1P;y1B{35#(0o@T8hQx{hhuB4J-t}=czrA zNk$x`4i|@eJ&Bgo)&sFK&fmAfw(Gvt!*Ehen9!RbHpj}L!ZXT=f*Bc!UdOk;WEcvV zvulZGe&T&UmIVPtLFGQOjYqN@cjp#@+&M4BF5>4wbo-b~hT8(tIxQbzX7vFpcEtlm zM%whETDNcR5%ahjTtLOXo(wJ)th%Q=TfWLQ(!jX&GCN%NG9<6ksm#aH?)Jmpfw3>C zIM~+S2yc!l0P<40rqUPQVKDjd&=JOrUgbpbgp#QG_U$9Fi$vbnEi@l<%;i0!I`qJJb zrzEKX{rju~>#nBH=kDwbd<}nTVkZhWREU!!=ePO=<$Z-7D4!0TwG3|gb?*1^-92pU z3b$L|Oc&T*EidfhiLO6LacQ~G#O-}%5xhEy?I!o64i=X%fDzzO?6l{}EROeN9>_Iy zI#~O!N(heOd3xwjqMlZE%GP%|XDeZL+@bI`+Kt?Dft7S3X+X2=SZW)EiTxz*`tH6O zCbK?jOIF?vd_sbiL8X37%soDAjk?WjzuDmiG26t??!Dn2 zScZ>XqN}I0^saeIiISi#ym75#V#p>rM0jUpp?F#gBCKe;FQXD`>dJ|bh&_Bz`t$?P z=o5``$S54Fo_8f; zbJ$N?@oy6-jHqXS_%4lC(gC9o7+%2v{D0uJ|Hr1DVbH{?vnmj));hB@Cc045h&85- zyF2v`$0mP{OxyrGCm$LqU~S=y{1;g!)Ozqgcj(SB9nh2}^}c?5Nb{ifH87vEP>A`c zu%a~GA6Gk-O1B~0Vpv9#<1;0{e}~oaC(lk?4T>0k7$X8xw;dSNE%5sGeq^49sc+y! zs%Hfyb!X=GGE5S6++Q`rXmO^sWKv3H&{IgoiVrV532)KIo*y9@KiKCqkI5mB0|+d++q5{fE} zss=T15YG>CWB1)Qd{DvE0+t$&K()3$2Yw; zp=J~pvfGT@?)XGKZVvCAOn}Bh5_*Uv%3x%7e{hqY9lTKr#ywX9XRd z`jO{upwQ~JK|uQ`C?2G=2gjBRcHUr7NrV#T-web;2{+ZTNNFa-TPE9DTt4neQkb}} zi_UPNR9KQp*%cfD@~hB${m)__>VRzd5WHTXD~>f2gyg|oc}dt)=49+a6l{|*mWhH=YZ2wT}_Q! 
z`1x%i4^#-ZlOB54yk@{wgOv~(J{8iqy zVTU`2g38!VMz->eBHU_T(Ix+WXju5f^E--Da3cF^?HSF!}x5qL;6Y`foPuqQdu3dVQl1{T> zf_p~8|Ga_Tz}C>ciLJCWcG2C@P&^VRyHav6I%I8$+eFf8Ra_`P8Tb@e7Gx^kfV6#p zo_rO)`VNob@%4Gr&B+K$MOTqrV)V@jv=q~=WoYQ@dE}y%9~lFu7hD9hvsG8;LbT3a z1o0hM&HeL^@TsU$lB1iELpcR^*c&QPGj=%sWxrU_fv<6_Ju3;`ahjf*iLS%1Cnn77 zgVMr-)eFaA`9mIk*o=v+uUyQTju84tLoO72S#0%QeQPghFN*4b(E2=Uk@8hwk54q8 z`~ESPb`DFSRWc4USMi(DoG9SIWtaFX;&5l%ricx@-in@vPMI#feo~y8Vg$IgOP-Ih`Zl z55a;ENfMS8WKBuy)3%6)$S47n^X|RiY{R5?B~UMWO~%3_0XqVn6#rk7dPo+AYf&(` zKBn4W2K;^@<@<$u0vkq2Cf1BELA~!u-q0#~wLriO>9px^17hh4s8Wb}5(^xfgx|L* z)WKR<$}oi;B0M@JFx=eza1WI!8g2SXUdA}Xi=(B|Z!JOnlA}LADNu~t(1anfvKbcC zHa!GVcJ5wAvm^IC4l6?P%K9XaMvQ6pLDJ`vc`F6X6pjvRMc`QzB#?b!lVM;pWDZj( zD!npuL;(uc`Ch@ggi-G1lLj`JxyCw^yQ8Jbs$dK(U<0l31q@lgTJZ2{lB{!$%|SHc z8+QM-FVr3K1Sje_s~v|^?!xBCHmte8!!0tjJP3tfJ@I-ClYnXfb&&Rfb#B!y)lUwc z7=K~%e903J{PNQ2X|Y{fFuJM9J-Q%IjV6b2-XO(b^SWc#S6Pfgku6eSCcA zXqceGA>Yq)7fm8^f#o=)SINjCSH0Y$95=spR7__IJPxY+$MH(^OV1m#^QghykF34^ zN=6%jMDQ%UH7lccwSw<(9hR8}jewIj-+aH4vJ^rH8T|OYrJOGGDfyi+_a`XC+}aoJ zwmC}`DbOd9(c0QT=gIO11qsr`Vnw>hYI>b+9Y}syS?##gbi~%tQ(VHFaGY8?UOMzM zWXOQB=WO70F{qD*U|8}bvnA(9zLmYMphUmoJ=J{@13E_ z2dW-k9DI52u<#BfAgmTX{L5MSNdCuF(d>}NED`TUK2*gOy3t zOBU`Jlb+`W_m#}6VEtL7M2X{3)AO&| z8X6pKTZ$f^Wiju;uP$YZhq-34Z8vpau9Gw#P;$)k=CqIDxs5)!t~!o1^i;!2Qv}K= zdmR!^`U2$8WP2`qBat|FsmYN3?88wZu*M1n2y_+jVDQLzomYPyug~WHQE3S;mGcaf zic(SDeC{FlfA8&4Zv{Je4A+pqJ4rs*jwDJ@1U7;)x*A?dlVkhO&meIf%72tq$rUC= z1dK_2psYZ3AuX>ypq*r5HN&NY|J*V^EQc&ghpb9!$&^xQE*gI02ewC%U*k+a)HtmK z!~`VnGi%;SrsWOIeghtZ8NjZeZD-h5@2+%;Jo5j|5-s{@HsI&apYjwq)T+90N`*zM zQD@&_GUd6ibp+y||#`L=Q z;soSQ*VL?Zq0NEFsSR#z%}N@Veac@p1SJYRoPH8@t*pvxF6{HN?HLUfh2Ok6yKZ;7 zZowo`#)9s2z)CsQ>+s_X!=zKmxdvf7VZasy#(@ujZ(`s;ETF2YDx1g2SV98+^Tfs8 zY}Ed?hr42;s1#gDf2^IeN}SJkGITyX0|EosvCMsYV^Hf!odQS4eJm@07!Kd$50S`A^T3_SJn5zpyFaD=aB24?|rie&p|o zb(893vB`jG<;^Xa$O9Kt>hwVtqX}-ECB73W!w@Sv4D((NJf$;z^x#2yhb>WrmT^(4 zD{gm&HQa{$mFe~QOu-QP__%f=>vP$(cv!fO0T^_D$)50;M~D5>*6wTH_@+0)tjnmu 
zev1Do36q#3l?)#T>v}0&(TaN5Ct(H(bfc)&D5vqqM3=y4p1s|vY_)Pj9=RaMlf5p+ znKmfi20-9H(dOb>KhwkWBKA4&n6rHQ945ohG|86HAkn!$ZBI5+}y0o)Yto zUsF+`s0uItmZek$W;vf-JTc!PWEq|=9UiCRPhh)V4wP#l?|g{SX*_jfhyjP%N5OD3 znu^t$v_(ZOWcj#YE)yRazl*jX@WlI)BLB$z<#8VUbOrVa>Fm(O!FJg@Q>)t@ylw#K z!WywTnXWTO%IKp`fng=) zXT1&Fjq`dNCXX~7p9S`2d{A-$)L?o#Te{}@4sp8XvIKI`u03`Xn!t2qsV}p^$IXwV zR_$$tp>I^bPOv({(z&=ZH@W0kDbVAo-K?!go!RdiaHZFf!avH;?vQTS#_I`y)1Fp0 zkfb#ZYFo-zxD{_$pq8*ywTsYQVP}5niKQj2!t;xbfh+etZTqlQ!mphw{|oc=?8?x2 zMyj?{x4d=~r@{zZ`)K+iFuJym0`Gi}mfHcqA#CyS4G5y=0;EQMXe^O0ZMj(rpV7-n z;vEeH0S314d9X<_Pi|yCNyyVlCfTRk8F65N1<76SNbXcO4 z3490FkCfrx0c@Rydj%*s(a^ZHZHwJ;DaXUKirI(IbxMvL~WH6y)K4pAKlGlN{q_yfzmTZIVSoT z=Re7i-vH4B(m}^eg+Wjc)M%g{5iwxBd6$6x_}W;g{6s0&Xj6oQu}o*WgeZzR>lJQl zR3x%XJo&Gr)UPb>V=&&n*OX#8E2_pT&2mUzglaf|mVKYE13*oK>gBg^a)88G4MqQE zHDG%1Md)bwldgQB<2)0*znQ3F#?>0gb{Dp)3)%xQ{I~6@P;1sKcL(?>D2`gn zs$$&=y>q4BtD34Bk;4ek!5yi<;B~K`;RkX_8xyESpC1I)l@p$bA8Le9hSu|=AUpv; zLv*G&Q;&OJ!mXm)?S9-+w*M6wPQUi%St0$LI8J;oE~NR{C|yt)%H#$=cE^5oD~PmI zgRq3hi?zPj01gEP$mU<(?#WLC$Q~zqQlrYH`AHN2;ksQZpIO7>QPE;kf=LfP`U<7H zfM^2!U8tuPWaaGo_vzeQy;q*A7dxNKx5u;3BZ0S^kWVsF)Jf=ajg>@6C$Wdj{eBUb z@&vI)Tn!-M!1W8A2Lv5xG1g8H<^f^PcPAzhZ8IZy#{?>rDi)k59^>AY@p*?w?N^M{?z_x zjL4^V&}8Vny}hwap2~zL($NHbu0{NsC_S3=Z;-79JW!;+G-MI#@|m*Dz(1JrUSUsh zgk=ZOQsX7=ia-8R(jDa;)me?_iH~RKJistC=9szd zxK6|4dL>n8h=TJg)=bJ1?#v`|B8$%r)B4*8IZW2WJv=-l-kbRQ zx3@@&VP_Bye(kh2#b=49lK(HBvyfEA#zzFHr`vb2oygr7&C}w0Fzck5Lcd#rbG(+r_m{q3c zlmPu8A;vS3goadZ3ZL7{O3W=X5ZXZWlxa{thP>dvbz*S;4F!cOP#&l5mnDgZ(1(SNwE zo34{@YSLXcJ53W6KrP|aWO!I{?BlIndPNC;&zFSc_*_bLJ&$ypyo+q{XQ|qmL$`Ba zZcXktX!5J~Ays6d`;(@a;i_ie>BTDOrlYg-AHz z(&-CXa*Iphp&zO`hQ(6I2pZhZeicL$2q;Cl;Po^m#{xJSz8XXvJ0gZs(B0PWr3NfX zwO+rc-(H<}^moBvvpQL@D<7^wL%_@h!J*WTgQ&XWrK}{w14yfpb6s0l-!aI9{XPcE zj%NY&F_)y$kEJ%uUmlJ{oWAu8CuSG+&>5yC*Rt3S@W`Qo%?6|X6>&W?UUHIMGjesH zDI^Y#V!n-3c6FHd&KJZ2m-;^D*A+cp{jh*r(i=mrkA+m)6ivd}Dv6pTSDI)cRBX7V zpZX0(VrD|e2>&R-cRP8XXA4$o<1{^#A0pN?226&Q`Py1SML#Q5OJ!SN)IZ{Q(3;_Z 
zR1CVFwY;QX(>SHYiv2z{$>kuig>133QS#8M1TtW&qcLmG#zVU=;fy8qg6#s1B^3iM zox9m~KUlGMw8cp2>zHb_&~Gp02r6*T(Z8uxV?|4p9Qle9h9@{j{pM$jT9)TLrUeng zzrv`nJF9eXnnAE-L!ie{^%*i}c%<1jRx;0Xcs|RPa5ijDY$kqglniL)|qifQMBFf{IXB;30&+2knOgd0ymt zar$mNOpq@^zganydH~{zyT}~(gU6;?m3YEgb&bRfEvMI6^Z#F?na+`y6N*9t2>x?6lZkmhl}_8FM+ArHhK>jhvtgu=_wQw!GC`QI%K$*% zL`8Lkh+I!n2{Um{R+;7)TiG#EXDy7fkpg`I=deozM!_$KTD+q>kFu=gC|01O-eVCy zSeqTA61lkRtcCbFAQZT8KV1{xBu__`xgO8Gvm9Zq)&<0ZT%SGLv5Z+@L-t0uWS+XJ zr2rm}EDG2(?N4Y`au#b$I&aRu9=(Kr#0yGln=YKyMd&AQC!JgX0qaO;S9q zqCjGq8wgp_R(6byuC!};cOZ*~|1gtH1?(LiX8J@szCos9 zS@lOK^Wd?4NhjblXmYHosPF+?$Y_Z_F76cIAyt-_r}Md$6f*- zh8lSqjyk@VJ<)77CjT>eZ!}QR(>tAS#RwO3HS0$jim3OgiuD!vsNe8`haPLx=zHN9 z4Gq>4S_6#qp$W7~?tk)Q6Zkm`b41$kK08sIUc9N)6#rtI=AxoXyJMI|)h)E163op0 zw5N?3r6>rz2<9x&WI`>z7l)w!0sHvniOCP2KU!s-CXJ93_NpkYj#Yx(s&Dqd@eyBe zmDx27l;Qz{V5-cu&W}6r7$f9|Sx6zWA)vb&wYdKV&YtF@K@7Zy<&tO$CydL(%O|dm z@4h)(5O&AelP?CiRP|j-$+&l?Dd4U$Ov6Pi0FC(-s`x$Hrywpkx2xBT9SQGE(IBQ zU3?HWiwPh;`4;1WHywPaEOdR^W}}$i4(Mk-Gnwff0epA z1qMGERdFf=1g8YaFB4H#v7)g`j@SP23TaqOo-$o{z58zKok)@1{Sl4{W<5opmr*vR zHhgqU8ZDS18NL2JBTJ|PSSceEGzFQG=M#xjCo&YNv^?dG!{nnoZJ@wB)fSiR>qA<( zJ&g3Zv4=s|>5B+M+Za~v#sfn;OEJy0pd>F>H(B)AlsCG1L8~0q!J~&0u=j@v&;+t( zw?)q@d{Od!6og)^p6kc55fMS%*W*v!>cc@bi+wGzRa`Gl5lG7;cG4VD?oH@-wN4&v zVmkAf$_&qJUtP{ClBrvuJHYmsf<~(_{?iimi-b{1SSc_=9b0@is#I1kmU=qz;Nk?M z&D&vy8cu0rXXDQFyVs#!Y%4I#sW=$wimCArOUZvKt-+;~Mk?Ol+M_YH(Vb;c2C_8j z*(?(4pfEk8f57)C=91Fd<{qPlC>$Sm^YH~OB=bH=M3tTkCU@nSreJU||m#ly+qh5PUsNHCqie<%`791p3`@o`3J3lMn zu|UN0Sm;Kfu;_6Qd2QM~LgW=MwAVziOvNe`lbWT17FXQU25>yYKX(* z8doI_3svv9|y$TTCa$IVe zRZj&dn+3*LJbyS=55hW&9%(Y?Age~UB_LC-eiA<-O?9xMn5H9(Hg&MD9VI%y@?{(z>c70Lo?I)Vz+GNeQ(AaaH! 
z_?A~ZEz_HR8cX0mp5yqXp}eAE55SFTp`dBP03ekb|LYurK*G7pW6Mkcf8VVnh4q+Z_fieMr z_uHdzahB)D#hz<$H+Fp|;DVp9=re|5 zVyUU|(XFO`p^jHJYGUj1(|4RH8=4k*Vub>3ju*8mXW~8xCYd3I*gl#(0PGugpcCqw#BS*#ilowO8f5GpRF)ii8NOv!05szgf&>_8W*UObfawuk~I-vW=~ z)>zAG0(yoBeVfbamKZRB98!FyfyMqI1{P!v>B)o?V7!pX;u`Spr1PT!1`lqMXuMLj zS1LZC)pSswdhO5fH6igJFeV?ZszoBha#e_4Lvny_&}%+nW(}-6jRH~ZA5d%nl_t&- zdHP8i2`-MGi}oWMRYYLALdDGsfNZ9_s%`n@b_Fyg*$;2Ui`v~=l;|L`p;%v5Lfq86 z!ZhaJ-?*6$jyF955%W>bD7(uw#@;}ePJ;~J=;tGJQ2!+W>O;-E zrUoQwTYCZ>{#0RKBU7PQpj9PF+<)6?<0xk8dXczeBmsfGMku&_8`?$nAZ7tj!8Tx` z0J7Hi*~&TK2muEm)?ugDetYIs69b1iFajOmGuFsA`UcLAD6ut!7ZxHB-~#8htGVD`6nt zxiAN~fEg{gz<%l;sFpBYZ>>`JxF=?7EYk^yGhRPXA?j;`?J2nSyyau06280sCYAXk zxGg|Q>XXdlrJci9mDB}BA4kRFi?7dRNIjA&Vqt()X$ROz&tzyK;S-&5RkOztz|yEg zm!=)ZxXIB7R8R+vYvc-twRDWQEIwjt-GD6r;JY5tf?>7S_lvAK#~}vj$s$2(X^SX0 zv|UH@fq-kDHmSDJdAY~%cID-SI$ywzj*-!l>Vb}lNsS~MI2^qGm}xM7g&k7lX}s#X z;Of|hGFfpomh;tv10yi$H1m0?j=#PNw3$n(DurtSUT^{algU1*1 zw05>q$NbAa@CQEp{A@sa70OCl$C?Noq<{i|NUHvzNf9enNkAH{gg)%x?Iy>Bpc@dA zp{FeX4npk+)Pq3odHmSaqIWdNxzMob{|)N)DH2W%L-+p0agKzT{2x9x6~c+Nt5hul zP6AyTm?5cfLa=8Pp-9p3_-|8P-Kame^oA~x7?IqB{S;yPx@Qg$tRX(ZHJ_hydvG>)o3H$%|cm z|72aa;;iaHH(*4JsH38z18{Ydh|{7w4Smv~3{rJ*3LaV>bU`q9)?H66B~?wJ^DP59 z55ZTcO^Wj|acK6ZJasl*n(lLC{Lj3RAGXPhnr~U-Gb8bVoTHbaP@!$gimlt+#-N2# zr*#}IFjyo3VKqwhRHDODRc2_?1C`JNN*-FBhr1?smvi79tZ!-2 z|IOl6391hJfiy^Tbaa-^>l45U4;*1?;x%UGVFfv4du66u?5EJYhpw?i45>0HpYPzY zR3odm?H|?lKDq*?(Ah84bO0c(08`54HRy%V00=waLQyx7%|YpLovQAte-a}ewf4)h z(vkCMfyeqw00L+<;C2E4iJ!+|KJTgq)EhY{D~we?5xKi|%QAXzmki{>V%O+kdrZJk zIMwgT;H7*tDI)AYi5I;q*g$+-0=^IB;94it8l#j5qyG)cKI2^DKfuai;Ls z;Fk1LZ$1txB@-ta9H*qQLg21rZ^Fx&{TzRRI~D;YXQD$1r9+ChI@_*lnpOgcbB8n7 z%pa6lUFzuQr0$+q4*W?UQ3g|)5x?=L$$429Iu!b?%I2=#c=a!71EBwX&||BcI`y)9 z4D`-21}UFU8_tCTet}rS_urqN1VW!B;MZfl{5IC&`*7hFy5bV27l745Nq+^+;QTpf zBJmwi`B&#=E@KACe9`uKR!_d`ghV0838GQo>h?SUZ_pdjLzJ%ho{gR#U#eUnZj7fU z1e)KMT6s#kSPLR-YXB~>`q{vViIL>lJu#m8jXvBDlgnF{2Ar$Zx~C5Rl07lD?`9fv z(f_3PfOqwO^%wL8fz|TGpQb47AjcJucrrL`6FNjct?vg)G>TPRzQPcmuQF)L$jDfa 
z6>tQdyVT8|B)^K;2ATq_&kd{C>nhB7ntT(-KPbH992ya)4KTnj%V$cCTK1V8)}U3= zwNUGe5kDUqdwmN9%L)I2F|bGc!|2msTgSIE?6+kgLgRgP(8PfEM%LRC^vOaHND}ba z3u(1ETx>Sh?*QCqz<(|n`FFm=&U!*@o%%pn{LaGCL5& z$ysAa{Npfbce=F3sP*xY?tpwI$R1ncdMf=5a1caM-G?){?8DHBKNrei_?hKpWhHH_ z*TGuVgJ}J5prt|-dB88&I8m~)*BM!^{z~tYcV%GVxyF%OIbb)wJtn-yI3b`}xM_#LFeO zpa5hbsM~ndam7qlfBd~7BSV}?^Xt2W58FAuKf0_MK40wzPUn$2hS^s)57-R-+c7E0 zhb(`E36HWK2V}vKvfw|2Z6Z(Mwc&IQYoFA|fkPnw;}~ev0MJiQ86}2NlPr{?3@P8xgs#S{%Z%$x8eOX@b)}<>xoH2Ot~2+u3jp{d5*4 z8gngRz{-85$6Vyl@8>ht4Kxy7=j&g`9S8%41nZRGK?*TfT( zVXF!$ZRX_r+6IJA{vfU8$4vM&=*d%}hpI3#?Tvi30AhO}pfhEYb1_FpTm2|3-X#~> z8K8oOz*-n^GiD)|I1z+w0cB*j0N0AVA~bod%0$j)*7q^~$t>gxcI}%qCU#nt^0`linpBXF#S-)mBY8lPo+6Ozk z%h#-~mNLp%7mR+b;rQEwX(ku%v8!p!Fxw!BIfBVYmFCA4o5ys9#&?w+man;V(wbS9 zFDvyw9!rE$?ezgq3H{Zpg%=)_sjxU&v_dH6jif491HMJaJobXufq;!6Yi)yzE1?4A ziojigfb8ms5AP_*^8p!?pm;&;)EY>M=LMwzI7&kue;fTO_1jRI>)FB5HBx+T8Z!-8 zs5O`LoBCxhN?3zFqK@weqcV5ZzFHPMs=1rBr%7%E*5g7mc$|rL(KY)$-udrYf2Qc4 zqu2|o84MR~ID=}vTLMKF*W?;yX{fyg@|-LZ`g!j`l(>Var=OCRL`Cpe65cfoSF9^E z)q{9!pz{w~lD#yv|0H!i_aqtPF%#N$X#6?YaLOK-*iYxgo$M;6BHCCTyuR3$QU)zN zUbk*wHK@7k{~|c5UH0?z{rmSI2U4j*Re;+HkU@XZ!@P(}`_2P$xF@9L-sXlp2Kupg z%I9sa3{P?3P1`3|u7HSM2iqVT2!VJ=_ubjF%CV30cRF@$VG9K;v?2-wU%ygD_&y>xbA>?|WOoznsfjFd$TCna_}m;xU~OI4GQ^>rF(y|+-I)>HTZx3PqYykbhLaAsKuU39;u(cLoJ1)m_eDOcg0XJ7OLJ= zMd4W5&#?VI1EmojHxI2$=M;El086QAd9PrD7_qx_IDS}4`}zC?@44oR+He7t1IPW{ zh~^`&8stW_oiF=3`JFYZMrz!f!W2P@IJ)&#=)#=Mfai7tYA=j8P&wQufW8rRK{oBG z1xn=??@`iI4d61y;gR-#%Faj-g@L=nc$NPC;eV5}4$VZr&spcwj9X&V@rRS`PotgP*9s%HhIRL>ft zQ{ak|lw|m)l1p|KFYejTbuHW!IWLUQ1T)O}T~AjRyk*W`<#AlWx^|~;dRXFnaiB8q4IUqk%1o*p zqUF(@4kd2gewTY?zB_RprR(Ehho!*e$Nqg?p?X2Y2y188%AP!_9CSfpOXg5xZ0i z%6lB-FVp{Ny&z6M!G`{~^oTG#4o^~OjHO>gFfpYPURKi{msm!n@wbdQ!N=v^d~8M1 zHRd066-eP7Odki>!x#F#jj2CGCfX#(-PpdFG7sv3*nDNGTNf8#4$m@5E!=mN zcjs@_)KPKtTtV4&zK{{}54WaZ;C5CGU!>>8J%6OOr8C1obQj64qINy=XkXy{Y0iTG zo4)ky=TF8R`gol0crt%9k6{q;HX)8z^{iCE-mZEw{KBw*GlzW%dpC`379ObIXRO&YPn$%gbc&cr~CLf2dN!91lpldQY6wY)pHOE3N0|0)3Pw>4A%@N%;L 
zYx_f6X#b~@6UD@!OO)8l(&SF2k2(cPQjvJYvBfTJ3IJUoq_{uMW_Rv5A?XDqXl#n zF6b|ROO`*^mVmfJ9Gt8vSpEzG9bEk%XR!Z0K|w)^zv|^kJAuJiG(61Qy`3$=m#!X` ze=a2sakKpMqmG`Kn1&XEn5!+sK}i!>7vL00OE+^@TW5DC*N0qxMq!b1GGq2l=OZ=`Kq-P|Q?Ag&Y~;Kmdo|M;L|YvFElf1f8~qXWd+?S3%mIpX*0%)||hC?*beE(&&5u-ah7xs4vO$bl8JH5YTV1_l?fz{K3l zEy0TLv2p=x3*0310~0$Nn6(7N`LDu3JZzwdkcPYEb8QMfaG0Vc1bW|~;RXeZ?q7#+ zvGM&mM3#b=6I}I$_5FQPaB_m0LrM^@KcDXBalf9xVzXqY;NtvG5A6PnzaR16wj;DM zk~(x)M{9Q*HVSSYUNG3)LLv)sY!qZ{IZ@4!{K&2!?`G7yk%TJ>wE5C0vSCu_?HD4RlkNP2ZEC*MAYc01 z`EM=;=B88Vb}rYS*1SL0x9i*KxECO#KST%5{OweehAnGz9eGA-tNa~JDWwPKQm6)94MFKf4>DoJPCY0qCi zqB48K=P!n{Yy3|1of?XfT+y}K&=Z`!DM$I|mj@j(gJW{pzlynZm|yL5W60pj-}WrI z)m*-~z&LfVP?F~A{*n5ipylXhmGeeelH&}TjcBu>XM{$Ez1ZrMs|3dBN)qTZ+XjT68BC5d7>`F5<8nVs&?xSt?t`m z`ItC{tadcv%XGTbG*u1H;`gu7iW6T24yX(=Ub|1^HB4n|b#9c}@Hk^)UgHiUUBm15 zs|=^T(uNZVdex=2ie|b+r>30OHnCEwc}tcIliWpOO6QEp3$OdjH;?qyn zdrUZgja(zX>O{!FxPsi8QLl(;$<^X~neTeON>iKpt6r#gn$$;&NF)N8UsU0xyy-Jm zq4woH$;A3p!8 zmfY9w%|I=;*z`Pv`t?=G>>Fwq7Q+W}*q-}vvI!ro$AYL|``SIBena1Rq|z(HJ6vKs zGE<=$0AJw#*Wc6Y2YNfiQsc*mvJgtD#@>BGy&dEO^L%(l&g@V!i+t|K`a|NCdNx$3 zMq}b+kKfg+3Zr`K*@zH_tA?a`P9V-6FEc*o9~D=5r%H#~tXK9jG_3y3@Nk>jVYFdb z>7!BW+MH*@gED2g=tgDlL$fz9Tb4T>ttD6BTD zHB3EQHb>gS(*8Wx$kv$G$uG6xG%RDt#R%`v88V~ZE$C2cIDI{5=JHLtLnJw#~ zitQ9T?K2*9x$+rQpiyJsL|RmP(5S*?=zFkEqC&=PsMA~fWKH+!g7F~Jxm>LML0CSM z)rS@wE~g6f7T*+e_RGrm6CPdypSF!U4bi zHu{d<{^GG-quzB&d5VdyJd@Pp(tgNL{f2&+?sOIRK85`|u1|LFGpZt!4TkusD{{|# zHVm917QB9^$4^G%N;E>kA+DLQG~;ix0_47!s!i-$Yu5Wz)r9VCSA5aoFjPz7eV$_F zs>Q+7p0h_>`Q9bxvP>$sI@x=VOQE;sdMWz(BhkuZ4>jqSTl(o@JEiIr00T=@!TEN=hm{5st@diM2z)BB6Ap3avG}w!c2SxS@M$Q#U@Z^ z>^rmbxqtgE;D3Kx)0pbdtk<=|@N6ExV^^K!nH_x4C4KAEk0;6gk3=lc2(I6Hu_=hC zSN&$|BxUP_YY1O)9$HO(BZ?SiA|@XEt(0|lZi#PE=c^D2f^hmX4|=1sjc<)hP~VGU}h6y%o%P*`-v%WH?8-rG!*C+w2R%% zOa}=(O}4N)234QeDzsWhVqS=_@yNL~S2kv5_{H2F(mEMDFIqG>bmN+P;6uw5{j}Be z?a%E2qUj$T=3F1g2R_!(5*`GwYo~LnB$y`;)eUdxCblI;*M0nW{2;I?xdNW+Ltw(i zk0Qwpd)7I^vhi9EO@bkV!%_Y8R*ceQqofTKo1tnuCd4=1Muu93G)i72MYfnTSssVi5WeetPJHipZt)r 
z^FWN#t$OA?G-RgEj(Bqho2*b)&asoDUOo8^YJPZKQ@YdI@u1}mezbHmqMmmGNR#h5 znfuK@;cbW6mxc>w!&4IOj6Zp0iTOm^h|J-6iGLFMMjBbML%-6m+fO17gyY)+J8oY$ zD2(5|j$tN3`o!#$Fjh*pOCdBkUNGypS1d#wC;HwUqbg6pmdV(@K2J|&T1|0sW$4{c zpYC7yCdSCj=ko~C`#+>w;A6XME{{Atsj z)T8#z;T+Gt!B3IN-Wi75iaSxcBnc*ZnX?JSz`2!TvxZ-!&r5L}?0cp+`spdXmK79i z!qci){c~=atX7MNS&>3ZCb2#b3hlJV2wUkngqDf1VC-hx3buXc%=}`DbYO12Kz+!` zwX1RVBq@k3rEu;5H@}@;%_nTxYN(|ClKiYbNlD6`@N<2*v88{(UHI%xazrMI8j;*O zL{BlWq;ryO%WUGLzr;eLe8|2R?^K@;IJSXapuEII# zU}j3DH}fNr6XcCG1?Qf&#FgH-0+RK+wUr1(Hbt*KT{bm@KC@A~st@p+w#PvUg(O<; zea$lsV-3>{BWEGIeA9Yv?>6VW20Kq~Bt&p;e*}p9tl3}rOuoBP7$_>?f^mG_MK>=w z+CgXudw9WZvFPZNP4nah-`h7@wIf*Rd=9X!IZQItPUZshy#tH6W=NRDJPU-Qe4$@s z_)JCzPwa&8Htm)NY6c+=3@g1Wt1AzqWUcu`_XjB zd`gz=&qvc(jFd?)8dXbqcJY*)fjl!qktOpMqRh#{zTz00Au?EA(lce6Zk7Ig!EC{v z&k>zU0OQm28J)#yb5u5NWvGgvQs!?P8sc2m8mioF)ylM<_;NpdgFz>V?=RG5OzDq-l2R!KY&#sMJ`Z>n1`3;&H47?m=mMiak`AK3Rj?xeI2ck`_+ zzx^PNGwPD^+LR#J>K9FT1)46wH9gf+Awthtat{6Hkt+IdZQi}% zOMSv(Wfndkp@cl(b6#Tx7?xr;XeM?juS8tSGe53m_{A8j(dusKHkZip z*??gCwqUwawk%FmlxD8NZ0t2h-6+@m~DY|zgK}5oT~nM2k$Ee zf-6)i4L*~dBictr>3XW0#Z;Cs)qRL)!AYc`6`Nu=B3)vv4<>}0I8)l$vQX;AeovxP zr84p&|7i;rV#k(t6(nOtClhp!jEv&co?gqSj2C?BjfP*r7WQeBE2E!4Iy;UdR+@O` zu%P3|Kv~Sz1EHa+>d#xWowOOB!c`PbO&9f2%k^Il;38|DJz82=jv2}C|X`|saIL&J~L)Q%(PfD45xoLQshnn6d6EHEQI<_Z#k!m+W@ZCN) zuP*w|%rRRz~_{}8zPmc{Dy5@8nu=pgY zH$xffFGcF(XR6eq@bqkL()3Fgf)mX--|V;#N5mUTZv!_U$-ECL}zR&QPFSZt(Qf)ew=@#Y0sJQnywxu@YeV7`VKe@%Yk zexk3UQlUtIL3GHQI5Jn{J5@2BljxwR{ErNT^1nOLer7#*8bcLW4$GFm>h478TEaK| zQ1nOTBmbJnj-z(k%UI1~4C8?~0nAy2a^egU0NV0!_5QDT z9`_%7>EGzx|1qA&!vm1Gf8u#iX!HNeKOS}{#tHsCWToKdhK5)F{g026^FP9&(0Tuj zmHm4${*6@r6EFNPYw+LUg>3hDAuA6B8#^dG&c@El_CLo9c>yH&54;e3`UhUf@fR4V z4QBW^MEH=C2S9=U1_rqSO!zNgkd2)e1b?B=@-JZUPmS|`V84HdnxRPU-@xD>i1=T? 
zAQ$)l2{2f!>+6N9l_GMXtp!^9%ISC-D-o&BYGPAo7iIDU$?C}R3+flcX1Y`43xt?@ zrHhJ7n}#=3_9(uZVqZrc#m|b~@e9b+R2>|^Kd$SEeP?0VueT(Y6;rS;=_inzah9_u z|I76J>ypU%^6hN%c9y}#vfE*ET=n>YfIbCjLD$Qvw)f?v>O&P2yAFqYN$QM0kYGN% zCjR2IP;t5&2VdK?^3HPmL3Yz43X}OSQw_%3-{^|!4b<{ZEoi5DK38Qo?Y6zgwLMR1 z++u70X}dAq#J$A*S>#&aWU5NZz~Jn%Ie4I{$-{lmZya-K>imGo{HGjpSKdz-tziU; zkFG!PG<&#gk#;gZpaR3Knc&)*gelXE{F2#$0P$YS8?M97dD~W_?Cyn{;nG@ zcYUXOKW3=^K`aoX#2iTt5_^)fFhPRk~frB#$2AZspT}9 zKzPOX_SNZV{&)yh4%Wnlm(;P;J0xNhdzCejy3^=1iUk$K?bqsBv_=SIlDJqhenG2U z5;^dpf~6LpdMhU)uq1a9Io3*vdgk{~j!)ZVaqB;L&JiAOIu{che)`4vsa{%7VlKq; zJH?s91mQ7mLHn~z!w`C^*k5qn1@ZV|qO!;s;$qH4Vse9*nGK%Y6FB)5!Jl6eV@XS5 zs`a__@%34TU_BU=$CYHna@dD!^nitnfbFALqv~;%A`+`6#gZbCLZcY4_ZMTPA8MfE z&c(`C*-5+<^p`iun}89cq(>AIqai>hT8(g)#~B>H!RfaEs)GRyyVYU zt~i}}>M>j_YLv(1JVb4DVyMo{mZKOaHKfV-D*e^tSEUNO1vy5;%tQx~o}YHAe=%Q5 zk+T^Isyut};?ZHzryMVgpJ((!oJ6r5Bp8DxR#Ms&Hl-|aT%_haTRJ1hX$R}G+QdXSWr{tn= z>Eb%$KI0l$Mmy^oixeboBjMQC3_4#aa~ zFWlN4SW=l?#ltca+1s+{%n3^^LYhUTiSdVaw!UyYa{Ux-%$(zrC6M6~slhSs0#8Wp z{EG1R8zxJJg-;HT?d)CT=HE(N)+JHkt1#xoZjffTRV6mLyK2)p?QLjfbM9EYR~F|l zpO#al8}WFjNQcPpvO{e`=Pl21euchf-gT`hk`wWUK8fru5KkIQd;)#^cUJ_x%GW#f`t`mx-A!d z`MPtv-^u%!n~du=nLg6K;8&BMAkDi(9M}&lD-9y-C<7}r>!Ej<*elXwA4SVnkWu$5 ziQO-8xk|sidGg5mrpGYfU(D|MH2YjAc3YPs?$v@;P00xAELw<_zMhs|a!HSpvW8{B zJbTHzqx2(pK^Zb~4HWvu0{DD$&>nL8ks>K6X;EQ)q1*GpFUq)(sUiWxY*GP!fk~Jf zmwxP+gT-&I2hJWZ<^8KFLR8OXH2()>ZvoZD*FNeNFJ8PjgaF0e-3eOUtps;>_u|35 z#frOoTcE+ASaA#Pu7%6*`^)*xJ?GwY?^-jNS(80`&tzu5D|zzFenB;Am_2idE1Yzh z%=6Acq4p=-(aU+_nQPyrgMs;Og0h_{CSS*6uIwz?DA*~#jc3_W8YCT0$qcj6gF7SesGk9iJZM<2&t_v#g_XtxXV=ccBjwt zPL+9HiXK#G6l=->T=xL{96RF|M!NmE0CVfxy0?$Go`ff@8(ZshlK63h{R90BD<{D# z)$l{;p{c-8l*Spsc}jp1xuz6LOAFp=h?p8e{@MOV1il}FZFmV&vc7C>^``@M#DeP_ zdr?l&X&8+muvFN0*jLy-jMm--XBp98L(Llpnf(QCXC$1)0>&2C?@lX}B9}6;6}`ev z#9|oHtn!_zD{<~g+7d!PMAfcz>@KRzKGGHm7Ap=xf2@pp+hkT?R7dIK%|L>OA+w$==3P5OH^I6K&Yg8hx{$wjen8Eybu-9e3K_y_7flq=WgqgS_wp`}E;mb!Mhvl406%T9=RyTsM+;rTH11S)T|8`2|NH8VB#l z7c%@S15DXj 
zjBE8moBNY*Bk_!4u+M}Y`}?mm2E)i+30w$&t1yV3cn_7yw=hXb!eo>l9jXkJCxQCr z=J7U=Uy6>RK+P`LA9mGTdpb%4~^nX;+GUiI=IhL$D)ciFnJ)C6v>hM{X%6u%2s3(2Cf7 zhvt>-7webvIQfpd3w#$+%eN>YW02owYhkC7JL)b$$`D4lBNJD3`z6s(HP;>Op5(-A zqA{q{krySs;W8$^J>sg+YPFxoF9htY><;g;g(f;+zkb55giO?n4WyYTr1uW%O0vR!|1W zr+w7ZX1oW3L=0%QW=*dz-ld<`Bp=dx@N4yOXj#T;C?RvI(DzW^A64@}6aWy_>SNpZ z``3K(CBY%5fZ=v03E1~&zPno3KJ4>1#0 z0(5k|*f|OUii*3*V~38?84^l!+MxVf~$)d@|D`cDQ! z>^R4iR@Mm@zJUNaz*!($s)2b9fM4&PgFySpJs)aPk*j1JULwli=_c=4VH{<+rpZ+= zM*4}#Q?yw+Io+_5f$5m2s^i-u7mInsRa_-H3qN`XtCoV+yaNA$iw>TH^aZZg_xE<` z&OmCt5*fE!?o{0hC@0zNUMWj7q#H$SoKY3wR??QGYi)0f1TF z>+hBWZ-dlY_k0aSMav7eKYt4O=%QiYTf~nm^f6J*D7I3NurjS=Q5QIuDla=f1flx% z@b_7_6I-XqQ5_&qxAv70<-h<{P%H1NpVU|d76(>^3|)Fz*mp=!$&pEqAn^WEay<_x z-PF#v9t;QeFSPi#fD`u%MMnbr&C|%C4|BaV*1KhJPOQP-RD6cP%+F%Dj^XAp#B9YM zR+TuEzdtc9h&u$6k|n(5%J&z>7x~HiW=$M-=9;;<{ubVg%?&~b*Cr>1D+*tGle^=h zpEn2af7b>70^~ptV<3Bai!1bo`<}MthH#Y?y${tYb>$w37RjTj=8MYW+v=8hBy45Q zm{5%-U0Ss!85t_vukI-_qb<79A3p3g#VqjV=$bZuoZCnZ4V}$a4c>_JL^*xCLuwN# z>eFlVNrL3mEn(nnd#_5%PG_7cNyLZ?QEI;otnr{NJ@V zT6VN_9S8{I`%cJf@EMtyy3;(3mjO%fgPGpJL?cfg%k-9c`OkFLwDS3Bvt0^}Y?_4C~okVW>2j0s6*wxkG3xK6) z9V$Y5;{#R4mVb!}cm6n?7|#Wd0WR~64LPglPH&yS&+MU;18?mmdzF8ZK6p5*Fc|%) z-tg3S9CN($TDCWK>5VjC zSzcQbr&}afc}<7;_#oX)^(%L$;_RcO{zk<7a36m5{V-IVT!KT_6u)6k-+@;shEpP& zDi^}0>N{qC%FssO$f9WxDilqYytFNmm7=;CRaE}NYl(y@DpHm)Dm;0Edc|j*oMO7pcWc&-#hnjq0gHW z-lJwqt-pmXd3wo-vR~Uk?7fjehsY&%qNcE}sKr5?U;A{CcwC6=H#gWcLX@n2!V1^L z&Q*C}kQ(aax<^}TbW8r7;MJxk|4ZDwOrSCdaF6{tWAPG5$H#X1&Z2?A0*cuA7oS!a z*zOKbZt1EfK?)E`sE4gdC!A$TQ9UUl58qy#(Izi_?@lpb)a zvK^At@ffMj6KXkqVzb04#QERQsQ;4MxjDH0+jQsn-y2*12fa*DTj!(XN2b@ovu!@P z+WfCL>L14Ssyk^qX*t@w8fF$$y#Fwu*De1hmA#r*|1FhqaJ+Jr|E5rU9Iv_;rvMe# zYZ5u0f7CBdUaEhV+hoA3tJNv)N@V*iyZXTZ3wm8suc{yMA<@(pm!~I(R^;rH@?iD@p{2yli*E(K- z;A=bosuzu4fc>?e*K_1}ZR^$d`)B`u&Wq=jmhtev*89q$c==w>=ar`MzItq2ukZhB zeihcZUTrpB?$_gFf3?aux&9{`?_VGKf6=x71WvE{^ZxTf*t=V}zFrV}Gj|JVi+^3D ze}CM-D0RefiT$0-?E@O?w2Y}fehK0`3`7Qa?%0aso3c+Gmo6pks0#e2MIs_u${t0lmj|Ol{3;Ug-fb#0(E-3jvBac 
zC?%)Xw3~jl&iHp$M%-#Oj~ocgh2Cx#fnP_SkZgE;K^clhJng2O}ZQKf}Iqo{KgyYO!C<=_KiU}fd&T3E&C6UI?aKiLXy{6Q8xazYj7j&;5@!z0o+ z`akkTOlOSIEf8;l*LL7*;mbjU-Ylb3Q3?IIrY4nf0=59A*iQ*s@+tzAG&_hW6?8Pn zNb00&ed4wdLpX$TgD4h+@(pPWG$83Y#k28~aG>{FoV8FTOvX(EDxR!prx48-oHh7H z!R2Tt7yIt!@LO*oyT+gI3*!qLKA1w^dTz)JQ+Ej zt-%?c4aQ%F^C+S7(M2}WV;Jnaq}D&kaz4f2Ro5|eMqqgC3PdPxw2TPROv&y2E~|~^ zW~j-iL&xzePma?E!a;BSyW~C&f0(KNPMZH=%l}Q9d0wN!zmaAx&i{!t zzlNfJ`t<)G&Hvi)fA9GJsr>$zg386i$Ntax*!;i8fthX}|FnSxK2f(dtF>ENdI3ch z*M{69KPn(f1O036yQDW}CR(2F5H*(x?2POxn^~K+n6#zD-!5t|M$Ih}+8I(=&(Q3Ly=%Iogi2 zPqhc+ef6^$Cvl&hPmm>>vJP8n)_^}E=5arb>IMy297?xY!cqUt#r z0Vq`l3y|_2FVp-;!^V}|HEOW%c<{Y8j}G#m9v0a@rSFyC@9^0!Z3g@uLtyUmcwD^l z`0Sjp73@K1Y~W%oPp5cem?NM2+_b?YPBi(!CFup=Tlhv1HGi)w-dLHtZMeqnl(Kuf zn^G`iiS$`H6glv;F=*k1MmE29qdrIM!THE?%jEjkJMK-?P3+tKQ_4pI>QWT3hu1Mk z;)AwT6-;1tru}8Ia%pynBnVCDFcMHrI*bv7CGT1S)RA{>2R_m+&<`1dSX01>z)sqB zO^6ZXC>0z9TmmjI4iSQQ6&pYhMPR!wBo?^vVF(#Cr`VtZX@*oKgPVbsKs|;bG*G%? zgA#-ql&;Vq4HK9>N4l0J?x0Xk$RQX+Qu-7s3qUq(ue29@j@mFfb70 z5Amiwm)}h9r;X9TjRJat>>;|e_41$6`++fPxM4tFkUPYhc1pfbp-^6umfA*=9!Q)4 zR)RD_{DDJJ5PzznM@Sd_P!RCG75D;y6+oV(AYQaSa*$rd3qH`e{Dlta88Ea2dD4Tl z0(~?fVxS8-&@_CCELu|D75x`o7i_bu@444Hd zkZwqo_C5y42D&f@8K=UUfMO6>6y!-9LIk|;0Zu4hh=6*Sh8RH4ijY(J3kQ&j!bL7{ z4Fb!7_)9^oXnkZMc#t>+S3`(7M3`kr4U{0y2^ca1Mbi2xKj3D0TTq(r2YT6$7mPmhO|K~@(p?r zWr!hQXaHge9Fhg`$~PE8JRntR;CNsqtsd==0!UlF!4P5r!2mUALTDk(>EMqLI1nMI z;UgpvL@3W`0ucr6fH-v^0+7-4Vgm>(NRW0vsn`g@3(}{xNiM#B+n8>~lNe4)Q+2;tCf%b>C=2g@p zhi{36I?(7fI{(y$GeER|?HQy1}6eV?y5+&RE++NC05}j_1Uwod3>}J>sPYy{Ffoer zjwp$Rf*kK1!L1BpI1(5YivJEbi37rcGK_}yHV!;017+)@2Xw;=1Aepg(e^Ra2q6BE zDOL{F3YIbCLT5*1|7?d0=L^@7ye{nD=?VOUKKb^eOff)G^Ua1hJ!0G?&|N8f9PLiN zFKTlU1#s09e2e%b*Viitj}9ohGNB2%4-Wb~DTdg^v`+4Fd(lUIE4Q`cz z6(Dudt}DX@;ewt&NCv(8x)wVbgzhcXceuHDir7U52x402gAZg`*8vQ%$?Orq8!seQI!Wvi{1@D`>P6 z>Uv$U8JsGrw|d_^k}&mpQt&T1|Gn$|`F;%wytA^lo9N(Lw7bv2QAqxJeNylg_-Dc1 zfBtSVnBY1WwB5+S-J-S0^%3=B77%^yMDkbZGum9t+hp*1+h%0gsoYl$FUqt|2v>l^ z;6(T}4BipfxhR+v(NJvDD5lGVZ_@~=O}meWX4T`Z5zBf7j+2-qifq?=`@J-pRgyje 
zcvhm;Q=;2rygIM`PGk@(Uh64Nok3p-yeH}(2Gc0-D^Hh4UN{4}uFCUNmQAx9nc>$= z131q0!Lf+;__q%4>WsS<1YI8W;q0V`XW_L`bs0<}g16z*W0bIC_zW0LW1v@`&i5qF zWf0&AW9E%M^pu$hTZ9DP!#^NiC3vet|8(JCYaA})VB6KX$qLWE@s$-G2wnhyT7zqF zurW{<86?T!7iGXp(Yt7dcI4;FZ>)*W4NppyGJine$1<^;oO?$aA&whb7WyCJ5$eOWsTJVf=xiZmJ?-)kpa+%F7XjJB? z)6hQPv=YTSeZWn^$7aI+NJWv(Mx`n)DSi^2V#6ROZYa!UAk0NaK}et)^Y(4*V*?@> z?JX+w-8(WDjE$@ioL`*5LH+e)>v-z~>p1I#>$rUU1DIGiHbka)rUa%q3}b*2kTeO> zd&E7YJ!yq%`Es1(iZ?QE2*lO#ZnSXdwr*yVIUcj2dS{7Ll#-(2%a751*cX)0Ug{Ug zCo$@mzGg3+K$HtF=o*#PyB8g(iSw~T_zQ*)`UM?KwdFclY>`*``OUoute4t}>cnIH zl&V%DF!szTxHs|%?LraCRz)Od+?+0F4EMb33xZkL0WP?ps^5b?Da#s%KE1yH)D*}r zCsySE#1LQL?$FoQT8;62Ry#N@L=GdNUe$**-<(f_Hhk7X{7E6%{4o=|RBNIB(+w;a zm?JmP#&q&zD`p`Yda9zNt!Oc1ACmcm%|)F4cyTPLNNUwAwpd}CRnq$opCB0O<#w{v z4ig|zst4Hz$q9cw6c2++Bu z-0MA<40&dHWIOHMoZSAqGg5wkF}b~xaoYJbx!v0%y!|`t=Jxb>&CA;9#o}-0<1N|Z z%1fN9h~78D4BNfltI3+cG1iHiyv~`R#v$U*4c)}AitLcsinuE z8=oa?d&IpJh$$oB*n!Bp4F_`UbaFltu==l0<5f%B6F~@RZ;%e>kV!9&!0w^}oZT-^w zG8y`O@6r7?AjsCkM0NMSeu=4sGhT9=5LliiRu-?C7RQ~5)buBw8~LI%U{UU;Mn z$-&dDzv_xFYd1SBwY|KZ#e$X9j)pN_z3PeoO+`pI=C$miEQ7#ik-ngUlTepwAm@UJ zEmgqE&qyD|$o=@A?UxHy6<+6cCxd3cd%cJH%oN0gyfYcc&?7}wx?^STIt(1C8yh;J z8h5ZU#4>sQi1@z&4nPs#4hbI7G$N;rcpfb#E1eidCRzx2(%lEWoibt$7c=Y&67mF$ zKEu7EI~xA>5(AcR^+$QWGw+`k`A@&qQZN%39n1yBgW*C4l47YysY$60sb8PL zpthr`q^_i5qGqBRp&p@FM=2)%jcgOqB%V#(NX13XAdgp!!-??wO%_%ba(UE@ge^5slC3;JF|IlsLsY7S z0yS;YCwUwoDnmHF_yx635+aZYf)FFsK+PeKj~fzBt(T-E{}uv=DhdBitRLlEQurNT zKaz_i**l?r7_i!;Uh&n)C*li2m@>40Dk%I3?!pfmB()mc zg>_F3#1rmX1_oy&hXd*S6q>5A^;ypDCC5eb? 
zElL>so&wgHq>=Oz>W^~a00lwalAI)ZQ16jpdQh$;yd*WK3zP#oom7-WlY|2`gR;XI zpeaeUQu)DTp~6`ABrpzWQIZ^$NrXSr1v_jcDT)ePk|KB+`yLl23MHbxl{^XYhr9Uq zxZ^4WB2<;@U1h6+S1t>w%2=ycN zBb6U@hQy7yrxa_%QRq?lQLufKeVBcOeTaReeW)RP`+HX$Fbx<1ECMD6vw$(dJYWJa zEf@(b044^%ZNP6pYlsWuLicow<>VfK#%8lBMs*!pniB+Dy7~waTO<0qp zKGhg_?B&eyA?V={^I0|M`^&~Dk#0}i@p}OHX}mlq94hRJvmk-BzIF{Gbfx!6c*?E< z%m=SXQ2-N@YLm?_94_E}uDd#)gOCT=rX2x<*ZfmBeAIZ9D!If#h@2J8$}#PD)U9;B ztd>7G&RfwppLqj!!_ECQcL8*PwOaQK`8TS9yyw(|I^^{#Gj(;Y2yJ;m)142GzY0i# zuDUQrX}ZB2dXni7>W0uN>F4=>{tD-I<7f}J2juuzbJbL%m_$vw7E7A^xNp}qfND2H z*wYIQvAQxmJ0PtkW7XR&d3zHr#0U97Ex>FGayKa>(*>PAqk)DCBj&g3!vrve)KM*% z?KtprrX92yCYZX236pijQ#hKI;!`daly{^nkPrw;Use!}UvpheTY@^kJY7)^ej35t zV2&3^GqG%tN7m@S!il{Z%c>OOd1hYzSRB~So_zPh_sI7YKH0*ce88_ZRvoEUNg8#w z!?w~^NtDOrUAxk*`pKOWkb@DNL!CdR(ykx&mDhEu5UcWGvdt+;F?dlRlNIGAb!3`D z9j}$zDzspPei~=wXZ7y3#%m<`Y$0Ij+kyQ6$u<-0Bz3tBWcIZET>j*ELEF?9bvD#A z{3qi(l3(y;`T4DCX+V;RU#5SAKY_*h2Q#I@XdgtEM98nn7E1akDBhh{f~v*lK?>}! z3l=$K#wnSQRPW-zr{7bQ{qpQaG06g2feAWX$VKbdcfrQPnTzaxa`3=uV1}2M9gSYdSpzlvTt3GbKSEDB<2S?UUb`CcA zJY2Ig{6%IDO%EN5*$2NNn}tu(jw-@JxM%$2o|Ow=?mZ9vc(jq0GM?2IfMBjJM-`}F zNbs{Be-ADW(>Lj=_X&WC<0tN)YMyHfhJtJsXbkg4Nu0FmrF0tVa!n!J_(U=WTT>g; zEa!V4jFyGo1S%@}vkUr1UQ&$npiW1o-~3 zw@u)yf;l6qri>cq#!bGTQr=`Y_OSsnSOEWEgEanCO z@6iIDDm#j~ODh>ZtgXDEv8kUj^w@!xGZNa>^7?cA^Hqj|>{T`W$|$pQVJJ@TJ?Wy* zH(7on`MtaghM&hG8Zgk0N^$AZ^;&}wO`18>&`^lnIfZ5<7Thzf`YMbLyT`H4pF90v zZD3ZZu*s}R&0SwPi-ejm53X+SOyTQA^qqyQ!r>%9w7%j%7?rWGn3(>grCfY&Eie+G zs#RL;0bq@_3>xSZzjV4wncnqPSi9gTq`zbcRSgYX@6~1N*4thqM>5S*#I;gIP15SU z`KF96Z)>5SRUI{=kQ}&;2L42&A$8j4Y~z1eM=z>qO^V|!?|3+PkMKl@vm|(ALqW%+ z{q2;5)nTB!3{Z`t_q~n?Jn{J!E0-z_ua>grb7X=hgpm`vYW`5aL8ja2$lm~H zLcVr0a;L&4Rl$@D2s;mH4y#JtOxOsWKliwkg1hKUYDHHc>4)CbiO#y0CFdK}XV3zl`0XJ{<9r+h(K+1-}Z-)0mrvh)w+Q-P-h@%a1gd{mM9iMM|eM*v` zZ;Z|Vu-~hYbcNq63&ot?&*krXF!TzR>(%Mqy#0>Tl19^?ah9sM=6>H5ADy?Qv=x(<(UXq3)RwvjDqK)+gpuO^NQR+frHu55pa z_cGP<hI_X*MOTI%*K=jJW2uh!^AQL?#?n~fAK4p-x$Ke%E+;_AJdli_WJunL;mbfW_^F3 
z-LDSa3^6mu-3sPyMc-e{suFj9JPVm+TJTE>;GB@+E5=*zB>!pIrS|DF<20YRP}|JV zD$>hJaE2AAcZ_0MNa11AZ<41WW5=ZA*S49Rd5ML+&HB$EV3*{LAG%D5qTZ9He_^EI zJs5K!uxd=prz&dcp9%F(?_X2O*%7ADbF+X;c4}d$*DTQaAv}F_y+Q%S{Eys82GPdk z0M$KX8GmXpuU_>Kat%Dz+12quL9|hhGQ~U@aN&VH>+5qa^Q>mF-;5TR!#UyshgiKO z*>H?n9s!0iq@|i{n~wb4cDT2MC#FNW^F*;i;))$RKXdL!Z1VN#&BW(q<82V`^bu~M z2SO#(=paMB#WPj2vJ({Y@McIG9ZHoCT90McbcHsaX$R)J%&LCq8mm57hfWkc9R6eeF2uI*Aji|F-jWRKMR>U(fp6DJ7XR zGaB6FI5>&69CK5X>dmA%3(T~6Pk7im@Up9{Z7`JoTvie18%pe4l^n z-MlJx@A=sL{0KiALLVj+c8WM*X^*`fo?vO5JDYpm=xlU%xncD)C|*;BP}VV8nBd+9 z)@{w`YjAbHx1j5Iw0Tr`M6~tj)|0zbPzTJ1`mqN&NXSZfes4uL{?J9jRvGclJnBKi z8%fi;f}%Y~D&0R!ccbSr+2(5hTQ>_!)9Ie_~Rl7Uz-Fh=0D7B5fdxFraA2} z(6rk9z#_U>+>7LRSXw_nXv|FRcN|yQbA`e3_n5S{d&B!y#wx&Oo0P3r%~&>%gPtf+ zdFoK4!`m}PJujoIysFAtv!W9B+&<^WR)wuC>Fn_5xC`}J>g!_l~keOs>1l2E#|}1qTa$vMw8`>!s+P6LxUl57}Ln^ znCro>!0P6JJM8M?BFiya%9WCXQZ|QSwO&)s4+N8Iu{M~XLXV&56e%tWnOs1{I z)Gc8DHy`MM$&Br{6dMa45$y z{6|MWKPLtwQL4cRI*Aq4?}=COYy33YPOh_KbvFU&ZPB!>pJHBD3tu1Gyen4`#ZmO) z0uo-Mx@+^gd+{)vn(fRPnVUiWQPQ8hJQN5u|~S%Dm8y^^ZhlAz1M0a;@lMo;SyfZ~e@tNUrh5uCxlyPsEH{Ys!#^P{!`8 zIQ{gnYcs0#X0-Q_UT4mzsf`6ZSwRxEU|N~k>2UJHU~Wdcw#NZ9k+90xxmv7|CLE-^U)G?NMnP$z6;hqy~LYCfh9Z zfa?=YZH>4Ny)l=@?5!KB2}dH%BP^=c6Dz$jYU08{aIwcvoc-Xg!8dDyp){(@;yy4Ctj|T51nefC_nvP~1RPbfU5gquZ=W9) z2zI@kA72l5Ck9;2imFYZpxaAnhoNQu&MQCI-tN7s6 z)?_{f+l0^XdW5W!m?~%K0+zj6^8XHBOYKGp&T5LA5hsCU=jvC{oY_s3YZdUD>F_8Y;u(id8 z7g#fkp59f%5n0)ksWZ4^aD_Og>L}P^FUISnEE!$!hUc?mLe%=#SK#{Rs}PZ~80SS``)=&f$jtZ*2;(7QMv3dAp{ZZ- z*kHE?8sQAKECfNSwWYoI`vpJMp5t~tuv1CF9qC_$RmOt8Sy^eKzd_f6dfw#p$3qPR zk8u;f(Ug$k-*-0c*Q0kY!Vb>~3ycaC=M6+v%j3<{>6XljutMC8=;;ULQXxxyUwy{(L#;n#`>or5(--4cXZbycO{kcV z0%EevSMrmVq62S0dPkHZVE4^=BJZUs+NoCFBzpA(f_MW`0z18|-liE%+_z^T*vetp z(bnW-=QZNbHBW+!63BLP8@aBr1O57tm}VfJ9f)oYqEtnjr7IE=pN`g`kfJedhjmw7 z5^{lS1nlMe%{DeiM}#q_z>RNX^W8SBx?gXt=R#{e~JsU~K6)8fwND_=KR zAkH!vyPajXV)r5Ogh`w;a?6l&_SCJ7VT>FF1;$KHCU|)R9PuA$DC%157TR?{EZmwy zuCzaw+!DN{^Klnlz+hfh_QbnKS|_4yeN`MoYly9 zkvf0KR

gHNOy~f)$#$YnAhalw5zRQ*F$lDR?_+xNVTh*T*QX5pvz)yBW8`;(!s) zn|P#Z;hl*kTqWs~xxZspniIAE#=`M1Zso!8341xvN+ zF3bJv^O7dI#!0$Q_s2^oM(MT7e@hypGT3}!V%V#f8yX`-f!75lOHX4tyUcf^jfbrT zUF}xitJh5D80~X*r=B@CzWs6Im&+J`n`9ME!-)k`PB4*^b+1CZ%hOX5`Den4O&cHo zborGz?un*9E7s*DTE$9&cTBFUJDtHYv5cz0tmM)+@bY_GP1dKTiivNYK>-tavR=&4 zj-wwK2P{VmZ8o0Yr>BW92}I5Y1P+&6XM}WCTieaLzzs%U%C4VFOEOrp{nXMNj>4X9 zW(&iuY@NhWcfSi=E@-AR>=DddsP6yw;F+4nT zy{k{6-?lH?8F=2bv+KF%f6treO<(KQp9{|k^0bTU{8C@%<|fzki~YGjBJESB*yhYY zHX&blETQPdYS?XHpY5Pn&y3%Evq3CrcH{9$+r`6<)3UJDGs}eipXX((kIFqmO@sDi zTn8Y`E|u1AKN4`Qs^RpiE%Yn5Elft5bVp{iW1lG5Ox<=v>FB2k-pdtlzokX+iGaU2 zAB;icf#_}aJ+1$l!&feO;`->6pK$#!O@U;|ft{D7xt!Y0j!z$TFWf9Wkb0Yo^pmb% zG)PxjHRi8lL83P81#Zk$zpnY5kLwR&jzqo2C4=QQ(+4cQ0d_vmjEUD2dK~m@qINa` zO%?oWjnkIzDeba|>?^wd)~C-l0`{^wpd?cq={hX7pA-MOHXMAPnX8!Vh%m^uswof} zGgsstWuSZsP)YY)If@VLcJ%kOe-OT!I>xh_nJtUWgN@;V=?u9&`Asc3(ocOBGm6r63D5ujtPvL=C%GIb z=mX4@P3$xGwA9pB$S?76CeOA8Sa{0X**FZ_e{EC~rPrVvw9RW)F+tejRGq-ELEm}& z*gD-!bZ|*b?{)`FkxJmga<|l|QWA|wq1y$wmL_``v@Oype?k9tBIx09c@h8QuO z;uI-hUZ*v_G^fQkldCb+>T(&`GrUjgWRhe!Grc}%gRf;Zm!oJ9SvosfIb4<*G_@1E zqw_m9!=DfTD-2Ku=SHl;&Ru3}t-l;l$n-AKq`QP^XS{f@T~O+TIBFc7k`J z@K^zcZU=_;fXuWL946D6o{<%yHDy5++G&#PPZ7Uh{gKKZGdlNI*Q_=_6V_eAiSoCAC@Xsf~lN*t+zk_un3m6gPA3?+%G90uIWC6~GP~V^Q z!2LjHS8ez$Mh8Ns-O)Ezi!pAjvTk~ZAyY;~*d|i|x;{s#sVAenl}=wXOEZ1kFo&nb z5_+sr+peQOjy$ZhE;p31wZBN#`etJ_d*jUYhh$G%;Pi*6b2Rh>s?0|wz7k{Ly$=3j z%*n)p_Pg*6Cg7zj+4VmLvpm{VWB zQu9l>wV#&_h0zz6T7DkVh1)GN7O1I5x@_>T51ua6`GtZxlU9&N>_*Jvj#5IZh0swW zpPi1$+ms2gmThCgACFJWpXRRj-a)hQ$>lizdg1}A52+G^Sbuq6wW`X~7?~H# zXl)AYP5ps1`l`2k1}BHwlSFf>8>+;PSi4}cBup>%{OIw{(#tzIiCCErhy0{;J1>2S z8RZzseDunS_c!CEcL3*`9rKXLSxWFjMw@H~*R(`69f*;pbk$0Y$b$^!@?xxkzp8bt zbJY`VuV9f}<`hK===j+s2g~PG&dUXX;CqpvCM_J^TERv@sELH!XOWrA zUIf?Tc8$OFwITcdFI@%)36DQxAyJ^3iG_@E^&$+4VIgfl?R4s}HuV~{F6-kT1Ge1` ztg4QfTi=_$Ka^@Dtf|!Z*$VB~akSCnV&dd?+DRvdi$zM$vvAQca9rxhYHan~ZqXX6 z#IhJ@#7Ouel9`4x;YxIEQoo+s+gAR?;kk;?`j*1_c6ZT1m*Hm|Y`>Jw_VaA9A0`#$ zd9Z)DxmitZw%S7=BTShymYFz0HbvJy 
z?_{h^q{?Xma%iR-%*)FiR@>pD_MEa>5m9b!Ucpmq5#+Wd?vT{eez;(_lpAA%lryz! z&7GNLci6TaoLDbLI5R-*Rztix`83>!B~Ea_qIs9+N_8o~f|+OLV{aXUjhV(Gn^2gF9XsDh+M*YD z-ecl8?*t0P`m+qDV+>5E6ncgg+95*(hT6Vko|Z^zi?iLD@wviRqVs(NC{LFQXin%` ziI8?5WL;}a*vg4iGG9+@+M`jINZOTDMQ#WtC7O`A;3di(sB3>!I?>H(7iBB1|Egyz z2XAD3kWJG9NL9kSbZ_$`Xjd_6R|(zqRy<<`-aGT1kgs%J_G?wM2=(x<_dL`<7VPX6pjokb z`}20UrZ7XUO}9E^D5>#>6*~@CH^E!J(BLl8u=30A@_N+TA59e6Gk1CC-AhYFXk1FG zXatw0zLU{eG5%Q=^QnO31b>61>5f5muxlA#hiQ<)*0AK++iAa#Tg~ePaSva=fAsKm zYNJXuAZYG3WhZTBeMCogS9WG5u5stj-1oQWGS|@Y*G@p<4OuyEwTZVh|I*;K$k@&G zGY?PXk;qcCE`oiQbj-zy|5|FqQCHWr*$|?{^`5$ zdYC3yqJ?Qm=732N!-}wV;xyB*w4CLHNkn5!ja|sCCBllpN5(SQLf^OI0-r0Aajnd` zOD$yEJAHS^5tiaAJV4e%b@|EF>trW$5;b-%x1TYI4^lA2vxUS95g<_T1t7^yAKik}Y4*f%WLH63VG(`2J|*DQ5BY#ltb9Xv$U80ebv*?tl7;zs{$ z-0Cqk?B-HsBw294u{W*!0J_}JJ~CgxnE!~VaC|yoypFBDn%z*invlc03+5K+G%Or4Xz-?A`yo&CYQORT9w%+?F3sm2&+`g)igI@ zYU1wgM^Bh#g<%P98?B}slpSUAj_LT*HFn49j^7Eoca2yQUdb=7BOhp}3DuN|hs z!i1d2bW&y6YL@lkVb4Et9fo(@&S;vnvs+#jeC2Duj0*+$$uHH14;dY;wk$j0eoFYw zY?L-GG3BtUn)0$FC1&=`C)O(}KeT^gs9ce;n!vOIsNI88Ev=#&NPGAw zZ=zRa)@@mVCWS<7YQpi(W4$Hh$8suhrXAaKrH4^(!)VaRp^@6R=k~8f?1G0G9!83R z#Caosi!vo#Nbg3Oeq<_Y=c~}E(*E6($z1x5iay~Y81n+Tkh)?R9UJYIK)svkq9_nM zMu${JG_z;ccSZ#?)XeNgb-u+a1R-QOj!3tbpP$C8lR+zgiv@>Bc;rUqmFyRd(<07QZ&!sJ|@IZj+Zh-;emc zkg>ddU)$Z}=U^_sLYxR=X8gYhyT|BA+IDaF6Ki7Iw(U%8+cqZFB#A9>GWFO@evr6Zs!we~42p}|zHbzr6}V=-1f7vI;hT&Vvr~r{Ith2Xo4crM zk=n37Eyom;A))AO}ZWf7DYSE{>=txaer3(_SMq*0SE61EW=nlH&3FCd@6 zInJEKV7ebPLRh#2K}U~5-0^gklaHU>RT5L?1RdWWX{X&^@(yM$RS1NM_f{PXNm*#^ z{l%D}SogBhZP{9E^VmAqRCFVW=(5m~nH2h5o?tOA|FN;bZL%Rpt3~L&F%a6Qp%alb zi1jjlQ~bRCoZNb9($88C&3DxB%Nx{4-!ovgr7zTJt6_O1H9VNLx-6D4u}CcB&DkD!iVS?Cw37)#R8t%%2Dl~3ab@gIaZ|QvuPdV?V zIR?SaC;{@duATYxM@y$~2ncEp^Jlw%HLaE@yp@ZCfcy6+!H?{37-aAi6?0~0Z)}a( zyD{<)(|;C-SNr@LgJ^Es5{t@0FcRl(6C)oDfp;jJD?-nRg&~D#?v2TIbSF{z+@-cpvGRy^@4}~Q^|bv&!p5!()*7B3JKhr z>l4`P@TT`YcJF$T9GkwAmg#PQ))?Qd8_mA4j?!~X8TgaJfz#3aqQELg5#gQ#fXTpq zJB$S6R(s;D#c?^*oDe}H%++XdB7D$jb0ST#Cw9u@@4aVqD({^*lNrJUVI4 zFNnit?wYZ8O);ki 
zRqTSvqqa1;FAUn-rkjU=V2bZ-c_qQ?JtIr8dEi7f{z98}AX5eg=5tq$W#Q??bh-^` ze-nexEYMJO;CbvR`E`_Jm;EIq&h<2l)MgP_OKqw2SPx?87Bz}K_s>=3Sqmd&+x_R4 ziZ7mDAL$J?bX9ux!tr}oDMh0(Coa~PQQ+A1*jo1Tlc_%T4pEofr`mUq-3&1u{f`zZ zPAifPHs)L(MqkU#G(Q?0uqupjlU7>c_oEYnJt+}@6>!>2QCKyO zXwE6b2B@e4{R;PJZajy67Pay>vGTH#MJ5rB^6+fAKvag?YnGJ#Yy0>I;8dUGQ2+ zhjbTqd3o;QvT8CHtdOG3t$LeXZ5KcfY$rJ+JKKsMC#68^mhgv&&oo}v^#_O)Ai>e^Vd(vZeLG79UE`rwZqGipQnO{$hd+u>^~GR=l@*H(jJay z>DNk?+rMuC=_zz89V4B_ggnu3D#$8g?W>AmyxWyQJQoAz(IX;B=PJttXQ z1)X7d?(vu^$+MTmmu(vwZh{L{S@rAV8R&=w84%fgpbe>^Gf$3UPy#NAe<{$BZ6+%t zf;G`)7`gYH!(3iNu+=*WAj*gpojr`mvR3uO^S z2L=tU2Mk*}Ks!YCh#v>{@hk`k!*W`!s_AY$u#9=p7kckK`pWA1mAY;Zne{)a%mflb z=&>m?Fc#69RROokcdyFsYfI+0TYy!Dk9@AwDf zuZwrCF|5m4B0DO3LuP{~DnKp9$3Y&(dmEn!FwqrLM=?0*cRNRJT_g88ySbDx?&XZ} zP{>a)yADChIg@FergU-@d|05x^T{R@3v=IX)n7fGh^!XQncS1 z6AElSza(rRZmmW2CE{HmHRVYcwH9UHNf$;SOy&QK$DGU+;G5w~|M^AlS=Dq`>e`%UT{dC+XizF%{&M$rQTo6(!jU(a=|k7%(c_VAkuRxm?u7k za-}=eAeHCBbl@Iqi6!i%n>W{If(?SNy!_7JfwQ=aiS$P)!ln4i)P8em-lAg5nG+}N zD)p19cE#Al(Sx^}aJUWK=^H)UV95_i+bP~!0o5N9o|Pq|Cj4pyItx>KK}G}(5>D6K zK_-B-53=~n(}2zgkgX#~NK)JKag^X!o1m=JZvZL(}_0%XnHhVC7iV8#K%Ne2%W+^+oYE~qM3$PB*zRuWo!-Rg8`%v*U*vzA*M zf+a>DautcTRw=O`HkY@D>j8ddDN#lj@Nf9@zS9cGFCQ>?*g*>e7%!wgm_qh|5C|BR zOIYEd7Y3u;but?GpBi3`KSh|!w8=_?A5IHB5z=dlR$jtVf6KYlTSX~l0!G4TsA8jM zGpQyT=44FRhLRf3w*D`l&oA{ZJcHu&Km;*>Utk zdPCHow_FKP_E*O{ni^t1m4ID@5dB-LQX`PdoKE-JU|_OF%LbVtrDy7dtu`-tKHH4- zhlLK~Kta5cj0%!<&h23!bk!#BLj86X#`c9LEj^&p2ms|kE zVD68P;(Wd$ZPr>;GQ_+a#73M&UJLV%@28CD2cCn+B29k~3=h!CBhheZa;(-dEa8y_H_xj)&t<#PTLaVj~-`RJ;}{^ zLrPa|9glthoocl1A~o2}9XJqVdCGo64sEe2y6dyiUNT$B_Y^X7ljc?GrWoUW<_;e= zTkb&2&Nb~BfR}1Vu8GIaN!Zy}h%cnw?}eV|{winoKHnSDyb4$u@;jysz0V>d-fzNL zsf3>iM1rsVu`l+E!j_$bd`glv^@Ni?2=eULToBtVHd`C7+(vpE*Gkj_mgbo47LqeH zhZsD$N%?Y*^4SoBQ+jFoI>x~vf>(}2jq2r0l7^{RR)l@eOkRwjh2uV;e}$kRS^2>? 
ziaq_UR`n{iTotJcQ)nC%rG4N=F)FqBx6zNY91 zU(UD!(QCB%lDg*fJ4gRA?IzM0%Q5|)2G9C=BOssI#2Ygbck2gXABBJ(E&|>OtH}yS z4&$*RL&U+ZZo0n39d|+VFTs9hH_l17IxRw_SeL72YzrViS3$llZ#iqZZPP>39G2$* z>~Nt%+M_h#?XybyoK?!rv(bLN<2!qMP-o2CSut{hU;>xE0DbfP=|evAQoGq(5${iq zo;!-F1^l199~9+nlX$};(1*_xE#l?U5kZWna;a7MN=i=wit~AM-1+BnIawaDoB4h* z-aI0qFdS1?y!Nnvc1r5@>=Zb6TXX0NC4W#~K2H=y!;{N$G^HdriY&9jF6jM)Ql}6dfUauVlfU`bfCq^R^3C4K3 zQcuL&ecsJNdOCZeJB>sB^IdMC@B@RdE~FoUo;vu)kpiaVDrcp?tjU9^XApCWc*%FG z%F9~JH*vOA_p0T^;+FQRc3<@lWQ9k~n@)8vRca;kZB9sAiSSJyNZj#BLw?nHRDGMl z7o(zq13ZI6AmIngX+g4I9A*p|icuCTm|*)gp_h2UHs-I30q($(%-po+kst-XIB}f81ki4(yl&OJ`FdKLX7*D9I}zVgnOvdyp;o>|q1 z^Z){+tI{Cm(^~}s97ZKi8`OTzkGI>r1DVG8Wb~hAt@PG%272a&-8J#MbR|~ z`dEZM2MafYf{k1g+IWI<)o&w=ejRpe4t_6l4_s!k_6R*>|x~b25 zS%!O#Ik^2LT2I@7xbI|Vr_67d4ylz$HN&esv_x+4T|;VmMf>Cc+Lx{M`wKkk@=?xC_ljq8MW?*{Yt)9*yGpK;UW>eP))5 z*gDjdIJ;TH({CiN+SAq9jUi>vl0z=dt5G_Q{hc>K}_C=3A^!3vdM)_fs>*4O9=H%_#^qpfs3)4&S^)Z zfuDnmm2Pqao5~+Q6f8Puo0Exjm7)GyPrADj7CaP{_ivvN?>70ufEbO_X z*lD)?iPoKlw~p5c?wVj-wVwW=d4ELd)rlVDxkpcr(g7Gk>~6m-R*#ZWKW1m!ANY#J zae9%ufQTZwD{!^Bx3X&?vfLDa${Fw7nG1ffsJkszYq?;WrsIU-W9JW<2&TE(V|*?} z^qXEF4I(CIinuIA3iiVs%EWbI(d!vpD95Fx{c5wZ8~b;4??SHGwszuudMnzROS*@P zsFq3_hSa%iShXqx=2V>--EAEIpvR?Z0&P`QR;CX2%~m@lvwXXUsFR9hhB7KF-A*dO z`Bl~$BB%;woF5J?kY@~9J#>O4bhb+00}PpLIk9bTLgLS@Ap8u3_tw?QQOYxBlIbkv zik$1q;Xw0tL+akkgXNqo)B}1v4$70rXNrpxz3{C?5$M`A>YhB2dN8zeM$?yC9L0~W zFf`6EG)_|98FY`oGY<7+9(HH?eYWM)^lDQyuWW-8L8Vgr{EQGCF;)mzU(h|n=rK^r zpKqtc4L0P0OwBg*SkO#ASpjQ=-i}6 z5Z=PYCo|1;eL#mxAlRa=0+NRpFFN!@wyvDzs2&GEr%c#z6u&5aObAzQE!a7!fN(eM5z1J+3OH8stea5aqeli@RdLUS2NU;MY zRd@k!88%ffGo9IB_@_V3=)R%dsgrF&YHTMIngJY>ck$6N1G@@hYb_s436x~z@> z?Vr=w)@mUZPJV-Ap!ErCGgtQ(|jH1T>B*V+$1si=NnW zOa8S76EH4N&>ReMT}F35vds?K!M zlc&OT-7f-cj?W>M=oqNa*XCGFHh`Dsk~UYf>B$>lNkv^588j*M$5BHauPL-+a5|%b z6|Z;cxTmCQ8d1J&h2DoznuSj47Bi!f4C1^OLV8}=Ifin^7knOl@y6A<$^e-cQ<{{8 z7-Jr@y11%1@6?>}e%Psgzx|=RO3;Edv0z%Bh5Kg^Rob@|L2ba0fTm{=kl)DYMEp`79T@G$7Kv@j{C$1 z1Gaw5?6&5DYJbQ>^wGYlkE83+9|qo6eI1*7&jX|0j2AWVlW*gk3~e8wP{jvqjB9@Q 
z?KPA$wuIBKH)jU^vIqMBD|w8Gk-{6KiIJ~+wnvM5{j_^iT=Z-=V-G{{on$x65}6j_ zK@Zn}=tLS+5|)I+=&<`S!JuyQix@-PczFD*4GOI5cAaMWTM7%bBEJLT6DZmUQq zb&QYwT!mhu^f7oOi(|8V^_`2Z{YVwwE!u?Y^;n&>7;pN~&GJoP{zUwoycT1A+B6lz zlvzP5qAcm_67#9aACX#Yw62iYvXM}0k4hHIPf1_xrH83)e*@!?`dmEL#8j37iu5ek zhZ#$L8iSEK>6~iQZm?p_?s+6Hm`c$)gM6dOA1eIbxHcq z^K$8SUVdR$0`%shCJP)jdNt|i$*~xAmF{NhQx&_R_RCyRIn=PSgqDG=X$tRRGm9e&TwNnZj7;0jOpjczlg-5;-xh(U z5A=e_nvW`p97m{oPN62s+KX*dnuLXc!N&D(Nl6V-8r(}d+;ciFrOH_>6KOfAtd!qk z=~UmVvw14tED| zAc|?6Z)5JYr4VDQwUBGI)x+y9*>mvc+jiBNb(a=$2E5Bl=D4LjTSr|_z8g_e^x=*x z)Wz%hyrG?+g&bKn;nJ$8K*yGqOF^a##<_J)U0us&<#EMPU3brSx63KRBaD25S1R`C zwxmcaedLLe0&bY}3@KD%oFjZy`(KB0GQ2+hm8hCyLLa;Ny`?iIIT;`eG? zG`A==e&@%Mqku6SIV~%FXm2>h-gMLP9j3uo*>TuVv!n7FQ;gQ@NQya|45`$8+X6=3 z7yxlbix4-gECQs@3XPuIa~;u)!$?kwiEk3(A83=)DZXmn{VRv4E;}d99bBf96NRQr zM?O|In3asZ`P)_$~hrk^0e z(sLNF^mf6`A?jEGAnBv|0my-tM4(EA#l?UyD2>Tp_=DX(^5XDtDbe{-G6LB!2?cR} zXH|s|N0qRxHwz@$%(1h^7s`{jt;e1N+{~A6*BaKZA9=~2-E-0yXDDp;=qmjeAy3j) zL?aL=m3b0|7?BWWuqpFZC{#=@M4wVPU$LYQ!nG`tUdV}%yKT=Ktw^7O^FW?!GU$vD z)$iWoDm<1-y(LR%J=|GIikV@?(T(=rHcUEH;;OyO_=;IhQ8p^1I2Lb}HseFjlyaVI zhbZo@OR#n`_1JHtea@!d$EvEd_b2pSKx;1}dT$cm%wD6MTAPT8yo9vuORXm6+)W%S z+^U-D`_`A{c$Nmu%VpQ4M{$=IUAQO9L#gToeKl&W3x-hqR+&`t`(lQtCWGT^O+8ni zv?-fFu+=m5>y5+%rrbI2Wpa|3@db#Nsdfq?iR}rfw zfkdc?hZ(_vMCnl-(Z(xAJr%_S)XtZGCL`vk)5%eL$kjBXe&8ZJ{iZotlAuKi5-)^~ zd=2!Ye20Ba=#~2*1GRq)%;4*o>UR+fpnsKj;$U+o{%vEJREBiDHm(c%`Msguk&?O< zl}YW6>=Ku`ntTy=YyiNNZVyx6YZ^z!s9z?`Of(Liuso4MR>$%JPY3Z)7OglnrYj;p zHdntb@o@%Es9spi^K;eKvuiN?Vj8Ki)wD{pU;ZmsIWG<$%8G($9MZ%HIxcZ7#fV~1 z_4{Xu9GRz~x*MY7zQcGb((QrL!(qcp?J9O(F7vHfvf)~qNrFrPa`(%onA?Fak>9~d zrkA%Y%<|WNJgE3tCeE%4H+Om;WACrmccC(@^@E3w9=f5r@zAEle|cY+6;!kJ32;0e z`8OP2Pj~n3MTl|ja9Wq?6pBu%#zVkW%mNt-K&ZMuNG+f`Ap18GR>tRb4-r2=jVu2L zZ}Q)YE(;SUJJ-L($uHFQe-bDE7h6|DLR(Hl>fg35y@-RY>3^`3|CNda`-MaPcVPGb zCXE080J~gYndkrM>vFI%efhF%Up6i$>_4_H_m@4(`j1J=`sL1YemS~d&h0-7+m|`} z&-y=JF58zg`ycOrIkaEn{=qNVn7@qLuaNtH*14EjVOjq%b-%X6$@Y)W`!(jDaUB0K 
zo`dttxBbWL{eNs+uCKZO!@>P$%$KM871{pJ!p6k~%l4In#PPLV)-Ugt;~yLMi}d_w zKUlsD+<(u(@ijjOD>D(-*S`I~DfC|vQ2+J0|J%L&zX@6Yn|G;~Fkzn|gckh7H%K@4 zW1pFIC>#)%FZR1rQfJ`;?3z_mlAS0D)9=mih(fN2VRCnGvn{jVjAJS^rJt+P{?0dT z)kIrQFlr;7Vb0Ym!DiM9tHJKFJx@He(H$uZanm&R<=P*+y ztsY*UMXp=>%liNxkWjiP0+h*%*D$q<6imX{sfsO~rR%%Z#wD|zp=G_Mu|gS^ zsFORMskus!V!`z`(5(NVqyL>u#?JJgR`$QT?%%BH|CUzvKezFJXJlb@=-Iy%=PyR~ zpRlrj7+FqcJt9V_pI@@JiLkwet=U&8GYY$ydktJgL$C~{wMway} z~c3}dF#GP z8<&ljGOuSuikFcT!2l+MqY%Z_!bF&13M)0{WBjPH(W|px*k$=1CS0jqq+QYIyRg)x zZFi#6RIHq$9U$vB)ixyy%}}McdG+>(aew?M+hvOXDf7s4(k1(d+X5KkikpJWz9R0S zT)#iw$1e67?gQ(%6W_oA&g2l($!GDBJ~uhgc?3=@eP>m{!T87LUR30CO0Uk=k@^s3 zol=c9_uE|~D3ilr8t&wf&%iei(}h#=v^OR7?w#?z2G%b}Ho#|_H^_pa|8-2`+hL7X z3!O{OBZ_g?!F*?Cm&tl;3=-^$HVzTwEowdyo?+m3qq@``nUSEkWm4C(I!cQ zCv2xfArNAy&!3lY-*dY5H^*XnnCn$u@^&p0HDKts5&245?e^hCR9guB_m&4o5sQ*p zILS*l3id-=8I=WU{KMXKMlr#j`3`77pJ{;>&^fzZ3QSMTKm(*05+A^|MJ0FCH8I9t zm>n>{6Y{3$Gb!+s=pAH7a|4!i2BkI9G6WnnK+Z z%%z63Gvt~ENDKWLea!-tfo3Fm#{||O`vqUioDUcQFp;t)o*}v)xhr2>02avE!q3=1 zVn7LUw#YLzFacV@Fe8@q9UP8Olq>9-3J3xq21p=1lJZ5>A{sG@z#1uxsz9R2lJF(o zCW>AP1E7#DC6=SF_YzCKV2e;kMu8_%fH%^mcq`nFJb?H5hLB`ErWS681E7O!-&gD) z-U>t5gS-O;%}Jyi>xeJlP7h><)*$sE+@S%`B3JhoyYKF$km`oig6()ir|g2I5a~wM z;_r|Eu1IuKoi)DcF$gV(I0^tgp>>cae{ew=!2!(y4agkCnZdQlMqofMfGe~O@*+{C zd>c(3>XtAUsu4O68@hzFQcN?z5eMiDFoTvsP9~xg#}dO5(gf5Z4O0VB{P zB48r?Kr#RiARHPM$yy~w8_NJ*gk1z(gj@tdgj)n#gjysVoKKE>cUl@0ZV!48`Uf;K zKm-5^@BvVXfD>odD*%RWkwmC@GmhJ+-@tznVL&3iLkoa2214~kZ;LpRa3P&TbAZ!! 
z_XRpIndHIrg&7J>lO7{ALgRq52f_-6+Z_^7)hfmii6S9+fFlHuAi*3{iQpK4?|=iH z04C4|MDOrBE+&6e{h)DfO_EW&;zy#Z6_3;%|{nOuOWeF z&^1PLH6)#Z(I1=~kN>JVW0TL|sSlhT4d@;*uBdBJ;N9~&W9`Wil3z@gKgJHW&(@T3 zv-t4}vCp=Z&y33QS&p%Z0tsKhH5X6;iQr$=$Km@eg8*1e*jN1YuL3k^#3T-`g)+zx z^MKwFwtWQwydk@hzGLllC!#kBba9$xeZT%HKjlUF;f<_%d8k%evn&uti%}lj?U+P0 z@MmK6HvEcnAWe%=lFrR0z2=?y_y&O8LF^esz!41iKz$}_dB@}Pt_`q3svctSCA85G zX0H)zB{9o_n7SB9|5ss>nwxAeq)f+!uX~eUw;p#n_1roW~-oe2&M~o>W*Y* z9H7ArLlj;pcs2i&XjlK_O+WctLiB+)n-G_+ICWD(bPo`;{v( zO>C*sn@C(PtVmjllnBiY9g&z^r_C@4_0C#oBuOapidu+B)P;EdPG8tU+y!yM8Mg_z zhb&wQpsx}3LZ97>^8-C$mi#(m*b{ZbLHf9mV~)c6=pNIOD zc?~7PD}pOR3sePQ0L%dt09^o)o4~JaiECmHWIkwnaLho|zJUHl(&d2+qgK*5c_AKz z6Rbi;A;`awCoYoy!S~pO3dFvkx?iWmfan8x_EYE;tdKO$5Ano3ZWH2?Amkpj@QC;W zbk-%#58*^ha!TkGyHGSPJA@7V1bO?}PZCjd70QaSFa!Ik4Y}|}9pHkjUL0izJA)2g zLadccN++rrQ44Ow3?u@61CjuOp+BJEkPb=5MUO?lZVrxQg35!+!zB(tl2FDZMI#;6V(&Dn5n4rkmwt~OJi9W9TgE%NKDbx9fyu3(uBu1|}c zEu&L*8?!Exm-PHVj4=%PrZ!iVsg(}Shf+?Z39Twz?{-dtmFE-j&H6g;z1e774*iZB zGb+XPCa0|-QQVij8Ok4XL+Ro4YaIGbzZ-geV{8k()bZ9!7n^J?^_Cj^p4thF^agqC zcT3KWdC#ac>SHE#IcsBrw7r)O4q9qglg@0bG5Xu%U3Yo>?9NWJ**i3z#x=XV9$qTk ztNe_c&fga_st*YQY2|gR=9_&=9>%k`vZY2$)~T3>oFOvx7U{yfFrj9>%x$Yf2b5*H|%6 zr^4Nz?$0kTPa>vH)4IIXr=p(kvriN^v9FHPnmYdE>xKDQ`-Br5Ie3p_J^bra1{J1_QX|p;q5xLF0 zP6C%IneAAeY@*hpbinqg3BRQh{)mKkqFm zCMQp)P^W6|fhYM0KA%BVviyqWBc$C*AkAS5Fo2=*0X~R78)|@; z4|MN@F8s<+)G2TD+sluhhExqPUVIgAHN;jwz)gSl*BO6(^nVonoZ5WY*fziyd)FErzu&BG6{KZ1cZk z`OsTQ6B`qh+8>Y~KCYX2ntO`wMijQ%XpOfavcM#vlo6LJ=r>@HF-Rbg{#34}fIQIx zd*W+!OGJnz*u&YD>bFE6b4jVSIgyh_zTz?aW;4uF|2tETBY}P&uRbYLVv=%; z95X>?xM<&7DDrOSmBpERMXH)eBfetzX=Gs^$C7-WR_}>dd(B|9!qEhY#9JGw=l}ci-(+l&TBrv`+ge z0cu$Eno;U84sAu|5oC%fX9E5=<%r)cG&Tv;uMDUW6QmD+yys^!$4Bb2s3g<7EnU0CQ zu%8pk=}IpZjW1>)8j@FkauEt33MiHDmV}Jvz#o9}s8V*Jf_@gAnS1FsiV(b?+6ZdO=nIPU3rhT{ zBoT}oO0gTveg~5yCGwjp(if?vY1%>tt7Sp$rMUOSDX*nDkX8)*o>Sazl;7M%*kVIi zr&bCnvrML5`^4%kw2fThEMYdlbC_{TU|Jc9&>Q2`=d`J)zdY!D%HA#%7~B!r`qWC2 zKc-hsj6ywhxoVGL^pO~kjH!Y(H7z9_HjkBQti|r~XJ5Qz*6Qp!Tai=!vZ7#_{Vw3k 
zmnujYGM45y;c*Vyk5Ux?HNaTiXlilW)jz|}!2MaJYC88D+sj3>JBg6p&_Bla- zjeJ9E%G9RBHk^V7R;gG=D7w6)SVw58;8jS$2k?Bw!k}3qK*Wb;hy)PxIH)t~UfIGv zK*aaYx|aN;7b}dyVCf^`L$-9Qw1g!)5z6IWabAqTp8%5^{P8nZo1L7^EUmmX>|CjA zM;TsYwN1LZLmvaOWD9ZbMh|kL)g<~=jsHjqJIED;`G?c!X;izhCi;4dW?II zG=%xRzFU`rEPgOf%06Qu^5vV>xjZs6@|lb?*!g6_3hyF;n}yEf^V+VE6SAZFD?DW|(>fyA%W&^5IV~0f!y2-6ATlVu6sP8qoFg%UdDu#42^q)( z6qF$nEjG6K0|jaxyOc~Rav40tv}CKNnMTb$bTS7ey-K{Pq10iSX+Cx~1hUm;A67rl z@G9)YDZkLD#MX)mvf~v!C3<1(1F2O8sBAdf)v;bX-wB~3BgBg?!Lch;f-zxS4dZ-A zWvmmmsU$`xSWOV2VeUOz)P+n_Lz2<5)DJGo>9MvKfgyS-=*C`&(mDitta#^fmKcsj z2(PjX(~3SF7WU-@>I`J&>Z(g3=$89C_xAlE5FyTZZWCXWT-5pmU*MCE z$xtUa})-)u@I&+7O&0^%XeOs;%km>MN!Sm<=i}v-t-PXSf_(q(sI$Ec!K3op28Pddt%fhYf z`vmek&Tdvum?rL4rWYh-v=${%sopfKpGCBp0Lttu6F^yu@V)oDOXW9=Uj+6DbY|z= zEe?Y*0RkLtVepzTJ#12<4`d_3r+pSFHu2@t^+Sa`^TVUKnx0_EV_Iz_^Qna#umjkj zS&b${;9js|)hVr3gp#2FQi7NW-=kyPo*)rBLf-a`8E0X#A!~@8R)VBcr6cXDqRke zU`VfSGVqq<@8G|{K(%He@?+Xn2cIFy$ArjXf14>Qx3O}<;9?M}+o(yI;o$%b+^S+k zgRU!%!SZYSWZGm{(6={23UR#9uq8&v!G!Qz7lvLzds2jsv=(ye&CTI|ujDldv)9YZ z)F}5}6mqO{K#vmUXoyOcQoJ!WCZU@DsEiiXEH@-w`nK`n_%wYSE0U20Is6w5GkAHo z7d)XA@t0vjH@*RbuDl%IS~4bHFJ+(G_oz`K3S_&d$CY3c?3Sa0}jT5`K}*6EZ` zChFVpEb`KpH2;`oQ-5;ezaS{v{=&9CnQWro0Jz^(>z@1HC4-o47N=(V(bLdK^iJ(#Y?Ty;lT{`@s(QB*s;7>CF8Bbl))jj1NpZ8o+Q zt_|_!B2ePF$~N@e^`$BTJEFuH$2o@PN~27zVnO=BA|1DJaP{ct(#c0i?T&o^R+I%U zA$yk1;e>@2UNUZF$y%wLO~ppuTHa` zuFYZE=%Z@Ce<9@$DU$Jt!Trc7rNFV?Wlt~VWJJ3yYve!~b)}NsTq!2dLUdwdV@o}6 zVjprQRm6BtaMn0-#CD~?cc&NE(0}K3prsEV zd)$Tf1&HW(dm2Z($NJP*jixJJW z;a8H1YH~UbjZ#*v)e?v=lU0=peZtl_P zA*hvR;P=6A2XR77gvNKY)3F$2z6ZsnTMJ8Wc>f(VmzH@g{Xq!`Z2K*3@bbe&ZCkm_rylIRX;g9 zl{&i!2Vm!-4nfUj>Em2v`qFsW6sJTa0R6|zyVj8#WqUf!_wst|lDRGMGdSCmLf#ps z+aEXcC>pOc-ZyB)bLLThrKX2Pm>L*GUyJz29=V?AsK}u3!57d+6lqz+pp*|8Ky-&B zXk^IZlfKUkQY&#wkgQ0aAM8Lrf}nZ7AjO^5`8R>&sx9Y>LMR%Qmj|}dTQnNGWLbe! 
z3_{)EA9d#L9MI_Wzemfh8MOJCU)ed>)o8EI?W>P?3KyxI?%9oL^Kd?)ksoHyt<-nY z=c!pa14Jj|=1iyHR?W9wPVn7TZnyaoICnWbNeB$`WKbi5aX2;`j5lJ{)gbS{xMQ&B z5aJ8@VdBj}`MWVr?!lQ|ID@p^oHT+0-AqgOvGSdi$}$d;q4x>$H>j039!1I(iZnzq4Cv+ zzAU(}WXqQUHFldTh{^u*9mY`Rfc_X#GFNGGX0z(omF`b&F zM%)S5DZ3y2-GF7+8kxkCBnv6>o1q|u>fv0wM%JO2vHon%5JZtQO?9#qF&V`5HEtCF zPIRDJ+MvKqY(;m4+bhZd6fH6y3_F5)Ty(uDU+#=tpoYXiffpSiI$2%?Yu@pNDobX( zK;o&tY2`c-F}k|})B0QB>&%SREisROPAcyhwUuMBOX?6v`XJT<0=5l#eE86(VTVaQ zTek9w8e2uOVVHHUGT}zqe3{wogI9D99S-%ARX|KorD|)x#`bjKl`{9OZ?{hh&xH!^ zyfe2C^b-1yg-MZI^|fOt;6y#&tk9;I^~Hn7JwjY9TKFlVGiRU^SVPF~0T8kkxLpX3 zpEV)G(U)TrRdVx$g)p^3s%axdpb_Dts|=B3&dJ2>ROA$jZ@+-%?Vu?AfQV}_;VFzVF~F%)i8{Ne@Ay>J(4o*jSpywG{m^Cfgihn{4toJC!pLzcHoN+g%7 zD}r0+&rdZ=&(|T${JnwwO@YArrMQ{|pNCD0VBtbGxO51sm5CF;Vpz0S6>$m$obz^r zwm>E>!mq^Oe@b4^31yEh1!k9%)|gz0H8mFwWJ{LrB81w6>9jM}s1_*o4U&@8SSPC# zxZxEJ_KrOt&-jKQaaN&G3b{1>4NSi#aG&>7W&^*vA=NI3 zNMYtb;{N+aw-F3CT5+GZ3PdI4qtA>hS?W_sdl12jPa7t?Wx6%;SFMRSdq^g(COL>q z0XZk_PG5nDTBq;@WT?jxPAy<9CyCQ9AO%^ zEy+=fB=12@d~QZlV`=<7nUKvTQWY{?XTiAYVfrkMP=KZ5AIB$U}V_MxhZVxozI$rV-C<=htXl8jB6 zF!UI{sFlu4E2>Sh!Z)#O`nevoBAmmsWSECKgj?h@7c98a413SefWubw(#W>iWTfJ* zH)03<}>h4qQDKuA)ZJU%B;w`s`h^T&vMU*%>)6fXC-!EHM4MT4(|Kr!f% zK5uNzDRUIrEdEbz+LaaHJrdzjU(T2`WV#whN9@$Iw_C@AI4#MHB-!2rOcr-Hj9IXm zespbLx&aNw5&>YqqY%Tw*RUGcz+YGo!`Kwpg-c zu`C9QnVFfHnZaUax>~DypS|zy?&uTu?LJgxRz^kA{BuU+81oyG+uqZ$<4Lxi`mHNS zT*mWC3v*gD&moX?lI3>hzGmeir{w51cq z5jm~vR8oR&9=rQr(|x~Vdu0_r?8<4y22#X@8nn&Jl|5LTzu}u;K(L(7Mi)=TJyc3Gh+LYX`Kp?$EjC$C^SnRs~7c!7vo@k9sx=!%$@-Cfg7LdLdRHfb5#pgAj71&^h1U%N?frwoItuH@t>6O*pu?e^umOo-Z+7cUqY&YXiIV!1{XeGEtp4!|Fi^jRRJR!^smj;Anc zAN-gWy4Pv0uc~fq;Myd8L3ful{~=$T`32ka8G-Duj7~w-TlMwgC^TTfKuuTqWDzFI zsq;`R^M|NX@lOqTM~^$^l#P=u8u>QE2b{dGMrQi{as#F12dF~tY_%dZsC9tRaG!S$ z$X0do+WwyG_`-kzPB`MrebGD!>~oqkbdj}JUs4=0)Q~3gX~Vs z^2jtP&bms-S-PB}csVZ_x+C`E>nWq*%`DDPPM*FM0!g#b&CLOrK8Q+%AeB8Qj8pO# z)gC;v`$*-v8SznJsq+4uD9fC9DtkzGrxylu3|!x<-|Ktdz8difsE+OcWcx(lJ{;|_ zzR}Tg!WJl-kvEMW7<2uhq$*0hLV_4pRWnQ;Ro7Qn8_zNeN5{a#Ds`VoX|b{!&KrMW 
zA7>M%NMfZ;QMXgXop(wsG}}H7r>VrHeD*P%NvA%X3mNHXGO$pjgl{QE#qG5%)h?{O z7%HW@i_>{jKFL&8lgd42vfTh^N334cg5*oxUY(=uQwV$zEUHgl5s3i~WmyBgjYp{sC6{D0y zc)&p9+lMu-R{j9SvW?(n-nLaAb{tzbnmF!MWhSa*{|?O|rb5lRdy zOk3(&LBYZ+8PCB%VKgpWVFHiLNU2X^U}C;pKgRQR!61Vjn>aZdy!1(_bmgz$8gi=@ z_{>1Nfqrj}g-rfd)g+z7EWdIq>*cZiYz1uzfvvz-nySY5YIVX)9N+X+!hV%)%4f|b zyqwARgl0XDgoZffWL#@#)I8@MMZkur-7bQMzoY3rr*AHuXTE*C57p}i#x9qREg~Qk&Rf9u|F1S`e}6cGxpK( z*pSPWbCKdIVnpl_Ti|yVd~xahyma_z{n^_|_R?d%Ot)?yFI?1xz9DyxAbf?E;1-N- zBjiE77q=dE#r9Nb{fg-1u=b0z$u(yLXrr_!z8+A)NaK*rwIZ6I>q~37*`P4zI#N)r zheS0(D23}GYB;k!r}18DOPDy-K_Fl;YIDzI9|}cdm)>qL-kI%$_6YhU?|Op(_v_+x zl)=E~E+>l4b?gp4@WivsT1xF`E_q&cN_Cr>t`au;338`C&zD_t4u#w;o?Q8oF5e^7 zIxnZV-sQe#Bz3lR>k#Pq`v0QWKg9SKB zL#ZnI5qZ_@V0mLFoH34B7r4JH(w{~nFq}epCAC9D;K4P8k9;&<{F)2kMFUbaX5%rx zKw($1YLi__bzd+H;d(CeRey8ZooPU6SB~#>7LIc3X+!v#cAv>t{a*GWZ&3TybGY#J zRg6ppRXz$o0(1UeVS;US99Ct@>SpzpJ9J&MaeulzEpusOrs?WF*ngJeQ0`c|Yt6l9 z_=sR=0#e7=H-v^A3KX=aLb+?CF=WtK7>@>I|S-Xi9~#1cmE3X$XL%Qx-w;jx?%V%5;- z55_qwY4#efV;>u@oHjAsxm6N}@&>(|-JPPxMw=eU`!V!8@V?;zl`!>5jRNZvFRNBB zt&>7?Vj^slu1W@&t}fFRoxqU5Y<1}}NSeY%Z7AzUhGT6wFtMM}Y(3%lD;`Y+dIDKE zk53F?{@6`R-*wsHA07fzsFp#-NWa^B(4chT^qJOLyr#{c-4b-Z?zNua7+CHFiI;q| zUW6{1dpk%&J?G;K4t+@m{f#y(5GlH!ZEt%Y>7I;$su7s@Wp(W|-f_en@fTB);~2Fe zPjIZIDinQw-N2Rm5*TDiEwu9@(lUerhA972VQv|86XAVJ=!;iXd$$(uljZBG$~;7UWU)8lfFojdqPe%XaFFCY(F;sd=3CwtC(7qO(> z>JUsi7=ekdUfHV5aVg?IQ)% z>w|Q4EbCeqEwDNTbDFZIzp1P{tIm72M+@`r48*JN8IXaUS7) zkrom+FUS7C|LI<;TYQxeM1mB_5klSD4_ETN({oIn<3N4CA2U);BC)iKfU>T1r7}fp zeO1?d?BeuK*uCu$bmVkA3&uAqW54l9RUJ78w6E(};_aWlph)Vm;kygydqGCXZfz(ec5FXtl}m;)4lO?`~Te|)bg zM7d65Tf!Q)JAH=2IYq1a!UZYxtv@(WuBn#Cq%|1w7=j)l8Wdb=;}#5wi_f}cqG z+XX+!?uUeE3hdZCHI->FZdxPr$Dmra#ZDNHzFRO7JO%D#7%Yg7$<>C^DX6?C z2K$0yR?e}B0DG^U9rbpBa|MPWZjzqW<)<3vZ_57pH#>6$D~Yu-cVl3FCek{~vU;m>m_a%1k=>6cWPi^ z(WsMqQn0}!zPu~FIo274%q-GgOA>IoBhmV)MfD%CXu(Ao6;B3%)u5898mR0}KZ61| z54*1LJgXf0WjESda$=KecLlh_)RW|*{&)r}=U49(hdwk%8{OLO`2zVO3I+#rj^v2T za)r|eqE&mHa-MUmwN0t>xE05*luh*vB#4^)R9$N6?pjD_Yc}jys+zEYh^){J(SyP? 
zf?3IRh*&2{J0}s(CL!B?k9=f9^pNjKn{IKZo80#kzPhbDKPqaT%E{OfL@g4Jlb2Ky zb+GG$C|oCw)6wX4Jfz&W;j4$zNiN%j<8veD+!FSFLBPj8W7nF`Nfgz7Eb0n@3($tQ&TibrY{m^GLkfMR4u(dCmFCE9Q)LI(Y(B>8`?6K;WO@K?^ z?v1g`@yR3_g7Hdq+_=+z8pp*mn?p%8oo$9bt?t46Ed}!coh>T%8@z}!g*6W)9zn3hmnn+@ zvetLnuY;Mrc^=7RFHLh9*21;m9KoHs5E&YZ9f)rZ1;MD_Gz2p|5?}YOLMEDLXUA_=H0lqL}cTZVu=RJBOl> zdTM^a>tIjoo{N+Ah=BkYB!jg)1CO4&*SC)&?hU>SDOD3Hte+9eM(&tM!gF+IHW})h z+AG>BsA(V|pfN=bG{Ca-mcbRN@B9vNI}Tc)#GtfTiu4fbr&Sh^tuNTtzBktM zPqcU5*7@5f4i?L`UDc^IUaaDjXEGG#E z+vLZl(^4^Kzp&3cx@F>^0^uNW@}bevKYAWvErKlxY3sT{LrdNUmf;%QAjx|C*}lRC zI&*1e{Dh6dE;kHWD1Y2=vMVFZm?hS#UMnjb>S;2%__+zUOFwcBEPxk!yy3$9BJatdS^3SDZyBw35PBl)(8`*l;C zQvYaeP;RbB$r!4isj>C)6z=&0&=aZBlaDw!eB|CR1BU#+Bt0SxMWY)eXLBFRBE+A+@(OgfB1N<99S5IdJA4~xwy;k`u z565|L&8Oygt4${OrJ40lYziE#l4t?8Lu2%K1bm(;KbN{f_NBCcwkEWbPCnI+A*HKo zYZEX-FFLOQgt9aYwAo>t2&l~n3ASIEqf<9~9Um^WHHd*xt@A!V4*kZM1}W1WN*`C} zK{lndo%r*e_S`9`P7q4EFVMl!Ny@}~p+|?{p@!6pS-6%iAgQ4Qf4G4rqV756U7L_5 zt%4Y64)4N>nXQ3*gy|uKen5)xJw6d4vbz!r2@|n3Wh2(sd~eC7%VYOi9Z3IXC7zMM z+;Df~gtY;x5`1>oqsI$LC-`Vgapd8$#$kAj2xNLtzVgv+7)AAuQ!s zZ@{qT+n9Z$j)tMeS#*6vB7Iy!M^|!tWZG)C%T}>tsg$2lvRUy3ImjN(Qq4vA*(y;T z=FrMU{IR5)j=WHQVzj>GmZpvoY9bVlyO7KmQgP^4N7N=1+6W0vjPOgbaxn=#^bvax zef{f_ zEqn3iyhvY(ydjqW(6cn zK1i7~e?Jfd)ZJzT?{dF8x0#si@K}wlQq?I`E+4iY(?+S6u_`f~Lo(&xDnrYyhhrIZ zjp&Eo*Xv##*5>)6dThCIvz)M?o{mrqC%)W#j{|8(WRqB@AwZ$u>SZ2B^Bjv6o;UMJ z8a<8q9>EikZ`eWa$=L!zaf6oC{AM*H0O7`4qz>MW#bafo&?S!6Jbb1OMmFqlRaK1? 
zYMLfX@LqJcv~ZSYY75mXFZYhZ>?QJxRGvbK0iK9nKTd^-O3nV}g^s6V8JcVoVSvjK z()!&2uFK-Ab{c>?p2VAJ9b&B*0&WZa=++>TH-g#cJ6beF=RP^72DHa!Kn9Jr(X8Ei^y0b4 zV`Orz&C1Np_rk@1ND4Ez#|7hqgpwjP3`H~k@o?qGTHLC6L@18$e#S7bRxzNQot=w^ zih;}?FGiHCsoP;Q>PpzDdf6O`_3*}q)$xa_Mv7(#B#a8ip+*XDr1(CnGYUr8cGF5R z3UFI|yZZebm$oZlN(OV$XSdN*A(1J{0Qx9k()O_fc@gwxd4S5C6qirAQ69Af6h4yf z+y0_krP9RyAJ#eYS)QDC$)4DoIF$RRBrieKqNOAO!S1WGlX+$nqrUCNBd2>9=F9+z z65e-|7inm+%Wtrk(f+UVDk|cE$3eu@eML^7Y75Hs(0%cyN_h=L=aVm^y_eB*{3CBE zMG^vn)vAF@MMgT_`+g>JwuFi=uEcf9bH7&iuIPI;gNiRm6NUDBMqp>Cd}Ycpd?2Mn z2hkX(fI)CY=OGtnQ5Sf?&FPzuVGfo7+SAE4y3*=cruobKafS@n*j;?GB4`MebwSC3 z7QBY;Y`JitNC6Vlglov7h`^!p~&Y~QoCQlN(SberCNlBAK! z#~;|Q_obSzaq~=CmWtAj?t#)qsguKI*fB}&Cj`Yn`n!XmQTSuJP+B)|*crpsHj~}) zX$%K1ONGlByNtopkc7a0);~BKPhfb=A%a*Qor(HE5#n{51TXPxv9@xYH8~2I;Iju( zS>f!x7=%BXzz9{h!Hecxk6aoLzd7^xGDR095r2(M7FK%sNah6OucYWcfIkR27~Bu8 z4~L!5D_w1TtoYI#FPxvVmXYGTtk&5|ct0(V7H_4FM;gD8II-N|G+=Zi+NtlVv&rs` zoFdPhCV7=-YuR2>BnP}Ams+(CXrLG!Le7Y1$RS3sYOZLE0;>l^J-?3CY#1t3?#;9w z>9+iCyACYlc=7K^34=t5)n$2Lc~np6>dU8m>=msQODq@(C>f0N&Z0G+M5;mZ@|>Q0L;IjOeR)#Hpc&hn*S^E{7*FXe?iTq z)wR^6RcJ))?VRmxjGX@~X3q6_EM-ClReLo%D-(NDGeQo=|Jd?BVCH}ErT+&rXJca| z{Lc^UU(0_*wEy1EzoDfZf8p|pnuHfeZeAU}GrRz=1N-@6(zXk&mf5 zL_-&pAo2cWk&TYr)^%aT&20{r%}1{BVu6PZGs_JXMlkc_^hTJ)DzZSvom1Yg5SkX( zKdr}UxobkkdOOT@y#yd);y_plOtqX>wl8gZ!w~cDf67t3)f7G{> z8l~IC=cv_$IsZnF7e|3*meRNb!#nYe`>&<&_YM2&CNi^f{i6^*dEbAs@Bb`>e`W{#MqvjW%5=!@n2QU#SeA1@#~3G7}*a6XPdg{lC`N zc&euds@i7A_*zjheh&ol<>_YfluF#Efu z#UJ&r9*c7dWq>*v`hlGv@4D<)&brq>3}uCBh)ua07Ppy4ka6BM8I0K!vU&rpSd$p$|U?hW8KrIYTg5yyg4{(t zbb=a1Jye49h}(#I)R7y)s#F4c{FT*cOpew^JYoKvO zH5mdlg3OUwkr%=*bD$Z*F595Lih0Nfu@Upq^jITjA{!+_D@0r-K+}k667`@G^V0X2 z5c5*^I3fQEuTl*nBjzRU=_RJc*d{{8M>C3rh7;2y?D0Y7C0=9b(M3K8bI5|O6>HK6 z@)m893DP27qw0}GHV9!m-f7#PFy_ zMbI4KjG{r^#PBFY3PCo=HKJ7!F;v3eknNC&M1neqZ;`jTk-Z|OWP)6fXGH%{2_qsa zi~Z@y{1!iQOtLvxvbQ`IZogNXLb@3^(@uHTuWnqtwBYY5C=}hQZxp(p=$2OYsP3h%MX6{y zKFs;-nzJR<8z4Be3cqGLh}IWtn^ zi%IjBQnYAoX@05yF%Y8)o-m>k$+#2ApZh)Y!>5?vVC5IX{JzY+4;4fI_L$tTqj{kz 
z*BQTO00$>?r5HX%^j$%JF0VSnOXD!xNnr2wy-h{cu zxTUy-o>N@XT_SH$k0fRg^Dd+|V>`(3$b3aUMdQRq6NB_O=p)7>q$8Obp&87)NvW(@ zKny>bukds7EwYt>RaRp!0_SvS9-^eT)J6OT$r;fZdAp2P=q>vZ-Ar1ZULJ=ej}(t& zWqd&qfn1{JYu=O1sw?;866RyuvcR@LK%5=PPonua@px;=`8;Z6vf=<$d1{iM z#Ou;6!K%_YQaD0R@wyxL@?+=YdE2BE#X+jF)Wqmyp%j2;>OF`|*%* z*OCBbdaBR_xe?-JsgyjBy!Sl6JY31ecxFl8b`907=jWITk~{J{;(YH)tPpyuyiC>Q zzy#@IdFiWxtqeVBax%|VvK$w>;xvxbPH%TIS0Yz(vbYMO;=EER7}7M35)QX$3O%vo zz52bJYw5Vex!5^>G>IQ%^%QzSKZxo{#Nu-#F_cNE0?_1PZ^SXS>VD@TDKies!V;%Q zipTlq#hAv48oLPqMy$Zfw?H`&C;SDvv@Ur#!Q=xsyDGOZ9bnVJDY_=NI5{ExKm`H_ zx*oQ8F6lU7J3tHqE4z>#uz;Wj0fAd^mt2qVz%N+=3SEO+xnVhvZ-xYqNIqZg10K4) z4H1_RHXts(H+Ji6$yLF%fw%?WZMjzAEhX9Zy3s6wvIj19kyjzteIf8bZv)kb#O?Ch zf*9XMzrxXk(g$S-!0fu&?%i4Jne=Dd(yGGY1ylF`=3m&YVo0V5k_oQxeX!f4i(;~# zBZ#6;07o6O6G#gt(~WWo?SVxLiRcg2OZLEP-#ML3gEbI4>cs zLy{kiOhddNMtMTqAU>o+vJuUR_Irs0()5rj?Jn9UQZ^2wIxY{?Af~fOkOdL9SjxG+ z&$y!=GiplKne70`G&4^t1-CXHm#&f{hK?4MeSP|Lqg8kcTCP$(T3V*NOtq?)&30we zc(OdnB7*6zEV7#yA|)k-fPkQ^+?zKL5+eTVcJiCl$1Bl$2D0%nF6fJq5Kc8tk)=&~ z!W^`ojj&+pyS1qOMgZo;TFl&-14UEKYfS(Fy(uBcKnbaU@)6Ppp#xN+%g6!R2L=$p&~>=Q;_$@> zmLG&5FuKdH3eg9e|BH9`%NAc1)G6defP2^K7TqQJ62!(Ao^HS;_Y$DNzuXXR30@y8 ztxJ3g)MrcW64nFT=F5pcCMy_18Q9;=Ye?q-C6# z02KHa8^R{T;|PHzLmzj6GvY^rnRMG4!WTj03Lz9hs%4|7es@xY6!+KZrprc)1h?A) zY;xpo&1*sNm2V3j|bRrfk#z=?^_MEGim-w;%8P9SAYg?F;qWnP)Te=zxH?j;{@(&-JMP9(b4z^__~ zWfL1@?-;(I)X-&;B0aC_cgK5hZTiY@ububb;>x>L`1>=>ORILiE@_vs%)hlBc;18R z45li5)NLG+-6Ox_8QGrl`f<1iWc2u=5R1v+IX2!Vg>32Nrp_PNs{YVQ>msk_9@7Zq z2lxbq((Em=t7HXgJ*Qti{Vr{ZSjSEFZ+-@bY3*u^Rk~w~cT2QjId@kk4ZJh7M;(Za zq1flsY?n_OW_6}AIF{#3I-hzLFfr&%vR7#=l{Q@`Ih^Sy=!^Sdu-pOyU?O;KYsC(vX&UeJ-2h;nF6 zC+imr?hV;s-5Ut#4!njs;Bs=VsXa{rdwyu$eva($tvJb8;UjDQdf4}Mu5@?TIEOZN z`!`Yd0b%@f_ST2fFo3OctiY9i7CvMC^%YHDeN;)wPf9GsTtsan>&242!+A^J{6s)0*2t${+L{wWVRn11i6xn#8fZ}A3+510ES*Z~a%r7CnB zStIT1Y#wu($Y`4GiDSZk0jbRRZz3*hc~?nk=C1%4F?g|=mXf=104D#{hi4kLr?2$-|Niki6%##)t=#-Xn!SvLtn!)pXm@*6Xx+Vto81;*xhCAqYhY|ZTQUrVa;Nlnb|Dn-X*!ramdTswzuJWp@yRKFw&_Ok| z-2No3QipE~OS%i-I_Mk* 
zbgE0&RZ6ulzq@l%iiR{;TeHT=v{L?r)Pj3m-WKj=XOkZ6nOI=89_;C$Iyj$HelU~L z=zR(Kox%_`{9Op^gO8kh@`Ic8cpPeO{Ge!}-Sqo6Zc5naFJKZcCypaN^q}$nM9EK1 z2RC0OrGCZQTC#s-u28AmDaEpwU1Cg=DOX>oJ|}u%#54f%FFrqGpq0MfK>F5$xl0Xa zNzZLO80!#Py+WzdF-RGxb`*GdX6tuoj&+uK##lCZj#&#;hh}LZ0ArRf9}Uf9GoTVD zPjJ`a6s++#$mZLe=FO}3Jx6@s3t$y$*UyXH6Xx}!ExK|6CD%p$6@HqMZ@;sw+5@w= zQ_&?M$d;*wJ$8rf)bpEz$K9;#uAns{f|*v=P?9tTpyDjb(MXLq{XkFObR}2 zD&yw>sZRfNW>v(wRO-(6bHnq^v?QH{#Mnpviw8BgKL>zS5pKR4(9lmwGbY{L#LX;w#A zmr?GRdNl59prM7tYLr-((AFA3z?aovJ#e+hhFrRe$gyo8B4m`h3YYnd4=~6`!6P6} z2ej^ff861!;{k+K5^|6S8?@=CqW<~rtW&2LS#(3Dg1rP%8(1LZL;IK-*nLy6`eX#_ zTPB(D<3Wty@=aC9a-jzmDseX>>mE_;=ZeN@q)7^@C%V9ZdlJ2Yf@$J6i!BcF+J>zGbGylX^p&B!1k;=S`facJK@-5o37%!g*f!>Pw6XIRE&|IO zBjy$Wgs+=A-QeR38o-47eHKTiix^9>lo6XOBBW2J3p~>tDayd`d)`j`cWu)eWc9Pa)K^ePW9noC_6hxV3F9 z7HJIC79M4E+}!-^#Ki0ymVEAWmV8-eMVudbuf9K6N=M@{&!vVH8rAbLqwLbW0B?)tjDO-!fDHa4hu$7}!y;gM0w zm!Xm5CZ?vXSh+;>l=2Nwt0cbf3TJr4f%}Ak#sFBB6ye<5p57W^VPUfH;Oi?0@uZ1C zOuV$|q|TY#oczdlEaSC_I(Ys$P8q^KCaM8wYDYPg)~&a0)xInFxL%!eOm!dkM;-4W zOKkVP+TvNiz@$qOuB1=2jvC!pwCRD&+N;Lul>>vRSTEGgjWu9d@S@fJxXEPF-E^#L z8%(s6OE%DO5q6j;*vQri`HIVE;oqb+nMT**V`EcsGGb#f5-UD>w-^iFc<2geX6`R~ z5Kt4AR}ouJr)o*eY!s;{?o1R<*4j z2c%>OGU~K&HX7VcZq1(G=#AK)48oat5o03}@Xzu2n>eWo?Sw_vo30^MKM-3?g8rLm z+0P8t{}6Q9J|kKGp>{I=CBy$i?fj?8`@hKj%9>gVn!^7_?*H$jWj`}n|6TL{%Uk$c zmv?e@5wSFKB4qj0d1a0M^9R#s672sh`u~dw@DDTK|I6}k1L6Pg`}%(wApdIE{Aaf2 zXGHG*odJ?C`bn@Ni9I~S($9(+YLlLUBY}fcfqG0s9p8hQhO>ju#SxCeefUmhY5}XmQiy zuUStre_lzi)zLbBt0386X!NdmFMsq*tH~u{UZ|w2H%VepnLTL6mTxX=5d)2obCWc*7#)`+^in-Xh8g+}KPFtIDF@jGo8yFb zNgcLg+y;4rYs$E#HZ`w~QW>yTPpS{mod5Y1VJ5Jp=8*-6^FT0E2M4do5?xMWzg z6O>9K+z$eS^>Z~q7tb=s!apJRbCq0c?Bnubr;>xfFDMATTUw6INJ6pye)~@x-sVDy zVS))+s;n_lBn6QG_$GBsLX9_QCFWe%Rr?e zo{yq#_2ZPzDZ_&nm{(0mrTP7HEKcLn5Aoj`!j;P}C$sv8DWQXzyx#G*3%4SE-_Q#k zXCMa%ZtUbD{~Q159~rsqEX@Dtnf~rR{%gs^*oV)}m-U&g7O6W?fcCw#+{ z@ut_D*o+qd(>uJQ%^1vDr3Z^L@Oy6?@?H@Nt2PmFVOzq8gpC`j;nSUDk*HvL1h>Qe z13n|Fj>?7{7Hb-B=deKo6cM8C)WA?!Mbb=TT2;J9pSIqsdD(P~wBS&%>3AsFUW;8# 
zjLdcpHWMnA^}aoQ%k--wV* z{1BJ|>`)gZzxLf&B|4`hpYbXbInj^kE_Dz(`@OV{HJ`PoIInnLx2$HYFR%BUwrQPd zxKID0f0n{zbK7@ubq{A@UR_13pG7!hsDqnjSzXDp&+TUSa>Zq9ptDE)z7Ah#*3i&s z%HHVCp<=;OK3-;3IZ!dO=k)b1z9Z|p?@K|`bSh8$ivG;eX2s%`_2w5mBU>`WbDZ8aO z@;gX`DU=ux420#}bB1;p+Km|{3Kd}wEmKsc#~Ry<=_bk$!3k9b>~cMFJ#u<*Mec5G z$MkfMclUP>6Qzitg!U;KgLIs4#dLK~bPsfoboUTtiU3130p@_uw=5aF-q=j1j;)-hV;S!KDI8|LMu6ahPUAW zSou{LhiDM|@XH%qXd_o!|F#^! z-hp#sdlIOBcu4`Mbo5B)m!ZC%@*fJsNro^36_q=QFluL>}8 z?-D$;7l?nzIJC_Kh<5S_>LmgoI&%(fBLV>qOOx9~fEP}92alLueSkOT=|NS5NSKH~ zC}Aj+FbrGF-5bzva@!r~H@eLa=-hT-2=(S%8{b|8p6@tNgz|8%O>DCR8l9Sgd&hxp zPEFyxKY@PzMo^&!PEG#3cfg8W2f9#$pwoKG5jVi1LsM$68n9xo>SwPyK!@`+y%!zG z?%b5#OAbhKT*9hJ+7c1iRY(f4IjzOzEIV|733Ub1A2=X~!hKF6?*J0YMs$KUSCiUH z4WQw=b>z(Ig$0aq(e66nhvox+IdCTR8UrFYY4@tW_xb>CILG#8{Sn z<3<>v&YWX=4(y@yfD9lvXL`R8WT+cpR^+O^sO4ubE1(>xE$o=QX2}vt4p0%P)+d#R zNtp%t2_$z;?)&ZE`**c*&N&sqq%bZ|Sp(F7c`?=S;fX+M&ID)5T}FQqs!$X_Kkx@9 zniJ(#%p_44QM3qsC=jp=tl>0u8rq7PAo?jn9eM?P1^NKnfCG+~4wBnj2U|N^dog1~ z;UdIPP@&)eHvl_89e@G20`743IWamjIx;#jI)rT_jT8L{%nYPAEehSM2eJa^fU3X* zAR15<017qYFPU2uqJ+f?iUa8j#|`rg=>mF-IJKBvDQFPT@x>SL41Y`Y2X$&Nn`lZR zyEE_|@)_wC)^v_QJl z^RId)`jy{EPqDv&Qb^^5kXp>lOBpj>DdsRTdPyEjkBt?(lbch(hKO41%!fCU^T!nF zUS!*I^E*Pv^=_9X0nOp)TmE=a5pfV;e#4;$aL}96(6=N%o4?q!P8pvx!1Q6%;njq% zH|(>kzv=5(x%x&W)-d#ID=ADl4G|;NPPeVKb(c*M`6h8;zjJJ)jV%OuC4G+B#uhVf z8=&l~Y8+?-eX1CQ6 zY{Skii?oVL!6VVfbN(91C^$?Hr#1uPCHHaLaC%{q=!rPJwKk7MI#8e!e*2kLyno81nsh{0>4N+URQW-zH5t?F`#wqP2l{}j8Y-QJb= zA+pkKis@(jnZx;>N(>~9F?*1ccJt2I1{EcRiEXD8Lg6}m!*33G{=;~5$`!?_~ZN)?g!svsZ@RgJ{_+<3WjSum}-f4J@5%c8cRlz5H4 z-?{_yF5ev97xE%5Aey--u7W-pY7vDp|2jQ4tq_c2j8iJ)h+JL8x^KKsZYJp|n~oA? 
zQC4jK#DJA9h7oI)=Wj!rHneZ~g8ms9D(wW)98=bN!<}bG(Mh-%ZxDyKNA5(?9HA>_ zN2)W-zU5Yfx@fTdM4s?M)||Emr?nyBC*h}XPT@WL#S=P?SMXgB-=EiB)j zTucAgjosX?Ly&q(;V!C}-rs%uuy5MG(6n8Tr)qMPrp=Z=gCvyv}y{;o$^a&+~nXXYhQ`&^}gF8M%^(ElXI6VW~q&A4{MLsU# z9)_Uy?DtHrsh%QzggO#r+QOLI`c|EyqM<9p^J`ls%8u}u!C#mIXRJUd1tSb?$?D>y zU5NA~IA&>vyoS7J{3=N};!BBcP6*d|9NG~cIz!sd{o9)6{NOy-A&P$vR}xH`D6fHG zGlpMCq&1^G3||{Fdw`;tpqS0<)tD-TD#V}77UuaRyej<~d)V*IXCbe@Fm(gQJ<~k# zJcYpvDKq50Rf+`2`dx^45o|y4guko(T6){zk!slY} zy4FkU9LqXe+&ZrQ@HaP(Snj?zoLn~GxIq87kghKMU`E)|1tmka_V+IFTG9-xR_}aW z$ihsx$E>K%5o-%5w}HIIjGD`A=V>*^>kY0Ez*n7<2}L~%r+@0B5`8q73QL2ZjOEH( ztqFI3u0%L7^H0JDp_dr~6E$9sbFRUxNr?bz`$$O`!CM?svEO}}vw7zF{PHw^yRG(D z8RywKs-N)`Y_esQrRBvHanv*;qvQ#{&@j<4OfAgJBOg2mJ(bfNIOC_0$``HJtP_=q zU=vK|Jda!Tgbdsjw{KWkO=~O^5K+A3W&QwKVpKogc!e-&76UalN2!PoUs;@_c?(^r zDcD*@B(=;#N+)(<@r&=8AT2e%LY-HZ-MO>`h+My^KIw1A6XaZNx#b3xPBc_4py#bu zv>3h}2kRWSd}U#=?9|uTElw#M9V2|>=CiwNf{S{fy(rwJDe@tStfV=^3?onVWvBEy z@j8Ii(dId}#_b==)6ocIHZpVGC3wn>^1Q2#%`PPyEGSmC(r#@V?*` z3NlCGc|~mlmC)Y9sUFg@LMtq83SGFIqAzQ91EUKd;bYw*4kBNnzPZJ6MIpu3%<=>Z zb9g&jp33>~c-I=8ju-E&<4ZP7s4U_=HG2J~Ych(WVF61`{O}BfW!Pv9$f@*}zYq@L zyJ_&So)#X8lMRj5h`=qTARcRZPq*2-I1(qpU}CCs1Fz*94Rp-3rne>ie>gkGAkBh= zOLtXuRhMnswrzIVwr$(CZQIpl+qP|EYi4$^^I^Z;fA{8%xN#%nM7(e2$@82XcSr+dZ_*t)H3En-0-NLeY`8_hTqFgg2C9w5rAE-Q_d4A_;fLn49JgFEla+Exp zX<+#YoExn2`;}v($T*^Dq56}54l}B09-ihv(^1@q;!SFkVOy zRfn%{ex-XgCKYdcGjJ&_$H!?@OfsBVIi{h7o%4<}6MH#kezqr}&w%Qzxpty&5w5O@ zK#BgtUFO~MrqnsM9XAOVtLN20wN$eB_D=BSWnRAzt(qjV)5&ti1%0ilhQ=Kq2_B^l zhB5zE>U2fTVp2q=a)|V#^y;Q1qUx233n(As(BdSyfNJcl4J9cL#^OdYt$ko_Q5HmK zG}9|{kN?q>!L%T9uAG5^_FCb|{TQk81HLn_vRpqq7Iq0~j)NZ%`S->`=EvT3m3}q^ zPjY86JUCT01xhXm}qn+0fcW4aMt$JVvF3*!56iM-oq-cJDGw-(PV66w>EFz~= z+#LIj|D~Zi=!-sVmHmWaTWgE`PSU0HVtoj(+xO{^f-G=&L{21?$I?7M_Nk83oK5+O zSqfD*A{pgchsI)dQ7Xh7_-TRy?tL6bVh>~AJR_=B-yhLCX}GGz0UX9};&!F3$XDO|4OWwA^pcUDF@ zz}7z@)Yt}mMLZz$smx>9!84xRuFIO&fT2i5tuf9sU<%laSPI7bTuQDd;>C_aItVLN z^OSKaA}w+PFZWXxhh z64Lp`HVi)NvmWdIs7Z31qQ|WXqql_ta`|J0FWNUk)Ey-SLeKCfssm&d((IR^|DsPf 
zN+--SbP0sQk5eGcpL!jyd=V&%T?4q^9i$ZI813GW;ywtG6nL_NfsMF$R`juius~=s z6|8tVcq4@IDWb@B18^v{@u<6m%yELJqI9E#@Ho~@27smA)PKTQuM6GcggXfR5p{Z<;`!yo8we8%;(x%|Koevg z53{Cn_&LtI-2zPHAIVF28DRaqt{vlSaSFq1sEZ5BE zDk`73SR6XN3Y8Of6x_hL2=8JU#H#PtVUVOdr-ekDQU_3}|O4+1) zn_w7sPQhXNWAVwjh~3Up#eCV;68r4F*OxSqG*y9p7T-kHMAT&2wB<_yEG-8csCmMR zu@EE0o$Bh-a?+>6ljd~6jLrmh>VHq!Ko~{M9}adAV*!qryOkR@GIE1v9Tufj;uNOH zJs_2hK{VhSP%7X`%s^FZ{0k7QiNN)Y83uY5^ze|C zaLrpq*-E-t-60&i;)v8nO`yf)HravTA_qex>lrJPFVH51Ni&w)4nvI*h-y?QJWuD1 zsYB@3v>cHnOOzMUi9+pFN%`((tU`qIwHg@JNSm$bs<&I9wR{m`^9YnDsi9(O4x^CO zUL}ba&ZZR0=Jg{~=-CgK z**K@k8En4kk`g^1uX<&nEd^2q1D{i8iBs}aib3Q6ut8yvNPnW0nAI%r%w4p^d7faf6 zK-v&Q1Tv&p1lpi8BwN7IDvIGW!<Gk9PGI4o!?<8U6-_cAosg+I)V-}guYZ;r!(hJITU7ijyrP3%i+ zRqsd1l|Kmc(VpCt6Bt!WmxhFfMlvj=lp&HQAM@XT0gs>(l+F9eRV$oE$i9+iR(!WQ zPgD>M3=H^=j@Eh^VT@ypGbAP$9dYfNsPKq6u~JzGt`_!`}mEibf~wuJgMF> zb|pjj7DzZw-0H_(Aa&0rh!Em^KYfX6@6Q7?yAh93?x1Y`C=cT%1{nz@Hmat&N1552mw7Yn#N&m<@Ave)}L3^#pJ0eG=PG+yl-uterhLWnt51xN-jmp-fnHVwIbwj z*4j)tTur=WGxU@L?E*4yllXNvo#a57=kp2&=?fvCs7*kAseZNSO!W=7jc{jU}Uu3vJ8ve z&9jo@MT$|6#^T5!*nBPboO=y!#>H)Wd!|t600|S=H%~}TOHLCg)KL`TOA;ed6}c?P z@3+Z-d+eNA!s5wWBQD*k&xqG!=8+Udz@0BM9m1`B+oE)qJVK>P5ND=Va7Sma0~FU9 zkm|?jb_7*-6K9+9zNOHkwjJ$zN+55|n-wpwaO&5~&kQq^j&G#fGp$W%rcm{I(ut6! 
z)Lf8Mz2-eaEMX-twgfas%+;YYfE9k*)zPIVoZDGQZKL&2ln}>gzSxlX0EObTpZG?> zU{Sg`6z6fmK%GfPpUSN#L1Cw)AXTM*cfB|hjU;9f4FDe&;r;Pi@Zp#Bg6YSESX>J; zts!P^GSk!=M0AisyL|qjXCSyKa?X=+z*s)CopTr&N5arBW58EV{!>WqE-_e%(*3!D z#!+aA`goO@!#ZJ^zI>KkNrAZ0IAO91jileg-N`(Lz-y!dcvhSm*)|hr>u3pAU2T~TFOc0(PLrg@#9fQ-D( z&`>T3M8}@i=AdUkAVE|iH3h@2$%hNNwP~dF9VCW2ysErRvqlOvQy1qwK4g+=`R7o5 zmT9sH^u5Sl+)P3*K7w^>iF~VBIy*$_Xj(It+D>}f`@t~1xsjvVfL#f-XKzKCEz5JE zqhbQ zKSYhn-$M9f58v=>iP$j{+iVvD zL>>-N9UlsREor`wKOUw*TqcdOOCZqn@|j~!4v-%IzU*!zx=DycZ=3<*Z!loiA!bI6 zikw4q{!`P340XM5#6`r&DqtYQ_tB6BsASCrUAX$rIl zbn^U=V?L<&poEuF04XzdPr`KgOhYV_S}_GTuzM2^gne8cBw`r!rsi zf&l~j99DLtRR&TFHCMxD-X~Jvp>5#ix)0k3%GCswwwI_Xw2k0l@H-sk6THVkzUqnu{T zU(wkVS3?y^f{XU)oV*VFXl}Vs!p73#5i)ga@w*5}Bhu=#Vj|@L#y15f{W+tu4t;r)WL@#0Cz0a-Au(t102$+Jo&}N zP1~s6U7sy`q&xeXXUz;F+8H;jWM~}QB^b^hBW7zpx372nf2GPeatX(aAM=VJ-zo-M~=Gj;g))T^OmKUjRROIF5Nm&qSARNOaT5u&s(iz6Io}((-7<%?!SKl#Y z(&E6>|4@gDzn=Q@9``uAey!ma109SsD%BS~_tw`+Q0>D({ULQ-oX+Ol4$KFU?z0a? zUa3^9Y$8*pJ|ZS&f^jx;+spL^%DQ`*aQUl_Ry8XQi|aA708s|>z3Hc4ZU{7-5W8?b zqaECr-JcD5-Yckq*2cFJE|s97S$86inX@^DSvAGJF66nIgmpb<^@@okr4MJoxa6Z- ze37Q*4wPU6fPs$y{&lw(NI>7(MECx<%JCSgckrny0XV}*`=pmB@;nP$u(+>f>_8%d zY6A`T;?X3m6k4klJ>BN&eiRPR&EeUr%YK9R5kRyJXB!|5pK?q{!w|cPh3@32b>b^R%`wnX|ZE zFy@Cfd@OjbIskhQdY6d6Gp0oQ7uv%Cm!BXAwhM3+6A)->HevKI6g6}-@{dC*&TCIJ zn+)l!1TKY>(X3kQosv}>*VAc8x#8t!)Se~_FW(ov=1St4g;7vrqwQe`zyrWC8Vd@Y zH#d>yFxKg6_AC0yTXyD~subncom@{93w7R@m-2)obq~=m#JMJs;ZQE@hqza@r`xV{ z-V(Fv>h}|EPE#jZ^m%sI9nUpGk|s-kCOpc7xOvX3>skfzd5zg8y2vJJ2=cy`yy~S9 zF6FJ_8gHhu7~V&1vRyE_20^yUgU^8Lg0xL@tvK!|9A)|28V5Km%*2EV;Knncdf?0jd16^kO8Zh6GZKsx+#P^^=wbtB0nx< za#l4eOJ^!_I!cvmBPJy^?60vOMOO&?dQvu61xMcO@QJ+V*m=SMernw(p7bOgi1Ha^ zMK(Zz7eH@!_W_TKEkKq3S)a1|;nBgDx}N76f{z-tm$`~6@6UXSqt6uv%;56dpT-H~ zpeAVj&-#}mQ?aTAfco4>tf6-j>!I2x1>PkwQTXmm>6vwQmxuFM#fRla{a)B%6LVF{ zIhB^vQ{Cm6_~fgi{@%)=--;9#ilp7leLZ#BX=wCY{g!-|`Gaf6;giyDe<24E8L;yNXkTt3~I~E_M+YLqwhruLBQpDWHXQY;LedDgsuC=*NJ zjwGn9B2yEw@+4Q{-+BhmK_DyL@UOKRc2j%fx*$&xftZpGnnH$b8fE(A$`AAP;b}@t 
zF9Qe!;cKf$6R^zC=KBlkI*Wr!JKQ#=LOf|8)J*#xR-tkgYW}3qM3hkom6CGcAc1iM zFa&&=M*ONYVLu(ibZzxz1&~+SdJi*xdCSq&OaXG)N&a@BeD`u8W5O=4a0xBzzQ_&c z=Ld?%-$e>b-JX*4gZn%e&y*H|0q0K7x6W^K$(HE@X{%b0WFcI#4b!JQ~q zjKn$D03LaqS~O#l_>2^)MHNCVo}cG<`g6VPmoQHvJSdY~Pfh`K*>owATcvQJ#o`$- zU^d(-L=6>kzya>hk{TB$!Enjo$7GvOLGQ?u^j~PHqzpryUjiSJxOt!~b66N-8B%Y* zaP-*PsNW^hUYD9n**tzhk4=DXk-)m)W9CWeZiuFgbcZF9)=&McBf$4Jap(8QpimO1 zR1(QE)VZf3ThxmT7wQjS|7I@xadB6vVlePBU9&!~+U?VtlNVXY=dk)n6~^s`Oj`Q; z282xDpWkXP1Q@xl>^qYKYA_nj;-K7HzD(f1{WMNs7|<6rC!GranCws1eC#hmFT8c- z3++;x&F~!Ef0-PRZBUUw7E!C11Fdm5%$fVhx8@@?Q)>}fE3+%LLB@f@6EaYYSB@Z( zqMN|(2T8ke!47x9!oA;0=EVR69tVpqzI?_wHr5S3h9Q}bzCu{U`q&s=R8|q}29vSQ zx5M(d=BE$UXi>g@GeZh119&A7Ggkz@)rWH&EO;pX6<+ZiDJGANNWbia>j^A03ml1v zlMf!^6*!BcPC~p}z0qh$0>5Sl3%^pBS!6+!Xv3j0T>KL`)O-AEy9bWhqi>^5@5H)J zZ$@TM;Z%i1=~!R46`w~tX>3f)CGvE+m;Xye;d?Jg!D&$ybyF{Jt1x72%AM@}uz$7P zn5skHwNxf83rkreYsr3LyuKQeA7Mp{#xq>66liPxCo7 zLJU}py5ejO8MGV$&?}v*a#?j*zM{qmMT!268@WsBIirgI=SVNUcl=WfDFWL$?uvlF zg*QISkXfuV5~6-PxE=4V%1Uo3T2Vm3&v>HB_3Sl;6czUY24znCln=vFu~MsIlDUJ7 zn8Xye#^&pccIph#pKV_E0z008KV&v!I6uT$eGvpeVEl^Z1xDr^e7r2MXtBCImV+=2 z!|&!iu9q3I0)494N$BW`SC7g^r6k8IZ!>QyVBzh{JM}|1PuKfnjSNJ<$Fwf)@>p{b zV?7$YfuHLzam91tuwiqlKH5xmK!mTSB0y73p@2t5fT2wH&hFyl@6JxZjrjylpF@GA zjeGiUzI)xIiI;TS)HNtIz{Z#Yf8w9#3x&YfTz|%{V>Y?7P-ciY|9M0wtOvs6j8<$8 zX`QzB0p(}6YUH;iO}Z$NUPWS%GVynf;tc+fcb(htr~8mJJ?A_vj4Ib4GIerS3xzPL zFHD@q=$Gl*2*qelCm9ZT0B-DKalzb@>?R9iZ%FS744uSNxVUxOgT;Ng2n&@j-njxJ z%9L=$7tRlLmhvwnf7|N`?a-C=3xPn@^Q{?{{bBTS3-@D1V{r0EMr#B-4$OF0hm`Yt z2bZ&+k;kcO@>JSy=RJ28X(*o57}=m4Az^-g5I%kgFvCuajC(a7+@l3zOnM0aaXiFSvUWtf?s}>&kS3jmobdP_!8>*6R(< z^!@##1C6;wvYKS?wcZywcI>@iD)(rp`l(wl`#lN6x`=joDqx}1CLrq+ zsywa-IyjBwk?YB%f0xdG^4s%uZ5Nk!y*BL3Zb&pi?TWabuhj6ok3?M`Wn*8gzW&&_ zS=2t(d}5fiuPrN62-@x;3Knb{`iv`3B$p{$Lm~+zik)O|$e^Qni$s;S4@|kHOGCSq zSH|X4jZy&_Xp@QvpXWm&Z-tiSEY-lz=m(E*=GFQYPU34r&BgT|2a9ycQQ~pF{asBn zidC{FJ|8P&_w>C11Ojz_4=|E)erNJ$#-t*Zq;9p6gffG`2nSW_mk2Zry0%e8;WIdR zO_Gs_pbpt>34BQM6Qhc=G6Bdaf3`1ghsy1HJ;PLG1!YDYLN?b^QQ;hgeGxLr+MPQ1 
zlxB(sCsQ%~`cr@T4lu-*&V=bW4E1tr&9)EIpUzv~!2yJiw36~_2<3@kk!+AZ)xKLt z1n)*_@r~g92i?^)EBP?u%=U+=S9LI>Ln4oJ3 zVTZS;0^a^WXXEGPsx|wwq;7x&q!+7Ls_+(=DT44PQ{-aj>BuwPh0)Z->KVu^og;!F z9=QGL&;9J1b02_5&=#NFMXxmQn>j8L*tQNP);d*b2}Jy-G?};_Hja}PwEVk3Y^x26QXn_vao*Aj7@S5UdVfN z%uBmj^8%Bc*p_wa_dp$wU4XP;0!Z1GLH&2mN&G9&{yp9(PC5)#>)p8kK-&Tr7#{Ni zoiib!*mB*4``vmB3W@McG&T`w_t9}=Z0ok$b#QB0#oT67>AA#kTM8uC^ds7?#ci6r zjN6d&IK6A0rfnGZYZ5H++DNB76p^k8!ANU)m_qx&51i(VF?_fx<7;#6B7s*=u-gAFcypp-Im-H>RzqU3zC0hJ}OS zWOQ$~&!S@BKXs$I3JDv@?(obARYAd3AZ?*kTU%Yd$w(K8O%uwN8NC(X5!Ze=Ue=Ki z_j={?C#hhP_h}(KLQ>dvkE-=r!EtmWhkGFMM+GwUps$coJnBd2SVc7hN_c1g39$~< zDI@b6Gp|+AcNNVkA@!;CzleT4*gyzZB;}K7kTiQ(vd~gnb6L3@*xYWhOqS|st<5p} zSP$z(f~jx>$pnU|gIJOVoy<~Gca%&_NlR9c%Z6=Al+>QvtnY08?P#aMacQmeCy2T! zp`kKW%Ui6v!0nN!Q1&Z~jF0G;7|B>^ITaFOJM(ZiK$OygE%QC&ZQ_HO@wGF?9J6Th ziN6hU$}24@Bqgy({Lt36%9i<^9Lt;K{`HHR>_}IZa46XDZ7}eHv9_{UG~xNj&3kd- zj#4(;2l?J^MeGKW4to$CAxTrZ%`_`YgumRmBAjue9xHj)K zw)>!qaI8ZQ@?*p0V9jDDPj&GU9LMw57*zL*1oJOx%QkBW)!yw-Op78kfZilIbW1Al zRyayb+2?gD86!aak5&Y>@`A(+gqCqR$oiW0P&SC&h8ET1L`n{I52Hcuf|sRHpV@Lg zG>$1`cc5K7c7y_tS?f2+vwro+i7A$<52^Ar={BZs5-84B8cJD+e6bHi$tAs z44q7^VoU?hEK`#gT}DcChfo;Am#!F8tL(3_9^$1boc$NpIJPq|g~#jf0UFx;R#tv8 z^L@BHaq#*NM4vJqSu*su2%C{aPRU$D)T@H+nyE7oGV3%a4{{aFd^q3BB~@WS4-4Z4 zgOdJ`KsfZMO8rgki5kTs{I@T)6H}0}wx)AjS~j6Wb!6WmyrkhltR&r#Hz*d3uVn2) zM(HY@%YmQ3dx+>{vdhb89}EmvNAb_XTUibrRsIbW5vz(=Hao zl%iFAG9Rx(n+MiX4tbIqh&w4hufV`myk;t5451siyrQ`e=dSL@QF`DW>d2Bzou*{S z?_^d5`WJ_y((nUD76*5SusF0kmQ}ZYrz_ZP%wt%64rJyFK6~unsoFnTS>`)aw%NOu z@)s(0A?lx||8eoX-ge)q9&)_YVekGvnI&0|U|d5$V*6@&0_0%Q3__ANQ}^e>&h)lc z>qeY7-FaBgfiV0qV>`MDKJXO^U&wH+5pgB_=wh1^bTq6+yd>27t zHQYWeEbfG=v-wH!HwxCD{aS5pFX`=<2-9+swezaHz$L2DG`0QtbSl}qCZ}gj1htjn zMpS~Or8*g5Ojkm~c(8HmH&BCUr^l~u^@fp)?;=H*J{+*L zSc(D5u=8lGUJv5z%%0=pp_dmi1E8--fH*N{-FzzECsB5e(0cc4^WT@nSF!AJ(p&L* zQ1NG@W8p)_jEq_LhSZjz=y*gkKxU%nMgfs@709-=4 zY1~1^-~8Vf=ve7(&5mL7?KsTeq0YS5+&8UXiJ#3m?&#CUIE5se9kmD-{=PQv>7vlr?p2DkBd+<8v1`PJUUD62Nk~azLLE0xt*aMq_nXfUkL|ZRg&Im 
zFEr5q07IGa_4a_!2W?b2=8ko6Aat|nht_+>(&awXOFuR5`td$G zTY1&FLvA^t-ob0!B0G!RC?7D&O)xBhpun^V_pwUnhZR8ye-rV zo|%kYI1AnN7P-xTvp-5zRu7~W2Z=d(q#u{WHV_&hj6~ zh^+q?GU7K3@1JBuItE;ZZ&)593oAGi9V0H|ccAiL!-0Q&*S`gcSpUa##CEGbT1X*h zPv9X3P|9}4IX+m}0?_STnE8ue%kYMvB}9BlzrH-lh@^a>ommNKv=>URw|_U{>KsAO-}!`7H4yrafAC4sDyjCuArsovg9Mgne7po zN%)L-*)#JM{$vTZ%T_qqhk=YQKi7#yzy-@FpiJc{v#3LAEla~2DO;U4CvsR;TS-o9 z4<01Y)&_yTrxo$2-r)HDkAgd0#G%^CyqtmUrwJi~lz`~&`?Ns^388G$#51obKBs{N zn3%%5g3*W5g#Mko)lmtPa9hE94Q6U`GqkleH21z?xYv(@c8~ugrS{i}`^#j00}20; zQu}|QB>q>5_-jm!`I~C^cUaHX$_)Tv7CoU13`7asUf8Y}TK}cl&meD`cY4qRlL@8aDzwr|380i1^xBLx}$olVy z#J>*wyQ=;jkw{Db7l!Bm4Z*pP2Bw0j+~gyp_#k|Rv7rN`sp5x=3qG1hN;N%<56Exc zOwn$#?zfGMqK6DMXko-;7B|8@PAHTLj6jmspE@qNOK3ztBB{g>KaNhHDw#1L=yJ3O z0IYr3`PBL3dG%IwEdRxIl;wWadbHVQ>6?kAS*hb^1*;=|w>k)wqqU3I|9mKwiGMO# z_(RQkEhy1k>?uz>!Tcbt(y}1j`p5Ch<)&3GzYrqQ4tu6>&@R&rRY#%4u?beyQST3EuZk*t{fM@TADCtA(my#o zAlT4mVQlXB{2ME@+zbBDc+KG`FN06=RRBQ!nEEn(Gmoe~ceU7hF-6g0YJ;*} zW(NTp(Gqv^0@n*E<}nuPB@M` z4|iKS#mP_Ym?V%Qog(aMgiu$qq0o!SPpnIi=g%%td}umv1;0=TF9(V+!jKvx2t^D_ zXvUJoi2#P*BO{92PqBBnXR!y$ZK%Oywg*}pfLeA3&uI`r+yX{jW)F{P&=7A#pfX~D zmjXuY7Yke1zqox2LezwuOL7dV<`<;#bJhh=GznkCFTNh}w{Bqv1D#TGDM$cBFf; z0@lEOfi{!PL@sYDo!%jz-Wi?VeJ@r|Uxm$TqNfG2SYpPsh}Z)<&vPfMiYKcGG$gW? 
ze$%vAD<5Sns~ zZR_r4=%8WP8+yOofG3t-=x*lFxIIke6?tr17C7f>VZm+dFbwSIuc~&{eJRCA9 z^?TZp84`-ug#EW_2S(zUv~82{%RujjRm4hg_1SVGXq9bupNN&ksrl%WV8f7xG%6_M zFwa|3@>zRBe0D={!I6ga`Yn1X2YRw50DC2SA^E#@Mf=(~2V$d!6)|5zFnb*adU`X3 zfeaDq5PwOWeQSB3M=nb&MY7fac9yJw5!@6WFOgpyPqF|v4ooZn>}M!?1j&>jxL5)1 zJEImLUKZ!Kn$5VQU?oEEWN3N>0E7|N5sSk??BFryC#7rPDtIILk>VbF9?By50 zB4)GX15flV<|y4=;-{B=@R#bH@E60O^w%7zOfII7Cf{Rjbi^&;C<5u0$P2bO&8V|1 zHp|65kJ0L(_k6>`7wE0-oYR*;5!UCgnNO^QU%xL!T44@AM|BefNT2dB84aJW_*&PV zOTMW0`aU^gopaWs?0y_D>$iU??OAWu9}XTp4`O3n}eQg=xffm8&c$ObM9y+*q%pg4sHspI^95CMPJ2Soo#2k z$MgnjxN!#p&Xwxa#FI*HFRzz)E(<@q*cESUA=AW!(R`$?m7sMFV&}(j=ir}~otxRf zb2c#=#WLn_Io^)=5Fm+mdWIK|^}WDCc0WnXeaMCCJQFxuT?|sv_bIeL|1xJ$b5CS8QtOi9ftt0KHZnQ zUgpXZ2{E$2Q&^~P#d<(Yf-RO-K2ke~jgzpP$)H-rnfUcFgULF{-evQBIP|=F%{4u; zekI7lqdVx5xqNH1el123RHgVZX_e9oV#-PYxd)*pClHutMnhiPjNwJfqhwmaau=wm zcX!r@OkbP~G-G&YtQZRqRfSQb;ZH-he?GSK*n@rZ{lZdVq2l9NFtWmEP#O_^v$AxV z{uA$oo@%pl(DW|R4tG>L+3e8Gz(ZYoofF;3(!~~x&`qM9ho(vw54Rkdvp!xoxPXe! zGdB#P&}@?R`D#kiVk@Tg7>iR;Db9QgbM(7Q2Uf=20 zj{JWGo%?^Vlb|j%A%9ApUsSIL=H7YWH8p(q{bp|Y8>X|*YTt{1WMg_bjQ=Fp!JZcY zMefB9;bj^ZDfLw=@iVLeM9)Q2%RySzA!_I&A74VM0a@&2PheJo!Ppi*?#6WDN3rO& zJE{r1r>a3}>Vd}90$!`_K)PC+pOsz?O#Oo{+~TgX#WiXNr4f8)OSM@Y%vFP8#as8L zvZhk`>#--YC5xqJf>y3e0o~H@Kz$oJL4Ai=<6*7VgQe<&uLT9VbP>pgWWCVrsmHnI zbCRyB;xg__mlg17r_1#R7f%1i-LYZU_#HPZG*36z1Az;G^BNds=g*;l)ASU4Bb+Z z`IL{{M&l>g8j&0MtM?^m14^L}19j|i4-#mF8nu#|b+U)h8VwqwN5kwIb?bD*#`+8{ zu=QR?B@t2kUJgbg$v8eUtH-zg*R)kV0xrg9Hm%Hh-jLZg#9M({&)7-FUSc?8o;X=P zBsY}D{`uF8oIq3jUqT;b^VACz$2apr+1AZGzoyFDw|{0REPWZW0*^ZZY7KVsHa0M5 z316_TYWpc)Bf24J`OjSotaDVC+>*aTJ`JI{Ajnic?ey(n-8Z%Q6H-HM`Y@y!JQpsG zng+&S!}H)Y<$OYW!(2K}djT%;o`fBYB~hzlf(4(RA<+qq*Ke8TeQKw)+D+nEFvX$8t7;}O_fxRcOeyzf~*|=yh zcRpA+G35x^Y!ElKA#*YEbHa86H#Q_Pa|IiZ_q`^X6Rg->UF8-Qgf8hfjC8rGQ ztt{dA5G(9ouF{_1Grk~#8Waf`bonl1V3vkL*QQ;@riD*n5RO~?24{x!W^NoB-jLE0 zX0NJv6^mmMQ=u84=7xmDCb~h*=(!FLj#p^d* zT-aC*9eg@+@(aAJefg3%TwFA-FTLj35|uGVBC7(|p3!f!VCV&)mzRtS;UhV}Y7nPY`Yp 
zE;(0;XayAN?nZ^|H>@s83lkCryTk_}v=f&{#0RnU&DE|S&COP}&nZ}6jIGVw71z(V z3%?Oh(tR9v^znIl8*j%)BgTacz|Twp$UzpioGE`yS1y_<_?NNly=9vbkTlp%uTRBK zaPJP~O9?djn1yc#%BV9it_QR0a&sxN|nlg9MpN`kb zno-3`$-&j5(7WTUIF45(hZ#)v>Ds;CNS>O;TxaxBp$sbDfL=m#tbKUIE%Puc;7D^` zV1y9v64W9Z+Nwl}_gw*4$^pMl7|!|}w%R${6NCftKbO>#)OWD?P4$WfAInjT=)%bM zapj1Vt_E3(SjNKjyhrtK>! zMs)3n>&(H&Nh0TCGNEzw!%(CX>jyv2h|rQ*+?F*NF9QrUjS)wXx+;(dW9T^5lw}O~ zWpkl&+Iz-2%CEsd6i~S^*5xeZ-m6fl$Bkm#To*qQH;B3tMAp&>z%$fxOUEis8CeZ> zK!`nj2t9mTT z12aAsyX{Do%=Qb!4*t4(LCN{O%y|zICGDO6i+1lV0pme%diP5~$GiQgR&YsQ1CLuR z{rL}say~$%KSrSQ&4pz5pO-0xt-7-8$!h}_NMQSj^&biTS>`>5_|62|TNkZKKJtAe z2d4pQ*#ot?N##^CcMrdxdxCK&->&`VhUYX6e*sy;EYoywZI4e)jc2e2+7q%X*s8mB zY8A&1${gC;+uPOE*Bjl<6-yl!-Y6yPpUfHEwblooSNzP>)m^d%k9m4;onBCF4C{+Y)&+ZmJ2LDsWFYn+*? zp!-Q0^E)vPS)R4ZEXSrhHv7cp&=}e#)mm4Yq4VVtAehCQ{4;Cnfyac&Q9PhfKF*=d zq&fJ2lVc2F0^R~?Sx(1~iL(9Gbi@Mheq4YRbNOxYqWod?@WHofluTAu7Tte9CrfcF z4ylzp4m-Wx<}SDBwQBSxdP!m2v~<+5p>h=`?pelGk(g-mv9ZF*RRg)6j@nBjVdadz zc;2!)+kVJ$sd*wxK_NX|Glj02$y!4gz^L5h+UJ_isQky)Q8s#dr4Nz28edPvYT9#) zs%`z@;~kipAvnk^ z_^*u3cYZ`_29CS=ZnC#1u0{D`X|@)N^=!(}y{{!MJ+ns2&=G|Al}p9B;o0DH4rd2D zF0G~w)u@LN%8@h^XTT~zcJQ0W*0W2ql6JfWqumC#c&<^-TwHn*42}9icA^<`hZP5G znTRn&uE{+ynh}jUBD&H#8@Q-PTuTWFIFrH-V6>(^MnT4yurP({PjvsUk(EV5RXj?kKE^lH3?Xea*W-?I`&YVqN z@-t?zuA&}S7&Y>mYx11(1jMgT9eVL1S@`bR9)v%}AJb1kmM|niB0;Qs9bHDjEFy8l zGn09=%A;pbuHSTzBXjS1@pf3NE}Cl5SfSyq{Z(jhLIEItA&k?ZQzLtKh)vxoIedqr zy3wzt$+LBwh!Z%DPDn`tpCOU#Qd5#7*)QqD9s%0h?hmaUjb3bAmwkCsvq^1E_u@0+ z$G}F;b;P$jsiO@0_hwFBBIMb77-n!t*d2Q_{j*Og>BPWp5HS5>*Bm5=+T&^mb5X1g zE7S6{0)_oRmY$`s$8#YHVfuVCZs8xGu%Nh(D{t#VMaBe_@_}dpEbi! zen}%bo#3M9)HT(W=}D{E?xiV8VU^R5a@0;kegRYzeQx|mzlQZ6OiN~#e>4}-|9y~? 
z^Sg1%)#kgO=r0@V-@D+BdJccRRj_k3{M!S5U3uRo{f9dI~ajr2?p7W< z!uPU3?HiX0YD!3Od0h$xD|5w+%Q-Hn>|1FmnS5+KYXFK#1_*m$jYK3B>IkLuVNp@YI}?hPYFB0|O6?fIU*!}jRpa|={|8;Zp1`**!S(q-zVYkO8qyi;kWtpKf)TOyYgx*bPi3m)-zB5L5{&1 zZTG^`O9s&b@G}Q7#Xy0=`dYy9k9W1tGct8ChZ{JCrlpist3934+NPpPiY--KrFyqn(SqmKVs)?ChF&jKLM}Bak$zkMxN}}cm zDSR=Zb)LC(OqN^SfL^3gUqPU8{sJ)lex1bLk}>{;)=Xzd^64+Z_FhGEx4i?*3(H*l zIW^nJdT!)F*8kPmS4TzNeE*xU2$E7FunW?)y|mIDN~f%JcPN4iOM@WYDT0*JAPAB! zAl>aE2nf<3so&*!zR&ah2%mF)zd8KD`^9VK&fJ-~!#VffnP?C37b=zsP{x?Vhyy)Z z*#;@$jv-F!g=7dUe9(rpyZpuPPcW*q|+?h z>LHQtpFh4m;Erv)h2sxyb%@^sPP0ia>sNdru=fY(NPzh8E@N3RSeD}p{1GCGgYd$;B_~-X|p@gTO>xkkqyOB z#kko@$pSaNJwBVdzUp_yqt%(kpHPj(bv1?E!wc)t->7@-Cx<&ygb(u4rbhIJ#)sb| zFWcIROthn_#Aewm5A`R~ZE;yN4TS-d`r4K9CJA3i9=ICBye%y*d|7w%QN-K##hn~O z{Zt+uCBg8FKE|Fc;IK@hT%uBQA zAcyFpBm{jUEBFgXIEjc7_%3r+)EBmJ;0^LXsx2ZWNhVMNkje!xeg)MegJmIMHKR<$ zkVp!HBBB(X$nOI>Ft`2EqU8Vb6f`M=DZ(*V{GMn2?eEHD&j(OGr%JrU#41hXj~T=` z63vqTk`tF<0=)W3L{JM%0!1NMjP>8b~CxP$oV>URNKVc#PPj5ew-+#6t zjuUIH!*~K5sb*N}6}aqgpXbl#R)Z+W2i8* z31iS^r z5S?JialXwyxE&kaLt2bC(NFR<_9>Z~7$-uUM{_SxHH8D)arj0C#sGk#a$q@*eV5_H zie^R&-~yfAGqTpD-yfyeBQIeyl&VQ~qDG_8I)cRn-JX;l8j|;=ql(+ z=&I_<>MH6|dbmBT2_p&v&{EP+J|-dth~Z+H;E8HW>}~-X`Y(kd!3oesatkU83SlBf zj7RfTOcDku$>TmksoH)L-wT9OB{wTP!rI+dCY$^qufnR#rVLVMhm(INiUGLeq^I`1 z__6}kRq|OhDgDrsUPDDd<=e`@aE@@+aJE>gcJfX@E{3w%O7fYyGMx-3%RUh+k+>wN zNr_H^6G*}_PbG@Mdsrpd=gG^69Q?wfWawnEw*`slF>aDKL2+MzugLL^s4F6%mnN*b zL^OU$CMZZ0E>!hlu(uUb?xBh z7WKRmmNu&Ku6WQBf`cL9fn7`Ng}IEozpfn6ZBfieMv;M-@6H6(6F#EdBAvI#8bqN$ zj(3fNjxTO`V{M~!K#l(Ommbk?QO?U?6|jyVfxGEJqJ)oVTrO=fVBdlo{X{Pxu+3Xz zov|A6vZD}eOcVz&Qp#_gNyh7n3*8po{5>oP%Npe$1(-);rJ@KyWOqsLHb?o&16=OR zi(>_VKPA^7y&luoYl&Sr=1H+XqB5i${HLxR(9hoiRo`_8oVs$rW!7jNvLcHW#~Nd= zN3DQHCHC%?1Q=efz3D=2hBBiy%iAO@VQeQhnv{s|C{8N*Y?M?GwVkgK+WLZkR?tVr zoj-4uOD~Ook$v8Q#xL?Z z>cgqyWxGVfY!VhM!fxPJpZVK*Vb<3i?KdCiNxPNOYe*ix)&n&lVnkzFI z!j_dkT^q`CGL(jkvs$^j(_&{TObH8bBH7?Yxngcz?_O|aMFd}wxg7{#7+E*bVzeDK zf_oNw@_(Vu0@Qe>1@~D_%LKD_2SR@}#{7Po@$vWWK6<>;HlV2G6R~YrBCU`VGlfx@ 
z;S=d3`4GkZq81y8dpAwrn-`QVt-8)U1XuZ;1RJ%jJ(L@p6b*^Qg&ojT1sHW5j%%#a z@9~Fini==j?W^_J{uH>4Q2l1YyXWkt;HFV@cROL}sDFH7C{cSm^qbEx7x$puLxujz z(n$5iVcqA4wM(mXgZ$ytt>Q7AdUNVz-*-QQw%|(e6gawDVR*)9P}O ztj4E3C}X`G2@EKUI*DVMtNfKX3|ppOG>Nwx05DBpJ|ho z`zy|%Jv2Lfq8a_KrUzd4ZS(E+ZBcz^IqquplT$JBx6v=Lnh77?D}@2$7k7SU4wzo? zAvIbC^FHzDo!#A}^iKcqQMD{#tZS8ePh^kVJK}_CebLy`v8;M*Qup$DIoY_}rm*JM zpPb$d2h8u(#x5LfCvAqL`xfrV>`Dtr`J}cUM%Sbp)G|T@1fE~n7c_42uTK_e?wtHs zy-{q(R~K$X%44TtSN6i_l7ngCn2qj3;??r7=~qh|`nn73ODE_(*Ri`+dk*G%k(^b4 zQUf$Knj$Msgd$p->G@td#)4VhjjLj-CR)75mq_K#WkxdVuYj$Lz%_;KSf16*G(@1q z5sTYT0yxVKsA7}eP5!+%;l$B}3GW5o>1%1h6Osbi$6~qDL93B{o1a$IoFjZgjm0gW zq93GM#$?6+E=sf7WjhM>>HgK;(3CR4X^bgw&KqzM8@R{cru4g0l?%}jG5WH`CZ7ql zam8W&^*&vGwd?m5*cTPh_nc8Ldty{eRflNA;>RjQ?|I#t2o7$f=LQM?_LDXLX&x_X z<^TjZ9<|Xi6{*WL$5I`r=J$!Bc5vTqrki$)_}~(oVXMGGN)3(i7y4liNohmZ2pc~* zweJE9$#6#`$7ti?=*b7>sBhb|@_7U~?_o^j4ehV^25u(Z+va+OVY5mdC@+0`pzwpp zqK~9NScTv%jcQs2M=jzXwrAl*CX0}mD4D-pw5aZRBiRPB)r z2I8-aDq8Oh=m<{5B=9WO3`A4eYnMZsGXJ`buvHITY&g0=`slr+O{C$2bt>yx2Ks4B zbGLvgY=KC%Dwc1;J!dI3Sz^f0%eWgjOp^Yl!+vekuKsPG5l@{IhmDyh#uHi1+K?l= z<`hH_yANk8+2bUFQI3yW;Yw5tG{2H=@_X9nug%H$R2m@~Bxf9z^WzJ>sh$KB-DwKW zrzq`ndZcxSS^Omrg|*j@G2f8o?zu9Mma+X$KXnaPZ~F+CNS#!N>9FDhkiENn-p+#Y zt>(mWq~}vyf5Q(7hLqU_1YZ=bTdxDzoA>?sYFd+{)oig#*Qv3HEWfT9CI3s_QUNY1 z_E;WM6Ag>h+6!dXTXto2>pX!?U6(UVhAbD{KDkyrs(PEqc|$`q=QTNvv7tGMX$t_- zxM~N9w|@ytiIlc7brs2-wrhZW_Vaa=lmAZ1CKsf5e?q#WJu4d4U1w%hrCstP^tW+6 zCtrng!=w(Ni@Wg=$epQk8Wfjj>)XrHv^FDNO8~?0>+z6=AcC*deeEpzA+VL>>q*OA zs+*rxD>k2?6A-QQ)2a%EFS2R!c6pCrm50Wl#?W}opy!lQ(A{kgm`rHt~bhRofKDs$u>+AH7sgA$` z9QWo;77x`s^7&f@EB$oRgWn%pB5A6(4mhM-2Cjg^qker(9HPT~tAq5r|C-FYXAjA| zcJ_{$h<>> z_W~)sE$7$}e>kgK@tCJ#7%!9b4ckvT3M)cf+{~`8d6|fD^smJ<7q!BL9zRl4%BU&3wH{Sl>SID*oR{T2Nq1zmIwb6+P+MZ)F*)V#GyQU`{lMp> z$z--*qD02Dl<;sq@T!lC(vcV4{qY@!)PxYF$?+TFxs{zdfKM930^7yiClU>nx#cIB z5s;rFBYW>K9PdgBUi>nQtz>p zYaK|~ntG{QR4I0Zam{39I6@;|m5Z5e%&}*7q=Y|?!MK6E?^~IwRnrmLwusYCmSyE# zW|6DvbC|{m2s9@grCBOUS%+}cwDu2pW#>I6C5w!ih|4uH 
zZbsHbHRkm4(}rn96l7uQ`7RNJN8mzqJDgcU!kQ*S>6ex19z}*{r}LwDGcCmUbQkC4 zsW8(~gTzns_jb6Wzl765A$(ft{i)>8aOt3ZYUi)=Zovb=itg=K4e4{b|3Zu!6`ihh$c}z$aYC*>n{yT#TVo-|7P$ynJmOrC*3zyo1$-RHtX@%9V^ONKMwEXefw7Zsc(s1 z$J)wZ%5Ue(0|_%v0+(byfcLT;`0uU}^N_X3$mEj#OxW%FM) zjWl_;BPM3b{@Of1-m+Z!NlfH=UlMf{?CV=v{0qm5R7rtsUx@9A8-C>V?uy1eh!lGq zS#W>1^s#??uHKtRQU%rYl+K&M24lb9`xExx%r zK?oafTMc;ICq4Fg-Ca1c#7(s?Q&SZ903VqwPJDg%iASFwYgs6)<4I0;yg6U!hDjgX zu=+uP$4px8crUkJ+m}Gp070zNhMsrr(`QGKaS2B!RTmExlw}@r3Kaxz+x5hhX^D=r zbO_h-s*B#^rWd;(Edy7&U*;;xmcyaQ^5%tfqmWUv`#=RPfn@{GCu;OKXK$5fn>Jo~ zYeGZTb11g1-nPWCH^Mk$p5JM|{dTRK;H^jW+7a6+KM_5S3HelwCz_9a+5+ zoU1Mht~-hOiZ)0@t~b>5@jS#QhNJle2TW-i;D)}wu4(dpq3U_-R_L`gW1=|yo!Ir%gu?+o@0tkHNBU2TLe?qxgTx&vuEu0+M4y9 zjp=GdTj(TZ`bgeLw_dk^i;g!tUm6n@8W%xdYqfUU)n>lrgQ2^Xb10kR2Br?Ze7%d+ zrDgxgux1WdUzu6kfZywpjunrQFK?L=1@H!WF*%#7!K=4fMF4$gUd7X=lT2R7dV^OiEs>3>egNI{< zU3lU9_t~vE0xqL>DF!nE%MCvLOId9MDJ|K{;WHis1{$$UA@Is?;kK)ZD@U0m0*TbVriBtyK**~OU^1^a&OUHR-b6zLw_T4ZSHO-b~Bs!YBg{-HErI3`M9=Lm(p7pe!qaJ&)Xj&! zzuLKEw@cK0{d3FKxR~=DoFc5Q!lJk-gt^W#afZALoIF^hpRnqtZyxyb+kM@1>pPgn zQpzs*YlOwcp@ZbY45I0P;_`&OJSW6S>9@lpW~WBISI zdutR_Q2s|oDZ)07-?+_h3=5nH_q(;))mn872NC9#jL#w@a|gb-V_E|xbei)zi(+>_ zxto_wC@Q-0L92CL* z_0p3usHxU8v5srOIsM*zP8(21O)BR~qAFK}8f+Zpg1-jk#u8;{TuW=B;PH0FirDr{ zq-}H-#C2#0)LGh3vTA(OGSqL}DswaS$adYjx{XT=Om*K$+h(k2@LZpZu0MV^S?Tri z?cjQGfG7T~$_EGD0zL+-tiu|~M~PJ4`@#cD67gK!B*~)s^)^s8gW{afdJ6H~rqznr z!H$qVqdv2_5fXPv{g|~18t+@3%E9YnD5pV1KISK;ANShtW~U__MKV+UW zi`Lwg`|!TU>pb2!HyG{}^4yD0zP-M&8x*IY~T=>2xw`3h&wIy$0b zEON5eoz9o^i%4i|jN(m_43(=(7xArn_nC@I2T`jlPyI%D`lp&Vz78Btg*5E!_>~n+ z5jBMGP~_@*mip*=dRTQtX!NE@t*FM7c-9Ou?w6X$y{ZcOK+_eYZL0u%qTBbSFYl{S zoHhFkltv2^HI>0|#3ul!(y+9FR)2I~!iJU8mO%4(h(^!mKEKb#>@w+NB^GKf14tt; zarKA$1L;nti_1UC*Er%uYlMDpRm?Er0^T}|nF{b0?-Wg>Hnc&rxjhxTiYT>IzhOhv zA3V<%r~j@Y7QZg$nX77SOe4Zm_Ytge_xr^lsKmtJvs?Wg&CTHbJ*Z!!pMT{wuAAo!Q!i)FjSqOctdQgn~|g+SBH&;}vh5 z9^VU|b`#l{;nXQB2a9g-Yr))LDh9hZBo%@t-r0icsdRuFEv};bJ`?EH@y>y&e0Odk z>+AyF%v8;?+i8l2o9qS>z{1z>A69$i)TUlt{#a%(HdX 
z%(*3s+4|o#w^~N}_F4{Dl$w9sPm`A85jH3|=y-(UWlTXWH}h5Zq}0)tzL!bC-{YLF zH6d>8raJfJ9jWqeZ=KDr&mtv_PkO9EjjEk{tgOA&)Ff0EkJFpeuU@TW+}^Yty!ABq z^UzmFWD!gozsv{R2>kEeDh`r)<&3r%05ItJEvs}W(A0Y%*$~M$U?DA6)}=CDtNnhS-GEq%CMpqhEamS#_=m09GZej*xYdD7FJ;OZls zx2|g!+Go-=>gJFg8!$@pxoJ>E8XctXm&dar0^o7cscgBzw>tH%1^F!#Gh4s6dSfr$ zGWKKAe}(zIS5r^)n(ZewJ3iIY$W(EMm611hhy)CeOinmzYh$`%%FD}EX9{($>QqN4 z!o|ftr_XW8D|Wlr{g~+%W7PS~D_RmxOYCk?ykSt_>qVWo7!*-^eKF-B#7=A)g* z_YG4eu)%5|Mz>7W95H?~3C7pF>x7kB?53iAeqqO-bmCNBZn17q-vb`+`HJ=JW9gb_B2d#&IFCwd!3ua9+j^Jg>y0%oEg%U3-_z)mt@k2 zo{Z`KPNcr5^w0%8+CY6=+G1!#FXkqwPoJnguZ#?N6h^f#_+&k$4cWe!7W+yKp0X<& z&t`73dstM2QTF0}+Q+1v@-mhyY4>K%2cf31#I~EbnoIjl(9G)>$qH|gmvUk9w3rZN zb0H_*aGE2@+z4jCWhu3!xpu?&*4b$@Mke)iCQIKeyRiL1bFJ;~iiJMq--o_L$CGXj zVO#tipZ-+Vn<)Q!Y1yXj{n)ljVs3zueEE&umou3lt9~02hYvk?*9|qCK2dhEXmEv( zCOJ7#;_7Fiz@y9b%N(I$gT}25pY-owNq$!}B!+88R|xA-lfTfNHd~pZ(c3(L^)tE` z>dP`{=9PprP@PN?rWI)9sp-t_4l(ni4eR~yt+&ucOhMqXMyrt$k-RI{%U*86^$MRS z4Ac2VE9UguTz~u_tKZ6OU+#(mTdKx|YShz@ccdX!<`>xsHVAev@}nPX-;#_2G{E6l z*zFYB6$-VsLsLdOWfmQ7c6ovvZB0!USwt!36`6tg+<1jgOMM;`?VJcJMMN;+v@PI7_SuS*V;+6yk9d9^ZrFlZ`>mL zU%bWtq@1E)2>8G5;(yW#IaSc6Ks61ZF5a!n4}tQ-fcMe8}O_@D-JFE_4# zDhd7bmM5pWqo=uxx}&wjfBf+@>+{t2$|)@j1w&9^I1~YeK@d==J{Zh_|M|aWPSgGV zV*VfWPeg?Q(C8m&|Fm}+WTEcjXy$He?gAA2-)vSX7jv|mqYLn_F5zGN{OjA=(LoyR zW)2kk-vm?@N3%b^mCPS`I=Yy-UK9E|;ZQ(ALS0$-9~3Bb+KK!B@k*YTi@C)${3Cww zYv6wi5Q#v*5I_syUl;@h6++;{BTfs@;cpC|mk36l#RLUW_=LzmdH63Vz9XmQKkLHr z!5?QZyf)8bD52BPls|b$)VaDsFrm||(m!~AfBk_$pb)Uo**^al9#}{a1^fSL4~7Dt zzOQ&%{$p%N@L3s97#tr}aF&NcoE_WWj6Zk~I1Ine&eRnY6awSd`)T=C?%!X3U=So6 zey%V7jfX%9!p`X)CUj0N6eff?J6;$V4#O|k)ADa$ph8gSIZO}?Ifuc)=f(j=qD~`^ z{%jA4I!(d;6GI6h&#fyV=(+ar{ezs<6%2yJ=jQ&|9s+@YoRxv+;bV@@^5BB!>mtv| zK;W0b**Qlb&d(bH2?gULUH%vw3XFuGt&7L-8OeY01o6;$424faKEp$u-!p4{@I3f=3<^e^@~r)X;BCa&y#+xC!p`X#f#2t6=JJ$xcFyq_5`WXepK_6K*lC2A)=!OFY?mIfV0Z&&3{xB9$aIkO$;#baJze@uRfTE`}j}{aXg5qZp zX=-YQL<_>r;ARkWb2BsaLvtbVYyWS`-*;HJy5axR`RkB}pzzmmAg-~o$*9R*`yYsV BSpon6 literal 685284 
zcmd?Rby$?^);LUqv~-7n^bA8IAl=ekk|WI!L#L#Yf|P)OAV><*4T6Azl(dvcw*u1f zdqDTzdiLJu?DxIS`~CBA%`neB&mF7pb+2dEVp5Zq<%EFva4?ywfIl2e2ptz4%-j|S zQ$z%;<>dqgOPRsV>|s`5H8U%yD;+m*TLXN@%+ZPk>d2|B$w~*-guA#|z_nbUPz@Li zPRA#7)kWFN#ZJNT9t@b~@)BxE2Xuqp2L5m`p^lbUOI%L&%UV}!fTdt=j&M3|u#%0X zt05iF<#cqs0+%1gbUzQjyz{5w0Guw+%i)*(!5UCkn460Q)D-|j#uF~52{(g7fonNU zK03au9&(!ebo`fBw{L@GVUBR%rYwZ+YBjJdH{Ew+WO?WymxR#(%kt7et{?)-^3g#A zzkkP1$MfS#fR2~z`*(tLyjKeV)2f-l;ZPSxU}iOLK04raIguI)Y6ym+Ym5%oZrFbtVHKqeA zLoIF0Bw?P0K${DIpNk8`O()351>yy6N&%<>WOAju>J652fw?&W0A6wYssYvnYq^*? zx;kBw)4~fZr3sdXy4zSlHRL3LwYAL5U4bs&dHQ_q*H}+_2m*?P#IWMHA zFf^X2hs0WXHJKfGGIboA$*$+GZas0#T#G#6&e7kVm~Hk#lz)UGu0!nOdt)z49XZj| zAA#T@$|HFqlheAxwIXoy#S-2u$>Nv^>Bau0K*y}uMKMRWvVKw_Z#F1|r6-!hR?`vg z9PstXD&R=jdH2Z5m;pT>?AWYK<9Mp1Qv1Q6Dw7} zi>4vs1;57U7H!R`b~)_=fxP@XRfEdT)4A)>oLc%0TI$+N3;E?UWOD>of%*1|9Bt&n z-H+75h90fl9=iDeJ-GvWWSEl4NWU+iKii2`<54bVf0(A#gIFgg?{J$pZxuTGOuhyX zVzGFXJOnoI*dir z%1{!|brM$>?beYDtc-U@4(^6>6|BU$kfJo228Eas2K z%s1-Hs($|pq}_rEvIvZ%yw%!4-f;LpKdSqipI$0I+#s2O)+ z`amK(scbBj)8h_*QSY5*kg&`+hxt ztFulFe*jivN3R=er(ROsag2Jp4Ysq_a>DkilTWipwR@BvF&bhaSdoeRpZ1Q$Q=c^)j1noK#2 zdAlL=37cr@ri?7%+8#P{8j$0BR<5EjBp}@q&y#hGQ$2EgoLyG;I%pTrj=2Uo~eS0uDK=~NWn&`cUjB= zd%=q)p*0>CIrr6bWO*CeK9jkYCsI}cot!EwZ7sxZU(ZT&nao|E&I3WYc^`5 z%2qs^!!JCcRS)oQG}A6rNC{ZQ`r?ldr$1(s30&098xPQCD{8qN^pxUNJBj@0J|)?+ zd#DtO2}ND(Db()^1@o#c_M<$L>iX7^5JAe_;No`UW;*>+ zysf5oC=q2*XtE*KNeIRQ30kNvQXMLNICsqWL$QEPrUx>@Iw4bbBcr=P-ot_Ic?BH;%1mMgf>xSLJ&c+{yv<*s*zs+ zq@d&$CU1kDAvRCQH~%gE3wt3g}s(t5!7hD(zsRQH!c?LnB9WWuFF~KhA1z ze1}}!x6iV9O=DId+xcvqQH=wlja@5|oT~LS0IRUxvq8t)?F8lUemX^mE( z^F_0+zA0R~;ut}ZiiFH$+x@{@1HW_6moyLh&kWr|micYD#=7MofwZ zth%+RvP)dBEDg~M1nS66&Me-_7^I>D%-)Z&;|Own(X5PR@EQh2d!_chDbCf@k#W!@)eqS}_(iO3Q-#9;+fRfj;xyTOS^_yUWU^ zsTPh>nk9YABMje!N2%G+3yf@VzAEs$%90|Djl=tnEBc$S3U0NWJ(SrS}z^R(jrZU6>yRGUBl&aMrZc_7dJ0yPP)_ zOu~ja8G6(>$}1^QK75f|!&!00S_;gH%du}K@*d*d;(=MX?XTbCbT*{Vf0wN5Gc`f^F5eNRnT__)~ zTxogho4$e#Gjwza&l~v-UM|@}Z_BV#+yk^~tJ=p3_Xe+%Jc=p{n?T(-LD@bq)rD;j 
z5;S~lo-pRH_p?c{bc;V2kwVP-gsgA6J=XSeVCR-t^)<3iC%K4^EkSOY>DhdWFt8bB^ zTN{lw4ul7It$U~R7>-@s3LOTsVMj@l)uSNbZK;uCCeOI77S2^N9K7q{N4Lj14uqs) zB;@F7pRB(;BZkfr!^rD*h~Xi^JWzxznvb&{)7u#migH?K$K+}OW@jwa!~HBOJ}$`{ zYv>=9(7s8XkqyWCZ?5S+srKw?O@w_FCNL4ZK)qoY*DOyf&TQ|~{~iq>uFFIvFa zV*}m#+z6W$$?|V#4G3-`T{$57DvFQz}7O1Oj&IL(<+;H>dHrR0u2cA`#CEiQIOpf-Vxv ziK>qc+A}}bIH+hQb)Qa-C7xnl+`cCT{!q&Ian{e$bceSVZ0VerFxsElK1FMQ9EG+- z?f{!9=iCn%EnnE!?cl_pJ<=XBp!1<`ZbEKfK{7_&8q>(O)SJq0B;V!&nbzu0xvmD| zFuFE6hIDLu*Tf=TzfL}!d+Bp<2Ysno4u($BIf z>(J-mkm2~cNS5jBkj|bD(KgoS`m$#DQzUm=XeTCB=Qt&ToX=xUf^}IY7{PphWA3py z!ybcVhe?KkgNL#*-0dJl$~`jTB5r*J?N-1pjSiBUAH@(JTP8jL{CWu+hz7FMFY{A7}V^L7Ri33 zTbnkL$yUpLI3&c_X#KWMSui$=sR_cSr#2UKKINwe z_Kjsdljao2?}>MlZ@q8TgbZA{XLg6R_b1%Pt85DfdMmCK2wI4DP{kGMq9#LHJE93c zeU3`#`YH+Piq@)y#GdR*Qv2Cv?a3b&pPZk!Ei2v&?H{xR-ghu*)sx)lTn6I38DzAU zs2tc`IUP(^?0t^kN{+>U%Z-7c?F`@FTDaEDHl?%m*0)c5t5_JK73__T(qFy&fWwos zx^aPXp>6t;dQ>XAmnO56E!+sCsJU~0d?tI0r1#xjr3Av}Bx{4?2U>d_rS3&5afelJ z?w<;M&N!`t-$%GrpMl_={><9#t%cu!Y7_sA1D`Hm`*AXOdYB03q#(`6D z66%LCgH;Wj#B$}uRK{Z6u;k90r8Ydz5Qzy#+1uGQ!6HdBEq7Nn>b#+z(6vOik)Xku z>+IspNOBzaX4KuRk=VM5bJl9nwaO*Z+8*K`>p!p^sw78d+j<0uph)6+V2s5W1GOAS zp%h!@^$t`mXe@&jc7Ck{GFzYgIHbNC*9WIF-8u`EC__{x+)s0UA;+%1E{&0wX_inv zfaPXqhA3IDTacghx+lk~hPF&wY;n{pU&JZIJEYx+LjbUJ#M4Y+6P1Wd~y@*9Bv+ z-98{>RYNGGK%-? 
z&};IIukoJQx(WX0bArg5Lv08T?s*nuf7(vUt^P7JQ8Y4rv~+BK;Q2nTEk%r@%0B&~ zXk>k)ywUdBQ=Zb7%dlkVbcz;_6yKO8`$TY)3>phjRBc=9t;v|=p_XNzl4MUwm-{be zE$AgacS0_ZyNwfICYaf`&w7Ly z>-5NfEov-5-*Dj5yQUk9cG7($ygEaO<`DzprDGF#G7Niy`&lCnL4I$Bcl18wn&%}U zUowgX<@m`q7MC#_GM6z6I{p%O919yey;zdlf7*%(h5-u$K{7s=KW+g+4M@=rHP#bu^i19^-uQbqH zb9${-C29m>h$y1o&#-`v`xNRRdKao8#)LOvCkGO9#U#q|B)EC#H+BZ<*>EFvO|j^wM`~T7LVsVT?`s$w;pT` zarqC#`F<^arSR3b`eFcmp70wj%~3Wg#4g2j2-Mwp-LmApxj%Q^GgI6qS3wPGG>=2< zPDyfM%+m)&hr*}fXWKQt$fzHBwm7tM8Bs~(u20au-C|y-xpA|GBu_P&J&+BX#;jM& zIEnHJXQH6-%iU0dX10Nyh{OZAT z*f^bspUPO_W(h+?u88M4)3e#`HTMOhF}q|q=XlYyIV#mWDe+!gAmM18Q}%rGcMa{{ zg6*MUlc#)e%T%(fl%Q5D7PyTa$oHwGpFp{67?GwkAE}w(>2e<4mp(^WUo{r(9AEr% z<{$)8SgCci^SMrXMoXfYruoO)#v83gvL{5swdAz7&PZTbGgY!{9XK;jz0ppH=JDP! zM%4PPISkZbO?}wNasC=aV0}6Q;d)pzE;3uSB+T?es#oWzg0E!7bT)xRyVqXqhNi=v zYOVJko$=;xi6f3uxWuEMy;$m(#Wl^-vgdcfaLwh$unSgd`%ct*ictK|$`@x>6uxZe zEX*~7uPg(=cuBpl%+CeFuXv%Yi0z1dJcp4Q5*-)lAiP>LUz)Qvj-nM|TMO24kg*L2H$nq>I;d&J3{&rzOA59<2|q1aB& zCaAT73}#N+N5LEx6!$~}RNFpj7r^5V<2YSeA$D~GCS%Z}ZapcRvhw!^DM7dZ^Z+NHfaMGGQ3B+^s(ngF{l6$4rxD-g5 zd%|n991}Lgsvj>jPfa}`ZPhq$gR#}CUaw(|PgyX*9!3b4qK&dtv4bbv@Vw3G{QQ#* z;hpt*)>W1~$@O|?SSXHvjs_Lu2j&$DuP&r@s&a%yR5y2dXR24i8BPJr*oTVB*aZ~e zF`FWY(V9JJY*=ADh)8&`!`1t-6)`mfc3Jjl+NR29{5++OK0eVl>{4vW71UQ^SG3kD zm?K)L8%_)W_f#ZKGllFIaccM6e9qmOJ@$BAjDi<4z#Z@e9NPg;APBLdcGvk3~y6v|sxs(Tek&2p?Oi z#&V>aJ#m(szmLKjqpCEtgVPdR)1q(wwxjLyqtVS6&l8_b4kT4|bdvjaTey+1b9}6X z$MhsBU1cdB^ig6+U!Z#yS73+l2bVeFSKO%UhoUd{ zBi>5uaXd83iE8V$WpzflcK@0Bb$>Y3k*Jw2feCAwRyinyZD{e-ofY*SakEiqO}^`* z{RG%5LHfCo$~d-$&p7qPpi{e?EsAj%`O~Rbw9__SZrjj<>)FN0`x*tr&3AZelG4H_ zPr6#-`Mp}EIXBaXSZ*yS44-^k3n4R^Z8A(^Z>ES_s4vb@u}~VGE}&3hwPsINAK;r_ zkz{UuE4{N#*7-;amvwQu7N$v=wv;IX_Y8KrO=34;it#Rt&-_J`1X)qsXLqfhHki{< zmd4=_4_2F3)5qx>op?Qx1ZuebhXg5T{?f@09;Qdll<+-Ik_pl=KvMXMbi0J@!_Cq$ zOmlb{HoeU2r*ZL#oKH+QU{XE;2P5fSx zwa;r`KjqI$d{i8!n1;#IOkU{v3|gxBblmZRafICB>`tNUJx+lGQh`Z3nvR|}YYxY| zHw#ZwGaMvdBX;6-^n9kpD(X#DStXbfEB|`7_)hNSVcP+sK^0#Sp31qa0hF^cc}^=y 
z2NHaS-k8)!N#^L)T-IDjQJ+i$-Fn$hYDYQ7wIPKY(814pSF0aNR>TPSNznx$QI07K z#TO6n?b)s{YuH@x>wiADGy*9@Ep_Yz$9UiLE7oB&v~bO8%y@-z`;h+WO08`F+rveC z3XX$VEIeyvdgW}R;IFm-YB$7!cWLvuV74`YRWoA z?e0c>dg6xi7$vxE9^-tS2ixp!Z*Lp71;$u-Xs|N2nXz`9K(g08*`8DLRr;uN%WE$y zIybXTDRx>Sawc3WX+#g!c@yA9RNmL`r$&_|38aKoWe!-@q=TB2!*bY0{KhJI4$dR6 z>pieU2x$^^85k-83|#hWk!r0yl5qNvohvnq`n?Ak$o6Jvo4WTN68S}as$cBxRHKwD#$iT!#_2(1kw?1TzR1%VWv-j3pPvVW z+!ih2bQ4659~+>4aLWZfh;s3A7~!%swPsanWbzXt#ma6|=KRtrm8Ei#Ul=#m8e)SluT{ zS4u;p#yz#du1mZ{lOSj}>-Kr?rQg`YA>3O)I69?EpOXJ{Olx+0)!%Pihqu{+FG2t# zEKoA}Xz)Z?>~@pH6TezLfxV@)M@0bvQmT8?k3(DA3~RQp(+H)b4cv)-<7}A~O-xqv zp@kMl%GUsL)MSbkS$`9sIy3L~3jwXB6}>1c!Ur({nZ=3J7rx|HwJ&<7l0PPp(q;~J z$M%29fVId-4wmoSSa{`meMg*iv#^!c<>BC?BjQovTX5V-0%{y;?^8MfIE8=D&fwEP z-lB^t{!|m+sX2CQtk`6<_76*)Jpwgw^WB9S>5+5^iwc!izD}QVqC}d^GYF;Gs!XPB zn*!<6Vip~P@^r2s81Zvf@j3pR{lZJ+>yR8uvqGcy&=Cuj50>G~&x*XIrWc>h z=gpRUJ^W<}*5neIOTFqs{WB())sP|=YI}zYo_pG#TQH|f*s(66v|UXIF$c3Ad!S+| zipB_4J=O4fy~VNb&?FWRyp?rrA9izG;B|FY{^t4nH13XL`v|u(ll$4~DY6@{$#42{ z;)veXh9xf+xaz#+9nnDHew;B?*E9cqlgM>?dq-5?BD{<2aq<&0Fps*5 z8CQFA4ca;I3{JvmvG&>m(TVNw3E`>c8SOGG?{HH6V!J1kiLS3X!`uXq(bQy#1sO6b zN}fU$)T@TpTK4iF88J$L%1hT+xeVRTgAs)gfk5&Rf|@cCJ_GAGDT0DTPyo+66WlkZ zCR=o9>NBE{6pF}fqqlQf3f?@E<1SQP3v~i7Ume`r_4gW9wD*;I_k3NsCcAlhhkkLt z;bhZ!S#^He$k&AFozJ0>-1BX`Nxg_#b!W;?5VPlJOjRdh=g^7zc^!#VtCs$n3k_fW zMuQ|V4C5!CVsB!&dpXy0D5Y80kSsyfE8g493}qW6d09O6JF)$?%PO$-91dQ{Wle zZ$0Wb6!mms54 zjYcEy8_thC(_eWeDX{nT;DEdg@LTv5GlJQUi~ zENIx!gX#JF^pzgglLq2JgeUMZlC4O$%^MDxCF}u%&)!j)s2)9e;6fiRX;o&yc>i9f zFmk_HS^xZ*2%1mA0WL>N^fWfLz$5cVV0?5qIsMgCPWGo57P#R1y+EuncsbDj+n=kXw3Xia{749-;4boq1SbZDJ? 
zpkTL%!SKn%qs=FQy?Ns-7VQ~O$RMQtXQa)sJ8E@z70dFv^o})iYw9H>M<&!LLFn18sx99N{dIz zMfHSII`;}V11140--yRUi@Ghan~zWJTF+ft&hD6QFPU12@VnOo8|;07<2yROx{YsM z2h=kr7boNvaB6y8qlW@`>?;l7a5rqmEE!dG{(?rEpon8^JyETU;f8T+|Bd|lUUT*X zSy6r(?>8>8Z<{Za27>5qdS?8_A1dnlB*!^PUB4khAp3;QN?%{iX~yd%8CeQmA+>S+ zow{cj*E6o^dQwg9l-}EsGZBA`#I^X~hV%-C=GaMvoIsf1HmfQTea~z4%C^VTqA3kI z*3j&pi8O`tIRz*E?Oqy(kq>#}Et!^J^pRxcD&5JGH4C+6$i=S0xviR?8pet4U6~r* zyhi+x;hg(j8-(qSaLm4TP@iA-b$NLn3ipZb*xCjOQY*1VOh!vSGWZFki=K48F&X^m zQT6N@!JS1Z6a;X?_BIxyr6^G#Nvr=2{AIC4pv6OUzW`1CngmsAg1{p+s0qgYy;yVn z`KDyyM`}_5;x*^$TCeojGzBt3;*lX!kDEx_1y2+pTDFW~G_C&TWXoc!(a-nf zv6(68n8KV99&gEaW501S*^hp(JJV0CzzKIL`l3QYHoww!`ffUIxbyu|)849V;>&~G zOveG48kFtTA)KlKbx_k1l7nf_*LOR+bqs?_)T|k2w*yE#7%5&t_WPHj6Xt3wc{HRj zomD15+mXXr5Sr2@qX43+f%Ty5=iyJE7nm;}^fL@X?qpWH}H*;C&tEMkoJy62`EM()kiiu%)p*L<``8xEq zpO=q{1cC`K@C>A8LF~DXfG*v3d)C?Y{5;~dR1tX!h1+N|;h}6%eM%DG!c=C_R)Z4~ zPP2PS5|N{ODzLF^w_phiDu@SmKqqrC7G}Fmer00zx=a<~34w6TZ3Y3%)Jms2Mr%>W zIO?_7*p}^N#%RwxOg=yGd(VXNdKY+7Z{$LXDF<@im6IooERBvxIjx}?)zF+`V;w(= zz8RvzsTnWT{th#bEnSELglG#r0FmP?4zn%$=AcUwo_X^jL~UhSF=g$53e=Aa1`-P?VdO~(SyNQ z$F~6ixXZbC{*2TuvVyS!DlyQJN6_m~H;?AJCASmDxwpAN88{b2-w01Jj>RN=QM`?F z*@T+Vj8_s6$8+#?`LiAQdmN=k;iydAs~H^_9=BG=$rMG6XE_=-)v+3vX(YI4$}Uni z0(0F@^s&D{{GAyU7YJNpAeYbeL$P+Uo`O0Gi1HuK+s!fuFwN*z0@Sg8syW;s#HHj zvfO8-$jtM`{+V4#pDEFo=(o6s*i_Rb_cl5S!j?Mn$5Ut2<2)xSSW`cBqjh5Qb4z7G zI`yA2v~q%;G7J!Dupjx<6`;<97I^IvpX0=_H@=CN%3%ERb_fO6D>X2VIUt-u8!i`V z(EL&hPl!lOe?b8$Rz&&JqZil6yeNX&F^YWCI#L+FkOBQIQUf731NwVp6s~LVynT#9 zibk2otaV5`xtmP3kQ2JgKl_JjbQ>78_orT*s$dH_-W? 
zzm`oXF%Z9j6^m(F^uuF0yHcKH6>gI@>l+B|29rDxW zb$?Q~lZQ3xDK~|M$*fP=URdBDw90?6L9+Nl#3W_Pis2fD(C>dR`ijO&_rc`e*tX@E>cfP;?czdUIc}R7YttZDYkc?=HFY;v98Z@m6?eFEiuyuR-0@Kad zvJA8c2d@aB+oY*ByXGZWUaUjbb1f=aJP!FwlPJB%AC77y_c-ze-*a5^;%WwM^grG| zH8b3{z$1|%d{GRy#G3c8t2kc#>d!f4h9v9Psww=~KcX>Uit)|b@#i34F_yt5{PRBa zh7}dKdym+QExT|JQ3e5HXA@kZ3fDeAA8|>io2p{(=O}A)D*VMrzV>Do2tQLQYk%A> zC|r-gNThV5WpRJZbz98at=dmy=CK2c-292lH9^ymk&&rz;ZPxaRRLB(E-5XGF&+N~ zLRyRBY4WKy^T_L6@l*YiH7w{a(j8gm23%+E@k7)G`Sz;)2I88_oLb3e19LmRz4Kf- zdfd5x1}j9&6t0HZsWda|2g~1y8M1xWdsjeKhGCE+B|{Meu9l0Px*Ztt(zX}*i?dbCniI9b+n(|5~Sw;_Ts-esmzX;Fng9%}srUmiO58y<0#5n=~EH( zmi;v#X1t0BN{;(zR)C9j#v+GF#3=PD@;SMe^M!MCDy44QLKb?FU5cbuZ@_Gsp-0v6 zuAGsp$&F_{LrhJcCAx+g&UsTZ4$oGevRu#zsu%1X4)e0HTwxJpEfGH zFI9b_9qBORE`@Bnq+uc1t;#uUa@fV@B4doHc4Wg;6jDf0#+Wo32e}$_kpFn7WHJN7 z?tABOd-HRAs`WB;2`%h}_(vBO>ESy*Sl5gFx>#B=7C^_6(v1hi*(~d`Ly9O*LykUc zRCqt{AwBSYqOi@Ru7k@IL1_bf7$LsNh&jqYwdUda&B<+{+qAVw!C7pybO;wpV#B<4 z{n!S2#QD8r9JV?vRXMhPulO-> z;ZS3|@KU01Y!gix5v_WVk5Iuz@oKDP(5r3vm~79gc?xFW&~<{R25U)KCu%I z^Xi1d=e&0-g-QCagI|$so5mK6K8ZXai+8^ znA9`rhB~le;;rSIbnk~LtEkqZj!@#ECFMa^%qA&8SiCKB*`-f4e5U)Rd; z4)q8c!#)mmIPez^r?Q(I3yQnd(pV3g4-hIcBcQ=nw6CWa1GHQN|QigmR z*`ios$j`qEV#_zya`JDSoIxpfwT=q(|DvyLJB7a($%Epd8G&Y8s~ruR8r8c+A7ObR zu3hg7r+4cfnNA+BGBIucAm%}aI=@79k-G|a4Nrc`s=RmDwM24W7Z84O*JRJ`DMC=7 z@f%Whw){mj253s2P5n^NTY2GO8M^@kPlR#1k}TH%gBE-jw};oo#=Qj_8pm&&OA+CX zk0h`7<<_os5bqt~#7&zaHkPyME_bBGRtESch>s%g_C2aFB-+8-Tta!~eXAuqA0Mk+ zK+;^iUho8_{>kUF+G(ic2n;r}DetBJC1HMEZkS>ENzfh+w%8W`s=I@4DJMo2+n}5X zF5UG)7}p`5Uzh8H90ThGP0i5u7Ts`6hOiOT;w7{9p9f<*CeIx@w|4t&E6Ewi{ZqC- za!bUV*gu*_dP_CIrbso&Myr5(MBUKM!`%X^l0at6ITtUO2Gqtt=q;sAxv{3Ufl%|b%u7k!$ zxw-;aqZOd9Hn|sUC3ll?yx-8WzBw?Axb_CqQ;x=bUQb63X~eg5w?)3fCbZ@-7JpPJ zmHi`miP(Md&L!Mbtr(F*hW+km%@x5Ed@gD^GaxM4n}KtmZcek~P2vc0$wpKeoXwq! 
zl5O{{#aicxx^=b@KTVeECD1)R;n8tTnO`oGR2~`FwMX|D%w#1a()qSjyq-KsSiQ}PE&rGBq!_q&ODs*;HoA}g$b+lwFB_21WwWmxAo z?RR|s$gasTcGvUF{$js7p}R`AJOx%NI;iHe8QD>#o2U1(NwNM>1MDw`=|_q5`pSI8 zB{CM?Cw|c|a2_hBTauJtVinsxQj)WixT$WYeT$$_Z`%XCf#K^ni~@TZkDzoHSvI?O z#EHO5q?P=G+)Yv$Y`N7}$a(`o^?A*$`zCO^=Zh#f-Ch3NEz5te=U z@Y*s&8`8*jU_0jlDX#WhhTQGN(_M~DJ{9R$eTOTBWmK=Lp+d6->U$|@Jy{Onru6k? zyWiE603kQd652p|a~O&lH4*-rRBJ>3<=Zn+Y_czJn7j70+No((%`ujdN+%Y<$V1T& z=L`w)7S1Bl`+VQu-(OoLe;R}|r%cI3Zej}=N}fy&4RSg}v|0Nativ#2EQlFS%U|Ky za?8x@oE%m_MgHO$O(;J{4+lZX1ik%4N_LgB+1Om zRuobP3EN|-$Y@vNDmMH^DC{v4+nO%Mjff~Rp1;Tnr7-p00~0k&#QOV-EDyc!g4|_r z4ruW5sq$&mb8pmF8hb}rK1|9D}x}QKr^>foFzFze@MUPEP$%+z1j^YWt zo$;%9SE`Z_ocFTL->aIDH50Y465dV13Uz;#B^=Gc^>L9*#>fOd*k~kQdh?~Efo85Y z!)Zd#d^q18BduA@Yg7L?yMvjYPt#;OJ0a)QD$$L1ji<|AYofk4Fn;|tHLo4*oy#+M zZpSa5o=(5x8s+kTNkS$b{94n(se>$lLb%lWnDE8U@Es5~v`)>Glv&%r;f4y5&wELW z>ucDi%>>(ieujvijrASxv$Ukng+LK7=bPNZu*Y_JXirC_wSCITgl|+@^9m5< zsjNwk@O8X;DCfRYsl&k8To66ye9J12qP1gNFr{0%!ck$@J`A9nIh6#3ks|GxMUl@t!^fmXb?rQ4LIU7Y^&HNo9WbUotM^Ub5 zmQ%wQEwy%r&Xj1`E%9wZk3XO(G`|6HoZa7N^14(13FjuI{1iTHb$@%E+{V5miD5)% zBKfm7@jAC$O&xVpitiBXO*t*_%~^QFyZb|t?*x<~RAR4lBL$-KW|U6NQq;(gOPyIz z!lhpsIPA#0P%aj@dxsL-?o0lHa3hxxDSVdb$t=!l$V)7Bu==;@Wdyl6b9y~L`m-sT z2(tpi`8tXoao-zj=u%IZRumZnQ+9RlB|rEWt21(4QJvz(TUJJLqIFo0iRF&4mBQ=% z+tFfGAm&tV97Zzw{+H=E$m*Y_ztj>-;SpqTt2ui$r$I#%r=7GLp3c^XHSpPGpC{dS z3*lQ-@8;oau1&4&00n2_4KSt&pBWn;mmjU4ql&9#+3KS5C~Z%(f5JmTrksXT#<##c zYR1wDk`e^)2t`|AK95J5-i6(I%$}zPJnNGF5+|=cl0nrQ3~eek2+ydXsqL_(swH&B zLq)s?3h*kcdEn6+xJizPrs7@Z%tVP4qTkxU|Gq+$7j^c_xGb#* zFXGfbgg~_5m3{hf_R7oQ4E_!#u?b2d_mHq&a(Z4(*F3DvCrg;27fHQOsu?8WlR;7h zO7ucb+X`?g)|dwpdT6B|yTEIhyz}!5`m=1_5nRR@FvBf3EDVUwsNUuqwFH0d2~%wc zym?w&r8~ZnI#lb|P5C^In_g2iN?4+4*x#EjbG?J%>~W1J;IV}oux+TW(LWD=Kx4jM z#%%nkw9T2$YSs7=x@yDG7#`&pQ`W-pY4f8&+=nBajAc*;Iy2iRnab$Fy9cOYN!?!* zGUaf?-a&gy#&lceMsMsno!P0}Jh3=a9nx;V7SS)UcyJ%*DXVtjsqfeKew0PU46^5B z{ih$kM&NxTIURpAEpSjIhg_KR);W-N9jkD}m!p<>*ZKY6@`pR575sWuPNZV<&;g!y 
zl((o{ruLW=&tT-}C~}EpYljB>l*%V)sYjV_?r^;kGtD6(X(OCOQ7%uJAdkJ+X5^_9 z=f*|YqZO*iQH`pR*?*rM(#i$7)5V9uuFIw@BAAVkLvep-a10h9ne~b_q!k}O#q-n6 z741wehzLQhi2iUI`7^RD6%l9iLHxGIn~%k@6^MI)DjkCH0D56oW| z-@7*VZb#vek8oaaoC#jdDbjuZTFoXhcL?O7x+KHoVwIG>lVE};!?#)q9k0-g8gPPk zAMs)ZHd17LDJt`^0EWxvwtnhMlbRvExr?72LY9y)i+am&&IDZ9!RKSF-c}D6%|?Gb zS^nwW>JzTdiZ3;FmQUeqY($T&ow9wVMVhCn`L}Dy&!q2VKcz^B_vpn*$S{h% zmar{ZPuS)8ktY9?woH}O@5u^Io`f)IA5XoWP=9a>S{P&4Rj%=GsinWA9bYEk{+5|~ zmGJs|W-wS%8LR?xaWJz7TbR*7eokk-%n|;NgQO&W9dw!0`^%uq3}djSn>qY4vsw#C zh`w51#moT;WF3Pg&0L{ZJtQQhq-A8-rGN}wHy1Ckj3baTYvX7I*0phzaCEi#y?xnP z8tQ7{V&epdxzPPcLbh>ng-cnRxzO?R0h!uAt_1{u(Ux#)S0EFZ=SODpADP&fBmV^G zA2VNNMqfesFOUHly;iPtJltRfAW7WDLc-C?9!kdrmTW=PJkEa)BWC)mx~Lm_hS?{9~Y2v40X@} zhFtZLyvlFpgz)pxT`oWe;pXMPoEyk+zN8J1cMRpeMEFOB_a6vf=5_xG;lJYy_n$cf zU~BGT!zl@~xBSQKf&54f|D8Vo-G1i}gq!;(2K~qe{{!Buv3~;i-*M(5s1OTf3 z&ZbKe|HLMs^*fvRc=&%}lYrnaY!diOHu3TN%qBjF5FID50Kg`G9y(yze`V8^0ROCY zS6TOeLi_Ld#PfH2`sXbAUN%7XKeOn2()};6UXJ|}tpAxsmud0;Mv4A0ceo)xbBFgk z<^Qb`@p1pc9WI{VxC0b1_%nCDS2p+s<3Ckk_|?7vMvm)0vqZrXuoX6NFHZR%QfO%g zXfB|=fT{Xkoj?nw?PzmZHQ-W}uPQS90>$r@1`O#SSC-9~?kC0h+1mffz!P808`KM+d4pj4GpN34X}rKu}A=AGoY6h zH{6_@?4b^qCia(-3s=kh|4Rb@a^w1)pZ~JM^8HyDVUDgado$O6ZXkXX1o_=S@cv}O zf8JxiSL67F&wsh+{2?V*Li5We`x|io!9W12{*(C@_}P2|xBf8SfGz)HmxXW%T`l`J`ZLDdmmA)1@+j-(2n1VTj+e?s2S5yS zFtc&ItW5#|=DG4{pn(2fw*PnocO-#gFqdUuxIkRo+>p!SDWJ=@kbr=o;3WV*W(9h1 zUuhN{_xEZj+~2FAa9;uSYu%Wuj(-Uj_rHK81-V?9mq+MwQV_p@;3Zc<+?Sug+EQHq z?ykzOuHY3||FuO01^KxFn*tI7oFza7KtNd+p-Xc7>!Lp!=-)_n#el!Y^zWwqpJDpD zJq8NO{DVdRJLLV@%>O93^8-;$YqLw|830BKW)E}GbTYGm0t)k^mJRsF?H`?`U@(_U zaT5>_00Ew)05?boF!Ov6pmfev1E4W*S^!QGhFA5BuKxWbhICgV5198~)Zk|W{|iC> zPt@RQuc70<@}=mwuj~~a&z0->FKTev@xLaKf5*jtmB^nB_Aj*fYl-~3X}?S4-^uI0 zF8Z_O{RfNw-L(HBHMk=A|JUTbswnl_qW?WL;1dLl?v)wi8SU{(*4(dx`wJyZ<7QS780u7X3SU{ntf* zwvqn;)4!Ycf2Ib1YyAE<3ja^Z`?G2L@2LSV4+tm(_hUD>Z2TuRxDxq)*$u8r#r?+m z%U$Qn#riv!mH%&D*8f@T;rp=({C5Xi{y$<5zm=`~N11IP6{i2o>nR9zMm3*VwaG> z&!Hl&pV)P^-T!Q7ug3lf?tjXzAAtTP(>Q_1+>g*6;Qs$F4jq7#`tzaVpRkQbK=23K 
z0N3g-4jor+&M)Zyyv_U;YQ1CQXa}65E#N>sNiJT{WtafO4@9CaD>n*(`1t4qf#b#H zt|qAo_=&og$7--F9WPh{XkF>3#MRjasCcNYp>X+Tv4+E)Zh^s0W{#F-t{^8bOQ1|9 z2-Z2yXy!1F)NuJGN*+#)C}%w z4TZu%PL}unj1u=R6#v=b{|l6a_=SKo)9+Y8xOf2s1?dDpg8wU$Ut;A1#7HclaF`>^ z32x(H<85|%vb@6a&#?0R0_)Fv;D5Bff`3-(A7R!1sXkv>m!I?*!Ug#|g}(CGe}Vbu zE%bMo|D8hr3gmy(<$oIg{UyREB*^=-F8_6y{?`bj0C0T&E&lsg5k?>~{4cSwtFo`Z zY0DMrmnCBV8#P{kFYWqA@m64OzX~CMWq&K<3TW~V?*Xs_Y5;iOAp|bD>44W2uwXq^ zb6dc&06nfMwes=M0WaXzk0vi2-<1z~IoJQZwku$ofT`&CuH=Ufh`j(Uz)bza+OAi| z@xN5umG>8s{N)(;_tjm2u!PX%xZfVZT~>EBu9&KhTWKgfrOorD<$WA)jYM~iyXs@% zo0!M;C9V~(U^{em8gk1~&mS)P9{TFJ(CpWkzy%P37;nqdL?sxD>D_MS9gi;ElFb=x zI{Mt77I0DnZa&(HHF4+;xH#yAwH&+b&ReMhwOtdStlgpVF#9k+!Z47aQ+p z{j2JZH{Ox9TvX3AHl1u-n{2G&><&xJ&fxdjE$YuW9kWv9Z#h}&>Fs>|j<>Jeno0A* zJ-xj}xu#B=77C>o$X=2eu?IQ4V$-WKYhCeo1qf9$q{NSZ&1;t5T}m4|kNq@8!cx-WR**uEs3EzDJ@MNx)GJ z6Cci>x-}&EysguGq?8^zo>V#K6Sth2Turvzd)N`)rd%jR(_DC`fm}eyrhv{}vdHYG z3`WdqIwQ0yraB0Zvj3ehOF7h#IGJKb_)6X!`0yNKBxPUz=WD5s`ZkdzG;V=)fIQWs zp@;C~(mb*&N3)O(J_NVFOXM3ALRA&DJE@BxF=`~GWWl63RaBu#C*64|(lKkN098GC z>ZH^ijiscCssuaKAM7=kO1Xo45ICASfg@5|M56egBHw*PzhbbmLvCTus_c-JV-VJ%J9>6hHk!7cvTi;Pnkf6;rWuDVWsBnL^U3S}@MJU&BZv0P zS13XnF=iDeclu|m3c~2{q0HE(GoWrPZ5!;x|6ydIJZi)Goo&oTA2sHg@ zsPM|6)D$2UGQ}4&r@lxCW*VP?uNPXGvbiN16}=8i7m9Pb@~OuhW|qL8^{6Ep>D6nW z7JWwEZv-$)k9A$0+m$0;rM|vC%KboYY3dF0Qp&ZS>rNOt6uCsA;GiO#Ansmg^%#dA zEkjxgY;2(&<_!kv$qX8ghBS134d|Kdvu$t-M9Qfn=9+X-bHR!lZUKD8Fh-%Tu$OIi z;B6kNOjL*l$FD~ZM;Al2Yr0oU`*=>vQ*B=F+%5WDD#{Ms>Q$8)IuV<8$wx0Y{~Y3dl&+AR#j#sKG9V_FYsG=^J87hCtPU42`;Czc*+hE@F+S6MpnQ9Wzh*NYN{OK;LmQ~$BH@0}y=Xs#sU*b_NeQQgOZws+5QoJ4vt>VvX z?3bzq*4CXlZ;mJg5tqJ0jctgY9oVqodlq;eeleU{&BrOT6mK06Hn0M z(Z=`nUaj&KK3ihwFm$S%albzIN4~Xc$5-cO~jc_E&xzqs_c_CA#Pw z=v`L09eatjx?5}O@LqJxvgM$@vp&5rb?jQwXwzDlD3oIy+Bfub^YwE7plW3;C@^Zj zGM!@*N7iz~dZZU7lco+|u=%Z}j@_;Jr$?zc~=t#h-ms{i_1EW%zrUihI|O6x0^F3&+cKlEtllTlejKT@P@62O?%l-tJl$ zVi2$oU1&jBWR#$TH?7r6{uA;-tH@Z$a6rVp#nq_nMt`4s6WCYaR=Y)H;}62IFFQ`! zTa3b%Hswg6Ibt65td3Vq&6Y;<{TeQW8k%Rw%N*mvYnbg}oPZLvYmZ$$3x=NHz=+m! 
zsZ&)o$B()V`jTe{KU!*V-6p}d;@7~98v7~)t%=$>;r&P2z^XuEySvXyrHhFxH88be zJr(dLQRz2yTl9;+No4H=U+4lVoy|J?D)l~+c9IX67NPst^_s6$^+ti#{jFkQN~?b5 z2l{P8By5bI1(N~u=3eEwu>_SBQJp(+Kq~izOPv4AvK1r?1cLSDD8aGO_l;qGp z#8dl}=a>s|c%(4=a28Z9cRuidf3U6!D%$!zx-PE3{(fc;OeyHId?4%BEzF+!D<0?r zqfYD29C~NH%IV-f;OIB<)6)+HG&7On8kR zMLBhj?MF~c;X}HsT153y<+3wX+a!h!3ro|I$EuM+a$)S2chcA@p>zT%;a9}sLEC?N zZdTy`bo6fkFp*U6vjjEq9|GIzyS~1Um|#W6=x+4BF2XVxCscKUB|r27kl)&?h3i>F z##!;0xeqDTS?Y<)+#BlJ#Rx9(5~^oYO`J5O{GSl(b?g$#Dm1LKr!2rDa)(Z);HdXa z!dTU;(l;gR zQ2-n=7xz1bc!9H~CbekwVc}9qLh%XO`{P09$M1EHITMZ!u}vPe9n(MxY#lBFuLLH+ zTA~x8yrynP9kadPm7VMEWK&M3vz<=UDxl&f*knf@+g3L9U*HZO(5}!Pnz2ch|?;QFnr{mjYZrys*~K&`PY!tQrG9&ZNcqDRizozN5O7-*Ng~e9yfqx zzoXiJoW8l%!U}29>8s?(O>tP-B)YoQmxj_-iOx8ro)2c8QLEuu(I~`$Y)RE}X1I(% zYcx+#f8Fg}o`2eJ_O18QO*jRt%=A0UAVi%#)K5RLH50VoI&^=rM3(46N3Iawu0BXE zLaGiJunC;gd>HhoSqu1;Cqpq(I^F!dZOY0zn2Bqulc<9cyc`YtI(nQMT9UJ#xmj9A z*%*2#!rp3rsvhpRE0Sk0O_gPy3!q@Z4^CHHZimaJ>BUx;xvP`r0j&x`t{XzJ zIn{fhcBc9<01M_nz9XFR?8CWS*_`EiX88c`sUxB$J*P@Uq5lGb~)ER zYso0N(*`RkwJN5Pg4T^2{kXcOJOu(_9r-X{vQ<(gNxxOW5k$$B9RpmPY##6iPtbv%2IL7X_65}JOQW3KH2lht}$Z2KS#C3R-4-8cs zrQU6}@Ouc*<6>Z)`r=Vl!&$!p_4%tZd$j^DG)L7s3ZZfsa3_ZD zz$nt2HYt3MEbMd7)HwuqxN=(z)-0~I=_x#@xhHA3(Vs!|6&TN;DYza3AXz&whiF~< zHcdgh`iue&xcsviqS$#Y!}6>V05Vj=GYBYj8_9oUu2obi;48fX318Vh zDyHVIV8Rf>YYCnfCs$cQC+@L2nH z9;_o8|H_ecMno6<;x*ySwD{AR>6zPwDSGkb(v}Mg&=cy%5Tm8K{;wTu%L9Pk5Ypg{ z{zkt%|#bq*mFTx)JXfOcbG_NFsPuy_suLQ|vCe~&nB-elKpc}7pk zRA0seNO(~Li3Wj)jT%EFO$Q5W?WaA5_h5fD1V`wA&Pl(%2Urm7N&IB8Ig)sKExGqkVEL z`g!u%$cN(_2`dA1S#`$;-SqV}g58}a>kYgY_yYdrebtqn?UNZ}Z20r#?}w4KB82XR zw+|S4*gWIb3!JIYd;2dgr#=&Cz*5z6ysbaNCvqaAuK7AT@w{k zkv&m+!@WFm|5k=;v!$aFPPtTb+aTtA-H!D;=Qare`<_q!!EB3NDc*tY83=$Lw_}JT zoFV+pKpCB-@diCk_#KL%05YNCp&0ZSpOv))#V`<}o^L~&x5z(#tZC)#U@NLlQjKRD zxk@T(!v)i1p=9eDk=aG(qs_PI^?gsu+x~-y0MH=Y+1QUry8ep+(sIJvfxUx3bUDHV z>JMPm^gg|c7}d1pd3AO}qrr}$K7B6#kD}H%po491=^}sg7LxgD!{!Ug@2sc-iq&5Vs4up`IHUS&jHUldWhNSc{V7=l%fhxK*@0J5a=}|ZRJ0tN 
zVq-S|+9-e-qS52pF;+0B{@tmD@zZxT1Vw5i(KG=BX&8~|h!~DOhamK)FjnU5Ux9NB z5Eu|;?KhbOhk`S~T(a%>Lyc{EFs058KRkenAlC;^9s7|~ny45nI%}!APD`J*h!$lA zIYY9R3PU*&r86RkDH|_*MwNx->NAMVh&h-KMOuBLv-Mepr-fV%$0IJ^u)YtNhGvJn zizJ~MQ6W15D*7Ec)X>w|E%~Fgt3;;Flj?qN;5eAmGH2+=xu$^iC9yQKLTVXSIj|By zRsp0IK-K}I5p&X(6Flo?5MO0!3Tz4OyHE4xh(2CDCj8PpCVYxM6VFe59F|;06DC>O zJw;FIFQ!$D--tmr#9jcU5i-bT@0w#35Hm2q>1x?lF>-<7T0^?#^|i_maIIn8N=93i!&X)eQB)QdCXpA6 ziEI`P?KX9wF{kzEa4)4mP25TXaQ>@gh30)Q&4zfBWk;{+a;x|+RXE#h^SwK1dVBl0 zS~`B03s^ilzJ8bTfl!<&PnZ`R*`3}%i)*ElX&4~EeTS)|(%zj%JDV(ho;F>+KZ)K* z&hoQfpW+})A-PGf>00~oP{iE~Lji#{qA9}?^NxNH*e@;UDCir@%!4%)t{VqO`K>f4 zfrUYq05&a_!?|g=-+!cC7~XReRZ@-m;s>g>aW;kqIZpOaXf`yB#>V~}MhLa5x^a+R zl`WKZCW+4)+MDR(4+qb*IWD7j_t#1o-Jqq&kk$onc0xOJ>W1i4s0*q>X7gIFrcntht0hmydUvEf-)LQLl zPzulK`oY&q-|!vCE`An>!ZgU2Igc@J@a~PtA;DBva+=El!|>bEF|i4QS)BP`y_#z_ zY)EW!ps5Gsj@B=WX+(woEEIS&Ykxu6>y)ckuAF~bAH_wxT&iWFJ>Xs%a{I=Bz#Kac z>y3NpsEfz%{d+BrWD2bL)TkiDk(-z6T-PlV%HFd%B~^ibD}BZj1(R;f!(glbt}Kq3 zGg3NI6p45q49UPYos2OQ&td0W#tt)NzIM_4zE-B}sol%X-RaA6GVy6dw$|E;ejD&y zg2@o!ZH;y%s?HH6dClD)%^3#j7&n(-gN+2NMJpd^8JSy)^tMUVKp`3Np(UUSo4Gn4 zw^9!8Z*D7xanJi_BcJDTU3?{F)Jn!Mtg=gNqQ54u^s3{BW$!^R>1d_Kl$OU-gHUL+ zdz7~D?Aj=dM4(tv1II{(+)ej>+dr`JzGoPUr0<1U3ofX&7Tl@HNWk-sQ>8rwtP=62 zHE&D?q2g3IEy8^?aPg96?6aq@QPJ)w-E=cPidLx#1FaaN?q8lf9RJ94DX&tBtGM6X zRy2o(i@uM#v2}>xFU|?glo?>s_ei_7ueleE4{uE4v!t^`?4h$?eIK_mKa~SZ%#tO8 zt~6&%k)c}hn=xPT4}bQJO&>>_6S9LW{Y*ok+1pdFLudoA4~|3_F8p&RMo}G2@QW%v z4VW{>I4Fn`mm`StRv_Zu_-cs0TV*uUyU*lz_KIx$yjX#SPVYG-#5PzwP@7Dd{#I3F z;iG6ePkB+QSon^1Zfr;683n4;gqiZx?--FWj31Znw0BY&G>lfxEQE-tZJ?bUqdnNF`lRO)WaV{2l>K1t9zY zf>YtapKjp{SO>kA)hXXOB+Zj48t(6m0mKww#=t*I=ED5xHY9X+Dc^`}@0*XtI?MQb z`KqBs?ym>T!+!BayM95w`r%x2{2tun`Z5ulwT@=uSY#kC`5{2e=IC~_$qSN&lAUn<;Lb1iD}Zkb(4-K z4LXc^@d9RdpkN8^W1xaR#ngYN+XWNTg&&=_#!M~&qo2geoD<#3a3@Ma{QOA<${zpk zveb!Jr?d^UlSan=OyCTL@j;aWI~p{@IKNs35a*V0i1p4T^Q%jl!X>{TcLOven^;Z_C4i}mc?(8JhtTUx;JI7VPtjQ-FI0zzXD zQ*bDyJHDlYlMCHF3=*Bbcj`%bl|rBVBsVfsxE8f+qFCtc+|l1vagJlIcmw9Dwn~0u 
z{p-z71Jb9vhZRSxtasKWk`4j{SOk=_i&@FxlGc(eR9py=|&NJJx(IJwh-Bq~DAxGp>lxVYF0_YTM{ zEt0bmDcVZhb!${twR@y4x>B*gm_taP1<5*~p5DzlGh$`Xim!#?sA!?Lc@1!a4X((6 zXi*pYU?If&U?c0IP?hPgmdW`{VBIC)fn2eK1hN|K_(4Z!ScLEtBeS@0-skxYHit_0 zTLhKjhXMoNC#x~pj9FyN<L%#e!I{_%w&57Q=QlcT9W5loxby~?*ZH!{M;Yh% z9%tZpnHo8j*6AkC;RnWTCoGcdG zy0yx98r;fwm`VjpqiDnyf~&Ht*FuWX^XVky)WGgy;2}k%NgU`-kucvfvC(QKk0FH1 zA)y594p<~b#Xi8u3iDEnr^i{(Df=U|cDUyOdevOAa=eStNHQ4I?tQ7T4IWzGPpo|M z3YOn44C%epKeX`ad!_CCvC@j5_ikm}*Mgz+pMgOz$}M>ou;uNNfdtQDP@cAu&~-bE zkle8A<@V%}!{Z%S6_W=gxcfSysX!$0%BbLw=N-cf0j(nn(;T6M78y;007mOLbb!&iP-F^~&IpWch}EX=q%y5Hz|hC^qU%;@M+87&5_zO9%p2BbpwUZy zNZG6PIf|L|-m7aE=UuV~4BJ$B`iGOT07^f(c`wz!~h$ZQziw`~2n!S};1e2nb zBZK!%0Ik3y{zzzZl9ns-1GTl+JaQ0;_)HRQWPOCHa_AUS_6HV;+>dIV=t3HEKW4NL zVj;I+k$=)8+x{j-5#W)h(JfIi+U+?X&MR8^bhYyh0&`)W<)4!X$>+KC8(=Qb4{ByK zVhXZ$K_um-=73EnhY+$dFB@R9lobM1>G(^?b=B+q&4UWxD1|C>I)DXzo6ohUC6dWi z*a}^6uAf^$8F{|4GC+07;IOLXnC6dIg{H;t*=l7#o+7xkDKj-aU#3)IVlv8-K-oD4 z>U`=g7L}y9Vo7CVg|hjtDzc}OG^vCH*)kL?!4-Ki&j4ekN}>`Q|Hxw17M6}&H#w$# zJ*%9K)N*B7r8w}DVZb3}#!a!!fIC!~ETtl4qLRl+C;_qRXWAmQ$Bf2FBjd&QyQG9e z9Clns#$Og)@qU7EaX_Eotxa4>2^#aAj@6d2UKiyD6PYK5 zW!N$A7m^MQX^m{f0MU8{PB4+MQJ-PY~S4J^-*q~rv^V{zb+y2s_bhW;VuVi zWX&Pwr2Sb7SCk!n+teK)&#hZHG(xvo$7<$(RmsA#Jd;SvqlBW(dW6z*H)h=3HA?O0 zmq`6okPY`1RW|mK)qS_Sdxl@U`{ z?+JSyvkOd7RzRaGw>d?8ykJ6Dwgxee4^h~HjLz&kct4tQiUT*d ze>FAQ4d;GHkV~TC*qU5G?0nE>GP2G-P8NY4rW8c38{r0&rg+@k-k0d zr=4(Fv};VV7I#@p;;{^sCqXtd=tGlmNqEsLF^#79L;ibvyayhh9Cacnl^##eNc1UH zzN375FXf|AZ?9q^@?9f#BC}e&Ej{)C?@SW0J`J{0<60{+^5z#PnZ5sSrHGQ+qjukS zb$QCb3!>#D3tTY}$k)u|{YVE2VmZJq*pb>2pcPU^=14i)g#r2THcM|+U z$;111iSB{R^~Z-2{JG5X4oPfs2h&6mW#f`KkfYqxF>0kCP*O%MyS%Mhq%nGj=#j+% zVD@~x+B#Qu@-#6Y;h0hfE^XX`7S*G{@>?Q&v22eAz4d25Ud1<)p^q53Rc)W1zP0&F z6WV1jDqz_r3{t(7xUzBu^|ii`$>5o7pC0PCvF!hn{Kb zp66YkOot4aMU;OT1(-*TCry}8lmyJaE)^>2kAK4H`%L!>SkG8*Gv~OmOem6Q4i1KD zroVC&NrATwV&f`qRzrlNNie4n`u1KJ zPfTX;kkM4XB@L01(*?Xe6)_@roFJI??w>UD1iH_oe|)?lSnMw`EJKvxAWdbHq65C~ 
zdHLmXrz<6?Y0U>93O2Ba63&3xP0w9{A0Z)wx|~>Vw%@U|qCE})>9pF)YGNs)xF2fK zr-eTpP75PvH64=th+=Sm3@BZZBkvmP0D~%eEs31zouwU{9`{pg3P&HZML`=TetxqR zgX3i}JZs;TW-mwCerx)Ja)yfQe08C~?HGE4zf*)NMVRkMk4+3-$U>TWsjN03GDKfHDl5bRIS{ybY z^n2*T=B2456g}x(6XWr4I?V;&s|lld`&J!h;Kl3ysii0P8?;2{WlKgQe@pxX;z+bi zuG}0Io z_UDt!XI#B@8}xoK9=0%NdoY$V4XqAy zLWQ`|OL=&v=3&3MGs17ot#%g$OlUBlW=SVH2Zj z&*(yV*di-@-s&bZ-`(0OPI)sa4);VW1yOc5*{dx`0P6DFzmK^rV4o^yF~q4kaBhDj zl2lay&YQ+)R=vFoDUKl*G(;kzmQx5kzG70O z?=zW!DEFQZuNy^&9S0vgn|R4x{+CZ*KJp#G_WH9&S4gpI3m3b+yTbVly=vfhdJyEF zIbwRD;zWNqXn^IFjdsEXPVH_Ha+5}TkwdYHiG2GcP=Kh889T*-YVARuL6+*hIrEc_Pyq-*KpX5Pv{m>KwLXLc!$-JIWNH>YZK)RE}pL=XSE#|q-ORwgH z^Y0i5LHMC3RCY_R3yAgOSq=7(pnmykd(8(>+^HQ9Gd-sv2IK`7i&k8;WXHM@ZSE1W ze@}jfYs4N7{xSBWcyvTXD*$0>4&UI!my<5)R~RYMpNM)Ymb`F*S<1o0alok7K#(rZ zZT%0Au7TsQ5~KrYR)V__5^WaTxWG5!aC8|6W1@8DG*3%wOngI{P)R~hA{}P(wVz}` z)~#{QSBUE?36J7?Z)VdnIB5 z6vZBn9N2M3WcGhNpyT6lz^_XW=Zfmp>H4YiGtbdssat<-Re$qDOHIlbACMP{q`yy$ z6F2BE?vJ*%kzgs-9lBSH7B>Kg_C$LpQMBsx%}lYLbq=wks+4Xc=Pmy)5+Q?bpdy+z{~WjV6?;bd#e)yrZL z&C#Y=_J_?hNSI-!rqxe**PmL_Qh4b?(la*=OG8L;yk!`##_0%=doEB%&-Ux)pS3Xq z0bN@SQn;IlD+;WW3yEp5n8qY0+54pN2iu?N5}+sFzU0`7$u9KmUhv(>$j)Iv zMTFs4W|N9`;6YCr{Y|kygpl;lwIbl#ANQH31lzDjCyS;F5#>Q&KFWCl1qbo5=nzRi zuw_aq87L&&)-<4KUd(@3{`kS|Ud~ep9@bv~J~@EC*U4b$%&$5T63Y&<2wMt<>m|bn zbA2qF7(;B_>jgjU9T|B@JS-bl3@K7kKjBnBiC4?zBW+&~j}*dQm0ineB^Hzqo!&=b5A?p81j(FLMt&|ZA%be0dfI>m z5#A}!GER>AVT&c;SCA}dDyatrI*Z9A=Vq}dFGBRb%>QB1LUHk&Mv9dBnfX1pAl|X% z5}VaSe}4A~AzhhZU{`>PNgW^*z5~SIX~SuN^T;gh5|Yi>0C^^l9K1voE9~1VxNao_ z??3yIF9V!K70}ud@=lG`BG_xoWL8C-t;ho|t|F#WrKySg7RbmYDaI*h;E2xVaO^xs zd41!~E=Z;+?O;=t1aixOVG^4LC=p@Mgmq{jXR~qx96NausZWwH!6&I_jdsh3dJrh& zVI)Wx+yt&IU8eIQ4~ppy|L)CahEj-64zpb*{HQLJ!v*J!L?(FEQe7mVNo<`E+$QwY zXo!Tti2_2GyeG3u>I~4&(nK=x#a0XPL$rc8uaZg>>&1Rvdf#l;jI}s*rTV5R+l-8p z7#Rzt1h#5t?zw5_|0|If44qdU2a0pUXkZ^n2(UJ!5(!1i`wyEyjCqHQG+2X*9TV>= zX>e+KH`1 jca1WIS0NPI2x4Ftq_2Q^Pio1-PJ5nslXLCN`dbq9;FU(|SS-qJ!rl zYo}Py=RC7~XKYq??rfxm1Ak@2)kXI_r`?wNTR$z+9IuXP7i_^zLi#UV^C(;q8|(?eDG&C1jVVp`o5Y?) 
zw7oH|l^Ptr)T?p_)%&`9zd!<4^bP!^k=x8}pRME4kZsBrfKTBG*(>PF^+zsU=G6Y1t>TIRi(!E0!QdBh@7 zdorY;)l_w?PY=1xBtx0d$)Tm7*>whuYf#+#nGuCvqA-|N~4`+!TxqAmWu zg1ruNR#3+o;xQOy)d&e#tG4s`Zu$1-Cd~^EW90H1qpmgI44z6Hi@-EsrB2TK$Lo3c zA?_;o*v0+ zcowtJ1xPL!d*n1{Xttu#lqHzIIJ%4fZIT8-CQCG0iZxteIYda?P}sG{PV)gRB0qYW zA}=$81k%MpG7VuP!h7h)c=QRm1YXre@0vqy&*VDwfG{`&C1y;lXM%FAjnHC6AIYXp zL!-YJk++DCMN}f5aq@6TBA2-}R)byCC%qOJjzzw>i!g@MQ;JiCdMKxOOU(w7F)7RG#y{1beQ|igvaqs9f7O*bY zn#|Ug_-MC+av5ED^DTzTjTj!`0CYoVu{&A49vqfQn876mO4eltF;g_Xc$sYjked-> zu?^Aj=+>xTTy~=FDr5f)@aM3tFxNzHY22n5B3l8e5zO4j(3*9TIag*DKxqpIGxE!` zfMbeSBN=(JRXS`j0rLmraBWW8lvuHf75Dg_gILvtK`>XeM$cg(ESwMuNXUJdTDvNs zamKVt%z-4!*MC5OPcS4CV`tx_m(5G!j{-*&na7oaB&LWlaDvpIj17Yt99nSHU_G zBRAaO!mB^8|?PkPUts0)NDFA1xFb=+BM#8@C~2(Z;Spa~POZOzos%x?Q8fop$qiDo`5{GD;bT3>yj0i|kj<2I!URq%}U zqS68UHe}cQ^1*<_7HKv9K?I$Cp>$YLbl-DyG+YIFbka_934FjW8_#9YnPg5j$H%%; zTAtt-+1R)U%UbcU8It08h8DO$R_D|Edf&glzc@H?iW$z>2Cm{&RXDVe$KcFSIRXy+ zy{$XYm!(54vpb%>Ou=9r4&`GCsk9%&-uQ_DR~MIANDWO78wXY>j(fuI8uQ5Dxp`!0 zPN9A_xd^kS@yb;D!FkE(I>VibUL5O~1MalpZ#2xbxAqcyF>ocw_@I1pgCFiYgVA0{ zRUWx5eHyRne7btqK~n1&{>Ect@yoejRN*EmFICSSy8eB?{ma77JK>2!R$!Qfe;Jgd zr*k|i4i<`@-L9Blo^^yvDb%n>J*EEhP=;Axc+4acm_%*}jq=9fk9GL+4AxurBzm+c zsL>peQF1EtpbBGgnSkR_r&(AUzPUaFe@nX*7W^$4>_i(XjP#U&Y6 z!YP{p?d0N?HL5%-75AQ)QmCcciN~{V*d$DYdv!(9cLWhH&Rg*FVYo8SY7!gNEOrPm zh9^CyA2INXJ3Eb?Ln~Hl9u}6GhlIeDd>9*MaNth9q zC~=05*22^_nh;XgN0gvf+?-*v|O0q_0Z0p;wVE06fpyFH1SQ5O7}OjP@HiV}+`$W#PJp7;`<1|o?$cgn?(yAW3wpd5 zrJsFo7;+w~>0Cw#gc9KR91mx74?iH>mQv(g`Zuk()g9A~yFk$2d|#xKlJGx}mdnQ= zBSg|rn1HRS&UhFlk@F3rMc{@Z%VQQ*c3tq7(n(vf>O}6P1q7afQ zY&YNLWHWZJ!JmJP%Fo9H6R)|ONu3i=s>FkHd3inz!KOp0E)wkpf~b!fu0ZtNd^i6i zDz&kVd-{@FZaa8GAx~kwAL~hTu|n>;rGE_UGuL;?tWo4$50t!(H_LMox1XAt{)qEF zG0}9L?!0y@(9dp!Sa-FAdI>2djJdnP4E;HzKhv(5!N15M#KX5DI(8IpLtkSteKd;C z2qqj@C%FMGLV>Jhm__)D>>fKL7OxNp5@UMq48o$HF z-R>~UB{Zud>)Y63aP+-}!gAV1AAYT?p_fC=w`(tQpv16r3Z-;Hb`+f$!3c_kK5q`c zH$)CIHO$wKosD0JjC<5eJW|J0iL#|o43_K0Jr)5=^j$bjhWK*)gtpeXJGy}nbg%Tf 
z%UB`$&O^ar`%MKN*^BK#K0tDWyD=+lwODs}3(v!RHjBO0ZYZ4D>!%m!anLEy=4A+c z>auE(7;X%%z7Pk(nRjL|{z6?)g+o*8qHQZ<2YWO(j~1fwU|vDK>_#ibkr9FmbX;*D zryHyqh-2vivr(OW_Gdd>k6xCKuD?9*@}WZ1LE?b!3)C^KT)dPpvpPBJ)iidl1&|YltB=HaB*NM4!-?F zH6zGtu0%KIDYztB;65;@J>DI$XjEFmE1Mli*;DcQ3HJDnapr{lSpRO%_Yzm<`^b{v zs{msy2Y@&{t|c&jcvYxLuY-SX=y>+i`tyhRSuh(~?lXU10qc{~h|`tlW?#t$LK47( z9xyn7T+xha@#3WGm9GDNXqa~FPfHdo7N(s67w1q~U5tpuz}bCj4j$}{*%-i0Nh1(z zt{OOz@$(h~#BqjICl!ul0^k^d?DwatGl3_FHos6-s6B& zadCs}w3TIt%_W?a5!bFCgNAy(ksLmqo4$YbawRXJzoiKHc}nD$ zni5+n=3+;H@AL;TS;g zJ-;%6tf5?#zl^v}QD-;=+rpZI93Df)Svuo7&LS(dCZS(Z(kDEfy-lfy++)@!jl=0} zhwFpz#$N2@`!>ayjp4LG;Xpg-@~+KK<0{oa?Cr%QbX)|j*a@vv{B}t7B#!X+`Sfn` zAfCln)>3_$?M`di?M|6x%X{o*LtMPkLmP7@3Oiw*gt?T{8u{0$(UP?OFIC03Gly{LrzUEg(gFmDjt#^(u+styb+ z_6`}V(1O3EEr`NSU?*>r@IplLUuHHeaQ<}jD&}U*we2pvRrV3sjyNKz(xT{A-y?1W zjg+tyS^54h&l?N|Mt+`&y-d|~d_6s5KDA|r(6Au#lfwpwqnS+)X3CUFZBsQ)XnwWa;~SNQ;KNms zDh0VL2o`YpUg^sc%}BI1PPMO(BZQ1I+t(^?eVyCS4XUWsE=$QcRa9SoPHOdwL~tU0 zAx2*DC_?`3OHr%@c)ael^rX|u`m^WHPuCkj@D_IhobE)a5?NbECQWPBjZ%k6HT%v; z3p5z2XxMXXyTQzs>dyVyshrq_nXQqQ9TpXqp_rf67m_Hu`$i-d#bU z5gmp^$~;gHkrp_#2mKN>7E?}-o-$jaFmZaZ@MczKTj?K#s1nm=8!Za@4VtuE|3VCO z$IBmwNF0d1uBN_~#q3W31w4=#l1^^lGEgFo%2o4lmK_0WXJ{MTNLnL7)R017`$OUg z-R0%CO$}V%KrLNsUcAU{0D=+*xF_a4L4SqHoe_NREC!wzL3cWSn(k|Vfrj+(licjP zxsY0&qwul=qTxCDMw|h=e5+rS#}*Xw0BhZuZPf}WWIlB}$Oi{Bw+M005t=MV@$|2( zs2KB5%r+u$#c+ER$?L{R6L=;!(J6AU8WoZf#y* zhvGfU8u34Z>8rj--N+T6m%Rfvg|rJ7hLJqW($b3A=5C?Ca0jRd?kEWS={**)AlmK} zgwTmI=GcbZJ~l9`(>gXZ9L)@oI_P5BekYl-n<+7JEte)Z5}U)j%)r&P=}PEeQgTzN zPcG@aq0jnVzr;r-rGVTrqfk0(M2|`Gt~4CfCs4PS@dDlT&t2%Ztmh~ws3D-5kK-J$ zTy8tiYxAo%Qi-$JA*$+=EHUXVEFquk*Kb!MoS)PkVXj-Zd;JAV{3{OjUnp9(|CUSy z0G(QR0*Gkr%q)O-UlEgk@P~hCX@Cgezl1bEt|A~@|G#7kSvWf>m^dmK*Z>%TM4bPL zX!bAA8X%OJ`5*AwU)~@QfaLk##_)2me*51eczqV1II@7zM(_M;YcNWrpA@#1EU#oQ zSA&tTg}@?YArgp!8H%p_7H_6}j)J+SU*40GlDcntW_;fL-rfPR81i2xzDD|wLX@Da zhPTlN17}8)*y4j;Y{r~uN?}9$X}4I_xp~VI$%Le+=WkS-=eX7foJh4n%qzY2*Bd#| 
zd;Phfu9sZQ7?`c?ldYs;ZDY)V^lj+OGH~J}v&Up~;qDL(d)VrEiXR4cghJcByUup@ zos|9=*X(mGTv5~Lguf>F?i2frCU$zmJ6gf#xcGMhJ6=q`Kr+3fn5G!MP3u<}cvcW> zA9Ye!FM6t4Qiwhb(^PI&r5sq5+t;LYfgDW>WRNPHL#3_|<7~4%a9H5Cg`Bll1%k%a}$GsvO=K$ZyDyD52nh7QcddNToPKq+sjK#G4 z-nc$GjhNLEVtN&uk5(VK6IpdAP#tyB(1b4bp+Q7-LP&#l(qyF5Ds}oC8i*NSdIR;5 z5T~F6h`$AgK_QdviB3akwrh|i#5wrVc1|*j?zbe&N0mE7UnCOh&o96kbB8+ z%f88WF{t>g2cMiL4D#p|P%747s@BSDYp%4HpWGozHD~@$F6V!~P5*}d{|iI#e}E^% z4gkur1MWa}08i)}fR6ERI^cf%e}&8awHE&gC&d1bG5A;2{x_)8|Ft0h*Mj_C3-bRL z3-TYSME?gY2rGb3`H$83#sFZId}sfc{{Lk`{#uLwY(f4bB=j$1<===V|Bkx)%Qg9* z$SERL0PyzTXza|Kod3^A&3|)2gzaqX?QH*{6mv340-S6AGwmskV@s#{P% z0>El@x<1c)VLp6$=VCxWFuM@twYA%wE*D-mJu7Q#6`iC&{ygJAN~JOyU9MLVsFb=M zM=2(gsTJ2yKtzaqkmfcvB^zw0>3lw(dU|>1r9eQ5SYO}!k0#S5fOvU%E3-g=BtRg5 zh;p#MZ1lCXHm3_o=)eA zD!V{|#{Emfold50Y;7q(jg5`b(9kY7+U)f81zhxi{F4u!-Yh`sEfy#`WxafCZ0IZU z+uPkI63O=vV|WP=r!dSb>p_6>LDmT^>YTlw%5=PzDplT|ZV0ilvHP->nT$rGOG`^{ zw@T&nbd{72<|4bhd6wwI1+S-OX4JMqu+kWG);nFQ&KZWO7#VkZyxIU-evHTc@l-6X z{_n#HRmn~mFt7M!F=k-_((ENetGp48I%J>upp#w5OK-K#%9snmuKh3o``nohr!uCpSKyS6WWAuB$ zHB*}$AD2*5OA7*((l<9h-R|+K>_P&Cs2A|?=s3q1hJpwxlgo)zcR!iVfh5cxQHL}t zYP|@POv3ZU_u<{!)n7)Abxh z4WK#n{hLRQAb=eGf$8$MIxGVOh+Ye=({S_K+SI!4FU;W=7Z=N~h2Q{Z{P_3)G!8|l zSsJc?b;(x9At@=T^X)%P`X9=$zugrCTpr~6YJEThku-6`D-L)Qq>ASx+R zf^|rfx+%> zg^q#I($ex*_y40UH{DODC_l=~gb@(?*g+h~uh$PzWIsk;21+zPxi$V|_Qc)2YPZ;& z2!#w1SKy0_uZmY$R`zir#LNsdY-?jP`ThHWsGS2m`VHAn4-`6XS=?e?#M$xw+qZ9l za?Rpyp5)5K4rd5=@5!L2iFiH!^J(e_;spwrtlF!Sow@aq?DbP&uip6xuR^IC7EdF z^lSH)h$_lH97)S19`BLHpl|v#d@iZHYy2*w_ad29(x0fs%&ti8DuirJS6N=MNNmX6 z&z?K_Gi7>&-Zc{5!Fg4R=`X*W+E`K&13oaV$%LDd8oJi4eh?gezIXLaJ#gjMQug5o5mC`@nK7BOz0`4a)1W`ZroJnEI5)ja zIC7`IWX34gmMFAJkCo5Q<0qi}h;>A*>X};XH?2JMGa_tP%UZ$>>(%J{6Oe_q7B%0& zLi7#S0|kCQzWsISgk0j!g4nYXn~(Mtf=(BF=0}cMFN|Oqvl2er(+8axw4*m^m?*wC zA^yHSRCHIApVEhsnB^^^2G=2q*l%QF)*u9%%f~56+1VqK7&)i@3z8V4yXpLl5ucHw z>2vRd>cyQzLQwP}(e#Aa@E&|1r6f`oQK)|j8dPc)h#cfUe+Nfk$(-=4B z93bVhQvZ^2(56(2Wt#?LgyP@#f`E5W2%!wP+kdG4e|!h7en$iT7xY1cj~~&4_?Io3 
zDY?5Bp^WU<1O$^v2F&$TnL&&6Yf{rd_nWJ80xVKaW8d5B4$pEV&@&7&DK5_Et)Hh8 zsL>nRP?dB7+-v&oQCiLJrFYeKw7g5Fs{wU_+kk^Gkj8s6&EN znL*m%Nbnjk6Y6PXZzU9v{u*lzX&MT|_64wxQ0|@xq*i}&FlT>R%l&lzQuKQ~-b!45 z&l>#s4_1iKvl#5r+1X~`i2G?nT8oP8CKQn~1)OJpe=qg)^z6U2D=$C??ORylx*7ca z!$+AYPD}0AI}J-T{9x0`8x3{!Pp(Vt2a=G|Vx$Mm5z|nqyyxg(b*O%k!B6}}Bph5^ zI5|0=S6e+bHKpx98Sj?+)_uA?V@Z}XcyDi4;6$R*J&u;|x0t0*T(}rU;Ao=>X*%SL zTy#JmZ^@QCfD5Mk>H2v88c*VE^k;9Z9G8C70PW4bg8!r<-js-V29bainS*a%9JzUe zqX@=bUsnR*dBqth#^&1%5meiznWi6Q>) zjhZ~HRaI4WbtiAGFTK?8^zPb7MBF@}{NWR~;AK%Xj6sto_t%j2P2kTP|Fl5XadP}s z3bBAq|2L8He<2KUBLS`lGvSXDnVI50CmQW;R)jEFzYqQ`k2$15n2=%MilxjgqT)S~ zT{o?P*nLJq{{-)PmxY5evGI@3qgYgEcZ>OT|MHqI@2DrYww7WvvAN|)vd@~3EK*8w%tB0FSizCL!ja72FbF!7p#lka(*|<= z3i&S)MqqzMnxH`3G$Ri{P|^sdAY+@9yMj_mL^2tIf2+{lm#fX>bl;=thbjLS#IFbq zxFO24sU97<8HD#I#gAMUbVVt=;`W?@2Vw=bS!1RduO1h*T0wZUnb$TCH!wsMq|}#H};Tb zRP6We@qR`IRX3zNj#|*7&VI$oW; z($seu?Q&oigpoDrG!jW)j`#7%1G=CH;l6a~1QDAu*5;t(D#s6X2VO-lm{*1amp4b% z$i{d@_E%_cy_dVfPo{{gY=#Ugr`R~eGwNm18|cKv;%2Dm0t+JRJ>d+Ydyb-HMt#%H zEM0Y{s>eOAjpD=|j&;>MSpv37UQ`J$HxS;Z<7QNr^%r`_At{+1f-AN6=OP4`&d>_J zt>0uqQUc4RMSG1z!|P-p!kRvQ6{(D}ajzZgZrNQhU1GlA3U(1Oh=X5q7o)hg2+rKe0qZPqvmN)J2e?~S?;rXn`BSw!A{T!_-AhPq(|gfQx7Bhppy?Q0pQ zh34-~J@|rTcT;tshavcvzDNBmL)u{7W|2Hcv+S^S%DQgeYwtj4NR5Z*K}_B$qVM~P z#aTy%sJrThtQy}9!!C)8P(Z5kj6aQlY3`prVUA$^OX^+VQX7%-9H~xs#{Gda4xOP3 zR0{U@q-aQ4m_d$WEa73HGLHn|uWZp)W~ImxkhO4Osl#w#cm;S%+z-jUtfs`qzn%iI zWDdcUg)Qq0`aM+S;11K-Y)@1&MC(s7gtSG}VICx&P^N1MEh;OBTB920??r^$Jp$eM zx7E69P!KIr)8$SZfl%0tN@Trx^CmMh^ZomPjnNPL9?m%GOhQSNKnh+bh4tX`+R=s_ z4zLKi%r}By|A188@xMyZ8%y|y4<8(-D)cFFg9q$XNNAi*9tPkH7?@)nJ6NLA?=6+3PA+M$NZIG*@ z)n!41aJa&xb}Ta4`3>4+v%Y{?@4f=+DELdT{eq^e6*PLHP9t zw+FAu{#xJve1(*SqAWeP6*iFMfBA$#U{P7A&=}FtpK{aZHxa!^0qn&A6_hLi)$;zy(&m zzP>~0{BhR_2eGf*OZv{}d(KWjTvX=fKD;y$M#eUK4pg$8`AkFVGXx`ZBgTOU8vzW} zKV@hlGdq<<|96zU=$W9m^bY5Q_Aci;Rxj0~;zPro4B`n#P;YyqE7qVVr*YSwxh-Q$ z7ln-Sc~d`WGd5xbPdr7Je8dhvGgP);IF#T(9e6N6c!Yz4Gc+_r1A$|1m^8MvnS>(Q 
zb#*w+@TA{oH*QpKaV$atNZ4ykbaZrFT8UNwH{nTMsprue-5d^bVPRnn0*FQMZQ8}f z1&2|6>+Q`YfKo~Vo+?Ah#5~uOH8jR-vx5o-M|S_5p3(}rnE@AG`&Lv$M7tsJ0Z3t) zn3yU%ZZ2B(+UI6xOFj`9T7o6|%#)2X;r%5xP?QjOb zb-3EkEC6VAhn9&dNzvgKm6SZ@;K-B=k#Np0nR|+9SgMdf(*~?zm1TdCGsif}fZ_3^ zRDh|q_3C_+r>Ll?jt`&GmK#`R0Ji-2{Dxsy5W$ii_%=0F&_PI&jcx1V&lYGo1^{Ox z?*-LJQC@yHtzORxaB*`}_hNYZq%?I6!lqXdxSr_~5gEC9rIG%-lBT~ep7QeILS9~8 z;`Yk(_s9HvIaSI58AqGBdgqmZ;H~n8vuD^SA91O1KKbk|60_;Tyu6BC7!5YygIj-e zwY9~LzVfPOrdT4)zPBR+OOmg%esr!~M~^v_9upfoVCd}ZYz6(kljd=LXq~=tkOv1d zJ9({ulowG3GHk(slkL7fc@7E8oE>kF?N_#)P^gsHd#ePb{C-6!L%H7@OImEsBV}V_ zqotKtqAEr+vNrVo2**w-rvyhs;$nkg(bL@g!(bil4WoMOLbi8ybu~}Bi+y*_nvPEr znLo=AURBVgw;+`k%g461Ab?}R9!T9Ldn)}4C;d-v5JOkAo>&b3m*=tm`}y(zZvdKG z;9mf=Fwg%l05mrbFZ914(A$|9+S>i6{d>FPvJ+&9+5LM28Oo1pvkU;~zyxXJ;osxVk&$Zv_MdTxP1nFPA{<-Q3tH zBrI%gZ5{nx9{I)oZ4ONWmGIBi)$q!tR=<_?bz3K=?VX*4@$sKOe&B-_C8Z+(mPSYE zT%O%W?UFQZ@s2N@u^q{3YH9-M4lciuiOG;njYiWMfWCs=-MJf}?OKM0vj75|pO33t z!oPP7w~4vEIqd+qtTgQ)DKvi6m|k94Y1-t$B;UoV6Zq!s+wl)Jyu7@TIFthZ#~a+1 zeP7eKyI)FeY>`n=1OudYdRgHW)%nqgWNK<^tWIS)05$;v3eamk9UWrkPoO7}xYQm# zK9!}VbG7zFU%(UB#^>j$6v$l8ySk)7SfULpP;~S1B36;fbyZDr&MzqN_w|h`Yijo= zv$U`PU4^0&SK7GFH-gBXLVI-}k)>(R4Egd$CraRpmHRP1U>ZlVCD)Tf4t}CVb}v6| zY&bYLAoxdX=EnV^9S?IVY5F8@uZTo%O+&7ks!75_qxb_h$C<|X7%$djXGqqB0Mt)x z;2HD>N;rgx9I=~guXsH4CY#yanP7fVw=%C z$u-_zKg!7#_xM$mk-?9+#*>`Dy5Gch_nUOPlg=;2BL;OOvh zze7hy2UrDnFE6>w!!JZbg#N^^y5QhK05gY$?Y$1Hm(kSJ#HA720D>fA3|{8u=KA_+ z(a}$Vg{Y~i0r=e)zdygf$H&LdAIjFe48~I7o13-C#N;e8rlh2VRXYh#Vu%M~XkllS zGNm&e8*LQNl$EhtTCQAOUHO!Agr)0fYqz?u$g8WXdw5h&$1&r}qV;8fV0L;LEKMaa z+TKHxQ&ZW(p6ExDv$M0u$1oEUldD*l2j83BSNdMRetmZ4i=A9W)*$AftD;h#lao_h ztG$i4O=O4VUHIjTlDiQ-6|}sfs0e|Zosv>eT>Qv;C^r#cYdsrNb90yb%iSU(B4lKH zOC7XwF-_%(iA0YD`T3LevU>Vnsx}2*ubnPA24B~K1;SWt^k< zD7dv%X%2mbB1X$&WiKtA+sN@SoeBR;LraS{Kwzzxt7sif01Xv2YVIMBkQcPBi><=` z6Fxtra3HA$HM;GA=bG~*ck-AyIx+EpmX`Lx18qY?rO3-xAUB~_U@#jad;$W8OB0RX zkaHvLQbt{)I;9`MCPnVWbmKKpMHW59vm^+gKKgkC5ncMwtB{&pb z`$5LGAyvx6WFhI%Amky^+eo}@JuNK;645!CyDO8DljF(rkkrve@8ygIdssewI?zvR 
z`N9E?PEH;Vl(}kph9=!^vcF$JL*oEcsm?&PN>4@xrTp;EpCAwr~vqQV#Z`{?xa^zhIhcsk&A zfkA+RXGQXS85rQ^=I-nO=EL9LABg{7a+~#SVj}6?yLTBGUw0Iz`xpI5D6u{k78ZW| zxbJTWusVDDV&8R|Z?ZMzw-s71Fcd&JAz-7dyu7=+du+hNHHMsw>;W;cJ64`e$mLE0 z1|E&9yu6LAZR|G}>1@Q1^ZInvc4ddIr03dI%a;eFq+CWed>)KotfG>Vrb%mcn%#01 z77xA85C|!Le*OzP5*%KB{$7+&5s-~3FnGaW>Ro5;6&0MW?Sc;8Gu75Yke@=)ev!T$ zEQ%rI#x#25K^4SoJf4~+PaS4uG3Dgo z@R$bmfxWRY6)r=4eSO;3*_j!j4nJad<{D_ld`E|d;#;8FY|-)erNhU!3E`S>Y*^ji z_%^698kFTh>Xg^yK&_F0av+Sk%3WHzOD?9Ui1qjr0RaIL5>n;!HivwVn9>5J;Ub|_4bXU8M^t2#V^@Wv-em_Tf3~xX@5*1qsf!MDb_U>E#0oZ6xhhwu$zO; zZ-r@WUaq=smyYT?om4nmEK@nnNMr2F9HR#+*%jDkH=P(Y3=DZa&Z=C}K(r{SL}2$2 zk^W!!pMgb@c-DE38YomM0&-}QYS%NE%%->ciyfjwhV2><5Fo;i{OuXI5o-TJ5GaOH ztn?LAi>}R;;+W4yiPt=9uLizussbe@hDuAzargTmZl^Xd2H!!vIx}&coXglg>YEj+f1Wy0Yv7}u5e9DrSaW#SGCAql` zQ%u1(t%$if$~K3~ExDrp*;&IyoF_EEY=R2YJeF2?c(`du+k+n^abF8?C^P0rNl?qa z%+1*%;QWv&$#Yq@$4VbJd4VsH^x2CT&MZCc#-kE!$drj-M`!JN>3-$=eQ{39a(&o@ zebOB6!2AukHfLwaF8Pov4K#$2u6JJikj;|^sc+siSw(kF6a($?(T3UCu_F8*?{J)- z+j^h=48(@;@$(~j6#2gbV(^JF8Ab_g3GJuE+cO4AI`sUKoc;HinccS8RV>P*LVn|w zrxXkAL86J980sTd7ox(#b=Rz%oVWNm-C$oFWtrA5WWT5UTT9l>gy z!IAcRqA4I~kUI7hc~w>1_HT3V(^pqlN8WV@-v$lGkvmLV%q1u(DfPu^APb?x7rh6k zs|GDBER1e-zl9{eeG8lzG_J#Yr|)Mk6+>Pfz#)sOiP>OCnvnD~<#iCI=gb2H(!|&p z+E`vfcpUkoAv82pNBh{zYlw&_(Xcr1WP7H@sNTtM{|EhmE!qn^T`ONd>VVtpUJsAo zYinE0sn=d8Fk9akKckloOx)ZDAX`E4RqW085pz*Oi=S!AxY6A z_^qu`%9d}gF&N~(Ca0_w6}f$C@EOShb+ivM>kupCmtQlA=@-A-0Q;XAYQX5);3D#4 zbMq0$PE=RYn<_vx(AUr)s7tXIN0>r9ktWqWI4^Gk1OzuLN=w;TSvLmYiErK@<)DmW z?_<`4xj34#f_dz@XOjt%o|^jl-SK(~-$jsO6c+T&H0^O053dpu5`fS)HJx8yo*uX@H#&f< z!P;8K(C`a~3?G|;8Rq=WB7WA!^(Eft&q6tq5@%AUAjS#Ri&~PaJLHsF=(tta`L1ZP zAjHFi$wUQa`Q5vLHf$#LQG@>8-u(Q0FS4pwLJ9{GJboX%)0`Z-uLq{>fzc$B6B9YF zgMx0GlGXUw9+}zr|LiG#n#k9FChfV@9_AYKvAkTxSupy;_5cb7?(s+ZZEOp@M7x|1 zHtzd9-)uoyJU^e9ii(Ps*45QDVJ|W=vZb-nZ4iG5laz`D;9eW+;NYYz`kwql?r+BJ z#;&6|29Pv75*(V=MrYq1W-#$FTm%k-fOk zP)_*z%li(+#l_OaBu82x{X}26hromz!!^;+te<;#gBe*p*)jVXL?aozy;kpofM~&G zz7Z9z{hhKJ{Nr*rsd>~ZU30tm 
zrQiEcUw|N7CF|>gMfID331VHR(MPP~d+pucgGE)n((K$c;{0=uWNW(6Om+Z12#hWY zGBP)McN{SH&sq6bgrzs{d)z0rZnW&j1%enAr3kYHnTeLVKB`$xaq-+T6}s98112Wc zJ7txi>m|99T6Te2i*SN4cig;dpss6cYcT~2^+3aXb2QaWJ=e;ts)T@E$HWFaB%w5X z-jl=`H}Zi|y#>DBDJdd!Vo6CMVZ@ zW+rQ3p{Lg{S%B>tXL_!4MvY-p03T`h%*(4Tth;`%eWj?R;xQ-55tZd`tL%++dV0Ey z^e(_<0AUA{rZ_hb8w;!3)qH}(y>t|eW)b<)O-Co0HP3>Ib$7mr=mG5Vd+8U>AOgAH zTLB(Fq!=K3AgBT9!#lWXYw-2O<=)nh2KfY=XYwrBX*o+l8osNS3Yxz}-vyPtG1nM^I}4V&^lkm8$jjgF4K zKGw2^^jBFXTUye_4q}TX-=5=9=Ah?1d7fpWu1RTk43qt|tgJ|D z>z#e0Fbqo57VkErh*%uZmDu=r97NEUsrj0^I^tCF4Yop6lP_j3A+oP11uIKW!wRh>DxvuW7(Q(l4JhyaaD-i`1q^m&lP&? zp32LwfqZr7>sKvJ&6k?y9v+8{?lBM%tpawyDGZTTr-z{D^c6k0Eb|c_FE1s9zj^M=&DZyWuKQ#$2gur7RhE%#v0nFnsM&8)N8$z4X&Ol< zP~>yF%qyv>Rr;6^E>&4#AgtlQjus~^uU2+c+q2^MJCI;BG&VN2`0Rlcp*op|(6lf= z{~*EwG>xK!Z8^c|w=8+fetYfno;Ku{zJc(v55V%!aWo5bO())@W*2_!0Sa!GdQwyb z(tAEgeYHLjL<|EPI#ag=!NK4I)-Yil;A_BpdICgh!rsIr69AjVT7}9gDt>0` zmO`JNEW6M6yw%r!*Vu$OS;yRbIkQ<+J0rYT1bqclDm*Uk`zLc=2rf3oN3Z435WNL( zgj+})%{6sm6KzgbXJZS%5h$2TkfmjYEZF%hdhWrBU%Yq$qG&Qk09gLK1VJzm$K@3I zEp5ezNgZ0Kis(=ZZj0)+YmytbZZoyf@81iVCHat?Ap5!FU$2tc_*=M*ZLO`j401)C z$ZUUWuh!!o<@$om90V_@rn$!Kqkfr~$2C|a>4^FJ2(|Q1Ym$sofcC zrZ=EaiXMjJhHA!uz)C3wV)~SHVFNH9Z|}`TIh$?6 zY0t&q0yj6Qk=O?J%!FOD44*#TY-xW+Yqw#0z?dcM*#`pn&!0B{cSvX&083^(rGA?I>Bx+LN&SHFHW ztSg$e9{=d<=ISs;HRE@fklu9>4Gj~2(n#ttc0@sL0LggLQJIrNKz*$km3&HgJmFM$ zY&-OGUq@G$xgr3+C>_udEq{3~OZO+#fwU!c;26!$3QALEP*c!7SOZ*X0*&}%PR@ev z%%t303nHS@a4cbfz7tIG-k5~i=~hcA?04fifMHEm`aD=2Z`Zc-zXp6Npz`u@qezV$ zTpg2hbCGP#cTv2JA9$`i2xB3S)4poGdZoIjB-V6UZW&JY3e$0?*d;6sZ&oT2NftlK zhUuPzhL#Nc9=yZK>MOA3qxqGosd_n56~;~D-^2Zib$;Rv4wNGav9Jgx$1rkVKAZaf zy?6R>vK&FqE3_0mDD2VG@@J3Z*K-5?X|;D<8hV032R~O^et#Vv&Vrhw>9VppZm!iU z{G<^hWst2gK}*m9EJF~X$c8vVf?mYLNJz0uW!Bcpr3y4Y(d)9IeD+khMxV2!B*Fwd zZeWMgAor;=ezfMF*KZ8&xoJHQ{&TRxGTNs4E?0Ho)gx_neZ&GYgbnc0esx%k$5RSI z>eY!!M&9?Y^u$<&d(>TBT`a2FG!n*h^YiHelj_t!hX9lGia&H<<#MkE$w6Z; z45KDCCgx1=c*>NYl?vOGySF=g^XE@yjvZ|Ln=u84tgNhxiVA>%_K^?~5=z|o64VvL 
zl539-RZCZ2@F!5zVY4N@odtYYn_aTdux{6mD)P7?505@o)j9j!z+&6=DRoP1?HBRr%l5`?FMcNTT3m*;$c7;>7eEf(SaPLA$duzU_ z1X3S{{^;^NbM=TrO!r$sF_a7myKiS8k@QFsHcgw9d|+#cN7ZQ}xUUO8pVapOx0;B!t(Ib`j0?q)6lipnaInHjAM8v{r z0i^*hj<%AqFP3FKuz=%mT0BRH?DEIlsHKQ$tJ z5nUAJRj{^g6pg5^hE?W=2L9OMYycg{$E!FwvE#mVcXtP1?r*<3i?6oX;?MYbjhvmF z(vy-HFDCt*=IZ19-LRQzGSUg%4AorgKFzDUxa~L{wW%ikbWyAD`>UhkV*B~w*J-TH z!=rQQz()uE`AxhRgW=;XJ5ox#=e0tDNgx-ujCxL!FEcgkc=Qs;VGL?>^Y~$r?kgl( zbB&T9ej!*8sC*D)OE(qI6bf>IV;}ZO4fGYlh#s%2NOZE2NHE^YCLgCIqY46_yRM8KBOF=fbGk`%{q3R6Skw*sy0E<>uQ_F^$(}%ey_a18N>G1)uE`Py1i;$jSAV>dmp6d}(r9CM#Em)YBzvF#l3!OYilR@;<#J~EWH2xBy0jHi91A-Q>k`cVzhliQd)FhH%k_u;-@*Qtg)au7F@uI`rr1z8E?xs##?JmEHN_ zYVEkwR>#Wy3j-MH*w`2VQj6vRM0<$t*4*0%>V7EGa$cXidSm&|ebuh+qoc2BYBE<> zUrbJ(oGk&M#b;+41nS&@XAj|w%-@bqmpDXm9v#$H&oDzoA;(CDkmo_?Of5TFV$&I^ zsSTlBn2`5!v3!r&*g)ziZqY-ChIW$9ubXD&v$u{Fn!RM;(5X4+cQgSN+-r-t5aWxB z_sg+ewuu53D{WvL1W?L^A!kpB5jszvTpw?GUgga+H;crbb8mLj+F91PpShJ+wj0ur z`L+ZNync-j5#7C*?zrk&6VLk+nUP@}6m(SCo<3~m^fr;#8#aHTs~a214J4n3htN@` zLDZSzVwDOm^5{`{J=^;OOiMfhrHklfu&d%_WaKwb8Jzo}!|AIi3w*H;IoR2o{f?Mz z970dsv@7D`X#SL`bj>%3?6h&*eBJyu#zI2@MNBLNWy958x#>N|!fXb!g3^y4 z=gjp&0F4LO%CY5>@dw8Nr_#1V1gSK4}T>SP8Bnv^fOins4wI4Ms z38j&pjBmrl(Aub#foS29=OX{$@%oL~xwmI!YdexwTDaTRt& z{5)9s-qIY3^F%7j=G=2TtsyRsp$=ZZ1&p|VOVeUbRh5QJm`YQJE@TflQlHHh-y*bZ z2OtTeZa=FT>)EE0QvoB`{Zb~k1Wu4ES#Nj&L+l0sS) zs3nz_dR|=(-&^fdads}x%BqTsn+&<(U0Ui`ndyD`JQUn_A|rAcFjb9DbaN^~16jt{eCQ}HCdQ&l8Hoa^+Gc3HV9s+R z2!ON8q5VhQc&S>6$1xHYc5-uDP(Uayp!edfeXjIn^GhC1T}euLsXxDV%oSx5{;J{hU?7qIU@4lYaPfkbX?5o7JBpQl zPj(J!Um^kkv`C&wQ6e0V?Yw1sZujzI4FN)#cQn}tDlf-kXkZvKi#h4J-3g;MVr#9A z>StXO4>Db;yzDQUfhz(oLSJ7Wa1KhhT;$}2o4qK<3@o{?azFZ-OIO|{VYkOeM{}T7 zdVaLtBuZ-zg!5Ygz}^pKl{|;M@2jbC8O+2(1~4%Te*aDn!{9=i233bdD=R0Y=REH) zc6O+Ney4D)`})&tj;qO6fsICqqFh1P{tkB*8{`SV?Z{+WKnikm0q?`s)paCbx0=ho z9-y9|-{dhfGfPOcD=GcHtr)dHbIbC7Q8Ox{<8>;OjrqkVN;D zBtk;MfIlqwviEWL2fK%cIOyo;9z7bEoFr-60g~F_x-@)U#ATm?5i`}(L-<2T>HzWL zH-mg2-Y5T_C}CckuQ@c93fIqJK0c(M1Xx%KLu|vuK>V<+Gk8DuWqmq8^3DLTu8~Ye 
zHaBzKwt`~gx8c{6oofTU)aK4k(9fP|u=}W;KRO;1Q7brO0N7XWUA{A(p8@t;t!$m0 zodLTHKoD%&S4cH0J<)@+dl@&JfX>6tv4gDskg3jds$#)JFk6a?f|3#h0@2hrKRUwz z*E=uSW-OmZ-2Z`zgPa^%bJpRZ2Pk1}H?x|DgoL<|UtG>05Oayl#lcrqDPi4foHGL^ z8J~7mnFBMElZh!P47Ifx&#a5?HrGTtd#<@TIc3N;eS7$!;lqdQUey*r{rV2v_w@99 zix1wnIZz)9Yzw&!UhLIRcO((t-kzay(3|YAvaW+_Q6f(bY!U>AFEVRn*iJrc90J&*xWia(+Bb)Kz>sl$dzw)ElFdJ1-c| zY~~Qw|5#YKiL$hS2$VV5ggQHY76V-G!@o5wRcyeprgR-Toc#Ql;1=*XERv0AM@<(M zm7zoyHL%M}?i{LNU@+6_=K#UQ#AHjL7FD$gyGH=*=!X<~Ym{FaK593MVR_#w#e%QM z5NWe@t!CpNt=rln+#-`wQXW6f^`2}!%^Lq+Iwi_+8POnd6ZWe6v7xKqas z1EBhz$Rn0Q73@AF_(Q*QozSup6Ft2Vn=RxY9kd5~AD?#Q7urT4ChCowJkZQUeU?|l zvG~uqEW+M&#K#Xs;80dqFE*{0!5NvDG>DopA*!B2HjH8<4`pjY!>AXBhwXbYUsdis zMG~ba`8G9$aSzGP99jYPTb$OvaRqsquv?CRh$y^9UY;b7# z^_Oo6ZNHazrHvmxGSJrkm;>(&p_Ur@WbX7~nxbWze7BI&-DstEbb{7Wo4~iY(D^nYvK9F;}OY0?%Ou+o%E=wHhS3oTm$nLufBf$8rs>2 zyRW1>NfFS_YSfR^i9kpj`ZL7V$;rtj_zp;5kClBw%&Qm~;n5-GycvK=23^4PI&QWN zegj!(Zs_j^jQB{rey3wDIJ&T~H^`6$AJZ$M%GO9TGBbPs`rK4qogo)n3#wKk93yl4 z6t@6q!6if3R4McE<1}-hBjNQPZw`{j8A0$QE|sI%OC}@V+#AZB+TY(tD91Nka|2LY zshrVRUw_)(E(VSB`LQ4rI^7narlq9?WU$eFgNY3(9a8Jz;n6$>1KUEuXs61JB5#_F z>WLUpE*t@D1sD??+}!Tp3RF_LZfmaio%z}5K#5&b9g6(fstVY-CV)X)U)4|`#P3S{ zAv-5|o}nWaO3KRBRaNdjr>q5fQCyYd?Q1NcC0US137)0oskwd~*nG?B?`nTSu^YwPXNvjjt|FBJ}#qDNN~ zS!xC{siSk6nuLXg$wjX~F`N7FV~q_P8{7Bqw~J&4J<&wZpFf`$)TjabJf8x*Yje}$ zUBHqKz!$k`026jQmNdj@FsN(~Y>Z2w>8AjpmJF|6lj#DDS!Uvpv<17Ldm6huz5-M8FdQ zk3$~U)^StvB_vk@xA17a=AAO0y*&m4!{Bp5Z2*3Pnh>Z|FV^AmzNjCS6xhWVu_H0zRy?8^qa^c?{Z z%E&~pY6NhbUaXQ5O9)zVYnU`^XAc~sM9~O);NBYagR+lh|1iouYaXtltfAeBXrc>B z*_s7rZf*huKIE1Y0zJPvLIa4+-ebV`+1}oEGB>CHeE8$hqY=PR0kt#k5Q36{m+g=- zr-shS87FacOJoG~?}{N4BO@d3S(yzMfN!JnA$oh|lpHYXjvKaKpt!mcs?dSL{0xnz ziHWmqgY_`gtIW(AvdT`2FUHTGtM1MoV?@~m;QDO3YE(F!k@)!mhrShffY#q*CH{_X zY#ji%u+avPWpR^DK}tU6o2X{`2jn?O(}vmUj@!Gq6nJOiqS%>Loq+UP8BksPNpoi; z5H+?V%&Nx6ivbx-&tL-&BZFviXR@TSR}T)<0SUvWS)K9&9w;=3e;FKXaGrBC9B6X-o4iITV5YCW7gci7`9Q$c3@p73eQ zl?JId&)fX}K+t?Hv1`G;@4ds-L1$tmK-Xv(RjEO~<83s60@EEmt_%)JN6odhN!wvE 
z_@clK!TW6&n&b22-P>=U^D{H+^2HP0qXxG89zDH}wG}(W0n(*c!S|3`_(>e_hFt*e z{+Q2!CcZP8%8-+1lD3VMgiK*oE0q5C(bXI&A@C z3NWpQjun+O@&M8XSQFT351ur2Iy3}Am3FX;qH(f#|GeX5_z!6X2lh&c z)-5VA?PEoLPVRSfPZqf3jze{Iy4=Z6i=M^bH*_|6*dsnWVIM@d>q8)KqeP$(R!}R{ z@iito<5!^YR1BUCT9W+E z$C_SPa0c{3M@Qaw$~|QZE~F#HbYk)Hd7fEuvHiM%Q)mGHyTIR6D`c4h8-M{O;sTrU zB6k*;>(w8n1*E4CW5kaiVBfZ_lG2x<_k!JBU48xiVPRpQ^6<`FT^(PQ^tP_GHE^Xb z9)UoZnINo=>BqBG)i}jAe*Z3^T<|iH59|D~yBqBF{3|0317Hiv1kBIQUI5b!YEGbt zaR`pQuQ(dnsJ2TJ6cp@WWxcWi6RvM+dK)mSu zQu-a>l4*eXmzg;vk*eYXtO5+iH^kdk3u^{@SU!LL92Ir%AP&ZXs22yYD2UN>a^MR{ zMkv@Xt8#Ot^GIWXT624xie*xDLqtRb#_e>w8r(>9d3hNkjQ`-lOD?eM{rA?Ea4)rCt3E2ydu8U( zqU!3mvM(X{G+n{BH*GM^6CqT4Tif+tzZegU3=J>VhA2Z*`B<;%8yp=S&!YGMIxsms z9VI;dd3LcQ#MGr-E}#bx5(G6F^NGqo;g^7s0fN}WZ-h?((Nhy!OhO`p4GMkj%#Otx zTqK>ov%C9TU!Sfj@2a)6_2lGa&~pI!sAnN0%hJk9c0M04Gvk5Xz<)V4KHhatlN5un z^MVe=c>m9qo~fy66YUM2y0Zq4gp#T%fb4BhrxZh~?imzSR#sM2ysER<1gAKt)LCb! zmlPMj8T49fbobku8_QxJAZ)Ik)N+`ZGHV)2gP!541zu%*en7NtIB7EmTE6W*-2tYvq;jivri8WHh=%_ zBB@v0TWovw^l9gx?X-_eBK=daE*P$?7GMj-!Q6`m?fAx5EV6b zevhQ@KG1Uv&5Nf^#@cTFiu&OBmbzmi($x6)bf;+C>~!}5DF>|rECBE;T^3L>R;CU7 zXVKuvR3YnA_c>86IIA-X3t4pD&mv(J(34R}s&eDVfBpEhMJ(0Z%vx#0ykU-jeoo(G zVSYY?1SX4fo;a}+coYSLdYTU(t&cn+iu*)bT3-JBVN^y=K`buS_c!_O&GJ(%0G8c& zow@Xx8f>BBEB*dBGwQrsE_4T+NkDFiT4zZi9%uYeDQlnfUB>i_2P9*X5JQHp7%SHB z*spzs_nlvdXM7=bU}4G)oIJr?tk3}*IOmP|1_oZ0uIKKX?h8IX zo7ejPJkXDy|KE<|((7rrekdQEuplQNKb;__ zkRU%exkp+{OpHs94k{tRB~Qo4^#pwSYd6FbOPIZ@3zrs`j^@+5KUO;!%uR%g%gx-y z%G{IF&CkjlX3pvAVaxT14*XUN=HX=tQ#bdxJ9&ry?(DvQo;JuY@IQBsmz$Rp97y<& z&Iv%FoWlRh-g$!V`S|#7dU(RTtn6JmEnS_teB3xJUBNLxE-)@HHz!wfD^D&SC^tVB zl!xnX031FZ;5b7kdly>{OFL_Gn5Uh!HH_2E%I3ch>aPO{|1~JV|9MaXg5Z=jp?{4^ zkWYY9SP;xP`2K$}=XZm0^RRcZw1&C5xVpjYo$dY2@0RDUA^!KS3jU}02i^GBS#8?( zFehu)M|50IykK^&9;`~Pb}n?<;IoVe7(HlF7MzvDDk}o!NC0d|;pP^E3P1%opaOs` z&H^q|akct?T%qpaYUO2V?ZJ9;3sHgCLo6X45LbvN__Kk)AQccgh&p)h1F;6*-@UVk zxcuXL9tae|4dMM~XF%`%_f7o!1PJ`QcKjvVY8DP!FlR1RI({e=90>*Fg$^*_|EKdP 
z1)%&qcWwUjj6UCta+9$Njt1u-uF;S*$wr1Zd-qz}xc0ou-b4iMoz@v-K4lZE@|2l7hsgd!~Z? z(c?&~D-FD+eFb6dgMM$je%i`B)*JnV^wDPuK08GliWg(5S4IgbuXHQ&R`arV>^zQ< zU%WaXY|m-*7B&>u91nHJsM+L-D0A%CeZo&W4OfQkJ9R-e|%L?Rj(j5=4O@nYC!Mv z6Azh}Ntx~0ck`MuVRu41<5~+*4}vSS69o1?^>I?E^keH;qH9K&ug;BJ!aPq$7_L#? zgNTi;j-2da`;S|%Yt&0?#m=P7>$|&}cYLRdf^zgQ`KNum7pNomKTUOBT=o<5s zM|qaaBT^=fCDS^d6q>-PGkf*g`;Y;9tmrX59d#S?w6chD;-?<}#~mo_*eHYH=){Ts zc5AIUF*ap>$X42zdON7WW}iQz61o?jk4jToT-V_fj7qVIJo}XHK~9t+SJ@Q|)eWm7 zR4>FaQ-;QS7rlc%l_Jz6E6DhgfQrKjdj#=&@!*E-rT60)^cP5WBxz1R+V35pO%sQ6 z78m3_fYUDrpxx%U)8vn{6N~GGlF_k}n!mJ9jTLxV1-&1PLG7I$&N<0#mK&0xk1s`p zQ$adkI_`8osw~>BGs$7iqkE?$1Es@Gsu&fo3=^Npms$!p6RFWKO1VfAYixXDj0$U5 zqf&MvuRB&h+}iQk6|TPv=`}>$#|#7a1$y7p{e~@6PR+c_yad#dmd3UFo-Jp7p?fCe zTlY_&go#)(r{XW4WyjXO{^6sy%nnH^?W)OI<@Lot5;^;@La(kG;i z^km3KUtT+{4f+45VXCgM*Ub_$25Z|UEeR*DmO8$I?i)l3{TR<^N z83SPxHx*D2;bcbL*OkFCd1=Ewju9U=d6tyXtHyJ^^6U4Dj?Y2c@y~o8Goj-=E|gOc z!dllsN5Q>+qL4l$@)JE;Mc$E}r2~UOCRE(#IDNBOJvK>*tm>1b$2Fa*zfG>%Dv#Y; z3Mty!iYK{N%F58G&P;@V#Yd&^@CdvxVmb|B`OYieFrpF`%$cVaj>cSKU}b=ZF#j%Bfki&Fm?Jbm&=3sy`Zwq+ z)^k>eSa5R0G1xoqG&r~)9d#Dk?>@h19OMfT-yuXjO9MHBDz%xCOwxFiFf<4Tnb;dB zDy*7JcycKNeIqF*XX1FfwduURP{{f#5Bz{>wXcKNFU%VOl+XOp!oShJ<~WZ@u`C0n zLz$k?Un2L=B(YDWW`p{uB8`H)$=&)AZzn>gaK9pQrBii3^%RI<7mP-UJt*V+lniVs zK}ZO&uLj^8V|sKzpe`6f+Hm2VvE+@oDd*Lwl^}>Bs~~8>*@abSZs>$b#-XKQFC4#O zT;tLYR2m4ndy4CeMO2q`^%@5@r$2#s`HnB9%XolE8jd6zuhu1=xMfDfyoZXP?+|rl zvZ$G=&qRgvesqYHpEosGSv_-iYf;Ab{+4$>#B0Vb6Nx%_=kM)po)4K%@T_%o2un1T zn2iW>pkBuy0?Sr77=umX6eymfSAm9wK#n~n6b_>zRYxT(XhRB>2jmQv&INMT_=XV5MK>z@M`seF>sN!XyGUrbLdC}kzH?)_BJ#AJTqrn#?X>06Xhe67czo9b zV$d!-teuNXEl=%eJ857n_L*bViN`EGA|^D$Tga`ELtZT7$`m7%?UJ)Y;5@AI4~roE zISu=Y_)=HEQbbY)_wC04Y#6n~PCg4OUNLvhW8N(L2OeTq0f zEDm2x$|m;ElH#nUCXAEiw5Nil#LWOEjB9>JQX1h^0B;$&KUX$#*J(>6mfgHbI8Z&v zp96Nk3h4$-(iI1U5Tr<=V~eJK2HAb=HIX_svp&X+g8@c7g;L3>R2Ye|sEmqi+ntPQ z;bp02DV3m2RnsnPX?r?JONSG&kCED>aS*okk1A`h+#6X|hP#aSACK1E4|y^Nht!YO 
zwV9bX)c@KW4YWD+ag7_v5|81)OfYMe)NskjSt#@LZK!8Ou#&MuNYB(ZY&b)>nrZB#Y?jn<}MlvIOUd!KFzOUx)aQA`PrZ6URmbK5tvbFw8Ec zqXYHHwNqyBQzkea>|>mqUpG>7`h6l_&5T?mIc?VP(OcYhNzhvyjg}Z3ZzbXnkO`r; z`i`}t5Y&td0GV7mxZPmy)7yBQ#zDw2I#?ht26j7+t~r{~anAJ>_yPI%6Y^BE6$!7( z@j`%ieM=p8TobG!%QY#~95cJ+qnRu?;apjl8Qa`n&MjOB>{5>t)qhPQT6f}B(3XNG zkP?;s%MmyZ{QV$G_}6P{eMQ41EG)w`(}>cdRCfw?lUS66v^>e&)^ahSTN7GSd#=1# z(!>?|?tV}+Yh`_vJ2zWqQI^>g9c1*r-p3=^;`6@UUuE3^7HxDEj&-iif3 zFAjZPjzc~_ZpJTBaDbmDTt3gEou93pn9qlVKA(qz?^m26oiB}~@2}SsuM93$$Vr=G zzO}#h@GQ`)CH!)&<1W3QMkhBvC-vyMEylX$ss(30&EP2%Q6ZbAROQi@r|R1Nl(wYn z(#l6rD|510RsMEw_Kzvrd>=r^r{y2#a$;+ByLwr->HHiHv><3v6qkp%OBz2}jL_C& zoaxXls2tyBG;Nh%o$(re6c<5|%+A`-EJ#W^7&eV1m^Z?wbEWmMcE1s5-|rzyKy@x! zRJ3vT8crH^JS#`jsX42$NO@tXH3k~Lh;QZqFJ=^dd`)K+U5-NqBQJZD(Vjk6 zQuC_S)F%}-P+(m-j|I0_)4|Oi@B!DV7yCA3E;Dtb#EA64L?8;aVjl2R&RT6%`GL=yQu}(F0sCX zxn>l|16Ar5VG~^1U`|a_9yR+n*=jlvq;+wZ9nCiAW1{G+Gq>d*3gh~b=r0o-Bk7oZ z=p*?=GoH|N92bY~1EyF+LdOm4&#{+L(3BxaTkCEFq$h_y9+)_463vY_6CCA#RL(ZO zpch(5x@L9EKSJnWQ7ynt*vqB-cC9gHKRCUsWTtxKsyLP35V6hDrH2f_AMUM$&frkL zm`9hia;LI5OBMJE*N%L{lGWmwx1;i^$EG_+dq5_bO6 z4Xt)xZ448y)@hi(f54LHj>q&;`GBkOp~mDiBDK8Iu%RMshLC9~xW%5rezv7f_q>i7 zRV%=%K7P>{80CK3Q$u0u0=f7+Bw`z-qj$yfBW0y##WgK9BG57%@!!83C=rgSFT~%> z;$p|m{QmXR!O%viS4W-J6&6E)HQO$_%@+Udz_{wDTl~7w2sd!N_PTQ5WIcMp^U9Ux zp$G7JzvUEOdDZcsOX<&laHqOt<_~gDMsa6wNji7pIlm~12k3BOvuFT(A6&*ZqzCt;Em)1PCc%h5|*c_b};Z}ECJkV4|bgvf#ds?rd4|}t7&2cuf zC%T|6?*}#C#YhvXotHA^&ddAnnx{mSzD~(Z&v}`ZNA#`lEo3_9?5l?vdHs{_Uq`&- zd%-Ky=mE&RO*3G)C8xdQJOCMr9Pn|+}{&ZjJ`WG=2u#-Nep}A za4js#2Z^K%&0?5Vd#8gip4satUN{S-5QIKM?Z5rJbCJst=yQatc3eTzabYSF7!V2a ziY$*9Ij-BSy6_!iq#cBRC7Qkeku!_jx59zWfN&Cuz9H2{P#JrS#M8draGrl-Q4jP4 zhPm0dQ%>=3ypIjeHI^v~WH}$8N-tmY#@C4Il9|9{ON|tLN{vu#ON|_q3(+j^p-C@) z$CqBVMwf06M0$PFrMkaC;}baJ&deD@KksN{`F`70$*PcKp}X3qpxAzi9Vy9sd3shk z@$qMJ?w)w!Dt0qO#k;wJds5&q*TZUaIwgYKastEk-@wqu&B86=oL# zEi1S>57gaj&Qtrq5o8Q&ftI}fmIFg}K}xGrzHx!A)K!J-g*U$P@<~;bPghsuXiLzl 
zcZXHsB%j~BKbIucP?St0OFIH+I>!iT&VX2xr@vilVjK@TdNsYjx6q&>@%UkGVU@G4WEK&lJVTZqPRo~AI*x(de$*+0kZk=d@(SQOgsj|%MK zr_drqaxTNK3W*2NoYwqhvs6!Y!1BlR|!( zGsb8Y0{{AmPR3}1PR_hr)3&alLDQzOC}qG2Om@N%T&AowDUN3JCJ_9&G=h^|UL2;*>Hz0{xUKQZ zRdbXMJntRKO+-478i95oGYR01t%*SmQRYY`EnV#9i@3}C*2dG#8}VSj*@SIV)KUqg z+M#^yxN8B zyZQeQ%I|0G0^V4HJr#rn3*4PvWSS=m6MFBNozTz9_YS>{<~+~wdY>sd@ixGM!xQ3Z0nHyQ7;s+b~+EZ~yl`2NbW z@C*B$hJjBpT941yfwp@ltJe}sXB^eLbCuf~m1(TbJVtsduO|#IxD8MtmQor5SUQWo z)1Kv*mH#jmq$je1G4Yn?2dSdw)fdcHX{cjfCwNc>nzeTx@jcAjd%+t^%R^1K>q}E9 zrR_$-w%9EA&L<3SH=ndnBM3qh;h;_LBT;9Xw;lB-qad9`P{(R4a?rEUtfY$J_px8k zN$0f0BIQlOdsRd>-aM1e+>B@t7LMW;=B@59WVg|(q=^Ry_{bP)C2x|~J?-nuN2>3? z(3)&|TT8jnyq>5(hz7lnbX1@mQog2B1VP-W*SWL*4ViTP(wJ)I zC3Lj}YQ5$tO3qeee2x`N^gs`H2(sv}c2%BPC@tHVhAw7}bi5?JYk<^O7(}K^?%!no z#r;d5_it0ag)GvS6T)c}jE9(`5Nl=|ez||W65i!U!{^H`;m0V)sZQ6APT!9WX}&uXMVU6~ z^Nt>>`UkaymcI%oz~;|%X(tcm6FHei8!phlItuo`gC~T?i}$Np6u|=?OP3ZI{6%i- zu7`ai&Fgy>`N5Wgt&OBj6{CzN>kqq@e@?}wy-Vm4YouA@Yh1p@C1y=XDm#C{mu`O( zK%PG?FsX4!qsoLScS=sLe{y9!ef(HPnFv&NUcr)H4pSyG5oL<6A;y+&_k7R+IO+XY zI9zrv_jcXvK+2dzxwd#`NwpO8&b^fUhUxIf`-|#g$md+A@Rfi(CC0T4>QTL1Yt8#q z=t5TAs1Kl}W0Sn<@MM!{sxhUQ7k(9Q&H~2vqx)9Fpc4|6VIr{BCtqmM#(MH$mZRnN z#`!+t@Wt7qC!;oIzd<7DJtsz&x8w#T8`PdizV(W?P+&bbQrFQphAw-{Nv}>|PBSK- zuAal%Ob8VH@}=RGsUuK*?!VF4h7yI2VAFff`>=x3Nja%~_e{A+Jeg;lw?J@C^Dfo( z0OjpH1!0cZt?of6Lc}!h(35iR(_1KfwiLV@0A9gi zFnF5_C^yl`+-{SsQ!lDs*Yn3u;P%T|W@Eoh-StAEEM-406=}hEy%M)8jmTk;P`~2T zDvdiclvsiBc!LfUZ6w)UyGirT4A3$+`vKCQWot+XT35Zrw$z$CRC$FL@(3 zXId&5%t8ALcD^4Ee%k6rbG@X7gaWoNCMcub@|^?HE>(k0Kj{xW27u}Yf;JZ38|iOt!%uFlSGNP3 z*D5oQP7j=q%jOrgvul9j=E-cv2!c**FTLWowY_om?C8zlj>_9trxU^PG*is10~?4! 
zw>(lEhqh-U;oJMz-No$7Q+yi+vGkz1XYZEl-O)#R8hfj&S zmy(kvt~v0>{f*lAXkb+*$=z96r}9X*GtvGQPi%Zj0lqPe@;-T~qOe2o%JZ0b+N5@( zw4;fD!w=If>f7WW&#t3GgiG{mdW^CXTe|aaR@3)*`85Gk^rt8O`3r&~u>_$o%E)~X z)G#}Ki4fO9BYv(#`$BxN_DYyi+^`4*e75w3dn#M$72FP;EDrJiEo8Dd{1cRGP-bc( z!*+bxV(o=+rMQWZ3h-m0m64@DC<{O+XM{$;&58DX9q9{!;KyqkiD{@QZk(lKQ4(o* zQ7oLeCe~~ta!sQNLh9GTxROXw@dkP~HeD}12)t<#|NE&jj{iP$^M9NT;NoKazwH&l z#m@XcX9Eg#Ml0x>t+HT-1m_5~+r_^GOWILnv#s$Xuv7&EqjiNLeDc z?o5ZR*gZk|JZO{n-88~3(XCdgsnot_;om6RQn2MlokI0gsIxMKd@@3vON>KAq@02* zjrrHNOHb!+q)=G_R&%I4wkBR@Gzo9nj`P5K|yB z9cm{!Fr6I*wM3H{o74bI*_?>!OW3-|qfCWoQvj*}ACpuRiX!Sr$UaSdE)YI1H#%-s zjVF*0=M3FJKXugiiZnkg?HO_DkjxLhFy|nW0dJB_)Ng?`*>>{+L=NKPWk#xVd`l{Fj#)_Z`jLP1 z!6eB2a`7;%sI6nd8_>V85DCj>^|fg!lmUFfDxjhN8fY`o!GGx$WYR!-BU+iq^<|xs zJH`H(zOp4O!4aKZx)HL&1Do+(v+_5pL!7G)Q}8DZ4$iRe!%bI;)___0Dp_W9!Fupp zU(Ed=)7}u?otp;^1gE2sEc-TyUk&cb7QF-p>0!bT_Dz@#mQjEU8pEHIBlHRC8Oj`S zZfnh1j=PZ`vs_7I`-rzDbzKsI~WIw3-3gZ5JCiwFV_ z*HVD69c9LrzPtcpeUIRpu8(b3Zm7{P#MVS#daf30weZo-*r@@P)d8FE{lS;7Y$gmu z!aj#zzM}Hs1v}@=Ec-qB2?OiCK^U=xA=E*LXauGox?{7;Quw8tRKc>Jg5tCm{Uy>p zv2u6vEWucY{y9oYY}8R=oq-VS*NP|Z8jRo#La?yTP$N`c{D>K%N4X9}OFC2;a{9I+ z-)@1Q1QXm<`>*xFe@lG zN{J{gw+w!-1|pxN!+@z%5)SAj$$eoT)ko7>ytf7KW)4Oc3Gc@7i;~5Gs-5vXV3(~* zu2L`1Ussl=E0iza^W70*M<^AO(SYAgdD)?=YY!{53^>Yd+3(XbhqcecNR6@@BI@U0 zt4*|`;+~3${3AV`My9}CM10uB1wZa?aLjTg2C0gn0P{&HVQ-!GXm*dc4%3zHP)SEn zFvPqB&2Ep}&^?@huG)MAAW#ao8E~Pr0 zgzQ>n;sg642fHey^eAqPKT{dfoGZ#&AQR16PUhNFx@c-0n)ILxGWQ*2X%ppG(Id5A z=WT;pxK4=S2)}C_o6kmGps7u$S97it`Z!96;+LQc1Al;#XQ8hur3a01B;uzb3#uv8 zLZc?Xli&&Q8P7*Vzfo$roJB3^iPtusHRVCcV{eCU4$xg+l?gj<<`m{zi%vhJD_ ziNTid_B1qI2~Rs=sVVR!%XL-^<`szwG4JNr0#8NT2!LdQZ&Q~>+A#8pR0;g4KjRHw z?=ce($L{^LT!fa1I&P{1Y(zvQT0bgVr{p@-b&T;GoC4*sbNNDK^zjPn5?lZd!M@~S>A$b zx7x*I++wy}jthQ@Wou|j$3b8gbRAVW{)o-={v+hK9gAZQkeIODETEun5C+OTqWrc) z41UTgEJ*y`TBXzfM&;P4kLs$}EGJVCIHymI1z}>Rs*G&~&2#0|#aEPJ0S%2Bi#E5% zUqNaHhw&*yExSCRfgk&aNnpBuEC)C>vJi@DRTgt^VdO>A1{0mmbD7&Q5=1~G9_|2H zPE#KX;ru1BCQ&tr!~7R5_QaQ$!Ft3C;RIq&(j;kcS}=-|YAzx#9dIQ*^sADr^O*)S 
zKx-z(xuqyf>EcdkVZz*wuPV}PKPsH6L0Ql=BvdBN3N;cfj#5Bkya=AgOclBn(ZNo; z*l+tjO;&Ye9x3Jv;W4Lar+{?NXw0A+qME%U`{7kpjqrsnY1f#DhBhu5>IoQA#A>_k zcIj|@k!TrYxAI4g@O0kA0-CbIS|kJdIpdv@yXqBG7&NR33?m17_(V*E~~iVrU! zW|To`M7hq-go>L8<7XnB4mx`fZaCC{J{cUmP}2}pc)G~?;Gc-XfPdHqBPbd6m`jv9 z)Ppv%jV-eiHwc3lNZys3d#NtdBoPa#$cato*MzP~z<<#?g&4c@2pfr#vWg2c)=SEj z4V6+4U6rcH0;hQ@&@|o}r4Os4`4;amFJxcF!|(moPC}(wc*i#ENUmE(5fmT$A$tZ7mY>A??2~ zwlwMgTBv%L|FxL&-cR>E1&_6&r!Z4wg8vI0|9%gak<$pxEj4pI1wr~!;Br_)h#8Wr zl?OGw57BHY;`*r-9ElKj=MDyse4{41t2^)oZVoip4;4QFkky-1h)puk?%5<#=QI#P zC9JeYw)U@^AMcKe>qr)ig(EXcW*b={*F>qR$6#0a@mtFec@Z488Y>(cugLCczOv)p z)`=~nV$ZU{7y zxg{(jDSemE-V2Zp3nJKr`=w)mqzZ&PC4v=Lv{+Dzs5K3lJ#)-pzZ*Hkg`R_8fH(k8 z0l)Sg^>yhoSz7yFS0O;xJ-O}hD9=bkUQ2n2HSM3(@RNqTm52Jv)1^i809Bn`LB!Y> z?O8B$kT({#EfyhMTO1;!Cd621ZD=VF-U1Nb8Q)V{qIoQ?v~&-h+m2+f^4I@7-&|?v zpE&F%YQuUTK-;<@ku5x8skd|-XJyl?x>d2YneFzsmsdLu$R4OU1QBONiW*_*(HztK zeC#sWh6lq)3_PW>MkSr3nGm;GJ2!-39&2e?_YN}&JqtKp?qJ&XAgY0>kTQnR-j=hH z_l7ff?7DZpu_Q|C6*y`z0+T^$IrnXS*>Z1~7%LwnQHRlv8=41KXHzj*ok)q5Xv|nW z2R!!FT?sc5s8x_3H9%aPfY3Hx0}x?Ic(>riox3o`Ro6Evc}@dehlliv+aiygD;QaA z1I{k|r`wGVsc*XHL5u3}v2F&5ueI4=zR+KZjaS=@J7xcVdUf96%Yg7xG9GkW028p{ zojfqpb~(?$WftYFwpp*X0c8c9*P&8yKRSTEe?7Ls#X#gH=A5(Fr~58eIN1$@y$v^8 zj5gi3!df6MwgX159^WCgd%@MoZfyVIYjdKOcfz4*LM1BIAd_oyyW|FzZ`!tt-_sW7 z!#mU0f(ulD2a;O-q5KMlTEXKAe|I#$C^yc8*+vIhZXHsfh@d5cSzK|_#+K^ zH7+xK%;mUSV;Zu@a1(RgY0&TLFaS`RDcRkh)yWof29P_Z*R{OCwhG~a@Or%8w?^x=JQ|*rwEoB5WxqM&QcMz`{otA) zqZNPrc{yX!zUq5czp&^vmdot0s+^!o>}@S4kh0YOxB%WTjevHy7~bCd;oz1`oheIO zn5s5`_Q^NV?*j|=!mmAu{pE7E4142^%$zB!pc~P7t2luz)q92rt2YB^j8v4s9%|xj zyWp9L(VMaOEt4Gv5I1&?z_pqd&0p16CGiQ7iL=p`wKcX(1EfFc45-E28oN3;hXjmw zYf{w)unMxrMr@BO+C$6^BY|q8*$)+9wOeNO4<7!m;`zys%!)FRSRAAer=I zZ?tCE&1u~6a&Qu+|J@!R6c%yhS$(<9;;T08*K9?+H{ob|ex*Fi9-9>wrK&YDDeB}X z4+aUzV)nR9o-{41`~yV_=g7xz>-*9#jeJt@2}BzKMOzMn%<+;>*4~WCZ<%2M zwMwQi0O5-qT#&?En6lRX5hpLK9S7&c!9d#WM0X+>Aeb}dTPC;1u}a1SLk!oU-LCriSqi!R>~6I*Wt&=|WY!622#f4K_e57zpF 
zrxCz7!RalU*1x)pV!UCG6ZL8l^l;WsPEdx?2LhQ0_$B(i{HJ<(zHXf@M~q4b3uht* z6#P>dd*50BBf~Q!U}D0X%E8$-86uP4!)}<8%fb1l9~#acdSJB6KAsho5`a4T29>!VRa;|s&~{El4ys+< zj>LZiaisi@ApQzWL_TH~W^au>hq;1;)*4F$&~G!Kr{spPvxlw*>AU7K*Izuqe7EMk zooO2-!##+>?=5rxaI6Iq|6=2Rv`^te#6MG*6QmduLYbMubfDMtC+EZiqiVlf>$b64 z&F1Sm@&5UZO@im&v2}8zwm0zxyBwh*Q`h^@aoaL9(1(nKqn8}ga)9=kIKu#&l%qFW z%raR9#xnUQlqFC!?B4KWn9;Ccy@d?qfIcnwuKZV}=RD8Fh0}jNEkn253L+My=FrhM*P7nu*`KOvInU*f&81UHlWhBqM_9` z9tVav^(<=J>Wj7OdLG`cy%AT#kb~zSrUoaeGmkWVXo6O3j0;($rfu1;SS==GlDLF9 zf3DIX(7RTD#moWkL~+zY9yT?7eLa zxg)Q6g_g`N9*a_ofmb4tfb@@59otY4HlQWYa#DcL*uy!WIK%F0TAJwZD-qE;4EXU! zb0?2Dv4-xmm1iEWqK!z zI^q*E(oP^K`P;0rRswj6({yb?x8m5C03t+;$`8f&_ z!oT$QBad>kG`J7dpI5%b?6o1~@^__dKfl6tWCHa=Sa@wj`yA@qOKyJ#Ixmi3?3y8; zQ~s>r{aFF?eb@~_q&1?_5dSpy9mHAXHRxmFDeh2T3v~j*(tDrR&-FuWFbs2b)2=>av^~27e5fCAeyj%at+Qrq(c@cTYHhtbcCuui@Z{cQ zU+D5a$j2T8W|kjjjfK3xU}5Yo#q~N9JiXw=7~&JyFI~WSwsEs~&hYM{NobH4qi_fs zC`HcF-jNyPDD(&gjJr5}g+8G~47YO+$|ggclg0+gdf|?rKiFv*4&tACKqS3L723P^ zynt1k!3ZzQ0Ni)ywiM_mmIW32fB57}T$Yv&bf|x{n3t9>Y5D7GnCY48UITBdfI8zM z(|6JsV1smJQr2(>5&$NsClF9+R`D+ri?2+aiyR%*f#v z+sxJXLt8^HlZ%4ahV5U3K!lb{zm3lcVdjZ;4ZTiY)V1YdLDaR?UX(+tOS+ACt@THp zGP}Tmmz+)MM}9RFg{5Yb+9rrD{z2$d(Ev(+58nu=N5f{bjR%D*`cmS6Pc0GV=xL2j z1-_$>+U8Dy(j%8mH+BQBqG^mbH|~~>W7mJP#8?q=VAYvwEBmrcW$wl5$UEZs&Hk@_ zXSE%#3Z+xM0I?e`AI5X^q#!~mIxWZ$8`0d$ein`U#$>`x>J%X>du!kt_tYU9#J~d; z=32zdA40!P(O1m*lMoy9sC=FUN*TqMOPqd8?%D%4gHP20h9=bB%_nvo*=UATA0SSzWkLRt_jeanT!$cp z>b?FBF6m(-CLr;kKSe<16Ly!W9*MYjsnSd8#M?NGwmi6dTj-!)u)oxq?Pz zi~H%77adfBc66+~ORmPSkW>i-Ct=Y>vz(y4S7oo;%mLTB31S!DHXEZoo(Bs|a_7m8 zsGlJh%Z<#xj~yQRhl%o2=ClaDUjF?i7>x-{X(Z=$Rn9Jgucc(7{lg4%Mr1~7S~_xF z@09N3wJFzadTuc3fE>vya!NwBG-g^qh0kz4nX%buztD}kd=y)PI^pnP@z1VVcYkib z^6HEsP&414Y}~3>H>9C#FN>p1`?r^v{pnKZT+d0zv+1oN8{XRdf%~of>xpxt+wgS-9ZktoJ%KOQxP3OmNHUb>}oSJxvWhE91xpL(!+z&uL%Q9$8n%0kf$?)YTIXl#nG?w-yOVdwM{>4gMYaV+j|f(u z@PU_Hd(2bs-`eUqsrKFj z#x&?{FGtoaoQ)(T?hganziXzx*z3|9PKKX4E5;{tc04_MIy>I?j;|^{9-cjF}i 
zvlmxP8V>I`oZpTsKigK`?oNElu?g2RdpWcjeB%|)BNaeiVLg1lFXs^9&3vIoj%G*| z66kDU>uj-azP+JTALMSu>e4fHx_>*lIXfZKK5Ikvv34S9XerF1T!Nf{pB-Y?!Jn!s zpxeN`9lj8O3VD7}J?FCX^0+-H^K=~1I8SrK%vSMaN92Q0+zwk8yLjkRxFUTZ5JUqV z;Xrh^9mZ`&1E3qwO|idt`6K$%-zOZmuxQ%ftJ|8Vp_%PGoPoJB(bXO;wh2Z`<$g1J zg!MM_&G>5A9GBCfj@~ZaCQW2Z=7z7>JIHPRnC^fE&(-9!VoM`|A7OG>&%`_FS?xf* zyY%PZfS`o#9A`wukl?IA@J{wJhZ-Ym7Qy9-v*j1`RvPLM&YwY8wSN|hwFombI#~-0 z4DslU+<8mg6T%t=HjsHX-XKr$%N$Z?=fc2rH!hvkU*CHVe*3h(+Pzk9)IwkpH`=3Rlv}bdg3#`5$(0P4Kd2XB&I%gcd@4mKj4l)+)oUWgSjBmQEa<;yf zu3MG8$F#0yw65Q`S^>>{u2WhM3|?((J9j}8#(f&gnY~{hKR(|7zJI(Qw7#Dz60T$p zaCWw>o_~KKBjgaY*4w;P?D#y_dF6ZlSRa3I{(RW$Po#Z^rq6P zdJ(Qs?7jSZEXNVg{U z9hIACI{^>Z<|Dn`dsFoFcGCnI@}Se%k~aD@m9UCfey9Xm6bUYQ$?c%$8xbz~kW6e6 z5v}k~v3$s1a1CEA)o8J<0?ZQiX}f>=m;W17|4mzqmbXe$8=g8*UIr~gBz|=C+Z+DK zw==Y25gPFVnHg0M@*qo@uWF!+XLxG8R+=a`tI|Z3H2F}PR0~`@g;Y9`xqksDGL*(O zM)s7l^{eIS)gjR!C#)NJQ2b4~Z(^s7 zQW2eFv8M%rkE*OQ76tw}$5)b=5Ix?(3W;Dzz8Rr{|V!Y_Nv(U@3$ix zwZJ5_E%mO-oYwFSnkTg*oga+63Gy`Tp2fyyKcp-%FwKe#%xNo8Q@(;gj+^}d#7*x zh(DZcG(SB|AHl8NWTN^0n9%DJpnao=lJeHz^HlnNw=y7;dk?e}{3O4-CB*h|88i~V zIzdtEZ2e%q6@2TudA`J?#S-B-+9vr~HhE2w+?rL{`64LDu$A<`?;tX>{+H3^FCtDB z7LW<*zYmUot-ttxQ%pqMY-~D2|I1jInVI#!n+r>rTRNK9GfG&3tWw2HjBJcSX3Zwn z07sDJ4=XDN`+pM5L~Lx#Z2zC$)R8+Ev;QbGO&9DGdKUWk0Es^=?=s;dFPH2tLDWD#r3o64bFB&&XwnN zL7s=}k6(oI7wnQRAK&e>AAM3>Hrhd3@?A7PcIbbB1S3J^VoG7iWZzw-9n?m)eOCvw zYp=zk&n(?tA^;m91WV;TpdTBtL*jk~8v+xkceOjt{Jq+q(hiSW_l@Wb5M{+NsJ42j z3PwjSmK{7`opP0t2ZIb&RtCMbtIqYa!Atq68{OArXwpjC{M1p1uB5tsR|9BmVlfH7 z`d1AI+S(UP^;>$z%0m0A=RJ+@6Q3`MTJ8FRV`%d!e;M9si1fHd&T8ITpxa)^-T}vA zi=BEkIn`g})z{PCt+fO*d@c}o_`R<$3(*p*nYe2vG|gph^@!1X`9M7<}3Vq zV#ih`n#oFdOzn5wEWIEXeMGTV^U41pH=R$^Ba2MBL`urtI6z((yhFI>>~(2VB~NDR z^mo)Z%LXS%)u;Jw1=k>Zs!vG#-en|cz3;%>sTqz#U#8!~9lVW>KY5EdYOrj&`R8yf zgBOJ^7FOY7H7*jd=19kVqe_2G~{C zMUeia(QL}dJtuUFZpXa&SgwJj5(0;x>PNe!6@iVNRR_+7B0>laacfM(BPjVY1Z(Jr zN%4(-g|9m7?Ff>q`%bUBP!B#C^Kq6NijBpdmmTmy(ehLP@(}9e2D8%I40=n&?gtvuFzE2 zFNR;qNYYU|gN&s_=7LZGwg_^hH}B(@aYK)$cbbkK8Hdn`Eo&B%dQEBYrv|YGWyA) 
z7I(Atk;wV@wf2zo8CDS~gn;~?rbcVPlQ$vI1}OL0Q;BAR?}xG)1Rt`dLe-)&7Mcag zq5%Rh?2yNeawCjy#7G0+ctJ^P?Sfn6dRzD=Qq{V*|C?2SQaip|K%qX=B*=f7UIo=;6Ytfa`f-LC3ft=6~l5QasF`zjkTk7 z^xz40O7+mi(|3=~7;vBcxNRT)36(;u3!Xx1?MsX5ffmy%vEDz}~ccnwkxk(v;^qg*-rJ+>73JhrX-K7&2@Dto!LAKlQLoYZI=jI50-I323$LL-gvKa) zB0jk3`VATg?tHt#-T@DX*|zJW+=iR{;R=sT@`0Hx=9#{8xkWuWdgbv%zeD^)uwxKD z*>#Bq3=+*w*+Q|!zUFp?zIJoPypFlT-$4lV2`0Vb+W~IHgfjM^iP7D*!7_FOQT2*k z(RD>UgLd?`OeTR_YFyz@xc!UUYJHqk9dMOy(OtjLH-kA*W4bd!FS~waV{Pe#Uv@M1 z?V@jnxS$F8KJxhlqHJ^RNL~r-z=it+q3}=kZf=qG#cV_FNQVQv^w0!@V)!S!0k&VS z9iAYs5yJI)0HJ!_huMN%i&s!P)Zv?5DWRJ^i`jtfOI!GB-tf&{&QQUw#?XMPtV0}Z z|4TG}^vS#9bi(~vm``Hh$2zyy%jPHH)1CRJ1W*_4>KGjO{hyOJn!tOA!^Lc;V6o!o zh2+!S=bHfk%#rX%!n*rC{&ZO!6WxRU?BkWZf^nu_n#Y$4PsJ4O=0`tl* zlFP+Cf-1gW91RMP=0hLczI)7s5;30TerV3++sk~)ff}KpI0d}OhhgBLKBUbadOrIR={CRZE`(sfEB>m}Uk8cTInQ7=ZsKOj&63RvRh6 zVgAMnMJ0qK5xgX&9jOK)b;7zI#J?aIVXb;W$C&#CJqID5IoPaD@!su>RVEOY+UjcZ zD+i5BvfvLCP%{}B2+41aIe&QV;wT>zKLhunDM6SqaTCw>4?P*S@J~06BfbyN{%UoJ z94~+{>?Gb%*%>gXqaLb$bjFY7O}94!P!XbvhXrNlN-{4D_u-X%#nPD+x_afRF4nX` zNK20?m=5;7ap8`EgvWQE z6grW-#EF|p7E@J*mpmv?lO9X4A3fL`$#Al!Z6q$0Jk$IyUhh8A{#8cm=VRAYINs3^ z5#4m69u4ZOOCY(OD#OITYfe=;F(4#u=yoYVGZ2wlXAem3^pAL?Awc3GJ&hbjg_<^+ z7-yFb#HZ%(6&yV>Qpxiv`lpy?+iT{i5Y;`+&3%-fBr_NqCP8V42a(uCHhZs-$p=co=53gbZ+ zTv`cnp9v-gHYsrO;nV*{^w;_X05wmZ>(_}Q(Qd-$=jwN_YsQ7gF2$a@L4YUuf-;*2}gkMY!Ew z@kNJIY$-CV#HrG9eUyPLUJj*O6!#)&t!=H%m1Q01r>psJG|Wj$0W32Him}iMxN^;- z^3nVSp18SiMQxTN-=S5BsghomRm{^ol1o+5yT^+Xm($lUZvT$TAccjo;~ZNn&zSwT z4KQ!{l_8;-6UedqGqEYD4kuUq4A~Oxp@8sgl_w`4_@!=(LM6dUvp%DKKGZ6(uo;jl zv(CCexCkgfiEYJHYn>;DfW%b043On-_b|OrPF@Ht4@yzBc`)Iu@-lhB6drrHx58ZG zJyd_)kVmgBf~FmpYGf!;5G$28`2|&thw#T7rAj$(QiQmlr8+}7t{w$ zM*8c|`Ca|~`(^`S#hr~NS^8JL!L1pVT$>;D)YCeky_lC|b|1w=HvO|^F^Dm7H zE~kT}(>K2TN&41J+yVD`#h;#cW(Z&S@FzX=Yno|pSW}P$IC&{Yb6Vms$r7cg-mbB+ z=pW*eY0No2FQ9ce7piCY1lgSzk{N7K0$~h8^)Puj`uZXfg}4I_2Y1`0eCzDY+Bb^O z9@YfJL|^zy(u`*ViZK0Ki9eG)S4wXePVWrjffgK;H(!L&-2QXSjJIX?XjO6jVp?8nej7 
zGC@nvnu;lnMx8MjoN-xt*|b6@3d@|Axbu)G7$&Z8(qZQRAj4r;0{ofonPeA>j!d3D!mdl#6KTCzaGCGAy<`s3SKUHbAIM%It ztPWP`8r9Z{eCJ!{dw$>k9z6-4hcB5?gHt=8kxkm~4gV7!9gZ^J6LNq;PgrxHRJ-u? z_w;u-{yeqRR{Hfww{{c$+13AtuWta(rD?Z~ZQHhu9ox2(9ox3OV>{WgZQJIX>?Aw3 zb@QG7o^$_ms?M#q`kn5XexB}`>gnp5e%4ysyVLhIR|c~_Ez#xi_~hV_-f{O#-J5T% ztVb8A7qa$xSIHl)9|}(5Gh-HxT6`9!F?1$iq((7AW2Z|zwix5a;LZ8;Bw|%swktEF zcbd@~h1I04HlZEDoBei%t@X_K#rk@++duHE!rgFB)Sj%|6Z~9zIXpPmD|RaQe{5N` zK7u#KJpZHzV{~LcE>A%aDAD8`5a7aeE`sq^KXqO2Q{=mJvz2S>Mg-%;cpNrJq!YS@PGD+Dkslk5+85_-7=Vnv7Bk znws>y4|OJ`7__o!LLnD*R6U#VJ9-!+vrilV70!Rq0c0?bi7yfW6h_H(-d~457>iZY z@4y~5$y7Bu7i)wbMgixFWyMT4tSGn1&A|QyNzFqi*4sK{_)Z;`#PfYCTnE%9z%H$?VcM8aF1vl%P-O3XEXwy+kR;OVHfze2*@{fQ1@5RYj5KqF$Y9G zAi?ls{x>0Nzq-gbZ{Dgto~rL$T*Uej zY$nW&J$Vm2g#8EJE&+1BaiXgRkj)E6Pd6_DvB*Y4mPZF>C;R}?Yo5m=$RvqNN{O+u znB0sBxVpP%vs`{8Fv~T4_$3j@mUZ~Fe(^SxmI2o~SZjY60SdYaB>9nq8?x{}N#^@R zoJRattS0HuvKWoOrcJMm35^U6Zl87d42J;j!`ZdP_Wv<~=gK&NZxK+L2EKM`~ zde=W+H@W6pX%DC(qBo^&&pD?+Oq$K$Gv~NNz9YvS)gRvJ+nJr-w+*g^+vd3+El}VG zL2g~KwXl+iX?$HB^jp_Gi7cMyHC_nOe#~9pd6!woBWyalLEW;W=(~bHSff* z#>CF-(sXMIi8rv?@Tq=uO+^>3>==w8TxUXW{3CmF8viiuy7GngCF^AqU20oZz_8Eb#eFKM2Gl?autyj8iW54#Si zHq_%@4aCU$KK<7FDa#UR^ND9h|1dcsXcBGJ@G|AGGp@M8u5;aa9(s92TySDm| z`mM~;Y|bBsTO{rfZ60ZTQbJ_JL1yHbz>Fc?0a**_j8G3XRs?faXMSf~XUsdSJDnr+ zBe^5`BYV2h(mK?2^z+#@J&*11v@G8!{$Yed`$dQ3w%9Exy9h{uM8+eF0*t(U#2dz2 z#+xfD8gvD!9+j2y`@NB#Elw||MkyOkmjt-F#`q$$Zivn?apSqx0@ zG$$1)uFj6Y5NWt@VDFA9$X*(UcmAylpV% z(2LW$12q8^HVB;Io}rWGyGkeR+ol|y(ZEej5{?KAl#xF-$GK1_LZ2DpkXt{yp2e)_ zyrO=Ex0&VZibJF~Nf1SJ5e zaxUk$p7U*GvJn6rTv4?c_nl}{R<^?ACNrIrnv;@;Mj2aLlUY4IX^4e|Grqie++-5A zG&N3U1=q=9YHB<~N&Znl(fI&iVy31BTUl){aEN`Hlaa;W$j509A-&Rn#Qb!pvFO=* z9f%YW0K8xuspwEoJl|oc*f_t5&(okH;Q~5kBNO7qTgT&~zPW}^S8L|@6q)%mgloHk zy*XHfs+Ovn>}Fh8&!~kv2tP4Z?tYptm++hMv73n@Zz{VgFFQpENvrQD@rHkWI88Sx z*=>7{*7SX3%2EZ^nKmm-A6Wc7g}$lmn*kpMP*NAxa8ZfB@mX}~dgdYc%;Px5eUdN*)z zbP>_gg*{@E&Mo?3Vql;fdjbrNJY@UL1z0b7T@>tRmZFt5%(oX6XklTAQNtTCx}EM- zQBt^=IVt@YhPvu)3YC%3snHK*Co~RZpGBZiiP_8%E~m9qamu=E9P+2?nrJMpi-zS; 
z$0|j3nQ%UXuL!gZo5jQ8preUY2(r0}JGZQwTk2*JS|YD*)eCkS9N7sW88sIyZe(5v zE;`2Yto%`4WG*rE)kc%Enp(yEMjNgCI5@K`z;m=L<{!?uwt?!kl!IfDwUEV-??_t2 z&Lb43qKS}ezr!Ae$o7C~qp6KGN$)rYnLuSj$>WgB4%?&+;ZjB>z)~2DJvBYra_<{b zrhowSj-|{B*7uvJ8wLF9wj?~shQB$vWhG?0EC~r%Mx!xQJ>H)U0gl^y;|EE%a}o0U zy~5j+)>caWT9om%70dXw(;S1D+WS?pk}*uoCMt`PNhCCMn9C`+<`ZO=WONIai&ChA zx7VLEmw{sqQ=zVx$rMI|ziFQ{SX0h)IMd5^cr+cHkJfJ5SMS2Z0?KKuP*EWjY`Tr+ z{8n9qXAfgqwN)q_IOAndnj`Y7gz4kLixAOE67!f%`<&ZLBIZY-Y+Pig;Nft`ndl;y z&k&iwlF2e7&0OD~rQ^u^Lc++&)K8m&luF{2kbU78l9b?*r}*XdA?d@2_&$2kYYTFE{<#GyTh| zIS#WezNa(l>htsEwL>>_`M2Fh-)Djq$}V54XV2cszW!3c<4ZvaRX+-{eH|HiS$W~| zjZ_%Tnk3NS?)rrsaktmW3+-BAlSs@C1uC4sk>kjiJOdtLXgTs zv9$&jNel(u@hnafLct#7;t-Ho#D&T?fp)SJLwPYkL_29`|$hPzk+jl3zg4nt8aMjQYw^qnC`1K=X$6OhjVi224+cF_Td5MFPddG>rLX~dgeTjKH_AbTAYtGnB#!6~X7K@d~ z>1l!qq@EL4A5eclr+ye#GF!cYmgk9 z!iagPz_?H1b~Risu7{{U7RuYysIRNK2csKE>+oUFUjBuKsFN5W`f0v*lW=Z1!j(LZ z_u2KJbnW+3?pm~E?TxfA-A`4%5h?E2jC%Uz`r4z#Q%auJXClQTPM6M)W~TmLZ-zPZ z=!6m5UwrW%9}zfHJ?fAwy;O}2Ut!S-tvsuE(h{U#itvyPv|vJ!ds?_*VRH7{Prios zpQeE_UFcbw%EKlYCsipYZTJJ#=szwCfxWia*2%d zE(r?$?8XL#6pJo(3ZCWZahXz!JoSu9%ALsR2=P|=Cz6DpRKGc?@h?;eWb$0+n*vKY z=kKuy35pbWf^YAmEmPvNY4Ui2eJ6s=GGuhrTiUc&HnTz~vL-3m+_=25>1ng{2HKKt zgum5VdMy{rcQodIXrNk3rmGB&I6j-wRtvJL-$wI0|n-(xh`_tELyv%RjBG) zJ3F%Zzv!D>U^w~@o7mSkZDy8*N23~7$f|7ZENw70Z1{zP@hi307fu>@m1grvc8GLcukehZ+V;Nb_N!=SYL4C_HztQ#J zZfp=`CY5=@YD`sLN>axI*%8D7Vbg>HsoV(^G_|7dm-V~8vlVOhwBx%t} z8-A)^i{WpF#S5Mnzf&G1P9-Smt&4F|s;ZMGNaHeL59=2>ioJ%AwJu0AR9ED^DFtXB z#~Ge}<6UtbcHk%pSIiLfV$d!V&$R0DjmIweA4Rw?_P8KyW|uqGZ*%Y*U>H@BD$JHv z$n)%6_-#zzXNcuAU}TktT)6DGjBDf8>qz?%Dz!d`_#wp zB>pVhz^X6Au+lN^K5^+H_qI>A=ur(f4M#X_7oBWY(n+xSeaLopR&2s)>`nTZ%AoVC zEgPM>pYXt4$X>skg0-?(Rx%G4NQ8p{e`a{k<>Bhm!SCh0z+@w4950GU;)%1XwIWvX zxFO73y-A^?k9hGUZIy#i)eLE-{iFGKc_@P;#W9R8S|OPm*LocI?u+)TGfDd}KFtbY zn)i?(O~tom!q2>;DhV5XKbHoT{I(q)gW9`#NP+?937^uo}j0u7n@=Y;^1( zg@2Kjt{*<@`66bW1$(DB`AtQvMdiyZH#?{dT%SE-U*82MgFzC-LF$L27Pg9~-7yxw zoh#rT3UIPy!(YncGR=8{i+{{`)%`O{BVnHCzT#c2ee1)&B;&D(=DKvjx}(=XkS%a4 
z`8X&3LM~;qN51aM*XBR%vVfuI@{z(< zp!aB%6eG7ZxLE<;GGX1eikVkYi_mURIC3t~9OE&;x@nzs9FpucUSpIuPZTfP8SgJp zEk>OBXw_6Yb2L3;<2eNCviQ&dJKwpn`8U(DOklm&HDaR{%^%+0!}o&Qgs#{Thkvsq zC$eeRL2-azMQm3uyj5>LoPGQafY8yC)59r8c=Y%%QdaA0v8YP>q1$QZ z5mKBRv(%az_hrSC*&{H2_94N=o6wpwGsN)QvHtT2@G0r^qTRZhEd5HjzUfUrlLwzu zDM9+k9R2t)mDo7H7zt!uwKKo(<)#oALZQq7PB4E+QT|pFS4T``St@hXZiD}{;Fn8W0jUcas!z@Z(X>k>ymSf z+aR*|GXjIntSwCX{%cs12m2}RrS06B}!F+!E9%BFDzca=)(MBi|CyS4h-AYD7c zWx*@yHxqTutL~~rQ-bc_QARC3Gm8Y9QW|iz zJNvA#728p_?n>T&lgJ&uS(c(6n^g2A&c7A})Zxxng+AOVAHYp2x{}~qUpxvESrgnJ zH+O{7Tw@im&Ec?`F_fx-dGvT2>}r@?G5a$TVO(?zoLps_>rfyopji zpfI${E~KdB&VC`pZj=x0_=f21j6;{5sOmQN{BnOw;=nyok!!I3oA887N#zflQnz^A zqY)+U1%g!WZ}|YY(ayh8(OW7iiO)1zu-vKirPNF0319so)S|_!2VMDxn=19g+Vy8z z_e6xfbz!4YLtN@&)u34Qp!gYjm^;i)w??$j#NEb022b6%*Uy6PUu^u^nD_z&W8(C7 zReWV77%KWMuXe^A_3~h3Mbs_-|QZeag4BiQkpB@q~H}u(Q^_ zP=rura>+}2=0)*3C`i!1 zD?h%-|3I@f&h*F#oqurTd5b#)RfBcg!AkBpeIPqN#&x%g@|o(2L>7T5>SX(lu-4&R z9@^E~)WTGI8)0xBtIOU$hB=Duo#4nErCO6_pd#_A-(GL1Gc)U~8(UvKK{?vcv*yjN zY&4ltA_=A-S(;2>n6k2;nv&;KNU%#6&$Y2^goV{>;gFDdlQt^YUf*8$iHYIj($;V{ z#f{>=vE?~%t~I=9F`e67E>6WjO}l%ECEvrF7n{b0?`O z`OW?GweYaR+K&3z<09S7b~TCmA#%D$T8o={?)^EaBDmi8vG=9!e0Zbi>bpe;!OFJM zV7@z?Qy-smt}~&jZAyKQeLp{0#xwklp)Y;%R*w0igc z)N^&WI>mjQ>+FC0+0%Pv&;8_FL~ig27?Qu|U-JsExQSb=Iya+CLwnw*a7UnQFq{l5 z)@P8^_P$c z81&F-uyzsNUV4vjs^3q%s=6#j#&2}!2zjaheru48MrCh=__(5F41g-7{=s3VV%B~~ z!K0(9hz%CL&gvw=;S%Gu6#B}9j4qRGCg0<)r|ICY!P^j;L*7zUB}!33tjfUx7{86} z4m0<3JSTOvbH;3mrEo)jIKp7u5XS&?VsP{Q@D36IhOsfAfP)DXUe6eOcY|o7+}Qc4 z$>udP?R~QgWsKCQSvB%Rr2Yo7KQ((x?4M+BYf-*?DGNKN+YMxWh5lEfm=Fvcxm$Eg zl4OSrB``uo83r0!2@Zl1wSPQ{0N77D5R9`U9|(dJUUGqQu#MyM;}RG~^jrc@h3F?l z9U*kd4q_L) z+FYn_*oS+oB@yE>FVb}hS~B#hq}+bruCck9tEaZ+IG9gftWqz(el#bchCTPJo3KXQ zs8j@D;lOR1xC#khMh5(#3svV4YbH1<6^OpDzU?_YQbk&0g-l-;P=>ii@^mQ-Me&=w zEtMr*_x6b?G%yXCd@nCsu~W0PZ{we$^`Uku9$)JD+c8raLGg(QKVf>S*Z${)l%ee} z*X1Y2aDGOGuL%!$O>sw6HwNPzt|oRklrlSDiy2bQZ_S~E=~fdsQ@EfXcgu=uPvzVy&4&y3h4F@ 
z4U09P(7h1gfQ6>?1&gsJ+W6q0w&(k<57ONn^I*0<-92g-*&D}h)H!OeC&PLR$1kR0=xWzQ0`ktoYd`>72or@0$@L zdfJLhj?tWi8PmJBxV$D($$R~I`66-@MGv1~m~QEXI1nHPT5Jz929lkR`PMW6@-wU# zOWx9&@c5fn`N4y|{EHMCiRm`r{5Np~wlBZcgC@6?8fN|D4HO&^#5r*_UJQ*f2S`vQ z=ADmVgPUm-GI)pveDKp5R}Qc!T#)1raG*f(9Q$fWKTyDHrkV*boH};n$~&Rty@I$l zQ1weRQz^R%DN(ZHV?OLPN#|bEpUd_V)5`3LlIV4kj_~t_S&PvQ<0n^audp6!m>%md zkE6p-s{XZ@F@Ceyw}GwUw~!{txrC6&Utt|`-U|y|CCa>xFs@bT^2+`hF{G~S%4jgA zY0`f{THW0e^ZEy zd)Qw=U1^}bB=S2E>~-Bp3K5-*VB zryt`LtL_?ukw55M7FPS;k-|*Gb6Q zq1mtorG}jP6KC4X`0JGEm#J4f;aOAero8&W1)%bfnQIY74%h!p2TMJB*T>uYEEN`H zcqu%duK1n?N5@1ZY|7%DK)&=Z0@F^`%wVN~l9O$`1M3t^YwM3Yb!`D@S@4!D2+ve^ z4Z&`=jE6#7bfsua4NU@k6|b(P4fVCKB4KXh1o&g@x^}6O5t5ttX}*H^n0=0#hI(5k z)i+S6G)r(RdvjhwD3uq)C*-@iG9yqm)(0@r{RUuJA_hq2Da zKglWCCL`_PIJYhceo0-l`((1KK?-h`1E1Stn`QodoZ7MoJU)^WWuDh+xr2h~sy<#9 zLS)bmNK8Np&VpXuZ2Q4!MjjbMB*?h9ipWp_>Tw^;sG5 z0EHHgB00L7YG!I;jCoXWh=@|N7Abp)_`XenxI^)$IVsgHgb|W-}Flcw+dmq90qrzCv;5g&_?f8!yv+(8yyDxp<~L3e$ftm#LlLseb`-PCl%v*c8_3>QgHqv-s*k z)iXF&*QIBuK@AVP3cy1zWKlA4M!-D?23Jlx@GZ@=nwyfBMZTwfqrFCcsDl``W>HA^ zEf!B}f7N))8{ygkYHKxNi|)bVy`9E%790|9p4_uy6yc+`N-_d(Sdtz+q4oH=>Bu z{mjW;661~JNaViuK$9#w0l;|ZfIh9M0#lFWMy zFMxd8J%03%*`d^+yTj;+$$gZfD=`z?^)|`<`Xz?$oyrIa-|i|ZSSxclgLWH}>#DddxQtfsK|Ov7z>p2k1Wbrcu0=10{jxyCg05 z?N8|y$aA6dj8S86-Vder6R0o1T4haV>W=f_W$pq^i`9_{dwTph-ZLpcHcW*t7bUx? 
z)rS&=)~%#}#t4+J<2~qvP4LT49y~M>Ed_OPO)3qqqH&cBgi)8<<*l^9n?JIHmd{5% z`3yK%VX3sg%XzCcmeq~^?(z0z%MyB0Z$Vq%|WU`elcw3)TbBuTT7nkTsxd{1jMtlF9N6wBQIAn8ZbbB*B z!xcePaQ;=-j?+NP8=e*5$StKD9HI!0-`sym$w{%@*2n0p>?$SD$4<%x07dt2sywrWw+wz{>U+|joApIr|o%n z0X)~N8Qpe##k?u*SSLYn5Kx+49)-)0Bie2Ie&h$l#6q^RUW%|=rVyD*PN_NDxyYZg zeBRkf<7`gD6Qq2-*xG&%bhzREY4a)RijIlaIEG+-Plm7k3%FIIaPx{YuEdK#;VrSy~7MJ_) z=|5}y&*Qhi9uYeSGZ8xrHxUOb6VW#?o`{Y8pFHPxV*bCL*;rVKxH#E}IR3f9`E9}a zPn+!D5gu%u-|1U3@w>#$OvL@|?SHoL?US4B+k%sbjhX8|j_gdV-@WGew*A+67Pfz^ z*tnR8{>gDMv;M2rw}jz8R!l^!-x8Co+}uR0Y~T0vkDu>0IlomT|LNyHK3KUq{|Aig z-w6W#KQJy1Ze}*-|H5&xe(#pEd`nUOyU^Zw4y>-~@=D*T!0{B&T>#(%%Nz>}>xZF7 z)LwLA8c`up416Zce&qtuFCR3^ZMLGR34QaD6#iPMMXLtM11g>+e4S?-^h}C^#UJYZ4(7OTELjKMyI1`D;pu zhHDz?y4r&7tbD*WqY6L;v%k>e8M*y$C+!S%On`quj;5xjD13hUph79|2YGI z;~#fH4D|35;q{o=(SV01M3UC zK!={jWMo0-2XKMS-GIF6@Rzw>26gt*jWk%+f`Wz2*lMrAC>f4?vajiLx_>TkAIx;m zAnr}z1Z;;NPupd>vb_f4&VBjN&E|zeTzm^49dVinIPL_A$xy7sBLj!P1dOvXrf<6$ z$zMcR1T=3$L6AI@ViB<+S_JrzZPATgaSDW3={c-@vTT(uUQCo zIRawx6D5c@2;0s-PZz!kL#Xst4zBJI)P|o|4Yz@i0>Cz@3lP@?mP-$=@N`O|68Yg1 zkOb`_2|(O~5mEyp=v@<_V}UaLje0>+qZYV~5UKhM>h%+&8Dd3mTs?$={E>p*!IY9j zg?OMuw!DdvHNYhTl=UoFfNORgBS-e!(+rt`o$pv2Ye^Dj(FTh9P>YPdQ8!+=BmhCQ z6J<@(*pSFFT9tC6>AtS#^}Y8DQ6I1ss=|U>H2=&->#xR7i`<>~2E;ddUf+y;zVa7^ z*MumA=R^iUY6Qj~^OmCz{69aF1t1<;aY^275+0*B9^3XJaesaOD4>{O-0US3TY!3Dq^ zcc~Od;WNG`sFaF8t@#+iUV>9-l`11&e*=2N34_7FOMiSy^MWJ^Zcn2G0cT zUPItV4om9kV(mKi8R9khyKS8T^VpMY8G-2u@XreBh=hJ3c&Kd3~zp_c=#Bx1R3yT$g9 z2tqX-QJ11<3$)*1$#QFQ+mOQw7{scDszs)`DsNJ9AD-oR?I~hV&*DG|nM(EG`RfdI{V_D)<>B<_2>2u7qR3C>6q_ zMj$IP>I2Dyj4Ig=y_Xnb9uFi1x`I#1w3=Wu_TruoAk$zrubH#=XmFp{{1GVWBtBe? zo2LB7(SO|d(7&>`{xZ*R`LZd;zVmmv)Mfrh{3EOKyt+sW@hsC8(}E!@S{8_Gb*We>t=uYkx09~Zr2jUCZ#ogUuW3IP7Gkm? 
z3tK<@z%=d!r^pTv&mD9{aO_ z!YyT`fEM`m_ynQ^u*c^_?bDTYT!Yv z@7!*Z$fO|zX&5Q$7pYHWAQ=;Nj#TtI8pVdI!=7Hj7ke}l{p)uz6X4HoMH!rz$dX=J z-JLwQd;%2jA^#|Y=c$o4$rO}p8~6WM(!<-<#QJH>Q-0fephF36?pk9w@pOVADWer51ZJC z6-mV6F?ZD&YJSeiT8n!{kyFoSBVnRrRKYI%Rs;U@lMW?=fm@iII*g_G-qxkonnFj% z=CijG7Fh%mDOwCEz3q3+E){Rjv~-ZeUCDM{$AZ;yiAn zCeGthv6QZAGLd0ycVSFjl#%^S&l~0ry;pm<%`;rwP^vYPsV$*ZAt2Br)FKplK3PHM z_&VMrdwjz@$X;4=-<`ASc4la9l4+jmslM2bP_`Bl%QM_CGgFz}Z#kptT*2zPUZ%u# zxIW|JY?;A29W(Qr;=#QyJBlo4eaREe;rb|7B$!@~Aeu$W9nB+2phF-9zRzi!<}!MLC7ttA`;M z^H7(0@mFHpd|GePHSDCE{QLq}qbGsI;<>!DCG~Li4xlN;cwpsEhP9T4I>m~;)sR&7 z=%C`6gL$S-j?t0v?*Ir?gW$VUJSS8IRtwEYRGcjS>g)`&5+B{thM5s%9HJ+)8rSn6 z-KYKfM%-tb4A+^Z z;5EIjt)eZMV>e}6`)?hHUNZk=k-2DipXND3n$hTX4>F9u4()&F4CeO}DIgbvTX7!u zEBaM}-wnlOX)5-_NbI$h3*zk~C|@{57Q9D_C7oUouS-NkzS7I{SP^2a?tn`&C(P3j zu=a?`U}k~%cjN`-14joInOm-pC$#|dD9P6|Vp#!prlYmVZ zxV~mpp7OLiH;xjgUn2}i(bA{Lt)hke0aRuPY}1;%xvkseRGY=# zg*g)*O=#sD@r+7#^Q}n5A$+jxs6naL|9;(%apCih>YTE1wM~nyuF2Y8D*I8CwHwnrj1J&1JRsAHZvoOZkr=9v=<|9%t%r7QhzEN@N#FW=AC5e+RJ zt74X`cdnWh-QFFs89i3>$4zhxi!R8>;W=Yz{pW}?4CgP<`$cc+hi%ofXE4hIZiA>Q!R(3Kxj{aG-d| zDkX+iz$P*-`@zN@$i#();YfR7|9Zj*uWR-yk-bF@D$UfOBTF*vw_uJA`JMdTW)of6aTEv{RmET*6^e;+~q05kub$Zl2!nXLBLSEt>0T! 
z<5Jasz`iVP!KKJZ+Z!K%r1!oZG%n+)Xn;oax2hG7ZtvS%%gaNyijVg-eK=rUHQX95 zezki7Ju0~YB_rLQ62yBR0sCOsTAQ2mZ0P4^rzhS1GObkSaCdvPR@e>LKdil=#OX&T z@?#$%O5IB(B#!GpV>iaI`GNieh1Ce-lw=9CJSm+RArK@2b&+2dHM|XGA0=<(+&SC$pxNs=X6C3|81057m*!tmSx zg1b72Fk3mz&5&lfzw;sVvTFli(g2CHd8AeP_enEZk@r28QqmquJaW8(hAXNkdI0W# z(ZFhmzw$J}05OZ7*mKe~IKX+pBE%o*Q~r6$4DN@IaCrOLR`+f=WPnPIgXYLuo_W2BlZCh8L1t zw~U?@?GbZL1wbCa7#OTsvl$WzfvngD4fzH!BXgi$l5dGjUs2eQ*pO|BwIw@1w(tCC z%gL;z+!Aj~aexJwTe+he;RD16;6u)!t)W*F&)ApVwFlTkZ|GY%tRAfqFi zqSXDtmFGZUL}0{VgkZ#Bgi(;elsqT;lWdAemQt29TP9sPUHGSi7@`Ol)G-nT>UoHl zFuoB=L8_uix@7u~a*7yARpb$58ff&8`Vc2!?1Ep4!c-J7#A{GuA@dCT>!dwqX;`|{=%-_nwbI3tZfnfAP zB8sT0Awah$3g1dCN%=&QV2MOXSiq)CoCWc7GS86?+27nlqyU2u!asKFPq#5^N-PA# zJ`&HV*YE%)fc1bNG=GRlNG+5BN)M5b{Bz9fTR81N%rh@QMFY%^=@A-Wq9F>rPdXr^ zAqu@Oq9IDX4?6G`@}!Gw7U8akd>G*lWsmwPUb9QmB{lOYJ%fPsDeMuP{gJWJE$r$?<+WmfALm9yWj0Qw%9QfSVgKMstBOsxk zItMC|pV|i~i8wk3C`m3|LluO#4q#e|FH32%$|uBsJ7KD|Cr@1FXW~-1^DgTuw8i3V zNw*E7>j$@GWbTS;O0*@0xqvQrmI|@id7Y4uAnIV5_@X@^FVUAH772vB5}ksUKNanScVIv0FL^6oiEoE=AU`lHceQOQtrqQs zdC{KImva{FgmfT0h%ezP)*_~3wqiW+EU}lb7REs%`Ju%1n?|60H7bwjP+( zAoVn3AY*g{y{uUMYse%0zwyqL#57QHfF(~whK3Y|d(v#w@m@njm zt!`>>K~Lx_+G%pRx1xKIK%f^T!!UmNwhg$LR|ajm{Ywd2HhIW!NHp|C2#PTCun}Cp zI2B0@MNA=eIO+gY0bZx55}IyH+q6O?NY55@9ikdq;D{4+L7ciTN^I5t{@Nw{h$!pN zf^q~S?f(%${ui1#yQ@Mmve7;|J)yM z%LoUswXL;`EcJ17c#GK3ud{FUS{vJaF?9OJ6?*!|NP2quNga+BzU@NIorV^%wJ7TT z6_kAX-a6yJk3n#_M<~0wf=l!*2BqZ&oxh+E2C%*yoSi_R4gwJSd}q3EUG&Zu&Q*=) zjjdA+oH`a8Syi!**ejNgCpx9;v(57L3fnBhJBF@nv;D8h)emlOsb1bW%_{OpXVl+p=aTj?tUHGoBQmx^!EsW|v~9V${D1 zdsfSffS@G;E)ytY8L!!fDHb#;MA!aosdcQMSzo zuUSHD`#SI#`aLVq%MQYx-iQ9wT8cAZ!O~B>i1rvS3=L3Qm?7#K5sq_fUl@4AzW56Y zJJ+Siaku5tK9S+`UORzpaobCL{&fES)e2;1+Ea;%uodCMx+kEA#ctB^eg@&^4t!sY zd#wYNcF@af2zwqtxrglm{Ry(mjLtS7?SUf@$OzEkihHUigte%rdpP%7Jg_OgLW}~> zM}G=R`eLlSG`VDaC2;Z&92MCEn&g=hlTJm&aF6IbHr3hf=(+6CNU@q5w{;zu{K-G% zwKW<-lIwna*X4k&FiDT_w!PfiY=4jsXcES48br~AncPG98!&0e{SK_&uU3asGond1 zde81aUln*}NUw{?*%w>)En<=dz-U9a3x06mwdv#9fp+f~SHPa#4gIo9l<^$rcM#+Q 
zizm7Yo&Ls;0wEfI8pV8)USn;H3BO3pH$Y#CpMCs1Y;Loq{d5+_Y%|2Q8D%f(;#`II zHlpN<+X}60$9?QezDBD#(AQnJFvD&2xU;B{S`{1+ct+)eoE`iQ%DIe51E8-V)(%+r zmj6-sz{vH+|Ae#O)8_^959I{Vd*JSbBJO=JUI-6rcwMAlcI!rXHIIE2s^jZcb^)+e z>%r_51k0WZ3?I6W4nUg(Aqp9ScxE7YkesE@r`sg8b6%eKFQ+%l>vW?TwWI1&cj#XE z{wdMAB+{(!X90h^IElji!~Mq+3;rsdMEaY6YD zuD6VdHXyPa?z7FueM6XBsB#rEBb=rd-8z^y`n?AIw4nSc_$hDjTi$nI>Vxj?BbU?L zrF#|})BF+d@A!my|4n~G7&wfdpL7X#KMkB*%b(+;qZo{^Bdsauz#yB+NkR_oscGi_ z13y5(zY1cQ*D_k*c_zO5(?qeXH&Iv`KcX&M!P1f<<3j!Y#WSnOmz-a0wl5x+@Tw{*Io6OIr9v8Fk-g6<0*`&G2t0m{RC;`m@OhzQbfR*pRxKz3 z{O?RLh9_pnj;hiIX%&I`$e36^|KLE?kcy_>^)X42Dph1sOl%VOmDD=|BNFL8CjC(n z1_@w+_xUrj-LVT2-Jfq)2eI+yZujRl15YJswB{%+M6kdJi&|w)P=RD-!DdSmzIED+ zHZ^Eih^Wyd8xqZCqe>eA=7cD}5W}PpyTT4pMMXs+k%a}pxxpCN8Plib#`NT{=*;QU zqW0(Hc3ib@AB);IeHtaRvhfL8KX?o~c;Z*|m6e^A){qntL2H&Q#`^_|<^+o+e;A{M ziu5y!@luyY8Bvg(TWHis=k&!)lC*}QY1X_jrG~9l>dd2aN0cWAE1zYLvzD2OL&6ne zfKJP#-oQ|`q>LP5mM#qrSBs)LBJ{c5cW^JR0g&=>R~jJ=3gAv}Y~zG=J7Uz~;cDPN zb4<77@}I(~W0LX3ds}w4Ut+gs-ii3KZVS-W=0)*b9)GR3`PJk6VtUewdeU%mJ%uc@ zCnvK9_h(2U?oWN!FY}U>-GEzKGRG%ah6d;5=f-0|!ucLIiY(76n9ad_gNL3-O8J(O zX|2mA^gS4#krB_z7T^b;9v_+|Pc@5wYsMBZnNPJh z?LbcvzhJx$;0Dw0`GxHE0vv+{fUtDIo@nJ|xx>Ya9}zF{hb}+V7v&_Q3q1@hK+%s5Jh2?R=N*#a$KxanM3h zdKyB74S2%F2sc6iM0w^I)y=H7fWTV4JUu=CzLHIGVka;<iIV{XA8HxdN)C7+)fS5~+PmYU>YB*&94P7Y}_k1xXd1Y{a8q-M3F2i5~`*NLAlDbyPCHOV+rL ziOm?VWO&>YFTc=k;U-&z)&NF>UrG?G_KVVJ*?2#l8k0}3b0E}xcqgVqL!*`5?t|N~ zhEkqvARDjjW>dG@5^AI6qZDW8nO_=@*}mXHIbgQn-_C9SJ{lM&@8_mBk~l#%#;Ad< z2?$gxYMolC#EW!uda^ot2*#?? 
z{iU=)kqAZbHpOaNY-~^z|FsQB4%#iOhdjRh`N*Si;D~JS>c*XXM9PBGu}MxUk+{V@6DD zn5yBS?{44N+ciBh-V&Qj`&C+wPaK)B-}{-ENxLPT__|Ou@^Ff+8jiA9tg+UJo@>4+?4;;%p(37L1#{({UTfi@M z<-PsoO*?S^(uYAf+Pq0=xz?CtNzOHB`(ial+QV97t|d9gs9~RK4Y|pd9HTZ-rBf-D zcoEdSN4;t3Id7^jiM{Jhd(O;yjA;y1dSe=nM}Qgvf+mH_6AI@)hd6O;vCRGaw6~q9 zeDHDBKM8&-*5eVS>fIWj;Tg1+-HHaqg~n991`n%Sy~8^GwK_VL)?0ZIo=+p;EgG9m zt`~NXUu{5&Lct)W%G-8`Sw3k64Sd zOd1v?u>@^gYC%$$J~{ftwvY_RKh*}SLP*52E4I!g72eBQD1(Q;F972LD!uZq_Yl23{QyR50sGD^> zj-NvO1h?`Znt8VhzY7ZS5wtK3kD$U6_f%#u1>5!OU3w!-fwfqY%@Gmdr-nDwC`RU5 z25eWUTc=M7Z8hhnr9}5V6+1jqkR-Lb?-)_Bc6{$t6@84Tq-!!WwZ@^zeaUHcbu}sFx0MMqR6319p}~7l zfExD~ifBa=gyJW0|C3ml@GL%w8S$!u=o8Nq=i-f5XO5!x8-C*{CZFa>>?|7$IP<+P zn(~9#fCo}Vbj@8~tiSc6TdI(^ulwlM>b^s!>J^PmSJcFt#;<7N6mGe@uWQ=GdyZ|n z^SE>RgnNG2Ip=}JV=9;3JN5jBmK0T7dLMt<#9bufy&MavaGB?6BT>0qSO>vi5T3CG zKyZ>gNPJgI+on`%%-#L3aV%}SExcCaeE^U@u=sBAJcIp5?YvVqpZugC4SWO@ORsox z#e6NDv6}1@mTj$Dw75RKuP3X#I<@uUB71(UxN^aR3rF@f_uoI)W@h?Dj-Ii6W~pO{ zy00=}l-)D$YMggIl)?s@_ja8%I90VS`;k*7N}L{9l8$Ry|&qGk)ynZ zQTY0`(-vPF={-rj(~)1~4)n+$3a1tepR`HbJL_``z&wE{DWJCnZvm{$A z-dI)p8;9o%$uVf8Md2ygwjqYLVx%`B zBfD9KO*2NXNUe*IH&F!tf%=I%gr6p9!SuMV>)~5Ye)jK?Uj(hcjn@AON! zr6+&#i=tAzcKj8OHJ2=OjEGT7__!aKThmgR^*_XY33OanndVz-dGFP}@B6ELFR4^o zOOjnxDphIq5-+m6NM7T3i5)wRNu0&bLK8w3nk|QPfPwBg-C#?$6&ylC+%Q1W%>;J> z83=I5Ihinc4ulLeiB+C^->WK>);JD5bLt$`({ribz5n|C-~I1(smUBGZ{NMW99w<# ziyI=FhdR|l62pZOnvUf*r-Rv`COSB=dtxAlKCtt1yHZ-4(-@D~0!G^5bLaw{Y^W<9 zjCHI%GCcN)v4~V}RZDd)qs3=r%np;rld=T)f4?0_odK1@Z-G@ik*YHby_Nt_+%8^yCpd83w3N(S9WM;KbN&Xvl znN)gv)2-kaLFIKZX2>L?X)}fY2K^l&u{DQd8Jm=N7yAuPnN!hFT#Ji)^%faP$}D<3 zb?qgcMb2Fld)$64iPIX@H3$Avm0m&;621DG5C6GbCm~5~&;$7n3h2F)2m!tOkpA;} zotI%HGuZG&9SiQzEP#>Eo<~@ZB>?Jlc~%So`=oL=xf|i}I*hQKs35-J?0|cZ#pl>T z??QHv;Mu_-Ez-87<1J>I=&tPSAR#*#j;n+;Iv`NEJCpu4zfl1oI`%2lGw$+fNL(nB zyf7mLAWfhRIPuTQRTO~|LPo)SrBZ@B|&Q<2aFSG9d%D%$GT zqP^`~3JHhQ&Ek)(IL_5xe}F#ucq>w;DV? 
zIMIeQAsy%Ri1!lq6NEvuXw$Uaf?Awhx8hE0231~&#=GKJJT!yuJ1g9Sbj@BFyUP8j z@_3#Fb?kCf9R6{!C0W?ibo5(i3gw6Y`ABdy-(qG+kwnC}TSqdTTf3dUzRAv1xy8!} zDFOavf6QXm%T_(~(t{7Z@@St_XEnzY7FJK0?56m}yGtAIAN3k629eeeEfQ#vXa!n? z44fBoZ(riLw|gM>hQ`iGCrmSF{4B|9sS55^WX@WLpZfR5DsONaXnpkMM~js|ISNO% z?%Q|E;WCRkpZf8AZM?9v_kQ=`-0=;G*=?cqr+{3cJ%zzYLP!V4ncZ_yY**P;h}bxT zN-xM+lw-9}#W~3Y@D9aULEUl=fq}hTC&ZQ^vNQ{ctS}Ve?04Znu~u+N5a37UUUaoc zDkccAR8+Zy-Vbh(+e{uE#gAF6Wz8nHj(QVZn2a8+xKa`8JTMxb0ojNGqjw?6vn0S$ zPhT|Cv@3i>%S$eL_4S$`=+c`$Qj*r z4)Muez@7nS+=VI;5^zEl?x}ZtL$qxgil-cRX+FFqDvXc=qL1JzME z1{A|xK?QjrDe^FP1+b`%;th*JKU3LVyT;$DbEs#~6SeE=*wIJ8*fogiIpC>JPgC+q zZj>klf1X9-K_6jk{G%ln_-NbQ1M#=T5@-<;@%8J79(?Z;7*6rNkv^^8BSpG*~HZ-6yMy? zyyy}_t1ek_aU4ahIR0~xWPSdWT_LQzv`BV7q0$y(eJPuS5X&W%-=b*|BgDCR;Uc*N z{c}YKZJnFr{}l69FRql(bBs(xkic6+a*at@dA4Gf>6KE1k22W1V4ixo4^{;&K`5C7 zrk*)VaPfqvmORiO7m-KUyDH^u#p>sWXJgRS;JL(PIs$^j3pEK}yG-eQRf&=>f{4(9 z6sAL(uopPphOF2riBjRPGX+xp?yLfFHcf=JG;U!hEW71Zk#fL>GeTvi12X-3$N@nh z)JWA-^Zim(4G(ny*3pnJ(x#Y)jhQK|@)M$^%N{n%@XBi#xXZFp6BS{<9?nK=%+H8l zN^HSa?>~A&b4C-t_CtjXfIt!6eC^-rchC7la%aXjdl}0FT3xb`zvgG%K&ov>0`V0jD!;5J%V_D{V%t_=lceJBDH?DmDb1uAo7@)*oU6==XA;>bB+zd20o0 zoQzmyQCBt(K3;GQjSRWbf37WKfZVtiM9tG6YC=xL3)zsba%{-gAR7Wgq->`P+6}p9 zQEps;%OFnYANkS!_x{V7)qUXoZ`+l{&6kOh3R1n`|tr@r!MTzH<9s&MT7^Cc>g2U&-CGyWp+-As$f$ z4113z+kGnRjqy*8$0{#0N>w1FQ-gcgr3U~~uADbUx{&Ia-vVP!f;`mo90(}?Fddf% z6)CtSmTgnO@N5n$063g#Ytv=G;Lk&|;ho}YeaV>$$~kI_!eVt z4F^+pDPERZJW)@vHa&n>)=YkKcdM~!AY}-6obt7lsPY{J+unS~ZON{HMkS<3oS@{) zZ+sb6S-Gb^-EXmOXW>A9^TvFWoU(>HygxHrupgOYE_LO9t34^`AHCOqiwA%u^&`X2 zAwAg1^K6PuNiA?QFCvXyLSg3z1t6m;177;h8I(S6=_P|Z^$_Ur23BPi7t;V$ixb>% z&m=9V?xzP4R|GZa~uOUQ*Jjtglq9q1T} zIg)Orzjy71wLN~cZQx_;qjG~qDbv`rHlLm{J56$1$n1=J9lm6KM^Ev{P(Z3N$fY`` z!RSznbVi-r?A1DBtkavwZvzrB0bAG#Y{8D$&LRXz$x~VxA)i6hQ>F=uGuT?G4zW># zc3txd@vRE^b(rqFaf{@-NCGt?5#gAK0SV$eE#Ye~*0t2Z_o2WZ5-)iE1HiX^AYug( z_w$GyI|)2ni=8@8v6J#i)0|s(Ep+R-j8JvxhPdSGJofd&`~UG+2V+Ti;DU%XGZ=}K z(`MQlWBpMJjXrbq^S8DpcYf|(Y;P@J%zkxrP1R0CLND8TWI1h 
z*q;fBB^3%MR3bVkS5r7BW|jD7Tz)m|6_H*09WIfpXhNv*yZGu~Fb>Aoj;xzk_m9r6 z`=8D#K-oVTnB&3Zf&iPJGGkC<`)5C?(lYa=uaI4v#okw_X@a0t z@>v|arU0TK)d3Fsv%r@B2arV)S^GTV#=eLc5g!yp!jMcD!3&YXz2kbgO7v`2jxa3C zgeO@>Wt&trq9{W&8Px+7a$3xAYy3Pp*UXs_Ozo`Ar$~c%g4QBo#$7e*JLv+|}=Ol~~?o@Z{ zLGgboD`sqydVz9MP_xtMY=)a3Y)7km)Y4{JO*>{u<+4hmZcj(jHVOHC{M)3&k_?J%Y_83mRF9T7Zl1#*Z~*x2DJ{m+lUpSX)}nPl+qyU(Aumn!|X+P%1ZwF z+xQ*$YcNlu-YP#R)`2v+X&Ui*k=7Y(bylvxb?6^;sBVTyq1RGq3hw+7!;XejD$*U8 zLG@YFube3UG3O_oSk^h@-0H+-PMZ^B2&a><%v}FfR?5iGl0`2^%a->dMM%+S#o(8Y zH?vHcK=je-0!@%#C>|RdALAD4gOI2>1SE8sD_(hi?*H##xU~c5A}r8SwY3rIDA`m+ zU)4JV7pH}MCe*@$6~99r421oP^e5KkkFAe&+&O)0y~5ia>)KID%4r2Hpv<`~2io>L zwKeqTtsU!{4f(F74H27EE)>e8`L-TU->rp#+xy+kfv$ks>@-V_tj^}PxU4Gw+B2J9 zSGtpqmTYqhqMy63zfB}kD(}sZ6<~LA`CFb$OER1v2s)BFFBkO&Y_Z5NOq#>4Uwl-~RimwW*(cc|= z;Y>lnw)(emD=(CcCSA2<%8JSvvq=ZfOdZYa9f?bw&A!S{SLG8!JAo$`0E%+puOdyg zRjhMn#QOqvgzKoYA+~zgr5kSlR27MDq6H)lOp&J1ay-P%J_PrWX2s=dXI!w}T6Z~7 zZLRxAUo@}Bu7O@I*uLAC zEGJX_Ev!V$2#IePU3!_$VU%Y-HhtvGi>ErJdaG8ZcNtov!0i6^@xpCIkBzlaCT@KJ zNJ>bM$C0DR?NhtPhxfwJ5lasv<{4~!%Ih6hzkrP+B7j7XAmd07wPfi7`P6^3cKtCi zQCNEkOtysN(Oj0=SVqjavm}L!xaG5Wk%L4qSopwO_?% zI@K4^nmv>NS1aVs#Oioemc=GFkfn&gNs+dtZyhK3?A`}f%bik_zWMfZC(;{x0t)<= zY*+j4kMEfM1;v+w6gt~oY_jBfXTM&%Lp+S3hCs#^Y4ggoE>mmNXfyIUuzIYtG3YS5 zbTX|&&uQ!Qw~l8>(vS^x-@YzRQjAi;Ye;YvXy_Pn-_*K+>;|YI8=KW0c>%HI5y*Ffd7z`}&MCP-|QAuK(~y{^%Gjtb$r=Lz>at`M$EE zM3x||n9~weT2)hg2{D^^Q(jltQjE<<(9j&oBM_n}$mUmROVZ~{Ih2CRs|(a*V&~Mf zZO?~L6Dk&}9dWLOM=q_rhejC20m^oWR3zxD72iTt1+whTsOs7^bO)p4zKWJPRh6Gs z!fFezKk`-b5OOziavC|dXAqykHcuDQgHiz1$Fp>@BRL3O$JFe`8SF@wI#&Ag$ohAR zCky+bo+glSG&gm)oCNiqZA#ivFvAV*;d1W`YCbEx_W z_m>sW)#_pgV5BN@SS@KA)I`(@Cw6{PykXs<*9^MU?$7O-czitaT_{(z>cvQ#+OAg$ z1(b-O74}HRTDq;^oKUHu$aknb8IP;QtI@f|B&L?jov~H%1@Wvg`epZ?2Fk@R7`j8< zhel)3jSs9FpmaVy7Ar(^+X5I2pfS%%@PkR^$PZ_$I%iu^+1u$p8ndCu6+snw-MY_)C=D zZ?*dj^y(qe97&PkN2Nwr@2&YX5MSw(HzeQHA?@glrf%cwmWNScSmsqWZE4t z9e1>$zp3Tm+0?2+d}OkzW$$Q0=1lvb)QUiA7XT&=A}JKFcAu(Lju1QxDwqNVJdm3M 
z52-?!DfA)%@t;nDmJvcuV5K2qD}g;je1pIU!W^C9n@8b$)($=seUmNf|Aa{8QcQtM z#d-!Ui}m0G;=g3g)$AI)0x;jzD*PKev}G)Kb;}s!f__nLNzID?pFWxcO9Gc;S(gF7 zdz(2e3GoYl_w27tZDZX%6MZom2y8e;h$O8WkMtayI^NcK$KUTg_{E*EckxZ*v3%5k zq3=aPnXzuCN~aPk9R{sUE0gMViuM!V{MfN?pU(9heP)Y&@15?B(I|*QhU@QPp8;8< z9XV9hiB^u7V3LmoJQPR~Q_XoJJ2SUYz;+=UE7(i&Laq5G0WbZBvg3Xf+KuLn2ppTJN6162l_WXyeT4)Neo_Kr-tiq3yuMaOd`jT!_$F) z#vO&?$WWUPg7@iRjV7~m27qlES=E`L4jtS~koy2cp59hmm$O2F74wl+Fz2jzRUuV! z7v!ZQ056{fa2Z0MF0MR;Y$OETmy;aw0f9QhG4u_KVhtEtAoy@n(p8h^GMHO%EV%<< z?s8$3o2g=Mf)P?4e@Gi{cS)b*I4ROC4q+oaN>Vj`O zkJ0_B5{7spZEFS?eRXIlj82>p2t-0$>1p>btBUf*hc|@@p;*j_#UNU*4Cr}niC!R2 zAWGyJ9C7e zX@Xz)Nmk(zmt!-0>n&H~@~T~+2y_{RX+E?GIF_{rzUi%=qBX)9g}+)%rM5T4fTG^- z@N=3?hx8Hx6`A6`diAya?qta2V)c^a+8Q$9Kr z${85oixQbosduPb6X@#;6p0xIH$B9uT_m9wseMPykhaq@$khNtdPD$R8dtDw{I2^|)%$q43i?0< z-H%L6CzI((HH@1Zt=3)|3V44w4c;!4^SzTQgB(SNZ0*fAIAk)Vmue%$*{5QO*Iw@C|c|aEg$U? zH{fc7Y^msWBOJkBAt^Tw+iA z{VfiO#L?pSr|lBe3OS{8WKbLSS!p4O(Q<)IW7agq(DzuD_@O6hlSphyk2_(PO6`gF zQ!B}fPdXxsmTFzbn3oWcAOt$xyu<|koWxEd_e~ECg^xoyO^3!pw*v6+`~@lmUK#F0 z-jkcdFkgs!J2J-$RjEypWbYB^NjvhJ3%9_FzFgRzfoY?+kSx^%T2C90{2Hr?wMMY~ zE9X1PMohS(j8_mhu|yhKtOG&6#E}m8(+;WBk@g4D4xodo7gVM{z~)!8Rg*V3)ESsv zfQmFx@0^M(>gE{wKI^?<6)j`lQt6_+vxElu9Fzb#=m2uZ^r}_S&I05s*B}~fDBziTW(p?0=-}&Us%K8!7YV>QfDDh61W*_iRnXfwWYRd zPHKpDT&$Z-T&+jC`OQi^kEL0HCcB9oh2IWy2}%jocDTZ2lPDm66{Q*4lHgb};T%hb z(<0g(3TozOs>EVw-_aVVW-9U^j8ND9umo?#&{BZq7ZPM2K4n$2)SbY`K8-y69C8}F zaQ>6y<8AxeVLU1XgIW()b+z5v_QWNixd#v$W-RTU+BWbiP(WQ-1{o<89-zrb`tF8@ z!dj*e=l2zM!HZjR?a=~MATv_vDK(Wm1w|c7)Sa%Ys|qLP(iKqO{Die!Guln(q@`VP zE*&ng7tx(iqyD5rC8)esb4H+1%EN2)hEtMfen#T+V1`$?k$&Pm)hRt685!cgU)iyifPiz3#X&&lM{pY`sVc$~v;f~ZhTN7F zkB-KoHXF^$WNS3qz8{vm=Z=lX;Ei0i{V*q_>7DuU!X|i8%Eesm9L~-anoA85oAZsj z*m#@`gr`t`C;{HI3kX{3(2EjN?!She0|?$&+5Qy`d5N->)6VZeT?rYa@w3??LnL%5PVk`pKE|h zE<=qdYhB+fO1Y6;tI7ETNq+Pn7O`oiJBGdeE~yBIJtF!a^3< z|6|C)7@6ejzKa9{8{<|90 zYfx|tHYXbOv9i}>p{{waY0!1Pyy@Z1VTn|#_gTy?6$pd^xu-p_c6k#(aKu!30)H0& 
z3DSuSAmb=f-7_{QiwW@-S20<9v54D>Xz|xSU{F1SGCvqat)r-Z6dnD;%NkUtK@p8y zgUK|Stu6Rp+6w`DsOR|}4Cz68UT!JMHlcES(@Rev{lY`G>8*e?Qp z9eb5~8AAie#ekwDpjniQJKw}$#WEgt1io^A1{+6rA$Mw?17 zxj8csQ{!7>5q*ba;Lg#YMk;Y@EgnpSdwRFD_Z(f9aA!9(Ioi_+oo+DNw$1C>-Zya9 z+At*!RsK>KG6XZWRfGCKdUjnnhLI|l-6~I{bZnGcfdiKC68;*}0dT?ixrB9S2HP@? zNTo;)E+a{N7G$KQFVQ(6 zRl!47YmBZUnQZJ$@+{RI+ca-i3Z|?3=;Ecd?mPeK*0w#PO$xYdBVdF~s4&^vx~AFW z>F!?bt*y5C^0|VaHu!8dzn)sO-WELgg{>i4sg}rfHjS0l2$edeHZh!8=S3UEqjL(GG{Y)QqJ7~OT9uKL=wop+Ad=nps7Sa4e-L0 zHDGAmbzRPiv*DsU$B}T{u@BrvYrcOP9&B(!AD?$R#3E{^-W*hAf$&#q@k`~N$HLU! zt6a?r^jTvcP>m78kbT$RLq9|HH4nP zzgOF_vvr!{p1b^xokdTYd%(%sRaNtsfaX)kw%Y9DVBSuI0Y9~f3%f8C4r{rpN}5Hq zPMY-fnRAM|F1bpH+g=;x8b4t8qnG+r;~xAKH}CNE4SuOw-++qswS`lG1&JdL3KAOw z3X=EH2q;LF>d%-{`O-y1O|D@68l{Brp3hS996uK{3f0WD z367bjYIm1Z{0b_V8(4J=0x^hLL&)D(cbfxP4xpgbn&69?PA}}_OCa(FuR7OXY&^@Z z4Aj$I?97E*`obkcqu}Qb&&Y85U*WNKz<58(cYT2GF6YdeYAbFH&x;u|+(2ZZ6|N>v zcylvHv;r|HoXzgY=DLehM!(G-&{4%FhSImJPAGik{(kR<6aDtOI59~%D=Q5|Ytn>#ra9RsNw4a!{>)1Drv~--fc-Qf7+}=`|)g(r{T0qj& z#@BRa)-)T@w+}!6P_gIknL7@D>%n68-81+092klC2M^@Id)Pm40QSpPo+1!1$HsnH zM>AFHmp#2wzigj;@T2z2e!y2W_RB7z{^j~*0a)1L?e1u|*X_XI=hkJt{R5*>ZY};j zg}*mph{Jx_t*Q9xkOsYa?AvDwGF!w}*<3%(=Z%_C?D2K@)#Wqij%D_Y#AR?*_VwPr z#PCkuW3WqHANg%nk6~qS>4-s$GRj6pDO`e6uqTFwr<9?iSt=VWvNDamPgCNLl;BK$ z9QN8=uKEh~fgfMc8nTpgaMBTAmk5!B5~&SVr6v#tF0#NiT%9c~W{K6Vr%3|C``r;E z1?vxYduaB`V&^!J=w@ZOP)soz{;a&Wv3J1S`;fQh&SZ(y&tzGZ1=*8`ppiGzAc;_K zDbiUO0n+v~hI1#W$lB3%c=D5pJ5A+Hc#=vHl>HL4glJJYp?#-j5%3H({y3E}ZhO}F zSWpg^TE^xb)snUU=RWKvPH@5A$?Se~G_k1=(=vpJ5!1o!+Gb~ySMBL250^VUi7gMV z2@GaKDiML>LPjKJGyO4V!Y*e!2ZskcS=3rOGT@cz^qO$U;?f8WR-@GDGg^aovon<4 z*p=N^3NT8IOs28vOir~>qt{4{F10OSH#-1iaQK z-(>d|*T0i1pO(SSZA0N<$OniyK7b<>lXH9M;Aur1S9awx@aJe0Zn)vQE8l?8p}O@$ zuzd3*wosy)6*bV%BD)M3Kz;!eh-A(Pq+ar_JxUEp%f!1?-i*i9>em{~Vq8Q+HIy3F zu(A@H*4~;{J7ora)1J@o-0}F<=u&im*{P98>x4k9xpGxlt6`h%;TB`czl8oeR@Q#+ zo`Q!Y)V*x>wvk9j`4sP^7ZnOgn;&t(Rk?FINw^l4!;n^U z6)ck44GGk*t`LWfvsoeU9UwBl%q6wtY~pf4Q9G?Nd=uZnXZgjQvn#0&dz4nXs$9-K 
zz7uxuLKX(L%|fx$0`f#Cs#+lQ75EcQR-~Ogj!Yn1&X06=Cnl1dcMO=>1d?!q*Z-yM zOW@n6?)>My(Tp@Rx<<$77~S_R`H*b+h-Ev8oy2w=J0Z^DK*&KL39;i40%^Ds{!q#h zV1ZJO@NY|(qd;OiL4g#qOQ74*^nwrFwtv`mxBXLS!?vZAEyVJ^Hs)3%3o$J=oHGv*Hv4PGyG_hly-LWoPr-1kT>5fud3`fRrObk)# zR4E>Ix|H#P_r7LEvPv%TdBVh>oM>Do7g^`;-rX1I2;zJd1w+fdVZj!%s4CK5q{w?= zdj)~cPfhaVbk3g?mr^n3$q=jzLVg0diOES!U)g-B2L`}n$c&Ohzm#z=(Os58JlaaM zj3!&PI6@h9IHDSyc0s12t)1E^W~|0^rvcG>%-_vOvEP={XK)S2Vu~s*DTp@}D=Ytf zcH;du;+~2O?x-LTtMi?gKt9je$L1zO+<1O66uC?x`!#~%K{Q(&j~$Kn3}8+ZRBb97 zdJ*a9a-yfRwX?IcaX^jdKAP^-VHxswubmFRN=$aTS_-_L!ZGiu1A-7pw=JWH zE$c4dd->iiIGs1g-rRA0=dRB7pmiN4{FcrRO!$FrvN2_rdTM&ydP|5@@aryR&RbchLxmK0c`9F0{ z!QB{k{GCA!Lw;H=aixgFgeejEjFL0G=+nC+MNFYzkT7Muh$&Q68B^#dkd$3UTr%pM zw^Z6>x~%##^<{&Dm#Imy2eU<672aza^^5&@t4Y1FyR&OvXH{o?eRL@y;>GuO%CNW@ zOV&+M*rctdVj_yP-qJ!RrM;%lzg(XIufR7e5qoMcv3dz| znx#Di^=J6$xlVMoprBYN1rXq}iU^`G`4KsWW*wWVH>h z)N5#73y6D5_M^?NOT=Q@NFKK5IJ>r9t(B1t4cewWjOJ81Qro(yW|20g z{C3(EsGwBETk17h@gAjhH&<27^J-=HLU@ZzG$o;fVJ7~4nyz*?$Ks4^7L!$}#AL-p*02-XxYC`B z(O{WzA?~>iQ8`K6Sfug^!UJz1%!Cg{a!kUEUWAEL=HdFB)MSMexFq5Aa%>(R&8dkT z7hdG$c>5xLI+RwyU6`z9?VI?RjYQvaNm6m{-^21GlmahgiJo=(idqO2l@M-J09!*U zUgy-v9(`1%w%0~twPwJ4_!|aTvsJOQLrooem{eNh!C2Y^xVLH%>ro`5VnK7^C050d zlv?1yv*1CUMNN~miY>eWA_^t4VbvBxVXIWhvv3bNj%ec}Hk2w;1BOu(RxuCT#(h@S zYRs%9ITyH(-SxD!4p@``Lk%|ZnB6Vb^1Uc2m-YgbrZ0M zDH!B%|2C$#JCrg*`IRaoRXD9OXOq!tn_B);@+7T~)kU+SqHxM0$hBr3L}`nfto8bh z3Q}n?O*{$LTX}^-@LSN#77zq{k3304iAPFNcUI#ET8#$qG_rz^A9EVe3)fNj&M4#O z%T5(9nJJb|%~Zn)`)H0@QjDG<7d$$nxB`Keecd2h>Zy5`PU6j7kmGN5)H^d zdk?FMiQ}T3WHd_9Q#hqJQAn3Sx7whR%M=JZ$H7I6mZLARhs}&HoY2`t8xLivG_D&% zYi-6VgVScyPE05SeAXxfPryx-gNP&lb>67l*pLTLj}tgoBoCg-=`}8wXuLIo7;HE${?sczfPV_XmLrZrrO|#~R zE7Tl!vN}QAzBSi*NvrdPw5Q6c3VB>r7WgZ*N~sBWLMG(fS1pViMWfTm>Wq9UVzM{{ zOS-TARt0a-*&Q|;n%!zNyXR>o5g_V_+ELb-dLA6c0cUVL#}j%dtByVG8MbU!kJKEd zc9k5<;!e|<@|>m)!pv5CMZT4n!so_fbQC`C&aG{9)TUyBFNHfxu|mLR_X~>1ikjBd zS<9=8$#1jO_#A16=r?oZ2c08*F;4KB(+K1hoSf9sEKQQ=ufp42cdBpeLPsWQaYgU* 
z`7DuI#L@NeJJ3qmh!l|?H52|kIF?hh#)p4|>vn!qGfX~;biqlyuN(_KF5e~XE6=Fd zDODq#T8xT-8eLowX;RQiQVP#N_O`p?y+iH#NYF2E#WG6@m8;5LKd-6Dn+YfxMh3{V z&cbs>+gESz-Q68Ps#?SGCY{=5*3ed6cW>_!lUrqS;T_Nhv=$H09735iQ9a67(s(Th zA`ZrLTHd+c!jPeF3&YhoiKnWTZEHen5_P+QQC?gMd10|HV2xx%M_okp zM9Xuz1xbycND!?@63B)%-yJ8aIM=Rt#ho#veHp=^a_Tc?NM7=|D#?BPezb2|rO{arT1yzK(au+kxSw`R$NXaSM zt>aA^dW~A6Rt++O-){Gtm^L-KZMD=ctFu&FSsGF&4Hgx}Ds*;J(h&)}3zx__9B<*| z;C^>F;z(rr>+Ny{%Zs>vuNnLqUO`!jCZda2L+okgh-Kg_M2OIVg9!5%fIEmPq8S{_ zvGQn@TwWC={oQD(MhS5_MmLvVc3*Z!XxTl+&O0>;Ihi@EJgJ0Ar7L%5W_aa6*Y2qm zL#EJ~x6Wi+O0`3aZi!Zkuu6r1J~a;}b50rz`XfI#4Th#mckl`|*Tga2+}e6mM6F=$ z)|=8x)7DVW6+K;B7r5d<(e8Ix9R9Yob@m$J1gCx{7BV^vY%FMW8dzt*d$EH z8Y$+$PUGo}5@dl-PQ-wZAX&BB5Ql`K*0dApbGt1du+wmwZ0xHM;D1bUN3uTS^AsL0 z-&X>u$K#u~B8%mADdPAj!XTW5;W~lry#GW)C&1xnS?7ORv_sO6-#VuZ;?&^kDpM>R zjIqeV2-4Bj$fy#nxp=arH9kEmXi%`ohLf|3=hER&O>H<-rr({2ezim+;@B$iB(6`2 zs3;I#@Ep>_EchKZLUDG(kZn6(reukb#X6-Gs)?L~ignXj4syL9z&jb_k~tBKQ~!51 zDQ}@T!DqLJ%&by>%{Ns{dGtyJuoQkf2R&;Pt|H9gB_ZH4tO~p9@8=6xwZv??1 ztKBY*8zko>C9usN$28TZzpF$o8IQ1bA(1~K>i zTs;&XF;&p3RMawgrkIZK3G6rf9xrim2V5xU5y*JP7_ecM42CS#&hqm6!`A94{`4kO zp49qu=LKdspD@|!$ze%5geyYZ0!2x4AYir?XOi@$He{Omt4%XzP=`kR z*f}GX#tMln(u*h1SoKIVdQK1qa18I}BO8TbEr&G2-ftAGtn(EbmCx9QN2Y0P`Bgpr zSN8bAeS7|BYlZb*U+McJD0^mT|2tGjWLTU+7t*h zMoe8;;#Y$Y;PYri*aEB@b*5NMK%-;<8VDnsN;+i}HUh;V?G=?qplNO+a0VMs(ePX& zu;s#LOC*4Nk`iAi^iEUCo7~u(TT^e9j6gBtaikHYu@SgvWLb>it^6PHRu)cTD| z7$C|fQpsi8IF{GZ~=~ z{Q@JCD|9Y2$|l4M6dGjns{jx={RhsnLkj+BK{Ym|0}-mIoHyxMFtS&gP}YbMqf zHz_z3XLIUJDw$HNv>D7+qY!G9EU6RV73vUCO~l3suQP~OM61_u&K~^}V5%^xidH#(23}F9%$|V$l8bU`m0I0`1r_7Zuo~U&w7M-$ zijpJPwA(!@Moz8Ya2DH3yAUJEWazJ5*qZv4->0NB7Kw*Wz(*(pvR~sz7;n)Ow_{Y{ zy$l&1G7Y&#rws9_!b@|Q;T2&(gna{WFG0{}@M7DxlNZ~zZQHi(7d!vhwr%6Zw(aEJ zclUjVs=K6v+D8}=f3!L4L%Gx9D%ic+Vc^?BFkknS5v|Bj@{aOV%U3@ zCOhq0)E5a>A(+(rgmIE8Az!zOZ4;DggpABiV1pJMRWW|Q+_Pgv7va&~BC_{o1j9i+ zrw`dOE|HVaKK}quU{z~U4msFLUb+Xbf;FO#(@4@}4Dixp{GFh*L>|rBr8yk6Xu2AT zGS|JPCdyzLs`s33hwBgv7TVslvFK-`WMDT5OPU5(f`yADIf6o+G?!07&ml2m_ZI7a 
zkgwUN^X%14n)8;J$%Or-T!>5$Q3tcS03ViAn8ZVwR$r38g~h_`fEe_Yz(h!Q!pi6X z69SsRoMJ(kPGhmFQV!jlTM{wOB8ybqAb~U<+tKJlF@Iv}(YO>`_w#EHxOXs-ql%)0 z#Bn#rF;M#7oW3jCB_|CA!IN`>Z%F=(D^iF-p21mk6YdYdTF{fT10D8Hgc0?cm=kAL zV3-ikZfvGzn-N^($Hp%dkfNL3Snv^zJ{+b-6MX_bk5YcCUrWHR-te4mZQ~EjvvaVS zDMG`*PvTW#^ua77{;!DB3a15`A2izpz&C>%bFf2rXe{9IL0_#XHe zBvCwt4G_zrX9_IcR4Wn7S~H7@i_ab^&=rS+l%)vpq#B(M+JX|q;*z5f3?IiaV?M^- z_@F_Y@#ZHx9XHFiczF(FwtV3a4M5*OY*UJV>loP*eJWpxCs0hOhVpU&i)g`tA-2*3 zD|TuUiDcGF?-PH42-T8h-6M4QT4dV!y|8Hy^fw@ELjPjRh$wE zbVkn4G22W+2k4-Bk&L-ye=Hk+^W?XCtii{#Wqgo3FRGvf{5fl{V$ljMk|&6JpLQr~KDddGFCnkvjY zrRY7~EjVNi=K44pMk>2xNOezg6AZS=cVm`f8<(Ki-+-a0CIJe*}aVb2Dwj z6BjM`tX&xDO6n%@n3&W?Qscy|FJl%IQD9N!l^}vx;mQ6^mED@b-4p7mU(C<_@p@X* z^1q8|=2wWW{p+>H{>Jx;4oP&*41iq|ZOW!koy#MBZPiL+m~E~(oyEhQYKEU9HnR| zTeLmAX)AiKgLd=Xqt` zi;Mq$G^^M9M`=v5T}$9VGkydheBg+}Hzh~{;39eA!A=cx2#Oz#QETBK-0c01MGlpg zjkUV?-Qhx)JwPG|;kxp;09%eE+~S~<|Dmo|7eq|w*vo8kG)A|P@A{g??`;dCb_mT< zq`W)F;13MnU4*=3!3cd`8tTWAdzH*}i?LQ2!FP=TMTd%LT7~F&5y7V|j^rC*lE_o5 zo=_T+$Xo`xZJpO!hqk9rYyhLl%D9cqj0vb_63I;%Bw(L3dEry>ct4#V*b~+Q-pkD{ zfXLq&yyH^UHkRv*w3f`rMGv~3LGIB7Cw;HMl2=lXUAVY`5f&hFn4j;@Hf#@v( zwkxm{K3-Oi7@jMd;}o`N+&V(gr5AteqUggqQjnBMJrQS*p^Rp7AtoQXz6@t!bl?x> z>6{y(C7&)GoF{!?#`6fM=qgap5GJo0Qn_5CQcSxJQzD&>F|1zYDHC-7)NXjS>fzPV zmHw@fr*zvK*F(m9gxD(cTbxaEm1jr%AQF}Qd)?A#fvrj-`kLUCco_`8IzjmI6@*^T zcdGxk_Rc>i@H4H~xAM2T8{YiHXa|VnB5c|d>wW(ZQ*KRmn;rC*U2&-e&W(IBigDNeWzMO z;qvjA_tBu7G-FoiUnW)H+5I7+*CQ!ux495h1bY^I*mYjAT5m{LvLH(RdsYw9NVWWN zlq~V5JaM{VC$7g5?Hm@OO7g z{*liA5iYlZ2r5H0I)nZQiBMap;_!!K`tCAB-r)>O%Xs;RuajfqA~?1nY}uA6MQ51! 
z%-}IL*c(IF<946=;*&=0pU&Mw>zRrp)isQJ@1n|+eNZZ-Nkh9*)eAmFb~Hu|)}6bj zZ$;GNYM^wTKhC+*SCsPg-GE6x47M1ljHDZl#C|oCTka%Y+CV?Qf@@A}V;+P6gk*Z+ zUkd6h0@v`YREaY<`{L^`b^K!YVa0v&C?m6;_N@DWZ0`A+Pvl8nnX?QiNDmH_Nn$?X zj3Rd}NngLG;?Ibd5rdHAMo~BI6&twNr2Utrm;Fk=SLW zJ$O!@hG;8awCbYb%+i3Auw`na*IDYk)gN7X6-(1dr|oS7k+$XOSvhZ~Zhht0I$R8} zrO(pnCd9rfLq`+pJG;d?YB>i(l2@MEzwbO830OQt_Xysxj(oZ~v)k-qsSIU@6SbzjdqCbil&YvS#{hN2(uA z%Dv1CrAiT+UZA!<`S*PsW5-i*qT_a=AFvCWs=m-TN4mU~+00H`=V|k*AVg^N)E=Vn zq98sUChgvmnqUGZlw@oX20Ao68cF?XZ!pHG2tdIohU(VQOcFDlUhWSL$+B1RMp zY@+B73mP#gN~c952rCashDkMIFC~!lNeVmunSD{WtYKS}se2l^<1Kg&B;%$W?r5D`0O>5(h*sT6^vmIFAF4z{cy0oYZqg z63w$D1pldVZZLW-*tHfa3ewd*ev;9L@V4yjqd%C(cqx|hO4Tnrh>fXS&?h=u5!^@E zEVh}#iW~5b6x;4VMeBp6!-m2`8<{|Z^?1H}gNgRffo=pc2m)xN!b7EjyLU^j_@;b92;`2Z8A;rBlWfIlgBbLrX*ziaRs z(Xz%2Xbz?yG_j80kSv7N#1Prdyw&Qgs3_GP?Ja5DNHL)r2ZklWxmjt;yVFz72Kk~_ z2Nv0xJRWd7%Q0b6iD)&5tcf!nRFnh!vs->|i)$bPZ%C_+#2K@K4+q=()xZ#=)xBXDLA$n~)9Lro?5kfP%zTyW zlcp14#wDN=$h0@_%($n|M9zcYB0|z1@SdI23%FAtu{p%?ghlGeMmHR~X{N6m1Y?x3 zLBA(tU512im7>$e4tCDO-h~+)8hBIpeHaM)hYNTID{8fDQ}-5R(*Rhvny$%i)f~o< z9_1+TL)Y`%tHDfH(?y2RkV$<@-VKs^7id6&j zYhSpO`g*PJI!EZ?**=!$M3i|&56WEdx&efD|9L?5FZNjasp?xRNiNK9I*}UpjK?RU z#~?Eii?YO)Y-aLwx$sCRiu^|S_LYH*D(BgCuA;pb8#~?}LjRZ{b=8bPOwj=5!=p*y z5AWZv6LjV^m-lpBF)a`_R#^O<&D#{vNU@&O3`6|+)7CpWC>a>)_h$uB#P3cNc@7zLkBnlL=za>2YL^nONI6-&rmX6FUblU3?wp ze*XOoVj~CZI4OA^Hwb*zao4tr+4l?$1pUubNq5h&!T}gbevCF z5~x3#{^T(t5>0W7y|O;8K;4M1k3gr!TfO^w*6l;O_ET8m9N@}b#qRJrJd9VKZ`gfa z$LjmtQ6DaKXsxicR9m-nMyum=odx#tvU1!G`Qv$=0?fhH`S11yE3!sfDH;alFGW^8 zzGg-z_2^8+-bdWB9R@p{Vj@reWW&8s+b3QwF0z{UL`Yv8Ib^w_w>RBf57AfaKW4H- zIVL3mi|Cg7R~#-9Lr;^W8rVLBTiQP47TMBsj;xwTL!T%~?mZfZHl9OvcPZ`dPGUG& zlaVaxx>@^gtzayt8`14e325s?a;FM$R_3vL90kd~*j6>rwmW|K?lSyE+_FL-5ne)l z=t%afy^Dttr@wq^$1z3^pB;~stljMpcS_spsb49o>ICcWT4r zzDd&|Vh#P)1XF42M0DEp0S8_0WjGWparVfK78bB|$!1BG=k}&5jm`eDQYlHjtbK3E zpY%o*9D+72TJ=}AMaq?|N+Xa~i4JEvSl0NO&rP&^@puOY{dX_YTl2G(K2X|~Ny+E> z*WX%M7K2_j^C7>h)_$K_C~Bw|86N3ByxK!( 
zUc^#eI(!Itc|4vnpq-H%j64uGL=oew&a^!g0b!&%vjA~gBJ_Cg=%Z{MQrbypE7{(i znB4c`E*<{k9Nc|0Dh73{h0r^20#&goCQLMEu1B;O7)IIvaG<-<=(pY*=V(^mxzZWc zPysby)8F^8=X-z;dmk_7HnZ1LICj29thcSId92D87PnDXAty)MI;!ir$-Qa>9~Y0U zObULVTWbR8)zQ`MmSX0L$!i~@*HnNuAoaW^cJDZ}H}&qHnTd13s@DPPW!4U82FU~$U_Lat0<0cEa5U6s7~#ZoJW z!ATehzz|_yv$WH(17yhjs)`TFchKyuW5fwPF0J3xu1e73j}l~n4i6fL0*wQtz`$<2 z{vc6+(l4L`#^2~sf0QSU1kk}`Kn)rKCXb|j(m{prd(^|uP8en6d$dg?;qPEE`UP}H zE}h>_7P2cs_KzCiDoY}e>GnNYxI#~nz2yqH3OV*^10-wmm^fG{;okUPhdne`;8J^R zmQPg48;P-dt!|`mM{c_$4l==@&;UvbW2g;H`d!@NDUt0LsW~r!0~FLKy~>u-UNUkh z_RrqaNx$3QTPVT_C`H*EW-+Ry1*FehVhcg*V1QdYx=bnsRq_4oQ{z10dxHB8-49+N zmfDu1nnJ8Z{sEBErx$xfJx={O(=3Q|@=-)Hlt3h1FsfF8f+AZEDd#TEz8&bn&nYj6 znxr{aDZT?ZT95ra)yvdARWQ^LPa0-{Z=FDINYGz(bmycP;?%i+b!RPlWeHm&4!YsC zB2|w+J9IUc2#LChY{e=T5$ie$_Zd3EX!-V;M?UERR zdxDGs0$PCE(Z`kp)PUuJ#NitSp%wKOiqcS7&zuv8LdN+|R89%Nn3!Cb4uYtfgeAy@tCv|6}HmA~$$X(bJ= zG3f(@GaN*q3|qE05~%cpqR1QWlwk2e?3DV!9Q}qiyyQKN#Or~*27JTu7D&q<_p9&m zotYmNQ#_rvh80spEub*?VEN^}b9xQ@$#ogTZHw!{<(*H-b;+ktmy~~eh0^*smgM&# zc$T`Or_#T({f{HN_O78M##L(E6_2o1 z(E(Y7IHQ3{9D#>=kmC!G?(CE@7f3QD@O(Mtorhcrmwz|dzx1D%DmEr1s>g61N}x4 zoo1@(+QiB1qZ1N&zouT^l|2|AY1rRfdr8Hu_~y`B-tq- zV31z}NHxD8|LG-febrX2hUuiwhKG-yUAXSK!F z^fj@~SRbGdp^9=B#NN8oDeP77u44Bp&{UQ4)SIT0Fq!KzqOtvAv8g?%s+CsP8oG)( z>iXxZ=Sx*0C3L#=1zoOXp*N?rTAJ5R?DqDL7O>e;=EphPI^Bu{)QxBy%}#&~GgrrZ zhcSJlIxL=0F^JL?uBz-#FI&qGcodquKkN(Lu6`9kUl?wtD?2_+FGanI91Bxx(0B#h z!?h^S74~hLt59hl_v>BxsS*R1y$-c>;X)`&YL4?5@gu~&;$qbhaQHPF-v4+>9 zQml41T%RLvS)+@MZ-=rvj9;moihmWj^#C@TiD$uWvOrk& zQJXX)Y?1uO;bP~@F-T2cjXb#tx9bc&!rt@T_POzre`dAayKZKS0B4Ml-od^*?NWGvbJK~%h>$n+jei= zcD1%zTiviR)qSN#L*J}P-6bduT8V&p$D~&xZ#dueTBdQuj;F}*G>sO*ls!_v!KU}J zl{Q1^MYgzm?1Cyx$-Kd`I7SE1rCq^vCl|j%+em3pE9mb#_Q_M2VC9r87A{GGW1^XA zXzD~|p$LqU59gyS-AmL|7x-aOOamyk(`=M7{qdU@)P3?#PH@`O*B<*uKVm4ex8=Ih z$r=~Tn_@jjn8S%v>Mbo~R3}p&FNUr$S?gFJPah0RitB)j$v#kn@F-_u7(Hyzas_aT zzVS73=d(LPC$HunuxL!LXHP7XZ6DF$4Nv>|gnin!fZF^OD6#PMF4bJEpV^t;y*a&Q zmuOPhiOQ^uXelX4SHYy#kRHafElRIPBY1Gi4)uW3Yx%}waBfYz^D?!CAZXz+G^G29 
zpsLTAdAuM8Z#ihR0^K!Kz`aEA8iaxlLxxK_hRKCp7J9+gK=0hqoif&PLi?yA) zd;aj{9z92}l(my1d807(EXS--n1XI*mTEU9Tjqw*S*=^imeta>|G03of;e93oECPF zhpC9qO#JVxF`kW<3dB#rni0*EC&!~m1l$%8`XK0e)^YUSb8iVY^}uC>-bR&E#PTg% zv!JN|S-yvo(L^ORW%<)t=5km&#$S7%ho)H!S$<5E6i2~Yl&_3Eb+JKc;b zyeppGK`RkdbeXld&(8^NVbi_XH`p5CXbx$X$cTI`&ihXLj?oI{qNYIs5GAOAIov+L zO5YxD2V!?ujQf-Md{~%Xp=S*3K`X;P`m(NV6$+m$(Rprn%)WZ{ryh4F`w!3?0|!B( zpNsdTy=7(pK!tM#!uKW|it}2I&ntFkbM_`C7&oNvf}-8(T1`gu+A0e>Dkf%C)YR2~ zH$EP2vPBUr%(pJ49!{#Vn9>Y7_dk4n2ng#A`GlrXN09I)B}LiE;_PHc1*DA~*I}nnA#JK}(HRf7-*4j8*K;ITKJX#zyA% zA@xJlTQwT|(3smWoHwx$?tVJZ&|md)>Ki*>=Gj}qS}dn=}dXK37Kf7Ej;SzTk8g|BK4D zMzUk)G7f_Hnf&Y@hARQJv%3moi3l6DBdyWkX0MrbJ+(t^_OoH#=9<{B)jy*f>!YkI zQOaCKid{XS-p#dC*X1;Q%$icH8=RPZv>fWNK^6L8UCh3;`|#_sVxn1lMNMj^>hJbx ziZz^7O8>;kFDE*9Y0X>)4aBmYC8geSKJ{|;@*^$4BJwTf6nOV^`4iZ~S2IpO4F2r^ z)8hcGmtZzQ9ed2C{p@`Il@g#OR=D5 z=(-zrtJNB4f|VNItgIiYp?xm^Khg)C-VEBjVimkVT6TXaJY{8FfpK+jqt98fAoo_* z5#Hw4%vCN=bRz^zm?_`}q0elRF~%P4)g9bXLlT4d3Jj8im)0@L|9xK=1uV)}N(_Wv zc4GO`93M~g6$Y@|Dk}166eWJ`Z*(I&i=AxnrPW9B2C`8-b+o%;oI39t!3D8KQw$ef z{8>JP=jr1SUK;A>@t!bII>|B1wP!%NuI zYQLSe*}Dv(v-I!ti3^9 zk!l}#DB&K5yQ4^M0b2YuW^k2y^qIZWY=fi5TxB;To|;$ZA#$ePsH1wf(zul#)MD{3Ju;YV7xdCpAUsCOFr2vb+S}K--O z26+`HxUJtqN+M2;I(7VAaX*jz%x28I)e@CXbEh3a+xx3SFoZg#p>Mwxd}l)#iW{1S zcarm%rNLu}GY@YGM_GV3l4MQBi~?OEBr^-G)+>xpvflAHhVk=n;cDn;xaX@v3E#c& z(8Tw`!golPa}v?ZtnB$QreWva!n{joFMuNd3m*WGXH?Qx0rJvwO@IWyL<}thbM3=L zM@Y1-l@3jbY}AWvt(8!D=TNt z=F4-K@nP!;Emuh8VrWB#{Rg8&ZR3KOj1?E%s=|V?Cx=pipdxK{HuI4?qs&Mtk6#HT zT@f{`Qp$uMdT9k6^?ube2tVumbyZ~rx0hM-5JhLeilNJV!3lR#*MlHl{_UQXt=G6F zt$i`We)aHUsBK;~Who&_`HkBA4Rp%Az}BqCyEd7oMYY|st7s@SQeDeg(Y)Y2oHUdz z`iVG)7uh?XTp{PVd$oV(lXUoO|mG%P=br9XfNaKZah$J$4|lNdy9G%b@Q{Hs3KR} zL0(O@|6^ojuaoVU{P#<4@ONr;^N0Oot6n54vOAH&g>TV*i25DS1MR#bYv!`ZiLU~^ zx;H+{!gMN^cVo&|P?V2#3&9BuIr?3v6x-!h4V6s<>ibsdg zskQ&|N=H3z<*lko&pLAQ8cJaiAy;LqueeZ=__eL3uvev{wmQ;NvqV|+crxeCrZV#d zZ=2=VsiDT-<*PfTiue_^I3eZiYc@QNoH@O37ycL0KWMD4#2XH~{JATGONWXB*RC7C z`?f5-+tT+$n$mm9rY&zJz)<#@`u 
zNmIM_!{j|%NnbEDE%~}f%IH?*)e-OnL{nXT2X3I=;9o0ZjNtJ~+$rX?D;Bw+)t81Q zO?KJqXcj9{5%~Ty+>X?(Ns??a1^%pUm5dBd&2g2T>51W1rW83*mbJ)4FlVl>;#Mpw z{ZN)fWtvr_ZHSN{kBfj(S`_P$q?QDc_j~+zwocjyxO}c(y-;#1*-DR_L;K<)^2I5x z&hcmAw(_whw6aaLfxj2)mvq!?TI;`%g4#9Ku6TSMo?Wab1gChoi$lj!-jU%Pau!zz zfevuvz<0=~`pc-f496^RvCy1$*Fzh>X)oKYzBZq@@Jid?)EXtLJB1G((ljVoxXc)C zT<+iC)z&JTAvW!v-p$i&;pTr0PX5r=|pQNId?qXt4xy6FlZ zx%MuWrZ8@rGcW)}y%{v_(@&UH;Hz}D%dAN5qJ-iF3s$h10v@5ghR(qon3r0qCg&`; z&d$*^8fqt8>u5Ca%UK2xXR}YIDr$fcqX5N>*vvqS6T4^?JV1u}m`bB{(S-!QZ|5kKh8&sgi>D9$ zUz*OD0841>z^Pn<;Dnl@{Z{>baQ!i0sIRO5*5{S3e^&dNtX++4(K7dsa}9E?8sQbq zWi&(m$Wg1+lh;+X(##cQWh+zbxr|l$Fpqln;MCNUat`sKUW*KJULG7#I-D#cj?nJ$ zu{=A@#3oIl?G@+~yt}cB>4)mmGs*iVvb!QCg8Sq($z0V}L zt%Jq;_iQ#6>sEg%*yejhi4f{tlPZTkUtarLK1lh}#DpJuq3(O%7-oLBw3Ay26S!aK zMbt{>rzW0egl-xN$(x_Cj?;RLp4G^fv17TYmbt0|pi-ZBMbPE5~ap9Rpn zsS^%&he?~I{8Px+Wb=Y@(<$DVM77bkye*Z~h`T8h&MP$^?MVC?wJ_5J1DeNF#UU{&b=!KIdZlFxMh@$Zk$BNoXbuGP^ zWRHO#wk#XUMBWFF#@@-aCDFguzGDuW4w{?+0`#+Fr`((nPQB;Xx0K!BH!t+$-_B`( zE*f7z4?onOpLBnM;vf?P{Y+>&tap$Fn&|SebWwc)FRR=rdj?0ke3G;h1FVG?FzPLS zc~gjA(3&D%ERTmq)bf9C)Rs#SYACHG{Uo{%ChRy-OVNI-OaO0 zbhRIh3mEMrPU5`E1QorNg6nlPd*5LnY2|5nTF;O76-VJXIk9=xK8B*NJ2@>?ZHAGl zr}hH;lO^OEAQhiD3q2(nEUcB8-ihy7?mi$nuUQHe#mz^7EBz~ZGs;=*wpCZ5DeSdA zp1EqO)tNd^@8=A)J&f{K7wf%?--FK|+)fRrnQ;VFR5}l@>lL(K70zAOTOEOR?&x40 zlbs}6bX(R8g|S;39leWRolm?M`lpud5#K#0<8Yif;A}E4@J};l^U2-vFW)6EvD^H# z^ZvN!$v^s%JLVUi?KjT8qwl%r-X%9=iQA{%y*IO2q**Ki$Fi-q@4x3RvbMDg$+Z4^ zdY_fb@(#ELeqT&&7T|9eVpzDR>1j~~X~I6$=&kC-KkfO?ivOef|3QiK5o%%e-_~e$ zc2-`?&-z*jY}4%JZTu~ec%vWZk?H;~Nnhx(9{HUodPErWxGNj6cuJsX=#Lj%9pe+V zf%&;}}WA3R5vSBM0eI$|7VB+C=|uEIH0pU639pUFA< z&=~5xA9)0(Pz+%Ui5oG#c(xc1%tCD6o2(It>^9h?2yWp`JP?&U2z;>~hzyWFgoI}@ z2dR3%rVhskbw1R)^PW~zPQD~PRsd5wY?c?;oWLj}y;tg8ksqAKpLZ++M5Yi-K^yT) zk*!o$xZ?tqZ6_N|BQ&YXgf(b^-E9VSxWWl|TnEPd;r`b%?SRGvOJT-)Y5^1vW5^|QbhB1bIuAHZ&)w~Q_Eg!r;w%)8xxOSUBf;3*_QTr5Z>=k_iFN|x+_ zEw?0s;8DxXlL)`~1VCftgB~G(dCNb5y_FgRxcieO6XYDB9r;z#NhM0JNFv1VD!l7? 
zWjTZ4XOl=JZVWZXZuvpLk%S|VhTMkah7QVGfl8GbfM<%ffq9~N8_Ufx;W-_~0)^pm zopuvI!*Mz0_CWg%%Hx4b>v@Ac&^f2})1AxJu|WTRK=ge%tbk_kfWZ=nA(BK~MdU<` zD&GLh9lT-8Ube$ zLHB7Ef{VcKRvHIDKv@KD29420{J#kyNM9*LTQ|(9)EE3*vKL+*1hof(gEz`8NA{IC z{|g}RV}ADQ^u+Jcng6|=fMhv7@qAR`$++m9NokPqLRCQcoJv6b+#lFWDIWM*J_n2^ z(+lU8ml{}bE z5zh1iZ+wk6vdInd?Fl^xl!;rQiZKU@^bb|s7tVqI8CNu$oV*XK5GYMP0$6C`g2`(^u1CKQ@@H9M z-rxwUA_MBkLA?6x7wg1MfrBH6h9Hf&jVO!|ZnMGdTs{?VZ&|8Pt@(}nD2imjSUw&*7}wC8Lv9a{t0ppm^`ECT2`ppie&r-HOW>AAtU z`TwO$p9NO_U6=*N2SdmMrLp*7(Qtup*thLlTk5Qvkb;7+km7~Vh=8mF-Bp05=BR4_ z07@>em97D5Kl$GW#1RBv{tMTE%N5*#{E2awwq+Kn`4A{ZQJOEo=Z}~6eV_{PYvN(> zQXVORq-g+wG#@$i_^1+^qJ*hSjEdN(1lm6)I4Ys~kBN@%;M z+K-n+T9}RI8f8OEsc4qb~Tv1{gVOUU0G z5gX~qm2#ww-xeFgEx7A%M%sd@oRh;XNgB5Ns2kKq4tgXl%QEs}sbRlzR-bZK! zT3n$cA7cKlBx`*W&4m=)B%8}N%tqMu&kAYZHqN1c(l&wd!~B0vbGG|?8Ct;j{o`+o zqEr&3lwhh|N#(%De_;i0tzxc0eGuk030eomkcbsfz*j3G2Qkx`U`9}M&$UKhhB-s= zMD?4)ipc}R_elD)hON*qg@pf%8cgxE zvC#$3p6Un=JWji10JxN*?g4#@q$j(9By>DS7CZyZpe?)CYNWy-yG|V zKAOI6g#;c8(4*YDuvf8#80hPiw^%52M~V}q*?1z}i`!eEt8xajq%qh+wRdy9I3FvC ztobNUa=5iPqP_Hk7sedKVqyYjurIf$o{E4+4g3!j%temVq`2)ayhk78rij55s9PZ% zEfjI-eJt+6+aE|BA`8mDD56+hm=_!lMLGb+xchYoaqs+;Ar8{M9=thxu^jR_jHPXk zUHW-tkX6!k(lK1oXmG_v#Xm`X9M%10mm&C>@yVzCzwTrI0Z##BgBXz0(V%v*(H>I7 zY;-Q=rLlnd22b-G6(u>{LOW`B8E>48!^;@Tn|i>`2B83EmBr zvWwTg<;-`=(#Mzli0Dz$;m&Yl4XZ4VU!{h+jxGh(s)?QHm*#A#;F9 zU@oQ9a|P!}qwXO@-wLEv%EF>AiyEC_U_-&Ha&|Fb6J+ zxOB53CqHev+uW{^oqc(jGtC+kzyOSVVQ-H&%E8Ohs;@s;)SFPWikN|L{{_m?=n)wKU`K zbg9Kr(oYU7I(I`KU0Eu$=O2Wg*1~hqd6K6@xOusBX~2iQRk=IP z&o)(D)ai|g;4E~;HKk#gxl)OdM_mymZ9CJ|I7gcc%@=kc4svxvxMMlS@Woo8zL$l& z8S)~}q(t-7YN>EX53h0S(;d4*TH1amEuSgZfcsn%#);KXFgqh_dZQ9;3!x8*XbLUkqaEl)vB^hN%6sV(U8cB2I?A2Y zC#WCn^GPHyhI!vnu4l~rq0G=f!dPwmfv6wzpp0r!?j0W1;wPP17IggKjp{I~&2Zc- zpkQPd#bY4Qwy@V}n-Ok`ZuDXK-CjM%9#_CHnDW7sYcvf%x|XM~Ql{WY+;I56a|%0c7Gt z_RAzIqY?59X7hk9i;>AV3hNj?h9oN(wQpxM=o8roK8gvZ5MT5 zvrxn`{QR|aZTtMBhzqf?FG!;cYCBBh>3-*4FB|B>g*Jp{sQGI0vO^P?+N9p4Ca5ou 
z%Df&&BUYT6(2dk7RR|K!@IxqOIb9pPYhRme+Xxho7jWmE2hB>`a>%H$*Amo0XCOwm z8(KSvX%&baj-v^~y&goj?rpmiu6V6QaIUO_%*K@l7?|glLy7}Jy_gOPnCwS>-mFHO zLM1CDQ}r4b$2r(H-u@c;Za4Uw;#c|S^55&3({`<;CNN(SPUT$Nd_@fW`EA+THP8dL z!cX)j&AApFTMf7qFm1I0a1|$sCRI%FuSQKCVPlb}3y{p#3NYWf!1bw*S~xy$0q2Qc zyqV`c=z9Dboyb%>k(+8{U$N?|0f1*^h(vP%>r&+s1WiFV{I~t=YYm3tTrqp}hR}{} z|HA{|*N2vfjqq&sC0A2PQW!uN6@fT7J4d4o0+0EzIMsv9dAovLeZM_49~3zj>x!M%=ID$X~ZL zh}oFXaG=k)jJU~-ykDOV>gRiW514vrI_zzJk0ZoiMIqb=8y|d-pQ7;`=^!10J_2g~ z+{D7i-yCxn0)mZ$SA}lY>$dj0{pCOvZ$N zl8jGaB?C^-=Z+p5$FD!3`8pN zF(HHX!i$X-$TDDM;L44R1|FRrO4xviksC>xBQz|9;TkQ~hS zlM)adB`>Y!W9fjf#RXN>=X}P!(wopiN{u{|@9n=f*CR4yYNqds7Xm(B`z0aE^VwZ@ zC^GEQx^u}9`Ur!`#s+h!tW*=iKw`f%Sv_?`%V0uHvbKFEiWe$ZbUL4XZ=PGmPYXs8-JDg%75t#mk&eu895!qck0vR_8`xKxO<3>l8r zI&6S?@VN44#9L~j^6PYHz39DiqP_`JR!R|lPbF1tOIPooA)eNgs0yNw91QJI)n*aIm*4GT&MppMr zLB}RK));JkQi~>ibkF`c&#bsNnx+bdOJD%Mz?LULRFA7R(1~zSA}cH{auN)!{kJ#A z?sC_Uhqo71>0iD%e%-8ED!o*(iV|8pKG(n|yoH|Qii^Msh8=y;H ziHbzhK^xAs&D>dr{h_5fpGFsyj7Z^I&}1|ebpH>Ou$b&fl!UtSI0kiHqNz*yIHl6T zYn+Y}UWD~TF62bYz+O6<*6=P4^VlcK&dd!l*YEuwQ++8w1E z0sUiQUcR|SR9AJ~DwIwE5i0DCI4>;?N>d7SzgARe*_q0q{(P;O*rRe-_Ka-IffTux z9?i02LBGay*?&!yZns{N&f-u)nF4*Py6Z!mBZ{%Cx}tC9j}jPJO1gMC!-G_`brAf@ z&FJYT*%7M80Sg90rY>hMi^+U-Srrlyfb7u;Yb3LDtnj?uNH$%!B6_nBu~4<}?Ht>s z#3{S5XZd6UTwrM9&_&))PORUy-197V>Yulr-b~X}(<94lO8HxvKGsb;U88~1Of?vO z)0p%$ecM)Y@}A^$&z7^ZnVVJ@g|C;Lo|j(_1;uPKCo4M*Gv1A>b zY{Ph05q@|~3VO{y+;ZWld?(vZ& z<(Clr9>TvmOr{|&7egCm(D~ZC-NIfCKVSQ>Fkx>pov(d(IN5iduYE*>us5GNU;C&i zv^S~$0P$@L6WNQtT~8Q+10M+kAr@2HAQ%I~a6+!csfvd?AQ{ta8Yb6tSb-^Q9ZuFp zm;o_PSnTgkSvRqS=t_OX}QVHRM2VY}ETtc88WS`}bzid&tc#H!Pk zE7S%hUR|t=SJx?1)Kf~H`l)iKdRpj-2z5vlls6Am@7IyaGA@6gfYOoL@xFJCXAn$oVjGejhoXLeAf? 
z7R82~qmZ))IbV&OZ$Qo|$T=4|dy(^8SJo3TH1XgIGX` zW4n|w$axZS&PUF(k#hxdz7IJsMb3{Q=Vy`guaWcX$oWm={4R3-2swX+oO{)OsCJCY zC_QJ}!y%k6M$U1_`FiA>ft+s@-;hVn)yR1Xa()Ckzkr043seOz2{iu zJPJ8aLC({Va~X23Le5K&^FzqF89Dz7Ilqpa+mQ1yQX~?+@InPJVk09qw$oVzo{3qo61>DBmFoRtR zh3p#Sd?Rw6ik#;n=O2mhEFp zDad&Sa-N5r7a`{r$ayVtei}J%)^iS96~g&ac`b5&2{|7^&hI1V z9=MrZLVqz}2FpawWypB}a$b&{pFz&Mk@F$s`~hCEX32RYDvsN{QwW+g^*UReM2U=R{Nx=tt#8WmC z*vtX#=(O3v=GfKvA)enf{;~1Tjc?$onV&p)oEm}n_bktK5o$c-IouY|gAKS5$ z!9I4@#WwjxiKRP1JID+zCfC)W;sBeoEzlOYyHT9if;cT^uvj`{>)PAvs2c27yJ*8= z!Xm%ur=RY}3KKLUdw)Cfth1SVZKecor&k=id%x0oSu=|Pv3ij5ag3aXdrii;K zUJt2_c!ftwpixzs&Aeg525nBL`F3qi68u}llyqw)O|h{FUgc(PgtUoAYdkWj1@i!=lpXu(DfiU9ySuaab8GNGvhY z<^r3mC7~su(p!$_4S@taLmbv_0lQUnjOc}UEETw1u6j{WZ$a0z*0ps90`+!tuicdB zJ(thwDr?g_t=$ZEF$$X3%Cex_I>6%mXQPNK?WE?kT4c+3)YDltAh< z2dX2b&yA^MrOSnW+^5vJTq`xRJ~vjup?W*8R*BUU%Rh8(Vyr~ zNU&PKD#jHZx!z%64qKmtc3I8Lsx3_W{8SY5_qKYgg;{MlY2BCs9AvP zBPi+@fq;G|2n1JO3I(PhAD0R5ib$aYL(hfoX%-iBV1dbu<3LN9m`WK9ZCd(~;gHa+ zm1*%1=+`JBNQ(^#38yI-5>9h4B;H`+taY>!)=`-SkNH$KU-R5< z3?F>XjPsIMhfb+^C3e|rYY?*^z-7)*EmF`2kSG@66==Z_YHw?CD$E%YK@>vn_ZoXc zLkJtN%i8F=o;p?U=)2!@Ogt%~zr8CYggOJ>giZ<}i|lP(B2_?DAuAhLZJ4>rl9FPr zF0jglwGx-r8v+;WLny?wFN8XUn|2BY!4L|GA#TFk6l5}pya`-*G+oIsqBwF=9AS() zZ1oyR!A(qj0=7Vb)GbSwSwtxH^z^Cq4VGnMAW$8Yl;rcZ_4K%0Rux=YFnPUJ6S&M? 
zj7RJ#U3cIm<`t(MoQ~isGnQ5vZJpWG=(I7XLklYO{GM*1+eKuEvZ5?iU*4t#i_kTh-~EoXBhqy zk+^4>h{UosA7$ah#DrLFhuOUnSEz2E%gS6KQPXow+p}VufxX>zRN5Dg=(*aavZtjd zu)C+;WyX082}m#UvDq4W+Pdok-3{nWA(fvb|B^u$jLCp7=nqPaN|(Z1p-~yIxtNU~ z>*($Li1M6vQZIsa2D?P=>(okGg0V?^=Ju`*tuC{m)+M&OU7cbv4n`-2r?z#L@oBT+ z&Q}DTh&r+Jb!+X4z3;*fvGdjT#$rSFd%yE+0LREPxSU<6O<1h{O-o^}AOT4ami&~g`@J!jOX&>Y>Z-Air36X{#-~pg}mc z+s51uEg(O_9Q#qvG1@IB)}O$8C+ZWaT*YJjVk%!eUfXFme6*)mq+qx3lhBCwy2KtD zvm2D**#Jr4MHDo9p&t60JKp;t?bY{)_%??mfPgTe6w!L%SXbAv1Fd`93UiwSp@-eY z>|w?n(2_u1>#Mo1quA|+B$v?oNQyeddq6M|XzvJ^_^$`5oiBvVki8E3~vKjk3*(*~n&JY0_0x~?Y8pL%=ghL$Zv2k?e9cthq3vfzUU z1oH(Z4B+Pegu(k0hITCyhW05;@G~0m8I@AvtC%CN2UTm=Y2rGys%Un+XX3&tU%Y2h zRq0*v^n29to~)vpif=2U+8xR(;ith|gm&O5Mmsn;2kqFhxoF3B_|T4Zh`(=}k(%d$ z>i1RjPpkPW|v-$E;V4C5dLlnFWc9th9Rn;?Ek zKs*zJ?OI_3jE8}c4|l>{Pz^tVW_Sk2W*r=XKf@My4fer7_%CT61~wRp@p%apV2q0I zIxU5t!LvA;P4Fi;4lly%@CF=$Po#Y~*dYOK#4)@Tis-Ku-47e!7ofo7&<=lrt*{gJ z!yn*p^fPJgnFw&eD15iL6sEy!sDSyf44#7LK!qpZUFd+9;CJvQybY(&j%D74GZRZs zn&2wjv=qZ*QX3AJK?$tIkykTwb0@=;yqu{Xm|c)}vj=-CY{aKKP8bbW!%g&2uK0D4 zd*K0`*XQy19QY|5h4*0_t`2SR4tz?_*l}}|0zZK1;`?2&9CG0_-H(MRjL0z%52-K& zZiiC12Nq)XdJujNFMt_tf%Whnd;l-QE_e$L!)LMw(Kxf$(ckj99A-cjEQ0%B1#E<0 zf(7#6NjL@{!Yi;F4!|Fw6Fx5PT80tE6OJZKBupkuBb-uPe7E0{OE{a*M>wCbk#IHP2ExsRuMzGiJR{IL zoG_6vgRr2)S3bw;C7efCL)bvLoNzT^6JayqM#2|LDk|@`ZXtYy@HN8Sg!>2&5FREx zT2@|BWbGh4NqCCz3&O6ls-j|Rk3gG^Fp_W(;V{Ak!f}LYMwrs)z!Wo2K!m{$} zN}tU~ScRP}vegkjP1r*Ci9kCiOe9PuOe37)tEj!(o=Z55a3mSa2#PuWmQRqBZDxDFwZaUPbVxSEFqjn=qH>{SWmcw za5>>aglh<&Aly(@?5lA+OL$D6(?l3gSRf|Jxtwqf;ReD@gxd-C67H|Y*l`{rY$rTU zc#`lG;TMEws>>_NTtH|cbP+}p#u5%D99n&MvEMbEa13EG;bg)*!kL6+)zt~3TzTt7Q*d_qg?^Q7Q!~dw+Y(`j}x9GJWcpDVUIw!g^(j&>mEQj zh;RsD9N`GUF@#BkX@psX1%xvROAyDneS}qnb%YIsO9@vHt`gs%ajzv@Pxv(9bA+1- zw-N5BuJu>D1B5MvZG>+Vwi6yFJW+$b;r@iMlkjW8Zh>4Ov=MT`Xu^Sn9>QUS@r0wr zjs@3)=%_zqu6UFLB}K_qyoz6GR8}b) zlr2g?Iiz$bohqo|sz*&wQ`B75t1eQXQ;(=8)vrw!(*V;j(^yl6X_~3bG~cw$wAS>j z=@runi_0>|5^qVeWLaife3p94O3Ql7Cd+GgL?%aOM;1m_L^eb|6xkfPIdW&@fym>Lrz3l!c+}vi5mCue 
z*-?d26;TaQ4@EUcZI0R*bs*|^)aj_6XdXQ{dPH<`bar%MbVYPS^h43j(VL@pMjwbi z9(_8xCx*uiju{b?9FrYW7*i3`5c5z>bIj(LoiPVuj>nvi=^4NW3?49IK=OcW`m6X1 z_b0TTM6WgIFBtU02K`8gZZecxaAV~GD+|#tsn_e5exiTgldP}*il_DZu*v%Rhs`wT z6$blN27RMJH}JV~h(Y%o^d^HI(D(nEkM-pvO?rK7jX|FuqGL85TW2WWPr8CDaiqpD zu}PCRXv|F*`2%pbF-*HVE>_ARQpQV}crNSl^-?aAa+Q?p^}QJXww`5DgI-U`(A%eM z(CZTnvoLXyL9f^AX|V=aXdpcn-vX0y@Un4#RfzjF@l)dlSOyQlT4;u6VKck}JD~**z!5kOC*ic#+dHzQ zoFV00DeI(MA*GDxJD!trhm;4TJSyeKQg&%_ru3$n(wk;VFP=G8$_y!|OF2)<1}Wt% z&TN)adi%`n`ca>?LqAHh+DVVbcjfaN7p<1vQcB;PC4F<2^vzj4nk3MsXk?w>+xGRi@ zF^~jl_&!yDyZRE`xzjUV*}7L|67RE8$_Vw!*1fWI?{O(lNqI(-MRM$m2I%`RTddah zeh)1!BVEP!?P!oAR$L>coaf>dWMjy6#ji*ikg`q6qsH+l86c%c?-wOAsNPV^^->!4 z3?*%Pz0{;^<90>tr6RldDSIva@X# z%J^A$M3akL=kn!6Bczm>@7^RS)6TB3K3U2%Dc2jvre2O`{XSy+FJ|dJS>JtfpLL(i zQ}@ZWyg_Ey2ASO(o|bYmSXFT)zV9{!-_l2e%xsPOq?GgBD95rLDMq()jnUDpdA}BBe`8xz4X1 zE~VVHt(M$Zkb(GqgiH*W{-Ysnybjt5#sj~a7&j5Ghnu~kiOR}*O6w~(`LCx_}NS;DGTI>b zs~aSvr=pF0dP?r8o(kwQ|5I%SJ@jo<=%+&7R`FuvX}$hTxZeJmNP})HA8OEt8T2^) z+u(CU^*hq%Z3f+9&`tW7d|ol=u^~Fv|GdYbZ`A8Ar0K_J^I^UI>tTBPU)SmNEh|Fo zQGap0LEoX*w|eyTZ>`YB(^j9J@75ZFUZvMxGVGUL+HcVJ>Gf^HC?5Ozvn?e=7v*UN zeS+cn6oWoP&+p}p2EARczgnVye*0#<{+rPT{S||L)S#cy_jgByUjOYN$`SqgCjIlj z-C@uTx^FCy-Ba{u^4opUjKc(UT?`Y z=*#qaD;RV``){Ne^!WyTkwIT>&{yj9{R0eonL+OkSx<0$-wb_&7P?`6-}D*uijXxz z=#BdMe{+d`{@y&H&mV0A_2q5D4CO-&dV)cpV9?hZ^doxzcxyN!$C6_97?iQ~^clu% zJb|rY_=aZ=H%HLtVUb{QW%3r+WWJXN_-?#oVV|;3(XNx>>cPHXUx32CVqbyEQ+X?+22}~iKK&QT7XAs!7}WnpvV|W31+{j>7Jd|QA98hQ zdqduVeGOpg`y^ZV2Pk)={vpYh5X%$N@~_hJq_q4B?AU^eEs`kkZZYc5jCCF+Cs$nb)7K^`K0gWt<#lccGgUy8dnUCEK%ULC>gca-_ zRs%m~E7%Hnm^HD-VHNwFeGaSH*X(O}l>L+a6V?dVfX5VzVu7`aOL4(EB|?dSCS`y! 
z03KH^Q7(Zel*^UN;it+~%2lvlNlkQ&y_Bc$6m>A4z$d7e@id;MdiZ2MS^WVoKn#Nz{ud3<%H~cqh2LCPptvZSC7xPX7W9Jk2;0_p8sCG znYZ#*HH*K&-%zLWH~E`tHh+u1rRMO1{Ggi4|G@vC-ooGE@2GkFkNl5nKK~Q{lUl&v zNNfr{ulK&{we=doz6RXr+Pd8f`6gT;9v4D)jRmt{2O&9I&Tr>FQClD z*co;Pvsf?d#q42L%wVE8GgF*dD9)_PKxH7-g8@8_$H7H> zI3EtNJf6qH#e4)G0RwpgPk>AKwftHb#INJm!KM6qemxB4H}D(aGCrP<2M@oI-v~e8 zH}RV=TZ#O8IpyD>d=j4oSMXVU77XLXycn+JrMwj4_#8e5uHruKgWw18^;WkUt1x_)5MKuHz5!hhQv! zgg*k;^GErkkjNk7kHHOm9bX6I_~ZO>7|(yoe~S6~N&Y0<$bZIv2Fd&>{uJEAf6jjn zDf}7!45ad3@L#|L{ycvkCh}kMUqTxH75^2a^DTS}WbmzgD@@|s_%@i#U*WGnCg0At z!xX-Q?|_^6>-=@dqWm|N@?SRQ&b^R>xw8dw`992^xA6U#ZS!~=X4`yz0JCiYKZIHK zR{l0-*=hVRX4%{L5nMB-^LAV_Zs$jF&6vTD^W$&_KgCbMO#T`F3}*4q`R7o`|IYso zck-|JSKyVuFCyNC#!#+(0Da5MX0cg#yOfmzGrekh6%@;VTmBnVTe2+~^Y8`lyWoBQ zZtojN-nU%rwph94y$e2e!N>khKE}-R#Lv3e5M_*-VVVwubU$C3N8f%}0xMuOJONL` z3$PV-z;4(NZ^Kde06u{)aAyz9!d&(`Q0>+B8v8<$wf03M=iBckxxjuO${M`-AMVZr zPKsjN|EH>Yre?Z(hMr+rmLMV`k|c{HQG$qoNK$bnNl@gXqQZg!P((xpMAAia&LSX^ zB%_Fmh>4s81QbL}h)8&+e%mb5SKqtd>;Jyb`~2z8R5j<@UEOu6>YNJQy?qdOkM>2} zBl-;Dtmw0ddq$t*xCbBCA945S^N4#y2O!RhzJR!A^hJ)d__&u4caOe|xJPsl;;iUk z#66?0aNLuR8;ZDl^i{+?qQekpMTaBq8GV{0(|#QGhz{g1D>{V3o_|3jBa!pRqoaJy z{N2~gXkRn0`I;HyYi6vkk#W98#`_wX=xbz>uaU{VMyB{0nd)m~y04KLzD8#H8kyy5 zWVWx7xxPl``5JlM*T{TdBX9T`8NvB~JURgynFfu_`HLD^=xbz=uaP%>jV$&x@|LfW zrM^a%`E_NvubCCTX5RKSv(nehDql0JeT}T~HL}jv$a-HR8+?tt<7?zyUnB4N8rkG) z1*VWuaU#PM!xbja>UojQC}mQ zp^;CaksYWh2mhi*zV?8u`}O$alU*PWc-7-q*+vzD9oZHS)8sk<-3L ze(^PO#@EQNzD9oYHFD0^$a!BQ7yP<%FWoyP3*_yFswq|VQYvvd8mL}WFR7Q+AT^jr!3WhL zby$6+j;N#RnEG0MqmHW+>RWYEeWy;T@6`|LNA;8XIaZm|HD7+)$`_a*Cy-DLRYQ`h zv1&|8byS_mQk~U<6jqO^$0(w@tL_w4St^Tr0=-l(a@5o6X>wH`^&G|2ST&Y%s}<@a zN{d||yPk@3>iNmG3RzcLg{`ZsBG%PbQR^D3n02km8Q;;*M3?3!{P!J*UNnr)$$lPF zsIECjTzc;MB{Mgrx-uav&ANh*E^ZYkC*{2C>!w+GtaK}{m0{(x@>>P0f_&`d>q;t0 zxvZp>+X`7>OIr~uYT1@!xmL`ITe&RHGV@&)=dCZ_q&Y`gH(E8xwQ5eKnCxlC86x7A9uO08CF)LOMptydfVsEm>N3^jvOpQUDT>hsh*PW=tV)|v0{ zBTn7q!=zq@(*7{jKgb7kcF@ZWN&`^4nTlsJrK_h7t{hdbBYux6?(l{DV6KEn$qRDIlrqVQ;PBUmG&7#?C59ZQ5 
zdY$Ie8?=BHvUPZq7Smg_gqG4WTF$oOZCXjIXf>^&wX}{c#|CQ)WwQ|mM9bL$Iho3-8AVePbbS-Y)0)?RC$wck2meQ6!E4q1n-udE~1QR|rX zwe^j4+&W=>Yn`;dvrbvxTR&JoT0dDoTc@pGtTWcH)>-Q}>zsAox?o)l#X?D4OV`$Q zbX|RmuBUI+_4RGKfo`Z9>DzT4U_C^?qKE2N^)NkLkI*CaDE)UmTEC{p=&^d7 z9z`c1u9zonPxrFxlOu2<-{^-8@;uhwhy zTD?xM*BkUZ`dz(Izo$3p_w@(*L%mslq_^mg^;Z3f{#1XaKi6OAZF;-jp?B(Cdbi%A z_v(FmzdoS9)Cct;eOP~`kLaWNnEqOSqmSzo`dfWcf2U9B@AVJ*jQ&-h)xYU;`n(lxdyO;fh{iOYr{j}ZN?qm10pRu2{``ORg{q5)N z0nSs-(@t-vkJHzA#(CE1=RD{1cLq2wI0Ky*otK=Kok7lEXNdEPGt?R5jCIC2#TFuI~$yL zoOhj#&Iiti&PUD`=VNE9^NI7R^O^Ix^M$j`+2MTY9CE&LjyhjE$DQw-@0}l=pPZka z)6Or>8Ru8$tn-_5&bi=RbjcO2bd_tlAvf$Eau2&-xkuci?lJdk_Z#=Pd&2$JJ?Vbu zo^ro;e{g?ve{z3zPrJXkXWU=iv+i&1IrqGK!Mzxxn25=kidnHxEF9CZNGuw&V@}MC z#bWVTu9z1~#FDYxv9ws8Sb8jPEF+dLmOoY?Rxox&tWfOASmD@Ju_CdnV?|@v#EQkn z#>U0Q$0o!k#wNuk$EL)l#-_!l$7aN4#%AFi4b1Ihj=is(DA|Hy=6teh!2bOUbvyg_ z?J85qv(vYxC*O5P@KBTL>P@1T%9+#XJ0d#SyYw0+7x#p%h=EcwZQ$j1gf?El~X z^?%318Xx*+{pNqtXC`$=eZTIkpJESWJm)R!GaJF1HD%8<1b(t5`$*#*vwdTyKj{^p z{wMzM_ulX|_Jfx_V5)cdzvc~dydC?+(d@rdy<#oyeVD#ScF)7~JDPCM<5$1m@f-I# z?%^KC1oSy3a*yLI*V?$wRx>E?FR+&pf&o7c^7^SSxm0&YR~ z3b&AZrCZp&$}Qqv?G|;faf`Xvy2afRZplCNw$J~uzl}%ZcHH?>UU%N_{cb#0+>0lE z=Y8LEHaYLZ2XFpEFT9=o@J?s9v)9>w$sfnl<9Xqezx{8$^7-HU<#&@E1#u-@;Uhey)0jpgK4OIMGm8pSUrDD zj*@KcF~*;uT$QAWDov%)43(}*(o9uam8F%cysArU;qgC*$NvJOw{6xG>tp&Rln5n6 zM(A|tw8$4a6FMXEht7u1iUMI)mnay1F+5mY5q>2+Toeh9438GqhR1}*i&Ei<;W?sg zcwTt1s2*Mt-YDvZH-)!~JHy+;+eDl2&hT#0HoP~yU$p1G!Fkase3APG{d8qrT`bc# z>YK#sNQp>Eu`W_NQdVq;l#i4b??oy{DvM2#s*$SVeY1WhK8V~LxmkP|sU4{;Hb?43 z>WPmcw?%FfTOy4jjl{>17LgWWYvk_8-Qtr-W+YR58fhEpAU=yc7b$z3 zF8&pn#aXc=W*RL2FC3V;AM=egg>S7{thsABr=`g}VP^iMVuZC?jr)l;c(bV{Zw|g| z;Zj51;&eA}x$s`NM{RgZ;?HU3rp~;j;d=vl@TGyg`2J`|yi(JB^0^COPlliMI zg{Tmoj=7TCYZEHW?KP9G!m4f&wA8CbSJ9P<;(3{CL=TZg#YAt>o7?n@;zcSUUKWF> zq!=oOQfY3%W9T|=!EaF+u~w|3vSOonkIIY9Vl!35d{ZT~yvpcPH^8h@8_YVj%b5#% z$Da#(nKgBVEHBG*kFm0>%ss}svM%=+8^{LSV{9gyagXtCc{hc)-_wc0+~fHh_ZSD- zFHyuEWDh0Bo?*`<&z^11qolpf-cITEPJ0h!*!%56+?RgZ=|fks_vlYWjc1?|>=`Cg 
zDPzZ}EZgyIRGzK(Ub>zw_@b6dHKN4%OK86p+H`$%Bp;<$&r#xcr0TfMBDe(#ThYdvA|Y6#24|uNd-` zM84AO`St?N(IR^Z6+j*fBacPwqxRQS%sy^^$N4fjErXnvM@}mvr&ZYhP3Q5!Ode(2 zgk0Z(T;Ixm=`=NDuXG;oDM;>}ckx$W8*%&Wh#UcoAB7w94GEexe6xpy#+`myhS zi$=1q-$~<9_RCQA%h`i}Nh?qT-bM{ri5jp9HDEPrz#7zmwd~oyqjl`vzo+%=;m^|s z_UB>R!hM_!+Q$8wt7sqhWlGW!?v*%nl6w~k`ksA!ejWj^CFBv(qeNx|Nwv)^12r|z zZBcXQDfbrI!slEeUcT-$T$#>$Bkg0V|Qc7gV1HaKQX)gltg}7N zp2k;RYj5J(_N!y+FH14b&c2_ka3|&bt4qF0{(r}^@|k&HhX{5^zz!+cAq#dW1UnRl z9n!Ev5!j(9?2rvRmZ`~|EcqNJ!Gs)^d7foR5e*IslH-9#4mUHXZEVu%>QSDhqghSSD7B zcf*1I;!O5_CT=aNB5ot9 zA#N+GBW@>ZAihuBh`7C|iMWHf32{emc_!{8Y9YQ~)J6QDsE4>KdsGuYBJMyuCY0C2 z){;;L;-#T{CXW9WaZDV_9dS(@o)(EAUKNQWULDDW_?kU8 z*h$>L;r*hvdGT}pl+{WRfq5+4GiAEfb5w~+VPBi9lY$%N+ z=Qj_BOG9SncELE~r|&7;M%lKt3b`keOg$o~YlZGSzFYf=W+^`cyt%8AON2G^$gqA^>@ zOwRZHoQpZEtud^jxrkefd5GJH*AcfB^AWcbZy;_j79j2*79ze+EJEB-yotDzSd94o z$Q35$JQgxB=kiL#t0ILFuZ~=W_ z%0+mpTftTkTDr((J|kk@bZU$@j*3$S_Qn28lt@Qi%ZoaaVXkdb5&3fVI-8ngY%klB zQK%~j%?g?D*g}!W!($o&OC(`mj8Ds;7sZ|8ZgG!jDee-T?Uw9)GPCD}We?d$4wNI+ z7&Tihil2+0k6(yiWcxbao?uV3C)tzjDfU!*8r#}+_Ii7R{f_;vz0rQp-W2~OF5@b0 z#Y6FMT*o8!ul8B{H~XA@-o9X8blB`UuR6n=;m!zWq%+F-yEEE(%~|3sb(T5HofXd8 z&Pr#Mv)b9^>~Z!v2b_b>Vdsc*%=yMS;hc0%IX}4Cjkr{*AuXiiEH@H>Ys%|y6x?97&(XHv;E}4cZ@Dlk z^Dy_CPz%*UN`~JEFQDAvrQxNNC;UtJ7w)(5ScUSMu?qKZ_Ss)i2{U5h-poMu-PL%^ z{ub5Y(e_Si#3SzAbUS=^WBBen;Jcf^cQ=LaZU*1o9KO2+eD|I3-FLxv-wog0(z(E1 z`ySV0Pu(7#x;H%aGx0p^sh{I~uAt06Q=`)A?umPEQ-P%+rwf_HOE&lK8V|Lx)6cVT@gqosI!eNwW z9Oan{<>{e36DZFl$}>00GY#dL2j!WL^303!%s_eOLwV-sa?48v{JVET@szudbAQus zv8K(Vo)YS*Ks}YHr^-Lq4dqHXH-md6Iac6OONZq^D?m*Sp(bn8WcDH4gUa(~=SMv= z_LkWz&9R2DhLq-yEAXa^w~nM|-c=P|JM`u*%T=sW4$1BVnx1?M^!8ryB{lG-#K3^e z^b{YaDFGQUa(Ln8sdX z%I!7xnp2v0uXiuy@!EQADIKXaGH-hPlT^%^HqLZ#&dms~Go>L{%5S)YPVw(F?##VIm>qO??8%th$<~tvTQy7Hs#1InI*GSnw-YcIs99YLxf^~B8PGu$&pD}U!G*? 
z)BOAC@{w`}|L*5~99t0AYQpjgvS7;5<~rQRQ~6b4RqXQd;)EQ=g#EFSDuQ>1 zE}hYfe__tUF?rJL=bW)Za=D9>Ot#8xW_`Xp|yb%(At7UsJ@O6zK$(a-TyE zlEVUbIe_KESl(SeDW6HXWy~m>K`JO}@>rpXKPtP+XW6yB5!ZUtI!buWxQ>_f?)2`Y(q5+5nyy0|Den#P z22lmn_w1I;eP;4P(NL7nY-d`$*WT;I?d1Wl3x&O|-b36{9`PO}*L&Q1oN{?RyeG)> zp7NgJR@d9>OX=RTUO&p`_4oQy0dIgekP3Q(y-{?fH^!UB_c4R*hIy`E@*Ulo(?S#e z70tB-t`)+yqPUiWYsGP`T)0*mu9XMZ%D}bq<5~s0m%W#%P|g*n;)-*rqW3yqw`z_R z<7*&nhVXWTEfMz1eyfh{g8AR|9H!@3yuoC##p~)d05e~#D2Xo;q*Gz=J2+R(vDYML zuPOGpK6!P$da!ima83pl_0KU@^wL?E_nZ3rDQAi)xRZ+18Gq>hNqUCs$sit8j;0Ab z9+*Q5XbF!9*3%~1!efG6bbyZVsNe@WLl=a_<4aGZi-J7DEH283N<7BAS=1Aac$C?a z`_~=+ah%B(Q*sTuhQn*TVjLFpuI2DrkFB>?oGqzjds>pik{*w?yi%s-c%|8DO7A*Y z*fJiExxBI-kGZ^Z9*?=a@*a=5yb2zVxx9)VkGZ@`tYKq|dCcWi_IS+Y-QZQ>u!>ie z!>VkfrB}_X&S7Pc1&*QP1cdN%^H?O|OV>j_pAhNd2Rjkv8yZZXYJKbxDf<60`Y z-DGl@$sR(opJ>HlD{e#5Yt3y)a!VS-p=qxo6is=9AOmgj)ttGEf2~F4M`>fk;I-jC z!Sg1JRYKj%A`#D|=Xj)7j7u?-t;P^BORN`r#A)gAh`+9EBYUx}ohLWS1M;j&Q)N_r z)m}ZVMyUnr19ezkurjO)RwL_v>ltf|wbM482zG(mzGwk>Ziskyeqc$k51~$UBjJ zku%X`v~;vyv|aSc=!oe2=%(nw=s7#xE@wBeJ8} zFLkT)jcd7uxX0Pd?dlG2C%G%!&)t);NUTV#M(nOwx7bUuX|dI@9kCy{=UFU%^QAEu z>z=h_!dNAQ6)zD}msx*B4MBLBTmy0q$Ta~{45S!Hu>iRiHfs_l7 z@*w3w$_GdVkP09b0;D2HMUaXCQVFCINTmR|9^`tE>jR`RNM(@90dfP#4InoJNEMJO zAXNgSDo9n3ssU0Bq#8)I0I3dA9i)1I)BvdgQX@ca1i2C9#sH}aQWKg zax=)yAU6j{Es$CuwF0CzNNteX0a6E~4oIBsxNTUF04AK~+aey=fX$I0P zK$?Rz2WcK4cY@pra%X^Kf@FeZ2FSf2_k!FTAgw@JfwT&c)*!7xS_ep5khUOg1Ed{D zJCJq((jKHeNc#Zk0MY@ZLx6My=?Ky>KsteR0_hYW4}d%X@<4!e2I&maIY1r+c@X5m z0O?5#0C@`JDUhcEq&G-!klq2(2c!>3 zp8)9#(ify}fIJKGEXcC~(hsB`NWTE-57Hl`e}D`C82~aMKwbcO0px`M83-~EWMF^{ z0vQA{C_n~-3E)MW@xg1cLA3l`kngFA$advJGmcXz$G zYl6$g-Cc5Vy*MvV)y&jqF%8Te$4!e#*=r1%xGMiDdk5u$h=GN;cb%p-d7?NDr} zCL|+WpFD{7UE-IZO$0~0;~$}};YJ9_>|sVe$HS6+`D{WyA{rk8)qr4vK2rF}19M*? 
zo(tN9bp#MU2z8A!qDy8EJAx5UMJD&zglLe~6fLOW0S&@ttnf*IEk#J_^%cSRS@a(+06#19!-e2yt)|zvW3&IbV>O;b zWB<`=JXFdW=O(4B3PNe_JB`oC$Pi_vm#3=0SSWs)kJg!aosY>ae&Yz1H1>rt_C7Vr zjuVK!s}H*py#IbUOfvMk+`mlfh4*fZ>hx*3pa0Xn-BI^ieV8`XsZp2x;dnPMBYE_S zJ_be7Y(2?x`s$4Fn~!3FM^o^RnX;y^*l9Z6@x41|Wvyha6$tAPi0y&$r9RyO&-{?U zd(>T;x>%Z8l#Nb;4L^}CJ{_x!LZXq2(%jx`at zcSd6?H&sk){7_;xN9rL_Go!gZ&way*eupKw_Yl3x<_1yiVVkLKiC+>XnTBE@M!>y|5v3o=4Cqx_dk`cw?wS6Ie>ff~OHwxO z@V5R+WvN%1lnCw4kWP7r*Cy7fM@dWbh~YDXkB=tO$^gG?S7|7&zjLasBT z8r1n$I-;AIYO7IQoI?R*iZZFyiZB8gMs^kR$fh3=L5ij42DDSvB&~ISYbzcN{5^G{ zXBCq09B3j(7qLD`qu2^`p}C>@Ypzl}K0x%g#XLWSLw7CHH{Wyjrka{o%t9`4|FuaD zOD;hul#|xZrO+a>DdoE#m;Z2p_fQ6whkg@H(a9X$EL&f)H|FMv7-{jyTnL?Z(TPIO z{JdhdkNaJ_H#Rs~61k6yc;f3oZ&M3H`_M;j@9UYNC}}%o{YRv34;#YkC&y<*r)Q5; zRiddLu3ZHaKf=3G$BmZu3DC9%SCl~c36rrB&dr*HSZ}G6gXW>-T|5MBNp!bmCFo+< zo#NlbAwgx=;3;c=+r^c$2P}Q(fwKhn$?UpdSQMJ8BzMqVQ8}KyWSX|g8-7fQ5ob@) zmYL++!JQ+b;Zod`;ZBCS+Nk+U<1P5NCvqF=0hs{qzl^dUNV6sO1iA(c)&5#qtd_94 zY5$rFPhAq{Y|^|UB)Uv%Z)2|yp0ze7`Q4~cVD<@Du9t9iy_Fnfta6pA$WkLDpg#2K zaIhK&%Ke*!=Td~YC zE4_0T%99YrqBqpr8ifki?UtksBp985-KK`N;2|eJ_Gn&Gk({`~$+b<2nz+gLq$5^l z0bQfMQiKcPWf&u;;8wEgsCX}Biilnl<!?Nr>(^!VK?LzJ;&*b6<&=9WWPk*;<_u-1QM zf<|?7<0J9FJNU~>Po=WranYYgl=CO@vV1xu5q4AV$5s)xyH|dTJ?k3^!z_Z5Wtv<9 zT}z+(PXkz|Vm+J)^B9TAww*k9FNDv(z08h(}JeP6fHp)DtPJcqe|o~HM4^FPo%Ypw(MiS z^)Rz!T&h_neobIOT5TRLTpa(AP(f;DF~>2+F&QxIX*8%cs5EF6zb#TPQZ3T___it2 zm9?Di&Qecz<+~d+5Ee0kcoTyW@=U%S7xOH0Gv~p0pQ^48k2xsEj2f$Ih9JL~U*n{IzJm2F>ZGLn*l<82c4&8QKB(LBF&Lr@}bH`P2Z z$SBC@m-^glrO~l}egMT_{*i?T7B2aAL)|%d(gLXadly;ia>hp650Z!3bmBdVinmcjLKsA64@%Y%4+(uu_~m>!ZGy} zHB)s7b#*2+xerW?hV8sM*lIyTzi74MzA^}0&Ra2A83nHFsGtK&a;k3>mwupYLOZl4 z)LP4R7rLzqI+QkQNq$DnTium=$^(_BDty2mcY#k;p32?D1{&`_-__n#!WOtbFiPS+ zWwDhP%~fK&ieWBYcBNS-I2|Q+CAud+p6Gp=?W$^L&YZnAMBL_coXs~R+!i*RSp+oV z=JxJj1eBF#lkRLiQ{R$$^jVe|J^u{c*s(??kc6%bWtli5lX z@KqO+*|FuRR9EKNnzcj!ImQ!B7#vBC5>OZ>!B>>CRdTBX5 z0PfI`e_Ah46BHUK+t1;-$miq}x&%(<<9BqPE8aNU`v^=daUEAQNpBS#O^bN~4V0?x 
zm|wAgrV}2aiz0@LCr5XDO(K3JO4A0ao5^Kd3KB_j_8Ii0^xgfdY5|Ai*w^UoDm95! z2SwMc?UFSqapNwI-QJ>h@q`D|?b>z;x(7en^+{5;#*MGxAB*^zl}3`Tllhta63zFu zJrr{_i-`^@m93fxiiRp3thzGusVily8ahNKlp*EqR=JtAIOW)2^kx;HO6R<)Rja3t zK+y+62CRKo|0GVVl2ofxM1t6YtAjWAnU6x%{IFb z>uWB}-MK(-{N$RAalzbBgUt8e{uU68n=iVx5RlxeHV5n80-vC|rCAo`ZiAl4yVZoM z)Ba6(Mr~HMW_+d|#o`#H9z;+bDbD1ejA2{F+r!_2q28GPe($4R5l<@O3&+NQoS_~9 zvJx}9BX2y6*XA3kW;gi(!$ydd5fcES7B-tB%gTr!UYAz**0wnGwVQ`Nr}KcwBQBfa zX!OxBpi7fE88Z5O0PUEd$Cx$(e<0>q;jP7-G=4CAjp-rnOD~j^H_o!3cFpkE{1Wz3 z_L9cW_<_|OqrB!@Fhu?d{QT(_<<9&L*QHgbl0;*C-mzKR zR#|u9N3;IfPo>HnFmbbCch#oqMHT9N7})nt%WxdYs|lHYz568a6LmJ|4oyI9tGMjnKhMw}si}pVTkme+ zttwpb^X=;sV)tM9xvX0mAKAP`p?~a8)q-VHglw@63L^sM)H7=A3-4tA>I_k8 zzSN*TQ3ifhBAhhv^IZKJ=^ZwJ3^4h$`cq3A*cwG`W$zY7jVvnMT&+i-EMI(?jE zz0YU`r*_)@*bTe|nczW;=pS@Ux95H)!k%HFvLBH_MaoIH(S z_XuGeG;%iT)j-(=V~AHldD2Q%?Rb9O`<{z9TuJA=qi^lewlBixbY3Eq#~1zV2(n-w zGO(9Q-P4N2eBkv`=i+BioVaT8fLVj(J7Wyg-_J~#NiCbXmX{w_wOApZYk+6+Dq_+k z;Iof4E^_h?Uqh-tI(%$wJ(nbCIEK@9HaF}{^ZecQkXU0nrITW3YG@c{mi1JM{W{oW zY#{I}^iqpq0;w@|@#*GOqKRr5ze`jJR%%vm_N4B*d8Q*((uG>(L?^PBA8OwM-I5#> zgID1CjcZGbL4XyhTl;CF!i9>vW)9Tw+_DDL|+_PiI@keo}>28d+sU z$qj+GZo&oM1UzJuGF}SD=!Z8d(e@P@HjT*_FAu>a@Hj3gHNJK+JOQ_}?37-;3ub7dq z^K*labRNly4?CzsM4(J%`n$!x8e%5gM7EVkRDOvnr3L0~)6~9lCUL~kB&G6ltT^nc zwAys6IGyV^!7wR`saWv<;)vNZ>10!>LV)WxdONAY#1CgHRoL+1?4=4L<6RU+R!cbJ z+iX`vadW-&6+M*Lxo09*9jkVtLptgu1gCJ+Dcn=fim6Un)Jq$xQzrJfi*9Tr>ApCT zq)|F)JyDs;M5d{ms>{Z6nkr2G1CMl6qavF?FNO_OH;*>NbVWAr5_lj(7DZh5rCI&& zv_x~osGfiGB+g{qAp~iSbJLU4Fa}DCMyGP38piC&APzOTshAJdMOl#Tj{Y91VVQFu zO18G3pL&7W7#gXgw}n1`&dRW|E9}kE1vz?-_f82Z2*HhtG|aKnVqrOY!m`CHUi?A* zT&5I{ZNl>!2oPaCS|#HeajE%I38$WO5=0=PQ0DiOrhM<~S1|ltj)=%!egr$W1qp{+ zC>$_wak4I7b9xQm$ivjj9MS2bI%l2Fng%k-8s%l4;dFsz?^On~#z(cfsr{wRZSnJY zPWLy+WXGRwC;N52lGV)&P4tKq(dHpp8(4k!Cli?_f{4iM3^NAKA7Vy5u}2m=jenpjO<4I$*nIQE6VctM z`2a&}#rXC|@xyqXpC56|RX@d=r`w&n*}^gfD4$v7{@uHFxF}%y**C0i35-7##iD)W1hiwYP zd6f)fV%*)^-)8%5H#llRg`@U&1y=2i!?|D8`dG%G@*{MdyI!uDf>4aT(t 
zMntb<>Zaeiqbs$)DR45ymacBw=>KjJEV9TW_$0t}C+Ut^(ayDzrFhnru_szY zCGOJRWCR~?oKKW0ZfL(K-vU4&tRH z2=c7cV?M(FtueeA*T{?Uqsq9qRk4P0RC@zXzN@g*!)oq^!U%-C4- zH_8ikHOaiCql(W(c4727^9t=aE~=(2#aM@ql`^I)a+DRR6X{ye+{OUp@*$O4`%U%> zWn-s5METT*`H$(DnMxB!^XuN0J;fS_);CI4wmqy{(+h`uPRXm8#)sU^>}4Q2u*qa* zR;{@X6CMx7auL-n(OR{F2QgKkdMDhKyy6K{BWt z!+CxE}cBstmt9%2Y+mJANlsNYRQ+C2{XQX6ky^>Q_RLucy%(!!gPu_r&e#%u zR8js`<(n`w%<#mdHSF7R4!M1{*?_-2DyQUiL|e=nHy9Uma%Jn-lv37g(iTbe!I^pF z=oLVvd5eR6=>(3=OvRy|2?pW{+>VSTrGGq_&>lm7`}&18+7rG@{(fN)rc@9)JT*}b zHy({4lAE=pZ}Z6F`ODq|Jz^V9)W=RpE_DeSau_-ulX1jw=Dw~@fBj}azso7XR&{Kj zaD-#}fs<{k&1R@^q>HNuStTHG+ZC7hReoR7VO7rtHndbuv}kB9P!$=d)+T)r0_=Zh z`>1v+{A-aF4OxH(LyxJzCj;+tl+BmV8=P^lmsWVQjO}74jn3$Sa!w#%K7)#-fEH?L zgJ(zX4b3rNh@Nw0gmU z(1!7viQesIR0-awuee^>X%9tX@hb9an~ySH&4eIEcdJG*Z;Wgh(`-=KqE2Q^t1aD$P}wcQ0jiQDX9xZ1{{TU!^tWHd0Y zrKxx*|J~p`zP5pdzfx~82&0l-U886oI4?c1^HRV-*~sZEK-p++iAUKuWs&_P>LZf0 zpa~}I5Fu8UvIqrw&rN`_4zb+fEiRY0@n zAMbZAM;etNOJX2E+#0Lg2FOJhR**;nC&z^do~+i%ANfrIAjHeQJ%N%KtO`;4^mXt&L@WI`eYl<%HGgxMu#|9m^*z*{Up zGEN=8or(tg7Ok!C0Q#|>6^b`!^&kh;(8^kZI{TJ3x-V@N>EJ#sI(V&$wtoZGOhy`+ zDMV+QSqo}2@0&tHPAj7)H7{X+oPn1PV+VtnyU{^(R$Zgm{-3HuN2CWU3K9wqp3!PU zQYCEclJvTT&joP9(xzXE0gz*t$b>PRs)K6yYM~Bf>!8q6r@)}UB9-{rsFSKd=eIbk zcS4Vca>oRZ1XFs@+99#*-Ke?3otSU97xJ?an=h151iw%S70JUYSMtG)U-HCfr80mo zx=@tk3xMAju{UTdVeB#IbyDr`1=!7W<@VM~xn^tWLMi1n;Ek;E-v`g|r>$SQP&D|V z=w(K2iS$TmD2wBbdo*Q#86YrVcCUXh&j+@ytWs=wAJ+U~zz@qg z$a9*%o_gH;u)o(+JhqP?5QG<}Nu4*IBgS_vEszk`N0&m=i_;gaw|1FSKf8wB`S*d8 z5AtZ!8muP=YzHS_C9Cd@&!7EtoV3_IeN1TMwd^XQC@#K%rjYDRJCsd)Wubd{h%+q; zc3Ibtu=VZwKf9dp_tI$xXD#Qq?B(&SdT78;!8@qC>Xz!67(d~q#fkGjRU*oiQ=9xD zO@>FR{1m6R<;<#&mjn4Oa7^G7rTrIg<0-=75_3z!X%m`hlx!iHnUK6M zbSxA+rtyQa6p|s9=QN2v%rmU%z1a&glKsAwc~Rm2IFn7i6T;Ivovr&V@hY(!>$+Ps zx6pa&hK+8VVUR~0aguyYMW(heO|GPS%)o{IWC6gKd%|>;s$yy%(;8ao73XzQ9@;vy zCf?k|w24x=cXoaMQ|to!0^91m`>ub(`9D(YLaG}mCNYy%cr2CQ_An>CG^&+1oe zv~0PmTi`X;zEpMNxPjnSGMg5tESN;Bi7kL%fXE`&d-=^q1}8|7Va(LRxR^KW?OpcF 
z_F{^483}B#OK?lORe74CA#9PZ;VokQ+r!%xV}GRlp%md`Q-BG91>wo>eNd3f$Pl9qB`|Mb`54-M`f|t!Xw#%5E7IQPU z+tqEfO`?+{ z$~~GQer=T8^{GbQQ4e99Ykye~W?rQ~4W{qH6Vdh-|E(G1&UN&C!qJ|dDX!hAu;@M9 z^{sLflhOjEjTbSUnK-=$(fY5V6)OMw2H_4OP(y=Fwe?nE^p=OQ%dD@Z)y2}y zi86d|_f{_N6Z8wze1vIBGVgQM>%#ot*0z0q&6}$qpUVyEV_13TJL)^>eE%*q7!h@j zu&b3j5|3|TH(Z~oZfaPepv;5DnQDadB*xLaH9Y{czd8KBUP}_c5Iv+dkhbQbyuxc~mEpj`-0jyMPik|@-cJr?voZq4j84_LGP0N}?<9uYVU2CN06x@(hUld4^c*(W&kni? zMnZqk0az3R`b3bUavt>RA|19;>R({dM<19OL4!u)QT6Rn$L>In29??fd~4O)t|ePe zxk-urI!fMa%tyrMn2Q6*AG}xX7T4SFl5xYwh(yY%jvooVfxkFfwQUcxf2FMsCLAX7 zN{oWDd8O#WDLyL~$DbE?sn7!=#2J;tsR-%^e!nbBlMKfJlRq*fgiLU-=QM0PH5PSE z0PIZJ!}_;>TfOmWNGx4(39>xcH^ffNxR?mfrrc1SZ7akNmUG?}YDW{2nj-U_B0O`1 z&kf?A$~&0x$z|V7k{l@V3`duha2~#O)pR zB)97*{`2o4Iqp5C?5ln%bN)DXgCTtTy=YPvo;}#?E2K$xcypq-j1}Le>~KpDTqYW0 zolUXviu6>-j3aO90d&&~*4#C@&g4K7k?{CRdTNjb59!=>jI9g{-rO~lPNK1nphy(v z+%=t!C@WK55f#zgHLT9Au}+V;oT&V`&~&1E0hPe$TeiViOJnRHtl`1??bJ&i%3hf? zYS?rT;oOnAMTFRwJmR?}sYf9n;xqD_-iVYz&nU}e*l0}-Fgx=r+&N)+?ec5H4`>A> z95{&6VjHcNc)@nTwvoZc^)DVO80(7p5!;A@PR4Z!NN~D=OrVmpzbLZp zpsM$cRZiUk;J%FvOVYdAME^|vmiRZF?lsxzPKq1QgT=79j z*+|Jo&MR7IX#sZytX6_rhoZ|(HbYT-v9kN$FA&ZH<zh1vJC;pZ+}}pfWrmgF!QlPNO1KtC%EKEHh)HC> z8>mX8-1k?BhQj!(~y;C$8U9p{)>KEDXu)`O;WFW93L_E zv-(Y-kZxYH3kRZM{`ME<%GoFL7xv_xyql>Vrh|8Lt7gu(o9@8NOUv>{*?Dh)d2gS2 z?>Xh03GoRoLYEnp@~Biv{U%0m;ej$PFuKD+VP3+zfLRCLWyaHhsg{}Zp1@0?ynP8| zEy>h9X<%-3;BwcYVqF`y6twF8+U4t<;+Eq^tbcX~fm7ABPv^?!EmT>@PuV>$$ASo# zwBgbZ`uwEpA!(i?FV8`dICJKcUK2(OkBMaCApf*$VP6T28`f^@%TPr#Jka~(nKvc`cn0p$?-Dw z8clhMlNjZ>C*NGDJ*(k)t*Bme&^?ReNtsCT>{kU?k-=I;69jFrUNjt#sD#mCuIl5h z>cc$CG~TLoCr66TH7eIL!}+iv`mn(Iups-e1m(bE*B_DLb}BF!wOX3GuMyM*@tUzb zuXG&p+z>4UDa?l{aD*yw1SoI>DKz;j=)npce-b!G5IFuSaEv5y3@gxv;`v+P_SdJ| zUuZS}ZW{oH4S>-GKx6~pw*h>%0Z`ZgFl+#9YXb-!ql8B>W%!%o_7oEXip#oPYu#)A zG%lDgm^&u2_9hCnmrTDBO@on+2@_&PbC zfjHA((P^-nMRS0L*LDc10n9X*Vj2vX-Y2qfq_c3ev~bk6aP+WnG_`P4uyACua3r>H z6tHkK{@qB`wt}>718rx3tZ@EG;rz40Ih?{dio!XR!a1VCIjq9@M?17a4@o9JS|&f9 
zH|b{u5+nr@I0X`v^SDnCDbb{1ktBP6YNY^br4VYRAnL8p3O&%7-ZUH@KR6zzruW<9 zSA61EUgK9NMr;sGTE5Qe7&(9sSOb*TL}GVn7VsDU%IA!0tM~~dkFf@3iKwHoT9EEq%d_3yzs><5F;* znB^ZWhd)|Izlz;WC>%TtIm$-7Feq$2E_}kpv)pXq$}6?4-aHNS9;=QjS@4V6y8Onv{c zG4B2&;QF5Ly+gY=OF$gwa32~66d%rl!ikIQ~itN9qSPwv0uiy-Vq?R;+oM! z(AdRDTCH}5scOHeExxmmu)*L>!0@!7@RqH&Go9nQ%s z-13lj65yI+I!{GzMnpwGMM6bH{i1}RgtSE#8L?YfOv^MDd0kjo{;e_`Tv%Gp1de+{pOMwAU2TxGi(yr_45GeED6=#A)&Y8~L))xYX_ z`035qgT57UF^Ia$UAyOb#22QVaZ_|d$untr#9@AalyigS*l(Ai=Rp#ORSuYys2M5ws;sS6 zDzB`dR@7G*S}r0X&ZHdGAWp0t78&VTNMafVR|tiT86!?PhOQuvNza@aVWuY9gaIrs zK4y4Qp<@;Pnm-GO#}hXlKw}Lai%bA~OTjD}w4i4himX=?k;lTUC`@OhR;@{=DE&#r zDhA3Q1C*xAs1Go*MpZ`g4G^m-tW%vAf_JBOr_{vOna)cy0X6Az%>&-7c~((ggWYOU z>on&@9lKChpB6Bq1_6~UBv_~_(DiDJC82-}Eyg_BVrq%7otHdC<2$CWS3NDR8nmm=T8$I{pz2fR zVo#~?)hm6FM|WYbKD(f?MB-KyxodaH=L}Tss)HE~aBEd@uR`5}IwH9Sv*r~(ZZt~I z0lmvg0s~&V0$@5pG}50eAEe^q&D}a~d7lAvhv28kqFvfM%;5pn4viOD-*3;MeQHR2 zLH)8XSbog)3EgR-wX})sBSz1$Yb323zGtj&j3K8>lMXB~z%hIhdx4^puTk&umqU@< zUVe*+D@R~ZX$#gl#Rn*pCEG#Rpby z-wu2xmR)@FG3`uSLv^#JEQV~)He&6#TFcgkiH}j-hNQXOiTV+0S+66X|A5dz zv>_4@7lV08$ZzFKHcJLVsVF!*c0OuzzRbnu&yEYzp1@)ty` z{TE??usVcIC>HwDp80nog#JHZY++XMnh-5ir_J&wM0EXIU~FMm37U|P)$=ujT3~JA zR}nwnt>&~rez(Z8|2=Fs&MoisTRyZfd?3na;?KNT&UnYarxWsvgopiOVTs{+p`0;} zO{NKj%ltPHJTZ@*r}6W-f;K;UG8{8buZdiI>c+gC%J+lnCUizQ7RYxJISQDB*Z<_n zax6M6ksl?J<}Zlwgm$bl9VU_%zznYs^@MOtKfRD27bpnV0q4wgyqA9>yc8q|)dA&9 zc}zV$k*_P<8fXW-fzkneD>I#w-zl8;k&onwsjkHU_uHCvd^KWXq-V%=uoUSS;wFto-5djY!jg>+&97+ECU9*f~XH#+gZ7&Pb1L>~qX>`%l zRM*U@RNc@oTYc_^1VRcSi4en0di&|;3`jD>>B8~?chm7AZIgYow#(c)*5?d(>01<+ zbCYW>)YD%wwi6C1hZx&uKrk;@FUmIeHZeDoHX&UXUJ!So%k#VQn{$ZoF|s@AJQ2ek z$t}t3u;4Yx+YAwR93aMOC4jCv{0*(kpUF&ag}x;eXr8rfeTv)?bO!L!@+USMUZ!&$ zdg~Qn$3VV7?~_9GlW&W@qzR5ge@nh}=%Yc%`}RVz13|nXe}UTR4u+s_Dm{qxLG^uH z)~~Ac!6TAB1y~DFyyR6BNNe)!ups7+ zrdTyMtZLZ;x5{4|=g+mRE*P7gQdha;&N59m>`b);fCd`&RdEYTV@*%m+8TZgYXrZL zo6hUtwB6pMmRg$31Q`}(>o692lr_Xo=&PC+6wfeBUl(OHy~MQj7ixYs(agV}cejIQ zkha0ZC7JBCdUTqj=KGD;JncGfGBpW)sn!QKjyc{UHEDhcg!@saORM&Dxf10mzdqgd 
zoQ1ie>@wy$BdjL53pGM!@)NU)?sQdj$ACO#D!w|*MVzEUEm0u>M;**&fB1i;X2MU1 z9ZDS<-Gr|S4KT6`vBqUgWmHWFWmtoRvaBbLCR`4`9D3fc9Kzj5I2Aqif)ozr4$W>% zZ?JFFZ#ZvUoTAzn*3I4)y!*Z9yvJYW`R#JQD`nbefE@ThB*`?l1d;4!>V-i1~$zYj%!ln!CXHUW_}v(w@EtH7Ur+11gFGwLF>bPWt8- zf*)=0J8iHKMjt8iju6VuxafnD=mRGC%P87gBifr9!n+58A8+tGMKBBMPVd*9so+b) zkV|v1hgz|RNVK;sv^PBjKf&PI>K#8DQuIl7%L8|6pZAdxzZooJl%IXMeP4eutsy+u zzPUAqy@eLt*S?_SG?GBJADhwbW6PE}HZvdyGttnQsaBpp=fpC7_owqIGf5V1jTOc@ zUW*kKoHB+v1q%MjCs6|g%Jf)PHbH3`#Au06E+-8>^aRTRE zd_0y?NKWZ*bZ_`>ylw(LX1Yw?#;a92F%g-+?qHx(sEAi^!c$1LN%%CM3lBuW>?nI- zMSR=AEJYJN>wC98?+s)_lzWh9upmK!0rcqpJ zuU!~p!QfluDvWV9c()6uBX)gq?pfGXWtP!65eDKp>_61IL3hf1EPqUYY~$H_$!Y)N z-N(p^n;E5bEJ!|)KK#<%zN`ljw0!&PJ@!)6KJUG%XU5E>na(hwe#lE2&;2QSxTtqnp=Bah_3j#^9>r6ex^pQ+LMR5O;IP zIwrTn#V;&7rt*vu1XPb;?hUv*^rq)8UWLmP3d`fD4nPu^y-4l=Up@_#!F4;$3G3Qwnj`v{%%ckNiU9*ZIi0O@$ljspm+D(eiA z5i_DdL{E8s=EJD13U%#5aYn4Dfr~*vy*NMJ8(A)fpBjVY!V_b?JR>f{fNZ@i|9^+` zh=pAxK+Hp<7DF!X$-oz@gZO1K11!P?F!@#DGNS=bZMf#3^{z|3gbnuEfZ-tFuEJHS zyRHrHJC$p=+koN~*Hz6!=Q7nBxog#e#I8@hly|-cUc~FJ7&;ofc*O^DA|h_U$U&S4 zn>32;DuSC@5PNtK(Lu$VS}|g3(DEvT7T2&6TY9(pK}W4zU3#~x9BV7`VgRxm*hb4- zu^@fT^w_e_{4RGMe=(ACZGWx*=+-XxCU&0so_sM1IUsoqSZ9EYzFv#r$HSXc@YAA> z#_bzcg$wO_8! 
z$xmPl+ZWSVtPvV|WS6@k2XX??t#Cx@ZO(_RCOal`KNQ+&43fll2^f|C+lQE!^r0~8Ar{BXCAzV2i@Pd;8re3mRSvM$Jm-LvG z^i)wf-?9WpA@&NJm!3QnxpY~LG6(sbn&ue0$VMtQkt{l=@}Ag>IF!!Sukb3?lqLbG zS|AMWokZ*$wUam}#tfp4bTdadRviPL`f08*T6Cg&uT5oL*Nt~GxGZ-=H2qgNO3fE9 z@ow&tDzTgzk25RHiN%`&be1;A&2h0b{Gpe6eNhNg8l9;>k@lRC_KwFKctidx$$i|i zMnomwV^#3VRDQc3S2g?C9otfZ_Ep4s)=e4wOct#E$RSjDlP)|8S_#0uz~uM^P6~+} z9ob_b{8DUrZOQiO&4(g>lE6 zcF`nfQPxL+Q(sZ)U9>Iz@{6-sN34*}a8>FlTe)$OOHY-Ripu=C!xxwTGsv;Sc>k^< z+(E<_x={UC{7g;rsJnV>^7iCorv-;p)i~7>WF;PsO{I&j{Cg4G=-kv%xY>?jJ$_}Z zI?LN?qNg5yZ;8Lk9~+(-_9>snAZ5Ajf^IKcWyOERQ%YA7s4lrDg}HFx*kaARE57b0>)PE{s zC;zYNGS5Q$T=1#`Yvrm}wr5^KQ{vOPYzocyStZZdSbiy~TvaI|K1EugMKH_DPIqeu zc64zXZA=>D0X8I&oz)qsx__+{&i02O1ynjhJrsfYSnt?={|K%L{ z3qAiF7uk7)&QkTa7up@EdG@wD1i(s@@Z3RW&9s{S%7-df(W83s7X%^VzN+jsG>ACR zS_8v3xo)gIX7c|kIgQ4eiM&ry7a|MHf1(f>#@Ul%LB*8p3+Th=-#G18dW(~(Q&tB}o> zm1!7OX{I)i5$)p-7KetMT=-@!Bif8Ms67eMoC>6CEuj2RE=1T|D=A?ch$o7nhVleH}7lvzWD}wdp2}oc3%T~A9-zMXyax0=}xE) z+{)|XHC<(Ws2x{t^=|7qbrg3)^B~|`D>Re0&-FTM|48_t`l9(%d(Cyvb!Rx?tM6z1 zWq9vZuvfSs@m2GI@QLs-tIqIn!~FdwxFI zy-`iBFy*2k54*PcEJpN=4xiJ5gEOqb{UqJTbXKm1Ra!});8b3PFJ3_H#O7qWk&^_A z+_WXMuDD}Xt}E@l;c!*wN@t0(>D67vN26o_vAi-Vm%6xX|O2il$hqs#P9 zAHk?3^d;Sp`Zgh8yKMX(R4q1SRfi)}dGl?g~aDKkjk*2Z)XWym13XwxCK z%W>pr+99MR+vrZuHaktf)0kjj#I3}xFJ%&CXTSxC2ceV1eK-+{AdTbKhi3|!z^ORz zPu@oRM3<;i08n%`;UmsNE9DkkZ+Gq!mdU&rYV{JM8RH(S2df7No1OXGV`y)25NKS} zhP%57*DG*Ne95<0`-{KFh#yS^ZU6a_}`RXu8qur zgg2BavP(D5bC_BUp>}>T{%NwOrN(h~&<*#)WA5X&Us`xs_dxkT!t|ckgs;US<1Jl3 zQRHU-#EykmKM^?#8#Bu&nP=_lJW>)G1M!}?R28NBf$3fYoNm#dI1S2%W{ETHyyzhGH&NHoqLVN9ylu#+`lbzR||ur2ltEgnp9pzG2^J zZ*YYeAPiGD=&6|klDQ7V~h7+`yJn%ofy#-L5+q$kxAV>%j+#x`4cW;6xXmEFL zg1ZL~+QA)yySux)yL;mfja}xNYwfkyx_h59&px+q)!Sb`Ivpep3v`jg*C$(aks@6r4QF%_3q2$a&(~}ENE&|VeqP=B zhFK+M!t?!v5eeWIpcSMoDfziq;=tkCnTl!AxiA6@x@sVws29UANLdJFsBju2}JCBNvV*}HanN`5mekf zJvdl)^P=`cpY*6P|C}JjkJ|%|%o!>i_scYE*M7P} zOdZYrdFps^gj5QB7Q=sjir`T}Y*LW|ht6XCEHGG*&`KcSGAfyFs#!YyjOXc~<;tL4 
zz{6ybeM8tPz*Yrsku+21Og_uWS7b_lt?D323SBi&G%Yw>VD!~x*D7mFl&pplymtIa zj0NLzV6EJVoKiZeT@%Aft{!G%)V~SiM2y?wsO5g^{$uA5f;Fj@WW9oZmcRyzKOe@4 zY(0*?WXEsJFj9XB^j7QTv&V|wx1FKy%BY(C&h$HD5u29%3DB)1>iITY{hiSOlJ&wH zKl5!3`@+M&P%ekrXm%1II8!VKoqg^UK>(5Qr=4MTvLTGksBG$%(2dbS0cB#+|tie=x9O;|n<>efSyO zb4!(~f9g}t&ZYy%apUfTmNOx$!?g*2^h#%Ax#qd_BC?#R!2EGS#*HZI(Fmk4t$54- z^|YzJ!#USxuF?F4qEN`2Uf!!C$jfd8-GsfseroW(J>ZJL$F{2zH!^HB-FMOICvNyS z8$lK-XV3m568TsAFnk6U{OruW7krw>#aqk{D?`t->{ZZXjOBA4_q9Lntzhoy*}Vsz zcj)Pc@gqGkrGPkjR>{OdKYJh^Mwo$NLB;62!4c~yu5#)0&)!I}$V>8gF$xLCkVQ;o z>?uI5g|yRfe}ZoU4@;<_O5gVgn(&5gtJ6_?oQA!ox_ajZ)CRdGrfKInxze>tuONsB z7-u$J^<@E)47M(79~6jhu2{vi zF63HS8seKz-eb5Jy=A;Cy|ubLxgoxZb7#`e{S}pvl#n)0XKQ1iV<5lz*liqqh*3vz zBFmqmmgYi3!%0Iy!$BiZpgOsp-JG47egAbRYL?1UYBAM;yMX^7ifJ}}Q|dbB!R;RI z;le|#^}f}(^}5xvb)~hd)ozV~ZC08^am1l2o@aiTEp+&Pm({+dVPx5om~&`EicBT@ zr&1)NTW|-N9A@zS>qHY%ZScw_v%`-AD%C3u`wyWRE4hXl$uw7-zKIs!8=I?SxWodq zU^H*LcWqonJnA1e+ihGG<853o511FexN&J=i)~akV={8d8nMV{*nQmgz+@}6#$|Nw z_an?|>zpLgo)?O_oE^StoubKcC%nl2fVa*_d2eS~V7SELDK{muVZO{CwLNsmejx2x zmkW>~FbBr>`^p^nSID#s5O<1KoL}wOFlx4^8PUz-*ZQ<%w#mcJ%R5D-PN>$+uxn2d zC!tKUOH92>lAUr$%at>4l%1Mm_f(xykdd=+8_2C7mLUKxYD~!{nE(qfL8+Ms-({rs zO9`gjrLT|I0dZw(+YAO$3sv8VTB_DYohn+6KJ2=In?_(EWbyscu7;~obz4L(IB7yJ zFY+{{6Asb2*5-{8Q&~yiq|hk{I!=*!WiA7U%#_lc*|K)hE>I)?!0`22m}b!dwv4m6 zo7z-WlF9-*Ku56z_57Iqxb#3z=8`=%~tMASN&zGAqhO`s$yPvJ#;B~{JcHGuuqg}PTW%9`_IcA|x9w<1tC3IcCA)TE!KfVAR`SGIEs|m5X*PZL7aQS4 zzYT%Q(B!X6NdOk3^E>~cI>!7Jwx`CD$Hv>cD)7&&q0_aJ0!w7_nKpXnAx%1R=rqr_X7IA4<#Hmc^=het-A)Z@`H!mzXyuRen z9yBPN`bN?CEba2-#B(xEO?X(_mwSnn%m^T~lljCJ$8OAWEj+0fNl5m(19_{9BhycS z)~S+Pfyk9G`?(h zZLXl9mB|7Ju#^kyt_7H&j2Nr&0Igw3HrZ*8E%ijB&uS_zuTNhxBkp9mRH7eAzshkQ!iuWY#L;BYBFzFtIfI>&_a>)Z|FtWtc&t!?tn#8VrHQG`VMz30_Y1&BBt z0=^gB#W%&P1>5e7I4T7uxefATvOYsY9G&{rQCwq0(sqAI5nkX&bvcx-StJ29XUdu* zxEE1O573y3w2!&f-E^XJO!d^KUMs*gl|@M|-{DWiLh~G%hX-KaL7C?7HmjXy9>?wp z&Z-v!8Lm0BJ#87e1@iJ8!hjlLCK>dvLe@uyB|^BpMn#Cgr+p9KBgM!? 
z+^ZMqb*9+k-e%Z1m-xiHtrhgBQqQlzz0DugERg+#c2YxVcOjSZl2ggn60>O6|4^}L zM;^31Ps(NAKSE?D6|}raT4&$S#l4M!!m~o^Y}cPkWTzUm+(mk8*Pn>OvqXw&*Y8Jk z!5qXtNxH}T`nLQXqSyJt(fVB58-eVznJ!Pi;wgM#TlO z*l-$(Ym!Hgu&WTj&(qG?&DqroW`5knyV2in&39eiIXNXB)pZtBNWXM86sTAU(=a*!3HR%Dm4gx36`JwY!YV9KZiJ@r=UKU-MesA-#4HD4+oU9?aS~yqo^_GaVP| z0~{GeD0q&6(ugn|$f!N|MJ{|aM`iDBW+Y0gp5vY)pRt~6Xt@<{t@G*T3m$YYNv1ZI zcYfQQ>;7&I!P1&2`g)i&-thG{iTO(qDFUoaAiHJY_%payHID-idif5lnq=8zf)k)P z^6)Y2dt}&nzSwUuo{!_1nLD)>I0fTmHpjgdATp15VD_xk%8xUvLnj1n&0BND%&y!u zOs#m2m5m#F>(}pf(mPM^u$&NID;^YXt~t4SJpO^SrYk1dDNnUJO!lN~9vb%A(IJ1%C0J9mZWMvfdgw^as1J^z#&Bg-x~HkdGms#5kv=Qi zls}+4O&xRen$=Cc1^<>Go)JS|$$FNr@V0+7wuSB?`-&&n#oV*%!Q^)E?WjDVu+`dQ zYfqv)rUKi;H{3WLVrKd(W_u?S($PKBxSOV;wQ?`4C)!GWb-E8sSbO`@v(AYDPxg>K zWS7&3Qa|Y1nB}Ugw3#eZ5aS3@f~J~`{oKxLTfphj_q>QwN!AF1Plk~5yfqjW!&GjW z+xDS*3u8?Bkx`I#Eq;x)&C@IXarlufnm;;8CW+a+H#_PpA5|=t19d;+ywW#L9@mTK z0i4|GR92wmMof+(r{J&0&o?m1i1zPE=$5tabo3WqQ-H)2tmMT>uW*zcRG&=Q<;f&- znl2`p6XYFxj^FCt!l%30O!+m1W`-1JhGj;I5roLfSo5^#K>`nq+3-ir1h-o8E=}H= zKulIS?2m_|duV#BAKUY5CU{r_Kzu!YOz<3;!y#>iM-|)fZn}Y0V*$6AnsbMQ-i3X? ze|-Ri3!bP6+)@R{P z#czZ>#Vj($l)iNk3>b3StnUIiAXXs6TXwQ4pJ+Jg)l8x29f8KTo>RbR>60BZ=c(;X zg~abCh%Fr5^&KiDKDzr)9siU^GyDy#5#rb~bPP3!sbW=n9Aa3`=9`q?8{2TneZk$< zSp30aQmskT7~s|o zB>WYaB7#oT_FZ1YvXB;IVGQ_Q!%$=lx$?>){*=)NprA-iNg)p1R9%9GR|&A-*Xqh*8>#&C{NfYu zjDaSJQqw$SXU)cOD|h63d5Ol9iBYhyZzZr+j6C^H5XneFMEDrjNL$3Wwvr>`2Fpm> zTmIw2LzPH)?6c2$F%1^*`z`9*%3VReailF%IMd_t=1-rdO9hk4{NLsrI7z)!e&gFo zXIvS3?<7!kFZoz`HHhTgr)c{yBON3h$s46pV8SH{R*TtCxg>r_K}_JtDNKy=jQKuc z&Nm*G&o+`f-CR(uNHY@r`i!ZHIpOiC&}21J7-*ujmNHDuVMCE{j`?UNELPVM5GJH7#U58bp(F!70+7^ z>3lrAk#VG?WYYY*df}1v4~iqbjALVNxsJRt*S3-~qHTyx#FKOfR^N1*Qd(G*2ie!y zlr)?gH=fF!+yj9f98KSY#m?(|PaE9e=bGS{+bljv+3dgz+P8FQFvFNWzbS~Rx10O1 zZW;#TJ%+UISt``wSqyL|RU2@6OLxya>yK&*tZ0{-g`OJP@n z#t>iYrd)W{%;vcU05V{UcT}T6w%n<7%;w}$ui&s60+%V}9wzpq30+GhW*0WQ8iZB( z)=A(JinqLIUgk&SmlvIO>h|n)G?gC;GH}vw_@SuZO7~!#RIl?L34KP>JEZ?B9YP377+~clZ*! 
ziBKJpjSfni)Qc^sKZ3%YN4ir-4P>-XD+1Bz{7yxSq4{CvhaS-$pdQ|kKtun4`JvD* z)K1zi@uTYp`7p`ByEySU*tOtCUop`*zZY~H&O2G9PfdVA*nP*N!hC|ym%cgjTTokX zZeb|lL83Bmep_XJIK?RMUO01Ec$qspC z=%%FMZDfCF*Tc-_{p42(Uc+Cqa@qDclLw=HZ*R$7HzX^a8EZ9Q9Z-<96i_dG%1#W6 zk`V)jYW4Z*W;yU_<&9SXf<{wk1vnYA`)Y*?j~z}`1#U#NT;B?mIot4D)wK$A=GW|U zd;NNf6u_kaFY1Gy>KWMsydKwj`j1O`Zp*iRVEhPKe{&;qhT()_L;UUC9rl^_naGU*idk(G@5fZW z%cP^~S+Rvp`6S0%2eoom1M6~r0}?&Kb&=0dHc;OXxe>VE-XYlF+Pt-)bz#}GZ#Qqp z`C-={|AXyE8IiNxQQa##jvT=iP^&buEByWBbWu`+hywOIu z6S(X7LGgp?jrV5=o$aIgSy4wD)U()`<{Pstvub(vMQVIY%qRJBS_m_gC1yBhh&X{* zd`4J0RK%Nc@riVJRMB)e4tz@dQaTX$tmS^!q1_``CsPAvs)>{M;RtE3D;a-tyfKr3(!lty z@w1dlCI#I@RIFJKlm^R>xFXRa*m2;SY848NLUabK&qZ&?@!w2C#QsG<9$*b2hej}d ziDEIs%5t{cG4Vppl1XFIxfIj2%v`T^w0 zAABqSr7ggL4qW$bgkk>U{8y5rY*eR7c)kO_)T<6zh#r+^c{Jcpo2SONL^`lUT0C7K z1mKS8c|#33Lc3bH+^^c3FdfSpOOr?!Ky^?00dW7|dGir+By_dlw_jyu9aamYZhtk! zG$A)MA=f4<$l$zFt~7`6nV6v9ct5s03otKB@;wjrHCw{cf#2~}hZ97P&$B$W^<{Tu zbnB3Ds2TL)CYwYlBdj(ltZsxgMEOnSjhLvskurQQW8mWIhdh$bZU|@DQr%cjp)qJ$QTa>OMf%wUu=cp)g@9Rdaw6-)sK+18Q z*4(YT_MB(B2=MrOxf7=vA3<>{vRj25)6GE-pru~^)N;NT7l1~QaW8(Lxr9R5vl~|SHDtaPn>YI*fT=pp8zsFD=ge8YhDht5 z(pIi{da!nU5Nl2)8f7@`lOnbZUQJ_X`<~{2i_(Y2jV`g##f{&$t~T6h%S}wRE}G&W zeU6smt7wZ42mx(4co3SdV{l`ncjwt)1H#AFzk6Wst!t$OUE03!LJy=A@Rc~n^$P4e zf9n-NHQ}3A{y}aFkG|7@zN6|6)+>1Lj9;fc)a4zMp*e0&|9E!oxZnW?eIH_TMC__J z`IW3jfKziY%Hyb)0S9k|_?k|!T^nX2oepXi7Tr0VN(Pa2CQMrwut{LEB=+*$2DdOaPe zK=?j6?+7O_*8}Qd3yKg|Jde=d&lUmjQ>28Dq*L1dkeTqLp4^7s#MLupOtxk*W%vA2 zznT_}Q$_L-QGrz)s!I+2b_IsqR7sg6M~HXl(sJGkGHbZ6Xs zFiGMF+OQ$?A-BZ@gk{+xMfA`U25s3w%XWX|#v|#V;l|3_$hyOo?dy|IGEHw!y!!+Q zs5&$EfHU2y;)dVbQS=G&Rx&Ie27`=oG`8Ly$c%ChB19l+om$T2;DPsTW8rh!6UY%3-V9} zbWs*0M7B5>Om&Jale+xjnAH_O*HG>#0B?dZ);VBfut_H4K231_UW8Qp-iEy<3O-l8 z^};rNAm@T6J-9((IM$$)J%=v+{lefL>8j*;lHL8FD&Y&$s{72|(8pIpC8SCrC0Dog z0=cjGpbDkTJ7{LV8{a|fI(Fscm_R4zb)84cswPga%%I24Y0s{vw((+vrofo16-e70 zq^WqJ8{2Un_Ie(xN#FU% zC(sP|ueIAWmb<81onrJqYWHwrA-PR(KVV|*RgqzYV%WyR?_eV zH|(C97x5_SLE-xpE~+q9PxbHd4@Y#PP0;qX6dqG0m1V^c%H4)jnR>iB?q`(SsY^=7 
zR`44TYQt>T-8cT49HL9o}ep5*-*6mhqR3$9?tYxNfFF}%Yg?!&m_NfVX& zGDD1}Ag_>Z7`)Jm9KR{3{8(|f)lH+{@){F~PZ5&Q1(A}6OJIhJVF>Rb(@g%?iZ3X- z-7lSj0WykZ-OBa&bDM6<#0CvLcA-1iLkw}6^mS@h70drehSudmdBHAWO6a8xfU!N` zPHXVtX9$xE&VB-595X!dRHk>8pg!C{y1SkCwI*Y!g%jI&?fb)eQ_G)9nam^JwXw{ zXzLK1B}J&cxsNUpuahWs@WOv8o|%Q7u9#Cb(%7)O9BZhK=34)bYYY4JNA4-9M_%i?)#sc|2{3|~+){CC_n2yaj`WSmV1<@hHy zDH+lNhnv6d=(b5#^tm<5fo>1Z0gW@`CArH2T!Da=r99_{E35R?V+?ot8?~c~dBci6 z15gifr~vl=#4tA*K_SR*N$V^*9Vu22C&*N11)krNcmhVlS**fr4Tme*oR3%{1Z480 zV-KWLS(R(qW+qm07REAIP{Ic)#o`z$zegH}(rX$mkPTt0*Y^Lr`+ zh6#t~;KsFb>MA3Z zz&Pcq;-vozs~BV&|8QrdUL4yqgz+L+TV1AxJJVmM-Fj?i89&F)BXjwBaJzZDT$9J* z&J<(%O+KqF3@BOYj7x;mHo5Aw7C=w^)O)g$8obo0#y;TfEE>dPAh9Wa$`QY&8-3@c zlv=;CaB~q1pQ(+3>iV)#US&dZ6#8`b=IGCcM@~Ib{1#pPANTv#kfO8Kbj*LYyU&od zKm;FKldOfMISdN#Rp!-4P4m7e2D@f_jCa_4{w2px{TzxhmuRupj=b6;A2(9ZQ!KOW zd%E6K&#=gJ{&aTO?a{YF0KpLK1lf`hBNlDf6;kTQ{r7FP|7aJ5dkRMeis56Mt7Yr| z9g2AM{2k>D#|LTEgx-oT!G}@bU@xS^KM!rFpIOIq*b!L?o5Sdaa65qJ{BV0Y07(TXb3O*g_$8KCAcZBjP_46dTI2$7xgA-axh&l0s)!y8V*&=pF0MbFzc(>~?A;`?~dH zdz~3Rh=+T)Lm$OzYdxR$VAmip6pwMYK4HrsB9!O<3Z%psu^N=N6(avvz{9vxAG4(p zA@v`C$7oz1v!M_u^_L+yVq+3FzISlNMN?@kfDA{xfqBq}1tq4HD_p@jrZH=pDCdOQ zQKEoPe1!!ZA@3a4XgE!jb%N{Im;YC-RC#cif@utI)-+zuD7B+_0UJJ%6>C^B(pK&n z_Tl|>cWPyQk(OZQ?c*E%?aQF9`UrwK+bv!zt-kQXdRro{(yd;{W$uzba{<{yTMf{Z zTgvQKHF4hcU$H=?zpp=*z>B3|*qVFgI0|XtWIGBq7Qg87>9x|f&9OhCd4)ab!KYI{ zRi3Y@K715}yJzVK33(x182%#hd&J%D`->K=J?5}HvqT5sKR|&i+fhX0e;*((83YQ1 zlD5=68JIzT8I*Ol-aF+aXJV`+>)a-D-$Yf9O+3Ghi8f%yZ`gUjzIHlY82eHP?d^sK zSg8I9S~3OLR-Z9i-(6GwVcyisutM4dqNmO)Ts6U0ENqTP-e5HMTT1t%rRnqAnZwky zHRCe&wYpRI%$2N5n)p1e)@Hi&pkzYF38G%e}%`aH5X`9wJg&6g8& zq(_3H-e|I)lwlEAiq~k88#fy$x}Rggr0ssNC^e_kcr#fOU)thu{MRgHMOY5yZ~q#8 zy9b`&HJa|HW&9K3PShJs57Khn|1u2W$2+V^ZV5^>QNv4J-(}sJT`uLFVwRsvFR6o-ht_v=oQ+ly; zpT|mZA+p}DR!%uMYo5}%vXs4XITE8xitE?tspYdCzUN~;v69FAA}fSd9Db4}OMLjG z&W%FlZ#kgYwY%8+tEh#(dd>@LSLB$3xIkViT;uZh%@Xlmfb{Vn{Xn_RKy?&}Y- z$DXTAB&cqsZa@;p@Hl3brvB^nu1kH3Pg> zDm=_zl$-G?d=4li$WO`z^A<*Yb0kuplV_8A?V{Mn49l}h3=m>Df2;V?LxvTKN&n3T 
z19nc{h6tAHk1&NPt@vt8d1tLtBHk#Lr~5wr>2S&ajkP3Npsx|9=@iPoG(h)+VA3fR ze<>i#35lWoMk`T3m}3_+DF2_q5kt~4Q%UfT7bp~dDbRAzGq6oaR>M!rnU>_ZJH>=h zD6;1pFo8?hG4sRT+y)oj4;NLnLEP~Cz`rM;M6H_;15 zKHA(-6}IGFj49if41fZtqD}p!sJtS<>32ojbkX)*)(%1zN{1&c9$?4YwT2~Z9f!uH zUqyRbw$n3i+9mlxvNIy4qD?>Dht(}oYo^q5o^dfw=}pVh_uIqj1AA)9=&l-t2eI5T z8|`w3j#E|<#`w0lO~2csMm0{0ear;0lxab@9RXmRECe8he``qx34 zI`NS^X@{R`;O!*?-MV?t7iFX}1LZsWdutcdq8|hztJjL9>2^;_$6Y#t*cYYIA+#wq zirevgS$W&V4HewY1D9`3Z(QowG8zXVmh4RUX3p~awkv67wN0yA`#Gn>6lDPU!1*~e$NJV7Di4RuN8j8`g)7cyxt~T`? zt_XnstwbG?z^tRPh`on{v~)Csi4=yUs%x!~M!te3#jzaf=Bk^;{EVB!a>cC9=0$6F z0MoKausgLas8vedov)@&(cS&nBYkbJ<>CDI9O{Pto&Z<_V{wcKdL0vW z$V6?EXd8p$x9a>>U&Dx4HY0-~1%4gQfp_zyE=A(wu>q}0+1k6Wz!18AK;59aGgmVZ zv!YYo{E#=s>bT1BH|;G$M)576M&uK#@jK<2Amz87t{R`^FqGW-_WgnPsYn_u@7~WUOf}KSu%B{KXa*c z`Up(E1o3JgK21KVSt}o19zWdP!EpX2A?P{|eghJjFMh6dc_g3g7hRKQV4{59zE|1S zGMqOghP_=0%-kc#MdnKSx+ZV=aOlQ8fp_PLK|w;YIMzH8Ku(p z6QXVGTxjmmKzZxa<5B!0TN39LHlWga?t9y@F@(DPu%VGDnO*Y(rmD8sW^Tym3#kNx z*%&YKvQ&cES)E{uENzDYM&4}|qRLR(2BFyH-;!4OPZ48np(N`lj~|@HrYuf-1rkp0 zU{~FtP~DzB5IdzxGw>rh{V?`@R0vu{Za2AT+f@<)LMC3|% ziC#OLN#2xq}(1Yj#r?%a7}#GGSuX%mL?#z}eoN$XktsP1=#5zYdWWTh$@ zpPt;Aqecm>y}Z&(wmFZ>P z!k5Tk@2#s4uy@YmOLf_Mi%f6!GVBne3@i)fO&*J@$#H!v0dp$>n!?GeN7X6&hJ%LX z!5T9IWa}z86Vav~@QledkgW|n_|egxmf~V`5|I*I?ON5-yg+slzU$c5-#2{Z#n$9) z5a7N@uV#czkG|t1ucHW2#aAg)u6bD^zO5WLfwideEvfXaX{nPiyMXkbZFlSFXFO}= z3*u;WsLViC&NzPw}i?_I=f2G^A7>L6&&;mx1rPbWE`oL zX;+$ZkQvLtq{%WBMU0|3GRKB5+KdXJG&QwiC6@9Qn$s$hmO0Zi*p4+h&5c9`RQDuw zTFFToW&xVUJ8Go>>;g~>WFCD*FILSwR;_^8D+~KIZFL8@T!BGLFUdK1H;3k#SNWl! 
zF|N>JraP4K7RtCu8f-P_Bws6Ki8+Mf=H{}*?)56o@L>gVKA8o8Vy(47+pKYAZBlUw zrA1Max?xhQaY!jE&l)G1YSK@-0-fJ(K8vszHaYL=C*NwBLzl9@QB|nb1DCL0t0Xzl z$y6)ns-@G+nue6H_Grjds9=1LeRmW0D)Z|&^BGg#>qJ@4T*dT*X&`S+l59!tlxD6% zdcm|QZB3G7Nv?dldG#Q>TQZ+ErBvO( zDH=lsH7Obvv`7h1e-?P#k`=Xiy>lXT3$|3jcd@))e?v`2_adv)sbkN^7P-1Q@M|_Y z9q>g>W3y(w^WlY+_Uf^_P48%Zbshi^CPB4dsV2bz&WGg%`b4<-jWINu!0X~%IFjn( zfaBP8FFmw7rzrTfGsz9KKU`)8;4tw1u2!^?Kd+$G{XK}2e*xQUCcjfb%XA#`N1@Oa zz9nwlJQ%ntR76odI7M4l=sZqW7Ueu{O6rL{`5GT9GW8$#04U<}R&>uas2NhEEzyYfG53fJd`VmiI&vJHrtDD*O4PWdkQG7Cxye+j%WQPWZa|#a_FW zX6;sm%3?hh-f50F$hPu{m1Bu<#QaSf~ef(#GFtk#}u_}?I>GD?eq7o#`4m$n9+gY`4o=}vQbE7u1v zc8qbm>pl^5Bkdvrak~q0L=SbB#Tx7{WWM1$x&*XCFN`;m&N^G87L?Daci7 zJafFB7GHzCv0}YMa*UN|d!Btm7-_Cq%cPoo`D=7?XNNA-cM9d|7g-sd7S-caHsfjT zSgE9xpDStQiZ0kC_;%_fYw~vLbs`RT>eGr+oqp2rb;qpenFr{I9~j5zePLobWKWpETv;V@}a+b-G+ z3KWp^<6wQwBb{MU!!1c0dxV<1L~#rG=|hffcO##HDm!px1!&3+BqFRpiBNkG&jj*? z6Op-2k7lYp!`|NgvYnf3`{EMlHx}bvQ&NdTLrKu>p>%)SF|xBpCu?&fHjy{nhAN6x zTG8MUp4G*dv!-o4B-n?_6d=PoI>i0b zstb4dGQ#wlI@Q|`gkqC7S=Q0zup&Ii;C&ft)2kh5&U=K?b;A7Es%sUrVw{lM#l9ju zwb6Ee?exm0Pi{-62|{H-(w($6#>yne^^O^hm@(uFh{5$pfx=mYtpBWuz1LSRub4K9=LD#u*n~Rf;f&t_V_lv(T zq+rH9NlmdIX08ozr$g-uuBW9he*D`QHYva2!rMmGQPh=KZGEvFwf!_ZGc}eSk%Ztb z=J$OY>{^6rNp|mAF?Z!$KQQ24xnTshSJ5n-4aW#(tXIt+Rxr2GqBRypmA9(Oh*KQt z7QL!?O?mdT5Pv!orRg{E>=mtrTP#Nl5Y^*TaXv4s?#-hesLsV>af@x9xUopP$PH9Z z1_?5A#;SwI|J!5&z(0{2DQF8J#n0ZBc(f0;{zH1#-Z`uX+DlM9+iMoP#)p?gp1sjT zz6P#%ihke+nEF^1@)Eg<+Qq^vEH1FZTd`s_vG(xSQ=n+6li_?njQ{q{tPR8|!D0|X z2+^=@phv4`RAu+kiYrGFR)gj_HKm*Y#%gG$zX;ym8=a)`*pDW%wb^^ES$~W!`rW4f zd7JU09nOc>ZO(6q^{J8kHkhHslLAC9g)?1ACz^km>Eu%;XD30ULRAT4bt)ymi7^uq z`{K2S$@iRb?~Mewnvgns3K{u*6;IHPWdH6jx1c-13By6S_NHmUx%(3l{P>J4CchbM zNd|@kTyKXy;m?QwS=CxWLwCxh6C32K0E-L~B-ZyoQ(#3$p~+0&M6xCJCmz|fpv+0V zWpJ=Y?iI((D9G&+VTLggX8Bzw`n&G=t#_d5hNTPqYQXKscSU*}#XdFP-kf&vei4dQ z?@lWJ$)S&CAsoHF53PExfJ0!O0)Fr<9pw>V6>p``;?78*)rN_NPDGzo}(Ky`{Sld 
zg^f}4?xAnl2cI`=%1(rFlgOojs7CuR_O^mA&)2rp749>U3Z+?`s}uM%hKU$}+aN724@wh@02zmhrphO*$Jvare}#&BJ@1uK5CE)=O;Sp9WF0x!8`58&=FP>4HmYNghZ9Q9sBxL z7dX|kqZ0KaYnNvgKve;=2i#m<%nK5&PlDlY46D6lBF|6)&(Iahgwm9V98{XG^p1?(X*=xA!GE8tPMD1U| zj9jl{&CCCmO^*_@i#`DvwTt{4wEeog3lc7gHRG@!)%w>2CJoaS3Jv3M-w9^8-p&;p z)aCbEdi=}qw_R@&5Tf;1rG4Pn|9r21{2hjU7tYGfu=vEPMyVOU}i%s6CRJ|=y_no2d0LJNYNR=m1sQtr6{1^`XB@1ej`Um@efyExK zucG!rlKkk0-v9e8>wkgelGuOhXR?h3%(@9Kf}bAqGISl>iz<;-5TD-9hD%M=>BwBZ zB6DYzsgvT|6?K`WswB7KT;igFJ|MW_-|=-uyIE6awS@9iCEB@YplN1%1$be_@ekW% zXT)%4g!~1vt(*RM813zOQ6~3%J(T-4SWQ#dncYex;O*3XU?${FF%jKgUgq!87z7mtebQQ68O4LMHi$ckD5vN=y_!e_@J>mXLjJ{^0Vrkx^#uuk_}L(U*O{5A`iHmvCRn50Pl*i(FY%o(IcMH9 zkGhC|Lz{QzTEh78zd9(g2i8}}lfyuYU$!tOjV_>BU4{GsV*40)Eqr(aJ{Dgj!1v$7 zP7%u6;b_o}Aa5hc!n3|?QBQJ*uqO%%?aHmwPjZJ=PeoMr2@^l?o;R?S&u+3#Qv01y z`}y0`_Q@H#H}-cqX3Hb2$RoT~+nyB{u&Xit}LpCPe&(0!tkP9ppHzPvod3 zkxKdvN@OH8KbWsg38OgUZUIx0EI8^5FX-Kvt8BGXS`d!w3+ocsd?}}Kepq%_OZRPV z%6f20U(%-#^5D%cncPt>8?4PXcslijOj;4pUpIq>&by2u(iS0W0=db`^4nmDWb{QuJ1b^S6cLUW!;OFfyjpzGi#JY~-X;*j~ZI>WmvmeO$ z>>LGv*e9~#;(|bex^XxVla3Bp7?-$qxqC+IeqR$^AtvqzeX1W;RCUmwx(d*m!?+;BePKW8c(KGwmsm z{?LVdCULXUe?y52#nn|p=_f3eIUYq_o$b!GCvE(~7V{KMh_N_CjSGAB1M+5(Bc-vIRmilQh6J)6ktF0FPr*1#paBOA# z0nLzfqLv9{NK(;-z z#T8VdD|~2gwvu%%R;@#-m6+WPg!o6tTO7CtsMD;OuDP*p!bx`TFY#R+6a7D|y>(Pv zF&78e7KZ}G9a<=*SaG*f+}+*XePEd44#gdc+u-iS-QC^Y1|8V>zVGbrKfCAbAA8Qr z%gryjH&2qhXJt)cOt!KMAGnOUY0Hi zVCt(Duke|h?o^TKaadI9vdwKixyJrTxvttnzHsfxQ@i5671nzs_NGq=Ob=(ap~9p! zU;JtvKCH^6KNzphW{Cl&#AV7()=H}Y*g*82`0n03F-U4$vGnV-!gJRb2z?8qjnL^ zc2U7B%=qWQ-kbV_NIP4CO%&h$iKdvVYyYqF&Wsaol%!>qZT)RYZ%bn`zLjg}>)Ft1 zs@cBBY9jGAIEm0Zi%)6ha#TWaVZQU*eLAVvUqMF9d5uAlyNThJrv!f8PEB$at(q)o zVTZWbv4KkTkW!0a`RWrYUO7nJ>y9>2ty8m4HYvPTDBT%h---h_xC_=__UM*uLKE+@ zB!n}0Z&xIcwtO{P=ub}GVun|uiYGoIyC)s?OROyHH5VtCF&Y!4) zzcII{9tfw+mGj5>&m=$lW`@$brB&Nucsa6WAQt7JjT=7yE^!Pu;>>QAKcHQA5x`j3 zas1DTf1u-F3*mvj?PJ7}0iw(%-roxhpvU??l<)G##YVtau>W0E7uZ#BR}--T)_vG? 
zSwwp_>@1mF`t$qSB(8R9Vhcx|U&ACuORmcJ@0GEyK%`al`=40+7&oI4EeU77Y(oNF zs!c_Ic6>)6stp7^)NP%^VAsk6Pf*kQPzevtXNq8nzwpvI$G1=fo{y&uy_;@2! z{&$$azQ_hy{dchUpYnn~8XPj#wn%HP>^?g>0{J{LiW5Hf=B54`sVnhgd(tyzOg9XLg=}tEArS|`K5W4AQIG_G4%A&lh zc79XK<`T@|IZy0!sPZsbbeU=8V?n%;+HjCA&AL2eTEpmGzGQU4#n-)I3&7iG4!5ma z&L1*V*x8c4axCy{Qs&o1C$B)nBzP$L`brxHlHN}B;g658xW1AV%o-YCd-Tj#_EDr2 zEWlMz4iEX>C-UGKxQUW0DEJqVLNQN)(+Vj4z&^Pt_17;+A-6W(^5m;FcNn64E(myA z8S{EK)hYP2Ao2EeXg~oy$@_l?^~;4T=l^N|*{v@F$Ju#jB?9LE@&RuPW7ff8`zh?i zH?-{gDwf0SX{=-Go5Fw)1i$d6};x#X^N>bR&9=20E z0|D8d^HlH^P^06U_8{Iid@8O0;R$(8r2+kT*9y(2QV<_rkwjPJJR0`naa9>)b9$uRD&7&9$ZS|>x$FWhy&&=kpk4&VQ#L#|l zJwK=p6$xiFu9EJ~Xgm2U&XWJ9?WKI+%WuLO^e627l8#^bNzTmS#$$;9ktJMa6kxq9 zDOaZEH>e?EMEEfT%aJBBu+{Lt^+0`hXYc0f^wAUI!w(Q77obqcig_C!@IKgSJd#;l zU}ay!?zb*^lucrbNJ_`FbV50Hr&-YdEdS+On)58Cv7gNZ));thThk`$aQ5MC+Wt>F z?c8PlzKTB$$B6LOg=;%iNRC59pOiCxLTdTy(T)-;Q*m}gyc*Y3*wisq4G&eiyKnaz z!zKm;2cOkZW>o<lL0ijCP@HPvZwzM0Rjs}#_Xv*s<$~t- zwY0c$+NHEOL25{ti6~y#cVcNzVlJ2m`$-a*1QlO z(;y`*FlqlxKDs)LDv)~>{$Cw!C}3C;rG68)(} zns5Fd*_Oa~-heiug1!Br45NklR5I^425C5xFGFcBkui>Ka6oP?H@ur8VRY#D>?)J1 zj0W381(wF3uWh9XA0dqk5(g$5{qFa3Qmwxsz)X(jzRr^)Uo zV^i)M5ZYB$;*|$+wpI2P={#q}U*d+eI!nfP;=&uOscoL*J98 zZ+1{KC(!^@ZAp*$0`w&4=|wKq;$9zbCyGF?$+h`h9CtxqZtcF`7sRoZ9eQu(k*u0K zx@9AR;KKfwNDz8+l&$dQL-W9u_~XOD5BuLj-I9VE|83QC_JcdqT(NCb-FI9L1Mm(= z=Zm!>uPMbJ-L`8Yn_du{ygGc6Jj?Sf)O2+gNv>6cy&L-S!-m*GhuCJdz7>!Y=*{Y; z(6orAh2BtQ*OVKz!nsHM7?R=kQ7#V#MF?V+z9qERpO&c6 zq){lH{Cz^NN#au*SPCgQ3N2B^a#8dul+Tg)t|c`z)9pgw%-)xUTYrW&AvY`n#_4L7 z2u1ASQOfCYG4ZUjZJNbh;QV1zu);(Dc&H%5=Y2Z(_Qj_95Mu`}c6av{z$@69Xx@?c@oM<99ybFTIbfaD8_4a)fV6Gs*IOv5^G#hXI#1s%KuOHn!)0hGx`bhps7;QTsX8CUwdmr*QW* zWM||f9lFDH78_0pN1r0!n{{_67P6Vi8y29Qt5A|Is8beQ3C>N$kUquA31M$7l2052 zKdVYmI3J3*J8E=KMX+bp-`lifRT0wlfOPn&(?nF2y(fCY+m|=Ld>DSx>R63lgq6uXh|e zI-SAwRcQ3l#%g?W6Zrg!!?tI!J;66-ml}cUfu({-iRlrggP6YdG0Qe&uhe<0WtH4B z$28-O6*wwjp{n+*TK`Ty*8(q*S8|Tokx`U4-gT+B+*uN%5~kPf=?o?zfww)HmGGV4 zt;kqQKXHX2^&;dd^3&VnnHdvhC*9CtJ4Dlmo>W8FPiDu=+Q$o7qp;#2lJm# 
z@T)h7t;g^~z!IK&+Af~;jpRFS8Fv;ZT4e$CtJ-FJOGV3%^GEI)&mT`L+__rQ=f++O zp!6rQ&s>n^c`e8z5)9?QczpiL7KYX`oo3E*FUl5|hV1lREkXPMe12*v4vr5|w~actuG<@R;b%`_g_ z=&4$V*vjp$86IWI#IqPD8r+8sMhtXXcbl(uZ5rDo^^G5|otH+I>Agoz$^)GLkjl<@ z3~srLM#tryGVq8;SDVymj@Rrs=jd9}>_%LRwTAg|Ai->L{O# za!@p`uy-|1B=3=Wpu4U6rImMq`+k@$^J*M=&1(_ICI;~+=l8E>-;8hsr%P7hy+Wk~C`&h={sPC*Fo0sW)z`?WbHRGis@2bvQw)eHD zTZ<+l16^Kv+)>a~FrjapW?Y@uu zUZA=ow7Ok;A}_dw0>YhdRRwhVZmFKI+#<8`4vLeWSfEYT1=+JVY9xB4$;Iw~ApMe- zLk>X9ap`@{px%+YJ71X)Y_k!XvXeE6Rv{&jYio<%oZR`un!WY!XS90;j!9^_w;%DT z{-J)cy$h2&lTu@Aaqx)KQx}sS!PZ7?oaEaUa@(T_v*x)tVedM z!k9B=6VmMI_lQ&gcWJJ4qNtl{8IMxOY~GY;T^N92nSc4$SX4uFgSvwFiug*&#^rSrN>d6Z_>!J^;>;$a04w3wQ-A2DfEbRmYwCl`9tlO>I9aF0cPClelryGudc=<+1_Hw++H+F_|$& zj1Flh*3(?8$_{9YsrSo=WfDAopQm?M$>-!`c2CbU@#sa(@U+#m4V?MP$;vrxMNWd( zIx!R`7m3H)Ty0ftC1137yZEul1CbQ+Ih zjAC?qrBNrR_xKUoPV7B$gyqfEFyG7o@jV3=dDsFO_HM&~ge!-)Q9%5av)dRTIo~F3 zFGe2*AC<|S(^h2kiqlFnU&VH`ivmA!4ByU<-1Y!>{C4?vd4|;$=asv>5BVV6??6B7tG#{Sd*MPJr4T7}$dAPoF0}hXE6ApQn$S-XEp+oPEGg z0AhWOyfJRb%sgWlw@+G|_F`+bgH(^VE%+V@6J#L~ zBgjZ(w&}R@%%o*U^O{S;@@bi0HN;O5Yb*O0qn`G0L&%0f=Q*l2AdhJ0=4qfh!y#cA zVW+dU2xp7Q;4vc?f-j6F0lgm0`RlRovX1m7(a?H0dT^V}Vf%Qkx7S$L*zaT8kmlj3 zmbO-W_Xa7Wb{j{xsbJ+W-yI(lU~r zjq&9?U2k@sFvx%?m1+3Gq!*F`XHK^F1Gy3+E8bC^Y@WW%4wV1XUt-_`by8H=E#58qWhI%DO(T&OdCKPTd(K0hjr) zd!;^!Y`l%wVpRM&e0m^}5PTkY@#tx~T=NHY1auwWl38d7Yc){}t(eLL-O&Ql71#I3H@&Jgs zSPfTNJ@>&HJ5vrL(KJ`q6DRqVcb7+(&!xc&o!rlL4g#&bFC9InW@pRZbrsGGnsW`Q zG5%hJ88cPET)X`0tqWr-`xg=d&-7Yzve;yru}s*>m#-6BS*X{u^xxcOYdhl_M_sRf z+Q4)k;)!t2A48ijpM=y8^6JS<6}P{0x6)VCJ9TQ=>VF#`chh?s(B61RG<*TnzKvfV zD&KI`t}@s#wC<2tfT(NPl*5=Y5w!NY9Vbre^v#E(mmx~7!oa`C>ns|nS>}J6bE{9e z)LGfWoPZ;37M#9znbl$jEEpj_E>BbfHh8hnL}eLpnIdrdu-m+S57$87)Hl~AK2dD0 zd2dcLPK_K}+~J5gPITfR_&MPBd>CK|Ug25^=*#rnNFcNWnSv<$7ZcqRn_z}E#UdLU zmsa|KekAxfr`q0~DW(fcZpPA^!F_NrD5z ztjf#{nximdC|9sOHagtO4Z}}nBh{Nmmwy<4N&JTiHYYMeYK7<&Jg>r<| z*SzBEe9|#&CoT&=YtTny@2KaUe3*awxuZh?PrUKOJ^7&iAE3yuzmXG(c%u{rmVycf zoWoK)!$IxIkZ)Vvm?tmPf$r>lBS}k)H9E`k5WiCq_k<=bg8nGH{wTRs+`L_jFo{Vn 
zYGXm~tnGrnnQg7DaOK=#l)T|DrTSP()d`E&-HX?u<%y>toi?VLVmGpbiUT~FWkLKU zN=CX=6E)FwCrTdWge7*}l6FMm))CCgIw7&ktDFs3V+Eh} zT|za|FMPuAMa%aE8ujYplqk7rA~!ijFXwXO+k&~zWl8UMpl+fr2h3?JSYuX3B?`0V zMrGECSkcS5?%@peGG}-LrNC14ud}{xn!BY`y}$wH^BA<94n4yAv+gDbf_@<1_A3{q zM0VK(p4JmTFiY{t$NX1ds^fcaz#;i*v!cQGx(-Q$uev}Mwp~y2!GesHcrR@911bEQ zU1_X^PKyRpHl3%?o!hx5;@C-`o57Tl!N`jh@pyLO)=PDY5FC%qpN=Tj#uL6pHc?Eh zh98q6R%R*W;$uCjCR(G_t=u*VKVn=XJ!+F8L!#_58UmXf|P}%1a9tpq?3-7AqyUOm79)N)HeBS%;M!!8d?)g?Qdj41&aglcc8H1ZX9Q?d8SJ~sO;!sG#{2uY^N1h(!!9Py1YU&@G z$_x#KrONz&OX3PEs{Z_mLKjD%p(y$_!ua1PB?Wk}1&ywT5)uCs7R#;2IZ&X-``;(T zy*Fqp=%OP{XKrrpu2dm&WovX|@tJ9yX>s1eN7|S)N4FnKa}$}t5gwoW0~)CxLiE26 z5{7c8nfqo%0nTF{Fd+ct;a}0R6K4V<<~c6Gm@ix%=iWs_M>bm zq#+ZZFY#AFHGWLlN-gW<9xB1FF|U|ncLz@{!?WJE@7thBEKWtM#Yn*zLrf=1 zPpL<%jk0%>>m>l-hZ5yvE(=<)}}{(K%K69e=iGwTeSG_Eci~{J`Xc? zJ$er7lnvpT75-To1rh`!5T0w`pKB1Fo#CJ3B5|pL>ZXSq zlfy&wN-Dlm4<3FXatJmvjYMW`@sCsz?s2`V-+m+MLQbJWPAT+N7gDJ)GzceR6-hvy zb%~8m!MYwDH@BcOac5~&)Dmvj6>fLlkJ^vA+%R2-7osw(_-x2io4%AOP4H!toYfB) zGWh-&Dr(f#O4`+`?4yQzU_nxZp6YLd0ox)-v!7L^Y80Dc#F>|uY|Ds&Ewe1M%!rR? z;qTWxO@roO zPQlzF@KeFL{8VXhq9kFiA4kOb?kUGY_mOi`{Vk0w+Stf-ZeV?pw_16VQbXF5jTq07UU-qV&ri7=VgUI2c;vayNu}ZN@myuDC)Ea*zspX^q zkqL&fjhl+2Gg*tvNh9iWnnTVDeiiR5o(h$a?MtWkqjYp67tHaE? 
zEBU^k{Gv7~99s%xG#CSe>Xryazh;K-T)ia|y#4Kr5Q-E0@^MAbS4U}-yjzCty>x-$IJN&Rr{1Yvxx$A_`2bYNyF9qGM( zi-wHf@@iM#`DCW-pMb>{hA}9QSN1M8*KdUw=-f^h&?v`u6$RMuT>#1*p)CYtgxeImj+Q4tn*LI5An0dP& znkXL!X~@V#m944ufX)Ob)T713vGOjC;dZeCL7`wf3#I$chbOb&K!ag2 zn33sE9rQI{)rDSt$u=CWkd6XRQeTaYeXJEdRe!FP|13#T?RsN&r}_6by76@b2d(ll za|IjZLGOM)2x>UIpn(d$!ue`|1f~}VyjC6>pKMGEc`6nZf|a@b>t6wpSH_i@NmXk| z2v+{{E;9K8I~L_|R~r4$7wD(S1*haKG{5H?U`WIe=dfVa^H(MCibDYs%_=Fmr z0e97hgA>)*>P{p;4W?io6y?EJxQzrG0{H*{{spO_EH5o&4=qOR zyy2Gokc|2j$UI{Z$m19XDi8{yLiFS@_Z-ir;Inwis&NwEg3kV}DCb`3dMQcEH&Igm z*YQx1#V^#olLtTXd$5?crIsu`BH$JS)i~S4iIm#8_Ig0*{&I!siZt$G=yBr`3>s#P z-nl=pkZP)=^_Yh`$dC2rCr>yeh+jd&u3SVtXFZCdG2u<5C&#xhgtEoR4(dTKTCaql zTp`Kn)=yDCZYj{5J0NdgHa6acb@KGDO4lWGzJgtB4>~`2+o4wUovRUr?@0lKK;e{~G;k?#=XP;s98?3VF95}qU_PEoU;nsO= zK?7m_%%_ZHyZ`-`*EyVh%GF0&gF+}+WZIb_B+@~bsUl<2d2-TaAdE}Wiv81RYPOll z;!0P;iQ_bx+>~@3tpm_q8Tk^!ef(T+l31(v4-Co0tME%vpsDJVC zm5iC!m%G!OzqR1g9kf#3$M=@O`cbaZ4o7Rprgl+guh=GTCDYSAc*|3c9iVdxEz|ZUW(!qt`x>56=0cNIe|22qCcuYs;I{U% zux_vOai^zkNOZOvG!e)T6O^v6Q z+XDGTYX`@U5dly$h3W&p*ejZ8U@N!est`ue>bNvB4bUf;CqH-)|^#_GloAxb70L_g^=Dk2CQ1O zJlyO)58!0Cau1RCBfU8Irk6cli%OpA&$&Bhl!wYU1{e^u6H$5CK1lo0zr|eNxB7w- zj(0kvsI>R)(JE}dt(W7qAg&n~k_%1E!1WSv)D8eA1UaXpflitY|(o-qbt zC4xwvi&;0wu8|&qeIA6s*J>f8ftqfEVR{L4WTm#^7#+w1wP7C>*5b*{-&qn zH^I60Jei9#Ov24`eRTrCo-=;;tUC`>PSHA00Ia`Ol5>)CPopr{6Py?T3U+!u-8I$bI%6jg?rf0&g84mP+>`b3u4Sa$w zVmnDSzTG4TbU;>~TK!~BpdG~~!9rLA_4C=EHt1C!1n&pbVuSRMWtXryzYjzZ${Wu;LFW$ACYm5N&e4*w?9{rr*Q8;d6); z8M}O@2kfx7h-AX8pGo=065mh-9&Z~8TlVb&tv>z!c;iOrwTJcM= zMPBv0YImpos!QSsIux77&2rnpj+)F|+-q0buUo&T z_LfLNRp{KiCrNa5k;4t^I6$C#DT4L;Gi8Mm>f2Q967?w!>nLrJjbgWBv*jXA*!bx; zK%!k_qqMV3&Q;P*{Y@lfPQ5-cy4cQ|1Zrr_CM_X1A)A-2Nf7EYg+MulP&3NUHsL5r zFo?(a_iJtJ(qL#fzG98#d#_xCgl}{|kIXGNyX=;bO0!^q zaEMNzU-N|TL}Eu!eV^r96>AJjRH9d;7I@TT)ZC2RM3DZ5FhVnRz%vYnCTqcnY#3n& zi+<@H<`142=9jK!jbYVFm4LD2*?LC{7+B5kDv{e<7pubj0rrd|Qlhe}VhFfD?}rHoxf>cR;P=8AOngeGQSznYjMut2 zefL|*mVNk136_0udX9L@l6`c`0OLuLTHYF}y_g%*$w6^7I6Lp$1WJ1MTW6!ZlH3@N 
zj3Z0UbrxQIN(r;Id~M#E)#j*}7F>{5Sd$c;*00TLjR*s1R>|!ZIO6MFXBj;H@+pp) ziIH){;JMBEsLv{)ts<==&8nPxdbB;IKUeWd35m}7h7m5Nidp-k=19w7*Xe^ zmC7__B+AVh$?Ah)Km;t4`Rb%-Ko~4A0t@J)90a<|oq%)m)?t@*$60n*JO&QWdks-@ zn!N{yn_!o1r~H69edVJ3b8|S~_#WGyQtthNU^wo=eEF$a9YVW=6vPv5rVAAoy50v6vf3?tJ2L-fIj z6&PV}4wEASlhasB9{tD!Em=479k%A&w*Z@)oSSoyxGZk>TU(yq<=swXANhbUvCrZR|VvLvY|se1E;K zZ1zxQ=vGWO_chu2{Z#j2?mHI)HtT4fdW1Dpa0dh0!f3>-ySx5`+t zczx8dIQoMPF-L3#!s;mYDj06{6xkbLvNrL1jTsTwJCPIA;CZ}{>p3H=U`w_H;L|H;lO6XEMFxbqI!17`ke8 z9oG0y;rUIaU5|eM(^B)sr`Nmk7^HtT~c9EyL6@bF zk`uWPJg0#h6b#GF8OpJ8HL?U)A{GRbqmAW?=PczktJ1ME-7=U3B?YQ0a!!Kq=FHkT zODA03acuGYrsPNZ2S}0_1FO;}9$(7-bYsDJ36MR3JArHYw`^{gSA3Uq_az8?HZL^P z{qf4d(_NEqbII!L>%i0EQ;O~h&Iyk6L25vp=l;)BCNR_YzGa&;YRsDf4O2}6oee{S zXYiYHY5m5sLV>4vS4T%}N1p6Sm0QY>nvTW}iLz)-iODz$FvTB1YcjDXi4zHHYp5+W zG7VIXSFl`2mRs0hrekVI?u^WA-&OQ|a&776&;#X061-8jWQt zw5`E45_dRdXPz>n~o6d+^Z;1Ez+9_WO zGtQOtskm~8dfhPv1j+J%)0-r*L8wkBKN#Zf&DZcjpAqhIJ=n2qJk(S^!fN7`zeUV%DXF)yeYrIyqaqXkK+r}4b;J6wfGed>v#d2LD?{Ip z-bOLRzwh%^;vL1g1CE1af@G$|z%-I~Jq0}lC9X}fO&_P5w{&OW)J5;Q!Yc}QC5t3! zYN!@OD}M3({Y+0xk{hKCdD2hrK)&JIIjCY`=XuQPd%)*J1e#W~ z*De6_i=lZ{&z<8U){{M5p2V?taT|GoMBZ-ulYRne9&C%+om-L1$ORlFwnY*MtC)fx zP*f`N!&MP>}4?sQ>Lj?`={jg_ooGK|)k}aB9d&{spK3`iB4$}cplik=R!0%EDtnls?)-#FLok zKU||OTqA?IN~J7cJ%^vET({|}TAc+OzlIv`Ru)IZp@RJBclfg?d$7kPf zVqXcI-^$;j0PcsGk|3~9;nQYvc5Vag-@2Mnl9X8&tAtAGVfz$|>K9(laQzQEq!Ju? 
zn4(h}Q&&@0sy}$Y@V?I)nxa*udXl`wvL2GfC1+MD9g-M{)US7O}bl@HY)Y| z90E-#wQOBEvl2Badq1~{E*ovMmiQ4bSZOPv6Gz@linQpZ-EsIM%m`VaiaBc0Xtatz zmFV3+{5d!oRZQ^Ds~|N@2BrT%pD9a`l8VVSa{-27$A15dulsMee%L6+b@L!h^U+T$&0y~2Pn+uU$(EXaX2|l<$1q#YW{u#`OC4VW@ zjAv;1IFvi`=NruHwtxG4+E5MnDIhtf>6TLQDLS1#vY`+lFCZ~DXU)7U-EBsXmRw%C zu5#hm%o5TZLNjg2T60ma zsmNl>Z>dt3`Mn*Z676efvE z+VX|tU5I_8D^4xS94pg@_zFhZmaL_ZvMmiu)3#wxsZtC07{CI@!?HjT16tB2I-?1@ zuJj^5E(IhFdAy1YT3sHACg;3KX~(^Ye~Da*!iQ0l;?}g0&2AZSLDbUt+6fH&N;QX{ zNuRK{CK*NNax?1I){s)d|7O}08l)zMXpvPRW*UAi_@ z_k zSH@??1Pk+2s?nNQ=8+FZN#$pZ5d7JHFEv4zBR*Tn2;EHSu>7cAYUQ6`3IZ_;+ti;BzHT%Oil~zlK(;{6=HV+xq|Zdv4va{9&&%Qn4>stj!1!tsX zM5Bf49w%l*BZuogE&9I@^#>HG$g7ZiRw=v^r|AMI4dvSW^!VUuJU?tg39@gLQE9kw znz@C&*mHM`*fO{Cz<;hhzQvzoMdU;=fU)OA7w-Zn@{ziFSVX7=WHAFQ0?AQ0C!Btn zh2dCBXDE_6bq8GX2U_N|h@z?8^*k9R9B}6pW|bCw@!;JKEDgmW7qKIyQBC5`@fSr- z{Q9f7{NT8`3O1Fk8U2Y^I?%D%ml*4pfGG0Bzc;Y{FE{F6#WYyI;KDu;Q~vUNpS1@- zn46}*lTOghTlfddl&l@Y>6aa}#+Rs>8-Z~EVko-%--gW4Ps7rIiW;yV<6XWy^2JOrVbD~>1?;gr;@muSIZ#Pw zaL8TE@CgtgG!6DoprMo86GJEoF8-WIfS2zKOQQY;1_hgmsc` z``jml0)8dMQc6^5ygbd=5ngGp+&oo%*91E8XVK@+uUnzTtdzM*2@#>lJ6`~xTi?>~ zP)q(7QUs1a4+qY@LMpLY_?g0P)Tq{oHVM(Z zeDlFlI63=uHD?c?+9^>Q@%h9o8lybm3~%|BzAsN(o^rd@gksoj2Hy}1}ChXaL= z4H%E=l_w&M7s7y2rz6G&IA>Z^^QfqF*ZKZVK$PPFasML#DA zuLYC=UfI)2<7n@=y07lHD&a=~H2HgMeS)-S+0^b0wtNz{J{t@PCSKCZAY&l`Du`C~ z#K7GuniI6~*XgkFlXB{UB5Fa(4D{^P9W@oSiEn6IceRoNHOp|1n~IOga8`|et293q zvgy{RxpR%f$A3i)f%ZJ$Ekdvr9HchBL16Z-_B7A`xPq$Tdf60DKVN+;L>8O8$snh5 zPuayF;FIS6?|mx`6F`K#3nhmns0Gwc*{j2#qgh_Y1mOo(QcT#bnx|~a6Sc5UC`)MNbwdzd0GUSDC$T2fy&yr6;)HaVT2+7}4Rw%@kHYCCZD z4Eyk>58C$Lf!c6jpyW_P>vx_ipXL*aN z8zWjnslf|9I(5`$?Bvf#bsMdc&)_N38;VB@Po`}p&M=?8MNzfJEmHpk5QXJPH ziF6iMcWb)ze^go%_SP;0;PbP7J1KRg2RbPYrEd%T@>*AQAXdA|+TGajUe=MWZzE{i zOSmY%R#&2Oh`L9u+T(b!XT&n{*d!Xr+7mCveG3WSaI8jsix*vGV-rf)wCiC9<9Zh= zKiz#D8R8g|ugUK{didIxg&qHvfZZrCLG=nUz9>fq;Lg~CYJ{4r1o)%P`bhr`jB%?Z zG}0?JKG2<$Aqry3yOXwE^n@v_Ket2IEnIEe3`tR65$^fQ0q8@_rsbw~_OLCZIwC1eVMra5hc=3`llcwftlKp{nn 
zW}S}NC|>l~Z&X2%M4zupj`8vl@_kN-e?y93BiHz$uJ7rCceCwL#Y$7Wum3ymIbON|7+5Z%~tRg$Epu(X=2x54Zt54QH5_$}4aX#e@aC@Fzv zj}X!{)=Jh4Z12E@JQ=A+d3yXcJF;BLtO{rozTDVb3u?eXif$+KO>UZh^1YxqRm2S0{hkwUf|)wawjQU$6ooRdhQ}SkVx*4KcIJX zRdw66gMNwF-6L==86&X+SdAt3C>RjS<7K6aK=g#DR#3tRkmEO8pvjEQ|4O*uc zjBfAPJ)~>C}x*Yvd5JV#{7!VkJ~bqX&#O@TWRNC;2k)FzAMZ-`(@jl ze{P_+M#(X=n`E``La^*YS6wH$h#H9ewo(SjSEZVhZD^;y-0Rj*s+YWMHZ3)U%`JuA z6C+6xNvP(*Z^*MotqrT=;j}sAMc2VkNGGgVf!XwBE{#=*98B~q`QtY!B$BFC=fnE* zPW-a3v z3ufp2K>$~|%wTh@ZHvNu`c-Q3pMjmynAIJj{T-*iiH{%UFRz=dscnpesH5oxZ+PJ? z^hL+p76v)n47K`Xu=8!4~z5`+y1^)Kv*NJ;+v>D$}?Xwt;GrkasCsb%k+M zHAwL0V;{SXra+E_qsIAT85^-yG&ZVGLXJX?Yb-N189yl%pF>~ZyqrP%j2XsqW0&zo zLf%GlJKsqS3JJjfDQ8 zuBfet;UT9}g1K`vS zzOLAVAxEj?C~Y~)h#aMsqfC*b^yDZ@$x&v?QRc`|mXV{(Ejqpha(u7icj9(e_9|l^ z%{=^;UTd#)5m(udPwY(U6?k@HzPyw^Pw>5q^8vF9&_CY09KU(91kXHdn{3ob@J+<` zQH+$#1TT<0m|qG#JmQ;((pW$#T7FAo zL=755!iR>BATFqg1Q3ZK$cMxSBBCN9A*h7w4~>@?LX4LfFEK_FM8eK||2o|>+q*lc zXjs2{zq|G8+M0TtI(6#QsdMVlE$&rFg+19nPmCSRVG>o~NiepPIIcUMUVydMZG#gl z=m_lxbY1Gkra3L$zSbHsUo5}(-!98bX;rjl%V$ct{$JJ_Bka(!C&1JJk@8rex2M!X|Fpn7|1C(vmlqelhYJTW9a+9f5L|m zC475*`w`yM`@dCJZ9&-RV=LDKtPN9t^(ulC?r?jP=h)#TfVv@!lW{Bu1e%Y9w_{d_%; zT8|pX_{TR=#NXB5)mPx};Dcr8y9KG`{%TLjpz@JlRxoS%h68TU-^Jg(v8?Zi?})O` z_r3zV$OlW?cf_CTFKVQYqxt(vL8lM=K2JRB{XVG6zJ0z%$|%Yl-%_FHD<2|di=b>K zWxQ{eG6gC7kh0RZ-2*G8K<`C8if^KCp3h5>gwjia-m9!YnaRHSjji(a_VtDhBX+&! 
zBCXap#Z$_XPbiS7%6Jh*&PGo4DJPT@@*x?zg8~g&IU&*;F@a?OUQ)Uv<)Cuh*Vcpm zPI;ZYk?kj0IpmW)5Z5c~edM-EPq8u}&qs~JT>HsMYm3QFmoWxlWyr9v zhI!HmcEK^}L!>3x+H0%y@w8o>Z5AsT3PMSxail8J26;Q|8cBw}BJY;pS3DzC-XJZO z*Gdnt1tp8rM&5$Wv{;%c-y_xzBf0*qh zxv!^&cQN-u{y{^OrFZ2Ho-#Y69hhS_@N6$jJLEuPDOw<{B(ZV=`6oRlD4Z#^kfy=b zf&62gVqYV5q#4o-s)MZq+)$@j$!RQAM}4IPYg~|jQk}HiQ?9Snm)c3a2=b30|L}XZ zo7$TC`hOLhN$8wIG0%Pz+sSJ+xhpQbg6fQs8@Z0lOGZ(8C9`>sshJ#*mnAl@wF>CI z7CCQflC!*|!o@~my;|ruWq6$NK8@oRkg#_p=Av}m9WFfEjZ5>zsjRP_`fBWY_8({M zvp;j=3|HTp&iIc9USiB%?THn7>ORIa9e3*W4q}>}TQcAzE7>B?<*lacRdTDwQ9XVw zb6Yio<7#=ua9)$Rel(l=(;L^7du()*IGxuI?M&RMQ_0RyN&Kp!#`W_k``2W_`{pco z-<$>So3r5U#Es?kP&*TM>Ufto!Fw?K73?Rzu)iBeR*dV$+<+X}WL9r*z$LIsgDkBI zSXvdav?^x#5c&os6?bHcQM#D%_#VfJ-hift|95y|g*7w=A#qY-yeHln%Rc-UF5#w} zx}8tlp^JMtPc{m64&9Yr|-eR;Sg`F#T11KV6}}t^WsIZ`2w?>3ha7V+7r1OfV+U zY-5sf1KnxdW&E7xT3fBx=y8FW%SGbUP7mtiuKzUoOZ9;h=Oxm)C08;+y6c5H#%5&aDbUOb8e#s-Q>kl^#?k{loOBJRHlbY7|@b8i8(V|Ut&U@s%e+*+DT(@MmCh@>K{h#}a-1A8* zTvr0u87}=heRkI$dy8&)Z za*a1#&`i)G!((ZBA9Wm;6J?B)aWZu~=8x7Kwc&h5*FT@Aa$8QRo>)Pm$m7NlcjB?$3uAgQl^ONM8X9Wtz{tK1Wi*t*?5JI!T|fctLhTZS`)QXV z++Vv4;Q`tJga>K^5iSGt96&FJKNxk$T!+kc__+>0*P)<}diYCFlOHwhz<7Vf+C{n6 zFHtM3a+=PvP4dV#DKt6+X&jmqf;0==7!olzg)nP{eh|WZ7Mc^9LlWfL_2gH_tFWKc zDe4sRLB>r*`1|UQDM!6mT>}3B^#Sln-eb2KH^VHt^5T zI=~k+E77jhrXc)7Z4vy3wKeb`*Pf&z?I~?5!mnuusX+Tw`wa5*n0A5+^c=l_a#*ev z>sRW-5gw&qOL_WOeImlQ>a*eBt>257#rk4|>-DAZAJl=B{7OgfM0D?!;c$r_(O~#BpXAGD-a%OTumy=`hal*WW8+MXh63&rW#X`I^CEF|4~T) zTw}fQIF%S1j4f1PJZHQ}#l}m|0H73MZT5VJyY`0Q|BGQu_CvG8vV&x8MP zcm*lpRpBQnH~fe2A1FWkxA3Rnq^;Hqq=Ktnf&Z%YD*WBnZuonwJ@EHg`zYUf(|Qx( z{nmc?Z(DD}f5&vz`g$h6+K-lsh4uyq*mf42S%|1Z{GNR5<4RH0o+mYAQU zn6!-ojc*S8%JYple+OBZiaFoVQR>jbXXB7j80KX{waZG^TM#pI;Ps2^=ub zWb#60bDB3VWDV;W#5zU}MpZA;m^aUZMll4kM(|f})+H`rZW20!%HvnT_#KZiJ3*a@ z5j}~=@(q9`xnubz^(N$*rcOh6x;g{#H>=PP)$ggW)YV&2pUn07xIRDECvkl;*C%m( zLduxmK+5E@)R0*UC_K*nTwe~4A|Kb6$6V`Yt_?ER2AOMv%$+{wvjEBRA@Gnaf2@?@ 
z*O#6fJiUL;=|DbPnf96gT%XUDHt{jDx}}QUJm94?H#lcm`OXcw^BbRVbivL})veb_bpJ`F^_EMAf^2j=Adl`=RbmzaI-jlW0L)(_^ z&C=tiNN3oO{ovQaHHG8U<-~jQ$UL)*AOhFF%+^aGeh)?L<;LY0HN!AoB%{u_5~F4~ z@+ru-6&k_w*7GD=+pKK}zku-~TQ6EKBK0NfC5)Zz)^>znwqE9uyz(22WRtSX=irIs z#tDQQj0S|0hQJLOPAzd67hfS7^MSrl`S zIS65)xhUpfvjX8tvl3z3v=Oc{s}Qa>t5GUyMiC>l8^0Mh;|M3rAqWpOha%7A&~*If zFmo8?GIt^VSLSQ*-!R{R|7-Kt@b{VjhSdEgbaL}I z<~#5Yn7@NB=Bh%Tt3o_ig?O$Cu?|(tI#e<1P{pi66|)YN!#Y$k>rln4Llv_QRm^i+ z0qam9)}e}7hbm?rs+e`CVqPN<>tpG)QKDP%JHRtP@tp6>@CwiE&P<-eb9kOuMX=sr ze>Sfoh;x0@GJEE?@vcZW>VeqN&~?1AZOKAxg;4bSnFL(Fuig&3V-aMB&0JGG{dVdB%i?bOE?X8o zVNEWpo{b)`d_FI+Z26SB3#1b{8H9s`t!Ig zV_RSlM(=bghgSbK4L1JM_)n_9=>3c;G2RNuHv3{MRk2ndV@*EJ9Grj-zk+JbUzoq3 zA?6F_3pCVx(|nUIH&bSchJokKrYpd6HB<+E{$aWj`ur*y9{ySQXEY+bJ-nSphIfZ| z(&b%4f!7k@|BTZgPeG=X_>BJ<)T z=EBL`o4Pte@RF*tA7PH_#OtT?cye<%bpXwDIqCb%F<)j)uc0!wqsO`W@OW^_MAtT* z27Z|Vy)~%b0*$qhdAkIZo*jCP-(Zz| zYJCcx{D<`q*fO74pJBXxZhcOL)<3O(QjyhQC8;5*V0pUg;M zq>!vgaio|c5iO!oX~c*a)Fu*+SkyMsCeoJLMcPN&Q-?^$NGIwT=^W`yXOej1D&(+q zi!va9s32S60pifJFAao~aBx9h5_7V9JD<}6P*C6M%KRnL+b&l&{OM)*-+?2C71ypML+-4_$ft(M%D!eWsX%^~}xjvcelej*K>yuGm7lhA7tuogtbFC8BDsins6HDt{3hP{cN9STZ zWX3~gJY>d0VmuVaL+Dv;m?I_TNQpVJfH_iPj?7_>%w>)YGDqeyN6O5R5;(Gf{8kct z8DPGYm@o60FMZ(4ASsc2aA%OYQ)BK_nL7)aJ2h};m`WlRcr?U3sxgo1%%jE3qeVP+ z#Vv&kGje~HSS@ckS}{WFsS}S-u`8O>Y=lk)ZN$1ru4Buw{0s7WgVn>I0@4 z`5DI`kDz?Uu8^@SX6#CU-C3k&ucbtK}>?h$Tk}uZwkPMT{tOe$+XIeMe@E zUfAMdE}o3K6|Pn2?*Z;};=Yx*Z)KJ?3iqm?do`yCT<6T@4>E9N%vsl=y%Y2ae4i*_ zeQbRUh{vttfOx`!W@@D@a9%`-_|Td_BmjBi(9YRQb#AK6W_>}d3y@>^^W5NLd%@3^ zLV#_A93kbczk)K28iR9>#_5AQxvjz)5Gxipx$6jm;}zyta`{zop2Dsh_2QkNG>^@3 z_xi+cO(GK~YF%vI$8aPIan=M2RIu)}mav5PF}}q3N{p|}_zGA>Jer6{lf zJ8Anp@w_VG)-}fC(s129C=RcJ^TIg^%&cv5^QP-*UVkRdb(y#!a67X*Zyn8fF;o6W zsUPwG_2Mf>jd7WBmpI>#N$(k*?5_!)phG$=_NBhC+;TWC*4{o#w*#X*v?a8iD6}KA zhw?(Nhu(lpg&aZb7Lr2Gg`Oihv^BI9`JN9!Du=d(wvj*dLg)nwgkB82NI9XGLN8Hn zXgi?f0m=@PdL{G9i*Qqe{ z2I>|klgzJK_sgM|QO6fykk1$tFb0K9FnBHW8p;YhiWrY#SDFe5yN?`cI+rbgkQ&1L 
zq6;fS4a3S1R%92?H}4dNYiB_=p#_F&pw*=+G*`8!+7q*FrD~H;y;2>Ewij7Nq*@^>M4DwHyrrUO%^BU7dmQ>HUhrVCT1YYWPJonH3d&!}U%%_Hyq4BK5BabnZ=Gx}0@ zH&$pgdl?fkKNIHXdJ4OGj+n#mvi=vRF14P+jNCPH4t2>u>W-8JE+mmlftFkcz3f%} zFm-0lxB{`oYKdBcHfgGcHkqo4HnmaPq7OT%ozN3!sb@h8>!tRB#(JT8A!u;1+L!Xw zOVvv$Umc(h02frL)l{g))i|Kls<3s{>(p^9FMO=u66?1L^up^Y0KIS`1zkR?@SIGV z@9-ID(#vx;X?}AkY+D7`YAJGCRnDz?3tT2zb>?Yq6-xlA14hV&E{=C2raSLI6YoI# zdHp!x>_D@1m&?1(`Mm2~puMHN4PO3t?cagmZ?)f&rXAD%iILq#KLcFcUT+W1?WlJI z=bou|0q1trzXLw)q4%J(n1)@LhTZhr^*gA$K3ktnJ$OCnyL!F8gnIH$b1&Xk?#=5y z7xP|nU*1c;g!gOv@qX<F70p4svQusjF^AG%-mk6XRh}x|udU|&+NhZdOB4$$ zVTEeK{;;2F!#Uv~4G9;7i|L9mh6KN@;alnx*UEAF_Y84!XwUxeQwSeudL+T$<<6-qMM%{<% zhp6Qv^&<*sPifD9m!H+1<985zJ2dU)%25a3|Lw|)GL{!*nbb3p zB-XMPF{X>fc=t*pUYkFYWzt}Qja6V3fUd<>F|;Pt0)|%DvPiMoS?!>?b+kG{I`pu5 zKy&M5^@8ShfpsC}Sr=OuQ@+*D>PH3E0Bax>TIE(bpjBEnl~^OKQQTIEcY7%dUiW>w zc<~N1xl&V{fZyTvY`rTrHTO2b%0LOhnz2p%W>* zJLywrs54O0%*-BT{%OZIH_N_4(%n~($j7VPe%>t#@LG4SbH_!7^(;^2H`_Q;R@}XG z?q-TsJ}BbkLwIbGl-x|5EcX`gmyt2D-Mf?z0@~0fnwQYI0_s|I9oqA_`Z%zEMtug@ zA5!0=BA)d^Jm0D6C+goxQ;(^~$kfEkQNz5`Z)p>?8>m#fQJYHbw41eCsDrjjdxSb` zk7_@qvw5e#oA$i6jk>cQ*Ms%Ap4#uVLv)_@N9~VvKD4>x)LU!NQgjjT`uAl$?h^eR z{T#YX|E~UB8lYdK_oadQrTV2bn0HnxcxT0iG@49R`c!=?MfqevOutjVlj5x3C7|Ec zQw{XHhp1M6Sbvx<*H`P0&@lZ8eG}EOt~Z=@y%9#x2+~NS$S9&wtnrOzjqh64^~SQU zcO7)SAvBJ4z3W-mo4~r>M01KcjV75ln>W)`^9SY+=q7WHIftg1bIrLl-Mrhphh~@y z%mp;lTx2ey@9~*{TUZmkHCz}jq}#(OWZnziyFnmGy z0=heVN%#`FCp;)zK|cytg{$bte4b!Y_{#7|x;K1P_$pcwzAk(nEe+ogo=W$Jr-!H0 zL#Mope>(XedS>Jd9tBzU!Xm_bVG8)4_rfB1kvz(YoPiR&Bgb|D@t&~4d%`~64fgX6 za4zov=bd!tuA%7&6EchUiOIOaxRPXJ1gs-JTStMfIZA!hmfz=4&9wgj_48 z^Y~2D`Fy6SH=k*`fX_5tXj-O47n$dp=TaZDuX!n5%<}G1mT~?0eA8v-4dztXze27J zW?5DNS#~dMUUBZJiqAb&n+MHy&F6T2) z!@`@wn_v+?8-AAR_zcvQ;XjA}OvA&UhmX?;>l*7C8s+IBhcD9S3s!;t`Fy6Hc%5A0 zb#jGIrTO`cTMn;m=kdyRKA-$5gxs4;MaB|iDJ%wY>P^k?m9PWFe*QEU{_2zV`oFT) zro8?L^V?lwl!pEkN_niSQcB;)UXvw{z`Y&kbl^PdNp*62*Vu*361mo9g=Dh!Z@LfB zPqV-hRzfYO=i3`P?6%FDV;$!h^94D6&N;4amZK?s)@QA!X`aWk 
z*3>l5hAee8r}O{uq&l^>%(Jmso-tYSJkc!AwORXXQ`VL>&-II}ZEK$E$*iqw-uF*s zZC~?TPiJjm^IXqlX=6)1+MK1vmbsqIQe(?pTe4DByl<}^LhYO{6d>)ntaNReZ);Y{ zw#@f@R@z$pz9-)A<1$3svQoEYz8A96w`IN;vr^c~Xqda(57BU&T^c%Bb>R*douS`@(8#0TXK{C$Fch= z{m{J6zHOY-?qA*iD_S}_U2j2HyhIVB=^>%bH|ZyH@bHj z`4%HvC~xo`2$|pd;e#f&3E@ukFZq)!>L+!lg;J4tdn;UbI1A52w51!g%#PlVYz zvH12x8@3|a^2raGtq6s0Pvr6Ki6EZ>$>ZA-K|c9W#8yNxpZo~16`}I&35~4?olk)n z?!9brd*TeXBF^BmAo+ZIBA<2Fc6@uHJ==ZlGdHr6=t-f?N_=~w1M`cY&+7X5_Jq!s zMh;sVIc#a5lwj2ui)Mf|1JyFWHCoJZoj(mHfkZ(`4<=Ybq-=65mH+hQrCQk|9 zq3FnWD0IFyWh2b_D#3g1(f`JS@EYpKpXWd(54_mq{)-Qd+!)`Rde8kX$ry1@7Svynsuw-#r1Sk%k$$Qt z-h*GIAGo}8|6Aem5|Gi43oO9-X3wk&+Vi)S!d`1*sAfT3S0&?yf zNPOSG!BP4mC^VJd&(<7@bGty|HE@v%p69PZI5z+=e(_txh%XV1qm!Igr|!t3@|zMI z_ZwKBPk4SiapM@m={c+4h^e?weKJg)H$n;9vguhwjt7;q#Y@sEYI$N%oVbg!R6M=w z+`uc@zcp$}1zM$+_N`LOk6WP@sa0x`8xFJzkFELdQ%h>pX|DyNKlNjNcgtz7hi*=Z zRj$){zQw8Mfz;&FUXS2`X|2*b&$L4Cw3eiiG55B{=sf*B(>n8v=>PV~{?xehDOxAq z(Jl5z9Bv<%3V44@S2ujxM5a&MVtzx|&-*s#3Gjt1tC z$dl@m3-->|sJdM4sNo0BE($!2k)hTW$eBd}=O68f<%S>iry z8Fyk>mbl=K1@fYJe z<9p)!;%~>_jlUQFApTMO@A2aaN(2%G2{mCQ+9f(Cx+QuhE==@Gl*M}|?1cCm#^I5P zF^TbsDTx`0+Y+-A^AZbM`YyD^i3buZ5^ECc5*rgwC$=WGCw3)XPrR9UCvhg-kSY2LLbfkCB@cw^Yj++4ta2;DXiP-GJa~~Q{yw^ zv*L5&^WzKS_3>r#mGQOl`sz>O>*Jf^o8#M}vDmQqj`;5A()ix!miYeof%u{L;rNmG zC$8HaKNe3We2E|$WX7i^+9o;yo`5J|&QElWFHH1^*C%?%XC``7e^UKP?0vun^h95{ z0Svt~Q4!zkz)r*f{cs`%H#B}EA-+JD{`O;Q(CBI2RV?7SkgD2&KK=jScGQ~RscAwT zn;@^c#Jb0N#rnkh$I4^XvD#Q&Y&8D&kByB@M0hH~Gh?&xzYcCrY<_GZ(niPXW6NB( zGPV{mb#DItND=VX1L7w9+Kjkfv2B&dtGc7qHpKL2*gKGWH{4W)E@0PYV2_666i+?e z#8#zFcfS4OQm5iQDu`~5Zj0`~vn{$C;l0uQ(F1sPXRylSuhtSU0jB(QS_QS=LO%D~ zuaA~IO1b>i*F65-YmmR|nlHXqA&ED<7f`8q|3$?u{786eqf??YqPIn7N9RQsL>EUN zh^~mPiLQ%oj6NOR8r>e<6@5MWX7rut!RY(=Ez&-WejGg-Jt5*_ax7QG$BJTltTfhv zf4?5xu{kb|X*|m3f^y6`)b z$HhCg-hs`trg(sF8uv8b(e1#!8Qu&Jvuyf5w?p6sSd{%cbKio5E?>=0?cn{D>#C1d zpNPuQT>EgesN$Zej{l|64ui)=y9^$SaQA4h%68E{(f-l$XmzwUS{EH1?NYwZaYz{( zofw@Oof(}KofDmpXJPfxXgwe<1I(4twbAue!>boZH$^vB4F}v(@N0+Y4nW@xw^u-C 
z&OHR!hZ(l(2Jep^89bI+^P6HI;gUf>c{xR$*3+tD0Ojt?HJ_VO4jOAFY~;|Myfa8a%aX>7acGFRxlXcwyCJ zRU4|FtlCobV%5&7JxDPfhm?I)Z&$rr^&awnRP}c}$E(IwQ`O|^K-IMBf@-zes@zq5 zTXnnY&Xv0W_nzu*Rg0>74xU;)8?K*#UR_pgS0}2rR1ag=t}8FA9yyrTs`-mztuFNj z9-)G&vMReOfyXYNkMJ;id08iWdHMVdR$sp-#_<`s0;(3@svkJ#Yd;0{4tuw~7td~c zKf;^r1NI?2`{|p1wdU)$uF2>hu!f)7K@;tMcA0J434545a!{!~#vX5vw5JTJw`bV5 zRgSY~+w<%N_ToVuY}J0iUSY4X*V!BGr|qq-TWoK)ciFEGT5G>qzQumWJ~(gy;0cHV zX2O0Sa6bgZk5SKu_7u3IgX#tJO0&|mPYCE`3#w#5XV_J_fWEORw<@>n9sB8ly{YkQ zG_5WnFPqIzZRLlR9}n12d9?C`E!(+vk*(XMc5X#)yMx_jP)XSlySv@1a-H30z|hNP z+5PQuySi+aU2E6bqg~g>9&1mur`j_I1?^e(9DBaKaKHwJ2$+Cc54g(!d1d9t_OgoJ zfV>v40o`5?w@E-Bc+lPq=nm{{fL;rK8+fa3z%Abi12bt+Xk0Z?ylThHoS6wW$xg%l?N&hUDl)WaAj%b!pb9+6DvQdJXV=>-67j& z2kjEuwAhgNQ6IBC?p}Fqp$~B4QRX zVq`JPixH8R$H`&_Sws#8vk_x9VtDu7>TUv#JL7vE&pYQmzVrLl<*TZ%tG>G5z4xp8 zr5jqFHXJin>hhrFUC=hR-d@{otijf6MvZ!Co!T~(W{Fkm8)*6Zp>kS(Wn+#hGRj{pni8mPa>kJly{lzJLjlpXO>n<5$hLoYh z&}Ha0^wgN;V(`|~)JQe%nofOD&2B?q%~`$jrK{8;wpnZ#fOZc+i-U%I*w#Wh`Vx<= z8;(O6rq)LcqtH6F{c=9E-u-euY{v{e+FR_8{T-)2yWf6coRF*EsoxD{r+!b>8QAu} za96*x&itap9{*m`ANOlOPku^}kgMN_V^(Xgi&i;7s?^tL?Ju6Kt@zV?O64LUS2wG> z4P{n$Pd5+SMcuM)Ry$gKDQgqoqn;W?xJq@ zi&u1K0q3E;aa~e3@p6G~O1DmT?ZuRCMt4(p=fzZ2uqycS4ru$nZny3MlqGDvu20YO zsuwD1C2Tueg8E$DZuWoj8%%%pIdH{8hz7_0?=DRKU8g@w@4REa2*05)SJB6Q3bFDq zU<5D<7z11cT+VK}x+}1qPS@Q4+ydMM%mEev4`Bv`Ga)rV11JC#0!jd-fO5cQ#%n7G zOj}LhwOW7)WC7p=_yG|>9Iy?r9k4_GY%N5D5SudX-WA(@EB=2l-PU2iF~AAHsTJEZ zfOCKg>2q*<2`~w`3b+oK&2D+^Z33sg2bc$WFHiyE(%Bj+5w#am`&jbq|e&}az9`{;1B?2 zqq-r$$@KoyX`F?+^MG;2=_UYE*?VESYk(QRO~4(%eZT|2lHBk3l{9))dYk)rn>ill zZ=?gb#pCfj{C{@;T0k)XY^r_(U=v{L<1IcveHEZ~#nuS0tk~KC-t3mwhvnbiit>w19u>IQ%h)C~gTSx4)R*A3T=)Q#4S z)m^N+Tz92zy6#5ZE#kXbd9H2&823KZ@>;c4qb<-D0?V?@CE8MLxpuR*g1GuAS*tYx z3tA_zUmF3AYqtU8S=+Tcw7ayu+P&I++JoA|+GD^cw5Nd2XwLy-{RQnM?WFdq_PTZ! 
z__p?*c3!)vUDhdet0DeGUPD|2yiQjJyivCWxKdXGtk;==C7l~M2xmpXgMlwZKPdY@ zVDwHy;O~y;PV3I<&V!em01r2=y9WMk96a24-5v03=hN~y`Zx4!`dod!eyzSZ%j@+U zfU$m4nz!n!^tE~;>_xU@WxL)B9M;ExQ~D0zE`4{Fd-Q$!0sR5}p#G@-xPDkaq94_d z0b^?y(>0g%SAeI9vF3*Umj14OPQRe7Ca%cJSpU$#8`K7kp#Wlj4ap^jQbRe&n++9) zYJ(QydSoFhI}Lu|2+R(u4ciRcfp;#XPq^kH;x-8j8n#I#u?*H;~nFD;{)T8@qRt4S0R&}Tb~d0u(y6~ zeR2JI-BA68`c1&HEc4d-s`UE~WcyRHw>}IUt4{%U)OP`Q*Y^P9S$*{b^#|$)>yOqS zuOF@-sUHO%tG@_*x&8_;)=$^psJ~Tzw|=gE0r;VbH>ph;Q-P__RAMS6E+^g$Tw$sP z)|yPfg2@T&H${NsrftA@)^^hl(=Jo5X|HLY>7ePb=@{?{(<###;&Y}8rc3GfX!r*0 zHZa!QOV`UhZ(1}hHz*rcH{>;}X(-C_x`r}ftlyaCEe(|oH4XX(GqIGF-3>wDXhRaX zy`d9$XTxsbJq`W9`x_259BCM8IN5Nz;cUbChVh1phACid?OM8Krr~CmvF1+0{e}k( zOJ;a>$YxbW#(HzEIp4e%@5@|lnk4z2dA)f9$eYYt%~j@Fn1Ui(va;Rm1rD2Iz$tSF zaF@9oxX0WFJYYTmjOPxTkD8C0hs`7AQS+GjqWN-`uMkh0Z?iZc7k2YDogaS(bK7r)8&Qw`Gr|-?HCw$Z`aD$Z`_+wB;=DdCNHP zgk=goAknCcdc{Q1?xjgk-!V;ENg@UOA*!!g+hr?DwL#e!a3oBa7j1^@+8Pth3l3jVOF?Jd=D6EthK_t zuqZ5xN^$j*jP+ukxJE1z*NJ7~MpL1KAxIHUt#7;|* zxKrE>yhrQ@-Y*^kJ|YgmGs+WBf(+*hh2m-PtbR^BFOG{7;*@wT%QM6`#XG?F#RuY& z#3U8!fRsy|pJ8dOR1Cac+8}L`wn|l5u9b|y7RfGorEp!X6q8a?N1YL41F1{umU^`1 zQlB&cd_WqMJ-Bof{h)MQ8kR=%wbH0Grmsa$Bwds)OIOe4srhx+UF}=8Q^d zL3(IZ;&`#~HnmODSdDsXE3g&XO0>PUQd>FjW?O}W*^eH_w%4}LcF-6^%WpeuJ7zmkUu!#MI|F>qc0sn! zwo7Q`Y?HRDw(AWwwprWlh8napwtKdD+alT=+p@U|Es0%eUv1Ab&)L`5i_CMV`PN$d zI(wOE*}l=f#a?NzvFq(-yJUA8H1?o9S~qP^+S|1nd#8P;eK#Dl2af5t@3$YaAF&T< z=j|u$ryDQX&)U!1$L$mLDeXb~HT#U=l>Mgtj{UyBMhuz_IgdH(Q%F!Xm8;&kVx1-0==NNDtpopTxG3YqzIPMsBjNnsn zj5@|-&+fSBxD0&7F%5jfam#VnG3QutJaqC-wNv9Pa27gCoTbik=VoVxv)ZY3nw)~u zDaW+B)lR=oDaTH7l;VsyMy5a2Ar<{A8z0Q5kgU-XwW6l%K zQ_eHabMPq_=*UaXNqxTas`I*Y)_L1`59E30Vx7^s>{8YlU8`Mr+H%(#R}t_!7sQjU zjV_2MU6rmHeXUFHG6PF4w|>MGbVY%au69GUtJAd;c(-eht6zKFwO@PHb;xzZHRL+! 
zIt{T0^nmNE>%41R*6qf9t_jx^v~$fhV^q3sy6(8{yB@ff+{~@Q2-BVG&UdeM7rWQH zH{iH%Z*p&SS3#5xvAVmK*yy&n?Q#U}_7aDQW5g-^6nBTr?k;z?w%6U`?$aK054aC# zuet}dlkTJLy)6aRf zdv*XrJ=E;-^vY4IXD{(S;)6KqJ%@>p$?Q4dIb}HIIb%5CIp?|Hxn#KDnKYd9T=iV{ z%o^rBw>=PrdgeWgMx|%jtAyARo{M)iah`XLx5#c5E_l~jio9jsjovNZN^gx<4>e|} zk-YH4y+Lo(oAkDOJH0!-yS;m?gWi7ce(xdg5$}-qB-Eewp7oyhj(aD(Q{HP})yaB$ zXS_GPcf9w#55yXYd6#@lwuC;FFV~mvTk9+Kt@my4ZSrmPRrzXtMxO;^04wv^!3)S< zhU~WNqkUdqxbYZzTVKqVLT~Hq@O4=S(Q5g+g>AkbU$?K%H{d(q8}uEucKD9_hJEX0 zzbx>+5lfM8)HjB)gzsWwwePa;3i@>4G_-ufcPry$~U4F2W{=NQv z{)7I*{$u_V-WmTX{~7-|{{{ag|D^w_|GIzHf7^c#`^`U3?JQcm{L9vkfHJV!couDV zATJ&500-6(7iFTF#^t~|;xgim#9Qzh0+qxy0e!%1+3nvIkSzNHZj43(LCciQABYB$ zvK(k9?nGY@*ojs*ursh5dl;jOz#ig$j2vv$f&GC)wrZ3EM~HU@h5{$8bAi)=vk*~0 zToE`Q7!ORyQH0Hp9GD{R6!-vW2<)8!z8Sb9dwlRdz=8XL2Z1Ho%LiFd70eZugZaU= z!ZOxFO>uC&?B#8vJ-L9E=51;!?09 z*d;DmP6xZeJ8up4fG>tV5B3ocP(Mrs4+ICrn&8pkad9Fz92~Lim!r_&XmHF_7`zy~ z9J~^o4&Dgf3f>LQ1s8%3Lwrab(u4{^g`tvAsctA#9@>n@gero!Le(K{{b0xx5<Ivt(9kaO~p;?fj2a5GMzzR-?X)%y{W2cYhz(kZIiLd z(qs=0HF=xD;j{H4bY!e4)zs0{)zsb8BhRyp(WX9QuxX&_0K^i;rosB!rlU>A>uce2 zhfRe|BTb{3Pj4D)x(M?rnk6+|Zo1Mip1z*xrW?Swn(j8u)z3A-yt3(Ggpa5rnn;1R z7keO57%7RA%CSm)U!**;xuGUf5vexMMYIu9M2I*e{zycg4@TmVZISJf9g$s;UW{`h zdn5ZI2f?qw%sO(I_*mpbIBJ+{Ow7iVmti*V!d9~b=ID2i*BVL17)m+rPuDPtah~$mLTbe7IYfQ_{ z`ew6f8RxLgQnR}`D9>1%qr^$#_J*0}&W7>koz1&5jB`wz7P)y(bAR)Gd0yFksQF0q zP{UI5$>!6*P!Ba{o6pO0%;qBEjl^3pifbMxo*Tg6hMYl$)qP3>Us4;3Wk45cKuX!#S zj>e3^Xe!##SnX|(c3Fy|-O(PQ+A`623go`%fG#9k zV{A*TGFB7Q$IQT6Vp7Z<3&x_cByoGJGqy9f8+Z?Je{8?2ANY`~A8l#uNNgx}GIlz4 zHg-NX9-D|w#jeF>VmD)VV)tVYVoPX6TUo2BHMcdtb!}_0xvF)2>xR}%ty^2G^mDDX ztwwRezpK>}Qn%V$y{%!{Lm{`ur1k#N)|9lqwWGC5j@;bEt=+9Xt$nQntp{2MTaUIL z*Kchd*4MUk$>Yi457OQe~-)sV%9>R82~sGA9nDBYDcolmK|5iSiy_L`7pJ8v~pX1lC zxATSkJJ>V)I{rQES-y;aKYNb>@ds%7rD zYL#j|cUAR1)%&;ys`slla=%u6L{-WCR#l~{;U1~#R9arC(yJPImCCB}^BPr?D#{nB zVyX_lMD=Sw+*inrywnlr3;Hs@5%h~oL2Z|0m; z6y=QOd{>ow`(gT47iJ>Mie9yr%iB=C{f` z#<4CoM0xlPEQhi8Y5@XZZyLL@a;doR(H`-k#7pYRa!Df{nv~G 
zeP05tf5bgv3efR9rqp~?^HH`+^P=WO`WDl7^2hjNjPbAXuQHDRI{$UX^QZWKfTO?7 zf19o1$NBHWNckcEL#F0`#QzA!%a8f1Y&Ad4PqSS9r~FT0)V#*O#x(pb{$FA2{F47A zjGYDkH!NSlDL7W3fPd^c#VUo0tx@DEa@ktN8pRs+7mBrtwX9I_4n+}rhoVeT#@?y; zpkgyCR#YjfSc#%qQO(}1s8PJk*5Q+9>vO-F`&IT{xT9Py|MB_;G0Jv|Q86jDi=FsJ z5Z?#l98Wv~JS3h(`vS7Qx734g_@o2bGAJFTw|kGv2)^Z`_j^4n$`xrE_=fzJPg=;9hw@uKDh1L3TOri- z*h)YuwUvXk*;WB<_SmZN9iMay*G;@7m9{BLuqn@^zxxos4FSKmPsvKz``9X04kd^E zB@{KQgz^^p9);Dc8p>Olmg%53@jD|0RY!~PMQq01AjQbd~a-ZZnnZSLT+s!1fti8+)w)Jz&!+oCH%Y57b zH^2hiA?^?hf}K6fLfj}f%9^<`?z=3?UF80W#ke1EKVWh0pSjB{LHgSUHlmEB)Em?r z*vHfx)f-v6`h)7t?BnV!>MiUO>PmGb+pexwSF=y5Usk`&I@M-%Bl|0LSlz@vrEXR? zv%iLCdmru|b$=^7JtlYoVL%L!0(7L&l}0z9hxvrC5ED{DhtMT-3q3-gFd!Td28E-- zabZ{(fiemh6D|sug)73ea6`By+!f}81>vE{i)v9L7KnvniC8L@i<`v?v0BuMCMbgF z6#ZgEjEmdE?cxq`m)Hx(?k!jl_lXC^!{RaV1jwhvGvYb%0!Wv{N%5+9U7QtfL%Ao; zi;Lp2xGX89)ddSup0q|PlGaIOP&P_i-W9~ZX;>9ml>k>6&;;o(rR)D>dfo`-?WFB_ zr0vg;wm(bSUO?LZ9BKO+()P8a?S-W6?;ve2B5f}wZGRVOdkJa#I@0#{khZTUZGSIm zdnsxA`$*g0&n`pRKsx`QNaxE*=Qonh|0U`CCery2lFomKbpFGj^E}(iDLEzk2x)r- zX?rF2POcdAzl1AcFOs%bleWJ^+Fk?N9$+tXAuhyfNzdy@&$XoII?{7J==rCak^2ny z8CK7I4)okadfq^K-pGA{`vQy$)OIUq`!`@*e3Sbo6G_`8(sny{8T8!AO>&dWrFN(t z%&qpSz09Lds1x`N*Ll10`slBL@b67I>q*9}rvYaH=KzlAY}qWE z43y2HB)V4s{~si88l^2l_N#9UJnpggeG*4jc;c0iz4BvVZtl%sg$I8+~up(x_Na1P7k;9cY#Po_-Vs1qou@aawd>SPX zSFA)iL>O-t6>=2vdk}0+$~QiRr~BF#w{3e<*lyc_SxU-Jz8-MjK{;bd@+sSyH-&Sy z3$Ld;)W#(MWjSrLZwj~N3}_b1c4Zc@-@Y19 z)Si6e{C0ci6Tp1x)7bMkw~Con+Mczaf4%GZLy+^Z_G`a~nRHI}X*{qmtpw(4Gq(K+ zTb#C)85`%wb>wGk5N38)+L5e|tbDHNXOih7%miny88gBwuL*65qe{Gn`sk>Y_0o|U zzc8*G7J%L1MSXXK0Wn7^qtA{G*uv+m%pzy<$LL=iBaTt5r+Wh*;TUsVL|@{#jCthW z&myC3$9v9XlhHn}%qWvB#x)br7nof}+v)T>sa?z`JGWyN+1cyd`(*Bz`WO0?d;;{L z^PF|sdBJ%J{g`u7IGpKc=T&EfvdYi~?Yr%~hq-0+I~hOw#52mYiOBerv@gKxLf?p4 zY0ODuMmqDH>3Dc{`0O!9jTvgpPh)nP>Ll>tH)No{M!&rh)Sm#C8~n1CR;~zHUu;SP z^atmv~v-?-9OuJn-^KbeLr34RcLA^JY>hpu*U z2tj&_e(t;s%)Fys%DHzJt;WdY-brV{ez;D{*>~DE;hLiDHP;Mg-f`Sw_8rF)-4|uy z^nHeLPqer`i=Vxu;(_`zYah0(znXyG}fZcRgIfmV} 
zZX;e}=6pIH#vA7S-ClRt9doDT+`hX9ubXTV+=IIhbNuc>I`55aQ2M&uM`b&nt-rM5 znDjN{y*_P^v-g9mQPTK>w(bwuQ}^-ob{OpEk;nKwJGs(UqJ5lsyly1}>O*V?kAZ#I z2ksI3A@`_z44<+4qWdyEw=3z@40qjgSdTUf*D~Nr28>@&@91G=|)&mz@f+?Yl-PF#sM6AR+CWn#LOaa{%&&t>p* zOqY()UN1KDEX#Pq*bHN@mGKwGTMd~#*U>y0;@A3R=Zf~!n?s?2!9 zbtYLE*PCS4oB(j_W!IkI*66cj#-PuRt4(m7iLWC&{$O1Bx_sSuJ-!}aAKENjZGyfh z1B@wg^$D&$!F%?N;F=TPMYLULKXBCv`jpJ^zG+;0f@@BE3%J?@^%2*b;A)f1Die$g z{1yIcYzO-Oa4XwGOj?u`A8ryumH1y$G%r37kj&PW#3K6FBBj#_JAD1+E2VaD_TC@pSn+tXlSAo#F4|~unul)#ipuG!r1-pYi z!9LVo>@Spq1Hl8R&!`K*L0tC{9L9dcZ@$v>Y6ckl|M{2yhw?A@k5kZZyYPR5%+Yt} z@zcZxs`)O-2Z?j2=CdTfo#bx%{VHo1W#Sj8<{QNF@6o&s`H_N z^7qQo-UGCkes$#$_2jaAZJo6DU+GimT*a4(d15OabDp?@YVt{@C&K-fWa?p_dYGdg zR&1l1e<7y3Q2dT$`ZZc!)&Y9@`~zb7_}7R9YG;Cu5vk@V$%DjiMOJR4`d?AaapLEx z<`rW32|B5rDcVb)$bXD#enE06$zLR<-r(u?*E#9|?j>TmXU~ z0n$D07V)p?tb25ptX;A`Z=$^?=z6G6c+wNThHB_~c)A{b4;}d%;*Zn)(iJLXO`v;J zsEK=sNn7~C#MI9UI#=<1VlAmpJV5*t;xEXpJ$eNE1Cl3+ zX?#5*{b%*`M0Sx(SNQ1kqlduXq?$^qp`K?SASRu8G)Hn5@#lz-b9uzMcy}cKkmO;K zhe`ep$)6&5JIUKgo+Wvf-TD9NKF?;!bfnsc##PzJ`mm#F>{ z$tOsTlKfecKTGo0h`;r}SUdZ8pQ`MSf6v+bobL?@-8>}`S5o9zBzg8d=X@V-(jAmW zo_@kDJyD2iBqSk&sXG`yl^T^KPsOA>rBWeDlFZEzNs`2M{q}nAZ%wae{`|ep%ldtO z>#V)@+H0@9_S*YQj;cpCT<@F(D>z)ykCf#1zi;cNwO1y=^25$ZeyXEZnl z{+PEffNNp%(kvCu3f`^|?2x-0yOx-E zmmUnYxt)xvs0S3gt$KyG^sX(RszXs~3x!IPAHKIFH#_BEzd5%u5PP zoH#S#*xm3m`l4oVZ9T(|px7xCt5x;qEo;GDg$7$i5Xs8ckaiE{E4|7}vJ1G(f?FGE z=YncbT?Ks`dLcG23QZNEi7Tz3tR_vxsBBEK+qy)EqB>RVDX#S%<5m5j)IW;Iacyi< zdnl@9MXV}vj=BiSj%n(&W9UCd?mCA51;-_Sx_?2kTee5T*VEwGS)gt-HQ_Ph(9lw9 z%hH?mwTAZgh8S8FTn5}6++1*Myz#eF+6+z;ofhgn1ishMrrvDuwcsJ(E5Q-1t}?Vy zR_BfA94l{;zeU#<>U9^%bMjr$m^aE$b&ej-y<}*vt}E2L&(Ni`nkU-Mg3qo&^CR$w zf}2@tW=#6J2b_R!@%i9R;OoHsjh?T>=Ck{}p4i`xFC-h$*8X#RrUFQ9iLbM`}-ud^R|55w;S-|A_J zo?nB!?f=gQ&Jk~Q2;cOaREOdkBRqKAH~(62)CQ)pIb-sL@Q`d#GREf~%cEO~}rmf)Wf zwC!RPC1~4)wh1y9dLDBzfwmK9JC3%06VG^m7V7>@-Z~XU-eK;Xik2gi&YzKA4;>As zBD1zzs8f+Kek{~r8RL4PMn_xnd@ixYc^q_6b+i&peLJgKyF-)QjuvN9%P=t!tr#Js$Q=ZmnU 
zh<;zf1Lf(r2oIFU14VeCJRT^*1Lg5R5%K&I9w?HPZm^+VYs+EQitu(HdVH21`$&&! zzHs6fdtf;DKYpM87r~YzJsQsard4w^R7OK{Y|bTqO5o&L4x^|FeSlHqlBpgb+7IFX z#dzruUOI&2d3fm%t9vo)v5tPv;J!lLI%uxr|H05kLe*+$OE`7>YvEiAZUAloE(0zD z?gj1zo=)58g7M5IZKB0{La4imzG?_oW8u_*(_KqOdR?g7-Ey$u2sShaKMwBhU&X3f zWN?0?%Pi>2n2R)R)3mKj+rH)m z;OL`ziC{*NBa+TUXpZGDmdVJ^hxUS#!>oNI)L@x4=FVW5wdWaW4pz-zq&Z@na{`)! zZLR1%qW4zxbu0ZI5E}c|a_I3ns7qY^gR!{0-HpfA!1;`}Yv8Xz{seZeArn3*)LFw$ zT9=qvh@b1?lm2+A8G8C#4qjS-p8hgMgJrz^vAmEOsm_cPB43F7qtFAiDx}o`S`~_C z3~m4}11@)e2-9ATv7aPtiOC+SmF}zi99}p>6@bx&mJnF!lmQ zT^~A$u@^A*NsPUKu}@;`1&nP!$C3ePg(7%VVJP4fyeOuoK7Q%eWpE8lyspozYuG zvryNOUKBoe9mZ0geiul;PI=2g{-_QG)8l+BIm{@=Fp9%?=CJpUQ2cgSTA8&FVuP&W zN9AqoOuV;bW1)s`&UdPvS8Co~!0PBAbLSmp9y>6ndEm9oX&!!Vh7EZ#YTZnzn@60K z;Q9SXens1QwA}%xp5>rHLkFU#9(o3%rydb95Kq=4LI&c=dT1VqC+iV)Q<=wl`l#GH z33cn0ejs-vtc4Hg_m7O|6KlD6(%5`L?yuz3qdsF*4&!aH#YAikD0Vt?c)OLibMWvS z-rmUDL5%tu*5*oVy9Svd;Ma)2+TP^`kJRQ&?XX`swfzxBewI+j?h(_y7Wnx#tP=V@ z{%L5b^)Q}kh-dQfOhY`Ahi4knV;-5|eWCg>@sCrCwv|HNGeW&}7Bd&bv)|ksV`MzW z<%xd7k^Hlhwqu1lJG1LfIIEz`u$u$=SsDNmln3 zSSAf7y9jpRt2Zo0+uuZw{z0hO#a=MDqEKfilIxMI&WhVb zUpwfl59j?dLY-Vr+?QHR^frgn3r_Rw$y!G7l2A2OsDI4j?CE%xM4eD|MfUvrf}CE3 zsup6K-;PnYK>kTa*Pf9+38y{U-lgC6r5jDZUt#|j*l+V{BmDC!ts22ug`P*?tgdx(>tg}OCLFOwOJr^y>>ve4DWlCwe$uEw2PHS*8Zy5TIoM(3%+Ae;D;cxMu6--uXS1NVs2y7UL4I|)>u>7-I zz-7t&LKPa;qhS};mSF7!G|WT81hV%8^i1F`Vjs6C6H0G{mSuMkZB6l5K9Wu8F$Rti zb+4m2mgO6H#jT*fjoXrO(xX3AdQ{_RwE!E|VZ#E;q3r@`s|VPdB|N_mnd=3+HzIkX zNIJ#5y-D6W#bnpw`0GSe6TvxUz!UJf^i=^#b`wMIf|g}p>k1XzhdZS>`&S=4(?{gh ze9;iU82jgQzc(LC5@@cBh0XEavv@BT+qgS*4#nk+vq-3Oh~5t|(tw((8}v4z?$zL2 z?v}46658m`4CfEtJk#m~aoZ5d+Eg!pVE?^NIDSuBU6-vqzZ9K9-Rh-z)VDv8^9^;{ zqoKdZJMGDA$NkoFPVr@>9H*CNw9R8(za!Wi3+ElHU-X!2@E+=&7)nL$&a*a1 z#*06Vo!g|kGv~+e|FeDoaHCR^qrMBio#`1TF zPt*f=W=H8Y@)rA>VSl0bJ*UX;1)r6(6Fe9fp`+v{b z{}efG4))CEJhgyRSRqk2n0|jgD>YT4Qn|-SvKD++lW#G3tUBxQe4EF}LjB?igMSvP zIR(4p(3vNvGvl#9R;5rAx0aTrdX%<@@#J}|zQfXsInBup@MA(_WCPWK-FXOAeFyg9 
zqf&3H4z|L?pKGbW^F&_XMl5~88F?5!F9!dS+%gBwRycF$caHxQC!hMR2FMWPsOvsR2fNjST-~Zm77|a?8VM3#UAsi@a_|=MVaN z!S?JUzv`;H^gje6GtLWT2lS>0hLb^0fS$jAPjMUeqflp)aMTdq-tKiaZy(fe2}b{W z!ci{^$9WIVV&sR?*8U>1Q+o6|)8pM)J7pKXU+zVn8r+UvYB3td%3JpYoYly?$X_oU zl@RO?wboD?9yfb~ z;jD+ljhxT#gZi&xq4y*=ghS!{B)~A9dWAb~4&{wj*BlPF=C%?_D&)&TYehgYD)L3Gu zQ;Ouj;1|H31b+jw)Z={hjFSK|^cM z<5+sEL4K=2?rjQxFZ`zPyOMdnhJH+4HEN_V&i<`=RV^3KG|ET{mzFrWKXXL?kCjLU~-#1M)x*&z20l^28&CNv&TuLagMs8 zG4igR*_r!!;O4rjf-SumXUsr;LxeSWazT3t5T&CvqOqoS6I%z{O&xSC6+7 zgyYpaXI71xO>OW`sSOl&$Ywv{Oo|6SmibcOiHtWIneSxP>yh%-JLKIj7@3hoU-H^S3lzWoF+>i0wne!8OET_3``zd?dCcXC)&8FHnQLQUwvBblLc;GLx z_T3BV9>mTr%#^($+>M>@Vae|7IbB-)0c~yI z^dJW=m1t3KYqL|kUG!3^rsQw@Tz*k?J8Suilp^(+U^p|8`4SoK3DqC*z^AO-J&a{1 zW7$KEG0=?R82MX39| zP(NldZzJz@^LD;aH!79tOQWGX8n{1HtaN`Q-n%4Qk#1(!kbYm*o5)(51lwPT7|St6 zdYYVeD|iB}y3>lkIx`tebbe*M&A9TJi*00yr{HhH&f9pK$J^WRSRL}`M5@+?vVxqA zo@4-i<8Y?2%04IZnQOh8U$Wi+^ZTrc5PAC|cse5;1BVl_Q-Xz$%CA@IBKh@7t>HJF zceB3)jg-@?{+yah^5-mWyc-C0W@RH#PBiNo@As_7KlsgzRkOJ{e2H;YB0k4}@6uvh zl2P9UX9%43aE8E1+TWwIzY9yQy9(Te%&>*A``A+qe<$+Kd9s?$l95`#-yu2BJta|X zYJkz~`+{HP_t_1|Y>>YTRo$>3jysHB0e;|S>=`n~Gx1u~7=j!7hmCv_@a0rQ&vPG} zJIiUyWT!^NhRj_X?;M_&+4)+qYk$F9 z$gJ_pyHhGO*3@ld-cI)9T;E*ZE>WTX*U&A3-H)`?MN5UM<68daVTVY@4hVGy(<&j< zTPsxi&_-JB6B20aj-Ex5om4yYl!%PqgSOpmze3&J(9XKLcw6CfwadI61Repur%*Lq zN%Rg99NQ+;xr(>Dg?hQjjE62(PZ;W=p|f@vi||K5(dNyvJwo?FAA&Ab!{xUgp{jvU zcP6-&XR1fHSg>cACgvX&#bj_kD-O?T|!lN=wfVBt|L@( zLd*rbt}7g8EpO%T+nYK2`O9Rf0EfAg4Q=N>CL?m?Z)xTVc7AZ5kyYc$-)c^A#~M1p zHCfTADDuuM=+jUoREtJAiNBezm6+738>H}q$xp5f#<-$>qY%o=wqI_8|^#-U0$ z&Q)$>>@*{aoz}Vhuhq<3$AeCGTMEZrY4k^E$b;6);@xO7H1;Ff=0F>xKhOCmo-|%^ z&p2|IyA#gWXr3)Jc2wkJKZy;o*ts_RFCCf)_4^2wzf?DzCA_T)=Uq5If^P@^nYS;Q z==#iGD6~{6w9n$hNjN9roJ4bLG_*!rYexD8dfpIDyuILfdy9EHn74!Jr9Qa6 z#pvIT{_W`Bj(j&{x}pC``g#)iYv5ml7}6>U{da8G24kSPMM z1FvJ&Dlmf;;NJ=VPWbrN#kbmn@4+vDUxNPg(SJUeC{jd`z6kk?n9T&UnSj3u{wDBj z@NDpI@NRH1xR|z$Xxj+=XVHHanZ3yD1?PeDkimY1{c1FLG&01$CjNB`a0~QLM*n2^ z8os9O2ekbF`~&z05j3Ah^J#DjoC5a- 
z_eOp#@@vt#3!S^r*#wRi3#KME%o$z;}hfJc#Bpxfyvn%c; z_%DICfVY5&2W8`dI8?-;UlTiPf~$b5pp)44h;5Iy9&L3u*5H#^RRRC+W{p3N%;Pp@ z;1D6+WE&N5SW})|Q)C-$v&~cBr@+LCwsG<~`aj18GP7qh^Y_^QJ^Xvn%-reKv|Wv_ z2H~qgVDgD;^T_~Y29Rx->o{}m4`J*>(1}kxe4;+2uMfd*gWpEaE$F!gJP0jdBHh*#m*eoT@Lv$hkWQ?LBChfZz25_VjFtm=!xfp^O0}PcxmO)O7~?(4x@hq znm5oYmsYvR9KezT^iqemb>L%(#u9Y|%a6eC3coA-N8vvTzYP2`@L3Vcu81*quVgPS zW!5^{Ja6+kbOvo_Apb}3AJMZMJ0 zEX~Sti5!>6@&Ahczk=t1=OM$6@7f)oT}e&1S&n|2u*x<#$0Js+9* z;CI09fPJu!J!BO{R#A82$s})+aQgGMKbZYRvA=kA>9H<+@`EBjXuPVeS6hKw;mMou zpOjgQ;f}^^D7&>$2zSzoY-}-~b$8 zKedWSt>QfoejYprJO@3@rps))c+$m_>Kpp{MzC{KuyYg{GJ{KIaEUaRNYk&<@2g<; zAk7}6JAgZ&a~V39fj#j-QNyt+td(_El z6*^ae$ybU!R1u%b#^(guPN3~SY5Pz3=fOV@pInPiuB9#6SdontD^al$6~0o|SNGBO zK6H{5J)0H3g#RV-%&%g8wa5-Q2RF6K@qoOZGUz!?8 z+xm*JC_9#w@K=J#A=>5;>OF1iJx+73JO{J)*V#i~rV!wW&BpedEGaM2Q z4@ZP!!bif1;nZ+u_;ffwe9q9t;nHwr_8HEP8Ks#!<_tZPd{|;R*gFqIA?F`}B_bMuVbZ(cRJg(b#BwG&!0U%`&Jc zS`aOYUN&f1v^sh#+8Dk6>oY#u7VV1mM#a%L+2>evGAfByCta*Gb|EKkq+ipI8Kran zi!nEr{D1jALn>B2mN8F7zRysJ?=xJ;_ZcpV$@dv5#|FoS#Hz%GnWs8mX}FlLG+e@0 z8ZPB44Yl}6Lv6m&P=~KH)a5G;d3>eea=y|~pRY7r!B-j@@Rf!u`AWl8u`RLpV~t{4 z&C`UhG&JQa4bAvULkqsr&@%R~*r8af*kSXu;ad&Y@~wvJ_*TR9e5;{7-)iW4b*$hR6IzSWTATMa3`)sW^}4H@S_XI$(C z=V9mJSP#C{a1-BZDBxQSJ^4yQFXwep8K3dZk_!`yahQZ;aip59&j)q54OCGFHfcMQZH7V>k$7O@lDV4+?@?f_~;19NZDy z8;lAb3MK?of*Cw>g1N!M;Kg8xd6oxn25$$OgRQ}iV0W;O=Rj~II3Ap$eZQboXsE+7 z=KmfD4ulotkw|i{G?COW8%dSuUt-8Lvv#?OCy5?yW~8Z!Cy5@P=!u)y>0+WvzV30J ziI?6csw85{5;5iYqR06rYKEHFlK7}#qGP&=Fo}#xCN7>c5hgK_8++9}7ZDSciHRyE zBG#KomM?cyGg0uFi8+abi;068e68aW6ARzOYBKA!oC;>XYctcAF}rn`*Scn2`^NH^ z(aX(@4v*D$Mwl6Gz}Gjfbf%bDyoy)^O2aMD<-D= z$1@`_Cowm%F!5scSz`Xbm`oBEJ(`XLMg_VBy zR1IqeL&CbhJ`KXg@^stypQ8?2CTfLke|?Npa4_unyC-4(rNbUPz0K1%%O4aDGxTn& zo#*~=Y&ibEJd?v|;jFNTcsU*}V8rGL7lkhe`x6g^%fi*+Tffos?`Na&(68|nz8`K2 zcV(Zw*{4|IJN$;o-5eeZPf9F=B@#1{EAbKKgq5Q5jJ9Kx8&wyJ3^m%LT1MYCiIk`w z{-_aM6*U#FM%Re)F_!W zXl}B5vZtZ9CI=*kB!?$QB*(CpciSiVNOGc#HaRspGx>CKe)2i9TIDH|T%25*TuFqR 
zo|0>gzV)&ilUvNckCHo+dy@N;2a`vGWb#DNJ$YKR$jVE_QofDUux_ecs#2HgQ*Wg<218Qs zr?$~zS88vn*gW45%gAw4BrW+HT3SS~#y z>esC#XcR6pD{e%3PS8FaA8bj_O)m^4re8Giyf#>wUSjgZp7io)Li)|nc(7YZ`fZa( zw#d3pgz3$ppWd3@k=`902`8ubnM_uL)qX44X=VCA`hbx#66XKwo<0&hO|~O~vyzfy z%&a8BU}pMww~cA@K3!_Eg>U4B7z<8hv_S(hWil0l5t+)F8kx&7^)rn!&B=l@Gi@XT zX4+@E1}ihkOodE$o}MPpePnZ8vS;R2=}jUyQ#nzP86ep#Gr(vT$sw8HnGu;WK{8Xu zW-iHHW?j{b>SZdMl^+vJQsXm^WG0$tYG!8UX_KM$X69$+cavCYn|Us?SaMQgVPfi zs%3uN{08}r^IPV(&99l?F+Y)?&hL@mJHKz(H-C`%_nR-DMO)?DXTfFpeN*Fu_W4Em z9W(obF{uW;;d^L_t@1szR5}Xfi)e`>zx^g!Fed-lZ@-Hctd%dL<@b&zWWSD<8zg`8 zg|z%&%fG*nmf!n^d(B!~84i;#r5QTG#A$Ey@2&K{pnkaRhAAdD`rS(Y{q3}9N^s(a zCBdE>W+a!|Z>at91+{S64aQ2oqjtnd^Bpz$irR0!q1LE-^K^NtsfPc@-q%1ym0jtc zx>fu%O;?vRO=x1&rinw-wvAN8vX+c7@k_=w{ehY&5^-rqR|C5>sb@@CeX zHM44c`|P{V&pki;>~rqDaQ1(md7b(n=gjLBq6L1|a)^q`JmILEBeE-J(l5U<-#%-f ztt_l8d^zXk9Psli=Z9OuEtP)y>6JQUW%OG}KayE}R$pb5ezldgW%XtCl}qT?P}#tL z4?srseRK~Go^Ax5ZZtg6II-t-;fW>%Pcf<3^QPe`CLK>PkKxJXaXh(XV$bWwp7%-Y zc|CY$nSrO3r-Z|^+|!IU&EhA5MZ1J!(OwDtj6?Q1>enjjebo~bO5c(bwM+nCiPAZ&HIIqHs zgv*3$gd5Y~7UAwA;NGGG@6`zD3T{I76y!~VIa4tAx4=AtcNzo<_FFNvuisnoh=>qs z2#X&Aj$2AtruY?)i00{a2W2o;6V^T=+6fyOW&y%2Nl2vGrox?sJ%s&9_e#b=74|*? 
z`Uyv;(~n$Y>k{6!DyHQguTC+LJS z!a_n+t}l^VDMy=VjfgCfYe}Smu$-`x&_Y;4Shq;pc5d$m!lp&A&ylT!?Sx&6q%AI1 z{jgWs%g6y)ZiLpp2(5h)TIV9P#zpx4bl=E1)xU}Utdr|xB(a8#CG|6xbxD@NaS^Wb zu&mcx#%anJO{^!0aXKK^#t5x<5nAgaW0U!jewDngqV=wd*1alP^Qy8`nnOuAHVvk( z%a^9X@Z|dXz#5xCZW5-huZi{YcUdRv7d^NZHZ7W51C!Rfs$8j;RdnB~0>xARR23_o zCHqlDb*VB{T)7JA{#7wKZ>#F$eByRh(LJl^epO9sUL@v0!Y-06ty-n*+?L5XQq?N$ z7w4a_fm7$!lzBB_U)0<}88uS>tJ-88C-V2o`Bp{ib``DLRdkOkTA!MQUL%^=TO{(|m9`nzn`Iwx+oBap){%tHY49j} z!M;V4d%EO(T>{sVFlA3Su}Aw|_Ga@IJ-Gi0E}GbPJ+PK_sl49FK1s~2sM=RW8Hs(= z>dAQ(r9D-2nF3$aJd09%l;%vdU4d*ybjySK9^Iy}Q_im_?RBCwx1zMKiS|xE-`F-r zX)hC{y-bwmSoGB7e2bn<;s+ImBzW#cY5x+vuELv>b1+KtC`$7z8c&*k#81IQ&y9HSI?AfuBJUhb)mFfY(J{!%QYpDzM9tmYFg*3Y5lKWDD|8m>lPrkn<`=b|6M;b&T2_qqQqW>sG8luJ4E|l5>uE zu6K;qu^6piF;mt(M(bCsQjU3y>K3b$?TV2-iqX0iYmzo$!sgQW#nhS^W03yF)=PUA zqxl=#EOnRri}_fm(r40z7_BX_-LfCqKim@oPq6=m|9n^WALd<|tH^bNzo=@b#W#g8 zmlBo{RuGyAs|jn9!gdvIBy1sUBkUyXA?zm{B=i#c2}cP7gj0mGgh9d(;flg_!cD?$ z!YCmwgykY+5Ipi;Rt{k%AwMZBOuC*=@Dp@G8DU{k7$wvamZC1mQH{9N_}t65%Rgm@q=PLl_fqX=bMqXinK# zvR!sAVU`LDCc}yJMY2yhMEWdwJrQ3_C?%MLazZ5`HW_mN*mZ>Zq_C0DloYNav`&Ux zXSGINF5Y_2xfn(gLB`fV|{nLEur z=6>^_*=zQjN6i8AlzA2yG>33~#k_9bG;af=X54aF8D_-t7+qG5HPgxm3a$B;-_orz zYoQgjYON*afYo3vw^mv$)*5S_wZYnCZMC*ryR5y|0rR%iZ5^@>TgR*u)@kdUb-}u1 zUA2a-5yBnh8M8@a>`Xh$&b4RR1$L2L43yfYT@F;*F`&+_2O8}rdzH~=x2j*;1jl@v zyzrd9)d1O`g_rgj>tg-YGmV93!aj$iMR^+=kxIZW6x{tb#ibD7I z+}{(=yKlO0ilTqz(jCGpf|GxJlz&J(Y3zQk(%5Ho89hcH;fQhEIBA?IJZzjdE*h8l z%cG~3YL%AqSjuBUcwY6qN|M(+uL;fbDbJ^b)6?c@6E4qZJ)adRo~@o8B2_rFQ`%+h zz82E%OXNp|mIS_bOaaTCNnlxx5ix3v#l})&nX$rXHdep8%UEl)8yk%+#x`T8vB%hN z95i~3e&eVyV4O0}8iU4=amBc9+%#?*qek3xnHi?X%rR%0`DURxKM`;GP2DUr7n)JC z)?8vXn9I$TW{bI|Aje#1ZZJ2QTg~m}E_1JW!0a{;nTLr#W}Yxlo9E06<|Xs$i@G^% zj+l4MF-usfR;HC@0;|X>HWyl@mT8q+l~&BEv+AwJ7j>)2T4lA;bz8wDYrWNB zZMHhC9mY0m_jA40KCA0tVd6ekkJV=#F-}>>t&`Rn>%4W*x=c0MU|q9rShuXZ);(LB zyV_2lx7K#s*>;{i$DV8T+4F3#9klJ|dhH53V%OM<6aDv=ew^B$Dzq-!OYLREss6QB z*v(d#y*km4_FB8$-e_-0j87uo-e&LQ{+$T-*!%5+cJD-gPw98N-#%&&*rz7KvzBQO 
z+Cx+O-M(U9w{P0F?NK`(c7-!0`u!pO9rlEC!ZVHLiGEjMLOv1ZhYQ2=&5^{og#BUN zxE(Gdt#^9fe=AjUq0!Swb77h1V>A~&?pY~PJ*zyQ6d9gB zqS=w@dEN86c#`JGr-jG!8P8|L(=3SgsEkEA>-Gi?N2Vj<^lRCS$9y z-PrnUE^)g8{ehjvUc!MuztJ5!V;nLL6OIvgA}}uyFTEUy8>a(tz1KKrTre&f7YYZ6 zyQ&Ww!^Q~Vjy@O&>g~puDF~^0yP5f5J~PYAHD?hDh%3^ELb+yfD0g z6SDQ&!E=FrE62Z`YO7f%Dp8OiFwLb7{`RpyvmRkdF)felOb=DfR&Km2ip;;ZQ zJ=PV%b=Ds1rghsIwQfI~YsHO)59*TbvNLRtonaiZbL^S=pq+2l6EL=Rp*`RB+w+ZM zwjS7Nml-qdg~rU_7==;0)?Q*a*vms_?3H$ly~gaZ*U_l%wAa`h^!8Ajy@{}ud;Y1tx28}e2+avZJ!kE;-u$bK(7U5LZ!Qkm|W;iRHY2FIwhB~L}V0f0*AD&fuJfVZ( z0zy$j2gAjLQs^LGnS^q_3RfodGaL)oA+Da#n23}45H}hs3O9v{e(xGUf*w)hC9NWXV-)~xhKeaKIVD1r;w~?k!K$5s$TN=MViO(nBobK?Fo~`{C!Wk znBn=L=Y!%Y&r;7Hifqpo&ldcZ&_K&VltUg7xK-b2mAT;_rB{yUT3`94h=xNKRAZJExn^%+kyC zN<9`F(CZcyJ=3Sx>y3Jo?~uMKFRr)hZT@mB;$eBm9{JM|s(-pWdbS z=zaQ;K&F0NKk2)wpV80j7fZYK%lfrY;d9&aHd3A&`mI2lem78_w`W0zelHoE$Iy&) z!~M*);6@``FM4jHzs|@r<`{F0d4|^r8g}Utqk`*~sAHm@jND;;H`jN;=FkDIz4FrF>SsXdOTZvoEG zstMi^YDK7lZxfEqCtTR!koz;zLbu6RuKW>(ub212Wx1 zMyhmTq#naaWniQp$4F&9WbeuU&9im#Z|Jgb0}h3p;GW?A;K5)ouqW6bJQ^Gbo(i6& z>w(~4a45Jjcm?w7!JChS+rd!|Rc+owwOQn;q!vUx5#fZjN^aNu2@Q0rc5w?bFpf{5 zUD;^I)6|Y7Vk30K=6CejMM4A)NiZL*B*f(Pf|WwdU!&x8LcG{7GJ}P|`SdFc`hz-! zWx<8SM}v4yeVZC_A7`L0kN=jb9b%p6N1Yebu3Qw?Q2j0l@f6i9aEWl0FiiE!CQY-2 zn0Jq^N2Cn=9l{v3K@d_2)b3!G$O@bd3aQtMnV%pM_`q@o`go_yd9q`(*kX8kw2Jz z{RGeVdn-C9ZgW!J&MEnKOk0;nsps3Jo32d~y9xV}uDg=Lo+;q6`Upo&$8@361V zSMO`|HThN%>IkibHt_37-r?H}zSFk@;cmh{N?T817sYq^dVGDvAMqVW{7K&#iaYPS z=(|kU*9hkcHwd@D-zE7yzs9^j-S0-2O~|9P7b%?MyX>FipX;ATyw@K@yzQ^>UGzu% zHU7o^rT%4v2w?@G8T@LJul2Vx?<@9iglr388>OwGa3{s@^zZTSC;p(n7x4%E{S))6+8Yy!WPP*$>%!tJ=;Mc7L? 
zP|}U-LnVi)jAJDyfYXF?gbV7LaVd#EU2+v^jwN9@Dep+hoszMVJEelckaqffW=s2Ub5a)&|-G8wpzi+lb#uJY!p656Siu z4ib6^{e+`|fxs!kS&BbO=}rOLXm>h5`_lp1p$^a5J;e8f`fzDdT5$+N$hwkZ` ztc#vba1*pptDY_ItLG8s5VAu(`dodU?j-~XHes$_p-1!@eKBDvA)+tSS0HTGSL?iEh59+-L`&D=l@eigsit7P|Qyl7N34{6&@mGQ?^*x0B zge&@Wy%*t46<$aDgK2K#dO+b6hx#ZXt`7xQ8ZINl@DOqcE@P&VkFd~~Z}w7 zW}KLU(-XY|P1+}z zorE3cZo)pZi^3l8edZB_{KaCod4_NvUo0--o5d{(?}m1p_d>fZ%}Tf26lPm_z#MC? zHIG96o?u%Q2qP5MSc|Qt3d}Dftf07N!fNw|wU)qX+vPU|Ya3yw;u(7g`w0gX-%IEx z93>18PN{VKePGDCVqGWPB-~a!W0VlLuGlU@2Ejwu?G)w^W)kvcI`u_Bx6AB>c9c*{ zSVHgv4fb-ya~yvcSZS}Z*Ac$~@=f+uN>gh$`0DKKw6|)aeO4dsyZUGk)`$ICov+W{ zB}2sVzKzDs-b?ZWe9d10DCB+GdHXQ!rJDRf+FxBJT;qKf_E_`mV=_b>@6%{py=zE* zg0K0@0L9Tb+ZXV4VAviZ+#!q+E-?7bzv4%5ok_?F=i0;JS-38+uTq+#u;BKFi%DJ@ zHpAtFN`7Y_t`9d7&uH@Xg{ghvR{y+k8(}@gF*?GV!<}J%Uztu%h#B;Rn3*+Kq-4EF zPm6Q#wCJa&#Z*z7o1U95K9u`BJvp}G$#Ek;RnBOh(JaJ_*JivX99bWGIwUkcW=Z=A zJyrhL=@jfgkx#VVlaeC%9P(_UJ8>$z#uDx4kZ)zVE0t3^Tr6?yf_y2{l=Dno^C7v7 z6ZP+)oEFFnIG1yn`P5dVX#%Z8`6DRp&g4n#+GbaQ;FHfUbFCoxygE&tJwxFPcJWJ` z&>opQWnGLVozvb8+T^^A)9I-+?HnZgq#hW;x-HQ@NnL2Bcw*qIX>nw3R<=V~tfLu8^ zmAw#Y_JA&78gChAKjSsy?1SLwIG>!qgzKiAW8N_jd_7_-z`p_hB2unn-VsL$a}oR3 z$TfyE{}pKpApc9qOTiZ*HWmEG!G8fMb(HpZNO=RXZQw6}&ysb6*J`iv)X~9R)uG`Vo%RPO{wjBt~Q&OC0&&TTsG$!QRbKTi0x+ zj+Z%&vk1HkDNlp%jDM9h&?TUgkbgDaPZIQ@BNq}4^fKd%eJ%jei%t$FXjtUkiRem*zMlHJN$WMab_+3G(KE7IBMQSNZFQR*W=1 zL`{~;7RCRZN09GA<@0RKm*Wy2WM2t!_M1?NGAlV&9OU%WpKKX+U|gPNp4!g-1=>;b zHr(YgZi~Ym|1!=S+{fQskhc@P^iHmiV*n`^@Yp$a$=1c;%Rr4Om#`6ivI-I}`h0Hu zIO=(t$AWq7DEPUc>7d0->F!6tA4h3lgU^mZu0K<)9ds2WanMrKt%Ju%+k+lED%ca{ zuh3T`QbQoW26__XxEHy)QNt^sWlU3{bt%gsiF11A<#G1JF?0MT(SJsntK?l|t1*{H z)r{s|-3&gBX}X`&r$J9#n?TEXbW%$>HsvQ=hm@M}F^u@PK)W$dIqy4VO_-;C<6iyc z_|I{&?sm*y+!sAL6MD4~b*q464SH!adbJC@9=ExF;!Q}?gxC(WqJzu7{|iWl(W_iS zCTImqwCCZwEr=c-f#hU-EyjZVt=zy!T|xSE(09k*B$@(`b-SUr+3~-?y;`8RgX}2- zwH2u0yW;F8rD^=vTmstIi&j6zdgzE?Ml_)ZuJC;whcP+>oF*08NbNd~(4EaWZb+7eH4S0iF|rYRX*H|P7H)k}D!#2B}o zdZQctUyYR9mRS5Jpl{-iMbL0A=QHs?LtgIfR_+a=e}%l4k#`uH&-$Ooy+PUzZzhO+ 
zCH_nBEo?v0+XG0`gY^3)MW5`#jQ9#-zl;(>XwesOuaNXE%_TT7zK%mk|8122QPeqI zSs}KNI%di-PUD0oryPV0@gmJy_}^KBKA8zU$9k7Gq*ei*O>Sjl`_ZD)u*{r(JMMc2 z@`$p`QcDNXQl<51V z^bFFE@V!#@f_kB)d04p`IX&qWYkMm0GNgLu>lo$jSn0kF+r|B}1h#i0OUNd(oxcRy zhc@=W^326t+lur@WiH%hIqGu*qh>2xZK7xRE^nX@J7p_SPCCnJHRrJ&gQZ%?_f6f# ze@9HS_y<(y9|*E1&U?7aHCV6~%&7vD-_7@>S^R6v#3x~yv$0}kWBp}aJ%c&!#*A>m z(l#LdU0Cd`sL2pkRuKfr+jbq9w^iL^THyHmh)~3_YvG1bo?1i?DY0538 z&VH1z9rJe;BsThO6PHGo>Rae59^YZmt~do%96A3(?2#Vq2FW1b#Cd;gy4_+1x!ipgL~{4(%F zrT-Tpa1t*(zbKzCI15^Nc!5_J<a>9Gap%uD|)(kF}b#v(niDBmluf#Pgl z>0?EDS5badq%RfeMMe5DfX@`^Ek*iCQNB^6cLsUVK2C?09#52)6P(*DeVRx=Cenk6 z^j#vomMDKEsG;&v0#6FcF9}ZhmcB@&7ZT}zM0y^PK1YxcCBAw7L4A0N`Y2mE?S zj~>#ChxFeeJ$Fc-9nxEe^3Z{k{H0fpRCpgyo;air4tU>?emA7Y4e4t`df8C^HKb>a zVNiI}kbX3z2My&rLwe1SJ%HX*J~H4PL;1y!9xF*>CZy89p`UL?-kN- zMGD#?eN`wg6;c!6nL_%cklrYy9}4Ax0=_4d*9rKWke()_kBM}Qk@PDeJxWMl63UAN z{6|Qi5z<41^bH}sLP&oQ(i4RA0U^CVNWTx#c{xMnjF8?Cq#p$7 z0YRSVF3;zOzXR#%K>9dP-VHG8l}7{VNeW&Jl>Y*Zrt(=JeF;fl1E`7gO(4AzNPh&< z6M^zUAiWR3?|}3;AiWGI{{ql+=~F;@6HtBx;6Xt84p3eLaQ?sY6aXIq$~%CROOF7s z57G;OI{zOh|En|qLh>ZEkd5&FVF4AiT&!VetAki&PbOh z^UE{&)oJ`VhhLt+ug>0w#gOOitCRMnw+nd^zC2eSGgO|XFHg~z=jY3l^W~ZO^0a(; zPQE-LU!ILGPsLZ~;Y*u?Gw|i<_v+kxdE&k7Pn>cu&$pK++siZU)oJ$f9D7+SoLw(Z zt(WK3%aiKm8TImXdU-CrI+0$UMGxB|&!1N(&qG($Y4h@&dGww)WCy$#7W#Fn5mnX7~7OQ1fCozL|$Xb9<-qYkfveup(JMvQ- zz-NG80Db{@f9enOm(tg3MNP7q9DAUxRaru%@%Lo4} z$e}-J-vD0@eg$h<$`4u7WE)ejar*Qu=F>8e@*ZmILpdKonmoipbJBvKze20GBK9cw zQOM^)au)QTS(5%N$~19zIXecyYe@4sl>9C9(je}r;qG4q|83-DJt>xy>rZ`nh2`mP z@X#V9_o5Ds;6va)juN2%=})3Qps0^m)+cp4>NCLlfK~@ot9>Z>BS`ZK(tiG zuW5~VE|c%`7m6C<@2lrinoW*V;PLiSdkXXyc>24BCq5q}c%SL`b5Ojo7kJKf;4O{g zgOL1BP`qbwJPrO!pjnX20KWwkZ{Wwj2>Gu;@orP#{r31jK_V&M{fc4GS3$oHiZ2D@ zc*-7^UkC7|K>P_LKL>4;?*3*f(ty!d7j?}PlCp!kL&4uJk1D8_djBYR(d zA3-k()LHxyVsAnIGteJ`zJXemfyceX_aJ#cB=~k9{st2CjN{Kh9|x7=h!GSRL9GSy ze+2zFbPsjY@Fm58+B)6^30kM&n~vijKpRjFv{Af**f8XHFCjh;$up3A8B~6$fp%&C z8xp*G)DD8a8hP*#idJ^#_m- zf}&&%HFu!q4mpqT1)_FE;wzwk3kq8y#>dI33(Pfv zJ|CAofO+R=2R}z@={T*?$$@m0v7 
zKUy6m@{XSdUk*8H=z#rk;tjKguOtrG9}O1Ig<3gf&)-4`(0Ktn;J|okxVz&I!G8$3 zyvQZ>8N3U6=#<|g<(M{t$G1Oc9#W=3(gF#zD~{G_(0?b!QF{UM+u-s2&V{x(UxK6x z61)RR=z^Vbz-DUrrmVptIsOhj z)-Tt$kREo!A+-W+bfGQIMUbG4De_A+=9-2caQ!JLY`YWVrJ+AlV3S?aZeYY+SPN4A z8zeu1ett=5*DcE1i?;VLugTf;eWVE~9pf}hl-B)U?!E^;s;o-$zE`h`G{$tORKg>v zsuZb8rBdX7y#zWWm4wC^BO)D{G)?!=h)74oh!_zOX_`i4Sq2elWDr>v=^msRMRJZAoljXmuM~5#w9uY0_ORu9H0((AkI|FQRR8zn zEb6aB>Kr)+$BBczt=sS5a|P7--07Lr3`>p^xw^-bBS+eAk6wEZz%k>rM;iZn@8TNj zLLxc07gHNSl-&wm9*9C*CnBx>V=R^G_Ip<`D z{doqCJqMo-jcst)}ID0!dqv~AK4f|em?$Yi39`9p;$b|hA`*{|w_y*_a1Z(Ky{G4EK^l_%`)2QLS>*E}f;0~>i zt6m>JtI*4}xQ}->!97MF+mPUnv2Q8cL!W-mF#Wu`KJI}NTn7`Jr4!uA^l@}0xYtT> z&z9iW>$mIS85KuJKgUl$_X+*nFZFZ0F|wf6&s~|ace$J!6P$k%+f@N{ zTf%GM)6vIz685#Twggw$1p7Z>UpsdK{Tyk)J<9cz-6H?>5czj0e?mEv`sc{;8V#=> z=#O2ArXClL{rPt(*K^0ap8Y2BpRfa!QcKC#%ZwqP7Smry$XxM4e+bs!IF`wk+LG^9 zx4OEyf5Yodb#BP}xT7X_o|TR-PEL%m-=c!4f|1$EM z)YnsUm73Qn@Ba!`oLVl2PI^{0?3!v4}ZjdK1|JfXuVGT zk6FV=^4F>Fpe7>y+}B6V`zaSgX2>TV^|5_z!kyiecSX9EQ+`TzcS-ABYGl2eV(eD( zuB&{SWu4p?eTwrJB3*fuU!=94@>yyolm9X0N9q4f@()rj64`fwo~6{Rq0cd%>j2L( zl$u}CCqnr->T~HopPCuu3+VYB@>!JMVEz9_pRv@uPMd~Yu~@p`jq(h z$!9_y$105-DfU5FVO`Q2j}J+s#w{}OG10g%_Vm%N4&m~533w(buToO)`UR~K$`>gy zC+L4{Oi=PU2l;GFn)7c&x-!Z4ie5gI1nYd6ZTePFFYl+^c6CzX8PpQaE#%WgI&ZRu z@3NHK->^%@QiJ2KsTsu@e3Ua~z2m26_w^m8HH&ANAXeipYx@c1*XXY(zei0aYtCS) zYH%-GZaCzVQS%a(+(%;yAS}!XC_$JD`lyVi{F@DK8Z3gGAU6h}v{4wQMD8EaekH|{I z_w`=lRcARjQ%cKU!oCT7$a~0J>G?0@-=`eImV0?Ef5O&g@#?~HTe_d{IpwQ z8)XsOIgf3S=l4zd8!Ya!jP^H;Ur-~TsCiRn1ASf}TjKf#<#x6zhjnU{ZpvjWRmpmC zS%(O&*1scT27Du19%sv+VLQ{<@;l@oqLk={QRC;bXO`ze%QCQ^ar~J* z1`IsQ+Vgm2j~P30hx7IduDQ~aS;-y3E0n9bpP9n_OanDs(_LSs{u%1qSX&uudrD;Q z|Ayw<@~)HqqU?gSZ%Zjid8VctxKr8*wq&~V$HEgo5bjz+YlPNM(z=1x&(VsJtPe9P zSf{LC!#KCRD9@BW%=Iyj0a@9{_Gj>ldO1S&Qh$wYYvFinrS+!pAsN(6;k7jKy1FTU zM)@RbILfj^SgJxaj?L8Hqh=oKpF$pCZBc5vWTZLeI|453)-m3Uf0E~ybNie92jp)u z&Y$s_YLL4xGdA{1@=Is0Ca`1ChdcX3Z+wqW;%7Mv86p!Uu)f#%mohsUx1~<$Nyp!F z=6qc|jZ7z2K=se^E{Qg`e+{|pk^b3`J@8)FGn`x2u$>K*Znoi<9I?xIUXD+xN4vqf 
z>jv+|4cGfA1Js-+A5VURJd4u9@gSecG!ie+`W@EnWX<)|ER&hpd6d`kG)oPk{sJX6 zPCggTS13Sn~~zs84Y|d|tTm zMd=@XFZ)L{UvTvD39_yKjctC0aw=PRky4)4zcFMsd)5JswzgkJW~;n_oA$hu-^1E&<5n`N zdg?eakTc5qAV^*H*vB#IdiS9IdsO}S@HRfY9|z-sev~Nae-QoX)xT)RT4SWOt+l&= zy|w#m4*`2?kJcV%eya8iu($R+qp$6)y(E##YVQEOc5Jn=9>9*N7Dz@_s~Q^z#sei| zTY*VHJ0Nj>tHyQ!oj@1uRbv+dOMvA(mPoZ#$yl|U$L;`jC%NDa-~ezq$-99Qz-izt za3Sd@5rQu>M({1ulaYnjC1VS(tJ?$Y10>F^=#N2f$GcrucaFFS{Yk?1 zw`~#^xCg#HC@04@t~1|4KhfXj@x6M59OY_v)%Dc7L|2~@ia_-!6ftyjoVu69>_O}z`4 zUOx+vIK2yyF9wzY5`kCr>!Dv?zo~vJ_;x@dAkV_F!1lLo`|A%Op9ier@ljwabyBzZ z9S1+f<1_W=kxRKtz*Qg_J2{@lxXBsd8Sz{oKpuu(VkiqA6OS_=546HpM?MMmN%8i& z0`TYRuhKpv-huksQAc|+&m}Km9q^5J;<$^x3*nnle-*kkz{7f=2VY|6Vv}~T>~iQ= z@mQiN7qG6RE*W1rBfcBh47>pxAU_O!cl-qT>G)aZ7l6y~-G=-c>~=)u%@R{t>W9 zG$NO}k~&=PMqo=rbKOqldw_kwLG+90kD-r_;hKBE&(-%fOo(^XZN#yF{z=;v^jQyl zdr(e}ZCqu(g?`YZ&*bwt0b zj{0;Q3+SJ;wP0-3z_$kFE7#p9{*4d5u3(#1E|&jRMechoOHz8F{r ztc>rbem(T-|s|(AF>om22q0$0Xn)Xi?V-f)}wy<~n}>~HmH zRMgG2(dd%AF&%I>nmG11h8m+l1>kS2X{=*DE}7TR{#KtB9#2eSGV`gx^u}3@bL!?d zv^CBH7N8&IH_UEa%zS<_pDp&c`k?>hc$p2kUgRqP_Fv;lU=6UId}re(=KGTQPTJq< zbCAbxt?wkz!}_*1ZU=TH^TF8r5Z!*@5c$!@|r;0NRs?i>;xo zslCDCaYu?bbu}#nmH^9vRlr(c1F*SiThk6;A+Q8k4y*#!0viC^zPss-rUOlff$pXg zO{bgAHeCQNH(hJG)pQp~j5Ed!9hW&SXPozUjAG!-ej$6dQrVx${*E%Tf0%tx)n^|W zeosAVK5ss+FT|@LOHE?D(x@7uRQZMb(8{m9Og^S44PU-yoDuud!A_zv}K8N^>3 zZsN41mMS?!W`UAwF6db*yY z2lS|J>2bY9Z_}UCXX$hGh59mmwZ1{$s_)d_&=2Xy^wauz{fd4|?{&BwnT}jX$Wi7P z;~3|d;AnSDcg%KlITkxsIMzBgIbL(@cI}=)6uNZF zZJCz;zBL!tvsO;|b_;V)>?G`_qPQl2*uyS`}7}Ri_+wZjG~A ztcl8q-Qsx+t6BU}pM!a=RbVw*Whg&JqV^5M?1RRR-Pd7tT3r&Y4`-Vbn-^PPb%D)@ zt&MH4W`j+MO|xcMbHFCX7RQ#Mq#hd|Yqh4uo&)n*Q?2RNEL_oySdG;hYXO^vHcYVE z#2T9rYl}@$de!Y%g`64Iw%C|hJT@L|B1)tEV98d2O+o3Ia$Y&7)D0#rw9ASOb%RZ( zeI8h+RU+*Nn~$qqWGw|-jC$v>Rb5t<#3clq3wz4C%vx!!0b6CQl{kc88?7zYYHJ8rF#{K5-Xm}M_oJ+TDXd6YgLyUnuO zV3iSe4z0GWQgh0B+BzJ&3MOaW7rS6vV|!xz?DI-|NS3{b^IiwLYTb;ji){qEhLS5| zYgqP5Y)Nc6Ta~(AY4Nq#ykyN6V;yYOB`MFc5^?b|*lpB%&pHQo$LfXs6xdxxV(eCW 
zwW@Z(dJs&nPP1-Wdu=P~-T`*6TA}VOV7(}P%36QF?5g`^mr7Zy7o`_~8P!9pGp$ar z^lEptY0U)lNX%MmDp+Q94$5Nfg)JRro53<*&&Bhj!zN{`#A@wC-4U_k?)StF~fiz$$1xfoB1C zAa*cz1nf}kXl!q6KUjC{K(!g$Ep3jSi*1R$CT))0jIEDtk}JY9y(+dAEG>2qcWH%O zFJ_L*vBlCGvAfo}SQq=sRjtS5nMq(y>5j=Wa}P7Y!B{)*g4@~~lV|1@=A0cddETyD znX#(a7~XwXED|e|QHPP?kA-AR#x}=1v0UEYbn8+qlXu?}Q!y7uj<@PgRWHYtzv^1m zEwIy8Kq8@n6b*JRtPo0FK3L|YP4&y-~UblcZbJuP`nsk$b~3Z!&% zh>5#x2F-}+X-cKY^X|r`?}bkGa3#UZgVI``kZLZugt+zg0O|jx47d zk(Hg5qr6$6tU~3_ie*_UkoCT-W>t{&bk-CV%36^11r^SEIcu4UWWAELUPZIMnYBrk zWc^vzc2$=3de%-=k@b_TBdRLvr&(_*E9;k8zfz;K{!i9#RINw%7^>bg#4|)SdY<$= zshT`bd!AO~JX1Z-swX{jJ#*FjJfHV`UNw7O_Png#@A-=7E2_n_)w5N7!1KE2b=B(m zf#(P6gPtFGexxRN{?c-dkaBtDs{5LbN~{u9*)Yn z(F2vUp@BzqFKe+XYaz=jEyzeE=FQ5?O03?M-pT?tu&~?x>0eh;x0>J z{i$rO#9Un2NFJ$dge+j4wd9S1d?t`jpuU+t%{*@-&)ZlTVW|R2FWZ(wX}4kM{dQXP zx5U4qi~bSuujrtE0sX!7_p;476$#32%EOcgMA}zcv72&-oTXwL&xKND_bN7k53N`$ zS6#8{A0N$dhYAflv_nq96dW5Pv?5IQ|YA> zN+**qfIxEa{e19tLKRVJ}ubyJU}~@<{ny z=F`e+IhHh2!&rL3{2|AZhVjGiE9mR=9r7!9TAO@wzHGzWLVpMQ+rja!{5AvIfE~c@ zcl6iF4*XUaZ)1BDa00H>v47Kj{ZN&oEY+sws?``N$J77ml2$d8wvD#iXf)soWQ=rXCN zbWU_xD#1!ybQaDw3sUSWk>|>h)LeQA))t(tCEAiK-GZ8MbiZsAW$~{pqx6jArRPPm zH*n@?dg+~LI^Nq$JA{7w+Y=~|4|b>{a}iTQnu*$Eu8hfRJDN1LN9l3#Dq+U1+3 zqb~j4puH=LJxdZJ9sund%EmK?7A?){hvA-LG{ahFJZ@aNy zyQUA-BYLwwQ(vlY(GTk9^m`7|QRA5GnCDpI*yT9xxN4*sVPib{ttWaedNI61QuIpn zdh~Ymp72pBjGc$A39phAy*EmYa*awCKFS@&&Les(dNSG*?vxb07riLn0$w8As?^%m|=CDgnRR$)g9_rrQ5daQ6$D&gujMtjH#3OB)eFuJpFaVoLqePl-p z7sI+Ux~Fh@DitT92CTv^7fy$DOZ2tEajAr|1*l8dio$WQu8VFg45d=>26_wIQHV7m zx;nb9(3MK8d84psW}yq#rO|1jE1@gV6(Wn5i#J(k2v05A*cdt%Iu>0jvbZBUH#(ne zQs@}0&C%JRouQr4xgv{O(XJ_Ew?f#xL>r?MLaRfoQ9opHNpwoImh5zBwUjNs9GVM{ zxsoE6imyctvOA%+f z+NK<#9Yo4*Jm9Z|cE1hYcnABV0QQ8)1)68gfIf1Y$D^?$T!~lo2h|yMOH0SA_C{?g zX7<(EcDx!tr`^#r^{75xpQbO+*Xld(qs1}9vB7#oef#tGw!Q+IluHO@)S+0JFo&CY$!Q_gEjFB(%6FB%`%9@t*gS~N*=JHb*{ zG_I&6ti5GSWkDA$*gyk~H}2jSZ5$eRzqq@*ySuwz+}+)wad&rjclYV<8+-31lbIir 
zoRjRzuBua6_2VR4)~eyCX*1QIF&w%0E@2?&kTWJNj4JX-^bo<9lX`O)m@Qm6jL&FPvDcEtKX_a+I;e#ihq9Wn0?WEb8n-K=yC2%X?@s#am= z8SQS$55S6d-Mmt2%OC#2@DwulVIWU=POuH6lYGAVqW;U-t=D?WDqnj2{!OLr%3Lqz zsw@+BsxSq~A^U2dk7}KvThGma2)zn>JH-Ylpp?tdDb*Fl`m7ElJYS9#~9nZ-U_8+oIVe=nnqQotcEpJ z4fc=OD?83063WAYb;8>~`wowDv{5mJD+1Lf zXmVFpSJpshFzf#SWKc8OzTD2%Fc8tG{2%yWtsf@uC*f)Sy;+#X_11D6w*ky9M?>n z=${FJmsIKor$EW(=G!-$!OETJ-w1(Mos|}#+|#7*b{^ zWGGZ?zL(}{5Qu+JphOe9#ebj^&Z3hRrJE=}tYi!C>&4u{gf)CId4U)V{p&Vf7kxiP zSJOLrF~e$dFtu%*cK*xl?oRbehfzlWPWz9Ja`21M-Pov^_rg2Xq9^_kT?$?I+uULC z&Ik4#)AeU~E4WqMmMy|{^^&z%5b_?yF)f>%ZCW*y&#;C2fG4C;mytJIn$_W-R+0~P z(J;^~ig}#;N$#&aK4C^5MXcm;4X6_`#N7nKoz^1oIcWST9MF{8w^EBrtbQNDC2p*M zYL6SARi|XgM@U2xd>A~q4EX!+ll%$oPETrTlO0p_F%-H$m+n=1dG1SE#DQlGk4j$_(r`(r2mzodS%~Ra+ z2oBI2#EE-|?R}$;$!*jNKU~JOD&H?>E*Z=VJSy6yAvf$?0;n9jwl?y@*9jeEWCt$8 zE|JRRF8@5iD~&!W?HQ-N5IhvTXfp|RY>hLAlp>|58Are7u=n1VBBj3=^GW@I8wX#o z#z-$bHLG8*P@GC-n3JDM&!1p8%{Q)OSN0l@Oh-7~J7l{PfAD|dsaNyrefNEdeBIah zxnDz$aLNmzM(N18H zGqX`LyFqB%tpCe%_=EVNY`#ST2?VQc) z!VlCwnS(SVT4zK&C&<}!LC)K){6kDo z2lz-tP8)!L=c{}qC<9x<-|sT*m%^R7ostE3*-I+EXg7&^I&5e!vO1O3GfU^`+ZGTJ z&qaK^QBA4SQ;!o56CqB>j|J4895Lz<$G?TvlK=;GM$ufHFBx z&8Ri88@$|(EX#v`a*EOGJcAM7&Em zo>lx{Rs4{e0sfi+0-6E6ngQW0Q0QMhRhdXpQV&=8@*kpma-|L^2OQ`+;dZ3jb$h~q zd~xydV5z~MFP+|B$$^}_<5!pYpnM4u5NF#fFO9rD6kD667fHWOti?ZiJhixJ!FO5@ zPHpH9Gyg5GBOCJ7S`KTm45Sa6gZ4c{m9H@r*4|e6PKT>!o~{TwJAUuMVFuP;ZISqX zd}~WjNDU(YlwXEwj9)%Fx}38LguYC_Oy5Nc6ug_SF(;|K1GWm|8H{Du^h$YOI=*;! 
zTTe^%9Kl{Td0RZkzD(XXzos%UDRhe70SDBeVu}NgFAP#BFUFe~5S&n7C_m3j6zN4m zB$Oco&NMkdJqEv7uw(VaS5HjUQ+ z2oe-IHV~>=c6CODt|9U8ff)x=4IG%m*=SbVh^Zf7`(E~B0b%#!IrCF`@b%P?xq9-G zc;5_k0%CG2bQxZwG?DETQHye<1$$W|@h7TDJ$sOZ`=F_t&gRR~fB}OKIecG54^{SQQ zgxWf1)Fs^Fp7#7O;Z=q}p2XU?AV!|dQhG8-2jr*yyKx7>Ls{c8zvrL70dko{*ipZ+ z348V7x+G+>R0dx@mZ|b7u>&e&>_lG!<|_(LvHfnt2>kHp&0aVcZK zue(@wv{BhyNh2|iu8zf%8^`Rk%_Hxxmaezky7Q^kyG@Jg#z!9Qt00S6KWZegKY??R zM};tJdU1P!t|-&JwDaN8q5gZnHZglB_p=2isS12E|Ba+zKme>TEU%CP>`mUBtDn(W z$;afEh}G)y`0Cpbx8YcK;>aQb6uY*ea-O!ov~FC{b{t*uo*jD*bJ22H7}C(_fY(1K z^S}R9Ljbpu5%I>N$gz!M2GSjh@SlU@7;JueWDv`XTweY3^7ec5D~0|oW&uTxx73&rPG(xbcGeN^k-Y^SRQ5GtiPZ1;3SmaGG ztu8PH|9+M1Mh#(rBO1NCP)RV_40%FWudkmyY{>FTi*OsHI#Yl$L-#Cu=d)jO8=)mG*L=f27)M6-_ zu6Aq@g07;cNB1a7!}YK`tfQKK*K=_${M`3AW>p*HXOx?571`46V(6R(5NX@&H&C8^ z3hA6LwFA}+DJ?uvwbzl&x5 z0)A7!E)d4kz?!@f^8yJXeBwRe?kfz8+XG5(k_3k-Ea&P z&sGXxRl^snyZ|FAL$sTpeZ=TKt~!94&i9xvk5Noe(Ih@Agod{_DeMyzYgmkGPCVLw zk0l9hfSp$ea0ffgBSHXFVs0zO-BX4B4y>(n;`BHNBMo%gBNs%28-#cadI)aJ+sCp; zSs}w9`A%TMN2laJerGNCo4?MYe@zdLes@b*(qa#L#7LK=p~q(b?i+HfSA<=PTen_u z&CXAmihyIVPV_s-CtAE%-aW%iE`g*k{l%qY3#J^gbb_iQT7X{zMM&#Lh&RA$;JMwHXy8KapA4ZJ1 zF{swp1dbfUz2+U#VL zX~88~M9_z>aS6}g9QU(@J*Np(?J4b1dR7^6j_)LkX>Y$J*FwGcVjClPuA=_N>_D(X zP3IK-0ch^wU_yD95re4xfKioL|CHFS_eH~v&baH&<@FoX<;pQK#&CW6C1syV?n^w1 zcVS6*s$S2{FWT4~tA6)QBBru${bYGke>*nuSNTLy!20cj>Ml(^oUN3nD&H@y$#3eF zsL#gt(#~L%ya=OnJfVe4{!9;ksk2aQLkItPl0E3@1c*BlUIIS^Hj3eMg0vt5xdtI) z6;ktm=7%BbO$B6pb50||^y*!+o#m7Zfe!2L+6{|~;aqjgd>&GSyB?^mIC;PiMF{gG z|E(6q+GAcpJy`$bKb#RLE$B|bjVjh2PML%(D#6EPNrmuYw#(c=7ro6NW&rssj50~Q zcO89s39wJG%T^vohvtWl>iq3>mHlQ}=y-{kFz$^~tKuN=Ad_SIs2`b%gtwLnQ~lbX zao2VGFZ;AV8!(Kmj$%hrFq%M>cKdp*e-UUIkfQ^Z7wjUkCzRo8CRBqFdbHqYP(F|{ zvdjp8{qbIc_X&$@A;>ws-=x_*L295g}L?JcUSE_aHyjB2j+zV=z}VDaL=UUtuqLOs}EkozX;m-{X#1NsrA=k9$ z{iLk?B5L2CD?USGCqgm!JPjHY7Jf!0=}bhDyhq(Lfn@k>nFGCw(}jJjj$jFX+@y-? 
zR~o5mLyw`hlZrtiP%eKbMGizU1DA%h=OOx+!ow9kj?TY;coKd5HH(K=i5jV#)m~FukAS^K28l z8lclJQ}hixEjN88A3|#bVqe)2OFJ-yGDl-M2#fF;M&c6u-J4k#iJJe)k#^TH>E{I4 zl>m|O#y9d9Ua5UVR^%|ef7}Oq!$5_1$e&tL^x&Hyu@N^_ z6xnRjW0G+#p#1|;Fv^jjW3z*=N{Hi~fT9rHih!$0csx1jN;F_BKlF?qO?9}BtJwYjFQ%nReOmL!Bvq}5PD378jh zC!K8$yoO)r7FJ}lrhltJuGgo@eH!Q(o6Th#H$TxV*l9Z^NKOt2A5|WXT%YSzx5#9d z_^q}D%)$zca@PhdeQb6d%@UQK3_`D~#6Fg~4J+SMC2!+*QK;xBb&hf@20T#_A!(R; z6_VgRN__};hMY&*dP)ik7Ws{p7%g4ss>GDfWo3 z$|CAsU$p^2ueaWtTg~1%TE!dw+2M}1Pw}_hR*1n7_{($ZEZff78vXR?qaliezmW7$ zA{f7tyhE67!Rg}cgflUOz!o1oR9Qt(Tx{viakOBMG9xjr2V%lziu8g`5G$y`S|wdV znZ;&(J6~Cc*aHrCkzvUeZq_VNM+d;fAW9Q7(`u6z>lC- zgP|5D9MXQ=cKvH>iM(7#_GsZx+KQA#gBtB#iYy{Fi&FCCLIeqtC0GaqiM5*|ccMf0 zvZG5C2M0v)1r)90mC~G&>Ee78SXiOsKNIf^84Q&|CDM#+2K;S_Wk{S+!Yu@w;4LLg{y7t zb}QBxOXXCQtea}5YD|-)7U3z`2M0w-DP7&|NS$Oha9cU-=9>ZCLUv6s&`02g6%<1b zE~%_N1S2V-w~Br8qd<1-euzMFuf2l;Wp0UnVh698n1+8flBpx6zAsrR7OeM)pDHW^ zLlS9_xt27KY8arn9*t8JLC=j_N-tT}f-tfUK#{NCOJK++{dx4I@&i;C===-1lQl2^n zl|t)wH=ZAQ88wUr7*!DK+~QrMViR%h+hzZvq%K1c9iwv)HsR-V_8iwjIe)fuRZ2rV zJEVEC;s}=+J7cdyvmu!gCEN`-kK*dTQH~NfO$D@kFBn%X@QVGaol7siMvLz{{mCQ= z)5`UaRiy3}(GTQZCU}ae2K1klR+b^JU}3h)Gh(9z+v1DPeY;{Zg4vy zae5lu4HJy2Pf!AHHEha|Fn%;)ViukY|EWM`p(8x|n8bMkzrj1{Qek49qryaS72L4^mDzPA zW#zdjZd4uGD)x$68e9-ZMv|+Dc~?f9iR}4#jV}>x>gkqK5G7BHc+tLFAe|Js0-BB6 zY7c-FV~ury!2P!mRrktG^K0F?9=o+UV2+O2&F&En?lsb9#Hro?=zv?JiD^=LGDehxOMJuo0Xw9adJH@Xz>zagP`DEF%`6L zNhMMH;yu)x-{QRze9N?SbrrF{$PShLG5Q!v+s3@DzN*CuJuyp6O?Vjr4i^e}6xxKa|M#!)4 zoLKN9=Bib_O@F%pF=-)*u+G%xN;s$$-QtOpPI8hbGxLGy=Beiv8>!s!ft3Y`6P@mj zbnW_4Mo>fA!IMtv}Oh1BL^YJG6XBA$Y1@ z0b86=QegD?kQ?F^={R?NRo{h)iw4p_8*|W>%^^JNWp{{PF{rzc7Y!O57=Zu7023NT zAN_8S9SM;~Dy84s=WBt2ii^)iO=Mj(GG!I0)i3o+-b^ywmIqwip8mxX#@-5VjNJ~= z=U+G4Y3wB8?%tly0>A8eml=DsLg3A5%?8^EZ<^t+FMA^FSd@daxzVUHrfhq8sq$Y# zc&=|O<95gG40#|L4C)vq^h$&^opesO$;QUXA@?ril{Dys=0wazjvpq+ylo3I1-w^! 
zd+Vx{oG@Fb!%qwK&{bI0?RLlR3T2hGZ^Eml**CqY12nx28b+jRAPEcO5NmvrE>_E{ zG3chx)hLH!nkU|?*1NWsp{I(Ohaf`6Lm!?F8WV~2xVT=~)*s2=4hC65biABPt?@|A zLsuNw$!;zM#?<NbQEF;`8`iuuABRFicx{|L2w*sjfWzbs zZiX$SG-TVCu$$90cA3-ZD8kLkpF#bW_%g*3dr&qW*8bt)(DCU<`QT6bJ)Tc)*YhnO;xv~}6=>+xXJUPu1Le({_XzvLAeuXy_i zD!OstD3Lm~j*oDI&G4G5C0Bnep2T&tn!4gN38epAJuLncG3)StOnO=ubfNPHQ;zkO z(<7|&W^BrA_&wM3F#&yKZ+~Z-QXUj~-~j(p9C7$ks1JslOFLC3^eN8{>8DtndnpbU zG6XORXHk&HNp7H$6TjrUZP?jt!=`gWTYIo5amXUTH^+ z&ODhT$u-66Zw*t*BV`S)A0tTQCUIK!sY3BF9*QU6PtN7IKg~nLXY;Uj#B?%!@R19l zkCEF|{t{FYTT~}DaC<>BZZMMC+X=XdvH8`~E*msx$3!OTtg!C0cmJ1VDC+8Qxt4ME z>-qa@S$S}thVsPvjuWc%!JDOgxl*w_QbyXHo$!GMZdvQ)J&xIDp%S9g6}QJ?$Qm^Xo^zUdLujm8zb#FehWo@uyPn1PYbeB` z!Iif(#ac1rlFxQKemZm--0s`64#6sGozAUCYD93cbu^Xt%f-bz>6F7$zZ#xPWlHXX zLAPZpF#)xSWF29lqP(W&Hj2I3o0y!wzwvHUmZojrFhSQtYeQw1@j;15z@P#R;u8Fo zaw=Ige$G|S1?NfQQ~QQYU6VF8o>p1`F8T2$mqxv%!>yWDb$-xX2<=$vg$o;%sGZA7 zC&QJ8(3h$;j@)&pA&X-tt>Bah(e*X$@fCZ}cfly&*Y8!vr7bzF@OQvXq<7@0p z$e%#y*w*CHcJrtb5kIABr#*iS;*&$P?zkYKpJwN^V_#RRWs8?hT z&dxGOInbJ0H`g@ayj?5sUMB{cx$)m3Kb^M?=S0&{&6c-_CHJ_YXa9~$^FpX!x`AC7 zT-5p?nO#(S)}2UAi!8wv?ugb_#LX=BGUA^tSLsC^MJit1gtg-F+Q?;GxR!0qI9y>G z8%tN3Bd7B2)J?!DubaPn`(&K_!`V4+V79Ve7VLi?z+OguMY~+NU1Zz#7_Y9ohO&2c zH1wT1TRhby^~%Hk=J<#D=CPVz#wnskChz%>Kpn|le^v>iYaVYS`rX~@ALaN{eHqWc zCN)TCWm5~5j}?VB7D8X98jqU-Vu3t;J9+Pol5)(R?aOR#=s?t67WA)NaBvY$#+ zyASrc!26O{6CD~CGTo#vD`(5)WxKj*b$-lD8t9ha=?q)O4TaPJ>340=iwQ7NNA zm#{(p{e3Q5ccSclWUzZf%n>ygz8S+SXSLJM>W+$zBCBuLCiJ$Jn)Z5kmGWr~d?#IM zBJ8XX^~&;WkeprTy2_FIy1MCzJ}G7kS#uq@G{26(No_JVQWFSM|LBY|U-M9h4Y_}~ zSxCZ(r#{%4l05v38AVVT%JejJCb<|JwQOD8M)0WXtUVhVZ*yTZxc_sa(&KrWC~WaC z!Gg{IPm)#1^^>5%A?YxUQiOa?)H|)l#pDT3%j9WqNJQpH$PQw6lCC`6!`t7``nEm= zeIdMU@*O55+2SKdpQ(fWFp06$+;knhp07t{h#R?D%X{9I23Xf>IH4zZG!; zdwER$VKICvB&{Uj&@2mabV<$U?H@agc3tBuCGaHKWz(A8wW_{}Z*%6LzZ1rOBDn=N{+jPGAtDiorQpn2NsUO$>-DZwim&~ z`<`*#XMM2wS4k7ayywsywi|12`1`H95_QL6i(y!+Cgavu%$c9&sueTjxQ-P6?Ce)~ z8Os4%tu_90Hk+A`IwWx`@F$b59!;m1>TYptz|>dk0;}Ls%FIO%ZymN2GTb+>$*_80h~p+5@vd9gnpP=(21KD&OP=-Fz-Mjf 
zo-*Wwh<|vCH>VHe$4FIj*o=>_626|8?-%zd*n>3AWq9ggxqhOXGMKnH`~IED+&R(7 z=8cQ(U-E`+X9OOA;cuv`~QIe9%U!-+st2Wgx zI~82(O;o^5ijp|FV;C~rjbO&j7c}NUAF||>yh39Vgp0UcQln|IxdRzpyuG+_oYds7+c@>JTj6O&JCrt-Wp39E$JBrFp3KPsO4BhUYdYwY% z8*U$=x*pD0JCvvF%CBZQGD}^UGp)EM2~+7M)7^Y5E)5er)Ta!JM%&khFntadlJIg8 zya=;T)HKW7UH&?BI7NHrCmDo|-pq8Zb+Bm=a6YM7c0A58zEy;~9Lsdmt;mhxuk+J8o7I@T_@=&XmgNF?S~KqChQ}ikY^Hvx{os!o zd@0KGk4!6sNW>IzV+@QF%=Di>ndaD-Y2$G}>%r2Pt}M!OCvnH%c7v|pU6HwTwBdOe zLvFxbVfHP^=%TN8df4ONqev(X5FT{Ywqj#S+ZnUn0QU@?3h0=#KK0{+myKX&bt2Xo zKAgE4t1Co027+tV2er*4m`)i4i$q=nMV8~6nc2Mr(4dFrW(#GP4 zcY93zYB4dXimM{V`wt&$YCSI7yYk$!_Dce}7#$m}B`jCB`)PKEMjpz^txSw&Q>C#5 zu!fG$)&tmL`={G>D{ELQIF`-WN?u9c^&y6wY(>@ig7GLijK&PLyOGUp@pdVr(t&o0 zOhLLkr*tX{o{=QE_;Qz<=L}lyj`SM<#Z)=#o5s<6va#LzlQg%29Gi*am!tw9L^=ky+@Y;2j@AliObdT ztTlPIcU5}MH(oTf>zlO(Z|kieYj}U|*S@?WmG#e>s*QGrGMG*`$J6GHF3$MXV5lbZ z-Uifj>$MM>8>?MO6P`K7$+|Kv^oOdvUmcYVyH|6Yo7~%@ss9mx@|sAyC``0oiHKn$ zzI7g{U(VOFOR&nM>|C7cx)|>tOw^N0kaGlni7k6KF*Dk~8pWiB*Ewud!6mGK)24Fs zDWD}c?D$xvy=vUL~**sCn8Wu(uh!Yg7+ng(PF zgC{idU)%Gf@1AzuujGlTRmLHmOJ}8yC)|wA$WL!HuXP9FNujz9z9;Mz^xTm^BOq%1 z*m;Xnq@W`<;mdpM;#$#$VVEJV^=8P0A@ix7uw)!axP03SDihDYY^5$-y?PuxXVh6% zUg1^xDp9gs`^bzs+sZa=yiF&(S#0HhRGn?x19U=b;mu0Y03QpQc}u7!k*hQ~sf6$D zUc#UtZ&)Cq7?WBDp3Xa!nK8fv$ zq+183rqHC{Hs&bp*F?Lv&3N=qz@+`^+Oyf{s&Q|7@)Vaii$pev^{)L+ks_8fqn*qY z2<2~Dhc5WrCiMnHC%m4bm)12nj~7V}6w>sx)%rTFXy(M(IF2VaGTBEO2<&7UzSc?o z=u+SAQi0x6-z@BLK5rwT>UO6qg#xH`K4PsmNeVRV%7-=seto&c4DLEh<;$)ucZyq9zOhMl876t=YO| zUV7D2wFED;AGmz>)qXFHe`jnT`(V7H`B$|X`|pwO;dpS-yeL0aorsyGqmezGh^3yRk&uyrjiC{pq>;6WqbU*NA7&mN1la#` zy=#V6?38sM5&Ym47v#{TEPs5JS|Nq}l9fE-TYjO(H*o3ThHoVIvCY|jWeLVG1%!_# z@2Qz>#goT;D=T)B&hL??%-+9v|iZVP*Tdh_|AK$LLa?=kPdB3cFAt)kGo* zRn&DJ?7&VAkPEyn@a0Y?VLtT*NzCSqk@5d}YMO~`on@VR_={Yq08MO*POK8OzjNg~cd!OKBa1G`rx0 zoWaC>Wb3e38}Avk2ghQBJ3NNzwCIX!MI&NKoy&#Z0Ai?%aHy+O1T5wmf18Y;{hC3oje0Vng5zCmW=99Q+iYB1m zpf<;xP?h>9ACr_he}I<{UBgi5%-I_1&16UDZ?iai{!863vvUIvPz~CZlpAs<)$ZAI zq`sW%GApuQ)gR3tO<24rrl}lzO|}{JyMZ1Ch>EA^B4h=FAW{-fJ&k3Je!X0<4iqmli{z3rf3 
z$|XhKKX|4a^__0!+lN2uRXI=k?J8(1Z=~3xD*xk5X6a@2d;$cn3T`bVQC93(i^aoRm4f?+PRzE;P}=7ZbbD`}C6f)|Y;dl5F9P2aS?Dn9oZ^=Q7PzjDMvU`RY~`J*17sIK){6w5u%h zf^XsG!strjZZubUn$$(|m1ZRRysWV{4T88gs4c7qpH&wThE=E3Seri6CPP8F}+`b^z#l|~pUzyngN3d~j`_NSlb9CMcUizZ(=x4{~KPjX}}uqlyO zeWZSGegoZ--H7ww9Q>$q`uS)Fjk%qsr}9T$QK;scWz%>fs2(nub?6GVw3NLL=63%R7inD;X z&$iZ7keK-zF(K3ahnWTw_siAC%Fu`;r%+}CooN7p(1AO$)$}D8Ek8Tbfp`P|I^WDC~mRCNe+T`T$%yXm)*e9Eu>E7Sj80Y;54C%Tbows z|7)lJWr9zX85|`B){g>kGd{*?gyV0jer%;*qDLwGeZHmLY`pGZ1AGMZ;I`skw~I7S z?=qA4|KqX#_fLE+I`VSd*mR@w=6)}{B2RE7j0U%lOZCqo8R6eBqmRmCGDDHdJ!lBK z%&%+^yq|$0DR8=|?S3 zo*q^gd7;}b(>tr}btXGyo+?+GgH5+ixo=r(Z*ATua^skj4yR%B8+ICoF4XrZd>ksO zVrHvnn$MO_RnE%q9h{mwRiBO(uS)+Ra4bW<*}8(-=JH|v#5HhSeR{5D z=t!*KEQ|i^G*VzSc4L*{ElojAnes9H8o5i=Jg`t+qEY*6y5s+HXMQx9%58BNnRa?I zS0tUvRr+^)FH2Tkp38fEnIS(y1U(v0E;c(kooPLh`#dsN5DIA&hmD9SUuEjv+N@Ly zFViZ_jv?uI__}5~cAKbPeCd1P5hpCw`DO!bHRS|@y`hYnvtqB@%Bb8#_}Kke)zih2(uEceJKza1i&FMvbGgfwwThElW_y0f)UsuX~Y#f-0LK8>l(mKri9=k0h4B zSrTuHcz&#yoBF@b7ag?=e4X==srsS<5H`;!i@jTbeMgT7?VII_iMojmaBIa?ygsN% zg^jcfe<3k}Te|}#3g_W{aIkR`tGK!SUxTpMOL17sK8{TrDc#nYBaxci$PDU_YW9WQ z8dd@l$A8fp9}StpEmV*>yO8{-1Q&;7iAvfu>;2EA*qmniJ&(d__FJ;U4-nV{8|1Q1#`S^OE+25GXq_^o zJ*T_v*>@<)9oQU93{NF|)E%)Z@7qBe(TdiZKtEWltj%v(Z>+|=Dx{u=uo7dk0%*Es z6uA2=%4?MOUNZEaR2BYKF~aMl_9-ZkUB&n9UUXs>OE65DHuN^)V^;bfBtjPlFLS3U z(IvRRA$ouzMrY)f)NDIr3%iLr1g~=U0;%9lZbZcv$dF*&%`e5&3df3CNh>LhC@p=a z0p(pXhl)00<(e35(T`zM4x?3e4d=2h9^O*g<%mt<=8rDZ*%$jC^ZTmLGLaa>x6aXu zO72v~w4OM#ShQ}Ks2AEeHcxEkct(g^f>k#JZdUdcQEwWGrxaAQPgpK%Um!_Sh27WR zv~HA;3uQS6|N0qi)fcQ-MoA-YXLg78%HWh51s}_6YujM1$X+Nk(gTUdThB)S?qwJu zUO5(%3P0AHxGVe+E|4r49?+!ewm+gMaojFcnO0p{pQZ1p9XT-9OwnPmpg$DO&#!5l zkvqjUXW`J#~^tVM&;s>GHpd?Hyf}FvUrGdSf+LX&D^RVH1d71z+|MX z5M*2`Q;Wb*v_-Q0-wf*$dAXGHMjLvDR5zl3h~)nf6TMD5U@uZ3PYz*T@;F0Y+D|S1 z5b)j$4aGBag+(>l!R~$gwWUpy-Gy#ck3&7Wf65Qzs(gd2y@Ms9J;4CajTn9%%x-S~ zcPAR}WGd;?@)26xhd#?Qj>xwDAa6}|r&}Nrdylw$yR~Qx^mv$*1d|0yu7n zI3TboL8W7LIHl;1iY@W=2yD(RlYb0YCs_Hz)>u;peGlL7v%{52vuk&?U2SLC!%IhkzZZ=tXmaQSe1wt 
z-YHia!mW&ESI0Iry@2Fo{87wC+X<^FPd)<%vx?(7cPRhzbzLW(W@!Pul`RieN z8&kSp{i3JW+V&F+#ZbwKc1KaO>OS8lt!{$zMa?RMXScrP2b2k{V0td*l{4OprxvxO z0`bOb97m;bQ~l*Gq9kc5`q!e?Li+ZG<)Z}mNw6-bY9l&&ZbwP!FI7>}G%lSpGG|9? zoQIpUJwJ!rAW#!WabjFt$=N-MLh!DlnAfACh~vm@y7$mg+&~D$N$P+tSlyRy?5T8a zolYwJ{IrHz>3^zuQTceT0j8xW%g4tlJd}^8jMzc6F|3Pq3Q%Fn{I@Jtzib-dm z>f1L%3gqNcf{7bU_(+^67>djo%%e2sN`*el?s&|M*`p)yOz0j;i#yc4pli3@R7a$?@z4W&%?ea3eA2B1dsVVK<4iTfuLR z$9U4-=YlJuqM}8r8EoKwM&6sHI0waFs~tNkE)LV&!?y-hF|1Yq@^22ItTfk$)KkTk8PLktJfE+YmIHz9vfdB=q@O4 z@OQRnhU@%&vV=#l2WYZyuX0KV>tKIjQp{)>VPHi)`0Z8|K+uX1PTh&V*tIeFmxzXF zhD)^T7-*QR)*@3i~Ai#uRe-LX9X&KuMK6`7l^O5ha6C7$s0rSw9}- z00^Sok=vC7kWuX@>-(b=0znjV75%P&K}rVYFa}_xa-LY291x9)K|Tx_h(-}tEtG}Q z3f!k)(1b}wSpmW;?7{l_g?m7T?6m}H=kCZn^03JF0Y7`I9vuaoj=vg)l3FN91_5^ZO33CCtYJ@d|TouAt zL9SY1OCVRJFcr{pW&arPZ$WB4NL=zj(nyk^h$)qfu{D*9 zK9Mnz9-S%51ZRjj$qatZw1}igj>m$yB-X58iUXF}zqH$g2K?ZyBA?$)vQ?5@lQm!G! z>EwJ=DRr^50q`J0D+Nz7lidk6A#-gdB|bGig@ZvYmE)Bo8q!G@c<+P7bN1Y?q+5JAvucJ?SEK2*)ea}7TqWMIV+^NUW-H?6O zY5Ypg*`@y)e*#xD#lFu(>or>FrTGdyM=t5nYK=5Ebp9TCB3Oi!^1re64N;;rL6>dY zw)xt&ZQHhO+x^4fRreeAeHLrr$Mo9qLR!oPNI_H zUextALGu{8TXXW}a1;lb$I-)l0w@mW#Y+$`X>NzAfAe#HIq=@w;Uv(}#{o7EN6#A2 zoINRuGSecR9__}>0W2-_-Ef>=C>`v>@UV~}oN@x435aq#_l{}zEBHjcCA|f`^;neNFjHu>&IS7L_XEi7 z{fFKO{)+JmaRlFo4{!r~19&4k-Q0!z%e>6`T0N?{k(pr8!sfq{uNURKt_!LQ=vw$~ z@~Rt|W#)zW4tjc$pBt5B8~11=A974AohkQw7vTF&7Z6nnj68lo674aSHupO_hMySSWuLGu-YxfvccRPsLE}2CnT1(M z>k8{!x4muKrXsGgwsHl2Icn;Os8;u}1|4fj>-_4;Q#jkJ-fQf5$X1iv*I~f$#&7}Q zX1^S^A}FifN0fp<;_A zvvNyCV__>r1NGWMLupfqP0m4Xn#xUXUHh7D;=Xl8_;aud()Q&T@_N3?&~5D4@TIk? 
z{<{7)Zj8z?Zd>~hFNI}Zr`4m#OIS643m;uBn&+ zE%eIf+TKAwi<-NuZ2)zr*-apFjZbN_+EfQ|+f1s3V0$V0$@j0S-6_a=ZJ1$Fe_ z^M@V<3k5U-5DFmd)z?Re2ZaFv1Nh7Dmxn)#ehT~u#^|rhFGqlD55(ht&Cixc9S>Fn zqyqpxizyCx=?^Oomgq0*k9rE>41g5y%nyVBKL~i`uTB6~2QbHvf&eKDu*Z*q02K_- z$B&>7COx2KzX%A60HDz9FNXsN2iybP1K0!f1LOag80-67o(-2vV4+~M5O+`-(jRzW{;-J#r(Rw20ox#76MxS_a#xM3~< zG6Jr8e=AKdt3LHEa0USVEr30rJ$@bND$FMICu}QtD|Rb%D{?DvD{d=nE9xr9CeS9# zCdeklCV)1)Hn=vlHn29VHmEkFHlQ|~HkdUC9VjcnMSzsQ3qP?vgFRv$nmTY*$@v~E z(1V`-H}s@mz#ILaAJF5T-cR)OpZ|9-Y6xH%Ab^Em9~MC!807z}`+vrE($l|&p7aT5 zqyO^)dfeT+iJtxe^k492>EhO}HRWpw_Z6N^v!Q5dSXy(xsazPvn`thAdi~QX?T$Gt zRUX)PdDgQ%FeeY*oKpwD1=psagVgLU2b`U@^ArXeAX` z>>4OzN}-xtEpKt!nD5x4zRBAu>n4*}tw}wW8GJ}FeU4Sp3wIlTXfIXoZd|0z#Gl5> zD!(uWZ3leE`!Imm4WA8cNLeKqJ%))G`(O$fTEw z6W#5Yn{!7#a?J@)N4(NuWqDr2`3^soj<*8K0#_&Sqcebxr&xEVqzREPZSKe&ucoN- z)Uq9IW(d~#z+u}8=I6@Ic}cmkzh*>epYB$UDaUGU3jr|^3Sk6 z;jlhv&26A6&+#4C#i@C9fAfTu)uY}S#ct_#urqzHw^GNX($9Ht>a! z+F)^I^abCs4G)#yp~``SQDs6~ABBWV=Zo^YWjiF(tt!q@nWPIgbEOu{*1F%r+^US| zn`?W}^(sy#L~it;37L9I=~W?TWY??zX3ONY^%La_*93E@*z8oJN}hf;>Jh0+ zP_D3PgPk{QzVeZ?OW-D4ylmx^8V7!%@>q^CGW6i$X8_{_+k#5wYe#!4md!~lvQ&n) zrzvacK8iQdG#ql3-?mR@T{LYM`{UCnp6cuK3LENIme}Xe^I31Z2WRB&$Wj z2AAuktMv?wX6*&Q^6Z%fYK_my8-rywuEXTBM+L^`b~3)}~`SvOAcvIMVdgUaYvKr>^l$&2dOFd4GY!;FP>vWLBLyj5A3mndJ9; zd$sXi+sC(zC-BtgaPqx^+Q-|%L0+@|DRok?@jZD7 zGD_S>PBat}G)zL;PcjUI_tTGke*$+}v}-F_->F#)3YDI|F3`h9EBG52n306c^@x$E z$j;E7J%qA7jg*9dgoBMdM{9S#$8wi#94mWwmvwQA0aIFdq_#ligyT}e{4O8+0e3!s zf7AjIg>hy69MyE3deagj6ikf>ws%gUQXzsz9Agyh;Ov52-oVMD9euVhF2#d^h?lU+ zF@}gni6o7PVh}NlXV1OoK9tWzgo9WwL0WIynAy_&L0ETD(s5 z@8fQ`Jj3DWLw-U&nNu=L-yw7y)ZA12>Frb4Oy5Fh$SBFJHhEG~!>JiZOHFKxa1@47 zqws_f30wps`znLteNSgcyGyiNKJRwO`|IniGiDZD(1MA5JB(~v-x>UY6m#>XH$ukz z`yQ{22+3H=7K>|n1l4fL(37Q`5y#Bs1HZ0f} z2Qs>eW0AA$MD!V7o*vXNr><;{vCiX7F$WHV<8c!gnM{8wOO2(%Uc$$Q+s(Fa(UOkF z5?dcfr`Yk~r>z*A+awkK(MF0EW8T2(BBE22+YjepQYKky7DJ=4dYrqK+QOTT%^7lP z+!>33u&TAX>6o{SarE@1vvH)+XzKtz>@!R=?$|kVo$xo;`FQG$G~%&w72B|8V>2?W 
z!@Jnr8NSG^FGu9A-5~|%SK+C*_C=TqtAepSRB?o}UWeiWaRIoAqTO62r85y)E*rdf zI1C=$#74&-LmA45DIy~)8_A9)(}eEo79ER{a*fAT%}mU7t#7eX&8ETLD@V!E5f(bC zvZZlElp8&XLo8(cw5qYW#JCztL8h?5wvue6*7G5$k!0Pk^)?Nsap)>S(=8MvJftPJ z%>oDH6*c(Ds&#gY$;AEdxjqAdcULLHOTUC?(p?*6vyMg!Q<9h>%$U@p z1h%o*f9?{7ue)dvcB$)Z2^q-1A_HTHDCQQcz-4sk~NwB*_ zw@M(sk!Zom=AwThkov{BAwLJRpbBJB6-vjY{cv9OX0H(rm^ z4zXt*51UZ;*UXb1-YNw1$4WE&bq1fho4{Gu-LP;<^TR`4=6kA3N{kmoGvy-*Wn7j2 zYtEqvj+)h{=wIgc)0kOPm~g_kC&aJRh}=l|xOK0l7?A~O#LgDiF<0Gauw0a0e{{2h zdUL|Pfqd{7rbP?3!NOpI48lW^ai$g?7Hk)W8B+KM!^m_g2xu%5KPa*TvODno30vyH zR0{>%rB@h|`gIX?7r@|;pZ=H&j|k5R%vpE-y!JjbfQ8*6Uuu0VS75~FWa=Vil z8-#{c39|V|e@4W?j~V?J@l;5LN}3dzDDlKgw)By%oGfjxC(wXbE4IMTh6-bvXhZIR ztEj-q?aSP_9_T%ha0pQc=|La>DD!kwIx;MqlEsmS9_)lOGa$iiQ|3WaJ9cV%`>M>_ z$O&o2nIRWT(Zrd8Fpj9R8-|4inuqyHiD?GN{oQ#|P=-yJYSdbcZKvnAl$1N_y7gE+ zmYSJ{A`1;vEQ0tb<(a8rX1iYJQgGeJ=i_30snz)Myv4e9w5WhB5;rPpNko0^?t0RF z0`Ka{{kpA>f$4AJ@vn`5lQ<`DFvjO3djQ_=wu^RJeiKs}Q@mO*k}RSNFg1OAd5=&w zBu#1WNEykI3X%g=7Y^p2CG1I`gGXH!K6N4J=y7-L5kcrmeL51zPBxfaw->w)=N+eU zU?1fjKsT;%<}zh~f?f4UddBIM8fThzk<+|l#?%2tMJ`A}S#d`0jbWvJ+(7m$cZ8k! zqD=%#Ea!A>UpkFLN7jy%TYb)Jh^#rX8HA*Cr+*q(h!$Kvs~Ln;thXO2i1AEeA|g7Z zb*f%?W7ChirCbkH%LyOBxxTs}rdG&YtDe^XIikv^_^ZPeNeOU7xt9UeUK$kV*Ub^= z)^odiLxy0(<)aA-;{WURI&#Q17mg0o#7^R^<3ONk)q73R^Z6zvsy3$Wjs3&ZK91qq zSm03aw|CEddJp0~YXk#pCAUv0Eu3F*94;F3I7*p9x8g#>Ok-(wyI9{@mT*{CLmB$? 
z)+U7B2*_cRjz4j;mCY5l6~iLHQ%ER|(xo9*HjG~N^e|2!Lc7i5uJ4n*Ok$t4`)5O* zN}n+^I3wTne&UZ|x>-6_cBD+saMieZ_4)rYdwRGM7FaE_+hDBZ+rx>6LMGUS zTLsEZ8W<+h;Q*~Gv3wyew{bjrMi@WPV8@;zQcDE{j|%gwhd3+HfBnRq0sErR&o>P} zhiw0@EQw*7o1cvp)rcjg|He_0>4SL^?h?|&N@H=ygY_A|eHO)D z!`dcn>kDVi=a)XmKoX*iFNumz)%7E6lI}FDjV?My-kAc|rTVC@GSj9hBU_NTvs0$=dZy_f({wi_Brk))tYQ~JxfL3$+ zuLCkrUnH00ZPGf$X%+d&1Eh0oA-*EXzQ|iRci@<mTG5OJ@Q}4&wNMhx{8u}IUj`b& z?AM}-def&ZzSL=50Zig`QN3%&J!+|-Ix8q{D|Wr)3X+AxIO0M6RKxXcE=ALz%o-JQ zcn==N3w2sFlaa`oOpOWx>Zn%7KRIc13uIq){4@{b{=8*p`-JIndC#16bd(gP`8llA z(A!3eE&J`QZ^7F|Bf|4~?v0krk$*q03gLUZEsU=GyjNTK+Mh(SxMgqpUOl+M)wG>_ zII;#X??;)}bkwsyqiT?bTDX)oaVl$OZ#Qvv{rb%Dqjc^7{O|y$dmO;Ly8%_Jz}OoM z`9MW5GEV01??%gnHPXCPDAX%U%s4#RVqsc)ujQSa2K9k3w5c)58kPdx=^GDvcAa|E zU^u%cRN1Y~j`kxN8-2nt|~W^7lv+V96b4>sG> zXIdwdU8hPBl?v3Eo~sIoTmYS5hOvU`hy7_72&7;}k%SJW&i?dtEc^7zv@1~sQ=;}K zkEi};PzF%f3M}Bxp_T-JJ}#h35XlSxCbB&+@+QX~N{Z2#!OJM;#$mb~Hh51H3SdHo zlY~K&gx^gMCcUupzA;eh$#gS@ONV+v^h9V_&9Q~FX*zy&)z^J@M19wv|GYeh+_jGG ztvrp;T5hw4v^$oskx%(ueWLzZ9!B)=J>PN=Z0h0Ra(dgVxlAOFjMsa17< z{H3n(xuzTJ#xJ`pd<^%2RVRN2#x$YVBW7-Z$PndkQ!rH~&fA9g!$#C8JD5+0JOkxV zYY9m;94M(IP+Xp8NYgt7evO4kYBSfMRYv_ucS*>{Qa3O?8xp|(K!sxFNjIH%5PmXo zmtG*ZPM?#_c=yWR0cF+!<fdH5LEgJ?H-#^KHH?_l zsBw6&iTwWFgT;P7QAo54`Mmr@hpCyLb2+W(aQ_}bCYFBV;Z^-(dd{>1Z%lAGUAMWz z?rgQe-qL-9MGdAj({1w~83X4k(L9R4yr*dHC61M)y2_1ob@zP+wOZ+2CEd@a`}i@u zBr0#_$Rvmnq1{1+^FicsyYoiI7Il~{W;rW|cY$uaaHG~@yAYn7jhM(lWU6wz%ag5q zKH1@sU%;^j0vCZ9m?E5`KA?RrQ2AHA4(}+--u}bFpBZx|6j1I8HMxA{Jsr$#bfizp z-dBn%hKUpaxw*K~;|YKV55+msF~uiz$CLwmh9rL4W+*|tWGe#4XTR?W(o{PW(0>$E zEY8f+zbvu2W8mvQ#QR+2YB?iVOAx$y3J?skCwL4^Z0; z>}>hZuDI!KdlffVH_Dd8L|V=7p+{eAFr>zdKol$@RwGdtI7eXZMrChs zEDom5qj(&}qJB0Si%Cg1i)mv}rW$E{3?5e2(72cw(qW_;?sR0#PR=7Wjup!7XT0pO(O zgF&r_VWKX$x;jy6VRz>U+w9sLI2B7tME^bj?rxhGQmrw+bG;8de;WM(tYV9-;hs1g6260uh6x0}*BY{@N!5AnFCw+? 
z0Q3^ygmLdK#9b4C0XjPxnDD8Go;bw%WYdA{&vZVMTafWcCT=P7{=ip|;Zj!3z9FE^r8+U3NZklW>OvRlt_mc0*Xwfa`6r$-B zFpqI9E?GK5fNnL-{9<(QDwZghSVmB#fiixk${2cnAmzmzur95T;yPhtFWiK(uit%mwf() zVPj&&SvFT7hUxQfas!Yi`d4+QztWv(POkp^&C%Q(zSUW1=ixl;n~Gbz)s?%~UXND4 z0W{u5lV6b}3|lZ#(BIwM>Lnf8os8+Gk*cYh;ic@E?v^5^tG#eb37OICalK1k~f7)hQq6>M&CDeqwiDNEgtaLqohKgDOp;{9!wt+ zuGEWe+cGMS1~ZQfbE`@IMy&g&9Dk-=G%wzdd7FRVmrg#NT)Qx!ld=I%Azd*W_&6Ky zyTa?(_w#|+$<%ebK!$)-{Y{isheQbhA!z-H3zo%B!;mW+Q_qIyr8F*1qG#IM?D-Da z6ILzs(oceEy3~a9G(86+Zn!?vKxZbuu zw>le{be%eEZ=iLs+PR22dZ;Zmt*U`^lLb#vzK9E)KX8?JH`rtaHwzD)gGuz%R=AAR zL*WlM9v&VCU!4YlqK)x%#78}K6$V8ZJUlOC(E)#$JVOQjm_nvrT}9!on##TqzF~{J z%4CCC3!}sKbW9ru2GM+nZ)FwiZ>9GxU~b~YTRz)6*u&6+4Q`dobL^j-`EN%|I}Mq%B@O)9TL-w|>80})HNIT4BA zNK3;w!;uv6>9VSqZs-wa?pk1_H$W9+GL{R%4cPkpIrMMrUpY$Zfa@6I$OVgpeJL{< z(-eY~Q#)z?3l7#}*-~G%q09!2IWJE6R=y^Lb>Sq&A)&zNi;8vU_4a8Iwec8%vG?HVaQk1vWfH|eoR__upv72?d9nR0C{8Z{`ZY1j< z+uCO*f_k$liSLAz8?BzljT9SwhpSDYh}br1yZf@zo<-Yzz(fmgt53vKq*uaQZ&CD! z8pc0&j7FtJ_|2!nOxrj7mzt(`?*h4I+C+aunuzK&8|?jES4S^(cs|YGHEQyKU4kZj zC_=|Es$%t5^&=8=)z8tef#F}-EONg-%>n+AQ9!Dh}SB3e}%rJWNNj`wT++~;~#n|&Fndw;um!4OJ9dNrMbE1s^> z7BfeJIvEAFaz&Av{3){jMm?FA;hA1<1=oQYH)(>gatO)ZPS*+Bb>#daidu=*ZY+qL z!+yfCV_WM48u$w%*vcXH+2DK!1%EB7)`qfCrfm|w%B`Yfqm|*CYA&J?ol~dJ`iqA- zfk~%j#ax8GmdGCjFAf~*$>dB5rl6Kfz7JaTp*(sOy_1p`K-F?A8*)IgzA+t&AHZ$F zW)AtnHyo5*H-3EEGyb)8`ksO}eL}X@~L{ zasj2eY(87goApX6C_uF?8LZKZK{kiN_KVU&UjgY=4V8e}=E*2+nV|=tO;d*@@Eyy9 z;+1m$ZYPX8ha!d-`L1%;K({pkNizhWBDrsts;54B>Y%c6K~dN@l*MKyaamgG^Thom z)$b!kUSj1ri;#A&*;Nrrwj85$DnU)1oBKpL%#i$jRoxo;_&T`gs<{1>aoROnl3~u- zxxG%oD6}R)4OQ7TrqXZ=|1s?u*kfWsq`aww64&IG-J_gF4Da~I(f`nBIujCv6E-TD z^g-v{@^~F3z?Xb>Y zs;w(4vchcqY=Td(R}c%tUy?eFIqpeQ+2aer&Z4 z7t+9vQ3?wUTGMn(l$}zxvSdxhOMh$b0F5w{BbDwWOtbBR!a~a|jO{N8y~OP(sO^IC zb`vbrW=SIPn>y(A^s`pC-+~Oet?e26?N_F4`gR%@)@g^sR(sbMp&y@Tmjwa8yWL0b zOZDmJqa56o-`h57c9^g8O#8|jVq~P9eDW@YNTqZ9MKJ7 zUX+~x8FImm1+(_hP8ISd>e0|yf#$5Rg8!2Ja%9iZk& zEvx(tp_Z~frgdW#VUsT*P>*0xo8?{_GH6e#6u@eWjnr<)e!PtA6q?nTI9C~{v2zwJ 
zVdh9nOps9A(VjY`7ZrmZhw8}=Db+}Locx%!anp%;q3$YIHzNk$ANPfbF4B#^)L4O1 zw$^M|&_AjT`;L#E5B&B)OIFnuQ=yl8(=(rsv){QyQoip)K@+y!mOiZ|Tas!BUl(wC zF;(wxD%joezn|tVJ(f#i@hS`A5;7Fs3S#P|=dZ!?@O+cV7O5inW7!DX=C&WD( zHP$r@M&^QXCtP1$#aG_F^V`rp`(?xJH_J=)E4Ys*1n(&Rx2(Ajv@gczD&JQe^hZwY zocKu;{u2(-RJdk5e!+Bo+9TBhX(RpvE%uirAps#lk8KoLU3MCQ`M#S2lSBj_XKd65 z_D#CrN?Z4uPSDRKU`sx0yEHlQtb9=HEW}Tqnv{;&?ql=(u_S<$yO0o~DEnz?OG~o? z9Lp!-`pM|Ri|T^y@}e3?SJf<;Z-sH){lcCzqcX9=bJ_1fjf#(TpO9QS{dn#(oHu&itIQjzVy+N+%a|bqZm5phV*bhW;MsI z{X32Io-_k0(R5(-n4TI4-KJ{8yRRz$TD+7#hKb&a+93^5Ah)LM8249jcxQb~o%97! z(r1y?HmX@vjXM^vf8aqU`gbd+*!|VARd=V$0x?%6f)A+~zp;=`Cgb0jY(UjVDZJd# zU3k5p5L~H+B=}9iODf6mHnA3ISFoV$gNyKNlW{u#cpp?9FNBLUE3kB2g_D^Dfet;h ze+9}Er3)K8Nvtsy;Zqd;g)6 znCH1U+R{&=3lj_*c$1VKv3b<5_Iuu~pCkY_^gu?!MyQ)b0wkzq7n;Kmfhmq^<{-J(F%&Y?MXN`Zv)(5HKBJ`H>& z#Limsr*`A#rwrT5t7DO+8p_g$>T~FfC&+Tl0W@=54wor?3)1}o zm$ZZA@L^!G7_5ZqIbc3kA4P6DXYOHkD5cB}UvwN4zJTtra*_mtjR;2imgwlR=Yn<- z3L@F->GPvfLr!R=<34uY$IdSh$Iea_1Tz3uS-3c?u!!3)z6y3Vo9IUZtCyO1 zBO_hsmOw8RxZj$bG5GugEQz0D%CGT#PbfH=)SHhQe8klbhuW5eBW0C6RpEuSG=685{dgSn z(Y15?TM%y=Jz36t!bo6KG~a32eS3W6(V@Ebc&}5j!8)v=GuK&qOvpT)Oe!@Q@>eiY z<*@ul-cepZIou|Q5$W&I7G3Ggs`rlkjEm5c&5EDX2(Q*wr1VAVkqgd*1?BJ(LqoOi zgxRuix2xNUaM(?Q$`zLuX>6fItXA$-&FS_sZ(U_=L5J@W5_;q^&CMSU?&)~3D$NeR zDM~LF*6ptLLgckmLgT_*D_5fwb!t~_T^s&wWps1-m1C^M=5{btmsc&(E(0(>zLN$r z{T|VNcsC?}ROC9w^xY$D7ziENC=C}_DWGJqF`TJ8@!ZNx{L#E)#Hlr|ApFDK%K9QRX~lAX>pKJW6N|h0E|^xAmE~sf{Ad?uM>G_P3#9t8 znu=M4UA5q_+UT}8m3+OrTOTHzy5cgf`kgNW6e}`6^M>0_h3)%RjkeC^%0d{ zUi{=jZhXt;DIwEC4uHJ{1`!)K%N@;`DtxF->+zpR>ke=p%`3Qx_kht$94$%tLKyXm zrd;bpau|n3XY8K*#7+pOx-XX+u+z~e$>c#&m=e(}$M(8D)AMukleZFmOPc9lwemV& zx4kwzYKR*>7b;nvE3UC`wx$*mF{LP6x3p(&HQ1b};H=S6Dm8c%4dQG(wHOTGS*?~g0mHc!acsHB zB`sJb872f66-vIXPG7XD(Y(&uT1|?njYe*sYBMF;#<8<4 zGGDxn1va&u;qYnu)OvY?Me&rV|(J%6ODj+o5ap;e2d{JX{DFdpYmqf z5Al9pVljI-{hZi?jy+kji%&WTp&6xO36fG#{`sZb(hC+Lf)$fA&8P>_u8B7;f(A!l z<4e!__XGRy8~S-{Og$X|Hbg*vS(ZJ%~zEtT?sF5>uZD66LJy_XKkZi7a+EO2(!szT`; zLPPV34-}m5_jUfGjq@1L3&PJ83NC 
z^c&F^I+*eWn5$#S0XnXjvwtw-nK2;U zFPucdkm3_+wIp~4BW|L#O0}QlPmJ7P#T2(}|Fbx{;uzisJ}UGvTqEad6h9%@TEH`( zauLH7`P*n_Z+!bqtX0eBQCDh(nNNQWlY1cuWp>f6ePWX&en(R&bQ(+Oj4v3;7qDbx zH+HH@hTQe^;r?_2XzYvQlZPuza@n@qx4FRBCBwHSed|kRy{SWBidi)Gr?1qx_f*hpj>op zhZISWc=MA2i%?XdJLxCLD@O80HJ>edR?TgC7!w@9+}><=v=xbzcq1F=YgDXq9hW6= zdSf1B4IS!Dah}d~eznb#oT;93h`}R+H0s6@eK~#gIovEm+kubWb009ZBp2`X(qwB6 zjRb-S`?$ldXyl@aSvX*gAd|IC7-7RUwdwq|@Xysn_odin-zLloj0~F)OI7>RpFOz1 z0QwoT4axXGK!3b{zWCf-O`)elIp9Vy;vx|Lyr#M;z3o+U8ph2><?jUH1oI zhZ?*WH6(IgH{^ z6{C{vx}xj_Tkp3)9kcd`gfdKml5VSjG_NJtrS=lS*)NatC&IN*v~orAEWe~W@}%{4 z#L%CxIfF_KbHKbd=vugm)Q*=GeyZm zoU6w6f*c!YZ_d-)S+z4?67H@CQnq+MNDYyEpvlt6s~k{~+ZI1j_-o6pD`fvb*Df*Ckf$1VLZ!%Vlfdj^}* zos8xQ?IPazxZTT+3&<{gu~}bqTVro>jpS;CakxcQ_m16Zgc+ z=%w~jbLZ0-=E=uUXSCa}T@z5Ho0ex8qhC@Y0PD%25T$s%)EJBR7owt64I@EOHFcrF zFd0-()K`^tzHCG8Gw$h|+)57!SInbWOG@qLL$T|o!&B+sa;mY*L}Eu}ajLNdYieJj zjdhSwnhoSeynYXfH6UXWLJ;g;$fWf2nAH9B1k5T!3-DNGrxT8DdNc=#koYL8y{WEX z|5?)Q@f1JW;73}8Zc!>a#)M*}mWrRs)>Y3-&x`Mm9E!*Ff)vOk&(G;mV( zt)mDkJiU2t<=t+gxMc7fV3d?>*&p>G^#(eC%6{Vs*okC6ox6U}cbAubqK}->_fd+- zQe40Dkb-@Xf__U_cgx=lSaFZc%6u#TEfJVLI zhaL0x>r+P@w(Y7Epli~!Geea%6?jkxjORiQYdf#%_qgKj^aP)w==J69cVKHSoG`pH(VG5p5=@@CQgD#3 z0gyc%cYk|<%TXo)!oo`t;M?2Z^$|z$r*|&zq6LZVujMP?JGIkk;;xzcid8J;8vMM? 
zC^a*wUt*TY+F+MZ%+L)7LZ`wE_EPAwph9T+woXs_5I*KG;PBuW(W~$DF%TqgWq{Y9 zrcAp@pRJI;{PdmZX=ucY)1gfI@%jlHJIVy?uVAZ$)t>`$1VT&DO5M$V+fve2gSFmhnEUuG&^ThtPBhAwL;uemb zw{H0sO1f>B$;FCRyJ$?cmUU`H7wlcGan9GWI7WhwkFopfFj&ua5W1+vx!CD-5CZ{X zwKGaPPqCY17!I>{4pk(?yydy?j04zQ<8S5831}W|Mx~_b4VUPq>b0bJjQpeB_19 zs z(~+te397Dt7ovj|R-Q3T=RU3m0*7$lG2{v=gy*yLa_Ka}Os-9^e0~FlH%*!j_<7A@ z0L5fm6op43OUSd`!|A**KK3LDS^}?CMU0LjX_xvYq~!T97A50FlyKHnw~&LqI0n+KuQM-J|Ob=U^y zn%U17-k9lMBdf|Z> zMgVrrojnW}&LIxGXx}mmc>_vxBd=P>IOZj`pLum7{7o+Fh!7pBkP=e4Vny9Z*LAYi zi$T+EoZq&CH65(1V%ei-*IaoTWpasfu^{|bylf3O+36wipj(3Mw%5(RnVaAj7Y0Bl zCy+Y}@b=++W4uDQ7~1dQI1-1mx#@D=xLImNF3DvEzZ`p!ez4hn?(G6J`)>H-oN-}3 z$Q4K%3`JFU9eMg!@S3Hq)O->-_xExa55NWjRZVsl_?H?4VkY&$y@e~lMFL_B6!J{?QJhtK2D0x@ZTUMwAb+<~ zc5dcO_|-TTT?|KVW!Tr#j#O@X24Mb#`j*Bq~ZTpe<{#7_rFM}W? z73|#@$C(_ZQ}_fG0`s)JHSh+pjHAh0`_3``gO&z3S^tj@y84_F97d4GzAF!XwQa|OpnRtNLKpwDK{I}5b z4RjAO4-+*h+43(VWZUokN2Mj{lw6*j8&;n4yb1!rr)UP`em)0S-(W;G2FGGL<^x|Y zfmnAP_F1$o?g57QK1QdrgTTY`;HOBDtTCzsVef1NWynzXLd{HtGAf^`b@R9DWefd(aHwsfEKU#j+HwxtP& z6583T{w*qR8&=#nzNwXWwCKHiiayVD+N3(q0Xm1zN*+mG-IA+^bZwNo=Hb8@QvvK{ zOzJo_8)CX9fOUG?$R;jLr)f!A`jN2V1tR53KnmOis?BosK6xrpXj%Xu7R`R!s*3f& zb9!K`MojhiYcREJ>Wcjwrj0xhrKnEb)JzJDOHfAyy0djo(Wk3RKBsEuXuXUIROy!7 z?3&uW{39FZ0gh79p8`UL?-TffP70S+KM0-?89;T-UuJokZU&3#1%|dYOb1UD2wOQ@ zDPu6Wf)_|`SC_%T&S{ZCZUBPYs@r2*ij}FHK|;+)D_S-M=0b1h#zRp^{;gnrQqNlo z?XUWdR>p^t$I2>bPNJUG2BiAL zzKR*=hKAuP{dsw^b{>I)tqnn}Fj~K>|HzAUF^99j*`q5{S;Grai2hf7G1V%#47Ag6Q5m()^^Affy7Q^!w}f5;BjaN6U{~L{ zd#?piG$brh?>NT9t6W+pZ|VoH$BwWKd!E&zdeZBwTG-7wvE-s1pBEV6&EtHory<7@xN%vIeaiDqYtPopcpi zFcjeJ`#b1sX`nUMr{>~mw6V)N<($jB4kPBI*@XhyGnWYLE9nxc6Q_v|CFJBLT8F{8 zE1s_5b}4x2lv{8V)h*Vt%lPT{U2et6c9Xj6(?SpCKBqUf-UWWo5z9`~?7DU@KzZ#x zvvI)%l=PZr0E*x&0`RN6A1UBB`5^x2li)$ch~Ic6ZE72v2m33+7d9T;PR|`nx8J7Q zgjJ`VrgbpguAtf%^ivTD0>a!utZ*>_l2J8~ViHE#Q(s2PHgCOSNOK@W%w_Cilpf)w zAPLsed_UK_LDF?j9`(Q5vGQ&n*+Q41m+HCVHRqSUS{i)M`{knT+$R&JwmP=Ou7e!Q z&P&oKuz~UM?vUyK0YpH%zjDU9@0Ki=vCi$B`He4bQ`~>&#J*uCeJtn=ZOcr3bS~*; 
zjcHFXO(`k=hV6sHOIx$y;`VO;aDSKGUQKS;6$vkFoV~)ZICZ>PmooJVpNlu>p8K52EaQ&a)X>6GE#cO0Y^n6LS-@{gvGC zW{)H|0(Tcpje}U1el}ROy~SX*;fx`Qo!_{OYy_$v(lni!f~TJ6Q{zE+a#6tttBma} zo)MUW^M5!#4!4VYds-UODQjKD*Vc>l@n7Pzd**u0a=4ZO7kVon87uDI=!=h+%Tcj} zBwpTBj*A)MHAN)hLr?BXs*DzmPH$GjJf6vJa%|XF-51Y?bd&czee2QZ?kgL@gYiQO z+_0lqc?Xr16gJ(vcgPqY&BD3#N#Irf4YN$-Fvj5dLO0pbfL0bt8k#Vd1q@~|-&#Ew z-WCDfB8*#9Y@&jHQ(|OWPX|ixTuhCJO7(;e(4ac@DK1RTW7t}I3gvMWeEo3jj2f?z zu35dHPybEJiu%bhJSUoDVkL6SeF=ENifsS|{ zd}Fx*0ou2kumQ+-&ntx+6tMw>8)=UXu;$WDR|LXJ(?D$D{O}Oa|0e*|E5ue}HIWDQVqN(HRJo?%aLtg>?sQf>QWx{_aQ4PMQGpa^aA&@c$E zyrIX?C#w1YlW>F!JRBmJZ)+4@$R)&qu4Nb2w5|0uh|$Cors;S3FS_^t)&A__bk-`P zC<&vKD&m{=jum!n@B}uM#v|MBDtp8k-JIB%appISMMW3zz3%}&b|eM|JE$@E6TZVn z!(cY(v}Ucr358qaj1Q&`BK=KdVcC_)Spn|7hhg zaxdUdi0CAqMzzAzIk=MtDyIjp{};-3SS%}pTYNwv4@1@^)P!(c6M!<7{W_8|WU10_ zCvs`12Wh~MZ^XP6Gjt0vGl4c>E~45gtPxNj&VzGeX`~Y11jdZ~Jb`a7Brw027I-|i zwU@9(QD!{Fy=7m@rjU@dOd*p7t^SNhEBa$5kImK1z|FKqq z2olk4253+ScSfF7!$-XUo;1?(c!GpN+#&R*)(5$c$GFC=U;U1S8R8_EA-aigAm2<~ z=q2M>GFvo~li5q}zl_+G70L;-ke_PwipmMI&r?w%K&S<-u8*G%CJhIhN)eFBc=BKa zDkfHOmFi1gXqoFWm@YDUR412vU9^1*Kxdnewh4z{H)sZ4VxmtrmYD8eZJo$1a-r^8 zp@DVRhc)dNg&I>w`-L3Qb^?l8h?zR}`4NPD-X2W6p%ci)mP8xdz&bB|Fhkq8SA18@ z%fQdtOnMNXokqCj0NR%$f-vQx2vBZmmMpE^g8}1iHtp5$+q$3N@vsnk*ptIZ+6Yq{ z!D%CR0j^+O+zinSHxIAf3Ih+jX3LD{$#;Rnq9>*2CdNak%6nvdxRmOzq^kBNX=#Qs z&I?GXATku74WZY8_H}lioq&$Vd46Ji1dXQUTZa?HNkD_Rnk%xNZ(+He-o7Oxky?X=;$iM$(WIh8z3TOL`_EIWJE@GATYx=_}s6z#pi~> zjVGfdl}*6kzE^Ij1DVb~M$ zJg*yvuqRZ#D)xF{Pv~C`9R1rnj{LX7z4@bGKMG#^{?0jkU>bxnerMspG}%oe9n7Ozguq+mVo-&wj)lVmA6cK7J;_C5#sn>)XCUs)gM4z3J~R$KBLF|_+w z$8g+Iz5nUso9@{=WCF3>%72}kAKH5o#m}cv{0ymk5nFaZUau+v)ij}0iw&m894BB; zMhDS_1+i&ou-^(hgNZwai93TSi%LtlqS}z~O7v++AIEqZ_DgA(va-A&b4U zXCS!R8iq#(@@|bk^Ej63KP>4pO|=hf@uQm{tp4D8wuJ7Y~MAl0`f)x-sI1VUX0p}@VcSc3ZUJv z#R6JJ=X2zLpzS%7(1lbNky{DgM!6JJq>uQzxsGK-BF-;imqMv79qn+`M(B7>&=Km<@3Qaf^(mQ9VSUx02~7 z#x1ep?AEzrl+XVvZ1x690x#wff1Q6Z?oAfaW8_68ZZ1P~3Fl0T`?e=f 
zBl|{7I-O}`pJehLV)6v%tsO7*?>JDJcx!g^?5^3RSvozNp54~wUn#U;_pV9ttiv_xkX-MJ zr>&*EMfb-s&;E-Ta{U@+yaBcQ%GQv}EN2uBiPOkOnjIO* z?d^gOZ=Z5SazriwzBKbwd%49$X;);bHqZ6J?4^>MOjcbSTa%Urrw8WIV0sHs=CY7I zyCQTrptPAz7-D{^#?Nzj6b+Auk+L;jS{6MZm@b1OL7Uc^=n|&mb#m8NZ=)57Ob-OP zLzTcN>_tOq&O|55V!eoSoBtqAS1XAovaZhq`eLJ14W+G&s&yLMnvl^rbkItw>7K+0r%%)ddpNkF z^NsMN0ynW<0k)6Kpwe5oDv3Cv0f+V^YW!fahYOpLlZ1{(P7G<;#H%>1>ihm&a379&*BG5$(=yEPGHIM3)H5)Z9@I~|_wh&X?2sb$B z<2pHto7X04J)99LYIt%-lY&E8zPy0b!Ixp`zTP7)kj{>WAvirpsc5ceUJD5c$Y>PE zDBIDZ{!pa#unV7#$mPI1>MrJnlL|8V`vvGzY_Aj0|eB$taW$mHzJ=`&FH0oHa?T1Jp zl5_Kuu#EWGp~-sRE9PGgXN3Ugphd#-ds9kZHGjkzC_6l*I7j>JNQkAHRu~$NuNTvbyfB}nzknEMFMO|l6A)KHMp@oG8#KS5i z?K;B4s?=O>BQr-oB{RA$R<}Vq`8Zk{6w__i4A(h)Vw245g~>KWeZ}yuty4p{KYS|{ zsOQoC-}KJW@cdTlcq6Nc_OhW*04gR)jer{d{<}phTwwR2om|3R(u05TkgPxri|}f0 zEW4vuV}19q_kUmPgIkXo$cTZA>d6>G24diY4FNI~ApP*yf}afeNgw*uM}~Z4R7c+F zC;bqfKw+?OejkXJ{cwjI1#q(c5LW`egZ%Qt@zvl&f2`tHIV!5E2=|9*Pb3!5%3=cl zhx_I%VD7-ZWD?G22|uZ4(1ldsLiI5)E?@$vn_!$}^^Go@R(eDPL=Q!uASs$!xq`4F zJu$mhvhtFIfjcf*-9eK=vLd1XL@7;vr`2PS(SI#bDAlq*|96NDDUoZH^ft9oK?Av` zzz@ZH4z-&46~sHFLOzjGES2NgXG`X~-pmO9Lo6w}M=CnKR}U{GQRC|r(SEHFo%0&y)Q>6hRZ3yqo#Fw!IWC0Zn5P=3- zwluV9gK5$Rbm0O-SG18l&zd4r=@VGSe03~vIyNx==dZ0~ml5r<#L+L~t8PWD?si=Z< zz-77t2WjtzWVMstid|`{C?_P-#DtK>gS%2CusMNd1w!jJA0#B_ASwJbrbM490`cj) z>+rfJ+=gbBQC%OEW^)dofs&q7=;@Wem+M0wPr$5TNRob6Y6$pTA%k?~S-pW#o3&(K zVpP&QEH*7e%XOOf($uRa6$9r)XeqW61odP3B9Xw@=HDdr;7M7yt5^i>1epQHbSuUb zlp<`nL{i_dbGj%>%S_ZkMi;ICiwT%}E^LQ~Jj;V)m?%!eOxz&CsE49{ER}2J@4aeq zLWw6ov+@bO3GSv$NmK^441Tt9ocxL$!~-P=D(dnFv@C1aQwRKE2-qyuvId{lW_9S_ zdt9b>5)?5;eU~aSP9jAN5Pyk$HkoDM5_!{k0#;gIBDWXay6_pF&uKa3OOu`HVwy^) zmCiG<<-W(1$LOQN`ajyT+<>s^n@jJ)zVI0yoSe3t;)%3A{rfbnros6!=NUe>tn7P? 
zM`H+i00I58vG+}2UF6Tk;#xECCslNM{EqO#Kysqj8=K%qw`jawk?>H;Bi9(UgL^lO zE#w`anvD%cjJag85TgD?tyXC|!*MoQ=tyr$u|a2tOJlScf-aNUV{`XTW6FbG(6d@bs&|;p z4n33G-c`K0-|<?vM$tYrf&w3k?&qfHCrQp{HL#FUMa$O1PHe(KYS ziT%UoL@Z)eNoi=FWXf2P-=}@mD#A2 z_^rEk@4CeuR2X1SCkLMDZm9v-u^x;ad`@BSd7hjHp-hT=xTrUH57`xT>=|}B_XTy6 ztmiR`xyVg;5F2}jXP4EvFYxMSfff>5(iTVbs!(G51w}#49>_Ua$C_#I8CRuT1EPl4+44F;*a{4 z6=!V6#9g_Tg)J$>wJS&iuhw*$2j@uAK+PqjlLqJc;*avaWt-v*Z#!1srVHwK^WIi< z)GyZ3A_NW0FY2(vv~tQA-uzeZDhiqT>h0N+1w=P_k((*c~!a62f7R zZs=fDIndx@Mc z8ag5Y`Qx3315dC|bR2OV)gD3X7d6~C@^(Mv!+M@gi#G+~9&-f^{7qq<9~b($@J*;6WDpQfzY@tielQ7{zqPg zM#KmsISZz=P2}9gYyy0Rof|HZbLV8lBs6ADVhedwF&37b&L4@*oGw~6TPO>6N|%<= zy}+$et9`{&z01CB`8h=P#897a zAjI%uHZM8Ng9}(@PVb| zVIh&PWAR+QhQ4=A2|f6htu3LKN`-gSK8<3+sPzxu)Z^_pDm4+`W9e$QD_A))!QD3E zNk(11po6sshPQROGL{P})wc)woN=dSpxc#jYSKNK2ZOeWvBW@7C;6G3H6?6mF6Ypw zl~%pcMoB44q(2ZF?RK+~9$#$4qscgegH|@5$Z%awDP#NV&OU=XYVPdTyF)7nJRVBo zjIu$W&gMh=w^84xJ`QFi!7iVT86gK zF7Wmu`V)8@$4gKyOl$7Fuc`9$-x;05BHppZp<^_G&a^*}?({eAnX*acQi=jUE_Qf4 z@eZ$tpx*xp`3|WD_ZR@ybV7*n$)BE!s|h%bSV83PxDatKnV)3tg!U8N=Vg4Ok>n-l z8S{~%dxT^qWMIQ2DZHDxB2rFP4VE|zr}6wD7- z%hY*^(&#|tGz#sPzps)v30G?6pV|mk;5DgIO2a3Xn~l$`yk*qkCno+4JaJQn`?m8+ z{UP*JVukx5`~+>K=-*b~SK~e!*A_ID{wF}7bNinpjJ}YBP(QR*flNT(moSi4q1ImB7tyaA+&0UG}@^48NTU= zraHo_P^0nm(4w7y3^jpA?d=biVt7~vJ+hWZs@R3fhc_j2Oz>;kOKG&DP z)0lwQCG5>=CLf0Tl>skPp|n4)TMm96M+;4Rv+C@R1E-k@eqIa@u3fIzOMNLmeFrza zR0%|@$EU^*SHhpsg*T)-HpI;Eb!sd9&gjx?Dq1~M9z8OXj87b@#L7J$S9dwtQR;Rt z!n@_kcd0LeyM*~B%8|xGnH#m|VF{>zmjF#0099_GI8nF(Sij`uaE*R^Sm!}g7liR%V z9s_r=k^(=f2GtX=DBGEtK8RwSMEAE-X&CE(r~o|w-NN&usQ;W>)rX^k4Ea#eWK#7- z-4bnZspA>ja`zXs$C%?dFesox!+K!AipSYHp5blF+U_s#=vW*Z6j}=l#b)qTMQ^M{ zwmLY#h6bqMd{1$Ezr#PWYuKJjrd?7UYw+7~c;MHnK->$%1D*KHq#OixLWF=KKTLwJ zx5fuZ^T;2d^?(xU^9VQy9SjT94&~+OazJPCELn~=>eb%9j1k*4QRQX6@%QVfQp4(o zx5g%a0D?P4t^(7S4tP^+vt_Y$M1m%*pOt}Vnvfl`ZC8`C6;KmAx^mTq2ih z;a)!*sqd0C@Dv4%<@-${Pi}eEm-Hpo_Dkf}qMJ~6oO(U``z)31ebk<3!pq82-!Z&w zpbYGz%(3Pj#}-<*94m&i9jAD_d33n%9V!O=g^{wMj5h~D 
zkMTkWeOF>*%BIrk)K-he^#4iw68N@?bKf&(lXQ-xbB>PoeX+Gzmb}WlELq;-EwSS` ziMQC!;>1CToj4&ZC4`btwia%n(9(qhZCYq)NMaIQ@@VP1v~VA!@3lXmz5PLZX`#J8 z+H`xp@K`LrnR6t0A?c;>MX}DJnK^T2zWKiY_suuos3~5@c>{Kj$rMb+0!?9~O08kw zFM^uV@S2d-=`$OPrJb{Vu)R)Gk6@LuPIM0QxdN!yd68pXvw;UK?1SQL_=^geM_rOa zB0T?>AHFK-_y*Uk7vXAb8(==mr zdbr6=>sgjn_t81s_bzzF-sLW_`e~A;aDq3QA?P8;wgErVer}?a6iLFbn12D5urKLocpPQieK2CK;Hf;mq6`)YK3xNfHSLg z`xTQ+##M7n_)&{bXVOsRyQ&%oDvUJ5%X$k>*VcR1h4P}Npv+fFq675O^5CVjFLrj7 zML;@#4d2OyzYMiIJr>CATc-0s;CuUm=uvtgvKUFS3b)6jvCe7cu{l{p0!x|IjI{Q` zzGa2qWdWHB0R+KU-l|Xd{i}`oxJz&2lmz(t5u@t1xi*4{;C$#N(z_WW`NLSHji7S|KD;GAtf|7P;Gu7xekq;1Rcdzhj9%%nbC zn{Va*u4`X9aP{|JaY`J8B&i_CuMjata_7OP~g|& z__aX6TMPz^mL`tj_(7l)I}Gh!N``eOa@$lx}n?0hUR=M<(Lx+AWCY+ zAFd#hg3c}>`jzQdOiln}PJjm#^4vsqtr+m#+!5Ov4nE@1ZnI1KIF%@PA$c6=oPjUtxv; z%3(zR66^gN+9f-CCkIKBQC$8FtCbM$1TQe$JMT-|fGRn?MqxCV8II#w&}+46;kC|W z5$*cy0;SO-42cmJq*c@`rpAK#eZ3DX&?*AOU;gDKS+!bF5lJbYvh|mjmM#9WVhaZH ziY|;&Ss1vKpJ|dO9b4}ShuTpkTX?r80+HuAoH)4(C@T5YMP}YKn&J^22&h*v7K9u*Vaz zu&gD5TK4kNWpJ9DkZM>bk9H%z6LVt*y#74Pm%#eYV6d|M^Lb6;Ji3{*p*=RO=>94t zy!vXD{=5Q?M5B$Tfk2BzcAHmJs8q%xhuv>dsZ4&mqsXWNF|->A0l)AiHP28K%c(zd zI|62wH3uAypoL*9K}gdj*IQSX(yy%BVbECxOD4cMJlwb~kiE%P9(<$l;QEEbaMNN1LwJFf*W8`? za=<^>+LYf_RTeS-SfQ5Y)Kx&d+3MEOw9bt@!b*sr!e_pU)5It?4(kB2zatpxdyq~| z(wp%VCOICX6SL%^XUaV~w?J3%EmEU*7%UzgN#o^x3w|JUsBYd_15yrdRb3PK-h`X@hs8L~=3YqCg?dOa1q%L+%bt$XlJfJc&ZJ(_a%daq z#~zc9OKOvj{XlQy6*iAx;%Mr5yIU~vv_i0ns1C=MIPfEK0a91FY_aFT*DxBh!f%KP zaN;Z^E)2N<|K+terJc^tz-xK9=4nfRMcVp7Z+CAOsd1Wh7Lg@NQxT&%nkvB|z&1e4 zMUtO;Ec>m~XR_b^1|u*O!iSr_bNbxa_`*})-UPL0f?`ngvKwISo{?%-DN~jU+;rB! 
zVHa^7ro$+B!C4upda!c5Tt9fjT=riOl9K?>m`YQnIIDxC@6-UYyK7x9NpKDm)De_; zNh)dvZ;qWi{T)~tNJ_AdUk97doCaGTQ)wBBAYpaC{nP@i;ZsX)@HTl0a_*SS3j~N@ zUp}uiUSuuL$}|PrPFA}a)_5_^l9p#z5EhCnBB|L(#jeaPT0(SX1aE8Ew~St|@hyn& zApcg;$B(nxQ%<|XO{u*$T5IGvla~H)r?kJrZdi|pqWe0}P?6{5 zsXWG{zL++V?3F5l>LDyUa6qUD9`fqnW;Gi2)?3kfBocnTt@l3WcG#UB8~J4@eK_8z zrEOj$RZc9CkX!u+?Ly#AeZeY14^{Urw+d#}LjW774=k^_zbbbX5?h3+Dz zK|OUCN(osKQhZ(#Qb~T5B$P2+-l__`3eK$4S+oihWAK>FP`d-fZH0xS|5FMIp+b=g zar6xQA?Xq6Z}|N#KIAC8cYG881~{`FX4F7!CNXaCGZ*W{x&7dQ<_u3S>XC z>R!2jZH>@bwqo@?9`aNY!-$0> zt{!dM*w(l64_rrn#$Q_AD`|j*^y^WE3u!4wTfH37sjZLO`G+*uQJCz{FP;|BIF^TV zo+cGe!qiN}mW@F2y{gI@Y3^3mzNG^R^ioKLyz38bKCUx=o4@soSD z-ZB*SZ@P8d+rMSBRPQ#c6}-!0bm>*1r@|R&Epsu9j)mh-?Y8R6lSB0-<1_QEi35|p zF$V~6u86B^N0ZGE?Wl-#l^GBD+NN7W>pPRS>fMvW#g(lgUG_~dRJ&t*OKHv4-gaN} zfh|@3jvevF?PH^rq2ZA&L0fxYf2fGjsGw@cX)JZ=-P?jiapVD zcxq47zRyApC_6{rwTwoih~~5IYl}kyqQig8CcwADXL_oa}jN`K4g0uM%>U zU|dxaH~MZGmeq+0TS#}##8Gi))A5;85xdW6mN9>fa`pURe_voD!iJrvXl?i%eoD zKHTP?*n6;U>b~ujz=w3Rwk;Jp&Rbs+YA!JxbTtk)_8zaQo*3Ft9BFiGvaf=0WaEZj zzc&@HwWQL0B}FY;W8n?GeW9)s9f6rz9|VvTuGFfXgLfz56+mS+ixys`VpY!CGIw>c zzGCPQev8Hzsq=d3$|FW=nOAQN)+}BqU036*371q+K24F^RH?6n2{qHpgNZx z+0>E&>NDw0vdBO*vivM7Uzi2Yaf)hOng=0sD{&E35c1yNq*0fnqSfoRkn>00-d?-x zoeWkHO=0#7O?U!I-!ye&qI7!paAWOgqGMKjoxh5)2!(8SSlv( z{qKMMr|*OER}KWbr{i0{mb3+Iq7uzS0FqQW!ZF9dPY&(<{gKuiXErr?N;TB_``=tPv`8`ZeQij zsbPQY_HX>l>^A^7JzBN(+RuEhy=S_`r&0(yL95i)4wueEEPF%;jhy{=_uOP6ctt1ZokN9!VeI~TQx*@R0&<2(1n z%$1F8_^s~x6055!>8`x7!%^S74g`^WD_Z)1_!*_ghM`2gUZO!q|HjSnu3wGB=#gs0 z=&zh5f^!6Zidr`o4Vi{=wnVfxrwVOnkUJzKM=_Lhr0Z8{DXVkzSLro*%_}m8rMxKe zZeC{k2-#B%o#ho*PvNWpU&TD~5c9Lxp4=HSav3Rj3ExPiQQR6l#EG<0qc~9lXbAgF zPM!uO*>_5CinE){=qeUC)UkcY6pZ1gFVt=g1OZ3;-qXG@Be+3Iyjlaq&zL z7)pq11v96}E;zl?@d@NoExA{3UU}|7zbZkm)9rJD7KOY=8lC;#%5&Fbb^-LFK5)Q3 z_#5fRHeiF;&rlznw+{5TkV#txymX=1A1$jTi|Zg#ypYsTWG~sHMfc5?je{xD+zQW= zk)EpJq`60>>MnWTSdNqGiD|^Q%6rS4LRIwu8 zy?@hj_j^yE9(%wAXaRMtjt0)`@8A{hkgr^~aXKCMp!ZIc>>;P83@WMcF0eRoSDDGd 
zlfZtQ(=9&a1y&FKORqiqXS>Ji`5@niA1t4FJYGBC(Xa1lwWNYkyz`ERla8K=?4JP- zdvIduG8rSQBzx34SvS(-9mod&elwT_7|(KJz6>y)p(a*20!YDoj0M+W;2Z3yDxz8AhQvhltb4yV!a zhQd zeb1#+SX7eDiDii4`&py~3Kl5FzmHQmK-Nd3v22I>0&Q}$~4_2SS zk1w!pcRd^}^+{EoU?k}17!PG+!x?&9a@0ruih_iAmM!TEFh^9%!lv_iXq4$}8s?H5 z_mS_RAmN&v_kJ!&Z}Bon$#oB1KsySg?h|)V45wPG*BG@*QpIV2p>w24G*|RgHSg>$ zXOXT?P)eb3%fZyp@zFAC+x*tw;}uGdq1NeaI>`T>W`o-zs@`du>~Hr3l2NNW;D#K` z2-P?ZU*t6hdS^STx6fSL@qLxNW8XkFOI#1zX9T-PZl5Z=E2-%p3ihXh{r$ldp><^N znG2X!jEiwI?l3&QMcw_>{8C^}HOkLz)_wmtpGar8YY+4Md&XwBJ;3SCa8xKA)+djXgPOX62 zG!(>a^~Txu)Q&c9>EQLfH>f!kNhvvYE;Z8?u*UjhjoZ5`8K?)7IH@!=4(&^f+&&U< zH;*xlhNg3H3p|qv~aY?@AfutuBzFZ@G5w#0YW<3ml&=Ib=A0hAs@xt zjGR%>>Wh7*=%&LR@tKWvYMiRvxF3#s?-D~+LcU#sMRUF+Q4gQN>4gAEVv!6^T`-1u z&uG*Z$NG_%(%E zNl{9*;x!~9^)9=eCt6g4BJ@p->=|;?%D0#5Eft-k$3=yUGGZqTTFr;7*m6 zrSxL9$+NC#_R*QDr>}WzwvIEH^#KnLalP4Ya<|QNCB~bb1Vvhlao+7#igvLmnEgCS z)J)xuB=^y!%fv4zH&%&tV7n#m8H#7{wgt7-s;W%)2~zliPs=7Ei$8c+BI zv24Tjr}hVWTgn6_pw%kIU%$R;+ijb}gf+E(Yx&-jqk*T)bt9?b-p+*8lblGlOg1~g z*M`1wU01L>edpJ=4LreG;{U4?wX8Gq9zPFg@E?nxM|N|)4*!$Zz|zEPI)~M& z{qPcE8w#`tuA33dzPYveh&C~m>@SP-`)87JqH=4+Xn zi+CFp@JYNasb;Dgsqms@xOK76xw4Cito78_=V#eDn9velOk0Mj*2NVaTfau!f83x# zNFI$y^h0%8td_K9c>xXa)A+$tdv`p1ARg-5*U>bd^i=Hn;`H`=#-pB=ZH=9CJ;7hh z@4aT;R=*{_Yk%0+zPmj!nQ(pP#Lc&X-l1DZBB6onHpHhl_jq0H{bMz)M@Fj32KF`A zOl(@`@^ueQ;8UBXwhskbr_cRbZqI<~F4W&vAu=SbR;q%{gR!Z*M*pPQDFY-M{PmOU7qo!f{?t#r; zKGHGr=x+`kJDn~o->|E?ajLagn#)}WDd~l^n6O>KgzXY0G|8CIbVW>P%40&)KZFU1 z7b?U^UiI5PElg!Kl5hxtH6=oK!Ax+F|O zqcqEa%KFBowP32sa|O64&6E%i(~L&3IHpjuGz|q3(5}LcIVCm=bAo8P6sRS$HMF-)VeHr#jgY5l^kmNPz%Y5*I#WMKy3#_q> zhRt7pu4Iglt-z4EA-w`X!h|I#<IjVyeHRsBgp_^e6?p zX!RJ2e3oEOI#s)CZw@X3Y&S%UCt(|G&7;K%ygR8`Kj>ed^sisBG zs2;cm@d7uLfWOWm#cj(-(Ue1q;6EW!WIUq*TNI6a65&563#+aO6s2@1Qm5(Zw44$O z#;Si9ClbHb>rN^~4+>7AT6`H9zA9*ZpySw?bW?g$j7QF=N>;&?bk3}6 zog0V-20pbpzSS>*#yHZ_^t!CW*Hu1yYPSCDo=0XIbQZHlBUlBUO;B2#7FTLdck@KT zrCtphDLgiHC!~dIpj>zsV#aE09rmN#UI^!lKeyC0eo%MRIw=QW|C#{dq{*_6Z;<{YfC{vryn3 
z82Hp!yqMQTHyk;&zqmJ9s)b4$P_Qc2U(;8Gc7h|UEq$9SX6_yKf7et4L1}k81f_{> ziR89s2Y77g;p*%I zQXQ*NsR5$W-RCd^r9-IHd_`1h)=8+;{C81l!G(i3ErHS~#hHq%UcZ3T;GM<$L=i&L zzg`8B28;X>ASJMkkXoLR@Mta833Bl)YEY0J0rb|OXgkPRB9_VBiRJH9=kHYK??|D5 zh2@aIUC6%z5nUH3&!;KRhwoW=N7@FUJOk0MMIV&BNP<#upxn6|{u<2S=A;#2W7ym% zur>NZD7j8bQ->nP7#yIlPRf|Oi89tbgSTSX(ktf>*)EgubLaHABcD6UQT3l&Kn@`( z6q|!VWu(esp{abOQ~64#a+OY@!6PszLIo3xQxOQ!+gCyKT+E?Y1b^ennJi zG#;>U?jHp5usBOZ0`l$uD3H%D1Nb&!XMcOs7)0_rBqU#o0swT* z_5^XCanv+LbQxw(o9BOvGI$jvu=f^lrL!BwDiZy1pEi9~i4?JF*pG`TdEi zZJWB>5|*DRX)CuNG~ZP3sy@2-Xtbv#?r}9sX#N_=n>Ir3eM&;}ZL+0pHJWc-C`RlX z@`9wQBnk0=qGTncU%|YWVLjsAB~li3+MPrg0A^Z^>{oE^D}wt~i)YA>LEcmP@_`#4 zhv+_4rdL9IsANhK0~J$uZ7s!P_fMwp8xB;j}k3nP?env|FHx>+f!j zgO@jbTgLZ#y+;oLq>|Th-8X+@%vIjF`|gdKzjSQ}RIBcMqyysn^0Ey(tDCmB7PCet zlIpwR*nE90!mpJOeyxP?wP(3$2;sG7sF{x=d@V%y(+J^fAwQk@xU9Zs#8s6zJl=keb zuiiO-$CgI}C|(Vuw_*vm-}6_jW7gNk{X7i+Q0M>ae^#@ z0JS`4)h+jM8L+evpeU>?10GLu#(=H-Ju>3qdB(Gg*&{8OE7R2@yuJ#oCwYj}%il|{ zNNUcfg@8RjE%x`(x)Yb+`UxHP6@}hqM6m?3rBFSjkX{S!p!Ke>(;Ibaz5<66oqZV3 zegW(SjUMltdH3x%Dc)h$+07;`KEP@jIb_xm1#iLo7ylh$_Kdw_j4n(NE_mjQZR?C} zp5YnWwr$(CJ@44|JGO1xwt2t1x%cjm-E1~_(y3IZDxFUMs_Lhn@?w5u)9_%$Rc&A1 zt@7WdcJWMJIuN%KE*1|P@syHonQF5OD!c8;0dgH#JF-$5ln>Ngn#V6{|IWYTvfSp< z(H!RvCa)e4ZRmG4apq==1{u=-`S__=GM}}7uv^WZ?8|soqJM&fjF78XsyzD-8@rqi zUvD9UW&yLYXpF#l2VhhaI4sZu73^1R!lh%8&E~~*`BVHi%f4n2F{g&UDPKXWezW09 z)28)gpp#V^bAwg-AOX|CBS1{}Szjk7a#(C)jFqoC9qpC6unV&su2AgI=Ea z)Cl0)l;2p#U-f(#?nPDV_^S*Y4?4jBS0+%cpQ55k0xJy!F;5Hv^waqaF)P zpL1jvonn>7w^70cFR?zz|E}Mn?kn4JFzmgs(T+PUvpxeA?7&d~9wNEm5m|ybZaN(U zE^}!9SU%zkFP@Ij5S^bdg=Jt{SZMYsDTUhG6O$>{WM!rJ`X`byktvefeBuA}&}*<_S(>Cru2Wrp;r0iVWE{K$kSpKhe5au}?4 zd>y^oX#URq-peCnc3l}9y`wX5deS#SK?qDEcT;su%X&P*#R1wmXjzIWmgmDjZLp7M zg&9Hgwp?BzU;yRVMLt%@bW-1q*J=$MQ-BPiy|r-Bec&3pWX zWADfQsU;4Iee%?PK6)NA-PeYj>S^cH1bpmRl)0U54MwhYa!%jy6l-8LhnN(7fnl0qp$Ox!CpC9MLjWzIpAr`st*7@z%)(dcD>w`wB#(VGC1C*o7{b1UQy@V+3QJmHOVdjzZj%ofR(AX<=e`8 zkt8xWCSi9IcrR|Rr2Q%1r$x!G4~NT(M({@^uuN1$FT`bWKMWPBl3rTh@Cj%?y|e?N 
zIZ`IaE6Wo8oSDD#9%F8*Vh#tYi8Fl@!yc#b)dZn?#k}qj6AHwQaTO5yzQV!Z?NXiV zXL$rtp@$^qne%N!$84P~43fX9imYN-o|EB6^6MhrDdX%@u{|{)oH^0p^;DR>I|)WfVAD+83{%THF#7^!h+N zyi)%4CB%Y&?)fnx>lc4up6akB2Tt`TUSKP*hm_LaCIpr04lwQdl_);3_rTfs7Nk=W zXYJ5#e5zVMEa3M*gPlH#ZiM9;KP*%?Od&ddmPFd++YtI%X8Ez_(zCyhziU@~wsovD zS7&7C%cF3jv%Uwk?A0u*un6X0llW|F^Mp@!#|Uftq`%sG7Z*~ryg##P)R~XD3}8r{3ehU ztdzqSs&tq=M@7%k&+Rdu|z9PVwZ>S>?J8KwPi$mlgg zd%^S53IkNg#_=DT9-QaIn)2<<$o;7+`OdcD!q`Slk1*>a{FCN;m+ejZSLVl8+4Wyj z-hZge`rKr@AhlG<91mCE3Vi!#`3LK8hGs9wN)_0j(3l8YcJNkx1&GC*muP){XOl+@ zL$%~=o7}cX-{!LafJyWbwZ#-s7qIUvCrbn`DnGJah@V_6QmDUc+E_g{SGWY9dv{67 zG(Mg;;&?v2`LsLvzCPY(tsHmZ|MzVtOmg~g{z9#$)l{{{b+|-Wc05diHy7Ikh?;)Y`IBei+hH*H z+)|>u=J#b8Rd5!7-7`SHc?_(CC&Ag+U<3U=0{|<|rZ*6Xw!QZ7FAlzr!t+R$;9#Ke zCZ~E@8K9(xrUM2uyPQ7DRe&|Pt>y|!{vA8eM@u{$W~t0UQ!KzFIu_e@)0M=|SX4My z;MlYf9aB?HM>}S~>SFO_~x6a4U7PM(|ucfVC*{=`1YUpv!7YsH6M8j>;7@{H~ zZ4!^@F;*duzn=lFZ@|Da!mww=mu>`jdwO0^-Ge0Jdm5)~5aLvQlRg7?ak2TZsG>&D z)70iOljKz!I6@NP(E*XF!@e7bc!j6!xt5o^>2?AMxlHS^dkDPd@-UxDgp}JJ*#6${ zzRzahYZm&uy#H=avqu5r$p4gNC}M_=o$5fBr|^g2em$pG^#eJzd{?mZ;VlKP)t{~kyYgni+trl%5i@6wV?xL4+&|P#jA-pVpB2{fLGwpzpoQZydJtSv-8x`uFOAvNw%n(Bqom&~i?=)cv6^*Z7%-ZBF0hYLGteM$ z=ub-DPYBcGb^|8>k9C4D;lE>!_0+ZJ1=xB2Vx^AHCh_ zZl83<6e{LBorEF6Qo3nMr2LCG&W`#!Ghc@nr4Z$AJ|#F0dEn_xuKEt zTX$+;=f@i*70Z9~{&a_h$lumt!Ay&Fub00+X3XVzI)X!qy}H-}()MPbkMI5LXh)FUK6^kQ5K5J1K$1R5AG1j{jc&HfCC|vELoEdK%e_a=Y>;oGHj(sK z@e@aHTOi7rSVg%RREUZHI_QyVSRe)sWm&L1QZ{IDEY~qh)J$ODEL(Ws?2w1-)A!Vtf|}KzeNdpDii^F;nzz^XdA#%8RWw_e>hL>%fH zl8r7T_LS>7Q%hKq3)?~4SHy~d@sHI}24P#gt%QnW>zMZ+M$ss5@4Wyo+wIL^DAU3X z16LSJyh4{9$-WT7CRDFb#I<*1$5JU{-8`AV=&*+ZZea2D?-qpZ;;C#;U>`j zTD7G{CL;<{e7m|v2*v(fFhzZ%@^i+>NKd|pwwvpY&+Y`{miMj7TK|z7$7$n@?92V? 
zD-5}nlJ4W7E2ga2&b#Rzd|lvUK+?;m0}3HxRmaEtkj!&_R&JH=*@zl2wEaIpJ$!{I zjv#e1c=b(~{CLRV%|C~~1t_5Y{u@O=I`T&!bhyAmh84mX>-WbYg@q#ZCa$@K8AeYS zL2t=yCBAPJd_W=$5f6V@ot0sS_%an9dVz~u#iXO|K54%FuVF}ipXEC) zX1Sdo$^QibVNHR_14#U|!iS7gB{*UgcZF{ZBH4jSAcl7!>q(k4VKgX$U@YR&u9n)} z8DPLOvw@r(Vpw=&Z%Q#XKw%HY_W!ZHlY@VVx6&2xWK-ig7I^DHd@l(({4zQNrxR7P zC|m8W;U6hT!AM+QAD>{Loyfa+zoHoJe-qH7&13VN>=fCZ%+G@l>ZbWmfR72n$6e=G z=X?U{DL|t^M*{@>N-$6t#8H!S91q9UQGNX{=@lL;CpvFnh?5OUx{^RAKe9Smptvb{ ze1U42!{t6V;8eDB9{*3YH2w%H1*QKf7LE?5$kpFs3Wu^bvQ>BOw%`1zH0cC|aP~m? z_AtDJJM3OcaRZE@DE=58m zD0*Gr4e_M4t0Q~){0YoZa~Fqh3yorgYI?iK{4EM5Jmt}Wvj8wQvaVAU@M=p*Axc#e zSmaR}W=(KT7J*{)1f1f3O(4Mqf6zb-FvB?okPveXaG|sW27eQqA~AtLk>n)?nsjg3#_X4` zG~i1j{U_;w1@~_)E!Okk-|CUeU z;dfkpIqu;4?~nU*+I}WghW9>eg?OfZ?(sk2d!c3V)qMv$^}{#)#mta(~drHuK^jj{i33Z|Pfj%OK;jp?M z<1RIOXGwoPKb(VnxjXNcig+8sj+{`>c{hv0hKeO{?$kvc4{r+BqbYYNi!N@n^ETQY zsOaG%#~HF-5r;?OC-9TASY{p$*8?IIco^45_Go7ip-F{Jsz=Uoz=cy>1iRggmu8-l z$oOV-&-sNxJ4Z#w%ci;|7z>nd?%IO-C5+lW!HPUv~d@up{K>@gQ*`8fnWf( zqwpWsd3$uX;95N$!!N z9L}TO3=r}$#j!Q}-a>_ot(cvB+^2q;4PRHt)8a>!G406C$xFFP1dTRv)SHbPIB!1D zltD|*W`|BITevm(mU&VV*cAnQwC?8-(vi1H9pK2;iW-s%$W?)3`4D2s5@In6&7_*< zgZb8UK-&CQVCe8jhR}8RmJ;^petOxc(?PeRh6aPXZakr3c$W>DGnh}khs;wH5(soL z$*p$1DxurSmFFBPX)&H@uq$!k8nmg3s>tjGT7ub)9gTT#o0*z&yv@Q?=BRw!Z{4lI zTDNsu=c>kGAU|bphfJ)m3jzM}XDcS6O|d6`5aU;_Z#kU|dPdPZJv6Wr(nR8f0&(c* z3ni?r9L>CcOL7lqs6RzJwktG9vIG0fXX{l_PpGFQpKZ{@#{reS1Xz3kvbZGPh0p8V z{Pp$=w$cBZ5d4*YIwl^~vS)5=QRin=FDhg~19ENs;8gy^HHJzx|Gox0zY9~S5ldVy z3&3r$9=(oSTa&`J{j|9+jH^aiS+Q#C=&1?38QGX3=XF=ERya_$K42nz9p=UE-ju|Y&1Sm zCrtXbd@;Zvg0@jM+n-G8t5Nk=D5LGO&8;2)GR)Q1O(mzPg(DG27&Lli!-G_^4L)~@ znrx0!xBt~NbaR&-33n05Cgl69+=|8`vJQJ|FqE=s_Tp7aOlbTC-e5s@9u18SBMM<} z!h6YPmW@_kh84;b?skkkvS;zJz#ynp1?N!0h}<)X$n&I&&jy$cZrRAKNEj`}T6{kV z-|3^PqJhpR4{bSbES$uP*`&++H248Jxmo3}w3@2-jd_PWFyS>>pb0BQjB=I<#@n+_ z(5-uc1w2kNSqJRXYSBfJhWu%l5O-DRjJlrY$S*uy_#j%x*71$CZWE1u>erQe|SbE(RQ4CGuli( zY;6UFc*7hOuk&yF$7qA1HN}+GmY-^9lD+5mcWB~qJBIpH&ARa4a0<9G6`YN8quKA7 
zs->Yp-2zhA_wBoNwZ-tNc-)v2H-R=MWH{mMhPBpoB)bNJf4q11BDecm@qd4o%J709 z2%)ZnA*gAu8|M%Q3dVm*A%P*LyFFBItKK`)nDhS2XV+;4{qmc}U$`}Oy8XA*nPtf< zUrfH<+_EHN@RfP42~{2r<#xIC?L zzWw%gPt%EP8R;+fpXE1m|m9gLl zyC5B?xVk&GU-&EtLLn{>^g3>5H*597J%@_jkWgwxgGmeIIbje@Z1l^)*XU@aDKh2k z$~nsL4>5S!8&B+%Wi=1dk%E=6G8@X`*qmK&E?BdTz|j6Slo!0jw;DkVfIqZq<^fPf zof2e=JX$7;pz$~_UYb0k0*sO@gyZslYn`NL7vmQ{RFDufAE`t5AXqU>XHmIliK;rO;B zKb?m(G3IzVqbQXflSLlS3p<4v_z5IQBIkCIm5KTF+?nId>&ua`x88XQXkz*l{pas6 zrVWkC9Hpc(NHwxZH96U!k_G2qylS?9rJ#J+5g4fSKeb;g-p*9SPGRI}mzy1S{EOws z3pWW_qqVXHOObQJ?vH`^tB7}AU3Fpj1+@V%VBF}+Ic9o3ssfc^6`#6X?qx6xzUu(; z!;;=f*pGMMmHFARhz&U{zQ}9f}H% za)=8#qt$DpzG|2Ne|NxtRWUS$M*}Z|wzfILcW#b6l2)YLZk{sz&xl027)!>XZm#`* zmUwy>T$6h3&Zha)pN4O0m%9Vld6ks}-e5~?)>HsyDJ>2ax8y74M4{-0J|y@7(}wA4 z7tWe$1$4-h9v((bg$ALb45-j9Q#h0@tx*m@P4EEvJaeD2dhYqoyfG?p*c^4Q&3kpr zywhce0)02mn9fmX*B*2Dzi{CanQ2%mMCg-HD7A)Uv3nZTx+w6^#$Ky{xQ=FOF}M-o zELlI{mL#A)ZHEKOgs$E|gBOjc)Z&>;&|?sc=vLtl2eg@6He2jtpQX;0T!dLM7mpMX z&&D)!5KKa$=_kLu`!9KmMZW@0tJ&ko^`{`uf!xs(JuH`2*F1GtVc`ZLa50KLL$pWz zN=XtWnV6e~!0o$SWUR(cG08*BaMQoF4y3-Q&Z?t19Uab^gr2WoSvkQBp}567U9(jYCVRIRF!h`RC;&c!Tl}L+tgJvAJ;-gt`DFm@qL5mvl?^OMFf*2zgd=>o~mrzyxtiI z*~v(3F+%3^*7Pj~?fpmsn~x=JO6ru5LjDoy$9RYUXb7_u zxR5TUI%vlv`%`3u!M1T_IzRy$K)N*?+G!Ur%wH;u$cUawu3c-_czz1^oOpOvPM$FZ zBjaTGsgU`D-tvafWk>hXQ#p9xkg>?LQpxeDT3@LB){w1g~kj&cPe=4^)EM7%acZK(}|2n{L=lAeED_V`hXiGgm=t%DWG z>7zcz+%|z^?8T&r7vikS9w|ChWBrL}-4prAV7odzH3!O){dKYgD-Kj-0_yt%WVXtJ z{tH2+KY(@7G$<$dAc@l4KCNRi6S=M;5k6v&$vMLcc>jiH@=_#QY2y({Hrb+IQyJRi zQAznti@upG+*76$QWnizfY5p3KrTS4f)dX3VDH%;RJ|ZuRi>0VR8C$0n^E#8l8i6~ zQ&hSbE!i?sR>aldNko@E`*71nXp1tYN)|&$!`1kzNC%GV+p;a3&EoTvsTpbtfl6FF zU1sDGjhjvOmJ;pr{cRHc5UVWV9)*br=e zCGU2MYC-RFmSa`&J~C`TNzUr61j>N+MsgV0>H1% zbh?r~=u0~Kd$2d^FG$5|Udg;~ADTCmH<@?f?SsSRmzKb{zjyFAtU^59CvraqHaLeW z*g)rI=BC!)@eHTq{colNN{_F(*L2skhAY&TMe2Bu7nRB5KX-rauCxA*|5;&wAE_w& zcChiTs@PK5c+$XZ$Xi~V+1Lxl49fhKEwikYaWL45agPjji~b0a~dE z&N@U-_i^_EbVK>$4A(YcOjbmGQthcXqnbdN-s;Km4GKo!AQae4 
zami}AX$_A>Oo*JMIYUhPaZc;0tBjOlRIHk<0WKK70$L^BM?L+;T+fE?USwe*g6!8( zWBvt!vqyTq`vPCoFnYKdzsV^i@ZPdl4n?bw53$~)-WhH)2-UXtz&w5fUr?K8dPDj7 zBls5%qw?_md@c6@9jE;LGjcS!KPjHD^~&bCeYti4J1+>|pS0hdfuG1nML(#%zOv;% zMvjQ7YlwAmPT*}h?a|*X*?pMdZ0SB_edvtHa{jpa0ZTzy1kkLlosa5h=Oaus{Kq0N zPpD*iV;!~K+z@i|t=v^;A-{!}sJSYE`$r=cx>b@0dlabU;w3U?WFA9s6moHk;^s1x zV*J@cTh`+UDI1~prc~Uu)i%D{pA;-3$(|>n-Va%EJn;nG21B5Twbth;a0Ri7)z-M! z(nZJCmXXplm;MaK7PBrsS7O z)$$Ns%zDp8NXB&B1mhu&5F?bYfjkM>s&XA!c5ES{YA^EFPPJi48!VL?$^Xt604{;jycxJ4_Ib&X)^KXScyyoxh+fA&52IEDk5xZyvCU0hM>9rtAP$cbzfv z(S$K^L%KMLtE0ugeUCfc5z1Ppe)xNVSI2?Rq=0v>oGg73mFOiTzzbouqSqi}#2>w! zKq%`-`2Fu7PK#GGl&f5&5hQnP_@WFmr;xRC;M|#|DfF{&keCDat!Zpn@$_<$$x(jB zf-15(13FsMw`zR0Cv4{P_ciEvx)fM=9gmVbD=UU`W7Qly;boaaU?U2#WyW?me2hw| zO5Kv>?wny@Gxd3Hr1Ea1`R?UQDXTsYdzO@ZrD=4}mZ+sj+SIYhFZjnIS?5eT0tGpG zoHwdB4%yiR|K+kc0?|qNWL4Jqp2bsT((?Jr8-gD-&LDYOusxzg`|GF-nK-U+@jWkn zsy-y$TaoZaoVsy7*`bi1sjTbAwXUC|HK<_}J> z$!n|FwH#JX+(4<7S3?A$bu7zho3#T82-0)=G7>@uEK=Mz-mF7fI8*Hq<9LB>fmuU@ z)zw_Au86x*Z3&Jat7&|(>I!w(ekd1*P@dYv-7--mgu2hNX+G0`DE86i465E|w@Y)b zAVK)+AdfkgBkC$XcZ*MB?=J)Kgm?ITK#y9^`xKcMCs_(N>%*4jHZ4H)glA}2M`|i> z#T1F&PI6e>SF#MXO{a;;LTLADxgMk^P~R)$3NyQQ;sHwxZH-3YBnb*&bXg1)F1$GI zBx*Gip#>wWOaYgM6)+3lN26gtOaYz3;Gauk6)wpF(V|ib1u`ez%FyA94rZ;0~{L zD2Xhj$F#!*r-?;kBG|)d5nu4!Q2!G@7uTGZzr>ol#;ikA7XG1$YmRMcsiOfM+!2b) zYZD9V$qUX+pufmd${w6KaYc~aR%r5&mRfx5LJG##70%Q^T{)47wW@u+y@|QsI`_|? 
zgNgHJ`W3cd%6zYx4%6Gi{YeM21scPPvg!}I`O{vN(DV+nae{Oekty0gaWk1-@~YE7 zBZm52+QM0C;fSwuDiII@5|p}z0C+(<{d|mU8|D7T-6yNdf}U7>os64-2I${ss5rW& zzJz`vbev>`kP14U(J1?;(n~c7J%Y|=l*7L#3HVjHDSlMBas9xHEKlyaNsK}NyW~w= z=(@mJ)Zcg?ca(6nk_bwa2uyEiZ(WCZ?~t9_du$ys<30PDpv#BCU6(%B~9QFXig2tfW?#QHT{4yaR$ zl&t;YI0CtI(npg>J_-*TpyE~>)K`srVp10rl3X8`&%xq)xU9P4obX0^(I5LXln@G;d5ZVN|Ko?@i5|Kdu!%sp;sL$^Q!<&kzi@6!ZtdU$ia*IjK!7 z)8c6j{0VlE2B&b%7H?LnD)tlJ`|m5LOM&CL1Pr)E#Ui!)_ondM1p><_%-!NgQQztA z5-Sakd<`@f} z%@$(jmIw%<7StJUH$)RU;m9Ovx$eRsP6k8gb?eUwLGjUS`+>)Y{+ocm^I^K< zv&HQV)L_ftwE=Of9mi^P7nGf`dSNGaXPw9!s&sBSK!-pR|+G22qcd*w{_ zWGJO5KZvqysiLitjDpFQqSpq|IxIy|onmjmJ*#3}s#oD8C8VFKSa`@1&`d4ACS9OO z=M!K!)m!-yAk1GWt>LD+m6l3rOt6JgyPd$3h2qJRaBH(p>PCPZ?ChSSe#BS?I8Q&7 z&bY@TMAJm?`Qemd$&q5d2JqK`@f$FtbgNAgDjvdaw{o!$#m57%G22A4GP_tSLkV%m zZskj^^RpFo-j2sveGH}&i#Ds8T8B@=mEMNx|DMUs?bWn@gO^H zR$4%$2qyJ78d*5#hzL}XH|7zaY*0ZA7BGtzdkW0h`9|)tz+68Z?^56!oyaYS;mDfK zko7zA1W(=rp5{ia2PDi~)o;@nklto>0&K?R0!{AzDX1|Q5VMr-#?fA z=8De?OuvYl!Wm7TKx9_5O z@w_e_pR*_JWTS5iwvPgzis z8gAVmOQ%R|hC;M9v(RrKfkC~Q=vr#V#8{7>43!SB&@=0W!=g} zoCYN&r@MXZX2GZ6T0&2Ll>nKN=>g~?vh!-ePT<%2I(Ku@ZlHSLe$fAd=C`QmHjqUGBCOpMBXr6v-aQ=|%# z3ik`8CS}N;ye=G*Dm-!R6!0dMITx6doSagzy4xJZlx&XAQx6}Fc|wLn$tW)F)qprr zz^7}wbpKS5NFI~M^?}T_Ow<&saq+R3H|*DTzPa*=nJI>u9-bWlVL~opy(ne^8r)vv z;eY%p9JPllL1bSUxUYFDg;OVsnGdtW1{112zr|u}-Q~aS#ZBcUjFPwpvxjPI^cxF3 zGdtLYa^V(w?d8-oM1Cd;bDKjy?hp02BVt>llN8LuqW=56z7%Tu*<15Wh50p^NL#yOaes9rz_V7o!G?crPs<` zcfrur`v$N>u>!IPu9Qtm2M2|Vg?Wb$lnI_md__=xK=^$|Y>V+&MJSc(Y#)*VXIFpM z*02N0u>j@Un(a5`jV+8u21-}x_08zwDQzCE8?WWb9qbCVX96%Gz*ZD zWzM&%B2x$$W`YUgxF#k@U}Dgc`@;|IPtqnRn)W+CJVqb(_=DtRGe4lc7aD5n=}M+l z+7dF$C~cfm%k#bCCa!6;ldQzW*(qz}n`}{e37OdW)JkbzZ-tnHBB# zsjOL!C5AVzJnmFhc%XCJa_Pvia&rf;Y7GY!6CEElL$S3Xucm`sNQJS}VBx58q3gXs z^Wa=ZO0iChwvvX4QmwY71#3zOXj8jxce5>T96o>gHWF*ahMsm+D?>%Az0*}w=V~D- zYnkOFq$(?dA17L3Sl%GDHKV9;53Pby<0=QRCm(zJQ|xVw=2K|^uqyr@iHh?jKMVG2nC!<}_6?KXZOi9mP={S>Cbl!Q 
zZ&>EbWsTbt!WwA1)^5|-ZEmwhXR+nwX1mc{*&&5}M1YTHy#}mW?XIsXCg)oSTjHjAcq4uM}Pc^Y~T?sq9!>KaFE`Fe2C;j)IXz>ZkQvJvSpa#7Und%sSp zq!hEYVMJ`ApD0&PQ+rX{I2C$jAXH5 zuv?uqn;tFO>T(#cbsEmg&uh20aIwwK*H_7}=i(5>UU8kOIXdE9ac=+WI+XtxuR z9W$+LTk7k1e(3oa@-2&2TyrQfA&8Ulk$#(xsB^j!>E>S9)28{1bfTjcMQ!EfVrTcf zCxmxUU_ulwAveM8E{X8q)vy!qx4j*T#S+p?Bytq5UN$$C!+LEuH{w?{NVJaOVYXtzsr}OF_1?)Zb2wWb?h$)Av%iBQ`_BMh2QDBZUTxHwHZTLmUdROnO%rVw(N z%tGihi=ZV-5-|AFV|scABwZ##t~utkON7KMHxD&Su@Dy!9~909tLwVLk% zQp%|=GEiyhc^Fys&8lka@mZ0+VwXGg`rw&}1*8quo zc|Sc}B$JYqRE8F)CF`}KHcfOZj$fhY>^z>X@#t}6`#GfMaxOlx0_XtEh;u3#v@%>j zo~P@ZFD{m=EuJaZfHpODmKL_koUTtdtKGI{3SEtL)5!@fi`o3zdQCb+$<^u#s+u%a zP@lQ??PP6FM=ETUMXi3^XfO8%8I{KfRTPI0=O$;JTzk=aoO(U`y)Zu;z@_od$52qO z)a8qV=FHR4Lmnkt7b}qpW-qa)%n9nGMk}usw(APnmSGq+6A_w+{V6(hy7B#Yl7Se9 zOiJq!=+@d~>s5{XnMsW`ZPRn@FmJ5N^40Ec#x7$K^8J~E=|QF^ig0n-1;RKLz8W5I z8p9N&XhNAWnm3pNJyZFf@~Bw)^|qC6!+qSdceoVU(M6V!*MEa_=fUA}w`c@@Jj@W= zoY&FD##NtQ*`Jsa%DKY_J&iNQCL-R6*(rekWPWl${fHy_J(v_SQ)Z?jDr8dK=+T>1w7;vDOJ)xwOLTe^=N75R>bsbtLJ=)Bqkx9P&(97888xC>t7$G##b1~ z+m*L;4DM4&q48lyUmh=n9J3Fv9uDKQ;c$sqLNkPA(xu6Vf+G%y@2%$>$8^Se(nycm z89j6p7stVmTAB*tc@k}*#)u?N2TC!MrA>~3C!`l0YjG!^#3z_-Mu|{Mu%k*I{8|M% zj_tbv{U?z%?edX6n37{9i894rsqEN@$z&7E9haEe(mxvC7V^~Bu#!iXiMiT;a#elX zKaH2RE5s@YH0rYs1O94w=(!y5cvu-|xSUjQTj5ycBp5%voD6=pkTye-YAn>1T>bWN zxq4X!=)T;)DZUgwr7@5`4a1udc~ltqh{uLa441@EaHQCI);U-l{8(pN!&uwye108A zntDC7(Jj;QTQdPa^Vlk%5~`(Iwi}+}Hm=R3EOG5VuKa*aRvT6rCJNsW%uI6I$%hTr zqoXD-@tsf}SBmqHHLnS!+qipQDL3RlKREb=R`?fOd7MZ-yTzcjgzh)J z6p+)gl_LkkO2bYgQo}SROmQ61JKW!UDPA>-dThQ+Yqj`q>vjmScsw*?`E+5i`0oo# zxHZ0)gwJ0C7c1&D9yjajR(hP9ZmX{DueYarm54j;ug?P`2s>`CI|%%quZ<#aAE_6( z-HMtuH=W;W7k*A(H*w1N<-GIuMMvTkMK|G3qu(UkBx?9?4+(3y*R6XwxYr$fGaQrj z;{~+;7e0TeIj#dcy$(81D=7#v-A_x-v%1|DI>e^pg*Ym0yAkfkr)oDFzEgp=Ivy8W zz~cWC`Ct5}j?~`&%JaXn#TdV~K-zM1dn_G3UabLl+&|)aa$cUax&?LHNzM>P@N2i) z4yT3Ox}DVCb=0<>$kRGfNZX+5MW;oj*xgqQwmP4d_WL#*Qys8c6o1P&J2LJD=^toRE zkG3##BTdtPSu=fn%LDUy1Gsq_-h@qkHLT|M^rxu&1{UUTq^t1o3$waI4YS`qv;OQF z=^>5a1f%`0*fhSLoOE#6Z;aSe}z 
z#mx9GupYxP40zeq6Pr82n)~aB7~3INwSrpbJ0d#7y^_-e^2r5i=MtYjfTRcAMKNO5 z{*`1)+PA8Ldan7^F~k+Wp;hD<$!{=pJ871eb{g~&nZJ~^%i5^`_v80_T3?!_$ulN{ zXrFLW)l)2g27+c0qZe>qu%9q$?N|vnRZ>a6X!hD@lK5nlL8s`K$tta|25a)L@-`AudDF>FRh> zuY~m=%`trP;8kx(5if)J^?soaV)nrenoxYcp{VzJQAG=*)`zYNTN4o!rHYlWkUE-{ z?qe;@*b73Et>SaSKG(y{Qf)(f4(1YRO;w>a^(FBdiy&6LCO=WGYtWtQ+w4-MPI&Hu zK(3Ck<|S=G=}tNJTBY}TQki)oCm;%RXL=&TPV?R>G7O)XFgO-s9u$&QebBkff3k6s zb^k|c?*TRAtgmUdM)ZCXn}uVz>VwMl*DZ)!DO~Z|ZF*(m1f6>M!{v_Y>J!UWJ}he( zq}l@!HIy49!^=k_I+31B6UW0%K`d@{o$uRxV1$~bw91MEA z21|)k{KzWjp+X3ZQ}mGRB+SO>0nb7C0nbLh0hNm)Fcs34-j%(RtkcqlzCT?>0(AUC ze8dh!p9wO(Ghr9*m_^e3PW4duk^A$~hPm7YnprRBX|ScGkLZB7e;Ojp!cf(LE{*g9 znNGw9bsRp@>w2ea|L@anRt}GhAb46=ePlwc{A)@%Cm8*I+AQiRx~up2$04@i^ww{D zt8}UXKGtNXXg3hN`@EBofE&=a@h1T@*~ZsjtAa-a#Bt$>>)*emdvP8GHb#2!?iA{D zKFK)$>G{AYW>R1QD_at-;+wSK!sP4w|$AbJi$qDfvN z<_3xuM5qf{6SpA6MGyvJ!DnOegUBgNMZZQ0h9UDo#Qg7vARCz*FfusPM@21%Di^^J zz@p4Y_gzX1;`@rl19hx_82684PPmGQ9a$@UWRO3|dlCan zSY6}N9(FH+8+Hp0%CrSs+1sWkr zgCP=+U!{kv5C-vY)T5RJ`c$DhH<*ern8^NrV%j!3wZI+;FnhwrDBuk-iR1@z94_Y% zIW*E&sLT@4EgSt`Ii&2hr5`$qK=oAumEU5YLguq5BA4|piQIR7P%iU-J4G#m6>toa zMX`#J|6feb@b{TMi#s@X7zMuYcmAMKB)ycA{u8k0cP zaj@!S=<)OaiT}NgCsNcDnY9Ph3xeLq)rUB@3*FiNy;1Hh|B3IOJl>k&uM~bWHJXYQ zZ#FSCTa7F>O=0PMK&c~9yx6KGsSQtUC{{MHk?#FHk-sE&wxBQX@zo_*rAZf%S zWyH1SNR7?i4$fq!yL<{(SM598U8}*U>JvYti~gWmqShXA*T&8Mzj<*+1mou3_KR1o zk~5pLUK-u}6%?*~P0GXHH=53e_5{ulo8S5iez($JCSG-j?6)I2?{2mgU3%91bvs`j z)^E7OTWC@9`?D|Ne+w;5L>=iV^-BL|r!UVaH&K{V3~|M+#sj1OYrs}Bv0jF7Mg8G# zCM3Iso~q1MQA+xFiXgIMU;UY!AAw}55bIGGJ!3%92rbnog11oZwx{W-=lwLQMT7%; z_!4j@E_Z31d+W!8@Zm=>gn#N3bQJjpxay+v9S((8`JP0+DI;n-dRyF=DGx0C210CQdBUH8s z11Edf5Ui>MnPq|w-ktxbGa)DX&6}HE=~4>tG<2gqFN@o`wZV8h?EB5@ZfKGBJ(Zfi zJsp!ZM9Lasby@%T{~TCRL{S1N#Y;@NV84gaRL`+`PDxf^&f7n?v3hha-R3Q);LXPd zM(F~Z)Tih!u=$z&%OA;Zv*>mN=ZWWDksp3O2zY7=BV^->AzBUuX#G-=z3@Ja_jW_C zk~K6iq~R^?6yLOVB2amF_tq&_^OkP>w%}NS05uyQVT;A&hR!#%Rxo4HRM8-A@Ad+L z4cyITIel+`!8Mm9%_538*}YL$%2w=4@R$`Eb|5aguM-o=c_ZcXKxuCzv{J^SzHoIn 
zmDRNE`fC8x4#Ih;J0^1&P24(AgwGCCo-j^>^VGz9gQ1^r;;TeHylP}t-veov8C^9} ztf&4r`T13Kp5-Rzy%_E$oS3CTHy5NyCnjd8xU)YhlP+hhHwxUNlLm)WoomTKXwXM+ zGZ1qaO*~D=m0Km4aZlGZ-b-_%F?7`lp}PsiY)>NEd z$%pF{{^v8m^+8G|5W@J8xFBwa7}3T{Wb(iE!Uc<}ttUZAp4a}53!DhXU>dgTG57}c zHwms?+pta&d|ZAiJ6b1QvA6tA=89KXPaGq)pAR)eLZ#g$^ zoA@1#q_`PM1Uh^oHPs?(m?a9)!0H7$d_z0N!_ebpQgv-y+^(SXeKtGV}6Yg8e zht0E?BCSu4W^tak_t4rX{NkL1(c@}fY?iP8!a#B8W$`9?$83AGVXU$IwBWp-AKfZI z3!h$@=0wBJA|BtaFjvT%fb2CteEIrq(sN&t3H9YnE0lgusioXVrR09ZoGx1aNgPBkEx1d*Aahlx{x(kV|02Fo{@*ok z1LNM-XkmfDKGRq4l?Q3m+N3mcBG++ZxpJ|+P#THO@%)Aw1EkPw$QWDFQP1bN7^IBI zi%HESJtq~aP9JP!PgJJD(wyzM03e_@y^3HdoDs3q`xXv8nxP~dPA2OHh&pKhH${SHMC z>9~+U(AlL@Afpyyc?cj9p~ax{Vc`y^8d?d&h7}Xk@0!N$&J)RtMgiRNhz znB$isK$KbLM7#XMVu0E-;2ipF`AoXO)m#>m?;v)qv@YrAP5D+KCR^x>bQ4J0yp zX#4lm%!C7i>5EB|`8xUht!8a&&{cmMfg2ha2_3BVmn_Ak68;=fc55*9^ z@A~w^UtvuvlRvPEt7rNZeIzE$=U-^|%-4vT=LuX4>9BdCf~0v6&8#1HQ7mWTWUUxp za7(J!wx|H(!R?n~o=FhX4zRvS?eFvbY17I8`Te=YkbNkC z;?EXf$-ys*^`jG$WXV~MdyMgFFX*aBm(4MGX;pvVXF?CgpH+V4BP<$KN;|cOy38qf ziAppo7@*wks+>_zDhQ49d+A=qu;HBIB zzQE=?YeI5>n2xmwE3}SdSrxg0o1ucqde89CXG2y{M}DgFy8#lLaBF^_;mPX7E%mRz zLB66-j4lga2T&Nn*srLA6AjWdgf*ODf}_TdYs?~lGQ(ph3<%EvY0IVKKnv;L}oG8C3A&|jmIwQ#uE`rsV0{wd6=%N63k-{wtr z26kT#+rj`G?!k2QWfE(?Ee{6WmtEhtTx&@Tdp!mHx;x@%RpDoR6n-nY<&1?f<4@b6 z1&sHCdHGnDw0j@8N4Knsf6IIwIl3XtMv_=~+aQ26^s!cQxgE<*H7iI5u&$JK#l#6- z2McZo>3ChL#mQER)Vs7Bdtftds1hvgwdCus(hYUQ*xkMD=e;nXUXMoF<43;M9x20y zbppn>3VoVFPth%+LdSny9h_UfVwqrHC4|}?_`5=UI=&i|X~HVcX!QFg6_M8mJ5n-h z;|SwfM}A#TdP%i$kI6PxGlX2+UJ%9)?PuErRS<5hk4&tt2X%^dH6>?%AUjwlpR@go z$=hVJuas+qKB|3j6eAXjXt>(PX%>>*m6hTsY0OZb?wdA*q)|( zN!;BZfHjl~LS3;eDA}I-UTw{YfL7}vori_9TrL{Vkr1&Q#L39eyA)VQd%f0|nq%;p z!NXVsh&DGn;nrY8l`s}qk&Bk&`?`XTlI@teOz+(k%d<1Dl$A!VJ{uzl-)4opu^gyH zs?vtdGhj8%)McocxFUBjWl_JXWSPY+gWQZ!6Tpdwdy02~D*g{ycsx-L9iRSE> z4cv^L$FJIMiQ0Y+?mk|_NK4vT^DXnrtwmrF@nb`FKV&Lyq81&77TxPl@?j^#7ILfh zW|Dvnu-&p0$;HpFwH2yDe{;yMgoI^4U7CZ|SE6GIzdB*i+=7j^{uaEYJ zEr~2=wCDCoeuEYE1zN6}a)~xnw#`~CSiHII_^*7SzKfqzz8|5G+IOD2&pf$S&^{fT 
z^Cldu9H!#MB6-hC*X*394cDAf2~FgoR5ZI~bR` z55{)i`fThaZQf8lZ+zR_@fSX4YU?hhS%e^`?~D`mR;g%en~3bnD-^SB@h@@1s-kn| z**}V>^eJ8w54YI(rBKb$=GAlDtiPYbHL6=>2SwV$yJahjaiSi;8s-w=0|K11c8$&D z4k!L|4$fFDA>;m;;K^AY=*5SL3umM?3gh6s&7*4FtRP{)N;5~vmfMqvW?T0>d7bBJ z(588v&-={;{iT&Px_ZHKckrSCv*($6kLa3X)AA{A57Yby-nPYK=NPAJE6mJwOK1L$ zLZs-~>2v1`0hZosQUnu<=+S5u;W&8XZq8GuI5bnt&4jJn`=>1Tp54KA%tfjc3AG$Q zA`yLF{_fpcpLY5`%M9pCtfE~*fANLgwYM*fG0U11FEI7Cw93rin{Sveo!vxSp1s@-`*QB6l>*V&(S)2M1dQ&1?u4tHmP}hAFgoWY$(a~JuntHwny>tkvy2Y znmcjQzdL*)?w@-yRi@4OP6*?4{YXK%J+H|(<{@xupjNi2YjT-J6D5&6basaU^9Af*UDecZ z%sN%)ppK9F?E?OOjyE^6YF|4hb!zi9XepU>Z15}zYRb$UJE0_Y7EIuO7D=Akz3VgG z4WeX%?hcQXwE)UdDkcr+m&i7-X{+(3u;UTVb*KB|st7`+{}Q`D_!3nHWR-s?1b{-@ zoqd8r;YB5vLmL5DcK&5mF-5zKk7<{pJ*XdZYN*5A7!~U{6px*Be)&URvhExRIScw# zKCwnu?-OmSuE>|6w8S<0_)&yw$V0Z?r-b*^*bF1kVU7jy^fcjM84B>;r;i>rF75kF z(zJZ|iRk)K)ZpStrW$SObWL%?0`iya=ef$M0z|(=04pMRVK^agb9?tm4MeC;;dDLU ztC6O!Pk*D=J~e;#;S8^bDh1MF!Up4n&O;)65_D|4xD2W_ayV;jh$=_2^1 zhw}A=@>ad&@2F@WxLzB7TC0w1L$V!?jE!9r3CM(2W;Lk{4m4bEd0Jm~J2ZF48GNz# z9Mq&o_pCm;qzejt2v~k1xhO8RJ!_a0y%wwEg7rm20BEFQiI@tQ3Z362ryHSIqeT6% zmXLsprKX2Rh&BF#F!+Uo2C+Vcl*N1gL|R?lh80r+@JZFCqJY4pE_ib4cQ@I#sOxi{HC7Gx#3CS5=_GY|*}daMbZSG(w1-!m z=kC7Wk$~~3s>%>lQw&;>-P`$quET?|CnKz{9y^vlg7dWlW9(bvBOmx%^d}|~l3k|} zUp&jbGa^?7wS9U$%CD6MA?`MFtfQq|7N5!kv%cK1e;vG%)=-^bkgx$8g&z|p^_fgx zr+qo70Q04$8evs+$1P5RslA=gXaKqX)@m5xL!U5;R20Z@1LLN1h%`YTPj|A!Ut+{nE^kh*BnAF- zFP#z>$A3iw^aC$PgvZL8s%ALZZhQ**c^33yqHJmo1$my@s2t`r=; zph5S+cq5#yHB+bJD(=p5FDX9}mj?oKe-22`T0)I(saYD?yV#qEY=?iQsc8qPIZ$t) zK|$g2MluHOpW!u9MR4^D70)s`I}XEko^7w>-+9fVNeFnlx?nc4__%zIQ^9w)k^k)e zQeQ95=%ahbBXBeKouweXwq!v+2Q1lC-D&sCS3n!!{pWNrIM!$*~8W81q+#wDw z6B1b(8fEzf8prNi8k(dVnn*1RJkji)M#t2#alUN&ByPv~BfX0c#M8zo5+vGpGF~uq zLPr-TvpQCL->rIC)cw$zK1v3U-Loz+cn z2l%#+oiDi;!f<#0Pi274(p0B@yn_ z*S@d)jfi~XKvpP?Cvkfa+CiY#Hr#3<-lJ(AthA+I074j{bO9 zU5OoSTXsRMS9t`hMV$_ATjKgKK@CQEhDd}*wwb~bnjMo*==y|zG}HPi@QzxqPt=q= zBX+$CwmiFnLNGm8E3)FbQ1I25%{uh-2XDCYeCdN?MYds;U(V!==C!~Ii9tohf!hl2 
z#&404%m#Ab(7Sh7w#YE~GyBfZPd^;zl&-$&QU;{8Js|rMy(Sp;$6tSc!G6U&-&TVS zJ$w5HXcQkT#$G4RcXF=2wu2q_Ubo_N$%W1Llf<5QTGqlqqJf~lGC)IVU_d;G zLK66jgjTcUy`Q`Z3IrJhf|MkH*WWS&L1_T+JY>%WdW8wSl7;LccpjiauMnYEJWy4C zgBNAUo-_2ywNb{E1{?|5`vSe{ zx-huF$Xtj&1F@*V>kxCS&F(-)sFK-WJSa{I$R-KoN*E=B))KWmQ@H5Ff`F1hEdcnQ z2K)=M*98Di(tv#u+O)<&GYrYOpzL6fK@7+rp=C@4SoD7x0>GmQqe5>yslj5>z#K{7 zJpk-T1HO7Pe!)uN#8R(yO&qO(?C~XzzJu1jYXQZ8cmd!u$etf$FBr0?1a)C^feE@S z$AbLF2yDC4teovic%#-UV@*A5l9%*#MO_5YTFRw>wuz$_&|31AwI3kSAdqM*=&$5| zcmkw=o033f8gLE(SFlRb$94-aRVs6d@B0qx8q|q%W{+SP0osTH0c9=zN&(*lghc~Z zrhbDoYDf)s1b_pifv-J#3|Oy~4A;uH(VkWQ0sYAJ{HQ;M?us)5pUAU@#S!7_!F$*>i*JnLukXT0rq2 z2T7m@ZAPi(c*JX;r|W&NaI0DvcRrz z&`w%*g*fmWyvZ62qL2V?$^hd5;GDX|3~+(XK`jl`p7+W<&z;ppfV8Dt2IwdYT%`HW zN<65*UE)AP$^SZR$^ymAm0g9RFQgj7k+lh5>3t%VEs&=eigJ}Fpbd~cPAJYU-7B?A z3w(?3`(+=p7GIJUU*Z9T;O3>lV)Mc7P2?c(IB7{5KR4_QFhUQpAtqlX~1tH zRHhE6qXxg-s`;BFrez(w3k(Fgy|Dk-Ps|wr{9CYWTj%RA)p#Vz|0M zipeMFc53Hc&A^B?X4b^#{yins79*@CpZsYLfb`kuVrw5kiQ+`PY{ z5?lCK>sPxSY;Bo8eRzKgtZrNBwcq|p1e4vi&Nr4<$#iAZV`pTpcH5P17AhS7xX(}O z(r#-xHqqyzPi*1o^m8i(m4dX{PN`^o_?3_lbak>U^A#IoUoQX`SR6Xp7ogZ6O9`6yGRN3 z5kYH4Z;cO)z>j3=HoufoSw%XEi}MI$BkW?)bqmbi51XMEfCU-Gdy0$P*$dK;hBhm3 z376dUA3xMC*@NCslXR|`PZyKV{OdK|^MB<7EevF3i)@Q*BVT~xi|R4zPG}}8W4@tL zXb=1jkY9AeEHJxz#b3P59(2>^-FFEw^;>j!0MhXz|I(sQal3)MCkGw27hP>w85{0& zJ|;Z1e&3h4*LXQG8RPkK@(KwwKV8PI;b#xc+$s@S@+ZM+#PX{ChetuSdOX`!ebVJ> z93Lhvtxu#6zA47qwVy%$#ZSg@V^mJsmO|Q=Pip$?(UMO(sdq0eCKF9^WvU+boUWSd zZ^t+iuR{Dkr`oNHzbQvkK8x`lmyjz)KGu9i;Db3&4fa&XvU@bO_Rj9T1COH0d5$&s|X zyDEits+}m$udE)rStHJJGGT^Y`Iyk~+6Lp4+{V#8B#fX@c%W=RyFw+7<-7|Aml1^*`xndz2MhomQ>SgT~AcNbHHY(6I=Qq`8=<@2KV0D-FpRxe z`#?X*bOolSv-8!w)XChnXK4RXkZ*1yg)`uxBeG$Pa#1WU$Z*mV;5W3G*5;n6!9@Iz z_C*{DTk>!bTz({{7b@hqv+sBNkMLtJ5Y2x78Kw`m78UY5ZFTrSpxETLYcRBbc<;R5 zh-i{4{o^kLZMU1Ys(n^O%CyPZG~rgfDNcAa0eiu1ztLlB+nuvX2!Pi++|}WB%URnr zotG&aKXEcTs@7P%gJk{5;k~y82@fBCKCxyr!PytNuj8Y(C$55d8xb{qCGtI6#!^@# z%d)=)4X_rpgwiegu>e0jzXN_8Zq>M%R)`#zj%}W~2a+N{zM<#@ihedYFmfOi0Hfpk 
z)wWfZ+Xd&GvK;#vqQOq&6;pZF2xCe8%<_YcX%&ASEw^b^s!xs_ARSsqqbAoqAe%j=ngT zIh88xzMqIq%{#`gp8kjX-<kXk#61hJ9gkr#@N71Q{tehDTRqaC<&Hn zcDnN;!vM+hyiC-#$m^;0u5ktfiR}c{lBW`-U6zq6Cdz~`M~@5=aVrm=y&27V6e=H) zQ;c3(nyk{OZN15ljk|{6m>9PW}p|tV1w~2d5 z%RN_#svh^1yg{m?#|E_?#3}0PBr~YJQU{3N2r%DaimlnF-~!PM;ZnG{g%LO;&D{`O z1Yf^(CpDlWvXsvc2|FjIe37@bv9ysjkj>7*(G<`U(92R+pk0f!M9#L~))dye09n`D zzgJJEQR6)Zh);``@wtHR!Db?q!AnLKfQW{V$gvemli!QTl;RPDIgG~~j~>1`>xcsY zL-4PmZ22{brqF@p$E6^VkxNp`(o?%4u|urV%~m6jB;Z0Y9o)N9>E}Y%=m@s%k}Dyh zNOm!|`AgeSmr{eFW^YAHEZOrlBiNO(wSimbgQGPEecESLU*B$W(ye#cRlmHLs7g^5 zkIlzOg&K{E!UYB*$G_PE(g|t=8Qeg}pDr&C);Z zF{+wwK>svf3}@X2~ zt@xloY8$RfayHjlIaEZC`*gs0Kh>D6cx0dqZauHeY7H&y$e?Mswl&tLCNW+jhmYj3 z9h3NR3esT!xCy1g4>rVZjYjuZqvG0e&c#-{;L4>W?%<%GHok10i55(Wy-(g)ibBX8 zh2zgReLu(j{R6RfyPRB}Z&vd%qPpq->pbgys#gp)4ZB)*7nD zgQaZ%gdt4M#bbd>A7~6PV#mKD_$_5~dfJtyrV5yH-uNJ1Keow!k3Sq1PH}I3bb8x| ziGX}p;blE;aBje+u|R$lNft#mcnz1W@7YPTOuCGF>7Bml0l!wt7?o|Fs0a8G_?gIA z8Xxr>e1Iv*myet0H-+ePbo$QA-pZ%9U&TU$c4nPlV(AQ-q(kYV6)DU+&1BpBkP*ue zgS%12?JC98Hl~)$(UI(W@Vw^jt*>8GX!3PFuoM9oOF2DtIXx*m>8+4_A+W8ZrR-!& zH76OiRyze4x*OSD!Zs~3l;+VeR}~!EEk#V}-1^iqJ{acVw!Jhldb2#e!8tlI;ypUI zoqEJPRo$9FGyWh|*AUg=<41CRl!!9HI^9==^V|!kV4Xv5KHw)>8KI4Dh z6&3il!4vBuXG?kPfRtKs{MiLsg$O;TW){!Jy(9}PQrHr+_Qz*JOaegizVGcg_U@A!#+y?)yXe`U3+SZC}*W1RH@;*|q_ z@!tbk<(cIqlAXhCSFd&$urN!{u$;@JPuSg2&<8N^eY_hbGFj8|*VRl@;yaQj=ltbx zmtIjf)cn&y(AzE;U?~~xOpy>i+2`BI!rrQf_%i{!M8+`Xb0JNVPMEK!{veI<4+6C0 zOEMH?dby3_FmAz80}q+q?c#g%sn|jwnbIli4Da{8Z0n&m{jboRcQU=~`QGS_RD>O_ zRCdT-s(&p6`oDf^YUfBgJSQo2hEr%_1|qMjOJk@B9JcP*EJqoD3O?waVdlOEG*o*i z&;Z_CCvDT}12gz=vpat$UQ-}kh|W^4%hTAzPlL5NIctJ{MC&zPw~ z^g(=d%%mR~4he~UDJIP9v99ftRao0^gu-6a3zo2Ek3@CA(7+cbl>P2^+VHkp6wL%L z=7Xxh+~e0hBk-2P?rb4Tf!6zHtgkq{3a76iZ)0<oErI!4gk7Eg*y`M{dxY zoE@_=Xqz3O6PZs^! zNFl+?*K@wbC=B2A1HApQz3S?zCp7zs9O^`$GTS6t8Km@}HMJ$mj~)=hV9eM4t}Co- zrtK|o<>}Oy+N>%{SsCp04MD+}$1XHHuA+_q}>Z6!dF!)!t4~l;Ibp-J!sCZKjq(U zXTSG3IhbN$O$5SzyavR>3s$Taqym#P<A$p! 
zRAeeD24b6{VeGL^!p2C(1jZ3gHqryE4R25Is+9Rt2tAAF-a{SB5!krOyYKaY0kNz+9nNg z)L<=T^7xKR0eJp#@!r@`L*FYj&oFmZE?t~I$O+w8aGSPwDmOzKV?v@Ar*}ETJVYb< zm{3co4E3bCW^Y2K7a64uWfOde6G9xK6GGivGS8#n;Ea!LAj=PSqS%(`CFw=#-J6eQ zF4Z^goe!}^S;TgrIi>ZDa1z>9>ODXKG>uh(pvN@A;txpMdLirDa=%W9wvj^^P<&9I z=vv(*`2GZNe7I-t#SY0sQ9~WVR-($1A_9T5bap4`Sg2SkQ|~RY({MC#Hl(x_np3JH zt3%%~vu=C$KA{Sod@3bf!@{OWJe)?IMNxoKd99!<;=1FA$}~qiQEoHe9tvekhJ>TW zi^b5`hmGoBtcd7%e(B z!N;_Z6LjTLSzBfFyFXWu}xBbHg!zuN1NVRhA6w0rVAofTne&Zi+IwRTkY%%G9Q61TSiF3X5;x~Mm ztR$LHvkb$0RJ&)o8Cq{?_D?7v@=)@-9eC!=Vz$2$+H|(R(5N@&KQG)K(Jrx=#4t_qEU$$t68|2&eAx zFIQ%%hIp_Xd|nqTi7K&`!_|;E5=Cj%qsuY`AHPlFfWYfv%)iZ>O<^*H1AENkwRYzR3g$zKuG9J4qxRVD_)J&(AR`I@7z5@-&@^H0w z(F_Ji3%{bJd+6Q-&Z$2YLEBW+PU;Dqm{`&ppTRU%6_WrJ%WLa++RJ2|^jfZ95yFH2GI&gp#KZcsLEwN%od#^X8zb_WN{2O5UfDeqe;pZorS9~YmR zo;;qg9^)V5pPqK5pCJ#@QZ&mk%%R6WKY9K@yr0 z#uC;NUnN|7S3+1(S8?4L$Q1cAoP_^~snEtLl7Z7xwbQjTw3C;znt7cScJBJuBaw0A z=_!;?m2u<%$%Ay}bolhC^z*bl3>9BjlIlgQJ!X3H^W=t-ACsl8AW-LFo#D{of?=NF z@nPHKCR#`3jbFa(FsrLe=!>z*62y2n%p8Nczaf%LwumE#*1q~MUiXHNz%Ns}>BjPuG1|a(nos8@ zqU9gB%TlRJg6{-3lf~OLCT`l(WGU%I@F?T?bt9I-;ycOJYHCVu-T_wM{2x4ChOsx^#R@*2YJrO=BHnAp3=20_G2*b_)!8+L<#Z(|Y6Hj7HO}c{n5W zhXiG) zGJAa+Zc6c09yXBN|3o=VUYLLE;SiRjmYP)nm7pU{wNq1_&nY{ejAewidf{EEj+Gx^ z)E(xrcvnhS#ZK2emngYD&ap%rSS>gbIZWs9ow!=)-0TyPuZmgn95FScjrIPsqOPox zckZAMuw6qhGiJ5>!Fuu0F5L2Mn@`VYPu#d(4t|Gv_X#)o3AI|y;p(K56!jNj%kU(& zPq4zIbH-&;NXzg1+QOERjQuz@PK99!y3v$!HJ64Y8@vP2{QTOx7HqB)zD_HPqd|w6 zWTz@$i*}-d_T2p1ane-#jbgjHxyrZp%(Q&zgNC4v+#aByRI4Cre#U}MgO+){Jim5& z<{(A=j8UW<>NA&_{_xnLjIM*U?+>n^TSovmX{00JQsGK8=X~mcFL}RLVqSX3i7z+O z$Xp^^uQ+c|Xu~E{2$hpevz32LC9`(uYd`tsfuUn1jd4VFzbdx`^e)VmljE7dKVgig zThKVwbm>ZRWQ4_~#*ZY*7iOI0x=^H&X1Zo3DQE187nsNWI_J4J@eh;6`gx1UYT=h? 
zG2K(XiPv~&%kroj?awbGo95^(CNuQ1Mpl35Iu3JmXvp`-ygdeSxy}p8d6ZrfT>$_Z z%U>Eu5|07$on`q^Y&@Ti7mA;G9JPn?8b1$P*p5_;L_7Slb+9IX?6e^H=WVWWTasc@ zlN*LBe@E&PS$jaoFuu3pz7_4hQoLi7m!Uzj=+809xhFOnN8;i&Q)1@@lcLjrKKXwc z4q{7NN+~l7Oj<2p<7N`g#8ipzGD~sLx;%1km}u1>C|R8cZItKQ)cGNc|NmV zi<)$mJCALB+43L#3iXxL_4+b_YoIxkhtBE0%4+HJ`olJsM`R1X*#C^y^+GZx(L3hA zL)bFz{r%GLAX)$GRJlohORA!(>iG4^AajYP{fYYKBK}BM>05f%4S<<7?*&$^^_rHu zTUBkD&l7r!fk@L!ifE%s1t#yOeZEg@%MXR~qK%9SD@iu{7NagBM{zYz!|eD`#=OP& zGX&;7dUL|H(hc9Ebpg)`1xYq(XX@zRKb;(^i>wItt`t_-d~I;+61Z~o>&wkypH$aV z9z4~&G+AY^_}7Ey`hpy;_ubYzL;5Z6>+E=2Px$N@^Nj2>fxh=owMcV_yiq2<*zP5I_DYY(~B9m;4BNju7VfvHH&whv}4WTnuHrbUI_9`Wy?U}a#ne4cyYy@5$z_ZE3CQ2Dc+0f6I=*{2^~~%33TAL##d?IH4Y;AA>IwFPI>+XuGKaJPyPI839=<;qZgu$NKlI#OFrq zAc&?DrKmuh=8vOq$eGS;s~9;ow06O`da2NVK&uU5M-G!t52W1F^(i|cY|a6nZXU75 zBQZJm$a@v~ycew`{hhB<--Ke8=r$zQ;{Du6Z6&8;-;&WC2PB;0od5R4z7EBwHYzxE{>J=BxWKsG)6)J8#X_JY9p)+*B{X@e6uIHUvOV=2XB%5m}l+r8$a-F@NV+v`1Lh*H~#oq&05Q9vROaB zdb}*PinEGy$GU3YxT+m?#&^eiXMd0GA@Bt83lFdHuiDJaW4)fZbeN{N*LZZ8dD++7 z*30vw=iY`YIsa)+7>bPUmtJE761)X;IW%MTPgQI+w)k)xhT%^=W3@&NwM2T?^3 zQzi5u0@dW5GB)mjlz%1UBe8FDIxYR>wNxPl3Xn>J>>Nb@M!8BfsZAea`$&zw1E0un z^$?n?o^qv^}WC+xZ)bP!+!nJ??Fb!PsYJK?gK;5Of#`<@EcR;WvE+uwstoXi1>Z0g1c*5}n+weMGQJMfoqUGjFgtDc3PUxfR+02BmR z*DROCo+*Z`6u9x1zn6fG*LIg7obI-_!jt7w(rWdN0?kd~>ya4E8G}b`4Z`vWY$TO0&MAlZ_ zfShyEBz8`OG7QAM;Syyio)}#-xnxN+fV2cBgOzE4@auU@#@~WL%+;!|VS8&%#yYRN zpunbNYu}4#UAWP!(QB(gW93tiWm&C8U^Qz!YY6{aZc5DOMkGCi{?)XVG>IUT2sqj( z6_dGu8V`zZs(5<4T^jRTJhSU++|?`!AF;vp-AP(56I3{eU`FEi(xHfnEI4={si{#j2S8E`F5ogCCd!A1g3c(ifvzRWX#veN+9aqV_u_ zqplXMHyKASgySi?Wr z6zHOA%-b}Ol$P+=V5n=I%n25MzB|{b4d9t}cL-gaa(4*UwGq;tT(LpcT@|!p&|Ph{ zA=F*0489nCY*3_EUu*$70 zJDE~nSLgflv$euO()HdSn;Zk5Mo$REZFDH3nqGS?qXEgHoKqA2UhGZ`37Zk0G1UEG zGtbWo;kC@~bzW5KX(svZNQZ1<d~!TLdzZ^7H8;H!|W@DvZU zyWVy*hugw-g=(xl{a`;PWt6+TcD)P0SliAy(zgw|2aY!~!W$Vgw_WcoVuL*O8LKaT1|yazO|g`YTt%bRVj7X zg0p-s5B<%YC|N0m+1BdPpE{mY}- zEQuo^p5b!ECJ%j+Ky;-Pumz{(R37S~MWdx3338=#Sx<#vXwmTLN3OZkxeTR3^tEX2 
zL6K`7bXQZU5P%jvF*n##p`EQY%3%j#S^=FvvY%l`jYA#1kMSX&pH3;7Yp0x_ zVImrBC%2gv7`-`w@2mmBT^zu0R$Ieql63Z5!?WXCjb%t5V?e;2mv-CAEi6WrsK=@3 z`FJMfa}j;t@AL7az7!!!)ceFAfWW`GLfW}Shp3H*tfD~)=~VRMHZ?NODA<2`|MVsx z#Aj0|0+KIt6PG=QAtvp1QlvVub~0}{*V}Vf;U@R#*AB?x!mPrcXxHP1rG}t}!foe| zxFURXf;8}eUQhUdUwsgG%_V=igIM+%qC^>8z{nTNd_GPh;>)JV$x&(&RicuZxvn$&(ax^Nx_1f2NKOW=v$xSv>ia>`mw5Zi5%O0eEru7IxAO?+Dr4x@^aG9Lu~Y zY$tY{*o3lrDr`hG?c00mZNxPlkU!|P1J3(5_s=gTp7ljjDYuYe)c)y|+p;hY|E!#? zuFK?SJhAb~i@Ikru}QXz=x3ZR7R48#E*cj9RKwjx7>0lH=1vog+#kHTdk;f+Nf6tK zhvB~9ef@|FLNP|kxj)NpWEW8_8sRu3Vz3w;~P3oUO(6*SDPj+ zs;b|mM%S0sJu;`?;j(TC;E(MyP-T*{#D*CWypzDs5*LxTlg;fQvdRfTi-WLJ+xHMC!HY}8kp(|x~@4bMEQd06Bo`8l@^6VbEfZ7BY z4Jy&{1(>D)Rc%r9rvs1TyA*)vv_`A;NMNxb=EdP#m*6!sjE;Am$qkv zzX@WHUstj%3NN%Cqd9vNu;?}BfYc|bzn3^gN;uTmONt_O2 zIZxh{x8!YkN8UBNn;)1x%n!|;W-qh1*~jc__A~pNADIKpkIjMBhgMIkm(|nm%ywZht9ZL~I7o2@O@R_j}9o3+FG!TQnKW&LFB zwtlwuSie|%tzWHu)&c7`>y&lII%i$5E?HNt>()){mUY{@W8JmxS@*36)(2UT` z(5z5mXm)50J)=Q$`!vVihfm>XL22fEny7~U{a5iS`uF`JUNnM(qN!*Ghsm2B6`e(A zIEEgt9~>7W#Yi|M#))xonmqd%@>^%gZ=EB*b)Njz1+h}Bf{WxKE*nLSVhm^op8HIn z$@2LepTqLg9D4!(Z~n9Fyv!#Hu`4v+e$yOePT<^}Wp3bU&F{^7ya3I*x1jlUeYfNd zxWHEubyEM&m#vWVF7+zxP|)F+FH~kLh7+XYz0N zo}%AS-x+xOe$~ z%^N0rJM@d=(0_+}#j-g1&|{CJk%u04)WVU+eRtgP5Jw$v;D}=~jX9>^h~oij?dzzi z&9q)V^PRkt*KgX6ozhNar?w;PGJc1HUJJCpsQo!NfL&SJl8N84HL zY<6}#hn>@o`9p8}@gMtJ6;!6O{*>2U@b`XKrBIGa`FGy;J8Qf3J^A3Bf9QqxqaQwK z9kz~I$DjFQ6{*saPyYRX^~#U`-Y=^tl}@Gqm%OvbH`|Z@9p2g5@(=v;YmBor7|f<; zIUs`7V)fu>Rf!(asejE{sdw0{02AIs6+Qny-9O$xiRJQ7@y}y<{R{ldS#keL|29^^zumu^ z)%Wl7?_*8<2mOaxGyhTlan=Gy2aj26|5F?t^p|n6I9n}C%F=9oAZH+kZ3^TLMo`3{(nKVm}6|2CA}Mff|7t?59Az zKs~lQ&>+x&{Tzr7#IrqtW`UOMmq7bKdv+j*fj>KFwlG_=LuPBU9Xn!nFuSl5W`dc( zE}^HLz%HYw-N0_6r@hCXSlnvFrPajh!QZr}*e7|PQ1wuCz9lp)G@frwy3Ylp(nYlL z(uB2fOIC)JFepa%B}dU+pZZ=@j$U^brHgQMFpF~i*E;vFb@fJFzSl z-Chw^1dmynRYoUMomB^ejbr2JZkz4!rZ2rOgD<1+1z#rLi@wahmwZ`#FZ-f>S$)}j z*?l>DIejs{T)y1CJib_8USB?6eqRA!LG}yV%YJ41*nW0^9b|{tVRnQaWyjcYc7pxJ 
zPO?+%G&{r2vUBV_yTC58OYE{C3`5)zcf~z%Upx>G#Ut@pJP}X-N|}LG(XZo#SbDF6 z{vV0-9FTs8oS?VK87R5zSkv5~?}XC-b#(xxS`5dFCGcZHDgBmKdcTDY)$yaC0e(`@ z^WhFO#ZM~wZ_86dJN!h@dk50c8wb)tAN-`lKFT3IOTiq-fIT%myudQCOpuB0l6et( zZ7s-*y*3_RqSf6j)L&m_9a%?+ru$~HvaT!vvaw#Q7xwHSYzX9JpR%D4!$z3=jcgO-W!uTa*X3~IC8AOE8xhn8n1>U$2zHENn^Tx?_+2Ph@ zDApyDr!GZ#QT(m0k>Is3rZK2SKB|!)$9eG}sa^pb#k~tbs+URi3Q@gMQN1F}MdlK$ z(K2%-WS}}`raERZ&zTn>n|aB+iuKYpjis9Ar<%r5O^cu(oP}}1Y>YNaQ>`mftt+E{ zx&zhGJ3Xf76*!L4JGiT`HL!oSq8bg;b!2s@-$QZ<(3~b`?O2)QGS2F;+`#A^GD&6F z1O7l3C3p#xg_ejxZR{ht_(?93dfc`iJSOH_u=O2Vg z)b^{X?Y~A({u`_z4fuvMU@d9DI?{mkqyZa912&?EzY3es%U_4h=;`S`GQ9N%Ex&t0CBGOt{E38F^KW`DJr~gL{w~Rv>hzVSDlI&1` z>`;*GkV$sPB0FT09a3b6Qji^T$PT32YQtQxC>SJ#4dV4Yb4j$rz;!E87ijc1+864?T_jICzt*;clL z?MB%gK{=dfSJ*A~0FA$&+dLJI;+c3fk3s3iu^6m<4wOY6%Hz@d>AVRmN_kUOjPhoz zIOWY*3CiDOB`I&gN>SdDm8QHE_B@@pW@RaF!zxhTo>ii}BYISwzr$Xme7rBM&W)A6 zD9Ts)(&=113|Kn%r4HCS_s#PI^hT2;ZcGpmZ}yQ~_f@39(~j%Tl8I+4}Hbb>DeIMz1} zrmK8<2IOG?8wP8NT8;HIFkKhG@dnn>ukUIKNKBsu0+>Dx1c77SaP$)>4LOsa-|PZC zVE|hB(J&Ea!U9+hYtfJGguQSCPGih}>)E;H1GPfrrhKoZYL5@%qf z0J^(dkgWjf=`4kQorr$=wI)4zlmiN&FLr07SS0CMTGEjyeQtdemM(d#oz^66d(obZ zC0(IZuaq$dEflac7}+qgM4ar4_GwWtgw07#PnsX*U3qUln2!m+lsImMi6PBW*QGt8N2Yd4vj%`N6u^ILP9`JK64-Bnx(WhkHW zE2#qJee;3&(0pV*HlLVJEi`-9=hjGTlr`EKV~w@Gu*O+mS}Uzp)@ti(YmN1dwboi^ zt+x(YN33Jk3G1YF+B$2Uw=PxM1ufE~0=+p=vtWGg#`oy*Q`=dok$ymmf2 zzg@sCXcw{z+i~_Qb`iU%UCb_Sm#|CPrR>sn8M~}q&Mt3Puq!HCg_KgMRB9EW?y39g zfy$sVD!mq&jbcBbc+ZC;hwA9lI`EF@wYy+kXY*bd*M;aVXhl(63X07fnuAR#mWrj2 z%D>pZ1XBA~`By<2|6TuG9JgV_0%>)`g5#TG<{8MT;}#sv3`XBw3}g2dP#&Z1gHQt_ z@5Asa`Rt-8i~o0hOwt`ZLQ2vUAWad}!hUK`MeUh_+S8%- zOiAsTirOP5{_H%dvqe9jlC#n}Yh(a+N|r#X%0o|lVIp=6RY2Gk?@dNkVm%q=NFPfQH@#Sc@OdqqmmW31TL<2ZEot#(_>!2@p3OhJfaY{NRA?TEMs^f2JIyE7+Q`e~r z5l$ng5u|aNIn5xFj#5KE@$nCiqW3guPmA^`Lunq}8hkCkh%Iytzjx@`v|ScF5kGse z>=1rWgq05$x%pZw+l=4s$>j!5;0Ypv&&1Ms_+64rnEoFO`3TGd$z_~Z;a9Ma9NV2;L9(0g)7!~=U=5j;LXEJpaAaj_CP^GyTN9{w5|9AJl<5tS{@r^gA*1cWBsNaoF=)u?|?G zzn#PO$_N=*DU20rxl!5cE=$t-8mRTUcjR>HppM5l^_}{V+lhCYKpyHN`JJK8P$)q9 
zp45|Z%nXh%=nLYW&2^7=S~#t-zqEBafZyrpbi$tUj`J?q&il^$kizNed;pH~q4Od3 zx?WBnh;;fn{UMz*z!?A;oPo|@$mk4n#=?uvcxNVFMg3c8|=SoB8ilTF+r*ma+K6O5YOvz`QL1&x~1)YU>-lEA?49`Jn z9ZFxNv>~Palb)_ayP*FnjA>-D#Ty0&TD*=2xIpp&eS!F3J zEP<665p0I-unS{?LvRAlVpMPg?!gmgV0`JYNR|;J%p5G16~Y*^46DRyV3gSq$Jed? zHqJzg$x(x{Vw%;-hG{k@JEqwkwBAk*w4@yEX$+<@4n|u}F0DCEZnT=*$wL-4*1?#| z$?IUu<>YfP=5q2o7;`xV9E`b~f)2)9P9c=Aw#688IdKlgT+S;_5lo9XMKLXkHkv!d zoZ^@kchHkKC9qvMTJ2JpmU2pCS{ki2cgi?rF)iz0yyld1FkVB?Pyy2l4n}QGMU)?R zDmfUtIh7rZ-JB{8#%@kkY)S4^Lm$GO>RJz-8rWAj_Lw@PpLKQHp_X#&H}ROpqle(= zCmLhg82b=+nqVK|*pr50s{1SR1;d^sh@!sudGcJwKi4DEQ)^Si;55ZC!N)oc6(ZeB z0H)&MBaHO2VJpU?)fmp^u+8iUyTcuf_$%Q7NVyZE0%~I z;UE)8b%wVuQA?OZtOD7`?xQoFV0uX*TFZyH`%w^x5szc@Aqf+7xUNizwICF zpYC7l-{-$3O&Kjq$%gVhIaDUfjq;GZ6;Od3fpUSyfrP+_z`VfLz_Gx+V5(s5V5MO5 zV2|MF;G*F6;K|@4Gt$gwRx?}S2yUFY%-m_7#nEbdtB_UGYG?JgCgLb{w{_7r>`XY~ ztYdey2ijBZHTGWnN+=M@5-JgTJ=8h$NoZzheds{w297+lsWQ)EFqAz?NvEMglootO zv@WCkqBWuPImt>SE0L@ol8s0D%k-|g@dq^CSI3jT#@(Phxh`iz>*W% zR3TEuL#h&~N~EfXR3lQ2NHq_sPNX`K>K;;qNDU%2JftR(nnY@PNF5?|h}7|rxrSBhrmXHxKDfq&t!B9`XT^4~Tr=As-U?kjRG~(u+thBE3AMH<8{% zdV5G8B7KPT@sNH*`Vr~pA^nN;C(_?T1`ruQWPpbZBr=f5Ko1#2WDt=-9x|B7U?PJ( zWGIoLM233EFe1Z<4D*oTM1~U??jfHM`HaYC9x{T+2qGgqFu|924yiZ#c^V1f^{j^1~KW$O`Pg^txKwC5qKwGck)~mEdvjVh5@xGofcgkZ4 zP|R)vWAPA4>wz_)IlSjy=X6*AD`7kAfg^AhuE7J0*CSbG7Q>RRW4xYaa3lN)G~+N) z&o#Iulc;2JSjiMBnHp9yjY_6di9X^CI-=f6Ox~KEyfr6zYi`(?7tk>lCLd!_@~@UA zZ!JsS`YL&AdDt;l&@omfA7fSWuU02-eN9_QdKamC2=1{Cgwp`?^_VwIKH3Ii8{K+_ zXdd$U++aKKIUP{nBlwS@Y+mex_(md~T%wKU1zC9RT;r*tNzGm_U1{@GQBvLHwvx;JJ?eo}c9vGSDmcIQ>~cICgB z7oA6Tc)n}98A?d62km6%X~jyfj7`xEWSjj%P_)v0@|Hv9R(O{VS&Lb377O8o?f5 zNgVaICC|&@5j&Z*PQ@05m7djohy|#(^$#oGMe9!bje>o{N>?-ecIQK+pY_7Zr!uX5 zo87xCpOt<>M?}v6hgi0-^7m*B3VR)k4l8Yp-j4ysfw;%Lu<{DjpR}y+gq6lJ-SV|? 
zt2gWjE01KlWworgyQNw?Y3;+Qg6%6vrzB|*z1vaGK>Sl{yBsy9rPB(3t-T1x9@P=g z&1SAP}^CJo-E|{i|VeX2kENx z-ReH46KWw~-|ULlqdgIK=AwoQXE}OIKVA1Ec)kLa)N?vgI_KHkKBmhkZAWP$%11yp z`VXoZB!K>>B^E)e@vYoGGv+U^mvi!U{c-;_j$TFOxW_*_UU@uTH5@}(bUd4mr|;Ux z2+!VaY&eZ+1;n5glg4E0;+@8jv`%v8f7tsD@T#h;?R9Q))3eFl<(!iQLI@Cg2)*~J zBE1WU^d_Bvh=|Id@{I*`5CKI%KtYryAc%ruMH$C}4N>glsE8;6Qogm{dlPN|9h~`o zpZWj)<2>Bltn9PPs_$B7pL5tRPjfc#5x&wpKSDpi^nrHYmGf~i`U~v&+dF$Bk)n`l z_ovl#wmKV%Q5Q0`r1P*dzZi8kQ{&hk=MH$uP9QhW2y_c_k=Ekk{LX&1Rf_Bz9)rZV!GI84%ikkm&RPmZo=N5?BM=n`&?QFcMr6Tc#M1D zMLJ-Yc77BshP`lo$g5-#m1PmS!FuHIQ$Ru!L)TJLXm;o}(xJ_vExwSlftv>58khsB;vqo}de8i&1lTCs@EzA4l{)SpKl7`h6+>9=RHH}pkh zCN_TMMf$l9GpUc6lq{lgZ=nI9D=)H7ZY>=dh zO4w)Q@z16>Y_E$VRG&FhF>-|I=kug+jsZqjZfI-h$h| zJr~*?dOq|*=*7@Wp_fCig!Y794cTXv#9%&z`6OXJI?~U?(4^4h(3DU?XliI$=$g>< z(6!v^PoX)Xn|Xz|hUSL;EA;2kZ8*oyVQUD_F0uNkpV2$?({`qMtzJu{pViOVnL3;3 z9XU>pmlNbfIY~~IQ)GdhDyPY<@=3W(ZkJEV9r9`UjNB=A$!Fzra<_b5z93(eFUgnX zD{_x~RmG@S6;yF5UL~kRm86nYib_>Nl~8Faq@+?xt8%Km%2gFqMO8^vR#jA0RZZoo z>MCE=P&HK>)mF7r?bRjfQgxZ?pe|P(RVUS1bx~baH`QJBP(9T&b&Z;?u2nPCb!w)% zUd>WBs2kN^)O>ZXTA=Pz3)L$1uzEzTR?n&3>Us5odQrWkURJNDzpJm+Kh!tsTlJkf zq`p^&)ekzLV|1(z>Nwp>x7KZRTis5#*O%x^^<}z)zFc?Iopfj2MR(QRba&lD_tbs$ zcs)T+)RXjNJw+GjoAhk`Cp||m)64ZkdWBx8SLuiKBYL&oqPOZN^)|gz@6x;Z|4zM6 zzoXyP@9FpT2YSE$P#@4A>5uh6{fYimf2KdzUziM&X-b+>Cd-sI*`|!iF=b6TQ{Lp7 z3Z|l|WGb7grn<>D^-O)1VY(bD$FkSCMzV&;*W>{zt&A#5HB>FtnwqO#>MH80Zc?{U zf3-+GK!ep9wSk7KJ?bqQr;ezf=^7ocGiavnrF+p``U-s&-L0qU>9kPatnZ))^cwvb zt2A4PJ|tJj zm2#DQSUw_G%Qf;*xmG?VAD8RodbvSvl$+!ea)5TAop)92HPktFdaF8m}g(iE5IXtfr^}HC5fA?o{*CUFvRik6NxCQY+L-wOu`> zcBrS-Gis;WrJhwEsgKn`^@;jaeWpHFU#KtDSL&2Ht*%_= zp02MO=!UwHZmgT=rn;GKu3PAqx{n^IN9oaejJ{fr)#LQ_dX~OH->C1?3-uzsSl_Q7 z&`b1#da2&1H|ZzzW|L@=OtMKasYaL*Ce4J5G|FgWjB7mOn{-ph*ymQIL4E}J8OW>x zWL6V0s}GqqhRj++W*s53Zjjk<$ZP~;HV!hI1ewi%Y-T|o^TIU||4JJ(^u~yEZkKn+ zJLNoim%LlvBmW}j%X{Sld7oS;7siNmtfYbX8qV=jq;hkRGgu=%IR;9F=QMQFfk_91WlZYHwmVesr?5@ zhyAiY(7(zbga|S!G>X0cb)lIQejiD|AM6i7v>F+@n(#z0O9^dZ|31x3FjLseY%$l^ 
zw%lSQpL;d8b2u(AjpM;B97*PK+>7OTOr;=uteYr~z0($o=QE$A1k=bg!n<{N9^qaQ zeeC&!^BtUTt0)67quSB9$#muTJI-|D&+(=^e@^gDQoy_4^x*Fki?!^Dmc7ujH(H)T z%RXqyD`Y8-q_g|Aau8P+BU%mCBRE=((&HG1@k}^YO{25VTr0#S_uH9sFN3)4Nbbuq zW6d~wz8zUkU^!0W^XyqYanDFL$J5qfZ0B#Yv+%c5z>JAdo(AQaD9Tfy?1-Z51Z7u* za-p#c=Qutx#|M~Jf;~6SpS^WY(vU?@$f6hf+FjI}?%tFHd`6V%d?LvW=UNM|oF;=~rf-w^qvBt$rkC{&KxO+Dt)GyQz+BJ%jpr)xoNH(UjvZ-uF<#|=@se-&rcA!eKqwGXgWLM55ssXQR3H-_Z)B{hh;4FpwG(U~~ zbZ5Ua+k98QE7qS8(UisPZ8ZDPW#n<3TS*y=a#m9*ovK?>HY1k4tU-g=I!3r>yt}Bt z5BPCTlAquwJ0<*7UpR_;4>`tHzUz2?x?jr4^h^8MPPU)p=QufjCBK?e*01i@a4Pz> z{Mt?xzn)*;spdEI8#&eerhYT0hTqa}<<#=q_-&m!{w01NdruyP>_|L!9*Kf>Buca+ zQL-J0Qte3GhDRdTjzqp4iPG&zlx0VvGIk^?Ye%B;b|k7`N1{r0B&uRZqH2(JUC25> zt08eq*+ggWdtF9_cQZJVirO-(;f#fPvPC}67FEuFmhGf1>@Q8Gv8b6EQ2WqFE|HDyhTmvv=b&gmM+hO7@wWE0kh=B%|T@)CI|YsKaAa_+6O?81>^ zFzc~|JV!|O;c^V4GwTtCvX*gFLQYr;k%Sm6YaYH9$CGl5X>Xx!jA0kj5X6&Fj9p)$ ziHs>%({!Hs&vX+b!Tk{l5Mc&S=Xk+dE5`duQ$B-dQ`jcaB-yJIkDVr#Ra?#oOK~!S>Etz`e5; zaPRCTxp&qD?wxz%-YL!Y&b$gsEEm-pE3xHt_U@C#k!mC=a%P6D)YuG#`8tuKMSydZ z$t>M+DmP-!@r>Q-Q39j3CX~o{z7-`giocAK8NYX-6pP;|m9g7cDq%4^r7?b=LXr`8 zCOcxr+oTz9=a9=7yDE7WXOj<(XPBPscWnhNo>A>8p24H?QT8AA>HAo+i}WIjXRMdR zk(7}hB{I?rp*j<5Ga8n4N7a!%&oa##$~nSA9C>2GddeAP0M{nN+U$Z4_8Q0xto)&3 z`ui4@;|$_+JYyr7@f)9IksIspz4Bh}`5t`_b7H=p&of)E*YnJd>0`{5)A}^ejGZ#i zEMVd&3CPBJe}{3~7{rMHR!fn*B0#yk&PTcbC*%{HSsa!>@`y*}3D)$}ilxK8bOreq zX;UWS>w{E^(eC;LJoP1qOYgw;A1}c zSc7r&{Zx~Y^iry2ku=q{IGXB%vkk%7#*CspY6A4!(jDzirq=Ft?se4Bo#oD=PVSBF zjnvtl?arny?i_b6b#-rd@1S1pJa-=Taqo8TrM~V$cM)9)o@>-!EV8j2w@)sKXO6dI zskK(!DUCDEp1dN<{Ty(=ChV>ydy*Gu2xOe0GPrdaRfhRlNmZf%XS4b6-2r4p1e_PH zPgF*&{=10B3fGZcK4vlX{UYig7mEVaz~%uw66XPIQ=A8|_gWwqu=iUeS+8v-z~1jc z`5^1TGP#Urz*!x8KhEk%*sPBI-deeqBxiN&NZGuOH1x*h%x(*LjKS|ApY`&7j#gH# zB2CK0{R*bvsneqHfBwsGIWHl2Vz=X(o+jFO8&nHe+H9;EahipcYHLv3b|L zOU=!Gv!7a+1Lh-YX+ANZQET&s`GVS+ugurf-h6AmrOV9s<}h_IN6b;`XpWoX)YY6a zr>L7b;}UguW85J1bmQH4){I0qnKeVWg8I85SJD7iyB-a6Gu#Xs;%2#7G}O&@%g`{l zoSVyWwxU~+M!S{Wsx-!}?pCL9ZcVoqjd$y~^~s`H#;9jyPNyT%T@@{J`nrhvN6I{; 
zT;;LOW~fZo)>5pwv8uLWpRF3Hrj(+Zv;L>C{(J^D>p$3C|1clCR0-TU=ts3SUZDAt?DUd!Hdb}X{qhf!CwSwWV)7t#5o zh+c>j|JMTkU$KC29`I;H!jGVCbzbzcl?UegV!4F*zEmz}UOysNvz%?DEg>J1o0!X+ zEjJMl(>br7M@=~9&8Mb%pavLs(^UOxe(LA#WoG}LE6gdSt9KKh~z8WtL@tVS!Rphtu zeTy=(%GKOg&IMPwgvVHFwMh9KH&RteRg!J3j4I1kSzpy>TWhEql2Nazz2vIb)$1H* z-%xK+x~-B>N%g*ZpR&|`WovyW)k(IyES*JVb+*ptn46<>Si36d3LJB*>S|O$57Wb_ zlD>Pm)c~JrGoKQu4iG|9Gs#S19hP_?!L}6mvF54A;-s*mh>0*~fiYy>?imKMjurv`j$7GU~kWE}2I$ z_B-YsKJPvA0iXAwImndHS>qFbNN?7gdA(cpR>oD^^fn+i zLPa7#V;xZ>e_4ba>{HHCep}+;e=7Jd!1)k3FTr^O&b#1zX`A(wdk&<%{$deR)0>==5CC0jrn7?aJR?WjpSr+4bzv>WL6Wi!Q0 zqu2bW{ay68^Q8aJxbrx;vz+akVAl@pItF$f3%gE+6{o_A1!KNM)*)-H64q*9tu8R% z|Bc)U*I?R5su+uimJd9#IebdZ|LwtuU-hwbz}juiEn6X|S&cHLco*xOh; zf~N}Skv3nHW;yF`hPTLj2G`AtYxsJ59&GMn&dWxg9WBA|EH|LKm8lsSnCP>BZ(d!x|l}%R5Sr1r$^d+nf8nVyv5BLYDu7AcqL-o@6AJzXS z^6*gQBJ&cHo+XRixY_PTl9sH)e)>%p?=MLrfj0&$jqfdtFHK%(Ve zAj$GCkm4=zJ`PClxOXDZ9eOkkx-~bVF;5ivfPyGWfR5r>6GNhD&ZUmUo1LR*F;3k; z4xSm&IJ#xvL`*CyPK7jw0{$bu#cK}iku;X2ca$*e3HDZA1;4 z5oRPMm@#GyC3*LI51_J@&4}qe=Fn%HbHsAa@g=wWhxvx$&3EPpN;E&26C~Uq=N6iC ziv%)miknKFTf$Y8?z(O!Wx2VWLzLqjq7s#NtGRhp!OeGTQzf?^WAQw9vAcxw-PP`D zs_j1NuBAHedUpfWb2qtLse!xQ-APT|J?MaNc(}uXR3Wd_m-Qacn<#a_qQE-$e=b zY9S?ZrgogM$tit`QrJ5_LaF9wUYo|+e5@@UYsqZC1|Y!tqnkHW1zLM(AprcbveaB z7vrIe2}PM);k%bNBPZC%NSXXA{VVbI&kLbK89_TlTcBa^WAId!;iDSEM-4Fhm^Xvi zCp<&X1GQV+M&2>+820Shp2{)3EhCk)amZHh!n+B#^524C6b0mDfSj^`oT|G%_gy0j zXI*V^7TRT_T_v<@h|#QH4~B6TD)17~C=)BIik0Odqp?_xfYn@_mlK)6tC1O0M$fg- zb8F0?ZBfOsaS<|pGQyK(;K>4>)iKTkvXBQ9j2B%88ErC)##Vy(fgx9B_3KFkLz@T?^KNZPW>PxR*QHT}1ud1FUzq{f~0|;xs3` z3(W4IYa7wi?N(YGl{p8LePkczk-gf6jMeU2z}z^awYwFVa)-JTyMn{l=8AfA?u_*I z)UWo|i03ztsmtX`M%f?9gUqkP@(5@8_WGm1^~W^3|A8bb=bEGVJB%Z3{XAVAQzsZf z+WL8ET%*iE6}>F3QI^9s%JLlJujRbn*32uwQ&mLeyb{Oz$C)D=^+t{@Pv|E&wpg!K z&F*ZVJY1Q`NBz78vg=yxssF*5{?F!T_Npmv3e{x^`|Mk*xz(t?+t6*oe#v^Iuor5| zUT6UOqS5YH_CmJC-o~Bl&SfuT>+0>;Q!i$pyWU;TzUY8^fG%VIbA&p$XWTP%Iioy9 z9lZ)(1?uEA@)}WRuann_x_E=V!PJ#ywuHKQtGrdz-CO6aqaNNCZwvMG4tfWvm-lz? 
z@6_Ak1L^}0a|L*BaTGCjs>cz#mT&Km3-4Y}!jpXL8&OJ{zMRPqVyEs z{B*<;SMVBFvA)N^W;AT36l?~zZ~<$vePeeRRYTv^&)Z4y4E*X~d(B6UWqV9IYj@~F zqK~rZqa6CkJ+F_Q=wq(Bo%^_xeP8fj=wlb1zoX;gIrJHFQSj|igtr5_Ub(0>U_-aIy)TiHmPlV_EU`S z<1g~;aQ*8Acru$25POkh>{m{xQ=Fxq;p`y&cg^EP$fcysoOM2DYU+2L{SwZep=a{h zvp5^MSRRCTh`bCd+HjHQge}Zo9}eivW+p0!(Pv$2t}RyU3y-k}W30bmeI~45VITi0 zG;AgNaa)mLKfn>cfxp@tPe=t^`k(>E+Cm6?%2l zSQ{ek_S$NeoQKF^M|9-J@?-WJKgb_|F$S@0?dU;_9*5E6Iif7#{9?IU&S-9<+Q>P< zcD0=&&2e>{Qq*a6no<8sk6y*RYXQC+IT1t9y6y-ZmQlcmy zgW^O{J^>{rit-sK!6?e-pu|N{z5pdYit=|*63$Z6lkE5OTuWyii}k-O`u26vw{P$* z+`~ci5QysYQ&3_-;nq=oeOUDEt4I%-{~=Zu?JZ_d7BF7vh|f6pUAV8KzGC+$+6*+w zpYJc=tZ|{gkYfA?{0Dh;%lu^&@2~OKP=deKUrUMpe=DV) z=hgpa?V7F7R&q9_S1h`#=E2C=Zy*xQckFMsN6}*FMn1mb-=`N*|A>08o#%e5;eTND zFdh|OLdCZPD!#Qa%O0rs_D9WV7%IMn`)_TxfIO{;8e)p;GYRas<>84=061s(j*KaHO^`?o@-_KgN-YprkNEPH?RpeRbf@tpy5 zw$d7=O~dnZ8%NP%oFRd|PF*^?Gy842P^C4+Kg5V;R_UeL9r^WchWma8Qy8{G@eraf2LMj=ilEe2oE1Z%~zu4>!RlCq2`;8 znr|j*zS*dY=AtfI0d>(zsEbxXT{Itc(YmOM)4Er1$xj8de8%U&=-0z5PEPG^k6XbU?^;2 z6l`KFY+^iYVghVpB5Yz3Y+^EOVhU`cfNf$lUF%M8C(=xJvOAe(VJGYj?i_ax-RR!x z-by#Qcer=ZZ0v{q6ZXT-aTl@;{TFO#9&G3?*w8(&q4}_(MbNFK(5+?At%smnE1_F! zpj+#qTN|KTo1j~pp+yI)~2%6j+ z+-wVOwgWdW0XHv$CU=G=cZVkTgeLcbCijLW_kkw&g(hDCP3{Lxz6zQ=1e!byd>;-? 
z9sx}r2~8da?vDod$3T-OK$E9JlczzGr$duxK$CBPCf^KAz6F{*7n=NMX!1PB>|V%h z0c5riGFuFpErqm}Tbz1+OkmV@fMfVEb&_>3Q|ED(G)Rx2%8cM!k~{Cx}f`Eul!{4`szi)@#EbSTJ9jAW38#(9;#Ir$qFl*Ov zJ)HH*T2~URD+RMo#jFKpT>`TXVb&6}R+zPqS*K&x8JKk@W?d4q&cdv1_8R|dC=D1Z z78olCj1-4VB)0I)<`jNTLYP+g+=pRP98G8(1y&Og3qkb#kzlUdU-%;n$`_8K%r+7-&ZF3ZPCrzd~w$2nAcD#TU$G0R&~ zwfN-Rm8gxF@yMbXhh<|wU%=l7ML6*9ON$W#R&W=83%{`<{B1Q;*YPNu&y!YjmY3zj zK>il)CH!p~Q)fn2_u;>@I<}s(a{PB!>X6-$U5*N`f2T!ecmVT3@5Vl{%I92Xf|+P0 znaO5~DKJybG~7)cgZs8g-=ycTU$XZ|Cs6o)YQA5-gbwz#AP&5`zMRU4TiTH_7vvbjN zMTpvp+I~rdmWP)wXxQ4_`UwjG=LbP9)-*&(VK0hkrX-(0MrJR@*qWDDBAHb}PaDrv z7FL$Pcr8k39O4Yhhv9158SmWUZCN;ZNiCyGW(mKOReflNCD zQ&s)L0;VLxlxjuEZQ9_rSeHc;z6jQ^1X~)T-UJcWzU$cyKApA;IO%^l0 zG>sY=B7v$X)J#=x44sw6U<|dsBq)X9mx`QSeM_o&=!X}Xs?Mv{YwB^xYX6z4#u(a7 zNmh%_n%rH#0l=Wdw?$)32BF^zC}2*G87elX`QbHIOlcUZDJ<8oT$_*PvS(|JO%0C00!9pw6Xj6jSlSSO6?FpbjmCAxcMm#a3{ zO!p8@0N$KR2gOF7{)a<|L}!0HQ16&*1M5P?!0sD<4jq6Kh<8kd7%DPnfR7n$_M3|| zYAO*(b%nM}gky00$VFUrIZgUlv}hG=S4=(7b%Vb+vs9L$ zWVgTynGtJ8gq)>`Vo^QG!*bvC3@;rvUrV_Vd1v^xbh{L>G`191d*A6SZ>eq>Xsec5 zKTip!lL^}$$t~MT&m081`3v9HyC56<$(P6Xc`XlPmItEki#z1=muHBeFX9jbRCn@i zQ6TRgvZHe#)PwSCiO+j%L*ZjjzW$@B)5LVrf%@yA<9qC_*OmX`HJ!vYLqC$f!&Uxd zVAO77KhnPPmC>iD`feod1MG>x5(Dl5gF{IHy;n&o-UCU&gCQDk(FLq5CifhW*U4?* zCXZG2VKNLl&OnUUvi&5m#T5O4nO{8pu=^vA=wn*cEhyE&y0_EK?qZyu^*~%SEx=t0 zZ5|y#d0vr}6j-=XRv?7@F-IM_prlN@->?!gm!mwbjHr@UOUJht~nwrXd zMP6FsChKG>pp$C4`OF8@J$Z5S!k7IdaFfA$Gq%Qk<9wcK@{5W48)CfCZa@l>h{xeG zT+Jv`FIe{%8;82aB|O*EL$ymb5fUc~C*DGi%GG&v6Z+gGpOASw%-ZI*HDCAO+xas@ z4+|tqB>PH53XO-tHCFg{B!y|4oDU>F%y&n#93Opd*Z){Nj}G#|)Ul z)qX9X+fV%T^56xexiq9LBd2yML(%k zB5#lq#asn|+Q$BMV#`IRiq3R}BSL#2_PX4q8A`MXH`0m9mAV?%Wtj6tD{4hSCEMnB z?AnoPJ9C8QHtbSzYQd%X^Nvn&X@&b`Psg54wQaI`g;grybB2wg>!s&P59Psg`R1wa z(hM4O^Ki!$9}aST!t`C3KH;-@X#In@Q()QAf7C-NQ7%L+{g*In2@E{|)PQ#6d=w*O zUgC|&C6p^dfF{5Lg_p!x{M65t$PnL917IDpjJb)*2mlGMg|@(5sR0&|UC6qkTA;2F0G%N($TBFPfEJ=FIRJQw4a_T1jfI5`>VC^{ z%YMuG`m1p%kz9z)*tA@~%=yXZ7jiMyX$e`eSy 
zpcexlp|9wHgCQ6p*~IFk-eInYSFC`WkU^A9ae5I?sH-o)_7EzRO%cAp$9JRw{>Vo# zVCpL14f#q?MOeTb1E>*_jNC2W5%Gv%$PN@lsV4TOn~II(B4B~~*PnxA;=QLO93@*w z*FL?a9mN2&joPcH9NB0S7S9DhY%eo{&*^7LN^uc=i%pH?Q1d~x!v(_El4a-zJkkS| zTi&^X-0=JOqJB^sri5bu%7Zr2)}!yT(2i_$(?6osE|Nz%*kZJ1$`d5ABh+6G;=-z? zmq35HbGx7jW|de>2Q0aa8_7e*0aL^y^cI;DL@Ekc?weB-fKFl>PT}YIFUN9D<<_%r zfOjp!2YP-9QRQ^ox;JE9!o2ct z72go~wJ)fhLuW2#+e15ZZ@<*ip`XLtf!uAXidsImSWA?00D3sDNxMRA_L(jjUXeOd zK(RJ^OxISg6#Bge`>RiMyNfF}G%=b0X=Ym>sA=^AUsQ^Cfuw|gVlcEe8ggGZoft&H zUJ&u@A$)VwfxPJXI>m?y+p%~zCvSFPQQU~sz`7Ew-FhFpBq&-wB`{6fh}FQbLeyY* z7QLc=CT5j>AE%zzSz)!PKkaM^Dmt)LYfwPK8S9gpIcgN5X?}T@(aE2-|3dYcDg?@Bfu;4p}c752M;VKtZcurM>P31 zFETbjo$V;GZX~}p>=H?Oi(ezju^*BRPfEYn?qbCG%$UllG&;ojP(Ha03nst%?HWjW zD_t{DRL8y24Kqo-H|{1=fW)uOhe0$?7{lF(uVwTnI_*`rBrc|l4JRuuD+p&EmEteS z&MK$!0BV(#5&E&D!Z?dfCal&#S0< z+m5BIx|+V}l<*fa%w#QKT1=YCG?+9NuGlmfuQ;sOTLm`7xt^oVq%AdWnwK}sui&1! ztZ1J-tni$*+~oyNIY_{5ZwC(VapH0y5DHro=NkAB$18TKHbJm9>Pg77rEvY^31%>KvHK7EIi*)d!f_*4 zC~wY-p^NR>SN)ApmwmlQFFwTEH$C2>Hxb^d&a+fTB4Txj8&!c2qHan_T?Cx~f#^1ASq(-X%his^vvQTjbaGjnY428PdHVK@HqXBLe* z)y<$AX7)h+lVet&$))a_5vSeG6Skn83j&MABZEP|hJ)R6s9QC--GhrK1Bl;10{8Sv zR3YX@wnFV_psn83xm$C#&%F*x>+?y!dwsAb(Lypsoi}~xoyoA}*U_IUORD!=qo^mY zOO8o>ziM>vzl<8OF4J?Q@6~f#GNung)}V956xIn@GUJghMAm%1Cr_WmtO-1&X$s*V z$E^SHVZ_lRUZ>Pcn8-geP*Y1E$v-f&%*Bx<_uWu#3iX~|XT+OK-MDto5<+L#dsMx4 z-V))3KxfpONj)O$POBoO(gbfwTphqsJAMk$6!_+gQ7>3mc`DWv*-5UBSbKJg(Iiqq z_>oPb{FJ>Zy>sHehI2{3DLS!Eu=ed#&iQv|KVqF=y~rukDf0^*Z}0m+R9$zyk)acc zM|xtd3jmhPIj(b1zV>0+>>iA_PvGa?DVcMU*BI>*RYjcF$i#=iuiHI$`=74wSj)*3 zVO|vK#I>WRcSR~ouMFNXpj0BD&vx*W!4t|mr*{G< zlV}IDHTbmJ5z*bJaOv_S^-d_*zj;vn1oa9Jib?)3n$mh_sU{X&C-lLSdc&El-=6(g z8$p&e+O0>qTf9^Y+O&Dfe$=5}ISFMgQ+2&CW7voN!@Gmm@FS9mULN3s5D|b9o+9?0 zx^J;{qIlD2tQxNoty-Z+tHR{4Jyzk9zDX&AyyMCDXT&Dus;K24hmN;hW^Q@;I99IF zcG0R{Pu<+6XE%Me$insS`dMFCZ*qaMv^=4|rgeFe8S_RDaoc<`MYQ{$X#wy6oR6TW zn9Mqc+Y_Ns7eET&1B{2HGta@7TI5I6E4`P^yWSZCJ_C|3C-8B+{opmIaNL-yIw@5u zX2~Df-e`ui0qxaf6Fn9;#wqZ_CWqw+CmNS{PbqY7Kx^NQrXI4yz3UCwVT}`;p|z^} 
zkwB9cqUHoF=O)HiAjSKY6x&-kRn~=aH0UxRhs~?5CZAQBLh|*2ykPilt%V1}juc1= zNDR5@uc9dV5jMvRVI&`mTO^5~qS%N2QDc`CLQn~Sj1TP6Ts84`$Ae^TrH*ED@eky$ zb2a~6jw04JIpmlF~@Vm5z1d4jdHqpRt! z{PR)KS^HZ>0a=;R5XF!X_zNJ1Z5}k&s5DTzb8>Y&8Uhi*w4KTQ_4Nx*mHgLh;wrhA zBg}aV)HPE(x}V`Xz6{n7O)VHxzy*)MOt1_djqlK8L=J@pYYZ`q9Igd*x*O$i^@KVZ zLzT_HCT#_~Zv2*@*zvWsVR6Dv2wArxNrT6FU5qC|qn#S)859RYV#;z=@#%AZlj^T0 z8om^2e-+TQ`qpgOpZEOFK5`HOqp+Up7?%mEDEiVkA$TYTLLU(+@)ZKS1z)6jC>rts zvAo1bWDE8Yf}!L|e#|phjigaClAcddX8&JQTsrccjl@~^wz8gZaH05%6`-kIsQ?E0 z*=7ZWdjX6;9}sXKxK zS^qAP&6^mWp+A9a;I+^!NOt@{ad>uck#)cl=QNe!W=XeSCLEfvcXC6dPgrx`&-=A0f*vUW9K{V7Zc55PJ_yW5|Nrq7`Mi z9_ypKxg6oM>u=zddh0%|B6#*RJQWbWnp3N#^8>o8MGUOqcbJ@hRVD&=@6&)uG(b)I zX3E0h63}d+bTD2IuXNCV4vU72MT$w>74*nw2s2l#|Gc9-@+~8R3uAh!7VhKvzz2w7 z+xT_;PH`beQE!u*-X}(#>^L{f2tHdO9U$@5{Hga;`t1hNo+E!auj}5z< zd~86UU!8ZFP>dEo8;QAPGBnjP{pLcdI5)@~U^o=Z=zm64HtCK8-L!(QjjuU3_nkRH zwX$=%oSg5UBfH2v5S-h(6trVJIh|EHwGbXUx8nSyU0UxYI7W1kh{-ltH_34GGmvSl z-Rg4u)h>?Omp1qko@_ES4FFa+kHdwLJ7Kbs+&v7$P*%~ijF|W7nFU#aX?I0XIu$2K zSeTJhn}%6{%_XBCf9*@eoEz2JPNSMIDUDy!tAY@u(fzm~``y!o-kFJ-N5_5ZoC(2c z7-dJf4dqPe6EcLVgFrYJ_t>ax&bb__7fAcUUNp&#=ws})2DzPwhoq4mq*T_Wopzs+ zR24Zp(0-9{j}p`vAV?zJK{_FeO4q`UJ&hg6Bk5nt^>I5V`1FnrYvaK@`%dtDvYSzIeE})h*jiLR(p+@dw1v3Hxqw%iF;GNchT*ZZ1%%My-S)R*PKrNZqw%Ln{AM z;}zW$PWt!f&QwDU{4Ez+#!CxWsP3vi(B)s~#(RkJ*1+CvDL%i(!oETlM$TZqo}vFp zUeh;2dZ(sJRM{U|x7^@!WcN z{_=AB*~cb3IOKSC(1iXYrlIuniJN4*GQLpt`F-9_X-96{`tMnmjN099wDF(wgVIT ztP95slHp@#qKW4S&4@%*U%tP5zYK4bFY_w#qJAW5LIMZbzSv%d&nuL9H4vC1XWdn- z3@&9ufRta9FF^w^HXiZ@COmUHb}QdpD@vz|(9e`i`Bm}YCiSNYcQ(;REL<5H^lM8o zWQA|7rxY9)+x-nRmrI_hz4#ufLI|}s>Fz7+HfhYG52-@AHv3A1V3z}uDCYX=0`iPu z#70rWk|@v_WT>^16dUMsO|^ul>jKKWwnP7S?O(e^#NCR$poZO3pr6T5uOurrFy)#W z2v2tfc^!u9z<2XeEE|dFRS8LU;MnKYh9XL^wjD% zNxQBzgh`G1?I&{%K7;X~gO6UGAHAx7_1(-{wj>QeS~(yVuw(73%iX#$OI-J)hC`Sz zVn%;_?DOb=^}o~8ju|@p8H{PsGn=^iYu1SA)UdCbVrBOMOf2u0*ii46#!hA9I7vL# ze%7%X>1uOuCvbC^(3S>VEzP|6&N0sv`OI80qw-R7lSJHN$HAtEN?I}8BFcT=HKTZw 
zgTdmp&o`);ZBu!+x`dn;e}0o>HltG)yT@myj#z>!yodMaZf-H@Q7UU)iiDtHVLl2;*p|rr-s~wF7)CyoiGM+Acp*5S!C@A)->=;`UAs!Nm^T5lW zVNuG9Ng!aR$Ze2JFb`%x^zgbwbnJkykOG#^sGm>~_TydRqoR%FCiA94YD4To_>n!x zC`pRs36uE~5k=@l*TP+KugbnrV6q_ALK)fs$VKOJav+V_40(VBG)DDQbXp}{16Msc=o) z5r6O3TrW_-G^Y3>1b3TOF zIkm4l)r5VE$8(F)GxaQz(;z}3irp|~Yw906)!q=4Re*4ddzQl);)e;cFnw(Fi4Wux zjoX&&j`E!PMU^w$I+)elTMitI?64eHcFr`{H9fGsd7&% zktj1F$3B939)D3kS3JeJ#JS;8Y!k@7koKLxQm%Pl6H8k|t64KzQ$feVvbIT9r^~9; zs`{+tEaEKhEGT=NX{kEXAY;?Co3FdyJN?c1nc$Xp+vW!8-sT|tRB*+o45SM(e%E=U z`%Dj(3pNc#0age$_!%7B11ie5wXgLL-8lWY?l@_BKZlGdM~jwzeeW-*w9IBCF)Ctc6Ws}@`fYveJKD~=Nw}s!Uzp!7PUmFAkL?rA8{0NdZt}3=Fsw%D{eFGbOlpOiL1lC?$vjXy zRK1K~cBGNEGU%xu8MD8wmYgHZxX!aF0izC8eI%;Zz64yft5vvsJ_v>GoZ}41iS-L8 zl32U82d*5Ui*=n0F~Z5B$!)Y+t7C6nYU3JDuVc>(L%*)~bEolJPBZ@6tWHz_-;4WD zr7++53u>W5Tl1O~)sXrVhWaaM897eJk+q@brj*>3h@5?BF#N;!m&H zU1?9bq=E&Z>#VrZSF@wI1F0^_YxcO&=bW>|*OuJ_@@_-heLA$L>b#Gx5LV+q#?5Zx z40@QoAG7Vzz10-{*i7f|p2q^S$F<@{l^sWm4>w4k5QiUR7-eP|X6+4PbeqDz%NSX4 zw20b2$@)c0>qq6b$9XncPZtzjnra88+sH zOQn7C95#MKj6Dj#|L72koGKq8TO9b$VyVFJvwGDqZWkhM^t~~#0*v8a(p%Pk*;{7_ z7mwB>Pcc>Av|_iDf{3K`l|Q%c04{M4lXlgjM3(9{WVe_ixtp$GV2i;xdiATWeX9Pb zKwR{-bu)jcOFIJZ_c(==UD^wl%O2S17h)<=kI_zH+$I3?@<`wAA&T=fx`{*|m&?%W zCX1%M5!)?_iPj(bW`x*DCf$YEOX}fvNyZZe*AXiqhSUdXp}(-@1_ma(C5wo9=DTq; zo`xXA*~IIbM{)t*(A|r-WqV_MNzM6w+QyEF1l6*CT_nG_OIoFJA9We0SfKnEGaDyX z?^o@uwI4vHv(P=Yu3kpaJt9AGEsoz<;U(e-j)<>hJCA#~I7~B+dvHA{KS40OGuu6K zj8~{nKQ(SMmSS|6JARd@RN7p^+gBNgY=vlnK4cP&|Mm4lL&NWi; z4TgVc&bX~YL9!HD&PR72S(cq_jhy42Qq;ADm@S5ECQkY{^j?EJy&su(4Uv@g{) zM5k5~>M&Ef)-ab+G`1aRte_esa`JNs~xNhR@{*S!XDe47AM7FHDT zZ;K~2(FHd~;#Evq`?frbsB6vG!^w#As<6EzZAr*|tR)x^Xz|}O}2v14w7hlm3a(W?^-?% z$(lUE^E2753Sn()EOWIq;{hpI6&pOOa?!O;iH=k6q(x z!mwZ6#|)DpH%SihoqJRrgLY}0!{kKwNhS<;NhVlsqfcSYe1q0;tetzUMX%$md)0ql ztP%xMPuTTmo{Bs7Af_qZa$Q~_Ogi?82VA35$f8^$$eBM!+&`Fd3UfrJ7%{qznk8;r z8&PtvlSj|;&w%4DLP=Duj^@kRM-5(}T%-QA-_y(_%b9PcNd%w`S$nZyx_ zPrDD(o)9SZ9WNmj>ywU^-wG@$Q!}Jz@1vD(kf*Jd!gBvtf$ExlMt$yK_%>U_7FgJ@jSmxel 
z9>ZFI5(XKDu~2r`&`;c9M7k)y80*ecrS>Tk3qT(k%bLcmh1Uh}S0^E`Oc;5Lzo#lI;mXRjTRRXhaA;r~^`@%D(rfzKGP}m52u`>uB90H^SRDRMT#hLeZxN|eF z$-t$=OhaEF?@6YFPC2ztxL^gj#@L8j-xE^^m%9{K@WSk|R@neL?&{ zPx9T^1vGjzaak_@ZE=Fhkk2dJ^8)OAbW9fB3%(U26=fCWv-Gn(@-itL-r5TdTCP4IO6eVNKvOYh4H4aWw3`9P;fw z4w81Wd`P!L@!UwMa}zoz}~i`q6r&g zy%*{Ytd@#LvR;@MDD$!8%1#8ux)C*q=ly6O)LY`piXFq`1SaSQxPtHp=)FbCe{1hm z;CUU3W0*kKui^SeXwwXEz+0h^5T6U}qq_hwP^5KTg1#io&#Uq;;d~~`Q2}6i?;Ka! z%{h=`xb-NhM5Pmw6{E4G3=XU5=NqNs!^dK->_Kxh{YDibv?|ATP6*=luFx9r{3T;@IhfCrrCulF&0SgKBZo_p(FdEnV z{c7!v?dyovnBHejmj^lc3w<KuuzvKqgrXS$4 zi8BZg`ce~9MD(J%BW}o22z)QTKkRYw(v)QPL(>u}euStw-^RWx(k9*D_Zh&~=SbU$ zjhTve{FLI_8SZIC3FvR{|Gd6R;#iv^CuB3t(~5tf=N6}YDQ&@WB1}Wks(UMQO^7_U z#`8y4Zfr`lL+lW|*G(I9;((ZbV($3B;!XKnqej$RR11ELMLMsY>78(>Hg?uqLaABO z4CPq3bAzFF8||(F$|dV7IZy`a!=CzOF{w;MlGO?ON~7MQTj?QwLe!VnnAj_*i`Gf< zj9VD28U^Cg=D23%D^Nnetdb5&jS96$svhRlq9x-pU;teUl_K$zZ{xFw4MK6niTP#Koz_^ z3wY(3^Bfw?SXm_LwmU9v>{gkKMLaes$baTGtM*oUiE>w3g7&@tI;Af~zojzwgR?t$ zt>Uws52afHjug4Gtz7}b5iDcpJ;7JRHCNkN3qdOGMvWnO+t#hDPsvc$lvh;s<_a;( zTh~3~V4PTMpF_Hl*HpGJeLZm33~eWW>wG@fvX$xS6mpO2+^X^iQMPE$B%#`(>%tyM zL;1--a6+$rw?!NN3ThK7*nO5yhI0kJmOZe$Z_ihrtpS4owIQ6r`--`{;{sz z3hA3wI#Qz*@>~;JiIrDw)7|eWot!SwauJK?gvX3JN2;W_qCH8y$>?+Z+sfO)mX)L-SWLfbce zYTz$dU(okmBT!@dX7T2?R;ZuAumGmL>bnq{)5vOi2BA$RWP~?$&j|3u($@EROcV*p zKK=0y!5@04<*k4@ryd))mG!w9dTn+K)87Mf4b^tiPY3qg^vvnn#dh$|8~B=?t&Kk& z_yb}K+SUey3+kH3UuQu{(|xyF^TQ+8iTUP^!mk-}IZ9pWR?Glf6~%Aouq9iCpQStV zLad!dmHb1X?Pv8&VS2?G)ljHO^FG_xi-n4zz(Bw<(D)XRn+?qcaS+7GW*11MnS5o~ zy67-*-s`{5xZyd-c6fi}YCfo2cxYdEe#_F#Ydjn**9^YHhNjC$82Ul_Gk^Tq6BPzF zMR?kA75Gr;`jNm%?p1`I{SH&BE6I#Flu46q}oagNMy=~TR-eEzXf94 zl7zEA@&*al+GD#}ibkMB7@`d@N5NPQkOP#TkOFG#S0ZZao&ky#@7VJFRLoTH>OLDM zM_jWP1_N4;56BvvKugb6o6Q?{lCF!l!WQ`CmscYF1Gmg*i;iwg&l3HuJ1Tvju=Gw4 zw{=ft%lO^J^_xv+_uxx#$A{X-v`C=8-r@C92>GpisP0YOLvayPhvHuEq+!YHk)U=iUH(>PB7UdYi{teEX z=icg$e8P(gyTj_$rWSjDmkpPtRs5fTo(e95-nQ;fKRs4`2AwU#Xd`_yYzD0@?PwxB 
zXPqjOa;M}S>3sMHVJ|Y5o&GEaht8nLBGdmFqh57PnMLP(sjDO{iRx$<*WvxWjQs8Lla%fp)AgZQa<`U-{F%8X4(=Sqe}^XmuiiGd z1OdHbg_pY{95NBc*2#>6o;#ySq(#jDyXGctg$=?&BV=b>eG|Lp>ir@e_lb`G z^Mx`<@3>p$&7~CNbnKJ)tT5+e_?G_TEj{<$!MsamoXT9l1i`ZAom~@zTj7IH=#GTe zkX0ktPV*W9PL+OeGIkTrZTFSx{O$yy?9J#%%~@G*o9O8L@U+fDOX7jMY%$DDrKPvH zKC^wKmFeYP^k8FmZEu2qzEyAHMOn46GP>(<$Xddg`+DyBy-bz$A+g#qYEy9c0qbpJ z=V3?hh^PC#CsE*3+N$byYkPS+HDU&V!Xu+96($oS{MmZU41Nax6< zLKAnseN?T=JN1OmU<2=sXNCRE)zR^p=Dj-lzTVttg3&H0(F3U+@HQht*;fO7-L;BM zV8!3f%(oYzcS`fXPV$i&kR>s*uuvUikzuDCuwNigM&9Ye!o#t?QIlU8sdm2xQgQwRS$5u6TAkpN zrZk8`yg%Qn8(`Su??rE{Lw{!z@Sg-aJbJ2HT&VxE@OIuBbgC-5QXP1@ugV)~E$2Ha zY4o@EK8N|+GH*maY$4-^^IIh1yKw{dnA|d!}o;k{zLFc;T_n1Y4Hl#-M`)f^#F8fL3-E$;|mGC^q}uG z{t)WuyF@`A=ipv!lgQ~#ivQg?%o#b$P&5aHM7 zF97Yr26sY=aP#K5X&3VnkDdjaQApKl*;{w@KUCysE2^dPQD+qc&F8-Jm@5UtAJ-Bz z4I;+ajKiOPaa$Z}7vEsNu8MVu?6A21Lw^c$wHVSZ{>l!lHv7}Hg!HoW37@)%tk;5F z0{E8OzR!h!(T>LQaR|G4&n>GrFIPDU9vIlRb#+ghTR-x$8WLhYzFQA&9z4E?7>t7p z2!_CHQv7(kg&lHwme2KQ<14zr*H78> zS|FN%9Iw3*9pK%7YFjMt$jzzjc8ID69e z6t>Oy{Eb~PM{7S%<|EKuRjcgYUaF})!=L=Qrv$+)_@2pu&}7dhq|vHp0Of2~v~nH) zH67{CQX(86x|HDa!+eF2xlCqOe!_ka8@%G=^Ht{yEp&xd*ukL}@kl8BIO)Vv&3MHd zXMc%1%{mV-E@8C)cqpkl4q{ezG72fjboD8Jfb07{f3`qQm{7c2{YIHw=J7_kPs^ea z+B%X@<<4bU@(fORr2{9ea;`K8T6T$kdaHI_JLne5Py^B{bngi?KUoznVtFn`VO^o} zxaTA#=LqrxQ7uDKDKe$W~j?HG5&cwSqC9Z zxn<-0U3ZanLaCIP6T6wJA#RysB(AIk!*Prde^cN65*V)`q;v0BNz<=?Tx)}bDS`YX z6S#H@A@$-J>&4|mM${IGaM8cTHZ0j@m!#f$Fi(+|92snW}>PW3HZ*Qjwm2moG~se=~a~SlT}pkqb&w` zp7{WIUa=Q}CEtg4BBH550A&OR0Ou057oZg;QV3q z&AXK;@kRNcFtak+3tSn!5*Zte?lrVyz*toNk1%scj>u-0BV`GWZ;HB^0Gp;N6c0Sb zlB$+;pDhL%ef1g;L@k%$H$kj#n{zm*v6_4wRb4DNPD9tLK6Dpbq-)ckz6+~LW|hk@ z1InVAq;bsxQYk0c?21DFClc(fa!&H+PV~dVZ%h#GG*dFZ`V(woM7hFb7`Sd%d>FV- zSDwearcBF|v2PhxJ~o}+t2ehjs!M^e63RcYXz^RmssRST`I`>+ERD&VV>k1$+yVT5 z0XF6QSlq}t(*kiacHGiw$w+bY|B1vBjO$SH|AJ|3wemnI9Fw1~DmVtDJ2+oEgoBdB z+hB zuQCWHkFN07r6-S|o}Cgm#*fxB{sE!t@?To)qh(s^oe{R&y7IG3yG?UZ6=GmXFUUF+(O#6qDK)GGUnBT@j$KLaSleV1IY_x#{H`=` 
z`)=-R`;O;j^`o?PULu}GdQJWIx^f!K@VD7Yf5c3)W$cQ)xg^Mlz5ssqM8X1+dJoXnHs}+Ekw*F9L#8VbTN>|Rh&0|%;D!UK2t+io_-`i{X>R7PEWy{wSDf8ZzVpp+7b_)d{3B)t*Gya7%f&MGX$M%53EW} zFM7pa)c0BJ7^~Evz&rN8OKb|?)sO!pl+ViPEHA zUYkrOS8|a?$$5GHSqcBQv%9$sbiHpo>bT8iwl7Xn72)x0aN zjS`7&w&esGu5%NSc1qKG?FnkMp(+-@%ANk0ci#FfU*bo%xy_B|s}nX}mn2G5@|o`` z(M4;!&5isICAL5LV1x*lroSb?`Ea+UKSe`2N>+#%5}=;MXa)m#$QMXz5ksB+v*flm zvZGGeeBF{bSjlI-XGa&UFC(~>^8&Ue0V#~I!JZ%8h4fF67(2mH0Mi{jRn zPzSG!)uC3UiAa0v1c&_B$x9H*dH-6t%HqqpxI7bdI2RY7hXfJP&NjrK3?E;EO2M`^ zJN!*Iq7~bLW#4eDKP8+6%RzU{j%BbP|G!Ic_hzV*#ar1%3Y8cm?adNo@^$|UDeNN< zquEy;D@>7LNj7Bc*F8Q^7EFF_otrH5X1sGK&6aulHzYxKres)M+JyG2ZrzmKUM{Qw zZpE!{N3CWVZ98)W#phg3gQdLk>z|$A<`@*4zx{W~Kq;$yrrmLbib)2~&KyqhgsWrC z@MX|PIghS9Xe>Q3w)3YSnfCCZ!_>a8X||rMD_=$P*d|!Qd&vQN0iHy0Lv8-yykb}q znsEP&29)--vtQKM78S+AY}0TgoMze>LV4i~)DMA0njN zyVi~%SQ{^Wj!`@wWKNa%Q;(Y>tjlMtP8?Szcq%#4ZWcsam(2OFG+Y_GS30ZUjA%W^ zro}qBe>r72F65#!D!N`^B1)Zsy?I~6RNBmfwGInQ;zXeyo}0hWbYzNyIAHq zi=GGGv(Lk%73jpI8GKG!jlW9Uyi^SpF6Pggt%|R5yhmQz1T+;=r3=W*=3fwdL5<$n z_)ZEwMA(LB&IoU=6G9&GNG=MsA^J z8m^KGnYp>BTx3Zp3OjpVk@(yk^g7g$faHTv8`6wd%(=fKQNauGza@RwqnQl1gJJA9 zZowCX_BW1fwmbG2*tje}1_P(8BP_A9D@z1Cx~sXg zeJ+XdI5D8}Az2}L>!+?i+eAk+uGg)aXd|AWo^(6{0of{({P%+6C z#N{AwI_Zh`OLV_op9y+-?BP4*Pwzvgszti*NSS6q7_1Itj>f-yH*M}Smy=t^4+0+x zE!NPQeiGWg1z!c%aNkJq0oyUNiEe0%WrHtxYGKR0&czm9*j?xC$hPOle#r)EoR|bn zKWEm$@ewR|@KkJWJ{$zyyL&vl+szx`QJ&bqr=QsLoVD;qh!;d98*GkyF8k)CSQf<4;FXSiT5so_Z zZ3>W}H#MGKijWY9idNRke45hQ$=-Tw#hwc;_pCrX<$`bhmnXM4+iQ+0u6ef?$v^Kj zSdVDR0j!#Y@8?m~S4Ne*`)faz)CP6!I_a!uWCl+Pkz7eVV~i96 z+c9HCIEck;+c%iS?4h;un`+@6C(1l&6S=TgVUr4``OeHs|6jwXt$b)ap6MSM>&Pe& z7K8ZKdGuTwPpYCOY!@M-vWkl#N9M1SnHSd&cpNfNb!ckmF(9c?s99Pot0`$%QjZbB z^=ic^oC*x*Rl-xZGj18z{u(3dwpDWX!AIsO+U!5@w}pN<%zBU6RGv;6^bgiGL3NizVJSIXq#aFe|UQfptzcK-}_B~kl^kTAh>(* z;O_1g+}$C#4G@9^cXxLQ?hb>yGr*vO510M!v(MS5uGFnt^?g;p?)mrAYpmC*wO03f zx*uS8^hy5PB-4hPmM(+^%ZrC^88#vlsE9XPe{nYhHwl1?n_;2%aOX(T`MlUXxde+Y zk5Y4o^8ciYw0v%J{@CH+csTbFa|Wq4CQGcJTP0Jp$egjMplKKgO* 
z7T2~o!-bK=uhnH`>A)`%{d5lfAR4aFXcbeuJOB6Tq6y3M#?E(XYn{CJi01RrrHe0+ z4;x5|1<-9ytx)=(B3AzDWldHanyIRd#s+PZPjdGPURicvH~7;C{1p6H%}VNWV1hbm z1PHX5^$C%ktAB2yhQxLy+F!IWL)fn&L+`ffk74}$sWduUh8StLigfdJ+yuAX!ZzO{Wf^s*L@t7Fh}hjpwgmb;cL9w zouXeL$|RqSf0itWD8Eg4eS4Q$G#$@_iyN}&HUi?^v*6uxQ)e?w8rP(}X1{*%$A)4q zZV$DPtx&p9*!R94+T3Z^bxNN?bZQ5!ZC_V+Y+!dKJRqMB3T&s1cW$ecFDIqK`}u$D z9?E0U)tqSR)h~U2SpMe$0>7xWUp4+rtCyr5BuRXi0w(cT+w2HPPOa=uvLH}x=H03({3bX8e`(~7U9>u>Yf)xXx4Rs4 zc98NPqni6moiAOe1Zx3?o2fNLGaZlgqTwjnh5>dBvI5m0Fa0UdBghHH{$_PzTP0Iy zvmnh*WrC~JJpKK(eEIUNN6Td9PS+SG*!{)Fw+pws&u=@gq$jauI`Q7r%gn#6BTL8z z1L@=*j`wNP6@sDvR8!@*k(Pe6MD2F^qv~`%#;)z+lm2~O)l(oh# zu)cFNlb1GbLmAXZ{k2%_Rb|2Okr_Ky`I1@OB3%1AuB}5-e8nc;GDcSH>zdLl&a8`8Is68&&Nz7txzPFK`xVL@avnceJFvXtlmDZD zSu+QF%JGhmw!LQ_#Bs|j-==_BxrhZ~A9$3jHBhZs$Fkw>@s_XAlzXH64Z*q6Dc2MB z%tyIm_qoz_S8MJ}1Jt0Z zDeX<5GNRkO2=OgP_Th0Fqwfo`q3-=kp6-<|@fOG=i|OFlgjjhVPTXL8cYvQ*X>W)h z+fZ#EQT;(gz3V6LwVIc567JsxB~FLjMH5~|jR{@L#uN>h1_Gr8-6PJNU(M6Sp@NJz zR=HC@tT*z~nx{)bB^YnCE4-u2JaR|Z$q#XO1TpQ^YE{`hdy6Mb6nlfMUF;gd$GM&o z6P9bn!_2s-%H3T)GuOMPHb&dmAFYTlGPOAKIljd>aTBwWR`TQy(m2AQjqg_5$)446S*?N+rh1qZ4& zcOQ4%`#K>Ftf9@L^~o6MMau~qxdh09q!E05DK-_1d~?S~73u|8yWd@NZ1_RHyQdv` z8=)r=i_iyOLjwo$>p}v~66AoS1&i<3oV*ItPK?Bhv$Z$Aoyq=~CimrdS=fE`CZ5U3 zuP?1WI1t~}YmeaFyTxdD@6NePkB6$8aecDG8B5K}m}o0KIl_Skr&U8ou1J3^z4ZP@ zX`=N0T4|*8HJ`Q4p*MG2KkhcL#NGOlKjFGVzK@-Aw$t4b-Wm8LZVbwkTJ5=-c$Rlf z?m?GWY;E^o81-zK{=0g(tMrE18_MJNy`xv3)MIgI{r2*8e~(r^?ZZ$~(RiSNEatAl z5ERs(VqhT7y$Wx8X{$f5IM)Za3UwQjJ%28|>c$fgS+gO2u&8SEVLC#8O;Gc`x5F%G zV7pN{Q84%oTwg44qi#G;oJy$j?ks0KFlq19MTG+wY-MKH7Vl*8=4ndaI5+6~E1pCd z@Kh!3>hM(UTek~n#l7*OG1V;>Yrxd(()v<4GP6bS6wbE~E$CQuZtdh3Fjp3ky|sO) zMvtVeqvWSJK_9gc=O^Ep-E@>F$t-l&NTeVq%5_d2)uyv3%_#hbK(JW13hC88F)vLo zv`=o+)=x@l)2;L`lMJQY9-%SAUb8N4pp;pSVYsteEAA=jDOR3h5r0ZheuI8LG0AyU zhNtn&A;ilknf2*D51ZMca7_2OsKQkh0unFR-yrF zOR^gsY-{w5mh!hcBWX1M4RNb(1<}BR54P6#OWD?~ zg;KMuei31q;3M(TDUcG|IrMrSM51_PZaPMx*dL#2(ELj8+Rb<^ovN5ze}1tNsXlCi{GGa9I#qtu 
zmThHd-JrlhWOLz0{{=$rau>T!DfzaJqVxG%pV8_${>t##xk28tJ*{aZz(>UJ%*ElM z`o})C+e(%D9g&@8_luQ4*-tNwv+Sxnd-Iktd#m}sQIm^{I##x(beHRMmn-b9ikY^` z&zzxw-`M&iO5{20vWGMkUl>5Fy>g4RdW#edQf^N-{x4FE+f$7%`K_thZE@M@iX_iy z!u|uQuzc&8=b23IcGDSR?)Ft258Y#X`urCtypzRyV(}#CnXX%Td0BaRABo1hWn65f ze)Itrg~23oHLQ2<;IVpO-@PLv+lmdU1AmSt7gG}6DY3N&?2IO(a3J^8h+@)3oqqD` zE>1r>ipXbwWf=#zp05z^UzPP#szO3LVTNjKAzATgJh`6)_z9iG` zuo=+H$YUMm;K^)tq?K^0q&E&u!>09`Ir8Yeo5rUj)c zoXZiW;G6C}``X8VK(%}NHCXOGMo0R!56z-{mZ-!z6gQYjjdAngD?=v&XP8>3uRj!x zgLR7iPz?Y4zDUxs7}rbyz}292ZF^DJz8|#i|5h5=_4X*Fx+Cpa)a*%VSx~L1noTXR zOeu%z0b#@isY^9sd~h0JcAkNq_`{!k7Bxne@ORht3QFk{6V{p1{id8spo*|*sZI;d zu(w~ERUKYsU9^DgT@ndI(t?HK2M2ckNz;~`e@~Af-oe*%P0Kk0Trk%SdMC*8uFx_c z%TDMk+F!UqKVi*s7!rf8l-^aTgHD0d2~XiIf<11Z7Hw~{{8a29s=vG6Ki>N*8`xoC z{{^C^6J}Ej3!DB(Ao{k_)xZAv+Plmxg%-r~z^9zJw6V#|zagd!EBQU3g!dt?&v963 z`Sifn-G!^2Cw)D`4xmmL<8(pZ0lRDbUM@tM1;6NzTFQqxW5Io#KO#vJWmC6ynuLDs zpGmty-_N2t3&4hU2z}U$cQ>%(kgxfJit>+EA|?3i+K3JHG}gABa#2ICde@tM4OJkA z`X-2j^LHlue-f3+KKi|bYS#?jY&CB1P(n^6ec*e_dsSlsx$uD+*t%^uEd;|}%?nOn z7(!!A+c3r&Y-ci9FSIZ}Yf3KTzu88-gvt1k;E15mr`<@zSNor=eXZakl>Ce!nTr;o zfP@;g#dl8<3xgi*^nPdGBxl}va#aRV*T@ONIN3I!-Td`X1O7JDjeBu&I*EThCtSx~ zp!XUL58i4_xEOP$g*BTEVc*|fc>?r?>GsMXi zT%a;d2>EE%=gfz#HO9p%v@jZKU!x#6uKS+AzgjNiQbN91GX5HHo0c@I8)9GHO+CHL zJ-vkfJ0`H4-%DH}Uc68InSNQ5f{M6`(QG^To;X>-tQY(pf0-H+b$l!rldmO!lp<+x z1Y=Jai5J)9Emv4_(WgyBnb+qn?;r^Xme*-5FlcZ_t52Ay*EcNFDW=1?7?X^Lza8-g zfttqX9Ur2NKYi~5)nFy}5MEGqz>OKhbs*D|!EY#gdAle}IFS9iP1bw!rcNr|EF+n7 z2v^|0Q&l{9QBQ+@EGXsRSUsytCkV1{*N5!!iuuuomoYlmUyLy3<&BHq9-t>VV8*T$ z*X{c^48NO^BH8n%fi8d3i;KD%L{px`L+IOb!b73>48vyp<$Dhe9`dz1FNB=fC*Ix` z+YF%hQJBOw_zW$K7%ecf2JtHJ;d`@1?P`4uXKI%-U7UV)V2tz~F*XW>67%U-q@VO1 zcC>R5EF@W*gzlTZWyS0Sm7eY&b#nalU-kxlercoq{@+c6*lI~rm*R5&2D%gnrScgW z{|oYqM|MB89`iLwaHG zwe~fGFx+L=(Vl2Brmxk>An20i9o73?H>y6Y6ofe=-tix)EQscPL@9{Oef&oL8UL|k z<7qI5L*jqJPFd@{ljgRJZ!R!{63c8{TdLNPV z7dg$3j|iXtC#lti@CHpFjm+mLjEFqgy#IwXp?vx$QQ=9{kHP_-u(O~4KHH*w=rQt? 
zyY0(E>Hl}qicI8HYl}*?=F}>z=z#M*X}nsh@HQt#^28OT!JKY2MJRt8@7MMJXUL&< zv9_MX$-Q*S*kiibub}M!f#>S**V~J366j*ztM|Pj>VGo=z0v;W=T|WXAH3LaSg|5x zL@@txo+-!If(-mOZqc>=4a-NCG(kWSYW{KG{||D7m|PnK>(Q(4$UUw>#;^QQ5vvz7 z^g`_aT}Eh;hijQI*7oa|L^k$g*hEqSF=Y49{tZh*M9HTQT)kA=TLXF>Dbcttx4_GQ z{Lx!xJqP9dOLO82b7G8*igdd9r*A?Xt5Y3mSyc#^wsse`FaOLV2dIaK=F^>H9>x4W z|Job!?0%bZt{XAuB5u~9hs8|2@4oYdm^B$*T>kqWFhX-IdC4yt68+od(wWI%a$v00 zlLwdiaA#kyBjaC2acN2AS>;4mEK1`4t&G7RWjF6-NI`ZnV(LJ;HtLcYa4Fj1QH^YO zj`*94JQRcZo@icAWq9V3Epg(Gl3-l7e=}wt=dug97KKmVOsA6Y&WT!hAIYKqyD`g2 z1)HFFiz6ej-Vr4#7xpWr=K=cPba@`2WqF}p8m8Pc%RS>#K@l-uLnB{%#Xh0ug?v;N zNu(hQf>*pf(trM7pD^~s|? zqKk(_kc)_+_guGo`u`p#7x|9PYV-3BqeNE6FN^T6reqq5AT}v+{1$VUje2-xc{5qq)rgqB@e8vo@c~ecl%-?2pm{D(4I;Co9?; z*zq#_gc8yrN*H7=20NIkzb3FR6n0eKUKLyIHF!z@KSy*h?!qSK$ngizMVvyEh{&)c zBecGT{{v6P+6x^lsR&O`#!JUow@k-~NS6pi{+;)6Eq^GOE+P=!b5(FKaXtItNUY7! ztj&Hd=ws{LiK21_-T$edZ8?Iu}kij(y%%LzYmilFA-`4s2bOObV}$z7;Od0tUmk=Ync_RwH2(j6`J)w#RwS?6%!O5dE3|jXl(Mr zQtO|hB_vltO7(v-E9;Y~X?|S57I6zz`oEIHUB3kN;TFq?+Bt6u&Nuk)0^zsGW*?pX z@cwbOR1;UWB*W7A7>ZMuoih$Q`}-%iZe^q4p+9YgnsGPU2u;h+?AizV5~2Xg?XMoC zev|8tsIJPyO76rN=WD2%p_3nnLr9Z064rzeO+%u0T$Fv8qNcI8%(PYuJIpj9>(-F|9dvhpLTtj29xE5R9=xQUc9uvhOitJ0wqO@ovnBy)@4EGy7rn+FeOst=I`XRs%lC5uO|uqk4}I=ZMlGYuBzKW|B4bN|`ZpcDDmjf#8; zT;sM%35;7V$$a>SX|!{t2Oa7b%=4gjGDL|UOV9UNA;nu(o*%M84~>xcQ85=#>@eiQ zXj!m}dR$X--H#>~5IQLfzER(HL4%@>Ow~j5H&WGUzh@`a3~AJ0gr3&Mp4MV`(Yb47 z9f)NpC-W2XF-{G*kD8c`{#qjQGAJCzJC;%B^sVg+E4AlfFvE90ZP<=G;~+942wWoT zQ>T^qdGqB8S-BjL%YsA_zRmKU?K7r?2o4}<<|1mvsskZ#2cCqgLGQRHb#sU!KhoJ_a6nr6paL)VVcUYC741!qc-)pf9ykv z_@l4{_h21btyRfJSrZ_itz(*esPpdRZ870@nx&ZsoO*_84+pL(Bka2zhPzppDS)Bx z{8YQFzZ06?Ylxt9z?VxI{^L8QJrcUo4V_=HeJ5VHYtXrRxR$EiQ_wevgYqu~h{8#? zU^7DI=r(17*pzTag|Qp^$`BTEc;|m4TNk?_BdO3=^jyI5e@ZYCWKh$sFw;{k$95M@ za^P0yJEL#mI$U|j>l~21VtjYtN9u`Y@Y#>*xZa3sJkU(3!u>vU^zQfW=);vu)$>A5 z*vj#Z270kjLC;@!A4Pv$pF3_4Pqz1Yd3*(<9IktP8tj8j`FOLKilk~pCl`Fo0`qTI z!yl}?h(=@`@2h*s8_6BhqhRgB-KR--ze%>h@Cp}s3;o@R2ZT{?NqZt>g;{KAd!h{f zxxRQ02>q`P

L0Q7=ad3cC^2xC*#kej(~hx3|zp;16#@y!$9@EojL&rl!$v*?x}r z+=B=?yieLJ2$N+WFI__Jc|@Lx$oyl`gnas^NG|IopHI>Mzn23-p|Vc^e|x3S{)&Gi zmpxL{0>k6rpPWR`Z}-2FdTqb*uH1j+F|batpyhD$$Qz=5e%A)&8eiY-2O=LH3)h;-A(eiDEjJsWQh6#$_)^>sSf|& z8@(IRI&~rpZr?`OAxbCyK~I#Iit?%?-gBZ3IFIzsS#2e$mgA|Gqk0@>Rq2H&?fFIx zyhIfx5LkL)w)voyi!aC3Wdb5Yu)g7iaOr_i>w$3TF0=E*98|K3*^oFzgEES$J#yci>RYiPzD{CxDw>k8aAQN< zipOY*8g;c|uB^FAp2p5DkP_ZKT(x&X)iy#BIh6vUnwuZGRHXdXH*dhB-|&mQpR-}N zcJ@}8%>jQgiarLI$5X!`KVKjwzhX6Fje_22b*$C#A|~523+7z@+^+iQe|5L>_}3!Y zY0ZQ+JV7g(pD$N!K+~sm6_u1r2N5z4HlBo$W9nkbSn_xVh_U#L(#~IPRxFGIi>@i7 z;vTqlCuTJ*x)`^iT0Q(S$`*R)Pow1+Cij1{hhg%cY+|gOR-g>M4q$R}Z5L=ou3XK3 zQhva-b00Fl4RRgif~D+|_EQ8$;e`*ymW!=Y)85jaeXzV62R9Mstao05&2yWs8bJAj z3G-S~G1Mh9igug7WAj2>fT{EMhj8waBX>bP9r)yBDzl1l@-kU@-#<|X-%#RYbIRbj z&_N>8|1qLyvAq2Ea3Vi2mpxt(~H?@gyACVF0up6OcU39=h z-oG?NN%8gEV);b}KuysF_{LJEX6vZ{PYeP!y)EUe=Pd&@_|<({s3T{2wkC^}h5git zONx=!|IF?&TgPS(gCjGW)v@Qm?1^jHG}tn}%)**0ph1Q6o<{1$T&;o5JQSSjkA%BM z_~j<}W5!@0RhKe#mp{6*v`Um#_~e?KbVkg5@iyhy(2{!GE*NDN+}GeK<`wy(@vZeG z(WPX)$UKu9U0MA`c{BR$M$r?n@HQXNz0gp=h=z|n%ZIdm?=!n!gdj0l7eHl9`T0Ac z^0KCeADBMG^u&i9dDi$ZeuTGU{FW-opIRv;jbffCK9034cj>vaVpq_8#$Q3nq+w~h zZ+r&*){PveTJAOK*pT&wo{8A+*!ZXP2)`JQ@Wb-sog>L+$n zodU(4>o$4R{MUPMen$)DWqP8K1R=%K{c~U_UMAY?Eo!7~avc77$ix-vSZ0m3Y|3%c z_zf@Q_ygp)@^sGKxiqamrK?wzqh*@#S)+qHTp4pQ3V1pLlpw_BI*3*?eaC=R+EtgA zS^dL?Y|->0Q;L&NlwJz+Y}^&^uQ-v80Ip76^)`o_S-*U0Ae~^aN>YPn~)orqC0i-G|lCL#$1g#I&-Pr_Eyh$D*FsJd`~UnN?KmW`m4u!NJb zNjJ+RE1Fh6$nI-37lEK#TFV6hU5R?QH9muza@2o(m9fBl`KV-03(MrZC)~7BaQXf49it4g2HGT;{1lZz3o{+j@T(xTXMf8+2gjv z4e4XmDU{)yU;ya&5(n%qWg8+Uo;F9}*@A4q;n&B59s@Fj+FF-xSR4u>h zB(XHmyd~r*t)qOPowuWW$i;7ib4y?JInemaoQT^7@IWV<(gq=J@a!E$YT~Gp_n$(Osk}`boNRoo@8@TyiR@{75dF({9Sx}gv> zwK$R#e?-qAwKuT`UFb2h23s77ia(-7R7d@ctktB?JVfk_o63>{Jc&f+%^HC6#sk`^ zzc!Lw-!VT0&27lRzRqZ-XS#V5t7cl zns?V=7*YW*yI0shZK6FZ-pTitasMhdk*q1`toh1WV|}*Ys)ep`R$cc6XW;Rsr)*)c ztdYyNmO^XZKr5d>s}T|=q!FV2RE{1YUMuHUW8GM5T^SQqt2#csKQ3izUQnwXdRTVC zX8tA8pwUQ!fQ9}xCaqwR+|cey55di|O18^;uUxU~!=BA>)g@+YYrh^X8?;jpBn~%k 
zFP>;Gpf->_jgXxO8U51r<}Lsf)hoWm;C|~bI<0KBK&YI-6$hg}5#s0S<;o;3n44;g z_nUi<9P#u}VyRWkQX#?0`(tV9U$5Vuq$;J>2D;bnnLW%Relg>UG*D65kweEFT%6il zWE;@#B7CGfQ5iitJ+(S$>@jx-R4z-j7zMYEDg3D;nFqL7wL=~?Rm%Pxx zc%`B(xk~jG!bV4nD>0@qqC?K0YJbeUE)`N~OIAIP@LhIUW|dxd4AcNq7tyL+M`+6l+>&)K6Z?Rw{CvS zj0D&giFNSk4$m~E8}t%$7lV3pJjgvr^*d}X%lcr@4&m34Be0XSh2u-EV=4?(k=2-@$pz|!O+4<8K zr-qN0WElH9BoFf>*SNO!*Hx@ImoEIVrhPTmw`q)T)zB@kMA8kYwNK`&Sgk;DZeAEU z`EsZ2q3bdB0u%rWT>V&BIH5(=>8up2n?A9)O~|^9;MBv>#s|`PZFu*3&rVk!R9npZ9wfwV;v0vhnv%6`0HZ)}5Mq9Okw zP)vCxCXgq;Q{9Ai*S#FZbnK51`p)5y0(920)lemW@;@Fe&oNU?rycgOi*DQ+bgFm#H<*N10rv8+9~ z3&Dh_4m$b(UV`GS;_3}KB$hExKMi6+zWdZ&76`0onev-4*Ve}zo|!?|OetcO-B`m}vTZ7D zFv%ZOMCexD3mAYMZLgL)jBXD`%Z@_QBcAv?4lcn0}cRM)6ZvAeJ^FaQb*CW%b z{VDe;cX{$<>1E9p?0;s@$=6QC{oUQ8xz+gEpL@sO2I73zvYCq7D%4^zZhX$u+)s2o z7}QHdJRjWlpf>$6?O;xr504`oF&UXEo%usKYc;Bi}+g%%-9F_i~TJT7qg3nEsX2V zF9Nh9dg*F{rF9SDDz^o)tfOCN;&dt@rNMcvyyGvE z(FYNwJk~tj;?@j`9wqjP4~gx4g<#Zjn%8yJN7ob0{auIr6WH!oUkm+$Oj4s0FoYWu z_(E>R&G=?x0@9n{SWle^3?Ka$^BD?Osgb9a+iD2@8N^JV@5+ zc|N6i%eP$25kO^+O&!09mzZ+L3A$>)IKg-{VmQ3!a+TSgl0s0No(5J8%M6sSTLe*Q zPY^iOkhjk*uJ3n`TIrwFk5eAyh)v-Ain{VhD@1)E{@N$S9fq%yY9Dvh$bMmX?8>V0 zpFj_WIv=j}mD8Mq;z=#eJ<3R@pUkAJN9dmC+j!Hoa`S}_pl^P9ZXxUkFJE^ry8%>H z?fzMZcaQpM%vaIH!rh*a&v6SqiqT@w+03G*djF;sC?8`sba~mH40^-Yx~a5$iCHQ5 z)aUnT_7Hrzj=ovn?VWH3FT>7utG9E$9w0Dv;t-I-?<3ZkP@~(v3xZ@rOBOMKH*N>F zi$W06vRtW31+5987G@@uzDuF5awe6w`QEb1SM_7;$XEL~-PqTMDfNJymtb7qR629g zmkZM7E$C`9^7f<{y^wc@0It96=3<*v>fkeQr}>iEyS3$}?YI}*O}n|1w=Zk)Ti+Q2 zKY2d{xYfNtC=lbpeWr@RVt`LTB5X;|0?nDD|5CoIY8)yGTE4!yKnLn#bg%MY! 
z^Dce=)eZMiNzqR_p!DYD&>H6Y)$Dk#`s}Xt?c)nw)n^R)layqvR(Knu2sWN4Aa{oR z0)tTlXZa@uyd{EVrdH<dko070K4$!0h;|^Jpv#uK>rQS zGmq+9{mnz-uEUzE;2$PaX}A*z*G}loc@S42WbqSLg2AcrX+*qMc1yvfZE_tEx4jAx zl1}0B5*Y-) zj5j%t8h(2Y2t)^Uw>cMuaIZed=2pWX(+4QG^t0 zy-iEg9{5^RPe)d?YW3y?I;m9&!{=C89{l;E?&90?w7M!(l^_mKQI3{U?+Mm-ILb;U z;nEzy0w)crT0DQ-yKmp;q5Y|64_pDY6ax z5CJfb$VRLEE3gLQL}Yw+__de!k5h<>bZ$Y!RYDYHlG1iTr*t=6S5b|Q1b~u4!nhwG zBF37Q)tlm3e#rCcJHHp6Vl*oV8l#Mztc*Qa;tdb=I5v%%ruL)- z)TtXSWY7FI(zLOpI9Q?rwAMAG`u77iBQ<#*BOyv&E76*8kD(|^;=B>)spJ%Fk`e`s z6e?p8^LX*i%5rW4E%R(ls;o?lN=}U$m4zIQ3U)ad*7++^_|}+%Hq!Xj%WMYh-#c&; zF4HJ?Ipy)qXeK_q6uSa+;G$!qj87k>c87}P_(GgAn0CJs?%orWeQzGJtOCrey*mc6 z7SWHTO!lN0h$hzqAJP)9W9n1+Eu!l)$tO`KndtVPPfv@!Tg268%kaUoJkpOIG1KP@ zNlFC70-!|rFFbfJ7OHKVYYqzN-JzR7MBS7J!O;(ng8Y@w8W(z7hTpmZas$ZVpyDai z`>cv9Ii~|DjAW^d=z=gJ%H6~Vau^5W3u?gbK?lVVXd|>yZKfNgo<`hUIcl67vHnOS zv26kz#_#dCzJ0uX_j&*B1}s9~`D{?q%5AO#%U;PWb9?>Nv$+Y5d%iO{7dJcC_7#Nl z3cE+mT@Qbetno+@Ldx2PIwlfq$9>BhdQrXrKqT|2O`lmWjQmwn4yjM@5935jFQK0I z)NdZ4Q&<5A&I)(Meb+%kFlL5VhP_TH$5TD=sUYTWLb#8JPI5R8(bMJJ&)hSs&Cec6 zhv9+}MX!-@tjE<5AxS&Kdr%3Wdt)=i^eU+v&d%<(yU-cx0p%hZ0l%E7^}P1HzGFPU z@W`8Lf7TyifQi> zLSM@QWqANpHQz9`ay~Ik!{S)JN8|{EOZ)R)-YAgj`&)4M5Mm4@vVjy46jKCk0Bu1a z?TgVd2z%$Rg{gq&uR)CSF-yQEuc=-`2Zf>=u4zbsOThUezI;&h^b|k ztf_ABtMp@hUL0As0|fH~1s9$Xo)Xfwqrr10Aj7VSzpHT&3Wd9^gT0Nq*47=LD)Lty z-!S)QuY=ot|4L+(0RylJ<;YKGJUaN)W_Dls0=!^pTZR@w3u}Hq*u^KXPNKWgGacdJ zxQ@MVbNhU@xwZM1PksQ!)#m9Y(&`8pc1#NQ<7_CV)Z!fY)0F-+1e5**j;K~3P3*Qe#Wi= z_`RzEyAEAA199ehg!N4yp)Mq#0P!)T?0@tzo7Q^m__)YBI_| zsrW5)xToC?gW8y5WKS zH*wz7g$P~!u-dbM3_d?M$^&x08;uH&y9_2KmJm16;#=;h{@Ri%+mhl%`+{vH@w34) z((==u@u^hYftbq{bLOAex{|&kt^~aj}x6BQqnzLU1MVbS1e` z3(Gwg2A}{Jcals+&1dwf*H=?^-Ej?nYQah&m9>FGuRXoZOW+H3_V%ygC-sMCap80m z%`bUVc0>U(43DkLmRzLX5{E};I2pO@>#^P8#qR8tXkJ&FLV!+G`dYYn$SSqJK~YVX zUH9vEf2{Ni3XQc;iuM`Xq4tY$gLo;9PO0Omc#!JOH3{WUnG9owh^p5-M!lu51$1!yA)ImbGgOQ>70DyZts%nfrN2c;CpUB1Nm**y1CFs_f;#C%hpfd0C)Y9 z*Chz<)Pw0;Nc9AowS$khb_H~J!G#KWK%vdDpGNe%rA8;{+(vz@hNAorjXNH<#Ixmj 
zE?~T|DK^>h@&4ODSN07CbTE~{dUVwWbw8;f99I=Cv_`unG6%m}+FSstpt!ffw79x4 z@Fd^y*2D)oKN%7bCMTHsbS`2a(7?49Sq>U$cm1zU8REm(XY_I1#o@QUAhPW5$ehf$ zm#(Y{@|xFZ+56nh(b=MqUSu~ z=C9p;+6o5{rX(_O%_MmY+!=mKQDLlH=BAao%8J^hsHa~SNCc;u3F*JfdVYkgx*dsD zYvR;k9T?Qv2ZSy{`dSWox3eTu&Qd0d@!De>UQnhqNSXp(!2+LNGbJH&mwJ*y=M%2{ zlkYEm7{XF|uI}K4e=I*soEVxL@KZh^+^6b+`cV9oJq$g33JhS${YC9wM4D7UVqdCI5DWXy7jWUm(w zH+!yfH1$CX`_5jqAdfV|9k8jzc=&v$?19g=zHoSzN}U3(L|3ox;+P+*IJl+6>xh^> zV8Q)JU9UCx_$mKRtZT;j&hQ0$H8-AtR2{IL9UGQ?1#?rsHOW&jlw68=`V!$|57IUck`pZHCrQUL)B-S5u;Q!`!Q2_1 zM;ofGskFs8C?uWbJ0&RoK1u^`Sp|FP^ z(RXs+S56k7A~6ZPPR;ZDLL`*0>dE-^+2XP`V|AjSYL7UeT?Go8b;Xu$c6NWJT^Hoy zJVdZ6ew4B6aOe-(N-TKPBZ~x zIE#Im@lD6Bsb^0F(8{++_EAaSosu&C{eI19p|8Cg`3nKNDhP1#uBd z>vn@Q;}ub%8WzHa_gpMYrydbYfvOsd0P1$Omb5Zsj&{(%7z zes91avzfSb5qsR3*91kPQ2Z0A#?^K5)dulf1OX@D^VAI4rM?V_cQ^XrV%&kKS6m^+ zR)H?2W&&{NumRA>G_*OF?;CfI-9$dqc5LLGemL=U2AQU#c&LG`KDPeAECAW`KoIK0 zcd!*-MK@GU1D(9vovGf*IHI~6{_=DA6gGS5lmAWW!!!36w$-g&n?dC9{VOjni9d96 z8yzFr6Drq=*Erd%zy0D!RqoQ>t~a+OjmOp>(ylSHzl5SMNzYEDjui0?_Gr~KhH827 z4Dqusj!Rdp__}v}UHD})>5;zLkK0Y^MC5tP{_ImDBeJdkEj$B}ts9GfB0%rOZ&psc z0;0Av=WOLuvGwfC^TGGlt!UZIAOY#J7=r16_eH{~^X&iuuRy64O-VOK!`}Bt8G1n5 zn=o4yLJc2%=$wJMN@Uwwz$}iPCl971SLsplQ*DV+e`MfgV+GZ++M;g|{%AF)c_k0b zbs?q|rr{!{6P>vQe-T~$JwLrup@{M}=GR@(3Hgnl$xO5`zISS<=)_zY(TOZKO zHIasS`WpHleqo6Oz=7HQ>9GAvv2VC6rK;(T!OUp>(UN}BbSTsnjS-YS64yDL=y1 zT~&Tuh+Qsi$i_Yxj_4(d?9H&ztfANHq8W$Fqr$ zyAx@>Z&p{JXMJ+bV0vRB=SBs#fe)AzXszL)ic{WB8@{lfICf$`YT?F>?TOhjLKo?_ zu0{uHE9C&XsZwe$$|qUqbE_}=3_Yo-n~8I~eV}hE4cjmW^DU%cudAtQxT@mT<)vHq zMcJ1jzE%0$KYVp_$a!D*0oGuv{3%EZO*uGd?{m>P>TIwp=hxwnYzsm`8J{JQKaGCa zpBJrYDIzf*;N(oiUYxJVaaYofFl=U$UR(`<}sIRJ6cb-Nt)@E~p z|5n)5_K=rH;Qf#}|NX5r^+6yw>%_34tEi~%F)eNI#KzkDLh0gX8afhkSlRC&;~q!O zgoVX<1`R{idTl>>Mx$HKMw%322Ic5r&ETXHp`J3M#z(e$hI^4pWFTz%LYR@wMZ(Hp znXNTyd0lB|;3w@9Tn#hTC+#G8Vk32~RRpsznPp|5Y5knYzKHtijnb`>JRhGLwb~F9 zaT#uz#qUpVZrF8WVOOFPr!u(*xd-ksN@LTl5Ubp4(aKYuTtv~zGu~YHT>4Y*+*Q%_ zv&U_=Z3K5y4UhWty0&yblJdso!+M?c^_Yc~Qy;C$36f!JJBVof7&-UK+(W5=5Bl2% 
z4wi=0tKfQt^q0sk9gilD8ijPf|H0Qg2UpfaeWNq6ZB8b(Cr&1|ot)U##I`lDZQHgv zv2C9i_dM@c?|1*Xb*s92tzPO~XIGu7-M#v^;+)qALP0&pc`cLaypaoa$-*h3PA)_F zFGxTDnv8?2ly&6i7P9av7mg0<4E@c3)vq?4rK&VZMDs4lz8MqzC>v_B3PB#T0`6aZxz@j(8e3KI zcBooT<=WO?wXN*d_&*%s=isLr*T7TGVHlQC0oCw;D`;QebHVYZ5Dj64W8vRt)<1wu z;|(F&@(SKo7z!QDxh^(>mumrQO@3?2@!3!p)sPoH=;wXfOWKR09_t>lc-q(CQ}>Wl zn6tdt%|uL_=rXae#k747srnx6`md`2rzP;k94Io3urfJPMgQ2?qa7Fj0DmT_vijW} zLL5%Oj;DMdXAA+QrFJLkdqeiU66k;U8l-^VPJFvn@AZM%U?gwVld|vOwbxS?<$-&s zAv@6%o$X?18ZE3imvz(^=KCb~=N~0DqUaNXhP8AIFv%K-}Q$oB9sg1r6 zTgk9H9>})H3Fp4`L*_Fnvm6#@aj#_x(Wc3cMaTb`ayn7N(~f(U4_M|WPMpKBA2|oJ)zH>5bBM`q zVsHdo-k%9M*S34fG{1$v!6rGVw^dNB>ReV+jC?G9L;)(f_4*rzb--t|Tm`)t6_*DH?Xrk`2LeunDSW*rx?b_6svg~<_E5Yr@cF(ySy$!v%&wn47r`}im+zWg=M8Bt?`9v+UFS-5{AA2xp zYV3OT8}3440EKi@|8|98ldLaf7b7s9iBfM%-~Ek)uK4k#?#hU0ve0Enm9cB`(>FOq zwoF%&5gG)}BZW$vn;cAkoFt5R*mXCm*H_{9j@d;ss`sx#ioDZOm;JfI^WWI5r5iid z7|&Ei_^bSrocOAiF8!j~dPOv^b0;^I_Hu=Ex;mgyb-M!D7rchV@z)(*UdXU6x?@+d zF8qP@X%pEt&j=5+x9z1))U|W?G8VAqi8~i~-i1Fq9<3|BxG>g{leVoMtj}~{Z=3kJ5})CoAc`Ocb_$m6`ds@&xFNeC=O*=S z=^wm=J|Q=PLH%Bx{8oyE(+e{W6!Rc0W4Qk|4uOml-DT9toOsB0`W?Enq^qQ?j$;Ww zvVgBI;~URhhKREUIMz!arozA#5K$Ogj=shC3|MW2I}=3-2k|3O}p7SpF8P6@x(U^1jGd5flYtd{G0 z-jVOy?%O0^v0F2v4eRUC1$86qf!&~N4P}aZoN{;~bW!dx`8sO*f3o$aYUHBd@q7^7*qz85v9K%~)0>n0T#ePGX;b%-{0g(S6EuZd4CUGhg7ArGc z&S*M_jT4>SbXp{>QdOEguvkzz5zHw7dJ@*XH&eealPWe1{U5ZG5E;rJIBrG$=LNz< zfic@VR(@b8bfNqz$Ak;*mpE5Ivv!0zx1+*}P}HR& zPlS`#qKd`~ofke;;ZUJdiMkUW5+>i7K`>FWrJ_v~EeWm{UX@5eND`st$|Ja-$E1No zZY7-GaXEo)C8YcRZNX{lbDBFYDN1x+n$IkE{Wb$O@!h;$T=SRxmvP-cy)cVd?*otf zZZMQ~%?22MAby6jJrm#geUZ#CqDHESh}NH2oBA*QsNU?Qi^FL6tx@}Xr2_abv7lyd zSpj!bx1|KbyQ(``>iqn{Sw+`%masIlTUSrMxm$2M>W}y)>I=`Q+~|U)t5~)+uu5XP zxI+ojyQ7=X{c3hIuB78Cri<^7JPoAjD*k5&aPuZmum5oYw55LTSyo7tdtFfPkxLYm z`E>pkfB&5ihaPDzwwF~!ASAs{aBf_J5AD(6L;e7_NW5Ui=mBT(zR(Bmv;?>?DxelC z`ilF3a7IuvVsP!omrZjH>j9=}awMhN3n)=={;2dxPCq|>P$?`qwHRg9kssMgd62GU z1x~04gK}PLU7NE5yY7LsDzc2T@VlCY*nQyL@bKO6Z|Oc+G-n(SEC=q&J)%hu>#sHk 
z@3EiATJCzj-)7)GP|qOn#Xc$?XjcBcDj&Q6eEqxh&KMPE@Vc;0ojnK-1IX=u+u#Gm z$|nJ7QvS5a`G4o$3Xm1rPIjnb(A@4(%2B*q4WB((YanIjgkb3Z(7wI_`n!9e3c<|JU^pVG(yB-5mj`iaZuPU{F zkSRAa(bU|Tom|BCD{CnZ=^tWjTGLYRp8<5n|C>953Gq&G#R7BT3&kS+jL4D1a@n$mSCnr=J>yJqn4b_{f#kf5t} z^})gGqR%aepWe+k*Y~|W+OY6u!6f1sAxa_!RH7Im5#jv`5%v#}!38;aL`ujDX_Xi9 zDdef#_jhM!N}}k z$x>f^fFh!zb478tlHNdrrcH>6O^Z^#wThs3O*84#t60)vtR>}qrK5`IrSe<6X;s1e zSuTr;6h`d$YI0Sa>1KgcvsH&g!>b~fqCmO6_yr8_nEn@g@Y6EtRt|0F7aGqx1FS&9 zSGtRP?bpJk)PK5<<@$=%e{t0Y%YIch%SG*B5ND*Dy|%wgR?U78l~UjSw&`Hr^G|=& zeAFz>+4oOJgKbQb*l`SpSd${NZyWqZ-}fo5i01aiDTO(lLlPyRjpGY$Mug)_%JgZH zc-?v0DL)nT#KD+9b`U|KTyM@&6i%Ux9}Y2{B9U8m!rJuLEth1^Kj}acVW%U91nPzi z>V_Oj0`2h{{%yt{R2C9!;%j&l0r}oX9>*8f;QbhWU;h8Bv#@!16<{K_t;E^In8~lo z=`FqcFn-^A>TPh}UHc+qa7iCr^VYwp99(7w$74s`xq+>lP14y(-paYBTSI}TrE(FX^QaJ4Ymy>QB1w;^;If-IcwT zJus^V=|LWj;09wa{eLq&iT%kru#z`aG#s;Q54;S~xZq=vlJk$605=RR4{G}x~W(5 z%}#ND;5sa_Edwv-dd>B6s0q}D5RSdm7u^@#;#h>?SU&ktM6yPGX&8BAL@F(fp@AZ5 zhXkmkydtX8hxEJjEVos7TRwR*9gSg?h$x+`%HUd9ghp~{m`V8mm{*sEwdMc(Pi-Np zGWb_iln%s>fh6QKh9KR4LA6ChCowVWj6o4WYEXpw|0|fF$^Us25%J&hO{kd6>JI}O zKVJ>1PeMgS1?V&%1^{!&R>~p*lb9d1#&gI{|5y0`ay;@616m+Bru36g9jaFYY!GV& zaw2^-(99=3ity2(okQ-BcotvmQryL$9yJA8)RqHC=YaM_q5PE2AB^VTCYWXJa}{S{fc6Lrbmq93;GVX# za7{WWfG}QcNNmNfT=t)7@42$}us?%b1bHqF-HIjcb?gaWorLYdrG}DlxcV((U`pWE z_FyQfTUk$HJUk%y6wa!57`S;(2p>X9e3`y&2kd8tpsgE6IvQn*a7G7?1dQl&OK0hl2?eZWv zKg=GFv|loh0&SNq1?Jc~++OQPKXnXMsR$J~7c1&6TignT_FkH4m>Eq$l4Y3L&oDDb zgL@%FgL~8g5wei*A0ZZq6LZoc`6*!{jK@Db+UQX0>_K4YdvO(HhHVsHh`byjc7?11 zI{iBOcJSL!o}SMxE4I6~wR{772AYu!qKzj?B9Yap=5v_n^z9gP`O@*DBgrdAdlD{% z1X-wD0*=gCxNX$)p$zCn+~WC!(5Q0GBB%rdDk(U{<=wc8NXC^5~-Wc04y zd60<|@hr5yCQoh1FFXVV3lD2UbzJ@#L1HO{tlj3uK3vh}94-ZReqoL5h&ABi zlFW6z-?g=RbuoON`&PcWsuQzJ~SCG#l-*d`8J5e!a6m% z@?l7vz&lWsJz{e0CMl2d*157dpJNj6ens*ehYYDrgd333BjksiJPFe9Bd_?6@xBxeV zjwB>`%rVSC;gPyvonPXtLmy(5yiloI7?i{2k-i|PgCAtTe527WC+_Hb;P{w(sx!5| zp!jrohN?+hzCm1ZDP~cD+74XMnRIC&>_K0z-fPpjAPlz~KD{#Z3@b6))2=UZ-Rp2{ zk6r2BJ571sx^Vw}pF>(% 
zK?g76M-EN4g<(mgx1g50;>AQb>B#45`2NUg+u!UI&g2VCnn%^)9{Ad}{jt=)}UhIx2%e|yXz9JO$W_G)bmY1)d0o^gIH(EvVcev4DJ%g6jXgz-$ zv)hfIr`3jb*q+c|(BuT}a<16x!n}M#k7zTS7{yF6yd`g8m$aU5;e=G#(1S==Z>a~w z4D>N5GZJGj@{$51E{Q9*f6YKoy_ZJz$aO1|^uSiPFx346*h%$hQr9pjAbtV3okZ{W z%_}|BP4nn{g{jGDnws>yzB$QpKQ?^KPz=u>&9U$BfVUT{*Vk#V)m!q{Ee|4TC{kM2 zoS~5Axr6DGn0QKqz`1!-6A|iVftpY}YyFz)5vxvTXe*m@-nxS_hG8A_P9+JXn~}o^ z>0CtiM(ezVE~r4SQx1(s^#b%j=;FbbnS=5A9V!I@L#DOX4I7M{ zs5pKJ{K~mAgg2}K)7BlicEQ?vhIbpf9fmZZp?Dhnxc>FV(KOZHk@|@FVMjXBD!Mo0 zms*2({)hNBoMP3LE`#B2+X$fP`95dzEqJ39vyY9%adv ze!mkoX$q}~Z^mL^zpd}&n5)M?+! z;hbjb-bja319u#o_RTN1&$}XT&54)(gw60SbB|E770ov6s7ve>BxU$;e>9qdhvxXb zu|0M8Std{Ai0wW(urLDP)bA4_ORNrxw(R@ryXAakU$(|BwOTt9-LY3u`Ga?T1NQ3S zoW6K4CJy&sN911%hfyY{<;?^A)# zM#72Y2b5o?!-FCFtwSWrsHV-RC=aAVnZ>d5BSyb6O63a;C-oOfMS665k_f!9Osm9F zo3M;@weoBFqZ+k*1E!DD%CSw$ZT_In$PFkOs%O)}yZ`G7Gn+Kjb!OaiZ@_T|M87yU z@~2}=rW!HX#SC?poh_g2GkL~9Ls1y%g9S>?l2k8HlDb6j2j2muCd-+hYp4Q z%~!ypW2_IO)~9mgf)LMjr~tB4W2Hc@z&Gn5nW{77GvxeC$XBq%R+I)88NEVAAz0*7 zWD*zVQgmqNAf5ZLZiFgN6}_0Hs@}}%vsiyicNunc6>aEIkB?)jV_XF$D7fx#?|Oq# zHK&ZHjNnep*Z8ku-lUE;^Bg1}rXSAw-I$FrtW_qhS2E`~3+;XuI-`vY`dZja`Oaj& z&#A0vpcc5x)lZhezy)tPm3hZ8W{e_^S%(T<|9C_F2-s6Bc2o5GZzNI%nEj7J=GL&- zO(h3jYb0$6Z-INdF^1TGNwL_OXn~u!Q5Qi%_Eik*A^OZJmmdE$;Wfd0LitshN^_Qb zym8t{>-c=;bcW=;^OJWc;7U~HiE4o__OzOH^FHlLvDlM&f$wZV=R^bz_qn-0#k|q) zwCf=9_WImA`YDZiHHPm8;m9tp`UTekHQlb?odAp=Qlu z^(Z-6C6#Lwa$qn5E(;4p!#Atn;9jXgj-IF3+4|9FN)JXolZ?(V-6>9KM&1(giK{u| zLbXcoaO=6kFlRznzs#0LLnHrAtP%$+spv1Er@|uOfd|qW@NJcBi3%{O3>4e@v-+7fN5WZ2P z^8E&&?*0&B9tt}ho&LK9X>>x25swEW9=gMm*Ba}GImbT;nYTWtowLC33Wx~WvHI?d zYzyv<8^GvS+=Xik+f5u|Xe%@+<3X&9+f>B+N@@m#Bp7B;k;9Q0XMalbjq%5GIII(Br5V#4YR?q!oQzAlb2-F4hQ)+(-wZ7uEhc%R<6l$Ro_$l3(4dq@51l zBLg=VDwfyi@HZxvgv1SZ_)5@W8Vaq*imDjH$dwoX?S(8kRqyck;BG^le-J;CMNC(k zusLu`OaPJe4&eh~^2Y4U%+Tj5*R{8DQS>@swW-60=qNWECjs&m6DK;D7cVkip!(3N zmL#`A-%A+2^7p6YtFr{3Ximi90`N37xMtaIc+w}*2mYa9{bAi*)dZ<+mf@<GSEs428^frRy)gh(PWaR51CjeFm1Wl)&7_P`z1(Ev*^jOrV=D@fo~DHs5^}R8ZV$yXv?rn7Uman 
zPiQZ-91NV~)eIx7&~@T~2=1qlA2xBW%=3^2NIu8CRlqzUbjv)~ zD$)qztdzkjjIvU~H?xSkKrWel{^v(CYf6?%`ddwYt;?3aKsfFQ;D~$UXs9VC{X%y; z9L<3C2|l&t9ofduy|Z#iA%TqjipQ9qhx$G11(5#KD} zd-V5KF+%38rQUGG78Vrr@7TY3TM5ENbAv@p;|OeU-Rro$8#Ugf0xzNf{@3+%0LErdZ87VQ_BNn*g-LM*dLKiDM*5Yw5m`4JD!2~I27BN~^} zNnIEWd8|6hsq;v_?_lD>nKG#$n}@1KQQ4pvri*B)9V=|BZbNsXYu&n&p;03=TGqVg zx8piQD_mi(ssDSnV6;%;i*w5&i?!o!)v z(2T^#`F6Bw6gMO9<9CAA@T?y%xot7U|FRim0^-jYbLN$Hl~ z%9d8o@9EULA`05nrbPoPj^d`pHML{a9)%u-$MPJS+DH|RwKHt3Y&C!^q5Qr2xxXAV z8~Zo2F1_IPO3>A3>H?~Uf1*M&>ipu9VuGvIlbp`QEU?U;s11NkEUF5C|5l_2hI4>N z30|RuZ&4Rmm%3&MFXoxfzbYl3QbPTKUaio7q!r+clKy~dFAaYg)!}d^B{iq=asoA^ zi8qrIBsezn!`iT#VTk1KZju&}sovuxO@iOv2=A2dSgi%d#(h+g5?j?YX=f6c7sjAb zTa>v_M%m@FDTHI=1W*kMHwZ^LWYZ{+llngHehtbAzlWS}QpRzI;A;H+ z@pX{wDC!q6^pG@5C`#XLGvqA2EwQa16Z$@2%{Z0?&h8b^hJA;Omrv*YlLPi(mBD6& zpds7LQ*(_$yxeK7Yu*u!mNL$F9#3%By85jYtr_Wn_#DHtqKs5Fp?FSVg}WEgIU*_Z z3?N+Au$aTnFe+eC+Q=4H)3BI^CwVy(!zlQ<>z+)ko$~tnMf)pZmJ8Y-`-7xedDiky z9pDD}51(||fiMt=ny5KenP%wt3-X{yci5^b|4)6ZZ~H<@E^Kfw9dpQ_Y!{xD~H@Yu&ONTi*tv3_+s>%%1N0g z?gGNa;Q8ec8;{PlLr{`+ys3TGsRy=W#qc~BFa51Hd$a+#+tAH4N+s>o-X}u8mP4!# zYJg^^k4AEE7H86Uy$YJVcbUh>`_IRkasQ_W0yp`Y{~?~WbDcz5{~2v{3A^d_`Vc?9 z5jXWzh^uieN4I^ab{wlj+VNwkl`^8Mg45K(Y-pr0ctQF4&V}h~9HGdabK9D2kk64@ zw)?d7O+`bv00oVbd>C?POGC}Qr6=3YSMQ7Oq7r^J2TreZ=$P7z-6WSKPNGew5{h=2 zdRdS$wOl5+nn^hgcA`8zVylwIo*_m6B}d*EU<8n^cgS);Kg?)eO|Wig;R^Cd6Q0Ur z8}yj)$YE&WZ~SD`F+#3|o&!FjcyewU$F__t(0S&2({I{t8wkicZA@M9G|K58ImL5g zDobU(19N66H(_lxJv4tc9Y~8wxK@k?5H!v-&5*B3Qu;N3@)W|ul%FT#S+rk5?pUv< zxZQ~zOi6+9*X;v3-=Fw5e73!zDScRLRqZ9zfe!yhPD^&KWVa`7Qg%hs$5l)`LY|OW z)9M#47cLu2B0XXhT%af!JRvrMPvIM!yRW$%blHP^E!x?zH?8_v+)Ch-^BN$*>^?V> z*OG@kHG5C(r*>Ipth^}ueZGoGPhnCJvHQVqWO(oWLHK>J$ymmehl_#FEIs-Z8MI*qFmSNdc|Xh6hr~DRcIw>A(Lx~ zO9|z4CBmKYotsRp%Z}xp!D}zh+l<}5lJaaRd4K#YqO5jIxx_9AxkmAua2fX_O`P*V zBc0>At;7)zR-*Gu=Hg>zc~AeY2&lGo=K7HzvEjU_gi{Su4b+A%Y#l!~{#}IFyWaB# zd8YEA@Ne@H?`Bi+Ma+8+^n7z31%ysIn*&0JovBKVzP|^TN(#%aGjDDGaaGEGgI|QJ z$Skh6P5`TyjnG34Ws8xI5>6kgiG&|b?7O&>m|s# 
z**TdRbR$3MenZTx-lu2AFF$kye`Ff8&HSX8_tNx2e{d>z&xEO^SP_>`w=7jTQE31Z zTU%Z1W!1?3eHXt<^X9sl=$mmjJMk{$($8mP?cQ-Mv#OF*U{urh&-dk^$dj6xuqebEj%VK@Un$GuATi4L-fvfL5CDGk=p6e(09kH9x zJ^~%Y?~#|0yU2~uf31=08AYERJ;`wC&PgAe;(+zuS%{lk+1B!67b>1m?;|81_ZLl% z-1DF;2QivPj@26F@UDM8=r@FqjHo{ApMKVF!f*KFPVJn>ii@YG-=IrczzH%PO9(#6 zr^#=ixdV5Rof!fJHgJ)?q>iPY)ww{w#*wuktf3l}WM=w_jLY8NLGx!0imRY62}4}! zaLjNsYR22pLE_RMw@;vS)557MD`UTkt)Q>uH>`J^D~4yg4!eQv^Q$7TC%z}Xpl#Rf z(678VlK0q+*qz9>kw3o2pD_ea%1_GUt^!U1;O~rAdOKhLK4@_z`7B^Ag&c?8$&#eC zsT}vN@v+TPU)~81@C58`vfN^g#fYAd%PfnF`0ZzZJr@!!@CTish)XNHWc4YJW3h@5pNmJ8>h^Jy15#R}Yyjv{V}-SZO4}T)vht z*6B{c7U3Vq9?-*G%i!?KDPa~Dfv5*=_?k0n^>!E{>Li>uxkgm;uRwAMj&fY_e5r_A zIWywTDHNN?z6W@6y94Q2LK_IMa-o$!{2auEpQ9@czkFAsh%E}4RQkQ^Q0?HYNF++L zoH7HbU|t?hwKAG)Vm^{fdZ81l?`6#CwrBQysUs&U@!$@-_H472+a}oIgA4Vw@A`y+&2~jR9OC@(6=WfH&3Wa4 zD=#0&ZP+Dlcm3^F4@kMOwO_VO+xG?yK>ng{?y#8wogOj_2T7r z>hi&3q`RMF$%FxXL1{CJx)DK~=zB7WO`$|#j}ckwg($KjoMIHNBSi10KB@lQ1m$A| z=?%EWZ!T{xqjvn;jHqjv+6=kDr&5eMI26z)!>=S9jB+35rSXelBCbCW+!qYO#(_uC zg{cx`$HI;L<_^;yXw7TQ8&x~|!+76pSSg5@nMlwrtqx&VJX#!8*OpGa8gm%#cTjR3 zUMy}bN$0pmaBUnR>Jn#=H7{g(+AO1CrQV=MPgtGtnAykA;k%+ZU>MFh60-@=w4&Vv(WWbKPuPo`cd&4(g~aHQTQjWb|zG^fRQ-o*vw1+cVQn?BhbRHqLW0h zS@}UG`D1t@limNiG08nW23vmB*7r2@fXfrqBa;G5)JZbyA#E*q5Gxb_VG4v}@nyY= z1Up9}|Me`Yes?*d{uep^e&&*Ir|?-ne@A2){+1#j+-)`bVa-j^pPcaf8BoI7uaj9_ z$hnzm3*+kAPU=O}h&gq-`|$0+>x3?1Vd6Bnb{2qf3|4~NkIjw!6L6GBvax&`X)jPh z?5J}9fC!tlWji)JHVk^>0Y@b|KoCmroBhLP7RCy3Bb_CUN_2(*`Ob2}pUygo3-xS& z>=-1@ss7EUh!rhPahmE`okty`tKFNiOrW7nG@fa;)=YUXh{=ZvbuQl6AX zD2)MOd607{+EQMArEpR$l4?J<3hcQ8Id2PQJrbtb|0@5p++_!=<4@MaCvQicLCt;% zKlZW@yx@7o{ zp3V^t#EBBFOPdC$NdhgBX6vN@TlczAk#jxtu7?YwHI{ zMl&BRD=SC6V4q^FU|cJ}--F+STf$E4S#e_~4+&aAPW+UrssdfZw*=2Hgvf%*Fl1{A z2DHY@wb<6#<|%1lxh~*ZYc>Y5*TqaXW;bOQJ=T)y7By5>)lzWVcT6`UX+WP4oWq~j zEQZ@eOw*yZ-*8?Jx68N^weYQCodnPEBzXu|e8n+PEfAdcoc3f}k>s(!1Iy+~Vt|~{5faXu5fDld z%+V7ZK$EA)F5C^b3Ag#FY_t)( z8EZl3cI)~lx?_7&em@PHl8I5yn4xlKt1)UfAO#`vgUm^VRac-Zp@cJ5*>e^lC&AV< 
zhAWx9E5f6mkZ)-@bi`ko;2&l3-g^KUCPk8b(0dlivI-I(YY5i zPa_u7|In@=WX(6kBnmXuxQv%?smd3n7iJ~`njp!i0)|-Ni|%O<`uoMG1uVy(+^eV( z>I0PJ%GWa5f3j1tBRzLd(rQhVQxqk`z*BA}*wnY}=`jo<;fOUycGs9K1!kI?|00{R zw*FNdwz_ByH*Id49l_Prw3-e#)oiLw!y-MUG#cn)$}<@#gg?*x#3xt%5%d8;eo1*r zSw=*+Y**^DXa7@HR#V-Q#2Q04K28Q3I0nd>1WE#O27zOxHRSyz`D$raNo{K3R!OaD zd4VR4^ubN^Qwz;F$Z5B6xc?0}$icTLcxv89qT%j}hPrgdx{0aS1(T8Plab6vLtaOs zY3_<(jB4>m#l7*UM-t&&^?qDRkmADMw2NN&-D<$nqjZ#46oSwvS8;E7~I&V zc`aFDwI&P5cjk^%CJWRyw<{QLd2@J1PAul6Y9e>Mv@zs* zQR;YuTUz`r**MBc;&H>A!)_3mOfI5PIPR~j`?yCVy7NmUk3SQzvyHZxwpOJCgK6T8@CLlJ6l%_#;esV-Gk3kQ8km#G?WVW zgj|;0fmYp9T_4+O4V4$7ZRvMu-09m-&vvhc*bsmtBKsqEJtA4=Mhckm81u3Zm`zq`bx%Z=siosK**JS+ifD5U{4?fu`h{dUSNK)bYt?g4nWe_BoYr;u5#1EnU0818{#D=x>7h0p_8|9) zS(=M};t0oZ3+hnI$aeAj^VKtvgvcoBZ;rnh+Zfv-j$Te)tn*!Xp)%2S;dUhSpIYzi z5rcZMJ#ur;OV4WKk#@q=FH|Nxj$OFE=xr{8u0NKwZ(HK5%)BUv_Ikp;0^s&K_Bu+L zJ|~_T}Vp97;v{ z5Z3T#Zl;4@?Wg^<&F1vusgiTnO42e zZQN$F*=*dgxp_b8@CHJm9(D*^WHRkni^0u^n#v8ym0?yEGaJW|!HuNeXEH&P!A+q~ zFdknt8ecRUXVV{NGahF%9A`5fXEPXQGZ|+y8fP;bZ!{inEEi3?UYO|f#1u}mbyd32 z6;10Y-W6nhqTyE7CZ>6acg00#7C=8bhD^L^!e9>F-u)zw(K)c~kJ_MF#Z%KLE^H2K zTV|9jRLb2)3RVLq1992Sx|)%5$wO>U%o8k9%@d3`&6T|izQ5L{%^~p3^_zbuEbjur zKbeXbkeb_9&!yc(+k(&|m^^U!dLg}|eN>DA)_|yaD4bfwXy##!WNaF%lEJkyu&OJ( zh~E_%znODpULoW#HV{RV+|3UAt%#|z4IaX+ZNyst@ zaldkIAxda(aZzk>ExB!S>|GKx*d&j13_*5k1I&H&ePDDyk?#fkcI>9M|5^~SeFews zY+J%G*jlqg9sg=d4{(yN(+Y6X)-3Utm#$K>>|?VYW3x8eG~AT3sRDu5(HuHwvQ|QZ zz;8!*1~1(OTET0UGbcQIv)qwe;jbH94!xJ|-J9wuYeXLRFw?fCf19D`^uASM-<`?m zZJAw;fLN~6Ady2@^1NAcSo*vx_1fhy>%+xoJq_Akxb0j zxmt_q{jGr`qGn6mMpHwCNM|Yo9`j*0BVMLdOPSygdiz9N@kj@2MM_RMZE7c;Gu`Io zM|@wQ=p5V}gGFy!uNbXvw(i2Pmw*>Ew--!M2^yZAkvt20y%>+@&&3pMK26<+NugK< zM&o<1%0LA6)% z(!hy7dK{#4WLe8FsapOKbMgI<8?qFsuChR|$vh+&e#52yQs`MmB%H5*Wb5omPnk(@v44*%RI z`8_1p-0!(kUN@=X-|ac+MzHp>Fs$`3=V8S6dV;$z`Y<;!OglS7+(;CJ)jg_(K<`!7 zgrRh*Xa&h;S68#K$Zbp*(wtSVwAfFKtkJ&Zj%v=`jCQdCdcYgkCkj(^+>XrxbBDS7R4 zxHpT`2eGL0JxgL&NAzE+sf@5JYrSVABqiZ?CqV}owXgc`pGmHeY+L8yH@foKt{kEv 
zKsxy&j(ImFwT-lgE0qW}emea-AFl}srMr42&8r_<@T5k@Z>~Rx#Vi7a(o<&Jehypi zQ#YDr)gi6)7dt;(|9v$)dm z(iW!hOT*jxy`s`HVl(7t%Q}U(hrRx72-#@H(wgv9XT5kj31nG(j=iI2LE2oQI5SmD zvSkf)0JAh>&kdWJwHDl$aF=iw2e?PhWRA5rD{cRLzm9y^^^RweD1bY&l5M6~5bZ3` zv)#1al*4ZDStfqn1Z#(Q-JCJ^1BL-%5S!JsV&_oKHLbuAO?2#V&OHV$Qwa+uE)6?U z#cGFLA+5aB8>%_dox7N4fl%`rOTcOa9XsB05A;k)Ida)Q#XJiZ^-dX1__7YBJPl>@ zY5S9TWz(C}fb^q{^9^a%Oi+qbd-l_voxbeNu!v%Qn5|?gY=W)qqC8vvI+`rH>?sl` zK_!8cbS~THFDH+~ug$EJzb8UY=v`>uC)py~B6{sPGv6nr0m-eIL=u3s*1uR3!(_v= z`GZa2V-E_3mWDJR7eTJu@)TdfGcvyp6yuC2X(c-oiu}IZDQVMkCQHogNgQu{@YW#* zdD`KwPs%$}DsDe*h_1Jfh#j`8Xn6aBCiir7>lvFENmq%G&Lxb_sqXm1%f{_UZN)xT zk}>Rj?ziSCu7FavWtEZ(*7mqyfidxs9R+R!y;;BDv*G-p1_&43yPCF?bIu5Q76i%v|UV_EXsV} zY!q_4i~7}`#+&iiW&PfWo|b!Gg`ZkxM;<$Ti~kxe7TY835#SwDjb1Ug`_RXRu~dTl zNU|Hfh$ZR>;M0C4tQ3h=wjtas@TM2J+R~{cYYpB>9XW}XzxZ+G{M3vH9ZyPrJ!B9N zYtzX^(`ajWt#R-ltFie@m1Nth${fM!rlWf)n@a3T>=U+g@^f;HR-x*w z>5>E~&FZE=0Bs)Y3*Ee?7giZ~jwpA=%unY(fRQ4n%!RClvV?`zQ?cXv&hufGYMvZv z?wPiwG=w+x(I$9}WFQFQ=G(6t&YH!P8CsyzBiEb$z0akW>>=GggK67Rj>#%HT@O_4 zB;Kf_B}$GC-lSPd2-z>PU%fwl$Zz2g4;!RY$Upx+!y`(2rErQ2Y*uVkn2Y3w@^+7A zB58bo#lF#W+jDjdUmnBcmo2_sZKSHPKE5A!`JS^v6R?pXJoG5!iAo3MpfxpmmsATr zWb0L=_c>1b_}bI3#8e|N{O%HpQ*~Y9T6b4j+US4^L zz75mNY9I@3@*Vj?c&(h$l;@mk&twl?sGLz$m*xKAMLGYc#WTgqJ|8t6f3%8cQ9Zq?hws>2XF?!Dd==b?sDlN>V#zdtva+J2TW=TJCH~?nz7T6*PLt!F8-A{>`zqlEYh;YoVu1 zvMTW;74O=zPM16dp&1eYkEEVVV?3OS))gysHTe;2&YBiwW^Vavrbg^X62w|veaX~s zdtx?kAB3m)-|8HuTS4E#j1`wB#$8>%Lu4?o>8S!u$r^KZj!`ylw|P`ooRik(y_M?R&S?PJoY>p2=F;k;v%&~m z5d~*&h+DWYW1J%mt0#v1)MY{fnJ^Wa8y;adRiRYd9#qEgd11g|-X$EuZeX18c_GoZ zheWOO-d$DbtFSg!vYQ`eLJ z7*>CXa;S~0*bPK6naB&1xA_GsQv`fo3Y-8ef?{CYR)s=&oeP45vM*3_u-mZg} zzu5(9#M#DdGln76l;Y`tPLFDo_t-vv6h;+>5^d=KEi5H1-Os%Fwe`~1l9v{qR8um_ zMZBsUMWd-omf82f?25KmeLIrx->l`sE4n#aVaQXD$` zppZ(0S{yq05SKAyW!5Ro*glI4NCG&)F$mo$=g?+LuM? 
zPbIPz=Ue40+j@{JR1ayF6H@IuJ@?eLsqhaYN8oXVB?v}h(+kAi~wnk)sF0v*{2*Gk;J7QMV zX|J#H@_6~hXN!ODvovJ-+tbtEJ-eY~7HiLgNWdbYnxq9m5bg ziZ>K&sa=yi$Mf$L+$(uivuW|ilOt3b(#wl0G%c&Oa}Q{; zeOO9FmuFd~tW`XgQToK8FUge&pWy;CJ&48`$`j5SxeKPtTmErz_Io=^dKdC5=8P%V zy2x&-w6JS6m@f?iyS@JXKLDFRWWO(6yTMw9maXL(^`I4MC0dzr)o@<9TBTN_F`Aet zH|l|~rfVK8kf@WFYDNnyUf9-P!}7~D(+?J7+F`9*JDRAOAE*=UxHg~-CTfPTQIg%3&oS zt}5NiQRTQYAaU}XGN=qorZOgQ50l{n~%M3XimU)0e zxkN6LE9DxQkwvmjmpx=_EM;}70E%N}e`4ui!Qd^!we-SScSxI6%Mk3tDeY!AZL z!ytLYI<`H!XIq!Y2s|L!?`iFnknijyYKMD_Ql$ZDP#OjdN~6+%P$P{=6M)gZvwDrZ zBNoA##GnMf3pNRk-h)huH?4jp?JLMa-m&@`dAn)~d5h}%@Vg->{Cn!DC>#Ig1lL1A z0d%QCs+Me$AgPjD@=GBpDs@U-Qm@np*ws%+r}jP%NoS;U(s@#TQJN&>)6$GIE8Ujn zr2Ep6^hkQLf1k;Auphb8aM#G^mjV0W`?7bem+j;I@IJfew|kDe=efHN0DBES7kqc| z`I67O+w7iq!Zz_N06G8NbKgC0j91V5RbcPK=LNRV4}YuMzd;Mlj1%Z$pLjw%B@T&a z#B;)gc%D}|8I~8h6me3V7H2@8p<1BwdM2>N=f5L%(X72*nzUF-2@{AXJ# z643|XSjT^Aqzc`_QQOLM|)|gyKVs!a<=zs1|ItK0y#v+cUu}`0Y1^kPsC*g|37MLa)##oUkD% zJtYhYXM}V77O{wJnuYU1>Y;HVoj)sFgk=H9q)?2l*Dlm9z*=m*OPB`hhV2=!69t~( z)8HtxX#d?W?|BRPb{~O$D4jpgU*sqGX?}*|_*wooKgV%wH-DdBI^^ab@mc&6{u#ga z_Jc#vmhqeXwm=IO!OA^0gkKXfglr*CC=^PBGNDqa5g4$CEn=I~g2>O*_Hdk_3!d5@ zwwvvKdl_tlbs+#EV(YbOLKv(Ywqs!30vr<#3sye;-?ENs`~*60<@G}gyyxu=KEO@$ zVLoQd;ieg#Kg@R@I?NwsH~8a+cKCsstK1VVm7C!Q`C)#PALGS@4Db{DC4P#(%3nXU z!r$cY@biZjutjVWtS;~Y{sA}rb``hAKVWo5uelDk!8*STvVg7Etnw>h-LSn1)<=O? 
z`E`EGmj52t(K2NvbyNdDZ&mz6(7DUp748~D!(Q$dWG-L87lSueaHV`TZv%T^5!+;XYbLo{yui45 zm3Q+3*uBU5xkYT92dSCF)@y8hl-n?D^PO+gV80Wj3ywm5)$=EMYtnljx8J-)kb3Bu zZO}Hzm2*{GEiAd}8m|1UQrk_m|8}IG?A<%>JW7xtm0e+1K_(c#J;<&y0&8I>*!8zk z{&kL0^@1Rk9kwml7T8gCjOl{q1bYeeDfa45eUulJ5m`O>?ax)i|0>XJSJ=!$m)M*) zU)VC4GB%$rsy+tG(keGwZrft3n6j!~TaWD+Tg!6HLUk=GS6^UVY?0k>2(0niJJ}!` zVax4ZY{#1`%we#1gw0`l-h9CxWBb{YwrlKZcEpwm%TkcDV0)Y`XD_f9n8Vn5?W`?_ zy}V~TA#bj;SJ-m&FL~q9d)^1G+(tSR{r4@%c#G0|Qac}B&%wV6n3{A7eG77BHvr;~ zJp_mXIssjKOUm90ONin26M$2IA;1~HIRNB8_KSc?z%*b6FblX1m;>AgECC)N+Wv$9 z*`E>6_BFsJ=-beP!aXSqzzWC!WCQX5g@6)3nQ;zGC#x*iwKn!r$C{HYDrkj9bjslK@KCoA2U=T2D=x|(`8BLUrB`}e|CBPKmD&RWc zW@6hrWP8NS0~U;WVjk?(xv}js*$y!)fK|Xc;6>sZc8ue(CgWP!6hImv(^${uB-ZCA z08c7g3V^%MRsm}Fu7zAP%K>D73*ZGno6JT49e^W%p2YIlOUvEu`vE5t%Z|i;Ctq5B z8ZZJl3xM7-djW8He~IrIdj)XqT`y_&#{Rt`*9MV_y=&a>pJ-X~&L#G5_MUMZ;!BBg za@?1(2za=^B>q;C@HZXK=`oZ)1w2pqAGWixzht-YOHWL%+fLd}+eU0>ZR3P55WWn2 z#dZz&hV2&cUE4k2McYH*$F`@yc(3QS4cnF-+0FJ;d%8W#o@+0#7uydKuGrJ7?KWV% zR=2>hnZ&_7;1Wy~i%H5821;Q}#K#0gSC}86_Oz%)5+BAeT?)vbbEXfGZ|^ za8JkO6*7x z0o-HRW8A9<@JIRM&?~`x3CyE}#}fUN0n$?$;IBhZWdQe8fEV}&z<94^eueZqUhq4D zNk|dWgiIkv$OkSGN(p1VT&M!ZYik8gkeL<1C3t~@f-FQ5+#wthdW2&_KjD+Wr-^== z=p({e;Bnys@MYl&@HOEEFy8AH@LkaF35&u*;j!=(7>}Gh6t)})7|Ii4Cs~eMM*(oL zTvL0k(=6j%{2bW*g;+wRvKpSR$5*mCRL#U#!_>kr72; zC>Qkv8)I-W@i5`;J^iS79C$z+1RfSgfycxN;7j5Z@Ky1;cvHM1&fAf=AU+V6#T9W? 
zTqo9EBuaK9lejFUV8$gEvgDGyz*rAT5vfBuBK1he zq<-n7@D#IjdQTsb&H|517l1EISAefcH-K+RcY*Ioi@WgewW-nZG=; zc6a_ySHgs`j%NZBvw(O8knrd$I-U<4ATxpkWKM8^%mxmSz3`mi12P}Ds;nFHf2v7M z0mgHC>uQdg4_u^{D&1;1;VQLOscr!J}wfgh_+fuE}zasg&_OGBDjOV!e~EY+*!Y6ZaQS~1}) zbv#jS@IkFYtJZ9qpmb}h<_7i?4rx(fDAzi*F0I!#lF<9K6WS?lNIOIL+#a9TE&@+# z)4(&@Ebwh@4*0&d1pG*QqCL~rv`uYWv+1;MQHyk|ZUH?*x9Hh=p6b;L^%CGR!nkBy zuheUFMi+Jc6~^TrJ)noRQ+iB4tas~2_2c@0KBx}^V|^5O4D<>8622~ditttax}obg z^*g}x`U3C+eHnN~Uj<&*UjXkoO~A0P*6B=fra3d6InI1%k+alU{t8z)Yk@hZ>~uN3 z&Y&~m?2yl6b{^T|9_KOOe&gj@yZ2F^w2 zL+4|Auk)$%Iq-&a%ZT7EgyW#g>`HZ|vmLH1S1#LuBay4XRqQ&5qmrwF>%y_dRqe95 zpht|u>jdyA*O1|3t~2=ly3V=IyYSuYb4@}G zLp`{rT{Es(TqmyEO1E*Pt~u9z;3d~1X9Pza*Av$>*BU>7TYlH3YrBpXdg?57R^W`f zY@>ay%fqc)U142GT^Y25p1MjgjN6&InmVRV#BENUF7@MauhU-S_#_A^OF4axdU9G!bchg=_cc*UN?yp;@dr-Fw+pNGgt99#j zFY0#OCU=TE&7JAaap${>+@-P)SGuBdwptsI^itf+`5(eg8Jh6gY^~l)%CV|p3_0jsy`mXxk`o8)T z^{48GjGUIy>(4Nvkw+O>O8vR|^N_oeys`db{Uq>o{S32$eWQMs9k0J#KU+Unf4_dI z{!#st`e*fP^_%tE9vW}SSUnad+hg@)c(RSm&9>;tvyFHPJtg3KBuDj>c`EI_%%G=+ z5j~6t0v~(Xqq7|zk0$^e_Qcqyp2MDQ$iXH(N4YM~anAtopl8^f!{m8J!Ty+M!gI+p z<+)m4fNQ{W-E-4($M9{(Gtazd0qi{REOUZq#k1;J_q_1zG?m~LQ4&tLULMbzXTkJh(WUAf@Z?)IP<$49L z3Jm2?;`aKDtkqS$&)yJz>b+6Iod$cmyuFZ%LPqNC^PcdYf?O2xQty!WjQ1R0=soYf z2<-Mwf=`CM(~w)jeeup1?49-A_Rc9W?|q0+&@wCC-X-rNh?_C*6VRV|*Swo*k#`$M z1Rw3Q_^iGRE!CIp%kvfbN_=I$N?(nS@rgd&=kW!Mwms2$AN7TOG2dZdw?5$X`i}aJ z8!e%4z&Gd{_Ko_+d=tJ)>NVe#@2c;*@1~Ihs6pQyh)pEQ_~!MiyD{3g;CtX(_O19< zed|6FZGAg_lU#sXEq{tO<4^Oa_%oGZe~$XtpReBY7x_zl6GmLtRDZeB?XU9J;#|Vd z$wPkG@4_+N?^Oo;K|jP|e}^BUuy)_ygX5R~n0CtF?>`CUr~M-km3I7RcO!*=+<(D; z*?+}<4bBa62~W2FhW}Q50lqr_UH?7*qS4YS^Ztka$6)8F|G6`QW3GS0zoiL{2*;^L zb7N{_dSg~&Zeu}XapS?pipFYOZ;du=r%`BB8{Lim#!zFFOT}$?V`n1U0d`F%csHx@ zGNA8@kqsxv>w&g2ox# z>Nd_a&NkjQ@`}begYi|k&NSX{T!MH(bj+~jqsAv%b>p+fHOMF+uV~zC+z!x27U7!S z=f*xw4Onzbz#4$)AIJ{m84=%g26Lb=P!cFJqI{q-P!qtNTrmJ~AD2UkClD~Ad?5T1 z$8ekq9LC((NA&K%(ZF$t-4MY81A)Q7FhpvIO63V@VrqofrY>WeIT$LSaB8wRs-wKNZ>_aM;>Z2HKjDAHDxyCH03uHHI+7%H&r#& 
z0&`8WW4p=KCxQ-kTjtYEI_2^NUqU~%wZup(F; zv;~Er8gvK!iQYkNFeJ&rXs|Qb73`Hdf_=dg!BfGZ;F;h#AtDzC&+q2p!HdDk;B;^% zI1Bpi;9T&2a4GmG_$2r&xE9pwA2a-_T;{Vd!x}e@gf{&X+TAl%~wQR~2E!$x_IF0X1*h1LqNDXI% z7Q@-$JV_20hD&4=E(=%672%pN!w-bTupai{zJn2uaN7l~Vs|(Y4u@mm!{P4mQAH0Q z_hg3$!h<+=goneU;W5mhPlPXpr@~jm*MV>D@tyE|cp>~Cyc}K$ufksIMxQRc9)1zt zX*IQ`w5GLYw&t|vw-&XQww8wnaQlf_>ujxRt@Rb+*0Pl&EUOnz7=rmmDX#mH(GBQ{jAozt@m0NTOYPQ z4py{2ZGGOl(Yh5u5pyIpk`9~|$&D1?HZxL8_+X?WQXR1Y3(9gtjktsNBmPJzc;ARA zk!Ykd(iQ2A^hHiYPDO?yXCmh!=OY&*lac9$BHW7lVv(81tdJSG9hr;Vk1RzVMV>^S zMb;vl&YZ}$v#5=3vp6%`tZf-mz7e4?w`Du~{THy_me(j4nOj3nTVY#CTUlFWTaA*@ z#=<-b) zN83BwyV`r(``kJ0C)!W754E3ZKL>oi{bKuM`*izE`)vE|_Br3M*4KVtJJr6_{s{O< z`?L17_RaR~7#*|3tiTx&cPu-W7c2D6#Y$pj%u1{>Ruf}lVoZ;DVu4s#=!wN*hhyEb zqp{<$f!JVdI5rv^(`y=vViU1Tt@*L3*wxr|*zzW9c_%g>TQH6idk|ZWt;AMi>-7b( z7qOkVNzvQpl|Vcto)*v44#sn|>Ue&_F?LLE_j zuU+`}SU*IqQU3=a>buk?qNy$Fd&oq6pQezRK0uq%YxMi*RP;VNjeZ@ypMHbRL?58D z=?|mV>3sU*=!0|-{VDVY{d@H9p${bqNe+~CfIGmW>;qi~K96z^eEq=JQLYsPebky~ zRZyN)w+4`EZMObL)M)*Z_0Leh^^SEF{h{@_brUVZ**K92|3?S1q7R_g(Cf&J-iJIW zj5HKMzl8$mKcUm;1R6r$L<8t=(YMiE^mkMW`X5v(^?vky>I2jp6h&oIMU;ioDL3^Y z%1^aZIaHh)po*!J)CeU|qtx$F_0%6x-=G?kzLxYgs>jrA`U3UKrsJl5>Q_uBO}|AQ zGkwYQCF;28%ckF@e#7+prms*ZOp~T5>WikUraz$uO@C(kGiu267pA|YzGV9Erf*Oq zru(LEQeQSbGA&cTYg#e=19i^y9n*KHanpBA|3v+P>HnF&PfZ;7;DHZP|MkE(5A0AE z%?Hc{)Pnhw=1)=^=1-YRsqdOUYp$ZcXRbEaQr|b*%?xcab7qA$n>BNQwwhba5jw{l zHFwc@<}aB0=$|ux(R`Zzta-?Mj(*Gh`{oO@X#T4CtF+5JY5r@v&OC3Pr+>+O&%8u; znID=T(!Xk6HZRl1%ztlwM*o_5)x1XkhWQ`O8}xvA)4W5UOh(D?r@xe(p8N*=`^g_l z{y6=W)Jo#QyVe;Q5FC~>HKS*9ls!aZefzo+)RK@pkz-k)WVgIwb;)F-s~xM8{g)r9LL|!C^=&5HNKfwXyTL3^MrYb*Rnc_scI4)FIi;_u%@6!`lV zc>VXO@1rE}@ib(z{-X64(E;mQ*0;!SkA8%npeGQbze;}dS>i&4@Lp+|U74MIm;!#u0kH<@mk z!qf7s)cc`jd=py6G*k^O zV>YUxa;cA@x2ca)A4hg*DT@#TZKaM_Xe<4QqnfA?;;AqdMiLdJqDY3;GmaFhlj=k& z^*QQuNTa?$^&p-4HL4%Epk*CEUT9mth$){t`v0zoPyM#i+lg=1`pY?|*+HbCyy|DLP{LjHL|ylBL{I zjy`XxvQ(jNOO2%leZg|batQTUWQ&4+*%Gp}pkJ}HT3XSs!o7VCSC4(a0`4AC&I2w2 
zCIQocnFMANxDA*?jmkx3Qkhm}lv(ArGN;^EmXt@z6X0jcnz9M9tQMt~SdFQN)o%5udR!e)2i0MaQFTn6P%o)d>Q(i+dQ-il&Z`UR zgN#jeSzS?A)phj+=sTK8OVQFYHnmJGN6XiWv{J1cq)MyRI86p|X>dm8cf*NL}(ka&9r@%A@}w`US>&m!KQ zO}zcX#M^U-x91XX|0wbHJmT&7#M?hkyuE;U`zMIE7ZPv(B=Po7p*fIZ;`2XCe7=PE zd@1qyPZOU%NPPY?#OHsW`1~(`&(o-aGEpYo-L#M`T=k5IYb|9Mm%dW(2_4e|E3 ziMQ8+w>P0fR5R6#Y{bv)#LpSx=PdDa4*dMrkU;%9_3Ow%eG&XzBz`UtKUb*VrhXfq z3+(M0c>5p0bMeR2A0sF6b{Fw>H#G--?xE(XdDLL3x6~uA#c%N=pCxXIaUV-;|QKz%Su8DrNxWLP~m~EDMkeC;$}WGSlmc5wjl+yv_a$VwqC;3UHb69YThT zx9xwE@J9o0vp>VQ5BwR=F^&TEzi;@Xfw#H7_f5okGVi!Q_qa+%IYGzsk9hVG&pqN% zEIjYH2mE<|2J%P4n4j!WkElHacvce6Nsc4{I=(jb>^{h6W1jK?Sz;Km^~VEmw;zGu z2^^1i?Y;~$+qsXuSpDNbVtFEx|1jX_z8m3pqx`$UZYJ<5aAxpwcJOYnn?3Ai5W88# zZYF`3`x%6hIlPM9{NcwT1F{LQ0a-;M0b~^UZ#LFwr0A%kK&+`YN6;{sCYcJNi|pDKU5a zZm>JQyo+*Y)gJKrS^%D9#`@k&v-1c!)+@mC&18w^S$-Uh8R_wN?2qTAUk09`eieA% z8^(+^nXAUj$Ihoe?0o(?IG^KrZ5MhK#_YB$>j#kQDtHHqT?hBt_Ltk@L|eJr#jF5zkYsnaylR>9CBpJ^6?$>jFKJruF$M>J#f4|?nAD?B-dau3q+H0@9_S*Zs z+b5@OItJ;R_}OvfjE|hvuV?xsWg~q-PwUJ#^s{tQEXf&Q;)xmiIrrHZkn@lozZzFL z!+KWMydi(j8EJk`=ZsDJbI!P)<5iE|=e%scx#ujsk%=y-SW!D^_@9e~O(`Na*zPP93bMI{Xyx+EvZJc{a&$DOuQCg;c z={Y4e7tmHSH}WlCx)t?4lI~~tP2bV?nwK@_scj{_Pl(pL)Xd@8sh{TScIW*1*m;@x zN^`O1;PhPleClWXM{WDr99v|6xj~P*`Z33*=hf6Nw|tMG`laVce(AaKthw_4m?P8k zWa^h&#ms}63pMwd`7pP7kAwPAMsr=U{hf(f=BLvQ{F^%Az!)>}Be%oZXB=_|LN{Yr z^M}T*#*xO8J@=41TCpg1Ozw*s7wK_uCN3J^k{rdk+zBQ&rN@X)fta|Z^640O?$|na zT&3g7nXzSLC~oFX&Yht?*J+D%dFn9Z&7Qu6Fm9<0@uMmd>$aeROIl_JCqhnlCmaHe7zK z(?j@EI*u70jVt+|{?)mn*reDr@YJay{mve5I$xw15nCVIB>#+Ui+%R9JjVQD`(j1< z;g9CWI5crfXOQ^!M65JD1~n$)fjy0s9)B8F`XZ(A7H?orCh0U1=gd++wO4B_*-34V zcaP`96Y<-TZQ@7#?l>_x&94(mI-jJ|NjjILSQMWTpBAc4LVE@&5hW@ns>)u8nu*#vVWf3}P7}#aI%6b$oifttqD0@s0Of#+ z|LNFlPZK3Z@-y-@&&aoRu1I#br-~9Yoif~ zfIUZ)IF&v@^waqv*;i+Xh-0SxI!naAJ(Dr{L$Xi$6p>C4C7(_Ek8*J3A7`H>f}V=G z=~G3LuW^!GpWKu_Lv+Sh)9ZlL93`CdMLK76B3UY$ZoaDgY~^u|^Z&oU{6F;lMgMbs z{vwO_jG)WidERzlM<^@7UjtT!@;rDZ_;WxX$_Lh% z?ca@Bk3KG?1_O5hZ-VE2;P2q>O-jjJ{r0QUoX 
zA>m_qI)U#4p9-u9&$EI-b0~iXR)LPqy#qj6;eG=CJrJ*O_ku^@k6;b_#62H6ec^S1 z@;Bg9@H~peu##tc!~?hf*%;2((S#oN{tCPT3V!R+JMMpgx#&-g{wClQ;DtbIy@Ql` z1&H_h*w)_+#3y|;(T2Oh@qC|_`UOCA@b3}~`+?(c0e`_S2%tO-o)0V!90~n%@Rx!7 z#XJ8CU@`hIPW@j2{|-sZwauNR+j*aX?qD!_;0~KNWo&PQ35Q+CMCdP zEK>q3#y%y$VysjGEXGzPz+x;`0xZUECBR~=R{|`?h9$saELj39#-1g>Vys#M6uwEg zv1BneECCi{!xEsuJ))(@Uv+o!OcxJyi346E@CHDljcdnm49IA5uLQ3F{sWNs>COSt z@BAfP*RydFIi8JdZGcCBcBUvwX|fefwxWrFUelC5qR_K0nwyxZx3zf7SaW|&=^%RC zu`xlVPC&;$UH)>m+XDJda6H`oGUe?m%U`Q<@i&(k=*|Y(xvV*5nS)&Pcjp7|0pjOw zV|W+~uDv;wIN&k9JffCs$E=+%tmi)j{1ggf&0PVW4P@+lAEGC7rMmF z#~7yv{R^NlpL&ckANvQ;{UngtGr(d#vDagCdmjP$tKu&8dgI~YZ{T^1IR9#J<~7eo zNaBo7toAH^8?Xr!Ea^P~r5pHl;MT87fNP-OZ~iSn;;m1=`$VU};ePqrmZb4}b9KYv0CFEa}^M*_NdTy;b1MgFbVL&m8S<g+MEx9MYquW)yEl@>X&%1AQW)8F5>& zt%ZhfR_hv3f@$>^R$`O-O}bkX9HV= zLwMVdx37RdZ(y_VE%46ZRzg$oo|L`Xz^kCQ2p&L=$~L2xJ|=fFFuYBiW%|pVPB`2^ zzw9F@^9`(KeA&S85d#-dYPvKz4Su>1twuv11l}CHId}`|ZJvU|fxm;t$UObeppk+5 z>|5dH?QQ6Z1*pA8SV1tlPCA#p4*d$?7z3NY(^Q%pI1`btMqzB?69xNmJbQ~EvTz8_y$3*F_dS*!OC(4y=N>1 zO}YRpqRBJSo8pO*!X?=oDhWY<TdwbgTM(v*p{tIy`(foU`FMC>UNw zjqd_ytCXQE1)mE(7kmu(81Q27V(^Xzze3QPUAo@rwkk9{(fjDRin=ZU--lORkfNA~ zqfaYQ(5C`ATp*p@ozm9NQ#5dQV%444X9n;tdh@Sv9>v?M2)ak%JPHr|?osgs z)u_FIzIYp(7obT2^*#n%O6>)V>!nzrfN{MP3luP}mtuhey!SCIP@oYVJO(U;@(DJ- ziMswmT{o#N|0UrW7l+`Mq8c+UiPYDV(0NjdJ{Nkzl&yxG>d2{PTZiYI0Tu3hO$J^%dr%6ScO(i zz@Gwd6Ydme_>93DBEK2Z_986{&QoyS4CQ88_YFLq1l?~e1}VZ;{9^POpt5FOsH^xR2|*)TF@RRI zRbK>k1--VEZA;mml9f-)$+>cn>=K2><)={K(rM8MB47 zZa4Ho=!KNpO{qenUv)vZ5Ucul&Mf-Q$0l8{Lvvtb+fubFXiOWsplu$#kV!A(Q7TWI zK}X<|l**&jla$JnH4MHTyb!#QQgz|43w{asCGcb!{EvcOUg>eo- zxF2f_4C)KF`Y8IjU(&x*X!|kXmsn{Intv_m&OyQ)I9sDRzU_{Kb3Szqq^|jJ&WE!d zJoDk17nr%&e_YU=N9~=k-WBvkCriOHjga37%QV6=ov=(JEYk_gG{Q2S7z@Q%rW516 zSkU0IQXDG{rS>xFs!5NR5#<^Rk2+$LhL)mwqmH505e-$B!JjgCq4hD^wHp3;P*z(C zEqxfckoK--DLr0oyi)i@F~`pZf)WF@oYK1|-V#!&uBo>_OqGd^s)sVdeCbvXz))Bex>wr=r!kXaHk{#_o3mV)U{7-alQwiNxk1& zN{YWSZ&l;XXi`KgM$n2PZ1t%){kO&GewxzR@VL(iI)^jbnX-=o7pourX6mo7n7*@F 
zWhVGaL1QIZ^$TijlY&#BOx0UFWS#mY=mvilWw%)zX<10vD;Qjige;Xc_*C%0;Df>M z0KWtLLvZF_^;d8?y_OZsQw}U>%H#=xZroC$iwu7?LH7wH z{0^Q>`e-+B3bpJ;pL*8*Sa`R!Fym!6_Q_-(A#4vsyF;VyA1{(46d=cg0dqiyMnfCfwCOR7Dn6_G+&N=wxH*7%1#XW z7-`1^-HEAnjX0~pIXwlR6pu!ww?Q`aKMR#@HY@GnXjMB|FoU*d)ONHU6r~cB+XCJ$)!XnJ`t+#iCi`7V_AALp;R(cJ z;;r5uj(*LEBVs@)g|b5w6T7B@6AeUnHw!wH^|CC4f3Nt1O~EL9RrE~gyG0Kj(T!E7 zVpYu!TX?&iT24~$$!Ho`ad(5SgHi~k8aDD9xshmy_eqe56sIFzQW z>mA~4)6|Yc15;zIsF%T?K*CHUe8v7o9g8!@zhFIbjJ3&Q7DvK(jd!z>)!a88-}@J< zzZHV+zu>7vsRBx6mfqlK-$3<_RlUI*to_S}zcuh{@Lc3v$hxTllnX@(TCr|=S}>Yy zap>>ab+ja#xl*;)U}UXg6fdA%YiY{@^tqC<%aDI%sphQhXgD4Xx7)oVO5IPXHt0MD zn>2zl$F4xNM;9yw_C-(Z<_`w0fHT8F@kgxS$`uMmtY4x(Xl#~q1fxfp>)BffU!m`o z)4IMIxq+RdPb&^YXr96TLv%lKuGHJ;W5IBPc)T&J84t4dKg7y+4(rV&c4q_p8oO`6 zDjQCBBcmO1jqr!zIhfj=u(;?Kux76S{W9nkfJ^NvK80_?_pJTjHk21=-FWm_ zhCbuz$Bfh(e7WKOfjOcVYwB0o#bUoGSjYPMDEkJ1-46^8;%NtkYvRPhUi)0M1Y~x+aDcwFusyI6WeX^~ z1`U^>;YcJ*N5V)XjAU2g1@=Hj+TA%xFcDjFZic5B(6Nwx!ZAp5#N!X3XYXN`V}RsG zgCyS{4F5!G{D2xKS_+acz@Gb{Unbn^K{VS5JrCRNgi@B#S@U~zTyZ{Qp74x(EHBAb zY%d_QpmZ8g_)W|ge_@~JCdu(%5`V_U==KttR6{~_bf|_MnzJK%3Z4mg_%S%IQJxl* z7xb=S#8p+MZE|?U<_JUaYLa((%xGlA;Sr)=RdSeP@-Fw&RDP%oFdLbfsYxeg$kpnD7 zx2=jQ8J)>DwjyT>`V3Wk2pbS9wi8>n5izbKBPvJcP(wLSR6eX^C`2l+8u|YTL9ZIw z$VR)q$Y@1|(2Cjidgj?Xn7KAGi>XB&tqzJC8J*E{hVmb?wtAPfm4$XC@-FKa3(4Ne zLc0n&tjoMcWMX^a*-NSZ@O(oK)*qe&sccP@)zsJr%d9T#pdLZzFVH!I)z%M!?rbc{ z8pQOV@}(^BZLAKqkPR-Ubwg>_BlOWDl$t8&cafZMA2~2P%zhWLWpaMM3wApZn6~?i z1kKt|QKA3|%wO&$jqnD95-i1ulzzFqFRun)TNm zti_bUwzA$Rx*2glkRxT2#kHdc+aX~xd6f1FS_}Hkn42o8zYH#)DOL{Y+qjX5eO8KM zM$JZQd6HFtR)4dw;Q{bJFy0HGY=Tl?Go0vha(=D%3!)c<>uZ|HLWOyLPwXJMaVS;cT!tI@A&++$4%$ zi`qNeojrCM3&{bVlqTLsYGZg6ZTtw%TD0yWT0~j*IJ$MAEytCc`E9^=K%ZrCIOnl1 zG!Oj~$Qgs21d?w?hel}i4cab4LIX;X$+%xZc?`$k}S6?Vapv_0hgu zcsaXsc6JzScVc_VtO-CkX*MPt`XkB$$S4i&U%VOb7c0_$i4rfWS?<2Vjbv381E|x;6*XTlsVM6mTawc)KZkF2z&x|Q`v!V&WCe8+O{dZ&(V5QZ7*YgxEAZmGBT)! 
zDiw7^!)!_wA*T>IMeNFMAxf`?J`egbJo7Si-bkq>fBiXEh?jdH$rDRiw=-;XAquvx;EF8)wQ2q?%ld=o-rH1)2lZ1}lPu^8-u^%ic?CS#ihki=1%10?@DY2J z0|R{}d6hi*MOON+qW@9KK7h9U(dQrN^8gzDgSXGyx8iqp*bE2Q3G2-fboe^pT`Szg zVP$Lm$ksMuvFX^bD_Qs5?EbBxFCKvMdnjMA$GecZcpH%O4DKQ3=m1KP%Ad=dQZE2= zq??%|?qr@dH3qk_lXe~YG{ipN^0r~BmqoF?vx(ULwB75|nHG}?3}J2+&x6qaUDn*- z$n@_=+j7VmhgL1oY6F)4jr~%!>l{|BW8ps(h-Z0FLp!weMNifyUukXPT}qAY zZ+n-f{6tyS^>|lJyDGtwov`GmPzot^wZDR~b3b08nfmV);_< zj)p_|p{$dUGgK>-U`8-sI4!EqTB#kap2@mkyLiGI;n~i}y-sh#Vc{_0@Z5}#-Np#z zo3B?4C6|6FwpkN%y6}7kPhGo%6x_z>zD+nL=n#<+BI z?DzWh==mnzvL4;er$<=Lg{cmTH49qqSuI}?U1%OxZB`&3K}2dYl8Qxa6xw)ts6`0wxx2X zK3G_kuTm6ibG2nUbh~=JhEY}t|7;cnw8B8|OTlot#nGynvVt%vXd;zqi%b0U{{zer zjH+9l8ed0;uGIB9l!vg`ekc#wu|}Lf7-(0pub{htQWwF)exSdSnA`yRF(foV^HU1H6&Sx~2bqd4<1G(A}@7?vB>EIPdR*?&#E)yYmds?=3&GK(@Y* zM77whvfnHB(QAFs?S5K)C#`08(d!4LE0lgvy4x?6#Mqj^0$?uV{W&bK8OjRa#MGC~ zmks_I?dqmDW@g?jW@JUbx8RviJTIfvdssUQ&EMl~Pe%PQcC@nCZ_J9`HarzmJHv{Z zt=Pp~Mb5?-YOiBdf#|Ij%_iaA2jC^aE-b11s*F{@X?BdS(78kt)io05Vwn%=DfVQ{ zxcjaC%MQ=vt6oC$h~F@{SG5PP8MsQgR~&rj=**0Pen~Lf(H?@~boL0k_fV>{U^rJW zh=5lGBN&_aGBbXp6#)`X>1~utsT^Pn=C2dLa)D-n&-};Lv%bEl<_Pyj>A#-y?i2I} z`+JdQIGy!^Ze3dn%2vVPh@feO;7tDowE)0CKfeMw9f=!jtsLGa1jErtI04)X91C3J zYwx{*px;o?TjBR%T*xy|K&cw$qpf!QDvIK474Ga2H2lKXi$83OoNQ#Sq-+yk`+oKP zN%91LCVI}(&N*cl0owo%q;wFTpzJh%xO^CTyA+(K+#n@krmTckONG1p(8Tv#!SI-Y z9=PXuqPQ!0tFy<=-Tjnik#jG?i&b;PkLs~ zHHynC5?)R8tfGY{kdGJ`ARh5lD4MndrCXqI59vZg<0 zn1~%+EJ}1SlziUigLee)XmN1{N1z;mas)~llrmBLgTnoT7S~&UK9u>QgcXH{6)jHL zQ5cZ>7(Rw%b3=NsgFgZ#S4uM1~Cc+eq8Ku=iQ z+6q{NZbj%>gk&UyRzh7`I+n6yMaifwJfpV7d3!l;FGs@rNPFMn$Ug?<7}EM6xeq#E z?Eq{0AH(x8^#0KMgFg!XC_JO!83o=GyeB-P;Ta7+415^)RPd?7UA)M(UWD%j_@38} zK5d80kC6Eh^nK9xfny&J`*`1ie+&P1__t&K2>VCKFF<|)_yF($;B&#}g1-y?E^T;lCRG ztC86PnJwVKe}d;N68?i_eurg#2Yn6nHC7rPawDZ~q%U^R7dzlw1?MX0 zhoB#V{u=bxEWad}Y)A65pir-C>!sD6ZS~!>;%@4lNsTkXTYDV8E*q} zHc)#VYOh0G-%;0h@GO950XS{+Xrs3RdYCYOO+mKuj$@ORj5!fdJ`}Cbp-v#;1cRP`vilsJYQrFvX zz71y+BsW1H#!JL_@vg^)*CTl`{EOjdMhNVTz<3Ysc)tgpd(fO&DT7%lB5p^u?f626 
zPmu5l6r}k`^PZ!|=cwf{Jcp6+M2KW=mQ>shv*ifThJUi%_(~uWp4w= z6MQ_u9|t}ToHqKj(Vql92}^FmlAG|fJbWXMy2xHavX>BhhS)ROj6RzwOOIsGBN1gI z$_CUG*t&?ZJ~7sB4c^*Der)(Elvk;R`75-!(=|xA2K+kk>%e<~_d>!J%5K48UtzJY zz<&h)5&mc4e-;hzLc_bD_lDk^dhyi|UkzJ=w?xj1$axW*aq2NnJ$lWf*MgzQ848XC z0xaO8r*HM7_kDWbC!+c`qP_?I9%Tnnb`Ut@I$&J;wcx3R{tu%6gW!zTh|!t>PX;^z zUKHR(;c(hA9DXv!h>S5H+6P4YfVnbYuJoC|eCDt4e9E2=PgQuTf-^@4%+WqE%_pY$ z=;ouFN7m%otcje#Cuaz$E2OSq2y%vizXARR_#E&#@DrUvn}tI6q5F8Cj|Yb9;8_QK zFZ8{%mslDROMP+xpB%s|LAMg>A`S<{;SiqC<_YvmVEd&Iybw9%ky9S|Q;p{ zG0-KKbIIkRCy@CBbv;g9k0bvi@=ro1bMVO=f)U6W0lo`-7yLvU&qf=3-pA*i66b_- z=Enbccjv+Mtz5c|>#Q5NM%X*JQcn&)dl#2|&fLhAzJW`(Z=HSXR_ewrp7;)a_V4)F zr)}bfv+vzX-Luu#{>8q;ID>JomhRL#^9ed3EO=XP{HVxz9YA+?eu*7Iy`J+H&9=a+Kpc|C4Dzl>YYFXz_t2Hbjn1-G6z+3rq<+qu>`Y@RmUeSRHxpZ}V>&)akNc?a%3zn;6#Z{Y6p zj&5zYwsVu4?PfbSbN6{C?mq9#-RE7n`@Ac6pLgT#^X}YzehYV>_u%gHTiyS1pKyA) zPr6S!x!ir8$KB^K?mka&>v_^$;=beLyUX3>&TqK&{B~|V@5`;{{erQ929>S+BpuslUU?Qv}ps@haVxf1dG|eB)K}m3i9$r;}L6&C+jBsTE+`@8=sROT#O%Nar^xx z#upAbwdwV2w~FcSI`nj1diPTLwVvtMzRqRz=;fwI2RjYip{7S0a+`f4_j%KcSJI1( z>BXzKyZ&nT9e0`2#N;0>%eDQfKk`Ne7vzol+4F4P^LgX*CY^bv<;_YxbAR?M%v+kb zBK54!TW_9ChH~z+E$_3uedZ}L&k>#y;EBA_^b-qW<$vL+qVne7>ZvCy_0)|uFwa%y z*WwqRHn9$|o6kJm;k8dr>Ph_UxjlAwY+!8gnP+HhMCuv+vu8}~#WQtHh)p(6cSAY% znGu^En`a)?w}@w1Y^8x~C}E!szaV{dYzyT_r!5=Xk$U#T_M7LR`OW@?r`WXbSo$%g zPNtu@W1pW}Djvlv##Y9wo_T7V1AwBJ$Q1> zlb@pZjSn#JzB9hEB33>AP<+@gJR{>z$H&IU#b4%`5}%oR=EN76|G(Hg@5UGZB_V2MNWkNCF+J5Foc#!n?YX_WAg zZY3E{NtC0HUQAR@R1=oSl=TzYSo(%UeKU5($LpDKGb?drEGN;-NMD&~mA4|%F456= zb5WwJ@%HkGp0SgOnDOKmxqB0R;<>RFiH^okvJ7v(#65`zV#g9g62lW3kMjQ(iDzkL zk?FB0mX&xu@qm#yJ~7FR9r4g2IPF+b?`Gw#PfUvsi*3jom6&Dd4GiC;yd$v&iMa+F zA3vU0XmnhfSdmzrSfALG*v431Z=b|x3C3%pC~?F*C5aPx_Az5MQJM^>&D4}EAInNs z(U?qDHgL~^v@9%G&t<2{lC<$<))_ElSa$9Rma zl$>CGlTFKwgyalE-PR@`{oZYbuKm@x>4=Rzb`g0|DpV0#x5iBN9Ij3 zUOL$LT&q}%{HOn$z4L*IE6MZwqxmxo%k(@m49l=A$8jvjU}!T8W}*9a(|_KdUdBt< zHgvbUdsxfiyex4Dt#B z^6lcW!;&?^nP2y?Ryks(!B?jj6z8L;+9WD&j=j 
zF*VQ`xL%X*ZK`QJwh`zJ46u2PF@xR;+zJc_?jG|8?gz#(LJ>JIBI~GkJcw17bPtp` z5qONUShS-NOr8{FXavjN?!XhTJ@Cxy56mHF1Aa}Y&x^n!w1z-jV8z?&dyICk2R4I- zU_3=Z9?616#3e+!V=w&i{`g>WFb#3;fgbCEnZX=(HFl+7zIP^A#G)CFFM>tE(x5FU z2P=YA-rPW(9=TZTLfbNf-e9J$HyC0y37){1VPmH^SRZU;<46DaWz@SZc!tHKV9T-g z;CbIwT;(Fhm4tX81rRf8R)bf3r-IjlUC6l+?8C@-5WE?zsA;Te4Bqx#s9E)OBBmw; z?|4Uo_kv@A_25I_@UeE*@7@N?&8grd>N*{qJ=PI??!Sqeo(<0XZsGbvHC?`v;6r~1 zHM{B?_T~ncf~&!ozKq~jC?=E;5)tQwP)aC0l;!OX<%UclM1znSPnyw?J*0XEQSwk= zBG~60t!WG$33)>PP;IC#)POp+qig~78VxmtnnSJL+z{?o=o04fJ)ab6M>`~pmFiH3 zFC)+yV)u0fW2DnB2hV$ljZVdM+JQ1@JYlc37^!XsozfgcH+^DqZw6!s=(1~eDaRwk*yG)5`36V z!pDgZ&ilmhiQ%XlpUR_^?0aEjrSTu&8X~>j2=sPipf{RmTJy%x8%->|#l+E?_bqyh ziKn-ix9QF0EA-})NNZjrt$E+2HLpnTEC=XqllO&RQsz;juM@?3@NguEVe zz;o5p`9@p^y^-+LVGyzhSY;baOO3%aL#isz9Cn@wSDN~ z+yH%V$W3sYbBA+}GsfjZFbSr?>^?jP^KZZsFJqPS5^U|JI%Xdd_8|TSq=58&$O5_h zVA>6}`wgfr24+4#_JQS9+kwjUBX5WY`1irgsRea!NCRk!pgDrDPb-&g;1Xz$xK<<` zTz0c$Cc*)8nPJz?lEI0=)fXg7$F}4YT+8 z8h&2>HqXfl&+ccT*R%5sjCkH{$1FBB?{dlR`8c;De$=pj#2TXpZK`?3?L+$WgXZnB zKA&o^rl?ueV^PftpBLeI5RQw*dN~y5nj_{&p&qyBd>#W|nOl42)pmRd*T0abz!C>nB7BojrQBD%~CwO*I!wl zopo2_8mo(+n;y|8d*&9Kf8Kkq&MWWO&Kk>$b*gt#k40fW@3cOyy|emw+KvMg`n+P} zk0jQx-g&NLo$6iHiJWiVmwJrwZt3IG7vp(#9Rqv`Iy4?z_}oL>@`+rg?94$nCwyqP zFDqjHVP5%i_4(#Q|M(COeP;dH6Na^y-mxb2Hv6!)^r^gT#1G#Q-cIiCL;Ud}{`jz7 z^wsIH$cGs0WA$PFt@`tiLFZvj;KLfw$JT)C`tABSWzRw%)_y+pgRe*LFCW%+KD3*? 
zC-{c+e)5gz{ywzPhqasUk%!iREdGaMvk&d{&2X4a)yKB)sow8C)(;E%IP)#*{mAa4 zZ;j_|=w7wY}%FX3?v?e|MuI`#43$NI*P zHH{x@8Gj&R{CM>BfFJ7`Kh`wKW!eoOC+y~F+j{NMkq?C;Fa%3|_j9m(V6M88K95*>5hxec{dng?eB9Mlxl$WaGqLqL)wiiQDOVkx0O~BdKpw_`PIhqoM9feqUJ` z)jQH10LO)&DFezaWmvhZ+*ihl3FWc!M0uvnDT~UAvaW2Z1~pzas>y1anyKcf`Dzh9 zrK(Mp)e5yr^{OHDgj%mQs%O*|;=Fp1(pT`grgniFYM**jy{+C+R@8eaZA^WrP7>4V ztomG?SC`aP^`*L{#b^ndsLpFCTDq2{(>S;zgZj7Mzm4wf%Zt7(q^=$+6!VqTh`Y0Wy6iJpiHw+O_BkF$(F;QO_ z1R>54Ylsu#4PP-N3JC_$Z~)QdeM5?1G^826CWwYC!ygDqhFrskLbBnfhMx*4(eCJP z315vp9s2{}17oJ~BjIbtkBzcWVEl*1dZFBS%J`X}7|$60MEJyb&iG^DJH`vfKNXG| ze`5Sxs4@QBI4YbsJ~aMHXfrMw*M8Ds zlk4zP4Z5zzbBvs1kOneA4#?Mi)gtcCY^4!t8<#StV&|G_m7c~^;2`GG&TL|HX`DWf zcg-pNb1OWzlsVO?EJDK!Q^xrmW3#MOd4gG1gzFiEcVpF9O%PT(7YV9YRP2+_tO zW04SJRE(++Yy6h++rnGMDq~1UG=A530{7wj#(yMy-FVvg10mDcV*HViWo$G4Ga<)# z&v;MBHU7f*3*kfKl5t7M`~O^elu#s;?)>)Ap$_3)C0of;3JqzZQS_izZ^gbH`?es6-xt3R%V*+ef_S z{ja=-hG(?;QnC6H4w(c)1S}o32WF3sfZ3|#VR>AhkRQuW++^zP~sJ% zlB}dDnM#h5uM{bzicOJ~3Z+W%Dk0^BQm-^BXOxzYVwCgBMdgZeP3cl@D1F=cHUwX3RnMD?hC zwN|ZDX4M9@No`hJ)i(8#+OBq}-D;28uMVn1>IlvrRUZ^4qkj0PkJKr7PMtY4pgx7? zi;pYR1$9|nL(YaKXmPKT;p=FLT9THkWoX%2o>r)pXwso|&8gIDZmqKDoL0@s)dJda z?WA@}9@kE5XSH+M1ttEY2JNzTRqNEQYrWclb}QU}uk_>I{^U{{*6u1dwfo`z)yB06 z)0*};+>hE5?U^>GEov*M-MY59ogeOBF5L!qyxZtb4)^!oes`z2Gu=6gH!Q=~-P`Z1 zU)=fbB6q3V=9b+R+x`BU{&rWnz3!0vM7ZC>GHlb|i{F5Pu-N zi#hTqf++q_{GsqZ=E#qQ6!D*mZNdlQ1@VILHSwZ&Nk|v}1!m3fiQQtK@S)f*{!%EW zo$c=K%+rHQLX~n_IjfvgE-06ktN5^EC&}wdFJuXz)r0dYk9aI^!s#S$_8CUpV`u69 zmyyHDdx8?5!9E8YUt^%$6~=KLnG|rsa5fR(=lVp)J`54xTJXICxTCm5uUCIa%IPV&pB;xSXsc$Z3kG zq=0m2SxPRBCOOUJmeb@GrC2d5cEx;f7@n$}W=~X(01xw&GYgs(Kd6;6mAc*M!Mj0e zQX1?|x- zGJ0@Wd2n#pl_(b}kK|HiN|{lnK6)mXI_qSc@>F>N?9djRPnBh54Qx2liu&X;s-VV! 
zL|Jwi))Tt(^sdB|3qg-VgP%}&e`RC;-$3r>IG_GcYJZfLETBtOsg}e=3wM3PG z6PnvTVNX;mLA8CtIVh*80rj{Vz};Zyy?JU^t>ut*}VuF3)R*gQ>emj~1^PwPelc9M#hDS9gyk`-+yS`^rpcf}t8$&vymE^cl3N@(a*H!YJE2u+^;)BLMr%=2?YwqTyP{om zT$C@`?_zX`+BML{#@X)k_KdSVDOZL%;L6Z$ygCN8KJBJ93AKca~{9e|rBh=+4C$jALVvQYLq?+ssmKI~$*FRdu?L6l_qQ$L)95hV$IH2RGF? zmS>-E*SQ*q1D|t zs)#5q|0*JVs>)?oZjRtGxr$w8m))hhjyM7?kIQen@2Yjxxf;wvt|nKrtCiAiu1nS- zSG%jjT5BG0b=!})dR+Y$w`6{H$+cj=KCqK zxSrq}kL2SnPx;02O!*|MqwT7EN*Ps7qi7|-0=^^4{^r}M69F5 z!=f4E#wuFzq_vB7-0`F0Q9N&hV(_nR?P3?aw01GGJ&6CHP!OcP9a!2@vKwZHG&o0` zBfC~zm3-C3{t}G=EeX=fXo21>oM=x})?*^tJh!b!IonAZNbeA$N>_zw=mG<}v1|d+ zqTw6skWJ18XY>B@;4_|X{Sw*6<)zoi*4NyrlI=V7Hr=TNx>H}FJC*pFwI}=EJncKb zL-+gBL=-2*G3>bOxbGMzh8+`*$BrkCXO20?BIJr=-TcC_$?Q&pGyaV*I-W4eYvV#~ zG|`O~REQNqG)1e(cA2(EU^H(RYe53t$M>jR$<&Vb(GI_Gk;aIP{fz$ifqhbk>EFB} z2##7k?Wh9{y4}$PnuSD1g`*0e3WwJbf;@p^g`?iVe!K9kFR2mh;{>Y9SAIipw)6BI zs5wQo@pLfK zbKzZjE*$tR&WsY$ge+<={SNBy)e}+n2AYdL(a%==h$T^mXNC=ohD5`RFobq`IIP{Z zB%|zgJiUVf<$ReQY1_}f+rMdBXV-g`X5_U-oZGhN{7d`RL^&z|JII(ba#a8*#j%99}M!o3-{mOL_bEdw;zXoSNhTL#8UdfEsM<}TPiG7 z7Oy2_Ibo>>Axk4TL+KXFdB}_43bS;oLaFlm{#%vzpX z=D{Rb0;{CIgndhjVY-wciMj+S@STQCx6DfEQkIkp-6R!LzDY7$rX{zKCRnQ5p+k5Q;;1=?)!*75%Z6Aj1F1QcI!31Ed)i-J5iI}G+YtaxE}7ISHo0O~y z5q5276T{i;m~=KfTS;$oULx5JI-ISrcRPDHDIlHeSzP7<6DW4}I|rRZU<8bUe&++{ zBVLy&FasW;K2P~|Uw{Sh6xUdGt~ocHYc2uAL0@(yx{_R}t_+Y3l3aPDC%Fn;B`yg# zxs1fUkF63^lOAx|UB|&m5OAGxohEsf%TtuU+vgmmFK{k1>ADI!UDu)aIx1Yp!Aa2T z8gQK^d5g;d%HQoXOz8`p%S^iNg8Q!P(8pa9uE*dB7|buJ^>eAOl! 
zKs?iBqns?K$(eEvV0yk>w1?7d-6qR&1*igEV3R{+W1Il>pb?y*bPL%S=jDrgxU#KZ zle^>_fXO~^Q@$l(~qZ$Ojaju`_`KQ2XaF+YAZztGy z5}n{W=;bN9mtT?~KQz|ahka}4%91J{{7SxW<%S`PGl z$}iGNwRm+?i%DRclBTm)sL6zIp_v*gA{WSgG0xT~Z!fr{TV8^{_Xxw06c?h&;J=f;JlD4Y7)VAC)+H)d7>T!!8#hU6) z2U*BtNHq8bQZ#J5fLA*K5oC%p@&Nml<9CIns`LNr}vbn-s1-#~v z`NYS^&Gn$se8$`Y&YLfquYhZy3!FFK0Da&lxNW`z+dVM0oqy5%5KMw;mi}nfJPY~U zJa1kCtKcQq8C&LgOAJWhx(HH0I>-XKx?kZ1iwP7Lc3O%pW{VwCMfwQv98R$K4=0$a zbZMzQTx_WW4VETLGic?q&2q`o4mvE|7Qdz2RHaKxZNX_v9cZxhz^}*B&t;os&@u!@ z3RWzm;6cHrMVdkWJa5eug50z$;5?Ql$i~Ws zpJl_cP$sanB%q81c3#|A@mX;nd+rDq#5MT z^PHv-TFBk@QNl$}4Q@T%jen}F^QecyA;hbKRjC@%dH&sFRmQ7Ip z326d(3MVcUwdE^VSLR@W7i!D;~UOt%`PO>43>jr2@w4#|8_gnO_KS&IBptIaAyudr58 zzSnd`TDRIzhK);Bp3Ry}Wx%&ULOMjgCrnqMSK<6kt6X-Ul|y!!UsI+$Ru6V=J<^So zK4ZE99c69XdM%^iLD_`m5#$t@0Z*k{WPc$IfN{$L^ku2n+9FNBK2GTVdfpn&Ux4od z&SPnUY$%)gS>QZ1S6fzDVgxiP_nCVcUvS4Fb}pbR6;|I0a7Qelh!5*w5m= zoP&M=T*kesMw&tXJnyQt3UU+U{63{S!8x+AvYB5!(v6h9jxx@%^TIYu)8RGiMQ{aN zvvyf;9A2|DS^K!`V)>CimL^jgrEhX>!*}W;Xlb2GC?vixw1?&5dAl(_n)m6)PV-3TW4)eBp$c- z7cYTTE+culk8R7=d^m>m*26uZpUX%F_puFu5vG@2F1uRR35Ge>%X)2X;1Xyr>n%vK zb%5@IYRGEH6z6WIY#zJV3gC(^2$U4|jYI9_!lAYDxI=5mUqk*H^4E~RhWs_;uj%=P7s?Y0JIj+mYT>i;jKY=jY>-!! 
zUS3#KTVD9_dU;84X1P@EEO-AuzT3pU|GoDc1mXS9-v75k%=`cG{ZYaC{)4Y>2(Hxc zrheDZB18%Q)$kVr+fkD326Q7fR$zPWB(uNSO*;=6n8olv$llKE`ffvPt2BY_702u$ zc8q<@jxi>(tj0{T9c!qZCbDPK&O-}Kk2_61C+X;>@|UT!HQE8l&vygL7TE4SY*&>V zWcgFRV~|PU`wR)croD(Nc6JgHHfV35WZGwFkd7zW{zFlHtTe-vzB`d_C(lmO4OGH1 z9h>Pm5Z%sx*BAd-9iSV|)4o2{Z08}vIPD2kOnPzje_$n${};3y(0hDeA{>RkWInM) zRKiPkjP}sEGVMHMW|pY;Vy?4(ptDkFC!t*OxxsfH!g1@`7i>qUsxKzl9touTS=|g% zOpj7X4^Xa5`p-#!LY}Rp$CGZR+&`zYHpu?3$(~L2e?#^X((@=cj`Z)5{?GLim>%^P zXLX%&lk~cg<;51;p(mewlBk42I-a218U0APX-wbpe6fyklEW-9Ia@zPKa^AM zkid4WnjYT_$a0U`^(#8-Ur~!%=!#j?mK)TThva#R?DbTW%XIYw zecZA#V~Z$vjE>z@^FGom$bXjV(?FIH{kn8+3fUi!h3-hy4E2edU-1JfpY0=M)Z0$= znJ3Rws^P!f!ps#$*)ckke7^R@1}tsty27uio{d!6EN?rrG*LNE$!DG&qiv)=C3_Ez zs9SWEb99wC{Vve4g#25WJ$jQY)zrqqtxeh!Zj<#J9WS$E)FK_9lO>&e=1ET?`v_Tn zMV3lBn#n%Kj?pgafnvH}<5ah^boG;T-RCrNVyH#A`uO>xhK*#ZPZG1lr?BH&etnd( zUW!X4|9Pgzs^pVMmViEP*|{<4dI@Z#MO~!c^KY^J)lN|j2S`uY^0Cpi#da)vkB*ge z);RT^Li!2j8MQ`d6_UPAp5s))Rno7JeTG^+LG~Q7{4dJ=30Y>Tw4acroIL+0J0had zNT9a6spdzh6@N)}c+T#49F0Rm2WxN45<8-QKBu;PPBZ<_sD{jci%Kx@yZJel&`Z0~ zu$h}o_u^ZWYo_BIjhV+(<`nNSy&qVeWopH9>ZKI2ze7hqd7k1|+@x{1N#l8y#)j_6 zdMtzKh#RXE0hmuM9j}tbN1kbPtu*qSq_f7!r<;#1miv;%&v|x?+Tu~}1G3X>rMoZb zvs;gEG>VpmcWIvD_*OcNB8nK%3*=u%M~ZSWx2exp*tMbuw*DK9{8bwHZ2VO7$g@hL zTJKv{+bOEQKGH5zZ!D6}&&Ym?YVIce6SDVFi)Kh4B)hsbO?S4J^d{=O@hb11d+0jqs zPw0_wtBdYt5k>w^s!0*~3-pX&WnQ5D>XwB%njLlQ4u0_+=w7$W80{1ndIr&qV*4* zn=WMF>S?_G8MNaa`|AM-bQEGPk&dIj`<)=r4ioy`WBQJF`d)YX{&uvd9pBB4cCgd; zuG9Cc)Ay*;cc;^Lq|^7J)Ayg_JI~R+P5M4_`VMpY-g5t!x~~C`s=C(R=VvC7Mt;tm z$(&?KdvClEpmh=_=YF;YaNNP!|EA|k~|5s{(-q9Rg6ijg8BMWl#njFcj!m{LS8 zjfj+w%SD=Uky1=)1d7O|DKm4|diNO;d0O@PK79Io-QDkbv)11G?4Pyw*=L`T<#?5o zp5&wtIrtKj9^<61IO!!$`iGO=;G_pQ>Ge(edy_ui9Pe)M=q7!+NiS~Ff1BgEO?tYL z-rA(^5b2>!`euWFEa{I;dSa74*rfM0$M2f-xCZ|m(#x9kujY7GlRnj?H#PWClOEKh z?=;72n)H_@y|AFKq<1vw7tQgACVip7gNXEh=6F7n-p=6XOnNwzzRjdpGwI1p`Y@B; z%cS2j>9I_DDU<%mq-Qeelg#l(CjF4XkBIa=CcTbHe`C_qnB!widKZ&kaL{AYmzd*4 zO!^O#p2MVtNN-{A6XtjbgKseD_sjA1CB1w}kATvr7rc2%KVB|)f^mFz!E2ZF*Cjo5 
zNgrL(JD2p!<#^;mu5@Un|1IfxOZwb$yluhHmh`73J!wfFTGD%#<2Ors%z}p@=?6=C zz>>bNq}MCy?@D^Ql0L4acPr`FO8T;rUaX}5D(Sh(@mVEj4SGO&sFFUYr1vSu?-V>v zIliXgA4qk4O5wiJ6D0h*IDVw02Px@0N_vfgzbNS`O8SU$yhEXG(j%1g1tq;eN&iod z=O^j&34WfWhbPCkll1B&{UpJYljFk)-kYS~Ch4(B`f75#G{HZUhlOBDfk0p$t^sD4}RKlo9FG|vXlJuM;eI_~HlB7>T%ns=r$?=LLeHKbj zNGMHuKazfrq{k!2*OByc1V2X7vytP|2)!r$7)cLC(sz;cS|t4yNl!)6N0H;52!4sA zM=o$sjtTTz5>^(hUy4a8J2Bdj~?;8h?zk@o=h5UZ- z9^gH|%VcdGoC$moX{gNj$9jzLh|{7Nw4+cfv{b(u?Ya*&pU7N02%ZkDK(5yz`9AXA z0!at(>yYKv)7^0$!^3hI|ruIZA*W*aO;9XnX3{FuLwXO?o3G z^B#N}Luq6`EJSHrQCbD~D@e)MO<#vJ5_ zLmIcjbYIJu#&_A^p3L@;HizwjdHfybiMj6Q*dEeObNTLLjA^I9|G?#^HG%&FT&B4V z*aIo=0*+@3;kw0-$U3`UMEcd>GS>;z`7Go)kauKE>xb5L1)f5C=Rvy>mn@>-Nb`&JfptN7Hg`k95ZrJqBKy1CavawqUT;8|#WY`1A|g4dzUdT?pyzX1Li@{Qp8z(0c}zmM9o9rj4R z&VIp@zXSg{-w{}v{26#5kW-d8+Sh@rk$xt)L<{&lzi`Sp>>)835}wgL;_q;Mq$Ke;FbqU4l+ZoC11XU=j-HRd41Nq{ zz6*W;{3Nun>i9>He1dYw8r6IFO-dbtw+6f`saLT?bdm2ze0K$>c`DBC6nQ$=4g5tQ zP6fpa;5b=QP>%Qzi1%V;JrL)j;$36DaHo)JHU1bLM#NPOT=gU68<0>Fd6_Wjt z5|TJhgOvxN#i>XV{jbXYJO~NSA(eWh!O5qBw`?5XDgvje3f{iOlR%uTsyK~NjsizP zegrlq&)!j*@=GAjPE}dv<3Q9wxf{9UxLgmu0V(B~SO{{QglMRP2CGmo`l_@;SfCOH z;*?myh>OEOl&{EB(CLui>_xc}9DSQaiTeL-ITjXC~qepd6{! zfxiag^jmxiL}>yuH~9qk&w#UlIPDR#U9wNU4_pO=U5R|;m2>1y;Jc`goLOj*_yrIa zm;_JCnxp@9tPdK_coo!8m1_>HSC!{Tv%z7R${rwEs@(@U&U=+#0dEG*0OFikL-`6? 
zq{bk@I-^!Yj`gA=w1%hce|k9!6ZjcM9&^`w_##hCOoDNIYq&3z-g0(`$d{N}rCD^dq04-Xl zVdYok$)r4Wgte=%G#6&KhHon>)*KyUs$rJtXr20JAZ%E}iXpK6=&&>u)}`XRi7vm| zz#3K9L-MDNZO}eJlQ#T1^Q4?R<&a}Fpt-OQ7Qzx~=aZSMvTs*A)hjqw=aE1^z%o^=jhY-CIcGJ;61aT1)_sh&W2S3ZqjaQqW3H*NA6-7< zR-tu&a5&x9eKpeH`Kx1|s(7C2L%@fifAF-@G)o$9RXr?>v+;>^7|>) zc^x|wwFl%FH4SrLkt-GU9x7HL?MIN`0RBC2Sf+;YQn6Fg@QhY5r!>qd4Rc?|H&7MN zP)%A6c0LK%a}w)S5;HCVJ4xUhWdb`BP4))%Hmdwqg1$SUcQUVs6CQ#hKf?@{t@9A+D#~;F^zwb;*p$B4@5HY z2y=eRCgk*<#D9ao1H2D73HUbhmZF>-l<)!4&x7`tkgJfKfTRleAfL38y(`+2iF?55 z9Z(ybe2Ka15#W!2W0A&ywgV(nAZZVN8#pBK^N_e8d5G}A!~jS%V1Vzdk6}yWko*OF33B!3w$oFc`{Z`?^T+7P zC($S9c~^hnchHCDSWEj9mW%{m3;6|4#`{KGr(^Ip8$LjCI18R_aJ!#t^NgC z{R6b5$9|C?(k}`ca;p*gi=C02e-DE$?M~PIFbeF69^ za1Os6bMGmu<9|n*+c;O8N>)3#uHiSep}U0AZJTU=b5e=yq?e2H4GAN%lnC$z|N4{zyZL$z%Wv-Wz_x;O86nvr>PFkw=Q2 z9eyq$u@I74NIC(pguFZ99PSNeGww16b~uO6?x_EN2wnty2KXf8VSeY*ZPaHHb5}8N z7cdX_4)C{>=5^Y?@H#dQ>s6G!1yEc|7e5#vxCaYv!F3qigS)%CyK4yU8r)riyF&=> z?(XjHJG}S3`qysNR_)duIj8%0|7PmmTYbCfF7!8SePtomg;JLv54$l7M!tJ5P3D8v8Hl##eA6En7W$(bSV^NA=Nsj8ucgu(qz*9Y(8oaP%ELJ+{PczX z{x*P+oD)p9mvfrXxfRXQnL>m&F0+=g84*5CL6$f4Yu-}gCMl|@Un0h#~A+bAn zGVGv@S*<<<=7l`b2_luOy@ZE?*FZAf=xZNM+EYuKPr}hQ1?{H8Of$pLi}nm7zB4cM zn>Eq<@QCosuxxv#wa(?iOW?7ogO~GLZ*PVNPGw)HquQ_iZH~-7jr~x*FK%3S6yHx| zE4tCHfdiso!QaC(H?fOW24xkfuUY%xSAdmAF9_c0_pC#u^aL5I0IWU-31j3VU}7g8 zmFE{*e;>G08ME`?Z*ZrA8+Eu!T;`r~d%DT327j;^&)RHJKap5srVZZ&^@bwx)vxWK)D4#Q)Mmg3Vo>QKZv&F@R9KAHL~{CZKJ@I{ zIS1d&`bKj}``FTsO5$}W&th%g_E@JWX62=AU%X=5-me|~ks88_mK=Su&3zX0YRt9vVRReoDLiBrn^pATu_`&P;h07q8uq@UWTouvkM%#Ld{UiKbl;e^&p@$Lujb z*kwz-lh|=sDGLvFH%Z~UBtP)pIb`Y2oq*i?j18=-^`b8*6m)z&EF$&Ry}9TXL;D;= zwCS^d>W;6H5ySPe?;HHn=MRz#Nu4**A!N^3;(c(waQKXDS8y(~4nU^YUG_Ou1zF1v zM`o|3_Yqd1{289{IqosM$n2YA;%fq^IA0S<^mJ zI=)s`HNK8$y9%~~&bmL zpO~iOkDZD&e|m7uUfJ|Qum*czzR;PdF@7Upy@OsOmV3nB*#DCRnxNp(FVP9wOD1Lo z;k0%GjOb3jYHIY~7k$c`Fp=irYbYOtIbYf5YD+}&2dsyC!+rIjjQ!U(VE!eUrZV@7 zaTqP=s&?(zL#s%S*s65A-zO}d=j$U)QE`Z>;2I&-I}C1xFiyVHHJ6NRzI){KM4A=n 
zw@QbUL7M;)9heAk(5jPw3+z6jPlu`oD>P~jo9-ipzWi)M;t;aXCZGEA#`gaNL-d0Y zLhHbn@u!A@R&uYEOQr2cK0)xpaf7@--S`FCIiB~=v;0!68rX%*ew+)<`sZ%GDpen5*ukGR6expomkZbh!~;9f1pyub{!CP|p8{$w3WZJNY2 zcT1Ap22M%LQ+P3Sz@NHs{#5ln7qo8P_iTAbc!4du*Iu#2sPlmuYLA3pAQ+FozZ`kR zb3+35e)@szeT%Xk7!9ERIt{OOA2%;J+;F)Rh}^|9aTn-NWITB(xA-WF zPL}&_t+DzLnmJmxeREL1F*=#sxzYap{)Yd$z^bzpP4V7&*xxTCAP!odDSL;COv2Xq zCzBd>sg%44O?bTW5R`FgjpAY{oqSgw7fwZAy-b#ALHJ7 zcqetp+Rj8D7u{lFm5b1W_IX~ca8Tt_EwD_sjIhkL{B40i|A~8#ggD=F zrnpr$&v2$?^iLP+hG7JPeh)JSHI0HWG=id5VyuIM5%?-op|>&^fdIk zEO~~^_D1!mp5ph+^;sC;8r=2})UqvVo}MaAKP}WO3@vyVXdYA?pJ>eWu{!)P=r)vm zILj}OEZ=(RnNzf}fB+Pk^sp+NS>MSRET3v?u>vXzMus`5(-u??N>pZKubVM?FqdcV zY$||HW?WZejac&GitesUt)cR~o}JQ`R+hu3hIg>f+1|+u_N^&5rXd-1<4bhCehYyH zW8+Ig)jBH`O?h(Rz%az$t`OCCy@jB8iu`4U_T=kn$?{g0?V%~Xi@o}uEY@%>q9 z#q32*FiUM)cic8Tx$&)^meLJe?FT9Z2KA|1!QFx3%ZuKj>Mm%Ji*odfyi#<(?ao>; zH1gQ}J!(cuW;t8MEDW)%l;@~j$X$4qOSiaJ8i`!kT_Vmc8!sM)pZjnOJRdvmu%Fv9 zHl7tSM0y%H$&jP0W5^s!(=p`_d;Tgh)k`Ffyar-o*z4xDv)uYHvRu2^Fvh0#z^;ZA#AZ-q<7F1D?NK&SlP79>gVI_qtWTFWr?Yx~6h5 zC9+qqglfqU7Wr5hX@)&>SxY1+_c013;{^iv9aQIVg_CMUgiwkm=PomT)FY2kZcTI?u5k@1s6#L@UVG6g%$53jl;xl zr);_l?YL)NjG#egVsX#r4qV5X23GO#U6-M0^Vqss2!-xzk0Z$+xvyXQQeMCGUaL+{ zU#ILlPA)4+Yin5+rkLkux%<2OH?6gGw+Y^`iKgcDAetnoli4s6vJzrKj8cqH^hobb z?@j0a)v~U@WJ|M6whfWEw!XHu4}Y}0o;Qej#C1%2Xn4$QR$Q?lV0@QcF=0)S-g2p3 zd{IIG%BB?c*j26{W4XFFr1>&BoGhT8P_q%q=?8GyBH_ch%U-L z_md9U7pQaOrK^5vLqd;Y$agNcL$FiOpA`l@^59{DiA0-!+hJ$62_u?|6_s6=E41~uN!rYH; zK7TPGO2Qp*u6pPQs6nD&n41>x2%pA^;D17p(Pe}=TutwgBWmZ*&@`jPw)|=7Tl&>l z5ej$2+HF8hr1)rDt#!pc9OSD(un?ujGz_=#0 zd(&YqxAxau{+oi#458Xujftkc#GS;Q@uH@XErpl*^!&x<#YcEb`mstV-B{V@xfil# z&fph&bPB)8Gb$gIF>$2F?Q4onjRaZJEJwzDy*Hzz_9s1^)=0u6JDJ+!8jo~EUL~E9NLwgd6n%m%7>1TZ zr9fv%Vbs+jv<2UpscZsOvRBHRm#NN%Rtcnc&IVVrJSp#Cs<)d*n{O(5e0B-K2#B#E zsS1uCSA+=42XE%@R`w2Dh9rsa5%!B{_Bnn4&T)oXgY3L*w9m*}u3pXFWnEW(wbDE( zon;;<2EaQe?a=2P4{1q=FGOh@EFONT znJbuM2(W&QvvNVPa%l}*|4Q>KLH%S^0$b*lw&mpCfj-R?;1K2@Y5Gj0XS%51POH0h@xAiqgxSzD}=85c3o1;)J_gBX^74Hx59R8J7jgq*F 
z2*0Aw6_7%??y;zjyz|5|^O+C7bDTDNR}p=#H>uCi$VO7?FLj(%wrF1}ZZJz!sX&}A zv|_x%#9Y_4-9f3IyI0%?BEpg3pAIGB_?SxV1s>1LA?vC1SJJGMOMQ^(tl0#-W`43c5e$jLW77GJb{y0EZ6XMfJj zU=8!}6@c)cU4F4rI55lM7Dr+Ng3=Bx^iT~XDs|&@U}<0DnqFvbl1QgA zV=C}49Gy$K+rWM9^YJ&qJ@P8CpnI{&{MVMAsYuO@b@R*e8ZSk5)0gDw6rhU@XRwdH z)QhZ^5P%*5n`yDYSYo$MR4Jb|M_dBX*X<*oBH~vq4Wd|x0m7~L>UipgT{VK_w8I|F z&apJGe&6E95G!REbU@^*6KL!%Qwy3|gSt+U>0o(wHR%M+)WaSRR%w_S>i)3l2Fb~Q zxN?_$40i0|ESC5|jfP^hZw>`-1|-(T@>x=oW3_0-XTYb2Ynh8{p?`e@ zG#rW>e0V2E85ga&Dsve*>_mh& zf2zHDBXbi0C?CbJ6j%*-w&&=yexr+V8Nl^d^SOuTG(@Hciu~k7{?y(#e3J}>w;*(& z`6=?ql`TuD@x^>WvbuC?=%VSU`W~$hU2o)w5z%PXiQpc)V>*c49Wlz*kiAw-#V`r< zA@5!M=r!^QU7S5R%95}L2Vxi3MVwF-VlzrT>E5|qZ@p8>l6WQ;k!5>oaMLXwe>El= zHSb(@?C}|M>~Z|&i3+MQN9yXy;-T%;$<4_~W*;Sl`JJil^~^wwcC0}r&Xm=4Ft?=9 zKxTpgfydqBvb#yXU#B5Wk?9`fc(T#}@BEPX^bU3m++t+8R&i$T$!jJFb`5eta#08~ zz9vNd4axi8*3-&>4CNSQvGb}IDc*#@cNsaQFzgcwOp4yHMK1tc)FP~hPyRjgR>g@) zUSIJbF9i3v*nEI4n}u0)m#aiAx%#gIndvffrMK?6?whtO!_gcZZhanNr(^MB@y(jd z+RQxD4hf%i%T>t|!YYF78DtG)g`*-mk2APCd)&Um`mWAOlJLozB-6?vo$$sk-5hBK zH3qr0r1B*3nm&zgIt5Wn)_sL9FYE56O6-8(Exmn@<}Jp3&YDOoD_q`9rZr2Z-V*D# z%*nl{1v0?wT$l9&+vHh8wRJ1j`|{H2tcD(4p`3Mjm5SzZna1D9In03ub*}Q&fM*zF zjo@;_IoM<7K*u>q1N7RNvYWCybrTgxj(p50~i3m9{wS46unH~UM1sJ2_t_mkkb~i}`L}ZFEfc8@WJcZDU=x%eyTCNx= zg?Mda8pcpEL*X`tB?D%@QiNS<*HZ8ybgcmeJBs+SGGu z>H|h{u^0g>)OBIK_$q9rtzWHgtA=_qRAft&nud?8wMowkU#Fb62ueGh{hQ`nCUGdw zYHb=iXwKgMKGMGbR?2y|?TaYIfE38jYU93yOZ8Dj=s4NMczBZkdVsrwD>09X-v7|| zO#b-WYudZk%aLj+S$0#Bi$^4{$8sLvC`9+% z9U)pSLW)$eIA|RPW;9#X3g=RbRJAEmAx|kEdsn1cjIDyz;}@cqrIsY?#P|UPQ$$*% zL`{G#?m8xZ1aY5rZk~N3Knp~Br-+Ra)46)_`qR2q_+@(LBz z%|?lG1=OmFnjztdmZcUAr-mmrZ=r=FQ*8#4xlfiq4-8g~ltb_=^I0Tz^zq!avDD_T z19A^>-f?&ZttkkwNUVRNb)(Z{9Pfq+rXOFh)=XMIG!1)oDy56rOQdxc)i;l|fv$SB z7lVAzyeC*2PW=Pl8(hYjjz{s)aYowrz~CbHhAHxMLk>$(nxFhZER2Mj2pmqtA&%1TCndN8quw7F6Gl`15m!pl&;Uz z-+Lo2K+6_1?`Pny>7Ku}zUh`xS*WV2x`K||Om*c*%w=4<211RKPjw65&8h__495Ax z@uM)guHLzjdp5-!hbFeub8o1^GM{Z9Rw%NL-kse&;6%}7Vr4`B8VBNjPNY<#REix( 
z3)~NQOXY5i>!c2pg*ydL_3DAF0T=eI;TN39dcmf|Lrtbk|DuMK9mDiQqp`LNYmif;;67Zusp0~ksqu}<Ci7i6>{YWMl0*gWHNFXo)8E(vOW3u~lY1);YNbk$-UR!BVtQ~FGEt); zmfcULb;P{CO8nd$_UP7F6avvjLpb*k58OnuG45H%;4g7T==bQl*7YjG+;L$$YJj+& z2~jvi-Wyooh3NU9NZgZMLiTn|1xDzmk&Xjk10F1@7umU-tKgeziE&}-!y)85jz1M? z{s9lE378M2Xx3+}jX1i(qU-((w%}mvQ5=e8@_PvJ>f!R)`U5`rB)QEtM3Ki4DOf9vY(szn^*=m8 zO)Nsb4z`Vg1Yvpu1LHG>tFhTUM`#v(c+4WRc`rPSA{H21N%|2=Ok3FOeHvd$>yfEU z^_-m8qe|78;SPYRkWaFDpN`v zGY)NZ-Kf~C1JicWL~e&V-~1CHh9_V6*P`xpR;>}|kRU?^#v4VaXMppMwh)TD&|(>5 z5iSM%p*jhk?O&9T9U>`=?>pkXQ&LvP)C!+hZLm>YY`{V5|A*Kd6Sw~rGs!n>V2`4o z!|{JHBszws3M7Dw$(&M#`@(&Zm`W7?>(dv?5q$ENzz{jFm|SQb2+G`Nfu=^`JHn;i zaEIW8khV|!gF-hrx%SR_>qkUk$rM8p<64>4vcN-iGWStLo6 z_veEb_#iC7rTgchyh`(J8KF$P0Qkvm!J9W$`W+e$s8V64QtW-dfpa1reAP)#wfziR z(6kVE$<+zX1UusKK=8Fdmrr(ZKz|e&iWU1mwK)1*E7pCigNn{Zc1VLF5D1R14FcKKI=|`j-1b7GNPuaDq?=zd>_;_Uvmu@;AfSAWZR+ z;)EQr^Y7n8^)x{n&B9n&(yU=OMBVWd0GlK~@f^|y#M!lMAa3*XMpZfiGouQ(I>>3N zu3^$AnlvZ6$ND|h*GSidg3$jD3IS6aZ0@{v%W#$GMps_NGCRoR z=~zTdDa)U@(M?EqA5rKW+y+r_#|feKdHzCdYk*(DW}L8e5bTD1X)r9INX+cQ9E1(y zgKvZKp(;arlcAqPeur7Z%oE%D{9wLw?}zyvXz*pjyT?l3`->NF%OQV`qXrAMC!+x7 zCBK5XjQP*wep8k__{b`->L_3JTF${|;0D?T(hm$IK0vVz*4$3|&$6QI#KAI{b`I}9XNNz3Uk_ofsZFk-&m<6|V9$d{;reR2 zzyO+(!)})84#SZ@pmmWWydsUWupB%v9mO5Ea4pZceS=N8i|s4pmUUs2D<&(*bHmT0 zN9Bn4d;-aysg3a+Y15aRbW|J21%C^xQx9fo?e{81bQysV$qm-DE;;FsG( zr>UUv$JdMJNsE0uRFJ9>^U@Bf%m{kMj@^|dlah!+o9F^<(8vTEBVTL#1XA06GvFG5 zGCiRV;RtNbR5tsbF<{f)GzDYnwl;;ue9EP9= zsfiBSwg6NjJ|lycy<6#4P@W9)Ccf_hHT=y_8sDHr;-V4)b{%O2Nv!%MaE;v-f20(G z;z7D>@fL_a;V~1&TtPRL*e-da?}P)CQT$Xlze+=2AAMf|eF-IDuQ!mK^Eii@3*$&9 zrtCMrPRLMKf^1!+Vc=0SgSh`bRIp*5i65nRVZ+(F*e!i)rN!7J4`__xgJ1j1f%xeV zd}9X%-(~H~CXDT*h!}$)H})Flq4K9zcCgM~b5U~+wWEM|rhI0iXfz2&a5yniI}Cw* zepdRw;$sCFY3$!O1c^;ias)*_HOtg7#t28n2^I&+Rl|4wyq$Hdx|;ZdHO&=P&NxW!}GFov;?Bty+1@kRmN)hc9LLj&6Za@dE-3|Q|Qd<9VV z?9^MK84An2g(cS8k_aPqBM-FQsCkalGb(6PaH)8q1!}Oovbu?qG}n}KNem_9`;6(# zlBwN0Ew$U~{x;~d)<;yocZi^Uwki1xw(^SYsAIy&vWPGGEnLw0DEA!C80l#+SS=F@ 
zRovXzqYN#KD|QO0Agj!!sM7jYvxW;enCX5>{SIDR!skwOpQ-}q@(M|KoYh2%zM&y# z9nAQ10eSP;HRtCk81fdlxa_(*aZYSkwS26Ay9#EA0XU%X&(DzJfloP{rEuut-(~mZ z-jj>f<-b?s+{ptbI;5i!KF=1b^-`i?Aas0Zgs- znl#Odc?v4JB&XyQOpHhscUy#t(Fp3e)uBKn~A<*8}!XdVn-rDu-9zUtukBp|{2gm)&=v3*IQ{J#DWKOOoN$gG`^g#c62Y_jM-Abp=o5ay#kDrH zIw3@}LBDj&jCjc-iOA~~@U_6hN5eqbC*&G_m}mY=yY($j*<46XjiG~mF_}B_t2?Zd zL4Q>gUzR$jp`0qj<>jT%EWh3eUku(bTy`gC1T)Q};y2Y7S=>1I(tsmH_RG}%4DkwA z<)V@(ajSF<^I2Q7kw7-mhVqdtOG)k24tCQ%v$Psl!95%L*{^a(sf1M>(>08*iLac? zfz^d%mkyU6hS0r*SKTUQS5FNm3VkK6#$5dsW|p2VENm@W$NI+0?#W|3tWFiaT%nHg>09c=r(WzcE#-5#a zHqJEC#DA1?p7tZ*jyTPk(U&j*@0r&xjMg?SQ=;cOM=HFE^@y&`2Qt5$gT2<$Rxa%8 zq(0o}yr#dJ&>(S(C5SbfO>q`q89D^789a&PT))ZAB75TySg|u|`4gN}_pD2+G!v%v zVUe1Y2^;l&^Y#o(&bd{1t-ZYxsU@PS{{6l+dyWXs?(Jbes0H&})&3qtd1?7OAgHXv zjL`CZt=~pozDF#k86FYt_C6AE<|`%^?iUO+oHQ7fFwpXCA?i#3HskWp1SZt2JSzC; zcaH1FafH#iwu`?@{ZXY*V~D^bMc0P6#e&=IdM)I&-|`NViIyF;&r#D|YASjCxYeEn zmYz{Mc}!ljIq4-<@1ja-f>Vo);k`S&!%Q6I;kGyJHox2LuAdw9l>P7WRl{d=Ivo}> zU3G@rtri<&8_9->_LHy>OdGscF*-OOe0A#mJ->@Q!t+3ru0mjMlQ)U;?c;c5#U}KM z(7!N-uD;Mc{&11^H*vX2g6o35+&)9K5fw49cFymMfIG_JA~KV`yKa4%0W5bEo}U3E z?Y_;#5|ds-JwOEnC$9p!?LH#1ob-%cG@KFGa$l0z_4%x5g#-7C+tf) zaU5mhWy@JvL=7+%m{gYxE1(-A=RYE36sNrkh)(cU$xL~l=I?V|y|_NSEhm|#dTn*- zxEngS5;&#q&XSWe!Wn4`liM?Mh2f`IW^vJWpXsuNc7_>>h3gGZ;CVO!gQLnIxN_4t z>YlQa1-}Njiyd{E!G%`r)+WWv>_$};n{zubzFn60T#ls+iHTY(c_n>CKQEkEP2 z>^s%HeVMJnzE>FQ)Vj{JeSvPY-{n93F}coH&nK&9`|Vo2jQ1%bZg_83KMr6iyQ6Lk z$J)lMb63Bv)T+agg5z>)Fhs6v3)4S>i8L1-+w#QeVYg2h=eGnO*D8In;z{i1Lcl^jnnz`B-$5wiH6H6 zS_#XicdGPARqd8Fh1Us2x6F#fxZO|ltc_bnFv(V~d=%DoxO^wf(9x0Mdb1eCQ@ux_ zML#;DaKNW&5wkp%@(ce2V;mg=gB@CTm9q^Se*V~aox#f3Lwo*DJm~3d=Jy{L=t>(S(JV@g=(J$O?3EdG| z$YoRuDOH`6uG;BW-OBKg_OB7rcOjyCGU~q-)0g9P3f}wD&KyPOe_C3c>_JS+dSJf((77p zx)skHlqt!m##deVm?u9Pv#CpQZ;Wh@o`jx{kDsMUh`Mm8F7AMhyJ=PXJOvHkZDQ-c zqO6qjTk-XFJY$LTny`$rWH74Dx#_-|<~ON}WfECYhZhz1;_+r3v8zDp_^K1_I3v3f zU8#GAk+^y7(2=;GY?8I2K|E%&&${ji(qJUtaYM12jIvN}oUPliJ5bfyn={3^!FC>dFAhP;zvaR=XiLr_fW&V@Z6-;Ro> 
zPmgA~Nv99iW%5`**?%91RHZ`7;<=`Is@~3Nf)*T$;|c+Lz1B*p5pdgqg-V5^8q(;|Q8zhTyh2)SA{%*;ruQ znTP5*v~^JL^KSQ&)W=EffyoI`qcKoJ9-dlesy4Rv2|-;_a1O5B*Otwe(Uvsk!-%i%XjrhAzG@_zexWqAi~v6|={W7GkB~Iov+< zy{7gx$E)vDGWWd;>j&qSI~}gxQ&B)2GC$$jWZ2a#c_EBnNE=nXV_!R()^#S%w6!e! zJ>CC3z~y0h9Bb*3E~^-Y5uTQE%4@cK?&)>D4KdvxwCK@uVs#OTmsxLMZoh>+tNJ`w zVsmOvjF*45TTTZs3l7lLRV;bt8=BW4bUlha94s4`bTmHpn>$l~Pf!O^pRJXgplW{8 zg}P*C80Ax0*NMgddVAMRcHiu~yv?jC#ll_h@c=#EzU;Yk7=`b%^$>z>Woq@l5FUQK zuXTgI9(%{kS*wxFmALP26gr12BQ++ZVsi{O*7p+YvGjyPh+NfhVk$zDpImwNR|2V! z?Hl^MrX{X1`0B8vCx8 z?f-h_HcsQymi5-j>eh-YQ9WpDF%TT5l6q$Cr_)`JWa)HaxOHE+%3*^4R$@8m7|nGS z?&fT~t~`ti)?7BEo|uZ0zW`c4-A=P-gXRpefyECg2x<);h zfvw5M1+pakjjnJmz*xA!`A?^V*z+kyz>czC6PTmrWOx81FPysDOSES>^Hb+D1kMYX z^b6Rc7+&= zL8){$@b<;8bnc!#?C+*4*6=qdS}C3FJp9Z~rocKMyl~NzGD4{oh z=U+o!@$Qk|YjLGko@l;yE-{>Ww3_u?nk!>PHrYQ5@@hpt=mA+ORnxjlid4X$Ngzhh zTy~#^vDffho`!2F44Cc<$y1Mhb#j&Z8gVE_T=BnJZTCSeCx5dY6tq_q@JhVA^^SgI!P+jddH(!)h1~JkU z5qMjqv`Wk?6fN+v8P{KD$eaDW>!vCjGqy}D`#&e4X zBV9uno&;tyIQWa)`REVTom2`O{w}D_xjRH^I z{eJInLk_QTMw_^v>*_qa49&gVrTS$*T`!i1OW&vUb>Q&mHS*iepSgEYF(f^Zxe>C; z_-r(rTiCZ{uC*41D70fh(FAEkap~UohxB5RrPC{))v-lsKXY73`UNHV>ZOh^Aqw`T z6!KB4%{&icCN(%in<hJ5pz^>DJxMKP#bK21oi_O57}iy%hkDu1oz$M#V}c70#6K zc10HzyWj4#k2~9oon0LPL9BI*!*BT{;{|88XEcAgG{Q$021(eU*NdZuoyYLg08c(Q z_$Z5wM)Pr_Ps3>LwS{Zs=A(By{Wvkxy8Wc^i6$>KtE4xt<}FUp%l;wzBJt zR5L_WdE18QLM^d@2n1^fURh1;Rk6kk&hb;~pan8@_>+OtQ$hBypD zyqm1L<-{x&+t3+~8xq~dlD0j6t2dAK6Fk$!E`{@6to3tEi(I93kHL$%csfxLO-Y@^ zHoM1-9IRkM-m_-27cmB!Unwm^*YL|uo!o$`NtjU%ZgYDvqnWeq>Hh!*Eg~HZk4(Bk3H+^pK9P7D_j;Z)Z6zceEV!``F5qXRfSi_9oe}K z1@B}+tD~-t6`v9nA?Pt|R?OL$bXH#+dK;`Y_eT!zNA0cGU)M#=3%pjY3fNB`7EYen z>|uv&bu}wwm2j5bJH=?eof*G1-i@#+P*n?xiQ{#|65q->)nHPQBK0BQt>%E$mUW*gvedPpIOkqV)N`G-mSQq5`&4muY=R z@aEJ0G8g`oHbk1hLrQD8<2!A7drG*tw#3y)x~tO?w}EVA5UGhnYgOD92*T81Xnj7aquuHB}o7VEV5+ z_zk#BWpfXR_)B9+9dvw#&-rrwRAlzPnsFNY6(0j{8y7#$?1f@|B;UUHhr0Ovo+d}B ztl`W3h$ik+zL$J-D{6&j+(~+d7w3_TXF{M>PG{44Q&Qz?o4?OBPovR$Kz&{T#V}J* 
zKeqSs$;F0HU|oW#``h*}`5&$1&)o5n&BmK&Tgz#wH1EtZ^|JPBW4RKz1(5zPOU4w4 zcUCjksrgvV6poig(?`3%oPEBTYw)FD*A?uy6^?EzM1MBFadcAyXxI)f10tueNG%BF z;Z3AujnyJT$^h%M}i{;!p0u{5#q#6nLnKI}b*O7VH(C zOCc=@s)2oxsdmgR1@f07Mb5+UMa(P*K~2kp1JF=UX_hSK+;0187PkX5=j$eC2f#H0 zbsd~?(I|?g&ixPusXWgw3dmZi!|#T=V{f;}wySeH%qk_%aq&;D5G29OZ>0eg@!f=j zWrG5ZjP*gfy>D$KrTJVsI9{B;>aRgdjpx}KJRMKpYrRj;n^mclwYjUy5=vP4l&`ut zPv15gIyR1;SyMe*jUeT<-@#k)b?0wSz?{0E>r9j2Bd~Ty6n{z&)LW+?})=T2C{Ul zo71zp9yo>?|xqtjw$+j*SJR#rh%3{^1Ys5e7R4sErK>^8R6s17sKE z1w;WDL6I`Ee~17WKfE!2=m7vA6D+K3Ahi$kOdmd(|H*)f4FKw4VgWh+Ck1v;3?TMH zh@BNA#PN{?69r5Fq2f_eTO808kW6OaM^40MHnIRGxzcl-|Ee zprHY2fl&1S<*iMmP4s`mBGdoD;s^d1|M3Ab45Ao6eEsA1zmPuKK&gBL{*m2BF8^Pg z{v-2Ikq`a<4iM7+!}tffKXCmYqCc?wuYCXqNFIdU|4{pZ*AJxrhtYpDK(PG4<9|T> zp9fIsK-9mVA3j02{Ac7rfc*Cxlp_dxAE5h(2chT#Um%MtABg%7N}xVaVL^!bA8J0} z@`00ocxF%yK*;zH5}-a17XEML1%f>P@2AcJD%}5{8&fAUP`XS^Z2w~bpahxNSb2F7;r{!Fa!Wt)hSftCUUE^s zPfx-76&`?)j3n3;v=V><3FLqUkMrO7EYCD3g_MFvj)o);OX+7X-v)^svq-!0_dLkcp4=zKv&TG39Zuy9_1-SgDDLy`By3Vz{#~ zl&{DXmn6pkj6C!gjhI5@1c0#=UBDs5siN@(1EK)lU@v9qyjo-7g~lutrkbB9hyJOL zI>zgDNtkF%+qeRC!u52{k-#`f(@2tU_8~>hea)s@{el}q z@&Zup^9NcBU!E`YVDDVJb@M2NzTn2SX+Q?jd`zrt}pr+QI;*p+@umUv_FrnI_YP8PKuC%3ZPixKkXvkkkgQ9K@L=yoKy2!6iJCig~ z+LalmA>Zftzx_p9GDpVa;vb)#A!yHSRN|67EzzB#sQRKvD&Egs{S6f;PMn6iQ&4?8 z^F>`zQWXt!Ct=SmZV$j`n%@yMKpQOP1O2QurG7W+wzyYs+;?As40FNF$i;_TiFMx z%h$@97m_TW>kmiVJ@7S7vJef+Jb@LF?(TUOlla@F>F$hA49d~X`NcVZsXos5z9aan zX!^mY@;Hgms53<%w9LyZV5i`ZZQ5&l7XQ?bSA1RjUcRsJnarJXHgC%9T6b(m70=2% zukVR({>MYSp3RNrw=ao%g(Bl^*EmPV7ONFU&HI_a{gcInsIZh>;{r0^*b1}v(hRT= zU6!7jc3^zCem|Ype9OGZM0NltDx@|TT+_ck=I?FFNciY-aBEg~%&(=KH_!KtL#m4-+Hs8dUBqf=24#V8C5nxB*axCUil^(|yrv_IGkIw0PV2`hxH%&WzszILdAoNkRz3C)K3q>9@SAS6C)1$xMqIhfB&|5KLXQPMx3JPZfyGx^ zSzmd>@`ra>WuAsOPWVBoZs>zR`_?Z}CFS`Upi2u^`TqrqKy|+b1w3vu9X@*jgxwh! 
zYhhPv3hbR#7(@dLL#e};Vr}t7S@XC73KTX64do8CNuZf8D&I${Y67i%DTM{hFQu@v zs(?Gl`m&Zfwvr702VLV7%ccx6j#M%~e1q~aDI*Fy9j;nme3iw8^;xSpED1h)CBj!} zb+{|pELqLI(I4P(3F;HoRv6m}*20*LT%TxA9Fj$@WV3|=r3g1`-b7TGF6B)1Sia_%E#_4!VAz^l$TAAcu%610F{t*It#(2>)LM;Npyq3OC{fD{qKuN79*LKk`GZEN53=LaN==W3 z*Vu)%24x0b*1@eu1*{FIF|(RSh{Qg9u}Yr02n)GANNYFPjrL$EL_$beQ8!8+fvu5} zO~EvTTDs6@nOfXKJGzW^%T~3gvVdg=ELmAcUl}}SX8k(C^Z39kei}i=Y{#Zxg8tPz zEb>^wx{R&Is=^6VsJPgQys)yqv#OH4PskvwUVKp>wVqCVlJ#_WtPA!9S>1U)lQoZ( z=dkh|Uan*1dh7){h+?wYYq+z@idQrCkb_t#-+eOcQt9#gXF3vgS@u^ZV0X;{W;=XF z8;(0|;u)};%ut2m$-a&nC-Z|;I9OYK;*^?7>{QVqOs4pZ=)oB10TxAS8@2<{s0M4z zX;meP@iOVC^i|r}Lmds3>JB#hNU`-)EPmdnb_9D+N@Zt+HC=rjVfP0mu3%(@Z=~G8 zODxFZVU;{c`e1loV?|X>mBQCZId(TcK7uT~#EnxxGs3MR$P%a|Y*=!lDI~~eOh$+J zV%n6<-a^_$ePtzgUd^ot7~t7vp9vm~=oiXB14IW^Q2}slX%8v0F2u`} zIj#1jN3|~`af%b?9dBHprp4KzE zq%&!%+^z_?4~xJ%ut0%>f07r{#bQv z({$7*j%qA+bXZi?S=7lswAMHSVGBIT*JL}cOY94Z#e!y&g%7K=)RB;2 z!EOXAwfN*}b+V(Ngpa3Ww))}4*~ykT!Dp9Bi%*ZEs@CbYCg4o*u{RKpKJ!r%*fzw{ z*=g7TK1XRt+p+KyJ~b;ZpDlD_*04c)okO&2DMjsR_5@P2orTbF>$`lC&f%aB^x{~o9z(qDVbGIL3r(0 zRTkD-@kuisf$34(Phm|hV#0Vcl&EJ&V1IQb(v>Cs&!#!P!Bfl5wji)cgybsH?IcNB zPbx@`v;imEHub))-@OI3l%%k`bPH^6CKb|J@EWvHBzdzjmY#vAN5FZKgp%8n8_&|s~Li|~X_l0;*h(8JOM11*3oUDWN$t>}?1?#}hc zZ;Q40U6={LJU23Jd@y&8>e3O(L-;h*aQI}L(;ZAZ@)Ff3`IMb^tQmS&Q zq`B$mR>aSKa<(sBZq|%C#q!|@0Gib^Tx~`bE z6w__Rw7Hnti)m3(VMRewennnVZiT%^YU(zI6>XI~(!B2Uc;x4)dH*)4c9Su<=#@vM zHn?n;&fk=lRfOvEyN4tc^+aPTz7m!08)KUvv73(|%6-IDN_K3r?SN`i#@3oc_Y;6HXs< z`iRqq-Ek8@dpUi;X%DAAb9$fCdz}8n>5rV=<@64xw>iDV=}k^=aC*Ia@M)meIK9g0 z6;3a6+RbSfr|ABk8^sA)1#ao z;k1=gPj_rC=wVJbhXr&~GQ!s%vCYdNjqbQ7l=Io-hNdQR7Ix|Y*5oUZ1yn$uOB zuH>|e)9*N4!Kt%5W-{n!htoVx=W%M_RL`l7Q!OVqCl{w0PSu>8 zoT@m@<#aBmb2!c6bT+5ioGLjvx(8K&&f+wSQw67)oXRy|# zPG@kM!l{_kWKKn#3ON;U%IB2FDVLL-Qx2!oIZfg;k<)3MPUV!%X+n2&H7JYIcutv| z#&OE%jxGVEb4ufdshm0RQj^$+Qj!XoN;WWBCg1wVRaT>{K1gAt!!#PNGteWgwAY3t#IFr*j$Z@Z1l$PR09+4T z2V4tW16&QP2Cf3GG`)nuk^YLDJuHq~b+Ion59`V!08-CjD|r(5YG6|>KFbWwU`rTWtW 
zUpV`LkD6b;Y84rpH`O<^+|ezsUq3Xja;mR`X}ew3ex?abDs2vXSW8q#)GJXRL}@xA z>3TEm@o(E?9-CSOwhuGM4=ZX3qfds>t3ua>ZVZ(-hCUf8JsJ9P=$=s79(sCSk*&?! z-qvojwY9ZbDygll-P_jYCARZ49ykOX1pW$q2Yd@00KNgf2EGFJ z1N(q4fiHm1fzNOPP2HpbR1l|B% z2VMhS1zrJO26h9xfR}(5ffs=1f#-l{foFiHft|n(;3;4`unl+;cmjAFcno+Hcm&uA z^Z*Y7TY!gv&A=w08|VUjz((K!;C|pf;9lS!;BMe9;7;HVz#YH_;C5g=unxEtxCOWw zSPQHHZUSxuZUC+at^=+Gt^uwFRs&Z7R|2bmPT+E2C9nd(_ri2Ja4CTAgy~XX32-rR z5pZEHyDtV70Skcz0KV&_UZ4$V1)6~-pb_u@^MLb!2B03O18M;`-~wuZYQPCp0ds+K zfpdU4z}Y}0-~i47W&stzOrQ)X1xkP!z?r~w;0$02Pz2-wxquzW0VV;b0olL=APX1| zWCG)W3?Lmy15$w$Aep#+o1OcwN|nF01OMBkzPH+Kv4jx4N6zYR;LWUni}F{g#>ktKKF4BmU9aZ+k!KouiBK z>$GqCqvU?P8G-1&%ym&_-N$OU9Exrj881>lRx zGI9yIjI2by4JEvG30aEL736Yq1^FFWMXn@Qp=252xdN6~1WGOkU!~fzK9-{w)}pgt zo2$vSn30>v8nTw$OjaX*4N9&f*Q4~tzVchht*Yhm<+uDz``gGm%+Zk$VXGE{y)3KtvuujSo@b zwhyV+clWE?__Nv`@(6j9JVqYJPU<0Bq1auNJwbK`@;tYl>_ExWo#NUQ` zj=V@-C9jd!)%?5UL-G|lO!kmZF%R!!PCg-jA)k>i$v%{SO}-%q$RW(pQOwm*>|8ca zNtmg@*u#mKt?YlzPX3Ri8-89SRnwQGACC3v``%*M|JTZsB#g@c4OZVAa%FE7ii35j zI+OOW_G@C&W|APSMcVB@L|@l^OpLlHO|<_IIqUEL^|!wFRV~)hE$j`2w-fvQTL*5T zsFjJH#FI3##<$$&*h;K)O?OgQ2>YjXv)K@*e*(v$L?S8s^(0C&>}HJ=a+BFSYVevk z-Hmc?n1b)PH;>ZXK=Vn?u|FT%jobc+?3Dd9<^8>H?KK~GCOkW3@6&JWO-r@dqs<{V zdC)d)@ET8?th>>J_M=z_cB5yMooa6IpqJPjTikBj?i|~08~U-Ord87Lgm4v6p^{Z^ z9hs4yIWAeUS~D}!PmyT)plDV;ELqANce z8}z|Cohc$bBse!H$V4eIv9SeQ$xxk%$)3PcMtB6xo`m7W?o7{S!;Cct1X~qx+F$N3 z_GcYt@Uh8y*(#5eWou+cWK33Mh9>jDIMbxx+~bk{`uHoiSD7B?BQgXAw(N{*j#2R z4Jwa6s4XEm`*U(~u&}eK`K`Tg?A)8pJdcExIrgxJ+iBwudSHq53sbD!Yv zC?-53+&WUHW2-bPYEVY{_^j{|Bdo)8jYUfzYEC^TY2z)rDfiaQy5y{}-SevRt}V6I zXI#0C?(KN?^2xy=^v=VJpE$3!_>!K*TQ<%2($}J*d)Ul9kC`*by(E&%QpX@gJc{ow zB8h=^+0DktC@I`vAd#iPN8r^A0{3$eLUjah>^1WdgavH0!ACr*RV-ZRX3YDHnDOJ` zzTP?>&W1|*aO?Byw3QDyZn<6Olgmr2lc&y1^~w9T&Z~Oi*-IA;&YCsp*kb0{xrE4v z;hBZZXP#}2io!|T1GhqBV}s&ihQvm_PIZGyjXl(4HwK0HbtQ4HFc+R-v9CU5MYKD+0&KF{jHc?$ILx zjweJXWMU37@%kJ-b(U*VYR#zBrlidBX=jY3wqs|${kFVsgDpQfHpp;qSmdCz>En-m zw`!I2W@jMI@;;1aFsWdp*%V@tVlkL4gX1Lq;L_k88oVVW!mlr(EXaHSY?Wl2pMmEO 
zF`E!(WDoL$=p+0d)sV$mb(e>kd3%Z!6P+yo6z8$CUEawtr%jv?r4;36neCpr(0I*h8N95>xPx9#fd{kOw`*sv~2Fd;b|*INu0V9Tr>qSseD4Q=jr|aucXb`^D>Dcb{3(>5VbwX z8tLKgNJ-N}M{Ei$Bjs!kchi);>XEfcvWK#o%^tOi&05@UHXOWtBH?xaqeoh6S$oQP zmQ-tWv^CW-Bz1Un^zc;e@*{^ex+CXD4^Oj*Dofh%Xf|VaNSX2ucoZJkCBz&q2%?Hxc-(V{ar_%AQ z?7__QxDy((QRovRt&!T1iHTWSIYUm2(aPHkdOwZ1YEbXq9GcO)tB_h^uNq7(MZGV{ z#S0$ky_e2mTJ|rclH4P$u!kFh?8cDdpdhVLr_*RD)%Hls`RSXH!cG%g2HrC% zst(UEv*R|LA~5E_xhn;!-Gq1yCv*|pua6A=y0MN-(6ggRmgVNQjNX?sn!dY!>;)HS zqOZ9|y192co!fgmW}Jp#1P+WKf-F`2H-#92gH7y=r9nM3`5{xd91%f!Xo@|YQj=^j zgz0p_rtt9KU~1ydhT&lUHcB#(X zbsecjTeP98Jw|(<9bP4IF9i+&`qMd!&$W0bhFht*S(CHq^rlwrVj4QsHYB~SV6oQH zm@{F1`^bz_LM9dWvj2W?4Zo=77p{x{4_$kI`I3?cu3vp!AGki_m%Q-baBcs0T$H7BOxNrVXi5xP@z)3?qSsCgXqiN6 z^8;Gb&qhmvwX*_R*RNbpCiZCqiPpRs&_-g`iszb$P1_pK!6aIHYe0w4>Dumq4khDt ze)d-#HO4?Zx#scYn#YrC9#5_{{cLzVxz_b7=kes4$CGOwPp)}9x#scYn#YrC9?yG- zLefbpNh9N-rjrIzLt05QX#;AB7iIZSt)xZ0J5kaA)kKm}lS@3f6qGlRI#Q3iHZ|u4 zyTS8ucfm4WZNYl$?m9VY>HzM!E zlNwYHl1Ul34pttu=b!g;YG8EzeVe6v-WC|8f~X{uY}}`zpBgeswHb|am+ISOwZCT7 zS1+y>^qGQdfqEswqZ6N*9Pl&+dXXB{IyC|<=wT5`YfxURKAT1J_+CYME$pk+sL$)` zuTqVNqDFBMxLutC9z`B)(OL=2L5n(4<{^6qn<)joF6tNgLQ;Wo=LXug_N!@8$K+CD z;#KF9#cF~2tQy=;{@jMZ+&96-gSM`~Q@yZI)ZdFj2=2z3t`u@kG^i%IHwWqqiJ>XS6uj$(}Cm%h3`p^A3 ztzQh-NcdR1>hr{IWc~B8x=^w}9eK05pH4nf9%<)sQFW^`+Z?#_QE}a_uDy1(CG&Q^ zIvRIhKg`mj?!mt`NB^pw@zbtIQN3ZifbDJ8(y6YTh4(1wscGYt=?yil&27!KUM0V| zwWYb$>1}9kN>*|`9;LORuHM_Gw7T2et@GWk2fzZTjwdw zwTidi-LI#**5>vWR$SBE*y3zzaJMCwx$D|J&elGgQx(zMEO%=g9;u8^&K^Ily=IiN zZM5QYE0bHBo4xg3Z_BAEDGL@XNNyA@lhJ94cTr1oU8}RDeo;z|x3;;-+ZHfoy4G3U z+AxpVRW`RPjm||%dz%|B!fjTqU<57h*2V^}*X>fO7pcw{R!qx9ZL6ATX>E43*LanN zCS^f=Lrs0Zw%~@Q8c(|mO})*EtD&vMg9kX9Txit*gBn=5o4m=2A|Bq{?BXpZaPN)+cpIHFq^M)v+1ywl>tTnd$de~wE<6cH7{uL zG&}o06iz$2tL|0><3*5}X!o|XdlC2f4K;3NQ}6b)oG_w)WHQvLNO8Gqo$VfPva_va zq4=ysdJmD+r0*{w_e&HOLAWA`-tQ-2YKi)v7BUQd5FEt~hu)dGZ0H7^am;1Kmwg{X z_0sxfL$5{ART8CXrc|R&YYUa7I4wza>Vj-Ks-erWB&u0oo?4nZwqMDR4X1PrArsYW 
z23|+}Ky$0x@N{;iCiLq}6J?%UI3g)>$B+XJ?pHz$af{c!oUw2HvRGH@GR=l?o18o^OkDi*8|jbpLDaPC8Wl)dO0%or}~)1WoaW*6M6QO zru>E$w(bhb3zfq1Gf&M+%gahC8lP2^R8*Lso|c$uLqm;b(^izfePtuNY+9})qhrMrst0{Z`-g}>YPIXFBNkfq&N#%4Exg>S& z_j5`T=_0ozO>Qwwr3nd*OL7^?Et4d#Dp|4E=h_cA=i@sv)4YQpWn>y@_FCi z`+Gm1_jNw&yPo~5wV$>2TF-jEYn^>QdF)y@&@3+5r!<}OqUWzUoBww5e;a->sADj2@@kV3t%hy?pcaD~`u3^}cV`V)wY}`B*%1 z#BA2_N%v|U(q{GhebLTsck_nZc!p(SVW09cBYzRJ1?lTtC(X{EF5B>8_GO!y&f9lC ze|;v$Az>?{*i|rpL@0~Y&$Y6|Gevwo*Xq*VE`%Nh&X~w zUZzj-1WiN;jgI&#B1FxZMQMj?0_nHIyQYzvMKp=vl@3saCbF{1iAnAGF2J54gQzgmzC%1iLd*fK-s46}Srep~Z_dn3d8 z9d>J88G1TXHZ^m_!7V}aW5&4-F7LnUmG17Pj`jM(*3NW)pQwMk$P({pH9WbzZ^+F- zA8N9d#q00c@6R%5c*eVCa!Pl@s1V@-b*1zWr@?fh?eX-rPA*aA&x8F6o9?O3N8|?X zCnn6#2#%;PltwL!SEpvzyEn#OEqUQLp(jIsRoSR0^R2(C%|n+uzN$CLvixI|_5o73 zp**;@UFf*ayp?ruUZ|)HYG+!+|j-yrm)ecdqoAW3d0{n((m$0xAbI4LD|Qe>WoOM@}M33&Pk7I*hS{ zO=h~F=w@tODCOjcE}c2l<$o`H{P@``8~b^! z&&vqpSl_|sJXw0GX_Z#bclu;T%c}ALSDdX@{Pe1IhM8Ua(uS4hjSbS>S!V~SF0c9M z^P5h^)I$~dUgWM1`yw{X_}%6%SCyY!@z{pzFeEow=Bw1JW7xILPD|rNu6R9(^V7$; zs?A594BQ;|{)W+O?GviG%A-DMJNcN97v5{g(D416>u$0Vgu6Z@W%uG4>m=?>dhWZp z1K(;c*Zu)B;BcRxxIc*dOR2qj=b`?K$2%@6-}b;``G%}u;+T1l!|m_372ww_Me5HV znCH(~=ynF0127`loSxLeGs5xDB$LB{tXt&y)4)P;gn3Vr`pfmn90Qh)hNyev=x^C` z5;Ew>l#s?rSQBropPM+%({IS;$AgX9Z6C5#n1g z^>5WOa+uDM(;-p7;cAFIPO2b{Lb3+Oz z5?7=aM)EE9Vm)go{&L8FtJy9UqhaT^=YNLuxr9Z8m!;aomm^E_y4EqZT{l<{M zaV8n7EymSq^>zAC?kHU7IA&P1Zdqg0n9o13jUJq12d{1Uy|B+S>FVzZA z?{vMER;r>0HghijvUqynWV{dCSpUWyPlo#+u^-;>;fLgMh2N8(qBch4jucnE zU2-PJG&W@Di(T7>y08}Y4Y@R?|J(tIExNzj{qoy{Bac5ceRu59p8YYdh0+W22N_x~ z)E%u%ogcuRU|f9UNWNr7*-o#|@k<8A@9515f8u30wQt!D%YhXWo(_BZODn(JuCm%G zUTkg4w+aeq^n0=I{+6FEkD7NXVMq+CSM$PwXSOAt9il#dc=ni8Sqp=YMrIl9JCnm} zF`V})#VMko-gn#4hVha%CjT-18+ZB}&+cBM z3!D)59|_4e9BX=E4(MJEr$W`N8Ijb92!b+oe0A*H*gCm80Zq8LX*RZ9mz;#<|0g!K zn(BiV%6xBK-B3zF>jml%7zi%n`D-OF**i&f`@_T%^Rr`$8#@!~+Z zjmxd?Y<-uCB`L2;UKZR`_FjGbh<4#>x$eF1+h*%z-km{~zI;%;z|_L?^kTa$e)pQ+ zINTfS^&wyYCl5DH$xl3{$gG_^z2luJdktrboWr_aA$ibXV;PT zE+fL-?1P;{rrW!Px`(=qaCHoI332N3V;l 
z2@dwv>PDQfvQ{r_g899`yi|`Ltd9NUS#>Hrck)En+pSMVmH2Eiko<8pt?tS~MS@Pt zkqwg8H->em_(P0y=Gp%~XL;qt>?Q3t2lwM%UP!ET%9E=GmeRyw2i=!6V# z)~)(fvFVDe=#KNrw0EhQ`VYf5?cef&+{jySt7x~=bMbA5d~B?SS-i>RcyQj4gJzy#^_OzkV~DMb51X^HrDL*&_HpZXef8VC z=XjAznu>37l?>s&WXC-wAKE4vZ@9Gk#*<->2LG7fQy@8+Gf}_!?UjVO+6?kSk6Ode zW$U)~o=mpy;_vwIQsjE+RzlsJwVpkev$0EY%TDlf<~IDAQ#QgoER)mk6bB2OktH9d zopsN>wTJ3%VzhkI^SjTC7A@EsX^02LXMAsD@w?#8tQk&Uhb-&FFckKKVnkD%h| z4-00x?0PUi``WC6;OstkjTtwaBn zP1+Y$aD<}-(17bm`XJ$kE*&}4<$s;E5{bX(1A^iM0{8&d+<}&l(FSG?2dg7<5)C9S zMX!$g58Ub1&vwq!0C`R!%e^ltDEj`qUjKU|=3F$a)@^Y=w}q8oHu4s3?C5oKRgWcM zE8;dzvx+#hL$Lkn%&9jXY*Q7m?am*%{qwNASe-+^r%$>(tuOOw_`)YnN^8U64F|O4 zza8;DF{QfHfwADg%-2`uz8)2rWzxzms&Nm?jSO>Lye~7fr~OUO4euY_*XnUAaLHc5 z(1sqTGmRFV-Z_H~9$}0*TySW;d)$qm;_co@{eB$iI{5s^$guBKMcaEG z{AN(nmDVCg@|t%+EfvZ$sT)@mmkf*<9As*9ynKj_`{0a`Lbu(qj|HSut9!$4(x8YtW;pDLob3MMvIa4~uh&#GpS)dLKE} zuhLi4Ffyy>(?Nn_(?aiMK6Pi$N5|GgH&{J5!`*V>#W}Nye=J|qBoT1353GIAGKnr&pWFZ;V0@%1Soi-QV=C;dKgd*GQxHa0Jx&pmIm&TgIO z2-(?(KX|Vy)e&E)+&f`-%%*pd?-yJ6+Zj!nwrT5FnRC*e{FQy~?~t~p=NEG`BQkE( zRIN|Y(P4_f8I37~0?`bJfjDajNi3;wO|fMDIis)fbc#Y90I=O8C1M0QiV zk)4tw=SaJeo!7s1^!W$&V>k&rsm@!F37a_y8#xI-b`6pP1Gb5rah=T(xQX*Wk zA7?sj%CsG4JI7xTIzVSuWI#rG-SXCwb+5U@?zekXoN|2;IXLg}p&`#J?!DGOw8ds~ zhD3ji?knw8RRhjDnbx*ku)j8W$5FvZ_C@x&iwE-_7u^1%_ezD2zq`}?A$`9;+&bj_ zeY;T>voZ=Mt(qA*FZ)Eyc~2&5A8uExK>;0Wi_QJ=Ww$pc? 
zpP~#&K5riCwmJF1o!9SQnPhDp^6={3%@xg4LdQRvthIf`Sk@xeb=Khp1B_1x2On#> zd$AvR`oZ{L^=+E(Paod2`E6EUdNo#+#XU9Y_2#`gBEG?v_yHBz&zQK|H)4(I}<wxzcsi=X5Xa%^CFNM$!>N+XA;`{pYQo>N&C%_VB&N zlt(qcn#&36c8i{)A4i>O8*lTcKYLLB{fp5o>v*fbT-oX`Z?V4OW-<}SlCV&zPwTyr zWwNkxeo^?EY%^=NQ@}up_EXlorS~uVVQjBeeYeT+#&U5pqtP!er;ng#gn^4!$)MdI za*S-B53+rwELiBQQ$Bbod(OqB4bMteE)#v04eWVo;%{O4xpQyX^%;>DP9!cpZ&WVR zTVAqk)NXHw6a131^WnJm)TFv@=q;Tg;k}Ptud(*ru_fI)F+p$n$w^n|&t4ZB^gcmP zo*vE28Z~Q^yTPZ`l{vpvU8}fQvfXA^Xlm`dbD0TS&#;0w7GO6TcR0qGHuzO!KVn_V zde)15B{%Ko)HhE0`NsO3Mgt4h(ECpUS@P=W$e_xlquv-U)@evw>p$ws)W=zg zgbi#@tiH?<%Q2DY4XR$qHP`;vzMTE@7jNBsatg<^yG!bRd4Y&S<6R|8$DXijbmPFO z(8}lJUwb5HN%p?EB+Dcy`G@@;WBvZT5L0S4u;@FP5ag9I`;mX&r9!K3$O9I{fKz^wfae*FO6xe9f8P)^IO~R~^6qZEw5VKdiAixt;mx;iWmJ z`fT4j^6U@%Dc6!`9eAm{)Z%3Iko1Ykj~kUxJ~ZUVb&K|ydp{35HagU1>ng#g!icLA zudW!QHDKw@h?da3+!apE0@=_jD>i=qy>dp?n`=5I&G}DVJt{(L_{P7obTn^y^&ktt zLofmx0n%s;$H(AfFa|yr9}Dt0d>qK0xF^Wt@$n#g;a(t5z`a5C!F@pH;#`n<}WN6yyuU?;uw*PGUHti186)FxnVxAh$ExLH@+}1ab$-#u%h7sf*#H9_bJA zBrWK_)_5)Gzm}KQ2arE%!M$2-T5zvcyB6H5^+^lv)#}jd0Qs}lXAIZIwBh)qjcdau zPn*!T26+gZz;HH$t&1_(dTcw8?b#k6k7mQOv-xZh$YR|H7*|p=x&-DA{THB~UjY1- z_!aOHaS8AWaRu-yaTV}4;y1wS#C5=%#7)3k#4YrUMS#VOV!+dk(|{$662NnebAY7` z7!$@t#znv~Mj7B`#$~_?Mg?F!qaN@H;|X9RqYCanR7l0yN9k;4G(NqfNI(dsBLGK|BLO`~55O_x7{GDl zIKc7bc)$td1VA4WJdhNSFv6sW1WzL+B={I9BV~XJQURzURe*k^pJv|a8K>u(o@Hu& zVP>^SZ4!{B%>smffNX6xpq@57wYI)CJhirgHaxYqp|&BQv9>XwsWv>jwl#PN&L-Jl zfz4ur1-KO~u;Cx_lMxyZAvB)B##Z8|F-GW;=m^YsM)dSKm{UY>OeE$BCAdoIWkLO( z;`3)zUrQt5&{_093Vy#_6jn65@*QtKT_ zEUX8nPyH|m^FtVcFcDz}!aRhf2=C0DGj|Twg771SIEhdnp(#R3gm&onrS$*0O+tGt z=)Dfrlm9EU)R^c}WAMKq^}x(9E6fhYc{Jt?rw26_h=rkFZ7rkMJ<&+#eHYVtKR-G* ztEO}F%XB_$JDuC5)46>lox56LBthkQn%~!$qW|R@pAbmr`~r+1s9ylLw{89ZpZ^y> z;MctoPh*IoX>KMotW5DS6EQIhaj+I*Uv0#@I*4)Eh--Bb%jzLc?Ey^l0kc5dISsMq zT*Q|}i1~g&oL7U`?kS35z!$yMy=!4j=rIzpdl<&M52+4PBT6{01@gC=>vme^gVXXM z-17A?f)CM+-xrbvBx}0fzmoIq{!UNuoj`Wiq_L2^yXDk9LQSr@MtzZMY;@*+8e4&y z6oL}Yp9v`v)UYmh26Idy$UW3Q&`9*kcPsY&z9uQcWFyAsClK<(fH;3@$dBLoJTzG&_j 
zm_Rb?mRmxy`4hdvU)1|Lcle8PU@F0=uAC5f(tQ?iA&DU=A^Ag^s<{r+T+fCS)s+*1 zg>>Hq%QVji<043ewDR9fYrjexHEFA6Eah~fuDd5x)-8QyhY(~!z7J9^?Sp~>%{Xgv z)Zao#ry-SU?38u04f8FyteLmYHY$ksKK|Tk+;G4MZa6oZyOC}geBeNiwMgR-q6U^VDmnb>ku{~Y(e-IVLOF90--iSeQr3X&l=J+MooT#4UI=d{FOnC-i{-`v#v`QUWL_#f9Xum%Juic| z13hC3x+?|Q+=DRZ@8lfRRz7O$IAAgF9J(tV+KS6V|11=g&rch(el$(8@A?B7Y@+Eq^0_EA%82 z^cnnporq?W>Q64JUw|-|o?m_;$fq^446XA^QR!ub6}I zl!w4L81tKv=I^n!v*uUw$1W@uVBBy4OP~j6C@=-=FR3vT+=XyI!aR+Jst+qjP$Vcp zc=0Q_TtKazg@WoX&3!=~{VcpdKv+S77QtJ1mUbZl*g~ODTc{5*JR@(tz))yR-7D;i z`frP53xw8R$>l;jsx6^Yr$*>597~T6v{fYZhH*d$dZ9qdcM!@!_7eu8`XLBsB8)^B zL$!$Ha$zj89FO$L^w{zOg{i{zs7?mh*&*D6?B{4kyBbPitqG3{i&33(2+Lr^C=3@~ z69fn=q5d6Y^9xd=T`jB;*3&Xy3Hl~f`U>HDgdJ3`L?qQK5&Mf?1glSEA~NHz6tF~= zNMi#n7Kt2??1F5jAk8SG@kGd_XPjRs5{r~z!CwTcNfaiUt?_|sLAodk)m-?cx83A& z(K6JN#4gQ!(aO&LP)|XPzi2Jee8FA1(HxpAl*Jppk+mjanUIwKzaL4c!~w z`HYm$P*G7FLB&;ZG(uQ|;y7^%@U-!C@zQk zH&A_8U*h}XI&mY&z(?Y@;&!Tzgg~e*(U%xYUC1a87O(BYM zfD)lZE&(Qz1WH1vQIgE;Nt$h8l-#9kDsoD}i=-w}GfAq{ zQfebf<;74nr4CXT=_v7Asi&0N$r@5|7n9O_M>8GWF3nAJ>)ujj7Z-R-yZQq%#Uo8p z#3@qne~Ga)3OJ%kx{w|L=`voiG?Df_;D}=BN-FY8!JnlYrCX($(tXlgX#ve;pedAQ z3W}tsrKObLN-snG3VN4<{4)Yss6tGXBC4X0Ur3?!F2Y)>l}?13;OEk2X)BC;vGgOd zvmdLi5%so3 z7C<2_Lo0SNdN21D-4#yntdLBt8e>^Bvbh+^arC@UHhKE|LP3!%iCVKVYA@6z0~V8| z%eITtY4#C`Wx!&x{jxmSQCX3!M9KvX%~{YlS-A|@3iQ?7YT12R9XAe28xhA*c3MPH zvbUgVr`EfikZaT2C)bw)L&^KfE#%g6JIQgm6YcYzY^Y(Arfw0QLq$uuJCZ5BE|QNG zNafygAI5hp6fTNU3I`;6YOqMbrFcgnruR#tDv*_(aUSt3X>2cr}Dhd>Z6fY}IBP@lQ_Z6^%Ra7XdD4$o{MOcgQ zN!LD;r2tk{v?@L-F+q`%C02{XNT9vT6mEp<=*;{3_axvwBO4xsj#8itEQmagvMCqyhl`=)RUYQ1U z(kcH`Ztsp_wQ`qozcLR>k0Luq(Q1v5XHetTiO4?mjoRxfiJw55WtQ@taiS|eT9S>l zG^HnM=_Cy5*GkvQ$CVH2{Z>uy)g~)E#DVI#oiBJClGQSe9+Eurc?v=e)c|$HYc8Yh zYIXT)&29D--?Gl!RwRxpiyFjb%1PeV>`k@A}^8V08`bF!qUlmt%yNXnNGjA%M@sprKUaeHV?328{aC|{0%0xw%G0or< z3>^kikh)ZFY$MhIdf3dkDBZEot5HSb$}X@}ef?SbtxG$v)o9APZRj6aRmJQBi8M~{;m4r%1tD4<2%hCDSIFc>hZ zV8ZvyI%+pTb5fnpcTo&uhcK5poNJF?>=2vD{!!zU3W@ruy^^~qopx#OR 
zQFElo$&&XQM>I_kiKKJM*!MUOXx_Mnq8X`N_hm;6O<0BsoJsNj63x(GiT1?XSfu^1 zo2iy?=FD@L`v%~%Ead1uxLsj@0JI^3*`s~zta>@k56yBuR|11kg1MtZOgU85Fio~H zIi7&rDvRp-F1f%H_75VqG~AdqwhFe>KZw&-7VNu&apt3cn6hD~$v-gJva>__%r{3h`X2Kwb0J!2_N|rxtY!wuR8mrtPqPLJs|M~RgL6XuumXvJ z33vIZ1^9*WXO*Wk@W&EV3NeHl=dlB8j&MBTekaeKC#u$ApQuRtcWb#R1nRZ>oGbG@ zQKEi;)ix4tB;X7XfeH?S)Ugl1hdnRe>0el0wC#A(e=Uv0~AQ z!9q>eX^A-#^X4F>|8@=V6mIz!(_sJYxEDFi*2m*(aWXxf&Bpe!+p|H2rjEY?U{@nl zmrO2oNhiar@=Iq@QNv!5U_Z!4l|Q5$mB*N|^1F=^z%fS)6vJXb2`z?Ps9#92YOped zqga?cD>{1uvVoNsoEM}QtQWE|PQ_0pv6W|=&SK7NVp5B3OYntPlr@x*j4P=NFY*v1!qt`#eH9^=`B34i67WSONN8Kz zD!DsMU5- zGA7>ZCBx6ah%%=_jgmWp<(%x5EtWeE4xR<4jH zEvxfK2caG>w_J`K7fGhDCOxiX;z=CveuK|mwSWA)x&@9S{O<};EY!VY)6_pR~ zsay$uLmS5ea9<{tcN3CS-L`t*;lliww$fmaG@;HRW{AU*lr1@4h&cm!^CMMLCP1s> zBniV2)xji0C!E{+srz0xcW)7~XVnoR>bm$Ox*+-yA)58vyTe`pjxn1mc#iJ9Yv5L#fR-b~`rx6g;>uq=}}<=xcw+RfuZb?qkc+O;CLf(>^pF`X~< z`Kt>UqJwx#Y|afqsCBoG?Tha1-qpOoLEv+lv+vvc6F2R(FyX1NZYWSfXl}Im{#)v( z3n9P0*(uZc8Fw;pmo3fQWTagYJI`!kN7^HY4_CpqLLeN0N-FELWlo-?x@d`{f=9B# z)}cdnKaHCr?Rk>n)-4L$nftH%+X8jBnP)$R-i*zcMdl-tEm zyWVxNpUvF#U$T1Cy~R$+yp@93;bsz&63HurGMM@k7Mn39DF!4^el@}SqRH91_qB=|p zte^@Xtcx>5gGRq7&c%FM*S@d+xx7?&ZufM&yCNSEP1a4tL%Pos%OR9QsD@?_)EX%; zV@xAsLB)oa4Llf0IDt6VL_MhH--hic8F5Sq!z7SsNB+3wBjGM6bS#AP`-4?lI{5yP+1;;NtQ-RUt}N;Anqf^DooRI2q8B1b~1NDJjmJa+y}_CL$oVj z>V~w6-y+-~-M|Orb%9bKsTjtDM$$OGz6_!+dk*19avR_s=iEY31($Xi^XTwUa7$z4 zs&s9;Tu9e*(-gXTeu-(_ltK%YGxKzw6var#skE z&oFp)v>Oz(8wbzDv=TbaafYxe;wtkM0`H(w!dx;A$$3Kaf6mSub+9qNB(a4&jHZOa z&SK;oYO}`-8&$J`a~^qG@F8=Be6%%8#)Kvs-ur*~yvRZV)2YTw$q7*O)bh&ymx zk*|2?iy~&i*qkz)<{QmM3l5n8-)9H2%E%WWzZE-0KZT#A3g9V&wwgYk&9vKN4=Gue z98CJN5N2n3Nf~BW06JNxNW!C5XeR{!TzE)O_Up83^@IR1W3fWTh)0%8dPa=XA5WMC zy{2Tcm6^5+E%<~y4b_IwCJ}eGSm88$GZIczpCU6vI-P zp7YWi{+O^#u=$#S3cs$%Lqk>T(hly4!UsHjW;Seob$?*~c|Kvjex8wvuZ62c$SueP z*|;{Fy)Z4}LiCrBy;h|4aRmA=fb9^?ftqD48)qKwEZnKmo;WwgCdw=_whA>;W<=W( ztpO1uvgSN>HBCk&G=yciPk2opQz$P#xgH@x_*mFjBo#FJ(9%%zpsN@(HI9Eh_<8dm*4U8lFT1a~Wpo6WOt(&i#>BIS7Q8Tk& 
z)@W{Gx~9T40W5Je%oW9QNfehBzH||)i`)^crwLL z!sMLsxN+J7d#?EecL+=6uZ_#*@A1p#%X8kK!tRQ@z_DWj%%&2w37%hhT9P};o;VA} z;+*nkl7W&!ON8irlom`A28;CpU2l0InW7}=qLktEa6AspEAc-GW<#%_}YFQwt7l@-y>ha|U+Kaz=eb-J@MB(AGUoe7Ka)r_|K zvpLQ8cKrDNbtC>?&9|d?z;5DqjL>`cx6ng?O2uXT2fLO%mLlANZ25YgNksIuJRZ9n z(FjNFJg=UNBg&oLCay{th)2fU57%wW@z^cCo zw;a}TFu6BM_IrGcdeDx@wG;Binv9QgCmYKA`bcXD`4hBsR})aQ{jyX|JWee7 z>un3;6Iaohb>tj)5$4$nGBFootLlB(Px`m5WwF$k{Jy6eW!Q_|lB?x-WS)utcwLU_ zR{bM1z9H{i$XED0252BIWEFy@Au9J89*^>;%RG$_T!(rzIU4gGNj>*pqVo@c%f@i^ zd+q)6SEQHYe60dmmN;Dy$yw#Ck_u-*uJ^`;T*djmx|KOPZsC%d0gsE-5cpnZMawHg}y!V1vbdHM_XQ0 zLP)nq?<=x46;bW0x((6nmyJ)YXI3M%horR(_OXv{Ar91x%QT(QXT_@SIlr~qS4N6+ zZ|dKH4;>756yqAxcVyxQb>ANEGrq{YWCM@C-@c@?$#=LG*J3@EWjEz9w12^6XE~aj z)38f!VycDzgw7j8&Zk^tEVimpTB#VvHB~uTQ22?JHFqhe<16#AeylA|lhMI%TWe(* z-#Ru-94(a6nLVhnf3kc%jGNL)=I(pJIO{5QVPI=_5kT84@FaY}Sd3J*s=T36lbj^9*hQt4|=~lPQ(%b(?|!%nheb==@!T38WW_p3HAG z?Umy2-WS$hAY$PW#Z|6OGKnvGK^OVsYriD!l*LI0bWU=@GDs2ac{Oiq3m+OuhR6}# z4-DpYEN}|v9qq5$KCM&$25VQ)YQHRw1nHz-$acrR5XhUtJbF?WS{~H?G6*rmx4_-& z3u@t?hX)$A=coNKa6>Ros9w-o zkNQ-)zTe`?f8fTg`bhEF+=1`I?RN>^C<@9Hx33n)L#4cW;^L}vX?bxGB_kELi)ga3 z1b9}FadY8+fcyp<_vibmSkIT2Z1YQ92vlD_vc4dQV%b{Nfo zVUDbb?|;njBU(T$K=sdk;T#;GPKBNh$sp*-ne48t5R+};t;H<(RV#4!!o2s!_s_6s zn!o&sBJP3Bs%X8=*A6>9+O*dP`IuO8f|FhzTX=S@-@kvS-VDHtnH><9>;ZD~T2tvx z<01*lB%a+aFSK_m$Yv9p%|8Y>+SbP2X!DMIRX1$Nq!aNI_*T{ z=55#x!y>=={Vie7+c%iMJs(|J;|x&w2wP5moZsb?FNGR=YG+|n>><;8AbS?pt0nX^ zom1C+2GbO{?!l*ywPH>d^XVZi1tDSl<b=c-+3Q5=AqGMQLpa!3ClGW|rU5&dlR%dQRDZW$2Uz zA^i=)19~qy;Ca1?1^n>G@w^~i*&*xXK!p&^>%k0cl3o$xp7o9dIB_{w(>W-8#|-)} z`s;6e349mI>>T^Ww9cWk8ZceQMRQO-HH~~v&R#77b44lk{#v+t1M=grm@ImqeRo{P z6#vq_nG!*A-%tNs?Xk+g{yBPTbhXjQZctHZge|l0E{|4jVo+{U4u$rakg51lmNmf6 zD2>I3Xa4)Ja(_UAjC;>?_S1H<1SY6Ca;^GKEqSnfv82=@qj zzz{EWya)btO5e(#brxw<<7_ClK5X4@H`dAK2sc4kzf)tOoej;O>Q=NLZ|?ddw~ISb z5WE-4I(9=qsteZ!6KDw5?$!Q^sL;8%O8Njq*Ns~AZ$-Q;r7v{xGL zu=z6oRS&wej(#u}*}4RHk!vvFFWrEH=V7)~?x7#ei|FJ&wE-FI{6z1MWz(7P%#zWy 
zVmxRS(}i()p1(}{VhR80$K^bG?BI2Pb++>Vj)=~m(uC;-Upn_Do`uY-D42^*U{T2y z%YrNxh#$b6B~Zj#THUFoc!g3>KgIbLypQBY6dI`~3cRwH`VDb7XbmIK0%cJ$;`g$B z^d$Ouy4rC3@P?I#VVpJ*HqE>5+ON5-IyBZ-@)3i)Ce_P3AG@xzy{*NuMWRrQ59G2s zasFi{cIgdk(=@8z10@>h)Vlatqxu9IVsetK9G+?V^nf8i6c+K4Wg z{(Ml|0e3pseCEAAHV*{6doq@JLlfv6rF&3~8kL#fWe8RCQ8J&;c#TR=$LCYYy4`%og`*vhy*jw8!_{;_YP zf#3+zBh5)d>q180K>p~+8h>Btk&x)=jxqR3R_o3BRs(oGLO!y~`QfQ>VRt0gqZ>#C z2Epp$q17>=-ZwI9#|PalP50v&lGab?`AJB|c#Tq?ue>RfPDR z;0+#_PT1PbNYqcnJa#9%;AY6YDUsXL`wLy+q0zs>`k=6w^NV%{JG@{%qeHXP2rA1xpSM%rRnqx-3q zKsanCLi3djVRvZHzG3Q3WrKG5y%F8Oo8Rk>2;~lWhJKaI+b|7imi9C{-s#Z~c6cND zxFgWruwAvlHs0;>dz#^!#oPP<=qI*{V+Amh+krU*9|?iIPV|F}%o$U9|9flGTk18F zU${aq(wQ6JKzKX_rwfMd2LousyczG)@9$Kq*KOE}fHEKbq@(NnI^7lmZwIXa>C&~; zW^LTwjL`6cKC@gvGOdH`$q>M)tnWuYm?`Q%{^1F0;|`4ksFSfpo{F4Cu!VksZiNE3 z1oZ?t1)mDeRq|xMun4+As*N}Tlfp;Pi(YL2=cR?_CXTX|3QrXcfbzjQi!YJ{4TL=E zx-HX7`8)S}vd(-)gt30e9K{R83qi^^oWJ%#Iig;61UkpEP41z&p(90sf=BeeIhM6G z`%Es)uW?+fS}U43rbqR4BQO8c!!G!+z=-o2-Yf2)(JkvJ&h2xkzZk4+wixSc zXA2lxEgLTj) z=YoD}nkI~Pbd2>GUyd}~G>76YA~U1s;q4b4vU}{{{MGw`+RqjQ}3yoWb zI^=fJ3Ec(FL*|Fji{8eq)Fu8Kfup-gG;bQD9~`?a$ZYfB)8{a&Nw}(fmueDdF)^L| zYu#uS)1_uX27a}Bd&*o5@t>-aEwu~!ywCJCkBenThhFU8p=7%n(+AWvKNB(#&^_@=8q8kt;Xs|pBLji6WD)=+6jH=eq@pei~ws0 z%afe1kthbess{Z#kp;I6xzl^K8DSm%{`li!c7%FV1etgQ>eo!swB-B;XIv$S00B%u z+#1ph&Aoeptx)9_Y|lt#RpIn4_$=D|^^S+U;=fmEns&y%I040f(R`u542>-CK|0I` z-?~}bP;~wj^lQRBMZUKU>mUA-5maqVh$tFK9Qa1z{To~#vopvSs(lF7yoYf4#yjt~ zr?an>U^$qF#=-u>{W*Ba(dyJ$u+DQ#x1=&YOC_=u0G z_$73d=1{f2A&4*7>*BHthT>j6?8VsWj(u?rD(aSre=Kq$-!Zrs^*8c5rumPhNZ0vH_~92a&fc#3KYWmeSXO$M zZ=2xoLAnFq;7R_vJqPyFXR@dt+Yx>D*<`TF@GXm1W0{ALeOU-GZ0Qqw0);mq2!|`mLu&QeU2bM9HGEO-otat=UrK z!6s3BX4K78w!c=UBE8sA{Bs_^;aF~3_qWdn2y?R51 zJUW4tbQT*sHdb75LH94ZW{mAHqRDR@1_yM{sEIW_0k<@D;w&&Z=r*Vm5Hg_spi={~ zj;RH91$hNu+kKLgTU{s8Z?g}vFSie7gy>|yGLK*1%n9BO)(-BehKL`K+6w<0`!}%{ zh!>0&bQ~g)^Am>-IC{`TznYP(k*|@}*_~~~#cV&R5p3gH;{sRwynk?dzexXO@K8`m z&|!pXF%_`Bp(WTU@J^2nnu$oP%punRQV&&6D80FaACw!s71Y|F-!DMsh{LHB2=f8) 
z5AsfSUm1{y$J3-*KL)i5O$nY0&K6V$8(Q$(kJ8B2NY{w59yx!n2CUa877Sb>SQSRB zAiJNZ|FvJD-@G3)%y`abUuPd`-(p{5-(xV!650vdiOEUm4&)Byz`KCgC~l!9IKYUM zIh=Ve7j7ED0@O8##OP~$Ar#;PA3V|z+(&H$<2E`vyGY)p-A6Iva6)k6cWT;iXM7DD znd46%jK6-8kZu0^&QJ7bdTC3}TN93%E8N^OxT)yy@UrrBKS zWQtUqbZXlyfJ=VTbBYT0+gsWcAuw2)>$i2u1m#Y+3#~TQ3uIa98l|KBZo1$6jHBJAHUEab zNPK)=`Jo)|M5(L159v2Xn?eoz)_B++ms(kJiaIu$y~CHwMcuxpecV@5!h@d7cz!qHUqAuSx2LsuusaBrT!x z?Mb)juw9=F7T=FvVdZUpj5Z(h$rLVew~o+IEXhv<(PSIT6qw4qSq0k4tYvGn zZ$_QjS*B#T(0uDDRQbuug#PWotl;dkoFS7QluBVilkk`da2=rukXF~b{LM)p>~?t{ z`9WecMtV)T;Fottn=8Wc8k0HKOM^ifV~s%>eU5Qcq|P1c^3PN02BT)!hDF^DjY(zJ z1CDJt*Cg_{yx5M3UvQ3zf7g{=%e4m*kjYR$>?9Lu_B|}Q?{GpY!kUj@@Z4CNpo0tT6bz5@&x?UT7J&H<<~m) zSoKijYxT7UFrn9F)@GWCU1DBhW}Efc^w8pKH)uCZm|eJ(0vuUf8JHf29`JayPPd}clwHibW>Kc&mH_O|tASYO3zl{aIX;5H`cj+sZ43wb9z z+cZosx3wL2bQaW8>0Ur}2HTb4Uqm#uIuz%Qb9QDsP&T{eD5ToEN6aQ|@Tyu)dUU2c zP;6atcE(>7HM_S>FI>C(ZYZCg5o}1SCUtzUk+4>Z@sbk~E}-Hrdh;qqxcAOxqV8yW zW}Kdb>Hm0i`^lJg`lyIL<+r2mnT~yCqEF9viKb7=v|QGyhIPiOPsy~{i`i=DB6+YlggK`EC|XXgM}f;L%OU zBzx`SAporG*YEIS>MFuR%)4r5+=1M8BmR1{seNbKfpY4a(*xf-d++dNT=0Xot9zUH zG%?lx^Zv_gBayzB_?gf-hRPgLaT1H4#@0*axj(fDyCBW~uGDTQ)rEkc=KVmLho~S8 zeP6nRh@WOHlIj@ht~4)7zk$S~hullW0YY(d;;wWXryxxg=mN`6!xc_-YyngV3et4@ zQ(b8JX(|R%U3mFvp!cPDzzfpmf~k(>yi^2%tLP4PU?f33jIahC7zLPH?16L#FxxI*Ql3C>P{m0K;7aHPX|lli zXkIFez=*(Z2lg)S4KT6eyHY<~ewr6x)>HQm-iEHnlYP4wg9%CpW$=iN;`4Ur9T7t!|0qR&F!>mnsQV!GJ2oqRJkKj4$xy7*ItN2da41ihDnRzO*v3Roksn zzW<=e81D5WTPkjSfa=jMN|TjSF`Mfh`Bu zfnC*}-KBY99Ht~$89-JN4;aOX-!@@qU9~dHTNt+okR8SY9JuhZjo4Wytd5XMk|zLU zo_O(cEO;HkqkB7CXm}l%c-jBhI9O+_ju=al&w<1E@zczB+1BhX4ZqU00c`jAKN3bQy=>W2tI6#0Yd(-Cnq$+^THqN4e zGsk<>>O{LFyA?oo2^_R#Z`!pw@-9h61dt^PlO>9fDGsU>yl3NWSg}9wl_bjp$e8fr z?Ktr^OxPb%7RILlWDPj+b{u#c7VHl>3*(mnvOs3M4F`4=TlRhY=&ED2DWObql zVB?Fo*aa>)W_1E-b@VbnUNb+AZ%6ZC_;=fpU)z=mZ=&Ythj$$TW6iM?k4!@P;Sq#) zWFCFRr1gUXkJyby=E%yQzFnwsP`4~YhK3VMK$8r$cWP;m*e<;!Ok%ghE~(#xhIdWl z@%4iTDizhF^dQr1nyPq6x2ysc?CqqdL@}-Aak23>zOy6D$6m!<&DIJ8hqAniXXV6& 
z0*8dW`qICN=uQoN*`00!^(FPt4nAzEURb_NNl}h_DDWq%zY}QL<>Im#B$BELO2}1` z&d1Kh5nDH6Ha~r|7*zT+Vpqs)qIIe>KE=9XuFCp#>;z00er4!P)k@YKubOJaKmBeU z^cmE#7O<{s3NWo}D`;BPQq-N(n$xkZJFRtJb?oVTW_()p&h#dJB67cKZ6P#0Zqu#vi zZ`1ZxmQ!-0>AKROFy$*eqxyJ{5KaMq21s@|a|`KmI$ooVLhwW_e$)6C2F1ZVE!&0rX@@M99t%fru%U(-L$^q!Xa(eK{Vb< z(oS^5+348fh!%f!@GO-a!l_df&;CyGZtR@!Y3oaKp0BH8Q|};4(>X`FRdj8P@hHD# zFPBY!F(j<)ol;SWB{U-|sI9QjS(U7qrEL3m!S-~Why6nK3hu%*@ItvsAZKjdg* z4BgIYk`}&<8(RF__8%`nV|lPuv!R@i#`3qua;U~~z)9y0!24^@MB*i24t-X%>myL> zUEGpIM2EjmevL#e+HZPXTkT9kEpG>0D~&z4!2Do3T9 za!W}SNxpGA?r35#md%!5muVxjW7tA(OD3$0PgqPoRo;}2Fe57KV5Og{$ibcm;ZvIa zxt=U2B%jsv`xfJ9V%NJCy7SrOK_EVkBE zuv_4QtY729`11FdD^P$-l|Z_1%*)~aqwjf^fwP)4?r@KbZSUsXnyyD_i=oHvwtdHd z`Uh5v1Po^>iMm>`BRMIuNeLuE?>{YeJhI_CrJp$%wF@B8l$7HAX|iRIuy}t;645F% zvE!9P{uMeRx7jY>6r3OBIdE0%uswbU_GPnK&aUln+caDa1*kEDC55s^e3J0QeTMl( zeM&yCJWE1+{>n>J4cV2WJZ{MwcyVQ=I4d*Lu{Z)t$Vn$2ABHmtU&kK9I=p%gYY@5Q z%)ck6{-$IPgdeF^&3n!Ri|-4uWn99fEvj*gm-!v(TNyWoN&+i5IaW!~=`V`umzSto zHmjgP174l3DxEJ;dB34nsezVK#49Zwb5W-0Qzi0NR*2`{hHjH0f?H})cpav5B%vqK z_J%;UOxb0IUFIixvFEY?wLNZ?EC|mSqC$)7B z9<%4cUG(dhWb!Ax<}F!&7C4a;{8Z0=_2jFr50r_v3uP~E7JJ7rR9(z*aXadIOgvIN z*tDMGwTE8xFtUe6Ck))bJxn!HMjbD7+KPnDPTpX3um?h5;d5mlDe8=g< z#Q?Is7b&y+U>elx`UgKJs3T!;iNcTs8!P-CMx<}rcl*u*;0UYa)MkZ2!9S(pRKHY^ zM)~Ph^GR0ylT}^lXJ=;3jjY-d+)9fIN1cj%jrvlljZR5x-OA>wwf$vl8)CI}iOptd z4rfg{gOU*;19)BJxp^F%p@fi7F~$}&Si&NX)H5g`3V3Gl1A(0{{IO6Jkx@eY9qDl7 zmc`^FI^d_!nIh-D&MsIJ4WSpu%fXVU9j*ck_3>I1>$|#a+4{U^up9y-OXaW;+NI1P z4-v~do5RTJ$=a7OxVi8TDF1()-u)O*%IiWTm zYe9;j-z#1|9|iWILh%qxn5_nWK)xeD7(s-hToo>>@hg?pJd9GPauh~qmRV`hvoFiT&0-hvO-%7MU7mB~E)B{_pgj6mPTi#0aKNo&} zov8)7RS9KO3w4%sIc-fN5wGm8$S&~B1@F1u=2b9Qgl4mz&ctzx3LLM7J%ocBs)6sbA zR-?Vx3FE<93*R|UFWK2llXu;(`hjC_PHy(khHJX6Yi7ZQ*#z;xgO!A_fK4cfjY-Ex zzHY;@kVAgd()&k3Szt-W0Y~%9Pk7lzwyu5E8@z6k2`Q67hx0`F0NYG{MdSxLkDkaJ|K9W2m z{X-Zn#R!d>JWMyq2t{1@7tR0(DeZDi$ZB;fQ7$A0jENc&6MxK;RnNSRtS$e7ij>Fq zo_B993aQRj!`SzWt;J*Z3)|!LG}~kTVfN{UKPWQ@jEEv=eQ`DHVQzEX3ure62#U9p 
zpR)4=Q#W!rNS+s_untRaarZ^%RkBS`y-JXTXJSJ&jg^n};Zi^28-@|L9Ag15(O?j0 z7v2zxl0kZk;Bm|~9>_C61b$8iJ7+_WXC(9w2kfASM0vW5Emt@&bPyRlW z0q1-WX?)O@(bsr}86!xl{@9(YJ?_B!YVX3K19TAc7gJ|ntZDna$JhIiW34Z8{=yCr zvX&b8W= z+l`(Qf*Q*%axFu@pIz?ghq4@nW922=Z^^cp2TVl@$vrBbw#&NZccQI1`hy8hRDtzW zC~~W^*78s4)OIN|m*=Vu#p47q@O~AJhKBLPCxKaUTzkBBB9M>p_Y$WSDeqQK_mh@H zE!Ny2K*wiXuIS8SqG|ep&=L!`C)UF=cT74!^Y!B@xGT-=cZ06azU&QeSEO++s3`Nw ziA*Sg|FI|o9Zgp(nQMFd}mgnd&J6W>`7%XnmeLq2&3IsCI+ahyl&{I*9L3q+(H3G7a}p}7J*lElvHBjR`j zTa3S`!1~Uiy=l1r@~v6dWq8;tl!Y$Ewr&8}KUV}g3BOsJ2t~eyo$&j`@RF_?9yu@5 ze0LmvUbr$9D2fC;-%k4mbJ|~f(r>?7bLz`SO9tO>NQgWf5^cMEj@W+%dR2K8Tt(CR^efR`{g1VwM^Lq5LZ zyZ7ed(9+lMK7THzkE(AzHH7aQL(TVZLZoYBUapT@U#L;rtv-C{-ABUrA+G1=+y55u zOObb?0p_g-P16`AvD87SQOI6 zO#cq$Dx!xzzU$Rt7gZaFhs4FjOyX#H9Eep*_7uN~*|y}`%)PF;TH9z48uD-HLxAAR zz=xJjf;0U%&KM-G9CWD=Bh#bLyga9Sai;g=&(KXcR^Y4HK7QtSp0UKqnDW~*(rxm= zKlg3>h9)y%u7};+Q1%%2tTFlZFE8tUXYQVc7%Bh<=FT|CrQuuC^v?^z3NtvK(JP27 zBS8O+j6P7>5xEh{V8X>4vMZ*}G*^o-cf#VX!w=MO9Q!`@)ucDfa4_@1f|F49_QY2U zU3adc(bfx>DU!iSz%Cr$2!9)fYhb34(4&8DAL}=!L+I84Qd@AH5&kQA0HNbyZs~#_ z+umTqsqQPHAAVo*-mv4zmdHqNy6wYtzu~BH)_b7j*g+&A-xih}KOxzhPS}`BF7sD+ z-Jn-s7syASuWCsPwE)DpF*weMTWwmH_Hbwuv_Yw1Q1 z$lLX7e4Eta-&HxPZSLwiI-Gd;MXG=VVFEnQN$_lP8A95X%dsB2LqUhtg2{zp?0xN{ z|FhYto{51dD6dSyOEl3wwhgftLpf z?zm~YTC`n)Aj>u_@|XgVfLNh zEsN1kW5l{(q}Z~#Y9zdcX@5_BQ^uf2mi=Q89mTKN8T{ew*ZZ`_-&C#0?XUzxDJ@49ywMmVM&bbhRJx6f#+iF?%4yLZLh^{<>c+B*B% zz?D1N^#kZJEGlc=E4XuaNM^xgf>1pcpy6w5j_JSbZjwZ~*x6rQJ5*z0#e1$s>=7l@ zM3gg{n>?7z{s~$UTen}&1|=&|T^u0M4J$;`In?B{%@OVx<%S-*7WAy1ICWjj`?=3< zcDNU3<^enhp}lw_{Ums>+w6rec880PFSO!&GPX*27j$uqnMCHanV}SMesvucoh^j0 zJnLA5?wWaF2EsYJDe&MVnZV1A&`6sx(6{nj5F7;*$#4edr~Pt}?`w*~SO)u{~gW}32?KtzNMkTvSb`}rUxXT^;orb{Twv7 z%)6|0{euIP~&?Y8=zo)HbC1ZVAa$&%rLdUiG?uk#R8%gKDK%U0$p5@Eah# zK*~br70ml??v^`esJBL^{o#Wwf770Z{F>$ZX>A`CVGH zw|42-&x|0#yGY>9!n$teskfTixvZ40*0<`DFq%iOYG~*u<%Pj3igcyg*V=jhidTPX z^^FI4j$dgdn)Nak6VZQH;@9k2OMvepn3AZC!T{}piApIq5hkl>zjg<$O`W4Xg5lX- 
z9uQ7g`a8;Eb+M0Oc@=+%Q&Gg7NkB#Loa7F=iM_UI@=mb8WqX0u!m%8k4Qafd_-|}` zng?lno2NeCSv^SiG*5Sbl@DIruOMo!#49~Y^RUv$7$KevtF#S@icy6-$7W#Eia@4V z$~SUOQ~#3N=Cay#pF20Wy20HR-T6RsRAJcdFdUEBr`uOwwWJbyc{!&0VjWiAo!i0} zewZA6TZdlck_l8bZQOfUYgkTxoohYV<;xw%siDSojUpy?m9aK;tZ4zUY*z2egH{!n ziZM-P&BCL63LX74n9(CN2^*RtO+Y)5Eymhh z+82TqAwP%6GUPX>k-EfUg#=4FA@hni4cKi;x}7VFl<{7cjM1`d!sO49}^cK)zz+uU}v=) zmOl`xDU-4>5ocq-(JSNgvY4=fO#@ z6WfVV5~*JR^hqC~%|H8b-Q=}Fg58*7&^M1X_>~}?$j$HzasQyjpp1sGmNlbIO&~h` z8)NPRV`?v^w{m3Vj0>i)VJJ=0SO)uThT4x(ObBx&DCM8dT;4|xaKoQ!4?ABdzG3vp z_DH67K-`vj0nabX3J{2Wq0W;Q@%S4>Gx7zO$I+A1l=25V}|h2vW4j>hsIJbj}p15Ec;h!nog z*a5$&1n`EiZ%3U@1$RT--S5r2mYQUkJ+i3=l@|x5_&unQa-+jw!&DUA2u%2YhACY| z%Lrqj!ofm@`Gv$LN#aQ2$l{3M$d6MTp)|p2hsh064@!wbk>n=J!)Os`d0{{VjuDkOdNE9L7>wdDbyz%e7>Yt88hseEC^bnKiX1b# z90b{bFbpXK8W@aENJ0^X6^u4Xa-aSWFHZCcn9i(bYYpWlCVsu2ISqsUIljq2QUZB z2Pg+W4zvf^!YW|%!zy5)VStD5(VtNZeq2$wp%78=1H952BO7z~Nm|gi!zB5|;82m# ziAedS{&5_D4mQBNpx=;f%l@M{AR1(X@kGBNy&}CL-9kvD2j^;q}5!V>n znD4}PKsgv+a7Q}KIfxim7j_A&3zG|*3u7O434;%d53`1*1G|Q@>O1I)B2+fNLR8bw zJE$H;2Wty!3sVEDiK2nZNLBk+i;_eGg^`S&ss)9Sf}V^WJp+xAgr0<+Qcq4xTuVwz zI7=~0GD|i~%!+iHlHF(U0u?b#J1je_5XAze9Q^{;Jxpg%BFs7rPqdY)1y)%iOQw=s z%}eyoV{{T$6(&_+;8&D1kt=C&9CI>ru#yapA~gvHdJIfB5qkYwZBUY;(}XW`#{cKF zf|3F&9VR~LBnoDXLsEbu8AX9W!41m-0X?7)MG4$|gGGhFH-;g}GosLfF%DA_g(1l@ zqRfH;gao6A!;nLu{DsJ0fA(Nz5D`;CSA(Ds5lMo{>xbEYAPMiTBvRwLK)7JPaI93V z>;slA_7CNae#3jhypUbSuAEaY{Mo5gSo{_8#&!X5p;)P?6e#W=_Xd7Jwj5b`Sc$B3 zTe2(G6YB&0th_8+iLK03R3Ir3 z$s58O#~a2Q<%9KvaABQxSu$JnaaN%O&HR_#M#zTv0(RLz4f?`)*|btp*;lDcnMlod zKhBEl0(E()GExb-I3V5!uXAqa3Wb!{3 z0gT-M)rn;tYTwi{x29scn{9N*Rteu#S?^YD#7FD;>9y^QU0>JEcwx2MBwh*0;npo% zAhMv$9a&ZDk#_BhET}7UuM@{EmwfCK$AC%dT9#invn6pRB2oClV=alx6EUJC(eLnZ zdE)Yx<&gM)!Lk2z1^$B$dysmyrA25tI_zorD6y}*#aL(&PUdnkGAI!`93FCRf8NAI zD*&h$OIh9--}&b0O4tU!f~Vj(3uf7DA=>~O;AYqd&%j}L8$JRb3t^?KLH?Iq{zpz} z6W<;B{_I`kZS^H6zw&yy)C{InC5L?g5nkL3?HcG`4W6z z@EkDpmY`S-VIoesNZG3ND33{#yrJR>>O22|YR!a&uoOyReZTL5x4`9a1+2yJT8tIn 
zr)`(NNxL0>0r$Z5a2srb&B$$V2m18=+D1%qFWir|d;lJVhu~p&3_9d*TKB_~@HE@B+LDFTqiG8D7CM z_si!FVGkWS=gc1YntS`M+FQ;ueI-3NpsZKnH8=*pg*V_$cnkacckp|72i}GE;C=W2 zj${9S41a__!JpwX_yWF!ui>Pe<8{nKIgZOIKJ}OMy1%S}C4B^4IKIrNL^cYQ!Lo3) z<*+>FVhh+>wt;PgtI*b)@N+Z#9JZp(Z$|6hjs7k?{w3P$KC~BE?bm3pd(djV_G`y* zuN5Cf8;G{=!!}dv_saGXHaLP76Lt_5p!Oe>t$5ZRFZ3O~2EUOlbFO`;O}$n-YqNK; zg@1s*pdGrU_y*Y$!ze4N2C+GUmpGxHIpXzft z-AVWs{tBm|8@zZA;!MGmOoMqbgy+7I&z^$6VTv=DrU$$@hebL`PN`xVre!++^?oRQ zZ|WdsU`A$=d(X*qEPw^d$6;&;voQyYW$~%UJ98&dc-st22Bydx@Xw@v8e)F`WN5Nn^$DF??UZ4E$X0 zZ4e8ADpREgo${}sR~CX%Y2z>ZzGcT$?|@!qQH1(Vez)xJI$UkcSng-u791bPaD_4} zMQs%@1-)=QD}fs9FcN0$2M2o!5+RV)?=c!RL(~V@MZh71Ed=pzZq>T13O=OWrV2_; zjWgaJ7^qO)A?3#@?qG$G*Yk2-mnp~CWzI=?+4x=;OL@2Jy)I+VVN*^@*Xvzrsm$as z$qEVLPPJ3YNY5NKas+oeM`ff(aY@9+jNqe2j^iS%2=5*x<@8MB_3;xH=BwHQR!=C1 z%xf&n$-4BH3)4@TqT<8ChDDi7QNzN*;-hduJ^iuLc4m>nbmnvZWY&zrxK_psMnq>P zSbvsQv7qOku((L0F)|JlN0>|z@#42V3T*vsY`q$EaI4tS)YQ5Xb>d&qQfs-SkrZ~7 zQlkmb>$LB=m?l-D#ilgh1@VZ(KT6@JH(WkdIs{aXl@nj)=i=c$Lhd)WFN$9I%@PsLPjoV#}Mqx<=tx14%pmeN*s#}|(+ zy0>La_tkHSUr2Re3-@E4v`|AedX#gGiYYqS-yT!3_Z12i;3L`|mpWg*w751J04vsW z(0DK>1?%?c_ootdG&w>YCMWvL!#?wxPoq7{ygOd#Iia+@DqhN?P1JaOJzPfDZ&yqE z*f-ewdiKrZ8c^s9b@^(1badffnMZRTAaSlx<}2ju;6-N5{E4tf9!q)sAdY9zk9#gO zkLW*>s9gs7879f(E&BFbw*On6({n^=>-m}op8go?xI_G_YH}T`{5FzQI;K^kja&wu zE+9~;`#@8u(Bs(2r|wD5qk)#v^ZG&K>+;yrpEnWKAb6Cp5Nedev_s1Ee!%SAhnZb5 z_4((!SH1Y6v;k{y2y5^O)<6lP{Z>*)c=g@D@+I*+?SkHMfg|Q9Ue5EK6L_Tz(az9A zr};3PiygjH&KRK2*U5&>kf(+vCp$YkBQLLao>>p3A51spat1qsEm`Q{^O`nq+imN!)M2l=SXfI|)+%Pp=O3~7=e&jN$>+T9_gIuR@0QmN zv8&|1ob5Xybt}^#2ewg*cgM#^z&;)c!H~w)9&JS26Kn{?`#SdonM>o7(1?$w1uGo| zntjZPvm}TGdtAyQ*{D5lbm3^q>3Jg&=fh!~sU~?s?1Q*}poHG}91|BmDhgNMak$ow zQ1!02me3KBGbV_KhD52P?(*voE}!2ybHtpvb5crXkBi8vZ&}$=pOt<2{?_=Ks>ycy zq{^wW`SrO&vZlAJYMGwJHeGz@{B&!?*T#s@0PFBvXKH@>uo1b_R#s1Ln?F{qH)xIr zgcvlTv6+s9(J2WjW2Rju)`oH%9Ts^7F6f<`tb?6#1uAfj(1P@#OPSAm=R1w}!##)5 z0*?@N>ecpucLuV|0oOvwT4ODu2kaqV;Z%@ z^MT2$Fpdfw7BslT5}tv7@_;&DGBIWx%wdxp{I}jGyvvy7-SHED&s0iVcbC`8Op*du 
zG-c|&`P)LKD{9|Is5WTa5>vx-2Y+?tuxT5to-;f~|<4bHP#2MqbMv9Az8d_;N z8C5Z&$EBLuyD0k?7jupn3i9fmeo(B0qJ~lbND-n1@j+7Gtv5ryC&#GNqcStaxCk}* zhea4J{aKPmWiP328a-u2MZ(y%`>t#aOPi8Y|H#w{Pt9L;_rmm(+s94I46i88xTK&p zCTF;1#MI>_rB~FZkB!euvL?hn8yS~4?doYg6WP<&;aSn+@+TIH`|u{;XNpMiyo46= zuYc|O$bDQd_8i>e3DJaiur!y}P@%1|ce|9mEfIIG*ZnKyvUlQ+*yS~dC=yUHsm`Zl=a>{ z&yAl}+WO?W>J^imbEbKV^g*e$SK%0I^?fEitExgIl^m{Br? zi1)IA6|j$GVZtaj(&Lh*_D$5DH?UviPCYC)>Br^6md2`7XZKKHme9C3|CHvUnOV}) zrMFeD>b!RRAjb{i`qM3wsw zI~;e+_=Y@S<=fQ{CmlF0u{qd(Om0eMmWf&>=~VWjDRr5m%gQ%*E{mHSb+TuvCAnbu zxMu9S#L+Vghwg}XC5M-`9=L4t(GB@J4cl^hH9J|o54!Q=_;#lLn{)B&lakLfJys#9vL1pV2UT#PW`HMMZ1cmn^=wIrkK^XH6JBV}+hi z46kq&&MO{j8S;$Y9HcI|`oP*XFK#O@y!>9*n7Z7E)y@9jvMX%LTfhbpePh(94ea27 zF0%pe#0EoTlv#V)5F8L-4YGAGhO2Ck%UT^&NvkZ@-Go)o$?z}52VZ{~C*MK3+g?~c z(Q5yo4E1l@Mmc)>mnm#XtMU>jW=6BE-euo<-)8Z7EqBi;SebF=W5r`Z!It#NS@#_1 zspa<{xNY(6O(RSJ-nHVF;*-2H6tRi`7z-890DtuNUaCH$u7ka~TN)c4w2wWE4>gtS z4VN}!SVkg#oH#Xup#;WplPk&DSpA&F9QyUdNfY&{l9WAcqCpd_;RZ=FQ8O{SaqKH& z%4fWgU6KESD{5Nb7!_^8J%_xK;vT}3VeCpbcF7~Wi*_UUEQFso-oXCO5k?yRPrHgR zV^~TpJc|DH0Ccjps`d2XmEF$_L$3rHw2A54(X90@9ZsO&kv-M_w6I)TZGuV ztH()nFXSiI;7#G=$$)6r#Azd^ET0n3cicB;TF+W#tM{Ic*wMp6Gk&tZvS%93wryC) zuapntT`dx>>U|c|@i90HtvKk+krAPh;o*^?5sIj2YvgHLrJ;kBxdIK!XeGBw%8=8p zuyPs(?}^o&_WKqb3n?ka9{HK=(F@DgcLzMXyfE=7AU_cBP5HXIZic3d6&+jhhErx$&~ zdwBIcOLLqtPyjxEc+I)&8*= z9wvUy5aQ78WB1`BUIM${6%KlRV$$$RRd-y4soND*5$*`Hl!sO7&o-Fmzg#CVV$)ym zk_{Ac0hws@`nNdFen%M=F_6Q^$DLiRBES+7JtpooMSwMAh%5fsql&Pk^qiqJbt-F0 z#^|9-7x6uDIdQ?&b9DQ zIG3?PuhEv%WHaaZpD3|`!nQWg4<$QyPFe;`$z!VM!YTFH{idXJO5FOb{t1aQvJ~&} zQCP=d+(J#JT0VvGp%4NQ=`789ZTgbjwBzwQ-{CVb=dSqPRrWv=FQAO*X zSh>uzrl9!hCoa3xvnJoWa=EKM*S2oy*!t`UuDJZj^-~JgA6mca@aD;d*F1aeojcPn zT3)d8uC$9*$ope&iy~6CZG7+E#1p8o3-jv42KiBo4tHK+6XnvIEA-{M8fX74uzU~k zFMDDL?f$!o`|p|0n)0gX#@>CU z1^1O<%n>g3mAU|)XsL+pHdRDnz4y4(Q=!KH;3q%Mh#oqaL9elXIHmlUn04-6%aS#4 zTggx8Bl5~>GenCD)*oCSK4wf-==}pXmfjXN<*+k%xJ7I(S3Q42xk?+Tjy*kJx_ZFw zlEEHa6HM5H7Fg<^9f8_?+$4q^5Ac9aBYp`I_=2*7sa-mkxx!-6VbAyne~PI6njUM{ 
z*?F=TG#8RVv^I0V!==H9E7U`1HHl@Vv=0vnQ{tjF$&*zKU-+}x7*Rb|!aJzr>#e~B?J`@Zvj7NAh;f@Z*T}Fs8#&EL~6Z_RLgEFc@ z^90KUt*_Ii4KkH#1OD1O$i%A%dL=;*I|t?clH4fAKU_@zcN=1O%5RqAM@Z*93DGtw zN|IiieAQjm&3h_l?yG8;6_PclvTWJJ*zBeEE^W9u=b~H7%BGt~&6-ks`2;&lylC}g zXK2Vf@y@YnmN;juC3IBjEZ4Y8s!~G&jzm~8Q!PVdV#C67N@s|bU)V`G7Q%aH=^j(K z)?gO*97}*!ho2!ft@(6NfT0|G;bFf1k8l}B>HdTFcjLjH-dB2tA4=Zuc~9)3OkRhA0j9|J?mOU(xu`BAQ1tP2 zSohycr~2)v0(4@5Q3Deb(NeCUb=fo(TT-blKWF*wqI;9)hf>Kik1B_EaA)tMor$d- zmp4{kzwHH+r)N#MY)ZUzs%KGE&D?2xeYZw_ddrx(zLGzHwY1ld!n=~T>M02O}JCbLEz7$D)sJ5bx<+vSl` zbx-)-0)y`@@dPGm6#)S%vq_^aS29q^HOTPSAw_;mnVs>2FFo7;(Ng8sN%3mQA!Udr zk|!PYe!1A;Sm6EQhCfX(CCqzbNlNh8cUeJmi}(N0_AP)-Rav`xpOcekoA?rupfAYefTU+4(ph^P#Xf}rAKX4Fxz4@waRe-z=LtLWt- zI*eD*anMmHApYae06D#DpCoN9c*lGHdz-dNcC+_hd+oK?`qtx|9)?xF_4H=!=ZbEMyvFG78aAdObYB27WJs^XH9*=S-*ISCkRT%#BCLj|vlR4Ez_y zkHmrjW+rJg1Z&5k)G|Ukm7{C)H@@Yof zqCAK^HRS1c7%An634;{(H=!$(9of3IYU&@QC-FpnVW zp5z7rPX|H&`#e77r@18Jbr9iy-GihV`~-ySaFXr`{2iA-yErqx@CtSJiC4p$K8{8@ z=#$JgkjZ3J$4W#Z#-Y|e86u#lz5#rqA zkxd!Ne2ob-37ooMNvS1!a?pmia~;@y{e7(9PGAooZo1DW=oiIHgpqI)r$2NEe;W-r z$qBvSMU(@*E;M_<<+5AT6YYc(Qms@ZhDTbuhLCE|8p2Ac^{U;XOcIM}<#ZHwrCb9G zUK+{BctTMbBTI;y{rAA`zt1y`x9h3?u{7((hRvczex-Y{|F^Gk|AFIb?tN{+lr6rj zP4(F`C#K^;uDYZwwJBX7Q@;KpUJ{{RCx1t7iZ1Ha=6Z#kNZp-a_s{cD3Ky!{kE~R2 zuU0IB1?|;{U@Z<3T+k^@c-VwZG9fL#CT&1rjdElqB)0Y0yp=A>EY>SH5#E%tNSThV|3n>${VQ~PXlmlEzEd<=a>&Q>_<}I7;h1?Lyxq6CUT1RAEhh>VM7r3XQiv+&%s|3t%LwP+416iHk zCKTF}lwn2ORgr<#G@`K0GDe_7W0o??+?x|lU-&691wTy`#s;4A8;}mA^+>5fDvnj? 
zbb5pAs#G*gVdYdj4H4}fBV>r^0iSAIfE^qDkr-(7P1@|&Pi&l(aoh8qIn0LZ8vl&a zP;j~MDD>MVrBY9U#GQfWY~>YY2?eb{%hl6Dpu$>KTA-(;aXN*VP{PN3fDEZzLgaWVXlueT zH>&G*yDeHLVV5OND9rZ6ClEGSkPX#>nZ9GMa`ZmAj~`+ zV?Q;q>wEZ1PANG3w0$Ku$aM6LJPEcxu>`@Rpq~H z?tFelK`J}lqYW14wmvd<=7Y`NyryNL|0$PK;WO4ieU5*=i~AzC7ctwDjolV3_KVG{5)j#-D4r~u9AgS7?*@t1oY%G6Y=mRuyL zQZj>-hSgZr8b`r3Q2!c~S?I|PXHEmL#yzH~1pI`L#uyb+9_PbS5o{L83gR)z*kU@M zx-nbxqpYpoN0nAAt#R2(o7u{?(!`6U_rATpyZ`Po=ft_eISWfxzp^xYex|p@8*Iqc z-_=e1DrLglpm$nPQd)Ie!3S@?QXJki@8x@I3%Z`1S9|BYk~rr>9Wx(p_lqQlB_dlc zn>3G)?wsUyQJ3**z@2)M&8^vs;#BBp0s{R>YEHa420foK5{%ti$H}73$yq9MqFJTS zD0X@4^DX5MzWd-|O6@92PfSm;Iox(zVYbCRxjld0i+5BZ3a=afhWc7S@kqnb!NQjv9? zx)t}IK0Q2)u2TWJi&j!1O#VO9{lKejK+^|c>1fcPj)RDzGU26!3ifJ9-Q1);`BaM7 zy72qh!>59<13dTfWbQoS#0ihsLm4JhM#v7&Mq@?@KS5u-wi8|?B}E$eXOMqv0(ya5Zo=E91nzItjh_wVMcqU7Yl zEVDVQFgdv>%S@k~zM-{9b`bx87`j9C@3Y`#%0NR&K+J3ny;bw1?~Z{7Y`$ zYaj<_VeC2l*p~e$4u8%{U}%LzJOeG`2S4&*ZZ087uU2U(p-|+?SNroQ7he63OV#)F zFMaq3d@(2A<6PXuZQSKcC+2M0gSUPDSN!5`G&~HHzeR5phA1IkhZcb>orU*v`1kjs zMX)k2*G-o)y-0+ty)>zzJjF^F=BlVkaFtpbJ#Isv9^tEVc~=R0DLHo%ySZ*r>7lE| zhhPjbjG>K=fdxW-44S^mETyiZrXP(FakErdj5E1YR2)ZhqHQq8W|$)zdG{LJsR}X3`5snjO?62ADHa>tSF)Q7SM(8ziF}2K zmNCLO=Dpsrv(tajkk?eYr?#ugC60UaajtAe9%u=m z_cYM^L6ly%!733bczRQ<4)H%F5@y$HPH}aBl8s2}vzkiV5fsK@tJ_F}pg^C9RLL9G@+99X*CNbHecT(l4sJ2WDf(-|<(K007v7?BG!k>%P#Y~sFN zc+GMm@%jSp@7TEDI_1Rv{@wT8x4R$vxpyP4fQ}|=r+{1^BSgOa!099XtPtC&LA;ZV zV@cdlB*usmlmQL7BUO5znup9v_~p%!P!8qFM!PNlC9^y^<2wYZX>gmB-YD)`(wyd1vW8S$v)I0T?my|dIW%jUuJ4(NHhUjU5;-pblIJS27#^`J!+xU*X6t5-b{a zL4Ums&B1-3@8h+A~{Fwu0c&|9g4{5(d`igig4*}4TK`78IVwY_fh zcX;yG7QQ}za(UTFp(Q;xF;r>hjG^gknp(HC=L@f1m!y1aaEWo3J~VC3^vZkMS>b!9 z7^UoEm03thCk5RlCc?w!VWB|De2mgjE()P1h%{dWHzyyTXA=@q3etTco7JEPL?@Bv zW@vR-hp}Y8#4+WvzqHB6)+#4V^oq;3bT-V z#}Jy?vCEB+YUhz?sO13^<)tr)cddMtCp@=M?R(b1btkRtunx z>P4a@NGVvfYU3bs{Gax|NmT)()f%EMI+K^`$I(2zw{qiKcP|}SUsLtKk-L@-++TD3 zDuuswbk_%4s?sSX#hL7@CI?%q(x|P^aeL<0y}T6s1OzHRWJ%BE%yLEhpb48&-JBQivY1#urh>9?u(B0z(v!-5z`FoCV6T; 
zCvziuX2vgS#3_vUMU>ALB#1R=vj+%34N%s~jc?x-S>M}tll2Xw&JHhtI7fsTh}&T*t8uPv$;r#Lphbw$pU= zu4A8E8#v9!T!FqTz&CrWnqfZ@;fsAnqs=-UUMof19fnowP<7^*Tq!F#jHH@Sq(DTO*tmoNSjPdRy&*s7b}yo8fUdk zeld2eqjd_Tf-^M42YfLp360tvbx6sVt(g9+gN{)bkqU@0E?2a|Qz-IBYo@e9k(H*e zSO4a(+mmZ+DlMf?H%N|?foy+RCO>s}1I^DhWh2_CzZKaDQx$vW{ zJC+sDjEMO#PGRxadowG&32)f)s$7lJtfu1%tz4MhbVu!)tuCo|@&an#RI5ydYY{q7Ly_z~zGb1ybSzI}<4p z``0~2t_8tKDxVJHBzpCQ@Gi{Z#-m&V_b^_CrQ8o#dV*%JAN`nihI{!Mm-F-mpw)(M zk4b4ZzTCWEVAC}G{Nfi@6bOV8=9FC3 zy{M~$+V>UrL(1%JWDnXv$C>zb`C>KSJ;uyIJk&2%D#fs}Z~N@xJn-wCtjg#d(kl}Z z7}a;mHqXT$*(IVOK8Q5+ZT2emy!MWL< z8F$o9Ts)&Z(OgpBQe1U+bAD#y@`)u2rj$+4y7O6AS$Sn;YLPGNdtFXJMP^;2I<+u8 zV`4Cuuh-=5pcf^9>_)HM+?Ska^oYd-n+^3UJuZ-6R&LD8IIomzG~(ooiR}_{mm*fY zV2l)ojz`M!bBXM#qQ&@e|FTa+ZUs>l8VHQNw#G_1N9#KSgj-zR{PoZ6YkRVH_0z2$ zG39@-V*bvRB~|P8bS`>rT@{x=q;*AmLUD0GXYkLUK6&cw^PNTd6X^}NPd1tCSuNX_ zwmo-SX~CwiKC>50$?fWpstxh8ADovHLt}z1QG8l8;4r2b1X@Id#}07pE7l0`^rF!) zG>R<0Rdl${F)>W`BTO3pkjTbO;LJ~;%esdSuO^_TZWDQqpv%+AODkgNGSksDfk&6+ z51x5uM){*>c0TyoGczW_vpz$OrXyWiwfc8kJdxb!h?)YI0w2bObJpBF=d$Z;LJL{~6{Szv`Pn zxi3r^fH{MtLl7`$hUw{X40yToiMH(Q`CHmMpJ>m{ZrL(|IiqV&8H+ik(lpZrMTg@; z40Q76{pX<0pF!&Qt+|mh9@Pnig8RW*hk7JnrN1V46G!moHxcZ(h{ujSpax5Cf;EqN zbH;$jUAU3^q!I6W55Ob!%C2`lxjy4G%yJ*Wj<9|kD&|q+fKdq#YvLeP*$0>_5n$jx zGYu%vwt#@Z9!&>|*}-Cm}!Qsm3PHEgNU9`F2IKiJ-uThZE`RKwA*ah~Ph|C)MQf zm{Msm5B_W#F%SMOWJGWpZZTm1X&D{j?{#Vxr5-d;dDWv;7%T&mGa1} zkBtXpM)Al@E@of{GFWL8XDz~=XtX$r>_+YwT;}{Kz*SEGR~PptE+csB*q;G!^#QM6 z9X8QtVeTYUz8@vv*9QzHct|y%*I5aYCL7LKEKCBqQ=M@Uz$RzEOnE8B;ZZMB9TNxV z2zs*8QS{^(MNemEJ@@6qy8K|kU@9&S>0bWBV^4oaeZq+wy9UOp#FQAT{<<4fzehb{xwLaI));<20M;5=gZ|6$({hIDaD%7_03Ab%;^GqvA zNj2a4`0UiAC<+=*r+)z63sDLyrG$Whm?#A56D&exmm}~eHxh4TqCn?M?s5d^d@NXW z{(}*uvw_#*4XEoN3gErS1W2c!l{&PDKpcmnj+iM1v0$bW6^(UlFiT*SB9vw;l4-Rz znJxD#>k?Vz*HXeYAAEy-HUZ~A$UG$1OhQRRAC8NR{0vMpsw52r)kK&|nyD*!Et4{9 z?tQg&$xGeEY2|Zr-BXGj?5gKl+n-%glvFb7AMPrz+3c(Jx~sirdwxw^TBXlw3pVEP z;HGRQbta*F_KNDcTNd~OE%(=!bj_);nkP>0ENghMJ>;9crlM#;(4K#LfJ$nQurnrLQA|E`WJJ6u5ulWx0xZI@c;)(S9k*QDz3cU^?VUyO 
z!h&BEwm-eJpz7|IT3cVcx0XvV%kQX5^kGH?E!(1Vd2l^Qx zPOYhO6$3@*VHdioI?&E@_oGC7iH*

a$by+1dIOS^{SKJ2puvN$GEJ%y&=@dw;su zu4=FRrXA(F-ZQG~{+6-J%&e;HYUfxAEZRG%Nn(=|Qd zD|acCSq&w#$^(@yxu>LLVbEDrSdv<^dP?TR2@7YK?_N&{82UAJoX)0I+YRwg%1z0d zx$E_{Yd$4T57 z#Mj&<(5qqABoyp3n+@P}>_;kO!B4SjgG~QCG16}sTcmqs@?K{CSn~`V{3ufO;v65@ z;BO=Z##zT3=helxJ~>C{&GYHf7KfT=saf(ZCdr>O~h^FY1x43$v z2Qnh}$5s3_aj|Gd6q=369gB2)Ial;jJ`rDXOK-@VduP=Pd(xL4+N&$ z8l5N)Rie6sC=+i-YLtq%vrx5@x+=!HT zjf*TWqwE-GT?sBFEa6RS39TWSn~}Hyz0QBj9hJ}h-kzPORlCzHu7)*@)vKmt8eKUC zOMZs=uX(;Mv&*fKCYaQ5DLFauBCE|%5;A1kHG)u6$g;RKtIX?=$JyM)895CFc48{i zOLI~cij>?k+-$HE>+B}2M50Sb;Wk;ECY4yIPH-hSoMN>lA`$5z5j8woEAETL2@duu zbDTsXG)9jyxi?Xn@gn&BqQq8dpQw4HbmcfnSn#(~84=nM&)NF=7LFFUrh6^B2@Yq|ujb{YSsYpsiQ`#ASqZS3-wi)cAI5LJV715Xx91zk6G4c!% zy?TUCw?}K+>BHC0(h1}Tiw}g)ya8v|!|CX?bRD^`O0Pzy9K;7$3CqZ&w`%NYIVv@q zjvGr&$Fb344u5o%FSq3Pn+HsK18rJ0aLf(FIJoP-b7`{aBrjsJr0)0%ZI~;>Gv>}2m_?EMLOHu?|@Ga+|S2yj5U<rD@SEdNi!W|lBZi;2Jv0@a%K48+6t*L!_GC5GyNX|9etbdb^b1WwQQV( zqNT)R5sL+&+S;L+#!akRKyCI&N+k^vS|TAx2LY=#07*WNkGJcmek)fCx_bMYPfT5-jtA)tLh?F_ZFB_19M7{doM-7ia?Jwv0&UcV=P24Yh+HJ zXXL&!iQa4aO~jab?I)cC)DuOC-WKa1$&O zhG!@kSs)qDJTgA4%SRSQE+L9ua7exM&67`l^MpW$t=wO!9o!Wx4ljf`s$mXmWR8BJ zkOrBBv6V831R)tC*$mqTix9`?&s7q0N2rGvku|_UfI0XIbX<);diGf^<5^+Z^5tCl ziWLB|F>ZqyGGPV=*?Ix)4B{CBnBEM3K>Vn=FciNNgkBPg!fo4ejz9;SdU_RKnF;;7 zpuZYTWfkP+b1DE+GO3bMsnq1(10V#zc?v6+0y35TFZR9ztch%GcqTm|gc5oU1W0Jf zB!SS2AV`-cprRO(P^1Jys5U_CsMrv7Rj{F82YX+|zSdoP@4EKh!2ir7ly%*E_xr#9 z{`)-lk@sQdoik_7dC%L=loJNJVw{#1MC;8G%}YYe?mjv=loYROA zK!&eCODfP31>$W+zy8IM(I0)FpzFEFHv|8RBf{MRQwyp~K5We=uDBUH?ldbMT?!ge z)#w~IZYo@a+B*|>5FbyBfh-{|G;ljeb>(f^u$7hVHaIiTg34r4nYwkxR<#H{kRD6N z6X}L@dpe#@H!$9Y8`;LSuZ0Oaa242ZR0W`t2@nDIgS{}*B^eQBN!{h6;$Hk~xPyj(CM1Q{LVpB3fQED7~zvxRXMxRV}iD{J?D9LWgXffi=- z^vuoN;>4W;U>$*znXMHI9gzX<2tI=(26;OR@<2UUMytNQ2@T~G)x@xluCoI-5~AWt z+Dt7lm8!s03Zq`_TAuc35-?11PhQ-;7z94UbwF8^PiPi;TYNY z@hk`iF(Z>)jZElchFoCoBh!&j8NY z3|Sb@)z;Ys2%-aaq7+)P=Asmi)KI7)5y%&ntmgfB^ve|v##|50{SJ5aV+|P3ODc^p zL5!Oh;o}uP1AVR5x(b{o^^pTy;|rtH6KQ6StoIZ$ z(=`l@MPC7}8N?_s|FwtuZX|%TGPBi?PDhn4P}kbpkWgzMNM(fC+mmf)nW6eXC(qT^ 
z?dB-zt&N$mYK6k+T0R)!CeK>aZMaHoE&;}cekIc(6kbmQI*{(_OjuA3n6M^=ZPWygguD-SmcX5rGv5kj7EJXhaCpbb; zyf;gWp{1*($HI{bQE|>O-fUBzzk_2RGh>mZgNLPMptCT}n?=NzbnIan(hbdZ%@}wx z(-uA};JG^?Tp{YYaVPN&K#%E=|0X()v<;R8GC&ec1E+{&9Gwo+_TWZAL|h3-!^T1| z<{!{WI`B%UMxbevF81;Q@K9Kg6^lK<1ij(<^VO?;lwMaluj5Xl@6dwDptWy_Pv=$O z1^0tKmjhqLhK6o5(#L{@1qFz&l}cr@!G1FYF$r_6ftbLI(8$P?tfxnxYubkj49EZw z!^Es=tj1ZP9};Wc=M7LRDH{BR>N|^?FP+dN?vLMO85wK7Y{TPq8BABpMr*2}iKYJ1 znXp-$i-$kMB%Yrb0VqFaWGD{loB$tp(y~PCJ160alAS%=oH!mshjsd+w)GL%Rvs9O z4zMmR=A}02>KX&BY^CVeX``BER;PAO>YHjJe)rR{EmYnLjhZAK<_l4k z!_sv?sOEDv^z(2JG>0@az`k5d;U_y)%v!gu88+#BSZWz7!d<1)@|Cb&O#}Nt&AU7; zbnOt>6i6po?_g``o#F$}?VMQZB!cGv9$W_cUjuq3LC9tj z5eFt$=r2y;@i6-9R}hZ06a9rq?57XTK<3XB!R*Vs7B_C~>{v?-?hJ(Y zbxcLaqX#(4176+|azfX9%GgTMfpqli7+R=Dw>0|$H#PF^X5pW|;X?Vy6BP&-H}jK6 zMY|10#4<;=Ki^_#0#_=+J!a}P{uEK3#MjenG&eJH4dtizH#JkwyiS4)#}CXp!Z%YW zBrGY-2-KqM(6zPc5WddXa2u|;jY_X0>7YLbL#2m-I6ek&8)dL_FXAwH^VM=j$?95p zv3o55W4y|vzgaPi@9bE!cduh~ZcsoPaRFVc!n>wZ3NNbZ7ceZ4rH^uk1!xKd{08%q zKy5skOr_w^`;7$DFh{lGD0LtMAFKY%RMRz@+Jk;=x>jt!hjv_seLGL!Xr0Gk;1`Nu zRn5rG*}klRL_3EHnIM2BUq z+S;ZjU^JVJwe{<8Ok|(9i1%6b-{;T*PZ`_PSe8aiR3f{7Vj3%xf8 z-1t^HokSrJiDY!bO9IKzm^LuYC4x!dm{u(c1sK*?2&XnI;JeZJ31(b+*vtg=>F856 z{!?_)i)Z8UXaCqo}EW<(VYoELi=FsUDW;XW&C_V znE@2CSr=w9^&r&33=BZ9$!yc3(ew;ZBAB|mdSKs1)2^eEuxtR;KVWD9-O~}w?0}H) zn)OF82j~H#WXykP7d50XQcu7(;Tr~|vN}JtZ3VpQcp0aPV8i%D1cH|zP6()4i!PEe zRs#=FfQ0~*01o1a7zl99J%A6ys{pkA_W>c8u##v@98Y{tl9E%&>nK(L@+i$zduj>w z94(mErp451q7&&U^i|pp+N&5gjNJbP9MUoRmk{)i!UA1bSMn!Vpr`#ONY=pgUU0Do z^j$PCLIao8z?@)!`(J<!#zr1v9$YShM5i=H}HFe2eiGFD*+go2~j= zowN3_-p(SiyjT-iUv2zsX4-tVwYJ^Ic4Q0LKJ32iXm%1ijh)3F!7gWy!=Q#em%W(% z$j-!0Vt3wN&pyq5r~M6wEQeDVJm;`E$(&)FY>tvMl2gr@#;M~h;jH3p{0lgP!Aq{* z59kH)|1~fRgA)Kga6dRYIS$5Pq~p;ZaPo8-0$|Dy_yIrQ2mFBl8hEV+=OE{L=QnC_ zSqb2#Ybb!p8o175^M+wC^&f|mYT%pvUqNU$RAF#X4Tv5x=m-3OAMgWyzz_HV|9t>K zkQ3aAu7ipF2(ApJ!O2i* zcRLiw30JGp_SfKAP!hJP8;a8bJ#89X8@B+yuE7~lE}>NYD}~@sNVu!PVTeLB z(%?9VK+MqKc*ukpufYkBHgSjsCqi`Ma1BlZ<;5DD4Ec7qLxBtlziV(Rq)Qy9!L=Y~ 
z;%p60hYX2(G`Kb#O+2T;8IYLF0=>fos)nCxT+L54uI8s2SMyVitNE$M)%;ZBYJRG5 zH9yt3nxAT1%}+J1=BFB0^HYth`KiX${9FyOAR(YY0Eqz>4P`@8NC_#RLhzpfsX$pU zz?4t{wv~Vq^w-n#As#r>2g(J&0_E9ICX@xv6=Fwnu$O~bw8dP|0$L{< z+e*N{3TsscYUY8x63Ri>%K$y2XR<))_i4(+j*9?QUA3iPp9hX5pr>q%0^WD7^wx~$ zkJJQVxC~0ds4moukOerxgS^1@+bdkVuZRLPNFgp(>IkloVHEen`cz;ePyr|a{qg~n zV88=(eoGfmLq8wjWf-qBF}@Ul7Ro@W6qILRRG>WmW2;@|8K8bTCX1Zz_6A~HVqv^3 z1N$OOGHTwcWzcmsOT&u-j0Y%%XdkHjSfJIiUh)Wm27qz*(Oj$Sb*2CtlMLgD3X?L* z#S*M9DcF9$-)xOWQSljB24#+uq#n&i?Pwj zyW2r)=3Qs~H% z$yxoB3WX|5r7G~|^GixfczIoI@j&~0Rat=|Qz1EhBLIykFK$0&5S7n0=Qcz2tui~*-U7aiPbIVxVY)6(n zFCD$Ar>*?1+TSG!t1ipV&qRfxQf5n0A@%AWT;AQ54@Lnu8}y=*=b;i+W`ka3ijw?X zg`{_4zhWSC%{JC-t=&d~xEb_gNzws!U`^72yCPxV z<6s<)DFP(ce0_D##LCFV79+R@(e9=eCIY%jFf#E22qY$MK!SjY=%EJ`CM{V~m<0r_ zQsfJC5e8aFW|HIOvOGn;Okjmrpd~b>;djkeV1rnr=kZLFp7V*>d2-hTl_al#6&u_K zu`<>cxC;=(L*OZJLqvnXp$8)9K13>8|09%XBU@w_jL1c4fPNUT-`zeVfF}HJtB+mjwk_mCZMKB6>Qs^!!QxTSHU_ED5v47 zU>Lfkn5tj$EJJa~_Dyr*$2}h${qS$bU)db_?8&>hREw3VGv}n98Zmf#!-fx?rgvHh z#V?{SJV|{UHVQwqZsrl2psD2-^lFdVOxcPzN@1OQSa|yLz1xrSeTPgxnv)@_mMwoj z%;ggK13B&Ur|mmm1U$CYb~Z_l9BX!OYVGlaJC|+_kDT<_&wO|`r1N&Vuk|3}`YGjo z3YH)JZ2kO>OUJC|u4$7b&Ic_h=T|QzTHkzf9h$jL%g0OLxgsRxLQD!pLDOHgAR;~G zOQYia#XC2fm34f|sOI0&Tqd2kc4o7w)N|pYs*hRa^pq9D+UE?HEq@+z>|<*}Y+D(x zpl`%YM@TWwW;G5E>}XjPOa&YvB9?%qmJEUs!Qg)Q>Vo~$p9}B4y1HI|_!!GPy-9}Y zkR_X7f*4g8u-!gfiVrQIJq`R^{JGV6!vXhJ9V8L0U`>cdB9ZWhehncLf;A37s?7by zAr$0fqosU}A1&l}ixHI~CPbi>JWvHmAyI%~5{YCOCPX0-NLbe~f}7y`mmUY}K(723 zEvpbFO0PYEj?lWA#8bX8NIa?~Q}xDE-L_R%Dk83>ExvN;GhebSDCgqn@bFK2hU_Ta zXPXj1*(aI}?YiA)b2I>V>~O){Pqmpke0o$>^|U6k1C!dRFu(gUcr;->xox7HMS**B(|_hXfOdS!m7f|aU3BtYEyv?lyZ^7HjD!_BrhTJ^ z{B-d59V=UFf*$^J?(^R{nk9|78*f|*o8>erhi7_g_w76F^W)gD zYfoM?iLujpcBbh})LIq7()*qKTjE^9@Hyr|v)0w8A`X|KoQETKKbcI{dCwr!znFZ& z`nbq$;-Xh?GA&%bjC?T3^6`V1WsUpU3CE^=3OPkRJ#_u)4MBvZA6Mqi&b+|A8k(?S z!s&b5P#$N^g#H8K>38uipNH4hBKZ?v4@4Gy8gqVL>qDD)V?LZ^zNTzV$ct|asa+BV zMfA(i<2Yulns@gcsWM{e$Lc1%eg@R4CDl&{ly<@kEMqC-AzdW&>DKG)(47Ze6PK*F 
zD(xdEIkE7D&$!u*65M7>?TugFFWdy5w2e&s`jL3xXO>o%qiF#gX+m^R!(+nmR{{~i zgIzDr^IgY+#t2It0s-jX1Vooa)dXro7%hR2%6heDs;osSr&Sm*)>I7(OybPDXV3iN zbc>cSci`RSjnd_k|G0Wq>6WcEYK&-Du{NqOxg z`!t&8qP`4shl#!1VHgsM^zG#iJ^v@}5FJOby4Anw3IVTm=S?|~itj7F_NZ-b$(563 zanbNbo@&J4JUX-G7c^2|u0?L=+4BoRwNf9vAq1 z9ZzH4_?ae|{^G(m$^Q8rPY=H;N$oSC*qPGE-ILlyT&W(*LP)mn@!5o9iJ~<=;%f&F-t${XO(AN^+M~g z%g$55t@l1Y8NK<=@>MGLZ87a5*!m8|TE6i$BL;;A8|-Rr-4LC5WJ%E1iZYvu#fC`6 z!yx^kW=9s=+MEu4&q z7gp`(ByQO_+;4JYv7{}(k-2hjbH5k*ijFD5+)bS~;*Qj?k7n#%WI0h^hVye>KXAsj zyEgYWZ#X7xD@`Q++J_grX3mBcr7f)ubwy^EW=&)k+46;}DESS8YwY(nJg+`xbN+X$ z{zn%)3%~gumMbP}jXsinSwk#9ti18(@@kl)0<}gaTa< z3O@A8Xl4DX=S;ZL$JA4ON3^%rXn~k8=0D<+xrl?>Fs*x^2b02t?96<0LIj*LI*}E0 zCALCOAqW$0VohS(eTYON(BW5^dn~zP$R2 zub#mB`Q)D?T>GpD8*)v5@$;4puG?>;od%4Iik|Xn-^)$s;ti*6X{BtN8mo0}?C0Us zhO3!`gD-FFDl%aO?kRPdpLFfnTi!MQppS#Bk#(@alnqsz;~H+~$vfYh(54}I(cVq% zuf?v<(p(y5K^rZiZXbXD)y{tGg&F&<&C5Tz$#~0Qi@r^E$Hr9Nzc1KsbZLnHnXG4f zehE80!DZc5hRyzcQw}^cpFNQI?vPJ~(qvM?=a#*Qe?!Uc_(^-qFFxEbdu^k6&97rt z>c;Bsxpl-s_Gu#T^TB=@M*FHqir1&@7~eM+r{j8a1H*i&zkOHy7z;QuQk{v~qnT*8 zx59T%-=ZjRuXvJ@G=lJA_fFB~d1Q!PC8Bh>t61tOb@Ol+NZr!Ke@KqQ@@4m8iRY@C zjXgYUH|MQ7R)m}T*U9mBowcH%5DNxCTYy>vr3U(ps&g8;aYZ~_5l<`{NP0zs0SK7K z_KF4}|F(m!fbcK6P$Bg0V2wkdZ>$@O36pNqWx>FcXt@apjqL1A!ppLhnmX1oU=C%JywjHBwQDp z_zC~93T~g;Dm96a-rLw|;6p!f?p4J$U;9VHHoX|I{i=He(e?NIOzAH7J-h5a?z{Hj z;B&lftO0TJi6w1Ex$wKeTlP}i+?v+<*nTcC-s)5xnd6hFT=7tnA5=S|W^dUpiwDhh z7E27p0$;0?G&`#Aa%#G@Nb7vZ>AA=I@3<`5Ui-eLfpIHi?yC7833K`t{kCIX-^H~1 z8ASNL``fFhX3Uc-p1f{P7$sj;WHTexb;sI~@6{a!?(de7pQ$ zGG%$y&4Q^{nVd*sa@!Pk2du15LUfj_ffue1;o3Q~-mXD>koN?3)?SiT3_|n6Ehyb+ zNotDmYjiXgo1dBbZrxLOhg)qzn9(nU^w5{IG4}|!oT?3GA6<6#q4OR4nHzM%qqjFF zGoHOWUU~O+J)vFuw*J>6GZq*QCVW{Mw)o@ANcW?^Ro=~+9;iKz1|1zUYHL_?-h;!< zM?Ct<8W8i{2$Y)H80$=Fyyp2vJK<`TGIs9MtG_c#iWcPS!>JYZW0|ZA;g_>Bg>iRo z4jwyqUx%?v%Qm01UGiI-vfP&57}0bxdy}Ne_^QEU1gRpIBUQxouIT}z_N@Am(zl_Y za?*dq)d~?r?OPrHPLkFW6$D@^0m6X0mwF;0#tsDtdiak=tt#A~;{gth2RIN9fX%hM 
z_@vae;H|xw-&~~|?Y869mK2*MLFP_5j|RmyZzFk{5yE$jJwUg*=8>&^^3!QdXzQv?2Uy9n`kZv)?Zrfysn(O{zBcrW5djd zk1~oM3gaF0`43tsv8P)5ZXI&*AP-;ElJ)v{-fQpFMx!^OJ8pW)*5u3FOIJ2XbzIK{ z&i-)cI$8U-)Up-fjt{i=G%!o{%<+5v`Hu4-UF+y1?$UDQO?~gJ;X^MzeHuJ-+@+D5 zMouulJM%N@?Hf-zhxTiuc>NY5gqEF=rQ5xV()?nrY*1 zf50nWHa20$PdaN&+11D2?7&Z$_CD>!>G-`hb0+TEXQQ%DGvRJI!Qpz^*Ly{XPmS6* zYn=tVX?4aEiS_VX-0($dlWyA&IcF2;AAfLja)2HF#hLQK{NLDj3Wn&!g_g8_fNt$t zgR4rrvd^$}xB0IFA|H4)>O5kH?=soecT~vT{RfogH$Fv35tI zG(?mp6k@?{Xs=)wkHjJ|y@FlP-<$gUi}qDW1U4so**vMiw}NDgH}5-1l+!=BdUVo!9+ zz!Cc7OQLyFrFja(vix53&3rXsMg7ghGBe(}i>gf9g<5rbx1_V?2i1)}TSl+lFHhsS z_E|iBJD0B} z(ic9>`jo|++u#)7l#>!JwWen052~-bJMPWC8LvZ~uYd75z1#hHzWus;>p4$PUwh41 zKc73VKAPdD^@=k2y!8R0$?X^It|tdCZVUIK9ishwXzjXtn=V~3oD>(5;wc=#F&n$_ z4d=sk7jIT}{icDFv+@;9TU7@FiKLaV6W6~gfEk^kwXZe$?X4MOEfj{MLz;^31v<%> z9T*ayKH-3+R6K9Ojmxh;yfSKBz`6Cyig~A>4UzV_Gno9-M1N8V=?rOOk+s1diDdJO ztB1@9dv5eO%-}w|F6Td)_pULu?jm%)F?9F9*Yj3TBg1s(S6H8h9NRZ8ToDjbVK z?9!!6%FAs(h0V2Y`P`3P@pkcty*XPW=iUCjsMPGqV~_b|CXrvyx3aT}?yvv!W$N!* z6_2xh)_*~s5~8NxxKWfRo#}UGQBq9*y%ou}jiq`*oAT#6crdLZET^Mv$^PAWdBcy!7c#ZWV}B7;5jG%Igf%!AMk?q2(}C;zQ^KBS zB@LAaQRkykVk%xh@BJVKP|Ls1Ef%ZJ}=ynesI zUEhKm+bTXCT4%k7tTSp$3OB-Vt4~psb>;B}yuNQ`SuvV7)t+8{Qa#lPeT-?J)u#MMePN7|b!;ly5@s)aEQs3%hR7-Bj%C9Czvg$itR}!^P zT}T{~LCV?QH15mSi#`_|D6O;C%ywxUbuj7bGN0u|d$$NB*F={XMXLiB9GXfy;P7a; zNBz+?AvQ&?2tyrN5AwW*FP43j1Rr)E8}$3k4VS#_&XuKdgIfl9%A1Cr{#s&J=rG$> zbn;n$>&?MAPH#PI+Qvs#t}8cA{*wJ>#W>xwHH8xv8uwqkbR_?h_rw`PsF{sbI4(#} z?0RHG5>(*~L6IJ&n$!QWo3r14@YYKwhae`sRZ^?x0U``K?><8m=zy$IjDT68P#_e$ zfi&@3jl5iQXs4>|%J{8&hAn-Pw&UKzty%5g+(tunCG@3Y+?jq>dMZJ?c+}xnZ?=7% zA4}7{va!^!gDNu9w_GwzzN`KDe#(h4XMejo#k_t*wPN^4(*f~kDpf7~@KZ}}&pJ3* z`&IJx2kDo>F7Wf$Jfb<<-kEWz)vqNqx!6$jAa)1$XGGR=9`Zfo=(qdW)^~m*Ztn_| zmmeB1R<+7<-A}b6c#MvE!%G(iwVnvEz{kW{F?McXts32Ab<(_a%iI0>sukx;TqE9; zB@UXQH}dPL+lJ9L>B}CnCp8*5Tja7}FL82id}QJ4*(df_w*S&uJJ`=+_?CM^`0hUi zWvyNvwB=s=E9I=~xM}}~y|00<@~HBj=j$fiq$$OeQcS}QODUx(q`4o7DNX4O&Hc#D 
z{mM;1HcJta#TIE6k!6uZ#7Ge-8`-`TSr(D96cLfK6fv^AEQ={pq=-ln^jlL87clDRSA2PnJG^5qHs0`zhaS8lo<1+Xw#uf0L zrdt_ij+vu0GuIpi{=-=qf7U};7=KnU>uvBCv(Pr{QWn}~UCu(=tSeb)o7I`s3I3g| zca)Z`vhnzot!3kqm#t?H1b>%XSDM>!=P1LS>n;X=uls)R!`zs=JM69jAIr&yT}{Wj zx*CP?M>z8D0${{ira`VWCW(tpG`wgR^rTY=k* zZNTlucHjGFVJuLfn{bH@I&T@fcKmC z10OIS06u6w2znR_5OqEXh$kfS7 ze&JcGO9wsjKE*rluzr@qX&9`@w>0FFu6n56%7`SBVdY6DYCURjCSBuEKBrvX*RVNL zMTtZw=+(|tAiN`+im^w?mD5#mlJYpYWe_}D;Kb1L3 zzZUw*!e{rNh*WHnqjLWs?j}{B2B>1#d6*i3r-zJcRO9*n)=Ycf>+N>lce+2M&E44-5!j%ZpPXiM%i4()SD1#-c}yQ&asS|pJ2Rf zW#sz~W8N`FySI3XAzs{?x%D{qM6NMM{aC5cQP2rcJ4p+jMScNvS*b9bHH>36oCm_G z7VfL`um|}-5YCHmsom~9^KwTI@^IyEenzf`M}Utylms<&%Ns%CoVF94d@^XNqq{4g z!CVi|c4#h13oigI0>9*{%%oEl23b&@_C?zS93k`lr!d~pcSCi zpmm^)pe>+npq-%IpnX?!J#x?p^2omiD>H{hS4d|8+cH||XfbI9Pw$WjZPONt{Xs=r|Y?nvldPnuq>J!!Nz_Y*$ z)t5thC_9uF%C9~e>MQb)N93V_Asu`%C8fl2VkL1zH$F;C5*utj)Cis9$WJ6rCQc>J zAkHSvB`zQ?s%{T02`wkCBCZ8)2yMP9*&5mb+!fjjJPK`NeLj%L((dtpuYzj}KhMB}USNRR$c~>P{!waeTDQaG7TR3zKZC9|)YTLH3 z;gaw=)Y%x`V%t2_7~U2d5#DLrAEmpm;)d`(do4nZg5iUZA7SZn;t|$4byc!8e1>B< zPszpV_K1qOBDs-Xkv@^aa8qOe`YnhI?m}5j8Be4{>PG^^5_^3k!@vjaRhHf(VV1^; znXCMU$mpw*t&y?Pdt`#FV`Ng-7{o(h;U$qNtohqGy{qPR{H!hv505lMPg`U@X4Dc{ z3|tmj8PdV8p?p0gnXFAGL>%&{$+P0XVt zzq&ozH|n`oGdhr1Oe`gqvp)7qbVM{tOkT$~L>qzQq7$!5CP$~*a|sV(1N$g$SUQP~-3gK&p zNA}cI*TjJ7no+j3LyagM0}bPAu!CxvYNoN~%uu5sBy(!!)hvXzr>OHPinY;pY0V0o z4RtOvr8*KXk>i3P6ux8-eXuI(Zv0*ZjSg=bH3&-MJ*F_FBEVy*4|t zADXdiq_h@0r8d8|uRTXfuwQBi*7n7jjNMUNTw7XO&bF0Za}=MEQ?Yi0oU64_BKF0@ z8*33|Y8z|E)lP&2k)?KO?PQ$BO|>&v2a%X~@sjc2H-h$m3?*U41m(lYDPMIpqZ71@ZoIczb+E+$UCw zS9HZP@WUmN#;d6Zql(Ak>G-JlnD}_$qw%KrwD`>UocO%>!uV71rSTQ<)$w&*vCoMe zN4n0;_{R8__%=H($9Kke1NX%bI??u6Xlwk4oSboZa{N^M4D_FmUyP3mbx6&GO1Kia zwYw6%5`DU&MxyX)B(>umBc0PPW0TYSuEf6B81X5>pb>6SES{)khQaDQQVeiH=AtPAn6@O{_%y zHTEq<{ImKjHLPbO%8M-%MBD);HW6E;mo7w2_;#W#u?LnPp0K^Q7_F8h4%vKUm>}zy zCR(q?vP$lSN|ugD9K~o)B#sIn?oElz^sGdg#D%W<;4j;emu;hL8x@_>S9o|*CuZ9* zlp`M}b4l9wkcqKn$^0btMbeWT$eMYgGil$uCR68R>YU8cPED2y+B|yMkhE`QGuX<$ 
zkIg2}8$vQlok{Zc-E0wcE{Y6`b|xj_bS806OHNE;A0?+IXVgx#qfab136D+AO)f|- zN-jw*PxOIgRadM`u1zAEK)yNDnB1D&5zpA)F3%C zQ$tcdUc{kT~W-AR!*Fac8h}>y7kz-UmMSK!>bXe#+Qnv(xD}(QDV2$nACXm z_$VVnWvVH9EH%v@Gi-Y-HIpNsBT+0hFEu{7B(*R#KJ`?rBsCrqx%IWBR)iW;t5fR| zQ^any#iA#Cu%ZaV(u3{$QfwLclCE=J5Z((+w4!DJXZx}AFk;Y-uu>o}Brc~{64!(pf$+ohrgW?Le0n>vjku@lo>|?N#=b}&N*_(1 zh<2viYl~yK>9eeJp$pTO>vY*ob=e}R!wFEAUxx^oK2(QjSU0e)IDGKog7n$Qp6a%` z(#RgjvAgRk>qaCG)J5x(;(>Jy7*DSBD40ADX{l=zx!kYn#?@5RO+=l^GOD_%-LbZ~ zZbsegy16J_Ks^g&w?@vUn#68h2>Buzo7{Elmek>HS+}Zgt@wZ4hEQYO<~rOP>vq)P z-UxngstL0=P76M6-#;@bGbH1)S2j~o zRL4kCJb)S1l3GOv}v7%&Bh6%*!k!J{292S(;gqS)Eyz z*_hdq*%scJ+?v^$*h z?ZEdEZS{TX3+o5e53Vn%57ZB<57vi)@%l{t==!ns6Y3|`PpO{{epY=m`1z0wt8W1= zu3uKavVKiHJO{W*)~mj?e!HO9xgP#gzo&kG{h{QN`lB+o`V(Tk`gZbXyWs`mWx)nL z?E+?#&m-m&`w~5(p<$r-RYP$ozoE1N{?ky|fLPEFZAh}Tfi>YXb*mb1@;6Lun0&3T z%taeRI_gYiO?g_M(lDEdr}fn82HSr|uyhe|3Fe|2mZQg24Qs7555K8Q)lEtYeo|#SZJUG&K^?C3FkOy%v8u; z82PC5CA(l`)5vLZ8uRQ0%IgUVyPV!WvyFwPopt0=$W&6v679qJ?I z-R3CuNq)oo48P&s#lKVizyPM6i2?n#(W@m)66$#rS}BlX1IIp!GER8~wF>V~{aeyVWQ$yjmaQE5=u}0`tRW zvv!C1MRTndF~4DcL;JY-O>>j>sQGW^cCE?WVYX>g&F`7JwZAj>m_O7$ZT`qSu6@q@ ziP^4w$vka#X#dN+WSQDZ>t-uoTW9sQZr3(i1=av-1dfo7VICO;)S5RnND!Tif;9tXX#n zPv{BPF4qNphr8K5U;l;DR7S}!#pJ3&`CC#|BWN6GB51PeTe?nC)i^u1lXpVZ z#L}lERZX^YI|T+*O_f~!k_f*TqLlkS_kButm%&F2Nb|L0+A;X*o7$W3)l=GA@YOTg z8D(m})P4zHy`)`&uU^sKQCYf6cPY1ivwpM6ksr>6f8MU&u5Qu`^gC3Z-e2#pdg^!S zcd1+Sq54qOOTSO|st@QDdWGt(SLs#iHdz_fr^jb{d`8`l`5jV6Im=%17qZ}LH7>Y1 zxGuOcxFxtPxHGsrxG#7xcqDi{cq(`X{Q2O;;Q1<5<*Lf9>Q&XJs<3JRq-UxISCv!+ zs)khstHM=re9Baft{Pi4p=wgql&a}Sv#Of$c|KB0)#9pUz?G>D|(L zwzb>0RBb|UqOrAVJ96o}4fXe+-hR|LR3$UgvasV-0oLvg?7PpYud2S-X{*$Su+yGZ z73w+luj+pFJW{3Fp?0bVRU1-J?MA9nud3J72<*c*R0KQmO%+w=)deMgt?5v4&Cm>$ z&@9bTNm?^y{n~n4rL9ZWB~@>|W4)squuO&;>9Smw`migZ2UUVbFxP@nJAy-kzW)}g z_#O0K^@jY8^7pF$ZtdUC7>2xy{M~fFAHCP>{6psX`_AdT#_^|0f9N>>E873RvekcA zzwhOL*U$gGeDe>P*YN+uy#H8sc`v=cYp=bVpS<6_`@7D$cU!05XCA++-tW`j|7}k1 zRquD5p}+4O-fz4APWoev;}6yOZu{x|_Ids1;lD?n|Lk1+d4?UyXV}q51!}T-9J0Sb>c^*Ak9rl) 
zvlZ$n(lB*i{YpKcevR~?en$Vf8V)bItb)b|jRG|SK6AIKhW1fvOg9U_bG=F5Am0!n z;XslP>6Uyb8+3P@EoWH;XgK6u)0<)r7LoA@x+BN4iBlfz(TV0qFzki%9wENwpAdTGW@+ZR#oYw7OmW zhuR5y>_h5@xnF=iG?(deUz?wIlHTE+bUq1T1F8TUmTy0EyB>6nv{m?68ye|0$qAxqOJ87{bU$L*$SMI9>jqpX0Cw&dx zmA<664Sb_-oNpqm5k_e-v=^gfGFn`pvO(oen(CY3n@xScndU;fohsQ&_p|`C$k!k} zOW#hK!J2mMB^b-~so1w1W1Elph9&t{`PTY2peJ2M`>tcO(_|;vpEh&dx~#v|x0QC? z3XAOkwzJMw&LAnJ=yw-odwr8pDtZq14*QNFo%Egd&hnk}uJLt1L#N-wN`e=wc~|<~ z(7Vas)8E@)fHm0f?~l)iuu_ZtgZx9harn~-zt3OcAMUT_c%*baJ!PdohBQcOcw78w z|0sAvw=vTb!~?L({xQ%u-v6k-$M{Ac{<{TF@Z0Tpn;zb1;e2XZl{N*Q&amw#TMPoU88 z{H@|^fdQC@V}-zAq>@0u-eGnd|2*u8ap+Sl8OChI^Ta>8TLE?vuc0^7&rmj#UMZh8 zV5Qash9Lz5;XoXHW{^hvIs;<^69SV0Qv%bGW(Ar9^8+m?T`Y5z(|~iH7+B_Q3#<&R zLCJcY4~?+n>A)uJmKlN8!1h2J#+HW~nZTaFexyUbanN}bwNC`vVZ**SGui`ZeR=-f zfeV4lWqMh5V6nHgEUzrTtgo-K%!58V{96K-;io6c2Ktg^eani=O8stMxp#4y%wb|# zC1%!EHli$AmZay;@HLh-V7=QgN}OEy)L1sIY+~7D-(G(~*;Htsf&Scq3E*ZU%?(WO z<@t&OC(0K1-DQhlrOL7;C~bzHRQg7suS&6T*U3}1ylhq3TJL(Xf^S^ehO*6NTg!Ho z?E>vBI{?}N>EW_tWhedH%69k*`MY8Y$EOf^38j1`PXqJO-d`7rt(XsAWbisesB2RaPgqxK_#mb8?#P_`Ip8PZB5;n$R`5&6*JLx{V&tXB%k_@Il_kYD=my=O8+3zi&<(ml zH|Pf4p#K>p-}m!h#oze8|IhMy(kwLP|B9EZ?@-@RDk~q<7vymWPjXqMpmIW|oO;Whd=+S|qqi&Ha4pYvgL;B` zg9>c@@>$BWhuGuJ_JJxu!$H;E%G01xc0bu;K#zi&x|PoZE$o&*1zHMP0b1QHUw7T- zji4>>lCOJ@@@=4<*K&?$ALtV8N19q)H4JJ9b;zw`YrdX&fI$@TOi_37E$Q|KAs8SE(m1w6y> zIf&0;q_`(T8tobDnb5PhyTUWcGsQDKSNF`4+|%rt?`c8Z#hzuJm7X=8^~g7&eJg0Y zr_Hm+v)^+F{87&dPrK)==K^XAT`tm#vWxPH@{9Twd5Q*tEB3S$l@^s3RZ4l$h@xmw zvZ#U7*t0ijT+zg$$wgC(W)#gXnp?D>Xi?FUqUA!;ll#2;e9i!b}u z8{Avm6}ft@UbOkTv=wOw(k?6GSyQyvGeM~5n4$wkhl`H6E1(}O-3NQj^&Gu5+g(w# z7AJb&cYP1No42Vdv%tJVJ#5}>R;!R1HZ$sDW`j9WP07}??^ILymi`jH zrT7-x-Nsy?Rt2vmXD!*r{DW-s$rRb=)xKdA0BLuN?bW!9VZ zYLGd~9Hs8&Z;uWBqv_T3zSe4BkmwaO{@wf(`4-|L;`Dg6}*D%r>V9 zTK^;x^D{(S&(o|kThMhA<)bN=noqOl)7G<;e2KW8b?zj7h;26!zd^}ISm)c6u+=+v z6TeRUN8*0s_lc#%FB6|5Qor>t)K*2_Bn}l+Jz0k{m0!FAevYO2l#p~UNYof2BZwU#_>>w1;^tHNgul=^}ggLwG!oNp~PKhOC-PR(~x z^BdIsxa%#kiR&MP?-U#6u+ANy8C9RC((#kmR=#h~G9JIBG9XjmsTiC7U 
zd_kj?+AcfzI&m%WRiccif;Bl>;}ypO`SRP4f_g~irH5ojW+_KoYFnmL-k%yu$=^8m zI&m#g#@1y|Y5TY!T8aOEowgcFTYa6L&|!TLk`GePT^z}qc3ZKM*^}}#$9a+bFb7{J zuBBXNbWv8s{Iu)SzzM8VP8>w*{X4zaYn=tZ)|xJM6R$Roirlzj9RZ$You{P^RudBK zXPkSr-S*01)afB|{m0bvHQ~*>sO=K@N@{zXc$|2I_)9@VX5pU|)Si<)YkW`InxA0l zRAPvjV9j4SV}4D>FK8Nqki1O(W%6stuc5Uck(w^~ZO#v~?c;3q7i{&H)N{t+ej`H9$vl6?A%H%oqDG(wwrtzU9)ex7y{ zogZK?-*noF&0m&YjDHdR#uK*OTp{%Z%}VPnu{I_1aifu#B^sWiwsK0mS-nKFAS@ui zdpb`>Vpv?kU&~5iy~uMlITHOD*WbwQqqW`C7Nh1<#LEtETVNx3v2%+v^1HyhtUJUr zxl;ROj%Jj43fT4v@mb=FPOG1SUnQuwT0cY0u%PxF?f(eJ{5|UlTI@BB^JSvFvh^b8 z$~LpFW|rQ;nvW0@4vPF{_WQDfSREtZx(N9Qv8R^m{5S3|#*VBX@H9(^#|omaU-A5r z6XSH&*W}!D}YAk(% z(dr5IK8KM=@J^25-)#+@OXOr@3@Uddifs|K&A+!8yRFa3nMPb={k@#|te?Xe*XY7- zGXIGuWFyDc$Q578G4q5pA7s4+P~5?@E}G!(?(Vh_+=9CYcNTYd2?W;=+}+*X-5nNp zcXzn_&wJ;c`|hpUt?BOHboW;6Z12?6H~sCktc=Vq?^u{3x@mr&s>Cg43tJJdp2S;B z8R6N@kM<2SdoshhX96U?Q!31So7(f;yCg^2U-nugifQ=vZ{&fCacyz@xP4)K&ms$@ zr7_-WUd6iD=O5ryB05vu{q~5aH_Ola$9tCjjXO8 z`KlKW+J-|$%Rjv zao36RG9CJ{w1BYpI@uFTj_Y4A%P|?W?XJ94$7NZc5ed^QZL5D|>uBfBJU&-bB6v#CUj7IKi4aI8B zXsN3gz-uGt86n!V8z~o~SSMy&`@zbl@{8ynvLKXRHaw?bJS5A z^mtXbH^8Y}yZE9|`vF(+5f18Bd9etK(U{{kaeWj)#|HtAH8Y=MH=VN#WnGtf`(YkHgE=;<78 zdTKh@oMe7zrYR`jF8ZeaqKfn!)xRVf^Wif1ScH4M9yy$9At*SC$8S{C7WQPcp>Srn ziE^}x-J*wmUKivzDEhAIDIew%bCpp`zBBKK-M}yJTb%dI&FwcH%a=vTgOHcG6#Agm zF5cd+=EK!S=os*Q+inm)eJm-Axi&g0HEWybc*No2FK=Drp9kDt!fPD68K$;7xW{bv zq*n(f{&TVl(v_2Rhw#l#`8SSzaO)Nqw6~D3UD}oe&ebHF%?oeHH-OpYRm)}gTkD(P zErR!-qMo?)^&;~oYO&l7p^^3eKAni6s`ftZ3G{kd_L+x3X6e9O!L>>4Yv0Lh0qJ4n-lf+wVPZR?;{7vMfW~Z>tOm&Gv0BRaf#p zH4kPMt0ML1Ty4MJ=FS+YlRcPi98*&-Ydv21>N~q}u4y+ow_r_U-}%K4<^B@v@!yu! 
zd$1qgtG=w7wd|O24P`mS7{~fWuwQ2HfybZa=V!i2)dW8`#qXgGrRM!^g6+*@bhJ(W zev?Jv$NM}LVE1GH;MlNB&}5Z0ZZqU)6YIVIm8L5SC(aIh9BKz8({X~Jk2!R6M@M-6 zfzV7{fV%B>K~eEGX)amn+Ab$|lSxhEBxcJnf11R2&Ysu~<)O3g5a22;k^bhBNyCJa zHi=gRpZrE*c^C6yKkFLda>6GTO+P5(aHjG0NbJhVhrz}Roe}Hb3rOD)%D3;~mOg3k z2ghgZX71H0#jTguRaX{tQc^{efrLL+^tCJ+m9wh8{VHk>^FnSGz$+mwsa!REoP-554@kcAyW<2gd-8 z)$iF3V#k_G3DW4?sD zxSGyK(9G@_FHRJ-AQR*TF(^&qhr|bt(bcbCMz$AAPx4+=b9>b}T6xR}#AVkh(IPjg z>Yt1CE9+g)9TkT46Kh9T9GzJ&UK^Yu3ZC#ruxdfZiMk6_{p3_WD=vqzsn#&7lEeIP z=jE}6n_Ailnn$@SVI}TG6A;yrKDd~iEf3nmFN;-hEn+1jgWBXDW?OJu>fP(0Hk9VNi((tXojG!s?^b`EBE-P&Azmx|^WT1R zE7Mi#pIWD%p3qs#<1fX<7FF!)oFUuPa2H^;HR0_?N@SXE8Rh4L#gI?64yS<7&fL%w zp?Mz{BN)Hh(+=f4gbzQ*oyYzl@WQ}};-h99t#Pz{BfJq23e*R~N^c-HGS1M^|792N zgg;q?Uyb#3gJml1H(_k%fqPv#vdG&HXR1yKFDs=Ry*)k$SDeX-!MKy)9hx>Ir&iV(#cQhc%ZdcfvY>kDx3>M!8)4Zw}wr9vmv`79_=MW^Re|t&IPJ| z{hmweH#qaJ~PY%-RNAa zu`aHBjg21j81dCahbrLd5%EI~YCy7kWrj0GtVH0Y!cw!^P%oX4;bAfS`8sr@NTv_h zVVtC9*lc2s?!wl|kDK&p5Qo+ZqMNlpvKnOw|n%RRvWr{bFZ zG-b1pM$GS)n@9-LUAzvHys@9!cfha-+6HZ?8*2rrm=K#wpfNw{HVS2Wpzd>QO)wTF zsQ~tI!w)D2uPUDa1xskVSvSUj@O9FU;P+&}V?(Dp>Mv%?eJL*`Ny_MBW8+oJu zQAp|DzHkTil&|@tR{R4}klhF* z+o^r#4yjzYi8zV?#Mc+nnDFEXTVeHc;-BYTL-1KnIe`Q5dY1=tI z_v>?5kGLcFuKSaD?>NS}JT`a(+K6b!?+;h!^Nmt!Oj)EK`>ey|!ERgrv{wJQssXp@ z%$Tljhrbk7M_uR?cg$t)+qQXz$%o~g zR%VP{#2eMm`Z1BV_F?bt4P45A&#^Tv2+!?tsYs!5vPlMro!7Zm+-^e29?mo~7H0l0 zYLbV^b~rvrZ|qRKsV&}Ewki;2B#%brTZl*P=XuPJQd6Nk%Q26rQsH7L%AZNasO)*> zmSQ%=ICt?M^g7wjo=D1R2=PVk>&PC>mxd;Bf`?jF#3h5qMwgy8@Uml6$$u&)5 zqzW}GV{ApDN+!y$>g4yN5;VC3I(@$_$g)G2(YKUZqG@zJJ zC}UVi=eS#v3XrGW9HF)TW6IlbX4I8fnuEEPK`#zBoh2M5&ZSbe<6+N!kXNFIwL_3e z_@|{TGMY3kL>=1uf>?=P*Iw8enIf4l9AuY%p91TxeWaT&P@_Tvq=eOquKj>{0D$?IEk7 znhljro<-|$o4+V;r9o`G{$BnK-Q?ZCZgf>ng>n{Ze`4@1h!4!CKVk~1sxD`Hklim| ziQrfeL0T+JV^9a+ZD3wtGXCI@x9x`Aj$7_qzqYIm8Vov{V45+S!8E|vzO6BUmRF(H zzOTWq!LK3nF=%WNLJ~pye&d6D!+1*AqTFce=7}H$M*zqF76$<@Qqt{gfW-oxjLcA) z%27)3CvD>|q zmUy)po)y{EW6TwqhC3p{qPROupQ-RZf+5=UNgM8@h-T;{`xMo!|R6y 
zjzp0U6;X%@6dg2IPY0>-tM?ut>S$m{*=@`PQ8Ja=Z@XiwwKjD`=1rM2-eL}}Ow^+K zh(AnPS5)>q#pEgeN-Jjg^!g0>{l@cpYH~5ETL44zB229&M9p!drvJySsyI}dk@kW- zlK$G%y20)IO=XfzLV$Xtyi(NE(aAhWqo+vOi{vm4Wg+a>FSU3A@`eN0M*b%1zyID_ zNkkoGaEIR*6%QW+YpC(OPV}3pO9`dN#4XO585rrHy?6{1B+dYi8xE{mb0!B>1syIz z==e|%C2r@RBw1-IEZOjm98TP6ZRr+FX7EXK*q@6U2PvAfD_|#h`S3Cs#(pyR(jN%0 zPWw*uUv9WgtN*y_DZVp)VW8CDQ<$bxRh268SZ!c?E3hY#4`ukZKv8iYTtyh^R1zz= zRUHqiW3JE>Nj6OD^s5=HaM+_nueNDz=JYmsZ5Ev8vHT$>+LD)FP_7vHk%wA`tf*yJvFS>)sW7kPH|%|E>8oldFz06L+d_9&805%O1& zXEBkpnr=ZQ8aln&z<8->LbdYiyuhMm*UFUmLaElnJgG~l$VoNNcqdP8w0-u5%B4d* zPieHH2BFGObEdCq*XWFA(#8Pb#pRu6Y02x3D0;!}h_l(>SEg$K@qYEiCVCF~?tAnS z^u3V$^yP^gw^G((5x10IzTl4d3;H`O=VI3J6qlx7PS+O3JN7%aZEn}hrhVy$yx)*q zMduIBy3yG+ovZ~j?ulXICKgik9cT39-`c4-=zX|Ui~WxDbJIWe77Jr7`ctTuDV!25 zEK?ov<4k9pE2KD&*Cy*(E4WsTQ)O-KR#F?&fwgiRZYkzp6 z4|RYGUbRq!!sRwAC^8oh6$r_q^esWu2r)b&;z1!kuaF3Y9{LIy{T+V)vqzFL7BEf{ z2OZc^Jp|t83pzaQRobQ@!l=Zq9Qre`1KoEy4S52BI|1RHfS|s*RL|FlC=Na3e31|m z@_tzji-AjjcwB$X*8sDlb%{?vHHt&F->Q~S^q7%OK-Y>x^1pdp$>wX~vZL2wPC!vP z(HRIXXBfpmPJbR(y7`(kVZt`tuR(;67x4td_{GVA3+qX+P^`skqJAjyEMr6a>#=2fLRCq`r)*`?O~DUw6IU(`5$#QTP?ejw#X1I+8wJ4)h<3ED#<=v!dN zyFZM~+bPP!H8d&1eTbw6*|Tp*b(0{5VVEC(ufPXOV8Mqy?5kuaKI+4Rov_AE%umn| zaai>{b5ixMn<#P6YSqs#qtKTzqI&+4`F3feQzy)16EVQ&O(~$DD(LBKM=m*rU0cv{zwX=ZQQ9$h=pmq_Uyo=*854cGN?%A?95;N=@ zvedq)v(`=mYBvD2J%HL7z)c{~I1ac+J2Y%MG&~m_%NQM75gltrNWPk0@wNe|TL;{x z0}nig#^Iw=2%}?@2*{av#q}H_+fQ(ok8wN*BKcs&GyMzNpXhNtCIC0BK;tH$aSYJ7 z3b-dXG^`yR>k}Q@L`eQhNZt?~`zJb9Bsx}*OB{(;Jeo^UNNhxIAY#Ln5V=6r1Sx#p-Wq8-i4ymLWX5#tBh({+k$tS zW3G7FjAJTr!8nDzxy`95SYm#*Q8YR|W@>zH@=(%^o7EbZPwwHbRt1OAfx|_pWJlQo zUW3{~vBU!K#GygHLLJ{%xeCqexR7`*=G4WrVF(aVIeLP+BgQ%*(Bx$o_?Q_i6X z^9$oULRYL-@x?1Q(mel(Uc)!L8r~v-dc{dE)OBVX@1NFOe-cL$#bTzVEk(*K5F4OR z3Fk4`gLq0xEIIZ|ix#j>avMu*oKxaeB}Xg^vC!>} zr>BZ%)2g6yNlQu#!)~W&Zc(R$>1F6cG| zhm@DLD-t37bwuc!FW)nA6$aXT=|dpbexo-7iX8FG?6J(8n6Mo=FGeWWVxVsnJHv*; z1#36VE8TO%+T+#j5$(1n+B5XGJ9Ne*o^U>;#N9ALAWv&_(QTM&oG1T%OvIwW@2UKt 
zh8?V~PdcsBpdNCpH2_a+Q?nhGgV^O#Wywr~qq1kCrI!2Hmrv4m#P30{r3Zzt!ZsdQ z_sH5ZyEt$0b&B>zr%wf;x$aDQmk=zA;eqr%oc6l@c>BtKoVz{k{8_+JCDkaPlL{*9 zbHqeuhGC9>J;YF$?6YtuE4#^L^MS}a3d+p@v%o?WGAUiQ+u!d0jo+F-h5SEw}IAhN4^3=XvWI<#z& z_JD5J`Z|q#>3@y?RV-ATW8h6AGEQQ)MF;6NNKvMizj<;%us$d@l8oJa;e4j{^|5o@}r!rzS?d$$C-Iq zrZ7>jF-Z=0V}E0Rew6+pyrjYB52A_Z*KaW1WYa*zrhUW_dwH?5A$Y}OuH7QuW>ZPz zyaq28_%#x?-*0%Hn14wbXS|j)OPzVaH|-f4@}o_5MLCljX&Y(3489&Fxf6R4tB|U( zGBteJbFbd6yid?H{g%oM?*DAXx=ibdn?dUUtz}>tO_H4MgSel$YgWJ)m;9l@kP(RD z$GQ%@KYm=RDjD?<_Sf=AUFuWwzh~dA6CGE9Ke+;C@f)FWyy>fV{1Z@;uX;p(tBL0% z7}nPkY2h*!AQUtbBs#<~kyC1kOZwpAA{vYKk4Q?Qj7_0L*fAz%QZKHjw1qY>q0rkf zCu-6)HmX^lN!3)C_2)i>V=j1Rg|7F9T>5_snlV0dTPbm=BU%Bf<8ad52+k-_JF)^d z?7m)+8w%oGMcJEa*$OH%MRGGm6VpW#Ges6hb6Dm!JX))06%#+Lv~v`+^Ay--iZqYr zIA+25Y4zJ(DCLqgw+UJ%r$Vr`>4@wx7ecAMgiS-WO`@Ac+Lx^yKVI`8Loy_INpA;K zSb=ZporRYr(FsotM_KM1?$8&b>jjO~ZxU}95lKeZiVPoN1H3g)R9I8}J4qVtXb}oV z*OK1;f<}#IYib{>+pv0A%>6t58tuqgw|qM)8hmi8n?c-my*n}*d|@|Gp~OlXTk!3n zH?|F3;k9-V)t4?Pmpu=N@DELLJDi}-IUzo%h$e$;3KNDR7;BJ(7f1r>ED!uFkNhkT{fr#@A@;B{2tTj%tQgCWeW2i1=;w#ms4rfQ^t?cW z{zB=0_WrpWDiRBT_tK35{jVK9{#WsD6v#$Cal5y8kNpt!E9{!~o7p_NyxY9P^;yJ0 zXY~&Q)D-4e82^(~`Pz!h(Z^Y=q*})TdGKB&+7PeBnJb;G}r&=q6%e{GEWNCYGi{{0!R#(wBi+V=Fu)t^+iXKqsTyN>pSS(Ik*c^RR`IghWJ7#I$E>m0u&H6^{Al`lJg zXzU1K=TP@FTx7N`s~rZ7J0ataD;UtS<>||6&Yb1v3h(>4D;NE8{V=T1WN;G0S+alF zl=|7gZ(8*sxRUvHjAi|F{lGl&aIUi{fN7k?6u!m|5;Vt4h{@s_9Kcr>+gZ2a<2?@_ zg+;HMt4!DA7T`TU8J%e|E=q__=DGd?trLg-;gLp!GqE*xa&|N^wD~X5&d3rG4gmPU zNy0+%U$Qm{J3Gt&_x>O0Ki2=z{vYXo*Z)WUzx;Jc{(q^s{yP;PAG5fHwX=yMv$(aP zvx%q)(9YO|S=PkX%-NiTjr|7~>;J0&NVwS9IamY)5aIs&qr0b{rNbKhT3C3R^09m1 zXB!!(h%+|5`3Gx=MnyqEYMoLJp;-$=YYeO&LxWJa9GD`*QNvV3Nvc|rUO1tEfl~S@ zILUsY;J|svK^7!26gX6*{9Oxca&I@j8`}{@C`|>2=xmxaQQv3EusU1dACC zCUHRy=W|)|@%`KCpKnqULrDqh^B3ujs9WD5e{)%TyQr?Wj(ympcmE;(hGmDRzodx9 zwciYx?2KVhY~tu~?p_TJ&hZS%u%M`lJ)GkB%mM$+3$2gQMn-FO)wfRa+m#E@1Gz!B zeO4tg%LnjtIpf=C!R^W+^Yz*>Z=^{D#I)8yQ>8gHwWLOt^|)P1*Hf*>fY;PG7#A}Z z30t#)?P(F$RJqh(THb=m&yGQKijm5 
zm6~~VGjcbZ|CEgPjSuiOkqsNA8$~Xo5xAK~o+)&?4<|UhdB5$;m~nLd$gBUfKw4$W zW)MF9K(c6Sk@F65_N^TAeYlJAVClR~+a)i8%E#v?X}>34*xu}+J*WvMZbkYp0HI$; zUybgk$dciIz=V(INJAwTA(effRne2UvDlw3`!y|}rM3Sj-VopU^O4|UUj{1hR*@?E z2SVthkI079CJHBJLAieOxxVJ+CpO{9&({T(_FIvKg04PK@7b=Z|1HWR@p*qS#q4GB z7Dn(ibvrcNR+r!y6!Hth5t}faZ18h63|1d&GY*qUUl5p~g0jVIVE%|ygH)8%+&@>! z^^2Ap8B(;^G1dTVr}|#u@fr-K$b+L^g57;NiY0vBUqgsKtLIa(D!SzLNAKMr|i6VKwcdpm??!RZ*CtfCBSU$m)`&$_> zmjAH=v=eb`xuB0?HOtG_U^e!+Hlx}KfBMZ{niBEm?asyxL4+;l^ zUs_jEPfBUysO9+Vg!iAqZ)#`%?HJ57*8s(1=m<3bW?|zbCUEyBCDIi%XVd_&>XfSH zEa&M8gfVc86J;i2;Ol1wFJw%km z&5!Ttj357^L;}{b^6&qnc5auwajrW^6~D7%h)dv4rykQejZ)GPhlo359K(|D{j2C39=V^j zq%6H%26}0w1p~8_M~9pvLHwvm(60j02AxDR(kiy{M`u%_3(&NU|1zU*AAyDZ>D}O8 zPRJZ(Zk$Qv3KM@{49cZPh~I?2rFsqgg68E;2r zfQlI$)zp2bExnC?0ru^8Jwb!@>R^ySRnn4)j~j|9kfgq~Ft(Q{`TQinpnDSoP3tM6 zufuHz?ui!6dI(1$R3sRd!8sfL7L{fxVt+sL%CIaooHQhx3sj)am|H)Y>GYqNh<_yu)$$7Hu7; zX@NrzJlze&YS@J(3ie5xoem03VO+Z}D)k%meH~Ea_YyDk+r+ZDADQAklpNO^y)X)p zdinnV?tKC4l+YeAiILYoSh4S@`iewV>^Id%dwwUYX^q&0@mX0EjIHibJ~FC0ik;@m z!dtu~nDm5p^2|7W*e=kpS1AS;dE07s!TA;_n$!o(%yzb9f-Q;N^(2e*wbj)=CME}y zWA8#Rh)3^q9Nh(GuguX*B&1T*=NPz<+u<)YtaYKE=7+IQL^;|?ROElqleP-hUh2(b zqe;(vqkpU15J>3T-M81TcI-Zf?P%gB#IrEcg=W?a$ATdu5C9n^*a@G$#4e(8G11#w z#?fg}#tf*;%&ZV<^0(!&;hdQ>+i9|dCy~k}&yg;$M>~wDmf5uTx^Vl&V^YGiKiz7K-^qRR zt=K|HLI#~!(_qNAUl7Y^mJ0@UwppI47=bk7M-xSUlC--wY7BF(J7$b}o+=Bk$o>^< zNuF4*6kJ6Ks!rsY;+~jiD|zgT_~jSQoQip6**X4T@b6*_dKc?CpDbsl47_*@a)qtc z{{8P@@l5p$j+_cuo5rS}vaF=}18DU#Xe?qu=^?9Rrj~u3JxKAlRf9jxT~vxkrifRX ztEC!EWMJqDltAad3TKCiG0ab@rMYjq`yH}nw8wG=T7`7U2bx(zk*&xmOAs|~IbiJ? 
z8r&6E;mpfKc#Fb*9p)yta1qmaJDJS1)yO5rkCH4NSLIymW0tU zhb)StZ`Co9@-GfADJ0Aw6ce_y`!7L$E*hzN%u09w*w+>5b;IZ2s_J$A;n{^mTA*t= z92C8IM#hRim@Zzy3KKK#(hQV(j3E}g0V+Cq9h00>nzF`5Y%Gg35^m1sk(=rMcLjh4RPweUe%(aCg>6pRn9ep+CiPhm!}fRc-%CM0TyRKZ zORIZj&{bthf#%H}%xlryI1o>&FoqwmIYQ0I&oX-zEE3I-W8m$qhSb5X!#RY>BJ^va z+i5J;gY6c3hRs3xK^4i5P~-UhR=jYTKd(}wW&_CJ zmRu%iZI7@jFX^J2BbZ6|Y!A)Tyldf2FagV(Mjyp(>){ zU&WT~pE5kdz0o-vjNUt{EdGHM>kel)pG~EIfR6}*Z*^SjvE5$b#}V-Uso~!e(0jw* z2T#Z1>TxwIYBvy64(gCaXikC~xCqjO$2%~auu4v2FUn!GauuEAwBx@2Pz)yA$lPi z9?X3pk1%6Su%cWsnJlR+l`NSoMY=@t&ti&5>R%|MZD=9nqc8=5Xu<$eNgA?9@<_@^ z(nzXEG7L0vG&q>&K)zlWVJ6ZLmh``JFet$;EjK8dfwBQvEb)JZaD*kPpI|Hke+7t2 zlDfkZ1*H9zl_aA;(+PwS=IfENM>z}R5@zTTuSPS1!3sz*Aj5%$3dqbRH;2UwNX;gr zfrSf@M1hI?#;OsBLUo5SiPlMxPWl#QpL(U|oDef)(1iy{mOWQjXD2OhP zCP4NUc15}k-b)O7iM~PJF5@0@#j(xUn-<6kQ-!Wa-Y)$FzWo@e4#SM@E8!k?MYi1( zhz&yx(?!as;2wR&u#Ma6&|3wofbJ{i9)3l!4ciOo-3`nO9DLK z<@)I!TAej5=l-`kraI~Bn;~*e#IzI)>IL=M5~hCm6XmvEFMnVgY!}HH)tcB7_qJIe zFRUAiuk@2$?|I-VOcR04N>&_p;nTWQ|U-O0k+|KV_?c)lF*sRb)=gE z?E%|lz0$pKy+MKdfv&x*y~@34y+*x|fnvH3rv1XbV7=bGe7!oo1j5?tI`~E$ISJFV znCdbASs}4(FmteTFcX1P!sOfDy}yLPb28z|PQwR+5*N@cVHR{7e~4llN+&JEB+Dj? 
zCQEX~u=p!V;V2Xzh4<_SC7}QJQlYuL)<88RFk~=9F#KVNk@J`3pW@G9>OE9(6h~O@ z!0kY5;omHgf5nC=o?t~_aRW&ModaV2ic3;T24VyxZIL#^js#K&ll6#6lJujeLqPY) zhm!ZBSwqm11{;tj!%_vr8ju`4`?jVX!)OHrXHyl!NCyOvph19<|6~c+LlZw1;*PkX z9L0_A%1IZ0rXR(o$;$H%zM&n}R}>K94!WuFNPeB7p`Q6f9nDe{5Pkl(A47AQ*B)}i zG|Ee3m)9P2Lp6#|QcY&?)ACarW$LWxL(JK%;!`s%#Toj=eKWgj&@L@)Ee`29l~L6H{peWduT z+&9kAbDE}{wQxuHQKmk!#|auni z&F-f#85?ygIwy5F>5^g6G$A)Z`tImxrF|P1Q_q=W8%LY1)B2^dUP`x~!V{Me3{~?Q zW)DOeAu|t9j3twpW@oPMYUkgTQPfbxq|SHw#T{!mI9rMC>(*Uh2pLX@N4I57qqe#> zCXe?tkyUL|fFR4@iDdDWMlGXI1WY0$R!{$>mgCJ{n^a#nsw_0MQ9Amvmeg4!N#8*enw3^ z%DjO5l1wb4Q(+62H@tESq>{yL*bWaZAT=;8l_y0}i#UASE^+Uxz~hsa7#q=)RMyC} zzdz*vD-6q7SR+Ol{eXD{r?SwhxyenTe`7=S_1(ul&>!L7Q_~CdMfme6*k4233ifG> zW)5 zsfT$BrG3QOnnHVcu~!QOmvP0#2Wck~I8ZCSNcO2u@|myF+Kq(r%+gNNPW3P6P3CF+ z$~l2NSGhCaTb)ntG2W^hOAqz=+gv-rh$0_;CeinWc1iKQctsdM2PKV~f5s3kHtiBk zTRe5Y)Hc@##C^qPp4ONF6+>XzYp2xtZ^UX;rh#O77F*16p?4kR)xAq4yI@|!xHq{| z4p301O*OhNQk#;WTVKbOOf&L>5a-}}w8LYs0^r`Z8QRt0iv|;aa3jc2{sZx#?+S(w zPJ(3$Jo!`z#5_b8*!M+4VUOXp1$*{Kzh~~siVJC~yDs_Cu-)s->iThO?&1&1gBJE# z&&|Uqr7JL{LxCrhf30iJ$>a?y^UvWn#EqCctS>ZkAN`fr6L>pZRUiHp)k=5Um4G*j zoCvLN%;QG?)8iI77p&h!88q`T=lkrnvga6={{tlpA{!h1%NqkSWm1%>7{jpiGFW{( zJxHq&Nv(U|p7NpBvqeFzmvZ=i8fmPCdeP|9I#}C;&~`wt1@r!cM8V*5zn@>cd82ubPdI#GEADhFP#XavIG+$FhdN{O!#rLu zRuw+qRl0)lwqLeZxPlm|*E@n3{g4|Tl?B%wGujs~KX@HGAGc$QcagUhl%jki3%tSn z{1u?@Myz}~ZXR#1b+UR6uOQzLC%ZLaz9?P|qzjGk5JFN=I$qVe7>k}*Q^gI8?05=I zcnZye%?Ue6`6m(hLUQ#j(bGN;&^!e)?o>CByg58bS|%}M zOk|&0qL^UkOJlFW9yX9GgLY4{!Ia=MWS6@I_?O|?!5BE-mk7YZ2q6S!FjiQ>_QKwlL;BiHT$u`EmEvmIhR_ZB)mIE=8f%mxA4g9m7928NUW)ba>p>bKy zC|`pdP@H!fu$qj!X*k5SGTfqoCtzeifMB3LM&IrJX=m@w0n{6`){7lTg^*(WBKnO) zzY#}3r?`;A+B?F~-w1U8L#Au(Gn+gBG_X8BXO>LCUluI9HP3k2ytKDp`&zAq7S;+k zefHwxtMqZM@Zmm`$8t<5w(&Felsjj5s2{&L#*!V3#RREK)XTx@xGWyyXQCvfSZ-n^ zvs4_uJVjEB9py}tN&yo4e;K^_aVW0^uG9*1ZgR2z_yV65r zjwRP5c{G!ds`!D%lr8w%$$ZQ_SsuGTnZK2>QG#z z8_Q;0sqOTfF^+kxwMy57Max;ioB)$8@vC!KLFYBL?1&r(MkGO_!GU{T-YYh5vb%v{ zI&H;avso`BC*yGCgBgh14X$_=#;^g{QIF-L@~hxQe(s 
z`^5N5@?AB|6i$-Ct1cX(WOcY!m&CtvDd-@E>E8*M{)p7kYfMh;&UI@(reOo2Z%b^? zx*c*y9;i0BW`D31MmvbRsRZA;#zSos*us9m(lmcBe!chd^>>%@6nV#(D!Y{4kKl0C zV*z^kE)F9OU}Dc>%hD%a$2jh)q-NAqj>ydnx_#wMZM+UCY9Epj;p+iif^3D5T~ z0g;D9sLIb8>^rn7ez)PN|oZL5@Ilw`2;)O3GFVcJeq$(4NEZTq_FlBKES_^p{dZXr9Atn`!?wW&Te+wPrd<_B09@`W5O8ZPicn zu`}HW3HG(OQTD^C41l-JP+1cmRwdD8IU{|tfWTLSWc;;83mc?YAupqcak)Akd14LL ze8&sWi24u2uBj9OGJu_q_`oMFY_RxvUtT{QX zXWm@7qYP7$CYXO$6 zu{%CmON0|Vov!eq0@HEMUZiK}7iqBO7H9eKXmo5!A_@?jQ8W|SMvyaf9gbE-TS)r ztX>t{g|#(;-?aq`i&KE`VYynCYM5i2%S`M&UgMflXmz_Q>)t(;!P!=F#D zcrxTkC-OOUgR!J228G3x6)L;y$tZQhx&=;F`|ZFTo4CssstyoGz5ytFbf-e)qI0l` z1Hi%|hQ~f1o6OHd*}&2_d159v`4>r2Gebh7;7o4f%=4E<0uK|tfIuDzw~Mu!@Y}Ta zys|O%N(N@bUQfs6gWU$N{JoP?*^1g<1{KdOjxJbvcq}Yw3EVCq53!2IKwT1QX z4fzHl&HBgGJNgVKPS**q=Wv_F@i!y=SN3ZwIT0CU>P5LRbQQFaD3N=U;gdK;pB?h! z5B?CZN*$L+NPl|QA!(+u6Jl65+2rgPr`5nGe`(_@7_Ajo9r_6t>QbSI<$eUJxY2F| z9;b{F`3da;Fo)laTU)eyDjFoENzk7TH&xnlOuUApv?*7&1x{I7^>!VXP5RXVqt_`S z$cPhbUK+8Je(;w=x^1xPY{)tU75X|l?OH`_pmA{pRxN*kyd+i>T<7jAGtyJf>Zaty(QdV$AF- z8{*H{VX0K3*V(oj{91W>wz^DacY=PQ!_sjwQ#h@E8@wi0HZmor{~O(1erl@FaXLtE z7D=-Cu)rn`gTJ!EI+|+MWYRb>LSvW8AM!YhaaI5TMelX$vG%8?TUMw|A!A9sNst>T zI_jncTrbpcpcAD7KC~K$(P33LmyV%Uy4qtYY~{-AP`6xWblyH z?OiMiVzykY$yf4^m2r{-)TA)-Ce=zHv5Gl7b@$jiTPTcRKuKl_a4ap?vMU;V}xt? 
z*vqe)7-N%?jTk$Zu9fu6jbK~O)UFDEJd(gktsiDOIxiL502$-SoR_ zam!8$Z71e%k4yirRk+Qpua&3AH3~$67MjyDjQTI_)Fn6O_q~#+e(sE)&hL3H-m+OVp~LW{4Qo%NGc2 z0%8HO^2O-&o|prluW>*WS3w+3j2KG&ZkC$7cC-_v8l9QIFw+wwcX6K$mH{H-kd4RwPB8Kh zQG!hiJ~dp9ZkRFqzPkq_F9RquwjNYhgn7+`@_I%^Ow=eQ*z%8`)XCm#tDacPM1D8Yl5s8(~PX7$3okl8CE7WgiK5 zWxn;?#Y}F8ia8OUHjHzA2^~7(piDcLPJ_Cnv%U|9I?{b1$S39-+nfC`g(@`@|0O6P z#VeiCyzd5OYTvOlf!kNt0e)}f#xjD%PjygAIY4|`EK}*) ze<8DpJ&98sfpeO2AATt2Fc4G$t%)~@dczH+u7Dj0PgUv}!xSdvpg%Y$3YGqY$Dv8X z`67PiGP{@eL0cYw)3Nnw#c(HQetc5G;bF%Nx<-3LJrUZWhLzR@uYhWDQUS5KC#mW2 zVB?%$l&=!T_UP5}aYxFO+qL}fCB?p5yGD=Q>f-0F&0F8`M`c%?MbhU!4GodV*iG-K z!M_0mKynW*b{a|2N&93-~PhpRt=#C0-|;^BNa^&hGy z_UiT*ztTjlw^`OT;P&k5Rsib?R8+OjPwwtfxbwY1sK{||9H(R70OWf`@_MzE5z#d9 zFm&`}Sqrl?-c%iRT+MSNt01osEq9aOtgIZ*^hdtp2TuH`MpV=kUQKf9xlGJ}Lg^o5 zj5128B!cT=L?)SVwM=qjFwc4(f&>q;6Hd#zJc z`V8L5}NUhIBVP#_KeP2jr*<_}wy0vSqt$uUu+6jIJE4xZg zQ{V-#0@{tKv&BXu{n$aJoxI3pCk83_(lRyRl;FXRGdgbdn!*Z_c>WR^Kl4sb%CsRB zP&Q-4QG_hjgKnyLT5s8F*)VI}19id9+R-3Al2#hSfyv3yF6gE3JQU+}i!f|gkkVdr zj4#-Z44pZ{X{q@Znxl%ij2-3fo@D0xB-zU1$Rn6h^gzTcaXsge?Ox3Of!a}Ui{$!% z;`F}&Z9tO0fBi%$w*}W+D~t&m`FYX@>xeqi z!q8wW)=G=VWp%V-eO00g9e?pSH_+q#>oV}!(I@qPCt`ykP#x6!d_#t$#YvJY4Z4Mf zy}C44Mw*XX&yYB2NW2+!@WB7D_Z@IeU2ntp-YhZ*2?={8gbhh<5~j%BvX`i+7?LoQ zWe5UV6;P=P78MXj70{}6D`=$-P*JN^t-ER+b!#hX9d+Y(?oB{yZU23L-~0dmzwdqZ z`Q_%h=brJLXP)Pr$R#<&TLQ;H++sEq2;*SD6+C$W2?+3U=gwd^__`D4#s@rr|KRNa zVMN0Z_aojxA9}s!Ua=!`m}K$+S6ZW>C_A$FSkG=5ECFG z^^XEe{)52Sf0mdWm)H~7PAf6ucb%_&Fo^*p0QNGCv>3$5@IW>eBMo4NE=qo|CztZq zk&<;#CdSvw|0-6-4vdv^{~}f*r~5bFFeyC3{(=*Tl~&NmKoEGSR5qQ>Vi>X*43;6m zib=ILGG|(Kz+UYjJqPrO+RYPi@oeYDvp^tX;$cUMGYIofm_}hM7W~v0+~9c)NNaS5 zf4Gco+L(j&W-Y$IYns_@oFsFmUP58`pQbX zkU?IC1QV8pxMctM-X_E(74}MfI=z~#M+aE~)Bs`~#!nek5NPP=Vk&Z>iD+QqC8IET z229>g*av&n4-9=H;{%Y`VI*=y6c?H)iC~q$dS_rDVZ7sD+ueO(4(F?Gm$5&>iTAU$ zxpIHv*q`ufb;3(K2g<-82iQ;MV!H-j$P={KbM&+rBX&4P62`TnIOw}_f^$O687}FC zH6sM!0%3+-Xz}PFUiMT54LIi5WDw}k^r}R7)aW<_=|m7-fIKi@Oc0N(cY$Hx5IA&< 
zO6)HVa^_e7PX&hqy)OYaMSx5oAOZ*`tb{=lLIi%Qoe9C54mfrzH3`8?3*-gIrTAgo zC@C%rY;pggKb~=N6>M?WS`uJu>*Zu@g@SKyClh!9;o)nBy`Rv`)Wyq2_YFo*A=DGS z@X;xdE655Fx2Zrk-rCQK5*Q|;blSjGCKPZFM-P!C`q)HANx_v6ZX6~U zSvwjvVjH>JFwMC7upVVvxx6~X-ZmiJM;tR)WN04Y=n@y^Z*CRtp1D9JW9YTL5x9#- zMxm|bPiA1AOnfeGwZ(%(u0&W=u zfMw>sNu)fQQv?OX?hgfI|@ADJDn8;m8PkYm=JPwiaIIHS4|- zEZIUdg0J;HgsH_()yR_G8?Y6o^TEh|!Uj?tFmE>~?i)U23{h>ZJK;iz1v$WKFe)FZ z4`k`rp|;r3DZ$p3Xk@s+tOKcQhn?$yWi$X|_W?l~M6^*o0r)ns3{Q#yIbMBdq@FUF zCy)%pF``ee@+ zk#25fxnsU8@HI9g1iW>oklo^P2D5EUnHD}H0|Dhx$bO;`pqvZYL-BzOqP-CpA|M0; z1cPWcejCHcv;hQQzR6e^)&N{78~30Ec|_hRI=xrg_*{V z)@;q#v?ax%6vAAJ-k>yvi_pp1I~K)wpNe!7ek8>JZzF>Iw@@uQV0F6#TZ91e16gdS zES5yK=W4~G zAp1CcYa2srlyAuMO_XBdVKO*x$D+xDZOu3{xGa{1!*h|Njd!eg9+}E_4+`+cX&@V9Xx4_N3_SxDw~@`JH!?9|EEW*@h>?kVj!7{exL@aO`jx~B+eGrh)@+pi zpKB22ONzX`(+2YxE*mX3^0zp_rk1^T-3TO4SDWFU$W4YJ*Dcb+&G%Ssi=eyqjDZu~ zML^XDG=&2G93zE5W{`9{gAT!T7|T({x9Kr(Cx{p>0RE!^|L^O#|B&H}0-rvIjJrT! zaM*4)xzEr59=nX=q=^qsWQDB_W7=}<{ACU_@3qKrwO4p_YG5p!+`BQv*~SQyB#iGb zD3IfX>wpe@$UYDR1%0p{fFS~77Z_I$_%9fIFzbiK(*08Gln|D2OtA1xMkoSUvCD%g zi$u-I+l-qRZy<&AE-6L}%PRuE>FurpeNzG7hvPB8wvEl#!%WtUfa!T%oBjgs0O{}d z>0fnXvH1_Ssa0=6pnbZlEXl{(*imLJ8YQqqiwB1l#5()tHI6_>pv09727geT(8*iC zGSHvF=GuoT0#a+0UR1EYf^Ndch%sb8V0CGM_B<;~z9pY*NuklnbZa_Uk7(pZrr2iI$m&35_pLeKVoOXGcEQ*4_~udHeu@O;XdTMH?bkp580EFO z6KildGqKC6i>WV;&L3jzE^#%m_J}nHrF58J)%!LDFE?>FL*yhvU?4odHX>k5Adf_$ zk!CXJfq{c+d*yfm5!*}o4gQgEi5w3Z0k19#G%(}yIk2H21aUcZgJv?A!&YN!&Qf4z z^T8~x9K%?>UnB8>Bdj`0TWDP<{k5rLkDo_4C>=p=Sb z?@DWUkGF@Boz!ZO2iIOkvUqzwa%_ZvLf5CvWpGoH;R*9#cagxiTxWWemy=WFF8qKxx1+EZzh21A!9r3fh$-Ad4dK z?DNb(_ja~$xUxb1Hp!_5YiTw-8$Wkvns7bIBH!GNXE-KRI@pzG&*S?hqIbc1u<7ps zw)kHGOArgBmOv+LB!&`uh&`kVvMm5LfNPX69NP2}^o~*ksPm}vXoLSR;X8Wb-@-Bg zPybWE$YR{mkNy+X=wJR2m^9{i<~9H~-h+Yr0GO|XOIl!M{2O4|C-`3kM?S&-G+_V# z{m+cO=pVs_Pw)vo!6*0x|8v1GLkrI1#I1K5LP#umybW(*5CqOo&JRMGixTI1YPJ$Su8XZoCIHV>WPJssZ_oD|H zkw`k63b9CQbvPYzCGF7R3{dBm4%dfcNw0J`6Y``)U~k7EQg!^);#z)caVwlR2T36ml0cpSi-mF_ z1*C=wpO>H%-}*e6P1eckX>ssmbwdU!#RD|;bqYxWIh4U*DZ`&uS 
zFenwwJ4n}7J>W?ZJ|`v46AdnZjH9LaSPF3c__(<`T`2%{@<3aqZd47ZfpHXjUW7}b z9H+7Xe_DjkRNI3>*Ov;vVmizF)6B6Nc{t_-+F`Q7bTPAktdG%E@Tb22IhWq|uEcxF z?zb5YPPwAr#y&oK?dX4|%Xa_|u$gG*qQS@OvtBj+W}>N!oPpv9esVAtEqL9LZ z;=(+6F)z26C+BI@a-}L?t{%rL%;ITsR0Dd-Ru`5OVa1BV{33ZluBuqn|8NkmznkE~ zJS8tmBhSlK3{|O%!MHq6k&kEJb0PLz`#7dV;Uq71<|$RY2z6nhCP$+w8YC8%mX?b0 z`#KhZ!Ni*KqQY#oyeOw!tk7f?7HEofRWUqEo~h0qht(TiSi;Mfm-9-BRe*Fb9qcg= zkW{2n=jUoPDkU$o9G_HJYD^G#s>W}M)P>3tg@%_~z$?wkRpbn43%+v;6nP~|&{R{% zQ|1;I<$(d@1xnB=7gSJyTB-t#h{x+2ys#jzoadD5%v0rOVy)iyRnS-aqa@+gmAM7k zm}E5STm>ev0mFml{eAi36gcIAQ8cQ2OuXt`Fsib!v>>lgK5$S#v|LN7O3efF1x&_} z5=~Kw2Jn4Cu0n;?$x-DMeK4bcA`DyrQ zP=f}52~pm?gADKz{O^lmAFGYvo2#v8wF<4WT27|AR7X}n*M})cOO;hUD5^tXSVBjs zWRfeBfS8jYR8FS3l3^lT<%Ph+mSi*$bsJD(x!k(K5*my{JaALmFsH)J(;owrKcF)r zw|B^Ef79=WH@O?kVoa;a86%4m=C)LsD$y!pH(Etl*Fr#G1mVa4i8GCo5`Qr9nxuV8 z^h{L0pIR8Kwa`+WOhPJ=%t2CY4YM6AZblufrN!Ju|f)mlB81R7ZxZb z)~F>`LgN^H)QBZ~)CPM_;F!LDo|2od5+-Zp`9-{hkRa6BL|@_|K~XP>x5OQli~zS@ zsHFcEt!)3dP^OR4vFCISF*ZISN#cmwYj3R!LUM~Ry$eka0MG^7fKp+juJs*o=?K@QK9G42*` zPuy~8TmJgp=;NIo;gTE_Ia<19?$e?p`Uk(wupwmVU3A_vS$03zt9>CWd)mzn7bcs# z`@I=jzSCpE4U=mVHa)oI*S_9<--8_%Pt&fEFVzcHuc+IeE{#;AJ@3pTkJ{&S_9slYrw?^gpYbBjMoAbOr+jnEK^T{c{ zxE*Cp?QzQZHZ#F@{ZW_Ce%gCqztef=)5Ih1X8YVtb+su;_|aAWhv5%g^~(_gu%i`K zFcolwgjxZXS}}>nL~aB7ATy$6|Dh5G#kse)&g2}u+seRo$VxyoMU5-C0{0gelERB< zJ%O($yl!{h-0jhBfTmy-Y>2UFG#b?s(Gpf2qVos}b>0Uap=ew#Rw~vx(qeJH7%?g0 zLIheV0#(pqWIbS*BoYONi81I9G_vm=MXCq?rOUwwP^tf>XAR21=(Q&@P+DJ?1icRo zl7MN+M3Dyl&G?Y#58?jiT)(q+#^<(Y4YQ2${vvMGv?qf0*BRL<%YVJOi28WT`8E2L z?9-M?YHunbU@24cRU)Onxz=fu)02@V+ow&oA7P_sdbrvzXRJ75=_((IbDQ1oM}~); zjX2fXR3H|vxwnd4m69I!sJXN1HMFR)En7MI+)oe2JUy5f=FcFQCqx+RYEQFid|5+Z z(7EHwIliCmWzT8JvGDlDd9v>|^8|n9usE6j{nt;Ch-Z5%gX_p&`fYR{>LP3yf7!3; z1u4Jxke>AnL)W3Q_yalS+b|hHkrR| zBPTb#MgI^>jaHEPiBx-pvEIL96LUX~wvuMFHR(5UkrX4OA-Q^ID(1Rx8E8EW!} zLTbOLBtq0xYsYr)?Kla?jk@HczPBTyU&+CewuCR_GZ$Z znO48tiCfXSSCD*U&a1E=s3%5mI~OZ%PCysxV}0*HHmSP z;P!g#+_`AM=TC;COJ7Yn*VKNO-!$dLDb5qU?aBE`Tf^oyM?ym)ve=H!SzkBZJWZ|~ 
zvi#-r)$9l^wW@h~Pik2&yx1y1ZwACd!+W-05rpsDFHC9PWL*{{DLuO6s_(S=RynfG zN`K3n=S$k)c|Ph`uoz)wK*Zj_ zS&g#DR9&Dpg7J59qLmA^o~d#!S~;hJ%UoYICUB@@(=B_>8<%UeG=%Ky5pUb z>N-Sc442pDXLh6^-^cMd35zaI47{GUbJMWJme;J{>h(Lz9@m_>?+ZV;-dRT@9hwt) z{ZX>f<@j~;Z{C?R_H4!8+YOJ&;u(Z{^IYs~i(dc!=4ROvP>HKi{5@&aEsd?sMKkIy(sz>zi+;;vD(thjZ{mI*IeD$@) zqa*IXc!8nA1p45l+VLa8L%6%z+c(E%A8HPMS5eNdSZ0J~-3>MzZGLE(E&oKwJ=c3X zpGF>aJ9A!I5#!(zX_qnLm!S_=U0JmB$e_aAm5v%R`@sZ$*OIEejw#=48S6Kzb%K0r zK`Uof*V>3jhJ|luOY_=#uO=R<6&%jmz0~S+LnY!T+%&wd<0k*sw#`QrTgy^NXM#it z>l-$&EL+#!vY^EL!u-!UCAMPe*Lnpl>9zJi{HB6g<&p}q0oSR*MWkkR!V`;6VZ(%F-mVz+h?x|bY_TQqF z)Bay9Y<1NMe^LEk#*cc@#q+oQ7hbS1P-et_JuH^%`+VZ`H-p)dK@Vr`8zKx^89C~* z;j)M8vYgz{PIgJ16cao9@t)t>PA3`FeABMiQIkNwJoWWhYRjc;;{M;R?kX|m1$LIX zEgE|H!86fi|KOJ+Y|xD`clPG0ZHX<{^HsgiO=)w`{MbRO4?OV{KFDxunGbETjJf{( z^LKXkQ-7@6bGfPDc$>*L-&uyPwmUMV^7d`Xx5gJn`Jc>r(0MHKM77(-OHBUWJ+r$X zSkw>a{C?24LTx%T`SrRk)W4;4chbzRiRbTbuHVpVQF~^}Dpmrk^V%Ux<*Uy{ulGk} z8Sj}s$#YZ2jv1kOh=K6u%}k5s{`P(GV=CatB<)Jn9$Sg_dn-bJ`WB-H4~QqjhK?sb z+PzcuY+BCII~C#2(>9&5MH8{dY$8}oegz!phq98P66}`5agj+p-KDPXJ}Pg8EK4So z$~=68?p|4*Lb)tcCG=EyE8M+2BntOT&p#}VBMX$b5=f`3)|z;E*>21K`bY`V_}7c$ zk2>qZqGCK40Br$k4U`(_Gp5cN*hPqX2~lr68psDkgH#mcu>+z(*uNa0FChHQ5E_)x zAFL4+`oOyJm@xA?Lm6UqdE~}g-x+q>mIj{NowYta&Evw;yMy+GEn>y~v~}*y?ECqexx264;A{K3)MUHM^yqQEDe9GXuS5ocUj(9yTG#9$Wt=ddRT@Xb?{fz zOdA>f+}jh4-^cH`DEoHq^V$~XwXDXk7ri7lM3kJ}(G4&-|RpI?7 z2a-QittjEwjTG*v4}OxiW(B*|F*>nhd+1p7oyI}4f7bPh`_uHks`|O8<`Ty-nv}M6 zw%{$SUN{srXkHI5*%anv=gNDw9`!|^69qY4Wa~JT=7%gU+hRpt9p~5TY~pEgvgY@V zJ@5|qxyg~n$B3EXztN`LB3^Tu&aFSZ;?!N&8}{=y8$`u^yEcva;P>wIi{mLmVSzKTUkE~cYmBafn>SAuTH1WpI=~El`yftxK*WsJ7 zOLeU+$9?(L@vD#Lw#iqUT;l$MqE(cMXcZ~5FFjz+J*7Q*`Y;q!&iuEyS}BTZeXH|7 zS){#>3KEb?fH2_UqfI0{@mmRs-Tud;Ru%H+cz|H>00Hp;*xb5Duhc9{(T4K{YpYnX z?mHfTGmPIHY~eEQ-iU;?9b|8FV$_bQ-3;r?UgHiLo~J+Z-nWRn`H;_9m@5fBJxjk_ z`T3NFF?M;ImPLJeFK6_rt4osGXl~t`F066gIFY*P#|6WWj4>zO%bIXkn&e<8zOzm* z;fMCn?W4}`7ZFO<Sl^Iy!mVFZ3DW`NL;h=5MqVtX`9KUv4w@ znp4!$jG5Q%N1f(J`zP(+mKI<~cyw}Ny7;W%M$ssP#PHJg7tpm`>yfIApY|BF@3uIT 
z8hyv7)!?2WYL{t8=x1R!_jao%{;a-ZceN{g(Se8iEYp6PHs^k96uNrd7gz76H*b1# zd2`nFy-k&qdd~Gk--&Ww&2d__dUAHf?b(@S8ExX}Kc;;-va8g|>9?NzZl^l8xI{Z4MF>!dvndp8EOR z%-a5Zst3fwdml-O-Uq;s1|zfn23&;5u%t~XP0G5epK;Li(Y%Kd%u~k{9Cn4(^KaQ&jD_mF`T4XZ5k@ zoBg+{#b9;Ih$^& z>GekkgvZmDZH@Aw9i;6$xMAb1whKQQ%}fj%<}DrXXg+nzQ^yxq+y?P-7q$(bl~Yi- zdb_4OkVIYuyEyq*1#n`s=zH2@pIxh)YFTJBIc)WWTY)aB72TtfGON3-6rN4hS1&$! z@z}U^vE#L4E1OO{7^Mihkxu#YbANIv`6PKui4C_?F5mX((m@Mi=hdL^m`)F_sKoc1 zes3MQ;5>A$HGKE*CrvA<(UGi06*eaz=L1`otPBV%we~o8YWec!i4$#KMK;>3dmSOD zc(&|C*SPJ`P1k=dDKo$Si`SxZ)982S+66f!w>Q0dQ}Zjm;+I_CO>fX1V$2s;ua@L1 z=J}mmIy5f6t0K*|wTvz0Pkb0e+Y<2l>!V+d+S@vFN!s|Kagkwrf)6j5kWQMC z`tymDKOdO3I#k&GrDdnQKgIeWHWRC0$o}E4Ki7O)ql&T#BW2__#_!uZZf>d57OQB* z$gz#W8QtT~C>+PnT)Z*=^=*^YD_f|iXZIvjN{6`nHH`S8Wbmuqr}sjsjpZh(!IgRC z)@Keq^`^7Q)u8STkVlUlxo?xYoU2e>X zF(dEos58tDUz=9jGTS4zTP$vC{JgAXc+1bDhlqaA%owy-zg9NU{N#G)Zs~Ui>pTxX zU9%>A%8(aj&n_>`KVEul`Ni4*rIB=8%Z!sZ;4^v4_i%+d%z~AQX1!Ijmc7^@Npy*l zCpVCpUpJpz-BC_SI`<%KQck_gSc{1}*AKfGHqJs2+FFG;f#r$adl``>RfrKNV&kg$ z#eZyb_TvX{19Wl}YC2FQ>F*B^VK8|A6OzOLtQtL0kQGWLQcriVO#DzI-EAB#E@%7z>&py~j!*oSdeJ@S&cvV&Sgrdy4;q|c|Tfg?5^ER;1H8GjC!foMq z4F@~FkW|cFFAWdNuzGXn|FHKpa8X^?zWaO)NP%Ni1iX{t=CfPwZ?GO5^Ie$#%qkVTxz-2T4JpdgN7JmtyOES zHPje<|FzD+7;9dd+mF}&p6=iOe-`WPz4qQ~uf5lqVHgL+_8$me9yR~1wYi;7zH{%f zpDkGM)i-Z{)}D6vrpRkAUz+gLf2=*)ne*h~FS(~q>T`IIrTh1e&lDuCfBEr6rqsJW zVS6;+`oCLtthnuytM>fvQ%`*Bkw2dJQNk+^@83SIe`eFqo_YD9`Xf0V$xY?fA57nq zo;6|P;y-+%x?uILCufDeGg1E9)HL z@O^HdUxswWnB#n{A2t_KC3L)Pw-4A)JG#2s$(gTo-q$0>3 zsTlHu(u0u4N#h`wNF|WHk{7a1@Kf3VevpNwuB&uE$G)0>pKYkPz|rh zVm$f^aW6jw|4jZF_~-J^!LQ1%f`1|Z0{lz)m*B%P#wouhzXpC?ex1gnw18WcR`4EW z54cTf1MgKZa%G>g54>O54{lf5!3UHB;0~n&d`>wBKCheyUr;W9FDe(omy}E3E6Npc zkJ2L~m4+*+vMPh|4{TCR;J#{Ka6h#lxWC#ToT{dR2dD$U1J!}x!RlbJRkebLszbr| zsP};HRqqAos5#)_>TqzbnhVZX^T7_)0d}b_@F;Z@_*3eqz+==g;QQ74!4IepfIX@Q z{Gf^mQA6!9cu>n!M1mSoBj9qi99*Gd-PKC968vfP)8L2HhrkoniM)=O zqggd$j%Je?b2Q^0*kZ=YniI{5;3RVrIN6*GPBEu|Q_YyQd4L(SHV-so*5)*G8aUmY 
z4z`*xck^&F=4eT@z=9>o0t=R83oKYtEGZl(_Ztx+81)6=#nb>8j?o)lqHksCz9xcJ|fOMjd=lcBl9NaR^}t0oBFw_;ym+ZVo7CAVjjqB zW6t5vmkR!FSQY)5F#0|;l79#-`J^xT;4hH+iF7ekyu0#N`TwyXuJ7RdDmb(1IaM~YDdka(GpLDkX98zUGv~`boGC4wBl{vd#zi9M zzkbMklCa|3PZ5)F%1;uX;`>*MA90ZO4 zUBJof^k;6s1)v+~5wcGOEP6d(GGB#n0FVyYfMLLJ04KO_v_2kgGZq*JT>yvz#Z@>rb$o|E+E(KNqtAMoto}2xfjO$il2e2F1 zV_@HP{T={10PHdU34NdVPs`mUbtUsl>cI`*WhIRzt4o?nnoBlAvyJsTS>FPET}c~j z_A?)3KEm8}oqUq{4D$s;?uN}ClD#Ukg*lmd0CPICjd>XJ@REA3-8-6jEb};Uz#F}$ z8SkA4uJKL*&+yLXzUF)9dKY*Xd6#&XdsmjM2Crczd7XD7W`Q|+w|KXEckvvXxm7dU zY-Qg2C-PqItDXBg1n%@6=T_U$*K+SESU%^y$o4O@{T83_+I^;ydS9Y1g)I06G7r8c z2Yf@XX~z4q$+B-GS@t<~4}0gLZ2{LQGJM+&*$bONU(E1%w{Jp8mv55pf83pXjRU@E z##+$&p7hOxeh%NA$2^DYEWD-}?_11cSjw6e=w+2}t#5;GlW(hU2S&0R`t80wz09jg z0f3*Voqp`IxcF=-qdM@1ABpe@zbfE?v`%_g$gh{W7hi-`qO}iV%3%?oT4k zO*pl;<_&V%HSY9VOX~eu{v2?g-wiJG7niJt>}P!#9QRlGt0C9=ryF(h{IkGy{`st_ zXH5gwU&h?XyqdYmHxt}k(hc6s8tfYXPJhdFHQSi?GaocGR3H1r-{n8aeC7uEg1;Nw zQ>ylAN-d?y`dqwo{aK~hHKpmLwvyGQ!%Bx=V|(dn%noz(Zt>@pj-|Gx<9PgjmIKVu z8{`Y6JzF_HG$O}prq2`j(<=ko|^BU%L&~Ie< zLg^N^yq)#Cbl-aCmbRAeCAqX6b`F(xvi;+Rx4Uuo6fB%8y$HS>5L7c@g2#yK1Bw2I zK#IRBFpz9s$K>tqz~I18UBNMqh&& zd?B!#N3y3^Q)&t9>m3ixg8Uyiz?%2@LZE}?V?3S{%%{N@0_R!2#CNaII+kHim6^+u z%2N59?=DL#v+8FTzcAEI;G1B z(Y9KTN!+b1o36(U%d~^K%IZ+Rwrsw>?q!9vYxpkqR9QpWvXad-N7i7cl&v;2ZDmbm z&1IXp?Y7=IQq0h)ShkbS)v^|5?5wi=W!O_?N6Na&PC|p7Qg#8n8ge(+LEZ_fK?~Ik zCX*&OAedfuBxs}hLFAg?@Sr_78Z!1va9l9Jb)wAUgA;=_LF|R#jNojt8Jx@d1;Irn zb-^VpFXwaqgwDa0!8Jj|e{f@P3;8X$y*HOZ-bI--*vfVg|H1a)pbXT<~J> za!7iJI3bb7yfZqj?N32nNT z3u;1JubtsFp&jIb&~CPdyTO{!K4hrG&;i2(-QLbnhaP*#5uKr9bn=Jbx6tX(`Ou}% zm9QK(m(+!mNE1#CUGjH@)52DY+i;e*Gn}J8rEq*Ug!71?MhJj-hj*XgpagEdrtKk6H~tmH^|7BS!L3eOMMha1B4N%n4G z%`#YMq%0F&t=Gqn=nOaMnK#_bnYV-G&3a$ByN!8gZw@6UZ-iU)>=~XFZVMw*gb#*~ zaLpEsvoqYK=Tn-y*FrYEmhfiUmEn`japp7LK=^|GOvNMV)}O4{LJzG=L?xRM3(Lv9 z>q6}7C|wevoU$Z>C$Wevf_)Jg9M`G&4fSkdoZ)=JN80JMj2z-~hVpe+1i2$}JaS6U$T%%~BIoGLj9iRdj*6(sKRucl 
zO^NKfmc^D|muF92mj?sa<*)1e)p_Q1d@bXYwnhh{m%-7Y8IYkHzV%3{%2bYvtNoe&)vofO!LR(a9MR5Lm)+#H=5orB1S-^!ZEj&Ek^ zq3Ar!VqtVK`z&sV{HzC@i<;@iN<(rweU3<>VD#so#Zz*pp z-(R|<{9svAU}^aguG7`aib?(>6_c@#23AZXru|qkGi;5Dia8bYDi&7Y zX)Q9lVrj*SlDdjj+;%OkQ3Y~e#irQsvXlCJ{dE;vD|WEox+`{9>?zq?v9Fw>qSx91 zSnH@bR&gS>rQ&qOdGMu*D{(n)jwi)a<7sg#EJ%k!_}dgcb^W${MvYL=Uro0&H=Z(|D- zhduF@lFjk9I5JE8U>tE6?~0$~yJxs2A~3c*jtGqRRI2ayMUlh2y0o>@LUJXtM&*FY zbe+9RqBARPTxT@j9mb3(t=L^@FWFo5bf$_2E(1}~zVQn`d>&L)*B-+#4LiF3PhUFF8gEtT6V@vKz|`@WeK!OFey zB-n4~UfO$KwLw0_nnU`YMa<__c7o%T$16`=dp%fBdYo?sH|!4mb>C)|Ii4%eRbHfX zhBXJ7k1#juT+&^6xm?$TqXpx|coXfHcuTK_&Rdp?joq=gcMrmo-Ny5C-uOhEUD(%u z%H>v_g)Hv;Yrm_j91~XMP35c@qjam1@bf#oF*Q}aTRf`fs*}VQ_#NKo_#NIh{yo*J z7LUavIx!lrP{rLsh&3r|?~4Bv-w`{-TVk_# zTbvag;KPKLlkI8R{EAnaijOdXs$bXO|`Ca*4NmC?6kxWV-B~j|5+^O6p z^;3o@HYr&drVNu(mEp=r=?=xIjFJW_>y`CVy85WvAPrK#p>C0U>Wk`&(r49|)Sc24 z^~Y+PG(+91?w4k%uc`;6FRKUD*QKwhN7d8Plj?8P3)0usi<*%BO;fZ)X{~mrmM(47 z25TA87Hz1OBW=@0Xm;s`noDy_Ki0-*W2B#GW3>{gP4j7Csa-4A%B2o1u8o&o)2g&8 zsZ*Pz)k;UTsoGTOr1m-Oi_)*OI;~DRuPx9PNWas*sx?R#v?bay>5}$u+6w7itx;=~ z-qXIVJuCHSOr8tDLC4q`e~d(^|AvIaO=Z+T=U6 zcCB3=s2$YaknhrtYrm84)~=fFlpUrtQ<@w!4KjUA4w>3bJ#vfX8Ot~1-w8>?#1wF% z7)-xQ%FhNy3Xyk6h>>eWTE3Txr%S);#V&hJIxbT2oYAj(aqjk9pey|*5pm%Z!ZOM- zO30S`5hDt;sZy8Jg;;%CdKGvmEJ|Hz9(H339?Bx35(oc z?l1bvsd6gf^Dg-=(N9j72Z!#4(;_3jK!6GgMT2hI+C+BRx(}fv3ph^#naJ&jiSmJd+_$^UMV1 zc;REw1t2}Ex8$6plTRl4<@AmBR?DHJ(ba;+=PIykEtn!@qT=HD;tSXj^ z&BaOJBj}lj2#ChMm?VMq#I|7o)`s;s@eW;zg7~(IQ&K{bCPFk=Tpk5wD9k zL^1Z^Tf&E(__pwii{g?f72PO730qEtBu&ypn7tW6EJ=9KR!j=gc!DWG#7!1cUr}l5 zXX+a`qJ=2Y1Wmx%I?V07Nqu-$`=ahH&lNlDn%Q(>CFic3n65`^27XUY?!OfHj4j5dukjl&v9iRp=1u#$*X=_{n7&6w#nU?zShm%EF06}A3du=gYIVfA)>#QhJef3x-d-8labi_O1eUhV&bdH=2a@?myAYOmca zPCoA5{it*9X6y9l%;Tf#{W<;p-{$mT^*-th{d4E=aohbp@wXVqUuyGa`|0EMdE?LR ze@31E>0JNXKKxsZ^A>L5pA2JTV`Ded)2j?MOC8CtGH$(x^3V4kN|R`Q|J?;gw(vT# z9U~o1M?v0+yb}&*-g$5l;LSUqpXCTTV!#B)q;#l_F&OkgBT;;LAhVPDE~o>MHI=R zNXbxaq8KqVLX3m;@nYip9dsF+oMp^4kjdjjeX6n%t8(EoV(xR`-^PogA? 
zr%-MaPot!WucF*8{uL!vJR_E%%~J7oai>@=zA5e!|0P=Ck9L$HnENI8Lo%r*%XRZh zle_I{cB?(ho@39myX}ScV!PiSw#V&N_G-wr_UZOn_B#7~d%eBEzRcceUk#}dZ9Z6< z?9H&Dm(8%1!}dN{w!u1;ov_=8UOrg1p{3q0l}6MrHcAUd@xjt&-;Z(CahcEMAVzhB z$9QAuf-Sw&8|D37;%9mgG~ z$mfo8__=}Y*E%ldpGVv@J1#qg)8tIVc>HwNF^uCR?o4r};AhmZ7di(z2g6qHnEhO{ zmLmYGjFl>O3~&y0X7l}8*iS<|`R(=2krdyKMNTKixZLS<7C4KXUT4sVb)(?tIQnxs zV<;1xlW09S{)?fTj9BAngN^y{#C+y@wl>W<(;0M5gLf9OzgHXkqY-uWXv3OdbUlt_ zj;__tInH^`h4wb*VyqgiMXh5PW}k>QJ-okaF@B1cYUfgv6^M>u96OChZ1r%xWgG$2 z&kB3DbCu(ib1l|pG{!O9xxu-~xz)MDx!ZoyxyQNBp*jzc=barGQzLo8c?=#n;XJKJ zK1PmMbDqaMPSdP${}Rd-=;K_vxs4ZlBH8YC$u2WuZV^Ww;=`5XN_C}?7hG2Cl;w!C z<=8Fye(|H76Z1suq379HsYX{8N{%bfh0(hTQHmYyF25`6io2?4-mYp_tz#^9U$<+z zu}}Q)Tb*l`tIpo+n(wN|ody?qpxD*uTJ2hf;&wH;n&FRbdmI)wySBNuIS;sY+LK%@ z=%o$&azDJd-?iU$&~?Psd@Ts`(Gw~9V{90S}I?67KgvU`B5 z$DQuBAqGyO$7*|?dl<&t41e^vhr8_*`R>v7B==aXakYCi>^It%xyLzL-2r#h)#y0o z9&c}OPo!PpNVnIzYur=ZGw3XVSBAQ0JLkFQVpJ2|3vjoJW9AI|I+<3MPaeN}k$Z`I zIo5>Eie&dnlr`>k?v3s(?(Obf#L%|7THJeG_0Y7t5219rkGoH~&$%zUFOL$VOzuOY zOtgRXGAhww=Uh~D>-p@T^7*V5&jIw#`vCguxZDXs%AF)qbE{CQbE`+Rk7&=W&8@v} z>3vIcR_3hCot``WzRCAZ&YhJzD`yP~>FRRphHo3bEqDIN9l7-=4Y|ukwC60yY0YWP zZOm=F&v~Dd{(4M#L+Rqbnp-I9=?{&?VmzZX;OW97mWgkPzIeJ?g(nXQk1ZA+5&zaL zcK}9|j;jsVVRCo&{@nCixMaNpaHT=>FC5#pZ5taq*(4j=wry=}+qO13@ri9)8*J>{ zyzl>h_1#;y&eZfwPfzzeRa0l?nR&W@VgFZpJ{f4pGUELH|9<;;*Z9}Ief?GLD_Du7 z?lITZ?;QP{hXPyq1J95%0sIiuz5s;=>iq{!1z1&(cJY<#fws?}8`e(?-4k$6&dmQF z3Rfd?fKILGaf5FJ=KhFwvIBnvhYlhS|6uyCMgmbb6ktKE04)Pyk9mU+lmMU^@;0Dp zg4=RjIe@l-cLl2NqRdu)O+GP$rSJ63r`O!GZi(l6-gyT&E4(MX(E@%Lvan#_!14rv z6~NqpiI70agHVAc2U!8+4daB4h2VpRq0;>2YdS!y5o#d0K(Y~RdqDZ^0kQ6nAa8-- zKcF2Ec#L$Fz)ynY0GNbWFCRjT<|ME`VzlS)J)1R$pK7{leO|F2nOm-!i5lEBInfw;#p6eOJFjDuN;sfo#n>3h>OlQh#e7NP9>xN0;@xUIO~r{lP8xOeR5 z?09TxW*p`<)0X3wP+S(a6RVkXNpoq9!Uu*Ui7|CC$+#A5Os1|D_p86Mhtq8F4dvR` zNRXQm+t=)h?M&Yr0;Aw}Yd)oS~!Zbh$=8CGo>*XYg9s;{x{v>z5Ps;~H8d!y7c zv`L>)_k{0Lj$6`6#h^K&Y>K2_ok4CZV-PvgY>MQ#<%!)Fe0~>*tMD!0Je4faq{Q6k zxomZ_v^+8?{jW2>S2`5g0~bWKtWO*0)vD`MtB0v;(cfNbIhSddacP<}Ei_NGu{E)^ 
z(s%z5Jl!|V|0(-|xZqpNwAq6B;Jd)OaKB)(sdlS(BTMgDcHd2CUGUEFx0whiCOs^^ zRVJl{P8}3gkE0n4jrvk4uAi^%riZFBUm}}d+E~P36<1J&jzU-}GymPSlz@l!RDoQL z9Bp(?;g-~q-si*c=3uTNYOhaCVfyVIF*L5R=+MxedAO@s>Zwex4|IX zf9st#C)+FbhJD;Q+Z}F6=|#BHlkJ7!dA?LT+Xu`Ec<|Zny%%)Bx%=)N`=W2{^Wfze zdU0sr7o_E|G$Oxq;&HvS$53x9G|?U9Lp)4W;FM?fCdQFE${uqhG5eMC@IscU!}$*; z-6a>3K}xgED83Q8MMl?!eB+6wOS3QQ;-bel_JW|)3suZu!W~%sRVO6#(Ug8$f3@4? zw+1e3{(_ViFzt-?ls&~Yq1D!u>ufK#qstSjaSr`*oXF5qli68sH!DzC z8kld?S??WacV#|9U^sJaE?m;=52|zgQssU;=Bg2UTj39EK6X2KAT+k*F|W%rnhLSy z&9D7{-%{HyNu5`p^OPRZ+nO;baB?Q-9r0Dyn`C`4yRt#O7@g<)evwBwJMz^gH0$Kb z{Xhxy4L%x`soyS~C>eF3TY5*dd3ByTprF?qvc_`ZvYBQHh~Pv@GcZ{jVQ4PXxo)IJ zy_KtZf!$#=o;EsGu2jve&cx4*II~l@S4hT=Qp?m_17!IwWY>5lADO3RYx(CMt;i?> zi(ZFtz`Z^t@~+WsU%vAL+ZX$`cPt(M&3l{MsP8~F|A@3(dvz^PR@Nqc`*+RFucEbS zM=MjE=NiPwo$zXO$c0@m4$4>6vC--ISJyE^c1-Qf{c|%%*t2YIEmKzzkY_z~%uyyN zIhb>JGZM2#i>DR@7sT=}@%po7@ZYIi*x6%iSMnDhos3(GuHSs~{8}UZ9t5OB+aF;q zh5FsE_7b~m+bNkV$7QoO2yi2wP(~=GTZ`^=F*CH=g1O6p802q}HZAWYn*X zY^1c9Ol<;8g{C(vH>lMYwDf0u&LLlkfvI0*Z9oB653DQz7W0r{s zt!=<9)Z-)huZ6>XhY#H${zPpv=jw>nX9jtFm>dD$3-A&FHCi=s;C%r1O^V`rj3$|w7b<0jIUI~wjdMb6)HA|3%_}|s>3gqrv%hNa8?ZN? 
zimV2#hl7)cW2GZJXB#>S>m~>7_XWmtdFC9p@Q78C04n>ya z7iWc--cpDMh@(N;Ms zB`27VOVz$AVVeG7MTw?4jIMY&GS5u8@2=hzP5Ps*)Q9bvE}Ge&t%lDLsk4;gdH)Od8urGfjY~v)?SIS=&WlLN@u=4g( zy??LRV;v$buv6SgLn$kWKVWwveIp=pzTm~~2^s(0FXu_>1Ns-N^+;eXQq}P$tTwsU{RzvH<8?-n16-KdZCDW)GN&&Pnp;7{U5@ zDVsoZKApPxTap>&#e9U>8iL*_~=?^>$^yz_h2ht~$!FFsp9Qdn($)fjgv#1fMvKWRQkBlR>HiMDoV}yy`apP(8DU`VCxxe4@c7S_LuJ6!E zvZD5ta#|dTs4rIM;V4UN)tUR`K0PC#$f6>2i-P)wU0;-YqMCO8zDrPyKlVnZJfs1pxP<`HvV;L2Hw3|&%z*_LUD6V*1 zgT*V0#(C{Fzf>~|Lj{3D3SzuVoKNWG&hYZ5H2d%Zf2}S5$*HX%c16q63L5=RseVZm z)$h#W$#{Gv?5X$lOMxe8C(iJLa}8MJ;>*Dy&pPQtUs?UvrbmALr>;uI##gPlI<&aV z_t5^?fHa&e>gO2m-!7E7?*SBP*GH=~yNMpo?ur(E{z`oHFcW3W%F}F!#@XbW{!x-i z1AWV24^NkA$j4L_a|F-oX_%$8~ z+pc^?5>N%#ynDJ{KeC#SAz|UxX~As4y1Me}LiGVvKj2-LKk&|Uhs=ZY*3=#Lg4jeZ zJfnoPbGS6K@`>)w#*4>DfN9aG*!vo=-gWz2Rb|t&EcCYN<18Z$lollcR}Ex|2sA=L zB_*4ufQSj4Br^^K(!l%s8wRLff-%z$<^>+ojvnwgvKzwhzxqo==*!3(Rdv#jtEljeQ$A3)LTdk;_q16HKlE zz^NxP*YdbS*)6T#pf_;}J5g7;nz%kuS*5A1mh2vd^Gq>*Dy0 zR4>@f?Zy8q@mxxA784`$C5 zXw3O5QVh~09ktSrkZ~FGYaH)#{>Jp5imaMd!OOoJUkxC6wWN9oiBgp!0OfUfXZ|ij z_mVM9_juM`&>HlEoJ(cr@@Cm47{p>PgvD&*7B$%;8o9@oif0ZvDe9^l%x?i^XH=Gl zsz9rTLzeX~?4d;oc{ER(U^NHm{r(1zUvHM>qMuOz#Oxr)D?!+!T^Bb*f#{^=V!bkl zA~)15&_^p0um+l&5v(&+HI_PE>Jkq+1qawXD0fQ)B_ju$JETfV_eWmb!CaNKWDg35 z@*w&k6a~W-G1ED6tZ-Pt7T|imAzoeB4b;YuqH!KB$KJUY7!ubz1M_)F831OHsw;YR z!zH31t{m~$^un~RO|^{aC9{Tim8D9}Xw6DoIK}-Gu?M)oP8bhmQU0G38-T4O@6}WJ zuAvn|&rG!dR5kUU9|%p+jm@&*OSDWs-obz&ft9~E;cp2HF{km&^U)bV%rEp;bfOmR z0LlO}fbI1RfH8Q)hqyX*8lf`ps=EDjdOUjRmrxP!6yBjj6>uWa1>;?B2rPTQhAC_3 zS9Lxd?F*0beGMew*Dk>!2cT^3oi00Gcd}i2NgWep0wo!d;86CbpIU)hZtV8ag;iSr zW6Gt93-x|83G=L{KWb%iVAh|-O+>A0urSVWXmDsdj(cgd8MMX@*@LWdMhdi@{za!Lcx!?g9r!yxVrGkR-zgG7E70s(%S{4 z__=cAj%+9(G-pZi;M(Srru^+J;PejxA?@#X%)=Vh@h|(KLvkp6yKczxFz)Q&49eGS zTvfzQX|qbTyLidTa$p~|HN7gCtEV@)KoG=cZ`-2}By`-1Zhr5S-TUNjT4 zoQ0?|c@ig|o?4$Zqw+_d@oxLvBl(H98c=sBU#{DkVErnRfEah}OIqUJ z)+hHQEviR!_ZJ5X`9+6Z)})q~1gSk`hR#^8&V|S2*oj{t(~Li?$*gfbWv?B4gBLkk z9({Ki4L#uwt+GaSK|2W>a2pt3^`QhJwR=T)m!xxp^QPyW{~{em4XZs8q+a?DaX_jE 
zMSwV09jH2C$X@u%x)V~N-wxAwrP^f2CGJIL#_GGOW%?AhHluuT9I7R}foUX6a}8n+ zPe0Ig2+Qh+Jj)w$t}jqmIa%((-1=g)I;N-wVIlo)`mNl6z$&SF`4hMSaL2Z|L>p{1 z8mlF_22TUG`7>`nDjn1IQ%WS72H!f<9FoBZ>XnG(*krL6^F^s37hRXGp9pDSozsFm zrI`q8d-y?ukue5KQR-8uyv*1Xy7}Rz6B`&*iMaOuFz?UXq*Fp zze{W)tb*u!?Z40II!?G)Lc3Z?KWb~){ZUV2b_mNzl`MKxhYZ48m6l|$AI4snt)@kP|P;h1C78hkCaT<7s-opvUD-*ygO(D#!{CC7H-%i6BN4=Cy ztXqSi*hD@MXF=c~$%^E(EpRE481x<9cv+MwDCX@@*&l2&lk@}f@dc$ALf1)5zp6=B zD?Z#^1=1k68{c{6r(lcv9e0obqI25C^$&^iL=H~J4YQCpP;wCfzQZ4SImbj<#70K7~ZuL(ay{N zNak7fUfK4Ia9*!?>(SZ_nbGCHlN;lKJ#g=M6hHOeBjuIYOTPay#B@a7cYy4L5CExx z$^*HA@P~VcsOWp?4O-{U9UmGO!`OorIraGjmJJfGy|RA-`2a4OY`#BBl?8v7hA*aQ zFF-CPde4_GrZbh3x>D+it<3+*I&+TL>!XX_uR!HY?kvon#2H?U7&nu|l`XDFqMCJc z9mMT3AA9EZBffbvc5_hMzO@fl!(IixgbZvzo<`~~>s<$fc zr;>2e$WKg#LR?q&DI(Sy=cIImNeC{GV{rJuTuFp%&(ambRd`LXzsLj-1VY9`t8@+= zj&P1hj;uE#yHLB!iK!X}N0(q%&_odG@dVudH69S27@i281fF1?vUBUo=;5&qsD@-L z;2H3?aJJwKRRqFC^X}kS;BtWYkgv>#?$hru+P`aSH zh`R8bJpKbfCwF*9ddD~Mz1ge(7y=`L@`rgh-CsJ8T$>3BfcCFG#Ww`0;dL^uUiN4E zwuER4F$vvvECE^pvER$?<$EpQ z5}*h80E7Un`$fNCZfI_BZg_4WZc+^25+Ef5VFAAksaP}`#AuLL!8Ab_z}z7pGkCn7 zhkiO3;x>Ts;P9aFnC~A}MHSNRV(cR9PJJ8kUNSY^r1%I;*ju>XDZYOHg7oaT|2olG zEH~WsMJRiRuknhr+*Q3Pk6>^i=$|oJ+^}41rs#UnZ5-h>sdX7SBsQeeHbTACs<`Ck zXWAoRxfCu+T%!1(4#Q>Z%J%`Lcp+&%5j1S`8ZPr1KJxB%I^us3H;g?AXiX=!y25TL zRhvQIVAz!sw!|aN8@)yfl{&0BGf=xwaqs#v?J92O`nd2S?7ETdD8nCvD}SZw3nn3wIwfEuk`ET2HF8Vct-NyJQN=|aE9G1dNa?;4 zVG~7%%VZZDIYYe2JB!@vN^`wf-bSTJ%l24$|E{JsW0%A#?{K!RciWT1zH;32I3w=7 zNw&;D#TOJlu)Oy%C&zAcF8tfuYM>SL9gXdvnXj`U8y>pLu>^j!gZncEmGIZRQ_M&8 z5y@*c!C%3@f?$0OsBGII+hQ5$<_nh@%wnvyCfblx4I(6O-|MAiZ88Sj*W?Y6?XDot zYn=r?BjtHl27&<^ZBhortwLkx>OEWf_(M;OSF*8b9lMoeJ=A&j>tUv3Sz^xArs(nL zuQkXAwCF!UX>;y;RJDjDA7iX1iBVRVXU-!ti2G&@svbVwk!?#O-+4jVJ?T~Zjcj-5UyDf7~vGPyRr<`|Td%N22FE4Go4y(*ABs!&U?Imy*N z#kJDOH2)!n=@zqjjMKTr$~MF59ARSGYjUW479jKN&EkxX z)A?Y*)&yW}nq_SQvfkQ^0*yz3XrsWbY4t0oNd~t`2A9driwv!+jLz@QeI}-1CWqW{ ziO_`P$h?y3e3I(?l0@z?0{b{#i>yr}tj^Rpo$zV5AUIkN?6_JNI9ew-TGu#Uo2*U! 
ztW9&QO=GN0Ypk~=CWo}6K&(+9dtBlUpN`iu>s^k?vDfIN`sgHlTqZUl`33uPjIBN$uYbQ8fTdYkxtheDNhk+)C zjHAGfxWtfe&AgIGypr2aG2It9mF_X!C$>ioCdbHenNNi5#60G4Jm&h4tyJYe!OU*0tLje;-x&Vb8{d){co)Di3T?oNG(R|CWG zpeGJ%{9&T0C&L88$jU9mtrwkADT~s96~>Kxo(!um`AiYa%rP&w&w_Cs>xwk{XeMn3KK_=J?^Cok z_+?ZMFY!($b^ci5g)_$@n`cu=f)}u6aQdK&z34&6R6R@0ywz(}z?g{BX41s`68W6d zm|3rzbK7KF+ab4o+h=oQS8&hnp35t{hq|Y{yS#^Sn|<4%$D-$?`=qCK8-6=_TW*`{ z`e?Se(9H<07Fm#}tx#bPYEQC|T1?3cJG3n{51t!Ika!)>n|6~Shj^MTda^zs2Hd4w z%Mfcn?%so_`dFsxWFdEcG|YGq(K3p_%N2P`PT^GOPkvU&Lz;P2rflt$s|$N8Yxbo1 zxn)}h?qiYW)lIXsjIJp`^GYrAE6#~2KVdwNJ}*EGKoEm56A+7PZ`%6f z#fe53^Jpd9qZZG+V^MfZ*TFm^^BBsdIMd8NDMxY|$2`1~4d*?JUHjs)rA$BH*b`Bh z%7FL;(KPr|1ai&r2TR7lx2pr?Fw{Y?O{IAQPe(LbNLPqi3c)7!yQH-SzovWHP-5NC zh>CGeN(UC@c+3dCS8+aV*3U<&kW~asSvk; zuH}~TOM9OQ3oPq#v8{tc)x+~?7q8nAL@}$`OdfIz%>2$#C28c6*bCiy#&sMxxDe4Z zw?4y)7N?FD8r}@rhjthua`SexGwoRpn@ zK7sGAaiHWY{xk4QKYCu329Yb#w|=;k)IfF=_arUPLV3&H+85`xD48#dv9%Kw7Q4!u zub?|?17LF|TX~b4J3&vJgxdFzHp9AHn3?vRi2!0=Et67w_)|$*mO{+OX5GOWciP*N zGymMT2ce%)h|}f>2T}X+a>y7H^_`gDyEa8-8B!b?bLenJt%5nVYLX9l^;m+~A#9uo z`^Bd*dq-w;0%qv~D@Gd+j) zDLXcQmFzC`){K5ue<~WNvj?>IjqcFzU3~f*sMCHad@+96jXjipVP^ikOklyNLPt|4 zize&C(&HW^R6?7Y!Omq)7z`=ZtjXy;eRgZi0(VV7J2iQQFLgx4R<_fpB&W#bzClkR z_1=)rd}k1rb93@&B03Iam{eWsh)yVWP%F|E9-S$C{oc?0Kzz^hSF%1$f2LfN-tiWX z{ebHWzP+!NS{&+AtRu@l&yK6e6kf=ed3n_*w~p&VWxVf^T%0#JsV`Rl52yB;vxw_I zg^TMOkDo!kbCK&WZ|{D;D!%!TpH5#qZh+jFyZczlUl>|ATGE7WuKTczt*@UMi9H3m zG1HHlCN1Tmc})|Y=YMnFOEfR-b1wkkLC{dhBS!{)R9`gKllZDuVZ#l*A@g$$tMY#>OYUM2Tm?J6p*Vcw}8oM^tF6 zeWSIgT{2i*ia`}sfCL=_Ct{j5zHUiKG{Su!(rbQ{|FVSeMPa_ysRfsJMhMYyQG|E- zA+Od$mx_lLt+y&{s%lsbiI|xlzC!9RiR4qhEK>RzU8{e_nXTyyaG8F@iF%h%Yb)n> zpQtZ~dtR1loiSQFT=NJ@M$MB(lYGK%3GvzOrdY<4B4)Q|)a(O4GQDDi#g7ROW|G#R zu89-Z!p!WAGQT~^xJX^#E}9Kq4Sx8M$yuPvy4W%SWD9NaG?Sr;z;86c-7(;y4gh)z zj@ZcAvVjUq!Gx3paY=^!V-4RCA{eEhd@p%w;Eqns>e<>}Q4?CwPti${yUN->%-a`F;hcfBN$j{#5qV_Eh(j`4sk4 z`n|umxi0ZoKNp_y#cykDD^qBo<(ewnV8}MXUl-U|`(g&82|)Lr~Xd)b!NOd(i%KZ=(QSr$BK2+Kh!XPt(g5YGk?ovAa&)?;)#gMYP0Y67qNS#Z#5#E 
znZ2ottFxJr-G7S?#@2{%tgIZoBrGKVE$fhQaPxduI9WNq^Z)b5$;$rUIy=Yz(En-U zU}x7O`9D>0{)hixRRRLcl2*2^X3or#wnnaI;$|iere@6YX7(1YmL#k!tel+O|GUOY z!p6qM%f%ujgb4Tl4bC&?uP?N(&eFq|-?bI9ja(bU*;vydsROlsqsg!jMI)h3RLU74 zHBD494>%hOd%x`fjE@*8Vbg%)YAJ0?Sc7a~so6(?@SjQR7fco7uNgln=f6K_h1X}| zAHV)JPQPvYT=AZ4`CN61O_Lyy4j~|;p4C`6)Xco>E^NSJ`=+71&8=5u#l>+(?~op1 zvx)N9*ewm5OEJOzr93Rn4Q)tYT3KBFs5~}(OCz-7Jh^?3VuRO)4bRK zSF0!1Wsw9jN5HH-c**(L_n&p?`zNCud)zEf=OOBaq<`+Q!;%hj2lnke*b?s(ufJ_B zghK34irfAr*6rGh4W8E(9iOHY&uwh8xge3DviJM~S+!XSSGo?kIe0MaI#(6V(LlO) zd-(J^*dh-6Vrjuf4E*62zsIt89q4@y2rtY9x0k@}t`17*BrJchTch#Oc*G%X@0NcU z%=Z5Ceqg38`lu@&*<~-bGTndJ^Xg^zg>HGmeUIp7ww<_wbg);r-PmX&wv#N}l|+81 zj!F7b8j{_Hq+mVvGTR4LCEQW!aI?uhTBu1(DxR)nSwqP?0!_3z*GyITj-CW#1nK=8 z-<4nb+(c0&D0DK!4p&?Qvri2z3C0hbNiwNC%s6^Npdrd{R7bub%~Z_yXG1z8GKjZf zF-f;69%WcyuLogITGHAFb$S=Qdn-YIl10X=Q>Z3jNd9-|3v@wwe#j$o~5oz z)ogQaOsX%o*H;g14Y==MJs~7X_m%4R;z4OYdIv7$-&-KvB9=qb7<}WJh69pwU#y zz-fuU0=1i>Y)SbqOOum>!?HSlZ1amv%lnH0DL(k!`Ybr)(VCy*M~y!pScTStsIKs4 z9Z_KKs6}p+<#S#44$%iQu64*3(;B+Zgm)8agW*3w!I@ z%Klgf^*Q@XymU9sC+!z?cJi)io#`u8@ef;Zp0~hSmx?ka%MOvnnMr?xQ`IHMd-9W$ z)pr$=W-o8)k(_2xoyOddRApD9@9{%&`j?dvbG#|d-4~Yy$R^L-9+e32N7uXEC7N*B z`^RW)V};%hb6%kk<6@6Thc*G}e=YuW>I4MWw<0axpq3@q$1d6t<5XVOfCd!HO0;(U zQA*5}ki?Xwp)16Y@j#AaJG;Xz1eC3$#WsnTmDR^O=B?LlOWWu)w2d4)he6<0V);r? zETLBJMjJ@I$uk{FHRs`UTFT4);rFZ3_i$JlHw?W1D@LtzH#Id7KZ!Q&upz^>=(Y(X zeUfO}xu?)lxH?Cbi)8mhxYn3&>N&Y4H zGSHDbKNoTiTZriS2S4#%ckY%TI7Zj2LP_9_TW21{7Cua?CmHH7(H|4 zJc|t>P534F>*S@F9)QmzHV2}vZ9f1j{eyaHNQl}QOog%*Emb+lY+Ps>A?-sPmR8}! 
z6^?o5Q0qg=3+GVT^oOFaznRvI{m^{Knyh+TNHWgRdrz+25nHVy*q+yWv#bbn_I?pu z7*1}aKP8toM)u$^n7Z=e?T7$X_SQQ)~ou#tuRg^F5Ls_Vo1v`w*Ra!&mTnns@5H4T}-;y6y7-5=J{oL zkQK}}Xvi^!maxOg7<&`==PzBlEQOn!CC$W-&QEK?TeYD#a=Yp#rfgpRB-S;K?Oi1* z_XU_Xw7C=@EDuL^HF^wf5izvG<{F@Fn(&A8>L_Jp9>>u+4kuM9b2sHL5*ob(Am<$#8U`s+eOVo=_4_#Oo6isE5|&yg8gM0+&BoXZr0)hIuO#QOqP9IBtdk{-{p5;SKV!ASZLzeP5DP8UU|NslkhKi_ zfNL~3PJdIY$qGwQQO2M^4ywx`jZllpYK#q@p$ztn!6iz4vy*~Qk~7X4ERZo9RAoEm zRm(nx*^U55&LI#n^tx&I9T>0&i>H=C0$&TBAm*62c-zDAsBS>HHND*a4D%|IfrLZ)Kv|VZ5NFGYfM2FZ< zs~BK^h`=!@$2kQ4pKU+|v~%qQBZdfq&Zr{|+U;CA)JjB4fobxHZ6>`xKl$xg&ObCUV$c)eC?Bc?hNnFdI_Q;^9k(dne-xo56 zd;pwIP2#MP6H%!)YT`+t*z;@?Tf`NwaXbgcz%+cTA1kD1J6BOCkym*bn>ol_P?_X9 z_VeG6I+Z126>d8d(xbdP?)SJc{v;zz3-YjiMl7rIw2mG{D z%iTUFmHMC+K~zny7GXx!vU0dh0X4J(G7HR~3DLBgc8P(6WG75M?01-YxAe|4F_yaW zP1ptYRuYnbq&uh1xL122TM5Uz4RtOh_M!TUK7WisqkCwZ=C}tbYha+}p$O*o#KU!7 zrcria(^bkCPo38c3BhhZ9L3x`=EMpxM;HF+c9gir&K^=ZO2Mv?sqiGdcD-&g@$i$m zUiY6a2>mvO4_}p(+$2Xe8eR@!>v$RnwkZM07s>Ccj@Qq7JKO41YN!plf}1Pcwr#c7 z9QGr@GKLKbQK-)r@|HYZ@>MHGCM(3Bi@zx)Z+NK-b`d+g)LMs)zz~e~G^*;d{TV{S zF|Q3nP*}7n_U)x@<-qkUsUagsO*vqZjGSr~tZ}-QWLUykM^;0jz4|x0Aq~y4B*Y?( z0eQrOCXS0+)bk@jjmvsUYBkQ#)*IqY(%L#&2`4K^oJY`@-ez&|uS2dsU2RN8R(ef? 
zh+!eCKg@7^d1^2L)?7snzR64^-!cE_>H?~}B%y2|AOzvtM zl#v@pCzc`RfNUrx%mk_^j7X>%j2tQ$3?Ymh$~uZRsx}IiRI&{SmXRH)D7z@SD7h%O zsGumJD5I#t;xf!n!d6buU!)8_Zlvs{KvH4uird0KFky~h?oh^2)=_2WEMP@h`RGM6 zqFJE*u*fh9^n^r-M43d9M7j7TL|us2&5U)RMEOMVti4eMEE9h3N_Jr^x{}_&C_+F7 z3ud+8kl^-XDOiI_i%N?(7-vKF$B3c_qxJ{dc|#DwwQ9=!m@W7jTOj(Aj0WX!Zb%(O zi=z?o$8Qo56!OTQVS{+l01_0?44F{aQ1oEXJKp8ba7ThY_E06%&)+w~$~OD}YM6j1 zZGMo?m~YT6DH!6SacG$9;%^xxm;mu-@TzyBJr9^`qm2OacdR|VP>-KSeJIY&(5cFf9?S(^zu2}ZZ(}WwD!4|(XtoU2*n6}Kr$2& z#wc{bk>=u`-vtU(Q5o5PCZzJg8i=1V`P9~S#XtGLyo$tix8eqR>w9q5B1c3x4XIck zP<4se6Ofu1ycuv!R89ctBttwN0SIZR;bH-ZW_SaKb)ZQetT>`$FFPU3C5&7V6wh&H zTLmbRdl_aSWmS2PKEcuoD1)eG2g#z;gQGp0TZKyW0AA>MMIBKAT~%?k zv&b)Oh#M(4(!lWwdtQKV=Fpcqnar63Y8F2#JWMZC{N zqM^YRagr>=q|8;tL6Z`|{$5jZQ=@d>)l&ITgug1^GuwS2dRIhR!m(Hc&iYS-oPhg4 z)U^xAsxmunfB3NsOcadz;`e@~F@y?&{Ba?Wd7k`qRxvsuut4Nz@ozYr@d|g$K(Z=5 zM3Q*x6%Zgw2@DzptM%`n$zr7cncQC#-wxsmAbB941kVnDQISVn9Dlf@KCmz38)`s+ z%&Gf#P`rasq7y0Do@wMDVGw^}PlCe$uut-q576^I=ug5MAHWyl?G9KU{E-L97yPr` ztPlLT16UvEJKv2%U;yHKmU-=+Mi%G^bC3a~Kjbqui4X4034j^259R?IBp=!z=^5vy z888F#h45Ss5LV7V{&w_m7b=~0{lvMc;G1|MPY{GXBCg`*`AAl(Sm7tusCN-6BXSgi zV~nr3%S8qkhVT_{UE?iV-}n95y7Urg4fGPe@dHE@5dPd@Gngai99=zbuvAq2ha8Qq#2FcPu%kB!VmOQ- z)Zn>qILXc5U$1`>=ZeeY95>Cyx#!n=$@6uV@RHf+p&C6(30&TvIp`22{*6xn++y}C zhj&s?yf3pFm-KOAouBCW{;SBWBzvqnj_eV{IkN8@*2lDtLs)3qVNRj8$_$i^gh^Fq zuY`M1ci3uM!oHtLN8Qr-gY5Q=N_}~!%|#j{Bb^~{_yNn30}-n_I6qnverIzVeSx;9 z$SFshY(4!~BP2AAM`3AQIlaJ@ZtX{Dlf>9-YU+}kznQ!K)?^~Q!#!ec_~?19n9v@0 z>k_Fse(hUDhJPE9$alCM^z)&o)KF^>n>H}S;V>}+y9Z*q$@ZmSQ$}2p3ljbEM3%K5TwqVh3Z@CRR5~Q#Vy5_ONJoVk5EuMoXNL$))Ztw~dzyTt z8TQb5eP=51&l*|*A2=o1Y0?wWeE6BWF`=e>T@dMfb~APx@^US|7P*pdvm*U>C zh?x1P_==L*2=;<#$HtsyT10!!ShI2aFCfMymsQF6dwef;>^gHdVSS+m&IEF_qtkoA z>2q#-4pCm1Qf|DOh?xE2ee>Tz9)p#gGd7ioNRX2nD_{M7@~;tLQaI%@rwfY{VKwbK z-G;J!36n`dt{wd3L$mP{SiVj8>7OKk*fq<3sO22hK!l%+~mPOsvLmrPM726;k(WOFlhRUqB0UlGrA-FH($sVXVHNx z=Vg1{oN+!oykgnO8{={b^|>SyX~!uArKc0VQaD)pG#OJ|GfrC0>WL((tCJGh%3$v7 
zWDeuRUzEvrQQ_jTmPLv+yq}GlckR~7&kh>SFD}bWv}nY?fSQTow~9Z7dX__gtO5nnNm8t=2)tlw(5-M7#Wq0G+O>a96X~cVCViqig zhpvpCk_zJWkHAC%)Y2v@rpH)K(Zj}o7W5d}pYyg<%-J$P^Aufocda@&rd*eG=cXm7 zB9^-~x@1=R6}7TPuMJyJyB`G7Pg?Xr_h~A6MwH1qr~Km>7%5c8by_{vOLV#Qi&@nn znRBdRe?*@FA&Q9~Ha?Rx@E_z4&k4TNwT5Yec#0XCA=!oKCkh9fy1Ed_vL`dfG^WN9 zmCq+N1E;brx8AI8Ts}n-X_)ST=uw2B7)i(dqT)BAGY{MGqh~8q1aq3Po_ohwuscG0fa>A1*Tv}Wc zM!N7ei;Ev*Z`q5mUZ~-=7Pn7l@g-d{n&ep8O4zxRATv>sSQ(DjXLHiET8tc)Dmfb? z&UHB%NrExJzUn?Li+-Isubkp)YUkW?e0cG@qsS5`^oW?yU-cV%ov-7&KDj<=sln$9 z`ShER2zA^r6xT9ECXCNd$5&uWmN|Z?sG+0(2Nb?H_l%L$e^i#NKO_Hy2cKYEN{?gj zlseO1CZeZVyjt_Pry(pErukxtX~S)bAHePDJ}YMV3M*9c*;%yY)d;dOCl&J72+G_T z`CSSZ>s#a^)VvIFp<0=!psO>nX;)&_`ge%<7gVdJt9Vu#&^px_m6}s)O7du^87Xtl zdYPX3szpXVNT%niQ8tn*GxHCFVI4j2p-$dwkf}NecgkT1J5NER9 z*#7n{(@WpMOaBN*)m@XWwd%B9Y5cABo}0nz!W>FMBa0tNjz$A!@_BAvSFZBWPLCGx z_jvZZKaQYGW%b+OZgmb{7qjkmzSPz?mhKI-@a{~+XBk*3*w9e$Tn~tdZ#F9{J~8Gam2)&IFX9@v;_a+j z=Z{#(1b=niXZoAfryj7VM0GEy;-d|My6MPQ;JrB0X~t8keBJc@_2W2Kl1^o0A4;?t zWhIpBC2~`$NIJd#7hmTXoXHpc`QJ?JiEZ1~OpJ+b+jcUsZQHh<*tVTK@e|vd|88y7 zzS^pDs?O=Y_jFfxzqq%%@8=t?n#@AEqg3Kj^27sm54P`b;n17?^O4ryIr!PBfy`R* zi&u~J36D-@$Jwyb)^dSadWc5McE4J+RJK2=fzo$?IvTNYtR$8BslKpk^Q3`-M}Nmd zabH?9H7TVk6%UPB&cZ{k#ffN+y0I6T1f?%<}7}L|@24igJhL7uZH;%^Ce)n!tJanI(RRC-UTK z^*ZB~hzRBCYHJgfiBnn{KAy+9vrM~gODmnr3b+)`O@|HtsU?VPQm`Rf%%T*x%k{Wqh~HiC#JGjZFQRZF=I44Tp++fd;7gaAe%=;oxWwLW7tJ; zZLnF#1kDT4J2|3{LumKv^rY03Y2=V~h)e|fEd_n3(~JcEN)$roA!0{G2Tfx{(`l82 z>CJj#mL~NJb!JVuY8FApkYHA%tlfJ;Jg2}{Spbo`hp|!gxa=p zn4rVf68no&OAIAEA^S>=l}yD1ojLs4=!S1n>W}2q`iG^osa}QD)yV>qgel|?A?S0v zcNak0AEu@Q%{@LPpJa&W)IRG$eClY_<@6OT%o%y<+w^%&sK z@D|IBaQHf?zMZwnk=|)nTv0XkvZeH-807M|Ma05&O$Qssk=`oTJ_7}HFiV- z0K$>_{t5^BrOX)XV&a_u93`j4xBvni#*MC!@#$Q+sWWQiPFOGqbx5sl=%1Pa^})lw z6vu)H-BCmP;$Ni%)ydnl3>B@TDJ_l}qlApRj1@hsd-^QFk;NIRDDQ$L7|hCWHAl|H zNrXk<-Fn6FzD%~vt4KCv3(bq0_Ea+VG+O&~30e0p5PDS{L||D4W7 zT#xa`Q8c%X)x?TkeN@f8XyU@){^od31k&V^`W3P_g8-bSiY}s(k 
zXfMsuv@ddRk|OkMipe}Tii==ik*r0dzavG$kT`g=`0XE>+SeFvwrTwLv)+!M_>dn!ZTxA*91~BM@cq8bbnvWOiMJMYUxWuSE=J2wQIZK zdSOvCrS)S!rF`@c^k^GnN(;v5bN^=Hfq1jfFk!GAf)LHdJN3?LArm<^>(t$QZg8K! z?x!S{b`irnTULjJwFfV}uIVJQ%V8uVBAgDMp+S%2o#(*#HB%Cj;V0o@;OJ_q+_ILOYm8Y?n=(Ysc zMicqYNu66%x)Lr6=DK*EwkkVoIh+2JA+kS?h&i(wvY2|zdb(mvr+(qkqUJBHdP`vo z=oSpqw#>3jJAcWZCHY>ixf$YOZz{9F2O&5n;`)cW_$IKhAO0(7G5!$9c1xFi03l^) ztox6(DcU17DFMAIgiDhDEem$-uiEMdP$QqgBmJKgSL;RwSD2-=r`);rRjN7KYBx$tnPO^K*2Wl6W*HqhXy{%y zk-pwiQy6Uz5k@@_VRCq!Bbg~BdxmgHURqTwWKk!RYg;R}R2P5mDaNW#V=ys7&b5aD zqPR#^r+6?X;HvF>mS%q{ru8S)>m0qDJOi^O70oGfN&7hqWsBE&5F8(aRmvkC2uF?g!tG^fS7crm{3zZ$*4OsVSuyCTBP>C+{XJgVDRN z9;Yx3L>Jz6E*WdgUlA5hQ&B6I$8orsBhtcD`nA_Rf4l-1-F~M1FOeTU9sqWrN-cu58FGY(9y14xu zBt

$Ng#I-Rf1_Oo{FwH91bv)VhjIJp3Tyyn$CE6{dRbx~gP;GWeP0bYO|N^;`J zTCBl)FffFXw{pRSr&Az(HE~ZXX{q+XjPo?n<@Av)oV<80V1#Z9K|k(#Ywu(+HJDW7 z0cp`fM67Yn>KU!sLY#R?;Lr=3Hv2OY&Oqx=QTlFR2-m9K9Zs=?*?OJCv_&xNhq79PVFr@SsrbLLq`80FX`zp@}#g$q9G@#m#Q@& zFu44$JuH!`2o<>m{U0hdJdV26Q-k12sawHbIN6 zD*7h6z|u_A0K6Zws`vOBx*UrvSLf<8e{5H@OYijH>0$S6NK-G&Qp%Xx%tT7#71fgEA&zhgC37`kI~fO_q5QstJB=;=@1%oa zx_QA!qRQjf>Kn#&ON7xL_wk7YPXb`%>N%c^(iKH;e>Da*Ymm8bW)LyP#`<`Swt;R# z%2+ZU$Tc0TZw^ab%$7QTOJ(n`2-{SwE#*(~+sOl*0b(3NiaIz&QOW`~Nl}U#!qM!w z%32cU1+4hVs%M8TXKk&qk(ZU1x3Lm+!fLa4DN+h&MdcQCMCMlbmex{WGSuYSq#=DF zxx~a6EugtXXoE5O53#aD;e%}N!BRkN^0Kyq;o+6PQVC{@W;Lr+Ye!8wauY)Rz9u~U+ zOj)}GAgY#BW(AxL!`jho(>!@}uuCZe=R7Q?8zRT@V^&;aXN9-y79d?yN9QSkZUUm1 z78BjvqslZskvpB8)+}SIFKapL>@k=em(!U(H(N_pDNT^I)OqYLf9&PMPSUo*{G6?{ z0s5wlt91HyO9h9Yr`G4Z=BOQTnqxzy!P!7SeKAA+&q2kyW%J2kt(q;pgLZOC%9f*% z0@<6Ti&nsC3}4RPUe#;*8Jl{?JqT_QYD!B|V?HmL(swI%poF{T_Mlgkp7S-5#PV_J zGpH}t6A%3_4J0;nI=uJV~A&Pi$?%qGX_I&_L&10C5jBn}|kA-zsZmOmW*; zI2pLy0pP`KJKIL=HDOJs#uaSG^>j7o)Ki$fy64ojF5?KxbbW-nr-rOk&^GSh|E69_ zrR(w(x@(BJu2oH@v)kPLWn`eQaqZBKv6}T;v8Ac0ptfcDNWJoas}aVT3{aZVVv$(+ z!vmCDSt;e3zq8g@In^@Lwu_|D^0WYfrMxPGip_M6S6bIiwXa%56Q?+CJZhe^5N+zI zw$UQUU8d(_{9m1hBUW_nkEZJxm)RDZ)J#?uSDF|{TdHUTUAE51gMZi!1Z!!5X<4Sw z^9=F(-87(fY`&vzS{)LOX_@(@q^| zm0O;cm)@iUwyk;HW_jJAi<50LY(-zYFMm6TxpNIp`n)N80{u#m_bsTB2q_r0?Xk&Z05|;f2_9ZwSOQ$5_ZF*p|iki|r*4oWu z*wv^t*Y81D>3>^j{XR}~=tY&#mrOlC%u6NX7-ZzfUO0_f#!D63Zn=i7`pa?-_E%ee z&pI!35AdTxDyJ9PKBg(Goulb`Njiq;Odsk~S@eaLbGw16wzkoH*3O*5(n*99%kpw~A# z7Hd6b72Y%zS&K+20=_z}W13>VsuTo3ARM{4tAVbrK?u!OF^>|PDhKO|M61TdWXzH^ z=>jo*3-=54w1tv2M`}q0V(Ld=Vpa(A!gN|nO7cDp9ZhdVw)00MOP+HjY21$?)24#X z9a*Y>>F!muVnc2LU@XR6x=t9C?Tu+{bS8q$51t3dNYxavxc$D8z5pPzJ@ zHO4@Li|MF3)~X=XmKyiKy6g0_k;=-2)Dn8_v{(1$HiSuThNCf7bukcmG091ID+J0? 
zgH6icc70ik7?jr&&5cQHv2Fl6nOv?`LZSDc>`bLhow&U^c&Q1N$_U)Mk_{lIwuMHy z<3lJ^R*iD1_k8FRs?W$hw&w7CRBROsmXqmj{g1~lC$!Ukzwp}@4dC@$gyMDi1lY}x zqR-@QWlw3@XHSoMq^QRyp`Nv11o|6TuFYIVjE3%W4`WQd5vEzlUFY;w2Z`jNg|4D( z46U3@a`|nomb#sg$9-<;3A>U>#k-$kiXVl^ka9ZBclI~AoJHlce4_CG#wFwb&yN3- z9GVAruZ4UKrCSE6?tWE_lcP&OAqmoE%UtJuGm-s%VaRtyo|zGH^c0W4l6Wu;7&KJ8_!*Jhj}pA8AdGu<+a8}1oj3e!_nBHnhXb2hqf2&Hl0DU7Vb*B7zE+j#%5nbxPpRofxahvS z*g=AQRKQ~VMp#x2MB@7uLL_o+_Fuv3vWOoXwa9EVy+9R2-p^dNe}s`gwpNFG6sRM# zvu;Tq4X}Va@HRi-uICFI!wf&~G?gwYbN7M(+BNC!Mh6#MXy$;_}_2K>}U()$+&* zy9YpQB3`W7FlR!6y=vPAp@Ntru0Q$Ze;T89m5@$x8!xGDL&{CSy&e*m;H|&K4VGp@ zSRBru!x(?N>WIDTp6vx2WF=~~=jIWUMLe*V@e5g<32PWua+X8+5r|Shb25w0!pamy z$FkkXoa_zf8UUHd*&ZAg@IutB2xCUYT_WvVaD*+>D+x{|pNiy-EPfw0x$ zo5vUrz2DztydCXSCJ*6B@vf%jO($X0^)wd52m$0G*RZ+5vfy^14GPsU8G(McUi$#u z`q6_wlACNj;*)v~Nzf`@K{-dSgUf!^*I8-DE4x&1`jRL!@8CJHI}i#9rMZBli53$> z9}2gNcxGq$XK(WY&L_(S+@GsE(!LLVsFC>NGjHbqN3xi3HJ8 zEm*63lnJV2*ofZn93|rSYLs&$pkN^IkGIY0<1I5w20WuMrRinnlL7$n=6EB`z0vbLs>50{S=*N zP?nIHmeSLOyE-cK?Bf@&7x_p#Fk3zW4HJM?Dt_9uSGaUsc{a>poBt<5{HRfi`6yko zzEK&pJ!a1Ap%^B4C)u#zUI&0qiFA7JM*_Qm+3QIW0r3tR%Qvjtu#1v; z=D;#_c&$wE^6^&>VT>1F8sKKolaRk7nNqYh?IN-L!`bMX{1iXY>K2}(-HL)$!5U$L z*wyKPvkB`W`Y!I**3>)DPS^o^IjJMo_$J~nJ4=1aP5q$ufK>fH@;dn^47TAL&s=KE zPQ?xO_9BbG0@>?*5xLk{%s{RqzR{X~kZn$h3BUp69u?>$yKchzcs+O+bHcn|kNxr{ z8K^#7DHFDDi^nX1e0)-R`QKrfdP;at=g{rooS_tnT99FIyQ+`SRpGJr(@2%MyU-i7 zhs8fJPPe)E)_uA2ANB9cu=Mv$sH#NGh>a$k%}oZG{t8XFxmBVS*jkzO#aLeVVaG#F zisw{F&4xYfNe7e69muXkUH*ofuuNe`i({)jU?ckg`nsGW^S(LHGBeKTE*z~5*4#=R=?Z~dI&Ol{GB<9FNp9}cb$|{F%tgGXcIfL zzd5{Bq02qvIJ~Ua|FvBD&0$V%L2mCfWd2A6)$_59?;m{F+*@<-;VW41_o<(!BzV>P;; zCc|<==DkFY3%-XT+=C@iv=;@RF@#4gwa+B)6gSTS8T0Xh6)vOnWM#Aw2-zuV^^V=J zIJOC{)e~$jXkv+#`EDe7^6ALT1uh$)ry6l}>J}+?NOR)-E8v0?cS!}#GHOXcjtdJ0 zQ6#!Vp9>AXv#?t~uiyOoE!uWxSDww#*^6}hn#0adRI0L-5rogqBfS0lN`2I-sdFTN zRAGP+@7@Dmj)@K92oI95-7=p!wQ%TAtaXDBhvl2(EjQZ;p``{9`cv#&!Y^oMffpEw z_4(`o8DLO2w{raCvFR&38RiqfRgMf36U46@7|DBv=oKCRyNAjA)Qa}C8$^8!Zu4Xl@8LXqd?7`bvF)j41RXfFMeh+IhTXM+ 
zA>)yQaG4-C2L!&-n9m{+vE$>r%XX}S0=a4#nZkq&R?6J4xWPWm|9va>#VBLhC>Zsg z$HWA6A(Mhc>Ae-Q22iOvNd|12Do619dCag0Zp3|v@mQExKnUM|P;yFviA2eBg;n+y z56yji?v^nJ4YPX>5VulVKK~R;+Q1i%)XcyVq|H~3X zJj%dLfATtNHzC8(=?-Fc)PVC0y=GK`if;Fz zpU84eS)GR*K>B08dKmj`u~6@ve6BGu2SvT!1HwLjCTbt1e*{YVwCt_o+HQDz>hl?$ zFg^q;f+z@Yd?iN16yJQa=;kOo`TYS6d%Np6lq23UK`xwYsI8CiWv9#%AtYfJ`xkc; zGMs+%*{eg#TLEERKdQL`tlZhqsY~xa-c5QT-ag$_u}aikRJXt5u5cI{m|WWDj$`tw z`g%nM!{~*=uOaO63>RC6FmE5W|Y&T9Wo}6SQV^cL(tJu?XnH_{lw+ z?g-?ZSnUuCWH;H`O5d?|M>oUw5;9qc^-GGJZ-&BWRfW3dz;%H_D`FG(skmSU@V?%W zyCHZY+3nGDs%-_Mw%~6}ClGB4n`m`LzOL_@_Wqmk60-+RPG2WTo=naV1-7g;ukYsR zC<2B7En=}@t8%FcA|GhzQ!ZSVf)T9%?&+=Z;7*5Aqu39DdnG4Z0oBz>buKU)k{Sc< z*wgMGFq-rvxI}{QVrIRo(0TMnfFG{pT2}P0L_J=EuTJ{?Y2Q&Xt9?R z=D-d)Yoi6F!NSpBGjKlJLD}aqN^Fy*u_ANn-fbeN*eB|cyKmLaLno)&bDcH$okQm5 z%pD!PD=qA;Y`8-R{1TX{xaaGhTD&XwIID%7`GszQbM5`+C#(8do6B?BwoO%6XSS!O z%sidZElWBVHuS5@%X5p}CGFKl#dBfa_}OVY8}^j@d0p9%x$^K!%ku+JT3hp5ixI}c zMp|B4S0Ro@qFQ!Pgia8M>-x^@&QC4u)fRRq939KfZZ3~ayZ2LE*5XRvZ(m1E zb!qXY`HAiM9;C+HR``IE6Nxw_LV!0u{SnpXqF8@~KBjE#sYQBHaeHw-{>a(huX_cH0Bt~bV z!;6to<(SxGbKYkrV*3+OX)YzY=6}0p(>(KUW}buwh<)YuuXArGp{51Tas^WALWg@g zF|0$80udXYWx(R3eyxP>tUW(}Wh75%Gr+t_e2i=ek_W%)c>z9LvR`}b0Ut_A9)8@% z?r){V3SZj?7rF8NAIM-|Q*dw(t^a*ZMVcB<2hx@Q2V2aL#+Z%JJTE#+f*-_Sf}RzD zC%np-G>9v+PcO6$3)Be#(8fKX0r?TQIgEPjQLZW-01I($47aMJc3`(PV%rF#=c0%L zBWvM+1j-mrOb_Y!Cvc#R*fzs=7mv_5Bi=_|&hV#Xu5E81z4&(V=tc;@Dt;vpKn&!< ze7leYCWAQuSmRm|J=BP=%16(Iw-tb0Sll&3zOD9tO;!n8djxK=ZbmP#-Vh9fhHB)U zOSXFR9bQ-4aVp7G%N5J5!z043y6jp?dMFp2ZdY5z%@V58W!=8s-kX*XmFwDyDof29 z3=^)>KKyne4nrgxOEooC7l>n*o6fyDeo{VeALIwAF;~L{8st-^_3CFCZWrC&RK7x$ z#?9%~kF8Z%>hl1rl}q{gAXO(JnG|;BeD1MiJjpkETuEhKWqxU?xz)u^>nvySE?OsKfr8uNHR$C6^)7Hq zSN>asZ?k*U(ff`UeGEHO4|XUA!K^!b*$9eat|A9rG%gr9Q3CgWZK)K7WNxkhuMr6EAICUU+AXI}S!K_Qys zm_I@sz!Bq?2qS*d^*1j6kx(c`vyzpSMgI)N-|PLcL{Yj51xes#?WUcz@1S}41O+L| z&uh5+Vpm|o86EubEmK$LCe7U?O2xUpHP<_*Wg!xJ-+f0?=UxW|L@-3+VhZBl)e(G* zD>1T6!R7UWm$*2!1wx5`R`0F#_Chu?OPcnfl#L`ULX^(;d_eNi=pu0TB1VCC?nz&( 
zJg4l=G80Vtzo=_TK?N!8vhm^E_3*L%_XT;phqwcm+Q)v4@Z7R1F;vEkiMqkYm%+Ku z-i<2@^|?XMLZCPkFk_JQ;?{=u31%B~CC|svKF`#Oeqcfcm15v1fQ@>H?Hs1YgprHT zxeI3xqwQaRlxa`Rc?G|ak%9{7UD z@m7F44pCM+P(vQ#B~5yrnYcETm3cpwMCevK=j(A4W%Y9bGLKwu-EG9r}jN6 zvw!U{X)guW58Fb@_OxUjdB|XS1Xg=~HN$beNO2NQ>!GqLA%vqKC#G6IWQO3{z6X2O z_tH30)P*lTs(@hme>LmjBLUIQV}t{f-B>NXT-yI(8j*FucE>P3x<2QW^6b~sk*JH; zrR($d=?0V{gPz(x6e4qXIRXyApMC+7cGxS3nT zz9;UcAAe~7VD>o&aD&4I?0x>T;pk%tP$Ayd6|RfXC+$-Wzz6q&u_bGxjtZ$m_;oKn zOK5EnzzZFNx+!yQ7?6qrpxK9mU#+2ZNA1&s+5Tfs*arrVBo@L?ykP{MOa6?~Ck(zr z{EQsx>P4p5+?ylI6su4CeS{@&D;N%vs+L-kRT+EIK7oKtm=KuD-)*zL1bzJAjKuEb zedjRW7s)j!67&X5K;#~``(2OZ8Lv-|h_CMx_J7KfJTvqmaV;7LxPIoy8-cUJ93dAy zg|2G{9D-MplIDlC1noKoOo6K*gGky^b}baN*{frBfA#(MeIxL}M_|6cTa)CtFiji# zz*gxyXT>02TvbXs_i+=H8P*$yG#Ej?kSTKN}=y(g%Lv7DobJxZG6{aB+6fHkGVG>x38pfFhGq zpibA&WZzO?@XIi%Ks8uVdm02zQaLM%SepPu8dysv|d#!q{cz`%R zUq-TyUDvJeG9qlSYrSgGv+Wu4ZT&!gb9!6>U0Jx&i_^}fm{+fR+CUqjrLNjuioEra zy}ppWK8R>e$QC&5n{VD|+Ths?osYffC+?en9E$LC(Z&njg$&-@fh6y%!w-(Pazz9a z7r(Ydcy{7NRw(8=6Hm+%PhwF{Pn}Q?89@@>x=q~i3|`dXzT7dx&kB!H`*$AfA`@wt z3o;1DTHwAxIqFQb#zxkeRxPR^lxIVKM$G4s!iO4fncY^1&)z{5N!41-oHCQ8e31@{ zv9m~8l+$82viar}vbhzC`H8cHlS9e-S29l{lrGlznh6ojjHK2Y?r>X5o-rO5ou77S z?!R}L9&(6nr_XO7xdy+!#|87Q2lUU5fb8uc&i>B5|M-5&^*ddo03hx9ytwjpq>>%U zgrn~AdgWl=8+r}dWzlGT3@dseex|}{Q`&d-nnC0Ja@XFC1n*Yd)dJt9ye9oANO?`{ zAIwST2AxG|pWfU2Gx8<%+d?6>liOe^`7_KP5(y*=awQ9Pq>Iks4$Sd-4D|NzLRwKy z#UzS438ahUV94hAW5_yaV#Hp&QhsFznhhShKOMWhvFiW#vkt*^zxvbgnMNCov%lJ& za#Ng;g&(fxpW2>`(mDaLi(E->@J}_kO*;G5UZ0=h5Zm#cIxWyU3Bei~(|o z?&+1IxGal)Y_pC?Uci@h_AvZfNZh|go;goE87AxCX4bN)klQ4_T$6M*K(QfPpVf&I zVT>qr@OINd@ga)8+z5&zE){=;g_ls9>ncQolSq|nlUw}D4!2mwjhulLxdP(==4}J*{jLzp43kLQx=wtp1 z)){3O)FOC7)38v`Y!`9tm^_w-P%)4SNhqFFs)$ofMuWK0bnh6Ds4=EVA5|zx@-I-P zNq(n~RQAU)IpAl5SKCAMfRT%P?`#B>X_cI_#OYvmGKfEuub@Le6(A@w!`}F{C@FjbzD|G9d4vgk+Ps;HQ&hcjd49}55DM<`UCo=#^a`2Dk zQ+nKsKxRfkAc@MZ!6anSkhW$8r>JRTws%8gBu&G%h42UtQT1L0Bq&Pw!p#P~ALpwc z6PHW#WCc<_U^B^X*TKo1m-Zc#_prft7U!V_@z1ML!|L_ky%9VoIJY}PfRaC{K9CvW 
zx0APn8KwOaKOs{cpZse_Ro7tkl64y37!ntNY;D@pDUExu+ZJ4ULgLp-tF40Y@6QqB8&j2LpUJZF7b=viy{}59)%tx(gf$I zV6gaXB9?a^ogzk`xK!ahd}uO9u}ng8WpX9?5K6N)rN*Lxi+Y7>pryQKnXO8zRODh9 zPQ0xAjOt8^<)YJ~Q_;{_`&s)u%7f@R`Mlx?dW@ZvpFQ8DHnk5>^ZYe{@2F$?A>w^2 zNlVc-@hj<8eq8DHtt#1t%9Udj5L{5t?`2Ji~T=>KSV8z{Y?B+GFEq1calsW ziQnWc3_{loOdxpjimDdrJ47v(Y&v8t7T9%^R8M8M%Co9UPpG9jW4@>>AK3V1PG?!~ z;fs}K<{Oh3d@=-NxJ74{oRV$%^=8yO@3CLui`8<=XOx#u8t*m6HGN_P#5GD87cpth zU-WtuIZmYS$;ZXIRBKgam(K4|lNV+^w$Kj#6=afiF?PkqU)DKe6^7DO>gW5E%nLMr z+uj8`638)M8#+UqB=ujQ7ctk%&K!6ujyzagLCE4SCibe3KWa1IqiSWB(hk)5MF$tW z`L+LW|G{$%h7cv%m}U}3zN8a2X#9iF$APQ=1*~I&6K05PpaRz0SmOd4nK>XA)_Usw z0$Qc!7K+DFs}d`ewzWA1?BBZ(KXnxt@qogi~odi!gAdmb41eqsn-8+U*b!+i4@zfbj=5V58bulTMIN! z)?W9?GZ&nFv=Af%=<)u3Cb;N<-l^pO=T5jxKn9TW3cL2bAlmb43WeFJ{NUXGSiMr{ ze`_K{-mc{r`)=&e|70mB=8g2|m2fR^;Rk)Sl0)Jh`^dKE)tpPzjrqne?VkIn)c@9e z482qM5p~_Q{y}uGApYxeCFi$$>?3@?*B4^sdluW5|6@SiWMn@}&##o1;<`!c?+69M zLqi?7(ofQZjNzf84s7iwsfCGrmsJNoeMfMS0Ajk^lDbjCAkj}My4#kz(cB=>ZDfp> z#=6nFvM;12wNaALQ4+h4Y>b!4x>2uw5~dJ}Ga{G-g2F3fV+P+>})uL4_Igxv^R zAzIy3nKUKJa!~O*boh4F?-YM2>`;u|(BnJ3`F3ArQq_##q?{6@ZqCy6B$}itceNIS zQkY9&$-ll`paPjz_`At)nON22cXN#bocabxoG}b@VE%7X?w`>>c(w%Eb=9g(e^UZP zwnWO=Nm75)TgVI#wpoeK-=q*fqtW2m6scw<_P#4LX_Bf^%uBd#DOP!Z6^KPZ;!tCl z7rE!fqxg$SeJA+lWX!XpM*d>s;5gK{=0)Cl@v#141W0U(-?bmU;1( zf3!SUX8%6?W@Z05wFu94r<|8gQWURP6z^CRFV#_;H)HzRZE;aZ4K&+$=`4sTqvQ#T zDTq9wd5!oq%ya((k8U)?#jN}H7wS+Y7pwqDOoY*X8yB%`gwlQ!7wL3_(>|b$h#^9H zzoCt!Cc^e^dpH4p}7px+_ANpr6&WN_pNS7$=bfPRDUkhh!Lm6Z;cuN*&MOkqeG+l zhDpVA4-|A*0EOpGuDH=^4G>PKOY8$i1(ijAouyyIx_5Ee@86S8LcpWI4~XAg1xN{3 znMXuxGxdu&S+KC9|B9KCM#e9VhV006)#&y`bI#+O!Y_nj&C82r8DO{UVb$#I=`V85 zpFG&mZP)`Oec(9>IB5cmlsRYubNnTP{u*|x6JbOa8+NS{aYV?FXm~SnL=p~S+#KXu zXwQ^|dl(Yr+z8(gZ%>faLqBwh(DkAC&@b;j#Dq0co8^Y?Xa?O|-9)QXccWJhTGoqI zq3?vd_^FOKKfV|WC;6G`11dt=&xjgp%_4ONAP+%XF}Ni$GJzJhR9ks zGZWCdho6RU)sN?nMBn>%{bMrl<(io8U$8k6MxWs`R!Xm;I%Mw2@sB;$XFSzj%xiM* z;Ff({b(o!g8g&FKeR|CpmpTHDUa?j1v1|VIctU-+cWA9W%2i0gLY1{2yat${5Iub) 
zkSJ~=A!2W>{V(4@VS6n4X$3CGkv;INASHX8ix~|rsN=n#+CkFx*wu3?ZICN_knf0` zdq`~%{~NjLWlxyNe1PCC{0VP2n0pxQfx4X}IFk5$2f((Q*1ApspxaGtT(af;CSrK!8^F%nB2|6 zJN!2^9wsvP7&B;!@mBhsrKi94*6N+7r_b-0^M&voqi5>sy7Qf&C)PF&h9QV4h?+zQ zmV93XY)+n!6erxYpxvBYGxD;a6^mRfl6fEfrX!7b8)IUSa$o5t{aMkQyd@%ckmIIt zmBBSE+c0&N-WScTrLBti0Dja4$XmjZ2vdChQ#Wjm+9Oga%IP|rdYqNZ>bc?DSW311l~2ITQg{l zrZgm5({GG^i*8yo@{YzdB!cKVM{61~K@48I%W$m%Z4ZebhyQWQBPKA8_T7qdiy$Ts z0PVSjy2-@HQxB=Qaaz@Y>fEw1si;7amx?Sh$_eUQc5an!hSD)kVCF47x6X7N>ml7s zi?{f6%GEgcEkCzfH=|(O9nkwFUz-SXJQhgOuI@{hI$G=){4V56-IJgq|R;@`_geuY*-`^SY){b#r&)vc`Ee z!m^lEVzRZ5olT2xUCXM~#TCfq(bFriYi`?^a${#B^Md{{_EGhX(kt_$v1@DF;Ht@O zt@A?o(cmr2XHmeEuvKoA@o}m2nmGLi*9$C>x&oG=K zJIOZ_Y^htDGEW4KQXD2a4mWRYA)Xvs3*LgX#;QT84wW5Co0MB;wg{~yZ%JAc)fdYS zxvu9O51X&HV6FLYQ5&N^%Qhemhs=(dcFD^-Th`Y0jeeWc7Dug4o6Qzm-qx;-zKauA zN8V1I%^F+Ir#i1e{xbrn*iOpL#7pX@Ca+=s^Bkv{PR!ZMqloE?1-v)=PX=)~Dc0kw zaLtn)xA4-ro-dMw!Mi%OreE|1f*uQcqKtEJhy^2FFutftL&{ta>njq%c;Bs#`ieN6N2 zkBWyvmo%sIDx2FVSAb4@YM#au2is1~%2&;AJmY{(dFV4ka5hx_UYc3j`;ZT?>u2}% zP9ZPP=;0McQZ9zikGMcx27yr2SPKTqi@+-$tRI8t$EXYY8jVKa@Bra04$x?T;PI=~?_zQf3Ew!0g(m^&27ltmb_G??b-=D(ZpZYXo3W(W>;*=lMNOttxOXG={$*J~W0m zCslGhY#5&=KtY^0iVH(=@+KZs@z=UJy1uw&@#5D13b-}kyI=ugbI9PC;g+1flVvS! zU+A|uZF1D)waIFc#f{mx!?9#d@aHW2KjP@CQf*)dI0?eA2U3r#wb(Y3A-GZdLAZp z;cAOl&T?<34R*921b4H~nWyIuCbpkS&5tiX{LGQ0$q1DZnvsz}eF$u-QDv8C#3 zs(6!3`PcNR2135)BRH93+x{3Jr$^MLJs)>LtZ}I8e5L@TW5z!9PZmE2ife~=m9&1! 
zyYg$u>YSNfJIAI+w&7fQym*Z5VoIi?Nce*Pys2Qr3+fcnFqiZZJl?Z`H*Rw$Hm%Gn z&oI+zr&ITIO5h~DEaL7#jdaL(9F0}+QSni9TEK@~V8*pn>`_UX2I2u}| zZ)^Z$Rb^`+X^2RUtqQj`S|ZsXeOqme7mgRM5(SzAxva3cvAL(#MW&_nFUV6Px~S-* zeu<5bH}JeEE+;jw(kv-G7+R?iF6CA$fi3-Rkwj1sR&T(m32D4~puyRw8ZKi7L9wew zn`kYHu*ZLtQB;muS7ua{G~|B2>`Jszx+-&9ow1C`7-*^Vuq58dPpK%jmea0o)3mN_ z?$sQ9)QGGi2mNcFe9gdbP}QVe)3|R5UE@Blv1vFzv7v47a+P`GXSjH}U)9{K9ibkDHQb=?kH}k~9T?x`wyMK9F|=%4*RMOZ=HZV`gs-0BJ0dmJy;ES#P2Zu@ z$6$j?51t}1RcK7p*rYQ^VUtcTo)WoZZj91crZY?7q)m%$#J8s07;fjoXZCtu);Tn={atv>PwIA}Vvx(*xrnTSFMtT|Hwg1^e z!dgcKS^5GQxScN{%=$as9yJbuGcc!+Q^D)?l>Wm>5w=+qv2X~sO+y};Gt>89um5_R z$1HfOyE|e6la#f;mgXyUz4I(gEBNbL{QdP$_>d?jyw=HkZ)FUO8Sl{u{^adf(=}pV ze$vFioV4-qE)E#SxpiCHvs$D^+bUdOeyJ&e!N=NvqZ1(w`Kyg1^$u~lGWw|b|7G@< z{3ZsCu;w#89?axJYYi+5DYV4f1ngVGZTuv+ST09<=n;vivYOjeg3Iu-OlBc%5lBO> z4A1Zk!#o~hw*3nX$Kq5ySYooR8nQCjT#e%#Y^#T{`$-`tctITCAg30$tZIczW!2e? zYki*OLg0N~-~utfz6J^<3d|muMo0&VgWls1x<)-mg9VIPfpgyyYK4AWFc8h`p#|ns zjei`P_K<`vnhs)rlW5+)Ut?+B1;|5+s|V-PY2FveBUXDlP{pQnktyd<4vpWLQk$&d zB^^@JNk7VKjNdjq>*$d5ogah^=b@x47eI@7ND`H6!y2`1{}MlJDAQ(0rln7&?dvez z`uBHP<{Z(unB}O5<){U2LzK8yXOkiNaGsoAh@4)QT&5@?1@ss`ewcp}%wjJ6tO=sfr=iwZu{bdcAL$ZklTN74*9;fJyIV50`1cR2w51cFci zVV5uRr4O}}5yIfwatLo>dVTWde|F^%RRtJz8JfK~_gBRR^smn&yS(>zDhBwT4?K0~ zq}dpCxP_T9_P(a|LANKIgak++XmB5bPVnF` zBrrg5cXx*%gG&-T=-@uMyE_R4W^i{8GPt{byzk!KyLseE+} zk$eg3f+_TmqVva}db04|g%m;OJP&3CjMJZ2($=FnUf{bA)@n=V>~Gz#XA3eB>ZY%$ zRo`(X5JbsR2^AFWq>_r0v-$}Y60D`8m6ix^%57Xeky;#?# z!DBHL(vq%>-}e+ftHnrqvCL4-4@TR6P7~M+#BWMClkKb>mi2x$uc~#gXwrJF{v+N* zO_OB`=dkMSZl`C}b7FY7smbhnp0^dk!GjiIvrZRuI^9z>gSKSWThFZ5-nINHuhQ9i z0a#eI@}SR1N3On7gol*5GM{X!>Ku@`CfXDeh^i>%+*ueX zn}f11qwjQ>^34Ggs+3mScyQ1&r@UlWkOKWmcYYN&SROkK*HuQwZ; zM|k!CVc+m;B}iRn`d_#(#;AnV_-ilT`L`z?KD zu4BkR{>_B%pyR^O4qHc=-uBp|l7+w|wDd)Neo86pT<`l2ZfGC6B@4@v3$w)IcrC)1 z9DalHTj_K}J%IIbCp@+!8+Nk{aSi7EOb&2RUaRnJ_z4zs$52&xbKT${Wx`zVSZ1(C z^;nq$eL@}jhGQ|~Mgnub^9M)Az9%PfRlh^!!NIH)uPAGK*2#riRSPP`^uZkMGzIQU 
zn#qM1#Z7_DAXWJ>ch)g?`?0I8++(5KW63XT9#U&jF+Hcyp8khda?3`jJN)0x#8`-o z^B%x(pQj#uc!crVsCjE>nK7%*uY}6snQ2Q7p6tAtNS=M_xeBg5yp}BKAMAF76?jYv z%4&=b<;BWNqG~ycs*ZWomEx<_n(&3e^<0(+bq;whk|fc{V6i_4))$ZsOcj zhLx98ge9+KC)JUKC9VRJc69~bu{)nUs&@*VC$t#))>JL}SSH1hg-zvgQR>Ya-M~GC ztHc8ZDe24HO=j7hSaDjpxSIZST5;yCvT*sXvf4ylVa7;ZA@jDjYO@C9X*Y=4 zxyz%<+kb43N0s5NhXT^=r%lC&cl2I?d-7g^PH9m@0kRyv)6PHXgHe2&pn6$Wc%Y-Q z*pSt1VOWTH6KPd(;@@_)bGGzNit;7mfomc$0=`Sc0QQF51%)mb$sGRJ@W&%Ad&QOT z0cAET+*E{f`*y;@G+E_BU|ZSxFq{%8S4|tY6R*0NgN>3v32zVgCSbtM&Bx-kAEtT+dRE=miTZWv|RG&!JC+>e;J3XTeNCnz=e^(Ed-VJg%wb6sNQzo)pq*gFV&^Br-Daa>zqEtp;ZN|1V;Q`pDRI2 zvEHEXSh@XlrC-(MjMu);h41aR`x2j#hrxD}<+{V!OA6JjE{auRd3lD0Sa|R5y7L>p zz%+n7M?ru}pOi}Nhz9PMQhxK>0M!uhLH?4oNcbsOx(VbBe?rgr?03mk@Q`;R`o60)_0J|4fT#;u< z31><1XGy`*`bp9mU+5e@(K&#tAhBEFf!oPe2>FR0ha6=uW=*_C{T!wroiQTn>4n5d ztpz{XNh-dTECdM`54#sDz;?LaPFiRe54)(sUaP{ki$Gk8KIXYM1K{)I4HydGSB9LFge4kdwm^%b<-!7M5X?X2`BNdr4IRF{5<#)<4pDMt!e+l-h1U+9DLKR zlPc3z_L_2#iwI_ZoWJ-mr7U$Z{o&oB87N%myQd*&2O?hBVT2jF7JKFgc?*zB^A-7U z^D+1~<(BmQ%N$ov3zmqN2NfeZ6n>zHc;$^wiZsz|+7H6=_F$7Df$zC5+9E#kydPhy zq-5uP-}MDA#!#Dj9I2aUGjRCV?fH?{jj+<#@L}-fNw&y~fXC`b&Og^{chg6L<9Alk zL$KB1Z!aH(ABDdh4xb`FqYCt^-t$Z@^M`@PvT5xE%rWzPAgQasYZ6<@#3N__gMBOE zzw4)t^P$x~SN!j2Vej7@$1~nyiS#l#`D3r3fL}a5%cEMg(0Yn5v9_$juEE4PWOIVS zNbv(^Q?$J9bu|8bw}_q%_jJ+LrR;{0?RQPLw~hfCTpkdHU^>b7g{7~lK-a;Yfq41@ zxc*1@zd6(X5}o<>;ToK(Z;i82&-S?O-pA2a2FTiE$%pT8#g+ZOP3foa`C^gt&cp)* zRd~BdXPqJ^?jZt7iVA5mhb*D=8s4iLhu0E2NT0H%X;^d{uP3|H#hJ+b6?bQ1|1YL$j+O>WY>4&`vyyJhkRZP11;^^1XK_RW^6?L%= zy4jcK8Vc{CMeb4_vKCG$?5^KqP^u!&<#Qu=cfr44<-ha>Z=S*jt&-W!-2Q-%NhRP3hzb(1?l5pS1 z>jZv_-#bQ-W5W44V};mGoQgcY`BA-+%K3e&jGJ=IL(Ml|c47%b!DMt5BY}aQ(Pha> zO!bwkL%}aWWqdCd(1KT9Mn13jCD$07^9jS`43iIQAVNO}J@RFLm2{~cD_MghG!p@0?ozvB9NFM2F%F6y>@Jri5%Td>%bX^WPQTaV ziFeSX$UmN|mUWUw0)12w+m=sr`>57(V3!8RaAScfs^D!tKcW1!``1bGpY9#~vLBiA zD{f-h)7KSxTsWuRUlwFyuRho`Q{1RjReas!7%%b}8i53t#hG;c9u>YIBVWvc{Tgi( zA-cu6T!w@GR594WRp*~QU5ay-blC}_-TZ-f4<^E#7?V5^^>^6pPRXSzZtQi!XB_%? 
zt>Lt{t(sv{mfZBKh4+H4-xgOuY`;p53B7UV=DNLUMgBO1zElEFytDg*bDXb9&l6_y zHv`(sXf(kgKujB+++)$Bz82RzXltIWwfy9&!)cLfkTO5$4E^%nB3Cz8DOb|e*3{nA z%+zf?rq+~FONl4mXC2IMP!7Fa=iVstCO-eDf->>$9I9P`Rh;T*wf3f4Ey{*~m(N~% zA*OM?k=D_z6jPm}Bw=!h)LgD1K}V+)pJzsEPuW$kC02OMcV6Gfp=Q|dWjY01HPE9gs7WYavqv3 zMXi{(rE`JMa+r9 zGn%pBFEE^KnOrt#JIz?s7ZJd{Xs!tK0elmTcm_ZT!9<6C2czPV5Ci-}Faey5V14S7 z7f>QmQ+DWYFlsO&46OhD#5e?#hBE*WjYA@B$_UM(;S5Kx1G+>_`Jhj$#RntM0kk3E z=+MNFR`7Zr4vCcMdnlCVgcF(=+DZ<{2?+;qGJ_@XN#smjLc(b{gAhqL9)SoTSpPM& z3fzN7LIzk138&>`1^3{SD42SO$Wn8DM+D#`e=!9>LupQ!p&FsBq=48ESpX*!m>xe_ z*3>CPmWDGB5r^Xufv^GVV?c|*`FP2sfbkGnT22;lK7O*isb|OlHK#u!94A@aln|;# z!x@Yq2RMta3j+>uJ=nmTc*!!R_93kb>ppI05=Y zKVU-@LcOKe$8oOH5J)fx8%h9H$I-$C7~ow0KoHTK5I`$Jya@oyIM+c4UYhC{gfsvs zwypzM#=GVLtK({ko2rGB(pRS-c5t+0Oj)3HG}Vy^0l==PsW9{$JQa*U1(1chXf)K)h%a8>O=q>mFzllm_Y>Ri5es_XG){5B(OpHqiNJD4n6a(Yng25rgU`&8S zND^2UU>*Yg$43;52L->!#oG3tB^K5;`>4(GDQEJ>)0b?{B!Evl9h=UwC;#2xOk5U8 z>YI-yDo}G<=J9Yjf|RPF&s;p*Rz3MVgEK;MTs#oRR!3I7IeG8(|lgdkS739}0w1 zP%?#6&@$Vm9Sa*o?G*e>B&FD(ZSo|#BRf1gs`6$NhLFCcX_F@e-1^@X^>O@{;7D2Hr}b&70ZT5f*I4ZFzR zXZ1gzc{uxcgZ6#-_rfpl3;&w!`mpLRit>q@G%85c#BbYdT^IC4ZA`Wfz+c%Hn^!;% zN~iltrqh7-4oV$8QftU|;bb1%i%Hbx^X!k3?Iwhq%i*a78s29-|Mw~M1s?1<_~~du z__a8~VZ`YyX`}poRom->BohjNWq|6(|nsDZiFJVe&S!N16_|80YJ>z9zX z#AbNCgY+`_cpabzV<2H#x(aM4$h=EvjXaob+2*n_riNI zpH{MErkM(RP>5CW6w_w>b5!I<(sE^TEbaOBL+~S|_C?x9lP{q^%tsU-c9*yf^Ro}dX$<$Ng9*46%P#_qcWz!Y-shVSwUx>BZRI$)l1KIG#La8 zP-6Ug>a)Cg%X0FhElxqB+(6gwiDA&FRVhY;PrH^yWBRRxWm=B)L4q77`OOx)1VH=l zFC|9{qXa;nc#3c+(a5s#4bxu~jhGRK`C&=Mlu$ z>e_B&9b5bWi5t4{4#AeFIg1?PYw~=%X}8~81K7E1+u>o34pjYQw1;9giZi>K8N^mx z`FrB2`}Eq2RfWzQf{Fb1 zb<{U?YQiYT*k$`pwj3DuQAhK0VPE%@`j6l2ezmo^A**}@S3cTya#4yem)xywIjr4{ z&(yQ*aUm<42_(RBir?bUD z>#{6K19`;^Wa`J$|+0pmut%g>`t_x6`* zEp;^Nm}N!RaY%<-yQ6zM6S3H=3n+L9`6B0m{65gA3nl}kMlDMkpwr<+JKYlKeOue_ zKjXBJ4Qv~JdE(FLkOwm`2^Kwxu)MaZF8f~KPCYs7lVE62y+N}r8)t5UF_TKO%@`+9 zDH8cw%FECipGjyV7kWqJA(%`OTaary6~1HcJ9`)kpPg;J;CAu z8$8{tPGr0sdhtS{?bAw=y760zZg(zGJAI|ps2m=LYu+m1xkuoziI97}Xn}*$4Qif) 
zvh?to3g$3lzJ)-{^r2u?KQaCyRtB*`7uQkgol%B?keMs5S4!Jp`=+9E<8{pR=f>mV1h~$ zpqiUj{>9R%4=PZcp3CvRAjc_yo+*(Q?x^ZCvhgt{+Vp2N-5*0z|70Vcc{}=g4tuV7 zdaaJjn>{oxv1NyI;fm=)*r})f;c(5=oQ?4AwQk0I#;i^EtH07~eSaf+D58K-f+TLt z2k=JN-#vF)5C9|qqHmY*w)OV*F1zYq+x;^g)&T%g-OH2|Yo2QNF&5FBJS0|1L%Meh__TUMHSwxfOtOz_ zTu*+5ZlRn5I&#SClT`z^z^DbA{1WGmNX`f+$e69&(l64&91jNg0oNg~S1NXLRMP$6 zN9RJXgj*dX33{URkTW(jgLF~KoIzSz!FyHtAua;e9%pZgZR-J1hOX>RsQzl%pa&fN^O51z%B2UQi5cYiSbk&>NXlDs3#ojAFTR~LT1 zt6itGQPs|Eg-zk-Z~K_K?$aUuFu=Bv<{(yI?EKiVz1QaLYqurP5UDc6c2=J7kW)W^ z2^{i65>DtKr@DyRA^ewe`ia`g05JS(4JfU6`+KK2#3%qB z;w`>D4_Jn*SKJMct}@F{RhFp=LTZ}jAnJ?B@s~>XNjU{YS&wkf2qONCY=MVOe$PLX zCsg3ad522h$wU8eu6(QJfySdpofNqNLUWz$Qhedhyq@2d&WtFXGx^`&QNcF(uA@d8 z2DCQOHiev_O%r#0x@P5E&I4teSk4Vv#tA9#!Dx@EpOp*hKh&n1$E531w986erv|6& z)GbO{Ci{0x&TDRTtfmxg8o9^%D)k%ab;e6crpR~2Y})H)ZaTG%%Id4jbb-q!T3%%+ z>d9*M>5+=KO1Rv#tYL)}1Bj%V)xT7dSa`{-PL&8601TI3b-e}<9C}ytWWPR$uYZLO zhk1Wq4+BVs+?61p00TUJ>2=E|ORxO{r`S3W0RQQ&w$2Bw2=&%mcZcGG<=^p3tuKS+ z37-sHCcq=BQ}L6%_hjAHBiz74xYy-~OK>~e$udCX9lyr9BmfzDr@7AX55teq35w_j zh~Qs0A@Tqk$zE-p~QuWQKnjc7TS^ z0U|BxYQ=R|s34dy^el9N#VHEW2Dl7eV5`GjN&_tbXAoZJAh-a2cpmDx`v`66T$-_WR-xfIBut#rrnR(>9Th6+ z=T35)wa<}lA9p~=X|o~T@)NI*$Zh7J(SfhNHi_1iKXNTRWA_`v{i=lfH9IemJ-GEz zq;SncDhqw<%7Zu6K4W}zB^6W~k8Q)%cB$cChHN&H+rWK~za#~hhpQ+{dR~wm!0&Rr)cL=qjs)nxZf=N6e>pr#x5=#}O2z{vk zjC|A`*Zi>fL;OAz%2tz(%b&c@3s^Svk~Za;Cj;m3-9@)*!2& z@9{1UPwM6o&9a2>p^zpe4C`|A+Vp+j<}(dSMyTfyZQny;zoZO!Me zQD>xQly~*awk`_=fS6IfM5BQO)igj;F?EDQ-sLKBwJ48ipe^FfZ1jJpgG!^{YSEDW zPYF(%Dk@XGL8Y;8wHU~%<#|lgZ4m;q(U5AIT~o3D|0awmZKw;D5~=>dXTy@wc$-0b zu}*#d>lcG?*TlCoIE<`QD9vMw;_{wGita9JnnG&G>#Lm(fKz+Y_fvqLDOr*5pPKNS@ZqEX9K0ZIw&5|_tm8ikst zAxxpXKlgatM^-+Yi$GDpSh)PU>x|H~r)Lkp-tBIp^qPg~#-)_O4LPzxGGT`Ej%Szj z5pL4aCHDH{T~7ao&)=ccuykHR-Vq1Xjuft#qs&ttW8`4x(0D{jv3aRpIo*=>gQWD8 z!mjB~gT-V09TsfJLo_jV>6h0N^3h-s-IsF>p+utIQUUCX)*FYj8VXu*9+ELZb0t#auz;w4UkK&=*ZSRmF$!+hS zL(AjW%IOtT*Xrqq(;n<;RHK(7*>-NZMMDnn%3}RCQgK+XCn)rc-O?{dub-u3PhV@M 
zD~w+U3mBB1#tRHjUbCj#gi)#qM2%lh3OJNG+x6xMLmrZbf_H?(kmHbt??Z`Oy6~=1 z(e%WiQm#IhVIys-zav+^TI0H{uZOqv&<@4g=CwX(W;`Kvy3d~qrceQDY$Gk$2P1c;2<8Q|yr+(lb=bkSn zr5wH2RPTGfUx&Xm;2^a9STFFnXc|Yl(L=)3S!y+9t~VJ1Q4u^PJY`jZdnm&7{zV#A zGVOo=<`94tJ5yZ#W^*ZrVD7kMQ(_|->Vg%frahd~m$RLHI$|l)cM<&jQb=c)^0nLC z_Tk<|f$0qN?c0DeAJH2SjSnokUExbZLG%GctYX(YVF>a_I);lgCTzlpGo-M6;Uz7( z$Fzu_K6c5UcMDayhdlfkssk0c47*9s@5k9!6rB0U$8YIgdKc8`7mz(EjRg+lP3_B8 ziwfzKN_Ow~Rq7TW8cXILMvHGP7S!OsBHE>4H62ABllN&iL=AtaAizhZC`t?6$?{~s zV{QwV`(ey+5BHDZrDvPk{135_eav02+3DO~ZT&;7y`jb9xbvKiR>K!S&#RqWpu#_wL$%_KDd2H1-&}x10ZC#rB-=~$i zHbm0@77NuZduTtBBYolr zx|`Ot|0Hg?)GEIbU|NOB^S#+%v_2)wbE}BM~-8qyT(fI#H&kx=2vH#RQOvO zb0c*gJWb`>{hWbc=RBA<#9FcAa|LQFX{lZGZtl@2M$zZqyqJZW+~1>LrCkiZ$oGc6 zR`!kQQd=ag=Yt(s&LF)>v&V_Y&Vo#0T4Fru{Ge%F7eu`Vxh@V9CC4T)5+9f3-tZ(V zXnPO-vXS`x%ch5Cf9gZ8)HJ5;RFRedQ_gIzxk@TSbc(KE=*Qi;rfJp@riP7)-8lod zX2?E|j1+Kw$a2@l>*fY35Ygh>cV06`L6_b}bKY}luf3mp(KfFT8#kS09{gIM3jPp) z#b2E`o)!Hut7|s0!cEsxW_&(>ndkn3(Cvf$vY0dkzZnso{g(q3&W3zy9}qH{)5f!&FwLK{)t(@+e4ns zr9v+HNVK8}SSSa?qCLnmm9Zo?@Dy=1pJ0@u99oS%j7DK&WAX8gXL zxX;iJ#Lg9tJ5xLjo)gYkvqEZZ9;f%iLTKB#E60CL6o`0xj*gpIy|#KgMw(YAX9JsJ zVG*`-pG&Ikwc;8lEbxg>NLS~xyvl4qWg&hi9VJ}>TY3s&;JJMEeH(uaS%A!?q9Ety zvpO2|cOYAEKBD!P#LVD#pIs^QcY6zBzdDN!p^*!!r5v+C)Y90%q}=nb^CE?vr@3-6 zl1~PTExg52F@0>eyu@T+c1}em!xtVGXL+|Yi}-5NYF;r$QbaN1l9;0fFQ9n8=HjgB8+=jeGht*#CDCupQd50UjCi4f}y6NduzF` zk+2%ovYWlSOkP)blSyp)SfjbM_iK;pTnu6Fd28X@0Sq6HyrqTxiQ8;SA$KKg5cPt? zxQS%?Zb=@w+)b1Pnj^rqEg zuv1T@rwTF0=9HGFFL*@Qrz{?_sZUJM3Ec?iaJtY%VwUf#SQMRxlCOS1x|hXQl{=5B zJK)0PGHuHf?f5;7gi3_6`61BJ-GwWN(Mz3T8x6<+X{Kbc2d8u(Jb!a9_^?+`&|;e8 zX=$apnKBl#`tn+sXS{9l7FGi}+}voGhy3qM-71bG(zC_Jtr)2(GF|1D$1{`t`L*`G zYxaunM?SisvcaajA&>A6dR+nVg!OVxN>%Bh89Yi2Q`4IWDTx~CZ9EHaCGPF21MlXm zHzaGmYB5Sji3P77or~?67}BXe`_cC%-Gs%4pLVdTHAD_zG3SNtGt>l0_lB-j7WnBk zH?BXnxpsUA+}|4@{ky5rM!*#b2@QIC{wH?Z? 
zoN~?##a;KC)~G~2Fd&P4vk+V`7~A0m9|elzczWoEwT9bl;oC1KdF^Mm45YOfGoWaT8>Owf4oyKo8o!&_TUqQ=`svHz1$Ib5PR{h3vw8ur#Zp*hpiI^taSW zNHOhM!|6mU`?Ckz+ev5Fq8kj%;3xk;cWB)Kx>wr`Fgzg9@Lg-RKw;I2kiRE+D{4iT zd(ty}BRub-1NDCDKLS^!(#rMHobQN_YMYleOYlZ7tV$YoEQL$6%Oq*^+Uz6LJ8JZU zG$zz=Yxd7^omcb_DinbFU+Occ*NV>qD=Az*;a(XQJdJP74~1dQ?9u{NRZXM!W53Sn zQw6eQmos#z=@CK5uD=Sh?RdC9*AE!27qP}{wM~>T`vqiXw*Js+T@ls(nNDIE-HMsX z69+R0S}b-Z!=7O05+!lD#XIYZW?n6fy;;(dlNe(Xwxx1J3rqhFQ;q3YrO@SAr-C|{ z@0w_qc&#=88rTlEW@Wu8ncE)``+sMyyU&Stjt)ICnN(XJS3r|NptxjdK zXhL~u#n<6$NAiLL{iV&|z3=dSkW)uX<1+q21?^=EwB**Q!fF9nGS-%_{Gbu{nAsez z9HqE}E3F4E3S)%O zuwNTLC%+DEj;YpAzL4&t{5(s`x5k!sOjI?JQyUF0yk6TGj`Ks=xXixOi72oWxwN!v z?S=sN%@zvqFG+$?*BYnaT1J1~#xHBTAgN_+z?%8|(aP-AUd`BCf0GKoo4`OimUBUW zlMKc|Opo5&*TnfI8IyyN@UaHvF?E8^4i_}XRAFu={L1od+~MtOcIR)LKD54BTj6%6 z=;?ZygdR&hk5%-x5|`&QmN#}&CCP~)hHapoBD&zdiZ#)d5%1pjw#wn9(hD&T@`lsN z*Io_G!EewzrO;SI=g=Rf`MyA17HE?28Pt)KnVdqr9+dbaq-_9{{YJ# zEi_u)ONnbo`NNlNgpCgAtna`Pu)nePQrj)Ytzk~KqHJK_)oG{cU0Nzv?{{ynAfH!&FH>w}& zTlF^SX!kof?L{m%ifu{ppklMT(5qqNLZN)c+)*d3JLpQ5QQoZ8w$`peHp2%W%L>fs7?L;vhSM za7@BYi*oXhv3@79Py$CIuE|%io|X4Ki|_tJnahfD-O8c3u?%jiZAq_f(4Es^-5-Rz zCwD%H=0AlvSyb?tLq4Bfo+m&3iWsO8a<6MFk~lL=ci|j*@AKyYWbDUK)q&48>)uO$ z;q%9f^|(FO{gNXipFQDUq>yyR`wfpDjM9ff_{QA!k+c4h{IHC={jmOIYqVj`qRejR z_3$6&*L_oqo}RkJMjiyjCmASLPvpF*I`mPCsijM#-x#8$P4QHOuOgwz#x6~@7zq)z1PK6a0^`5^XOP1B*yLKb*a%UM;$rpemE z2gNHHd7$EFSg(~j`w7K7HKGi~mMYi&dAuQo^8N&E4`$1RJDPdthorYpO@=%6QZV+) z@)3<`r|uQ{=5IJV73lk`N*u!91(n1gf4a(;*;C&2k`6)b+bCr&E}N{MPo9Jpp%N>H=;65i#M_ML6pi%UeyMuf2F7m;$csz?)l}d zY5%w)%1hMTG%^TDGBIRb(dCbf{UUR)`yk8)Fwd~_Ryi+ak#hh?dqnq8U z9lj&^P}nTljY|^>{sO>_`4aFR@87B#^o|?U<+6YKK0BF_YxFq&yMSMA+Y>L>SUp?O zeuC<=@pj-N)l*=|LhCHRXbrk>*ogBh&+<>hbexaE+XE72=?&0fUOafBv*|O(1xa;Y zd9v4qyY zW=dDx$O~;zU?Y~c7?~x;w3uMybNhhQOktOAsadoOFB(J{u7np*8@ec-maux|CT6kq zLT{CF3-waeZR@%4DhMg(a3N*#EL`C7blB#t5qw6!|+cP=IU_##5M3p)2CeE{4i~oXOQY!Y+%-m&6|74u%BY z7$rt;|Hn$9JPb543OGI8p=u*#QvAuY2@C|>(9mQTAG7{S8r|8b1mPqdbp 
zax0{k#S5eVtkZvVKXMSs4T$4Ak2ZV_Z~RGLZW%#0`X>@d_?MazFOyIgHBrp25as+? zy582Fs0EQg%gAitPX{XZ{Yf{jKOsb)WNp07Ua^!^)*-pm*zY0d0w zz;I+I$E_*mKFg--32PsPm`><5?L0PA#5NHzgO?eI76J|n@k|f#REVV9+%l3#8w}?g zY6wbQ-vUJ*j3XnF3w}lDkq5}F2j}IgqAxWi!qMD!76Sz~yAJOa3JK|jQ)aR3t7Qh4 zW8La_Q!i)82E4L@+amBTM{bG6Z;4tgW}n_17{*e3MgF>s#z>{X{x=r+H_D|pjJry* zi(mRPXt!C!(kp0u&(LTL(X2uJjaDf>Nx!j7)La8SYJ7gR;`hwo4!phq&FRR@Y0H2X z_k8C;D;dwZR)wz+OGy2fVMeb%1e#s6lm0Jd(DW%HwHcNX0i2YqGKh<8xy%>dX5oA| zyV*5Dh1%ub67Bg;pyb1WNb2qI=0i=>qwRqK;vS>PkLHqX{ULCnW2EiJaO=@ifh60T zohXVHAW$w4-BJCL{-#KU2Ev>xAGqV%n(%#^vrq?DzdzHLz`cyTB3@;f+*%ndm*$|Z zqr5C|CIQtf;BHfTPiQ?TQuvpo~ z6iWYIz;)z;nlbe2|Cp@&giFZS`9fSX6CLB7(RaWH#ei2oLjo`%4C+UQudv$@J#Q%Q zpmwg^yleZawF=_5En01|rstd_K$osJkgCpe%`8^+MoOTmaiGSL8c%y)kH+j3Gnq}U zafE6q^e4A+l?3zDmu^zZ($(Mpm!L?Kmf(LsK0%&_k3-HrX6=52EB?rVofU&kw#@7zlzWz`CBGfeEau1bff* zj+ZKX@hyu>KPsn=MHMA%-d}vy-tz_NS>?|fr211+1kJpH%q*{I`cwY2cwDMQO0@%ri10{Pq8}V583)p}qT#?b^wDG>^7+;IEq5UY-5+%OcQn zxx4nzc+QQ()y98u?#pBKkB1SAX1YxB%9bFDU3|^aqiboT|>&s zHu(sQ+IzH0bqBVYm%RHQM!n}Bs{WnU4HTRg{mS51VkKrmm&Ory=`*&{TEATV;rW)q z#67&726&n|70+^Q|D@O^yI1_Rh5L6#A6$v~rcGjZ5>CU?Vw*@>zpf=S`6`TojxNqD)J_HRLzV^nFp}}bX_OuAuhv1488_nnXnpPE#OUFUyJ+nHMe_fpgY@}oP_#z+u z@yPP}x=Fl_6>+dBcqREAb7C5esWkQyfI)wm*fLI1{ERlQgcAI|--#3d4j(w)Wh^Hj zT(H7y#Ux>$swW9-4HwlvP`8)wu~D%V(LY-ge*%0Ak3Tk`rL$EzD%zMt+}{3>bu;h@ zC%}UQ+Uha^-g*#@d z=bXi!Bx-Hp#rhIzY&pe=N|Q40u0mc=DXq}H@dkg7Vm!iq1|^@$Yu8 zB==*3heaY!j&z@A0K1&c+{-bC`5&TwKf?YBi!7a{;%@`f5^jU;L6i#-Xl`r27u~<| zQZ9s|VdF5A92tZ32jKtgHvdo1?OsL9M!42(+^QK)6n1RPq5q*L?7u=5VY0=q=5e;F z1|{|#DU;|kcgdK!|GQ*wRN|fczX2vHwvDws@7B1e#FJxXkbWOr=Gy(=Be~PAC!Ma| zv%WnBOKB!uf+TbSE4D5!2-TR|tXlYx4ddgQzA$ z0OWsx+-%G9U=kDK!O(D)*fi2uuA##3TdvO*vC<9&C2C$!ps&EjC{;(dztWXuehZ9%m#FQ-SD>}E@F*_O&=uc*Rf!-W;lub98E#+^$#N8{<4+&P=0IeDUmp_m6ahGIT z7Qw@X_toA54|s;&~{QCDpq<&rJAU~lG~nwZC<2o_K%NMqQUV7YTGR2zG`)j zj6d7vHYQo84niMB$z`5mw+FuwQ7smLa?_T!5A5`7#`7Xk#QoAeEV4xyZ~WEu%{R>c z1*Z5c_<+)T%vB|S%JEwMYsz0`+{B-as(bf=Y+-eVW!8s6Q|13v+|q`osM*#8`cIc(Mt2q1be@9BtrLkc1OR_ 
ziqV0%Y{Q2;XZd(-BiJxyVwVH6p2FgAUvoYDJ?DxX4w~$BwsZweJL)51t~I}PKSlkZ z=yiHm*?B8It!@-?Du3oKLCW0w4e20BXDjP!SYXqV7OdEdbkv-An!+ZzqBpc0VbTgB zWC+T;t`YNk@YW3|`O4iM?y!>D_*@}q>aH_-V-UEE=enMFyJzi^#a1f`TOTbG(%u{- zXl+X7W?RQDCc^_=G`qRuCyfISC$t2@J6y9GTBFnGkP~}v~6@<*T7v?-H(O$oi9U)2z`To*jgoZ zIR-Z3sNeKTn#9-1`%@AHRft$CNZ>}^TeMNT2SyGf1QEa}>HF_$))(3ShIQ>9)2)(H zY#}eYclW|dM39&BS2ss}M+!{g9I@7|o`dG;P_#d$u~1%OyI+>cqntUW$D^*(1p*oV zz-)}Nf-rB-1Zm#s<|GRW7R>$`Wg~c1#>6I5n6O!3LFsv%tAZ8pv{J^{0aKu5tpe*w zZv^PNRDohor9Zr*hr!^uH(SB=+H{m54CL`mq5P;9OYm@+)FOkD!$42}n>gno#M+ao zlz^mOJ*ez_0vANmpHWwBQ8u6jbe`%bWZbv@L494slK>$PO}18 zEnu~cczsO8!-C$Bilmn{Ek-ekNZ9^x>aao8mmdA!?ZC3hM4{8en(F>*&*5mA3=THg z!g7q3A6i0>;qmNj9~|@OF7p}R3Du~)EJyDbve9x$hnhE@y6%?DFDj57+tV)XtE74u z@v`k*Y!kF9t_F4p8wIXOy96)@?|aNnrT~NK9BE@JLmhaJ_w%pV$s=jMevG0Z(hrY< zzs}+X_148YT5ql1zC8|9@>y8Fi>dUunornKNYp)VaA+wVx6hzl*?zuTo7#Zp-qJZw z@T5r|Y@Qcum>>0F>L(kOxt`wHXDkn%ws^D~m$~YcECMQZUb>zxEM~L+@^WK9QSzx~ zv6T+0T-?=D@@c1UO>)3YJwaPFtYq^0xP(%_uJBc8)W?KE!Rg_+^8MrXk?8}cH|nwz z=&9;FA5Av8cMUr`@b100840y7#ZZvQG2JI6nTLA$@Zk?_R$*g4+H7bHZq70$3I#W5 z;%=Okc9`wZb1;V~%`)svG=R>_6nu7XsYo<8gE>W1cU6=3&FYF2>~>TVZzl3UkRk=W zf2bE0rdsLSDi${r$?Yj_6ANVKQwzbj$If>zuvb0{y)#c4{g{0A?SrQnpp-zII3L~RcZZ^f%3+Ya4&Kdt{J742*w<~vT` z;LD}ZoE+eq33*hvqtOpF_TS8nOrX(g&9wDXhP5_**bkL1k-{e4q9zf7Nv3Q8an%A% zro`fos6U5p_b7a93nZ{Z_vfsbZg47>R&=6Zozi?ZIMvAw-lgvU!`fRw#qoUWq9G6< zxVr=h!686!mjOa>cY?dSC%6;b-DPlRaCZiGcXyXN`Tzd++`Ha8=iGJIdu#3b>Z@J5 zclYX@|&W?xTgBuKxyvwjQrtE&EbvB(QIJyP5*{#+AmCI_7O|hZzDu* zAKUPZztnWcY=7r1&E*S9?D|$}ohp_3WvXDX)@)GtjHye3UW9HjCpTt?`e(qCYKux2 z!Cw0QWmDbQ9FH};fHZqHs#zn`0c!iS^qEXOCuxt0C1Rpxe&SK3%kU;mwJUp}$IDdGk{ovScEu6Ih5JmG0=3R9ux>2MTE`v~d!;&0A@IzO^Nm2Rw3z0jGj zlsMZ-NNLoBCen zRrvT8qRtyMW52*ALb$_ASR;n6y|?my);DhIh{$JE-m}tjZNG{Q4Xq#_`!f#Eo3!Wf z{g!siYTZX2Cb=1$K(mL0{UVUsUN1M(>;s3Mb}5nKC4gV&L~S#dZC^C;%DTTUqh<^Q zG(jk!$mBi~$4~9f+lPtYR*Bzsi#M36jL|8AV?gk@Elnw=?ch-Qg$q30D?3Gb@oI^G z$%|hxxM8d^SJeczH)tek0Poa+3@R33@$Q*3t`ReL;mXbx%Jn_89^j!z4!Y8xJM{JX 
z8CCkHb^5y}Z!3Og-2%VefCL=%B)!#Q)D>vo`qdR3_XN=GQdP4jQ}l`L4G3Xi0@ z_#o0ZUulvG@?7R#F-^CgCQDQ=bpXyp%pMY1I7QZRiw9{X z&|fsoU3^&-ZeawM|ESe0By<`!YTjWX&eiTWZIIbJOo-AfbO4sdu*t<-Q9iW#g{}zp zTL0W)R060teVTW`mY5*04_N#wxm(B=0m&+dECDIQj8O5_KIMM~t}T zTeQ@vRh^MaS4au?AZCz05i-P6?rS{8I_ql?H$v9HI$hbMV+dGPvzM(l*3eJa+K>#l^On_ zfP7PV8U$K^P(F1L7Q^4#6!S&}J4ZUeRFP~^DVY7~esy0kdf@6-&aoUjc`!h;tSwn( zO{%m)Tt=$Ci+1ys$IeG*IkTa9Xj2F4PF$B2o#P_lAt$e>LopHKX$!Gd2(XJ7vl_Cu8c$iUGZ-@) z;~xQq0>ubfG)lo55%nt&Zo#(boLhGZ=)+BxZ@2;&Z=Z;r zezPx(=6N7zeRR&X;czbm^F*wgU!RDbg%ivS>ehfd4&%j0ST?`ZRj4~ztEPvU|Gctw zDqm_JKeji?Y~41xjWOv_6*EEhC?DpbJac@TThN@L5~D~{1|HefFI*+MzwT0tlPSMN zJX$t`M2zDf9)VwrU{Q-LL=H6y%8ejkja;lQVy?7nmR@r2yt2VyA)vx@PsI&pZ3ZdM zRBQ5DMDnm-?y9UVAC9r{s&DpUFeQY`zqD2@96wS-S#N+bq7qzd9~p!~s2Y;Gi5&J> zf*2k4H)jqT;56Nw=qjvtzzn?20pbmp`zj7;-KCUQX$BeEpq2oa29&3qARt%$Zcg{8 zLr@cPil(vk!fP6VA)x&MIb|*D0V!oXs|hlzh2k);hcdI6fiJ-7ZM;0vn9K|pp$c4D zsKnj9>LMpm%VWGN8s_V>*?YhiTP#Mot89CmLLYu(Y#tu1n0+=cZ6D8KzbTUDK~5%cpFH^hj(*cY%d&M`r9Jb&TAu$LLkM z2j2V#{M=0}jF# zKvQb~F_=y+Y)&Jybeud$1h5W+t8fBH~ z(%bHhgQ6v{9X~`^9VXORi+!tlA>OTJUqg}#4nsEk8Gg}S9CTw9j1SahkD#ikTlBi zs)ra5h(T7y_I2-EG^cO!tRumN^&oW7CyX@K7)YX$~j^;SiQH$53b!OXA;odFs z63+nDceVb62+M`RgmlY=#R_-JAz&;P^Zy zYJTX?o;>(v2<-nGHs`IvOEgZ{$Fl5TBUtffs*FGJGZ93C0#+cIHyI+s#gvZP8LyOa zRD6QmGQyKEi3dqnp26LyeEQ8gQgdMk?ia`C<0P_2o2gFvO*)qDJq6VY@u?2C*+FRe=1CJ@>98|3Z?s^2&R)&I6HlZ(V2s*0(&ZSb@G8ve2da?87maz5d6 zU8l8E3pY zj#&}e<@%Wc&EhGM$2008n4r2hZxpG$3jt^rFNuPGnmKDUm+8B6M&8=H(`?UA!UkIw zIaT0p@LYE{nB!bp3ZX%K4RI}`Eo$j2zCZ`}8L%JL5;+Rc($xyqoR8p}Cp-mm(QudT z8bVe{@226zF5GZT`^{>HGuFq&iF?)$XG=2H|Dh3$vG>C(4|7&>RRur!1K;qtp%i>MJQ?=)~-s5<~$m>*=nzljrH^*k@-Qnj1V$CR-wnG`w0a ztk#{tM!sbo7q|lVvqZ3-Q)$+gtyA-@kwXErC1rTI@q=v0FF>5c+e zb~X^9_}1;YLO1cfOU2vzeae<`KRqvp!n4@r!vIr2Vorr9oO~hN78+4#w=HMi=I6BV zyG^hEI0KpexaX*iL~-A*DLwf{;+j~EPDeIITIEe%7_GZT8$YZe$`0&O8D3#lKYUzx z7CpJHEW4#@Boyc!w?#ee^=c#)=w~{AN79qae~0HyoPl+tK#m+ zbH&Jm-o5e)mK3t=H(tdXX-8eth|~V@3q;;ua2Gt!aIcuMKXDS)oj|{F=JUmzi=(y8 
z4HF3#^2LVbI}jXyYP(8G|08M`R^4dzxxU2n*v6%pH8UX#hdq`~`p#B1rog&fpfos& zg`J}NUHwlq%_?M(B5!L_;Uue8KIONA35_>5t>1Qy_V;(WiPO4U_mKS6d}}>7dd@G> zV>bQ7Nxq~TjOVg^Z`0!)6s}lN8*}#&<1b*l`YodVIk_$Z#~j3$4Bb9s!=Ej6^?4a5 zH&@mq647Z$mid^;X`AYM|Y@&4hfi%U<>>s(VQN2-ENT{~RwaI>Vy%ysMM z8BT{vM&8dks3H3zVGU#9$a&CZVVeo!junN}9U=Fvs5>?otp?tblcNp7SVDkDX z!D`vYUwA+!>9@H!`}3QPrBV!bcA$d*U+v1{849FxMK#hFd(WEDHFY1xcaINFN@I-S z>96{jiG=Bp7^vxx*@v6s=P(C(#K_@)(Bva_!+~6mkQrKm!l3{Ksq8?^^dXuzTO7F} zn>*mD3xr22qiMVf#Fnj_Z)<`3+pf7=1^aQFPRCIwC9TZdS+^~XA33&gmP5}ro*I#3 zFQAtr#k*bW^g$ZTZS@QUiY-JR$~q)#bD;rc|9S}ENi#(?d_8zL(t{opsDl6Y=aI#` z=hifDkwxArxu`z8BKC*VWxhR zj?Bz>kBwI5&OKbZW&PZ^`^wqt-~X0e<8VpSa7nWMj8t)=?oG?5_OgtivJ4uP@$EpZ zZv`8C*>xLuL}6RDyfPgjc!*b)U&mqpLZG58txWd;C$!1t$8na5z%_%6s|PhS4RLSs zT6TU6Yb#HrtFG(552hh~-xzyYYB!~W&2p32L;ovq&OuyJ3nMA(cDL*hvlM1}{3B){ zHX4CKazneT$B(nlM&CnD--iK;Q~uEUu2e0`CjkLuNeEE>2z(nh_=Zvq z$B!4ZE=LsD&5T2}@vqzW2Qd&@CJGJq9|frY42luBz6Ye`3Co1`1<^N?aI9xCL%sRh zZ1LRdCIuhr!lz(aD@zv^7fdNlnUx%Tn{iB$l{{{|%awDU8{6=b8>xy}GLYp@E} zxqt89zZmd(RPJmL6_sy_OCFV!y-G^>l>wPWIkHM%s*-(70cE#(&ls%dHc2768|b_QeFuVDj#+lMjHCZZvwRLA<$no~<(fD^d(?F4~G9j1CAdVN{p_q}#-d zahUbU%sFoWzskT#Q6KATiwqo!<)G*O5qtsTV;;6hc;ObGk>6-f)5JC(+7yx=-Pm!0 zTMg_=EDCu9AeS$FnpL=iR(r=2F1HRAu~as$)3hD8xo|#i@ilq8PTpjgl6$ly(_w9_ zum8U9kyy`o+01wq9<)t4uGlqO3(YZ1znDy`-C=Auw*IVP)G9g( zA#mFCmC>3ALFBgUe63vgpQ3KUI8VX8M8Q7iv;2~2tMtQcCr(A?FEAdiD*mhCbaBvp zfBk6V#z!(26!OhM!{=}Ka!=BR+EhABb?qoAi+gYommza-ptL9J`08ISb8uLzQ$9Pa z{K%aTi$TDXU#0jjIQ&WO^hXZPO@k*N?TZ5Z$JX@8C7S;QJ73f7lVrH03W*BlhTGjA zRGHomcXV5ybH5ZQVonu@6i@dk(dv_9GpC0pNdlzu$T4Vh1OZq@!SAu)FTY+2*&9kzrk zZqUprWr5qp@w|00g({cRk{KFwj)^Fe3c7{WF8vWzsu%JAzh4M?=-~>ca8;Hvn=L| z@ywX+;h!kwrd!uW5u6C-MzAkFh_%d{u0^DFr`-cUwG{&1l)R;Dwfz-=4;P?w)|w=9 zSe`9py#YW@$mtUIaT#Pv4P^V>TCb6Q4nIA0iKLB0*Ya(eh_ZRttn9u9jI&_UYnRG59eR$d z?ck4oTV)@kxs}v(r4cRJ5fwujG($&cc69gGa zK<0-&uSVovkP!Jn;^N5ukDTZR%v^Ei-oKXY;Jkxr?$IH^$1~k&SCpNNnx+Hke+0mC zYN$o7vfbsOj0Dz{Nl>9&_}2e+o 
zdMs5DdQ?Kz8SN^0q!1~7r3OKt_1NiIypi^_K;7YKY>6pBh1}g@cg8-#Q$PM~6zegm$-KZ=rB9lkPhHQedC1EQ##1Tft?Cb@ zxw73lx!Q4>rMIgFH@yHsm-le$qZt9iQ^bNoE4Qa7m>M7Cejb)|P|5QiVg((|oGBM7&zl2|VjI`(g zo38#s=WXO({W$j&dUHnl%41|iP%VYYm_3&;VLEfhR=Q@wnC&s-D4RS>!vR7It(Cf| zIEfe6!4AD1>i#Q(gb*17RnjF#eURu*Fr*Tmj~U<6cD;MMe~4)Cu{O|~XM1)a;8H$8 zeeA9^mExu17Vsu}lzg^#lzEnPgn7!0(49%&0~l=OgH0YbYi=zl95Z?{PJT6_r4KTt zb5Ah48_POIuktIbJ(RVvvY$rqpEh0H`pmh6M(+}s0bFfeM}Vs>LM|n*=^fZgLb_@M zGt*k6Y$4CphYq=jd?zeUw+iF0OYyAA8^q<_YS^n~2sXs8!%;7KiB#pRlDdwo=Poqj zjdwj8XDVagQe(rtQ)-OBa3jwWXAwWUv3I?UOhar8ua>_y+~#bwM|tP3a&v$-tl2`W z@oZJ>*?%{?0!T9mal{RBHEM`{bN{a);eHWK8#(th|2@9Px(V+N`O}(F#`NiTtJ@ms z|FalM-|GBJG^B4Kq9T1OFMTT~eJj1o!aK*MHy}Z&Vi~TR&|2Z2gPxiF>v6%`z%f*f z^l&A+q|TC*m{@6wfLQm+*pYkO(@3pGd#AiKw{)ulPCyFMh~zvV3E{ zV!Jb~G`Z%t-itZ%yLVQAwT7qW&X|MjkD%#C7-3Yk zcL!>wT_Qcc*EwpH0#8?by?aw86=yU!(^Rww2=ng%qG(WN*>?oOpQfqVYDexen>eyz z;9-Uvo|c+wOvd8U`K~e@j9tdrQc%nXI#x2^K5D(!`sKK<8_k1jblO48=*Ro?hlOD% z(?n7fcqNz~6@$B8g6mrkQzy=eqKQIbghBMwVaG|n)we}Eq+NizpfU<4&bl-fFi|LV zsNafp8TXFi49z?^ZUgXR?Q=ph6-Isfm0i7{jYyjaxO7Z-2v)LJVjuwW{f0j6&z`t0 z&LjyWz49C93^n_YTDs}=vaQECH3$1$M9im@lx&$Mldn|G4q zQE`!Wv3`^Ff}0T;R+owI`(LSpF7gk_oLX;|qY@6d@c7s$iaYdTQ~cbU7_7op4f`u; z#_vCli?ox1kFu7Ie=!xl4>eF)Q#meWYfgT$?A31+a^Q7As&_i@=$<;AJvQVuhG;r77Cgf4R52uUj zX+~~H0G8vF2~IX8MxBRz6;Q-6^0U11JCh?=6esv2Z|$~+oJ&5n(ncyxl=@L2*AfKJ zeS6~zR#3?WM$^$p)vJ8s&N&Y&Wza;hPr^=kj5BT(U7L-fnbo z{X3K_cAjc)G(v{?`frtk>36cy;W7@Ho=X+q<9<0OkqnFQU`#~f)pj>{rG{P|*27&* z1j5vk^(|v#fLjO;_9Tv^&qDUo>%2&;*!n%|nS2n%gajm{DJ2X|=82mA4AfV0)eHN= z3ezfniXYW;u{w2+cTK~d3e|z*0@|k=7a^*%Y)#AOqv|#zi0p*q?ttm^!Emn`@e4I2d9oIf}USr2Arkl~PKC{_B zDl^5?_szL8o54<5w^^x0A6A6N6~)yqC=M7?1Y>R4@t^3vg(Xau))GB=!~qtr1r|r} zygJ?|NW`iERHBV54HIMpYT7HuYSWzbv`s0G2HmGnz_bf+APOPV?xU1w0bi ztM(QM9g|uE_<92MQEObP`YX{bKVK~vvPbn^UfqA7Pv+Rhttx?J1MQo1rCUvzSx>xR zFT=6vU#jB%8^j!5j)7P$I$ADE7u^1)1!dfBb!D1Oy4!l02Y7yJk7nE8xJp%LHa(_2{O@qpA8_maxAr+(~| z(cMs`$D8=p73i=(ag6Exi&Q_i38$wGO$+`uUXDC40{b`oJPbP3=DbTzE%oS^(CVD` 
zPdZB2#mD0E+Idx(qSErI(zrLG+^IAK!LXL~oVr>ayTO4u%)@A-q7LcQn4XX zCe@Y^hAt!B0D@<0*=I%90;%{q*ZML#&C8SOI?K^Ikf<~EXO97Ia-CKBOx*96fQM>R z$~OvK%)|kXoz$ENPfZkTtDPtNMpGv7q3E0%B8vWCI!FRi702kc0h^xqOP+_;eOjvF z!A$}#X&KzPp=A5WMj6JKFj1x^(Q>TT9fBqyEhdhkh)tYiiiThz`^05B>)0TykQSwI zgr1iYPzfE0!d|KICKn_p3`Rw9eTko|h?7EkkDOCLvm_|DT$!)u>x||N4r!@(N#?yM zvK({|yi;zoa=4NUBEiviAx=C}_ky5QFQtcf-WV>lj%3oC= z4dyzexQSLP;a4FIGcxxlph+lYHV;=T?khgfj~|i?>vgWvFbZ8UPvs^@p+0GfJfmSc zj=QkFvvwJBb8&a!`M&D4`XU236FL(rtiG$ftBgHHz9n%`0I9aBrkAYxw9cg|ok^cr z6*k{x++`?E5pG1-o4XmbUdS|aETkT%?MdGhaPN7rf-=()K~1Z?h1m@qn)_PY3>OKU z(ycqI_Jzp}i^Gm79z3A*G0N47)rv8rRT7)1sV~!?r>D&b%}LBpYfg>7wxdXYm z6N9*exOs?0w1u_PWU?(pR?^!2-TmFURQ(?VU^%r#phck5ExK$1-}bZ!Nk~XeuRlg1 zML9kOiU;*`zI5sOx7)({w~qcf+vYK^Wc-JQ2Tr}y*bPugIZsv*d4lELVJtfGw*OrT|&<;Ox@wzBm#7eDsz^`}+LSbXF@H@iCSt`a^Io9+j# zKs)mt9CuAGcd3nizZ{!5x?z85)rnj~;%jNV#cpwU_0xm5~~oY z*g3GqBFA+H%ydmZZ_-XSxks%=ybjWTrja{IepT`b)nRq9o%w#1(p)3&QLf{snAo*n zlMp-;S?>wO)+@BjY4+N=2r3n7W{Om4{@td#=AvxB`fTcy=FN^i1BpWFxD-W1V|%Gxl?;LXm4-L8KYu z!iL@m*LW{Yan4KYG?6*1N8aNws*(6K7w8ee3zmjhdW$S#4g9&eqa6NQL2)e> zJjl1YI93~Rf9$d5)scz*ymX)6z+(!!@2~*w0r-saNO-)ra}BTCTeB~(2IjiPS{rHr zPxU1fm^|>eLEe%Ix+il7&re>*ROKIF8~m8^5e=2d<`A_32d!eg^uBXn!ru$eb@>@! 
z?mowGl#tJL3HJa!8qc~t9!FNQ^ChLCi&hFTVg(Quz5*ClW|~Qz#cK(h?L2MHm?dlp zV}~^CAw8N9izP@62PqGgEjlJJVHVV}huSw#i1;mGy^s>m;*1-nnmdE1(l@V751!X9 zgFP;%peM#rO>(->Z&fHuRPFm==+*kU;yvNI@~y(WU`2^)#LDr^xp|dlE4L1xbI@D| zPv*)A-v-~{X+Zo9O$+I)608UO?DYHF%BsZqwFu~RdD%YppHtyh9XR+)zkpxsp){>5q3g%yVJ%y$kkt~Zkmqp>2rG3V2HtG-P)i1u z(9Q$!fI|q^_%6dB0s#g7{b6c&_#;_*!C2eEMK=qJ~KS^`hu}IkspT=#b zg0ejQwZXOoAzb^41F^N2pUGq~nHSrFlmdWC0c6UoLYaUtn!toV7c;e39`Cp`rK=!I zM#y2eyeh@BPgUUUwNw^PS})4ZP@{S{aSMu5M_iIcU#O7NX@+2_kbBWZ^MAf{rtLlZ zrS+)7_(OzoPENfop(3BflXyJO-V=IF@H;Q!rN-cp1%R&NyNV7b5TsIhsS#*VUL5{w zQ|99zjYu>?N?~X0&5ZLR5WG+%W*L#HKha$QK@MtO)0ey^flcU7Q*1yf)j+8VvfVPL z>JXmPw9(J$L?%?doK0gSlFql>Pt;=zXG8^9d+>Pzqmn+#bT9G1Yt{Nm-33V>BZ)Wc zxUA+PMy%X&CDoYRW_{j9Jk&YgNognO9d0LLni*{;D%qP3&f-A3NklgfgIfP%(-7uT zEUcmd=PVU_C6pUs|Xo?S6Zthv;%7%261q zelrF|JDG=>)DL!$#8=_9J`{6`hcpwJDzbiW_~ES`k$lFpyd77hH@eeLDelC@HpLBs zZknwTS|T%Y=b+M6IHM$jzy!}M{&yd?V!MnzbJEbwVeBK6D1=CXV!3I|=183V#dcZY zs%zwDFliBBSCKxJA9x4s{xEuQ#-qls0k;)K;;J zlFHNUb(mf;rOvtKeCAB~C0+wo2OYt^w0}sHWiLXPWc4$pG#Ik)2fy~z7DLNAI&zN z6q)hY0MDO8QSgOr(|BXj8p**rW+BDH4NVuY2&HGCP{KI* zHDkft__`~xOQ23hYX}s{ei!n~f>K5pLu3>M=}n+LNBD}Zn-P73m7U6>*&OxVRU$J* zuYD&woQ3Vmh{7m#NZFZ>1-)&K=-!e=yx+m(PwW>7&-1}2ds489u1ITwvM~n3uUtsg zRtH-vf}rICpWp%hwb-B_$MmGYew3}CEuSkaEJw~L#|Mt=v{XM$G*T?2_iw;XU@-KP zAl&Wm7;H;XhK0^A+lxe&VuPB#KxzlgNL|O!nLt-HVA{; zk%_Qy;Iy;WO@gBeY|y-aVA2!Fh&_w{%*rzB5P~AodcUlN;EC;gqOJbd5R)F|iv|&r zIuX5vU1eHt=jSlL9R`%XN=_Vce#mf*9Fz`qGp~J2;(u55gvLLd@d3qoM>|9_?)p~@ zG&(^X5`@u9!s1{KpPzX0OiZo+1od`KKEn$LSK&Zy`BM;u!ri(B2LX~KLo#xxUt@a-ol!mhwzjQ|>FqSMYi;~Zi?z9Kj3L;;m-ovnh{jO90t9Fkt*H)` zUsQMcq6e0SnrkNe7W{=irwIo&!*-`hp{!J~xw2I!as*Jbtm_7WQ zXPOxPOv%Of%G!ye%VU}k9=V(*%p#k*99hLr$PKA+S3^|s@Z*htZyi;qjV;7=27uSs zl&J_8oujdUzWigD1sR(tvQRn4bZQn}xFXJ$eh%JUq&u9WpW_g@wp*B;u-AJ>OnJvA zC9lwX6NIZ$f_$;xQ|sxKuBxwiAWWPytaEFEP1votlRYxRy7Tz&6wibyNiezCHtL@M_0Mr}CA{craOqfSaeEsq9SN~7Z zNL4+)oKF&Z(z)?bYo7l?>UTRuYv*Srg-v 
zjQX^Q{(f*FA)?BM8?Ea@`|DkTRzW7ZZEp8B3QtW=P^C6&dcG*{XX3aUQ%=uvFL$$NzOMg8+khz=9RHUIUDj@LxF6lp`V!z|rzpDvY8(Rd4* zGUMtUcRcm^12vIT_CCe^hvTCJeDO192UQ96I^$LC76?r;JRE5`+eU(6TCJb$;m|d$ z`%fi#NF!>Ac&sD?)rEd(!vP{_~RDA8-#+f)rsFP-%4c>(6mbFWQgB^f%2`KP1>oNz`4+51@^(Vr|bSbQSd zo(VX)JDKeKPDzA9gkzr;O&hynre0chNJZ!83dk4}K)ife?5O_HZs>qDX_mjdGVZ?&CObtw4 zmKSbkBMs!6*GZNdt4?$3qku-~ZUne)mJw6_#}_*JlFe66FJ~SN~kGa(v->uC&kI_w;Os!yRi@8!oB3%mD;-K237jLBzzmM zxFgiW7fcSX(l&WnI@c?SH)8i;`Wj>Xy0t29R5PZ1amVIzp!i`z3*;0SHmmB0v<|Mpj7`$4pUr!EzLgibkB_6-XH5^iktNwgVlOFG;9t^F0zc?Qn zI}%3s2`ZVA6s~Ru|zY3dH1*H{Z-zC-6k@ z76-Ka&VoJi$0Xh)bkNE1wc(86?3a?8`wQ&t-YW9FUFcu%m+rr zb6_ebpFZPV#qdhy!Z+_uev?PPlRMUzoK37IDp4QS-2U3wuQT;FmQ-<4#)i6IH83FcJdlXt;BSbC4 z*SZ5`ODlTg?e8~l!49^{thUk)*Aen<4Uu$Cb!{auRtMP?lV^vexhCudHg#3w*S21n z+UY51P3`Wyw-vcA;A!Oqf_bV8)T1SaQPB7~BmITT&0gct!fvvEK54^g4(dTRrRg5p zr>SXQIx$Na;j5iww^SO z_8#SVF7{m^OXsaEo$LKH2M}76dE}JSebpJ-FX*>J=>z}J4XZL(gL=mZoT0m+ejzh- zDjkc^KyWH9=-qQqFd15K33!D{vHuN|;-XscQ`bWYb&~HyR`yU#x;O+_dK%(%;40l* zts>}G{P^*OII{Cm_tr(Yv!;6=$KrvMT8QTi^2`S}cYNHd=d}0r{XWy;G}AYFL_QRk z^&%iiTy6Q9=@wwKcQgdSJ?`CEt-fc~b@-#cTN+~ni3)7Bp-SRuWt;M=wO1)`*JCCB zH8o)mEcoY<@(%MYyhp-oZ-$A;KK7P!z$&wj^1!{4TjDCKj`F^bB|@rr_r2S*=c6yj zcEkZ4#G>zF7Q`080qG_5<~kF_yytt?dCU;#c@q;jPkit(MS$e4m6*Ek_xvWkw{`D2 zw7zy}W5%C3Km~sPikT@E_8`R{n{e60>z8ox;0Xm>54$JV=uN!LuU)(s&1TDqqn5Ua z;J_OCwkl^DTf|tR7OdZ3uf*FL(BgoY5(PEhgrm}D#7pYJE6 zB_6Jdaa(;M*`>p*^L_p+L>=)JLU~%-B2$KSNtxy+_#~{!73RlHNKFl7B|n;_=r# zkUsx%UK#<1?j3RQsjGsGMAVhTCW9l&kWSwA8Xw@#KEz=7r?aPAK+45?a>j2u zf})CBA4F@vLWl)~Xjy`O1xG@R_RS%rG#nSr4hS)PeB|5{yq8~9JVjQCaF;VCv{$ZUf?}y>m{0qK2MJh*=ler?JdEDj^*8ahJMn;Dj9#M1^%3ZC&nCOc`Kd} z(f-D}VM1uDALzNR$G(yx7S7ZkSyO++QzKs!Kwj4`YM!t|&??!0B>j1Vn820^H%Db~uA-+3(8_7Gch*IEdAm4A20&$vX zSZL(o_s9mtX)hU+A@T27W?>+bk5dY$Q@2uqyKe3nTEw9{QY$MnsH!gS9cqR&py)Dl z9B;pQ5V&PRS$~1z(9L-~Ho7HX`&j`ka-@a?G+?fc?44Nbm_QGXf{B@hjgiTnwd78f z!L7!(WM?yU_yvrhuebkcA>tqLt8$XuvJ9YRjm|zxZfO=)IZEF(TMh4#0hqrOoJUs* ziQ1+pe*YF1m1D?Hkd&&=%+afFZS?b0)PD`yjZR}5Xdd10{*Mb|7t*?o(fXOEKjN=0 
zYz73?T57G{9NQAW2AUjaNJ-db4sU)4g5dTpPV^K#xhgdIoTwUNW16{LQ(*6odB;uy zye3q-A_I4!hGV&L`?Z-Fq!_iILh4g9qv+-jf84*aIg>!^u&&<>V{uk+R!lvNq3t1u zx~w{`<|C%vrQOkh*E!>%BclqN`;M^GzZC|;j?!385b61*3Q8}Fazr2ENfkDCT-SYG zb~@!QIWBjZQ(QN0PTp=LT&B1GeUmD?o?PA)tdx}#=z z1z>k-?ZOe?B~_W2)H1#j)qJ@Ojde!ih^mm2OemM=%oZQt*C|Lb_&K+7J4R7^YaGE& zoIAK>GQTfL0PDkKEPwB?puXWCC%!lmt0b>mbuGO#b9K1XJJ}qm&t$_x)znRC^g%JP zh8V72kK~Aq(}xC9=@tKQD62sGI3js27C=*fc96&A15}=g+FnG1?t&`JkA%W7?LSQ9eT^EmfNZ)SWeY8^B4!;FA0> z@uhX#m8zS7_Gi*RKDl2m^&Y6riRejl3obJ+?Pw|9C^NHQ9`ZaffG-W61ap_+o;W+a zRGA_l$A|aiyTUF3ac?o6AB&lONW>)@(F~4MD-qTlFU2ti4P`5#I-ERZp~iL>r9BIJ zOE_(dUs9M;2MurRZ$#UF?vmXxY1B;$iAH2)79QzYf0OYJ!uT?DCe-PPBs0>VH6B+Z zn9Q&z=fz6r*CxscgjD!+O1|54 z6zT;}i@tDUL8l1gUuY>%!ltnB+4!IQ|F_1U8!EBcOgu=B(v&G|un22bpcv~rs&CM8 zS$_=$Zz7BZk+bkUVKd}csm)n^3t|9; z<$(dggxdc45sbq5*{^j zlgg#NWxtUrUkY3b-s^sP&40sI zzG=U0haFe*A#!cIXcszcc!eo}XV~+Ypn8ZE3@Z0K7k?A?SA5AV0tt*}$_&0Dy_wjK zU7PqiBnS|)8Q&*GJqNt%$02#a*U{f4QfO^dz>J&wATNSub;ImuACAz+uUR|WJJ)`> zCS(S%a6A%5BaDWW;E#p~Po&i=q_mH&iH*LTcr$KJs4AC&D`r?GfKDH&gz9F!hyDbp<=y#T|+j_u}pj1&X`7yIXN9 z#ogVC+riz7`@!8gI2_#d%u&dwOxUY6U#L1w`ko-If=X4 za9E35#;p_G1Ko0B_?VJ~AsZbqphSJlMu!Mcl48VJ^|R7Cr72UR&9vLt9n3v3=HGv| z>X@$og5o;z|Eke%p({;WEFJd*7EAv9gv)-jB zN~t5N)dw=tFh(E~=mQyt)avw62mp)#gKR^Q(dKyL(BlkL(3nc$sYcOW(54ycW&ra| zO%I7#*D1L9;Evw*>tKOe zCg8_d?*}|6ZvApZsv1 zE)cfF{D2?b(C-gCKF|}61eLIdwlG4Uro-G8Lr;j}>>Ixj9zzu8Zk=6Uw{2g{jGx(x zGA2G4%ei^W2Jm)MRKVe_`qX2_)0x}UJ1g(;XH@TTZw^+`Fx;K~U5`OGK%^vvx$FNX;P%yme;{viCB z+jH@TcNL0j`gFVK5%^t-{Q7m{!a0%L*zbqqZ=7j@?T}d-Yb=h!ULM(SXNB)W+A`5| zDd!2-`G=hiKif$A&*CfUrBOA<>t{)+e5s1g|Z$$)}b#Z5pLR+ zQpaCe|25@#SdcNC1G}HPYk(=L#lnyaOlaqB# zP7Om%L&-PH6NLGl_3G@_jqkheO*V0>P)ON3>X`lX6rtP^xSJFMmnY#nf<@iM-669c z3m(Q{t{q|X{&##F**hti{gqOfb3eaH%PRKc^V!SkFvT}?NgMfujJW1Lz+ZKqG$uAi zY&&Ix7ckad5bL_|O*|I{F1tmMYq;!PR?XZ!bo6zI;%_?ncD3iIGK_&;-+liZTr< zjTGY|A`bp{Oq3PN_j8P%_P=BUg?*1S|0Vg~^Jt`5v9KcZw+sI}VWa>Tu?;G22mi!OHL=zesEHPuOL8VJ&WSEdofl2@E>6QHd+58{gpK_&Gv5-GGiL+vf 
zVu|&BvgaC9f>%Wj(TmLgtAq>_WJ06$NtOLSg8m~KLbWGJ{deIO62qmyNJT`H;{9vW zVa>{SmTPgq>>uH^XN^Fl4XM>yU)l?PcMkt>wV@XZrYXjBm9s$H}5ZXhhT@Y zFtJ-G`-=|C%eS)-cAuI^G1M!*~>uUqeDQy_Fu-^23B!N z^rK(#Velbze8VfB$x})$doDX5QOh6i>P0%2ThLMTruoqREXVatXZP9gsuR=}sgZr9 zc-?vEh357?IX6WY@usLyBUb~Rb$Hf8-o@3GDwtSZddiUKP(%R8)PPJmc4&{{^QBZu zAeN6K|G)-7)6IRby)i52)xgoNPGw2{kmYz;DZatZ2TR%ROl0G%u1-}9jDbmi{I&*6*Wl=mQh5jEYp z=ZANeee?bj@gV%8^TT!fZTPPj(bHU=SRc#TTdV- z@a2MfGk}@({iV~)KahCdfQ)awqD9_=xsg3fpax8FBb}39 zCNom8sqf14=S2l|cldb}QW;_=KcC-W{|Lyd`TG3g_t3w}1NeGv9m3M9xkqneSmf6C z)3B}vw;1iQ`?m`5u$h3k6y3f2ZHj2WX(ra=nD73`%?K5Urd}FCoC{AeK^~E&e$iMrVUKUbAlrO6 zzpieBJ+J5#XDriw;>bF-ToVA6@_(D+7Jpn)b5qKUPlMTt=!$-`h)+PnWiwZ$t_9xg zi@9(YX?Bx8P;4mi6+Mk97bT5Jx9K|^Ry}HTW7I^=CS{9VH(xaZUxmaUIgIId8iL{A*ANUz&q#ekF5!17 zgB`!-QVEDHi#_VDE_wB}NVz6l67^LEUtpkqHK2}mo=|~Tk3BQC_c&Xtb1b|Nj(n4{ zJt*vlH;C>EM|WBuN7RBC>b+sCqpzcX%;gc1&(!F%#Gn8@K?P4GlrXZ&RC@`rYUEy! z`X+GXL^!)nqW5VcD5@xK{(UVEM)Q0}Hb&d&QJI=Te@}f5$i0-e@d>WQ*DN znE&vQa!t9^?-N=b_X+M%^o%{-Qhba%6#n~a)R#8>FO5(=RxX)%|07QS(?A`y%$&}^ z+X=_RB{3`^dsi2#71W#}|AS5pp_#^u?EVjj;3BbYTK5?GzBmAW3sUBa@pkz6iqUCZ zHnWuG<3DS|{yM4anR|%#RY)?EtIvJ*GCTqAQeevCYqwN!PH;{9!GG3^y6jzg6+}?P zE6%B?`d|IC)D9tcbc@=gJTo{hl-}ufI)eSo4h8$RFj3>*`F7xn@V-Yoqxg<1Q{G-0 zutEsgMl>1ePUV@IV7wdr_>g|(ue^fok&qmmQt`M%z*V8k}r@4g7 z=YwSn&m2=huq*wz#meP#(=w-L4oEdv5wjUXNY?XrZO$c3A8MZ_eB9S=nMc?i>pu2i z!C*&pgYTb4y#Hte6}*ICO1Og%OD7p!BXlowhJEWnh(UrA7ftbfh`wI$SUEaFdak&p zAi3sK?BxJcg1>gC`iT3a zY!r8ubj8x&F+1t4m*xyOJNJarGZK{M%y>Hi-c$1|SNU~>>1Q^VYE|am^T*B<-)wYWY`)0^WyZh2Ak=NO|2aNuKbq0}``#2M*Vtwck9EpM-Zo z^4<<=(w#nmwn{F+utIT{;Wx-tA29 zc?jN#yM9=z$!BPBAi@Bb+Y+9;yg0x2IDCSSXme9*Ch6tF!&4{iWBIA$*qC8WspIj< zx3Spa`CWETnY>tJaZM8eoxZ8CdvWwDOweN)lfNEE^s;ThV-MOiHK*n` zNvZ0VVax`u7Zc0Cn9a}5d_h4U7X807f2UGOR>a_Aiqp#eKCi+$aFq{d6C1%G%r z<|8A{AH%T~I2W;OD=AFqm+hLL-1BrbmPaX=O4znj!lgKnyKRcQ`Z4`*8)i3WdBlTP z(H|Egnc(ogev;l@&WJ=xx#rD$Pb?evAsmKwN4XOe_v)z%noeT8s4|@!_7}u3JT?_( z3$RHX{7gQb;ORngC(YXvD%4`bZT1lNSd>kI=@{$h8pX$**7ZF6CY+M*8uT87EWG&S 
z=5&;8CH?NmLUM(kfyTZT(EmD=XP4BZznYXjnLC?nlJSYkQ9H9b)u@nKNYn@5{MMj> zc2L}~Dn22V*7z}OV_kA>dtvHcZ|(j}$DlEzcU3px zU|390vp8~@ZYDS7+4!*d2t^RDiwL$t2`7j%XD$BFi!)v>OQ>)gGY&YKF z1k>DKGq18nzq&=U6M$Y}E};*slwXSPWp7ib=Iif6?_tlfXBCX&dinr4`8kUU{e5^~ zWEr3B90t!)oVyQo{;-)h4gR>9t_WWJ#mzrU0((vT+f5$%$8%1bR!iHx$P;%Lg&#|- zAo^9fRZ?%@;T!8J3K&*run{+@K`V8lkpZR~NNyMI46s<-MjjA z?TPOtkn}lB6jIYLvV{1=7v-CChu*C~VdyimbbRTC#=-rfDSz#l>5gaT6@AYS_P`y- z1|u@K8kQ#12=1N!0za%?B#=zQ{yp@6NZ6VixgfsoknmTe?kP>!VjsP*M(DXIwTIz? zRY5l1KAg7RTJZmu-mJYAohm%z|EDqzz7nYsX8Jwmp8Mzy!yZB6$=)8@=A-NWq3l5n z9QJr00C>ZFrEzFNta`*wU3)VT4Z~)uCTmdnwKBcmU?ocZ@7$y5BA`zYv8*n^Nv!Wg zZ3MZVmo>jUNIortwjE*lx=BPklIs(P&N+FA>URB_s!zX`ywK8ihJ7>0XC~+)XaVF5 z=IIdLG{_$LS*HgmXhx&@WwdRQua#49!y>)+k>~7NuW@8rz(Sf@dIV&X=3%ZdF^;@v zv5S_TIg{*a5bc=GDA)j+?Xb);87oHmGpYuElOYJ6q@|29vW%+zxtv~*gUVkStR25k z`;32dNO9bkjpfIBCYeC>1CX33mz=CR_Kk`vt=3i!``XK7DUg@eymXp=2w1?Ny%ynk ziorj@gkTd%tO@5%SRSG;J#+JMF&8=y&zqrAk4(8nTV)2OrEEwaTU5$KT1ikDimQaV zmHh~hCDD8x4K%0K@z+~W!oOBMaUPuNl#gI%4gz*QCioqj#_<|}?E5w!{l8~>spK?f z-YaKLW1BX4$Yhc-%FM*}p44E{YG&pFaB2#QJ4d^D*ln*Ksfw^dRKrVs#BN=gV$qaIXV5^lw^!YtyWS8 ztSFU6Nh8ZPCpkSP8`@{TT6W273c}yWMadky+142H&eg4?;ys9rREV}t>hMvjDl!MG z>J#X$!6E-t>d1>HL~9EA-0C>QbwmBKap;`)rSLd`B;rRBbXe`;yv09%Mo2DG6Jo=P zv$h564|uJNRKv%JRyrxPrmHMY&K_mNgZ@e6%S!RuEE^EsRkhkM;4~K>wHO)ltqs-S zi;BsNBA!+Lq%p1xEQ#MG;UZW(K){zNDLInQ?tp6TM%#{5Nl{^gI6T)bVjFSGcWRaU z>SBFviXi{0hvj_0IR9-&P8Z+@Wx2=QA|HAyFx?>crdx3fN8 zo4Y{9FUDu|F5{$_g`$5NRIfMFwE>L-kM723kTEJn$`u@A$oR!+61#@agif4T>f!n4~cP_kd25?25gdA)hzuY1RZs<(H(B zkdb@ug0S{l^sm|;+A}^KMr3+wvM!3BG6D4E{U;rT&j``vLoKa7-B&%(=O2MxAdiU8RMWR zy9x?V!aw&CEgB2`M_a#0rYxerrk+wB{IRs>;2zn_M>0p@znr^|_%(0vOX(Q){IT`S z@gF2brd7A8!)rr(TG*JJ%0{y2mmq*t#{(%vwIm7jkikJ>GQUgxwWn=Kl#P(_${hF0Z1v zn`286f>}~s@?bhLpJat0HHN3FZ{x zOm@IsJ2w2cE1A zr+m4ZK8z-zIW>cVDv-LVQjI>P&Ze9Vwb)i!r<{=ZD)cA1Pn ziI7cJMIpXwLJ)%2W0dOZUDAboR4rrnGz3T;X(`l~DdTl18k_ygL31Cujf#QW$T2c5>HQ}KB9^ion$oiP)Wu)r0xhCru&dD@ zqu4-Ou6~(;Gm7Bn#IM~iW9>yz(p6mHcjTVI%l$WHD>?Jk1{FA^5j@X>sL6|ko#sWw 
zT4W(lT~N+&V#DYHCGM=1)+itlIW{I%8SKQFPbYCR2P_*4f4`y`x8YeO`Y z$eD?Fn`)!Y3)Z(CJt{5Fx7rOiWc2 z5eHo+dj5QI46jVGu{47H2CeroK<+G(h{LY^86f7%9MOag);+>`W*r7ShpiZVrBAw_ z4r;?$5@z|Se1!g=`xY@L-SmFiju|nhfNwUk3NsV8$Mw%?UTu_*XX}D@Gp;R>P>ZvI zv)ur{XqVDdiE&j>w{hO90#K^b8ld77;k@%qEr;1m)l`W^x1yDw{INUk^wz&ck6GN3 zRP&*!&F&0b3HQ2J)aIwG$;9x`J3_U2C5*TLg{{V|Oy^8w*{Y!~{kv0H`|+UD=Jq7U zNnJdzS6ZcbD?x5B`6N&D-M?Ei!GQ>Lsvj6zK(-MXEXbZuz{eys`nI6x&wc;7w;B+t z#OonmqJCVYnE&uP!V$K#q_pN$6qFDon)SPW^H1}=Z&}TZYfYLweZ6dU?z)&OJ7DWx zwU&#n$o!?;Zn_*_P-#Mc{35rAJ@>%RE}ka;fHrBoLH#frYRr7q^G;3B7Vr+$m$zQ9 zmE<^cG1_wUu%~;=U!5iPAod@+i--1P|A06r!JUJ+w{A%wfCbzE+cf9sHK`LzvCRsQ zVL!6jFRoOl(yx-3yCXYQ`~*@2$_Yir%*W}>Ua@1!p{!v`>vZF1#t;Umm>47q`7@2* z+6EYyyy?HtE^2R4`=)+ykzK2Y;%JsAM~6(Yvr0d1^~sY{HP1Ntw6gp1_;UD?`|_<5 zbbsw8JV!zccI2r~+OblIO}fD18w5g^PL%($A>2UVS5;!$$k?BdKf>XO$|(jSS~fK- zu58=6H}kFK=mUu@^IZS&9ud04@(gDuDa5$~+m7^{t9h674=Spch(HJJrW?-Zo*o6d z=NfHp)yM7UY968dP~X_Hi9MS6r}U2&JI2>j9D+K48;{0vT5G!wt{$x&8(q^)?e-2~ zYy0jU!yBs0{n`(mKBHYH_hD~eLsOF9FO3e z%gz82T6Q;#Xzwz39Oyy;=`3R!Hdo9P5%OH{bEteO8|H+LIQSWX{-qnozfh@Fl3eF` zce35RXRaY#(sG2BLEqXYGk{;tHk}SS>a%v%YheL_mz2w)cTW1PK)mfmu&<`8yUUtG zY!=!n*g;4ofA08OL&6rbV_2MyHo;T4KMZnASOD9HYw6mEjN^6evAJ@`7BO;^u_6z9 zcFX{4Tr^DK&9&ZQ&=hkgipE(!HO}mDMhLPumob*1v#}e6 ziJ_M-)uLt+e0-U{WmdDTVw2@^1K8_cv}4o2qNb7H%q>3uRl;Zda%=63=-$DH`B)ds zHS4jg@806KE@Yn9G_{s?R!OnNz97c`Tyj-4vo-pM>(vKuQ?uT0GqwfqWJ`%8?3s5S zwl5BU6XzX#{XV-T<`(*#Q!6j|E%;e@{vqFAOlbZhD}=aMc1_>z>89XkZ#DM6{+MIBe6v2VmOkh@m@ebsOWe%~^_rj@3;AR1QTOxeDYL zZQ HHoj8TCT3R8~p8GHNnwZJ0rIZ(>F<5#m?`WChk~WC8BH8`An1l_i#CN!9o&D zbJEu?IGJlbg;y__eD-ut?t{>g(^Cm=M_x66DRf;!d8IQ45ZGwCj<+Be9?o=R{4*hU zvQ{dzq*CMkPWX)zZi0Y}HyM3aly_)Z$^X*nFVb4ykD%gEs^GEs8f3088&Pi>!b}Xn zRFhLy9GwUxM)n^jc+Fyt5N56}8OaZKA<5eVxLZGWaJpYKIwJvP>tsEF8eZu7J96yFqtyl`L&w)zaIe;P_htZ>llx1 zc1@1G9@`L}W{kh7({+fB-niV%ZF+`9b4ICQ1hwfj154BdM#DUSrE>DOb%yePpJi0n z%o+6tojemD$KgBxwHmQH(-*JVcq2QGBSyDolpANko!X8$@Xl@|9UBR9hIUqkKxNK-Jo;r7?-eA|XeVJT1YAeMkPUBZ zL36iBfh*o|{!Fzjo@BTS%nIlI&TjHO4lH7vokQu{1)~`f)vR2(x3)aw!vvTyE`?)d 
zbMz>4PR3}TZ?_*}Zdv{l>GUtD>7{v~MU_a`L>Hp+tQx za)$ECE9{;dq#DnSYHQsCYFlNh>5Rg35< zDN#I=6!#v_9@ZYip1>Xk=9{K;dJhTP$iSCHTL?4_#f*46DdM6pD$uln$)a?-Z-{Ec zFJIw(2Npl$<(lV6*)i=8ejJE7CBvrvNF9D5&`MdZ4*YEm%O!#jYR1rIorZ>pLg5h8 ziQf_iM9K0OvFGz6^JY%xNsSx_--2Wc z5YvRG4VC+yEJ8=ddF2Op_ox+QJc2cH`?&n4VfPIIC=Xw4**h3tEC*lx8=!DYw3J1v zVcClkKN(*Y502OW3a|@$A%!D_BMT#y#8@+xzEc$S##r-9@FZGN5`6d;Q0gG=$IuKS1%XWGs71#>TP+4D%ZqP2j1T(P8r)<|W^`t- zX71~`u{UCd>C-oiKun^;u-KcQj#AnDdOfi}=bfvMcrMhH-&|Z?++WbHy)y;0e6)Tf ze=zJk?YvTcsFN~-AJAZ(x?$L$Rv;7w@}~D-5Fy0_6MxXwk}C;jGw1t~8bK5WTK~%k z;J=-JGyf2_kAFCBi*CF8 zWoG%>ik=Jw)f@d+A{}Z1?HgoN5aw_5UKt~lgLa6_eOS-R?OzijuGR*+kt=qb7WLS^ zO#O00&iqa|7Il8TQ2&B@#eP)tVhBh)dmfJ(DPHPbnq10U>Rh(RnyP3bbUou*{atcJ z7(M~rUOoTp(yM`7Jf@m1epT~>b!rwuYgMX&W17=MH4W-k*eY|JX*uS<-FP_^@gG4(IZr5$oaFsKjE-#bA+S5j*W&TG8%()Dh-l8zJ?1j z)cj}fE2AluRyU0Q)QuMJ7v7qdXwD(;ZK_MJC=DcmE#y7>FioVZCPUDmUyX3@;DMd1GG z0PDcn0_Bn6^ZRAua|I5_w$X2J<6HQw@6!(_q7JZcbaov=(#};q#Z1LO@LvADR^;(% zsbLkOZoO4x?0b-27p+yXDQT}0I3liAGN7N}0yJP&c6KdU(q!w*8?2O{Q>z?(cXu~wGfq7MPWVI>yz&m|K&^U!+ zDY$Egb!Yu@`LgxWvoE6GtC|^2Gvu_-Vi`pd=K3}I^6awnvd_uSQ`^(LmF!olspp21 zzo{pZlddP6lZPjBE$2Gy8N3HXNB?kOC%F7_P;b6 zbsLM9QlVURM-3A;^yamtWwyw0D>VKCk=yc!o2L|gdG8FGOEvccwk6j;ME8QwJr?qb z+37UL;1DvtWw)mfg@^mG#-VyBK;0E-!^8u!Ho7*kc4#56ljBUmxwNCWoOzpZH?q8CxwN{4zWmjQ<4NX?w~Rv+zWMlZ=I7_Dw$OAh zM8b~nRji1$gtds(Br=?1NGhQOoi+HYs0+|Hz|w+ki(Oo1E}Vm$e7}TaA5ujIowYb! 
zWdN&sTt~3dV0*dpeu0x9 zU1W;2pXHBfb^dPeHGJCMw%k0lF4*e}p8+HcaY z(l67WiFe9d&2G27jBv(g2WlO!U2^*8v^?E^1%27LLpU52Iv!fNmxL>B+Rl2j<8qA> zy)q;~@jxFvI8;X{8-vM#F$(2>cS-4aj?WtgQzue&5vu>v`P z+)SA|R(9-6c{)-zgRNt&izA0PqgBp@&JA@P)eEf~tyv%;P_K8`l~mWLXY|mL=d9np%1){ad<@ysXq=y2d<^Lf%Bj-iF(4f`-$c z&3>mfMfM$bPGOFC&VP<$C@`mOHcaF?8 z;<(kTvu#~qMbEyPJ!=BBVTq89dZJ;{b>e0MiGHobJqt&1!`{Hgz}mpnz;e^*9FRJx zzo30!f1rQRr3%bMOGnd;VS7+ZNmoo)oI2vpV>je57&LKqzSSisw9J%ckWXz(eM%Ki)R=nn?fgQ!I{w1!n3{QzPB%@D+B3mqGRT4mbQKz!krokHkVGF20Wwq4KqJPlV1t0;c7Ag1Iv^BLfh4mcl zTil)2e+pH&MY+)r=PU}QXaZiU!gz?Ah->KTa_e*VJ$qMm7tFy8brnomo2jf#LtK5S z<8}YI#t7?U>QdCs8)8~g)~MDh?A=nSP9U;I4{E(QSE_lfkybF&DVtZCc~z0@Z|sol zqwHnv>FsSf(B_(f&Ap4(r}g#?-~un>HCuio{)w(%=ap;Tzp3p??GHK7*4@$JKi1t- zrkPY|-8~QP<$Rh2YwDK`teC6q8seJLT$nR`=I;^i5q-=Pt0Cw(dd|LeM0qE^8h4Q3 z=Ig9ju4bLNg4so;;F#)LR?-xC%?-^B$KQ<4vDcQ?xL3Jtg=)J9T8UaaDoWb{731Ag z{_!Am(Q6&>@Na6c<=G*p;)4IE|KO&f+YA|hsD}Rc?zcpW@q|LJs#sqmbSI8U?_nOJ zjL_JKSm||~ed6-MG38ShC+6grV$`gD5WJctuSu*nVHOdi9+iD8%zDcnXx9pc$r<1T zaBo~1BqUlFb)M8)I$Pdb?gLv))TPYv6ZsInoDTCyrP0L}#)4v1V;f|eL4OLDW)961 zy4d@9`!Y@$oMK&Kol~va_2)K$S3s9TvrluD!iJgF83!+s0<*QKiRubY=R* zRK2yYccOQ!2huBj`T|4&0=>f;q5#pZ1jqJ=O!YIsE+7=JA4q(NaCqF#R^JSC1xB$W zO55@>)5GOx=A`MHY#?fSzu9S8_{Gt$r5x%V(G{gfj;nuajekm)Gjou+U9qioTv2YT z9^01Ou8}slU*XaES`9y(QB7rDWm|14G%Q}t1=K#*8Fa3)6`V4r&_-Oc&}^~oux+s= zc_3+JHnw*o`bAX7p?s=0tVgYH$svErchSDr-gk5CF&>&?markVp&dDx0|ILDuZy40 zoI9VhJ^Xz5?vc{5Xz#REwzW4;Nus(VI?E!Lj=Un3f!;^pg(7EPIb@#!- z@;dk*>NeH(w~8R2%Fy$ib3wP#^_PvX6YmYlofxO70l#&>F+YvM6fP&G_@15f{Y~+- zj-`!5<4nVy87THL>|FF*J;gvvtRq${&6?5yc7yV(WFIQmxIZu0xi*S9!$n@`=b%p! 
z-v;-c%btj9s0UbgGODzFr{K2aQ3{NW#k|F+T7Y2$ z^=*s?T60zi@Xd#V1q&?FQ~eBez>8h*8t zLXYM>>Yas`YY9;`@6lup?*Zd=?0sw{?Op1EnkTh4wO3_llR?nZ!JQ}6=JKzgv>LX~ z$<1#UkdBoe5gs8PO)VR133^1yKHLi$J}sUt-X7i$^P!zDU16P)UBg|aotCA%7Fa&a zMXyedH7!%azTSpI$UTn1%uAZv1p5Ra`m30SlozU(3O_fYq^X^oXVLCp$7aV^$6UvJ z$GT?7s`6XLs`6gXQDX3SNok%@+G-j4h_EW<2yQgDr?H zW5c+M;ETqKY{%_bcuTW-qd~Y;Tya*zYm{F@_obf!_ZduR31+ zCpE7mU)kT}=LFx|-k;cI-lAUZ-}m3u-<#g=R=uZtW_nh(z+2~^Mbba^j z72u6J(DL>0E$y))qvr3M=I=Kx`4&vBF*O;G5(+^BzH133{*5hvDUkz%4dLWcr|;Vu zPjwx@*(5O+8bBUUUZ-&?t<%MY4&|<6IoKgZ=ubGa6YHE0HWb!2>t3jX*^eJwY6Iu8n}M z?zCrqT^hxqtAgQgYukz6#OV-(A_jV-I9gu_v2ihJcIYL${gI>5lHsV942spw{_-i| zuuRe*^FSKJ=KnB}ur_ixijA9Tg+745Squ*xbPq7HHZr$&GmfT78fjWa!@a1p>?j&2sJ4bXxx}r|?SX%al#n{fmpo&(k7hZ6;bou7Xz{L4z(kfPE!N zo{*7HGO|AMI#M8|``@Dn`_EGcQ#02h{(8ZB{(5E?7MJfX0AuAnvpwYDn*`Z}I_XY~ zslgqYIbsf0bX5gVuBh#Ov*5oDxk{f&gg{qi%)pF!+ApVDQ_}H~v*q zSyTE_Kx|b+Rg`_W+fcj7nn{O=PbsAdoJp9eU>#=NT%BuORNZ)8rL%I=iu>RkD5F>X z>de!ilf{6Lgr5nQP=@8MuAnzO?fRU$d8J@i8wJU$scJ(A9?yKC% z*e%da*Ui-})Xmz>yGgV~yhXHyGZ}HAfpQgeMR8?%m2|}(s(*D83UwuWMSt}koQ<0~ zBD44X8dE%CCc-(QeqdnWyQEHb#$^r$<1M|6Ngx+! 
zWH^0RfjC6x<#ldVqJSxWbgI+iMyT7eR{=;0;7ny^8qhxI;` zep%;~5h0p)iAIG4Rq*e&Kw-XtY?p;K2nDlYAZtZ_m(c{3faxHjr7mlb{0jA+(z($S zr$gLV!#8z}c zk?{m{BGP3MaP_41vG!cKba78!8v1AEOTqH_of2D~oEQjYMY?J5{$?a>#&l279L#7Y zyF#)|e$V9-2e!OG5?kRH4RyjK_!^4T0{-zck4I0tJCL&XVj~#XTuZjD9&FU z*;dnz3!jKwcY@EvS8LCv&#Jq2Z+M>(RzA?~cy~SI+I!KYdj`BDxLrem(?; zS6Q2^e!YUT?w@rTG`FUhLmWJujhr2d5LWEqwoh#3t>OYJJ8Q0XHd+qeSc9#>DiW*h zjfPU(#critQ0cF>5m75N?i$)B$168>x z)3^b9X*pjJ-#~=jsNE7AZdIDVHK-+Ms&U5zzN85Jvz&fR>m85>%`ly^nKtzg-%JmIi2Eoec`92g!9e-n0E65KIy9G z3CFnAni$5>ErP74j@};LbG3)~O{KVs;RV&(HS%Etg@-E-zuOn`w|1!>##!d5M{(y% z*SDp?c@T>y3YnP6G&}zjf$2!C=d@T4H`9Uh()NSIMN=te53l=f{-?umXY%N%A&KIp zTB%p;q32kXz`1$9I-ek`84`&Y=Tpj5)(myq>!*jlRR(rL4I2@`cpk?+=X((d$x}>= zHz2eg@g3PT#SYOSI|s;_5+&*n(vzGpi@4Q3Zdj&hRkhG~E!_{8D;Ocm|A zMRMNsMEWk;J6AYuo}Ra8^ryb)1CawuKyF*^3=Tu-n_H|i4m-sLmLCIXZ>;b|K+j9 zKXs7Zn3COqNN)^-CenT!T!c()!DaO1jdzX=A=Q&M)QGK8|5&9)qzHjz`Unvvd!Z`o zf}DLPE%L)Szw?o@`ij47HuBUTej6duN+_$SQ`ra-Ace96>h#u_WVBSAQyV`%*x5VC`2vI9#tC)+rY*k})v%;k9 zYLW{~sAc}J^>Gh+IIV`WKa6!hv+6kyKx<5s!Kay{TFFe_LzqgDZQYr5~=N*c;EJ0f55kq?ovD7Fqi>W5Evey$G{El~WHie*t%>oaz1cZ})(}A1~#NSvCynneWzrJusuOmvt$1 zSarIUoUDCn@(_|GG0~R8IGg?QYxUL&yS`baabX|%aigupbckHA6LS!=689T0rns5% zICcj2Xhy%~;mFln(|`|96%=3E^AH<5e8hgNvf0<9xqDwNSzN!)J-by z=3ZL%AeYQ^a#CG>(a@srd@z^vdN4a?O) zVm0bn$`X)X-esF3U-9vmxs|=w{hP60y|wXYu%jL2X<!Qy{PkFgH21@+`71* z2>(VAf1R*`F*DdJ%g3t6=hA^2?5g@vpy*|&LZWlsnj^Q$E=$%TJnG+!l_lSZrr@=$ z?;@Oi2IS=D_l!>TBHt>KmtKLaz+91tGO1JAU|@<8<^+aM4WpV)f!S6BY_cd#GNc7$ zFGK1B0$vXTOHA9>{3iG>QO;Z9{{<7kO^ zrASot_QaeY`A?uuA?JSbfkmhVY2qElK=MK;IWJ%0P8m+A_@0b6GOi5)K1a7ztU&0|KvAtU1!U%jUXFs%cZj+bTZ`n zxe)C{2%Q9>b0D^P@W+z+lG4IAOZrXld6GT~@=E4G@96Xh=}W5DHNP(9hxJ|8!3ZfI zCh2=nUgVF_=@asIY-?=mQt2r^wFm#t`wM;+ZyTLW0gwE~R7m-WgTsx!V9NI!9FqO| z8NV(S95`#{{il9i%18SrzL~8{3!m$ok?%Fw|KdK^HNR_+k41GwS{^6WC!d*HeE!i+ z&5iSlPVc0AEZbpQ51rj1U6hJvJ7>E{a&5b8yGrtOO^;C=nsZ$K=&xP0r_*U3(0O#8 z$3B+A;Q3}RwND@&L1%kNKJ7JjFrRE^b3Hx08 zd^+!QZT|6jWnXSzMf1(RRy+4&@3wCz+mi~^URq;OKT(_QyXpLo{Rrhv`on(A4jr6| 
zubuXx^FMU@$B{+4$dTj7cN95BJH~0Jd&vG#fS%+&fp*TvVIzG^r+i3G$rjRCA3Evd zSm0=-{z{$ip>=|z!?B6_$+5+;mEvn>dmM-8R1f)pr2kX8nNIoOj1L8#AK4j_Z94Nq z{hm7aLvluEf2bd69O(=Y-i>#bQyz5whjh8KPCNhe>9aqnlRwT4)W6P7XBYGsZ@+V! za~J;ZbsivjNS*wlGe6GD&Z{Z;As?Ll50~glcV*I|GO~%&+Wp{e}Y7J(6Y4{$B;CVYkN-$4Ht&<4&v z)JH%nd__+@3Y>yCuL8e~m_fn;q!Mev{|)edaBc&=7&r-erh(rK&PC9F1A4(ZhTQP= z?EX!JpF9Wtdq4#x?niulyIhQe|KFhTRX`E>3r~U`g7}{xPpzzO;GKwroLL>-PDY7Z zonv_WF3^ZNqX#5>IZ*m0;-GF~F*w6O&qe%R&_4v-2W&zdANYrWHQ@Xe^j^>yaZyV< z6L<=!B8N8-XDaBEpq~Y1AP&A6F5d^v&wyFrqtC_DK(s>mPtYF&Ar-<2(0b6{MQ=b( zgyG<0EW~Nx{1*5W;yj8TLobQih=`yy`Md`)e}S4{#Km6$?*a#MD`Iqne*jIWKT4GF zHFaqX@Jm3g_s$~KPk@kK34JTQ1%ymWs0l8Sqi;JCqNUO@AnG7B66*Y*A#d_{pbQSa zASpY5LxH$yLi!`>ZYzICM+yVS5a)TtW2B0>wX3(f7 z`w!sP5mTd~TLv)e8QR4#%h-cJ^cZ^t2uWtg7=3a3UEC1xSJ3A{_kex}^f}Of0sRc< z)wt^*pJ?GW@K=I=5%?iE?}F|jdSDCD122L9XYkK}J_`D6&@X`A2>N}{r$7&Y?njAP zC~-LW&x5}n^cv95fPM?~LC{A){{i%Q&>w*Q1o=D#`V{!@p`M@zK#S0pVj21zk|{s} z1?U0sZqT^DL4dXqwE1fRVm1k5LEj4ce}K@R!UiD5T|5BP^d#~THO)2=cpj)(3f%M{ zY@=IAga+V;gyOw;YXS!&E1>4WKD_-h-eRr^e+Qihz7_OiNOc8oA)mr-pd0*mKtsZX z_klS`n+qE9CO`)Y9l+O;y+LVVg9OxHco7%@Le2#X;$SWaXqN~*AYy)r&{~2vXEl4F zNj|;-Ec_lE%r#*<=t3aozW6$%8n#k+8JtA2e~7PHWZcFfVFaa{!GS#$G0P15*uFX$BDbR>HVT&`z>Oqg_%t5PD349LkS@hE|uf-l#{;^Fc%AMaYAM zv6eJlik_4-yR4xpiv$lrz8&;OKzLBnNW}aAs96(eO9`b5=v#3G;eeJu{5SC= z-oi$R{g52=gSZ8yJcO9A)S_lbAh!~HJP8_G8Uxho0~wb70@QQ@JQ)!;sz^mB3*#ul z%1f~FQY#P|Tf|%w;d2U_-G_}3mmtm^Kv+f*qbx&~WXPVJ4%DnXX1WCFm7sYf^tqT1 z%mfEoPSi#amQ;f8B*K!4&{Vtu8<0AvVBzr*|#&mR};pljG=0B0&NP2NEL z1H2hH6?h*cq#ivvOr8KVaQrf@Ui-gJJtL}g=Zo|Vp{Vlp0Z07@obzvl;*SXByTN%k z$>|2C8}vrd8$q{%ZUub-H2F?HO3S6RgreI25T~!|yxgBZM<|t{#Ct&RClv1i=lj&> z@&kn8_rdu!YBCL&ftvhU{{qF)+i9&tIg8DdgB(OY%}BKZ@&6t1A4DlH;q8NX`ykrY zg16rW{fZWoTEyv%pj$z=g1!Kn+BJi;rwNTB}nXcXk8&6VEnw{Uk_H@#DY$@TNcAXi zdXht<_G3a$Q|o3Ca++EAgx++3Wzd#-xAv=T`PD2fgQHNK_~2&sOoYvehPF=DV_*kKzWfH{zXNGIkoGK6^$_ar1bsfq=?148^hVGdLAQc# z1$_bZ1>{!A>AMKUj{bc-=TSNyNB=77zY8Va4EiKw=H?_P!Z~l~a!@NiTSXsgH4Jrl 
z7$f)>s=x3K)m$>`sJGui+I!K5?T9%9bR*(Fj`$y8UStspA0a+kC%uC>uTdO18#P&m z@%j&p&N9SYhB`ckoIeKL4f?l9>;|VB^hVGdLAQc#1$_ZIP|v(adO&^*xC5MLFd|b> z?(-;j3Y9DUJJD&k=|F27Dlu)Oo+RWo)PFy472du^So+87lieZY^bXKFkd|uo03qk|x(PoeWP4Dzn;hO2Oe-&~5j<;`MM(;rD-UWXL_&bp5U8LFpEt&%@3XLa8keu}x zI|+R<4gGK*@E)zD)GqlRwDDF=+XKxQi#&{l8EMTFU#fCFOye*+WsCg_CD(T1C2&YPoYm@axu|beMnmh?_-qL zpa*(^Z=tU?pyq!i6gD7-4T$*wY7Pk(Jc#)s%8H?^7ZLMC#H>J^?;*}6nOk=0dxXL! zls*-Gco#-ws>VUj+>QLFqG#?#&rC(n+>M@@ik`U}Ju?+E;sSbRDrWr!LQYdJS!vYO7N?)$^bSqZ(5~HxzX_b(8V4=?4sbKt zyBqU&Gv@Eh=$UHt%*%-XGGh7>^JUE9YRuvfQsVPf|=BU;PP7A^&>vz@_&IMLng}-Kl$n(|Nkxod2QjHKLJcKkCqm zw)blsl=4&HQRLr`HXcPAN1%;I2|53D(6594BxtHbKgQj!+t2xX2!-7K3d{n&4wCi~ za!+XttR9uJ7cI(0ZhMhiHgelbxp8_M%_c%#f9j=n>LuZR*tOerdpPG;@Lg|*Cj5ZX z3QGvt2T2aK;&GHx0D1`MZvr2sapZBRb%lhSrV)GqZ5*QeJ*US~t=KEnF7~|aqx^v< zQO`#S#e8ZJ`}qH`cRp}7UDx72|Lz@`$TeglOeWq$g>x?vnT)yToO|a+MMYIbJW&-@ zRq;ebr>dgjiK^#`swXPyiHN7Fq9P(H$V5d&M0BDmDysAIQx#QJQ4v)cZ>{e+!(=k~ zy}Zx+{XU=H=kwlYf7ZIoI%}`J_u6Z(z4pH2Tv}g7>yN{EE|O31R9oyxAC57W$&6)v z6d^^@enBP8Uq(F_8S|gO59R5vAbv}+dAD+`^*X0^sm@*O6!RG7t3}XT6pO{w!oQ8Y z_bF(>=5hSwHJu&jb4Y$CtP8&w4g261!!Jfpjbi5pv=yr!p`U`Tgsz1CGW3_BUxn5= z?W;5BbYLEfozWd9`z^JS{We~DKm4Q6-=QZ*vF94a z2J6dw9GPll$O+BI>HpP8ZUXlXM;dzrkJ`lys%^gkJyo%B9?td7CG``<<`>X*ef&D9 zRjAl}6g^)@ridAQ8@!kn-=>EixCr~djm3&sQ*UFjBG%N~SgeRu@HQ4JB5o{JEb&3D zcaviCP4sWXuij*K4k;abMX_}V&MIv&_KM!UScho+H29FVD15_MVv}Fs^~^O$1eD3UW3+xg0Ijw8P!_L7YB zWoGjMWZvWH27Ma)GjZuH_?(g?i`Emt&B!<^W4_1J8{-wMQ$2V56;2m8`{C3wr~6q` zNSbddHZBnXey;Ko54BcbYSm~hr`nk+{EP7Wz<*o$F#~=TIya$n4^KDI)*-ZAh%X_> z?xRrx&dYE<2j>Mc_PfJ;PxIg$&Be5}aP_?Co!7yOR!?{t16b(f=m z8#>o=3fM+lYgNX;PpoyE)WVyDb@*O;I5{}&;goQ?xjI&;ba?vX&cPsV-xl7V=D=U36Ji`wd{Q#Xq1TW=H$x_N&F%7bknEtHx$QqvDg2Allw?^?U|fR^PzvidA1Mt5+^u) zpNI2Po^l7}gnRu=BJD}e>Bo^QM$fZoK78p(X>nt0o%H|5+`Vk1Cl5JmbVQD1dhzdu zH&pbq3Y%ARuks@IDsgQ41Z~~J4S>aY|2y%0tP4G#UIXV#N=qyfY=$!TbNkdEnQwqs zpy7CQZ?+Fw`OV>9O|37$zZ(AI@Hd2a4)8ai|M$^6z99VVSc4l*@pkR`MtVMvp65k3 z2AibT``GGxocf>SzU~NjcdtR;iVYoXcu(v&etTSLy&Jdw3C<7U{F8FbzrY!<*!&Be 
zFov^NnsY-h{XoAH`EvY#p|>0}g#S09N*cU`*6*kFzHr8Xd(>Ls5*?{xiTou*d)4!} z>aotz+n>_gwdi>oJ$=xymOBvcb*(9kx{qow_Mm43wsMgyC7K-wY3@j8a({RZ{t=b8 z7qS*cabvSk+loJ^ZP_Vmy+-S=(E3;5JOO@~w(i8*hk5!1rLFeS7!_Z|(|fsr`BAvD zJ+SIsb;eDch$tojyuc92tv?h&`firBM+W3?iO zwAg1`5iw2@hg*<+omI~?e;MVvn#Y!)vxvNxy+C%T8}wwSzg}-140jOnz~7R^wa2%g zR@(U;oTsUE4H?(dVOB;fU*Np6O0na!H;sV*Fqz3T#r9p$>*?nOjdFRt$dvZd?s@t) zLE}{{OYC}!ShtT%{t_8f2HY3ip4{eac#hNjEuJ5{M)<^1tB~`~dBs*CF|bRXM?RxB zrA;^q)^N%=i=J(avX|#nX}|(dh#mttK=b@IkkoORn877 z(_697noo8F7b8={(;jH3LBsuU{!E7X6C{sDS?FfwTAy}CavPQ$f+e@rcGDHdxPHsH z@;H0Guh?9{h;C%3n9Rv&KP$8goPBWCk^OGM4ik`X&MbeExwDzyrVWd6JyN*@s7g(8TgQ% ziA1OJ_i#$uq_lN6yV7KKrBQLsY^TN_)xYf0bDZVrU650t_h6Ps40)eBq66HFy&rK@ z!;_A_S>t6#Zr=^FRMM!$|3}gmbL8Bp_pSzat>&MUHp6?HccG7RC$=l3X}z^_V%5sA zThnf@@V=D$+m}_p^KI>e^}3EO)|t`0j$~`bbsb}&u6d5WeU?$2)9lm^p!dUH9@0pz z=ALae{U3y$ndljW=AQJSBfa`Fz1@fgms;d$<~wk{1!uRT`^7_!?h(7m;@ax1UEXWl z6Sjr3O6ewj;jChZ8ci$b!3A)tH5ZCKNDSwGBZegUOm@$_IQeQDdixdb z35V!CXRHIao#C!j8QyR6bQd?JpW=I{9(+@9e|WEWX|d?RH}YQ7vBVQ$Z#9c+(pR(4 zSS|Oix8V;y{&1=GyP{!K^i71`se0Tz_uw0c-*MNv7ub>(pg4&j<~uV4Zk5`Jj3W(GP{oLsrzGQ>UZ?>F~<15(pEA0S3>^@{!}K;Sq%ji7DefSL@2`a=9p^7A`&2PFU5K)!bZA9#wp z>(4khC=KTqaQ+R>FD_lH-;}ZQ7Ll#3C1>~z8a_b&1M_o&8;c-TITEQ^;P)B5&a7stDmEHJIxha{m@yhcdd3PUrmN^T>pYaV$^V@qo_EX zJz}`hDr1}?Z`m2$Fs~H48`>U0&o{^+i`o6Z0e>8vTj2aF^b#`URx!P&T0(Afo7Rmt z;r@;L5c@XxrL5T^vaBRe-zT$8B2%IfGWMKeJG@tUo!j8ij(#)sBsu&X?nA!J=>COqNnw-I zJWWM&s+@2xs)_m2!<$n5J1EHw#4f{-*#c`FNNR5 zu*c2f^u3bP_blX3I4ZNrxn6I)v420#O7G}d$@&y6{*iuu3i(CkU7yC6T7{=1talgI z`(?z@TDRI&tdxiF1l_@ZrRya=7;8*rjI5*hcsQ>iLk1q71n2!rcZr?}YENgW^Q_X0 zs3m8&9HU;wdE_mXiQS3JTddQJK8;O`O;j40p7_;$tVh0Kw@$$sz`UFa^C<3%3~r*{ z=LX|t?wyKgF{au~tYLpc*PvCUbz>$PxQn*S$n6VQ*HbOiP8$3G5YPN(^~2929ka*QLnHD=-;+EQ|&)1jr;)nPr`ZB)}3X(^BJK} z=-=a6^Jyig{ucisku!(3_09;PzoJ-tjBjVGCzV!g?qhVb7~Q@ory7EVmGe%-32m-p zc^W>aX{#%%?3!@ZI1Zx_Z2eAgY*R?nt5VGqVqa7&(MiT)5-sh&f%6o{TZS|({+K@W zrmY``@eqq0fio^#YeeH#BUq67; z7nya)e3Sc!cZqV}40kA2cL}R|yq*r?F8(~2y4~4Ra#(mEoY8Q;3}-aw$9Iq)L(jj3 
z&O>m@;rtxV=ZIj{oME4d@^c;05%m*$w_@{%#&&aBOy<=3m11)mb&oh#iOfHT^6UjA z`h8QpHFt4*M{KMNzlFqCmp~7pw+~^BuV4-CW3AzEdczqGr%(7zF>$z6_^lau0MXzv zEK>rfD*8@&fzSsT?mUa3 z`^H}o$)?<1t|p>R;r4qg*&Tl;VD*S!jc4jPs9I_373ee00j#QjDRGIj!w1~7Cu3E_ zd|juju*|E>9XCj_D*wSSFP*cx82U|LzUDd31A?bJKNP$bx~223=2VKExU)hyXV{f4 zC^lzMYoy}XUd578=X1K8%*uU$2=;*P@6PLnp7q}4tf93k!TrH4(N^o|9&H=^t5c_+&Q&YE?NV1N)LwMR=ajYM; z4uW@qXMvB}dY5>_R_h%E54R`jDDcdcaDv!iB=vjSD&-hUl{VJHnGbzI<(+fL?*X4u z92>0IJZo>&@!AVDTcFkw#m;8vso-MpEb6uZ4+789{#adqu6*)v2CNF z7uxQ%bcf2f-b80e|4!qx(&lO;`=Ftgx_<|K4E&0@Om`ysaKhBv_0Fc=GU@*zGtG!9a?TbMP-*JwtQ~#DHaXWs4a^g3@8O+nc&^@4ggtSUJC*hoga}v%a zIG2=TA6MEw9@6^MeiqKN%84~qI@UC#se6&S7twZ6Ik9fYbW@JyC~Y|*jr=_1=fU|N zPrnz^Jau{M()yQZ_e&v7PlksWP9;w(X?G~KhAJKZHFo$lJxtM)R7lgeJUDr9`at&y zX`bE<=Wg`;89jeSe_QmoMRGVY^uZYfe^5w=wgMlcZ^!88F*Ktg7HVkANM})ZmU8mi zD4o|PqYFMm19nG=zJZWufsnK|1dQ6v9OPI8u~Qy zZz2B{_K#!#IQkz)|Krdjp+`cmf?fr^8G18%UPaHV@SlhOJp4bw{}VKGZ8O*Q7ooq1 zJU;2*llBMjKcGL1D;|z(H~ihu-+}%P^aIcjAb%_Jx1zHQon^@2KhEqB^K>-yXrBHJ z4S&NjU&k_Ehrb>E_D~xhawoO!WG?nI7yFUiisV-KC*Yre|3mmc4CPgWRgC^(IJ9eq z?J{aB9Q6oBF@koN(c&^_588vyLT8c3GI16~9c(NUO^3%{yLnon=&_pMj=oFhz-TBZvpm)&!rSyL(^j_$_ z)IG~uo2u9hZ8qAh$7t~}TKOX~ ze?-Ig(eQm}cE)(PGvYrM{$r0r&$uuqFvgo$ziaT!cWIYc;e@f`6!NEN>sRo91-%@4 zIWp`%HoK4g7wEsBzm#@M=?`=qI!51O^v$^)J-1W$KI+~FjVIW6f;|^{E;M7b8Kb=r zdLfpq#*)=|+P(P3y|hL45+i$wVb2)$jK4{L-lQ%wlE;k1sT-%RLt9SR7BSW)#@ar# zA4Yy`_#~VsX@&hO7UoV}(a;q-1D%1s2l^f~?4j-+EcOl-dk6Zj(0@gK2J$oL;aBP5 zSK%0~(K8133)Qc zI2ofuw0DU14tu4;UTL#`+3a7jX4GwlObcXMK(j|X?9nzc%_gSV^v$Mk7Fm-OW=-S_ zHaSC#wqms9j7QIS=x3mxfnEu{5_zIiEX+dT+wg5X(8dE}uOagq{2KT*jF(s%Czjge z05&;*b%ws3p)KODLmZAF6ASYM=EVu;fhM_}NiG+E z6rGRK)+4m_2>LIe{{nn62b;{nnT(#v&K_aW zh6W>q8XZ&y6N1U|Y+6);U}i8Um>(>XI$@~{mIf<=)xkP?Dg8oDp3thaBcn90Ic-=* zpZ_aw+5fzeFz%MpoVV<^;4S;t@|OMU41LRfOJkHV+PK~rBc+IU?ccz=_HX1}`+vu~ z_Dguz{!P4V|5Lncza8(|znORKci>(7F7Mh;@~-_9@7nLkyY@R7yNumNXQNumZM5U%mKZ&GE-qH26%(LVNynVW4v+RL~n{W-J9jjr7Z9kd&|6)-Wtk!@MdqT;2qv> zZ?6{hB&(x%(k+oEwV-|VAxmbhgZPtrk0UcuD*mM26UTe<#CJX;UZua$aFzJWVDT#T 
zn5*%aru;6$HR3f7if^fZ6o_{`E*_?y(L(%Ug?O0yM4|D#lMD-GA2 zATyfcHxxRWvt<@LF^jh_i=Fw!gj>xQ&5cGE$v?`QWb4gGU#EGx??=jj^x*Wc^vKI) zOnO{YCVr$$Nl#DDipt#d0x64y)37W{uS~CzvR=w&%GUG_!MoFYFP8)9nnub|>er-C zMCEi;&ZRF(*_yuSS&ft?UcxK9T#AsTcqLJ3|B=$kEA>1tce(WNdPk+-N6J8N$mO<% zd!wWj38!Hh>y7s&NzwMEQf7Fw1<&&qUM@=-(caHnPW^s$mi1OeWv#bCN|nI2M#@eZ zUv*uPTKg`SgHb8@NIC2slRlliT+T%N^YtuIS89F7&#x;gm7e7nG$<_viv6|}S4!uI zU*-pbyIuB`S?LpgPrpwirN2K&%20mcQvdMxh@SmATIuZ}e?L9ikM>ZLc#6M^ zIU3?0@sBGdSgh}#QcKJD&&t{v=3kI?Gfph*957c zOHiIZ?O8!K=pOWv&%R>O(;}M<1_Xn>>R?zfG8iL07XO#0|iY~2urX#*sJR`IN;okZ)(stGbA%S zGfJ#pApI}NjP;J;b#7+7lu3Tu%#hFnrG#@;z)Xq9C@(WrV20l(Gh06A34cXqVP;8Y zd1h5+t#>lBAyehI4Z3Hxd9yP+Gu0xqFLN++ICCr`&(z0-!#c>E&(vmW#fx`koos%# zAlouq?03_Wo|SEzsm{9i>Iiw(*{{r&sh2gM**@9+-m2^%e^GX*c<*HO z#O#Rd=xn7wQ#^EqjBi?Yg6Cx?XQzo(dS_>*M~cK8zfX3)c;r&?s3BfSc9F!1e%Ynv zd$KFaFUV*sGaIt2v+HzSi_eI)vKzBovfHz}vU{@o#X}`>h)0$Zol3HYvWG8kIfeC4x0x`|MeHvQOl4vTinHF9_wS6};ZT;tHc8p4nE> zw4w!3a8y{-R@m%dMJxYQMVpFt66fZHv92Oj(M4O+mMY4@ri+BQrgM=|Okt z*-(9wYm!Uk3Nt-&MY)n(`&=hki{r(EhE{C$MpcZ=mFB$iIsOINX=Vp)ayf63Xr7uq zF7iFRRk_}|ez}3UA-UnXQR4SQ#2XGute>45n;V~-RDK~h)$5m=A?sM;Zf<5-+)Y%*Vmb4`r1 zOq|R&kF!h@&N5eVez}_SOH(r61etF$GT#DDF3mZ!Tx*yGBMQbcrZ##mC>tST8ZDod z@;Sk2UN*X{@)N-Xfyn~XJ_cq+FeiffpAZ%aENuiN0$U>39>J~%_C%li z1r7-u5jfrmoDw+u3E)CmWmmCc*LVa?1zLO<7+VAA+NuGx`AVr4hXfxym?$vi6T)QU_k_nqt9gmD+SgFtZxK1 z3vB%aup`p3JA%Ce2O2<4BXG0|5z6j%~{F8`SRd>pJ2Slb9}5U6Sdwl#pA^29jw>q!yoR`<#%7~NcmIB4$9*}Mu#daO9_odwFm@uq{& z4(V=X#7or)^c3hL&|hGXz)*n^0(JI{?5TCxC+qgG?N{uKp2|>%K1+*SdUDjShV#^5 zjxO&h(L6C$*&EZ-qQ1?nSO07{7XCdg-18XQoQOBM9?Wb68t%(08i957_SgD*Y#kh_ z2MzYu%lqYjWuI(U_Tjy-OWBn@u-<-mIc5o@=SQD9=Sw5L#E-PvQF4s5j48b_;?EBE z57zC@a6P3Zr$}!PV^Mlnv@Yt_L0w!-gnEc`{jRJLoiB}B)Q@7|N7h!ubyXK%>iVA_ zeeQ|oe1Ei39gM06 zAJ^7@6i*j6fQE6heyrS3b|psEkB5a(-L+9{QX5E&_9Tb%;vr~mqw}pfjn_2dNzC%Z zM?LkeD1Y(92fcPt9Fx4mOGWvF#4}I)&?~=^6L{G$ma+fu4da|A`IYX`p5$1bwCN2H z&^EO#X~WYtBp>o7M)9*QUTU1|UUnsKntLUVqASutAE!j|a(yFlxIqlQoWn$MSaoSW z=1ngn-|=SEtG_iGPd2m{d6taLn;Y?Z)q??zK*L;cTq6+K^&@#*9jvJb>l;8_j`m;4 
z%}$qnIRCm>b|vqspJR=S>Yl4-lLl)`*PplGqwC6Bd?m;7Bv18LhOwwl&RY}uwYNU> zr^|6*MYyii|M-*~*4rAwRBxiA*_5`%r|kFW9r!~IWz@k&nMOAhF34yf&q3jI{~LSJ$}U+myd3hm`f zZs*Im^_<|(4(;U63*~(oqc6Fezr2hbP~(4HZ1!cm{@MtXR}%WRzag}{uXflL`kB8o zw4=_WUmZQ$7wI?{#&TcsCSP(NU;8e3jxX!hm;5G>ye7yG?Hd$CaZJV^w2VHBLw^V) zzX>F#2_%;Z%If(~r!XG~B(Di1rwMvS{3zxIlG6nJL;bo=0?BOx$!&rWSA6yIIUw4j zlnzFRGC^g?8QO~0#yRr1a=Bk3+xj( zC~#QdSoC?ap8Oes^U-H@RPTJf=e35Bas=`P3ItjT6kqvN`=r_mxB{I8${P81!_PpV zn?O&2KA|mB{iDx8|M92Vr2c1WsK5w;(E^nM6GDBd$pX_NzJAUWm?JP>U{N&ps$*#c zD+E@5OdIP2HX6^lh8uUAx-HyRZX2MTo9a;Pc5%zytlQn~<@R+4xP#qc?nrlxJI~3{;xVwS9?g9Q?Bju=jLg2J}&b^qll1<$1$waa+ zS(GeEwg);TOOswQm+X=3o$Qw!m>iNEo*b1Nn;f5OYTfoC-)@}CJ!f%B~K>LB+n;nQ%)*hpQj2^EmOs*wkbE&IaQVlQr%KLQ+-nX zQ-e}NQzHaMrz%quQj=5DpcQ7O=A`DQ7NwR-Ss`V$6#dkHE4@yN{(Ga8EvfCPT~hW) z*`GR;I+8k`I+Z$`y3o<+81LA$V~dWhI=1Q9u4AfWmyYEfvmLw3&d}OCt-tPMzGt2@ zEc1f-H^VV6$>NJ!F)MB~v97k78u?a%)m);r+Mx%e9h5q4kvY zl+n`eWq;PVJ~lb_k4Edn$i&0O4T*0hW*N66o=L1R?ntalyl8wTu{rTG<8z7aiC2w# z61x&_7=NGmbu-hrx0&6{F{U<4G`r21)~u{q)>!fXanJOBA64?VE1QjuQ3U)yMZr3H zN4WWl(cu_P|T|~qkVMGKB z#Op@;W>z!HaGS-N#f)UL(q^Sb>i@BJ(m~D(1<97l;$+*T zo9vt{O9shq$)3qR$^OYf$)WDSZ@)ZUof z(tN-4PfBuoa#x3=$vw?SC-;lwq3c>Dk0g&LPsx+B$qOmt<38)!NySr5Q!P@hQf*T0 zQmIszRC)7_sjNFH)jid#uv@CH>P-zu4NeV9jqI=^H6}GKH8C~CZE|g=)b!M>)ZEm9 z)Z)~#)XF;heZ-Cp?HPTh)}+?Et5Tcm?3>z}+EGxI+FfVI)ZWyAR88vNiTe(?Dvs^{ z-MxhlDay!ZKk-ur$0%-M6#&YV3nbLPzKEPm&P`5v*Z!u;g*{a5X) zT+Njh!D4T5ebwH7YbZYxciJu%gC)okW@%_?ZfW(p-T$J!E$u8Z zmTs2#SMC1l`bz%#+Rrk8Xik0AFV&qbnZ)v2#Oqb!MDa`Fu?6C6&OuxzF5_J27gJ>{ zEEh?96jzF?iT|z@i#e^hQQX8iid)F&a2B_V+c`HfMs{d@i^xro+6{Bws=*%$C$~DHm2)O z8jcZ8SNVsi(uw|FaaN~ZL;RdY~ zWIQ5;8BZ9`8c%qXNnwG#rH00f#xjx`6YfsnS!22JF-b4<-r{g6)x?>!BsojX^{!GY zle?H`ayQkKT8ZUSJ8`+mPh9RAEw$5sCwYnmCOt`J)X&u<8wZ(Wr4BKs`lhBNMG`Jr zcTDGQ>O@jk-Lb&slBcPs=_6B5(i^ngzyIqktEFzHWYZ9mG6*-4Jbj!g*EG(hjP;Re zl4%-Avrx|8w7|54q?N2aUDT#^#(dLe)}E#v;t|sh(;k}BbWmSnI!4k-szvW@Ds`J< zDm7iERvRjvZ<%h8bnl=2#Z+N>VyZMfk*=Br=ksrSkJ;YrYW6g{25vKZyKa8lmvj|o 
zf3v~tFBX`CsMY4MYD>&v=7#9oJ?7@-RwT7E#~6~$-K0!&yp*XokYqLYGsl<*m{ZM} z=F!F$<}7obd9pcA%$KrsJBW8Vnx~UAhx*yu<^JGjfB0pnX)0fBTnVhx0A-Rg!MOKmUbC2A(zFH$Nij8S_DD ztJx}T^;|%G(4w|DT11PZ$=Tv%@%d*Tv;>F+uCxB!2Q5b8gIel?_!VM_ur#K>EG?*i zTB1ymmJV(uxEEX9)zahDJ&Vz0mq|5f@?TM{k3O*^O;1Rk*rGIb)Ut0g^fxpAyM z%QD=UU+o}m2@*XW#K9tYLl@J<46>>kCXVLp#B4E#a}jgJ&q&0aAm($`#QEZU&O=-x ze#Lo;C&iQaTcK)7++8Y?RU4wOfat42^tD6ubwc!YLG%?7eXAk*dLjDOK=k!S^z}jX z^+ojcNAxu#`qm?F1o?ak{+b7?JG+`lymI|l>CHSb0nH5ocHfcdI>ZoRXl!U2o zbTGu$*!-DVhMNH+|}@3Dp)|22ku!xaA>zPk(sx(UK~$m+B`9j^!QwHE}3tO z#G?P%+NA~84QrRi^!#h}Gmfjh==Vf_`^j%7DkoPVaAV-cw^m&R%BqXv(Fmj_MOYc# z!MjBysSiCroF*dIpZ`D-qoj!M2kebUF13_FflOwFRsIt z>)5}!<|xw$yHxUuZlGji8Wt{q(#MP;xtZ$ zX!T~jGGBY3N~ssEfg}3S1NHJmJ*ty>^x*Q~BhBefy>Oc2bca}?y!T{|)Awe-^@$`6 zVsd>tNy9m3eLH;&`PWY0O&?FL{RnNRAE2jyV`%p`7ST3#ge^}0VQzXt{8rf9jjYN! zdY-lJ(eKxHPBQ+T3SxgTkv(I-ai8!yE8}a?`jEc!=}E4RoV$+G9VDxple$vfW!(+k zJza(F3E?Vqm3l#MuUnvZ)qA3)hbZ&H$Hcl};xO{IJ%Y>*TFefPm>rzOd@?(@VqS2= zyio1`qa@FHa{;I?{t}g!k$Bx8jKzf_J#B+0v4rPF2oE@lBuWtt7ilyc#T)aa{~H@< zQUa9-cOjobkjP( zi7c_ziXZRj($%Om`Xtb(WHj5bbc$9g>Al(E&ZKkm?wq^TjEqBM?5mIa-9;ROciJ(< z#X6QSy)-x>%jPIET+44hdOq5LP1uBPqfFx}=aGhZ#k0!&M+GNp1E!my$lY*Jp` zJe=1WJqz8QobOpk{y=D3yqCA7zWAbkzWAYIi>*_ zRsKy_@2QXW!9|udmUzxZ!;|{<%;a0UC=M#phWqoM-g_mL#DWaqR9>5@Bj0y+&Bb$z znBf=Q?Jo}qm~XY_x+mSObNT1&U3a-dKY2Vk(|NX8v6agF8rk1VDA~t)hoY9I?;Q+hDRJoV2-)e!1}3xtNQl7u$r0IH?swHtlFK3Q;DzF z*ls94|2k$&U;8j#PIEHRR6ix=t*+>UVZMkk=gfOQ9_M+S$hzC7822@yJG6>+?s`P3 zZqhHA0snmoWKje1>*ejC_A(haCWl&XsSCW`_rzS%YfDn1)P%el?w7&K7T&H&$x7{8 z78BR46v~f&u}X@F)0f9co_qhb(Vq3DwCcvA2A_f|l__4MNv)(uE;FTxIXPsT#4z> zwjstQzi1||mI%wLq~utOj8?iMtnU1wO(EtPR*jlgKd<;)xVXHi?JJ(>#4sEpau~OF zDnB~=%a$T$EhQ;#9~*3b&~K54#5|>W`z4y6O9|$oxjEm@t91*yh%G$Pex1S@_O;q; zYuMUAq=Gr!=`L?){|^_&o*y1axyBv$X4h?3A6vz^%7lhcEI>LK=909tKx8P?wN|-qe?%dRlGx=> zt7kc^@UH7Vrzq78KFMWaTdjLAe0MIf2o_|Oy?&rP^+XYu*1ILbhW@LdKqpNNZg?pv zL)raKe)-!O^VHi1e)stIFDglql5f>~;H6^XMxN(v!JQqTLcPW|DeSIpP5I*axeb4ANP64rkvc$&*nNRfAoRJX`;Ki&7|6Mnw1 
zrm0hiZ}aV;{Vn})@F_vbnERcA?3I+Ux9o&GgO%6kk6S689MiVw_KVQnuwhBVKDxl% zX?!MSfA%}^xbB9ckYGl=n{{*#@(gaUgtw}SRTlDD&p%I-SU+`N@QAW8&5toXfkp-j zJ2Up~@}-Y{G`J5-uiz(Jy_c4wjlY~t>(Nf{+6wy}9C!K4 z8MV%B=eWRMzH7%tZp9tfbnejad^EU;$&>S&sXDj89>-TcquIE2r=u!+m0g9ed{pz% z3VRrb;*d#)B)xZt(e&2Mz`-fZ3mLzhs>TJDD>8nsY1di(lB#CrS%#(ko>eu@-3hPS zJI79pyBkn7GsFIcu`3|4AJZeXb2iwhcdLNkV7ExMGyPXavNL^Iuu(gv<+z_#mE<`4 zah~#*Ciid9`vwm-V1_AvlvfE~F)@^Z&yv;@zoS(%!|Y)(cLk~%huFXD(+xDL5IN#N z&lfCEj$tygurKx2+&ge*qtd>YMU3S4y>c%fhZ$F67yH?0n$vQ7h+OA7*?GAXjZN6C z={{MqUThdDe3B1Gv;{Ae?2a+%+@%*PTF*rZ_%IvS~ zsT}1vlhbDO<$;Njm>#U3cICM*?6Mq+LYk5_9k1o-BzIu_FqMrJELSl60U|oarC!V| zb&}GN0v_8+l*@LT>@uR~3QWAal_^lb8$&O-bAzq`^I5`gq2gRIdvWC5$qLDjcZMqV zeqfmt_v@;#&%V?0^6HJPu@46wF(<|SDqbeoZ}n1@?=ya?N1qrzXgxT#r{I8Mt8rZA z-e`8e1Fnw-%|z~B{z>o)tC-1Q#~vvEe4#O$-RzJOZF%v9b6M;{hZT$N68&r|>@(Pt z9^W;sIG27$v*J-2OWn~t-trXVUE1`~0f`qd(ntMHR!F{P`7JVV7V}xekMJ@~gz;_bZg+YY+L^=v>8QqCu1~gwF8R0^yQNNox{2OhP!0B z@!K?*%vqw!6EhfO_WS5C?~cOiWN8gTD!JnV4~w_%z960h#b24YW>?3B8Yn#%3Y@MXW$D`waF%)7cTKg!1&FYR65ncDx0aXE6}dK}($ zF+D%>$?B-}`{INLdD=#LqGq{IfB1xP0uK!Zxkg^(3q_2NUB>AtcVfJr&zPrYxO%@Z#RY_fv8EIJd8;RX@+8QBdx>x!vxr4PSz@ zT+{lZv&G8`3YR*&pYvVsREV&79(&*1<`pla%=X05?K8Ntw~4~-`(gzX{O8gdHEr?? 
z9zA-TyFfQ(m&e1Ra#*kQ?bdi2SzRaY1rN0;|B{^WI-A7k6p8R*i}Jc{}@F{g&9^SMfVG z_JxHfMDPVHas-@zWVRON@}|l8=}2D2%iAY3PsnYtu@tmRY1vfH8&PLTEs@GG47O$IEmpgc}T{0DPw{bPN*ZLst zj44A#(rgu5L|T&IrKC~g%ZeNs2kr9P#F?Hjf5JpBj z&qGa{PTH+|4KF8d4okAIV_g)V;|VQCXLr+I`P;Kr8{j>MZhkK%ag(ke{uu zHQYAEShKrvNuKYnP@>pkf4WB~j?r;a(!RqhmuF{#btdEPZl+tOJPY3Y#$F4as>ty; z-BRJVp)I-?o_0V}_kjmHU3=DJp3o6`phH%T!$P7u%Xrh#@?^>nw;mMv|#GpyRUmXIcYS;K>2_1KYOPJ5;ki9zL0^Mhzhwz0g=>M=WaQKC&q zB*8afM)a)0W#aMGJHLEntkn{gdW6ooBrtpkx*WWCNf7%wo3FlH!TS`KOBS;7y7mDV z7om_?3@<^O;U2ft;JR~TL4u%sj&Rmp&$uaJJ$@spn2!7*j6va_0f9lu#oLn|bnI&% zb~_(Gaku!QNedm9{gU#z$-{eQNUX(QW;;Cc6-xGCR7zjJ9h_jjv_q!vk=;(4t-HH_ zD3#1Fe<^41xmTebsIv9uAv=+Xs*Xznwlm8Y%H@1_{Ncaa5cDU0N71VtyVFFB88;b* z)ylI9rC1$*zqvEbX1SajuyPG^=sx>4C5DFLuVvxBf`t!{r|w<3?c!VNe0gww+q!yr z@$bRth&$EKl^Rkd+ge`7+V5wHOIPR_*z4OeL)t1UG~gB<8l;YrQ7FsSFJY&T*xg{uC5W- z)3o6=!j(_PvN?%ed?ml#OolK|JCeg5UBI9cqN%!fqfj&ZU0@o&W8g*B;^5uvAq7^A z>vS(IiMMhw6#~K^v`)lmC5Dje1n-i5=N3Di?8WqH-`BDnZh0fYKf_0e9`ZafB`F%J z8!l|gH7H)0a_5;A&vs81U7Fm-0YQY{&jW%)+@EO+e4n)*#@5PhGwrwUR4Y@$7pu%n z(n%=g?cK#28lcRVvNvewZyqL@qv;2=;-5d(;eHhAf15Bj|2CAiYq@+NYe?J-!}>7u z=~v(Sgk`0KxfAL+h_f6 z)A&^0!BW};v~|2L8r5~RIdR9G*lZFVRV_ijkL$~dc%+kiqyvg#Ji zK6TdnZc;Ujxj13NLF?Ijr*3$o`?yx1EGrV z@Yt`yp@-F&OP@v8-t0SCPYd`5*i`2;0n3`z~3@+*=w_kQui8?f=o=WT{w@ zmR@Zt{@D}GqhB($EJlskq9dM$J&9r;7hXH};JNZb@b>-KZ+^*#%~#*5)t=U<{TWSp z6!BWHJ$Xr+m*7>>fAgx!1^X*z{Sh}dpFWwc@g!VPn5Z2bbhLP$-I%Q;Q`11>xX~{6 zV^V#Swf4$2Vb}f|#oinzHS5=hXy3$prW~V9W;*?4#*VeUtK{aDSCyfOp$vDQkCX22 z3AXITis8?-<#4X7Uk~~ohV^4oJ00eG(A-tnaim>9IU{2`;`taA+LU)?G$;a zIGT69T-dX}(bF}-$R__wMO>?s-=gX+cdb4nwfJ|U(V6dl#J6s=?N~Chja2T=(C^Aq zpVf%}alkohWAHJhQvXV1qdecJOCHDk+{{y9LRUd6Usg{3&}-s5p8d%N{oFr4+>pP& zQu8IFym*P@vcsb_*ZBxho&v9|3>A`Gv}00m|MKLa(VfShu%zR-)MSL;>b0cV`R~+c zV8niljuo|JO{(x-ZSTE60RfS$Rgpsi$6ng54tQ(nl4JyrIp`>iG!|vO4_!QXJk-T>SIxnt#;$Ph zl%|f*$Zw2}Qk2zJyUW#9snSvylu|BZixagO$kFMLj3Ie=El&MV394DI~pKRGa?Ml=6NpD^7TI z)YWt2^;};pg_2WVnJR8LnPeD!{FYiQ^we&)sbeR92GH*DRyW-lC_&P)7N8ZzYh 
zOxLFWE1me+_#LdnHzy5Ev{D>?fBiI*(Z_quk1b>sgoyJIg`LNrt-opQf7%`x9buEY zV6xh7Ya1OFxzK|(HFvLRUQ&&F=os74a`& zY2A6eN)5yCE{Zfh-JnqIxt7imPrusVqnrB6>+etG7FtyMiLxcVimqa0ygk}g|7L=A z#JQ;=UB)xTO!%YTN{FcEWHwQAF>5im_gN+`?d`GGtQ3#l7?)1_I4*bdy85;KQ$2A- zsiRtI(ihh(3Ozr4QVv;L+kt!)QtY(bwDYU1fW`6e!S1t33rvi{u32*Dxl;`0+*clP zYgybSMxDh^tUcv$(@$+dUeU$ zjBq`a^$iz~3-9m7i$UoJBS{Av;%N>XS6(hkY5lx8ckTQ{f?7-K?W2RklN2$msYZBi zN%h1y z!|Vr`yT8~BxA`8U>#ucN>Sub8WoQ0K_?wQOeaAwOJ^L}`nvA|({tTPp_NOPB!me+S z6a}tZIV`^Lm`%O7Ako|V!TGcA(DR}j!=!uHJbOedI!zX4^-Yp(=_pgh*Bn(!YYM)nz7>Pj%(s|Dgmyb1m{B2gyz%7&MKH70hr9z|Ia=mA4UW2J6 z_xB&mrG~@tEp2+jBuE@?!Rlis^aA#1~Xb&Hi8C`F?;pP~kUUt(?Zc}VpC(`!T zfn+xN%gwLL$Aq6dXQfh#;>QNVR+4v3?V9%Vk6wA%QlDbKD*LmWPuI3+W&Mw^UZ#Zp zv&(jRueS3|^e~jiMpd5uQ)sTG+7PB59U!$Rmq-5ggphi+Eot0wF81_NU!VVhoh_86 zxy&EG-iAC6*%>R&P&yh zGMcZ>jB1|t{hTgx#DM6yko@U#`G*Gecx2vgf^CP&TFWPc@a?zQWb0%uj7~Oq#xgaC z+PN)=AWJ|rjfE?2XX;h?_||xF_FVYag%erJ--7c4R1916f7@2C8Tc@{xoe+F-iGCQ z9rrOON!>bJ@6oNnkGD9D&z^Q~wu-5ak^EJ{92WG(o1w1Du1`a4E`+N??y<#7`A#`C zMuv({1qQFV+av?k_U~Kh{6ze{ph0n4-!wK#yLM^g)M81joY}n?iI1)B_3w0TA9x(k zoNQ{!t!Zw0_gFp6m*`2JeeYa0eeB_%nXt7a|FrO+ede;561il>sxu*r>u+YYtQQ*I z+P>*uydBv5Qp1qZ@yLe`!;hPhqKC^ZSKoFJuokv&RezNDUa{m|`Cx8yn`fGmLN-^8 z{PZT`&_r`oupzQMIad~W#%g0=PFCKx`5l8S&zE4?{ITtF8`sCYeEE|LVnku6Tm4NqFvVoBoZA+*=JVJbvVxxev?^Uky(Wj9PJ!xHSDF znk~44Bf7-x##rok4y%Wj>asGn@7M)@6**T6&sN-f6uH)Y$Ze{IN7KtAo+mz^^I+jl z!(+}F`7dq{q^%5TemiK=p@M&udD2i>v~{BWE#u-k*Zi3fTfQ_=XHTv_-OF36Y|T%} zOcnpRcBjX;FEd%ls^;aPJAZPrs#TdbQf4Ln{o6uz<(|Z=SU%kw$IZ)qz#!|q{YjtU z)iRIrwZl8xH7uU)ZAvh(P&iq#YIiL7x#oVmN{h2U(o>rfleX@RhEG+zY@xB%kCal~^U8j7D@GH>MaAc28U~r%i$xO6 z-rtMnN{>^IS&DUy$j`Vw&>KR$_@QLO_@i^wK)mLfa70VDc9K`|=(nj2Che`5xTs@Q z2kd06hvR>D6?MxVG_L3p+A5*kvRoO{?P(vL$x-|3w|Y^JQuWzf_Vs40&8d0_f{ z>DF7rC?~?k{6pRgZt6>4!*6K2SH{@B>HbbwUth=wa(%dIXEiJ|ra4O*J*8nxbc>C(?yzm6?jMTy8ic@YIioFu!Y*h}V*7mx0aX3b=n{&ZGTn8d6 zY;B}$4O@Hj*K3+4xlEl}Xs>+fvGGr?PwP|Fe|N~WY2L7_UCLc(t#Ux**p*5tNB^pz z4Ix+UFH_T(cq|)_=A6sS*Btr2##W&-`B0qTKFNFwBP?h(;`30>Es&jvJaZJ%5H+9T 
zzo}uPlIu-rkVHey1pw0)z8EwhQ1Oi3RH=a%p4Hhc(m}aOT17jVW~!R zeu#LSW=hZNmsM?16XS$e)IwQk2(o}exPSldhlIlRj~}~aNBq?@idV94c-^<`7~fjL ztG<%&;2zpO^qS`qmr&*z^>mWsZKJz-Gbsmfar!0JqmDwJEpG~vyT#w{kx`mFYOy%; zBYJ%p%gCWSi-4v`o65ls`RBgG}ZuW37sf=KOC7IoB_EYH2O`a5YEm z$fBEe7>FKe%hrt6`bxCEBKL8yFq|&;uIEI{RPFJFZA%0d^>w^$)wZ|!(?1)<~ug@ib~QhX|I!uF7@JyCdT-RZa(158RNK>aV_Xk^XR9z@o49^;~So9 z-9Z68zs9?swDh!DT2FAfEtv)uM>*(unciyY+0IllKjvw)&bA~~fP5cnoZ}DAtcnRy z30C3Asn}SVk+M$ODZ96Xj2TIB!`U;N>3ZR-L6f;Tw!bub**;J68sY!U@tejwST?pC zU^qVgaq)86gEgMx+~kkxroDB~G&JLbeoyZko*h)ijS{B0ho-0eZ`hf zdi5@Fpi<#y-ji+_LTqNQ-mJ&ZkfI0s95skvn|NC5eR=4*NAovA7VlIS9)A9WzO5a1 zo{#cDLx)}HeV?70bB<$R&sb26_*}o+!V23e?#Eh-J<|3$?_S~yv+bIzW`QcF`8cusxKp;Xy59WC%f2gXjy#2jGUeJMxjron ze#@zmexy2}6l(aJrMkGo{;Yt=V|#%=#a_gn!OdL?Y`$%^$*3(pOs^8U#x=SkP7-1trviH~oz3)};KJdYF+*0&Z-jgX^j z7mq&ot!M2=9%p!TTcWskuj<7&b9J84v!aT25f2t;k?w%J+TzN?SwAQ`z#!h9_(Ll}8CH(N}uWWBhtnq6WbWSG}>^LD2uZAMFlzll#+7h|nZw7T7UuZj5(4P!d<6glblH&%Ub z<&|(ctADLq=cvRR35>&pzf{$W%Zt6Yr0ZYJ{xV4Y6Ft^{pi=F9nY~S7 zcVBfwbEKoI+paq6M!`@=dGBm$I^7%9!M>46d z!OKgr@Y|<7@`4qqOTkyTro6JqxrY`9J_UCU`WkuiX41x-trvedVmVX&M~|{O(AU$G z9xh+;>4WxGbB1UgB0D}O-N^VF-Xi9BBP-jYxPWoyHTj`^=LWZ{e~zj1ldj&6IdO^K zrN_^mrut11&6iz4Vt%DbHi;7(+22J+=1C(qR)JA#A3gtEsDDT!a$47gzMNj+fCa%q za``Uq;y%fT-Xo6R?sT5<7TEE$>BKW}o+EL)u9-?|iECM%D|?o!c$99g`5`kM_UJ;> zxY-jSC7PG!ywVSL^Tec{fASziJ@}|hMjSTn_0fBZzmx>$1-(pJ`Qfs-YMSUt^Ad$U;^VLTJ6GO|ZOptmb4j#cc)qaY ztY)wM_fU=9{hxhvSVN^3VsesFE=|R4?6+I{lD2vKhG@0DN4IU%t=BW(>V{@wV>s?v+@ql-|bLNLHw|U8mWc z_~v{0*WJQ8&2K|$8s3Jq&#yC7`l_pOmX18wb1=ArKKkiNZ-u)9FFHwAeR4a5`yPF} zmhQd7F>F{#jUT)1n15|H=bVpljhOfD-nx!&)%ZPvr`~wF8;Ur|nXELs%sRIvUCm$r zeVEyz+#(_Glh)UoIo-Tj*7lHG(H{i{=hhsm*2;@KY|G+uf7KTkjOZLs4{l z_H+xK^|Ci#(D)-0($iBX_eLiDcD6!7$xG$_iSy?w9zWK!IUtsvNB)?5)hYSAY@$?+ zc55@Pw@&dEvJOiNj&j4bUhXF2;^q09yfU$Ge*YYM$t8h5rpiz)c0Bj3sjE+BOyO>zXN`;c*>5Jp+mc_NQn+>Z_kIrcEyoPhJ~8YuwY}za!_m~p=AU2fj4c`V zU@!y>za;-Zze`I?Xx+9qmC(6hY9^t1-PM$z`d5iddi)rEJU>>SU*dwD-3{bFlKdAW z&e)pS@e|1Z{(e$VT8W5N!sAb#l9VUm6|i_E3Km0DAY)G|DPV{v$v8#X|Mv*w07}=b 
zZlGZ(zk5007OCf8w z>u=Y;t9`e$UOc)K6B^o7m6kXCGn=oWV;#IwfmkduqW)7- z6WGKPN4C}B)aMu}Bi99WVz??zn1zhDjeLrEx}DD0pw53KXUl<42X)vV8e_z~D{@$# zd_JnfDQbd!Qi0Xstu@+n@l$}#)-+>6dN3h}rma#}hjzkP#`?}e&OYMj$_|$3tD@F^ zDILsuSDnl`yIn>1jB`xj zt$|qb&R5^{$qdt#V=hefpPrGqrpw(N4)=wGeBVxH8yzTL-Ya`QWWYVfzhQ8wa0_3# z@A4s8?}mZqWfs|o4;}UudrJ(=7w(AsuDX1%I3R1FcKLX*U+s*EJ6-?hAIsv!K6XQE z%eK5A1)`*vt_nZwgt%7OM* zrGx{U=fkxb(VmC>1MP);UX|OIWjy`m1_Y$bb11H~WCs7|?yDKn zI%~rK-?CI(#k>pS<_+npJ-?X^KGQ8T8B~_c9yKWQ`WP_t((k&-YUH1nUSm4gr!9xw zYp%{cOHTIMZJ6{FDNNB_jpP*askw^$pcZmQem@*gbM->x%weB1`kx}{h2l9F!^`eT z45~6TSLPkvP-WR)aZiGv#uZ+6P4a;nO?X*eP7WR0hU(_Gl6w+mY9h@gza=(QmznD# zur(?Sn&mB$97t5dJ&E&bEdOqk_N}Bv(nbyWC@~MSq0052{WYilMs_Q*zxW#Rx6KRX z5!emtAx|EyQP~WuK8YcyEi+e8cZO`&A~}d0p?FwqLsd7tjJh37b54uohUz#ovUy6a zP8@x5#F!zH_Tj7l{6Mlt{&(l4LyD2*)2GRPDMpS@DqkW$IFYpdYAy#vet#Ks{mQo} zqL8hKK|mxi*+s;l>*+P>>57Va7)VVCN$_{p!{ zr*0JXm^0aB%+T^_z`s9`^C)79?EdhD6r%v-W$+U*pq}b~Larg_-^exkJN3tZ{)X*H zZwbO_y8mb~LcUEMzt4!f(u7zSy!+`rilp$!r=F*5^H*iX?yThOJ@EMr(%r6}7xSyg z+41D_1s%@Xs|2-57BT-HIomFNR?*=|yQ*DRI$^cY z`wA)#bZ|!-iKDc;DcUycKJT*fzdA}F4U&=&y(oa8!Zn4~;W53%~!%3fy4w42B zH)c7x?`?P35?q?-!oyfL;Xr0A=b9Db_7!xX53bB{!3CGUvX}0A5N?0)Y-#t*#Sc5D zOKV)D>OZ}=KYF%o%!PEe{29{WO8sYLKd_k(U5W8o8d6;5m%T$byDuhW-hQjBU�H zPkGH-eOtg!ThSF>!=zHBpM zfBZ^}#V&7__RmU4hm5+v$+@$^B{6tvoo%zM)|q{xKh32=g-3Nctn4J_r5ert3TngT z+Mj(5X|(@z7o^QISDeIzs!*HKe_E6TM*Tnko27Oj{WF`)+4m&W)mi@SN2OuaEs_Ma ze?qpZ9;9}sf4kZd4z->OA6O(-)MhVy$ir+!`orQd?w`Nm#W~bO!i(-n1gQ#9J4ukL zWOF56oS?=Ko{JYRQ{!qbr0#PlY;FhcKi!bJIZ`vElZRJdJIJB-pQHchu>YsJJH&d` zkUsrS<(aep*=O_De=Ar>y+(|O745)r)cuez3j4e3MN_*T^)h6*?cc6f^bF~G9HZgL z+4{G0QO^i-_Wz!k4b}Q@|Ll7xtnAA1L;rWbEPRRdw5X`>FI}#id<&mBY<~U9(I{f_ zKfOEkrArX22G{;YExe7|i+`>M)R*Li_@F9s8Df!eP^HoQVDyD}T!1(BO@8%)pvl`l zGi7%F*{2NqF1><*j?TA}M#^(_JcR`^0=JvdK?M!*mX)nsA8u*F3%Z zBj@17QcIoPq9&~A0j)VgZKcO`=&VdU(jP8lZzXNwDxEkdjm>Y&#%cCz+P>7daw znrPy?IcM)xB%;bV>9kT>T>fQNLq=}qcYW?t`_TyR_kQX^Ju+-(qbfdggwRjoO$L0o z+t-Q-#Zr2-<8QQm4h`Mhx;sg}61?4VaWpk!*ds~3vBDxh|MQPP;U^LZI08TQZ~ys+ 
zA@Tq78^7)UmSM02bQu}B1*87^zv76J*uTm!1fnD!T}Jr(7#IQxBZ-P5lPG_e5iwX) z91%+)qT&d69IA|j`8y8`k%F!ZNs@xxYW`Pl7!n4HF2j;h#~|T={ShdrJV+!GYJX%& z3~GO53=wty$XGIZTO8%@*8@XF=4Jk!2bqXNy+&jb>b1m>DL7P}DUt-#d8Q!iy!?F( z3YLO;jVO3Y)N4*55YgL`FsOVfWE?7A3I+dnzF0{~;@|mVB{Af`^TkTyaOiCbn7?zw zN|I3ZL&gI5zw^K%O%ins437ABov}zGN5_%yf9H$EP_U>nEQW;24NE|^KOBjK^y&X@ zyErnD^7sC5WHKHVMaTIhrz!OMFumA635XeZSQDr#PF$iP=5gkXu{oM|by%QzT z*CyngqOMoS{?KJOpbQU`5r8rxP(}jE$UqrzJtYBoAblTIXQXYQ%YZycKpsfjKyM4= zK?3q10eK+TUDW=_Kpx0-2ptFHfwWU}9FPYY$b$^zK?d?bu3_k70C^zSCR7}9y+@Y; zc~F2nkiLc97RUoJ3LOXJfn3MYap*iSNFV$=4kL-q1A|nU5U0^)KpuD?59FQ!y)BRj;w37M0OWzZb&8Gy@*n_t5P&=g zKpx1w9{LzS9>`m<=r|w`z;g`XIR@|?gWQLqj)B}4qsxFih(I1hAP*vt2NB2v@EjQ( zpz8v7PQ6z~)fw=ddas9$1M&bo#{iyV0M9Xi=NQ0q>isjSE`aA40MBuN z=QzM~9N;+)@EiwtjsrZ$0iNRk&yi1K(B}{E90z!g13bq8p5p+|ae(JIV0?}PJV!n) zLf0Aa9QgzZhy(fu;5iQP90z!g13bq8p5p+|ae(JIV0=#f+!WQu0nhP(=Xk(#Jm5JV z@Ei|#jt4x)1D;bqOZ~gfc))W!;5i=f91nPo2Rz3Ep5p<}@qp)ez;is{IUevF4|t9T zJjVl`;{ng{faiF?b3EWV9`GCwc#a1=#{-_@0nhP(=Xk(#Jm5JV@Ei|#jt4x)1D@jn z&+&lgc))W!;5i=f91nPo2Rz3Ep5p<}@qp)ez;is{IUevF4|t9TJjVl`;{ng{faiF? zbL5kDbUz2i=Xk(#Jm5JV@Ei|#jt4x)1D@jn&+&lgc))W!;5i=f91nPo2Rz3Ep5p<} z@qp)ez;is{IUevF4|t9TJjVl`;{ng{faiF?b3EWV9`GCwc#a1=#{-_@f$=#W@Ei|# zjt4x)1D@jn&+&lgc))W!;5i=f91nO-06ZrEo)ZAi34rGWz;goNIRWsT0C-LSJSPC2 z69CT%fae6ja{}Nw0q~sq9NFLZ8`S5e&}HcB6#?*^0C-LSJSPC269CT%fae6ja{}Nw z0q~pvcuoL3Cjg!k0M7}4=LEoW0^m6T@SOU@ChFV+o)ZAi34rGWz;goNIRWsT0C-M) z_7YV;z;goNIRWsT0C-LSJSPC269CT%fae6jbL7!jbRK}`1i*6w;5h;CoB()E06a$? 
zq(#>S@SOU59qQZzo)ZAi34rGWz;goNIRWsT0CO-WL6OP@h{z$Dv;jBH%d@@SF&E zP6RwB0-h5A&yfe=(e(p7Cjy=m0ndqm=R{z9P6RwB0-h5A&xwHNM8I<*;5iZSoCtVM z1Ux4Ko)ZDjk%79 z69LbOfagTOb0Xk5b&kp3$0Y%tlK{_2!1$a5cuoSw=On;$65u%r@En;qhORT+kx0MAK)=On;$65u%r@SFsA zP69k90iKfp&&h!2$b@IqIUoa`lL61kfaheub28vL8StD8cuod9Cj*|70nf>R=VV}f zP6j+D1D+!j!O`am@SF^Ij!e!)Zwq)%20SMNo|6I3$$;l%z;iO-IT`RAndA=C8OQ_h zoH}3p@4iI_JSPL5lL61kfahdjd`<>DCj*|70nf>R=VZWhGT=EG@SF^IP6j+D1D=xs z&&h!2)H&p+^9*>7EJOgrq3?Iefaheub28vLGO-*u2Kw_TGT=EG@SF^IP6j+D1D+#` zETHQGjL*q{=VZWhGT=EG@En=wk3KHoIT`Sr40uikJSPL5lL61k!1$aDc#bSS0n{1D z1Mr*-cuod9Cj*`%3m~A63wTZjJSPL5lL61k!1$aDcuod9Cj*`%i*lgr40uikJSPL5 zlL61kfaheua|+-&1@N2#cuoO4rvRQK3yz@jr2w8&0M99a=M=zm3g9^f@SFm8P60fp z0G?9-&nbZC$YLt!d;!lXfaeszb7Uz*^!@KtNDS+qHHBwOL3hY6p^`k}A86u@%|;5h~GoC1u`DS+n`z;g=VIkGqqy3WA(oC0`G0X(Mwo>KtNDS+n` zz;g=VIR)^X0(edVJf{GjQvlB?faesza|+-&1@N2#cuoO4rvRQ)0MC&{gwU@WFg~XM zo>KtNDS+q5f>7vV0G?9-&nbZC6u@%|;5h~GoC0`G0X(Mwo+FDZq4NMdrvRQ)0M99a z=M=zm3fgmIIT_UZ?MNBQ52Orr-;R`_@<7T^ejsHi&yg}zKS#G|ZpgqS(qCH2-fOduU9Ek(k720zo4ro_s&yhHwU7G|Zpgl*H&I0m4dyd2bd7wQ<;($ERo+EKU z9%#>zI3N$S=NL(}=SUgQKhT~daX|k-dyd2b{R8be5(o4TwC6}1&_B?gBg>Wnd7wQ< z;($ERo+EKU9%#>zI3N$S=SUnn4+O`*?=3J0K0q0|T_G1hAP(KGkj@Ikq1zSGh=4eB zyFwB{#{r&W0M9Xi=NQ0q4B$DkC@yeZAP>NE4B$Bi@SM7C?O$Usq{_hS0eFr9JjVc@ zQ`hN59|On(@EikpjsZN!0G?w2&oO}K7{GH3+H>mCqJQNYTb$;Q9mqG9BC5Ed=-i24I88xQqSDqHAgWNfz=81{gFLEEpIVDVVz3ip?%K7?=Z64GskapwP#HD*h<- zCc4LHy?z7DY1U&hAPXVbO)a7E)nmNbJ}gX`hOwY+&EG|BMfpVtt(j5Cwvbg|1? 
z&>K1!P?VnQSn?gO^=lIu;D<^6w{7EsG&Wjddbx)I2LF!PkcJ9AaLm0`KTe4UVKVZN ztt1T2COHZUjb_T+(`-F5ct#~hj(yuF@7sa)Nnu>=q8ha2Fte znfZkgAqk_-DE?ARFAm=;Di6jlG)=xK`Ee@frV*)_A|DRTzWi?S1l;cyf6I|HVm-RS{?zC%^>t{BfMEm-XA zJ{?v|3n{uuiM^r#eto_j5Z$V#Y4e-j`B6KhbPU zPs{ER?uR{TT)8OX)%8@SSwmrH6E7^g)t@x0 z4BdPZ`w6757u1_EU?v5U70)zjRZ=G?ZG>#?One?k*BEN>^iWS>2w%Ji+LP$$!HHyx zLt`#~sqH60_BDgm)E;8GCk3KWjOBB-nCihbAYzD1R%pAI_DV2);*_)XXfz(xwsCbZ z{XjlJTcKLzfimw0V?jlosqG7%ND6}irQ(VJW1RR6h5=BUk?CW>@Wp-$O?afTb5%9n z0IA^|MUJDuj@aQZ?uAe^WEwbVaQ~{N4Xhbjr#Q@VcCx3cOSVr$`2otMl~!O+@c&Aa zTq1kZ>7j5by#vXe&}5lIVRL25)zO*JE<7gMzK3t-i>4gKER7!EYd9VW&GCrAnAWVC zJICK&SO%o;8;{r+il;=UuMLsR8KjNqrSwniBS~Y~;PZXPMynTo;m))qq>j7eo8t&JV38({3di%U&zmj~*DB7<$-He8eVXr|ObsYnQ z(R1z$bOXHU@4Q;i_SMT(iY6~m|FhN%O-{2CiKDQLz=jMYD{Am1)Rxi6JJs4tq{|vU zCcGE+q!HY_iDJk0(bj7VJ zEeJdIxnda2%E2{~NTw!)!hG(wZUmt|<4M(8PYPGnhfZ7+<046>=%)g8I5>Zo8V;Yl zPgDauu*%?yWUYs>M0dg7e}~VWxu(+J(t)~SOxS#LX*pUxO0~X%aGu0Y4tin8Xf|P`wcMguXp?{@z?X*`kP5pyh3t>`erjDDq(z z#;Kfd{CV$--s2~S^vZj;r;gk zf2@;)X95SxlV%hGSujE_<$Yjgy=ik4OrYCXsoGdr?FGNXz*q(~9`h=AdgDuQNl8zYqn=#Dq7~_dl(XE89WdCQr5pxfa(RC!hQB($3%(Lt{Uk)_0$m(g8 zO4rA3Vpw(??0D;HVv@{h=e-Pch9N$&Yp;W_%tV*AG?iYitL%iRfW0Sx7+HvYTD^ZQ7h&#w*Fv>)=) zH3}Ci@XJ!d&&kWf*~apZVBnDGIOXo{8R;_c`RA(;X_lLBIjbzL3wk!U<^wjFXu&*_ zJNz=LBo**&5S=*Bku9G<_EpWix-*7!t+I<{zW!AiZs%&5hYK39Y(;j*3z(-QMV~Y> z_&xe~KvW!qE2%GOfsxmh>@5Wfgw$&5SArq8od>(+kc;LPBbYia#b1mE_5KYDGD!Ib z^!tw12od2c=c|S6Zrk25-dD%PtxR8Sg`Wj5$R&w}*QeX=*(pMR+h1`wF=oxTMt3?4 z3xQB5J4Kj%7`;u^*ci*!)Yc~F`5YWl!ODoNC~_+Sp=d5{jGk+1jE9RWn9Q%E$xVN% zGkFQRP;dr8_L@`t_dh=HN_S+b&DWRbW&&oop(SENbh=pQU-}`x3*$~3gBb@K;$$sT zP{hFbyuiifvw=kb3jUxmQ?h=>l%GocWJ&wVa(`i1zV2X+eW&653ZKtF&YKR~1H=xn zp~^@GNns6oe)|d`^4}=m*N6lTN>7ge^D6W;!ZBjkr}gX{$)N@@XgEwMg@Pi}-dlFq z*pcw&)x3^5=b9kH4#$&kA?4_?=_F4lM41C&wpb-&V4E#{pJ&v3uQsCe3p4YNAJ;I# zYfB_yOKjY%>Ny7cs$6jeaQ-7U}QXr@|=W#uEVZW?ZGR=D>1A9f#zk3-L=9?)Jb>B z`8Omr14GSAehxfHojeA4)q;=$eK&4Ao=>Cfhs)mvwFahCRjpdJ6r%}sFselqAhyJ> z@s7`AI}V@M9LPzMLJAVf%T^`S=P5g(9mPWMmwXilogiX9MoQ_9>Psl)BX^xCzu6Zc 
zR`K%|c(Zd#`Qi(u89I`_WQ8>6;^gL)aToHjEUSZAmh)r})r7q9e41xaf_lhS-BvA`4M6k)mzQG`&sUSg6Ntc_Eq9 zw|CsDNjUq1DzU2Oa!&c_i8pIq!7m0_nGzEGAfV3|Tr4XL+Qb=~{e~$NHn%*4`e*xZ zz1y>|8};Rx6>6-|`vhO#deR>Sgvd|1&mFSMaX==S=(ASSXAS>@V|RV?%6f3+&k7_< zz*{Z{yObzh}jQa*ECQ+H!fYnP<|E?;Go!)?s~ehVeZH-`22~DVH|BH>l46tLf+~^KO6xw2Uu-O4h+~ zAcjeC%veEl5vn=<(Sr&4fr6Wwy%+kP-Lc%(!whcOK6lf%YhX`$@zK4i8?&kMlFu`t z6>AwBO@j~X?cFc^Si_AQrjkeC6R%yrsjst1dEIOJ3^I39-hT+@ux4<&b6mZd5mNSk4&Dh*KFkAA3ecrg**p8 z{%{F7u`)Z-LtNG6%7sO}IYairzpR2>;kLh?B2a!4&TE>eq^1BV`6rFmZ1KjeMU#|j zq&(^iKRD+0AqHyX^d~eJ(c6qYW$As73$uDPtoM-^3ztS<`8rI{z5w5K0>7Fr$lpcV zVYx3$4X_F%>G#6kzkd7?s+zb(Xs~G+TEyIpb@a6(Xp8alE&qm{nsEZs7Zv;v->G)O zeGSO!xA(yIiID`B+rnHVe6Kv)Q-5@y8aa1Dn zK6Qu5a%Zx8Bs?T=~b8MU&!8pB&*eft-)XnHX zAARP6 zOCJh}(-Q#RAFV`$UA*f4s3%-ZvnR(B@8b76$;-;#NkZ2{5^1K;$6eRMy@e_66bpen zGWE$Og_cv&ai#OjGK^_!{uj!#FVW5Vmx@{9TDogZoC?#D6ro*@RZP~c1(-T*4^l&4 z12M&j_%_nv#6NKfu*4X3X)W3c#wjtNA9p`}*;4^&qli9*CZuQvCUei@BKX_LvGZ}P zDFVlIhpL@LbTXn8j+RfUIyiEt`D<1)h5At*GHEM) zjLW=*ahKP4bWv_heQEUv&-zR+=Ghcc^Y`r$pGY4DGZ>>0GPA7kuA#lCVC;^(vjCdr z*hqj#jJfLIC__l^3F@lTiF-)``o%5@c?9*+d!$H0+jY>+G-)IEvT*c#`f-k%UuHXK zr_7Vh^CIw=HjT|>`0qs2hvP6VAo$AskM`nc93vU0qG_YSDj+hR9L5gI;{57 zsyY0kRVe?4nD&cTG7@E~i!tiwyTPaC@nGB+)RL;s)SaQ4Wm+paiehbs9J&wC6{!MJ zEW-UlGv0+@XNh9yW%=ks%yCtHNovb#HIk-*X%#w%;mP5dWNU-P0z)#O#Z!$-I*La9Ezu6A4*W}f$gzVoP^w7T8;7JJCZ z%!|1N21p%h2j?AhecLwCLYvK5Bti1%$mvf18?^4*W8d)^2Ms}TRJsqZ-N6ME^;EkC z+ES4jgj*SYF$f?jhXADo`h!iR+S9NiIbSKF%~jWRkVoCsW$&j*+vp` zgPTWwX>^B<1iq@R$~e8wkU**q7=AzFcbbL2lE;huF+9l~rp=|1pV%PE15P?#2iGqG zVt0MY>J*eezy*h#GsAig^7oE;80e*?KG5b#Vd456bfq}0J1|}qvTFf~478|jKGEtn z0P_LYv~x~IWeZ`JH2Q1R06r8u&`Xcdiyc@dOMx~KW6TgQHtzzhQsWVLj7TB2<77RK zau9Du1j<%S5l?%+H--Df>}2hsLg?%!Q;X5!oWNiqJ;pD{pUr>^#kzEkXKJ-_Wh7(8 z`+N48X~anoK0NPt0}p_Tuw+YuY)bNTxseB{w8-om6M{@qqiv`dHUIe~DVodveF*(u zT`dkPw;wk%k&;PAT5uGM5EtXf^0fJ4((RI_Y2@&{GU${qSoY@r9k8CuMrIzP))^scj}VGBrcMaMiDYZBv8!6dGz)Jy$f{bq2VF-9wPVb%bdoH{$>#-tCg^Zba1 zGwUd4P3fZ?}%gF8b}K{NcA-?bmx-sRco) 
zYTp=PLdG}v8X$;oUOEz~Id8#oPQkeJ$zx}}2~(!tMqWfz&_ z8q@YNUI_k3GgSj>3s4^6UqM#Pj25f1d8WZ4Ja!#IYnt9G!++B<<R>%gWRM}kNNkcL9WIQkQ`lBl57n$v&y*<+4# zvF5e)#}e}z*W`6n*qhrNG16Z zpBq&O62gb`u@8>~rsN3;wo3qv4beA>)sIerZ4(Qz_JY^_@qHch*8R$=2ndWJ@pH z{))1-Xf);lf~*}KD>CWW#NInnJnT08U&Ev}@YFvt9AF^9z)?*Jp6qiC(NS^-ra1}E z>n<{$_ZGF2SWQy;trxJkn<8s+V$lYV&dXIPPj&Ih$m5&8e-<>7%gOmD*LB3oZe{SL<>uCW(%Et1%V_ zjgDxa3^FgBWwxB@uOhEp$$Ea+_x$*)Lz}?l#XZrMJv*Npkv|5X8@Lt!6Cr*>gWQ7M zZJsXva*gZ8G^6Cja=mio?3K8%Pm2kAA9bj1$8N?m(mBXs27c{9kjniGVdk-DW?Wx* zrfR+BaF_LtmOrGEmLEFDDr3gYV}6ar4)VPGyFjocPVy-M#D zn-vn87f|-wS}@?lQu!}s@&gUFWh}&Q7N^Y2@s^G^MxcnB^IHB}6=gm-!V%bG)sEA5 zTn}CRZcXy{$wt@Axt|eCG)z0Om?G15L$gq%9C}}@ZNN@(3w`iOf=UetXN}ErI(|MX zk$-t;jqP==SNZ;}v^lcb6rxwZ^fTX3 zB|pO1T!rFOjZ(;974d1=rtJr1BztRaQWq$gO*zS4$ zyG5eD!F6l;Wrs|pw(@h!2r@4{8_*HzKa2-0Nn!{y-)z*DY86t!dO+zOr;lvyp6GlB ziQE*8--?q;L*#XY94bp_hWP?_zZ%ND(27l8u<;jViU9DsC6^DaU8oOiaP=kHMtN5S z3){O>VHJA(4jCvm+mSn`KiP@A{Cq+V;cQ3=$JiHlN_!ed;n9;5=_vj^#-48xxPCCi zwd4g|W?>WNC|`FKY^TLXRVA?Oz($sX zO`&e4EkhprL28EhwZWjTlos6Mixwni4e8p|hi$N|bl%AwPiI!}6; z;97fvI1dbJNaQ9jI}@OLU)g)Z-j^htd`pw~aw9PRnChv7H6EMx4<&~(qxd_tm};?D z$hl>E)-t6=m6IT%YYHI+(MH-Xxl>gZ+3XJ&TAAr{Hc4};giW7xwzA(baXr{2^nu1S zX~n`g4ona26Rm?#bA@yjEpjtZ$W|vA&mSRUf@f?>CGZEzxd0QZO#8Kv3PuLI`k!nz z750Q;xJK}7qWCa$P5fW8M<60*r(+=K?DUtne!-MJGyRCOUC#>)xoMMvz_a-b80CjIVw8WE&bqp*kKM5Qu64k0yFL* zCUM+L$)GKUJ_Cxe#3xp0J0B_A!+<}aDH`0LSu;E23b{IH#dg=XqRX+gVg@@fAg%&f2#v{7(T|Tu9)4sMRsxPdpR6ayszZ03 zyTHq9VlV0QP!QQ-YZ~}&ubtVq%iiNfifi-RvdErwRY2L*aBuLH&RFS&ui`>PRqdv2 z?~zB`kRdSBPB%;1@6Y>v0M@URyjs=$7s9Qn#+okdHtfdJtIq%ZH~jNk|K?`<(^mV( z&EkP3^`j4QnZ3hvC3O?G&QBT~IDTfkZC&_W8K*-V-J@4jIc~YKp`N8PLt5p5hL-bI z*+MShF^}u9v0w}Bt3}@T40Z5Tt>TK#if8%PA5N^GmkE zs=OXbcxLLWw2mV6=)y#e+XN%wi$mHWQY;YA*nwzl?}bT!C$cv7ys|!AFx4BOeQ{8f z;1S&z$pHc#V3KmJX=wX>k8chY^V_>8MING))1_nc>qp2IwZXSdZJ5w`7$+VV==byj zT1LG$79!1St0}oM4up=gquxo(hIJ0W%>8*kP<#$mB{(0AmIlYmMQ&13e~s(noQ4AT z5-V4u4=|@lO}XOlm25I+npbJz04OgYz&z1$70CyM6&buk;@y6~Zb;@(^P;O7L6nof 
zcZdG?50TBo5Z#3&yq6ONbu#`9IQdRi$$An&IIW!j2Z&Orhe2fy*E#?j*^~Gg&JO9? z9i0#%T}hu$sK>qx*}BfO7_NY9J_&#h&oHd-knw_8BB!*5&=*){a)njk2+zn#<;8s1 ze5ggGula4~y}~cwYTWcyJziN2S3FsH9%Eu-MPhOe*M$(eT>XARqR8)so1 zpbV?bABk$@!C@F=YbCua?UUreurp2VPAr9Nx-2 zZv7V@^k+EFKR?TxFv3{!S*#iJ;=FO^u0n?|ixKzB7?M|y zNs$KQp5gFqMVEsL9&08Mt^wiEI{9Uk69$BB_1lboJ9`# zI2y1)_Jau&e39~g4+OHyx$i=XMbS89?W`;tgmF%l2Nb?YamHIZTGbPp#$%>Y7j4)x zj2*k7y@Cw1uY>nXRHv2xo;-<#%a78?k&D}s{yTgk@ z`YKPkQaLYZ_kM~=EH4C#LD0cHcYZ$Q2AsU)9TB~ju(UOU!N6IVJJ$2&?#7 zj*4b%G{PV6v!lb4>jqcXg${|*n#vMDnD2M-;0n(Pu%($bD4h0d+xZWt+}y~@F>PIP z`<^NP$$&B%(rhA#LogaTPV5}N;7|eV0ao&8sXfm!t^)7L^G6a$6o8P%ohwe#+wYGqfoPj0mf`kx!N4Jm6+C$i?+3(m$1wX z;LMq(#AY0c;xN&WOGE`7V zsa`}f0`1PvW)MtPYdXamysLfDdmM#4F)qQcZBhVta|Xe(P=OW1Kwi%s&# z6H@;<>`kz$PerjF=-uwqdXgo~KKrKhwk|JZD&1d@IaWkqU)}pp%oU3czmPT{&8ruH z*Bqw&B2o1c|4BsuJNOlfG%}q^OVY`KLTx&93wRwQ{=q;1qdhsVx*Tcf-m~Je0t!5e zfJjeAAsXONPR4DOVmsE4RmyvgOf8rnkMey!8ah^dvOpE{K#AuO!LFzIFj;(b5j3uU zYAEy({fl#-^?CnyKybC#mzYY-CgdEfIK5V}sHn|9Y%n?dVpJ#?W!L@-C3r#bb&V(@ zCSbQ(+b2vA{YfArQ>|et3Eu3!?;=c`2N03Y}uvmDEuo2SG_ALa$K1W^E`B z-Pa>|4v%}g+S*xTt)T3#n2u|V+A1)TiL%n)im8!_yir|wOGK^TTktu8ATI>bfi`E$ zFyf^6CjGT-Mg$V>B{?T1e6i_4JvJK z6k$%mFzW=SpQXE}NTD;*E;Q%^DjI7d6(3Kwk#**9&(z4v1b*p0D4e=< zjQkRpMy_wIQZN!wa`GtrRu9%45OS+`PYb!Lbbv%kSp?Prjcd^svc~0P`YSgRq9CNY zdN#k+uywb1IS>~2FfV~%YNU7X;2tNnn+Gl`v76_ouQy+J^THJ?7f+|LIqUbeHkLXP zRPB**h9;MPrT-1LwG^lsc>v0hfe>se!=G{2#8j9dfE!yQg&{0)x5v3_Qh3)Sr91Yl zVN8chl!xDE+)Z@NTmB*%<8%;y{&>{|+ zAeP#(oIZVd7+bBf{5Bt9)tfX-fRQa*$WD9WVL9!!QS()Kh5$pH8c4fb{fZ}9Cnvh= zDGqJ@ux5^=%ZxufBQjNkrtzB5c50gYt8Z?~G+zG4Fi{g)3fCOz%^nw|)4z$#{-T&2 z-q1J0N#vlW4AWbbf^a^=5$jyKY4}Q7d;PcTx8}gei%0H=h`DCf($JU_A$rBA_&xlb ziwAQU1WZ_x{ohTcLO|@{8sgA%1p3wE+~z(aHsX;(=G%#GmGMkuK&WFHjknscu^<@D zC6VXuqe?Raq=C_Ot!5yAYF|X;7j?^-MyA;_wj9IGj=}Q~qj}qOZ}8T949-hzF3XT$F@VkVG-v2W!Cp$89>t;I)E`E| zHSf0b8wojl&PnzTD%q>jWL72_ltiLma>X~7_iV`k?i+O$RA(#+||TJ#Nsw$ z?7GYvp@lIgAF6mf(VfJjc!Z@VR_cCVc*GPH*S#UFCDbff36_1@klwfd(SzYo?vXs_ 
zuWrF*=sFTG>s~L6sSb@SzG$CXN5ap$0%&Pk1JA7RH<8r^sWBpyzFAMY^l6SmG#L6=D zSYho(`?^<7rz*v6J^m9&YT*@cCl5tit||Ymg>*#MvkXao_xb4TXytL|-5XpIkp7wi zL{{Suird&Eh#HIxdkI8q9-)bdPPEz27u&|ujYKEu$yOk=h^G?$!ukgP`k`uf4q}Vn z3=GEK?jd9)i73%#2FkciXMelNM&8C|*i+Nnp*0S3zbd%d1Tv=0Gi zfV3YbH#5EaPEmnepJ;v#%EoVEYT_mRpN@C1stVf(KfNR6uNF>OO@FUh3Z}{}Y z9m*Y&m@Axpvr!YuoTfrNV9qvU^G71eK$+h~P2R6gLy|s1244qIF6XvqhVnmwyArll zQ2F(YLuUV@d7fwwkV;Y)HG+Af{%j3?gE!L@R(Dv^K`-JRf#*8|nKaa?67W>6S)vFd zH$|F8cZS33><2B6HORj8x)BzK5bZ=ClLTG0t@ zZ#{k8+=sP%Dhx&iMn$6k_Q~HdmYjeh0}abVZNA!~VuT*4)%Zu9XgL^*2~k;WljoX= z#mh3x7H?VQBAP*0rJHjM;&>twDdu=rSI#s$J95VeG3V`n)uAQPxNHWnILv9ropC1? zJhyFAP{?uZ>>H_oCVBs(2eL4KlBF#@tB7 z3!Z6NFpsdR3=8Hyi<>g^^CO75!su!0DF%{oW?4Y1FJ!UC$F|LLpZG^E zLvLC#nqO{hTBY;^p4YsXAJ{xP@(}u2khDka{*AM4D6;YHxqjr&#h*sg!P^+Q++B%ldZE5xkc z8F{x;W;Rn2-Aj|)RW_wPsrNUgiClg|@E%v23{7egD+C05*wCDdIaV#8wro8l}H@Q6l4SHg{2IanwJT+|klN~?LcZUC*CI0hT9f9br2@=x$ z&spI=uUFOLe-oZ<&_>DLf!WU?gs_pI4WJyPiw>XL{W6o?=C58$ua3q5m~-k3I4rSd zz%Vk+MCb(H9~74hS%K2-?oxkvgWh@PWrlL zkI?DBmN#T7#whM(X1*33r?e0oq2u~g$$wMS-*2GW+8=>6!<2&4uczz}6o@aKx-=jZvVmF5g34{c1Jd}!Dy)uV0gC(qE?jK7Bas3fh0 zZ{Z_ll?aVMw$kTDNAGxdLfDvxE?2TpohYJ^h8Uhe`f0vOVk&CS0bYNj>5=2s}JG)B4% z=u6T^)>Ql{;-c6r#d;cDJ+3@z9>TA<7(kwVzAs*lwqph~o{#dKTqHm5V(?h7HGSgE zbW#0_6Z+}Fk8~Go_1Ag4lacRTlQf>5aF+|Vi-&&b-kYy_&b?HXXFJUN z>GV_HG)>v~Kf0bHXT*8cMYN{J-6%=rM>~i}@a*7=lASQ_lK1}~=m7p-(y@prx-{Qq zC@8PCYfEt7bxCh&j8A3p#J{lP_W$7<;zF1Imv2P(V(8m1JRC3M2QzLSs5R-=*=ZlO zkLl#u*XLF(XfSS=+h~K$sofzjB0dom=T@CIGCmRW6f^V{hI31b=n!zNOaq!rgdDgB z6Ns`%zT9t|h%Lv0U9HyI?d}H>`2Cn|2LzW>gfsTrka@vXoRReQ@pkPT;46pNktDBg z2l}zX=)xX-N%Y!jMqA!02Gia?k-mNi+8mOxDJyiJZFkGIXTfYu#N6u zdc3VIXfqS?k6CWm2SIO8BLHcPjIxkVGcE9>^YJ4dxa!?xiQuaM;q}4h3Ve)_ayNhW z9(p(rJ15Cx*vd?PfnYxjYFKemy$Qy$K&I&^d&(>j#6CDk;ShxE(n7NG>7@3Jv4CQp z8N`;m;o8rzRqR}`hQSUkuj)98(#{|r#zadDp_kFOA2}Q+`YGTO14M>+{<6a06msr7 z4Ru;dO^!D;qtM1JQNcaF>fni0xsYz5-p@+%_9lGo))hPyDK58k#ln`cQacc$jUFu{EsRf+5bR^Tec}IM@7W^&fhvqp(rn~4 zDX0|p>$e(gviPi}5rA0U9GoO;rjX=R(vGLXMA)cDR$-Mj1QLcAV2m(z)q&v760P+V 
z(4Yzrpy-g#4#YHFV%vNTPN09>eJm7`eW@$>&lJ6BC*7)orzQSc%X4jxlGDYAFV~pp zG0cUUjdKpCzfP8U;TNtMWXZr@8S(<>r@Ky06tph5^bP=qH9v^9xG#3j8`gCdnArdfId@P)T^qlIEO;EZ#-+n?KAX8#7bS z52gY_uN4EqnX`YMVCMNj*{)=v7CDU8>Qg}2H01vmg{s9_UTWl0{H8``<%@sDao*4zCE?JFUMI>`!ZPKU;UE-luE8dpIv}rtI z*#p;qk>!8lm5oT9?R-zOUj0D)3>L24{}|7%87%c3i-TNa(`NDB^er3xK`M-1@ybEW zx5z|nG{HqEoEH`8B$+%lw88`}Wwb4Tq&trCp4sAQR4PR6rLsUSk?p)}=)moUQE-+A` z49isptC22Iy0pBT9By{5_II2L`da%ubK8dO;?k0+!Pa*1 z)}yFbvE@Z1bE?h7+P_3H|EhC-C3Uc^*x5uP#pj>&TCt4WXg&}K8E_GL`??XK{kn-T zA?|HFB*+CF;(4}eci3FfihY-*w|kS(ii!RB!Uq3-Z;|}y?qTJ1tom>xt9m;*m_IKm z6fn~~=R{)@*+BR(8Ky(pvQma+6>F=1<1Uh)-GO$vWWENiPqfxpLX6+K^{?HevTf*4 zpW~*Xim4S_v*8i&=EU*8O1Q@k>_Bs47j~V2G= ziQaMg!~ll5+N%vdEd^rtHNtnIkV@z+YtBkUyUj8xvls{<^92y_EI%`Z=k< zJcqAA_Paa;jc)O@Lcn(#s#Nmq_7cxe?d!K$ zAsk_LWuc^1ce_6e$R36pToSbkzukvBu5a|66F+ZrmeL#$`-x%V_9VIII1VOSM1{4p zE1`Y#bY8#qd2qyhz!}M&hFQ7k_<6jIF;)PEFNJTsJ!qwWUsD3EFhBNF`ArwB6F7+8 zktJaBb=@d3Om9@UjQ$C*4evrfv(Hp?cELn``brI2abJ}{;2-9Cwuy^lEa2G1beGz* zoc*qa=Yy;Ywa6YKM@gnueq)req|teW=$J=(oA|K!yQI8|5Axns_V-mD!tTh$rWOS9 zAv*K43%Rbtu^tqdF3nT1$NQJ+lk%u&-StNqu@cXDleRT&nQe!(%m3=cLFo0p!z%;- zfJ%|9j#=ejx_V9F77Xi4{{3VvsyVjc*3#3)149IeIqgc5)=Fh{P1gzawG<^y^*JX- z#1Lmgw?A^Y`sxPy^$QwJnzj_SWf*|>=S`ay6A>LmDx8))tX8FGL7EWON^H&PPA09g zbeo!Vo@+j;%B*ZrP{x>8#?0b%6Vp7R&VZ-LxhkM^t&-{%LpjtL$Vz{Tls;V3scCCu z#%F`fo*xks!grjl=T$Ovwt2tkj&1su%Vugzjq0|OjhSAPyAMj`aqDr5cmn=!mdaSO zq%xUgu!MiYWVxawSjl}9ve=Z--O7e}W^5%yryevq8fb561-xpdq69N(HtZbGV@^ni z8QZ33(TZQ1H`L4B(2e?qZSqlhmEG@Z95ige!XDNd0(@WR6qpGWyG8$&2}P4K^TJp0?U9HM*# z&>W<=h@M|t+Pl;VzszndtOk5GqT<0v29(n_;FBFs0#%ZyHK0EG!>glR<%Yu7QMAh3 zWPz9Sw#&}4@^cxV)PF_(FKi+@{)N6ON%m*nB?gZ#?FA*6+`lnJ(71sL-L7WOmE@%8 zC;V#Gh^Az4Y#CLvmY*qbuR`86?vQ$YzmDeDS#RANjNeqJcCxu=AYGa}RY=|$D$w8t zN>NQ604|T%O-HN-jB7=CXE;fbYsFAHQ(ZU454fPL+;o-#$BBKs!FINqygLA}vIKs#5Gj&1z~$ibMl`BJfN$8*||dqrb5D z{m}9V{n$mYiI8ci_u0Zf#95jHGL6qz@Q3}O%9l-vD~;Ex|Zl=v1);@;8byy-lk-=W|4)Rm$Kj_e_G@JOun zbkX?>GGeH}+Joe%5Ir!}Ad{LaS&Q_$0Z|0*BsAD|v2xX?g^=%@Ek7~~4c}2IX-UCf 
zL3vT>81hNWio`oVWWSBOB~?e4A{EdKe)RI3uUozYL^1h=@nMEc$*%GYVVM|EcOf((Nl!Z!pL5r!(DYc5;pJNka@)QLg2>fvef_Otx&of?KQ@3dzdlER26e){je2XM38JWqMf=H4N)f$qLiXrkrY$l4!LmYLp#1N73Q)3y$xw-IsZk zXAdT0UKvW}eT)_6{VnNm**Y-v3K*+B0wm7k1!!BiVm{(S8$ zb_mvLpYm{YZ@n5YCm}|_*vGXRLwW^at92EJ4LLx%jj`-rhsD~54Vx2S)L^H+ z9~{iEF^|-Nm8R}0*`W7Rt$MZCp%1-*npZ`bfkio|9UbQy^aWitQYm)h5%XAJB9yL{I4sSH=)e>u&4JJK3LBK8(Blk- zrvrLf&=AlguCbPn(J9f*fBFCl@lMlT=7X2;r91c{(m!{F=-5p>@-Y$GO^Q?X%iyPL zTwTH?om}Rh?mYB<{9N%J9wv4a;c6vJrUY!Wzeu^KwE?6p1jcAZ6nTvd9vNQe4z3?- zly1DIh^ACl@0jg=T4Zs%C=;}-Qq#uZ5cIn!(Pw~7d2eOGu3KTCUff%UM#JTc1TAer z5pVl8l$A`bWq@Z!e3!~_m~plmRY;U?3yG8P!J(2gJnh2JgqDQNMv|vG>9;TK#}ytJ zg>X+Pa^2_lj5qpSh3*?ka_h^w*e6u)p5iERo9Na(w}@Lzgeuhm15jUaa}`L`M$!&l za=Cz3?pER=_|w~5Fh_Ktsi_IoNj?@EY504tNq(vd4sZ{0F&QJ!2zz1NA6bMY*r-aPOwquzP1 zlFSy{f3A+1`Mvm4N?RP!r9>#aQ7driwU;fPeC3T0>{O-r7fR#C$ z;AElZaBx3?l9-E=T%>C>hjY#U&)&5*x2+@Df4_oJOcndDOG$mAq1pN zfAd9kIJkzF*p-gJ>9T9Zr#vQArtS~MWffBZjmE88!=41w3MQJ-OhnTCk!kt5qvBjX zuER@+>#((Wq5;Q9wap5Pn)vM!pCrVGsg)F>IS3UyU$tMW`+?E z%bA!%@J$XqT5a3ZTPcYF!IDP3^!1rEf4ztS>aZeACTptUnl5i1sT#=ovC?qfXRGk%)lDBDIHQw$;ehya2X4nFNzb#ww}Q}X#3{K@O)e`8lZRk zUlPa2(g&DN!LhjGDmj7zJ+aess{$Fr5o+!q#qSLZ2l!-cf(e+|Y~I#pdeRg5e=1JT z-P&A|dQ7jOed2Qnrs2%oP#7^JrywzWC5jf(NxtnX!?#d_XWLigwJN>dil9;4NwkPd z9^1NTJI8i-bf22t>*i&{C&3jm>S`|rSICS3n(cN!Eief1)bYhvS3kG{g$O#b)RVz2 zO$E9$r-0hmIvtts@B50T_E58&f6W~@64?G;>&aTXAMNi|ggi++Yq!+!3J&^r4g#ML z-}M0U=uZm~GLl9C0$19O(T3_b^^HrcZnL2eJ?Q! 
zD5^{)%=$GX^tpR5OQY}v|EO3VKD9avmptHmzj&SIE&E%#Q9>0s^7R>2e-CVOf&mB# zuhZDNB_d!9k!r!<%4T@zB!*Ew%~F z;_Dl}9r{fv+~H#*Fr_BgDKMp8x=>&WI6eoBC_qHmDMzr!1f~c~S`<@Dx))xnFvj{} z!aYzaf=gP0nYwm?(eYqreaLxUEr#B)18S`+>jm6i@4YvC7WHIr3ajy3S9{C^(s4v* z{`qc)m9?zpIm8|Bf2Z)z*Tgq^MaOg9Y1k`8x@@535rVS*GT0PIGmU0T>DAk(bw)FB zs#>powy;ktY@hz>1jP8qBLeXQXH#SqChFQ?6YM%f2ua;Sf&|#@8|>bWa}}9kn?h_Q zjaLy8SpCpP;c}>#kzQ4oLu_N_g+_n!3}h`L!+xp@|LTF`f1m5@kq}@fzz$_4kA9Mr zvG$@w_7wCT)_<8~wU1jo4Y_6dW)@sM+K5*j$Z(P|v3B8mA<&BgXV`gEaiS z0E}bImQ50o^4fe@>Fz6Nq|Pc(IOXf50)?My-+n5@ne8#yFJH?b)&pO2`3aegE}0p# z2hf2`O*f;=e-b=f5t0|bT*Qv#Wq}IEd<}DT1>o%)@ZOALe(*M8N`zZpxaFyR5r@g9 zE8>PRw2oztcoqc6q>jeCAkci@XkMcEdPCz$veZYyXw5T4F)AP*E8@CeGFt2GjSvp` zdU+$RnBG}bEg2nRt-yEsCuF5U&GmZK3sc3_bi`XRXd6QIU zpO-*Rft=eR=bQ2DQ$7>n+!v;>FoiFvrIAWG7tZ}d(??g;>y;R$2N2QOc?)gT5 z`$Gn{#DB%?%{-d_LZoGNKHC_hts=e|{yz(9eX0zF?KPkfan zI?D3}y&dO1_;iGGUpV)Lb6+_3%dZVt9-!~n(Y4mOzeuPf{QK<0qlD6;b@p}$|9-vf z^{T>NmRjcs?&e?gnftfXf1dv9^taPY+@D9c-^B@a@?r{0I<6RvTe|(cTl0?~`;&Lo ze-?Qa(>_EFg4mmhy_wjXiM<)s%{U8%ieIni*Y3@3oryWrH%6N)w4AApy9SMK#yq%k=0xt(1iqVC)93NQnhY_Ah1W4I!a$t*sJvme|!@e z;*Ne#Zc7S+fQGn5k=vT4(QiTLqF2#)@=t~{BlKd>@Qh*5pkJZ?y!8WqYjg|!*mfw( z3~wKAm3u+kKYFHr<@FfJ69hMC_}~-qj0W)n>)NrXDx&!Q;QP6jO@HjHF zHGx(eFqt>!=w(9ZKlv|Dw^e~=d8f2#SX=FLAJ z6L24GF8rlWPfsnZb2>#p6g@kCa?gg29T;|S{^(kcKJw2MNp7D@$~j#RXIRO6*w5G; zW@FTHN8^1oIvlij1e41Q&<46BK6^jO?V}%wcIt7qCnBXDd& z<0>DmK(~WUQY6xCK;M9|e>9LXfk#|&EfMOKSa;YzKXFaNKFR8j9?(0~ANqr3kRF>! 
zOnYP>i&YkT(XBK5oyJLh@&Lw6xu6BmOWCKWqTyc(tpU)ds)_24M-R-}e|*rC_e%G} z)!EetRX)?Mde>(?^}T%dv2$~C{ZY|0RgwP$vb(Z&F>^+rM`S*;e{BB({~t6$jAg7b z0NI&2^c>$A2W;*^iJ3#=+;yG|&vi^pK}$N*<@vvqwkwy(= zN|LsnT`YGjQgWfb?04J!wx%$B*->%T$*4u=ZV3eniua#RfK)zVhS7;fVe$2rx)Y<7d`paO zhY;erCeVRmUmlV=?{ktQn0>fjwL>dlVF>6l5WUKn{>fBZ_W7h2w3yI$z%C1B+Mr?B zXa9NMApZXie;O4PQi1hB*9+H?bWanH4Rzh;mbh% zltBZ=R%mW596vFzud8Uy&mQ+DKIi#t8?v3)KKalS%gISGNhErAH_QA6!f)|U1ddZetr z35^nas5l~)VO-HO6sp3LBfXt=tmV#?N^RB36-}kIOu3>t?5qiQEA-bE(U4(iM9~PB zPA0;If1Lv5#;U|o7d44+(WGt%AvE~0p9mLO?n+W$K?DopJrUx9XbQGtTZjl%WLZTB z3=Ies5iV3`Y!djw*Mo>zv3P{^Ap;l6MpN!*sFz=(z;XJ)EGN3O7NogwaJgs}zJf@j z0zwM4A!H?D4w$EGDOUfbGoZ3pGi&96Jh^*be==n_I1g(q?pYvpnkxc1payo@i;LxP zgtNg);YIalNI58RSUDZTr+e5QRnT!M1X4S&EC(CPvbh9Gh$!bI(mNeo@fu}Yg6s0ayxWsMF+VT0E_7~W4GHO6C%*d`Ch zPq1%B3VHnS%(NYki!1%@iG;@de%^cDYnqxIO}N!u8a*6rq`5RO3)!4PUNfDd+jD93 z`aP+QV;FXD(r>qMC_~?z9Gqx94M!(9s-h<+GV*s-PPT#NwPLbWgG+;Ur9rYpf14r2 z1Qmd#2c}JeXV=(w-Jv-7epHxGU~OxcGXG0xdT}a*{H8B5Zj>bL@86r%mG_f53Z~7| zre)X@cHz6m&@{4C4Nz7qs99glFf@Ps$PB|eCwgwP4?AI!=CD*VMuo?Iyd>OwW*&y7 zloJR4?hOkHhs?mUVY(o-7&^!Vf3$t_WCl~hMikr-4C?(>N=QK4;_Wyxs8tafntXlz zAjNrp_{;=Xh`frEDIEC?xC3)%`!V2sV>zstbH`{qNT1ngrWVj5jl*)c#d|30pUm+$&m@f8!Q5UV%}Z zM>Tg%9s^HKJY!-5860VdvMu;%YLagJcFH`9#l8?DlKd8g(}448=}78_tw zRFY+e7x<7+H`ee8#P(?Es2V?aJyel5L6Nqh5Z8@;*p_X^W^Ly-hr%pIlsDANhBn+G zQ7AhuxkF^NTcn#%#DN)&e^QKg6Oy=H-6HK?TjrE${Wkm`S<19G=@v$zjoqEg1qLP}N_9JYkAR?Wx;TYxjJ9=57UqI^~ zZ}K0E;K=~p=Uk!e^<=J*6L4=@kG}cSc$dcQjrx+E?0Gb~C5;Fcf2)%smNRZiRW zcTUBVbk9>h+%`rQ6@2HeoxAdj7n3YhPToZN+evYXgX=D#C1bCeIYmX47NlVSE>apOH_9CfuMC1sTgN`4+0^Ohg`OV47t#FhR@Uvm0u@ca@U`xH&pWO!#U8sR`tph?ykzIRr2QTye80R{wXeTT)xNi5FW<84o4*R9 zdBKtV(llGS?K-BBB^;;q+WZTnxkg4acK!5VndOW;IUX!qj=LMft}t)cZcL%KaYn|? 
zK5>Lp6SGes;l7dZ?U=W}-5aiMZ49%Sh(HD?f5IhvL>HpKbYb06Z}*zb|Et`UmFKcJ zqD%O}n3y*82y+9MhDdTFcRTPEHm0@i*9dHMYF{sq)6htAs1N-(4ydF!kId8Jen??62&1qFnwbOPc`~dfS)jj=g ze`R!D$5OLe_^tu83Jb-G}mW_voKj=M5Ui)5REA*=ZbdoUR@WgB|baAYJCyYUltR28Izyf{c#>Zm9c!qZbg;XsC{ zprL4eMe5=PR<@z9_WHacHaaQFHtcBZgXPR;^ah1>V=kBNVdEs?t;xx5tt91^f5aGj z`i_)BC5d$)wc2YKYAM+U=c$mpT1R1YCor5=T2Fk2#P&>HZqXeWOPm>4bv7K(_@#gz zK-{Izix5yrQ+77uj0q+5X{F!Gx2$2=ftT8yi2#b`OsW?pjmw@E=P{aoVh>~~J*U>& zz(;_U#Bm&Dl4E-?sLcmz+)_u6Z@1ju>*ND_Ci}9 z!D412yrhO-#MqF(kMZ+)BLj8g9GBR{u;GU>4p4IF7()RA!1iY#Uass!f7)==Gsag* zYT@PCegG3VoWC0iC|(xfvP!1^gA)YKZ2byYrkZ4>3BGv-s#YePiR_9zdMNtv$JJH8 zqg?l{&aPzb|`l|Q-TD|$Rfv08dV(1x&m3QpVU<998wts;S z2aRbExEJT=jIToXTQhU$e>uK04q8KJcJ7Q(CCoT?ohQR{9n%iZ6-k!PXCNAAa5sny zK0&o3xBJZBz?mIkuO+R;&|774Y(^0F_L&;5ixdjCp zOq9Bu&?f`?)(_}~tK(tZ_RU85PF@h}(Hk^Odt{73eh`2d10^&9E8WA|9_*U1WbBLLN;f3gu3Hn;)it)e`=O%Z21J4)xav zlZl-mMQ{NZr6Vj-6p=_|o;}a`&VT&p^E8m2yg2cr@Oq$J$^*#@htbFnAFl_0`TFU? z9!N>*h9ftKLhpL;-Ae}ldGkO2$A8R!u99FR;p#BCa<2!IG@V_&f1eB|-qcMlqnQ`t zZ{sMQx+#8*AK#B+_Zvo?2Jcl_Hs4QOKOAJ&$Lm}ljmQ4b`w$K1Q!h;UAh8#?DQ2Eb z{8^HZHd|-3S?nbkhFx8o(hZ($_$3(8Ycl&9dmgRa=uyqyCh(W7w9B`_r?khGAJYyJ7JR?$s<#(jUEO zD$$$kK}=Jm`3-2b+^3(Dl-@j!vMq5(e;-KWX#nDUasx>*WSee@0kiMoT|OjTm_&g; z`s4=z`-SM(yANUs*Ip0Y;Sj{4usa3w>7Swz{~VMoXWNC3X=m8i?fheZ+=6igjl^zU zyGamsxoqdMz27Z$_+cOnqz7g{7UEw@rs;j)xzHY&m`!iK%%@W~_W$WgAKcU>+XXfbezKu^P%X7%bSnji2@YFSIQP-?!4J8*k-m6o{|o_&<=)I! z=z_cGSz?-@U;~LfBlxYEF;)$+ycB`Z4O54&LavITnHr0K22}aMvK-sw2AE=)j%hMA zyuz2M#U7f*mwOdP6iuMdOBX!%>k0?+TkIlUmHcDw#@?XfZj$Yg%x>o?21)tY+$E(y zEP9^HB_NE-7F)xM;mN|WtJ)lFWCX-QDg8e4#_kYtt$W1QqQFfC(##E`1fR&Nd?%Z- zgI877WsQD+;;()>oP2Vpen93c%-rF`jS~-HntTyq2+?F!hHyTyv?W7^i(ps=t*D%URay@%S+gp zZ%^}lyWK3Mephu{G2GLlu3#h_G)VA{e}*EH9V6H0YqJ-WDygIYRd5g#U&kFRSvA|c zY)o?2KgBp0+H|~ZoVxt(Bt;r5kQ3J_D*T=(jdc9)9XG7G$_Fq_>NL_>HXf2NLawP! 
zZ_OKj>K#tRic_9e1AM~?!Iq4bPMg*ZJ%|dOu%-l}-3Lv%(C_T~3tPKYE;8jXI+Gm`N8LKe`}AI4DFTRwO8X7R!Dle;Xt$HoyKWw;V16Hi1 z+Sw7Hq1&d;V<)rXFg;se5(3#4x?H~%v3Rn^e-+Y9UCNAp70boJs+*6j=1xchMLeEOi=Yq$SqEKt3SeL#a} zH~TU6kzPg5;Exb@mvA%H!cJz#UOV_0w2!CB?~}&T1ka;c3PO5p4ny{{k?g^L7QGAMGa>6=%b@ zTph1?W@6D3lu0>H($uSRNzW616xqi`UZk0SOazc;XPmXTS`^u0X_jWw?s92_5FwuA zM}|jk*a#uA)aK($BZLUEjdO*un4`Wg)gy$65F+Q|I0+$gSRq2PgyJlCt;BAszvDgC zY1LzM&EKgIt^5WLoFS@)5ChetS@l>QbCz49Zp9>65b*S4b-JvgRz=BwYR0w>)u||E zF%*PW>zVkWD_lB$j@ULb^#YX`32dAriSxC zCd(@tx)N&Vk(2Z}uYs?9D|P+gV>n{T|ENevJvT@v*=gGvKmu!j6#KWM1gufs);CH( zxkBSblmO5|L2b*c@=*d&FJoclCXBv_I=3ie(G*?D%DODeFj0OFa#&W!BBPRpe*=uQ zgwo;@s$;31wL?pR5G4(y9 zbiI>4KGUiL2nm29z}Ja_fZo4NJWqP?g6P{1Kd?W53P#QayAQmVD_XDNOvFWv6roU#~uV|O!FX>{6vbqbRLIz3=pQ9Qz+l% zGB0q*@=PzWnC^5Ic*#}caaZN+Gz3AWOPP+(lXf8Dxny51-xcKY)CD|K^}7}D8cc{R zrd0}fU_+RHhJo5WhLohJnrY;~MrR=4SBXt#=&{7Eumin0;{dsroz%&u#`Xt0hw$JzG0Ziy)v`ZEZ~=a znoc7%%TW~t?J{^P3)w+hBg=j26?}uVB!lJX0dNDrt>`|&9KR~y#)cn*;uion9K*4k z0ehxh)wVO-{6*`E?*1z9Hu`*ip6A?eQY$?lNHUY3``Fb(JyjW&Cx)mcUKU6Q^oV;Z;5h_0Ge@k_!K_H%uOoF7Pz-qd zf<75z_O~?s2!az@kWz-6{?Z7^0X48OJV$?e@`Tvq^qGQG4J~+S%)W98=|@WJmv9KT zkKaM4PGL)sg)fXN$T(CR7r1a6El5AIp)Z?DNQnbu0}5wsX`kYATbfLA(8~sYrN$jY z_(gZ-EQDV+&OG-JzG|UT)gj1hGJm#?;9IJO2JuA%Ur~@Ohkp&ugFC$#@ku(h=jpKZ{Zys#d@=;xe(q8` z0T7M{q|;V`sgf=@MI!Zs7Io);N4WVp7Exan-Rg)BJ;c&{ZbR=70q^kfdfB91Q>FiJl)oq1pA2eVr&_J=-bs8v0qYDk3 zq@{R5xd*w=qdsGB(J6AuCZRb1)Zi ziu$z54+ehO|H%a`*@C3_xZ)4BB4&3R( zs58uOvqb-(J8*hSf7KPt>ZsQH@@Fr6Oed$e?EibqDIb)|FaOJ*MV$8! 
zgM6xhc&r`~xVwwq`6BW+jMkg3JYAF@*8onKv53L&?>?H&=D^&4CwzCHIU-&F_AEWc9M#eb zp*~IT1HeG#?ntnI$q3`|RE7F!tI`kNY{7n66^`WszrnBjRiEW_SjNNp0=-Zx6;0Ar zG8JRAsixb}DDNhJGUV+u8e+Sg`ZCqoSjpO5j&xa@Q$CmxSOK>wZ>XH}_ zxaJVmAod2KpMZVxP72?F3(*lMx&Gt^iFc~jcm+0Ib_^|xcfGU&1?^6;w<$?$n@&go$(lYE3?v3mq7Ure< zhJ!W%JN*Uehh6#-FoJRRWtoshCR4D~ zm!kNXcn6D;u@Jp%kbKz@$p(3AwLlgtm46k1EL~OrL~oi|GTzouHm0T_PQ|lLMIZ~g zkOFl+h?is2yuLuz8YmnmtrgaIK4Bg8;g9!a9dZYMW51OcJLZT!9E|-gh+v^USm^ZW zL@*Amz^@2h5yzBn1%8R8$@O4QXM_7CoY(A+?^mn}b~y;U^ku3D#;FFf3Lv98zEN@V z28sDPatrn=Q!-PHUiKfzLWWQ+kj0YHUPT}a30a0|ScT6&?7_RL|NP$D;Hv zGb?9*H3cFwv#%Zkk$Kg_Nqb-FGWYDcHCq8~X0~+iMs;70eXLeOU(cU!doxFAm-a%Y z6!+|g^|fNhN-_I&L(hj!_Pb7kI`-vt2r1G>6tpiX+M_(_#TiqP0$6j#7~ajRaK=#PDx)n#2*gg15{1=6=QvI;nVfeUZ!?7Xox>R^OcJU^|tUvzpwF?~?X ze!UECJ)Hxc?021Bvp04Q)X|qWa;$XH*Yo!%Pd4<%5+KmbuEM7;0Aiqb!(W~NjpmPF z9>Y`Mp4VprsFQBvIQk~RAA@LN-=j;|`w*9qys3_pu<=S*(r7%E010bZhNf8D2SF%* zRFO?q`(KvmTe^)JeqN$)$fj<*gl|f7V^bSYsMUS?IY~3w7boeDUNohTk;y*#1ob41 z1Alfs2!XdYkm4x)o1ad;Ox&4=?x5`Y#{}22D-vlIB{^tRafDlKub5krk4g0cg$i+3 zAm1*YlNM*i^&{jWQjDD~?$1(i=!W=zu$XHA^b42zKfzZ{hHl_t_Y~SDM27vq`|J&~ zCCQh8?G@jl*;SH$4?OyA5PtT?9JyCzi&5OQ`qX@#l8HO=_{Yn$kJ*4Uat#$Y8(=u` zrrv#?*321&O&U*@eV;D%#$!ysnAnPi%`YylV0Vtf^5UrpK&hp8Wh?BtM91si%5R8!j>GLB)7otM#oow;yWAtG?J+ae;^Vs0!V3`19C z3n0tv(Ad@-jb%d%5s}f?eEMg9$kHbl^e04w5D_WZgyen&=J2RQMA_dQQ2BS!v!pVO zUx){ZL!YWIE&<%{57<6Taz7!=e3kp$t<%GBzki_ zh#^#&epq5ggb^7LBrHiba)T)smwC%m>09i=#7O=zcVjX#3ROX*n#^v0=P3rs^f9kf zq(3ZbjpZt$9j12HPyuUS-$pusrgdQe#nF>L@)F4f)L8uJNr{(A(YPz>;2PTOsjkhE zuD2*a0@@xXow%6x%!^%s?nW?`UiD`4`H{lBP&|=UwfNd|dWj`i>ute9*_-4J7`TOX=gYJDmkya@Bb1RrbgtIl}^^j=!bh zwT>xLGy?gW%NdPJ6nHe`sca!~t?duHTHjF!A2JQbNgS<1-c{;lvR}(xW>| z>IA{X>xphKe0Z^cy?-Ro?NIU3BMdUEh(tWFyG`zkaT{Xl1Y`|~nk{GDW;6D*;IrWg zf=ge2jpwfw-MG!0;nF?!c%WAT*f9`~y%=VI^qc%nQhwi^9H`yCCmOA3^RW-N{2avn zCeYTk)vv1dT=qw_`D}@}5!klz9i6C!)^_TyTHBe9>F5@JZ*AA^L@A~!n>IiBId>7052Jt3?)pSH=)4Lzj8ZvE7KKKZT3QC~Rv z)s5ZOX4=Pp)U;LSCIdPqixPYytMZ);*a5t% zvM%HNm`-_Semb0da;JVk=gM@dGjU^_e9|#H7mBBU$p=IW^I^IRUTE_wyMe{3jjl)h 
zP=qA~{D|}Oi9ckAr*y>5viFKlevXC+Zims2F%H>z)>i9PJd=Pj3UD0Aa{AaiX&(F6 zgMWR{)mzp2aCdR{K~paDJNy2^)^3%HPv*yu_n%Z<*Hq=-16-}>SJP(OMlQf-Oy?@WLJQ^6iN+~9iA~=l&(R-x?`P3BFP=pR&AeBU<@kQ;`r&{l z?I_4ZK#r4zMnyzUTbFeyvT6W*7C9iGR~tu>9EG;bm#TtnSejz+6i(bYKb*|AuCmm0 zdd-)r@vt~X53v+DE}pt+av8;s??DEHd!znSc!0rSh)2M<9uU9!Ae6WAzSd9XQd(SOHs9d zX3xe=r%>J~w+%3_GB<|c}x7%)F;iT=2+4vof zmXWfB+*<4%!9pH0q238Xg|1*?q%~`Q|7F#l5$e`rTM%jyp{kiq+4|lKLfs9aYG<&J znM9?Ry@N)ojqWFKa6yu-uIafswCH{U57e*~mKM7MhgJb5q@tm4KsdC0yno#(q6+ z>#L7_IA%6Ea(B3o?Ky1xG-H!bf=!N@O+JY2ISMur+Jq4z!gm{n0#+bk6ulyT4d#iY zmaQVF=9EPMI#jePvhApj%;9kW{%NCk0uyybMZ4w5<^W_bm>SrKVm}Jtp&X8WTghh79YsN%0eoSZ02I;>@^;uN{h2 za=afIEqnpb#F3`X1kh;-Q*A0H%i%WUN|1Dhnw_B`fKM4OZjFWj z_Rx`V!F7}=qpc4s5vnk#tpW~#GpHt8_(yk!Y+1sfI|N@hl7b2rd1x>QcVv-BiB?F-D>DiMRS0Efe4|*tX#`=q|8iNpGU(YUPbXkowR0ieFHn+bR z0PbHYbDkbC1LbvpG(`jIE+0fHn0g#rbfVYMmUhx0d zz=v%ImR-Y@=o?tJ41=@)%VHf0D(F@{p*50CI6^9;hqJGq&=u8&Ks0$^YU}C=2jK+# z=d{m+ZX@IdI5)+}NdFa7PiTsoRi;pyQc+a#;>VD}NV3|0Nv|A{gOWGWXi+6tX5gic zzLWSCMHQVah?lEpw$9Fd$!=IS1+mfL#b3Sv#h9WF%oJw=5p*rR$7E&Cf zAGN?Z^0X{}YKC+g0%L1Ch(9JBCRat@F74IRSV)necGLpnp3RO?DKmmfGkjTuuwj-8 zI_DO~*7o$PT*$WbV=M%jY`?g_95-$9V_qJst)}9`dE;OVEJMHORD`Be-7y+6=Y(GN zUcaXyZ0lhADglSKHu27dV>7D+ zd@Mqy9YdzMR3uWU8jWXHzo}fx$8El!PAp2qTp1-XE9nR&&XLDlUya03g~VY#CgYR# zbln$!&y};-iwN2twZsus#S|gz*=ZD^N|^l>H}$yO_)KsKQz;i*v_1O|85gzRUwO#5 zEdDtG{@0-WOsYWH^)$9P2n|CnuP@ORUu_m;aS{0D z^;9~BUGOSW=}??38;)*UyQ)q$RMpaXTtwGR+jiK%?WuH*q7xzsas9dWb`x=tyfnCf ztYzHVt`zR_X}HT<+XG^wPh+D+Ttvi0e$Lxse$L-?Zk-pG1 zf~Q%sqZ@)K-U0<*)=_?8p>GR8UfYhXTMSIQJju{h)3Fq!)Dq;irXm{@sHujlXtd+(dm`&?38bpu`CBA+Wc;j@tsV=}I z_aLm_ypy7EyJ`1C5!Y{?l{Y`srdne?GJFtD#fgl9Tnv@WMt+>al0hy^mB=$A zmfBNb%yc;M!$&EOz9rH)j;39I*{Qron`+DM)m;*n!?sLaN~66@v%7{|InB%E``I~; zwqH6dr^7;hrN}S;ux7lkMrj9{bVogv8^ZgYZILXq8{4<1$?nM9?P<2%`M2Q{^)%02 z8MQr4x4ZN{(6TOb7#cXNnb8$q>7Lz6Uy}S|!&Vbyq{R*JNo=M%5c1Eh=MSmAn-C>A161y+#Qw zBR6#~rO#gKB8(J;!FL`)!Y>a|I*}l_lOFW3P^vo<|3azmLzoJsx)&Ywt|q-|CVEGa 
z(34g=b)_23Q!lP9`my@??9k^#UHzI;b%TIYf_O6SP3J>yge5w_gQRgW@QbtY||ezb(lBTn(_wSkS?WR9kt>o zn3|y*GK0sfxX2YhK{E}@Ja0(1(jll}+Ls@50vDL}0@I#sjKY<9h(_OkjhW$b0#N5$ zD;(h`9KQ5e0CMcePdEoaj_u5^6=y_cBl`S|sBA^QeGxW}iLyf?!!b~lO(t1sd(?`s zN#SUJDzi@TuuCHJXHbMq3(!MBT=#_EMnYFl^ZX4Hy?j&0( z%9HLIrfaJV-S2uAN9W!7>~KU(=lrDEnB})gh*JXDK;H98)}D&CL8exbxX(OD%iC=m*I0X4@cez3DUz9dMxmN zB)x3SNhUNNnFk#41~&j%hT{_BG3J%q*=%vrOGo z*wOb2yP4)l6GYh3y>g#e$^jx^qwE>n(ccHscpA9Z1LUDeiXpRGV!-UXWQ4^@`lA<3 zX@pgW#XQeRVRs5DC$_NfdA|tQzw_yTpT}b!@wD@gaf>2jimA@ZO<1;DwrJ`105N0? z5`Ol^h!;O!4^(!u;_7O?6`w~kaYr8i*jRc$HXw~WM%#m8>}-IxTrM7AF1}BfdgC!x zxwu$fH8CQG$`$O+QCMC)^}{G`8z4xFJM-@|x=Q%MlqrJl6^~j;aT?r50YluqV~}P+ z6D8cXrfu7{ZQJf?+j!cxZQDI<+qOAvYr3bm=iQCi@B6QmjisFf(td{wsrXK5fpQY7X^mbX3&mwYo%*PYX zkd?bLzcivjSw`^dG8WiWFg0KlZzfcS0F;thagG)ex*!><3^UV`iKF*%xav(fwG^|G z6B6l7ID}%w*hIb9SwsA=VftJg73jWH8)W}9Vw1?~-k z{t|Ddvw-ugk-#tn7tA7lkMSmqM9G^B6L16(XEcnkC9c>{O@gR?1j}U98!|MD?sRde zbFGETdIuxlH!rQ~*w{fLj7Cv~$G3rezu_yG4~2hSb@!7_hKbcdFS*Q%);VsB$Fa=G$ zOSn4Dq#_{GmmFp*fQw1ns$79(Qs_`V@==`+iX}TOGQDS>FCM$m><7*Vi09Hj-4QsO zVg`_4u9KvhB%xA?si}=R#uEzD=5tZU`~B>Lfjx^YB;*!-rKl{hsF0}A@|UYQ0>-Y* zBnsKn%OEbH65Kv+tM-G8*7uW8yLXz9yD!)F6k%KjDOAseRV&t??}<^MPOws8(yBZo zeqV~A0mjjj2xrkG-bX+J9K<`CuA6_?hu(DSD~3mCQPUX)(}WaX9}sx|SEs+@Ct4bg^D}2r~Eq&1{yt_i%zpj96GNFex=f8ot+XdaZ??2w7F>} znb6ZR92+j6MY=O14S*z7eHxT(r~m9b2P_@r!34R3rnAemTugg;T0w561!y`sxQwKr z#Uo;P9Nj-X)+x*64E>ftM?cOxL4F?L6w*cnNg1S z6Ex)1SxDJImP0mpHe|nfFx3LSz5P<@=CG9q59JO`MZWGIRKJnTysL9K?{yOviIzwd zo;9tBSR$kuaBxN3A5ma%gX*>v3|r|Q=w`PS6luXn17mXnC@%XHD7~2p$0BA(Y3;Nj ztSi}8VgpM7@P#AVuh0VuNPfbf^>qOzPV~c%qn8xnxo}67b)5HRz&9uDZm`@1Q@w8FofOIR(=wGwCJ;oz?dK1_2Tk<{ z!l_RCh4|ieq@~Y${ovL21m|x(5!t^F<~Yzph8z$CK<6oq27@yd=ah6#Nm!aNl}W%- z;=)I#b8~wC{*nI?ZfH>!8n(=3nxz)Ih3+(#I;oYrVCp0ToI7t6z zCwJuT7AwS;y}2hfttW%*OeGb6%KP$)I8T3N9xNMstEpO6hABqeyH`634{iNLm37q8 zN{S9}hA?S^ocuoQP2Fq>FKNC_A=&XVXKb-DhIx(6&u3T{J%79uMbJ%0kdOU!WjVcT z$8`)^`}PuYA5jbD?EmDq+CnDw5WVLQ+oM_M1Ooe4B&cul-QUa%Dh6$rfh2X&$Pafm 
zyii9u1GYX0Aik@2BweND*e3(4-wa=#B4h?gXrG;L6E(I~+FGA-p2y)##xHbPNFklf zYW`v3)^xq4g`jghX~Q`VFX5eG;a;fEreFtMujjSqQD?|xhJixVC#Ef4QDyG*@Lb0D zbO5%^q8R-!XlB7q32{2Bs%$RRIC-+Xmyb<{AD*MX5q*-pN$K6I!l1jK!ASm3Dhp`$ zUp)EL_p0aXjH)6ZRt-^CNG6Vs$gMcaVU6g~>y0>xueJ|1OW1EI=CDl-IW0qBNG;S{ z!#uS%`5@C}r8n$J9ue-u`qK!lq)EVbSYR1xQ@-qtJlm;s#3$z_5#XO|PP61Xn_1Th z#RKyP>bztB)ddxcZ{FEl#fD=wvm9_~d0>yt{!{Li;8fcko&*<|NM>C?0Qc>?fa2hI z-Q89dPX1=~2N7i%RsyHI@!RNDRBx~E!!B(XgPws*5S=G0EbZe^ZUBe;7Y|Gjf}v>* zq!6To9V9%Y%-qW!emS?&9yzGxNvP5R5r5$~VKo)-PxsnPn-BoR`LMWu1W4j}0>w2U zrjx;z*A)d(i7YeyA$`6aM)I7cggd?SH+B9cYj?MO*3YS?I-d3l;!|cyC;j0U-*)S? z(dC%ta6A{Tp_cG^+=t4bElDPtHhv#UQmfg+R%w$Mjr1@pkZ3!Lk ztqJb5#R4RUD^7L`wQ^&x{5}{GbB`1#TO7 z0#OxXZ_Q!_r!%wGiKw;7gSYlB&H0vz-KVIND2|9ii>Km?!t$`0Ct-d#{-YS%rKzmgEK#uy zVnq#`0M6JRsj(**rOt?ZEZmVnJtJr;on{;n8H*4_A|6{irt zgIB-`GTE@R9)7oUOn6ou5`J99Sh|@6``@bH10>G+Qsp=yPb%Km-a^)2elHZNt#4pp zXw>u6EgQD$7UZyVuuP(t@a`7bD0*GL;a3`4mDV0H(r=!6pM{o~K{1zx%s)YtsiE*A zEN{skdEmmPXDc17_vGVDZSWVFVU@?m1u=;9Pb4;zAM zRMl4LK!2d6+Az?#v8?N>B}CU1{R%MKJgoA6m;!-*Q}k4e5w)*{wD8~2s){O7y8xEA zWbN@OPD~9$q2=}c19Xoi-k+}z9s#BB3&5-9L-qXn7riqZ;c!8H>O0-(28wr9^1zwA zRon7ly^Pv;P}W(StcG(zkKv8?AhZiP`I(kA3k*U;&O2Xs*| z{Mz{u+D8P&gG7m4y^!KkxwU~Z-lZ-z)3~K*(@V3T6Vq1GH6yM?SIC2`MHFMro3IVd zdch|X{p6I$3waFPuJgLiph9aS?34j8FnC)XUkjiNjA`~;(WA;{N&MWkP{8poe!L@KC zfDnx4kc|6==kk7+NUmE?0@}fSePxU>jAQz9qTGQO<1_H)8kE#97a7)7=?L$J+o~$j zG2HJKWT*A?jButdA=X;4Z-{GZc~)6z3C|0urlKk{Eq za| zI~Rh)HepE&kEb?7Nq*pP0gNTrCJVePHANVAB_1By6=7JOaLecx6hN35J1rS#wsL|$ z)M-dYY`fwL+oY3?r0spP-1bs4Z^qLW-&LJML$`%HH>?BP?OwmCDSZnLsnTGbxxVYS zP8U_!^$&DUs>e-wRIYaoGXFH*T{wccVTWG*WV}ws>p0I!eR%eJy4dbsoj`+&oi4g} z`yheGKFK?S+4Z;h84&lkN7rK@^$%+2VlCe}jIuq88$utKSm`~nQ<0iEU6Rq@YsKL2 zhQL=9^J$>M`8jkf2|^Ax$(UWj&QH_?oUhO`=J!fA*%&o9UyQYpV@uQ(VeF8WDpnY9 z=sPSD&deCUa#(Dzy!G(*23nXsG0>q$pTL%BNkN^V-ZDevMgV1(g&<$t$8Cdad?-Y5NdvL67oZ>*M(FhOFW?Ph^x}4Xv6Kbc?IFR2q zD;($HsmHkwXM{mBv)l*3YXELfkAqV14g5lA>ZE;8%FShW?@U?i`byAnzaAIv7mk~X z+On!+{dglUs}5V{G~!{e^yeiuWHpf6 z6&Wk{HlWwS{4=m0i4j#aQGqaM^}$ 
zyO3vriS7b-y+W*H8pP$Tre)NckwNeu-#2AgEi(e=~BtqXo6vo}o zYtXjUN+;iRic9Eo!2(uvs#!+NFRv-u4RqLmrnKDO?!t%Q`U>aL9C~&k%{hDy%D!H$ z7CEOsvy|CZQjeRWG4STc4bSY}f6&nTd!zMJ1OURz`Fy1-o5i0=q;h&YH(Wwhe*+(+ zd)oSp@QVe;am(zg9(p(YEJVA4~e>lU*=a zq$w7%S#{d&*KCj^KVcQ$hK(H{zW_m)G2F){OTo7j6NFbr&Bkbw=bM;sixlwt9}}r= zk-2In?`GGeM&M^kD;PP0%$P_iblc7p0Rwz)Poi;kCY-CaJ=?60ml92R6J^^wrff%Z zCU+7~&LlFgKkh#l{ZRN9MC<4!(%v2>iH2Ph96n3kA|1rtC}r~U)+y5nlHIxMyW@ji z0&e8Y_#qv;_YipZw3%uXNBP*bXhrxuL4WzFaGvwb0v|=u_@^eXsr@Q8Ok*6VxdHHs z8lS&NJiDB^6#-GzXm?xNwVAn}eWUm5qp~QhV2&WoVo-~X zl$mf$;eJ{kWR`yK5`jIbb-|^ZZX~pcdKb`g1)tUiS=9@6&Tc}Mvqy4nmLj)^CZH+w z9KW-iyJBUmyY!Sy=pKJ%(Ti8mVt{0M;!`2VLbUluxn%A!)99DP%2FN~7sEY>*$b&> zCq&;(M`dPfex0jt#YIaj{^nUUUQ$;~>gGonXyOOgo-51oBdP}X+iDLCXw4Od8(IqA;kr&!}8 z;>h7qSZph>XWz3pzEj}FT!1P(nD70LWzU@CM1GNF^1Y43@?Jzvw;$(HYbzD|RNzbQ z2jP%A{co&x%Gnt4S{bnprM(asvg*sENQ8+ylwNTs_S=s}5)98Ns%LV);JI$xL>ivQ zP^p&)e%ai_Ac`Y;Pwxcir`jXUi&qAv2d=+Seg0&5Nm&moh&i*TY5+qyC32(9qPGJrQ-b8YUI`}CWBwTnmGGO7UWSfGi`VNrB~5|3~slx zBs7T;@E%3=vuPl&3H|(!tCtLq*S~H6Nda(ZNw-#Dz&%O+&K!W?xPF)cMkLXjz%Qg{ zy9F<#gi?Lc+dZ*I&{We+h)q({qO;vB6Y&1SOGbOQ%uj3V;UwhFJ+`Gr5TPBg3!l1u zH@@0dLe8Q#P6!3;ouPXBw5Stdk`v^Eelhfr1FgY$=PjyX^Zak!u4E6Vo+Qc%u}U3r zCeL&6idu`3kkpHsr(RH|B^?lpm3 zoW1KwZ>6;Zxy*iUZTAN_$;b)wKQ_=MRaU|L*V}<$&-q_(gTFIf+D$z0fB#~98v@{< z_pr60cQUha{=c$CME>_4`9HBm7;OKWSLM6d1pK^yogWbx5KuoH5D;;aQ363yjSCcD z+isH`wTGbIov_(jtT&a6$jxYdg^^G$aATW~W}Zn&dWmwQg$M4;+BR;H;x`c$mt3}? 
zThh)Ly6en5*RK0tmp5|_re?Cjk}iC$oe*mxWsI)kr|mJX-l?lK;hG`W2&}sCv5lk^ zFP6VgX_Mr(TAm>U8#~(0kuBo!=u==f=~mJWv$p`~~0Z-!JI{m=JR#jNS}-f` zqOWf5RKnkwikM)T@f&Bs$Q&>r&4AHunCbiq+y~SAx{ITVU zNPqb=4pz=d+V&a<7N|K*K!pll{z^KeDjJp`kXYe#e$E;QuB13bP!s}PT5PYGY&~StN26Ygccn4 zD+DCq@`~jUE|$a{iC@7HS0dm>LLkm~jCZK@vhNs{O2ff=WD_zOhncO&;kQA>JU7WC zdhy;|J?1*HTcD@h6V^!@z?xdV4L`k*3#%^MK3F&J(#NCLv%_nF+hrwB)n?9pN;bnt z3V$g*^rVDMUHXhQ&xqH!wXx%zd0P%1bJ;ddyS(=5>u&-C=NZ>Hmj>PIpP4e2^OEZ7V#el=mVS_(IhF^3m>XPkeT*MJbI4^d zqEo^2>?;xD9I(1ZME&VdWCfsO>_oUV3>Yj=8we0j-iC6gKh_XVm5de49}2F-##wW{ za?_>`L>5}9@3Fgh0Bjn)es3>rbq0UELv)u}I(Mm!*mYa#!CAf5H=$sb)6Arf6b`3m zx)|FAwV0bJJXz}>gpfiPX>(;k%lYT-2AYvX>-qr6+tdhQ5*N;5u>Cqci5TE@xJ_X^ z@L!^$gBAM2d~KVjZI~muH|>mu+eU?rKwIDxe+;f@aVws*2yftIyN0Ky(X|of_XmL& z0F`mrud0k2!`jIXwXK()@L!Jq@gn^NG=q%kMBMH+1=x^`w>cvZOcJl zA}Um+@}&AQ?hXTk0uSK|G?`c#niJ;yK!t}7XO9s@_O5A`Tm4WORc1zy;|ztUSToGE;?R+TS=;Y9=v=OJ>Ahcch=-6Y#6HvUSKpPv2Yh zI*q0LN;{&?cuC{bB&1^AQ8Mv&M~*zE%m*US=U^(GrE?E!c7+|yBYP;G4D)p@<fZ^j5x6b13JL^t0Q5ri?sO}vK;dBC;aS+u)5};fhR^XG63W< z5>lxy4Ghh^W%X@R)O(G8LN z&tx0B*tNvzgK*b9eV`v(y{P9q%~+q=@<$7nQqVU|R&q?g^r7#S3bX}t$v@fooVuM2 z$t^&4Z4`AtY+tV~0UKedh!%DJDl(}%r-i)fLjrwmd_!0oUKNPh+QNi3*}$AWK%cl- z2xTo_e$aUOzp=_N$@57R6wI>|;v`{3WlHCDs##OUAV430;?w$KLl0cyo9XQvhHmgl z(qyeh`PYQukb0uf{8~-<=thI>z!-?i&v03B01LdNq(P29|Lg=IggiU4 zc_F-rD*}ZWqclfcYm((Vgx|dK9oJPyTe|CDKIJphou+!cI@C2w@fBgECPSq=$h#-! 
z0ol{s!+p0)8eg3MLsvxcpcY=Ul$8_|mj^q$A~7!%@V6`@d|P&^p%hWPUm5iN^6^M4 zQpE}>c-Fz!5YXc$VrbS66PHk?K{bb6i%|3lKwSLEIq}C=K>BbgZqaA|^t0>qC5kaa zE~+^VH44xr48z_&t36t|FjHiYQoV zf&hHNZOOW-aix)N(m;oHAHJ5EVZMw4^>0H zJ#=QPIZBBYn>MmSAWwosK%cZiAcrB;FTJ>Fo)&JRrh` zJ~+Y>G#sPQWtUx>li9lzI?Le=or8mC4nMo!9R5?FNdo?Iq2>jVoZP)FOQ8q&KQ-cX zag&+sz=42jk&_TZh|?I;fx!V?n%^5<9Lbkq4nVkOA+EclQIDtDR_Jen)A9g}<7D<@ zzh*MUxP&r(*GhoGV4+Dy8QG+>6uE$*RdgEvdMcZXrPIQm;9+XbL8;8l(Jqmr)$*2@ zKB?J*lVf+6p?ZwfYCJO7%&dQQnyzlT4MX6|MLp(%w%vHsi4NWn^$C!klSM=It>97} zrO9o2m(#if>7|oNV2P+ZFiM7chFmg1QCKRz>3_&>3K+XXA&qtmjJexNtFD!?++S)Pi(U{#n4bXaJkOKX{gPjEIMf zBS0_a=QuJF9)9OxN9?Hcgb}rtE+9=}hd=VC?C$&oYtw@bFygh;4aBq7jmlMr9hcj2 z=yJVP#(W5|?zx=KOxsO;7}uhde5=yF`wPRso}2NwDX>mm90lN4k+0o(B7qk-)3d^K z#oiGG29L`D<-#c}YiE4mp+`t{x&b2V>Pj-im7BK&kl(3f8`Jid_$ca(s3BE6DV zU-RZh(X&0$0H`oThHVztkp^On-KPz*;= zU5rlTwJsT@Vj$B>1Z)i}t4Qj4Ep>C^EMvgK+U5S@0Wpm@!Kc9+dYqux)bXi}*Z6US zg0V3}2*9EtArq?*iUrO7JNR=A6$I;0Z-j!Ih|m5oCE$;UKqBX!?8+{@`NEdKL45fa zN~^8&2Y^^eRZ2R;UyzSbWWWLF1c>E|+I?q0Nbmbd<;!>fUM%(&+>I*?hnsd{pn@qG ze?_d+z)lNEWK1Q6u+nCm`;uzyl5f3Mi{f3P)kwJMWs{lz>=SM=M~e>%E$IML#p&oZ zUPqQv(Oxc>w15vbUcJ-5F|0jQKA#g;?`YitSQ`b{7VTO_5h<6XoYa3d+y3VCb?VYv z+-hmbY4!=@=R(T!eaaKelRP@_0WkX#_c-V0Q_Pj|??FFSAbNoKo`dEJ=Dqv>XK3=h z_z01AJ4Q(xqZhED>{=dH(?OxLw=wUG+F{}xpUyPy9a#L^n~-A`PN@!7nK*Ck2cScx z={jT`THuuJ;@@C2YKwbV*QWMMxB7a%&|yVL;O@*F~?@%Dzt)$c!lWU7>E)xmFr z@?L&hq@^W2UwjrGHA$6A_iW?Z-<0 zO9|+)slc-&0v{z4&+;?@|<2Ohi>z77JifeSg-P%rzg`jmsLe`x@w1q|ve zh0wd!)Qi>e!U^GM!VTOU-1ye^$Q z`{NSIo)e4k#Z)ze8gzliup^`DJf>2%TcV-chAw+ry`Ketp01I(zEk_>=WG2lGAEm9 zh2F#@pj-X}-*1{&K3$cj#bO8v#OM&LZy)%Pr5Yz|>ADk*)vbhCoRQT}Ave=38vQ2W zAUs*MfJ2j-I35}z7FbXpga;;aQ6#aWYZ#>0H&$14*h5Yj{z-3tw3xO<%6$)zG$2rjq3@U zsJl&_-{04-s#yYooC1w$Gs{z|UpTd-y1g-2&aHt1zhxFZc&7@Nx}anw$3MoNqw%G zD$QogTRH7{b{6_4+}*`;Ql6XjEbfzDFN1G!0@-e!vy(C%wyO)k(WSG$GXR~5jg!8; z4C>XTpHiC*5IuA{N-EHG2Qv)dpCD^&FN1`VADQYgJY!L5V{MaWYV}e&)z&MLpv0JE z$d5M~YO6ZU*JM9)4Eg$g{uW6&Y6Z16Bja+$>z1=I1a3by^kh0MS)9X0w_8TAn5jzq 
zt>EiUgQ=30IXzpL<<222qYJRL3sy;iF^YHj+#jr$>t(Np$N!Kxa7T|gd67}d%Ui(3 z-QUV(BfR=Tx{DUK5~4IJ1~8}-}m2_}7p;<=xL>JNIILy?agHR^y>NQ-=+GA-d| z8O6+EPH05DAjQ-XEjW3VzYz=RNXh3WwCK1deVw{HaT`umA07SxC<5C^HCS3bOvRna z_AMtU_UX4!lDYAM4}=9MI3VV^F4IW^qlT}~DKg6O(4f$6B(wO*d&rkr`P|TyvT%r*cz+hBlQ;#q3;sHI;pNzH|=g zQ6ut#GB~LqYcxY$?q>@Z4l{aO5&Z=|ktpq$cUzw3IImrMu6=9?KxUg)^>bm@R zD}O#OQ7L23S3$(-O2vkWd~#A5kg+zCf*{ST+O^hw7<)pS<{} zyRTsi@d+x1?f`;ofFLhz5P3+1!u?n62h{yR+1HIm9aGKD(0I9SBS1bFDhW_yzIuU- zb$}>TnNLF(18cPPg_&Yey?3>tWT2M`-bW+v$%0ieM3S7WMCbfe5uLA;7dsy-T7x%F zN?>DaG~dPAvdG|=t*GBf$c!>$3ddlf$??gHDmWs=0{|G)ip)Lf34|GPUoK$~dVv*< zmFv|VIOWdbLFDTb-qQG8WleWhNfe1=Bp!>;b!#q$NqWz;|ERYPqyXjTxy1agxslnhL^r>4 zaod+8Hv*jOTMBnYjk7)}3XiN~5{Z(pgIapC6>;Qm%e22=T^I=X{&|1laZsC8de#RL zc{f8$(kdH$YAGlDBTkYSd}wnNgOuy(A*_63oI12a8_S3v*BIppwhqbsE$=dX@41)Z z+R8G#>+F=*Lo2-6Ea9cv*x*Pb@3rn_oNr0#;ovPW{N-ay8K#s@h)hmITzYJ^{Yd$< z1sr9mMgoJCo`2!m3zRMU#9xc*`kp8ZK>xp=5u@8;Oo9Mmz5asmB1D$1Is;OK2Z8BUE#%y&#@j^r}a=; z=^4>(Mz$)uudW~GJnk2A-Gb>IixRV1O= zx@cLIS6GwU6@dNuIFl$}-+QTi^!Re#@%fR#P($1sl!80jqOj8#u|tsuYq`zL)KUki z)${hbcax11T+^K&l>1b2lGixCdTH#bW+2@*C9q>4brCMHO|gOhlL*3R^qm2MZcx9Q zIb6*lXK&Y;V5@;TX{Z;@_fW|52$y@!X>4?3+>AA^%EM2XRzHoCjO)ZOwEmQ_)VPY} z-;G_hJvAu+YoiC7e62+0@)66Dyr~EH!l_s9YOKqdq@X>^mi}ymIdqA%{CcePGOPH5 zt*%_WGS-r$skJ`IvC*?#=v`HufCACLE{6$e`QFE0dm1bsAKEHrexFT>Fre6LT2+x6 z^Pa0=9YtT-dX%w{+h@1S5xFu=o)aG0t0%>$k)dR)-+xLtGy&&T>Q-8;FB9Rwvt4ebm5$a@SDY3lu2<>Z z-(mp7#~z?}7XS_|{$qA>o*^y#wIvgFCjLis%+k0n>UvBGb<}A{qwJnFZahJLVry+P zaI>`j35INA9)Z;iYy~$3sfi_k#&rcayhh>bg|_#F*6E_a#1m}U881rKEQ=Bl3-ENv zU5b+$W(RV|qm>q(MQri?^Sa?R3+u$VF(AYra@I6d<0jwzW$}&)T%%XLe%G+#-3+^5 zUG=SdsA<^${5uu3I=`vr>bv*k17xE zJB^t8oY)JNN~f7f)=B$(4oz|EZoFHDW+vsT|M`Is*-0ODpm?9u;ZWboveqvA$Cyw~ z0kd!j`Ml?z8RVNh+Pu={G7Jnrv`L0&WQffwu-=mPRA%DZRDt6Tly60~a10{^iS#y~ zN7>zWu{ct>ga+i4g1xjdd2VzQOSux+(s71C_-qt=3E?;lljZ>3XB@E64E{+)#)Rp;zZTPG z2@0D(d>MhD*)pwl+XzaH|DMpwrmKp^wmFztM)KaS)F8T>Z}b5;4VCKZ7Y%(r4^w1kSU26cDsy*A2YxypM+_@AzXO~0Q9Sf9_lwbw?FkgE!u z9qfCE~G#p~Epu7Z(3*sa- 
z6}siDYV9~3bW>f8UMDRL%5p+s-O+lg%&1$GSs@GL5WOHkP)otzGq&^PgGLfgW+H%T z3-~)oiyjeAk*dPg2ZV;kDt^p9Z{4*+%I@BG3j$6Q$@C~Yu!k#2hC_D}&XyLHwe%Ojvu5MDU=g9m_dKnRvM87nnAbGToL<(k8aj|Ta<2`f`$x5+rV;hhJdGj z7%4ajM@RVp(r~Da>u8_nCo>n1^|dbR)7ZMt^|g-j^;cAHyBq7J8LXAR|F2c0#qc*x z)#A{EQ3<1{PwhSyH zYMZ+Z?kR!0s}F05hZJb6k~uf{M0J4`D%X|{X?g)1%MRS#svn@sI7gw?^(FcDb*&|1 zL1nj=pdFj#%=2V&caF)rXBD;m}7^ zf&<=*b31K$Ao18_g>)9KZjgtzmj3>ECm<<+qGPSQ><6>B5?$dH!*Do9pY2zCj+O!1 zIN?5s`j}2HUPanx2`1+80hAJeb42dHyxahh*7Qz!q%s8N%Qhe{+?J~onLb=Lxb{ER zNB#`qT)?(YwE}&-2*iw+5>tpV&b!b`n+BXGV9ou!YsS~$LTB8XpG-4|lUMj_4H$9X z(-{%l(ige7$w3~&-)M%i{n^tcK&O)W9`IbgB&H0A{`2OJF@A7BQpcF`_XyyB4y_x< z-c&En@jYKb<}LR`c*zUmOf3SRROJ1-Zm^*>y1iMd#b+Q_3ycqF$`L&rNjr*_V>x0g?)V=-(Pa_T7}i@wL{5DZDw z(-VIR-mcxu#Y*QT)k|sXr1@Ffx|j^|zCISU_zJAuijHc{>oo2BnmhHdM5z@3>6}|W3{N%| z;8^fCTY}5$Iq^%=}mrGrQEWYvsCS0S7^jWQuFz68Ra<^m2a30+D zT#jTe*H<6^GeSx8TCrunEmOPF&2-$vY`_r1{-~UB4mjFKj6wi0~zeN0Ve0!rW~ga}tw1 zX&WorB-Asrji5Y%(JS)n&H3ku9g-yh0vOV^h!|K3{2sSoelp9ldsp+$^t^ir?rC-b zL+8@O1Hdtt>;_&7i+8&CE|u+@WeND-8jJX88=Kql1p zr-tq%ZSj4Qgg!&yku^wjFETt(<}g+-T~k^Dl+b4{qqWM_xGJLSBL%;^svK)^Pu}Z} zpX(*XIU7}L@v|P6GE9L|kyf=7Ir}KO{30#yMUX&}2ned>g^oUiqzF*LG*h2I*FWQl zrI!)hAX+WQ52|9M=JCI{+V9ZEMTQh}h0za80buHbHxA0YctoK5NeGfVBJDy&D}VR{ zh%ptA@If0xr~~+OL{qs2VuWi1T;4)>cj3qIjRHS-INqrd@^tOvGq^U)cVhA|Ak=}Z z@5dnAiZ_Vp#>ZoX{hK}F-PJ=Nmf(H<_mU$j{xe@CrdWHZ4&z<&7CRu5saoaX!FJ1=~%f^2Q_o> zJhK;?{yL2XRW&zh?s7SwP52ijNT8BGXjb6)3ggh`Qr?;AYAJ;-Vad`eYd3%rRamNx z=8pzr5Sqt~H;CSMfeg=`a6@g^QyN85JKdVd&e?-P&+V5H@G#AEKStKfi9lF>i+Onx zfDYpE$>!&@N>w=VftUANRixduJ7^GC^L# zG%xwm;N{U;_;qSUL$7gS4*UVpMG0(9zo!b5IGgFs1MH`L;uh{GJ21V^_|Ja1#6#68 ztl2MI7srJeWY0v={`dw$M^(ktm(S_;tILq|=P<=66uzs!`7wTa$>Jh+Ado5Lw|9<& z?ks*`)6p+e>E($#-K8$CT6aSp4uYK%u;Rp7`9vz>!OnMw-* zXuh4I2I7C%ZvO;Gz_!!Dw{q}Cc30?7pJ$|ZP1X<+XY02t`#9<|FscriyV+z4SiqHw zxj~h0wr?W-IQu%%Pr~(QEBn`kIR3ncoUcjuti(dL{X)a#xq1dK2x7I7`@#+aY=V;t zENn?h-Q)DeNVpifA0wUfaRRz4ph6EcU1l^9Kt;%)ysW9pfECb5 zj|j4l;}`XrJ5a;4B`2@$K59oLtSeu?3Ex&tRIH488?{7fcEW5$S|LCKt<`+Na#{Xz 
zHkAK-TV5q;8=EE#!P@k;J;Z{O4?IOH6Ubr{*0fTH3}?CkV{*MTNVFg6ITwW}wAw|U zCtsC)dyiu`py#-J3{BX3v3>)xCjGBY0Om*ibM3CXq({Bht&>XxWFOcD9K6Gty6*rP zFCk7u;|ZfT?{q2$q-oRfqkr>qHxTQ0+R;ak1D%g!^!HQ-Mk2E5O|_LMlWw>1xk8FmV+vf zfdoPz5EK}uu9PzKOBYg2V=e#_S!z2PR}zl8BuRuz=!!q%Lr-)dZP61J`X;=QvloRE zse6ZUzdGWwF?8K7gJ{oy&fPl(W<#eAlRNNAY;_BCB3T+uSs`7s+kVjU3+WqnFcP8@ zDRKksagDTov|-pQ%{_GYAaGNB_krEifZ+?xMWjeL#E}FyM0$_Cu=eXkpgk5mzKu=7 z_<&ppul+rvWdB$ujeI44H}C^v$M$3*c?r z>N+zUQ0X|f^J;nC=)M{|BH;y|3QUmR>@owSnm}8+XhbbiE4yVEa+l1cho76gTnUrj zyL+rR`(ISKm1BC#7VhLz{@zr8p><<%X&vXtK(dGZrPw5Lsq^Il{LncSVR*roSYcMA4p>nwbj5^@xy<7Z%m zV1HH`iZK%E%9-?X|p^LVau9dw_K=F#ux%LbWqj8UM=d+P;v?9D|5+9NeMoom~5 za|lVx7F7$&#O~p{Is6qaqG3+RtHPraJZyF3ECunzvmFus=eLFs4JP!u2rlzm$Doe& zA6WGtw0NoY|7F-?#3HnR4q*-HC=n)swVZ-tM#L^Bh^7?cc}Idg=v3rG?YL-9)6lrl(6XYNY3&wiR> zqnUnL75M9*<}%6JQ)1l%h=(vdGD~}&YEKqxTcj%Dn*8||i$NRa%|g#iyXzz;yZOw6CFj(%!?1CA+N6ctKop0LHE9M;tSL=4W-yafXfqed{xigotnY055AnDw z@WzA%zn)y)+vlNuX~F*>?k%J0Shj`HMR0crkRZXG;7*W0aQ6TqxVuXy1PLBIc!1#U z?iL_G@ZbSLa9E2V3xCbtXYX_Nec!!zjPLz;&7jxpS+l0qEbFfBB8{YfDf^3Wn@V}| z`xhG&QWR0vq8AETdm{j4iki<%9+Y5e6y;L9b13mZWGHNPe$v&6X)T3r_} zC54y(6W^y=wf zSx58Sp!+oz>$xeh4))$b7YdRYg_a1Zj_FL*Ey-+uU%dlbs@r`Ug}v$1Ctu`VRsVT) z*cIC6i3e2OHCJJjhSD4AjbrxW)kg=@a%k$M`FJKs)u#KRnGc2zoJ@^di98od+aDHd z5D6P}?lnWqQhYHh#xm80K;Od5#h6T;wBnXF)}Z=e{MbIE8gs#Z$hobiKluY3UrpSt$v##_ijJa|g0YrOqrb<25$Xw8@2 zdjX8xKBKxEV{Awn#c$jsbX(=_wco_N`~poHh$XBe7^yt(ss9JenH=f#LP){H`F5|Ot0t)3+5Nb zSW$lQ@c(@LnDzKuKKTWR}GVZkoruRJOBXf|xz@(CZwydw8uT^wul z`wtSF-iX_#IXxmC$5>F`4qmbLH^UB7v0jT)ElXcxs`{XcvIe1%30-5bR?PRonXHcn zX22ESmpgkYot${3mtPl@I@Xw63?EpL`ck*!p_`j@;{KkO{N8wp7RxS5_tt~wOw4Y- z`62G*+GtnE{yQ-=FWT-Cz1x(gKrslZNz_R#9YNbqo(qn3{X$=XZ_IEWLS)sGJp}W) zqld{vp$|E}%D+Lti^3~KLt#F>u8SklqEj;qp!ZCSz^!XC*?mXRHu?sOVp|cpt+UAU zh)umk;7GdS?$7mSDSOcpvfw(0UbH_(eq(8*eu}^;B8FgiC0`WqGV4h#}i^dm+xNs`0womMAJ2ZLqQVSLFT5Rdy$=Jt9Rv_Tj zG{FO#4XmJE+ixTHX5Tn1HH~N7Ml+tfHRgx0#bUdG2rdp6g-}w&0fqz05VMGPHr* zk&z?S)}zKFwFf6ZP^@9@2ddjc$G8CDJ8tdB)@N~fnllzt77Rj 
z)3PYH&`pj7S9oGpa95bbd_x_yF#q{bzq%!N`}JZQ(5L8+W&CRF**0Tw^|CWyKEaW| zM8-bS+^HuMPQ^J`ECDV!2ozL_{EXJiWklzdtBoQQwx3lxlfBx}d3kX#h+k(Li!WgF zQXg*4&~E`h2nWdGrg6F( zwI!lVVX+7c(=r(JvzNi|8MYauUA{%AYBl6G^a?&YAq)s3%V4JMoqe^BgeAX_&`*zv z_LYcFpO9hP#jp8Y9;y{|4|2@8uYoCndDGM+@bugA_~1V2ACKouS(b!wz^Y}PfRn`n zfJ-{LumY&60IoKxIT$?*Sw2EAJPg5zg6#mcsAlP5J-w>Nq%Zj!~`#M^_W`1u7mZ^KjmuLQ(>V1`J z;n8aJqSkajnQ&Ek9W^R3bJ}+b=EZ3b0D0vDN;u&x0miDr00rGz(rOd3oY#leVk3a- zVKa6~(emK$#=hE`GA@0tHV3pMBLWp$x}3o3Vgu8fdZ(dT*C*7`F}JOBKGp~a9?{$7 z4mJWEi0`aa5rmBALY}K5hAR0npFiXVF}S2Uf8t(G@w6SFkLRxjU514jiW z%uU_zuGp>z%%yfZevZ0FX?*HhP_#~hV`d!vaVDJL0G&+ zwQ_dx+JiSJ1TpEt>r3z%D0E1J4OeKpI4~rWc|x5+o|8S(QF4C)yBPba4V{rc7}zc1 zq~l!?z{8)($G%1oRDCyt#cXx+3Rp+}R_>E{6-A8NKg4%!N2_2*Gk2^;I7&<{WrL2v zqTV4FPSvlECkVb*r1H^RkrA`zE#c1|| zjw{0oW>495@a`d}(o;rXHGt(Z1Yv+wx}%uODk$j~sdnI?A!M;eq{mQaCu9x#vbMufm$R3_tssTa}J2 zbc6lJ%4c|53AR5tWBB&R%I5@=975oAJ3YBP3as>g4Gu#sL^jvVP1r@+*Dg!K%pH+^s1}nkxP0{SIo72EQJ{xY^u16C(HZ=c_fxb%f9oH zOO7P|(jFT7DmVM_2;4V&t&Wo4$5)=JG@q+WonPA5k|Ve%?44u+o1SEPLJ43=f19q` z_UVDdMKAT{31L&D|BnSvK_+XRv}NBFs{p=iEA(`&p_*6u*7AZQ#c!@hpOjLJ>YSUF z3fVo&(>aQuqqMU&T)y*cSmp_^s88X@&V_7Hgnf2|KYx#hf@Z_Ib6yC zgJcQYuBSYePJ0v;$;Rc5O>y}^d`TV`>N3@1r5iSc-riSE7w;@{drvb+w+h|SA|LMY z5XSf=X6wm>*|SG|4oDSkS0l!PQcbqTGO~ ze{zK7;`>7jWU>6qa^ImDg#`Ib`ha#WgWs6)GiwVHNn*Sm>ijc7j%<=rz#OKuMa&l( zx-&0_byUyog{qFaC<@3Lsx7W^u$aLwU#8hSmViW(_CbM1awF#IG-^kNV{Q12>rcbd zceqS8RT$NVe|0`s@)zg1`N)*rl|}K!f5iYTVUmVu5S3(8%aW3{9JldHn^csWY+P!l zS^tILczkSV5mvHgkGCpCH_()W6(SnKRQQS%Eh8pAX$N~#E26eH!}8_!7YGUWGl%ku znl~qXdxvLrtsW1Se)5_|vZ&I;u$I5$ov09-ywH#MHUpbjJwkxJKr$H?bq?xcG;Q0v z_!1*JC~5v;T+a08U{*ZQW#iBZXO^x_{Xu=Szz+0HYtq&-sZ?)t;v3*Qr8)T!Y#~)^H22`+dk}UHD&m+LzNuRc6gJIA2vuHbd)jU; zraeEi|5-O(ki`$0)2J2aCnLLB^O;4H)`~Ux{F5$?AJWb8lZ~7Cc76-rMLQjKAz>4f zwN`_rZ1MC?;7bh2ITbO#32B4K&dEVZfzs~-ZLp=SiTzZO$HQhVF7Oon(`>!=iw56$ zhh^4Q!TYrk0kHt^$H(N=K@WPM(b9a&FwcFkYSs#)kz^uy1?!w68DW`-;QtDWor9!G6STv?q_(gK-Lm zHqg(9+Zg(gehPnVjA2~bLw;FrN6&U+fIZeyAw4<*cJ0OA>f0^@ZK}bd6xbC-{4A)_l=(uaRDqD6`!R!}``D6jN 
z9$0_W_ci;=!g=glW-Vg^qpUhApFeSkTNG7Hb*X1NJm1f? zoNQb`_QLrTIKhjWw*9nV9_fJBbK{k8YZU{bK*P1oi!yKxQBB5&sEz6kL30A9poX}# zln)h0>WAR)Kk)K$^XLe#boWQ@5d~M{OMbxzv%BQ4hIq_Zt%EgTf|UBSgW3AG!7{V` zY*;{<{*Q}WZz7W@F>|U}!CS61C8PJ(@t&kS-WU7a`CkZ(`DR{b&}l!lcCtY6I1Puj z3{Uq*nRzveuto?id9^Vy2)fs^YYY|0K4xdv`Y^|`yY%t*d0mIGi_MEATAJIzUG3;} z+SKYd;*y^aFBx{PsUZJc_#BIc`uC?V*i$GH|NnobPU8Ri<^De{t7TqCm98DCG3O)% zS9U|d#ZnL&NTDNnXblPk;+YUu_81tCq!1wLAib2pa`U>j8<0r$ekayY>)@GyC&Dab z>oA{JW@#>a?5Kp6_*0K5u^>N1%1JD-;VqJ{{9T%Z<^s_%=ZQQIrD8+IGMi9v{Boq+ z@Ag7faGSye*_6tq*dn(mLKVFw*O3&3igz3M6bar7u1>gDA)$NDZ`YsZvJC+2UT>TI zEY3G#Ir1LkucL{)qWb-K+EtpyQl$S0l_bx4%6F_msAEq&8oIYqLBC6oN|%#NWRga) zP843a5u+@AQ~T4@5BQ#;lliT!?kjoFhnO4`vbU7rqJyRBQ| zHxbG&D_2TSZJQ;KpXK3432f$GorsL8%#Bx(Gu_z0mdm|LEf6a&Djl62uZ~x{Sk-n{ z7rV}{5M(AmIO1FYnx$Kbl=W8k6OqYc*PP-(g{}Y;gGhsb%Hfk@U^zL!CYLVF5Ot`( zro6g0$C~<^7{gLpe9)YfkR(!Iya{dDOrGi$fl3^8H7yB`s3h&j3549ATr^|isi9Jy zp`KimD}~Cm9TRj{9&jcao(WfpOd35}*2+qg>+8#Yo~_f!!ot|BZ1Ky36WGR-TxUTC zR<9V5sz^OA&Jyr zLkA)e#l^v>6?H-1FK)DI$=?jmtDk8;GF$uNwU1^tvVBPKy6^N&6v4o}jG$Nelw==y zJdF^q<s&s%d_d`+gT; zRqjb$+#|O5xR^Z`1Ryo*^>D_@LpR6T=D05J`p+ha*wuwM;5Rl2Q_)<_e>cZIoQgB@ zBnoCqo3BD72opXd3`Ei-5)w$Q>c;)pEcKJ;CqlpBR6y7(+GVTKZif%5nUl6bUj%;l z#gCMJB>U}E^@%VzY5L6oMegG`xDv9eANF3fy;zX=Te+dMt-1 z`sy1~`Ajj&KcDK!rCH|G1iu?lw5k8LJswe{EP2oG)dN}Q2q~gii>yg4 z&f3!W3l5%k_KyQtOx?P%_9$iC%ZV==aa3bCGHtiM)JaOE1C9*n#AMn--tPwl(=%&M zMR`~F=SH;^uzk6S^x1gHPY2TVUh>YpYOKKKolOcF7?U3<*ADo|P0MT7OepxZcibAq zEo?-LBUckH%ugWS@`E#%tu{E4W8IY8nO?YU`v$pXnqn5lU>J6~ zyEN9OMkTb^y;9N9t=PlvD!5JI8TiVee#xN_Mv$z-Osb?Z_e!E^@w|SizDP2n=(`1< zh~R?D`uxTErcJ({g~zmlHZ>YG zeb7Rl2dmGkI%!`*55hTWT8U>mTsjHZB`S@`tBbx&-^i=u>z=c2DtLD09$FbxoaXk^ zd=kgCO$w}C*q_E2wQV(lwq(lYkWCARIIne z>9P*F@TP^d=oZ(t`?gN}Ucjxr!3w6HiDP5JvwH4)F7Y2dHwFmbA2bk*J@~T)05ODGZK>+{75h(tKWf7Pl;5`cv z{{R9Bvw?sQh5w*Y0e|lgklP>EfBruB^x!WF1jr}EzhKC~J?Y&601;HPc5-)evvzW( z=HcLi2ujH-Bi*wEk3V4gKR~?c$5HdKkQ$_&H|QVt;ZlKj>!{oE($Xes8mhALO3xt( zpqi-PO`VZ=RGateth%K 
z<81a1?IBa__XKg0oB*b|g_{{@)BVuE$=uD%0ffCF5abLqXIFPnDiMem^m2E;hwnKP zxq@Or_|-jZ^(PFbL5N=e3BUh?MoU8)q{#qb95ZWEa}X{AVYb)*fw%etmid$DPbEMS zkdd5)qlK%D8TGx1YOYR}Hue^OB>Z{$KPAUWasv+b9-tY29QS5px!Gwdg1=0lIl<

x+B*eclCU`#-`7R1W!ptB1zD^`J0h4pZ0Xav%)yL{_tK)YgQ6 zRqg)56QYA)3bBAVL0*GrGl(PP20{(dfXG3lz+Y#GE0|&lv4Plw7z;4f0>oH=SAWs4 z|B=rPVh7@FAfA7uxq)Z3KPdmq`4=rU#0tbZLbmT|0Z^DUWj@3MWMvK62hrAlGo^+| zgI69P&g#Em^rtdAe+V^$I3bZDaUt=5{~}0ANMcBWNJ0>5BnhNvNa9E`AXW%Tll>=~n=GsF{K&(e>M{GuHLHr7# zM(jfDM(ji!K&(V;{X4_|6i9!UKjdEZdt>euzYnp$Lhf&OgXVx?iz9_YfkTTUgF}S_ z(BmlmO-7AFit`lb1r8&aMDcgo-N(mYHQ_%UZ3EE<#r@@>|Ff|B=(72n6Icug*kBac z{F@{2S0I2$dx$ouvkk-xWac$q2#ELy zjc5!A)d;{M*wi5We?0C$CjXZ#fPd$^TLwoX)OQOIEJScDgO7+0k$~LZG9hkuJ`9{> z{o(YU{3!G^6Hy_(H;to;m$9`L_PkD&lv+gJmIe| z99#>(R&@Y?vux~c=we@rK&M6g)z^lpQ;GpYBjLe=#>XF^b3CQ*dtPn-@_7mK&ah-f zo2kVu(a-{FNXYA|zjZ65F!1|~;SjZdwlVOqo>I{+xe`29>=SKCdZfcqVVEk79@=AC zt)SZ#;8@SY*^20C-MgL=OtLp$na3(52$LHXclm5?%#b5{H{ogDA_nE~po(05nmto} zT6yxn6>#ze{S<6F8B1>GjY-LskGGAHVEY}+CRp%Flg3Ft5%8&FihS5JD3Jsnn+Fvk zuxt~^*{SG2s5Mb}GDH%6n)KFt35CQvg4ktc z3ZKo`PPCx>5f4%% zW-eIIrt_$c_P|(-cu6Q_17$f$zLa(eAgg(%8Em^i&?aGb2SK%|Hok-ONUvp+GM;(N z#jsbLDsznYj_2i%2cJ4cd!D6f+4-$E@%iwHnJK|ZH!w@(L(_OG1#Ko)O>8uF)eXB7 zphr*3SxmGBHjfrHhj%q|j4L?zII>K`)>5bYB&^|rYs5rTBQTv z#+DLu*Bw!|0Zz!V#=`|xD*3iV&{&+pHm`oiT;k`!&36906PiA2G58yzhNo7+kVF)&V~J56f6j5iVqzF4FRkcg(mbmR^mD z{}rK;^4+3o`*4xUPH&S7#u^N;1|ofMKcbKs@M!D z8#OJ9>*os|53|Vi!}C6x)r?_(bA)o@G5u8i&^JEc06)pZNA~ke3rahtk)*2$ zM;usUVkc7Pnw7ua9VB-U2ISpAaB=&_Z*BK32X7SCI4>DpW3Qip*x~V?*{6$O!Ry^S z$aR&(-=hAOw0#HBXi~flUji`zTG1WkxatmK)YN(#A-Zt~!7K$4U3ZX>OqkhiB-KFd zwWtK4#6@Y|Keg+<@mxQ;WcnR@%~5#=89}@y@#>GgWCO@0pwMfjJIJq!*i+Wv&h5-$ z31~#@9fTG4pT-dRtH3McT7eDHf%s;MH}GB<8-d7=Y~l|%!UVYAHi*PA);Kam5>4`< zT3&aM?pIBVk-jiW7srM{-8;x;*mOhSV;D|7=R#fQg6f7ta)QmL-m=w#JlM#{2^VNH? 
zZYSGdUGmz3CDl?tR%@pxtq@H%NojDm-cCj1W)r>AvXDb>bI150JSd9=h8of)eWmFC zQuFi9%EM`0qlpggYv5VHlMyAD(N7J1?*z2~)d7A>2ZF&~fi$ZKKBia6D`%LBu-6AT zc~11LP9~VWstCb@Kj5rvfL?h_-swvf6ih;$4igpi(zm%q8bx^#r7`*MyS^nR?u_MA z*(Mf^@x(mz*Ck}@B~957T;(zA z18XdnDw~aYitL_LOXKXTWf}+Jni9Bd%A74w1Sd0v+7aaO1=lNS+lwh-1te1@bh`;K zZRDZs`thj(q$=Jv`t$~d8nL7^dMv=+thiv5S+9bc_rT5?(>k+R{(%dzS+ot)I3ly$ z{FSOra50vWkJG-z@8y(;4xZJ)06Hpian^&XnzW1FYj^mYSYFR_b;A)X<~sic|x6= zZHS+_TVX1a1cP)j*;H-z;g5Kz)5t#iT-#v` z#7hlv%quA3d9OFUDD((_O{NMNPq}@1-66UJ3=0g?UF4J6JZJir;Gh6&dzr^bvNnqR zRp{3>fVukK4Rttoxi&FAYL3@}QFt#&fj_1F%Z3M8(daSRJ9YT+#7@FU=Qb4G9uLbdTHzqMD2Ue5a8iihLB2o29W~}&Tk7EMWvM9v zI%^&ss3a~6X&H|tsaOlK_V*jB&km2L;2zux+XmRoHB}m)!PF#qxB3YR5o3rUrkF;`Hz<|^MJ!hR2`!EuTUZX;z2k0P01 z`3=3ow+0InnfWcDXgNFv`MrH}I=hELoCw2vHzp$+dB>g4efL+)9b9^i|NO3N}YIxT1WDi(Okj zP)BT>g=Eb;(ZY*iUi;&0P`Vm_Usxh5UGn+)f_V^+KuqpRIZ$Y1D7BZEIpt0t_ijd! z?gx|dRPys=x1s?RR_2T}jXXdRuW}?bsa&C6Xec1@fR*nxZ6@q`r=)tkRz;4vUqQc* zIz7ao?s0Qz=}I~Ihe!kE0a%+!yJ8>t+#rA@?c`GG{}SHuVd+AOu@Sxod$1i&Pf9zS z%q041ss}b?yOGJwjoG3kD-QU1p2w{5Gv4+y zRV%|}(&s#Sra?9<~yg{5H`!*;;Vxlg;n-#tBcnSk3zrX7VO|e z)(3A$B%-O{-hm7N@LN~zGO5SpL}il-{#@=3k`#7%(7$@3zC{H$GP#2!qh7NeMN)x_ zOL>8{HF)YBWYmaiR~H?a5J?2qh^cOMsP-C3feE<`V9f)#)uZ}<>*Nj;zofr|$Q*(S z1#1BpdO*dJ(H-PDr3={qhRobSE?xmWWAJJa6}usKOGy=;eLXYrPdiNiX-1e2v~Yc{ ze~~Iu0^K5V{k*^RtwcW%YTz-Nl@F61Z0Se`OiR7@o=u*zi>OW3tKu8mEJP>gnEJR_ zx!L&HiE{UgJO~LQzc>Wg*KkOC!57kuL#2}@%H>%5r}K5P2oh~IGMMEDKGS{5utGhO zl`kYoP}b+!Zjy@|CsNa%q0+Ta{O#^`Fq_f+1Z5F zt}hQB4&SVNG;>MZnasyBW*{ok*51|(@vSbDt?|+>mf}$_)E4m2z~E6ec2Mzo1x?5_ zNa$JH(rA}3Gx%IInYgEJJC>v`_*<)BvavQH(Lkp@ZX=Kv26T^O?h1$#&tCK8jF*46 z77&h3jur@Upbr89SQS(cijdn%3zwf`XYG~yY1YLEX347Jz*p63YBTGw)hZ_~QoZmu zwxF5NLZTcr33}HmP^b45 z*D{yq?<(vis(Bw9YG(b`n>uk&Q7otJ(M`S#IW4whuE$ce@SJU_4++cUeroO5Yq&Z{ z-=?Wc`?bm)YKQ)@r?&s~wpz5P*h`hinthf;Te69F5I=ISXaies-x(iM`XfzSck@D) z!c2?rEgu^Ja15@v``zHQP^Xsa?|yg*g|y@xW>EuEq&Q|yyFPFAEa9@rc`Gf+l-2L%EeBm4D?oenP*5f zn2`X)UIil4`wScdM`%O+!8)N5~+`$d>5%lDOGMikM|0G2mSe 
zx(a?;mP`ChF@|eb%l8P!dIK$EQgn^Fr?`B1NwZN-8VX6iA1_aQO2*uGw4>Tv$_0BI z#R2|yC~p7l(+N>IpC37XF0ZmE>Y)R5Pq?~^?;uWl9kfsxf}u4u&L8ASF)5|n8`2Sb z8WSD>=Asbi!B-s}Uju{HC0|&Ni=#=(YiIXb0Tp74=qwTJuZ1jBT0K903-w#_|j6v z=WuinpFL8ZIXJ9W_QYIs0!Z>q1Bz^+s`TwY&%dm4z?vHrKUP}BfWBnzo!8RLwB?S7 zq0}lwdu__%QT4k}8dLb?Nxp13i=K~kT852+gi@7{j!R0tCbEK_y*Ud@;u4KQiE9$z zJrlJRmQt5z%*VQ(ryF0!s#&ayPo}_=spTR55IPX@V;NHwIu}|8KWQgalsBucjxh}H zGqG+;J6xCN;MiL_kA6Ui1O%xSr3@@l953L zSC{}DtqQtS2J=*~sR~{&4Vo-ou$CD>6fft9i5xOgRafOLtPn~b6kn*o;kvn%QKHxh|0!LHiN`LWqX&$DySN;>+3icg`xNaS;)r$qT#34* zLyLQ}Sb0ERfgq7Cp)QY+)Po`4yNrW^Ldx6Tk}{MNTW2^QOPNVGPh6i_E-UjnlVY;E zM)b^de0*($3rqrTb_bc)yp10DQFu#Xf)37~3ff~&E5m_pApH)q7RU&8Ba{M{g}qcK z)I)a==yK?N&lG#@casY4W#)u(O$Q4L_ou%S_b07|ktCrcaiod@82STMw^Lcqm7~Gm&Teym$1AWyLFRGs$X{yxJmeZ4}4n;9lSx~2CfB6 zJ1+qJGu-`rDC3XSJ{fozC=>ICK(~?Bws1_iP3{tBcjbfa*<-^cjv7bD^4Y0ga{t== zx%mSVQkn&18SNKFzi|FneZ|{A%_VMh}9)J6U!je>WA zeAW{56}GWFLW5m;h|b#|F{*V{844qaGXJ|)YHiq%; za49k0^NuL;RW~s>q4G_3bBQZGpI@dv)}}GT3rY0Od;cTREbG@qM0?&NPkMSUFY^*Z zoyCBa_Tdyv=jorDqLIuQnU&11xF!>7YFgcoa^*RO40D+k*P^LaJ}`CzK@mYC>P-T! 
z*9dTX_Cxwwpk+FzA!dm)DD~~M_=p1$LQT;*rT?*Tt)5Pm+3$anpHir5 zn>I2a2^~1Ip#yv+W;?*aQO>OsSf}CH-~ehj8w>dM4y-f1o)qA4EIRDPO^_qID60?Y ze*0SJyZ%zVS|_-bA9YK>2t0@rOVy_2i#z8N(y3FY1ElGS1IvynHv1E06OF7JUbw>{ zcpKuM6%q9x^$>t;T9m6w-n?;7x8~%Ou)Kqy-NYMW<7#KEPo3FxPMb~Dt>St6DZ#{& z4vTNHf0U+7t-n*4DW}pj43@8JzFzd({^U;cKg$3`7&CN%@ z4q{Pm09dPYCif@{Z*thT5qsIVpyW6e8yG!fuV82jC5?0XRGWqQUhS`7{cmE`B=Y%9 z(Eg{XQ*;R{D)b!zfjo`mu}RhW<&8O#Rppk@f#bE45mij(VngsPK7K_TQ)MahX?BOa z8o!}iU?3c(N+8e2cExXVm?c3lb*l1d4#WAY(Z+{O^zyV3=+Mn>w$q;4ggfVV!!TDTLd9fZ~W zw=SppQZ#N?Mk^BJZva9+5&nd;6#2SzBm2r40QXcg)PCdUZK7*Ky2(7;j=v(oZvHUA zMm!~8Y#n$9RBO5Af6lJ8>ireRv8Zb9^yXV}qjQ$eB2mXfd8>C{6GptM72gu~l`!gE z|MY@xt{dB-UkPNnaPpMyZyx>{8c+~u)rhhXJV0epDF3{knV=zhp5Pp0%NcfjRqP6U z0n%8Lj&i;I>b)%2GxOOU0G|cjZq@It`ZH|2eiHN9YYVHkq|sL7#{O7S_SA2a7u8z1 zSB#CleJMoDkKK0w54t;u9X4N0eN9S0l5}6j2BC2bd6rjSNx5yuFiYOo9Vy)Tnc`86 zd_~QIX6sIm(doA0LbTOo=ARj~I-i;4fH2on;rLAjA2BBS!!g~&e1)eGb(BZxdRHa4 z=ShK+CK%$7Ybr`)aOR+UW_*)bj|*4e!`2zsNH>CO=PmaZ_L|)!Y#LEdu1_t~sy2R| zV=xhM{T!$#g#!+H{!7an2K*!R!7k{lG4KSQ1Xf%|xKGKoBs}|y;F<>(T?UYJ5c#uA z@^%KTBD-T->1A!K>(qOE=C5-PkTq;K)TycBVroz(82;IQ)1PS2yPZaKnG$}&^Izos zv!&Nxy4IgSav@^7K8;+CRn^}6hMbwPDawTtuJ$PcqMlXB8JxaV2`i^hMs2+&YRC=Qte5(*; zt$?ep1*Yci;=TOpA(T6F;K+y6)>hlz5Z|+6N0BaD-&7YD8A?zhY+({E_>NGvfC@j; zGO0)hyrGw``8YgDxb9~;lU8JU)EfQg9QqOZy!QEW9P+h&c3W>CN?blky;9aTpL`nn z&{`{BLwLab*Qb2+{ynn(-_J`+t%17sV1x~_Fhi0J2=#*e)4)zS+U%a!a7{CUL~P_ zdCQ8an9VzzMOhChQBvv$qT+sR`8Ygn+x+cvRKeSOQVW{2tKw}v#9^!Ea+2w^SN1y76T5l!EotZ^W*sl43d* z%j%?LqMWT4o6MA0Nyb)Gg`uWrki{dAl04=InxlV0g# z(o7OmHE13n#1zFUvJmtUHAZEx42vfJC{fnp-PRKV2!s`)(aUT0woLR()xXq!#Amy* ziLUV3ib=f@J_t=dJfsNV>=MpAT^{&US;%XQ|LtI-W!gc_;JL@+{{CrYtZ#HfSZ-hA zmNCgXW*i>1IQvY=J8$7>7BbI`7HJfcE9+={b>VT}9=(#6QVZeFCu?dJe10WiM`fj< zP#T4m2lSC)%Mb9nmdw~Hl(?sLnf@rz3Hli|y`}D|z@osSQ8y#xr{I!A&)|0+eB^$h z*mFxcU6=llca%qS!hKYsSEc?6cYeRz15?n6rs4fgB}3K-ff8XrV$DJ`@><-6x#xvw zPm-edqj95vg{zNg0YetfJDZt1`u~4FED9?~4UW%xZWKTj+kD;C? 
z+dvs@t7W?1%-mBhI34w5yII4QMW>zN$9YWjV`P_1$!ApA&bO1wWjld16TD6fc0Z;A z87AwDUfTs29CZcK)m|~=LeMf}6Gm)b>|iIHLHYcExfou1i_vTaBitxI>Ki%24Ft#X zgrXGC6)Hk&gN!tjUNPxgqQY(}azh(oK-2`9pzJ5mb0a=CxI!{61QRzA?({H<$jcYQW>xE*-Un}=_e%2ooZ96O)V*GVW&w*+q-SUBI$f!g(XtrGGT zIXZYemNFr{=AQBhK-HO3noIk985?fD@4?P7*JrSYnZOAYhL(e zC-`;hqdh0ROiO3z*}6~s!ovJJ1o{Ah+4Job@R6;tUp6MFtixfw26<=t>O5GDF=A|f)Uy=PT^BKZu7h9aB;C0{eTA%gNO6S{qqZO z{ENK`bT{3i`P0w%tNKB+*FrWc>cNM?yeo!AaK!F8{&N#d`#_>4f3D^r0DDKH-wnF{ z)7w|I&YQ%0*&LdZYu0%-w5f8m-*3 zVtVj5KE+5qaXvVpT;DJ`7_*}2O$$@*5l@S*49J??QUgNmcJdv;<~F?FG+z-trZ{ER zp=sSlBVT`TDPBQqkC99+O+8)RIsK=)W>8{A^HcYdnRg+NWS~>aT2ZFzl^&n-$+kB< z(`zHoHB{z!D{oo$Ow5{F^g8YK67p9KK9O|Rp8gat2-ymMGWBcHfx0I0+0^K1Rsz#UDsSzoZ| zs_-pJa0Qj%;0}V}T~05=I20Cr1TAKos#P;q*YNT*U}}5`f43p#)T3c@8CZIy>CJt) zvfxB8zRrJ>+1Vhf=;l!9t=iXU5F5O|U*$5{*dhr^HXOTYpbS z3)XMiItP2*?jBg!jfgMm!`{Mt(}D2LdUNRR?W4NM1@u6q#g{wc@CVooP= zx1(W*XLC~vXM254E+sFoAx4z_Y}b5d>$=rIV`m$HZD{AK36BvG2~bqYSW)CJ_PI$1 zdxoqny(#x52&-+@Q+N#Lr|G&_qQLoqbN!atYas{B+c5`*5E}=aw?JP0%~`CEnHgm~ z6ke!U2YVbA!C|689((qwc52cIYbm4#vcqw$zVW^6k66 z#DcrMn7-DmfDdZbIv6?nD;5nFe77Ud9rro=NPhX4U{G~Z!_dwrrxv`YFtuO)*-$Ia zVSPp22ZrH@8PE6gs2+W)P^{6hEC-yR(3*q!65 zOBa*V3cVC^A1AkU8FVXHD)aOm3^j`pNoS31_(Ar2DP=3%x>ggsMa!^q|>F%BX zVxcYVL%&$jmg;T$hAa4VA%C6l>c&u4utch+@el^yC6XmJIVw3gzZpbW-v-*W(&{JW z3jK|x8*t+}NxhlrhJwWP6ZE{f~&E**#E5*BXMcW&3XiQ zwk;hOhT9m#IB-JExZ!t@9goGmWv`kmMbiTKp##gz8ua{tUeF>jY)YK&nzA9hnm60U zgNk@jrI|W*r4Q#|F!9ytFF>dF*Wq;&r7&>1C${G8pCWFa+`8L2ESki;!77YR9k}U`b2uSZlM5K3+0DqFfLI|OS4pFLfAwVS51OcTc0wF*M_c?dmGsZpd_ucz`|9t;3fWbr7TF+YZ zH|PA#WkePZs4gz80007hZ1$9mi}n zlC&$azL6@OYxBf0vav(F^i$ufiIij+`=l1dq&n@A+hCmBm&|mIBQ@@%>~s1jY*%lj4jg8RI;>0QfdvOdQc*UU;yId zh82Q&eXKlwQ7epTrUgDK%nQUabLeD#!W^68n1S==P*5Hgo4 zTORY&p^L%UnSKTHV2oVoXpMY{65>Hmpk$_tl+Pj0Xd7IOmQjCj)!9|SD)&hHPN8`$ zug4ycJ5)FO?DqzlZ0!d1cr%PWxx%>J_*S_Ppz(~GXGu*&_1cl+w@Mpr1-{$1ST)Vq z??1a{=3Xp`IL2qZ6erZ@M+S!FXN}e8X4pO7c`(&x7#%~2;W8e_RJkY2k(@R|yBa7` zot2pP>xT+BwdwaoaQ|P$7Y5RtuV9k$kV$1U2%&4B5TS6fQ0}7aW!F-XmDkAlUXwgi 
zpsCW9n>Ri~`#t;UecE}niNH@cvP{;^>Rq93U8IPa0_H)0P`rSdjC@N|rFf)Ga}hsp zLU+GaDpJ5AcgdU{i;6_Syo2T>vmsmgk-ic=ZJx$POp8*|+CH_5uO_JF0t>rXXt@@6 zX{ML1X;w%HJkzBTkrom2QNI%IIr9)eIc7)R-7(0HvU*r9krecaBAJMkHsLs%f=nFx zVU>ObfeYrXb|@&3G0v7z9&k3?K!XBV+|yAZsI>2|oc-Z$=Hi{oL3gpUC5DwxQKW!BBJJ!O z11~N(-z_y5hYY9EBRYTb3mrEVRlp zj&{X1-rgFy@qI#43SnvqkAjphzezUd)>f=J9BtjQ!@hj40PoR=c`Rm)milVh_S3A; z+^d~oz$&B>joQZ`{vk0!E5OK%2{j^#>2lgX{Aklw?83Znkn{9(U#O?2{QKs*F~Y;T zTqjc*-==Nx^pTOx`k3I|S#(g<^&&~G`1G5+k}##u%fDX7FX`Ho;NH3zyYrbg8ZwV1 zj4OjiFAD|ah5P8O3)sMnwM}))gfA!O2!*}ZQ|wt<;{4lH zQwlG;wT%$U46y6g6l}rwy_By%zVAJMaYACsUv5j`ks5cx*QzW2TCVwBEy$b_PF+(c zL)z04GVIDvSN_-I0s(u``B!h>Of)gS>}}b=h~=@7wV+RD3y>?U<8Hnee*A99iCx6w ze3pc)8%;1qB=&rVhBZ*|le}cxno>!HC-sRK@~B&4HbT~FyLAgz`K1>wNXV#{yU7wN zJ|L62me7xavu;_U1kBogg6uzUO3AfR+Apakq9RxPissW-zMKuA9K%L|;YTG}?Zagm zZiEdZ7)Ign$yk4;e1yoAQSXl~5}lSIorFVX4A612w8T39^u8c1K65qxaZBda4oV}dwVRV|=IEA+%Ve~$A4xbz@e9+Zv z$zhVuz7+=Q&s-07c&3LKs~dF^|&`k(sQ!d9$%RoUCP5UmVj!c=Ty5=O=$hrVcDA2fJmT*ykg!2gZ z>I_3^+8Y_eHq|RtWnR#YUz98Ce2%?oF2D%r>b95xx_olSeQsCYa*b($utp~p!{$1l zHPpTp)$?2#^p5oK_~`ws^_!3Cvqn0Rj~1y>bY{7U^i9Le=Bk8#vvF~4;LJm$>ud$m zzP=qL=VOJoOtVMYrWuVFNGMt<^j7wrH4zl(J&&%)a#366#pv&M<$b1 zn=1Z~`ySw*udEN(;#&ye?OYZKQ!7%dnWmGwGZW@@uB4GgLLGkZkOhl8@IR7|?FH&q zH++|{Gx+Wzx#P+$ga?W>b;4H9m82sNd?t?yFzp-0XUNcm1@iujHra9p)ciHVVmRP-l^J>{S9&xySjSdeb~25hZj%lvDo{mzGO8f zcOQkgVc~uBgpol(6!SnbNXZVdcO2kHrkUM30&R@9lu;o#-W*S575ZY#fiiD9om1nG zW62(!TU7A+JsobEc?yzNoE&bdATQh}ge-U$ss&jX-ECyewu|-=PuOZ!H#VuObhqQb zb$(;@8PPhpl0>gl#k#%i2H{hb)tXTD$_pfH&2FE5c&7+S-h6D% z#kd_%^`4sheAguKJV_gH^h|v9;ndr?NT4gw6VujlJo*vBl@K*mgPvw&TGGslI{p}s z9SED=UEsi!6^2V>ca8>1Ciw+G@_&YhvS0M)6CPGO(sAIVr`F2&|Bm#(GBywT9-r^S zR`91s`HpcQ#GNjte+SL7fzt$fA-=7R;UDeq{}JBO)dg@vfQ*F0oNW2r>AkCp_7i(z zT)uhXL(ii8iQjP1F^b(NOKST@xyMahsZT>cj~{omi+pi0^DNMg*g~k4f~6D}bTNmd z-<}b#7zRyl>1coN?Fi{WDwG`g#ZJ(p_LfY~3};tiD}D+=`{WhPtMi?k#=^ZzGEB$n zhZRin<;{Q#3InA19t`GF%Le{eAj^F3DBYX_V_pJS>{_(5UHo{kP~}S25d#U}4gvpJ 
zhgcF&Z;M9zMA;y>@;~~J{L507@TsEmRMvYBw7tp2Of;k~5yFny#duVy^oO;(><+tr z<)|8@5l+}7nFn~iJ@Zb`*DuCUTPz06Fo5Sm+`J-f|DT96)Ir@b?K|X`+OjlmYa+rO z1;TlgriL+<=xt&6c%T5hE(n}~Y@y57cKyhd9j3oi<7cLzDF#u?gqW1DN zp?bc&o{v_mA09WJGD$l1n(UYD{ zCpPh^DV}M0cjgO=4b~N1wT7mxE-g=$puIX|$2Rcw2)7a=q?Z`=^*7kGkGpe`CIGoLVIV1aItB41UXRsj&uUuPpIpE?TaU^Ec2U%)Pge$U0BOpk8)J!K=79 z8NTLI+S25=2%}2p&mNw~%}b*^4B%6dK#FB^@>fSvMr5lC#hCXjEN| zVIJq2CCWx>B16fH?vHu)gJ`n9JDCi6`;oj7(bIFjRX~gS!QN5TK{WzYnSW=(78ThM zX4LuSGT#L*miUhjPMYKkZJW??BTppbg&}Rj@ktST3&EqAHS#w;8Q&NN56d@Na+;ZG zHyG%Z4Qw#W+3c6Y{sv9q;s^DQ30T(N30uG2?AVJ=@^ZVx{^(vlv*VbPD~Tn<;8;A$ z;BW>IcPHevl@P!ZjEkvGeeJrYR^3$vU+OLoD-xql72`bJ%Y9aQHyp)BYWl?=Jhzd1 z=}h>dlw2%&{>omt6DaIshY@=#!^;fG_H+27*a{|6Y`vm=q*><_Li&=F3~c zD-#fl8(WY2f)iNcOX>oD-qQGv3pI~dj|EVDxw>{IY^ZO@1KXHZY~c_xYk7QuL(JS8 z{BBMxc3`Tl(hGK)CCqR-`jg?V>wj*o5ws6jszwm^FGPkf>$ANYL4|&dG=8O6Hymm zF19NgPe7zG2rbb!q$yv+wFe!wH*R=j_#j z;CMe55u{t@XDg)DWh-7s>+&AktR-XF*M&v{R`k#)!)z%X@0*LrUj=r(4r*r~cG?Zk zOIP|EXI1Y~vp`c=p+Kk2lY_M}lt35huGZ+=<|_O8vZY^n;NES``t3R(kV%I2JxLnc zzX_qPb}s7$OMGsPe*S{;5vH*<$G&wvk@Om z@~rA>zLhnvh3t%sveQg;wa4e3Yw(?a!Dl|-jz||%Pgzb?s8+W=xHjA`Tp7`IRwJa( zYsYw|Hjoju`U!ss3hZJ~>RoEz30qOSIrFZ9(g|DfTQwFh1pp5I#xl~i1y9&?>afh~ zUBFJT$O6?S@SlyBO-_S67kKUSXYjDlj zuuhqZ{5?#?E)7)=Wbo9Okhc@A7|Ml6+oj(NOzv^HYGh|{#=+?HrTZvwP)6j9A+_)U zd1Iw?WuAg3g#=SKg}kV;hNzc}h0m5|N+kurfRd@P;;DFVREeo}c%H98(Q3>hlpz(k zLBlX4DQZb_2o2h)&rzP8{%4JdIIfF69Rk$lhWF(^=l>vXGS$iq$*L(slwbsMno&yA0R%tA1198sUoMyaQj7naseI7y*;T zQ$AD3^DhnJHHOtYv8|>(uFkKDaar*EPJ3n}q<3*CwUex*$wL#!Q~pdqDtbC%YZE7b ze#O}4hhQx)`B;@@RY+^yHG4>glGls>al-Z^mx%xe>TH)7D{623gsuC^lH$QZCO~!t zEgf&UgjqK-eg{S?!JnK1HC131(MaQ>GZ-EevCD9Lj}ZUH_6MJ+xp_1hJ$l|(2g-m@ zm+Xn94UN>jhHOeUEuqnuX1+;m@u}>BlcA&j!<CRL2fiItto)zn~QUZTtUS-IW!PA{A8FSs_LdZVQ zwQovOjfovJYl@z;C4E+eXkm2kT&J68wGL)N1_yxuoW4{4IeW9wV$3_Ci?CzRhn7*y z_~TzjxKY=P|85H2u%y5ha#!afqXg)&bDS?clJYH5agh5C--8OT>xfZVB?{%L#i$>d z`Dt4BX=h)CE%}GOGmQMOFi@LcXl0mi;o`og>-)&7%_nS^Oj7ZyTc)6S2wu!`UteS0 
zJz=}t_X4^)6%ImX`#@HJcA(`k2-Q6@E}2yL3TOy>*DvN6hgW&x@g1g_RR@&WiSpOSdzP)KXXgZdd zunIK3+A{%p&;L1$EB|X4pTLMQUH|3t#enuWE@uV^tM1{KR$1r%_4~6MbMXDg#h)j4=~!!3EOEgPT*jD?0)(n4|;$mX&2^%j%*rkKIxsarNyMp0PHr-022m==wnfGa?4QW+B ziN!pqy2-Gs24FiuJ&vf7^2MWWa<^PzcczI%TC@2tGY0(EDO*}yNQa66k25lWQ!HWD z=)5e$`^d_h#l~=Po6rQ#3Z0fY~^)%9*2-pqSO+G=Mu)A|y? zH)r#LtWBwX)>uX%;hkT3BlxMZAK1$mI%tvIo znOmQio8T+;>q}@tmqj5$LSW3sr^8<6H7Xe+)ze?P3~Z3nwq^BIfs+Xq5?CxPI!zl* zXI*0rJuBuyD6wu6>kFYvs@ULncVoyHoo8d1aK+~*Z2ps{J8n&c6?3`V^?Vk#u2H|9 zPq;JV^LPETT-6|0y;v>7q;OG5)7cz$yWBEej9X8k)1dN(qZy<^ZscLck!Chr!+_hT zlqXUH5Vo`mf(p<=#fOwh1-BCirLC|pyNE-z@2UXGp#+IIq@qXSVRJI6qSaB?m4&UyHKnoE=b=snWqss6_4A=p`3 zC+2G-lXdG29Ld>|&8gxuUSbelV?+DaPbiV8wqO*Pvp)@AH|N*6(najG?lulBe>rpu z@^d6BWZT2E1)dsk)0r@Yw(D-Ok(r9;l;FI0k=I8zb(VS5eX~z*a$5y2z#1*$9S3mH z9I&$_Xk2)4>O$>mLRF<)nDv;8TQhjH#izMb>;tq--p#B9&95~Udo?$~xJhf(y~HA6 z0yg{sp6J)gb%gAF{oab?USW;VbU+$+5Uz^cBdf(>hZ7!lPGJ%-iSGq0pQm56Zf;N4 zsg(9<;#a9^uWY&wA#YGs$}La;Oe$W?uBYKoV1*zUu#A6H~2mKt6bPT85 zd=QoV&Go7(W#EsGwf;vN*wpICA_t@8>>asD@=x-uZ>g@Z*H6EFG2z!$2O=p;+BR|u z*KZd_nc1fPIP&qSmuZ;EQ0sas@>~9+P?_<*N4Fdhw|Rr#r0Q@B7I@fuazY|p)pZ`I z)Cfcxt4Fmp26acv6!luaHo70vfv5Irp}cP}OjW5V>G2mARt zT&8{tB&j{Kq%Wv?whUQc2W<3Od{5K<1)^}PjPsr{&Y1Dq?>R0incuO(^QJpGw)rXV zZ;{Uj=W16^`;|N`VGmZdTpDV}EhzPU9+=&g@D*>Cm+F^33UP@b=M7%2%OuNc5Vj58 zyGI;3#14i=swuDWnW9WQL&f^w%lGp-36?m1ZpRhU%5vE)A78)gfUXAX@o}ll!%>YA z%hYnSVvbc~*THGqG;@Dr2Y1J*u$8Q*+4hz}<`-wnq=OJvaphx!ksfCCKDyY;9v7lv zz6RFE+4^twe;Pm@&6Y%kVn)bGMt|-Cr{H|Bm+O_N&f@qe!)|mT$B}e+57m70GWAqX z$_K|a5|T-`TIYBNFrB9s%!=&@yX}VN;|~U()TU`3W01`)^-b+Cs9>+GLbk-)rJCuG ziVEY`Jye!^Lbii$oHx#7W|7Jo=UE?t_#mPN*gxu8lqd{Af1}(E{zw%x6Bbw`3#BK- zpPK;h43 zmoCr#A*9AeoAU5hR;}Q#BdK|dVXARJV_k4&9agHs3|W@lOl)f|aCR4I4Sc?Eg(*)q zCxLv@S>P`Prf`KX=)Ga_4KFTmt`%LUmSLkS?e&ff5zr#t7E~f+Gt{Wo*TLVvQox5u z4>7sJyz)NUtE%eyuTV4jLFETa&x7lIM=zTz3a)6 zY+y`V`A+5HMW``8FOFUsCYy$mxzfy&{|%+YE)t}iC{X30{vuz`Ta~;$_1bC zuQ1cnHKRb@Zknm&w^cXM$=QXN#HA{c?lJph`{pA->q4m!j3T<+Ebm3RfSh4n)&VbY 
zKUBSU@a@f+*zHe`)z7}Xg_{?ux}Z6RGOK)V;LAN|R#Gk$l2uJZ3+%Mm@K`Me<~VAp zJ_0eAGR=^cgztedB|vPww1+)h2ipO5g}Io5dLVBFg2QqV5LW&P!Bb5E)Pc)nfOCGLFU8v0PyD3g8E&dT{;A^>26?_W1x}0_nHYM6jjg496z4uLYKUjHIF|s)eiDzhdX!>wB+8I%$u}Zt{Dt zM8-Z2oG#2KJX4c@(6p354nTBfDK0Yln`mdI>ZlKggLsrV%@jF$!o0_Op9b}U{xd!9 zgc7KS$TR<>G+xN&4O^LXEe`h1cFn5BNmtSEDL(c4Zn5-8Tj=Fnay7i_3f8uYL$>Jb z1;;XeUPqbwYC!{0n?j9fY0FIPUp;2r?=1_wj=dk>NGMXU@2B8O82cb*vLb9Rr_iHm z3GYEb{`>5j>n(iCI8$qJuoVC}V^CdrDprFLUW(yPCSn|KnHrVz&fbog5X~kuWF`7V1 znKj7RH}&ye?6aT&i5uq)$)nR+6x<|oKarC^80fr4mwnROGKn}XC#*VL)=)U1 zUdrDrF56@vK*DAK@UkSZcV;h}m0j*E6&1y(hmLY*JuWe*eHbWcmRhY|x(dtNyUj6; zJSUwtiJQ5g&6K7=<4)K-7hbjmV9MZ$-BF{dyYjN+$_7&V$F_;Ug$K8NbCpQnj4q@h zduqnR(Ck~7KYslcx!ch2d~Pc}T;}i5TX5!@K%RQRO)jonzz&m+l7TiOjcn;}JS;;j zG3us7tZIcx-Yl+pRjS7?bK3lR?;|CdCLf01Q)TWqUrTZ)(gGK3Dt;VkKPWd)P`Rca z1LG2#>&lMajl6LM*_S~Id%eLLIAP-}BW~TJ^E=L?7R5MyBDfu)orR&Fl-Dy2sGo9M zM(QXg!`yv9USS%eDS_xXzAaueCFF0rMoD_gv$$k_`~)c?F5+v|`L%~4WuWZ9;#W=J zQ?hSvf%~bR&ykgWZ<$+{Mg5j+Ty<49?honZNtB@V37f6#sIY}`+5;DasA$_Vdk{-j zsz@O?=b}uNBzIbQ{4kn;-n_wSdcG<mywIH?`dV?Yk=-D4cdQBY$6njGw znK!0{oEsZebBU&qNs6~E_OWO4cu{X>bjmrE(Rx=WlZy8Gkhwc8$G_WjGyHCzM~VbQ zCU!sebj97D#5bm?TO&;kc;*m}=s`|hwCyE~CNpbQ=zPE}OC110cD7X%Hf!Mcqt}i4 z70f>>`1mWD-KJ0zCsJW;kKnHjDY`n2)EG|?D=L+6=~T*p>8i!ikfy)AL8g^OC+T;x z`?YBWm)(tAfz3MW$1?0lFf{Z z9UHF~fZSjMkHLA8eeGQ{Wk` zi`kxe^qVD#G-*j8S(Qj6joiwr7;xR5oEQ9Mn9I%ATyZmhrAV~&PWPtCbpN`>BoGeQ zsh8C!|9ar>Xpzx|?&51+9!_|Ts0JGw6Ru|wwd~NF?)B1^$ulOeux071^hm}!U#5zX zZb{z<_B~;zkEWeIM2>5%2{#kSp2_x)Zr@k-seUDxxp{82XE$4_=_!N#@eOS~ALA@3 zvx4TU=bFj~j2{MPc}U1>%(OK}N&vSO*E*(*BNd)}G!_WUqt{~2zoMsHmQZvTXc{(F zN_BRvAK#B`Uc!+9HpYv-yzJ3Gt2LZ zl^QGQC=!GSsFdHd$399oHIt=}WA43jB=rQScDnJ%QbK*W&Q};`7IKQCfo9u6p{-OI zgG5|V{L*GKPx;71d&ZQcw{=k#Tv#wQi^_1$zf@zE$(3+|E{LpeUjT4vSg^q76H zqKgb$$FCg)x|vZ?CWNQQxh%-g7#G8ni%0J&o_^2RzvB<)wbVI7J{%BHQY4z^aiP&7~}F%vMj7lie;li*sAA zcZU0#=Ugryz4@`L=oWH8k#lES9XuMvAHUy&V?EzKBGy~<*p??3Ow?Gt4)-$SDc38q zMD_Emo4mI1nVkflilyY<8a>WGg1|cmR*WR_G`PBfI0F@L93U)sk3$$?{q{zE1HrAm 
zAs*2jAYQaFDF4NN@sejEh`HiQHtX=drV^l&HrUwIAkBV9wYnytblo`HNXP55Zn4aZ zfB?tG+A71Vf=8V3;|2LyPQ$mJrDXe9emA0g&(;jPVSCBBDx}b9z&Gc2v7~XNS&B;X{2 z5|7V5hEfwju4{k`%DUz$2z))m#Z>$Z{3{nJ!Q{9c_>s5{Ww<4eb$AO(Pb0fsWsyg)Xquv2XJY7+ST zP7&A%n-mu#zQ6+nOgvC39^}5jKqWy%Iue9TVwE6U8$ktsu}EHw#@(dHdQl_B&`SYv z&u<`;R+ozZ@sC6n|04dttsFctiAx}5))q+Z>k+-pu=&V{=yJ5{<$94(v96}00CF}x z9=xMHV&|rHd;9*RK2LYitN<~BLYNRe45(K zY~EDd%Hh6jEKm46g^?WzdoMuZDcy_OzJF#3C5$P zDw+mVZuo?)tV)jBYAWU%sfnFTvPNT-K|!{c`TGf5t&` zJM0k5p1)QMeA&4HI!&HMgUaYHWi+N65lPujj6Fr;@4*T=`&PqiwFY;`w*$5icLFPY zt}WM6w-Z#>|CIA%Dq?!|NLx4DfSUd0nX= z^85o9lnDS$fUuVMrI-g?m(c0`9=srH2K%Gs5I>{IuC{=yOXv*@JDUEr+@q~ zPV8EdZFLwfESZH3iU*=h1APPk@t_6Sb$1_6<5r)h#Ng@6<~|=ajbYPo&8f>4W)|OB z1Hku)tm_A)m9)xA4FLKw}*oYVJ?b*N1#P!sXT^9^HG~p_anP*P<9V39bV6Kixgz z;^E}k>NgJ*>uiul6DDhKoRx~TkX&g-eYtZ!zbp@{_y?3W%8HaUVujQ_T*0egg!a}$ zTYM^B9PdSKsaoZBE=8|L-`Hwl5Y{r%b-;|HFO%R#aCBq-)Ct>E4#U%xWk&_yZ91eE zSW6T|%m*@^BaV})VuxRsfmJ8|=WSX`Vx+Sa$~yhZ0^kXDn~P2*$eTD4@)U7Cc){+2 z7`|WQ4)=;Z6J<4C@w@|V*>gME2ATb}vS0!AG-7JOU5bS#k5-U-ry`4HSpQT~$UR}p zZ6=N$5}!Qw5o7BRY)pm-;~559LsgAm!12Mpd~|B1Gt&;xowU@}z%o^gdWD0XI# z=AnQlUofDU0HL8Gref(Giqo5hM_Rns2Ya58S&!~3{oZ@uQmGSPs*?uKV~GX1hGqq| z5=2jLGeKmItNl$J4Vv{uuPW zkQx;?FF6Hznr2-B+l&Mi2W>;r*4&M8G;_Rsn2dMTUKks$`&mxZl8wt z^+C^fT*Yo`d_d^5GP+PdG@}_Tt~f`0#@Q~dKtX3m8Q;yt=tA#Z81E>#x$2P}fxjEH zQji{g;?o!$z-r^Q3Z&J^cRpm?L5GH@4FbR%EH~fjSqDCNS5W(Pj_h=E2-G9MH`Hmr zp}pnMd%tAo>_djbJCN)fO=fB-#szNGJ3#Ov_J*e|n170T6k-!0I9!_I=H4V1?okr^ zhihdsfA=he0}_uBCX^%^%e$)yvBV{(Abfk|*2Jw_gVouxy+!8}6p%8pV;0#yuYuAZ zy_WvXMt66-rdRC-5a#wI#5j)3x3Ai2k>rU<76(%==cnRLno93xCF?5V^Ytg7 z9GGh{Az^W@DJXVe%;3R`AwY8DZQz!4~Yl{c9)s!b!u9kX# z9@)A)xsaP{;!?zQg)mb7U2v4%%PklmC_Su$-D^AJ%%y2B(?OiBs4v$nz;wMuD!6)~ ze5?GSq;;}kTo^3Q_oJ7naFdI{cK4y28fPv`eL`(!`1kjrQhfBhE|o{XD5w02LDI_% z#ke5M!FDp!@J&$hg+7&mn)(ptgF1xjWg^xf=laELNBicHJd`8vwP9(C6wBq7WpBsL zV(+(Z@Dj1-NWFNaA%f~K&rjw)u~bn*zn+uqJr{LG-4mZ|AH_h7+@8UaeXHt0;9FyVND=nBR_U zyzo@ZW8=ePygN)RwogZB>@4%Td5Jw3>laG%bfO7%G)&2%XpX+~5>#x)X8m1q{;{f7 
zYk2)Q;^MrxYHh!-TcvkY))a}Je;jJ_7)JFll9xlVtF1;#lji>pY}ii^o8vl&7Q(Do zO7Jv{_xH3ei~w^4yOqNh5Mq_Rq8w{=?E{zPRF0#2o;gAa6EGitSiwXB>S;8U0~}x%zxjbB;gZviVC~Eg zh);=rNPb4&@wH)QH6p6E=1pii@I-nz!?>|!Vh~uI?wS|x1d&NxB1MqU#S*(MyT9+r>LNWf43A8bymyRjR$9W8Wvm?6y;>to z5m3`bkRs?r_@@qsfcVyP$>+O0Q+$0yr6xH0{I}T_jB)IsYJxJms85!(NfA$pO~@~R zkQY~B;9OR7be;LzBo3oyH_;(BI~)_x@D~VIRx=)im1{qXc)N5Px*6Fc7vQaCcR2}d z6YXbeXkPp}Q9@uG#r02GBfDNQ;^8Pz$K1;Ad-?|VItGO2lbyY&$q?tp3x@lG&ehXl zcM8w*l;b@TpXW+K0-|aQmiH%b{C$u?F^vB}!}f7qU^vG3cE^gouxD`4jOYDfosQ@q zvDa`3(3fJT1H@gRHtEu(b!tW&4zq2+2N|Bhp;uG_uYc(%wNuqkvGX~oI%N%PoUqL) zE40jP9c@J_KO3b6^v_5Q)kgyz9nni9;%%KOj*xm8yDr0xj%(z`Vp zcg4cQ_*FxGcuK1iKBq-_259r!i{DzH&h0gj)Jri>Fq0eB#5O^ir*%&cSZkcHdB1U| z4kx@Ub09H1mn1t|O-!TBslD%!3qSLi&m1$7wC{$OGuEQReO;ohnnZaMU1AjIzW$mc zbk=+{t-N*4+SW+cOmoJCcX>O^pLDEReY2Hd*HI-8ewl32KnH$J=9DlZXk35(YX$13 zbb+NxsvpdRW|ye4aCr?@&)?O&_cXIR>Qm8n&mrIYXo{(#19ka0yCdbPwj}Oy7)HxJ z;kWl*`!CBmLUWAQCS8A;kPHs@oN%=bc!{SX{nPV#)69z`#rP_hz-W^go5@`W^Xwut z8G5nUO3g6>=qpaX@-tS0W|-8b2rPkd1pk;)RM<`9{#+CrL9~OA$k*OR~@qP4S7x z<5Q-MGjp~Xc{A=ab`-H5y^F{j%u}=rbq-#6!^iJ{CfY*RftAJN?(pxX27Lx`-g&*K zh%3G=ecA84NiU{v#}6H*quky6d0Si-RQ8T%#rZo{;MxgK8cI(O(# zI^3-h2QI}*8}AtC{Km+e?H(2~iC)4cybKH(b-OUZ{pXbH%?X7wALiOf9y_z7nYo$K zvm`lS11GCt8Kr^3Zq4UMQ;9pVj@WywF%RbYRxUurx?slFwsTgWx5X?iT!EL^jY@=?_gvIi1C5`|k6rN>Wyc5c z88BAQ+fi1 z7*IVV9rowVku)X)FdljT_f;2Ws*nelp=lQ!VG5wA?N)}v z!51{#YgByPFpihIIxkv=lr@)F*7@eR4!269jY8$0v_M3Jfy9L?0f?Ar?95M5#pe1t zY7aJ1ve{|-d4%I}5%T7{@G=6WEyEEyorfTWPZa~@_ND<(CPiKTu_uu(e@DNsd2d4v zs)j3U2IF&&*XQZ@n<*^~K-I{hI2<JYn1LO+8_oyr_N9w!moGKH^=3?!6xzwWea3 z1!}#((anW_fI8PUt)jUG`s*f0vu`LvPT0nk2#uheb)${Q`lOU`td&=^>T0we&&I=s z)VufTIS?s!Z-EJ#ZX4b(M`CJRMMF>6ZY}H!gU=5Zoqw>oS9}z|z$)5>5h<|>-Z1Vz z8jqWprtg7Hv2tZccpoLEc~YnQ<>>`?YGbnVKwV6G`gFJcr32{gpmPs$u%kbYe}|GE zyQ+a^%ZG!C1yfj1wL(MJ{5@WFfgg=$ZBYj;w{+|mgx2Pwqj2Re9xANbkLoficRmyX ziW4=^2Y8RJAZjk_+LXIQCYc%urJt}J&gnnw2GF>Vk6AS$<T#I>p!scd}yBQi?oI)ceiLI~b zzqMxdqq9%go<2F~mbbeq;tPUqkwQW`2m8h(6uohbQZD}1*q(9}#YljjXZ~J0v7n15 
z>cMBaa`5hTwRzrmbIqoTM`x~nmwX)QWD@vEp-m(WU*zd}8e6J!mC8*4QYFt-2*h_o zNlil^$|68+WT1?HE#<7a=9ecZrz1m>3>fj?b3XOM5A#A}C@QEg75i!E@sfU5Q`ODz z>enDsQCIyIqg_-P+xGKuY%MKZseqysulO-U6AA|IRFtCiGGAjaA1N~pllK=Fx{QdO zLYTjsS@Yf8PNN(%n>1ki<-3Ei)i-@jy-x*S{YuCFdVU2fcPNRySZ%GYAG`(2uewSv z-rQ9WsX8x!$J&+`vbH%CB!(pPrU9>GhaLCZB0C?}74#vxIgtzFdCuN%DU6h3gCV8u zO}sm+-!AFnl4QbRl~B6XHZkL(o4Zr-n(Lfn{6+go>tf|ZTru!tI5s`@5f9tL-~K$q zhUlZ+(4A~ErFC^hT}-54tUhZNp{V?n>^4!UG5`7_jGeJL!$ItHX-l*K&Llb?QnWTE zS>u!#UslI|b8DE~_MAO)B02DSPw6ap*~cjxUo8E>U^|h#KP~!Tu8t|}1BM^Idh4}& z!uDneeF&NZwSii#?d1JjmeBYn)8feXN1kdi84NtZTTr{mca~Q$!Q@tSD;Q*|xt$z7 zbJg|-Jh%6SWn*hgic$K&Nbl%92Nle9$--?(nxdpfRJnuVbIlop*R<243%P%;rbO)DC>Zm+*omPYF^2i-`&sNmcgJ)C$3j6 z*!b%IBF+EGK2%(}L-ZUxUGE=if->0n-rQD6j4=u`($RoNcZprv1*1`n7|TmymuG$E zwd$z3>6qA4aMeD$uVHu2elFsqGJi3m{{+Ui37<8GM=-SWl=VI+S2&FChh1Y{QO4P1 zZS_32@eii17NJPl0sn@LNk~|h<(MogGI}h;5sj7&J+ zMJ1~}PCFIzkU3F8DQc?6=d$+8f-qPS_;wz;*^r zfz3_c77T9cr=ePx`;sZWyz(chug9F!v2HN7rNjTjcIqe9iEJAW!N1&%&L8E6C)%{p zPOCQN^(ATF!2Z-Me$fV{XgOH{g1Edci-wHuC?*s zQS<&1m_r1eQjybUW$zZ5V!gDZP=#_(-@ z^O~&hwyw0ve5>iNIo0}c=Y}YK2dSX~AcsGubS_uEVJvd4S(Ru~H?N}!d`T$-BS&o+ z?BGUEB=i@`$nIW4A9pyuuA_qcg*8wR$q5LTVm&Ok*;*(BGR$KXydY zfaQ)TL;}di-x$hZ^-OQGioc|)$ASeqjv5yGTRkI^94*HFnY_)d>18GYJ$2gVyaAa=S63`O#y{t%xvva?{ke19u%fn@FUA4MpbKiFVy0QMb zAFVTh-IqzD`K^%2GScLx@45t5A@^wWgZ8N|=ozqp1_b61Uety<>h|JxlAq31AIeh# zjw`MRGN#}Mal*%we*X~2lA2#{x${A%S?AT#u&r#2qF(F7`$;BOxwp`Z$d-=67}4Uq zIN<{4M+Yt}{?_D(oaE8@C+w=d1d@7wON*pKrOhHgvPxZDid)0s}9^6XD~k?QL~iilz|cPygS&$wr?Xe%Q7v-8oeQ9 z?E(k9YqBP@A#3e3(%$k_1Q)eNz^O;f$H|y5<1{pH@bg`=?UBmL+DzChbaCD6n|ErL zo)pW3p1x^M|MEEFSfx&@Kf;UKwx@b2&PZ3!@KUpFz(18&U9ttwJPn zClXH!UZBf<0@n3%-@v%F1DCeh5RVyH3fJ#q3960=*WVfZzLbnhi2p&^dxkZc_5Gqc zjunv+1(Z4?0@9V<8AU)qKx*g_k={f)0d7aC)R8(;rGyY5L`vu+N)-?gLJ|liBuKBJ zga}FI#4|{*lbtTuGb+7ebziJJzvn(pTGMq(1ZCd`|{xhAC{4=}EK%%r{JB&s-u+M;g2dOs*T+ zq*_j;e^aDi>?OXmG+Ojc6`xS<P&WIh_wPUhuBsPRM&-ypucMfygpGT^Kv5&$sl%h!8mzd&c38s(rHO<&+{M*om^? 
z9Y(VL+Y2)iAKoLrcLky5&KAfk$9nw!{$^dl>66Wx4%|=q&5?1(>qCI7Qq;d^!1C)t zusHPZY{wH%?$PmI42v>)eXkj&&=lP{5$Jy;;2ZM@S0cGx;?+91qJL-f#+KOQ3DfIt zcjewCINXYMNQLuQ=zY+Ko2hiSqi;(yq{C=fI7-Gqlyh{ea{XZcz~y@QGDd#zw*NQU zE@A6u8qm1=`AHW5p#nxag6pYE+j`IN$|mF2|2^ZiIj{m@#uE3(L2bTn@M3M#<@&Nn zj)A<|o8AxhMU!j%PQ?bgECfq3x#|`=hV~BcPy49JyC?ogxGK1A&Us$97UH=DGbqY| z+K;v^ScXTFl&un9OSLEXO;$M@+b7;ZMBy6bLfcnC+*P$k{eanhr;SQndBrWbgGwX& zhTx@p)k}w;`GJl6{QCWHt@*H%E=0Aj=a!TReP6bBF}csr)>}~e-ur=PlF$G2+jdwU zrnc32M85B`@wN>)c=}6RR*JXDZG#Q!ytC{=v23u`?74(5FK_SnOr-NzNq!TYav~{a za9sEFHVZ>7jJp7Ed-X`eEdUxx{eWJAS$skCxgK{)>{C8blZ9*U_RE zyOlecl8Y4#ibK+MdmVVo!lL!cM&V8io7|29BHNV>T`j6AC*a&HkAGOa-+Wf=Im7dp z+>>e(Ba)4IB&3u*%_`T`C^im3X{*LM`>FZu6-{2pihzu`fbQqX&nr``_Jni?8)n)^ z@}()*&y>8n*&61{E9pOnHaBI2@ z>Z1UWG2QkW?3N}^gPVVhn%#6g?_&+ePSg&lhWVF-UK9Ab*9^DlU34=m79scO;J>fH zrLvogy;XOIt#I1zFSBg5T>v7-((hAETW*z>J>mDmnfLTb!&H9X z#Kv{H4sD+{yh8IgF>Gi;?e#uX!}^ED698dOGYI>>OUqurrF)vvvfBh%7+DR42$Svt z1u>w#6J#McGrOT3%7~;M5fVM7Q2qjo7g8w@XT675^8^aB9B~BkH#ngz!cyv7sT>g( zpR{1d5-R7`S$8HRAVS8+eMp|&q)?$7J(}CmkG9QhJFT%3jzpK;8jKVwuNLS=RHd0+ zM<4h)JIOX;O@=mhO6EH2{h^yKx2d{)Oi+)-W)$WxhI}A})3AqF)lj+Sa51BT(50lD z>SR>r9gfz>HpEI4koy02aWL_(Ue~#T!5-QvZCkGF(0(G7YSsPKmBV?$3XP1Z82!~KS;s@M>V>&6f>8?!XoYyU)_3?tPN6lKHD#*(x}Lj9gt|}kM3Z|~OX1VcZ+N%1 z<}C=L`1T{NZN&T;Q8KK_X$yfj`~Mmdvf#XK^`*uLrdRFreiX!t8ALL=n30a*4%-6$ z^-Cz-)qL|$=OJ{I?;zll9+@?$$DWw)nsy`^JxL1|={ZaBFLu3Cjw@O3k{I|F$j#~rQS zc190(CFM^;ec3?0*LG}{Lm+%*K@yzekU7X9z{$JH&a9+H1Ls*1b`d6h4td3)@U?ti z*(H&geP=zuspGpW>YW^-%4CS4?+&$d@@aa5*KlT)34S*Zda=-~zM!9m%4Xp|ziz2V z9BB>OA~yrGTlEG@ z5u5x0;4E5E$lQ^NsLXVuXxm~A`Y`>5WV>U3G)6fJvMibibIo zEu2>rW_0i<{I!EhULFAoUBOLkSIY)o4xqs~`Sinz=aKdK*m>LLnzd5?Se2L>8p)ZC z*}Jp97~voUG1l;83uQ8dg(k0pKw%v}T95ZPh^%h$24C03)KT4$RAZbHds1zcFE;_E zZ-Qs-?XCpl4-yHsFblck1&7z@Oi*{Tg`iu7MFFU(YjGA^l|p0>fgv3p_Vi6^^?-i< zAlD2vI$7&a&lvDx_`tl;!ileRcX%)PNJzS`;Cy1Pn>W-1wxFFbPztoVfMxw!eIe)q$HzEiY=?LTaE(AqYU|hwEgGU~ zelcGPawAJ`Thio{M7T@I`@R}oEiEUjNg)uANvpnsI^L+K`5N`PP#w4@WuzE#s^jnA 
zeB;mVLjA^E%S}r9AtBGH6#~W0NFSq59<-f~&*viVb}x@s5!PCo!cZmo3(kaI_A1R3 zR@(D(`F#p~CmN8&dFI>f9rCK`T%rR@To>%-GOy9Ajt%Igfw3hzQw%UXS((GpWKHV< zXUltcs@Gv3-K+rCRE{%rWwlQc}xG%lk(48~6@dDsQ`QEUd4Ck>4?zl%nj5`BnO1`Si@JV|B}JC2l?| ziw=?s-wcIp%&(0D41YV99n+uyH~8j z9^m;-9(_Y3ao>5GUNbVt@4WKeElu(b0?sgcqSefF8@HNSS%HlzA0r zMYDIImvE=+qrQAYLlC&LcUXssZAo5NeU{`5q$PEt-d(nC7wKcw>bsQ56CprJq4PQmOC!(va6lsfD+}rhW=V4UDD`ah<};tXaF=tPsYZmZ=;IT) z)Xdg?-*vGmyYQ*5q`j~UkFFs z;zk#$N|O^ zy3vySJKbh*pKs z+Y|Y$3?y*%X2bN=73CsHvzp!*A0?@MIOPMuie?_&Z7$Qf6@V!{O_v zW-)KSxcNLks2Nwu0PI2cVR|=Y?)tjoudi@i`LQNdyRmy?xW=J)oP$}Z^~dQ`c6W)` zQfWqBsYtZDPWqn57o9s1>?4`sNUx~((b1qo%A6e8DoQZfoWs{Ul{m0mYe`4hU^7!RXKLZ4$`d+}`L$?UymI=(=+8gQ@OdYWPh`J;v==AZ2vMxk7YRFR_4hKk3*c5_ zUw=TAe}ykwL#0uYp|w1|`kR2P{BEpRB3b=KjYi19&hKeCg1c6#wLC8=&8XX*3nnK1 zNWHJS455m3A_AQ)p8+gZSdPBNe+lsK(xZ6SS#1Lfk;WeFtnswRqPj}`IIT8TaBi>a z>h0Bx54L(4eeOqe0mVVpI^ zFuufPTZS|!2%!B-fRh~Cbi8LLH$3lY+&q4SbbH_I#o6qXyN z=zs`W{KFE$c+<3njyR2^hY; zoNEMhE>KA_OLJJB+~-=lV~q~~JpJ|HGNrBJZAJF;nn$dvPa%QvVD3=Y%1sg<@<^M@ zS|dD8x%LapS0i|RtspTw%Jo#G;^kOsxTnl_#e30&tq(e38!I!;@>@k&rKoSOlmI4F zrsKC-^U@o}T8C9tb5aJ3?^!NeSy9uBK#i|Ff~6`5W>YDSX78Z`Yeak)gYp;s2yT^qqPnY(f_^LIiybJDq{`E5t92BkpHMjwiTQy*g;T^5MSql4TQxuDZRO~K zLXRg!rP(I0M*==olgnaRJv&~2xZCQ}bgKRF?WOw|`h?(#)W_=I+9P2T69h4qeLC^8lW!WgQXst|B?Y2la=@wwgg4{L=UgeD<s+K`$k96I2}-)x?YyH9gYjrlhRmea z^A1po8MM;3VIzu_O5w=l{+)1`NjE&)WQtf~w{^+48@n4YQ>fCZvc4-eQvE-iGIjrp3`cJ^>@lJ&MH);cPSOt@v4HBQear4^bhBy# zBd*+#Or^9AR;TyQ4ykAMvu^1ru-E6kJKM(^UAbhY$8NHlKNk z9_|eMaF%Cz1F(weaV9vQ$wgqBB_s>HFUFY^OB_TMvhd{48JNK7ny78_Myu-K;?P&>{_ zn$u^KJ#16pKKq>;x%;?g^t+E@acJ8o0 zkOusGxcZ(Qfdlk_)A1mki`4Kwk+fKuDfGE$Lss7;T9NU{aa3V)MLwDBY4v7*m0v#r$j&v%QgUPTV8xrfSY;kvI*U8 z+IdDb+!mtV>09&c73QYPLU?G!i|fjK)THfmceb6jC>7;Jy3OE#=~0%r9!! 
zcaoX5`>XmLC1WSpXZ$YZ^(RpCTdlPn(f*#49 zjUcYW!U>OoL$ec_t+qLfm(Vk=5&@83(yto%dwNzoouh1@kx|h|LvW_gWeMqx*vX4M z%G>9imA-ii{LK_ME)=u2zS4{#BRXSFZVaF5)Ouop4kAm!73yp^?_G^IaiI7v zA#4bMy3G3Ung_XtYbP)+hkDC=SHgp61Qk%wML^y^JW#uNI9ul z*=o{U3R%pQI#0m$8|%%yE-sJ`Yw4+2T^jZWK#_8IKBj(w80 zj-EDKn6P5V-ZHn2|2<>=!*C=jEa_B+Uh$a->tjzSWc7x%Nriz!FlnmwS7@~Zch(}U0D{hEJFEG9i`l@xyub6y`0{H4<}Xyv%>XGuNa8KZ@O zIARM{O1sRn0h4v1n;Kj7NC0DoS@?khCj$-c(+yMuCsk9*e8-a@^&eXEtP z0C?MFFQ2Fki8ze8^Ha?6U9?{LZP$rX)5J}eW>hP_tg#E5&^>Ru78ofj44(pXv>M`& z#mIn<6&B9-2Fs(Y0B$TO21|l&LnmdO879nBKVU2RArOS1FFl=fr$$RdYG({j^=G^b zVDIvKSa+!pOZBFjlk)`HP@JNcRCnfKflT>eAOApR^-x$xUMtzIa&l0LzjMgNp?L%0 z5NIM-TX3@JF~7ZocBmhom_;^)flM5?u#Nc95UhRF@tar?);Q@#m%1=&5dz$tj|c~T8|;bRloK_a&llIFi*=$+5C9?HlrnrA+;?aE(G zC5N@pq)*o)RCS47ofobN4zyb$faZXH;Qs2|!)$W!Rbuf|5w@H83Y|V|ckMTLYMIr{ zSVun2$55lH;d*iDr<|Cqx+4Cx$hxqXZ7Kg`C=Kl?t=Ym{Fx<;@FHnC^y?_MFw%*$~ zt2XtUyFuOkpV8t!skdefej)t{KDRt!fIa&C5_mXydpS#jk^%;8$v zAE1*P{Usu!3Pq{1eQp4i!IcWrIz4Zo)CNu|D{;FCdpjyI?#OYq^en%f*FaH)*5l3A zD@|o)N>%K-Q_MpaHyzf`(xAI9lK9w-U7mH(zf+OPY;XDo>mJ>{-WM%Q^=Mc$_1OL+ z>$tZdDLGoC8XOU9--d;Xi%%(~&iH!>IKEWidn->muw9Oa0+RYYX3YX|-fb99kO2zut>B@*5cT_*thD0~ z$83s8*c`tzU|r`OVW4bM*yT8M;klX1<+AzHu?2tJz!uPW;51DUcl)erM#GRopN@GwP&fW~_ zld6W?z4rq9JLdTPYi1l4rgf7O>Y={3_wRJI<`KRee@7Bs?@*Kz>fO%gJkC#? 
z!$GdY@4slq@J*E79cGx4i%6+Weg^WrfXv|KlBKrSw=eR%_(V@LkUyXF`K5i4Vv4*1 zw5KiC@H+T@?c2AB*HG7@&qH4LOa(&|eu@a}+mQKgx-+!1DEZ0%IE6ioAM^3Nt)zZ` z+Q~rT!RVgCh?L%0aPmD!)XjaO9emhs*v9F>RL-YQZxb9Cfm>D-3{NzOS)&`sbpxPR ze39KdeoPf-n^V_M2yakB%0pW2FnR~!Gy1^UUe7Y<5WyM-CTK1}nNcR2S8soe>Hv1cPDK)~qJZTjp z2B9TFZ4`;fiK0^JfZz5MWJ3LVF9JDBLANyixq=JgiTs3enp+#l(+F`L-S#4{jw+kt zkKqOrwYTbDwl5#GX2zb0mhEn6NCYm=8K;KqCkTsCPyQa^xaUXQ+(VxU6H&bR6TPk` z$)Hj$80M|seErpJZ?aE{=YeHqo~Vdk?wHce;yvXxb!P+JzSqVJ+x6pCDffY#2cN#w z>8pZfhCAQdJh+MY1`pQ!4!^KFjFyH@5XDsQ1BlwNDs(-mtRK9QrP1^)y3#d35Vuq+ix4L4U>Z& z^l8fyiKHe$PLDnHD)}3^Q>(oIMQU;xnNSdy-wYdaxw$H`5P^B87(Mv;(q63mZT|Kv z{Q6udBDG>0}9iTCrg|?I@DJZ>mVb!kjG-W?Kg00It~I zrs;X4&pN_|e`Y#&Vm)x&{^9*Za(j%9Ky&kRmJD7w(!b9n{FTbgEm7cavfz_#D z3O!Yoa-~gjrLCG$!dvY1J*a1y?xJsNJ^y&=P1!^957-QpdI74!_bbbi{H^v}F1FwB z{KE+wn6ZsK=r-s>j{^I)13@@gv?|gCtBI+Bv_e1Bh!yso@K{^gv^xzRTwRZEJhisth`Ki4xp&Ew1 z@^C)CRNk#+cFbKHC+{vZTVaQ=dR+1pcfjE90JAl1J~Z(52N3dTHdyLlfIlD=wni{L z6^)p+Y{P|^3b^20_ev9EZ}@~2$hA{MbdGKJj+VENQ$u(GH15z_c6%9rlq#T^dCfE^ z^h}&{d;8mUSw?`b&hf+7%FIF)?-W@U<@3|l7@@WnUV1tL+V=B9h8;;_Ycr8cb5 z$!H5mFi=Sk{4@Ju32yi0Wb;TF3byldfwr9LgExW5mWG$lI6UCc#4I3$k8?|H2{ztx zt)1bvb3i=9y@Ec|d@mF;qub~5TMHgd;zi1*b;&PEZXKS!(=x4?Sty@h<14u{xr3;e zpXoBu9%z7&kI~pe#Yj)zYI@vOf#zknJwzPH>#W`lK|oJ75zQddLuFqU)p*vikge1$ z@LOmFd47lQ&620Q?qq(}x$*?RFZ>`-|mz@Cq%L)Czr6>M7JE3r#65<_gNG`n|5k<14 z+GfK~Bw9)|J1-UoAW8%ZBW=CjSCmLx*=z!tx)4mksFSy0-b7k>)8>!pAgD_OFexia zD`+nW%sAOm(oGlbb>K`yw~&a6YdR9t?v@W1d*p9!6u5U2-e_68E1QhgQI7GonA|C1 zQ(VdG&6W`c$FNBa%x9(rjd|~^b72{*)!%lWIbT{8% zG6NJr;z0SU_JNy)|Mu$=qVfYLKclbLI@1jwc4IO>6W6eHEKzzi-O2(VhQH$KU8Z{ncJ3%bFktW?y!+|H|A=FQsK+b=Bp+LNC%h-ZYHsb-INry^ z$+LYC^=wzWwc5eT0YF*8Y=PJy2R=z&+p6cI2=T_!6DcrDw)Jz?O=&HlLMU?&L zPWE%T9gfIVi8d{8ga#^-Z`{VS?`uhz_$Jt8Wg1n}B-+ap8!#zS$NHMri`1h$prl@IG zXWZo8j9B3k1wO_N&jH0-!PMG^mK09!LM_8vLFFlU&{Jk;HVF3|8uNA+7;KKD$<*Fgq&-jdsC_hidBEb>1&QQ z*HtJ_-U#o?H_Nnn38i~nym3|5+q$}nvfk!!nO*9tUGx#*h3>W(*eK!eK;tT=MBAF| zDpUqvw$!-mm|XBv*i(4OK@c)`l3xYvTYvRvWB9Y1FU_1UnJG=CgY3ulV7EF^aV6WD 
z_S*pi%?&wa#fweqQYF2d>7OZ{mj;B1)_XW_v+^X2yGofD$EY+a3_Uu^^|);kR$%1O zlIx!Q`~Hjl5B5&?$9}u;t7k1|<<)cT=^?K?FL*eq{zD1+=bPCtD5+^la)qD-&|<@5 z2xbS$|jji}|z_CDXU^Xp-&oYr2r%Q#l9cladU$+mda zhtDciw(ERdxda(f=FxrdKjz$6%zNPLuK0fzr!zZ@D;4}2x2-LggYO?)>SxE30-fAM^Oz2XHNo;suv}W4qrl=|h1E;jAl zc5SxDI1M?_a;nQ_>`^^&ipiJQmU=OVF!Dfo{RK==_R@e)yw^d?-s_4Tjoch?_ubvQ z-Sb&dg5S^_U0L_v#%LzZf3tdz72?S+MXqNS0^b(}k=J`Zm`R*XFXl)2dMG!%&pSUf zX}#h4Da0}#FZsx2!h}nFTi6A}VT2MB(e1m6+RfpG0teO>TWcORYl^AO122m77gBiWD94S- z{lG)>?Z8I8*94Vk!}1>!rPq$e#(Ma&-$k8>=s?Nx0Gyp8PBey2@ z9tNcEGJxOn#>0-U&kqHupeRNw1v|0|tA-{cNrm=*&#~WI&-wYs$BH{8Mgd}-xeL_M ze&I2Z5juvhTe>hEJc1!JLs=*fHj@6?Jf2up!9>C%p1N%KW5&DN@eD&50`iW%!T!eG zjnh!|uUNk)rO)^jMQ07~JZb#p5Tdp~Bj8wo#_ncJ2a&1{^#$)SO2;d(V_%oif6#Rw z?eX=!`EM{admkkGGpxDm6#R|!E`fM^9x({nhQy%=6Bfw^?`GZ;QR%E-4!t&3$Rbl z|9>9L|H%|Io+ghnRqS!OH`T0JdTqwn0Z0n%+`e-#z!%Od%-xKWRhoq#o|fJOJDdym zBu~pNB{~P}ObMjMD9csF-?ljKFY%Jt=&e%J;t;wI?Ec`e&F(G)wIF{qcWxu+V!fZN zyVfwAllu5d8!j?_2Y8h*$qwOtjxm9>czT?>NLo^|GQ!LEKK<=5dt-SJVQ!6T+S_!d zhn?DdiuVz|RcK03@{-FUn#?-N`4`o%5u@h_wenHB9?j+^2p&8v%Bbm6&V{R^AmIQ~(Iy6qqI@_LtQE1zIG%2E@m*|{O0*7z%@`n`z)v^`#3nVkP z$n4^?k}-mLin{0i)EszzgI6U?r7|^jWOl??(`&iCK+VwvA#^-jbt2#0jJZ|`fL|3z zq(o~|RJ&6j;}FW1o7X*3P8P)7+gk4Xbf6RWO#-C`)mr_*f8r0<0=h6wPV?Rx@wWR< z@J+woFrz^Nf5v(v^Roc~(*R7Z#82hp!o$A*+X?SpG}~+vI{uC%hzKEFr;j|w`{@Wq zU&lAsa-EHsHKYR+`5O!NqtulJY%ggqYxA!6^uV6HpgAK~_Zrh+ZCYNQm0qw86z25x zq;BC3u6Q@JcIrU{2Gj>=vQ}?4n;5z`f*eAe9Ts+TTX#-H?0RSE^uX@$*aDrQ?U!4B zukOiVBE=hE3#O;~!}ae^ zFOV&c-=y)P%|x>Aw>NTq z!I$V=a(`8T*L3*{AMUY`Y<+um9K2l}xRU$`~7U9_#aiu&# zDswtIW4p|h5oT<9rloyKxmH$qL33OFG5%xH)9yA4q>y$(cw-l;(R8!Y5#618LKC;y z+u7Z`$=hmVJPX}HY*qM5w+3Vwq0QO*BN_PPyY8BL1$!7D8(|Cc@aSPAWcJfu=qxuqRbhR9uV1%rIU>(iSf*a@zZsQYF?2U`Q8va>ZNful zXX{Z-wam9a&t#pCY8TR|)f}3IS%?)ZZ@1pImq$$CPE!NHZTrC|Q!e_&Vzz~WlEAkt z;~@XmIK<0*+Zjs9?|TciW%QQQK4G3#dj8VaF4K?R{Q0wFW@Yp4wY3u-CcdnUT^()m zaIdA|)%OTDy%b%}2(XRE#L~WUn|bo#yo<_P$3lPLE8wKUyNmBmrleSUN7Zv}P&=9j~39S};Wt=GJzR~hVHR6mR>y&G=wal?UU 
zC*Q3$cGh96=|TfQ?luRtH%uKB&KuL`%snG%wVESLg~v}k@)n4st$RwvI<`Zuo5-KR z$zi^R>YX!;`i{+!H3Xk*Y*P|D`0J2|0iOp$>}O5$ST(x^{_8fUV~f5`g`%N6 zMST3?HGtRnuVW|5IApRX^$3B=PXOx&p7U0X>&vt4u=z&okb`=4#KLrQwZDZE8Lby_ zwX)T=7{mI41OMTrkdyt{h0R(n#Iyf=n=H;n>cr8uc%c9Z#Ngu}L~BrmGvQz#2l+S@ zhHg~v=!lbLSOo#6MiIXDt#M!~Z)+KSyGJ#xIgooJFG?kqGAeqi^F>yP*+SGh&RaFa zi{g|o3&l8ebXrT1K#koW$9D?jopeYBgecA>5+xTrtVYXqdxLvBl6)obEG@4HY^r2 zi?bIoq>Fw*pWVBLqWA|IQZwLyYRV0Rj2%*6iCT7Fs$YlLTW|b;ke*#f2O$tva0_oz zI|=}{_&^sjtU#bJI5Mi$L5k#aFR%53(Rr$`KPanW74Zu*?q zp#S4$O+YO)ET%^)jsIrX2k6|5zR~`&5r@$Kmd6<>)Rh<9F}Pynt~_PYsQB}Jl*^dO zoqoxIS1Y5hcmt?Wu8IOPZOD>LKq)eyQmm9LD0Z107ZNxkb{8|N&hpQnla(Y=*)kuplG3a_V_f za-f!e^~T?Gk!GLf9oEBJTyDg71aEyrSQ6!1C--+l*YanMbFv6?-E##YhyKhG^u}5) zVrFB6m&Ap>?_f__|3o*Z1djxJ`j_idhGKOhd_-;yR(ZQTARM*ric>Gru6;Z{N$eDP zSCzOm4($!9j0gnuCbpWHdZ+GZt@5JQwGn_kDYh%b&d%LG!rSJjvBYG0?xZlGJ;{=u~HLVKn(Sjv#*1G5qK|`KD+c1e0??BI;-cN z#MGe(*Nok?w+lDkUuU*9TRTPhtxv3N+104T5sBtkaZQ%1xIn2QB9UN>H;-vH|5;aL zU0IA6A?OP`ix`}+i_lG%w{@iGQY?AgA&~B7L2sqzn`S?qWQe*3RYiFAw+IV<4^72f z-ss4v4;?izz=qsHfb6J31LeO{Z*~CF{+^9&SkqqzYsMOTS2HJv4K8SvdpDy}yfbM6 ziiz@nStlkf{knhS8015nWZ7a-q+s(Nk;|YSox7=h2(|_bNU6$X4x=gGCI}| zPQU(de5mUG-G|!r-+ZW3|Al(~f1oGue-QsTbd;HZ;SH??-&haF2!109cS54`I;`2L zUtMCaT3CXZrbOvdE+4D8A&rf9G9*7EIU?~_e;sO_Lx23UuX$wEkPWXBh}!n7z2-a^ zHvb~09I>;}8a>YTgg62d;BI{N&(y&JuAWTJh^1!}p%`Jhs1fea?nToSJ8Yfy2&-i( zmKBlg%@V`?ve^-ypj@oA;9IMunpFm=cFnZ-uv=Fj%4_nqt@g#I93SIcV`@CvcW~n% zw>_)0|5n>ZWl1`1f|TEq!FAiA$j7om@)7uIcb90a!BSI^OFE!UFqJ`!zN?OYOy?|% z2ps5WihkaR)w&NfA904MFtd&}i?3jN3#2y*3u2SXLj1bFEIFc@r^r~j=AS#08>dg> zdcMy*3M{qgYL9tkdz!yW^=dZKZ6&a87$YSi?Mw9Rtq+eoH=Dx_) zchqdOILv)2O(0xvsknx!>aQlc=xo+Ho=W>xzcfNDzW1lw57wB8Y^?k-bZ@l(MdP|s zB{8iSXh9zcx1j`p+WsH4m;^85@@I&|@? 
zj%2hz7Rh3-HBFxJX9XL^i4Szy8!M}ES$C16$~er0cZ8=j7xVQ!gSMvDcUc7qs^Wih zMB&S-efXog(NNkumZrimwOvK?dftkiXFM?I3cWQ3VK{?>~r;%)KGo_yEXdAtf;`sBCGl1QVk)$rYZju=<;N=$f;w* zTXPN~!x7KUB;DVUpV{nf3HycO_|pmRo>v#~xOK}>^@Veh5L5A%oIX21ODw9r0uie zUF8dWCC{^1WN^HKVD@>(8(OyV=iip&jL|)4h5MH9dzO|oYiX5bbv3=hiJ|KLHrDl| zCBc8sXboEQ>Tx6vEcq@J%hy?b84FM%Yak#F^pG6aOJhD-#Eo*O*Fv+$n_K!W!(CJXcU-uEdjr~z*;2X&Kt>CjBn^EN=t(rsP{Ut2otPFkC zMRWHMxv$1VHWp@ekVY?R>y(6^j^8k!vB?z-wHxGyWu$!?WkWb_k`9fjC;hj{L>o6T z;S_b`4T?@<^$(?*F4>FkfC5;)YYoKV0phO;NpjJB%M3Aze-<|uQmfHO2Ojq7nyMV( zVvnyvp_=ayeqGn0Cdy4NhOZEN=RUuZxv_qtxk<=tG6^ELy~x-X`a@^VZyvwC6v3_L z;+q=c=_pB#`YmmPDv;*6H78s2;_fQ{bZ$J%@s<90B8{|nfQFAncFvTm`XjHE zIV+dfFq6bN=tWyabEz_(+ZMfA&c?5E_MWBG3Tn{MLnup&AdA$Ta#$IelnCbUvZ{9Ih$6~w z)NWn18qeaQ;{?V9OE|DuCU$J4NlxI*(ia~ZjVSbk7z?0+G(@f>rU;3%h$|x&xj4(= zjfDug+jkSK%}`md=5qKc(luFSmz0aGeS+DM(@sjivoPK!*NuZ}4IcPU!S0P+9$MSUukn)I1kz1$PsnEImte2x*6NP*aQ zak}x~nt@pVQ8qZy8Yf!v>j6qL2K$(GlS|~pEVa(j^^IFFbc1*CHbRbDGQ+g@M!b*S zyPNP);*E@Oivjbz`0d*=Utl{gUSY8X-;oA^^YF`K&?4$%35{TK&EIfSmsfeFrp6ov zUD|71-O9Vol-O;h z?5A0$?5Z(8|HIRU$_R;PsHwwsWr%tR5C>dE`4eA;b=Vw$o%CEk&N zozcZs#J>*~>bq_#lb^IPhSJ@R_Lux#_vuJ9dC=_@t$qVk34df0^en-l1j~_}3CK#0 zvNo((f@P96PP@t8kJ8PzqQ$&G=UG#Dy+`_m0%nyW>zT{|Al=IYWL*x6Q^c*&o&&6Z(m+Sh>(x} z(skeEri+vGTc&hW7=kEzF(sbL)k})+SZ*neiK(FTvxMXyWsHz_62wqNH`uub_U@Q` z%Myy(mJR=WSsMg7Mi&Rt%|u5dO3!BWfnesMgHvC~(8PX^`^9!veH5>D0_~rTp`n2^ zewxo^9mu>|X*sn!Dq1yXuSNG<64bIDl8+sD;hNm*evmF{-RoL=@7a&fSf2;E-mq;R zBcpo`7D4%{Z|R;-=;@So<>bSAXd$P6&ph`EFS(0b2uu*Uyj2b~P1JcdquRP6`#X~5duO&^t04xjnYbU_?OG!J;M+Pgn9uxtl11N&jXUUugR;aGAElb1 z+MZk^=Y3>&uB!SY1^QDi?$z6A4G((UXF|N|h2q@1aAas|W}rL4ptn0uoxH5&{G~`+wGX*IDmb=fgQ` z@g;m91oqy~b3ga*zOJ)HR#{IgkVS5ElRaaetfhtWYWqI&PLfq#p2VJ0S{5Ug6fTUw zb=Dyr?N-nU?~0>!mNTA^^eCm)FNkLy3+zO#(At%yAJUa{9{-b1!fyI}WVdt}DP3Fc zsHJ+iUA0T|&8_{&Di3VJ&}Bwx&vBIp2%8g7rMWias=$mhxvKww`?jGC z)mhEzvUu4@W|e@gROPeBvd1UtfdK9#LO@*Ybi zetbzvZ-)1r20J>t?fLpi{;KeY0o;vwkZ+L)9hhU?>+Kb_%Syf|9^Ucf)tQIR`0`Ru 
z@2yO00b5OI@w5$X!{}#otQa;{#ZmfCc1W~=>6H?xtY}}mqBZ%1!BN%TO1rY?_}=nY z9|DTbI@AY@g-x31@-ce==B7{waS(peJLBQ&%XQ7{eS6K*TUIoa0Yt+lm;f&{br zTrXx%KS){nY;die_Hy3nx3opM>N(HC!Uacc>8&IO#ikjl`o40EgT5m5Nm^&x;u))n zl*IZ>3Es0aJ#f2^-dAq2JW)0XT`obJ=nUK$AQ_ zw2{ni)kOaBshT$Ggz^G4^od!x;yahMZCSvB=i<*jPzd$cY9Pdq1$E0o$oKCz2K;oG zOBN=m7=*YvC6ur{4yvrWX8nBfArZ*kT$guGSM0xKA-4E&f=gom98Y8QT(;zdaX(sWutqs%1A-5{>+W1e zA6wb!#NVr>VD!T}jU}Ss&f$@FTiR2&pWpjsY$A2jdNHCTHG#Q7IEzIPaLTAiaR6ez zQHyQ`tpbe^tmdYgW4`GoQYl$Ex?YOrf}BaIaygeEfa9B~DgWmJ-vOHL%Puq9&!d)w zsTi>I(x#tB5|v9wuBSyZYu1}QF`wxk+Yd)Cb|~t(vYmHfiv&J5gM5dc~!74x$Ez*7;jA{?|2!PE#Vqc z66Lo-fsXpruZ>>nt%yqyiIeV04d|j^TKr<+CF8i!dl4ICh4we)d(}eYYg*1r?SGAoa8EcQedMTJZG6N<26myxi2))a%qn(azHit{# zJY6fBgs_AXEM7Lw>)x`?e0^2>tf}v5ypJ&F0F$-6qXth-t%>e?*cw`4fiav?!nQfw z|8*$%m~DrWUE;!KK&uIOIvM!v|IMxO|Dl(90|4C`5okV?C&allC@0-@iI;j)NIe$) zfc?R=NskZpru#~C39+d;2?p9u5@UY|`fWfpa^BU$Uh1W#9PNVKNWii-R^i@lpr}KG zPuMYA=0>!uWI0P68$EE}0>4Q$ZUHqLEj#t-DKb-{fW672IK|Vel9K{C#&@YIfGQte zJoD=OKNQmODs|U}tb;oT(}rMYgf2ymO%Eh6obP%a4|1sO-hX(eL!~yI5ZiBd>Xx(b z&vL6I=$U9owIp9mS-(BOHrM2A4?&A)Wo~hiY6@L__ zy9c!+=mgj&uKc)lFWV;TX?p5IV76xOY(!6r3tty$J@sSo2iAQLKAs72Ho5{f3?(H} zus+cIUgIhMUQP_mJ`P`g;6?hb= zZ5U3NUb3=qvMjS!Sc}{fp5osM*SFz`b0iqHln(vE+w-fi{P=CqIvyC_(BBO0&=UT2 z=&S@Hy((zWeB*rdmm`g*&86ng)*M`vdoeqm%aPp75xb=Q?1S!(x~`5%$C2^2fJ|kl ziPuKXEu*_t-Bx&|_bjmf^9z88;umcjJqhaERc(|wEo*A<^^b^zh+)}dY;^BvRF+Sa z!Ef^*3T9Sx9ArZZ>?z-(kz7bsR(QPw&gVym~ zsyruV*o>X!7~f&fP%+bHthfmeD(`~e39}L$;g{jQXw{AN2S`h=JGFU`u!N8cZ85s4 z#m7<_YKRa(r|;{TyJVg2ft)&>b367#sj&c}8m<{E@KUVQp_t35)}|A*VaQ4$+E=gSV|rUBIi2~+t<^AHj|As z_MYbd)k4yqJo1^tB-wdob2Vpq+Y71(?b>lp4!I~} zAHez##4uL3bbjZ8fX`|u7EM(OAhR5o@c+t4ytMcoQ~7(|6=%h z@a%2FzQr8nTPZ!^j@NrQjz}{}@!QvXsJ%TIE{TABk+lfEucuS+=DmT+a3^r1D(qFH zJD0N{pknyA+$95m7WM6W`r>XptB6elRu?~)I_2JtIAvgh@Vw5y{LLu=!$u#m(D1f6 zjbms*iVrAusELQ{;J&HHj-OdZcgZIKZ(WrZ-WNiAP9n|H66JzBKF4gG^{DY`Nd z^83oC3iE(phiuMm|9f%9nUi5IHDZx}eLh*&K}^D^T1|7o#>q7`DU|B}GR=D(C>mXx 
zurxizCiLLk6w7Vj{djEy$b!pLe%MnL9kecp2SQKg>}(E_7QYX1;^2@vW!oXphUF0JUrjtg{gw<&#yW z809&ZV?Wb(->S;u6$WZmQBqR&!HCKx&J)*#IMRcw?9KigV6rQ09&me%oyS;9YJ+*u zhm6y(zcnv}RL|~!Y~1Z=Cl~a3t}R^|Y0|W(^zASAQLc>-f?I*a3urNuPrmgAAJS(X zLsET0s^#{ySv#%)R%&l4`BeMA*h>Qob;kB^hu#kuCEeBz*Rq?|bc+45oR|9C$i;q7 zLgdH4S_hmmnG*Mbx--kK*aKINuLfpZB&taskKOIr?Z*yiP%a&bPBK^W&DGX{tjd^i zwJAn&-=rqvN$T{XLAql!h?|t+H9JwfdBN;a8!1#W!Cx)`2al*#k5>2ldfT61UfPX7 z{88oULq+|BA%kdJ$gp3BQi=vYcQ7Pl-rCIPG<7MB zlftqXsS-gQy>*?d=CBYGO1U@QOV2%x0*$O7w=7lZ7O>g=DSd))F*m$EI2xmQwTam@JaTFa^N3c0NxqH87TdKn)|l`XKY zrlPN{_vAn1hQ5OG%I#_dq^KUSlgqYN#b2)jt8V)*F#;1+n(EVgebDDCc^7JV&(Pl5 z-P1bSzHPp7{*4Wf>_8Gh z?4G3@@(zl$nYE?tmlUvhAEOg5Cx?dyh7k30Q z0byQb58DkPQi?tNmi8Y~_B02Tj4w5CdOH3{%Du*~jh2rn848+EO>RX2YCBOhXtuR# z1Uim$N#ldR&d0Wro0<1HSOZ+aizF3ywc=8*@4GGW<~_y!n%}^`t*=v}n2f8&!p3*% zn}WmZWmun#C5SJ-PS1y)1Z0ETgU6H-y?DJioieVk$oj z$zjnd^r+r@>BF|gUzU}Xp1O{srXID)u5yaFLcB0eU=QO@kSYf($erQo$vt;JZXG*& zwtZqCp#oBhb(b;jZ42*^`Vv}_d(pZSrVyiYIez!u=a{erjz3x_<1-XT6yn9@Jek$k z8T4TD3`|@%PjZ|~X;Bmpr~}e5wdlP~K#y&W@_>bbl$I^fzL5tIA=W!eGxe?3sNoXmI@m|j|uqx}%f>^^E#$v2Uul)FLDdXuDY2{x2j!EcJbuT+nxJ)88i-Y@& zuX1qCbUdAce+QgY?b4{^>IuYYY`&nAnqO3H0W*3_%K1_U!+)8{WX56MXh z^_+9HEsMz?IAq7da=U)Vx6~e5M|t^;s(6Q@7q*#8J6+a^X$4Vz^FLhOR2&Q(l;U0R z!oWl#VxbQ1+!uIrUS{(J$=5Ebvqbh@eD9vxor~52-@q;3mijUCD(BBQi$>CT$F&OF z4HyE4-qU1s$etwCk-D4Oz`k2nTG9jgfahAk1@D4 z?@>9c^*@5aU~Gl)+P8@$GYGvy*?ia`$l>W?e6*B( zS(vq#MbxELMuTKjgc=UXS&qXQmVsy!f!?E!g9bQhvlm-h_wzb1SBRA>;LzJ(BCxh5 zd3VgO-w_oA~vQ-yM*EkD0(Z zfz#f;F;Y$D`>$;qYT`eN`I>w4PN)egJ>fQ?PnP%8=bG<+x%P75Mq;nHX_f1!k_~=x zF+SH%cChVpyvxTQ&?ok#_aDqlzFGk4blwM=+wz`lh!zYTgWL1STn)}(tbs!H>?5*V zM_6_J^tU>N==Bacc33Yr`01`q?460qb@}X`)e^*0x>c&bKiUU*?k}inyjHf<;E8h4 zAHmFjcILs%9ZZk1=HzQ$R9kA;Kj|$ zoy9i)-N9xg8<>RG+?~BjBPIMgBo#4g(;pt_Px!);q}{-RTBqK?Z%Z zI~bWuCX~u*jTP1EvmoSQ9>Bf#Y5mjgHD$P-pJ8v$*0@d&F+0g5Mn#`^P}`_vcD={5 zNcV$AXAyYotAm>gj|$V8>$ZG`@w`RI!@#Wj^D!R-9pbVEnAfP+TIHp%S?RSOI(-A) zdOrC<5Z5bD^~HvmO?%Yx;;j;rMy`*qGG8>0Rju=t7aC*ND^MsO0AxZvM5sz2-Hyqf 
zxE)*yBR==Dv-K*NP0}sS5W2--Ha4d-Gg!3l2}W5|w=UC1?b1+AXTAZ;ES6WVdCn&u zyJJ_R{c0jel2#T5YQ{7uL>rTVT#M50h((8mV<%&rUMniaUn8na#JFiHVNISTS$x_} zze5A;%hp_p#~OeqQPQ@#N_NJ!@B_gKl@zzTVA9d2@38z=(P`Ia{)K#DvuvT^=2%sQ zyF$l(LoyCdv-x2~^ws_RR%b)%>P&=Ghs)rA2D9Fv&NOXaM;(>C?Jco zuF;{!C?D*2VZm-t$K|)?Bi05VH*H0U+G|hr2A*`=rGWeyFs*rgO{xRqKEN6wm}&XS zv8|L*Fw?d%O=En7R(o^nb-V#j|1CH;)r~2KtwN)VZKqb6zH#X>Ro*2GtITUw>|x?t z73hd=P0B_ouRY6ybF%N#`|rD$3-8v~{R1P@^+bEdvy&_u8dv67G=k(=vnHNx&t+dxhEtY>)&MxhBPO>K0swG1kiz&6K&}RPo=7*2zSay{G%VWSln|T|FWVr zNB6gFNb?q3Q2Cz!r%u?_kF~nFDo?!}O9VtA;#N6}=C=Zi`s8D+A&c6HJJWaeFA zPs=!TgmEl4HII-#Y1+S5>oCCSi*LNZl$NB>cFj@uffIuvhKo#SFVWRN4k&+bL763s zVU{a`v7HOZfjxb~P*}T5eu~M!mPD2*G{COD0cOF)wO42lBbC3j`V)}g2NgZvAd7lK zW^?~bBC7Y1<0fDFcKws5)ncFOoN)Wvw{@RI5(V%Lg3P2q-(F6i_G;i}kZQz(D`^rEHNlSc-+padq*)!5gZAGWaSU;L2wn5bcZx zYT}LF?HuMdeCn{QcHzn3v@3=PhNRIKQfvKXzEX(X2MKphKBq|;)q0;wOO3k}b%kHi zv;^Jlk{Av!sK!p_dJp}T^yrgw5L?GJxIRtB$!1#gMem*wMiD8X<_9xZb8hG#d2+Ty zPY08+U<8z`v?Vv}4&#g)ZSQe?eyUiiX0~gNMGW^Uvp>KA37hI6c&vKV{_3 z;F)9iA&k%Nj(ng;GI0ld$}n=LcDU#rWfnN|nRoPS^KQE^8VSbQRT}2Fe17x67@564 z;?_01UIykzJp7F{!Ty0;t1fDvkj(4MR|TIxGbm2jB%`XPz>7ZWU*XeiqPc-f{jUx z^}s!|ngMt$YHnI2I$!SKBG0T}PDDeulCQg;E!#?5(TX_wN%bOUpjjte*`8w{RE%hj z5OH#1g%ZM4)ZjnVo$6kC?Wcy3H`Q^C5ILn(XBR+_Iuj3IWbTxs>;s-Bq}=lj zc3-tcg@FVk=yp&QWUJ>;|LCZ|VSQ(Z8rZ6$$@ z?;CUM33um_lkR-2&pmXRTi3zR&*{$gv>C;_Wjev#@K@SDMLA}w7!Ttu{9la-YV>|a zAZAoaZD@PDfeZNU{&eb!{kr;$qm)yyANOpVCGS@e?x7?oS#GW$ zL7}Vmz;h3&$>z@4eRLU|WvY;$y(fB@ukHiz?vmMnVLCZBEIb=ERMKy@AKmeYCP5>! 
zOK`GK=^+qo#E6*|bL}M#oCTOp&4JR6BGCdzU#otP9>W%8lsDw>4!(lguiQEq(ry2m z?&Gt!Hx*`r3T=nk!Vfdubb7(}*%%2R)wip8Te_+HAiFHXrr-}E+Ife~vU_L$U^_nZ z2fSr-{R?=I-cWZmH0aCLIEpfFQaz&%%@f4zpP`26oJFN2G~}rW@L=fgcT#V-5wS3ERQWEUI{ z{9gQEJ^AY#@QA1&E75Z3NeJSiOz3@5MCx3^;=Ia=R{D<|%e1Tnfc^T}ltt4iGLQ}e zn&MWD9yny1?E4(&JZ^&=L;#;P$b`u?7GJZ;o9D%pR}RcyS3i~K)haX^`_p5r+ zg-GtxKbA!<9J$EBnd4@!KGC<-SCgZuS|}e}SFxj(zj{%l&s1C?U)&z92V2q&{&JEW zf4Pt3#eTWn82GL=BMhYR{c#`$xbfXOIm^#b5sElhp01Wc-rTUCJMJdBGSI}by=Dsd!zmKR*B^k%qJna${ zC#xx(Lba8mXf!-rprqlPcjS*Mg6e|ySJ&L(0wzuvTF?<#~+D7+#-OjZqerB_ni#M^z!yFclge$_l(R{it6ma2g9AlC1W)vmSNZ**1SJ# zTnFq#;^7q(^1&Z_2&CEII_gQA?%u_uw#q@{sjzE55Q7)U^Cu#QR<=8)ab8BD3!&kU zS??e$mo2bQ@CQ!$6iZcZ3VHsQbjt1oCOLKh+gU@sga$h|tDrhNZ?21NZKB>oxL3i+Gar0QY4f9qTktS{W~*#B#Z*-jx!ZiP_r(IJU@!quQU!%1r6~ zGeIKDdo3ySeai^Rl-BWOBGI-HN4K8cnB88iUt(s+MyDnnGuK20biZ#Og1NLK#t2Ot zON*1>_22_D7RHA453|V@VQ5iEnCJb-`Mj9jm{_Bi^QyN8uLD!Oa_jo4m^kG}_oS1Q zj?dh*s`D>LjD3p)@5s2R{f7vJb*0t)9mK{DWI4o=L}N|1bX7qd)=F3Q-(e7l!mg(2 zrS#W7Gh@~}bdd!sMLmvfhQwI4ssTdr;2W3=*bABVt)&0gSr^}>qKaAPpKQ&`yQnPxjB|Del)Hn|(z+Iq)FbPm3w=Ln9? 
z9|m_z6{2RP-~#(KFLpoK3p5I9nuB|ruZ8*habQ29j52~}lKX1#Mn^f`6OW9J@vIek zQ;S$l5i(Rhj3Wa&zeGfN8^%Xj5f{dS$}YAy0FC1vx&^MRHt&I?GKOG>dAalFZiTpu z56;f>ijMLkbzyLDu8sgRVz|7Q4R zKpV5v<4A6BOS}sWlvE-A$*>f#j)b*MVyY}4GuJM#X}Qe5|6q%(i*pX!H^u)t#Pk7T zBLr}|ZZ78=#)2I8-|%&-egDJaExO`22SbI6N)b@ErKAvrXq=qpKr?u(;A*boL^m8x zpGnh_%FU?o`;8@gme-C_PwudrAC7{Wk3&ZD5_g=Vgq9hoE3IT}l{ zybc#-xD7FnRz1cUhWMbpOX!2)z@POo(<2>jw=Kv1)vq#UHuuW7tX~}Ox`n|CM+e3I{)yPcl27;9|LKZ-3l zh1opkvN|i>AT>}&1kFm|R~#l}zCEl$`Ot9*<3_XoFy?lV`F>7ShrZW>T&)}O2On3P z2R@*>rBV=)e@{kbJp|dMG9rDF+uIW>p~10**5c6BAus<@wYmaEXJ$;z-=K`M5#0yb zV+gvkppt7l1z5N}g#^xm+mQB)g2-aNB_F1{--G#CZ0L-E{QRcytTz8Ln1y)$7+u)L zu@8N|(ngvY&P(_A3>xEv z^D!YBs6VU9G#rZ@v+e4~kGs!USFAW@J*^*ec4gHRBJRr;5V4zYcLxaj_+s8rPgY5_ zf0D8lN8)988E(wYiu_G$GUdyUBhf#5w{Kr_nYnX4V?z9pf&cuzSHHU$4QKd<*Y3o= zQc*rVh(ZC(;XkMEw$n6%_U@i`Gbi>}f~-5$;o+QMiK~IC4Z{!Bk`{|@haBm6_Z!E` zND+Hc)vJ399QCT;$P$ZS-=wZ&b7TkKM#ypuvf_Dx{cYU#a$nJYrtxV858?*8m18jb z>OI<7Rhcmwf1rLB+i83;tc-V^CTNMMr39#blI*Gj3U(V@YbI$j{iiToHj2$6teMJ zdoWuWF9<{COBnm+dT#b6|EN z1K9=N!*Ac2(3DOat33E};cwwcGg^dVa}@5kkQ(=hiMGsm4YD}T+KwZ(O%~FC2|4xA ztzKdZ+$`tatp8y<#dTbro4GjY2I(}lM8hObPxq{Z5n0#2Am|R&FG~*r;R$o(vp^3> z+;x*X#vARiCbEJ<5%rvs=>52mFqz8l-LhrGsJ71Lwh8r)jEr%I7Fg3`5B|DSyu|2xp?h94jVgS1^!Z6XlAG;(Wa-`2XrK0a({8GRXiGTHGubTsno z$g}xWfd@&Eu$c(*1qn63brq`Y25XjjIcs)St2Ike0+P`FN~P-lZDmD7HChc-rMh6( zh0K0&KFQt1M)9DFZRGYz#lX0jSH!gHL=<8JO;h`!0ABcI1%iA=1K4|3@dfd|_zKCm z*X_d3X{7PdBq(!SIX^+Kvi@7Mo==~UfK}=}5(gEfWu(wNj8=G16J``PJL6gPh_U|b zPzZ9kC^>ok^7KTem1PZo^y%c3`*;88Ob0cz&7@|P+=3U6*iAl;ySWf11RDq)$*{VB9fv5Rk4^LS0Un&N`W*)Dty4uh`>2w{^KoH{wiKu?wX1iW$b=jY-Uj&Q6nxc|r; zAMJw(dG0RsbzQFG{28OFu_q^D?Wd^ze0(vo(R(<2p^6)-z@_BU5rZ>-{#e7!vWIkH>^H6xpw9aFkB`UNZ|1F9t{3Jr&~YX!o`eEV?%%R0gkiRI9id+ z7aBn9Y8fxJ`!M$!2i|LZa+Ri6+;wuYOLlr?oIyADsA5S)M5-og%Zk@EP`V}!J7zC@ z$6Y02)g5Er9j0pFlk@A1Ulc$LZ*Nn|EnI~wCvjUDaDlOF$RIR3@m7i=i|u76M!D+p_+v=CUsDXanYVc3d$IrOOaH9t%M}>RlV|rL;KD}dVYus@ORQb-F^=14WTioX*VvjS z5;B7}Z~nAp39+1@Ex;$vw~WP(?;1!u{ouBT_>gCLe#J=ULZ^tCyB9!INdBGrGxnp%sv 
znzqH1wiz`P^Kaozd5M{`Cnq|z9>#~G?{!c&;v+V0I4 zT9HyniVzzn98hEpd9V=wIXM|#{h%GsyM{#%jy&IpLicd<6zlR2PC>|LS@bU%aB}Nd zbt4HCp#1M~qZ65P`MoGBV-cJ(+vlyS^8A-B%~)6mhd^FO5|Is42;f8qdLBq}vTExh zaSStG_<_6P@JbU*`Kr{zRN!)e`MhGs4W^Kl^y;wkz1l<|16gI>&3O^wZRsPj3o&i)q_+?uHn|u>=fb#PWRIV>UvDO zcudL*&;^R6hkxB2f)+PHirxMVQ~jTf!T(6b0g#aYzi^j-f>oUPo+-fkU)f}U9~Vg8 zJM$I3i_goy@c-By45}DEFrLbWP&KOjdgq#-Rc$2+FZVXLIhMA08Sdw9N0H2EeC=?V zFR9=ff{^M>vK{BT$gW_HR{7DL@Qqz@^{2Yum)zD$nr|RptKN)G~Fsf`Q zm*qQ*vj#9<51erAz^Sg^`8u5NVkruE!N>$J;E#?n@#1{F&d&NUX(2hLQG4mP0|g`H zEE(1HB9!l5|Ev`IpsWo0Y%bpgXqZ)+IkTl|rQfXNANsx8J6JX&6~UmbfKbPLm6V)} z%uMC_^t@^%){I7j?<9owjxB2_DPkiOSC1FZVxKR&aE^S;lLZywoBUgTb!i^Qb;C6B z8&Uctn}yUzjS+hdp(`l+YPA4IeB;w6Q|nYlcux3iJniXN4mq+zbx#}T#L+4!1&m_% zH2Hr|8d!3C1~c_C<2uAW7UZO`nRc}a^YvJ~C9=~f{L1bsob$3V?D(%kr&w9E(zfuy zSd->hjkB84!+=#Y$=o)Zo|aR*F^=L(84^N*+o%7TnN&5W#FJPiiWLeEx|AR7niy6# zPeN{#a76x%kmZ3jSQ|(ImB0cJt{2CRy3|>)YNW|ID45?ov;TY%tg5ep^F38-n`%ED zIfri+`#zmLJq90;m8bTyoUSs-CvNX&FTh1set)`fR{pAz-@2~lfLoTivUX?ZvV1Tm z!dt>3t&pBjO3qNmRKmVf1NG%g@b*)F)`xJSv@ZEa+g&%nny3hUm{u1rx&|4F8L}>a^kJJGogWSMinNxP42uakoCD`l zs|$?s-_*qTj5kKf3p{vJxLJ&*S#q1=xYXJ8D`}p#wyRfu(i{&Q!n5}vqmaitFE{dz zfJ#aNdha~gZ`-ZY2(4rsn12I4FB$Jf+7BuJpIx{AtL+BHz*6bHSu)kp2!NSdy|Vou zwhg8KR!&qyJ?22Uj?K*!+UneXp}x751h;U($^S8V%GtN#LdKUbvrvm$owa!kCcfS% zu$jbrNn}AaLA)aHr~;B}c9#=AocFryU;;R%(AGlnQ@!^Ep_MVtdk$H55C46_0cfK( zLP)RIex?#vM$6l#JKO#_)H{ccs~-YnQ;89jwvjl0;iRxP*zCLeg%PKFQDu*ymYvc4 zO{qQ?8U0}>*>*G#C^{R}l5O8o-8WY}onhVVyafSTHCDr&EVGo`Ghw4wu8MBc>IU_0WkLrM{+8K{IXFp- z*&0@MwT_b$$Gp~PB?c0Bk^SpskAi6pU~yo^4PQ!o?AeTbw3H>}r=9SjLuxpway^L4 zgg70d@oMJw7(SG=<0!h{U-mJlDi)Lna&dfN9mM9k+!QnkTj@ zzM&dBZ;cC{&$ex93pcvTOj(42%7;miN~JD}eXoMjsKL97PW0!;YitT4jfD0Kjf6qz zcG(;Zh8IC-MS#oay3xeKkuj%R4eSyEA+xEyy_u@MfbZ9@Q71Q{-`?=yg){f%>^V;> z!aepNh^mTjSwK^gwDGzsyZIls!PBgm*F@OrI1g=Vbg|7%t{?jF^}GuX0@j(2YTa&D z%(8Z=$z`7OrV81t-&V*cHY3i1V|ZJmF+n0xWXTRBx{M`zx@(M-51wqDiI8Pr6KvWj zU2?pmNIer8xr$f*!9(xIB%+mO=f+mjsEoY9^9t=e8%FybV zC_w`O`H~v7m+~29%$lRCz6y}Ex)s@LDq}d*1&eAl$Tu 
z#LXM|LfO&XG=k%qBU&GL`YbuRGcFuhm#YH8{40zj@-P-Y338|((XrH-H^>8 zBR`l~_VZAnl(03@t-uP#`+^YF$B&)*}w4b1ouCn-=CwHghuD|Emg`xnsnzrD@N3ic+K z&ckw6v;nx_febc}zSn!wcbiVDn#WIx%yB5{Te+}4?YzqziBi8y`-QfvcWOK1mwLDh zmg6OHa~z5E5~QuKH9>$)If&2(LwbE7fy8scqEDWR)BKZ+QOU#eqgNprSG7|*y)RQ7 zsPy{I*Hh6P^yPuFjw_aZ4pjZtJp;AhUCY#fhxCJSm@2Hje#SRBd5>Gju5JkSVwxKr z=VhTqS8P^xuaK%-Bi;dj)R1nV%Wwa&YyDYcZNd1lw~t+V{WA$s093I`gbOsSlb!ZD)3wRmInIwP42;x+T)5TpSQ8DXgcN5Im&2d{V{Ji&z9=0O0r`yRw2S>`$<@`o zAEB@~mfw(ZvoD@_sq_uIH6T06*Os6Fb`KG=R^rOGM4bVHPiaiTnf4!!08bI8R-#K21_O0&4LSSn0Yu~XDmlK&6C{q^A9;kc00 z6eR1F&^h~@On%~Q*)1oh6-q03i61%5`9jkpd|ntjv7Di; zn<(}OHm$AwQqg3hvzVavfKo_GTsUo{=v|+h7$AE&Ev;-YMdxwn$QBTj=IH&A@)J=} zf&5rFk%j_)p);Jke4=@kua=yY^qmQh1ZT>|=-NDc%2N+8tj1rZFh&x%uP&&XY>{Yh zwqM>_wvIPmYtNl9ojgr35E}7yBF4l@JaMg{R!E7hsQ5i0D2=Ci95V+z_<@1=%W&na zlr*$Nj(vyC&;<+3;(wfr1gq={-3~SP1u6eT0@z@FHyl=%DF1OB2gdwEoWIZ9J~*~R zH0)1VF#;~~BGJAY;n!ym+|0N*9uT5CsiM);+(nstKFv+5OZvGFIR3?Ql7hKT4;RBs z{E5yi(f#__O$e9Fgtkl`#JVbj83>obUx#$@>IawL;7##~YzRJL9t(gN7CbiL;JDqM zrix7^S-8s(qBM|jxu`c^_|oY+Vgmn}8Yu#`iTd8p|3tM;rzVbpxe4k3`KR7IoUb%u zyz^0xBgKOsMDJ`Kh~78e`T7(P0p_Z~X7*qoUJmS?(URswk}{x!c)s^M@CQONt2%a0 zv9aCKu0Lxr`5DobWjn<;?dnLde*WWL4D7w?;~F*FDQ_&w@3MVd2cPk6r+-Hau*>o} zlQSJtKL0(ZUFJ3O>ZiV?>Q0L@CsbYC9)ZK4CAqo|>3Y}gDJ;9lYzw=1Kby6o4)^(w zqkp|!Z2U|2qw><3k7~yfS@Ud*G+*ZGt!Q0)|BnjtRyRs>zIf;=&Xm3{#)G*3dcZF~ zSr7ddh&TR?@3|{Kn6M?z_8AC20(4v@&UEFLhDY{=Hup$VC;Ry{NvJ1%$_;D1X9Uq* z{vUXQZ8GwoEM?`+{8DU4Z_DiE9-mj|)Hf)|Md#N}nU|hpu@CKM6eMkPa!TraEo_$J z@A*0gMB*1`FL16LkMVykp)bBSsC2ekN(b9IfO5)%**MtQm)|rMIX1TdD4aDP)|oN& zS$x_iz=Lo)b5#a6l-kDzSq(`Uf9VIi6o&cl*4q@*2WAf z8lSBPWy2gAP|t9-gxEDTQL}q&S+u`awVq6W8}Ik_ zKhpI2f!c$y^-4eVm5faOErX3;hc+qe(^I#}`#{@z`wsfj!6jh`diz-BB)k$r%E)(w zSn_ANJhSPD8ThF=%fBtlaQ8b?Skf((bmr)sty<>g23Ebx7Z^Yw_DXyCYZgnkVLB4R zB`^L}3 z3e27_FOS<_S?$RiG)Mpi!`@}@!`S(tUXkj!V7@2M%?y+K_vGiDP`8YVLid}$U-Xp% z;cZ5WAlYJm`m8_y6az`GpF4$ zxM40H^^`3cqcyO83 z`=GK>zlIk=Anu<8#DRHO5qJ#Sl+VBQc=_NtxU$c+fUh|~AN>8VL*I_o90>n9v>`x| 
zt3waGBR|?q>@wi2lVjoCUf`L>iAL+AZrETuN->3$Fu^2?F;+ns^8dltmxr^per-Fa zgSLv6qNSxRswi4xso`|uppn)rh8$Hxq>-9xwoh9%7A-{^Ls~>kq2^g?t}1FKK_rTr z6GH?^`t9?+zw5hxzwf=??|;{hy`N`2&%N%o?t7_{7d7i)h%!OG>G67_d;b)*6Pg!ZpKnWi7Gv1sF8yBPOHw=V(BA;_uIZ(%0fs4Z+gDPD9uTkbMZ@54;yBDIF%AgTBP@lB5B+#?KV(W^woU63>$sES zlBCSg6&LASm}%)k+i=_Pyigz)+=u0y3VVkMsqxbN`;G(8l9 z0rEiI-#NE2njQ|%Gz~Pk#ivlbV-q~n3>6m?g6fJ7a0k==z?y*C(1#3>>Y|{hzpd}d zkD8@8gR{hi7UN3zJjN`!9U=HZ48w5Z(WiYMS(3epQTkae$w$>xc4TecMo0SxalhaG zF|d_g|J-Ci9jNDIgIPmYTQ0-qg$rP-YnIP}6y19~E@Q~$)^cHEz#Au}k0KUk&SR3&AwRSZHmiuO~> z3A6@kmN54E@AbN(T?c^TN3GC4C!(@)WmX}g^p89 z#0yvnBm&|0zs=QzQowZN68JU z|3qt`dEy_E6tO3kNG7(9o};2!mSUO&zeR84!DLeJL5KtgF6h2HefPyTT(Uc>IMz19 z3|1!G_V60;R=>Q!{M*#!UaGv657R7nGHB2=b7ENa6!gBfe-Qv3R3BKxC}=+CZ` zMuPU^^~o&-wIM-wjWTa3I5cb&k$Tb>;=r!ZPgTdqf$M6i z(+G20ldwb2*9@uWhq@@ut5FUYvGCHR9g zX`?c^`ADtu4Ihr+(tL)>-lWi>DnoGf+7?>iPl25yw!0-tUiRcDRCn&^~g!q898*Uc+| zil#J+jZh2xmpzLK2Z!5*x0(AgwL?GZp4;bNd^6`rX9skq;4fRvPJS&Gl@hT?3S`qB zEabc-SifDpBH=Jx*ec?z=%n`iElTFq5*>fsA~r=|q-U{*^gT-b>boP&u#|$K477Lw z>B-xVvXwcPGLyaZ?`DUdVwsnFyu$JmfkzGL$%lHsp|V*PZK8=G7d};`ukLA5UezSQ z+CSi?x>CoQ-LOGkk&pIO9j7R3zwo}%HR z^g;~5r|JW%Ox?%U?S^T*+}Gip`ug~ik14Yr(v4YIDMlN_;DoOsrUH7k4~AI7 z1e&U|yB(xki>zAhJdtjn?7+D(?Qra*>evjrrB2GYHl1p)5)YsXmLP7O~SO2AJs5kHf;pxWwsCihJ=yfagp)~pw0`og3`Ppf8_15&TW`B7F2dk#dS;69W?uZo z@d%oIMD^?UB;NyXUFP3E<zfO!94?IdVu0%bGi*zSW1w%8v^BCcv5-Us@N3}!5iXcO4*n6rGO3Q|BR|<{~1(ecrMUO zsQ|-AgK`wP$GbwNO7SlT8zhAC{xedzG%EDup}s@_IjeNc9(8LvMCoD37jV9gwJokX z()i7=I5G)+wK$h+ex#Xsq}aW}t5ul&}I*oiJraX(If3-^4_^$`B7hZV4)kGHBGiyZTPDGsF671jsF zW|XME#hv;HsgajGXKsVq;0n(Q3e0C1#eSjJK~6uscFMyAIgD2r&fgfcy1KPnS7dTB z*|+F)ey17CF}F%zlXYW8DpvnQ6XP5uTK@?4^gkU~ML{*ut zx~?_PEFdd6XW?RfFQ z9>ykKr}9Z4DKOaI+3cGClXYRI2*#<5?85IaVTer=U1A1(orj@s0YAGbAcae8oj(&= zP8xQ3H}}2Zm=&W|;a%)ock|*D-8)algXwLSCR+?)oSw^>oY5*v1`yt)w^k#!(w##_ z7P&H|PEk5$i4*A-v8|3aR6+Iyrp;(~0OS2*^&Ss@6x4(vnfYNFfjTr&DejenESP}9 z@9jt#MAGi1^9dH+j`QBW>gPFB6qO53hIZ{a&doxulnLRZQV@OB{HA=m4&Em*%#Dao 
z+xeYXm4SzWE;l*=$V0gnmbmTL;X?CvD=Qz&(k69rPM<6&{_fpi#N}Dp5%E)$qY|?uBiNrW!h3m&*fgFa^{gErhDy0`#^)=#$ zi}wx_vICzY{8*{zQ%ggr;o%r1($6-A^?{==b0XRpsTVVWEBXT0J%s|#cH&1oujKH* zn2(n?|It2}<$SC*){-Fvx4StXsZ^@P-iWsQ z>EegxMdE9=p09hB9<*5%MZs6AT(n{wVodxc{$!Q%#x%MINwuH?43j*|0W4M@f)j*}7B%GA@F${0}RlF~Z7y2oZwEI^|GLB)y-+1Y8 z#AdWjs$@x+dDU4nVT~da4YiE!*-tEzmAi=*(xto>l*fxGw&)a{jLu&$?)LQs3Pa7x z-tE!5U88Jc?f7*>K8;Ui#oL%(>ajj>@_B)Y1}6nLdl(^m{BE#qkijGRc(A92wCJo7 zGlr&B$FQFa@3pWE?6I|9{2j)&;$Zwb2iN@hg&dEnINPJ+H20b?Q;vD{{(8DSn(vED zI(<=KEhj*B_;#F6J^H{+Z!6;&dRB*7=3@bOm_K5$qtU3m;8-Qz5P!w_*6HBB6Q#E& z`Y=F&jzt8*fn}hXUp5}q>PLOmu^(`BpvzUgV4%Q;cN>j7Iz%w5)CMV+_6HVOlElh* zBeP6fGylhVZ;CxP?EyFLSIrvO(;WW-N83efxCqhRG*i>@h9jfvB?VGD`1mhYi){Wd znYxW-UjUu+1~?@P)+0*(9&0khKv1FzgxdW5N*YudXI-*tN<9Z%Q+oYM|D<1XZ_-&wcZn`s5XCg}=_nZ#g z?tbK@EaS+cw8mSnJnOgh%a?RwtaJlt*w(~z%k|7>V$YiSYodxj(lg{aDM1WgfcbdP zUvad8!Ouy=1x_b*D*;lv1&@haY$pe(}+DUHwbi z)&3S@F@#qIP9hhKu9cdpyPLdnG0Q+YU)I%iu@(V*d9AVPnJ^#R(Ki0u3%(S7){8UI ztv#2&EK;uyKk@r%ma#nNr(n|P35+EdXn*grwv91)TM)j7mvyo&^cS%kOjSI&17`~x zQO6RSB|=SEo4KJz^vw0=+YQ76p%;dW{n9x$ZQ|c{f-Om4S5Am?K4B~Z#BC=RKea2b z;HyP<#0KDEhObG4x>!3vl@X=i6BwB$NZY8dD2>8dsI~R^=2*Y5$~g=08%dfJsv?t$ zrBodn2;!j>e4uvQE?&TjcKi~`5Onz~n0tqL^3v04C`aC9QQmhTgVwZX;Xv3#gvik5 z_HeTXg2-^`A(WD?!7RT;ix{(>koSSqoa~b4sU=JX2e#?ISIVkD2joGJp%sU)tJZ1y zws(`J9@hV<{1;o71AN)2WZh+420LpFR{3X58}?5VE)4mHlpYRMPu=)(8$UTdHGKs_ z^>w*K$mc5vUA0=}AGYpQ%8-z*Zw&gZS;CbF{X;V~HCUxfMTF(Ur%ecrR;L^aRZJaK z2U}uRFpbvLF*Cn-R#-#rZRUg7&8Z=# zMLn1Oj#%7lsL~oU`+Vj^s2D2-nQOZ^3RG&vKQ3LZTo*#v=&a(O>z``LcN+Jsfmw3J zs{_Ych%Ps00tr>MTc%Mp*!%Uh49$X|!s)3$0TG7-+2x=KrYZ$8ys8X%NQpa9oQynD z#p76N)O$vTAxPjv%a!Wt?is8yqcnHK4@oAj8a0uFs3?OKz_nu7zenlM-<8xrM4+T- zKD7RjwMy$rmp~4MGwkiw@t}SVjbS&wt>%Swrstye4~Oug=J{j$oZ&tt^~$H zZ?&m0wW5tf(2IDjF)aRQQ$ad-T;BfBD-KgAo?&~=+t%OFbVob#b+ef8mod_yF3EG( zJW=$b?l?wr4#AxjW{a1k5QBi1EP{2K#$KrP7_u4tj(s#cUbsD#wD77~f_;{5l{~qu zRqs?Z14J8cw%62ew_9TZGEwl+AR1h-;5Iy#>;?KW&|Hz8Up$!pd~T<~PbYAarYPhWPwxnDcAfjeCy}jX^ou9+ 
z!pazXw-cO-<3QXk{?X`-I8UD%c<~3r6UtoW>kfl=Z0NrQ8}iVIEW2S)v|REyV0P8s z?)2@(;%LL|<(A%k1SqbYGgzx#EoXr|mw+4>XSy&qFg@6I5)yKqyCA_1)na$_aGxW9 zVY?qNqvwrCU=C3v*U}&PPz`w{O>@sEK_mGTO16^!?A~M0Z^zzl{%y>i)K70vx0!W*lV%jerjl@c@6%)8nnKYm;%+{(K`tbaF2O2Sp zvN5Zy)C?fYCjEIhr{MEnWdmNi`*$CBn(uQ?=GIi!@1yGaKq^k_GMFm+0lS0$zPh~- zmD{XRDmhH+4!x@leHbBnRJb|^j{r_ILUQdrqP@0FA<7T~lcDWv0!4g_qB;tIK}M+l zp&vS#@eO)}Ov2Frx*eDXm5VpCZ#&aCo9`m{K*+LkBTvn2^m}v^`^xW+)Esii?I)yE z{XLTX{>J^U4=HBXfoMDEb%J4>(D)0Z@5jRH!aR z1I=tK2UhH&8cYwIwoKbUEd3{RUo-JagDL428JG$yE^Ltnb^owxX})5Hd4|nCL?zY$ zmZrKfmzFdBYnaeLb@#9^HL3FD{8v{PXx??zHKG+Vq%h?8aeB}Mg1q2XXvN>-E?*ad`V zHJ<|OqBJB#NleY;En#eMjjY(D3CnT&L0p(y zCsx94>?)Vq?{HBLh)K`jLpUI>w8&{Zyn9c!YI}7wy_Dn--kYVX3PBVsev)m%#;$=% z`%0$?DPbo^NJz+~OqX2B&EIDqJezIOI-H8$F~!rdj*oXjmX}F8hr$eEaG4yud;NfH z9tot5F+ZCMO=lc!%+u};bgkfyTkkceh?HRS!@udP)}@Y!FLfyU0aLeVra!khm};!Q zvkTsneiPRzQ!D&Onz1Kp+)Ha{!imY12=j-cAvF^XhYOIapxJekF|eq&HeJeSL@YI5 zX8~wJPe2ic`)!q`1hpH(jaB7wG8)m|v>>)40007SRI9o-lOI5N?H41tzj*9)a3kwX zFI=d&<2ld_z`1rk*rs9LE4d+=+}v1A32|Ct#<-(SF^6_yC!3xoj1keMi}F?H-0zO5 zH}LhK44BppS#BehloOLnQWeAmig{&o11lNejaJ^`C@|J?O)ob}7dTGC`wM#YN(yF= zRGo7J)wX**_31++uiOOgt$981in_fJy*FTxUHB z!O)BcmEchQ^nHWCs5_Bg$wmV!KUn@DxznBj-ghv>NbW^&4#3YU z@t5CH>(BKz*2}=cIIq&S1|bt6%C4pKX^ay;yf6tJFy{pi(j0797+q$?P9<#nHxYKzSkYb~7E@#;?-07nYi;aoN8JDhC~w8O>@l zM|sPRslXxi)n#FJInC8o&#A`=xrp4RpDk5YA5v1e!4yZGWnA&?>}&KPm?+~sBu z)vSP}%|lmH$NsDZNd4{K^U9oMS2NP2z;Uv*(r=XFvmH7I>rTpkQ=PthHnn5)uVtN8 zPJ4p^P62H4ca(pq^11flCstz7F!FgVij?DDXTB^max81R9*NA^xc&qV#8xn$(!Lq3 zBld0O+TV|Tu2TWLbH^$Z(w1>6P8FP#f9MC;t4(|GYOCt33-k8+mL|#ss*F193ugRI zRum81RO}YM*n8B~-#pv4`?KFvKZt@2J_CCfJ{hngo>1|7@iZbw1jNP+**@Z~9r(`2 zgTOZe-!mth0RUW?{?20tG&z>wFP=M^yr7wB!dby>i4I^-VHp&tnPvhT<%z8-&!noy z+9mp<>mfUw2Yq}s9n-Ji}H#` zRVOuQF>@i-FPEc1-`vKWeM!sO15woDV^{F$gSJ4Qz?Vl$GuM0zOpjV)N6y_f^9l@w zR^s4E2t&S9UG_i_^q4pI#{Nxf8hg~d#YOg6+DL7bhRJozwO5Q21>4aU^Re+T-DG^P z#8(@y(X`@!tW%%H#g~T=-=}D!$W*XNFY?vCB z;EW#;yS@~}bwhp3Ot``PSgcyw;U&cq`ESyfJ4SFTdC55%+P4y`t>kMCnR{X4VV-Jl 
zONcALKB|iIF^-we`S8~eWTc#O1zWij4|uIhe-gK94=DApR=n;;Ot#xs8p(gxg5r-N zDdpMCU+cFk6>e0vEcAT>J=RwqF{SKu`0L_OicR#3yh0~`oVY3fsjneH1i zp_PcfzG70o}X ze3&*Oa>Ee9<@EJWYwbKd;|c)U($qqM;CNL5#dB2;L6a&fr`7D!~`7;}Wirxl32 zpi31Aef|xv9UUN#ze8j$k&pU&l?Q#-(Q*@RCmx|%nX1irN$yp_tQ4HNxE5{2BDw7- zGXZeKQ|C3yqMJB`_FYpdq9^@ii~a?Z%ul*DnEgP@2xORhjIV(k(FedOL)``3ek>eS z{FyRVKZ5Hdb9rV{+-{0sT z-G?3xECEl8n$LQ?ot-;f;*;ho=}mtw zV={rr9KXHDQoJtdbx2Q9t(TY9U3j1NNa$f{9a{Z+H{nA@zeUsTSxYOo*!VrU+UB}0 z2F?=^SC^-K8b%nt6t9#I5jW4&kvBY1dCGz4(Ifxxr)!;vu9x&l8!;ofk49%=atj~g z;9}yZML>(|3Yz>D_;Q&j$bNv%4|LyGV*>P1!7XY0Nu{pOgolm~`AmuY9`$Y+2#(uo`tpU!c_nnptzseD9nYbH4X!97Ja~?m@T8e?J*4jO)F45A?GGKrG8MlFPzi%w{mg1nh#FVS-=;$?Vq7mS5p>S4K_4@Q*)<(@sP3WJ(^u&+}SbJ#Z8|3zYzRrkd@qs)?RlDNZ_?*E+wp!EQ zNz;q%Oq(kfP@p_4$uJVFNt+(RCc(Fkz6GfM2HX(wO%>3+CIO7URy$zxf-U<}O%`oU zv2+66=Hxj2hPr>FD7q3NOOPZ@`CrH0tBzp*{@k{t(co04Vn{?)MJ#iAt@nIW+=`pT zfj3~PbqU8Wt`7lc!w#DltOh$?!b&9M1XRVr>LCPwX0B0QSwG!y zX!XO8**;h$7V46D4PUa*{`VyX{$ouOZ?R-S8K$lAwv;GaWJsOg%4_>_P8^hFheNvU zwQ^f$A@ynZn-Zy_uP?<}4Xu|DRJP*s(Mqvuxr0;Bj(_zrXu?3;a z^8pBY4t3JI?YRDl;Zc(dv zjL-^z`uod86c6$RqCR2}IAs3s7q4xKi4YVmn1fkTih0ci@rim@9?3m0$rETi|MAd! zYoE9H%boHTnUmFkn}Sn#uVINQnoOPR(bdfn&56dz`;-KCK7L;G=Oo4t z5%-_+LNvy{ps+w2cof^R%*ky7Yzs!!_I6Fgn%_~4UMJABdCC*k(K~E|rg03B5Gk`! 
z3!D4Niwu|ti5rh2Ee0gZrVvUB(0QIraXZpvKN1{(vlA=_7?yHIlNFYB=1lF$j$&Y* zRnkb7x*k~33P|Z5qOKmE(7(nGW)TQ-aa+B}b|8)WJIkFeE)j2P)LdROsNAR*h7WAp zmoO@$hc)gY8bzm%*S+Y(1QAnz@mP#hx!0v~HoXf@HSKn9URo6oK_QKCUkQ~cWy04v z{86p7TME50b9(E|X`!QBwExQ6$M1iD(>WP5lu;|jasK^2;x-;}GEjNlyte5(_xJ=1 z2#nm25|$vi7=k7RYBWt^X*K~Hy)ams46PpW+b2leiB}2r_twNBY2?dI5RoDTa`9kR z4Wde3zj|=2DkHNl2V&R{vk5478X!ZM6{NkIBG%(wmn~&1W1%ZpG@i=QK~MBEV;Hze zYo_=WQdM)}48MW(u(JC86!hjWpbIU9El=yvnaQ|`B@_V#9eps)qc8!G!GQ^x1 z;UCfH{7@OzOFH-K(op*j9L*>Xgo!4>1()hYQ`}Ei(}^*<=BR`QKmZpImRC}vpBSc* zmFTUJ8NUjSbyQGAv4vtQMy-DiRwvj~Oq)<);ooX=n=kjEN=G?^B+^6^JJGsG=JJ6g z78sXuY7A5E7n>G<1Lf+BYOk}S%Zd9EZj`;K_0 z6k>p7-RAHXSPY(9nU-XntBK2@Z&g<;>1cc(RZ`AoxfZ{n>D^L{02}xnkIhe?1C_j1 z5lHdpGQn9N6I3;bRQnw}I6J-o9sL;u4AOe8q2@bHmQH&f{nil$zj(?*Sp7TltG9(g z0dRWwy48GU_4LoG+(jp)L1hc8!SGWoL8B4zZ35usRf!d1->t4oHcS|AIT%ww4)4>f z`JH*Vw+K=G2QzO%Fbz1G{3Wh?j_`{|-v`T<{|s!}k$a)L3?d5}1NK3<*LQe7dKldw zH2ZGh+DFXw%2LtZQ$8t&@5H*(R~vf7o2_I$&Ld78_Q;WYm{A2XX5bfld0mi;6z~M+ zf5(HYZzvRW@G~#~Rct|x?(VY`64nAdg`@oVO(9T{6iox1>$9vgmRKou+7T({@|pMk zKxw)$%aoR{P7yT14G@<*@rUf??%@w8rySAy#z$Mn|lOI6q1uAch zzeJ!LhC;LHSfoXnwKe^C^Jf1>jF(2>IN`&PTV!kB_0eKs zuxGPoQ&>>VgC5`Iz_DY9CZv<2172_S?j(9e#F-BK?@1D|f@QOKQc zXbPwafs9PjIy?Q(nw7E>}@%xs-{N%E&j+mFu}_`U@p43zuu ziNV&nS7c7<##Nj`I4~r23KZ_s=XaEyk_sUqMvw%H?C)6OVO-f*HEYwCUemz`b%1*XwA#%-c z**;O!9*)q$Jcf-H@F&EDp_oNM8h>h>sf?6Ze>$e=3WojJHY6uNgg(*5+Sc@3EE#=T zJ@$RZz~9LWTe|LH_0Vk4utjtQVs13qf5A9f*`0WXs7tXWtA0^7;u~_?x-C)K&MkV8`{i zkMyya`H@P9l1x4DmU+;`K1;yuiK%W300$1Q|8RJ=j&?7__%RYEx;a@QvS6TDij3;zV89)M#o` z$8d24tY^CUvhuz?)IiG z^Gg}k4K&^w1T@l(Q2KQQN2HFq$EymcD>~19$tJ*CmQho~LqN<@;s^V4I)dn@U^o5! 
zaW{*nO56L7O~xx6^_Ng|bYt>)WD6}_Bilj3@)6$^NxcE*avw}dD8}U+s^x9?{*U?= z^b#B8_Jv;px_eo2+%HCn!1iCEx}_D}20v~mfc#fkGZ^ak8C_lOQXBVYh$|5ABpF^g zIyMTncbyJ3WQEb0H;#q|fr40;vXVJsb!$S`hNdUo#-Q{g-Tm_?JKRWwslrLUrk5#G zKWG=Ht*RjWHT zr~%HUvEVfs0dEgR$K1uggsm1EP!)#-UV=tE>C`(-#soKp#%|U34_SQg7Kkyc_@fHB zl0$KWd+J+jD^qFd)|)Oru(sVulm+Y;j{*@cvt9}j>?7|3M?5L?jKCN7SV8o(4#{KX zHN(mG8L8NP56~J6{5{CjSUW&DiH|Jx%XD5RQl(fndA)={v{gk~+q4ME9LoR2g@te} zxSJDrsS7ej4=sJa(Sid)$R=a+Pb&s)Ta`C8pUXL&uK2T5W~#X9l|9R9EJ!1aWW$=u zNPJBHi<6@NTj1DDdSptmhm!~^od!{BCuOB%d%{mN%ZL8)49tQg0Ef^UjYE;~)P5mW zGAyc7C$a#IrrIk)g*_;bhVc;b1yfKfbw_+2k0Vcpy_HEBOm3)ni*r|YOb#Z#yo4cr z9eZSeP$xq=H5(qhdYrPC-{3Vxz*}>sy$W~QoHSV-)K#yccI;8Mmh*=|FS2FwX95-a zDqIVsb)BjZAAx0Y+5V{Qe&N9IeQH2V;|&pP^!60mlsHhetoLx})-W5qmXH2U){6P? zz3=apADJ^raDKdEO$7vhxC(8!<+*pmCwEvfeu}gc;Rl&s4C#54IW%$uHa5R!L&d*z zp?AI1)jm2X)@IYy!aXrmg+5O56!O$@dQT&tu(lB{ca`2;$gyk$TB{zeU9J9wvIb*QOvqKJ;?nz_@`IIl~e zgZfr1+rN18|HI`*0IgcCAvr%qUr>DYsM_aw5Fv$A-$@Zy>!fr@{?6*Z7NP z`y?2-txzsQ2lR1i>#E1GMW-*&``Zop?9^4OI_{7vfLe%V7TB_LjK2a`xXIs?WpHpjr)c<+-Dh=M!g`#93q> zYltCi64ymrHXdBxzEnLNehzt+_51R@TDsNLYJ|K}4O`a7Q$XnoP_u1VJ&?Dk34@knsBzo8EJ0WofEQ=@&jY{|Rsm+iLpekomSIYp129Kgwa7MD5 zlz`|Cc>A^>RhY&kXyMKk!kXbuTG>td)Zvf*?kX#F06KTlzL$TbH?QtPQ~VDlXQrU^ zzA#rLEZ4lp_uxsgqr^LB!|KaAs4|`-_n(^FNn+Ghk z&qL9kCp*7ej+c`h;r6?aT|LlyWhb1v4sbjEK~(*rUtvIvizc`$9qYG8xhB`mvFq0i zh+TlB!6vvN?~jLpovT0(!^77wnYe{k|o^FAj9Iq3{#7$ke?EZ_v+< zG&w=GonR}KqZFZoSBkfQGGm`sd&=t35BvN{i_cA`o$i8kgtig)EDoeNT*869Xprau z@nIl{+sJ~P<5}Qw(RvNorTlAu{m<^&nzr`OpTTbeuh2=WDQg#!Xz7OO6{_`GM(ArZZeJqd*O8wktM=cDkf+n}-Cdd#zxQeCJ^0V@u4 zIfI}2S%2t9uxvFh+tJ?MWA88)S?o|`JHZnSZmx@%j;;FY6z30MzBVmA+-RrrZ2PxD z_GyOI>n4U2TOn@!gl%Q=Vz@%xu+%peDlG3OsAeIAJfchhWXzr5N@jnq#~2TA279Dg zgO$@UEmAWQE*W}-Q zG^|`DLuZpvxJltZ==VNL;)3wpfz>Ng6VnPTjf2+^Fi0b)eF0j+RU5T*8tfLOorZH` z$({Ayrgw)gjN0EB`l~+XW#(Lxr`TY>$NJ@iVN34y!$T{ae@hvww&D z{X|ubazOtDjz@_H_%E;55+aI7x&caoL!07Q42vc+;I@C zLTIk$C!VBAoP%7hw=C-Jc|V0e0t!>7qvjhCSzjjT1{biU6|Oh)ETLZ5*wDC9ggK)`c1yhz5;IwCj+5W&Qx=H 
zzVR7*`^A-cR-$^qC(>7I#Fb2o)#&)g%1Xzx=T!>ACEcGs?QlhGg;T`#W~AnJq?8*? zz7T<*!kWmMvR33Tp0suyB3nd)X~a0l)Y92$g~#ko^}YA!=caoXu>`4EIDKpw$m&dx zr)_&e7(zix${#96w(V`blqN34QY?p^4Np2v<|MW?&aKCM#S$LdgVyS@Q%p)pKEMzv z*QgfLvZ3r;hgf3(;;!<(@>tws;!HjOFTa!-z|Y0fqiCp?+f6U4Q_vxBw*ZjQ{{4DkwlN`UmzTG&7ivC(s$yHWDkU8BuKyPzu zEzoWP-gZC5z=W^mI#gvZVFT`SX$Y7p0LtB74OYLQ=g_j=4t=W;u1=pp1CiBA;4qNM zW!=0nnv9`wDsq1})u<;1?e*ih6cx|d+nrHa(BHWmT#6Hxo#&L_e znE<>>xKPRY_+E139PFr<#;SnvYN^I$rP;3#xoE&0yRw1n zwr`Mq|2Q)%=(w=4?yJcSda2GUPt987j#qYh`$GM`0iw6wns^D*ure7gZObZpW8ybc z;LW=(ebkX!9{vH=I;G;5P!B)BJ~>ipuX0Y9L4&9~NOl@p9bo(N6IMIc zl1n{=@JAIyEi6Myu-y(~CjQOICcf3xzwBfi zm7$s?T8+@rt;Ki}t`}IB8hV)sIqW<)u`zuwD<4J_TsU<-QQ0o*Sc!i8QJd0RS34{| zch+c1hWGh+T+s59NwUf)!i)(lyz(Sc8QTcdZUcl$f^v`5uu=lv9i2y7e5w3FLr-s?+u1V@ z%i^l6YD0?$e(`(^7yQL@*aeJ!0^rCL{P!R5my~8z(2d#yo`%beiOserO)puH&ku#( zrZ$Od4XGv!(`4g*-FuYp8V7CZ%v)^y063qAT{xNIp++I@{fEQ z;t2Bn(5NkyfFF3)=BlpvpbdIzF0KgEnE~V+&iRTnY_gNrHbx&#Y5+IX?wTy^xB=fx z{%l#rH^o6hOyn6OKIrdD@@A@OYE~ajlohT9`%X0|sr2&oGZLA>aZZ{bY-qckhtqw- z%5;r_(HWH~Te+z>p4Prv_LjS7cd#B&l@6)BAya3UDeOL0%I8~fpUFt%_faUz0c5Y3 z1?S>b=1dk_;546C#5%nE2DC#rL4y-fz~H@s(LDIDv#_4_J%s@_t` z4U#AFsTB>CQ;RHL<(9_wTP39csi^@d&yg*V8ocbJ@WVRYulAXb$+aQ=w>N5dm8)_d zVUQtBQzap8ar@q(7X~U79hE6QDjW|tpp{#1;lxV%+;@tPN+Y> zTUBmne-K70dPi#|MzD>d+rJi>?<;E&)BX8e%294M~7HrY_I~R~=;| z>qIN^iBZJLX7HMO2(t@Jv}1@(RQ4QS1V^KeT4VHv)9xe6!1g1DZqQgYXK`$EiFvgt zsg)iTEpE>J-o3J<1E6gJBzaFI8R^;4FA|$b{(lD zvSKGRhJlI-J=d?9pPF3Gz&W$f7{!pOp#gbcHIUss`!sD2m)6lwn(nt~j?~^y4 z)>l0-O&v{}OGVk9m}?-ycoD5vh~s=rc5;XZ43G&mjU*6UIfwW(B{yUF$HA^QaTAm(ApGD8}_cp_m} zOoKx3-AZNlMM{#UaY)4wq{RC>PdW$E_dm*x;P&kn)5HNi^fBK_y<;p)EgL@pIDhhI z3^Dz$CFL*PFVAp3^H+}8gRjOnO>!^J79}BW-T2P1xY%x!6N!J;n*w8zuAkN(Y$^04 zgu5R#d(dfOf_a6O6Zc6SPauOZz=LgzsvaWMJ&_=cdPlf&wIol!bNNJrd zr8uxIIeLcK7>1cGv_9wr4X$|q288rfNA;BXIw@y^bNZBBQ+qZxw+8<)K#b6pbw<$k z6Qe1WbKKNDu^9Dkbvu6`s=`PaiwiugWP+S?&VxC$(5@>rTI3>O;bokUdf&37>1@rK zq)g?7`^n}!xYAHJdg)7Lu!wZLfkoW64dU{Hye>Fq&WSApP9IIq0gxZh*EWxA8wkjZ 
zVLdCVQ`^iDTXW6LRhcr$_Z7#xUB88E*l&;kXe${l-PHTEBEOAQ?)bC+W)Gsvu=5~OjR(6zaq0}Vu>`1%aGM| zmDiAye1toMsv0g;=>qzro{EyPo@0d!km?8x`|gEiot?I2GD~{QIacI(+k;`0mltaF z@my89MOj7sLvhFXqA+)NAHO&~$YpT_>5b@VLBAuJIuAWD% zKhM1`{@D0L%5e8sT2j)b)!?6yi&9b)Y1}U7`qpE-t|t#*CUEmi?Bc#r>_1` z#LXg66ImFHR zdvB)<+~Xvc1&OtAp;W`G^ap}_A7t*Fe{*m-CDd~HGyQw(ikta?S@K`NSHS+b(%btE z7i~smXB_o1^DG(7j_nUiabKrqt$-^bLBr1@8&bmPu>aH8nTJF5{SW-!vF}S`-;#+W znUEq$l8h~seMy8UMwXE*V{4L~p=+(|qO2iHGV7nboNL~`8ZV52=Mp5#=2w>o}FUZK)` z1L?A=N{qpbUZ2hMV`1~7kB{@*9Da9dh%+NNDAGwZ`@s{t;Dzmlku_C=_DzGewwbJ} zf-f$nn~ja$*L|C3mr3%+H=85uG&hTstbJnay>2)m)0$5RZFfS7$(0{ zEL${}Im;M2+>&nnD*P5t9bs5zsD1SAyQQZ7X0EEwJ`5(qnc|zAyX%u{1PN4swO6L# zcNQTh#9*^xz`>_Vf^-M<=~`x78^Pmo3Vw#`ESQveyFkOuiP1P3e7x_DB&IJ&WID=; zsq-`qf5G5Fe1=ykTdk7%vqe&?^;JGaio=g+3D=|NCZu&2#k$m<5WR=HMA+)p=RA&1 zS|}csrSonijftc3Xwws7O2g>TgcRkFYL#4^vh^ z4)`1zxg3@Oy5eH+gB)x|B|;6<$j%BJve%mY{?l8gwqGllTa(DYBtCql0<_6*tyP%& zOMMJD7PN%(xcQ}`nX$C%#{oLA+@jbYkGxHM^oj}uv{t@VA#&gJlzsDT7mNI&T&7e$ zDBImur>mGIXHNEFXsCm-!ksX&Wo%uETkE5L;lW*a8!!zVz`Gd~Q-64?ryuZzYp1=K z^1|2g20N2W$+2kjgKI0@RhMT6#R``TlwNj<*Y~ZCzfle6`=YehLS}aHR6wh!I7+8F zPe-FhmeV**zv-rCER=sEjgH0S+2CLEK>9-Dg6?IvN|l7U6RdC@tF?4qQg!AFrXK4{!RZ&JFZ zXn^FlI%|GM?$lYi#M~@9dj7yd-FAvMV&sSuE`4u3416nx#ZBSuAY?XfUoAfJrnum9 z&d#=Vnu|jqTd`G#dcG$7v$c#Z#rrc^E9jSA62dkHc(sth0ZMD($(`IwD+pz)ZsIE$^yfAy}sLUZAD)HooHsH)oSe#N*w3M+V}Qb5AO!< z9B>+L{8*Y}98{4RARR6j_d`IJ3ub#+$LDW}ys<^d!oMjkIFSM4tSJhG!)L^BNN4ASsq3xG!){LwUvy|IX zj!R>5R}n**zj0^xa(jqS_CZ@bi&d(8tGn6upwIsL%g)W$H0~wRuMQ3>3u&U8- zZf#8eA)A!7K^sUnlt`aQH=TF&GpWSuIuGM9ZMO58Rp)F2&SxiHy8uB z$&y!CDbE%|S8IN<9&ua(dG$l^A9_ya5TDX+dP`Mz@kP_J0F z*gh1ZKg5u8cG4|%B=Ny$3d{IRcDlqulS01)X&1v%1NY-SB8rNILiA)wjD8t;D2M{_ zUeP+YYSGC1BUIm+4lg()$hpxu+;`ZNzK*uG!U~q#8xjeq;_U^f)R^rCwtVBUm+VyO z*6sY9n^gTR#vDN?(O|_DW7XH9UG}mO0y-1&z5aS71O(rk6eABj3}}|gn(KliLfM43MOUDkc%y9pgW!DLfaLUFQtDb z>)9E~A)^jYK~M~x(O~uOG-lE-Pvod*xvz`CqXm1r=MS4K`kc<*&|<93w2QWRH%Prp zvuj-C?W$w-Y}o9MqV@69Ydr?qlbc@jB94ArL+f)X)Wgr?RzeCkOUReJs8@J&UUwB0 
z5a*hXVzM|}7cHQCJfMu^?Nu1DTb8}&Vb-A&pK*hgcuaWQTIJ@pa_@}$?DAKi#HZNo zv}bz{drx}@6)W=UNHTWOjnQ=3R_xwtJ@>QQ;h~e0)`H{!P$>)mN8mZ1las52le3+p ztF`kLYgbo0M_ZS_k53%8v~!#=R>ux0GG~^Pu1zu^{9J8SUW7iAYf4Bhv}VTy;i|KD?XC^N9=i0q?!t; zmFfnyyG6!lG(c|Z+wmu+_>?YRzRcI=?l$S#a_XL{`?CwCrH_F^8Qy8mHto>_ z-+Q5ah9}!(XUh_X@Ipo#e#)wCcQg~-T&oTWeLa3c-rVbH-d5NUy~#j*FY^dfQ|~z zLmD0ez4|p}S%Q0hz2XziR&h~L_c>TD6$r$rve(Zo*sIEK)8h}3QV8V335K4ZSPk+p zZy4#T)J)kQCV5oQ;+mO*Z<%Jl;2E$m(m0W9MQ2*BV53Tnq(Ds9xZYW0DN3TD%+^zA zWj-pYBw8G0ae7p?uTxdl|6>0UjOyLoN+aK5M#>L0C+9rx@!Z(?_A>q2^tSetC)K-y z&|OTu0*x%5;!-J|Vzo9sS!Fd-xF9O`1NB$rUHWFEDyb~cx;_xC@^tzARkks&qJA3b z{uciw2HL|;Ra|~DA2@!SIb7`3_ISH50_&?LhNp`{sejOjb>_6P>7mGb zweE5w@}0wYsUBkFvWn63RRt-2eSRUzYgNK7`VG>no(Sg)cuWgJ^+UN+@z;a#w4Zd= znI^{QwKm^kJs%WWi=Kb&#rQ#2(-(8nJup4c?($;Tz&w9LD94brZTZ04b>Hz#=N5V3 z>Is4|g)vX3MWB1G}9B;q{+|e|LPhILG zXPPhfdDLQND(JC=7_fG-Azp!ysaNGod)~&qte9$nJyAh5D(d}E)IFuq_=)6dszrq3 zr27-pkd8t6bhvarf{FN?T_;MGZK^jkSX4jy3MreggC8`kDIp_P3cTRfm?yYa`x_@v zuwJOr;!}HJ@kk_i_J~CY(zJB&ql`a>)Oslmedu$VJE8|@FSI6e=HWYDcGhe?m_;@A+ogv?>?FBu-@OxY9;Lq~q-NvcYI z5EJD%MQ!3@K)!Wgh4p@Iv+LQ$Gybt!S1@$x={YKj8cqxjQAjnJ2J?-l?oKc9ce}(o z_ZV0)tqYH=Ds8~%=wOqJj!{#!O_YYqVH_mMtnE(ibH~KZ3zlQerUu70L;%P!9Au}} z!wudVhX-^l0~8((p5jgW<}b_uACJNJdOCQ(`Biwyq0w-Vk4M*yj>-}yyy0Q#;IGs0 zH)Dt^0+gaSR_5HJ0_jA6Y~VIz3rqOz{dfqnaFq?XiohRLd}slHs6{IP1xJ9)JhyYW ztNUTYfeZk=XT|Sj2SP-GBD~yWEOoJ30C4CuIl%V&d4QamsB@A7dJ+jT^IDfaER;SA z0L{yYzo<5IVi*$#WC~LhecjPZ@TQPNkPRV~1nI5OB zL{9{HekV1a?28v7L4NSR`mR3R7o|o05}Bs_6cHsHX!k?q?KaQ%`8lwY=;3Joh~!2p z@q7wD)Di=-|7HekfLP){Br)<13z66Uze=Rr%R2<%c5R{pfMb6sY0o0^I~|AK5<%oY zPRTsmcZx*ZcWOT(g`9oSk0_9b*TDbd`2jLG@o?7vbm&sp-8u>0wV zVeRl z0HLZl@Gvj!?51L#JP7=JbNr)f|8vd^@{0#Wh5ucffxf1xv+iU{{Zk_EARjS delta 70642 zcmXuJ18^qK_dOijwry-|+qRQUHnyMGwr$(ClZ}%n#>U=Q?|wh;?_a0t%$=&4?yjzT zZuOkG>349Ktx!11a^MgcAW$GMARr*5AWYA9^X1s*@CO2lv!_6vGM zG_AL(k(vDkgbGW*J{LYntyVQ69^IL6SCkyTboV2OA1B(JPtKmJWYKYe=d+G}5*-P@ 
z;@?UBSq)$7iS!cqS3K~cx}O=VLE$s|yoA#~OVg%mkcd;QV+ft?m|2V!!ptrFf-*jJhe5|uSqxRRs3;fZ7#Fsc^fn>ZC96ORMe{R)a28TTH1BYFpn@;xa z5;kpTCj=pTx+?>UwR+8a)t&{KEAcvYkS&8vpG5zB8kL#4kDJJ+Q<`_tkuJHxX5#&e zXi3$YL*#$7Q<_(4OvAWUB@-Nj)DRTub~ z{%i%AETgM5(t0kCMFQe)#Cp7@KPG70q+22?<*C~&uz#DLM`C6a5XCzNGU#dp7dFl>SZ@uSSJLua^>0O}2w(04#6I4zpkk-1!W8&~!R1YH~@e zUP*=q?o9XwB;>T2%tB9A8u)oP^ZXv7f-_53NdA6J6}60xwNK`{712>k%CV#XC9hF0 zW174;XYHkw5w@prs0~a)K$$T0_hR)4Of%!hz&jR2qo+zK5png1f&3ltCr}rfs;{zU zjo`4j9*}uxI{V95B26LldxPXbg7}%XY|_T@HodKuj&g4&N-=m4oIGRE$sNJj z2s|xBwM>NWtJ6@~%4P_OJ*t`Yei!GJ7Pn9XE$_vckHEygCZ2P9f%_oC)>~EwU9L~V z^7Crb>bUDIl{F%xlQh9&A$pvlgX5qNXVPMOxqP#+r7TFY}CKf}dE7BpEf7t0G1|#4+17pI zGa20`hg6LkZ8fgBiW2_0!I%e+~Oq2#p46V%yzhn=IX z#v|i&5=%ZZUO8}NC4Vo+tp4;&1b6>SKlOV3wNuKMr%J%yEvN)Oes(*^4H<$Ykh$t> z58mGF+>>c~lReY_-yWjSu?w!F|0}y15FjALATXfmh_ztE&E0S?pn#qf6}f*I8@dbm zE!J_vX}3b#6T)MhCza7X z3GHHGBz5ymF^tA<7?8`bYZ1(^v;_Hlh(LqHw$X*Qx3)srpm1}`ywO&t5&7Kqak>zK zF6FpbHH1Us7c4TM_|@TswiJ*b){%)$Ho+CkT)5Ua1!`fg7gLt7vu zx{(ddN7abcY^a(;=+>O<bf@wkJQI1^~~#vN8W(xj^k9vNCC(VmR$#0KYKqtPL~j^MOjlJa2THl;1`a& zp4-P%j^~v>AXZoOC2^pW=*elQfa^V8Vn+1^y+`>3-cTD#$nER;JmUIv8m9Afr@poY z-R7c?dS-|3q_hfpR|_510-_&MFshH@7z1N5Qg$O-KNvM#p;1s+0!MA$OEM=*SO8r- z*`VmN(MIa{W!F)moY>TgI`Z2f64oKZ6rhVXTR8y`ojbM%pW`3C+dPp=d2S2Z`O!{F z1bI2n2DLjmzQ|tT6cVgZ!5<)eLX;E=y2;)QB_^Bal-y%|Hy$<+5-QEst#EB+diF%? 
zJ2f+b%^M+{R@0?T)$gUw+;(tLk$~TaRSMMtS_-@GGP^Kb*jO}Dx+rvHOg@YO>lEx21>T0*QJGJ_9 zhFCh2UftGHp?)V*8!8c&PuuVMKU-A@T`Km!c+^MRWn((5f1G9fh6gRqB>}8q2kGZ~ zZgKJVc#xziIGUm-(~zBkOsF;n451fzz=0QHMN?ut!jy8yz~I5?Fr+E_l3|2c%d+79 zce6+?zDP1ODLi4?7_x|7$fAfAG#r>PY6`oLUI;@;Gc-K^6)}oNCLn4}knH*bByMa` zxE!}^CK}#sYi)3DtIE;s0{EMAZScbkC+3H2(7^q=bARVbbrdMla0P;XUZXYeQ0u!e1fR-n@O7hKs z|I!;t4J4*FkrD(%^Z^V66C{1R77QD3>8Iy%$enck9?AU;y6@NUYZx1hM5MuV8^5i( zs+_CK$9tNC9hs02Jq1SQvP_yo=9LZK$NwEvSlMGTNm`~Xr7j{#KAD6mGjc}!Px|*= zKJzDxK;#ejXWk6c(Q!&1&&Slw6 z_+6%#pux#&$r3E^Ky8ueNg@Y3wuJ27W#gd+8>y+Ob&$6fQ8jVW^6!$G!JA@S%_k1z zIXk|uT&VKyLHW(n;Nf1n`u8i~q})gq1IoE?PWkfrxFu&hLns6IxBf2q%bESbv;E^7 z;Coi$_eeV)W@tZubktwTm5E@l<1tihIAwEr$0Lt6gR36 z{AVZIDfxVmA#!L>4oBh*TY~3+R5sC9vu(v*&UEli;djHg8XCqL<%YH;?2$z)+Z!7* z^R9f0`UmH063xz0ug*LGS=?zur~EVf0FvuCX=46^18p?AKPpb&iRY%|XZOJ`x8U>Z zIN!$#yQP7HwrIyZC z&!)-?&loKEX_g0lx&B+vOstNxiB9WLByBhF7Q*Uv5ZJ~>f#&ax6rYTYZ6H;Or>mJF zn;20eG|GiYq=p(mq$@$aw1!8Fu~sM!2Fv2}gA~o5fAReOhA8~AFF))X?KC<6z3L_u zW?A9Jxf2=H*=mDWI3Hu}7smS%C$b(XEhVjki9NB)p zNhsW(ZjMS?cxh?RK4V%^9yp2~U1{>VC`V+}@Rv;CF;xa_f$`l7r})E-gx6n>X-EuB z?$_x2+`<4LK3U$dF~ZG(bvikJq=~WWD;Umi%d?bP@=ne{c25h&q`V%vG#Bi zG>4#IxA_eI_jT=oSYm6|=cM02>+%DX+7Pa^@Wo0m>W-()egIA1H{;xeRCl%HE0qkn zG9Q|Qo`&d2k4!=ax^&VdEN{3cJY@H}(1hTNXPFO>{2`+E_jrBd#9{ z_SAa%=%^<~0nF>_3`$&vqDLGdw&-V=r){5T-I#VD&D7&tDpQs2D^X=# zZxdLWYx#gmgG{pv-#qQ4gV?o@S{BwOb1x3rg|B?O3 zBxMtDA#8s2!`6_Yv0Ms9jA~xm z$^r#fQea)z8H3H&`wv{CYom+HyT4>LZFMJBUY%~+sgf4VwilmGF`PI?nXMfg69<}f zjEfg`rpRT)_vLtBtk&O&h1{=#gc*Ksmiuae(8M~y(#qQTt-oA9>G$dLe+;zu_wCb*vsxl}rfll4HE z-4Ty1ge0p(q$K5VPvhy4$e+-NtvGFwax(j`i`#TU(ZhpHH&jQa?#ApyP>P-9qDU=( z@!29Gr*Z=XRf_Hq{Pb)y7fb95Zu^Oun-Q(6h~H|hPU7-)X=?B(J*D&P$Km?-X~fV* z$tTXqD<0MkL&$?3--rAt2kTGUsqe4s-A$DWb=Qp9MouSROTo{_=>7#B-_@1w_pE** z9QHf+et1x$I754+kqN%IPRFu%L-qRr=ewbM0k0_UbMcwcnuLPRydfQ{X*8uxy1HVy z4Ev_L9s#tBd&}43U$4h{ugCsJi#kqyTjy9vi<8Y3TCXB&5i`nuEJk>IUqky-Q@@GM zUZw)V%n0*$1U2K8{4k0Re!e_)tfkCOQu;W}!0WLmTxP7}mRVj&SMruag}}f7P*KQ8 
z#DyaQP(T$yNu3=@RXZsDo`l?3-g_i-vtMNcXS7Zf-_KhbebM@Ofn*qJwn?6J-RCgF zGlRaJ^>ZC30~*kC?dWJJysOtG>Sargyqg6Q!TDIyrq6YDB$0%blUM04zei$(D(ah~ zSeY>o$N3+J?U|)Kth4z4)x|h~1fP;>2`2w%gtakKfN#-0CF{}1y=~l;s9lS}A2tVF zyOJ2war4}MRKdK=xHxZZ8ZWt^$zts@;~3f0quG@iqp6YL%5G<&7{T#CBnFjwZ2y$! znb=?E?)8Z#joch&v6oSP<(4s4KSgZ9*kWaE^zzF0X8+KhYFG2#W6-n#8Yh8H2H}mJ zBe|2}knrgq1X4vqwm82F<~;Es`I07OWb%fD%_q`hOAG?p5~~B+@yrfp-`R5QU!!=3 zJ8DfN&Iu5veEKe*g-iUIl+mWUDalKcSQm&*2+VDygH10 zLu)~}Sfrn-loTf9Bf_2my2SI1mTTk=i6)W%azv6nev+YswTh?k16~TgNh(=z+9>Zr zc{k=EbmG7cj}DFn0=ufhH#pv;ltw0q5sH+hLGz>+?PgRB&jU-y@FjWR*k2im%Nhy7 z>bVz`e>P4X?qg2_7^#CoL}Q=bwKOe-@GCDQ|Uqgch<3x{DmPrNSq5C z8OncB3NWQIBHaUw4QSJ>4vf}FD|(^MvJ^IQE zR^#tnvcW{HB2!7bS_CEqAncvNcv)ZJ3YVxcW36BfuxjvL9FG9QFE4 zF#mXtc)kqaZlxx;@1MUTCachhz2TPP=&iKRc4q0f=%cTQBj0-&{ypk{>aST8YuZ{a z)*+R+4UOT-LMkOsPr?%*U@U4Zx)YoNvH?;9f)3UM`U6I}VGxH87ih*D!SAWQywGtw zo{A8^9jc^nYo;V(xOOn;0qe$t>vhZeWH|+Hc8LMVRNEVZp7(DScppC^4?6_q{`(V2 zq#pDUA`F@)nT2DoiJMh-gRzYXM#C6610^G&I6J`%C7XmH5c7YN3u_1MBdyX-z!zY$ zY&{19YX(wwLF}2>Lg>i2*dc}IX7|b5^sm93A0o-Ps?m>dW3UaLi=bk#fvwzT_97ND ze*jo(Fig_}*^w41MQ69%tcFM#3;3>C>GM8%jnmM%*T00OhwQvcppi04i=5jXsUJk=(>iVB< zxBc&maQoZ@&fIdvi zWy>@(9u7+Vjjf_}43YA@Sc7qryVM<bi&GHV*k}3~O2Zx>5;2;&|hUs-c>Cpq%Y=8>_aNajXat!eta7ffa1Os=z@E z3*`S^;u!PcsbLr_J$`fp8_PKTiY>6VTF0XJDq)pGKm{=$b&X91QFtgB1n7Kxu!h3()oA9N1KBshjcoS z+V(ssJLaa)<*fzFU^S)yFgJ|w4R_>*Pcq0~W#2peK{Nx1$ z$0;zZvD(8a3REG_3yTLtfS~^7M`dNAb{IID>?rWHaP%guDsqFhWVe|;MN`}Ro1>ej zHPT7|$@$3uv09*5K`(^lOIx#eM2jFE=DQ)iiAjUbBg?CCr;OcmW1XemtQ&~oQ1%)_g~cx#A7%Q^XVan=jGBKEiz%tbTtQt&6VdW zpCN7-N4yFV-DPdXlB? 
z9LiW^Z4s0kDnh*TJUzVup$TdXcmbhhZ$q-t9{B)wiqJ?-mqro<^5h<-fZR%pxv_wR zDo%?b>JCANTi$z?7(pfRx6^NIFJ@T#d2m##gAZ}o(S&7?(J=o1u^2O5)R1k8x6Yy zbZ8eU)&@m?P1cF4R0Lot$}hO->?TWZ-kD>K-qflMSw*7UyrU^;2^P>@jX7bmwL3A_ zjj?S}>{TYurPqZCU;g=>#6NH11wW=Q_Cn7!;wR)+iid8wW2>ZcbmPprA#0=`TbOJC zsNp6y)=OFJif|a4qt*sYql&0Qu+|0~C2n>Y?ag_<284Y<(+ds2D_n(?Lt;{+l#j?V3xVJ>ZGmheD@9o=x|2Be!z1;N4l0ej~o5}smR)PC_{ zghZ#x2)hF2hEah79Z3GTan_S&+^ae(+F@;-KF<9uZ}2zK%@%=`Mh%n2>NF|B zemSg`XxS4Pgu%(!IFfR!-(sgVijM|o?;)QKz2|ntb{;guGe(wEl^~#Tvs1uft&Z(jD*0s_TVgms@vzMPcB%z_fGZ zXD8D`Pdf`zc7(nra=d)fy;&Vf!y?G17TVaxQ{uG-Z!^($tI>g!m^bqI1;Jx1*b(u= z^4e)u62}1;b!=Sm@$9e*2KVn%3RD}{>HhS8pAk|~v|4nE&ptanM)OC zF?Q-txk^^J!-ElDQjR`=NmY%BB14ih>bjm`mQ>Ryo*?7ViEJU`lyu9U+&iPs1~{%3wR2``+{Rmmwy zv;;KZ#%+u?->LFkZGEYN-Cie!cji%c-ZGX+W2o^WP!3bSDq$6xNzE}cXc|MmETP}) z1DES%6D%6Ww{Fh`GP0RDol%;=aZ^;2_1{HW_*H zgv$Z{Alt(Rb;120YgqUf^uxSJ3^0TFquxdb7(slH0az0Eumdm6CltdXLlXKoo03T- zv1o{b^arPSf=nf!^j9LtW}4EfP}}t3*I03eiP{a+ez);Y7KC^)X$tSqL8U0q5FkeR z5oD}g6ph?~Ui+cwKTB+4UL0=KAK!0@%`?-vzXl`=8hI9jKKMmu@$J`2D!!ujbvMfuks52L7YpraOowe(GlFosusX=;z!0u}RF z;w0d7C*xX^2IfD1{na??v!#zLUK15a>~Bfx2|xPTEO0sAP&rfvmrZ9|)lhlusiqLt z36RDY!FyXc!Qs>pic$}lQ#Mb3>)tfNwyVreO=+Lh4~T^=tt>3ze_+5AECyMu7?;jO zhNnEPPEASIqIjmoRzCbS1$32A@X`x6w^vj1?@H<^Puk_cWSe%525G94y-%}HQF|ZV zw;-UwND`7J^@3z%fn72Fb#+0Al5%4sbRTj4475B}rce zX5-91y@2g|sslJk?_M@W1(jFyKYoT|0% zV8wA)+MCGD>xu<4PPpr^#?%41Ansu6upkql0R-cP{9ftAtgnp}@ z{Bp@ZZ8mqgA?QXJpV@$Q&X&Eu=+P?gpy{VhNI*Zo367P)`hULk2aw%5ndDO2f)w24 zCMK`f!?t0ikt>wTSVE{Nk2vC>$r+u&fA`=vhvUHei7*d6c9)+Rkk>r0hnvkk5fAL8 z$Ze=!V;jah>sT+GS+V>& zZ~;j-4IkhE$pQa=Gy*TugYthw2wONbNtaVYMA0ydD`xmBdKDMkkQiW`EtlaP7&nPy9)8E0nfl4ph9d{Rm@e` z@p^1XHbKW z%gZz3hmB8_RkdifR_!bfv-x=)&Ca<9As`BrmH3AJkJ+>%trY~+-`Dv9W;m<@qQ#J8p?O*?wm}8D5q-~s@#2LMeargl;Y#A&~JCfBIoH14a%{VZk8GISSnR zQwi^sT%qy&zGPmG@=uih4|p6)*5McvM7c0raD2^ComN_p*wPb|aMNTFQ10p#M@?c0 zf6Pz-%-^*@X-W&q*)d%=$p{|!27qQRtFIzvjmGqGj3)6tEU%Gbn9Cu?s z3&#N2`a?1>E2Lm9o&FcxI`)xRpdmP1U}G2I+P?N(zAawNQ8(W 
zL%ffJ!)Fp>;P{sVSD&GsFCPCkM7L41^TVm}DVwPco!^!o`v0^E_)nY(W{5ni|D-Ac z55djRT-pJw8(h9yiG^YOC%Bj$LTtdm;>1?-`vO8F@ zx;zNy#!gz_q{BTKuEd7_m5@Ogzy>-6L4$tdGUPM)0J!pEOG$bp_z|6&6$`g3KV|VUbvg;_AchUx4hbJFc^0K>}$J_bj<%vo5eMH!Y*zg!GBXZ zt>ruK#$@JBQW^pb00dO8mu)R~%EP0ij zczX0L4>I0kR0 z$?H7Llt9&bkf|C&tyr^btihR>uC05^piG0yMz%Z{HSA_FipJtJW()ay{riUd0GkFjG3(D~6VJ0h6QsKL{u-ai>&fhHd}W*@K9f4qzo%3n>Znc=jF=oN?`)-7LG z0M&&li%tuFO>K1xtP9TmEdF(>=Ue#k|KpTMcu9E4m>f(2Tp{=x^atV(#4hNC9~1u` zTq-+bG-24G`!OboX(kmqcD?w+E02b2Y>d*JJ_Y|FA~u>gu}-2<%<_CkrHlz{e8B-k z*SxH_miOm2^{NIQ3%?OgjJ2N1|uvyk&{@|Oq6l_bZz_qMo)i%t$|bhZkc%j^)X zRHA3RL-_1BHrH?r8-6+q)5@WM?RY7aNLXi+n@`vh6}TXh#umbX0Lc#M81fy_-ZZ!$ zo~`(;56J(PybJ!*oB1}^7O787ZXR!sFY*Vb!8qsO6V{*R!KV=8>&>z<0B>5ms;Uk7 zc=V1J3jfTUn^tK{3ton&_wS_kmgLh*7!dy>B>@m2GU77Udyk}-isbxjuF{m1$@qWH z@?>Yav|1sI@`Xm^P~n7nS*8A#iO{Mljj(qmC=s&ww4k^gMP3NT=j>=8>|sHmG8i@8 zj zE1L&l^Ui7m8=8R{dT*L1owhpT5_=s)qOSC!V)mouH;E7v2>wO_!2XqB`~30>?{w9j zl2%o9k7qqZnK$@En1+gplltVJxgkR!BpZDX;^VOk=%f+0#tSTOCUTBD=Ak6C>$3BY z6E_-pU2(~P-Pn@LZb$57(A{;gUT1b= z5K6aMHO`ohrN-0&Fh>U2-n@nJ)n#f=T{*3c*f?i*68K=TJL99*Bd~yU_EL8ej*+yd zPhGJtSFsJJv2YOg+p;5^kawOtfDWc2sF~q5o&_oC zMB(q70tuuRfMsY4wO?-yRj6!+GiFc4-sD#t>Ji7>ENs8|uSqz^q5f<$?mIF)_@NHq zKrlBqFrSYo>i3DW$m~W8hKwqzO8>l1*#m8FiNGcDiYavuT#N3i~Td zSn)>7&XOXrA7e$K`9ai4%dhDGUT9K1f2I^{ivS!o0EJ+HtdIxU`KkvQujX0hg^6JC zq%>t750c;<`fm>s@1|QLu;l<)a{v!<)W*O6rkQ5PpUuE6kvhFK5hl_QR9$mnm#Ij0 z_!)rehr6)b=nbce(ni(v{yv|f{iT8sVH4TQ(Kj2Dj@|a(y<%@?T0dX zU0qy7JE#m356P;oXYF2`QQ;LFt|TR8SGduM#CSX8`ZFB7kkI(9$NKIAjnYdHaMTV> zZl%uK`osCfb#U2x2wY&Q)9M$w<&eZQcca`TzvJ&~X8?ipTi11{&Q8SX6|RL3G;}Kf zkf@gEs@5ORE2Eo}xLo;+1!w>0XQSWVYCh+5ycz|I8PmPBrz`u1;^8E9AjijIuIB2b zQyRZw$Udvt9VgS-$1pyyx%^r%-$)Z;WF6S^H0MEGhn;7Oom2F#&O=bf?SuPbQYEJL zqvF;zG`j@IJketL3@H+0B0cRfX)Ia;#XHyuW?|(Xp6y0r0GJ1;f zbYF2Nb0*le9yKHaOIX%d{I~eDJ>*j4P0U!_JFQ%_?nK+e0Uwq3ZFtsPY#Ngn8MwvR zvr05RQp)RT*<)oKuCm7Ash>l8Q9hN&x=CX%>C)>wC3W{!OiL_vcJpg0mb||XKF`a=DAnAq}S!Ml8434N3LiIf5kmza(O51Q0@V@c5r}4gu 
znfI{?JdQGq$$8&(9nw{L;hFLlt-$zei&6&pGT-K)o2dQ7F&!?FvCIP%JNg~3{`!?d z))1^HRPp+IsoCNXpLnK6;PUY>zYM4=2`C-NE$Wv?GbZ#id@r4!pI@^Eh>&&Zjy&vD zF&<~7Byx#ao4tyZO-XQbVRfo}I6mq#y{DF$^h&M;BO*EMvS{kQ^Qo(Vljn2 z69fy1!*z(XXkBM}AurR&7zS7;mp=oId&L2l=nv z3$9;hD$AZ71cX*NnE-_l;Nwy}(RA{S-?&>?cQEGkY%pC`ad7OyPi?z#4MY`)|M|TA zTHHPl%aAaN00xyn%+81Z%y((@{&-Ej;SGXz%1AmXy@75<3L&A9_&QxPQ!~^5`Dw9N zOwFa(!_r~9&t4k3Re;ZHcyRFD8tJU)XAuUV^A>wtzg{kZK z^iHA6$DTSLboX#@E@=z$#tR_rh1cS95rqSzZdoghJNjx&NB45+&H6;wbKkH~@~PZq z*9VQ~8;aEPZ>2`S%WHUfzUdKF-|%Z zhv%F$j|wgUw01FLA@aA0d~j_g)$ba3KBeE<4yTqjXHFuaURMO3W{R$A0?+KGEt@Vz z`OY8L8L+wNqyM#Zp(r5*|DFYBhB>rGd?}`$zBq=MjErPjOutz0I(`;tP?BtLI`eC)k8n2BH-kNTZS7mu3su#Wv6c!#Mn-canDZD8}J zw*JW>A~KN>(l0dWp>~~QJ1KL-p*daaOsg?A|)*V@$ueh~2Q=4Fe5b12Z` z?aJ;1pjetx@uGwjlwpVWYpI{tHa?ha5Rt;TQ^}n%W8jEKOM*YR)&C7Jx_rJhMSd7HB)o|R91}ZZL5Q2lscqcQ_uqQ4Tc%RkmLSdIL?y+G z68TOd*TEk<#Z`UY(qM)()iD5#(lfAQ5hYtfQLiGH_P2XKQG01GL3%nF`|;^f2J<9F|OI^<-UmWi^pC7G8dc-YhDZ(ToqH zru`*GOlEpHk_e&x^bF`i<6AfvknxljT8pQ=+ZObnRb)L*Qu)0t^R1TSis$DR8#r2W z%zS=piogAK-Ilnvre|O%tsWQv-flRkj=-f9CZ%TiIm>UJu&>$64>~iuK9&PZe~DpU z%nr6N2z6@p6$&)vRL9Bgs>J>%C}XMJYC=CGxeWar%?0JHWPguuypkt5i{E`eJS<`h zN^i!C=vUsqFND6C&qtVy-i7M6v78UNlT&z<;X(Do`XR9*Bd=?RSH^4sb*sM)ZETdI z)#a~}s@*16x=iVy+6FwJGn$2Vx3dUB;N!n2;7#t5IBx^0@sd4)A`whg&4#}1Q0lPc zzS~PLvi5WmVxVbpL%Fk=aQi*y=+Vx*;>2aQtCGF((*?D%RiE3nb>iYaWOgBY0cQlGUbP}%9u=>-c}1I9mM!{%qe+K zKGDUpeZPsp&5Kji(2ckXMiiLS^to%W76a%>5nnt-@jOAm2D?&C9>-V_myN41QnRo| zehABhkNgO@z&FJsDFZdd8<2uFG>@AERw^NsGn#1Z%kvqYqq7vGZ(-yO$zYo2k@XI_;;q6+2kMzZQM)08#G zA{LGoGVQnoU6mGUx3B8=6fo@MJro5u8pjnL={Y%3I}RxUG>!TC;Xb6jKScIjl$D^n z3xx;!J&ws#`Zh%_>SvOHjW#dWg$SET$vvkLH22$}<>HWoF0jB`&o}|m z7se*xM%K#*9D;g8|7?o02rH3+)yC0-La!B05l&%`aK0lGv(a@!qhyUJQb(TxQ5ixj z9_1oTedKPfqYSIl$r?>(F^x2_UPi+sV_@Z6-Wv`diOn}3IS8AMsjX$hDX{AqQKnAA zu`!TCP_ylAgs?u}+EU$)AOHEgXajGU?9YMYCzM(lAfg--HDTKk<&{Z^IK4(eCv}^M zE$FHL>)~6Y_@|a3s)J;@S9=-|KJjoNCUhw|A#mrWu34wt<_rOjoGVS|i!a%xw+;T0p-3YA-1dN#n96Nps+d(M+vpwjrG#z 
z_yr>X2*0a8z+#pTH#`^Utl4hdPOz#Lf0!A_b8`$^*zm!NP|o69Um7T`ElssMcNeOm zQD1q<(_X*v*n0mI?6%z`@4E@`@lW4<=5pILWPJa6yVS*IMY8l@Wl~aE7js9Y>ra*T zV%v0QPX3O~S%M~}tv4Aifm)^fb|9dQf6c=N%qGn#84RvZm^&99PGmSlq=c_O7b7JK zj!ADGMuzbnw0B#|Z$zPWH#AzI`IsvUc}CWnE~%<3T&vfKm{x~`hfefnk~E5#4)0(m ztp6`qV`zkjHm7Qo+8GN@U*|2vYs0V(trjsw7mc(J8i9&Le4iFE4gMGOTJPwv0Z`*b z&ZEW$)>SJ_W$U?C5M=4!kt#$Kaxf>@)WueiNb1u5wHHYU<3U|Y0A*lh>T#2rMtcX; zhrTrvQlOD%!rlNt?w3G$2%&_oAYztPgkYQ`B$i=EIn>{Xhi4HAx#l4ahaEY!s6#30 zeS9P#O$L61q6JeBq!H~J(4;Z=fE)@^b+VQKlykC>rcO&nQ5v$#=#-=5kZ61vsG75| zMAV=%V_a*g%_Nro9jTR#z5n6OkM)y=#VzNI3tom1)_Vrs9B1I~|G;PQ zwxaLef`&!^sWrae1UO9m;?-#O*3PC*u#__xPLXKT$VKTrZyJUJ7iZTySM8Hgbsn?# zb9dQx)s@oVkgeYwyQ{GS4@`39J_OpS4sFLX1JYmWwn6eAhkV3|E7lFq`Cw<(Qr`$CI>fWOPuh++0OmV_IEBJf>gR}y z3=7MbHIC~N8Q;QCXL2T+zuRE|UlA4A!O86S1hH~g72e*15R?c|srh<6l~`ZzYe9id z$Tc}t>;&J+6-aqEr|W}pZko9Ze~k7*_&)&t(I?2n{_Xghqa1w#-+y>-KLY>33 z$}j+;fpuQfG~kh=`DDM^8ktU)CfkIUE?x{(|Ef~Nwz9YXrNiVy=l($;&IIr=A&=^YJ`~y zFGGkc{6F$k8H|Pl7^>eEf%Sv`hhTi%t%*HJC}-&Zfs3spF>otzi`2#x_-^jCE;@P0 zJ|*aLN<>^`3ZD&?s29xj{F0$gt)2S%Qs(VTLauShrW#0n+Q6 z{&+`^k_CVLg3zgL4f})*E25KagAe-|7H8pj>lw>hbCD6QGyD~@S$sNAc0+4B0Ts%i z{pma$3kMJXzPkIE?ER#9S$v`7zBTYFcs#-7nk!HrM>j8K zuNq*LzT}6|-PU}@MBUdYWA8L2KH=`pbCznL_;z>cJhZIJIBVLaZ>OQnVCW^GUNpwf zWm~#)`ocu*=oRPy@2fX{1q5qm%&;4@g-1f#!`MrqT1{P`zjJ;=9i66yTKy57#bF^$ zQ%mToKpOVb^KDb4xJ=RH3gj2GHs8P|=nnwdLWD3mnBu=a_y2>}L-pkbsoZGLTXRH0 zcidH7v}E;pC96|3+7n$^VWsY$RMj6EtaO#?w45}l^l~*BS&58{R^fDaeUWvsU}+;{ zGn2dyeEkinwhnv1bVknc2s~SbOvdH0I+gKyT&1nlR&I{IIODc09p*lyAi$}th^yMc z_lS6YK5M#0Y5397e4;S`To^d&+!#4akv-N1C)QDzw-fgZPfRZTmwsLw&1qA) zQCpXF4&bb$vFArj_w`CDH{d!fctTMK@>zuo@Sbzo*Yp2z_m;7BL`%D-nVFd}W@bBf zOo=IGW@ct+bIgvJneCXFnVA`4W`>x@$@%VpH2-F#D;+&Q_U^52Z7p?I^;@-S)q1fS zlo!9IHB2P#dF~*LoPME^l+&T>y>*M%woReoBl4wTqq{<491F`oE)3C0Kl_t~NcSX4 zk*}jlL1C=nU!ex3!)`@Ws=}frT)1y-VsRcUyl5jyv#VP4LoOrp$=VY(xH327>N`UY zm%iNiX& zlaiuOoG@5Zoa<&;gaOk;5%+p^(0KwlYF%AmP*@+g|Nj-3#!Qo@dAeXteQW+Ge<#u2 zL}P<5EnS=+vID@3j7Z;36sA)*s6nw@Ds7fPXW(%_KVuy1tP6F4F72+^d}dS*@NSz8 
zwha4>6iNPLWoO&TQA01Ck!*P(DuRvPg0}8E7|nDINj;ea4~tB?Y5`@~rw1cpJpzaU zVLh!lupJdm6WnjcDU?lbG>Pg6C}JaHUe&es%eGR}JWfFPkWf!ZJy_LG6zFY35<#di zQr~slh_au*@cG%hXuD`-?x!vx7dAm+||E;@+dDBe|o@_&qe=I z*0=C+OhW=~2^Z4*g9F{nAEgZZ8!OFlRXW7q6&xt$``fWQpRgkarWZ5PfRPP z7!=t`)`0GEeHk`mhLCDr9W}#IiXk1)WxB9a%%UNmt5~U*g<3TuW5d+=Ap=e*jCpb) zwl{<5OKtX2tY+CuSQ6Ni`Cq3hr=l0j(uNCBLG@{o4LVVgj?8htN>*h8O3XE;8$%D z4Z0y{9QX&e0mI?DiiXmH-#*tP#hoOYm}`BXG;k29%DLv&)pPjZnvmJOTHZclSRxIR z%h;i$LUpa%n5qbiGa((vZ}=~krVfTZpQu%)IMnu%XKaq*e@3iwh;gvti*}y@EkU6v z3=_BqbYb;owz4;84$sBEPj6FIT|H)Jc0DuBYbhdTfdzROAL3WQl{JJ2Uw?AF4(El4 zsz)pqW8CTpu=UmxPtB*vi#U&LHQDvS14Sqj%UcA<9 zZ0>U@aLSo1x7gqphzSA=*39$k|FQ+4a$=;noXk^Xy<`-|Wh^=hJhmT!ERq^q0fh7s4V!C$Bd z2-vZ{HmNV@8d#(be|dXM6mrqS;W;<07MrU>$S{ftRH7`cr_o4ujd`$j@j?wmvB41G z4jaUe?o6xRbNPm_W@Ym|NHnWaV`wqH7rNr=b?9wC4uKa(gMQKxxTbYn1s-_3WK~%3 z!nC3r>U`SR4`D#IX@(b&)zfFavpgm1zjXm`!XZ!IC)IE z1D#sfx&x%rBYtvTne;M41VqE@hT$MC+#xt@RNaP_2oqH37JbpkkW~)T%xFkEN%rzi zhSc8f8GmR4#2MTAw;(n5wH6O1W@e*jho&6lol!8asR zATSXB;MalagVQ7V$JWM4>Dlxf7c!|bh~z(pxNPpye(2-r;rwIiiqT2N_wzrx$Q$m2 zLE!&$q5SGoVt5oONWySDwWp3B}_#zg{F?tB4hEpS%x0s4MJ602=6okW;jN?q49z;Je`M%iS|6= z7bY|Wk(4{bO4}NXkt+dZl%=XPK3PX%9@7)7b2oVB%mq2ePU7RGc<%ddm&Z;ajYz%E ztw0dt)Z+d)n^;j~@}|R@C3)dJM#A{Lt+Pd^%p8|SbJPJ+k*kTkobN_U4mJn42QkEL zqN^_#1j(49QiCP9ILHTco0}C~AtP8v!M+d!r|&i0592+3S#*ovyzu)F=vuUegX#U< zn~yZiB{CQX)wv*;=MdTUdO zO~f*N784_T+zbSshP>4XO)p_-`Q`qI2!RqoQMKE<;~}VjJQOMh7DChodBmsz^_JFky~M_2{ET4G;u zeRgGqUIvZ<;~?DAF_uaR8Tc%H_|^34r$ecbl9^v?kN2{2F0XKRID3*1Mo?AKkQ)kIoi zJ*+R^=i`7m=n}w4C7rrU;2-VcTN@5BkSKB>B+!2`gZN*_C;3YKMe6qqf4D$c@4<73 zZOPC?8;55i_&q%c3kXY4%jE~4?3L{clZ(L z7CFHY{sZ64%srJa=|6s>rs0wL08q>*A6#0e;BEe#uhg=D2SNMCb$ReNrp8N1IVe56 z4_5RcWRgZP!T!h42}|U9!I&Zb2kFi-j6f)Mh<~i*xPHOHz|1lfQylCJ$19sJZK_eR zxpX2{rrXwV7h9V|xWZw5VnlF2n102V4~Dt%XvYAx6P<{clcs0$pL+)vtm{^IBH zFzC5@{GBq6=$R-W{e?v{p|mEk;=4guZq3|Ou|1jt&!`=!7hH9FUC-t14)yNU>XDAo*gXt< z=@M#ISlmizEj&!7P0w(TFH4gTepZ<+KSj;ciS)J%3<#L9jY;lW8A z(zvPKESCyNuxSSD=Kxb3)-18Bey>k&QRNiQVzFEG^aQRFJ=LPRs8PbCD 
z&V7rcx2Ah%qrXY4hmO6lufKe-mD7XwK?*H>dRM4P$;`LOrrw`Bgy5POKaM7KRyr?M z2@dmJ*k+|w*%=S?Z`Z2LM}j$1X&+M-f9_jNEVq}|63Gp$h#r*8+wv0!!D#1o>=pr zn2zJ7cBcE@*NbJdM_@_Oz1|XltJu-)zmhETu0j`dYzurnG({SZ@bbZf(w=S;<=}K% zem$34SL(3mXEYkDY;y6Ow}cnwcso&+L)nQqsnI14j_B)X3o&=c*?89>*r?XW(Dl)) z;W8ZgF+Db#!k~1o`vcq- z;9xEfLzt_@(qOO#vD0Mrt7qS5%+f6zt6V%2JwN{!Q4F+K*3#eN`m_vqxXiJ#S$wkEkO`te`kq3ZQF!E2Z3wU5?IJNJi zr~slZ7S-F|CmDr}>%KF)MP>RK2SQT;5V}>oASWJHXF`I0m;MoRK{eRcvBAXd!+^Y7 z&UzN==n%G<)v=R>-!mq-pLW-VJA2CM=+T*g2$3ykjm`e^(%&j%rL~3^5HOEl=HccM zpiOcDYW1$pUY{RMZa#8(iEp0bmKjSxy`rl!XG-P1QDmj%Le}PvDbpFbCmkUXtqdKL zo$0+h!#g!C*^66t{z@MioC>@U6zJ zgXdgs=?%4K3JFoTZAEg1iS)apf+0A<}CO54hzLA)x6V^bMAEdX4xIK0BQp^wmW+F!U!U?3e z^dw!`E&6bY>Y?O+y7S@?yIlYu^PG*YMkdV&5|jlDHcRH#2DHyrvN*N80ukz>s*DJR zP%i7#trI&`%3O}zIvGF0s;>Bp#j%}KzkH9hFc|&LM%0s9Vv^4#P&_b2BM&)CB>sKe zYZoCdzRW)=BirxuG(^7*Z$tpY5%a2iODsx(H=i*DVWBq{V>FEO8HfPTG!#X)X}FYr z9j?te7I>s+3gdm`340ACPaxRrb=zPlZ~ppxYNm?2dy&E$}% z$p_96X^Umk4%TE;JB6XF=EZu#_S8Z7k2aT4SzflEP4%ddZ?3CFi0Qj*=IK%`@e5*J zysI&XICil*mOE$rc=9qJKEMH@a|+7zLh9S!(A8s0sk=#t zt9}uN_I928c869?#QqarpQ8!Cm}c?@&tzR&)1in*snp!vzX;jTv{V+0f2Da$70#^A zy4gcD=0zfX*zwLQBu~L3bJ*0r5#fCd-7O@K{$vjzpX#Y-dEZE|v#xA03`Z>NsuFDJ#N;z!D>P3`lm z@!#vc%fHuJTP$)Bo}5Fu4$Wuzvkcgd#)=vv3jX;!3;%kbE_-+b!B?6S=Y`bZs&P*a zw9$|1nm(QwwN8e?CC4P$@>7S84XW;LkFL4Hd+!1oXYS?Lk;vkZc`Fyi8w5X; zk6+md)ZDNtEVdA%D^^`rlyKe!Q*gx*>pHssAj5+>?pDgm#4yzp0BT060&0SOuzs*S zO|U!^?8}I6V6feRT_1oP_w*1a`Z8PK+}SfdEdBzW@rFIGMbswrQFG?HmL8Z{cgq@R zcvzB)0g7+@c=b`5*4ZOsXj|xQMpBcP);Sh#BM^03M5? zARlAneieyf8{73qMC$l-y(;of$M-W?i;tW*{7$BDwiY_WPPxTCchxeG+rAx-kUEuE zyE7S>Mvz$^8xok{X)oWueEt&VPdJ!v?cK3pg_Ix5?NL6X+dYEF{T(aF@j0TiWu=c5)-dV~ zzlsSDPkKA!fu%wQogB@FauB9_MuyS6t8&+ngMpb!=In71H4C6sf1^ZdTE> z90Q#(Wm_BPPi65a)u7`Vx3D^1_QM@e+n3qZcCnJ?Ff1rZXSNpg`waE1{e*2E^^U)g zKKJv~S@%!shas*Qe*BnJea}Byg=4%oqcyl&q8+3ojzlH3s%1q5+KiKrVYHBDJWXKX zi1!kxTEfaQgJGeF%|jd9E&ggsi`812nx7~O6-wS@Wu!UibT`ohG{SB#&e+=k5&yS? 
zOZBb607CBNE5;qh5CaWd>WGYir73#FT|}lVE(9?yQH6_Z_Y54k;W%%(KDA52 z=_cZCO!}n4?fNO57|wbT{r&mW;G=rf&1g>aO*p~9-N#4Ef*Q`;`$i=3I@lH0HN)5d zVHd{#gZNj25mq|N&)6R%k$+D5)<@M4QfkbZqYU(vo22>u;@VdH6Q1?v44F1lXLG^- z^&H_Da~`3-N`IKickwUd{U<*Lhev9#Dvw(Bn3o}&1U5Y`sK|~>j(=&xl#Oa0gC70C z3}z%C@OAo&rbr7{cFx=Ap8R3aj|h|OEHDdji%!(n+aJtg=eA!~CCp(yee(Ti5(W6K z+jO71MU+MBX+iQs|7EY9Xz%XJ=}HNHfH&r88Hfvr3$&^3`bC3eGveczRNI~_UI(W3 z^E$7<{r#q`cxW$X!|%?7BOPOMG@LzsZSY(oQGs8U`@bxE^aG*Jx4I+aI@-B$aFR)*xRSCP+0(9Z+7!DPy-+{cw+5B?-0*@t4uJh3gbwdtwm6 zzbO6oKu^tPPUMs5oEiEd1FyQh7_1-;q{X=hx~q_by_rt~jG%{+V!exky{Wvr zn~)D-|1$7exHk*BBM8F#m*U=f>N(J7!KDY#dysn)XLs$VQh|^+kEc$#_sJt6S*}{X z3ofRggl(LY%2yrBiW{OAC{3UAtf9Waee{3KpF?-U6!g&cROZp6uDF0CpvGp8KJ5Z{ zj4eA9&rx`^r^#7&SRB{Lne^MrW72DK+G>Q*S<$g7NYFo1`ARQEf(HZqLs`Uze8-;a z_4h#B}8R$G@uo znX2M<5;>AG6M=pI^48a(oSUl!v@5_|x9xTt^c}%dpzoX{I0wa|4&{r`uftL?n~W95iW` zz_QSCnVCpf?m=>#!S(7?)N*U2Vki33;|xh?MKG-;g{9_3b?Fj^X}Gp<_{U0{mzAsb z^GceItqYR3!{YfM*?;AijtwOCr@QnX&P$ep@H>My1(0j$I=eHtf5nMrf0IjCY{yCA z5Pa-VxHhyv8zWOWom?t;wh;Bv)Kw{mA15Ao5pe4`c5XEL)D$V}`sXF&NWJ!|t&9W7 z^O0uQL9_NSS~{;Y5LFNOD7t;h`6#+It)_^nwc+LlqE^creqUJ+SSMf$NG(R4JmmSfUL0sUhy7@oq5 zGQA-7hW%4=XC&JFL9D_*Q>b|TCH#^Jn96F%-JY0T3O@-+YYYL%&)u#uv^u3txgMIN zxqT^e`VXo^)OVy&tiK%u?-*$v{!qu^A>7uIvq!X#E)a7mwJSuXo!s9cc3XQ`yR1+( z_nh&)-ZwZFDST6Up-$U}nGF{{^m3XS?Lv-)zkjVs(Es(b8^H)#hYKmLDhY1IP-N_+ z45wp3z`v-UzCn)#boxCXTk_y*?D&E_SeHN&Bvm{Vw6~sV7aXRu(mpc@tNXJ zy9KgKF*>M8$;oPs(zJ}u{WEMF26GBI;PG?dJgN!P!7=z%&4)o(zV=Zl46;^2Y$kaB zBW$og8S*!ckP5PT>~_M~|CxR~M=sARm3Gptk6hIR+GSvsR~O+pRtsIcXRFU`?0#sy z(waHHG)N~5<9S^Q^<|PjpYceOIA%zF%EjVX)I!y3;m`S}?-6NO!R z<~m*pfQ{z|39S}tS%Ty~L`RGHlp=~z1Q@lXaF-txZ3bqqA=;=E6($E<&tb4N43_x8 zf^zbTbAq|gLhPDhc!I?D1cHnb;0B$FNBQ1@sDM-j!h$A(9)Xsjs8xXb`s1Q3!4#V` zPXDA6Z#qnX!Tt@&8wP`2io@#(@~kKq2tN=Er-asf}xz;_+0ZO|8+nN)8ba+9PAOd3WH+zh1)qY5XWq+~UC^RK|l zha&Xh*01MsD0}tBTiq}e5>vG&5WIP<^;?zc#YyRDA^)qdhU}O>^c4j(PT42r5P(W% zrNyK)zs54|5W+I$D_>SV?h#__uQD-W2iHr~>p=@NBF?O#0 zwNBTlk)}XilDojfTw{$W%`gVtfa+{F8eR3K%;PksKT`e2!FsYblS$2@VQuxu|2?2e 
zv@%*iFkT#;9+h_-1Y~6GY8}m!O*oRE@_IZR%##yK%y<)&Ow92pbN5ga@STiZ{x(mr~X`KNQAZhq(-z)mzEC(9$|BG+c9E`vn) zBZDKc9eLkY;rq1vi~TFzo#ABmf~XmH%*!1jLzcIgONR!D-AI})sq0o+%Fl$4hUdm4 zShhD)@`WIYn7Z?1$s?aXUHK1!lQ7a0O^b??l#Tp$UL`XTJSO0jYBUmL2=F~E704)W9GBJ z5EyjdAK+V~Z1Nh%p+uCjDQ9x#nFrdzK_= zMwInMhc{T&3@-iD?Z-1KOTkhVL`lK~x44_#X`zPu>+jvVd-dT8L25@vwV_-7fg3YpQ(HtDo2o)dtlDK(z(e zENU`&zItQv$L3dK^HZ(S3{X+(4X&02Mv`&eFF|DdLfZc>f_I7eL46|Hj6^mQfYxgJ zh8b@H;{2EIsg1epY#=*P#?_{~JP|1-boSVDnx94f^4+#Ef8%XuBhrevPP5DDFH<&+ zx$A;W(K=lWJqkeg9%-z?ur{)n@E{~V#9s(qzY;#|(b-sTe&W>nEc=fMD$7`ECTfF& z@utW43vT7X-%Pm8T& zQ4*G8Op8r#pix7{8E;)oIjcxEgU?P@z1ix4hH&;t#?Hz%S;0(#uw7}QjtAF*$5m<^ z9A8NlvcV?VcIUm;)ukM<=R3|DT}|5hSk9fHb5;29{yL7+E4aCg6iKF}Gywnlz{k^) zP*a0k%6(LU@ad7eje55YTW%i#PI=_Dnf|=R@6VE?rdD*`0>o{zz`1K5P6K1CBgzyz z|DTQOLdGM9h^V`DTBDBPpWQ4s@6wZ-KkvORe6}M&(4@HQ_uc}M+^c!Phi`)yg&qjr zrWU++vNNo(s`51%WI^ZY+{ghpz5zxp2V>0=#b$&cpcxn5CK52apacUBY{Tfby!b~JS-Xm-a4)K8Q( zRlmGNa@8`QI4#^{_{{Da5bINdBRAY!r|?qf?0=u7QO!k#m#@_-O_XVmU) zv0Kon-4&=+irw79)wF5iV_a4)B=En_-v609|33%z|HaCuTymMTcYojUo zLgV6xmIFny%VLQ>tL+e5NQo=RNVDfI-U9oZcDmg9t`Q-IaUvilSLB7;@ z2ygn_0Mg74IbgL)Uge@jIMGpu*)s!lkDiT(7W6*9=jDcdTen6=+ZB^Y#u@kfGd_9p z311MC11mPML4oxYyyWc3>$2P`djyD!WXbIrq4go&N?a)Xic5IR6{&4BRBu0P_+!2uL47oNgKUhiicbR-;xqQM{R^ z0FpV=@`^-C@{KMoOi0dUv9sHZm_rWKZ$@8Nsx>i&Z0ob)n2vtNwzcZmTDWFl$K{$J znNIvRJLIx$#QcI!hPih)1zX_-9n-Q!S$^?j;)mFIA(8^#BBednB3`<3pug)zshSK^MOEu3-UdVSuc}}P_lpEH0O~Z%J?CS`mLT)9 zy0>c#phIiMU)sI8Lb4nm$YbI7!U<#fb!tm-`t z5x=OK5harXji>ya`m+vkOeD6YtrzXxBScG~gm{?N3QBty;7qZl!xK=QvvVG_6IMx2 zKv8mowMPtsUg>#Tg+5?omwnNBsp&IM71dAn5(*8P*6Y33=umJ#Z&cP2h-pK;v6C8I zz}1}i+WkAAe<-*LeXD%Vw2MhaDKm;FBQ*1-ey%L;_m1Q5qidR{2fBtPx(3>X-LH}A z7CM@SlymV<0Qd^I$!JAOs+ul6Q|pO7W}Dr4(iEBu7I0r%l@*mYgjmb%xMRidI5@rJ zn#V6mmB)V*=XI~TXf7qGJR7!q>%%a7w!%I!dkZVV1efg$JsiM~(#r+$lvQ)ftL`u% z^HmUQfT(m4u6C|Qe*b|ZvFZ!;1mN{NfqGCAK|x3kIEqX8Z&a2=L$}>BcEfA2gFex4 z+)FwAcI1=lt>YS%ICsJ;wBeUxb!E@ETPGQ&I--E_w@N*qEN4wwL|HExygNzi1fHJu zvf$H|`JAw&%#gXhHL}$uVxZwh*HWbC8@dwoHRH?GcJPhFWu`ska%V=&PE+`UL1d8- 
zVZ<(}YF;uKy8=ktrd)?%Sg~{+bE=S)D`id8e@w|Bi|#pz{+zwK2gqc>cpX;z2w5uo zPqj=#SvqB<-Rc`%6Jj;udy0Qry`ggX+SM6!AHtuUJl6^kFTR8QjT&GN+oU`5Kg@kE zPN$3>_>ZCg1W4jd%YTK}C!*Tw4Xb?Di_@Ny-NX&mLCp~j&L?aY^LtTw=6Q@oq8;AQ zgyTEu2tniEPv(e-gm_Wg+wOPEw!>HlO8>~M#v*+z80T+6)c@jC_k}U zrrOztYS=$w*|uXD36dvo$Z!9tydPM)QIe0rD}!g01b1@*2h7IZw_$6_Xe2FdJl_a*68_H*6m2pt+ayLWo>ciaDvYef^&qXOkwO~TnZ%8&S!0 z@vV*t+LX8EmUC}!!s;4UoG%m@ls*dDU@;I1bXVh|_%m8HY&X+HjQ#U>{RB?;cqWQ> z0*i*1cq+MqLga>FL8#~gZZWhk`>EMzOb~^PdpPCW8H>Z$f{wwWs`G~q4}lb+LiSTy z@1NR-iNF1o(i66n?2u9s&5^caMR@AmFS_1Vd$$#vxS-*(QLNH!V7*I$KxQTDrOfM( z1h`stPcigJUAoZ543u+EBD{iKv7I<)AR(wNCJ(5Oy1YBThZ~v4#G-<#M6X0q&+L5d z;k&MksJF~m6&3QgZiSl}1sc-3d15M6JiB=r8Qq_1{7-+HGIV`=R3 zC^A~jlabb#A9k!#sROuytoQj%QOdwla~TJZ#>8C{YsTcA+beLU+us&9I|2M#y&s+L z;%X2)|5@z@_5jtqRFJy)@~0c)NXpZx@EbOXIpjVbCUX19A^+*vY~+)tS#q7=gDprr z7GIx4JWsR?9@~RFJ@)hiMORLEaU1q%bXVRq-4>s8qr9<)SHOsK<)EB3hHGoR`snF( zb+fV)cs@XAw)oKj8gTMkmyNkTBJggPEn zjcO~r)-x3X*X-7X0U{giRR8y@T}$?oOh8% z(v79S-9=62;^GXAc12}@^6rXckyY#Z581Ic70X$>PKC~sG^aZCAq62)yoLR_;C$=5 z%ChGradq|8lrzuTy?KF}We1n&x>e*YB81X*i{Bm2&=2Qp7-4MiXKlysX-^_X7x-mp zThT6^&bHLkPrtJ6YT(;v3VECF^UP26&m%33=?i_kZvlJ&?n65rIL8lF3CJ?Y4xkyzAtF5&`>d;e@ZUUlh;%Q zOb`rXl^;10ly(TzC*VtvYP9hiJzZrWh-8~OrJ9gVIf`&K;gI%he-{n4BQ+v|9|Z~& zLXbs}XF(t*fZTs6jx{rb^oIhOenPK0C}gM`MUPXVkj!<)V(?hq8b}vm-4&cWPcec-^P|N*Eg7}9_*pN z2X>PuaeWVMe;}_vPUc#Kgc(sl)Eo9>2_^Pe+~Ch-9XdxE^2lU~P< zlb;)%WRSsH+n-}J+cw(AkTwaduY_vd<9u#175bw%ZQp4*FSM4akflbjU#T{r?ps@% zaPPXy7uu4W7`L{{+y791v%`q?-t30`N)Q`iIbu#@DShaH-&rGFomb5!>9X-PDzb#$9##*cvw+NOmGQjJ!Jcc!>Ea6G zawE{Fh{3NR)K^Qmy=6~{@(ugh$f_d`=z$|sKsUbmbytJhTjAm2qL_@SNZTMpo9Wc+EqC(hLBD7c$n z!T4&9H6MCZ8jf^aQ5Mr4a^L>$x^oKupPoq>=#t}sAF`qX{-1|p{p+FXRM!7`s1D*P zSCTqQu|&$zSoxwVBTJm0oZabyAms9>s^4}cSL(SC4lT>0XQ_`!DVM*dB2H|thY0b$ zxOH3*#2$>VYWGehN{YGjxxR$Q1W0OL#x^`S9`stD4}3%tjUSN&rK73bMkz|$KbybsXa zuq{<`vACG+r9R}voG`8}2Z=g3r41!%;i3(YXaF$S+^6kn=@`+0<}PVBOuZHHfu)46NXDUJ_YZs5wt~ z%bD^|9qnEs|AGP%nzs8Tf;5o45MHA;x^s7|G~^#fEWzhjorSCq4yBnAHwxO;(qzuF 
z>w!48fq0!00dg0TvB4J7ZrxxRgSds)s^C5wrvVrnWdFm_i0N(e8G@Kti(4c*8H$>T zE*22Uyr>2|b;+4TF12mlEV3k}V`vY!leeuhu8_ycSg3$}Z=D?cV0Z;b6@R{+MOf+4 z_32%N6}T@xqex^O3YKwQDi(K=C$jxTnt%o#(~^S>98yt241+#(mu7)32%xFx&g*e0 za+_|OUm+KatP3YsW;*@eNss)4Ue`c|sGuofzur}{#P5Lk5=l>m$9u)KBa`xi&J7A7 zT&QT+4X;ReD=LoJzL!F~+5BhJ8p%VRRSb0=jqWQ4KP~0=`8rab_DxBLb{4q-bKvS~ zP-m+be@|lY(iiz1E&ClOaTp~r-O-VP80terSb{5>FN^IzASPDzqNKCwd^=F4z{5~y z(?GlRnOzf@Hg-VL&dvopG%cv+nohZK!HnJ+{-oP9y2tI(s~q1HzGrd#Bp=(Ka`8OO zD)>=tJf?YhKIioM^mvig;q&GY3~1Gle^=#lbVF==>C zdw05ToLU&i#Jp_?jN7+r0NFH%qG9d1fB;TR=%qr*%8aymJE4QdZFVs|b1N$ZBYmZ2 zmiR7c8!Ii1|C_%&cM1_Jju*bhNjdDx%H@y5VOKRRTkBZ%`{~isc;A+$Nw}E2(BFb{ z%SoI6tP@tC!W~!jLq9mgku=i+Nq@}}CwLP2lnCNXErv!u-fWI`yHo#jDulBKXr1Vw z2(3K1IoAerc4NJ^Y^4zerMgRMp-aYDUq0>i?}wY~xIg=6hQsNIm^EATQ;^txCsUtx z+~oxj){!6Bn4AeqBoojF=y;tPm0E0>fVU$VfuHo9RGJ+sQ6@?bpM!z(ZzUdE&#ao$ zu|x1Wv)_a?9pupp-ov1Uuy^B3lE#n&KprV+KQ(GIMX66FjgsY|XO>H8lt7Wukbhfc zQWaz1)9^+p$5OJ0(DpWqU0GPsRHhNv9+XAL7XVq4I&QfBh{shLC&$KBW=&0=*P-4v3J1ytnkv^}`CL;!`(~(SwAXa(FH0OL zX+0aR*{-dQn|X0sw_D_8((0R^&Fwdz-QM%#BcOCqh%L3iqQwKBF^Hg`eAz>1o9?*3 z8AJ$Q&s{+2AkXcD9B9^hW3A$ z)$$P$U%xO+SIXb-O{RnPsfAn6P7Yn1UGKS1!xa#(S&%3$kai*%&gVG8hheo^Y3S|Z zq++@1a`Sfn^73^AP$(t8pjnRa=0xHdDW zsXrFM;OuE{^WIo{E6U38PAdGrAltR+(w+>jp+tnhp7`Li1qD`uvC(b=%tKr2#<;mw zmgEq;BKzp;x2#QkbkTEM4`XjKROe}j-Pc31*MYPZuV1`eCk|H>jQ1g#ulpv=;zIbT z`*%+*Eg6OScygwy8%Ah_MCoLw+A@Wq&?rPjh9DrB`F!i&cH9ff`BW-&1>avslP2HW z{8g-HwGnT*hU%59C)LA6_*n7uF`{N;xWMf8yJ2P9%5cNl)d_^Ti3(JAB#A$_Lv;yL z6|1(_yd=6N5rTBKoX3k%HSJ0{$6VF4b3Jmri;9iIdpd&&doC}d6mGiWY@q~!a3!p) zEYvS(#QkZ$(ruJ(BfmQv2+OaVceVOq;oWG-F(b#);p*By3jEDm!wTP4aCkt2fRx1H zbYYVjEH>`%`_uLk7!n8Gzh9D9c-rs zCj^HPJFR#=7Qfd**6{TDiXlpnJNR_s#u|3MzBPQxz}&NzugwYHaI5&Du5tjGhP)oV z7^kz%TTVhxZ9INB)t*=W{5mZ3e${|mXZ!}Bn?vp)E$N;4BCH3YjAw7!)BFY-@jU3&?nVd4su)f$VXOo?+dg!*1JhUc1> z*YDFV;K^z-=Vfl-&A^T^wSIDR&zQJ-faY}!d%!P0!~N-NusE#qp_)pn?YrPBTp1NTCQSd5fcXtVr{`nKQ^aP@{-lIP*yo*6J zC64N`5sb&I&=oY;wAy_44b8>1X$$%Q$Cyluh@5kI&H+>9cn_*8Vlw8;uj$#0(rVl# 
z*~}@p}t_b_ojT_|a_NiX;V(N3xuMxd=mQO}gmntuxPex)Qb4JanFwlpNb1^mM zON&{KV%Wj$`ATp2N_c|k++anJGIi%(xE5(QtFgp?Fhdo=I|uYG{lMQiOR>2FLL9V` z;HY|7tJs{_y64q!P+pv9kJa_X^L1%kM^v;zTVqSzxvr%`c@T;zShs0UyxaUkD*Kx9 z1Iyh>We1F7*12+v`Lu+KsNikx{aYiF^=#IH) zyg%pSuO14L8+a~%uW+|<#p4^@&IT0#Th`d0LF@ zp(Eg%Q=f7R4R3((P{1sPS zL8n`-_{K+F=0aZ4Ix|a3hTGWxNc&W2Oo-l@T0R0a?0skLRiU{Vm{>mu%?dn-o84GW z$-2FCkH43loY)}>o3UpVhbb@}=5=k1^*}hhnu~vZoF#sLdO4b`{8>5QpLy&j=-Xk= zBc;{Y z+)u1Jjd#**!^a~SfXI`~E8U)qcCiM`wY0F9Fa|{jG(@yW>!4rtav5HE1i#|W(6={q z^YkNv?rTO8KJe`-_KnY$yblFqNvygf*`jj0BznsD7V1Geq!ZyTCeG~au zG64}gmP@~c$o)k}%Ty&v$imP1hyvVv+kb6d>WYy|(utyl3h=Xw=q5ysNJP#L>>QQh zFsQ89oaTTYYi|*jShP}oINpw`Q15;?4kpx&Q+PNQCtN>-Z?Tt9HEx!0ii@(jhFq`9 zOB1ZXALVZt5D_~fAJObuuHPZO+*T<_RH--iGq~Vf@H=&qDO6=egrYT4hRvDcNr;8# z+~z7}rydO-1J*Tw^v2zco||LMBF@)MYVp1yL~(f{E#BehFe0An4!K z^3XRB&wfcnEI`7)<0~jf0t2R?fK}9+#gJx*6}?O=X5?2A#A*A@H(r*c&-i1rNc-kL zNu1UiJ7vcxr=B(~(%@v+iu?Vq0q1t8%%-_9=2}lR>dj&+Y}Z{T4Bafs&D1zoY~ft? zP$R_M!)$L%@ROjtANLuR?d;P0kE_GeUs~nze9{f*=};t)&-hx0=hvj7h*p3ZVcM|W zkG_#P0taB}`I$`b%Z|5G9okOG6tKJ%#+sfJ=bVbK<2`R4Wx(3}JDA`j81aAPzr=#o z=GaGYt`YZp>=U5*k@UxSWttK|Ic^kR&}7|)rCol8cYLAyfdzc5b1OQ8P9;Jc+_gDU~bv$$DEA zIy?;QS4b90Gb8JmDjnUomsvVajHVWU&6e2D`Y0@ccT8Eq5_p{KEUO9zjNxoPu;3} z{yuy5RIRQpJ=5Lm*UNe}(uix-YIM@*35?a~-gTE{p-7j9ebH?R*WYFV{j-U-Btj^+ z12wx!LybCb7EsOz`Kj;8x~;@Xt?locLOpSplVzFN-^8=A)I3Dx4UaK8^Qoe|K%dye zEN+F&6Ofm~Z{;eS*2Kr)b3Cem`6yNHu1pFp#@7v(T=L>;%ET9hV%*zePJe8n5lZ7Hxk`vEpgsAwU}2$F z5DNdey`cSGhgvn=q{Gh3B4$}MRpE4O628HpNlEzQjHs&o>robv=%}`U#&Fz>e+B4l@;GFjk!+k|l28}+pVtBPsiu(^z`P?}&a6$E{ zq($a##YsWD2|=Tnmtum;;P;b4jMtaTquxP|z7mT^WNAGN4KY`ts_lA5s?nLD(fF~6 z)$FQ3nRhEYACON!TnAE2#yR{gg;n)hHPm!8k*(b*=_qmvG&j{#kxNcTZiykFwrP2s zGP)*nkhD_y&=Octq$B>ebgbgcOP?@xWr3~Xro37mMP&A+6#t4RMz2yKhI4;rbR1KK z%bI}liF*02gQf$5`j1BsIT}%Q2o??m(jxlD^cHVB4G*NOIp)5(2F?U#-3$-8p!_k1 zYKIjei%0U@!*lBhUp_@01$++vvvlfgW>D&r?h_{qZ9RBm@?)ogU_6rg)1$MmGdjY< z+LKzWjvPZtM6DBXx<`YAR?lq2TGk4Ap)qYkxh!^4TsnuRZKZ{2XXpJ(tD9-%$Sh;c 
zK8giBd@10$yP7Gdb*tMbyI)5!PsBp6GHIrZ`?*BU;jAD)kj^!>wd3G@BLz`y+QY9= zZ{ajkB+^S0r7tvAq1k+~vM-jIiE46;ty8k1jKOTR6w*T&T@$+dGFQc#Px zpG^(0Ucv-Lgc-rIt=A%*w@(G~mL=Y_^9C3-RXRp~^m3d(IMN1ps+2n*9qlEi54k9g zOB4eUgkoABX!*_PoP-rs$8a5ERC>4zk_Z9?<%G53bU4TJZv0i_kVlJ(KghqY$D9fM z8k3OU_&Fx)ZrFlmH>yyU(uuJY>(#>?j@TTqC|%vB%5lmpl*NC=^_^{~nHO7L9&Bde z{(FTLL4iv4#M0L#4pp*-;7!t9q@Ne~#MBeVry;XnVb+kz`D}_l$5~3TJ<=yOpqxnH zya>WyWOzu?qR+61R_&x0|A6~@L9g`S``adu%$Qq*9==i~1iySrB)R4c<=~*Ayjc!` z?>D(qD}$Emfx$XuXIredJyDv7y*_1TTub%{#72lh&VuT-@0gaq!i2(1hDZN%V`PIS zBI~E@9ZBAx`7(IoGehG)4C871zk{?HLoo~X3Jh09yoM<61E+cG(ogx!4NK^Ca)C+Z!81w)Z!QcKa>zL>ni-wPQH=F|4J_U}l-A=h*OL;veGpL!W zPdT`DVqTlQS6KM7O}^O*We@mZWS4s-pic50N;meE>kOoy#H_l=J0lN{?vkBg4a{>Q2+nfK9N;ydVW#82|ojf|M5)xCuplo{nU1r756#1Pe60a zEonbjOLpx65uy}NuU~Dm@yRUu;Xi(j6aC|r zzzi%#0x~_V%KNDhRoiJu)unVX;k_6?3Y2ZB=_5q;WxaQm3jD??8;IUGMhw|=f;?Ex zFPKc6C`kSM8z?PS4G*zgL*D!K*wW6O- zdmh@?rodo14C$o5qKC*S5@_6K1$4Pdyl}&jlH){zQETuIcIg8mGuFGjkW~J{Bf?%;sz9F_!eOGzUant6`~pbK+mzw^@k8NQT(hB zlIno?REWpI2)FOO?mBC`9ERb&RKN=d^A9<>{$~1!ryoBsVt*zk`IrgYR0ZMaI;NjN z#W`t83@FA@+Ykq=+qIM;v`~di!D*uf`6|EjdXKK#7|t{@5N^3Yd7!jU8_szLm|yi* zlNWfTBpMO!U;SyYO*EPStvQX}EZ&h<`ueOy@D)dsU0m<4miXmNN_V6H~DSi(8OzFuOMu2R|I|^&Lvx}uip);yjB`-%^^Yv z0BDrY7jAd#Ii07N3t}>>m8=rzTc}_-d9XZi(^$d zUjAl5-USz*9dPrAG8*@Hhdlmn~n`v z^sEs7HW3z9A^CUQWxv0I60ZQRf{2>Z4V>pC{Bpk)fnlnryD79nhv4{yuMePfE5{{& z>=;|j>%I(f5Zlrj{v3$c7M5s$=#9~wwv6G~-m$S6)}q(u4i;n_WuWCqvU?|{mi9U3 z9b=I}1>;8>?w3q!T}&hk`Rh*VVj+aOMy>+43ggv2eAFRQm`iUh0`z_~Lgk&D$T$NO zvFj*X5tNQy6eDkzMwBhp;3(kqt8VAhRC9_(3m5UL#Qlivg}p|YWaTISTH@yHIeify z8%lw_-EuadX8)v=$NTCAf@ZftwkaZ@nrj*Eg+^OtZ>EW5uC$Y0rlp6ul@0Srr)8Ib zsWTqk6r{j0{XxjR?)+wCk@O}wVBiVr!*+}F)+aiG`e1GIT91<2x4^(tW%WNdIgM5C zD9IL-nddub_JdEPkdQ5LMG$0ILuXm91E^%%=wuG`-xUb4jC}$4hvW1`(q3zxzkpQL zxTs3iiA5{8N9LPhS)pw}BB(j*=nUaqr&;z>E8g*Rgwxqe%*AGgWnM z=$lw@ZNV49?dgcVu*d=5Oos_oC8 zHLr3uvbZBfccVmzOFz?aeZQl;I~7FVp7r3qFsr4w{pBHYK~wwCCH`#k4vZx>%p`nW zJ~u?KdbjTC<9+{pbg?ywh|hz^ 
z(!XS<$-Kg#Ys+2vYUA{ke{DqKc_Cj9w!~pzoP~#AZ?qfWH&yl2W&^TIVyQnU2 zw6qux7cSEg^-=cP_2^Kh{@klitv@1&E9vI~7>swursX%MZhF216TXiy26$+FH-jpG zz*NmO5T z=`t@cTB^nSq*5~B)0=>kTE))g*plsm;5lDomkp$vwh6S}N#3%hIW0Z3%rt7RW(cQ| znZBL{jT@X9*Bq{t6ev3#CDF|Hsg?^;KFnFyRVd|f-0@D{u)1C_rs*E?`l*M?%YA6mf>dvLmr*yLd5PTv9MUwU`BL&^xTx$2?T zATRbxBqKRzEdCICJC<)qffP~3bM+$vL9fwhJ2nBh-$dU5*rIN*GWBQq0SRF+d}5LA zFQsI~S_W!pF1eTd!7S87{p48R7c)8zv_9Te@9n${9eeoL+=(wC$rk5`ob40M0< zwn7WBwSLIO)M@NhGReWj*oG2A#Mp7!SYW@zZaZ17*ywnY zb9r$GJ1gtn1cFK2v9;;CTwM)Dmu1Tt4-i2*JQ_iz!;Far)6}=;hBm3B}19A?BNC zg3gyq|Hu)}4yvcGl$?O!?p|p6iPih1AjS8Dsu|DK;Sv|c`T%NV8>nJ^tSnmBKU@rF-6tY-) zinbW1`O7lMb^(|(JLcXf`TkJ!m*5s5HnqYajsm8G$r-whis62{tF4bbt>{eCeoutuF z(aSl4+Q5W4Fp+otd5q zGzAn=MK8eN9_#hX%T?n(C6wH9OWgQxN$bGH3h^&|KqSl+L$ov+4VAqORv}Ou*rlI`}r8EAQ#cUv-DmXga4pQ9ZGU`B>Oq|#EL071Avva*<`gt_K70%FR6`Ykwmvv} z?}7P?vSqd#ENaz>_6z+dnPPum6ldK(n(FSzZx;-wp!I@cqdjSwQ4$i~5t2dWU|I($ zLdnXa^)XLf^{{^Ssa=JV9icc*{4DX2I-;+7fZ-Xqm#Nt!Q>W)UjpiC9`q~|5nO!w*4z9S(NPA*#(+n7?|EP=Hz8jCn?az)C5*x&l43mc(Ds)q@X(IAeMjyzfL4jMz%V+v#)Zv74!(Lr%He>7(27viqtY zppo+6B+lEGBQrrvyh1KB0aRfjrG_%iQWY%E)3kcw64a}>+v$#Q!e7_sco>rk726)< zka}MAFZ~?lxM{lO;sx5wU`mvsPTg9ATvr)5@#Hs{W&WYgP zf-{mSG@wWUa5ym!ZYRMbY2PYo^eNG+);8MhlH&3on)j_T!%CU4<_Yr=5maXtNd9po zD8Vyz*UE?GAv4@DpID9xWzuE}Vk*Bx4j9WdtWuA|Lr0>K9gUv}JzuZS#t%4M%vi{F zv3(_(EjyX}*o6K`OPVshU+-XHt7yg*M8FzOkwbJ0@H_Qo$hDID+GG6G z&%T8s=LDK)s+`OX2<4)=W}943k!aqNlb!JT{$T};{^XGXi5Og8rmvZWE#_e+yBNPr zTib5x*xGH@qqad)!PZ-{IWEqKl6zVc zBB|tq36ayG*s^{hp;Ghv!A29od+{ooJSrGTHsiLsxb1T9+P&PU%00mSr6{r>_!VOp z>ZtA)f(n*ce9+W(;T$S87wPc4g>Cb^y?+QSoFs=_LbiGjFnk;6^!>HU7&{;@;hmk& z050Taw>3Gsqh_Z$>h-yB()Z~`PTnhL_~1-C>*&`fVAHx_$5zbUnpi+H{LG#^Q7g)ZCJ>g0(+-o! 
zkuVn(DP|*A3Q=qBuc7t53wcO{BR|$AI=oM4`A5D!W6CsjSq*!J9ri}7gbQx(T7dYP zzB+|G+Zcyl1CEM2K9tb54b>dFKz)Ffn%@y4tWWaNa6+tiOLs_mF!T#+1m*!Zka1#d zJJrs>7$NlM$s{vXn&vzftr>xSJ{HCkmx?t*n{S6{x|;lgGUXTMYIcuoKq@2E*(h(9 zwN;ayu}RR=pL!oTd=0OFT{p9_1}aRh#hr9{3gv-PD{7(s-zhgf!9 z%5OY7&>HpoVMabc{t{e3rJM4^0J=P9R`*ZuUVyT#m4&MZjlXKu>VuD$=M`gl=IOh% zKVx9nXun05ex2N-%8kV>iBi@R7M92Pt&3{8`4wJ`n3X}e`vaf!rYgF+3T23kC+I{%)>1)YPnVy3)}q8vVfFhzI@ib#KcHjxxn1CYvr=CC5PWZk4iq zJY~P`BS zh->jYKLt~cG_t|>=`EZ`+rnHkfkwTRT;GyR%hBTqyCGZYV9V2gJJuKt9InLeVs z*!!sluTi@_aq@zcdeLD+XtFq*%!D%~`-^pKS<9uqQJ?1>TDM2Y1l3U>6+e_QyD?9W zbHYZ+9(n{Tft;P>PmAI9%WdsuZ6=AGth#yZV3X9Y{-|FBSI`y9asdCbAnxpqR?2g~ z$$$e{9?!bVq`Ed^st$LI_hQ{*bYtVNRhhcWhM`)*-y7{kdaJbtz3p)x*vMr|)*INQMo(hWvkYp5TP4csXJ(c+r)OrMq|D~UvS|KD#ZIjwPS4yh zp2J4QjO^c)sHvh>tnXlbTRB?J{EXplJtMoU&{q=8m=PKDXDP>z8U+%QG%D}f)GLLq zJ?{uQk~ml@^S$sY*8tG}*gYE4XrUZglCx-`kdv?Lp)qT_c8|nR)ZBB(F!Vs6w{J4M zJ`_Cw1u}g5fL>QX;vg7k=zq?)1NQBNLxh8ag@r>#K!8U?MMgzMdE5U94ILX34IK*| z1qBly6AK3y4-XF&gMbhpmk=8l5BHy!K*78{0}F=)2Zw}lkgzt>;8Kv;;N z0Vog*)Q84ZGpHsgU>Sr80|kP{g24ibfSy$S>v> ze24*$1h9gKJZ~l#+kVv-(seh|t&ZfGQh#eD(|HAr9h5y z4LfNWpRz@F&a!#e`p*cx<=f8(ilNH`CXJ|A^#%Ue9NB6CNygm+yEHi0PK$U`Rvo+z zQ)l@#URFPBlkii-D3-hEjV8Q;6J9|pw6~+2uOMQS(&n9{0Zrp59{a7t@4M!xg$NZm zLPC*lURm$*3yTNOAm|kxAwq9I$kK*iNSIVb_VSE2VWp-2{l3H!Nm2KMIPYeIL;Sev zyfBv_cda-8pHyXOV3um4^J8G15h3;G&cUXiMqnnq=b{WoU%BwM=CAKF$6EQH4u9;$ zIMIcAuiW17olKEI)Vc?UwXLF@5St`DuH6M_0do9EUv3Mv! 
zq!aS3Br7-7;Q;4>P3*7GZf z^S=vRx8N{u`mJYzUeA2sx$--whpkO{UMlZ;4?9t!49Jh&E!$sMDa4z4@`+efUyU+f4y6J#@oWFbXD7Y8$Z9G2Y+2=VMpc1 z?>hz}Jeef|+x-bg$=+^3jp?a-sqQ;=LHolNk{a6e&vikuHy{n_3vFOj?p)E|$}9gK zEZ&l-Q}3T-!$kr|x10VELk%xdeg#d*tIp;Qh^-f_#8G&fue^H&rEI4H;an>|_P>wy zMdnO{ukY=Y`X9AUuc)IzDLl}hS@!Wr4*X|n#w+Seb(u?e`iEz<`q&A0u;7BVOqmFY zzDv^u7E!tn?KgC8aRII!mjvA}1ee~65qjKV-H0Hg>Q-KwRBt<;WmMztOaer_CD~;G zqp&1ogz*iSt1^8HiBs@-^us%*hDPnIh|aOg-TV9<3z}BZ9l5G1#wN2?3YoeclwF7a$4m3h`De)D`Eai>=h zVsII>**Q}=K%Nv=ZGdrEGDIF*%Lm$w$_`+Qm5%}=$#e>XnicY3T6;U1280zoZzuRB z*raX;h|A&bST4zL6)9f;k@mOVxccnNd+}$+8cdSx9fQY3+P7&E8?~`%F?+u=_@$=i z{$kfZ@3L*LLqK#{Crfnj#6VsI@&QY%lGfi;(q;o_AtPVCCsoA5Ac@NS6^FIOSMd&O zlW5xCAO#`}9pez+YRh{-SxEvjpmEzl{^HHj48#B?+ad>7Pa6+eY?aY%?o&U&Q7O3b zevjV?A^naBg`}QeOO64YHm8DMk@HktJw`T|KX1Y-}QK zfSY$V8Jn)qVpKV27J#^Jh3(BV+a}7%pI;uZe3+`8WRkFzN9<2Xeu@Vh!x0B{__F-J z*j=Rh;(v3yz<)F0zq#G_qBE4)Tc}*Z%3^Z^Jr8V{Y<`+*H$m!8BOK7J@pQh73!&LS{f}dlNvGLy+ zsmIex9xlu_C;Y+Papt5?W08B2V5*;qeY9DR$d4lMkJZKtgLt0%he3rj@OecbhwkbF zJ8>i*k&;8k1|1$x_ft3qPQMR88v;06p`;09eMI_W{rPibr#sG2*|FKZR*#aVbDF-v zt_7H(ug6lc)zuf+_Y8gu#nxW0H7^3Ab^QEDaiuE7wsGfx$p-~?vvq=?Wu}xY2(x!I zH#V_bDB#9^DB_KQPstrp{9>5&-t%Gu0R)-J<*ta%b@`Q%l2c~omZTli&Rl>xNo!Y*wY_%%30IyhsBmJJHB^p>dA_aIH(lD z$X9Q>s3?1%Dd2-F?p!Sx_-qUZ+Z393dFMUAy#qs}d$U#IHXG}+eE>ABK+sF&+I++n z>Y8I=!JKlGAL7K~pa!JLN6IxwMIvt8Q;!yVlfk$W3!KPLdyI{+-akR#MZ&T?8;jjb z9w#ohK1}bWE?|z?lC)XXe_<1GBH2rDn)euxaBAV`2)~d#uCpnHpr^J?d%MB)IW70^ zq`|gE@d5739;jtme85)}L%&#gX~H2xQ9`1>-U6lEt-0Oa(@gNnub@_qwFo|?353F} znYvOWiT<$~jN7)--2c)W))C$WQESIK|&oOX1^ zft@nX#vf_waU1=$+D9ist=2Un#`Upkl8~Q%NnA*=FJTXZiCor_}=R_!aNv zhLt@59joir8DLOLR$Mgx#Cy-9bU{{3Q|u)Y@~48f(}dyj%wH3m-SOTKX>#m0FHzT) zFpf9Uxybidka@e;R+$KP-a_1?o|5Szj*0>QTD}Dhoc)qmEWfnI-v+IV`f*Qi*9OwP z!B$~UCkjL`uIA)B*y&L z+6Zf9!iZcw`$67L5 zF<_!E0%nrrx~vQiL!3|-1i@!FNNp^+L#ZLBoQDgs>Akz@UhY3CnF<-HM`9D9E84ss8f-l~C;pr->;8 z@NMpA5%fMu+e9#ySP6U%XW+18Pl~oo?1^(HG>+98!AdHNQoS)lJDt1Jwpxk4m$y-T zXUC8DWkh!174#tvgDN&6!WLyF{G05F=)%bW&=w&%`k~k#h3+JA$aHP7;7zY+C-Ae` 
zj%ixpTG*CnJa(*tn=7WIEt-(ZA;OgcHt8r^bfQl)Z1z=Sp=?Bldi^u`9in)*%c<(b zwHZm$Y@@g&$oNAhd6PaBX_9?9K!82Cx94~2!WAcJ35w|gN52=FIA#2!*7Nno=efGvF4Fb;%anH`^QdRm&xp(}PgV6dLEMJfcX;o3d7U{5P|{I~ z37q94I`EIU>E`9*D?p&-OjcS$+5wYr#xupB9sX2X*5HLfnIO`{D3;V}^a5ES_SO-e&k z+5|tG`MiSs3IT}K2$u{ODYsdm%Cw*W-1^XGhjmN)qA1?o)e=nq%NXvgFFKj-S1ZrL z%NzR5O)YbRjJC5fRNL!%wo7;#t01T}Tc|_V8||~ghWh1OTl50R1k{QswV2w=7UbK|pobiF%gC&G&)iqio2>);}J$nHKrCL5t$+lLwt+nRyp=Tds= zgkqwvQpi$-g6UW~fTQkt`_lrZ6H8CO}-&YMOLjD z_j8lED_bkyC$$g2>LmeevI~cu9}Hq3yXVR*xDX2zCvC`tf?W%I9-fn&c-CIUO1Cw575IB(?eP9zKxMe+lt1^_kQT_H-#d1N8G{0qx#Aknp zG8-UH@a5W9_C~sJ23P&=(IM&?BaZGp(o!D1JW?$tQk~Xbt{5F{&ewGn=G@!?Yj_<# zJ>Ci>Yd`0-v3a<7IGDTG)^H3=aVJNnlVTcY z`sxFfQJch|`w-}4QfeOMbW=(73JNCLJ_8;u44%$jLFG~qx=8|H@=e~|0G=(6Uq(k> zK?i5sokROCGh=V$#^x(%?3&J(U&;!F>0MTc(Hgxg_+FBlzN%t88)-4l4>kRUvHn6WGSFy})N8*a6VOFr7-KLivH7bRfLG2edE zD+tFMI4{2~CcWv1Em-T{YYMwknOz;<8<>vCrn}Y3qO~qz zW4SO@7lou-W~q}vk+t@05A>Pm%Wy|;16=YQ=v7Z7jwyZ26nf>=xK;1rH=5Hy2tsVS zfKs;s0LcJi7jOR9G`xa_4IZMK?lU&glxR~AdS2eOWT=^4QB8Sw)p#t8Vy6ryh#gey zEU9eYXPqD4eTk5HYdO@Rc`I|n2;@| zyT_0%j;F^jz3B2{!t?e>zT-Tz?Ie43+1*gb;J5CugJX4#6iI$cXg5lX{n9nzp?g;q zFS!T=35fA=BA(f}M|o3*-Fgh_T;p?3G>}SuTd3npt{JoB{g#hU!9jV)UgKY_Lg5tK z1%tQv3i=O^0RNjwuDGTlnhlo}=k?7Q-Jxq-!xn9oHEH(wVI&Q+O^srocJL3OTju(;UkVG3(0(BCVLDzDxQBgkAo)v7f z{X@l;D&E03fsYT?p)T5fpz=wCc_rl)v?(23xlepq34zO8`4^}`zWo~yxVAAN%nM2T z;Yut@{)s~w+7F=J#+Ay16Gr zFO#FdKXoZ#RPKw6be`jTMMJox{6!Ah?_Qv@E^_|`Zi}JA=T3b!HGj|dqUJAq{N2NQ zS~<*N;#R7_*7p2RTxUJxn0k^U9r#tjzZ-3J=1+pv49Z3V%lV=E62vl%Hyaq3bVGbs(-h)W(iUO^JW07gdj`|fkMJ~Hy^ zsT&1$1v#1OIQsH(JEn+IVoAp!TdIrMRcv%oiU-^Bb8!E=Ap87=U{gcNVPn$eYHvSw znMc5UHoo8NZ7_)3kG^=HzJkuFMF#4kbQ0^!<$#q@NNAE#3T?gsBETJ4e|Lq|0dQ~r3L*j~Gdf>j3;&sZ zP~T=&mWQjxw?CTzyx}wOyV(Hp4AFSNdCq-n>dDIPw_$TF_2120j)Czqor!uuQ*WBT zth-h|zxs=?ZRKl;<~fv5jeF(WQ(IXO(^rtN>l^rDXYJ@=^iBq3(oI1;wob(^*AdHY zD!1Y!aY{)LNI_-Cd%sOAh6f57d^8?_ce7(0M2ZZm*PZ4s=5v{huOL11mp@tKjOYyz zO|fw^FpTwEA*p7uZd?|Nt033H1;w^6W}>3dZhe>RH_`RD_Zo{oxwy)XImUJ+;?jM1 
ze=@e-WKnMeweIgCqb^;CJsw(z14=>?Cw=gao!<-LqM$QXbH`$Ia=}1Oq8QNl_?!3l ztvA-$3gLmEmwHa=Z|#$rwffVRZYpKGB!QoJp$g$NC`}Kl5j;67n(qqiytTM?RYJZq zN;J6gxUO4D6Qx&fY}%Moku|Gm8(AtwQ7%iYjA;hHOW-3b3}gATvU#;lV9@9uf?|ni z4eI^q$o-rZ+ja2ftK|m&p6mgvw&8w!Y!kbvV{8#qlSOUb@}0ag@jr^PS}unO6zf}E z>Wjn9ijy`qTHsr`#_DD~i6d;@~^P{nnbs9ZhwwnpU& z&%ezm-`q^HvPdzR-ujYM^bl#OET}k27Y0jw$pO~}7A6lA7KRT-i!NTYy&@>f?ixM(8SNQu zNf*P6L+WCE@q0y+W8i!b0ryNo;2X-QiYc2(bGkf(C!Hz#T#F z@ErgFplhzH2VWGcxV3vu4z3(tPkvu6 z?~9RPFRKcO9c)S;$t~E@b?eT;5p~$ofd(2hu)a4L!hD7o_6FP+xX!imt-3Z$<8>d5@E52y<+ zffH9Rr_ZB5u<=j$Nk7#GF*h z^Cp#XV)XLfw0bXO=NU&Vm$cw*L>UuJMJ+bB_)rGhqdc^H4 zr!rJCKYxtWmbObux#oDWOgFsb+O^! z7h+WIFG-9T-KNi%?odv_YQ>aMx6V932$ZwiVg@Vd-QGT4Dd75QdU#7-)rCrHVI0^8=e9>3(l;YHnQTTC z<^L^sY%Zbipr4QFWce)?;zQSUk!|_A1{9@|b9_na_x4q_HGdO?7JwKUM(Mlhi zZMwf7@r9iFkKd%rllN8C_P~QHFH}qgo;^FOG$>;*-dMZl0(IfCAgjVXS}qC4QE<1q zp&2J*1n+hTiSQ$LFaz=c1f346vtYb@Y0!N*8`h(i9?d%fH2@O?Vi6n4t#tG$**;!A zVc|!nbl$&_KI%T2<*TrLInkZ7#rLv`#yV0GiyBxUW;BzL4sBSl0AQL0n&2FUVG6mD z&F!`YetduS8W7zy!t$tl!c(Va*MX)WM6FOnocMHT;lK9nv7z3hSOM?l$8*-VoB;4` zVoWPcch@1Ji0%P}`nAZ#jm0a-01G(muVkODQvkj+HAWrubLy5d;PHfqa6~Q}_pZ zJA}qf+|#n*Ov+LWExaRD0-~^Pcq2d$R~}$q?l;SfJ^a-je~m&-B65KA6O3PP5vf(h zg~8*A$eqcHg{FMBYBdGjvCO8ra|f&*I5K^p`23{z>f5Cbpmp!{IqKVXjJ8LMw`r=vCrQ-{36*%d9ejhr?>X=Ou<17)k zU*X424(OZ&AiZH5#SpCtMw~q>WcM9QI9DXCqK)t!Ethn&@=?IM@6;#`*-w=7Hu}q4#ozk9 zmJ9eSuDT=pRQYe~pEp-Nc$f&9yAu7}l7*baPsQg@1O2yI+kwn2CmOf>+5RK)g~JV~ z)I(}?s3w?YE78i?r`!G+iIIcLe{2pma5JqfpEi9i8b0x<|Lq8ADn3b`h7fz@t`D-` z$1Ar-8h^dOPP?G=rrZHT;xma~EULs_7+suJdi&)~wk2~Nx zwafl7U^&Q98VmhKxgiCeIn z>_#=iTotwGjwhP^i5-;MX*X7$qvrNJ2uT`}Aqg#5-T~s{4EUZN2$4_f%mv93t$JU$ zTNK1(>Z37_oHP>|VzgIe%hE)tYGbe1h;@Deeo#FzqS}isixD<1EVUK!Elkk5$Ml(! 
zo?p{kwlQ$D?!np%f46m@@}zEuDL5WBEc~&SRh9Fsz3F-}d*|H9oHo+?vbWo-ZYsd2ywT%N@eS9l}-ZWu>6KmE)C` zNKs@?X-OxX77e;sNtw|`LtkUQUJ^tyvO!9GOLCX-;+^5|7{-xq`_KdtC7l2|;p?BX zzpoDvA5s$MrfzMc<^>Rg3Q7A_&!uiFsv2`OW>4$(LUTfOe(tVovRXsZ?Gg5^$SW9} zG+k)V3>0h%H_dq~O!g~&i`kLBHT5?(omF?PweQza3TA4;T3^I>#hk9Nc?k#kCM2|K zF6OU)@V3si(W3RV4B{$TTAu)vHlxt}1SF)rP%Xx*7KbmUXQB1u!dtnf)$Z~bB#4Eh zae}ATfpLU9lrXTU4)F~wB(gHfqQWtQXD@@zkP|D9#7p|a#?<+RMA$nAi*hHEe<_zQFS{;x%0U&nSp27EotH?k-gOe#ztTkLCC5o8tiV2 zMA|nL-lZnpMSg!O@Fy;+(JA^(U%cQv-Di9tB)GgE=Dc~rYY1V!LgGOQ{}IPK0l30b zeuX%KMdoK0uM#Wg+s4_dq+%LIoQeQ%A;xR zEkwc#`9w)?JeOe{zok*8twKxZmo+e4t1bVD|JOAoTLDd$L^T=Ya6na*JSP4t!gEym zFZ_ECopGJ-=+0#lg1>u%)wRAl{y^kTCQeqBH#3jpN||} zgVKGQZJ~Zm>@06g3}j)9G|Y#!Gq{fM0pO(KA80-4smL`w^w&HClf8UALN-mr2tgmy zmZaz%K}DG#!a~X56kErl04g!p;+(dv>JWp=@8QNK2twBC6sp6*9Xs^bX=y}|o#CD0 zl22ztjxnWlvYSu+G6ab2!hNqcU5-<*`5C1!y&*rs-kr%c+bkjF*gvp;BP( zrpFKK1@ZbwX=>QDwI><3f6zU3`oVkP%Ac9wyS#PtPPpkPhM49_j+i0_;e#}NS-?EU zN|ka&<4kTRfVX%xiy8cQlEeN3zt_}kr8>gr{3sYX$A2JZvb$~w2H#&{|4lbU)Y^dw z*E06Ib%b)ASsJ?NYKSh^|GL-8G1k4ExX$0!{GhTgwB@V`hjt?S3}5A^HPx>7qBUdDYR-5#1Fi*`GGRzAEd^W-8 zQW^K|-IwoLj#ZSOvjQxhsw%58Di^<%ToyN#+OGJ@m3cuBg1(#YU+pibn^LLn03_q& zYbY=+nXE+_{`*|?gS?l42bp1|!xu6Z!J%T`1^@lj`;7WuvcK_&@p}#f8jiuG^W{XM zdN*pRbOvTRglD?(2}#Mvg+4ETC&bRmm~g6&NBq z(OZ2YJRCb3jU#1AuUov~b4B_CR7U*VI?w8&q}0ADBov&#ueDw&>Ivm0V6=arL~U#4 z4^!fKUL^3oYLdA;rQ5uW^ZLCdztr#J<-!&B4G+o)6+Vb=yR7 z8sk09jTr|zP8pa&1YAf8P`~&7>Vc=_?6o8F7XlTw6Qk!fg7-`_nJOq3~B&sciBpLkz;}p2iO!m zc4Y45F0Y@AV+*vIsC;pyr!!es>e#c&ZtRg05umf_KEJ}cqvbJ-6neR<1eqsjD8E}w zLs6FG3fz*uQstnV$B*MabBE6>2J`lLH@o-ndy%;bp$WoDeGOmp`fX`se2KbuSD--A z&^uvL9?xPtI>HA0Vfe~53;lr?6r(AcFN%8W`c@(uu7Jq8QWfEe;Y3to00pK@2=@Ms zD+F8D+?jHrIqNpU_vK@Tp@7oc+dA-q$TeYRF9}@rUie8+pvE?Bq`Uwd|PE z1>dwOMhe^ZBg&7`Tr@=`G98cY<|@f8HYwnyr^4dW;QJK7PQtWS-}lk4%gc=H==IG8 z?$LX@(SAs((Lx}FPKMTDEh)Ph%+YQQGXffnrs#g1&Pe_V-_bQvKoO52)Z|*rSwE#!8a8932|FvGwj&HM3@=~F* z?!d44-7Qv_U981vsHoA@?&z|sx{>(E`Zl>(E1B{~Tp|Tk{?haB=|#gg>${&GK6o?} 
z&`Iw}udon7n(g8k~`Fo{qS`ICa~Cd}8oUPQw-_V3!Q-SOjH{d13~V^m@X zC1Cn8%^=4QgyX5%Ffe-NOSG7tJoPb@3Lqw5j{s#rt zD|^{x9G9!oXADsCkyJ_qdO!;Fr@-^rC-vijOCX|N3D2s=RtKhL?dZYo#o;4vM2n`m zT^Lm7r9;8|L*?&%>w)ESo2J0 z`0hxDlw!fKuaj`m{qv6YZo^zrGNsGc6j2sM8LY@}W-4hVhvTWQxhl`F+?N~!Ho zxeM{pw!bGYhWGsKFOwyrOu~#Q3U`iqu*{d+w*RI)@;P5$Qo1y} z0;l9Zl)2mT3h&IFL!9C>2JcPH*4VB;Vl{3?-!y?gWJbs5e#q+!5cr9UvJX&~#0XL8Z-ecvtNkxUtLRa+?8|6V+eW~CL(U>uAv`RaH5g}?+q^9LH6g1wiN z^t=GKld2%0d6;<=UZU;eris)~n9rhr6)yn5bTM$o&l@c0GKsj|blGpuL9=I^xFNEG z`6YUs)?>QUi2k8&6Mw(Jk;qS4Yd7f~vl6?|d4!&=7kxAw#VaFQbf^lE0Q(=P?l-M% zc{K;w3ie8?(z#{FJYo2&*Bay!;Ch#AXYpFy;g_Fb4tL{nr!rQNrovOrjchS8H4*`K zP1np912`Xp?5ZT=w^!qtod6oaZj6je^o%fSPmlI|4a*#+2UBah9Ddizv;%~H%&dwxrYpY%X^T%v{XQcp%j?s=7 z#i-IakxL{L`K8pla&Ds6lMuDn`EvM5y&&$wr?C-g$QBr*qMH5Ptv^R6T8m_)?Q)jx z3|Q9b z*&1(V{uNJao-U-`qvv5a{kR?g5RQCMcONuZ1XRbhTCB5rsK{yeX^F*iQd_1-u2Kit zUWG3=LoaU2>1AmDg4u?dDbZ>IBbJ~%va1&b%O~6Uuc8MGXgYo6Y=zOK==x`a7z+zK zg+tY~>R3-ff^YPMJV*^?zE-coyY{*yvaw30j1V68nXitsgNf0Gs|HAy*^creAvi0x zDui}%b0?4QZMQ%tdKGQ1%Qm0B#iUZD^UuobYo<`umdY3=l{@H!^Z8&s0ltWMwvYXL z+e#B|=?LS(KRk^0iOdu#vW34EbP*D>Q~e@TbT&cOfqD1r(UG$@Ik?#!F1v6>-Sp>g zN!WrY_D7*E_ChPQ0vTYhEg`YObyMhd!Q~NIc`3m^-hgEQA=D(JznX&QD^eZ(IqXfg za}3lbPBcVbO1UGRVsgAOB-R?4e>3zm4u5EmDzw`gPUIuVd5z`N#@3PaRkpdgDMH&y z)XYrVtdFf*$0N|8zy0-Cl>P@ByUJUR>Po{y{(?}}EhI`5$^kE7mA_1~6@*ne$js=) zwy}1YX_ROK7$7a2;bHooEeE@7l4(wVly>^Ujq%zR?~=}i&RH`Uzp*fQD_&w0|CvMf zG4A+x+P7?8lUcX49tls(yieTxbgl=+P4;U@iL6tylS(n!Tw3QVdmQ|qY^f6D&4V=% zl=GX`Jf*0^dUg)8e`v(hcg5150D{|cni`+&MNFshCeuD|>TBzsUb!5L>6a!Q)a)==Ge%lqy zazpo@nPIdD98(j;t)X|HOA!OLt=PUO;cfd@3CxOc1z_eY+xf`QXd86mX6KWLhN^3f z18IKw3tN->Lq1S%bq}}-3TvX+=8+O=Ql-hoy zAc5OdstMXl{9HxZC%>AxudJvP(6IH8L)h+FqBF($HzffvI*A~3guZ=nC{jrZ6u@b_ zeK6F80ocCtO}nkWDDa*+RGUP;{wkCrFoJ?%RRv7Uwf5L;G6C{vyYf3^N-8mu8#s(6<$e$-`19vN1Sbb3=wg7Qm_%DfMJJB7je3E zU)lavx%q1|-5wR!hsl7m0Lm@`#1V5lxkc2~8k}3W1H+F7_$JykNUZxx6il;aIX}PX z?V|a-<4iPk{2dDSlh%C*H8{kWc7?ct5=<&k2HP%hO4y z^Do`O+A?%czV2V5*nTY)Z{EhGt;T#SeEf2mvZ-gae;Io$kRbV|9I#2+t+8fUNgqd%1f19 
z0wL5ww!BuD(fX^nfIsPW@vIO!bs)u_5Ny-{d(5bqk1?h-0A?=N?3xI8E#Xf zix{Q2Q}@RL@~xXkO(M$|>PJJRX1Qp4$9~+KywMSoe!9LU7zu;O?dm6FvgfVM=Z}l( zTK*25B+au)GU;y`v2H~g+*VI>J4$`cLd052Ypcfw0*mS1fPpYmpXzrGZ~o=D!yygO zo1|@!X7sr9V8YdB1{)f&w<7ycqrgBRMRZUA&Mnzs7Ps>IVuyfP0 zg16P>P{jL8PW?J~bm-MzW^ruj_2#bH5H-dj(VeP#e;vXaMO7gYzn|*I5iZ-;zX>6n z)iX!3h$PklRG2ngzcItmwmuk{)x5gu3x^1>+Jhaj8pEA(Fx;BWv>)_0Y`D%4rx*|r zWY$F9Ag%_({v7id|Kbb&B}g$mpAE@N@+Y~MIU$@Kat0msEq@ynxR6U}zZ+=lCkVj) z47^H-DK%dD5fIi;9{WHA{=8`bX~oj8FO~OYdRBqymDGEm88=Yw6|ia9ENLx5mW>>Z^oW~S@cit!F*d{CE zIemc-!Uw4xF59dIf4L3nl=hwrY#yaWi)7OQuOk84%Y6G9nJb55kt(L|j+LvJ%^!L$ zzf~dfK9qKCV?2J1I?utSiQ@JAf_b9xJ3JIt1QU#pzwY0TXn_OCzqg&C zva%$ly`nnC+hz8|D*G14V$uTxHRP;D^_JjA%@IqL-q?CRYLpIOB&P(?@IVa#jqI0FRq6}Qak$1Yym1-*c zi$PH4W)iXPpbN*3vHo&maT!njYO;W!QPr$z{?l@U+?k^3*$uEMHNTj7qH4`mtE;=d z>i6}hK3+^DC%qo53AY*c?#CfOIM!ck=hyN%k(KF7&h}JSP4&mCqS*fQ;$Ir{DWIDM z!}as=;~*H3N?hg0!kNTBm0LpLDx(SG?s)SI)F1 zW%OVMXZ4#Ogc|<7e!6r>zG8IOI=dqyw)$c;-MN}2`+>?LQ3gss!bz+QaPV#PSxD_` zqmMW}K*#BQfAqg|6g>{E@?*YqpNbL?-pTrW{Ja^KysD4m^ZnOHU8@V|pXi zP!hzKGDttCUz1_zJi7J)>2?V$A(`b1r$Dbt`MgM-Vit+T~m zeX@S%HLrvIcaHbDR^J9gjJ3isD`c!%@>0m)mL`*Bawcaz`2I0V0Ou3NaNnu_SFGiK zprf>s)SVOc$}v??OFPL$P42kN<~S302SS+O8Cmoo@^-j`$_R%mD>;Pkm9nIIimi9(GMtpG8*=O zAVIwMH1s1w&4s5px=KW;Wts9cV<$HoiJQTLSiOL;PO{dSEdKa@B( zAWFOc^wCwS!Wh=Bd=xesao0?@>RE{9lmL00H$Ddh*!-PC#B-+9tuw!1%{diio_&nc zo{D*-%#m0<_7h3eAZiIr-oksPeRy`L902L} z*_%rYV|2>5UZ26I;FDuwAR1OJJt}$6sT3i7fFCJ;8 zmSw5g>M7ftS|$m`YE3fg$~l=AfCy5iaRGt(`lw9N;q`{Xc18xkEM3!@klXscDCI@v zYn(Ma)&7AdbXgBUD?4S(9DSKW%RbFBpDI8Ls^z5_|Kv!6{`<_U{ckB} zN88e)X+z4s4?9n>foW$6QV0V~(zS}( z;--}h=pa(Ju3ZM;Bm_8=Q-9UmiP77%E;FtyS*o1aO%RbUA6){$d(h7Kkl7l| z{quE2Ui<5>68_e{RfmbX($iHrIQ89%a;dPyc+?r8lt}QjRcIWQ?f%SzmW%S+^&A5Y1Q-(r>9BM`Dfk_k0WI}H_g!#mBExhAQ>C=#Qq_(%! 
z^UnYEJNFXkw&*h(3Z?V>yaIL>!1^oNvwRt~W?gHbZ@6yD*U`|28?^!!XF&+m>Q>J4 zbW)F>7-90+QiiJzef31PnxN^bw6b{rJhMk=w_A+#nE@y6lz{u}Tymp?v&(TOmhk+^ zZtHJ_MVraj)ghZmMuoU|9?WoxzK$xHn5!%G?_+S6 z@AAGqYeZI)&N1(5P`bKvag{}S+R-PK+!K04<=lvLD7$AS2*l2ro?!U&8!h9mS)CV! zrqltSm@j)`xCQo}#Is*C5^8m<5vcp@Oq5M`DKpug(W5`joD1;#lSL51%!~qs8EFE| z_q$}kuU+R%e;>_c1JUihaHTm0`*!ZPs7Tbw^zzcRuVW|`gYZbvy$)hQZN#kjyQ&{< zxgI+4=BeAcg?JZmS*}dZ&;M@!DC^}LEOOCG?tYRpf+J&{sB{_9{!8;BQ8^S_?_Cls z;l*e$DQ%L(0KKEopEZpC*{Iq12Oo+)ni7f^;9-mxzJ2+fkX?azc3t;G96Rnyaz2xj zN6nAh5tuziSACr7n7j@!kO;=W<$$q|$E^mFV{yO6hL~?rVTf2>&u@<;wFsnlaNUyaK1QGi@;3!TW z^iO!>x6R!)pLv2VP*3Bwom@m%p)$|Ff>Q#n9n!`U*ve3l7d-z?*WCheKJq%-%=1q2 zs6mjME{t+`#DITYsEZIH6_=s7A&j*C*Z}sY?ZG%oI`*W%;+IY+9x_>4kpZ66Zo$=+ zUh9hk=%|JYR0)p-T1`YNsz2~naeI~tY+HKT@t8!oQQ~KO9?FuNk*fHz9pGT~B`tiB^&vR{+G~{r=B!@Xe)E0O z$|7jYbB7(7Qu#qxo3n!+P8Dn`x+CZ<-_<@k!u+c(#;!;y=?6;){#GPe)p2C!CocyX zCam|59bR(eMm_pHiXa|RDSYU;`S~{`EQl6&b8YCZUHJ9Y(1*ipuAb`(h7l+^O4<-} zCDvs0Hoh+pth8?j=pfXo&}mQ-&5jyIE>_UImuD|uYFIV)l32l#R@)%y>O9s0Jd^aSzz z3`0zmSFP1$Cc^*ql!W#AAf58SL&0J@=ReRVQwSkP1koRV)p3lgu3wGjbvtqZbI(XF zhHa22*d4@C!w@t%z^c0lIRyT&#o88*<)|i60Nji9Dtc!$c(H85h$cm!emY;1roDa< zaIEk$AbN9{p`4>!EU2~3cyi>3WHR+NS09xf%@Gonmuho{4slgu0bS_rMgM`7~8|_wAiS=N}^Qxp(@koJaM( zTyu_Yp%JAcz64GID$POradP3pk|fe&u0%f$fp^^W=e5S%=Y@{_YHJsK4@PbVl9}J->PxY*H;% zDuqcDX4m)(jWtLZkX3fb*7JLFqFC!GfqzZRYas?{xJYwc6#=FD3&8=;g@JRab?}Yy zIN<1xLb04UQqeB|2fhiVjl|=%NBY`(gp8YaD@XjQ=sc+8kWo02BG{kN^1EGgBRBGX z8n(oDr2fY1W^yNQFhMol6;1VahQI2Ly~aT=^n^w_Ync_(A|in~r7s4{s19Rypr|qe z43@~4rw(dz*2L~L;@@-Q<|c;sPmz@734mzoZntWWDLmCj)VWapjf*_n9k1Yr}4{%j0L45U6#N-;1vt$9 z9Ig7jVWgohWBhu7vpvp!85EQ(C`}HYj@MagYK}VI;N42F2$MidloZuaWkaAWrD*3{ z)V?&x*+HP0sd_~(He7jfF6#HelHyB){vEN4!usnT1?4Jpi=|rY((Yy?zfcp^&S1Gc zu9ZhBjjUt}Af^L6Pjq6pmNcy*Z2SOy((&w7UN1zimwVHVQ+j2=ldv#CF4 zcL8pciLv)7RTZe$4IdwiU4o{ZZI&~Hex+<~3!Y>~x6t}VJv+b>R);4PsRc-}t-U)Ado#l5n=Ds!7Sy0>0N!0azp`}C5T7x+4 zvKoNiu6nMJvcPPTnd-ch`TAKRdm^Rpb?kL@KP-uNQ>cAkhdO+K*Du0Qz;p=29}tia 
zaWEgJ)Lq(kr#!nQG6w?;_Oi)uH)5D-L}4;d&nB%T@w8S|6ibV)?1OiVD z+LEYBwCm&UC@5m>$_U>prdt~SVOht}lHNk&DW40NWfOBNBj)RTF+`jWZ{naBhGpWj1~djG4f?vHKD1Z#0oROnUQ+kdm+>*}Dn(Wr=|WAY3k5DIH8LDd-!&{ixc%S3*}V-ANtetc^80rp zfcfPP?;i+HgJbjiJL(KXa{b)blhTOyL%;h`MQAd|MPp_EGX<2W}MaRqUGwy7|k$US{8F49arGDC>$=Z`|-aHF-@wOu2>4&PMy zR{#DHZ1j97+&iS4%h*vTQL-;oU>FQ2@Ty?_^XmC+eK7=o-HhVDgbcK?5 zN`5;JJ?SG|-fwc?v=<`EVbiW}Y_X=YE+!@UIcmn+Lm)sB6GF?|b2spD3atP|F*+|N z3!FMWP0dF6^t}a?iT-2vpzPqs((m`_vXa&=vKzelPu1-Za`KvGPT?<5wfAvgdlOYU zsm5Krt?BdV9Obr%JW#Gey2TzgVeIv=-LcV8p$(Xel^k7B=a($+wlzsoNq7H2@+L59 z;pGMY!8v?J&E)F<_rbI>G&#Lr{j##hSi^vEX9&0yxZ{B*sngep-{$xA<-t;|n>{~g zTzy~{HK~DB_C)zY3(pG(5S#5nsVlLTE6S?KK;nE~{`&b_JVOR#y3y#{uUF`DFC0JV z^xqQ1sWeOZL*xN?6T9G(XU$q$Lc8>$oFtsP$t;?!-ks@E=eSL_Kjbe_Lzg+P0g(7y zF8G8Yojr)&A5cG5E7xH0aX9nm^B?k~ z`om$Ym`R+{W;_3a?R)x856w&huAf-wTQX-1*v7>GHXm9(32IADX(y4Z(8mfsDZ{M|8o5R zLUx`q;w9D`ueA~;q-vGYdR4&YeF8(=_4&)SZPNUTGr@{ziXsOgvk3yiR|RA|S4vPK z$UD&@GGi%JKW~>n&~?POIAqICkl8U2z?R(W=<{yZ!F=bwKSUdg(6RYz(od$`EGf>Y z8zuw@6sYhR61Xchz0|u}?IU}oO7F3H_>$_!zpDyA+AlSUK*hZ=#?8Z1o%AY-KB%>A z#c(HRD%TlPmUQ+`+u&UPg(~RUl=oY&{+T-5m)4rAO&IYe7OWMEawDAcm(`SNV`$Ph|z{ zoXdpcsal287~@r1MTF~T=T#B~KS;HV5Pt??kVWm~{(;~wA=kbe&`bFu2TbciO6?v< zwXbh@FDU$sRPqlLgnR-4)N|Ng;>(37nGR}+3cecug3Ic1&zdn}xxo?uciC*a0lctr zMcA!BX2`?jf2|B2R@BN}#~~}cEvT$r3)1L|KhKeXy)2f zCCaIEWx&$DjADBH6L^NMoH#p!2UGHrqa&{0cB@RSS=@(SjUQ*g{-bT)r@ z!|4A5-H#SjN$nlgvIL(~5FmJ00WuO#LyRvoLtu4A%>ly|t(L}6JGkvmN*8kn$+-{= z=$M0~m3n47hlAPITBf~LYD6;2pOVr>-mtDw&ik$ZK*4AWy!7s^Nn@|BS{9#Y6 z*TVi67xVMFjyf`Xae|0z1B47+)<%llew8oa>zJ!4*P*HyA%gi?p1<`Z01L=P{Zl6F z$?)4ySJSu0!3~_Q6x$%~u=*D91j*P>!HyeHIX~mccbSbZR!TPBt29D=4I7!qqYH3N zTgEjRAkwvepsIuO(zEXSGR-q&2Xn{M;&=iLtlCneA$TF@L+$|bcU;QLjvL9gTU1VM zF<6PFU!nZcs^|&BBR%-U0M>1ZLcZ-sroOzT(ceqziM+kZeJ;W-PA=oD@nC_wkqbZ* zH`LbKonDw`VWPfSMUoW%T;Id5-&{@MZ5obemrVA^bl&X=N87*yrAJg>1VbwkMAmTv zu{o*LUm*2xP&X)$;$-sM7%))yV>76YLFS&VrHmDRyYyb`|kHNYGMM@j)vYA(VTop1-tW z&h@svL1^$cXMWm{d2=bkcaw$(B&Hg-oiMIykIM%uP2uhe$a3QG`d0yU{A-cfx>)A> 
zpReze`y+l+J_#L;IrzQAc?=4w%C!+`);vc2DG-@pTwe!rrRWgoDW$&nxp>-xKJtO_ zA>rK*LG>4K|8nvRa#{dPl-^DW@OHQ>Z4o2AHQlG^MXEJdo8g=ikrkX1!php%(v_A@ zNvPpGF3OqA@cRP33*YfI0{FUWY8nE)#QK|lCk){yhjyjEskNp2Lf?kJ_~_||(~$T0 z`#`4yV+_c((NXg?qp|%5`iwff{dI?z?85BNBoCRMck*6VSb=xB!HUGz>dEI6D8YmK zj($;@tj3SS6*VqsI5N9uR8UDJC2L;D-9D4?HV-E;Q02~;0&OvD50 zEGc2Oj8J~ybl+9PfCXlLxQ5JCdfyl29dFRh-4Ow{S={2Sv%UHeI1WrGwTd$OmA!VV z8y2UmgLoutd}r`7`fQUp*OZM3YI#v>38d^j^qIKb1OnOZJEc;Z+rp1;U6Awd4Diy2 zw!~x!D@ruQ?wOfc!e!iuLLA2baL>^rpv;^6o) zx7?~KR0Eb@+wKh{IRaqpmX1lu+e6nidX2lFa+_Ptp$U5Uy#trowHVkLRaD z%peoSZj5s%IF5adP~TE1Nx7v1xumZ=_E>nw6nBjzLgXJP-g2jbLz|OlMT@DYt}arY zF*KchPg07`ex4lK3WT+hp$G~7f1q82R9VP5XLBy2gn9BSZmS1T z$CR^+k$i9{G>`f5wj}$clWV4C=JAR~xHB8=l3PQqEo|JN#1+-T;eu}sG=57=6@e2P z5KgvEn(SLf)3(woU5zKnR*bX*;DcrtCzOt+vgyW$76Or< zt!WA@_@xi#1U$SDPB5H(>P$-q_g1LD*p<81c!vM@2=?e+?Ew+r9a3Jtdd}*JeYBKh8J zG{tCAcl3W#ES}Cs^d`}y>oV7WSd-=71#c4X1oF|*XE)EjN>F*l>o>**-~S=bJZQ^w z9+Eu-@c1RY1+PSq{|m@w1j~gsr1wg~#MUy8-9ZKiYAVkM%$Pdkv?uB+QQ6f@)N`lw z8*#BuqVwcO-CFx0+#J8!=|-0m^@jPXu*NpgrV%#I1+%eFZ{>{7nPw=4NhI=f$Y8^2 zX#K>1I|cbSHND;XD`0CBi{-V}G+DChL_`B%XX$xM0!rbu*F$a(M%G`e(9C4**NdJc zysz+k64+2wK3VnjjXY_1RNHD$i7rZpI-d9a!s|)bWMIg<)!T3SjEwHlS44cGD0a%3 zSg{Asi`fc0N~j~&AB0jpu15A*<@>_S$rFbxY2!Ngqv&-w@>E7f01c#MYJ@7yH$bl( zt#WI)Fl?EFJ~XyaQ$Vbb1zy-SyX%0njWlih&0v&K39{IgmsH%5O{>pbi(8ZwVGL^o zjIE*)RfOdBl;6ONuIidqINpILJ}n^VUAnPi{b1|v@73$cZ94`65sM3{bX;X;WV{SE zoQEZHe$Z7&l#Av97+2h$@P`sLfd|7cORFxtAgF1i&(%!o)U@6XKA2HD6rokA+p9dg zH6apK_U8~=R@R5w%%Frlck*lD6$d33ty|2JF~1doV2&6^^=1*L2rft=?^FUkS0ekl zbl==1-(D6hvtVHxz0S9@qjioe&@gJm8;L-P>9+FrvR^*N9De&EW6s9`L=r~qm`7P% zYa&4snFbq3d}ua`zBsl-T~Zf+zTIern9~u3bBFA7wTCL6xF-t{8DOSxLC_8 zx=+KUqVd^$S5(ltz?m$2umMbaV-Y~(`wCN$z7Y0VWa>HN#CPhuu zju9FgW^>-H2dPc`LL1GWg2>jkVwbn>KKyQqsVtGpZHPR@?YD>D%95ct*ypEg$J|7{VzXcn7_Pc&a+1=!bs)w4`dw zPUhN!S^l^DpEYT4AOy(x@J}GouA9{&C=kd18U(@w0U(f*lN+;>i@B?^i=&h4f3Fgp zTen$d!3J%;^lbDT8q`pYdDxlY{ey)g`Z*&MNH-Q)YL6&m%|E!|pPuAqEteOgKk`-R z!0%vbMNd${^pZR#*@g*Z@ChR#t=wR;?ZeuJ-8Lc2LQRWhLAnG~00MH==u{3D-=d@z 
zEB`*=ci4dFm%qGU0rEX?BMnQr80bHhwPt;(6HSZc?c!|v!+kn~_iOM6`OoiIN$flW z8S(w)#y|AxqsVB`d1zQo^r$?T8w4%q$VNPthmci|GIk4|EYSAC_7+ah(`R7N?HEAz zs)U~~QZgI8}MXin^*Wndk!SPcd|f@6X?m z=h{B}KOmH%8afHKbz4^fW5&(ce9l&FPBY5F323CFxsG;0os4@a_msotb0p zztjKxfmFQQ8q7gKP;uNC>aJf2ruy`G@NoGvoCp58iCx2MyujBDSclc-RSk7yl|{E7 zoP#Rxb?)CkDcF4cn`KDh12f3G$qsfv0YAKxy)cEPYtIO?g8}12Ihsy!(#5@k@ak&u zC!*Av%n$fbzNw>`AIJcWO~(}$jDVc_e?8Z}gxAo77AYwgTYoy)VEl-b?VOG|sb{suLEg=HQutKSL zCtE5vl5@Czu)RI+8wk4ioKWM}hxwEo*qeaJF^w)Z4#R@o?>G8|6ke?-6dcMn#?5NE z12q&Vj*Lh1{Ynj3o|=_Mu|mqmbT<}Jd4#z$&@@~=;G$#nGRF}lD4Ku^M4*>rmc4-; zs8Lpe+rQ>dYeIc0>NTUGoLUmc9Oj`;P%1TP4;3HySY}dma2_RfBuJfP9m_)h5Il7( z_9Y8m4+aPi-&+z#}MFO2Mf&`f|4A3c0VmVIQ@>$$64-YehzK|s~MM1mREawsC?0gIuZ(K z%crBm8c`YMrlm#D8%{IvZiQ;C+T(^l>}gs1OUBQx9?3z&`!718mCz+O;2~AhGS^XWoMIeUZ8|BdYri%0&0k!eF#K8T65oHD;Xj36zoKKyam%s# zU|)Hr)oH=AG0m|;xyd&1AR~W5$6EdIhK@Df<~-kXQrpCV{Z*K1|EQoG88czP1# zt{W<@TEZ2uu|*PURp5oDteh=ocs#hi+s|f4>HVyt4Gp^qeLeJa_jh~`&_!)SpOWr2 zVnXUf>)DeDc0q0E);;OQXsqqT4U#pa8I2}ETvTUMoMR{p@g_ClEs_{HXPL5(Jz3v>Tpr!r_TUM2AU3r^oRNUh;8AE~bO^ zX1hkE6O6B!99mK7@b>5X8peJ01k=95O^a`nRu(@8v*yIcP zi;AZx?UNusd&xo6#Jl(YxLoEo<@XN#-%IJ+-93L%Ko-Y_!!|YMeISPdRQ_-3Fxu;Y zQ69r!tD>Xp7t>BaSg#{8jNlxKv+ElTcw<8QZWjH!!F$BhGe;;#=p}BW3nz-WhtK*} zfDknd6Do`F(_H}RS>oOV2gRf-V>27!{tTj?+f-*j9+9o=AA?}Z&w1+tqaDna&Zo1) zNat``3tA&FjLk#9^tox0Ia{by5-HNO5Gi_i68?sO71U0#9ZZ3av&U@?4h~J_vn%sR znYf@N)@RuP!9NGxG>D^6u^Ai+F$Z)Buzx-kX9T{pXz*~Q%a?FevHE|OqyBb2!`hfq z#yM%0of*o#kGB+?urk-IAl}t&41CfFyIxq9nOt96y#rJ(uy^A=aXkffP^RA^hNWJ$duM#vY2fZV(SNkj&Rge z+%zd+{;$We)Qazec&w4b^q$JRg7I-{vQ_?SbI0UH>U@sNvsM?wlR>%8Nhp6N>`lGL z&Obo)wDs$p0Djf~Dg~bdJ^sW~&C)xO@tCXu;t0X<5p8ITQUaD<2c1Kg)~lD(V)mZv zlhDWI9=@nLPSl2#e$|rMyFw3dD1LeHzQ@%BXE8e@nuPON&7{5VX41YNg~rbCUs98m zRR6iPv9ZSgoz5}z;g)IQZ6bHd;YgaMIz$p|`n|*KLR$4e&Xfrt|4sKQd!Qp)M-6)n zjddDPHwCo4PXDu95H`3X(?Y)KuA&P?3+<+4xQha1Ul=dq6!mP z%kN*MNav94ZS4@WZgbD5hvr`WccIjnWYYM}y&m?&tUyjY5nCeHo3hX#_0W_zo=kaH zU@rJ?Y}m)Mo;+=L-)~N0-lp3VTyPbsTlpfhaW3ETV;{myP9Nd0U1gjO$*Tx~@?Yp> 
z`lfx4=(^C8vu;s1UNpZfo1q6SsPdszFEOnH?AM`ThLl4-oRQEV_Rvbt(uvSU9LVVY0?|dj9RP}7y3H<%l>A0vmkM8md3T9KcpK18@ zukW@C<4+Piud=~jEAVgsr>!%MX(9^Z_}izAT2O21MyXr5S_lXQL^Mb|KqKDqMnFRl zDx#EPA>IZxXfP;73mGJu24e6kg@RBcC`1i0s8M4i#*28PUkm|7F)_{->mK_0ZQt+x zXQyv=cHWnr#o=XPIfcf7_3S5YMhZ9Gx5BSFGD`a`F4Xp;@2;ivQked6MP}6Lu;_%x ziIuMkcfQ@wZ#(cgvVBf*Z1U#VWx@T1xWLKGt>w^GGl)rYsC_L%!^wE!smu%my>-_x zi4J-bqQ+WHI#TDEVem5bQ*8NOP&EvE?UYsap;DT~n3Ep?Dko(psp}HsSE0gwYzFBk zSso0zb{frb{Rhs)Az;RZQouD5%yv4_Y8a)2ZaY;?3XPuqRE_9b)bf>RMS`-H;^*W? zUkG$zGTA$tpcCRz3`w;#m|IPbT|cQOB%zNIRyLqErCnRBuWNMHV>TshtP?K3$i~YCXsM?v#gj*;djHUGw(>O8*3rom zThaIobR}GJ6Fu7?-bwY)?xY&=$}c%?J7=nt(CL(lel@wLIy;n5-$+%Q+unc0f9ATy zt+r|f{BlrMkzJkU&%(ocbcE(vKb<%HBc8sUUy|T)H_PI7;2HM!frn126yKsL-#s_q zK{ecippM9=xSh8#Y62t<<`*EPh86YHpX+H|+I9}9_g-?UWS58XwiaGefTjT*`_Z%` z=m>7SZXADP5D)V{5AFqnjg;lisu$ibLS+XkOVMcu1+=jN(OHh&K1rx*86!sriae+{ z48uL=;`<#jdq!YWys2{Z_))9xjX*v!R9s?LPWL^uoibKVEjZ~B^nyiK(DsRuYwiqJ z5R&X8M^}~$Xa$e1Hr=e&p=t*nn4}(+RSM`w9u+#V%h7?e0y-VTmlHZv%TaxmfF9;i zp`WW9T~i~VnfRBAgJSDtoYV3B5H;GrJq0!LXEV=Epu8Ew{$enr62(hP!%)>`GG=C{ zM`WAQvy4%Oyxc5!*F-~U$Qw{K(;!aNV~St_C>jBJ=xU}RTIump1dz>#7&zmi0oqfs zJumlS;wZtSAkA|#ix^0CQJq#^PYOC8jxfOSDnczo2!4D`G)@;Y@Z3fHhhAO4T&3)k m-V%mnnls$>XheqDV&o6HE0!p*?;@rj{B6R=r=S#*1o;g`vr_y3 diff --git a/doc/cheatsheet/README.txt b/doc/cheatsheet/README.txt index e2f6ec042e9cc..d32fe5bcd05a6 100644 --- a/doc/cheatsheet/README.txt +++ b/doc/cheatsheet/README.txt @@ -2,3 +2,7 @@ The Pandas Cheat Sheet was created using Microsoft Powerpoint 2013. To create the PDF version, within Powerpoint, simply do a "Save As" and pick "PDF' as the format. +This cheat sheet was inspired by the RstudioData Wrangling Cheatsheet[1], written by Irv Lustig, Princeton Consultants[2]. 
+ +[1]: https://www.rstudio.com/wp-content/uploads/2015/02/data-wrangling-cheatsheet.pdf +[2]: http://www.princetonoptimization.com/ diff --git a/doc/make.py b/doc/make.py index d46be2611ce3d..4967f30453fd1 100755 --- a/doc/make.py +++ b/doc/make.py @@ -1,476 +1,359 @@ #!/usr/bin/env python - """ Python script for building documentation. To build the docs you must have all optional dependencies for pandas installed. See the installation instructions for a list of these. -Note: currently latex builds do not work because of table formats that are not -supported in the latex generation. - -2014-01-30: Latex has some issues but 'latex_forced' works ok for 0.13.0-400 or so - Usage ----- -python make.py clean -python make.py html + $ python make.py clean + $ python make.py html + $ python make.py latex """ -from __future__ import print_function - -import io -import glob # noqa +import importlib +import sys import os import shutil -import sys -from contextlib import contextmanager - -import sphinx # noqa +# import subprocess import argparse -import jinja2 # noqa - -os.environ['PYTHONPATH'] = '..' - -SPHINX_BUILD = 'sphinxbuild' - - -def upload_dev(user='pandas'): - 'push a copy to the pydata dev directory' - if os.system('cd build/html; rsync -avz . {0}@pandas.pydata.org' - ':/usr/share/nginx/pandas/pandas-docs/dev/ -essh'.format(user)): - raise SystemExit('Upload to Pydata Dev failed') - - -def upload_dev_pdf(user='pandas'): - 'push a copy to the pydata dev directory' - if os.system('cd build/latex; scp pandas.pdf {0}@pandas.pydata.org' - ':/usr/share/nginx/pandas/pandas-docs/dev/'.format(user)): - raise SystemExit('PDF upload to Pydata Dev failed') - - -def upload_stable(user='pandas'): - 'push a copy to the pydata stable directory' - if os.system('cd build/html; rsync -avz . 
{0}@pandas.pydata.org' - ':/usr/share/nginx/pandas/pandas-docs/stable/ -essh'.format(user)): - raise SystemExit('Upload to stable failed') - - -def upload_stable_pdf(user='pandas'): - 'push a copy to the pydata dev directory' - if os.system('cd build/latex; scp pandas.pdf {0}@pandas.pydata.org' - ':/usr/share/nginx/pandas/pandas-docs/stable/'.format(user)): - raise SystemExit('PDF upload to stable failed') - - -def upload_prev(ver, doc_root='./', user='pandas'): - 'push a copy of older release to appropriate version directory' - local_dir = doc_root + 'build/html' - remote_dir = '/usr/share/nginx/pandas/pandas-docs/version/%s/' % ver - cmd = 'cd %s; rsync -avz . %s@pandas.pydata.org:%s -essh' - cmd = cmd % (local_dir, user, remote_dir) - print(cmd) - if os.system(cmd): - raise SystemExit( - 'Upload to %s from %s failed' % (remote_dir, local_dir)) - - local_dir = doc_root + 'build/latex' - pdf_cmd = 'cd %s; scp pandas.pdf %s@pandas.pydata.org:%s' - pdf_cmd = pdf_cmd % (local_dir, user, remote_dir) - if os.system(pdf_cmd): - raise SystemExit('Upload PDF to %s from %s failed' % (ver, doc_root)) +from contextlib import contextmanager +import webbrowser +import jinja2 -def build_pandas(): - os.chdir('..') - os.system('python setup.py clean') - os.system('python setup.py build_ext --inplace') - os.chdir('doc') -def build_prev(ver): - if os.system('git checkout v%s' % ver) != 1: - os.chdir('..') - os.system('python setup.py clean') - os.system('python setup.py build_ext --inplace') - os.chdir('doc') - os.system('python make.py clean') - os.system('python make.py html') - os.system('python make.py latex') - os.system('git checkout master') +DOC_PATH = os.path.dirname(os.path.abspath(__file__)) +SOURCE_PATH = os.path.join(DOC_PATH, 'source') +BUILD_PATH = os.path.join(DOC_PATH, 'build') +BUILD_DIRS = ['doctrees', 'html', 'latex', 'plots', '_static', '_templates'] -def clean(): - if os.path.exists('build'): - shutil.rmtree('build') +@contextmanager +def 
_maybe_exclude_notebooks(): + """Skip building the notebooks if pandoc is not installed. - if os.path.exists('source/generated'): - shutil.rmtree('source/generated') + This assumes that nbsphinx is installed. + Skip notebook conversion if: + 1. nbconvert isn't installed, or + 2. nbconvert is installed, but pandoc isn't + """ + # TODO move to exclude_pattern + base = os.path.dirname(__file__) + notebooks = [os.path.join(base, 'source', nb) + for nb in ['style.ipynb']] + contents = {} + + def _remove_notebooks(): + for nb in notebooks: + with open(nb, 'rt') as f: + contents[nb] = f.read() + os.remove(nb) -@contextmanager -def cleanup_nb(nb): try: - yield - finally: + import nbconvert + except ImportError: + sys.stderr.write('Warning: nbconvert not installed. ' + 'Skipping notebooks.\n') + _remove_notebooks() + else: try: - os.remove(nb + '.executed') - except OSError: - pass - - -def get_kernel(): - """Find the kernel name for your python version""" - return 'python%s' % sys.version_info.major + nbconvert.utils.pandoc.get_pandoc_version() + except nbconvert.utils.pandoc.PandocMissing: + sys.stderr.write('Warning: Pandoc is not installed. 
' + 'Skipping notebooks.\n') + _remove_notebooks() + yield -def execute_nb(src, dst, allow_errors=False, timeout=1000, kernel_name=''): - """ - Execute notebook in `src` and write the output to `dst` - - Parameters - ---------- - src, dst: str - path to notebook - allow_errors: bool - timeout: int - kernel_name: str - defualts to value set in notebook metadata - - Returns - ------- - dst: str - """ - import nbformat - from nbconvert.preprocessors import ExecutePreprocessor + for nb, content in contents.items(): + with open(nb, 'wt') as f: + f.write(content) - with io.open(src, encoding='utf-8') as f: - nb = nbformat.read(f, as_version=4) - ep = ExecutePreprocessor(allow_errors=allow_errors, - timeout=timeout, - kernel_name=kernel_name) - ep.preprocess(nb, resources={}) +class DocBuilder: + """Class to wrap the different commands of this script. - with io.open(dst, 'wt', encoding='utf-8') as f: - nbformat.write(nb, f) - return dst - - -def convert_nb(src, dst, to='html', template_file='basic'): + All public methods of this class can be called as parameters of the + script. """ - Convert a notebook `src`. - - Parameters - ---------- - src, dst: str - filepaths - to: {'rst', 'html'} - format to export to - template_file: str - name of template file to use. 
Default 'basic' - """ - from nbconvert import HTMLExporter, RSTExporter - - dispatch = {'rst': RSTExporter, 'html': HTMLExporter} - exporter = dispatch[to.lower()](template_file=template_file) - - (body, resources) = exporter.from_filename(src) - with io.open(dst, 'wt', encoding='utf-8') as f: - f.write(body) - return dst - - -def html(): - check_build() - - notebooks = [ - 'source/html-styling.ipynb', - ] - - for nb in notebooks: - with cleanup_nb(nb): + def __init__(self, num_jobs=1, include_api=True, single_doc=None, + verbosity=0): + self.num_jobs = num_jobs + self.include_api = include_api + self.verbosity = verbosity + self.single_doc = None + self.single_doc_type = None + if single_doc is not None: + self._process_single_doc(single_doc) + self.exclude_patterns = self._exclude_patterns + + self._generate_index() + if self.single_doc_type == 'docstring': + self._run_os('sphinx-autogen', '-o', + 'source/generated_single', 'source/index.rst') + + @property + def _exclude_patterns(self): + """Docs source files that will be excluded from building.""" + # TODO move maybe_exclude_notebooks here + if self.single_doc is not None: + rst_files = [f for f in os.listdir(SOURCE_PATH) + if ((f.endswith('.rst') or f.endswith('.ipynb')) + and (f != 'index.rst') + and (f != '{0}.rst'.format(self.single_doc)))] + if self.single_doc_type != 'api': + rst_files += ['generated/*.rst'] + elif not self.include_api: + rst_files = ['api.rst', 'generated/*.rst'] + else: + rst_files = ['generated_single/*.rst'] + + exclude_patterns = ','.join( + '{!r}'.format(i) for i in ['**.ipynb_checkpoints'] + rst_files) + + return exclude_patterns + + def _process_single_doc(self, single_doc): + """Extract self.single_doc (base name) and self.single_doc_type from + passed single_doc kwarg. 
+ + """ + self.include_api = False + + if single_doc == 'api.rst' or single_doc == 'api': + self.single_doc_type = 'api' + self.single_doc = 'api' + elif os.path.exists(os.path.join(SOURCE_PATH, single_doc)): + self.single_doc_type = 'rst' + self.single_doc = os.path.splitext(os.path.basename(single_doc))[0] + elif os.path.exists( + os.path.join(SOURCE_PATH, '{}.rst'.format(single_doc))): + self.single_doc_type = 'rst' + self.single_doc = single_doc + elif single_doc is not None: try: - print("Converting %s" % nb) - kernel_name = get_kernel() - executed = execute_nb(nb, nb + '.executed', allow_errors=True, - kernel_name=kernel_name) - convert_nb(executed, nb.rstrip('.ipynb') + '.html') - except (ImportError, IndexError) as e: - print(e) - print("Failed to convert %s" % nb) - - if os.system('sphinx-build -P -b html -d build/doctrees ' - 'source build/html'): - raise SystemExit("Building HTML failed.") - try: - # remove stale file - os.system('rm source/html-styling.html') - os.system('cd build; rm -f html/pandas.zip;') - except: - pass - - -def zip_html(): - try: - print("\nZipping up HTML docs...") - # just in case the wonky build box doesn't have zip - # don't fail this. - os.system('cd build; rm -f html/pandas.zip; zip html/pandas.zip -r -q html/* ') - print("\n") - except: - pass - -def latex(): - check_build() - if sys.platform != 'win32': - # LaTeX format. - if os.system('sphinx-build -b latex -d build/doctrees ' - 'source build/latex'): - raise SystemExit("Building LaTeX failed.") - # Produce pdf. - - os.chdir('build/latex') - - # Call the makefile produced by sphinx... 
- if os.system('make'): - print("Rendering LaTeX failed.") - print("You may still be able to get a usable PDF file by going into 'build/latex'") - print("and executing 'pdflatex pandas.tex' for the requisite number of passes.") - print("Or using the 'latex_forced' target") - raise SystemExit - - os.chdir('../..') - else: - print('latex build has not been tested on windows') - -def latex_forced(): - check_build() - if sys.platform != 'win32': - # LaTeX format. - if os.system('sphinx-build -b latex -d build/doctrees ' - 'source build/latex'): - raise SystemExit("Building LaTeX failed.") - # Produce pdf. - - os.chdir('build/latex') - - # Manually call pdflatex, 3 passes should ensure latex fixes up - # all the required cross-references and such. - os.system('pdflatex -interaction=nonstopmode pandas.tex') - os.system('pdflatex -interaction=nonstopmode pandas.tex') - os.system('pdflatex -interaction=nonstopmode pandas.tex') - raise SystemExit("You should check the file 'build/latex/pandas.pdf' for problems.") - - os.chdir('../..') - else: - print('latex build has not been tested on windows') - + obj = pandas # noqa: F821 + for name in single_doc.split('.'): + obj = getattr(obj, name) + except AttributeError: + raise ValueError('Single document not understood, it should ' + 'be a file in doc/source/*.rst (e.g. ' + '"contributing.rst" or a pandas function or ' + 'method (e.g. "pandas.DataFrame.head")') + else: + self.single_doc_type = 'docstring' + if single_doc.startswith('pandas.'): + self.single_doc = single_doc[len('pandas.'):] + else: + self.single_doc = single_doc + + def _copy_generated_docstring(self): + """Copy existing generated (from api.rst) docstring page because + this is more correct in certain cases (where a custom autodoc + template is used). 
+ + """ + fname = os.path.join(SOURCE_PATH, 'generated', + 'pandas.{}.rst'.format(self.single_doc)) + temp_dir = os.path.join(SOURCE_PATH, 'generated_single') -def check_build(): - build_dirs = [ - 'build', 'build/doctrees', 'build/html', - 'build/latex', 'build/plots', 'build/_static', - 'build/_templates'] - for d in build_dirs: try: - os.mkdir(d) + os.makedirs(temp_dir) except OSError: pass + if os.path.exists(fname): + try: + # copying to make sure sphinx always thinks it is new + # and needs to be re-generated (to pick source code changes) + shutil.copy(fname, temp_dir) + except: # noqa + pass + + def _generate_index(self): + """Create index.rst file with the specified sections.""" + if self.single_doc_type == 'docstring': + self._copy_generated_docstring() + + with open(os.path.join(SOURCE_PATH, 'index.rst.template')) as f: + t = jinja2.Template(f.read()) + + with open(os.path.join(SOURCE_PATH, 'index.rst'), 'w') as f: + f.write(t.render(include_api=self.include_api, + single_doc=self.single_doc, + single_doc_type=self.single_doc_type)) + + @staticmethod + def _create_build_structure(): + """Create directories required to build documentation.""" + for dirname in BUILD_DIRS: + try: + os.makedirs(os.path.join(BUILD_PATH, dirname)) + except OSError: + pass + + @staticmethod + def _run_os(*args): + """Execute a command as a OS terminal. + + Parameters + ---------- + *args : list of str + Command and parameters to be executed + + Examples + -------- + >>> DocBuilder()._run_os('python', '--version') + """ + # TODO check_call should be more safe, but it fails with + # exclude patterns, needs investigation + # subprocess.check_call(args, stderr=subprocess.STDOUT) + os.system(' '.join(args)) + + def _sphinx_build(self, kind): + """Call sphinx to build documentation. + + Attribute `num_jobs` from the class is used. 
+ + Parameters + ---------- + kind : {'html', 'latex'} + + Examples + -------- + >>> DocBuilder(num_jobs=4)._sphinx_build('html') + """ + if kind not in ('html', 'latex'): + raise ValueError('kind must be html or latex, not {}'.format(kind)) + + self._run_os('sphinx-build', + '-j{}'.format(self.num_jobs), + '-b{}'.format(kind), + '-{}'.format( + 'v' * self.verbosity) if self.verbosity else '', + '-d{}'.format(os.path.join(BUILD_PATH, 'doctrees')), + '-Dexclude_patterns={}'.format(self.exclude_patterns), + SOURCE_PATH, + os.path.join(BUILD_PATH, kind)) + + def _open_browser(self): + base_url = os.path.join('file://', DOC_PATH, 'build', 'html') + if self.single_doc_type == 'docstring': + url = os.path.join( + base_url, + 'generated_single', 'pandas.{}.html'.format(self.single_doc)) + else: + url = os.path.join(base_url, '{}.html'.format(self.single_doc)) + webbrowser.open(url, new=2) + + def html(self): + """Build HTML documentation.""" + self._create_build_structure() + with _maybe_exclude_notebooks(): + self._sphinx_build('html') + zip_fname = os.path.join(BUILD_PATH, 'html', 'pandas.zip') + if os.path.exists(zip_fname): + os.remove(zip_fname) + + if self.single_doc is not None: + self._open_browser() + shutil.rmtree(os.path.join(SOURCE_PATH, 'generated_single'), + ignore_errors=True) + + def latex(self, force=False): + """Build PDF documentation.""" + self._create_build_structure() + if sys.platform == 'win32': + sys.stderr.write('latex build has not been tested on windows\n') + else: + self._sphinx_build('latex') + os.chdir(os.path.join(BUILD_PATH, 'latex')) + if force: + for i in range(3): + self._run_os('pdflatex', + '-interaction=nonstopmode', + 'pandas.tex') + raise SystemExit('You should check the file ' + '"build/latex/pandas.pdf" for problems.') + else: + self._run_os('make') + + def latex_forced(self): + """Build PDF documentation with retries to find missing references.""" + self.latex(force=True) + + @staticmethod + def clean(): + """Clean documentation 
generated files.""" + shutil.rmtree(BUILD_PATH, ignore_errors=True) + shutil.rmtree(os.path.join(SOURCE_PATH, 'generated'), + ignore_errors=True) + + def zip_html(self): + """Compress HTML documentation into a zip file.""" + zip_fname = os.path.join(BUILD_PATH, 'html', 'pandas.zip') + if os.path.exists(zip_fname): + os.remove(zip_fname) + dirname = os.path.join(BUILD_PATH, 'html') + fnames = os.listdir(dirname) + os.chdir(dirname) + self._run_os('zip', + zip_fname, + '-r', + '-q', + *fnames) -def all(): - # clean() - html() - - -def auto_dev_build(debug=False): - msg = '' - try: - step = 'clean' - clean() - step = 'html' - html() - step = 'upload dev' - upload_dev() - if not debug: - sendmail(step) - - step = 'latex' - latex() - step = 'upload pdf' - upload_dev_pdf() - if not debug: - sendmail(step) - except (Exception, SystemExit) as inst: - msg = str(inst) + '\n' - sendmail(step, '[ERROR] ' + msg) - - -def sendmail(step=None, err_msg=None): - from_name, to_name = _get_config() - - if step is None: - step = '' - - if err_msg is None or '[ERROR]' not in err_msg: - msgstr = 'Daily docs %s completed successfully' % step - subject = "DOC: %s successful" % step - else: - msgstr = err_msg - subject = "DOC: %s failed" % step - - import smtplib - from email.MIMEText import MIMEText - msg = MIMEText(msgstr) - msg['Subject'] = subject - msg['From'] = from_name - msg['To'] = to_name - - server_str, port, login, pwd = _get_credentials() - server = smtplib.SMTP(server_str, port) - server.ehlo() - server.starttls() - server.ehlo() - - server.login(login, pwd) - try: - server.sendmail(from_name, to_name, msg.as_string()) - finally: - server.close() - - -def _get_dir(subdir=None): - import getpass - USERNAME = getpass.getuser() - if sys.platform == 'darwin': - HOME = '/Users/%s' % USERNAME - else: - HOME = '/home/%s' % USERNAME - - if subdir is None: - subdir = '/code/scripts/config' - conf_dir = '%s/%s' % (HOME, subdir) - return conf_dir - - -def _get_credentials(): - tmp_dir = 
_get_dir() - cred = '%s/credentials' % tmp_dir - with open(cred, 'r') as fh: - server, port, un, domain = fh.read().split(',') - port = int(port) - login = un + '@' + domain + '.com' - - import base64 - with open('%s/cron_email_pwd' % tmp_dir, 'r') as fh: - pwd = base64.b64decode(fh.read()) - - return server, port, login, pwd - - -def _get_config(): - tmp_dir = _get_dir() - with open('%s/addresses' % tmp_dir, 'r') as fh: - from_name, to_name = fh.read().split(',') - return from_name, to_name - -funcd = { - 'html': html, - 'zip_html': zip_html, - 'upload_dev': upload_dev, - 'upload_stable': upload_stable, - 'upload_dev_pdf': upload_dev_pdf, - 'upload_stable_pdf': upload_stable_pdf, - 'latex': latex, - 'latex_forced': latex_forced, - 'clean': clean, - 'auto_dev': auto_dev_build, - 'auto_debug': lambda: auto_dev_build(True), - 'build_pandas': build_pandas, - 'all': all, -} - -small_docs = False - -# current_dir = os.getcwd() -# os.chdir(os.path.dirname(os.path.join(current_dir, __file__))) - -import argparse -argparser = argparse.ArgumentParser(description=""" -pandas documentation builder -""".strip()) - -# argparser.add_argument('-arg_name', '--arg_name', -# metavar='label for arg help', -# type=str|etc, -# nargs='N|*|?|+|argparse.REMAINDER', -# required=False, -# #choices='abc', -# help='help string', -# action='store|store_true') - -# args = argparser.parse_args() - -#print args.accumulate(args.integers) - -def generate_index(api=True, single=False, **kwds): - from jinja2 import Template - with open("source/index.rst.template") as f: - t = Template(f.read()) - - with open("source/index.rst","w") as f: - f.write(t.render(api=api,single=single,**kwds)) - -import argparse -argparser = argparse.ArgumentParser(description="pandas documentation builder", - epilog="Targets : %s" % funcd.keys()) - -argparser.add_argument('--no-api', - default=False, - help='Ommit api and autosummary', - action='store_true') -argparser.add_argument('--single', - metavar='FILENAME', - 
type=str, - default=False, - help='filename of section to compile, e.g. "indexing"') -argparser.add_argument('--user', - type=str, - default=False, - help='Username to connect to the pydata server') def main(): - args, unknown = argparser.parse_known_args() - sys.argv = [sys.argv[0]] + unknown - if args.single: - args.single = os.path.basename(args.single).split(".rst")[0] - - if 'clean' in unknown: - args.single=False - - generate_index(api=not args.no_api and not args.single, single=args.single) - - if len(sys.argv) > 2: - ftype = sys.argv[1] - ver = sys.argv[2] - - if ftype == 'build_previous': - build_prev(ver, user=args.user) - if ftype == 'upload_previous': - upload_prev(ver, user=args.user) - elif len(sys.argv) == 2: - for arg in sys.argv[1:]: - func = funcd.get(arg) - if func is None: - raise SystemExit('Do not know how to handle %s; valid args are %s' % ( - arg, list(funcd.keys()))) - if args.user: - func(user=args.user) - else: - func() - else: - small_docs = False - all() -# os.chdir(current_dir) + cmds = [method for method in dir(DocBuilder) if not method.startswith('_')] + + argparser = argparse.ArgumentParser( + description='pandas documentation builder', + epilog='Commands: {}'.format(','.join(cmds))) + argparser.add_argument('command', + nargs='?', + default='html', + help='command to run: {}'.format(', '.join(cmds))) + argparser.add_argument('--num-jobs', + type=int, + default=1, + help='number of jobs used by sphinx-build') + argparser.add_argument('--no-api', + default=False, + help='ommit api and autosummary', + action='store_true') + argparser.add_argument('--single', + metavar='FILENAME', + type=str, + default=None, + help=('filename of section or method name to ' + 'compile, e.g. 
"indexing", "DataFrame.join"')) + argparser.add_argument('--python-path', + type=str, + default=os.path.dirname(DOC_PATH), + help='path') + argparser.add_argument('-v', action='count', dest='verbosity', default=0, + help=('increase verbosity (can be repeated), ' + 'passed to the sphinx build command')) + args = argparser.parse_args() + + if args.command not in cmds: + raise ValueError('Unknown command {}. Available options: {}'.format( + args.command, ', '.join(cmds))) + + # Below we update both os.environ and sys.path. The former is used by + # external libraries (namely Sphinx) to compile this module and resolve + # the import of `python_path` correctly. The latter is used to resolve + # the import within the module, injecting it into the global namespace + os.environ['PYTHONPATH'] = args.python_path + sys.path.append(args.python_path) + globals()['pandas'] = importlib.import_module('pandas') + + builder = DocBuilder(args.num_jobs, not args.no_api, args.single, + args.verbosity) + getattr(builder, args.command)() + if __name__ == '__main__': - import sys sys.exit(main()) diff --git a/doc/plots/stats/moment_plots.py b/doc/plots/stats/moment_plots.py deleted file mode 100644 index 9e3a902592c6b..0000000000000 --- a/doc/plots/stats/moment_plots.py +++ /dev/null @@ -1,30 +0,0 @@ -import numpy as np - -import matplotlib.pyplot as plt -import pandas.util.testing as t -import pandas.stats.moments as m - - -def test_series(n=1000): - t.N = n - s = t.makeTimeSeries() - return s - - -def plot_timeseries(*args, **kwds): - n = len(args) - - fig, axes = plt.subplots(n, 1, figsize=kwds.get('size', (10, 5)), - sharex=True) - titles = kwds.get('titles', None) - - for k in range(1, n + 1): - ax = axes[k - 1] - ts = args[k - 1] - ax.plot(ts.index, ts.values) - - if titles: - ax.set_title(titles[k - 1]) - - fig.autofmt_xdate() - fig.subplots_adjust(bottom=0.10, top=0.95) diff --git a/doc/plots/stats/moments_ewma.py b/doc/plots/stats/moments_ewma.py deleted file mode 100644 index 
3e521ed60bb8f..0000000000000 --- a/doc/plots/stats/moments_ewma.py +++ /dev/null @@ -1,15 +0,0 @@ -import matplotlib.pyplot as plt -import pandas.util.testing as t -import pandas.stats.moments as m - -t.N = 200 -s = t.makeTimeSeries().cumsum() - -plt.figure(figsize=(10, 5)) -plt.plot(s.index, s.values) -plt.plot(s.index, m.ewma(s, 20, min_periods=1).values) -f = plt.gcf() -f.autofmt_xdate() - -plt.show() -plt.close('all') diff --git a/doc/plots/stats/moments_ewmvol.py b/doc/plots/stats/moments_ewmvol.py deleted file mode 100644 index 093f62868fc4e..0000000000000 --- a/doc/plots/stats/moments_ewmvol.py +++ /dev/null @@ -1,23 +0,0 @@ -import matplotlib.pyplot as plt -import pandas.util.testing as t -import pandas.stats.moments as m - -t.N = 500 -ts = t.makeTimeSeries() -ts[::100] = 20 - -s = ts.cumsum() - - -plt.figure(figsize=(10, 5)) -plt.plot(s.index, m.ewmvol(s, span=50, min_periods=1).values, color='b') -plt.plot(s.index, m.rolling_std(s, 50, min_periods=1).values, color='r') - -plt.title('Exp-weighted std with shocks') -plt.legend(('Exp-weighted', 'Equal-weighted')) - -f = plt.gcf() -f.autofmt_xdate() - -plt.show() -plt.close('all') diff --git a/doc/plots/stats/moments_expw.py b/doc/plots/stats/moments_expw.py deleted file mode 100644 index 5fff419b3a940..0000000000000 --- a/doc/plots/stats/moments_expw.py +++ /dev/null @@ -1,35 +0,0 @@ -from moment_plots import * - -np.random.seed(1) - -ts = test_series(500) * 10 - -# ts[::100] = 20 - -s = ts.cumsum() - -fig, axes = plt.subplots(3, 1, figsize=(8, 10), sharex=True) - -ax0, ax1, ax2 = axes - -ax0.plot(s.index, s.values) -ax0.set_title('time series') - -ax1.plot(s.index, m.ewma(s, span=50, min_periods=1).values, color='b') -ax1.plot(s.index, m.rolling_mean(s, 50, min_periods=1).values, color='r') -ax1.set_title('rolling_mean vs. 
ewma') - -line1 = ax2.plot( - s.index, m.ewmstd(s, span=50, min_periods=1).values, color='b') -line2 = ax2.plot( - s.index, m.rolling_std(s, 50, min_periods=1).values, color='r') -ax2.set_title('rolling_std vs. ewmstd') - -fig.legend((line1, line2), - ('Exp-weighted', 'Equal-weighted'), - loc='upper right') -fig.autofmt_xdate() -fig.subplots_adjust(bottom=0.10, top=0.95) - -plt.show() -plt.close('all') diff --git a/doc/plots/stats/moments_rolling.py b/doc/plots/stats/moments_rolling.py deleted file mode 100644 index 30a6c5f53e20c..0000000000000 --- a/doc/plots/stats/moments_rolling.py +++ /dev/null @@ -1,24 +0,0 @@ -from moment_plots import * - -ts = test_series() -s = ts.cumsum() - -s[20:50] = np.NaN -s[120:150] = np.NaN -plot_timeseries(s, - m.rolling_count(s, 50), - m.rolling_sum(s, 50, min_periods=10), - m.rolling_mean(s, 50, min_periods=10), - m.rolling_std(s, 50, min_periods=10), - m.rolling_skew(s, 50, min_periods=10), - m.rolling_kurt(s, 50, min_periods=10), - size=(10, 12), - titles=('time series', - 'rolling_count', - 'rolling_sum', - 'rolling_mean', - 'rolling_std', - 'rolling_skew', - 'rolling_kurt')) -plt.show() -plt.close('all') diff --git a/doc/plots/stats/moments_rolling_binary.py b/doc/plots/stats/moments_rolling_binary.py deleted file mode 100644 index ab6b7b1c8ff49..0000000000000 --- a/doc/plots/stats/moments_rolling_binary.py +++ /dev/null @@ -1,30 +0,0 @@ -from moment_plots import * - -np.random.seed(1) - -ts = test_series() -s = ts.cumsum() -ts2 = test_series() -s2 = ts2.cumsum() - -s[20:50] = np.NaN -s[120:150] = np.NaN -fig, axes = plt.subplots(3, 1, figsize=(8, 10), sharex=True) - -ax0, ax1, ax2 = axes - -ax0.plot(s.index, s.values) -ax0.plot(s2.index, s2.values) -ax0.set_title('time series') - -ax1.plot(s.index, m.rolling_corr(s, s2, 50, min_periods=1).values) -ax1.set_title('rolling_corr') - -ax2.plot(s.index, m.rolling_cov(s, s2, 50, min_periods=1).values) -ax2.set_title('rolling_cov') - -fig.autofmt_xdate() 
-fig.subplots_adjust(bottom=0.10, top=0.95) - -plt.show() -plt.close('all') diff --git a/doc/source/10min.rst b/doc/source/10min.rst index 0612e86134cf2..fbbe94a72c71e 100644 --- a/doc/source/10min.rst +++ b/doc/source/10min.rst @@ -11,7 +11,7 @@ np.random.seed(123456) np.set_printoptions(precision=4, suppress=True) import matplotlib - matplotlib.style.use('ggplot') + # matplotlib.style.use('default') pd.options.display.max_rows = 15 #### portions of this were borrowed from the @@ -25,7 +25,7 @@ ******************** This is a short introduction to pandas, geared mainly for new users. -You can see more complex recipes in the :ref:`Cookbook` +You can see more complex recipes in the :ref:`Cookbook`. Customarily, we import as follows: @@ -38,7 +38,7 @@ Customarily, we import as follows: Object Creation --------------- -See the :ref:`Data Structure Intro section ` +See the :ref:`Data Structure Intro section `. Creating a :class:`Series` by passing a list of values, letting pandas create a default integer index: @@ -48,7 +48,7 @@ a default integer index: s = pd.Series([1,3,5,np.nan,6,8]) s -Creating a :class:`DataFrame` by passing a numpy array, with a datetime index +Creating a :class:`DataFrame` by passing a NumPy array, with a datetime index and labeled columns: .. ipython:: python @@ -70,7 +70,8 @@ Creating a ``DataFrame`` by passing a dict of objects that can be converted to s 'F' : 'foo' }) df2 -Having specific :ref:`dtypes ` +The columns of the resulting ``DataFrame`` have different +:ref:`dtypes `. .. ipython:: python @@ -84,29 +85,18 @@ will be completed: @verbatim In [1]: df2. 
- df2.A df2.boxplot - df2.abs df2.C - df2.add df2.clip - df2.add_prefix df2.clip_lower - df2.add_suffix df2.clip_upper - df2.align df2.columns - df2.all df2.combine - df2.any df2.combineAdd + df2.A df2.bool + df2.abs df2.boxplot + df2.add df2.C + df2.add_prefix df2.clip + df2.add_suffix df2.clip_lower + df2.align df2.clip_upper + df2.all df2.columns + df2.any df2.combine df2.append df2.combine_first - df2.apply df2.combineMult - df2.applymap df2.compound - df2.as_blocks df2.consolidate - df2.asfreq df2.convert_objects - df2.as_matrix df2.copy - df2.astype df2.corr - df2.at df2.corrwith - df2.at_time df2.count - df2.axes df2.cov - df2.B df2.cummax - df2.between_time df2.cummin - df2.bfill df2.cumprod - df2.blocks df2.cumsum - df2.bool df2.D + df2.apply df2.compound + df2.applymap df2.consolidate + df2.D As you can see, the columns ``A``, ``B``, ``C``, and ``D`` are automatically tab completed. ``E`` is there as well; the rest of the attributes have been @@ -115,16 +105,16 @@ truncated for brevity. Viewing Data ------------ -See the :ref:`Basics section ` +See the :ref:`Basics section `. -See the top & bottom rows of the frame +Here is how to view the top and bottom rows of the frame: .. ipython:: python df.head() df.tail(3) -Display the index, columns, and the underlying numpy data +Display the index, columns, and the underlying NumPy data: .. ipython:: python @@ -132,25 +122,25 @@ Display the index, columns, and the underlying numpy data df.columns df.values -Describe shows a quick statistic summary of your data +:func:`~DataFrame.describe` shows a quick statistic summary of your data: .. ipython:: python df.describe() -Transposing your data +Transposing your data: .. ipython:: python df.T -Sorting by an axis +Sorting by an axis: .. ipython:: python df.sort_index(axis=1, ascending=False) -Sorting by values +Sorting by values: .. 
ipython:: python @@ -164,15 +154,15 @@ Selection While standard Python / Numpy expressions for selecting and setting are intuitive and come in handy for interactive work, for production code, we recommend the optimized pandas data access methods, ``.at``, ``.iat``, - ``.loc``, ``.iloc`` and ``.ix``. + ``.loc`` and ``.iloc``. -See the indexing documentation :ref:`Indexing and Selecting Data ` and :ref:`MultiIndex / Advanced Indexing ` +See the indexing documentation :ref:`Indexing and Selecting Data ` and :ref:`MultiIndex / Advanced Indexing `. Getting ~~~~~~~ Selecting a single column, which yields a ``Series``, -equivalent to ``df.A`` +equivalent to ``df.A``: .. ipython:: python @@ -188,39 +178,39 @@ Selecting via ``[]``, which slices the rows. Selection by Label ~~~~~~~~~~~~~~~~~~ -See more in :ref:`Selection by Label ` +See more in :ref:`Selection by Label `. -For getting a cross section using a label +For getting a cross section using a label: .. ipython:: python df.loc[dates[0]] -Selecting on a multi-axis by label +Selecting on a multi-axis by label: .. ipython:: python df.loc[:,['A','B']] -Showing label slicing, both endpoints are *included* +Showing label slicing, both endpoints are *included*: .. ipython:: python df.loc['20130102':'20130104',['A','B']] -Reduction in the dimensions of the returned object +Reduction in the dimensions of the returned object: .. ipython:: python df.loc['20130102',['A','B']] -For getting a scalar value +For getting a scalar value: .. ipython:: python df.loc[dates[0],'A'] -For getting fast access to a scalar (equiv to the prior method) +For getting fast access to a scalar (equivalent to the prior method): .. ipython:: python @@ -229,45 +219,45 @@ For getting fast access to a scalar (equiv to the prior method) Selection by Position ~~~~~~~~~~~~~~~~~~~~~ -See more in :ref:`Selection by Position ` +See more in :ref:`Selection by Position `. 
-Select via the position of the passed integers +Select via the position of the passed integers: .. ipython:: python df.iloc[3] -By integer slices, acting similar to numpy/python +By integer slices, acting similar to numpy/python: .. ipython:: python df.iloc[3:5,0:2] -By lists of integer position locations, similar to the numpy/python style +By lists of integer position locations, similar to the numpy/python style: .. ipython:: python df.iloc[[1,2,4],[0,2]] -For slicing rows explicitly +For slicing rows explicitly: .. ipython:: python df.iloc[1:3,:] -For slicing columns explicitly +For slicing columns explicitly: .. ipython:: python df.iloc[:,1:3] -For getting a value explicitly +For getting a value explicitly: .. ipython:: python df.iloc[1,1] -For getting fast access to a scalar (equiv to the prior method) +For getting fast access to a scalar (equivalent to the prior method): .. ipython:: python @@ -301,7 +291,7 @@ Setting ~~~~~~~ Setting a new column automatically aligns the data -by the indexes +by the indexes. .. ipython:: python @@ -309,25 +299,25 @@ by the indexes s1 df['F'] = s1 -Setting values by label +Setting values by label: .. ipython:: python df.at[dates[0],'A'] = 0 -Setting values by position +Setting values by position: .. ipython:: python df.iat[0,1] = 0 -Setting by assigning with a numpy array +Setting by assigning with a NumPy array: .. ipython:: python df.loc[:,'D'] = np.array([5] * len(df)) -The result of the prior setting operations +The result of the prior setting operations. .. ipython:: python @@ -347,7 +337,7 @@ Missing Data pandas primarily uses the value ``np.nan`` to represent missing data. It is by default not included in computations. See the :ref:`Missing Data section -` +`. Reindexing allows you to change/add/delete the index on a specified axis. This returns a copy of the data. @@ -364,36 +354,36 @@ To drop any rows that have missing data. df1.dropna(how='any') -Filling missing data +Filling missing data. .. 
ipython:: python df1.fillna(value=5) -To get the boolean mask where values are ``nan`` +To get the boolean mask where values are ``nan``. .. ipython:: python - pd.isnull(df1) + pd.isna(df1) Operations ---------- -See the :ref:`Basic section on Binary Ops ` +See the :ref:`Basic section on Binary Ops `. Stats ~~~~~ Operations in general *exclude* missing data. -Performing a descriptive statistic +Performing a descriptive statistic: .. ipython:: python df.mean() -Same operation on the other axis +Same operation on the other axis: .. ipython:: python @@ -412,7 +402,7 @@ In addition, pandas automatically broadcasts along the specified dimension. Apply ~~~~~ -Applying functions to the data +Applying functions to the data: .. ipython:: python @@ -422,7 +412,7 @@ Applying functions to the data Histogramming ~~~~~~~~~~~~~ -See more at :ref:`Histogramming and Discretization ` +See more at :ref:`Histogramming and Discretization `. .. ipython:: python @@ -436,7 +426,7 @@ String Methods Series is equipped with a set of string processing methods in the `str` attribute that make it easy to operate on each element of the array, as in the code snippet below. Note that pattern-matching in `str` generally uses `regular -expressions `__ by default (and in +expressions `__ by default (and in some cases always uses them). See more at :ref:`Vectorized String Methods `. @@ -456,7 +446,7 @@ DataFrame, and Panel objects with various kinds of set logic for the indexes and relational algebra functionality in the case of join / merge-type operations. -See the :ref:`Merging section ` +See the :ref:`Merging section `. Concatenating pandas objects together with :func:`concat`: @@ -473,7 +463,7 @@ Concatenating pandas objects together with :func:`concat`: Join ~~~~ -SQL style merges. See the :ref:`Database style joining ` +SQL style merges. See the :ref:`Database style joining ` section. .. 
ipython:: python @@ -497,7 +487,8 @@ Another example that can be given is: Append ~~~~~~ -Append rows to a dataframe. See the :ref:`Appending ` +Append rows to a dataframe. See the :ref:`Appending ` +section. .. ipython:: python @@ -511,13 +502,13 @@ Grouping -------- By "group by" we are referring to a process involving one or more of the -following steps +following steps: - **Splitting** the data into groups based on some criteria - **Applying** a function to each group independently - **Combining** the results into a data structure -See the :ref:`Grouping section ` +See the :ref:`Grouping section `. .. ipython:: python @@ -529,14 +520,15 @@ See the :ref:`Grouping section ` 'D' : np.random.randn(8)}) df -Grouping and then applying a function ``sum`` to the resulting groups. +Grouping and then applying the :meth:`~DataFrame.sum` function to the resulting +groups. .. ipython:: python df.groupby('A').sum() -Grouping by multiple columns forms a hierarchical index, which we then apply -the function. +Grouping by multiple columns forms a hierarchical index, and again we can +apply the ``sum`` function. .. ipython:: python @@ -606,7 +598,7 @@ Time Series pandas has simple, powerful, and efficient functionality for performing resampling operations during frequency conversion (e.g., converting secondly data into 5-minutely data). This is extremely common in, but not limited to, -financial applications. See the :ref:`Time Series section ` +financial applications. See the :ref:`Time Series section `. .. ipython:: python @@ -614,7 +606,7 @@ financial applications. See the :ref:`Time Series section ` ts = pd.Series(np.random.randint(0, 500, len(rng)), index=rng) ts.resample('5Min').sum() -Time zone representation +Time zone representation: .. ipython:: python @@ -624,13 +616,13 @@ Time zone representation ts_utc = ts.tz_localize('UTC') ts_utc -Convert to another time zone +Converting to another time zone: .. 
ipython:: python ts_utc.tz_convert('US/Eastern') -Converting between time span representations +Converting between time span representations: .. ipython:: python @@ -656,7 +648,7 @@ the quarter end: Categoricals ------------ -Since version 0.15, pandas can include categorical data in a ``DataFrame``. For full docs, see the +pandas can include categorical data in a ``DataFrame``. For full docs, see the :ref:`categorical introduction ` and the :ref:`API documentation `. .. ipython:: python @@ -670,14 +662,15 @@ Convert the raw grades to a categorical data type. df["grade"] = df["raw_grade"].astype("category") df["grade"] -Rename the categories to more meaningful names (assigning to ``Series.cat.categories`` is inplace!) +Rename the categories to more meaningful names (assigning to +``Series.cat.categories`` is inplace!). .. ipython:: python df["grade"].cat.categories = ["very good", "good", "very bad"] Reorder the categories and simultaneously add the missing categories (methods under ``Series -.cat`` return a new ``Series`` per default). +.cat`` return a new ``Series`` by default). .. ipython:: python @@ -690,7 +683,7 @@ Sorting is per order in the categories, not lexical order. df.sort_values(by="grade") -Grouping by a categorical column shows also empty categories. +Grouping by a categorical column also shows empty categories. .. ipython:: python @@ -700,7 +693,7 @@ Grouping by a categorical column shows also empty categories. Plotting -------- -:ref:`Plotting ` docs. +See the :ref:`Plotting ` docs. .. ipython:: python :suppress: @@ -716,8 +709,8 @@ Plotting @savefig series_plot_basic.png ts.plot() -On DataFrame, :meth:`~DataFrame.plot` is a convenience to plot all of the -columns with labels: +On a DataFrame, the :meth:`~DataFrame.plot` method is a convenience to plot all +of the columns with labels: .. ipython:: python @@ -734,13 +727,13 @@ Getting Data In/Out CSV ~~~ -:ref:`Writing to a csv file ` +:ref:`Writing to a csv file. ` .. 
ipython:: python df.to_csv('foo.csv') -:ref:`Reading from a csv file ` +:ref:`Reading from a csv file. ` .. ipython:: python @@ -754,15 +747,15 @@ CSV HDF5 ~~~~ -Reading and writing to :ref:`HDFStores ` +Reading and writing to :ref:`HDFStores `. -Writing to a HDF5 Store +Writing to a HDF5 Store. .. ipython:: python df.to_hdf('foo.h5','df') -Reading from a HDF5 Store +Reading from a HDF5 Store. .. ipython:: python @@ -776,15 +769,15 @@ Reading from a HDF5 Store Excel ~~~~~ -Reading and writing to :ref:`MS Excel ` +Reading and writing to :ref:`MS Excel `. -Writing to an excel file +Writing to an excel file. .. ipython:: python df.to_excel('foo.xlsx', sheet_name='Sheet1') -Reading from an excel file +Reading from an excel file. .. ipython:: python @@ -798,7 +791,7 @@ Reading from an excel file Gotchas ------- -If you are trying an operation and you see an exception like: +If you are attempting to perform an operation you might see an exception like: .. code-block:: python diff --git a/doc/source/_static/banklist.html b/doc/source/_static/banklist.html index 8ec1561f8c394..cbcce5a2d49ff 100644 --- a/doc/source/_static/banklist.html +++ b/doc/source/_static/banklist.html @@ -7,7 +7,7 @@ - + @@ -4849,7 +4849,7 @@

Failed Bank List

`` are used to form the column index, if multiple rows are contained within +```` then a multiindex is created); if specified, the header row is taken +from the data minus the parsed header elements (``' % str(i)) + table += (''.format(i=i)) table += '
{val}{val}`` elements). dfs = pd.read_html(url, header=0) -Specify an index column +Specify an index column: .. code-block:: python dfs = pd.read_html(url, index_col=0) -Specify a number of rows to skip +Specify a number of rows to skip: .. code-block:: python dfs = pd.read_html(url, skiprows=0) Specify a number of rows to skip using a list (``xrange`` (Python 2 only) works -as well) +as well): .. code-block:: python dfs = pd.read_html(url, skiprows=range(2)) -Specify an HTML attribute +Specify an HTML attribute: .. code-block:: python @@ -2366,7 +2382,7 @@ Specify an HTML attribute dfs2 = pd.read_html(url, attrs={'class': 'sortable'}) print(np.array_equal(dfs1[0], dfs2[0])) # Should be True -Specify values that should be converted to NaN +Specify values that should be converted to NaN: .. code-block:: python @@ -2374,7 +2390,7 @@ Specify values that should be converted to NaN .. versionadded:: 0.19 -Specify whether to keep the default set of NaN values +Specify whether to keep the default set of NaN values: .. code-block:: python @@ -2384,7 +2400,7 @@ Specify whether to keep the default set of NaN values Specify converters for columns. This is useful for numerical text data that has leading zeros. By default columns that are numerical are cast to numeric -types and the leading zeros are lost. To avoid this, we can convert these +types and the leading zeros are lost. To avoid this, we can convert these columns to strings. .. code-block:: python @@ -2395,13 +2411,13 @@ columns to strings. .. versionadded:: 0.19 -Use some combination of the above +Use some combination of the above: .. code-block:: python dfs = pd.read_html(url, match='Metcalf Bank', index_col=0) -Read in pandas ``to_html`` output (with some loss of floating point precision) +Read in pandas ``to_html`` output (with some loss of floating point precision): .. 
code-block:: python @@ -2410,15 +2426,15 @@ Read in pandas ``to_html`` output (with some loss of floating point precision) dfin = pd.read_html(s, index_col=0) The ``lxml`` backend will raise an error on a failed parse if that is the only -parser you provide (if you only have a single parser you can provide just a +parser you provide. If you only have a single parser you can provide just a string, but it is considered good practice to pass a list with one string if, -for example, the function expects a sequence of strings) +for example, the function expects a sequence of strings. You may use: .. code-block:: python dfs = pd.read_html(url, 'Metcalf Bank', index_col=0, flavor=['lxml']) -or +Or you could pass ``flavor='lxml'`` without a list: .. code-block:: python @@ -2472,7 +2488,7 @@ HTML: .. raw:: html :file: _static/basic.html -The ``columns`` argument will limit the columns shown +The ``columns`` argument will limit the columns shown: .. ipython:: python @@ -2489,7 +2505,7 @@ HTML: :file: _static/columns.html ``float_format`` takes a Python callable to control the precision of floating -point values +point values: .. ipython:: python @@ -2506,7 +2522,7 @@ HTML: :file: _static/float_format.html ``bold_rows`` will make the row labels bold by default, but you can turn that -off +off: .. ipython:: python @@ -2579,7 +2595,7 @@ parse HTML tables in the top-level pandas io function ``read_html``. * Benefits - * |lxml|_ is very fast + * |lxml|_ is very fast. * |lxml|_ requires Cython to install correctly. @@ -2652,8 +2668,8 @@ The :func:`~pandas.read_excel` method can read Excel 2003 (``.xls``) and Excel 2007+ (``.xlsx``) files using the ``xlrd`` Python module. The :meth:`~DataFrame.to_excel` instance method is used for saving a ``DataFrame`` to Excel. Generally the semantics are -similar to working with :ref:`csv` data. See the :ref:`cookbook` for some -advanced strategies +similar to working with :ref:`csv` data. +See the :ref:`cookbook` for some advanced strategies. 
.. _io.excel_reader: @@ -2696,7 +2712,7 @@ The ``sheet_names`` property will generate a list of the sheet names in the file. The primary use-case for an ``ExcelFile`` is parsing multiple sheets with -different parameters +different parameters: .. code-block:: python @@ -2725,7 +2741,7 @@ of sheet names can simply be passed to ``read_excel`` with no loss in performanc Specifying Sheets +++++++++++++++++ -.. note :: The second argument is ``sheet_name``, not to be confused with ``ExcelFile.sheet_names`` +.. note :: The second argument is ``sheet_name``, not to be confused with ``ExcelFile.sheet_names``. .. note :: An ExcelFile's attribute ``sheet_names`` provides access to a list of sheets. @@ -2802,12 +2818,12 @@ parameters. df.index = df.index.set_names(['lvl1', 'lvl2']) df.to_excel('path_to_file.xlsx') - df = pd.read_excel('path_to_file.xlsx', index_col=[0,1]) + df = pd.read_excel('path_to_file.xlsx', index_col=[0, 1]) df If the source file has both ``MultiIndex`` index and columns, lists specifying each -should be passed to ``index_col`` and ``header`` +should be passed to ``index_col`` and ``header``: .. ipython:: python @@ -2828,10 +2844,10 @@ Parsing Specific Columns ++++++++++++++++++++++++ It is often the case that users will insert columns to do temporary computations -in Excel and you may not want to read in those columns. `read_excel` takes -a `usecols` keyword to allow you to specify a subset of columns to parse. +in Excel and you may not want to read in those columns. ``read_excel`` takes +a ``usecols`` keyword to allow you to specify a subset of columns to parse. -If `usecols` is an integer, then it is assumed to indicate the last column +If ``usecols`` is an integer, then it is assumed to indicate the last column to be parsed. .. code-block:: python @@ -2840,11 +2856,12 @@ to be parsed. If `usecols` is a list of integers, then it is assumed to be the file column indices to be parsed. + .. 
code-block:: python read_excel('path_to_file.xls', 'Sheet1', usecols=[0, 2, 3]) -Element order is ignored, so usecols=[0,1] is the same as [1,0]. +Element order is ignored, so ``usecols=[0,1]`` is the same as ``[1,0]``. Parsing Dates +++++++++++++ @@ -2852,7 +2869,7 @@ Parsing Dates Datetime-like values are normally automatically converted to the appropriate dtype when reading the excel file. But if you have a column of strings that *look* like dates (but are not actually formatted as dates in excel), you can -use the `parse_dates` keyword to parse those strings to datetimes: +use the ``parse_dates`` keyword to parse those strings to datetimes: .. code-block:: python @@ -2862,7 +2879,7 @@ use the `parse_dates` keyword to parse those strings to datetimes: Cell Converters +++++++++++++++ -It is possible to transform the contents of Excel cells via the `converters` +It is possible to transform the contents of Excel cells via the ``converters`` option. For instance, to convert a column to boolean: .. code-block:: python @@ -2903,11 +2920,11 @@ Writing Excel Files Writing Excel Files to Disk +++++++++++++++++++++++++++ -To write a DataFrame object to a sheet of an Excel file, you can use the +To write a ``DataFrame`` object to a sheet of an Excel file, you can use the ``to_excel`` instance method. The arguments are largely the same as ``to_csv`` described above, the first argument being the name of the excel file, and the -optional second argument the name of the sheet to which the DataFrame should be -written. For example: +optional second argument the name of the sheet to which the ``DataFrame`` should be +written. For example: .. code-block:: python @@ -2917,7 +2934,7 @@ Files with a ``.xls`` extension will be written using ``xlwt`` and those with a ``.xlsx`` extension will be written using ``xlsxwriter`` (if available) or ``openpyxl``. -The DataFrame will be written in a way that tries to mimic the REPL output. 
+The ``DataFrame`` will be written in a way that tries to mimic the REPL output. The ``index_label`` will be placed in the second row instead of the first. You can place it in the first row by setting the ``merge_cells`` option in ``to_excel()`` to ``False``: @@ -2926,10 +2943,7 @@ row instead of the first. You can place it in the first row by setting the df.to_excel('path_to_file.xlsx', index_label='label', merge_cells=False) -The Panel class also has a ``to_excel`` instance method, -which writes each DataFrame in the Panel to a separate sheet. - -In order to write separate DataFrames to separate sheets in a single Excel file, +In order to write separate ``DataFrames`` to separate sheets in a single Excel file, one can pass an :class:`~pandas.io.excel.ExcelWriter`. .. code-block:: python @@ -2990,13 +3004,13 @@ Pandas supports writing Excel files to buffer-like objects such as ``StringIO`` Excel writer engines '''''''''''''''''''' -``pandas`` chooses an Excel writer via two methods: +Pandas chooses an Excel writer via two methods: 1. the ``engine`` keyword argument 2. the filename extension (via the default specified in config options) -By default, ``pandas`` uses the `XlsxWriter`_ for ``.xlsx`` and `openpyxl`_ -for ``.xlsm`` files and `xlwt`_ for ``.xls`` files. If you have multiple +By default, pandas uses the `XlsxWriter`_ for ``.xlsx``, `openpyxl`_ +for ``.xlsm``, and `xlwt`_ for ``.xls`` files. If you have multiple engines installed, you can set the default engine through :ref:`setting the config options ` ``io.excel.xlsx.writer`` and ``io.excel.xls.writer``. pandas will fall back on `openpyxl`_ for ``.xlsx`` @@ -3034,8 +3048,8 @@ Style and Formatting The look and feel of Excel worksheets created from pandas can be modified using the following parameters on the ``DataFrame``'s ``to_excel`` method. 
-- ``float_format`` : Format string for floating point numbers (default None) -- ``freeze_panes`` : A tuple of two integers representing the bottommost row and rightmost column to freeze. Each of these parameters is one-based, so (1, 1) will freeze the first row and first column (default None) +- ``float_format`` : Format string for floating point numbers (default ``None``). +- ``freeze_panes`` : A tuple of two integers representing the bottommost row and rightmost column to freeze. Each of these parameters is one-based, so (1, 1) will freeze the first row and first column (default ``None``). @@ -3044,10 +3058,10 @@ The look and feel of Excel worksheets created from pandas can be modified using Clipboard --------- -A handy way to grab data is to use the ``read_clipboard`` method, which takes -the contents of the clipboard buffer and passes them to the ``read_table`` -method. For instance, you can copy the following -text to the clipboard (CTRL-C on many operating systems): +A handy way to grab data is to use the :meth:`~DataFrame.read_clipboard` method, +which takes the contents of the clipboard buffer and passes them to the +``read_table`` method. For instance, you can copy the following text to the +clipboard (CTRL-C on many operating systems): .. code-block:: python @@ -3056,7 +3070,7 @@ text to the clipboard (CTRL-C on many operating systems): y 2 5 q z 3 6 r -And then import the data directly to a DataFrame by calling: +And then import the data directly to a ``DataFrame`` by calling: .. code-block:: python @@ -3066,10 +3080,11 @@ And then import the data directly to a DataFrame by calling: clipdf -The ``to_clipboard`` method can be used to write the contents of a DataFrame to + +The ``to_clipboard`` method can be used to write the contents of a ``DataFrame`` to the clipboard. Following which you can paste the clipboard contents into other applications (CTRL-V on many operating systems). 
Here we illustrate writing a -DataFrame into clipboard and reading it back. +``DataFrame`` into clipboard and reading it back. .. ipython:: python @@ -3121,7 +3136,7 @@ any pickled pandas object (or any other pickled object) from file: Several internal refactorings have been done while still preserving compatibility with pickles created with older versions of pandas. However, - for such cases, pickled dataframes, series etc, must be read with + for such cases, pickled ``DataFrames``, ``Series`` etc, must be read with ``pd.read_pickle``, rather than ``pickle.load``. See `here `__ @@ -3139,8 +3154,8 @@ Compressed pickle files :func:`read_pickle`, :meth:`DataFrame.to_pickle` and :meth:`Series.to_pickle` can read and write compressed pickle files. The compression types of ``gzip``, ``bz2``, ``xz`` are supported for reading and writing. -`zip`` file supports read only and must contain only one data file -to be read in. +The ``zip`` file format only supports reading and must contain only one data file +to be read. The compression type can be an explicit parameter or be inferred from the file extension. If 'infer', then use ``gzip``, ``bz2``, ``zip``, or ``xz`` if filename ends in ``'.gz'``, ``'.bz2'``, ``'.zip'``, or @@ -3154,7 +3169,7 @@ If 'infer', then use ``gzip``, ``bz2``, ``zip``, or ``xz`` if filename ends in ` 'C': pd.date_range('20130101', periods=1000, freq='s')}) df -Using an explicit compression type +Using an explicit compression type: .. ipython:: python @@ -3162,7 +3177,7 @@ Using an explicit compression type rt = pd.read_pickle("data.pkl.compress", compression="gzip") rt -Inferring compression type from the extension +Inferring compression type from the extension: .. ipython:: python @@ -3170,7 +3185,7 @@ Inferring compression type from the extension rt = pd.read_pickle("data.pkl.xz", compression="infer") rt -The default is to 'infer +The default is to 'infer': .. 
ipython:: python @@ -3221,14 +3236,14 @@ You can pass a list of objects and you will receive them back on deserialization pd.to_msgpack('foo.msg', df, 'foo', np.array([1,2,3]), s) pd.read_msgpack('foo.msg') -You can pass ``iterator=True`` to iterate over the unpacked results +You can pass ``iterator=True`` to iterate over the unpacked results: .. ipython:: python for o in pd.read_msgpack('foo.msg',iterator=True): print(o) -You can pass ``append=True`` to the writer to append to an existing pack +You can pass ``append=True`` to the writer to append to an existing pack: .. ipython:: python @@ -3331,7 +3346,7 @@ In a current or later Python session, you can retrieve stored objects: # dotted (attribute) access provides get as well store.df -Deletion of the object specified by the key +Deletion of the object specified by the key: .. ipython:: python @@ -3340,7 +3355,7 @@ Deletion of the object specified by the key store -Closing a Store, Context Manager +Closing a Store and using a context manager: .. ipython:: python @@ -3348,8 +3363,7 @@ Closing a Store, Context Manager store store.is_open - # Working with, and automatically closing the store with the context - # manager + # Working with, and automatically closing the store using a context manager with pd.HDFStore('store.h5') as store: store.keys() @@ -3449,17 +3463,17 @@ the ``fixed`` format. These types of stores are **not** appendable once written remove them and rewrite). Nor are they **queryable**; they must be retrieved in their entirety. They also do not support dataframes with non-unique column names. The ``fixed`` format stores offer very fast writing and slightly faster reading than ``table`` stores. -This format is specified by default when using ``put`` or ``to_hdf`` or by ``format='fixed'`` or ``format='f'`` +This format is specified by default when using ``put`` or ``to_hdf`` or by ``format='fixed'`` or ``format='f'``. .. 
warning:: - A ``fixed`` format will raise a ``TypeError`` if you try to retrieve using a ``where`` . + A ``fixed`` format will raise a ``TypeError`` if you try to retrieve using a ``where``: .. code-block:: python - pd.DataFrame(randn(10,2)).to_hdf('test_fixed.h5','df') + pd.DataFrame(randn(10, 2)).to_hdf('test_fixed.h5', 'df') - pd.read_hdf('test_fixed.h5','df',where='index>5') + pd.read_hdf('test_fixed.h5', 'df', where='index>5') TypeError: cannot pass a where specification when reading a fixed format. this store must be selected in its entirety @@ -3472,9 +3486,9 @@ Table Format ``HDFStore`` supports another ``PyTables`` format on disk, the ``table`` format. Conceptually a ``table`` is shaped very much like a DataFrame, with rows and columns. A ``table`` may be appended to in the same or -other sessions. In addition, delete & query type operations are +other sessions. In addition, delete and query type operations are supported. This format is specified by ``format='table'`` or ``format='t'`` -to ``append`` or ``put`` or ``to_hdf`` +to ``append`` or ``put`` or ``to_hdf``. This format can be set as an option as well ``pd.set_option('io.hdf.default_format','table')`` to enable ``put/append/to_hdf`` to by default store in the ``table`` format. @@ -3514,9 +3528,9 @@ Hierarchical Keys Keys to a store can be specified as a string. These can be in a hierarchical path-name like format (e.g. ``foo/bar/bah``), which will generate a hierarchy of sub-stores (or ``Groups`` in PyTables -parlance). Keys can be specified with out the leading '/' and are ALWAYS +parlance). Keys can be specified without the leading '/' and are **always** absolute (e.g. 'foo' refers to '/foo'). Removal operations can remove -everything in the sub-store and BELOW, so be *careful*. +everything in the sub-store and **below**, so be *careful*. .. ipython:: python @@ -3547,7 +3561,7 @@ everything in the sub-store and BELOW, so be *careful*. 
/foo/bar/bah (Group) '' children := ['block0_items' (Array), 'block0_values' (Array), 'axis0' (Array), 'axis1' (Array)] - Instead, use explicit string based keys + Instead, use explicit string based keys: .. ipython:: python @@ -3596,8 +3610,8 @@ defaults to `nan`. Storing Multi-Index DataFrames ++++++++++++++++++++++++++++++ -Storing multi-index dataframes as tables is very similar to -storing/selecting from homogeneous index DataFrames. +Storing multi-index ``DataFrames`` as tables is very similar to +storing/selecting from homogeneous index ``DataFrames``. .. ipython:: python @@ -3632,10 +3646,10 @@ data. A query is specified using the ``Term`` class under the hood, as a boolean expression. -- ``index`` and ``columns`` are supported indexers of a DataFrame +- ``index`` and ``columns`` are supported indexers of a ``DataFrame``. - ``major_axis``, ``minor_axis``, and ``items`` are supported indexers of - the Panel -- if ``data_columns`` are specified, these can be used as additional indexers + the Panel. +- if ``data_columns`` are specified, these can be used as additional indexers. Valid comparison operators are: @@ -3849,7 +3863,7 @@ to perform queries (other than the `indexable` columns, which you can always query). For instance say you want to perform this common operation, on-disk, and return just the frame that matches this query. You can specify ``data_columns = True`` to force all columns to -be data_columns +be ``data_columns``. .. ipython:: python @@ -3879,7 +3893,7 @@ There is some performance degradation by making lots of columns into `data columns`, so it is up to the user to designate these. In addition, you cannot change data columns (nor indexables) after the first append/put operation (Of course you can simply read in the data and -create a new table!) +create a new table!). Iterator ++++++++ @@ -3912,7 +3926,7 @@ chunks. .. 
ipython:: python - dfeq = pd.DataFrame({'number': np.arange(1,11)}) + dfeq = pd.DataFrame({'number': np.arange(1, 11)}) dfeq store.append('dfeq', dfeq, data_columns=['number']) @@ -3921,9 +3935,9 @@ chunks. return [l[i:i+n] for i in range(0, len(l), n)] evens = [2,4,6,8,10] - coordinates = store.select_as_coordinates('dfeq','number=evens') + coordinates = store.select_as_coordinates('dfeq', 'number=evens') for c in chunks(coordinates, 2): - print(store.select('dfeq',where=c)) + print(store.select('dfeq', where=c)) Advanced Queries ++++++++++++++++ @@ -4005,7 +4019,7 @@ table names to a list of 'columns' you want in that table. If `None` is used in place of a list, that table will have the remaining unspecified columns of the given DataFrame. The argument ``selector`` defines which table is the selector table (which you can make queries from). -The argument ``dropna`` will drop rows from the input DataFrame to ensure +The argument ``dropna`` will drop rows from the input ``DataFrame`` to ensure tables are synchronized. This means that if a row for one of the tables being written to is entirely ``np.NaN``, that row will be dropped from all tables. @@ -4081,7 +4095,7 @@ the table using a ``where`` that selects all but the missing data. automatically. Thus, repeatedly deleting (or removing nodes) and adding again, **WILL TEND TO INCREASE THE FILE SIZE**. - To *repack and clean* the file, use :ref:`ptrepack ` + To *repack and clean* the file, use :ref:`ptrepack `. .. _io.hdf5-notes: @@ -4464,7 +4478,7 @@ Several caveats. - Non supported types include ``Period`` and actual Python object types. These will raise a helpful error message on an attempt at serialization. -See the `Full Documentation `__ +See the `Full Documentation `__. .. ipython:: python @@ -4522,8 +4536,8 @@ dtypes, including extension dtypes such as datetime with tz. Several caveats. 
-- Duplicate column names and non-string columns names are not supported -- Index level names, if specified, must be strings +- Duplicate column names and non-string columns names are not supported. +- Index level names, if specified, must be strings. - Categorical dtypes can be serialized to parquet, but will de-serialize as ``object`` dtype. - Non supported types include ``Period`` and actual Python object types. These will raise a helpful error message on an attempt at serialization. @@ -4532,7 +4546,7 @@ You can specify an ``engine`` to direct the serialization. This can be one of `` If the engine is NOT specified, then the ``pd.options.io.parquet.engine`` option is checked; if this is also ``auto``, then ``pyarrow`` is tried, and falling back to ``fastparquet``. -See the documentation for `pyarrow `__ and `fastparquet `__ +See the documentation for `pyarrow `__ and `fastparquet `__. .. note:: @@ -4652,7 +4666,7 @@ If you want to manage your own connections you can pass one of those instead: Writing DataFrames '''''''''''''''''' -Assuming the following data is in a DataFrame ``data``, we can insert it into +Assuming the following data is in a ``DataFrame`` ``data``, we can insert it into the database using :func:`~pandas.DataFrame.to_sql`. +-----+------------+-------+-------+-------+ @@ -4738,7 +4752,7 @@ table name and optionally a subset of columns to read. pd.read_sql_table('data', engine) -You can also specify the name of the column as the DataFrame index, +You can also specify the name of the column as the ``DataFrame`` index, and specify a subset of columns to be read. .. ipython:: python @@ -4807,7 +4821,7 @@ Specifying this will return an iterator through chunks of the query result: for chunk in pd.read_sql_query("SELECT * FROM data_chunks", engine, chunksize=5): print(chunk) -You can also run a plain query without creating a dataframe with +You can also run a plain query without creating a ``DataFrame`` with :func:`~pandas.io.sql.execute`. 
This is useful for queries that don't return values, such as INSERT. This is functionally equivalent to calling ``execute`` on the SQLAlchemy engine or db connection object. Again, you must use the SQL syntax @@ -4923,7 +4937,7 @@ pandas integrates with this external package. if ``pandas-gbq`` is installed, yo use the pandas methods ``pd.read_gbq`` and ``DataFrame.to_gbq``, which will call the respective functions from ``pandas-gbq``. -Full documentation can be found `here `__ +Full documentation can be found `here `__. .. _io.stata: @@ -4986,7 +5000,7 @@ Reading from Stata format ''''''''''''''''''''''''' The top-level function ``read_stata`` will read a dta file and return -either a DataFrame or a :class:`~pandas.io.stata.StataReader` that can +either a ``DataFrame`` or a :class:`~pandas.io.stata.StataReader` that can be used to read the file incrementally. .. ipython:: python @@ -5084,7 +5098,7 @@ whether imported ``Categorical`` variables are ordered. .. note:: - *Stata* supports partially labeled series. These series have value labels for + *Stata* supports partially labeled series. These series have value labels for some but not all data values. Importing a partially labeled series will produce a ``Categorical`` with string categories for the values that are labeled and numeric categories for values with no label. @@ -5144,7 +5158,7 @@ into and from pandas, we recommend these packages from the broader community. netCDF '''''' -xarray_ provides data structures inspired by the pandas DataFrame for working +xarray_ provides data structures inspired by the pandas ``DataFrame`` for working with multi-dimensional datasets, with a focus on the netCDF file format and easy conversion to and from pandas. @@ -5173,7 +5187,8 @@ ignored. dtypes: float64(1), int64(1) memory usage: 15.3 MB -Writing +When writing, the top three functions in terms of speed are +``test_pickle_write``, ``test_feather_write`` and ``test_hdf_fixed_write_compress``. .. 
code-block:: ipython @@ -5204,7 +5219,8 @@ Writing In [32]: %timeit test_pickle_write_compress(df) 3.33 s ± 55.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) -Reading +When reading, the top three are ``test_feather_read``, ``test_pickle_read`` and +``test_hdf_fixed_read``. .. code-block:: ipython @@ -5249,7 +5265,7 @@ Space on disk (in bytes) 16000848 Aug 21 18:00 test.pkl 7554108 Aug 21 18:00 test.pkl.compress -And here's the code +And here's the code: .. code-block:: python From 408773dbe40a63b524cbdcd221eeafd6bfb0f7ee Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 14 Feb 2018 13:56:51 -0600 Subject: [PATCH 113/217] CI: Move conda build and ASV check to cron job (#19698) * CI: Remove ASV run * CI: Removed conda build test * Removed asv.sh step * Removed the ASV matrix item --- .travis.yml | 12 ++----- ci/asv.sh | 35 ------------------- ci/install_travis.sh | 17 +-------- ...UILD_TEST.build => requirements-3.5.build} | 0 ...DA_BUILD_TEST.pip => requirements-3.5.pip} | 0 ...DA_BUILD_TEST.run => requirements-3.5.run} | 0 ...ONDA_BUILD_TEST.sh => requirements-3.5.sh} | 2 +- ci/requirements-3.6_ASV.build | 5 --- ci/requirements-3.6_ASV.run | 25 ------------- ci/requirements-3.6_ASV.sh | 7 ---- ci/script_multi.sh | 5 +-- ci/script_single.sh | 5 +-- 12 files changed, 6 insertions(+), 107 deletions(-) delete mode 100755 ci/asv.sh rename ci/{requirements-3.5_CONDA_BUILD_TEST.build => requirements-3.5.build} (100%) rename ci/{requirements-3.5_CONDA_BUILD_TEST.pip => requirements-3.5.pip} (100%) rename ci/{requirements-3.5_CONDA_BUILD_TEST.run => requirements-3.5.run} (100%) rename ci/{requirements-3.5_CONDA_BUILD_TEST.sh => requirements-3.5.sh} (86%) delete mode 100644 ci/requirements-3.6_ASV.build delete mode 100644 ci/requirements-3.6_ASV.run delete mode 100755 ci/requirements-3.6_ASV.sh diff --git a/.travis.yml b/.travis.yml index 4cbe7f86bd2fa..b1168f18315c3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -52,7 +52,7 @@ matrix: # In allow_failures - 
dist: trusty env: - - JOB="3.5_CONDA_BUILD_TEST" TEST_ARGS="--skip-slow --skip-network" CONDA_BUILD_TEST=true + - JOB="3.5" TEST_ARGS="--skip-slow --skip-network" - dist: trusty env: - JOB="3.6" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" CONDA_FORGE=true COVERAGE=true @@ -73,17 +73,13 @@ matrix: env: - JOB="3.6_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" # In allow_failures - - dist: trusty - env: - - JOB="3.6_ASV" ASV=true - # In allow_failures - dist: trusty env: - JOB="3.6_DOC" DOC=true allow_failures: - dist: trusty env: - - JOB="3.5_CONDA_BUILD_TEST" TEST_ARGS="--skip-slow --skip-network" CONDA_BUILD_TEST=true + - JOB="3.5" TEST_ARGS="--skip-slow --skip-network" - dist: trusty env: - JOB="2.7_SLOW" SLOW=true @@ -97,9 +93,6 @@ matrix: - dist: trusty env: - JOB="3.6_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" - - dist: trusty - env: - - JOB="3.6_ASV" ASV=true - dist: trusty env: - JOB="3.6_DOC" DOC=true @@ -135,7 +128,6 @@ script: - ci/script_single.sh - ci/script_multi.sh - ci/lint.sh - - ci/asv.sh - echo "checking imports" - source activate pandas && python ci/check_imports.py - echo "script done" diff --git a/ci/asv.sh b/ci/asv.sh deleted file mode 100755 index 1e9a8d6380eb5..0000000000000 --- a/ci/asv.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/bash - -echo "inside $0" - -source activate pandas - -RET=0 - -if [ "$ASV" ]; then - echo "Check for failed asv benchmarks" - - cd asv_bench - - asv machine --yes - - time asv dev | tee failed_asv.txt - - echo "The following asvs benchmarks (if any) failed." - - cat failed_asv.txt | grep "failed" failed_asv.txt - - if [ $? = "0" ]; then - RET=1 - fi - - echo "DONE displaying failed asvs benchmarks." 
- - rm failed_asv.txt - - echo "Check for failed asv benchmarks DONE" -else - echo "NOT checking for failed asv benchmarks" -fi - -exit $RET diff --git a/ci/install_travis.sh b/ci/install_travis.sh index 6e270519e60c3..458ff083b65eb 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -50,12 +50,6 @@ conda config --set ssl_verify false || exit 1 conda config --set quiet true --set always_yes true --set changeps1 false || exit 1 conda update -q conda -if [ "$CONDA_BUILD_TEST" ]; then - echo - echo "[installing conda-build]" - conda install conda-build -fi - echo echo "[add channels]" conda config --remove channels defaults || exit 1 @@ -122,7 +116,7 @@ if [ "$COVERAGE" ]; then fi echo -if [ -z "$PIP_BUILD_TEST" ] && [ -z "$CONDA_BUILD_TEST" ]; then +if [ -z "$PIP_BUILD_TEST" ] ; then # build but don't install echo "[build em]" @@ -177,15 +171,6 @@ if [ "$PIP_BUILD_TEST" ]; then conda uninstall -y cython time pip install dist/*tar.gz || exit 1 -elif [ "$CONDA_BUILD_TEST" ]; then - - # build & install testing - echo "[building conda recipe]" - time conda build ./conda.recipe --python 3.5 -q --no-test || exit 1 - - echo "[installing]" - conda install pandas --use-local || exit 1 - else # install our pandas diff --git a/ci/requirements-3.5_CONDA_BUILD_TEST.build b/ci/requirements-3.5.build similarity index 100% rename from ci/requirements-3.5_CONDA_BUILD_TEST.build rename to ci/requirements-3.5.build diff --git a/ci/requirements-3.5_CONDA_BUILD_TEST.pip b/ci/requirements-3.5.pip similarity index 100% rename from ci/requirements-3.5_CONDA_BUILD_TEST.pip rename to ci/requirements-3.5.pip diff --git a/ci/requirements-3.5_CONDA_BUILD_TEST.run b/ci/requirements-3.5.run similarity index 100% rename from ci/requirements-3.5_CONDA_BUILD_TEST.run rename to ci/requirements-3.5.run diff --git a/ci/requirements-3.5_CONDA_BUILD_TEST.sh b/ci/requirements-3.5.sh similarity index 86% rename from ci/requirements-3.5_CONDA_BUILD_TEST.sh rename to ci/requirements-3.5.sh index 
093fdbcf21d78..529e1e8742722 100644 --- a/ci/requirements-3.5_CONDA_BUILD_TEST.sh +++ b/ci/requirements-3.5.sh @@ -2,7 +2,7 @@ source activate pandas -echo "install 35 CONDA_BUILD_TEST" +echo "install 35" # pip install python-dateutil to get latest conda remove -n pandas python-dateutil --force diff --git a/ci/requirements-3.6_ASV.build b/ci/requirements-3.6_ASV.build deleted file mode 100644 index bc72eed2a0d4e..0000000000000 --- a/ci/requirements-3.6_ASV.build +++ /dev/null @@ -1,5 +0,0 @@ -python=3.6* -python-dateutil -pytz -numpy=1.13* -cython diff --git a/ci/requirements-3.6_ASV.run b/ci/requirements-3.6_ASV.run deleted file mode 100644 index 6c45e3371e9cf..0000000000000 --- a/ci/requirements-3.6_ASV.run +++ /dev/null @@ -1,25 +0,0 @@ -ipython -ipykernel -ipywidgets -sphinx=1.5* -nbconvert -nbformat -notebook -matplotlib -seaborn -scipy -lxml -beautifulsoup4 -html5lib -pytables -python-snappy -openpyxl -xlrd -xlwt -xlsxwriter -sqlalchemy -numexpr -bottleneck -statsmodels -xarray -pyqt diff --git a/ci/requirements-3.6_ASV.sh b/ci/requirements-3.6_ASV.sh deleted file mode 100755 index 8a46f85dbb6bc..0000000000000 --- a/ci/requirements-3.6_ASV.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash - -source activate pandas - -echo "[install ASV_BUILD deps]" - -pip install git+https://github.com/spacetelescope/asv diff --git a/ci/script_multi.sh b/ci/script_multi.sh index 766e51625fbe6..6c354fc4cab0b 100755 --- a/ci/script_multi.sh +++ b/ci/script_multi.sh @@ -18,7 +18,7 @@ fi export PYTHONHASHSEED=$(python -c 'import random; print(random.randint(1, 4294967295))') echo PYTHONHASHSEED=$PYTHONHASHSEED -if [ "$PIP_BUILD_TEST" ] || [ "$CONDA_BUILD_TEST" ]; then +if [ "$PIP_BUILD_TEST" ] ; then echo "[build-test]" echo "[env]" @@ -37,9 +37,6 @@ if [ "$PIP_BUILD_TEST" ] || [ "$CONDA_BUILD_TEST" ]; then elif [ "$DOC" ]; then echo "We are not running pytest as this is a doc-build" -elif [ "$ASV" ]; then - echo "We are not running pytest as this is an asv-build" - elif [ 
"$COVERAGE" ]; then echo pytest -s -n 2 -m "not single" --cov=pandas --cov-report xml:/tmp/cov-multiple.xml --junitxml=/tmp/multiple.xml --strict $TEST_ARGS pandas pytest -s -n 2 -m "not single" --cov=pandas --cov-report xml:/tmp/cov-multiple.xml --junitxml=/tmp/multiple.xml --strict $TEST_ARGS pandas diff --git a/ci/script_single.sh b/ci/script_single.sh index 153847ab2e8c9..74b0e897f1d73 100755 --- a/ci/script_single.sh +++ b/ci/script_single.sh @@ -16,15 +16,12 @@ if [ "$SLOW" ]; then TEST_ARGS="--only-slow --skip-network" fi -if [ "$PIP_BUILD_TEST" ] || [ "$CONDA_BUILD_TEST" ]; then +if [ "$PIP_BUILD_TEST" ]; then echo "We are not running pytest as this is a build test." elif [ "$DOC" ]; then echo "We are not running pytest as this is a doc-build" -elif [ "$ASV" ]; then - echo "We are not running pytest as this is an asv-build" - elif [ "$COVERAGE" ]; then echo pytest -s -m "single" --strict --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas pytest -s -m "single" --strict --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas From e654b81d12275a0eaaefa14067c173c2aac44fce Mon Sep 17 00:00:00 2001 From: William Ayd Date: Wed, 14 Feb 2018 15:12:14 -0800 Subject: [PATCH 114/217] GroupBy Rank SegFault Fix - astype instead of view (#19701) * Use astype instead of view for lexsort upcasting * Added copy=False to astype in group_rank --- pandas/_libs/groupby_helper.pxi.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in index 1d77a373bb7dd..fe4d31516d839 100644 --- a/pandas/_libs/groupby_helper.pxi.in +++ b/pandas/_libs/groupby_helper.pxi.in @@ -531,7 +531,7 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out, # each label corresponds to a different group value, # the mask helps you differentiate missing values before # performing sort on the actual values - _as = 
np.lexsort(order).view(dtype=np.int64) + _as = np.lexsort(order).astype(np.int64, copy=False) if not ascending: _as = _as[::-1] From e7a26b06fda55236df78378c620bedc20a065e9c Mon Sep 17 00:00:00 2001 From: Gilberto Olimpio Date: Thu, 15 Feb 2018 06:17:31 -0200 Subject: [PATCH 115/217] DOC: Ambiguous description in to_parquet engine documentation (#19669) --- pandas/core/frame.py | 7 ++++--- pandas/io/parquet.py | 14 ++++++++------ 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index bc045d74cee52..a001037b573d4 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1678,9 +1678,10 @@ def to_parquet(self, fname, engine='auto', compression='snappy', fname : str string file path engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto' - Parquet reader library to use. If 'auto', then the option - 'io.parquet.engine' is used. If 'auto', then the first - library to be installed is used. + Parquet library to use. If 'auto', then the option + ``io.parquet.engine`` is used. The default ``io.parquet.engine`` + behavior is to try 'pyarrow', falling back to 'fastparquet' if + 'pyarrow' is unavailable. compression : {'snappy', 'gzip', 'brotli', None}, default 'snappy' Name of the compression to use. Use ``None`` for no compression. kwargs diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 1c22a305c089d..a99014f07a6b3 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -244,9 +244,10 @@ def to_parquet(df, path, engine='auto', compression='snappy', **kwargs): path : string File path engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto' - Parquet reader library to use. If 'auto', then the option - 'io.parquet.engine' is used. If 'auto', then the first - library to be installed is used. + Parquet library to use. If 'auto', then the option + ``io.parquet.engine`` is used. 
The default ``io.parquet.engine`` + behavior is to try 'pyarrow', falling back to 'fastparquet' if + 'pyarrow' is unavailable. compression : {'snappy', 'gzip', 'brotli', None}, default 'snappy' Name of the compression to use. Use ``None`` for no compression. kwargs @@ -271,9 +272,10 @@ def read_parquet(path, engine='auto', columns=None, **kwargs): .. versionadded 0.21.1 engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto' - Parquet reader library to use. If 'auto', then the option - 'io.parquet.engine' is used. If 'auto', then the first - library to be installed is used. + Parquet library to use. If 'auto', then the option + ``io.parquet.engine`` is used. The default ``io.parquet.engine`` + behavior is to try 'pyarrow', falling back to 'fastparquet' if + 'pyarrow' is unavailable. kwargs are passed to the engine Returns From 196596a4c8bef8c4cfdc8392340cdc6add5830a1 Mon Sep 17 00:00:00 2001 From: Jan F-F Date: Thu, 15 Feb 2018 00:36:09 -0800 Subject: [PATCH 116/217] ENH: groupby().is_monotonic_increasing #17015 (#17453) --- doc/source/api.rst | 2 + doc/source/whatsnew/v0.23.0.txt | 1 + pandas/core/groupby.py | 4 +- pandas/tests/groupby/test_groupby.py | 61 +++++++++++++++++++++++++- pandas/tests/groupby/test_whitelist.py | 7 ++- 5 files changed, 71 insertions(+), 4 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index 44f87aa3e1cec..103b0fe9ff019 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -2240,6 +2240,8 @@ The following methods are available only for ``SeriesGroupBy`` objects. SeriesGroupBy.nunique SeriesGroupBy.unique SeriesGroupBy.value_counts + SeriesGroupBy.is_monotonic_increasing + SeriesGroupBy.is_monotonic_decreasing The following methods are available only for ``DataFrameGroupBy`` objects. 
diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 932618ba1df21..a2198d9103528 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -323,6 +323,7 @@ Other Enhancements - ``IntervalIndex.astype`` now supports conversions between subtypes when passed an ``IntervalDtype`` (:issue:`19197`) - :class:`IntervalIndex` and its associated constructor methods (``from_arrays``, ``from_breaks``, ``from_tuples``) have gained a ``dtype`` parameter (:issue:`19262`) +- Added :func:`SeriesGroupBy.is_monotonic_increasing` and :func:`SeriesGroupBy.is_monotonic_decreasing` (:issue:`17015`) .. _whatsnew_0230.api_breaking: diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 0363bcd02aa16..b1615f720368d 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -336,7 +336,9 @@ ]) | _plotting_methods _series_apply_whitelist = ((_common_apply_whitelist | - {'nlargest', 'nsmallest'}) - + {'nlargest', 'nsmallest', + 'is_monotonic_increasing', + 'is_monotonic_decreasing'}) - {'boxplot'}) | frozenset(['dtype', 'unique']) _dataframe_apply_whitelist = ((_common_apply_whitelist | diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 6eacd45deb7bc..4cf7c8013aa2b 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2639,7 +2639,7 @@ def test_group_shift_with_null_key(self): # Generate a moderately large dataframe with occasional missing # values in column `B`, and then group by [`A`, `B`]. This should # force `-1` in `labels` array of `g.grouper.group_info` exactly - # at those places, where the group-by key is partilly missing. + # at those places, where the group-by key is partially missing. 
df = DataFrame([(i % 12, i % 3 if i % 3 else np.nan, i) for i in range(n_rows)], dtype=float, columns=["A", "B", "Z"], index=None) @@ -2764,6 +2764,65 @@ def test_cummin_cummax(self): expected = pd.Series([1, 2, 1], name='b') tm.assert_series_equal(result, expected) + @pytest.mark.parametrize('in_vals, out_vals', [ + + # Basics: strictly increasing (T), strictly decreasing (F), + # abs val increasing (F), non-strictly increasing (T) + ([1, 2, 5, 3, 2, 0, 4, 5, -6, 1, 1], + [True, False, False, True]), + + # Test with inf vals + ([1, 2.1, np.inf, 3, 2, np.inf, -np.inf, 5, 11, 1, -np.inf], + [True, False, True, False]), + + # Test with nan vals; should always be False + ([1, 2, np.nan, 3, 2, np.nan, np.nan, 5, -np.inf, 1, np.nan], + [False, False, False, False]), + ]) + def test_is_monotonic_increasing(self, in_vals, out_vals): + # GH 17015 + source_dict = { + 'A': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11'], + 'B': ['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'c', 'd', 'd'], + 'C': in_vals} + df = pd.DataFrame(source_dict) + result = df.groupby('B').C.is_monotonic_increasing + index = Index(list('abcd'), name='B') + expected = pd.Series(index=index, data=out_vals, name='C') + tm.assert_series_equal(result, expected) + + # Also check result equal to manually taking x.is_monotonic_increasing. 
+ expected = ( + df.groupby(['B']).C.apply(lambda x: x.is_monotonic_increasing)) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('in_vals, out_vals', [ + # Basics: strictly decreasing (T), strictly increasing (F), + # abs val decreasing (F), non-strictly decreasing (T) + ([10, 9, 7, 3, 4, 5, -3, 2, 0, 1, 1], + [True, False, False, True]), + + # Test with inf vals + ([np.inf, 1, -np.inf, np.inf, 2, -3, -np.inf, 5, -3, -np.inf, -np.inf], + [True, True, False, True]), + + # Test with nan vals; should always be False + ([1, 2, np.nan, 3, 2, np.nan, np.nan, 5, -np.inf, 1, np.nan], + [False, False, False, False]), + ]) + def test_is_monotonic_decreasing(self, in_vals, out_vals): + # GH 17015 + source_dict = { + 'A': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11'], + 'B': ['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'c', 'd', 'd'], + 'C': in_vals} + + df = pd.DataFrame(source_dict) + result = df.groupby('B').C.is_monotonic_decreasing + index = Index(list('abcd'), name='B') + expected = pd.Series(index=index, data=out_vals, name='C') + tm.assert_series_equal(result, expected) + def test_apply_numeric_coercion_when_datetime(self): # In the past, group-by/apply operations have been over-eager # in converting dtypes to numeric, in the presence of datetime diff --git a/pandas/tests/groupby/test_whitelist.py b/pandas/tests/groupby/test_whitelist.py index 3117525d899f6..8d6e074881cbb 100644 --- a/pandas/tests/groupby/test_whitelist.py +++ b/pandas/tests/groupby/test_whitelist.py @@ -88,6 +88,8 @@ 'unique', 'nlargest', 'nsmallest', + 'is_monotonic_increasing', + 'is_monotonic_decreasing', ]) @@ -184,7 +186,7 @@ def test_regression_whitelist_methods( axis, skipna, sort): # GH6944 # GH 17537 - # explicitly test the whitelest methods + # explicitly test the whitelist methods if axis == 0: frame = raw_frame @@ -249,7 +251,8 @@ def test_tab_completion(mframe): 'cumsum', 'cumcount', 'ngroup', 'all', 'shift', 'skew', 'take', 'tshift', 'pct_change', 'any', 
'mad', 'corr', 'corrwith', 'cov', 'dtypes', 'ndim', 'diff', 'idxmax', 'idxmin', - 'ffill', 'bfill', 'pad', 'backfill', 'rolling', 'expanding', 'pipe'} + 'ffill', 'bfill', 'pad', 'backfill', 'rolling', 'expanding', 'pipe', + } assert results == expected From bdd6a3376615ab097d40c52b908fce25c0b54634 Mon Sep 17 00:00:00 2001 From: Clemens Brunner Date: Thu, 15 Feb 2018 10:00:32 +0100 Subject: [PATCH 117/217] DOC: improve docs to clarify MultiIndex indexing (#19507) --- doc/source/advanced.rst | 86 ++++++++++++++++++++++++++++------------- 1 file changed, 59 insertions(+), 27 deletions(-) diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst index ca903dadc6eb1..c455fbb8d0687 100644 --- a/doc/source/advanced.rst +++ b/doc/source/advanced.rst @@ -113,7 +113,13 @@ of the index is up to you: pd.DataFrame(np.random.randn(6, 6), index=index[:6], columns=index[:6]) We've "sparsified" the higher levels of the indexes to make the console output a -bit easier on the eyes. +bit easier on the eyes. Note that how the index is displayed can be controlled using the +``multi_sparse`` option in ``pandas.set_option()``: + +.. ipython:: python + + with pd.option_context('display.multi_sparse', False): + df It's worth keeping in mind that there's nothing preventing you from using tuples as atomic labels on an axis: @@ -129,15 +135,6 @@ can find yourself working with hierarchically-indexed data without creating a ``MultiIndex`` explicitly yourself. However, when loading data from a file, you may wish to generate your own ``MultiIndex`` when preparing the data set. -Note that how the index is displayed by be controlled using the -``multi_sparse`` option in ``pandas.set_options()``: - -.. ipython:: python - - pd.set_option('display.multi_sparse', False) - df - pd.set_option('display.multi_sparse', True) - .. _advanced.get_level_values: Reconstructing the level labels @@ -180,14 +177,13 @@ For example: .. 
ipython:: python -   # original MultiIndex -   df.columns +   df.columns # original MultiIndex - # sliced - df[['foo','qux']].columns + df[['foo','qux']].columns # sliced This is done to avoid a recomputation of the levels in order to make slicing -highly performant. If you want to see the actual used levels. +highly performant. If you want to see only the used levels, you can use the +:func:`MultiIndex.get_level_values` method. .. ipython:: python @@ -196,7 +192,7 @@ highly performant. If you want to see the actual used levels. # for a specific level df[['foo','qux']].columns.get_level_values(0) -To reconstruct the ``MultiIndex`` with only the used levels, the +To reconstruct the ``MultiIndex`` with only the used levels, the ``remove_unused_levels`` method may be used. .. versionadded:: 0.20.0 @@ -231,15 +227,33 @@ Advanced indexing with hierarchical index ----------------------------------------- Syntactically integrating ``MultiIndex`` in advanced indexing with ``.loc`` is a -bit challenging, but we've made every effort to do so. For example the -following works as you would expect: +bit challenging, but we've made every effort to do so. In general, MultiIndex +keys take the form of tuples. For example, the following works as you would expect: .. ipython:: python df = df.T df - df.loc['bar'] - df.loc['bar', 'two'] + df.loc[('bar', 'two'),] + +Note that ``df.loc['bar', 'two']`` would also work in this example, but this shorthand +notation can lead to ambiguity in general. + +If you also want to index a specific column with ``.loc``, you must use a tuple +like this: + +.. ipython:: python + + df.loc[('bar', 'two'), 'A'] + +You don't have to specify all levels of the ``MultiIndex`` by passing only the +first elements of the tuple. 
For example, you can use "partial" indexing to +get all elements with ``bar`` in the first level as follows: + +df.loc['bar'] + +This is a shortcut for the slightly more verbose notation ``df.loc[('bar',),]`` (equivalent +to ``df.loc['bar',]`` in this example). "Partial" slicing also works quite nicely. @@ -260,6 +274,24 @@ Passing a list of labels or tuples works similar to reindexing: df.loc[[('bar', 'two'), ('qux', 'one')]] +.. info:: + + It is important to note that tuples and lists are not treated identically + in pandas when it comes to indexing. Whereas a tuple is interpreted as one + multi-level key, a list is used to specify several keys. Or in other words, + tuples go horizontally (traversing levels), lists go vertically (scanning levels). + +Importantly, a list of tuples indexes several complete ``MultiIndex`` keys, +whereas a tuple of lists refer to several values within a level: + +.. ipython:: python + + s = pd.Series([1, 2, 3, 4, 5, 6], + index=pd.MultiIndex.from_product([["A", "B"], ["c", "d", "e"]])) + s.loc[[("A", "c"), ("B", "d")]] # list of tuples + s.loc[(["A", "B"], ["c", "d"])] # tuple of lists + + .. _advanced.mi_slicers: Using slicers @@ -317,7 +349,7 @@ Basic multi-index slicing using slices, lists, and labels. dfmi.loc[(slice('A1','A3'), slice(None), ['C1', 'C3']), :] -You can use :class:`pandas.IndexSlice` to facilitate a more natural syntax +You can use :class:`pandas.IndexSlice` to facilitate a more natural syntax using ``:``, rather than using ``slice(None)``. .. ipython:: python @@ -626,7 +658,7 @@ Index Types ----------- We have discussed ``MultiIndex`` in the previous sections pretty extensively. ``DatetimeIndex`` and ``PeriodIndex`` -are shown :ref:`here `, and information about +are shown :ref:`here `, and information about `TimedeltaIndex`` is found :ref:`here `. In the following sub-sections we will highlight some other index types. 
@@ -671,9 +703,9 @@ The ``CategoricalIndex`` is **preserved** after indexing: df2.loc['a'].index -Sorting the index will sort by the order of the categories (Recall that we -created the index with ``CategoricalDtype(list('cab'))``, so the sorted -order is ``cab``.). +Sorting the index will sort by the order of the categories (recall that we +created the index with ``CategoricalDtype(list('cab'))``, so the sorted +order is ``cab``). .. ipython:: python @@ -726,7 +758,7 @@ Int64Index and RangeIndex Indexing on an integer-based Index with floats has been clarified in 0.18.0, for a summary of the changes, see :ref:`here `. -``Int64Index`` is a fundamental basic index in pandas. +``Int64Index`` is a fundamental basic index in pandas. This is an Immutable array implementing an ordered, sliceable set. Prior to 0.18.0, the ``Int64Index`` would provide the default index for all ``NDFrame`` objects. @@ -765,7 +797,7 @@ The only positional indexing is via ``iloc``. sf.iloc[3] A scalar index that is not found will raise a ``KeyError``. -Slicing is primarily on the values of the index when using ``[],ix,loc``, and +Slicing is primarily on the values of the index when using ``[],ix,loc``, and **always** positional when using ``iloc``. The exception is when the slice is boolean, in which case it will always be positional. 
From 6a6f89781d66d04f6e0be969febd40cc67cc9fce Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 15 Feb 2018 04:31:11 -0800 Subject: [PATCH 118/217] add missing args, make kwarg explicit (#19691) --- pandas/core/indexes/base.py | 4 ++-- pandas/core/internals.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index be7c1624936bf..81b6b28d3927e 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3943,8 +3943,8 @@ def _evaluate_with_timedelta_like(self, other, op, opstr, reversed=False): def _evaluate_with_datetime_like(self, other, op, opstr): raise TypeError("can only perform ops with datetime like values") - def _evaluate_compare(self, op): - raise base.AbstractMethodError(self) + def _evaluate_compare(self, other, op): + raise com.AbstractMethodError(self) @classmethod def _add_comparison_methods(cls): diff --git a/pandas/core/internals.py b/pandas/core/internals.py index f553e1a02c9d6..dd5feefc49fe3 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -3569,8 +3569,8 @@ def reduction(self, f, axis=0, consolidate=True, transposed=False, placement=np.arange(len(values)))], axes[0]) - def isna(self, **kwargs): - return self.apply('apply', **kwargs) + def isna(self, func, **kwargs): + return self.apply('apply', func=func, **kwargs) def where(self, **kwargs): return self.apply('where', **kwargs) From 0cde46bfb14f466b1e40a267ba58db60a7e12c38 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 16 Feb 2018 10:21:52 -0800 Subject: [PATCH 119/217] remove usages of _get_na_value (#19692) --- pandas/core/indexes/api.py | 3 +-- pandas/core/indexes/base.py | 8 +------- pandas/core/indexes/multi.py | 4 ++-- pandas/core/reshape/reshape.py | 6 +++--- pandas/core/series.py | 4 +--- 5 files changed, 8 insertions(+), 17 deletions(-) diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index b7af533f96ddc..2e5ec8b554ce7 100644 --- 
a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -2,7 +2,6 @@ _new_Index, _ensure_index, _ensure_index_from_sequences, - _get_na_value, InvalidIndexError) # noqa from pandas.core.indexes.category import CategoricalIndex # noqa from pandas.core.indexes.multi import MultiIndex # noqa @@ -25,7 +24,7 @@ 'InvalidIndexError', 'TimedeltaIndex', 'PeriodIndex', 'DatetimeIndex', '_new_Index', 'NaT', - '_ensure_index', '_ensure_index_from_sequences', '_get_na_value', + '_ensure_index', '_ensure_index_from_sequences', '_get_combined_index', '_get_objs_combined_axis', '_union_indexes', '_get_consensus_names', diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 81b6b28d3927e..02dd2dbc25703 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2098,7 +2098,7 @@ def asof(self, label): try: loc = self.get_loc(label, method='pad') except KeyError: - return _get_na_value(self.dtype) + return self._na_value else: if isinstance(loc, slice): loc = loc.indices(len(self))[-1] @@ -4316,12 +4316,6 @@ def _ensure_index(index_like, copy=False): return Index(index_like) -def _get_na_value(dtype): - if is_datetime64_any_dtype(dtype) or is_timedelta64_dtype(dtype): - return libts.NaT - return np.nan - - def _ensure_has_len(seq): """If seq is an iterator, put its values into a list.""" try: diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 94dbd8b884e47..73f4aee1c4880 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -34,7 +34,7 @@ from pandas.core.indexes.base import ( Index, _ensure_index, - _get_na_value, InvalidIndexError, + InvalidIndexError, _index_shared_docs) from pandas.core.indexes.frozen import ( FrozenNDArray, FrozenList, _ensure_frozen) @@ -804,7 +804,7 @@ def values(self): elif box: taken = algos.take_1d(lev._box_values(lev._ndarray_values), lab, - fill_value=_get_na_value(lev.dtype.type)) + fill_value=lev._na_value) else: taken = 
algos.take_1d(np.asarray(lev._values), lab) values.append(taken) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index c8bca476c65f2..3ef152d091b24 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -29,7 +29,7 @@ import pandas.core.algorithms as algos from pandas._libs import algos as _algos, reshape as _reshape -from pandas.core.index import Index, MultiIndex, _get_na_value +from pandas.core.index import Index, MultiIndex class _Unstacker(object): @@ -260,7 +260,7 @@ def get_new_columns(self): return self.removed_level lev = self.removed_level - return lev.insert(0, _get_na_value(lev.dtype.type)) + return lev.insert(0, lev._na_value) stride = len(self.removed_level) + self.lift width = len(self.value_columns) @@ -299,7 +299,7 @@ def get_new_index(self): if len(self.new_index_levels) == 1: lev, lab = self.new_index_levels[0], result_labels[0] if (lab == -1).any(): - lev = lev.insert(len(lev), _get_na_value(lev.dtype.type)) + lev = lev.insert(len(lev), lev._na_value) return lev.take(lab) return MultiIndex(levels=self.new_index_levels, labels=result_labels, diff --git a/pandas/core/series.py b/pandas/core/series.py index 655eaa5373f5a..90dc14836ab55 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1261,8 +1261,6 @@ def count(self, level=None): ------- nobs : int or Series (if level specified) """ - from pandas.core.index import _get_na_value - if level is None: return notna(com._values_from_object(self)).sum() @@ -1275,7 +1273,7 @@ def count(self, level=None): mask = lab == -1 if mask.any(): lab[mask] = cnt = len(lev) - lev = lev.insert(cnt, _get_na_value(lev.dtype.type)) + lev = lev.insert(cnt, lev._na_value) obs = lab[notna(self.values)] out = np.bincount(obs, minlength=len(lev) or None) From 335314a3213f758a368520c5cc7c876a3ed6b85f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 17 Feb 2018 12:47:18 -0800 Subject: [PATCH 120/217] TST: Parametrize PeriodIndex tests (#19659) * 
fixup formatting * parametrize PeriodIndex tests * fixup typo * put lists of params at module level * make fixtures * docstrings for fixtures * requested docstring --- .../tests/indexes/period/test_arithmetic.py | 521 ++++++++++-------- pandas/tests/indexes/period/test_astype.py | 99 ++++ pandas/tests/indexes/period/test_ops.py | 36 -- pandas/tests/indexes/period/test_period.py | 60 +- 4 files changed, 394 insertions(+), 322 deletions(-) create mode 100644 pandas/tests/indexes/period/test_astype.py diff --git a/pandas/tests/indexes/period/test_arithmetic.py b/pandas/tests/indexes/period/test_arithmetic.py index 81171920f635f..5f8f9533e9c44 100644 --- a/pandas/tests/indexes/period/test_arithmetic.py +++ b/pandas/tests/indexes/period/test_arithmetic.py @@ -11,7 +11,81 @@ import pandas.core.indexes.period as period +_common_mismatch = [pd.offsets.YearBegin(2), + pd.offsets.MonthBegin(1), + pd.offsets.Minute()] + + +@pytest.fixture(params=[timedelta(minutes=30), + np.timedelta64(30, 's'), + Timedelta(seconds=30)] + _common_mismatch) +def not_hourly(request): + """ + Several timedelta-like and DateOffset instances that are _not_ + compatible with Hourly frequencies. + """ + return request.param + + +@pytest.fixture(params=[np.timedelta64(4, 'h'), + timedelta(hours=23), + Timedelta('23:00:00')] + _common_mismatch) +def not_daily(request): + """ + Several timedelta-like and DateOffset instances that are _not_ + compatible with Daily frequencies. + """ + return request.param + + +@pytest.fixture(params=[np.timedelta64(365, 'D'), + timedelta(365), + Timedelta(days=365)] + _common_mismatch) +def mismatched(request): + """ + Several timedelta-like and DateOffset instances that are _not_ + compatible with Monthly or Annual frequencies. 
+ """ + return request.param + + +@pytest.fixture(params=[pd.offsets.Day(3), + timedelta(days=3), + np.timedelta64(3, 'D'), + pd.offsets.Hour(72), + timedelta(minutes=60 * 24 * 3), + np.timedelta64(72, 'h'), + Timedelta('72:00:00')]) +def three_days(request): + """ + Several timedelta-like and DateOffset objects that each represent + a 3-day timedelta + """ + return request.param + + +@pytest.fixture(params=[pd.offsets.Hour(2), + timedelta(hours=2), + np.timedelta64(2, 'h'), + pd.offsets.Minute(120), + timedelta(minutes=120), + np.timedelta64(120, 'm')]) +def two_hours(request): + """ + Several timedelta-like and DateOffset objects that each represent + a 2-hour timedelta + """ + return request.param + + class TestPeriodIndexComparisons(object): + def test_pi_cmp_period(self): + idx = period_range('2007-01', periods=20, freq='M') + + result = idx < idx[10] + exp = idx.values < idx.values[10] + tm.assert_numpy_array_equal(result, exp) + @pytest.mark.parametrize('freq', ['M', '2M', '3M']) def test_pi_cmp_pi(self, freq): base = PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04'], @@ -148,32 +222,35 @@ def test_pi_cmp_nat_mismatched_freq_raises(self, freq): idx1 == diff # TODO: De-duplicate with test_pi_cmp_nat - def test_comp_nat(self): + @pytest.mark.parametrize('dtype', [object, None]) + def test_comp_nat(self, dtype): left = pd.PeriodIndex([pd.Period('2011-01-01'), pd.NaT, pd.Period('2011-01-03')]) right = pd.PeriodIndex([pd.NaT, pd.NaT, pd.Period('2011-01-03')]) - for lhs, rhs in [(left, right), - (left.astype(object), right.astype(object))]: - result = lhs == rhs - expected = np.array([False, False, True]) - tm.assert_numpy_array_equal(result, expected) + if dtype is not None: + left = left.astype(dtype) + right = right.astype(dtype) - result = lhs != rhs - expected = np.array([True, True, False]) - tm.assert_numpy_array_equal(result, expected) + result = left == right + expected = np.array([False, False, True]) + tm.assert_numpy_array_equal(result, 
expected) + + result = left != right + expected = np.array([True, True, False]) + tm.assert_numpy_array_equal(result, expected) - expected = np.array([False, False, False]) - tm.assert_numpy_array_equal(lhs == pd.NaT, expected) - tm.assert_numpy_array_equal(pd.NaT == rhs, expected) + expected = np.array([False, False, False]) + tm.assert_numpy_array_equal(left == pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT == right, expected) - expected = np.array([True, True, True]) - tm.assert_numpy_array_equal(lhs != pd.NaT, expected) - tm.assert_numpy_array_equal(pd.NaT != lhs, expected) + expected = np.array([True, True, True]) + tm.assert_numpy_array_equal(left != pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT != left, expected) - expected = np.array([False, False, False]) - tm.assert_numpy_array_equal(lhs < pd.NaT, expected) - tm.assert_numpy_array_equal(pd.NaT > lhs, expected) + expected = np.array([False, False, False]) + tm.assert_numpy_array_equal(left < pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT > left, expected) class TestPeriodIndexArithmetic(object): @@ -203,7 +280,7 @@ def test_pi_radd_offset_array(self): expected = pd.PeriodIndex([pd.Period('2015Q2'), pd.Period('2015Q4')]) tm.assert_index_equal(res, expected) - def test_add_iadd(self): + def test_pi_add_iadd_pi_raises(self): rng = pd.period_range('1/1/2000', freq='D', periods=5) other = pd.period_range('1/6/2000', freq='D', periods=5) @@ -214,89 +291,7 @@ def test_add_iadd(self): with pytest.raises(TypeError): rng += other - # offset - # DateOffset - rng = pd.period_range('2014', '2024', freq='A') - result = rng + pd.offsets.YearEnd(5) - expected = pd.period_range('2019', '2029', freq='A') - tm.assert_index_equal(result, expected) - rng += pd.offsets.YearEnd(5) - tm.assert_index_equal(rng, expected) - - for o in [pd.offsets.YearBegin(2), pd.offsets.MonthBegin(1), - pd.offsets.Minute(), np.timedelta64(365, 'D'), - timedelta(365), Timedelta(days=365)]: - msg = ('Input has different 
freq(=.+)? ' - 'from PeriodIndex\\(freq=A-DEC\\)') - with tm.assert_raises_regex( - period.IncompatibleFrequency, msg): - rng + o - - rng = pd.period_range('2014-01', '2016-12', freq='M') - result = rng + pd.offsets.MonthEnd(5) - expected = pd.period_range('2014-06', '2017-05', freq='M') - tm.assert_index_equal(result, expected) - rng += pd.offsets.MonthEnd(5) - tm.assert_index_equal(rng, expected) - - for o in [pd.offsets.YearBegin(2), pd.offsets.MonthBegin(1), - pd.offsets.Minute(), np.timedelta64(365, 'D'), - timedelta(365), Timedelta(days=365)]: - rng = pd.period_range('2014-01', '2016-12', freq='M') - msg = 'Input has different freq(=.+)? from PeriodIndex\\(freq=M\\)' - with tm.assert_raises_regex( - period.IncompatibleFrequency, msg): - rng + o - - # Tick - offsets = [pd.offsets.Day(3), timedelta(days=3), - np.timedelta64(3, 'D'), pd.offsets.Hour(72), - timedelta(minutes=60 * 24 * 3), np.timedelta64(72, 'h'), - Timedelta('72:00:00')] - for delta in offsets: - rng = pd.period_range('2014-05-01', '2014-05-15', freq='D') - result = rng + delta - expected = pd.period_range('2014-05-04', '2014-05-18', freq='D') - tm.assert_index_equal(result, expected) - rng += delta - tm.assert_index_equal(rng, expected) - - for o in [pd.offsets.YearBegin(2), pd.offsets.MonthBegin(1), - pd.offsets.Minute(), np.timedelta64(4, 'h'), - timedelta(hours=23), Timedelta('23:00:00')]: - rng = pd.period_range('2014-05-01', '2014-05-15', freq='D') - msg = 'Input has different freq(=.+)? 
from PeriodIndex\\(freq=D\\)' - with tm.assert_raises_regex( - period.IncompatibleFrequency, msg): - rng + o - - offsets = [pd.offsets.Hour(2), timedelta(hours=2), - np.timedelta64(2, 'h'), pd.offsets.Minute(120), - timedelta(minutes=120), np.timedelta64(120, 'm'), - Timedelta(minutes=120)] - for delta in offsets: - rng = pd.period_range('2014-01-01 10:00', '2014-01-05 10:00', - freq='H') - result = rng + delta - expected = pd.period_range('2014-01-01 12:00', '2014-01-05 12:00', - freq='H') - tm.assert_index_equal(result, expected) - rng += delta - tm.assert_index_equal(rng, expected) - - for delta in [pd.offsets.YearBegin(2), timedelta(minutes=30), - np.timedelta64(30, 's'), Timedelta(seconds=30)]: - rng = pd.period_range('2014-01-01 10:00', '2014-01-05 10:00', - freq='H') - msg = 'Input has different freq(=.+)? from PeriodIndex\\(freq=H\\)' - with tm.assert_raises_regex( - period.IncompatibleFrequency, msg): - rng + delta - with tm.assert_raises_regex( - period.IncompatibleFrequency, msg): - rng += delta - - def test_pi_add_int(self, one): + def test_pi_add_iadd_int(self, one): # Variants of `one` for #19012 rng = pd.period_range('2000-01-01 09:00', freq='H', periods=10) result = rng + one @@ -305,16 +300,27 @@ def test_pi_add_int(self, one): rng += one tm.assert_index_equal(rng, expected) + def test_pi_sub_isub_int(self, one): + """ + PeriodIndex.__sub__ and __isub__ with several representations of + the integer 1, e.g. int, long, np.int64, np.uint8, ... 
+ """ + rng = pd.period_range('2000-01-01 09:00', freq='H', periods=10) + result = rng - one + expected = pd.period_range('2000-01-01 08:00', freq='H', periods=10) + tm.assert_index_equal(result, expected) + rng -= one + tm.assert_index_equal(rng, expected) + @pytest.mark.parametrize('five', [5, np.array(5, dtype=np.int64)]) - def test_sub(self, five): + def test_pi_sub_intlike(self, five): rng = period_range('2007-01', periods=50) result = rng - five exp = rng + (-five) tm.assert_index_equal(result, exp) - def test_sub_isub(self): - + def test_pi_sub_isub_pi_raises(self): # previously performed setop, now raises TypeError (GH14164) # TODO needs to wait on #13077 for decision on result type rng = pd.period_range('1/1/2000', freq='D', periods=5) @@ -326,6 +332,7 @@ def test_sub_isub(self): with pytest.raises(TypeError): rng -= other + def test_pi_sub_isub_offset(self): # offset # DateOffset rng = pd.period_range('2014', '2024', freq='A') @@ -335,102 +342,165 @@ def test_sub_isub(self): rng -= pd.offsets.YearEnd(5) tm.assert_index_equal(rng, expected) - for o in [pd.offsets.YearBegin(2), pd.offsets.MonthBegin(1), - pd.offsets.Minute(), np.timedelta64(365, 'D'), - timedelta(365)]: - rng = pd.period_range('2014', '2024', freq='A') - msg = ('Input has different freq(=.+)? ' - 'from PeriodIndex\\(freq=A-DEC\\)') - with tm.assert_raises_regex( - period.IncompatibleFrequency, msg): - rng - o - rng = pd.period_range('2014-01', '2016-12', freq='M') result = rng - pd.offsets.MonthEnd(5) expected = pd.period_range('2013-08', '2016-07', freq='M') tm.assert_index_equal(result, expected) + rng -= pd.offsets.MonthEnd(5) tm.assert_index_equal(rng, expected) - for o in [pd.offsets.YearBegin(2), pd.offsets.MonthBegin(1), - pd.offsets.Minute(), np.timedelta64(365, 'D'), - timedelta(365)]: - rng = pd.period_range('2014-01', '2016-12', freq='M') - msg = 'Input has different freq(=.+)? 
from PeriodIndex\\(freq=M\\)' - with tm.assert_raises_regex( - period.IncompatibleFrequency, msg): - rng - o + # --------------------------------------------------------------- + # Timedelta-like (timedelta, timedelta64, Timedelta, Tick) + # TODO: Some of these are misnomers because of non-Tick DateOffsets + def test_pi_add_iadd_timedeltalike_daily(self, three_days): # Tick - offsets = [pd.offsets.Day(3), timedelta(days=3), - np.timedelta64(3, 'D'), pd.offsets.Hour(72), - timedelta(minutes=60 * 24 * 3), np.timedelta64(72, 'h')] - for delta in offsets: - rng = pd.period_range('2014-05-01', '2014-05-15', freq='D') - result = rng - delta - expected = pd.period_range('2014-04-28', '2014-05-12', freq='D') - tm.assert_index_equal(result, expected) - rng -= delta - tm.assert_index_equal(rng, expected) - - for o in [pd.offsets.YearBegin(2), pd.offsets.MonthBegin(1), - pd.offsets.Minute(), np.timedelta64(4, 'h'), - timedelta(hours=23)]: - rng = pd.period_range('2014-05-01', '2014-05-15', freq='D') - msg = 'Input has different freq(=.+)? from PeriodIndex\\(freq=D\\)' - with tm.assert_raises_regex( - period.IncompatibleFrequency, msg): - rng - o - - offsets = [pd.offsets.Hour(2), timedelta(hours=2), - np.timedelta64(2, 'h'), pd.offsets.Minute(120), - timedelta(minutes=120), np.timedelta64(120, 'm')] - for delta in offsets: - rng = pd.period_range('2014-01-01 10:00', '2014-01-05 10:00', - freq='H') - result = rng - delta - expected = pd.period_range('2014-01-01 08:00', '2014-01-05 08:00', - freq='H') - tm.assert_index_equal(result, expected) - rng -= delta - tm.assert_index_equal(rng, expected) - - for delta in [pd.offsets.YearBegin(2), timedelta(minutes=30), - np.timedelta64(30, 's')]: - rng = pd.period_range('2014-01-01 10:00', '2014-01-05 10:00', - freq='H') - msg = 'Input has different freq(=.+)? 
from PeriodIndex\\(freq=H\\)' - with tm.assert_raises_regex( - period.IncompatibleFrequency, msg): - rng + delta - with tm.assert_raises_regex( - period.IncompatibleFrequency, msg): - rng += delta - - # int - rng = pd.period_range('2000-01-01 09:00', freq='H', periods=10) - result = rng - 1 - expected = pd.period_range('2000-01-01 08:00', freq='H', periods=10) + other = three_days + rng = pd.period_range('2014-05-01', '2014-05-15', freq='D') + expected = pd.period_range('2014-05-04', '2014-05-18', freq='D') + + result = rng + other + tm.assert_index_equal(result, expected) + + rng += other + tm.assert_index_equal(rng, expected) + + def test_pi_sub_isub_timedeltalike_daily(self, three_days): + # Tick-like 3 Days + other = three_days + rng = pd.period_range('2014-05-01', '2014-05-15', freq='D') + expected = pd.period_range('2014-04-28', '2014-05-12', freq='D') + + result = rng - other + tm.assert_index_equal(result, expected) + + rng -= other + tm.assert_index_equal(rng, expected) + + def test_pi_add_iadd_timedeltalike_freq_mismatch_daily(self, not_daily): + other = not_daily + rng = pd.period_range('2014-05-01', '2014-05-15', freq='D') + msg = 'Input has different freq(=.+)? from PeriodIndex\\(freq=D\\)' + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + rng + other + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + rng += other + + def test_pi_sub_timedeltalike_freq_mismatch_daily(self, not_daily): + other = not_daily + rng = pd.period_range('2014-05-01', '2014-05-15', freq='D') + msg = 'Input has different freq(=.+)? 
from PeriodIndex\\(freq=D\\)' + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + rng - other + + def test_pi_add_iadd_timedeltalike_hourly(self, two_hours): + other = two_hours + rng = pd.period_range('2014-01-01 10:00', '2014-01-05 10:00', freq='H') + expected = pd.period_range('2014-01-01 12:00', '2014-01-05 12:00', + freq='H') + + result = rng + other + tm.assert_index_equal(result, expected) + + rng += other + tm.assert_index_equal(rng, expected) + + def test_pi_add_timedeltalike_mismatched_freq_hourly(self, not_hourly): + other = not_hourly + rng = pd.period_range('2014-01-01 10:00', '2014-01-05 10:00', freq='H') + msg = 'Input has different freq(=.+)? from PeriodIndex\\(freq=H\\)' + + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + rng + other + + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + rng += other + + def test_pi_sub_isub_timedeltalike_hourly(self, two_hours): + other = two_hours + rng = pd.period_range('2014-01-01 10:00', '2014-01-05 10:00', freq='H') + expected = pd.period_range('2014-01-01 08:00', '2014-01-05 08:00', + freq='H') + + result = rng - other + tm.assert_index_equal(result, expected) + + rng -= other + tm.assert_index_equal(rng, expected) + + def test_add_iadd_timedeltalike_annual(self): + # offset + # DateOffset + rng = pd.period_range('2014', '2024', freq='A') + result = rng + pd.offsets.YearEnd(5) + expected = pd.period_range('2019', '2029', freq='A') + tm.assert_index_equal(result, expected) + rng += pd.offsets.YearEnd(5) + tm.assert_index_equal(rng, expected) + + def test_pi_add_iadd_timedeltalike_freq_mismatch_annual(self, mismatched): + other = mismatched + rng = pd.period_range('2014', '2024', freq='A') + msg = ('Input has different freq(=.+)? 
' + 'from PeriodIndex\\(freq=A-DEC\\)') + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + rng + other + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + rng += other + + def test_pi_sub_isub_timedeltalike_freq_mismatch_annual(self, mismatched): + other = mismatched + rng = pd.period_range('2014', '2024', freq='A') + msg = ('Input has different freq(=.+)? ' + 'from PeriodIndex\\(freq=A-DEC\\)') + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + rng - other + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + rng -= other + + def test_pi_add_iadd_timedeltalike_M(self): + rng = pd.period_range('2014-01', '2016-12', freq='M') + expected = pd.period_range('2014-06', '2017-05', freq='M') + + result = rng + pd.offsets.MonthEnd(5) tm.assert_index_equal(result, expected) - rng -= 1 + + rng += pd.offsets.MonthEnd(5) tm.assert_index_equal(rng, expected) + def test_pi_add_iadd_timedeltalike_freq_mismatch_monthly(self, mismatched): + other = mismatched + rng = pd.period_range('2014-01', '2016-12', freq='M') + msg = 'Input has different freq(=.+)? from PeriodIndex\\(freq=M\\)' + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + rng + other + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + rng += other + + def test_pi_sub_isub_timedeltalike_freq_mismatch_monthly(self, mismatched): + other = mismatched + rng = pd.period_range('2014-01', '2016-12', freq='M') + msg = 'Input has different freq(=.+)? 
from PeriodIndex\\(freq=M\\)' + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + rng - other + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + rng -= other + # --------------------------------------------------------------- # PeriodIndex.shift is used by __add__ and __sub__ def test_pi_shift_ndarray(self): - idx = PeriodIndex(['2011-01', '2011-02', 'NaT', - '2011-04'], freq='M', name='idx') + idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-04'], + freq='M', name='idx') result = idx.shift(np.array([1, 2, 3, 4])) - expected = PeriodIndex(['2011-02', '2011-04', 'NaT', - '2011-08'], freq='M', name='idx') + expected = PeriodIndex(['2011-02', '2011-04', 'NaT', '2011-08'], + freq='M', name='idx') tm.assert_index_equal(result, expected) - idx = PeriodIndex(['2011-01', '2011-02', 'NaT', - '2011-04'], freq='M', name='idx') result = idx.shift(np.array([1, -2, 3, -4])) - expected = PeriodIndex(['2011-02', '2010-12', 'NaT', - '2010-12'], freq='M', name='idx') + expected = PeriodIndex(['2011-02', '2010-12', 'NaT', '2010-12'], + freq='M', name='idx') tm.assert_index_equal(result, expected) def test_shift(self): @@ -489,11 +559,11 @@ def test_shift_corner_cases(self): tm.assert_index_equal(idx.shift(-3), exp) def test_shift_nat(self): - idx = PeriodIndex(['2011-01', '2011-02', 'NaT', - '2011-04'], freq='M', name='idx') + idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-04'], + freq='M', name='idx') result = idx.shift(1) - expected = PeriodIndex(['2011-02', '2011-03', 'NaT', - '2011-05'], freq='M', name='idx') + expected = PeriodIndex(['2011-02', '2011-03', 'NaT', '2011-05'], + freq='M', name='idx') tm.assert_index_equal(result, expected) assert result.name == expected.name @@ -519,18 +589,18 @@ def _check(self, values, func, expected): # comp op results in bool tm.assert_numpy_array_equal(result, expected) - s = pd.Series(values) - result = func(s) + ser = pd.Series(values) + result = func(ser) exp = pd.Series(expected, 
name=values.name) tm.assert_series_equal(result, exp) def test_pi_ops(self): - idx = PeriodIndex(['2011-01', '2011-02', '2011-03', - '2011-04'], freq='M', name='idx') + idx = PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04'], + freq='M', name='idx') - expected = PeriodIndex(['2011-03', '2011-04', - '2011-05', '2011-06'], freq='M', name='idx') + expected = PeriodIndex(['2011-03', '2011-04', '2011-05', '2011-06'], + freq='M', name='idx') self._check(idx, lambda x: x + 2, expected) self._check(idx, lambda x: 2 + x, expected) @@ -544,13 +614,13 @@ def test_pi_ops(self): tm.assert_index_equal(result, exp) def test_pi_ops_errors(self): - idx = PeriodIndex(['2011-01', '2011-02', '2011-03', - '2011-04'], freq='M', name='idx') - s = pd.Series(idx) + idx = PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04'], + freq='M', name='idx') + ser = pd.Series(idx) msg = r"unsupported operand type\(s\)" - for obj in [idx, s]: + for obj in [idx, ser]: for ng in ["str", 1.5]: with tm.assert_raises_regex(TypeError, msg): obj + ng @@ -581,10 +651,10 @@ def test_pi_ops_errors(self): np.subtract(ng, obj) def test_pi_ops_nat(self): - idx = PeriodIndex(['2011-01', '2011-02', 'NaT', - '2011-04'], freq='M', name='idx') - expected = PeriodIndex(['2011-03', '2011-04', - 'NaT', '2011-06'], freq='M', name='idx') + idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-04'], + freq='M', name='idx') + expected = PeriodIndex(['2011-03', '2011-04', 'NaT', '2011-06'], + freq='M', name='idx') self._check(idx, lambda x: x + 2, expected) self._check(idx, lambda x: 2 + x, expected) self._check(idx, lambda x: np.add(x, 2), expected) @@ -593,10 +663,10 @@ def test_pi_ops_nat(self): self._check(idx + 2, lambda x: np.subtract(x, 2), idx) # freq with mult - idx = PeriodIndex(['2011-01', '2011-02', 'NaT', - '2011-04'], freq='2M', name='idx') - expected = PeriodIndex(['2011-07', '2011-08', - 'NaT', '2011-10'], freq='2M', name='idx') + idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-04'], + 
freq='2M', name='idx') + expected = PeriodIndex(['2011-07', '2011-08', 'NaT', '2011-10'], + freq='2M', name='idx') self._check(idx, lambda x: x + 3, expected) self._check(idx, lambda x: 3 + x, expected) self._check(idx, lambda x: np.add(x, 3), expected) @@ -605,26 +675,26 @@ def test_pi_ops_nat(self): self._check(idx + 3, lambda x: np.subtract(x, 3), idx) def test_pi_ops_array_int(self): - idx = PeriodIndex(['2011-01', '2011-02', 'NaT', - '2011-04'], freq='M', name='idx') + idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-04'], + freq='M', name='idx') f = lambda x: x + np.array([1, 2, 3, 4]) - exp = PeriodIndex(['2011-02', '2011-04', 'NaT', - '2011-08'], freq='M', name='idx') + exp = PeriodIndex(['2011-02', '2011-04', 'NaT', '2011-08'], + freq='M', name='idx') self._check(idx, f, exp) f = lambda x: np.add(x, np.array([4, -1, 1, 2])) - exp = PeriodIndex(['2011-05', '2011-01', 'NaT', - '2011-06'], freq='M', name='idx') + exp = PeriodIndex(['2011-05', '2011-01', 'NaT', '2011-06'], + freq='M', name='idx') self._check(idx, f, exp) f = lambda x: x - np.array([1, 2, 3, 4]) - exp = PeriodIndex(['2010-12', '2010-12', 'NaT', - '2010-12'], freq='M', name='idx') + exp = PeriodIndex(['2010-12', '2010-12', 'NaT', '2010-12'], + freq='M', name='idx') self._check(idx, f, exp) f = lambda x: np.subtract(x, np.array([3, 2, 3, -2])) - exp = PeriodIndex(['2010-10', '2010-12', 'NaT', - '2011-06'], freq='M', name='idx') + exp = PeriodIndex(['2010-10', '2010-12', 'NaT', '2011-06'], + freq='M', name='idx') self._check(idx, f, exp) def test_pi_ops_offset(self): @@ -648,29 +718,26 @@ def test_pi_ops_offset(self): def test_pi_offset_errors(self): idx = PeriodIndex(['2011-01-01', '2011-02-01', '2011-03-01', '2011-04-01'], freq='D', name='idx') - s = pd.Series(idx) + ser = pd.Series(idx) # Series op is applied per Period instance, thus error is raised # from Period msg_idx = r"Input has different freq from PeriodIndex\(freq=D\)" msg_s = r"Input cannot be converted to Period\(freq=D\)" - for 
obj, msg in [(idx, msg_idx), (s, msg_s)]: - with tm.assert_raises_regex( - period.IncompatibleFrequency, msg): + for obj, msg in [(idx, msg_idx), (ser, msg_s)]: + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): obj + pd.offsets.Hour(2) - with tm.assert_raises_regex( - period.IncompatibleFrequency, msg): + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): pd.offsets.Hour(2) + obj - with tm.assert_raises_regex( - period.IncompatibleFrequency, msg): + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): obj - pd.offsets.Hour(2) def test_pi_sub_period(self): # GH 13071 - idx = PeriodIndex(['2011-01', '2011-02', '2011-03', - '2011-04'], freq='M', name='idx') + idx = PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04'], + freq='M', name='idx') result = idx - pd.Period('2012-01', freq='M') exp = pd.Index([-12, -11, -10, -9], name='idx') @@ -695,16 +762,16 @@ def test_pi_sub_period(self): def test_pi_sub_pdnat(self): # GH 13071 - idx = PeriodIndex(['2011-01', '2011-02', 'NaT', - '2011-04'], freq='M', name='idx') + idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-04'], + freq='M', name='idx') exp = pd.TimedeltaIndex([pd.NaT] * 4, name='idx') tm.assert_index_equal(pd.NaT - idx, exp) tm.assert_index_equal(idx - pd.NaT, exp) def test_pi_sub_period_nat(self): # GH 13071 - idx = PeriodIndex(['2011-01', 'NaT', '2011-03', - '2011-04'], freq='M', name='idx') + idx = PeriodIndex(['2011-01', 'NaT', '2011-03', '2011-04'], + freq='M', name='idx') result = idx - pd.Period('2012-01', freq='M') exp = pd.Index([-12, np.nan, -10, -9], name='idx') diff --git a/pandas/tests/indexes/period/test_astype.py b/pandas/tests/indexes/period/test_astype.py new file mode 100644 index 0000000000000..f2126487496c4 --- /dev/null +++ b/pandas/tests/indexes/period/test_astype.py @@ -0,0 +1,99 @@ +# -*- coding: utf-8 -*- + +import numpy as np +import pytest + +import pandas as pd +import pandas.util.testing as tm +from pandas import NaT, Period, PeriodIndex, 
Int64Index, Index, period_range + + +class TestPeriodIndexAsType(object): + @pytest.mark.parametrize('dtype', [ + float, 'timedelta64', 'timedelta64[ns]']) + def test_astype_raises(self, dtype): + # GH#13149, GH#13209 + idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq='D') + msg = 'Cannot cast PeriodIndex to dtype' + with tm.assert_raises_regex(TypeError, msg): + idx.astype(dtype) + + def test_astype_conversion(self): + # GH#13149, GH#13209 + idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq='D') + + result = idx.astype(object) + expected = Index([Period('2016-05-16', freq='D')] + + [Period(NaT, freq='D')] * 3, dtype='object') + tm.assert_index_equal(result, expected) + + result = idx.astype(int) + expected = Int64Index([16937] + [-9223372036854775808] * 3, + dtype=np.int64) + tm.assert_index_equal(result, expected) + + result = idx.astype(str) + expected = Index(str(x) for x in idx) + tm.assert_index_equal(result, expected) + + idx = period_range('1990', '2009', freq='A') + result = idx.astype('i8') + tm.assert_index_equal(result, Index(idx.asi8)) + tm.assert_numpy_array_equal(result.values, idx.asi8) + + def test_astype_object(self): + idx = pd.PeriodIndex([], freq='M') + + exp = np.array([], dtype=object) + tm.assert_numpy_array_equal(idx.astype(object).values, exp) + tm.assert_numpy_array_equal(idx._mpl_repr(), exp) + + idx = pd.PeriodIndex(['2011-01', pd.NaT], freq='M') + + exp = np.array([pd.Period('2011-01', freq='M'), pd.NaT], dtype=object) + tm.assert_numpy_array_equal(idx.astype(object).values, exp) + tm.assert_numpy_array_equal(idx._mpl_repr(), exp) + + exp = np.array([pd.Period('2011-01-01', freq='D'), pd.NaT], + dtype=object) + idx = pd.PeriodIndex(['2011-01-01', pd.NaT], freq='D') + tm.assert_numpy_array_equal(idx.astype(object).values, exp) + tm.assert_numpy_array_equal(idx._mpl_repr(), exp) + + # TODO: de-duplicate this version (from test_ops) with the one above + # (from test_period) + def test_astype_object2(self): + idx = 
pd.period_range(start='2013-01-01', periods=4, freq='M', + name='idx') + expected_list = [pd.Period('2013-01-31', freq='M'), + pd.Period('2013-02-28', freq='M'), + pd.Period('2013-03-31', freq='M'), + pd.Period('2013-04-30', freq='M')] + expected = pd.Index(expected_list, dtype=object, name='idx') + result = idx.astype(object) + assert isinstance(result, Index) + assert result.dtype == object + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert idx.tolist() == expected_list + + idx = PeriodIndex(['2013-01-01', '2013-01-02', 'NaT', + '2013-01-04'], freq='D', name='idx') + expected_list = [pd.Period('2013-01-01', freq='D'), + pd.Period('2013-01-02', freq='D'), + pd.Period('NaT', freq='D'), + pd.Period('2013-01-04', freq='D')] + expected = pd.Index(expected_list, dtype=object, name='idx') + result = idx.astype(object) + assert isinstance(result, Index) + assert result.dtype == object + tm.assert_index_equal(result, expected) + for i in [0, 1, 3]: + assert result[i] == expected[i] + assert result[2] is pd.NaT + assert result.name == expected.name + + result_list = idx.tolist() + for i in [0, 1, 3]: + assert result_list[i] == expected_list[i] + assert result_list[2] is pd.NaT diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py index 8745de0c2a7aa..6c272864e0026 100644 --- a/pandas/tests/indexes/period/test_ops.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -25,42 +25,6 @@ def test_ops_properties(self): self.check_ops_properties(PeriodIndex._object_ops, f) self.check_ops_properties(PeriodIndex._bool_ops, f) - def test_astype_object(self): - idx = pd.period_range(start='2013-01-01', periods=4, freq='M', - name='idx') - expected_list = [pd.Period('2013-01-31', freq='M'), - pd.Period('2013-02-28', freq='M'), - pd.Period('2013-03-31', freq='M'), - pd.Period('2013-04-30', freq='M')] - expected = pd.Index(expected_list, dtype=object, name='idx') - result = idx.astype(object) - assert 
isinstance(result, Index) - assert result.dtype == object - tm.assert_index_equal(result, expected) - assert result.name == expected.name - assert idx.tolist() == expected_list - - idx = PeriodIndex(['2013-01-01', '2013-01-02', 'NaT', - '2013-01-04'], freq='D', name='idx') - expected_list = [pd.Period('2013-01-01', freq='D'), - pd.Period('2013-01-02', freq='D'), - pd.Period('NaT', freq='D'), - pd.Period('2013-01-04', freq='D')] - expected = pd.Index(expected_list, dtype=object, name='idx') - result = idx.astype(object) - assert isinstance(result, Index) - assert result.dtype == object - tm.assert_index_equal(result, expected) - for i in [0, 1, 3]: - assert result[i] == expected[i] - assert result[2] is pd.NaT - assert result.name == expected.name - - result_list = idx.tolist() - for i in [0, 1, 3]: - assert result_list[i] == expected_list[i] - assert result_list[2] is pd.NaT - def test_minmax(self): # monotonic diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index b3f059018493c..4c0c865928031 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -6,7 +6,7 @@ import pandas.util._test_decorators as td from pandas.util import testing as tm from pandas import (PeriodIndex, period_range, notna, DatetimeIndex, NaT, - Index, Period, Int64Index, Series, DataFrame, date_range, + Index, Period, Series, DataFrame, date_range, offsets) from ..datetimelike import DatetimeLike @@ -24,38 +24,6 @@ def setup_method(self, method): def create_index(self): return period_range('20130101', periods=5, freq='D') - def test_astype_conversion(self): - # GH 13149, GH 13209 - idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq='D') - - result = idx.astype(object) - expected = Index([Period('2016-05-16', freq='D')] + - [Period(NaT, freq='D')] * 3, dtype='object') - tm.assert_index_equal(result, expected) - - result = idx.astype(int) - expected = Int64Index([16937] + 
[-9223372036854775808] * 3, - dtype=np.int64) - tm.assert_index_equal(result, expected) - - result = idx.astype(str) - expected = Index(str(x) for x in idx) - tm.assert_index_equal(result, expected) - - idx = period_range('1990', '2009', freq='A') - result = idx.astype('i8') - tm.assert_index_equal(result, Index(idx.asi8)) - tm.assert_numpy_array_equal(result.values, idx.asi8) - - @pytest.mark.parametrize('dtype', [ - float, 'timedelta64', 'timedelta64[ns]']) - def test_astype_raises(self, dtype): - # GH 13149, GH 13209 - idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq='D') - msg = 'Cannot cast PeriodIndex to dtype' - with tm.assert_raises_regex(TypeError, msg): - idx.astype(dtype) - def test_pickle_compat_construction(self): pass @@ -384,25 +352,6 @@ def test_factorize(self): tm.assert_numpy_array_equal(arr, exp_arr) tm.assert_index_equal(idx, exp_idx) - def test_astype_object(self): - idx = pd.PeriodIndex([], freq='M') - - exp = np.array([], dtype=object) - tm.assert_numpy_array_equal(idx.astype(object).values, exp) - tm.assert_numpy_array_equal(idx._mpl_repr(), exp) - - idx = pd.PeriodIndex(['2011-01', pd.NaT], freq='M') - - exp = np.array([pd.Period('2011-01', freq='M'), pd.NaT], dtype=object) - tm.assert_numpy_array_equal(idx.astype(object).values, exp) - tm.assert_numpy_array_equal(idx._mpl_repr(), exp) - - exp = np.array([pd.Period('2011-01-01', freq='D'), pd.NaT], - dtype=object) - idx = pd.PeriodIndex(['2011-01-01', pd.NaT], freq='D') - tm.assert_numpy_array_equal(idx.astype(object).values, exp) - tm.assert_numpy_array_equal(idx._mpl_repr(), exp) - def test_is_(self): create_index = lambda: PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') @@ -421,13 +370,6 @@ def test_is_(self): assert not index.is_(index - 2) assert not index.is_(index - 0) - def test_comp_period(self): - idx = period_range('2007-01', periods=20, freq='M') - - result = idx < idx[10] - exp = idx.values < idx.values[10] - tm.assert_numpy_array_equal(result, exp) - def 
test_contains(self): rng = period_range('2007-01', freq='M', periods=10) From 6173edfe1b25c61e7a2e6019f06f1362c55dbd64 Mon Sep 17 00:00:00 2001 From: Tommy <10076072+tommyod@users.noreply.github.com> Date: Sun, 18 Feb 2018 14:38:16 +0100 Subject: [PATCH 121/217] DOC: Updated tutorials with additional info, new version and added some video tutorials (#19748) --- doc/source/tutorials.rst | 51 +++++++++++++++++++++++++++------------- 1 file changed, 35 insertions(+), 16 deletions(-) diff --git a/doc/source/tutorials.rst b/doc/source/tutorials.rst index 710212bc237cd..db9385519bff2 100644 --- a/doc/source/tutorials.rst +++ b/doc/source/tutorials.rst @@ -9,52 +9,52 @@ This is a guide to many pandas tutorials, geared mainly for new users. Internal Guides --------------- -pandas own :ref:`10 Minutes to pandas<10min>` +pandas' own :ref:`10 Minutes to pandas<10min>`. -More complex recipes are in the :ref:`Cookbook` +More complex recipes are in the :ref:`Cookbook`. pandas Cookbook --------------- -The goal of this cookbook (by `Julia Evans `_) is to +The goal of this 2015 cookbook (by `Julia Evans `_) is to give you some concrete examples for getting started with pandas. These are examples with real-world data, and all the bugs and weirdness that entails. -Here are links to the v0.1 release. For an up-to-date table of contents, see the `pandas-cookbook GitHub +Here are links to the v0.2 release. For an up-to-date table of contents, see the `pandas-cookbook GitHub repository `_. To run the examples in this tutorial, you'll need to clone the GitHub repository and get IPython Notebook running. See `How to use this cookbook `_. -- `A quick tour of the IPython Notebook: `_ +- `A quick tour of the IPython Notebook: `_ Shows off IPython's awesome tab completion and magic functions. -- `Chapter 1: `_ +- `Chapter 1: `_ Reading your data into pandas is pretty much the easiest thing. Even when the encoding is wrong! 
-- `Chapter 2: `_ +- `Chapter 2: `_ It's not totally obvious how to select data from a pandas dataframe. Here we explain the basics (how to take slices and get columns) -- `Chapter 3: `_ +- `Chapter 3: `_ Here we get into serious slicing and dicing and learn how to filter dataframes in complicated ways, really fast. -- `Chapter 4: `_ +- `Chapter 4: `_ Groupby/aggregate is seriously my favorite thing about pandas and I use it all the time. You should probably read this. -- `Chapter 5: `_ +- `Chapter 5: `_ Here you get to find out if it's cold in Montreal in the winter (spoiler: yes). Web scraping with pandas is fun! Here we combine dataframes. -- `Chapter 6: `_ +- `Chapter 6: `_ Strings with pandas are great. It has all these vectorized string operations and they're the best. We will turn a bunch of strings containing "Snow" into vectors of numbers in a trice. -- `Chapter 7: `_ +- `Chapter 7: `_ Cleaning up messy data is never a joy, but with pandas it's easier. -- `Chapter 8: `_ +- `Chapter 8: `_ Parsing Unix timestamps is confusing at first but it turns out to be really easy. -Lessons for New pandas Users +Lessons for new pandas users ---------------------------- For more resources, please visit the main `repository `__. @@ -125,7 +125,7 @@ There are four sections covering selected topics as follows: .. _tutorial-exercises-new-users: -Exercises for New Users +Exercises for new users ----------------------- Practice your skills with real data sets and exercises. For more resources, please visit the main `repository `__. @@ -152,9 +152,14 @@ For more resources, please visit the main `repository `_. +The source may be found in the GitHub repository +`TomAugspurger/effective-pandas `_. 
+ - `Modern Pandas `_ - `Method Chaining `_ - `Indexes `_ @@ -168,6 +173,20 @@ Excel charts with pandas, vincent and xlsxwriter - `Using Pandas and XlsxWriter to create Excel charts `_ +Video Tutorials +--------------- + +- `Pandas From The Ground Up `_ + (2015) (2:24) + `GitHub repo `_ +- `Introduction Into Pandas `_ + (2016) (1:28) + `GitHub repo `_ +- `Pandas: .head() to .tail() `_ + (2016) (1:26) + `GitHub repo `_ + + Various Tutorials ----------------- From 383f7ead2a9b1b411efeae2c1c79e4f53a7e4a6a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 18 Feb 2018 08:09:44 -0800 Subject: [PATCH 122/217] collect index formatting tests (#19661) --- .../tests/indexes/datetimes/test_formats.py | 175 +++++++++++++++++- pandas/tests/indexes/datetimes/test_misc.py | 10 - pandas/tests/indexes/datetimes/test_ops.py | 155 ---------------- pandas/tests/indexes/period/test_formats.py | 161 ++++++++++++++++ pandas/tests/indexes/period/test_indexing.py | 3 - pandas/tests/indexes/period/test_ops.py | 158 ---------------- .../tests/indexes/timedeltas/test_formats.py | 96 ++++++++++ pandas/tests/indexes/timedeltas/test_ops.py | 88 --------- 8 files changed, 431 insertions(+), 415 deletions(-) create mode 100644 pandas/tests/indexes/timedeltas/test_formats.py diff --git a/pandas/tests/indexes/datetimes/test_formats.py b/pandas/tests/indexes/datetimes/test_formats.py index ea2731f66f0ef..0d1a9e65ce6c6 100644 --- a/pandas/tests/indexes/datetimes/test_formats.py +++ b/pandas/tests/indexes/datetimes/test_formats.py @@ -1,6 +1,10 @@ -from pandas import DatetimeIndex +from datetime import datetime +from pandas import DatetimeIndex, Series import numpy as np +import dateutil.tz +import pytz +import pytest import pandas.util.testing as tm import pandas as pd @@ -45,3 +49,172 @@ def test_to_native_types(): result = index.to_native_types(na_rep='pandas') tm.assert_numpy_array_equal(result, expected) + + +class TestDatetimeIndexRendering(object): + def test_dti_repr_short(self): + dr = 
pd.date_range(start='1/1/2012', periods=1) + repr(dr) + + dr = pd.date_range(start='1/1/2012', periods=2) + repr(dr) + + dr = pd.date_range(start='1/1/2012', periods=3) + repr(dr) + + @pytest.mark.parametrize('method', ['__repr__', '__unicode__', '__str__']) + def test_dti_representation(self, method): + idxs = [] + idxs.append(DatetimeIndex([], freq='D')) + idxs.append(DatetimeIndex(['2011-01-01'], freq='D')) + idxs.append(DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D')) + idxs.append(DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], + freq='D')) + idxs.append(DatetimeIndex( + ['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00' + ], freq='H', tz='Asia/Tokyo')) + idxs.append(DatetimeIndex( + ['2011-01-01 09:00', '2011-01-01 10:00', pd.NaT], tz='US/Eastern')) + idxs.append(DatetimeIndex( + ['2011-01-01 09:00', '2011-01-01 10:00', pd.NaT], tz='UTC')) + + exp = [] + exp.append("""DatetimeIndex([], dtype='datetime64[ns]', freq='D')""") + exp.append("DatetimeIndex(['2011-01-01'], dtype='datetime64[ns]', " + "freq='D')") + exp.append("DatetimeIndex(['2011-01-01', '2011-01-02'], " + "dtype='datetime64[ns]', freq='D')") + exp.append("DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], " + "dtype='datetime64[ns]', freq='D')") + exp.append("DatetimeIndex(['2011-01-01 09:00:00+09:00', " + "'2011-01-01 10:00:00+09:00', '2011-01-01 11:00:00+09:00']" + ", dtype='datetime64[ns, Asia/Tokyo]', freq='H')") + exp.append("DatetimeIndex(['2011-01-01 09:00:00-05:00', " + "'2011-01-01 10:00:00-05:00', 'NaT'], " + "dtype='datetime64[ns, US/Eastern]', freq=None)") + exp.append("DatetimeIndex(['2011-01-01 09:00:00+00:00', " + "'2011-01-01 10:00:00+00:00', 'NaT'], " + "dtype='datetime64[ns, UTC]', freq=None)""") + + with pd.option_context('display.width', 300): + for indx, expected in zip(idxs, exp): + result = getattr(indx, method)() + assert result == expected + + def test_dti_representation_to_series(self): + idx1 = DatetimeIndex([], freq='D') + idx2 = 
DatetimeIndex(['2011-01-01'], freq='D') + idx3 = DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D') + idx4 = DatetimeIndex( + ['2011-01-01', '2011-01-02', '2011-01-03'], freq='D') + idx5 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', + '2011-01-01 11:00'], freq='H', tz='Asia/Tokyo') + idx6 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', pd.NaT], + tz='US/Eastern') + idx7 = DatetimeIndex(['2011-01-01 09:00', '2011-01-02 10:15']) + + exp1 = """Series([], dtype: datetime64[ns])""" + + exp2 = ("0 2011-01-01\n" + "dtype: datetime64[ns]") + + exp3 = ("0 2011-01-01\n" + "1 2011-01-02\n" + "dtype: datetime64[ns]") + + exp4 = ("0 2011-01-01\n" + "1 2011-01-02\n" + "2 2011-01-03\n" + "dtype: datetime64[ns]") + + exp5 = ("0 2011-01-01 09:00:00+09:00\n" + "1 2011-01-01 10:00:00+09:00\n" + "2 2011-01-01 11:00:00+09:00\n" + "dtype: datetime64[ns, Asia/Tokyo]") + + exp6 = ("0 2011-01-01 09:00:00-05:00\n" + "1 2011-01-01 10:00:00-05:00\n" + "2 NaT\n" + "dtype: datetime64[ns, US/Eastern]") + + exp7 = ("0 2011-01-01 09:00:00\n" + "1 2011-01-02 10:15:00\n" + "dtype: datetime64[ns]") + + with pd.option_context('display.width', 300): + for idx, expected in zip([idx1, idx2, idx3, idx4, + idx5, idx6, idx7], + [exp1, exp2, exp3, exp4, + exp5, exp6, exp7]): + result = repr(Series(idx)) + assert result == expected + + def test_dti_summary(self): + # GH#9116 + idx1 = DatetimeIndex([], freq='D') + idx2 = DatetimeIndex(['2011-01-01'], freq='D') + idx3 = DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D') + idx4 = DatetimeIndex( + ['2011-01-01', '2011-01-02', '2011-01-03'], freq='D') + idx5 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', + '2011-01-01 11:00'], + freq='H', tz='Asia/Tokyo') + idx6 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', pd.NaT], + tz='US/Eastern') + + exp1 = ("DatetimeIndex: 0 entries\n" + "Freq: D") + + exp2 = ("DatetimeIndex: 1 entries, 2011-01-01 to 2011-01-01\n" + "Freq: D") + + exp3 = ("DatetimeIndex: 2 entries, 2011-01-01 to 
2011-01-02\n" + "Freq: D") + + exp4 = ("DatetimeIndex: 3 entries, 2011-01-01 to 2011-01-03\n" + "Freq: D") + + exp5 = ("DatetimeIndex: 3 entries, 2011-01-01 09:00:00+09:00 " + "to 2011-01-01 11:00:00+09:00\n" + "Freq: H") + + exp6 = """DatetimeIndex: 3 entries, 2011-01-01 09:00:00-05:00 to NaT""" + + for idx, expected in zip([idx1, idx2, idx3, idx4, idx5, idx6], + [exp1, exp2, exp3, exp4, exp5, exp6]): + result = idx.summary() + assert result == expected + + def test_dti_business_repr(self): + # only really care that it works + repr(pd.bdate_range(datetime(2009, 1, 1), datetime(2010, 1, 1))) + + def test_dti_business_summary(self): + rng = pd.bdate_range(datetime(2009, 1, 1), datetime(2010, 1, 1)) + rng.summary() + rng[2:2].summary() + + def test_dti_business_summary_pytz(self): + pd.bdate_range('1/1/2005', '1/1/2009', tz=pytz.utc).summary() + + def test_dti_business_summary_dateutil(self): + pd.bdate_range('1/1/2005', '1/1/2009', + tz=dateutil.tz.tzutc()).summary() + + def test_dti_custom_business_repr(self): + # only really care that it works + repr(pd.bdate_range(datetime(2009, 1, 1), datetime(2010, 1, 1), + freq='C')) + + def test_dti_custom_business_summary(self): + rng = pd.bdate_range(datetime(2009, 1, 1), datetime(2010, 1, 1), + freq='C') + rng.summary() + rng[2:2].summary() + + def test_dti_custom_business_summary_pytz(self): + pd.bdate_range('1/1/2005', '1/1/2009', freq='C', tz=pytz.utc).summary() + + def test_dti_custom_business_summary_dateutil(self): + pd.bdate_range('1/1/2005', '1/1/2009', freq='C', + tz=dateutil.tz.tzutc()).summary() diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py index 4a46c3b04bbad..2013b5e6cd6dd 100644 --- a/pandas/tests/indexes/datetimes/test_misc.py +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -83,16 +83,6 @@ def test_range_edges(self): '1970-01-03', '1970-01-04']) tm.assert_index_equal(idx, exp) - def test_datetimeindex_repr_short(self): - dr = 
date_range(start='1/1/2012', periods=1) - repr(dr) - - dr = date_range(start='1/1/2012', periods=2) - repr(dr) - - dr = date_range(start='1/1/2012', periods=3) - repr(dr) - class TestDatetime64(object): diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index bc43b427fe0aa..b42cd454803b8 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -1,6 +1,4 @@ -import pytz import pytest -import dateutil import warnings import numpy as np from datetime import datetime @@ -153,130 +151,6 @@ def test_repeat(self): tm.assert_raises_regex(ValueError, msg, np.repeat, rng, reps, axis=1) - def test_representation(self): - - idx = [] - idx.append(DatetimeIndex([], freq='D')) - idx.append(DatetimeIndex(['2011-01-01'], freq='D')) - idx.append(DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D')) - idx.append(DatetimeIndex( - ['2011-01-01', '2011-01-02', '2011-01-03'], freq='D')) - idx.append(DatetimeIndex( - ['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00' - ], freq='H', tz='Asia/Tokyo')) - idx.append(DatetimeIndex( - ['2011-01-01 09:00', '2011-01-01 10:00', pd.NaT], tz='US/Eastern')) - idx.append(DatetimeIndex( - ['2011-01-01 09:00', '2011-01-01 10:00', pd.NaT], tz='UTC')) - - exp = [] - exp.append("""DatetimeIndex([], dtype='datetime64[ns]', freq='D')""") - exp.append("DatetimeIndex(['2011-01-01'], dtype='datetime64[ns]', " - "freq='D')") - exp.append("DatetimeIndex(['2011-01-01', '2011-01-02'], " - "dtype='datetime64[ns]', freq='D')") - exp.append("DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], " - "dtype='datetime64[ns]', freq='D')") - exp.append("DatetimeIndex(['2011-01-01 09:00:00+09:00', " - "'2011-01-01 10:00:00+09:00', '2011-01-01 11:00:00+09:00']" - ", dtype='datetime64[ns, Asia/Tokyo]', freq='H')") - exp.append("DatetimeIndex(['2011-01-01 09:00:00-05:00', " - "'2011-01-01 10:00:00-05:00', 'NaT'], " - "dtype='datetime64[ns, US/Eastern]', 
freq=None)") - exp.append("DatetimeIndex(['2011-01-01 09:00:00+00:00', " - "'2011-01-01 10:00:00+00:00', 'NaT'], " - "dtype='datetime64[ns, UTC]', freq=None)""") - - with pd.option_context('display.width', 300): - for indx, expected in zip(idx, exp): - for func in ['__repr__', '__unicode__', '__str__']: - result = getattr(indx, func)() - assert result == expected - - def test_representation_to_series(self): - idx1 = DatetimeIndex([], freq='D') - idx2 = DatetimeIndex(['2011-01-01'], freq='D') - idx3 = DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D') - idx4 = DatetimeIndex( - ['2011-01-01', '2011-01-02', '2011-01-03'], freq='D') - idx5 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', - '2011-01-01 11:00'], freq='H', tz='Asia/Tokyo') - idx6 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', pd.NaT], - tz='US/Eastern') - idx7 = DatetimeIndex(['2011-01-01 09:00', '2011-01-02 10:15']) - - exp1 = """Series([], dtype: datetime64[ns])""" - - exp2 = ("0 2011-01-01\n" - "dtype: datetime64[ns]") - - exp3 = ("0 2011-01-01\n" - "1 2011-01-02\n" - "dtype: datetime64[ns]") - - exp4 = ("0 2011-01-01\n" - "1 2011-01-02\n" - "2 2011-01-03\n" - "dtype: datetime64[ns]") - - exp5 = ("0 2011-01-01 09:00:00+09:00\n" - "1 2011-01-01 10:00:00+09:00\n" - "2 2011-01-01 11:00:00+09:00\n" - "dtype: datetime64[ns, Asia/Tokyo]") - - exp6 = ("0 2011-01-01 09:00:00-05:00\n" - "1 2011-01-01 10:00:00-05:00\n" - "2 NaT\n" - "dtype: datetime64[ns, US/Eastern]") - - exp7 = ("0 2011-01-01 09:00:00\n" - "1 2011-01-02 10:15:00\n" - "dtype: datetime64[ns]") - - with pd.option_context('display.width', 300): - for idx, expected in zip([idx1, idx2, idx3, idx4, - idx5, idx6, idx7], - [exp1, exp2, exp3, exp4, - exp5, exp6, exp7]): - result = repr(Series(idx)) - assert result == expected - - def test_summary(self): - # GH9116 - idx1 = DatetimeIndex([], freq='D') - idx2 = DatetimeIndex(['2011-01-01'], freq='D') - idx3 = DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D') - idx4 = 
DatetimeIndex( - ['2011-01-01', '2011-01-02', '2011-01-03'], freq='D') - idx5 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', - '2011-01-01 11:00'], - freq='H', tz='Asia/Tokyo') - idx6 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', pd.NaT], - tz='US/Eastern') - - exp1 = ("DatetimeIndex: 0 entries\n" - "Freq: D") - - exp2 = ("DatetimeIndex: 1 entries, 2011-01-01 to 2011-01-01\n" - "Freq: D") - - exp3 = ("DatetimeIndex: 2 entries, 2011-01-01 to 2011-01-02\n" - "Freq: D") - - exp4 = ("DatetimeIndex: 3 entries, 2011-01-01 to 2011-01-03\n" - "Freq: D") - - exp5 = ("DatetimeIndex: 3 entries, 2011-01-01 09:00:00+09:00 " - "to 2011-01-01 11:00:00+09:00\n" - "Freq: H") - - exp6 = """DatetimeIndex: 3 entries, 2011-01-01 09:00:00-05:00 to NaT""" - - for idx, expected in zip([idx1, idx2, idx3, idx4, idx5, idx6], - [exp1, exp2, exp3, exp4, exp5, exp6]): - result = idx.summary() - assert result == expected - def test_resolution(self): for freq, expected in zip(['A', 'Q', 'M', 'D', 'H', 'T', 'S', 'L', 'U'], @@ -544,10 +418,6 @@ def test_copy(self): repr(cp) tm.assert_index_equal(cp, self.rng) - def test_repr(self): - # only really care that it works - repr(self.rng) - def test_shift(self): shifted = self.rng.shift(5) assert shifted[0] == self.rng[5] @@ -565,16 +435,6 @@ def test_shift(self): shifted = rng.shift(1, freq=BDay()) assert shifted[0] == rng[0] + BDay() - def test_summary(self): - self.rng.summary() - self.rng[2:2].summary() - - def test_summary_pytz(self): - bdate_range('1/1/2005', '1/1/2009', tz=pytz.utc).summary() - - def test_summary_dateutil(self): - bdate_range('1/1/2005', '1/1/2009', tz=dateutil.tz.tzutc()).summary() - def test_equals(self): assert not self.rng.equals(list(self.rng)) @@ -612,10 +472,6 @@ def test_copy(self): repr(cp) tm.assert_index_equal(cp, self.rng) - def test_repr(self): - # only really care that it works - repr(self.rng) - def test_shift(self): shifted = self.rng.shift(5) @@ -640,16 +496,5 @@ def 
test_pickle_unpickle(self): unpickled = tm.round_trip_pickle(self.rng) assert unpickled.offset is not None - def test_summary(self): - self.rng.summary() - self.rng[2:2].summary() - - def test_summary_pytz(self): - bdate_range('1/1/2005', '1/1/2009', freq='C', tz=pytz.utc).summary() - - def test_summary_dateutil(self): - bdate_range('1/1/2005', '1/1/2009', freq='C', - tz=dateutil.tz.tzutc()).summary() - def test_equals(self): assert not self.rng.equals(list(self.rng)) diff --git a/pandas/tests/indexes/period/test_formats.py b/pandas/tests/indexes/period/test_formats.py index 533481ce051f7..b1a1060bf86c4 100644 --- a/pandas/tests/indexes/period/test_formats.py +++ b/pandas/tests/indexes/period/test_formats.py @@ -1,6 +1,7 @@ from pandas import PeriodIndex import numpy as np +import pytest import pandas.util.testing as tm import pandas as pd @@ -46,3 +47,163 @@ def test_to_native_types(): result = index.to_native_types(na_rep='pandas') tm.assert_numpy_array_equal(result, expected) + + +class TestPeriodIndexRendering(object): + @pytest.mark.parametrize('method', ['__repr__', '__unicode__', '__str__']) + def test_representation(self, method): + # GH#7601 + idx1 = PeriodIndex([], freq='D') + idx2 = PeriodIndex(['2011-01-01'], freq='D') + idx3 = PeriodIndex(['2011-01-01', '2011-01-02'], freq='D') + idx4 = PeriodIndex(['2011-01-01', '2011-01-02', '2011-01-03'], + freq='D') + idx5 = PeriodIndex(['2011', '2012', '2013'], freq='A') + idx6 = PeriodIndex(['2011-01-01 09:00', '2012-02-01 10:00', 'NaT'], + freq='H') + idx7 = pd.period_range('2013Q1', periods=1, freq="Q") + idx8 = pd.period_range('2013Q1', periods=2, freq="Q") + idx9 = pd.period_range('2013Q1', periods=3, freq="Q") + idx10 = PeriodIndex(['2011-01-01', '2011-02-01'], freq='3D') + + exp1 = """PeriodIndex([], dtype='period[D]', freq='D')""" + + exp2 = """PeriodIndex(['2011-01-01'], dtype='period[D]', freq='D')""" + + exp3 = ("PeriodIndex(['2011-01-01', '2011-01-02'], dtype='period[D]', " + "freq='D')") + + exp4 = 
("PeriodIndex(['2011-01-01', '2011-01-02', '2011-01-03'], " + "dtype='period[D]', freq='D')") + + exp5 = ("PeriodIndex(['2011', '2012', '2013'], dtype='period[A-DEC]', " + "freq='A-DEC')") + + exp6 = ("PeriodIndex(['2011-01-01 09:00', '2012-02-01 10:00', 'NaT'], " + "dtype='period[H]', freq='H')") + + exp7 = ("PeriodIndex(['2013Q1'], dtype='period[Q-DEC]', " + "freq='Q-DEC')") + + exp8 = ("PeriodIndex(['2013Q1', '2013Q2'], dtype='period[Q-DEC]', " + "freq='Q-DEC')") + + exp9 = ("PeriodIndex(['2013Q1', '2013Q2', '2013Q3'], " + "dtype='period[Q-DEC]', freq='Q-DEC')") + + exp10 = ("PeriodIndex(['2011-01-01', '2011-02-01'], " + "dtype='period[3D]', freq='3D')") + + for idx, expected in zip([idx1, idx2, idx3, idx4, idx5, + idx6, idx7, idx8, idx9, idx10], + [exp1, exp2, exp3, exp4, exp5, + exp6, exp7, exp8, exp9, exp10]): + result = getattr(idx, method)() + assert result == expected + + def test_representation_to_series(self): + # GH#10971 + idx1 = PeriodIndex([], freq='D') + idx2 = PeriodIndex(['2011-01-01'], freq='D') + idx3 = PeriodIndex(['2011-01-01', '2011-01-02'], freq='D') + idx4 = PeriodIndex(['2011-01-01', '2011-01-02', '2011-01-03'], + freq='D') + idx5 = PeriodIndex(['2011', '2012', '2013'], freq='A') + idx6 = PeriodIndex(['2011-01-01 09:00', '2012-02-01 10:00', 'NaT'], + freq='H') + + idx7 = pd.period_range('2013Q1', periods=1, freq="Q") + idx8 = pd.period_range('2013Q1', periods=2, freq="Q") + idx9 = pd.period_range('2013Q1', periods=3, freq="Q") + + exp1 = """Series([], dtype: object)""" + + exp2 = """0 2011-01-01 +dtype: object""" + + exp3 = """0 2011-01-01 +1 2011-01-02 +dtype: object""" + + exp4 = """0 2011-01-01 +1 2011-01-02 +2 2011-01-03 +dtype: object""" + + exp5 = """0 2011 +1 2012 +2 2013 +dtype: object""" + + exp6 = """0 2011-01-01 09:00 +1 2012-02-01 10:00 +2 NaT +dtype: object""" + + exp7 = """0 2013Q1 +dtype: object""" + + exp8 = """0 2013Q1 +1 2013Q2 +dtype: object""" + + exp9 = """0 2013Q1 +1 2013Q2 +2 2013Q3 +dtype: object""" + + for idx, 
expected in zip([idx1, idx2, idx3, idx4, idx5, + idx6, idx7, idx8, idx9], + [exp1, exp2, exp3, exp4, exp5, + exp6, exp7, exp8, exp9]): + result = repr(pd.Series(idx)) + assert result == expected + + def test_summary(self): + # GH#9116 + idx1 = PeriodIndex([], freq='D') + idx2 = PeriodIndex(['2011-01-01'], freq='D') + idx3 = PeriodIndex(['2011-01-01', '2011-01-02'], freq='D') + idx4 = PeriodIndex(['2011-01-01', '2011-01-02', '2011-01-03'], + freq='D') + idx5 = PeriodIndex(['2011', '2012', '2013'], freq='A') + idx6 = PeriodIndex(['2011-01-01 09:00', '2012-02-01 10:00', 'NaT'], + freq='H') + + idx7 = pd.period_range('2013Q1', periods=1, freq="Q") + idx8 = pd.period_range('2013Q1', periods=2, freq="Q") + idx9 = pd.period_range('2013Q1', periods=3, freq="Q") + + exp1 = """PeriodIndex: 0 entries +Freq: D""" + + exp2 = """PeriodIndex: 1 entries, 2011-01-01 to 2011-01-01 +Freq: D""" + + exp3 = """PeriodIndex: 2 entries, 2011-01-01 to 2011-01-02 +Freq: D""" + + exp4 = """PeriodIndex: 3 entries, 2011-01-01 to 2011-01-03 +Freq: D""" + + exp5 = """PeriodIndex: 3 entries, 2011 to 2013 +Freq: A-DEC""" + + exp6 = """PeriodIndex: 3 entries, 2011-01-01 09:00 to NaT +Freq: H""" + + exp7 = """PeriodIndex: 1 entries, 2013Q1 to 2013Q1 +Freq: Q-DEC""" + + exp8 = """PeriodIndex: 2 entries, 2013Q1 to 2013Q2 +Freq: Q-DEC""" + + exp9 = """PeriodIndex: 3 entries, 2013Q1 to 2013Q3 +Freq: Q-DEC""" + + for idx, expected in zip([idx1, idx2, idx3, idx4, idx5, + idx6, idx7, idx8, idx9], + [exp1, exp2, exp3, exp4, exp5, + exp6, exp7, exp8, exp9]): + result = idx.summary() + assert result == expected diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index 6cb4226dffc5a..b913934195260 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -14,9 +14,6 @@ class TestGetItem(object): - def setup_method(self, method): - pass - def test_getitem(self): idx1 = pd.period_range('2011-01-01', 
'2011-01-31', freq='D', name='idx') diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py index 6c272864e0026..3b6641bc7ad5c 100644 --- a/pandas/tests/indexes/period/test_ops.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -79,164 +79,6 @@ def test_numpy_minmax(self): tm.assert_raises_regex( ValueError, errmsg, np.argmax, pr, out=0) - def test_representation(self): - # GH 7601 - idx1 = PeriodIndex([], freq='D') - idx2 = PeriodIndex(['2011-01-01'], freq='D') - idx3 = PeriodIndex(['2011-01-01', '2011-01-02'], freq='D') - idx4 = PeriodIndex(['2011-01-01', '2011-01-02', '2011-01-03'], - freq='D') - idx5 = PeriodIndex(['2011', '2012', '2013'], freq='A') - idx6 = PeriodIndex(['2011-01-01 09:00', '2012-02-01 10:00', - 'NaT'], freq='H') - idx7 = pd.period_range('2013Q1', periods=1, freq="Q") - idx8 = pd.period_range('2013Q1', periods=2, freq="Q") - idx9 = pd.period_range('2013Q1', periods=3, freq="Q") - idx10 = PeriodIndex(['2011-01-01', '2011-02-01'], freq='3D') - - exp1 = """PeriodIndex([], dtype='period[D]', freq='D')""" - - exp2 = """PeriodIndex(['2011-01-01'], dtype='period[D]', freq='D')""" - - exp3 = ("PeriodIndex(['2011-01-01', '2011-01-02'], dtype='period[D]', " - "freq='D')") - - exp4 = ("PeriodIndex(['2011-01-01', '2011-01-02', '2011-01-03'], " - "dtype='period[D]', freq='D')") - - exp5 = ("PeriodIndex(['2011', '2012', '2013'], dtype='period[A-DEC]', " - "freq='A-DEC')") - - exp6 = ("PeriodIndex(['2011-01-01 09:00', '2012-02-01 10:00', 'NaT'], " - "dtype='period[H]', freq='H')") - - exp7 = ("PeriodIndex(['2013Q1'], dtype='period[Q-DEC]', " - "freq='Q-DEC')") - - exp8 = ("PeriodIndex(['2013Q1', '2013Q2'], dtype='period[Q-DEC]', " - "freq='Q-DEC')") - - exp9 = ("PeriodIndex(['2013Q1', '2013Q2', '2013Q3'], " - "dtype='period[Q-DEC]', freq='Q-DEC')") - - exp10 = ("PeriodIndex(['2011-01-01', '2011-02-01'], " - "dtype='period[3D]', freq='3D')") - - for idx, expected in zip([idx1, idx2, idx3, idx4, idx5, - idx6, idx7, idx8, 
idx9, idx10], - [exp1, exp2, exp3, exp4, exp5, - exp6, exp7, exp8, exp9, exp10]): - for func in ['__repr__', '__unicode__', '__str__']: - result = getattr(idx, func)() - assert result == expected - - def test_representation_to_series(self): - # GH 10971 - idx1 = PeriodIndex([], freq='D') - idx2 = PeriodIndex(['2011-01-01'], freq='D') - idx3 = PeriodIndex(['2011-01-01', '2011-01-02'], freq='D') - idx4 = PeriodIndex(['2011-01-01', '2011-01-02', - '2011-01-03'], freq='D') - idx5 = PeriodIndex(['2011', '2012', '2013'], freq='A') - idx6 = PeriodIndex(['2011-01-01 09:00', '2012-02-01 10:00', - 'NaT'], freq='H') - - idx7 = pd.period_range('2013Q1', periods=1, freq="Q") - idx8 = pd.period_range('2013Q1', periods=2, freq="Q") - idx9 = pd.period_range('2013Q1', periods=3, freq="Q") - - exp1 = """Series([], dtype: object)""" - - exp2 = """0 2011-01-01 -dtype: object""" - - exp3 = """0 2011-01-01 -1 2011-01-02 -dtype: object""" - - exp4 = """0 2011-01-01 -1 2011-01-02 -2 2011-01-03 -dtype: object""" - - exp5 = """0 2011 -1 2012 -2 2013 -dtype: object""" - - exp6 = """0 2011-01-01 09:00 -1 2012-02-01 10:00 -2 NaT -dtype: object""" - - exp7 = """0 2013Q1 -dtype: object""" - - exp8 = """0 2013Q1 -1 2013Q2 -dtype: object""" - - exp9 = """0 2013Q1 -1 2013Q2 -2 2013Q3 -dtype: object""" - - for idx, expected in zip([idx1, idx2, idx3, idx4, idx5, - idx6, idx7, idx8, idx9], - [exp1, exp2, exp3, exp4, exp5, - exp6, exp7, exp8, exp9]): - result = repr(pd.Series(idx)) - assert result == expected - - def test_summary(self): - # GH9116 - idx1 = PeriodIndex([], freq='D') - idx2 = PeriodIndex(['2011-01-01'], freq='D') - idx3 = PeriodIndex(['2011-01-01', '2011-01-02'], freq='D') - idx4 = PeriodIndex( - ['2011-01-01', '2011-01-02', '2011-01-03'], freq='D') - idx5 = PeriodIndex(['2011', '2012', '2013'], freq='A') - idx6 = PeriodIndex( - ['2011-01-01 09:00', '2012-02-01 10:00', 'NaT'], freq='H') - - idx7 = pd.period_range('2013Q1', periods=1, freq="Q") - idx8 = pd.period_range('2013Q1', 
periods=2, freq="Q") - idx9 = pd.period_range('2013Q1', periods=3, freq="Q") - - exp1 = """PeriodIndex: 0 entries -Freq: D""" - - exp2 = """PeriodIndex: 1 entries, 2011-01-01 to 2011-01-01 -Freq: D""" - - exp3 = """PeriodIndex: 2 entries, 2011-01-01 to 2011-01-02 -Freq: D""" - - exp4 = """PeriodIndex: 3 entries, 2011-01-01 to 2011-01-03 -Freq: D""" - - exp5 = """PeriodIndex: 3 entries, 2011 to 2013 -Freq: A-DEC""" - - exp6 = """PeriodIndex: 3 entries, 2011-01-01 09:00 to NaT -Freq: H""" - - exp7 = """PeriodIndex: 1 entries, 2013Q1 to 2013Q1 -Freq: Q-DEC""" - - exp8 = """PeriodIndex: 2 entries, 2013Q1 to 2013Q2 -Freq: Q-DEC""" - - exp9 = """PeriodIndex: 3 entries, 2013Q1 to 2013Q3 -Freq: Q-DEC""" - - for idx, expected in zip([idx1, idx2, idx3, idx4, idx5, - idx6, idx7, idx8, idx9], - [exp1, exp2, exp3, exp4, exp5, - exp6, exp7, exp8, exp9]): - result = idx.summary() - assert result == expected - def test_resolution(self): for freq, expected in zip(['A', 'Q', 'M', 'D', 'H', 'T', 'S', 'L', 'U'], diff --git a/pandas/tests/indexes/timedeltas/test_formats.py b/pandas/tests/indexes/timedeltas/test_formats.py new file mode 100644 index 0000000000000..a8375459d74e4 --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_formats.py @@ -0,0 +1,96 @@ +# -*- coding: utf-8 -*- + +import pytest + +import pandas as pd +from pandas import TimedeltaIndex + + +class TestTimedeltaIndexRendering(object): + @pytest.mark.parametrize('method', ['__repr__', '__unicode__', '__str__']) + def test_representation(self, method): + idx1 = TimedeltaIndex([], freq='D') + idx2 = TimedeltaIndex(['1 days'], freq='D') + idx3 = TimedeltaIndex(['1 days', '2 days'], freq='D') + idx4 = TimedeltaIndex(['1 days', '2 days', '3 days'], freq='D') + idx5 = TimedeltaIndex(['1 days 00:00:01', '2 days', '3 days']) + + exp1 = """TimedeltaIndex([], dtype='timedelta64[ns]', freq='D')""" + + exp2 = ("TimedeltaIndex(['1 days'], dtype='timedelta64[ns]', " + "freq='D')") + + exp3 = ("TimedeltaIndex(['1 days', '2 days'], 
" + "dtype='timedelta64[ns]', freq='D')") + + exp4 = ("TimedeltaIndex(['1 days', '2 days', '3 days'], " + "dtype='timedelta64[ns]', freq='D')") + + exp5 = ("TimedeltaIndex(['1 days 00:00:01', '2 days 00:00:00', " + "'3 days 00:00:00'], dtype='timedelta64[ns]', freq=None)") + + with pd.option_context('display.width', 300): + for idx, expected in zip([idx1, idx2, idx3, idx4, idx5], + [exp1, exp2, exp3, exp4, exp5]): + result = getattr(idx, method)() + assert result == expected + + def test_representation_to_series(self): + idx1 = TimedeltaIndex([], freq='D') + idx2 = TimedeltaIndex(['1 days'], freq='D') + idx3 = TimedeltaIndex(['1 days', '2 days'], freq='D') + idx4 = TimedeltaIndex(['1 days', '2 days', '3 days'], freq='D') + idx5 = TimedeltaIndex(['1 days 00:00:01', '2 days', '3 days']) + + exp1 = """Series([], dtype: timedelta64[ns])""" + + exp2 = ("0 1 days\n" + "dtype: timedelta64[ns]") + + exp3 = ("0 1 days\n" + "1 2 days\n" + "dtype: timedelta64[ns]") + + exp4 = ("0 1 days\n" + "1 2 days\n" + "2 3 days\n" + "dtype: timedelta64[ns]") + + exp5 = ("0 1 days 00:00:01\n" + "1 2 days 00:00:00\n" + "2 3 days 00:00:00\n" + "dtype: timedelta64[ns]") + + with pd.option_context('display.width', 300): + for idx, expected in zip([idx1, idx2, idx3, idx4, idx5], + [exp1, exp2, exp3, exp4, exp5]): + result = repr(pd.Series(idx)) + assert result == expected + + def test_summary(self): + # GH#9116 + idx1 = TimedeltaIndex([], freq='D') + idx2 = TimedeltaIndex(['1 days'], freq='D') + idx3 = TimedeltaIndex(['1 days', '2 days'], freq='D') + idx4 = TimedeltaIndex(['1 days', '2 days', '3 days'], freq='D') + idx5 = TimedeltaIndex(['1 days 00:00:01', '2 days', '3 days']) + + exp1 = ("TimedeltaIndex: 0 entries\n" + "Freq: D") + + exp2 = ("TimedeltaIndex: 1 entries, 1 days to 1 days\n" + "Freq: D") + + exp3 = ("TimedeltaIndex: 2 entries, 1 days to 2 days\n" + "Freq: D") + + exp4 = ("TimedeltaIndex: 3 entries, 1 days to 3 days\n" + "Freq: D") + + exp5 = ("TimedeltaIndex: 3 entries, 1 days 
00:00:01 to 3 days " + "00:00:00") + + for idx, expected in zip([idx1, idx2, idx3, idx4, idx5], + [exp1, exp2, exp3, exp4, exp5]): + result = idx.summary() + assert result == expected diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index d154aa2172ef7..690ba66b6f5ef 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -73,94 +73,6 @@ def test_numpy_minmax(self): tm.assert_raises_regex( ValueError, errmsg, np.argmax, td, out=0) - def test_representation(self): - idx1 = TimedeltaIndex([], freq='D') - idx2 = TimedeltaIndex(['1 days'], freq='D') - idx3 = TimedeltaIndex(['1 days', '2 days'], freq='D') - idx4 = TimedeltaIndex(['1 days', '2 days', '3 days'], freq='D') - idx5 = TimedeltaIndex(['1 days 00:00:01', '2 days', '3 days']) - - exp1 = """TimedeltaIndex([], dtype='timedelta64[ns]', freq='D')""" - - exp2 = ("TimedeltaIndex(['1 days'], dtype='timedelta64[ns]', " - "freq='D')") - - exp3 = ("TimedeltaIndex(['1 days', '2 days'], " - "dtype='timedelta64[ns]', freq='D')") - - exp4 = ("TimedeltaIndex(['1 days', '2 days', '3 days'], " - "dtype='timedelta64[ns]', freq='D')") - - exp5 = ("TimedeltaIndex(['1 days 00:00:01', '2 days 00:00:00', " - "'3 days 00:00:00'], dtype='timedelta64[ns]', freq=None)") - - with pd.option_context('display.width', 300): - for idx, expected in zip([idx1, idx2, idx3, idx4, idx5], - [exp1, exp2, exp3, exp4, exp5]): - for func in ['__repr__', '__unicode__', '__str__']: - result = getattr(idx, func)() - assert result == expected - - def test_representation_to_series(self): - idx1 = TimedeltaIndex([], freq='D') - idx2 = TimedeltaIndex(['1 days'], freq='D') - idx3 = TimedeltaIndex(['1 days', '2 days'], freq='D') - idx4 = TimedeltaIndex(['1 days', '2 days', '3 days'], freq='D') - idx5 = TimedeltaIndex(['1 days 00:00:01', '2 days', '3 days']) - - exp1 = """Series([], dtype: timedelta64[ns])""" - - exp2 = """0 1 days -dtype: timedelta64[ns]""" 
- - exp3 = """0 1 days -1 2 days -dtype: timedelta64[ns]""" - - exp4 = """0 1 days -1 2 days -2 3 days -dtype: timedelta64[ns]""" - - exp5 = """0 1 days 00:00:01 -1 2 days 00:00:00 -2 3 days 00:00:00 -dtype: timedelta64[ns]""" - - with pd.option_context('display.width', 300): - for idx, expected in zip([idx1, idx2, idx3, idx4, idx5], - [exp1, exp2, exp3, exp4, exp5]): - result = repr(pd.Series(idx)) - assert result == expected - - def test_summary(self): - # GH9116 - idx1 = TimedeltaIndex([], freq='D') - idx2 = TimedeltaIndex(['1 days'], freq='D') - idx3 = TimedeltaIndex(['1 days', '2 days'], freq='D') - idx4 = TimedeltaIndex(['1 days', '2 days', '3 days'], freq='D') - idx5 = TimedeltaIndex(['1 days 00:00:01', '2 days', '3 days']) - - exp1 = ("TimedeltaIndex: 0 entries\n" - "Freq: D") - - exp2 = ("TimedeltaIndex: 1 entries, 1 days to 1 days\n" - "Freq: D") - - exp3 = ("TimedeltaIndex: 2 entries, 1 days to 2 days\n" - "Freq: D") - - exp4 = ("TimedeltaIndex: 3 entries, 1 days to 3 days\n" - "Freq: D") - - exp5 = ("TimedeltaIndex: 3 entries, 1 days 00:00:01 to 3 days " - "00:00:00") - - for idx, expected in zip([idx1, idx2, idx3, idx4, idx5], - [exp1, exp2, exp3, exp4, exp5]): - result = idx.summary() - assert result == expected - def test_value_counts_unique(self): # GH 7735 From 563367f68f20b948ecf9faf94ecbb35c8f62092e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 18 Feb 2018 08:22:16 -0800 Subject: [PATCH 123/217] finish off tests.tseries.test_timezones (#19739) --- .../tests/scalar/timestamp/test_unary_ops.py | 24 ++++ pandas/tests/tseries/test_timezones.py | 108 ------------------ pandas/tests/tslibs/test_timezones.py | 31 +++++ 3 files changed, 55 insertions(+), 108 deletions(-) delete mode 100644 pandas/tests/tseries/test_timezones.py diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py index 8a6989c909cb2..994ff86e6fdf9 100644 --- a/pandas/tests/scalar/timestamp/test_unary_ops.py +++ 
b/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -4,11 +4,13 @@ import pytest import pytz from pytz import utc +from dateutil.tz import gettz import pandas.util.testing as tm import pandas.util._test_decorators as td from pandas.compat import PY3 +from pandas._libs import tslib from pandas._libs.tslibs.frequencies import _INVALID_FREQ_ERROR from pandas import Timestamp, NaT @@ -215,6 +217,28 @@ def test_replace_tzinfo(self): assert result_dt == result_pd assert result_dt == result_pd.to_pydatetime() + @pytest.mark.parametrize('tz, normalize', [ + (pytz.timezone('US/Eastern'), lambda x: x.tzinfo.normalize(x)), + (gettz('US/Eastern'), lambda x: x)]) + def test_replace_across_dst(self, tz, normalize): + # GH#18319 check that 1) timezone is correctly normalized and + # 2) that hour is not incorrectly changed by this normalization + ts_naive = Timestamp('2017-12-03 16:03:30') + ts_aware = tslib._localize_pydatetime(ts_naive, tz) + + # Preliminary sanity-check + assert ts_aware == normalize(ts_aware) + + # Replace across DST boundary + ts2 = ts_aware.replace(month=6) + + # Check that `replace` preserves hour literal + assert (ts2.hour, ts2.minute) == (ts_aware.hour, ts_aware.minute) + + # Check that post-replace object is appropriately normalized + ts2b = normalize(ts2) + assert ts2 == ts2b + # -------------------------------------------------------------- @td.skip_if_windows diff --git a/pandas/tests/tseries/test_timezones.py b/pandas/tests/tseries/test_timezones.py deleted file mode 100644 index 97326dc04a522..0000000000000 --- a/pandas/tests/tseries/test_timezones.py +++ /dev/null @@ -1,108 +0,0 @@ -# pylint: disable-msg=E1101,W0612 -import pytest - -import pytz - -from datetime import datetime - -from pandas._libs.tslibs import timezones -from pandas import Timestamp - - -class TestTimeZoneSupportPytz(object): - - def tz(self, tz): - # Construct a timezone object from a string. Overridden in subclass to - # parameterize tests. 
- return pytz.timezone(tz) - - def tzstr(self, tz): - # Construct a timezone string from a string. Overridden in subclass to - # parameterize tests. - return tz - - def localize(self, tz, x): - return tz.localize(x) - - def normalize(self, ts): - tzinfo = ts.tzinfo - return tzinfo.normalize(ts) - - def cmptz(self, tz1, tz2): - # Compare two timezones. Overridden in subclass to parameterize - # tests. - return tz1.zone == tz2.zone - - # test utility methods - def test_infer_tz(self): - eastern = self.tz('US/Eastern') - utc = pytz.utc - - _start = datetime(2001, 1, 1) - _end = datetime(2009, 1, 1) - - start = self.localize(eastern, _start) - end = self.localize(eastern, _end) - assert (timezones.infer_tzinfo(start, end) is - self.localize(eastern, _start).tzinfo) - assert (timezones.infer_tzinfo(start, None) is - self.localize(eastern, _start).tzinfo) - assert (timezones.infer_tzinfo(None, end) is - self.localize(eastern, _end).tzinfo) - - start = utc.localize(_start) - end = utc.localize(_end) - assert (timezones.infer_tzinfo(start, end) is utc) - - end = self.localize(eastern, _end) - pytest.raises(Exception, timezones.infer_tzinfo, start, end) - pytest.raises(Exception, timezones.infer_tzinfo, end, start) - - def test_replace_across_dst(self): - # GH#18319 check that 1) timezone is correctly normalized and - # 2) that hour is not incorrectly changed by this normalization - tz = self.tz('US/Eastern') - - ts_naive = Timestamp('2017-12-03 16:03:30') - ts_aware = self.localize(tz, ts_naive) - - # Preliminary sanity-check - assert ts_aware == self.normalize(ts_aware) - - # Replace across DST boundary - ts2 = ts_aware.replace(month=6) - - # Check that `replace` preserves hour literal - assert (ts2.hour, ts2.minute) == (ts_aware.hour, ts_aware.minute) - - # Check that post-replace object is appropriately normalized - ts2b = self.normalize(ts2) - assert ts2 == ts2b - - -class TestTimeZoneSupportDateutil(TestTimeZoneSupportPytz): - - def tz(self, tz): - """ - Construct a 
dateutil timezone. - Use tslib.maybe_get_tz so that we get the filename on the tz right - on windows. See #7337. - """ - return timezones.maybe_get_tz('dateutil/' + tz) - - def tzstr(self, tz): - """ Construct a timezone string from a string. Overridden in subclass - to parameterize tests. """ - return 'dateutil/' + tz - - def cmptz(self, tz1, tz2): - """ Compare two timezones. Overridden in subclass to parameterize - tests. """ - return tz1 == tz2 - - def localize(self, tz, x): - return x.replace(tzinfo=tz) - - def normalize(self, ts): - # no-op for dateutil - return ts diff --git a/pandas/tests/tslibs/test_timezones.py b/pandas/tests/tslibs/test_timezones.py index 603c5e3fea26f..1bb355f267938 100644 --- a/pandas/tests/tslibs/test_timezones.py +++ b/pandas/tests/tslibs/test_timezones.py @@ -5,6 +5,7 @@ import pytz import dateutil.tz +from pandas._libs import tslib from pandas._libs.tslibs import timezones from pandas import Timestamp @@ -35,3 +36,33 @@ def test_tzlocal(): offset = dateutil.tz.tzlocal().utcoffset(datetime(2011, 1, 1)) offset = offset.total_seconds() * 1000000000 assert ts.value + offset == Timestamp('2011-01-01').value + + +@pytest.mark.parametrize('eastern, localize', [ + (pytz.timezone('US/Eastern'), lambda tz, x: tz.localize(x)), + (dateutil.tz.gettz('US/Eastern'), lambda tz, x: x.replace(tzinfo=tz))]) +def test_infer_tz(eastern, localize): + utc = pytz.utc + + start_naive = datetime(2001, 1, 1) + end_naive = datetime(2009, 1, 1) + + start = localize(eastern, start_naive) + end = localize(eastern, end_naive) + + assert (timezones.infer_tzinfo(start, end) is + tslib._localize_pydatetime(start_naive, eastern).tzinfo) + assert (timezones.infer_tzinfo(start, None) is + tslib._localize_pydatetime(start_naive, eastern).tzinfo) + assert (timezones.infer_tzinfo(None, end) is + tslib._localize_pydatetime(end_naive, eastern).tzinfo) + + start = utc.localize(start_naive) + end = utc.localize(end_naive) + assert timezones.infer_tzinfo(start, end) is utc + + 
end = tslib._localize_pydatetime(end_naive, eastern) + with pytest.raises(Exception): + timezones.infer_tzinfo(start, end) + with pytest.raises(Exception): + timezones.infer_tzinfo(end, start) From c0f761d98d5e23452f3cce4c21b55085da45e555 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 18 Feb 2018 08:36:24 -0800 Subject: [PATCH 124/217] dispatch frame methods to series versions instead of re-implementing masking etc (#19611) --- pandas/core/frame.py | 78 +++++--------------- pandas/core/indexes/base.py | 49 ++++--------- pandas/core/ops.py | 102 ++++++++++++++++++++++---- pandas/tests/frame/test_arithmetic.py | 17 +++++ 4 files changed, 138 insertions(+), 108 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a001037b573d4..b96af6af3707f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3944,34 +3944,27 @@ def _combine_frame(self, other, func, fill_value=None, level=None): new_index, new_columns = this.index, this.columns def _arith_op(left, right): + # for the mixed_type case where we iterate over columns, + # _arith_op(left, right) is equivalent to + # left._binop(right, func, fill_value=fill_value) left, right = ops.fill_binop(left, right, fill_value) return func(left, right) if this._is_mixed_type or other._is_mixed_type: - - # unique + # iterate over columns if this.columns.is_unique: - - def f(col): - r = _arith_op(this[col].values, other[col].values) - return self._constructor_sliced(r, index=new_index, - dtype=r.dtype) - - result = {col: f(col) for col in this} - - # non-unique + # unique columns + result = {col: _arith_op(this[col], other[col]) + for col in this} + result = self._constructor(result, index=new_index, + columns=new_columns, copy=False) else: - - def f(i): - r = _arith_op(this.iloc[:, i].values, - other.iloc[:, i].values) - return self._constructor_sliced(r, index=new_index, - dtype=r.dtype) - - result = {i: f(i) for i, col in enumerate(this.columns)} + # non-unique columns + result = {i: 
_arith_op(this.iloc[:, i], other.iloc[:, i]) + for i, col in enumerate(this.columns)} result = self._constructor(result, index=new_index, copy=False) result.columns = new_columns - return result + return result else: result = _arith_op(this.values, other.values) @@ -3979,36 +3972,11 @@ def f(i): return self._constructor(result, index=new_index, columns=new_columns, copy=False) - def _combine_series(self, other, func, fill_value=None, axis=None, - level=None, try_cast=True): - if fill_value is not None: - raise NotImplementedError("fill_value {fill} not supported." - .format(fill=fill_value)) - - if axis is not None: - axis = self._get_axis_name(axis) - if axis == 'index': - return self._combine_match_index(other, func, level=level) - else: - return self._combine_match_columns(other, func, level=level, - try_cast=try_cast) - else: - if not len(other): - return self * np.nan - - if not len(self): - # Ambiguous case, use _series so works with DataFrame - return self._constructor(data=self._series, index=self.index, - columns=self.columns) - - # default axis is columns - return self._combine_match_columns(other, func, level=level, - try_cast=try_cast) - def _combine_match_index(self, other, func, level=None): left, right = self.align(other, join='outer', axis=0, level=level, copy=False) - return self._constructor(func(left.values.T, right.values).T, + new_data = func(left.values.T, right.values).T + return self._constructor(new_data, index=left.index, columns=self.columns, copy=False) @@ -4027,7 +3995,8 @@ def _combine_const(self, other, func, errors='raise', try_cast=True): try_cast=try_cast) return self._constructor(new_data) - def _compare_frame_evaluate(self, other, func, str_rep, try_cast=True): + def _compare_frame(self, other, func, str_rep, try_cast=True): + # compare_frame assumes self._indexed_same(other) import pandas.core.computation.expressions as expressions # unique @@ -4052,19 +4021,6 @@ def _compare(a, b): result.columns = self.columns return result - 
def _compare_frame(self, other, func, str_rep, try_cast=True): - if not self._indexed_same(other): - raise ValueError('Can only compare identically-labeled ' - 'DataFrame objects') - return self._compare_frame_evaluate(other, func, str_rep, - try_cast=try_cast) - - def _flex_compare_frame(self, other, func, str_rep, level, try_cast=True): - if not self._indexed_same(other): - self, other = self.align(other, 'outer', level=level, copy=False) - return self._compare_frame_evaluate(other, func, str_rep, - try_cast=try_cast) - def combine(self, other, func, fill_value=None, overwrite=True): """ Add two DataFrame objects and do not propagate NaN values, so if for a diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 02dd2dbc25703..7dfa34bd634ad 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -55,7 +55,7 @@ import pandas.core.algorithms as algos import pandas.core.sorting as sorting from pandas.io.formats.printing import pprint_thing -from pandas.core.ops import _comp_method_OBJECT_ARRAY +from pandas.core.ops import _comp_method_OBJECT_ARRAY, make_invalid_op from pandas.core.config import get_option from pandas.core.strings import StringMethods @@ -82,26 +82,6 @@ def _try_get_item(x): return x -def _make_invalid_op(name): - """ - Return a binary method that always raises a TypeError. 
- - Parameters - ---------- - name : str - - Returns - ------- - invalid_op : function - """ - def invalid_op(self, other=None): - raise TypeError("cannot perform {name} with this index type: " - "{typ}".format(name=name, typ=type(self))) - - invalid_op.__name__ = name - return invalid_op - - class InvalidIndexError(Exception): pass @@ -3994,22 +3974,23 @@ def _evaluate_compare(self, other): @classmethod def _add_numeric_methods_add_sub_disabled(cls): """ add in the numeric add/sub methods to disable """ - cls.__add__ = cls.__radd__ = __iadd__ = _make_invalid_op('__add__') # noqa - cls.__sub__ = __isub__ = _make_invalid_op('__sub__') # noqa + cls.__add__ = cls.__radd__ = __iadd__ = make_invalid_op('__add__') # noqa + cls.__sub__ = __isub__ = make_invalid_op('__sub__') # noqa @classmethod def _add_numeric_methods_disabled(cls): """ add in numeric methods to disable other than add/sub """ - cls.__pow__ = cls.__rpow__ = _make_invalid_op('__pow__') - cls.__mul__ = cls.__rmul__ = _make_invalid_op('__mul__') - cls.__floordiv__ = cls.__rfloordiv__ = _make_invalid_op('__floordiv__') - cls.__truediv__ = cls.__rtruediv__ = _make_invalid_op('__truediv__') + cls.__pow__ = make_invalid_op('__pow__') + cls.__rpow__ = make_invalid_op('__rpow__') + cls.__mul__ = cls.__rmul__ = make_invalid_op('__mul__') + cls.__floordiv__ = cls.__rfloordiv__ = make_invalid_op('__floordiv__') + cls.__truediv__ = cls.__rtruediv__ = make_invalid_op('__truediv__') if not compat.PY3: - cls.__div__ = cls.__rdiv__ = _make_invalid_op('__div__') - cls.__neg__ = _make_invalid_op('__neg__') - cls.__pos__ = _make_invalid_op('__pos__') - cls.__abs__ = _make_invalid_op('__abs__') - cls.__inv__ = _make_invalid_op('__inv__') + cls.__div__ = cls.__rdiv__ = make_invalid_op('__div__') + cls.__neg__ = make_invalid_op('__neg__') + cls.__pos__ = make_invalid_op('__pos__') + cls.__abs__ = make_invalid_op('__abs__') + cls.__inv__ = make_invalid_op('__inv__') def _maybe_update_attributes(self, attrs): """ Update Index 
attributes (e.g. freq) depending on op """ @@ -4207,8 +4188,8 @@ def logical_func(self, *args, **kwargs): @classmethod def _add_logical_methods_disabled(cls): """ add in logical methods to disable """ - cls.all = _make_invalid_op('all') - cls.any = _make_invalid_op('any') + cls.all = make_invalid_op('all') + cls.any = make_invalid_op('any') Index._add_numeric_methods_disabled() diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 4c234ccb4dd47..fd4fc5540fcec 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -96,6 +96,26 @@ def rxor(left, right): # ----------------------------------------------------------------------------- +def make_invalid_op(name): + """ + Return a binary method that always raises a TypeError. + + Parameters + ---------- + name : str + + Returns + ------- + invalid_op : function + """ + def invalid_op(self, other=None): + raise TypeError("cannot perform {name} with this index type: " + "{typ}".format(name=name, typ=type(self).__name__)) + + invalid_op.__name__ = name + return invalid_op + + def _gen_eval_kwargs(name): """ Find the keyword arguments to pass to numexpr for the given operation. 
@@ -1047,8 +1067,8 @@ def flex_wrapper(self, other, level=None, fill_value=None, axis=0): elif isinstance(other, (np.ndarray, list, tuple)): if len(other) != len(self): raise ValueError('Lengths must be equal') - return self._binop(self._constructor(other, self.index), op, - level=level, fill_value=fill_value) + other = self._constructor(other, self.index) + return self._binop(other, op, level=level, fill_value=fill_value) else: if fill_value is not None: self = self.fillna(fill_value) @@ -1071,6 +1091,51 @@ def flex_wrapper(self, other, level=None, fill_value=None, axis=0): # ----------------------------------------------------------------------------- # DataFrame +def _combine_series_frame(self, other, func, fill_value=None, axis=None, + level=None, try_cast=True): + """ + Apply binary operator `func` to self, other using alignment and fill + conventions determined by the fill_value, axis, level, and try_cast kwargs. + + Parameters + ---------- + self : DataFrame + other : Series + func : binary operator + fill_value : object, default None + axis : {0, 1, 'columns', 'index', None}, default None + level : int or None, default None + try_cast : bool, default True + + Returns + ------- + result : DataFrame + """ + if fill_value is not None: + raise NotImplementedError("fill_value {fill} not supported." 
+ .format(fill=fill_value)) + + if axis is not None: + axis = self._get_axis_number(axis) + if axis == 0: + return self._combine_match_index(other, func, level=level) + else: + return self._combine_match_columns(other, func, level=level, + try_cast=try_cast) + else: + if not len(other): + return self * np.nan + + if not len(self): + # Ambiguous case, use _series so works with DataFrame + return self._constructor(data=self._series, index=self.index, + columns=self.columns) + + # default axis is columns + return self._combine_match_columns(other, func, level=level, + try_cast=try_cast) + + def _align_method_FRAME(left, right, axis): """ convert rhs to meet lhs dims if input is list, tuple or np.ndarray """ @@ -1179,8 +1244,9 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): if isinstance(other, ABCDataFrame): # Another DataFrame return self._combine_frame(other, na_op, fill_value, level) elif isinstance(other, ABCSeries): - return self._combine_series(other, na_op, fill_value, axis, level, - try_cast=True) + return _combine_series_frame(self, other, na_op, + fill_value=fill_value, axis=axis, + level=level, try_cast=True) else: if fill_value is not None: self = self.fillna(fill_value) @@ -1209,13 +1275,17 @@ def f(self, other, axis=default_axis, level=None): other = _align_method_FRAME(self, other, axis) - if isinstance(other, ABCDataFrame): # Another DataFrame - return self._flex_compare_frame(other, na_op, str_rep, level, - try_cast=False) + if isinstance(other, ABCDataFrame): + # Another DataFrame + if not self._indexed_same(other): + self, other = self.align(other, 'outer', + level=level, copy=False) + return self._compare_frame(other, na_op, str_rep, try_cast=False) elif isinstance(other, ABCSeries): - return self._combine_series(other, na_op, None, axis, level, - try_cast=False) + return _combine_series_frame(self, other, na_op, + fill_value=None, axis=axis, + level=level, try_cast=False) else: return self._combine_const(other, na_op, 
try_cast=False) @@ -1227,11 +1297,17 @@ def f(self, other, axis=default_axis, level=None): def _comp_method_FRAME(func, name, str_rep): @Appender('Wrapper for comparison method {name}'.format(name=name)) def f(self, other): - if isinstance(other, ABCDataFrame): # Another DataFrame - return self._compare_frame(other, func, str_rep) + if isinstance(other, ABCDataFrame): + # Another DataFrame + if not self._indexed_same(other): + raise ValueError('Can only compare identically-labeled ' + 'DataFrame objects') + return self._compare_frame(other, func, str_rep, try_cast=True) + elif isinstance(other, ABCSeries): - return self._combine_series(other, func, - axis=None, try_cast=False) + return _combine_series_frame(self, other, func, + fill_value=None, axis=None, + level=None, try_cast=False) else: # straight boolean comparisons we want to allow all columns diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index a3a799aed1c55..65afe85628f8e 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -72,6 +72,23 @@ def test_tz_aware_scalar_comparison(self, timestamps): # ------------------------------------------------------------------- # Arithmetic +class TestFrameFlexArithmetic(object): + def test_df_add_flex_filled_mixed_dtypes(self): + # GH#19611 + dti = pd.date_range('2016-01-01', periods=3) + ser = pd.Series(['1 Day', 'NaT', '2 Days'], dtype='timedelta64[ns]') + df = pd.DataFrame({'A': dti, 'B': ser}) + other = pd.DataFrame({'A': ser, 'B': ser}) + fill = pd.Timedelta(days=1).to_timedelta64() + result = df.add(other, fill_value=fill) + + expected = pd.DataFrame( + {'A': pd.Series(['2016-01-02', '2016-01-03', '2016-01-05'], + dtype='datetime64[ns]'), + 'B': ser * 2}) + tm.assert_frame_equal(result, expected) + + class TestFrameMulDiv(object): """Tests for DataFrame multiplication and division""" # ------------------------------------------------------------------ From 
de9e8677353ea1e622117522c4ce38f0cf47a60b Mon Sep 17 00:00:00 2001 From: William Ayd Date: Sun, 18 Feb 2018 08:39:49 -0800 Subject: [PATCH 125/217] Removed if...else for K > 1 (#19734) --- pandas/_libs/groupby_helper.pxi.in | 176 ++++++++--------------------- 1 file changed, 47 insertions(+), 129 deletions(-) diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in index fe4d31516d839..93fbb4477e2d0 100644 --- a/pandas/_libs/groupby_helper.pxi.in +++ b/pandas/_libs/groupby_helper.pxi.in @@ -56,36 +56,19 @@ def group_add_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, with nogil: - if K > 1: - - for i in range(N): - lab = labels[i] - if lab < 0: - continue - - counts[lab] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[lab, j] += 1 - sumx[lab, j] += val - - else: - - for i in range(N): - lab = labels[i] - if lab < 0: - continue + for i in range(N): + lab = labels[i] + if lab < 0: + continue - counts[lab] += 1 - val = values[i, 0] + counts[lab] += 1 + for j in range(K): + val = values[i, j] # not nan if val == val: - nobs[lab, 0] += 1 - sumx[lab, 0] += val + nobs[lab, j] += 1 + sumx[lab, j] += val for i in range(ncounts): for j in range(K): @@ -119,33 +102,19 @@ def group_prod_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, N, K = ( values).shape with nogil: - if K > 1: - for i in range(N): - lab = labels[i] - if lab < 0: - continue - - counts[lab] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[lab, j] += 1 - prodx[lab, j] *= val - else: - for i in range(N): - lab = labels[i] - if lab < 0: - continue + for i in range(N): + lab = labels[i] + if lab < 0: + continue - counts[lab] += 1 - val = values[i, 0] + counts[lab] += 1 + for j in range(K): + val = values[i, j] # not nan if val == val: - nobs[lab, 0] += 1 - prodx[lab, 0] *= val + nobs[lab, j] += 1 + prodx[lab, j] *= val for i in range(ncounts): for j in range(K): @@ -231,31 +200,18 @@ def 
group_mean_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, N, K = ( values).shape with nogil: - if K > 1: - for i in range(N): - lab = labels[i] - if lab < 0: - continue - - counts[lab] += 1 - for j in range(K): - val = values[i, j] - # not nan - if val == val: - nobs[lab, j] += 1 - sumx[lab, j] += val - else: - for i in range(N): - lab = labels[i] - if lab < 0: - continue + for i in range(N): + lab = labels[i] + if lab < 0: + continue - counts[lab] += 1 - val = values[i, 0] + counts[lab] += 1 + for j in range(K): + val = values[i, j] # not nan if val == val: - nobs[lab, 0] += 1 - sumx[lab, 0] += val + nobs[lab, j] += 1 + sumx[lab, j] += val for i in range(ncounts): for j in range(K): @@ -670,33 +626,14 @@ def group_max_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, N, K = ( values).shape with nogil: - if K > 1: - for i in range(N): - lab = labels[i] - if lab < 0: - continue - - counts[lab] += 1 - for j in range(K): - val = values[i, j] - - # not nan - {{if name == 'int64'}} - if val != {{nan_val}}: - {{else}} - if val == val and val != {{nan_val}}: - {{endif}} - nobs[lab, j] += 1 - if val > maxx[lab, j]: - maxx[lab, j] = val - else: - for i in range(N): - lab = labels[i] - if lab < 0: - continue + for i in range(N): + lab = labels[i] + if lab < 0: + continue - counts[lab] += 1 - val = values[i, 0] + counts[lab] += 1 + for j in range(K): + val = values[i, j] # not nan {{if name == 'int64'}} @@ -704,9 +641,9 @@ def group_max_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, {{else}} if val == val and val != {{nan_val}}: {{endif}} - nobs[lab, 0] += 1 - if val > maxx[lab, 0]: - maxx[lab, 0] = val + nobs[lab, j] += 1 + if val > maxx[lab, j]: + maxx[lab, j] = val for i in range(ncounts): for j in range(K): @@ -744,33 +681,14 @@ def group_min_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, N, K = ( values).shape with nogil: - if K > 1: - for i in range(N): - lab = labels[i] - if lab < 0: - continue - - counts[lab] += 1 - for j in range(K): - val = values[i, j] - - # not nan - 
{{if name == 'int64'}} - if val != {{nan_val}}: - {{else}} - if val == val and val != {{nan_val}}: - {{endif}} - nobs[lab, j] += 1 - if val < minx[lab, j]: - minx[lab, j] = val - else: - for i in range(N): - lab = labels[i] - if lab < 0: - continue + for i in range(N): + lab = labels[i] + if lab < 0: + continue - counts[lab] += 1 - val = values[i, 0] + counts[lab] += 1 + for j in range(K): + val = values[i, j] # not nan {{if name == 'int64'}} @@ -778,9 +696,9 @@ def group_min_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, {{else}} if val == val and val != {{nan_val}}: {{endif}} - nobs[lab, 0] += 1 - if val < minx[lab, 0]: - minx[lab, 0] = val + nobs[lab, j] += 1 + if val < minx[lab, j]: + minx[lab, j] = val for i in range(ncounts): for j in range(K): From 1f4484ca4e1af6bbdc98b04c37ef9f32c9da982f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 18 Feb 2018 08:47:29 -0800 Subject: [PATCH 126/217] Dispatch categorical Series ops to Categorical (#19582) --- doc/source/whatsnew/v0.23.0.txt | 2 + pandas/core/arrays/categorical.py | 3 ++ pandas/core/indexes/category.py | 25 ++++++--- pandas/core/ops.py | 72 +++++++++++++++----------- pandas/tests/indexes/common.py | 1 + pandas/tests/series/test_arithmetic.py | 34 ++++++++++++ 6 files changed, 99 insertions(+), 38 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index a2198d9103528..11c49995372f5 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -849,3 +849,5 @@ Other ^^^^^ - Improved error message when attempting to use a Python keyword as an identifier in a ``numexpr`` backed query (:issue:`18221`) +- Comparisons between :class:`Series` and :class:`Index` would return a ``Series`` with an incorrect name, ignoring the ``Index``'s name attribute (:issue:`19582`) +- diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index bcf9cb7646704..7354115f8295e 100644 --- a/pandas/core/arrays/categorical.py +++ 
b/pandas/core/arrays/categorical.py @@ -53,6 +53,9 @@ def f(self, other): # results depending whether categories are the same or not is kind of # insane, so be a bit stricter here and use the python3 idea of # comparing only things of equal type. + if isinstance(other, ABCSeries): + return NotImplemented + if not self.ordered: if op in ['__lt__', '__gt__', '__le__', '__ge__']: raise TypeError("Unordered Categoricals can only compare " diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index a4d0f787cc6ec..218851b1713f2 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -1,3 +1,5 @@ +import operator + import numpy as np from pandas._libs import index as libindex @@ -738,7 +740,9 @@ def _codes_for_groupby(self, sort): def _add_comparison_methods(cls): """ add in comparison methods """ - def _make_compare(opname): + def _make_compare(op): + opname = '__{op}__'.format(op=op.__name__) + def _evaluate_compare(self, other): # if we have a Categorical type, then must have the same @@ -761,16 +765,21 @@ def _evaluate_compare(self, other): "have the same categories and ordered " "attributes") - return getattr(self.values, opname)(other) + result = op(self.values, other) + if isinstance(result, ABCSeries): + # Dispatch to pd.Categorical returned NotImplemented + # and we got a Series back; down-cast to ndarray + result = result.values + return result return compat.set_function_name(_evaluate_compare, opname, cls) - cls.__eq__ = _make_compare('__eq__') - cls.__ne__ = _make_compare('__ne__') - cls.__lt__ = _make_compare('__lt__') - cls.__gt__ = _make_compare('__gt__') - cls.__le__ = _make_compare('__le__') - cls.__ge__ = _make_compare('__ge__') + cls.__eq__ = _make_compare(operator.eq) + cls.__ne__ = _make_compare(operator.ne) + cls.__lt__ = _make_compare(operator.lt) + cls.__gt__ = _make_compare(operator.gt) + cls.__le__ = _make_compare(operator.le) + cls.__ge__ = _make_compare(operator.ge) def 
_delegate_method(self, name, *args, **kwargs): """ method delegation to the ._values """ diff --git a/pandas/core/ops.py b/pandas/core/ops.py index fd4fc5540fcec..dff2b6844af94 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -819,7 +819,7 @@ def dispatch_to_index_op(op, left, right, index_class): # avoid accidentally allowing integer add/sub. For datetime64[tz] dtypes, # left_idx may inherit a freq from a cached DatetimeIndex. # See discussion in GH#19147. - if left_idx.freq is not None: + if getattr(left_idx, 'freq', None) is not None: left_idx = left_idx._shallow_copy(freq=None) try: result = op(left_idx, right) @@ -867,9 +867,8 @@ def na_op(x, y): # dispatch to the categorical if we have a categorical # in either operand - if is_categorical_dtype(x): - return op(x, y) - elif is_categorical_dtype(y) and not is_scalar(y): + if is_categorical_dtype(y) and not is_scalar(y): + # The `not is_scalar(y)` check excludes the string "category" return op(y, x) elif is_object_dtype(x.dtype): @@ -917,17 +916,36 @@ def wrapper(self, other, axis=None): if axis is not None: self._get_axis_number(axis) + res_name = _get_series_op_result_name(self, other) + if isinstance(other, ABCDataFrame): # pragma: no cover # Defer to DataFrame implementation; fail early return NotImplemented + elif isinstance(other, ABCSeries) and not self._indexed_same(other): + raise ValueError("Can only compare identically-labeled " + "Series objects") + + elif is_categorical_dtype(self): + # Dispatch to Categorical implementation; pd.CategoricalIndex + # behavior is non-canonical GH#19513 + res_values = dispatch_to_index_op(op, self, other, pd.Categorical) + return self._constructor(res_values, index=self.index, + name=res_name) + + elif is_timedelta64_dtype(self): + res_values = dispatch_to_index_op(op, self, other, + pd.TimedeltaIndex) + return self._constructor(res_values, index=self.index, + name=res_name) + elif isinstance(other, ABCSeries): - name = com._maybe_match_name(self, other) - 
if not self._indexed_same(other): - msg = 'Can only compare identically-labeled Series objects' - raise ValueError(msg) + # By this point we have checked that self._indexed_same(other) res_values = na_op(self.values, other.values) - return self._constructor(res_values, index=self.index, name=name) + # rename is needed in case res_name is None and res_values.name + # is not. + return self._constructor(res_values, index=self.index, + name=res_name).rename(res_name) elif isinstance(other, (np.ndarray, pd.Index)): # do not check length of zerodim array @@ -937,15 +955,17 @@ def wrapper(self, other, axis=None): raise ValueError('Lengths must match to compare') res_values = na_op(self.values, np.asarray(other)) - return self._constructor(res_values, - index=self.index).__finalize__(self) - - elif (isinstance(other, pd.Categorical) and - not is_categorical_dtype(self)): - raise TypeError("Cannot compare a Categorical for op {op} with " - "Series of dtype {typ}.\nIf you want to compare " - "values, use 'series np.asarray(other)'." - .format(op=op, typ=self.dtype)) + result = self._constructor(res_values, index=self.index) + # rename is needed in case res_name is None and self.name + # is not. + return result.__finalize__(self).rename(res_name) + + elif isinstance(other, pd.Categorical): + # ordering of checks matters; by this point we know + # that not is_categorical_dtype(self) + res_values = op(self.values, other) + return self._constructor(res_values, index=self.index, + name=res_name) elif is_scalar(other) and isna(other): # numpy does not like comparisons vs None @@ -956,16 +976,9 @@ def wrapper(self, other, axis=None): return self._constructor(res_values, index=self.index, name=self.name, dtype='bool') - if is_categorical_dtype(self): - # cats are a special case as get_values() would return an ndarray, - # which would then not take categories ordering into account - # we can go directly to op, as the na_op would just test again and - # dispatch to it. 
- with np.errstate(all='ignore'): - res = op(self.values, other) else: values = self.get_values() - if isinstance(other, (list, np.ndarray)): + if isinstance(other, list): other = np.asarray(other) with np.errstate(all='ignore'): @@ -975,10 +988,9 @@ def wrapper(self, other, axis=None): .format(typ=type(other))) # always return a full value series here - res = com._values_from_object(res) - - res = pd.Series(res, index=self.index, name=self.name, dtype='bool') - return res + res_values = com._values_from_object(res) + return pd.Series(res_values, index=self.index, + name=res_name, dtype='bool') return wrapper diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 2d8d70aa2ac84..1162662bf9a08 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -790,6 +790,7 @@ def test_equals_op(self): series_d = Series(array_d) with tm.assert_raises_regex(ValueError, "Lengths must match"): index_a == series_b + tm.assert_numpy_array_equal(index_a == series_a, expected1) tm.assert_numpy_array_equal(index_a == series_c, expected2) diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index 94da97ef45301..f727edf8fb7d8 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -43,6 +43,40 @@ def test_ser_flex_cmp_return_dtypes_empty(self, opname): result = getattr(empty, opname)(const).get_dtype_counts() tm.assert_series_equal(result, Series([1], ['bool'])) + @pytest.mark.parametrize('op', [operator.eq, operator.ne, + operator.le, operator.lt, + operator.ge, operator.gt]) + @pytest.mark.parametrize('names', [(None, None, None), + ('foo', 'bar', None), + ('baz', 'baz', 'baz')]) + def test_ser_cmp_result_names(self, names, op): + # datetime64 dtype + dti = pd.date_range('1949-06-07 03:00:00', + freq='H', periods=5, name=names[0]) + ser = Series(dti).rename(names[1]) + result = op(ser, dti) + assert result.name == names[2] + + # datetime64tz dtype + 
dti = dti.tz_localize('US/Central') + ser = Series(dti).rename(names[1]) + result = op(ser, dti) + assert result.name == names[2] + + # timedelta64 dtype + tdi = dti - dti.shift(1) + ser = Series(tdi).rename(names[1]) + result = op(ser, tdi) + assert result.name == names[2] + + # categorical + if op in [operator.eq, operator.ne]: + # categorical dtype comparisons raise for inequalities + cidx = tdi.astype('category') + ser = Series(cidx).rename(names[1]) + result = op(ser, cidx) + assert result.name == names[2] + class TestTimestampSeriesComparison(object): def test_dt64ser_cmp_period_scalar(self): From 84a0e23eb5974476c6aa2b54d9835c085a287222 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Sun, 18 Feb 2018 17:14:38 +0000 Subject: [PATCH 127/217] DOC/BLD: Pinning sphinx to 1.5, as 1.7 has been released and it's incompatible with vendored numpydoc (#19743) --- ci/requirements_dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/requirements_dev.txt b/ci/requirements_dev.txt index 82f8de277c57b..a474658fa2922 100644 --- a/ci/requirements_dev.txt +++ b/ci/requirements_dev.txt @@ -7,4 +7,4 @@ pytest>=3.1 python-dateutil>=2.5.0 pytz setuptools>=3.3 -sphinx +sphinx=1.5* From d27bd54fb57c43119ab16650fb63cde295fd1487 Mon Sep 17 00:00:00 2001 From: ZhuBaohe Date: Mon, 19 Feb 2018 01:46:53 +0800 Subject: [PATCH 128/217] DOC: correct merge_asof example (#19737) --- pandas/core/reshape/merge.py | 4 +-- pandas/tests/reshape/merge/test_merge_asof.py | 29 +++++++++++++++---- 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 4b99b0407cfcc..7b1a0875bba59 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -457,8 +457,8 @@ def merge_asof(left, right, on=None, time ticker price quantity bid ask 0 2016-05-25 13:30:00.023 MSFT 51.95 75 NaN NaN 1 2016-05-25 13:30:00.038 MSFT 51.95 155 51.97 51.98 - 2 2016-05-25 13:30:00.048 GOOG 720.77 100 720.50 720.93 - 3 
2016-05-25 13:30:00.048 GOOG 720.92 100 720.50 720.93 + 2 2016-05-25 13:30:00.048 GOOG 720.77 100 NaN NaN + 3 2016-05-25 13:30:00.048 GOOG 720.92 100 NaN NaN 4 2016-05-25 13:30:00.048 AAPL 98.00 100 NaN NaN See also diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py index 2f48aef1894a9..cebbcc41c3e17 100644 --- a/pandas/tests/reshape/merge/test_merge_asof.py +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -92,11 +92,30 @@ def test_examples2(self): by='ticker', tolerance=pd.Timedelta('2ms')) - pd.merge_asof(trades, quotes, - on='time', - by='ticker', - tolerance=pd.Timedelta('10ms'), - allow_exact_matches=False) + expected = pd.DataFrame({ + 'time': pd.to_datetime(['20160525 13:30:00.023', + '20160525 13:30:00.038', + '20160525 13:30:00.048', + '20160525 13:30:00.048', + '20160525 13:30:00.048']), + 'ticker': ['MSFT', 'MSFT', 'GOOG', 'GOOG', 'AAPL'], + 'price': [51.95, 51.95, + 720.77, 720.92, 98.00], + 'quantity': [75, 155, + 100, 100, 100], + 'bid': [np.nan, 51.97, np.nan, + np.nan, np.nan], + 'ask': [np.nan, 51.98, np.nan, + np.nan, np.nan]}, + columns=['time', 'ticker', 'price', 'quantity', + 'bid', 'ask']) + + result = pd.merge_asof(trades, quotes, + on='time', + by='ticker', + tolerance=pd.Timedelta('10ms'), + allow_exact_matches=False) + assert_frame_equal(result, expected) def test_examples3(self): """ doc-string examples """ From 0ca66801e6159b30734663b6dcdc187535523e71 Mon Sep 17 00:00:00 2001 From: Thomas A Caswell Date: Sun, 18 Feb 2018 18:32:33 -0500 Subject: [PATCH 129/217] FIX: const-correctness in numpy helpers (#19749) In python 3.7 the return type of PyUnicode_AsUTF8 changed from (char *) to (const char *). PyUnicode_FromString also takes (const char *) as input, also be explicit about that. 
https://bugs.python.org/issue28769 commit 2a404b63d48d73bbaa007d89efb7a01048475acd in cpython --- pandas/_libs/src/numpy_helper.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/src/numpy_helper.h b/pandas/_libs/src/numpy_helper.h index 844be9b292be3..5cfa51dc8a0be 100644 --- a/pandas/_libs/src/numpy_helper.h +++ b/pandas/_libs/src/numpy_helper.h @@ -32,7 +32,7 @@ PANDAS_INLINE PyObject* get_value_1d(PyArrayObject* ap, Py_ssize_t i) { // returns ASCII or UTF8 (py3) view on python str // python object owns memory, should not be freed -PANDAS_INLINE char* get_c_string(PyObject* obj) { +PANDAS_INLINE const char* get_c_string(PyObject* obj) { #if PY_VERSION_HEX >= 0x03000000 return PyUnicode_AsUTF8(obj); #else @@ -40,7 +40,7 @@ PANDAS_INLINE char* get_c_string(PyObject* obj) { #endif } -PANDAS_INLINE PyObject* char_to_string(char* data) { +PANDAS_INLINE PyObject* char_to_string(const char* data) { #if PY_VERSION_HEX >= 0x03000000 return PyUnicode_FromString(data); #else From 52be57d55d7d3367a28c7f9c1e66478330472bf2 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 19 Feb 2018 14:18:17 +0100 Subject: [PATCH 130/217] DOC/BLD: update vendored IPython.sphinxext version (#19765) updated to commit cc353b25b0fff58e4ed13899df9b3c8153df01d9 from ipython/ipython --- .../ipython_console_highlighting.py | 120 +------ .../ipython_sphinxext/ipython_directive.py | 339 +++++++++++------- 2 files changed, 225 insertions(+), 234 deletions(-) diff --git a/doc/sphinxext/ipython_sphinxext/ipython_console_highlighting.py b/doc/sphinxext/ipython_sphinxext/ipython_console_highlighting.py index c5ec26aefd442..b93a151fb3cb0 100644 --- a/doc/sphinxext/ipython_sphinxext/ipython_console_highlighting.py +++ b/doc/sphinxext/ipython_sphinxext/ipython_console_highlighting.py @@ -1,116 +1,28 @@ -"""reST directive for syntax-highlighting ipython interactive sessions. 
- -XXX - See what improvements can be made based on the new (as of Sept 2009) -'pycon' lexer for the python console. At the very least it will give better -highlighted tracebacks. """ +reST directive for syntax-highlighting ipython interactive sessions. -#----------------------------------------------------------------------------- -# Needed modules - -# Standard library -import re - -# Third party -from pygments.lexer import Lexer, do_insertions -from pygments.lexers.agile import (PythonConsoleLexer, PythonLexer, - PythonTracebackLexer) -from pygments.token import Comment, Generic +""" from sphinx import highlighting - -#----------------------------------------------------------------------------- -# Global constants -line_re = re.compile('.*?\n') - -#----------------------------------------------------------------------------- -# Code begins - classes and functions - - -class IPythonConsoleLexer(Lexer): - - """ - For IPython console output or doctests, such as: - - .. sourcecode:: ipython - - In [1]: a = 'foo' - - In [2]: a - Out[2]: 'foo' - - In [3]: print(a) - foo - - In [4]: 1 / 0 - - Notes: - - - Tracebacks are not currently supported. - - - It assumes the default IPython prompts, not customized ones. 
- """ - - name = 'IPython console session' - aliases = ['ipython'] - mimetypes = ['text/x-ipython-console'] - input_prompt = re.compile("(In \[[0-9]+\]: )|( \.\.\.+:)") - output_prompt = re.compile("(Out\[[0-9]+\]: )|( \.\.\.+:)") - continue_prompt = re.compile(" \.\.\.+:") - tb_start = re.compile("\-+") - - def get_tokens_unprocessed(self, text): - pylexer = PythonLexer(**self.options) - tblexer = PythonTracebackLexer(**self.options) - - curcode = '' - insertions = [] - for match in line_re.finditer(text): - line = match.group() - input_prompt = self.input_prompt.match(line) - continue_prompt = self.continue_prompt.match(line.rstrip()) - output_prompt = self.output_prompt.match(line) - if line.startswith("#"): - insertions.append((len(curcode), - [(0, Comment, line)])) - elif input_prompt is not None: - insertions.append((len(curcode), - [(0, Generic.Prompt, input_prompt.group())])) - curcode += line[input_prompt.end():] - elif continue_prompt is not None: - insertions.append((len(curcode), - [(0, Generic.Prompt, continue_prompt.group())])) - curcode += line[continue_prompt.end():] - elif output_prompt is not None: - # Use the 'error' token for output. We should probably make - # our own token, but error is typically in a bright color like - # red, so it works fine for our output prompts. - insertions.append((len(curcode), - [(0, Generic.Error, output_prompt.group())])) - curcode += line[output_prompt.end():] - else: - if curcode: - for item in do_insertions(insertions, - pylexer.get_tokens_unprocessed(curcode)): - yield item - curcode = '' - insertions = [] - yield match.start(), Generic.Output, line - if curcode: - for item in do_insertions(insertions, - pylexer.get_tokens_unprocessed(curcode)): - yield item - +from IPython.lib.lexers import IPyLexer def setup(app): """Setup as a sphinx extension.""" # This is only a lexer, so adding it below to pygments appears sufficient. 
- # But if somebody knows that the right API usage should be to do that via + # But if somebody knows what the right API usage should be to do that via # sphinx, by all means fix it here. At least having this setup.py # suppresses the sphinx warning we'd get without it. - pass + metadata = {'parallel_read_safe': True, 'parallel_write_safe': True} + return metadata + +# Register the extension as a valid pygments lexer. +# Alternatively, we could register the lexer with pygments instead. This would +# require using setuptools entrypoints: http://pygments.org/docs/plugins + +ipy2 = IPyLexer(python3=False) +ipy3 = IPyLexer(python3=True) -#----------------------------------------------------------------------------- -# Register the extension as a valid pygments lexer -highlighting.lexers['ipython'] = IPythonConsoleLexer() +highlighting.lexers['ipython'] = ipy2 +highlighting.lexers['ipython2'] = ipy2 +highlighting.lexers['ipython3'] = ipy3 diff --git a/doc/sphinxext/ipython_sphinxext/ipython_directive.py b/doc/sphinxext/ipython_sphinxext/ipython_directive.py index 5616d732eb1c6..a0e6728861b66 100644 --- a/doc/sphinxext/ipython_sphinxext/ipython_directive.py +++ b/doc/sphinxext/ipython_sphinxext/ipython_directive.py @@ -83,8 +83,29 @@ See http://matplotlib.org/sampledoc/ipython_directive.html for additional documentation. -ToDo ----- +Pseudo-Decorators +================= + +Note: Only one decorator is supported per input. If more than one decorator +is specified, then only the last one is used. + +In addition to the Pseudo-Decorators/options described at the above link, +several enhancements have been made. The directive will emit a message to the +console at build-time if code-execution resulted in an exception or warning. +You can suppress these on a per-block basis by specifying the :okexcept: +or :okwarning: options: + +.. code-block:: rst + + .. ipython:: + :okexcept: + :okwarning: + + In [1]: 1/0 + In [2]: # raise warning. 
+ +To Do +----- - Turn the ad-hoc test() function into a real test suite. - Break up ipython-specific functionality from matplotlib stuff into better @@ -98,48 +119,31 @@ - VáclavŠmilauer : Prompt generalizations. - Skipper Seabold, refactoring, cleanups, pure python addition """ -from __future__ import print_function -from __future__ import unicode_literals #----------------------------------------------------------------------------- # Imports #----------------------------------------------------------------------------- # Stdlib +import atexit +import errno import os import re import sys import tempfile import ast -from pandas.compat import zip, range, map, lmap, u, text_type, cStringIO as StringIO import warnings - -# To keep compatibility with various python versions -try: - from hashlib import md5 -except ImportError: - from md5 import md5 +import shutil +from io import StringIO # Third-party -import sphinx from docutils.parsers.rst import directives -from docutils import nodes -from sphinx.util.compat import Directive +from docutils.parsers.rst import Directive # Our own -try: - from traitlets.config import Config -except ImportError: - from IPython import Config +from traitlets.config import Config from IPython import InteractiveShell from IPython.core.profiledir import ProfileDir -from IPython.utils import io -from IPython.utils.py3compat import PY3 - -if PY3: - from io import StringIO -else: - from StringIO import StringIO #----------------------------------------------------------------------------- # Globals @@ -191,8 +195,8 @@ def block_parser(part, rgxin, rgxout, fmtin, fmtout): continue if line_stripped.startswith('@'): - # we're assuming at most one decorator -- may need to - # rethink + # Here is where we assume there is, at most, one decorator. + # Might need to rethink this. 
decorator = line_stripped continue @@ -223,12 +227,17 @@ def block_parser(part, rgxin, rgxout, fmtin, fmtout): if matchout or nextline.startswith('#'): break elif nextline.startswith(continuation): + # The default ipython_rgx* treat the space following the colon as optional. + # However, If the space is there we must consume it or code + # employing the cython_magic extension will fail to execute. + # + # This works with the default ipython_rgx* patterns, + # If you modify them, YMMV. nextline = nextline[Nc:] if nextline and nextline[0] == ' ': nextline = nextline[1:] inputline += '\n' + nextline - else: rest.append(nextline) i+= 1 @@ -250,42 +259,19 @@ def block_parser(part, rgxin, rgxout, fmtin, fmtout): return block -class DecodingStringIO(StringIO, object): - def __init__(self,buf='',encodings=('utf8',), *args, **kwds): - super(DecodingStringIO, self).__init__(buf, *args, **kwds) - self.set_encodings(encodings) - - def set_encodings(self, encodings): - self.encodings = encodings - - def write(self,data): - if isinstance(data, text_type): - return super(DecodingStringIO, self).write(data) - else: - for enc in self.encodings: - try: - data = data.decode(enc) - return super(DecodingStringIO, self).write(data) - except : - pass - # default to brute utf8 if no encoding succeeded - return super(DecodingStringIO, self).write(data.decode('utf8', 'replace')) - - class EmbeddedSphinxShell(object): """An embedded IPython instance to run inside Sphinx""" - def __init__(self, exec_lines=None,state=None): + def __init__(self, exec_lines=None): - self.cout = DecodingStringIO(u'') + self.cout = StringIO() if exec_lines is None: exec_lines = [] - self.state = state - # Create config object for IPython config = Config() + config.HistoryManager.hist_file = ':memory:' config.InteractiveShell.autocall = False config.InteractiveShell.autoindent = False config.InteractiveShell.colors = 'NoColor' @@ -297,17 +283,9 @@ def __init__(self, exec_lines=None,state=None): profile = 
ProfileDir.create_profile_dir(pdir) # Create and initialize global ipython, but don't start its mainloop. - # This will persist across different EmbededSphinxShell instances. + # This will persist across different EmbeddedSphinxShell instances. IP = InteractiveShell.instance(config=config, profile_dir=profile) - - # io.stdout redirect must be done after instantiating InteractiveShell - io.stdout = self.cout - io.stderr = self.cout - - # For debugging, so we can see normal output, use this: - #from IPython.utils.io import Tee - #io.stdout = Tee(self.cout, channel='stdout') # dbg - #io.stderr = Tee(self.cout, channel='stderr') # dbg + atexit.register(self.cleanup) # Store a few parts of IPython we'll need. self.IP = IP @@ -316,12 +294,17 @@ def __init__(self, exec_lines=None,state=None): self.input = '' self.output = '' + self.tmp_profile_dir = tmp_profile_dir self.is_verbatim = False self.is_doctest = False self.is_suppress = False # Optionally, provide more detailed information to shell. + # this is assigned by the SetUp method of IPythonDirective + # to point at itself. 
+ # + # So, you can access handy things at self.directive.state self.directive = None # on the first call to the savefig decorator, we'll import @@ -332,6 +315,9 @@ def __init__(self, exec_lines=None,state=None): for line in exec_lines: self.process_input_line(line, store_history=False) + def cleanup(self): + shutil.rmtree(self.tmp_profile_dir, ignore_errors=True) + def clear_cout(self): self.cout.seek(0) self.cout.truncate(0) @@ -346,11 +332,7 @@ def process_input_line(self, line, store_history=True): splitter.push(line) more = splitter.push_accepts_more() if not more: - try: - source_raw = splitter.source_raw_reset()[1] - except: - # recent ipython #4504 - source_raw = splitter.raw_reset() + source_raw = splitter.raw_reset() self.IP.run_cell(source_raw, store_history=store_history) finally: sys.stdout = stdout @@ -368,9 +350,9 @@ def process_image(self, decorator): source_dir = self.source_dir saveargs = decorator.split(' ') filename = saveargs[1] - # insert relative path to image file in source - outfile = os.path.relpath(os.path.join(savefig_dir,filename), - source_dir) + # insert relative path to image file in source (as absolute path for Sphinx) + outfile = '/' + os.path.relpath(os.path.join(savefig_dir,filename), + source_dir) imagerows = ['.. image:: %s'%outfile] @@ -403,17 +385,10 @@ def process_input(self, data, input_prompt, lineno): is_savefig = decorator is not None and \ decorator.startswith('@savefig') - # set the encodings to be used by DecodingStringIO - # to convert the execution output into unicode if - # needed. 
this attrib is set by IpythonDirective.run() - # based on the specified block options, defaulting to ['ut - self.cout.set_encodings(self.output_encoding) - input_lines = input.split('\n') - if len(input_lines) > 1: - if input_lines[-1] != "": - input_lines.append('') # make sure there's a blank line + if input_lines[-1] != "": + input_lines.append('') # make sure there's a blank line # so splitter buffer gets reset continuation = ' %s:'%''.join(['.']*(len(str(lineno))+2)) @@ -456,30 +431,75 @@ def process_input(self, data, input_prompt, lineno): ret.append(formatted_line) if not is_suppress and len(rest.strip()) and is_verbatim: - # the "rest" is the standard output of the - # input, which needs to be added in - # verbatim mode + # The "rest" is the standard output of the input. This needs to be + # added when in verbatim mode. If there is no "rest", then we don't + # add it, as the new line will be added by the processed output. ret.append(rest) + # Fetch the processed output. (This is not the submitted output.) self.cout.seek(0) - output = self.cout.read() + processed_output = self.cout.read() if not is_suppress and not is_semicolon: - ret.append(output) - elif is_semicolon: # get spacing right + # + # In IPythonDirective.run, the elements of `ret` are eventually + # combined such that '' entries correspond to newlines. So if + # `processed_output` is equal to '', then the adding it to `ret` + # ensures that there is a blank line between consecutive inputs + # that have no outputs, as in: + # + # In [1]: x = 4 + # + # In [2]: x = 5 + # + # When there is processed output, it has a '\n' at the tail end. So + # adding the output to `ret` will provide the necessary spacing + # between consecutive input/output blocks, as in: + # + # In [1]: x + # Out[1]: 5 + # + # In [2]: x + # Out[2]: 5 + # + # When there is stdout from the input, it also has a '\n' at the + # tail end, and so this ensures proper spacing as well. 
E.g.: + # + # In [1]: print x + # 5 + # + # In [2]: x = 5 + # + # When in verbatim mode, `processed_output` is empty (because + # nothing was passed to IP. Sometimes the submitted code block has + # an Out[] portion and sometimes it does not. When it does not, we + # need to ensure proper spacing, so we have to add '' to `ret`. + # However, if there is an Out[] in the submitted code, then we do + # not want to add a newline as `process_output` has stuff to add. + # The difficulty is that `process_input` doesn't know if + # `process_output` will be called---so it doesn't know if there is + # Out[] in the code block. The requires that we include a hack in + # `process_block`. See the comments there. + # + ret.append(processed_output) + elif is_semicolon: + # Make sure there is a newline after the semicolon. ret.append('') # context information - filename = self.state.document.current_source - lineno = self.state.document.current_line + filename = "Unknown" + lineno = 0 + if self.directive.state: + filename = self.directive.state.document.current_source + lineno = self.directive.state.document.current_line # output any exceptions raised during execution to stdout # unless :okexcept: has been specified. 
- if not is_okexcept and "Traceback" in output: + if not is_okexcept and "Traceback" in processed_output: s = "\nException in %s at block ending on line %s\n" % (filename, lineno) s += "Specify :okexcept: as an option in the ipython:: block to suppress this message\n" sys.stdout.write('\n\n>>>' + ('-' * 73)) sys.stdout.write(s) - sys.stdout.write(output) + sys.stdout.write(processed_output) sys.stdout.write('<<<' + ('-' * 73) + '\n\n') # output any warning raised during execution to stdout @@ -490,28 +510,32 @@ def process_input(self, data, input_prompt, lineno): s += "Specify :okwarning: as an option in the ipython:: block to suppress this message\n" sys.stdout.write('\n\n>>>' + ('-' * 73)) sys.stdout.write(s) - sys.stdout.write('-' * 76 + '\n') + sys.stdout.write(('-' * 76) + '\n') s=warnings.formatwarning(w.message, w.category, w.filename, w.lineno, w.line) sys.stdout.write(s) sys.stdout.write('<<<' + ('-' * 73) + '\n') self.cout.truncate(0) - return (ret, input_lines, output, is_doctest, decorator, image_file, - image_directive) + + return (ret, input_lines, processed_output, + is_doctest, decorator, image_file, image_directive) - def process_output(self, data, output_prompt, - input_lines, output, is_doctest, decorator, image_file): + def process_output(self, data, output_prompt, input_lines, output, + is_doctest, decorator, image_file): """ Process data block for OUTPUT token. """ + # Recall: `data` is the submitted output, and `output` is the processed + # output from `input_lines`. + TAB = ' ' * 4 if is_doctest and output is not None: - found = output + found = output # This is the processed output found = found.strip() submitted = data.strip() @@ -522,7 +546,7 @@ def process_output(self, data, output_prompt, source = self.directive.state.document.current_source content = self.directive.content # Add tabs and join into a single string. 
- content = '\n'.join(TAB + line for line in content) + content = '\n'.join([TAB + line for line in content]) # Make sure the output contains the output prompt. ind = found.find(output_prompt) @@ -553,6 +577,31 @@ def process_output(self, data, output_prompt, else: self.custom_doctest(decorator, input_lines, found, submitted) + # When in verbatim mode, this holds additional submitted output + # to be written in the final Sphinx output. + # https://github.com/ipython/ipython/issues/5776 + out_data = [] + + is_verbatim = decorator=='@verbatim' or self.is_verbatim + if is_verbatim and data.strip(): + # Note that `ret` in `process_block` has '' as its last element if + # the code block was in verbatim mode. So if there is no submitted + # output, then we will have proper spacing only if we do not add + # an additional '' to `out_data`. This is why we condition on + # `and data.strip()`. + + # The submitted output has no output prompt. If we want the + # prompt and the code to appear, we need to join them now + # instead of adding them separately---as this would create an + # undesired newline. How we do this ultimately depends on the + # format of the output regex. I'll do what works for the default + # prompt for now, and we might have to adjust if it doesn't work + # in other cases. Finally, the submitted output does not have + # a trailing newline, so we must add it manually. + out_data.append("{0} {1}\n".format(output_prompt, data)) + + return out_data + def process_comment(self, data): """Process data fPblock for COMMENT token.""" if not self.is_suppress: @@ -563,9 +612,7 @@ def save_image(self, image_file): Saves the image file to disk. 
""" self.ensure_pyplot() - command = ('plt.gcf().savefig("%s", bbox_inches="tight", ' - 'dpi=100)' % image_file) - + command = 'plt.gcf().savefig("%s")'%image_file #print 'SAVEFIG', command # dbg self.process_input_line('bookmark ipy_thisdir', store_history=False) self.process_input_line('cd -b ipy_savedir', store_history=False) @@ -588,18 +635,53 @@ def process_block(self, block): image_file = None image_directive = None + found_input = False for token, data in block: if token == COMMENT: out_data = self.process_comment(data) elif token == INPUT: - (out_data, input_lines, output, is_doctest, decorator, - image_file, image_directive) = \ + found_input = True + (out_data, input_lines, output, is_doctest, + decorator, image_file, image_directive) = \ self.process_input(data, input_prompt, lineno) elif token == OUTPUT: + if not found_input: + + TAB = ' ' * 4 + linenumber = 0 + source = 'Unavailable' + content = 'Unavailable' + if self.directive: + linenumber = self.directive.state.document.current_line + source = self.directive.state.document.current_source + content = self.directive.content + # Add tabs and join into a single string. + content = '\n'.join([TAB + line for line in content]) + + e = ('\n\nInvalid block: Block contains an output prompt ' + 'without an input prompt.\n\n' + 'Document source: {0}\n\n' + 'Content begins at line {1}: \n\n{2}\n\n' + 'Problematic block within content: \n\n{TAB}{3}\n\n') + e = e.format(source, linenumber, content, block, TAB=TAB) + + # Write, rather than include in exception, since Sphinx + # will truncate tracebacks. + sys.stdout.write(e) + raise RuntimeError('An invalid block was detected.') + out_data = \ - self.process_output(data, output_prompt, - input_lines, output, is_doctest, - decorator, image_file) + self.process_output(data, output_prompt, input_lines, + output, is_doctest, decorator, + image_file) + if out_data: + # Then there was user submitted output in verbatim mode. 
+ # We need to remove the last element of `ret` that was + # added in `process_input`, as it is '' and would introduce + # an undesirable newline. + assert(ret[-1] == '') + del ret[-1] + if out_data: ret.extend(out_data) @@ -740,8 +822,7 @@ class IPythonDirective(Directive): 'verbatim' : directives.flag, 'doctest' : directives.flag, 'okexcept': directives.flag, - 'okwarning': directives.flag, - 'output_encoding': directives.unchanged_required + 'okwarning': directives.flag } shell = None @@ -753,14 +834,9 @@ def get_config_options(self): config = self.state.document.settings.env.config # get config variables to set figure output directory - confdir = self.state.document.settings.env.app.confdir savefig_dir = config.ipython_savefig_dir - source_dir = os.path.dirname(self.state.document.current_source) - if savefig_dir is None: - savefig_dir = config.html_static_path - if isinstance(savefig_dir, list): - savefig_dir = savefig_dir[0] # safe to assume only one path? - savefig_dir = os.path.join(confdir, savefig_dir) + source_dir = self.state.document.settings.env.srcdir + savefig_dir = os.path.join(source_dir, savefig_dir) # get regex and prompt stuff rgxin = config.ipython_rgxin @@ -779,6 +855,12 @@ def setup(self): (savefig_dir, source_dir, rgxin, rgxout, promptin, promptout, mplbackend, exec_lines, hold_count) = self.get_config_options() + try: + os.makedirs(savefig_dir) + except OSError as e: + if e.errno != errno.EEXIST: + raise + if self.shell is None: # We will be here many times. However, when the # EmbeddedSphinxShell is created, its interactive shell member @@ -786,13 +868,11 @@ def setup(self): if mplbackend and 'matplotlib.backends' not in sys.modules: import matplotlib - # Repeated calls to use() will not hurt us since `mplbackend` - # is the same each time. matplotlib.use(mplbackend) # Must be called after (potentially) importing matplotlib and # setting its backend since exec_lines might import pylab. 
- self.shell = EmbeddedSphinxShell(exec_lines, self.state) + self.shell = EmbeddedSphinxShell(exec_lines) # Store IPython directive to enable better error messages self.shell.directive = self @@ -800,14 +880,9 @@ def setup(self): # reset the execution count if we haven't processed this doc #NOTE: this may be borked if there are multiple seen_doc tmp files #check time stamp? - if self.state.document.current_source not in self.seen_docs: + if not self.state.document.current_source in self.seen_docs: self.shell.IP.history_manager.reset() self.shell.IP.execution_count = 1 - try: - self.shell.IP.prompt_manager.width = 0 - except AttributeError: - # GH14003: class promptManager has removed after IPython 5.x - pass self.seen_docs.add(self.state.document.current_source) # and attach to shell so we don't have to pass them around @@ -846,13 +921,13 @@ def run(self): self.shell.is_okexcept = 'okexcept' in options self.shell.is_okwarning = 'okwarning' in options - self.shell.output_encoding = [options.get('output_encoding', 'utf8')] - # handle pure python code if 'python' in self.arguments: content = self.content self.content = self.shell.process_pure_python(content) + # parts consists of all text within the ipython-block. + # Each part is an input/output block. parts = '\n'.join(self.content).split('\n\n') lines = ['.. 
code-block:: ipython', ''] @@ -863,7 +938,8 @@ def run(self): if len(block): rows, figure = self.shell.process_block(block) for row in rows: - lines.extend([' %s'%line for line in row.split('\n')]) + lines.extend([' {0}'.format(line) + for line in row.split('\n')]) if figure is not None: figures.append(figure) @@ -873,7 +949,7 @@ def run(self): lines.extend(figure.split('\n')) lines.append('') - if len(lines)>2: + if len(lines) > 2: if debug: print('\n'.join(lines)) else: @@ -893,7 +969,7 @@ def setup(app): setup.app = app app.add_directive('ipython', IPythonDirective) - app.add_config_value('ipython_savefig_dir', None, 'env') + app.add_config_value('ipython_savefig_dir', 'savefig', 'env') app.add_config_value('ipython_rgxin', re.compile('In \[(\d+)\]:\s?(.*)\s*'), 'env') app.add_config_value('ipython_rgxout', @@ -914,6 +990,9 @@ def setup(app): app.add_config_value('ipython_holdcount', True, 'env') + metadata = {'parallel_read_safe': True, 'parallel_write_safe': True} + return metadata + # Simple smoke test, needs to be converted to a proper automatic test. def test(): @@ -1074,7 +1153,7 @@ def test(): #ipython_directive.DEBUG = True # dbg #options = dict(suppress=True) # dbg - options = dict() + options = {} for example in examples: content = example.split('\n') IPythonDirective('debug', arguments=None, options=options, From e9bb37411c9b9bb5723d27404b7e9e800bec1c1b Mon Sep 17 00:00:00 2001 From: topper-123 Date: Mon, 19 Feb 2018 13:50:56 +0000 Subject: [PATCH 131/217] add test for numpy ops, esp. 
nanmin/max bug for np<1.13 (#19753) --- pandas/tests/test_nanops.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index df3c49a73d227..dffb303af6ae1 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -13,6 +13,7 @@ import pandas.core.nanops as nanops import pandas.util.testing as tm import pandas.util._test_decorators as td +from pandas.compat.numpy import _np_version_under1p13 use_bn = nanops._USE_BOTTLENECK @@ -1015,3 +1016,34 @@ def test_use_bottleneck(): assert not pd.get_option('use_bottleneck') pd.set_option('use_bottleneck', use_bn) + + +@pytest.mark.parametrize("numpy_op, expected", [ + (np.sum, 10), + (np.nansum, 10), + (np.mean, 2.5), + (np.nanmean, 2.5), + (np.median, 2.5), + (np.nanmedian, 2.5), + (np.min, 1), + (np.max, 4), +]) +def test_numpy_ops(numpy_op, expected): + # GH8383 + result = numpy_op(pd.Series([1, 2, 3, 4])) + assert result == expected + + +@pytest.mark.parametrize("numpy_op, expected", [ + (np.nanmin, 1), + (np.nanmax, 4), +]) +def test_numpy_ops_np_version_under1p13(numpy_op, expected): + # GH8383 + result = numpy_op(pd.Series([1, 2, 3, 4])) + if _np_version_under1p13: + # bug for numpy < 1.13, where result is a series, should be a scalar + with pytest.raises(ValueError): + assert result == expected + else: + assert result == expected From 9e8794c6b28cf7fb4b17baeef1858683f72c2317 Mon Sep 17 00:00:00 2001 From: ZhuBaohe Date: Mon, 19 Feb 2018 21:52:13 +0800 Subject: [PATCH 132/217] DOC: correct Period.strftime exsample (#19758) --- pandas/_libs/tslibs/period.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index c11a8b149bc13..32ffe4e6d0453 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1403,7 +1403,7 @@ cdef class _Period(object): Examples -------- - >>> a = Period(freq='Q@JUL', year=2006, 
quarter=1) + >>> a = Period(freq='Q-JUL', year=2006, quarter=1) >>> a.strftime('%F-Q%q') '2006-Q1' >>> # Output the last month in the quarter of this date From 15232fdd728dfac4ef36c7a30cdb57710a0f2ec0 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Mon, 19 Feb 2018 10:38:41 -0500 Subject: [PATCH 133/217] ENH: fake http proxy in case of --skip-network testing (#19757) --- ci/script_multi.sh | 5 +++++ ci/script_single.sh | 5 +++++ pandas/tests/test_downstream.py | 2 ++ 3 files changed, 12 insertions(+) diff --git a/ci/script_multi.sh b/ci/script_multi.sh index 6c354fc4cab0b..45c61ee3172fe 100755 --- a/ci/script_multi.sh +++ b/ci/script_multi.sh @@ -12,6 +12,11 @@ if [ -n "$LOCALE_OVERRIDE" ]; then python -c "$pycmd" fi +# Enforce absent network during testing by faking a proxy +if echo "$TEST_ARGS" | grep -e --skip-network -q; then + export http_proxy=http://1.2.3.4 https_proxy=http://1.2.3.4; +fi + # Workaround for pytest-xdist flaky collection order # https://github.com/pytest-dev/pytest/issues/920 # https://github.com/pytest-dev/pytest/issues/1075 diff --git a/ci/script_single.sh b/ci/script_single.sh index 74b0e897f1d73..021a5a7714fb5 100755 --- a/ci/script_single.sh +++ b/ci/script_single.sh @@ -16,6 +16,11 @@ if [ "$SLOW" ]; then TEST_ARGS="--only-slow --skip-network" fi +# Enforce absent network during testing by faking a proxy +if echo "$TEST_ARGS" | grep -e --skip-network -q; then + export http_proxy=http://1.2.3.4 https_proxy=http://1.2.3.4; +fi + if [ "$PIP_BUILD_TEST" ]; then echo "We are not running pytest as this is a build test." 
diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py index b438d6a6137b0..a595d9f18d6b8 100644 --- a/pandas/tests/test_downstream.py +++ b/pandas/tests/test_downstream.py @@ -53,6 +53,7 @@ def test_xarray(df): assert df.to_xarray() is not None +@tm.network def test_statsmodels(): statsmodels = import_module('statsmodels') # noqa @@ -73,6 +74,7 @@ def test_scikit_learn(df): clf.predict(digits.data[-1:]) +@tm.network def test_seaborn(): seaborn = import_module('seaborn') From 505bf5e81be593ae5f3fe8c7c2395a866411a46d Mon Sep 17 00:00:00 2001 From: ZhuBaohe Date: Tue, 20 Feb 2018 00:10:32 +0800 Subject: [PATCH 134/217] DOC: correct Panel.apply exsample (#19766) --- pandas/core/panel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 2cb80e938afb9..7f973992fb07f 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -1020,7 +1020,7 @@ def apply(self, func, axis='major', **kwargs): Equivalent to previous: - >>> p.apply(lambda x: x.sum(), axis='minor') + >>> p.apply(lambda x: x.sum(), axis='major') Return the shapes of each DataFrame over axis 2 (i.e the shapes of items x major), as a Series From 81e2f761489d0695316854b2daba7e6dce86c7e5 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 19 Feb 2018 08:45:30 -0800 Subject: [PATCH 135/217] split off scalar tests to submodules (#19752) --- pandas/tests/scalar/interval/__init__.py | 0 .../scalar/{ => interval}/test_interval.py | 0 pandas/tests/scalar/period/__init__.py | 0 .../tests/scalar/{ => period}/test_period.py | 0 .../scalar/{ => period}/test_period_asfreq.py | 0 pandas/tests/scalar/timedelta/__init__.py | 0 .../tests/scalar/timedelta/test_arithmetic.py | 422 ++++++++++++++++++ .../scalar/{ => timedelta}/test_timedelta.py | 164 +------ .../scalar/{ => timestamp}/test_timestamp.py | 0 9 files changed, 423 insertions(+), 163 deletions(-) create mode 100644 pandas/tests/scalar/interval/__init__.py rename 
pandas/tests/scalar/{ => interval}/test_interval.py (100%) create mode 100644 pandas/tests/scalar/period/__init__.py rename pandas/tests/scalar/{ => period}/test_period.py (100%) rename pandas/tests/scalar/{ => period}/test_period_asfreq.py (100%) create mode 100644 pandas/tests/scalar/timedelta/__init__.py create mode 100644 pandas/tests/scalar/timedelta/test_arithmetic.py rename pandas/tests/scalar/{ => timedelta}/test_timedelta.py (83%) rename pandas/tests/scalar/{ => timestamp}/test_timestamp.py (100%) diff --git a/pandas/tests/scalar/interval/__init__.py b/pandas/tests/scalar/interval/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/scalar/test_interval.py b/pandas/tests/scalar/interval/test_interval.py similarity index 100% rename from pandas/tests/scalar/test_interval.py rename to pandas/tests/scalar/interval/test_interval.py diff --git a/pandas/tests/scalar/period/__init__.py b/pandas/tests/scalar/period/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/scalar/test_period.py b/pandas/tests/scalar/period/test_period.py similarity index 100% rename from pandas/tests/scalar/test_period.py rename to pandas/tests/scalar/period/test_period.py diff --git a/pandas/tests/scalar/test_period_asfreq.py b/pandas/tests/scalar/period/test_period_asfreq.py similarity index 100% rename from pandas/tests/scalar/test_period_asfreq.py rename to pandas/tests/scalar/period/test_period_asfreq.py diff --git a/pandas/tests/scalar/timedelta/__init__.py b/pandas/tests/scalar/timedelta/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py new file mode 100644 index 0000000000000..90c911c24f6a9 --- /dev/null +++ b/pandas/tests/scalar/timedelta/test_arithmetic.py @@ -0,0 +1,422 @@ +# -*- coding: utf-8 -*- +""" +Tests for scalar Timedelta arithmetic ops +""" +from datetime 
import datetime, timedelta +import operator + +import numpy as np +import pytest + +import pandas as pd +import pandas.util.testing as tm +from pandas.core import ops +from pandas import Timedelta, Timestamp, NaT + + +class TestTimedeltaAdditionSubtraction(object): + """ + Tests for Timedelta methods: + + __add__, __radd__, + __sub__, __rsub__ + """ + @pytest.mark.parametrize('ten_seconds', [ + Timedelta(10, unit='s'), + timedelta(seconds=10), + np.timedelta64(10, 's'), + np.timedelta64(10000000000, 'ns'), + pd.offsets.Second(10)]) + def test_td_add_sub_ten_seconds(self, ten_seconds): + # GH#6808 + base = Timestamp('20130101 09:01:12.123456') + expected_add = Timestamp('20130101 09:01:22.123456') + expected_sub = Timestamp('20130101 09:01:02.123456') + + result = base + ten_seconds + assert result == expected_add + + result = base - ten_seconds + assert result == expected_sub + + @pytest.mark.parametrize('one_day_ten_secs', [ + Timedelta('1 day, 00:00:10'), + Timedelta('1 days, 00:00:10'), + timedelta(days=1, seconds=10), + np.timedelta64(1, 'D') + np.timedelta64(10, 's'), + pd.offsets.Day() + pd.offsets.Second(10)]) + def test_td_add_sub_one_day_ten_seconds(self, one_day_ten_secs): + # GH#6808 + base = Timestamp('20130102 09:01:12.123456') + expected_add = Timestamp('20130103 09:01:22.123456') + expected_sub = Timestamp('20130101 09:01:02.123456') + + result = base + one_day_ten_secs + assert result == expected_add + + result = base - one_day_ten_secs + assert result == expected_sub + + @pytest.mark.parametrize('op', [operator.add, ops.radd]) + def test_td_add_datetimelike_scalar(self, op): + # GH#19738 + td = Timedelta(10, unit='d') + + result = op(td, datetime(2016, 1, 1)) + if op is operator.add: + # datetime + Timedelta does _not_ call Timedelta.__radd__, + # so we get a datetime back instead of a Timestamp + assert isinstance(result, Timestamp) + assert result == Timestamp(2016, 1, 11) + + result = op(td, Timestamp('2018-01-12 18:09')) + assert 
isinstance(result, Timestamp) + assert result == Timestamp('2018-01-22 18:09') + + result = op(td, np.datetime64('2018-01-12')) + assert isinstance(result, Timestamp) + assert result == Timestamp('2018-01-22') + + result = op(td, NaT) + assert result is NaT + + with pytest.raises(TypeError): + op(td, 2) + with pytest.raises(TypeError): + op(td, 2.0) + + @pytest.mark.parametrize('op', [operator.add, ops.radd]) + def test_td_add_td(self, op): + td = Timedelta(10, unit='d') + + result = op(td, Timedelta(days=10)) + assert isinstance(result, Timedelta) + assert result == Timedelta(days=20) + + @pytest.mark.parametrize('op', [operator.add, ops.radd]) + def test_td_add_pytimedelta(self, op): + td = Timedelta(10, unit='d') + result = op(td, timedelta(days=9)) + assert isinstance(result, Timedelta) + assert result == Timedelta(days=19) + + @pytest.mark.xfail(reason='GH#19738 argument not converted to Timedelta') + @pytest.mark.parametrize('op', [operator.add, ops.radd]) + def test_td_add_timedelta64(self, op): + td = Timedelta(10, unit='d') + result = op(td, np.timedelta64(-4, 'D')) + assert isinstance(result, Timedelta) + assert result == Timedelta(days=6) + + @pytest.mark.parametrize('op', [operator.add, ops.radd]) + def test_td_add_offset(self, op): + td = Timedelta(10, unit='d') + + result = op(td, pd.offsets.Hour(6)) + assert isinstance(result, Timedelta) + assert result == Timedelta(days=10, hours=6) + + def test_td_sub_td(self): + td = Timedelta(10, unit='d') + expected = Timedelta(0, unit='ns') + result = td - td + assert isinstance(result, Timedelta) + assert result == expected + + def test_td_sub_pytimedelta(self): + td = Timedelta(10, unit='d') + expected = Timedelta(0, unit='ns') + result = td - td.to_pytimedelta() + assert isinstance(result, Timedelta) + assert result == expected + + @pytest.mark.xfail(reason='GH#19738 argument not converted to Timedelta') + def test_td_sub_timedelta64(self): + td = Timedelta(10, unit='d') + expected = Timedelta(0, unit='ns') 
+ result = td - td.to_timedelta64() + assert isinstance(result, Timedelta) + # comparison fails even if we comment out the isinstance assertion + assert result == expected + + def test_td_sub_nat(self): + td = Timedelta(10, unit='d') + result = td - NaT + assert result is NaT + + @pytest.mark.xfail(reason='GH#19738 argument not converted to Timedelta') + def test_td_sub_td64_nat(self): + td = Timedelta(10, unit='d') + result = td - np.timedelta64('NaT') + assert result is NaT + + def test_td_sub_offset(self): + td = Timedelta(10, unit='d') + result = td - pd.offsets.Hour(1) + assert isinstance(result, Timedelta) + assert result == Timedelta(239, unit='h') + + def test_td_sub_numeric_raises(self): + td = td = Timedelta(10, unit='d') + with pytest.raises(TypeError): + td - 2 + with pytest.raises(TypeError): + td - 2.0 + + def test_td_rsub_pytimedelta(self): + td = Timedelta(10, unit='d') + expected = Timedelta(0, unit='ns') + + result = td.to_pytimedelta() - td + assert isinstance(result, Timedelta) + assert result == expected + + @pytest.mark.xfail(reason='GH#19738 argument not converted to Timedelta') + def test_td_rsub_timedelta64(self): + td = Timedelta(10, unit='d') + expected = Timedelta(0, unit='ns') + + result = td.to_timedelta64() - td + assert isinstance(result, Timedelta) + assert result == expected + + def test_td_rsub_nat(self): + td = Timedelta(10, unit='d') + result = NaT - td + assert result is NaT + + result = np.datetime64('NaT') - td + assert result is NaT + + @pytest.mark.xfail(reason='GH#19738 argument not converted to Timedelta') + def test_td_rsub_td64_nat(self): + td = Timedelta(10, unit='d') + result = np.timedelta64('NaT') - td + assert result is NaT + + def test_td_rsub_offset(self): + result = pd.offsets.Hour(1) - Timedelta(10, unit='d') + assert isinstance(result, Timedelta) + assert result == Timedelta(-239, unit='h') + + def test_td_rsub_numeric_raises(self): + td = td = Timedelta(10, unit='d') + with pytest.raises(TypeError): + 2 - td 
+ with pytest.raises(TypeError): + 2.0 - td + + +class TestTimedeltaMultiplicationDivision(object): + """ + Tests for Timedelta methods: + + __mul__, __rmul__, + __div__, __rdiv__, + __truediv__, __rtruediv__, + __floordiv__, __rfloordiv__, + __mod__, __rmod__, + __divmod__, __rdivmod__ + """ + + # --------------------------------------------------------------- + # Timedelta.__mul__, __rmul__ + + @pytest.mark.parametrize('op', [operator.mul, ops.rmul]) + def test_td_mul_scalar(self, op): + # GH#19738 + td = Timedelta(minutes=3) + + result = op(td, 2) + assert result == Timedelta(minutes=6) + + result = op(td, 1.5) + assert result == Timedelta(minutes=4, seconds=30) + + assert op(td, np.nan) is NaT + + assert op(-1, td).value == -1 * td.value + assert op(-1.0, td).value == -1.0 * td.value + + with pytest.raises(TypeError): + # timedelta * datetime is gibberish + op(td, Timestamp(2016, 1, 2)) + + with pytest.raises(TypeError): + # invalid multiply with another timedelta + op(td, td) + + # --------------------------------------------------------------- + # Timedelta.__div__, __truediv__ + + def test_td_div_timedeltalike_scalar(self): + # GH#19738 + td = Timedelta(10, unit='d') + + result = td / pd.offsets.Hour(1) + assert result == 240 + + assert td / td == 1 + assert td / np.timedelta64(60, 'h') == 4 + + assert np.isnan(td / NaT) + + def test_td_div_numeric_scalar(self): + # GH#19738 + td = Timedelta(10, unit='d') + + result = td / 2 + assert isinstance(result, Timedelta) + assert result == Timedelta(days=5) + + result = td / 5.0 + assert isinstance(result, Timedelta) + assert result == Timedelta(days=2) + + # --------------------------------------------------------------- + # Timedelta.__rdiv__ + + def test_td_rdiv_timedeltalike_scalar(self): + # GH#19738 + td = Timedelta(10, unit='d') + result = pd.offsets.Hour(1) / td + assert result == 1 / 240.0 + + assert np.timedelta64(60, 'h') / td == 0.25 + + # --------------------------------------------------------------- + 
# Timedelta.__floordiv__ + + def test_td_floordiv_timedeltalike_scalar(self): + # GH#18846 + td = Timedelta(hours=3, minutes=4) + scalar = Timedelta(hours=3, minutes=3) + + assert td // scalar == 1 + assert -td // scalar.to_pytimedelta() == -2 + assert (2 * td) // scalar.to_timedelta64() == 2 + + def test_td_floordiv_null_scalar(self): + # GH#18846 + td = Timedelta(hours=3, minutes=4) + + assert td // np.nan is NaT + assert np.isnan(td // NaT) + assert np.isnan(td // np.timedelta64('NaT')) + + def test_td_floordiv_invalid_scalar(self): + # GH#18846 + td = Timedelta(hours=3, minutes=4) + + with pytest.raises(TypeError): + td // np.datetime64('2016-01-01', dtype='datetime64[us]') + + def test_td_floordiv_numeric_scalar(self): + # GH#18846 + td = Timedelta(hours=3, minutes=4) + + expected = Timedelta(hours=1, minutes=32) + assert td // 2 == expected + assert td // 2.0 == expected + assert td // np.float64(2.0) == expected + assert td // np.int32(2.0) == expected + assert td // np.uint8(2.0) == expected + + def test_floordiv_timedeltalike_array(self): + # GH#18846 + td = Timedelta(hours=3, minutes=4) + scalar = Timedelta(hours=3, minutes=3) + + # Array-like others + assert td // np.array(scalar.to_timedelta64()) == 1 + + res = (3 * td) // np.array([scalar.to_timedelta64()]) + expected = np.array([3], dtype=np.int64) + tm.assert_numpy_array_equal(res, expected) + + res = (10 * td) // np.array([scalar.to_timedelta64(), + np.timedelta64('NaT')]) + expected = np.array([10, np.nan]) + tm.assert_numpy_array_equal(res, expected) + + def test_td_floordiv_numeric_series(self): + # GH#18846 + td = Timedelta(hours=3, minutes=4) + ser = pd.Series([1], dtype=np.int64) + res = td // ser + assert res.dtype.kind == 'm' + + # --------------------------------------------------------------- + # Timedelta.__rfloordiv__ + + def test_td_rfloordiv_timedeltalike_scalar(self): + # GH#18846 + td = Timedelta(hours=3, minutes=3) + scalar = Timedelta(hours=3, minutes=4) + + # scalar others + # x 
// Timedelta is defined only for timedelta-like x. int-like, + # float-like, and date-like, in particular, should all either + # a) raise TypeError directly or + # b) return NotImplemented, following which the reversed + # operation will raise TypeError. + assert td.__rfloordiv__(scalar) == 1 + assert (-td).__rfloordiv__(scalar.to_pytimedelta()) == -2 + assert (2 * td).__rfloordiv__(scalar.to_timedelta64()) == 0 + + def test_td_rfloordiv_null_scalar(self): + # GH#18846 + td = Timedelta(hours=3, minutes=3) + + assert np.isnan(td.__rfloordiv__(NaT)) + assert np.isnan(td.__rfloordiv__(np.timedelta64('NaT'))) + + def test_td_rfloordiv_invalid_scalar(self): + # GH#18846 + td = Timedelta(hours=3, minutes=3) + + dt64 = np.datetime64('2016-01-01', dtype='datetime64[us]') + with pytest.raises(TypeError): + td.__rfloordiv__(dt64) + + def test_td_rfloordiv_numeric_scalar(self): + # GH#18846 + td = Timedelta(hours=3, minutes=3) + + assert td.__rfloordiv__(np.nan) is NotImplemented + assert td.__rfloordiv__(3.5) is NotImplemented + assert td.__rfloordiv__(2) is NotImplemented + + with pytest.raises(TypeError): + td.__rfloordiv__(np.float64(2.0)) + with pytest.raises(TypeError): + td.__rfloordiv__(np.int32(2.0)) + with pytest.raises(TypeError): + td.__rfloordiv__(np.uint8(9)) + + def test_td_rfloordiv_timedeltalike_array(self): + # GH#18846 + td = Timedelta(hours=3, minutes=3) + scalar = Timedelta(hours=3, minutes=4) + + # Array-like others + assert td.__rfloordiv__(np.array(scalar.to_timedelta64())) == 1 + + res = td.__rfloordiv__(np.array([(3 * scalar).to_timedelta64()])) + expected = np.array([3], dtype=np.int64) + tm.assert_numpy_array_equal(res, expected) + + arr = np.array([(10 * scalar).to_timedelta64(), + np.timedelta64('NaT')]) + res = td.__rfloordiv__(arr) + expected = np.array([10, np.nan]) + tm.assert_numpy_array_equal(res, expected) + + def test_td_rfloordiv_numeric_series(self): + # GH#18846 + td = Timedelta(hours=3, minutes=3) + ser = pd.Series([1], 
dtype=np.int64) + res = td.__rfloordiv__(ser) + assert res is NotImplemented + with pytest.raises(TypeError): + ser // td diff --git a/pandas/tests/scalar/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py similarity index 83% rename from pandas/tests/scalar/test_timedelta.py rename to pandas/tests/scalar/timedelta/test_timedelta.py index 667266be2a89b..420b66b4ce0dc 100644 --- a/pandas/tests/scalar/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -74,46 +74,6 @@ class Other: assert td.__mul__(other) is NotImplemented assert td.__floordiv__(other) is NotImplemented - def test_timedelta_ops_scalar(self): - # GH 6808 - base = pd.to_datetime('20130101 09:01:12.123456') - expected_add = pd.to_datetime('20130101 09:01:22.123456') - expected_sub = pd.to_datetime('20130101 09:01:02.123456') - - for offset in [pd.to_timedelta(10, unit='s'), timedelta(seconds=10), - np.timedelta64(10, 's'), - np.timedelta64(10000000000, 'ns'), - pd.offsets.Second(10)]: - result = base + offset - assert result == expected_add - - result = base - offset - assert result == expected_sub - - base = pd.to_datetime('20130102 09:01:12.123456') - expected_add = pd.to_datetime('20130103 09:01:22.123456') - expected_sub = pd.to_datetime('20130101 09:01:02.123456') - - for offset in [pd.to_timedelta('1 day, 00:00:10'), - pd.to_timedelta('1 days, 00:00:10'), - timedelta(days=1, seconds=10), - np.timedelta64(1, 'D') + np.timedelta64(10, 's'), - pd.offsets.Day() + pd.offsets.Second(10)]: - result = base + offset - assert result == expected_add - - result = base - offset - assert result == expected_sub - - def test_ops_offsets(self): - td = Timedelta(10, unit='d') - assert Timedelta(241, unit='h') == td + pd.offsets.Hour(1) - assert Timedelta(241, unit='h') == pd.offsets.Hour(1) + td - assert 240 == td / pd.offsets.Hour(1) - assert 1 / 240.0 == pd.offsets.Hour(1) / td - assert Timedelta(239, unit='h') == td - pd.offsets.Hour(1) - assert Timedelta(-239, unit='h') 
== pd.offsets.Hour(1) - td - def test_unary_ops(self): td = Timedelta(10, unit='d') @@ -129,130 +89,8 @@ def test_unary_ops(self): def test_binary_ops_nat(self): td = Timedelta(10, unit='d') - - assert (td - pd.NaT) is pd.NaT - assert (td + pd.NaT) is pd.NaT + # FIXME: The next test is wrong: td * NaT should raise assert (td * pd.NaT) is pd.NaT - assert (td / pd.NaT) is np.nan - assert (td // pd.NaT) is np.nan - assert (td // np.timedelta64('NaT')) is np.nan - - def test_binary_ops_integers(self): - td = Timedelta(10, unit='d') - - assert td * 2 == Timedelta(20, unit='d') - assert td / 2 == Timedelta(5, unit='d') - assert td // 2 == Timedelta(5, unit='d') - - # invert - assert td * -1 == Timedelta('-10d') - assert -1 * td == Timedelta('-10d') - - # can't operate with integers - pytest.raises(TypeError, lambda: td + 2) - pytest.raises(TypeError, lambda: td - 2) - - def test_binary_ops_with_timedelta(self): - td = Timedelta(10, unit='d') - - assert td - td == Timedelta(0, unit='ns') - assert td + td == Timedelta(20, unit='d') - assert td / td == 1 - - # invalid multiply with another timedelta - pytest.raises(TypeError, lambda: td * td) - - def test_floordiv(self): - # GH#18846 - td = Timedelta(hours=3, minutes=4) - scalar = Timedelta(hours=3, minutes=3) - - # scalar others - assert td // scalar == 1 - assert -td // scalar.to_pytimedelta() == -2 - assert (2 * td) // scalar.to_timedelta64() == 2 - - assert td // np.nan is pd.NaT - assert np.isnan(td // pd.NaT) - assert np.isnan(td // np.timedelta64('NaT')) - - with pytest.raises(TypeError): - td // np.datetime64('2016-01-01', dtype='datetime64[us]') - - expected = Timedelta(hours=1, minutes=32) - assert td // 2 == expected - assert td // 2.0 == expected - assert td // np.float64(2.0) == expected - assert td // np.int32(2.0) == expected - assert td // np.uint8(2.0) == expected - - # Array-like others - assert td // np.array(scalar.to_timedelta64()) == 1 - - res = (3 * td) // np.array([scalar.to_timedelta64()]) - 
expected = np.array([3], dtype=np.int64) - tm.assert_numpy_array_equal(res, expected) - - res = (10 * td) // np.array([scalar.to_timedelta64(), - np.timedelta64('NaT')]) - expected = np.array([10, np.nan]) - tm.assert_numpy_array_equal(res, expected) - - ser = pd.Series([1], dtype=np.int64) - res = td // ser - assert res.dtype.kind == 'm' - - def test_rfloordiv(self): - # GH#18846 - td = Timedelta(hours=3, minutes=3) - scalar = Timedelta(hours=3, minutes=4) - - # scalar others - # x // Timedelta is defined only for timedelta-like x. int-like, - # float-like, and date-like, in particular, should all either - # a) raise TypeError directly or - # b) return NotImplemented, following which the reversed - # operation will raise TypeError. - assert td.__rfloordiv__(scalar) == 1 - assert (-td).__rfloordiv__(scalar.to_pytimedelta()) == -2 - assert (2 * td).__rfloordiv__(scalar.to_timedelta64()) == 0 - - assert np.isnan(td.__rfloordiv__(pd.NaT)) - assert np.isnan(td.__rfloordiv__(np.timedelta64('NaT'))) - - dt64 = np.datetime64('2016-01-01', dtype='datetime64[us]') - with pytest.raises(TypeError): - td.__rfloordiv__(dt64) - - assert td.__rfloordiv__(np.nan) is NotImplemented - assert td.__rfloordiv__(3.5) is NotImplemented - assert td.__rfloordiv__(2) is NotImplemented - - with pytest.raises(TypeError): - td.__rfloordiv__(np.float64(2.0)) - with pytest.raises(TypeError): - td.__rfloordiv__(np.int32(2.0)) - with pytest.raises(TypeError): - td.__rfloordiv__(np.uint8(9)) - - # Array-like others - assert td.__rfloordiv__(np.array(scalar.to_timedelta64())) == 1 - - res = td.__rfloordiv__(np.array([(3 * scalar).to_timedelta64()])) - expected = np.array([3], dtype=np.int64) - tm.assert_numpy_array_equal(res, expected) - - arr = np.array([(10 * scalar).to_timedelta64(), - np.timedelta64('NaT')]) - res = td.__rfloordiv__(arr) - expected = np.array([10, np.nan]) - tm.assert_numpy_array_equal(res, expected) - - ser = pd.Series([1], dtype=np.int64) - res = td.__rfloordiv__(ser) - assert 
res is NotImplemented - with pytest.raises(TypeError): - ser // td class TestTimedeltaComparison(object): diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py similarity index 100% rename from pandas/tests/scalar/test_timestamp.py rename to pandas/tests/scalar/timestamp/test_timestamp.py From ea14495eddd02d13b601043a47ffb8c850cadd1f Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 19 Feb 2018 18:05:14 -0500 Subject: [PATCH 136/217] CI: remove PIP & old conda build in favor of pandas-ci buildsx (#19775) --- .travis.yml | 17 ++------------ ci/install_travis.sh | 28 ++---------------------- ci/requirements-3.6_PIP_BUILD_TEST.build | 6 ----- ci/requirements-3.6_PIP_BUILD_TEST.pip | 6 ----- ci/requirements-3.6_PIP_BUILD_TEST.sh | 7 ------ ci/script_multi.sh | 18 +-------------- ci/script_single.sh | 5 +---- 7 files changed, 6 insertions(+), 81 deletions(-) delete mode 100644 ci/requirements-3.6_PIP_BUILD_TEST.build delete mode 100644 ci/requirements-3.6_PIP_BUILD_TEST.pip delete mode 100644 ci/requirements-3.6_PIP_BUILD_TEST.sh diff --git a/.travis.yml b/.travis.yml index b1168f18315c3..22ef6c819c6d4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -50,9 +50,6 @@ matrix: packages: - python-gtk2 # In allow_failures - - dist: trusty - env: - - JOB="3.5" TEST_ARGS="--skip-slow --skip-network" - dist: trusty env: - JOB="3.6" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" CONDA_FORGE=true COVERAGE=true @@ -63,36 +60,26 @@ matrix: # In allow_failures - dist: trusty env: - - JOB="3.6_PIP_BUILD_TEST" TEST_ARGS="--skip-slow" PIP_BUILD_TEST=true + - JOB="3.6_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" addons: apt: packages: - xsel # In allow_failures - - dist: trusty - env: - - JOB="3.6_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" - # In allow_failures - dist: trusty env: - JOB="3.6_DOC" DOC=true allow_failures: - - dist: trusty - 
env: - - JOB="3.5" TEST_ARGS="--skip-slow --skip-network" - dist: trusty env: - JOB="2.7_SLOW" SLOW=true - dist: trusty env: - - JOB="3.6_PIP_BUILD_TEST" TEST_ARGS="--skip-slow" PIP_BUILD_TEST=true + - JOB="3.6_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" addons: apt: packages: - xsel - - dist: trusty - env: - - JOB="3.6_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" - dist: trusty env: - JOB="3.6_DOC" DOC=true diff --git a/ci/install_travis.sh b/ci/install_travis.sh index 458ff083b65eb..9ccb4baf25505 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -115,15 +115,6 @@ if [ "$COVERAGE" ]; then pip install coverage pytest-cov fi -echo -if [ -z "$PIP_BUILD_TEST" ] ; then - - # build but don't install - echo "[build em]" - time python setup.py build_ext --inplace || exit 1 - -fi - # we may have run installations echo echo "[conda installs]" @@ -161,23 +152,8 @@ conda list pandas pip list --format columns |grep pandas # build and install -echo - -if [ "$PIP_BUILD_TEST" ]; then - - # build & install testing - echo "[building release]" - time bash scripts/build_dist_for_release.sh || exit 1 - conda uninstall -y cython - time pip install dist/*tar.gz || exit 1 - -else - - # install our pandas - echo "[running setup.py develop]" - python setup.py develop || exit 1 - -fi +echo "[running setup.py develop]" +python setup.py develop || exit 1 echo echo "[show pandas]" diff --git a/ci/requirements-3.6_PIP_BUILD_TEST.build b/ci/requirements-3.6_PIP_BUILD_TEST.build deleted file mode 100644 index 1c4b46aea3865..0000000000000 --- a/ci/requirements-3.6_PIP_BUILD_TEST.build +++ /dev/null @@ -1,6 +0,0 @@ -python=3.6* -python-dateutil -pytz -nomkl -numpy -cython diff --git a/ci/requirements-3.6_PIP_BUILD_TEST.pip b/ci/requirements-3.6_PIP_BUILD_TEST.pip deleted file mode 100644 index f4617133cad5b..0000000000000 --- a/ci/requirements-3.6_PIP_BUILD_TEST.pip +++ /dev/null @@ -1,6 +0,0 @@ -xarray 
-geopandas -seaborn -pandas_datareader -statsmodels -scikit-learn diff --git a/ci/requirements-3.6_PIP_BUILD_TEST.sh b/ci/requirements-3.6_PIP_BUILD_TEST.sh deleted file mode 100644 index 3a8cf673b32f2..0000000000000 --- a/ci/requirements-3.6_PIP_BUILD_TEST.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash - -source activate pandas - -echo "install 36 PIP_BUILD_TEST" - -conda install -n pandas -c conda-forge pyarrow dask pyqt qtpy diff --git a/ci/script_multi.sh b/ci/script_multi.sh index 45c61ee3172fe..2b2d4d5488b91 100755 --- a/ci/script_multi.sh +++ b/ci/script_multi.sh @@ -23,23 +23,7 @@ fi export PYTHONHASHSEED=$(python -c 'import random; print(random.randint(1, 4294967295))') echo PYTHONHASHSEED=$PYTHONHASHSEED -if [ "$PIP_BUILD_TEST" ] ; then - echo "[build-test]" - - echo "[env]" - pip list --format columns |grep pandas - - echo "[running]" - cd /tmp - unset PYTHONPATH - - echo "[build-test: single]" - python -c 'import pandas; pandas.test(["--skip-slow", "--skip-network", "-r xX", "-m single"])' - - echo "[build-test: not single]" - python -c 'import pandas; pandas.test(["-n 2", "--skip-slow", "--skip-network", "-r xX", "-m not single"])' - -elif [ "$DOC" ]; then +if [ "$DOC" ]; then echo "We are not running pytest as this is a doc-build" elif [ "$COVERAGE" ]; then diff --git a/ci/script_single.sh b/ci/script_single.sh index 021a5a7714fb5..f376c920ac71b 100755 --- a/ci/script_single.sh +++ b/ci/script_single.sh @@ -21,10 +21,7 @@ if echo "$TEST_ARGS" | grep -e --skip-network -q; then export http_proxy=http://1.2.3.4 https_proxy=http://1.2.3.4; fi -if [ "$PIP_BUILD_TEST" ]; then - echo "We are not running pytest as this is a build test." 
- -elif [ "$DOC" ]; then +if [ "$DOC" ]; then echo "We are not running pytest as this is a doc-build" elif [ "$COVERAGE" ]; then From f83893cf45e7489fb13913dcb70515c2a38dff9f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 19 Feb 2018 15:16:04 -0800 Subject: [PATCH 137/217] ENH: implement Timedelta.__mod__ and __divmod__ (#19755) --- doc/source/timedeltas.rst | 14 ++ doc/source/whatsnew/v0.23.0.txt | 14 ++ pandas/_libs/tslibs/timedeltas.pyx | 18 ++ .../tests/scalar/timedelta/test_arithmetic.py | 186 ++++++++++++++++++ 4 files changed, 232 insertions(+) diff --git a/doc/source/timedeltas.rst b/doc/source/timedeltas.rst index 50cff4c7bbdfb..5f3a01f0725d4 100644 --- a/doc/source/timedeltas.rst +++ b/doc/source/timedeltas.rst @@ -283,6 +283,20 @@ Rounded division (floor-division) of a ``timedelta64[ns]`` Series by a scalar td // pd.Timedelta(days=3, hours=4) pd.Timedelta(days=3, hours=4) // td +.. _timedeltas.mod_divmod: + +The mod (%) and divmod operations are defined for ``Timedelta`` when operating with another timedelta-like or with a numeric argument. + +.. ipython:: python + + pd.Timedelta(hours=37) % datetime.timedelta(hours=2) + + # divmod against a timedelta-like returns a pair (int, Timedelta) + divmod(datetime.timedelta(hours=2), pd.Timedelta(minutes=11)) + + # divmod against a numeric returns a pair (Timedelta, Timedelta) + divmod(pd.Timedelta(hours=25), 86400000000000) + Attributes ---------- diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 11c49995372f5..aa1e434aae6e9 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -117,6 +117,20 @@ resetting indexes. See the :ref:`Sorting by Indexes and Values # Sort by 'second' (index) and 'A' (column) df_multi.sort_values(by=['second', 'A']) +.. 
_whatsnew_0230.enhancements.timedelta_mod + +Timedelta mod method +^^^^^^^^^^^^^^^^^^^^ + +``mod`` (%) and ``divmod`` operations are now defined on ``Timedelta`` objects +when operating with either timedelta-like or with numeric arguments. +See the :ref:`documentation here <timedeltas.mod_divmod>`. (:issue:`19365`) + +.. ipython:: python + + td = pd.Timedelta(hours=37) + td % pd.Timedelta(minutes=45) + .. _whatsnew_0230.enhancements.ran_inf: ``.rank()`` handles ``inf`` values when ``NaN`` are present diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 37693068e0974..f10175fddd00b 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -1149,6 +1149,24 @@ class Timedelta(_Timedelta): return np.nan return other.value // self.value + def __mod__(self, other): + # Naive implementation, room for optimization + return self.__divmod__(other)[1] + + def __rmod__(self, other): + # Naive implementation, room for optimization + return self.__rdivmod__(other)[1] + + def __divmod__(self, other): + # Naive implementation, room for optimization + div = self // other + return div, self - div * other + + def __rdivmod__(self, other): + # Naive implementation, room for optimization + div = other // self + return div, other - div * self + cdef _floordiv(int64_t value, right): return value // right diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py index 90c911c24f6a9..43e9491b9de0b 100644 --- a/pandas/tests/scalar/timedelta/test_arithmetic.py +++ b/pandas/tests/scalar/timedelta/test_arithmetic.py @@ -420,3 +420,189 @@ def test_td_rfloordiv_numeric_series(self): assert res is NotImplemented with pytest.raises(TypeError): ser // td + + def test_mod_timedeltalike(self): + # GH#19365 + td = Timedelta(hours=37) + + # Timedelta-like others + result = td % Timedelta(hours=6) + assert isinstance(result, Timedelta) + assert result == Timedelta(hours=1) + + result = td % 
timedelta(minutes=60) + assert isinstance(result, Timedelta) + assert result == Timedelta(0) + + result = td % NaT + assert result is NaT + + @pytest.mark.xfail(reason='GH#19378 floordiv td64 returns td64') + def test_mod_timedelta64_nat(self): + # GH#19365 + td = Timedelta(hours=37) + + result = td % np.timedelta64('NaT', 'ns') + assert result is NaT + + @pytest.mark.xfail(reason='GH#19378 floordiv td64 returns td64') + def test_mod_timedelta64(self): + # GH#19365 + td = Timedelta(hours=37) + + result = td % np.timedelta64(2, 'h') + assert isinstance(result, Timedelta) + assert result == Timedelta(hours=1) + + @pytest.mark.xfail(reason='GH#19378 floordiv by Tick not implemented') + def test_mod_offset(self): + # GH#19365 + td = Timedelta(hours=37) + + result = td % pd.offsets.Hour(5) + assert isinstance(result, Timedelta) + assert result == Timedelta(hours=2) + + # ---------------------------------------------------------------- + # Timedelta.__mod__, __rmod__ + + def test_mod_numeric(self): + # GH#19365 + td = Timedelta(hours=37) + + # Numeric Others + result = td % 2 + assert isinstance(result, Timedelta) + assert result == Timedelta(0) + + result = td % 1e12 + assert isinstance(result, Timedelta) + assert result == Timedelta(minutes=3, seconds=20) + + result = td % int(1e12) + assert isinstance(result, Timedelta) + assert result == Timedelta(minutes=3, seconds=20) + + def test_mod_invalid(self): + # GH#19365 + td = Timedelta(hours=37) + + with pytest.raises(TypeError): + td % pd.Timestamp('2018-01-22') + + with pytest.raises(TypeError): + td % [] + + def test_rmod_pytimedelta(self): + # GH#19365 + td = Timedelta(minutes=3) + + result = timedelta(minutes=4) % td + assert isinstance(result, Timedelta) + assert result == Timedelta(minutes=1) + + @pytest.mark.xfail(reason='GH#19378 floordiv by Tick not implemented') + def test_rmod_timedelta64(self): + # GH#19365 + td = Timedelta(minutes=3) + result = np.timedelta64(5, 'm') % td + assert isinstance(result, 
Timedelta) + assert result == Timedelta(minutes=2) + + def test_rmod_invalid(self): + # GH#19365 + td = Timedelta(minutes=3) + + with pytest.raises(TypeError): + pd.Timestamp('2018-01-22') % td + + with pytest.raises(TypeError): + 15 % td + + with pytest.raises(TypeError): + 16.0 % td + + with pytest.raises(TypeError): + np.array([22, 24]) % td + + # ---------------------------------------------------------------- + # Timedelta.__divmod__, __rdivmod__ + + def test_divmod_numeric(self): + # GH#19365 + td = Timedelta(days=2, hours=6) + + result = divmod(td, 53 * 3600 * 1e9) + assert result[0] == Timedelta(1, unit='ns') + assert isinstance(result[1], Timedelta) + assert result[1] == Timedelta(hours=1) + + assert result + result = divmod(td, np.nan) + assert result[0] is pd.NaT + assert result[1] is pd.NaT + + def test_divmod(self): + # GH#19365 + td = Timedelta(days=2, hours=6) + + result = divmod(td, timedelta(days=1)) + assert result[0] == 2 + assert isinstance(result[1], Timedelta) + assert result[1] == Timedelta(hours=6) + + result = divmod(td, 54) + assert result[0] == Timedelta(hours=1) + assert isinstance(result[1], Timedelta) + assert result[1] == Timedelta(0) + + result = divmod(td, pd.NaT) + assert np.isnan(result[0]) + assert result[1] is pd.NaT + + @pytest.mark.xfail(reason='GH#19378 floordiv by Tick not implemented') + def test_divmod_offset(self): + # GH#19365 + td = Timedelta(days=2, hours=6) + + result = divmod(td, pd.offsets.Hour(-4)) + assert result[0] == -14 + assert isinstance(result[1], Timedelta) + assert result[1] == Timedelta(hours=-2) + + def test_divmod_invalid(self): + # GH#19365 + td = Timedelta(days=2, hours=6) + + with pytest.raises(TypeError): + divmod(td, pd.Timestamp('2018-01-22')) + + def test_rdivmod_pytimedelta(self): + # GH#19365 + result = divmod(timedelta(days=2, hours=6), Timedelta(days=1)) + assert result[0] == 2 + assert isinstance(result[1], Timedelta) + assert result[1] == Timedelta(hours=6) + + 
@pytest.mark.xfail(reason='GH#19378 floordiv by Tick not implemented') + def test_rdivmod_offset(self): + result = divmod(pd.offsets.Hour(54), Timedelta(hours=-4)) + assert result[0] == -14 + assert isinstance(result[1], Timedelta) + assert result[1] == Timedelta(hours=-2) + + def test_rdivmod_invalid(self): + # GH#19365 + td = Timedelta(minutes=3) + + with pytest.raises(TypeError): + divmod(pd.Timestamp('2018-01-22'), td) + + with pytest.raises(TypeError): + divmod(15, td) + + with pytest.raises(TypeError): + divmod(16.0, td) + + with pytest.raises(TypeError): + divmod(np.array([22, 24]), td) From 5b931a22d152491cabe81f73f8fc53aec95bdeec Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 19 Feb 2018 15:16:36 -0800 Subject: [PATCH 138/217] Fix Timedelta floordiv, rfloordiv with offset, fix td64 return types (#19770) --- doc/source/whatsnew/v0.23.0.txt | 2 ++ pandas/_libs/tslibs/timedeltas.pyx | 16 +++++++++++++++- .../tests/scalar/timedelta/test_arithmetic.py | 18 +++++++++++------- 3 files changed, 28 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index aa1e434aae6e9..2f820043d7b6f 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -730,6 +730,8 @@ Datetimelike - Bug in :class:`Timestamp` and :func:`to_datetime` where a string representing a barely out-of-bounds timestamp would be incorrectly rounded down instead of raising ``OutOfBoundsDatetime`` (:issue:`19382`) - Bug in :func:`Timestamp.floor` :func:`DatetimeIndex.floor` where time stamps far in the future and past were not rounded correctly (:issue:`19206`) - Bug in :func:`to_datetime` where passing an out-of-bounds datetime with ``errors='coerce'`` and ``utc=True`` would raise ``OutOfBoundsDatetime`` instead of parsing to ``NaT`` (:issue:`19612`) +- Bug in :func:`Timedelta.__add__`, :func:`Timedelta.__sub__` where adding or subtracting a ``np.timedelta64`` object would return another ``np.timedelta64`` instead 
of a ``Timedelta`` (:issue:`19738`) +- Bug in :func:`Timedelta.__floordiv__`, :func:`Timedelta.__rfloordiv__` where operating with a ``Tick`` object would raise a ``TypeError`` instead of returning a numeric value (:issue:`19738`) - Timezones diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index f10175fddd00b..4483225e1801d 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -478,11 +478,16 @@ def _binary_op_method_timedeltalike(op, name): elif other is NaT: return NaT + elif is_timedelta64_object(other): + # convert to Timedelta below; avoid catching this in + # has-dtype check before then + pass + elif is_datetime64_object(other) or PyDateTime_CheckExact(other): # the PyDateTime_CheckExact case is for a datetime object that # is specifically *not* a Timestamp, as the Timestamp case will be # handled after `_validate_ops_compat` returns False below - from ..tslib import Timestamp + from timestamps import Timestamp return op(self, Timestamp(other)) # We are implicitly requiring the canonical behavior to be # defined by Timestamp methods. @@ -503,6 +508,9 @@ def _binary_op_method_timedeltalike(op, name): # failed to parse as timedelta return NotImplemented + if other is NaT: + # e.g. if original other was timedelta64('NaT') + return NaT return Timedelta(op(self.value, other.value), unit='ns') f.__name__ = name @@ -1096,6 +1104,9 @@ class Timedelta(_Timedelta): # just defer if hasattr(other, '_typ'): # Series, DataFrame, ... + if other._typ == 'dateoffset' and hasattr(other, 'delta'): + # Tick offset + return self // other.delta return NotImplemented if hasattr(other, 'dtype'): @@ -1128,6 +1139,9 @@ class Timedelta(_Timedelta): # just defer if hasattr(other, '_typ'): # Series, DataFrame, ... 
+ if other._typ == 'dateoffset' and hasattr(other, 'delta'): + # Tick offset + return other.delta // self return NotImplemented if hasattr(other, 'dtype'): diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py index 43e9491b9de0b..48da23f3575ab 100644 --- a/pandas/tests/scalar/timedelta/test_arithmetic.py +++ b/pandas/tests/scalar/timedelta/test_arithmetic.py @@ -100,7 +100,6 @@ def test_td_add_pytimedelta(self, op): assert isinstance(result, Timedelta) assert result == Timedelta(days=19) - @pytest.mark.xfail(reason='GH#19738 argument not converted to Timedelta') @pytest.mark.parametrize('op', [operator.add, ops.radd]) def test_td_add_timedelta64(self, op): td = Timedelta(10, unit='d') @@ -130,13 +129,11 @@ def test_td_sub_pytimedelta(self): assert isinstance(result, Timedelta) assert result == expected - @pytest.mark.xfail(reason='GH#19738 argument not converted to Timedelta') def test_td_sub_timedelta64(self): td = Timedelta(10, unit='d') expected = Timedelta(0, unit='ns') result = td - td.to_timedelta64() assert isinstance(result, Timedelta) - # comparison fails even if we comment out the isinstance assertion assert result == expected def test_td_sub_nat(self): @@ -144,7 +141,6 @@ def test_td_sub_nat(self): result = td - NaT assert result is NaT - @pytest.mark.xfail(reason='GH#19738 argument not converted to Timedelta') def test_td_sub_td64_nat(self): td = Timedelta(10, unit='d') result = td - np.timedelta64('NaT') @@ -171,7 +167,6 @@ def test_td_rsub_pytimedelta(self): assert isinstance(result, Timedelta) assert result == expected - @pytest.mark.xfail(reason='GH#19738 argument not converted to Timedelta') def test_td_rsub_timedelta64(self): td = Timedelta(10, unit='d') expected = Timedelta(0, unit='ns') @@ -188,7 +183,6 @@ def test_td_rsub_nat(self): result = np.datetime64('NaT') - td assert result is NaT - @pytest.mark.xfail(reason='GH#19738 argument not converted to Timedelta') def 
test_td_rsub_td64_nat(self): td = Timedelta(10, unit='d') result = np.timedelta64('NaT') - td @@ -304,6 +298,12 @@ def test_td_floordiv_null_scalar(self): assert np.isnan(td // NaT) assert np.isnan(td // np.timedelta64('NaT')) + def test_td_floordiv_offsets(self): + # GH#19738 + td = Timedelta(hours=3, minutes=4) + assert td // pd.offsets.Hour(1) == 3 + assert td // pd.offsets.Minute(2) == 92 + def test_td_floordiv_invalid_scalar(self): # GH#18846 td = Timedelta(hours=3, minutes=4) @@ -322,7 +322,7 @@ def test_td_floordiv_numeric_scalar(self): assert td // np.int32(2.0) == expected assert td // np.uint8(2.0) == expected - def test_floordiv_timedeltalike_array(self): + def test_td_floordiv_timedeltalike_array(self): # GH#18846 td = Timedelta(hours=3, minutes=4) scalar = Timedelta(hours=3, minutes=3) @@ -371,6 +371,10 @@ def test_td_rfloordiv_null_scalar(self): assert np.isnan(td.__rfloordiv__(NaT)) assert np.isnan(td.__rfloordiv__(np.timedelta64('NaT'))) + def test_td_rfloordiv_offsets(self): + # GH#19738 + assert pd.offsets.Hour(1) // Timedelta(minutes=25) == 2 + def test_td_rfloordiv_invalid_scalar(self): # GH#18846 td = Timedelta(hours=3, minutes=3) From 30f9b185ade79b7898dd69d1ed452cf36daaf798 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 19 Feb 2018 15:19:02 -0800 Subject: [PATCH 139/217] Reduce redirection in ops (#19649) --- pandas/core/ops.py | 228 ++++++++++++++++++++++++----------- pandas/core/panel.py | 3 +- pandas/core/sparse/array.py | 4 +- pandas/core/sparse/series.py | 4 +- 4 files changed, 159 insertions(+), 80 deletions(-) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index dff2b6844af94..da65f1f31ed2a 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -208,6 +208,78 @@ def _get_frame_op_default_axis(name): return 'columns' +def _get_opstr(op, cls): + """ + Find the operation string, if any, to pass to numexpr for this + operation. 
+ + Parameters + ---------- + op : binary operator + cls : class + + Returns + ------- + op_str : string or None + """ + # numexpr is available for non-sparse classes + subtyp = getattr(cls, '_subtyp', '') + use_numexpr = 'sparse' not in subtyp + + if not use_numexpr: + # if we're not using numexpr, then don't pass a str_rep + return None + + return {operator.add: '+', + radd: '+', + operator.mul: '*', + rmul: '*', + operator.sub: '-', + rsub: '-', + operator.truediv: '/', + rtruediv: '/', + operator.floordiv: '//', + rfloordiv: '//', + operator.mod: None, # TODO: Why None for mod but '%' for rmod? + rmod: '%', + operator.pow: '**', + rpow: '**', + operator.eq: '==', + operator.ne: '!=', + operator.le: '<=', + operator.lt: '<', + operator.ge: '>=', + operator.gt: '>', + operator.and_: '&', + rand_: '&', + operator.or_: '|', + ror_: '|', + operator.xor: '^', + rxor: '^', + divmod: None, + rdivmod: None}[op] + + +def _get_op_name(op, special): + """ + Find the name to attach to this method according to conventions + for special and non-special methods. + + Parameters + ---------- + op : binary operator + special : bool + + Returns + ------- + op_name : str + """ + opname = op.__name__.strip('_') + if special: + opname = '__{opname}__'.format(opname=opname) + return opname + + # ----------------------------------------------------------------------------- # Docstring Generation and Templates @@ -501,48 +573,29 @@ def _create_methods(cls, arith_method, comp_method, bool_method, # creates actual methods based upon arithmetic, comp and bool method # constructors. 
- # numexpr is available for non-sparse classes - subtyp = getattr(cls, '_subtyp', '') - use_numexpr = 'sparse' not in subtyp - have_divmod = issubclass(cls, ABCSeries) # divmod is available for Series and SparseSeries - # if we're not using numexpr, then don't pass a str_rep - if use_numexpr: - op = lambda x: x - else: - op = lambda x: None - if special: - - def names(x): - if x[-1] == "_": - return "__{name}_".format(name=x) - else: - return "__{name}__".format(name=x) - else: - names = lambda x: x - # yapf: disable new_methods = dict( - add=arith_method(operator.add, names('add'), op('+')), - radd=arith_method(radd, names('radd'), op('+')), - sub=arith_method(operator.sub, names('sub'), op('-')), - mul=arith_method(operator.mul, names('mul'), op('*')), - truediv=arith_method(operator.truediv, names('truediv'), op('/')), - floordiv=arith_method(operator.floordiv, names('floordiv'), op('//')), + add=arith_method(cls, operator.add, special), + radd=arith_method(cls, radd, special), + sub=arith_method(cls, operator.sub, special), + mul=arith_method(cls, operator.mul, special), + truediv=arith_method(cls, operator.truediv, special), + floordiv=arith_method(cls, operator.floordiv, special), # Causes a floating point exception in the tests when numexpr enabled, # so for now no speedup - mod=arith_method(operator.mod, names('mod'), None), - pow=arith_method(operator.pow, names('pow'), op('**')), + mod=arith_method(cls, operator.mod, special), + pow=arith_method(cls, operator.pow, special), # not entirely sure why this is necessary, but previously was included # so it's here to maintain compatibility - rmul=arith_method(operator.mul, names('rmul'), op('*')), - rsub=arith_method(rsub, names('rsub'), op('-')), - rtruediv=arith_method(rtruediv, names('rtruediv'), op('/')), - rfloordiv=arith_method(rfloordiv, names('rfloordiv'), op('//')), - rpow=arith_method(rpow, names('rpow'), op('**')), - rmod=arith_method(rmod, names('rmod'), op('%'))) + rmul=arith_method(cls, rmul, 
special), + rsub=arith_method(cls, rsub, special), + rtruediv=arith_method(cls, rtruediv, special), + rfloordiv=arith_method(cls, rfloordiv, special), + rpow=arith_method(cls, rpow, special), + rmod=arith_method(cls, rmod, special)) # yapf: enable new_methods['div'] = new_methods['truediv'] new_methods['rdiv'] = new_methods['rtruediv'] @@ -550,26 +603,30 @@ def names(x): # Comp methods never had a default axis set if comp_method: new_methods.update(dict( - eq=comp_method(operator.eq, names('eq'), op('==')), - ne=comp_method(operator.ne, names('ne'), op('!=')), - lt=comp_method(operator.lt, names('lt'), op('<')), - gt=comp_method(operator.gt, names('gt'), op('>')), - le=comp_method(operator.le, names('le'), op('<=')), - ge=comp_method(operator.ge, names('ge'), op('>=')))) + eq=comp_method(cls, operator.eq, special), + ne=comp_method(cls, operator.ne, special), + lt=comp_method(cls, operator.lt, special), + gt=comp_method(cls, operator.gt, special), + le=comp_method(cls, operator.le, special), + ge=comp_method(cls, operator.ge, special))) if bool_method: new_methods.update( - dict(and_=bool_method(operator.and_, names('and_'), op('&')), - or_=bool_method(operator.or_, names('or_'), op('|')), + dict(and_=bool_method(cls, operator.and_, special), + or_=bool_method(cls, operator.or_, special), # For some reason ``^`` wasn't used in original. 
- xor=bool_method(operator.xor, names('xor'), op('^')), - rand_=bool_method(rand_, names('rand_'), op('&')), - ror_=bool_method(ror_, names('ror_'), op('|')), - rxor=bool_method(rxor, names('rxor'), op('^')))) + xor=bool_method(cls, operator.xor, special), + rand_=bool_method(cls, rand_, special), + ror_=bool_method(cls, ror_, special), + rxor=bool_method(cls, rxor, special))) if have_divmod: # divmod doesn't have an op that is supported by numexpr - new_methods['divmod'] = arith_method(divmod, names('divmod'), None) + new_methods['divmod'] = arith_method(cls, divmod, special) - new_methods = {names(k): v for k, v in new_methods.items()} + if special: + dunderize = lambda x: '__{name}__'.format(name=x.strip('_')) + else: + dunderize = lambda x: x + new_methods = {dunderize(k): v for k, v in new_methods.items()} return new_methods @@ -596,16 +653,15 @@ def add_special_arithmetic_methods(cls, arith_method=None, Parameters ---------- arith_method : function (optional) - factory for special arithmetic methods, with op string: - f(op, name, str_rep) + factory for special arithmetic methods: + f(cls, op, special) comp_method : function (optional) - factory for rich comparison - signature: f(op, name, str_rep) + factory for rich comparison - signature: f(cls, op, special) bool_method : function (optional) - factory for boolean methods - signature: f(op, name, str_rep) + factory for boolean methods - signature: f(cls, op, special) """ new_methods = _create_methods(cls, arith_method, comp_method, bool_method, special=True) - # inplace operators (I feel like these should get passed an `inplace=True` # or just be removed @@ -645,8 +701,7 @@ def f(self, other): add_methods(cls, new_methods=new_methods) -def add_flex_arithmetic_methods(cls, flex_arith_method, - flex_comp_method=None, flex_bool_method=None): +def add_flex_arithmetic_methods(cls, flex_arith_method, flex_comp_method=None): """ Adds the full suite of flex arithmetic methods (``pow``, ``mul``, ``add``) to the class. 
@@ -654,13 +709,13 @@ def add_flex_arithmetic_methods(cls, flex_arith_method, Parameters ---------- flex_arith_method : function - factory for flex arithmetic methods, with op string: - f(op, name, str_rep) + factory for flex arithmetic methods: + f(cls, op, special) flex_comp_method : function, optional, - factory for rich comparison - signature: f(op, name, str_rep) + factory for rich comparison - signature: f(cls, op, special) """ new_methods = _create_methods(cls, flex_arith_method, - flex_comp_method, flex_bool_method, + flex_comp_method, bool_method=None, special=False) new_methods.update(dict(multiply=new_methods['mul'], subtract=new_methods['sub'], @@ -719,11 +774,13 @@ def _construct_divmod_result(left, result, index, name, dtype): ) -def _arith_method_SERIES(op, name, str_rep): +def _arith_method_SERIES(cls, op, special): """ Wrapper function for Series arithmetic operations, to avoid code duplication. """ + str_rep = _get_opstr(op, cls) + name = _get_op_name(op, special) eval_kwargs = _gen_eval_kwargs(name) fill_zeros = _gen_fill_zeros(name) construct_result = (_construct_divmod_result @@ -856,11 +913,12 @@ def _comp_method_OBJECT_ARRAY(op, x, y): return result -def _comp_method_SERIES(op, name, str_rep): +def _comp_method_SERIES(cls, op, special): """ Wrapper function for Series arithmetic operations, to avoid code duplication. """ + name = _get_op_name(op, special) masker = _gen_eval_kwargs(name).get('masker', False) def na_op(x, y): @@ -995,7 +1053,7 @@ def wrapper(self, other, axis=None): return wrapper -def _bool_method_SERIES(op, name, str_rep): +def _bool_method_SERIES(cls, op, special): """ Wrapper function for Series arithmetic operations, to avoid code duplication. 
@@ -1066,7 +1124,8 @@ def wrapper(self, other): return wrapper -def _flex_method_SERIES(op, name, str_rep): +def _flex_method_SERIES(cls, op, special): + name = _get_op_name(op, special) doc = _make_flex_doc(name, 'series') @Appender(doc) @@ -1192,7 +1251,9 @@ def to_series(right): return right -def _arith_method_FRAME(op, name, str_rep=None): +def _arith_method_FRAME(cls, op, special): + str_rep = _get_opstr(op, cls) + name = _get_op_name(op, special) eval_kwargs = _gen_eval_kwargs(name) fill_zeros = _gen_fill_zeros(name) default_axis = _get_frame_op_default_axis(name) @@ -1270,7 +1331,9 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): return f -def _flex_comp_method_FRAME(op, name, str_rep=None): +def _flex_comp_method_FRAME(cls, op, special): + str_rep = _get_opstr(op, cls) + name = _get_op_name(op, special) default_axis = _get_frame_op_default_axis(name) def na_op(x, y): @@ -1306,7 +1369,10 @@ def f(self, other, axis=default_axis, level=None): return f -def _comp_method_FRAME(func, name, str_rep): +def _comp_method_FRAME(cls, func, special): + str_rep = _get_opstr(func, cls) + name = _get_op_name(func, special) + @Appender('Wrapper for comparison method {name}'.format(name=name)) def f(self, other): if isinstance(other, ABCDataFrame): @@ -1345,8 +1411,10 @@ def f(self, other): # ----------------------------------------------------------------------------- # Panel -def _arith_method_PANEL(op, name, str_rep=None): +def _arith_method_PANEL(cls, op, special): # work only for scalars + name = _get_op_name(op, special) + def f(self, other): if not is_scalar(other): raise ValueError('Simple arithmetic with {name} can only be ' @@ -1359,7 +1427,10 @@ def f(self, other): return f -def _comp_method_PANEL(op, name, str_rep=None): +def _comp_method_PANEL(cls, op, special): + str_rep = _get_opstr(op, cls) + name = _get_op_name(op, special) + def na_op(x, y): import pandas.core.computation.expressions as expressions @@ -1389,7 +1460,9 @@ def f(self, 
other, axis=None): return f -def _flex_method_PANEL(op, name, str_rep=None): +def _flex_method_PANEL(cls, op, special): + str_rep = _get_opstr(op, cls) + name = _get_op_name(op, special) eval_kwargs = _gen_eval_kwargs(name) fill_zeros = _gen_fill_zeros(name) @@ -1427,18 +1500,19 @@ def f(self, other, axis=0): comp_method=_comp_method_PANEL, bool_method=_arith_method_PANEL) +panel_flex_funcs = dict(flex_arith_method=_flex_method_PANEL, + flex_comp_method=_comp_method_PANEL) # ----------------------------------------------------------------------------- # Sparse -def _arith_method_SPARSE_SERIES(op, name, str_rep=None): +def _arith_method_SPARSE_SERIES(cls, op, special): """ Wrapper function for Series arithmetic operations, to avoid code duplication. - - str_rep is not used, but is present for compatibility. """ + name = _get_op_name(op, special) def wrapper(self, other): if isinstance(other, ABCDataFrame): @@ -1476,11 +1550,12 @@ def _sparse_series_op(left, right, op, name): return left._constructor(result, index=new_index, name=new_name) -def _arith_method_SPARSE_ARRAY(op, name, str_rep=None): +def _arith_method_SPARSE_ARRAY(cls, op, special): """ Wrapper function for Series arithmetic operations, to avoid code duplication. 
""" + name = _get_op_name(op, special) def wrapper(self, other): from pandas.core.sparse.array import ( @@ -1508,3 +1583,12 @@ def wrapper(self, other): name = name[2:-2] wrapper.__name__ = name return wrapper + + +sparse_array_special_funcs = dict(arith_method=_arith_method_SPARSE_ARRAY, + comp_method=_arith_method_SPARSE_ARRAY, + bool_method=_arith_method_SPARSE_ARRAY) + +sparse_series_special_funcs = dict(arith_method=_arith_method_SPARSE_SERIES, + comp_method=_arith_method_SPARSE_SERIES, + bool_method=None) diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 7f973992fb07f..3be1e3ef8734d 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -1528,8 +1528,7 @@ def _extract_axis(self, data, axis=0, intersect=False): 'minor_axis': 'columns'}) ops.add_special_arithmetic_methods(Panel, **ops.panel_special_funcs) -ops.add_flex_arithmetic_methods(Panel, ops._flex_method_PANEL, - flex_comp_method=ops._comp_method_PANEL) +ops.add_flex_arithmetic_methods(Panel, **ops.panel_flex_funcs) Panel._add_numeric_operations() diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py index 3cbae717d0e07..4f7152666f7bf 100644 --- a/pandas/core/sparse/array.py +++ b/pandas/core/sparse/array.py @@ -844,6 +844,4 @@ def _make_index(length, indices, kind): ops.add_special_arithmetic_methods(SparseArray, - arith_method=ops._arith_method_SPARSE_ARRAY, - comp_method=ops._arith_method_SPARSE_ARRAY, - bool_method=ops._arith_method_SPARSE_ARRAY) + **ops.sparse_array_special_funcs) diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index 62a467bec2683..335a4c80adc63 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -817,6 +817,4 @@ def from_coo(cls, A, dense_index=False): # overwrite basic arithmetic to use SparseSeries version # force methods to overwrite previous definitions. 
ops.add_special_arithmetic_methods(SparseSeries, - ops._arith_method_SPARSE_SERIES, - comp_method=ops._arith_method_SPARSE_SERIES, - bool_method=None) + **ops.sparse_series_special_funcs) From 63fc36a0457d1d20adac0a10c2affa5f820b7483 Mon Sep 17 00:00:00 2001 From: Matt Kirk Date: Tue, 20 Feb 2018 06:38:42 +0700 Subject: [PATCH 140/217] Fix the non cython build for cpp extensions (#19707) --- setup.py | 96 +++++++++++++++++++++++++++++--------------------------- 1 file changed, 50 insertions(+), 46 deletions(-) diff --git a/setup.py b/setup.py index c66979dd19ef0..c7784260d79ca 100755 --- a/setup.py +++ b/setup.py @@ -311,7 +311,6 @@ class CheckSDist(sdist_class): 'pandas/_libs/missing.pyx', 'pandas/_libs/reduction.pyx', 'pandas/_libs/testing.pyx', - 'pandas/_libs/window.pyx', 'pandas/_libs/skiplist.pyx', 'pandas/_libs/sparse.pyx', 'pandas/_libs/parsers.pyx', @@ -331,6 +330,10 @@ class CheckSDist(sdist_class): 'pandas/_libs/writers.pyx', 'pandas/io/sas/sas.pyx'] + _cpp_pyxfiles = ['pandas/_libs/window.pyx', + 'pandas/io/msgpack/_packer.pyx', + 'pandas/io/msgpack/_unpacker.pyx'] + def initialize_options(self): sdist_class.initialize_options(self) @@ -338,12 +341,17 @@ def run(self): if 'cython' in cmdclass: self.run_command('cython') else: - for pyxfile in self._pyxfiles: - cfile = pyxfile[:-3] + 'c' - msg = ("C-source file '{source}' not found.\n" - "Run 'setup.py cython' before sdist.".format( - source=cfile)) - assert os.path.isfile(cfile), msg + # If we are not running cython then + # compile the extensions correctly + pyx_files = [(self._pyxfiles, 'c'), (self._cpp_pyxfiles, 'cpp')] + + for pyxfiles, extension in pyx_files: + for pyxfile in pyxfiles: + sourcefile = pyxfile[:-3] + extension + msg = ("{extension}-source file '{source}' not found.\n" + "Run 'setup.py cython' before sdist.".format( + source=sourcefile, extension=extension)) + assert os.path.isfile(sourcefile), msg sdist_class.run(self) @@ -417,6 +425,11 @@ def get_tag(self): cmdclass['build_src'] = 
DummyBuildSrc cmdclass['build_ext'] = CheckingBuildExt +if sys.byteorder == 'big': + endian_macro = [('__BIG_ENDIAN__', '1')] +else: + endian_macro = [('__LITTLE_ENDIAN__', '1')] + lib_depends = ['inference'] @@ -453,6 +466,7 @@ def pxd(name): 'pandas/_libs/src/datetime/np_datetime_strings.h'] np_datetime_sources = ['pandas/_libs/src/datetime/np_datetime.c', 'pandas/_libs/src/datetime/np_datetime_strings.c'] + tseries_depends = np_datetime_headers + ['pandas/_libs/tslibs/np_datetime.pxd'] # some linux distros require it @@ -618,17 +632,42 @@ def pxd(name): '_libs.window': { 'pyxfile': '_libs/window', 'pxdfiles': ['_libs/skiplist', '_libs/src/util'], - 'language': 'c++'}, + 'language': 'c++', + 'suffix': '.cpp'}, '_libs.writers': { 'pyxfile': '_libs/writers', 'pxdfiles': ['_libs/src/util']}, 'io.sas._sas': { - 'pyxfile': 'io/sas/sas'}} + 'pyxfile': 'io/sas/sas'}, + 'io.msgpack._packer': { + 'macros': endian_macro, + 'depends': ['pandas/_libs/src/msgpack/pack.h', + 'pandas/_libs/src/msgpack/pack_template.h'], + 'include': ['pandas/_libs/src/msgpack'] + common_include, + 'language': 'c++', + 'suffix': '.cpp', + 'pyxfile': 'io/msgpack/_packer', + 'subdir': 'io/msgpack'}, + 'io.msgpack._unpacker': { + 'depends': ['pandas/_libs/src/msgpack/unpack.h', + 'pandas/_libs/src/msgpack/unpack_define.h', + 'pandas/_libs/src/msgpack/unpack_template.h'], + 'macros': endian_macro, + 'include': ['pandas/_libs/src/msgpack'] + common_include, + 'language': 'c++', + 'suffix': '.cpp', + 'pyxfile': 'io/msgpack/_unpacker', + 'subdir': 'io/msgpack' + } +} extensions = [] for name, data in ext_data.items(): - sources = [srcpath(data['pyxfile'], suffix=suffix, subdir='')] + source_suffix = suffix if suffix == '.pyx' else data.get('suffix', '.c') + + sources = [srcpath(data['pyxfile'], suffix=source_suffix, subdir='')] + pxds = [pxd(x) for x in data.get('pxdfiles', [])] if suffix == '.pyx' and pxds: sources.extend(pxds) @@ -642,46 +681,11 @@ def pxd(name): depends=data.get('depends', []), 
include_dirs=include, language=data.get('language', 'c'), + define_macros=data.get('macros', []), extra_compile_args=extra_compile_args) extensions.append(obj) -# ---------------------------------------------------------------------- -# msgpack - -if sys.byteorder == 'big': - macros = [('__BIG_ENDIAN__', '1')] -else: - macros = [('__LITTLE_ENDIAN__', '1')] - -msgpack_include = ['pandas/_libs/src/msgpack'] + common_include -msgpack_suffix = suffix if suffix == '.pyx' else '.cpp' -unpacker_depends = ['pandas/_libs/src/msgpack/unpack.h', - 'pandas/_libs/src/msgpack/unpack_define.h', - 'pandas/_libs/src/msgpack/unpack_template.h'] - -packer_ext = Extension('pandas.io.msgpack._packer', - depends=['pandas/_libs/src/msgpack/pack.h', - 'pandas/_libs/src/msgpack/pack_template.h'], - sources=[srcpath('_packer', - suffix=msgpack_suffix, - subdir='io/msgpack')], - language='c++', - include_dirs=msgpack_include, - define_macros=macros, - extra_compile_args=extra_compile_args) -unpacker_ext = Extension('pandas.io.msgpack._unpacker', - depends=unpacker_depends, - sources=[srcpath('_unpacker', - suffix=msgpack_suffix, - subdir='io/msgpack')], - language='c++', - include_dirs=msgpack_include, - define_macros=macros, - extra_compile_args=extra_compile_args) -extensions.append(packer_ext) -extensions.append(unpacker_ext) - # ---------------------------------------------------------------------- # ujson From b419650164a0b02155190a4531992ce8b43041de Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 19 Feb 2018 20:21:33 -0500 Subject: [PATCH 141/217] DOC: whatsnew typo cleanup --- doc/source/whatsnew/v0.23.0.txt | 53 ++++++++++++++++----------------- 1 file changed, 26 insertions(+), 27 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 2f820043d7b6f..7bd47c7172671 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -16,10 +16,10 @@ New features .. 
_whatsnew_0210.enhancements.limit_area: ``DataFrame.interpolate`` has gained the ``limit_area`` kwarg -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ :meth:`DataFrame.interpolate` has gained a ``limit_area`` parameter to allow further control of which ``NaN`` s are replaced. -Use `limit_area='inside'` to fill only NaNs surrounded by valid values or use `limit_area='outside'` to fill only ``NaN`` s +Use ``limit_area='inside'`` to fill only NaNs surrounded by valid values or use ``limit_area='outside'`` to fill only ``NaN`` s outside the existing valid values while preserving those inside. (:issue:`16284`) See the :ref:`full documentation here `. @@ -352,13 +352,13 @@ Dependencies have increased minimum versions We have updated our minimum supported versions of dependencies (:issue:`15184`). If installed, we now require: - +-----------------+-----------------+----------+ - | Package | Minimum Version | Required | - +=================+=================+==========+ - | python-dateutil | 2.5.0 | X | - +-----------------+-----------------+----------+ - | openpyxl | 2.4.0 | | - +-----------------+-----------------+----------+ ++-----------------+-----------------+----------+ +| Package | Minimum Version | Required | ++=================+=================+==========+ +| python-dateutil | 2.5.0 | X | ++-----------------+-----------------+----------+ +| openpyxl | 2.4.0 | | ++-----------------+-----------------+----------+ .. _whatsnew_0230.api_breaking.deprecate_panel: @@ -391,7 +391,7 @@ Convert to an xarray DataArray .. _whatsnew_0230.api_breaking.apply: Changes to make output of ``DataFrame.apply`` consistent -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ :func:`DataFrame.apply` was inconsistent when applying an arbitrary user-defined-function that returned a list-like with ``axis=1``. 
Several bugs and inconsistencies are resolved. If the applied function returns a Series, then pandas will return a DataFrame; otherwise a Series will be returned, this includes the case @@ -454,7 +454,7 @@ Returning a ``Series`` allows one to control the exact return structure and colu .. ipython:: python - df.apply(lambda x: Series([1, 2, 3], index=['D', 'E', 'F']]), axis=1) + df.apply(lambda x: Series([1, 2, 3], index=['D', 'E', 'F']), axis=1) .. _whatsnew_0230.api_breaking.build_changes: @@ -555,7 +555,7 @@ Other API Changes - ``pandas.tseries.frequencies.get_freq_group()`` and ``pandas.tseries.frequencies.DAYS`` are removed from the public API (:issue:`18034`) - :func:`Series.truncate` and :func:`DataFrame.truncate` will raise a ``ValueError`` if the index is not sorted instead of an unhelpful ``KeyError`` (:issue:`17935`) - :func:`Index.map` can now accept ``Series`` and dictionary input objects (:issue:`12756`, :issue:`18482`, :issue:`18509`). -- :func:`Dataframe.unstack` will now default to filling with ``np.nan`` for ``object`` columns. (:issue:`12815`) +- :func:`DataFrame.unstack` will now default to filling with ``np.nan`` for ``object`` columns. (:issue:`12815`) - :class:`IntervalIndex` constructor will raise if the ``closed`` parameter conflicts with how the input data is inferred to be closed (:issue:`18421`) - Inserting missing values into indexes will work for all types of indexes and automatically insert the correct type of missing value (``NaN``, ``NaT``, etc.) regardless of the type passed in (:issue:`18295`) - Restricted ``DateOffset`` keyword arguments. Previously, ``DateOffset`` subclasses allowed arbitrary keyword arguments which could lead to unexpected behavior. Now, only valid arguments will be accepted. (:issue:`17176`, :issue:`18226`). 
@@ -620,7 +620,7 @@ Removal of prior version deprecations/changes - The ``pandas.io.wb`` and ``pandas.io.data`` stub modules have been removed (:issue:`13735`) - ``Categorical.from_array`` has been removed (:issue:`13854`) - The ``freq`` and ``how`` parameters have been removed from the ``rolling``/``expanding``/``ewm`` methods of DataFrame - and Series (deprecated since v0.18). Instead, resample before calling the methods. (:issue:18601 & :issue:18668) + and Series (deprecated since v0.18). Instead, resample before calling the methods. (:issue:`18601` & :issue:`18668`) - ``DatetimeIndex.to_datetime``, ``Timestamp.to_datetime``, ``PeriodIndex.to_datetime``, and ``Index.to_datetime`` have been removed (:issue:`8254`, :issue:`14096`, :issue:`14113`) - :func:`read_csv` has dropped the ``skip_footer`` parameter (:issue:`13386`) - :func:`read_csv` has dropped the ``as_recarray`` parameter (:issue:`13373`) @@ -631,7 +631,7 @@ Removal of prior version deprecations/changes - ``pandas.tseries.frequencies.get_standard_freq`` has been removed in favor of ``pandas.tseries.frequencies.to_offset(freq).rule_code`` (:issue:`13874`) - The ``freqstr`` keyword has been removed from ``pandas.tseries.frequencies.to_offset`` in favor of ``freq`` (:issue:`13874`) - The ``Panel4D`` and ``PanelND`` classes have been removed (:issue:`13776`) -- The ``Panel``class has dropped the ``to_long``and ``toLong`` methods (:issue:`19077`) +- The ``Panel`` class has dropped the ``to_long``and ``toLong`` methods (:issue:`19077`) - The options ``display.line_with`` and ``display.height`` are removed in favor of ``display.width`` and ``display.max_rows`` respectively (:issue:`4391`, :issue:`19107`) - The ``labels`` attribute of the ``Categorical`` class has been removed in favor of :attribute:`Categorical.codes` (:issue:`7768`) - The ``flavor`` parameter have been removed from func:`to_sql` method (:issue:`13611`) @@ -672,7 +672,7 @@ Documentation Changes Rewrote some sentences for greater clarity, added 
more dynamic references to functions, methods and classes. (:issue:`18941`, :issue:`18948`, :issue:`18973`, :issue:`19017`) -- + .. _whatsnew_0230.bug_fixes: @@ -704,7 +704,7 @@ Categorical ``self`` but in a different order (:issue:`19551`) - Bug in :meth:`Index.astype` with a categorical dtype where the resultant index is not converted to a :class:`CategoricalIndex` for all types of index (:issue:`18630`) - Bug in :meth:`Series.astype` and ``Categorical.astype()`` where an existing categorical data does not get updated (:issue:`10696`, :issue:`18593`) -- Bug in :class:`Index` constructor with ``dtype=CategoricalDtype(...)`` where ``categories`` and ``ordered`` are not maintained (issue:`19032`) +- Bug in :class:`Index` constructor with ``dtype=CategoricalDtype(...)`` where ``categories`` and ``ordered`` are not maintained (:issue:`19032`) Datetimelike ^^^^^^^^^^^^ @@ -718,7 +718,7 @@ Datetimelike - Bug in :class:`DatetimeIndex` and :class:`TimedeltaIndex` where adding or subtracting an array-like of ``DateOffset`` objects either raised (``np.array``, ``pd.Index``) or broadcast incorrectly (``pd.Series``) (:issue:`18849`) - Bug in :class:`Series` floor-division where operating on a scalar ``timedelta`` raises an exception (:issue:`18846`) - Bug in :class:`Series`` with ``dtype='timedelta64[ns]`` where addition or subtraction of ``TimedeltaIndex`` had results cast to ``dtype='int64'`` (:issue:`17250`) -- Bug in :class:`TimedeltaIndex` where division by a ``Series`` would return a ``TimedeltaIndex`` instead of a ``Series`` (issue:`19042`) +- Bug in :class:`TimedeltaIndex` where division by a ``Series`` would return a ``TimedeltaIndex`` instead of a ``Series`` (:issue:`19042`) - Bug in :class:`Series` with ``dtype='timedelta64[ns]`` where addition or subtraction of ``TimedeltaIndex`` could return a ``Series`` with an incorrect name (:issue:`19043`) - Bug in :class:`DatetimeIndex` where the repr was not showing high-precision time values at the end of a day (e.g., 
23:59:59.999999999) (:issue:`19030`) - Bug where dividing a scalar timedelta-like object with :class:`TimedeltaIndex` performed the reciprocal operation (:issue:`19125`) @@ -732,7 +732,7 @@ Datetimelike - Bug in :func:`to_datetime` where passing an out-of-bounds datetime with ``errors='coerce'`` and ``utc=True`` would raise ``OutOfBoundsDatetime`` instead of parsing to ``NaT`` (:issue:`19612`) - Bug in :func:`Timedelta.__add__`, :func:`Timedelta.__sub__` where adding or subtracting a ``np.timedelta64`` object would return another ``np.timedelta64`` instead of a ``Timedelta`` (:issue:`19738`) - Bug in :func:`Timedelta.__floordiv__`, :func:`Timedelta.__rfloordiv__` where operating with a ``Tick`` object would raise a ``TypeError`` instead of returning a numeric value (:issue:`19738`) -- + Timezones ^^^^^^^^^ @@ -791,11 +791,11 @@ MultiIndex - Bug in :func:`MultiIndex.set_labels` which would cause casting (and potentially clipping) of the new labels if the ``level`` argument is not 0 or a list like [0, 1, ... 
] (:issue:`19057`) - Bug in :func:`MultiIndex.get_level_values` which would return an invalid index on level of ints with missing values (:issue:`17924`) - Bug in :func:`MultiIndex.remove_unused_levels` which would fill nan values (:issue:`18417`) -- Bug in :func:`MultiIndex.from_tuples`` which would fail to take zipped tuples in python3 (:issue:`18434`) -- Bug in :func:`MultiIndex.get_loc`` which would fail to automatically cast values between float and int (:issue:`18818`, :issue:`15994`) -- Bug in :func:`MultiIndex.get_loc`` which would cast boolean to integer labels (:issue:`19086`) -- Bug in :func:`MultiIndex.get_loc`` which would fail to locate keys containing ``NaN`` (:issue:`18485`) -- Bug in :func:`MultiIndex.get_loc`` in large :class:`MultiIndex`, would fail when levels had different dtypes (:issue:`18520`) +- Bug in :func:`MultiIndex.from_tuples` which would fail to take zipped tuples in python3 (:issue:`18434`) +- Bug in :func:`MultiIndex.get_loc` which would fail to automatically cast values between float and int (:issue:`18818`, :issue:`15994`) +- Bug in :func:`MultiIndex.get_loc` which would cast boolean to integer labels (:issue:`19086`) +- Bug in :func:`MultiIndex.get_loc` which would fail to locate keys containing ``NaN`` (:issue:`18485`) +- Bug in :func:`MultiIndex.get_loc` in large :class:`MultiIndex`, would fail when levels had different dtypes (:issue:`18520`) I/O @@ -817,10 +817,10 @@ I/O Plotting ^^^^^^^^ -- :func: `DataFrame.plot` now raises a ``ValueError`` when the ``x`` or ``y`` argument is improperly formed (:issue:`18671`) +- :func:`DataFrame.plot` now raises a ``ValueError`` when the ``x`` or ``y`` argument is improperly formed (:issue:`18671`) - Bug in formatting tick labels with ``datetime.time()`` and fractional seconds (:issue:`18478`). - :meth:`Series.plot.kde` has exposed the args ``ind`` and ``bw_method`` in the docstring (:issue:`18461`). The argument ``ind`` may now also be an integer (number of sample points). 
-- + Groupby/Resample/Rolling ^^^^^^^^^^^^^^^^^^^^^^^^ @@ -860,10 +860,9 @@ Reshaping - Improved error message for :func:`DataFrame.merge` when there is no common merge key (:issue:`19427`) - Bug in :func:`DataFrame.join` which does an *outer* instead of a *left* join when being called with multiple DataFrames and some have non-unique indices (:issue:`19624`) - :func:`Series.rename` now accepts ``axis`` as a kwarg (:issue:`18589`) +- Comparisons between :class:`Series` and :class:`Index` would return a ``Series`` with an incorrect name, ignoring the ``Index``'s name attribute (:issue:`19582`) Other ^^^^^ - Improved error message when attempting to use a Python keyword as an identifier in a ``numexpr`` backed query (:issue:`18221`) -- Comparisons between :class:`Series` and :class:`Index` would return a ``Series`` with an incorrect name, ignoring the ``Index``'s name attribute (:issue:`19582`) -- From df4fd456a7c31a2532495cbf3b508e25460f7a42 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 20 Feb 2018 10:20:55 +0100 Subject: [PATCH 142/217] DOC: fix various warnings and errors in the docs (from deprecations/api changes) (#19763) --- doc/source/advanced.rst | 2 +- doc/source/dsintro.rst | 2 +- doc/source/io.rst | 1 + doc/source/whatsnew/v0.10.0.txt | 26 +++++++++++++++-------- doc/source/whatsnew/v0.13.1.txt | 37 +++++++++++++++++++++++++-------- doc/source/whatsnew/v0.15.0.txt | 2 +- doc/source/whatsnew/v0.21.0.txt | 13 +++++------- doc/source/whatsnew/v0.23.0.txt | 1 + doc/source/whatsnew/v0.8.0.txt | 4 ++-- pandas/core/frame.py | 4 ++-- 10 files changed, 59 insertions(+), 33 deletions(-) diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst index c455fbb8d0687..c81842d3d9212 100644 --- a/doc/source/advanced.rst +++ b/doc/source/advanced.rst @@ -274,7 +274,7 @@ Passing a list of labels or tuples works similar to reindexing: df.loc[[('bar', 'two'), ('qux', 'one')]] -.. info:: +.. 
note:: It is important to note that tuples and lists are not treated identically in pandas when it comes to indexing. Whereas a tuple is interpreted as one diff --git a/doc/source/dsintro.rst b/doc/source/dsintro.rst index 78e2fdb46f659..582750b16f40d 100644 --- a/doc/source/dsintro.rst +++ b/doc/source/dsintro.rst @@ -506,7 +506,7 @@ to be inserted (for example, a ``Series`` or NumPy array), or a function of one argument to be called on the ``DataFrame``. A *copy* of the original DataFrame is returned, with the new values inserted. -.. versionmodified:: 0.23.0 +.. versionchanged:: 0.23.0 Starting with Python 3.6 the order of ``**kwargs`` is preserved. This allows for *dependent* assignment, where an expression later in ``**kwargs`` can refer diff --git a/doc/source/io.rst b/doc/source/io.rst index 7bb34e4d232dd..6120f7d25a0c3 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -2262,6 +2262,7 @@ is not round-trippable, nor are any names beginning with 'level_' within a indicate missing values and the subsequent read cannot distinguish the intent. .. ipython:: python + :okwarning: df.index.name = 'index' df.to_json('test.json', orient='table') diff --git a/doc/source/whatsnew/v0.10.0.txt b/doc/source/whatsnew/v0.10.0.txt index a0c4a3e0073f9..222a2da23865c 100644 --- a/doc/source/whatsnew/v0.10.0.txt +++ b/doc/source/whatsnew/v0.10.0.txt @@ -411,15 +411,23 @@ N Dimensional Panels (Experimental) Adding experimental support for Panel4D and factory functions to create n-dimensional named panels. :ref:`Docs ` for NDim. Here is a taste of what to expect. - .. ipython:: python - :okwarning: - - p4d = Panel4D(randn(2, 2, 5, 4), - labels=['Label1','Label2'], - items=['Item1', 'Item2'], - major_axis=date_range('1/1/2000', periods=5), - minor_axis=['A', 'B', 'C', 'D']) - p4d +.. 
code-block:: ipython + + In [58]: p4d = Panel4D(randn(2, 2, 5, 4), + ....: labels=['Label1','Label2'], + ....: items=['Item1', 'Item2'], + ....: major_axis=date_range('1/1/2000', periods=5), + ....: minor_axis=['A', 'B', 'C', 'D']) + ....: + + In [59]: p4d + Out[59]: + + Dimensions: 2 (labels) x 2 (items) x 5 (major_axis) x 4 (minor_axis) + Labels axis: Label1 to Label2 + Items axis: Item1 to Item2 + Major_axis axis: 2000-01-01 00:00:00 to 2000-01-05 00:00:00 + Minor_axis axis: A to D diff --git a/doc/source/whatsnew/v0.13.1.txt b/doc/source/whatsnew/v0.13.1.txt index 5e5653945fefa..51ca6116d42ce 100644 --- a/doc/source/whatsnew/v0.13.1.txt +++ b/doc/source/whatsnew/v0.13.1.txt @@ -140,14 +140,21 @@ API changes applied would be called with an empty ``Series`` to guess whether a ``Series`` or ``DataFrame`` should be returned: - .. ipython:: python + .. code-block:: ipython + + In [32]: def applied_func(col): + ....: print("Apply function being called with: ", col) + ....: return col.sum() + ....: - def applied_func(col): - print("Apply function being called with: ", col) - return col.sum() + In [33]: empty = DataFrame(columns=['a', 'b']) - empty = DataFrame(columns=['a', 'b']) - empty.apply(applied_func) + In [34]: empty.apply(applied_func) + Apply function being called with: Series([], Length: 0, dtype: float64) + Out[34]: + a NaN + b NaN + Length: 2, dtype: float64 Now, when ``apply`` is called on an empty ``DataFrame``: if the ``reduce`` argument is ``True`` a ``Series`` will returned, if it is ``False`` a @@ -155,10 +162,22 @@ API changes function being applied will be called with an empty series to try and guess the return type. - .. ipython:: python + .. 
code-block:: ipython + + In [35]: empty.apply(applied_func, reduce=True) + Out[35]: + a NaN + b NaN + Length: 2, dtype: float64 + + In [36]: empty.apply(applied_func, reduce=False) + Out[36]: + Empty DataFrame + Columns: [a, b] + Index: [] + + [0 rows x 2 columns] - empty.apply(applied_func, reduce=True) - empty.apply(applied_func, reduce=False) Prior Version Deprecations/Changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/whatsnew/v0.15.0.txt b/doc/source/whatsnew/v0.15.0.txt index ef17904d5ab1a..c5ef6c8c9d74a 100644 --- a/doc/source/whatsnew/v0.15.0.txt +++ b/doc/source/whatsnew/v0.15.0.txt @@ -1044,7 +1044,7 @@ Other: idx = MultiIndex.from_product([['a'], range(3), list("pqr")], names=['foo', 'bar', 'baz']) idx.set_names('qux', level=0) - idx.set_names(['qux','baz'], level=[0,1]) + idx.set_names(['qux','corge'], level=[0,1]) idx.set_levels(['a','b','c'], level='bar') idx.set_levels([['a','b','c'],[1,2,3]], level=[1,2]) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 3e673bd4cbc28..0c2e494f29bc1 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -894,17 +894,14 @@ imported. Matplotlib plot methods (``plt.plot``, ``ax.plot``, ...), will not nicely format the x-axis for ``DatetimeIndex`` or ``PeriodIndex`` values. You must explicitly register these methods: -.. ipython:: python - - from pandas.tseries import converter - converter.register() - - fig, ax = plt.subplots() - plt.plot(pd.date_range('2017', periods=6), range(6)) - Pandas built-in ``Series.plot`` and ``DataFrame.plot`` *will* register these converters on first-use (:issue:17710). +.. note:: + + This change has been temporarily reverted in pandas 0.21.1, + for more details see :ref:`here `. + .. 
_whatsnew_0210.api: Other API Changes diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 7bd47c7172671..f31d0a5a0667c 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -211,6 +211,7 @@ A ``DataFrame`` can now be written to and subsequently read back via JSON while Please note that the string `index` is not supported with the round trip format, as it is used by default in ``write_json`` to indicate a missing index name. .. ipython:: python + :okwarning: df.index.name = 'index' df.to_json('test.json', orient='table') diff --git a/doc/source/whatsnew/v0.8.0.txt b/doc/source/whatsnew/v0.8.0.txt index b9cece752981e..b2d1d16e86990 100644 --- a/doc/source/whatsnew/v0.8.0.txt +++ b/doc/source/whatsnew/v0.8.0.txt @@ -217,12 +217,12 @@ nanosecond support (the ``nanosecond`` field store the nanosecond value between ``DatetimeIndex`` to regular NumPy arrays. If you have code that requires an array of ``datetime.datetime`` objects, you -have a couple of options. First, the ``asobject`` property of ``DatetimeIndex`` +have a couple of options. First, the ``astype(object)`` method of ``DatetimeIndex`` produces an array of ``Timestamp`` objects: .. ipython:: python - stamp_array = rng.asobject + stamp_array = rng.astype(object) stamp_array stamp_array[5] diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b96af6af3707f..0b315a7c6f031 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -114,7 +114,7 @@ - if `axis` is 1 or `'columns'` then `by` may contain column levels and/or index labels - .. versionmodified:: 0.23.0 + .. versionchanged:: 0.23.0 Allow specifying index or column level names.""", versionadded_to_excel='', optional_labels="""labels : array-like, optional @@ -2696,7 +2696,7 @@ def assign(self, **kwargs): or modified columns. All items are computed first, and then assigned in alphabetical order. - .. versionmodified :: 0.23.0 + .. 
versionchanged :: 0.23.0 Keyword argument order is maintained for Python 3.6 and later. From c1165843176394ab9e4f200627bca251c1c9dab1 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 20 Feb 2018 03:16:29 -0800 Subject: [PATCH 143/217] Split+Parametrize Timedelta tests (#19736) --- .../scalar/timedelta/test_construction.py | 222 +++++++++++++++ pandas/tests/scalar/timedelta/test_formats.py | 48 ++++ .../tests/scalar/timedelta/test_timedelta.py | 254 ------------------ 3 files changed, 270 insertions(+), 254 deletions(-) create mode 100644 pandas/tests/scalar/timedelta/test_construction.py create mode 100644 pandas/tests/scalar/timedelta/test_formats.py diff --git a/pandas/tests/scalar/timedelta/test_construction.py b/pandas/tests/scalar/timedelta/test_construction.py new file mode 100644 index 0000000000000..5ccad9e6b4e3c --- /dev/null +++ b/pandas/tests/scalar/timedelta/test_construction.py @@ -0,0 +1,222 @@ +# -*- coding: utf-8 -*- +from datetime import timedelta + +import pytest +import numpy as np + +import pandas as pd +import pandas.util.testing as tm +from pandas import Timedelta + + +def test_construction(): + expected = np.timedelta64(10, 'D').astype('m8[ns]').view('i8') + assert Timedelta(10, unit='d').value == expected + assert Timedelta(10.0, unit='d').value == expected + assert Timedelta('10 days').value == expected + assert Timedelta(days=10).value == expected + assert Timedelta(days=10.0).value == expected + + expected += np.timedelta64(10, 's').astype('m8[ns]').view('i8') + assert Timedelta('10 days 00:00:10').value == expected + assert Timedelta(days=10, seconds=10).value == expected + assert Timedelta(days=10, milliseconds=10 * 1000).value == expected + assert Timedelta(days=10, + microseconds=10 * 1000 * 1000).value == expected + + # rounding cases + assert Timedelta(82739999850000).value == 82739999850000 + assert ('0 days 22:58:59.999850' in str(Timedelta(82739999850000))) + assert Timedelta(123072001000000).value == 123072001000000 + 
assert ('1 days 10:11:12.001' in str(Timedelta(123072001000000))) + + # string conversion with/without leading zero + # GH#9570 + assert Timedelta('0:00:00') == timedelta(hours=0) + assert Timedelta('00:00:00') == timedelta(hours=0) + assert Timedelta('-1:00:00') == -timedelta(hours=1) + assert Timedelta('-01:00:00') == -timedelta(hours=1) + + # more strings & abbrevs + # GH#8190 + assert Timedelta('1 h') == timedelta(hours=1) + assert Timedelta('1 hour') == timedelta(hours=1) + assert Timedelta('1 hr') == timedelta(hours=1) + assert Timedelta('1 hours') == timedelta(hours=1) + assert Timedelta('-1 hours') == -timedelta(hours=1) + assert Timedelta('1 m') == timedelta(minutes=1) + assert Timedelta('1.5 m') == timedelta(seconds=90) + assert Timedelta('1 minute') == timedelta(minutes=1) + assert Timedelta('1 minutes') == timedelta(minutes=1) + assert Timedelta('1 s') == timedelta(seconds=1) + assert Timedelta('1 second') == timedelta(seconds=1) + assert Timedelta('1 seconds') == timedelta(seconds=1) + assert Timedelta('1 ms') == timedelta(milliseconds=1) + assert Timedelta('1 milli') == timedelta(milliseconds=1) + assert Timedelta('1 millisecond') == timedelta(milliseconds=1) + assert Timedelta('1 us') == timedelta(microseconds=1) + assert Timedelta('1 micros') == timedelta(microseconds=1) + assert Timedelta('1 microsecond') == timedelta(microseconds=1) + assert Timedelta('1.5 microsecond') == Timedelta('00:00:00.000001500') + assert Timedelta('1 ns') == Timedelta('00:00:00.000000001') + assert Timedelta('1 nano') == Timedelta('00:00:00.000000001') + assert Timedelta('1 nanosecond') == Timedelta('00:00:00.000000001') + + # combos + assert Timedelta('10 days 1 hour') == timedelta(days=10, hours=1) + assert Timedelta('10 days 1 h') == timedelta(days=10, hours=1) + assert Timedelta('10 days 1 h 1m 1s') == timedelta( + days=10, hours=1, minutes=1, seconds=1) + assert Timedelta('-10 days 1 h 1m 1s') == -timedelta( + days=10, hours=1, minutes=1, seconds=1) + assert 
Timedelta('-10 days 1 h 1m 1s') == -timedelta( + days=10, hours=1, minutes=1, seconds=1) + assert Timedelta('-10 days 1 h 1m 1s 3us') == -timedelta( + days=10, hours=1, minutes=1, seconds=1, microseconds=3) + assert Timedelta('-10 days 1 h 1.5m 1s 3us') == -timedelta( + days=10, hours=1, minutes=1, seconds=31, microseconds=3) + + # Currently invalid as it has a - on the hh:mm:dd part + # (only allowed on the days) + with pytest.raises(ValueError): + Timedelta('-10 days -1 h 1.5m 1s 3us') + + # only leading neg signs are allowed + with pytest.raises(ValueError): + Timedelta('10 days -1 h 1.5m 1s 3us') + + # no units specified + with pytest.raises(ValueError): + Timedelta('3.1415') + + # invalid construction + tm.assert_raises_regex(ValueError, "cannot construct a Timedelta", + lambda: Timedelta()) + tm.assert_raises_regex(ValueError, + "unit abbreviation w/o a number", + lambda: Timedelta('foo')) + tm.assert_raises_regex(ValueError, + "cannot construct a Timedelta from the " + "passed arguments, allowed keywords are ", + lambda: Timedelta(day=10)) + + # floats + expected = np.timedelta64( + 10, 's').astype('m8[ns]').view('i8') + np.timedelta64( + 500, 'ms').astype('m8[ns]').view('i8') + assert Timedelta(10.5, unit='s').value == expected + + # offset + assert pd.to_timedelta(pd.offsets.Hour(2)) == Timedelta(hours=2) + assert Timedelta(pd.offsets.Hour(2)) == Timedelta(hours=2) + assert Timedelta(pd.offsets.Second(2)) == Timedelta(seconds=2) + + # GH#11995: unicode + expected = Timedelta('1H') + result = pd.Timedelta(u'1H') + assert result == expected + assert (pd.to_timedelta(pd.offsets.Hour(2)) == + Timedelta(u'0 days, 02:00:00')) + + with pytest.raises(ValueError): + Timedelta(u'foo bar') + + +@pytest.mark.parametrize('item', list({'days': 'D', + 'seconds': 's', + 'microseconds': 'us', + 'milliseconds': 'ms', + 'minutes': 'm', + 'hours': 'h', + 'weeks': 'W'}.items())) +@pytest.mark.parametrize('npdtype', [np.int64, np.int32, np.int16, + np.float64, np.float32, 
np.float16]) +def test_td_construction_with_np_dtypes(npdtype, item): + # GH#8757: test construction with np dtypes + pykwarg, npkwarg = item + expected = np.timedelta64(1, npkwarg).astype('m8[ns]').view('i8') + assert Timedelta(**{pykwarg: npdtype(1)}).value == expected + + +@pytest.mark.parametrize('val', [ + '1s', '-1s', '1us', '-1us', '1 day', '-1 day', + '-23:59:59.999999', '-1 days +23:59:59.999999', '-1ns', + '1ns', '-23:59:59.999999999']) +def test_td_from_repr_roundtrip(val): + # round-trip both for string and value + td = Timedelta(val) + assert Timedelta(td.value) == td + + # str does not normally display nanos + if not td.nanoseconds: + assert Timedelta(str(td)) == td + assert Timedelta(td._repr_base(format='all')) == td + + +def test_overflow_on_construction(): + # xref https://github.com/statsmodels/statsmodels/issues/3374 + value = pd.Timedelta('1day').value * 20169940 + with pytest.raises(OverflowError): + pd.Timedelta(value) + + # xref GH#17637 + with pytest.raises(OverflowError): + pd.Timedelta(7 * 19999, unit='D') + + with pytest.raises(OverflowError): + pd.Timedelta(timedelta(days=13 * 19999)) + + +@pytest.mark.parametrize('fmt,exp', [ + ('P6DT0H50M3.010010012S', Timedelta(days=6, minutes=50, seconds=3, + milliseconds=10, microseconds=10, + nanoseconds=12)), + ('P-6DT0H50M3.010010012S', Timedelta(days=-6, minutes=50, seconds=3, + milliseconds=10, microseconds=10, + nanoseconds=12)), + ('P4DT12H30M5S', Timedelta(days=4, hours=12, minutes=30, seconds=5)), + ('P0DT0H0M0.000000123S', Timedelta(nanoseconds=123)), + ('P0DT0H0M0.00001S', Timedelta(microseconds=10)), + ('P0DT0H0M0.001S', Timedelta(milliseconds=1)), + ('P0DT0H1M0S', Timedelta(minutes=1)), + ('P1DT25H61M61S', Timedelta(days=1, hours=25, minutes=61, seconds=61)) +]) +def test_iso_constructor(fmt, exp): + assert Timedelta(fmt) == exp + + +@pytest.mark.parametrize('fmt', [ + 'PPPPPPPPPPPP', 'PDTHMS', 'P0DT999H999M999S', + 'P1DT0H0M0.0000000000000S', 'P1DT0H0M00000000000S', + 'P1DT0H0M0.S']) 
+def test_iso_constructor_raises(fmt): + with tm.assert_raises_regex(ValueError, 'Invalid ISO 8601 Duration ' + 'format - {}'.format(fmt)): + Timedelta(fmt) + + +def test_td_constructor_on_nanoseconds(): + # GH#9273 + result = Timedelta(nanoseconds=100) + expected = Timedelta('100ns') + assert result == expected + + result = Timedelta(days=1, hours=1, minutes=1, weeks=1, seconds=1, + milliseconds=1, microseconds=1, nanoseconds=1) + expected = Timedelta(694861001001001) + assert result == expected + + result = Timedelta(microseconds=1) + Timedelta(nanoseconds=1) + expected = Timedelta('1us1ns') + assert result == expected + + result = Timedelta(microseconds=1) - Timedelta(nanoseconds=1) + expected = Timedelta('999ns') + assert result == expected + + result = Timedelta(microseconds=1) + 5 * Timedelta(nanoseconds=-2) + expected = Timedelta('990ns') + assert result == expected + + with pytest.raises(TypeError): + Timedelta(nanoseconds='abc') diff --git a/pandas/tests/scalar/timedelta/test_formats.py b/pandas/tests/scalar/timedelta/test_formats.py new file mode 100644 index 0000000000000..8a877c7d1c0fa --- /dev/null +++ b/pandas/tests/scalar/timedelta/test_formats.py @@ -0,0 +1,48 @@ +# -*- coding: utf-8 -*- +from pandas import Timedelta + + +def test_repr(): + assert (repr(Timedelta(10, unit='d')) == + "Timedelta('10 days 00:00:00')") + assert (repr(Timedelta(10, unit='s')) == + "Timedelta('0 days 00:00:10')") + assert (repr(Timedelta(10, unit='ms')) == + "Timedelta('0 days 00:00:00.010000')") + assert (repr(Timedelta(-10, unit='ms')) == + "Timedelta('-1 days +23:59:59.990000')") + + +def test_isoformat(): + td = Timedelta(days=6, minutes=50, seconds=3, + milliseconds=10, microseconds=10, nanoseconds=12) + expected = 'P6DT0H50M3.010010012S' + result = td.isoformat() + assert result == expected + + td = Timedelta(days=4, hours=12, minutes=30, seconds=5) + result = td.isoformat() + expected = 'P4DT12H30M5S' + assert result == expected + + td = Timedelta(nanoseconds=123) 
+ result = td.isoformat() + expected = 'P0DT0H0M0.000000123S' + assert result == expected + + # trim nano + td = Timedelta(microseconds=10) + result = td.isoformat() + expected = 'P0DT0H0M0.00001S' + assert result == expected + + # trim micro + td = Timedelta(milliseconds=1) + result = td.isoformat() + expected = 'P0DT0H0M0.001S' + assert result == expected + + # don't strip every 0 + result = Timedelta(minutes=1).isoformat() + expected = 'P0DT0H1M0S' + assert result == expected diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index 420b66b4ce0dc..0f7fb84c6520b 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -36,31 +36,6 @@ def test_ops_error_str(self): assert not left == right assert left != right - def test_to_timedelta_on_nanoseconds(self): - # GH 9273 - result = Timedelta(nanoseconds=100) - expected = Timedelta('100ns') - assert result == expected - - result = Timedelta(days=1, hours=1, minutes=1, weeks=1, seconds=1, - milliseconds=1, microseconds=1, nanoseconds=1) - expected = Timedelta(694861001001001) - assert result == expected - - result = Timedelta(microseconds=1) + Timedelta(nanoseconds=1) - expected = Timedelta('1us1ns') - assert result == expected - - result = Timedelta(microseconds=1) - Timedelta(nanoseconds=1) - expected = Timedelta('999ns') - assert result == expected - - result = Timedelta(microseconds=1) + 5 * Timedelta(nanoseconds=-2) - expected = Timedelta('990ns') - assert result == expected - - pytest.raises(TypeError, lambda: Timedelta(nanoseconds='abc')) - def test_ops_notimplemented(self): class Other: pass @@ -124,164 +99,6 @@ def test_compare_timedelta_ndarray(self): class TestTimedeltas(object): - def setup_method(self, method): - pass - - def test_construction(self): - - expected = np.timedelta64(10, 'D').astype('m8[ns]').view('i8') - assert Timedelta(10, unit='d').value == expected - assert Timedelta(10.0, 
unit='d').value == expected - assert Timedelta('10 days').value == expected - assert Timedelta(days=10).value == expected - assert Timedelta(days=10.0).value == expected - - expected += np.timedelta64(10, 's').astype('m8[ns]').view('i8') - assert Timedelta('10 days 00:00:10').value == expected - assert Timedelta(days=10, seconds=10).value == expected - assert Timedelta(days=10, milliseconds=10 * 1000).value == expected - assert (Timedelta(days=10, microseconds=10 * 1000 * 1000) - .value == expected) - - # gh-8757: test construction with np dtypes - timedelta_kwargs = {'days': 'D', - 'seconds': 's', - 'microseconds': 'us', - 'milliseconds': 'ms', - 'minutes': 'm', - 'hours': 'h', - 'weeks': 'W'} - npdtypes = [np.int64, np.int32, np.int16, np.float64, np.float32, - np.float16] - for npdtype in npdtypes: - for pykwarg, npkwarg in timedelta_kwargs.items(): - expected = np.timedelta64(1, npkwarg).astype( - 'm8[ns]').view('i8') - assert Timedelta(**{pykwarg: npdtype(1)}).value == expected - - # rounding cases - assert Timedelta(82739999850000).value == 82739999850000 - assert ('0 days 22:58:59.999850' in str(Timedelta(82739999850000))) - assert Timedelta(123072001000000).value == 123072001000000 - assert ('1 days 10:11:12.001' in str(Timedelta(123072001000000))) - - # string conversion with/without leading zero - # GH 9570 - assert Timedelta('0:00:00') == timedelta(hours=0) - assert Timedelta('00:00:00') == timedelta(hours=0) - assert Timedelta('-1:00:00') == -timedelta(hours=1) - assert Timedelta('-01:00:00') == -timedelta(hours=1) - - # more strings & abbrevs - # GH 8190 - assert Timedelta('1 h') == timedelta(hours=1) - assert Timedelta('1 hour') == timedelta(hours=1) - assert Timedelta('1 hr') == timedelta(hours=1) - assert Timedelta('1 hours') == timedelta(hours=1) - assert Timedelta('-1 hours') == -timedelta(hours=1) - assert Timedelta('1 m') == timedelta(minutes=1) - assert Timedelta('1.5 m') == timedelta(seconds=90) - assert Timedelta('1 minute') == 
timedelta(minutes=1) - assert Timedelta('1 minutes') == timedelta(minutes=1) - assert Timedelta('1 s') == timedelta(seconds=1) - assert Timedelta('1 second') == timedelta(seconds=1) - assert Timedelta('1 seconds') == timedelta(seconds=1) - assert Timedelta('1 ms') == timedelta(milliseconds=1) - assert Timedelta('1 milli') == timedelta(milliseconds=1) - assert Timedelta('1 millisecond') == timedelta(milliseconds=1) - assert Timedelta('1 us') == timedelta(microseconds=1) - assert Timedelta('1 micros') == timedelta(microseconds=1) - assert Timedelta('1 microsecond') == timedelta(microseconds=1) - assert Timedelta('1.5 microsecond') == Timedelta('00:00:00.000001500') - assert Timedelta('1 ns') == Timedelta('00:00:00.000000001') - assert Timedelta('1 nano') == Timedelta('00:00:00.000000001') - assert Timedelta('1 nanosecond') == Timedelta('00:00:00.000000001') - - # combos - assert Timedelta('10 days 1 hour') == timedelta(days=10, hours=1) - assert Timedelta('10 days 1 h') == timedelta(days=10, hours=1) - assert Timedelta('10 days 1 h 1m 1s') == timedelta( - days=10, hours=1, minutes=1, seconds=1) - assert Timedelta('-10 days 1 h 1m 1s') == -timedelta( - days=10, hours=1, minutes=1, seconds=1) - assert Timedelta('-10 days 1 h 1m 1s') == -timedelta( - days=10, hours=1, minutes=1, seconds=1) - assert Timedelta('-10 days 1 h 1m 1s 3us') == -timedelta( - days=10, hours=1, minutes=1, seconds=1, microseconds=3) - assert Timedelta('-10 days 1 h 1.5m 1s 3us'), -timedelta( - days=10, hours=1, minutes=1, seconds=31, microseconds=3) - - # Currently invalid as it has a - on the hh:mm:dd part - # (only allowed on the days) - pytest.raises(ValueError, - lambda: Timedelta('-10 days -1 h 1.5m 1s 3us')) - - # only leading neg signs are allowed - pytest.raises(ValueError, - lambda: Timedelta('10 days -1 h 1.5m 1s 3us')) - - # no units specified - pytest.raises(ValueError, lambda: Timedelta('3.1415')) - - # invalid construction - tm.assert_raises_regex(ValueError, "cannot construct a 
Timedelta", - lambda: Timedelta()) - tm.assert_raises_regex(ValueError, - "unit abbreviation w/o a number", - lambda: Timedelta('foo')) - tm.assert_raises_regex(ValueError, - "cannot construct a Timedelta from the " - "passed arguments, allowed keywords are ", - lambda: Timedelta(day=10)) - - # round-trip both for string and value - for v in ['1s', '-1s', '1us', '-1us', '1 day', '-1 day', - '-23:59:59.999999', '-1 days +23:59:59.999999', '-1ns', - '1ns', '-23:59:59.999999999']: - - td = Timedelta(v) - assert Timedelta(td.value) == td - - # str does not normally display nanos - if not td.nanoseconds: - assert Timedelta(str(td)) == td - assert Timedelta(td._repr_base(format='all')) == td - - # floats - expected = np.timedelta64( - 10, 's').astype('m8[ns]').view('i8') + np.timedelta64( - 500, 'ms').astype('m8[ns]').view('i8') - assert Timedelta(10.5, unit='s').value == expected - - # offset - assert (to_timedelta(pd.offsets.Hour(2)) == - Timedelta('0 days, 02:00:00')) - assert (Timedelta(pd.offsets.Hour(2)) == - Timedelta('0 days, 02:00:00')) - assert (Timedelta(pd.offsets.Second(2)) == - Timedelta('0 days, 00:00:02')) - - # gh-11995: unicode - expected = Timedelta('1H') - result = pd.Timedelta(u'1H') - assert result == expected - assert (to_timedelta(pd.offsets.Hour(2)) == - Timedelta(u'0 days, 02:00:00')) - - pytest.raises(ValueError, lambda: Timedelta(u'foo bar')) - - def test_overflow_on_construction(self): - # xref https://github.com/statsmodels/statsmodels/issues/3374 - value = pd.Timedelta('1day').value * 20169940 - pytest.raises(OverflowError, pd.Timedelta, value) - - # xref gh-17637 - with pytest.raises(OverflowError): - pd.Timedelta(7 * 19999, unit='D') - - with pytest.raises(OverflowError): - pd.Timedelta(timedelta(days=13 * 19999)) - def test_total_seconds_scalar(self): # see gh-10939 rng = Timedelta('1 days, 10:11:12.100123456') @@ -291,17 +108,6 @@ def test_total_seconds_scalar(self): rng = Timedelta(np.nan) assert np.isnan(rng.total_seconds()) - def 
test_repr(self): - - assert (repr(Timedelta(10, unit='d')) == - "Timedelta('10 days 00:00:00')") - assert (repr(Timedelta(10, unit='s')) == - "Timedelta('0 days 00:00:10')") - assert (repr(Timedelta(10, unit='ms')) == - "Timedelta('0 days 00:00:00.010000')") - assert (repr(Timedelta(-10, unit='ms')) == - "Timedelta('-1 days +23:59:59.990000')") - def test_conversion(self): for td in [Timedelta(10, unit='d'), @@ -756,63 +562,3 @@ def test_components(self): result = s.dt.components assert not result.iloc[0].isna().all() assert result.iloc[1].isna().all() - - def test_isoformat(self): - td = Timedelta(days=6, minutes=50, seconds=3, - milliseconds=10, microseconds=10, nanoseconds=12) - expected = 'P6DT0H50M3.010010012S' - result = td.isoformat() - assert result == expected - - td = Timedelta(days=4, hours=12, minutes=30, seconds=5) - result = td.isoformat() - expected = 'P4DT12H30M5S' - assert result == expected - - td = Timedelta(nanoseconds=123) - result = td.isoformat() - expected = 'P0DT0H0M0.000000123S' - assert result == expected - - # trim nano - td = Timedelta(microseconds=10) - result = td.isoformat() - expected = 'P0DT0H0M0.00001S' - assert result == expected - - # trim micro - td = Timedelta(milliseconds=1) - result = td.isoformat() - expected = 'P0DT0H0M0.001S' - assert result == expected - - # don't strip every 0 - result = Timedelta(minutes=1).isoformat() - expected = 'P0DT0H1M0S' - assert result == expected - - @pytest.mark.parametrize('fmt,exp', [ - ('P6DT0H50M3.010010012S', Timedelta(days=6, minutes=50, seconds=3, - milliseconds=10, microseconds=10, - nanoseconds=12)), - ('P-6DT0H50M3.010010012S', Timedelta(days=-6, minutes=50, seconds=3, - milliseconds=10, microseconds=10, - nanoseconds=12)), - ('P4DT12H30M5S', Timedelta(days=4, hours=12, minutes=30, seconds=5)), - ('P0DT0H0M0.000000123S', Timedelta(nanoseconds=123)), - ('P0DT0H0M0.00001S', Timedelta(microseconds=10)), - ('P0DT0H0M0.001S', Timedelta(milliseconds=1)), - ('P0DT0H1M0S', 
Timedelta(minutes=1)), - ('P1DT25H61M61S', Timedelta(days=1, hours=25, minutes=61, seconds=61)) - ]) - def test_iso_constructor(self, fmt, exp): - assert Timedelta(fmt) == exp - - @pytest.mark.parametrize('fmt', [ - 'PPPPPPPPPPPP', 'PDTHMS', 'P0DT999H999M999S', - 'P1DT0H0M0.0000000000000S', 'P1DT0H0M00000000000S', - 'P1DT0H0M0.S']) - def test_iso_constructor_raises(self, fmt): - with tm.assert_raises_regex(ValueError, 'Invalid ISO 8601 Duration ' - 'format - {}'.format(fmt)): - Timedelta(fmt) From e075e3b57002af4145da410d3cd54334f9987700 Mon Sep 17 00:00:00 2001 From: Mike Kutzma Date: Tue, 20 Feb 2018 06:20:46 -0500 Subject: [PATCH 144/217] BUG: GH19458 fixes precision issue in TimeDelta.total_seconds() (#19783) --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/_libs/tslibs/timedeltas.pyx | 2 +- pandas/tests/scalar/timedelta/test_timedelta.py | 9 +++++++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index f31d0a5a0667c..349d7607559c5 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -733,6 +733,7 @@ Datetimelike - Bug in :func:`to_datetime` where passing an out-of-bounds datetime with ``errors='coerce'`` and ``utc=True`` would raise ``OutOfBoundsDatetime`` instead of parsing to ``NaT`` (:issue:`19612`) - Bug in :func:`Timedelta.__add__`, :func:`Timedelta.__sub__` where adding or subtracting a ``np.timedelta64`` object would return another ``np.timedelta64`` instead of a ``Timedelta`` (:issue:`19738`) - Bug in :func:`Timedelta.__floordiv__`, :func:`Timedelta.__rfloordiv__` where operating with a ``Tick`` object would raise a ``TypeError`` instead of returning a numeric value (:issue:`19738`) +- Bug in :func:`Timedelta.total_seconds()` causing precision errors i.e. 
`Timedelta('30S').total_seconds()==30.000000000000004` (:issue:`19458`) Timezones diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 4483225e1801d..78fdeb988e0f2 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -739,7 +739,7 @@ cdef class _Timedelta(timedelta): """ Total duration of timedelta in seconds (to ns precision) """ - return 1e-9 * self.value + return self.value / 1e9 def view(self, dtype): """ array view compat """ diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index 0f7fb84c6520b..4257c610fb960 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -512,6 +512,15 @@ def test_implementation_limits(self): with pytest.raises(OverflowError): Timedelta(max_td.value + 1, 'ns') + def test_total_seconds_precision(self): + # GH 19458 + assert Timedelta('30S').total_seconds() == 30.0 + assert Timedelta('0').total_seconds() == 0.0 + assert Timedelta('-2S').total_seconds() == -2.0 + assert Timedelta('5.324S').total_seconds() == 5.324 + assert (Timedelta('30S').total_seconds() - 30.0) < 1e-20 + assert (30.0 - Timedelta('30S').total_seconds()) < 1e-20 + def test_timedelta_arithmetic(self): data = pd.Series(['nat', '32 days'], dtype='timedelta64[ns]') deltas = [timedelta(days=1), Timedelta(1, unit='D')] From 0ea6a5a10c3cf5dd997b178b09ac9464a4fe6301 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 20 Feb 2018 06:34:52 -0500 Subject: [PATCH 145/217] DOC: whatsnew cleanups --- doc/source/whatsnew/v0.23.0.txt | 155 +++++++++++++++++--------------- 1 file changed, 82 insertions(+), 73 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 349d7607559c5..ed50596843272 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -219,50 +219,6 @@ Please note that the string `index` is not supported 
with the round trip format, new_df print(new_df.index.name) -.. _whatsnew_0230.enhancements.index_division_by_zero: - -Index Division By Zero Fills Correctly -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Division operations on ``Index`` and subclasses will now fill division of positive numbers by zero with ``np.inf``, division of negative numbers by zero with ``-np.inf`` and `0 / 0` with ``np.nan``. This matches existing ``Series`` behavior. (:issue:`19322`, :issue:`19347`) - -Previous Behavior: - -.. code-block:: ipython - - In [6]: index = pd.Int64Index([-1, 0, 1]) - - In [7]: index / 0 - Out[7]: Int64Index([0, 0, 0], dtype='int64') - - # Previous behavior yielded different results depending on the type of zero in the divisor - In [8]: index / 0.0 - Out[8]: Float64Index([-inf, nan, inf], dtype='float64') - - In [9]: index = pd.UInt64Index([0, 1]) - - In [10]: index / np.array([0, 0], dtype=np.uint64) - Out[10]: UInt64Index([0, 0], dtype='uint64') - - In [11]: pd.RangeIndex(1, 5) / 0 - ZeroDivisionError: integer division or modulo by zero - -Current Behavior: - -.. ipython:: python - - index = pd.Int64Index([-1, 0, 1]) - # division by zero gives -infinity where negative, +infinity where positive, and NaN for 0 / 0 - index / 0 - - # The result of division by zero should not depend on whether the zero is int or float - index / 0.0 - - index = pd.UInt64Index([0, 1]) - index / np.array([0, 0], dtype=np.uint64) - - pd.RangeIndex(1, 5) / 0 - .. _whatsnew_0230.enhancements.assign_dependent: ``.assign()`` accepts dependent arguments @@ -467,6 +423,50 @@ Build Changes - Building from source now explicitly requires ``setuptools`` in ``setup.py`` (:issue:`18113`) - Updated conda recipe to be in compliance with conda-build 3.0+ (:issue:`18002`) +.. 
_whatsnew_0230.api_breaking.index_division_by_zero: + +Index Division By Zero Fills Correctly +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Division operations on ``Index`` and subclasses will now fill division of positive numbers by zero with ``np.inf``, division of negative numbers by zero with ``-np.inf`` and `0 / 0` with ``np.nan``. This matches existing ``Series`` behavior. (:issue:`19322`, :issue:`19347`) + +Previous Behavior: + +.. code-block:: ipython + + In [6]: index = pd.Int64Index([-1, 0, 1]) + + In [7]: index / 0 + Out[7]: Int64Index([0, 0, 0], dtype='int64') + + # Previous behavior yielded different results depending on the type of zero in the divisor + In [8]: index / 0.0 + Out[8]: Float64Index([-inf, nan, inf], dtype='float64') + + In [9]: index = pd.UInt64Index([0, 1]) + + In [10]: index / np.array([0, 0], dtype=np.uint64) + Out[10]: UInt64Index([0, 0], dtype='uint64') + + In [11]: pd.RangeIndex(1, 5) / 0 + ZeroDivisionError: integer division or modulo by zero + +Current Behavior: + +.. ipython:: python + + index = pd.Int64Index([-1, 0, 1]) + # division by zero gives -infinity where negative, +infinity where positive, and NaN for 0 / 0 + index / 0 + + # The result of division by zero should not depend on whether the zero is int or float + index / 0.0 + + index = pd.UInt64Index([0, 1]) + index / np.array([0, 0], dtype=np.uint64) + + pd.RangeIndex(1, 5) / 0 + .. _whatsnew_0230.api_breaking.extract: Extraction of matching patterns from strings @@ -475,10 +475,8 @@ Extraction of matching patterns from strings By default, extracting matching patterns from strings with :func:`str.extract` used to return a ``Series`` if a single group was being extracted (a ``DataFrame`` if more than one group was extracted``). As of Pandas 0.23.0 :func:`str.extract` always returns a ``DataFrame``, unless -``expand`` is set to ``False`` (:issue:`11386`). 
- -Also, ``None`` was an accepted value for the ``expand`` parameter (which was equivalent to -``False``), but now raises a ``ValueError``. +``expand`` is set to ``False``. Finallay, ``None`` was an accepted value for +the ``expand`` parameter (which was equivalent to ``False``), but now raises a ``ValueError``. (:issue:`11386`) Previous Behavior: @@ -539,7 +537,26 @@ Notice in the example above that the converted ``Categorical`` has retained ``or Note that the unintenional conversion of ``ordered`` discussed above did not arise in previous versions due to separate bugs that prevented ``astype`` from doing any type of category to category conversion (:issue:`10696`, :issue:`18593`). These bugs have been fixed in this release, and motivated changing the default value of ``ordered``. -.. _whatsnew_0230.api: +.. _whatsnew_0230.api.datetimelike: + +Datetimelike API Changes +^^^^^^^^^^^^^^^^^^^^^^^^ + +- The default ``Timedelta`` constructor now accepts an ``ISO 8601 Duration`` string as an argument (:issue:`19040`) +- Addition or subtraction of ``NaT`` from :class:`TimedeltaIndex` will return ``TimedeltaIndex`` instead of ``DatetimeIndex`` (:issue:`19124`) +- :func:`DatetimeIndex.shift` and :func:`TimedeltaIndex.shift` will now raise ``NullFrequencyError`` (which subclasses ``ValueError``, which was raised in older versions) when the index- Addition and subtraction of ``NaN`` from a :class:`Series` with ``dtype='timedelta64[ns]'`` will raise a ``TypeError` instead of treating the ``NaN`` as ``NaT`` (:issue:`19274`) +- ``NaT`` division with :class:`datetime.timedelta` will now return ``NaN`` instead of raising (:issue:`17876`) +- :class:`Timestamp` will no longer silently ignore unused or invalid ``tz`` or ``tzinfo`` keyword arguments (:issue:`17690`) +- :class:`Timestamp` will no longer silently ignore invalid ``freq`` arguments (:issue:`5168`) +- :class:`CacheableOffset` and :class:`WeekDay` are no longer available in the ``pandas.tseries.offsets`` module 
(:issue:`17830`) +- ``pandas.tseries.frequencies.get_freq_group()`` and ``pandas.tseries.frequencies.DAYS`` are removed from the public API (:issue:`18034`) +- :func:`Series.truncate` and :func:`DataFrame.truncate` will raise a ``ValueError`` if the index is not sorted instead of an unhelpful ``KeyError`` (:issue:`17935`) +- Restricted ``DateOffset`` keyword arguments. Previously, ``DateOffset`` subclasses allowed arbitrary keyword arguments which could lead to unexpected behavior. Now, only valid arguments will be accepted. (:issue:`17176`, :issue:`18226`). +- Subtracting ``NaT`` from a :class:`Series` with ``dtype='datetime64[ns]'`` returns a ``Series`` with ``dtype='timedelta64[ns]'`` instead of ``dtype='datetime64[ns]'``(:issue:`18808`) +- Operations between a :class:`Series` with dtype ``dtype='datetime64[ns]'`` and a :class:`PeriodIndex` will correctly raises ``TypeError`` (:issue:`18850`) +- Subtraction of :class:`Series` with timezone-aware ``dtype='datetime64[ns]'`` with mis-matched timezones will raise ``TypeError`` instead of ``ValueError`` (issue:`18817`) + +.. 
_whatsnew_0230.api.other: Other API Changes ^^^^^^^^^^^^^^^^^ @@ -547,45 +564,32 @@ Other API Changes - :func:`Series.astype` and :func:`Index.astype` with an incompatible dtype will now raise a ``TypeError`` rather than a ``ValueError`` (:issue:`18231`) - ``Series`` construction with an ``object`` dtyped tz-aware datetime and ``dtype=object`` specified, will now return an ``object`` dtyped ``Series``, previously this would infer the datetime dtype (:issue:`18231`) - A :class:`Series` of ``dtype=category`` constructed from an empty ``dict`` will now have categories of ``dtype=object`` rather than ``dtype=float64``, consistently with the case in which an empty list is passed (:issue:`18515`) -- ``NaT`` division with :class:`datetime.timedelta` will now return ``NaN`` instead of raising (:issue:`17876`) - All-NaN levels in a ``MultiIndex`` are now assigned ``float`` rather than ``object`` dtype, promoting consistency with ``Index`` (:issue:`17929`). - Levels names of a ``MultiIndex`` (when not None) are now required to be unique: trying to create a ``MultiIndex`` with repeated names will raise a ``ValueError`` (:issue:`18872`) -- :class:`Timestamp` will no longer silently ignore unused or invalid ``tz`` or ``tzinfo`` keyword arguments (:issue:`17690`) -- :class:`Timestamp` will no longer silently ignore invalid ``freq`` arguments (:issue:`5168`) -- :class:`CacheableOffset` and :class:`WeekDay` are no longer available in the ``pandas.tseries.offsets`` module (:issue:`17830`) -- ``pandas.tseries.frequencies.get_freq_group()`` and ``pandas.tseries.frequencies.DAYS`` are removed from the public API (:issue:`18034`) -- :func:`Series.truncate` and :func:`DataFrame.truncate` will raise a ``ValueError`` if the index is not sorted instead of an unhelpful ``KeyError`` (:issue:`17935`) + - :func:`Index.map` can now accept ``Series`` and dictionary input objects (:issue:`12756`, :issue:`18482`, :issue:`18509`). 
- :func:`DataFrame.unstack` will now default to filling with ``np.nan`` for ``object`` columns. (:issue:`12815`) - :class:`IntervalIndex` constructor will raise if the ``closed`` parameter conflicts with how the input data is inferred to be closed (:issue:`18421`) - Inserting missing values into indexes will work for all types of indexes and automatically insert the correct type of missing value (``NaN``, ``NaT``, etc.) regardless of the type passed in (:issue:`18295`) -- Restricted ``DateOffset`` keyword arguments. Previously, ``DateOffset`` subclasses allowed arbitrary keyword arguments which could lead to unexpected behavior. Now, only valid arguments will be accepted. (:issue:`17176`, :issue:`18226`). -- :func:`DataFrame.from_items` provides a more informative error message when passed scalar values (:issue:`17312`) - When created with duplicate labels, ``MultiIndex`` now raises a ``ValueError``. (:issue:`17464`) - :func:`Series.fillna` now raises a ``TypeError`` instead of a ``ValueError`` when passed a list, tuple or DataFrame as a ``value`` (:issue:`18293`) - :func:`pandas.DataFrame.merge` no longer casts a ``float`` column to ``object`` when merging on ``int`` and ``float`` columns (:issue:`16572`) +- :func:`pandas.merge` now raises a ``ValueError`` when trying to merge on incompatible data types (:issue:`9780`) +- :func:`pandas.merge` provides a more informative error message when trying to merge on timezone-aware and timezone-naive columns (:issue:`15800`) - The default NA value for :class:`UInt64Index` has changed from 0 to ``NaN``, which impacts methods that mask with NA, such as ``UInt64Index.where()`` (:issue:`18398`) - Refactored ``setup.py`` to use ``find_packages`` instead of explicitly listing out all subpackages (:issue:`18535`) - Rearranged the order of keyword arguments in :func:`read_excel()` to align with :func:`read_csv()` (:issue:`16672`) -- :func:`pandas.merge` now raises a ``ValueError`` when trying to merge on incompatible data types 
(:issue:`9780`) - :func:`wide_to_long` previously kept numeric-like suffixes as ``object`` dtype. Now they are cast to numeric if possible (:issue:`17627`) - In :func:`read_excel`, the ``comment`` argument is now exposed as a named parameter (:issue:`18735`) - Rearranged the order of keyword arguments in :func:`read_excel()` to align with :func:`read_csv()` (:issue:`16672`) - The options ``html.border`` and ``mode.use_inf_as_null`` were deprecated in prior versions, these will now show ``FutureWarning`` rather than a ``DeprecationWarning`` (:issue:`19003`) -- Subtracting ``NaT`` from a :class:`Series` with ``dtype='datetime64[ns]'`` returns a ``Series`` with ``dtype='timedelta64[ns]'`` instead of ``dtype='datetime64[ns]'``(:issue:`18808`) -- Operations between a :class:`Series` with dtype ``dtype='datetime64[ns]'`` and a :class:`PeriodIndex` will correctly raises ``TypeError`` (:issue:`18850`) -- Subtraction of :class:`Series` with timezone-aware ``dtype='datetime64[ns]'`` with mis-matched timezones will raise ``TypeError`` instead of ``ValueError`` (issue:`18817`) - :class:`IntervalIndex` and ``IntervalDtype`` no longer support categorical, object, and string subtypes (:issue:`19016`) -- The default ``Timedelta`` constructor now accepts an ``ISO 8601 Duration`` string as an argument (:issue:`19040`) - ``IntervalDtype`` now returns ``True`` when compared against ``'interval'`` regardless of subtype, and ``IntervalDtype.name`` now returns ``'interval'`` regardless of subtype (:issue:`18980`) - ``KeyError`` now raises instead of ``ValueError`` in :meth:`~DataFrame.drop`, :meth:`~Panel.drop`, :meth:`~Series.drop`, :meth:`~Index.drop` when dropping a non-existent element in an axis with duplicates (:issue:`19186`) - :func:`Series.to_csv` now accepts a ``compression`` argument that works in the same way as the ``compression`` argument in :func:`DataFrame.to_csv` (:issue:`18958`) -- Addition or subtraction of ``NaT`` from :class:`TimedeltaIndex` will return 
``TimedeltaIndex`` instead of ``DatetimeIndex`` (:issue:`19124`) -- :func:`DatetimeIndex.shift` and :func:`TimedeltaIndex.shift` will now raise ``NullFrequencyError`` (which subclasses ``ValueError``, which was raised in older versions) when the index object frequency is ``None`` (:issue:`19147`) -- Addition and subtraction of ``NaN`` from a :class:`Series` with ``dtype='timedelta64[ns]'`` will raise a ``TypeError` instead of treating the ``NaN`` as ``NaT`` (:issue:`19274`) + object frequency is ``None`` (:issue:`19147`) - Set operations (union, difference...) on :class:`IntervalIndex` with incompatible index types will now raise a ``TypeError`` rather than a ``ValueError`` (:issue:`19329`) - :class:`DateOffset` objects render more simply, e.g. "" instead of "" (:issue:`19403`) -- :func:`pandas.merge` provides a more informative error message when trying to merge on timezone-aware and timezone-naive columns (:issue:`15800`) .. _whatsnew_0230.deprecations: @@ -603,7 +607,8 @@ Deprecations - :func:`read_excel` has deprecated the ``skip_footer`` parameter. Use ``skipfooter`` instead (:issue:`18836`) - The ``is_copy`` attribute is deprecated and will be removed in a future version (:issue:`18801`). - ``IntervalIndex.from_intervals`` is deprecated in favor of the :class:`IntervalIndex` constructor (:issue:`19263`) -- :func:``DataFrame.from_items`` is deprecated. Use :func:``DataFrame.from_dict()`` instead, or :func:``DataFrame.from_dict(OrderedDict())`` if you wish to preserve the key order (:issue:`17320`) +- ``DataFrame.from_items`` is deprecated. 
Use :func:`DataFrame.from_dict` instead, or ``DataFrame.from_dict(OrderedDict())`` if you wish to preserve the key order (:issue:`17320`, :issue:`17312`) + - The ``broadcast`` parameter of ``.apply()`` is deprecated in favor of ``result_type='broadcast'`` (:issue:`18577`) - The ``reduce`` parameter of ``.apply()`` is deprecated in favor of ``result_type='reduce'`` (:issue:`18577`) @@ -712,17 +717,12 @@ Datetimelike - Bug in :func:`Series.__sub__` subtracting a non-nanosecond ``np.datetime64`` object from a ``Series`` gave incorrect results (:issue:`7996`) - Bug in :class:`DatetimeIndex`, :class:`TimedeltaIndex` addition and subtraction of zero-dimensional integer arrays gave incorrect results (:issue:`19012`) +- Bug in :class:`DatetimeIndex` and :class:`TimedeltaIndex` where adding or subtracting an array-like of ``DateOffset`` objects either raised (``np.array``, ``pd.Index``) or broadcast incorrectly (``pd.Series``) (:issue:`18849`) - Bug in :func:`Series.__add__` adding Series with dtype ``timedelta64[ns]`` to a timezone-aware ``DatetimeIndex`` incorrectly dropped timezone information (:issue:`13905`) -- Bug in :func:`Timedelta.__floordiv__` and :func:`Timedelta.__rfloordiv__` dividing by many incompatible numpy objects was incorrectly allowed (:issue:`18846`) - Adding a ``Period`` object to a ``datetime`` or ``Timestamp`` object will now correctly raise a ``TypeError`` (:issue:`17983`) - Bug in :class:`Timestamp` where comparison with an array of ``Timestamp`` objects would result in a ``RecursionError`` (:issue:`15183`) -- Bug in :class:`DatetimeIndex` and :class:`TimedeltaIndex` where adding or subtracting an array-like of ``DateOffset`` objects either raised (``np.array``, ``pd.Index``) or broadcast incorrectly (``pd.Series``) (:issue:`18849`) - Bug in :class:`Series` floor-division where operating on a scalar ``timedelta`` raises an exception (:issue:`18846`) -- Bug in :class:`Series`` with ``dtype='timedelta64[ns]`` where addition or subtraction of 
``TimedeltaIndex`` had results cast to ``dtype='int64'`` (:issue:`17250`) -- Bug in :class:`TimedeltaIndex` where division by a ``Series`` would return a ``TimedeltaIndex`` instead of a ``Series`` (:issue:`19042`) -- Bug in :class:`Series` with ``dtype='timedelta64[ns]`` where addition or subtraction of ``TimedeltaIndex`` could return a ``Series`` with an incorrect name (:issue:`19043`) - Bug in :class:`DatetimeIndex` where the repr was not showing high-precision time values at the end of a day (e.g., 23:59:59.999999999) (:issue:`19030`) -- Bug where dividing a scalar timedelta-like object with :class:`TimedeltaIndex` performed the reciprocal operation (:issue:`19125`) - Bug in ``.astype()`` to non-ns timedelta units would hold the incorrect dtype (:issue:`19176`, :issue:`19223`, :issue:`12425`) - Bug in subtracting :class:`Series` from ``NaT`` incorrectly returning ``NaT`` (:issue:`19158`) - Bug in :func:`Series.truncate` which raises ``TypeError`` with a monotonic ``PeriodIndex`` (:issue:`17717`) @@ -731,6 +731,15 @@ Datetimelike - Bug in :class:`Timestamp` and :func:`to_datetime` where a string representing a barely out-of-bounds timestamp would be incorrectly rounded down instead of raising ``OutOfBoundsDatetime`` (:issue:`19382`) - Bug in :func:`Timestamp.floor` :func:`DatetimeIndex.floor` where time stamps far in the future and past were not rounded correctly (:issue:`19206`) - Bug in :func:`to_datetime` where passing an out-of-bounds datetime with ``errors='coerce'`` and ``utc=True`` would raise ``OutOfBoundsDatetime`` instead of parsing to ``NaT`` (:issue:`19612`) + +Timedelta +^^^^^^^^^ + +- Bug in :class:`Series`` with ``dtype='timedelta64[ns]`` where addition or subtraction of ``TimedeltaIndex`` had results cast to ``dtype='int64'`` (:issue:`17250`) +- Bug in :class:`Series` with ``dtype='timedelta64[ns]`` where addition or subtraction of ``TimedeltaIndex`` could return a ``Series`` with an incorrect name (:issue:`19043`) +- Bug in 
:func:`Timedelta.__floordiv__` and :func:`Timedelta.__rfloordiv__` dividing by many incompatible numpy objects was incorrectly allowed (:issue:`18846`) +- Bug where dividing a scalar timedelta-like object with :class:`TimedeltaIndex` performed the reciprocal operation (:issue:`19125`) +- Bug in :class:`TimedeltaIndex` where division by a ``Series`` would return a ``TimedeltaIndex`` instead of a ``Series`` (:issue:`19042`) - Bug in :func:`Timedelta.__add__`, :func:`Timedelta.__sub__` where adding or subtracting a ``np.timedelta64`` object would return another ``np.timedelta64`` instead of a ``Timedelta`` (:issue:`19738`) - Bug in :func:`Timedelta.__floordiv__`, :func:`Timedelta.__rfloordiv__` where operating with a ``Tick`` object would raise a ``TypeError`` instead of returning a numeric value (:issue:`19738`) - Bug in :func:`Timedelta.total_seconds()` causing precision errors i.e. `Timedelta('30S').total_seconds()==30.000000000000004` (:issue:`19458`) From c0bd94f34c275ae3c73844a72c6be7b2574c17a2 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 20 Feb 2018 08:57:40 -0500 Subject: [PATCH 146/217] DOC: typos in whatsnew --- doc/source/api.rst | 10 ++++++++++ doc/source/whatsnew/v0.23.0.txt | 29 ++++++++++++++--------------- 2 files changed, 24 insertions(+), 15 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index 103b0fe9ff019..3b38f0caa1766 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1682,6 +1682,16 @@ MultiIndex Components MultiIndex.reorder_levels MultiIndex.remove_unused_levels +MultiIndex Selecting +~~~~~~~~~~~~~~~~~~~~ + +.. autosummary:: + :toctree: generated/ + + MultiIndex.get_loc + MultiIndex.get_indexer + MultiIndex.get_level_values + .. 
_api.datetimeindex: DatetimeIndex diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index ed50596843272..879b245af49cd 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -544,7 +544,8 @@ Datetimelike API Changes - The default ``Timedelta`` constructor now accepts an ``ISO 8601 Duration`` string as an argument (:issue:`19040`) - Addition or subtraction of ``NaT`` from :class:`TimedeltaIndex` will return ``TimedeltaIndex`` instead of ``DatetimeIndex`` (:issue:`19124`) -- :func:`DatetimeIndex.shift` and :func:`TimedeltaIndex.shift` will now raise ``NullFrequencyError`` (which subclasses ``ValueError``, which was raised in older versions) when the index- Addition and subtraction of ``NaN`` from a :class:`Series` with ``dtype='timedelta64[ns]'`` will raise a ``TypeError` instead of treating the ``NaN`` as ``NaT`` (:issue:`19274`) +- :func:`DatetimeIndex.shift` and :func:`TimedeltaIndex.shift` will now raise ``NullFrequencyError`` (which subclasses ``ValueError``, which was raised in older versions) when the index object frequency is ``None`` (:issue:`19147`) +- Addition and subtraction of ``NaN`` from a :class:`Series` with ``dtype='timedelta64[ns]'`` will raise a ``TypeError` instead of treating the ``NaN`` as ``NaT`` (:issue:`19274`) - ``NaT`` division with :class:`datetime.timedelta` will now return ``NaN`` instead of raising (:issue:`17876`) - :class:`Timestamp` will no longer silently ignore unused or invalid ``tz`` or ``tzinfo`` keyword arguments (:issue:`17690`) - :class:`Timestamp` will no longer silently ignore invalid ``freq`` arguments (:issue:`5168`) @@ -552,9 +553,9 @@ Datetimelike API Changes - ``pandas.tseries.frequencies.get_freq_group()`` and ``pandas.tseries.frequencies.DAYS`` are removed from the public API (:issue:`18034`) - :func:`Series.truncate` and :func:`DataFrame.truncate` will raise a ``ValueError`` if the index is not sorted instead of an unhelpful ``KeyError`` (:issue:`17935`) - 
Restricted ``DateOffset`` keyword arguments. Previously, ``DateOffset`` subclasses allowed arbitrary keyword arguments which could lead to unexpected behavior. Now, only valid arguments will be accepted. (:issue:`17176`, :issue:`18226`). -- Subtracting ``NaT`` from a :class:`Series` with ``dtype='datetime64[ns]'`` returns a ``Series`` with ``dtype='timedelta64[ns]'`` instead of ``dtype='datetime64[ns]'``(:issue:`18808`) +- Subtracting ``NaT`` from a :class:`Series` with ``dtype='datetime64[ns]'`` returns a ``Series`` with ``dtype='timedelta64[ns]'`` instead of ``dtype='datetime64[ns]'`` (:issue:`18808`) - Operations between a :class:`Series` with dtype ``dtype='datetime64[ns]'`` and a :class:`PeriodIndex` will correctly raises ``TypeError`` (:issue:`18850`) -- Subtraction of :class:`Series` with timezone-aware ``dtype='datetime64[ns]'`` with mis-matched timezones will raise ``TypeError`` instead of ``ValueError`` (issue:`18817`) +- Subtraction of :class:`Series` with timezone-aware ``dtype='datetime64[ns]'`` with mis-matched timezones will raise ``TypeError`` instead of ``ValueError`` (:issue:`18817`) .. _whatsnew_0230.api.other: @@ -566,7 +567,6 @@ Other API Changes - A :class:`Series` of ``dtype=category`` constructed from an empty ``dict`` will now have categories of ``dtype=object`` rather than ``dtype=float64``, consistently with the case in which an empty list is passed (:issue:`18515`) - All-NaN levels in a ``MultiIndex`` are now assigned ``float`` rather than ``object`` dtype, promoting consistency with ``Index`` (:issue:`17929`). - Levels names of a ``MultiIndex`` (when not None) are now required to be unique: trying to create a ``MultiIndex`` with repeated names will raise a ``ValueError`` (:issue:`18872`) - - :func:`Index.map` can now accept ``Series`` and dictionary input objects (:issue:`12756`, :issue:`18482`, :issue:`18509`). - :func:`DataFrame.unstack` will now default to filling with ``np.nan`` for ``object`` columns. 
(:issue:`12815`) - :class:`IntervalIndex` constructor will raise if the ``closed`` parameter conflicts with how the input data is inferred to be closed (:issue:`18421`) @@ -587,9 +587,8 @@ Other API Changes - ``IntervalDtype`` now returns ``True`` when compared against ``'interval'`` regardless of subtype, and ``IntervalDtype.name`` now returns ``'interval'`` regardless of subtype (:issue:`18980`) - ``KeyError`` now raises instead of ``ValueError`` in :meth:`~DataFrame.drop`, :meth:`~Panel.drop`, :meth:`~Series.drop`, :meth:`~Index.drop` when dropping a non-existent element in an axis with duplicates (:issue:`19186`) - :func:`Series.to_csv` now accepts a ``compression`` argument that works in the same way as the ``compression`` argument in :func:`DataFrame.to_csv` (:issue:`18958`) - object frequency is ``None`` (:issue:`19147`) - Set operations (union, difference...) on :class:`IntervalIndex` with incompatible index types will now raise a ``TypeError`` rather than a ``ValueError`` (:issue:`19329`) -- :class:`DateOffset` objects render more simply, e.g. "" instead of "" (:issue:`19403`) +- :class:`DateOffset` objects render more simply, e.g. ```` instead of ```` (:issue:`19403`) .. 
_whatsnew_0230.deprecations: @@ -637,11 +636,11 @@ Removal of prior version deprecations/changes - ``pandas.tseries.frequencies.get_standard_freq`` has been removed in favor of ``pandas.tseries.frequencies.to_offset(freq).rule_code`` (:issue:`13874`) - The ``freqstr`` keyword has been removed from ``pandas.tseries.frequencies.to_offset`` in favor of ``freq`` (:issue:`13874`) - The ``Panel4D`` and ``PanelND`` classes have been removed (:issue:`13776`) -- The ``Panel`` class has dropped the ``to_long``and ``toLong`` methods (:issue:`19077`) +- The ``Panel`` class has dropped the ``to_long`` and ``toLong`` methods (:issue:`19077`) - The options ``display.line_with`` and ``display.height`` are removed in favor of ``display.width`` and ``display.max_rows`` respectively (:issue:`4391`, :issue:`19107`) - The ``labels`` attribute of the ``Categorical`` class has been removed in favor of :attribute:`Categorical.codes` (:issue:`7768`) - The ``flavor`` parameter has been removed from the :func:`to_sql` method (:issue:`13611`) -- The modules `pandas.tools.hashing` and `pandas.util.hashing` have been removed (:issue:`16223`) +- The modules ``pandas.tools.hashing`` and ``pandas.util.hashing`` have been removed (:issue:`16223`) - The top-level functions ``pd.rolling_*``, ``pd.expanding_*`` and ``pd.ewm*`` have been removed (Deprecated since v0.18). 
Instead, use the DataFrame/Series methods :attr:`~DataFrame.rolling`, :attr:`~DataFrame.expanding` and :attr:`~DataFrame.ewm` (:issue:`18723`) @@ -652,7 +651,7 @@ Performance Improvements - Indexers on ``Series`` or ``DataFrame`` no longer create a reference cycle (:issue:`17956`) - Added a keyword argument, ``cache``, to :func:`to_datetime` that improved the performance of converting duplicate datetime arguments (:issue:`11665`) -- :class`DateOffset` arithmetic performance is improved (:issue:`18218`) +- :class:`DateOffset` arithmetic performance is improved (:issue:`18218`) - Converting a ``Series`` of ``Timedelta`` objects to days, seconds, etc... sped up through vectorization of underlying methods (:issue:`18092`) - Improved performance of ``.map()`` with a ``Series/dict`` input (:issue:`15081`) - The overridden ``Timedelta`` properties of days, seconds and microseconds have been removed, leveraging their built-in Python versions instead (:issue:`18242`) @@ -735,14 +734,14 @@ Datetimelike Timedelta ^^^^^^^^^ -- Bug in :class:`Series`` with ``dtype='timedelta64[ns]`` where addition or subtraction of ``TimedeltaIndex`` had results cast to ``dtype='int64'`` (:issue:`17250`) +- Bug in :class:`Series` with ``dtype='timedelta64[ns]`` where addition or subtraction of ``TimedeltaIndex`` had results cast to ``dtype='int64'`` (:issue:`17250`) - Bug in :class:`Series` with ``dtype='timedelta64[ns]`` where addition or subtraction of ``TimedeltaIndex`` could return a ``Series`` with an incorrect name (:issue:`19043`) - Bug in :func:`Timedelta.__floordiv__` and :func:`Timedelta.__rfloordiv__` dividing by many incompatible numpy objects was incorrectly allowed (:issue:`18846`) - Bug where dividing a scalar timedelta-like object with :class:`TimedeltaIndex` performed the reciprocal operation (:issue:`19125`) - Bug in :class:`TimedeltaIndex` where division by a ``Series`` would return a ``TimedeltaIndex`` instead of a ``Series`` (:issue:`19042`) - Bug in 
:func:`Timedelta.__add__`, :func:`Timedelta.__sub__` where adding or subtracting a ``np.timedelta64`` object would return another ``np.timedelta64`` instead of a ``Timedelta`` (:issue:`19738`) - Bug in :func:`Timedelta.__floordiv__`, :func:`Timedelta.__rfloordiv__` where operating with a ``Tick`` object would raise a ``TypeError`` instead of returning a numeric value (:issue:`19738`) -- Bug in :func:`Timedelta.total_seconds()` causing precision errors i.e. `Timedelta('30S').total_seconds()==30.000000000000004` (:issue:`19458`) +- Bug in :func:`Timedelta.total_seconds()` causing precision errors i.e. ``Timedelta('30S').total_seconds()==30.000000000000004`` (:issue:`19458`) Timezones @@ -763,7 +762,7 @@ Timezones Offsets ^^^^^^^ -- Bug in :class:`WeekOfMonth` and class:`Week` where addition and subtraction did not roll correctly (:issue:`18510`,:issue:`18672`,:issue:`18864`) +- Bug in :class:`WeekOfMonth` and :class:`Week` where addition and subtraction did not roll correctly (:issue:`18510`, :issue:`18672`, :issue:`18864`) - Bug in :class:`WeekOfMonth` and :class:`LastWeekOfMonth` where default keyword arguments for constructor raised ``ValueError`` (:issue:`19142`) - Bug in :class:`FY5253Quarter`, :class:`LastWeekOfMonth` where rollback and rollforward behavior was inconsistent with addition and subtraction behavior (:issue:`18854`) - Bug in :class:`FY5253` where ``datetime`` addition and subtraction incremented incorrectly for dates on the year-end but not normalized to midnight (:issue:`18854`) @@ -776,7 +775,7 @@ Numeric - Bug in :class:`Index` multiplication and division methods where operating with a ``Series`` would return an ``Index`` object instead of a ``Series`` object (:issue:`19042`) - Bug in the :class:`DataFrame` constructor in which data containing very large positive or very large negative numbers was causing ``OverflowError`` (:issue:`18584`) - Bug in :class:`Index` constructor with ``dtype='uint64'`` where int-like floats were not coerced to 
:class:`UInt64Index` (:issue:`18400`) -- Bug in :class:`DataFrame` flex arithmetic (e.g. `df.add(other, fill_value=foo)`) with a `fill_value` other than ``None`` failed to raise ``NotImplementedError`` in corner cases where either the frame or ``other`` has length zero (:issue:`19522`) +- Bug in :class:`DataFrame` flex arithmetic (e.g. ``df.add(other, fill_value=foo)``) with a ``fill_value`` other than ``None`` failed to raise ``NotImplementedError`` in corner cases where either the frame or ``other`` has length zero (:issue:`19522`) Indexing @@ -863,13 +862,13 @@ Reshaping - Fixed construction of a :class:`Series` from a ``dict`` containing ``NaN`` as key (:issue:`18480`) - Bug in :func:`Series.rank` where ``Series`` containing ``NaT`` modifies the ``Series`` inplace (:issue:`18521`) - Bug in :func:`cut` which fails when using readonly arrays (:issue:`18773`) -- Bug in :func:`Dataframe.pivot_table` which fails when the ``aggfunc`` arg is of type string. The behavior is now consistent with other methods like ``agg`` and ``apply`` (:issue:`18713`) +- Bug in :func:`DataFrame.pivot_table` which fails when the ``aggfunc`` arg is of type string. The behavior is now consistent with other methods like ``agg`` and ``apply`` (:issue:`18713`) - Bug in :func:`DataFrame.merge` in which merging using ``Index`` objects as vectors raised an Exception (:issue:`19038`) - Bug in :func:`DataFrame.stack`, :func:`DataFrame.unstack`, :func:`Series.unstack` which were not returning subclasses (:issue:`15563`) - Bug in timezone comparisons, manifesting as a conversion of the index to UTC in ``.concat()`` (:issue:`18523`) - Bug in :func:`concat` when concatting sparse and dense series it returns only a ``SparseDataFrame``. Should be a ``DataFrame``. 
(:issue:`18914`, :issue:`18686`, and :issue:`16874`) - Improved error message for :func:`DataFrame.merge` when there is no common merge key (:issue:`19427`) -- Bug in :func:`DataFrame.join` which does an *outer* instead of a *left* join when being called with multiple DataFrames and some have non-unique indices (:issue:`19624`) +- Bug in :func:`DataFrame.join` which does an ``outer`` instead of a ``left`` join when being called with multiple DataFrames and some have non-unique indices (:issue:`19624`) - :func:`Series.rename` now accepts ``axis`` as a kwarg (:issue:`18589`) - Comparisons between :class:`Series` and :class:`Index` would return a ``Series`` with an incorrect name, ignoring the ``Index``'s name attribute (:issue:`19582`) From cca630052c84584d928b7fc26957e5603436b721 Mon Sep 17 00:00:00 2001 From: Olivier Bilodeau Date: Tue, 20 Feb 2018 18:54:14 -0500 Subject: [PATCH 147/217] DOC: added a reference to DataFrame assign in concatenate section of merging (#18665) --- doc/source/merging.rst | 7 +++++++ doc/source/whatsnew/v0.23.0.txt | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/doc/source/merging.rst b/doc/source/merging.rst index ebade853313ab..4d9746eed0f0b 100644 --- a/doc/source/merging.rst +++ b/doc/source/merging.rst @@ -323,6 +323,13 @@ the name of the ``Series``. labels=['df1', 's1'], vertical=False); plt.close('all'); +.. note:: + + Since we're concatenating a ``Series`` to a ``DataFrame``, we could have + achieved the same result with :meth:`DataFrame.assign`. To concatenate an + arbitrary number of pandas objects (``DataFrame`` or ``Series``), use + ``concat``. + If unnamed ``Series`` are passed they will be numbered consecutively. .. 
ipython:: python diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 879b245af49cd..8d6a3dc72163e 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -677,7 +677,7 @@ Documentation Changes Rewrote some sentences for greater clarity, added more dynamic references to functions, methods and classes. (:issue:`18941`, :issue:`18948`, :issue:`18973`, :issue:`19017`) - +- Added a reference to :func:`DataFrame.assign` in the concatenate section of the merging documentation (:issue:`18665`) .. _whatsnew_0230.bug_fixes: From 1e3ff82f84fc582808ac94a2dbb02e7d151c3bf0 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 20 Feb 2018 16:15:56 -0800 Subject: [PATCH 148/217] Sparse Ops Cleanup (#19782) --- pandas/core/frame.py | 2 +- pandas/core/indexes/timedeltas.py | 2 +- pandas/core/ops.py | 45 +++++++++++++------------------ pandas/core/sparse/array.py | 7 +++++ pandas/core/sparse/frame.py | 2 +- pandas/core/sparse/series.py | 5 +--- 6 files changed, 30 insertions(+), 33 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0b315a7c6f031..efd6814ba04c5 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3995,7 +3995,7 @@ def _combine_const(self, other, func, errors='raise', try_cast=True): try_cast=try_cast) return self._constructor(new_data) - def _compare_frame(self, other, func, str_rep, try_cast=True): + def _compare_frame(self, other, func, str_rep): # compare_frame assumes self._indexed_same(other) import pandas.core.computation.expressions as expressions diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 4b543262fc485..41e499da8e008 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -928,7 +928,7 @@ def insert(self, loc, item): def delete(self, loc): """ - Make a new DatetimeIndex with passed location(s) deleted. + Make a new TimedeltaIndex with passed location(s) deleted. 
Parameters ---------- diff --git a/pandas/core/ops.py b/pandas/core/ops.py index da65f1f31ed2a..ad6102eb6ad0f 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -721,9 +721,7 @@ def add_flex_arithmetic_methods(cls, flex_arith_method, flex_comp_method=None): subtract=new_methods['sub'], divide=new_methods['div'])) # opt out of bool flex methods for now - for k in ('ror_', 'rxor', 'rand_'): - if k in new_methods: - new_methods.pop(k) + assert not any(kname in new_methods for kname in ('ror_', 'rxor', 'rand_')) add_methods(cls, new_methods=new_methods) @@ -1080,19 +1078,19 @@ def na_op(x, y): try: result = lib.scalar_binop(x, y, op) except: - msg = ("cannot compare a dtyped [{dtype}] array " - "with a scalar of type [{type}]" - ).format(dtype=x.dtype, type=type(y).__name__) - raise TypeError(msg) + raise TypeError("cannot compare a dtyped [{dtype}] array " + "with a scalar of type [{typ}]" + .format(dtype=x.dtype, + typ=type(y).__name__)) return result + fill_int = lambda x: x.fillna(0) + fill_bool = lambda x: x.fillna(False).astype(bool) + def wrapper(self, other): is_self_int_dtype = is_integer_dtype(self.dtype) - fill_int = lambda x: x.fillna(0) - fill_bool = lambda x: x.fillna(False).astype(bool) - self, other = _align_method_SERIES(self, other, align_asobject=True) if isinstance(other, ABCDataFrame): @@ -1232,10 +1230,10 @@ def to_series(right): elif right.ndim == 2: if left.shape != right.shape: - msg = ("Unable to coerce to DataFrame, shape " - "must be {req_shape}: given {given_shape}" - ).format(req_shape=left.shape, given_shape=right.shape) - raise ValueError(msg) + raise ValueError("Unable to coerce to DataFrame, shape " + "must be {req_shape}: given {given_shape}" + .format(req_shape=left.shape, + given_shape=right.shape)) right = left._constructor(right, index=left.index, columns=left.columns) @@ -1293,8 +1291,8 @@ def na_op(x, y): result[mask] = op(xrav, y) else: raise TypeError("cannot perform operation {op} between " - "objects of type {x} and 
{y}".format( - op=name, x=type(x), y=type(y))) + "objects of type {x} and {y}" + .format(op=name, x=type(x), y=type(y))) result, changed = maybe_upcast_putmask(result, ~mask, np.nan) result = result.reshape(x.shape) @@ -1355,7 +1353,7 @@ def f(self, other, axis=default_axis, level=None): if not self._indexed_same(other): self, other = self.align(other, 'outer', level=level, copy=False) - return self._compare_frame(other, na_op, str_rep, try_cast=False) + return self._compare_frame(other, na_op, str_rep) elif isinstance(other, ABCSeries): return _combine_series_frame(self, other, na_op, @@ -1380,7 +1378,7 @@ def f(self, other): if not self._indexed_same(other): raise ValueError('Can only compare identically-labeled ' 'DataFrame objects') - return self._compare_frame(other, func, str_rep, try_cast=True) + return self._compare_frame(other, func, str_rep) elif isinstance(other, ABCSeries): return _combine_series_frame(self, other, func, @@ -1532,10 +1530,6 @@ def wrapper(self, other): .format(other=type(other))) wrapper.__name__ = name - if name.startswith("__"): - # strip special method names, e.g. 
`__add__` needs to be `add` when - # passed to _sparse_series_op - name = name[2:-2] return wrapper @@ -1568,7 +1562,7 @@ def wrapper(self, other): dtype = getattr(other, 'dtype', None) other = SparseArray(other, fill_value=self.fill_value, dtype=dtype) - return _sparse_array_op(self, other, op, name) + return _sparse_array_op(self, other, op, name, series=False) elif is_scalar(other): with np.errstate(all='ignore'): fill = op(_get_fill(self), np.asarray(other)) @@ -1579,8 +1573,6 @@ def wrapper(self, other): raise TypeError('operation with {other} not supported' .format(other=type(other))) - if name.startswith("__"): - name = name[2:-2] wrapper.__name__ = name return wrapper @@ -1591,4 +1583,5 @@ def wrapper(self, other): sparse_series_special_funcs = dict(arith_method=_arith_method_SPARSE_SERIES, comp_method=_arith_method_SPARSE_SERIES, - bool_method=None) + bool_method=_bool_method_SERIES) +# TODO: I don't think the functions defined by bool_method are tested diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py index 4f7152666f7bf..92c4fe932f066 100644 --- a/pandas/core/sparse/array.py +++ b/pandas/core/sparse/array.py @@ -54,6 +54,9 @@ def _get_fill(arr): def _sparse_array_op(left, right, op, name, series=False): + if name.startswith('__'): + # For lookups in _libs.sparse we need non-dunder op name + name = name[2:-2] if series and is_integer_dtype(left) and is_integer_dtype(right): # series coerces to float64 if result should have NaN/inf @@ -119,6 +122,10 @@ def _sparse_array_op(left, right, op, name, series=False): def _wrap_result(name, data, sparse_index, fill_value, dtype=None): """ wrap op result to have correct dtype """ + if name.startswith('__'): + # e.g. 
__eq__ --> eq + name = name[2:-2] + if name in ('eq', 'ne', 'lt', 'gt', 'le', 'ge'): dtype = np.bool diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index 19b126216db81..872a17d8dbabe 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -551,7 +551,6 @@ def _combine_frame(self, other, func, fill_value=None, level=None): return self._constructor(index=new_index).__finalize__(self) new_data = {} - new_fill_value = None if fill_value is not None: # TODO: be a bit more intelligent here for col in new_columns: @@ -568,6 +567,7 @@ def _combine_frame(self, other, func, fill_value=None, level=None): new_data[col] = func(this[col], other[col]) # if the fill values are the same use them? or use a valid one + new_fill_value = None other_fill_value = getattr(other, 'default_fill_value', np.nan) if self.default_fill_value == other_fill_value: new_fill_value = self.default_fill_value diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index 335a4c80adc63..26cf9dbadbbf2 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -811,10 +811,7 @@ def from_coo(cls, A, dense_index=False): return _coo_to_sparse_series(A, dense_index=dense_index) -# overwrite series methods with unaccelerated versions -ops.add_special_arithmetic_methods(SparseSeries, **ops.series_special_funcs) +# overwrite series methods with unaccelerated Sparse-specific versions ops.add_flex_arithmetic_methods(SparseSeries, **ops.series_flex_funcs) -# overwrite basic arithmetic to use SparseSeries version -# force methods to overwrite previous definitions. 
ops.add_special_arithmetic_methods(SparseSeries, **ops.sparse_series_special_funcs) From a60e32539b86f35967ca9f8e8661359b013a9d64 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 21 Feb 2018 02:31:55 -0800 Subject: [PATCH 149/217] BUG: fix Period.asfreq conversion near datetime(1, 1, 1) (#19650) --- doc/source/whatsnew/v0.23.0.txt | 3 +- pandas/_libs/src/period_helper.c | 5 +- pandas/_libs/tslibs/period.pyx | 124 +++++++++++++++--- .../tests/scalar/period/test_period_asfreq.py | 22 ++++ pandas/tests/tslibs/test_period_asfreq.py | 1 + 5 files changed, 133 insertions(+), 22 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 8d6a3dc72163e..35856b64c171a 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -741,8 +741,9 @@ Timedelta - Bug in :class:`TimedeltaIndex` where division by a ``Series`` would return a ``TimedeltaIndex`` instead of a ``Series`` (:issue:`19042`) - Bug in :func:`Timedelta.__add__`, :func:`Timedelta.__sub__` where adding or subtracting a ``np.timedelta64`` object would return another ``np.timedelta64`` instead of a ``Timedelta`` (:issue:`19738`) - Bug in :func:`Timedelta.__floordiv__`, :func:`Timedelta.__rfloordiv__` where operating with a ``Tick`` object would raise a ``TypeError`` instead of returning a numeric value (:issue:`19738`) +- Bug in :func:`Period.asfreq` where periods near ``datetime(1, 1, 1)`` could be converted incorrectly (:issue:`19643`) - Bug in :func:`Timedelta.total_seconds()` causing precision errors i.e. 
``Timedelta('30S').total_seconds()==30.000000000000004`` (:issue:`19458`) - +- Timezones ^^^^^^^^^ diff --git a/pandas/_libs/src/period_helper.c b/pandas/_libs/src/period_helper.c index 7c4de8e42e73b..a812ed2e7e2b3 100644 --- a/pandas/_libs/src/period_helper.c +++ b/pandas/_libs/src/period_helper.c @@ -138,7 +138,7 @@ PANDAS_INLINE npy_int64 transform_via_day(npy_int64 ordinal, } static npy_int64 DtoB_weekday(npy_int64 absdate) { - return (((absdate) / 7) * 5) + (absdate) % 7 - BDAY_OFFSET; + return floordiv(absdate, 7) * 5 + mod_compat(absdate, 7) - BDAY_OFFSET; } static npy_int64 DtoB(struct date_info *dinfo, @@ -245,7 +245,8 @@ static npy_int64 asfreq_UpsampleWithinDay(npy_int64 ordinal, static npy_int64 asfreq_BtoDT(npy_int64 ordinal, asfreq_info *af_info) { ordinal += BDAY_OFFSET; ordinal = - (((ordinal - 1) / 5) * 7 + mod_compat(ordinal - 1, 5) + 1 - ORD_OFFSET); + (floordiv(ordinal - 1, 5) * 7 + mod_compat(ordinal - 1, 5) + 1 - + ORD_OFFSET); return upsample_daytime(ordinal, af_info); } diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 32ffe4e6d0453..e1c783ac9fa54 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -154,12 +154,32 @@ cdef inline int get_freq_group(int freq) nogil: return (freq // 1000) * 1000 -@cython.cdivision +# specifically _don't_ use cdivision or else ordinals near -1 are assigned to +# incorrect dates GH#19643 +@cython.cdivision(False) cdef int64_t get_period_ordinal(int year, int month, int day, int hour, int minute, int second, int microseconds, int picoseconds, int freq) nogil: - """generate an ordinal in period space""" + """ + Generate an ordinal in period space + + Parameters + ---------- + year : int + month : int + day : int + hour : int + minute : int + second : int + microseconds : int + picoseconds : int + freq : int + + Returns + ------- + period_ordinal : int64_t + """ cdef: int64_t absdays, unix_date, seconds, delta int64_t weeks @@ -190,7 +210,7 @@ cdef 
int64_t get_period_ordinal(int year, int month, int day, if month >= fmonth: mdiff += 12 - return (year - 1970) * 4 + (mdiff - 1) / 3 + return (year - 1970) * 4 + (mdiff - 1) // 3 elif freq == FR_MTH: return (year - 1970) * 12 + month - 1 @@ -202,14 +222,14 @@ cdef int64_t get_period_ordinal(int year, int month, int day, seconds = unix_date * 86400 + hour * 3600 + minute * 60 + second if freq == FR_MS: - return seconds * 1000 + microseconds / 1000 + return seconds * 1000 + microseconds // 1000 elif freq == FR_US: return seconds * 1000000 + microseconds elif freq == FR_NS: return (seconds * 1000000000 + - microseconds * 1000 + picoseconds / 1000) + microseconds * 1000 + picoseconds // 1000) else: return seconds @@ -229,7 +249,7 @@ cdef int64_t get_period_ordinal(int year, int month, int day, elif freq == FR_BUS: # calculate the current week assuming sunday as last day of a week # Jan 1 0001 is a Monday, so subtract 1 to get to end-of-week - weeks = (unix_date + ORD_OFFSET - 1) / 7 + weeks = (unix_date + ORD_OFFSET - 1) // 7 # calculate the current weekday (in range 1 .. 
7) delta = (unix_date + ORD_OFFSET - 1) % 7 + 1 # return the number of business days in full weeks plus the business @@ -241,12 +261,12 @@ cdef int64_t get_period_ordinal(int year, int month, int day, elif freq_group == FR_WK: day_adj = freq - FR_WK - return (unix_date + ORD_OFFSET - (1 + day_adj)) / 7 + 1 - WEEK_OFFSET + return (unix_date + ORD_OFFSET - (1 + day_adj)) // 7 + 1 - WEEK_OFFSET # raise ValueError -cdef int get_date_info(int64_t ordinal, int freq, date_info *dinfo) nogil: +cdef void get_date_info(int64_t ordinal, int freq, date_info *dinfo) nogil: cdef: int64_t absdate double abstime @@ -263,7 +283,6 @@ cdef int get_date_info(int64_t ordinal, int freq, date_info *dinfo) nogil: absdate += 1 dInfoCalc_SetFromAbsDateTime(dinfo, absdate, abstime) - return 0 cdef int64_t get_python_ordinal(int64_t period_ordinal, int freq) nogil: @@ -272,6 +291,15 @@ cdef int64_t get_python_ordinal(int64_t period_ordinal, int freq) nogil: This corresponds to the number of days since Jan., 1st, 1AD. When the instance has a frequency less than daily, the proleptic date is calculated for the last day of the period. + + Parameters + ---------- + period_ordinal : int64_t + freq : int + + Returns + ------- + absdate : int64_t number of days since datetime(1, 1, 1) """ cdef: asfreq_info af_info @@ -285,11 +313,23 @@ cdef int64_t get_python_ordinal(int64_t period_ordinal, int freq) nogil: return toDaily(period_ordinal, &af_info) + ORD_OFFSET -cdef int dInfoCalc_SetFromAbsDateTime(date_info *dinfo, - int64_t absdate, double abstime) nogil: +cdef void dInfoCalc_SetFromAbsDateTime(date_info *dinfo, + int64_t absdate, double abstime) nogil: """ Set the instance's value using the given date and time. Assumes GREGORIAN_CALENDAR. 
+ + Parameters + ---------- + dinfo : date_info* + absdate : int64_t + days elapsed since datetime(1, 1, 1) + abstime : double + seconds elapsed since beginning of day described by absdate + + Notes + ----- + Updates dinfo inplace """ # Bounds check # The calling function is responsible for ensuring that @@ -300,13 +340,21 @@ cdef int dInfoCalc_SetFromAbsDateTime(date_info *dinfo, # Calculate the time dInfoCalc_SetFromAbsTime(dinfo, abstime) - return 0 -cdef int dInfoCalc_SetFromAbsDate(date_info *dinfo, int64_t absdate) nogil: +cdef void dInfoCalc_SetFromAbsDate(date_info *dinfo, int64_t absdate) nogil: """ Sets the date part of the date_info struct Assumes GREGORIAN_CALENDAR + + Parameters + ---------- + dinfo : date_info* + absdate : int64_t + + Notes + ----- + Updates dinfo inplace """ cdef: pandas_datetimestruct dts @@ -315,13 +363,22 @@ cdef int dInfoCalc_SetFromAbsDate(date_info *dinfo, int64_t absdate) nogil: dinfo.year = dts.year dinfo.month = dts.month dinfo.day = dts.day - return 0 @cython.cdivision -cdef int dInfoCalc_SetFromAbsTime(date_info *dinfo, double abstime) nogil: +cdef void dInfoCalc_SetFromAbsTime(date_info *dinfo, double abstime) nogil: """ Sets the time part of the DateTime object. + + Parameters + ---------- + dinfo : date_info* + abstime : double + seconds elapsed since beginning of day described by absdate + + Notes + ----- + Updates dinfo inplace """ cdef: int inttime @@ -336,7 +393,6 @@ cdef int dInfoCalc_SetFromAbsTime(date_info *dinfo, double abstime) nogil: dinfo.hour = hour dinfo.minute = minute dinfo.second = second - return 0 @cython.cdivision @@ -370,7 +426,19 @@ cdef int64_t absdate_from_ymd(int year, int month, int day) nogil: Find the absdate (days elapsed since datetime(1, 1, 1) for the given year/month/day. 
Assumes GREGORIAN_CALENDAR + + Parameters + ---------- + year : int + month : int + day : int + + Returns + ------- + absdate : int + days elapsed since datetime(1, 1, 1) """ + # /* Calculate the absolute date cdef: pandas_datetimestruct dts @@ -385,6 +453,25 @@ cdef int64_t absdate_from_ymd(int year, int month, int day) nogil: cdef int get_yq(int64_t ordinal, int freq, int *quarter, int *year): + """ + Find the year and quarter of a Period with the given ordinal and frequency + + Parameters + ---------- + ordinal : int64_t + freq : int + quarter : *int + year : *int + + Returns + ------- + qtr_freq : int + describes the implied quarterly frequency associated with `freq` + + Notes + ----- + Sets quarter and year inplace + """ cdef: asfreq_info af_info int qtr_freq @@ -403,8 +490,8 @@ cdef int get_yq(int64_t ordinal, int freq, int *quarter, int *year): return qtr_freq -cdef int64_t DtoQ_yq(int64_t ordinal, asfreq_info *af_info, - int *year, int *quarter): +cdef void DtoQ_yq(int64_t ordinal, asfreq_info *af_info, + int *year, int *quarter): cdef: date_info dinfo @@ -419,7 +506,6 @@ cdef int64_t DtoQ_yq(int64_t ordinal, asfreq_info *af_info, year[0] = dinfo.year quarter[0] = monthToQuarter(dinfo.month) - return 0 cdef inline int monthToQuarter(int month): diff --git a/pandas/tests/scalar/period/test_period_asfreq.py b/pandas/tests/scalar/period/test_period_asfreq.py index a2819a3478f79..9f8b2562e9e20 100644 --- a/pandas/tests/scalar/period/test_period_asfreq.py +++ b/pandas/tests/scalar/period/test_period_asfreq.py @@ -1,3 +1,7 @@ +import pytest + +from pandas.errors import OutOfBoundsDatetime + import pandas as pd from pandas import Period, offsets from pandas.util import testing as tm @@ -6,6 +10,24 @@ class TestFreqConversion(object): """Test frequency conversion of date objects""" + @pytest.mark.parametrize('freq', ['A', 'Q', 'M', 'W', 'B', 'D']) + def test_asfreq_near_zero(self, freq): + # GH#19643, GH#19650 + per = Period('0001-01-01', freq=freq) + tup1 = 
(per.year, per.hour, per.day) + + prev = per - 1 + assert (per - 1).ordinal == per.ordinal - 1 + tup2 = (prev.year, prev.month, prev.day) + assert tup2 < tup1 + + @pytest.mark.xfail(reason='GH#19643 period_helper asfreq functions fail ' + 'to check for overflows') + def test_to_timestamp_out_of_bounds(self): + # GH#19643, currently gives Timestamp('1754-08-30 22:43:41.128654848') + per = Period('0001-01-01', freq='B') + with pytest.raises(OutOfBoundsDatetime): + per.to_timestamp() def test_asfreq_corner(self): val = Period(freq='A', year=2007) diff --git a/pandas/tests/tslibs/test_period_asfreq.py b/pandas/tests/tslibs/test_period_asfreq.py index 98959adf6fda4..61737083e22ea 100644 --- a/pandas/tests/tslibs/test_period_asfreq.py +++ b/pandas/tests/tslibs/test_period_asfreq.py @@ -5,6 +5,7 @@ class TestPeriodFreqConversion(object): + def test_intraday_conversion_factors(self): assert period_asfreq(1, get_freq('D'), get_freq('H'), False) == 24 assert period_asfreq(1, get_freq('D'), get_freq('T'), False) == 1440 From e8e925beec12513e505983d672c657d6a54bcf3e Mon Sep 17 00:00:00 2001 From: ZhuBaohe Date: Wed, 21 Feb 2018 18:38:41 +0800 Subject: [PATCH 150/217] DOC: correct Series.searchsorted example (#19784) --- pandas/core/base.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 0ca029ffd4c25..ebd69a5f9aac1 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1165,21 +1165,16 @@ def factorize(self, sort=False, na_sentinel=-1): >>> x.searchsorted([1, 3], side='right') array([1, 3]) - >>> x = pd.Categorical(['apple', 'bread', 'bread', 'cheese', 'milk' ]) + >>> x = pd.Categorical(['apple', 'bread', 'bread', + 'cheese', 'milk'], ordered=True) [apple, bread, bread, cheese, milk] Categories (4, object): [apple < bread < cheese < milk] >>> x.searchsorted('bread') array([1]) # Note: an array, not a scalar - >>> x.searchsorted(['bread']) - array([1]) - - >>> x.searchsorted(['bread', 
'eggs']) - array([1, 4]) - - >>> x.searchsorted(['bread', 'eggs'], side='right') - array([3, 4]) # eggs before milk + >>> x.searchsorted(['bread'], side='right') + array([3]) """) @Substitution(klass='IndexOpsMixin') From 5f82d601834e2ca6b5b0a7d21adbe2cf03469ad5 Mon Sep 17 00:00:00 2001 From: Antonio Quinonez Date: Wed, 21 Feb 2018 02:56:21 -0800 Subject: [PATCH 151/217] DOC: Edit installation instructions for clarity. (#19798) --- doc/source/install.rst | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/doc/source/install.rst b/doc/source/install.rst index c4e331d64e721..4ff63d59024b2 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -6,7 +6,7 @@ Installation ============ -The easiest way for the majority of users to install pandas is to install it +The easiest way to install pandas is to install it as part of the `Anaconda `__ distribution, a cross platform distribution for data analysis and scientific computing. This is the recommended installation method for most users. @@ -40,7 +40,7 @@ packages that make up the `SciPy `__ stack (Linux, Mac OS X, Windows) Python distribution for data analytics and scientific computing. -After running a simple installer, the user will have access to pandas and the +After running the installer, the user will have access to pandas and the rest of the `SciPy `__ stack without needing to install anything else, and without needing to wait for any software to be compiled. @@ -51,9 +51,9 @@ A full list of the packages available as part of the `Anaconda `__ distribution `can be found here `__. -An additional advantage of installing with Anaconda is that you don't require -admin rights to install it, it will install in the user's home directory, and -this also makes it trivial to delete Anaconda at a later date (just delete +Another advantage to installing Anaconda is that you don't need +admin rights to install it. 
Anaconda can install in the user's home directory, +which makes it trivial to delete Anaconda if you decide (just delete that folder). .. _install.miniconda: @@ -85,9 +85,9 @@ downloading and running the `Miniconda will do this for you. The installer `can be found here `__ -The next step is to create a new conda environment (these are analogous to a -virtualenv but they also allow you to specify precisely which Python version -to install also). Run the following commands from a terminal window:: +The next step is to create a new conda environment. A conda environment is like a +virtualenv that allows you to specify a specific version of Python and set of libraries. +Run the following commands from a terminal window:: conda create -n name_of_my_env python @@ -118,8 +118,8 @@ distribution:: conda install anaconda -If you require any packages that are available to pip but not conda, simply -install pip, and use pip to install these packages:: +If you need packages that are available to pip but not conda, then +install pip, and then use pip to install those packages:: conda install pip pip install django @@ -134,15 +134,12 @@ pandas can be installed via pip from pip install pandas -This will likely require the installation of a number of dependencies, -including NumPy, will require a compiler to compile required bits of code, -and can take a few minutes to complete. Installing using your Linux distribution's package manager. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The commands in this table will install pandas for Python 3 from your distribution. -To install pandas for Python 2 you may need to use the package ``python-pandas``. +To install pandas for Python 2, you may need to use the ``python-pandas`` package. .. 
csv-table:: :header: "Distribution", "Status", "Download / Repository Link", "Install method" @@ -169,9 +166,9 @@ See the :ref:`contributing documentation ` for complete instructio Running the test suite ~~~~~~~~~~~~~~~~~~~~~~ -pandas is equipped with an exhaustive set of unit tests covering about 97% of +pandas is equipped with an exhaustive set of unit tests, covering about 97% of the codebase as of this writing. To run it on your machine to verify that -everything is working (and you have all of the dependencies, soft and hard, +everything is working (and that you have all of the dependencies, soft and hard, installed), make sure you have `pytest `__ and run: @@ -214,8 +211,8 @@ Recommended Dependencies .. note:: - You are highly encouraged to install these libraries, as they provide large speedups, especially - if working with large data sets. + You are highly encouraged to install these libraries, as they provide speed improvements, especially + when working with large data sets. .. 
_install.optional_dependencies: From 7077fe96ee3f5c654106bda114aa795a3006368d Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Wed, 21 Feb 2018 06:30:58 -0500 Subject: [PATCH 152/217] BF: Skip test_read_excel_parse_dates if no xlwt which is used in to_excel (#19803) --- pandas/tests/io/test_excel.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index ebb8424b78ed4..4c790a0f0f64a 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -968,6 +968,7 @@ def test_read_excel_chunksize(self): def test_read_excel_parse_dates(self): # GH 11544, 12051 _skip_if_no_openpyxl() + _skip_if_no_xlwt() # for df2.to_excel df = DataFrame( {'col': [1, 2, 3], From e7e17124fdcaecbf72094a52c8c5d542273cfb65 Mon Sep 17 00:00:00 2001 From: Eric Chea <5069128+EricChea@users.noreply.github.com> Date: Wed, 21 Feb 2018 06:37:46 -0500 Subject: [PATCH 153/217] DEPR: Add deprecation warning for factorize() order keyword (#19751) --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/core/algorithms.py | 2 ++ pandas/tests/test_algos.py | 9 ++++++++ pandas/tests/util/test_util.py | 14 ++++++++++++ pandas/util/_decorators.py | 38 +++++++++++++++++++++++++++++++-- 5 files changed, 62 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 35856b64c171a..ed3069943bb6a 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -610,6 +610,7 @@ Deprecations - The ``broadcast`` parameter of ``.apply()`` is deprecated in favor of ``result_type='broadcast'`` (:issue:`18577`) - The ``reduce`` parameter of ``.apply()`` is deprecated in favor of ``result_type='reduce'`` (:issue:`18577`) +- The ``order`` parameter of :func:`factorize` is deprecated and will be removed in a future release (:issue:`19727`) .. 
_whatsnew_0230.prior_deprecations: diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index c754c063fce8e..624045a3d64bc 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -32,6 +32,7 @@ from pandas.core import common as com from pandas._libs import algos, lib, hashtable as htable from pandas._libs.tslib import iNaT +from pandas.util._decorators import deprecate_kwarg # --------------- # @@ -436,6 +437,7 @@ def isin(comps, values): return f(comps, values) +@deprecate_kwarg(old_arg_name='order', new_arg_name=None) def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None): """ Encode input values as an enumerated type or categorical variable diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index b1e3177547ac6..884b1eb7342c6 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -248,6 +248,15 @@ def test_uint64_factorize(self): tm.assert_numpy_array_equal(labels, exp_labels) tm.assert_numpy_array_equal(uniques, exp_uniques) + def test_deprecate_order(self): + # gh 19727 - check warning is raised for deprecated keyword, order. + # Test not valid once order keyword is removed. 
+ data = np.array([2**63, 1, 2**63], dtype=np.uint64) + with tm.assert_produces_warning(expected_warning=FutureWarning): + algos.factorize(data, order=True) + with tm.assert_produces_warning(False): + algos.factorize(data) + class TestUnique(object): diff --git a/pandas/tests/util/test_util.py b/pandas/tests/util/test_util.py index 3b0a428218771..2bc017ef226ce 100644 --- a/pandas/tests/util/test_util.py +++ b/pandas/tests/util/test_util.py @@ -34,9 +34,14 @@ def _f2(new=False): def _f3(new=0): return new + @deprecate_kwarg('old', None) + def _f4(old=True, unchanged=True): + return old + self.f1 = _f1 self.f2 = _f2 self.f3 = _f3 + self.f4 = _f4 def test_deprecate_kwarg(self): x = 78 @@ -72,6 +77,15 @@ def test_bad_deprecate_kwarg(self): def f4(new=None): pass + def test_deprecate_keyword(self): + x = 9 + with tm.assert_produces_warning(FutureWarning): + result = self.f4(old=x) + assert result is x + with tm.assert_produces_warning(None): + result = self.f4(unchanged=x) + assert result is True + def test_rands(): r = tm.rands(10) diff --git a/pandas/util/_decorators.py b/pandas/util/_decorators.py index eed9cee54efb3..1753bc8b8fc33 100644 --- a/pandas/util/_decorators.py +++ b/pandas/util/_decorators.py @@ -65,8 +65,9 @@ def deprecate_kwarg(old_arg_name, new_arg_name, mapping=None, stacklevel=2): ---------- old_arg_name : str Name of argument in function to deprecate - new_arg_name : str - Name of preferred argument in function + new_arg_name : str or None + Name of preferred argument in function. Use None to raise warning that + ``old_arg_name`` keyword is deprecated. mapping : dict or callable If mapping is present, use it to translate old arguments to new arguments. A callable must do its own value checking; @@ -82,12 +83,15 @@ def deprecate_kwarg(old_arg_name, new_arg_name, mapping=None, stacklevel=2): ... 
>>> f(columns='should work ok') should work ok + >>> f(cols='should raise warning') FutureWarning: cols is deprecated, use columns instead warnings.warn(msg, FutureWarning) should raise warning + >>> f(cols='should error', columns="can\'t pass do both") TypeError: Can only specify 'cols' or 'columns', not both + >>> @deprecate_kwarg('old', 'new', {'yes': True, 'no': False}) ... def f(new=False): ... print('yes!' if new else 'no!') @@ -96,6 +100,25 @@ def deprecate_kwarg(old_arg_name, new_arg_name, mapping=None, stacklevel=2): FutureWarning: old='yes' is deprecated, use new=True instead warnings.warn(msg, FutureWarning) yes! + + + To raise a warning that a keyword will be removed entirely in the future + + >>> @deprecate_kwarg(old_arg_name='cols', new_arg_name=None) + ... def f(cols='', another_param=''): + ... print(cols) + ... + >>> f(cols='should raise warning') + FutureWarning: the 'cols' keyword is deprecated and will be removed in a + future version please takes steps to stop use of 'cols' + should raise warning + >>> f(another_param='should not raise warning') + should not raise warning + + >>> f(cols='should raise warning', another_param='') + FutureWarning: the 'cols' keyword is deprecated and will be removed in a + future version please takes steps to stop use of 'cols' + should raise warning """ if mapping is not None and not hasattr(mapping, 'get') and \ @@ -107,6 +130,17 @@ def _deprecate_kwarg(func): @wraps(func) def wrapper(*args, **kwargs): old_arg_value = kwargs.pop(old_arg_name, None) + + if new_arg_name is None and old_arg_value is not None: + msg = ( + "the '{old_name}' keyword is deprecated and will be " + "removed in a future version " + "please takes steps to stop use of '{old_name}'" + ).format(old_name=old_arg_name) + warnings.warn(msg, FutureWarning, stacklevel=stacklevel) + kwargs[old_arg_name] = old_arg_value + return func(*args, **kwargs) + if old_arg_value is not None: if mapping is not None: if hasattr(mapping, 'get'): From 
440fc8d4a9fe82028daa6d8524985cd512a3b79b Mon Sep 17 00:00:00 2001 From: Noah Date: Wed, 21 Feb 2018 06:40:21 -0500 Subject: [PATCH 154/217] BUG: drop_duplicates not raising KeyError on missing key (#19730) --- doc/source/whatsnew/v0.23.0.txt | 2 ++ pandas/core/frame.py | 7 +++++++ pandas/tests/frame/test_analytics.py | 13 +++++++++++++ 3 files changed, 22 insertions(+) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index ed3069943bb6a..a4b943f995a33 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -795,6 +795,8 @@ Indexing - Bug in :class:`IntervalIndex` where empty and purely NA data was constructed inconsistently depending on the construction method (:issue:`18421`) - Bug in :func:`IntervalIndex.symmetric_difference` where the symmetric difference with a non-``IntervalIndex`` did not raise (:issue:`18475`) - Bug in :class:`IntervalIndex` where set operations that returned an empty ``IntervalIndex`` had the wrong dtype (:issue:`19101`) +- Bug in :meth:`DataFrame.drop_duplicates` where no ``KeyError`` is raised when passing in columns that don't exist on the ``DataFrame`` (:issue:`19726`) + MultiIndex ^^^^^^^^^^ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index efd6814ba04c5..d81d22173bfbd 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3655,6 +3655,13 @@ def f(vals): isinstance(subset, tuple) and subset in self.columns): subset = subset, + # Verify all columns in subset exist in the queried dataframe + # Otherwise, raise a KeyError, same as if you try to __getitem__ with a + # key that doesn't exist. 
+ diff = Index(subset).difference(self.columns) + if not diff.empty: + raise KeyError(diff) + vals = (col.values for name, col in self.iteritems() if name in subset) labels, shape = map(list, zip(*map(f, vals))) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index b9275fc69e7ff..f2b8387072c8d 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1492,6 +1492,19 @@ def test_drop_duplicates(self): for keep in ['first', 'last', False]: assert df.duplicated(keep=keep).sum() == 0 + @pytest.mark.parametrize('subset', ['a', ['a'], ['a', 'B']]) + def test_duplicated_with_misspelled_column_name(self, subset): + # GH 19730 + df = pd.DataFrame({'A': [0, 0, 1], + 'B': [0, 0, 1], + 'C': [0, 0, 1]}) + + with pytest.raises(KeyError): + df.duplicated(subset) + + with pytest.raises(KeyError): + df.drop_duplicates(subset) + def test_drop_duplicates_with_duplicate_column_names(self): # GH17836 df = DataFrame([ From c767f2238fa3966bef82eda4b989c1a0ce22bfe3 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 21 Feb 2018 06:53:20 -0500 Subject: [PATCH 155/217] ASV: excel asv occasional failure (#19811) closes #19779 --- asv_bench/benchmarks/io/excel.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/asv_bench/benchmarks/io/excel.py b/asv_bench/benchmarks/io/excel.py index a7c6c43d15026..58ab6bb8046c5 100644 --- a/asv_bench/benchmarks/io/excel.py +++ b/asv_bench/benchmarks/io/excel.py @@ -25,13 +25,12 @@ def setup(self, engine): self.writer_read.save() self.bio_read.seek(0) - self.bio_write = BytesIO() - self.bio_write.seek(0) - self.writer_write = ExcelWriter(self.bio_write, engine=engine) - def time_read_excel(self, engine): read_excel(self.bio_read) def time_write_excel(self, engine): - self.df.to_excel(self.writer_write, sheet_name='Sheet1') - self.writer_write.save() + bio_write = BytesIO() + bio_write.seek(0) + writer_write = ExcelWriter(bio_write, engine=engine) + 
self.df.to_excel(writer_write, sheet_name='Sheet1') + writer_write.save() From db4c8e9c14b8ad725a9ba81caea6c64a58c1a39c Mon Sep 17 00:00:00 2001 From: Eric Chea <5069128+EricChea@users.noreply.github.com> Date: Wed, 21 Feb 2018 14:53:08 -0500 Subject: [PATCH 156/217] DOC: Add example of how to preserve order of columns with usecols. (#19746) * Add example of how to preserve order of columns with usecols. * Encase usecols in double back ticks for consistency. Change column names from numeric to string. * Add line to separate examples. --- doc/source/io.rst | 10 ++++++++-- pandas/io/parsers.py | 7 ++++++- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 6120f7d25a0c3..0b9a610b50d7d 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -135,8 +135,14 @@ usecols : array-like or callable, default ``None`` be positional (i.e. integer indices into the document columns) or strings that correspond to column names provided either by the user in `names` or inferred from the document header row(s). For example, a valid array-like - `usecols` parameter would be ``[0, 1, 2]`` or ``['foo', 'bar', 'baz']``. - Element order is ignored, so ``usecols=[0, 1]`` is the same as ``[1, 0]``. + `usecols` parameter would be ``[0, 1, 2]`` or ``['foo', 'bar', 'baz']``. + + Element order is ignored, so ``usecols=[0, 1]`` is the same as ``[1, 0]``. To + instantiate a DataFrame from ``data`` with element order preserved use + ``pd.read_csv(data, usecols=['foo', 'bar'])[['foo', 'bar']]`` for columns + in ``['foo', 'bar']`` order or + ``pd.read_csv(data, usecols=['foo', 'bar'])[['bar', 'foo']]`` for + ``['bar', 'foo']`` order. 
If callable, the callable function will be evaluated against the column names, returning names where the callable function evaluates to True: diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 7ea6d321e0fdd..4b1385514a0c4 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -102,7 +102,12 @@ that correspond to column names provided either by the user in `names` or inferred from the document header row(s). For example, a valid array-like `usecols` parameter would be [0, 1, 2] or ['foo', 'bar', 'baz']. Element - order is ignored, so usecols=[1,0] is the same as [0,1]. + order is ignored, so ``usecols=[0, 1]`` is the same as ``[1, 0]``. + To instantiate a DataFrame from ``data`` with element order preserved use + ``pd.read_csv(data, usecols=['foo', 'bar'])[['foo', 'bar']]`` for columns + in ``['foo', 'bar']`` order or + ``pd.read_csv(data, usecols=['foo', 'bar'])[['bar', 'foo']]`` + for ``['bar', 'foo']`` order. If callable, the callable function will be evaluated against the column names, returning names where the callable function evaluates to True. 
An From 8875ecba7d91686528b7916e444c9d43c7e0738c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 21 Feb 2018 15:50:04 -0800 Subject: [PATCH 157/217] TST: move more series tests to test_arithmetic (#19794) --- pandas/tests/series/test_arithmetic.py | 474 ++++++++++++++++- pandas/tests/series/test_operators.py | 695 +++++-------------------- pandas/tests/series/test_timezones.py | 9 + 3 files changed, 610 insertions(+), 568 deletions(-) diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index f727edf8fb7d8..5b8d9cfab3e0d 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -1,17 +1,26 @@ # -*- coding: utf-8 -*- from datetime import datetime, timedelta import operator +from decimal import Decimal import numpy as np import pytest -from pandas import Series, Timestamp, Period +from pandas import Series, Timestamp, Timedelta, Period, NaT from pandas._libs.tslibs.period import IncompatibleFrequency import pandas as pd import pandas.util.testing as tm +@pytest.fixture +def tdser(): + """ + Return a Series with dtype='timedelta64[ns]', including a NaT. 
+ """ + return Series(['59 Days', '59 Days', 'NaT'], dtype='timedelta64[ns]') + + # ------------------------------------------------------------------ # Comparisons @@ -262,6 +271,97 @@ def test_cmp_series_period_series_mixed_freq(self): # ------------------------------------------------------------------ # Arithmetic +class TestSeriesDivision(object): + # __div__, __rdiv__, __floordiv__, __rfloordiv__ + # for non-timestamp/timedelta/period dtypes + + def test_divide_decimal(self): + # resolves issue GH#9787 + expected = Series([Decimal(5)]) + + ser = Series([Decimal(10)]) + result = ser / Decimal(2) + + tm.assert_series_equal(result, expected) + + ser = Series([Decimal(10)]) + result = ser // Decimal(2) + + tm.assert_series_equal(result, expected) + + def test_div_equiv_binop(self): + # Test Series.div as well as Series.__div__ + # float/integer issue + # GH#7785 + first = Series([1, 0], name='first') + second = Series([-0.01, -0.02], name='second') + expected = Series([-0.01, -np.inf]) + + result = second.div(first) + tm.assert_series_equal(result, expected, check_names=False) + + result = second / first + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('dtype2', [ + np.int64, np.int32, np.int16, np.int8, + np.float64, np.float32, np.float16, + np.uint64, np.uint32, np.uint16, np.uint8]) + @pytest.mark.parametrize('dtype1', [np.int64, np.float64, np.uint64]) + def test_ser_div_ser(self, dtype1, dtype2): + # no longer do integer div for any ops, but deal with the 0's + first = Series([3, 4, 5, 8], name='first').astype(dtype1) + second = Series([0, 0, 0, 3], name='second').astype(dtype2) + + with np.errstate(all='ignore'): + expected = Series(first.values.astype(np.float64) / second.values, + dtype='float64', name=None) + expected.iloc[0:3] = np.inf + + result = first / second + tm.assert_series_equal(result, expected) + assert not result.equals(second / first) + + def test_rdiv_zero_compat(self): + # GH#8674 + zero_array = np.array([0] * 5) + 
data = np.random.randn(5) + expected = Series([0.] * 5) + + result = zero_array / Series(data) + tm.assert_series_equal(result, expected) + + result = Series(zero_array) / data + tm.assert_series_equal(result, expected) + + result = Series(zero_array) / Series(data) + tm.assert_series_equal(result, expected) + + def test_div_zero_inf_signs(self): + # GH#9144, inf signing + ser = Series([-1, 0, 1], name='first') + expected = Series([-np.inf, np.nan, np.inf], name='first') + + result = ser / 0 + tm.assert_series_equal(result, expected) + + def test_rdiv_zero(self): + # GH#9144 + ser = Series([-1, 0, 1], name='first') + expected = Series([0.0, np.nan, 0.0], name='first') + + result = 0 / ser + tm.assert_series_equal(result, expected) + + def test_floordiv_div(self): + # GH#9144 + ser = Series([-1, 0, 1], name='first') + + result = ser // 0 + expected = Series([-np.inf, np.nan, np.inf], name='first') + tm.assert_series_equal(result, expected) + + class TestSeriesArithmetic(object): # Standard, numeric, or otherwise not-Timestamp/Timedelta/Period dtypes @pytest.mark.parametrize('data', [ @@ -316,6 +416,20 @@ def test_series_radd_str(self): tm.assert_series_equal('a' + ser, pd.Series(['ax', np.nan, 'ax'])) tm.assert_series_equal(ser + 'a', pd.Series(['xa', np.nan, 'xa'])) + @pytest.mark.parametrize('dtype', [None, object]) + def test_series_with_dtype_radd_timedelta(self, dtype): + # note this test is _not_ aimed at timedelta64-dtyped Series + ser = pd.Series([pd.Timedelta('1 days'), pd.Timedelta('2 days'), + pd.Timedelta('3 days')], dtype=dtype) + expected = pd.Series([pd.Timedelta('4 days'), pd.Timedelta('5 days'), + pd.Timedelta('6 days')]) + + result = pd.Timedelta('3 days') + ser + tm.assert_series_equal(result, expected) + + result = ser + pd.Timedelta('3 days') + tm.assert_series_equal(result, expected) + class TestPeriodSeriesArithmetic(object): def test_ops_series_timedelta(self): @@ -377,3 +491,361 @@ def test_dt64ser_sub_datetime_dtype(self): ser = 
Series([ts]) result = pd.to_timedelta(np.abs(ser - dt)) assert result.dtype == 'timedelta64[ns]' + + +class TestTimedeltaSeriesAdditionSubtraction(object): + # Tests for Series[timedelta64[ns]] __add__, __sub__, __radd__, __rsub__ + + # ------------------------------------------------------------------ + # Operations with int-like others + + def test_td64series_add_int_series_invalid(self, tdser): + with pytest.raises(TypeError): + tdser + Series([2, 3, 4]) + + @pytest.mark.xfail(reason='GH#19123 integer interpreted as nanoseconds') + def test_td64series_radd_int_series_invalid(self, tdser): + with pytest.raises(TypeError): + Series([2, 3, 4]) + tdser + + def test_td64series_sub_int_series_invalid(self, tdser): + with pytest.raises(TypeError): + tdser - Series([2, 3, 4]) + + @pytest.mark.xfail(reason='GH#19123 integer interpreted as nanoseconds') + def test_td64series_rsub_int_series_invalid(self, tdser): + with pytest.raises(TypeError): + Series([2, 3, 4]) - tdser + + def test_td64_series_add_intlike(self): + # GH#19123 + tdi = pd.TimedeltaIndex(['59 days', '59 days', 'NaT']) + ser = Series(tdi) + + other = Series([20, 30, 40], dtype='uint8') + + pytest.raises(TypeError, ser.__add__, 1) + pytest.raises(TypeError, ser.__sub__, 1) + + pytest.raises(TypeError, ser.__add__, other) + pytest.raises(TypeError, ser.__sub__, other) + + pytest.raises(TypeError, ser.__add__, other.values) + pytest.raises(TypeError, ser.__sub__, other.values) + + pytest.raises(TypeError, ser.__add__, pd.Index(other)) + pytest.raises(TypeError, ser.__sub__, pd.Index(other)) + + @pytest.mark.parametrize('scalar', [1, 1.5, np.array(2)]) + def test_td64series_add_sub_numeric_scalar_invalid(self, scalar, tdser): + with pytest.raises(TypeError): + tdser + scalar + with pytest.raises(TypeError): + scalar + tdser + with pytest.raises(TypeError): + tdser - scalar + with pytest.raises(TypeError): + scalar - tdser + + @pytest.mark.parametrize('dtype', ['int64', 'int32', 'int16', + 'uint64', 'uint32', 
'uint16', 'uint8', + 'float64', 'float32', 'float16']) + @pytest.mark.parametrize('vector', [ + np.array([1, 2, 3]), + pd.Index([1, 2, 3]), + pytest.param(Series([1, 2, 3]), + marks=pytest.mark.xfail(reason='GH#19123 integer ' + 'interpreted as nanos')) + ]) + def test_td64series_add_sub_numeric_array_invalid(self, vector, + dtype, tdser): + vector = vector.astype(dtype) + with pytest.raises(TypeError): + tdser + vector + with pytest.raises(TypeError): + vector + tdser + with pytest.raises(TypeError): + tdser - vector + with pytest.raises(TypeError): + vector - tdser + + # ------------------------------------------------------------------ + # Operations with datetime-like others + + def test_td64series_add_sub_timestamp(self): + # GH#11925 + tdser = Series(pd.timedelta_range('1 day', periods=3)) + ts = Timestamp('2012-01-01') + expected = Series(pd.date_range('2012-01-02', periods=3)) + tm.assert_series_equal(ts + tdser, expected) + tm.assert_series_equal(tdser + ts, expected) + + expected2 = Series(pd.date_range('2011-12-31', periods=3, freq='-1D')) + tm.assert_series_equal(ts - tdser, expected2) + tm.assert_series_equal(ts + (-tdser), expected2) + + with pytest.raises(TypeError): + tdser - ts + + # ------------------------------------------------------------------ + # Operations with timedelta-like others (including DateOffsets) + + @pytest.mark.parametrize('names', [(None, None, None), + ('Egon', 'Venkman', None), + ('NCC1701D', 'NCC1701D', 'NCC1701D')]) + def test_td64_series_with_tdi(self, names): + # GH#17250 make sure result dtype is correct + # GH#19043 make sure names are propagated correctly + tdi = pd.TimedeltaIndex(['0 days', '1 day'], name=names[0]) + ser = Series([Timedelta(hours=3), Timedelta(hours=4)], name=names[1]) + expected = Series([Timedelta(hours=3), Timedelta(days=1, hours=4)], + name=names[2]) + + result = tdi + ser + tm.assert_series_equal(result, expected) + assert result.dtype == 'timedelta64[ns]' + + result = ser + tdi + 
tm.assert_series_equal(result, expected) + assert result.dtype == 'timedelta64[ns]' + + expected = Series([Timedelta(hours=-3), Timedelta(days=1, hours=-4)], + name=names[2]) + + result = tdi - ser + tm.assert_series_equal(result, expected) + assert result.dtype == 'timedelta64[ns]' + + result = ser - tdi + tm.assert_series_equal(result, -expected) + assert result.dtype == 'timedelta64[ns]' + + def test_td64_sub_NaT(self): + # GH#18808 + ser = Series([NaT, Timedelta('1s')]) + res = ser - NaT + expected = Series([NaT, NaT], dtype='timedelta64[ns]') + tm.assert_series_equal(res, expected) + + +class TestTimedeltaSeriesMultiplicationDivision(object): + # Tests for Series[timedelta64[ns]] + # __mul__, __rmul__, __div__, __rdiv__, __floordiv__, __rfloordiv__ + + # ------------------------------------------------------------------ + # __floordiv__, __rfloordiv__ + + @pytest.mark.parametrize('scalar_td', [ + timedelta(minutes=5, seconds=4), + Timedelta('5m4s'), + Timedelta('5m4s').to_timedelta64()]) + def test_timedelta_floordiv(self, scalar_td): + # GH#18831 + td1 = Series([timedelta(minutes=5, seconds=3)] * 3) + td1.iloc[2] = np.nan + + result = td1 // scalar_td + expected = Series([0, 0, np.nan]) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('scalar_td', [ + timedelta(minutes=5, seconds=4), + Timedelta('5m4s'), + Timedelta('5m4s').to_timedelta64()]) + def test_timedelta_rfloordiv(self, scalar_td): + # GH#18831 + td1 = Series([timedelta(minutes=5, seconds=3)] * 3) + td1.iloc[2] = np.nan + result = scalar_td // td1 + expected = Series([1, 1, np.nan]) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('scalar_td', [ + timedelta(minutes=5, seconds=4), + Timedelta('5m4s'), + Timedelta('5m4s').to_timedelta64()]) + def test_timedelta_rfloordiv_explicit(self, scalar_td): + # GH#18831 + td1 = Series([timedelta(minutes=5, seconds=3)] * 3) + td1.iloc[2] = np.nan + + # We can test __rfloordiv__ using this syntax, + # see 
`test_timedelta_rfloordiv` + result = td1.__rfloordiv__(scalar_td) + expected = Series([1, 1, np.nan]) + tm.assert_series_equal(result, expected) + + # ------------------------------------------------------------------ + # Operations with int-like others + + @pytest.mark.parametrize('dtype', ['int64', 'int32', 'int16', + 'uint64', 'uint32', 'uint16', 'uint8', + 'float64', 'float32', 'float16']) + @pytest.mark.parametrize('vector', [np.array([20, 30, 40]), + pd.Index([20, 30, 40]), + Series([20, 30, 40])]) + def test_td64series_div_numeric_array(self, vector, dtype, tdser): + # GH#4521 + # divide/multiply by integers + vector = vector.astype(dtype) + expected = Series(['2.95D', '1D 23H 12m', 'NaT'], + dtype='timedelta64[ns]') + + result = tdser / vector + tm.assert_series_equal(result, expected) + + with pytest.raises(TypeError): + vector / tdser + + @pytest.mark.parametrize('dtype', ['int64', 'int32', 'int16', + 'uint64', 'uint32', 'uint16', 'uint8', + 'float64', 'float32', 'float16']) + @pytest.mark.parametrize('vector', [np.array([20, 30, 40]), + pd.Index([20, 30, 40]), + Series([20, 30, 40])]) + def test_td64series_mul_numeric_array(self, vector, dtype, tdser): + # GH#4521 + # divide/multiply by integers + vector = vector.astype(dtype) + + expected = Series(['1180 Days', '1770 Days', 'NaT'], + dtype='timedelta64[ns]') + + result = tdser * vector + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('dtype', ['int64', 'int32', 'int16', + 'uint64', 'uint32', 'uint16', 'uint8', + 'float64', 'float32', 'float16']) + @pytest.mark.parametrize('vector', [ + np.array([20, 30, 40]), + pytest.param(pd.Index([20, 30, 40]), + marks=pytest.mark.xfail(reason='__mul__ raises ' + 'instead of returning ' + 'NotImplemented')), + Series([20, 30, 40]) + ]) + def test_td64series_rmul_numeric_array(self, vector, dtype, tdser): + # GH#4521 + # divide/multiply by integers + vector = vector.astype(dtype) + + expected = Series(['1180 Days', '1770 Days', 'NaT'], + 
dtype='timedelta64[ns]') + + result = vector * tdser + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('one', [1, np.array(1), 1.0, np.array(1.0)]) + def test_td64series_mul_numeric_scalar(self, one, tdser): + # GH#4521 + # divide/multiply by integers + expected = Series(['-59 Days', '-59 Days', 'NaT'], + dtype='timedelta64[ns]') + + result = tdser * (-one) + tm.assert_series_equal(result, expected) + result = (-one) * tdser + tm.assert_series_equal(result, expected) + + expected = Series(['118 Days', '118 Days', 'NaT'], + dtype='timedelta64[ns]') + + result = tdser * (2 * one) + tm.assert_series_equal(result, expected) + result = (2 * one) * tdser + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('two', [ + 2, 2.0, + pytest.param(np.array(2), + marks=pytest.mark.xfail(reason='GH#19011 is_list_like ' + 'incorrectly True.')), + pytest.param(np.array(2.0), + marks=pytest.mark.xfail(reason='GH#19011 is_list_like ' + 'incorrectly True.')), + ]) + def test_td64series_div_numeric_scalar(self, two, tdser): + # GH#4521 + # divide/multiply by integers + expected = Series(['29.5D', '29.5D', 'NaT'], dtype='timedelta64[ns]') + + result = tdser / two + tm.assert_series_equal(result, expected) + + # ------------------------------------------------------------------ + # Operations with timedelta-like others + + @pytest.mark.parametrize('names', [(None, None, None), + ('Egon', 'Venkman', None), + ('NCC1701D', 'NCC1701D', 'NCC1701D')]) + def test_tdi_mul_int_series(self, names): + # GH#19042 + tdi = pd.TimedeltaIndex(['0days', '1day', '2days', '3days', '4days'], + name=names[0]) + ser = Series([0, 1, 2, 3, 4], dtype=np.int64, name=names[1]) + + expected = Series(['0days', '1day', '4days', '9days', '16days'], + dtype='timedelta64[ns]', + name=names[2]) + + result = ser * tdi + tm.assert_series_equal(result, expected) + + # The direct operation tdi * ser still needs to be fixed. 
+ result = ser.__rmul__(tdi) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('names', [(None, None, None), + ('Egon', 'Venkman', None), + ('NCC1701D', 'NCC1701D', 'NCC1701D')]) + def test_float_series_rdiv_tdi(self, names): + # GH#19042 + # TODO: the direct operation TimedeltaIndex / Series still + # needs to be fixed. + tdi = pd.TimedeltaIndex(['0days', '1day', '2days', '3days', '4days'], + name=names[0]) + ser = Series([1.5, 3, 4.5, 6, 7.5], dtype=np.float64, name=names[1]) + + expected = Series([tdi[n] / ser[n] for n in range(len(ser))], + dtype='timedelta64[ns]', + name=names[2]) + + result = ser.__rdiv__(tdi) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('scalar_td', [ + timedelta(minutes=5, seconds=4), + Timedelta('5m4s'), + Timedelta('5m4s').to_timedelta64()]) + def test_td64series_mul_timedeltalike_invalid(self, scalar_td): + td1 = Series([timedelta(minutes=5, seconds=3)] * 3) + td1.iloc[2] = np.nan + + # check that we are getting a TypeError + # with 'operate' (from core/ops.py) for the ops that are not + # defined + pattern = 'operate|unsupported|cannot|not supported' + with tm.assert_raises_regex(TypeError, pattern): + td1 * scalar_td + with tm.assert_raises_regex(TypeError, pattern): + scalar_td * td1 + + +class TestTimedeltaSeriesInvalidArithmeticOps(object): + @pytest.mark.parametrize('scalar_td', [ + timedelta(minutes=5, seconds=4), + Timedelta('5m4s'), + Timedelta('5m4s').to_timedelta64()]) + def test_td64series_pow_invalid(self, scalar_td): + td1 = Series([timedelta(minutes=5, seconds=3)] * 3) + td1.iloc[2] = np.nan + + # check that we are getting a TypeError + # with 'operate' (from core/ops.py) for the ops that are not + # defined + pattern = 'operate|unsupported|cannot|not supported' + with tm.assert_raises_regex(TypeError, pattern): + scalar_td ** td1 + with tm.assert_raises_regex(TypeError, pattern): + td1 ** scalar_td diff --git a/pandas/tests/series/test_operators.py 
b/pandas/tests/series/test_operators.py index 554b3e15d8f10..f90fcce973f00 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -9,7 +9,7 @@ import operator from itertools import product, starmap -from numpy import nan, inf +from numpy import nan import numpy as np import pandas as pd @@ -29,11 +29,6 @@ from .common import TestData -@pytest.fixture -def tdser(): - return Series(['59 Days', '59 Days', 'NaT'], dtype='timedelta64[ns]') - - class TestSeriesComparisons(object): def test_series_comparison_scalars(self): series = Series(date_range('1/1/2000', periods=10)) @@ -579,291 +574,7 @@ def test_comp_ops_df_compat(self): left.to_frame() < right.to_frame() -class TestSeriesArithmetic(object): - def test_divide_decimal(self): - """ resolves issue #9787 """ - from decimal import Decimal - - expected = Series([Decimal(5)]) - - s = Series([Decimal(10)]) - s = s / Decimal(2) - - assert_series_equal(expected, s) - - s = Series([Decimal(10)]) - s = s // Decimal(2) - - assert_series_equal(expected, s) - - @pytest.mark.parametrize( - 'dtype2', - [ - np.int64, np.int32, np.int16, np.int8, - np.float64, np.float32, np.float16, - np.uint64, np.uint32, - np.uint16, np.uint8 - ]) - @pytest.mark.parametrize('dtype1', [np.int64, np.float64, np.uint64]) - def test_ser_div_ser(self, dtype1, dtype2): - # no longer do integer div for any ops, but deal with the 0's - first = Series([3, 4, 5, 8], name='first').astype(dtype1) - second = Series([0, 0, 0, 3], name='second').astype(dtype2) - - with np.errstate(all='ignore'): - expected = Series(first.values.astype(np.float64) / second.values, - dtype='float64', name=None) - expected.iloc[0:3] = np.inf - - result = first / second - assert_series_equal(result, expected) - assert not result.equals(second / first) - - def test_div_equiv_binop(self): - # Test Series.div as well as Series.__div__ - # float/integer issue - # GH#7785 - first = pd.Series([1, 0], name='first') - second = pd.Series([-0.01, 
-0.02], name='second') - expected = Series([-0.01, -np.inf]) - - result = second.div(first) - assert_series_equal(result, expected, check_names=False) - - result = second / first - assert_series_equal(result, expected) - - def test_rdiv_zero_compat(self): - # GH#8674 - zero_array = np.array([0] * 5) - data = np.random.randn(5) - expected = pd.Series([0.] * 5) - - result = zero_array / pd.Series(data) - assert_series_equal(result, expected) - - result = pd.Series(zero_array) / data - assert_series_equal(result, expected) - - result = pd.Series(zero_array) / pd.Series(data) - assert_series_equal(result, expected) - - def test_div_zero_inf_signs(self): - # GH#9144, inf signing - ser = Series([-1, 0, 1], name='first') - expected = Series([-np.inf, np.nan, np.inf], name='first') - - result = ser / 0 - assert_series_equal(result, expected) - - def test_rdiv_zero(self): - # GH#9144 - ser = Series([-1, 0, 1], name='first') - expected = Series([0.0, np.nan, 0.0], name='first') - - result = 0 / ser - assert_series_equal(result, expected) - - def test_floordiv_div(self): - # GH#9144 - ser = Series([-1, 0, 1], name='first') - - result = ser // 0 - expected = Series([-inf, nan, inf], name='first') - assert_series_equal(result, expected) - - -class TestTimedeltaSeriesArithmeticWithIntegers(object): - # Tests for Series with dtype 'timedelta64[ns]' arithmetic operations - # with integer and int-like others - - # ------------------------------------------------------------------ - # Addition and Subtraction - - def test_td64series_add_int_series_invalid(self, tdser): - with pytest.raises(TypeError): - tdser + Series([2, 3, 4]) - - @pytest.mark.xfail(reason='GH#19123 integer interpreted as nanoseconds') - def test_td64series_radd_int_series_invalid(self, tdser): - with pytest.raises(TypeError): - Series([2, 3, 4]) + tdser - - def test_td64series_sub_int_series_invalid(self, tdser): - with pytest.raises(TypeError): - tdser - Series([2, 3, 4]) - - @pytest.mark.xfail(reason='GH#19123 
integer interpreted as nanoseconds') - def test_td64series_rsub_int_series_invalid(self, tdser): - with pytest.raises(TypeError): - Series([2, 3, 4]) - tdser - - def test_td64_series_add_intlike(self): - # GH#19123 - tdi = pd.TimedeltaIndex(['59 days', '59 days', 'NaT']) - ser = Series(tdi) - - other = Series([20, 30, 40], dtype='uint8') - - pytest.raises(TypeError, ser.__add__, 1) - pytest.raises(TypeError, ser.__sub__, 1) - - pytest.raises(TypeError, ser.__add__, other) - pytest.raises(TypeError, ser.__sub__, other) - - pytest.raises(TypeError, ser.__add__, other.values) - pytest.raises(TypeError, ser.__sub__, other.values) - - pytest.raises(TypeError, ser.__add__, pd.Index(other)) - pytest.raises(TypeError, ser.__sub__, pd.Index(other)) - - @pytest.mark.parametrize('scalar', [1, 1.5, np.array(2)]) - def test_td64series_add_sub_numeric_scalar_invalid(self, scalar, tdser): - with pytest.raises(TypeError): - tdser + scalar - with pytest.raises(TypeError): - scalar + tdser - with pytest.raises(TypeError): - tdser - scalar - with pytest.raises(TypeError): - scalar - tdser - - @pytest.mark.parametrize('dtype', ['int64', 'int32', 'int16', - 'uint64', 'uint32', 'uint16', 'uint8', - 'float64', 'float32', 'float16']) - @pytest.mark.parametrize('vector', [ - np.array([1, 2, 3]), - pd.Index([1, 2, 3]), - pytest.param(Series([1, 2, 3]), - marks=pytest.mark.xfail(reason='GH#19123 integer ' - 'interpreted as nanos')) - ]) - def test_td64series_add_sub_numeric_array_invalid(self, vector, - dtype, tdser): - vector = vector.astype(dtype) - with pytest.raises(TypeError): - tdser + vector - with pytest.raises(TypeError): - vector + tdser - with pytest.raises(TypeError): - tdser - vector - with pytest.raises(TypeError): - vector - tdser - - # ------------------------------------------------------------------ - # Multiplicaton and Division - - @pytest.mark.parametrize('dtype', ['int64', 'int32', 'int16', - 'uint64', 'uint32', 'uint16', 'uint8', - 'float64', 'float32', 'float16']) - 
@pytest.mark.parametrize('vector', [np.array([20, 30, 40]), - pd.Index([20, 30, 40]), - Series([20, 30, 40])]) - def test_td64series_div_numeric_array(self, vector, dtype, tdser): - # GH 4521 - # divide/multiply by integers - vector = vector.astype(dtype) - expected = Series(['2.95D', '1D 23H 12m', 'NaT'], - dtype='timedelta64[ns]') - - result = tdser / vector - assert_series_equal(result, expected) - - with pytest.raises(TypeError): - vector / tdser - - @pytest.mark.parametrize('dtype', ['int64', 'int32', 'int16', - 'uint64', 'uint32', 'uint16', 'uint8', - 'float64', 'float32', 'float16']) - @pytest.mark.parametrize('vector', [np.array([20, 30, 40]), - pd.Index([20, 30, 40]), - Series([20, 30, 40])]) - def test_td64series_mul_numeric_array(self, vector, dtype, tdser): - # GH 4521 - # divide/multiply by integers - vector = vector.astype(dtype) - - expected = Series(['1180 Days', '1770 Days', 'NaT'], - dtype='timedelta64[ns]') - - result = tdser * vector - assert_series_equal(result, expected) - - @pytest.mark.parametrize('dtype', ['int64', 'int32', 'int16', - 'uint64', 'uint32', 'uint16', 'uint8', - 'float64', 'float32', 'float16']) - @pytest.mark.parametrize('vector', [ - np.array([20, 30, 40]), - pytest.param(pd.Index([20, 30, 40]), - marks=pytest.mark.xfail(reason='__mul__ raises ' - 'instead of returning ' - 'NotImplemented')), - Series([20, 30, 40]) - ]) - def test_td64series_rmul_numeric_array(self, vector, dtype, tdser): - # GH 4521 - # divide/multiply by integers - vector = vector.astype(dtype) - - expected = Series(['1180 Days', '1770 Days', 'NaT'], - dtype='timedelta64[ns]') - - result = vector * tdser - assert_series_equal(result, expected) - - @pytest.mark.parametrize('one', [1, np.array(1), 1.0, np.array(1.0)]) - def test_td64series_mul_numeric_scalar(self, one, tdser): - # GH 4521 - # divide/multiply by integers - expected = Series(['-59 Days', '-59 Days', 'NaT'], - dtype='timedelta64[ns]') - - result = tdser * (-one) - assert_series_equal(result, 
expected) - result = (-one) * tdser - assert_series_equal(result, expected) - - expected = Series(['118 Days', '118 Days', 'NaT'], - dtype='timedelta64[ns]') - - result = tdser * (2 * one) - assert_series_equal(result, expected) - result = (2 * one) * tdser - assert_series_equal(result, expected) - - @pytest.mark.parametrize('two', [ - 2, 2.0, - pytest.param(np.array(2), - marks=pytest.mark.xfail(reason='GH#19011 is_list_like ' - 'incorrectly True.')), - pytest.param(np.array(2.0), - marks=pytest.mark.xfail(reason='GH#19011 is_list_like ' - 'incorrectly True.')), - ]) - def test_td64series_div_numeric_scalar(self, two, tdser): - # GH 4521 - # divide/multiply by integers - expected = Series(['29.5D', '29.5D', 'NaT'], dtype='timedelta64[ns]') - - result = tdser / two - assert_series_equal(result, expected) - - class TestTimedeltaSeriesArithmetic(object): - def test_td64series_add_sub_timestamp(self): - # GH11925 - tdser = Series(timedelta_range('1 day', periods=3)) - ts = Timestamp('2012-01-01') - expected = Series(date_range('2012-01-02', periods=3)) - assert_series_equal(ts + tdser, expected) - assert_series_equal(tdser + ts, expected) - - expected2 = Series(date_range('2011-12-31', periods=3, freq='-1D')) - assert_series_equal(ts - tdser, expected2) - assert_series_equal(ts + (-tdser), expected2) - - with pytest.raises(TypeError): - tdser - ts def test_timedelta64_operations_with_DateOffset(self): # GH 10699 @@ -1081,13 +792,6 @@ def test_timedelta64_ops_nat(self): assert_series_equal(timedelta_series / nan, nat_series_dtype_timedelta) - def test_td64_sub_NaT(self): - # GH#18808 - ser = Series([NaT, Timedelta('1s')]) - res = ser - NaT - expected = Series([NaT, NaT], dtype='timedelta64[ns]') - tm.assert_series_equal(res, expected) - @pytest.mark.parametrize('scalar_td', [timedelta(minutes=5, seconds=4), Timedelta(minutes=5, seconds=4), Timedelta('5m4s').to_timedelta64()]) @@ -1103,135 +807,6 @@ def test_operators_timedelta64_with_timedelta(self, scalar_td): td1 / 
scalar_td scalar_td / td1 - @pytest.mark.parametrize('scalar_td', [ - timedelta(minutes=5, seconds=4), - Timedelta('5m4s'), - Timedelta('5m4s').to_timedelta64()]) - def test_operators_timedelta64_with_timedelta_invalid(self, scalar_td): - td1 = Series([timedelta(minutes=5, seconds=3)] * 3) - td1.iloc[2] = np.nan - - # check that we are getting a TypeError - # with 'operate' (from core/ops.py) for the ops that are not - # defined - pattern = 'operate|unsupported|cannot|not supported' - with tm.assert_raises_regex(TypeError, pattern): - td1 * scalar_td - with tm.assert_raises_regex(TypeError, pattern): - scalar_td * td1 - with tm.assert_raises_regex(TypeError, pattern): - scalar_td ** td1 - with tm.assert_raises_regex(TypeError, pattern): - td1 ** scalar_td - - @pytest.mark.parametrize('scalar_td', [ - timedelta(minutes=5, seconds=4), - Timedelta('5m4s'), - Timedelta('5m4s').to_timedelta64()]) - def test_timedelta_rfloordiv(self, scalar_td): - # GH#18831 - td1 = Series([timedelta(minutes=5, seconds=3)] * 3) - td1.iloc[2] = np.nan - result = scalar_td // td1 - expected = Series([1, 1, np.nan]) - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize('scalar_td', [ - timedelta(minutes=5, seconds=4), - Timedelta('5m4s'), - Timedelta('5m4s').to_timedelta64()]) - def test_timedelta_rfloordiv_explicit(self, scalar_td): - # GH#18831 - td1 = Series([timedelta(minutes=5, seconds=3)] * 3) - td1.iloc[2] = np.nan - - # We can test __rfloordiv__ using this syntax, - # see `test_timedelta_rfloordiv` - result = td1.__rfloordiv__(scalar_td) - expected = Series([1, 1, np.nan]) - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize('scalar_td', [ - timedelta(minutes=5, seconds=4), - Timedelta('5m4s'), - Timedelta('5m4s').to_timedelta64()]) - def test_timedelta_floordiv(self, scalar_td): - # GH#18831 - td1 = Series([timedelta(minutes=5, seconds=3)] * 3) - td1.iloc[2] = np.nan - - result = td1 // scalar_td - expected = Series([0, 0, np.nan]) - 
tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize('names', [(None, None, None), - ('Egon', 'Venkman', None), - ('NCC1701D', 'NCC1701D', 'NCC1701D')]) - def test_td64_series_with_tdi(self, names): - # GH#17250 make sure result dtype is correct - # GH#19043 make sure names are propagated correctly - tdi = pd.TimedeltaIndex(['0 days', '1 day'], name=names[0]) - ser = Series([Timedelta(hours=3), Timedelta(hours=4)], name=names[1]) - expected = Series([Timedelta(hours=3), Timedelta(days=1, hours=4)], - name=names[2]) - - result = tdi + ser - tm.assert_series_equal(result, expected) - assert result.dtype == 'timedelta64[ns]' - - result = ser + tdi - tm.assert_series_equal(result, expected) - assert result.dtype == 'timedelta64[ns]' - - expected = Series([Timedelta(hours=-3), Timedelta(days=1, hours=-4)], - name=names[2]) - - result = tdi - ser - tm.assert_series_equal(result, expected) - assert result.dtype == 'timedelta64[ns]' - - result = ser - tdi - tm.assert_series_equal(result, -expected) - assert result.dtype == 'timedelta64[ns]' - - @pytest.mark.parametrize('names', [(None, None, None), - ('Egon', 'Venkman', None), - ('NCC1701D', 'NCC1701D', 'NCC1701D')]) - def test_tdi_mul_int_series(self, names): - # GH#19042 - tdi = pd.TimedeltaIndex(['0days', '1day', '2days', '3days', '4days'], - name=names[0]) - ser = Series([0, 1, 2, 3, 4], dtype=np.int64, name=names[1]) - - expected = Series(['0days', '1day', '4days', '9days', '16days'], - dtype='timedelta64[ns]', - name=names[2]) - - result = ser * tdi - tm.assert_series_equal(result, expected) - - # The direct operation tdi * ser still needs to be fixed. - result = ser.__rmul__(tdi) - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize('names', [(None, None, None), - ('Egon', 'Venkman', None), - ('NCC1701D', 'NCC1701D', 'NCC1701D')]) - def test_float_series_rdiv_tdi(self, names): - # GH#19042 - # TODO: the direct operation TimedeltaIndex / Series still - # needs to be fixed. 
- tdi = pd.TimedeltaIndex(['0days', '1day', '2days', '3days', '4days'], - name=names[0]) - ser = Series([1.5, 3, 4.5, 6, 7.5], dtype=np.float64, name=names[1]) - - expected = Series([tdi[n] / ser[n] for n in range(len(ser))], - dtype='timedelta64[ns]', - name=names[2]) - - result = ser.__rdiv__(tdi) - tm.assert_series_equal(result, expected) - class TestDatetimeSeriesArithmetic(object): @pytest.mark.parametrize( @@ -1994,138 +1569,6 @@ def test_operators_reverse_object(self, op): expected = op(1., arr.astype(float)) assert_series_equal(result.astype(float), expected) - def test_arith_ops_df_compat(self): - # GH 1134 - s1 = pd.Series([1, 2, 3], index=list('ABC'), name='x') - s2 = pd.Series([2, 2, 2], index=list('ABD'), name='x') - - exp = pd.Series([3.0, 4.0, np.nan, np.nan], - index=list('ABCD'), name='x') - assert_series_equal(s1 + s2, exp) - assert_series_equal(s2 + s1, exp) - - exp = pd.DataFrame({'x': [3.0, 4.0, np.nan, np.nan]}, - index=list('ABCD')) - assert_frame_equal(s1.to_frame() + s2.to_frame(), exp) - assert_frame_equal(s2.to_frame() + s1.to_frame(), exp) - - # different length - s3 = pd.Series([1, 2, 3], index=list('ABC'), name='x') - s4 = pd.Series([2, 2, 2, 2], index=list('ABCD'), name='x') - - exp = pd.Series([3, 4, 5, np.nan], - index=list('ABCD'), name='x') - assert_series_equal(s3 + s4, exp) - assert_series_equal(s4 + s3, exp) - - exp = pd.DataFrame({'x': [3, 4, 5, np.nan]}, - index=list('ABCD')) - assert_frame_equal(s3.to_frame() + s4.to_frame(), exp) - assert_frame_equal(s4.to_frame() + s3.to_frame(), exp) - - def test_bool_ops_df_compat(self): - # GH 1134 - s1 = pd.Series([True, False, True], index=list('ABC'), name='x') - s2 = pd.Series([True, True, False], index=list('ABD'), name='x') - - exp = pd.Series([True, False, False, False], - index=list('ABCD'), name='x') - assert_series_equal(s1 & s2, exp) - assert_series_equal(s2 & s1, exp) - - # True | np.nan => True - exp = pd.Series([True, True, True, False], - index=list('ABCD'), name='x') - 
assert_series_equal(s1 | s2, exp) - # np.nan | True => np.nan, filled with False - exp = pd.Series([True, True, False, False], - index=list('ABCD'), name='x') - assert_series_equal(s2 | s1, exp) - - # DataFrame doesn't fill nan with False - exp = pd.DataFrame({'x': [True, False, np.nan, np.nan]}, - index=list('ABCD')) - assert_frame_equal(s1.to_frame() & s2.to_frame(), exp) - assert_frame_equal(s2.to_frame() & s1.to_frame(), exp) - - exp = pd.DataFrame({'x': [True, True, np.nan, np.nan]}, - index=list('ABCD')) - assert_frame_equal(s1.to_frame() | s2.to_frame(), exp) - assert_frame_equal(s2.to_frame() | s1.to_frame(), exp) - - # different length - s3 = pd.Series([True, False, True], index=list('ABC'), name='x') - s4 = pd.Series([True, True, True, True], index=list('ABCD'), name='x') - - exp = pd.Series([True, False, True, False], - index=list('ABCD'), name='x') - assert_series_equal(s3 & s4, exp) - assert_series_equal(s4 & s3, exp) - - # np.nan | True => np.nan, filled with False - exp = pd.Series([True, True, True, False], - index=list('ABCD'), name='x') - assert_series_equal(s3 | s4, exp) - # True | np.nan => True - exp = pd.Series([True, True, True, True], - index=list('ABCD'), name='x') - assert_series_equal(s4 | s3, exp) - - exp = pd.DataFrame({'x': [True, False, True, np.nan]}, - index=list('ABCD')) - assert_frame_equal(s3.to_frame() & s4.to_frame(), exp) - assert_frame_equal(s4.to_frame() & s3.to_frame(), exp) - - exp = pd.DataFrame({'x': [True, True, True, np.nan]}, - index=list('ABCD')) - assert_frame_equal(s3.to_frame() | s4.to_frame(), exp) - assert_frame_equal(s4.to_frame() | s3.to_frame(), exp) - - def test_series_frame_radd_bug(self): - # GH 353 - vals = Series(tm.rands_array(5, 10)) - result = 'foo_' + vals - expected = vals.map(lambda x: 'foo_' + x) - assert_series_equal(result, expected) - - frame = DataFrame({'vals': vals}) - result = 'foo_' + frame - expected = DataFrame({'vals': vals.map(lambda x: 'foo_' + x)}) - assert_frame_equal(result, 
expected) - - # really raise this time - with pytest.raises(TypeError): - datetime.now() + self.ts - - with pytest.raises(TypeError): - self.ts + datetime.now() - - @pytest.mark.parametrize('dtype', [None, object]) - def test_series_with_dtype_radd_timedelta(self, dtype): - ser = pd.Series([pd.Timedelta('1 days'), pd.Timedelta('2 days'), - pd.Timedelta('3 days')], dtype=dtype) - expected = pd.Series([pd.Timedelta('4 days'), pd.Timedelta('5 days'), - pd.Timedelta('6 days')]) - - result = pd.Timedelta('3 days') + ser - assert_series_equal(result, expected) - - result = ser + pd.Timedelta('3 days') - assert_series_equal(result, expected) - - def test_operators_frame(self): - # rpow does not work with DataFrame - df = DataFrame({'A': self.ts}) - - assert_series_equal(self.ts + self.ts, self.ts + df['A'], - check_names=False) - assert_series_equal(self.ts ** self.ts, self.ts ** df['A'], - check_names=False) - assert_series_equal(self.ts < self.ts, self.ts < df['A'], - check_names=False) - assert_series_equal(self.ts / self.ts, self.ts / df['A'], - check_names=False) - def test_operators_combine(self): def _check_fill(meth, op, a, b, fill_value=0): exp_index = a.index.union(b.index) @@ -2231,15 +1674,6 @@ def test_datetime64_with_index(self): df['result'] = df['date'] - df.index assert_series_equal(df['result'], df['expected'], check_names=False) - def test_dti_tz_convert_to_utc(self): - base = pd.DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], - tz='UTC') - idx1 = base.tz_convert('Asia/Tokyo')[:2] - idx2 = base.tz_convert('US/Eastern')[1:] - - res = Series([1, 2], index=idx1) + Series([1, 1], index=idx2) - assert_series_equal(res, Series([np.nan, 3, np.nan], index=base)) - def test_op_duplicate_index(self): # GH14227 s1 = Series([1, 2], index=[1, 1]) @@ -2294,3 +1728,130 @@ def test_idxminmax_with_inf(self): assert np.isnan(s.idxmin(skipna=False)) assert s.idxmax() == 0 np.isnan(s.idxmax(skipna=False)) + + +class TestSeriesOperationsDataFrameCompat(object): + 
def test_operators_frame(self): + # rpow does not work with DataFrame + ts = tm.makeTimeSeries() + ts.name = 'ts' + + df = DataFrame({'A': ts}) + + assert_series_equal(ts + ts, ts + df['A'], + check_names=False) + assert_series_equal(ts ** ts, ts ** df['A'], + check_names=False) + assert_series_equal(ts < ts, ts < df['A'], + check_names=False) + assert_series_equal(ts / ts, ts / df['A'], + check_names=False) + + def test_series_frame_radd_bug(self): + # GH#353 + vals = Series(tm.rands_array(5, 10)) + result = 'foo_' + vals + expected = vals.map(lambda x: 'foo_' + x) + assert_series_equal(result, expected) + + frame = DataFrame({'vals': vals}) + result = 'foo_' + frame + expected = DataFrame({'vals': vals.map(lambda x: 'foo_' + x)}) + assert_frame_equal(result, expected) + + ts = tm.makeTimeSeries() + ts.name = 'ts' + + # really raise this time + with pytest.raises(TypeError): + datetime.now() + ts + + with pytest.raises(TypeError): + ts + datetime.now() + + def test_bool_ops_df_compat(self): + # GH 1134 + s1 = pd.Series([True, False, True], index=list('ABC'), name='x') + s2 = pd.Series([True, True, False], index=list('ABD'), name='x') + + exp = pd.Series([True, False, False, False], + index=list('ABCD'), name='x') + assert_series_equal(s1 & s2, exp) + assert_series_equal(s2 & s1, exp) + + # True | np.nan => True + exp = pd.Series([True, True, True, False], + index=list('ABCD'), name='x') + assert_series_equal(s1 | s2, exp) + # np.nan | True => np.nan, filled with False + exp = pd.Series([True, True, False, False], + index=list('ABCD'), name='x') + assert_series_equal(s2 | s1, exp) + + # DataFrame doesn't fill nan with False + exp = pd.DataFrame({'x': [True, False, np.nan, np.nan]}, + index=list('ABCD')) + assert_frame_equal(s1.to_frame() & s2.to_frame(), exp) + assert_frame_equal(s2.to_frame() & s1.to_frame(), exp) + + exp = pd.DataFrame({'x': [True, True, np.nan, np.nan]}, + index=list('ABCD')) + assert_frame_equal(s1.to_frame() | s2.to_frame(), exp) + 
assert_frame_equal(s2.to_frame() | s1.to_frame(), exp) + + # different length + s3 = pd.Series([True, False, True], index=list('ABC'), name='x') + s4 = pd.Series([True, True, True, True], index=list('ABCD'), name='x') + + exp = pd.Series([True, False, True, False], + index=list('ABCD'), name='x') + assert_series_equal(s3 & s4, exp) + assert_series_equal(s4 & s3, exp) + + # np.nan | True => np.nan, filled with False + exp = pd.Series([True, True, True, False], + index=list('ABCD'), name='x') + assert_series_equal(s3 | s4, exp) + # True | np.nan => True + exp = pd.Series([True, True, True, True], + index=list('ABCD'), name='x') + assert_series_equal(s4 | s3, exp) + + exp = pd.DataFrame({'x': [True, False, True, np.nan]}, + index=list('ABCD')) + assert_frame_equal(s3.to_frame() & s4.to_frame(), exp) + assert_frame_equal(s4.to_frame() & s3.to_frame(), exp) + + exp = pd.DataFrame({'x': [True, True, True, np.nan]}, + index=list('ABCD')) + assert_frame_equal(s3.to_frame() | s4.to_frame(), exp) + assert_frame_equal(s4.to_frame() | s3.to_frame(), exp) + + def test_arith_ops_df_compat(self): + # GH#1134 + s1 = pd.Series([1, 2, 3], index=list('ABC'), name='x') + s2 = pd.Series([2, 2, 2], index=list('ABD'), name='x') + + exp = pd.Series([3.0, 4.0, np.nan, np.nan], + index=list('ABCD'), name='x') + assert_series_equal(s1 + s2, exp) + assert_series_equal(s2 + s1, exp) + + exp = pd.DataFrame({'x': [3.0, 4.0, np.nan, np.nan]}, + index=list('ABCD')) + assert_frame_equal(s1.to_frame() + s2.to_frame(), exp) + assert_frame_equal(s2.to_frame() + s1.to_frame(), exp) + + # different length + s3 = pd.Series([1, 2, 3], index=list('ABC'), name='x') + s4 = pd.Series([2, 2, 2, 2], index=list('ABCD'), name='x') + + exp = pd.Series([3, 4, 5, np.nan], + index=list('ABCD'), name='x') + assert_series_equal(s3 + s4, exp) + assert_series_equal(s4 + s3, exp) + + exp = pd.DataFrame({'x': [3, 4, 5, np.nan]}, + index=list('ABCD')) + assert_frame_equal(s3.to_frame() + s4.to_frame(), exp) + 
assert_frame_equal(s4.to_frame() + s3.to_frame(), exp) diff --git a/pandas/tests/series/test_timezones.py b/pandas/tests/series/test_timezones.py index 2e15c964e4e93..b54645d04bd1a 100644 --- a/pandas/tests/series/test_timezones.py +++ b/pandas/tests/series/test_timezones.py @@ -88,6 +88,15 @@ def test_series_tz_convert(self): tm.assert_raises_regex(TypeError, "Cannot convert tz-naive", ts.tz_convert, 'US/Eastern') + def test_series_tz_convert_to_utc(self): + base = DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], + tz='UTC') + idx1 = base.tz_convert('Asia/Tokyo')[:2] + idx2 = base.tz_convert('US/Eastern')[1:] + + res = Series([1, 2], index=idx1) + Series([1, 1], index=idx2) + tm.assert_series_equal(res, Series([np.nan, 3, np.nan], index=base)) + # ----------------------------------------------------------------- # Series.append From e9754553c1e10b11f7cd51bca14d62c0eec8d490 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 21 Feb 2018 15:55:21 -0800 Subject: [PATCH 158/217] Fix name setting in DTI/TDI __add__ and __sub__ (#19744) --- doc/source/whatsnew/v0.23.0.txt | 2 + pandas/core/common.py | 15 ---- pandas/core/indexes/datetimelike.py | 51 +++++++----- pandas/core/indexes/datetimes.py | 36 +++++---- pandas/core/indexes/period.py | 4 +- pandas/core/indexes/timedeltas.py | 34 +++++--- pandas/core/ops.py | 78 +++++++++++++++---- pandas/core/series.py | 6 +- .../indexes/datetimes/test_arithmetic.py | 44 +++++++++-- .../indexes/timedeltas/test_arithmetic.py | 75 +++++++++++------- pandas/tests/test_common.py | 13 ++-- 11 files changed, 240 insertions(+), 118 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index a4b943f995a33..c9951e0ec4378 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -731,6 +731,8 @@ Datetimelike - Bug in :class:`Timestamp` and :func:`to_datetime` where a string representing a barely out-of-bounds timestamp would be incorrectly rounded down instead of 
raising ``OutOfBoundsDatetime`` (:issue:`19382`) - Bug in :func:`Timestamp.floor` :func:`DatetimeIndex.floor` where time stamps far in the future and past were not rounded correctly (:issue:`19206`) - Bug in :func:`to_datetime` where passing an out-of-bounds datetime with ``errors='coerce'`` and ``utc=True`` would raise ``OutOfBoundsDatetime`` instead of parsing to ``NaT`` (:issue:`19612`) +- Bug in :class:`DatetimeIndex` and :class:`TimedeltaIndex` addition and subtraction where name of the returned object was not always set consistently. (:issue:`19744`) +- Timedelta ^^^^^^^^^ diff --git a/pandas/core/common.py b/pandas/core/common.py index 6748db825acf0..77dc1522052d4 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -121,21 +121,6 @@ def _consensus_name_attr(objs): return name -def _maybe_match_name(a, b): - a_has = hasattr(a, 'name') - b_has = hasattr(b, 'name') - if a_has and b_has: - if a.name == b.name: - return a.name - else: - return None - elif a_has: - return a.name - elif b_has: - return b.name - return None - - def _get_info_slice(obj, indexer): """Slice the info axis of `obj` with `indexer`.""" if not hasattr(obj, '_info_axis_number'): diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index c98f8ceea0ffa..187f9fcf52dd4 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -29,7 +29,7 @@ from pandas.core.dtypes.generic import ( ABCIndex, ABCSeries, ABCPeriodIndex, ABCIndexClass) from pandas.core.dtypes.missing import isna -from pandas.core import common as com, algorithms +from pandas.core import common as com, algorithms, ops from pandas.core.algorithms import checked_add_with_arr from pandas.errors import NullFrequencyError import pandas.io.formats.printing as printing @@ -661,29 +661,37 @@ def __add__(self, other): if isinstance(other, ABCSeries): return NotImplemented elif is_timedelta64_dtype(other): - return self._add_delta(other) + result = 
self._add_delta(other) elif isinstance(other, (DateOffset, timedelta)): - return self._add_delta(other) + result = self._add_delta(other) elif is_offsetlike(other): # Array/Index of DateOffset objects - return self._add_offset_array(other) + result = self._add_offset_array(other) elif isinstance(self, TimedeltaIndex) and isinstance(other, Index): if hasattr(other, '_add_delta'): - return other._add_delta(self) - raise TypeError("cannot add TimedeltaIndex and {typ}" - .format(typ=type(other))) + result = other._add_delta(self) + else: + raise TypeError("cannot add TimedeltaIndex and {typ}" + .format(typ=type(other))) elif is_integer(other): - return self.shift(other) + # This check must come after the check for timedelta64_dtype + # or else it will incorrectly catch np.timedelta64 objects + result = self.shift(other) elif isinstance(other, (datetime, np.datetime64)): - return self._add_datelike(other) + result = self._add_datelike(other) elif isinstance(other, Index): - return self._add_datelike(other) + result = self._add_datelike(other) elif is_integer_dtype(other) and self.freq is None: # GH#19123 raise NullFrequencyError("Cannot shift with no freq") else: # pragma: no cover return NotImplemented + if result is not NotImplemented: + res_name = ops.get_op_result_name(self, other) + result.name = res_name + return result + cls.__add__ = __add__ cls.__radd__ = __add__ @@ -697,25 +705,27 @@ def __sub__(self, other): if isinstance(other, ABCSeries): return NotImplemented elif is_timedelta64_dtype(other): - return self._add_delta(-other) + result = self._add_delta(-other) elif isinstance(other, (DateOffset, timedelta)): - return self._add_delta(-other) + result = self._add_delta(-other) elif is_offsetlike(other): # Array/Index of DateOffset objects - return self._sub_offset_array(other) + result = self._sub_offset_array(other) elif isinstance(self, TimedeltaIndex) and isinstance(other, Index): if not isinstance(other, TimedeltaIndex): raise TypeError("cannot subtract 
TimedeltaIndex and {typ}" .format(typ=type(other).__name__)) - return self._add_delta(-other) + result = self._add_delta(-other) elif isinstance(other, DatetimeIndex): - return self._sub_datelike(other) + result = self._sub_datelike(other) elif is_integer(other): - return self.shift(-other) + # This check must come after the check for timedelta64_dtype + # or else it will incorrectly catch np.timedelta64 objects + result = self.shift(-other) elif isinstance(other, (datetime, np.datetime64)): - return self._sub_datelike(other) + result = self._sub_datelike(other) elif isinstance(other, Period): - return self._sub_period(other) + result = self._sub_period(other) elif isinstance(other, Index): raise TypeError("cannot subtract {typ1} and {typ2}" .format(typ1=type(self).__name__, @@ -726,6 +736,11 @@ def __sub__(self, other): else: # pragma: no cover return NotImplemented + if result is not NotImplemented: + res_name = ops.get_op_result_name(self, other) + result.name = res_name + return result + cls.__sub__ = __sub__ def __rsub__(self, other): diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index cc9ce1f3fd5eb..debeabf9bae23 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -886,7 +886,7 @@ def _sub_datelike(self, other): else: raise TypeError("cannot subtract DatetimeIndex and {typ}" .format(typ=type(other).__name__)) - return TimedeltaIndex(result, name=self.name, copy=False) + return TimedeltaIndex(result) def _sub_datelike_dti(self, other): """subtraction of two DatetimeIndexes""" @@ -910,20 +910,31 @@ def _maybe_update_attributes(self, attrs): return attrs def _add_delta(self, delta): - if isinstance(delta, ABCSeries): - return NotImplemented + """ + Add a timedelta-like, DateOffset, or TimedeltaIndex-like object + to self. 
+ + Parameters + ---------- + delta : {timedelta, np.timedelta64, DateOffset, + TimedeltaIndex, ndarray[timedelta64]} + Returns + ------- + result : DatetimeIndex + + Notes + ----- + The result's name is set outside of _add_delta by the calling + method (__add__ or __sub__) + """ from pandas import TimedeltaIndex - name = self.name if isinstance(delta, (Tick, timedelta, np.timedelta64)): new_values = self._add_delta_td(delta) elif is_timedelta64_dtype(delta): if not isinstance(delta, TimedeltaIndex): delta = TimedeltaIndex(delta) - else: - # update name when delta is Index - name = com._maybe_match_name(self, delta) new_values = self._add_delta_tdi(delta) elif isinstance(delta, DateOffset): new_values = self._add_offset(delta).asi8 @@ -931,7 +942,7 @@ def _add_delta(self, delta): new_values = self.astype('O') + delta tz = 'UTC' if self.tz is not None else None - result = DatetimeIndex(new_values, tz=tz, name=name, freq='infer') + result = DatetimeIndex(new_values, tz=tz, freq='infer') if self.tz is not None and self.tz is not utc: result = result.tz_convert(self.tz) return result @@ -954,22 +965,19 @@ def _add_offset(self, offset): def _add_offset_array(self, other): # Array/Index of DateOffset objects - if isinstance(other, ABCSeries): - return NotImplemented - elif len(other) == 1: + if len(other) == 1: return self + other[0] else: warnings.warn("Adding/subtracting array of DateOffsets to " "{} not vectorized".format(type(self)), PerformanceWarning) return self.astype('O') + np.array(other) + # TODO: pass freq='infer' like we do in _sub_offset_array? 
# TODO: This works for __add__ but loses dtype in __sub__ def _sub_offset_array(self, other): # Array/Index of DateOffset objects - if isinstance(other, ABCSeries): - return NotImplemented - elif len(other) == 1: + if len(other) == 1: return self - other[0] else: warnings.warn("Adding/subtracting array of DateOffsets to " diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 8f2d7d382a16e..60798e6d77e37 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -729,7 +729,7 @@ def _sub_datelike(self, other): if other is tslib.NaT: new_data = np.empty(len(self), dtype=np.int64) new_data.fill(tslib.iNaT) - return TimedeltaIndex(new_data, name=self.name) + return TimedeltaIndex(new_data) return NotImplemented def _sub_period(self, other): @@ -744,7 +744,7 @@ def _sub_period(self, other): new_data = new_data.astype(np.float64) new_data[self._isnan] = np.nan # result must be Int64Index or Float64Index - return Index(new_data, name=self.name) + return Index(new_data) def shift(self, n): """ diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 41e499da8e008..6b61db53d9a11 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -356,19 +356,32 @@ def _maybe_update_attributes(self, attrs): return attrs def _add_delta(self, delta): + """ + Add a timedelta-like, Tick, or TimedeltaIndex-like object + to self. 
+ + Parameters + ---------- + delta : {timedelta, np.timedelta64, Tick, TimedeltaIndex} + + Returns + ------- + result : TimedeltaIndex + + Notes + ----- + The result's name is set outside of _add_delta by the calling + method (__add__ or __sub__) + """ if isinstance(delta, (Tick, timedelta, np.timedelta64)): new_values = self._add_delta_td(delta) - name = self.name elif isinstance(delta, TimedeltaIndex): new_values = self._add_delta_tdi(delta) - # update name when delta is index - name = com._maybe_match_name(self, delta) else: raise TypeError("cannot add the type {0} to a TimedeltaIndex" .format(type(delta))) - result = TimedeltaIndex(new_values, freq='infer', name=name) - return result + return TimedeltaIndex(new_values, freq='infer') def _evaluate_with_timedelta_like(self, other, op, opstr, reversed=False): if isinstance(other, ABCSeries): @@ -409,7 +422,7 @@ def _add_datelike(self, other): result = checked_add_with_arr(i8, other.value, arr_mask=self._isnan) result = self._maybe_mask_results(result, fill_value=iNaT) - return DatetimeIndex(result, name=self.name, copy=False) + return DatetimeIndex(result) def _sub_datelike(self, other): # GH#19124 Timedelta - datetime is not in general well-defined. @@ -426,9 +439,7 @@ def _add_offset_array(self, other): # TimedeltaIndex can only operate with a subset of DateOffset # subclasses. Incompatible classes will raise AttributeError, # which we re-raise as TypeError - if isinstance(other, ABCSeries): - return NotImplemented - elif len(other) == 1: + if len(other) == 1: return self + other[0] else: from pandas.errors import PerformanceWarning @@ -436,6 +447,7 @@ def _add_offset_array(self, other): "{} not vectorized".format(type(self)), PerformanceWarning) return self.astype('O') + np.array(other) + # TODO: pass freq='infer' like we do in _sub_offset_array? 
# TODO: This works for __add__ but loses dtype in __sub__ except AttributeError: raise TypeError("Cannot add non-tick DateOffset to TimedeltaIndex") @@ -446,9 +458,7 @@ def _sub_offset_array(self, other): # TimedeltaIndex can only operate with a subset of DateOffset # subclasses. Incompatible classes will raise AttributeError, # which we re-raise as TypeError - if isinstance(other, ABCSeries): - return NotImplemented - elif len(other) == 1: + if len(other) == 1: return self - other[0] else: from pandas.errors import PerformanceWarning diff --git a/pandas/core/ops.py b/pandas/core/ops.py index ad6102eb6ad0f..9e80ab3b3da4c 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -42,6 +42,67 @@ ABCSparseSeries, ABCSparseArray) +# ----------------------------------------------------------------------------- +# Ops Wrapping Utilities + +def get_op_result_name(left, right): + """ + Find the appropriate name to pin to an operation result. This result + should always be either an Index or a Series. + + Parameters + ---------- + left : {Series, Index} + right : object + + Returns + ------- + name : object + Usually a string + """ + # `left` is always a pd.Series when called from within ops + if isinstance(right, (ABCSeries, pd.Index)): + name = _maybe_match_name(left, right) + else: + name = left.name + return name + + +def _maybe_match_name(a, b): + """ + Try to find a name to attach to the result of an operation between + a and b. If only one of these has a `name` attribute, return that + name. Otherwise return a consensus name if they match or None if + they have different names. + + Parameters + ---------- + a : object + b : object + + Returns + ------- + name : str or None + + See also + -------- + pandas.core.common._consensus_name_attr + """ + a_has = hasattr(a, 'name') + b_has = hasattr(b, 'name') + if a_has and b_has: + if a.name == b.name: + return a.name + else: + # TODO: what if they both have np.nan for their names? 
+ return None + elif a_has: + return a.name + elif b_has: + return b.name + return None + + # ----------------------------------------------------------------------------- # Reversed Operations not available in the stdlib operator module. # Defining these instead of using lambdas allows us to reference them by name. @@ -822,7 +883,7 @@ def wrapper(left, right, name=name, na_op=na_op): return NotImplemented left, right = _align_method_SERIES(left, right) - res_name = _get_series_op_result_name(left, right) + res_name = get_op_result_name(left, right) if is_datetime64_dtype(left) or is_datetime64tz_dtype(left): result = dispatch_to_index_op(op, left, right, pd.DatetimeIndex) @@ -886,15 +947,6 @@ def dispatch_to_index_op(op, left, right, index_class): return result -def _get_series_op_result_name(left, right): - # `left` is always a pd.Series - if isinstance(right, (ABCSeries, pd.Index)): - name = com._maybe_match_name(left, right) - else: - name = left.name - return name - - def _comp_method_OBJECT_ARRAY(op, x, y): if isinstance(y, list): y = construct_1d_object_array_from_listlike(y) @@ -972,7 +1024,7 @@ def wrapper(self, other, axis=None): if axis is not None: self._get_axis_number(axis) - res_name = _get_series_op_result_name(self, other) + res_name = get_op_result_name(self, other) if isinstance(other, ABCDataFrame): # pragma: no cover # Defer to DataFrame implementation; fail early @@ -1098,7 +1150,7 @@ def wrapper(self, other): return NotImplemented elif isinstance(other, ABCSeries): - name = com._maybe_match_name(self, other) + name = get_op_result_name(self, other) is_other_int_dtype = is_integer_dtype(other.dtype) other = fill_int(other) if is_other_int_dtype else fill_bool(other) @@ -1536,7 +1588,7 @@ def wrapper(self, other): def _sparse_series_op(left, right, op, name): left, right = left.align(right, join='outer', copy=False) new_index = left.index - new_name = com._maybe_match_name(left, right) + new_name = get_op_result_name(left, right) from 
pandas.core.sparse.array import _sparse_array_op result = _sparse_array_op(left.values, right.values, op, name, diff --git a/pandas/core/series.py b/pandas/core/series.py index 90dc14836ab55..79ffb8be65838 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1728,7 +1728,7 @@ def _binop(self, other, func, level=None, fill_value=None): with np.errstate(all='ignore'): result = func(this_vals, other_vals) - name = com._maybe_match_name(self, other) + name = ops.get_op_result_name(self, other) result = self._constructor(result, index=new_index, name=name) result = result.__finalize__(self) if name is None: @@ -1769,7 +1769,7 @@ def combine(self, other, func, fill_value=np.nan): """ if isinstance(other, Series): new_index = self.index.union(other.index) - new_name = com._maybe_match_name(self, other) + new_name = ops.get_op_result_name(self, other) new_values = np.empty(len(new_index), dtype=self.dtype) for i, idx in enumerate(new_index): lv = self.get(idx, fill_value) @@ -1814,7 +1814,7 @@ def combine_first(self, other): this = self.reindex(new_index, copy=False) other = other.reindex(new_index, copy=False) # TODO: do we need name? 
- name = com._maybe_match_name(self, other) # noqa + name = ops.get_op_result_name(self, other) # noqa rs_vals = com._where_compat(isna(this), other._values, this._values) return self._constructor(rs_vals, index=new_index).__finalize__(self) diff --git a/pandas/tests/indexes/datetimes/test_arithmetic.py b/pandas/tests/indexes/datetimes/test_arithmetic.py index ddc97636ae0a8..f252d6ec31f89 100644 --- a/pandas/tests/indexes/datetimes/test_arithmetic.py +++ b/pandas/tests/indexes/datetimes/test_arithmetic.py @@ -721,11 +721,10 @@ def test_dti_add_series(self, tz, names): result4 = index + ser.values tm.assert_index_equal(result4, expected) - @pytest.mark.parametrize('box', [np.array, pd.Index]) - def test_dti_add_offset_array(self, tz, box): + def test_dti_add_offset_array(self, tz): # GH#18849 dti = pd.date_range('2017-01-01', periods=2, tz=tz) - other = box([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)]) + other = np.array([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)]) with tm.assert_produces_warning(PerformanceWarning): res = dti + other @@ -737,11 +736,29 @@ def test_dti_add_offset_array(self, tz, box): res2 = other + dti tm.assert_index_equal(res2, expected) - @pytest.mark.parametrize('box', [np.array, pd.Index]) - def test_dti_sub_offset_array(self, tz, box): + @pytest.mark.parametrize('names', [(None, None, None), + ('foo', 'bar', None), + ('foo', 'foo', 'foo')]) + def test_dti_add_offset_index(self, tz, names): + # GH#18849, GH#19744 + dti = pd.date_range('2017-01-01', periods=2, tz=tz, name=names[0]) + other = pd.Index([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)], + name=names[1]) + + with tm.assert_produces_warning(PerformanceWarning): + res = dti + other + expected = DatetimeIndex([dti[n] + other[n] for n in range(len(dti))], + name=names[2], freq='infer') + tm.assert_index_equal(res, expected) + + with tm.assert_produces_warning(PerformanceWarning): + res2 = other + dti + tm.assert_index_equal(res2, expected) + + def test_dti_sub_offset_array(self, tz): # 
GH#18824 dti = pd.date_range('2017-01-01', periods=2, tz=tz) - other = box([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)]) + other = np.array([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)]) with tm.assert_produces_warning(PerformanceWarning): res = dti - other @@ -749,6 +766,21 @@ def test_dti_sub_offset_array(self, tz, box): name=dti.name, freq='infer') tm.assert_index_equal(res, expected) + @pytest.mark.parametrize('names', [(None, None, None), + ('foo', 'bar', None), + ('foo', 'foo', 'foo')]) + def test_dti_sub_offset_index(self, tz, names): + # GH#18824, GH#19744 + dti = pd.date_range('2017-01-01', periods=2, tz=tz, name=names[0]) + other = pd.Index([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)], + name=names[1]) + + with tm.assert_produces_warning(PerformanceWarning): + res = dti - other + expected = DatetimeIndex([dti[n] - other[n] for n in range(len(dti))], + name=names[2], freq='infer') + tm.assert_index_equal(res, expected) + @pytest.mark.parametrize('names', [(None, None, None), ('foo', 'bar', None), ('foo', 'foo', 'foo')]) diff --git a/pandas/tests/indexes/timedeltas/test_arithmetic.py b/pandas/tests/indexes/timedeltas/test_arithmetic.py index 3dc60ed33b958..029fdfcefc299 100644 --- a/pandas/tests/indexes/timedeltas/test_arithmetic.py +++ b/pandas/tests/indexes/timedeltas/test_arithmetic.py @@ -194,11 +194,31 @@ def test_shift_no_freq(self): # ------------------------------------------------------------- - @pytest.mark.parametrize('box', [np.array, pd.Index]) - def test_tdi_add_offset_array(self, box): + @pytest.mark.parametrize('names', [(None, None, None), + ('foo', 'bar', None), + ('foo', 'foo', 'foo')]) + def test_tdi_add_offset_index(self, names): + # GH#18849, GH#19744 + tdi = TimedeltaIndex(['1 days 00:00:00', '3 days 04:00:00'], + name=names[0]) + other = pd.Index([pd.offsets.Hour(n=1), pd.offsets.Minute(n=-2)], + name=names[1]) + + expected = TimedeltaIndex([tdi[n] + other[n] for n in range(len(tdi))], + freq='infer', name=names[2]) + + with 
tm.assert_produces_warning(PerformanceWarning): + res = tdi + other + tm.assert_index_equal(res, expected) + + with tm.assert_produces_warning(PerformanceWarning): + res2 = other + tdi + tm.assert_index_equal(res2, expected) + + def test_tdi_add_offset_array(self): # GH#18849 tdi = TimedeltaIndex(['1 days 00:00:00', '3 days 04:00:00']) - other = box([pd.offsets.Hour(n=1), pd.offsets.Minute(n=-2)]) + other = np.array([pd.offsets.Hour(n=1), pd.offsets.Minute(n=-2)]) expected = TimedeltaIndex([tdi[n] + other[n] for n in range(len(tdi))], freq='infer') @@ -211,23 +231,27 @@ def test_tdi_add_offset_array(self, box): res2 = other + tdi tm.assert_index_equal(res2, expected) - anchored = box([pd.offsets.QuarterEnd(), - pd.offsets.Week(weekday=2)]) + @pytest.mark.parametrize('names', [(None, None, None), + ('foo', 'bar', None), + ('foo', 'foo', 'foo')]) + def test_tdi_sub_offset_index(self, names): + # GH#18824, GH#19744 + tdi = TimedeltaIndex(['1 days 00:00:00', '3 days 04:00:00'], + name=names[0]) + other = pd.Index([pd.offsets.Hour(n=1), pd.offsets.Minute(n=-2)], + name=names[1]) - # addition/subtraction ops with anchored offsets should issue - # a PerformanceWarning and _then_ raise a TypeError. 
- with pytest.raises(TypeError): - with tm.assert_produces_warning(PerformanceWarning): - tdi + anchored - with pytest.raises(TypeError): - with tm.assert_produces_warning(PerformanceWarning): - anchored + tdi + expected = TimedeltaIndex([tdi[n] - other[n] for n in range(len(tdi))], + freq='infer', name=names[2]) + + with tm.assert_produces_warning(PerformanceWarning): + res = tdi - other + tm.assert_index_equal(res, expected) - @pytest.mark.parametrize('box', [np.array, pd.Index]) - def test_tdi_sub_offset_array(self, box): + def test_tdi_sub_offset_array(self): # GH#18824 tdi = TimedeltaIndex(['1 days 00:00:00', '3 days 04:00:00']) - other = box([pd.offsets.Hour(n=1), pd.offsets.Minute(n=-2)]) + other = np.array([pd.offsets.Hour(n=1), pd.offsets.Minute(n=-2)]) expected = TimedeltaIndex([tdi[n] - other[n] for n in range(len(tdi))], freq='infer') @@ -236,17 +260,6 @@ def test_tdi_sub_offset_array(self, box): res = tdi - other tm.assert_index_equal(res, expected) - anchored = box([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)]) - - # addition/subtraction ops with anchored offsets should issue - # a PerformanceWarning and _then_ raise a TypeError. 
- with pytest.raises(TypeError): - with tm.assert_produces_warning(PerformanceWarning): - tdi - anchored - with pytest.raises(TypeError): - with tm.assert_produces_warning(PerformanceWarning): - anchored - tdi - @pytest.mark.parametrize('names', [(None, None, None), ('foo', 'bar', None), ('foo', 'foo', 'foo')]) @@ -275,8 +288,12 @@ def test_tdi_with_offset_series(self, names): res3 = tdi - other tm.assert_series_equal(res3, expected_sub) - anchored = Series([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)], - name=names[1]) + @pytest.mark.parametrize('box', [np.array, pd.Index, pd.Series]) + def test_tdi_add_sub_anchored_offset_arraylike(self, box): + # GH#18824 + tdi = TimedeltaIndex(['1 days 00:00:00', '3 days 04:00:00']) + + anchored = box([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)]) # addition/subtraction ops with anchored offsets should issue # a PerformanceWarning and _then_ raise a TypeError. diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 57479be4d989f..0b329f64dafa3 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -9,6 +9,7 @@ from pandas import Series, Timestamp from pandas.compat import range, lmap import pandas.core.common as com +from pandas.core import ops import pandas.util.testing as tm @@ -167,26 +168,26 @@ def test_random_state(): def test_maybe_match_name(): - matched = com._maybe_match_name( + matched = ops._maybe_match_name( Series([1], name='x'), Series( [2], name='x')) assert (matched == 'x') - matched = com._maybe_match_name( + matched = ops._maybe_match_name( Series([1], name='x'), Series( [2], name='y')) assert (matched is None) - matched = com._maybe_match_name(Series([1]), Series([2], name='x')) + matched = ops._maybe_match_name(Series([1]), Series([2], name='x')) assert (matched is None) - matched = com._maybe_match_name(Series([1], name='x'), Series([2])) + matched = ops._maybe_match_name(Series([1], name='x'), Series([2])) assert (matched is None) - matched = 
com._maybe_match_name(Series([1], name='x'), [2]) + matched = ops._maybe_match_name(Series([1], name='x'), [2]) assert (matched == 'x') - matched = com._maybe_match_name([1], Series([2], name='y')) + matched = ops._maybe_match_name([1], Series([2], name='y')) assert (matched == 'y') From 0bfda02c0afc40772e44765f8e67678dc166e455 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 21 Feb 2018 16:11:32 -0800 Subject: [PATCH 159/217] parametrize a whole mess of tests (#19785) --- .../indexes/datetimes/test_arithmetic.py | 16 +- .../indexes/datetimes/test_construction.py | 150 ++++---- .../indexes/datetimes/test_date_range.py | 83 +++-- .../tests/indexes/datetimes/test_datetime.py | 42 ++- .../tests/indexes/datetimes/test_indexing.py | 28 +- .../tests/indexes/datetimes/test_missing.py | 88 ++--- pandas/tests/indexes/datetimes/test_ops.py | 259 +++++++------- .../indexes/datetimes/test_scalar_compat.py | 21 +- .../tests/indexes/datetimes/test_timezones.py | 32 +- pandas/tests/indexes/datetimes/test_tools.py | 14 +- .../tests/indexes/period/test_arithmetic.py | 44 +-- pandas/tests/indexes/period/test_asfreq.py | 29 +- .../tests/indexes/period/test_construction.py | 43 ++- pandas/tests/indexes/period/test_ops.py | 63 ++-- pandas/tests/indexes/period/test_period.py | 10 +- pandas/tests/indexes/period/test_setops.py | 21 +- pandas/tests/indexes/period/test_tools.py | 323 +++++++++--------- .../indexes/timedeltas/test_arithmetic.py | 14 +- pandas/tests/indexes/timedeltas/test_ops.py | 17 +- .../indexes/timedeltas/test_timedelta.py | 10 +- pandas/tests/scalar/period/test_period.py | 203 +++++------ 21 files changed, 742 insertions(+), 768 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_arithmetic.py b/pandas/tests/indexes/datetimes/test_arithmetic.py index f252d6ec31f89..7900c983b6c77 100644 --- a/pandas/tests/indexes/datetimes/test_arithmetic.py +++ b/pandas/tests/indexes/datetimes/test_arithmetic.py @@ -614,19 +614,19 @@ def test_sub_dti_dti(self): result = 
dti2 - dti1 tm.assert_index_equal(result, expected) - def test_sub_period(self): - # GH 13078 + @pytest.mark.parametrize('freq', [None, 'D']) + def test_sub_period(self, freq): + # GH#13078 # not supported, check TypeError p = pd.Period('2011-01-01', freq='D') - for freq in [None, 'D']: - idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], freq=freq) + idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], freq=freq) - with pytest.raises(TypeError): - idx - p + with pytest.raises(TypeError): + idx - p - with pytest.raises(TypeError): - p - idx + with pytest.raises(TypeError): + p - idx def test_ufunc_coercions(self): idx = date_range('2011-01-01', periods=3, freq='2D', name='x') diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index 197a42bdaacbb..176f5bd0c1a2a 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -351,52 +351,51 @@ def test_constructor_coverage(self): freq='B') pytest.raises(ValueError, DatetimeIndex, periods=10, freq='D') - def test_constructor_datetime64_tzformat(self): - # see gh-6572: ISO 8601 format results in pytz.FixedOffset - for freq in ['AS', 'W-SUN']: - idx = date_range('2013-01-01T00:00:00-05:00', - '2016-01-01T23:59:59-05:00', freq=freq) - expected = date_range('2013-01-01T00:00:00', '2016-01-01T23:59:59', - freq=freq, tz=pytz.FixedOffset(-300)) - tm.assert_index_equal(idx, expected) - # Unable to use `US/Eastern` because of DST - expected_i8 = date_range('2013-01-01T00:00:00', - '2016-01-01T23:59:59', freq=freq, - tz='America/Lima') - tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8) - - idx = date_range('2013-01-01T00:00:00+09:00', - '2016-01-01T23:59:59+09:00', freq=freq) - expected = date_range('2013-01-01T00:00:00', '2016-01-01T23:59:59', - freq=freq, tz=pytz.FixedOffset(540)) - tm.assert_index_equal(idx, expected) - expected_i8 = date_range('2013-01-01T00:00:00', - 
'2016-01-01T23:59:59', freq=freq, - tz='Asia/Tokyo') - tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8) + @pytest.mark.parametrize('freq', ['AS', 'W-SUN']) + def test_constructor_datetime64_tzformat(self, freq): + # see GH#6572: ISO 8601 format results in pytz.FixedOffset + idx = date_range('2013-01-01T00:00:00-05:00', + '2016-01-01T23:59:59-05:00', freq=freq) + expected = date_range('2013-01-01T00:00:00', '2016-01-01T23:59:59', + freq=freq, tz=pytz.FixedOffset(-300)) + tm.assert_index_equal(idx, expected) + # Unable to use `US/Eastern` because of DST + expected_i8 = date_range('2013-01-01T00:00:00', + '2016-01-01T23:59:59', freq=freq, + tz='America/Lima') + tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8) + + idx = date_range('2013-01-01T00:00:00+09:00', + '2016-01-01T23:59:59+09:00', freq=freq) + expected = date_range('2013-01-01T00:00:00', '2016-01-01T23:59:59', + freq=freq, tz=pytz.FixedOffset(540)) + tm.assert_index_equal(idx, expected) + expected_i8 = date_range('2013-01-01T00:00:00', + '2016-01-01T23:59:59', freq=freq, + tz='Asia/Tokyo') + tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8) # Non ISO 8601 format results in dateutil.tz.tzoffset - for freq in ['AS', 'W-SUN']: - idx = date_range('2013/1/1 0:00:00-5:00', '2016/1/1 23:59:59-5:00', - freq=freq) - expected = date_range('2013-01-01T00:00:00', '2016-01-01T23:59:59', - freq=freq, tz=pytz.FixedOffset(-300)) - tm.assert_index_equal(idx, expected) - # Unable to use `US/Eastern` because of DST - expected_i8 = date_range('2013-01-01T00:00:00', - '2016-01-01T23:59:59', freq=freq, - tz='America/Lima') - tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8) - - idx = date_range('2013/1/1 0:00:00+9:00', - '2016/1/1 23:59:59+09:00', freq=freq) - expected = date_range('2013-01-01T00:00:00', '2016-01-01T23:59:59', - freq=freq, tz=pytz.FixedOffset(540)) - tm.assert_index_equal(idx, expected) - expected_i8 = date_range('2013-01-01T00:00:00', - '2016-01-01T23:59:59', freq=freq, - tz='Asia/Tokyo') 
- tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8) + idx = date_range('2013/1/1 0:00:00-5:00', '2016/1/1 23:59:59-5:00', + freq=freq) + expected = date_range('2013-01-01T00:00:00', '2016-01-01T23:59:59', + freq=freq, tz=pytz.FixedOffset(-300)) + tm.assert_index_equal(idx, expected) + # Unable to use `US/Eastern` because of DST + expected_i8 = date_range('2013-01-01T00:00:00', + '2016-01-01T23:59:59', freq=freq, + tz='America/Lima') + tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8) + + idx = date_range('2013/1/1 0:00:00+9:00', + '2016/1/1 23:59:59+09:00', freq=freq) + expected = date_range('2013-01-01T00:00:00', '2016-01-01T23:59:59', + freq=freq, tz=pytz.FixedOffset(540)) + tm.assert_index_equal(idx, expected) + expected_i8 = date_range('2013-01-01T00:00:00', + '2016-01-01T23:59:59', freq=freq, + tz='Asia/Tokyo') + tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8) def test_constructor_dtype(self): @@ -451,36 +450,35 @@ def test_dti_constructor_preserve_dti_freq(self): rng2 = DatetimeIndex(rng) assert rng.freq == rng2.freq - def test_dti_constructor_years_only(self): + @pytest.mark.parametrize('tz', [None, 'UTC', 'Asia/Tokyo', + 'dateutil/US/Pacific']) + def test_dti_constructor_years_only(self, tz): # GH 6961 - for tz in [None, 'UTC', 'Asia/Tokyo', 'dateutil/US/Pacific']: - rng1 = date_range('2014', '2015', freq='M', tz=tz) - expected1 = date_range('2014-01-31', '2014-12-31', freq='M', tz=tz) + rng1 = date_range('2014', '2015', freq='M', tz=tz) + expected1 = date_range('2014-01-31', '2014-12-31', freq='M', tz=tz) - rng2 = date_range('2014', '2015', freq='MS', tz=tz) - expected2 = date_range('2014-01-01', '2015-01-01', freq='MS', - tz=tz) + rng2 = date_range('2014', '2015', freq='MS', tz=tz) + expected2 = date_range('2014-01-01', '2015-01-01', freq='MS', tz=tz) - rng3 = date_range('2014', '2020', freq='A', tz=tz) - expected3 = date_range('2014-12-31', '2019-12-31', freq='A', tz=tz) + rng3 = date_range('2014', '2020', freq='A', tz=tz) + expected3 
= date_range('2014-12-31', '2019-12-31', freq='A', tz=tz) - rng4 = date_range('2014', '2020', freq='AS', tz=tz) - expected4 = date_range('2014-01-01', '2020-01-01', freq='AS', - tz=tz) + rng4 = date_range('2014', '2020', freq='AS', tz=tz) + expected4 = date_range('2014-01-01', '2020-01-01', freq='AS', tz=tz) - for rng, expected in [(rng1, expected1), (rng2, expected2), - (rng3, expected3), (rng4, expected4)]: - tm.assert_index_equal(rng, expected) + for rng, expected in [(rng1, expected1), (rng2, expected2), + (rng3, expected3), (rng4, expected4)]: + tm.assert_index_equal(rng, expected) - def test_dti_constructor_small_int(self): + @pytest.mark.parametrize('dtype', [np.int64, np.int32, np.int16, np.int8]) + def test_dti_constructor_small_int(self, dtype): # GH 13721 exp = DatetimeIndex(['1970-01-01 00:00:00.00000000', '1970-01-01 00:00:00.00000001', '1970-01-01 00:00:00.00000002']) - for dtype in [np.int64, np.int32, np.int16, np.int8]: - arr = np.array([0, 10, 20], dtype=dtype) - tm.assert_index_equal(DatetimeIndex(arr), exp) + arr = np.array([0, 10, 20], dtype=dtype) + tm.assert_index_equal(DatetimeIndex(arr), exp) def test_ctor_str_intraday(self): rng = DatetimeIndex(['1-1-2000 00:00:01']) @@ -499,7 +497,7 @@ def test_index_cast_datetime64_other_units(self): assert (idx.values == conversion.ensure_datetime64ns(arr)).all() def test_constructor_int64_nocopy(self): - # #1624 + # GH#1624 arr = np.arange(1000, dtype=np.int64) index = DatetimeIndex(arr) @@ -512,19 +510,17 @@ def test_constructor_int64_nocopy(self): arr[50:100] = -1 assert (index.asi8[50:100] != -1).all() - def test_from_freq_recreate_from_data(self): - freqs = ['M', 'Q', 'A', 'D', 'B', 'BH', 'T', 'S', 'L', 'U', 'H', 'N', - 'C'] - - for f in freqs: - org = DatetimeIndex(start='2001/02/01 09:00', freq=f, periods=1) - idx = DatetimeIndex(org, freq=f) - tm.assert_index_equal(idx, org) - - org = DatetimeIndex(start='2001/02/01 09:00', freq=f, - tz='US/Pacific', periods=1) - idx = DatetimeIndex(org, freq=f, 
tz='US/Pacific') - tm.assert_index_equal(idx, org) + @pytest.mark.parametrize('freq', ['M', 'Q', 'A', 'D', 'B', 'BH', + 'T', 'S', 'L', 'U', 'H', 'N', 'C']) + def test_from_freq_recreate_from_data(self, freq): + org = DatetimeIndex(start='2001/02/01 09:00', freq=freq, periods=1) + idx = DatetimeIndex(org, freq=freq) + tm.assert_index_equal(idx, org) + + org = DatetimeIndex(start='2001/02/01 09:00', freq=freq, + tz='US/Pacific', periods=1) + idx = DatetimeIndex(org, freq=freq, tz='US/Pacific') + tm.assert_index_equal(idx, org) def test_datetimeindex_constructor_misc(self): arr = ['1/1/2005', '1/2/2005', 'Jn 3, 2005', '2005-01-04'] diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 3738398d017f8..d2ec465468dfb 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -222,16 +222,13 @@ def test_range_misspecified(self): with tm.assert_raises_regex(ValueError, msg): date_range() - def test_compat_replace(self): + @pytest.mark.parametrize('f', [compat.long, int]) + def test_compat_replace(self, f): # https://github.com/statsmodels/statsmodels/issues/3349 # replace should take ints/longs for compat - - for f in [compat.long, int]: - result = date_range(Timestamp('1960-04-01 00:00:00', - freq='QS-JAN'), - periods=f(76), - freq='QS-JAN') - assert len(result) == 76 + result = date_range(Timestamp('1960-04-01 00:00:00', freq='QS-JAN'), + periods=f(76), freq='QS-JAN') + assert len(result) == 76 def test_catch_infinite_loop(self): offset = offsets.DateOffset(minute=5) @@ -484,24 +481,24 @@ def test_range_tz_dateutil(self): assert dr[0] == start assert dr[2] == end - def test_range_closed(self): + @pytest.mark.parametrize('freq', ["1D", "3D", "2M", "7W", "3H", "A"]) + def test_range_closed(self, freq): begin = datetime(2011, 1, 1) end = datetime(2014, 1, 1) - for freq in ["1D", "3D", "2M", "7W", "3H", "A"]: - closed = date_range(begin, end, 
closed=None, freq=freq) - left = date_range(begin, end, closed="left", freq=freq) - right = date_range(begin, end, closed="right", freq=freq) - expected_left = left - expected_right = right + closed = date_range(begin, end, closed=None, freq=freq) + left = date_range(begin, end, closed="left", freq=freq) + right = date_range(begin, end, closed="right", freq=freq) + expected_left = left + expected_right = right - if end == closed[-1]: - expected_left = closed[:-1] - if begin == closed[0]: - expected_right = closed[1:] + if end == closed[-1]: + expected_left = closed[:-1] + if begin == closed[0]: + expected_right = closed[1:] - tm.assert_index_equal(expected_left, left) - tm.assert_index_equal(expected_right, right) + tm.assert_index_equal(expected_left, left) + tm.assert_index_equal(expected_right, right) def test_range_closed_with_tz_aware_start_end(self): # GH12409, GH12684 @@ -546,28 +543,28 @@ def test_range_closed_with_tz_aware_start_end(self): tm.assert_index_equal(expected_left, left) tm.assert_index_equal(expected_right, right) - def test_range_closed_boundary(self): - # GH 11804 - for closed in ['right', 'left', None]: - right_boundary = date_range('2015-09-12', '2015-12-01', - freq='QS-MAR', closed=closed) - left_boundary = date_range('2015-09-01', '2015-09-12', - freq='QS-MAR', closed=closed) - both_boundary = date_range('2015-09-01', '2015-12-01', - freq='QS-MAR', closed=closed) - expected_right = expected_left = expected_both = both_boundary - - if closed == 'right': - expected_left = both_boundary[1:] - if closed == 'left': - expected_right = both_boundary[:-1] - if closed is None: - expected_right = both_boundary[1:] - expected_left = both_boundary[:-1] - - tm.assert_index_equal(right_boundary, expected_right) - tm.assert_index_equal(left_boundary, expected_left) - tm.assert_index_equal(both_boundary, expected_both) + @pytest.mark.parametrize('closed', ['right', 'left', None]) + def test_range_closed_boundary(self, closed): + # GH#11804 + 
right_boundary = date_range('2015-09-12', '2015-12-01', + freq='QS-MAR', closed=closed) + left_boundary = date_range('2015-09-01', '2015-09-12', + freq='QS-MAR', closed=closed) + both_boundary = date_range('2015-09-01', '2015-12-01', + freq='QS-MAR', closed=closed) + expected_right = expected_left = expected_both = both_boundary + + if closed == 'right': + expected_left = both_boundary[1:] + if closed == 'left': + expected_right = both_boundary[:-1] + if closed is None: + expected_right = both_boundary[1:] + expected_left = both_boundary[:-1] + + tm.assert_index_equal(right_boundary, expected_right) + tm.assert_index_equal(left_boundary, expected_left) + tm.assert_index_equal(both_boundary, expected_both) def test_years_only(self): # GH 6961 diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index 05678b0c8dd45..2cf33644377ab 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -356,12 +356,11 @@ def test_does_not_convert_mixed_integer(self): assert cols.dtype == joined.dtype tm.assert_numpy_array_equal(cols.values, joined.values) - def test_join_self(self): + @pytest.mark.parametrize('how', ['outer', 'inner', 'left', 'right']) + def test_join_self(self, how): index = date_range('1/1/2000', periods=10) - kinds = 'outer', 'inner', 'left', 'right' - for kind in kinds: - joined = index.join(index, how=kind) - assert index is joined + joined = index.join(index, how=how) + assert index is joined def assert_index_parameters(self, index): assert index.freq == '40960N' @@ -381,18 +380,17 @@ def test_ns_index(self): freq=index.freq) self.assert_index_parameters(new_index) - def test_join_with_period_index(self): + @pytest.mark.parametrize('how', ['left', 'right', 'inner', 'outer']) + def test_join_with_period_index(self, how): df = tm.makeCustomDataframe( 10, 10, data_gen_f=lambda *args: np.random.randint(2), c_idx_type='p', r_idx_type='dt') s = 
df.iloc[:5, 0] - joins = 'left', 'right', 'inner', 'outer' - for join in joins: - with tm.assert_raises_regex(ValueError, - 'can only call with other ' - 'PeriodIndex-ed objects'): - df.columns.join(s.index, how=join) + with tm.assert_raises_regex(ValueError, + 'can only call with other ' + 'PeriodIndex-ed objects'): + df.columns.join(s.index, how=how) def test_factorize(self): idx1 = DatetimeIndex(['2014-01', '2014-01', '2014-02', '2014-02', @@ -439,18 +437,18 @@ def test_factorize(self): tm.assert_numpy_array_equal(arr, exp_arr) tm.assert_index_equal(idx, idx3) - def test_factorize_tz(self): - # GH 13750 - for tz in [None, 'UTC', 'US/Eastern', 'Asia/Tokyo']: - base = pd.date_range('2016-11-05', freq='H', periods=100, tz=tz) - idx = base.repeat(5) + @pytest.mark.parametrize('tz', [None, 'UTC', 'US/Eastern', 'Asia/Tokyo']) + def test_factorize_tz(self, tz): + # GH#13750 + base = pd.date_range('2016-11-05', freq='H', periods=100, tz=tz) + idx = base.repeat(5) - exp_arr = np.arange(100, dtype=np.intp).repeat(5) + exp_arr = np.arange(100, dtype=np.intp).repeat(5) - for obj in [idx, pd.Series(idx)]: - arr, res = obj.factorize() - tm.assert_numpy_array_equal(arr, exp_arr) - tm.assert_index_equal(res, base) + for obj in [idx, pd.Series(idx)]: + arr, res = obj.factorize() + tm.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(res, base) def test_factorize_dst(self): # GH 13750 diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index 48ceefd6368c0..a9f1a5e608ac7 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -346,25 +346,25 @@ def test_take_invalid_kwargs(self): indices, mode='clip') # TODO: This method came from test_datetime; de-dup with version above - def test_take2(self): + @pytest.mark.parametrize('tz', [None, 'US/Eastern', 'Asia/Tokyo']) + def test_take2(self, tz): dates = [datetime(2010, 1, 1, 14), datetime(2010, 1, 1, 15), 
datetime(2010, 1, 1, 17), datetime(2010, 1, 1, 21)] - for tz in [None, 'US/Eastern', 'Asia/Tokyo']: - idx = DatetimeIndex(start='2010-01-01 09:00', - end='2010-02-01 09:00', freq='H', tz=tz, - name='idx') - expected = DatetimeIndex(dates, freq=None, name='idx', tz=tz) + idx = DatetimeIndex(start='2010-01-01 09:00', + end='2010-02-01 09:00', freq='H', tz=tz, + name='idx') + expected = DatetimeIndex(dates, freq=None, name='idx', tz=tz) - taken1 = idx.take([5, 6, 8, 12]) - taken2 = idx[[5, 6, 8, 12]] + taken1 = idx.take([5, 6, 8, 12]) + taken2 = idx[[5, 6, 8, 12]] - for taken in [taken1, taken2]: - tm.assert_index_equal(taken, expected) - assert isinstance(taken, DatetimeIndex) - assert taken.freq is None - assert taken.tz == expected.tz - assert taken.name == expected.name + for taken in [taken1, taken2]: + tm.assert_index_equal(taken, expected) + assert isinstance(taken, DatetimeIndex) + assert taken.freq is None + assert taken.tz == expected.tz + assert taken.name == expected.name def test_take_fill_value(self): # GH 12631 diff --git a/pandas/tests/indexes/datetimes/test_missing.py b/pandas/tests/indexes/datetimes/test_missing.py index adc0b7b3d81e8..c8d47caa7e947 100644 --- a/pandas/tests/indexes/datetimes/test_missing.py +++ b/pandas/tests/indexes/datetimes/test_missing.py @@ -1,50 +1,52 @@ +import pytest + import pandas as pd import pandas.util.testing as tm class TestDatetimeIndex(object): - def test_fillna_datetime64(self): + @pytest.mark.parametrize('tz', ['US/Eastern', 'Asia/Tokyo']) + def test_fillna_datetime64(self, tz): # GH 11343 - for tz in ['US/Eastern', 'Asia/Tokyo']: - idx = pd.DatetimeIndex(['2011-01-01 09:00', pd.NaT, - '2011-01-01 11:00']) - - exp = pd.DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', - '2011-01-01 11:00']) - tm.assert_index_equal( - idx.fillna(pd.Timestamp('2011-01-01 10:00')), exp) - - # tz mismatch - exp = pd.Index([pd.Timestamp('2011-01-01 09:00'), - pd.Timestamp('2011-01-01 10:00', tz=tz), - pd.Timestamp('2011-01-01 
11:00')], dtype=object) - tm.assert_index_equal( - idx.fillna(pd.Timestamp('2011-01-01 10:00', tz=tz)), exp) - - # object - exp = pd.Index([pd.Timestamp('2011-01-01 09:00'), 'x', - pd.Timestamp('2011-01-01 11:00')], dtype=object) - tm.assert_index_equal(idx.fillna('x'), exp) - - idx = pd.DatetimeIndex(['2011-01-01 09:00', pd.NaT, - '2011-01-01 11:00'], tz=tz) - - exp = pd.DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', - '2011-01-01 11:00'], tz=tz) - tm.assert_index_equal( - idx.fillna(pd.Timestamp('2011-01-01 10:00', tz=tz)), exp) - - exp = pd.Index([pd.Timestamp('2011-01-01 09:00', tz=tz), - pd.Timestamp('2011-01-01 10:00'), - pd.Timestamp('2011-01-01 11:00', tz=tz)], - dtype=object) - tm.assert_index_equal( - idx.fillna(pd.Timestamp('2011-01-01 10:00')), exp) - - # object - exp = pd.Index([pd.Timestamp('2011-01-01 09:00', tz=tz), - 'x', - pd.Timestamp('2011-01-01 11:00', tz=tz)], - dtype=object) - tm.assert_index_equal(idx.fillna('x'), exp) + idx = pd.DatetimeIndex(['2011-01-01 09:00', pd.NaT, + '2011-01-01 11:00']) + + exp = pd.DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', + '2011-01-01 11:00']) + tm.assert_index_equal( + idx.fillna(pd.Timestamp('2011-01-01 10:00')), exp) + + # tz mismatch + exp = pd.Index([pd.Timestamp('2011-01-01 09:00'), + pd.Timestamp('2011-01-01 10:00', tz=tz), + pd.Timestamp('2011-01-01 11:00')], dtype=object) + tm.assert_index_equal( + idx.fillna(pd.Timestamp('2011-01-01 10:00', tz=tz)), exp) + + # object + exp = pd.Index([pd.Timestamp('2011-01-01 09:00'), 'x', + pd.Timestamp('2011-01-01 11:00')], dtype=object) + tm.assert_index_equal(idx.fillna('x'), exp) + + idx = pd.DatetimeIndex(['2011-01-01 09:00', pd.NaT, + '2011-01-01 11:00'], tz=tz) + + exp = pd.DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', + '2011-01-01 11:00'], tz=tz) + tm.assert_index_equal( + idx.fillna(pd.Timestamp('2011-01-01 10:00', tz=tz)), exp) + + exp = pd.Index([pd.Timestamp('2011-01-01 09:00', tz=tz), + pd.Timestamp('2011-01-01 10:00'), + 
pd.Timestamp('2011-01-01 11:00', tz=tz)], + dtype=object) + tm.assert_index_equal( + idx.fillna(pd.Timestamp('2011-01-01 10:00')), exp) + + # object + exp = pd.Index([pd.Timestamp('2011-01-01 09:00', tz=tz), + 'x', + pd.Timestamp('2011-01-01 11:00', tz=tz)], + dtype=object) + tm.assert_index_equal(idx.fillna('x'), exp) diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index b42cd454803b8..ed7e425924097 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -96,111 +96,111 @@ def test_numpy_minmax(self): tm.assert_raises_regex( ValueError, errmsg, np.argmax, dr, out=0) - def test_repeat_range(self): + @pytest.mark.parametrize('tz', tz) + def test_repeat_range(self, tz): rng = date_range('1/1/2000', '1/1/2001') result = rng.repeat(5) assert result.freq is None assert len(result) == 5 * len(rng) - for tz in self.tz: - index = pd.date_range('2001-01-01', periods=2, freq='D', tz=tz) - exp = pd.DatetimeIndex(['2001-01-01', '2001-01-01', - '2001-01-02', '2001-01-02'], tz=tz) - for res in [index.repeat(2), np.repeat(index, 2)]: - tm.assert_index_equal(res, exp) - assert res.freq is None - - index = pd.date_range('2001-01-01', periods=2, freq='2D', tz=tz) - exp = pd.DatetimeIndex(['2001-01-01', '2001-01-01', - '2001-01-03', '2001-01-03'], tz=tz) - for res in [index.repeat(2), np.repeat(index, 2)]: - tm.assert_index_equal(res, exp) - assert res.freq is None - - index = pd.DatetimeIndex(['2001-01-01', 'NaT', '2003-01-01'], - tz=tz) - exp = pd.DatetimeIndex(['2001-01-01', '2001-01-01', '2001-01-01', - 'NaT', 'NaT', 'NaT', - '2003-01-01', '2003-01-01', '2003-01-01'], - tz=tz) - for res in [index.repeat(3), np.repeat(index, 3)]: - tm.assert_index_equal(res, exp) - assert res.freq is None - - def test_repeat(self): + index = pd.date_range('2001-01-01', periods=2, freq='D', tz=tz) + exp = pd.DatetimeIndex(['2001-01-01', '2001-01-01', + '2001-01-02', '2001-01-02'], tz=tz) + for 
res in [index.repeat(2), np.repeat(index, 2)]: + tm.assert_index_equal(res, exp) + assert res.freq is None + + index = pd.date_range('2001-01-01', periods=2, freq='2D', tz=tz) + exp = pd.DatetimeIndex(['2001-01-01', '2001-01-01', + '2001-01-03', '2001-01-03'], tz=tz) + for res in [index.repeat(2), np.repeat(index, 2)]: + tm.assert_index_equal(res, exp) + assert res.freq is None + + index = pd.DatetimeIndex(['2001-01-01', 'NaT', '2003-01-01'], + tz=tz) + exp = pd.DatetimeIndex(['2001-01-01', '2001-01-01', '2001-01-01', + 'NaT', 'NaT', 'NaT', + '2003-01-01', '2003-01-01', '2003-01-01'], + tz=tz) + for res in [index.repeat(3), np.repeat(index, 3)]: + tm.assert_index_equal(res, exp) + assert res.freq is None + + @pytest.mark.parametrize('tz', tz) + def test_repeat(self, tz): reps = 2 msg = "the 'axis' parameter is not supported" - for tz in self.tz: - rng = pd.date_range(start='2016-01-01', periods=2, - freq='30Min', tz=tz) - - expected_rng = DatetimeIndex([ - Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'), - Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'), - Timestamp('2016-01-01 00:30:00', tz=tz, freq='30T'), - Timestamp('2016-01-01 00:30:00', tz=tz, freq='30T'), - ]) - - res = rng.repeat(reps) - tm.assert_index_equal(res, expected_rng) - assert res.freq is None + rng = pd.date_range(start='2016-01-01', periods=2, + freq='30Min', tz=tz) - tm.assert_index_equal(np.repeat(rng, reps), expected_rng) - tm.assert_raises_regex(ValueError, msg, np.repeat, - rng, reps, axis=1) + expected_rng = DatetimeIndex([ + Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 00:30:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 00:30:00', tz=tz, freq='30T'), + ]) - def test_resolution(self): + res = rng.repeat(reps) + tm.assert_index_equal(res, expected_rng) + assert res.freq is None + + tm.assert_index_equal(np.repeat(rng, reps), expected_rng) + tm.assert_raises_regex(ValueError, msg, np.repeat, + 
rng, reps, axis=1) + + @pytest.mark.parametrize('tz', tz) + def test_resolution(self, tz): for freq, expected in zip(['A', 'Q', 'M', 'D', 'H', 'T', 'S', 'L', 'U'], ['day', 'day', 'day', 'day', 'hour', 'minute', 'second', 'millisecond', 'microsecond']): - for tz in self.tz: - idx = pd.date_range(start='2013-04-01', periods=30, freq=freq, - tz=tz) - assert idx.resolution == expected + idx = pd.date_range(start='2013-04-01', periods=30, freq=freq, + tz=tz) + assert idx.resolution == expected - def test_value_counts_unique(self): + @pytest.mark.parametrize('tz', tz) + def test_value_counts_unique(self, tz): # GH 7735 - for tz in self.tz: - idx = pd.date_range('2011-01-01 09:00', freq='H', periods=10) - # create repeated values, 'n'th element is repeated by n+1 times - idx = DatetimeIndex(np.repeat(idx.values, range(1, len(idx) + 1)), - tz=tz) + idx = pd.date_range('2011-01-01 09:00', freq='H', periods=10) + # create repeated values, 'n'th element is repeated by n+1 times + idx = DatetimeIndex(np.repeat(idx.values, range(1, len(idx) + 1)), + tz=tz) - exp_idx = pd.date_range('2011-01-01 18:00', freq='-1H', periods=10, - tz=tz) - expected = Series(range(10, 0, -1), index=exp_idx, dtype='int64') + exp_idx = pd.date_range('2011-01-01 18:00', freq='-1H', periods=10, + tz=tz) + expected = Series(range(10, 0, -1), index=exp_idx, dtype='int64') - for obj in [idx, Series(idx)]: - tm.assert_series_equal(obj.value_counts(), expected) + for obj in [idx, Series(idx)]: + tm.assert_series_equal(obj.value_counts(), expected) - expected = pd.date_range('2011-01-01 09:00', freq='H', periods=10, - tz=tz) - tm.assert_index_equal(idx.unique(), expected) + expected = pd.date_range('2011-01-01 09:00', freq='H', periods=10, + tz=tz) + tm.assert_index_equal(idx.unique(), expected) - idx = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 09:00', - '2013-01-01 09:00', '2013-01-01 08:00', - '2013-01-01 08:00', pd.NaT], tz=tz) + idx = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 09:00', + 
'2013-01-01 09:00', '2013-01-01 08:00', + '2013-01-01 08:00', pd.NaT], tz=tz) - exp_idx = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 08:00'], - tz=tz) - expected = Series([3, 2], index=exp_idx) + exp_idx = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 08:00'], + tz=tz) + expected = Series([3, 2], index=exp_idx) - for obj in [idx, Series(idx)]: - tm.assert_series_equal(obj.value_counts(), expected) + for obj in [idx, Series(idx)]: + tm.assert_series_equal(obj.value_counts(), expected) - exp_idx = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 08:00', - pd.NaT], tz=tz) - expected = Series([3, 2, 1], index=exp_idx) + exp_idx = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 08:00', + pd.NaT], tz=tz) + expected = Series([3, 2, 1], index=exp_idx) - for obj in [idx, Series(idx)]: - tm.assert_series_equal(obj.value_counts(dropna=False), - expected) + for obj in [idx, Series(idx)]: + tm.assert_series_equal(obj.value_counts(dropna=False), + expected) - tm.assert_index_equal(idx.unique(), exp_idx) + tm.assert_index_equal(idx.unique(), exp_idx) def test_nonunique_contains(self): # GH 9512 @@ -324,15 +324,16 @@ def test_drop_duplicates(self): res = Series(idx).drop_duplicates(keep=False) tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31))) - def test_infer_freq(self): + @pytest.mark.parametrize('freq', [ + 'A', '2A', '-2A', 'Q', '-1Q', 'M', '-1M', 'D', '3D', + '-3D', 'W', '-1W', 'H', '2H', '-2H', 'T', '2T', 'S', + '-3S']) + def test_infer_freq(self, freq): # GH 11018 - for freq in ['A', '2A', '-2A', 'Q', '-1Q', 'M', '-1M', 'D', '3D', - '-3D', 'W', '-1W', 'H', '2H', '-2H', 'T', '2T', 'S', - '-3S']: - idx = pd.date_range('2011-01-01 09:00:00', freq=freq, periods=10) - result = pd.DatetimeIndex(idx.asi8, freq='infer') - tm.assert_index_equal(idx, result) - assert result.freq == freq + idx = pd.date_range('2011-01-01 09:00:00', freq=freq, periods=10) + result = pd.DatetimeIndex(idx.asi8, freq='infer') + tm.assert_index_equal(idx, result) + assert result.freq 
== freq def test_nat_new(self): idx = pd.date_range('2011-01-01', freq='D', periods=5, name='x') @@ -344,57 +345,57 @@ def test_nat_new(self): exp = np.array([tslib.iNaT] * 5, dtype=np.int64) tm.assert_numpy_array_equal(result, exp) - def test_nat(self): + @pytest.mark.parametrize('tz', [None, 'US/Eastern', 'UTC']) + def test_nat(self, tz): assert pd.DatetimeIndex._na_value is pd.NaT assert pd.DatetimeIndex([])._na_value is pd.NaT - for tz in [None, 'US/Eastern', 'UTC']: - idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], tz=tz) - assert idx._can_hold_na + idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], tz=tz) + assert idx._can_hold_na - tm.assert_numpy_array_equal(idx._isnan, np.array([False, False])) - assert not idx.hasnans - tm.assert_numpy_array_equal(idx._nan_idxs, - np.array([], dtype=np.intp)) + tm.assert_numpy_array_equal(idx._isnan, np.array([False, False])) + assert not idx.hasnans + tm.assert_numpy_array_equal(idx._nan_idxs, + np.array([], dtype=np.intp)) - idx = pd.DatetimeIndex(['2011-01-01', 'NaT'], tz=tz) - assert idx._can_hold_na + idx = pd.DatetimeIndex(['2011-01-01', 'NaT'], tz=tz) + assert idx._can_hold_na - tm.assert_numpy_array_equal(idx._isnan, np.array([False, True])) - assert idx.hasnans - tm.assert_numpy_array_equal(idx._nan_idxs, - np.array([1], dtype=np.intp)) + tm.assert_numpy_array_equal(idx._isnan, np.array([False, True])) + assert idx.hasnans + tm.assert_numpy_array_equal(idx._nan_idxs, + np.array([1], dtype=np.intp)) - def test_equals(self): + @pytest.mark.parametrize('tz', [None, 'UTC', 'US/Eastern', 'Asia/Tokyo']) + def test_equals(self, tz): # GH 13107 - for tz in [None, 'UTC', 'US/Eastern', 'Asia/Tokyo']: - idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02', 'NaT']) - assert idx.equals(idx) - assert idx.equals(idx.copy()) - assert idx.equals(idx.astype(object)) - assert idx.astype(object).equals(idx) - assert idx.astype(object).equals(idx.astype(object)) - assert not idx.equals(list(idx)) - assert not 
idx.equals(pd.Series(idx)) - - idx2 = pd.DatetimeIndex(['2011-01-01', '2011-01-02', 'NaT'], - tz='US/Pacific') - assert not idx.equals(idx2) - assert not idx.equals(idx2.copy()) - assert not idx.equals(idx2.astype(object)) - assert not idx.astype(object).equals(idx2) - assert not idx.equals(list(idx2)) - assert not idx.equals(pd.Series(idx2)) - - # same internal, different tz - idx3 = pd.DatetimeIndex._simple_new(idx.asi8, tz='US/Pacific') - tm.assert_numpy_array_equal(idx.asi8, idx3.asi8) - assert not idx.equals(idx3) - assert not idx.equals(idx3.copy()) - assert not idx.equals(idx3.astype(object)) - assert not idx.astype(object).equals(idx3) - assert not idx.equals(list(idx3)) - assert not idx.equals(pd.Series(idx3)) + idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02', 'NaT']) + assert idx.equals(idx) + assert idx.equals(idx.copy()) + assert idx.equals(idx.astype(object)) + assert idx.astype(object).equals(idx) + assert idx.astype(object).equals(idx.astype(object)) + assert not idx.equals(list(idx)) + assert not idx.equals(pd.Series(idx)) + + idx2 = pd.DatetimeIndex(['2011-01-01', '2011-01-02', 'NaT'], + tz='US/Pacific') + assert not idx.equals(idx2) + assert not idx.equals(idx2.copy()) + assert not idx.equals(idx2.astype(object)) + assert not idx.astype(object).equals(idx2) + assert not idx.equals(list(idx2)) + assert not idx.equals(pd.Series(idx2)) + + # same internal, different tz + idx3 = pd.DatetimeIndex._simple_new(idx.asi8, tz='US/Pacific') + tm.assert_numpy_array_equal(idx.asi8, idx3.asi8) + assert not idx.equals(idx3) + assert not idx.equals(idx3.copy()) + assert not idx.equals(idx3.astype(object)) + assert not idx.astype(object).equals(idx3) + assert not idx.equals(list(idx3)) + assert not idx.equals(pd.Series(idx3)) class TestBusinessDatetimeIndex(object): diff --git a/pandas/tests/indexes/datetimes/test_scalar_compat.py b/pandas/tests/indexes/datetimes/test_scalar_compat.py index 83e7a0cd68d63..6f0756949edc6 100644 --- 
a/pandas/tests/indexes/datetimes/test_scalar_compat.py +++ b/pandas/tests/indexes/datetimes/test_scalar_compat.py @@ -38,18 +38,21 @@ def test_dti_date_out_of_range(self): pytest.raises(ValueError, DatetimeIndex, ['1400-01-01']) pytest.raises(ValueError, DatetimeIndex, [datetime(1400, 1, 1)]) - def test_dti_timestamp_fields(self): + @pytest.mark.parametrize('field', [ + 'dayofweek', 'dayofyear', 'week', 'weekofyear', 'quarter', + 'days_in_month', 'is_month_start', 'is_month_end', + 'is_quarter_start', 'is_quarter_end', 'is_year_start', + 'is_year_end', 'weekday_name']) + def test_dti_timestamp_fields(self, field): # extra fields from DatetimeIndex like quarter and week idx = tm.makeDateIndex(100) + expected = getattr(idx, field)[-1] + result = getattr(Timestamp(idx[-1]), field) + assert result == expected - fields = ['dayofweek', 'dayofyear', 'week', 'weekofyear', 'quarter', - 'days_in_month', 'is_month_start', 'is_month_end', - 'is_quarter_start', 'is_quarter_end', 'is_year_start', - 'is_year_end', 'weekday_name'] - for f in fields: - expected = getattr(idx, f)[-1] - result = getattr(Timestamp(idx[-1]), f) - assert result == expected + def test_dti_timestamp_freq_fields(self): + # extra fields from DatetimeIndex like quarter and week + idx = tm.makeDateIndex(100) assert idx.freq == Timestamp(idx[-1], idx.freq).freq assert idx.freqstr == Timestamp(idx[-1], idx.freq).freqstr diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index 62854676d43be..217610b76cf0f 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -170,17 +170,17 @@ def test_dti_tz_convert_hour_overflow_dst_timestamps(self, tz): expected = Index([9, 9, 9]) tm.assert_index_equal(ut.hour, expected) - def test_dti_tz_convert_trans_pos_plus_1__bug(self): + @pytest.mark.parametrize('freq, n', [('H', 1), ('T', 60), ('S', 3600)]) + def test_dti_tz_convert_trans_pos_plus_1__bug(self, 
freq, n): # Regression test for tslib.tz_convert(vals, tz1, tz2). # See https://github.com/pandas-dev/pandas/issues/4496 for details. - for freq, n in [('H', 1), ('T', 60), ('S', 3600)]: - idx = date_range(datetime(2011, 3, 26, 23), - datetime(2011, 3, 27, 1), freq=freq) - idx = idx.tz_localize('UTC') - idx = idx.tz_convert('Europe/Moscow') + idx = date_range(datetime(2011, 3, 26, 23), + datetime(2011, 3, 27, 1), freq=freq) + idx = idx.tz_localize('UTC') + idx = idx.tz_convert('Europe/Moscow') - expected = np.repeat(np.array([3, 4, 5]), np.array([n, n, 1])) - tm.assert_index_equal(idx.hour, Index(expected)) + expected = np.repeat(np.array([3, 4, 5]), np.array([n, n, 1])) + tm.assert_index_equal(idx.hour, Index(expected)) def test_dti_tz_convert_dst(self): for freq, n in [('H', 1), ('T', 60), ('S', 3600)]: @@ -700,20 +700,20 @@ def test_dti_tz_constructors(self, tzstr): # ------------------------------------------------------------- # Unsorted - def test_join_utc_convert(self): + @pytest.mark.parametrize('how', ['inner', 'outer', 'left', 'right']) + def test_join_utc_convert(self, how): rng = date_range('1/1/2011', periods=100, freq='H', tz='utc') left = rng.tz_convert('US/Eastern') right = rng.tz_convert('Europe/Berlin') - for how in ['inner', 'outer', 'left', 'right']: - result = left.join(left[:-5], how=how) - assert isinstance(result, DatetimeIndex) - assert result.tz == left.tz + result = left.join(left[:-5], how=how) + assert isinstance(result, DatetimeIndex) + assert result.tz == left.tz - result = left.join(right[:-5], how=how) - assert isinstance(result, DatetimeIndex) - assert result.tz.zone == 'UTC' + result = left.join(right[:-5], how=how) + assert isinstance(result, DatetimeIndex) + assert result.tz.zone == 'UTC' def test_dti_drop_dont_lose_tz(self): # GH#2621 diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index b5926933544e8..fbf0977a04d82 100644 --- 
a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -1013,18 +1013,20 @@ def test_string_na_nat_conversion(self, cache): assert_series_equal(dresult, expected, check_names=False) assert dresult.name == 'foo' + @pytest.mark.parametrize('dtype', [ + 'datetime64[h]', 'datetime64[m]', + 'datetime64[s]', 'datetime64[ms]', + 'datetime64[us]', 'datetime64[ns]']) @pytest.mark.parametrize('cache', [True, False]) - def test_dti_constructor_numpy_timeunits(self, cache): + def test_dti_constructor_numpy_timeunits(self, cache, dtype): # GH 9114 base = pd.to_datetime(['2000-01-01T00:00', '2000-01-02T00:00', 'NaT'], cache=cache) - for dtype in ['datetime64[h]', 'datetime64[m]', 'datetime64[s]', - 'datetime64[ms]', 'datetime64[us]', 'datetime64[ns]']: - values = base.values.astype(dtype) + values = base.values.astype(dtype) - tm.assert_index_equal(DatetimeIndex(values), base) - tm.assert_index_equal(to_datetime(values, cache=cache), base) + tm.assert_index_equal(DatetimeIndex(values), base) + tm.assert_index_equal(to_datetime(values, cache=cache), base) @pytest.mark.parametrize('cache', [True, False]) def test_dayfirst(self, cache): diff --git a/pandas/tests/indexes/period/test_arithmetic.py b/pandas/tests/indexes/period/test_arithmetic.py index 5f8f9533e9c44..e16d346542b9e 100644 --- a/pandas/tests/indexes/period/test_arithmetic.py +++ b/pandas/tests/indexes/period/test_arithmetic.py @@ -613,7 +613,8 @@ def test_pi_ops(self): exp = pd.Index([0, -1, -2, -3], name='idx') tm.assert_index_equal(result, exp) - def test_pi_ops_errors(self): + @pytest.mark.parametrize('ng', ["str", 1.5]) + def test_pi_ops_errors(self, ng): idx = PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04'], freq='M', name='idx') ser = pd.Series(idx) @@ -621,34 +622,33 @@ def test_pi_ops_errors(self): msg = r"unsupported operand type\(s\)" for obj in [idx, ser]: - for ng in ["str", 1.5]: - with tm.assert_raises_regex(TypeError, msg): - obj + ng + with 
tm.assert_raises_regex(TypeError, msg): + obj + ng - with pytest.raises(TypeError): - # error message differs between PY2 and 3 - ng + obj + with pytest.raises(TypeError): + # error message differs between PY2 and 3 + ng + obj + + with tm.assert_raises_regex(TypeError, msg): + obj - ng - with tm.assert_raises_regex(TypeError, msg): - obj - ng + with pytest.raises(TypeError): + np.add(obj, ng) + if _np_version_under1p10: + assert np.add(ng, obj) is NotImplemented + else: with pytest.raises(TypeError): - np.add(obj, ng) + np.add(ng, obj) - if _np_version_under1p10: - assert np.add(ng, obj) is NotImplemented - else: - with pytest.raises(TypeError): - np.add(ng, obj) + with pytest.raises(TypeError): + np.subtract(obj, ng) + if _np_version_under1p10: + assert np.subtract(ng, obj) is NotImplemented + else: with pytest.raises(TypeError): - np.subtract(obj, ng) - - if _np_version_under1p10: - assert np.subtract(ng, obj) is NotImplemented - else: - with pytest.raises(TypeError): - np.subtract(ng, obj) + np.subtract(ng, obj) def test_pi_ops_nat(self): idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-04'], diff --git a/pandas/tests/indexes/period/test_asfreq.py b/pandas/tests/indexes/period/test_asfreq.py index c8724b2a3bc91..ea59a57069faa 100644 --- a/pandas/tests/indexes/period/test_asfreq.py +++ b/pandas/tests/indexes/period/test_asfreq.py @@ -8,9 +8,6 @@ class TestPeriodIndex(object): - def setup_method(self, method): - pass - def test_asfreq(self): pi1 = PeriodIndex(freq='A', start='1/1/2001', end='1/1/2001') pi2 = PeriodIndex(freq='Q', start='1/1/2001', end='1/1/2001') @@ -85,21 +82,21 @@ def test_asfreq_nat(self): expected = PeriodIndex(['2011Q1', '2011Q1', 'NaT', '2011Q2'], freq='Q') tm.assert_index_equal(result, expected) - def test_asfreq_mult_pi(self): + @pytest.mark.parametrize('freq', ['D', '3D']) + def test_asfreq_mult_pi(self, freq): pi = PeriodIndex(['2001-01', '2001-02', 'NaT', '2001-03'], freq='2M') - for freq in ['D', '3D']: - result = pi.asfreq(freq) 
- exp = PeriodIndex(['2001-02-28', '2001-03-31', 'NaT', - '2001-04-30'], freq=freq) - tm.assert_index_equal(result, exp) - assert result.freq == exp.freq - - result = pi.asfreq(freq, how='S') - exp = PeriodIndex(['2001-01-01', '2001-02-01', 'NaT', - '2001-03-01'], freq=freq) - tm.assert_index_equal(result, exp) - assert result.freq == exp.freq + result = pi.asfreq(freq) + exp = PeriodIndex(['2001-02-28', '2001-03-31', 'NaT', + '2001-04-30'], freq=freq) + tm.assert_index_equal(result, exp) + assert result.freq == exp.freq + + result = pi.asfreq(freq, how='S') + exp = PeriodIndex(['2001-01-01', '2001-02-01', 'NaT', + '2001-03-01'], freq=freq) + tm.assert_index_equal(result, exp) + assert result.freq == exp.freq def test_asfreq_combined_pi(self): pi = pd.PeriodIndex(['2001-01-01 00:00', '2001-01-02 02:00', 'NaT'], diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index eca80d17b1dc3..be741592ec7a2 100644 --- a/pandas/tests/indexes/period/test_construction.py +++ b/pandas/tests/indexes/period/test_construction.py @@ -286,14 +286,14 @@ def test_constructor_simple_new_empty(self): result = idx._simple_new(idx, name='p', freq='M') tm.assert_index_equal(result, idx) - def test_constructor_floats(self): - # GH13079 - for floats in [[1.1, 2.1], np.array([1.1, 2.1])]: - with pytest.raises(TypeError): - pd.PeriodIndex._simple_new(floats, freq='M') + @pytest.mark.parametrize('floats', [[1.1, 2.1], np.array([1.1, 2.1])]) + def test_constructor_floats(self, floats): + # GH#13079 + with pytest.raises(TypeError): + pd.PeriodIndex._simple_new(floats, freq='M') - with pytest.raises(TypeError): - pd.PeriodIndex(floats, freq='M') + with pytest.raises(TypeError): + pd.PeriodIndex(floats, freq='M') def test_constructor_nat(self): pytest.raises(ValueError, period_range, start='NaT', @@ -343,16 +343,14 @@ def test_constructor_freq_mult(self): with tm.assert_raises_regex(ValueError, msg): period_range('2011-01', periods=3, 
freq='0M') - def test_constructor_freq_mult_dti_compat(self): - import itertools - mults = [1, 2, 3, 4, 5] - freqs = ['A', 'M', 'D', 'T', 'S'] - for mult, freq in itertools.product(mults, freqs): - freqstr = str(mult) + freq - pidx = PeriodIndex(start='2014-04-01', freq=freqstr, periods=10) - expected = date_range(start='2014-04-01', freq=freqstr, - periods=10).to_period(freqstr) - tm.assert_index_equal(pidx, expected) + @pytest.mark.parametrize('freq', ['A', 'M', 'D', 'T', 'S']) + @pytest.mark.parametrize('mult', [1, 2, 3, 4, 5]) + def test_constructor_freq_mult_dti_compat(self, mult, freq): + freqstr = str(mult) + freq + pidx = PeriodIndex(start='2014-04-01', freq=freqstr, periods=10) + expected = date_range(start='2014-04-01', freq=freqstr, + periods=10).to_period(freqstr) + tm.assert_index_equal(pidx, expected) def test_constructor_freq_combined(self): for freq in ['1D1H', '1H1D']: @@ -445,11 +443,12 @@ def test_constructor_error(self): with tm.assert_raises_regex(ValueError, msg): PeriodIndex(start=start) - def test_recreate_from_data(self): - for o in ['M', 'Q', 'A', 'D', 'B', 'T', 'S', 'L', 'U', 'N', 'H']: - org = PeriodIndex(start='2001/04/01', freq=o, periods=1) - idx = PeriodIndex(org.values, freq=o) - tm.assert_index_equal(idx, org) + @pytest.mark.parametrize('freq', ['M', 'Q', 'A', 'D', 'B', + 'T', 'S', 'L', 'U', 'N', 'H']) + def test_recreate_from_data(self, freq): + org = PeriodIndex(start='2001/04/01', freq=freq, periods=1) + idx = PeriodIndex(org.values, freq=freq) + tm.assert_index_equal(idx, org) def test_map_with_string_constructor(self): raw = [2005, 2007, 2009] diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py index 3b6641bc7ad5c..7d117b0b626cf 100644 --- a/pandas/tests/indexes/period/test_ops.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -1,5 +1,6 @@ import numpy as np +import pytest import pandas as pd import pandas._libs.tslib as tslib @@ -368,37 +369,37 @@ def test_nat(self): 
tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1], dtype=np.intp)) - def test_equals(self): - # GH 13107 - for freq in ['D', 'M']: - idx = pd.PeriodIndex(['2011-01-01', '2011-01-02', 'NaT'], - freq=freq) - assert idx.equals(idx) - assert idx.equals(idx.copy()) - assert idx.equals(idx.astype(object)) - assert idx.astype(object).equals(idx) - assert idx.astype(object).equals(idx.astype(object)) - assert not idx.equals(list(idx)) - assert not idx.equals(pd.Series(idx)) - - idx2 = pd.PeriodIndex(['2011-01-01', '2011-01-02', 'NaT'], - freq='H') - assert not idx.equals(idx2) - assert not idx.equals(idx2.copy()) - assert not idx.equals(idx2.astype(object)) - assert not idx.astype(object).equals(idx2) - assert not idx.equals(list(idx2)) - assert not idx.equals(pd.Series(idx2)) - - # same internal, different tz - idx3 = pd.PeriodIndex._simple_new(idx.asi8, freq='H') - tm.assert_numpy_array_equal(idx.asi8, idx3.asi8) - assert not idx.equals(idx3) - assert not idx.equals(idx3.copy()) - assert not idx.equals(idx3.astype(object)) - assert not idx.astype(object).equals(idx3) - assert not idx.equals(list(idx3)) - assert not idx.equals(pd.Series(idx3)) + @pytest.mark.parametrize('freq', ['D', 'M']) + def test_equals(self, freq): + # GH#13107 + idx = pd.PeriodIndex(['2011-01-01', '2011-01-02', 'NaT'], + freq=freq) + assert idx.equals(idx) + assert idx.equals(idx.copy()) + assert idx.equals(idx.astype(object)) + assert idx.astype(object).equals(idx) + assert idx.astype(object).equals(idx.astype(object)) + assert not idx.equals(list(idx)) + assert not idx.equals(pd.Series(idx)) + + idx2 = pd.PeriodIndex(['2011-01-01', '2011-01-02', 'NaT'], + freq='H') + assert not idx.equals(idx2) + assert not idx.equals(idx2.copy()) + assert not idx.equals(idx2.astype(object)) + assert not idx.astype(object).equals(idx2) + assert not idx.equals(list(idx2)) + assert not idx.equals(pd.Series(idx2)) + + # same internal, different tz + idx3 = pd.PeriodIndex._simple_new(idx.asi8, freq='H') + 
tm.assert_numpy_array_equal(idx.asi8, idx3.asi8) + assert not idx.equals(idx3) + assert not idx.equals(idx3.copy()) + assert not idx.equals(idx3.astype(object)) + assert not idx.astype(object).equals(idx3) + assert not idx.equals(list(idx3)) + assert not idx.equals(pd.Series(idx3)) class TestPeriodIndexSeriesMethods(object): diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 4c0c865928031..dd437363cfc1d 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -27,11 +27,11 @@ def create_index(self): def test_pickle_compat_construction(self): pass - def test_pickle_round_trip(self): - for freq in ['D', 'M', 'A']: - idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq=freq) - result = tm.round_trip_pickle(idx) - tm.assert_index_equal(result, idx) + @pytest.mark.parametrize('freq', ['D', 'M', 'A']) + def test_pickle_round_trip(self, freq): + idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq=freq) + result = tm.round_trip_pickle(idx) + tm.assert_index_equal(result, idx) @pytest.mark.parametrize('klass', [list, tuple, np.array, Series]) def test_where(self, klass): diff --git a/pandas/tests/indexes/period/test_setops.py b/pandas/tests/indexes/period/test_setops.py index 1ac05f9fa94b7..ec0836dfa174b 100644 --- a/pandas/tests/indexes/period/test_setops.py +++ b/pandas/tests/indexes/period/test_setops.py @@ -14,24 +14,21 @@ def _permute(obj): class TestPeriodIndex(object): - def setup_method(self, method): - pass - - def test_joins(self): + @pytest.mark.parametrize('kind', ['inner', 'outer', 'left', 'right']) + def test_joins(self, kind): index = period_range('1/1/2000', '1/20/2000', freq='D') - for kind in ['inner', 'outer', 'left', 'right']: - joined = index.join(index[:-5], how=kind) + joined = index.join(index[:-5], how=kind) - assert isinstance(joined, PeriodIndex) - assert joined.freq == index.freq + assert isinstance(joined, PeriodIndex) + assert 
joined.freq == index.freq - def test_join_self(self): + @pytest.mark.parametrize('kind', ['inner', 'outer', 'left', 'right']) + def test_join_self(self, kind): index = period_range('1/1/2000', '1/20/2000', freq='D') - for kind in ['inner', 'outer', 'left', 'right']: - res = index.join(index, how=kind) - assert index is res + res = index.join(index, how=kind) + assert index is res def test_join_does_not_recur(self): df = tm.makeCustomDataframe( diff --git a/pandas/tests/indexes/period/test_tools.py b/pandas/tests/indexes/period/test_tools.py index 97500f2f5ed95..38c6f257b2206 100644 --- a/pandas/tests/indexes/period/test_tools.py +++ b/pandas/tests/indexes/period/test_tools.py @@ -1,5 +1,6 @@ import numpy as np from datetime import datetime, timedelta +import pytest import pandas as pd import pandas.util.testing as tm @@ -29,32 +30,10 @@ def test_annual(self): def test_monthly(self): self._check_freq('M', '1970-01') - def test_weekly(self): - self._check_freq('W-THU', '1970-01-01') - - def test_daily(self): - self._check_freq('D', '1970-01-01') - - def test_business_daily(self): - self._check_freq('B', '1970-01-01') - - def test_hourly(self): - self._check_freq('H', '1970-01-01') - - def test_minutely(self): - self._check_freq('T', '1970-01-01') - - def test_secondly(self): - self._check_freq('S', '1970-01-01') - - def test_millisecondly(self): - self._check_freq('L', '1970-01-01') - - def test_microsecondly(self): - self._check_freq('U', '1970-01-01') - - def test_nanosecondly(self): - self._check_freq('N', '1970-01-01') + @pytest.mark.parametrize('freq', ['W-THU', 'D', 'B', 'H', 'T', + 'S', 'L', 'U', 'N']) + def test_freq(self, freq): + self._check_freq(freq, '1970-01-01') def test_negone_ordinals(self): freqs = ['A', 'M', 'Q', 'D', 'H', 'T', 'S'] @@ -75,19 +54,6 @@ def test_negone_ordinals(self): class TestPeriodIndex(object): - - def setup_method(self, method): - pass - - def test_tolist(self): - index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') - 
rs = index.tolist() - for x in rs: - assert isinstance(x, Period) - - recon = PeriodIndex(rs) - tm.assert_index_equal(index, recon) - def test_to_timestamp(self): index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') series = Series(1, index=index, name='foo') @@ -129,24 +95,6 @@ def _get_with_delta(delta, freq='A-DEC'): tm.assert_index_equal(result.index, exp_index) assert result.name == 'foo' - def test_to_timestamp_quarterly_bug(self): - years = np.arange(1960, 2000).repeat(4) - quarters = np.tile(lrange(1, 5), 40) - - pindex = PeriodIndex(year=years, quarter=quarters) - - stamps = pindex.to_timestamp('D', 'end') - expected = DatetimeIndex([x.to_timestamp('D', 'end') for x in pindex]) - tm.assert_index_equal(stamps, expected) - - def test_to_timestamp_preserve_name(self): - index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009', - name='foo') - assert index.name == 'foo' - - conv = index.to_timestamp('D') - assert conv.name == 'foo' - def test_to_timestamp_repr_is_code(self): zs = [Timestamp('99-04-17 00:00:00', tz='UTC'), Timestamp('2001-04-17 00:00:00', tz='UTC'), @@ -155,57 +103,6 @@ def test_to_timestamp_repr_is_code(self): for z in zs: assert eval(repr(z)) == z - def test_to_timestamp_pi_nat(self): - # GH 7228 - index = PeriodIndex(['NaT', '2011-01', '2011-02'], freq='M', - name='idx') - - result = index.to_timestamp('D') - expected = DatetimeIndex([pd.NaT, datetime(2011, 1, 1), - datetime(2011, 2, 1)], name='idx') - tm.assert_index_equal(result, expected) - assert result.name == 'idx' - - result2 = result.to_period(freq='M') - tm.assert_index_equal(result2, index) - assert result2.name == 'idx' - - result3 = result.to_period(freq='3M') - exp = PeriodIndex(['NaT', '2011-01', '2011-02'], freq='3M', name='idx') - tm.assert_index_equal(result3, exp) - assert result3.freqstr == '3M' - - msg = ('Frequency must be positive, because it' - ' represents span: -2A') - with tm.assert_raises_regex(ValueError, msg): - result.to_period(freq='-2A') - - 
def test_to_timestamp_pi_mult(self): - idx = PeriodIndex(['2011-01', 'NaT', '2011-02'], freq='2M', name='idx') - result = idx.to_timestamp() - expected = DatetimeIndex( - ['2011-01-01', 'NaT', '2011-02-01'], name='idx') - tm.assert_index_equal(result, expected) - result = idx.to_timestamp(how='E') - expected = DatetimeIndex( - ['2011-02-28', 'NaT', '2011-03-31'], name='idx') - tm.assert_index_equal(result, expected) - - def test_to_timestamp_pi_combined(self): - idx = PeriodIndex(start='2011', periods=2, freq='1D1H', name='idx') - result = idx.to_timestamp() - expected = DatetimeIndex( - ['2011-01-01 00:00', '2011-01-02 01:00'], name='idx') - tm.assert_index_equal(result, expected) - result = idx.to_timestamp(how='E') - expected = DatetimeIndex( - ['2011-01-02 00:59:59', '2011-01-03 01:59:59'], name='idx') - tm.assert_index_equal(result, expected) - result = idx.to_timestamp(how='E', freq='H') - expected = DatetimeIndex( - ['2011-01-02 00:00', '2011-01-03 01:00'], name='idx') - tm.assert_index_equal(result, expected) - def test_to_timestamp_to_period_astype(self): idx = DatetimeIndex([pd.NaT, '2011-01-01', '2011-02-01'], name='idx') @@ -238,47 +135,26 @@ def test_dti_to_period(self): tm.assert_index_equal(pi3, period_range('1/1/2005', '11/1/2005', freq='M').asfreq('3D')) - def test_period_astype_to_timestamp(self): - pi = pd.PeriodIndex(['2011-01', '2011-02', '2011-03'], freq='M') - - exp = pd.DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01']) - tm.assert_index_equal(pi.astype('datetime64[ns]'), exp) - - exp = pd.DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31']) - tm.assert_index_equal(pi.astype('datetime64[ns]', how='end'), exp) - - exp = pd.DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01'], - tz='US/Eastern') - res = pi.astype('datetime64[ns, US/Eastern]') - tm.assert_index_equal(pi.astype('datetime64[ns, US/Eastern]'), exp) - - exp = pd.DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31'], - tz='US/Eastern') - res = 
pi.astype('datetime64[ns, US/Eastern]', how='end') - tm.assert_index_equal(res, exp) - - def test_to_period_quarterly(self): + @pytest.mark.parametrize('month', MONTHS) + def test_to_period_quarterly(self, month): # make sure we can make the round trip - for month in MONTHS: - freq = 'Q-%s' % month - rng = period_range('1989Q3', '1991Q3', freq=freq) - stamps = rng.to_timestamp() - result = stamps.to_period(freq) - tm.assert_index_equal(rng, result) - - def test_to_period_quarterlyish(self): - offsets = ['BQ', 'QS', 'BQS'] - for off in offsets: - rng = date_range('01-Jan-2012', periods=8, freq=off) - prng = rng.to_period() - assert prng.freq == 'Q-DEC' + freq = 'Q-%s' % month + rng = period_range('1989Q3', '1991Q3', freq=freq) + stamps = rng.to_timestamp() + result = stamps.to_period(freq) + tm.assert_index_equal(rng, result) + + @pytest.mark.parametrize('off', ['BQ', 'QS', 'BQS']) + def test_to_period_quarterlyish(self, off): + rng = date_range('01-Jan-2012', periods=8, freq=off) + prng = rng.to_period() + assert prng.freq == 'Q-DEC' - def test_to_period_annualish(self): - offsets = ['BA', 'AS', 'BAS'] - for off in offsets: - rng = date_range('01-Jan-2012', periods=8, freq=off) - prng = rng.to_period() - assert prng.freq == 'A-DEC' + @pytest.mark.parametrize('off', ['BA', 'AS', 'BAS']) + def test_to_period_annualish(self, off): + rng = date_range('01-Jan-2012', periods=8, freq=off) + prng = rng.to_period() + assert prng.freq == 'A-DEC' def test_to_period_monthish(self): offsets = ['MS', 'BM'] @@ -304,12 +180,6 @@ def test_period_dt64_round_trip(self): pi = dti.to_period(freq='H') tm.assert_index_equal(pi.to_timestamp(), dti) - def test_to_timestamp_1703(self): - index = period_range('1/1/2012', periods=4, freq='D') - - result = index.to_timestamp() - assert result[0] == Timestamp('1/1/2012') - def test_combine_first(self): # GH 3367 didx = pd.DatetimeIndex(start='1950-01-31', end='1950-07-31', freq='M') @@ -325,26 +195,137 @@ def test_combine_first(self): 
dtype=np.float64) tm.assert_series_equal(result, expected) - def test_searchsorted(self): - for freq in ['D', '2D']: - pidx = pd.PeriodIndex(['2014-01-01', '2014-01-02', '2014-01-03', - '2014-01-04', '2014-01-05'], freq=freq) + @pytest.mark.parametrize('freq', ['D', '2D']) + def test_searchsorted(self, freq): + pidx = pd.PeriodIndex(['2014-01-01', '2014-01-02', '2014-01-03', + '2014-01-04', '2014-01-05'], freq=freq) + + p1 = pd.Period('2014-01-01', freq=freq) + assert pidx.searchsorted(p1) == 0 + + p2 = pd.Period('2014-01-04', freq=freq) + assert pidx.searchsorted(p2) == 3 + + msg = "Input has different freq=H from PeriodIndex" + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + pidx.searchsorted(pd.Period('2014-01-01', freq='H')) + + msg = "Input has different freq=5D from PeriodIndex" + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + pidx.searchsorted(pd.Period('2014-01-01', freq='5D')) + + with tm.assert_produces_warning(FutureWarning): + pidx.searchsorted(key=p2) + + +class TestPeriodIndexConversion(object): + def test_tolist(self): + index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') + rs = index.tolist() + for x in rs: + assert isinstance(x, Period) + + recon = PeriodIndex(rs) + tm.assert_index_equal(index, recon) + + def test_to_timestamp_pi_nat(self): + # GH#7228 + index = PeriodIndex(['NaT', '2011-01', '2011-02'], freq='M', + name='idx') + + result = index.to_timestamp('D') + expected = DatetimeIndex([pd.NaT, datetime(2011, 1, 1), + datetime(2011, 2, 1)], name='idx') + tm.assert_index_equal(result, expected) + assert result.name == 'idx' + + result2 = result.to_period(freq='M') + tm.assert_index_equal(result2, index) + assert result2.name == 'idx' + + result3 = result.to_period(freq='3M') + exp = PeriodIndex(['NaT', '2011-01', '2011-02'], + freq='3M', name='idx') + tm.assert_index_equal(result3, exp) + assert result3.freqstr == '3M' + + msg = ('Frequency must be positive, because it' + ' represents span: 
-2A') + with tm.assert_raises_regex(ValueError, msg): + result.to_period(freq='-2A') + + def test_to_timestamp_preserve_name(self): + index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009', + name='foo') + assert index.name == 'foo' + + conv = index.to_timestamp('D') + assert conv.name == 'foo' + + def test_to_timestamp_quarterly_bug(self): + years = np.arange(1960, 2000).repeat(4) + quarters = np.tile(lrange(1, 5), 40) + + pindex = PeriodIndex(year=years, quarter=quarters) + + stamps = pindex.to_timestamp('D', 'end') + expected = DatetimeIndex([x.to_timestamp('D', 'end') for x in pindex]) + tm.assert_index_equal(stamps, expected) + + def test_to_timestamp_pi_mult(self): + idx = PeriodIndex(['2011-01', 'NaT', '2011-02'], + freq='2M', name='idx') + + result = idx.to_timestamp() + expected = DatetimeIndex(['2011-01-01', 'NaT', '2011-02-01'], + name='idx') + tm.assert_index_equal(result, expected) - p1 = pd.Period('2014-01-01', freq=freq) - assert pidx.searchsorted(p1) == 0 + result = idx.to_timestamp(how='E') + expected = DatetimeIndex(['2011-02-28', 'NaT', '2011-03-31'], + name='idx') + tm.assert_index_equal(result, expected) - p2 = pd.Period('2014-01-04', freq=freq) - assert pidx.searchsorted(p2) == 3 + def test_to_timestamp_pi_combined(self): + idx = PeriodIndex(start='2011', periods=2, freq='1D1H', name='idx') - msg = "Input has different freq=H from PeriodIndex" - with tm.assert_raises_regex( - period.IncompatibleFrequency, msg): - pidx.searchsorted(pd.Period('2014-01-01', freq='H')) + result = idx.to_timestamp() + expected = DatetimeIndex(['2011-01-01 00:00', '2011-01-02 01:00'], + name='idx') + tm.assert_index_equal(result, expected) - msg = "Input has different freq=5D from PeriodIndex" - with tm.assert_raises_regex( - period.IncompatibleFrequency, msg): - pidx.searchsorted(pd.Period('2014-01-01', freq='5D')) + result = idx.to_timestamp(how='E') + expected = DatetimeIndex(['2011-01-02 00:59:59', + '2011-01-03 01:59:59'], + name='idx') + 
tm.assert_index_equal(result, expected) - with tm.assert_produces_warning(FutureWarning): - pidx.searchsorted(key=p2) + result = idx.to_timestamp(how='E', freq='H') + expected = DatetimeIndex(['2011-01-02 00:00', '2011-01-03 01:00'], + name='idx') + tm.assert_index_equal(result, expected) + + def test_period_astype_to_timestamp(self): + pi = pd.PeriodIndex(['2011-01', '2011-02', '2011-03'], freq='M') + + exp = pd.DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01']) + tm.assert_index_equal(pi.astype('datetime64[ns]'), exp) + + exp = pd.DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31']) + tm.assert_index_equal(pi.astype('datetime64[ns]', how='end'), exp) + + exp = pd.DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01'], + tz='US/Eastern') + res = pi.astype('datetime64[ns, US/Eastern]') + tm.assert_index_equal(pi.astype('datetime64[ns, US/Eastern]'), exp) + + exp = pd.DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31'], + tz='US/Eastern') + res = pi.astype('datetime64[ns, US/Eastern]', how='end') + tm.assert_index_equal(res, exp) + + def test_to_timestamp_1703(self): + index = period_range('1/1/2012', periods=4, freq='D') + + result = index.to_timestamp() + assert result[0] == Timestamp('1/1/2012') diff --git a/pandas/tests/indexes/timedeltas/test_arithmetic.py b/pandas/tests/indexes/timedeltas/test_arithmetic.py index 029fdfcefc299..4141d66cb519b 100644 --- a/pandas/tests/indexes/timedeltas/test_arithmetic.py +++ b/pandas/tests/indexes/timedeltas/test_arithmetic.py @@ -671,19 +671,19 @@ def test_dti_tdi_numeric_ops(self): expected = DatetimeIndex(['20121231', pd.NaT, '20130101']) tm.assert_index_equal(result, expected) - def test_sub_period(self): + @pytest.mark.parametrize('freq', [None, 'H']) + def test_sub_period(self, freq): # GH 13078 # not supported, check TypeError p = pd.Period('2011-01-01', freq='D') - for freq in [None, 'H']: - idx = pd.TimedeltaIndex(['1 hours', '2 hours'], freq=freq) + idx = pd.TimedeltaIndex(['1 hours', '2 hours'], 
freq=freq) - with pytest.raises(TypeError): - idx - p + with pytest.raises(TypeError): + idx - p - with pytest.raises(TypeError): - p - idx + with pytest.raises(TypeError): + p - idx def test_addition_ops(self): # with datetimes/timedelta and tdi/dti diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index 690ba66b6f5ef..49737e5359c2f 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -227,14 +227,15 @@ def test_drop_duplicates(self): res = Series(idx).drop_duplicates(keep=False) tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31))) - def test_infer_freq(self): - # GH 11018 - for freq in ['D', '3D', '-3D', 'H', '2H', '-2H', 'T', '2T', 'S', '-3S' - ]: - idx = pd.timedelta_range('1', freq=freq, periods=10) - result = pd.TimedeltaIndex(idx.asi8, freq='infer') - tm.assert_index_equal(idx, result) - assert result.freq == freq + @pytest.mark.parametrize('freq', ['D', '3D', '-3D', + 'H', '2H', '-2H', + 'T', '2T', 'S', '-3S']) + def test_infer_freq(self, freq): + # GH#11018 + idx = pd.timedelta_range('1', freq=freq, periods=10) + result = pd.TimedeltaIndex(idx.asi8, freq='infer') + tm.assert_index_equal(idx, result) + assert result.freq == freq def test_nat_new(self): diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index ce0f3b89b753e..37db9d704aa1f 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -102,13 +102,11 @@ def test_factorize(self): tm.assert_numpy_array_equal(arr, exp_arr) tm.assert_index_equal(idx, idx3) - def test_join_self(self): - + @pytest.mark.parametrize('kind', ['outer', 'inner', 'left', 'right']) + def test_join_self(self, kind): index = timedelta_range('1 day', periods=10) - kinds = 'outer', 'inner', 'left', 'right' - for kind in kinds: - joined = index.join(index, how=kind) - 
tm.assert_index_equal(index, joined) + joined = index.join(index, how=kind) + tm.assert_index_equal(index, joined) def test_does_not_convert_mixed_integer(self): df = tm.makeCustomDataframe(10, 10, diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py index 41b3bb55bfff1..dff5433adcf79 100644 --- a/pandas/tests/scalar/period/test_period.py +++ b/pandas/tests/scalar/period/test_period.py @@ -20,21 +20,21 @@ class TestPeriodProperties(object): "Test properties such as year, month, weekday, etc...." - def test_is_leap_year(self): + @pytest.mark.parametrize('freq', ['A', 'M', 'D', 'H']) + def test_is_leap_year(self, freq): # GH 13727 - for freq in ['A', 'M', 'D', 'H']: - p = Period('2000-01-01 00:00:00', freq=freq) - assert p.is_leap_year - assert isinstance(p.is_leap_year, bool) + p = Period('2000-01-01 00:00:00', freq=freq) + assert p.is_leap_year + assert isinstance(p.is_leap_year, bool) - p = Period('1999-01-01 00:00:00', freq=freq) - assert not p.is_leap_year + p = Period('1999-01-01 00:00:00', freq=freq) + assert not p.is_leap_year - p = Period('2004-01-01 00:00:00', freq=freq) - assert p.is_leap_year + p = Period('2004-01-01 00:00:00', freq=freq) + assert p.is_leap_year - p = Period('2100-01-01 00:00:00', freq=freq) - assert not p.is_leap_year + p = Period('2100-01-01 00:00:00', freq=freq) + assert not p.is_leap_year def test_quarterly_negative_ordinals(self): p = Period(ordinal=-1, freq='Q-DEC') @@ -52,40 +52,40 @@ def test_quarterly_negative_ordinals(self): assert p.month == 11 assert isinstance(p, Period) - def test_period_cons_quarterly(self): + @pytest.mark.parametrize('month', MONTHS) + def test_period_cons_quarterly(self, month): # bugs in scikits.timeseries - for month in MONTHS: - freq = 'Q-%s' % month - exp = Period('1989Q3', freq=freq) - assert '1989Q3' in str(exp) - stamp = exp.to_timestamp('D', how='end') - p = Period(stamp, freq=freq) - assert p == exp - - stamp = exp.to_timestamp('3D', how='end') - p = 
Period(stamp, freq=freq) - assert p == exp - - def test_period_cons_annual(self): + freq = 'Q-%s' % month + exp = Period('1989Q3', freq=freq) + assert '1989Q3' in str(exp) + stamp = exp.to_timestamp('D', how='end') + p = Period(stamp, freq=freq) + assert p == exp + + stamp = exp.to_timestamp('3D', how='end') + p = Period(stamp, freq=freq) + assert p == exp + + @pytest.mark.parametrize('month', MONTHS) + def test_period_cons_annual(self, month): # bugs in scikits.timeseries - for month in MONTHS: - freq = 'A-%s' % month - exp = Period('1989', freq=freq) - stamp = exp.to_timestamp('D', how='end') + timedelta(days=30) - p = Period(stamp, freq=freq) - assert p == exp + 1 - assert isinstance(p, Period) - - def test_period_cons_weekly(self): - for num in range(10, 17): - daystr = '2011-02-%d' % num - for day in DAYS: - freq = 'W-%s' % day - - result = Period(daystr, freq=freq) - expected = Period(daystr, freq='D').asfreq(freq) - assert result == expected - assert isinstance(result, Period) + freq = 'A-%s' % month + exp = Period('1989', freq=freq) + stamp = exp.to_timestamp('D', how='end') + timedelta(days=30) + p = Period(stamp, freq=freq) + assert p == exp + 1 + assert isinstance(p, Period) + + @pytest.mark.parametrize('day', DAYS) + @pytest.mark.parametrize('num', range(10, 17)) + def test_period_cons_weekly(self, num, day): + daystr = '2011-02-%d' % num + freq = 'W-%s' % day + + result = Period(daystr, freq=freq) + expected = Period(daystr, freq='D').asfreq(freq) + assert result == expected + assert isinstance(result, Period) def test_period_from_ordinal(self): p = pd.Period('2011-01', freq='M') @@ -212,58 +212,59 @@ def test_period_cons_combined(self): with tm.assert_raises_regex(ValueError, msg): Period('2011-01', freq='1D1W') - def test_timestamp_tz_arg(self): - for case in ['Europe/Brussels', 'Asia/Tokyo', 'US/Pacific']: - p = Period('1/1/2005', freq='M').to_timestamp(tz=case) - exp = Timestamp('1/1/2005', tz='UTC').tz_convert(case) - exp_zone = 
pytz.timezone(case).normalize(p) - - assert p == exp - assert p.tz == exp_zone.tzinfo - assert p.tz == exp.tz - - p = Period('1/1/2005', freq='3H').to_timestamp(tz=case) - exp = Timestamp('1/1/2005', tz='UTC').tz_convert(case) - exp_zone = pytz.timezone(case).normalize(p) - - assert p == exp - assert p.tz == exp_zone.tzinfo - assert p.tz == exp.tz - - p = Period('1/1/2005', freq='A').to_timestamp(freq='A', tz=case) - exp = Timestamp('31/12/2005', tz='UTC').tz_convert(case) - exp_zone = pytz.timezone(case).normalize(p) - - assert p == exp - assert p.tz == exp_zone.tzinfo - assert p.tz == exp.tz - - p = Period('1/1/2005', freq='A').to_timestamp(freq='3H', tz=case) - exp = Timestamp('1/1/2005', tz='UTC').tz_convert(case) - exp_zone = pytz.timezone(case).normalize(p) - - assert p == exp - assert p.tz == exp_zone.tzinfo - assert p.tz == exp.tz - - def test_timestamp_tz_arg_dateutil(self): + @pytest.mark.parametrize('tzstr', ['Europe/Brussels', + 'Asia/Tokyo', 'US/Pacific']) + def test_timestamp_tz_arg(self, tzstr): + p = Period('1/1/2005', freq='M').to_timestamp(tz=tzstr) + exp = Timestamp('1/1/2005', tz='UTC').tz_convert(tzstr) + exp_zone = pytz.timezone(tzstr).normalize(p) + + assert p == exp + assert p.tz == exp_zone.tzinfo + assert p.tz == exp.tz + + p = Period('1/1/2005', freq='3H').to_timestamp(tz=tzstr) + exp = Timestamp('1/1/2005', tz='UTC').tz_convert(tzstr) + exp_zone = pytz.timezone(tzstr).normalize(p) + + assert p == exp + assert p.tz == exp_zone.tzinfo + assert p.tz == exp.tz + + p = Period('1/1/2005', freq='A').to_timestamp(freq='A', tz=tzstr) + exp = Timestamp('31/12/2005', tz='UTC').tz_convert(tzstr) + exp_zone = pytz.timezone(tzstr).normalize(p) + + assert p == exp + assert p.tz == exp_zone.tzinfo + assert p.tz == exp.tz + + p = Period('1/1/2005', freq='A').to_timestamp(freq='3H', tz=tzstr) + exp = Timestamp('1/1/2005', tz='UTC').tz_convert(tzstr) + exp_zone = pytz.timezone(tzstr).normalize(p) + + assert p == exp + assert p.tz == exp_zone.tzinfo + 
assert p.tz == exp.tz + + @pytest.mark.parametrize('tzstr', ['dateutil/Europe/Brussels', + 'dateutil/Asia/Tokyo', + 'dateutil/US/Pacific']) + def test_timestamp_tz_arg_dateutil(self, tzstr): from pandas._libs.tslibs.timezones import dateutil_gettz from pandas._libs.tslibs.timezones import maybe_get_tz - for case in ['dateutil/Europe/Brussels', 'dateutil/Asia/Tokyo', - 'dateutil/US/Pacific']: - p = Period('1/1/2005', freq='M').to_timestamp( - tz=maybe_get_tz(case)) - exp = Timestamp('1/1/2005', tz='UTC').tz_convert(case) - assert p == exp - assert p.tz == dateutil_gettz(case.split('/', 1)[1]) - assert p.tz == exp.tz - - p = Period('1/1/2005', - freq='M').to_timestamp(freq='3H', tz=maybe_get_tz(case)) - exp = Timestamp('1/1/2005', tz='UTC').tz_convert(case) - assert p == exp - assert p.tz == dateutil_gettz(case.split('/', 1)[1]) - assert p.tz == exp.tz + tz = maybe_get_tz(tzstr) + p = Period('1/1/2005', freq='M').to_timestamp(tz=tz) + exp = Timestamp('1/1/2005', tz='UTC').tz_convert(tzstr) + assert p == exp + assert p.tz == dateutil_gettz(tzstr.split('/', 1)[1]) + assert p.tz == exp.tz + + p = Period('1/1/2005', freq='M').to_timestamp(freq='3H', tz=tz) + exp = Timestamp('1/1/2005', tz='UTC').tz_convert(tzstr) + assert p == exp + assert p.tz == dateutil_gettz(tzstr.split('/', 1)[1]) + assert p.tz == exp.tz def test_timestamp_tz_arg_dateutil_from_string(self): from pandas._libs.tslibs.timezones import dateutil_gettz @@ -1403,14 +1404,14 @@ def test_sub_offset_nat(self): timedelta(hours=23, minutes=30)]: assert p - o is tslib.NaT - def test_nat_ops(self): - for freq in ['M', '2M', '3M']: - p = Period('NaT', freq=freq) - assert p + 1 is tslib.NaT - assert 1 + p is tslib.NaT - assert p - 1 is tslib.NaT - assert p - Period('2011-01', freq=freq) is tslib.NaT - assert Period('2011-01', freq=freq) - p is tslib.NaT + @pytest.mark.parametrize('freq', ['M', '2M', '3M']) + def test_nat_ops(self, freq): + p = Period('NaT', freq=freq) + assert p + 1 is tslib.NaT + assert 1 + p is 
tslib.NaT + assert p - 1 is tslib.NaT + assert p - Period('2011-01', freq=freq) is tslib.NaT + assert Period('2011-01', freq=freq) - p is tslib.NaT def test_period_ops_offset(self): p = Period('2011-04-01', freq='D') From 4726b84412357584f7650089f7d58ae7b4da932f Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 21 Feb 2018 19:12:22 -0500 Subject: [PATCH 160/217] DEPR: remove pandas.core.common is_* (#19769) --- doc/source/whatsnew/v0.23.0.txt | 18 ++++++++++ pandas/core/base.py | 10 +++--- pandas/core/common.py | 60 --------------------------------- pandas/core/resample.py | 14 ++++---- pandas/tests/api/test_types.py | 38 --------------------- 5 files changed, 30 insertions(+), 110 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index c9951e0ec4378..f947cacbfde07 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -345,6 +345,23 @@ Convert to an xarray DataArray p.to_xarray() + + +.. _whatsnew_0230.api_breaking.core_common: + +pandas.core.common removals +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The following error & warning messages are removed from ``pandas.core.common`` (:issue:`13634`, :issue:`19769`): + +- ``PerformanceWarning`` +- ``UnsupportedFunctionCall`` +- ``UnsortedIndexError`` +- ``AbstractMethodError`` + +These are available from import from ``pandas.errors`` (since 0.19.0). + + .. _whatsnew_0230.api_breaking.apply: Changes to make output of ``DataFrame.apply`` consistent @@ -644,6 +661,7 @@ Removal of prior version deprecations/changes - The modules ``pandas.tools.hashing`` and ``pandas.util.hashing`` have been removed (:issue:`16223`) - The top-level functions ``pd.rolling_*``, ``pd.expanding_*`` and ``pd.ewm*`` have been removed (Deprecated since v0.18). 
Instead, use the DataFrame/Series methods :attr:`~DataFrame.rolling`, :attr:`~DataFrame.expanding` and :attr:`~DataFrame.ewm` (:issue:`18723`) +- Imports from ``pandas.core.common`` for functions such as ``is_datetime64_dtype`` are now removed. These are located in ``pandas.api.types``. (:issue:`13634`, :issue:`19769`) .. _whatsnew_0230.performance: diff --git a/pandas/core/base.py b/pandas/core/base.py index ebd69a5f9aac1..280b8849792e3 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -17,7 +17,7 @@ is_extension_array_dtype) from pandas.util._validators import validate_bool_kwarg - +from pandas.errors import AbstractMethodError from pandas.core import common as com, algorithms import pandas.core.nanops as nanops import pandas._libs.lib as lib @@ -46,7 +46,7 @@ class StringMixin(object): # Formatting def __unicode__(self): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def __str__(self): """ @@ -278,10 +278,10 @@ def _gotitem(self, key, ndim, subset=None): subset to act on """ - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def aggregate(self, func, *args, **kwargs): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) agg = aggregate @@ -1247,4 +1247,4 @@ def duplicated(self, keep='first'): # abstracts def _update_inplace(self, result, **kwargs): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) diff --git a/pandas/core/common.py b/pandas/core/common.py index 77dc1522052d4..c4fbcf28cbcae 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -2,8 +2,6 @@ Misc tools for implementing data structures """ -import sys -import warnings from datetime import datetime, timedelta from functools import partial import inspect @@ -20,66 +18,8 @@ from pandas.core.dtypes.inference import _iterable_not_string from pandas.core.dtypes.missing import isna, isnull, notnull # noqa from pandas.api import types -from pandas.core.dtypes import common from 
pandas.core.dtypes.cast import construct_1d_object_array_from_listlike -# compat -from pandas.errors import ( # noqa - PerformanceWarning, UnsupportedFunctionCall, UnsortedIndexError, - AbstractMethodError) - -# back-compat of public API -# deprecate these functions -m = sys.modules['pandas.core.common'] -for t in [t for t in dir(types) if not t.startswith('_')]: - - def outer(t=t): - - def wrapper(*args, **kwargs): - warnings.warn("pandas.core.common.{t} is deprecated. " - "import from the public API: " - "pandas.api.types.{t} instead".format(t=t), - DeprecationWarning, stacklevel=3) - return getattr(types, t)(*args, **kwargs) - return wrapper - - setattr(m, t, outer(t)) - -# back-compat for non-public functions -# deprecate these functions -for t in ['is_datetime_arraylike', - 'is_datetime_or_timedelta_dtype', - 'is_datetimelike', - 'is_datetimelike_v_numeric', - 'is_datetimelike_v_object', - 'is_datetimetz', - 'is_int_or_datetime_dtype', - 'is_period_arraylike', - 'is_string_like', - 'is_string_like_dtype']: - - def outer(t=t): - - def wrapper(*args, **kwargs): - warnings.warn("pandas.core.common.{t} is deprecated. 
" - "These are not longer public API functions, " - "but can be imported from " - "pandas.api.types.{t} instead".format(t=t), - DeprecationWarning, stacklevel=3) - return getattr(common, t)(*args, **kwargs) - return wrapper - - setattr(m, t, outer(t)) - - -# deprecate array_equivalent - -def array_equivalent(*args, **kwargs): - warnings.warn("'pandas.core.common.array_equivalent' is deprecated and " - "is no longer public API", DeprecationWarning, stacklevel=2) - from pandas.core.dtypes import missing - return missing.array_equivalent(*args, **kwargs) - class SettingWithCopyError(ValueError): pass diff --git a/pandas/core/resample.py b/pandas/core/resample.py index df656092f476e..772568ee84737 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -16,7 +16,7 @@ from pandas.core.indexes.timedeltas import TimedeltaIndex from pandas.tseries.offsets import DateOffset, Tick, Day, delta_to_nanoseconds from pandas.core.indexes.period import PeriodIndex -import pandas.core.common as com +from pandas.errors import AbstractMethodError import pandas.core.algorithms as algos from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries @@ -205,10 +205,10 @@ def __setattr__(self, attr, value): def __getitem__(self, key): try: return super(Resampler, self).__getitem__(key) - except (KeyError, com.AbstractMethodError): + except (KeyError, AbstractMethodError): # compat for deprecated - if isinstance(self.obj, com.ABCSeries): + if isinstance(self.obj, ABCSeries): return self._deprecated('__getitem__')[key] raise @@ -233,7 +233,7 @@ def _convert_obj(self, obj): return obj def _get_binner_for_time(self): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _set_binner(self): """ @@ -372,10 +372,10 @@ def transform(self, arg, *args, **kwargs): arg, *args, **kwargs) def _downsample(self, f): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _upsample(self, f, limit=None, fill_value=None): - raise 
com.AbstractMethodError(self) + raise AbstractMethodError(self) def _gotitem(self, key, ndim, subset=None): """ @@ -464,7 +464,7 @@ def _get_resampler_for_grouping(self, groupby, **kwargs): def _wrap_result(self, result): """ potentially wrap any results """ - if isinstance(result, com.ABCSeries) and self._selection is not None: + if isinstance(result, ABCSeries) and self._selection is not None: result.name = self._selection if isinstance(result, ABCSeries) and result.empty: diff --git a/pandas/tests/api/test_types.py b/pandas/tests/api/test_types.py index 7e6430accc546..bd4891326c751 100644 --- a/pandas/tests/api/test_types.py +++ b/pandas/tests/api/test_types.py @@ -3,10 +3,8 @@ import pytest from warnings import catch_warnings -import numpy as np import pandas -from pandas.core import common as com from pandas.api import types from pandas.util import testing as tm @@ -52,42 +50,6 @@ def check_deprecation(self, fold, fnew): except AttributeError: pytest.raises(AttributeError, lambda: fnew('foo')) - def test_deprecation_core_common(self): - - # test that we are in fact deprecating - # the pandas.core.common introspectors - for t in self.allowed: - self.check_deprecation(getattr(com, t), getattr(types, t)) - - def test_deprecation_core_common_array_equivalent(self): - - with tm.assert_produces_warning(DeprecationWarning): - com.array_equivalent(np.array([1, 2]), np.array([1, 2])) - - def test_deprecation_core_common_moved(self): - - # these are in pandas.core.dtypes.common - l = ['is_datetime_arraylike', - 'is_datetime_or_timedelta_dtype', - 'is_datetimelike', - 'is_datetimelike_v_numeric', - 'is_datetimelike_v_object', - 'is_datetimetz', - 'is_int_or_datetime_dtype', - 'is_period_arraylike', - 'is_string_like', - 'is_string_like_dtype'] - - from pandas.core.dtypes import common as c - for t in l: - self.check_deprecation(getattr(com, t), getattr(c, t)) - - def test_removed_from_core_common(self): - - for t in ['is_null_datelike_scalar', - 'ensure_float']: - 
pytest.raises(AttributeError, lambda: getattr(com, t)) - def test_deprecated_from_api_types(self): for t in self.deprecated: From 4ea1508ebc07a191a5fe80b6177746c216f9c670 Mon Sep 17 00:00:00 2001 From: HagaiHargil Date: Thu, 22 Feb 2018 02:16:21 +0200 Subject: [PATCH 161/217] DOC: Clarify and add fill_value example in arithmetic ops (#19675) --- pandas/core/ops.py | 97 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 90 insertions(+), 7 deletions(-) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 9e80ab3b3da4c..b20f208d14dc5 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -408,8 +408,10 @@ def _get_op_name(op, special): ---------- other : Series or scalar value fill_value : None or float value, default None (NaN) - Fill missing (NaN) values with this value. If both Series are - missing, the result will be missing + Fill existing missing (NaN) values, and any new element needed for + successful Series alignment, with this value before computation. + If data in both corresponding Series locations is missing + the result will be missing level : int or name Broadcast across a level, matching Index values on the passed MultiIndex level @@ -418,6 +420,30 @@ def _get_op_name(op, special): ------- result : Series +Examples +-------- +>>> a = pd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) +>>> a +a 1.0 +b 1.0 +c 1.0 +d NaN +dtype: float64 +>>> b = pd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) +>>> b +a 1.0 +b NaN +d 1.0 +e NaN +dtype: float64 +>>> a.add(b, fill_value=0) +a 2.0 +b 1.0 +c 1.0 +d 1.0 +e NaN +dtype: float64 + See also -------- Series.{reverse} @@ -433,8 +459,10 @@ def _get_op_name(op, special): axis : {0, 1, 'index', 'columns'} For Series input, axis to match Series index on fill_value : None or float value, default None - Fill missing (NaN) values with this value. 
If both DataFrame locations are - missing, the result will be missing + Fill existing missing (NaN) values, and any new element needed for + successful DataFrame alignment, with this value before computation. + If data in both corresponding DataFrame locations is missing + the result will be missing level : int or name Broadcast across a level, matching Index values on the passed MultiIndex level @@ -446,6 +474,33 @@ def _get_op_name(op, special): Returns ------- result : DataFrame + +Examples +-------- +>>> a = pd.DataFrame([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd'], + columns=['one']) +>>> a + one +a 1.0 +b 1.0 +c 1.0 +d NaN +>>> b = pd.DataFrame(dict(one=[1, np.nan, 1, np.nan], + two=[np.nan, 2, np.nan, 2]), + index=['a', 'b', 'd', 'e']) +>>> b + one two +a 1.0 NaN +b NaN 2.0 +d 1.0 NaN +e NaN 2.0 +>>> a.add(b, fill_value=0) + one two +a 2.0 NaN +b 1.0 2.0 +c 1.0 NaN +d 1.0 NaN +e NaN 2.0 """ _flex_doc_FRAME = """ @@ -460,8 +515,10 @@ def _get_op_name(op, special): axis : {{0, 1, 'index', 'columns'}} For Series input, axis to match Series index on fill_value : None or float value, default None - Fill missing (NaN) values with this value. If both DataFrame - locations are missing, the result will be missing + Fill existing missing (NaN) values, and any new element needed for + successful DataFrame alignment, with this value before computation. 
+ If data in both corresponding DataFrame locations is missing + the result will be missing level : int or name Broadcast across a level, matching Index values on the passed MultiIndex level @@ -474,6 +531,33 @@ def _get_op_name(op, special): ------- result : DataFrame +Examples +-------- +>>> a = pd.DataFrame([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd'], + columns=['one']) +>>> a + one +a 1.0 +b 1.0 +c 1.0 +d NaN +>>> b = pd.DataFrame(dict(one=[1, np.nan, 1, np.nan], + two=[np.nan, 2, np.nan, 2]), + index=['a', 'b', 'd', 'e']) +>>> b + one two +a 1.0 NaN +b NaN 2.0 +d 1.0 NaN +e NaN 2.0 +>>> a.add(b, fill_value=0) + one two +a 2.0 NaN +b 1.0 2.0 +c 1.0 NaN +d 1.0 NaN +e NaN 2.0 + See also -------- DataFrame.{reverse} @@ -545,7 +629,6 @@ def _make_flex_doc(op_name, typ): base_doc = _flex_doc_PANEL else: raise AssertionError('Invalid typ argument.') - doc = base_doc.format(desc=op_desc['desc'], op_name=op_name, equiv=equiv, reverse=op_desc['reverse']) return doc From 51350bcc6ac911b8bbf97841fb3837eda7234e0c Mon Sep 17 00:00:00 2001 From: Marco Hemken Date: Wed, 21 Feb 2018 16:18:03 -0800 Subject: [PATCH 162/217] DOC: added plotting module to the api reference docs (#19780) --- doc/source/api.rst | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index 3b38f0caa1766..b8aad67e147ba 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -2388,15 +2388,23 @@ Style Export and Import Styler.to_excel Plotting -~~~~~~~~ +-------- -.. currentmodule:: pandas +.. currentmodule:: pandas.plotting + +The following functions are contained in the `pandas.plotting` module. .. autosummary:: :toctree: generated/ - plotting.register_matplotlib_converters - plotting.deregister_matplotlib_converters + andrews_curves + bootstrap_plot + deregister_matplotlib_converters + lag_plot + parallel_coordinates + radviz + register_matplotlib_converters + scatter_matrix .. 
currentmodule:: pandas From 891ee9247ce32739d1df0d9bbd5e48b92553f87f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 21 Feb 2018 18:20:57 -0600 Subject: [PATCH 163/217] API: Validate keyword arguments to fillna (#19684) --- doc/source/whatsnew/v0.23.0.txt | 3 +- pandas/core/arrays/categorical.py | 5 +++- pandas/core/generic.py | 12 ++------ pandas/tests/categorical/test_missing.py | 16 +++++++++++ pandas/util/_validators.py | 36 ++++++++++++++++++++++++ 5 files changed, 61 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index f947cacbfde07..4c1e98b236db7 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -573,6 +573,7 @@ Datetimelike API Changes - Subtracting ``NaT`` from a :class:`Series` with ``dtype='datetime64[ns]'`` returns a ``Series`` with ``dtype='timedelta64[ns]'`` instead of ``dtype='datetime64[ns]'`` (:issue:`18808`) - Operations between a :class:`Series` with dtype ``dtype='datetime64[ns]'`` and a :class:`PeriodIndex` will correctly raises ``TypeError`` (:issue:`18850`) - Subtraction of :class:`Series` with timezone-aware ``dtype='datetime64[ns]'`` with mis-matched timezones will raise ``TypeError`` instead of ``ValueError`` (:issue:`18817`) +- :func:`pandas.merge` provides a more informative error message when trying to merge on timezone-aware and timezone-naive columns (:issue:`15800`) .. 
_whatsnew_0230.api.other: @@ -592,7 +593,6 @@ Other API Changes - :func:`Series.fillna` now raises a ``TypeError`` instead of a ``ValueError`` when passed a list, tuple or DataFrame as a ``value`` (:issue:`18293`) - :func:`pandas.DataFrame.merge` no longer casts a ``float`` column to ``object`` when merging on ``int`` and ``float`` columns (:issue:`16572`) - :func:`pandas.merge` now raises a ``ValueError`` when trying to merge on incompatible data types (:issue:`9780`) -- :func:`pandas.merge` provides a more informative error message when trying to merge on timezone-aware and timezone-naive columns (:issue:`15800`) - The default NA value for :class:`UInt64Index` has changed from 0 to ``NaN``, which impacts methods that mask with NA, such as ``UInt64Index.where()`` (:issue:`18398`) - Refactored ``setup.py`` to use ``find_packages`` instead of explicitly listing out all subpackages (:issue:`18535`) - Rearranged the order of keyword arguments in :func:`read_excel()` to align with :func:`read_csv()` (:issue:`16672`) @@ -606,6 +606,7 @@ Other API Changes - :func:`Series.to_csv` now accepts a ``compression`` argument that works in the same way as the ``compression`` argument in :func:`DataFrame.to_csv` (:issue:`18958`) - Set operations (union, difference...) on :class:`IntervalIndex` with incompatible index types will now raise a ``TypeError`` rather than a ``ValueError`` (:issue:`19329`) - :class:`DateOffset` objects render more simply, e.g. ```` instead of ```` (:issue:`19403`) +- ``Categorical.fillna`` now validates its ``value`` and ``method`` keyword arguments. It now raises when both or none are specified, matching the behavior of :meth:`Series.fillna` (:issue:`19682`) .. 
_whatsnew_0230.deprecations: diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 7354115f8295e..493b2e5bd899b 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -40,7 +40,7 @@ Appender, cache_readonly, deprecate_kwarg, Substitution) from pandas.io.formats.terminal import get_terminal_size -from pandas.util._validators import validate_bool_kwarg +from pandas.util._validators import validate_bool_kwarg, validate_fillna_kwargs from pandas.core.config import get_option from .base import ExtensionArray @@ -1610,6 +1610,9 @@ def fillna(self, value=None, method=None, limit=None): ------- filled : Categorical with NA/NaN filled """ + value, method = validate_fillna_kwargs( + value, method, validate_scalar_dict_value=False + ) if value is None: value = np.nan diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 297450417e3cf..8034cf89cf8b7 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -54,7 +54,7 @@ import pandas.core.nanops as nanops from pandas.util._decorators import (Appender, Substitution, deprecate_kwarg) -from pandas.util._validators import validate_bool_kwarg +from pandas.util._validators import validate_bool_kwarg, validate_fillna_kwargs from pandas.core import config # goal is to be able to define the docs close to function, while still being @@ -4697,10 +4697,8 @@ def infer_objects(self): def fillna(self, value=None, method=None, axis=None, inplace=False, limit=None, downcast=None): inplace = validate_bool_kwarg(inplace, 'inplace') + value, method = validate_fillna_kwargs(value, method) - if isinstance(value, (list, tuple)): - raise TypeError('"value" parameter must be a scalar or dict, but ' - 'you passed a "{0}"'.format(type(value).__name__)) self._consolidate_inplace() # set the default here, so functions examining the signaure @@ -4711,8 +4709,7 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, method = 
missing.clean_fill_method(method) from pandas import DataFrame if value is None: - if method is None: - raise ValueError('must specify a fill method or value') + if self._is_mixed_type and axis == 1: if inplace: raise NotImplementedError() @@ -4746,9 +4743,6 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, coerce=True, downcast=downcast) else: - if method is not None: - raise ValueError('cannot specify both a fill method and value') - if len(self._get_axis(axis)) == 0: return self diff --git a/pandas/tests/categorical/test_missing.py b/pandas/tests/categorical/test_missing.py index 79758dee5cfda..fca5573547071 100644 --- a/pandas/tests/categorical/test_missing.py +++ b/pandas/tests/categorical/test_missing.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- import numpy as np +import pytest import pandas.util.testing as tm from pandas import (Categorical, Index, isna) @@ -53,3 +54,18 @@ def test_set_item_nan(self): exp = Categorical([1, np.nan, 3], categories=[1, 2, 3]) tm.assert_categorical_equal(cat, exp) + + @pytest.mark.parametrize('fillna_kwargs, msg', [ + (dict(value=1, method='ffill'), + "Cannot specify both 'value' and 'method'."), + (dict(), + "Must specify a fill 'value' or 'method'."), + (dict(method='bad'), + "Invalid fill method. Expecting .* bad"), + ]) + def test_fillna_raises(self, fillna_kwargs, msg): + # https://github.com/pandas-dev/pandas/issues/19682 + cat = Categorical([1, 2, 3]) + + with tm.assert_raises_regex(ValueError, msg): + cat.fillna(**fillna_kwargs) diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py index b30ffc7416f92..a96563051e7de 100644 --- a/pandas/util/_validators.py +++ b/pandas/util/_validators.py @@ -320,3 +320,39 @@ def validate_axis_style_args(data, args, kwargs, arg_name, method_name): msg = "Cannot specify all of '{}', 'index', 'columns'." 
raise TypeError(msg.format(arg_name)) return out + + +def validate_fillna_kwargs(value, method, validate_scalar_dict_value=True): + """Validate the keyword arguments to 'fillna'. + + This checks that exactly one of 'value' and 'method' is specified. + If 'method' is specified, this validates that it's a valid method. + + Parameters + ---------- + value, method : object + The 'value' and 'method' keyword arguments for 'fillna'. + validate_scalar_dict_value : bool, default True + Whether to validate that 'value' is a scalar or dict. Specifically, + validate that it is not a list or tuple. + + Returns + ------- + value, method : object + """ + from pandas.core.missing import clean_fill_method + + if value is None and method is None: + raise ValueError("Must specify a fill 'value' or 'method'.") + elif value is None and method is not None: + method = clean_fill_method(method) + + elif value is not None and method is None: + if validate_scalar_dict_value and isinstance(value, (list, tuple)): + raise TypeError('"value" parameter must be a scalar or dict, but ' + 'you passed a "{0}"'.format(type(value).__name__)) + + elif value is not None and method is not None: + raise ValueError("Cannot specify both 'value' and 'method'.") + + return value, method From 49bfc0b3c04d3d42df4e13c4d455a7cbc8c85042 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 21 Feb 2018 17:37:33 -0800 Subject: [PATCH 164/217] Fix Index __mul__-like ops with timedelta scalars (#19333) --- doc/source/whatsnew/v0.23.0.txt | 2 + pandas/core/indexes/base.py | 23 +++++++++-- pandas/core/indexes/range.py | 12 +++++- pandas/tests/indexes/test_numeric.py | 38 ++++++++++++++++++- .../indexes/timedeltas/test_arithmetic.py | 16 +++++++- 5 files changed, 85 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 4c1e98b236db7..76c4fa08fca4d 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -765,6 +765,7 @@ Timedelta 
- Bug in :func:`Timedelta.__floordiv__`, :func:`Timedelta.__rfloordiv__` where operating with a ``Tick`` object would raise a ``TypeError`` instead of returning a numeric value (:issue:`19738`) - Bug in :func:`Period.asfreq` where periods near ``datetime(1, 1, 1)`` could be converted incorrectly (:issue:`19643`) - Bug in :func:`Timedelta.total_seconds()` causing precision errors i.e. ``Timedelta('30S').total_seconds()==30.000000000000004`` (:issue:`19458`) +- Multiplication of :class:`TimedeltaIndex` by ``TimedeltaIndex`` will now raise ``TypeError`` instead of raising ``ValueError`` in cases of length mis-match (:issue:`19333`) - Timezones @@ -799,6 +800,7 @@ Numeric - Bug in the :class:`DataFrame` constructor in which data containing very large positive or very large negative numbers was causing ``OverflowError`` (:issue:`18584`) - Bug in :class:`Index` constructor with ``dtype='uint64'`` where int-like floats were not coerced to :class:`UInt64Index` (:issue:`18400`) - Bug in :class:`DataFrame` flex arithmetic (e.g.
``df.add(other, fill_value=foo)``) with a ``fill_value`` other than ``None`` failed to raise ``NotImplementedError`` in corner cases where either the frame or ``other`` has length zero (:issue:`19522`) +- Multiplication and division of numeric-dtyped :class:`Index` objects with timedelta-like scalars returns ``TimedeltaIndex`` instead of raising ``TypeError`` (:issue:`19333`) Indexing diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 7dfa34bd634ad..59fe4bba649d3 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5,7 +5,7 @@ import numpy as np from pandas._libs import (lib, index as libindex, tslib as libts, algos as libalgos, join as libjoin, - Timestamp) + Timestamp, Timedelta) from pandas._libs.lib import is_datetime_array from pandas.compat import range, u, set_function_name @@ -16,7 +16,7 @@ from pandas.core.dtypes.generic import ( ABCSeries, ABCDataFrame, ABCMultiIndex, - ABCPeriodIndex, + ABCPeriodIndex, ABCTimedeltaIndex, ABCDateOffset) from pandas.core.dtypes.missing import isna, array_equivalent from pandas.core.dtypes.common import ( @@ -3918,7 +3918,21 @@ def dropna(self, how='any'): return self._shallow_copy() def _evaluate_with_timedelta_like(self, other, op, opstr, reversed=False): - raise TypeError("can only perform ops with timedelta like values") + # Timedelta knows how to operate with np.array, so dispatch to that + # operation and then wrap the results + other = Timedelta(other) + values = self.values + if reversed: + values, other = other, values + + with np.errstate(all='ignore'): + result = op(values, other) + + attrs = self._get_attributes_dict() + attrs = self._maybe_update_attributes(attrs) + if op == divmod: + return Index(result[0], **attrs), Index(result[1], **attrs) + return Index(result, **attrs) def _evaluate_with_datetime_like(self, other, op, opstr): raise TypeError("can only perform ops with datetime like values") @@ -4061,6 +4075,9 @@ def _make_evaluate_binop(op, opstr, 
reversed=False, constructor=Index): def _evaluate_numeric_binop(self, other): if isinstance(other, (ABCSeries, ABCDataFrame)): return NotImplemented + elif isinstance(other, ABCTimedeltaIndex): + # Defer to subclass implementation + return NotImplemented other = self._validate_for_numeric_binop(other, op, opstr) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 0ed92a67c7e14..0ac415ee0b701 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -1,5 +1,6 @@ from sys import getsizeof import operator +from datetime import timedelta import numpy as np from pandas._libs import index as libindex @@ -8,7 +9,7 @@ is_integer, is_scalar, is_int64_dtype) -from pandas.core.dtypes.generic import ABCSeries +from pandas.core.dtypes.generic import ABCSeries, ABCTimedeltaIndex from pandas import compat from pandas.compat import lrange, range, get_range_parameters @@ -587,6 +588,15 @@ def _make_evaluate_binop(op, opstr, reversed=False, step=False): def _evaluate_numeric_binop(self, other): if isinstance(other, ABCSeries): return NotImplemented + elif isinstance(other, ABCTimedeltaIndex): + # Defer to TimedeltaIndex implementation + return NotImplemented + elif isinstance(other, (timedelta, np.timedelta64)): + # GH#19333 is_integer evaluated True on timedelta64, + # so we need to catch these explicitly + if reversed: + return op(other, self._int64index) + return op(self._int64index, other) other = self._validate_for_numeric_binop(other, op, opstr) attrs = self._get_attributes_dict() diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index c6883df7ee91a..bafb6ae2e45f4 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -13,7 +13,7 @@ import pandas.util.testing as tm import pandas as pd -from pandas._libs.tslib import Timestamp +from pandas._libs.tslib import Timestamp, Timedelta from pandas.tests.indexes.common import Base @@ -26,6 +26,42 @@ def 
full_like(array, value): return ret +class TestIndexArithmeticWithTimedeltaScalar(object): + + @pytest.mark.parametrize('index', [ + Int64Index(range(1, 11)), + UInt64Index(range(1, 11)), + Float64Index(range(1, 11)), + RangeIndex(1, 11)]) + @pytest.mark.parametrize('scalar_td', [Timedelta(days=1), + Timedelta(days=1).to_timedelta64(), + Timedelta(days=1).to_pytimedelta()]) + def test_index_mul_timedelta(self, scalar_td, index): + # GH#19333 + expected = pd.timedelta_range('1 days', '10 days') + + result = index * scalar_td + tm.assert_index_equal(result, expected) + commute = scalar_td * index + tm.assert_index_equal(commute, expected) + + @pytest.mark.parametrize('index', [Int64Index(range(1, 3)), + UInt64Index(range(1, 3)), + Float64Index(range(1, 3)), + RangeIndex(1, 3)]) + @pytest.mark.parametrize('scalar_td', [Timedelta(days=1), + Timedelta(days=1).to_timedelta64(), + Timedelta(days=1).to_pytimedelta()]) + def test_index_rdiv_timedelta(self, scalar_td, index): + expected = pd.TimedeltaIndex(['1 Day', '12 Hours']) + + result = scalar_td / index + tm.assert_index_equal(result, expected) + + with pytest.raises(TypeError): + index / scalar_td + + class Numeric(Base): def test_numeric_compat(self): diff --git a/pandas/tests/indexes/timedeltas/test_arithmetic.py b/pandas/tests/indexes/timedeltas/test_arithmetic.py index 4141d66cb519b..24341b3419859 100644 --- a/pandas/tests/indexes/timedeltas/test_arithmetic.py +++ b/pandas/tests/indexes/timedeltas/test_arithmetic.py @@ -368,7 +368,7 @@ def test_dti_mul_dti_raises(self): def test_dti_mul_too_short_raises(self): idx = self._holder(np.arange(5, dtype='int64')) - with pytest.raises(ValueError): + with pytest.raises(TypeError): idx * self._holder(np.arange(3)) with pytest.raises(ValueError): idx * np.array([1, 2]) @@ -544,6 +544,20 @@ def test_tdi_div_tdlike_scalar_with_nat(self, delta): result = rng / delta tm.assert_index_equal(result, expected) + @pytest.mark.parametrize('other', [np.arange(1, 11), + 
pd.Int64Index(range(1, 11)), + pd.UInt64Index(range(1, 11)), + pd.Float64Index(range(1, 11)), + pd.RangeIndex(1, 11)]) + def test_tdi_rmul_arraylike(self, other): + tdi = TimedeltaIndex(['1 Day'] * 10) + expected = timedelta_range('1 days', '10 days') + + result = other * tdi + tm.assert_index_equal(result, expected) + commute = tdi * other + tm.assert_index_equal(commute, expected) + def test_subtraction_ops(self): # with datetimes/timedelta and tdi/dti tdi = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo') From 11c14e1ea4fa7d84b69fc052a8ab0816f293fbe7 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Thu, 22 Feb 2018 02:05:42 +0000 Subject: [PATCH 165/217] DOC: Improving code quality of doc/make.py, PEP-8, refactoring and removing unused commands (#19631) (#19634) --- ci/lint.sh | 7 + doc/make.py | 567 +++++++++++----------------------- doc/source/index.rst.template | 10 +- 3 files changed, 198 insertions(+), 386 deletions(-) diff --git a/ci/lint.sh b/ci/lint.sh index 49bf9a690b990..b862a3bfcf29e 100755 --- a/ci/lint.sh +++ b/ci/lint.sh @@ -37,6 +37,13 @@ if [ "$LINT" ]; then fi echo "Linting scripts/*.py DONE" + echo "Linting doc script" + flake8 doc/make.py + if [ $? -ne "0" ]; then + RET=1 + fi + echo "Linting doc script DONE" + echo "Linting *.pyx" flake8 pandas --filename=*.pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123,F403 if [ $? -ne "0" ]; then diff --git a/doc/make.py b/doc/make.py index acef563f301e4..e3cb29aa3e086 100755 --- a/doc/make.py +++ b/doc/make.py @@ -1,128 +1,62 @@ #!/usr/bin/env python - """ Python script for building documentation. To build the docs you must have all optional dependencies for pandas installed. See the installation instructions for a list of these. -Note: currently latex builds do not work because of table formats that are not -supported in the latex generation. 
- -2014-01-30: Latex has some issues but 'latex_forced' works ok for 0.13.0-400 or so - Usage ----- -python make.py clean -python make.py html + $ python make.py clean + $ python make.py html + $ python make.py latex """ -from __future__ import print_function - -import io -import glob # noqa +import sys import os import shutil -import sys +import subprocess +import argparse from contextlib import contextmanager +import jinja2 -import sphinx # noqa -import argparse -import jinja2 # noqa -os.environ['PYTHONPATH'] = '..' +DOC_PATH = os.path.dirname(os.path.abspath(__file__)) +SOURCE_PATH = os.path.join(DOC_PATH, 'source') +BUILD_PATH = os.path.join(DOC_PATH, 'build') +BUILD_DIRS = ['doctrees', 'html', 'latex', 'plots', '_static', '_templates'] -SPHINX_BUILD = 'sphinxbuild' +def _generate_index(include_api, single_doc=None): + """Create index.rst file with the specified sections. -def _process_user(user): - if user is None or user is False: - user = '' - else: - user = user + '@' - return user - - -def upload_dev(user=None): - 'push a copy to the pydata dev directory' - user = _process_user(user) - if os.system('cd build/html; rsync -avz . {0}pandas.pydata.org' - ':/usr/share/nginx/pandas/pandas-docs/dev/ -essh'.format(user)): - raise SystemExit('Upload to Pydata Dev failed') - - -def upload_dev_pdf(user=None): - 'push a copy to the pydata dev directory' - user = _process_user(user) - if os.system('cd build/latex; scp pandas.pdf {0}pandas.pydata.org' - ':/usr/share/nginx/pandas/pandas-docs/dev/'.format(user)): - raise SystemExit('PDF upload to Pydata Dev failed') - - -def upload_stable(user=None): - 'push a copy to the pydata stable directory' - user = _process_user(user) - if os.system('cd build/html; rsync -avz . 
{0}pandas.pydata.org' - ':/usr/share/nginx/pandas/pandas-docs/stable/ -essh'.format(user)): - raise SystemExit('Upload to stable failed') - - -def upload_stable_pdf(user=None): - 'push a copy to the pydata dev directory' - user = _process_user(user) - if os.system('cd build/latex; scp pandas.pdf {0}pandas.pydata.org' - ':/usr/share/nginx/pandas/pandas-docs/stable/'.format(user)): - raise SystemExit('PDF upload to stable failed') - - -def upload_prev(ver, doc_root='./', user=None): - 'push a copy of older release to appropriate version directory' - user = _process_user(user) - local_dir = doc_root + 'build/html' - remote_dir = '/usr/share/nginx/pandas/pandas-docs/version/%s/' % ver - cmd = 'cd %s; rsync -avz . %spandas.pydata.org:%s -essh' - cmd = cmd % (local_dir, user, remote_dir) - print(cmd) - if os.system(cmd): - raise SystemExit( - 'Upload to %s from %s failed' % (remote_dir, local_dir)) - - local_dir = doc_root + 'build/latex' - pdf_cmd = 'cd %s; scp pandas.pdf %spandas.pydata.org:%s' - pdf_cmd = pdf_cmd % (local_dir, user, remote_dir) - if os.system(pdf_cmd): - raise SystemExit('Upload PDF to %s from %s failed' % (ver, doc_root)) - -def build_pandas(): - os.chdir('..') - os.system('python setup.py clean') - os.system('python setup.py build_ext --inplace') - os.chdir('doc') - -def build_prev(ver): - if os.system('git checkout v%s' % ver) != 1: - os.chdir('..') - os.system('python setup.py clean') - os.system('python setup.py build_ext --inplace') - os.chdir('doc') - os.system('python make.py clean') - os.system('python make.py html') - os.system('python make.py latex') - os.system('git checkout master') - - -def clean(): - if os.path.exists('build'): - shutil.rmtree('build') - - if os.path.exists('source/generated'): - shutil.rmtree('source/generated') + Parameters + ---------- + include_api : bool + Whether API documentation will be built. + single_doc : str or None + If provided, this single documentation page will be generated. 
+ """ + if single_doc is not None: + single_doc = os.path.splitext(os.path.basename(single_doc))[0] + include_api = False + + with open(os.path.join(SOURCE_PATH, 'index.rst.template')) as f: + t = jinja2.Template(f.read()) + + with open(os.path.join(SOURCE_PATH, 'index.rst'), 'w') as f: + f.write(t.render(include_api=include_api, + single_doc=single_doc)) @contextmanager -def maybe_exclude_notebooks(): - """ - Skip building the notebooks if pandoc is not installed. +def _maybe_exclude_notebooks(): + """Skip building the notebooks if pandoc is not installed. + This assumes that nbsphinx is installed. + + Skip notebook conversion if: + 1. nbconvert isn't installed, or + 2. nbconvert is installed, but pandoc isn't """ base = os.path.dirname(__file__) notebooks = [os.path.join(base, 'source', nb) @@ -135,304 +69,175 @@ def _remove_notebooks(): contents[nb] = f.read() os.remove(nb) - # Skip notebook conversion if - # 1. nbconvert isn't installed, or - # 2. nbconvert is installed, but pandoc isn't try: import nbconvert except ImportError: - print("Warning: nbconvert not installed. Skipping notebooks.") + sys.stderr.write('Warning: nbconvert not installed. ' + 'Skipping notebooks.\n') _remove_notebooks() else: try: nbconvert.utils.pandoc.get_pandoc_version() except nbconvert.utils.pandoc.PandocMissing: - print("Warning: Pandoc is not installed. Skipping notebooks.") + sys.stderr.write('Warning: Pandoc is not installed. ' + 'Skipping notebooks.\n') _remove_notebooks() yield + for nb, content in contents.items(): with open(nb, 'wt') as f: f.write(content) -def html(): - check_build() - - with maybe_exclude_notebooks(): - if os.system('sphinx-build -P -b html -d build/doctrees ' - 'source build/html'): - raise SystemExit("Building HTML failed.") - try: - # remove stale file - os.remove('build/html/pandas.zip') - except: - pass - - -def zip_html(): - try: - print("\nZipping up HTML docs...") - # just in case the wonky build box doesn't have zip - # don't fail this. 
- os.system('cd build; rm -f html/pandas.zip; zip html/pandas.zip -r -q html/* ') - print("\n") - except: - pass - -def latex(): - check_build() - if sys.platform != 'win32': - # LaTeX format. - if os.system('sphinx-build -j 2 -b latex -d build/doctrees ' - 'source build/latex'): - raise SystemExit("Building LaTeX failed.") - # Produce pdf. - - os.chdir('build/latex') - - # Call the makefile produced by sphinx... - if os.system('make'): - print("Rendering LaTeX failed.") - print("You may still be able to get a usable PDF file by going into 'build/latex'") - print("and executing 'pdflatex pandas.tex' for the requisite number of passes.") - print("Or using the 'latex_forced' target") - raise SystemExit - - os.chdir('../..') - else: - print('latex build has not been tested on windows') - -def latex_forced(): - check_build() - if sys.platform != 'win32': - # LaTeX format. - if os.system('sphinx-build -j 2 -b latex -d build/doctrees ' - 'source build/latex'): - raise SystemExit("Building LaTeX failed.") - # Produce pdf. - - os.chdir('build/latex') - - # Manually call pdflatex, 3 passes should ensure latex fixes up - # all the required cross-references and such. 
- os.system('pdflatex -interaction=nonstopmode pandas.tex') - os.system('pdflatex -interaction=nonstopmode pandas.tex') - os.system('pdflatex -interaction=nonstopmode pandas.tex') - raise SystemExit("You should check the file 'build/latex/pandas.pdf' for problems.") - - os.chdir('../..') - else: - print('latex build has not been tested on windows') - - -def check_build(): - build_dirs = [ - 'build', 'build/doctrees', 'build/html', - 'build/latex', 'build/plots', 'build/_static', - 'build/_templates'] - for d in build_dirs: - try: - os.mkdir(d) - except OSError: - pass - - -def all(): - # clean() - html() - - -def auto_dev_build(debug=False): - msg = '' - try: - step = 'clean' - clean() - step = 'html' - html() - step = 'upload dev' - upload_dev() - if not debug: - sendmail(step) - - step = 'latex' - latex() - step = 'upload pdf' - upload_dev_pdf() - if not debug: - sendmail(step) - except (Exception, SystemExit) as inst: - msg = str(inst) + '\n' - sendmail(step, '[ERROR] ' + msg) - - -def sendmail(step=None, err_msg=None): - from_name, to_name = _get_config() - - if step is None: - step = '' - - if err_msg is None or '[ERROR]' not in err_msg: - msgstr = 'Daily docs %s completed successfully' % step - subject = "DOC: %s successful" % step - else: - msgstr = err_msg - subject = "DOC: %s failed" % step - - import smtplib - from email.MIMEText import MIMEText - msg = MIMEText(msgstr) - msg['Subject'] = subject - msg['From'] = from_name - msg['To'] = to_name - - server_str, port, login, pwd = _get_credentials() - server = smtplib.SMTP(server_str, port) - server.ehlo() - server.starttls() - server.ehlo() - - server.login(login, pwd) - try: - server.sendmail(from_name, to_name, msg.as_string()) - finally: - server.close() - - -def _get_dir(subdir=None): - import getpass - USERNAME = getpass.getuser() - if sys.platform == 'darwin': - HOME = '/Users/%s' % USERNAME - else: - HOME = '/home/%s' % USERNAME - - if subdir is None: - subdir = '/code/scripts/config' - conf_dir = 
'%s/%s' % (HOME, subdir) - return conf_dir - - -def _get_credentials(): - tmp_dir = _get_dir() - cred = '%s/credentials' % tmp_dir - with open(cred, 'r') as fh: - server, port, un, domain = fh.read().split(',') - port = int(port) - login = un + '@' + domain + '.com' - - import base64 - with open('%s/cron_email_pwd' % tmp_dir, 'r') as fh: - pwd = base64.b64decode(fh.read()) - - return server, port, login, pwd - - -def _get_config(): - tmp_dir = _get_dir() - with open('%s/addresses' % tmp_dir, 'r') as fh: - from_name, to_name = fh.read().split(',') - return from_name, to_name - -funcd = { - 'html': html, - 'zip_html': zip_html, - 'upload_dev': upload_dev, - 'upload_stable': upload_stable, - 'upload_dev_pdf': upload_dev_pdf, - 'upload_stable_pdf': upload_stable_pdf, - 'latex': latex, - 'latex_forced': latex_forced, - 'clean': clean, - 'auto_dev': auto_dev_build, - 'auto_debug': lambda: auto_dev_build(True), - 'build_pandas': build_pandas, - 'all': all, -} - -small_docs = False - -# current_dir = os.getcwd() -# os.chdir(os.path.dirname(os.path.join(current_dir, __file__))) - -import argparse -argparser = argparse.ArgumentParser(description=""" -pandas documentation builder -""".strip()) - -# argparser.add_argument('-arg_name', '--arg_name', -# metavar='label for arg help', -# type=str|etc, -# nargs='N|*|?|+|argparse.REMAINDER', -# required=False, -# #choices='abc', -# help='help string', -# action='store|store_true') - -# args = argparser.parse_args() - -#print args.accumulate(args.integers) - -def generate_index(api=True, single=False, **kwds): - from jinja2 import Template - with open("source/index.rst.template") as f: - t = Template(f.read()) +class DocBuilder: + """Class to wrap the different commands of this script. - with open("source/index.rst","w") as f: - f.write(t.render(api=api,single=single,**kwds)) + All public methods of this class can be called as parameters of the + script. 
+ """ + def __init__(self, num_jobs=1): + self.num_jobs = num_jobs + + @staticmethod + def _create_build_structure(): + """Create directories required to build documentation.""" + for dirname in BUILD_DIRS: + try: + os.makedirs(os.path.join(BUILD_PATH, dirname)) + except OSError: + pass + + @staticmethod + def _run_os(*args): + """Execute a command as a OS terminal. + + Parameters + ---------- + *args : list of str + Command and parameters to be executed + + Examples + -------- + >>> DocBuilder()._run_os('python', '--version') + """ + subprocess.check_call(args, stderr=subprocess.STDOUT) + + def _sphinx_build(self, kind): + """Call sphinx to build documentation. + + Attribute `num_jobs` from the class is used. + + Parameters + ---------- + kind : {'html', 'latex'} + + Examples + -------- + >>> DocBuilder(num_jobs=4)._sphinx_build('html') + """ + if kind not in ('html', 'latex'): + raise ValueError('kind must be html or latex, not {}'.format(kind)) + + self._run_os('sphinx-build', + '-j{}'.format(self.num_jobs), + '-b{}'.format(kind), + '-d{}'.format(os.path.join(BUILD_PATH, + 'doctrees')), + SOURCE_PATH, + os.path.join(BUILD_PATH, kind)) + + def html(self): + """Build HTML documentation.""" + self._create_build_structure() + with _maybe_exclude_notebooks(): + self._sphinx_build('html') + zip_fname = os.path.join(BUILD_PATH, 'html', 'pandas.zip') + if os.path.exists(zip_fname): + os.remove(zip_fname) + + def latex(self, force=False): + """Build PDF documentation.""" + self._create_build_structure() + if sys.platform == 'win32': + sys.stderr.write('latex build has not been tested on windows\n') + else: + self._sphinx_build('latex') + os.chdir(os.path.join(BUILD_PATH, 'latex')) + if force: + for i in range(3): + self._run_os('pdflatex', + '-interaction=nonstopmode', + 'pandas.tex') + raise SystemExit('You should check the file ' + '"build/latex/pandas.pdf" for problems.') + else: + self._run_os('make') + + def latex_forced(self): + """Build PDF documentation with 
retries to find missing references.""" + self.latex(force=True) + + @staticmethod + def clean(): + """Clean documentation generated files.""" + shutil.rmtree(BUILD_PATH, ignore_errors=True) + shutil.rmtree(os.path.join(SOURCE_PATH, 'generated'), + ignore_errors=True) + + def zip_html(self): + """Compress HTML documentation into a zip file.""" + zip_fname = os.path.join(BUILD_PATH, 'html', 'pandas.zip') + if os.path.exists(zip_fname): + os.remove(zip_fname) + dirname = os.path.join(BUILD_PATH, 'html') + fnames = os.listdir(dirname) + os.chdir(dirname) + self._run_os('zip', + zip_fname, + '-r', + '-q', + *fnames) -import argparse -argparser = argparse.ArgumentParser(description="pandas documentation builder", - epilog="Targets : %s" % funcd.keys()) - -argparser.add_argument('--no-api', - default=False, - help='Ommit api and autosummary', - action='store_true') -argparser.add_argument('--single', - metavar='FILENAME', - type=str, - default=False, - help='filename of section to compile, e.g. 
"indexing"') -argparser.add_argument('--user', - type=str, - default=False, - help='Username to connect to the pydata server') def main(): - args, unknown = argparser.parse_known_args() - sys.argv = [sys.argv[0]] + unknown - if args.single: - args.single = os.path.basename(args.single).split(".rst")[0] - - if 'clean' in unknown: - args.single=False - - generate_index(api=not args.no_api and not args.single, single=args.single) - - if len(sys.argv) > 2: - ftype = sys.argv[1] - ver = sys.argv[2] - - if ftype == 'build_previous': - build_prev(ver, user=args.user) - if ftype == 'upload_previous': - upload_prev(ver, user=args.user) - elif len(sys.argv) == 2: - for arg in sys.argv[1:]: - func = funcd.get(arg) - if func is None: - raise SystemExit('Do not know how to handle %s; valid args are %s' % ( - arg, list(funcd.keys()))) - if args.user: - func(user=args.user) - else: - func() - else: - small_docs = False - all() -# os.chdir(current_dir) + cmds = [method for method in dir(DocBuilder) if not method.startswith('_')] + + argparser = argparse.ArgumentParser( + description='pandas documentation builder', + epilog='Commands: {}'.format(','.join(cmds))) + argparser.add_argument('command', + nargs='?', + default='html', + help='command to run: {}'.format(', '.join(cmds))) + argparser.add_argument('--num-jobs', + type=int, + default=1, + help='number of jobs used by sphinx-build') + argparser.add_argument('--no-api', + default=False, + help='ommit api and autosummary', + action='store_true') + argparser.add_argument('--single', + metavar='FILENAME', + type=str, + default=None, + help=('filename of section to compile, ' + 'e.g. "indexing"')) + argparser.add_argument('--python-path', + type=str, + default=os.path.join(DOC_PATH, '..'), + help='path') + args = argparser.parse_args() + + if args.command not in cmds: + raise ValueError('Unknown command {}. 
Available options: {}'.format( + args.command, ', '.join(cmds))) + + os.environ['PYTHONPATH'] = args.python_path + _generate_index(not args.no_api, args.single) + getattr(DocBuilder(args.num_jobs), args.command)() + if __name__ == '__main__': - import sys sys.exit(main()) diff --git a/doc/source/index.rst.template b/doc/source/index.rst.template index 7c7457df8ea93..eff1227e98994 100644 --- a/doc/source/index.rst.template +++ b/doc/source/index.rst.template @@ -109,10 +109,10 @@ See the package overview for more detail about what's in the library. .. toctree:: :maxdepth: 4 - {% if single -%} - {{ single }} + {% if single_doc -%} + {{ single_doc }} {% endif -%} - {%if not single -%} + {% if not single_doc -%} whatsnew install contributing @@ -146,10 +146,10 @@ See the package overview for more detail about what's in the library. comparison_with_sql comparison_with_sas {% endif -%} - {% if api -%} + {% if include_api -%} api {% endif -%} - {%if not single -%} + {% if not single_doc -%} developer internals release From ab48369b423ce62194249ab719e49e7a5d12bb0c Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+maxim-lian@users.noreply.github.com> Date: Wed, 21 Feb 2018 21:17:53 -0500 Subject: [PATCH 166/217] DOC: RangeIndex as default index (#19781) --- pandas/core/frame.py | 4 ++-- pandas/core/series.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d81d22173bfbd..c607f1fa1c24c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -251,11 +251,11 @@ class DataFrame(NDFrame): data : numpy ndarray (structured or homogeneous), dict, or DataFrame Dict can contain Series, arrays, constants, or list-like objects index : Index or array-like - Index to use for resulting frame. Will default to np.arange(n) if + Index to use for resulting frame. 
Will default to RangeIndex if no indexing information part of input data and no index provided columns : Index or array-like Column labels to use for resulting frame. Will default to - np.arange(n) if no column labels are provided + RangeIndex (0, 1, 2, ..., n) if no column labels are provided dtype : dtype, default None Data type to force. Only a single dtype is allowed. If None, infer copy : boolean, default False diff --git a/pandas/core/series.py b/pandas/core/series.py index 79ffb8be65838..5f2194bda870c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -131,7 +131,7 @@ class Series(base.IndexOpsMixin, generic.NDFrame): index : array-like or Index (1d) Values must be hashable and have the same length as `data`. Non-unique index values are allowed. Will default to - RangeIndex(len(data)) if not provided. If both a dict and index + RangeIndex (0, 1, 2, ..., n) if not provided. If both a dict and index sequence are used, the index will override the keys found in the dict. dtype : numpy.dtype or None From 613983ba844e10b4e61c061098aa93cc6b714a24 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Wed, 21 Feb 2018 21:20:05 -0500 Subject: [PATCH 167/217] Update df.to_stata() docstring (#19818) --- pandas/core/frame.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c607f1fa1c24c..c7e9cd9411633 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1612,7 +1612,7 @@ def to_stata(self, fname, convert_dates=None, write_index=True, time_stamp : datetime A datetime to use as file creation date. Default is the current time. - dataset_label : str + data_label : str A label for the data set. Must be 80 characters or smaller. 
variable_labels : dict Dictionary containing columns as keys and variable labels as @@ -1635,10 +1635,18 @@ def to_stata(self, fname, convert_dates=None, write_index=True, Examples -------- + >>> data.to_stata('./data_file.dta') + + Or with dates + + >>> data.to_stata('./date_data_file.dta', {2 : 'tw'}) + + Alternatively you can create an instance of the StataWriter class + >>> writer = StataWriter('./data_file.dta', data) >>> writer.write_file() - Or with dates + With dates: >>> writer = StataWriter('./date_data_file.dta', data, {2 : 'tw'}) >>> writer.write_file() From ea382a74ffa533325ae8c77d930c4acadfaa53f1 Mon Sep 17 00:00:00 2001 From: ZhuBaohe Date: Thu, 22 Feb 2018 10:23:11 +0800 Subject: [PATCH 168/217] DOC: correct Series.reset_index example (#19832) --- pandas/core/series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 5f2194bda870c..6fcd54ecc6118 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1015,7 +1015,7 @@ def reset_index(self, level=None, drop=False, name=None, inplace=False): >>> s = pd.Series([1, 2, 3, 4], index=pd.Index(['a', 'b', 'c', 'd'], ... 
name = 'idx')) >>> s.reset_index() - index 0 + idx 0 0 0 1 1 1 2 2 2 3 From 69eac1e79c12238938f6b859a29bb0bd267b9033 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 21 Feb 2018 19:08:50 -0800 Subject: [PATCH 169/217] implement add_offset_array for PeriodIndex (#19826) --- pandas/core/indexes/period.py | 23 ++++++++ .../tests/indexes/period/test_arithmetic.py | 54 ++++++++++++++----- 2 files changed, 63 insertions(+), 14 deletions(-) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 60798e6d77e37..88f9297652ebf 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -44,6 +44,7 @@ from pandas.util._decorators import (Appender, Substitution, cache_readonly, deprecate_kwarg) from pandas.compat import zip, u +from pandas.errors import PerformanceWarning import pandas.core.indexes.base as ibase _index_doc_kwargs = dict(ibase._index_doc_kwargs) @@ -746,6 +747,28 @@ def _sub_period(self, other): # result must be Int64Index or Float64Index return Index(new_data) + def _add_offset_array(self, other): + # Array/Index of DateOffset objects + if len(other) == 1: + return self + other[0] + else: + warnings.warn("Adding/subtracting array of DateOffsets to " + "{cls} not vectorized" + .format(cls=type(self).__name__), PerformanceWarning) + res_values = self.astype('O').values + np.array(other) + return self.__class__(res_values) + + def _sub_offset_array(self, other): + # Array/Index of DateOffset objects + if len(other) == 1: + return self - other[0] + else: + warnings.warn("Adding/subtracting array of DateOffsets to " + "{cls} not vectorized" + .format(cls=type(self).__name__), PerformanceWarning) + res_values = self.astype('O').values - np.array(other) + return self.__class__(res_values) + def shift(self, n): """ Specialized shift which produces an PeriodIndex diff --git a/pandas/tests/indexes/period/test_arithmetic.py b/pandas/tests/indexes/period/test_arithmetic.py index e16d346542b9e..0c06e6a4963b4 100644 --- 
a/pandas/tests/indexes/period/test_arithmetic.py +++ b/pandas/tests/indexes/period/test_arithmetic.py @@ -9,6 +9,7 @@ period_range, Period, PeriodIndex, _np_version_under1p10) import pandas.core.indexes.period as period +from pandas.errors import PerformanceWarning _common_mismatch = [pd.offsets.YearBegin(2), @@ -254,32 +255,57 @@ def test_comp_nat(self, dtype): class TestPeriodIndexArithmetic(object): - def test_pi_add_offset_array(self): + @pytest.mark.parametrize('box', [np.array, pd.Index]) + def test_pi_add_offset_array(self, box): # GH#18849 pi = pd.PeriodIndex([pd.Period('2015Q1'), pd.Period('2016Q2')]) - offs = np.array([pd.offsets.QuarterEnd(n=1, startingMonth=12), - pd.offsets.QuarterEnd(n=-2, startingMonth=12)]) - res = pi + offs + offs = box([pd.offsets.QuarterEnd(n=1, startingMonth=12), + pd.offsets.QuarterEnd(n=-2, startingMonth=12)]) expected = pd.PeriodIndex([pd.Period('2015Q2'), pd.Period('2015Q4')]) + + with tm.assert_produces_warning(PerformanceWarning): + res = pi + offs tm.assert_index_equal(res, expected) + with tm.assert_produces_warning(PerformanceWarning): + res2 = offs + pi + tm.assert_index_equal(res2, expected) + unanchored = np.array([pd.offsets.Hour(n=1), pd.offsets.Minute(n=-2)]) + # addition/subtraction ops with incompatible offsets should issue + # a PerformanceWarning and _then_ raise a TypeError. 
with pytest.raises(period.IncompatibleFrequency): - pi + unanchored - with pytest.raises(TypeError): - unanchored + pi + with tm.assert_produces_warning(PerformanceWarning): + pi + unanchored + with pytest.raises(period.IncompatibleFrequency): + with tm.assert_produces_warning(PerformanceWarning): + unanchored + pi - @pytest.mark.xfail(reason='GH#18824 radd doesnt implement this case') - def test_pi_radd_offset_array(self): - # GH#18849 + @pytest.mark.parametrize('box', [np.array, pd.Index]) + def test_pi_sub_offset_array(self, box): + # GH#18824 pi = pd.PeriodIndex([pd.Period('2015Q1'), pd.Period('2016Q2')]) - offs = np.array([pd.offsets.QuarterEnd(n=1, startingMonth=12), - pd.offsets.QuarterEnd(n=-2, startingMonth=12)]) - res = offs + pi - expected = pd.PeriodIndex([pd.Period('2015Q2'), pd.Period('2015Q4')]) + other = box([pd.offsets.QuarterEnd(n=1, startingMonth=12), + pd.offsets.QuarterEnd(n=-2, startingMonth=12)]) + + expected = PeriodIndex([pi[n] - other[n] for n in range(len(pi))]) + + with tm.assert_produces_warning(PerformanceWarning): + res = pi - other tm.assert_index_equal(res, expected) + anchored = box([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)]) + + # addition/subtraction ops with anchored offsets should issue + # a PerformanceWarning and _then_ raise a TypeError. 
+ with pytest.raises(period.IncompatibleFrequency): + with tm.assert_produces_warning(PerformanceWarning): + pi - anchored + with pytest.raises(period.IncompatibleFrequency): + with tm.assert_produces_warning(PerformanceWarning): + anchored - pi + def test_pi_add_iadd_pi_raises(self): rng = pd.period_range('1/1/2000', freq='D', periods=5) other = pd.period_range('1/6/2000', freq='D', periods=5) From f3836c454dd908e70f6bee3fb7682f3624fe3443 Mon Sep 17 00:00:00 2001 From: Paul Reidy Date: Thu, 22 Feb 2018 10:34:46 +0000 Subject: [PATCH 170/217] ENH: Add columns parameter to from_dict (#19802) --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/core/frame.py | 15 ++++++++++++--- pandas/tests/frame/test_constructors.py | 19 +++++++++++++++++++ 3 files changed, 32 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 76c4fa08fca4d..1ae15f363a2d0 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -295,6 +295,7 @@ Other Enhancements - ``IntervalIndex.astype`` now supports conversions between subtypes when passed an ``IntervalDtype`` (:issue:`19197`) - :class:`IntervalIndex` and its associated constructor methods (``from_arrays``, ``from_breaks``, ``from_tuples``) have gained a ``dtype`` parameter (:issue:`19262`) - Added :func:`SeriesGroupBy.is_monotonic_increasing` and :func:`SeriesGroupBy.is_monotonic_decreasing` (:issue:`17015`) +- :func:`DataFrame.from_dict` now accepts a ``columns`` argument that can be used to specify the column names when ``orient='index'`` is used (:issue:`18529`) .. 
_whatsnew_0230.api_breaking: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c7e9cd9411633..2aae4dffbeaaf 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -876,7 +876,7 @@ def dot(self, other): # IO methods (to / from other formats) @classmethod - def from_dict(cls, data, orient='columns', dtype=None): + def from_dict(cls, data, orient='columns', dtype=None, columns=None): """ Construct DataFrame from dict of array-like or dicts @@ -890,12 +890,17 @@ def from_dict(cls, data, orient='columns', dtype=None): (default). Otherwise if the keys should be rows, pass 'index'. dtype : dtype, default None Data type to force, otherwise infer + columns: list, default None + Column labels to use when orient='index'. Raises a ValueError + if used with orient='columns' + + .. versionadded:: 0.23.0 Returns ------- DataFrame """ - index, columns = None, None + index = None orient = orient.lower() if orient == 'index': if len(data) > 0: @@ -904,7 +909,11 @@ def from_dict(cls, data, orient='columns', dtype=None): data = _from_nested_dict(data) else: data, index = list(data.values()), list(data.keys()) - elif orient != 'columns': # pragma: no cover + elif orient == 'columns': + if columns is not None: + raise ValueError("cannot use columns parameter with " + "orient='columns'") + else: # pragma: no cover raise ValueError('only recognize index or columns for orient') return cls(data, index=index, columns=columns, dtype=dtype) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 8abd88d8a379c..394997201f320 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1091,6 +1091,25 @@ def test_constructor_orient(self): xp = DataFrame.from_dict(a).T.reindex(list(a.keys())) tm.assert_frame_equal(rs, xp) + def test_from_dict_columns_parameter(self): + # GH 18529 + # Test new columns parameter for from_dict that was added to make + # from_items(..., orient='index', 
columns=[...]) easier to replicate + result = DataFrame.from_dict(OrderedDict([('A', [1, 2]), + ('B', [4, 5])]), + orient='index', columns=['one', 'two']) + expected = DataFrame([[1, 2], [4, 5]], index=['A', 'B'], + columns=['one', 'two']) + tm.assert_frame_equal(result, expected) + + msg = "cannot use columns parameter with orient='columns'" + with tm.assert_raises_regex(ValueError, msg): + DataFrame.from_dict(dict([('A', [1, 2]), ('B', [4, 5])]), + orient='columns', columns=['one', 'two']) + with tm.assert_raises_regex(ValueError, msg): + DataFrame.from_dict(dict([('A', [1, 2]), ('B', [4, 5])]), + columns=['one', 'two']) + def test_constructor_Series_named(self): a = Series([1, 2, 3], index=['a', 'b', 'c'], name='x') df = DataFrame(a) From c660e2a494c063cb07a5f57273a8a1f9eaef47ab Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 22 Feb 2018 03:12:52 -0800 Subject: [PATCH 171/217] fix Timedelta.__mul__(NaT) (#19819) --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/_libs/tslibs/timedeltas.pyx | 2 +- pandas/tests/scalar/timedelta/test_arithmetic.py | 10 ++++++++++ pandas/tests/scalar/timedelta/test_timedelta.py | 5 ----- 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 1ae15f363a2d0..b2ac6ecc7e011 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -757,6 +757,7 @@ Datetimelike Timedelta ^^^^^^^^^ +- Bug in :func:`Timedelta.__mul__` where multiplying by ``NaT`` returned ``NaT`` instead of raising a ``TypeError`` (:issue:`19819`) - Bug in :class:`Series` with ``dtype='timedelta64[ns]`` where addition or subtraction of ``TimedeltaIndex`` had results cast to ``dtype='int64'`` (:issue:`17250`) - Bug in :class:`Series` with ``dtype='timedelta64[ns]`` where addition or subtraction of ``TimedeltaIndex`` could return a ``Series`` with an incorrect name (:issue:`19043`) - Bug in :func:`Timedelta.__floordiv__` and :func:`Timedelta.__rfloordiv__` 
dividing by many incompatible numpy objects was incorrectly allowed (:issue:`18846`) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 78fdeb988e0f2..1285cbb9ff62b 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -1057,7 +1057,7 @@ class Timedelta(_Timedelta): return other * self.to_timedelta64() elif other is NaT: - return NaT + raise TypeError('Cannot multiply Timedelta with NaT') elif not (is_integer_object(other) or is_float_object(other)): # only integers and floats allowed diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py index 48da23f3575ab..8460633febba9 100644 --- a/pandas/tests/scalar/timedelta/test_arithmetic.py +++ b/pandas/tests/scalar/timedelta/test_arithmetic.py @@ -216,6 +216,16 @@ class TestTimedeltaMultiplicationDivision(object): # --------------------------------------------------------------- # Timedelta.__mul__, __rmul__ + @pytest.mark.parametrize('td_nat', [pd.NaT, + np.timedelta64('NaT', 'ns'), + np.timedelta64('NaT')]) + @pytest.mark.parametrize('op', [operator.mul, ops.rmul]) + def test_td_mul_nat(self, op, td_nat): + # GH#19819 + td = Timedelta(10, unit='d') + with pytest.raises(TypeError): + op(td, td_nat) + @pytest.mark.parametrize('op', [operator.mul, ops.rmul]) def test_td_mul_scalar(self, op): # GH#19738 diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index 4257c610fb960..a80c5d6611b8a 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -62,11 +62,6 @@ def test_unary_ops(self): assert abs(-td) == td assert abs(-td) == Timedelta('10d') - def test_binary_ops_nat(self): - td = Timedelta(10, unit='d') - # FIXME: The next test is wrong: td * NaT should raise - assert (td * pd.NaT) is pd.NaT - class TestTimedeltaComparison(object): def 
test_comparison_object_array(self): From a31d2ad810c69895118f1d12c412b5f94e9e5039 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 22 Feb 2018 03:15:40 -0800 Subject: [PATCH 172/217] Fix rfloordiv return type, un-xfail Timedelta tests (#19820) --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/_libs/tslibs/timedeltas.pyx | 12 ++++++++++-- pandas/tests/scalar/timedelta/test_arithmetic.py | 6 ------ 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index b2ac6ecc7e011..f0bd6fe4a0bc2 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -767,6 +767,7 @@ Timedelta - Bug in :func:`Timedelta.__floordiv__`, :func:`Timedelta.__rfloordiv__` where operating with a ``Tick`` object would raise a ``TypeError`` instead of returning a numeric value (:issue:`19738`) - Bug in :func:`Period.asfreq` where periods near ``datetime(1, 1, 1)`` could be converted incorrectly (:issue:`19643`) - Bug in :func:`Timedelta.total_seconds()` causing precision errors i.e. 
``Timedelta('30S').total_seconds()==30.000000000000004`` (:issue:`19458`) +- Bug in :func:`Timedelta.__rmod__` where operating with a ``numpy.timedelta64`` returned a ``timedelta64`` object instead of a ``Timedelta`` (:issue:`19820`) - Multiplication of :class:`TimedeltaIndex` by ``TimedeltaIndex`` will now raise ``TypeError`` instead of raising ``ValueError`` in cases of length mis-match (:issue`19333`) - diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 1285cbb9ff62b..c4578a289b020 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -1109,7 +1109,11 @@ class Timedelta(_Timedelta): return self // other.delta return NotImplemented - if hasattr(other, 'dtype'): + elif is_timedelta64_object(other): + # convert to Timedelta below + pass + + elif hasattr(other, 'dtype'): if other.dtype.kind == 'm': # also timedelta-like return _broadcast_floordiv_td64(self.value, other, _floordiv) @@ -1144,7 +1148,11 @@ class Timedelta(_Timedelta): return other.delta // self return NotImplemented - if hasattr(other, 'dtype'): + elif is_timedelta64_object(other): + # convert to Timedelta below + pass + + elif hasattr(other, 'dtype'): if other.dtype.kind == 'm': # also timedelta-like return _broadcast_floordiv_td64(self.value, other, _rfloordiv) diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py index 8460633febba9..179768fcc6709 100644 --- a/pandas/tests/scalar/timedelta/test_arithmetic.py +++ b/pandas/tests/scalar/timedelta/test_arithmetic.py @@ -451,7 +451,6 @@ def test_mod_timedeltalike(self): result = td % NaT assert result is NaT - @pytest.mark.xfail(reason='GH#19378 floordiv td64 returns td64') def test_mod_timedelta64_nat(self): # GH#19365 td = Timedelta(hours=37) @@ -459,7 +458,6 @@ def test_mod_timedelta64_nat(self): result = td % np.timedelta64('NaT', 'ns') assert result is NaT - @pytest.mark.xfail(reason='GH#19378 floordiv td64
returns td64') def test_mod_timedelta64(self): # GH#19365 td = Timedelta(hours=37) @@ -468,7 +466,6 @@ def test_mod_timedelta64(self): assert isinstance(result, Timedelta) assert result == Timedelta(hours=1) - @pytest.mark.xfail(reason='GH#19378 floordiv by Tick not implemented') def test_mod_offset(self): # GH#19365 td = Timedelta(hours=37) @@ -515,7 +512,6 @@ def test_rmod_pytimedelta(self): assert isinstance(result, Timedelta) assert result == Timedelta(minutes=1) - @pytest.mark.xfail(reason='GH#19378 floordiv by Tick not implemented') def test_rmod_timedelta64(self): # GH#19365 td = Timedelta(minutes=3) @@ -574,7 +570,6 @@ def test_divmod(self): assert np.isnan(result[0]) assert result[1] is pd.NaT - @pytest.mark.xfail(reason='GH#19378 floordiv by Tick not implemented') def test_divmod_offset(self): # GH#19365 td = Timedelta(days=2, hours=6) @@ -598,7 +593,6 @@ def test_rdivmod_pytimedelta(self): assert isinstance(result[1], Timedelta) assert result[1] == Timedelta(hours=6) - @pytest.mark.xfail(reason='GH#19378 floordiv by Tick not implemented') def test_rdivmod_offset(self): result = divmod(pd.offsets.Hour(54), Timedelta(hours=-4)) assert result[0] == -14 From c1dda284a6a45c161f223737d3356cbcbd8cc148 Mon Sep 17 00:00:00 2001 From: jschendel Date: Thu, 22 Feb 2018 04:39:39 -0700 Subject: [PATCH 173/217] BUG: Fix qcut with NaT present (#19833) --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/core/reshape/tile.py | 10 +++++++--- pandas/tests/reshape/test_tile.py | 15 ++++++++++++++- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index f0bd6fe4a0bc2..ed93503388893 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -901,6 +901,7 @@ Reshaping - Bug in :func:`DataFrame.join` which does an ``outer`` instead of a ``left`` join when being called with multiple DataFrames and some have non-unique indices (:issue:`19624`) - :func:`Series.rename` now accepts 
``axis`` as a kwarg (:issue:`18589`) - Comparisons between :class:`Series` and :class:`Index` would return a ``Series`` with an incorrect name, ignoring the ``Index``'s name attribute (:issue:`19582`) +- Bug in :func:`qcut` where datetime and timedelta data with ``NaT`` present raised a ``ValueError`` (:issue:`19768`) Other ^^^^^ diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 777f08bd9db2b..359c030157bd3 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -279,18 +279,22 @@ def _trim_zeros(x): def _coerce_to_type(x): """ if the passed data is of datetime/timedelta type, - this method converts it to integer so that cut method can + this method converts it to numeric so that cut method can handle it """ dtype = None if is_timedelta64_dtype(x): - x = to_timedelta(x).view(np.int64) + x = to_timedelta(x) dtype = np.timedelta64 elif is_datetime64_dtype(x): - x = to_datetime(x).view(np.int64) + x = to_datetime(x) dtype = np.datetime64 + if dtype is not None: + # GH 19768: force NaT to NaN during integer conversion + x = np.where(x.notna(), x.view(np.int64), np.nan) + return x, dtype diff --git a/pandas/tests/reshape/test_tile.py b/pandas/tests/reshape/test_tile.py index f7262a2f0da63..ff914273d47b1 100644 --- a/pandas/tests/reshape/test_tile.py +++ b/pandas/tests/reshape/test_tile.py @@ -6,7 +6,8 @@ from pandas import (Series, isna, to_datetime, DatetimeIndex, Timestamp, Interval, IntervalIndex, Categorical, - cut, qcut, date_range) + cut, qcut, date_range, NaT, TimedeltaIndex) +from pandas.tseries.offsets import Nano, Day import pandas.util.testing as tm from pandas.api.types import CategoricalDtype as CDT @@ -250,6 +251,18 @@ def test_qcut_nas(self): result = qcut(arr, 4) assert isna(result[:20]).all() + @pytest.mark.parametrize('s', [ + Series(DatetimeIndex(['20180101', NaT, '20180103'])), + Series(TimedeltaIndex(['0 days', NaT, '2 days']))], + ids=lambda x: str(x.dtype)) + def test_qcut_nat(self, s): + # GH 19768 
+ intervals = IntervalIndex.from_tuples( + [(s[0] - Nano(), s[2] - Day()), np.nan, (s[2] - Day(), s[2])]) + expected = Series(Categorical(intervals, ordered=True)) + result = qcut(s, 2) + tm.assert_series_equal(result, expected) + def test_qcut_index(self): result = qcut([0, 2], 2) intervals = [Interval(-0.001, 1), Interval(1, 2)] From 290f410fc2d36d4e4ca89cafb30abc2c35b77a08 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 22 Feb 2018 14:55:02 +0100 Subject: [PATCH 174/217] CI: Align pep8speaks config with setup.cfg (#19841) --- .pep8speaks.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.pep8speaks.yml b/.pep8speaks.yml index 299b76c8922cc..fda26d87bf7f6 100644 --- a/.pep8speaks.yml +++ b/.pep8speaks.yml @@ -6,5 +6,7 @@ scanner: pycodestyle: max-line-length: 79 ignore: # Errors and warnings to ignore - - E731 - - E402 + - E402, # module level import not at top of file + - E731, # do not assign a lambda expression, use a def + - E741, # do not use variables named 'l', 'O', or 'I' + - W503 # line break before binary operator From 80d6ccb3d1f71212693525f209fae806cbd2316a Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Thu, 22 Feb 2018 14:06:08 +0000 Subject: [PATCH 175/217] DOC: Making doc/source/conf.py pass PEP-8, and added to lint (#19839) --- ci/lint.sh | 6 ++--- doc/source/conf.py | 65 ++++++++++++++++++++++++---------------------- 2 files changed, 37 insertions(+), 34 deletions(-) diff --git a/ci/lint.sh b/ci/lint.sh index b862a3bfcf29e..e3a39668885f0 100755 --- a/ci/lint.sh +++ b/ci/lint.sh @@ -37,12 +37,12 @@ if [ "$LINT" ]; then fi echo "Linting scripts/*.py DONE" - echo "Linting doc script" - flake8 doc/make.py + echo "Linting doc scripts" + flake8 doc/make.py doc/source/conf.py if [ $? 
-ne "0" ]; then RET=1 fi - echo "Linting doc script DONE" + echo "Linting doc scripts DONE" echo "Linting *.pyx" flake8 pandas --filename=*.pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123,F403 diff --git a/doc/source/conf.py b/doc/source/conf.py index 7c4edd0486636..b5fbf096f2626 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -2,7 +2,8 @@ # # pandas documentation build configuration file, created by # -# This file is execfile()d with the current directory set to its containing dir. +# This file is execfile()d with the current directory set to its containing +# dir. # # Note that not all possible configuration values are present in this # autogenerated file. @@ -49,8 +50,9 @@ # -- General configuration ----------------------------------------------- -# Add any Sphinx extension module names here, as strings. They can be extensions -# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. sphinxext. +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones. +# sphinxext. extensions = ['sphinx.ext.autodoc', 'sphinx.ext.autosummary', @@ -60,7 +62,8 @@ 'numpydoc', 'ipython_sphinxext.ipython_directive', 'ipython_sphinxext.ipython_console_highlighting', - 'IPython.sphinxext.ipython_console_highlighting', # lowercase didn't work + # lowercase didn't work + 'IPython.sphinxext.ipython_console_highlighting', 'sphinx.ext.intersphinx', 'sphinx.ext.coverage', 'sphinx.ext.mathjax', @@ -95,22 +98,24 @@ files_to_delete.append(f) if files_to_delete: - print("I'm about to DELETE the following:\n%s\n" % list(sorted(files_to_delete))) - sys.stdout.write("WARNING: I'd like to delete those to speed up processing (yes/no)? ") + print("I'm about to DELETE the following:\n{}\n".format( + list(sorted(files_to_delete)))) + sys.stdout.write("WARNING: I'd like to delete those " + "to speed up processing (yes/no)? 
") if PY3: answer = input() else: answer = raw_input() - if answer.lower().strip() in ('y','yes'): + if answer.lower().strip() in ('y', 'yes'): for f in files_to_delete: - f = os.path.join(os.path.join(os.path.dirname(__file__),f)) - f= os.path.abspath(f) + f = os.path.join(os.path.join(os.path.dirname(__file__), f)) + f = os.path.abspath(f) try: - print("Deleting %s" % f) + print("Deleting {}".format(f)) os.unlink(f) except: - print("Error deleting %s" % f) + print("Error deleting {}".format(f)) pass # Add any paths that contain templates here, relative to this directory. @@ -137,7 +142,7 @@ import pandas # version = '%s r%s' % (pandas.__version__, svn_version()) -version = '%s' % (pandas.__version__) +version = str(pandas.__version__) # The full version, including alpha/beta/rc tags. release = version @@ -159,8 +164,8 @@ # for source files. exclude_trees = [] -# The reST default role (used for this markup: `text`) to use for all documents. -# default_role = None +# The reST default role (used for this markup: `text`) to use for all +# documents. default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. # add_function_parentheses = True @@ -334,8 +339,8 @@ # The font size ('10pt', '11pt' or '12pt'). # latex_font_size = '10pt' -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, author, documentclass [howto/manual]). +# Grouping the document tree into LaTeX files. List of tuples (source start +# file, target name, title, author, documentclass [howto/manual]). latex_documents = [ ('index', 'pandas.tex', u('pandas: powerful Python data analysis toolkit'), @@ -392,7 +397,7 @@ # wherever the docs are built. The docs' target is the browser, not # the console, so this is fine. 
'pd.options.display.encoding="utf8"' - ] +] # Add custom Documenter to handle attributes/methods of an AccessorProperty @@ -400,7 +405,8 @@ import sphinx from sphinx.util import rpartition -from sphinx.ext.autodoc import Documenter, MethodDocumenter, AttributeDocumenter +from sphinx.ext.autodoc import ( + Documenter, MethodDocumenter, AttributeDocumenter) from sphinx.ext.autosummary import Autosummary @@ -408,7 +414,6 @@ class AccessorDocumenter(MethodDocumenter): """ Specialized Documenter subclass for accessors. """ - objtype = 'accessor' directivetype = 'method' @@ -426,7 +431,6 @@ class AccessorLevelDocumenter(Documenter): Specialized Documenter subclass for objects on accessor level (methods, attributes). """ - # This is the simple straightforward version # modname is None, base the last elements (eg 'hour') # and path the part before (eg 'Series.dt') @@ -436,7 +440,6 @@ class AccessorLevelDocumenter(Documenter): # mod_cls = mod_cls.split('.') # # return modname, mod_cls + [base] - def resolve_name(self, modname, parents, path, base): if modname is None: if path: @@ -471,16 +474,17 @@ def resolve_name(self, modname, parents, path, base): return modname, parents + [base] -class AccessorAttributeDocumenter(AccessorLevelDocumenter, AttributeDocumenter): - +class AccessorAttributeDocumenter(AccessorLevelDocumenter, + AttributeDocumenter): objtype = 'accessorattribute' directivetype = 'attribute' - # lower than AttributeDocumenter so this is not chosen for normal attributes + # lower than AttributeDocumenter so this is not chosen for normal + # attributes priority = 0.6 -class AccessorMethodDocumenter(AccessorLevelDocumenter, MethodDocumenter): +class AccessorMethodDocumenter(AccessorLevelDocumenter, MethodDocumenter): objtype = 'accessormethod' directivetype = 'method' @@ -508,7 +512,6 @@ class PandasAutosummary(Autosummary): This alternative autosummary class lets us override the table summary for Series.plot and DataFrame.plot in the API docs. 
""" - def _replace_pandas_items(self, display_name, sig, summary, real_name): # this a hack: ideally we should extract the signature from the # .__call__ method instead of hard coding this @@ -561,18 +564,18 @@ def linkcode_resolve(domain, info): lineno = None if lineno: - linespec = "#L%d-L%d" % (lineno, lineno + len(source) - 1) + linespec = "#L{:d}-L{:d}".format(lineno, lineno + len(source) - 1) else: linespec = "" fn = os.path.relpath(fn, start=os.path.dirname(pandas.__file__)) if '+' in pandas.__version__: - return "http://github.com/pandas-dev/pandas/blob/master/pandas/%s%s" % ( - fn, linespec) + return ("http://github.com/pandas-dev/pandas/blob/master/pandas/" + "{}{}".format(fn, linespec)) else: - return "http://github.com/pandas-dev/pandas/blob/v%s/pandas/%s%s" % ( - pandas.__version__, fn, linespec) + return ("http://github.com/pandas-dev/pandas/blob/" + "v{}/pandas/{}{}".format(pandas.__version__, fn, linespec)) # remove the docstring of the flags attribute (inherited from numpy ndarray) From c3208546a7701a06fff6f9b2d3023e0ad1604a88 Mon Sep 17 00:00:00 2001 From: topper-123 Date: Fri, 23 Feb 2018 01:22:34 +0000 Subject: [PATCH 176/217] Let initialisation from dicts use insertion order for py>=36, part I (#19830) --- pandas/tests/frame/test_apply.py | 2 +- pandas/tests/frame/test_block_internals.py | 4 ++-- pandas/tests/frame/test_constructors.py | 21 ++++++++++---------- pandas/tests/frame/test_dtypes.py | 4 ++-- pandas/tests/frame/test_indexing.py | 4 ++-- pandas/tests/frame/test_mutate_columns.py | 12 +++++------ pandas/tests/frame/test_nonunique_indexes.py | 8 ++++---- pandas/tests/frame/test_reshape.py | 5 +++-- pandas/tests/frame/test_to_csv.py | 6 +++--- 9 files changed, 33 insertions(+), 33 deletions(-) diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index d1ad9f71e6350..a057ca0879cac 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -646,7 +646,7 @@ def 
test_infer_output_shape_columns(self): 'datetime': [pd.Timestamp('2017-11-29 03:30:00'), pd.Timestamp('2017-11-29 03:45:00')]}) result = df.apply(lambda row: (row.number, row.string), axis=1) - expected = Series([t[2:] for t in df.itertuples()]) + expected = Series([(t.number, t.string) for t in df.itertuples()]) assert_series_equal(result, expected) def test_infer_output_shape_listlike_columns(self): diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 8b1fd7d50cb4d..8e012922d25f1 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -405,8 +405,8 @@ def test_get_numeric_data(self): result = df.get_dtype_counts() expected = Series({'int64': 1, 'float64': 1, datetime64name: 1, objectname: 1}) - result.sort_index() - expected.sort_index() + result = result.sort_index() + expected = expected.sort_index() assert_series_equal(result, expected) df = DataFrame({'a': 1., 'b': 2, 'c': 'foo', diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 394997201f320..e0b94815878dd 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1381,9 +1381,8 @@ def test_constructor_with_datetimes(self): expected['float64'] = 1 expected[floatname] = 1 - result.sort_index() - expected = Series(expected) - expected.sort_index() + result = result.sort_index() + expected = Series(expected).sort_index() tm.assert_series_equal(result, expected) # check with ndarray construction ndim>0 @@ -1392,7 +1391,7 @@ def test_constructor_with_datetimes(self): intname: np.array([1] * 10, dtype=intname)}, index=np.arange(10)) result = df.get_dtype_counts() - result.sort_index() + result = result.sort_index() tm.assert_series_equal(result, expected) # GH 2809 @@ -1403,8 +1402,8 @@ def test_constructor_with_datetimes(self): df = DataFrame({'datetime_s': datetime_s}) result = df.get_dtype_counts() expected = 
Series({datetime64name: 1}) - result.sort_index() - expected.sort_index() + result = result.sort_index() + expected = expected.sort_index() tm.assert_series_equal(result, expected) # GH 2810 @@ -1414,8 +1413,8 @@ def test_constructor_with_datetimes(self): df = DataFrame({'datetimes': datetimes, 'dates': dates}) result = df.get_dtype_counts() expected = Series({datetime64name: 1, objectname: 1}) - result.sort_index() - expected.sort_index() + result = result.sort_index() + expected = expected.sort_index() tm.assert_series_equal(result, expected) # GH 7594 @@ -1538,8 +1537,8 @@ def test_constructor_for_list_with_dtypes(self): result = df.get_dtype_counts() expected = Series( {'int64': 1, 'float64': 2, datetime64name: 1, objectname: 1}) - result.sort_index() - expected.sort_index() + result = result.sort_index() + expected = expected.sort_index() tm.assert_series_equal(result, expected) def test_constructor_frame_copy(self): @@ -1851,7 +1850,7 @@ def test_from_records_misc_brokenness(self): rows.append([datetime(2010, 1, 1), 1]) rows.append([datetime(2010, 1, 2), 1]) df2_obj = DataFrame.from_records(rows, columns=['date', 'test']) - results = df2_obj.get_dtype_counts() + results = df2_obj.get_dtype_counts().sort_index() expected = Series({'datetime64[ns]': 1, 'int64': 1}) tm.assert_series_equal(results, expected) diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 38bdecc9eb88f..e9e5b2a447a4a 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -725,9 +725,9 @@ def test_timedeltas(self): df = DataFrame(dict(A=Series(date_range('2012-1-1', periods=3, freq='D')), B=Series([timedelta(days=i) for i in range(3)]))) - result = df.get_dtype_counts().sort_values() + result = df.get_dtype_counts().sort_index() expected = Series( - {'datetime64[ns]': 1, 'timedelta64[ns]': 1}).sort_values() + {'datetime64[ns]': 1, 'timedelta64[ns]': 1}).sort_index() assert_series_equal(result, expected) df['C'] = df['A'] + 
df['B'] diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index 882fa634d167d..a8b81b1b03552 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -2430,8 +2430,8 @@ def _check_get(df, cond, check_dtypes=True): # upcasting case (GH # 2794) df = DataFrame(dict((c, Series([1] * 3, dtype=c)) - for c in ['int64', 'int32', - 'float32', 'float64'])) + for c in ['float32', 'float64', + 'int32', 'int64'])) df.iloc[1, :] = 0 result = df.where(df >= 0).get_dtype_counts() diff --git a/pandas/tests/frame/test_mutate_columns.py b/pandas/tests/frame/test_mutate_columns.py index 8236a41d00243..4c560129bfa45 100644 --- a/pandas/tests/frame/test_mutate_columns.py +++ b/pandas/tests/frame/test_mutate_columns.py @@ -166,17 +166,17 @@ def test_insert(self): # new item df['x'] = df['a'].astype('float32') - result = Series(dict(float64=5, float32=1)) - assert (df.get_dtype_counts() == result).all() + result = Series(dict(float32=1, float64=5)) + assert (df.get_dtype_counts().sort_index() == result).all() # replacing current (in different block) df['a'] = df['a'].astype('float32') - result = Series(dict(float64=4, float32=2)) - assert (df.get_dtype_counts() == result).all() + result = Series(dict(float32=2, float64=4)) + assert (df.get_dtype_counts().sort_index() == result).all() df['y'] = df['a'].astype('int32') - result = Series(dict(float64=4, float32=2, int32=1)) - assert (df.get_dtype_counts() == result).all() + result = Series(dict(float32=2, float64=4, int32=1)) + assert (df.get_dtype_counts().sort_index() == result).all() with tm.assert_raises_regex(ValueError, 'already exists'): df.insert(1, 'a', df['b']) diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index 36465db78361f..0b32ec89d3909 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -155,14 +155,14 @@ def check(result, 
expected=None): # rename, GH 4403 df4 = DataFrame( - {'TClose': [22.02], - 'RT': [0.0454], + {'RT': [0.0454], + 'TClose': [22.02], 'TExg': [0.0422]}, index=MultiIndex.from_tuples([(600809, 20130331)], names=['STK_ID', 'RPT_Date'])) - df5 = DataFrame({'STK_ID': [600809] * 3, - 'RPT_Date': [20120930, 20121231, 20130331], + df5 = DataFrame({'RPT_Date': [20120930, 20121231, 20130331], + 'STK_ID': [600809] * 3, 'STK_Name': [u('饡驦'), u('饡驦'), u('饡驦')], 'TClose': [38.05, 41.66, 30.01]}, index=MultiIndex.from_tuples( diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index 7907486c7c98d..68df0982a1e3e 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -719,9 +719,10 @@ def verify(df): assert_frame_equal(left, right) # GH7401 - df = pd.DataFrame({'A': list('aaaaabbbbb'), 'C': np.arange(10), + df = pd.DataFrame({'A': list('aaaaabbbbb'), 'B': (date_range('2012-01-01', periods=5) - .tolist() * 2)}) + .tolist() * 2), + 'C': np.arange(10)}) df.iloc[3, 1] = np.NaN left = df.set_index(['A', 'B']).unstack() diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index a3ba34ae92283..dda5cdea52cac 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -1054,10 +1054,10 @@ def test_to_csv_with_dst_transitions(self): def test_to_csv_quoting(self): df = DataFrame({ - 'c_string': ['a', 'b,c'], - 'c_int': [42, np.nan], - 'c_float': [1.0, 3.2], 'c_bool': [True, False], + 'c_float': [1.0, 3.2], + 'c_int': [42, np.nan], + 'c_string': ['a', 'b,c'], }) expected = """\ From c05f632bf5234b61ae41dd401732f9e6dfe18477 Mon Sep 17 00:00:00 2001 From: Kate Surta Date: Fri, 23 Feb 2018 04:40:19 +0300 Subject: [PATCH 177/217] BUG: Fix MultiIndex .loc with all numpy arrays (#19772) --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/core/indexing.py | 3 +-- pandas/tests/indexing/test_loc.py | 43 ++++++++++++++++++++++++++++++- 3 files changed, 44 insertions(+), 3 
deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index ed93503388893..603e4e6ce0522 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -836,6 +836,7 @@ MultiIndex - Bug in :func:`MultiIndex.get_loc` which would cast boolean to integer labels (:issue:`19086`) - Bug in :func:`MultiIndex.get_loc` which would fail to locate keys containing ``NaN`` (:issue:`18485`) - Bug in :func:`MultiIndex.get_loc` in large :class:`MultiIndex`, would fail when levels had different dtypes (:issue:`18520`) +- Bug in indexing where nested indexers having only numpy arrays are handled incorrectly (:issue:`19686`) I/O diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 352ce921d1d44..eb3aeda7902fc 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2107,10 +2107,9 @@ def is_nested_tuple(tup, labels): if not isinstance(tup, tuple): return False - # are we nested tuple of: tuple,list,slice for i, k in enumerate(tup): - if isinstance(k, (tuple, list, slice)): + if is_list_like(k) or isinstance(k, slice): return isinstance(labels, MultiIndex) return False diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 433b0d87ac005..86a5a82441ee8 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -8,7 +8,7 @@ import pandas as pd from pandas.compat import lrange, StringIO -from pandas import Series, DataFrame, Timestamp, date_range, MultiIndex +from pandas import Series, DataFrame, Timestamp, date_range, MultiIndex, Index from pandas.util import testing as tm from pandas.tests.indexing.common import Base @@ -711,3 +711,44 @@ def test_identity_slice_returns_new_object(self): original_series[:3] = [7, 8, 9] assert all(sliced_series[:3] == [7, 8, 9]) + + @pytest.mark.parametrize( + 'indexer_type_1', + (list, tuple, set, slice, np.ndarray, Series, Index)) + @pytest.mark.parametrize( + 'indexer_type_2', + (list, tuple, set, 
slice, np.ndarray, Series, Index)) + def test_loc_getitem_nested_indexer(self, indexer_type_1, indexer_type_2): + # GH #19686 + # .loc should work with nested indexers which can be + # any list-like objects (see `pandas.api.types.is_list_like`) or slices + + def convert_nested_indexer(indexer_type, keys): + if indexer_type == np.ndarray: + return np.array(keys) + if indexer_type == slice: + return slice(*keys) + return indexer_type(keys) + + a = [10, 20, 30] + b = [1, 2, 3] + index = pd.MultiIndex.from_product([a, b]) + df = pd.DataFrame( + np.arange(len(index), dtype='int64'), + index=index, columns=['Data']) + + keys = ([10, 20], [2, 3]) + types = (indexer_type_1, indexer_type_2) + + # check indexers with all the combinations of nested objects + # of all the valid types + indexer = tuple( + convert_nested_indexer(indexer_type, k) + for indexer_type, k in zip(types, keys)) + + result = df.loc[indexer, 'Data'] + expected = pd.Series( + [1, 2, 4, 5], name='Data', + index=pd.MultiIndex.from_product(keys)) + + tm.assert_series_equal(result, expected) From 8466004cb3f289ada761cadecf4131cccbba5203 Mon Sep 17 00:00:00 2001 From: ZhuBaohe Date: Fri, 23 Feb 2018 19:24:23 +0800 Subject: [PATCH 178/217] DOC: correct min_count param docstring (#19836) --- pandas/core/generic.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8034cf89cf8b7..85e2ce475ffa2 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7855,7 +7855,7 @@ def _doc_parms(cls): >>> pd.Series([np.nan]).prod() 1.0 ->>> pd.Series([np.nan]).sum(min_count=1) +>>> pd.Series([np.nan]).prod(min_count=1) nan """ @@ -7867,8 +7867,9 @@ def _doc_parms(cls): .. versionadded :: 0.22.0 - Added with the default being 1. This means the sum or product - of an all-NA or empty series is ``NaN``. + Added with the default being 0. This means the sum of an all-NA + or empty Series is 0, and the product of an all-NA or empty + Series is 1. 
""" From 2c657dda54283fed0ea6a76d508cce414828c7b6 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 23 Feb 2018 03:35:00 -0800 Subject: [PATCH 179/217] Continue porting period_helper; fix leftover asfreq bug (#19834) --- doc/source/whatsnew/v0.23.0.txt | 2 +- pandas/_libs/src/period_helper.c | 120 +++++++------- pandas/_libs/src/period_helper.h | 24 --- pandas/_libs/tslibs/period.pyx | 152 +++++++----------- pandas/tests/scalar/period/test_period.py | 7 + .../tests/scalar/period/test_period_asfreq.py | 10 ++ 6 files changed, 128 insertions(+), 187 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 603e4e6ce0522..ca5749afd11bc 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -765,7 +765,7 @@ Timedelta - Bug in :class:`TimedeltaIndex` where division by a ``Series`` would return a ``TimedeltaIndex`` instead of a ``Series`` (:issue:`19042`) - Bug in :func:`Timedelta.__add__`, :func:`Timedelta.__sub__` where adding or subtracting a ``np.timedelta64`` object would return another ``np.timedelta64`` instead of a ``Timedelta`` (:issue:`19738`) - Bug in :func:`Timedelta.__floordiv__`, :func:`Timedelta.__rfloordiv__` where operating with a ``Tick`` object would raise a ``TypeError`` instead of returning a numeric value (:issue:`19738`) -- Bug in :func:`Period.asfreq` where periods near ``datetime(1, 1, 1)`` could be converted incorrectly (:issue:`19643`) +- Bug in :func:`Period.asfreq` where periods near ``datetime(1, 1, 1)`` could be converted incorrectly (:issue:`19643`, :issue:`19834`) - Bug in :func:`Timedelta.total_seconds()` causing precision errors i.e. 
``Timedelta('30S').total_seconds()==30.000000000000004`` (:issue:`19458`) - Bug in :func: `Timedelta.__rmod__` where operating with a ``numpy.timedelta64`` returned a ``timedelta64`` object instead of a ``Timedelta`` (:issue:`19820`) - Multiplication of :class:`TimedeltaIndex` by ``TimedeltaIndex`` will now raise ``TypeError`` instead of raising ``ValueError`` in cases of length mis-match (:issue`19333`) diff --git a/pandas/_libs/src/period_helper.c b/pandas/_libs/src/period_helper.c index a812ed2e7e2b3..e3d250aa44f17 100644 --- a/pandas/_libs/src/period_helper.c +++ b/pandas/_libs/src/period_helper.c @@ -42,10 +42,10 @@ static int floordiv(int x, int divisor) { static int monthToQuarter(int month) { return ((month - 1) / 3) + 1; } -/* Find the absdate (days elapsed since datetime(1, 1, 1) +/* Find the unix_date (days elapsed since datetime(1970, 1, 1) * for the given year/month/day. * Assumes GREGORIAN_CALENDAR */ -npy_int64 absdate_from_ymd(int year, int month, int day) { +npy_int64 unix_date_from_ymd(int year, int month, int day) { /* Calculate the absolute date */ pandas_datetimestruct dts; npy_int64 unix_date; @@ -55,16 +55,16 @@ npy_int64 absdate_from_ymd(int year, int month, int day) { dts.month = month; dts.day = day; unix_date = pandas_datetimestruct_to_datetime(PANDAS_FR_D, &dts); - return ORD_OFFSET + unix_date; + return unix_date; } /* Sets the date part of the date_info struct Assumes GREGORIAN_CALENDAR */ static int dInfoCalc_SetFromAbsDate(register struct date_info *dinfo, - npy_int64 absdate) { + npy_int64 unix_date) { pandas_datetimestruct dts; - pandas_datetime_to_datetimestruct(absdate - ORD_OFFSET, PANDAS_FR_D, &dts); + pandas_datetime_to_datetimestruct(unix_date, PANDAS_FR_D, &dts); dinfo->year = dts.year; dinfo->month = dts.month; dinfo->day = dts.day; @@ -137,26 +137,26 @@ PANDAS_INLINE npy_int64 transform_via_day(npy_int64 ordinal, return result; } -static npy_int64 DtoB_weekday(npy_int64 absdate) { - return floordiv(absdate, 7) * 5 + 
mod_compat(absdate, 7) - BDAY_OFFSET; +static npy_int64 DtoB_weekday(npy_int64 unix_date) { + return floordiv(unix_date + 4, 7) * 5 + mod_compat(unix_date + 4, 7) - 4; } static npy_int64 DtoB(struct date_info *dinfo, - int roll_back, npy_int64 absdate) { + int roll_back, npy_int64 unix_date) { int day_of_week = dayofweek(dinfo->year, dinfo->month, dinfo->day); if (roll_back == 1) { if (day_of_week > 4) { // change to friday before weekend - absdate -= (day_of_week - 4); + unix_date -= (day_of_week - 4); } } else { if (day_of_week > 4) { // change to Monday after weekend - absdate += (7 - day_of_week); + unix_date += (7 - day_of_week); } } - return DtoB_weekday(absdate); + return DtoB_weekday(unix_date); } @@ -165,18 +165,19 @@ static npy_int64 DtoB(struct date_info *dinfo, static npy_int64 asfreq_DTtoA(npy_int64 ordinal, asfreq_info *af_info) { struct date_info dinfo; ordinal = downsample_daytime(ordinal, af_info); - dInfoCalc_SetFromAbsDate(&dinfo, ordinal + ORD_OFFSET); + dInfoCalc_SetFromAbsDate(&dinfo, ordinal); if (dinfo.month > af_info->to_a_year_end) { - return (npy_int64)(dinfo.year + 1 - BASE_YEAR); + return (npy_int64)(dinfo.year + 1 - 1970); } else { - return (npy_int64)(dinfo.year - BASE_YEAR); + return (npy_int64)(dinfo.year - 1970); } } -static npy_int64 DtoQ_yq(npy_int64 ordinal, asfreq_info *af_info, int *year, - int *quarter) { +static int DtoQ_yq(npy_int64 ordinal, asfreq_info *af_info, int *year) { struct date_info dinfo; - dInfoCalc_SetFromAbsDate(&dinfo, ordinal + ORD_OFFSET); + int quarter; + + dInfoCalc_SetFromAbsDate(&dinfo, ordinal); if (af_info->to_q_year_end != 12) { dinfo.month -= af_info->to_q_year_end; if (dinfo.month <= 0) { @@ -187,9 +188,8 @@ static npy_int64 DtoQ_yq(npy_int64 ordinal, asfreq_info *af_info, int *year, } *year = dinfo.year; - *quarter = monthToQuarter(dinfo.month); - - return 0; + quarter = monthToQuarter(dinfo.month); + return quarter; } static npy_int64 asfreq_DTtoQ(npy_int64 ordinal, asfreq_info *af_info) { @@ 
-197,8 +197,8 @@ static npy_int64 asfreq_DTtoQ(npy_int64 ordinal, asfreq_info *af_info) { ordinal = downsample_daytime(ordinal, af_info); - DtoQ_yq(ordinal, af_info, &year, &quarter); - return (npy_int64)((year - BASE_YEAR) * 4 + quarter - 1); + quarter = DtoQ_yq(ordinal, af_info, &year); + return (npy_int64)((year - 1970) * 4 + quarter - 1); } static npy_int64 asfreq_DTtoM(npy_int64 ordinal, asfreq_info *af_info) { @@ -206,28 +206,25 @@ static npy_int64 asfreq_DTtoM(npy_int64 ordinal, asfreq_info *af_info) { ordinal = downsample_daytime(ordinal, af_info); - dInfoCalc_SetFromAbsDate(&dinfo, ordinal + ORD_OFFSET); - return (npy_int64)((dinfo.year - BASE_YEAR) * 12 + dinfo.month - 1); + dInfoCalc_SetFromAbsDate(&dinfo, ordinal); + return (npy_int64)((dinfo.year - 1970) * 12 + dinfo.month - 1); } static npy_int64 asfreq_DTtoW(npy_int64 ordinal, asfreq_info *af_info) { ordinal = downsample_daytime(ordinal, af_info); - return (ordinal + ORD_OFFSET - (1 + af_info->to_week_end)) / 7 + 1 - - WEEK_OFFSET; + return floordiv(ordinal + 3 - af_info->to_week_end, 7) + 1; } static npy_int64 asfreq_DTtoB(npy_int64 ordinal, asfreq_info *af_info) { struct date_info dinfo; - npy_int64 absdate; int roll_back; ordinal = downsample_daytime(ordinal, af_info); - absdate = ordinal + ORD_OFFSET; - dInfoCalc_SetFromAbsDate(&dinfo, absdate); + dInfoCalc_SetFromAbsDate(&dinfo, ordinal); // This usage defines roll_back the opposite way from the others roll_back = 1 - af_info->is_end; - return DtoB(&dinfo, roll_back, absdate); + return DtoB(&dinfo, roll_back, ordinal); } // all intra day calculations are now done within one function @@ -243,10 +240,7 @@ static npy_int64 asfreq_UpsampleWithinDay(npy_int64 ordinal, //************ FROM BUSINESS *************** static npy_int64 asfreq_BtoDT(npy_int64 ordinal, asfreq_info *af_info) { - ordinal += BDAY_OFFSET; - ordinal = - (floordiv(ordinal - 1, 5) * 7 + mod_compat(ordinal - 1, 5) + 1 - - ORD_OFFSET); + ordinal = floordiv(ordinal + 3, 5) * 7 + 
mod_compat(ordinal + 3, 5) - 3; return upsample_daytime(ordinal, af_info); } @@ -270,8 +264,7 @@ static npy_int64 asfreq_BtoW(npy_int64 ordinal, asfreq_info *af_info) { //************ FROM WEEKLY *************** static npy_int64 asfreq_WtoDT(npy_int64 ordinal, asfreq_info *af_info) { - ordinal = (ordinal + WEEK_OFFSET) * 7 + - af_info->from_week_end - ORD_OFFSET + + ordinal = ordinal * 7 + af_info->from_week_end - 4 + (7 - 1) * (af_info->is_end - 1); return upsample_daytime(ordinal, af_info); } @@ -294,30 +287,29 @@ static npy_int64 asfreq_WtoW(npy_int64 ordinal, asfreq_info *af_info) { static npy_int64 asfreq_WtoB(npy_int64 ordinal, asfreq_info *af_info) { struct date_info dinfo; - npy_int64 absdate = asfreq_WtoDT(ordinal, af_info) + ORD_OFFSET; + npy_int64 unix_date = asfreq_WtoDT(ordinal, af_info); int roll_back = af_info->is_end; - dInfoCalc_SetFromAbsDate(&dinfo, absdate); + dInfoCalc_SetFromAbsDate(&dinfo, unix_date); - return DtoB(&dinfo, roll_back, absdate); + return DtoB(&dinfo, roll_back, unix_date); } //************ FROM MONTHLY *************** static void MtoD_ym(npy_int64 ordinal, int *y, int *m) { - *y = floordiv(ordinal, 12) + BASE_YEAR; + *y = floordiv(ordinal, 12) + 1970; *m = mod_compat(ordinal, 12) + 1; } static npy_int64 asfreq_MtoDT(npy_int64 ordinal, asfreq_info *af_info) { - npy_int64 absdate; + npy_int64 unix_date; int y, m; ordinal += af_info->is_end; MtoD_ym(ordinal, &y, &m); - absdate = absdate_from_ymd(y, m, 1); - ordinal = absdate - ORD_OFFSET; + unix_date = unix_date_from_ymd(y, m, 1); - ordinal -= af_info->is_end; - return upsample_daytime(ordinal, af_info); + unix_date -= af_info->is_end; + return upsample_daytime(unix_date, af_info); } static npy_int64 asfreq_MtoA(npy_int64 ordinal, asfreq_info *af_info) { @@ -334,18 +326,18 @@ static npy_int64 asfreq_MtoW(npy_int64 ordinal, asfreq_info *af_info) { static npy_int64 asfreq_MtoB(npy_int64 ordinal, asfreq_info *af_info) { struct date_info dinfo; - npy_int64 absdate = 
asfreq_MtoDT(ordinal, af_info) + ORD_OFFSET; + npy_int64 unix_date = asfreq_MtoDT(ordinal, af_info); int roll_back = af_info->is_end; - dInfoCalc_SetFromAbsDate(&dinfo, absdate); + dInfoCalc_SetFromAbsDate(&dinfo, unix_date); - return DtoB(&dinfo, roll_back, absdate); + return DtoB(&dinfo, roll_back, unix_date); } //************ FROM QUARTERLY *************** static void QtoD_ym(npy_int64 ordinal, int *y, int *m, asfreq_info *af_info) { - *y = floordiv(ordinal, 4) + BASE_YEAR; + *y = floordiv(ordinal, 4) + 1970; *m = mod_compat(ordinal, 4) * 3 + 1; if (af_info->from_q_year_end != 12) { @@ -359,16 +351,16 @@ static void QtoD_ym(npy_int64 ordinal, int *y, int *m, asfreq_info *af_info) { } static npy_int64 asfreq_QtoDT(npy_int64 ordinal, asfreq_info *af_info) { - npy_int64 absdate; + npy_int64 unix_date; int y, m; ordinal += af_info->is_end; QtoD_ym(ordinal, &y, &m, af_info); - absdate = absdate_from_ymd(y, m, 1); + unix_date = unix_date_from_ymd(y, m, 1); - absdate -= af_info->is_end; - return upsample_daytime(absdate - ORD_OFFSET, af_info); + unix_date -= af_info->is_end; + return upsample_daytime(unix_date, af_info); } static npy_int64 asfreq_QtoQ(npy_int64 ordinal, asfreq_info *af_info) { @@ -389,21 +381,21 @@ static npy_int64 asfreq_QtoW(npy_int64 ordinal, asfreq_info *af_info) { static npy_int64 asfreq_QtoB(npy_int64 ordinal, asfreq_info *af_info) { struct date_info dinfo; - npy_int64 absdate = asfreq_QtoDT(ordinal, af_info) + ORD_OFFSET; + npy_int64 unix_date = asfreq_QtoDT(ordinal, af_info); int roll_back = af_info->is_end; - dInfoCalc_SetFromAbsDate(&dinfo, absdate); + dInfoCalc_SetFromAbsDate(&dinfo, unix_date); - return DtoB(&dinfo, roll_back, absdate); + return DtoB(&dinfo, roll_back, unix_date); } //************ FROM ANNUAL *************** static npy_int64 asfreq_AtoDT(npy_int64 ordinal, asfreq_info *af_info) { - npy_int64 absdate; + npy_int64 unix_date; // start from 1970 - npy_int64 year = ordinal + BASE_YEAR; + npy_int64 year = ordinal + 1970; int 
month = (af_info->from_a_year_end % 12) + 1; if (af_info->from_a_year_end != 12) { @@ -411,10 +403,10 @@ static npy_int64 asfreq_AtoDT(npy_int64 ordinal, asfreq_info *af_info) { } year += af_info->is_end; - absdate = absdate_from_ymd(year, month, 1); + unix_date = unix_date_from_ymd(year, month, 1); - absdate -= af_info->is_end; - return upsample_daytime(absdate - ORD_OFFSET, af_info); + unix_date -= af_info->is_end; + return upsample_daytime(unix_date, af_info); } static npy_int64 asfreq_AtoA(npy_int64 ordinal, asfreq_info *af_info) { @@ -435,11 +427,11 @@ static npy_int64 asfreq_AtoW(npy_int64 ordinal, asfreq_info *af_info) { static npy_int64 asfreq_AtoB(npy_int64 ordinal, asfreq_info *af_info) { struct date_info dinfo; - npy_int64 absdate = asfreq_AtoDT(ordinal, af_info) + ORD_OFFSET; + npy_int64 unix_date = asfreq_AtoDT(ordinal, af_info); int roll_back = af_info->is_end; - dInfoCalc_SetFromAbsDate(&dinfo, absdate); + dInfoCalc_SetFromAbsDate(&dinfo, unix_date); - return DtoB(&dinfo, roll_back, absdate); + return DtoB(&dinfo, roll_back, unix_date); } static npy_int64 nofunc(npy_int64 ordinal, asfreq_info *af_info) { diff --git a/pandas/_libs/src/period_helper.h b/pandas/_libs/src/period_helper.h index 1573b1eeec74b..7163dc960d152 100644 --- a/pandas/_libs/src/period_helper.h +++ b/pandas/_libs/src/period_helper.h @@ -20,32 +20,8 @@ frequency conversion routines. 
#include "limits.h" #include "numpy/ndarraytypes.h" -/* - * declarations from period here - */ - -#define Py_Error(errortype, errorstr) \ - { \ - PyErr_SetString(errortype, errorstr); \ - goto onError; \ - } - /*** FREQUENCY CONSTANTS ***/ -// HIGHFREQ_ORIG is the datetime ordinal from which to begin the second -// frequency ordinal sequence - -// #define HIGHFREQ_ORIG 62135683200LL -#define BASE_YEAR 1970 -#define ORD_OFFSET 719163LL // days until 1970-01-01 -#define BDAY_OFFSET 513689LL // days until 1970-01-01 -#define WEEK_OFFSET 102737LL -#define BASE_WEEK_TO_DAY_OFFSET \ - 1 // difference between day 0 and end of week in days -#define DAYS_PER_WEEK 7 -#define BUSINESS_DAYS_PER_WEEK 5 -#define HIGHFREQ_ORIG 0 // ORD_OFFSET * 86400LL // days until 1970-01-01 - #define FR_ANN 1000 /* Annual */ #define FR_ANNDEC FR_ANN /* Annual - December year end*/ #define FR_ANNJAN 1001 /* Annual - January year end*/ diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index e1c783ac9fa54..f1a193706144f 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -75,10 +75,6 @@ cdef extern from "period_helper.h": int FR_BUS int FR_UND - int ORD_OFFSET - int WEEK_OFFSET - int BDAY_OFFSET - ctypedef struct date_info: double second int minute @@ -181,7 +177,7 @@ cdef int64_t get_period_ordinal(int year, int month, int day, period_ordinal : int64_t """ cdef: - int64_t absdays, unix_date, seconds, delta + int64_t unix_date, seconds, delta int64_t weeks int64_t day_adj int freq_group, fmonth, mdiff @@ -215,8 +211,7 @@ cdef int64_t get_period_ordinal(int year, int month, int day, elif freq == FR_MTH: return (year - 1970) * 12 + month - 1 - absdays = absdate_from_ymd(year, month, day) - unix_date = absdays - ORD_OFFSET + unix_date = unix_date_from_ymd(year, month, day) if freq >= FR_SEC: seconds = unix_date * 86400 + hour * 3600 + minute * 60 + second @@ -247,48 +242,48 @@ cdef int64_t get_period_ordinal(int year, int month, int day, 
return unix_date elif freq == FR_BUS: - # calculate the current week assuming sunday as last day of a week - # Jan 1 0001 is a Monday, so subtract 1 to get to end-of-week - weeks = (unix_date + ORD_OFFSET - 1) // 7 + # calculate the current week (counting from 1970-01-01) treating + # sunday as last day of a week + weeks = (unix_date + 3) // 7 # calculate the current weekday (in range 1 .. 7) - delta = (unix_date + ORD_OFFSET - 1) % 7 + 1 + delta = (unix_date + 3) % 7 + 1 # return the number of business days in full weeks plus the business # days in the last - possible partial - week if delta <= 5: - return (weeks * 5) + delta - BDAY_OFFSET + return (5 * weeks) + delta - 4 else: - return (weeks * 5) + (5 + 1) - BDAY_OFFSET + return (5 * weeks) + (5 + 1) - 4 elif freq_group == FR_WK: day_adj = freq - FR_WK - return (unix_date + ORD_OFFSET - (1 + day_adj)) // 7 + 1 - WEEK_OFFSET + return (unix_date + 3 - day_adj) // 7 + 1 # raise ValueError cdef void get_date_info(int64_t ordinal, int freq, date_info *dinfo) nogil: cdef: - int64_t absdate + int64_t unix_date double abstime - absdate = get_python_ordinal(ordinal, freq); - abstime = get_abs_time(freq, absdate - ORD_OFFSET, ordinal) + unix_date = get_unix_date(ordinal, freq) + abstime = get_abs_time(freq, unix_date, ordinal) while abstime < 0: abstime += 86400 - absdate -= 1 + unix_date -= 1 while abstime >= 86400: abstime -= 86400 - absdate += 1 + unix_date += 1 - dInfoCalc_SetFromAbsDateTime(dinfo, absdate, abstime) + date_info_from_days_and_time(dinfo, unix_date, abstime) -cdef int64_t get_python_ordinal(int64_t period_ordinal, int freq) nogil: +cdef int64_t get_unix_date(int64_t period_ordinal, int freq) nogil: """ Returns the proleptic Gregorian ordinal of the date, as an integer. - This corresponds to the number of days since Jan., 1st, 1AD. + This corresponds to the number of days since Jan., 1st, 1970 AD. 
When the instance has a frequency less than daily, the proleptic date is calculated for the last day of the period. @@ -299,92 +294,56 @@ cdef int64_t get_python_ordinal(int64_t period_ordinal, int freq) nogil: Returns ------- - absdate : int64_t number of days since datetime(1, 1, 1) + unix_date : int64_t number of days since datetime(1970, 1, 1) """ cdef: asfreq_info af_info freq_conv_func toDaily = NULL if freq == FR_DAY: - return period_ordinal + ORD_OFFSET + return period_ordinal toDaily = get_asfreq_func(freq, FR_DAY) get_asfreq_info(freq, FR_DAY, 'E', &af_info) - return toDaily(period_ordinal, &af_info) + ORD_OFFSET + return toDaily(period_ordinal, &af_info) -cdef void dInfoCalc_SetFromAbsDateTime(date_info *dinfo, - int64_t absdate, double abstime) nogil: +@cython.cdivision +cdef void date_info_from_days_and_time(date_info *dinfo, + int64_t unix_date, + double abstime) nogil: """ Set the instance's value using the given date and time. - Assumes GREGORIAN_CALENDAR. Parameters ---------- dinfo : date_info* - absdate : int64_t - days elapsed since datetime(1, 1, 1) + unix_date : int64_t + days elapsed since datetime(1970, 1, 1) abstime : double - seconds elapsed since beginning of day described by absdate + seconds elapsed since beginning of day described by unix_date Notes ----- Updates dinfo inplace """ + cdef: + pandas_datetimestruct dts + int inttime + int hour, minute + double second + # Bounds check # The calling function is responsible for ensuring that # abstime >= 0.0 and abstime <= 86400 # Calculate the date - dInfoCalc_SetFromAbsDate(dinfo, absdate) - - # Calculate the time - dInfoCalc_SetFromAbsTime(dinfo, abstime) - - -cdef void dInfoCalc_SetFromAbsDate(date_info *dinfo, int64_t absdate) nogil: - """ - Sets the date part of the date_info struct - Assumes GREGORIAN_CALENDAR - - Parameters - ---------- - dinfo : date_info* - unix_date : int64_t - - Notes - ----- - Updates dinfo inplace - """ - cdef: - pandas_datetimestruct dts - - 
pandas_datetime_to_datetimestruct(absdate - ORD_OFFSET, PANDAS_FR_D, &dts) + pandas_datetime_to_datetimestruct(unix_date, PANDAS_FR_D, &dts) dinfo.year = dts.year dinfo.month = dts.month dinfo.day = dts.day - -@cython.cdivision -cdef void dInfoCalc_SetFromAbsTime(date_info *dinfo, double abstime) nogil: - """ - Sets the time part of the DateTime object. - - Parameters - ---------- - dinfo : date_info* - abstime : double - seconds elapsed since beginning of day described by absdate - - Notes - ----- - Updates dinfo inplace - """ - cdef: - int inttime - int hour, minute - double second - + # Calculate the time inttime = abstime hour = inttime / 3600 minute = (inttime % 3600) / 60 @@ -396,8 +355,7 @@ cdef void dInfoCalc_SetFromAbsTime(date_info *dinfo, double abstime) nogil: @cython.cdivision -cdef double get_abs_time(int freq, int64_t date_ordinal, - int64_t ordinal) nogil: +cdef double get_abs_time(int freq, int64_t unix_date, int64_t ordinal) nogil: cdef: int freq_index, day_index, base_index int64_t per_day, start_ord @@ -416,16 +374,15 @@ cdef double get_abs_time(int freq, int64_t date_ordinal, if base_index < freq_index: unit = 1 / unit - start_ord = date_ordinal * per_day + start_ord = unix_date * per_day result = (unit * (ordinal - start_ord)) return result -cdef int64_t absdate_from_ymd(int year, int month, int day) nogil: +cdef int64_t unix_date_from_ymd(int year, int month, int day) nogil: """ - Find the absdate (days elapsed since datetime(1, 1, 1) + Find the unix_date (days elapsed since datetime(1970, 1, 1) for the given year/month/day. 
- Assumes GREGORIAN_CALENDAR Parameters ---------- @@ -435,11 +392,9 @@ cdef int64_t absdate_from_ymd(int year, int month, int day) nogil: Returns ------- - absdate : int - days elapsed since datetime(1, 1, 1) + unix_date : int + days elapsed since datetime(1970, 1, 1) """ - - # /* Calculate the absolute date cdef: pandas_datetimestruct dts int64_t unix_date @@ -449,7 +404,7 @@ cdef int64_t absdate_from_ymd(int year, int month, int day) nogil: dts.month = month dts.day = day unix_date = pandas_datetimestruct_to_datetime(PANDAS_FR_D, &dts) - return ORD_OFFSET + unix_date + return unix_date cdef int get_yq(int64_t ordinal, int freq, int *quarter, int *year): @@ -475,9 +430,9 @@ cdef int get_yq(int64_t ordinal, int freq, int *quarter, int *year): cdef: asfreq_info af_info int qtr_freq - int64_t daily_ord + int64_t unix_date - daily_ord = get_python_ordinal(ordinal, freq) - ORD_OFFSET + unix_date = get_unix_date(ordinal, freq) if get_freq_group(freq) == FR_QTR: qtr_freq = freq @@ -486,16 +441,16 @@ cdef int get_yq(int64_t ordinal, int freq, int *quarter, int *year): get_asfreq_info(FR_DAY, qtr_freq, 'E', &af_info) - DtoQ_yq(daily_ord, &af_info, year, quarter) + quarter[0] = DtoQ_yq(unix_date, &af_info, year) return qtr_freq -cdef void DtoQ_yq(int64_t ordinal, asfreq_info *af_info, - int *year, int *quarter): +cdef int DtoQ_yq(int64_t unix_date, asfreq_info *af_info, int *year): cdef: date_info dinfo + int quarter - dInfoCalc_SetFromAbsDate(&dinfo, ordinal + ORD_OFFSET) + date_info_from_days_and_time(&dinfo, unix_date, 0) if af_info.to_q_year_end != 12: dinfo.month -= af_info.to_q_year_end @@ -505,10 +460,11 @@ cdef void DtoQ_yq(int64_t ordinal, asfreq_info *af_info, dinfo.year += 1 year[0] = dinfo.year - quarter[0] = monthToQuarter(dinfo.month) + quarter = month_to_quarter(dinfo.month) + return quarter -cdef inline int monthToQuarter(int month): +cdef inline int month_to_quarter(int month): return (month - 1) // 3 + 1 @@ -678,7 +634,7 @@ def period_format(int64_t 
value, int freq, object fmt=None): return repr(NaT) if fmt is None: - freq_group = (freq // 1000) * 1000 + freq_group = get_freq_group(freq) if freq_group == 1000: # FR_ANN fmt = b'%Y' elif freq_group == 2000: # FR_QTR @@ -1620,8 +1576,8 @@ class Period(_Period): return cls._from_ordinal(ordinal, freq) -def _ordinal_from_fields(year, month, quarter, day, - hour, minute, second, freq): +cdef int64_t _ordinal_from_fields(year, month, quarter, day, + hour, minute, second, freq): base, mult = get_freq_code(freq) if quarter is not None: year, month = _quarter_to_myear(year, quarter, freq) diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py index dff5433adcf79..f43ab0704f0f4 100644 --- a/pandas/tests/scalar/period/test_period.py +++ b/pandas/tests/scalar/period/test_period.py @@ -1440,3 +1440,10 @@ def test_period_immutable(): freq = per.freq with pytest.raises(AttributeError): per.freq = 2 * freq + + +@pytest.mark.xfail(reason='GH#19834 Period parsing error') +def test_small_year_parsing(): + per1 = Period('0001-01-07', 'D') + assert per1.year == 1 + assert per1.day == 7 diff --git a/pandas/tests/scalar/period/test_period_asfreq.py b/pandas/tests/scalar/period/test_period_asfreq.py index 9f8b2562e9e20..474d19809b03c 100644 --- a/pandas/tests/scalar/period/test_period_asfreq.py +++ b/pandas/tests/scalar/period/test_period_asfreq.py @@ -21,6 +21,16 @@ def test_asfreq_near_zero(self, freq): tup2 = (prev.year, prev.month, prev.day) assert tup2 < tup1 + def test_asfreq_near_zero_weekly(self): + # GH#19834 + per1 = Period('0001-01-01', 'D') + 6 + per2 = Period('0001-01-01', 'D') - 6 + week1 = per1.asfreq('W') + week2 = per2.asfreq('W') + assert week1 != week2 + assert week1.asfreq('D', 'E') >= per1 + assert week2.asfreq('D', 'S') <= per2 + @pytest.mark.xfail(reason='GH#19643 period_helper asfreq functions fail ' 'to check for overflows') def test_to_timestamp_out_of_bounds(self): From d5c616774aa90f61c135989801cfb87d6dde3ae8 Mon 
Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 23 Feb 2018 03:36:42 -0800 Subject: [PATCH 180/217] BUG: fix index op names and pinning (#19723) --- pandas/core/indexes/base.py | 267 +++++++++++++--------------- pandas/core/indexes/datetimelike.py | 26 ++- pandas/core/indexes/datetimes.py | 5 +- pandas/core/indexes/period.py | 16 +- pandas/core/indexes/range.py | 58 ++---- pandas/core/indexes/timedeltas.py | 12 +- pandas/tests/indexes/common.py | 5 +- pandas/tests/indexes/test_base.py | 22 ++- 8 files changed, 204 insertions(+), 207 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 59fe4bba649d3..c343126db0ea1 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1,11 +1,11 @@ -import datetime +from datetime import datetime, timedelta import warnings import operator import numpy as np from pandas._libs import (lib, index as libindex, tslib as libts, algos as libalgos, join as libjoin, - Timestamp, Timedelta) + Timedelta) from pandas._libs.lib import is_datetime_array from pandas.compat import range, u, set_function_name @@ -47,6 +47,7 @@ from pandas.core.base import PandasObject, IndexOpsMixin import pandas.core.common as com import pandas.core.base as base +from pandas.core import ops from pandas.util._decorators import ( Appender, Substitution, cache_readonly, deprecate_kwarg) from pandas.core.indexes.frozen import FrozenList @@ -55,7 +56,7 @@ import pandas.core.algorithms as algos import pandas.core.sorting as sorting from pandas.io.formats.printing import pprint_thing -from pandas.core.ops import _comp_method_OBJECT_ARRAY, make_invalid_op +from pandas.core.ops import make_invalid_op from pandas.core.config import get_option from pandas.core.strings import StringMethods @@ -82,6 +83,74 @@ def _try_get_item(x): return x +def _make_comparison_op(op, cls): + def cmp_method(self, other): + if isinstance(other, (np.ndarray, Index, ABCSeries)): + if other.ndim > 0 and len(self) != len(other): + raise 
ValueError('Lengths must match to compare') + + # we may need to directly compare underlying + # representations + if needs_i8_conversion(self) and needs_i8_conversion(other): + return self._evaluate_compare(other, op) + + if is_object_dtype(self) and self.nlevels == 1: + # don't pass MultiIndex + with np.errstate(all='ignore'): + result = ops._comp_method_OBJECT_ARRAY(op, self.values, other) + else: + with np.errstate(all='ignore'): + result = op(self.values, np.asarray(other)) + + # technically we could support bool dtyped Index + # for now just return the indexing array directly + if is_bool_dtype(result): + return result + try: + return Index(result) + except TypeError: + return result + + name = '__{name}__'.format(name=op.__name__) + # TODO: docstring? + return set_function_name(cmp_method, name, cls) + + +def _make_arithmetic_op(op, cls): + def index_arithmetic_method(self, other): + if isinstance(other, (ABCSeries, ABCDataFrame)): + return NotImplemented + elif isinstance(other, ABCTimedeltaIndex): + # Defer to subclass implementation + return NotImplemented + + other = self._validate_for_numeric_binop(other, op) + + # handle time-based others + if isinstance(other, (ABCDateOffset, np.timedelta64, timedelta)): + return self._evaluate_with_timedelta_like(other, op) + elif isinstance(other, (datetime, np.datetime64)): + return self._evaluate_with_datetime_like(other, op) + + values = self.values + with np.errstate(all='ignore'): + result = op(values, other) + + result = missing.dispatch_missing(op, values, other, result) + + attrs = self._get_attributes_dict() + attrs = self._maybe_update_attributes(attrs) + if op is divmod: + result = (Index(result[0], **attrs), Index(result[1], **attrs)) + else: + result = Index(result, **attrs) + return result + + name = '__{name}__'.format(name=op.__name__) + # TODO: docstring? 
+ return set_function_name(index_arithmetic_method, name, cls) + + class InvalidIndexError(Exception): pass @@ -2175,11 +2244,13 @@ def __add__(self, other): def __radd__(self, other): return Index(other + np.array(self)) - __iadd__ = __add__ + def __iadd__(self, other): + # alias for __add__ + return self + other def __sub__(self, other): raise TypeError("cannot perform __sub__ with this index type: " - "{typ}".format(typ=type(self))) + "{typ}".format(typ=type(self).__name__)) def __and__(self, other): return self.intersection(other) @@ -3917,13 +3988,11 @@ def dropna(self, how='any'): return self._shallow_copy(self.values[~self._isnan]) return self._shallow_copy() - def _evaluate_with_timedelta_like(self, other, op, opstr, reversed=False): + def _evaluate_with_timedelta_like(self, other, op): # Timedelta knows how to operate with np.array, so dispatch to that # operation and then wrap the results other = Timedelta(other) values = self.values - if reversed: - values, other = other, values with np.errstate(all='ignore'): result = op(values, other) @@ -3934,7 +4003,7 @@ def _evaluate_with_timedelta_like(self, other, op, opstr, reversed=False): return Index(result[0], **attrs), Index(result[1], **attrs) return Index(result, **attrs) - def _evaluate_with_datetime_like(self, other, op, opstr): + def _evaluate_with_datetime_like(self, other, op): raise TypeError("can only perform ops with datetime like values") def _evaluate_compare(self, other, op): @@ -3943,64 +4012,39 @@ def _evaluate_compare(self, other, op): @classmethod def _add_comparison_methods(cls): """ add in comparison methods """ - - def _make_compare(op): - def _evaluate_compare(self, other): - if isinstance(other, (np.ndarray, Index, ABCSeries)): - if other.ndim > 0 and len(self) != len(other): - raise ValueError('Lengths must match to compare') - - # we may need to directly compare underlying - # representations - if needs_i8_conversion(self) and needs_i8_conversion(other): - return 
self._evaluate_compare(other, op) - - if (is_object_dtype(self) and - self.nlevels == 1): - - # don't pass MultiIndex - with np.errstate(all='ignore'): - result = _comp_method_OBJECT_ARRAY( - op, self.values, other) - else: - with np.errstate(all='ignore'): - result = op(self.values, np.asarray(other)) - - # technically we could support bool dtyped Index - # for now just return the indexing array directly - if is_bool_dtype(result): - return result - try: - return Index(result) - except TypeError: - return result - - name = '__{name}__'.format(name=op.__name__) - return set_function_name(_evaluate_compare, name, cls) - - cls.__eq__ = _make_compare(operator.eq) - cls.__ne__ = _make_compare(operator.ne) - cls.__lt__ = _make_compare(operator.lt) - cls.__gt__ = _make_compare(operator.gt) - cls.__le__ = _make_compare(operator.le) - cls.__ge__ = _make_compare(operator.ge) + cls.__eq__ = _make_comparison_op(operator.eq, cls) + cls.__ne__ = _make_comparison_op(operator.ne, cls) + cls.__lt__ = _make_comparison_op(operator.lt, cls) + cls.__gt__ = _make_comparison_op(operator.gt, cls) + cls.__le__ = _make_comparison_op(operator.le, cls) + cls.__ge__ = _make_comparison_op(operator.ge, cls) @classmethod def _add_numeric_methods_add_sub_disabled(cls): """ add in the numeric add/sub methods to disable """ - cls.__add__ = cls.__radd__ = __iadd__ = make_invalid_op('__add__') # noqa - cls.__sub__ = __isub__ = make_invalid_op('__sub__') # noqa + cls.__add__ = make_invalid_op('__add__') + cls.__radd__ = make_invalid_op('__radd__') + cls.__iadd__ = make_invalid_op('__iadd__') + cls.__sub__ = make_invalid_op('__sub__') + cls.__rsub__ = make_invalid_op('__rsub__') + cls.__isub__ = make_invalid_op('__isub__') @classmethod def _add_numeric_methods_disabled(cls): """ add in numeric methods to disable other than add/sub """ cls.__pow__ = make_invalid_op('__pow__') cls.__rpow__ = make_invalid_op('__rpow__') - cls.__mul__ = cls.__rmul__ = make_invalid_op('__mul__') - cls.__floordiv__ = 
cls.__rfloordiv__ = make_invalid_op('__floordiv__') - cls.__truediv__ = cls.__rtruediv__ = make_invalid_op('__truediv__') + cls.__mul__ = make_invalid_op('__mul__') + cls.__rmul__ = make_invalid_op('__rmul__') + cls.__floordiv__ = make_invalid_op('__floordiv__') + cls.__rfloordiv__ = make_invalid_op('__rfloordiv__') + cls.__truediv__ = make_invalid_op('__truediv__') + cls.__rtruediv__ = make_invalid_op('__rtruediv__') if not compat.PY3: - cls.__div__ = cls.__rdiv__ = make_invalid_op('__div__') + cls.__div__ = make_invalid_op('__div__') + cls.__rdiv__ = make_invalid_op('__rdiv__') + cls.__mod__ = make_invalid_op('__mod__') + cls.__divmod__ = make_invalid_op('__divmod__') cls.__neg__ = make_invalid_op('__neg__') cls.__pos__ = make_invalid_op('__pos__') cls.__abs__ = make_invalid_op('__abs__') @@ -4015,34 +4059,29 @@ def _validate_for_numeric_unaryop(self, op, opstr): if not self._is_numeric_dtype: raise TypeError("cannot evaluate a numeric op " - "{opstr} for type: {typ}".format( - opstr=opstr, - typ=type(self)) - ) + "{opstr} for type: {typ}" + .format(opstr=opstr, typ=type(self).__name__)) - def _validate_for_numeric_binop(self, other, op, opstr): + def _validate_for_numeric_binop(self, other, op): """ return valid other, evaluate or raise TypeError if we are not of the appropriate type internal method called by ops """ + opstr = '__{opname}__'.format(opname=op.__name__) # if we are an inheritor of numeric, # but not actually numeric (e.g. 
DatetimeIndex/PeriodIndex) if not self._is_numeric_dtype: raise TypeError("cannot evaluate a numeric op {opstr} " - "for type: {typ}".format( - opstr=opstr, - typ=type(self)) - ) + "for type: {typ}" + .format(opstr=opstr, typ=type(self).__name__)) if isinstance(other, Index): if not other._is_numeric_dtype: raise TypeError("cannot evaluate a numeric op " - "{opstr} with type: {typ}".format( - opstr=type(self), - typ=type(other)) - ) + "{opstr} with type: {typ}" + .format(opstr=opstr, typ=type(other))) elif isinstance(other, np.ndarray) and not other.ndim: other = other.item() @@ -4054,11 +4093,10 @@ def _validate_for_numeric_binop(self, other, op, opstr): if other.dtype.kind not in ['f', 'i', 'u']: raise TypeError("cannot evaluate a numeric op " "with a non-numeric dtype") - elif isinstance(other, (ABCDateOffset, np.timedelta64, - datetime.timedelta)): + elif isinstance(other, (ABCDateOffset, np.timedelta64, timedelta)): # higher up to handle pass - elif isinstance(other, (Timestamp, np.datetime64)): + elif isinstance(other, (datetime, np.datetime64)): # higher up to handle pass else: @@ -4070,73 +4108,24 @@ def _validate_for_numeric_binop(self, other, op, opstr): @classmethod def _add_numeric_methods_binary(cls): """ add in numeric methods """ - - def _make_evaluate_binop(op, opstr, reversed=False, constructor=Index): - def _evaluate_numeric_binop(self, other): - if isinstance(other, (ABCSeries, ABCDataFrame)): - return NotImplemented - elif isinstance(other, ABCTimedeltaIndex): - # Defer to subclass implementation - return NotImplemented - - other = self._validate_for_numeric_binop(other, op, opstr) - - # handle time-based others - if isinstance(other, (ABCDateOffset, np.timedelta64, - datetime.timedelta)): - return self._evaluate_with_timedelta_like(other, op, opstr, - reversed) - elif isinstance(other, (Timestamp, np.datetime64)): - return self._evaluate_with_datetime_like(other, op, opstr) - - # if we are a reversed non-commutative op - values = self.values - 
if reversed: - values, other = other, values - - attrs = self._get_attributes_dict() - attrs = self._maybe_update_attributes(attrs) - with np.errstate(all='ignore'): - result = op(values, other) - - result = missing.dispatch_missing(op, values, other, result) - return constructor(result, **attrs) - - return _evaluate_numeric_binop - - cls.__add__ = cls.__radd__ = _make_evaluate_binop( - operator.add, '__add__') - cls.__sub__ = _make_evaluate_binop( - operator.sub, '__sub__') - cls.__rsub__ = _make_evaluate_binop( - operator.sub, '__sub__', reversed=True) - cls.__mul__ = cls.__rmul__ = _make_evaluate_binop( - operator.mul, '__mul__') - cls.__rpow__ = _make_evaluate_binop( - operator.pow, '__pow__', reversed=True) - cls.__pow__ = _make_evaluate_binop( - operator.pow, '__pow__') - cls.__mod__ = _make_evaluate_binop( - operator.mod, '__mod__') - cls.__floordiv__ = _make_evaluate_binop( - operator.floordiv, '__floordiv__') - cls.__rfloordiv__ = _make_evaluate_binop( - operator.floordiv, '__floordiv__', reversed=True) - cls.__truediv__ = _make_evaluate_binop( - operator.truediv, '__truediv__') - cls.__rtruediv__ = _make_evaluate_binop( - operator.truediv, '__truediv__', reversed=True) + cls.__add__ = _make_arithmetic_op(operator.add, cls) + cls.__radd__ = _make_arithmetic_op(ops.radd, cls) + cls.__sub__ = _make_arithmetic_op(operator.sub, cls) + cls.__rsub__ = _make_arithmetic_op(ops.rsub, cls) + cls.__mul__ = _make_arithmetic_op(operator.mul, cls) + cls.__rmul__ = _make_arithmetic_op(ops.rmul, cls) + cls.__rpow__ = _make_arithmetic_op(ops.rpow, cls) + cls.__pow__ = _make_arithmetic_op(operator.pow, cls) + cls.__mod__ = _make_arithmetic_op(operator.mod, cls) + cls.__floordiv__ = _make_arithmetic_op(operator.floordiv, cls) + cls.__rfloordiv__ = _make_arithmetic_op(ops.rfloordiv, cls) + cls.__truediv__ = _make_arithmetic_op(operator.truediv, cls) + cls.__rtruediv__ = _make_arithmetic_op(ops.rtruediv, cls) if not compat.PY3: - cls.__div__ = _make_evaluate_binop( - 
operator.div, '__div__') - cls.__rdiv__ = _make_evaluate_binop( - operator.div, '__div__', reversed=True) + cls.__div__ = _make_arithmetic_op(operator.div, cls) + cls.__rdiv__ = _make_arithmetic_op(ops.rdiv, cls) - cls.__divmod__ = _make_evaluate_binop( - divmod, - '__divmod__', - constructor=lambda result, **attrs: (Index(result[0], **attrs), - Index(result[1], **attrs))) + cls.__divmod__ = _make_arithmetic_op(divmod, cls) @classmethod def _add_numeric_methods_unary(cls): @@ -4153,8 +4142,8 @@ def _evaluate_numeric_unary(self): return _evaluate_numeric_unary - cls.__neg__ = _make_evaluate_unary(lambda x: -x, '__neg__') - cls.__pos__ = _make_evaluate_unary(lambda x: x, '__pos__') + cls.__neg__ = _make_evaluate_unary(operator.neg, '__neg__') + cls.__pos__ = _make_evaluate_unary(operator.pos, '__pos__') cls.__abs__ = _make_evaluate_unary(np.abs, '__abs__') cls.__inv__ = _make_evaluate_unary(lambda x: -x, '__inv__') diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 187f9fcf52dd4..ac75e5ae5e2a0 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -669,6 +669,7 @@ def __add__(self, other): result = self._add_offset_array(other) elif isinstance(self, TimedeltaIndex) and isinstance(other, Index): if hasattr(other, '_add_delta'): + # i.e. 
DatetimeIndex, TimedeltaIndex, or PeriodIndex result = other._add_delta(self) else: raise TypeError("cannot add TimedeltaIndex and {typ}" @@ -693,7 +694,11 @@ def __add__(self, other): return result cls.__add__ = __add__ - cls.__radd__ = __add__ + + def __radd__(self, other): + # alias for __add__ + return self.__add__(other) + cls.__radd__ = __radd__ def __sub__(self, other): from pandas.core.index import Index @@ -712,10 +717,10 @@ def __sub__(self, other): # Array/Index of DateOffset objects result = self._sub_offset_array(other) elif isinstance(self, TimedeltaIndex) and isinstance(other, Index): - if not isinstance(other, TimedeltaIndex): - raise TypeError("cannot subtract TimedeltaIndex and {typ}" - .format(typ=type(other).__name__)) - result = self._add_delta(-other) + # We checked above for timedelta64_dtype(other) so this + # must be invalid. + raise TypeError("cannot subtract TimedeltaIndex and {typ}" + .format(typ=type(other).__name__)) elif isinstance(other, DatetimeIndex): result = self._sub_datelike(other) elif is_integer(other): @@ -747,8 +752,15 @@ def __rsub__(self, other): return -(self - other) cls.__rsub__ = __rsub__ - cls.__iadd__ = __add__ - cls.__isub__ = __sub__ + def __iadd__(self, other): + # alias for __add__ + return self.__add__(other) + cls.__iadd__ = __iadd__ + + def __isub__(self, other): + # alias for __sub__ + return self.__sub__(other) + cls.__isub__ = __isub__ def _add_delta(self, other): return NotImplemented diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index debeabf9bae23..17f92339e4205 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -100,10 +100,11 @@ def f(self): return property(f) -def _dt_index_cmp(opname, cls, nat_result=False): +def _dt_index_cmp(opname, cls): """ Wrap comparison operations to convert datetime-like to datetime64 """ + nat_result = True if opname == '__ne__' else False def wrapper(self, other): func = getattr(super(DatetimeIndex, 
self), opname) @@ -291,7 +292,7 @@ def _join_i8_wrapper(joinf, **kwargs): def _add_comparison_methods(cls): """ add in comparison methods """ cls.__eq__ = _dt_index_cmp('__eq__', cls) - cls.__ne__ = _dt_index_cmp('__ne__', cls, nat_result=True) + cls.__ne__ = _dt_index_cmp('__ne__', cls) cls.__lt__ = _dt_index_cmp('__lt__', cls) cls.__gt__ = _dt_index_cmp('__gt__', cls) cls.__le__ = _dt_index_cmp('__le__', cls) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 88f9297652ebf..4c14cbffcd813 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -76,26 +76,25 @@ def dt64arr_to_periodarr(data, freq, tz): _DIFFERENT_FREQ_INDEX = period._DIFFERENT_FREQ_INDEX -def _period_index_cmp(opname, cls, nat_result=False): +def _period_index_cmp(opname, cls): """ - Wrap comparison operations to convert datetime-like to datetime64 + Wrap comparison operations to convert Period-like to PeriodDtype """ + nat_result = True if opname == '__ne__' else False def wrapper(self, other): + op = getattr(self._ndarray_values, opname) if isinstance(other, Period): - func = getattr(self._ndarray_values, opname) - other_base, _ = _gfc(other.freq) if other.freq != self.freq: msg = _DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) raise IncompatibleFrequency(msg) - result = func(other.ordinal) + result = op(other.ordinal) elif isinstance(other, PeriodIndex): if other.freq != self.freq: msg = _DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) raise IncompatibleFrequency(msg) - op = getattr(self._ndarray_values, opname) result = op(other._ndarray_values) mask = self._isnan | other._isnan @@ -108,8 +107,7 @@ def wrapper(self, other): result.fill(nat_result) else: other = Period(other, freq=self.freq) - func = getattr(self._ndarray_values, opname) - result = func(other.ordinal) + result = op(other.ordinal) if self.hasnans: result[self._isnan] = nat_result @@ -231,7 +229,7 @@ class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, 
Int64Index): def _add_comparison_methods(cls): """ add in comparison methods """ cls.__eq__ = _period_index_cmp('__eq__', cls) - cls.__ne__ = _period_index_cmp('__ne__', cls, nat_result=True) + cls.__ne__ = _period_index_cmp('__ne__', cls) cls.__lt__ = _period_index_cmp('__lt__', cls) cls.__gt__ = _period_index_cmp('__gt__', cls) cls.__le__ = _period_index_cmp('__le__', cls) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 0ac415ee0b701..9d770cffb0059 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -16,6 +16,7 @@ from pandas.compat.numpy import function as nv import pandas.core.common as com +from pandas.core import ops from pandas.core.indexes.base import Index, _index_shared_docs from pandas.util._decorators import Appender, cache_readonly import pandas.core.dtypes.concat as _concat @@ -570,16 +571,12 @@ def __floordiv__(self, other): def _add_numeric_methods_binary(cls): """ add in numeric methods, specialized to RangeIndex """ - def _make_evaluate_binop(op, opstr, reversed=False, step=False): + def _make_evaluate_binop(op, step=False): """ Parameters ---------- op : callable that accepts 2 parms perform the binary op - opstr : string - string name of ops - reversed : boolean, default False - if this is a reversed op, e.g. 
radd step : callable, optional, default to False op to apply to the step parm if not None if False, use the existing step @@ -594,17 +591,13 @@ def _evaluate_numeric_binop(self, other): elif isinstance(other, (timedelta, np.timedelta64)): # GH#19333 is_integer evaluated True on timedelta64, # so we need to catch these explicitly - if reversed: - return op(other, self._int64index) return op(self._int64index, other) - other = self._validate_for_numeric_binop(other, op, opstr) + other = self._validate_for_numeric_binop(other, op) attrs = self._get_attributes_dict() attrs = self._maybe_update_attributes(attrs) left, right = self, other - if reversed: - left, right = right, left try: # apply if we have an override @@ -638,43 +631,26 @@ def _evaluate_numeric_binop(self, other): return result - except (ValueError, TypeError, AttributeError, - ZeroDivisionError): + except (ValueError, TypeError, ZeroDivisionError): # Defer to Int64Index implementation - if reversed: - return op(other, self._int64index) return op(self._int64index, other) + # TODO: Do attrs get handled reliably? 
return _evaluate_numeric_binop - cls.__add__ = cls.__radd__ = _make_evaluate_binop( - operator.add, '__add__') - cls.__sub__ = _make_evaluate_binop(operator.sub, '__sub__') - cls.__rsub__ = _make_evaluate_binop( - operator.sub, '__sub__', reversed=True) - cls.__mul__ = cls.__rmul__ = _make_evaluate_binop( - operator.mul, - '__mul__', - step=operator.mul) - cls.__truediv__ = _make_evaluate_binop( - operator.truediv, - '__truediv__', - step=operator.truediv) - cls.__rtruediv__ = _make_evaluate_binop( - operator.truediv, - '__truediv__', - reversed=True, - step=operator.truediv) + cls.__add__ = _make_evaluate_binop(operator.add) + cls.__radd__ = _make_evaluate_binop(ops.radd) + cls.__sub__ = _make_evaluate_binop(operator.sub) + cls.__rsub__ = _make_evaluate_binop(ops.rsub) + cls.__mul__ = _make_evaluate_binop(operator.mul, step=operator.mul) + cls.__rmul__ = _make_evaluate_binop(ops.rmul, step=ops.rmul) + cls.__truediv__ = _make_evaluate_binop(operator.truediv, + step=operator.truediv) + cls.__rtruediv__ = _make_evaluate_binop(ops.rtruediv, + step=ops.rtruediv) if not compat.PY3: - cls.__div__ = _make_evaluate_binop( - operator.div, - '__div__', - step=operator.div) - cls.__rdiv__ = _make_evaluate_binop( - operator.div, - '__div__', - reversed=True, - step=operator.div) + cls.__div__ = _make_evaluate_binop(operator.div, step=operator.div) + cls.__rdiv__ = _make_evaluate_binop(ops.rdiv, step=ops.rdiv) RangeIndex._add_numeric_methods() diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 6b61db53d9a11..3542a24290f89 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -53,10 +53,11 @@ def f(self): return property(f) -def _td_index_cmp(opname, cls, nat_result=False): +def _td_index_cmp(opname, cls): """ Wrap comparison operations to convert timedelta-like to timedelta64 """ + nat_result = True if opname == '__ne__' else False def wrapper(self, other): msg = "cannot compare a TimedeltaIndex with type 
{0}" @@ -184,7 +185,7 @@ def _join_i8_wrapper(joinf, **kwargs): def _add_comparison_methods(cls): """ add in comparison methods """ cls.__eq__ = _td_index_cmp('__eq__', cls) - cls.__ne__ = _td_index_cmp('__ne__', cls, nat_result=True) + cls.__ne__ = _td_index_cmp('__ne__', cls) cls.__lt__ = _td_index_cmp('__lt__', cls) cls.__gt__ = _td_index_cmp('__gt__', cls) cls.__le__ = _td_index_cmp('__le__', cls) @@ -383,11 +384,12 @@ def _add_delta(self, delta): return TimedeltaIndex(new_values, freq='infer') - def _evaluate_with_timedelta_like(self, other, op, opstr, reversed=False): + def _evaluate_with_timedelta_like(self, other, op): if isinstance(other, ABCSeries): # GH#19042 return NotImplemented + opstr = '__{opname}__'.format(opname=op.__name__).replace('__r', '__') # allow division by a timedelta if opstr in ['__div__', '__truediv__', '__floordiv__']: if _is_convertible_to_td(other): @@ -398,11 +400,9 @@ def _evaluate_with_timedelta_like(self, other, op, opstr, reversed=False): i8 = self.asi8 left, right = i8, other.value - if reversed: - left, right = right, left if opstr in ['__floordiv__']: - result = left // right + result = op(left, right) else: result = op(left, np.float64(right)) result = self._maybe_mask_results(result, convert='float64') diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 1162662bf9a08..8f51dbabd5b71 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -127,16 +127,17 @@ def test_numeric_compat(self): idx = self.create_index() tm.assert_raises_regex(TypeError, "cannot perform __mul__", lambda: idx * 1) - tm.assert_raises_regex(TypeError, "cannot perform __mul__", + tm.assert_raises_regex(TypeError, "cannot perform __rmul__", lambda: 1 * idx) div_err = "cannot perform __truediv__" if PY3 \ else "cannot perform __div__" tm.assert_raises_regex(TypeError, div_err, lambda: idx / 1) + div_err = div_err.replace(' __', ' __r') tm.assert_raises_regex(TypeError, div_err, lambda: 1 / idx) 
tm.assert_raises_regex(TypeError, "cannot perform __floordiv__", lambda: idx // 1) - tm.assert_raises_regex(TypeError, "cannot perform __floordiv__", + tm.assert_raises_regex(TypeError, "cannot perform __rfloordiv__", lambda: 1 // idx) def test_logical_compat(self): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 90edcb526bb2e..d7f185853ca45 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -7,6 +7,7 @@ from collections import defaultdict import pandas.util.testing as tm +from pandas.core.dtypes.generic import ABCIndex from pandas.core.dtypes.common import is_unsigned_integer_dtype from pandas.core.indexes.api import Index, MultiIndex from pandas.tests.indexes.common import Base @@ -1988,6 +1989,17 @@ def test_addsub_arithmetic(self, dtype, delta): tm.assert_index_equal(idx - idx, 0 * idx) assert not (idx - idx).empty + def test_iadd_preserves_name(self): + # GH#17067, GH#19723 __iadd__ and __isub__ should preserve index name + ser = pd.Series([1, 2, 3]) + ser.index.name = 'foo' + + ser.index += 1 + assert ser.index.name == "foo" + + ser.index -= 1 + assert ser.index.name == "foo" + class TestMixedIntIndex(Base): # Mostly the tests from common.py for which the results differ @@ -2301,9 +2313,17 @@ def test_ensure_index_from_sequences(self, data, names, expected): tm.assert_index_equal(result, expected) -@pytest.mark.parametrize('opname', ['eq', 'ne', 'le', 'lt', 'ge', 'gt']) +@pytest.mark.parametrize('opname', ['eq', 'ne', 'le', 'lt', 'ge', 'gt', + 'add', 'radd', 'sub', 'rsub', + 'mul', 'rmul', 'truediv', 'rtruediv', + 'floordiv', 'rfloordiv', + 'pow', 'rpow', 'mod', 'divmod']) def test_generated_op_names(opname, indices): index = indices + if isinstance(index, ABCIndex) and opname == 'rsub': + # pd.Index.__rsub__ does not exist; though the method does exist + # for subclasses. 
see GH#19723 + return opname = '__{name}__'.format(name=opname) method = getattr(index, opname) assert method.__name__ == opname From 4242a0ee1b64ef53203cf40dca95f1d74d2fe5f7 Mon Sep 17 00:00:00 2001 From: Tommy <10076072+tommyod@users.noreply.github.com> Date: Fri, 23 Feb 2018 12:46:11 +0100 Subject: [PATCH 181/217] DOC: Spellcheck of gotchas.rst (FAQ page) (#19747) --- ci/lint.sh | 1 + doc/source/gotchas.rst | 114 +++++++++++++++++++++-------------------- 2 files changed, 60 insertions(+), 55 deletions(-) diff --git a/ci/lint.sh b/ci/lint.sh index e3a39668885f0..fcd65fc5aba5e 100755 --- a/ci/lint.sh +++ b/ci/lint.sh @@ -156,6 +156,7 @@ if [ "$LINT" ]; then RET=1 fi echo "Check for deprecated messages without sphinx directive DONE" + else echo "NOT Linting" fi diff --git a/doc/source/gotchas.rst b/doc/source/gotchas.rst index bc490877e190d..b7042ef390018 100644 --- a/doc/source/gotchas.rst +++ b/doc/source/gotchas.rst @@ -22,22 +22,22 @@ Frequently Asked Questions (FAQ) DataFrame memory usage ---------------------- -The memory usage of a dataframe (including the index) -is shown when accessing the ``info`` method of a dataframe. A -configuration option, ``display.memory_usage`` (see :ref:`options`), -specifies if the dataframe's memory usage will be displayed when -invoking the ``df.info()`` method. +The memory usage of a ``DataFrame`` (including the index) is shown when calling +the :meth:`~DataFrame.info`. A configuration option, ``display.memory_usage`` +(see :ref:`the list of options `), specifies if the +``DataFrame``'s memory usage will be displayed when invoking the ``df.info()`` +method. -For example, the memory usage of the dataframe below is shown -when calling ``df.info()``: +For example, the memory usage of the ``DataFrame`` below is shown +when calling :meth:`~DataFrame.info`: .. 
ipython:: python dtypes = ['int64', 'float64', 'datetime64[ns]', 'timedelta64[ns]', 'complex128', 'object', 'bool'] n = 5000 - data = dict([ (t, np.random.randint(100, size=n).astype(t)) - for t in dtypes]) + data = dict([(t, np.random.randint(100, size=n).astype(t)) + for t in dtypes]) df = pd.DataFrame(data) df['categorical'] = df['object'].astype('category') @@ -48,7 +48,7 @@ pandas does not count the memory used by values in columns with ``dtype=object``. Passing ``memory_usage='deep'`` will enable a more accurate memory usage report, -that accounts for the full usage of the contained objects. This is optional +accounting for the full usage of the contained objects. This is optional as it can be expensive to do this deeper introspection. .. ipython:: python @@ -58,11 +58,11 @@ as it can be expensive to do this deeper introspection. By default the display option is set to ``True`` but can be explicitly overridden by passing the ``memory_usage`` argument when invoking ``df.info()``. -The memory usage of each column can be found by calling the ``memory_usage`` -method. This returns a Series with an index represented by column names -and memory usage of each column shown in bytes. For the dataframe above, -the memory usage of each column and the total memory usage of the -dataframe can be found with the memory_usage method: +The memory usage of each column can be found by calling the +:meth:`~DataFrame.memory_usage` method. This returns a ``Series`` with an index +represented by column names and memory usage of each column shown in bytes. For +the ``DataFrame`` above, the memory usage of each column and the total memory +usage can be found with the ``memory_usage`` method: .. 
ipython:: python @@ -71,18 +71,18 @@ dataframe can be found with the memory_usage method: # total memory usage of dataframe df.memory_usage().sum() -By default the memory usage of the dataframe's index is shown in the -returned Series, the memory usage of the index can be suppressed by passing +By default the memory usage of the ``DataFrame``'s index is shown in the +returned ``Series``, the memory usage of the index can be suppressed by passing the ``index=False`` argument: .. ipython:: python df.memory_usage(index=False) -The memory usage displayed by the ``info`` method utilizes the -``memory_usage`` method to determine the memory usage of a dataframe -while also formatting the output in human-readable units (base-2 -representation; i.e., 1KB = 1024 bytes). +The memory usage displayed by the :meth:`~DataFrame.info` method utilizes the +:meth:`~DataFrame.memory_usage` method to determine the memory usage of a +``DataFrame`` while also formatting the output in human-readable units (base-2 +representation; i.e. 1KB = 1024 bytes). See also :ref:`Categorical Memory Usage `. @@ -91,17 +91,18 @@ See also :ref:`Categorical Memory Usage `. Using If/Truth Statements with pandas ------------------------------------- -pandas follows the NumPy convention of raising an error when you try to convert something to a ``bool``. -This happens in a ``if`` or when using the boolean operations, ``and``, ``or``, or ``not``. It is not clear -what the result of +pandas follows the NumPy convention of raising an error when you try to convert +something to a ``bool``. This happens in an ``if``-statement or when using the +boolean operations: ``and``, ``or``, and ``not``. It is not clear what the result +of the following code should be: .. code-block:: python >>> if pd.Series([False, True, False]): ... -should be. Should it be ``True`` because it's not zero-length? ``False`` because there are ``False`` values? 
-It is unclear, so instead, pandas raises a ``ValueError``: +Should it be ``True`` because it's not zero-length, or ``False`` because there +are ``False`` values? It is unclear, so instead, pandas raises a ``ValueError``: .. code-block:: python @@ -111,9 +112,9 @@ It is unclear, so instead, pandas raises a ``ValueError``: ... ValueError: The truth value of an array is ambiguous. Use a.empty, a.any() or a.all(). - -If you see that, you need to explicitly choose what you want to do with it (e.g., use `any()`, `all()` or `empty`). -or, you might want to compare if the pandas object is ``None`` +You need to explicitly choose what you want to do with the ``DataFrame``, e.g. +use :meth:`~DataFrame.any`, :meth:`~DataFrame.all` or :meth:`~DataFrame.empty`. +Alternatively, you might want to compare if the pandas object is ``None``: .. code-block:: python @@ -122,7 +123,7 @@ or, you might want to compare if the pandas object is ``None`` >>> I was not None -or return if ``any`` value is ``True``. +Below is how to check if any of the values are ``True``: .. code-block:: python @@ -130,7 +131,8 @@ or return if ``any`` value is ``True``. print("I am any") >>> I am any -To evaluate single-element pandas objects in a boolean context, use the method ``.bool()``: +To evaluate single-element pandas objects in a boolean context, use the method +:meth:`~DataFrame.bool`: .. ipython:: python @@ -161,25 +163,25 @@ See :ref:`boolean comparisons` for more examples. Using the ``in`` operator ~~~~~~~~~~~~~~~~~~~~~~~~~ -Using the Python ``in`` operator on a Series tests for membership in the +Using the Python ``in`` operator on a ``Series`` tests for membership in the index, not membership among the values. -.. ipython:: +.. ipython:: python s = pd.Series(range(5), index=list('abcde')) 2 in s 'b' in s If this behavior is surprising, keep in mind that using ``in`` on a Python -dictionary tests keys, not values, and Series are dict-like. 
-To test for membership in the values, use the method :func:`~pandas.Series.isin`: +dictionary tests keys, not values, and ``Series`` are dict-like. +To test for membership in the values, use the method :meth:`~pandas.Series.isin`: -.. ipython:: +.. ipython:: python s.isin([2]) s.isin([2]).any() -For DataFrames, likewise, ``in`` applies to the column axis, +For ``DataFrames``, likewise, ``in`` applies to the column axis, testing for membership in the list of column names. ``NaN``, Integer ``NA`` values and ``NA`` type promotions @@ -189,12 +191,12 @@ Choice of ``NA`` representation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For lack of ``NA`` (missing) support from the ground up in NumPy and Python in -general, we were given the difficult choice between either +general, we were given the difficult choice between either: - A *masked array* solution: an array of data and an array of boolean values - indicating whether a value is there or is missing + indicating whether a value is there or is missing. - Using a special sentinel value, bit pattern, or set of sentinel values to - denote ``NA`` across the dtypes + denote ``NA`` across the dtypes. For many reasons we chose the latter. After years of production use it has proven, at least in my opinion, to be the best decision given the state of @@ -226,15 +228,16 @@ arrays. For example: s2.dtype This trade-off is made largely for memory and performance reasons, and also so -that the resulting Series continues to be "numeric". One possibility is to use -``dtype=object`` arrays instead. +that the resulting ``Series`` continues to be "numeric". One possibility is to +use ``dtype=object`` arrays instead. ``NA`` type promotions ~~~~~~~~~~~~~~~~~~~~~~ -When introducing NAs into an existing Series or DataFrame via ``reindex`` or -some other means, boolean and integer types will be promoted to a different -dtype in order to store the NAs. 
These are summarized by this table: +When introducing NAs into an existing ``Series`` or ``DataFrame`` via +:meth:`~Series.reindex` or some other means, boolean and integer types will be +promoted to a different dtype in order to store the NAs. The promotions are +summarized in this table: .. csv-table:: :header: "Typeclass","Promotion dtype for storing NAs" @@ -289,19 +292,19 @@ integer arrays to floating when NAs must be introduced. Differences with NumPy ---------------------- -For Series and DataFrame objects, ``var`` normalizes by ``N-1`` to produce -unbiased estimates of the sample variance, while NumPy's ``var`` normalizes -by N, which measures the variance of the sample. Note that ``cov`` -normalizes by ``N-1`` in both pandas and NumPy. +For ``Series`` and ``DataFrame`` objects, :meth:`~DataFrame.var` normalizes by +``N-1`` to produce unbiased estimates of the sample variance, while NumPy's +``var`` normalizes by N, which measures the variance of the sample. Note that +:meth:`~DataFrame.cov` normalizes by ``N-1`` in both pandas and NumPy. Thread-safety ------------- As of pandas 0.11, pandas is not 100% thread safe. The known issues relate to -the ``DataFrame.copy`` method. If you are doing a lot of copying of DataFrame -objects shared among threads, we recommend holding locks inside the threads -where the data copying occurs. +the :meth:`~DataFrame.copy` method. If you are doing a lot of copying of +``DataFrame`` objects shared among threads, we recommend holding locks inside +the threads where the data copying occurs. See `this link `__ for more information. @@ -310,7 +313,8 @@ for more information. Byte-Ordering Issues -------------------- Occasionally you may have to deal with data that were created on a machine with -a different byte order than the one on which you are running Python. A common symptom of this issue is an error like +a different byte order than the one on which you are running Python. A common +symptom of this issue is an error like: .. 
code-block:: python @@ -320,8 +324,8 @@ a different byte order than the one on which you are running Python. A common sy To deal with this issue you should convert the underlying NumPy array to the native -system byte order *before* passing it to Series/DataFrame/Panel constructors -using something similar to the following: +system byte order *before* passing it to ``Series`` or ``DataFrame`` +constructors using something similar to the following: .. ipython:: python From 0468afed8389ce075f786cc2b9bb8b4f549c6221 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 23 Feb 2018 05:47:23 -0600 Subject: [PATCH 182/217] ENH: Allow storing ExtensionArrays in containers (#19520) * ENH: non-interval changes * COMPAT: py2 Super * BUG: Use original object for extension array * Consistent boxing / unboxing NumPy compat * 32-bit compat * Add a test array * linting * Default __iter__ * Tests for value_counts * Implement value_counts * Py2 compat * Fixed dropna * Test fixups * Started setitem * REF/Clean: Internal / External values * Move to index base * Setitem tests, decimal example * Compat * Fixed extension block tests. The only "API change" was that you can't just inherit from NonConsolidatableMixin, which is OK since 1. it's a mixin 2. geopandas also inherits from Block * Clarify binop tests Make it clearer which bit might raise * TST: Removed ops tests * Cleanup unique handling * Simplify object concat * Use values for intersection I think eventually we'll want to use ndarray_values for this, but it'll require a bit more work to support. Currently, using ndarray_values causes occasional failures on categorical. * hmm * More failing tests * remove bad test * better setitem * Dropna works. 
* Restore xfail test * Test Categorical * Xfail setitem tests * TST: Skip JSON tests on py2 * Additional testing * More tests * ndarray_values * API: Default ExtensionArray.astype (cherry picked from commit 943a915562b72bed147c857de927afa0daf31c1a) (cherry picked from commit fbf0a0672380e210d3cb3c527fa8045a204d81be) * Simplify concat_as_object * Py2 compat (cherry picked from commit b20e12cae68dd86ff51597464045656763d369f7) * Set-ops ugliness * better docstrings * tolist * linting * Moved dtypes (cherry picked from commit d1362271bca8a7b183f3241e5c2f040c422118b8) * clean * cleanup * NumPy compat * Use base _values for CategoricalIndex * Update dev docs * cleanup * cleanup (cherry picked from commit 242562108b099b4e7a205541ee15b9272dcb5265) * cleanup * Linting * Precision in tests * Linting * Move to extension * Push _ndarray_values to ExtensionArray Now IndexOpsMixin._ndarray_values will dispatch all the way down to the EA. Subclasses like Categorical can override it as they see fit. * Clean up tolist * Move test locations * Fixed test * REF: Update per comments * lint * REF: Use _values for size and shape * PERF: Implement size, shape for IntervalIndex * PERF: Avoid materializing values for PeriodIndex shape, size * Cleanup * Override nbytes * Remove unused change * Docs * Test cleanup * Always set PANDAS_TESTING_MODE * Revert "Always set PANDAS_TESTING_MODE" This reverts commit a312ba5c59c2e96854a286bde74d7fd4562afbf8. * Explicitly catch warnings or not * fastparquet warnings * Unicode literals strikes again. Only catch fp warning for newer numpy * Restore circle env var * More parquet test catching * No stacklevel * Lower bound on FP * Exact bound for FP * Don't use fastpath for ExtensionBlock make_block * Consistently use _values * TST: Additional constructor tests * CLN: de-nested a bit * _fill_value handling * Handle user provided dtype in constructors. When the dtype matches, we allow it to proceed. When the dtype would require coercion, we raise. 
* Document ExtensionBlock._maybe_coerce_values Also changes to use _values as we should * Created ABCExtensionArray * TST: Tests for is_object_dtype and is_string_dtype and EAs * fixup! Handle user provided dtype in constructors. * Doc for setitem * Split base tests * Revert test_parquet changes * API: Removed _fill_value from the interface * Push coercion to extension dtype till later * Linting * ERR: Better error message for coercion to 3rd party dtypes * CLN: Make take_nd EA aware * Revert sparse changes * Other _typ for ABCExtensionArray * Test cleanup and expansion. Tests for concatenating and aligning frames * Copy if copy * TST: remove self param for fixture * Remove unnecessary EA handling in Series ctor * API: Removed value_counts Moved setitem notes to comment * More doc notes * Handle expanding a DataFrame with an EA * Added ExtensionDtype.__eq__ Support for astype * linting * REF: is_dtype_equal refactor Moved from PandasExtensionDtype to ExtensionDtype with one modification: catch TypeError explicitly. * Remove reference to dtype being a class * move * Moved sparse check to take_nd * Docstring * Split tests * Revert index change * Copy changes * Simplify EA implementation names comments for object vs. 
str missing values * Linting --- pandas/core/algorithms.py | 26 ++- pandas/core/arrays/base.py | 90 +++++++--- pandas/core/dtypes/base.py | 57 +++++-- pandas/core/dtypes/common.py | 2 +- pandas/core/dtypes/dtypes.py | 25 --- pandas/core/dtypes/generic.py | 2 + pandas/core/dtypes/missing.py | 53 +++--- pandas/core/frame.py | 21 ++- pandas/core/indexes/base.py | 4 +- pandas/core/indexing.py | 3 + pandas/core/internals.py | 66 ++++++-- pandas/core/series.py | 54 ++++-- pandas/tests/categorical/test_missing.py | 3 +- pandas/tests/extension/base/__init__.py | 42 +++++ pandas/tests/extension/base/casting.py | 11 ++ pandas/tests/extension/base/constructors.py | 43 +++++ pandas/tests/extension/base/dtype.py | 46 ++++++ pandas/tests/extension/base/getitem.py | 119 ++++++++++++++ pandas/tests/extension/base/interface.py | 53 ++++++ pandas/tests/extension/base/methods.py | 32 ++++ pandas/tests/extension/base/missing.py | 45 +++++ pandas/tests/extension/base/reshaping.py | 61 +++++++ pandas/tests/extension/category/__init__.py | 0 .../extension/category/test_categorical.py | 84 ++++++++++ pandas/tests/extension/conftest.py | 48 ++++++ pandas/tests/extension/decimal/__init__.py | 0 pandas/tests/extension/decimal/array.py | 86 ++++++++++ .../tests/extension/decimal/test_decimal.py | 154 ++++++++++++++++++ pandas/tests/extension/json/__init__.py | 0 pandas/tests/extension/json/array.py | 99 +++++++++++ pandas/tests/extension/json/test_json.py | 73 +++++++++ .../test_external_block.py | 4 +- 32 files changed, 1276 insertions(+), 130 deletions(-) create mode 100644 pandas/tests/extension/base/__init__.py create mode 100644 pandas/tests/extension/base/casting.py create mode 100644 pandas/tests/extension/base/constructors.py create mode 100644 pandas/tests/extension/base/dtype.py create mode 100644 pandas/tests/extension/base/getitem.py create mode 100644 pandas/tests/extension/base/interface.py create mode 100644 pandas/tests/extension/base/methods.py create mode 100644 
pandas/tests/extension/base/missing.py create mode 100644 pandas/tests/extension/base/reshaping.py create mode 100644 pandas/tests/extension/category/__init__.py create mode 100644 pandas/tests/extension/category/test_categorical.py create mode 100644 pandas/tests/extension/conftest.py create mode 100644 pandas/tests/extension/decimal/__init__.py create mode 100644 pandas/tests/extension/decimal/array.py create mode 100644 pandas/tests/extension/decimal/test_decimal.py create mode 100644 pandas/tests/extension/json/__init__.py create mode 100644 pandas/tests/extension/json/array.py create mode 100644 pandas/tests/extension/json/test_json.py rename pandas/tests/{internals => extension}/test_external_block.py (94%) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 624045a3d64bc..d616e3f92aa4d 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -15,11 +15,12 @@ is_unsigned_integer_dtype, is_signed_integer_dtype, is_integer_dtype, is_complex_dtype, is_object_dtype, + is_extension_array_dtype, is_categorical_dtype, is_sparse, is_period_dtype, is_numeric_dtype, is_float_dtype, is_bool_dtype, needs_i8_conversion, - is_categorical, is_datetimetz, + is_datetimetz, is_datetime64_any_dtype, is_datetime64tz_dtype, is_timedelta64_dtype, is_interval_dtype, is_scalar, is_list_like, @@ -547,7 +548,7 @@ def value_counts(values, sort=True, ascending=False, normalize=False, if is_categorical_dtype(values) or is_sparse(values): # handle Categorical and sparse, - result = Series(values).values.value_counts(dropna=dropna) + result = Series(values)._values.value_counts(dropna=dropna) result.name = name counts = result.values @@ -1292,10 +1293,13 @@ def take_nd(arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None, """ Specialized Cython take which sets NaN values in one pass + This dispatches to ``take`` defined on ExtensionArrays. It does not + currently dispatch to ``SparseArray.take`` for sparse ``arr``. 
+ Parameters ---------- - arr : ndarray - Input array + arr : array-like + Input array. indexer : ndarray 1-D array of indices to take, subarrays corresponding to -1 value indicies are filed with fill_value @@ -1315,17 +1319,25 @@ def take_nd(arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None, If False, indexer is assumed to contain no -1 values so no filling will be done. This short-circuits computation of a mask. Result is undefined if allow_fill == False and -1 is present in indexer. + + Returns + ------- + subarray : array-like + May be the same type as the input, or cast to an ndarray. """ + # TODO(EA): Remove these if / elifs as datetimeTZ, interval, become EAs # dispatch to internal type takes - if is_categorical(arr): - return arr.take_nd(indexer, fill_value=fill_value, - allow_fill=allow_fill) + if is_extension_array_dtype(arr): + return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill) elif is_datetimetz(arr): return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill) elif is_interval_dtype(arr): return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill) + if is_sparse(arr): + arr = arr.get_values() + if indexer is None: indexer = np.arange(arr.shape[axis], dtype=np.int64) dtype, fill_value = arr.dtype, arr.dtype.type() diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index e618dc6b69b2d..cec881394a021 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -25,14 +25,13 @@ class ExtensionArray(object): * isna * take * copy - * _formatting_values * _concat_same_type - Some additional methods are required to satisfy pandas' internal, private + Some additional methods are available to satisfy pandas' internal, private block API. - * _concat_same_type * _can_hold_na + * _formatting_values This class does not inherit from 'abc.ABCMeta' for performance reasons. 
Methods and properties required by the interface raise @@ -53,13 +52,14 @@ class ExtensionArray(object): Extension arrays should be able to be constructed with instances of the class, i.e. ``ExtensionArray(extension_array)`` should return an instance, not error. - - Additionally, certain methods and interfaces are required for proper - this array to be properly stored inside a ``DataFrame`` or ``Series``. """ + # '_typ' is for pandas.core.dtypes.generic.ABCExtensionArray. + # Don't override this. + _typ = 'extension' # ------------------------------------------------------------------------ # Must be a Sequence # ------------------------------------------------------------------------ + def __getitem__(self, item): # type (Any) -> Any """Select a subset of self. @@ -92,7 +92,46 @@ def __getitem__(self, item): raise AbstractMethodError(self) def __setitem__(self, key, value): - # type: (Any, Any) -> None + # type: (Union[int, np.ndarray], Any) -> None + """Set one or more values inplace. + + This method is not required to satisfy the pandas extension array + interface. + + Parameters + ---------- + key : int, ndarray, or slice + When called from, e.g. ``Series.__setitem__``, ``key`` will be + one of + + * scalar int + * ndarray of integers. + * boolean ndarray + * slice object + + value : ExtensionDtype.type, Sequence[ExtensionDtype.type], or object + value or values to be set of ``key``. + + Returns + ------- + None + """ + # Some notes to the ExtensionArray implementor who may have ended up + # here. While this method is not required for the interface, if you + # *do* choose to implement __setitem__, then some semantics should be + # observed: + # + # * Setting multiple values : ExtensionArrays should support setting + # multiple values at once, 'key' will be a sequence of integers and + # 'value' will be a same-length sequence. + # + # * Broadcasting : For a sequence 'key' and a scalar 'value', + # each position in 'key' should be set to 'value'. 
+ # + # * Coercion : Most users will expect basic coercion to work. For + # example, a string like '2018-01-01' is coerced to a datetime + # when setting on a datetime64ns array. In general, if the + # __init__ method coerces that value, then so should __setitem__ raise NotImplementedError(_not_implemented_message.format( type(self), '__setitem__') ) @@ -107,6 +146,16 @@ def __len__(self): # type: () -> int raise AbstractMethodError(self) + def __iter__(self): + """Iterate over elements of the array. + + """ + # This needs to be implemented so that pandas recognizes extension + # arrays as list-like. The default implementation makes successive + # calls to ``__getitem__``, which may be slower than necessary. + for i in range(len(self)): + yield self[i] + # ------------------------------------------------------------------------ # Required attributes # ------------------------------------------------------------------------ @@ -132,9 +181,9 @@ def nbytes(self): # type: () -> int """The number of bytes needed to store this object in memory. - If this is expensive to compute, return an approximate lower bound - on the number of bytes needed. """ + # If this is expensive to compute, return an approximate lower bound + # on the number of bytes needed. raise AbstractMethodError(self) # ------------------------------------------------------------------------ @@ -184,8 +233,8 @@ def take(self, indexer, allow_fill=True, fill_value=None): will be done. This short-circuits computation of a mask. Result is undefined if allow_fill == False and -1 is present in indexer. fill_value : any, default None - Fill value to replace -1 values with. By default, this uses - the missing value sentinel for this type, ``self._fill_value``. + Fill value to replace -1 values with. If applicable, this should + use the sentinel missing value for this type. 
Notes ----- @@ -198,17 +247,20 @@ def take(self, indexer, allow_fill=True, fill_value=None): Examples -------- - Suppose the extension array somehow backed by a NumPy structured array - and that the underlying structured array is stored as ``self.data``. - Then ``take`` may be written as + Suppose the extension array is backed by a NumPy array stored as + ``self.data``. Then ``take`` may be written as .. code-block:: python def take(self, indexer, allow_fill=True, fill_value=None): mask = indexer == -1 result = self.data.take(indexer) - result[mask] = self._fill_value + result[mask] = np.nan # NA for this type return type(self)(result) + + See Also + -------- + numpy.take """ raise AbstractMethodError(self) @@ -230,17 +282,12 @@ def copy(self, deep=False): # ------------------------------------------------------------------------ # Block-related methods # ------------------------------------------------------------------------ - @property - def _fill_value(self): - # type: () -> Any - """The missing value for this type, e.g. np.nan""" - return None def _formatting_values(self): # type: () -> np.ndarray # At the moment, this has to be an array since we use result.dtype """An array of values to be printed in, e.g. the Series repr""" - raise AbstractMethodError(self) + return np.array(self) @classmethod def _concat_same_type(cls, to_concat): @@ -257,6 +304,7 @@ def _concat_same_type(cls, to_concat): """ raise AbstractMethodError(cls) + @property def _can_hold_na(self): # type: () -> bool """Whether your array can hold missing values. True by default. 
diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index c7c5378801f02..d54d980d02ffa 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -1,4 +1,7 @@ """Extend pandas with custom array types""" +import numpy as np + +from pandas import compat from pandas.errors import AbstractMethodError @@ -23,6 +26,32 @@ class ExtensionDtype(object): def __str__(self): return self.name + def __eq__(self, other): + """Check whether 'other' is equal to self. + + By default, 'other' is considered equal if + + * it's a string matching 'self.name'. + * it's an instance of this type. + + Parameters + ---------- + other : Any + + Returns + ------- + bool + """ + if isinstance(other, compat.string_types): + return other == self.name + elif isinstance(other, type(self)): + return True + else: + return False + + def __ne__(self, other): + return not self.__eq__(other) + @property def type(self): # type: () -> type @@ -102,11 +131,12 @@ def construct_from_string(cls, string): @classmethod def is_dtype(cls, dtype): - """Check if we match 'dtype' + """Check if we match 'dtype'. Parameters ---------- - dtype : str or dtype + dtype : object + The object to check. Returns ------- @@ -118,12 +148,19 @@ def is_dtype(cls, dtype): 1. ``cls.construct_from_string(dtype)`` is an instance of ``cls``. - 2. 'dtype' is ``cls`` or a subclass of ``cls``. + 2. ``dtype`` is an object and is an instance of ``cls`` + 3. ``dtype`` has a ``dtype`` attribute, and any of the above + conditions is true for ``dtype.dtype``. 
""" - if isinstance(dtype, str): - try: - return isinstance(cls.construct_from_string(dtype), cls) - except TypeError: - return False - else: - return issubclass(dtype, cls) + dtype = getattr(dtype, 'dtype', dtype) + + if isinstance(dtype, np.dtype): + return False + elif dtype is None: + return False + elif isinstance(dtype, cls): + return True + try: + return cls.construct_from_string(dtype) is not None + except TypeError: + return False diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index c2b71bc316fe8..197b35de88896 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1708,9 +1708,9 @@ def is_extension_array_dtype(arr_or_dtype): """ from pandas.core.arrays import ExtensionArray - # we want to unpack series, anything else? if isinstance(arr_or_dtype, (ABCIndexClass, ABCSeries)): arr_or_dtype = arr_or_dtype._values + return isinstance(arr_or_dtype, (ExtensionDtype, ExtensionArray)) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 99e4033f104db..d262a71933915 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -66,13 +66,6 @@ def __hash__(self): raise NotImplementedError("sub-classes should implement an __hash__ " "method") - def __eq__(self, other): - raise NotImplementedError("sub-classes should implement an __eq__ " - "method") - - def __ne__(self, other): - return not self.__eq__(other) - def __getstate__(self): # pickle support; we don't want to pickle the cache return {k: getattr(self, k, None) for k in self._metadata} @@ -82,24 +75,6 @@ def reset_cache(cls): """ clear the cache """ cls._cache = {} - @classmethod - def is_dtype(cls, dtype): - """ Return a boolean if the passed type is an actual dtype that - we can match (via string or type) - """ - if hasattr(dtype, 'dtype'): - dtype = dtype.dtype - if isinstance(dtype, np.dtype): - return False - elif dtype is None: - return False - elif isinstance(dtype, cls): - return True - try: - return 
cls.construct_from_string(dtype) is not None - except: - return False - class CategoricalDtypeType(type): """ diff --git a/pandas/core/dtypes/generic.py b/pandas/core/dtypes/generic.py index b032cb6f14d4c..cb54c94d29205 100644 --- a/pandas/core/dtypes/generic.py +++ b/pandas/core/dtypes/generic.py @@ -57,6 +57,8 @@ def _check(cls, inst): ABCDateOffset = create_pandas_abc_type("ABCDateOffset", "_typ", ("dateoffset",)) ABCInterval = create_pandas_abc_type("ABCInterval", "_typ", ("interval", )) +ABCExtensionArray = create_pandas_abc_type("ABCExtensionArray", "_typ", + ("extension", "categorical",)) class _ABCGeneric(type): diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index ffac702476af1..01c88c269e7e0 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -5,14 +5,16 @@ from pandas._libs import lib, missing as libmissing from pandas._libs.tslib import NaT, iNaT from .generic import (ABCMultiIndex, ABCSeries, - ABCIndexClass, ABCGeneric) + ABCIndexClass, ABCGeneric, + ABCExtensionArray) from .common import (is_string_dtype, is_datetimelike, is_datetimelike_v_numeric, is_float_dtype, is_datetime64_dtype, is_datetime64tz_dtype, is_timedelta64_dtype, is_interval_dtype, - is_complex_dtype, is_categorical_dtype, + is_complex_dtype, is_string_like_dtype, is_bool_dtype, is_integer_dtype, is_dtype_equal, + is_extension_array_dtype, needs_i8_conversion, _ensure_object, pandas_dtype, is_scalar, @@ -57,7 +59,8 @@ def _isna_new(obj): # hack (for now) because MI registers as ndarray elif isinstance(obj, ABCMultiIndex): raise NotImplementedError("isna is not defined for MultiIndex") - elif isinstance(obj, (ABCSeries, np.ndarray, ABCIndexClass)): + elif isinstance(obj, (ABCSeries, np.ndarray, ABCIndexClass, + ABCExtensionArray)): return _isna_ndarraylike(obj) elif isinstance(obj, ABCGeneric): return obj._constructor(obj._data.isna(func=isna)) @@ -124,30 +127,31 @@ def _use_inf_as_na(key): def _isna_ndarraylike(obj): - values = 
getattr(obj, 'values', obj) dtype = values.dtype - if is_string_dtype(dtype): - if is_categorical_dtype(values): - from pandas import Categorical - if not isinstance(values, Categorical): - values = values.values - result = values.isna() - elif is_interval_dtype(values): - from pandas import IntervalIndex - result = IntervalIndex(obj).isna() + if is_extension_array_dtype(obj): + if isinstance(obj, (ABCIndexClass, ABCSeries)): + values = obj._values else: + values = obj + result = values.isna() + elif is_interval_dtype(values): + # TODO(IntervalArray): remove this if block + from pandas import IntervalIndex + result = IntervalIndex(obj).isna() + elif is_string_dtype(dtype): + # Working around NumPy ticket 1542 + shape = values.shape - # Working around NumPy ticket 1542 - shape = values.shape - - if is_string_like_dtype(dtype): - result = np.zeros(values.shape, dtype=bool) - else: - result = np.empty(shape, dtype=bool) - vec = libmissing.isnaobj(values.ravel()) - result[...] = vec.reshape(shape) + if is_string_like_dtype(dtype): + # object array of strings + result = np.zeros(values.shape, dtype=bool) + else: + # object array of non-strings + result = np.empty(shape, dtype=bool) + vec = libmissing.isnaobj(values.ravel()) + result[...] = vec.reshape(shape) elif needs_i8_conversion(obj): # this is the NaT pattern @@ -406,4 +410,7 @@ def remove_na_arraylike(arr): """ Return array-like containing only true/non-NaN values, possibly empty. 
""" - return arr[notna(lib.values_from_object(arr))] + if is_extension_array_dtype(arr): + return arr[notna(arr)] + else: + return arr[notna(lib.values_from_object(arr))] diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2aae4dffbeaaf..1c5cf87d6b39b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -39,6 +39,7 @@ is_categorical_dtype, is_object_dtype, is_extension_type, + is_extension_array_dtype, is_datetimetz, is_datetime64_any_dtype, is_datetime64tz_dtype, @@ -71,7 +72,7 @@ create_block_manager_from_arrays, create_block_manager_from_blocks) from pandas.core.series import Series -from pandas.core.arrays import Categorical +from pandas.core.arrays import Categorical, ExtensionArray import pandas.core.algorithms as algorithms from pandas.compat import (range, map, zip, lrange, lmap, lzip, StringIO, u, OrderedDict, raise_with_traceback) @@ -511,7 +512,7 @@ def _get_axes(N, K, index=index, columns=columns): index, columns = _get_axes(len(values), 1) return _arrays_to_mgr([values], columns, index, columns, dtype=dtype) - elif is_datetimetz(values): + elif (is_datetimetz(values) or is_extension_array_dtype(values)): # GH19157 if columns is None: columns = [0] @@ -2837,7 +2838,7 @@ def reindexer(value): # now align rows value = reindexer(value).T - elif isinstance(value, Categorical): + elif isinstance(value, ExtensionArray): value = value.copy() elif isinstance(value, Index) or is_sequence(value): @@ -2867,7 +2868,7 @@ def reindexer(value): value = maybe_cast_to_datetime(value, value.dtype) # return internal types directly - if is_extension_type(value): + if is_extension_type(value) or is_extension_array_dtype(value): return value # broadcast across multiple columns if necessary @@ -3404,12 +3405,8 @@ class max type new_obj = self.copy() def _maybe_casted_values(index, labels=None): - if isinstance(index, PeriodIndex): - values = index.astype(object).values - elif isinstance(index, DatetimeIndex) and index.tz is not None: - values = index - 
else: - values = index.values + values = index._values + if not isinstance(index, (PeriodIndex, DatetimeIndex)): if values.dtype == np.object_: values = lib.maybe_convert_objects(values) @@ -5621,7 +5618,9 @@ def count(self, axis=0, level=None, numeric_only=False): if len(frame._get_axis(axis)) == 0: result = Series(0, index=frame._get_agg_axis(axis)) else: - if frame._is_mixed_type: + if frame._is_mixed_type or frame._data.any_extension_types: + # the or any_extension_types is really only hit for single- + # column frames with an extension array result = notna(frame).sum(axis=axis) else: counts = notna(frame.values).sum(axis=axis) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index c343126db0ea1..0813c12d573d5 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -13,6 +13,7 @@ from pandas import compat from pandas.core.accessor import CachedAccessor +from pandas.core.arrays import ExtensionArray from pandas.core.dtypes.generic import ( ABCSeries, ABCDataFrame, ABCMultiIndex, @@ -2051,6 +2052,7 @@ def _format_with_header(self, header, na_rep='NaN', **kwargs): if is_categorical_dtype(values.dtype): values = np.array(values) + elif is_object_dtype(values.dtype): values = lib.maybe_convert_objects(values, safe=1) @@ -2652,7 +2654,7 @@ def get_value(self, series, key): # if we have something that is Index-like, then # use this, e.g. DatetimeIndex s = getattr(series, '_values', None) - if isinstance(s, Index) and is_scalar(key): + if isinstance(s, (ExtensionArray, Index)) and is_scalar(key): try: return s[key] except (IndexError, ValueError): diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index eb3aeda7902fc..2aa490cd02afb 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -618,6 +618,9 @@ def can_do_equal_len(): return if isinstance(value, (ABCSeries, dict)): + # TODO(EA): ExtensionBlock.setitem this causes issues with + # setting for extensionarrays that store dicts. 
Need to decide + # if it's worth supporting that. value = self._align_series(indexer, Series(value)) elif isinstance(value, ABCDataFrame): diff --git a/pandas/core/internals.py b/pandas/core/internals.py index dd5feefc49fe3..bad0626206e80 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -56,7 +56,11 @@ is_null_datelike_scalar) import pandas.core.dtypes.concat as _concat -from pandas.core.dtypes.generic import ABCSeries, ABCDatetimeIndex +from pandas.core.dtypes.generic import ( + ABCSeries, + ABCDatetimeIndex, + ABCExtensionArray, + ABCIndexClass) import pandas.core.common as com import pandas.core.algorithms as algos @@ -99,6 +103,7 @@ class Block(PandasObject): is_object = False is_categorical = False is_sparse = False + is_extension = False _box_to_block_values = True _can_hold_na = False _can_consolidate = True @@ -1854,11 +1859,40 @@ class ExtensionBlock(NonConsolidatableMixIn, Block): ExtensionArrays are limited to 1-D. """ + is_extension = True + + def __init__(self, values, placement, ndim=None): + values = self._maybe_coerce_values(values) + super(ExtensionBlock, self).__init__(values, placement, ndim) + + def _maybe_coerce_values(self, values): + """Unbox to an extension array. + + This will unbox an ExtensionArray stored in an Index or Series. + ExtensionArrays pass through. No dtype coercion is done. + + Parameters + ---------- + values : Index, Series, ExtensionArray + + Returns + ------- + ExtensionArray + """ + if isinstance(values, (ABCIndexClass, ABCSeries)): + values = values._values + return values + @property def _holder(self): # For extension blocks, the holder is values-dependent. 
return type(self.values) + @property + def _can_hold_na(self): + # The default ExtensionArray._can_hold_na is True + return self._holder._can_hold_na + @property def is_view(self): """Extension arrays are never treated as views.""" @@ -3451,6 +3485,8 @@ def apply(self, f, axes=None, filter=None, do_integrity_check=False, else: align_keys = [] + # TODO(EA): may interfere with ExtensionBlock.setitem for blocks + # with a .values attribute. aligned_args = dict((k, kwargs[k]) for k in align_keys if hasattr(kwargs[k], 'values')) @@ -3696,6 +3732,11 @@ def is_datelike_mixed_type(self): self._consolidate_inplace() return any(block.is_datelike for block in self.blocks) + @property + def any_extension_types(self): + """Whether any of the blocks in this manager are extension blocks""" + return any(block.is_extension for block in self.blocks) + @property def is_view(self): """ return a boolean if we are a single block and are a view """ @@ -4101,7 +4142,10 @@ def set(self, item, value, check=False): # FIXME: refactor, clearly separate broadcasting & zip-like assignment # can prob also fix the various if tests for sparse/categorical - value_is_extension_type = is_extension_type(value) + # TODO(EA): Remove an is_extension_ when all extension types satisfy + # the interface + value_is_extension_type = (is_extension_type(value) or + is_extension_array_dtype(value)) # categorical/spares/datetimetz if value_is_extension_type: @@ -4833,15 +4877,11 @@ def form_blocks(arrays, names, axes): if len(items_dict['ExtensionBlock']): - external_blocks = [] - for i, _, array in items_dict['ExtensionBlock']: - if isinstance(array, ABCSeries): - array = array.values - # Allow our internal arrays to chose their block type. 
- block_type = getattr(array, '_block_type', ExtensionBlock) - external_blocks.append( - make_block(array, klass=block_type, - fastpath=True, placement=[i])) + external_blocks = [ + make_block(array, klass=ExtensionBlock, placement=[i]) + for i, _, array in items_dict['ExtensionBlock'] + ] + blocks.extend(external_blocks) if len(extra_locs): @@ -5162,7 +5202,7 @@ def _safe_reshape(arr, new_shape): """ if isinstance(arr, ABCSeries): arr = arr._values - if not isinstance(arr, Categorical): + if not isinstance(arr, ABCExtensionArray): arr = arr.reshape(new_shape) return arr @@ -5673,6 +5713,8 @@ def is_na(self): if not values._null_fill_value and values.sp_index.ngaps > 0: return False values_flat = values.ravel(order='K') + elif isinstance(self.block, ExtensionBlock): + values_flat = values else: values_flat = values.ravel(order='K') total_len = values_flat.shape[0] diff --git a/pandas/core/series.py b/pandas/core/series.py index 6fcd54ecc6118..b42e02bc99237 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -14,12 +14,14 @@ import numpy.ma as ma from pandas.core.accessor import CachedAccessor +from pandas.core.arrays import ExtensionArray from pandas.core.dtypes.common import ( is_categorical_dtype, is_bool, is_integer, is_integer_dtype, is_float_dtype, is_extension_type, + is_extension_array_dtype, is_datetime64tz_dtype, is_timedelta64_dtype, is_list_like, @@ -173,12 +175,17 @@ def __init__(self, data=None, index=None, dtype=None, name=None, raise NotImplementedError("initializing a Series from a " "MultiIndex is not supported") elif isinstance(data, Index): - # need to copy to avoid aliasing issues if name is None: name = data.name - data = data._to_embed(keep_tz=True, dtype=dtype) + if dtype is not None: + # astype copies + data = data.astype(dtype) + else: + # need to copy to avoid aliasing issues + data = data._values.copy() copy = False + elif isinstance(data, np.ndarray): pass elif isinstance(data, Series): @@ -203,13 +210,15 @@ def 
__init__(self, data=None, index=None, dtype=None, name=None, '`data` argument and a different ' '`index` argument. `copy` must ' 'be False.') - elif isinstance(data, Categorical): + + elif is_extension_array_dtype(data) and dtype is not None: # GH12574: Allow dtype=category only, otherwise error - if ((dtype is not None) and - not is_categorical_dtype(dtype)): - raise ValueError("cannot specify a dtype with a " - "Categorical unless " - "dtype='category'") + if not data.dtype.is_dtype(dtype): + raise ValueError("Cannot specify a dtype '{}' with an " + "extension array of a different " + "dtype ('{}').".format(dtype, + data.dtype)) + elif (isinstance(data, types.GeneratorType) or (compat.PY3 and isinstance(data, map))): data = list(data) @@ -2556,8 +2565,7 @@ def _reindex_indexer(self, new_index, indexer, copy): return self.copy() return self - # be subclass-friendly - new_values = algorithms.take_1d(self.get_values(), indexer) + new_values = algorithms.take_1d(self._values, indexer) return self._constructor(new_values, index=new_index) def _needs_reindex_multi(self, axes, method, level): @@ -3113,10 +3121,11 @@ def _sanitize_index(data, index, copy=False): if isinstance(data, ABCIndexClass) and not copy: pass - elif isinstance(data, PeriodIndex): - data = data.astype(object).values - elif isinstance(data, DatetimeIndex): - data = data._to_embed(keep_tz=True) + elif isinstance(data, (PeriodIndex, DatetimeIndex)): + data = data._values + if copy: + data = data.copy() + elif isinstance(data, np.ndarray): # coerce datetimelike types @@ -3156,8 +3165,17 @@ def _try_cast(arr, take_fast_path): subarr = np.array(subarr, dtype=dtype, copy=copy) except (ValueError, TypeError): if is_categorical_dtype(dtype): + # We *do* allow casting to categorical, since we know + # that Categorical is the only array type for 'category'. 
subarr = Categorical(arr, dtype.categories, ordered=dtype.ordered) + elif is_extension_array_dtype(dtype): + # We don't allow casting to third party dtypes, since we don't + # know what array belongs to which type. + msg = ("Cannot cast data to extension dtype '{}'. " + "Pass the extension array directly.".format(dtype)) + raise ValueError(msg) + elif dtype is not None and raise_cast_failure: raise else: @@ -3189,9 +3207,15 @@ def _try_cast(arr, take_fast_path): # we will try to copy be-definition here subarr = _try_cast(data, True) - elif isinstance(data, Categorical): + elif isinstance(data, ExtensionArray): subarr = data + if dtype is not None and not data.dtype.is_dtype(dtype): + msg = ("Cannot coerce extension array to dtype '{typ}'. " + "Do the coercion before passing to the constructor " + "instead.".format(typ=dtype)) + raise ValueError(msg) + if copy: subarr = data.copy() return subarr diff --git a/pandas/tests/categorical/test_missing.py b/pandas/tests/categorical/test_missing.py index fca5573547071..5133c97d8b590 100644 --- a/pandas/tests/categorical/test_missing.py +++ b/pandas/tests/categorical/test_missing.py @@ -1,10 +1,9 @@ # -*- coding: utf-8 -*- - import numpy as np import pytest import pandas.util.testing as tm -from pandas import (Categorical, Index, isna) +from pandas import Categorical, Index, isna from pandas.compat import lrange from pandas.core.dtypes.dtypes import CategoricalDtype diff --git a/pandas/tests/extension/base/__init__.py b/pandas/tests/extension/base/__init__.py new file mode 100644 index 0000000000000..2273ef1f3e110 --- /dev/null +++ b/pandas/tests/extension/base/__init__.py @@ -0,0 +1,42 @@ +"""Base test suite for extension arrays. + +These tests are intended for third-party libraries to subclass to validate +that their extension arrays and dtypes satisfy the interface. Moving or +renaming the tests should not be done lightly. + +Libraries are expected to implement a few pytest fixtures to provide data +for the tests. 
The fixtures may be located in either + +* The same module as your test class. +* A ``conftest.py`` in the same directory as your test class. + +The full list of fixtures may be found in the ``conftest.py`` next to this +file. + +.. code-block:: python + + import pytest + from pandas.tests.extension.base import BaseDtypeTests + + + @pytest.fixture + def dtype(): + return MyDtype() + + + class TestMyDtype(BaseDtypeTests): + pass + + +Your class ``TestMyDtype`` will inherit all the tests defined on +``BaseDtypeTests``. pytest's fixture discovery will supply your ``dtype`` +wherever the test requires it. You're free to implement additional tests. +""" +from .casting import BaseCastingTests # noqa +from .constructors import BaseConstructorsTests # noqa +from .dtype import BaseDtypeTests # noqa +from .getitem import BaseGetitemTests # noqa +from .interface import BaseInterfaceTests # noqa +from .methods import BaseMethodsTests # noqa +from .missing import BaseMissingTests # noqa +from .reshaping import BaseReshapingTests # noqa diff --git a/pandas/tests/extension/base/casting.py b/pandas/tests/extension/base/casting.py new file mode 100644 index 0000000000000..bcfbf0a247269 --- /dev/null +++ b/pandas/tests/extension/base/casting.py @@ -0,0 +1,11 @@ +import pandas as pd +from pandas.core.internals import ObjectBlock + + +class BaseCastingTests(object): + """Casting to and from ExtensionDtypes""" + + def test_astype_object_series(self, all_data): + ser = pd.Series({"A": all_data}) + result = ser.astype(object) + assert isinstance(result._data.blocks[0], ObjectBlock) diff --git a/pandas/tests/extension/base/constructors.py b/pandas/tests/extension/base/constructors.py new file mode 100644 index 0000000000000..7ad100e6289e9 --- /dev/null +++ b/pandas/tests/extension/base/constructors.py @@ -0,0 +1,43 @@ +import pytest + +import pandas as pd +import pandas.util.testing as tm +from pandas.core.internals import ExtensionBlock + + +class BaseConstructorsTests(object): + + def
test_series_constructor(self, data): + result = pd.Series(data) + assert result.dtype == data.dtype + assert len(result) == len(data) + assert isinstance(result._data.blocks[0], ExtensionBlock) + assert result._data.blocks[0].values is data + + # Series[EA] is unboxed / boxed correctly + result2 = pd.Series(result) + assert result2.dtype == data.dtype + assert isinstance(result2._data.blocks[0], ExtensionBlock) + + @pytest.mark.parametrize("from_series", [True, False]) + def test_dataframe_constructor_from_dict(self, data, from_series): + if from_series: + data = pd.Series(data) + result = pd.DataFrame({"A": data}) + assert result.dtypes['A'] == data.dtype + assert result.shape == (len(data), 1) + assert isinstance(result._data.blocks[0], ExtensionBlock) + + def test_dataframe_from_series(self, data): + result = pd.DataFrame(pd.Series(data)) + assert result.dtypes[0] == data.dtype + assert result.shape == (len(data), 1) + assert isinstance(result._data.blocks[0], ExtensionBlock) + + @pytest.mark.xfail(reason="GH-19342") + def test_series_given_mismatched_index_raises(self, data): + msg = 'Wrong number of items passed 3, placement implies 4' + with tm.assert_raises_regex(ValueError, None) as m: + pd.Series(data[:3], index=[0, 1, 2, 3, 4]) + + assert m.match(msg) diff --git a/pandas/tests/extension/base/dtype.py b/pandas/tests/extension/base/dtype.py new file mode 100644 index 0000000000000..f5015bd469f13 --- /dev/null +++ b/pandas/tests/extension/base/dtype.py @@ -0,0 +1,46 @@ +import numpy as np +import pandas as pd + + +class BaseDtypeTests(object): + """Base class for ExtensionDtype classes""" + + def test_name(self, dtype): + assert isinstance(dtype.name, str) + + def test_kind(self, dtype): + valid = set('biufcmMOSUV') + if dtype.kind is not None: + assert dtype.kind in valid + + def test_construct_from_string_own_name(self, dtype): + result = dtype.construct_from_string(dtype.name) + assert type(result) is type(dtype) + + # check OK as classmethod + result = 
type(dtype).construct_from_string(dtype.name) + assert type(result) is type(dtype) + + def test_is_dtype_from_name(self, dtype): + result = type(dtype).is_dtype(dtype.name) + assert result is True + + def test_is_dtype_unboxes_dtype(self, data, dtype): + assert dtype.is_dtype(data) is True + + def test_is_dtype_from_self(self, dtype): + result = type(dtype).is_dtype(dtype) + assert result is True + + def test_is_not_string_type(self, dtype): + return not pd.api.types.is_string_dtype(dtype) + + def test_is_not_object_type(self, dtype): + return not pd.api.types.is_object_dtype(dtype) + + def test_eq_with_str(self, dtype): + assert dtype == dtype.name + assert dtype != dtype.name + '-suffix' + + def test_eq_with_numpy_object(self, dtype): + assert dtype != np.dtype('object') diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py new file mode 100644 index 0000000000000..f43971e928cac --- /dev/null +++ b/pandas/tests/extension/base/getitem.py @@ -0,0 +1,119 @@ +import numpy as np + +import pandas as pd +import pandas.util.testing as tm + + +class BaseGetitemTests(object): + """Tests for ExtensionArray.__getitem__.""" + + def test_iloc_series(self, data): + ser = pd.Series(data) + result = ser.iloc[:4] + expected = pd.Series(data[:4]) + tm.assert_series_equal(result, expected) + + result = ser.iloc[[0, 1, 2, 3]] + tm.assert_series_equal(result, expected) + + def test_iloc_frame(self, data): + df = pd.DataFrame({"A": data, 'B': np.arange(len(data))}) + expected = pd.DataFrame({"A": data[:4]}) + + # slice -> frame + result = df.iloc[:4, [0]] + tm.assert_frame_equal(result, expected) + + # sequence -> frame + result = df.iloc[[0, 1, 2, 3], [0]] + tm.assert_frame_equal(result, expected) + + expected = pd.Series(data[:4], name='A') + + # slice -> series + result = df.iloc[:4, 0] + tm.assert_series_equal(result, expected) + + # sequence -> series + result = df.iloc[:4, 0] + tm.assert_series_equal(result, expected) + + def 
test_loc_series(self, data): + ser = pd.Series(data) + result = ser.loc[:3] + expected = pd.Series(data[:4]) + tm.assert_series_equal(result, expected) + + result = ser.loc[[0, 1, 2, 3]] + tm.assert_series_equal(result, expected) + + def test_loc_frame(self, data): + df = pd.DataFrame({"A": data, 'B': np.arange(len(data))}) + expected = pd.DataFrame({"A": data[:4]}) + + # slice -> frame + result = df.loc[:3, ['A']] + tm.assert_frame_equal(result, expected) + + # sequence -> frame + result = df.loc[[0, 1, 2, 3], ['A']] + tm.assert_frame_equal(result, expected) + + expected = pd.Series(data[:4], name='A') + + # slice -> series + result = df.loc[:3, 'A'] + tm.assert_series_equal(result, expected) + + # sequence -> series + result = df.loc[:3, 'A'] + tm.assert_series_equal(result, expected) + + def test_getitem_scalar(self, data): + result = data[0] + assert isinstance(result, data.dtype.type) + + result = pd.Series(data)[0] + assert isinstance(result, data.dtype.type) + + def test_getitem_scalar_na(self, data_missing, na_cmp, na_value): + result = data_missing[0] + assert na_cmp(result, na_value) + + def test_getitem_mask(self, data): + # Empty mask, raw array + mask = np.zeros(len(data), dtype=bool) + result = data[mask] + assert len(result) == 0 + assert isinstance(result, type(data)) + + # Empty mask, in series + mask = np.zeros(len(data), dtype=bool) + result = pd.Series(data)[mask] + assert len(result) == 0 + assert result.dtype == data.dtype + + # non-empty mask, raw array + mask[0] = True + result = data[mask] + assert len(result) == 1 + assert isinstance(result, type(data)) + + # non-empty mask, in series + result = pd.Series(data)[mask] + assert len(result) == 1 + assert result.dtype == data.dtype + + def test_getitem_slice(self, data): + # getitem[slice] should return an array + result = data[slice(0)] # empty + assert isinstance(result, type(data)) + + result = data[slice(1)] # scalar + assert isinstance(result, type(data)) + + def test_take_sequence(self, 
data): + result = pd.Series(data)[[0, 1, 3]] + assert result.iloc[0] == data[0] + assert result.iloc[1] == data[1] + assert result.iloc[2] == data[3] diff --git a/pandas/tests/extension/base/interface.py b/pandas/tests/extension/base/interface.py new file mode 100644 index 0000000000000..8f17131a9482b --- /dev/null +++ b/pandas/tests/extension/base/interface.py @@ -0,0 +1,53 @@ +import numpy as np + +import pandas as pd +from pandas.compat import StringIO +from pandas.core.dtypes.common import is_extension_array_dtype +from pandas.core.dtypes.dtypes import ExtensionDtype + + +class BaseInterfaceTests(object): + """Tests that the basic interface is satisfied.""" + # ------------------------------------------------------------------------ + # Interface + # ------------------------------------------------------------------------ + + def test_len(self, data): + assert len(data) == 100 + + def test_ndim(self, data): + assert data.ndim == 1 + + def test_can_hold_na_valid(self, data): + assert data._can_hold_na in {True, False} + + def test_memory_usage(self, data): + s = pd.Series(data) + result = s.memory_usage(index=False) + assert result == s.nbytes + + def test_array_interface(self, data): + result = np.array(data) + assert result[0] == data[0] + + def test_as_ndarray_with_dtype_kind(self, data): + np.array(data, dtype=data.dtype.kind) + + def test_repr(self, data): + ser = pd.Series(data) + assert data.dtype.name in repr(ser) + + df = pd.DataFrame({"A": data}) + repr(df) + + def test_dtype_name_in_info(self, data): + buf = StringIO() + pd.DataFrame({"A": data}).info(buf=buf) + result = buf.getvalue() + assert data.dtype.name in result + + def test_is_extension_array_dtype(self, data): + assert is_extension_array_dtype(data) + assert is_extension_array_dtype(data.dtype) + assert is_extension_array_dtype(pd.Series(data)) + assert isinstance(data.dtype, ExtensionDtype) diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py new file 
mode 100644 index 0000000000000..c77811ca63926 --- /dev/null +++ b/pandas/tests/extension/base/methods.py @@ -0,0 +1,32 @@ +import pytest +import numpy as np + +import pandas as pd +import pandas.util.testing as tm + + +class BaseMethodsTests(object): + """Various Series and DataFrame methods.""" + + @pytest.mark.parametrize('dropna', [True, False]) + def test_value_counts(self, all_data, dropna): + all_data = all_data[:10] + if dropna: + other = np.array(all_data[~all_data.isna()]) + else: + other = all_data + + result = pd.Series(all_data).value_counts(dropna=dropna).sort_index() + expected = pd.Series(other).value_counts(dropna=dropna).sort_index() + + tm.assert_series_equal(result, expected) + + def test_count(self, data_missing): + df = pd.DataFrame({"A": data_missing}) + result = df.count(axis='columns') + expected = pd.Series([0, 1]) + tm.assert_series_equal(result, expected) + + def test_apply_simple_series(self, data): + result = pd.Series(data).apply(id) + assert isinstance(result, pd.Series) diff --git a/pandas/tests/extension/base/missing.py b/pandas/tests/extension/base/missing.py new file mode 100644 index 0000000000000..1d6f2eea1f1f9 --- /dev/null +++ b/pandas/tests/extension/base/missing.py @@ -0,0 +1,45 @@ +import numpy as np + +import pandas as pd +import pandas.util.testing as tm + + +class BaseMissingTests(object): + def test_isna(self, data_missing): + if data_missing._can_hold_na: + expected = np.array([True, False]) + else: + expected = np.array([False, False]) + + result = pd.isna(data_missing) + tm.assert_numpy_array_equal(result, expected) + + result = pd.Series(data_missing).isna() + expected = pd.Series(expected) + tm.assert_series_equal(result, expected) + + def test_dropna_series(self, data_missing): + ser = pd.Series(data_missing) + result = ser.dropna() + expected = ser.iloc[[1]] + tm.assert_series_equal(result, expected) + + def test_dropna_frame(self, data_missing): + df = pd.DataFrame({"A": data_missing}) + + # defaults + result = 
df.dropna() + expected = df.iloc[[1]] + tm.assert_frame_equal(result, expected) + + # axis = 1 + result = df.dropna(axis='columns') + expected = pd.DataFrame(index=[0, 1]) + tm.assert_frame_equal(result, expected) + + # multiple + df = pd.DataFrame({"A": data_missing, + "B": [1, np.nan]}) + result = df.dropna() + expected = df.iloc[:0] + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py new file mode 100644 index 0000000000000..d8f577c6fa50d --- /dev/null +++ b/pandas/tests/extension/base/reshaping.py @@ -0,0 +1,61 @@ +import pytest + +import pandas as pd +import pandas.util.testing as tm +from pandas.core.internals import ExtensionBlock + + +class BaseReshapingTests(object): + """Tests for reshaping and concatenation.""" + @pytest.mark.parametrize('in_frame', [True, False]) + def test_concat(self, data, in_frame): + wrapped = pd.Series(data) + if in_frame: + wrapped = pd.DataFrame(wrapped) + result = pd.concat([wrapped, wrapped], ignore_index=True) + + assert len(result) == len(data) * 2 + + if in_frame: + dtype = result.dtypes[0] + else: + dtype = result.dtype + + assert dtype == data.dtype + assert isinstance(result._data.blocks[0], ExtensionBlock) + + def test_align(self, data, na_value): + a = data[:3] + b = data[2:5] + r1, r2 = pd.Series(a).align(pd.Series(b, index=[1, 2, 3])) + + # Assumes that the ctor can take a list of scalars of the type + e1 = pd.Series(type(data)(list(a) + [na_value])) + e2 = pd.Series(type(data)([na_value] + list(b))) + tm.assert_series_equal(r1, e1) + tm.assert_series_equal(r2, e2) + + def test_align_frame(self, data, na_value): + a = data[:3] + b = data[2:5] + r1, r2 = pd.DataFrame({'A': a}).align( + pd.DataFrame({'A': b}, index=[1, 2, 3]) + ) + + # Assumes that the ctor can take a list of scalars of the type + e1 = pd.DataFrame({'A': type(data)(list(a) + [na_value])}) + e2 = pd.DataFrame({'A': type(data)([na_value] + list(b))}) + 
tm.assert_frame_equal(r1, e1) + tm.assert_frame_equal(r2, e2) + + def test_set_frame_expand_regular_with_extension(self, data): + df = pd.DataFrame({"A": [1] * len(data)}) + df['B'] = data + expected = pd.DataFrame({"A": [1] * len(data), "B": data}) + tm.assert_frame_equal(df, expected) + + def test_set_frame_expand_extension_with_regular(self, data): + df = pd.DataFrame({'A': data}) + df['B'] = [1] * len(data) + expected = pd.DataFrame({"A": data, "B": [1] * len(data)}) + tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/extension/category/__init__.py b/pandas/tests/extension/category/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/extension/category/test_categorical.py b/pandas/tests/extension/category/test_categorical.py new file mode 100644 index 0000000000000..ec548fca6d901 --- /dev/null +++ b/pandas/tests/extension/category/test_categorical.py @@ -0,0 +1,84 @@ +import string + +import pytest +import numpy as np + +from pandas.api.types import CategoricalDtype +from pandas import Categorical +from pandas.tests.extension import base + + +def make_data(): + return np.random.choice(list(string.ascii_letters), size=100) + + +@pytest.fixture +def dtype(): + return CategoricalDtype() + + +@pytest.fixture +def data(): + """Length-100 PeriodArray for semantics test.""" + return Categorical(make_data()) + + +@pytest.fixture +def data_missing(): + """Length 2 array with [NA, Valid]""" + return Categorical([np.nan, 'A']) + + +@pytest.fixture +def na_value(): + return np.nan + + +class TestDtype(base.BaseDtypeTests): + pass + + +class TestInterface(base.BaseInterfaceTests): + @pytest.mark.skip(reason="Memory usage doesn't match") + def test_memory_usage(self): + # Is this deliberate? 
+ pass + + +class TestConstructors(base.BaseConstructorsTests): + pass + + +class TestReshaping(base.BaseReshapingTests): + @pytest.mark.skip(reason="Unobserved categories preseved in concat.") + def test_align(self, data, na_value): + pass + + @pytest.mark.skip(reason="Unobserved categories preseved in concat.") + def test_align_frame(self, data, na_value): + pass + + +class TestGetitem(base.BaseGetitemTests): + @pytest.mark.skip(reason="Backwards compatability") + def test_getitem_scalar(self): + # CategoricalDtype.type isn't "correct" since it should + # be a parent of the elements (object). But don't want + # to break things by changing. + pass + + +class TestMissing(base.BaseMissingTests): + pass + + +class TestMethods(base.BaseMethodsTests): + pass + + @pytest.mark.skip(reason="Unobserved categories included") + def test_value_counts(self, all_data, dropna): + pass + + +class TestCasting(base.BaseCastingTests): + pass diff --git a/pandas/tests/extension/conftest.py b/pandas/tests/extension/conftest.py new file mode 100644 index 0000000000000..f86849b9cbd61 --- /dev/null +++ b/pandas/tests/extension/conftest.py @@ -0,0 +1,48 @@ +import operator + +import pytest + + +@pytest.fixture +def dtype(): + """A fixture providing the ExtensionDtype to validate.""" + raise NotImplementedError + + +@pytest.fixture +def data(): + """Length-100 array for this type.""" + raise NotImplementedError + + +@pytest.fixture +def data_missing(): + """Length-2 array with [NA, Valid]""" + raise NotImplementedError + + +@pytest.fixture(params=['data', 'data_missing']) +def all_data(request, data, data_missing): + """Parametrized fixture giving 'data' and 'data_missing'""" + if request.param == 'data': + return data + elif request.param == 'data_missing': + return data_missing + + +@pytest.fixture +def na_cmp(): + """Binary operator for comparing NA values. + + Should return a function of two arguments that returns + True if both arguments are (scalar) NA for your type. 
+ + By defult, uses ``operator.or`` + """ + return operator.is_ + + +@pytest.fixture +def na_value(): + """The scalar missing value for this type. Default 'None'""" + return None diff --git a/pandas/tests/extension/decimal/__init__.py b/pandas/tests/extension/decimal/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py new file mode 100644 index 0000000000000..f526ac5996a10 --- /dev/null +++ b/pandas/tests/extension/decimal/array.py @@ -0,0 +1,86 @@ +import decimal +import numbers +import random +import sys + +import numpy as np + +import pandas as pd +from pandas.core.arrays import ExtensionArray +from pandas.core.dtypes.base import ExtensionDtype + + +class DecimalDtype(ExtensionDtype): + type = decimal.Decimal + name = 'decimal' + + @classmethod + def construct_from_string(cls, string): + if string == cls.name: + return cls() + else: + raise TypeError("Cannot construct a '{}' from " + "'{}'".format(cls, string)) + + +class DecimalArray(ExtensionArray): + dtype = DecimalDtype() + + def __init__(self, values): + values = np.asarray(values, dtype=object) + + self.values = values + + def __getitem__(self, item): + if isinstance(item, numbers.Integral): + return self.values[item] + else: + return type(self)(self.values[item]) + + def copy(self, deep=False): + if deep: + return type(self)(self.values.copy()) + return type(self)(self) + + def __setitem__(self, key, value): + if pd.api.types.is_list_like(value): + value = [decimal.Decimal(v) for v in value] + else: + value = decimal.Decimal(value) + self.values[key] = value + + def __len__(self): + return len(self.values) + + def __repr__(self): + return repr(self.values) + + @property + def nbytes(self): + n = len(self) + if n: + return n * sys.getsizeof(self[0]) + return 0 + + def isna(self): + return np.array([x.is_nan() for x in self.values]) + + def take(self, indexer, allow_fill=True, fill_value=None): + 
mask = indexer == -1 + + out = self.values.take(indexer) + out[mask] = self._na_value + + return type(self)(out) + + @property + def _na_value(self): + return decimal.Decimal('NaN') + + @classmethod + def _concat_same_type(cls, to_concat): + return cls(np.concatenate([x.values for x in to_concat])) + + +def make_data(): + return [decimal.Decimal(random.random()) for _ in range(100)] diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py new file mode 100644 index 0000000000000..7b4d079ecad87 --- /dev/null +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -0,0 +1,154 @@ +import decimal + +import numpy as np +import pandas as pd +import pandas.util.testing as tm +import pytest + +from pandas.tests.extension import base + +from .array import DecimalDtype, DecimalArray, make_data + + +@pytest.fixture +def dtype(): + return DecimalDtype() + + +@pytest.fixture +def data(): + return DecimalArray(make_data()) + + +@pytest.fixture +def data_missing(): + return DecimalArray([decimal.Decimal('NaN'), decimal.Decimal(1)]) + + +@pytest.fixture +def na_cmp(): + return lambda x, y: x.is_nan() and y.is_nan() + + +@pytest.fixture +def na_value(): + return decimal.Decimal("NaN") + + +class TestDtype(base.BaseDtypeTests): + pass + + +class TestInterface(base.BaseInterfaceTests): + pass + + +class TestConstructors(base.BaseConstructorsTests): + pass + + +class TestReshaping(base.BaseReshapingTests): + + def test_align(self, data, na_value): + # Have to override since assert_series_equal doesn't + # compare Decimal(NaN) properly. 
+ a = data[:3] + b = data[2:5] + r1, r2 = pd.Series(a).align(pd.Series(b, index=[1, 2, 3])) + + # NaN handling + e1 = pd.Series(type(data)(list(a) + [na_value])) + e2 = pd.Series(type(data)([na_value] + list(b))) + tm.assert_series_equal(r1.iloc[:3], e1.iloc[:3]) + assert r1[3].is_nan() + assert e1[3].is_nan() + + tm.assert_series_equal(r2.iloc[1:], e2.iloc[1:]) + assert r2[0].is_nan() + assert e2[0].is_nan() + + def test_align_frame(self, data, na_value): + # Override for Decimal(NaN) comparison + a = data[:3] + b = data[2:5] + r1, r2 = pd.DataFrame({'A': a}).align( + pd.DataFrame({'A': b}, index=[1, 2, 3]) + ) + + # Assumes that the ctor can take a list of scalars of the type + e1 = pd.DataFrame({'A': type(data)(list(a) + [na_value])}) + e2 = pd.DataFrame({'A': type(data)([na_value] + list(b))}) + + tm.assert_frame_equal(r1.iloc[:3], e1.iloc[:3]) + assert r1.loc[3, 'A'].is_nan() + assert e1.loc[3, 'A'].is_nan() + + tm.assert_frame_equal(r2.iloc[1:], e2.iloc[1:]) + assert r2.loc[0, 'A'].is_nan() + assert e2.loc[0, 'A'].is_nan() + + +class TestGetitem(base.BaseGetitemTests): + pass + + +class TestMissing(base.BaseMissingTests): + pass + + +class TestMethods(base.BaseMethodsTests): + @pytest.mark.parametrize('dropna', [True, False]) + @pytest.mark.xfail(reason="value_counts not implemented yet.") + def test_value_counts(self, all_data, dropna): + all_data = all_data[:10] + if dropna: + other = np.array(all_data[~all_data.isna()]) + else: + other = all_data + + result = pd.Series(all_data).value_counts(dropna=dropna).sort_index() + expected = pd.Series(other).value_counts(dropna=dropna).sort_index() + + tm.assert_series_equal(result, expected) + + +class TestCasting(base.BaseCastingTests): + pass + + +def test_series_constructor_coerce_data_to_extension_dtype_raises(): + xpr = ("Cannot cast data to extension dtype 'decimal'. 
Pass the " + "extension array directly.") + with tm.assert_raises_regex(ValueError, xpr): + pd.Series([0, 1, 2], dtype=DecimalDtype()) + + +def test_series_constructor_with_same_dtype_ok(): + arr = DecimalArray([decimal.Decimal('10.0')]) + result = pd.Series(arr, dtype=DecimalDtype()) + expected = pd.Series(arr) + tm.assert_series_equal(result, expected) + + +def test_series_constructor_coerce_extension_array_to_dtype_raises(): + arr = DecimalArray([decimal.Decimal('10.0')]) + xpr = "Cannot specify a dtype 'int64' .* \('decimal'\)." + + with tm.assert_raises_regex(ValueError, xpr): + pd.Series(arr, dtype='int64') + + +def test_dataframe_constructor_with_same_dtype_ok(): + arr = DecimalArray([decimal.Decimal('10.0')]) + + result = pd.DataFrame({"A": arr}, dtype=DecimalDtype()) + expected = pd.DataFrame({"A": arr}) + tm.assert_frame_equal(result, expected) + + +def test_dataframe_constructor_with_different_dtype_raises(): + arr = DecimalArray([decimal.Decimal('10.0')]) + + xpr = "Cannot coerce extension array to dtype 'int64'. 
" + with tm.assert_raises_regex(ValueError, xpr): + pd.DataFrame({"A": arr}, dtype='int64') diff --git a/pandas/tests/extension/json/__init__.py b/pandas/tests/extension/json/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py new file mode 100644 index 0000000000000..90aac93c68f64 --- /dev/null +++ b/pandas/tests/extension/json/array.py @@ -0,0 +1,99 @@ +import collections +import itertools +import numbers +import random +import string +import sys + +import numpy as np + +from pandas.core.dtypes.base import ExtensionDtype +from pandas.core.arrays import ExtensionArray + + +class JSONDtype(ExtensionDtype): + type = collections.Mapping + name = 'json' + + @classmethod + def construct_from_string(cls, string): + if string == cls.name: + return cls() + else: + raise TypeError("Cannot construct a '{}' from " + "'{}'".format(cls, string)) + + +class JSONArray(ExtensionArray): + dtype = JSONDtype() + + def __init__(self, values): + for val in values: + if not isinstance(val, self.dtype.type): + raise TypeError + self.data = values + + def __getitem__(self, item): + if isinstance(item, numbers.Integral): + return self.data[item] + elif isinstance(item, np.ndarray) and item.dtype == 'bool': + return type(self)([x for x, m in zip(self, item) if m]) + else: + return type(self)(self.data[item]) + + def __setitem__(self, key, value): + if isinstance(key, numbers.Integral): + self.data[key] = value + else: + if not isinstance(value, (type(self), + collections.Sequence)): + # broadcast value + value = itertools.cycle([value]) + + if isinstance(key, np.ndarray) and key.dtype == 'bool': + # masking + for i, (k, v) in enumerate(zip(key, value)): + if k: + assert isinstance(v, self.dtype.type) + self.data[i] = v + else: + for k, v in zip(key, value): + assert isinstance(v, self.dtype.type) + self.data[k] = v + + def __len__(self): + return len(self.data) + + def __repr__(self): + 
return 'JSONArary({!r})'.format(self.data) + + @property + def nbytes(self): + return sys.getsizeof(self.data) + + def isna(self): + return np.array([x == self._na_value for x in self.data]) + + def take(self, indexer, allow_fill=True, fill_value=None): + output = [self.data[loc] if loc != -1 else self._na_value + for loc in indexer] + return type(self)(output) + + def copy(self, deep=False): + return type(self)(self.data[:]) + + @property + def _na_value(self): + return {} + + @classmethod + def _concat_same_type(cls, to_concat): + data = list(itertools.chain.from_iterable([x.data for x in to_concat])) + return cls(data) + + +def make_data(): + # TODO: Use a regular dict. See _NDFrameIndexer._setitem_with_indexer + return [collections.UserDict([ + (random.choice(string.ascii_letters), random.randint(0, 100)) + for _ in range(random.randint(0, 10))]) for _ in range(100)] diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py new file mode 100644 index 0000000000000..e0721bb1d8d1a --- /dev/null +++ b/pandas/tests/extension/json/test_json.py @@ -0,0 +1,73 @@ +import operator +import sys + +import pytest + + +from pandas.tests.extension import base + +from .array import JSONArray, JSONDtype, make_data + +pytestmark = pytest.mark.skipif(sys.version_info[0] == 2, + reason="Py2 doesn't have a UserDict") + + +@pytest.fixture +def dtype(): + return JSONDtype() + + +@pytest.fixture +def data(): + """Length-100 PeriodArray for semantics test.""" + return JSONArray(make_data()) + + +@pytest.fixture +def data_missing(): + """Length 2 array with [NA, Valid]""" + return JSONArray([{}, {'a': 10}]) + + +@pytest.fixture +def na_value(): + return {} + + +@pytest.fixture +def na_cmp(): + return operator.eq + + +class TestDtype(base.BaseDtypeTests): + pass + + +class TestInterface(base.BaseInterfaceTests): + pass + + +class TestConstructors(base.BaseConstructorsTests): + pass + + +class TestReshaping(base.BaseReshapingTests): + pass + + +class 
TestGetitem(base.BaseGetitemTests): + pass + + +class TestMissing(base.BaseMissingTests): + pass + + +class TestMethods(base.BaseMethodsTests): + @pytest.mark.skip(reason="Unhashable") + def test_value_counts(self, all_data, dropna): + pass + + +class TestCasting(base.BaseCastingTests): + pass diff --git a/pandas/tests/internals/test_external_block.py b/pandas/tests/extension/test_external_block.py similarity index 94% rename from pandas/tests/internals/test_external_block.py rename to pandas/tests/extension/test_external_block.py index 2487363df8f99..991da41168aa0 100644 --- a/pandas/tests/internals/test_external_block.py +++ b/pandas/tests/extension/test_external_block.py @@ -5,12 +5,12 @@ import pandas as pd from pandas.core.internals import ( - BlockManager, SingleBlockManager, ExtensionBlock) + BlockManager, SingleBlockManager, NonConsolidatableMixIn, Block) import pytest -class CustomBlock(ExtensionBlock): +class CustomBlock(NonConsolidatableMixIn, Block): _holder = np.ndarray From 5c41b2deef97a04d8031da2d9060d17b12c27be8 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 23 Feb 2018 04:35:07 -0800 Subject: [PATCH 183/217] Separate TimedeltaIndex mul/div tests (#19848) --- .../tests/indexes/datetimes/test_datetime.py | 108 +--- .../tests/indexes/datetimes/test_indexing.py | 570 +++++++++++------- pandas/tests/indexes/period/test_indexing.py | 84 ++- pandas/tests/indexes/period/test_period.py | 37 +- .../indexes/timedeltas/test_arithmetic.py | 296 ++++----- .../tests/indexes/timedeltas/test_indexing.py | 229 +++---- 6 files changed, 677 insertions(+), 647 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index 2cf33644377ab..b685584a29fb9 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -2,7 +2,7 @@ import pytest import numpy as np -from datetime import date, timedelta, time +from datetime import date import 
dateutil import pandas as pd @@ -18,112 +18,6 @@ class TestDatetimeIndex(object): - def test_get_loc(self): - idx = pd.date_range('2000-01-01', periods=3) - - for method in [None, 'pad', 'backfill', 'nearest']: - assert idx.get_loc(idx[1], method) == 1 - assert idx.get_loc(idx[1].to_pydatetime(), method) == 1 - assert idx.get_loc(str(idx[1]), method) == 1 - - if method is not None: - assert idx.get_loc(idx[1], method, - tolerance=pd.Timedelta('0 days')) == 1 - - assert idx.get_loc('2000-01-01', method='nearest') == 0 - assert idx.get_loc('2000-01-01T12', method='nearest') == 1 - - assert idx.get_loc('2000-01-01T12', method='nearest', - tolerance='1 day') == 1 - assert idx.get_loc('2000-01-01T12', method='nearest', - tolerance=pd.Timedelta('1D')) == 1 - assert idx.get_loc('2000-01-01T12', method='nearest', - tolerance=np.timedelta64(1, 'D')) == 1 - assert idx.get_loc('2000-01-01T12', method='nearest', - tolerance=timedelta(1)) == 1 - with tm.assert_raises_regex(ValueError, - 'unit abbreviation w/o a number'): - idx.get_loc('2000-01-01T12', method='nearest', tolerance='foo') - with pytest.raises(KeyError): - idx.get_loc('2000-01-01T03', method='nearest', tolerance='2 hours') - with pytest.raises( - ValueError, - match='tolerance size must match target index size'): - idx.get_loc('2000-01-01', method='nearest', - tolerance=[pd.Timedelta('1day').to_timedelta64(), - pd.Timedelta('1day').to_timedelta64()]) - - assert idx.get_loc('2000', method='nearest') == slice(0, 3) - assert idx.get_loc('2000-01', method='nearest') == slice(0, 3) - - assert idx.get_loc('1999', method='nearest') == 0 - assert idx.get_loc('2001', method='nearest') == 2 - - with pytest.raises(KeyError): - idx.get_loc('1999', method='pad') - with pytest.raises(KeyError): - idx.get_loc('2001', method='backfill') - - with pytest.raises(KeyError): - idx.get_loc('foobar') - with pytest.raises(TypeError): - idx.get_loc(slice(2)) - - idx = pd.to_datetime(['2000-01-01', '2000-01-04']) - assert 
idx.get_loc('2000-01-02', method='nearest') == 0 - assert idx.get_loc('2000-01-03', method='nearest') == 1 - assert idx.get_loc('2000-01', method='nearest') == slice(0, 2) - - # time indexing - idx = pd.date_range('2000-01-01', periods=24, freq='H') - tm.assert_numpy_array_equal(idx.get_loc(time(12)), - np.array([12]), check_dtype=False) - tm.assert_numpy_array_equal(idx.get_loc(time(12, 30)), - np.array([]), check_dtype=False) - with pytest.raises(NotImplementedError): - idx.get_loc(time(12, 30), method='pad') - - def test_get_indexer(self): - idx = pd.date_range('2000-01-01', periods=3) - exp = np.array([0, 1, 2], dtype=np.intp) - tm.assert_numpy_array_equal(idx.get_indexer(idx), exp) - - target = idx[0] + pd.to_timedelta(['-1 hour', '12 hours', - '1 day 1 hour']) - tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), - np.array([-1, 0, 1], dtype=np.intp)) - tm.assert_numpy_array_equal(idx.get_indexer(target, 'backfill'), - np.array([0, 1, 2], dtype=np.intp)) - tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest'), - np.array([0, 1, 1], dtype=np.intp)) - tm.assert_numpy_array_equal( - idx.get_indexer(target, 'nearest', - tolerance=pd.Timedelta('1 hour')), - np.array([0, -1, 1], dtype=np.intp)) - tol_raw = [pd.Timedelta('1 hour'), - pd.Timedelta('1 hour'), - pd.Timedelta('1 hour').to_timedelta64(), ] - tm.assert_numpy_array_equal( - idx.get_indexer(target, 'nearest', - tolerance=[np.timedelta64(x) for x in tol_raw]), - np.array([0, -1, 1], dtype=np.intp)) - tol_bad = [pd.Timedelta('2 hour').to_timedelta64(), - pd.Timedelta('1 hour').to_timedelta64(), - 'foo', ] - with pytest.raises( - ValueError, match='abbreviation w/o a number'): - idx.get_indexer(target, 'nearest', tolerance=tol_bad) - with pytest.raises(ValueError): - idx.get_indexer(idx[[0]], method='nearest', tolerance='foo') - - def test_reasonable_keyerror(self): - # GH #1062 - index = DatetimeIndex(['1/3/2000']) - try: - index.get_loc('1/1/2000') - except KeyError as e: - assert '2000' in 
str(e) - def test_roundtrip_pickle_with_tz(self): # GH 8367 diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index a9f1a5e608ac7..af65a8618d30f 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -1,4 +1,4 @@ -from datetime import datetime +from datetime import datetime, timedelta, time import pytest import pytz @@ -12,10 +12,93 @@ START, END = datetime(2009, 1, 1), datetime(2010, 1, 1) -class TestDatetimeIndex(object): +class TestGetItem(object): + def test_getitem(self): + idx1 = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx') + idx2 = pd.date_range('2011-01-01', '2011-01-31', freq='D', + tz='Asia/Tokyo', name='idx') - def test_where_other(self): + for idx in [idx1, idx2]: + result = idx[0] + assert result == Timestamp('2011-01-01', tz=idx.tz) + + result = idx[0:5] + expected = pd.date_range('2011-01-01', '2011-01-05', freq='D', + tz=idx.tz, name='idx') + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + result = idx[0:10:2] + expected = pd.date_range('2011-01-01', '2011-01-09', freq='2D', + tz=idx.tz, name='idx') + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + result = idx[-20:-5:3] + expected = pd.date_range('2011-01-12', '2011-01-24', freq='3D', + tz=idx.tz, name='idx') + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + result = idx[4::-1] + expected = DatetimeIndex(['2011-01-05', '2011-01-04', '2011-01-03', + '2011-01-02', '2011-01-01'], + freq='-1D', tz=idx.tz, name='idx') + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + def test_dti_business_getitem(self): + rng = pd.bdate_range(START, END) + smaller = rng[:5] + exp = DatetimeIndex(rng.view(np.ndarray)[:5]) + tm.assert_index_equal(smaller, exp) + + assert smaller.offset == rng.offset + + sliced = rng[::5] + assert sliced.offset == 
BDay() * 5 + fancy_indexed = rng[[4, 3, 2, 1, 0]] + assert len(fancy_indexed) == 5 + assert isinstance(fancy_indexed, DatetimeIndex) + assert fancy_indexed.freq is None + + # 32-bit vs. 64-bit platforms + assert rng[4] == rng[np.int_(4)] + + def test_dti_business_getitem_matplotlib_hackaround(self): + rng = pd.bdate_range(START, END) + values = rng[:, None] + expected = rng.values[:, None] + tm.assert_numpy_array_equal(values, expected) + + def test_dti_custom_getitem(self): + rng = pd.bdate_range(START, END, freq='C') + smaller = rng[:5] + exp = DatetimeIndex(rng.view(np.ndarray)[:5]) + tm.assert_index_equal(smaller, exp) + assert smaller.offset == rng.offset + + sliced = rng[::5] + assert sliced.offset == CDay() * 5 + + fancy_indexed = rng[[4, 3, 2, 1, 0]] + assert len(fancy_indexed) == 5 + assert isinstance(fancy_indexed, DatetimeIndex) + assert fancy_indexed.freq is None + + # 32-bit vs. 64-bit platforms + assert rng[4] == rng[np.int_(4)] + + def test_dti_custom_getitem_matplotlib_hackaround(self): + rng = pd.bdate_range(START, END, freq='C') + values = rng[:, None] + expected = rng.values[:, None] + tm.assert_numpy_array_equal(values, expected) + + +class TestWhere(object): + def test_where_other(self): # other is ndarray or Index i = pd.date_range('20130101', periods=3, tz='US/Eastern') @@ -46,6 +129,152 @@ def test_where_tz(self): expected = i2 tm.assert_index_equal(result, expected) + +class TestTake(object): + def test_take(self): + # GH#10295 + idx1 = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx') + idx2 = pd.date_range('2011-01-01', '2011-01-31', freq='D', + tz='Asia/Tokyo', name='idx') + + for idx in [idx1, idx2]: + result = idx.take([0]) + assert result == Timestamp('2011-01-01', tz=idx.tz) + + result = idx.take([0, 1, 2]) + expected = pd.date_range('2011-01-01', '2011-01-03', freq='D', + tz=idx.tz, name='idx') + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + result = idx.take([0, 2, 4]) + expected = 
pd.date_range('2011-01-01', '2011-01-05', freq='2D', + tz=idx.tz, name='idx') + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + result = idx.take([7, 4, 1]) + expected = pd.date_range('2011-01-08', '2011-01-02', freq='-3D', + tz=idx.tz, name='idx') + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + result = idx.take([3, 2, 5]) + expected = DatetimeIndex(['2011-01-04', '2011-01-03', + '2011-01-06'], + freq=None, tz=idx.tz, name='idx') + tm.assert_index_equal(result, expected) + assert result.freq is None + + result = idx.take([-3, 2, 5]) + expected = DatetimeIndex(['2011-01-29', '2011-01-03', + '2011-01-06'], + freq=None, tz=idx.tz, name='idx') + tm.assert_index_equal(result, expected) + assert result.freq is None + + def test_take_invalid_kwargs(self): + idx = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx') + indices = [1, 6, 5, 9, 10, 13, 15, 3] + + msg = r"take\(\) got an unexpected keyword argument 'foo'" + tm.assert_raises_regex(TypeError, msg, idx.take, + indices, foo=2) + + msg = "the 'out' parameter is not supported" + tm.assert_raises_regex(ValueError, msg, idx.take, + indices, out=indices) + + msg = "the 'mode' parameter is not supported" + tm.assert_raises_regex(ValueError, msg, idx.take, + indices, mode='clip') + + # TODO: This method came from test_datetime; de-dup with version above + @pytest.mark.parametrize('tz', [None, 'US/Eastern', 'Asia/Tokyo']) + def test_take2(self, tz): + dates = [datetime(2010, 1, 1, 14), datetime(2010, 1, 1, 15), + datetime(2010, 1, 1, 17), datetime(2010, 1, 1, 21)] + + idx = DatetimeIndex(start='2010-01-01 09:00', + end='2010-02-01 09:00', freq='H', tz=tz, + name='idx') + expected = DatetimeIndex(dates, freq=None, name='idx', tz=tz) + + taken1 = idx.take([5, 6, 8, 12]) + taken2 = idx[[5, 6, 8, 12]] + + for taken in [taken1, taken2]: + tm.assert_index_equal(taken, expected) + assert isinstance(taken, DatetimeIndex) + assert taken.freq is None + 
assert taken.tz == expected.tz + assert taken.name == expected.name + + def test_take_fill_value(self): + # GH#12631 + idx = pd.DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01'], + name='xxx') + result = idx.take(np.array([1, 0, -1])) + expected = pd.DatetimeIndex(['2011-02-01', '2011-01-01', '2011-03-01'], + name='xxx') + tm.assert_index_equal(result, expected) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + expected = pd.DatetimeIndex(['2011-02-01', '2011-01-01', 'NaT'], + name='xxx') + tm.assert_index_equal(result, expected) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, + fill_value=True) + expected = pd.DatetimeIndex(['2011-02-01', '2011-01-01', '2011-03-01'], + name='xxx') + tm.assert_index_equal(result, expected) + + msg = ('When allow_fill=True and fill_value is not None, ' + 'all indices must be >= -1') + with tm.assert_raises_regex(ValueError, msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with tm.assert_raises_regex(ValueError, msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + with pytest.raises(IndexError): + idx.take(np.array([1, -5])) + + def test_take_fill_value_with_timezone(self): + idx = pd.DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01'], + name='xxx', tz='US/Eastern') + result = idx.take(np.array([1, 0, -1])) + expected = pd.DatetimeIndex(['2011-02-01', '2011-01-01', '2011-03-01'], + name='xxx', tz='US/Eastern') + tm.assert_index_equal(result, expected) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + expected = pd.DatetimeIndex(['2011-02-01', '2011-01-01', 'NaT'], + name='xxx', tz='US/Eastern') + tm.assert_index_equal(result, expected) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, + fill_value=True) + expected = pd.DatetimeIndex(['2011-02-01', '2011-01-01', '2011-03-01'], + name='xxx', tz='US/Eastern') + tm.assert_index_equal(result, expected) + + msg = ('When allow_fill=True and 
fill_value is not None, ' + 'all indices must be >= -1') + with tm.assert_raises_regex(ValueError, msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with tm.assert_raises_regex(ValueError, msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + with pytest.raises(IndexError): + idx.take(np.array([1, -5])) + + +class TestDatetimeIndex(object): @pytest.mark.parametrize('null', [None, np.nan, pd.NaT]) @pytest.mark.parametrize('tz', [None, 'UTC', 'US/Eastern']) def test_insert_nat(self, tz, null): @@ -253,233 +482,108 @@ def test_delete_slice(self): assert result.freq == expected.freq assert result.tz == expected.tz - def test_getitem(self): - idx1 = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx') - idx2 = pd.date_range('2011-01-01', '2011-01-31', freq='D', - tz='Asia/Tokyo', name='idx') - - for idx in [idx1, idx2]: - result = idx[0] - assert result == Timestamp('2011-01-01', tz=idx.tz) - - result = idx[0:5] - expected = pd.date_range('2011-01-01', '2011-01-05', freq='D', - tz=idx.tz, name='idx') - tm.assert_index_equal(result, expected) - assert result.freq == expected.freq - - result = idx[0:10:2] - expected = pd.date_range('2011-01-01', '2011-01-09', freq='2D', - tz=idx.tz, name='idx') - tm.assert_index_equal(result, expected) - assert result.freq == expected.freq - - result = idx[-20:-5:3] - expected = pd.date_range('2011-01-12', '2011-01-24', freq='3D', - tz=idx.tz, name='idx') - tm.assert_index_equal(result, expected) - assert result.freq == expected.freq - - result = idx[4::-1] - expected = DatetimeIndex(['2011-01-05', '2011-01-04', '2011-01-03', - '2011-01-02', '2011-01-01'], - freq='-1D', tz=idx.tz, name='idx') - tm.assert_index_equal(result, expected) - assert result.freq == expected.freq - - def test_take(self): - # GH 10295 - idx1 = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx') - idx2 = pd.date_range('2011-01-01', '2011-01-31', freq='D', - tz='Asia/Tokyo', name='idx') - - for idx in [idx1, idx2]: - result = 
idx.take([0]) - assert result == Timestamp('2011-01-01', tz=idx.tz) - - result = idx.take([0, 1, 2]) - expected = pd.date_range('2011-01-01', '2011-01-03', freq='D', - tz=idx.tz, name='idx') - tm.assert_index_equal(result, expected) - assert result.freq == expected.freq - - result = idx.take([0, 2, 4]) - expected = pd.date_range('2011-01-01', '2011-01-05', freq='2D', - tz=idx.tz, name='idx') - tm.assert_index_equal(result, expected) - assert result.freq == expected.freq - - result = idx.take([7, 4, 1]) - expected = pd.date_range('2011-01-08', '2011-01-02', freq='-3D', - tz=idx.tz, name='idx') - tm.assert_index_equal(result, expected) - assert result.freq == expected.freq - - result = idx.take([3, 2, 5]) - expected = DatetimeIndex(['2011-01-04', '2011-01-03', - '2011-01-06'], - freq=None, tz=idx.tz, name='idx') - tm.assert_index_equal(result, expected) - assert result.freq is None - - result = idx.take([-3, 2, 5]) - expected = DatetimeIndex(['2011-01-29', '2011-01-03', - '2011-01-06'], - freq=None, tz=idx.tz, name='idx') - tm.assert_index_equal(result, expected) - assert result.freq is None - - def test_take_invalid_kwargs(self): - idx = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx') - indices = [1, 6, 5, 9, 10, 13, 15, 3] - - msg = r"take\(\) got an unexpected keyword argument 'foo'" - tm.assert_raises_regex(TypeError, msg, idx.take, - indices, foo=2) - - msg = "the 'out' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, idx.take, - indices, out=indices) - - msg = "the 'mode' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, idx.take, - indices, mode='clip') - - # TODO: This method came from test_datetime; de-dup with version above - @pytest.mark.parametrize('tz', [None, 'US/Eastern', 'Asia/Tokyo']) - def test_take2(self, tz): - dates = [datetime(2010, 1, 1, 14), datetime(2010, 1, 1, 15), - datetime(2010, 1, 1, 17), datetime(2010, 1, 1, 21)] - - idx = DatetimeIndex(start='2010-01-01 09:00', - end='2010-02-01 
09:00', freq='H', tz=tz, - name='idx') - expected = DatetimeIndex(dates, freq=None, name='idx', tz=tz) - - taken1 = idx.take([5, 6, 8, 12]) - taken2 = idx[[5, 6, 8, 12]] - - for taken in [taken1, taken2]: - tm.assert_index_equal(taken, expected) - assert isinstance(taken, DatetimeIndex) - assert taken.freq is None - assert taken.tz == expected.tz - assert taken.name == expected.name - - def test_take_fill_value(self): - # GH 12631 - idx = pd.DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01'], - name='xxx') - result = idx.take(np.array([1, 0, -1])) - expected = pd.DatetimeIndex(['2011-02-01', '2011-01-01', '2011-03-01'], - name='xxx') - tm.assert_index_equal(result, expected) - - # fill_value - result = idx.take(np.array([1, 0, -1]), fill_value=True) - expected = pd.DatetimeIndex(['2011-02-01', '2011-01-01', 'NaT'], - name='xxx') - tm.assert_index_equal(result, expected) - - # allow_fill=False - result = idx.take(np.array([1, 0, -1]), allow_fill=False, - fill_value=True) - expected = pd.DatetimeIndex(['2011-02-01', '2011-01-01', '2011-03-01'], - name='xxx') - tm.assert_index_equal(result, expected) - - msg = ('When allow_fill=True and fill_value is not None, ' - 'all indices must be >= -1') - with tm.assert_raises_regex(ValueError, msg): - idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assert_raises_regex(ValueError, msg): - idx.take(np.array([1, 0, -5]), fill_value=True) - - with pytest.raises(IndexError): - idx.take(np.array([1, -5])) - - def test_take_fill_value_with_timezone(self): - idx = pd.DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01'], - name='xxx', tz='US/Eastern') - result = idx.take(np.array([1, 0, -1])) - expected = pd.DatetimeIndex(['2011-02-01', '2011-01-01', '2011-03-01'], - name='xxx', tz='US/Eastern') - tm.assert_index_equal(result, expected) - - # fill_value - result = idx.take(np.array([1, 0, -1]), fill_value=True) - expected = pd.DatetimeIndex(['2011-02-01', '2011-01-01', 'NaT'], - name='xxx', tz='US/Eastern') - 
tm.assert_index_equal(result, expected) - - # allow_fill=False - result = idx.take(np.array([1, 0, -1]), allow_fill=False, - fill_value=True) - expected = pd.DatetimeIndex(['2011-02-01', '2011-01-01', '2011-03-01'], - name='xxx', tz='US/Eastern') - tm.assert_index_equal(result, expected) - - msg = ('When allow_fill=True and fill_value is not None, ' - 'all indices must be >= -1') - with tm.assert_raises_regex(ValueError, msg): - idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assert_raises_regex(ValueError, msg): - idx.take(np.array([1, 0, -5]), fill_value=True) - - with pytest.raises(IndexError): - idx.take(np.array([1, -5])) - - -class TestBusinessDatetimeIndexIndexing(object): - def setup_method(self, method): - self.rng = pd.bdate_range(START, END) - - def test_getitem(self): - smaller = self.rng[:5] - exp = DatetimeIndex(self.rng.view(np.ndarray)[:5]) - tm.assert_index_equal(smaller, exp) - - assert smaller.offset == self.rng.offset - - sliced = self.rng[::5] - assert sliced.offset == BDay() * 5 - - fancy_indexed = self.rng[[4, 3, 2, 1, 0]] - assert len(fancy_indexed) == 5 - assert isinstance(fancy_indexed, DatetimeIndex) - assert fancy_indexed.freq is None - - # 32-bit vs. 
64-bit platforms - assert self.rng[4] == self.rng[np.int_(4)] - - def test_getitem_matplotlib_hackaround(self): - values = self.rng[:, None] - expected = self.rng.values[:, None] - tm.assert_numpy_array_equal(values, expected) - - -class TestCustomDatetimeIndexIndexing(object): - def setup_method(self, method): - self.rng = pd.bdate_range(START, END, freq='C') - - def test_getitem(self): - smaller = self.rng[:5] - exp = DatetimeIndex(self.rng.view(np.ndarray)[:5]) - tm.assert_index_equal(smaller, exp) - assert smaller.offset == self.rng.offset - - sliced = self.rng[::5] - assert sliced.offset == CDay() * 5 - - fancy_indexed = self.rng[[4, 3, 2, 1, 0]] - assert len(fancy_indexed) == 5 - assert isinstance(fancy_indexed, DatetimeIndex) - assert fancy_indexed.freq is None - - # 32-bit vs. 64-bit platforms - assert self.rng[4] == self.rng[np.int_(4)] - - def test_getitem_matplotlib_hackaround(self): - values = self.rng[:, None] - expected = self.rng.values[:, None] - tm.assert_numpy_array_equal(values, expected) + def test_get_loc(self): + idx = pd.date_range('2000-01-01', periods=3) + + for method in [None, 'pad', 'backfill', 'nearest']: + assert idx.get_loc(idx[1], method) == 1 + assert idx.get_loc(idx[1].to_pydatetime(), method) == 1 + assert idx.get_loc(str(idx[1]), method) == 1 + + if method is not None: + assert idx.get_loc(idx[1], method, + tolerance=pd.Timedelta('0 days')) == 1 + + assert idx.get_loc('2000-01-01', method='nearest') == 0 + assert idx.get_loc('2000-01-01T12', method='nearest') == 1 + + assert idx.get_loc('2000-01-01T12', method='nearest', + tolerance='1 day') == 1 + assert idx.get_loc('2000-01-01T12', method='nearest', + tolerance=pd.Timedelta('1D')) == 1 + assert idx.get_loc('2000-01-01T12', method='nearest', + tolerance=np.timedelta64(1, 'D')) == 1 + assert idx.get_loc('2000-01-01T12', method='nearest', + tolerance=timedelta(1)) == 1 + with tm.assert_raises_regex(ValueError, + 'unit abbreviation w/o a number'): + idx.get_loc('2000-01-01T12', 
method='nearest', tolerance='foo') + with pytest.raises(KeyError): + idx.get_loc('2000-01-01T03', method='nearest', tolerance='2 hours') + with pytest.raises( + ValueError, + match='tolerance size must match target index size'): + idx.get_loc('2000-01-01', method='nearest', + tolerance=[pd.Timedelta('1day').to_timedelta64(), + pd.Timedelta('1day').to_timedelta64()]) + + assert idx.get_loc('2000', method='nearest') == slice(0, 3) + assert idx.get_loc('2000-01', method='nearest') == slice(0, 3) + + assert idx.get_loc('1999', method='nearest') == 0 + assert idx.get_loc('2001', method='nearest') == 2 + + with pytest.raises(KeyError): + idx.get_loc('1999', method='pad') + with pytest.raises(KeyError): + idx.get_loc('2001', method='backfill') + + with pytest.raises(KeyError): + idx.get_loc('foobar') + with pytest.raises(TypeError): + idx.get_loc(slice(2)) + + idx = pd.to_datetime(['2000-01-01', '2000-01-04']) + assert idx.get_loc('2000-01-02', method='nearest') == 0 + assert idx.get_loc('2000-01-03', method='nearest') == 1 + assert idx.get_loc('2000-01', method='nearest') == slice(0, 2) + + # time indexing + idx = pd.date_range('2000-01-01', periods=24, freq='H') + tm.assert_numpy_array_equal(idx.get_loc(time(12)), + np.array([12]), check_dtype=False) + tm.assert_numpy_array_equal(idx.get_loc(time(12, 30)), + np.array([]), check_dtype=False) + with pytest.raises(NotImplementedError): + idx.get_loc(time(12, 30), method='pad') + + def test_get_indexer(self): + idx = pd.date_range('2000-01-01', periods=3) + exp = np.array([0, 1, 2], dtype=np.intp) + tm.assert_numpy_array_equal(idx.get_indexer(idx), exp) + + target = idx[0] + pd.to_timedelta(['-1 hour', '12 hours', + '1 day 1 hour']) + tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), + np.array([-1, 0, 1], dtype=np.intp)) + tm.assert_numpy_array_equal(idx.get_indexer(target, 'backfill'), + np.array([0, 1, 2], dtype=np.intp)) + tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest'), + np.array([0, 1, 1], 
dtype=np.intp)) + tm.assert_numpy_array_equal( + idx.get_indexer(target, 'nearest', + tolerance=pd.Timedelta('1 hour')), + np.array([0, -1, 1], dtype=np.intp)) + tol_raw = [pd.Timedelta('1 hour'), + pd.Timedelta('1 hour'), + pd.Timedelta('1 hour').to_timedelta64(), ] + tm.assert_numpy_array_equal( + idx.get_indexer(target, 'nearest', + tolerance=[np.timedelta64(x) for x in tol_raw]), + np.array([0, -1, 1], dtype=np.intp)) + tol_bad = [pd.Timedelta('2 hour').to_timedelta64(), + pd.Timedelta('1 hour').to_timedelta64(), + 'foo', ] + with pytest.raises( + ValueError, match='abbreviation w/o a number'): + idx.get_indexer(target, 'nearest', tolerance=tol_bad) + with pytest.raises(ValueError): + idx.get_indexer(idx[[0]], method='nearest', tolerance='foo') + + def test_reasonable_keyerror(self): + # GH#1062 + index = DatetimeIndex(['1/3/2000']) + try: + index.get_loc('1/1/2000') + except KeyError as e: + assert '2000' in str(e) diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index b913934195260..6b8e2203e83fd 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -6,9 +6,9 @@ import pandas as pd from pandas.util import testing as tm from pandas.compat import lrange -from pandas._libs import tslib, tslibs +from pandas._libs import tslibs from pandas import (PeriodIndex, Series, DatetimeIndex, - period_range, Period) + period_range, Period, notna) from pandas._libs.tslibs import period as libperiod @@ -119,7 +119,7 @@ def test_getitem_datetime(self): def test_getitem_nat(self): idx = pd.PeriodIndex(['2011-01', 'NaT', '2011-02'], freq='M') assert idx[0] == pd.Period('2011-01', freq='M') - assert idx[1] is tslib.NaT + assert idx[1] is pd.NaT s = pd.Series([0, 1, 2], index=idx) assert s[pd.NaT] == 1 @@ -127,7 +127,7 @@ def test_getitem_nat(self): s = pd.Series(idx, index=idx) assert (s[pd.Period('2011-01', freq='M')] == pd.Period('2011-01', freq='M')) - assert 
s[pd.NaT] is tslib.NaT + assert s[pd.NaT] is pd.NaT def test_getitem_list_periods(self): # GH 7710 @@ -190,31 +190,43 @@ def test_getitem_day(self): s[v] -class TestIndexing(object): +class TestWhere(object): + @pytest.mark.parametrize('klass', [list, tuple, np.array, Series]) + def test_where(self, klass): + i = period_range('20130101', periods=5, freq='D') + cond = [True] * len(i) + expected = i + result = i.where(klass(cond)) + tm.assert_index_equal(result, expected) - def test_get_loc_msg(self): - idx = period_range('2000-1-1', freq='A', periods=10) - bad_period = Period('2012', 'A') - pytest.raises(KeyError, idx.get_loc, bad_period) + cond = [False] + [True] * (len(i) - 1) + expected = PeriodIndex([pd.NaT] + i[1:].tolist(), freq='D') + result = i.where(klass(cond)) + tm.assert_index_equal(result, expected) - try: - idx.get_loc(bad_period) - except KeyError as inst: - assert inst.args[0] == bad_period + def test_where_other(self): + i = period_range('20130101', periods=5, freq='D') + for arr in [np.nan, pd.NaT]: + result = i.where(notna(i), other=np.nan) + expected = i + tm.assert_index_equal(result, expected) - def test_get_loc_nat(self): - didx = DatetimeIndex(['2011-01-01', 'NaT', '2011-01-03']) - pidx = PeriodIndex(['2011-01-01', 'NaT', '2011-01-03'], freq='M') + i2 = i.copy() + i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(), + freq='D') + result = i.where(notna(i2), i2) + tm.assert_index_equal(result, i2) - # check DatetimeIndex compat - for idx in [didx, pidx]: - assert idx.get_loc(pd.NaT) == 1 - assert idx.get_loc(None) == 1 - assert idx.get_loc(float('nan')) == 1 - assert idx.get_loc(np.nan) == 1 + i2 = i.copy() + i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(), + freq='D') + result = i.where(notna(i2), i2.values) + tm.assert_index_equal(result, i2) + +class TestTake(object): def test_take(self): - # GH 10295 + # GH#10295 idx1 = pd.period_range('2011-01-01', '2011-01-31', freq='D', name='idx') @@ -278,7 +290,7 @@ def test_take_misc(self): 
assert taken.name == expected.name def test_take_fill_value(self): - # GH 12631 + # GH#12631 idx = pd.PeriodIndex(['2011-01-01', '2011-02-01', '2011-03-01'], name='xxx', freq='D') result = idx.take(np.array([1, 0, -1])) @@ -309,6 +321,30 @@ def test_take_fill_value(self): with pytest.raises(IndexError): idx.take(np.array([1, -5])) + +class TestIndexing(object): + + def test_get_loc_msg(self): + idx = period_range('2000-1-1', freq='A', periods=10) + bad_period = Period('2012', 'A') + pytest.raises(KeyError, idx.get_loc, bad_period) + + try: + idx.get_loc(bad_period) + except KeyError as inst: + assert inst.args[0] == bad_period + + def test_get_loc_nat(self): + didx = DatetimeIndex(['2011-01-01', 'NaT', '2011-01-03']) + pidx = PeriodIndex(['2011-01-01', 'NaT', '2011-01-03'], freq='M') + + # check DatetimeIndex compat + for idx in [didx, pidx]: + assert idx.get_loc(pd.NaT) == 1 + assert idx.get_loc(None) == 1 + assert idx.get_loc(float('nan')) == 1 + assert idx.get_loc(np.nan) == 1 + def test_get_loc(self): # GH 17717 p0 = pd.Period('2017-09-01') diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index dd437363cfc1d..4548d7fa1a468 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -5,7 +5,7 @@ import pandas as pd import pandas.util._test_decorators as td from pandas.util import testing as tm -from pandas import (PeriodIndex, period_range, notna, DatetimeIndex, NaT, +from pandas import (PeriodIndex, period_range, DatetimeIndex, NaT, Index, Period, Series, DataFrame, date_range, offsets) @@ -33,38 +33,9 @@ def test_pickle_round_trip(self, freq): result = tm.round_trip_pickle(idx) tm.assert_index_equal(result, idx) - @pytest.mark.parametrize('klass', [list, tuple, np.array, Series]) - def test_where(self, klass): - i = self.create_index() - cond = [True] * len(i) - expected = i - result = i.where(klass(cond)) - tm.assert_index_equal(result, expected) - - cond = 
[False] + [True] * (len(i) - 1) - expected = PeriodIndex([NaT] + i[1:].tolist(), freq='D') - result = i.where(klass(cond)) - tm.assert_index_equal(result, expected) - - def test_where_other(self): - - i = self.create_index() - for arr in [np.nan, pd.NaT]: - result = i.where(notna(i), other=np.nan) - expected = i - tm.assert_index_equal(result, expected) - - i2 = i.copy() - i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(), - freq='D') - result = i.where(notna(i2), i2) - tm.assert_index_equal(result, i2) - - i2 = i.copy() - i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(), - freq='D') - result = i.where(notna(i2), i2.values) - tm.assert_index_equal(result, i2) + def test_where(self): + # This is handled in test_indexing + pass def test_repeat(self): # GH10183 diff --git a/pandas/tests/indexes/timedeltas/test_arithmetic.py b/pandas/tests/indexes/timedeltas/test_arithmetic.py index 24341b3419859..282501860f7e5 100644 --- a/pandas/tests/indexes/timedeltas/test_arithmetic.py +++ b/pandas/tests/indexes/timedeltas/test_arithmetic.py @@ -123,8 +123,149 @@ def test_comparisons_nat(self): tm.assert_numpy_array_equal(result, expected) +class TestTimedeltaIndexMultiplicationDivision(object): + # __mul__, __rmul__, + # __div__, __rdiv__, __floordiv__, __rfloordiv__, + # __mod__, __rmod__, __divmod__, __rdivmod__ + + # ------------------------------------------------------------- + # Multiplication + # organized with scalar others first, then array-like + + def test_tdi_mul_int(self): + idx = TimedeltaIndex(np.arange(5, dtype='int64')) + result = idx * 1 + tm.assert_index_equal(result, idx) + + def test_tdi_rmul_int(self): + idx = TimedeltaIndex(np.arange(5, dtype='int64')) + result = 1 * idx + tm.assert_index_equal(result, idx) + + def test_tdi_mul_tdlike_scalar_raises(self, delta): + rng = timedelta_range('1 days', '10 days', name='foo') + with pytest.raises(TypeError): + rng * delta + + def test_tdi_mul_int_array_zerodim(self): + rng5 = np.arange(5, dtype='int64') + 
idx = TimedeltaIndex(rng5) + expected = TimedeltaIndex(rng5 * 5) + result = idx * np.array(5, dtype='int64') + tm.assert_index_equal(result, expected) + + def test_tdi_mul_int_array(self): + rng5 = np.arange(5, dtype='int64') + idx = TimedeltaIndex(rng5) + didx = TimedeltaIndex(rng5 ** 2) + + result = idx * rng5 + tm.assert_index_equal(result, didx) + + def test_tdi_mul_dti_raises(self): + idx = TimedeltaIndex(np.arange(5, dtype='int64')) + with pytest.raises(TypeError): + idx * idx + + def test_tdi_mul_too_short_raises(self): + idx = TimedeltaIndex(np.arange(5, dtype='int64')) + with pytest.raises(TypeError): + idx * TimedeltaIndex(np.arange(3)) + with pytest.raises(ValueError): + idx * np.array([1, 2]) + + def test_tdi_mul_int_series(self): + idx = TimedeltaIndex(np.arange(5, dtype='int64')) + didx = TimedeltaIndex(np.arange(5, dtype='int64') ** 2) + + result = idx * Series(np.arange(5, dtype='int64')) + + tm.assert_series_equal(result, Series(didx)) + + def test_tdi_mul_float_series(self): + idx = TimedeltaIndex(np.arange(5, dtype='int64')) + + rng5f = np.arange(5, dtype='float64') + result = idx * Series(rng5f + 0.1) + expected = Series(TimedeltaIndex(rng5f * (rng5f + 0.1))) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('other', [np.arange(1, 11), + pd.Int64Index(range(1, 11)), + pd.UInt64Index(range(1, 11)), + pd.Float64Index(range(1, 11)), + pd.RangeIndex(1, 11)]) + def test_tdi_rmul_arraylike(self, other): + tdi = TimedeltaIndex(['1 Day'] * 10) + expected = timedelta_range('1 days', '10 days') + + result = other * tdi + tm.assert_index_equal(result, expected) + commute = tdi * other + tm.assert_index_equal(commute, expected) + + # ------------------------------------------------------------- + # TimedeltaIndex.__div__ + + def test_tdi_div_int(self): + idx = TimedeltaIndex(np.arange(5, dtype='int64')) + result = idx / 1 + tm.assert_index_equal(result, idx) + + def test_tdi_div_tdlike_scalar(self, delta): + rng = timedelta_range('1 
days', '10 days', name='foo') + expected = Int64Index((np.arange(10) + 1) * 12, name='foo') + + result = rng / delta + tm.assert_index_equal(result, expected, exact=False) + + def test_tdi_div_tdlike_scalar_with_nat(self, delta): + rng = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo') + expected = Float64Index([12, np.nan, 24], name='foo') + result = rng / delta + tm.assert_index_equal(result, expected) + + def test_tdi_div_nat_raises(self): + # don't allow division by NaT (make could in the future) + rng = timedelta_range('1 days', '10 days', name='foo') + with pytest.raises(TypeError): + rng / pd.NaT + + # ------------------------------------------------------------- + # TimedeltaIndex.__floordiv__ + + def test_tdi_floordiv_int(self): + idx = TimedeltaIndex(np.arange(5, dtype='int64')) + result = idx // 1 + tm.assert_index_equal(result, idx) + + def test_tdi_floordiv_tdlike_scalar(self, delta): + tdi = timedelta_range('1 days', '10 days', name='foo') + expected = Int64Index((np.arange(10) + 1) * 12, name='foo') + + result = tdi // delta + tm.assert_index_equal(result, expected, exact=False) + + @pytest.mark.parametrize('scalar_td', [ + timedelta(minutes=10, seconds=7), + Timedelta('10m7s'), + Timedelta('10m7s').to_timedelta64()]) + def test_tdi_floordiv_timedelta_scalar(self, scalar_td): + # GH#19125 + tdi = TimedeltaIndex(['00:05:03', '00:05:03', pd.NaT], freq=None) + expected = pd.Index([2.0, 2.0, np.nan]) + + res = tdi.__rfloordiv__(scalar_td) + tm.assert_index_equal(res, expected) + + expected = pd.Index([0.0, 0.0, np.nan]) + + res = tdi // (scalar_td) + tm.assert_index_equal(res, expected) + + class TestTimedeltaIndexArithmetic(object): - _holder = TimedeltaIndex + # Addition and Subtraction Operations # ------------------------------------------------------------- # Invalid Operations @@ -138,6 +279,20 @@ def test_tdi_add_str_invalid(self): with pytest.raises(TypeError): 'a' + tdi + @pytest.mark.parametrize('freq', [None, 'H']) + def 
test_tdi_sub_period(self, freq): + # GH#13078 + # not supported, check TypeError + p = pd.Period('2011-01-01', freq='D') + + idx = pd.TimedeltaIndex(['1 hours', '2 hours'], freq=freq) + + with pytest.raises(TypeError): + idx - p + + with pytest.raises(TypeError): + p - idx + # ------------------------------------------------------------- # TimedeltaIndex.shift is used by __add__/__sub__ @@ -310,69 +465,6 @@ def test_tdi_add_sub_anchored_offset_arraylike(self, box): with tm.assert_produces_warning(PerformanceWarning): anchored - tdi - def test_mul_int(self): - idx = self._holder(np.arange(5, dtype='int64')) - result = idx * 1 - tm.assert_index_equal(result, idx) - - def test_rmul_int(self): - idx = self._holder(np.arange(5, dtype='int64')) - result = 1 * idx - tm.assert_index_equal(result, idx) - - def test_div_int(self): - idx = self._holder(np.arange(5, dtype='int64')) - result = idx / 1 - tm.assert_index_equal(result, idx) - - def test_floordiv_int(self): - idx = self._holder(np.arange(5, dtype='int64')) - result = idx // 1 - tm.assert_index_equal(result, idx) - - def test_mul_int_array_zerodim(self): - rng5 = np.arange(5, dtype='int64') - idx = self._holder(rng5) - expected = self._holder(rng5 * 5) - result = idx * np.array(5, dtype='int64') - tm.assert_index_equal(result, expected) - - def test_mul_int_array(self): - rng5 = np.arange(5, dtype='int64') - idx = self._holder(rng5) - didx = self._holder(rng5 ** 2) - - result = idx * rng5 - tm.assert_index_equal(result, didx) - - def test_mul_int_series(self): - idx = self._holder(np.arange(5, dtype='int64')) - didx = self._holder(np.arange(5, dtype='int64') ** 2) - - result = idx * Series(np.arange(5, dtype='int64')) - - tm.assert_series_equal(result, Series(didx)) - - def test_mul_float_series(self): - idx = self._holder(np.arange(5, dtype='int64')) - - rng5f = np.arange(5, dtype='float64') - result = idx * Series(rng5f + 0.1) - expected = Series(self._holder(rng5f * (rng5f + 0.1))) - 
tm.assert_series_equal(result, expected) - - def test_dti_mul_dti_raises(self): - idx = self._holder(np.arange(5, dtype='int64')) - with pytest.raises(TypeError): - idx * idx - - def test_dti_mul_too_short_raises(self): - idx = self._holder(np.arange(5, dtype='int64')) - with pytest.raises(TypeError): - idx * self._holder(np.arange(3)) - with pytest.raises(ValueError): - idx * np.array([1, 2]) - def test_ufunc_coercions(self): # normal ops are also tested in tseries/test_timedeltas.py idx = TimedeltaIndex(['2H', '4H', '6H', '8H', '10H'], @@ -496,68 +588,6 @@ def test_tdi_radd_timestamp(self): # ------------------------------------------------------------- - @pytest.mark.parametrize('scalar_td', [ - timedelta(minutes=10, seconds=7), - Timedelta('10m7s'), - Timedelta('10m7s').to_timedelta64()]) - def test_tdi_floordiv_timedelta_scalar(self, scalar_td): - # GH#19125 - tdi = TimedeltaIndex(['00:05:03', '00:05:03', pd.NaT], freq=None) - expected = pd.Index([2.0, 2.0, np.nan]) - - res = tdi.__rfloordiv__(scalar_td) - tm.assert_index_equal(res, expected) - - expected = pd.Index([0.0, 0.0, np.nan]) - - res = tdi // (scalar_td) - tm.assert_index_equal(res, expected) - - def test_tdi_floordiv_tdlike_scalar(self, delta): - tdi = timedelta_range('1 days', '10 days', name='foo') - expected = Int64Index((np.arange(10) + 1) * 12, name='foo') - - result = tdi // delta - tm.assert_index_equal(result, expected, exact=False) - - def test_tdi_mul_tdlike_scalar_raises(self, delta): - rng = timedelta_range('1 days', '10 days', name='foo') - with pytest.raises(TypeError): - rng * delta - - def test_tdi_div_nat_raises(self): - # don't allow division by NaT (make could in the future) - rng = timedelta_range('1 days', '10 days', name='foo') - with pytest.raises(TypeError): - rng / pd.NaT - - def test_tdi_div_tdlike_scalar(self, delta): - rng = timedelta_range('1 days', '10 days', name='foo') - expected = Int64Index((np.arange(10) + 1) * 12, name='foo') - - result = rng / delta - 
tm.assert_index_equal(result, expected, exact=False) - - def test_tdi_div_tdlike_scalar_with_nat(self, delta): - rng = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo') - expected = Float64Index([12, np.nan, 24], name='foo') - result = rng / delta - tm.assert_index_equal(result, expected) - - @pytest.mark.parametrize('other', [np.arange(1, 11), - pd.Int64Index(range(1, 11)), - pd.UInt64Index(range(1, 11)), - pd.Float64Index(range(1, 11)), - pd.RangeIndex(1, 11)]) - def test_tdi_rmul_arraylike(self, other): - tdi = TimedeltaIndex(['1 Day'] * 10) - expected = timedelta_range('1 days', '10 days') - - result = other * tdi - tm.assert_index_equal(result, expected) - commute = tdi * other - tm.assert_index_equal(commute, expected) - def test_subtraction_ops(self): # with datetimes/timedelta and tdi/dti tdi = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo') @@ -685,20 +715,6 @@ def test_dti_tdi_numeric_ops(self): expected = DatetimeIndex(['20121231', pd.NaT, '20130101']) tm.assert_index_equal(result, expected) - @pytest.mark.parametrize('freq', [None, 'H']) - def test_sub_period(self, freq): - # GH 13078 - # not supported, check TypeError - p = pd.Period('2011-01-01', freq='D') - - idx = pd.TimedeltaIndex(['1 hours', '2 hours'], freq=freq) - - with pytest.raises(TypeError): - idx - p - - with pytest.raises(TypeError): - p - idx - def test_addition_ops(self): # with datetimes/timedelta and tdi/dti tdi = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo') diff --git a/pandas/tests/indexes/timedeltas/test_indexing.py b/pandas/tests/indexes/timedeltas/test_indexing.py index 59e38c2e738b0..08992188265bd 100644 --- a/pandas/tests/indexes/timedeltas/test_indexing.py +++ b/pandas/tests/indexes/timedeltas/test_indexing.py @@ -8,116 +8,7 @@ from pandas import TimedeltaIndex, timedelta_range, compat, Index, Timedelta -class TestTimedeltaIndex(object): - - def test_insert(self): - - idx = TimedeltaIndex(['4day', '1day', '2day'], name='idx') - - result = 
idx.insert(2, timedelta(days=5)) - exp = TimedeltaIndex(['4day', '1day', '5day', '2day'], name='idx') - tm.assert_index_equal(result, exp) - - # insertion of non-datetime should coerce to object index - result = idx.insert(1, 'inserted') - expected = Index([Timedelta('4day'), 'inserted', Timedelta('1day'), - Timedelta('2day')], name='idx') - assert not isinstance(result, TimedeltaIndex) - tm.assert_index_equal(result, expected) - assert result.name == expected.name - - idx = timedelta_range('1day 00:00:01', periods=3, freq='s', name='idx') - - # preserve freq - expected_0 = TimedeltaIndex(['1day', '1day 00:00:01', '1day 00:00:02', - '1day 00:00:03'], - name='idx', freq='s') - expected_3 = TimedeltaIndex(['1day 00:00:01', '1day 00:00:02', - '1day 00:00:03', '1day 00:00:04'], - name='idx', freq='s') - - # reset freq to None - expected_1_nofreq = TimedeltaIndex(['1day 00:00:01', '1day 00:00:01', - '1day 00:00:02', '1day 00:00:03'], - name='idx', freq=None) - expected_3_nofreq = TimedeltaIndex(['1day 00:00:01', '1day 00:00:02', - '1day 00:00:03', '1day 00:00:05'], - name='idx', freq=None) - - cases = [(0, Timedelta('1day'), expected_0), - (-3, Timedelta('1day'), expected_0), - (3, Timedelta('1day 00:00:04'), expected_3), - (1, Timedelta('1day 00:00:01'), expected_1_nofreq), - (3, Timedelta('1day 00:00:05'), expected_3_nofreq)] - - for n, d, expected in cases: - result = idx.insert(n, d) - tm.assert_index_equal(result, expected) - assert result.name == expected.name - assert result.freq == expected.freq - - # GH 18295 (test missing) - expected = TimedeltaIndex(['1day', pd.NaT, '2day', '3day']) - for na in (np.nan, pd.NaT, None): - result = timedelta_range('1day', '3day').insert(1, na) - tm.assert_index_equal(result, expected) - - def test_delete(self): - idx = timedelta_range(start='1 Days', periods=5, freq='D', name='idx') - - # prserve freq - expected_0 = timedelta_range(start='2 Days', periods=4, freq='D', - name='idx') - expected_4 = timedelta_range(start='1 Days', 
periods=4, freq='D', - name='idx') - - # reset freq to None - expected_1 = TimedeltaIndex( - ['1 day', '3 day', '4 day', '5 day'], freq=None, name='idx') - - cases = {0: expected_0, - -5: expected_0, - -1: expected_4, - 4: expected_4, - 1: expected_1} - for n, expected in compat.iteritems(cases): - result = idx.delete(n) - tm.assert_index_equal(result, expected) - assert result.name == expected.name - assert result.freq == expected.freq - - with pytest.raises((IndexError, ValueError)): - # either depeidnig on numpy version - result = idx.delete(5) - - def test_delete_slice(self): - idx = timedelta_range(start='1 days', periods=10, freq='D', name='idx') - - # prserve freq - expected_0_2 = timedelta_range(start='4 days', periods=7, freq='D', - name='idx') - expected_7_9 = timedelta_range(start='1 days', periods=7, freq='D', - name='idx') - - # reset freq to None - expected_3_5 = TimedeltaIndex(['1 d', '2 d', '3 d', - '7 d', '8 d', '9 d', '10d'], - freq=None, name='idx') - - cases = {(0, 1, 2): expected_0_2, - (7, 8, 9): expected_7_9, - (3, 4, 5): expected_3_5} - for n, expected in compat.iteritems(cases): - result = idx.delete(n) - tm.assert_index_equal(result, expected) - assert result.name == expected.name - assert result.freq == expected.freq - - result = idx.delete(slice(n[0], n[-1] + 1)) - tm.assert_index_equal(result, expected) - assert result.name == expected.name - assert result.freq == expected.freq - +class TestGetItem(object): def test_getitem(self): idx1 = timedelta_range('1 day', '31 day', freq='D', name='idx') @@ -150,6 +41,13 @@ def test_getitem(self): tm.assert_index_equal(result, expected) assert result.freq == expected.freq + +class TestWhere(object): + # placeholder for symmetry with DatetimeIndex and PeriodIndex tests + pass + + +class TestTake(object): def test_take(self): # GH 10295 idx1 = timedelta_range('1 day', '31 day', freq='D', name='idx') @@ -252,6 +150,117 @@ def test_take_fill_value(self): with pytest.raises(IndexError): 
idx.take(np.array([1, -5])) + +class TestTimedeltaIndex(object): + + def test_insert(self): + + idx = TimedeltaIndex(['4day', '1day', '2day'], name='idx') + + result = idx.insert(2, timedelta(days=5)) + exp = TimedeltaIndex(['4day', '1day', '5day', '2day'], name='idx') + tm.assert_index_equal(result, exp) + + # insertion of non-datetime should coerce to object index + result = idx.insert(1, 'inserted') + expected = Index([Timedelta('4day'), 'inserted', Timedelta('1day'), + Timedelta('2day')], name='idx') + assert not isinstance(result, TimedeltaIndex) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + + idx = timedelta_range('1day 00:00:01', periods=3, freq='s', name='idx') + + # preserve freq + expected_0 = TimedeltaIndex(['1day', '1day 00:00:01', '1day 00:00:02', + '1day 00:00:03'], + name='idx', freq='s') + expected_3 = TimedeltaIndex(['1day 00:00:01', '1day 00:00:02', + '1day 00:00:03', '1day 00:00:04'], + name='idx', freq='s') + + # reset freq to None + expected_1_nofreq = TimedeltaIndex(['1day 00:00:01', '1day 00:00:01', + '1day 00:00:02', '1day 00:00:03'], + name='idx', freq=None) + expected_3_nofreq = TimedeltaIndex(['1day 00:00:01', '1day 00:00:02', + '1day 00:00:03', '1day 00:00:05'], + name='idx', freq=None) + + cases = [(0, Timedelta('1day'), expected_0), + (-3, Timedelta('1day'), expected_0), + (3, Timedelta('1day 00:00:04'), expected_3), + (1, Timedelta('1day 00:00:01'), expected_1_nofreq), + (3, Timedelta('1day 00:00:05'), expected_3_nofreq)] + + for n, d, expected in cases: + result = idx.insert(n, d) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + + # GH 18295 (test missing) + expected = TimedeltaIndex(['1day', pd.NaT, '2day', '3day']) + for na in (np.nan, pd.NaT, None): + result = timedelta_range('1day', '3day').insert(1, na) + tm.assert_index_equal(result, expected) + + def test_delete(self): + idx = timedelta_range(start='1 Days', periods=5, 
freq='D', name='idx') + + # prserve freq + expected_0 = timedelta_range(start='2 Days', periods=4, freq='D', + name='idx') + expected_4 = timedelta_range(start='1 Days', periods=4, freq='D', + name='idx') + + # reset freq to None + expected_1 = TimedeltaIndex( + ['1 day', '3 day', '4 day', '5 day'], freq=None, name='idx') + + cases = {0: expected_0, + -5: expected_0, + -1: expected_4, + 4: expected_4, + 1: expected_1} + for n, expected in compat.iteritems(cases): + result = idx.delete(n) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + + with pytest.raises((IndexError, ValueError)): + # either depeidnig on numpy version + result = idx.delete(5) + + def test_delete_slice(self): + idx = timedelta_range(start='1 days', periods=10, freq='D', name='idx') + + # prserve freq + expected_0_2 = timedelta_range(start='4 days', periods=7, freq='D', + name='idx') + expected_7_9 = timedelta_range(start='1 days', periods=7, freq='D', + name='idx') + + # reset freq to None + expected_3_5 = TimedeltaIndex(['1 d', '2 d', '3 d', + '7 d', '8 d', '9 d', '10d'], + freq=None, name='idx') + + cases = {(0, 1, 2): expected_0_2, + (7, 8, 9): expected_7_9, + (3, 4, 5): expected_3_5} + for n, expected in compat.iteritems(cases): + result = idx.delete(n) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + + result = idx.delete(slice(n[0], n[-1] + 1)) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + def test_get_loc(self): idx = pd.to_timedelta(['0 days', '1 days', '2 days']) From c5631bb2edfa3e00fec18f2504d2aed06d675870 Mon Sep 17 00:00:00 2001 From: luzpaz Date: Sat, 24 Feb 2018 08:26:05 -0500 Subject: [PATCH 184/217] DOC: misc. 
typos (#19876) Found via `codespell -q 3 -I ../pandas-whitelist.txt` Where whitelists consists of: ``` ans behaviour doubleclick indicies initialise initialised initialising nd resetted splitted thru valu ``` --- doc/source/basics.rst | 2 +- doc/source/dsintro.rst | 2 +- doc/source/whatsnew/v0.14.1.txt | 2 +- pandas/_libs/groupby_helper.pxi.in | 2 +- pandas/core/arrays/categorical.py | 2 +- pandas/core/internals.py | 8 ++++---- pandas/plotting/_converter.py | 2 +- pandas/tests/extension/category/test_categorical.py | 2 +- pandas/tests/extension/conftest.py | 2 +- pandas/tests/frame/test_mutate_columns.py | 2 +- pandas/tests/frame/test_repr_info.py | 2 +- pandas/tests/io/test_excel.py | 2 +- pandas/tests/io/test_stata.py | 2 +- pandas/tests/reshape/test_concat.py | 4 ++-- pandas/tests/sparse/frame/test_frame.py | 6 +++--- 15 files changed, 21 insertions(+), 21 deletions(-) diff --git a/doc/source/basics.rst b/doc/source/basics.rst index 749d4be11ad45..e1b36a6acad70 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -2312,4 +2312,4 @@ All NumPy dtypes are subclasses of ``numpy.generic``: .. note:: Pandas also defines the types ``category``, and ``datetime64[ns, tz]``, which are not integrated into the normal - NumPy hierarchy and wont show up with the above function. + NumPy hierarchy and won't show up with the above function. diff --git a/doc/source/dsintro.rst b/doc/source/dsintro.rst index 582750b16f40d..e8f73a9ec2e8a 100644 --- a/doc/source/dsintro.rst +++ b/doc/source/dsintro.rst @@ -539,7 +539,7 @@ To write code compatible with all versions of Python, split the assignment in tw you'll need to take care when passing ``assign`` expressions that * Updating an existing column - * Refering to the newly updated column in the same ``assign`` + * Referring to the newly updated column in the same ``assign`` For example, we'll update column "A" and then refer to it when creating "B". 
diff --git a/doc/source/whatsnew/v0.14.1.txt b/doc/source/whatsnew/v0.14.1.txt index d8a6dc1793612..4674cbc846722 100644 --- a/doc/source/whatsnew/v0.14.1.txt +++ b/doc/source/whatsnew/v0.14.1.txt @@ -145,7 +145,7 @@ Performance ~~~~~~~~~~~ - Improvements in dtype inference for numeric operations involving yielding performance gains for dtypes: ``int64``, ``timedelta64``, ``datetime64`` (:issue:`7223`) - Improvements in Series.transform for significant performance gains (:issue:`6496`) -- Improvements in DataFrame.transform with ufuncs and built-in grouper functions for signifcant performance gains (:issue:`7383`) +- Improvements in DataFrame.transform with ufuncs and built-in grouper functions for significant performance gains (:issue:`7383`) - Regression in groupby aggregation of datetime64 dtypes (:issue:`7555`) - Improvements in `MultiIndex.from_product` for large iterables (:issue:`7627`) diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in index 93fbb4477e2d0..e03e3af65755b 100644 --- a/pandas/_libs/groupby_helper.pxi.in +++ b/pandas/_libs/groupby_helper.pxi.in @@ -426,7 +426,7 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out, labels : array containing unique label for each group, with its ordering matching up to the corresponding record in `values` is_datetimelike : bool - unused in this method but provided for call compatability with other + unused in this method but provided for call compatibility with other Cython transformations ties_method : {'keep', 'top', 'bottom'} * keep: leave NA values where they are diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 493b2e5bd899b..c6eeabf0148d0 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -521,7 +521,7 @@ def _from_inferred_categories(cls, inferred_categories, inferred_codes, cats = to_timedelta(inferred_categories, errors='coerce') if known_categories: - # recode from observation oder to 
dtype.categories order + # recode from observation order to dtype.categories order categories = dtype.categories codes = _recode_for_categories(inferred_codes, cats, categories) elif not cats.is_monotonic_increasing: diff --git a/pandas/core/internals.py b/pandas/core/internals.py index bad0626206e80..d385185fbb558 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -2600,12 +2600,12 @@ def __init__(self, values, placement, ndim=None): def _maybe_coerce_values(self, values): """Input validation for values passed to __init__. Ensure that - we have datetime64ns, coercing if nescessary. + we have datetime64ns, coercing if necessary. Parametetrs ----------- values : array-like - Must be convertable to datetime64 + Must be convertible to datetime64 Returns ------- @@ -2760,12 +2760,12 @@ def __init__(self, values, placement, ndim=2, dtype=None): def _maybe_coerce_values(self, values, dtype=None): """Input validation for values passed to __init__. Ensure that - we have datetime64TZ, coercing if nescessary. + we have datetime64TZ, coercing if necessary. Parametetrs ----------- values : array-like - Must be convertable to datetime64 + Must be convertible to datetime64 dtype : string or DatetimeTZDtype, optional Does a shallow copy to this tz diff --git a/pandas/plotting/_converter.py b/pandas/plotting/_converter.py index 9ca06475290e4..f413e4177b386 100644 --- a/pandas/plotting/_converter.py +++ b/pandas/plotting/_converter.py @@ -197,7 +197,7 @@ def __call__(self, x, pos=0): ---------- x : float The time of day specified as seconds since 00:00 (midnight), - with upto microsecond precision. + with up to microsecond precision. 
pos Unused diff --git a/pandas/tests/extension/category/test_categorical.py b/pandas/tests/extension/category/test_categorical.py index ec548fca6d901..8f413b4a19730 100644 --- a/pandas/tests/extension/category/test_categorical.py +++ b/pandas/tests/extension/category/test_categorical.py @@ -60,7 +60,7 @@ def test_align_frame(self, data, na_value): class TestGetitem(base.BaseGetitemTests): - @pytest.mark.skip(reason="Backwards compatability") + @pytest.mark.skip(reason="Backwards compatibility") def test_getitem_scalar(self): # CategoricalDtype.type isn't "correct" since it should # be a parent of the elements (object). But don't want diff --git a/pandas/tests/extension/conftest.py b/pandas/tests/extension/conftest.py index f86849b9cbd61..21ed8894e8ebb 100644 --- a/pandas/tests/extension/conftest.py +++ b/pandas/tests/extension/conftest.py @@ -37,7 +37,7 @@ def na_cmp(): Should return a function of two arguments that returns True if both arguments are (scalar) NA for your type. - By defult, uses ``operator.or`` + By default, uses ``operator.or`` """ return operator.is_ diff --git a/pandas/tests/frame/test_mutate_columns.py b/pandas/tests/frame/test_mutate_columns.py index 4c560129bfa45..51ffe2966b4e5 100644 --- a/pandas/tests/frame/test_mutate_columns.py +++ b/pandas/tests/frame/test_mutate_columns.py @@ -95,7 +95,7 @@ def test_assign_bad(self): def test_assign_dependent_old_python(self): df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}) - # Key C does not exist at defition time of df + # Key C does not exist at definition time of df with pytest.raises(KeyError): df.assign(C=lambda df: df.A, D=lambda df: df['A'] + df['C']) diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index 8c46dc30a0f5f..3e5aae10618e9 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -307,7 +307,7 @@ def test_info_memory_usage(self): res = buf.getvalue().splitlines() assert "memory usage: " in res[-1] - # do not 
display memory usage cas + # do not display memory usage case df.info(buf=buf, memory_usage=False) res = buf.getvalue().splitlines() assert "memory usage: " not in res[-1] diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 4c790a0f0f64a..86cee54665781 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -1786,7 +1786,7 @@ def roundtrip(df, header=True, parser_hdr=0, index=True): nrows = 5 ncols = 3 for use_headers in (True, False): - for i in range(1, 4): # row multindex upto nlevel=3 + for i in range(1, 4): # row multindex up to nlevel=3 for j in range(1, 4): # col "" df = mkdf(nrows, ncols, r_idx_nlevels=i, c_idx_nlevels=j) diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 4e259d0994bdb..49ad07b79d111 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -336,7 +336,7 @@ def test_read_write_dta10(self): with tm.ensure_clean() as path: original.to_stata(path, {'datetime': 'tc'}) written_and_read_again = self.read_dta(path) - # original.index is np.int32, readed index is np.int64 + # original.index is np.int32, read index is np.int64 tm.assert_frame_equal(written_and_read_again.set_index('index'), original, check_index_type=False) diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 7e126dd56775b..cc4eb6b475ae5 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -473,7 +473,7 @@ def test_concat_categorical(self): tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) tm.assert_series_equal(s1.append(s2, ignore_index=True), exp) - # completelly different categories (same dtype) => not-category + # completely different categories (same dtype) => not-category s1 = pd.Series([10, 11, np.nan], dtype='category') s2 = pd.Series([np.nan, 1, 3, 2], dtype='category') @@ -518,7 +518,7 @@ def test_concat_categorical_coercion(self): 
tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp) tm.assert_series_equal(s2.append(s1, ignore_index=True), exp) - # completelly different categories => not-category + # completely different categories => not-category s1 = pd.Series([10, 11, np.nan], dtype='category') s2 = pd.Series([1, 3, 2]) diff --git a/pandas/tests/sparse/frame/test_frame.py b/pandas/tests/sparse/frame/test_frame.py index 0e8b2161cafc4..ee0d63aff7367 100644 --- a/pandas/tests/sparse/frame/test_frame.py +++ b/pandas/tests/sparse/frame/test_frame.py @@ -247,10 +247,10 @@ def test_constructor_preserve_attr(self): def test_constructor_nan_dataframe(self): # GH 10079 trains = np.arange(100) - tresholds = [10, 20, 30, 40, 50, 60] - tuples = [(i, j) for i in trains for j in tresholds] + thresholds = [10, 20, 30, 40, 50, 60] + tuples = [(i, j) for i in trains for j in thresholds] index = pd.MultiIndex.from_tuples(tuples, - names=['trains', 'tresholds']) + names=['trains', 'thresholds']) matrix = np.empty((len(index), len(trains))) matrix.fill(np.nan) df = pd.DataFrame(matrix, index=index, columns=trains, dtype=float) From eb60dae976ad65b50c1b360adff62324c39a6edf Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 24 Feb 2018 15:39:07 +0100 Subject: [PATCH 185/217] DOC: remove deprecated from_items from dsintro docs (#19837) --- doc/source/dsintro.rst | 35 +++++++++++++---------------------- pandas/core/frame.py | 16 +++++++++------- 2 files changed, 22 insertions(+), 29 deletions(-) diff --git a/doc/source/dsintro.rst b/doc/source/dsintro.rst index e8f73a9ec2e8a..1ba00b8fb6f23 100644 --- a/doc/source/dsintro.rst +++ b/doc/source/dsintro.rst @@ -364,6 +364,19 @@ and returns a DataFrame. It operates like the ``DataFrame`` constructor except for the ``orient`` parameter which is ``'columns'`` by default, but which can be set to ``'index'`` in order to use the dict keys as row labels. + +.. 
ipython:: python + + pd.DataFrame.from_dict(dict([('A', [1, 2, 3]), ('B', [4, 5, 6])])) + +If you pass ``orient='index'``, the keys will be the row labels. In this +case, you can also pass the desired column names: + +.. ipython:: python + + pd.DataFrame.from_dict(dict([('A', [1, 2, 3]), ('B', [4, 5, 6])]), + orient='index', columns=['one', 'two', 'three']) + .. _basics.dataframe.from_records: **DataFrame.from_records** @@ -378,28 +391,6 @@ dtype. For example: data pd.DataFrame.from_records(data, index='C') -.. _basics.dataframe.from_items: - -**DataFrame.from_items** - -``DataFrame.from_items`` works analogously to the form of the ``dict`` -constructor that takes a sequence of ``(key, value)`` pairs, where the keys are -column (or row, in the case of ``orient='index'``) names, and the value are the -column values (or row values). This can be useful for constructing a DataFrame -with the columns in a particular order without having to pass an explicit list -of columns: - -.. ipython:: python - - pd.DataFrame.from_items([('A', [1, 2, 3]), ('B', [4, 5, 6])]) - -If you pass ``orient='index'``, the keys will be the row labels. But in this -case you must also pass the desired column names: - -.. ipython:: python - - pd.DataFrame.from_items([('A', [1, 2, 3]), ('B', [4, 5, 6])], - orient='index', columns=['one', 'two', 'three']) Column selection, addition, deletion ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 1c5cf87d6b39b..061b69f25e7ac 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1255,12 +1255,14 @@ def to_records(self, index=True, convert_datetime64=True): @classmethod def from_items(cls, items, columns=None, orient='columns'): - """ + """Construct a dataframe from a list of tuples + .. deprecated:: 0.23.0 - from_items is deprecated and will be removed in a - future version. Use :meth:`DataFrame.from_dict(dict())` - instead. 
:meth:`DataFrame.from_dict(OrderedDict(...))` may be used - to preserve the key order. + `from_items` is deprecated and will be removed in a future version. + Use :meth:`DataFrame.from_dict(dict(items)) ` + instead. + :meth:`DataFrame.from_dict(OrderedDict(items)) ` + may be used to preserve the key order. Convert (key, value) pairs to DataFrame. The keys will be the axis index (usually the columns, but depends on the specified @@ -1284,8 +1286,8 @@ def from_items(cls, items, columns=None, orient='columns'): """ warnings.warn("from_items is deprecated. Please use " - "DataFrame.from_dict(dict()) instead. " - "DataFrame.from_dict(OrderedDict()) may be used to " + "DataFrame.from_dict(dict(items), ...) instead. " + "DataFrame.from_dict(OrderedDict(items)) may be used to " "preserve the key order.", FutureWarning, stacklevel=2) From f8a3e72c1a0f268b8fd60f7f4a1cd19bfbe8c107 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 24 Feb 2018 06:43:34 -0800 Subject: [PATCH 186/217] De-duplicate add_offset_array methods (#19835) --- pandas/core/indexes/datetimelike.py | 97 +++++++++++++++++++---------- pandas/core/indexes/datetimes.py | 23 ------- pandas/core/indexes/period.py | 23 ------- pandas/core/indexes/timedeltas.py | 38 ++--------- 4 files changed, 70 insertions(+), 111 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index ac75e5ae5e2a0..a68d883f04380 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -2,7 +2,7 @@ Base and utility classes for tseries type pandas objects. 
""" import warnings - +import operator from datetime import datetime, timedelta from pandas import compat @@ -10,6 +10,12 @@ from pandas.core.tools.timedeltas import to_timedelta import numpy as np + +from pandas._libs import lib, iNaT, NaT +from pandas._libs.tslibs.period import Period +from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds +from pandas._libs.tslibs.timestamps import round_ns + from pandas.core.dtypes.common import ( _ensure_int64, is_dtype_equal, @@ -25,18 +31,15 @@ is_integer_dtype, is_object_dtype, is_string_dtype, + is_period_dtype, is_timedelta64_dtype) from pandas.core.dtypes.generic import ( ABCIndex, ABCSeries, ABCPeriodIndex, ABCIndexClass) from pandas.core.dtypes.missing import isna from pandas.core import common as com, algorithms, ops from pandas.core.algorithms import checked_add_with_arr -from pandas.errors import NullFrequencyError +from pandas.errors import NullFrequencyError, PerformanceWarning import pandas.io.formats.printing as printing -from pandas._libs import lib, iNaT, NaT -from pandas._libs.tslibs.period import Period -from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds -from pandas._libs.tslibs.timestamps import round_ns from pandas.core.indexes.base import Index, _index_shared_docs from pandas.util._decorators import Appender, cache_readonly @@ -637,13 +640,33 @@ def _sub_datelike(self, other): def _sub_period(self, other): return NotImplemented - def _add_offset_array(self, other): - # Array/Index of DateOffset objects - return NotImplemented + def _addsub_offset_array(self, other, op): + """ + Add or subtract array-like of DateOffset objects - def _sub_offset_array(self, other): - # Array/Index of DateOffset objects - return NotImplemented + Parameters + ---------- + other : Index, np.ndarray + object-dtype containing pd.DateOffset objects + op : {operator.add, operator.sub} + + Returns + ------- + result : same class as self + """ + assert op in [operator.add, operator.sub] + if len(other) == 1: + 
return op(self, other[0]) + + warnings.warn("Adding/subtracting array of DateOffsets to " + "{cls} not vectorized" + .format(cls=type(self).__name__), PerformanceWarning) + + res_values = op(self.astype('O').values, np.array(other)) + kwargs = {} + if not is_period_dtype(self): + kwargs['freq'] = 'infer' + return self._constructor(res_values, **kwargs) @classmethod def _add_datetimelike_methods(cls): @@ -660,13 +683,24 @@ def __add__(self, other): other = lib.item_from_zerodim(other) if isinstance(other, ABCSeries): return NotImplemented - elif is_timedelta64_dtype(other): + + # scalar others + elif isinstance(other, (DateOffset, timedelta, np.timedelta64)): result = self._add_delta(other) - elif isinstance(other, (DateOffset, timedelta)): + elif isinstance(other, (datetime, np.datetime64)): + result = self._add_datelike(other) + elif is_integer(other): + # This check must come after the check for np.timedelta64 + # as is_integer returns True for these + result = self.shift(other) + + # array-like others + elif is_timedelta64_dtype(other): + # TimedeltaIndex, ndarray[timedelta64] result = self._add_delta(other) elif is_offsetlike(other): # Array/Index of DateOffset objects - result = self._add_offset_array(other) + result = self._addsub_offset_array(other, operator.add) elif isinstance(self, TimedeltaIndex) and isinstance(other, Index): if hasattr(other, '_add_delta'): # i.e. 
DatetimeIndex, TimedeltaIndex, or PeriodIndex @@ -674,12 +708,6 @@ def __add__(self, other): else: raise TypeError("cannot add TimedeltaIndex and {typ}" .format(typ=type(other))) - elif is_integer(other): - # This check must come after the check for timedelta64_dtype - # or else it will incorrectly catch np.timedelta64 objects - result = self.shift(other) - elif isinstance(other, (datetime, np.datetime64)): - result = self._add_datelike(other) elif isinstance(other, Index): result = self._add_datelike(other) elif is_integer_dtype(other) and self.freq is None: @@ -709,13 +737,26 @@ def __sub__(self, other): other = lib.item_from_zerodim(other) if isinstance(other, ABCSeries): return NotImplemented - elif is_timedelta64_dtype(other): + + # scalar others + elif isinstance(other, (DateOffset, timedelta, np.timedelta64)): result = self._add_delta(-other) - elif isinstance(other, (DateOffset, timedelta)): + elif isinstance(other, (datetime, np.datetime64)): + result = self._sub_datelike(other) + elif is_integer(other): + # This check must come after the check for np.timedelta64 + # as is_integer returns True for these + result = self.shift(-other) + elif isinstance(other, Period): + result = self._sub_period(other) + + # array-like others + elif is_timedelta64_dtype(other): + # TimedeltaIndex, ndarray[timedelta64] result = self._add_delta(-other) elif is_offsetlike(other): # Array/Index of DateOffset objects - result = self._sub_offset_array(other) + result = self._addsub_offset_array(other, operator.sub) elif isinstance(self, TimedeltaIndex) and isinstance(other, Index): # We checked above for timedelta64_dtype(other) so this # must be invalid. 
@@ -723,14 +764,6 @@ def __sub__(self, other): .format(typ=type(other).__name__)) elif isinstance(other, DatetimeIndex): result = self._sub_datelike(other) - elif is_integer(other): - # This check must come after the check for timedelta64_dtype - # or else it will incorrectly catch np.timedelta64 objects - result = self.shift(-other) - elif isinstance(other, (datetime, np.datetime64)): - result = self._sub_datelike(other) - elif isinstance(other, Period): - result = self._sub_period(other) elif isinstance(other, Index): raise TypeError("cannot subtract {typ1} and {typ2}" .format(typ1=type(self).__name__, diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 17f92339e4205..36ea2bffb9531 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -964,29 +964,6 @@ def _add_offset(self, offset): "or DatetimeIndex", PerformanceWarning) return self.astype('O') + offset - def _add_offset_array(self, other): - # Array/Index of DateOffset objects - if len(other) == 1: - return self + other[0] - else: - warnings.warn("Adding/subtracting array of DateOffsets to " - "{} not vectorized".format(type(self)), - PerformanceWarning) - return self.astype('O') + np.array(other) - # TODO: pass freq='infer' like we do in _sub_offset_array? 
- # TODO: This works for __add__ but loses dtype in __sub__ - - def _sub_offset_array(self, other): - # Array/Index of DateOffset objects - if len(other) == 1: - return self - other[0] - else: - warnings.warn("Adding/subtracting array of DateOffsets to " - "{} not vectorized".format(type(self)), - PerformanceWarning) - res_values = self.astype('O').values - np.array(other) - return self.__class__(res_values, freq='infer') - def _format_native_types(self, na_rep='NaT', date_format=None, **kwargs): from pandas.io.formats.format import _get_format_datetime64_from_values format = _get_format_datetime64_from_values(self, date_format) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 4c14cbffcd813..f0567c9c963af 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -44,7 +44,6 @@ from pandas.util._decorators import (Appender, Substitution, cache_readonly, deprecate_kwarg) from pandas.compat import zip, u -from pandas.errors import PerformanceWarning import pandas.core.indexes.base as ibase _index_doc_kwargs = dict(ibase._index_doc_kwargs) @@ -745,28 +744,6 @@ def _sub_period(self, other): # result must be Int64Index or Float64Index return Index(new_data) - def _add_offset_array(self, other): - # Array/Index of DateOffset objects - if len(other) == 1: - return self + other[0] - else: - warnings.warn("Adding/subtracting array of DateOffsets to " - "{cls} not vectorized" - .format(cls=type(self).__name__), PerformanceWarning) - res_values = self.astype('O').values + np.array(other) - return self.__class__(res_values) - - def _sub_offset_array(self, other): - # Array/Index of DateOffset objects - if len(other) == 1: - return self - other[0] - else: - warnings.warn("Adding/subtracting array of DateOffsets to " - "{cls} not vectorized" - .format(cls=type(self).__name__), PerformanceWarning) - res_values = self.astype('O').values - np.array(other) - return self.__class__(res_values) - def shift(self, n): """ Specialized 
shift which produces an PeriodIndex diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 3542a24290f89..219adfdb66c82 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -1,7 +1,6 @@ """ implement the TimedeltaIndex """ from datetime import timedelta -import warnings import numpy as np from pandas.core.dtypes.common import ( @@ -433,43 +432,16 @@ def _sub_datelike(self, other): else: raise TypeError("cannot subtract a datelike from a TimedeltaIndex") - def _add_offset_array(self, other): - # Array/Index of DateOffset objects + def _addsub_offset_array(self, other, op): + # Add or subtract Array-like of DateOffset objects try: # TimedeltaIndex can only operate with a subset of DateOffset # subclasses. Incompatible classes will raise AttributeError, # which we re-raise as TypeError - if len(other) == 1: - return self + other[0] - else: - from pandas.errors import PerformanceWarning - warnings.warn("Adding/subtracting array of DateOffsets to " - "{} not vectorized".format(type(self)), - PerformanceWarning) - return self.astype('O') + np.array(other) - # TODO: pass freq='infer' like we do in _sub_offset_array? - # TODO: This works for __add__ but loses dtype in __sub__ - except AttributeError: - raise TypeError("Cannot add non-tick DateOffset to TimedeltaIndex") - - def _sub_offset_array(self, other): - # Array/Index of DateOffset objects - try: - # TimedeltaIndex can only operate with a subset of DateOffset - # subclasses. 
Incompatible classes will raise AttributeError, - # which we re-raise as TypeError - if len(other) == 1: - return self - other[0] - else: - from pandas.errors import PerformanceWarning - warnings.warn("Adding/subtracting array of DateOffsets to " - "{} not vectorized".format(type(self)), - PerformanceWarning) - res_values = self.astype('O').values - np.array(other) - return self.__class__(res_values, freq='infer') + return DatetimeIndexOpsMixin._addsub_offset_array(self, other, op) except AttributeError: - raise TypeError("Cannot subtrack non-tick DateOffset from" - " TimedeltaIndex") + raise TypeError("Cannot add/subtract non-tick DateOffset to {cls}" + .format(cls=type(self).__name__)) def _format_native_types(self, na_rep=u('NaT'), date_format=None, **kwargs): From ab0bcfc87d2b4c7750129504bef0075cbcad4352 Mon Sep 17 00:00:00 2001 From: topper-123 Date: Sat, 24 Feb 2018 14:55:35 +0000 Subject: [PATCH 187/217] Let initialisation from dicts use insertion order for python >= 3.6 (part II) (#19859) --- pandas/tests/groupby/test_groupby.py | 16 +++---- pandas/tests/groupby/test_transform.py | 8 +++- pandas/tests/indexing/test_ix.py | 16 ++++--- pandas/tests/io/formats/test_format.py | 12 +++--- pandas/tests/io/formats/test_to_latex.py | 21 ++++----- pandas/tests/io/json/test_pandas.py | 2 +- pandas/tests/reshape/merge/test_merge.py | 43 ++++++++++--------- .../tests/reshape/merge/test_merge_ordered.py | 5 ++- pandas/tests/reshape/test_concat.py | 8 ++-- pandas/tests/reshape/test_melt.py | 8 ++-- pandas/tests/reshape/test_reshape.py | 24 +++++------ 11 files changed, 86 insertions(+), 77 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 4cf7c8013aa2b..129ac6b06205c 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -99,9 +99,9 @@ def max_value(group): applied = df.groupby('A').apply(max_value) result = applied.get_dtype_counts().sort_values() - expected = 
Series({'object': 2, - 'float64': 2, - 'int64': 1}).sort_values() + expected = Series({'float64': 2, + 'int64': 1, + 'object': 2}).sort_values() assert_series_equal(result, expected) def test_groupby_return_type(self): @@ -244,7 +244,7 @@ def func_with_no_date(batch): return pd.Series({'c': 2}) def func_with_date(batch): - return pd.Series({'c': 2, 'b': datetime(2015, 1, 1)}) + return pd.Series({'b': datetime(2015, 1, 1), 'c': 2}) dfg_no_conversion = df.groupby(by=['a']).apply(func_with_no_date) dfg_no_conversion_expected = pd.DataFrame({'c': 2}, index=[1]) @@ -1628,8 +1628,8 @@ def f(g): def test_apply_with_mixed_dtype(self): # GH3480, apply with mixed dtype on axis=1 breaks in 0.11 - df = DataFrame({'foo1': ['one', 'two', 'two', 'three', 'one', 'two'], - 'foo2': np.random.randn(6)}) + df = DataFrame({'foo1': np.random.randn(6), + 'foo2': ['one', 'two', 'two', 'three', 'one', 'two']}) result = df.apply(lambda x: x, axis=1) assert_series_equal(df.get_dtype_counts(), result.get_dtype_counts()) @@ -2113,10 +2113,10 @@ def test_multifunc_sum_bug(self): def test_handle_dict_return_value(self): def f(group): - return {'min': group.min(), 'max': group.max()} + return {'max': group.max(), 'min': group.min()} def g(group): - return Series({'min': group.min(), 'max': group.max()}) + return Series({'max': group.max(), 'min': group.min()}) result = self.df.groupby('A')['C'].apply(f) expected = self.df.groupby('A')['C'].apply(g) diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index 4159d0f709a13..1be7dfdcc64e6 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -519,7 +519,9 @@ def test_cython_transform_frame(self, op, args, targop): 'timedelta': pd.timedelta_range(1, freq='s', periods=1000), 'string': strings * 50, - 'string_missing': strings_missing * 50}) + 'string_missing': strings_missing * 50}, + columns=['float', 'float_missing', 'int', 'datetime', + 'timedelta', 'string', 
'string_missing']) df['cat'] = df['string'].astype('category') df2 = df.copy() @@ -552,7 +554,9 @@ def test_cython_transform_frame(self, op, args, targop): tm.assert_frame_equal(expected, gb.transform(op, *args).sort_index( axis=1)) - tm.assert_frame_equal(expected, getattr(gb, op)(*args)) + tm.assert_frame_equal( + expected, + getattr(gb, op)(*args).sort_index(axis=1)) # individual columns for c in df: if c not in ['float', 'int', 'float_missing' diff --git a/pandas/tests/indexing/test_ix.py b/pandas/tests/indexing/test_ix.py index 3f71e673a4ffe..c84576c984525 100644 --- a/pandas/tests/indexing/test_ix.py +++ b/pandas/tests/indexing/test_ix.py @@ -53,13 +53,15 @@ def test_ix_loc_setitem_consistency(self): # GH 8607 # ix setitem consistency - df = DataFrame({'timestamp': [1413840976, 1413842580, 1413760580], - 'delta': [1174, 904, 161], - 'elapsed': [7673, 9277, 1470]}) - expected = DataFrame({'timestamp': pd.to_datetime( - [1413840976, 1413842580, 1413760580], unit='s'), - 'delta': [1174, 904, 161], - 'elapsed': [7673, 9277, 1470]}) + df = DataFrame({'delta': [1174, 904, 161], + 'elapsed': [7673, 9277, 1470], + 'timestamp': [1413840976, 1413842580, 1413760580]}) + expected = DataFrame({'delta': [1174, 904, 161], + 'elapsed': [7673, 9277, 1470], + 'timestamp': pd.to_datetime( + [1413840976, 1413842580, 1413760580], + unit='s') + }) df2 = df.copy() df2['timestamp'] = pd.to_datetime(df['timestamp'], unit='s') diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index dddba5b425c3b..03c071dbe4bc5 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -539,8 +539,8 @@ def test_east_asian_unicode_frame(self): assert _rep(df) == expected # column name - df = DataFrame({u'あああああ': [1, 222, 33333, 4], - 'b': [u'あ', u'いいい', u'う', u'ええええええ']}, + df = DataFrame({'b': [u'あ', u'いいい', u'う', u'ええええええ'], + u'あああああ': [1, 222, 33333, 4]}, index=['a', 'bb', 'c', 'ddd']) expected = (u" b あああああ\na あ 
1\n" u"bb いいい 222\nc う 33333\n" @@ -647,8 +647,8 @@ def test_east_asian_unicode_frame(self): assert _rep(df) == expected # column name - df = DataFrame({u'あああああ': [1, 222, 33333, 4], - 'b': [u'あ', u'いいい', u'う', u'ええええええ']}, + df = DataFrame({'b': [u'あ', u'いいい', u'う', u'ええええええ'], + u'あああああ': [1, 222, 33333, 4]}, index=['a', 'bb', 'c', 'ddd']) expected = (u" b あああああ\n" u"a あ 1\n" @@ -733,8 +733,8 @@ def test_east_asian_unicode_frame(self): assert _rep(df) == expected # ambiguous unicode - df = DataFrame({u'あああああ': [1, 222, 33333, 4], - 'b': [u'あ', u'いいい', u'¡¡', u'ええええええ']}, + df = DataFrame({'b': [u'あ', u'いいい', u'¡¡', u'ええええええ'], + u'あああああ': [1, 222, 33333, 4]}, index=['a', 'bb', 'c', '¡¡¡']) expected = (u" b あああああ\n" u"a あ 1\n" diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py index f266a8b3a3268..5ebf196be094e 100644 --- a/pandas/tests/io/formats/test_to_latex.py +++ b/pandas/tests/io/formats/test_to_latex.py @@ -115,17 +115,18 @@ def test_to_latex_empty(self): assert result == expected def test_to_latex_with_formatters(self): - df = DataFrame({'int': [1, 2, 3], + df = DataFrame({'datetime64': [datetime(2016, 1, 1), + datetime(2016, 2, 5), + datetime(2016, 3, 3)], 'float': [1.0, 2.0, 3.0], + 'int': [1, 2, 3], 'object': [(1, 2), True, False], - 'datetime64': [datetime(2016, 1, 1), - datetime(2016, 2, 5), - datetime(2016, 3, 3)]}) + }) - formatters = {'int': lambda x: '0x{x:x}'.format(x=x), + formatters = {'datetime64': lambda x: x.strftime('%Y-%m'), 'float': lambda x: '[{x: 4.1f}]'.format(x=x), + 'int': lambda x: '0x{x:x}'.format(x=x), 'object': lambda x: '-{x!s}-'.format(x=x), - 'datetime64': lambda x: x.strftime('%Y-%m'), '__index__': lambda x: 'index: {x}'.format(x=x)} result = df.to_latex(formatters=dict(formatters)) @@ -347,10 +348,10 @@ def test_to_latex_escape(self): a = 'a' b = 'b' - test_dict = {u('co^l1'): {a: "a", - b: "b"}, - u('co$e^x$'): {a: "a", - b: "b"}} + test_dict = {u('co$e^x$'): {a: "a", + b: "b"}, 
+ u('co^l1'): {a: "a", + b: "b"}} unescaped_result = DataFrame(test_dict).to_latex(escape=False) escaped_result = DataFrame(test_dict).to_latex( diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index a72744e08fa7c..7e497c395266f 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -553,7 +553,7 @@ def __str__(self): def test_label_overflow(self): # GH14256: buffer length not checked when writing label - df = pd.DataFrame({'foo': [1337], 'bar' * 100000: [1]}) + df = pd.DataFrame({'bar' * 100000: [1], 'foo': [1337]}) assert df.to_json() == \ '{{"{bar}":{{"0":1}},"foo":{{"0":1337}}}}'.format( bar=('bar' * 100000)) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 101d34ebdb89f..5dca45c8dd8bb 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -588,18 +588,18 @@ def test_merge_on_datetime64tz(self): result = pd.merge(left, right, on='key', how='outer') assert_frame_equal(result, expected) - left = pd.DataFrame({'value': pd.date_range('20151010', periods=2, - tz='US/Eastern'), - 'key': [1, 2]}) - right = pd.DataFrame({'value': pd.date_range('20151011', periods=2, - tz='US/Eastern'), - 'key': [2, 3]}) + left = pd.DataFrame({'key': [1, 2], + 'value': pd.date_range('20151010', periods=2, + tz='US/Eastern')}) + right = pd.DataFrame({'key': [2, 3], + 'value': pd.date_range('20151011', periods=2, + tz='US/Eastern')}) expected = DataFrame({ + 'key': [1, 2, 3], 'value_x': list(pd.date_range('20151010', periods=2, tz='US/Eastern')) + [pd.NaT], 'value_y': [pd.NaT] + list(pd.date_range('20151011', periods=2, - tz='US/Eastern')), - 'key': [1, 2, 3]}) + tz='US/Eastern'))}) result = pd.merge(left, right, on='key', how='outer') assert_frame_equal(result, expected) assert result['value_x'].dtype == 'datetime64[ns, US/Eastern]' @@ -632,18 +632,18 @@ def test_merge_on_periods(self): result = pd.merge(left, 
right, on='key', how='outer') assert_frame_equal(result, expected) - left = pd.DataFrame({'value': pd.period_range('20151010', periods=2, - freq='D'), - 'key': [1, 2]}) - right = pd.DataFrame({'value': pd.period_range('20151011', periods=2, - freq='D'), - 'key': [2, 3]}) + left = pd.DataFrame({'key': [1, 2], + 'value': pd.period_range('20151010', periods=2, + freq='D')}) + right = pd.DataFrame({'key': [2, 3], + 'value': pd.period_range('20151011', periods=2, + freq='D')}) exp_x = pd.period_range('20151010', periods=2, freq='D') exp_y = pd.period_range('20151011', periods=2, freq='D') - expected = DataFrame({'value_x': list(exp_x) + [pd.NaT], - 'value_y': [pd.NaT] + list(exp_y), - 'key': [1, 2, 3]}) + expected = DataFrame({'key': [1, 2, 3], + 'value_x': list(exp_x) + [pd.NaT], + 'value_y': [pd.NaT] + list(exp_y)}) result = pd.merge(left, right, on='key', how='outer') assert_frame_equal(result, expected) assert result['value_x'].dtype == 'object' @@ -651,12 +651,13 @@ def test_merge_on_periods(self): def test_indicator(self): # PR #10054. xref #7412 and closes #8790. 
- df1 = DataFrame({'col1': [0, 1], 'col_left': [ - 'a', 'b'], 'col_conflict': [1, 2]}) + df1 = DataFrame({'col1': [0, 1], 'col_conflict': [1, 2], + 'col_left': ['a', 'b']}) df1_copy = df1.copy() - df2 = DataFrame({'col1': [1, 2, 3, 4, 5], 'col_right': [2, 2, 2, 2, 2], - 'col_conflict': [1, 2, 3, 4, 5]}) + df2 = DataFrame({'col1': [1, 2, 3, 4, 5], + 'col_conflict': [1, 2, 3, 4, 5], + 'col_right': [2, 2, 2, 2, 2]}) df2_copy = df2.copy() df_result = DataFrame({ diff --git a/pandas/tests/reshape/merge/test_merge_ordered.py b/pandas/tests/reshape/merge/test_merge_ordered.py index 31c484a483d18..42d8eb7273ee1 100644 --- a/pandas/tests/reshape/merge/test_merge_ordered.py +++ b/pandas/tests/reshape/merge/test_merge_ordered.py @@ -83,9 +83,10 @@ def test_empty_sequence_concat(self): pd.concat([pd.DataFrame(), None]) def test_doc_example(self): - left = DataFrame({'key': ['a', 'c', 'e', 'a', 'c', 'e'], + left = DataFrame({'group': list('aaabbb'), + 'key': ['a', 'c', 'e', 'a', 'c', 'e'], 'lvalue': [1, 2, 3] * 2, - 'group': list('aaabbb')}) + }) right = DataFrame({'key': ['b', 'c', 'd'], 'rvalue': [1, 2, 3]}) diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index cc4eb6b475ae5..437b4179c580a 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -1542,10 +1542,10 @@ def test_concat_bug_2972(self): def test_concat_bug_3602(self): # GH 3602, duplicate columns - df1 = DataFrame({'firmNo': [0, 0, 0, 0], 'stringvar': [ - 'rrr', 'rrr', 'rrr', 'rrr'], 'prc': [6, 6, 6, 6]}) - df2 = DataFrame({'misc': [1, 2, 3, 4], 'prc': [ - 6, 6, 6, 6], 'C': [9, 10, 11, 12]}) + df1 = DataFrame({'firmNo': [0, 0, 0, 0], 'prc': [6, 6, 6, 6], + 'stringvar': ['rrr', 'rrr', 'rrr', 'rrr']}) + df2 = DataFrame({'C': [9, 10, 11, 12], 'misc': [1, 2, 3, 4], + 'prc': [6, 6, 6, 6]}) expected = DataFrame([[0, 6, 'rrr', 9, 1, 6], [0, 6, 'rrr', 10, 2, 6], [0, 6, 'rrr', 11, 3, 6], diff --git a/pandas/tests/reshape/test_melt.py 
b/pandas/tests/reshape/test_melt.py index b7422dfd7e911..000b22d4fdd36 100644 --- a/pandas/tests/reshape/test_melt.py +++ b/pandas/tests/reshape/test_melt.py @@ -589,11 +589,11 @@ def test_nonnumeric_suffix(self): def test_mixed_type_suffix(self): df = pd.DataFrame({ - 'treatment_1': [1.0, 2.0], - 'treatment_foo': [3.0, 4.0], - 'result_foo': [5.0, 6.0], + 'A': ['X1', 'X2'], 'result_1': [0, 9], - 'A': ['X1', 'X2']}) + 'result_foo': [5.0, 6.0], + 'treatment_1': [1.0, 2.0], + 'treatment_foo': [3.0, 4.0]}) expected = pd.DataFrame({ 'A': ['X1', 'X2', 'X1', 'X2'], 'colname': ['1', '1', 'foo', 'foo'], diff --git a/pandas/tests/reshape/test_reshape.py b/pandas/tests/reshape/test_reshape.py index a57c3c41b3637..c4d925b83585b 100644 --- a/pandas/tests/reshape/test_reshape.py +++ b/pandas/tests/reshape/test_reshape.py @@ -100,8 +100,8 @@ def test_basic_types(self, sparse, dtype): expected_counts = {'int64': 1, 'object': 1} expected_counts[dtype_name] = 3 + expected_counts.get(dtype_name, 0) - expected = Series(expected_counts).sort_values() - tm.assert_series_equal(result.get_dtype_counts().sort_values(), + expected = Series(expected_counts).sort_index() + tm.assert_series_equal(result.get_dtype_counts().sort_index(), expected) def test_just_na(self, sparse): @@ -212,10 +212,10 @@ def test_dataframe_dummies_prefix_str(self, df, sparse): def test_dataframe_dummies_subset(self, df, sparse): result = get_dummies(df, prefix=['from_A'], columns=['A'], sparse=sparse) - expected = DataFrame({'from_A_a': [1, 0, 1], - 'from_A_b': [0, 1, 0], - 'B': ['b', 'b', 'c'], - 'C': [1, 2, 3]}, dtype=np.uint8) + expected = DataFrame({'B': ['b', 'b', 'c'], + 'C': [1, 2, 3], + 'from_A_a': [1, 0, 1], + 'from_A_b': [0, 1, 0]}, dtype=np.uint8) expected[['C']] = df[['C']] assert_frame_equal(result, expected) @@ -249,16 +249,16 @@ def test_dataframe_dummies_prefix_sep_bad_length(self, df, sparse): def test_dataframe_dummies_prefix_dict(self, sparse): prefixes = {'A': 'from_A', 'B': 'from_B'} - df = 
DataFrame({'A': ['a', 'b', 'a'], - 'B': ['b', 'b', 'c'], - 'C': [1, 2, 3]}) + df = DataFrame({'C': [1, 2, 3], + 'A': ['a', 'b', 'a'], + 'B': ['b', 'b', 'c']}) result = get_dummies(df, prefix=prefixes, sparse=sparse) - expected = DataFrame({'from_A_a': [1, 0, 1], + expected = DataFrame({'C': [1, 2, 3], + 'from_A_a': [1, 0, 1], 'from_A_b': [0, 1, 0], 'from_B_b': [1, 1, 0], - 'from_B_c': [0, 0, 1], - 'C': [1, 2, 3]}) + 'from_B_c': [0, 0, 1]}) columns = ['from_A_a', 'from_A_b', 'from_B_b', 'from_B_c'] expected[columns] = expected[columns].astype(np.uint8) From f001b705517131bf30dcc4711b73cc2310f78039 Mon Sep 17 00:00:00 2001 From: cbertinato Date: Sat, 24 Feb 2018 09:58:40 -0500 Subject: [PATCH 188/217] BUG: fix Series constructor for scalar and Categorical dtype (#19717) --- doc/source/whatsnew/v0.23.0.txt | 3 ++- pandas/core/dtypes/cast.py | 2 +- pandas/tests/dtypes/test_cast.py | 15 ++++++++++++++- pandas/tests/series/test_constructors.py | 7 +++++++ 4 files changed, 24 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index ca5749afd11bc..a188ddd613080 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -730,7 +730,8 @@ Categorical ``self`` but in a different order (:issue:`19551`) - Bug in :meth:`Index.astype` with a categorical dtype where the resultant index is not converted to a :class:`CategoricalIndex` for all types of index (:issue:`18630`) - Bug in :meth:`Series.astype` and ``Categorical.astype()`` where an existing categorical data does not get updated (:issue:`10696`, :issue:`18593`) -- Bug in :class:`Index` constructor with ``dtype=CategoricalDtype(...)`` where ``categories`` and ``ordered`` are not maintained (:issue:`19032`) +- Bug in :class:`Index` constructor with ``dtype=CategoricalDtype(...)`` where ``categories`` and ``ordered`` are not maintained (:issue:`19032`) +- Bug in :class:`Series` constructor with scalar and ``dtype=CategoricalDtype(...)`` where
``categories`` and ``ordered`` are not maintained (:issue:`19565`) Datetimelike ^^^^^^^^^^^^ diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 55919fb2bea0d..352ce29f5c37b 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1178,7 +1178,7 @@ def construct_1d_arraylike_from_scalar(value, length, dtype): subarr = DatetimeIndex([value] * length, dtype=dtype) elif is_categorical_dtype(dtype): from pandas import Categorical - subarr = Categorical([value] * length) + subarr = Categorical([value] * length, dtype=dtype) else: if not isinstance(dtype, (np.dtype, type(np.dtype))): dtype = dtype.dtype diff --git a/pandas/tests/dtypes/test_cast.py b/pandas/tests/dtypes/test_cast.py index d13d781f03117..31bd962b67afb 100644 --- a/pandas/tests/dtypes/test_cast.py +++ b/pandas/tests/dtypes/test_cast.py @@ -22,7 +22,8 @@ maybe_convert_string_to_object, maybe_convert_scalar, find_common_type, - construct_1d_object_array_from_listlike) + construct_1d_object_array_from_listlike, + construct_1d_arraylike_from_scalar) from pandas.core.dtypes.dtypes import ( CategoricalDtype, DatetimeTZDtype, @@ -422,3 +423,15 @@ def test_cast_1d_array(self, datum1, datum2): @pytest.mark.parametrize('val', [1, 2., None]) def test_cast_1d_array_invalid_scalar(self, val): pytest.raises(TypeError, construct_1d_object_array_from_listlike, val) + + def test_cast_1d_arraylike_from_scalar_categorical(self): + # GH 19565 - Categorical result from scalar did not maintain categories + # and ordering of the passed dtype + cats = ['a', 'b', 'c'] + cat_type = CategoricalDtype(categories=cats, ordered=False) + expected = pd.Categorical(['a', 'a'], categories=cats) + result = construct_1d_arraylike_from_scalar('a', len(expected), + cat_type) + tm.assert_categorical_equal(result, expected, + check_category_order=True, + check_dtype=True) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 33737387edffa..77f9dfcce686d
100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -270,6 +270,13 @@ def test_constructor_categorical_dtype(self): tm.assert_index_equal(result.cat.categories, pd.Index(['b', 'a'])) assert result.cat.ordered is False + # GH 19565 - Check broadcasting of scalar with Categorical dtype + result = Series('a', index=[0, 1], + dtype=CategoricalDtype(['a', 'b'], ordered=True)) + expected = Series(['a', 'a'], index=[0, 1], + dtype=CategoricalDtype(['a', 'b'], ordered=True)) + tm.assert_series_equal(result, expected, check_categorical=True) + def test_categorical_sideeffects_free(self): # Passing a categorical to a Series and then changing values in either # the series or the categorical should not change the values in the From 5d0f3d5e68fc3e553926462b7ff435f1aba70a56 Mon Sep 17 00:00:00 2001 From: jayfoad Date: Sat, 24 Feb 2018 15:08:16 +0000 Subject: [PATCH 189/217] Raise OptionError instead of KeyError in __getattr__. Fixes #19789. (#19790) --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/core/config.py | 5 ++++- pandas/tests/test_config.py | 6 ++++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index a188ddd613080..6bcc6d1582c34 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -909,3 +909,4 @@ Other ^^^^^ - Improved error message when attempting to use a Python keyword as an identifier in a ``numexpr`` backed query (:issue:`18221`) +- Bug in accessing a :func:`pandas.get_option`, which raised ``KeyError`` rather than ``OptionError`` when looking up a non-existent option key in some cases (:issue:`19789`) diff --git a/pandas/core/config.py b/pandas/core/config.py index 692aed178719d..369e0568346ef 100644 --- a/pandas/core/config.py +++ b/pandas/core/config.py @@ -196,7 +196,10 @@ def __getattr__(self, key): if prefix: prefix += "."
prefix += key - v = object.__getattribute__(self, "d")[key] + try: + v = object.__getattribute__(self, "d")[key] + except KeyError: + raise OptionError("No such option") if isinstance(v, dict): return DictWrapper(v, prefix) else: diff --git a/pandas/tests/test_config.py b/pandas/tests/test_config.py index 8d6f36ac6a798..91ce65dcce9b2 100644 --- a/pandas/tests/test_config.py +++ b/pandas/tests/test_config.py @@ -428,3 +428,9 @@ def test_option_context_scope(self): # Ensure the current context is reset assert self.cf.get_option(option_name) == original_value + + def test_dictwrapper_getattr(self): + options = self.cf.options + # GH 19789 + pytest.raises(self.cf.OptionError, getattr, options, 'bananas') + assert not hasattr(options, 'bananas') From fbc8d724ee697e6df390e3ab93a59ed2ad532c41 Mon Sep 17 00:00:00 2001 From: Jaume Bonet Date: Sat, 24 Feb 2018 16:10:34 +0100 Subject: [PATCH 190/217] Keep subclassing in apply (#19823) --- doc/source/whatsnew/v0.23.0.txt | 2 ++ pandas/core/apply.py | 16 ++++----- pandas/tests/frame/test_subclass.py | 56 +++++++++++++++++++++++++++++ 3 files changed, 66 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 6bcc6d1582c34..fd3c3a5a7a301 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -295,8 +295,10 @@ Other Enhancements - ``IntervalIndex.astype`` now supports conversions between subtypes when passed an ``IntervalDtype`` (:issue:`19197`) - :class:`IntervalIndex` and its associated constructor methods (``from_arrays``, ``from_breaks``, ``from_tuples``) have gained a ``dtype`` parameter (:issue:`19262`) - Added :func:`SeriesGroupBy.is_monotonic_increasing` and :func:`SeriesGroupBy.is_monotonic_decreasing` (:issue:`17015`) +- For subclassed ``DataFrames``, :func:`DataFrame.apply` will now preserve the ``Series`` subclass (if defined) when passing the data to the applied function (:issue:`19822`) - :func:`DataFrame.from_dict` now accepts 
a ``columns`` argument that can be used to specify the column names when ``orient='index'`` is used (:issue:`18529`) + .. _whatsnew_0230.api_breaking: Backwards incompatible API changes diff --git a/pandas/core/apply.py b/pandas/core/apply.py index c65943fbbb201..9056f78ee02ed 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -162,7 +162,7 @@ def apply_empty_result(self): pass if reduce: - return Series(np.nan, index=self.agg_axis) + return self.obj._constructor_sliced(np.nan, index=self.agg_axis) else: return self.obj.copy() @@ -175,11 +175,13 @@ def apply_raw(self): result = np.apply_along_axis(self.f, self.axis, self.values) # TODO: mixed type case - from pandas import DataFrame, Series if result.ndim == 2: - return DataFrame(result, index=self.index, columns=self.columns) + return self.obj._constructor(result, + index=self.index, + columns=self.columns) else: - return Series(result, index=self.agg_axis) + return self.obj._constructor_sliced(result, + index=self.agg_axis) def apply_broadcast(self, target): result_values = np.empty_like(target.values) @@ -232,7 +234,7 @@ def apply_standard(self): axis=self.axis, dummy=dummy, labels=labels) - return Series(result, index=labels) + return self.obj._constructor_sliced(result, index=labels) except Exception: pass @@ -291,8 +293,7 @@ def wrap_results(self): return self.wrap_results_for_axis() # dict of scalars - from pandas import Series - result = Series(results) + result = self.obj._constructor_sliced(results) result.index = self.res_index return result @@ -379,7 +380,6 @@ def wrap_results_for_axis(self): # we have a non-series and don't want inference elif not isinstance(results[0], ABCSeries): from pandas import Series - result = Series(results) result.index = self.res_index diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index c52b512c2930a..caaa311e9ee96 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -514,3 +514,59 
@@ def test_subclassed_wide_to_long(self): long_frame = pd.wide_to_long(df, ["A", "B"], i="id", j="year") tm.assert_frame_equal(long_frame, expected) + + def test_subclassed_apply(self): + # GH 19822 + + def check_row_subclass(row): + assert isinstance(row, tm.SubclassedSeries) + + def strech(row): + if row["variable"] == "height": + row["value"] += 0.5 + return row + + df = tm.SubclassedDataFrame([ + ['John', 'Doe', 'height', 5.5], + ['Mary', 'Bo', 'height', 6.0], + ['John', 'Doe', 'weight', 130], + ['Mary', 'Bo', 'weight', 150]], + columns=['first', 'last', 'variable', 'value']) + + df.apply(lambda x: check_row_subclass(x)) + df.apply(lambda x: check_row_subclass(x), axis=1) + + expected = tm.SubclassedDataFrame([ + ['John', 'Doe', 'height', 6.0], + ['Mary', 'Bo', 'height', 6.5], + ['John', 'Doe', 'weight', 130], + ['Mary', 'Bo', 'weight', 150]], + columns=['first', 'last', 'variable', 'value']) + + result = df.apply(lambda x: strech(x), axis=1) + assert isinstance(result, tm.SubclassedDataFrame) + tm.assert_frame_equal(result, expected) + + expected = tm.SubclassedDataFrame([ + [1, 2, 3], + [1, 2, 3], + [1, 2, 3], + [1, 2, 3]]) + + result = df.apply(lambda x: tm.SubclassedSeries([1, 2, 3]), axis=1) + assert isinstance(result, tm.SubclassedDataFrame) + tm.assert_frame_equal(result, expected) + + result = df.apply(lambda x: [1, 2, 3], axis=1, result_type="expand") + assert isinstance(result, tm.SubclassedDataFrame) + tm.assert_frame_equal(result, expected) + + expected = tm.SubclassedSeries([ + [1, 2, 3], + [1, 2, 3], + [1, 2, 3], + [1, 2, 3]]) + + result = df.apply(lambda x: [1, 2, 3], axis=1) + assert not isinstance(result, tm.SubclassedDataFrame) + tm.assert_series_equal(result, expected) From faf595e4159ec58aa904a3f5e50887b2e0eae86b Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 24 Feb 2018 09:19:19 -0600 Subject: [PATCH 191/217] REF: Base class for all extension tests (#19863) --- ci/lint.sh | 9 +++++++ pandas/tests/extension/base/__init__.py | 8 
++++++ pandas/tests/extension/base/base.py | 6 +++++ pandas/tests/extension/base/casting.py | 4 ++- pandas/tests/extension/base/constructors.py | 4 ++- pandas/tests/extension/base/dtype.py | 4 ++- pandas/tests/extension/base/getitem.py | 29 +++++++++++---------- pandas/tests/extension/base/interface.py | 4 ++- pandas/tests/extension/base/methods.py | 9 ++++--- pandas/tests/extension/base/missing.py | 14 +++++----- pandas/tests/extension/base/reshaping.py | 17 ++++++------ 11 files changed, 72 insertions(+), 36 deletions(-) create mode 100644 pandas/tests/extension/base/base.py diff --git a/ci/lint.sh b/ci/lint.sh index fcd65fc5aba5e..545ac9c90c5c1 100755 --- a/ci/lint.sh +++ b/ci/lint.sh @@ -111,6 +111,15 @@ if [ "$LINT" ]; then RET=1 fi + # Check for the following code in the extension array base tests + # tm.assert_frame_equal + # tm.assert_series_equal + grep -r -E --include '*.py' --exclude base.py 'tm.assert_(series|frame)_equal' pandas/tests/extension/base + + if [ $? = "0" ]; then + RET=1 + fi + echo "Check for invalid testing DONE" # Check for imports from pandas.core.common instead diff --git a/pandas/tests/extension/base/__init__.py b/pandas/tests/extension/base/__init__.py index 2273ef1f3e110..27c106efd0524 100644 --- a/pandas/tests/extension/base/__init__.py +++ b/pandas/tests/extension/base/__init__.py @@ -31,6 +31,14 @@ class TestMyDtype(BaseDtypeTests): Your class ``TestDtype`` will inherit all the tests defined on ``BaseDtypeTests``. pytest's fixture discover will supply your ``dtype`` wherever the test requires it. You're free to implement additional tests. + +All the tests in these modules use ``self.assert_frame_equal`` or +``self.assert_series_equal`` for dataframe or series comparisons. By default, +they use the usual ``pandas.testing.assert_frame_equal`` and +``pandas.testing.assert_series_equal``. You can override the checks used +by defining the staticmethods ``assert_frame_equal`` and +``assert_series_equal`` on your base test class. 
+ """ from .casting import BaseCastingTests # noqa from .constructors import BaseConstructorsTests # noqa diff --git a/pandas/tests/extension/base/base.py b/pandas/tests/extension/base/base.py new file mode 100644 index 0000000000000..d29587e635ebd --- /dev/null +++ b/pandas/tests/extension/base/base.py @@ -0,0 +1,6 @@ +import pandas.util.testing as tm + + +class BaseExtensionTests(object): + assert_series_equal = staticmethod(tm.assert_series_equal) + assert_frame_equal = staticmethod(tm.assert_frame_equal) diff --git a/pandas/tests/extension/base/casting.py b/pandas/tests/extension/base/casting.py index bcfbf0a247269..adc690939b36c 100644 --- a/pandas/tests/extension/base/casting.py +++ b/pandas/tests/extension/base/casting.py @@ -1,8 +1,10 @@ import pandas as pd from pandas.core.internals import ObjectBlock +from .base import BaseExtensionTests -class BaseCastingTests(object): + +class BaseCastingTests(BaseExtensionTests): """Casting to and from ExtensionDtypes""" def test_astype_object_series(self, all_data): diff --git a/pandas/tests/extension/base/constructors.py b/pandas/tests/extension/base/constructors.py index 7ad100e6289e9..2d5d747aec5a7 100644 --- a/pandas/tests/extension/base/constructors.py +++ b/pandas/tests/extension/base/constructors.py @@ -4,8 +4,10 @@ import pandas.util.testing as tm from pandas.core.internals import ExtensionBlock +from .base import BaseExtensionTests -class BaseConstructorsTests(object): + +class BaseConstructorsTests(BaseExtensionTests): def test_series_constructor(self, data): result = pd.Series(data) diff --git a/pandas/tests/extension/base/dtype.py b/pandas/tests/extension/base/dtype.py index f5015bd469f13..63d3d807c270c 100644 --- a/pandas/tests/extension/base/dtype.py +++ b/pandas/tests/extension/base/dtype.py @@ -1,8 +1,10 @@ import numpy as np import pandas as pd +from .base import BaseExtensionTests -class BaseDtypeTests(object): + +class BaseDtypeTests(BaseExtensionTests): """Base class for ExtensionDtype classes""" 
def test_name(self, dtype): diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py index f43971e928cac..31ed8b9e01225 100644 --- a/pandas/tests/extension/base/getitem.py +++ b/pandas/tests/extension/base/getitem.py @@ -1,20 +1,21 @@ import numpy as np import pandas as pd -import pandas.util.testing as tm +from .base import BaseExtensionTests -class BaseGetitemTests(object): + +class BaseGetitemTests(BaseExtensionTests): """Tests for ExtensionArray.__getitem__.""" def test_iloc_series(self, data): ser = pd.Series(data) result = ser.iloc[:4] expected = pd.Series(data[:4]) - tm.assert_series_equal(result, expected) + self.assert_series_equal(result, expected) result = ser.iloc[[0, 1, 2, 3]] - tm.assert_series_equal(result, expected) + self.assert_series_equal(result, expected) def test_iloc_frame(self, data): df = pd.DataFrame({"A": data, 'B': np.arange(len(data))}) @@ -22,30 +23,30 @@ def test_iloc_frame(self, data): # slice -> frame result = df.iloc[:4, [0]] - tm.assert_frame_equal(result, expected) + self.assert_frame_equal(result, expected) # sequence -> frame result = df.iloc[[0, 1, 2, 3], [0]] - tm.assert_frame_equal(result, expected) + self.assert_frame_equal(result, expected) expected = pd.Series(data[:4], name='A') # slice -> series result = df.iloc[:4, 0] - tm.assert_series_equal(result, expected) + self.assert_series_equal(result, expected) # sequence -> series result = df.iloc[:4, 0] - tm.assert_series_equal(result, expected) + self.assert_series_equal(result, expected) def test_loc_series(self, data): ser = pd.Series(data) result = ser.loc[:3] expected = pd.Series(data[:4]) - tm.assert_series_equal(result, expected) + self.assert_series_equal(result, expected) result = ser.loc[[0, 1, 2, 3]] - tm.assert_series_equal(result, expected) + self.assert_series_equal(result, expected) def test_loc_frame(self, data): df = pd.DataFrame({"A": data, 'B': np.arange(len(data))}) @@ -53,21 +54,21 @@ def test_loc_frame(self, data): # 
slice -> frame result = df.loc[:3, ['A']] - tm.assert_frame_equal(result, expected) + self.assert_frame_equal(result, expected) # sequence -> frame result = df.loc[[0, 1, 2, 3], ['A']] - tm.assert_frame_equal(result, expected) + self.assert_frame_equal(result, expected) expected = pd.Series(data[:4], name='A') # slice -> series result = df.loc[:3, 'A'] - tm.assert_series_equal(result, expected) + self.assert_series_equal(result, expected) # sequence -> series result = df.loc[:3, 'A'] - tm.assert_series_equal(result, expected) + self.assert_series_equal(result, expected) def test_getitem_scalar(self, data): result = data[0] diff --git a/pandas/tests/extension/base/interface.py b/pandas/tests/extension/base/interface.py index 8f17131a9482b..e1596f0675f32 100644 --- a/pandas/tests/extension/base/interface.py +++ b/pandas/tests/extension/base/interface.py @@ -5,8 +5,10 @@ from pandas.core.dtypes.common import is_extension_array_dtype from pandas.core.dtypes.dtypes import ExtensionDtype +from .base import BaseExtensionTests -class BaseInterfaceTests(object): + +class BaseInterfaceTests(BaseExtensionTests): """Tests that the basic interface is satisfied.""" # ------------------------------------------------------------------------ # Interface diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index c77811ca63926..74e5d180b1aa3 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -2,10 +2,11 @@ import numpy as np import pandas as pd -import pandas.util.testing as tm +from .base import BaseExtensionTests -class BaseMethodsTests(object): + +class BaseMethodsTests(BaseExtensionTests): """Various Series and DataFrame methods.""" @pytest.mark.parametrize('dropna', [True, False]) @@ -19,13 +20,13 @@ def test_value_counts(self, all_data, dropna): result = pd.Series(all_data).value_counts(dropna=dropna).sort_index() expected = pd.Series(other).value_counts(dropna=dropna).sort_index() - 
tm.assert_series_equal(result, expected) + self.assert_series_equal(result, expected) def test_count(self, data_missing): df = pd.DataFrame({"A": data_missing}) result = df.count(axis='columns') expected = pd.Series([0, 1]) - tm.assert_series_equal(result, expected) + self.assert_series_equal(result, expected) def test_apply_simple_series(self, data): result = pd.Series(data).apply(id) diff --git a/pandas/tests/extension/base/missing.py b/pandas/tests/extension/base/missing.py index 1d6f2eea1f1f9..3ae82fa1ca432 100644 --- a/pandas/tests/extension/base/missing.py +++ b/pandas/tests/extension/base/missing.py @@ -3,8 +3,10 @@ import pandas as pd import pandas.util.testing as tm +from .base import BaseExtensionTests -class BaseMissingTests(object): + +class BaseMissingTests(BaseExtensionTests): def test_isna(self, data_missing): if data_missing._can_hold_na: expected = np.array([True, False]) @@ -16,13 +18,13 @@ def test_isna(self, data_missing): result = pd.Series(data_missing).isna() expected = pd.Series(expected) - tm.assert_series_equal(result, expected) + self.assert_series_equal(result, expected) def test_dropna_series(self, data_missing): ser = pd.Series(data_missing) result = ser.dropna() expected = ser.iloc[[1]] - tm.assert_series_equal(result, expected) + self.assert_series_equal(result, expected) def test_dropna_frame(self, data_missing): df = pd.DataFrame({"A": data_missing}) @@ -30,16 +32,16 @@ def test_dropna_frame(self, data_missing): # defaults result = df.dropna() expected = df.iloc[[1]] - tm.assert_frame_equal(result, expected) + self.assert_frame_equal(result, expected) # axis = 1 result = df.dropna(axis='columns') expected = pd.DataFrame(index=[0, 1]) - tm.assert_frame_equal(result, expected) + self.assert_frame_equal(result, expected) # multiple df = pd.DataFrame({"A": data_missing, "B": [1, np.nan]}) result = df.dropna() expected = df.iloc[:0] - tm.assert_frame_equal(result, expected) + self.assert_frame_equal(result, expected) diff --git 
a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py index d8f577c6fa50d..cfb70f2291555 100644 --- a/pandas/tests/extension/base/reshaping.py +++ b/pandas/tests/extension/base/reshaping.py @@ -1,11 +1,12 @@ import pytest import pandas as pd -import pandas.util.testing as tm from pandas.core.internals import ExtensionBlock +from .base import BaseExtensionTests -class BaseReshapingTests(object): + +class BaseReshapingTests(BaseExtensionTests): """Tests for reshaping and concatenation.""" @pytest.mark.parametrize('in_frame', [True, False]) def test_concat(self, data, in_frame): @@ -32,8 +33,8 @@ def test_align(self, data, na_value): # Assumes that the ctor can take a list of scalars of the type e1 = pd.Series(type(data)(list(a) + [na_value])) e2 = pd.Series(type(data)([na_value] + list(b))) - tm.assert_series_equal(r1, e1) - tm.assert_series_equal(r2, e2) + self.assert_series_equal(r1, e1) + self.assert_series_equal(r2, e2) def test_align_frame(self, data, na_value): a = data[:3] @@ -45,17 +46,17 @@ def test_align_frame(self, data, na_value): # Assumes that the ctor can take a list of scalars of the type e1 = pd.DataFrame({'A': type(data)(list(a) + [na_value])}) e2 = pd.DataFrame({'A': type(data)([na_value] + list(b))}) - tm.assert_frame_equal(r1, e1) - tm.assert_frame_equal(r2, e2) + self.assert_frame_equal(r1, e1) + self.assert_frame_equal(r2, e2) def test_set_frame_expand_regular_with_extension(self, data): df = pd.DataFrame({"A": [1] * len(data)}) df['B'] = data expected = pd.DataFrame({"A": [1] * len(data), "B": data}) - tm.assert_frame_equal(df, expected) + self.assert_frame_equal(df, expected) def test_set_frame_expand_extension_with_regular(self, data): df = pd.DataFrame({'A': data}) df['B'] = [1] * len(data) expected = pd.DataFrame({"A": data, "B": [1] * len(data)}) - tm.assert_frame_equal(df, expected) + self.assert_frame_equal(df, expected) From 92299cdb8944e6d810b04504cad7e28f3fb322d3 Mon Sep 17 00:00:00 2001 From: Tommy 
<10076072+tommyod@users.noreply.github.com> Date: Sat, 24 Feb 2018 17:31:12 +0100 Subject: [PATCH 192/217] DOC: Updated links to 2 tutorials in tutorials.rst (#19857) --- doc/source/tutorials.rst | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/doc/source/tutorials.rst b/doc/source/tutorials.rst index db9385519bff2..0398e2892cef5 100644 --- a/doc/source/tutorials.rst +++ b/doc/source/tutorials.rst @@ -26,32 +26,34 @@ repository `_. To run the examples in th clone the GitHub repository and get IPython Notebook running. See `How to use this cookbook `_. -- `A quick tour of the IPython Notebook: `_ +- `A quick tour of the IPython Notebook: `_ Shows off IPython's awesome tab completion and magic functions. -- `Chapter 1: `_ +- `Chapter 1: `_ Reading your data into pandas is pretty much the easiest thing. Even when the encoding is wrong! -- `Chapter 2: `_ +- `Chapter 2: `_ It's not totally obvious how to select data from a pandas dataframe. Here we explain the basics (how to take slices and get columns) -- `Chapter 3: `_ +- `Chapter 3: `_ Here we get into serious slicing and dicing and learn how to filter dataframes in complicated ways, really fast. -- `Chapter 4: `_ +- `Chapter 4: `_ Groupby/aggregate is seriously my favorite thing about pandas and I use it all the time. You should probably read this. -- `Chapter 5: `_ +- `Chapter 5: `_ Here you get to find out if it's cold in Montreal in the winter (spoiler: yes). Web scraping with pandas is fun! Here we combine dataframes. -- `Chapter 6: `_ +- `Chapter 6: `_ Strings with pandas are great. It has all these vectorized string operations and they're the best. We will turn a bunch of strings containing "Snow" into vectors of numbers in a trice. -- `Chapter 7: `_ +- `Chapter 7: `_ Cleaning up messy data is never a joy, but with pandas it's easier. -- `Chapter 8: `_ +- `Chapter 8: `_ Parsing Unix timestamps is confusing at first but it turns out to be really easy. 
+- `Chapter 9: `_ + Reading data from SQL databases. Lessons for new pandas users From 0057ee29f0150d4b6306d1f0863d75eaceddaf78 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 24 Feb 2018 08:38:22 -0800 Subject: [PATCH 193/217] templatize timedelta arith ops (#19871) --- pandas/_libs/tslibs/timedeltas.pyx | 47 +++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 7 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index c4578a289b020..7aeff9bec75b5 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -471,9 +471,12 @@ def _binary_op_method_timedeltalike(op, name): # define a binary operation that only works if the other argument is # timedelta like or an array of timedeltalike def f(self, other): - if hasattr(other, 'delta') and not PyDelta_Check(other): - # offsets.Tick - return op(self, other.delta) + if hasattr(other, '_typ'): + # Series, DataFrame, ... + if other._typ == 'dateoffset' and hasattr(other, 'delta'): + # Tick offset + return op(self, other.delta) + return NotImplemented elif other is NaT: return NaT @@ -1052,7 +1055,14 @@ class Timedelta(_Timedelta): __rsub__ = _binary_op_method_timedeltalike(lambda x, y: y - x, '__rsub__') def __mul__(self, other): - if hasattr(other, 'dtype'): + if hasattr(other, '_typ'): + # Series, DataFrame, ... + if other._typ == 'dateoffset' and hasattr(other, 'delta'): + # Tick offset; this op will raise TypeError + return other.delta * self + return NotImplemented + + elif hasattr(other, 'dtype'): # ndarray-like return other * self.to_timedelta64() @@ -1068,7 +1078,18 @@ class Timedelta(_Timedelta): __rmul__ = __mul__ def __truediv__(self, other): - if hasattr(other, 'dtype'): + if hasattr(other, '_typ'): + # Series, DataFrame, ... 
+ if other._typ == 'dateoffset' and hasattr(other, 'delta'): + # Tick offset + return self / other.delta + return NotImplemented + + elif is_timedelta64_object(other): + # convert to Timedelta below + pass + + elif hasattr(other, 'dtype'): return self.to_timedelta64() / other elif is_integer_object(other) or is_float_object(other): @@ -1084,7 +1105,18 @@ class Timedelta(_Timedelta): return self.value / float(other.value) def __rtruediv__(self, other): - if hasattr(other, 'dtype'): + if hasattr(other, '_typ'): + # Series, DataFrame, ... + if other._typ == 'dateoffset' and hasattr(other, 'delta'): + # Tick offset + return other.delta / self + return NotImplemented + + elif is_timedelta64_object(other): + # convert to Timedelta below + pass + + elif hasattr(other, 'dtype'): return other / self.to_timedelta64() elif not _validate_ops_compat(other): @@ -1160,9 +1192,10 @@ class Timedelta(_Timedelta): '{op}'.format(dtype=other.dtype, op='__floordiv__')) - if is_float_object(other) and util._checknull(other): + elif is_float_object(other) and util._checknull(other): # i.e. 
np.nan return NotImplemented + elif not _validate_ops_compat(other): return NotImplemented From 06088a89da4634bfab45351615df9a57b2d858c5 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 24 Feb 2018 12:21:03 -0500 Subject: [PATCH 194/217] COMPAT: fixup decimal extension for indexing compat (#19882) --- pandas/tests/extension/base/getitem.py | 6 ++++-- pandas/tests/extension/decimal/array.py | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py index 31ed8b9e01225..566ba1721d13c 100644 --- a/pandas/tests/extension/base/getitem.py +++ b/pandas/tests/extension/base/getitem.py @@ -18,7 +18,8 @@ def test_iloc_series(self, data): self.assert_series_equal(result, expected) def test_iloc_frame(self, data): - df = pd.DataFrame({"A": data, 'B': np.arange(len(data))}) + df = pd.DataFrame({"A": data, 'B': + np.arange(len(data), dtype='int64')}) expected = pd.DataFrame({"A": data[:4]}) # slice -> frame @@ -49,7 +50,8 @@ def test_loc_series(self, data): self.assert_series_equal(result, expected) def test_loc_frame(self, data): - df = pd.DataFrame({"A": data, 'B': np.arange(len(data))}) + df = pd.DataFrame({"A": data, + 'B': np.arange(len(data), dtype='int64')}) expected = pd.DataFrame({"A": data[:4]}) # slice -> frame diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index f526ac5996a10..8b2eaadeca99e 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -8,6 +8,7 @@ import pandas as pd from pandas.core.arrays import ExtensionArray from pandas.core.dtypes.base import ExtensionDtype +from pandas.core.dtypes.common import _ensure_platform_int class DecimalDtype(ExtensionDtype): @@ -68,6 +69,7 @@ def isna(self): def take(self, indexer, allow_fill=True, fill_value=None): mask = indexer == -1 + indexer = _ensure_platform_int(indexer) out = self.values.take(indexer) out[mask] = self._na_value 
From ffa89a6b31dea277d89a269d138cb6d4279db9f2 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 24 Feb 2018 20:44:50 -0500 Subject: [PATCH 195/217] CI: pin jemalloc=4.5.0.post for 2.7 build per (#19888) https://issues.apache.org/jira/browse/ARROW-2208 --- ci/requirements-2.7.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/requirements-2.7.sh b/ci/requirements-2.7.sh index e3bd5e46026c5..95169e5dcce57 100644 --- a/ci/requirements-2.7.sh +++ b/ci/requirements-2.7.sh @@ -4,4 +4,4 @@ source activate pandas echo "install 27" -conda install -n pandas -c conda-forge feather-format pyarrow=0.4.1 fastparquet +conda install -n pandas -c conda-forge feather-format pyarrow=0.4.1 jemalloc=4.5.0.post fastparquet From 08732e0bf0161bb8badae515ac329e76b0c84f95 Mon Sep 17 00:00:00 2001 From: William Ayd Date: Sun, 25 Feb 2018 08:05:26 -0800 Subject: [PATCH 196/217] Cythonized GroupBy Fill (#19673) --- asv_bench/benchmarks/groupby.py | 10 +- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/_libs/groupby.pyx | 216 +++++++++++++++++++++++++++ pandas/_libs/groupby_helper.pxi.in | 163 -------------------- pandas/core/groupby.py | 102 ++++++++++--- pandas/tests/groupby/test_groupby.py | 55 +++++++ 6 files changed, 362 insertions(+), 185 deletions(-) diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index 61db39528a5fb..c347442784d41 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -370,11 +370,11 @@ class GroupByMethods(object): param_names = ['dtype', 'method'] params = [['int', 'float'], - ['all', 'any', 'count', 'cumcount', 'cummax', 'cummin', - 'cumprod', 'cumsum', 'describe', 'first', 'head', 'last', 'mad', - 'max', 'min', 'median', 'mean', 'nunique', 'pct_change', 'prod', - 'rank', 'sem', 'shift', 'size', 'skew', 'std', 'sum', 'tail', - 'unique', 'value_counts', 'var']] + ['all', 'any', 'bfill', 'count', 'cumcount', 'cummax', 'cummin', + 'cumprod', 'cumsum', 'describe', 'ffill', 'first', 
'head', + 'last', 'mad', 'max', 'min', 'median', 'mean', 'nunique', + 'pct_change', 'prod', 'rank', 'sem', 'shift', 'size', 'skew', + 'std', 'sum', 'tail', 'unique', 'value_counts', 'var']] def setup(self, dtype, method): ngroups = 1000 diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index fd3c3a5a7a301..fcaf46b1c3d71 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -689,6 +689,7 @@ Performance Improvements - Improved performance of pairwise ``.rolling()`` and ``.expanding()`` with ``.cov()`` and ``.corr()`` operations (:issue:`17917`) - Improved performance of :func:`DataFrameGroupBy.rank` (:issue:`15779`) - Improved performance of variable ``.rolling()`` on ``.min()`` and ``.max()`` (:issue:`19521`) +- Improved performance of ``GroupBy.ffill`` and ``GroupBy.bfill`` (:issue:`11296`) .. _whatsnew_0230.docs: diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 866683ce378ab..e3d208a915225 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -94,5 +94,221 @@ cdef inline float64_t kth_smallest_c(float64_t* a, return a[k] +@cython.boundscheck(False) +@cython.wraparound(False) +def group_median_float64(ndarray[float64_t, ndim=2] out, + ndarray[int64_t] counts, + ndarray[float64_t, ndim=2] values, + ndarray[int64_t] labels, + Py_ssize_t min_count=-1): + """ + Only aggregates on axis=0 + """ + cdef: + Py_ssize_t i, j, N, K, ngroups, size + ndarray[int64_t] _counts + ndarray data + float64_t* ptr + + assert min_count == -1, "'min_count' only used in add and prod" + + ngroups = len(counts) + N, K = ( values).shape + + indexer, _counts = groupsort_indexer(labels, ngroups) + counts[:] = _counts[1:] + + data = np.empty((K, N), dtype=np.float64) + ptr = data.data + + take_2d_axis1_float64_float64(values.T, indexer, out=data) + + with nogil: + + for i in range(K): + # exclude NA group + ptr += _counts[0] + for j in range(ngroups): + size = _counts[j + 1] + out[j, i] = 
median_linear(ptr, size) + ptr += size + + +@cython.boundscheck(False) +@cython.wraparound(False) +def group_cumprod_float64(float64_t[:, :] out, + float64_t[:, :] values, + int64_t[:] labels, + bint is_datetimelike): + """ + Only transforms on axis=0 + """ + cdef: + Py_ssize_t i, j, N, K, size + float64_t val + float64_t[:, :] accum + int64_t lab + + N, K = ( values).shape + accum = np.ones_like(values) + + with nogil: + for i in range(N): + lab = labels[i] + + if lab < 0: + continue + for j in range(K): + val = values[i, j] + if val == val: + accum[lab, j] *= val + out[i, j] = accum[lab, j] + + +@cython.boundscheck(False) +@cython.wraparound(False) +def group_cumsum(numeric[:, :] out, + numeric[:, :] values, + int64_t[:] labels, + is_datetimelike): + """ + Only transforms on axis=0 + """ + cdef: + Py_ssize_t i, j, N, K, size + numeric val + numeric[:, :] accum + int64_t lab + + N, K = ( values).shape + accum = np.zeros_like(values) + + with nogil: + for i in range(N): + lab = labels[i] + + if lab < 0: + continue + for j in range(K): + val = values[i, j] + + if numeric == float32_t or numeric == float64_t: + if val == val: + accum[lab, j] += val + out[i, j] = accum[lab, j] + else: + accum[lab, j] += val + out[i, j] = accum[lab, j] + + +@cython.boundscheck(False) +@cython.wraparound(False) +def group_shift_indexer(ndarray[int64_t] out, ndarray[int64_t] labels, + int ngroups, int periods): + cdef: + Py_ssize_t N, i, j, ii + int offset, sign + int64_t lab, idxer, idxer_slot + int64_t[:] label_seen = np.zeros(ngroups, dtype=np.int64) + int64_t[:, :] label_indexer + + N, = ( labels).shape + + if periods < 0: + periods = -periods + offset = N - 1 + sign = -1 + elif periods > 0: + offset = 0 + sign = 1 + + if periods == 0: + with nogil: + for i in range(N): + out[i] = i + else: + # array of each previous indexer seen + label_indexer = np.zeros((ngroups, periods), dtype=np.int64) + with nogil: + for i in range(N): + ## reverse iterator if shifting backwards + ii = offset 
+ sign * i + lab = labels[ii] + + # Skip null keys + if lab == -1: + out[ii] = -1 + continue + + label_seen[lab] += 1 + + idxer_slot = label_seen[lab] % periods + idxer = label_indexer[lab, idxer_slot] + + if label_seen[lab] > periods: + out[ii] = idxer + else: + out[ii] = -1 + + label_indexer[lab, idxer_slot] = ii + + +@cython.wraparound(False) +@cython.boundscheck(False) +def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels, + ndarray[uint8_t] mask, object direction, + int64_t limit): + """Indexes how to fill values forwards or backwards within a group + + Parameters + ---------- + out : array of int64_t values which this method will write its results to + Missing values will be written to with a value of -1 + labels : array containing unique label for each group, with its ordering + matching up to the corresponding record in `values` + mask : array of int64_t values where a 1 indicates a missing value + direction : {'ffill', 'bfill'} + Direction for fill to be applied (forwards or backwards, respectively) + limit : Consecutive values to fill before stopping, or -1 for no limit + + Notes + ----- + This method modifies the `out` parameter rather than returning an object + """ + cdef: + Py_ssize_t i, N + ndarray[int64_t] sorted_labels + int64_t idx, curr_fill_idx=-1, filled_vals=0 + + N = len(out) + + # Make sure all arrays are the same size + assert N == len(labels) == len(mask) + + sorted_labels = np.argsort(labels).astype(np.int64, copy=False) + if direction == 'bfill': + sorted_labels = sorted_labels[::-1] + + with nogil: + for i in range(N): + idx = sorted_labels[i] + if mask[idx] == 1: # is missing + # Stop filling once we've hit the limit + if filled_vals >= limit and limit != -1: + curr_fill_idx = -1 + filled_vals += 1 + else: # reset items when not missing + filled_vals = 0 + curr_fill_idx = idx + + out[idx] = curr_fill_idx + + # If we move to the next group, reset + # the fill_idx and counter + if i == N - 1 or labels[idx] != 
labels[sorted_labels[i+1]]: + curr_fill_idx = -1 + filled_vals = 0 + + # generated from template include "groupby_helper.pxi" diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in index e03e3af65755b..de802f4a72277 100644 --- a/pandas/_libs/groupby_helper.pxi.in +++ b/pandas/_libs/groupby_helper.pxi.in @@ -791,166 +791,3 @@ def group_cummax_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, out[i, j] = mval {{endfor}} - -#---------------------------------------------------------------------- -# other grouping functions not needing a template -#---------------------------------------------------------------------- - - -@cython.boundscheck(False) -@cython.wraparound(False) -def group_median_float64(ndarray[float64_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[float64_t, ndim=2] values, - ndarray[int64_t] labels, - Py_ssize_t min_count=-1): - """ - Only aggregates on axis=0 - """ - cdef: - Py_ssize_t i, j, N, K, ngroups, size - ndarray[int64_t] _counts - ndarray data - float64_t* ptr - - assert min_count == -1, "'min_count' only used in add and prod" - - ngroups = len(counts) - N, K = ( values).shape - - indexer, _counts = groupsort_indexer(labels, ngroups) - counts[:] = _counts[1:] - - data = np.empty((K, N), dtype=np.float64) - ptr = data.data - - take_2d_axis1_float64_float64(values.T, indexer, out=data) - - with nogil: - - for i in range(K): - # exclude NA group - ptr += _counts[0] - for j in range(ngroups): - size = _counts[j + 1] - out[j, i] = median_linear(ptr, size) - ptr += size - - -@cython.boundscheck(False) -@cython.wraparound(False) -def group_cumprod_float64(float64_t[:, :] out, - float64_t[:, :] values, - int64_t[:] labels, - bint is_datetimelike): - """ - Only transforms on axis=0 - """ - cdef: - Py_ssize_t i, j, N, K, size - float64_t val - float64_t[:, :] accum - int64_t lab - - N, K = ( values).shape - accum = np.ones_like(values) - - with nogil: - for i in range(N): - lab = labels[i] - - if lab < 0: - continue - for 
j in range(K): - val = values[i, j] - if val == val: - accum[lab, j] *= val - out[i, j] = accum[lab, j] - - -@cython.boundscheck(False) -@cython.wraparound(False) -def group_cumsum(numeric[:, :] out, - numeric[:, :] values, - int64_t[:] labels, - is_datetimelike): - """ - Only transforms on axis=0 - """ - cdef: - Py_ssize_t i, j, N, K, size - numeric val - numeric[:, :] accum - int64_t lab - - N, K = ( values).shape - accum = np.zeros_like(values) - - with nogil: - for i in range(N): - lab = labels[i] - - if lab < 0: - continue - for j in range(K): - val = values[i, j] - - if numeric == float32_t or numeric == float64_t: - if val == val: - accum[lab, j] += val - out[i, j] = accum[lab, j] - else: - accum[lab, j] += val - out[i, j] = accum[lab, j] - - -@cython.boundscheck(False) -@cython.wraparound(False) -def group_shift_indexer(int64_t[:] out, int64_t[:] labels, - int ngroups, int periods): - cdef: - Py_ssize_t N, i, j, ii - int offset, sign - int64_t lab, idxer, idxer_slot - int64_t[:] label_seen = np.zeros(ngroups, dtype=np.int64) - int64_t[:, :] label_indexer - - N, = ( labels).shape - - if periods < 0: - periods = -periods - offset = N - 1 - sign = -1 - elif periods > 0: - offset = 0 - sign = 1 - - if periods == 0: - with nogil: - for i in range(N): - out[i] = i - else: - # array of each previous indexer seen - label_indexer = np.zeros((ngroups, periods), dtype=np.int64) - with nogil: - for i in range(N): - ## reverse iterator if shifting backwards - ii = offset + sign * i - lab = labels[ii] - - # Skip null keys - if lab == -1: - out[ii] = -1 - continue - - label_seen[lab] += 1 - - idxer_slot = label_seen[lab] % periods - idxer = label_indexer[lab, idxer_slot] - - if label_seen[lab] > periods: - out[ii] = idxer - else: - out[ii] = -1 - - label_indexer[lab, idxer_slot] = ii diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index b1615f720368d..852ad04cd8a2e 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -1,5 +1,5 @@ import types 
-from functools import wraps +from functools import wraps, partial import numpy as np import datetime import collections @@ -38,7 +38,7 @@ _ensure_float) from pandas.core.dtypes.cast import maybe_downcast_to_dtype from pandas.core.dtypes.generic import ABCSeries -from pandas.core.dtypes.missing import isna, notna, _maybe_fill +from pandas.core.dtypes.missing import isna, isnull, notna, _maybe_fill from pandas.core.base import (PandasObject, SelectionMixin, GroupByError, DataError, SpecificationError) @@ -1457,6 +1457,36 @@ def expanding(self, *args, **kwargs): from pandas.core.window import ExpandingGroupby return ExpandingGroupby(self, *args, **kwargs) + def _fill(self, direction, limit=None): + """Shared function for `pad` and `backfill` to call Cython method + + Parameters + ---------- + direction : {'ffill', 'bfill'} + Direction passed to underlying Cython function. `bfill` will cause + values to be filled backwards. `ffill` and any other values will + default to a forward fill + limit : int, default None + Maximum number of consecutive values to fill. 
If `None`, this + method will convert to -1 prior to passing to Cython + + Returns + ------- + `Series` or `DataFrame` with filled values + + See Also + -------- + pad + backfill + """ + # Need int value for Cython + if limit is None: + limit = -1 + + return self._get_cythonized_result('group_fillna_indexer', + self.grouper, needs_mask=True, + direction=direction, limit=limit) + @Substitution(name='groupby') def pad(self, limit=None): """ @@ -1474,7 +1504,7 @@ def pad(self, limit=None): Series.fillna DataFrame.fillna """ - return self.apply(lambda x: x.ffill(limit=limit)) + return self._fill('ffill', limit=limit) ffill = pad @Substitution(name='groupby') @@ -1494,7 +1524,7 @@ def backfill(self, limit=None): Series.fillna DataFrame.fillna """ - return self.apply(lambda x: x.bfill(limit=limit)) + return self._fill('bfill', limit=limit) bfill = backfill @Substitution(name='groupby') @@ -1843,6 +1873,45 @@ def cummax(self, axis=0, **kwargs): return self._cython_transform('cummax', numeric_only=False) + def _get_cythonized_result(self, how, grouper, needs_mask=False, + needs_ngroups=False, **kwargs): + """Get result for Cythonized functions + + Parameters + ---------- + how : str, Cythonized function name to be called + grouper : Grouper object containing pertinent group info + needs_mask : bool, default False + Whether boolean mask needs to be part of the Cython call signature + needs_ngroups : bool, default False + Whether number of groups part of the Cython call signature + **kwargs : dict + Extra arguments to be passed back to Cython funcs + + Returns + ------- + `Series` or `DataFrame` with filled values + """ + + labels, _, ngroups = grouper.group_info + output = collections.OrderedDict() + base_func = getattr(libgroupby, how) + + for name, obj in self._iterate_slices(): + indexer = np.zeros_like(labels, dtype=np.int64) + func = partial(base_func, indexer, labels) + if needs_mask: + mask = isnull(obj.values).view(np.uint8) + func = partial(func, mask) + + if 
needs_ngroups: + func = partial(func, ngroups) + + func(**kwargs) # Call func to modify indexer values in place + output[name] = algorithms.take_nd(obj.values, indexer) + + return self._wrap_transformed_output(output) + @Substitution(name='groupby') @Appender(_doc_template) def shift(self, periods=1, freq=None, axis=0): @@ -1860,17 +1929,9 @@ def shift(self, periods=1, freq=None, axis=0): if freq is not None or axis != 0: return self.apply(lambda x: x.shift(periods, freq, axis)) - labels, _, ngroups = self.grouper.group_info - - # filled in by Cython - indexer = np.zeros_like(labels) - libgroupby.group_shift_indexer(indexer, labels, ngroups, periods) - - output = {} - for name, obj in self._iterate_slices(): - output[name] = algorithms.take_nd(obj.values, indexer) - - return self._wrap_transformed_output(output) + return self._get_cythonized_result('group_shift_indexer', + self.grouper, needs_ngroups=True, + periods=periods) @Substitution(name='groupby') @Appender(_doc_template) @@ -3577,7 +3638,6 @@ def describe(self, **kwargs): def value_counts(self, normalize=False, sort=True, ascending=False, bins=None, dropna=True): - from functools import partial from pandas.core.reshape.tile import cut from pandas.core.reshape.merge import _get_join_indexers @@ -4585,9 +4645,17 @@ def _apply_to_column_groupbys(self, func): in self._iterate_column_groupbys()), keys=self._selected_obj.columns, axis=1) + def _fill(self, direction, limit=None): + """Overriden method to join grouped columns in output""" + res = super(DataFrameGroupBy, self)._fill(direction, limit=limit) + output = collections.OrderedDict( + (grp.name, grp.grouper) for grp in self.grouper.groupings) + + from pandas import concat + return concat((self._wrap_transformed_output(output), res), axis=1) + def count(self): """ Compute count of group, excluding missing values """ - from functools import partial from pandas.core.dtypes.missing import _isna_ndarraylike as isna data, _ = self._get_data_to_aggregate() diff 
--git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 129ac6b06205c..2429e9975fc8e 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2061,6 +2061,61 @@ def test_rank_object_raises(self, ties_method, ascending, na_option, ascending=ascending, na_option=na_option, pct=pct) + @pytest.mark.parametrize("mix_groupings", [True, False]) + @pytest.mark.parametrize("as_series", [True, False]) + @pytest.mark.parametrize("val1,val2", [ + ('foo', 'bar'), (1, 2), (1., 2.)]) + @pytest.mark.parametrize("fill_method,limit,exp_vals", [ + ("ffill", None, + [np.nan, np.nan, 'val1', 'val1', 'val1', 'val2', 'val2', 'val2']), + ("ffill", 1, + [np.nan, np.nan, 'val1', 'val1', np.nan, 'val2', 'val2', np.nan]), + ("bfill", None, + ['val1', 'val1', 'val1', 'val2', 'val2', 'val2', np.nan, np.nan]), + ("bfill", 1, + [np.nan, 'val1', 'val1', np.nan, 'val2', 'val2', np.nan, np.nan]) + ]) + def test_group_fill_methods(self, mix_groupings, as_series, val1, val2, + fill_method, limit, exp_vals): + vals = [np.nan, np.nan, val1, np.nan, np.nan, val2, np.nan, np.nan] + _exp_vals = list(exp_vals) + # Overwrite placeholder values + for index, exp_val in enumerate(_exp_vals): + if exp_val == 'val1': + _exp_vals[index] = val1 + elif exp_val == 'val2': + _exp_vals[index] = val2 + + # Need to modify values and expectations depending on the + # Series / DataFrame that we ultimately want to generate + if mix_groupings: # ['a', 'b', 'a, 'b', ...] + keys = ['a', 'b'] * len(vals) + + def interweave(list_obj): + temp = list() + for x in list_obj: + temp.extend([x, x]) + + return temp + + _exp_vals = interweave(_exp_vals) + vals = interweave(vals) + else: # ['a', 'a', 'a', ... 
'b', 'b', 'b'] + keys = ['a'] * len(vals) + ['b'] * len(vals) + _exp_vals = _exp_vals * 2 + vals = vals * 2 + + df = DataFrame({'key': keys, 'val': vals}) + if as_series: + result = getattr( + df.groupby('key')['val'], fill_method)(limit=limit) + exp = Series(_exp_vals, name='val') + assert_series_equal(result, exp) + else: + result = getattr(df.groupby('key'), fill_method)(limit=limit) + exp = DataFrame({'key': keys, 'val': _exp_vals}) + assert_frame_equal(result, exp) + def test_dont_clobber_name_column(self): df = DataFrame({'key': ['a', 'a', 'a', 'b', 'b', 'b'], 'name': ['foo', 'bar', 'baz'] * 2}) From e5981d12d05ecd2cd0ec52db8c65562fa2701f8d Mon Sep 17 00:00:00 2001 From: William Ayd Date: Sun, 25 Feb 2018 08:06:59 -0800 Subject: [PATCH 197/217] Fixed pct_change with 'fill_method' returning NaN instead of 0 (#19875) --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/core/generic.py | 2 +- pandas/tests/frame/test_analytics.py | 3 --- pandas/tests/frame/test_timeseries.py | 2 +- pandas/tests/generic/test_generic.py | 20 ++++++++++++++++++++ pandas/tests/generic/test_panel.py | 2 +- pandas/tests/series/test_timeseries.py | 2 +- 7 files changed, 25 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index fcaf46b1c3d71..ba24c93121dcb 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -808,6 +808,7 @@ Numeric - Bug in :class:`Index` constructor with ``dtype='uint64'`` where int-like floats were not coerced to :class:`UInt64Index` (:issue:`18400`) - Bug in :class:`DataFrame` flex arithmetic (e.g. 
``df.add(other, fill_value=foo)``) with a ``fill_value`` other than ``None`` failed to raise ``NotImplementedError`` in corner cases where either the frame or ``other`` has length zero (:issue:`19522`) - Multiplication and division of numeric-dtyped :class:`Index` objects with timedelta-like scalars returns ``TimedeltaIndex`` instead of raising ``TypeError`` (:issue:`19333`) +- Bug where ``NaN`` was returned instead of 0 by :func:`Series.pct_change` and :func:`DataFrame.pct_change` when ``fill_method`` is not ``None`` (provided) (:issue:`19873`) Indexing diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 85e2ce475ffa2..e1ed6ae9c8a6c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7488,7 +7488,7 @@ def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None, **kwargs)) - 1) rs = rs.reindex_like(data) if freq is None: - mask = isna(com._values_from_object(self)) + mask = isna(com._values_from_object(data)) np.putmask(rs.values, mask, np.nan) return rs diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index f2b8387072c8d..de4a132e0d613 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1941,12 +1941,9 @@ def test_pct_change(self): pnl.iat[1, 1] = np.nan pnl.iat[2, 3] = 60 - mask = pnl.isnull() - for axis in range(2): expected = pnl.ffill(axis=axis) / pnl.ffill(axis=axis).shift( axis=axis) - 1 - expected[mask] = np.nan result = pnl.pct_change(axis=axis, fill_method='pad') tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index 25dd285e883a0..9f94439a71a57 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -118,7 +118,7 @@ def test_pct_change_shift_over_nas(self): df = DataFrame({'a': s, 'b': s}) chg = df.pct_change() - expected = Series([np.nan, 0.5, np.nan, 2.5 / 1.5 - 1, .2]) + expected = Series([np.nan, 0.5, 
0., 2.5 / 1.5 - 1, .2]) edf = DataFrame({'a': expected, 'b': expected}) assert_frame_equal(chg, edf) diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index 3868bdf7d4620..311c71f734945 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -592,6 +592,26 @@ def test_copy_and_deepcopy(self): assert obj_copy is not obj self._compare(obj_copy, obj) + @pytest.mark.parametrize("periods,fill_method,limit,exp", [ + (1, "ffill", None, [np.nan, np.nan, np.nan, 1, 1, 1.5, 0, 0]), + (1, "ffill", 1, [np.nan, np.nan, np.nan, 1, 1, 1.5, 0, np.nan]), + (1, "bfill", None, [np.nan, 0, 0, 1, 1, 1.5, np.nan, np.nan]), + (1, "bfill", 1, [np.nan, np.nan, 0, 1, 1, 1.5, np.nan, np.nan]), + (-1, "ffill", None, [np.nan, np.nan, -.5, -.5, -.6, 0, 0, np.nan]), + (-1, "ffill", 1, [np.nan, np.nan, -.5, -.5, -.6, 0, np.nan, np.nan]), + (-1, "bfill", None, [0, 0, -.5, -.5, -.6, np.nan, np.nan, np.nan]), + (-1, "bfill", 1, [np.nan, 0, -.5, -.5, -.6, np.nan, np.nan, np.nan]) + ]) + def test_pct_change(self, periods, fill_method, limit, exp): + vals = [np.nan, np.nan, 1, 2, 4, 10, np.nan, np.nan] + obj = self._typ(vals) + func = getattr(obj, 'pct_change') + res = func(periods=periods, fill_method=fill_method, limit=limit) + if type(obj) is DataFrame: + tm.assert_frame_equal(res, DataFrame(exp)) + else: + tm.assert_series_equal(res, Series(exp)) + class TestNDFrame(object): # tests that don't fit elsewhere diff --git a/pandas/tests/generic/test_panel.py b/pandas/tests/generic/test_panel.py index 4cbd5cb2aa69f..49cb773a1bd10 100644 --- a/pandas/tests/generic/test_panel.py +++ b/pandas/tests/generic/test_panel.py @@ -45,7 +45,7 @@ def test_to_xarray(self): 'test_stat_non_defaults_args', 'test_truncate_out_of_bounds', 'test_metadata_propagation', 'test_copy_and_deepcopy', - 'test_sample']: + 'test_pct_change', 'test_sample']: def f(): def tester(self): diff --git a/pandas/tests/series/test_timeseries.py 
b/pandas/tests/series/test_timeseries.py index 7a1aff1cc223c..63a05ef7de565 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -352,7 +352,7 @@ def test_pct_change_shift_over_nas(self): s = Series([1., 1.5, np.nan, 2.5, 3.]) chg = s.pct_change() - expected = Series([np.nan, 0.5, np.nan, 2.5 / 1.5 - 1, .2]) + expected = Series([np.nan, 0.5, 0., 2.5 / 1.5 - 1, .2]) assert_series_equal(chg, expected) def test_pct_change_periods_freq(self): From a00f41a6f20a1ddcfdcebf49c1a352f0963869ad Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 25 Feb 2018 08:09:35 -0800 Subject: [PATCH 198/217] Use pandas_datetimestruct instead of date_info (#19874) --- pandas/_libs/src/period_helper.c | 159 ++++-------- pandas/_libs/src/period_helper.h | 5 +- pandas/_libs/tslibs/period.pyx | 240 ++++++++++-------- .../{test_period_asfreq.py => test_asfreq.py} | 0 4 files changed, 178 insertions(+), 226 deletions(-) rename pandas/tests/scalar/period/{test_period_asfreq.py => test_asfreq.py} (100%) diff --git a/pandas/_libs/src/period_helper.c b/pandas/_libs/src/period_helper.c index e3d250aa44f17..19a7282f38049 100644 --- a/pandas/_libs/src/period_helper.c +++ b/pandas/_libs/src/period_helper.c @@ -89,14 +89,12 @@ static npy_int64 daytime_conversion_factor_matrix[7][7] = { {0, 0, 0, 0, 0, 1, 1000}, {0, 0, 0, 0, 0, 0, 1}}; -PANDAS_INLINE int max_value(int a, int b) { return a > b ? a : b; } +int max_value(int a, int b) { return a > b ? a : b; } PANDAS_INLINE int min_value(int a, int b) { return a < b ? 
a : b; } PANDAS_INLINE int get_freq_group(int freq) { return (freq / 1000) * 1000; } -PANDAS_INLINE int get_freq_group_index(int freq) { return freq / 1000; } - npy_int64 get_daytime_conversion_factor(int from_index, int to_index) { int row = min_value(from_index, to_index); @@ -227,16 +225,6 @@ static npy_int64 asfreq_DTtoB(npy_int64 ordinal, asfreq_info *af_info) { return DtoB(&dinfo, roll_back, ordinal); } -// all intra day calculations are now done within one function -static npy_int64 asfreq_DownsampleWithinDay(npy_int64 ordinal, - asfreq_info *af_info) { - return downsample_daytime(ordinal, af_info); -} - -static npy_int64 asfreq_UpsampleWithinDay(npy_int64 ordinal, - asfreq_info *af_info) { - return upsample_daytime(ordinal, af_info); -} //************ FROM BUSINESS *************** static npy_int64 asfreq_BtoDT(npy_int64 ordinal, asfreq_info *af_info) { @@ -288,26 +276,26 @@ static npy_int64 asfreq_WtoW(npy_int64 ordinal, asfreq_info *af_info) { static npy_int64 asfreq_WtoB(npy_int64 ordinal, asfreq_info *af_info) { struct date_info dinfo; npy_int64 unix_date = asfreq_WtoDT(ordinal, af_info); + int roll_back = af_info->is_end; dInfoCalc_SetFromAbsDate(&dinfo, unix_date); - return DtoB(&dinfo, roll_back, unix_date); } //************ FROM MONTHLY *************** -static void MtoD_ym(npy_int64 ordinal, int *y, int *m) { - *y = floordiv(ordinal, 12) + 1970; - *m = mod_compat(ordinal, 12) + 1; +static void MtoD_ym(npy_int64 ordinal, int *year, int *month) { + *year = floordiv(ordinal, 12) + 1970; + *month = mod_compat(ordinal, 12) + 1; } static npy_int64 asfreq_MtoDT(npy_int64 ordinal, asfreq_info *af_info) { npy_int64 unix_date; - int y, m; + int year, month; ordinal += af_info->is_end; - MtoD_ym(ordinal, &y, &m); - unix_date = unix_date_from_ymd(y, m, 1); + MtoD_ym(ordinal, &year, &month); + unix_date = unix_date_from_ymd(year, month, 1); unix_date -= af_info->is_end; return upsample_daytime(unix_date, af_info); } @@ -327,38 +315,37 @@ static npy_int64 
asfreq_MtoW(npy_int64 ordinal, asfreq_info *af_info) { static npy_int64 asfreq_MtoB(npy_int64 ordinal, asfreq_info *af_info) { struct date_info dinfo; npy_int64 unix_date = asfreq_MtoDT(ordinal, af_info); - int roll_back = af_info->is_end; + int roll_back = af_info->is_end; dInfoCalc_SetFromAbsDate(&dinfo, unix_date); - return DtoB(&dinfo, roll_back, unix_date); } //************ FROM QUARTERLY *************** -static void QtoD_ym(npy_int64 ordinal, int *y, int *m, asfreq_info *af_info) { - *y = floordiv(ordinal, 4) + 1970; - *m = mod_compat(ordinal, 4) * 3 + 1; +static void QtoD_ym(npy_int64 ordinal, int *year, int *month, + asfreq_info *af_info) { + *year = floordiv(ordinal, 4) + 1970; + *month = mod_compat(ordinal, 4) * 3 + 1; if (af_info->from_q_year_end != 12) { - *m += af_info->from_q_year_end; - if (*m > 12) { - *m -= 12; + *month += af_info->from_q_year_end; + if (*month > 12) { + *month -= 12; } else { - *y -= 1; + *year -= 1; } } } static npy_int64 asfreq_QtoDT(npy_int64 ordinal, asfreq_info *af_info) { npy_int64 unix_date; - int y, m; + int year, month; ordinal += af_info->is_end; - QtoD_ym(ordinal, &y, &m, af_info); - - unix_date = unix_date_from_ymd(y, m, 1); + QtoD_ym(ordinal, &year, &month, af_info); + unix_date = unix_date_from_ymd(year, month, 1); unix_date -= af_info->is_end; return upsample_daytime(unix_date, af_info); } @@ -382,29 +369,39 @@ static npy_int64 asfreq_QtoW(npy_int64 ordinal, asfreq_info *af_info) { static npy_int64 asfreq_QtoB(npy_int64 ordinal, asfreq_info *af_info) { struct date_info dinfo; npy_int64 unix_date = asfreq_QtoDT(ordinal, af_info); - int roll_back = af_info->is_end; + int roll_back = af_info->is_end; dInfoCalc_SetFromAbsDate(&dinfo, unix_date); - return DtoB(&dinfo, roll_back, unix_date); } //************ FROM ANNUAL *************** -static npy_int64 asfreq_AtoDT(npy_int64 ordinal, asfreq_info *af_info) { - npy_int64 unix_date; +static void AtoD_ym(npy_int64 ordinal, int *year, int *month, + asfreq_info *af_info) { + 
*year = ordinal + 1970; + *month = 1; - // start from 1970 - npy_int64 year = ordinal + 1970; - - int month = (af_info->from_a_year_end % 12) + 1; if (af_info->from_a_year_end != 12) { - year -= 1; + *month += af_info->from_a_year_end; + if (*month > 12) { + // This case is never reached, but is kept for symmetry + // with QtoD_ym + *month -= 12; + } else { + *year -= 1; + } } +} - year += af_info->is_end; - unix_date = unix_date_from_ymd(year, month, 1); +static npy_int64 asfreq_AtoDT(npy_int64 ordinal, asfreq_info *af_info) { + npy_int64 unix_date; + int year, month; + + ordinal += af_info->is_end; + AtoD_ym(ordinal, &year, &month, af_info); + unix_date = unix_date_from_ymd(year, month, 1); unix_date -= af_info->is_end; return upsample_daytime(unix_date, af_info); } @@ -428,9 +425,9 @@ static npy_int64 asfreq_AtoW(npy_int64 ordinal, asfreq_info *af_info) { static npy_int64 asfreq_AtoB(npy_int64 ordinal, asfreq_info *af_info) { struct date_info dinfo; npy_int64 unix_date = asfreq_AtoDT(ordinal, af_info); + int roll_back = af_info->is_end; dInfoCalc_SetFromAbsDate(&dinfo, unix_date); - return DtoB(&dinfo, roll_back, unix_date); } @@ -443,57 +440,6 @@ static npy_int64 no_op(npy_int64 ordinal, asfreq_info *af_info) { // end of frequency specific conversion routines -static int calc_a_year_end(int freq, int group) { - int result = (freq - group) % 12; - if (result == 0) { - return 12; - } else { - return result; - } -} - -static int calc_week_end(int freq, int group) { return freq - group; } - -void get_asfreq_info(int fromFreq, int toFreq, char relation, - asfreq_info *af_info) { - int fromGroup = get_freq_group(fromFreq); - int toGroup = get_freq_group(toFreq); - - if (relation == 'E') { - af_info->is_end = 1; - } else { - af_info->is_end = 0; - } - - af_info->intraday_conversion_factor = get_daytime_conversion_factor( - get_freq_group_index(max_value(fromGroup, FR_DAY)), - get_freq_group_index(max_value(toGroup, FR_DAY))); - - switch (fromGroup) { - case FR_WK: - 
af_info->from_week_end = calc_week_end(fromFreq, fromGroup); - break; - case FR_ANN: - af_info->from_a_year_end = calc_a_year_end(fromFreq, fromGroup); - break; - case FR_QTR: - af_info->from_q_year_end = calc_a_year_end(fromFreq, fromGroup); - break; - } - - switch (toGroup) { - case FR_WK: - af_info->to_week_end = calc_week_end(toFreq, toGroup); - break; - case FR_ANN: - af_info->to_a_year_end = calc_a_year_end(toFreq, toGroup); - break; - case FR_QTR: - af_info->to_q_year_end = calc_a_year_end(toFreq, toGroup); - break; - } -} - freq_conv_func get_asfreq_func(int fromFreq, int toFreq) { int fromGroup = get_freq_group(fromFreq); int toGroup = get_freq_group(toFreq); @@ -650,9 +596,9 @@ freq_conv_func get_asfreq_func(int fromFreq, int toFreq) { case FR_US: case FR_NS: if (fromGroup > toGroup) { - return &asfreq_DownsampleWithinDay; + return &downsample_daytime; } else { - return &asfreq_UpsampleWithinDay; + return &upsample_daytime; } default: return &nofunc; @@ -662,20 +608,3 @@ freq_conv_func get_asfreq_func(int fromFreq, int toFreq) { return &nofunc; } } - -/* ------------------------------------------------------------------ - * New pandas API-helper code, to expose to cython - * ------------------------------------------------------------------*/ - -npy_int64 asfreq(npy_int64 period_ordinal, int freq1, int freq2, - char relation) { - npy_int64 val; - freq_conv_func func; - asfreq_info finfo; - - func = get_asfreq_func(freq1, freq2); - - get_asfreq_info(freq1, freq2, relation, &finfo); - val = (*func)(period_ordinal, &finfo); - return val; -} diff --git a/pandas/_libs/src/period_helper.h b/pandas/_libs/src/period_helper.h index 7163dc960d152..c6313924adddd 100644 --- a/pandas/_libs/src/period_helper.h +++ b/pandas/_libs/src/period_helper.h @@ -108,12 +108,9 @@ typedef npy_int64 (*freq_conv_func)(npy_int64, asfreq_info *af_info); * new pandas API helper functions here */ -npy_int64 asfreq(npy_int64 period_ordinal, int freq1, int freq2, char relation); - 
freq_conv_func get_asfreq_func(int fromFreq, int toFreq); -void get_asfreq_info(int fromFreq, int toFreq, char relation, - asfreq_info *af_info); npy_int64 get_daytime_conversion_factor(int from_index, int to_index); +int max_value(int a, int b); #endif // PANDAS__LIBS_SRC_PERIOD_HELPER_H_ diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index f1a193706144f..9cf7e39791f2b 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -75,15 +75,8 @@ cdef extern from "period_helper.h": int FR_BUS int FR_UND - ctypedef struct date_info: - double second - int minute - int hour - int day - int month - int year - ctypedef struct asfreq_info: + int64_t intraday_conversion_factor int is_end int from_week_end @@ -97,24 +90,21 @@ cdef extern from "period_helper.h": ctypedef int64_t (*freq_conv_func)(int64_t, asfreq_info*) nogil - int64_t asfreq(int64_t dtordinal, int freq1, int freq2, - char relation) except INT32_MIN freq_conv_func get_asfreq_func(int fromFreq, int toFreq) nogil - void get_asfreq_info(int fromFreq, int toFreq, char relation, - asfreq_info *af_info) nogil int64_t get_daytime_conversion_factor(int from_index, int to_index) nogil + int max_value(int left, int right) nogil @cython.cdivision -cdef char* c_strftime(date_info *dinfo, char *fmt): +cdef char* c_strftime(pandas_datetimestruct *dts, char *fmt): """ Generate a nice string representation of the period object, originally from DateObject_strftime Parameters ---------- - dinfo : date_info* + dts : pandas_datetimestruct* fmt : char* Returns @@ -126,14 +116,14 @@ cdef char* c_strftime(date_info *dinfo, char *fmt): char *result int result_len = strlen(fmt) + 50 - c_date.tm_sec = dinfo.second - c_date.tm_min = dinfo.minute - c_date.tm_hour = dinfo.hour - c_date.tm_mday = dinfo.day - c_date.tm_mon = dinfo.month - 1 - c_date.tm_year = dinfo.year - 1900 - c_date.tm_wday = (dayofweek(dinfo.year, dinfo.month, dinfo.day) + 1) % 7 - c_date.tm_yday = 
get_day_of_year(dinfo.year, dinfo.month, dinfo.day) - 1 + c_date.tm_sec = dts.sec + c_date.tm_min = dts.min + c_date.tm_hour = dts.hour + c_date.tm_mday = dts.day + c_date.tm_mon = dts.month - 1 + c_date.tm_year = dts.year - 1900 + c_date.tm_wday = (dayofweek(dts.year, dts.month, dts.day) + 1) % 7 + c_date.tm_yday = get_day_of_year(dts.year, dts.month, dts.day) - 1 c_date.tm_isdst = -1 result = malloc(result_len * sizeof(char)) @@ -150,6 +140,10 @@ cdef inline int get_freq_group(int freq) nogil: return (freq // 1000) * 1000 +cdef inline int get_freq_group_index(int freq) nogil: + return freq // 1000 + + # specifically _dont_ use cdvision or else ordinals near -1 are assigned to # incorrect dates GH#19643 @cython.cdivision(False) @@ -261,7 +255,8 @@ cdef int64_t get_period_ordinal(int year, int month, int day, # raise ValueError -cdef void get_date_info(int64_t ordinal, int freq, date_info *dinfo) nogil: +cdef void get_date_info(int64_t ordinal, int freq, + pandas_datetimestruct *dts) nogil: cdef: int64_t unix_date double abstime @@ -277,7 +272,7 @@ cdef void get_date_info(int64_t ordinal, int freq, date_info *dinfo) nogil: abstime -= 86400 unix_date += 1 - date_info_from_days_and_time(dinfo, unix_date, abstime) + date_info_from_days_and_time(dts, unix_date, abstime) cdef int64_t get_unix_date(int64_t period_ordinal, int freq) nogil: @@ -304,12 +299,12 @@ cdef int64_t get_unix_date(int64_t period_ordinal, int freq) nogil: return period_ordinal toDaily = get_asfreq_func(freq, FR_DAY) - get_asfreq_info(freq, FR_DAY, 'E', &af_info) + get_asfreq_info(freq, FR_DAY, True, &af_info) return toDaily(period_ordinal, &af_info) @cython.cdivision -cdef void date_info_from_days_and_time(date_info *dinfo, +cdef void date_info_from_days_and_time(pandas_datetimestruct *dts, int64_t unix_date, double abstime) nogil: """ @@ -317,7 +312,7 @@ cdef void date_info_from_days_and_time(date_info *dinfo, Parameters ---------- - dinfo : date_info* + dts : pandas_datetimestruct* unix_date : 
int64_t days elapsed since datetime(1970, 1, 1) abstime : double @@ -325,23 +320,19 @@ cdef void date_info_from_days_and_time(date_info *dinfo, Notes ----- - Updates dinfo inplace + Updates dts inplace """ cdef: - pandas_datetimestruct dts int inttime int hour, minute - double second + double second, subsecond_fraction # Bounds check # The calling function is responsible for ensuring that # abstime >= 0.0 and abstime <= 86400 # Calculate the date - pandas_datetime_to_datetimestruct(unix_date, PANDAS_FR_D, &dts) - dinfo.year = dts.year - dinfo.month = dts.month - dinfo.day = dts.day + pandas_datetime_to_datetimestruct(unix_date, PANDAS_FR_D, dts) # Calculate the time inttime = abstime @@ -349,9 +340,13 @@ cdef void date_info_from_days_and_time(date_info *dinfo, minute = (inttime % 3600) / 60 second = abstime - (hour * 3600 + minute * 60) - dinfo.hour = hour - dinfo.minute = minute - dinfo.second = second + dts.hour = hour + dts.min = minute + dts.sec = second + + subsecond_fraction = second - dts.sec + dts.us = int((subsecond_fraction) * 1e6) + dts.ps = int(((subsecond_fraction) * 1e6 - dts.us) * 1e6) @cython.cdivision @@ -439,7 +434,7 @@ cdef int get_yq(int64_t ordinal, int freq, int *quarter, int *year): else: qtr_freq = FR_QTR - get_asfreq_info(FR_DAY, qtr_freq, 'E', &af_info) + get_asfreq_info(FR_DAY, qtr_freq, True, &af_info) quarter[0] = DtoQ_yq(unix_date, &af_info, year) return qtr_freq @@ -447,20 +442,20 @@ cdef int get_yq(int64_t ordinal, int freq, int *quarter, int *year): cdef int DtoQ_yq(int64_t unix_date, asfreq_info *af_info, int *year): cdef: - date_info dinfo + pandas_datetimestruct dts int quarter - date_info_from_days_and_time(&dinfo, unix_date, 0) + date_info_from_days_and_time(&dts, unix_date, 0) if af_info.to_q_year_end != 12: - dinfo.month -= af_info.to_q_year_end - if dinfo.month <= 0: - dinfo.month += 12 + dts.month -= af_info.to_q_year_end + if dts.month <= 0: + dts.month += 12 else: - dinfo.year += 1 + dts.year += 1 - year[0] = dinfo.year - 
quarter = month_to_quarter(dinfo.month) + year[0] = dts.year + quarter = month_to_quarter(dts.month) return quarter @@ -528,10 +523,6 @@ def periodarr_to_dt64arr(ndarray[int64_t] periodarr, int freq): return out -cdef char START = 'S' -cdef char END = 'E' - - cpdef int64_t period_asfreq(int64_t ordinal, int freq1, int freq2, bint end): """ Convert period ordinal from one frequency to another, and if upsampling, @@ -539,14 +530,15 @@ cpdef int64_t period_asfreq(int64_t ordinal, int freq1, int freq2, bint end): """ cdef: int64_t retval + freq_conv_func func + asfreq_info af_info if ordinal == iNaT: return iNaT - if end: - retval = asfreq(ordinal, freq1, freq2, END) - else: - retval = asfreq(ordinal, freq1, freq2, START) + func = get_asfreq_func(freq1, freq2) + get_asfreq_info(freq1, freq2, end, &af_info) + retval = func(ordinal, &af_info) if retval == INT32_MIN: raise ValueError('Frequency conversion failed') @@ -554,6 +546,58 @@ cpdef int64_t period_asfreq(int64_t ordinal, int freq1, int freq2, bint end): return retval +cdef void get_asfreq_info(int from_freq, int to_freq, + bint is_end, asfreq_info *af_info) nogil: + """ + Construct the `asfreq_info` object used to convert an ordinal from + `from_freq` to `to_freq`. 
+ + Parameters + ---------- + from_freq : int + to_freq int + is_end : bool + af_info : *asfreq_info + """ + cdef: + int from_group = get_freq_group(from_freq) + int to_group = get_freq_group(to_freq) + + af_info.is_end = is_end + + af_info.intraday_conversion_factor = get_daytime_conversion_factor( + get_freq_group_index(max_value(from_group, FR_DAY)), + get_freq_group_index(max_value(to_group, FR_DAY))) + + if from_group == FR_WK: + af_info.from_week_end = calc_week_end(from_freq, from_group) + elif from_group == FR_ANN: + af_info.from_a_year_end = calc_a_year_end(from_freq, from_group) + elif from_group == FR_QTR: + af_info.from_q_year_end = calc_a_year_end(from_freq, from_group) + + if to_group == FR_WK: + af_info.to_week_end = calc_week_end(to_freq, to_group) + elif to_group == FR_ANN: + af_info.to_a_year_end = calc_a_year_end(to_freq, to_group) + elif to_group == FR_QTR: + af_info.to_q_year_end = calc_a_year_end(to_freq, to_group) + + +@cython.cdivision +cdef int calc_a_year_end(int freq, int group) nogil: + cdef: + int result = (freq - group) % 12 + if result == 0: + return 12 + else: + return result + + +cdef inline int calc_week_end(int freq, int group) nogil: + return freq - group + + def period_asfreq_arr(ndarray[int64_t] arr, int freq1, int freq2, bint end): """ Convert int64-array of period ordinals from one frequency to another, and @@ -565,18 +609,12 @@ def period_asfreq_arr(ndarray[int64_t] arr, int freq1, int freq2, bint end): freq_conv_func func asfreq_info af_info int64_t val - char relation n = len(arr) result = np.empty(n, dtype=np.int64) - if end: - relation = END - else: - relation = START - func = get_asfreq_func(freq1, freq2) - get_asfreq_info(freq1, freq2, relation, &af_info) + get_asfreq_info(freq1, freq2, end, &af_info) mask = arr == iNaT if mask.any(): # NaT process @@ -605,24 +643,12 @@ def period_ordinal(int y, int m, int d, int h, int min, cpdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq) nogil: cdef: 
pandas_datetimestruct dts - date_info dinfo float subsecond_fraction if ordinal == NPY_NAT: return NPY_NAT - get_date_info(ordinal, freq, &dinfo) - - dts.year = dinfo.year - dts.month = dinfo.month - dts.day = dinfo.day - dts.hour = dinfo.hour - dts.min = dinfo.minute - dts.sec = int(dinfo.second) - subsecond_fraction = dinfo.second - dts.sec - dts.us = int((subsecond_fraction) * 1e6) - dts.ps = int(((subsecond_fraction) * 1e6 - dts.us) * 1e6) - + get_date_info(ordinal, freq, &dts) return dtstruct_to_dt64(&dts) @@ -680,7 +706,7 @@ cdef list str_extra_fmts = ["^`AB`^", "^`CD`^", "^`EF`^", cdef object _period_strftime(int64_t value, int freq, object fmt): cdef: Py_ssize_t i - date_info dinfo + pandas_datetimestruct dts char *formatted object pat, repl, result list found_pat = [False] * len(extra_fmts) @@ -689,7 +715,7 @@ cdef object _period_strftime(int64_t value, int freq, object fmt): if PyUnicode_Check(fmt): fmt = fmt.encode('utf-8') - get_date_info(value, freq, &dinfo) + get_date_info(value, freq, &dts) for i in range(len(extra_fmts)): pat = extra_fmts[i][0] repl = extra_fmts[i][1] @@ -697,7 +723,7 @@ cdef object _period_strftime(int64_t value, int freq, object fmt): fmt = fmt.replace(pat, repl) found_pat[i] = True - formatted = c_strftime(&dinfo, fmt) + formatted = c_strftime(&dts, fmt) result = util.char_to_string(formatted) free(formatted) @@ -736,9 +762,9 @@ ctypedef int (*accessor)(int64_t ordinal, int freq) except INT32_MIN cdef int pyear(int64_t ordinal, int freq): cdef: - date_info dinfo - get_date_info(ordinal, freq, &dinfo) - return dinfo.year + pandas_datetimestruct dts + get_date_info(ordinal, freq, &dts) + return dts.year @cython.cdivision @@ -762,65 +788,65 @@ cdef int pquarter(int64_t ordinal, int freq): cdef int pmonth(int64_t ordinal, int freq): cdef: - date_info dinfo - get_date_info(ordinal, freq, &dinfo) - return dinfo.month + pandas_datetimestruct dts + get_date_info(ordinal, freq, &dts) + return dts.month cdef int pday(int64_t ordinal, int 
freq): cdef: - date_info dinfo - get_date_info(ordinal, freq, &dinfo) - return dinfo.day + pandas_datetimestruct dts + get_date_info(ordinal, freq, &dts) + return dts.day cdef int pweekday(int64_t ordinal, int freq): cdef: - date_info dinfo - get_date_info(ordinal, freq, &dinfo) - return dayofweek(dinfo.year, dinfo.month, dinfo.day) + pandas_datetimestruct dts + get_date_info(ordinal, freq, &dts) + return dayofweek(dts.year, dts.month, dts.day) cdef int pday_of_year(int64_t ordinal, int freq): cdef: - date_info dinfo - get_date_info(ordinal, freq, &dinfo) - return get_day_of_year(dinfo.year, dinfo.month, dinfo.day) + pandas_datetimestruct dts + get_date_info(ordinal, freq, &dts) + return get_day_of_year(dts.year, dts.month, dts.day) cdef int pweek(int64_t ordinal, int freq): cdef: - date_info dinfo - get_date_info(ordinal, freq, &dinfo) - return ccalendar.get_week_of_year(dinfo.year, dinfo.month, dinfo.day) + pandas_datetimestruct dts + get_date_info(ordinal, freq, &dts) + return ccalendar.get_week_of_year(dts.year, dts.month, dts.day) cdef int phour(int64_t ordinal, int freq): cdef: - date_info dinfo - get_date_info(ordinal, freq, &dinfo) - return dinfo.hour + pandas_datetimestruct dts + get_date_info(ordinal, freq, &dts) + return dts.hour cdef int pminute(int64_t ordinal, int freq): cdef: - date_info dinfo - get_date_info(ordinal, freq, &dinfo) - return dinfo.minute + pandas_datetimestruct dts + get_date_info(ordinal, freq, &dts) + return dts.min cdef int psecond(int64_t ordinal, int freq): cdef: - date_info dinfo - get_date_info(ordinal, freq, &dinfo) - return dinfo.second + pandas_datetimestruct dts + get_date_info(ordinal, freq, &dts) + return dts.sec cdef int pdays_in_month(int64_t ordinal, int freq): cdef: - date_info dinfo - get_date_info(ordinal, freq, &dinfo) - return ccalendar.get_days_in_month(dinfo.year, dinfo.month) + pandas_datetimestruct dts + get_date_info(ordinal, freq, &dts) + return ccalendar.get_days_in_month(dts.year, dts.month) def 
get_period_field_arr(int code, ndarray[int64_t] arr, int freq): diff --git a/pandas/tests/scalar/period/test_period_asfreq.py b/pandas/tests/scalar/period/test_asfreq.py similarity index 100% rename from pandas/tests/scalar/period/test_period_asfreq.py rename to pandas/tests/scalar/period/test_asfreq.py From 86dfeae5dfe27fc5d24967c06f070676ad95c89d Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 25 Feb 2018 08:11:41 -0800 Subject: [PATCH 199/217] Fix+test DTI/TDI/PI add/sub with ndarray[datetime64/timedelta64] (#19847) --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/core/indexes/datetimelike.py | 21 ++++--- pandas/core/indexes/timedeltas.py | 4 ++ .../indexes/datetimes/test_arithmetic.py | 56 ++++++++++++++++++ .../tests/indexes/period/test_arithmetic.py | 58 +++++++++++++++++++ .../indexes/timedeltas/test_arithmetic.py | 49 ++++++++++++++++ 6 files changed, 182 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index ba24c93121dcb..b7dfdf9cfea1e 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -756,6 +756,7 @@ Datetimelike - Bug in :func:`Timestamp.floor` :func:`DatetimeIndex.floor` where time stamps far in the future and past were not rounded correctly (:issue:`19206`) - Bug in :func:`to_datetime` where passing an out-of-bounds datetime with ``errors='coerce'`` and ``utc=True`` would raise ``OutOfBoundsDatetime`` instead of parsing to ``NaT`` (:issue:`19612`) - Bug in :class:`DatetimeIndex` and :class:`TimedeltaIndex` addition and subtraction where name of the returned object was not always set consistently. 
(:issue:`19744`) +- Bug in :class:`DatetimeIndex` and :class:`TimedeltaIndex` addition and subtraction where operations with numpy arrays raised ``TypeError`` (:issue:`19847`) - Timedelta diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index a68d883f04380..9411428b2e68d 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -31,6 +31,7 @@ is_integer_dtype, is_object_dtype, is_string_dtype, + is_datetime64_dtype, is_period_dtype, is_timedelta64_dtype) from pandas.core.dtypes.generic import ( @@ -676,9 +677,7 @@ def _add_datetimelike_methods(cls): """ def __add__(self, other): - from pandas.core.index import Index - from pandas.core.indexes.timedeltas import TimedeltaIndex - from pandas.tseries.offsets import DateOffset + from pandas import Index, DatetimeIndex, TimedeltaIndex, DateOffset other = lib.item_from_zerodim(other) if isinstance(other, ABCSeries): @@ -710,6 +709,9 @@ def __add__(self, other): .format(typ=type(other))) elif isinstance(other, Index): result = self._add_datelike(other) + elif is_datetime64_dtype(other): + # ndarray[datetime64]; note DatetimeIndex is caught above + return self + DatetimeIndex(other) elif is_integer_dtype(other) and self.freq is None: # GH#19123 raise NullFrequencyError("Cannot shift with no freq") @@ -729,10 +731,7 @@ def __radd__(self, other): cls.__radd__ = __radd__ def __sub__(self, other): - from pandas.core.index import Index - from pandas.core.indexes.datetimes import DatetimeIndex - from pandas.core.indexes.timedeltas import TimedeltaIndex - from pandas.tseries.offsets import DateOffset + from pandas import Index, DatetimeIndex, TimedeltaIndex, DateOffset other = lib.item_from_zerodim(other) if isinstance(other, ABCSeries): @@ -764,6 +763,9 @@ def __sub__(self, other): .format(typ=type(other).__name__)) elif isinstance(other, DatetimeIndex): result = self._sub_datelike(other) + elif is_datetime64_dtype(other): + # ndarray[datetime64]; note we 
caught DatetimeIndex earlier + return self - DatetimeIndex(other) elif isinstance(other, Index): raise TypeError("cannot subtract {typ1} and {typ2}" .format(typ1=type(self).__name__, @@ -782,6 +784,11 @@ def __sub__(self, other): cls.__sub__ = __sub__ def __rsub__(self, other): + if is_datetime64_dtype(other) and is_timedelta64_dtype(self): + # ndarray[datetime64] cannot be subtracted from self, so + # we need to wrap in DatetimeIndex and flip the operation + from pandas import DatetimeIndex + return DatetimeIndex(other) - self return -(self - other) cls.__rsub__ = __rsub__ diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 219adfdb66c82..6f80962eab079 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -377,6 +377,10 @@ def _add_delta(self, delta): new_values = self._add_delta_td(delta) elif isinstance(delta, TimedeltaIndex): new_values = self._add_delta_tdi(delta) + elif is_timedelta64_dtype(delta): + # ndarray[timedelta64] --> wrap in TimedeltaIndex + delta = TimedeltaIndex(delta) + new_values = self._add_delta_tdi(delta) else: raise TypeError("cannot add the type {0} to a TimedeltaIndex" .format(type(delta))) diff --git a/pandas/tests/indexes/datetimes/test_arithmetic.py b/pandas/tests/indexes/datetimes/test_arithmetic.py index 7900c983b6c77..5a7ea44f3698c 100644 --- a/pandas/tests/indexes/datetimes/test_arithmetic.py +++ b/pandas/tests/indexes/datetimes/test_arithmetic.py @@ -571,6 +571,62 @@ def test_add_datetimelike_and_dti_tz(self, addend): with tm.assert_raises_regex(TypeError, msg): addend + dti_tz + # ------------------------------------------------------------- + # __add__/__sub__ with ndarray[datetime64] and ndarray[timedelta64] + + def test_dti_add_dt64_array_raises(self, tz): + dti = pd.date_range('2016-01-01', periods=3, tz=tz) + dtarr = dti.values + + with pytest.raises(TypeError): + dti + dtarr + with pytest.raises(TypeError): + dtarr + dti + + def 
test_dti_sub_dt64_array_naive(self): + dti = pd.date_range('2016-01-01', periods=3, tz=None) + dtarr = dti.values + + expected = dti - dti + result = dti - dtarr + tm.assert_index_equal(result, expected) + result = dtarr - dti + tm.assert_index_equal(result, expected) + + def test_dti_sub_dt64_array_aware_raises(self, tz): + if tz is None: + return + dti = pd.date_range('2016-01-01', periods=3, tz=tz) + dtarr = dti.values + + with pytest.raises(TypeError): + dti - dtarr + with pytest.raises(TypeError): + dtarr - dti + + def test_dti_add_td64_array(self, tz): + dti = pd.date_range('2016-01-01', periods=3, tz=tz) + tdi = dti - dti.shift(1) + tdarr = tdi.values + + expected = dti + tdi + result = dti + tdarr + tm.assert_index_equal(result, expected) + result = tdarr + dti + tm.assert_index_equal(result, expected) + + def test_dti_sub_td64_array(self, tz): + dti = pd.date_range('2016-01-01', periods=3, tz=tz) + tdi = dti - dti.shift(1) + tdarr = tdi.values + + expected = dti - tdi + result = dti - tdarr + tm.assert_index_equal(result, expected) + + with pytest.raises(TypeError): + tdarr - dti + # ------------------------------------------------------------- def test_sub_dti_dti(self): diff --git a/pandas/tests/indexes/period/test_arithmetic.py b/pandas/tests/indexes/period/test_arithmetic.py index 0c06e6a4963b4..d7bf1e0210f62 100644 --- a/pandas/tests/indexes/period/test_arithmetic.py +++ b/pandas/tests/indexes/period/test_arithmetic.py @@ -255,6 +255,64 @@ def test_comp_nat(self, dtype): class TestPeriodIndexArithmetic(object): + + # ----------------------------------------------------------------- + # __add__/__sub__ with ndarray[datetime64] and ndarray[timedelta64] + + def test_pi_add_sub_dt64_array_raises(self): + rng = pd.period_range('1/1/2000', freq='D', periods=3) + dti = pd.date_range('2016-01-01', periods=3) + dtarr = dti.values + + with pytest.raises(TypeError): + rng + dtarr + with pytest.raises(TypeError): + dtarr + rng + + with pytest.raises(TypeError): + 
rng - dtarr + with pytest.raises(TypeError): + dtarr - rng + + def test_pi_add_sub_td64_array_non_tick_raises(self): + rng = pd.period_range('1/1/2000', freq='Q', periods=3) + dti = pd.date_range('2016-01-01', periods=3) + tdi = dti - dti.shift(1) + tdarr = tdi.values + + with pytest.raises(period.IncompatibleFrequency): + rng + tdarr + with pytest.raises(period.IncompatibleFrequency): + tdarr + rng + + with pytest.raises(period.IncompatibleFrequency): + rng - tdarr + with pytest.raises(period.IncompatibleFrequency): + tdarr - rng + + @pytest.mark.xfail(reason='op with TimedeltaIndex raises, with ndarray OK') + def test_pi_add_sub_td64_array_tick(self): + rng = pd.period_range('1/1/2000', freq='Q', periods=3) + dti = pd.date_range('2016-01-01', periods=3) + tdi = dti - dti.shift(1) + tdarr = tdi.values + + expected = rng + tdi + result = rng + tdarr + tm.assert_index_equal(result, expected) + result = tdarr + rng + tm.assert_index_equal(result, expected) + + expected = rng - tdi + result = rng - tdarr + tm.assert_index_equal(result, expected) + + with pytest.raises(TypeError): + tdarr - rng + + # ----------------------------------------------------------------- + # operations with array/Index of DateOffset objects + @pytest.mark.parametrize('box', [np.array, pd.Index]) def test_pi_add_offset_array(self, box): # GH#18849 diff --git a/pandas/tests/indexes/timedeltas/test_arithmetic.py b/pandas/tests/indexes/timedeltas/test_arithmetic.py index 282501860f7e5..6a80b995b6ee9 100644 --- a/pandas/tests/indexes/timedeltas/test_arithmetic.py +++ b/pandas/tests/indexes/timedeltas/test_arithmetic.py @@ -586,6 +586,55 @@ def test_tdi_radd_timestamp(self): expected = DatetimeIndex(['2011-01-02', '2011-01-03']) tm.assert_index_equal(result, expected) + # ------------------------------------------------------------- + # __add__/__sub__ with ndarray[datetime64] and ndarray[timedelta64] + + def test_tdi_sub_dt64_array(self): + dti = pd.date_range('2016-01-01', periods=3) + tdi = dti 
- dti.shift(1) + dtarr = dti.values + + with pytest.raises(TypeError): + tdi - dtarr + + # TimedeltaIndex.__rsub__ + expected = pd.DatetimeIndex(dtarr) - tdi + result = dtarr - tdi + tm.assert_index_equal(result, expected) + + def test_tdi_add_dt64_array(self): + dti = pd.date_range('2016-01-01', periods=3) + tdi = dti - dti.shift(1) + dtarr = dti.values + + expected = pd.DatetimeIndex(dtarr) + tdi + result = tdi + dtarr + tm.assert_index_equal(result, expected) + result = dtarr + tdi + tm.assert_index_equal(result, expected) + + def test_tdi_add_td64_array(self): + dti = pd.date_range('2016-01-01', periods=3) + tdi = dti - dti.shift(1) + tdarr = tdi.values + + expected = 2 * tdi + result = tdi + tdarr + tm.assert_index_equal(result, expected) + result = tdarr + tdi + tm.assert_index_equal(result, expected) + + def test_tdi_sub_td64_array(self): + dti = pd.date_range('2016-01-01', periods=3) + tdi = dti - dti.shift(1) + tdarr = tdi.values + + expected = 0 * tdi + result = tdi - tdarr + tm.assert_index_equal(result, expected) + result = tdarr - tdi + tm.assert_index_equal(result, expected) + # ------------------------------------------------------------- def test_subtraction_ops(self): From 21bc4d57cc37b4ebc4ea7d996076e3d886fcbf2f Mon Sep 17 00:00:00 2001 From: jjames34 Date: Sun, 25 Feb 2018 10:17:10 -0600 Subject: [PATCH 200/217] Fixed issue with leftover test.json file (#19879) --- doc/source/whatsnew/v0.23.0.txt | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index b7dfdf9cfea1e..99a3773603fc4 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -214,13 +214,22 @@ Please note that the string `index` is not supported with the round trip format, :okwarning: df.index.name = 'index' + df.to_json('test.json', orient='table') new_df = pd.read_json('test.json', orient='table') new_df - print(new_df.index.name) + new_df.dtypes + +.. 
ipython:: python + :suppress: + + import os + os.remove('test.json') + .. _whatsnew_0230.enhancements.assign_dependent: + ``.assign()`` accepts dependent arguments ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ From b9149b07a4b10dcbdf7f46f52b650c9418a1b16e Mon Sep 17 00:00:00 2001 From: Ming Li <14131823+minggli@users.noreply.github.com> Date: Sun, 25 Feb 2018 23:05:25 +0000 Subject: [PATCH 201/217] ENH: ISO8601-compliant datetime string conversion in `iterrows()` and Series construction. (#19762) --- doc/source/whatsnew/v0.23.0.txt | 2 +- pandas/core/dtypes/cast.py | 17 ++++++++++++----- pandas/core/internals.py | 4 ++-- pandas/tests/dtypes/test_cast.py | 4 ++++ pandas/tests/frame/test_api.py | 15 ++++++++++++++- 5 files changed, 33 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 99a3773603fc4..7f33372f765fb 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -766,7 +766,6 @@ Datetimelike - Bug in :func:`to_datetime` where passing an out-of-bounds datetime with ``errors='coerce'`` and ``utc=True`` would raise ``OutOfBoundsDatetime`` instead of parsing to ``NaT`` (:issue:`19612`) - Bug in :class:`DatetimeIndex` and :class:`TimedeltaIndex` addition and subtraction where name of the returned object was not always set consistently. 
(:issue:`19744`) - Bug in :class:`DatetimeIndex` and :class:`TimedeltaIndex` addition and subtraction where operations with numpy arrays raised ``TypeError`` (:issue:`19847`) -- Timedelta ^^^^^^^^^ @@ -918,6 +917,7 @@ Reshaping - :func:`Series.rename` now accepts ``axis`` as a kwarg (:issue:`18589`) - Comparisons between :class:`Series` and :class:`Index` would return a ``Series`` with an incorrect name, ignoring the ``Index``'s name attribute (:issue:`19582`) - Bug in :func:`qcut` where datetime and timedelta data with ``NaT`` present raised a ``ValueError`` (:issue:`19768`) +- Bug in :func:`DataFrame.iterrows`, which would infers strings not compliant to `ISO8601 `_ to datetimes (:issue:`19671`) Other ^^^^^ diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 352ce29f5c37b..b1d0dc2a2442e 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -904,16 +904,23 @@ def maybe_infer_to_datetimelike(value, convert_dates=False): def try_datetime(v): # safe coerce to datetime64 try: - v = tslib.array_to_datetime(v, errors='raise') + # GH19671 + v = tslib.array_to_datetime(v, + require_iso8601=True, + errors='raise') except ValueError: # we might have a sequence of the same-datetimes with tz's # if so coerce to a DatetimeIndex; if they are not the same, - # then these stay as object dtype + # then these stay as object dtype, xref GH19671 try: - from pandas import to_datetime - return to_datetime(v) - except Exception: + from pandas._libs.tslibs import conversion + from pandas import DatetimeIndex + + values, tz = conversion.datetime_to_datetime64(v) + return DatetimeIndex(values).tz_localize( + 'UTC').tz_convert(tz=tz) + except (ValueError, TypeError): pass except Exception: diff --git a/pandas/core/internals.py b/pandas/core/internals.py index d385185fbb558..00ef8f9cef598 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -2602,8 +2602,8 @@ def _maybe_coerce_values(self, values): """Input validation for values 
passed to __init__. Ensure that we have datetime64ns, coercing if necessary. - Parametetrs - ----------- + Parameters + ---------- values : array-like Must be convertible to datetime64 diff --git a/pandas/tests/dtypes/test_cast.py b/pandas/tests/dtypes/test_cast.py index 31bd962b67afb..96a9e3227b40b 100644 --- a/pandas/tests/dtypes/test_cast.py +++ b/pandas/tests/dtypes/test_cast.py @@ -301,6 +301,10 @@ def test_maybe_infer_to_datetimelike(self): [NaT, 'b', 1]])) assert result.size == 6 + # GH19671 + result = Series(['M1701', Timestamp('20130101')]) + assert result.dtype.kind == 'O' + class TestConvert(object): diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 91fe7f99ca681..8ba5469480e64 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -15,7 +15,8 @@ from numpy.random import randn import numpy as np -from pandas import DataFrame, Series, date_range, timedelta_range, Categorical +from pandas import (DataFrame, Series, date_range, timedelta_range, + Categorical, SparseDataFrame) import pandas as pd from pandas.util.testing import (assert_almost_equal, @@ -214,6 +215,18 @@ def test_iterrows(self): exp = self.mixed_frame.loc[k] self._assert_series_equal(v, exp) + def test_iterrows_iso8601(self): + # GH19671 + if self.klass == SparseDataFrame: + pytest.xfail(reason='SparseBlock datetime type not implemented.') + + s = self.klass( + {'non_iso8601': ['M1701', 'M1802', 'M1903', 'M2004'], + 'iso8601': date_range('2000-01-01', periods=4, freq='M')}) + for k, v in s.iterrows(): + exp = s.loc[k] + self._assert_series_equal(v, exp) + def test_itertuples(self): for i, tup in enumerate(self.frame.itertuples()): s = self.klass._constructor_sliced(tup[1:]) From 48785e6c0929f66fdae91289cf853d9982d89c29 Mon Sep 17 00:00:00 2001 From: Ming Li <14131823+minggli@users.noreply.github.com> Date: Sun, 25 Feb 2018 23:06:25 +0000 Subject: [PATCH 202/217] parameterize test_pct_change_periods_freq (#19897) --- 
pandas/tests/frame/test_timeseries.py | 45 ++++++++++++-------------- pandas/tests/series/test_timeseries.py | 41 ++++++++++++----------- 2 files changed, 40 insertions(+), 46 deletions(-) diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index 9f94439a71a57..e1bc310e1e934 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -122,36 +122,31 @@ def test_pct_change_shift_over_nas(self): edf = DataFrame({'a': expected, 'b': expected}) assert_frame_equal(chg, edf) - def test_pct_change_periods_freq(self): + @pytest.mark.parametrize("freq, periods, fill_method, limit", + [('5B', 5, None, None), + ('3B', 3, None, None), + ('3B', 3, 'bfill', None), + ('7B', 7, 'pad', 1), + ('7B', 7, 'bfill', 3), + ('14B', 14, None, None)]) + def test_pct_change_periods_freq(self, freq, periods, fill_method, limit): # GH 7292 - rs_freq = self.tsframe.pct_change(freq='5B') - rs_periods = self.tsframe.pct_change(5) - assert_frame_equal(rs_freq, rs_periods) - - rs_freq = self.tsframe.pct_change(freq='3B', fill_method=None) - rs_periods = self.tsframe.pct_change(3, fill_method=None) - assert_frame_equal(rs_freq, rs_periods) - - rs_freq = self.tsframe.pct_change(freq='3B', fill_method='bfill') - rs_periods = self.tsframe.pct_change(3, fill_method='bfill') - assert_frame_equal(rs_freq, rs_periods) - - rs_freq = self.tsframe.pct_change(freq='7B', - fill_method='pad', - limit=1) - rs_periods = self.tsframe.pct_change(7, fill_method='pad', limit=1) - assert_frame_equal(rs_freq, rs_periods) - - rs_freq = self.tsframe.pct_change(freq='7B', - fill_method='bfill', - limit=3) - rs_periods = self.tsframe.pct_change(7, fill_method='bfill', limit=3) + rs_freq = self.tsframe.pct_change(freq=freq, + fill_method=fill_method, + limit=limit) + rs_periods = self.tsframe.pct_change(periods, + fill_method=fill_method, + limit=limit) assert_frame_equal(rs_freq, rs_periods) empty_ts = DataFrame(index=self.tsframe.index, 
columns=self.tsframe.columns) - rs_freq = empty_ts.pct_change(freq='14B') - rs_periods = empty_ts.pct_change(14) + rs_freq = empty_ts.pct_change(freq=freq, + fill_method=fill_method, + limit=limit) + rs_periods = empty_ts.pct_change(periods, + fill_method=fill_method, + limit=limit) assert_frame_equal(rs_freq, rs_periods) def test_frame_ctor_datetime64_column(self): diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index 63a05ef7de565..baf2619c7b022 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -355,31 +355,30 @@ def test_pct_change_shift_over_nas(self): expected = Series([np.nan, 0.5, 0., 2.5 / 1.5 - 1, .2]) assert_series_equal(chg, expected) - def test_pct_change_periods_freq(self): + @pytest.mark.parametrize("freq, periods, fill_method, limit", + [('5B', 5, None, None), + ('3B', 3, None, None), + ('3B', 3, 'bfill', None), + ('7B', 7, 'pad', 1), + ('7B', 7, 'bfill', 3), + ('14B', 14, None, None)]) + def test_pct_change_periods_freq(self, freq, periods, fill_method, limit): # GH 7292 - rs_freq = self.ts.pct_change(freq='5B') - rs_periods = self.ts.pct_change(5) - assert_series_equal(rs_freq, rs_periods) - - rs_freq = self.ts.pct_change(freq='3B', fill_method=None) - rs_periods = self.ts.pct_change(3, fill_method=None) - assert_series_equal(rs_freq, rs_periods) - - rs_freq = self.ts.pct_change(freq='3B', fill_method='bfill') - rs_periods = self.ts.pct_change(3, fill_method='bfill') - assert_series_equal(rs_freq, rs_periods) - - rs_freq = self.ts.pct_change(freq='7B', fill_method='pad', limit=1) - rs_periods = self.ts.pct_change(7, fill_method='pad', limit=1) - assert_series_equal(rs_freq, rs_periods) - - rs_freq = self.ts.pct_change(freq='7B', fill_method='bfill', limit=3) - rs_periods = self.ts.pct_change(7, fill_method='bfill', limit=3) + rs_freq = self.ts.pct_change(freq=freq, + fill_method=fill_method, + limit=limit) + rs_periods = self.ts.pct_change(periods, + 
fill_method=fill_method, + limit=limit) assert_series_equal(rs_freq, rs_periods) empty_ts = Series(index=self.ts.index) - rs_freq = empty_ts.pct_change(freq='14B') - rs_periods = empty_ts.pct_change(14) + rs_freq = empty_ts.pct_change(freq=freq, + fill_method=fill_method, + limit=limit) + rs_periods = empty_ts.pct_change(periods, + fill_method=fill_method, + limit=limit) assert_series_equal(rs_freq, rs_periods) def test_autocorr(self): From 6003ff65521bb29cae6351690f41ff56d94848e3 Mon Sep 17 00:00:00 2001 From: Wilson Lin Date: Mon, 26 Feb 2018 00:43:00 -0800 Subject: [PATCH 203/217] DOC: Make API reference intro section concise (#19846) --- doc/source/api.rst | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index b8aad67e147ba..0e47499a03f3a 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -6,19 +6,18 @@ API Reference ************* This page gives an overview of all public pandas objects, functions and -methods. In general, all classes and functions exposed in the top-level -``pandas.*`` namespace are regarded as public. +methods. All classes and functions exposed in ``pandas.*`` namespace are public. -Further some of the subpackages are public, including ``pandas.errors``, -``pandas.plotting``, and ``pandas.testing``. Certain functions in the -``pandas.io`` and ``pandas.tseries`` submodules are public as well (those -mentioned in the documentation). Further, the ``pandas.api.types`` subpackage -holds some public functions related to data types in pandas. +Some subpackages are public which include ``pandas.errors``, +``pandas.plotting``, and ``pandas.testing``. Public functions in +``pandas.io`` and ``pandas.tseries`` submodules are mentioned in +the documentation. ``pandas.api.types`` subpackage holds some +public functions related to data types in pandas. .. warning:: - The ``pandas.core``, ``pandas.compat``, and ``pandas.util`` top-level modules are considered to be PRIVATE. 
Stability of functionality in those modules in not guaranteed. + The ``pandas.core``, ``pandas.compat``, and ``pandas.util`` top-level modules are PRIVATE. Stable functionality in such modules is not guaranteed. .. _api.functions: From 769e4c2962972a928c0538dee96678fa89f4dc10 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 26 Feb 2018 09:46:25 +0100 Subject: [PATCH 204/217] DOC/BLD: unpin sphinx to use sphinx 1.7 (#19687) --- ci/requirements-3.6_DOC.run | 2 +- ci/requirements_dev.txt | 2 +- doc/sphinxext/numpydoc/numpydoc.py | 1 - 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/ci/requirements-3.6_DOC.run b/ci/requirements-3.6_DOC.run index 6c45e3371e9cf..084f38ce17eb2 100644 --- a/ci/requirements-3.6_DOC.run +++ b/ci/requirements-3.6_DOC.run @@ -1,7 +1,7 @@ ipython ipykernel ipywidgets -sphinx=1.5* +sphinx nbconvert nbformat notebook diff --git a/ci/requirements_dev.txt b/ci/requirements_dev.txt index a474658fa2922..82f8de277c57b 100644 --- a/ci/requirements_dev.txt +++ b/ci/requirements_dev.txt @@ -7,4 +7,4 @@ pytest>=3.1 python-dateutil>=2.5.0 pytz setuptools>=3.3 -sphinx=1.5* +sphinx diff --git a/doc/sphinxext/numpydoc/numpydoc.py b/doc/sphinxext/numpydoc/numpydoc.py index 2bc2d1e91ed3f..4861aa90edce1 100755 --- a/doc/sphinxext/numpydoc/numpydoc.py +++ b/doc/sphinxext/numpydoc/numpydoc.py @@ -26,7 +26,6 @@ raise RuntimeError("Sphinx 1.0.1 or newer is required") from .docscrape_sphinx import get_doc_object, SphinxDocString -from sphinx.util.compat import Directive if sys.version_info[0] >= 3: sixu = lambda s: s From 51093fc940fb7b3733c71cfa880e155fc073b9c1 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 26 Feb 2018 09:48:10 +0100 Subject: [PATCH 205/217] DOC: fix numpydoc section titles in misc plotting docstrings (#19899) --- pandas/plotting/_misc.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py index 
d6048f54993e6..45594e9c6ea95 100644 --- a/pandas/plotting/_misc.py +++ b/pandas/plotting/_misc.py @@ -149,8 +149,8 @@ def _get_marker_compat(marker): def radviz(frame, class_column, ax=None, color=None, colormap=None, **kwds): """RadViz - a multivariate data visualization algorithm - Parameters: - ----------- + Parameters + ---------- frame: DataFrame class_column: str Column name containing class names @@ -163,8 +163,8 @@ def radviz(frame, class_column, ax=None, color=None, colormap=None, **kwds): kwds: keywords Options to pass to matplotlib scatter plotting method - Returns: - -------- + Returns + ------- ax: Matplotlib axis object """ import matplotlib.pyplot as plt @@ -247,8 +247,8 @@ def andrews_curves(frame, class_column, ax=None, samples=200, color=None, linearly spaced between -pi and +pi. Each row of frame then corresponds to a single curve. - Parameters: - ----------- + Parameters + ---------- frame : DataFrame Data to be plotted, preferably normalized to (0.0, 1.0) class_column : Name of the column containing class names @@ -262,8 +262,8 @@ def andrews_curves(frame, class_column, ax=None, samples=200, color=None, kwds: keywords Options to pass to matplotlib plotting method - Returns: - -------- + Returns + ------- ax: Matplotlib axis object """ @@ -325,8 +325,8 @@ def f(t): def bootstrap_plot(series, fig=None, size=50, samples=500, **kwds): """Bootstrap plot. 
- Parameters: - ----------- + Parameters + ---------- series: Time series fig: matplotlib figure object, optional size: number of data points to consider during each sampling @@ -334,8 +334,8 @@ def bootstrap_plot(series, fig=None, size=50, samples=500, **kwds): kwds: optional keyword arguments for plotting commands, must be accepted by both hist and plot - Returns: - -------- + Returns + ------- fig: matplotlib figure """ import random @@ -503,15 +503,15 @@ def parallel_coordinates(frame, class_column, cols=None, ax=None, color=None, def lag_plot(series, lag=1, ax=None, **kwds): """Lag plot for time series. - Parameters: - ----------- + Parameters + ---------- series: Time series lag: lag of the scatter plot, default 1 ax: Matplotlib axis object, optional kwds: Matplotlib scatter method keyword arguments, optional - Returns: - -------- + Returns + ------- ax: Matplotlib axis object """ import matplotlib.pyplot as plt From eed8092e21be23f05e90f5568204997bdcbb886b Mon Sep 17 00:00:00 2001 From: Colin Date: Mon, 26 Feb 2018 05:34:58 -0500 Subject: [PATCH 206/217] DOC: small typo fix (#19901) --- pandas/core/groupby.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 852ad04cd8a2e..00643614e8803 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -1440,7 +1440,7 @@ def resample(self, rule, *args, **kwargs): def rolling(self, *args, **kwargs): """ Return a rolling grouper, providing rolling - functionaility per group + functionality per group """ from pandas.core.window import RollingGroupby @@ -1451,7 +1451,7 @@ def rolling(self, *args, **kwargs): def expanding(self, *args, **kwargs): """ Return an expanding grouper, providing expanding - functionaility per group + functionality per group """ from pandas.core.window import ExpandingGroupby From e3c5467aef77fe514184d8351f76d69fa7567c71 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 26 Feb 2018 03:10:14 -0800 Subject: 
[PATCH 207/217] cleanup order of operations kludges (#19895) --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/core/indexes/datetimelike.py | 48 +++++++------------ pandas/core/indexes/datetimes.py | 16 +++++-- pandas/core/indexes/timedeltas.py | 30 +++++++----- .../indexes/datetimes/test_arithmetic.py | 4 +- .../indexes/timedeltas/test_arithmetic.py | 2 +- 6 files changed, 49 insertions(+), 52 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 7f33372f765fb..fb22dc40e335f 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -586,6 +586,7 @@ Datetimelike API Changes - Operations between a :class:`Series` with dtype ``dtype='datetime64[ns]'`` and a :class:`PeriodIndex` will correctly raises ``TypeError`` (:issue:`18850`) - Subtraction of :class:`Series` with timezone-aware ``dtype='datetime64[ns]'`` with mis-matched timezones will raise ``TypeError`` instead of ``ValueError`` (:issue:`18817`) - :func:`pandas.merge` provides a more informative error message when trying to merge on timezone-aware and timezone-naive columns (:issue:`15800`) +- For :class:`DatetimeIndex` and :class:`TimedeltaIndex` with ``freq=None``, addition or subtraction of integer-dtyped array or ``Index`` will raise ``NullFrequencyError`` instead of ``TypeError`` (:issue:`19895`) .. 
_whatsnew_0230.api.other: diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 9411428b2e68d..8e56fc2775a56 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -32,6 +32,7 @@ is_object_dtype, is_string_dtype, is_datetime64_dtype, + is_datetime64tz_dtype, is_period_dtype, is_timedelta64_dtype) from pandas.core.dtypes.generic import ( @@ -200,8 +201,9 @@ def _evaluate_compare(self, other, op): if is_bool_dtype(result): result[mask] = False return result + + result[mask] = iNaT try: - result[mask] = iNaT return Index(result) except TypeError: return result @@ -349,7 +351,7 @@ def _nat_new(self, box=True): return result attribs = self._get_attributes_dict() - if not isinstance(self, ABCPeriodIndex): + if not is_period_dtype(self): attribs['freq'] = None return self._simple_new(result, **attribs) @@ -631,9 +633,9 @@ def _convert_scalar_indexer(self, key, kind=None): ._convert_scalar_indexer(key, kind=kind)) def _add_datelike(self, other): - raise TypeError("cannot add {0} and {1}" - .format(type(self).__name__, - type(other).__name__)) + raise TypeError("cannot add {cls} and {typ}" + .format(cls=type(self).__name__, + typ=type(other).__name__)) def _sub_datelike(self, other): raise com.AbstractMethodError(self) @@ -677,7 +679,7 @@ def _add_datetimelike_methods(cls): """ def __add__(self, other): - from pandas import Index, DatetimeIndex, TimedeltaIndex, DateOffset + from pandas import DateOffset other = lib.item_from_zerodim(other) if isinstance(other, ABCSeries): @@ -700,18 +702,9 @@ def __add__(self, other): elif is_offsetlike(other): # Array/Index of DateOffset objects result = self._addsub_offset_array(other, operator.add) - elif isinstance(self, TimedeltaIndex) and isinstance(other, Index): - if hasattr(other, '_add_delta'): - # i.e. 
DatetimeIndex, TimedeltaIndex, or PeriodIndex - result = other._add_delta(self) - else: - raise TypeError("cannot add TimedeltaIndex and {typ}" - .format(typ=type(other))) - elif isinstance(other, Index): - result = self._add_datelike(other) - elif is_datetime64_dtype(other): - # ndarray[datetime64]; note DatetimeIndex is caught above - return self + DatetimeIndex(other) + elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other): + # DatetimeIndex, ndarray[datetime64] + return self._add_datelike(other) elif is_integer_dtype(other) and self.freq is None: # GH#19123 raise NullFrequencyError("Cannot shift with no freq") @@ -731,7 +724,7 @@ def __radd__(self, other): cls.__radd__ = __radd__ def __sub__(self, other): - from pandas import Index, DatetimeIndex, TimedeltaIndex, DateOffset + from pandas import Index, DateOffset other = lib.item_from_zerodim(other) if isinstance(other, ABCSeries): @@ -756,20 +749,13 @@ def __sub__(self, other): elif is_offsetlike(other): # Array/Index of DateOffset objects result = self._addsub_offset_array(other, operator.sub) - elif isinstance(self, TimedeltaIndex) and isinstance(other, Index): - # We checked above for timedelta64_dtype(other) so this - # must be invalid. 
- raise TypeError("cannot subtract TimedeltaIndex and {typ}" - .format(typ=type(other).__name__)) - elif isinstance(other, DatetimeIndex): + elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other): + # DatetimeIndex, ndarray[datetime64] result = self._sub_datelike(other) - elif is_datetime64_dtype(other): - # ndarray[datetime64]; note we caught DatetimeIndex earlier - return self - DatetimeIndex(other) elif isinstance(other, Index): - raise TypeError("cannot subtract {typ1} and {typ2}" - .format(typ1=type(self).__name__, - typ2=type(other).__name__)) + raise TypeError("cannot subtract {cls} and {typ}" + .format(cls=type(self).__name__, + typ=type(other).__name__)) elif is_integer_dtype(other) and self.freq is None: # GH#19123 raise NullFrequencyError("Cannot shift with no freq") diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 36ea2bffb9531..55d8b7c18a622 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -864,11 +864,16 @@ def _add_datelike(self, other): def _sub_datelike(self, other): # subtract a datetime from myself, yielding a TimedeltaIndex from pandas import TimedeltaIndex - if isinstance(other, DatetimeIndex): + + if isinstance(other, (DatetimeIndex, np.ndarray)): + # if other is an ndarray, we assume it is datetime64-dtype + other = DatetimeIndex(other) + # require tz compat if not self._has_same_tz(other): - raise TypeError("DatetimeIndex subtraction must have the same " - "timezones or no timezones") + raise TypeError("{cls} subtraction must have the same " + "timezones or no timezones" + .format(cls=type(self).__name__)) result = self._sub_datelike_dti(other) elif isinstance(other, (datetime, np.datetime64)): other = Timestamp(other) @@ -885,8 +890,9 @@ def _sub_datelike(self, other): result = self._maybe_mask_results(result, fill_value=libts.iNaT) else: - raise TypeError("cannot subtract DatetimeIndex and {typ}" - .format(typ=type(other).__name__)) + raise 
TypeError("cannot subtract {cls} and {typ}" + .format(cls=type(self).__name__, + typ=type(other).__name__)) return TimedeltaIndex(result) def _sub_datelike_dti(self, other): diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 6f80962eab079..eebd52d7fb801 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -59,30 +59,28 @@ def _td_index_cmp(opname, cls): nat_result = True if opname == '__ne__' else False def wrapper(self, other): - msg = "cannot compare a TimedeltaIndex with type {0}" + msg = "cannot compare a {cls} with type {typ}" func = getattr(super(TimedeltaIndex, self), opname) if _is_convertible_to_td(other) or other is NaT: try: other = _to_m8(other) except ValueError: # failed to parse as timedelta - raise TypeError(msg.format(type(other))) + raise TypeError(msg.format(cls=type(self).__name__, + typ=type(other).__name__)) result = func(other) if isna(other): result.fill(nat_result) - else: - if not is_list_like(other): - raise TypeError(msg.format(type(other))) + elif not is_list_like(other): + raise TypeError(msg.format(cls=type(self).__name__, + typ=type(other).__name__)) + else: other = TimedeltaIndex(other).values result = func(other) result = com._values_from_object(result) - if isinstance(other, Index): - o_mask = other.values.view('i8') == iNaT - else: - o_mask = other.view('i8') == iNaT - + o_mask = np.array(isna(other)) if o_mask.any(): result[o_mask] = nat_result @@ -416,9 +414,15 @@ def _evaluate_with_timedelta_like(self, other, op): def _add_datelike(self, other): # adding a timedeltaindex to a datetimelike from pandas import Timestamp, DatetimeIndex + if other is NaT: # GH#19124 pd.NaT is treated like a timedelta return self._nat_new() + elif isinstance(other, (DatetimeIndex, np.ndarray)): + # if other is an ndarray, we assume it is datetime64-dtype + # defer to implementation in DatetimeIndex + other = DatetimeIndex(other) + return other + self else: other = 
Timestamp(other) i8 = self.asi8 @@ -434,7 +438,8 @@ def _sub_datelike(self, other): if other is NaT: return self._nat_new() else: - raise TypeError("cannot subtract a datelike from a TimedeltaIndex") + raise TypeError("cannot subtract a datelike from a {cls}" + .format(cls=type(self).__name__)) def _addsub_offset_array(self, other, op): # Add or subtract Array-like of DateOffset objects @@ -962,8 +967,7 @@ def _is_convertible_to_index(other): def _is_convertible_to_td(key): - # TODO: Not all DateOffset objects are convertible to Timedelta - return isinstance(key, (DateOffset, timedelta, Timedelta, + return isinstance(key, (Tick, timedelta, np.timedelta64, compat.string_types)) diff --git a/pandas/tests/indexes/datetimes/test_arithmetic.py b/pandas/tests/indexes/datetimes/test_arithmetic.py index 5a7ea44f3698c..0c56c6b16fb2f 100644 --- a/pandas/tests/indexes/datetimes/test_arithmetic.py +++ b/pandas/tests/indexes/datetimes/test_arithmetic.py @@ -508,7 +508,7 @@ def test_dti_sub_tdi(self, tz): result = dti - tdi tm.assert_index_equal(result, expected) - msg = 'cannot subtract TimedeltaIndex and DatetimeIndex' + msg = 'cannot subtract .*TimedeltaIndex' with tm.assert_raises_regex(TypeError, msg): tdi - dti @@ -531,7 +531,7 @@ def test_dti_isub_tdi(self, tz): result -= tdi tm.assert_index_equal(result, expected) - msg = 'cannot subtract TimedeltaIndex and DatetimeIndex' + msg = 'cannot subtract .*TimedeltaIndex' with tm.assert_raises_regex(TypeError, msg): tdi -= dti diff --git a/pandas/tests/indexes/timedeltas/test_arithmetic.py b/pandas/tests/indexes/timedeltas/test_arithmetic.py index 6a80b995b6ee9..9ffffb6ff06d5 100644 --- a/pandas/tests/indexes/timedeltas/test_arithmetic.py +++ b/pandas/tests/indexes/timedeltas/test_arithmetic.py @@ -792,7 +792,7 @@ def test_addition_ops(self): pytest.raises(ValueError, lambda: tdi[0:1] + dti) # random indexes - pytest.raises(TypeError, lambda: tdi + Int64Index([1, 2, 3])) + pytest.raises(NullFrequencyError, lambda: tdi + 
Int64Index([1, 2, 3])) # this is a union! # pytest.raises(TypeError, lambda : Int64Index([1,2,3]) + tdi) From d9f57a4a38a2a33aa17e57b58a253eeb77a53468 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 26 Feb 2018 03:37:50 -0800 Subject: [PATCH 208/217] make ops.add_foo take just class (#19828) --- pandas/core/frame.py | 4 +- pandas/core/ops.py | 184 ++++++++++++++++++++++------------- pandas/core/panel.py | 4 +- pandas/core/series.py | 4 +- pandas/core/sparse/array.py | 14 +-- pandas/core/sparse/frame.py | 4 +- pandas/core/sparse/series.py | 5 +- 7 files changed, 127 insertions(+), 92 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 061b69f25e7ac..e4ef1b97882d9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6131,8 +6131,8 @@ def isin(self, values): DataFrame._add_numeric_operations() DataFrame._add_series_or_dataframe_operations() -ops.add_flex_arithmetic_methods(DataFrame, **ops.frame_flex_funcs) -ops.add_special_arithmetic_methods(DataFrame, **ops.frame_special_funcs) +ops.add_flex_arithmetic_methods(DataFrame) +ops.add_special_arithmetic_methods(DataFrame) def _arrays_to_mgr(arrays, arr_names, index, columns, dtype=None): diff --git a/pandas/core/ops.py b/pandas/core/ops.py index b20f208d14dc5..7bdbac66b4f31 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -37,7 +37,7 @@ construct_1d_object_array_from_listlike) from pandas.core.dtypes.generic import ( ABCSeries, - ABCDataFrame, + ABCDataFrame, ABCPanel, ABCIndex, ABCSparseSeries, ABCSparseArray) @@ -711,6 +711,64 @@ def mask_cmp_op(x, y, op, allowed_types): # Functions that add arithmetic methods to objects, given arithmetic factory # methods +def _get_method_wrappers(cls): + """ + Find the appropriate operation-wrappers to use when defining flex/special + arithmetic, boolean, and comparison operations with the given class. 
+ + Parameters + ---------- + cls : class + + Returns + ------- + arith_flex : function or None + comp_flex : function or None + arith_special : function + comp_special : function + bool_special : function + + Notes + ----- + None is only returned for SparseArray + """ + if issubclass(cls, ABCSparseSeries): + # Be sure to catch this before ABCSeries and ABCSparseArray, + # as they will both come see SparseSeries as a subclass + arith_flex = _flex_method_SERIES + comp_flex = _flex_method_SERIES + arith_special = _arith_method_SPARSE_SERIES + comp_special = _arith_method_SPARSE_SERIES + bool_special = _bool_method_SERIES + # TODO: I don't think the functions defined by bool_method are tested + elif issubclass(cls, ABCSeries): + # Just Series; SparseSeries is caught above + arith_flex = _flex_method_SERIES + comp_flex = _flex_method_SERIES + arith_special = _arith_method_SERIES + comp_special = _comp_method_SERIES + bool_special = _bool_method_SERIES + elif issubclass(cls, ABCSparseArray): + arith_flex = None + comp_flex = None + arith_special = _arith_method_SPARSE_ARRAY + comp_special = _arith_method_SPARSE_ARRAY + bool_special = _arith_method_SPARSE_ARRAY + elif issubclass(cls, ABCPanel): + arith_flex = _flex_method_PANEL + comp_flex = _comp_method_PANEL + arith_special = _arith_method_PANEL + comp_special = _comp_method_PANEL + bool_special = _arith_method_PANEL + elif issubclass(cls, ABCDataFrame): + # Same for DataFrame and SparseDataFrame + arith_flex = _arith_method_FRAME + comp_flex = _flex_comp_method_FRAME + arith_special = _arith_method_FRAME + comp_special = _comp_method_FRAME + bool_special = _arith_method_FRAME + return arith_flex, comp_flex, arith_special, comp_special, bool_special + def _create_methods(cls, arith_method, comp_method, bool_method, special=False): @@ -743,16 +801,18 @@ def _create_methods(cls, arith_method, comp_method, bool_method, # yapf: enable new_methods['div'] = new_methods['truediv'] new_methods['rdiv'] = new_methods['rtruediv'] 
+ if have_divmod: + # divmod doesn't have an op that is supported by numexpr + new_methods['divmod'] = arith_method(cls, divmod, special) + + new_methods.update(dict( + eq=comp_method(cls, operator.eq, special), + ne=comp_method(cls, operator.ne, special), + lt=comp_method(cls, operator.lt, special), + gt=comp_method(cls, operator.gt, special), + le=comp_method(cls, operator.le, special), + ge=comp_method(cls, operator.ge, special))) - # Comp methods never had a default axis set - if comp_method: - new_methods.update(dict( - eq=comp_method(cls, operator.eq, special), - ne=comp_method(cls, operator.ne, special), - lt=comp_method(cls, operator.lt, special), - gt=comp_method(cls, operator.gt, special), - le=comp_method(cls, operator.le, special), - ge=comp_method(cls, operator.ge, special))) if bool_method: new_methods.update( dict(and_=bool_method(cls, operator.and_, special), @@ -762,9 +822,6 @@ def _create_methods(cls, arith_method, comp_method, bool_method, rand_=bool_method(cls, rand_, special), ror_=bool_method(cls, ror_, special), rxor=bool_method(cls, rxor, special))) - if have_divmod: - # divmod doesn't have an op that is supported by numexpr - new_methods['divmod'] = arith_method(cls, divmod, special) if special: dunderize = lambda x: '__{name}__'.format(name=x.strip('_')) @@ -788,22 +845,17 @@ def add_methods(cls, new_methods): # ---------------------------------------------------------------------- # Arithmetic -def add_special_arithmetic_methods(cls, arith_method=None, - comp_method=None, bool_method=None): +def add_special_arithmetic_methods(cls): """ Adds the full suite of special arithmetic methods (``__add__``, ``__sub__``, etc.) to the class. 
Parameters ---------- - arith_method : function (optional) - factory for special arithmetic methods: - f(cls, op, special) - comp_method : function (optional) - factory for rich comparison - signature: f(cls, op, special) - bool_method : function (optional) - factory for boolean methods - signature: f(cls, op, special) + cls : class + special methods will be defined and pinned to this class """ + _, _, arith_method, comp_method, bool_method = _get_method_wrappers(cls) new_methods = _create_methods(cls, arith_method, comp_method, bool_method, special=True) # inplace operators (I feel like these should get passed an `inplace=True` @@ -836,28 +888,26 @@ def f(self, other): __ipow__=_wrap_inplace_method(new_methods["__pow__"]))) if not compat.PY3: new_methods["__idiv__"] = _wrap_inplace_method(new_methods["__div__"]) - if bool_method: - new_methods.update( - dict(__iand__=_wrap_inplace_method(new_methods["__and__"]), - __ior__=_wrap_inplace_method(new_methods["__or__"]), - __ixor__=_wrap_inplace_method(new_methods["__xor__"]))) + + new_methods.update( + dict(__iand__=_wrap_inplace_method(new_methods["__and__"]), + __ior__=_wrap_inplace_method(new_methods["__or__"]), + __ixor__=_wrap_inplace_method(new_methods["__xor__"]))) add_methods(cls, new_methods=new_methods) -def add_flex_arithmetic_methods(cls, flex_arith_method, flex_comp_method=None): +def add_flex_arithmetic_methods(cls): """ Adds the full suite of flex arithmetic methods (``pow``, ``mul``, ``add``) to the class. 
Parameters ---------- - flex_arith_method : function - factory for flex arithmetic methods: - f(cls, op, special) - flex_comp_method : function, optional, - factory for rich comparison - signature: f(cls, op, special) + cls : class + flex methods will be defined and pinned to this class """ + flex_arith_method, flex_comp_method, _, _, _ = _get_method_wrappers(cls) new_methods = _create_methods(cls, flex_arith_method, flex_comp_method, bool_method=None, special=False) @@ -1284,14 +1334,6 @@ def flex_wrapper(self, other, level=None, fill_value=None, axis=0): return flex_wrapper -series_flex_funcs = dict(flex_arith_method=_flex_method_SERIES, - flex_comp_method=_flex_method_SERIES) - -series_special_funcs = dict(arith_method=_arith_method_SERIES, - comp_method=_comp_method_SERIES, - bool_method=_bool_method_SERIES) - - # ----------------------------------------------------------------------------- # DataFrame @@ -1533,14 +1575,6 @@ def f(self, other): return f -frame_flex_funcs = dict(flex_arith_method=_arith_method_FRAME, - flex_comp_method=_flex_comp_method_FRAME) - -frame_special_funcs = dict(arith_method=_arith_method_FRAME, - comp_method=_comp_method_FRAME, - bool_method=_arith_method_FRAME) - - # ----------------------------------------------------------------------------- # Panel @@ -1629,16 +1663,38 @@ def f(self, other, axis=0): return f -panel_special_funcs = dict(arith_method=_arith_method_PANEL, - comp_method=_comp_method_PANEL, - bool_method=_arith_method_PANEL) - -panel_flex_funcs = dict(flex_arith_method=_flex_method_PANEL, - flex_comp_method=_comp_method_PANEL) - # ----------------------------------------------------------------------------- # Sparse +def _cast_sparse_series_op(left, right, opname): + """ + For SparseSeries operation, coerce to float64 if the result is expected + to have NaN or inf values + + Parameters + ---------- + left : SparseArray + right : SparseArray + opname : str + + Returns + ------- + left : SparseArray + right : 
SparseArray + """ + opname = opname.strip('_') + + if is_integer_dtype(left) and is_integer_dtype(right): + # series coerces to float64 if result should have NaN/inf + if opname in ('floordiv', 'mod') and (right.values == 0).any(): + left = left.astype(np.float64) + right = right.astype(np.float64) + elif opname in ('rfloordiv', 'rmod') and (left.values == 0).any(): + left = left.astype(np.float64) + right = right.astype(np.float64) + + return left, right + def _arith_method_SPARSE_SERIES(cls, op, special): """ @@ -1674,8 +1730,8 @@ def _sparse_series_op(left, right, op, name): new_name = get_op_result_name(left, right) from pandas.core.sparse.array import _sparse_array_op - result = _sparse_array_op(left.values, right.values, op, name, - series=True) + lvalues, rvalues = _cast_sparse_series_op(left.values, right.values, name) + result = _sparse_array_op(lvalues, rvalues, op, name) return left._constructor(result, index=new_index, name=new_name) @@ -1697,7 +1753,7 @@ def wrapper(self, other): dtype = getattr(other, 'dtype', None) other = SparseArray(other, fill_value=self.fill_value, dtype=dtype) - return _sparse_array_op(self, other, op, name, series=False) + return _sparse_array_op(self, other, op, name) elif is_scalar(other): with np.errstate(all='ignore'): fill = op(_get_fill(self), np.asarray(other)) @@ -1710,13 +1766,3 @@ def wrapper(self, other): wrapper.__name__ = name return wrapper - - -sparse_array_special_funcs = dict(arith_method=_arith_method_SPARSE_ARRAY, - comp_method=_arith_method_SPARSE_ARRAY, - bool_method=_arith_method_SPARSE_ARRAY) - -sparse_series_special_funcs = dict(arith_method=_arith_method_SPARSE_SERIES, - comp_method=_arith_method_SPARSE_SERIES, - bool_method=_bool_method_SERIES) -# TODO: I don't think the functions defined by bool_method are tested diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 3be1e3ef8734d..fc7fad861df44 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -1527,8 +1527,8 @@ def 
_extract_axis(self, data, axis=0, intersect=False): slicers={'major_axis': 'index', 'minor_axis': 'columns'}) -ops.add_special_arithmetic_methods(Panel, **ops.panel_special_funcs) -ops.add_flex_arithmetic_methods(Panel, **ops.panel_flex_funcs) +ops.add_special_arithmetic_methods(Panel) +ops.add_flex_arithmetic_methods(Panel) Panel._add_numeric_operations() diff --git a/pandas/core/series.py b/pandas/core/series.py index b42e02bc99237..26b7fd552b062 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3100,8 +3100,8 @@ def to_period(self, freq=None, copy=True): Series._add_series_or_dataframe_operations() # Add arithmetic! -ops.add_flex_arithmetic_methods(Series, **ops.series_flex_funcs) -ops.add_special_arithmetic_methods(Series, **ops.series_special_funcs) +ops.add_flex_arithmetic_methods(Series) +ops.add_special_arithmetic_methods(Series) # ----------------------------------------------------------------------------- diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py index 92c4fe932f066..5532d7522cd2d 100644 --- a/pandas/core/sparse/array.py +++ b/pandas/core/sparse/array.py @@ -53,20 +53,11 @@ def _get_fill(arr): return np.asarray(arr.fill_value) -def _sparse_array_op(left, right, op, name, series=False): +def _sparse_array_op(left, right, op, name): if name.startswith('__'): # For lookups in _libs.sparse we need non-dunder op name name = name[2:-2] - if series and is_integer_dtype(left) and is_integer_dtype(right): - # series coerces to float64 if result should have NaN/inf - if name in ('floordiv', 'mod') and (right.values == 0).any(): - left = left.astype(np.float64) - right = right.astype(np.float64) - elif name in ('rfloordiv', 'rmod') and (left.values == 0).any(): - left = left.astype(np.float64) - right = right.astype(np.float64) - # dtype used to find corresponding sparse method if not is_dtype_equal(left.dtype, right.dtype): dtype = find_common_type([left.dtype, right.dtype]) @@ -850,5 +841,4 @@ def _make_index(length, 
indices, kind): return index -ops.add_special_arithmetic_methods(SparseArray, - **ops.sparse_array_special_funcs) +ops.add_special_arithmetic_methods(SparseArray) diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index 872a17d8dbabe..d89b1d681c478 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -1014,5 +1014,5 @@ def homogenize(series_dict): # use unaccelerated ops for sparse objects -ops.add_flex_arithmetic_methods(SparseDataFrame, **ops.frame_flex_funcs) -ops.add_special_arithmetic_methods(SparseDataFrame, **ops.frame_special_funcs) +ops.add_flex_arithmetic_methods(SparseDataFrame) +ops.add_special_arithmetic_methods(SparseDataFrame) diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index 26cf9dbadbbf2..7a1496bf11117 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -812,6 +812,5 @@ def from_coo(cls, A, dense_index=False): # overwrite series methods with unaccelerated Sparse-specific versions -ops.add_flex_arithmetic_methods(SparseSeries, **ops.series_flex_funcs) -ops.add_special_arithmetic_methods(SparseSeries, - **ops.sparse_series_special_funcs) +ops.add_flex_arithmetic_methods(SparseSeries) +ops.add_special_arithmetic_methods(SparseSeries) From 34712712b5df6abd1cc55e64753c1ffed05ea925 Mon Sep 17 00:00:00 2001 From: William Ayd Date: Mon, 26 Feb 2018 04:21:06 -0800 Subject: [PATCH 209/217] Test Decorators and Better Pytest Integration in 'test_excel' (#19829) --- pandas/compat/__init__.py | 14 + pandas/tests/io/test_excel.py | 1619 ++++++++++++++----------------- pandas/util/_test_decorators.py | 6 +- pandas/util/testing.py | 2 +- 4 files changed, 739 insertions(+), 902 deletions(-) diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 80a2c05d86971..78aaf4596c8b7 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -365,6 +365,20 @@ def callable(obj): return any("__call__" in klass.__dict__ for klass in 
type(obj).__mro__) +if sys.version_info[0] < 3: + # In PY2 functools.wraps doesn't provide metadata pytest needs to generate + # decorated tests using parametrization. See pytest GH issue #2782 + def wraps(wrapped, assigned=functools.WRAPPER_ASSIGNMENTS, + updated=functools.WRAPPER_UPDATES): + def wrapper(f): + f = functools.wraps(wrapped, assigned, updated)(f) + f.__wrapped__ = wrapped + return f + return wrapper +else: + wraps = functools.wraps + + def add_metaclass(metaclass): """Class decorator for creating a class with a metaclass.""" def wrapper(cls): diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 86cee54665781..fdf9954285db8 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -28,43 +28,6 @@ from pandas.util.testing import ensure_clean, makeCustomDataframe as mkdf -def _skip_if_no_xlrd(): - try: - import xlrd - ver = tuple(map(int, xlrd.__VERSION__.split(".")[:2])) - if ver < (0, 9): - pytest.skip('xlrd < 0.9, skipping') - except ImportError: - pytest.skip('xlrd not installed, skipping') - - -def _skip_if_no_xlwt(): - try: - import xlwt # NOQA - except ImportError: - pytest.skip('xlwt not installed, skipping') - - -def _skip_if_no_openpyxl(): - try: - import openpyxl # NOQA - except ImportError: - pytest.skip('openpyxl not installed, skipping') - - -def _skip_if_no_xlsxwriter(): - try: - import xlsxwriter # NOQA - except ImportError: - pytest.skip('xlsxwriter not installed, skipping') - - -def _skip_if_no_excelsuite(): - _skip_if_no_xlrd() - _skip_if_no_xlwt() - _skip_if_no_openpyxl() - - _seriesd = tm.getSeriesData() _tsd = tm.getTimeSeriesData() _frame = DataFrame(_seriesd)[:10] @@ -74,6 +37,7 @@ def _skip_if_no_excelsuite(): _mixed_frame['foo'] = 'bar' +@td.skip_if_no('xlrd', '0.9') class SharedItems(object): def setup_method(self, method): @@ -103,7 +67,7 @@ def get_csv_refdf(self, basename): dfref = read_csv(pref, index_col=0, parse_dates=True, engine='python') return dfref - def 
get_excelfile(self, basename): + def get_excelfile(self, basename, ext): """ Return test data ExcelFile instance. Test data path is defined by pandas.util.testing.get_data_path() @@ -119,9 +83,9 @@ def get_excelfile(self, basename): excel : io.excel.ExcelFile """ - return ExcelFile(os.path.join(self.dirpath, basename + self.ext)) + return ExcelFile(os.path.join(self.dirpath, basename + ext)) - def get_exceldf(self, basename, *args, **kwds): + def get_exceldf(self, basename, ext, *args, **kwds): """ Return test data DataFrame. Test data path is defined by pandas.util.testing.get_data_path() @@ -137,36 +101,23 @@ def get_exceldf(self, basename, *args, **kwds): df : DataFrame """ - pth = os.path.join(self.dirpath, basename + self.ext) + pth = os.path.join(self.dirpath, basename + ext) return read_excel(pth, *args, **kwds) class ReadingTestsBase(SharedItems): # This is based on ExcelWriterBase - # - # Base class for test cases to run with different Excel readers. - # To add a reader test, define the following: - # 1. A check_skip function that skips your tests if your reader isn't - # installed. - # 2. Add a property ext, which is the file extension that your reader - # reades from. (needs to start with '.' so it's a valid path) - # 3. Add a property engine_name, which is the name of the reader class. - # For the reader this is not used for anything at the moment. 
- def setup_method(self, method): - self.check_skip() - super(ReadingTestsBase, self).setup_method(method) - - def test_usecols_int(self): + def test_usecols_int(self, ext): dfref = self.get_csv_refdf('test1') dfref = dfref.reindex(columns=['A', 'B', 'C']) - df1 = self.get_exceldf('test1', 'Sheet1', index_col=0, usecols=3) - df2 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0, - usecols=3) + df1 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0, usecols=3) + df2 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], + index_col=0, usecols=3) with tm.assert_produces_warning(FutureWarning): - df3 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], + df3 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], index_col=0, parse_cols=3) # TODO add index to xls file) @@ -174,17 +125,17 @@ def test_usecols_int(self): tm.assert_frame_equal(df2, dfref, check_names=False) tm.assert_frame_equal(df3, dfref, check_names=False) - def test_usecols_list(self): + def test_usecols_list(self, ext): dfref = self.get_csv_refdf('test1') dfref = dfref.reindex(columns=['B', 'C']) - df1 = self.get_exceldf('test1', 'Sheet1', index_col=0, - usecols=[0, 2, 3]) - df2 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0, + df1 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0, usecols=[0, 2, 3]) + df2 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], + index_col=0, usecols=[0, 2, 3]) with tm.assert_produces_warning(FutureWarning): - df3 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], + df3 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], index_col=0, parse_cols=[0, 2, 3]) # TODO add index to xls file) @@ -192,18 +143,18 @@ def test_usecols_list(self): tm.assert_frame_equal(df2, dfref, check_names=False) tm.assert_frame_equal(df3, dfref, check_names=False) - def test_usecols_str(self): + def test_usecols_str(self, ext): dfref = self.get_csv_refdf('test1') df1 = dfref.reindex(columns=['A', 'B', 'C']) - df2 = self.get_exceldf('test1', 
'Sheet1', index_col=0, - usecols='A:D') - df3 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0, + df2 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0, usecols='A:D') + df3 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], + index_col=0, usecols='A:D') with tm.assert_produces_warning(FutureWarning): - df4 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], + df4 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], index_col=0, parse_cols='A:D') # TODO add index to xls, read xls ignores index name ? @@ -212,37 +163,37 @@ def test_usecols_str(self): tm.assert_frame_equal(df4, df1, check_names=False) df1 = dfref.reindex(columns=['B', 'C']) - df2 = self.get_exceldf('test1', 'Sheet1', index_col=0, - usecols='A,C,D') - df3 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0, + df2 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0, usecols='A,C,D') + df3 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], + index_col=0, usecols='A,C,D') # TODO add index to xls file tm.assert_frame_equal(df2, df1, check_names=False) tm.assert_frame_equal(df3, df1, check_names=False) df1 = dfref.reindex(columns=['B', 'C']) - df2 = self.get_exceldf('test1', 'Sheet1', index_col=0, - usecols='A,C:D') - df3 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0, + df2 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0, usecols='A,C:D') + df3 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], + index_col=0, usecols='A,C:D') tm.assert_frame_equal(df2, df1, check_names=False) tm.assert_frame_equal(df3, df1, check_names=False) - def test_excel_stop_iterator(self): + def test_excel_stop_iterator(self, ext): - parsed = self.get_exceldf('test2', 'Sheet1') + parsed = self.get_exceldf('test2', ext, 'Sheet1') expected = DataFrame([['aaaa', 'bbbbb']], columns=['Test', 'Test1']) tm.assert_frame_equal(parsed, expected) - def test_excel_cell_error_na(self): + def test_excel_cell_error_na(self, ext): - parsed = self.get_exceldf('test3', 
'Sheet1') + parsed = self.get_exceldf('test3', ext, 'Sheet1') expected = DataFrame([[np.nan]], columns=['Test']) tm.assert_frame_equal(parsed, expected) - def test_excel_passes_na(self): + def test_excel_passes_na(self, ext): - excel = self.get_excelfile('test4') + excel = self.get_excelfile('test4', ext) parsed = read_excel(excel, 'Sheet1', keep_default_na=False, na_values=['apple']) @@ -257,7 +208,7 @@ def test_excel_passes_na(self): tm.assert_frame_equal(parsed, expected) # 13967 - excel = self.get_excelfile('test5') + excel = self.get_excelfile('test5', ext) parsed = read_excel(excel, 'Sheet1', keep_default_na=False, na_values=['apple']) @@ -271,9 +222,9 @@ def test_excel_passes_na(self): columns=['Test']) tm.assert_frame_equal(parsed, expected) - def test_excel_table_sheet_by_index(self): + def test_excel_table_sheet_by_index(self, ext): - excel = self.get_excelfile('test1') + excel = self.get_excelfile('test1', ext) dfref = self.get_csv_refdf('test1') df1 = read_excel(excel, 0, index_col=0) @@ -300,21 +251,22 @@ def test_excel_table_sheet_by_index(self): with pytest.raises(xlrd.XLRDError): read_excel(excel, 'asdf') - def test_excel_table(self): + def test_excel_table(self, ext): dfref = self.get_csv_refdf('test1') - df1 = self.get_exceldf('test1', 'Sheet1', index_col=0) - df2 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0) + df1 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0) + df2 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], + index_col=0) # TODO add index to file tm.assert_frame_equal(df1, dfref, check_names=False) tm.assert_frame_equal(df2, dfref, check_names=False) - df3 = self.get_exceldf('test1', 'Sheet1', index_col=0, + df3 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0, skipfooter=1) tm.assert_frame_equal(df3, df1.iloc[:-1]) - def test_reader_special_dtypes(self): + def test_reader_special_dtypes(self, ext): expected = DataFrame.from_dict(OrderedDict([ ("IntCol", [1, 2, -3, 4, 0]), @@ -330,36 +282,36 @@ def 
test_reader_special_dtypes(self): basename = 'test_types' # should read in correctly and infer types - actual = self.get_exceldf(basename, 'Sheet1') + actual = self.get_exceldf(basename, ext, 'Sheet1') tm.assert_frame_equal(actual, expected) # if not coercing number, then int comes in as float float_expected = expected.copy() float_expected["IntCol"] = float_expected["IntCol"].astype(float) float_expected.loc[float_expected.index[1], "Str2Col"] = 3.0 - actual = self.get_exceldf(basename, 'Sheet1', convert_float=False) + actual = self.get_exceldf(basename, ext, 'Sheet1', convert_float=False) tm.assert_frame_equal(actual, float_expected) # check setting Index (assuming xls and xlsx are the same here) for icol, name in enumerate(expected.columns): - actual = self.get_exceldf(basename, 'Sheet1', index_col=icol) + actual = self.get_exceldf(basename, ext, 'Sheet1', index_col=icol) exp = expected.set_index(name) tm.assert_frame_equal(actual, exp) # convert_float and converters should be different but both accepted expected["StrCol"] = expected["StrCol"].apply(str) actual = self.get_exceldf( - basename, 'Sheet1', converters={"StrCol": str}) + basename, ext, 'Sheet1', converters={"StrCol": str}) tm.assert_frame_equal(actual, expected) no_convert_float = float_expected.copy() no_convert_float["StrCol"] = no_convert_float["StrCol"].apply(str) - actual = self.get_exceldf(basename, 'Sheet1', convert_float=False, + actual = self.get_exceldf(basename, ext, 'Sheet1', convert_float=False, converters={"StrCol": str}) tm.assert_frame_equal(actual, no_convert_float) # GH8212 - support for converters and missing values - def test_reader_converters(self): + def test_reader_converters(self, ext): basename = 'test_converters' @@ -378,13 +330,14 @@ def test_reader_converters(self): # should read in correctly and set types of single cells (not array # dtypes) - actual = self.get_exceldf(basename, 'Sheet1', converters=converters) + actual = self.get_exceldf(basename, ext, 'Sheet1', + 
converters=converters) tm.assert_frame_equal(actual, expected) - def test_reader_dtype(self): + def test_reader_dtype(self, ext): # GH 8212 basename = 'testdtype' - actual = self.get_exceldf(basename) + actual = self.get_exceldf(basename, ext) expected = DataFrame({ 'a': [1, 2, 3, 4], @@ -395,7 +348,7 @@ def test_reader_dtype(self): tm.assert_frame_equal(actual, expected) - actual = self.get_exceldf(basename, + actual = self.get_exceldf(basename, ext, dtype={'a': 'float64', 'b': 'float32', 'c': str}) @@ -406,14 +359,14 @@ def test_reader_dtype(self): tm.assert_frame_equal(actual, expected) with pytest.raises(ValueError): - actual = self.get_exceldf(basename, dtype={'d': 'int64'}) + actual = self.get_exceldf(basename, ext, dtype={'d': 'int64'}) - def test_reading_all_sheets(self): + def test_reading_all_sheets(self, ext): # Test reading all sheetnames by setting sheetname to None, # Ensure a dict is returned. # See PR #9450 basename = 'test_multisheet' - dfs = self.get_exceldf(basename, sheet_name=None) + dfs = self.get_exceldf(basename, ext, sheet_name=None) # ensure this is not alphabetical to test order preservation expected_keys = ['Charlie', 'Alpha', 'Beta'] tm.assert_contains_all(expected_keys, dfs.keys()) @@ -421,7 +374,7 @@ def test_reading_all_sheets(self): # Ensure sheet order is preserved assert expected_keys == list(dfs.keys()) - def test_reading_multiple_specific_sheets(self): + def test_reading_multiple_specific_sheets(self, ext): # Test reading specific sheetnames by specifying a mixed list # of integers and strings, and confirm that duplicated sheet # references (positions/names) are removed properly. @@ -430,42 +383,41 @@ def test_reading_multiple_specific_sheets(self): basename = 'test_multisheet' # Explicitly request duplicates. Only the set should be returned. 
expected_keys = [2, 'Charlie', 'Charlie'] - dfs = self.get_exceldf(basename, sheet_name=expected_keys) + dfs = self.get_exceldf(basename, ext, sheet_name=expected_keys) expected_keys = list(set(expected_keys)) tm.assert_contains_all(expected_keys, dfs.keys()) assert len(expected_keys) == len(dfs.keys()) - def test_reading_all_sheets_with_blank(self): + def test_reading_all_sheets_with_blank(self, ext): # Test reading all sheetnames by setting sheetname to None, # In the case where some sheets are blank. # Issue #11711 basename = 'blank_with_header' - dfs = self.get_exceldf(basename, sheet_name=None) + dfs = self.get_exceldf(basename, ext, sheet_name=None) expected_keys = ['Sheet1', 'Sheet2', 'Sheet3'] tm.assert_contains_all(expected_keys, dfs.keys()) # GH6403 - def test_read_excel_blank(self): - actual = self.get_exceldf('blank', 'Sheet1') + def test_read_excel_blank(self, ext): + actual = self.get_exceldf('blank', ext, 'Sheet1') tm.assert_frame_equal(actual, DataFrame()) - def test_read_excel_blank_with_header(self): + def test_read_excel_blank_with_header(self, ext): expected = DataFrame(columns=['col_1', 'col_2']) - actual = self.get_exceldf('blank_with_header', 'Sheet1') + actual = self.get_exceldf('blank_with_header', ext, 'Sheet1') tm.assert_frame_equal(actual, expected) + @td.skip_if_no('openpyxl') + @td.skip_if_no('xlwt') # GH 12292 : error when read one empty column from excel file - def test_read_one_empty_col_no_header(self): - _skip_if_no_xlwt() - _skip_if_no_openpyxl() - + def test_read_one_empty_col_no_header(self, ext): df = pd.DataFrame( [["", 1, 100], ["", 2, 200], ["", 3, 300], ["", 4, 400]] ) - with ensure_clean(self.ext) as path: + with ensure_clean(ext) as path: df.to_excel(path, 'no_header', index=False, header=False) actual_header_none = read_excel( path, @@ -484,17 +436,16 @@ def test_read_one_empty_col_no_header(self): tm.assert_frame_equal(actual_header_none, expected) tm.assert_frame_equal(actual_header_zero, expected) - def 
test_read_one_empty_col_with_header(self): - _skip_if_no_xlwt() - _skip_if_no_openpyxl() - + @td.skip_if_no('openpyxl') + @td.skip_if_no('xlwt') + def test_read_one_empty_col_with_header(self, ext): df = pd.DataFrame( [["", 1, 100], ["", 2, 200], ["", 3, 300], ["", 4, 400]] ) - with ensure_clean(self.ext) as path: + with ensure_clean(ext) as path: df.to_excel(path, 'with_header', index=False, header=True) actual_header_none = read_excel( path, @@ -514,16 +465,15 @@ def test_read_one_empty_col_with_header(self): expected_header_zero = DataFrame(columns=[0], dtype='int64') tm.assert_frame_equal(actual_header_zero, expected_header_zero) - def test_set_column_names_in_parameter(self): - _skip_if_no_xlwt() - _skip_if_no_openpyxl() - + @td.skip_if_no('openpyxl') + @td.skip_if_no('xlwt') + def test_set_column_names_in_parameter(self, ext): # GH 12870 : pass down column names associated with # keyword argument names refdf = pd.DataFrame([[1, 'foo'], [2, 'bar'], [3, 'baz']], columns=['a', 'b']) - with ensure_clean(self.ext) as pth: + with ensure_clean(ext) as pth: with ExcelWriter(pth) as writer: refdf.to_excel(writer, 'Data_no_head', header=False, index=False) @@ -540,42 +490,45 @@ def test_set_column_names_in_parameter(self): tm.assert_frame_equal(xlsdf_no_head, refdf) tm.assert_frame_equal(xlsdf_with_head, refdf) - def test_date_conversion_overflow(self): + def test_date_conversion_overflow(self, ext): # GH 10001 : pandas.ExcelFile ignore parse_dates=False expected = pd.DataFrame([[pd.Timestamp('2016-03-12'), 'Marc Johnson'], [pd.Timestamp('2016-03-16'), 'Jack Black'], [1e+20, 'Timothy Brown']], columns=['DateColWithBigInt', 'StringCol']) - result = self.get_exceldf('testdateoverflow') + result = self.get_exceldf('testdateoverflow', ext) tm.assert_frame_equal(result, expected) - def test_sheet_name_and_sheetname(self): + def test_sheet_name_and_sheetname(self, ext): # GH10559: Minor improvement: Change "sheet_name" to "sheetname" # GH10969: DOC: Consistent var names 
(sheetname vs sheet_name) # GH12604: CLN GH10559 Rename sheetname variable to sheet_name dfref = self.get_csv_refdf('test1') - df1 = self.get_exceldf('test1', sheet_name='Sheet1') # doc + df1 = self.get_exceldf('test1', ext, sheet_name='Sheet1') # doc with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - df2 = self.get_exceldf('test1', sheetname='Sheet1') # bkwrd compat + df2 = self.get_exceldf('test1', ext, + sheetname='Sheet1') # bkwrd compat tm.assert_frame_equal(df1, dfref, check_names=False) tm.assert_frame_equal(df2, dfref, check_names=False) - def test_sheet_name_both_raises(self): + def test_sheet_name_both_raises(self, ext): with tm.assert_raises_regex(TypeError, "Cannot specify both"): - self.get_exceldf('test1', sheetname='Sheet1', sheet_name='Sheet1') + self.get_exceldf('test1', ext, sheetname='Sheet1', + sheet_name='Sheet1') -class XlrdTests(ReadingTestsBase): +@pytest.mark.parametrize("ext", ['.xls', '.xlsx', '.xlsm']) +class TestXlrdReader(ReadingTestsBase): """ This is the base class for the xlrd tests, and 3 different file formats are supported: xls, xlsx, xlsm """ - def test_excel_read_buffer(self): + def test_excel_read_buffer(self, ext): - pth = os.path.join(self.dirpath, 'test1' + self.ext) + pth = os.path.join(self.dirpath, 'test1' + ext) expected = read_excel(pth, 'Sheet1', index_col=0) with open(pth, 'rb') as f: actual = read_excel(f, 'Sheet1', index_col=0) @@ -586,10 +539,10 @@ def test_excel_read_buffer(self): actual = read_excel(xls, 'Sheet1', index_col=0) tm.assert_frame_equal(expected, actual) - def test_read_xlrd_Book(self): - _skip_if_no_xlwt() - + @td.skip_if_no('xlwt') + def test_read_xlrd_Book(self, ext): import xlrd + df = self.frame with ensure_clean('.xls') as pth: df.to_excel(pth, "SheetA") @@ -603,39 +556,39 @@ def test_read_xlrd_Book(self): tm.assert_frame_equal(df, result) @tm.network - def test_read_from_http_url(self): + def test_read_from_http_url(self, ext): url = 
('https://raw.github.com/pandas-dev/pandas/master/' - 'pandas/tests/io/data/test1' + self.ext) + 'pandas/tests/io/data/test1' + ext) url_table = read_excel(url) - local_table = self.get_exceldf('test1') + local_table = self.get_exceldf('test1', ext) tm.assert_frame_equal(url_table, local_table) - def test_read_from_s3_url(self): + @td.skip_if_no('s3fs') + def test_read_from_s3_url(self, ext): boto3 = pytest.importorskip('boto3') - pytest.importorskip('s3fs') moto = pytest.importorskip('moto') with moto.mock_s3(): conn = boto3.resource("s3", region_name="us-east-1") conn.create_bucket(Bucket="pandas-test") - file_name = os.path.join(self.dirpath, 'test1' + self.ext) + file_name = os.path.join(self.dirpath, 'test1' + ext) with open(file_name, 'rb') as f: - conn.Bucket("pandas-test").put_object(Key="test1" + self.ext, + conn.Bucket("pandas-test").put_object(Key="test1" + ext, Body=f) - url = ('s3://pandas-test/test1' + self.ext) + url = ('s3://pandas-test/test1' + ext) url_table = read_excel(url) - local_table = self.get_exceldf('test1') + local_table = self.get_exceldf('test1', ext) tm.assert_frame_equal(url_table, local_table) @pytest.mark.slow - def test_read_from_file_url(self): + def test_read_from_file_url(self, ext): # FILE if sys.version_info[:2] < (2, 6): pytest.skip("file:// not supported with Python < 2.6") - localtable = os.path.join(self.dirpath, 'test1' + self.ext) + localtable = os.path.join(self.dirpath, 'test1' + ext) local_table = read_excel(localtable) try: @@ -649,37 +602,37 @@ def test_read_from_file_url(self): tm.assert_frame_equal(url_table, local_table) @td.skip_if_no('pathlib') - def test_read_from_pathlib_path(self): + def test_read_from_pathlib_path(self, ext): # GH12655 from pathlib import Path - str_path = os.path.join(self.dirpath, 'test1' + self.ext) + str_path = os.path.join(self.dirpath, 'test1' + ext) expected = read_excel(str_path, 'Sheet1', index_col=0) - path_obj = Path(self.dirpath, 'test1' + self.ext) + path_obj = 
Path(self.dirpath, 'test1' + ext) actual = read_excel(path_obj, 'Sheet1', index_col=0) tm.assert_frame_equal(expected, actual) @td.skip_if_no('py.path') - def test_read_from_py_localpath(self): + def test_read_from_py_localpath(self, ext): # GH12655 from py.path import local as LocalPath - str_path = os.path.join(self.dirpath, 'test1' + self.ext) + str_path = os.path.join(self.dirpath, 'test1' + ext) expected = read_excel(str_path, 'Sheet1', index_col=0) abs_dir = os.path.abspath(self.dirpath) - path_obj = LocalPath(abs_dir).join('test1' + self.ext) + path_obj = LocalPath(abs_dir).join('test1' + ext) actual = read_excel(path_obj, 'Sheet1', index_col=0) tm.assert_frame_equal(expected, actual) - def test_reader_closes_file(self): + def test_reader_closes_file(self, ext): - pth = os.path.join(self.dirpath, 'test1' + self.ext) + pth = os.path.join(self.dirpath, 'test1' + ext) f = open(pth, 'rb') with ExcelFile(f) as xlsx: # parses okay @@ -687,14 +640,12 @@ def test_reader_closes_file(self): assert f.closed - def test_creating_and_reading_multiple_sheets(self): + @td.skip_if_no('openpyxl') + @td.skip_if_no('xlwt') + def test_creating_and_reading_multiple_sheets(self, ext): # Test reading multiple sheets, from a runtime created excel file # with multiple sheets. # See PR #9450 - - _skip_if_no_xlwt() - _skip_if_no_openpyxl() - def tdf(sheetname): d, i = [11, 22, 33], [1, 2, 3] return DataFrame(d, i, columns=[sheetname]) @@ -704,7 +655,7 @@ def tdf(sheetname): dfs = [tdf(s) for s in sheets] dfs = dict(zip(sheets, dfs)) - with ensure_clean(self.ext) as pth: + with ensure_clean(ext) as pth: with ExcelWriter(pth) as ew: for sheetname, df in iteritems(dfs): df.to_excel(ew, sheetname) @@ -712,10 +663,10 @@ def tdf(sheetname): for s in sheets: tm.assert_frame_equal(dfs[s], dfs_returned[s]) - def test_reader_seconds(self): - # Test reading times with and without milliseconds. GH5945. 
+ def test_reader_seconds(self, ext): import xlrd + # Test reading times with and without milliseconds. GH5945. if LooseVersion(xlrd.__VERSION__) >= LooseVersion("0.9.3"): # Xlrd >= 0.9.3 can handle Excel milliseconds. expected = DataFrame.from_dict({"Time": [time(1, 2, 3), @@ -743,16 +694,16 @@ def test_reader_seconds(self): time(16, 37, 1), time(18, 20, 54)]}) - actual = self.get_exceldf('times_1900', 'Sheet1') + actual = self.get_exceldf('times_1900', ext, 'Sheet1') tm.assert_frame_equal(actual, expected) - actual = self.get_exceldf('times_1904', 'Sheet1') + actual = self.get_exceldf('times_1904', ext, 'Sheet1') tm.assert_frame_equal(actual, expected) - def test_read_excel_multiindex(self): + def test_read_excel_multiindex(self, ext): # GH 4679 mi = MultiIndex.from_product([['foo', 'bar'], ['a', 'b']]) - mi_file = os.path.join(self.dirpath, 'testmultiindex' + self.ext) + mi_file = os.path.join(self.dirpath, 'testmultiindex' + ext) expected = DataFrame([[1, 2.5, pd.Timestamp('2015-01-01'), True], [2, 3.5, pd.Timestamp('2015-01-02'), False], @@ -806,9 +757,9 @@ def test_read_excel_multiindex(self): header=[0, 1], skiprows=2) tm.assert_frame_equal(actual, expected) - def test_read_excel_multiindex_empty_level(self): + @td.skip_if_no('xlsxwriter') + def test_read_excel_multiindex_empty_level(self, ext): # GH 12453 - _skip_if_no_xlsxwriter() with ensure_clean('.xlsx') as path: df = DataFrame({ ('Zero', ''): {0: 0}, @@ -846,9 +797,9 @@ def test_read_excel_multiindex_empty_level(self): actual = pd.read_excel(path, header=[0, 1]) tm.assert_frame_equal(actual, expected) - def test_excel_multindex_roundtrip(self): + @td.skip_if_no('xlsxwriter') + def test_excel_multindex_roundtrip(self, ext): # GH 4679 - _skip_if_no_xlsxwriter() with ensure_clean('.xlsx') as pth: for c_idx_names in [True, False]: for r_idx_names in [True, False]: @@ -891,9 +842,9 @@ def test_excel_multindex_roundtrip(self): tm.assert_frame_equal( df, act, check_names=check_names) - def 
test_excel_old_index_format(self): + def test_excel_old_index_format(self, ext): # see gh-4679 - filename = 'test_index_name_pre17' + self.ext + filename = 'test_index_name_pre17' + ext in_file = os.path.join(self.dirpath, filename) # We detect headers to determine if index names exist, so @@ -952,31 +903,30 @@ def test_excel_old_index_format(self): actual = pd.read_excel(in_file, 'multi_no_names', index_col=[0, 1]) tm.assert_frame_equal(actual, expected, check_names=False) - def test_read_excel_bool_header_arg(self): + def test_read_excel_bool_header_arg(self, ext): # GH 6114 for arg in [True, False]: with pytest.raises(TypeError): - pd.read_excel(os.path.join(self.dirpath, 'test1' + self.ext), + pd.read_excel(os.path.join(self.dirpath, 'test1' + ext), header=arg) - def test_read_excel_chunksize(self): + def test_read_excel_chunksize(self, ext): # GH 8011 with pytest.raises(NotImplementedError): - pd.read_excel(os.path.join(self.dirpath, 'test1' + self.ext), + pd.read_excel(os.path.join(self.dirpath, 'test1' + ext), chunksize=100) - def test_read_excel_parse_dates(self): + @td.skip_if_no('openpyxl') + @td.skip_if_no('xlwt') + def test_read_excel_parse_dates(self, ext): # GH 11544, 12051 - _skip_if_no_openpyxl() - _skip_if_no_xlwt() # for df2.to_excel - df = DataFrame( {'col': [1, 2, 3], 'date_strings': pd.date_range('2012-01-01', periods=3)}) df2 = df.copy() df2['date_strings'] = df2['date_strings'].dt.strftime('%m/%d/%Y') - with ensure_clean(self.ext) as pth: + with ensure_clean(ext) as pth: df2.to_excel(pth) res = read_excel(pth) @@ -995,10 +945,10 @@ def test_read_excel_parse_dates(self): date_parser=dateparser, index_col=0) tm.assert_frame_equal(df, res) - def test_read_excel_skiprows_list(self): + def test_read_excel_skiprows_list(self, ext): # GH 4903 actual = pd.read_excel(os.path.join(self.dirpath, - 'testskiprows' + self.ext), + 'testskiprows' + ext), 'skiprows_list', skiprows=[0, 2]) expected = DataFrame([[1, 2.5, pd.Timestamp('2015-01-01'), True], [2, 
3.5, pd.Timestamp('2015-01-02'), False], @@ -1008,40 +958,40 @@ def test_read_excel_skiprows_list(self): tm.assert_frame_equal(actual, expected) actual = pd.read_excel(os.path.join(self.dirpath, - 'testskiprows' + self.ext), + 'testskiprows' + ext), 'skiprows_list', skiprows=np.array([0, 2])) tm.assert_frame_equal(actual, expected) - def test_read_excel_nrows(self): + def test_read_excel_nrows(self, ext): # GH 16645 num_rows_to_pull = 5 - actual = pd.read_excel(os.path.join(self.dirpath, 'test1' + self.ext), + actual = pd.read_excel(os.path.join(self.dirpath, 'test1' + ext), nrows=num_rows_to_pull) expected = pd.read_excel(os.path.join(self.dirpath, - 'test1' + self.ext)) + 'test1' + ext)) expected = expected[:num_rows_to_pull] tm.assert_frame_equal(actual, expected) - def test_read_excel_nrows_greater_than_nrows_in_file(self): + def test_read_excel_nrows_greater_than_nrows_in_file(self, ext): # GH 16645 expected = pd.read_excel(os.path.join(self.dirpath, - 'test1' + self.ext)) + 'test1' + ext)) num_records_in_file = len(expected) num_rows_to_pull = num_records_in_file + 10 - actual = pd.read_excel(os.path.join(self.dirpath, 'test1' + self.ext), + actual = pd.read_excel(os.path.join(self.dirpath, 'test1' + ext), nrows=num_rows_to_pull) tm.assert_frame_equal(actual, expected) - def test_read_excel_nrows_non_integer_parameter(self): + def test_read_excel_nrows_non_integer_parameter(self, ext): # GH 16645 msg = "'nrows' must be an integer >=0" with tm.assert_raises_regex(ValueError, msg): - pd.read_excel(os.path.join(self.dirpath, 'test1' + self.ext), + pd.read_excel(os.path.join(self.dirpath, 'test1' + ext), nrows='5') - def test_read_excel_squeeze(self): + def test_read_excel_squeeze(self, ext): # GH 12157 - f = os.path.join(self.dirpath, 'test_squeeze' + self.ext) + f = os.path.join(self.dirpath, 'test_squeeze' + ext) actual = pd.read_excel(f, 'two_columns', index_col=0, squeeze=True) expected = pd.Series([2, 3, 4], [4, 5, 6], name='b') @@ -1058,351 +1008,308 @@ 
def test_read_excel_squeeze(self): tm.assert_series_equal(actual, expected) -class TestXlsReaderTests(XlrdTests): - ext = '.xls' - engine_name = 'xlrd' - check_skip = staticmethod(_skip_if_no_xlrd) - - -class TestXlsxReaderTests(XlrdTests): - ext = '.xlsx' - engine_name = 'xlrd' - check_skip = staticmethod(_skip_if_no_xlrd) +class _WriterBase(SharedItems): + @pytest.fixture(autouse=True) + def set_engine_and_path(self, request, merge_cells, engine, ext): + """Fixture to set engine and open file for use in each test case -class TestXlsmReaderTests(XlrdTests): - ext = '.xlsm' - engine_name = 'xlrd' - check_skip = staticmethod(_skip_if_no_xlrd) + Rather than requiring `engine=...` to be provided explictly as an + argument in each test, this fixture sets a global option to dictate + which engine should be used to write Excel files. After executing + the test it rolls back said change to the global option. + It also uses a context manager to open a temporary excel file for + the function to write to, accessible via `self.path` -class ExcelWriterBase(SharedItems): + Notes + ----- + This fixture will run as part of each test method defined in the + class and any subclasses, on account of the `autouse=True` + argument + """ + option_name = 'io.excel.{ext}.writer'.format(ext=ext.strip('.')) + prev_engine = get_option(option_name) + set_option(option_name, engine) + with ensure_clean(ext) as path: + self.path = path + yield + set_option(option_name, prev_engine) # Roll back option change + + +@pytest.mark.parametrize("merge_cells", [True, False]) +@pytest.mark.parametrize("engine,ext", [ + pytest.param('openpyxl', '.xlsx', marks=pytest.mark.skipif( + not td.safe_import('openpyxl'), reason='No openpyxl')), + pytest.param('openpyxl', '.xlsm', marks=pytest.mark.skipif( + not td.safe_import('openpyxl'), reason='No openpyxl')), + pytest.param('xlwt', '.xls', marks=pytest.mark.skipif( + not td.safe_import('xlwt'), reason='No xlwt')), + pytest.param('xlsxwriter', '.xlsx', 
marks=pytest.mark.skipif( + not td.safe_import('xlsxwriter'), reason='No xlsxwriter')) +]) +class TestExcelWriter(_WriterBase): # Base class for test cases to run with different Excel writers. - # To add a writer test, define the following: - # 1. A check_skip function that skips your tests if your writer isn't - # installed. - # 2. Add a property ext, which is the file extension that your writer - # writes to. (needs to start with '.' so it's a valid path) - # 3. Add a property engine_name, which is the name of the writer class. - - # Test with MultiIndex and Hierarchical Rows as merged cells. - merge_cells = True - - def setup_method(self, method): - self.check_skip() - super(ExcelWriterBase, self).setup_method(method) - self.option_name = 'io.excel.%s.writer' % self.ext.strip('.') - self.prev_engine = get_option(self.option_name) - set_option(self.option_name, self.engine_name) - - def teardown_method(self, method): - set_option(self.option_name, self.prev_engine) - def test_excel_sheet_by_name_raise(self): - _skip_if_no_xlrd() + def test_excel_sheet_by_name_raise(self, merge_cells, engine, ext): import xlrd - with ensure_clean(self.ext) as pth: - gt = DataFrame(np.random.randn(10, 2)) - gt.to_excel(pth) - xl = ExcelFile(pth) - df = read_excel(xl, 0) - tm.assert_frame_equal(gt, df) - - with pytest.raises(xlrd.XLRDError): - read_excel(xl, '0') - - def test_excelwriter_contextmanager(self): - _skip_if_no_xlrd() - - with ensure_clean(self.ext) as pth: - with ExcelWriter(pth) as writer: - self.frame.to_excel(writer, 'Data1') - self.frame2.to_excel(writer, 'Data2') - - with ExcelFile(pth) as reader: - found_df = read_excel(reader, 'Data1') - found_df2 = read_excel(reader, 'Data2') - tm.assert_frame_equal(found_df, self.frame) - tm.assert_frame_equal(found_df2, self.frame2) - - def test_roundtrip(self): - _skip_if_no_xlrd() - - with ensure_clean(self.ext) as path: - self.frame['A'][:5] = nan - - self.frame.to_excel(path, 'test1') - self.frame.to_excel(path, 'test1', 
columns=['A', 'B']) - self.frame.to_excel(path, 'test1', header=False) - self.frame.to_excel(path, 'test1', index=False) - - # test roundtrip - self.frame.to_excel(path, 'test1') - recons = read_excel(path, 'test1', index_col=0) - tm.assert_frame_equal(self.frame, recons) - - self.frame.to_excel(path, 'test1', index=False) - recons = read_excel(path, 'test1', index_col=None) - recons.index = self.frame.index - tm.assert_frame_equal(self.frame, recons) - - self.frame.to_excel(path, 'test1', na_rep='NA') - recons = read_excel(path, 'test1', index_col=0, na_values=['NA']) - tm.assert_frame_equal(self.frame, recons) - - # GH 3611 - self.frame.to_excel(path, 'test1', na_rep='88') - recons = read_excel(path, 'test1', index_col=0, na_values=['88']) - tm.assert_frame_equal(self.frame, recons) - - self.frame.to_excel(path, 'test1', na_rep='88') - recons = read_excel(path, 'test1', index_col=0, - na_values=[88, 88.0]) - tm.assert_frame_equal(self.frame, recons) - - # GH 6573 - self.frame.to_excel(path, 'Sheet1') - recons = read_excel(path, index_col=0) - tm.assert_frame_equal(self.frame, recons) - - self.frame.to_excel(path, '0') - recons = read_excel(path, index_col=0) - tm.assert_frame_equal(self.frame, recons) - - # GH 8825 Pandas Series should provide to_excel method - s = self.frame["A"] - s.to_excel(path) - recons = read_excel(path, index_col=0) - tm.assert_frame_equal(s.to_frame(), recons) - - def test_mixed(self): - _skip_if_no_xlrd() - - with ensure_clean(self.ext) as path: - self.mixed_frame.to_excel(path, 'test1') - reader = ExcelFile(path) - recons = read_excel(reader, 'test1', index_col=0) - tm.assert_frame_equal(self.mixed_frame, recons) - - def test_tsframe(self): - _skip_if_no_xlrd() + gt = DataFrame(np.random.randn(10, 2)) + gt.to_excel(self.path) + xl = ExcelFile(self.path) + df = read_excel(xl, 0) + tm.assert_frame_equal(gt, df) + with pytest.raises(xlrd.XLRDError): + read_excel(xl, '0') + + def test_excelwriter_contextmanager(self, merge_cells, engine, 
ext): + with ExcelWriter(self.path) as writer: + self.frame.to_excel(writer, 'Data1') + self.frame2.to_excel(writer, 'Data2') + + with ExcelFile(self.path) as reader: + found_df = read_excel(reader, 'Data1') + found_df2 = read_excel(reader, 'Data2') + tm.assert_frame_equal(found_df, self.frame) + tm.assert_frame_equal(found_df2, self.frame2) + + def test_roundtrip(self, merge_cells, engine, ext): + self.frame['A'][:5] = nan + + self.frame.to_excel(self.path, 'test1') + self.frame.to_excel(self.path, 'test1', columns=['A', 'B']) + self.frame.to_excel(self.path, 'test1', header=False) + self.frame.to_excel(self.path, 'test1', index=False) + + # test roundtrip + self.frame.to_excel(self.path, 'test1') + recons = read_excel(self.path, 'test1', index_col=0) + tm.assert_frame_equal(self.frame, recons) + + self.frame.to_excel(self.path, 'test1', index=False) + recons = read_excel(self.path, 'test1', index_col=None) + recons.index = self.frame.index + tm.assert_frame_equal(self.frame, recons) + + self.frame.to_excel(self.path, 'test1', na_rep='NA') + recons = read_excel(self.path, 'test1', index_col=0, na_values=['NA']) + tm.assert_frame_equal(self.frame, recons) + + # GH 3611 + self.frame.to_excel(self.path, 'test1', na_rep='88') + recons = read_excel(self.path, 'test1', index_col=0, na_values=['88']) + tm.assert_frame_equal(self.frame, recons) + + self.frame.to_excel(self.path, 'test1', na_rep='88') + recons = read_excel(self.path, 'test1', index_col=0, + na_values=[88, 88.0]) + tm.assert_frame_equal(self.frame, recons) + + # GH 6573 + self.frame.to_excel(self.path, 'Sheet1') + recons = read_excel(self.path, index_col=0) + tm.assert_frame_equal(self.frame, recons) + + self.frame.to_excel(self.path, '0') + recons = read_excel(self.path, index_col=0) + tm.assert_frame_equal(self.frame, recons) + + # GH 8825 Pandas Series should provide to_excel method + s = self.frame["A"] + s.to_excel(self.path) + recons = read_excel(self.path, index_col=0) + 
tm.assert_frame_equal(s.to_frame(), recons) + + def test_mixed(self, merge_cells, engine, ext): + self.mixed_frame.to_excel(self.path, 'test1') + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1', index_col=0) + tm.assert_frame_equal(self.mixed_frame, recons) + + def test_tsframe(self, merge_cells, engine, ext): df = tm.makeTimeDataFrame()[:5] - with ensure_clean(self.ext) as path: - df.to_excel(path, 'test1') - reader = ExcelFile(path) - recons = read_excel(reader, 'test1') - tm.assert_frame_equal(df, recons) - - def test_basics_with_nan(self): - _skip_if_no_xlrd() - with ensure_clean(self.ext) as path: - self.frame['A'][:5] = nan - self.frame.to_excel(path, 'test1') - self.frame.to_excel(path, 'test1', columns=['A', 'B']) - self.frame.to_excel(path, 'test1', header=False) - self.frame.to_excel(path, 'test1', index=False) - - def test_int_types(self): - _skip_if_no_xlrd() - - for np_type in (np.int8, np.int16, np.int32, np.int64): - - with ensure_clean(self.ext) as path: - # Test np.int values read come back as int (rather than float - # which is Excel's format). - frame = DataFrame(np.random.randint(-10, 10, size=(10, 2)), - dtype=np_type) - frame.to_excel(path, 'test1') - reader = ExcelFile(path) - recons = read_excel(reader, 'test1') - int_frame = frame.astype(np.int64) - tm.assert_frame_equal(int_frame, recons) - recons2 = read_excel(path, 'test1') - tm.assert_frame_equal(int_frame, recons2) - - # test with convert_float=False comes back as float - float_frame = frame.astype(float) - recons = read_excel(path, 'test1', convert_float=False) - tm.assert_frame_equal(recons, float_frame, - check_index_type=False, - check_column_type=False) - - def test_float_types(self): - _skip_if_no_xlrd() - - for np_type in (np.float16, np.float32, np.float64): - with ensure_clean(self.ext) as path: - # Test np.float values read come back as float. 
- frame = DataFrame(np.random.random_sample(10), dtype=np_type) - frame.to_excel(path, 'test1') - reader = ExcelFile(path) - recons = read_excel(reader, 'test1').astype(np_type) - tm.assert_frame_equal(frame, recons, check_dtype=False) - - def test_bool_types(self): - _skip_if_no_xlrd() - - for np_type in (np.bool8, np.bool_): - with ensure_clean(self.ext) as path: - # Test np.bool values read come back as float. - frame = (DataFrame([1, 0, True, False], dtype=np_type)) - frame.to_excel(path, 'test1') - reader = ExcelFile(path) - recons = read_excel(reader, 'test1').astype(np_type) - tm.assert_frame_equal(frame, recons) - - def test_inf_roundtrip(self): - _skip_if_no_xlrd() - + df.to_excel(self.path, 'test1') + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1') + tm.assert_frame_equal(df, recons) + + def test_basics_with_nan(self, merge_cells, engine, ext): + self.frame['A'][:5] = nan + self.frame.to_excel(self.path, 'test1') + self.frame.to_excel(self.path, 'test1', columns=['A', 'B']) + self.frame.to_excel(self.path, 'test1', header=False) + self.frame.to_excel(self.path, 'test1', index=False) + + @pytest.mark.parametrize("np_type", [ + np.int8, np.int16, np.int32, np.int64]) + def test_int_types(self, merge_cells, engine, ext, np_type): + # Test np.int values read come back as int (rather than float + # which is Excel's format). 
+ frame = DataFrame(np.random.randint(-10, 10, size=(10, 2)), + dtype=np_type) + frame.to_excel(self.path, 'test1') + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1') + int_frame = frame.astype(np.int64) + tm.assert_frame_equal(int_frame, recons) + recons2 = read_excel(self.path, 'test1') + tm.assert_frame_equal(int_frame, recons2) + + # test with convert_float=False comes back as float + float_frame = frame.astype(float) + recons = read_excel(self.path, 'test1', convert_float=False) + tm.assert_frame_equal(recons, float_frame, + check_index_type=False, + check_column_type=False) + + @pytest.mark.parametrize("np_type", [ + np.float16, np.float32, np.float64]) + def test_float_types(self, merge_cells, engine, ext, np_type): + # Test np.float values read come back as float. + frame = DataFrame(np.random.random_sample(10), dtype=np_type) + frame.to_excel(self.path, 'test1') + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1').astype(np_type) + tm.assert_frame_equal(frame, recons, check_dtype=False) + + @pytest.mark.parametrize("np_type", [np.bool8, np.bool_]) + def test_bool_types(self, merge_cells, engine, ext, np_type): + # Test np.bool values read come back as float. 
+ frame = (DataFrame([1, 0, True, False], dtype=np_type)) + frame.to_excel(self.path, 'test1') + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1').astype(np_type) + tm.assert_frame_equal(frame, recons) + + def test_inf_roundtrip(self, merge_cells, engine, ext): frame = DataFrame([(1, np.inf), (2, 3), (5, -np.inf)]) - with ensure_clean(self.ext) as path: - frame.to_excel(path, 'test1') - reader = ExcelFile(path) - recons = read_excel(reader, 'test1') - tm.assert_frame_equal(frame, recons) - - def test_sheets(self): - _skip_if_no_xlrd() - - with ensure_clean(self.ext) as path: - self.frame['A'][:5] = nan - - self.frame.to_excel(path, 'test1') - self.frame.to_excel(path, 'test1', columns=['A', 'B']) - self.frame.to_excel(path, 'test1', header=False) - self.frame.to_excel(path, 'test1', index=False) - - # Test writing to separate sheets - writer = ExcelWriter(path) - self.frame.to_excel(writer, 'test1') - self.tsframe.to_excel(writer, 'test2') - writer.save() - reader = ExcelFile(path) - recons = read_excel(reader, 'test1', index_col=0) - tm.assert_frame_equal(self.frame, recons) - recons = read_excel(reader, 'test2', index_col=0) - tm.assert_frame_equal(self.tsframe, recons) - assert 2 == len(reader.sheet_names) - assert 'test1' == reader.sheet_names[0] - assert 'test2' == reader.sheet_names[1] - - def test_colaliases(self): - _skip_if_no_xlrd() - - with ensure_clean(self.ext) as path: - self.frame['A'][:5] = nan - - self.frame.to_excel(path, 'test1') - self.frame.to_excel(path, 'test1', columns=['A', 'B']) - self.frame.to_excel(path, 'test1', header=False) - self.frame.to_excel(path, 'test1', index=False) - - # column aliases - col_aliases = Index(['AA', 'X', 'Y', 'Z']) - self.frame2.to_excel(path, 'test1', header=col_aliases) - reader = ExcelFile(path) - rs = read_excel(reader, 'test1', index_col=0) - xp = self.frame2.copy() - xp.columns = col_aliases - tm.assert_frame_equal(xp, rs) - - def test_roundtrip_indexlabels(self): - _skip_if_no_xlrd() - 
- with ensure_clean(self.ext) as path: - - self.frame['A'][:5] = nan - - self.frame.to_excel(path, 'test1') - self.frame.to_excel(path, 'test1', columns=['A', 'B']) - self.frame.to_excel(path, 'test1', header=False) - self.frame.to_excel(path, 'test1', index=False) - - # test index_label - frame = (DataFrame(np.random.randn(10, 2)) >= 0) - frame.to_excel(path, 'test1', - index_label=['test'], - merge_cells=self.merge_cells) - reader = ExcelFile(path) - recons = read_excel(reader, 'test1', - index_col=0, - ).astype(np.int64) - frame.index.names = ['test'] - assert frame.index.names == recons.index.names - - frame = (DataFrame(np.random.randn(10, 2)) >= 0) - frame.to_excel(path, - 'test1', - index_label=['test', 'dummy', 'dummy2'], - merge_cells=self.merge_cells) - reader = ExcelFile(path) - recons = read_excel(reader, 'test1', - index_col=0, - ).astype(np.int64) - frame.index.names = ['test'] - assert frame.index.names == recons.index.names - - frame = (DataFrame(np.random.randn(10, 2)) >= 0) - frame.to_excel(path, - 'test1', - index_label='test', - merge_cells=self.merge_cells) - reader = ExcelFile(path) - recons = read_excel(reader, 'test1', - index_col=0, - ).astype(np.int64) - frame.index.names = ['test'] - tm.assert_frame_equal(frame, recons.astype(bool)) - - with ensure_clean(self.ext) as path: - - self.frame.to_excel(path, - 'test1', - columns=['A', 'B', 'C', 'D'], - index=False, merge_cells=self.merge_cells) - # take 'A' and 'B' as indexes (same row as cols 'C', 'D') - df = self.frame.copy() - df = df.set_index(['A', 'B']) - - reader = ExcelFile(path) - recons = read_excel(reader, 'test1', index_col=[0, 1]) - tm.assert_frame_equal(df, recons, check_less_precise=True) - - def test_excel_roundtrip_indexname(self): - _skip_if_no_xlrd() - + frame.to_excel(self.path, 'test1') + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1') + tm.assert_frame_equal(frame, recons) + + def test_sheets(self, merge_cells, engine, ext): + self.frame['A'][:5] = nan 
+ + self.frame.to_excel(self.path, 'test1') + self.frame.to_excel(self.path, 'test1', columns=['A', 'B']) + self.frame.to_excel(self.path, 'test1', header=False) + self.frame.to_excel(self.path, 'test1', index=False) + + # Test writing to separate sheets + writer = ExcelWriter(self.path) + self.frame.to_excel(writer, 'test1') + self.tsframe.to_excel(writer, 'test2') + writer.save() + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1', index_col=0) + tm.assert_frame_equal(self.frame, recons) + recons = read_excel(reader, 'test2', index_col=0) + tm.assert_frame_equal(self.tsframe, recons) + assert 2 == len(reader.sheet_names) + assert 'test1' == reader.sheet_names[0] + assert 'test2' == reader.sheet_names[1] + + def test_colaliases(self, merge_cells, engine, ext): + self.frame['A'][:5] = nan + + self.frame.to_excel(self.path, 'test1') + self.frame.to_excel(self.path, 'test1', columns=['A', 'B']) + self.frame.to_excel(self.path, 'test1', header=False) + self.frame.to_excel(self.path, 'test1', index=False) + + # column aliases + col_aliases = Index(['AA', 'X', 'Y', 'Z']) + self.frame2.to_excel(self.path, 'test1', header=col_aliases) + reader = ExcelFile(self.path) + rs = read_excel(reader, 'test1', index_col=0) + xp = self.frame2.copy() + xp.columns = col_aliases + tm.assert_frame_equal(xp, rs) + + def test_roundtrip_indexlabels(self, merge_cells, engine, ext): + self.frame['A'][:5] = nan + + self.frame.to_excel(self.path, 'test1') + self.frame.to_excel(self.path, 'test1', columns=['A', 'B']) + self.frame.to_excel(self.path, 'test1', header=False) + self.frame.to_excel(self.path, 'test1', index=False) + + # test index_label + frame = (DataFrame(np.random.randn(10, 2)) >= 0) + frame.to_excel(self.path, 'test1', + index_label=['test'], + merge_cells=merge_cells) + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1', + index_col=0, + ).astype(np.int64) + frame.index.names = ['test'] + assert frame.index.names == recons.index.names + + 
frame = (DataFrame(np.random.randn(10, 2)) >= 0) + frame.to_excel(self.path, + 'test1', + index_label=['test', 'dummy', 'dummy2'], + merge_cells=merge_cells) + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1', + index_col=0, + ).astype(np.int64) + frame.index.names = ['test'] + assert frame.index.names == recons.index.names + + frame = (DataFrame(np.random.randn(10, 2)) >= 0) + frame.to_excel(self.path, + 'test1', + index_label='test', + merge_cells=merge_cells) + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1', + index_col=0, + ).astype(np.int64) + frame.index.names = ['test'] + tm.assert_frame_equal(frame, recons.astype(bool)) + + self.frame.to_excel(self.path, + 'test1', + columns=['A', 'B', 'C', 'D'], + index=False, merge_cells=merge_cells) + # take 'A' and 'B' as indexes (same row as cols 'C', 'D') + df = self.frame.copy() + df = df.set_index(['A', 'B']) + + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1', index_col=[0, 1]) + tm.assert_frame_equal(df, recons, check_less_precise=True) + + def test_excel_roundtrip_indexname(self, merge_cells, engine, ext): df = DataFrame(np.random.randn(10, 4)) df.index.name = 'foo' - with ensure_clean(self.ext) as path: - df.to_excel(path, merge_cells=self.merge_cells) + df.to_excel(self.path, merge_cells=merge_cells) - xf = ExcelFile(path) - result = read_excel(xf, xf.sheet_names[0], - index_col=0) + xf = ExcelFile(self.path) + result = read_excel(xf, xf.sheet_names[0], + index_col=0) - tm.assert_frame_equal(result, df) - assert result.index.name == 'foo' - - def test_excel_roundtrip_datetime(self): - _skip_if_no_xlrd() + tm.assert_frame_equal(result, df) + assert result.index.name == 'foo' + def test_excel_roundtrip_datetime(self, merge_cells, engine, ext): # datetime.date, not sure what to test here exactly tsf = self.tsframe.copy() - with ensure_clean(self.ext) as path: - tsf.index = [x.date() for x in self.tsframe.index] - tsf.to_excel(path, 'test1', 
merge_cells=self.merge_cells) - reader = ExcelFile(path) - recons = read_excel(reader, 'test1') - tm.assert_frame_equal(self.tsframe, recons) + tsf.index = [x.date() for x in self.tsframe.index] + tsf.to_excel(self.path, 'test1', merge_cells=merge_cells) + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1') + tm.assert_frame_equal(self.tsframe, recons) # GH4133 - excel output format strings - def test_excel_date_datetime_format(self): - _skip_if_no_xlrd() + def test_excel_date_datetime_format(self, merge_cells, engine, ext): df = DataFrame([[date(2014, 1, 31), date(1999, 9, 24)], [datetime(1998, 5, 26, 23, 33, 4), @@ -1414,133 +1321,117 @@ def test_excel_date_datetime_format(self): datetime(2014, 2, 28, 13, 5, 13)]], index=['DATE', 'DATETIME'], columns=['X', 'Y']) - with ensure_clean(self.ext) as filename1: - with ensure_clean(self.ext) as filename2: - writer1 = ExcelWriter(filename1) - writer2 = ExcelWriter(filename2, - date_format='DD.MM.YYYY', - datetime_format='DD.MM.YYYY HH-MM-SS') + with ensure_clean(ext) as filename2: + writer1 = ExcelWriter(self.path) + writer2 = ExcelWriter(filename2, + date_format='DD.MM.YYYY', + datetime_format='DD.MM.YYYY HH-MM-SS') - df.to_excel(writer1, 'test1') - df.to_excel(writer2, 'test1') + df.to_excel(writer1, 'test1') + df.to_excel(writer2, 'test1') - writer1.close() - writer2.close() + writer1.close() + writer2.close() - reader1 = ExcelFile(filename1) - reader2 = ExcelFile(filename2) + reader1 = ExcelFile(self.path) + reader2 = ExcelFile(filename2) - rs1 = read_excel(reader1, 'test1', index_col=None) - rs2 = read_excel(reader2, 'test1', index_col=None) + rs1 = read_excel(reader1, 'test1', index_col=None) + rs2 = read_excel(reader2, 'test1', index_col=None) - tm.assert_frame_equal(rs1, rs2) + tm.assert_frame_equal(rs1, rs2) - # since the reader returns a datetime object for dates, we need - # to use df_expected to check the result - tm.assert_frame_equal(rs2, df_expected) + # since the reader returns a datetime 
object for dates, we need + # to use df_expected to check the result + tm.assert_frame_equal(rs2, df_expected) - def test_to_excel_interval_no_labels(self): + def test_to_excel_interval_no_labels(self, merge_cells, engine, ext): # GH19242 - test writing Interval without labels - _skip_if_no_xlrd() - - with ensure_clean(self.ext) as path: - frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)), - dtype=np.int64) - expected = frame.copy() - frame['new'] = pd.cut(frame[0], 10) - expected['new'] = pd.cut(expected[0], 10).astype(str) - frame.to_excel(path, 'test1') - reader = ExcelFile(path) - recons = read_excel(reader, 'test1') - tm.assert_frame_equal(expected, recons) - - def test_to_excel_interval_labels(self): + frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)), + dtype=np.int64) + expected = frame.copy() + frame['new'] = pd.cut(frame[0], 10) + expected['new'] = pd.cut(expected[0], 10).astype(str) + frame.to_excel(self.path, 'test1') + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1') + tm.assert_frame_equal(expected, recons) + + def test_to_excel_interval_labels(self, merge_cells, engine, ext): # GH19242 - test writing Interval with labels - _skip_if_no_xlrd() - - with ensure_clean(self.ext) as path: - frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)), - dtype=np.int64) - expected = frame.copy() - intervals = pd.cut(frame[0], 10, labels=['A', 'B', 'C', 'D', 'E', - 'F', 'G', 'H', 'I', 'J']) - frame['new'] = intervals - expected['new'] = pd.Series(list(intervals)) - frame.to_excel(path, 'test1') - reader = ExcelFile(path) - recons = read_excel(reader, 'test1') - tm.assert_frame_equal(expected, recons) - - def test_to_excel_timedelta(self): + frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)), + dtype=np.int64) + expected = frame.copy() + intervals = pd.cut(frame[0], 10, labels=['A', 'B', 'C', 'D', 'E', + 'F', 'G', 'H', 'I', 'J']) + frame['new'] = intervals + expected['new'] = pd.Series(list(intervals)) + 
frame.to_excel(self.path, 'test1') + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1') + tm.assert_frame_equal(expected, recons) + + def test_to_excel_timedelta(self, merge_cells, engine, ext): # GH 19242, GH9155 - test writing timedelta to xls - _skip_if_no_xlrd() - - with ensure_clean('.xls') as path: - frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)), - columns=['A'], - dtype=np.int64 - ) - expected = frame.copy() - frame['new'] = frame['A'].apply(lambda x: timedelta(seconds=x)) - expected['new'] = expected['A'].apply( - lambda x: timedelta(seconds=x).total_seconds() / float(86400)) - frame.to_excel(path, 'test1') - reader = ExcelFile(path) - recons = read_excel(reader, 'test1') - tm.assert_frame_equal(expected, recons) - - def test_to_excel_periodindex(self): - _skip_if_no_xlrd() - + if engine == 'openpyxl': + pytest.xfail('Timedelta roundtrip broken with openpyxl') + if engine == 'xlsxwriter' and (sys.version_info[0] == 2 and + sys.platform.startswith('linux')): + pytest.xfail('Not working on linux with Py2 and xlsxwriter') + frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)), + columns=['A'], + dtype=np.int64 + ) + expected = frame.copy() + frame['new'] = frame['A'].apply(lambda x: timedelta(seconds=x)) + expected['new'] = expected['A'].apply( + lambda x: timedelta(seconds=x).total_seconds() / float(86400)) + frame.to_excel(self.path, 'test1') + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1') + tm.assert_frame_equal(expected, recons) + + def test_to_excel_periodindex(self, merge_cells, engine, ext): frame = self.tsframe xp = frame.resample('M', kind='period').mean() - with ensure_clean(self.ext) as path: - xp.to_excel(path, 'sht1') - - reader = ExcelFile(path) - rs = read_excel(reader, 'sht1', index_col=0) - tm.assert_frame_equal(xp, rs.to_period('M')) + xp.to_excel(self.path, 'sht1') - def test_to_excel_multiindex(self): - _skip_if_no_xlrd() + reader = ExcelFile(self.path) + rs = read_excel(reader, 
'sht1', index_col=0) + tm.assert_frame_equal(xp, rs.to_period('M')) + def test_to_excel_multiindex(self, merge_cells, engine, ext): frame = self.frame arrays = np.arange(len(frame.index) * 2).reshape(2, -1) new_index = MultiIndex.from_arrays(arrays, names=['first', 'second']) frame.index = new_index - with ensure_clean(self.ext) as path: - frame.to_excel(path, 'test1', header=False) - frame.to_excel(path, 'test1', columns=['A', 'B']) + frame.to_excel(self.path, 'test1', header=False) + frame.to_excel(self.path, 'test1', columns=['A', 'B']) - # round trip - frame.to_excel(path, 'test1', merge_cells=self.merge_cells) - reader = ExcelFile(path) - df = read_excel(reader, 'test1', index_col=[0, 1]) - tm.assert_frame_equal(frame, df) + # round trip + frame.to_excel(self.path, 'test1', merge_cells=merge_cells) + reader = ExcelFile(self.path) + df = read_excel(reader, 'test1', index_col=[0, 1]) + tm.assert_frame_equal(frame, df) # GH13511 - def test_to_excel_multiindex_nan_label(self): - _skip_if_no_xlrd() - + def test_to_excel_multiindex_nan_label(self, merge_cells, engine, ext): frame = pd.DataFrame({'A': [None, 2, 3], 'B': [10, 20, 30], 'C': np.random.sample(3)}) frame = frame.set_index(['A', 'B']) - with ensure_clean(self.ext) as path: - frame.to_excel(path, merge_cells=self.merge_cells) - df = read_excel(path, index_col=[0, 1]) - tm.assert_frame_equal(frame, df) + frame.to_excel(self.path, merge_cells=merge_cells) + df = read_excel(self.path, index_col=[0, 1]) + tm.assert_frame_equal(frame, df) # Test for Issue 11328. 
If column indices are integers, make # sure they are handled correctly for either setting of # merge_cells - def test_to_excel_multiindex_cols(self): - _skip_if_no_xlrd() - + def test_to_excel_multiindex_cols(self, merge_cells, engine, ext): frame = self.frame arrays = np.arange(len(frame.index) * 2).reshape(2, -1) new_index = MultiIndex.from_arrays(arrays, @@ -1551,42 +1442,37 @@ def test_to_excel_multiindex_cols(self): (50, 1), (50, 2)]) frame.columns = new_cols_index header = [0, 1] - if not self.merge_cells: + if not merge_cells: header = 0 - with ensure_clean(self.ext) as path: - # round trip - frame.to_excel(path, 'test1', merge_cells=self.merge_cells) - reader = ExcelFile(path) - df = read_excel(reader, 'test1', header=header, - index_col=[0, 1]) - if not self.merge_cells: - fm = frame.columns.format(sparsify=False, - adjoin=False, names=False) - frame.columns = [".".join(map(str, q)) for q in zip(*fm)] - tm.assert_frame_equal(frame, df) - - def test_to_excel_multiindex_dates(self): - _skip_if_no_xlrd() - + # round trip + frame.to_excel(self.path, 'test1', merge_cells=merge_cells) + reader = ExcelFile(self.path) + df = read_excel(reader, 'test1', header=header, + index_col=[0, 1]) + if not merge_cells: + fm = frame.columns.format(sparsify=False, + adjoin=False, names=False) + frame.columns = [".".join(map(str, q)) for q in zip(*fm)] + tm.assert_frame_equal(frame, df) + + def test_to_excel_multiindex_dates(self, merge_cells, engine, ext): # try multiindex with dates tsframe = self.tsframe.copy() new_index = [tsframe.index, np.arange(len(tsframe.index))] tsframe.index = MultiIndex.from_arrays(new_index) - with ensure_clean(self.ext) as path: - tsframe.index.names = ['time', 'foo'] - tsframe.to_excel(path, 'test1', merge_cells=self.merge_cells) - reader = ExcelFile(path) - recons = read_excel(reader, 'test1', - index_col=[0, 1]) - - tm.assert_frame_equal(tsframe, recons) - assert recons.index.names == ('time', 'foo') + tsframe.index.names = ['time', 'foo'] + 
tsframe.to_excel(self.path, 'test1', merge_cells=merge_cells) + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1', + index_col=[0, 1]) - def test_to_excel_multiindex_no_write_index(self): - _skip_if_no_xlrd() + tm.assert_frame_equal(tsframe, recons) + assert recons.index.names == ('time', 'foo') + def test_to_excel_multiindex_no_write_index(self, merge_cells, engine, + ext): # Test writing and re-reading a MI witout the index. GH 5616. # Initial non-MI frame. @@ -1597,53 +1483,44 @@ def test_to_excel_multiindex_no_write_index(self): multi_index = MultiIndex.from_tuples([(70, 80), (90, 100)]) frame2.index = multi_index - with ensure_clean(self.ext) as path: + # Write out to Excel without the index. + frame2.to_excel(self.path, 'test1', index=False) - # Write out to Excel without the index. - frame2.to_excel(path, 'test1', index=False) + # Read it back in. + reader = ExcelFile(self.path) + frame3 = read_excel(reader, 'test1') - # Read it back in. - reader = ExcelFile(path) - frame3 = read_excel(reader, 'test1') - - # Test that it is the same as the initial frame. - tm.assert_frame_equal(frame1, frame3) - - def test_to_excel_float_format(self): - _skip_if_no_xlrd() + # Test that it is the same as the initial frame. 
+ tm.assert_frame_equal(frame1, frame3) + def test_to_excel_float_format(self, merge_cells, engine, ext): df = DataFrame([[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]], index=['A', 'B'], columns=['X', 'Y', 'Z']) - with ensure_clean(self.ext) as filename: - df.to_excel(filename, 'test1', float_format='%.2f') + df.to_excel(self.path, 'test1', float_format='%.2f') - reader = ExcelFile(filename) - rs = read_excel(reader, 'test1', index_col=None) - xp = DataFrame([[0.12, 0.23, 0.57], - [12.32, 123123.20, 321321.20]], - index=['A', 'B'], columns=['X', 'Y', 'Z']) - tm.assert_frame_equal(rs, xp) - - def test_to_excel_output_encoding(self): - _skip_if_no_xlrd() + reader = ExcelFile(self.path) + rs = read_excel(reader, 'test1', index_col=None) + xp = DataFrame([[0.12, 0.23, 0.57], + [12.32, 123123.20, 321321.20]], + index=['A', 'B'], columns=['X', 'Y', 'Z']) + tm.assert_frame_equal(rs, xp) + def test_to_excel_output_encoding(self, merge_cells, engine, ext): # avoid mixed inferred_type df = DataFrame([[u'\u0192', u'\u0193', u'\u0194'], [u'\u0195', u'\u0196', u'\u0197']], index=[u'A\u0192', u'B'], columns=[u'X\u0193', u'Y', u'Z']) - with ensure_clean('__tmp_to_excel_float_format__.' + self.ext)\ - as filename: + with ensure_clean('__tmp_to_excel_float_format__.' 
+ ext) as filename: df.to_excel(filename, sheet_name='TestSheet', encoding='utf8') result = read_excel(filename, 'TestSheet', encoding='utf8') tm.assert_frame_equal(result, df) - def test_to_excel_unicode_filename(self): - _skip_if_no_xlrd() - with ensure_clean(u('\u0192u.') + self.ext) as filename: + def test_to_excel_unicode_filename(self, merge_cells, engine, ext): + with ensure_clean(u('\u0192u.') + ext) as filename: try: f = open(filename, 'wb') except UnicodeEncodeError: @@ -1664,7 +1541,7 @@ def test_to_excel_unicode_filename(self): index=['A', 'B'], columns=['X', 'Y', 'Z']) tm.assert_frame_equal(rs, xp) - # def test_to_excel_header_styling_xls(self): + # def test_to_excel_header_styling_xls(self, merge_cells, engine, ext): # import StringIO # s = StringIO( @@ -1711,7 +1588,7 @@ def test_to_excel_unicode_filename(self): # assert 1 == cell_xf.border.left_line_style # assert 2 == cell_xf.alignment.hor_align # os.remove(filename) - # def test_to_excel_header_styling_xlsx(self): + # def test_to_excel_header_styling_xlsx(self, merge_cells, engine, ext): # import StringIO # s = StringIO( # """Date,ticker,type,value @@ -1764,10 +1641,8 @@ def test_to_excel_unicode_filename(self): # assert ws.cell(maddr).merged # os.remove(filename) - def test_excel_010_hemstring(self): - _skip_if_no_xlrd() - - if self.merge_cells: + def test_excel_010_hemstring(self, merge_cells, engine, ext): + if merge_cells: pytest.skip('Skip tests for merged MI format.') from pandas.util.testing import makeCustomDataframe as mkdf @@ -1776,12 +1651,11 @@ def test_excel_010_hemstring(self): def roundtrip(df, header=True, parser_hdr=0, index=True): - with ensure_clean(self.ext) as path: - df.to_excel(path, header=header, - merge_cells=self.merge_cells, index=index) - xf = ExcelFile(path) - res = read_excel(xf, xf.sheet_names[0], header=parser_hdr) - return res + df.to_excel(self.path, header=header, + merge_cells=merge_cells, index=index) + xf = ExcelFile(self.path) + res = read_excel(xf, 
xf.sheet_names[0], header=parser_hdr) + return res nrows = 5 ncols = 3 @@ -1817,12 +1691,11 @@ def roundtrip(df, header=True, parser_hdr=0, index=True): assert res.shape == (1, 2) assert res.iloc[0, 0] is not np.nan - def test_excel_010_hemstring_raises_NotImplementedError(self): + def test_excel_010_hemstring_raises_NotImplementedError(self, merge_cells, + engine, ext): # This test was failing only for j>1 and header=False, # So I reproduced a simple test. - _skip_if_no_xlrd() - - if self.merge_cells: + if merge_cells: pytest.skip('Skip tests for merged MI format.') from pandas.util.testing import makeCustomDataframe as mkdf @@ -1831,12 +1704,11 @@ def test_excel_010_hemstring_raises_NotImplementedError(self): def roundtrip2(df, header=True, parser_hdr=0, index=True): - with ensure_clean(self.ext) as path: - df.to_excel(path, header=header, - merge_cells=self.merge_cells, index=index) - xf = ExcelFile(path) - res = read_excel(xf, xf.sheet_names[0], header=parser_hdr) - return res + df.to_excel(self.path, header=header, + merge_cells=merge_cells, index=index) + xf = ExcelFile(self.path) + res = read_excel(xf, xf.sheet_names[0], header=parser_hdr) + return res nrows = 5 ncols = 3 @@ -1846,134 +1718,119 @@ def roundtrip2(df, header=True, parser_hdr=0, index=True): with pytest.raises(NotImplementedError): roundtrip2(df, header=False, index=False) - def test_duplicated_columns(self): + def test_duplicated_columns(self, merge_cells, engine, ext): # Test for issue #5235 - _skip_if_no_xlrd() - - with ensure_clean(self.ext) as path: - write_frame = DataFrame([[1, 2, 3], [1, 2, 3], [1, 2, 3]]) - colnames = ['A', 'B', 'B'] - - write_frame.columns = colnames - write_frame.to_excel(path, 'test1') - - read_frame = read_excel(path, 'test1') - read_frame.columns = colnames - tm.assert_frame_equal(write_frame, read_frame) - - # 11007 / #10970 - write_frame = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], - columns=['A', 'B', 'A', 'B']) - write_frame.to_excel(path, 'test1') - read_frame = 
read_excel(path, 'test1') - read_frame.columns = ['A', 'B', 'A', 'B'] - tm.assert_frame_equal(write_frame, read_frame) - - # 10982 - write_frame.to_excel(path, 'test1', index=False, header=False) - read_frame = read_excel(path, 'test1', header=None) - write_frame.columns = [0, 1, 2, 3] - tm.assert_frame_equal(write_frame, read_frame) - - def test_swapped_columns(self): - # Test for issue #5427. - _skip_if_no_xlrd() + write_frame = DataFrame([[1, 2, 3], [1, 2, 3], [1, 2, 3]]) + colnames = ['A', 'B', 'B'] - with ensure_clean(self.ext) as path: - write_frame = DataFrame({'A': [1, 1, 1], - 'B': [2, 2, 2]}) - write_frame.to_excel(path, 'test1', columns=['B', 'A']) + write_frame.columns = colnames + write_frame.to_excel(self.path, 'test1') - read_frame = read_excel(path, 'test1', header=0) + read_frame = read_excel(self.path, 'test1') + read_frame.columns = colnames + tm.assert_frame_equal(write_frame, read_frame) - tm.assert_series_equal(write_frame['A'], read_frame['A']) - tm.assert_series_equal(write_frame['B'], read_frame['B']) + # 11007 / #10970 + write_frame = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], + columns=['A', 'B', 'A', 'B']) + write_frame.to_excel(self.path, 'test1') + read_frame = read_excel(self.path, 'test1') + read_frame.columns = ['A', 'B', 'A', 'B'] + tm.assert_frame_equal(write_frame, read_frame) - def test_invalid_columns(self): # 10982 - _skip_if_no_xlrd() + write_frame.to_excel(self.path, 'test1', index=False, header=False) + read_frame = read_excel(self.path, 'test1', header=None) + write_frame.columns = [0, 1, 2, 3] + tm.assert_frame_equal(write_frame, read_frame) + + def test_swapped_columns(self, merge_cells, engine, ext): + # Test for issue #5427. 
+ write_frame = DataFrame({'A': [1, 1, 1], + 'B': [2, 2, 2]}) + write_frame.to_excel(self.path, 'test1', columns=['B', 'A']) + + read_frame = read_excel(self.path, 'test1', header=0) + + tm.assert_series_equal(write_frame['A'], read_frame['A']) + tm.assert_series_equal(write_frame['B'], read_frame['B']) - with ensure_clean(self.ext) as path: - write_frame = DataFrame({'A': [1, 1, 1], - 'B': [2, 2, 2]}) + def test_invalid_columns(self, merge_cells, engine, ext): + # 10982 + write_frame = DataFrame({'A': [1, 1, 1], + 'B': [2, 2, 2]}) - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - write_frame.to_excel(path, 'test1', columns=['B', 'C']) - expected = write_frame.reindex(columns=['B', 'C']) - read_frame = read_excel(path, 'test1') - tm.assert_frame_equal(expected, read_frame) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + write_frame.to_excel(self.path, 'test1', columns=['B', 'C']) + expected = write_frame.reindex(columns=['B', 'C']) + read_frame = read_excel(self.path, 'test1') + tm.assert_frame_equal(expected, read_frame) - with pytest.raises(KeyError): - write_frame.to_excel(path, 'test1', columns=['C', 'D']) + with pytest.raises(KeyError): + write_frame.to_excel(self.path, 'test1', columns=['C', 'D']) - def test_comment_arg(self): + def test_comment_arg(self, merge_cells, engine, ext): # Re issue #18735 # Test the comment argument functionality to read_excel - with ensure_clean(self.ext) as path: - - # Create file to read in - df = DataFrame({'A': ['one', '#one', 'one'], - 'B': ['two', 'two', '#two']}) - df.to_excel(path, 'test_c') - - # Read file without comment arg - result1 = read_excel(path, 'test_c') - result1.iloc[1, 0] = None - result1.iloc[1, 1] = None - result1.iloc[2, 1] = None - result2 = read_excel(path, 'test_c', comment='#') - tm.assert_frame_equal(result1, result2) - - def test_comment_default(self): + + # Create file to read in + df = DataFrame({'A': ['one', '#one', 'one'], + 'B': ['two', 
'two', '#two']}) + df.to_excel(self.path, 'test_c') + + # Read file without comment arg + result1 = read_excel(self.path, 'test_c') + result1.iloc[1, 0] = None + result1.iloc[1, 1] = None + result1.iloc[2, 1] = None + result2 = read_excel(self.path, 'test_c', comment='#') + tm.assert_frame_equal(result1, result2) + + def test_comment_default(self, merge_cells, engine, ext): # Re issue #18735 # Test the comment argument default to read_excel - with ensure_clean(self.ext) as path: - # Create file to read in - df = DataFrame({'A': ['one', '#one', 'one'], - 'B': ['two', 'two', '#two']}) - df.to_excel(path, 'test_c') + # Create file to read in + df = DataFrame({'A': ['one', '#one', 'one'], + 'B': ['two', 'two', '#two']}) + df.to_excel(self.path, 'test_c') - # Read file with default and explicit comment=None - result1 = read_excel(path, 'test_c') - result2 = read_excel(path, 'test_c', comment=None) - tm.assert_frame_equal(result1, result2) + # Read file with default and explicit comment=None + result1 = read_excel(self.path, 'test_c') + result2 = read_excel(self.path, 'test_c', comment=None) + tm.assert_frame_equal(result1, result2) - def test_comment_used(self): + def test_comment_used(self, merge_cells, engine, ext): # Re issue #18735 # Test the comment argument is working as expected when used - with ensure_clean(self.ext) as path: - # Create file to read in - df = DataFrame({'A': ['one', '#one', 'one'], - 'B': ['two', 'two', '#two']}) - df.to_excel(path, 'test_c') + # Create file to read in + df = DataFrame({'A': ['one', '#one', 'one'], + 'B': ['two', 'two', '#two']}) + df.to_excel(self.path, 'test_c') - # Test read_frame_comment against manually produced expected output - expected = DataFrame({'A': ['one', None, 'one'], - 'B': ['two', None, None]}) - result = read_excel(path, 'test_c', comment='#') - tm.assert_frame_equal(result, expected) + # Test read_frame_comment against manually produced expected output + expected = DataFrame({'A': ['one', None, 'one'], + 'B': 
['two', None, None]}) + result = read_excel(self.path, 'test_c', comment='#') + tm.assert_frame_equal(result, expected) - def test_comment_emptyline(self): + def test_comment_emptyline(self, merge_cells, engine, ext): # Re issue #18735 # Test that read_excel ignores commented lines at the end of file - with ensure_clean(self.ext) as path: - df = DataFrame({'a': ['1', '#2'], 'b': ['2', '3']}) - df.to_excel(path, index=False) + df = DataFrame({'a': ['1', '#2'], 'b': ['2', '3']}) + df.to_excel(self.path, index=False) - # Test that all-comment lines at EoF are ignored - expected = DataFrame({'a': [1], 'b': [2]}) - result = read_excel(path, comment='#') - tm.assert_frame_equal(result, expected) + # Test that all-comment lines at EoF are ignored + expected = DataFrame({'a': [1], 'b': [2]}) + result = read_excel(self.path, comment='#') + tm.assert_frame_equal(result, expected) - def test_datetimes(self): + def test_datetimes(self, merge_cells, engine, ext): # Test writing and reading datetimes. For issue #9139. 
(xref #9185) - _skip_if_no_xlrd() - datetimes = [datetime(2013, 1, 13, 1, 2, 3), datetime(2013, 1, 13, 2, 45, 56), datetime(2013, 1, 13, 4, 29, 49), @@ -1986,21 +1843,18 @@ def test_datetimes(self): datetime(2013, 1, 13, 16, 37, 0), datetime(2013, 1, 13, 18, 20, 52)] - with ensure_clean(self.ext) as path: - write_frame = DataFrame({'A': datetimes}) - write_frame.to_excel(path, 'Sheet1') - read_frame = read_excel(path, 'Sheet1', header=0) + write_frame = DataFrame({'A': datetimes}) + write_frame.to_excel(self.path, 'Sheet1') + read_frame = read_excel(self.path, 'Sheet1', header=0) - tm.assert_series_equal(write_frame['A'], read_frame['A']) + tm.assert_series_equal(write_frame['A'], read_frame['A']) # GH7074 - def test_bytes_io(self): - _skip_if_no_xlrd() - + def test_bytes_io(self, merge_cells, engine, ext): bio = BytesIO() df = DataFrame(np.random.randn(10, 2)) # pass engine explicitly as there is no file path to infer from - writer = ExcelWriter(bio, engine=self.engine_name) + writer = ExcelWriter(bio, engine=engine) df.to_excel(writer) writer.save() bio.seek(0) @@ -2008,62 +1862,59 @@ def test_bytes_io(self): tm.assert_frame_equal(df, reread_df) # GH8188 - def test_write_lists_dict(self): - _skip_if_no_xlrd() - + def test_write_lists_dict(self, merge_cells, engine, ext): df = DataFrame({'mixed': ['a', ['b', 'c'], {'d': 'e', 'f': 2}], 'numeric': [1, 2, 3.0], 'str': ['apple', 'banana', 'cherry']}) expected = df.copy() expected.mixed = expected.mixed.apply(str) expected.numeric = expected.numeric.astype('int64') - with ensure_clean(self.ext) as path: - df.to_excel(path, 'Sheet1') - read = read_excel(path, 'Sheet1', header=0) - tm.assert_frame_equal(read, expected) + + df.to_excel(self.path, 'Sheet1') + read = read_excel(self.path, 'Sheet1', header=0) + tm.assert_frame_equal(read, expected) # GH13347 - def test_true_and_false_value_options(self): + def test_true_and_false_value_options(self, merge_cells, engine, ext): df = pd.DataFrame([['foo', 'bar']], 
columns=['col1', 'col2']) expected = df.replace({'foo': True, 'bar': False}) - with ensure_clean(self.ext) as path: - df.to_excel(path) - read_frame = read_excel(path, true_values=['foo'], - false_values=['bar']) - tm.assert_frame_equal(read_frame, expected) - def test_freeze_panes(self): + df.to_excel(self.path) + read_frame = read_excel(self.path, true_values=['foo'], + false_values=['bar']) + tm.assert_frame_equal(read_frame, expected) + + def test_freeze_panes(self, merge_cells, engine, ext): # GH15160 expected = DataFrame([[1, 2], [3, 4]], columns=['col1', 'col2']) - with ensure_clean(self.ext) as path: - expected.to_excel(path, "Sheet1", freeze_panes=(1, 1)) - result = read_excel(path) - tm.assert_frame_equal(expected, result) + expected.to_excel(self.path, "Sheet1", freeze_panes=(1, 1)) + result = read_excel(self.path) + tm.assert_frame_equal(expected, result) - def test_path_pathlib(self): + def test_path_pathlib(self, merge_cells, engine, ext): df = tm.makeDataFrame() - writer = partial(df.to_excel, engine=self.engine_name) + writer = partial(df.to_excel, engine=engine) reader = partial(pd.read_excel) result = tm.round_trip_pathlib(writer, reader, - path="foo.{}".format(self.ext)) + path="foo.{}".format(ext)) tm.assert_frame_equal(df, result) - def test_path_localpath(self): + def test_path_localpath(self, merge_cells, engine, ext): df = tm.makeDataFrame() - writer = partial(df.to_excel, engine=self.engine_name) + writer = partial(df.to_excel, engine=engine) reader = partial(pd.read_excel) result = tm.round_trip_pathlib(writer, reader, - path="foo.{}".format(self.ext)) + path="foo.{}".format(ext)) tm.assert_frame_equal(df, result) -class TestOpenpyxlTests(ExcelWriterBase): - engine_name = 'openpyxl' - ext = '.xlsx' - check_skip = staticmethod(_skip_if_no_openpyxl) +@td.skip_if_no('openpyxl') +@pytest.mark.parametrize("merge_cells,ext,engine", [ + (None, '.xlsx', 'openpyxl')]) +class TestOpenpyxlTests(_WriterBase): - def test_to_excel_styleconverter(self): 
+ def test_to_excel_styleconverter(self, merge_cells, ext, engine): from openpyxl import styles hstyle = { @@ -2117,7 +1968,7 @@ def test_to_excel_styleconverter(self): assert kw['number_format'] == number_format assert kw['protection'] == protection - def test_write_cells_merge_styled(self): + def test_write_cells_merge_styled(self, merge_cells, ext, engine): from pandas.io.formats.excel import ExcelCell sheet_name = 'merge_styled' @@ -2138,7 +1989,7 @@ def test_write_cells_merge_styled(self): mergestart=1, mergeend=1, style=sty_merged), ] - with ensure_clean('.xlsx') as path: + with ensure_clean(ext) as path: writer = _OpenpyxlWriter(path) writer.write_cells(initial_cells, sheet_name=sheet_name) writer.write_cells(merge_cells, sheet_name=sheet_name) @@ -2150,44 +2001,41 @@ def test_write_cells_merge_styled(self): assert xcell_a2.font == openpyxl_sty_merged -class TestXlwtTests(ExcelWriterBase): - ext = '.xls' - engine_name = 'xlwt' - check_skip = staticmethod(_skip_if_no_xlwt) +@td.skip_if_no('xlwt') +@pytest.mark.parametrize("merge_cells,ext,engine", [ + (None, '.xls', 'xlwt')]) +class TestXlwtTests(_WriterBase): - def test_excel_raise_error_on_multiindex_columns_and_no_index(self): - _skip_if_no_xlwt() + def test_excel_raise_error_on_multiindex_columns_and_no_index( + self, merge_cells, ext, engine): # MultiIndex as columns is not yet implemented 9794 cols = MultiIndex.from_tuples([('site', ''), ('2014', 'height'), ('2014', 'weight')]) df = DataFrame(np.random.randn(10, 3), columns=cols) with pytest.raises(NotImplementedError): - with ensure_clean(self.ext) as path: + with ensure_clean(ext) as path: df.to_excel(path, index=False) - def test_excel_multiindex_columns_and_index_true(self): - _skip_if_no_xlwt() + def test_excel_multiindex_columns_and_index_true(self, merge_cells, ext, + engine): cols = MultiIndex.from_tuples([('site', ''), ('2014', 'height'), ('2014', 'weight')]) df = pd.DataFrame(np.random.randn(10, 3), columns=cols) - with ensure_clean(self.ext) 
as path: + with ensure_clean(ext) as path: df.to_excel(path, index=True) - def test_excel_multiindex_index(self): - _skip_if_no_xlwt() + def test_excel_multiindex_index(self, merge_cells, ext, engine): # MultiIndex as index works so assert no error #9794 cols = MultiIndex.from_tuples([('site', ''), ('2014', 'height'), ('2014', 'weight')]) df = DataFrame(np.random.randn(3, 10), index=cols) - with ensure_clean(self.ext) as path: + with ensure_clean(ext) as path: df.to_excel(path, index=False) - def test_to_excel_styleconverter(self): - _skip_if_no_xlwt() - + def test_to_excel_styleconverter(self, merge_cells, ext, engine): import xlwt hstyle = {"font": {"bold": True}, @@ -2207,23 +2055,21 @@ def test_to_excel_styleconverter(self): assert xlwt.Alignment.VERT_TOP == xls_style.alignment.vert -class TestXlsxWriterTests(ExcelWriterBase): - ext = '.xlsx' - engine_name = 'xlsxwriter' - check_skip = staticmethod(_skip_if_no_xlsxwriter) +@td.skip_if_no('xlsxwriter') +@pytest.mark.parametrize("merge_cells,ext,engine", [ + (None, '.xlsx', 'xlsxwriter')]) +class TestXlsxWriterTests(_WriterBase): - def test_column_format(self): + @td.skip_if_no('openpyxl') + def test_column_format(self, merge_cells, ext, engine): # Test that column formats are applied to cells. Test for issue #9167. # Applicable to xlsxwriter only. - _skip_if_no_xlsxwriter() - with warnings.catch_warnings(): # Ignore the openpyxl lxml warning. warnings.simplefilter("ignore") - _skip_if_no_openpyxl() import openpyxl - with ensure_clean(self.ext) as path: + with ensure_clean(ext) as path: frame = DataFrame({'A': [123456, 123456], 'B': [123456, 123456]}) @@ -2260,54 +2106,28 @@ def test_column_format(self): assert read_num_format == num_format -class TestOpenpyxlTests_NoMerge(ExcelWriterBase): - ext = '.xlsx' - engine_name = 'openpyxl' - check_skip = staticmethod(_skip_if_no_openpyxl) - - # Test < 0.13 non-merge behaviour for MultiIndex and Hierarchical Rows. 
- merge_cells = False - - -class TestXlwtTests_NoMerge(ExcelWriterBase): - ext = '.xls' - engine_name = 'xlwt' - check_skip = staticmethod(_skip_if_no_xlwt) - - # Test < 0.13 non-merge behaviour for MultiIndex and Hierarchical Rows. - merge_cells = False - - -class TestXlsxWriterTests_NoMerge(ExcelWriterBase): - ext = '.xlsx' - engine_name = 'xlsxwriter' - check_skip = staticmethod(_skip_if_no_xlsxwriter) - - # Test < 0.13 non-merge behaviour for MultiIndex and Hierarchical Rows. - merge_cells = False - - class TestExcelWriterEngineTests(object): - def test_ExcelWriter_dispatch(self): - with tm.assert_raises_regex(ValueError, 'No engine'): - ExcelWriter('nothing') - - try: - import xlsxwriter # noqa - writer_klass = _XlsxWriter - except ImportError: - _skip_if_no_openpyxl() - writer_klass = _OpenpyxlWriter - - with ensure_clean('.xlsx') as path: + @pytest.mark.parametrize('klass,ext', [ + pytest.param(_XlsxWriter, '.xlsx', marks=pytest.mark.skipif( + not td.safe_import('xlsxwriter'), reason='No xlsxwriter')), + pytest.param(_OpenpyxlWriter, '.xlsx', marks=pytest.mark.skipif( + not td.safe_import('openpyxl'), reason='No openpyxl')), + pytest.param(_XlwtWriter, '.xls', marks=pytest.mark.skipif( + not td.safe_import('xlwt'), reason='No xlwt')) + ]) + def test_ExcelWriter_dispatch(self, klass, ext): + with ensure_clean(ext) as path: writer = ExcelWriter(path) - assert isinstance(writer, writer_klass) + if ext == '.xlsx' and td.safe_import('xlsxwriter'): + # xlsxwriter has preference over openpyxl if both installed + assert isinstance(writer, _XlsxWriter) + else: + assert isinstance(writer, klass) - _skip_if_no_xlwt() - with ensure_clean('.xls') as path: - writer = ExcelWriter(path) - assert isinstance(writer, _XlwtWriter) + def test_ExcelWriter_dispatch_raises(self): + with tm.assert_raises_regex(ValueError, 'No engine'): + ExcelWriter('nothing') def test_register_writer(self): # some awkward mocking to test out dispatch and such actually works @@ -2498,11 +2318,11 @@ 
def custom_converter(css): assert n_cells == (10 + 1) * (3 + 1) +@td.skip_if_no('openpyxl') class TestFSPath(object): @pytest.mark.skipif(sys.version_info < (3, 6), reason='requires fspath') def test_excelfile_fspath(self): - _skip_if_no_openpyxl() with tm.ensure_clean('foo.xlsx') as path: df = DataFrame({"A": [1, 2]}) df.to_excel(path) @@ -2513,7 +2333,6 @@ def test_excelfile_fspath(self): @pytest.mark.skipif(sys.version_info < (3, 6), reason='requires fspath') # @pytest.mark.xfail def test_excelwriter_fspath(self): - _skip_if_no_openpyxl() with tm.ensure_clean('foo.xlsx') as path: writer = ExcelWriter(path) assert os.fspath(writer) == str(path) diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py index 0fd5648739e5c..b2745ab5eec77 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -57,7 +57,11 @@ def safe_import(mod_name, min_version=None): return mod else: import sys - version = getattr(sys.modules[mod_name], '__version__') + try: + version = getattr(sys.modules[mod_name], '__version__') + except AttributeError: + # xlrd uses a capitalized attribute name + version = getattr(sys.modules[mod_name], '__VERSION__') if version: from distutils.version import LooseVersion if LooseVersion(version) >= LooseVersion(min_version): diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 0009e26f8b100..942416408e4f0 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -2165,7 +2165,7 @@ def network(t, url="http://www.google.com", from pytest import skip t.network = True - @wraps(t) + @compat.wraps(t) def wrapper(*args, **kwargs): if check_before_test and not raise_on_error: if not can_connect(url, error_classes): From 55087043e7c26ec7e8a438b8498a07c4e50b9987 Mon Sep 17 00:00:00 2001 From: cbertinato Date: Mon, 26 Feb 2018 20:13:00 -0500 Subject: [PATCH 210/217] BUG: Fix Series constructor for Categorical with index (#19714) --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/core/series.py | 
13 ++++++++++- pandas/tests/io/formats/test_style.py | 2 +- pandas/tests/series/test_constructors.py | 28 ++++++++++++++++++++++++ 4 files changed, 42 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index fb22dc40e335f..5330f7e7e998b 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -919,6 +919,7 @@ Reshaping - Comparisons between :class:`Series` and :class:`Index` would return a ``Series`` with an incorrect name, ignoring the ``Index``'s name attribute (:issue:`19582`) - Bug in :func:`qcut` where datetime and timedelta data with ``NaT`` present raised a ``ValueError`` (:issue:`19768`) - Bug in :func:`DataFrame.iterrows`, which would infers strings not compliant to `ISO8601 `_ to datetimes (:issue:`19671`) +- Bug in :class:`Series` constructor with ``Categorical`` where a ```ValueError`` is not raised when an index of different length is given (:issue:`19342`) Other ^^^^^ diff --git a/pandas/core/series.py b/pandas/core/series.py index 26b7fd552b062..8053651a4877a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -212,7 +212,6 @@ def __init__(self, data=None, index=None, dtype=None, name=None, 'be False.') elif is_extension_array_dtype(data) and dtype is not None: - # GH12574: Allow dtype=category only, otherwise error if not data.dtype.is_dtype(dtype): raise ValueError("Cannot specify a dtype '{}' with an " "extension array of a different " @@ -235,6 +234,18 @@ def __init__(self, data=None, index=None, dtype=None, name=None, if not is_list_like(data): data = [data] index = com._default_index(len(data)) + elif is_list_like(data): + + # a scalar numpy array is list-like but doesn't + # have a proper length + try: + if len(index) != len(data): + raise ValueError( + 'Length of passed values is {val}, ' + 'index implies {ind}' + .format(val=len(data), ind=len(index))) + except TypeError: + pass # create/copy the manager if isinstance(data, 
SingleBlockManager): diff --git a/pandas/tests/io/formats/test_style.py b/pandas/tests/io/formats/test_style.py index bedb11d4fc4ae..adf8e14b756c2 100644 --- a/pandas/tests/io/formats/test_style.py +++ b/pandas/tests/io/formats/test_style.py @@ -24,7 +24,7 @@ def setup_method(self, method): def h(x, foo='bar'): return pd.Series( - ['color: {foo}'.format(foo=foo)], index=x.index, name=x.name) + 'color: {foo}'.format(foo=foo), index=x.index, name=x.name) self.h = h self.styler = Styler(self.df) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 77f9dfcce686d..25f425ffa0021 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -400,6 +400,34 @@ def test_constructor_default_index(self): s = Series([0, 1, 2]) tm.assert_index_equal(s.index, pd.Index(np.arange(3))) + @pytest.mark.parametrize('input', [[1, 2, 3], + (1, 2, 3), + list(range(3)), + pd.Categorical(['a', 'b', 'a']), + (i for i in range(3)), + map(lambda x: x, range(3))]) + def test_constructor_index_mismatch(self, input): + # GH 19342 + # test that construction of a Series with an index of different length + # raises an error + msg = 'Length of passed values is 3, index implies 4' + with pytest.raises(ValueError, message=msg): + Series(input, index=np.arange(4)) + + def test_constructor_numpy_scalar(self): + # GH 19342 + # construction with a numpy scalar + # should not raise + result = Series(np.array(100), index=np.arange(4), dtype='int64') + expected = Series(100, index=np.arange(4), dtype='int64') + tm.assert_series_equal(result, expected) + + def test_constructor_broadcast_list(self): + # GH 19342 + # construction with single-element container and index + # should raise + pytest.raises(ValueError, Series, ['foo'], index=['a', 'b', 'c']) + def test_constructor_corner(self): df = tm.makeTimeDataFrame() objs = [df, df] From 69f0e8bef1e6b923f486af22d8fc9171aec105ee Mon Sep 17 00:00:00 2001 From: Jaume Bonet 
Date: Tue, 27 Feb 2018 02:15:31 +0100 Subject: [PATCH 211/217] CLN: Remove Series._from_array (#19893) --- pandas/core/dtypes/concat.py | 21 +++++++++++++++++++++ pandas/core/frame.py | 8 ++++---- pandas/core/series.py | 18 ++---------------- pandas/core/sparse/series.py | 6 ------ 4 files changed, 27 insertions(+), 26 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index d306d0d78f1f4..0501493e718d0 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -101,6 +101,27 @@ def _get_frame_result_type(result, objs): ABCSparseDataFrame)) +def _get_sliced_frame_result_type(data, obj): + """ + return appropriate class of Series. When data is sparse + it will return a SparseSeries, otherwise it will return + the Series. + + Parameters + ---------- + data : array-like + obj : DataFrame + + Returns + ------- + Series or SparseSeries + """ + if is_sparse(data): + from pandas.core.sparse.api import SparseSeries + return SparseSeries + return obj._constructor_sliced + + def _concat_compat(to_concat, axis=0): """ provide concatenation of an array of arrays each of which is a single diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e4ef1b97882d9..1f26a367334c6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -60,6 +60,7 @@ is_iterator, is_sequence, is_named_tuple) +from pandas.core.dtypes.concat import _get_sliced_frame_result_type from pandas.core.dtypes.missing import isna, notna @@ -2166,8 +2167,7 @@ def _ixs(self, i, axis=0): if index_len and not len(values): values = np.array([np.nan] * index_len, dtype=object) - result = self._constructor_sliced._from_array( - values, index=self.index, name=label, fastpath=True) + result = self._box_col_values(values, label) # this is a cached value, mark it so result._set_as_cached(label, self) @@ -2563,8 +2563,8 @@ def _box_item_values(self, key, values): def _box_col_values(self, values, items): """ provide boxed values for a column """ - return 
self._constructor_sliced._from_array(values, index=self.index, - name=items, fastpath=True) + klass = _get_sliced_frame_result_type(values, self) + return klass(values, index=self.index, name=items, fastpath=True) def __setitem__(self, key, value): key = com._apply_if_callable(key, self) diff --git a/pandas/core/series.py b/pandas/core/series.py index 8053651a4877a..6822f1f6b58b5 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -316,25 +316,11 @@ def from_array(cls, arr, index=None, name=None, dtype=None, copy=False, warnings.warn("'from_array' is deprecated and will be removed in a " "future version. Please use the pd.Series(..) " "constructor instead.", FutureWarning, stacklevel=2) - return cls._from_array(arr, index=index, name=name, dtype=dtype, - copy=copy, fastpath=fastpath) - - @classmethod - def _from_array(cls, arr, index=None, name=None, dtype=None, copy=False, - fastpath=False): - """ - Internal method used in DataFrame.__setitem__/__getitem__. - Difference with Series(..) is that this method checks if a sparse - array is passed. - - """ - # return a sparse series here if isinstance(arr, ABCSparseArray): from pandas.core.sparse.series import SparseSeries cls = SparseSeries - - return cls(arr, index=index, name=name, dtype=dtype, copy=copy, - fastpath=fastpath) + return cls(arr, index=index, name=name, dtype=dtype, + copy=copy, fastpath=fastpath) @property def _constructor(self): diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index 7a1496bf11117..f8b98a1a40081 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -216,12 +216,6 @@ def from_array(cls, arr, index=None, name=None, copy=False, warnings.warn("'from_array' is deprecated and will be removed in a " "future version. Please use the pd.SparseSeries(..) 
" "constructor instead.", FutureWarning, stacklevel=2) - return cls._from_array(arr, index=index, name=name, copy=copy, - fill_value=fill_value, fastpath=fastpath) - - @classmethod - def _from_array(cls, arr, index=None, name=None, copy=False, - fill_value=None, fastpath=False): return cls(arr, index=index, name=name, copy=copy, fill_value=fill_value, fastpath=fastpath) From 25fc8289884be09c22313c6ab0468d94652a043b Mon Sep 17 00:00:00 2001 From: Anh Le Date: Mon, 26 Feb 2018 20:24:57 -0500 Subject: [PATCH 212/217] DOC fix incorrect example in DataFrame.to_dict docstring. Close GH19868 (#19915) --- pandas/core/frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 1f26a367334c6..ae8fb48a61fce 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -958,8 +958,8 @@ def to_dict(self, orient='dict', into=dict): {'col1': [1, 2], 'col2': [0.5, 0.75]}, index=['a', 'b']) >>> df col1 col2 - a 1 0.1 - b 2 0.2 + a 1 0.50 + b 2 0.75 >>> df.to_dict() {'col1': {'a': 1, 'b': 2}, 'col2': {'a': 0.5, 'b': 0.75}} From 2d10b350f4e9d586fb9b2159acf0318be4cf498e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 27 Feb 2018 03:36:22 -0800 Subject: [PATCH 213/217] handle NaT add/sub in one place (#19903) --- pandas/core/indexes/datetimelike.py | 47 +++++++++++++++++++++++++---- pandas/core/indexes/datetimes.py | 20 +++--------- pandas/core/indexes/period.py | 17 +---------- pandas/core/indexes/timedeltas.py | 18 +++-------- 4 files changed, 52 insertions(+), 50 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 8e56fc2775a56..4c6effc65a4d3 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- """ Base and utility classes for tseries type pandas objects. 
""" @@ -640,6 +641,28 @@ def _add_datelike(self, other): def _sub_datelike(self, other): raise com.AbstractMethodError(self) + def _add_nat(self): + """Add pd.NaT to self""" + if is_period_dtype(self): + raise TypeError('Cannot add {cls} and {typ}' + .format(cls=type(self).__name__, + typ=type(NaT).__name__)) + + # GH#19124 pd.NaT is treated like a timedelta for both timedelta + # and datetime dtypes + return self._nat_new(box=True) + + def _sub_nat(self): + """Subtract pd.NaT from self""" + # GH#19124 Timedelta - datetime is not in general well-defined. + # We make an exception for pd.NaT, which in this case quacks + # like a timedelta. + # For datetime64 dtypes by convention we treat NaT as a datetime, so + # this subtraction returns a timedelta64 dtype. + # For period dtype, timedelta64 is a close-enough return dtype. + result = self._nat_new(box=False) + return result.view('timedelta64[ns]') + def _sub_period(self, other): return NotImplemented @@ -686,6 +709,8 @@ def __add__(self, other): return NotImplemented # scalar others + elif other is NaT: + result = self._add_nat() elif isinstance(other, (DateOffset, timedelta, np.timedelta64)): result = self._add_delta(other) elif isinstance(other, (datetime, np.datetime64)): @@ -711,9 +736,13 @@ def __add__(self, other): else: # pragma: no cover return NotImplemented - if result is not NotImplemented: - res_name = ops.get_op_result_name(self, other) - result.name = res_name + if result is NotImplemented: + return NotImplemented + elif not isinstance(result, Index): + # Index.__new__ will choose appropriate subclass for dtype + result = Index(result) + res_name = ops.get_op_result_name(self, other) + result.name = res_name return result cls.__add__ = __add__ @@ -731,6 +760,8 @@ def __sub__(self, other): return NotImplemented # scalar others + elif other is NaT: + result = self._sub_nat() elif isinstance(other, (DateOffset, timedelta, np.timedelta64)): result = self._add_delta(-other) elif isinstance(other, (datetime, 
np.datetime64)): @@ -762,9 +793,13 @@ def __sub__(self, other): else: # pragma: no cover return NotImplemented - if result is not NotImplemented: - res_name = ops.get_op_result_name(self, other) - result.name = res_name + if result is NotImplemented: + return NotImplemented + elif not isinstance(result, Index): + # Index.__new__ will choose appropriate subclass for dtype + result = Index(result) + res_name = ops.get_op_result_name(self, other) + result.name = res_name return result cls.__sub__ = __sub__ diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 55d8b7c18a622..eb8133a1bbf97 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -853,22 +853,11 @@ def __setstate__(self, state): raise Exception("invalid pickle state") _unpickle_compat = __setstate__ - def _add_datelike(self, other): - # adding a timedeltaindex to a datetimelike - if other is libts.NaT: - return self._nat_new(box=True) - raise TypeError("cannot add {0} and {1}" - .format(type(self).__name__, - type(other).__name__)) - def _sub_datelike(self, other): - # subtract a datetime from myself, yielding a TimedeltaIndex - from pandas import TimedeltaIndex - + # subtract a datetime from myself, yielding a ndarray[timedelta64[ns]] if isinstance(other, (DatetimeIndex, np.ndarray)): # if other is an ndarray, we assume it is datetime64-dtype other = DatetimeIndex(other) - # require tz compat if not self._has_same_tz(other): raise TypeError("{cls} subtraction must have the same " @@ -876,9 +865,10 @@ def _sub_datelike(self, other): .format(cls=type(self).__name__)) result = self._sub_datelike_dti(other) elif isinstance(other, (datetime, np.datetime64)): + assert other is not libts.NaT other = Timestamp(other) if other is libts.NaT: - result = self._nat_new(box=False) + return self - libts.NaT # require tz compat elif not self._has_same_tz(other): raise TypeError("Timestamp subtraction must have the same " @@ -893,7 +883,7 @@ def 
_sub_datelike(self, other): raise TypeError("cannot subtract {cls} and {typ}" .format(cls=type(self).__name__, typ=type(other).__name__)) - return TimedeltaIndex(result) + return result.view('timedelta64[ns]') def _sub_datelike_dti(self, other): """subtraction of two DatetimeIndexes""" @@ -906,7 +896,7 @@ def _sub_datelike_dti(self, other): if self.hasnans or other.hasnans: mask = (self._isnan) | (other._isnan) new_values[mask] = libts.iNaT - return new_values.view('i8') + return new_values.view('timedelta64[ns]') def _maybe_update_attributes(self, attrs): """ Update Index attributes (e.g. freq) depending on op """ diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index f0567c9c963af..b936a4e26af60 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -12,7 +12,6 @@ is_scalar, is_datetime64_dtype, is_datetime64_any_dtype, - is_timedelta64_dtype, is_period_dtype, is_bool_dtype, pandas_dtype, @@ -23,7 +22,6 @@ import pandas.tseries.frequencies as frequencies from pandas.tseries.frequencies import get_freq_code as _gfc from pandas.core.indexes.datetimes import DatetimeIndex, Int64Index, Index -from pandas.core.indexes.timedeltas import TimedeltaIndex from pandas.core.indexes.datetimelike import DatelikeOps, DatetimeIndexOpsMixin from pandas.core.tools.datetimes import parse_time_string import pandas.tseries.offsets as offsets @@ -700,16 +698,6 @@ def _maybe_convert_timedelta(self, other): return other.n msg = _DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) raise IncompatibleFrequency(msg) - elif isinstance(other, np.ndarray): - if is_integer_dtype(other): - return other - elif is_timedelta64_dtype(other): - offset = frequencies.to_offset(self.freq) - if isinstance(offset, offsets.Tick): - nanos = delta_to_nanoseconds(other) - offset_nanos = delta_to_nanoseconds(offset) - if (nanos % offset_nanos).all() == 0: - return nanos // offset_nanos elif is_integer(other): # integer is passed to .shift via # 
_add_datetimelike_methods basically @@ -724,10 +712,7 @@ def _add_delta(self, other): return self.shift(ordinal_delta) def _sub_datelike(self, other): - if other is tslib.NaT: - new_data = np.empty(len(self), dtype=np.int64) - new_data.fill(tslib.iNaT) - return TimedeltaIndex(new_data) + assert other is not tslib.NaT return NotImplemented def _sub_period(self, other): diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index eebd52d7fb801..c42c0656c585a 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -414,16 +414,13 @@ def _evaluate_with_timedelta_like(self, other, op): def _add_datelike(self, other): # adding a timedeltaindex to a datetimelike from pandas import Timestamp, DatetimeIndex - - if other is NaT: - # GH#19124 pd.NaT is treated like a timedelta - return self._nat_new() - elif isinstance(other, (DatetimeIndex, np.ndarray)): + if isinstance(other, (DatetimeIndex, np.ndarray)): # if other is an ndarray, we assume it is datetime64-dtype # defer to implementation in DatetimeIndex other = DatetimeIndex(other) return other + self else: + assert other is not NaT other = Timestamp(other) i8 = self.asi8 result = checked_add_with_arr(i8, other.value, @@ -432,14 +429,9 @@ def _add_datelike(self, other): return DatetimeIndex(result) def _sub_datelike(self, other): - # GH#19124 Timedelta - datetime is not in general well-defined. - # We make an exception for pd.NaT, which in this case quacks - # like a timedelta. 
- if other is NaT: - return self._nat_new() - else: - raise TypeError("cannot subtract a datelike from a {cls}" - .format(cls=type(self).__name__)) + assert other is not NaT + raise TypeError("cannot subtract a datelike from a {cls}" + .format(cls=type(self).__name__)) def _addsub_offset_array(self, other, op): # Add or subtract Array-like of DateOffset objects From 892dd3d29e3c9dbf24963d70b2b6dad74bed93d2 Mon Sep 17 00:00:00 2001 From: William Ayd Date: Tue, 27 Feb 2018 13:28:22 -0800 Subject: [PATCH 214/217] ASV: Added seek to buffer to fix xlwt asv failure (#19926) * Added seek to buffer to fix xlwt asv failure * Added conditional to check for seek on xlrd object --- pandas/io/excel.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 0d3d4286f5a3c..78af86cc00f7f 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -387,6 +387,10 @@ def __init__(self, io, **kwds): self.book = io elif not isinstance(io, xlrd.Book) and hasattr(io, "read"): # N.B. xlrd.Book has a read attribute too + if hasattr(io, 'seek'): + # GH 19779 + io.seek(0) + data = io.read() self.book = xlrd.open_workbook(file_contents=data) elif isinstance(self._io, compat.string_types): From dc4bf8ad88521e761674d886751959e7c984b89d Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 27 Feb 2018 15:31:55 -0600 Subject: [PATCH 215/217] TST: Debug flaky plotting test (#19925) --- pandas/tests/plotting/test_datetimelike.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index 94adf349fe2cd..08a047a2e7707 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -689,14 +689,17 @@ def test_mixed_freq_regular_first(self): s2 = s1[[0, 5, 10, 11, 12, 13, 14, 15]] # it works! 
- s1.plot() + _, ax = self.plt.subplots() + s1.plot(ax=ax) - ax2 = s2.plot(style='g') + ax2 = s2.plot(style='g', ax=ax) lines = ax2.get_lines() idx1 = PeriodIndex(lines[0].get_xdata()) idx2 = PeriodIndex(lines[1].get_xdata()) - assert idx1.equals(s1.index.to_period('B')) - assert idx2.equals(s2.index.to_period('B')) + + tm.assert_index_equal(idx1, s1.index.to_period('B')) + tm.assert_index_equal(idx2, s2.index.to_period('B')) + left, right = ax2.get_xlim() pidx = s1.index.to_period() assert left <= pidx[0].ordinal From 4a4f3f201fcebaf6ed521ab4d37464c43e1cf75c Mon Sep 17 00:00:00 2001 From: harisbal Date: Sun, 28 Jan 2018 21:52:23 +0000 Subject: [PATCH 216/217] Rebase Rebase --- ci/requirements-3.6.build | 2 +- pandas/core/base.py | 8 ++--- pandas/core/resample.py | 6 ++-- pandas/tests/frame/test_operators.py | 47 ++++++++++++++++++++++++++++ 4 files changed, 55 insertions(+), 8 deletions(-) diff --git a/ci/requirements-3.6.build b/ci/requirements-3.6.build index 1c4b46aea3865..94e1152450d87 100644 --- a/ci/requirements-3.6.build +++ b/ci/requirements-3.6.build @@ -2,5 +2,5 @@ python=3.6* python-dateutil pytz nomkl -numpy +numpy=1.13.* cython diff --git a/pandas/core/base.py b/pandas/core/base.py index 280b8849792e3..fb93fde6d3a21 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -46,7 +46,7 @@ class StringMixin(object): # Formatting def __unicode__(self): - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) def __str__(self): """ @@ -278,10 +278,10 @@ def _gotitem(self, key, ndim, subset=None): subset to act on """ - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) def aggregate(self, func, *args, **kwargs): - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) agg = aggregate @@ -1247,4 +1247,4 @@ def duplicated(self, keep='first'): # abstracts def _update_inplace(self, result, **kwargs): - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) diff --git a/pandas/core/resample.py 
b/pandas/core/resample.py index 772568ee84737..af747ac4d3b20 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -233,7 +233,7 @@ def _convert_obj(self, obj): return obj def _get_binner_for_time(self): - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) def _set_binner(self): """ @@ -372,10 +372,10 @@ def transform(self, arg, *args, **kwargs): arg, *args, **kwargs) def _downsample(self, f): - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) def _upsample(self, f, limit=None, fill_value=None): - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) def _gotitem(self, key, ndim, subset=None): """ diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index 5df50f3d7835b..3a146cbf7c438 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -28,6 +28,53 @@ _check_mixed_int) +class TestDataFrameArithmetic(object): + + @pytest.mark.xfail(reason='GH#7996 datetime64 units not converted to nano') + def test_frame_sub_datetime64_not_ns(self): + df = pd.DataFrame(date_range('20130101', periods=3)) + dt64 = np.datetime64('2013-01-01') + assert dt64.dtype == 'datetime64[D]' + res = df - dt64 + expected = pd.DataFrame([pd.Timedelta(days=0), pd.Timedelta(days=1), + pd.Timedelta(days=2)]) + tm.assert_frame_equal(res, expected) + + @pytest.mark.parametrize('data', [ + [1, 2, 3], + [1.1, 2.2, 3.3], + [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02'), pd.NaT], + ['x', 'y', 1]]) + @pytest.mark.parametrize('dtype', [None, object]) + def test_frame_radd_str_invalid(self, dtype, data): + df = DataFrame(data, dtype=dtype) + with pytest.raises(TypeError): + 'foo_' + df + + @pytest.mark.parametrize('dtype', [None, object]) + def test_frame_with_dtype_radd_int(self, dtype): + df = pd.DataFrame([1, 2, 3], dtype=dtype) + expected = pd.DataFrame([2, 3, 4], dtype=dtype) + result = 1 + df + assert_frame_equal(result, expected) + result = df + 
1 + assert_frame_equal(result, expected) + + @pytest.mark.parametrize('dtype', [None, object]) + def test_frame_with_dtype_radd_nan(self, dtype): + df = pd.DataFrame([1, 2, 3], dtype=dtype) + expected = pd.DataFrame([np.nan, np.nan, np.nan], dtype=dtype) + result = np.nan + df + assert_frame_equal(result, expected) + result = df + np.nan + assert_frame_equal(result, expected) + + def test_frame_radd_str(self): + df = pd.DataFrame(['x', np.nan, 'x']) + assert_frame_equal('a' + df, pd.DataFrame(['ax', np.nan, 'ax'])) + assert_frame_equal(df + 'a', pd.DataFrame(['xa', np.nan, 'xa'])) + + class TestDataFrameOperators(TestData): def test_operators(self): From 593d6cb0d9d32ac4872ad1ff519dbffdcf1ec520 Mon Sep 17 00:00:00 2001 From: harisbal Date: Sun, 11 Mar 2018 13:29:28 +0000 Subject: [PATCH 217/217] Rebase --- .github/CODE_OF_CONDUCT.md | 63 + .github/CONTRIBUTING.md | 519 +- .github/ISSUE_TEMPLATE.md | 13 +- .github/PULL_REQUEST_TEMPLATE.md | 32 +- .gitignore | 10 +- .pep8speaks.yml | 12 + .travis.yml | 315 +- AUTHORS.md | 57 + LICENSE | 106 +- LICENSES/XARRAY_LICENSE | 191 + MANIFEST.in | 5 +- Makefile | 8 +- README.md | 116 +- RELEASE.md | 2 +- appveyor.yml | 43 +- asv_bench/asv.conf.json | 23 +- asv_bench/benchmarks/algorithms.py | 175 +- asv_bench/benchmarks/attrs_caching.py | 12 +- asv_bench/benchmarks/binary_ops.py | 131 +- asv_bench/benchmarks/categoricals.py | 155 +- asv_bench/benchmarks/ctors.py | 64 +- asv_bench/benchmarks/eval.py | 64 +- asv_bench/benchmarks/frame_ctor.py | 155 +- asv_bench/benchmarks/frame_methods.py | 642 +- asv_bench/benchmarks/gil.py | 457 +- asv_bench/benchmarks/groupby.py | 826 +-- asv_bench/benchmarks/hdfstore_bench.py | 122 - asv_bench/benchmarks/index_object.py | 277 +- asv_bench/benchmarks/indexing.py | 373 +- asv_bench/benchmarks/inference.py | 148 +- .../benchmarks/io}/__init__.py | 0 asv_bench/benchmarks/io/csv.py | 249 + asv_bench/benchmarks/io/excel.py | 36 + asv_bench/benchmarks/io/hdf.py | 151 + 
asv_bench/benchmarks/io/json.py | 127 + asv_bench/benchmarks/io/msgpack.py | 26 + asv_bench/benchmarks/io/pickle.py | 26 + asv_bench/benchmarks/io/sas.py | 21 + asv_bench/benchmarks/io/sql.py | 132 + asv_bench/benchmarks/io/stata.py | 37 + asv_bench/benchmarks/io_bench.py | 194 - asv_bench/benchmarks/io_sql.py | 105 - asv_bench/benchmarks/join_merge.py | 443 +- asv_bench/benchmarks/multiindex_object.py | 140 + asv_bench/benchmarks/offset.py | 125 + asv_bench/benchmarks/packers.py | 316 - asv_bench/benchmarks/pandas_vb_common.py | 65 +- asv_bench/benchmarks/panel_ctor.py | 82 +- asv_bench/benchmarks/panel_methods.py | 32 +- asv_bench/benchmarks/parser_vb.py | 121 - asv_bench/benchmarks/period.py | 88 +- asv_bench/benchmarks/plotting.py | 58 +- asv_bench/benchmarks/reindex.py | 204 +- asv_bench/benchmarks/replace.py | 98 +- asv_bench/benchmarks/reshape.py | 134 +- asv_bench/benchmarks/rolling.py | 76 + asv_bench/benchmarks/series_methods.py | 155 +- asv_bench/benchmarks/sparse.py | 214 +- asv_bench/benchmarks/stat_ops.py | 295 +- asv_bench/benchmarks/strings.py | 146 +- asv_bench/benchmarks/timedelta.py | 127 +- asv_bench/benchmarks/timeseries.py | 588 +- asv_bench/benchmarks/timestamp.py | 119 + asv_bench/vbench_to_asv.py | 8 +- bench/alignment.py | 22 - bench/bench_dense_to_sparse.py | 14 - bench/bench_get_put_value.py | 56 - bench/bench_groupby.py | 66 - bench/bench_join_panel.py | 85 - bench/bench_khash_dict.py | 89 - bench/bench_merge.R | 161 - bench/bench_merge.py | 105 - bench/bench_merge_sqlite.py | 87 - bench/bench_pivot.R | 27 - bench/bench_pivot.py | 16 - bench/bench_take_indexing.py | 55 - bench/bench_unique.py | 278 - bench/bench_with_subset.R | 53 - bench/bench_with_subset.py | 116 - bench/better_unique.py | 80 - bench/duplicated.R | 22 - bench/io_roundtrip.py | 116 - bench/serialize.py | 89 - bench/test.py | 70 - bench/zoo_bench.R | 71 - bench/zoo_bench.py | 36 - ci/appveyor.recipe/bld.bat | 2 - ci/appveyor.recipe/build.sh | 2 - 
ci/appveyor.recipe/meta.yaml | 37 - ci/before_install_travis.sh | 15 - ci/before_script_travis.sh | 11 + ci/build_docs.sh | 22 +- ci/check_cache.sh | 8 +- ci/check_imports.py | 35 + ci/environment-dev.yaml | 15 + ci/install_circle.sh | 86 + ci/install_db_circle.sh | 8 + ci/{install_db.sh => install_db_travis.sh} | 0 ci/install_test.sh | 17 - ci/install_travis.sh | 224 +- ci/lint.sh | 123 +- ci/prep_cython_cache.sh | 8 +- ci/print_skipped.py | 7 +- ci/requirements-2.7.build | 8 +- ci/requirements-2.7.pip | 9 +- ci/requirements-2.7.run | 12 +- ci/requirements-2.7.sh | 2 +- ci/requirements-2.7_BUILD_TEST.build | 4 - ci/requirements-2.7_COMPAT.build | 7 +- ci/requirements-2.7_COMPAT.pip | 2 + ci/requirements-2.7_COMPAT.run | 18 +- ci/requirements-2.7_LOCALE.build | 5 +- ci/requirements-2.7_LOCALE.pip | 2 + ci/requirements-2.7_LOCALE.run | 20 +- ci/requirements-2.7_SLOW.build | 3 +- ci/requirements-2.7_SLOW.run | 7 +- bench/larry.py => ci/requirements-2.7_WIN.pip | 0 ...ts-2.7-64.run => requirements-2.7_WIN.run} | 4 +- ci/requirements-3.4-64.run | 12 - ci/requirements-3.4.build | 3 - ci/requirements-3.4.pip | 5 - ci/requirements-3.4.run | 18 - ci/requirements-3.4_SLOW.pip | 3 - ci/requirements-3.4_SLOW.sh | 7 - ci/requirements-3.5.build | 4 +- ci/requirements-3.5.pip | 1 + ci/requirements-3.5.run | 4 +- ci/requirements-3.5.sh | 6 +- ci/requirements-3.5_ASCII.build | 2 + ci/requirements-3.5_DOC_BUILD.sh | 9 - ci/requirements-3.5_NUMPY_DEV.build.sh | 13 - ci/requirements-3.5_NUMPY_DEV.run | 2 - ci/requirements-3.5_OSX.build | 2 + ci/requirements-3.5_OSX.sh | 2 +- ci/requirements-3.6-64.run | 13 - ci/requirements-3.6.build | 2 + ci/requirements-3.6.pip | 1 + ci/requirements-3.6.run | 7 +- ci/requirements-3.6.sh | 4 +- ..._SLOW.build => requirements-3.6_DOC.build} | 3 +- ...DOC_BUILD.run => requirements-3.6_DOC.run} | 10 +- ci/requirements-3.6_DOC.sh | 11 + ...LD.build => requirements-3.6_LOCALE.build} | 2 + .../requirements-3.6_LOCALE.pip | 0 ...ts_all.txt => 
requirements-3.6_LOCALE.run} | 24 +- ...ild => requirements-3.6_LOCALE_SLOW.build} | 3 + .../requirements-3.6_LOCALE_SLOW.pip | 0 ...W.run => requirements-3.6_LOCALE_SLOW.run} | 18 +- ci/requirements-3.6_NUMPY_DEV.build | 2 + ci/requirements-3.6_NUMPY_DEV.build.sh | 21 + .../requirements-3.6_NUMPY_DEV.pip | 0 ci/requirements-3.6_NUMPY_DEV.run | 1 + .../requirements-3.6_WIN.pip | 0 ...ts-3.5-64.run => requirements-3.6_WIN.run} | 6 +- ci/requirements-optional-conda.txt | 27 + ci/requirements-optional-pip.txt | 29 + ci/requirements_dev.txt | 17 +- ci/run_circle.sh | 9 + ci/script.sh | 32 - ci/script_multi.sh | 46 + ci/script_single.sh | 39 + ci/show_circle.sh | 8 + ci/submit_cython_cache.sh | 2 +- ci/upload_coverage.sh | 12 + circle.yml | 38 + codecov.yml | 6 +- conda.recipe/meta.yaml | 14 +- doc/README.rst | 16 +- doc/_templates/api_redirect.html | 13 +- doc/cheatsheet/Pandas_Cheat_Sheet.pdf | Bin 685284 -> 175124 bytes doc/cheatsheet/Pandas_Cheat_Sheet.pptx | Bin 105196 -> 178327 bytes doc/cheatsheet/README.txt | 4 + doc/make.py | 755 +-- doc/plots/stats/moment_plots.py | 30 - doc/plots/stats/moments_ewma.py | 15 - doc/plots/stats/moments_ewmvol.py | 23 - doc/plots/stats/moments_expw.py | 35 - doc/plots/stats/moments_rolling.py | 24 - doc/plots/stats/moments_rolling_binary.py | 30 - doc/source/10min.rst | 183 +- doc/source/_static/banklist.html | 4 +- doc/source/_static/ci.png | Bin 0 -> 374599 bytes doc/source/_static/style-excel.png | Bin 0 -> 58167 bytes doc/source/advanced.rst | 327 +- doc/source/api.rst | 885 ++- doc/source/basics.rst | 551 +- doc/source/categorical.rst | 478 +- doc/source/comparison_with_r.rst | 14 - doc/source/comparison_with_sas.rst | 200 +- doc/source/comparison_with_sql.rst | 10 +- doc/source/computation.rst | 180 +- doc/source/conf.py | 262 +- doc/source/contributing.rst | 681 +- doc/source/cookbook.rst | 83 +- doc/source/developer.rst | 142 + doc/source/dsintro.rst | 275 +- doc/source/ecosystem.rst | 104 +- doc/source/enhancingperf.rst | 
136 +- doc/source/extending.rst | 269 + doc/source/gotchas.rst | 134 +- doc/source/groupby.rst | 357 +- doc/source/index.rst.template | 19 +- doc/source/indexing.rst | 463 +- doc/source/install.rst | 156 +- doc/source/internals.rst | 165 +- doc/source/io.rst | 1710 ++--- doc/source/merging.rst | 432 +- doc/source/missing_data.rst | 196 +- doc/source/options.rst | 380 +- doc/source/overview.rst | 88 +- doc/source/r_interface.rst | 11 +- doc/source/release.rst | 654 +- doc/source/remote_data.rst | 31 - doc/source/reshaping.rst | 175 +- doc/source/sparse.rst | 49 +- .../{html-styling.ipynb => style.ipynb} | 571 +- doc/source/style.rst | 10 - doc/source/template_structure.html | 57 + doc/source/text.rst | 79 +- .../themes/nature_with_gtoc/layout.html | 11 + .../nature_with_gtoc/static/nature.css_t | 41 +- doc/source/timedeltas.rst | 60 +- doc/source/timeseries.rst | 580 +- doc/source/tutorials.rst | 61 +- doc/source/visualization.rst | 230 +- doc/source/whatsnew.rst | 12 + doc/source/whatsnew/v0.10.0.txt | 77 +- doc/source/whatsnew/v0.10.1.txt | 6 +- doc/source/whatsnew/v0.11.0.txt | 4 +- doc/source/whatsnew/v0.12.0.txt | 16 +- doc/source/whatsnew/v0.13.0.txt | 6 +- doc/source/whatsnew/v0.13.1.txt | 39 +- doc/source/whatsnew/v0.14.0.txt | 26 +- doc/source/whatsnew/v0.14.1.txt | 6 +- doc/source/whatsnew/v0.15.0.txt | 38 +- doc/source/whatsnew/v0.15.1.txt | 2 +- doc/source/whatsnew/v0.15.2.txt | 4 +- doc/source/whatsnew/v0.16.0.txt | 33 +- doc/source/whatsnew/v0.16.1.txt | 117 +- doc/source/whatsnew/v0.16.2.txt | 4 +- doc/source/whatsnew/v0.17.0.txt | 4 +- doc/source/whatsnew/v0.17.1.txt | 8 +- doc/source/whatsnew/v0.18.0.txt | 8 +- doc/source/whatsnew/v0.18.1.txt | 18 +- doc/source/whatsnew/v0.19.0.txt | 11 +- doc/source/whatsnew/v0.19.2.txt | 4 +- doc/source/whatsnew/v0.20.0.txt | 1513 ++++- doc/source/whatsnew/v0.20.2.txt | 127 + doc/source/whatsnew/v0.20.3.txt | 60 + doc/source/whatsnew/v0.21.0.txt | 1178 ++++ doc/source/whatsnew/v0.21.1.txt | 171 + 
doc/source/whatsnew/v0.22.0.txt | 243 + doc/source/whatsnew/v0.23.0.txt | 1065 ++++ doc/source/whatsnew/v0.4.x.txt | 2 +- doc/source/whatsnew/v0.7.3.txt | 2 +- doc/source/whatsnew/v0.8.0.txt | 6 +- doc/source/whatsnew/v0.8.1.txt | 2 +- doc/source/whatsnew/v0.9.1.txt | 17 +- doc/sphinxext/README.rst | 2 +- .../ipython_console_highlighting.py | 120 +- .../ipython_sphinxext/ipython_directive.py | 343 +- doc/sphinxext/numpydoc/README.rst | 2 +- doc/sphinxext/numpydoc/compiler_unparse.py | 2 +- doc/sphinxext/numpydoc/docscrape.py | 4 +- doc/sphinxext/numpydoc/docscrape_sphinx.py | 6 +- doc/sphinxext/numpydoc/numpydoc.py | 5 - doc/sphinxext/numpydoc/phantom_import.py | 4 +- .../numpydoc/tests/test_docscrape.py | 6 +- pandas/__init__.py | 97 +- pandas/_libs/__init__.py | 9 + pandas/_libs/algos.pxd | 21 + pandas/_libs/algos.pyx | 318 + .../{src => _libs}/algos_common_helper.pxi.in | 46 +- .../{src => _libs}/algos_rank_helper.pxi.in | 83 +- .../{src => _libs}/algos_take_helper.pxi.in | 0 pandas/_libs/groupby.pyx | 383 ++ .../groupby_helper.pxi.in} | 597 +- pandas/{src/hash.pyx => _libs/hashing.pyx} | 14 +- pandas/{ => _libs}/hashtable.pxd | 16 + pandas/{ => _libs}/hashtable.pyx | 72 +- .../hashtable_class_helper.pxi.in | 189 +- .../hashtable_func_helper.pxi.in | 100 +- pandas/{ => _libs}/index.pyx | 388 +- .../{src => _libs}/index_class_helper.pxi.in | 5 +- pandas/_libs/indexing.pyx | 22 + pandas/_libs/internals.pyx | 438 ++ pandas/_libs/interval.pyx | 286 + pandas/_libs/intervaltree.pxi.in | 400 ++ pandas/{src => _libs}/join.pyx | 62 +- .../join_func_helper.pxi.in} | 3 +- pandas/{src => _libs}/join_helper.pxi.in | 0 pandas/_libs/khash.pxd | 142 + pandas/_libs/lib.pyx | 756 +++ pandas/_libs/missing.pxd | 6 + pandas/_libs/missing.pyx | 310 + pandas/_libs/ops.pyx | 296 + pandas/{parser.pyx => _libs/parsers.pyx} | 628 +- pandas/_libs/properties.pyx | 63 + .../{src/reduce.pyx => _libs/reduction.pyx} | 44 +- pandas/_libs/reshape.pyx | 17 + pandas/_libs/reshape_helper.pxi.in | 81 
+ pandas/_libs/skiplist.pxd | 48 + pandas/{src => _libs}/skiplist.pyx | 29 +- pandas/{src => _libs}/sparse.pyx | 94 +- pandas/{src => _libs}/sparse_op_helper.pxi.in | 0 pandas/_libs/src/compat_helper.h | 50 + pandas/{ => _libs}/src/datetime/np_datetime.c | 271 +- pandas/{ => _libs}/src/datetime/np_datetime.h | 63 +- .../src/datetime/np_datetime_strings.c | 617 +- .../src/datetime/np_datetime_strings.h | 39 +- pandas/_libs/src/headers/cmath | 15 + pandas/{ => _libs}/src/headers/ms_inttypes.h | 0 pandas/{ => _libs}/src/headers/ms_stdint.h | 0 pandas/{ => _libs}/src/headers/portable.h | 0 pandas/{ => _libs}/src/headers/stdint.h | 0 pandas/{ => _libs}/src/helper.h | 6 +- pandas/{ => _libs}/src/inference.pyx | 1226 ++-- pandas/{ => _libs}/src/klib/khash.h | 0 pandas/{ => _libs}/src/klib/khash_python.h | 5 +- pandas/{ => _libs}/src/msgpack/pack.h | 0 .../{ => _libs}/src/msgpack/pack_template.h | 0 pandas/{ => _libs}/src/msgpack/sysdep.h | 0 pandas/{ => _libs}/src/msgpack/unpack.h | 0 .../{ => _libs}/src/msgpack/unpack_define.h | 0 .../{ => _libs}/src/msgpack/unpack_template.h | 0 pandas/_libs/src/numpy_helper.h | 56 + pandas/{ => _libs}/src/parse_helper.h | 6 +- pandas/{ => _libs}/src/parser/io.c | 136 +- pandas/{ => _libs}/src/parser/io.h | 34 +- pandas/{ => _libs}/src/parser/tokenizer.c | 151 +- pandas/{ => _libs}/src/parser/tokenizer.h | 52 +- pandas/_libs/src/period_helper.c | 601 ++ pandas/_libs/src/period_helper.h | 112 + pandas/{ => _libs}/src/skiplist.h | 6 +- pandas/{ => _libs}/src/ujson/lib/ultrajson.h | 19 +- .../{ => _libs}/src/ujson/lib/ultrajsondec.c | 0 .../{ => _libs}/src/ujson/lib/ultrajsonenc.c | 19 +- .../{ => _libs}/src/ujson/python/JSONtoObj.c | 16 +- .../{ => _libs}/src/ujson/python/objToJSON.c | 52 +- .../{ => _libs}/src/ujson/python/py_defines.h | 8 +- pandas/{ => _libs}/src/ujson/python/ujson.c | 6 +- pandas/{ => _libs}/src/ujson/python/version.h | 8 +- pandas/{ => _libs}/src/util.pxd | 104 +- pandas/{src => _libs}/testing.pyx | 6 +- 
pandas/_libs/tslib.pyx | 799 +++ pandas/_libs/tslibs/__init__.py | 2 + pandas/_libs/tslibs/ccalendar.pxd | 13 + pandas/_libs/tslibs/ccalendar.pyx | 230 + pandas/_libs/tslibs/conversion.pxd | 33 + pandas/_libs/tslibs/conversion.pyx | 1194 ++++ pandas/_libs/tslibs/fields.pyx | 671 ++ pandas/_libs/tslibs/frequencies.pxd | 10 + pandas/_libs/tslibs/frequencies.pyx | 512 ++ pandas/_libs/tslibs/nattype.pxd | 9 + pandas/_libs/tslibs/nattype.pyx | 589 ++ pandas/_libs/tslibs/np_datetime.pxd | 80 + pandas/_libs/tslibs/np_datetime.pyx | 201 + pandas/_libs/tslibs/offsets.pyx | 942 +++ pandas/_libs/tslibs/parsing.pyx | 681 ++ pandas/{src => _libs/tslibs}/period.pyx | 1308 ++-- pandas/_libs/tslibs/resolution.pyx | 641 ++ pandas/_libs/tslibs/strptime.pyx | 634 ++ pandas/_libs/tslibs/timedeltas.pxd | 13 + pandas/_libs/tslibs/timedeltas.pyx | 1275 ++++ pandas/_libs/tslibs/timestamps.pxd | 11 + pandas/_libs/tslibs/timestamps.pyx | 1091 ++++ pandas/_libs/tslibs/timezones.pxd | 17 + pandas/_libs/tslibs/timezones.pyx | 316 + pandas/{ => _libs}/window.pyx | 347 +- pandas/_libs/writers.pyx | 174 + pandas/_version.py | 60 +- pandas/algos.pyx | 664 -- pandas/api/__init__.py | 1 + pandas/api/extensions/__init__.py | 4 + pandas/api/tests/test_api.py | 227 - pandas/api/types/__init__.py | 9 +- pandas/compat/__init__.py | 110 +- pandas/compat/chainmap_impl.py | 4 +- pandas/compat/numpy/__init__.py | 26 +- pandas/compat/numpy/function.py | 68 +- pandas/compat/openpyxl_compat.py | 35 - pandas/compat/pickle_compat.py | 128 +- pandas/computation/__init__.py | 30 - pandas/computation/api.py | 4 - pandas/computation/expressions.py | 259 +- pandas/computation/tests/test_compat.py | 63 - pandas/conftest.py | 84 +- pandas/core/accessor.py | 239 + pandas/core/algorithms.py | 1173 ++-- pandas/core/api.py | 68 +- pandas/core/apply.py | 405 ++ pandas/core/arrays/__init__.py | 2 + pandas/core/arrays/base.py | 348 + pandas/core/arrays/categorical.py | 2360 +++++++ pandas/core/base.py | 676 +- 
pandas/core/categorical.py | 2088 +----- pandas/core/common.py | 320 +- .../parser => core/computation}/__init__.py | 0 pandas/{ => core}/computation/align.py | 26 +- pandas/core/computation/api.py | 14 + pandas/core/computation/check.py | 22 + pandas/{ => core}/computation/common.py | 0 pandas/{ => core}/computation/engines.py | 22 +- pandas/{ => core}/computation/eval.py | 135 +- pandas/{ => core}/computation/expr.py | 110 +- pandas/core/computation/expressions.py | 248 + pandas/{ => core}/computation/ops.py | 20 +- pandas/{ => core}/computation/pytables.py | 177 +- pandas/{ => core}/computation/scope.py | 21 +- pandas/core/config.py | 91 +- pandas/core/config_init.py | 293 +- pandas/core/datetools.py | 8 +- pandas/{sparse => core/dtypes}/__init__.py | 0 pandas/{types => core/dtypes}/api.py | 34 +- pandas/core/dtypes/base.py | 166 + pandas/{types => core/dtypes}/cast.py | 664 +- pandas/core/dtypes/common.py | 2027 ++++++ pandas/core/dtypes/concat.py | 644 ++ pandas/core/dtypes/dtypes.py | 725 +++ pandas/{types => core/dtypes}/generic.py | 14 +- pandas/core/dtypes/inference.py | 459 ++ pandas/{types => core/dtypes}/missing.py | 175 +- pandas/core/frame.py | 3074 +++++---- pandas/core/generic.py | 3205 ++++++++-- pandas/core/groupby.py | 2253 ++++--- pandas/core/index.py | 4 +- .../tests => core/indexes}/__init__.py | 0 pandas/core/indexes/accessors.py | 250 + pandas/{ => core}/indexes/api.py | 57 +- pandas/{ => core}/indexes/base.py | 1710 +++-- pandas/{ => core}/indexes/category.py | 383 +- .../base.py => core/indexes/datetimelike.py} | 425 +- .../index.py => core/indexes/datetimes.py} | 874 ++- pandas/core/indexes/frozen.py | 150 + pandas/core/indexes/interval.py | 1531 +++++ pandas/{ => core}/indexes/multi.py | 1145 +++- pandas/{ => core}/indexes/numeric.py | 184 +- pandas/{tseries => core/indexes}/period.py | 605 +- pandas/{ => core}/indexes/range.py | 253 +- .../tdi.py => core/indexes/timedeltas.py} | 435 +- pandas/core/indexing.py | 609 +- 
pandas/core/internals.py | 2341 ++++--- pandas/core/missing.py | 343 +- pandas/core/nanops.py | 217 +- pandas/core/ops.py | 2314 ++++--- pandas/core/panel.py | 318 +- pandas/core/panel4d.py | 60 - pandas/core/panelnd.py | 132 - pandas/{tseries => core}/resample.py | 483 +- pandas/{stats => core/reshape}/__init__.py | 0 pandas/core/reshape/api.py | 8 + pandas/{tools => core/reshape}/concat.py | 97 +- pandas/core/reshape/melt.py | 439 ++ pandas/core/reshape/merge.py | 1718 +++++ pandas/{tools => core/reshape}/pivot.py | 232 +- pandas/core/{ => reshape}/reshape.py | 803 +-- pandas/{tools => core/reshape}/tile.py | 320 +- pandas/core/reshape/util.py | 76 + pandas/core/series.py | 1622 +++-- pandas/core/sorting.py | 485 ++ pandas/core/sparse.py | 10 - .../formats => core/sparse}/__init__.py | 0 pandas/core/sparse/api.py | 5 + pandas/{ => core}/sparse/array.py | 261 +- pandas/{ => core}/sparse/frame.py | 315 +- pandas/{ => core}/sparse/scipy_sparse.py | 8 +- pandas/{ => core}/sparse/series.py | 244 +- pandas/core/strings.py | 456 +- .../test_msgpack => core/tools}/__init__.py | 0 .../tools.py => core/tools/datetimes.py} | 501 +- .../{tools/util.py => core/tools/numeric.py} | 118 +- pandas/{tseries => core/tools}/timedeltas.py | 37 +- pandas/{tests/types => core/util}/__init__.py | 0 pandas/{tools => core/util}/hashing.py | 141 +- pandas/core/window.py | 727 ++- pandas/errors/__init__.py | 102 + pandas/formats/style.py | 1001 +-- pandas/info.py | 20 - pandas/io/api.py | 19 +- pandas/io/auth.py | 126 - pandas/{util => io}/clipboard/__init__.py | 29 +- pandas/{util => io}/clipboard/clipboards.py | 18 +- pandas/{util => io}/clipboard/exceptions.py | 3 +- pandas/{util => io}/clipboard/windows.py | 1 - pandas/io/{clipboard.py => clipboards.py} | 16 +- pandas/io/common.py | 198 +- pandas/io/data.py | 6 - pandas/io/date_converters.py | 11 +- pandas/io/excel.py | 994 +-- pandas/io/feather_format.py | 37 +- .../{tools/tests => io/formats}/__init__.py | 0 
pandas/io/formats/common.py | 44 + pandas/io/formats/console.py | 84 + pandas/io/formats/css.py | 250 + pandas/io/formats/excel.py | 654 ++ pandas/{ => io}/formats/format.py | 995 ++- pandas/{ => io}/formats/printing.py | 65 +- pandas/io/formats/style.py | 1267 ++++ pandas/io/formats/templates/html.tpl | 70 + pandas/{util => io/formats}/terminal.py | 9 +- pandas/io/gbq.py | 1118 +--- pandas/io/html.py | 200 +- pandas/io/json/__init__.py | 3 +- pandas/io/json/json.py | 479 +- pandas/io/json/normalize.py | 65 +- pandas/io/json/table_schema.py | 324 + pandas/{ => io}/msgpack/__init__.py | 9 +- pandas/{ => io}/msgpack/_packer.pyx | 13 +- pandas/{ => io}/msgpack/_unpacker.pyx | 29 +- pandas/{ => io}/msgpack/_version.py | 0 pandas/{ => io}/msgpack/exceptions.py | 0 pandas/io/packers.py | 85 +- pandas/io/parquet.py | 288 + pandas/io/parsers.py | 1014 +-- pandas/io/pickle.py | 77 +- pandas/io/pytables.py | 387 +- pandas/io/s3.py | 14 +- pandas/io/sas/__init__.py | 1 + pandas/io/sas/{saslib.pyx => sas.pyx} | 34 +- pandas/io/sas/sas7bdat.py | 25 +- pandas/io/sas/sas_constants.py | 24 + pandas/io/sas/sas_xport.py | 6 +- pandas/io/sas/sasreader.py | 2 + pandas/io/sql.py | 318 +- pandas/io/stata.py | 205 +- pandas/io/tests/data/legacy_hdf/legacy.h5 | Bin 14928 -> 0 bytes .../data/legacy_hdf/legacy_table_0.11.h5 | Bin 293877 -> 0 bytes pandas/io/tests/sas/data/productsales.csv | 1441 ----- pandas/io/tests/sas/test_sas.py | 13 - pandas/io/tests/test_common.py | 159 - pandas/io/tests/test_date_converters.py | 150 - pandas/io/tests/test_excel.py | 2327 ------- pandas/io/tests/test_gbq.py | 1330 ---- pandas/io/tests/test_pickle.py | 285 - pandas/io/tests/test_s3.py | 10 - pandas/io/wb.py | 6 - pandas/json.py | 7 + pandas/lib.pxd | 4 - pandas/lib.py | 8 + pandas/lib.pyx | 1968 ------ pandas/parser.py | 8 + pandas/plotting/__init__.py | 20 + pandas/plotting/_compat.py | 76 + pandas/plotting/_converter.py | 1163 ++++ pandas/plotting/_core.py | 2966 +++++++++ pandas/plotting/_misc.py | 
614 ++ pandas/plotting/_style.py | 183 + pandas/plotting/_timeseries.py | 352 ++ pandas/plotting/_tools.py | 383 ++ pandas/sparse/api.py | 6 - pandas/sparse/list.py | 151 - pandas/sparse/tests/test_list.py | 112 - pandas/src/datetime.pxd | 195 - pandas/src/datetime_helper.h | 36 - pandas/src/headers/math.h | 11 - pandas/src/khash.pxd | 140 - pandas/src/klib/ktypes.h | 6 - pandas/src/klib/kvec.h | 151 - pandas/src/numpy.pxd | 984 --- pandas/src/numpy_helper.h | 162 - pandas/src/offsets.pyx | 367 -- pandas/src/parser/.gitignore | 2 - pandas/src/parser/Makefile | 13 - pandas/src/period_helper.c | 1518 ----- pandas/src/period_helper.h | 191 - pandas/src/properties.pyx | 65 - pandas/src/skiplist.pxd | 22 - pandas/stats/api.py | 7 - pandas/stats/moments.py | 851 --- pandas/testing.py | 8 + .../stub => pandas/tests/api/__init__.py | 0 pandas/tests/api/test_api.py | 253 + pandas/tests/api/test_types.py | 65 + pandas/tests/categorical/__init__.py | 0 pandas/tests/categorical/common.py | 10 + pandas/tests/categorical/test_analytics.py | 320 + pandas/tests/categorical/test_api.py | 518 ++ pandas/tests/categorical/test_constructors.py | 515 ++ pandas/tests/categorical/test_dtypes.py | 163 + pandas/tests/categorical/test_indexing.py | 105 + pandas/tests/categorical/test_missing.py | 70 + pandas/tests/categorical/test_operators.py | 293 + pandas/tests/categorical/test_repr.py | 517 ++ pandas/tests/categorical/test_sorting.py | 123 + pandas/tests/categorical/test_subclass.py | 26 + pandas/tests/categorical/test_warnings.py | 18 + pandas/tests/computation/__init__.py | 0 pandas/tests/computation/test_compat.py | 47 + .../tests => tests/computation}/test_eval.py | 913 ++- pandas/tests/dtypes/__init__.py | 0 pandas/tests/dtypes/test_cast.py | 441 ++ pandas/tests/dtypes/test_common.py | 628 ++ pandas/tests/{types => dtypes}/test_concat.py | 15 +- pandas/tests/dtypes/test_dtypes.py | 769 +++ pandas/tests/dtypes/test_generic.py | 83 + pandas/tests/dtypes/test_inference.py | 1237 ++++ 
.../tests/{types => dtypes}/test_missing.py | 270 +- pandas/tests/extension/__init__.py | 0 pandas/tests/extension/base/__init__.py | 50 + pandas/tests/extension/base/base.py | 6 + pandas/tests/extension/base/casting.py | 13 + pandas/tests/extension/base/constructors.py | 50 + pandas/tests/extension/base/dtype.py | 48 + pandas/tests/extension/base/getitem.py | 122 + pandas/tests/extension/base/interface.py | 55 + pandas/tests/extension/base/methods.py | 33 + pandas/tests/extension/base/missing.py | 47 + pandas/tests/extension/base/reshaping.py | 62 + pandas/tests/extension/category/__init__.py | 0 .../extension/category/test_categorical.py | 84 + pandas/tests/extension/conftest.py | 48 + pandas/tests/extension/decimal/__init__.py | 0 pandas/tests/extension/decimal/array.py | 92 + .../tests/extension/decimal/test_decimal.py | 154 + pandas/tests/extension/json/__init__.py | 0 pandas/tests/extension/json/array.py | 105 + pandas/tests/extension/json/test_json.py | 73 + pandas/tests/extension/test_common.py | 67 + pandas/tests/extension/test_external_block.py | 77 + pandas/tests/formats/test_format.py | 4985 --------------- pandas/tests/formats/test_printing.py | 134 - pandas/tests/frame/common.py | 13 +- pandas/tests/frame/test_alter_axes.py | 588 +- pandas/tests/frame/test_analytics.py | 1135 ++-- pandas/tests/frame/test_api.py | 519 ++ pandas/tests/frame/test_apply.py | 699 +- pandas/tests/frame/test_arithmetic.py | 277 + pandas/tests/frame/test_asof.py | 57 +- .../tests/frame/test_axis_select_reindex.py | 488 +- pandas/tests/frame/test_block_internals.py | 226 +- pandas/tests/frame/test_combine_concat.py | 113 +- pandas/tests/frame/test_constructors.py | 738 ++- pandas/tests/frame/test_convert_to.py | 231 +- pandas/tests/frame/test_dtypes.py | 386 +- pandas/tests/frame/test_indexing.py | 1110 +++- pandas/tests/frame/test_join.py | 184 + pandas/tests/frame/test_misc_api.py | 483 -- pandas/tests/frame/test_missing.py | 217 +- pandas/tests/frame/test_mutate_columns.py 
| 142 +- pandas/tests/frame/test_nonunique_indexes.py | 52 +- pandas/tests/frame/test_operators.py | 478 +- pandas/tests/frame/test_period.py | 25 +- pandas/tests/frame/test_quantile.py | 80 +- pandas/tests/frame/test_query_eval.py | 308 +- pandas/tests/frame/test_rank.py | 299 + pandas/tests/frame/test_replace.py | 84 +- pandas/tests/frame/test_repr_info.py | 212 +- pandas/tests/frame/test_reshape.py | 240 +- .../frame/test_sort_values_level_as_str.py | 126 + pandas/tests/frame/test_sorting.py | 329 +- pandas/tests/frame/test_subclass.py | 435 +- pandas/tests/frame/test_timeseries.py | 292 +- pandas/tests/frame/test_timezones.py | 135 + pandas/tests/frame/test_to_csv.py | 351 +- pandas/tests/frame/test_validate.py | 52 +- pandas/tests/generic/__init__.py | 0 pandas/tests/generic/test_frame.py | 270 + pandas/tests/generic/test_generic.py | 1010 +++ .../generic/test_label_or_level_utils.py | 430 ++ pandas/tests/generic/test_panel.py | 57 + pandas/tests/generic/test_series.py | 229 + pandas/tests/groupby/aggregate/__init__.py | 0 .../tests/groupby/aggregate/test_aggregate.py | 307 + pandas/tests/groupby/aggregate/test_cython.py | 206 + pandas/tests/groupby/aggregate/test_other.py | 502 ++ pandas/tests/groupby/common.py | 38 +- pandas/tests/groupby/test_aggregate.py | 740 --- pandas/tests/groupby/test_bin_groupby.py | 58 +- pandas/tests/groupby/test_categorical.py | 253 +- pandas/tests/groupby/test_counting.py | 214 + pandas/tests/groupby/test_filters.py | 49 +- pandas/tests/groupby/test_functional.py | 372 ++ pandas/tests/groupby/test_groupby.py | 2893 +++------ pandas/tests/groupby/test_grouping.py | 803 +++ pandas/tests/groupby/test_index_as_string.py | 116 + pandas/tests/groupby/test_misc.py | 101 - pandas/tests/groupby/test_nth.py | 331 + pandas/tests/groupby/test_timegrouper.py | 130 +- pandas/tests/groupby/test_transform.py | 377 +- pandas/tests/groupby/test_value_counts.py | 76 + pandas/tests/groupby/test_whitelist.py | 313 + pandas/tests/indexes/common.py | 
815 ++- pandas/tests/indexes/conftest.py | 47 + pandas/tests/indexes/data/s1-0.12.0.pickle | Bin 862 -> 0 bytes pandas/tests/indexes/data/s2-0.12.0.pickle | Bin 814 -> 0 bytes pandas/tests/indexes/datetimelike.py | 63 +- .../indexes/datetimes/test_arithmetic.py | 1052 +++ pandas/tests/indexes/datetimes/test_astype.py | 177 +- .../indexes/datetimes/test_construction.py | 417 +- .../indexes/datetimes/test_date_range.py | 574 +- .../tests/indexes/datetimes/test_datetime.py | 565 +- .../indexes/datetimes/test_datetimelike.py | 63 +- .../tests/indexes/datetimes/test_formats.py | 220 + .../tests/indexes/datetimes/test_indexing.py | 454 +- pandas/tests/indexes/datetimes/test_misc.py | 343 +- .../tests/indexes/datetimes/test_missing.py | 90 +- pandas/tests/indexes/datetimes/test_ops.py | 1269 +--- ...tial_slcing.py => test_partial_slicing.py} | 174 +- .../indexes/datetimes/test_scalar_compat.py | 217 + pandas/tests/indexes/datetimes/test_setops.py | 297 +- .../tests/indexes/datetimes/test_timezones.py | 1029 +++ pandas/tests/indexes/datetimes/test_tools.py | 1427 +++-- pandas/tests/indexes/interval/__init__.py | 0 pandas/tests/indexes/interval/test_astype.py | 209 + .../indexes/interval/test_construction.py | 342 + .../tests/indexes/interval/test_interval.py | 966 +++ .../indexes/interval/test_interval_new.py | 315 + .../indexes/interval/test_interval_range.py | 301 + .../indexes/interval/test_interval_tree.py | 93 + .../tests/indexes/period/test_arithmetic.py | 885 +++ pandas/tests/indexes/period/test_asfreq.py | 158 +- pandas/tests/indexes/period/test_astype.py | 99 + .../tests/indexes/period/test_construction.py | 222 +- pandas/tests/indexes/period/test_formats.py | 209 + pandas/tests/indexes/period/test_indexing.py | 520 +- pandas/tests/indexes/period/test_ops.py | 1081 +--- .../indexes/period/test_partial_slicing.py | 26 +- pandas/tests/indexes/period/test_period.py | 499 +- .../tests/indexes/period/test_period_range.py | 94 + .../indexes/period/test_scalar_compat.py 
| 17 + pandas/tests/indexes/period/test_setops.py | 51 +- pandas/tests/indexes/period/test_tools.py | 468 +- pandas/tests/indexes/test_base.py | 1176 ++-- pandas/tests/indexes/test_category.py | 634 +- pandas/tests/indexes/test_frozen.py | 71 + pandas/tests/indexes/test_multi.py | 1699 +++-- pandas/tests/indexes/test_numeric.py | 609 +- pandas/tests/indexes/test_range.py | 507 +- .../indexes/timedeltas/test_arithmetic.py | 1123 ++++ .../tests/indexes/timedeltas/test_astype.py | 125 +- .../indexes/timedeltas/test_construction.py | 38 +- .../tests/indexes/timedeltas/test_formats.py | 96 + .../tests/indexes/timedeltas/test_indexing.py | 248 +- pandas/tests/indexes/timedeltas/test_ops.py | 1129 +--- .../timedeltas/test_partial_slicing.py | 26 +- .../indexes/timedeltas/test_scalar_compat.py | 63 + .../tests/indexes/timedeltas/test_setops.py | 19 +- .../indexes/timedeltas/test_timedelta.py | 427 +- .../timedeltas/test_timedelta_range.py | 28 +- pandas/tests/indexes/timedeltas/test_tools.py | 77 +- pandas/tests/indexing/common.py | 274 + pandas/tests/indexing/interval/__init__.py | 0 .../tests/indexing/interval/test_interval.py | 270 + .../indexing/interval/test_interval_new.py | 247 + pandas/tests/indexing/test_callable.py | 6 +- pandas/tests/indexing/test_categorical.py | 490 +- .../indexing/test_chaining_and_caching.py | 237 +- pandas/tests/indexing/test_coercion.py | 1526 ++--- pandas/tests/indexing/test_datetime.py | 104 +- pandas/tests/indexing/test_floats.py | 182 +- pandas/tests/indexing/test_iloc.py | 651 ++ pandas/tests/indexing/test_indexing.py | 2950 +-------- pandas/tests/indexing/test_indexing_slow.py | 15 +- pandas/tests/indexing/test_ix.py | 337 + pandas/tests/indexing/test_loc.py | 754 +++ pandas/tests/indexing/test_multiindex.py | 623 +- pandas/tests/indexing/test_panel.py | 314 +- pandas/tests/indexing/test_partial.py | 639 ++ pandas/tests/indexing/test_scalar.py | 172 + pandas/tests/indexing/test_timedelta.py | 59 +- pandas/tests/internals/__init__.py 
| 0 .../tests/{ => internals}/test_internals.py | 746 ++- pandas/tests/io/__init__.py | 0 pandas/tests/io/conftest.py | 83 + .../{io/tests => tests/io}/data/S4_EDUC1.dta | Bin .../{io/tests => tests/io}/data/banklist.csv | 0 .../{io/tests => tests/io}/data/banklist.html | 4 +- pandas/{io/tests => tests/io}/data/blank.xls | Bin pandas/{io/tests => tests/io}/data/blank.xlsm | Bin pandas/{io/tests => tests/io}/data/blank.xlsx | Bin .../io}/data/blank_with_header.xls | Bin .../io}/data/blank_with_header.xlsm | Bin .../io}/data/blank_with_header.xlsx | Bin .../io}/data/categorical_0_14_1.pickle | 0 .../io}/data/categorical_0_15_2.pickle | Bin .../io}/data/computer_sales_page.html | 0 pandas/tests/io/data/feather-0_3_1.feather | Bin 0 -> 672 bytes pandas/tests/io/data/fixed_width_format.txt | 3 + .../tests => tests/io}/data/gbq_fake_job.txt | 0 .../data/html_encoding/chinese_utf-16.html | Bin .../data/html_encoding/chinese_utf-32.html | Bin .../io}/data/html_encoding/chinese_utf-8.html | 0 .../io}/data/html_encoding/letz_latin1.html | 0 pandas/{io/tests => tests/io}/data/iris.csv | 0 .../io}/data/legacy_hdf/datetimetz_object.h5 | Bin .../io}/data/legacy_hdf/legacy_table.h5 | Bin ...periodindex_0.20.1_x86_64_darwin_2.7.13.h5 | Bin 0 -> 7312 bytes .../io}/data/legacy_hdf/pytables_native.h5 | Bin .../io}/data/legacy_hdf/pytables_native2.h5 | Bin .../0.16.0/0.16.0_x86_64_darwin_2.7.9.msgpack | Bin .../0.16.2_AMD64_windows_2.7.10.msgpack | Bin .../0.16.2/0.16.2_AMD64_windows_3.4.3.msgpack | Bin .../0.16.2_x86_64_darwin_2.7.10.msgpack | Bin .../0.16.2/0.16.2_x86_64_darwin_2.7.9.msgpack | Bin .../0.16.2/0.16.2_x86_64_darwin_3.4.3.msgpack | Bin .../0.16.2/0.16.2_x86_64_linux_2.7.10.msgpack | Bin .../0.16.2/0.16.2_x86_64_linux_3.4.3.msgpack | Bin .../0.17.0_AMD64_windows_2.7.11.msgpack | Bin .../0.17.0/0.17.0_AMD64_windows_3.4.4.msgpack | Bin .../0.17.0_x86_64_darwin_2.7.11.msgpack | Bin .../0.17.0/0.17.0_x86_64_darwin_3.4.4.msgpack | Bin 
.../0.17.0/0.17.0_x86_64_linux_2.7.11.msgpack | Bin .../0.17.0/0.17.0_x86_64_linux_3.4.4.msgpack | Bin .../0.17.1_AMD64_windows_2.7.11.msgpack | Bin .../0.17.0/0.17.1_AMD64_windows_3.5.1.msgpack | Bin .../0.17.1_AMD64_windows_2.7.11.msgpack | Bin .../0.17.1/0.17.1_AMD64_windows_3.5.1.msgpack | Bin .../0.17.1_x86_64_darwin_2.7.11.msgpack | Bin .../0.17.1/0.17.1_x86_64_darwin_3.5.1.msgpack | Bin .../0.17.1/0.17.1_x86_64_linux_2.7.11.msgpack | Bin .../0.17.1/0.17.1_x86_64_linux_3.4.4.msgpack | Bin .../0.18.0_AMD64_windows_2.7.11.msgpack | Bin .../0.18.0/0.18.0_AMD64_windows_3.5.1.msgpack | Bin .../0.18.0_x86_64_darwin_2.7.11.msgpack | Bin .../0.18.0/0.18.0_x86_64_darwin_3.5.1.msgpack | Bin .../0.18.1_x86_64_darwin_2.7.12.msgpack | Bin .../0.18.1/0.18.1_x86_64_darwin_3.5.2.msgpack | Bin .../0.19.2_x86_64_darwin_2.7.12.msgpack | Bin 0 -> 12325 bytes .../0.19.2/0.19.2_x86_64_darwin_3.6.1.msgpack | Bin 0 -> 119196 bytes .../0.10.1/AMD64_windows_2.7.3.pickle | Bin .../0.10.1/x86_64_linux_2.7.3.pickle | Bin .../0.11.0/0.11.0_x86_64_linux_3.3.0.pickle | Bin .../0.11.0/x86_64_linux_2.7.3.pickle | Bin .../0.11.0/x86_64_linux_3.3.0.pickle | Bin .../0.12.0/0.12.0_AMD64_windows_2.7.3.pickle | Bin .../0.12.0/0.12.0_x86_64_linux_2.7.3.pickle | Bin .../0.13.0/0.13.0_AMD64_windows_2.7.3.pickle | Bin .../0.13.0/0.13.0_i686_linux_2.6.5.pickle | Bin .../0.13.0/0.13.0_i686_linux_2.7.3.pickle | Bin .../0.13.0/0.13.0_i686_linux_3.2.3.pickle | Bin .../0.13.0/0.13.0_x86_64_darwin_2.7.5.pickle | Bin .../0.13.0/0.13.0_x86_64_darwin_2.7.6.pickle | Bin .../0.13.0/0.13.0_x86_64_linux_2.7.3.pickle | Bin .../0.13.0/0.13.0_x86_64_linux_2.7.8.pickle | Bin .../0.13.0/0.13.0_x86_64_linux_3.3.0.pickle | Bin .../0.14.0/0.14.0_x86_64_darwin_2.7.6.pickle | Bin .../0.14.0/0.14.0_x86_64_linux_2.7.8.pickle | Bin .../0.14.1/0.14.1_x86_64_darwin_2.7.12.pickle | Bin .../0.14.1/0.14.1_x86_64_linux_2.7.8.pickle | Bin .../0.15.0/0.15.0_x86_64_darwin_2.7.12.pickle | Bin .../0.15.0/0.15.0_x86_64_linux_2.7.8.pickle | 
Bin .../0.15.2/0.15.2_x86_64_darwin_2.7.9.pickle | Bin .../0.16.0/0.16.0_x86_64_darwin_2.7.9.pickle | Bin .../0.16.2/0.16.2_AMD64_windows_2.7.10.pickle | Bin .../0.16.2/0.16.2_AMD64_windows_2.7.14.pickle | Bin 0 -> 132692 bytes .../0.16.2/0.16.2_AMD64_windows_3.4.3.pickle | Bin .../0.16.2/0.16.2_x86_64_darwin_2.7.10.pickle | Bin .../0.16.2/0.16.2_x86_64_darwin_2.7.9.pickle | Bin .../0.16.2/0.16.2_x86_64_darwin_3.4.3.pickle | Bin .../0.16.2/0.16.2_x86_64_linux_2.7.10.pickle | Bin .../0.16.2/0.16.2_x86_64_linux_3.4.3.pickle | Bin .../0.17.0/0.17.0_AMD64_windows_2.7.11.pickle | Bin .../0.17.0/0.17.0_AMD64_windows_3.4.4.pickle | Bin .../0.17.0/0.17.0_x86_64_darwin_2.7.11.pickle | Bin .../0.17.0/0.17.0_x86_64_darwin_3.4.4.pickle | Bin .../0.17.0/0.17.0_x86_64_darwin_3.5.3.pickle | Bin 0 -> 129175 bytes .../0.17.0/0.17.0_x86_64_linux_2.7.11.pickle | Bin .../0.17.0/0.17.0_x86_64_linux_3.4.4.pickle | Bin .../0.17.0/0.17.1_AMD64_windows_2.7.11.pickle | Bin .../0.17.1/0.17.1_AMD64_windows_2.7.11.pickle | Bin .../0.17.1/0.17.1_x86_64_darwin_2.7.11.pickle | Bin .../0.18.0/0.18.0_AMD64_windows_2.7.11.pickle | Bin .../0.18.0/0.18.0_AMD64_windows_3.5.1.pickle | Bin .../0.18.0/0.18.0_x86_64_darwin_2.7.11.pickle | Bin .../0.18.0/0.18.0_x86_64_darwin_3.5.1.pickle | Bin .../0.18.1/0.18.1_x86_64_darwin_2.7.12.pickle | Bin .../0.18.1/0.18.1_x86_64_darwin_3.5.2.pickle | Bin 125826 -> 127853 bytes .../0.19.2/0.19.2_AMD64_windows_2.7.14.pickle | Bin 0 -> 133468 bytes .../0.19.2/0.19.2_x86_64_darwin_2.7.12.pickle | Bin 0 -> 127525 bytes .../0.19.2/0.19.2_x86_64_darwin_2.7.14.pickle | Bin 0 -> 132762 bytes .../0.19.2/0.19.2_x86_64_darwin_3.6.1.pickle | Bin 0 -> 126076 bytes .../0.20.3/0.20.3_x86_64_darwin_2.7.14.pickle | Bin 0 -> 132857 bytes pandas/{io/tests => tests/io}/data/macau.html | 2 +- .../{io/tests => tests/io}/data/nyse_wsj.html | 0 pandas/{io/tests => tests/io}/data/spam.html | 2 +- .../tests => tests/io}/data/stata10_115.dta | Bin .../tests => tests/io}/data/stata10_117.dta | 
Bin .../tests => tests/io}/data/stata11_115.dta | Bin .../tests => tests/io}/data/stata11_117.dta | Bin .../tests => tests/io}/data/stata12_117.dta | Bin pandas/tests/io/data/stata13_dates.dta | Bin 0 -> 3386 bytes .../tests => tests/io}/data/stata14_118.dta | Bin .../{io/tests => tests/io}/data/stata15.dta | Bin .../tests => tests/io}/data/stata1_114.dta | Bin .../tests => tests/io}/data/stata1_117.dta | Bin .../io}/data/stata1_encoding.dta | Bin .../tests => tests/io}/data/stata2_113.dta | Bin .../tests => tests/io}/data/stata2_114.dta | Bin .../tests => tests/io}/data/stata2_115.dta | Bin .../tests => tests/io}/data/stata2_117.dta | Bin pandas/{io/tests => tests/io}/data/stata3.csv | 0 .../tests => tests/io}/data/stata3_113.dta | Bin .../tests => tests/io}/data/stata3_114.dta | Bin .../tests => tests/io}/data/stata3_115.dta | Bin .../tests => tests/io}/data/stata3_117.dta | Bin .../tests => tests/io}/data/stata4_113.dta | Bin .../tests => tests/io}/data/stata4_114.dta | Bin .../tests => tests/io}/data/stata4_115.dta | Bin .../tests => tests/io}/data/stata4_117.dta | Bin pandas/{io/tests => tests/io}/data/stata5.csv | 0 .../tests => tests/io}/data/stata5_113.dta | Bin .../tests => tests/io}/data/stata5_114.dta | Bin .../tests => tests/io}/data/stata5_115.dta | Bin .../tests => tests/io}/data/stata5_117.dta | Bin pandas/{io/tests => tests/io}/data/stata6.csv | 0 .../tests => tests/io}/data/stata6_113.dta | Bin .../tests => tests/io}/data/stata6_114.dta | Bin .../tests => tests/io}/data/stata6_115.dta | Bin .../tests => tests/io}/data/stata6_117.dta | Bin .../tests => tests/io}/data/stata7_111.dta | Bin .../tests => tests/io}/data/stata7_115.dta | Bin .../tests => tests/io}/data/stata7_117.dta | Bin .../tests => tests/io}/data/stata8_113.dta | Bin .../tests => tests/io}/data/stata8_115.dta | Bin .../tests => tests/io}/data/stata8_117.dta | Bin .../tests => tests/io}/data/stata9_115.dta | Bin .../tests => tests/io}/data/stata9_117.dta | Bin pandas/{io/tests => 
tests/io}/data/test1.csv | 0 pandas/{io/tests => tests/io}/data/test1.xls | Bin pandas/{io/tests => tests/io}/data/test1.xlsm | Bin pandas/{io/tests => tests/io}/data/test1.xlsx | Bin pandas/{io/tests => tests/io}/data/test2.xls | Bin pandas/{io/tests => tests/io}/data/test2.xlsm | Bin pandas/{io/tests => tests/io}/data/test2.xlsx | Bin pandas/{io/tests => tests/io}/data/test3.xls | Bin pandas/{io/tests => tests/io}/data/test3.xlsm | Bin pandas/{io/tests => tests/io}/data/test3.xlsx | Bin pandas/{io/tests => tests/io}/data/test4.xls | Bin pandas/{io/tests => tests/io}/data/test4.xlsm | Bin pandas/{io/tests => tests/io}/data/test4.xlsx | Bin pandas/{io/tests => tests/io}/data/test5.xls | Bin pandas/{io/tests => tests/io}/data/test5.xlsm | Bin pandas/{io/tests => tests/io}/data/test5.xlsx | Bin .../io}/data/test_converters.xls | Bin .../io}/data/test_converters.xlsm | Bin .../io}/data/test_converters.xlsx | Bin .../io}/data/test_index_name_pre17.xls | Bin .../io}/data/test_index_name_pre17.xlsm | Bin .../io}/data/test_index_name_pre17.xlsx | Bin .../{io/tests => tests/io}/data/test_mmap.csv | 0 .../io}/data/test_multisheet.xls | Bin .../io}/data/test_multisheet.xlsm | Bin .../io}/data/test_multisheet.xlsx | Bin .../tests => tests/io}/data/test_squeeze.xls | Bin .../tests => tests/io}/data/test_squeeze.xlsm | Bin .../tests => tests/io}/data/test_squeeze.xlsx | Bin .../tests => tests/io}/data/test_types.xls | Bin .../tests => tests/io}/data/test_types.xlsm | Bin .../tests => tests/io}/data/test_types.xlsx | Bin .../io}/data/testdateoverflow.xls | Bin .../io}/data/testdateoverflow.xlsm | Bin .../io}/data/testdateoverflow.xlsx | Bin .../{io/tests => tests/io}/data/testdtype.xls | Bin .../tests => tests/io}/data/testdtype.xlsm | Bin .../tests => tests/io}/data/testdtype.xlsx | Bin .../io}/data/testmultiindex.xls | Bin .../io}/data/testmultiindex.xlsm | Bin .../io}/data/testmultiindex.xlsx | Bin .../tests => tests/io}/data/testskiprows.xls | Bin .../tests => 
tests/io}/data/testskiprows.xlsm | Bin .../tests => tests/io}/data/testskiprows.xlsx | Bin .../tests => tests/io}/data/times_1900.xls | Bin .../tests => tests/io}/data/times_1900.xlsm | Bin .../tests => tests/io}/data/times_1900.xlsx | Bin .../tests => tests/io}/data/times_1904.xls | Bin .../tests => tests/io}/data/times_1904.xlsm | Bin .../tests => tests/io}/data/times_1904.xlsx | Bin pandas/{io/tests => tests/io}/data/tips.csv | 0 .../tests => tests/io}/data/valid_markup.html | 0 .../io}/data/wikipedia_states.html | 0 pandas/tests/io/formats/__init__.py | 0 .../io/formats}/data/unicode_series.csv | 0 pandas/tests/io/formats/test_css.py | 186 + .../tests/io/formats/test_eng_formatting.py | 193 + pandas/tests/io/formats/test_format.py | 2620 ++++++++ pandas/tests/io/formats/test_printing.py | 220 + pandas/tests/{ => io}/formats/test_style.py | 654 +- pandas/tests/io/formats/test_to_csv.py | 287 + pandas/tests/io/formats/test_to_excel.py | 274 + pandas/tests/io/formats/test_to_html.py | 1873 ++++++ pandas/tests/io/formats/test_to_latex.py | 623 ++ .../io}/generate_legacy_storage_files.py | 138 +- pandas/tests/io/json/__init__.py | 0 .../io}/json/data/tsframe_iso_v012.json | 0 .../io}/json/data/tsframe_v012.json | 0 .../tests/io/json/data/tsframe_v012.json.zip | Bin 0 -> 436 bytes pandas/tests/io/json/test_compression.py | 100 + .../tests/io/json/test_json_table_schema.py | 562 ++ .../tests => tests/io}/json/test_normalize.py | 185 +- .../tests => tests/io}/json/test_pandas.py | 374 +- pandas/tests/io/json/test_readlines.py | 169 + .../{io/tests => tests/io}/json/test_ujson.py | 625 +- pandas/tests/io/msgpack/__init__.py | 0 pandas/tests/io/msgpack/common.py | 10 + pandas/tests/io/msgpack/data/frame.mp | Bin 0 -> 309 bytes .../msgpack}/test_buffer.py | 5 +- .../{test_msgpack => io/msgpack}/test_case.py | 6 +- pandas/tests/io/msgpack/test_except.py | 39 + .../msgpack}/test_extension.py | 12 +- .../msgpack}/test_format.py | 2 +- .../msgpack}/test_limits.py | 44 +- 
.../msgpack}/test_newspec.py | 2 +- .../{test_msgpack => io/msgpack}/test_obj.py | 17 +- .../{test_msgpack => io/msgpack}/test_pack.py | 19 +- .../msgpack}/test_read_size.py | 2 +- .../{test_msgpack => io/msgpack}/test_seq.py | 4 +- .../msgpack}/test_sequnpack.py | 42 +- .../msgpack}/test_subtype.py | 2 +- .../msgpack}/test_unpack.py | 7 +- .../msgpack}/test_unpack_raw.py | 2 +- pandas/tests/io/parser/__init__.py | 0 .../io}/parser/c_parser_only.py | 171 +- .../{io/tests => tests/io}/parser/comment.py | 0 .../{io/tests => tests/io}/parser/common.py | 566 +- .../tests => tests/io}/parser/compression.py | 57 +- .../tests => tests/io}/parser/converters.py | 20 +- .../tests => tests/io}/parser/data/iris.csv | 0 pandas/tests/io/parser/data/items.jsonl | 2 + .../io}/parser/data/salaries.csv | 0 .../io}/parser/data/salaries.csv.bz2 | Bin .../io}/parser/data/salaries.csv.gz | Bin .../io}/parser/data/salaries.csv.xz | Bin .../io}/parser/data/salaries.csv.zip | Bin .../io}/parser/data/sauron.SHIFT_JIS.csv | 0 pandas/tests/io/parser/data/sub_char.csv | 2 + pandas/tests/io/parser/data/tar_csv.tar | Bin 0 -> 10240 bytes pandas/tests/io/parser/data/tar_csv.tar.gz | Bin 0 -> 10240 bytes .../tests => tests/io}/parser/data/test1.csv | 0 .../io}/parser/data/test1.csv.bz2 | Bin .../io}/parser/data/test1.csv.gz | Bin .../tests => tests/io}/parser/data/test2.csv | 0 .../io}/parser/data/test_mmap.csv | 0 .../tests => tests/io}/parser/data/tips.csv | 0 pandas/tests/io/parser/data/tips.csv.bz2 | Bin 0 -> 1316 bytes pandas/tests/io/parser/data/tips.csv.gz | Bin 0 -> 1740 bytes .../parser}/data/unicode_series.csv | 0 .../io}/parser/data/utf16_ex.txt | Bin .../tests/io/parser/data/utf16_ex_small.zip | Bin 0 -> 285 bytes .../{io/tests => tests/io}/parser/dialect.py | 4 +- .../{io/tests => tests/io}/parser/dtypes.py | 141 +- .../{io/tests => tests/io}/parser/header.py | 103 +- .../tests => tests/io}/parser/index_col.py | 16 +- pandas/tests/io/parser/mangle_dupes.py | 88 + .../tests => 
tests/io}/parser/multithread.py | 0 .../tests => tests/io}/parser/na_values.py | 82 +- .../tests => tests/io}/parser/parse_dates.py | 319 +- .../io}/parser/python_parser_only.py | 104 +- .../{io/tests => tests/io}/parser/quoting.py | 34 +- .../{io/tests => tests/io}/parser/skiprows.py | 6 +- .../tests => tests/io}/parser/test_network.py | 146 +- .../tests => tests/io}/parser/test_parsers.py | 69 +- .../io}/parser/test_read_fwf.py | 55 +- .../io}/parser/test_textreader.py | 156 +- .../io}/parser/test_unsupported.py | 105 +- .../{io/tests => tests/io}/parser/usecols.py | 78 +- pandas/tests/io/sas/__init__.py | 0 .../tests => tests/io}/sas/data/DEMO_G.csv | 0 .../tests => tests/io}/sas/data/DEMO_G.xpt | Bin .../tests => tests/io}/sas/data/DRXFCD_G.csv | 0 .../tests => tests/io}/sas/data/DRXFCD_G.xpt | Bin .../tests => tests/io}/sas/data/SSHSV1_A.csv | 0 .../tests => tests/io}/sas/data/SSHSV1_A.xpt | Bin .../tests => tests/io}/sas/data/airline.csv | 0 .../io}/sas/data/airline.sas7bdat | Bin pandas/tests/io/sas/data/datetime.csv | 5 + pandas/tests/io/sas/data/datetime.sas7bdat | Bin 0 -> 131072 bytes .../io}/sas/data/paxraw_d_short.csv | 0 .../io}/sas/data/paxraw_d_short.xpt | Bin pandas/tests/io/sas/data/productsales.csv | 1441 +++++ .../io}/sas/data/productsales.sas7bdat | Bin .../io}/sas/data/test1.sas7bdat | Bin .../io}/sas/data/test10.sas7bdat | Bin .../io}/sas/data/test11.sas7bdat | Bin .../io}/sas/data/test12.sas7bdat | Bin .../io}/sas/data/test13.sas7bdat | Bin .../io}/sas/data/test14.sas7bdat | Bin .../io}/sas/data/test15.sas7bdat | Bin .../io}/sas/data/test16.sas7bdat | Bin .../io}/sas/data/test2.sas7bdat | Bin .../io}/sas/data/test3.sas7bdat | Bin .../io}/sas/data/test4.sas7bdat | Bin .../io}/sas/data/test5.sas7bdat | Bin .../io}/sas/data/test6.sas7bdat | Bin .../io}/sas/data/test7.sas7bdat | Bin .../io}/sas/data/test8.sas7bdat | Bin .../io}/sas/data/test9.sas7bdat | Bin .../io}/sas/data/test_12659.csv | 0 .../io}/sas/data/test_12659.sas7bdat | Bin 
.../io}/sas/data/test_sas7bdat_1.csv | 0 .../io}/sas/data/test_sas7bdat_2.csv | 0 .../io/sas/data/zero_variables.sas7bdat} | Bin 238321 -> 149504 bytes pandas/tests/io/sas/test_sas.py | 16 + .../tests => tests/io}/sas/test_sas7bdat.py | 71 +- .../{io/tests => tests/io}/sas/test_xport.py | 8 +- .../{io/tests => tests/io}/test_clipboard.py | 31 +- pandas/tests/io/test_common.py | 292 + pandas/tests/io/test_excel.py | 2333 +++++++ pandas/{io/tests => tests/io}/test_feather.py | 89 +- pandas/tests/io/test_gbq.py | 135 + pandas/{io/tests => tests/io}/test_html.py | 351 +- pandas/{io/tests => tests/io}/test_packers.py | 288 +- pandas/tests/io/test_parquet.py | 504 ++ pandas/tests/io/test_pickle.py | 494 ++ .../{io/tests => tests/io}/test_pytables.py | 3013 ++++----- pandas/tests/io/test_s3.py | 8 + pandas/{io/tests => tests/io}/test_sql.py | 760 ++- pandas/{io/tests => tests/io}/test_stata.py | 515 +- pandas/tests/plotting/common.py | 116 +- pandas/tests/plotting/test_boxplot_method.py | 119 +- pandas/tests/plotting/test_converter.py | 354 ++ pandas/tests/plotting/test_datetimelike.py | 1161 ++-- pandas/tests/plotting/test_deprecated.py | 58 + pandas/tests/plotting/test_frame.py | 968 +-- pandas/tests/plotting/test_groupby.py | 7 +- pandas/tests/plotting/test_hist_method.py | 151 +- pandas/tests/plotting/test_misc.py | 132 +- pandas/tests/plotting/test_series.py | 484 +- pandas/tests/reshape/__init__.py | 0 .../tests => tests/reshape}/data/cut_data.csv | 0 pandas/tests/reshape/merge/__init__.py | 0 .../merge}/data/allow_exact_matches.csv | 0 .../allow_exact_matches_and_tolerance.csv | 0 .../reshape/merge}/data/asof.csv | 0 .../reshape/merge}/data/asof2.csv | 0 .../reshape/merge}/data/quotes.csv | 0 .../reshape/merge}/data/quotes2.csv | 0 .../reshape/merge}/data/tolerance.csv | 0 .../reshape/merge}/data/trades.csv | 0 .../reshape/merge}/data/trades2.csv | 0 .../reshape/merge}/test_join.py | 277 +- .../reshape/merge}/test_merge.py | 1112 +++- 
.../reshape/merge}/test_merge_asof.py | 128 +- .../merge/test_merge_index_as_string.py | 215 + .../reshape/merge}/test_merge_ordered.py | 36 +- .../tests => tests/reshape}/test_concat.py | 944 ++- pandas/tests/reshape/test_melt.py | 621 ++ .../tests => tests/reshape}/test_pivot.py | 513 +- pandas/tests/reshape/test_reshape.py | 509 ++ pandas/tests/reshape/test_tile.py | 591 ++ .../tests/reshape/test_union_categoricals.py | 345 + pandas/tests/reshape/test_util.py | 49 + pandas/tests/scalar/interval/__init__.py | 0 pandas/tests/scalar/interval/test_interval.py | 202 + pandas/tests/scalar/period/__init__.py | 0 .../test_asfreq.py} | 589 +- pandas/tests/scalar/period/test_period.py | 1449 +++++ pandas/tests/scalar/test_nat.py | 332 + pandas/tests/scalar/test_period.py | 1448 ----- pandas/tests/scalar/test_timedelta.py | 713 --- pandas/tests/scalar/test_timestamp.py | 1684 ----- pandas/tests/scalar/timedelta/__init__.py | 0 .../tests/scalar/timedelta/test_arithmetic.py | 616 ++ .../scalar/timedelta/test_construction.py | 222 + pandas/tests/scalar/timedelta/test_formats.py | 48 + .../tests/scalar/timedelta/test_timedelta.py | 568 ++ pandas/tests/scalar/timestamp/__init__.py | 0 .../tests/scalar/timestamp/test_arithmetic.py | 76 + .../scalar/timestamp/test_comparisons.py | 194 + .../tests/scalar/timestamp/test_rendering.py | 96 + .../tests/scalar/timestamp/test_timestamp.py | 874 +++ .../tests/scalar/timestamp/test_timezones.py | 293 + .../tests/scalar/timestamp/test_unary_ops.py | 264 + pandas/tests/series/common.py | 2 +- pandas/tests/series/indexing/__init__.py | 0 pandas/tests/series/indexing/conftest.py | 8 + .../tests/series/indexing/test_alter_index.py | 520 ++ pandas/tests/series/indexing/test_boolean.py | 603 ++ pandas/tests/series/indexing/test_callable.py | 33 + pandas/tests/series/indexing/test_datetime.py | 710 +++ pandas/tests/series/indexing/test_iloc.py | 38 + pandas/tests/series/indexing/test_indexing.py | 760 +++ pandas/tests/series/indexing/test_loc.py 
| 150 + pandas/tests/series/indexing/test_numeric.py | 236 + pandas/tests/series/test_alter_axes.py | 163 +- pandas/tests/series/test_analytics.py | 1511 +++-- pandas/tests/series/test_api.py | 754 +++ pandas/tests/series/test_apply.py | 341 +- pandas/tests/series/test_arithmetic.py | 868 +++ pandas/tests/series/test_asof.py | 74 +- pandas/tests/series/test_combine_concat.py | 114 +- pandas/tests/series/test_constructors.py | 824 ++- pandas/tests/series/test_datetime_values.py | 164 +- pandas/tests/series/test_dtypes.py | 504 +- pandas/tests/series/test_indexing.py | 2638 -------- pandas/tests/series/test_internals.py | 14 +- pandas/tests/series/test_io.py | 173 +- pandas/tests/series/test_misc_api.py | 350 - pandas/tests/series/test_missing.py | 547 +- pandas/tests/series/test_operators.py | 2457 ++++---- pandas/tests/series/test_period.py | 186 +- pandas/tests/series/test_quantile.py | 94 +- pandas/tests/series/test_rank.py | 471 ++ pandas/tests/series/test_replace.py | 88 +- pandas/tests/series/test_repr.py | 332 +- pandas/tests/series/test_sorting.py | 169 +- pandas/tests/series/test_subclass.py | 31 +- pandas/tests/series/test_timeseries.py | 323 +- pandas/tests/series/test_timezones.py | 302 + pandas/tests/series/test_validate.py | 42 +- pandas/tests/sparse/__init__.py | 0 pandas/tests/sparse/common.py | 0 pandas/tests/sparse/frame/__init__.py | 0 pandas/tests/sparse/frame/test_analytics.py | 40 + pandas/tests/sparse/frame/test_apply.py | 92 + .../sparse/frame}/test_frame.py | 515 +- pandas/tests/sparse/frame/test_indexing.py | 113 + pandas/tests/sparse/frame/test_to_csv.py | 20 + .../tests/sparse/frame/test_to_from_scipy.py | 168 + pandas/tests/sparse/series/__init__.py | 0 pandas/tests/sparse/series/test_indexing.py | 113 + .../sparse/series}/test_series.py | 418 +- .../sparse}/test_arithmetics.py | 40 +- .../tests => tests/sparse}/test_array.py | 396 +- .../sparse}/test_combine_concat.py | 101 +- .../tests => tests/sparse}/test_format.py | 46 +- .../tests 
=> tests/sparse}/test_groupby.py | 4 +- .../tests => tests/sparse}/test_indexing.py | 299 +- .../tests => tests/sparse}/test_libsparse.py | 218 +- .../tests => tests/sparse}/test_pivot.py | 4 +- pandas/tests/sparse/test_reshape.py | 38 + pandas/tests/test_algos.py | 909 +-- pandas/tests/test_base.py | 676 +- pandas/tests/test_categorical.py | 4578 -------------- pandas/tests/test_common.py | 53 +- pandas/tests/test_compat.py | 48 +- pandas/tests/test_config.py | 297 +- pandas/tests/test_downstream.py | 110 + pandas/tests/test_errors.py | 81 + pandas/tests/test_expressions.py | 106 +- pandas/tests/test_generic.py | 2032 ------ pandas/tests/test_join.py | 65 +- pandas/tests/test_lib.py | 178 +- pandas/tests/test_msgpack/test_except.py | 33 - pandas/tests/test_multilevel.py | 1630 +++-- pandas/tests/test_nanops.py | 241 +- pandas/tests/test_panel.py | 3653 ++++++----- pandas/tests/test_panel4d.py | 943 --- pandas/tests/test_panelnd.py | 101 - pandas/tests/test_register_accessor.py | 87 + pandas/tests/{tseries => }/test_resample.py | 1347 ++-- pandas/tests/test_reshape.py | 952 --- pandas/tests/test_sorting.py | 437 ++ pandas/tests/test_stats.py | 185 - pandas/tests/test_strings.py | 568 +- pandas/tests/test_take.py | 22 +- pandas/tests/test_window.py | 2306 +++---- pandas/tests/tools/__init__.py | 0 .../tools/test_numeric.py} | 195 +- pandas/tests/tseries/conftest.py | 7 + pandas/tests/tseries/offsets/__init__.py | 1 + pandas/tests/tseries/offsets/common.py | 25 + pandas/tests/tseries/offsets/conftest.py | 26 + .../{ => offsets}/data/cday-0.14.1.pickle | Bin .../data/dateoffset_0_15_2.pickle | 0 pandas/tests/tseries/offsets/test_fiscal.py | 658 ++ pandas/tests/tseries/offsets/test_offsets.py | 3181 ++++++++++ pandas/tests/tseries/offsets/test_ticks.py | 236 + .../tests/tseries/offsets/test_yqm_offsets.py | 1030 +++ pandas/tests/tseries/test_converter.py | 199 - pandas/tests/tseries/test_frequencies.py | 555 +- pandas/tests/tseries/test_holiday.py | 147 +- 
pandas/tests/tseries/test_offsets.py | 4962 --------------- pandas/tests/tseries/test_timezones.py | 1727 ----- pandas/tests/tslibs/__init__.py | 0 pandas/tests/tslibs/test_array_to_datetime.py | 145 + pandas/tests/tslibs/test_ccalendar.py | 18 + pandas/tests/tslibs/test_conversion.py | 57 + pandas/tests/tslibs/test_libfrequencies.py | 116 + pandas/tests/tslibs/test_liboffsets.py | 172 + pandas/tests/tslibs/test_parsing.py | 229 + pandas/tests/tslibs/test_period_asfreq.py | 82 + pandas/tests/tslibs/test_timezones.py | 68 + pandas/tests/types/test_cast.py | 276 - pandas/tests/types/test_common.py | 54 - pandas/tests/types/test_dtypes.py | 352 -- pandas/tests/types/test_generic.py | 40 - pandas/tests/types/test_inference.py | 966 --- pandas/tests/types/test_io.py | 109 - pandas/tests/util/__init__.py | 0 .../tests => tests/util}/test_hashing.py | 112 +- pandas/tests/{ => util}/test_testing.py | 276 +- pandas/tests/{ => util}/test_util.py | 220 +- pandas/tools/merge.py | 1424 +---- pandas/tools/plotting.py | 4013 +----------- pandas/tools/tests/test_tile.py | 352 -- pandas/tseries/api.py | 8 +- pandas/tseries/common.py | 241 - pandas/tseries/converter.py | 1014 +-- pandas/tseries/frequencies.py | 1020 +-- pandas/tseries/holiday.py | 24 +- pandas/tseries/interval.py | 38 - pandas/tseries/offsets.py | 2146 +++---- pandas/tseries/plotting.py | 314 +- pandas/tseries/util.py | 104 - pandas/tslib.pxd | 10 - pandas/tslib.py | 7 + pandas/tslib.pyx | 5615 ----------------- pandas/types/common.py | 497 +- pandas/types/concat.py | 487 +- pandas/types/dtypes.py | 367 -- pandas/types/inference.py | 106 - pandas/util/__init__.py | 2 + pandas/util/_decorators.py | 380 ++ pandas/util/_depr_module.py | 103 + pandas/util/{doctools.py => _doctools.py} | 30 +- .../{print_versions.py => _print_versions.py} | 33 +- pandas/util/_test_decorators.py | 189 + pandas/util/_tester.py | 27 +- pandas/util/{validators.py => _validators.py} | 142 +- pandas/util/decorators.py | 237 +- 
pandas/util/depr_module.py | 65 - pandas/util/testing.py | 1286 ++-- pyproject.toml | 9 + scripts/announce.py | 124 + scripts/api_rst_coverage.py | 43 - scripts/bench_join.R | 50 - scripts/bench_join.py | 211 - scripts/bench_join_multi.py | 32 - scripts/bench_refactor.py | 51 - scripts/boxplot_test.py | 14 - scripts/build_dist.sh | 4 +- scripts/build_dist_for_release.sh | 10 + scripts/convert_deps.py | 29 + scripts/count_code.sh | 1 - scripts/faster_xs.py | 15 - scripts/file_sizes.py | 208 - scripts/find_commits_touching_func.py | 173 +- scripts/find_undoc_args.py | 161 +- scripts/gen_release_notes.py | 95 - scripts/git-mrb | 82 - scripts/git_code_churn.py | 34 - scripts/groupby_sample.py | 54 - scripts/groupby_speed.py | 35 - scripts/groupby_test.py | 145 - scripts/hdfstore_panel_perf.py | 17 - scripts/json_manip.py | 423 -- scripts/leak.py | 13 - scripts/list_future_warnings.sh | 46 + scripts/{merge-py.py => merge-pr.py} | 69 +- scripts/parser_magic.py | 74 - scripts/preepoch_test.py | 23 - scripts/pypistats.py | 101 - scripts/roll_median_leak.py | 26 - scripts/runtests.py | 5 - scripts/test_py27.bat | 6 - scripts/testmed.py | 171 - scripts/touchup_gh_issues.py | 44 - scripts/use_build_cache.py | 354 -- scripts/validate_docstrings.py | 499 ++ scripts/winbuild_py27.bat | 2 - scripts/windows_builder/build_27-32.bat | 25 - scripts/windows_builder/build_27-64.bat | 25 - scripts/windows_builder/build_34-32.bat | 27 - scripts/windows_builder/build_34-64.bat | 27 - scripts/windows_builder/check_and_build.bat | 2 - scripts/windows_builder/check_and_build.py | 194 - scripts/windows_builder/readme.txt | 17 - setup.cfg | 15 +- setup.py | 668 +- test.bat | 3 +- test.sh | 2 +- test_fast.bat | 3 + test_fast.sh | 10 +- test_perf.sh | 5 - tox.ini | 11 +- vb_suite/.gitignore | 4 - vb_suite/attrs_caching.py | 20 - vb_suite/binary_ops.py | 199 - vb_suite/categoricals.py | 16 - vb_suite/ctors.py | 39 - vb_suite/eval.py | 150 - vb_suite/frame_ctor.py | 123 - vb_suite/frame_methods.py 
| 525 -- vb_suite/generate_rst_files.py | 2 - vb_suite/gil.py | 110 - vb_suite/groupby.py | 620 -- vb_suite/hdfstore_bench.py | 278 - vb_suite/index_object.py | 173 - vb_suite/indexing.py | 292 - vb_suite/inference.py | 36 - vb_suite/io_bench.py | 150 - vb_suite/io_sql.py | 126 - vb_suite/join_merge.py | 270 - vb_suite/make.py | 167 - vb_suite/measure_memory_consumption.py | 55 - vb_suite/miscellaneous.py | 32 - vb_suite/packers.py | 252 - vb_suite/pandas_vb_common.py | 30 - vb_suite/panel_ctor.py | 76 - vb_suite/panel_methods.py | 28 - vb_suite/parser_vb.py | 112 - vb_suite/perf_HEAD.py | 243 - vb_suite/plotting.py | 25 - vb_suite/reindex.py | 225 - vb_suite/replace.py | 36 - vb_suite/reshape.py | 65 - vb_suite/run_suite.py | 15 - vb_suite/series_methods.py | 39 - vb_suite/source/conf.py | 225 - vb_suite/source/themes/agogo/layout.html | 95 - .../source/themes/agogo/static/agogo.css_t | 476 -- .../source/themes/agogo/static/bgfooter.png | Bin 434 -> 0 bytes vb_suite/source/themes/agogo/static/bgtop.png | Bin 430 -> 0 bytes vb_suite/source/themes/agogo/theme.conf | 19 - vb_suite/sparse.py | 65 - vb_suite/stat_ops.py | 126 - vb_suite/strings.py | 59 - vb_suite/suite.py | 164 - vb_suite/test.py | 67 - vb_suite/test_perf.py | 616 -- vb_suite/timedelta.py | 32 - vb_suite/timeseries.py | 445 -- versioneer.py | 16 +- 1413 files changed, 185189 insertions(+), 142306 deletions(-) create mode 100644 .github/CODE_OF_CONDUCT.md create mode 100644 .pep8speaks.yml create mode 100644 AUTHORS.md create mode 100644 LICENSES/XARRAY_LICENSE delete mode 100644 asv_bench/benchmarks/hdfstore_bench.py rename {pandas/api/tests => asv_bench/benchmarks/io}/__init__.py (100%) create mode 100644 asv_bench/benchmarks/io/csv.py create mode 100644 asv_bench/benchmarks/io/excel.py create mode 100644 asv_bench/benchmarks/io/hdf.py create mode 100644 asv_bench/benchmarks/io/json.py create mode 100644 asv_bench/benchmarks/io/msgpack.py create mode 100644 asv_bench/benchmarks/io/pickle.py create 
mode 100644 asv_bench/benchmarks/io/sas.py create mode 100644 asv_bench/benchmarks/io/sql.py create mode 100644 asv_bench/benchmarks/io/stata.py delete mode 100644 asv_bench/benchmarks/io_bench.py delete mode 100644 asv_bench/benchmarks/io_sql.py create mode 100644 asv_bench/benchmarks/multiindex_object.py create mode 100644 asv_bench/benchmarks/offset.py delete mode 100644 asv_bench/benchmarks/packers.py delete mode 100644 asv_bench/benchmarks/parser_vb.py create mode 100644 asv_bench/benchmarks/rolling.py create mode 100644 asv_bench/benchmarks/timestamp.py delete mode 100644 bench/alignment.py delete mode 100644 bench/bench_dense_to_sparse.py delete mode 100644 bench/bench_get_put_value.py delete mode 100644 bench/bench_groupby.py delete mode 100644 bench/bench_join_panel.py delete mode 100644 bench/bench_khash_dict.py delete mode 100644 bench/bench_merge.R delete mode 100644 bench/bench_merge.py delete mode 100644 bench/bench_merge_sqlite.py delete mode 100644 bench/bench_pivot.R delete mode 100644 bench/bench_pivot.py delete mode 100644 bench/bench_take_indexing.py delete mode 100644 bench/bench_unique.py delete mode 100644 bench/bench_with_subset.R delete mode 100644 bench/bench_with_subset.py delete mode 100644 bench/better_unique.py delete mode 100644 bench/duplicated.R delete mode 100644 bench/io_roundtrip.py delete mode 100644 bench/serialize.py delete mode 100644 bench/test.py delete mode 100644 bench/zoo_bench.R delete mode 100644 bench/zoo_bench.py delete mode 100644 ci/appveyor.recipe/bld.bat delete mode 100644 ci/appveyor.recipe/build.sh delete mode 100644 ci/appveyor.recipe/meta.yaml delete mode 100755 ci/before_install_travis.sh create mode 100755 ci/before_script_travis.sh create mode 100644 ci/check_imports.py create mode 100644 ci/environment-dev.yaml create mode 100755 ci/install_circle.sh create mode 100755 ci/install_db_circle.sh rename ci/{install_db.sh => install_db_travis.sh} (100%) delete mode 100755 ci/install_test.sh delete mode 100644 
ci/requirements-2.7_BUILD_TEST.build rename bench/larry.py => ci/requirements-2.7_WIN.pip (100%) rename ci/{requirements-2.7-64.run => requirements-2.7_WIN.run} (84%) delete mode 100644 ci/requirements-3.4-64.run delete mode 100644 ci/requirements-3.4.build delete mode 100644 ci/requirements-3.4.pip delete mode 100644 ci/requirements-3.4.run delete mode 100644 ci/requirements-3.4_SLOW.pip delete mode 100644 ci/requirements-3.4_SLOW.sh delete mode 100644 ci/requirements-3.5_DOC_BUILD.sh delete mode 100644 ci/requirements-3.5_NUMPY_DEV.build.sh delete mode 100644 ci/requirements-3.5_NUMPY_DEV.run delete mode 100644 ci/requirements-3.6-64.run rename ci/{requirements-3.4_SLOW.build => requirements-3.6_DOC.build} (53%) rename ci/{requirements-3.5_DOC_BUILD.run => requirements-3.6_DOC.run} (69%) create mode 100644 ci/requirements-3.6_DOC.sh rename ci/{requirements-3.5_DOC_BUILD.build => requirements-3.6_LOCALE.build} (65%) rename pandas/computation/tests/__init__.py => ci/requirements-3.6_LOCALE.pip (100%) rename ci/{requirements_all.txt => requirements-3.6_LOCALE.run} (58%) rename ci/{requirements-3.5_NUMPY_DEV.build => requirements-3.6_LOCALE_SLOW.build} (53%) rename pandas/indexes/__init__.py => ci/requirements-3.6_LOCALE_SLOW.pip (100%) rename ci/{requirements-3.4_SLOW.run => requirements-3.6_LOCALE_SLOW.run} (53%) create mode 100644 ci/requirements-3.6_NUMPY_DEV.build create mode 100644 ci/requirements-3.6_NUMPY_DEV.build.sh rename pandas/io/tests/__init__.py => ci/requirements-3.6_NUMPY_DEV.pip (100%) create mode 100644 ci/requirements-3.6_NUMPY_DEV.run rename pandas/io/tests/json/__init__.py => ci/requirements-3.6_WIN.pip (100%) rename ci/{requirements-3.5-64.run => requirements-3.6_WIN.run} (65%) create mode 100644 ci/requirements-optional-conda.txt create mode 100644 ci/requirements-optional-pip.txt create mode 100755 ci/run_circle.sh delete mode 100755 ci/script.sh create mode 100755 ci/script_multi.sh create mode 100755 ci/script_single.sh create mode 100755 
ci/show_circle.sh create mode 100755 ci/upload_coverage.sh create mode 100644 circle.yml delete mode 100644 doc/plots/stats/moment_plots.py delete mode 100644 doc/plots/stats/moments_ewma.py delete mode 100644 doc/plots/stats/moments_ewmvol.py delete mode 100644 doc/plots/stats/moments_expw.py delete mode 100644 doc/plots/stats/moments_rolling.py delete mode 100644 doc/plots/stats/moments_rolling_binary.py create mode 100644 doc/source/_static/ci.png create mode 100644 doc/source/_static/style-excel.png create mode 100644 doc/source/developer.rst create mode 100644 doc/source/extending.rst delete mode 100644 doc/source/remote_data.rst rename doc/source/{html-styling.ipynb => style.ipynb} (69%) delete mode 100644 doc/source/style.rst create mode 100644 doc/source/template_structure.html create mode 100644 doc/source/whatsnew/v0.20.2.txt create mode 100644 doc/source/whatsnew/v0.20.3.txt create mode 100644 doc/source/whatsnew/v0.21.0.txt create mode 100644 doc/source/whatsnew/v0.21.1.txt create mode 100644 doc/source/whatsnew/v0.22.0.txt create mode 100644 doc/source/whatsnew/v0.23.0.txt create mode 100644 pandas/_libs/__init__.py create mode 100644 pandas/_libs/algos.pxd create mode 100644 pandas/_libs/algos.pyx rename pandas/{src => _libs}/algos_common_helper.pxi.in (91%) rename pandas/{src => _libs}/algos_rank_helper.pxi.in (84%) rename pandas/{src => _libs}/algos_take_helper.pxi.in (100%) create mode 100644 pandas/_libs/groupby.pyx rename pandas/{src/algos_groupby_helper.pxi.in => _libs/groupby_helper.pxi.in} (58%) rename pandas/{src/hash.pyx => _libs/hashing.pyx} (95%) rename pandas/{ => _libs}/hashtable.pxd (74%) rename pandas/{ => _libs}/hashtable.pyx (73%) rename pandas/{src => _libs}/hashtable_class_helper.pxi.in (84%) rename pandas/{src => _libs}/hashtable_func_helper.pxi.in (76%) rename pandas/{ => _libs}/index.pyx (53%) rename pandas/{src => _libs}/index_class_helper.pxi.in (94%) create mode 100644 pandas/_libs/indexing.pyx create mode 100644 
pandas/_libs/internals.pyx create mode 100644 pandas/_libs/interval.pyx create mode 100644 pandas/_libs/intervaltree.pxi.in rename pandas/{src => _libs}/join.pyx (82%) rename pandas/{src/joins_func_helper.pxi.in => _libs/join_func_helper.pxi.in} (99%) rename pandas/{src => _libs}/join_helper.pxi.in (100%) create mode 100644 pandas/_libs/khash.pxd create mode 100644 pandas/_libs/lib.pyx create mode 100644 pandas/_libs/missing.pxd create mode 100644 pandas/_libs/missing.pyx create mode 100644 pandas/_libs/ops.pyx rename pandas/{parser.pyx => _libs/parsers.pyx} (84%) create mode 100644 pandas/_libs/properties.pyx rename pandas/{src/reduce.pyx => _libs/reduction.pyx} (95%) create mode 100644 pandas/_libs/reshape.pyx create mode 100644 pandas/_libs/reshape_helper.pxi.in create mode 100644 pandas/_libs/skiplist.pxd rename pandas/{src => _libs}/skiplist.pyx (92%) rename pandas/{src => _libs}/sparse.pyx (91%) rename pandas/{src => _libs}/sparse_op_helper.pxi.in (100%) create mode 100644 pandas/_libs/src/compat_helper.h rename pandas/{ => _libs}/src/datetime/np_datetime.c (82%) rename pandas/{ => _libs}/src/datetime/np_datetime.h (65%) rename pandas/{ => _libs}/src/datetime/np_datetime_strings.c (54%) rename pandas/{ => _libs}/src/datetime/np_datetime_strings.h (60%) create mode 100644 pandas/_libs/src/headers/cmath rename pandas/{ => _libs}/src/headers/ms_inttypes.h (100%) rename pandas/{ => _libs}/src/headers/ms_stdint.h (100%) rename pandas/{ => _libs}/src/headers/portable.h (100%) rename pandas/{ => _libs}/src/headers/stdint.h (100%) rename pandas/{ => _libs}/src/helper.h (82%) rename pandas/{ => _libs}/src/inference.pyx (58%) rename pandas/{ => _libs}/src/klib/khash.h (100%) rename pandas/{ => _libs}/src/klib/khash_python.h (95%) rename pandas/{ => _libs}/src/msgpack/pack.h (100%) rename pandas/{ => _libs}/src/msgpack/pack_template.h (100%) rename pandas/{ => _libs}/src/msgpack/sysdep.h (100%) rename pandas/{ => _libs}/src/msgpack/unpack.h (100%) rename pandas/{ => 
_libs}/src/msgpack/unpack_define.h (100%) rename pandas/{ => _libs}/src/msgpack/unpack_template.h (100%) create mode 100644 pandas/_libs/src/numpy_helper.h rename pandas/{ => _libs}/src/parse_helper.h (98%) rename pandas/{ => _libs}/src/parser/io.c (66%) rename pandas/{ => _libs}/src/parser/io.h (68%) rename pandas/{ => _libs}/src/parser/tokenizer.c (93%) rename pandas/{ => _libs}/src/parser/tokenizer.h (82%) create mode 100644 pandas/_libs/src/period_helper.c create mode 100644 pandas/_libs/src/period_helper.h rename pandas/{ => _libs}/src/skiplist.h (98%) rename pandas/{ => _libs}/src/ujson/lib/ultrajson.h (94%) rename pandas/{ => _libs}/src/ujson/lib/ultrajsondec.c (100%) rename pandas/{ => _libs}/src/ujson/lib/ultrajsonenc.c (98%) rename pandas/{ => _libs}/src/ujson/python/JSONtoObj.c (98%) rename pandas/{ => _libs}/src/ujson/python/objToJSON.c (98%) rename pandas/{ => _libs}/src/ujson/python/py_defines.h (93%) rename pandas/{ => _libs}/src/ujson/python/ujson.c (96%) rename pandas/{ => _libs}/src/ujson/python/version.h (92%) rename pandas/{ => _libs}/src/util.pxd (50%) rename pandas/{src => _libs}/testing.pyx (97%) create mode 100644 pandas/_libs/tslib.pyx create mode 100644 pandas/_libs/tslibs/__init__.py create mode 100644 pandas/_libs/tslibs/ccalendar.pxd create mode 100644 pandas/_libs/tslibs/ccalendar.pyx create mode 100644 pandas/_libs/tslibs/conversion.pxd create mode 100644 pandas/_libs/tslibs/conversion.pyx create mode 100644 pandas/_libs/tslibs/fields.pyx create mode 100644 pandas/_libs/tslibs/frequencies.pxd create mode 100644 pandas/_libs/tslibs/frequencies.pyx create mode 100644 pandas/_libs/tslibs/nattype.pxd create mode 100644 pandas/_libs/tslibs/nattype.pyx create mode 100644 pandas/_libs/tslibs/np_datetime.pxd create mode 100644 pandas/_libs/tslibs/np_datetime.pyx create mode 100644 pandas/_libs/tslibs/offsets.pyx create mode 100644 pandas/_libs/tslibs/parsing.pyx rename pandas/{src => _libs/tslibs}/period.pyx (51%) create mode 100644 
pandas/_libs/tslibs/resolution.pyx create mode 100644 pandas/_libs/tslibs/strptime.pyx create mode 100644 pandas/_libs/tslibs/timedeltas.pxd create mode 100644 pandas/_libs/tslibs/timedeltas.pyx create mode 100644 pandas/_libs/tslibs/timestamps.pxd create mode 100644 pandas/_libs/tslibs/timestamps.pyx create mode 100644 pandas/_libs/tslibs/timezones.pxd create mode 100644 pandas/_libs/tslibs/timezones.pyx rename pandas/{ => _libs}/window.pyx (82%) create mode 100644 pandas/_libs/writers.pyx delete mode 100644 pandas/algos.pyx create mode 100644 pandas/api/extensions/__init__.py delete mode 100644 pandas/api/tests/test_api.py delete mode 100644 pandas/compat/openpyxl_compat.py delete mode 100644 pandas/computation/api.py delete mode 100644 pandas/computation/tests/test_compat.py create mode 100644 pandas/core/accessor.py create mode 100644 pandas/core/apply.py create mode 100644 pandas/core/arrays/__init__.py create mode 100644 pandas/core/arrays/base.py create mode 100644 pandas/core/arrays/categorical.py rename pandas/{io/tests/parser => core/computation}/__init__.py (100%) rename pandas/{ => core}/computation/align.py (85%) create mode 100644 pandas/core/computation/api.py create mode 100644 pandas/core/computation/check.py rename pandas/{ => core}/computation/common.py (100%) rename pandas/{ => core}/computation/engines.py (86%) rename pandas/{ => core}/computation/eval.py (66%) rename pandas/{ => core}/computation/expr.py (86%) create mode 100644 pandas/core/computation/expressions.py rename pandas/{ => core}/computation/ops.py (96%) rename pandas/{ => core}/computation/pytables.py (76%) rename pandas/{ => core}/computation/scope.py (93%) rename pandas/{sparse => core/dtypes}/__init__.py (100%) rename pandas/{types => core/dtypes}/api.py (67%) create mode 100644 pandas/core/dtypes/base.py rename pandas/{types => core/dtypes}/cast.py (57%) create mode 100644 pandas/core/dtypes/common.py create mode 100644 pandas/core/dtypes/concat.py create mode 100644 
pandas/core/dtypes/dtypes.py rename pandas/{types => core/dtypes}/generic.py (78%) create mode 100644 pandas/core/dtypes/inference.py rename pandas/{types => core/dtypes}/missing.py (70%) rename pandas/{sparse/tests => core/indexes}/__init__.py (100%) create mode 100644 pandas/core/indexes/accessors.py rename pandas/{ => core}/indexes/api.py (60%) rename pandas/{ => core}/indexes/base.py (72%) rename pandas/{ => core}/indexes/category.py (63%) rename pandas/{tseries/base.py => core/indexes/datetimelike.py} (63%) rename pandas/{tseries/index.py => core/indexes/datetimes.py} (73%) create mode 100644 pandas/core/indexes/frozen.py create mode 100644 pandas/core/indexes/interval.py rename pandas/{ => core}/indexes/multi.py (69%) rename pandas/{ => core}/indexes/numeric.py (69%) rename pandas/{tseries => core/indexes}/period.py (68%) rename pandas/{ => core}/indexes/range.py (76%) rename pandas/{tseries/tdi.py => core/indexes/timedeltas.py} (70%) delete mode 100644 pandas/core/panel4d.py delete mode 100644 pandas/core/panelnd.py rename pandas/{tseries => core}/resample.py (78%) mode change 100755 => 100644 rename pandas/{stats => core/reshape}/__init__.py (100%) create mode 100644 pandas/core/reshape/api.py rename pandas/{tools => core/reshape}/concat.py (87%) create mode 100644 pandas/core/reshape/melt.py create mode 100644 pandas/core/reshape/merge.py rename pandas/{tools => core/reshape}/pivot.py (74%) rename pandas/core/{ => reshape}/reshape.py (54%) rename pandas/{tools => core/reshape}/tile.py (57%) create mode 100644 pandas/core/reshape/util.py create mode 100644 pandas/core/sorting.py delete mode 100644 pandas/core/sparse.py rename pandas/{tests/formats => core/sparse}/__init__.py (100%) create mode 100644 pandas/core/sparse/api.py rename pandas/{ => core}/sparse/array.py (78%) rename pandas/{ => core}/sparse/frame.py (72%) rename pandas/{ => core}/sparse/scipy_sparse.py (95%) rename pandas/{ => core}/sparse/series.py (80%) rename pandas/{tests/test_msgpack => 
core/tools}/__init__.py (100%) rename pandas/{tseries/tools.py => core/tools/datetimes.py} (64%) rename pandas/{tools/util.py => core/tools/numeric.py} (66%) rename pandas/{tseries => core/tools}/timedeltas.py (83%) rename pandas/{tests/types => core/util}/__init__.py (100%) rename pandas/{tools => core/util}/hashing.py (68%) create mode 100644 pandas/errors/__init__.py delete mode 100644 pandas/info.py delete mode 100644 pandas/io/auth.py rename pandas/{util => io}/clipboard/__init__.py (75%) rename pandas/{util => io}/clipboard/clipboards.py (90%) rename pandas/{util => io}/clipboard/exceptions.py (77%) rename pandas/{util => io}/clipboard/windows.py (99%) rename pandas/io/{clipboard.py => clipboards.py} (90%) delete mode 100644 pandas/io/data.py rename pandas/{tools/tests => io/formats}/__init__.py (100%) create mode 100644 pandas/io/formats/common.py create mode 100644 pandas/io/formats/console.py create mode 100644 pandas/io/formats/css.py create mode 100644 pandas/io/formats/excel.py rename pandas/{ => io}/formats/format.py (75%) rename pandas/{ => io}/formats/printing.py (80%) create mode 100644 pandas/io/formats/style.py create mode 100644 pandas/io/formats/templates/html.tpl rename pandas/{util => io/formats}/terminal.py (95%) create mode 100644 pandas/io/json/table_schema.py rename pandas/{ => io}/msgpack/__init__.py (81%) rename pandas/{ => io}/msgpack/_packer.pyx (97%) rename pandas/{ => io}/msgpack/_unpacker.pyx (95%) rename pandas/{ => io}/msgpack/_version.py (100%) rename pandas/{ => io}/msgpack/exceptions.py (100%) create mode 100644 pandas/io/parquet.py rename pandas/io/sas/{saslib.pyx => sas.pyx} (92%) delete mode 100644 pandas/io/tests/data/legacy_hdf/legacy.h5 delete mode 100644 pandas/io/tests/data/legacy_hdf/legacy_table_0.11.h5 delete mode 100644 pandas/io/tests/sas/data/productsales.csv delete mode 100644 pandas/io/tests/sas/test_sas.py delete mode 100644 pandas/io/tests/test_common.py delete mode 100644 
pandas/io/tests/test_date_converters.py delete mode 100644 pandas/io/tests/test_excel.py delete mode 100644 pandas/io/tests/test_gbq.py delete mode 100644 pandas/io/tests/test_pickle.py delete mode 100644 pandas/io/tests/test_s3.py delete mode 100644 pandas/io/wb.py create mode 100644 pandas/json.py delete mode 100644 pandas/lib.pxd create mode 100644 pandas/lib.py delete mode 100644 pandas/lib.pyx create mode 100644 pandas/parser.py create mode 100644 pandas/plotting/__init__.py create mode 100644 pandas/plotting/_compat.py create mode 100644 pandas/plotting/_converter.py create mode 100644 pandas/plotting/_core.py create mode 100644 pandas/plotting/_misc.py create mode 100644 pandas/plotting/_style.py create mode 100644 pandas/plotting/_timeseries.py create mode 100644 pandas/plotting/_tools.py delete mode 100644 pandas/sparse/api.py delete mode 100644 pandas/sparse/list.py delete mode 100644 pandas/sparse/tests/test_list.py delete mode 100644 pandas/src/datetime.pxd delete mode 100644 pandas/src/datetime_helper.h delete mode 100644 pandas/src/headers/math.h delete mode 100644 pandas/src/khash.pxd delete mode 100644 pandas/src/klib/ktypes.h delete mode 100644 pandas/src/klib/kvec.h delete mode 100644 pandas/src/numpy.pxd delete mode 100644 pandas/src/numpy_helper.h delete mode 100644 pandas/src/offsets.pyx delete mode 100644 pandas/src/parser/.gitignore delete mode 100644 pandas/src/parser/Makefile delete mode 100644 pandas/src/period_helper.c delete mode 100644 pandas/src/period_helper.h delete mode 100644 pandas/src/properties.pyx delete mode 100644 pandas/src/skiplist.pxd delete mode 100644 pandas/stats/api.py delete mode 100644 pandas/stats/moments.py create mode 100644 pandas/testing.py rename vb_suite/source/_static/stub => pandas/tests/api/__init__.py (100%) create mode 100644 pandas/tests/api/test_api.py create mode 100644 pandas/tests/api/test_types.py create mode 100644 pandas/tests/categorical/__init__.py create mode 100644 
pandas/tests/categorical/common.py create mode 100644 pandas/tests/categorical/test_analytics.py create mode 100644 pandas/tests/categorical/test_api.py create mode 100644 pandas/tests/categorical/test_constructors.py create mode 100644 pandas/tests/categorical/test_dtypes.py create mode 100644 pandas/tests/categorical/test_indexing.py create mode 100644 pandas/tests/categorical/test_missing.py create mode 100644 pandas/tests/categorical/test_operators.py create mode 100644 pandas/tests/categorical/test_repr.py create mode 100644 pandas/tests/categorical/test_sorting.py create mode 100644 pandas/tests/categorical/test_subclass.py create mode 100644 pandas/tests/categorical/test_warnings.py create mode 100644 pandas/tests/computation/__init__.py create mode 100644 pandas/tests/computation/test_compat.py rename pandas/{computation/tests => tests/computation}/test_eval.py (71%) create mode 100644 pandas/tests/dtypes/__init__.py create mode 100644 pandas/tests/dtypes/test_cast.py create mode 100644 pandas/tests/dtypes/test_common.py rename pandas/tests/{types => dtypes}/test_concat.py (88%) create mode 100644 pandas/tests/dtypes/test_dtypes.py create mode 100644 pandas/tests/dtypes/test_generic.py create mode 100644 pandas/tests/dtypes/test_inference.py rename pandas/tests/{types => dtypes}/test_missing.py (54%) create mode 100644 pandas/tests/extension/__init__.py create mode 100644 pandas/tests/extension/base/__init__.py create mode 100644 pandas/tests/extension/base/base.py create mode 100644 pandas/tests/extension/base/casting.py create mode 100644 pandas/tests/extension/base/constructors.py create mode 100644 pandas/tests/extension/base/dtype.py create mode 100644 pandas/tests/extension/base/getitem.py create mode 100644 pandas/tests/extension/base/interface.py create mode 100644 pandas/tests/extension/base/methods.py create mode 100644 pandas/tests/extension/base/missing.py create mode 100644 pandas/tests/extension/base/reshaping.py create mode 100644 
pandas/tests/extension/category/__init__.py create mode 100644 pandas/tests/extension/category/test_categorical.py create mode 100644 pandas/tests/extension/conftest.py create mode 100644 pandas/tests/extension/decimal/__init__.py create mode 100644 pandas/tests/extension/decimal/array.py create mode 100644 pandas/tests/extension/decimal/test_decimal.py create mode 100644 pandas/tests/extension/json/__init__.py create mode 100644 pandas/tests/extension/json/array.py create mode 100644 pandas/tests/extension/json/test_json.py create mode 100644 pandas/tests/extension/test_common.py create mode 100644 pandas/tests/extension/test_external_block.py delete mode 100644 pandas/tests/formats/test_format.py delete mode 100644 pandas/tests/formats/test_printing.py create mode 100644 pandas/tests/frame/test_api.py create mode 100644 pandas/tests/frame/test_arithmetic.py create mode 100644 pandas/tests/frame/test_join.py delete mode 100644 pandas/tests/frame/test_misc_api.py create mode 100644 pandas/tests/frame/test_rank.py create mode 100644 pandas/tests/frame/test_sort_values_level_as_str.py create mode 100644 pandas/tests/frame/test_timezones.py create mode 100644 pandas/tests/generic/__init__.py create mode 100644 pandas/tests/generic/test_frame.py create mode 100644 pandas/tests/generic/test_generic.py create mode 100644 pandas/tests/generic/test_label_or_level_utils.py create mode 100644 pandas/tests/generic/test_panel.py create mode 100644 pandas/tests/generic/test_series.py create mode 100644 pandas/tests/groupby/aggregate/__init__.py create mode 100644 pandas/tests/groupby/aggregate/test_aggregate.py create mode 100644 pandas/tests/groupby/aggregate/test_cython.py create mode 100644 pandas/tests/groupby/aggregate/test_other.py delete mode 100644 pandas/tests/groupby/test_aggregate.py create mode 100644 pandas/tests/groupby/test_counting.py create mode 100644 pandas/tests/groupby/test_functional.py create mode 100644 pandas/tests/groupby/test_grouping.py create mode 
100644 pandas/tests/groupby/test_index_as_string.py delete mode 100644 pandas/tests/groupby/test_misc.py create mode 100644 pandas/tests/groupby/test_nth.py create mode 100644 pandas/tests/groupby/test_value_counts.py create mode 100644 pandas/tests/groupby/test_whitelist.py create mode 100644 pandas/tests/indexes/conftest.py delete mode 100644 pandas/tests/indexes/data/s1-0.12.0.pickle delete mode 100644 pandas/tests/indexes/data/s2-0.12.0.pickle create mode 100644 pandas/tests/indexes/datetimes/test_arithmetic.py create mode 100644 pandas/tests/indexes/datetimes/test_formats.py rename pandas/tests/indexes/datetimes/{test_partial_slcing.py => test_partial_slicing.py} (57%) create mode 100644 pandas/tests/indexes/datetimes/test_scalar_compat.py create mode 100644 pandas/tests/indexes/datetimes/test_timezones.py create mode 100644 pandas/tests/indexes/interval/__init__.py create mode 100644 pandas/tests/indexes/interval/test_astype.py create mode 100644 pandas/tests/indexes/interval/test_construction.py create mode 100644 pandas/tests/indexes/interval/test_interval.py create mode 100644 pandas/tests/indexes/interval/test_interval_new.py create mode 100644 pandas/tests/indexes/interval/test_interval_range.py create mode 100644 pandas/tests/indexes/interval/test_interval_tree.py create mode 100644 pandas/tests/indexes/period/test_arithmetic.py create mode 100644 pandas/tests/indexes/period/test_astype.py create mode 100644 pandas/tests/indexes/period/test_formats.py create mode 100644 pandas/tests/indexes/period/test_period_range.py create mode 100644 pandas/tests/indexes/period/test_scalar_compat.py create mode 100644 pandas/tests/indexes/test_frozen.py create mode 100644 pandas/tests/indexes/timedeltas/test_arithmetic.py create mode 100644 pandas/tests/indexes/timedeltas/test_formats.py create mode 100644 pandas/tests/indexes/timedeltas/test_scalar_compat.py create mode 100644 pandas/tests/indexing/interval/__init__.py create mode 100644 
pandas/tests/indexing/interval/test_interval.py create mode 100644 pandas/tests/indexing/interval/test_interval_new.py create mode 100644 pandas/tests/indexing/test_iloc.py create mode 100644 pandas/tests/indexing/test_ix.py create mode 100644 pandas/tests/indexing/test_loc.py create mode 100644 pandas/tests/indexing/test_partial.py create mode 100644 pandas/tests/indexing/test_scalar.py create mode 100644 pandas/tests/internals/__init__.py rename pandas/tests/{ => internals}/test_internals.py (62%) create mode 100644 pandas/tests/io/__init__.py create mode 100644 pandas/tests/io/conftest.py rename pandas/{io/tests => tests/io}/data/S4_EDUC1.dta (100%) rename pandas/{io/tests => tests/io}/data/banklist.csv (100%) rename pandas/{io/tests => tests/io}/data/banklist.html (99%) rename pandas/{io/tests => tests/io}/data/blank.xls (100%) mode change 100755 => 100644 rename pandas/{io/tests => tests/io}/data/blank.xlsm (100%) mode change 100755 => 100644 rename pandas/{io/tests => tests/io}/data/blank.xlsx (100%) mode change 100755 => 100644 rename pandas/{io/tests => tests/io}/data/blank_with_header.xls (100%) mode change 100755 => 100644 rename pandas/{io/tests => tests/io}/data/blank_with_header.xlsm (100%) mode change 100755 => 100644 rename pandas/{io/tests => tests/io}/data/blank_with_header.xlsx (100%) mode change 100755 => 100644 rename pandas/{io/tests => tests/io}/data/categorical_0_14_1.pickle (100%) rename pandas/{io/tests => tests/io}/data/categorical_0_15_2.pickle (100%) rename pandas/{io/tests => tests/io}/data/computer_sales_page.html (100%) create mode 100644 pandas/tests/io/data/feather-0_3_1.feather create mode 100644 pandas/tests/io/data/fixed_width_format.txt rename pandas/{io/tests => tests/io}/data/gbq_fake_job.txt (100%) rename pandas/{io/tests => tests/io}/data/html_encoding/chinese_utf-16.html (100%) rename pandas/{io/tests => tests/io}/data/html_encoding/chinese_utf-32.html (100%) rename pandas/{io/tests => 
tests/io}/data/html_encoding/chinese_utf-8.html (100%) rename pandas/{io/tests => tests/io}/data/html_encoding/letz_latin1.html (100%) rename pandas/{io/tests => tests/io}/data/iris.csv (100%) rename pandas/{io/tests => tests/io}/data/legacy_hdf/datetimetz_object.h5 (100%) rename pandas/{io/tests => tests/io}/data/legacy_hdf/legacy_table.h5 (100%) create mode 100644 pandas/tests/io/data/legacy_hdf/periodindex_0.20.1_x86_64_darwin_2.7.13.h5 rename pandas/{io/tests => tests/io}/data/legacy_hdf/pytables_native.h5 (100%) rename pandas/{io/tests => tests/io}/data/legacy_hdf/pytables_native2.h5 (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.16.0/0.16.0_x86_64_darwin_2.7.9.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.16.2/0.16.2_AMD64_windows_2.7.10.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.16.2/0.16.2_AMD64_windows_3.4.3.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.16.2/0.16.2_x86_64_darwin_2.7.10.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.16.2/0.16.2_x86_64_darwin_2.7.9.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.16.2/0.16.2_x86_64_darwin_3.4.3.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.16.2/0.16.2_x86_64_linux_2.7.10.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.16.2/0.16.2_x86_64_linux_3.4.3.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.17.0/0.17.0_AMD64_windows_2.7.11.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.17.0/0.17.0_AMD64_windows_3.4.4.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.17.0/0.17.0_x86_64_darwin_2.7.11.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.17.0/0.17.0_x86_64_darwin_3.4.4.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.17.0/0.17.0_x86_64_linux_2.7.11.msgpack (100%) rename 
pandas/{io/tests => tests/io}/data/legacy_msgpack/0.17.0/0.17.0_x86_64_linux_3.4.4.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.17.0/0.17.1_AMD64_windows_2.7.11.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.17.0/0.17.1_AMD64_windows_3.5.1.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.17.1/0.17.1_AMD64_windows_2.7.11.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.17.1/0.17.1_AMD64_windows_3.5.1.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.17.1/0.17.1_x86_64_darwin_2.7.11.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.17.1/0.17.1_x86_64_darwin_3.5.1.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.17.1/0.17.1_x86_64_linux_2.7.11.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.17.1/0.17.1_x86_64_linux_3.4.4.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.18.0/0.18.0_AMD64_windows_2.7.11.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.18.0/0.18.0_AMD64_windows_3.5.1.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.18.0/0.18.0_x86_64_darwin_2.7.11.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.18.0/0.18.0_x86_64_darwin_3.5.1.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.18.1/0.18.1_x86_64_darwin_2.7.12.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.18.1/0.18.1_x86_64_darwin_3.5.2.msgpack (100%) create mode 100644 pandas/tests/io/data/legacy_msgpack/0.19.2/0.19.2_x86_64_darwin_2.7.12.msgpack create mode 100644 pandas/tests/io/data/legacy_msgpack/0.19.2/0.19.2_x86_64_darwin_3.6.1.msgpack rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.10.1/AMD64_windows_2.7.3.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.10.1/x86_64_linux_2.7.3.pickle (100%) rename pandas/{io/tests 
=> tests/io}/data/legacy_pickle/0.11.0/0.11.0_x86_64_linux_3.3.0.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.11.0/x86_64_linux_2.7.3.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.11.0/x86_64_linux_3.3.0.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.12.0/0.12.0_AMD64_windows_2.7.3.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.12.0/0.12.0_x86_64_linux_2.7.3.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.13.0/0.13.0_AMD64_windows_2.7.3.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.13.0/0.13.0_i686_linux_2.6.5.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.13.0/0.13.0_i686_linux_2.7.3.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.13.0/0.13.0_i686_linux_3.2.3.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.13.0/0.13.0_x86_64_darwin_2.7.5.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.13.0/0.13.0_x86_64_darwin_2.7.6.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.13.0/0.13.0_x86_64_linux_2.7.3.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.13.0/0.13.0_x86_64_linux_2.7.8.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.13.0/0.13.0_x86_64_linux_3.3.0.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.14.0/0.14.0_x86_64_darwin_2.7.6.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.14.0/0.14.0_x86_64_linux_2.7.8.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.14.1/0.14.1_x86_64_darwin_2.7.12.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.14.1/0.14.1_x86_64_linux_2.7.8.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.15.0/0.15.0_x86_64_darwin_2.7.12.pickle (100%) rename pandas/{io/tests => 
tests/io}/data/legacy_pickle/0.15.0/0.15.0_x86_64_linux_2.7.8.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.15.2/0.15.2_x86_64_darwin_2.7.9.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.16.0/0.16.0_x86_64_darwin_2.7.9.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.16.2/0.16.2_AMD64_windows_2.7.10.pickle (100%) create mode 100644 pandas/tests/io/data/legacy_pickle/0.16.2/0.16.2_AMD64_windows_2.7.14.pickle rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.16.2/0.16.2_AMD64_windows_3.4.3.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.16.2/0.16.2_x86_64_darwin_2.7.10.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.16.2/0.16.2_x86_64_darwin_2.7.9.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.16.2/0.16.2_x86_64_darwin_3.4.3.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.16.2/0.16.2_x86_64_linux_2.7.10.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.16.2/0.16.2_x86_64_linux_3.4.3.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.17.0/0.17.0_AMD64_windows_2.7.11.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.17.0/0.17.0_AMD64_windows_3.4.4.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.17.0/0.17.0_x86_64_darwin_2.7.11.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.17.0/0.17.0_x86_64_darwin_3.4.4.pickle (100%) create mode 100644 pandas/tests/io/data/legacy_pickle/0.17.0/0.17.0_x86_64_darwin_3.5.3.pickle rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.17.0/0.17.0_x86_64_linux_2.7.11.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.17.0/0.17.0_x86_64_linux_3.4.4.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.17.0/0.17.1_AMD64_windows_2.7.11.pickle (100%) rename pandas/{io/tests => 
tests/io}/data/legacy_pickle/0.17.1/0.17.1_AMD64_windows_2.7.11.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.17.1/0.17.1_x86_64_darwin_2.7.11.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.18.0/0.18.0_AMD64_windows_2.7.11.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.18.0/0.18.0_AMD64_windows_3.5.1.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.18.0/0.18.0_x86_64_darwin_2.7.11.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.18.0/0.18.0_x86_64_darwin_3.5.1.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.18.1/0.18.1_x86_64_darwin_2.7.12.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.18.1/0.18.1_x86_64_darwin_3.5.2.pickle (85%) create mode 100644 pandas/tests/io/data/legacy_pickle/0.19.2/0.19.2_AMD64_windows_2.7.14.pickle create mode 100644 pandas/tests/io/data/legacy_pickle/0.19.2/0.19.2_x86_64_darwin_2.7.12.pickle create mode 100644 pandas/tests/io/data/legacy_pickle/0.19.2/0.19.2_x86_64_darwin_2.7.14.pickle create mode 100644 pandas/tests/io/data/legacy_pickle/0.19.2/0.19.2_x86_64_darwin_3.6.1.pickle create mode 100644 pandas/tests/io/data/legacy_pickle/0.20.3/0.20.3_x86_64_darwin_2.7.14.pickle rename pandas/{io/tests => tests/io}/data/macau.html (99%) rename pandas/{io/tests => tests/io}/data/nyse_wsj.html (100%) rename pandas/{io/tests => tests/io}/data/spam.html (99%) rename pandas/{io/tests => tests/io}/data/stata10_115.dta (100%) mode change 100755 => 100644 rename pandas/{io/tests => tests/io}/data/stata10_117.dta (100%) mode change 100755 => 100644 rename pandas/{io/tests => tests/io}/data/stata11_115.dta (100%) mode change 100755 => 100644 rename pandas/{io/tests => tests/io}/data/stata11_117.dta (100%) mode change 100755 => 100644 rename pandas/{io/tests => tests/io}/data/stata12_117.dta (100%) create mode 100644 pandas/tests/io/data/stata13_dates.dta rename pandas/{io/tests => 
tests/io}/data/stata14_118.dta (100%) rename pandas/{io/tests => tests/io}/data/stata15.dta (100%) rename pandas/{io/tests => tests/io}/data/stata1_114.dta (100%) rename pandas/{io/tests => tests/io}/data/stata1_117.dta (100%) rename pandas/{io/tests => tests/io}/data/stata1_encoding.dta (100%) rename pandas/{io/tests => tests/io}/data/stata2_113.dta (100%) rename pandas/{io/tests => tests/io}/data/stata2_114.dta (100%) rename pandas/{io/tests => tests/io}/data/stata2_115.dta (100%) rename pandas/{io/tests => tests/io}/data/stata2_117.dta (100%) rename pandas/{io/tests => tests/io}/data/stata3.csv (100%) rename pandas/{io/tests => tests/io}/data/stata3_113.dta (100%) rename pandas/{io/tests => tests/io}/data/stata3_114.dta (100%) rename pandas/{io/tests => tests/io}/data/stata3_115.dta (100%) rename pandas/{io/tests => tests/io}/data/stata3_117.dta (100%) rename pandas/{io/tests => tests/io}/data/stata4_113.dta (100%) rename pandas/{io/tests => tests/io}/data/stata4_114.dta (100%) rename pandas/{io/tests => tests/io}/data/stata4_115.dta (100%) rename pandas/{io/tests => tests/io}/data/stata4_117.dta (100%) rename pandas/{io/tests => tests/io}/data/stata5.csv (100%) rename pandas/{io/tests => tests/io}/data/stata5_113.dta (100%) rename pandas/{io/tests => tests/io}/data/stata5_114.dta (100%) rename pandas/{io/tests => tests/io}/data/stata5_115.dta (100%) rename pandas/{io/tests => tests/io}/data/stata5_117.dta (100%) rename pandas/{io/tests => tests/io}/data/stata6.csv (100%) rename pandas/{io/tests => tests/io}/data/stata6_113.dta (100%) rename pandas/{io/tests => tests/io}/data/stata6_114.dta (100%) rename pandas/{io/tests => tests/io}/data/stata6_115.dta (100%) rename pandas/{io/tests => tests/io}/data/stata6_117.dta (100%) rename pandas/{io/tests => tests/io}/data/stata7_111.dta (100%) rename pandas/{io/tests => tests/io}/data/stata7_115.dta (100%) rename pandas/{io/tests => tests/io}/data/stata7_117.dta (100%) rename pandas/{io/tests => 
tests/io}/data/stata8_113.dta (100%) rename pandas/{io/tests => tests/io}/data/stata8_115.dta (100%) rename pandas/{io/tests => tests/io}/data/stata8_117.dta (100%) rename pandas/{io/tests => tests/io}/data/stata9_115.dta (100%) rename pandas/{io/tests => tests/io}/data/stata9_117.dta (100%) rename pandas/{io/tests => tests/io}/data/test1.csv (100%) rename pandas/{io/tests => tests/io}/data/test1.xls (100%) rename pandas/{io/tests => tests/io}/data/test1.xlsm (100%) rename pandas/{io/tests => tests/io}/data/test1.xlsx (100%) rename pandas/{io/tests => tests/io}/data/test2.xls (100%) rename pandas/{io/tests => tests/io}/data/test2.xlsm (100%) rename pandas/{io/tests => tests/io}/data/test2.xlsx (100%) rename pandas/{io/tests => tests/io}/data/test3.xls (100%) rename pandas/{io/tests => tests/io}/data/test3.xlsm (100%) rename pandas/{io/tests => tests/io}/data/test3.xlsx (100%) rename pandas/{io/tests => tests/io}/data/test4.xls (100%) rename pandas/{io/tests => tests/io}/data/test4.xlsm (100%) rename pandas/{io/tests => tests/io}/data/test4.xlsx (100%) rename pandas/{io/tests => tests/io}/data/test5.xls (100%) rename pandas/{io/tests => tests/io}/data/test5.xlsm (100%) rename pandas/{io/tests => tests/io}/data/test5.xlsx (100%) rename pandas/{io/tests => tests/io}/data/test_converters.xls (100%) rename pandas/{io/tests => tests/io}/data/test_converters.xlsm (100%) rename pandas/{io/tests => tests/io}/data/test_converters.xlsx (100%) rename pandas/{io/tests => tests/io}/data/test_index_name_pre17.xls (100%) rename pandas/{io/tests => tests/io}/data/test_index_name_pre17.xlsm (100%) rename pandas/{io/tests => tests/io}/data/test_index_name_pre17.xlsx (100%) rename pandas/{io/tests => tests/io}/data/test_mmap.csv (100%) rename pandas/{io/tests => tests/io}/data/test_multisheet.xls (100%) rename pandas/{io/tests => tests/io}/data/test_multisheet.xlsm (100%) rename pandas/{io/tests => tests/io}/data/test_multisheet.xlsx (100%) rename pandas/{io/tests => 
tests/io}/data/test_squeeze.xls (100%) rename pandas/{io/tests => tests/io}/data/test_squeeze.xlsm (100%) rename pandas/{io/tests => tests/io}/data/test_squeeze.xlsx (100%) rename pandas/{io/tests => tests/io}/data/test_types.xls (100%) rename pandas/{io/tests => tests/io}/data/test_types.xlsm (100%) rename pandas/{io/tests => tests/io}/data/test_types.xlsx (100%) rename pandas/{io/tests => tests/io}/data/testdateoverflow.xls (100%) rename pandas/{io/tests => tests/io}/data/testdateoverflow.xlsm (100%) rename pandas/{io/tests => tests/io}/data/testdateoverflow.xlsx (100%) rename pandas/{io/tests => tests/io}/data/testdtype.xls (100%) rename pandas/{io/tests => tests/io}/data/testdtype.xlsm (100%) rename pandas/{io/tests => tests/io}/data/testdtype.xlsx (100%) rename pandas/{io/tests => tests/io}/data/testmultiindex.xls (100%) rename pandas/{io/tests => tests/io}/data/testmultiindex.xlsm (100%) rename pandas/{io/tests => tests/io}/data/testmultiindex.xlsx (100%) rename pandas/{io/tests => tests/io}/data/testskiprows.xls (100%) rename pandas/{io/tests => tests/io}/data/testskiprows.xlsm (100%) rename pandas/{io/tests => tests/io}/data/testskiprows.xlsx (100%) rename pandas/{io/tests => tests/io}/data/times_1900.xls (100%) rename pandas/{io/tests => tests/io}/data/times_1900.xlsm (100%) rename pandas/{io/tests => tests/io}/data/times_1900.xlsx (100%) rename pandas/{io/tests => tests/io}/data/times_1904.xls (100%) rename pandas/{io/tests => tests/io}/data/times_1904.xlsm (100%) rename pandas/{io/tests => tests/io}/data/times_1904.xlsx (100%) rename pandas/{io/tests => tests/io}/data/tips.csv (100%) rename pandas/{io/tests => tests/io}/data/valid_markup.html (100%) rename pandas/{io/tests => tests/io}/data/wikipedia_states.html (100%) create mode 100644 pandas/tests/io/formats/__init__.py rename pandas/{io/tests/parser => tests/io/formats}/data/unicode_series.csv (100%) create mode 100644 pandas/tests/io/formats/test_css.py create mode 100644 
pandas/tests/io/formats/test_eng_formatting.py create mode 100644 pandas/tests/io/formats/test_format.py create mode 100644 pandas/tests/io/formats/test_printing.py rename pandas/tests/{ => io}/formats/test_style.py (50%) create mode 100644 pandas/tests/io/formats/test_to_csv.py create mode 100644 pandas/tests/io/formats/test_to_excel.py create mode 100644 pandas/tests/io/formats/test_to_html.py create mode 100644 pandas/tests/io/formats/test_to_latex.py rename pandas/{io/tests => tests/io}/generate_legacy_storage_files.py (64%) mode change 100644 => 100755 create mode 100644 pandas/tests/io/json/__init__.py rename pandas/{io/tests => tests/io}/json/data/tsframe_iso_v012.json (100%) rename pandas/{io/tests => tests/io}/json/data/tsframe_v012.json (100%) create mode 100644 pandas/tests/io/json/data/tsframe_v012.json.zip create mode 100644 pandas/tests/io/json/test_compression.py create mode 100644 pandas/tests/io/json/test_json_table_schema.py rename pandas/{io/tests => tests/io}/json/test_normalize.py (59%) rename pandas/{io/tests => tests/io}/json/test_pandas.py (76%) create mode 100644 pandas/tests/io/json/test_readlines.py rename pandas/{io/tests => tests/io}/json/test_ujson.py (72%) create mode 100644 pandas/tests/io/msgpack/__init__.py create mode 100644 pandas/tests/io/msgpack/common.py create mode 100644 pandas/tests/io/msgpack/data/frame.mp rename pandas/tests/{test_msgpack => io/msgpack}/test_buffer.py (76%) rename pandas/tests/{test_msgpack => io/msgpack}/test_case.py (95%) create mode 100644 pandas/tests/io/msgpack/test_except.py rename pandas/tests/{test_msgpack => io/msgpack}/test_extension.py (89%) rename pandas/tests/{test_msgpack => io/msgpack}/test_format.py (98%) rename pandas/tests/{test_msgpack => io/msgpack}/test_limits.py (64%) rename pandas/tests/{test_msgpack => io/msgpack}/test_newspec.py (97%) rename pandas/tests/{test_msgpack => io/msgpack}/test_obj.py (85%) rename pandas/tests/{test_msgpack => io/msgpack}/test_pack.py (89%) rename 
pandas/tests/{test_msgpack => io/msgpack}/test_read_size.py (96%) rename pandas/tests/{test_msgpack => io/msgpack}/test_seq.py (90%) rename pandas/tests/{test_msgpack => io/msgpack}/test_sequnpack.py (71%) rename pandas/tests/{test_msgpack => io/msgpack}/test_subtype.py (90%) rename pandas/tests/{test_msgpack => io/msgpack}/test_unpack.py (90%) rename pandas/tests/{test_msgpack => io/msgpack}/test_unpack_raw.py (94%) create mode 100644 pandas/tests/io/parser/__init__.py rename pandas/{io/tests => tests/io}/parser/c_parser_only.py (76%) rename pandas/{io/tests => tests/io}/parser/comment.py (100%) rename pandas/{io/tests => tests/io}/parser/common.py (75%) rename pandas/{io/tests => tests/io}/parser/compression.py (76%) rename pandas/{io/tests => tests/io}/parser/converters.py (89%) rename pandas/{io/tests => tests/io}/parser/data/iris.csv (100%) create mode 100644 pandas/tests/io/parser/data/items.jsonl rename pandas/{io/tests => tests/io}/parser/data/salaries.csv (100%) rename pandas/{io/tests => tests/io}/parser/data/salaries.csv.bz2 (100%) rename pandas/{io/tests => tests/io}/parser/data/salaries.csv.gz (100%) rename pandas/{io/tests => tests/io}/parser/data/salaries.csv.xz (100%) rename pandas/{io/tests => tests/io}/parser/data/salaries.csv.zip (100%) rename pandas/{io/tests => tests/io}/parser/data/sauron.SHIFT_JIS.csv (100%) create mode 100644 pandas/tests/io/parser/data/sub_char.csv create mode 100644 pandas/tests/io/parser/data/tar_csv.tar create mode 100644 pandas/tests/io/parser/data/tar_csv.tar.gz rename pandas/{io/tests => tests/io}/parser/data/test1.csv (100%) rename pandas/{io/tests => tests/io}/parser/data/test1.csv.bz2 (100%) rename pandas/{io/tests => tests/io}/parser/data/test1.csv.gz (100%) rename pandas/{io/tests => tests/io}/parser/data/test2.csv (100%) rename pandas/{io/tests => tests/io}/parser/data/test_mmap.csv (100%) rename pandas/{io/tests => tests/io}/parser/data/tips.csv (100%) create mode 100644 pandas/tests/io/parser/data/tips.csv.bz2 
create mode 100644 pandas/tests/io/parser/data/tips.csv.gz rename pandas/tests/{formats => io/parser}/data/unicode_series.csv (100%) rename pandas/{io/tests => tests/io}/parser/data/utf16_ex.txt (100%) create mode 100644 pandas/tests/io/parser/data/utf16_ex_small.zip rename pandas/{io/tests => tests/io}/parser/dialect.py (95%) rename pandas/{io/tests => tests/io}/parser/dtypes.py (67%) rename pandas/{io/tests => tests/io}/parser/header.py (68%) rename pandas/{io/tests => tests/io}/parser/index_col.py (92%) create mode 100644 pandas/tests/io/parser/mangle_dupes.py rename pandas/{io/tests => tests/io}/parser/multithread.py (100%) rename pandas/{io/tests => tests/io}/parser/na_values.py (76%) rename pandas/{io/tests => tests/io}/parser/parse_dates.py (61%) rename pandas/{io/tests => tests/io}/parser/python_parser_only.py (69%) rename pandas/{io/tests => tests/io}/parser/quoting.py (82%) rename pandas/{io/tests => tests/io}/parser/skiprows.py (98%) rename pandas/{io/tests => tests/io}/parser/test_network.py (61%) rename pandas/{io/tests => tests/io}/parser/test_parsers.py (53%) rename pandas/{io/tests => tests/io}/parser/test_read_fwf.py (90%) rename pandas/{io/tests => tests/io}/parser/test_textreader.py (67%) rename pandas/{io/tests => tests/io}/parser/test_unsupported.py (53%) rename pandas/{io/tests => tests/io}/parser/usecols.py (84%) create mode 100644 pandas/tests/io/sas/__init__.py rename pandas/{io/tests => tests/io}/sas/data/DEMO_G.csv (100%) rename pandas/{io/tests => tests/io}/sas/data/DEMO_G.xpt (100%) rename pandas/{io/tests => tests/io}/sas/data/DRXFCD_G.csv (100%) rename pandas/{io/tests => tests/io}/sas/data/DRXFCD_G.xpt (100%) rename pandas/{io/tests => tests/io}/sas/data/SSHSV1_A.csv (100%) rename pandas/{io/tests => tests/io}/sas/data/SSHSV1_A.xpt (100%) rename pandas/{io/tests => tests/io}/sas/data/airline.csv (100%) rename pandas/{io/tests => tests/io}/sas/data/airline.sas7bdat (100%) create mode 100644 pandas/tests/io/sas/data/datetime.csv create 
mode 100644 pandas/tests/io/sas/data/datetime.sas7bdat rename pandas/{io/tests => tests/io}/sas/data/paxraw_d_short.csv (100%) rename pandas/{io/tests => tests/io}/sas/data/paxraw_d_short.xpt (100%) create mode 100644 pandas/tests/io/sas/data/productsales.csv rename pandas/{io/tests => tests/io}/sas/data/productsales.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test1.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test10.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test11.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test12.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test13.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test14.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test15.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test16.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test2.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test3.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test4.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test5.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test6.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test7.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test8.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test9.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test_12659.csv (100%) rename pandas/{io/tests => tests/io}/sas/data/test_12659.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test_sas7bdat_1.csv (100%) rename pandas/{io/tests => tests/io}/sas/data/test_sas7bdat_2.csv (100%) rename pandas/{io/tests/data/legacy_hdf/legacy_0.10.h5 => tests/io/sas/data/zero_variables.sas7bdat} (61%) create mode 100644 pandas/tests/io/sas/test_sas.py rename pandas/{io/tests => tests/io}/sas/test_sas7bdat.py (64%) rename pandas/{io/tests => tests/io}/sas/test_xport.py (97%) 
rename pandas/{io/tests => tests/io}/test_clipboard.py (87%) create mode 100644 pandas/tests/io/test_common.py create mode 100644 pandas/tests/io/test_excel.py rename pandas/{io/tests => tests/io}/test_feather.py (52%) create mode 100644 pandas/tests/io/test_gbq.py rename pandas/{io/tests => tests/io}/test_html.py (76%) rename pandas/{io/tests => tests/io}/test_packers.py (78%) create mode 100644 pandas/tests/io/test_parquet.py create mode 100644 pandas/tests/io/test_pickle.py rename pandas/{io/tests => tests/io}/test_pytables.py (67%) create mode 100644 pandas/tests/io/test_s3.py rename pandas/{io/tests => tests/io}/test_sql.py (80%) rename pandas/{io/tests => tests/io}/test_stata.py (79%) create mode 100644 pandas/tests/plotting/test_converter.py create mode 100644 pandas/tests/plotting/test_deprecated.py create mode 100644 pandas/tests/reshape/__init__.py rename pandas/{tools/tests => tests/reshape}/data/cut_data.csv (100%) create mode 100644 pandas/tests/reshape/merge/__init__.py rename pandas/{tools/tests => tests/reshape/merge}/data/allow_exact_matches.csv (100%) rename pandas/{tools/tests => tests/reshape/merge}/data/allow_exact_matches_and_tolerance.csv (100%) rename pandas/{tools/tests => tests/reshape/merge}/data/asof.csv (100%) rename pandas/{tools/tests => tests/reshape/merge}/data/asof2.csv (100%) rename pandas/{tools/tests => tests/reshape/merge}/data/quotes.csv (100%) rename pandas/{tools/tests => tests/reshape/merge}/data/quotes2.csv (100%) rename pandas/{tools/tests => tests/reshape/merge}/data/tolerance.csv (100%) rename pandas/{tools/tests => tests/reshape/merge}/data/trades.csv (100%) rename pandas/{tools/tests => tests/reshape/merge}/data/trades2.csv (100%) rename pandas/{tools/tests => tests/reshape/merge}/test_join.py (78%) rename pandas/{tools/tests => tests/reshape/merge}/test_merge.py (56%) rename pandas/{tools/tests => tests/reshape/merge}/test_merge_asof.py (89%) create mode 100644 pandas/tests/reshape/merge/test_merge_index_as_string.py 
rename pandas/{tools/tests => tests/reshape/merge}/test_merge_ordered.py (73%) rename pandas/{tools/tests => tests/reshape}/test_concat.py (75%) create mode 100644 pandas/tests/reshape/test_melt.py rename pandas/{tools/tests => tests/reshape}/test_pivot.py (79%) create mode 100644 pandas/tests/reshape/test_reshape.py create mode 100644 pandas/tests/reshape/test_tile.py create mode 100644 pandas/tests/reshape/test_union_categoricals.py create mode 100644 pandas/tests/reshape/test_util.py create mode 100644 pandas/tests/scalar/interval/__init__.py create mode 100644 pandas/tests/scalar/interval/test_interval.py create mode 100644 pandas/tests/scalar/period/__init__.py rename pandas/tests/scalar/{test_period_asfreq.py => period/test_asfreq.py} (57%) create mode 100644 pandas/tests/scalar/period/test_period.py create mode 100644 pandas/tests/scalar/test_nat.py delete mode 100644 pandas/tests/scalar/test_period.py delete mode 100644 pandas/tests/scalar/test_timedelta.py delete mode 100644 pandas/tests/scalar/test_timestamp.py create mode 100644 pandas/tests/scalar/timedelta/__init__.py create mode 100644 pandas/tests/scalar/timedelta/test_arithmetic.py create mode 100644 pandas/tests/scalar/timedelta/test_construction.py create mode 100644 pandas/tests/scalar/timedelta/test_formats.py create mode 100644 pandas/tests/scalar/timedelta/test_timedelta.py create mode 100644 pandas/tests/scalar/timestamp/__init__.py create mode 100644 pandas/tests/scalar/timestamp/test_arithmetic.py create mode 100644 pandas/tests/scalar/timestamp/test_comparisons.py create mode 100644 pandas/tests/scalar/timestamp/test_rendering.py create mode 100644 pandas/tests/scalar/timestamp/test_timestamp.py create mode 100644 pandas/tests/scalar/timestamp/test_timezones.py create mode 100644 pandas/tests/scalar/timestamp/test_unary_ops.py create mode 100644 pandas/tests/series/indexing/__init__.py create mode 100644 pandas/tests/series/indexing/conftest.py create mode 100644 
pandas/tests/series/indexing/test_alter_index.py create mode 100644 pandas/tests/series/indexing/test_boolean.py create mode 100644 pandas/tests/series/indexing/test_callable.py create mode 100644 pandas/tests/series/indexing/test_datetime.py create mode 100644 pandas/tests/series/indexing/test_iloc.py create mode 100644 pandas/tests/series/indexing/test_indexing.py create mode 100644 pandas/tests/series/indexing/test_loc.py create mode 100644 pandas/tests/series/indexing/test_numeric.py create mode 100644 pandas/tests/series/test_api.py create mode 100644 pandas/tests/series/test_arithmetic.py delete mode 100644 pandas/tests/series/test_indexing.py delete mode 100644 pandas/tests/series/test_misc_api.py create mode 100644 pandas/tests/series/test_rank.py create mode 100644 pandas/tests/series/test_timezones.py create mode 100644 pandas/tests/sparse/__init__.py create mode 100644 pandas/tests/sparse/common.py create mode 100644 pandas/tests/sparse/frame/__init__.py create mode 100644 pandas/tests/sparse/frame/test_analytics.py create mode 100644 pandas/tests/sparse/frame/test_apply.py rename pandas/{sparse/tests => tests/sparse/frame}/test_frame.py (72%) create mode 100644 pandas/tests/sparse/frame/test_indexing.py create mode 100644 pandas/tests/sparse/frame/test_to_csv.py create mode 100644 pandas/tests/sparse/frame/test_to_from_scipy.py create mode 100644 pandas/tests/sparse/series/__init__.py create mode 100644 pandas/tests/sparse/series/test_indexing.py rename pandas/{sparse/tests => tests/sparse/series}/test_series.py (80%) rename pandas/{sparse/tests => tests/sparse}/test_arithmetics.py (95%) rename pandas/{sparse/tests => tests/sparse}/test_array.py (71%) rename pandas/{sparse/tests => tests/sparse}/test_combine_concat.py (82%) rename pandas/{sparse/tests => tests/sparse}/test_format.py (77%) rename pandas/{sparse/tests => tests/sparse}/test_groupby.py (96%) rename pandas/{sparse/tests => tests/sparse}/test_indexing.py (82%) rename pandas/{sparse/tests => 
tests/sparse}/test_libsparse.py (77%) rename pandas/{sparse/tests => tests/sparse}/test_pivot.py (97%) create mode 100644 pandas/tests/sparse/test_reshape.py delete mode 100644 pandas/tests/test_categorical.py create mode 100644 pandas/tests/test_downstream.py create mode 100644 pandas/tests/test_errors.py delete mode 100644 pandas/tests/test_generic.py delete mode 100644 pandas/tests/test_msgpack/test_except.py mode change 100755 => 100644 pandas/tests/test_multilevel.py delete mode 100644 pandas/tests/test_panel4d.py delete mode 100644 pandas/tests/test_panelnd.py create mode 100644 pandas/tests/test_register_accessor.py rename pandas/tests/{tseries => }/test_resample.py (72%) mode change 100755 => 100644 delete mode 100644 pandas/tests/test_reshape.py create mode 100644 pandas/tests/test_sorting.py delete mode 100644 pandas/tests/test_stats.py create mode 100644 pandas/tests/tools/__init__.py rename pandas/{tools/tests/test_util.py => tests/tools/test_numeric.py} (71%) create mode 100644 pandas/tests/tseries/conftest.py create mode 100644 pandas/tests/tseries/offsets/__init__.py create mode 100644 pandas/tests/tseries/offsets/common.py create mode 100644 pandas/tests/tseries/offsets/conftest.py rename pandas/tests/tseries/{ => offsets}/data/cday-0.14.1.pickle (100%) rename pandas/tests/tseries/{ => offsets}/data/dateoffset_0_15_2.pickle (100%) create mode 100644 pandas/tests/tseries/offsets/test_fiscal.py create mode 100644 pandas/tests/tseries/offsets/test_offsets.py create mode 100644 pandas/tests/tseries/offsets/test_ticks.py create mode 100644 pandas/tests/tseries/offsets/test_yqm_offsets.py delete mode 100644 pandas/tests/tseries/test_converter.py delete mode 100644 pandas/tests/tseries/test_offsets.py delete mode 100644 pandas/tests/tseries/test_timezones.py create mode 100644 pandas/tests/tslibs/__init__.py create mode 100644 pandas/tests/tslibs/test_array_to_datetime.py create mode 100644 pandas/tests/tslibs/test_ccalendar.py create mode 100644 
pandas/tests/tslibs/test_conversion.py create mode 100644 pandas/tests/tslibs/test_libfrequencies.py create mode 100644 pandas/tests/tslibs/test_liboffsets.py create mode 100644 pandas/tests/tslibs/test_parsing.py create mode 100644 pandas/tests/tslibs/test_period_asfreq.py create mode 100644 pandas/tests/tslibs/test_timezones.py delete mode 100644 pandas/tests/types/test_cast.py delete mode 100644 pandas/tests/types/test_common.py delete mode 100644 pandas/tests/types/test_dtypes.py delete mode 100644 pandas/tests/types/test_generic.py delete mode 100644 pandas/tests/types/test_inference.py delete mode 100644 pandas/tests/types/test_io.py create mode 100644 pandas/tests/util/__init__.py rename pandas/{tools/tests => tests/util}/test_hashing.py (70%) rename pandas/tests/{ => util}/test_testing.py (74%) rename pandas/tests/{ => util}/test_util.py (65%) delete mode 100644 pandas/tools/tests/test_tile.py delete mode 100644 pandas/tseries/common.py delete mode 100644 pandas/tseries/interval.py delete mode 100644 pandas/tseries/util.py delete mode 100644 pandas/tslib.pxd create mode 100644 pandas/tslib.py delete mode 100644 pandas/tslib.pyx delete mode 100644 pandas/types/dtypes.py delete mode 100644 pandas/types/inference.py create mode 100644 pandas/util/_decorators.py create mode 100644 pandas/util/_depr_module.py rename pandas/util/{doctools.py => _doctools.py} (88%) rename pandas/util/{print_versions.py => _print_versions.py} (80%) create mode 100644 pandas/util/_test_decorators.py rename pandas/util/{validators.py => _validators.py} (60%) delete mode 100644 pandas/util/depr_module.py create mode 100644 pyproject.toml create mode 100755 scripts/announce.py delete mode 100644 scripts/api_rst_coverage.py delete mode 100644 scripts/bench_join.R delete mode 100644 scripts/bench_join.py delete mode 100644 scripts/bench_join_multi.py delete mode 100644 scripts/bench_refactor.py delete mode 100644 scripts/boxplot_test.py create mode 100755 
scripts/build_dist_for_release.sh create mode 100755 scripts/convert_deps.py delete mode 100755 scripts/count_code.sh delete mode 100644 scripts/faster_xs.py delete mode 100644 scripts/file_sizes.py delete mode 100644 scripts/gen_release_notes.py delete mode 100644 scripts/git-mrb delete mode 100644 scripts/git_code_churn.py delete mode 100644 scripts/groupby_sample.py delete mode 100644 scripts/groupby_speed.py delete mode 100644 scripts/groupby_test.py delete mode 100644 scripts/hdfstore_panel_perf.py delete mode 100644 scripts/json_manip.py delete mode 100644 scripts/leak.py create mode 100755 scripts/list_future_warnings.sh rename scripts/{merge-py.py => merge-pr.py} (80%) delete mode 100644 scripts/parser_magic.py delete mode 100644 scripts/preepoch_test.py delete mode 100644 scripts/pypistats.py delete mode 100644 scripts/roll_median_leak.py delete mode 100644 scripts/runtests.py delete mode 100644 scripts/test_py27.bat delete mode 100644 scripts/testmed.py delete mode 100755 scripts/touchup_gh_issues.py delete mode 100755 scripts/use_build_cache.py create mode 100755 scripts/validate_docstrings.py delete mode 100644 scripts/winbuild_py27.bat delete mode 100644 scripts/windows_builder/build_27-32.bat delete mode 100644 scripts/windows_builder/build_27-64.bat delete mode 100644 scripts/windows_builder/build_34-32.bat delete mode 100644 scripts/windows_builder/build_34-64.bat delete mode 100644 scripts/windows_builder/check_and_build.bat delete mode 100644 scripts/windows_builder/check_and_build.py delete mode 100644 scripts/windows_builder/readme.txt create mode 100644 test_fast.bat delete mode 100755 test_perf.sh delete mode 100644 vb_suite/.gitignore delete mode 100644 vb_suite/attrs_caching.py delete mode 100644 vb_suite/binary_ops.py delete mode 100644 vb_suite/categoricals.py delete mode 100644 vb_suite/ctors.py delete mode 100644 vb_suite/eval.py delete mode 100644 vb_suite/frame_ctor.py delete mode 100644 vb_suite/frame_methods.py delete mode 100644 
vb_suite/generate_rst_files.py delete mode 100644 vb_suite/gil.py delete mode 100644 vb_suite/groupby.py delete mode 100644 vb_suite/hdfstore_bench.py delete mode 100644 vb_suite/index_object.py delete mode 100644 vb_suite/indexing.py delete mode 100644 vb_suite/inference.py delete mode 100644 vb_suite/io_bench.py delete mode 100644 vb_suite/io_sql.py delete mode 100644 vb_suite/join_merge.py delete mode 100755 vb_suite/make.py delete mode 100755 vb_suite/measure_memory_consumption.py delete mode 100644 vb_suite/miscellaneous.py delete mode 100644 vb_suite/packers.py delete mode 100644 vb_suite/pandas_vb_common.py delete mode 100644 vb_suite/panel_ctor.py delete mode 100644 vb_suite/panel_methods.py delete mode 100644 vb_suite/parser_vb.py delete mode 100755 vb_suite/perf_HEAD.py delete mode 100644 vb_suite/plotting.py delete mode 100644 vb_suite/reindex.py delete mode 100644 vb_suite/replace.py delete mode 100644 vb_suite/reshape.py delete mode 100755 vb_suite/run_suite.py delete mode 100644 vb_suite/series_methods.py delete mode 100644 vb_suite/source/conf.py delete mode 100644 vb_suite/source/themes/agogo/layout.html delete mode 100644 vb_suite/source/themes/agogo/static/agogo.css_t delete mode 100644 vb_suite/source/themes/agogo/static/bgfooter.png delete mode 100644 vb_suite/source/themes/agogo/static/bgtop.png delete mode 100644 vb_suite/source/themes/agogo/theme.conf delete mode 100644 vb_suite/sparse.py delete mode 100644 vb_suite/stat_ops.py delete mode 100644 vb_suite/strings.py delete mode 100644 vb_suite/suite.py delete mode 100644 vb_suite/test.py delete mode 100755 vb_suite/test_perf.py delete mode 100644 vb_suite/timedelta.py delete mode 100644 vb_suite/timeseries.py diff --git a/.github/CODE_OF_CONDUCT.md b/.github/CODE_OF_CONDUCT.md new file mode 100644 index 0000000000000..a1fbece3284ec --- /dev/null +++ b/.github/CODE_OF_CONDUCT.md @@ -0,0 +1,63 @@ +# Contributor Code of Conduct + +As contributors and maintainers of this project, and in the 
interest of +fostering an open and welcoming community, we pledge to respect all people who +contribute through reporting issues, posting feature requests, updating +documentation, submitting pull requests or patches, and other activities. + +We are committed to making participation in this project a harassment-free +experience for everyone, regardless of level of experience, gender, gender +identity and expression, sexual orientation, disability, personal appearance, +body size, race, ethnicity, age, religion, or nationality. + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery +* Personal attacks +* Trolling or insulting/derogatory comments +* Public or private harassment +* Publishing others' private information, such as physical or electronic + addresses, without explicit permission +* Other unethical or unprofessional conduct + +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +By adopting this Code of Conduct, project maintainers commit themselves to +fairly and consistently applying these principles to every aspect of managing +this project. Project maintainers who do not follow or enforce the Code of +Conduct may be permanently removed from the project team. + +This Code of Conduct applies both within project spaces and in public spaces +when an individual is representing the project or its community. + +A working group of community members is committed to promptly addressing any +reported issues. The working group is made up of pandas contributors and users. +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the working group by e-mail (pandas-coc@googlegroups.com). 
+Messages sent to this e-mail address will not be publicly visible but only to +the working group members. The working group currently includes + +- Safia Abdalla +- Tom Augspurger +- Joris Van den Bossche +- Camille Scott +- Nathaniel Smith + +All complaints will be reviewed and investigated and will result in a response +that is deemed necessary and appropriate to the circumstances. Maintainers are +obligated to maintain confidentiality with regard to the reporter of an +incident. + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 1.3.0, available at +[http://contributor-covenant.org/version/1/3/0/][version], +and the [Swift Code of Conduct][swift]. + +[homepage]: http://contributor-covenant.org +[version]: http://contributor-covenant.org/version/1/3/0/ +[swift]: https://swift.org/community/#code-of-conduct + diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 7898822e0e11d..95729f845ff5c 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -1,515 +1,24 @@ Contributing to pandas ====================== -Where to start? ---------------- - -All contributions, bug reports, bug fixes, documentation improvements, enhancements and ideas are welcome. - -If you are simply looking to start working with the *pandas* codebase, navigate to the [GitHub "issues" tab](https://github.com/pandas-dev/pandas/issues) and start looking through interesting issues. There are a number of issues listed under [Docs](https://github.com/pandas-dev/pandas/issues?labels=Docs&sort=updated&state=open) and [Difficulty Novice](https://github.com/pandas-dev/pandas/issues?q=is%3Aopen+is%3Aissue+label%3A%22Difficulty+Novice%22) where you could start out. - -Or maybe through using *pandas* you have an idea of you own or are looking for something in the documentation and thinking 'this can be improved'...you can do something about it! 
- -Feel free to ask questions on the [mailing list](https://groups.google.com/forum/?fromgroups#!forum/pydata) or on [Gitter](https://gitter.im/pydata/pandas). - -Bug reports and enhancement requests ------------------------------------- - -Bug reports are an important part of making *pandas* more stable. Having a complete bug report will allow others to reproduce the bug and provide insight into fixing. Because many versions of *pandas* are supported, knowing version information will also identify improvements made since previous versions. Trying the bug-producing code out on the *master* branch is often a worthwhile exercise to confirm the bug still exists. It is also worth searching existing bug reports and pull requests to see if the issue has already been reported and/or fixed. - -Bug reports must: - -1. Include a short, self-contained Python snippet reproducing the problem. You can format the code nicely by using [GitHub Flavored Markdown](http://github.github.com/github-flavored-markdown/): - - ```python - >>> from pandas import DataFrame - >>> df = DataFrame(...) - ... - ``` - -2. Include the full version string of *pandas* and its dependencies. In versions of *pandas* after 0.12 you can use a built in function: - - >>> from pandas.util.print_versions import show_versions - >>> show_versions() - - and in *pandas* 0.13.1 onwards: - - >>> pd.show_versions() - -3. Explain why the current behavior is wrong/not desired and what you expect instead. - -The issue will then show up to the *pandas* community and be open to comments/ideas from others. - -Working with the code ---------------------- - -Now that you have an issue you want to fix, enhancement to add, or documentation to improve, you need to learn how to work with GitHub and the *pandas* code base. - -### Version control, Git, and GitHub - -To the new user, working with Git is one of the more daunting aspects of contributing to *pandas*. 
It can very quickly become overwhelming, but sticking to the guidelines below will help keep the process straightforward and mostly trouble free. As always, if you are having difficulties please feel free to ask for help. - -The code is hosted on [GitHub](https://www.github.com/pandas-dev/pandas). To contribute you will need to sign up for a [free GitHub account](https://github.com/signup/free). We use [Git](http://git-scm.com/) for version control to allow many people to work together on the project. - -Some great resources for learning Git: - -- the [GitHub help pages](http://help.github.com/). -- the [NumPy's documentation](http://docs.scipy.org/doc/numpy/dev/index.html). -- Matthew Brett's [Pydagogue](http://matthew-brett.github.com/pydagogue/). - -### Getting started with Git - -[GitHub has instructions](http://help.github.com/set-up-git-redirect) for installing git, setting up your SSH key, and configuring git. All these steps need to be completed before you can work seamlessly between your local repository and GitHub. - -### Forking - -You will need your own fork to work on the code. Go to the [pandas project page](https://github.com/pandas-dev/pandas) and hit the `Fork` button. You will want to clone your fork to your machine: - - git clone git@github.com:your-user-name/pandas.git pandas-yourname - cd pandas-yourname - git remote add upstream git://github.com/pandas-dev/pandas.git - -This creates the directory pandas-yourname and connects your repository to the upstream (main project) *pandas* repository. - -The testing suite will run automatically on Travis-CI once your pull request is submitted. However, if you wish to run the test suite on a branch prior to submitting the pull request, then Travis-CI needs to be hooked up to your GitHub repository. Instructions for doing so are [here](http://about.travis-ci.org/docs/user/getting-started/). 
- -### Creating a branch - -You want your master branch to reflect only production-ready code, so create a feature branch for making your changes. For example: - - git branch shiny-new-feature - git checkout shiny-new-feature - -The above can be simplified to: - - git checkout -b shiny-new-feature - -This changes your working directory to the shiny-new-feature branch. Keep any changes in this branch specific to one bug or feature so it is clear what the branch brings to *pandas*. You can have many shiny-new-features and switch in between them using the git checkout command. - -To update this branch, you need to retrieve the changes from the master branch: - - git fetch upstream - git rebase upstream/master - -This will replay your commits on top of the lastest pandas git master. If this leads to merge conflicts, you must resolve these before submitting your pull request. If you have uncommitted changes, you will need to `stash` them prior to updating. This will effectively store your changes and they can be reapplied after updating. - -### Creating a development environment - -An easy way to create a *pandas* development environment is as follows. - -- Install either Anaconda <install.anaconda> or miniconda <install.miniconda> -- Make sure that you have cloned the repository <contributing.forking> -- `cd` to the *pandas* source directory - -Tell conda to create a new environment, named `pandas_dev`, or any other name you would like for this environment, by running: - - conda create -n pandas_dev --file ci/requirements_dev.txt - -For a python 3 environment: - - conda create -n pandas_dev python=3 --file ci/requirements_dev.txt - -If you are on Windows, then you will also need to install the compiler linkages: - - conda install -n pandas_dev libpython - -This will create the new environment, and not touch any of your existing environments, nor any existing python installation. 
It will install all of the basic dependencies of *pandas*, as well as the development and testing tools. If you would like to install other dependencies, you can install them as follows: - - conda install -n pandas_dev -c pandas pytables scipy - -To install *all* pandas dependencies you can do the following: - - conda install -n pandas_dev -c pandas --file ci/requirements_all.txt - -To work in this environment, Windows users should `activate` it as follows: - - activate pandas_dev - -Mac OSX and Linux users should use: - - source activate pandas_dev - -You will then see a confirmation message to indicate you are in the new development environment. - -To view your environments: - - conda info -e - -To return to you home root environment: - - deactivate - -See the full conda docs [here](http://conda.pydata.org/docs). - -At this point you can easily do an *in-place* install, as detailed in the next section. - -### Making changes - -Before making your code changes, it is often necessary to build the code that was just checked out. There are two primary methods of doing this. - -1. The best way to develop *pandas* is to build the C extensions in-place by running: - - python setup.py build_ext --inplace - - If you startup the Python interpreter in the *pandas* source directory you will call the built C extensions - -2. Another very common option is to do a `develop` install of *pandas*: - - python setup.py develop - - This makes a symbolic link that tells the Python interpreter to import *pandas* from your development directory. Thus, you can always be using the development version on your system without being inside the clone directory. - -Contributing to the documentation ---------------------------------- - -If you're not the developer type, contributing to the documentation is still of huge value. You don't even have to be an expert on *pandas* to do so! 
Something as simple as rewriting small passages for clarity as you reference the docs is a simple but effective way to contribute. The next person to read that passage will be in your debt! - -In fact, there are sections of the docs that are worse off after being written by experts. If something in the docs doesn't make sense to you, updating the relevant section after you figure it out is a simple way to ensure it will help the next person. - -### About the *pandas* documentation - -The documentation is written in **reStructuredText**, which is almost like writing in plain English, and built using [Sphinx](http://sphinx.pocoo.org/). The Sphinx Documentation has an excellent [introduction to reST](http://sphinx.pocoo.org/rest.html). Review the Sphinx docs to perform more complex changes to the documentation as well. - -Some other important things to know about the docs: - -- The *pandas* documentation consists of two parts: the docstrings in the code itself and the docs in this folder `pandas/doc/`. - - The docstrings provide a clear explanation of the usage of the individual functions, while the documentation in this folder consists of tutorial-like overviews per topic together with some other information (what's new, installation, etc). - -- The docstrings follow the **Numpy Docstring Standard**, which is used widely in the Scientific Python community. This standard specifies the format of the different sections of the docstring. See [this document](https://github.com/numpy/numpy/blob/master/doc/HOWTO_DOCUMENT.rst.txt) for a detailed explanation, or look at some of the existing functions to extend it in a similar manner. -- The tutorials make heavy use of the [ipython directive](http://matplotlib.org/sampledoc/ipython_directive.html) sphinx extension. This directive lets you put code in the documentation which will be run during the doc build. For example: - - .. 
ipython:: python - - x = 2 - x**3 - - will be rendered as: - - In [1]: x = 2 - - In [2]: x**3 - Out[2]: 8 - - Almost all code examples in the docs are run (and the output saved) during the doc build. This approach means that code examples will always be up to date, but it does make the doc building a bit more complex. - -> **note** -> -> The `.rst` files are used to automatically generate Markdown and HTML versions of the docs. For this reason, please do not edit `CONTRIBUTING.md` directly, but instead make any changes to `doc/source/contributing.rst`. Then, to generate `CONTRIBUTING.md`, use [pandoc](http://johnmacfarlane.net/pandoc/) with the following command: -> -> pandoc doc/source/contributing.rst -t markdown_github > CONTRIBUTING.md - -The utility script `scripts/api_rst_coverage.py` can be used to compare the list of methods documented in `doc/source/api.rst` (which is used to generate the [API Reference](http://pandas.pydata.org/pandas-docs/stable/api.html) page) and the actual public methods. This will identify methods documented in in `doc/source/api.rst` that are not actually class methods, and existing methods that are not documented in `doc/source/api.rst`. - -### How to build the *pandas* documentation - -#### Requirements - -To build the *pandas* docs there are some extra requirements: you will need to have `sphinx` and `ipython` installed. [numpydoc](https://github.com/numpy/numpydoc) is used to parse the docstrings that follow the Numpy Docstring Standard (see above), but you don't need to install this because a local copy of numpydoc is included in the *pandas* source code. - -It is easiest to create a development environment <contributing.dev\_env>, then install: - - conda install -n pandas_dev sphinx ipython - -Furthermore, it is recommended to have all [optional dependencies](http://pandas.pydata.org/pandas-docs/dev/install.html#optional-dependencies) installed. 
This is not strictly necessary, but be aware that you will see some error messages when building the docs. This happens because all the code in the documentation is executed during the doc build, and so code examples using optional dependencies will generate errors. Run `pd.show_versions()` to get an overview of the installed version of all dependencies. - -> **warning** -> -> You need to have `sphinx` version 1.2.2 or newer, but older than version 1.3. Versions before 1.1.3 should also work. - -#### Building the documentation - -So how do you build the docs? Navigate to your local `pandas/doc/` directory in the console and run: - - python make.py html - -Then you can find the HTML output in the folder `pandas/doc/build/html/`. - -The first time you build the docs, it will take quite a while because it has to run all the code examples and build all the generated docstring pages. In subsequent evocations, sphinx will try to only build the pages that have been modified. - -If you want to do a full clean build, do: - - python make.py clean - python make.py build - -Starting with *pandas* 0.13.1 you can tell `make.py` to compile only a single section of the docs, greatly reducing the turn-around time for checking your changes. You will be prompted to delete `.rst` files that aren't required. This is okay because the prior versions of these files can be checked out from git. However, you must make sure not to commit the file deletions to your Git repository! - - #omit autosummary and API section - python make.py clean - python make.py --no-api - - # compile the docs with only a single - # section, that which is in indexing.rst - python make.py clean - python make.py --single indexing - -For comparison, a full documentation build may take 10 minutes, a `-no-api` build may take 3 minutes and a single section may take 15 seconds. Subsequent builds, which only process portions you have changed, will be faster. 
Open the following file in a web browser to see the full documentation you just built: - - pandas/docs/build/html/index.html +Whether you are a novice or experienced software developer, all contributions and suggestions are welcome! -And you'll have the satisfaction of seeing your new and improved documentation! +Our main contribution docs can be found [here](https://github.com/pandas-dev/pandas/blob/master/doc/source/contributing.rst), but if you do not want to read it in its entirety, we will summarize the main ways in which you can contribute and point to relevant places in the docs for further information. -#### Building master branch documentation - -When pull requests are merged into the *pandas* `master` branch, the main parts of the documentation are also built by Travis-CI. These docs are then hosted [here](http://pandas-docs.github.io/pandas-docs-travis). - -Contributing to the code base ------------------------------ - -### Code standards - -*pandas* uses the [PEP8](http://www.python.org/dev/peps/pep-0008/) standard. There are several tools to ensure you abide by this standard. - -We've written a tool to check that your commits are PEP8 great, [pip install pep8radius](https://github.com/hayd/pep8radius). Look at PEP8 fixes in your branch vs master with: - - pep8radius master --diff - -and make these changes with: - - pep8radius master --diff --in-place - -Alternatively, use the [flake8](http://pypi.python.org/pypi/flake8) tool for checking the style of your code. Additional standards are outlined on the [code style wiki page](https://github.com/pandas-dev/pandas/wiki/Code-Style-and-Conventions). - -Please try to maintain backward compatibility. *pandas* has lots of users with lots of existing code, so don't break it if at all possible. If you think breakage is required, clearly state why as part of the pull request. Also, be careful when changing method signatures and add deprecation warnings where needed. 
- -### Test-driven development/code writing - -*pandas* is serious about testing and strongly encourages contributors to embrace [test-driven development (TDD)](http://en.wikipedia.org/wiki/Test-driven_development). This development process "relies on the repetition of a very short development cycle: first the developer writes an (initially failing) automated test case that defines a desired improvement or new function, then produces the minimum amount of code to pass that test." So, before actually writing any code, you should write your tests. Often the test can be taken from the original GitHub issue. However, it is always worth considering additional use cases and writing corresponding tests. - -Adding tests is one of the most common requests after code is pushed to *pandas*. Therefore, it is worth getting in the habit of writing tests ahead of time so this is never an issue. - -Like many packages, *pandas* uses the [Nose testing system](https://nose.readthedocs.io/en/latest/index.html) and the convenient extensions in [numpy.testing](http://docs.scipy.org/doc/numpy/reference/routines.testing.html). - -#### Writing tests - -All tests should go into the `tests` subdirectory of the specific package. This folder contains many current examples of tests, and we suggest looking to these for inspiration. If your test requires working with files or network connectivity, there is more information on the [testing page](https://github.com/pandas-dev/pandas/wiki/Testing) of the wiki. - -The `pandas.util.testing` module has many special `assert` functions that make it easier to make statements about whether Series or DataFrame objects are equivalent. 
The easiest way to verify that your code is correct is to explicitly construct the result you expect, then compare the actual result to the expected correct result: - - def test_pivot(self): - data = { - 'index' : ['A', 'B', 'C', 'C', 'B', 'A'], - 'columns' : ['One', 'One', 'One', 'Two', 'Two', 'Two'], - 'values' : [1., 2., 3., 3., 2., 1.] - } - - frame = DataFrame(data) - pivoted = frame.pivot(index='index', columns='columns', values='values') - - expected = DataFrame({ - 'One' : {'A' : 1., 'B' : 2., 'C' : 3.}, - 'Two' : {'A' : 1., 'B' : 2., 'C' : 3.} - }) - - assert_frame_equal(pivoted, expected) - -#### Running the test suite - -The tests can then be run directly inside your Git clone (without having to install *pandas*) by typing: - - nosetests pandas - -The tests suite is exhaustive and takes around 20 minutes to run. Often it is worth running only a subset of tests first around your changes before running the entire suite. This is done using one of the following constructs: - - nosetests pandas/tests/[test-module].py - nosetests pandas/tests/[test-module].py:[TestClass] - nosetests pandas/tests/[test-module].py:[TestClass].[test_method] - -#### Running the performance test suite - -Performance matters and it is worth considering whether your code has introduced performance regressions. *pandas* is in the process of migrating to the [asv library](https://github.com/spacetelescope/asv) to enable easy monitoring of the performance of critical *pandas* operations. These benchmarks are all found in the `pandas/asv_bench` directory. asv supports both python2 and python3. - -> **note** -> -> The asv benchmark suite was translated from the previous framework, vbench, so many stylistic issues are likely a result of automated transformation of the code. - -To use asv you will need either `conda` or `virtualenv`. For more details please check the [asv installation webpage](https://asv.readthedocs.io/en/latest/installing.html). 
- -To install asv: - - pip install git+https://github.com/spacetelescope/asv - -If you need to run a benchmark, change your directory to `/asv_bench/` and run the following if you have been developing on `master`: - - asv continuous master - -If you are working on another branch, either of the following can be used: - - asv continuous master HEAD - asv continuous master your_branch - -This will check out the master revision and run the suite on both master and your commit. Running the full test suite can take up to one hour and use up to 3GB of RAM. Usually it is sufficient to paste only a subset of the results into the pull request to show that the committed changes do not cause unexpected performance regressions. - -You can run specific benchmarks using the `-b` flag, which takes a regular expression. For example, this will only run tests from a `pandas/asv_bench/benchmarks/groupby.py` file: - - asv continuous master -b groupby - -If you want to only run a specific group of tests from a file, you can do it using `.` as a separator. For example: - - asv continuous master -b groupby.groupby_agg_builtins1 - -will only run a `groupby_agg_builtins1` test defined in a `groupby` file. - -It can also be useful to run tests in your current environment. You can simply do it by: - - asv dev - -This command is equivalent to: - - asv run --quick --show-stderr --python=same - -This will launch every test only once, display stderr from the benchmarks, and use your local `python` that comes from your `$PATH`. - -Information on how to write a benchmark can be found in the [asv documentation](https://asv.readthedocs.io/en/latest/writing_benchmarks.html). - -#### Running the vbench performance test suite (phasing out) - -Historically, *pandas* used [vbench library](https://github.com/pydata/vbench) to enable easy monitoring of the performance of critical *pandas* operations. These benchmarks are all found in the `pandas/vb_suite` directory. vbench currently only works on python2. 
- -To install vbench: - - pip install git+https://github.com/pydata/vbench - -Vbench also requires `sqlalchemy`, `gitpython`, and `psutil`, which can all be installed using pip. If you need to run a benchmark, change your directory to the *pandas* root and run: - - ./test_perf.sh -b master -t HEAD - -This will check out the master revision and run the suite on both master and your commit. Running the full test suite can take up to one hour and use up to 3GB of RAM. Usually it is sufficient to paste a subset of the results into the Pull Request to show that the committed changes do not cause unexpected performance regressions. - -You can run specific benchmarks using the `-r` flag, which takes a regular expression. - -See the [performance testing wiki](https://github.com/pandas-dev/pandas/wiki/Performance-Testing) for information on how to write a benchmark. - -### Documenting your code - -Changes should be reflected in the release notes located in `doc/source/whatsnew/vx.y.z.txt`. This file contains an ongoing change log for each release. Add an entry to this file to document your fix, enhancement or (unavoidable) breaking change. Make sure to include the GitHub issue number when adding your entry (using `` :issue:`1234` `` where 1234 is the issue/pull request number). - -If your code is an enhancement, it is most likely necessary to add usage examples to the existing documentation. This can be done following the section regarding documentation above <contributing.documentation>. Further, to let users know when this feature was added, the `versionadded` directive is used. The sphinx syntax for that is: - -``` sourceCode -.. versionadded:: 0.17.0 -``` - -This will put the text *New in version 0.17.0* wherever you put the sphinx directive. 
This should also be put in the docstring when adding a new function or method ([example](https://github.com/pandas-dev/pandas/blob/v0.16.2/pandas/core/generic.py#L1959)) or a new keyword argument ([example](https://github.com/pandas-dev/pandas/blob/v0.16.2/pandas/core/frame.py#L1171)). - -Contributing your changes to *pandas* -------------------------------------- - -### Committing your code - -Keep style fixes to a separate commit to make your pull request more readable. - -Once you've made changes, you can see them by typing: - - git status - -If you have created a new file, it is not being tracked by git. Add it by typing: - - git add path/to/file-to-be-added.py - -Doing 'git status' again should give something like: - - # On branch shiny-new-feature - # - # modified: /relative/path/to/file-you-added.py - # - -Finally, commit your changes to your local repository with an explanatory message. *Pandas* uses a convention for commit message prefixes and layout. Here are some common prefixes along with general guidelines for when to use them: - -> - ENH: Enhancement, new functionality -> - BUG: Bug fix -> - DOC: Additions/updates to documentation -> - TST: Additions/updates to tests -> - BLD: Updates to the build process/scripts -> - PERF: Performance improvement -> - CLN: Code cleanup - -The following defines how a commit message should be structured. Please reference the relevant GitHub issues in your commit message using GH1234 or \#1234. Either style is fine, but the former is generally preferred: - -> - a subject line with < 80 chars. -> - One blank line. -> - Optionally, a commit message body. - -Now you can commit your changes in your local repository: - - git commit -m - -### Combining commits - -If you have multiple commits, you may want to combine them into one commit, often referred to as "squashing" or "rebasing". This is a common request by package maintainers when submitting a pull request as it maintains a more compact commit history. 
To rebase your commits: - - git rebase -i HEAD~# - -Where \# is the number of commits you want to combine. Then you can pick the relevant commit message and discard others. - -To squash to the master branch do: - - git rebase -i master - -Use the `s` option on a commit to `squash`, meaning to keep the commit messages, or `f` to `fixup`, meaning to merge the commit messages. - -Then you will need to push the branch (see below) forcefully to replace the current commits with the new ones: - - git push origin shiny-new-feature -f - -### Pushing your changes - -When you want your changes to appear publicly on your GitHub page, push your forked feature branch's commits: - - git push origin shiny-new-feature - -Here `origin` is the default name given to your remote repository on GitHub. You can see the remote repositories: - - git remote -v - -If you added the upstream repository as described above you will see something like: - - origin git@github.com:yourname/pandas.git (fetch) - origin git@github.com:yourname/pandas.git (push) - upstream git://github.com/pandas-dev/pandas.git (fetch) - upstream git://github.com/pandas-dev/pandas.git (push) - -Now your code is on GitHub, but it is not yet a part of the *pandas* project. For that to happen, a pull request needs to be submitted on GitHub. - -### Review your code - -When you're ready to ask for a code review, file a pull request. Before you do, once again make sure that you have followed all the guidelines outlined in this document regarding code style, tests, performance tests, and documentation. You should also double check your branch changes against the branch it was based on: - -1. Navigate to your repository on GitHub -- -2. Click on `Branches` -3. Click on the `Compare` button for your feature branch -4. Select the `base` and `compare` branches, if necessary. This will be `master` and `shiny-new-feature`, respectively. 
- -### Finally, make the pull request - -If everything looks good, you are ready to make a pull request. A pull request is how code from a local repository becomes available to the GitHub community and can be looked at and eventually merged into the master version. This pull request and its associated changes will eventually be committed to the master branch and available in the next release. To submit a pull request: - -1. Navigate to your repository on GitHub -2. Click on the `Pull Request` button -3. You can then click on `Commits` and `Files Changed` to make sure everything looks okay one last time -4. Write a description of your changes in the `Preview Discussion` tab -5. Click `Send Pull Request`. - -This request then goes to the repository maintainers, and they will review the code. If you need to make more changes, you can make them in your branch, push them to GitHub, and the pull request will be automatically updated. Pushing them to GitHub again is done by: - - git push -f origin shiny-new-feature - -This will automatically update your pull request with the latest code and restart the Travis-CI tests. - -### Delete your merged branch (optional) - -Once your feature branch is accepted into upstream, you'll probably want to get rid of the branch. First, merge upstream master into your branch so git knows it is safe to delete your branch: - - git fetch upstream - git checkout master - git merge upstream/master +Getting Started +--------------- +If you are looking to contribute to the *pandas* codebase, the best place to start is the [GitHub "issues" tab](https://github.com/pandas-dev/pandas/issues). This is also a great place for filing bug reports and making suggestions for ways in which we can improve the code and documentation. -Then you can just do: +If you have additional questions, feel free to ask them on the [mailing list](https://groups.google.com/forum/?fromgroups#!forum/pydata) or on [Gitter](https://gitter.im/pydata/pandas). 
Further information can also be found in the [Getting Started](https://github.com/pandas-dev/pandas/blob/master/doc/source/contributing.rst#where-to-start) section of our main contribution doc. - git branch -d shiny-new-feature +Filing Issues +------------- +If you notice a bug in the code or in docs or have suggestions for how we can improve either, feel free to create an issue on the [GitHub "issues" tab](https://github.com/pandas-dev/pandas/issues) using [GitHub's "issue" form](https://github.com/pandas-dev/pandas/issues/new). The form contains some questions that will help us best address your issue. For more information regarding how to file issues against *pandas*, please refer to the [Bug reports and enhancement requests](https://github.com/pandas-dev/pandas/blob/master/doc/source/contributing.rst#bug-reports-and-enhancement-requests) section of our main contribution doc. -Make sure you use a lower-case `-d`, or else git won't warn you if your feature branch has not actually been merged. +Contributing to the Codebase +---------------------------- +The code is hosted on [GitHub](https://www.github.com/pandas-dev/pandas), so you will need to use [Git](http://git-scm.com/) to clone the project and make changes to the codebase. Once you have obtained a copy of the code, you should create a development environment that is separate from your existing Python environment so that you can make and test changes without compromising your own work environment. For more information, please refer to the [Working with the code](https://github.com/pandas-dev/pandas/blob/master/doc/source/contributing.rst#working-with-the-code) section of our main contribution docs. -The branch will still exist on GitHub, so to delete it there do: +Before submitting your changes for review, make sure to check that your changes do not break any tests.
More information about our test suites can be found [here](https://github.com/pandas-dev/pandas/blob/master/doc/source/contributing.rst#test-driven-development-code-writing). We also have guidelines regarding coding style that will be enforced during testing. Details about coding style can be found [here](https://github.com/pandas-dev/pandas/blob/master/doc/source/contributing.rst#code-standards). - git push origin --delete shiny-new-feature +Once your changes are ready to be submitted, make sure to push your changes to GitHub before creating a pull request. Details about how to do that can be found in the [Contributing your changes to pandas](https://github.com/pandas-dev/pandas/blob/master/doc/source/contributing.rst#contributing-your-changes-to-pandas) section of our main contribution docs. We will review your changes, and you will most likely be asked to make additional changes before it is finally ready to merge. However, once it's ready, we will merge it, and you will have successfully contributed to the codebase! diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 1f614b54b1f71..e33835c462511 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -8,11 +8,22 @@ [this should explain **why** the current behaviour is a problem and why the expected output is a better solution.] +**Note**: We receive a lot of issues on our GitHub tracker, so it is very possible that your issue has been posted before. Please check first before submitting so that we do not have to handle and close duplicates! + +**Note**: Many problems can be resolved by simply upgrading `pandas` to the latest version. Before submitting, please check if that solution works for you. If possible, you may want to check if `master` addresses this issue, but that is not necessary.
+ +For documentation-related issues, you can check the latest versions of the docs on `master` here: + +https://pandas-docs.github.io/pandas-docs-travis/ + +If the issue has not been resolved there, go ahead and file it in the issue tracker. + #### Expected Output #### Output of ``pd.show_versions()``
-# Paste the output here pd.show_versions() here + +[paste the output of ``pd.show_versions()`` here below this line]
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 918d427ee4f4c..c1e02bd8eafc4 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,4 +1,28 @@ - - [ ] closes #xxxx - - [ ] tests added / passed - - [ ] passes ``git diff upstream/master | flake8 --diff`` - - [ ] whatsnew entry +Checklist for the pandas documentation sprint (ignore this if you are doing +an unrelated PR): + +- [ ] PR title is "DOC: update the <your-function-or-method> docstring" +- [ ] The validation script passes: `scripts/validate_docstrings.py <your-function-or-method>` +- [ ] The PEP8 style check passes: `git diff upstream/master -u -- "*.py" | flake8 --diff` +- [ ] The html version looks good: `python doc/make.py --single <your-function-or-method>` +- [ ] It has been proofread on language by another sprint participant + +Please include the output of the validation script below between the "```" ticks: + +``` +# paste output of "scripts/validate_docstrings.py <your-function-or-method>" here +# between the "```" (remove this comment, but keep the "```") + +``` + +If the validation script still gives errors, but you think there is a good reason +to deviate in this case (and there are certainly such cases), please state this +explicitly.
+ + +Checklist for other PRs (remove this part if you are doing a PR for the pandas documentation sprint): + +- [ ] closes #xxxx +- [ ] tests added / passed +- [ ] passes `git diff upstream/master -u -- "*.py" | flake8 --diff` +- [ ] whatsnew entry diff --git a/.gitignore b/.gitignore index a509fcf736ea8..00dac6e336c37 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,7 @@ *$ *.bak *flymake* +*.iml *.kdev4 *.log *.swp @@ -20,6 +21,7 @@ .ipynb_checkpoints .tags .cache/ +.vscode/ # Compiled source # ################### @@ -86,8 +88,9 @@ scikits *.c *.cpp -# Performance Testing # -####################### +# Unit / Performance Testing # +############################## +.pytest_cache/ asv_bench/env/ asv_bench/html/ asv_bench/results/ @@ -103,3 +106,6 @@ doc/source/index.rst doc/build/html/index.html # Windows specific leftover: doc/tmp.sv +doc/source/styled.xlsx +doc/source/templates/ +env/ diff --git a/.pep8speaks.yml b/.pep8speaks.yml new file mode 100644 index 0000000000000..fda26d87bf7f6 --- /dev/null +++ b/.pep8speaks.yml @@ -0,0 +1,12 @@ +# File : .pep8speaks.yml + +scanner: + diff_only: True # If True, errors caused by only the patch are shown + +pycodestyle: + max-line-length: 79 + ignore: # Errors and warnings to ignore + - E402, # module level import not at top of file + - E731, # do not assign a lambda expression, use a def + - E741, # do not use variables named 'l', 'O', or 'I' + - W503 # line break before binary operator diff --git a/.travis.yml b/.travis.yml index 2ff5d508d0371..22ef6c819c6d4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,22 +1,25 @@ sudo: false language: python +# Default Python version is usually 2.7 +python: 3.5 -# To turn off cached miniconda, cython files and compiler cache comment out the -# USE_CACHE=true line for the build in the matrix below. 
To delete caches go to -# https://travis-ci.org/OWNER/REPOSITORY/caches or run +# To turn off cached cython files and compiler cache +# set NOCACHE-true +# To delete caches go to https://travis-ci.org/OWNER/REPOSITORY/caches or run # travis cache --delete inside the project directory from the travis command line client -# The cash directories will be deleted if anything in ci/ changes in a commit +# The cache directories will be deleted if anything in ci/ changes in a commit cache: + ccache: true directories: - - $HOME/miniconda # miniconda cache - $HOME/.cache # cython cache - $HOME/.ccache # compiler cache env: global: - - # pandas-docs-travis GH - - secure: "YvvTc+FrSYHgdxqoxn9s8VOaCWjvZzlkaf6k55kkmQqCYR9dPiLMsot1F96/N7o3YlD1s0znPQCak93Du8HHi/8809zAXloTaMSZrWz4R4qn96xlZFRE88O/w/Z1t3VVYpKX3MHlCggBc8MtXrqmvWKJMAqXyysZ4TTzoiJDPvE=" + # create a github personal access token + # cd pandas-dev/pandas + # travis encrypt 'PANDAS_GH_TOKEN=personal_access_token' -r pandas-dev/pandas + - secure: "EkWLZhbrp/mXJOx38CHjs7BnjXafsqHtwxPQrqWy457VDFWhIY1DMnIR/lOWG+a20Qv52sCsFtiZEmMfUjf0pLGXOqurdxbYBGJ7/ikFLk9yV2rDwiArUlVM9bWFnFxHvdz9zewBH55WurrY4ShZWyV+x2dWjjceWG5VpWeI6sA=" git: # for cloning @@ -24,312 +27,108 @@ git: matrix: fast_finish: true + exclude: + # Exclude the default Python 3.5 build + - python: 3.5 include: - - language: objective-c - os: osx - compiler: clang - osx_image: xcode6.4 + - os: osx + language: generic env: - - PYTHON_VERSION=3.5 - - JOB_NAME: "35_osx" - - TEST_ARGS="--skip-slow --skip-network" - - BUILD_TYPE=conda - - JOB_TAG=_OSX - - TRAVIS_PYTHON_VERSION=3.5 - - CACHE_NAME="35_osx" - - USE_CACHE=true - - python: 2.7 + - JOB="3.5_OSX" TEST_ARGS="--skip-slow --skip-network" + - dist: trusty env: - - PYTHON_VERSION=2.7 - - JOB_NAME: "27_slow_nnet_LOCALE" - - TEST_ARGS="--only-slow --skip-network" - - LOCALE_OVERRIDE="zh_CN.UTF-8" - - FULL_DEPS=true - - JOB_TAG=_LOCALE - - CACHE_NAME="27_slow_nnet_LOCALE" - - USE_CACHE=true + - JOB="2.7_LOCALE" 
LOCALE_OVERRIDE="zh_CN.UTF-8" SLOW=true addons: apt: packages: - language-pack-zh-hans - - python: 2.7 + - dist: trusty env: - - PYTHON_VERSION=2.7 - - JOB_NAME: "27_nslow" - - TEST_ARGS="--skip-slow" - - FULL_DEPS=true - - CLIPBOARD_GUI=gtk2 - - LINT=true - - CACHE_NAME="27_nslow" - - USE_CACHE=true + - JOB="2.7" TEST_ARGS="--skip-slow" LINT=true addons: apt: packages: - python-gtk2 - - python: 3.5 - env: - - PYTHON_VERSION=3.5 - - JOB_NAME: "35_nslow" - - TEST_ARGS="--skip-slow --skip-network" - - FULL_DEPS=true - - CLIPBOARD=xsel - - COVERAGE=true - - CACHE_NAME="35_nslow" -# - USE_CACHE=true # Don't use cache for 35_nslow - addons: - apt: - packages: - - xsel - - python: 3.6 - env: - - PYTHON_VERSION=3.6 - - JOB_NAME: "36" - - TEST_ARGS="--skip-slow --skip-network" - - PANDAS_TESTING_MODE="deprecate" - addons: - apt: - packages: - - libatlas-base-dev - - gfortran -# In allow_failures - - python: 2.7 + # In allow_failures + - dist: trusty env: - - PYTHON_VERSION=2.7 - - JOB_NAME: "27_nslow_nnet_COMPAT" - - TEST_ARGS="--skip-slow --skip-network" - - LOCALE_OVERRIDE="it_IT.UTF-8" - - INSTALL_TEST=true - - JOB_TAG=_COMPAT - - CACHE_NAME="27_nslow_nnet_COMPAT" - - USE_CACHE=true - addons: - apt: - packages: - - language-pack-it -# In allow_failures - - python: 2.7 + - JOB="3.6" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" CONDA_FORGE=true COVERAGE=true + # In allow_failures + - dist: trusty env: - - PYTHON_VERSION=2.7 - - JOB_NAME: "27_slow" - - JOB_TAG=_SLOW - - TEST_ARGS="--only-slow --skip-network" - - FULL_DEPS=true - - CACHE_NAME="27_slow" - - USE_CACHE=true -# In allow_failures - - python: 2.7 + - JOB="2.7_SLOW" SLOW=true + # In allow_failures + - dist: trusty env: - - PYTHON_VERSION=2.7 - - JOB_NAME: "27_build_test_conda" - - JOB_TAG=_BUILD_TEST - - TEST_ARGS="--skip-slow" - - FULL_DEPS=true - - BUILD_TEST=true - - CACHE_NAME="27_build_test_conda" - - USE_CACHE=true -# In allow_failures - - python: 3.4 - env: - - PYTHON_VERSION=3.4 - 
- JOB_NAME: "34_nslow" - - LOCALE_OVERRIDE="zh_CN.UTF-8" - - TEST_ARGS="--skip-slow" - - FULL_DEPS=true - - CLIPBOARD=xsel - - CACHE_NAME="34_nslow" - - USE_CACHE=true + - JOB="3.6_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" addons: apt: packages: - xsel - - language-pack-zh-hans -# In allow_failures - - python: 3.4 - env: - - PYTHON_VERSION=3.4 - - JOB_NAME: "34_slow" - - JOB_TAG=_SLOW - - TEST_ARGS="--only-slow --skip-network" - - FULL_DEPS=true - - CLIPBOARD=xsel - - CACHE_NAME="34_slow" - - USE_CACHE=true - addons: - apt: - packages: - - xsel -# In allow_failures - - python: 3.5 - env: - - PYTHON_VERSION=3.5 - - JOB_NAME: "35_numpy_dev" - - JOB_TAG=_NUMPY_DEV - - TEST_ARGS="--skip-slow --skip-network" - - PANDAS_TESTING_MODE="deprecate" - - CACHE_NAME="35_numpy_dev" - - USE_CACHE=true - addons: - apt: - packages: - - libatlas-base-dev - - gfortran -# In allow_failures - - python: 3.5 + # In allow_failures + - dist: trusty env: - - PYTHON_VERSION=3.5 - - JOB_NAME: "35_ascii" - - JOB_TAG=_ASCII - - TEST_ARGS="--skip-slow --skip-network" - - LOCALE_OVERRIDE="C" - - CACHE_NAME="35_ascii" - - USE_CACHE=true -# In allow_failures - - python: 3.5 - env: - - PYTHON_VERSION=3.5 - - JOB_NAME: "doc_build" - - FULL_DEPS=true - - DOC_BUILD=true - - JOB_TAG=_DOC_BUILD - - CACHE_NAME="doc_build" - - USE_CACHE=true + - JOB="3.6_DOC" DOC=true allow_failures: - - python: 2.7 + - dist: trusty env: - - PYTHON_VERSION=2.7 - - JOB_NAME: "27_slow" - - JOB_TAG=_SLOW - - TEST_ARGS="--only-slow --skip-network" - - FULL_DEPS=true - - CACHE_NAME="27_slow" - - USE_CACHE=true - - python: 3.4 + - JOB="2.7_SLOW" SLOW=true + - dist: trusty env: - - PYTHON_VERSION=3.4 - - JOB_NAME: "34_slow" - - JOB_TAG=_SLOW - - TEST_ARGS="--only-slow --skip-network" - - FULL_DEPS=true - - CLIPBOARD=xsel - - CACHE_NAME="34_slow" - - USE_CACHE=true + - JOB="3.6_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" addons: apt: packages: - xsel - - 
python: 2.7 - env: - - PYTHON_VERSION=2.7 - - JOB_NAME: "27_build_test_conda" - - JOB_TAG=_BUILD_TEST - - TEST_ARGS="--skip-slow" - - FULL_DEPS=true - - BUILD_TEST=true - - CACHE_NAME="27_build_test_conda" - - USE_CACHE=true - - python: 3.4 - env: - - PYTHON_VERSION=3.4 - - JOB_NAME: "34_nslow" - - LOCALE_OVERRIDE="zh_CN.UTF-8" - - TEST_ARGS="--skip-slow" - - FULL_DEPS=true - - CLIPBOARD=xsel - - CACHE_NAME="34_nslow" - - USE_CACHE=true - addons: - apt: - packages: - - xsel - - language-pack-zh-hans - - python: 3.5 - env: - - PYTHON_VERSION=3.5 - - JOB_NAME: "35_numpy_dev" - - JOB_TAG=_NUMPY_DEV - - TEST_ARGS="--skip-slow --skip-network" - - PANDAS_TESTING_MODE="deprecate" - - CACHE_NAME="35_numpy_dev" - - USE_CACHE=true - addons: - apt: - packages: - - libatlas-base-dev - - gfortran - - python: 2.7 - env: - - PYTHON_VERSION=2.7 - - JOB_NAME: "27_nslow_nnet_COMPAT" - - TEST_ARGS="--skip-slow --skip-network" - - LOCALE_OVERRIDE="it_IT.UTF-8" - - INSTALL_TEST=true - - JOB_TAG=_COMPAT - - CACHE_NAME="27_nslow_nnet_COMPAT" - - USE_CACHE=true - addons: - apt: - packages: - - language-pack-it - - python: 3.5 - env: - - PYTHON_VERSION=3.5 - - JOB_NAME: "35_ascii" - - JOB_TAG=_ASCII - - TEST_ARGS="--skip-slow --skip-network" - - LOCALE_OVERRIDE="C" - - CACHE_NAME="35_ascii" - - USE_CACHE=true - - python: 3.5 + - dist: trusty env: - - PYTHON_VERSION=3.5 - - JOB_NAME: "doc_build" - - FULL_DEPS=true - - DOC_BUILD=true - - JOB_TAG=_DOC_BUILD - - CACHE_NAME="doc_build" - - USE_CACHE=true + - JOB="3.6_DOC" DOC=true before_install: - echo "before_install" + # set non-blocking IO on travis + # https://github.com/travis-ci/travis-ci/issues/8920#issuecomment-352661024 + - python -c 'import os,sys,fcntl; flags = fcntl.fcntl(sys.stdout, fcntl.F_GETFL); fcntl.fcntl(sys.stdout, fcntl.F_SETFL, flags&~os.O_NONBLOCK);' - source ci/travis_process_gbq_encryption.sh - - echo $VIRTUAL_ENV - export PATH="$HOME/miniconda3/bin:$PATH" - df -h - - date - pwd - uname -a - - python -V -# git info & 
get tags - git --version - git tag - - ci/before_install_travis.sh - - export DISPLAY=:99.0 install: - echo "install start" - - ci/check_cache.sh - ci/prep_cython_cache.sh - ci/install_travis.sh - ci/submit_cython_cache.sh - echo "install done" before_script: - - source activate pandas && pip install codecov - - ci/install_db.sh + - ci/install_db_travis.sh + - export DISPLAY=":99.0" + - ci/before_script_travis.sh script: - echo "script start" - ci/run_build_docs.sh - - ci/script.sh + - ci/script_single.sh + - ci/script_multi.sh - ci/lint.sh + - echo "checking imports" + - source activate pandas && python ci/check_imports.py - echo "script done" after_success: - - source activate pandas && codecov + - ci/upload_coverage.sh after_script: - echo "after_script start" - - ci/install_test.sh - - source activate pandas && python -c "import pandas; pandas.show_versions();" - - ci/print_skipped.py /tmp/pytest.xml + - source activate pandas && pushd /tmp && python -c "import pandas; pandas.show_versions();" && popd + - if [ -e /tmp/single.xml ]; then + ci/print_skipped.py /tmp/single.xml; + fi + - if [ -e /tmp/multiple.xml ]; then + ci/print_skipped.py /tmp/multiple.xml; + fi - echo "after_script done" diff --git a/AUTHORS.md b/AUTHORS.md new file mode 100644 index 0000000000000..dcaaea101f4c8 --- /dev/null +++ b/AUTHORS.md @@ -0,0 +1,57 @@ +About the Copyright Holders +=========================== + +* Copyright (c) 2008-2011 AQR Capital Management, LLC + + AQR Capital Management began pandas development in 2008. Development was + led by Wes McKinney. AQR released the source under this license in 2009. +* Copyright (c) 2011-2012, Lambda Foundry, Inc. + + Wes is now an employee of Lambda Foundry, and remains the pandas project + lead. +* Copyright (c) 2011-2012, PyData Development Team + + The PyData Development Team is the collection of developers of the PyData + project. This includes all of the PyData sub-projects, including pandas. 
The + core team that coordinates development on GitHub can be found here: + http://github.com/pydata. + +Full credits for pandas contributors can be found in the documentation. + +Our Copyright Policy +==================== + +PyData uses a shared copyright model. Each contributor maintains copyright +over their contributions to PyData. However, it is important to note that +these contributions are typically only changes to the repositories. Thus, +the PyData source code, in its entirety, is not the copyright of any single +person or institution. Instead, it is the collective copyright of the +entire PyData Development Team. If individual contributors want to maintain +a record of what changes/contributions they have specific copyright on, +they should indicate their copyright in the commit message of the change +when they commit the change to one of the PyData repositories. + +With this in mind, the following banner should be used in any source code +file to indicate the copyright and license terms: + +``` +#----------------------------------------------------------------------------- +# Copyright (c) 2012, PyData Development Team +# All rights reserved. +# +# Distributed under the terms of the BSD Simplified License. +# +# The full license is in the LICENSE file, distributed with this software. +#----------------------------------------------------------------------------- +``` + +Other licenses can be found in the LICENSES directory. + +License +======= + +pandas is distributed under a 3-clause ("Simplified" or "New") BSD +license. Parts of NumPy, SciPy, numpydoc, bottleneck, which all have +BSD-compatible licenses, are included. Their licenses follow the pandas +license. + diff --git a/LICENSE b/LICENSE index c9b8834e8774b..924de26253bf4 100644 --- a/LICENSE +++ b/LICENSE @@ -1,87 +1,29 @@ -======= -License -======= +BSD 3-Clause License -pandas is distributed under a 3-clause ("Simplified" or "New") BSD -license. 
Parts of NumPy, SciPy, numpydoc, bottleneck, which all have -BSD-compatible licenses, are included. Their licenses follow the pandas -license. - -pandas license -============== - -Copyright (c) 2011-2012, Lambda Foundry, Inc. and PyData Development Team -All rights reserved. - -Copyright (c) 2008-2011 AQR Capital Management, LLC +Copyright (c) 2008-2012, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team All rights reserved. Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials provided - with the distribution. - - * Neither the name of the copyright holder nor the names of any - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -About the Copyright Holders -=========================== - -AQR Capital Management began pandas development in 2008. Development was -led by Wes McKinney. 
AQR released the source under this license in 2009. -Wes is now an employee of Lambda Foundry, and remains the pandas project -lead. - -The PyData Development Team is the collection of developers of the PyData -project. This includes all of the PyData sub-projects, including pandas. The -core team that coordinates development on GitHub can be found here: -http://github.com/pydata. - -Full credits for pandas contributors can be found in the documentation. - -Our Copyright Policy -==================== - -PyData uses a shared copyright model. Each contributor maintains copyright -over their contributions to PyData. However, it is important to note that -these contributions are typically only changes to the repositories. Thus, -the PyData source code, in its entirety, is not the copyright of any single -person or institution. Instead, it is the collective copyright of the -entire PyData Development Team. If individual contributors want to maintain -a record of what changes/contributions they have specific copyright on, -they should indicate their copyright in the commit message of the change -when they commit the change to one of the PyData repositories. - -With this in mind, the following banner should be used in any source code -file to indicate the copyright and license terms: - -#----------------------------------------------------------------------------- -# Copyright (c) 2012, PyData Development Team -# All rights reserved. -# -# Distributed under the terms of the BSD Simplified License. -# -# The full license is in the LICENSE file, distributed with this software. -#----------------------------------------------------------------------------- - -Other licenses can be found in the LICENSES directory. 
\ No newline at end of file diff --git a/LICENSES/XARRAY_LICENSE b/LICENSES/XARRAY_LICENSE new file mode 100644 index 0000000000000..37ec93a14fdcd --- /dev/null +++ b/LICENSES/XARRAY_LICENSE @@ -0,0 +1,191 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and +distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by the copyright +owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities +that control, are controlled by, or are under common control with that entity. +For the purposes of this definition, "control" means (i) the power, direct or +indirect, to cause the direction or management of such entity, whether by +contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the +outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity exercising +permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, including +but not limited to software source code, documentation source, and configuration +files. + +"Object" form shall mean any form resulting from mechanical transformation or +translation of a Source form, including but not limited to compiled object code, +generated documentation, and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or Object form, made +available under the License, as indicated by a copyright notice that is included +in or attached to the work (an example is provided in the Appendix below). 
+ +"Derivative Works" shall mean any work, whether in Source or Object form, that +is based on (or derived from) the Work and for which the editorial revisions, +annotations, elaborations, or other modifications represent, as a whole, an +original work of authorship. For the purposes of this License, Derivative Works +shall not include works that remain separable from, or merely link (or bind by +name) to the interfaces of, the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including the original version +of the Work and any modifications or additions to that Work or Derivative Works +thereof, that is intentionally submitted to Licensor for inclusion in the Work +by the copyright owner or by an individual or Legal Entity authorized to submit +on behalf of the copyright owner. For the purposes of this definition, +"submitted" means any form of electronic, verbal, or written communication sent +to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, and +issue tracking systems that are managed by, or on behalf of, the Licensor for +the purpose of discussing and improving the Work, but excluding communication +that is conspicuously marked or otherwise designated in writing by the copyright +owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf +of whom a Contribution has been received by Licensor and subsequently +incorporated within the Work. + +2. Grant of Copyright License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable copyright license to reproduce, prepare Derivative Works of, +publicly display, publicly perform, sublicense, and distribute the Work and such +Derivative Works in Source or Object form. + +3. Grant of Patent License. 
+ +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable (except as stated in this section) patent license to make, have +made, use, offer to sell, sell, import, and otherwise transfer the Work, where +such license applies only to those patent claims licensable by such Contributor +that are necessarily infringed by their Contribution(s) alone or by combination +of their Contribution(s) with the Work to which such Contribution(s) was +submitted. If You institute patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Work or a +Contribution incorporated within the Work constitutes direct or contributory +patent infringement, then any patent licenses granted to You under this License +for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. + +You may reproduce and distribute copies of the Work or Derivative Works thereof +in any medium, with or without modifications, and in Source or Object form, +provided that You meet the following conditions: + +You must give any other recipients of the Work or Derivative Works a copy of +this License; and +You must cause any modified files to carry prominent notices stating that You +changed the files; and +You must retain, in the Source form of any Derivative Works that You distribute, +all copyright, patent, trademark, and attribution notices from the Source form +of the Work, excluding those notices that do not pertain to any part of the +Derivative Works; and +If the Work includes a "NOTICE" text file as part of its distribution, then any +Derivative Works that You distribute must include a readable copy of the +attribution notices contained within such NOTICE file, excluding those notices +that do not pertain to any part of the Derivative Works, in at least one of the +following places: within a NOTICE text file 
distributed as part of the +Derivative Works; within the Source form or documentation, if provided along +with the Derivative Works; or, within a display generated by the Derivative +Works, if and wherever such third-party notices normally appear. The contents of +the NOTICE file are for informational purposes only and do not modify the +License. You may add Your own attribution notices within Derivative Works that +You distribute, alongside or as an addendum to the NOTICE text from the Work, +provided that such additional attribution notices cannot be construed as +modifying the License. +You may add Your own copyright statement to Your modifications and may provide +additional or different license terms and conditions for use, reproduction, or +distribution of Your modifications, or for any such Derivative Works as a whole, +provided Your use, reproduction, and distribution of the Work otherwise complies +with the conditions stated in this License. + +5. Submission of Contributions. + +Unless You explicitly state otherwise, any Contribution intentionally submitted +for inclusion in the Work by You to the Licensor shall be under the terms and +conditions of this License, without any additional terms or conditions. +Notwithstanding the above, nothing herein shall supersede or modify the terms of +any separate license agreement you may have executed with Licensor regarding +such Contributions. + +6. Trademarks. + +This License does not grant permission to use the trade names, trademarks, +service marks, or product names of the Licensor, except as required for +reasonable and customary use in describing the origin of the Work and +reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
+ +Unless required by applicable law or agreed to in writing, Licensor provides the +Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, +including, without limitation, any warranties or conditions of TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are +solely responsible for determining the appropriateness of using or +redistributing the Work and assume any risks associated with Your exercise of +permissions under this License. + +8. Limitation of Liability. + +In no event and under no legal theory, whether in tort (including negligence), +contract, or otherwise, unless required by applicable law (such as deliberate +and grossly negligent acts) or agreed to in writing, shall any Contributor be +liable to You for damages, including any direct, indirect, special, incidental, +or consequential damages of any character arising as a result of this License or +out of the use or inability to use the Work (including but not limited to +damages for loss of goodwill, work stoppage, computer failure or malfunction, or +any and all other commercial damages or losses), even if such Contributor has +been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. + +While redistributing the Work or Derivative Works thereof, You may choose to +offer, and charge a fee for, acceptance of support, warranty, indemnity, or +other liability obligations and/or rights consistent with this License. However, +in accepting such obligations, You may act only on Your own behalf and on Your +sole responsibility, not on behalf of any other Contributor, and only if You +agree to indemnify, defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason of your +accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work + +To apply the Apache License to your work, attach the following boilerplate +notice, with the fields enclosed by brackets "[]" replaced with your own +identifying information. (Don't include the brackets!) The text should be +enclosed in the appropriate comment syntax for the file format. We also +recommend that a file or class name and description of purpose be included on +the same "printed page" as the copyright notice for easier identification within +third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/MANIFEST.in b/MANIFEST.in index 2d26fbfd6adaf..9773019c6e6e0 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,13 +1,13 @@ include MANIFEST.in include LICENSE include RELEASE.md -include README.rst +include README.md include setup.py +include pyproject.toml graft doc prune doc/build -graft examples graft pandas global-exclude *.so @@ -26,3 +26,4 @@ global-exclude *.png # recursive-include LICENSES * include versioneer.py include pandas/_version.py +include pandas/io/formats/templates/*.tpl diff --git a/Makefile b/Makefile index 9a768932b8bea..c79175cd3c401 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -tseries: pandas/lib.pyx pandas/tslib.pyx pandas/hashtable.pyx +tseries: pandas/_libs/lib.pyx pandas/_libs/tslib.pyx pandas/_libs/hashtable.pyx python setup.py build_ext --inplace .PHONY : develop build clean clean_pyc tseries doc @@ -9,12 +9,12 @@ clean: clean_pyc: -find . -name '*.py[co]' -exec rm {} \; -sparse: pandas/src/sparse.pyx - python setup.py build_ext --inplace - build: clean_pyc python setup.py build_ext --inplace +lint-diff: + git diff master --name-only -- "*.py" | grep "pandas" | xargs flake8 + develop: build -python setup.py develop diff --git a/README.md b/README.md index 4293d7294d5e0..86cf95508a5d9 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@
latest release
latest releaselatest release
Package Status
+ + circleci build status + +
- - appveyor build status + + appveyor build status
Conda - - conda downloads + + conda default downloads + +
Conda-forge + + conda-forge downloads
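-Specify a header row (by default ``<th>`` elements are used to form the column -index); if specified, the header row is taken from the data minus the parsed -header elements (``<th>`` elements). +Specify a header row (by default ``<th>`` or ``<td>`` elements located within a +``<thead>`` are used to form the column index, if multiple rows are contained within +``<thead>`` then a multi-index is created); if specified, the header row is taken +from the data minus the parsed header elements (``<th>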
`` elements). .. code-block:: python dfs = pd.read_html(url, header=0) -Specify an index column +Specify an index column: .. code-block:: python dfs = pd.read_html(url, index_col=0) -Specify a number of rows to skip +Specify a number of rows to skip: .. code-block:: python dfs = pd.read_html(url, skiprows=0) Specify a number of rows to skip using a list (``xrange`` (Python 2 only) works -as well) +as well): .. code-block:: python dfs = pd.read_html(url, skiprows=range(2)) -Specify an HTML attribute +Specify an HTML attribute: .. code-block:: python @@ -2146,7 +2389,7 @@ Specify an HTML attribute dfs2 = pd.read_html(url, attrs={'class': 'sortable'}) print(np.array_equal(dfs1[0], dfs2[0])) # Should be True -Specify values that should be converted to NaN +Specify values that should be converted to NaN: .. code-block:: python @@ -2154,7 +2397,7 @@ Specify values that should be converted to NaN .. versionadded:: 0.19 -Specify whether to keep the default set of NaN values +Specify whether to keep the default set of NaN values: .. code-block:: python @@ -2164,7 +2407,7 @@ Specify whether to keep the default set of NaN values Specify converters for columns. This is useful for numerical text data that has leading zeros. By default columns that are numerical are cast to numeric -types and the leading zeros are lost. To avoid this, we can convert these +types and the leading zeros are lost. To avoid this, we can convert these columns to strings. .. code-block:: python @@ -2175,13 +2418,13 @@ columns to strings. .. versionadded:: 0.19 -Use some combination of the above +Use some combination of the above: .. code-block:: python dfs = pd.read_html(url, match='Metcalf Bank', index_col=0) -Read in pandas ``to_html`` output (with some loss of floating point precision) +Read in pandas ``to_html`` output (with some loss of floating point precision): .. 
code-block:: python @@ -2190,15 +2433,15 @@ Read in pandas ``to_html`` output (with some loss of floating point precision) dfin = pd.read_html(s, index_col=0) The ``lxml`` backend will raise an error on a failed parse if that is the only -parser you provide (if you only have a single parser you can provide just a +parser you provide. If you only have a single parser you can provide just a string, but it is considered good practice to pass a list with one string if, -for example, the function expects a sequence of strings) +for example, the function expects a sequence of strings. You may use: .. code-block:: python dfs = pd.read_html(url, 'Metcalf Bank', index_col=0, flavor=['lxml']) -or +Or you could pass ``flavor='lxml'`` without a list: .. code-block:: python @@ -2252,7 +2495,7 @@ HTML: .. raw:: html :file: _static/basic.html -The ``columns`` argument will limit the columns shown +The ``columns`` argument will limit the columns shown: .. ipython:: python @@ -2269,7 +2512,7 @@ HTML: :file: _static/columns.html ``float_format`` takes a Python callable to control the precision of floating -point values +point values: .. ipython:: python @@ -2286,7 +2529,7 @@ HTML: :file: _static/float_format.html ``bold_rows`` will make the row labels bold by default, but you can turn that -off +off: .. ipython:: python @@ -2359,7 +2602,7 @@ parse HTML tables in the top-level pandas io function ``read_html``. * Benefits - * |lxml|_ is very fast + * |lxml|_ is very fast. * |lxml|_ requires Cython to install correctly. @@ -2432,8 +2675,8 @@ The :func:`~pandas.read_excel` method can read Excel 2003 (``.xls``) and Excel 2007+ (``.xlsx``) files using the ``xlrd`` Python module. The :meth:`~DataFrame.to_excel` instance method is used for saving a ``DataFrame`` to Excel. Generally the semantics are -similar to working with :ref:`csv` data. See the :ref:`cookbook` for some -advanced strategies +similar to working with :ref:`csv` data. +See the :ref:`cookbook` for some advanced strategies. 
.. _io.excel_reader: @@ -2441,12 +2684,12 @@ Reading Excel Files ''''''''''''''''''' In the most basic use-case, ``read_excel`` takes a path to an Excel -file, and the ``sheetname`` indicating which sheet to parse. +file, and the ``sheet_name`` indicating which sheet to parse. .. code-block:: python # Returns a DataFrame - read_excel('path_to_file.xls', sheetname='Sheet1') + read_excel('path_to_file.xls', sheet_name='Sheet1') .. _io.excel.excelfile_class: @@ -2455,7 +2698,7 @@ file, and the ``sheetname`` indicating which sheet to parse. +++++++++++++++++++ To facilitate working with multiple sheets from the same file, the ``ExcelFile`` -class can be used to wrap the file and can be be passed into ``read_excel`` +class can be used to wrap the file and can be passed into ``read_excel`` There will be a performance benefit for reading multiple sheets as the file is read into memory only once. @@ -2476,7 +2719,7 @@ The ``sheet_names`` property will generate a list of the sheet names in the file. The primary use-case for an ``ExcelFile`` is parsing multiple sheets with -different parameters +different parameters: .. code-block:: python @@ -2500,26 +2743,17 @@ of sheet names can simply be passed to ``read_excel`` with no loss in performanc # equivalent using the read_excel function data = read_excel('path_to_file.xls', ['Sheet1', 'Sheet2'], index_col=None, na_values=['NA']) -.. versionadded:: 0.12 - -``ExcelFile`` has been moved to the top level namespace. - -.. versionadded:: 0.17 - -``read_excel`` can take an ``ExcelFile`` object as input - - .. _io.excel.specifying_sheets: Specifying Sheets +++++++++++++++++ -.. note :: The second argument is ``sheetname``, not to be confused with ``ExcelFile.sheet_names`` +.. note :: The second argument is ``sheet_name``, not to be confused with ``ExcelFile.sheet_names``. .. note :: An ExcelFile's attribute ``sheet_names`` provides access to a list of sheets. 
-- The arguments ``sheetname`` allows specifying the sheet or sheets to read. -- The default value for ``sheetname`` is 0, indicating to read the first sheet +- The arguments ``sheet_name`` allows specifying the sheet or sheets to read. +- The default value for ``sheet_name`` is 0, indicating to read the first sheet - Pass a string to refer to the name of a particular sheet in the workbook. - Pass an integer to refer to the index of a sheet. Indices follow Python convention, beginning at 0. @@ -2550,22 +2784,17 @@ Using None to get all sheets: .. code-block:: python # Returns a dictionary of DataFrames - read_excel('path_to_file.xls',sheetname=None) + read_excel('path_to_file.xls',sheet_name=None) Using a list to get multiple sheets: .. code-block:: python # Returns the 1st and 4th sheet, as a dictionary of DataFrames. - read_excel('path_to_file.xls',sheetname=['Sheet1',3]) + read_excel('path_to_file.xls',sheet_name=['Sheet1',3]) -.. versionadded:: 0.16 - -``read_excel`` can read more than one sheet, by setting ``sheetname`` to either +``read_excel`` can read more than one sheet, by setting ``sheet_name`` to either a list of sheet names, a list of sheet positions, or ``None`` to read all sheets. - -.. versionadded:: 0.13 - Sheets can be specified by sheet index or sheet name, using an integer or string, respectively. @@ -2574,8 +2803,6 @@ respectively. Reading a ``MultiIndex`` ++++++++++++++++++++++++ -.. versionadded:: 0.17 - ``read_excel`` can read a ``MultiIndex`` index, by passing a list of columns to ``index_col`` and a ``MultiIndex`` column by passing a list of rows to ``header``. If either the ``index`` or ``columns`` have serialized level names those will be read in as well by specifying @@ -2598,12 +2825,12 @@ parameters. 
df.index = df.index.set_names(['lvl1', 'lvl2']) df.to_excel('path_to_file.xlsx') - df = pd.read_excel('path_to_file.xlsx', index_col=[0,1]) + df = pd.read_excel('path_to_file.xlsx', index_col=[0, 1]) df If the source file has both ``MultiIndex`` index and columns, lists specifying each -should be passed to ``index_col`` and ``header`` +should be passed to ``index_col`` and ``header``: .. ipython:: python @@ -2619,37 +2846,47 @@ should be passed to ``index_col`` and ``header`` import os os.remove('path_to_file.xlsx') -.. warning:: - - Excel files saved in version 0.16.2 or prior that had index names will still able to be read in, - but the ``has_index_names`` argument must specified to ``True``. - Parsing Specific Columns ++++++++++++++++++++++++ It is often the case that users will insert columns to do temporary computations -in Excel and you may not want to read in those columns. `read_excel` takes -a `parse_cols` keyword to allow you to specify a subset of columns to parse. +in Excel and you may not want to read in those columns. ``read_excel`` takes +a ``usecols`` keyword to allow you to specify a subset of columns to parse. -If `parse_cols` is an integer, then it is assumed to indicate the last column +If ``usecols`` is an integer, then it is assumed to indicate the last column to be parsed. .. code-block:: python - read_excel('path_to_file.xls', 'Sheet1', parse_cols=2) + read_excel('path_to_file.xls', 'Sheet1', usecols=2) -If `parse_cols` is a list of integers, then it is assumed to be the file column +If `usecols` is a list of integers, then it is assumed to be the file column indices to be parsed. .. code-block:: python - read_excel('path_to_file.xls', 'Sheet1', parse_cols=[0, 2, 3]) + read_excel('path_to_file.xls', 'Sheet1', usecols=[0, 2, 3]) + +Element order is ignored, so ``usecols=[0,1]`` is the same as ``[1,0]``. 
+ +Parsing Dates ++++++++++++++ + +Datetime-like values are normally automatically converted to the appropriate +dtype when reading the excel file. But if you have a column of strings that +*look* like dates (but are not actually formatted as dates in excel), you can +use the ``parse_dates`` keyword to parse those strings to datetimes: + +.. code-block:: python + + read_excel('path_to_file.xls', 'Sheet1', parse_dates=['date_strings']) + Cell Converters +++++++++++++++ -It is possible to transform the contents of Excel cells via the `converters` +It is possible to transform the contents of Excel cells via the ``converters`` option. For instance, to convert a column to boolean: .. code-block:: python @@ -2690,11 +2927,11 @@ Writing Excel Files Writing Excel Files to Disk +++++++++++++++++++++++++++ -To write a DataFrame object to a sheet of an Excel file, you can use the +To write a ``DataFrame`` object to a sheet of an Excel file, you can use the ``to_excel`` instance method. The arguments are largely the same as ``to_csv`` described above, the first argument being the name of the excel file, and the -optional second argument the name of the sheet to which the DataFrame should be -written. For example: +optional second argument the name of the sheet to which the ``DataFrame`` should be +written. For example: .. code-block:: python @@ -2704,19 +2941,16 @@ Files with a ``.xls`` extension will be written using ``xlwt`` and those with a ``.xlsx`` extension will be written using ``xlsxwriter`` (if available) or ``openpyxl``. -The DataFrame will be written in a way that tries to mimic the REPL output. One -difference from 0.12.0 is that the ``index_label`` will be placed in the second -row instead of the first. You can get the previous behaviour by setting the +The ``DataFrame`` will be written in a way that tries to mimic the REPL output. +The ``index_label`` will be placed in the second +row instead of the first. 
You can place it in the first row by setting the ``merge_cells`` option in ``to_excel()`` to ``False``: .. code-block:: python df.to_excel('path_to_file.xlsx', index_label='label', merge_cells=False) -The Panel class also has a ``to_excel`` instance method, -which writes each DataFrame in the Panel to a separate sheet. - -In order to write separate DataFrames to separate sheets in a single Excel file, +In order to write separate ``DataFrames`` to separate sheets in a single Excel file, one can pass an :class:`~pandas.io.excel.ExcelWriter`. .. code-block:: python @@ -2739,15 +2973,9 @@ one can pass an :class:`~pandas.io.excel.ExcelWriter`. Writing Excel Files to Memory +++++++++++++++++++++++++++++ -.. versionadded:: 0.17 - Pandas supports writing Excel files to buffer-like objects such as ``StringIO`` or ``BytesIO`` using :class:`~pandas.io.excel.ExcelWriter`. -.. versionadded:: 0.17 - -Added support for Openpyxl >= 2.2 - .. code-block:: python # Safe import for either Python 2.x or 3.x @@ -2777,20 +3005,19 @@ Added support for Openpyxl >= 2.2 ``'xlsxwriter'`` will produce an Excel 2007-format workbook (xlsx). If omitted, an Excel 2007-formatted workbook is produced. + .. _io.excel.writers: Excel writer engines '''''''''''''''''''' -.. versionadded:: 0.13 - -``pandas`` chooses an Excel writer via two methods: +Pandas chooses an Excel writer via two methods: 1. the ``engine`` keyword argument 2. the filename extension (via the default specified in config options) -By default, ``pandas`` uses the `XlsxWriter`_ for ``.xlsx`` and `openpyxl`_ -for ``.xlsm`` files and `xlwt`_ for ``.xls`` files. If you have multiple +By default, pandas uses the `XlsxWriter`_ for ``.xlsx``, `openpyxl`_ +for ``.xlsm``, and `xlwt`_ for ``.xls`` files. If you have multiple engines installed, you can set the default engine through :ref:`setting the config options ` ``io.excel.xlsx.writer`` and ``io.excel.xls.writer``. 
pandas will fall back on `openpyxl`_ for ``.xlsx`` @@ -2803,9 +3030,7 @@ files if `Xlsxwriter`_ is not available. To specify which writer you want to use, you can pass an engine keyword argument to ``to_excel`` and to ``ExcelWriter``. The built-in engines are: -- ``openpyxl``: This includes stable support for Openpyxl from 1.6.1. However, - it is advised to use version 2.2 and higher, especially when working with - styles. +- ``openpyxl``: version 2.4 or higher is required - ``xlsxwriter`` - ``xlwt`` @@ -2823,15 +3048,27 @@ argument to ``to_excel`` and to ``ExcelWriter``. The built-in engines are: df.to_excel('path_to_file.xlsx', sheet_name='Sheet1') +.. _io.excel.style: + +Style and Formatting +'''''''''''''''''''' + +The look and feel of Excel worksheets created from pandas can be modified using the following parameters on the ``DataFrame``'s ``to_excel`` method. + +- ``float_format`` : Format string for floating point numbers (default ``None``). +- ``freeze_panes`` : A tuple of two integers representing the bottommost row and rightmost column to freeze. Each of these parameters is one-based, so (1, 1) will freeze the first row and first column (default ``None``). + + + .. _io.clipboard: Clipboard --------- -A handy way to grab data is to use the ``read_clipboard`` method, which takes -the contents of the clipboard buffer and passes them to the ``read_table`` -method. For instance, you can copy the following -text to the clipboard (CTRL-C on many operating systems): +A handy way to grab data is to use the :meth:`~DataFrame.read_clipboard` method, +which takes the contents of the clipboard buffer and passes them to the +``read_table`` method. For instance, you can copy the following text to the +clipboard (CTRL-C on many operating systems): .. 
code-block:: python @@ -2840,7 +3077,7 @@ text to the clipboard (CTRL-C on many operating systems): y 2 5 q z 3 6 r -And then import the data directly to a DataFrame by calling: +And then import the data directly to a ``DataFrame`` by calling: .. code-block:: python @@ -2850,10 +3087,11 @@ And then import the data directly to a DataFrame by calling: clipdf -The ``to_clipboard`` method can be used to write the contents of a DataFrame to + +The ``to_clipboard`` method can be used to write the contents of a ``DataFrame`` to the clipboard. Following which you can paste the clipboard contents into other applications (CTRL-V on many operating systems). Here we illustrate writing a -DataFrame into clipboard and reading it back. +``DataFrame`` into clipboard and reading it back. .. ipython:: python @@ -2866,7 +3104,7 @@ We can see that we got the same content back, which we had earlier written to th .. note:: - You may need to install xclip or xsel (with gtk or PyQt4 modules) on Linux to use these methods. + You may need to install xclip or xsel (with gtk, PyQt5, PyQt4 or qtpy) on Linux to use these methods. .. _io.pickle: @@ -2899,28 +3137,88 @@ any pickled pandas object (or any other pickled object) from file: Loading pickled data received from untrusted sources can be unsafe. - See: http://docs.python.org/2.7/library/pickle.html + See: https://docs.python.org/3/library/pickle.html .. warning:: - Several internal refactorings, 0.13 (:ref:`Series Refactoring `), and 0.15 (:ref:`Index Refactoring `), - preserve compatibility with pickles created prior to these versions. However, these must - be read with ``pd.read_pickle``, rather than the default python ``pickle.load``. - See `this question `__ + Several internal refactorings have been done while still preserving + compatibility with pickles created with older versions of pandas. However, + for such cases, pickled ``DataFrames``, ``Series`` etc, must be read with + ``pd.read_pickle``, rather than ``pickle.load``. 
+ + See `here `__ + and `here `__ + for some examples of compatibility-breaking changes. See + `this question `__ for a detailed explanation. -.. note:: +.. _io.pickle.compression: - These methods were previously ``pd.save`` and ``pd.load``, prior to 0.12.0, and are now deprecated. +Compressed pickle files +''''''''''''''''''''''' + +.. versionadded:: 0.20.0 + +:func:`read_pickle`, :meth:`DataFrame.to_pickle` and :meth:`Series.to_pickle` can read +and write compressed pickle files. The compression types of ``gzip``, ``bz2``, ``xz`` are supported for reading and writing. +The ``zip`` file format only supports reading and must contain only one data file +to be read. + +The compression type can be an explicit parameter or be inferred from the file extension. +If 'infer', then use ``gzip``, ``bz2``, ``zip``, or ``xz`` if filename ends in ``'.gz'``, ``'.bz2'``, ``'.zip'``, or +``'.xz'``, respectively. + +.. ipython:: python + + df = pd.DataFrame({ + 'A': np.random.randn(1000), + 'B': 'foo', + 'C': pd.date_range('20130101', periods=1000, freq='s')}) + df + +Using an explicit compression type: + +.. ipython:: python + + df.to_pickle("data.pkl.compress", compression="gzip") + rt = pd.read_pickle("data.pkl.compress", compression="gzip") + rt + +Inferring compression type from the extension: + +.. ipython:: python + + df.to_pickle("data.pkl.xz", compression="infer") + rt = pd.read_pickle("data.pkl.xz", compression="infer") + rt + +The default is to 'infer': + +.. ipython:: python + + df.to_pickle("data.pkl.gz") + rt = pd.read_pickle("data.pkl.gz") + rt + + df["A"].to_pickle("s1.pkl.bz2") + rt = pd.read_pickle("s1.pkl.bz2") + rt + +.. ipython:: python + :suppress: + + import os + os.remove("data.pkl.compress") + os.remove("data.pkl.xz") + os.remove("data.pkl.gz") + os.remove("s1.pkl.bz2") .. _io.msgpack: msgpack ------- -.. 
versionadded:: 0.13.0 - -Starting in 0.13.0, pandas is supporting the ``msgpack`` format for +pandas supports the ``msgpack`` format for object serialization. This is a lightweight portable binary format, similar to binary JSON, that is highly space efficient, and provides good performance both on the writing (serialization), and reading (deserialization). @@ -2931,25 +3229,6 @@ both on the writing (serialization), and reading (deserialization). optimizations in the io of the ``msgpack`` data. Since this is marked as an EXPERIMENTAL LIBRARY, the storage format may not be stable until a future release. - As a result of writing format changes and other issues: - - +----------------------+------------------------+ - | Packed with | Can be unpacked with | - +======================+========================+ - | pre-0.17 / Python 2 | any | - +----------------------+------------------------+ - | pre-0.17 / Python 3 | any | - +----------------------+------------------------+ - | 0.17 / Python 2 | - 0.17 / Python 2 | - | | - >=0.18 / any Python | - +----------------------+------------------------+ - | 0.17 / Python 3 | >=0.18 / any Python | - +----------------------+------------------------+ - | 0.18 | >= 0.18 | - +----------------------+------------------------+ - - Reading (files packed by older versions) is backward-compatibile, except for files packed with 0.17 in Python 2, in which case only they can only be unpacked in Python 2. - .. ipython:: python df = pd.DataFrame(np.random.rand(5,2),columns=list('AB')) @@ -2964,14 +3243,14 @@ You can pass a list of objects and you will receive them back on deserialization pd.to_msgpack('foo.msg', df, 'foo', np.array([1,2,3]), s) pd.read_msgpack('foo.msg') -You can pass ``iterator=True`` to iterate over the unpacked results +You can pass ``iterator=True`` to iterate over the unpacked results: .. 
ipython:: python for o in pd.read_msgpack('foo.msg',iterator=True): - print o + print(o) -You can pass ``append=True`` to the writer to append to an existing pack +You can pass ``append=True`` to the writer to append to an existing pack: .. ipython:: python @@ -2980,7 +3259,7 @@ You can pass ``append=True`` to the writer to append to an existing pack Unlike other io methods, ``to_msgpack`` is available on both a per-object basis, ``df.to_msgpack()`` and using the top-level ``pd.to_msgpack(...)`` where you -can pack arbitrary collections of python lists, dicts, scalars, while intermixing +can pack arbitrary collections of Python lists, dicts, scalars, while intermixing pandas objects. .. ipython:: python @@ -3022,15 +3301,10 @@ for some advanced strategies .. warning:: - As of version 0.15.0, pandas requires ``PyTables`` >= 3.0.0. Stores written with prior versions of pandas / ``PyTables`` >= 2.3 are fully compatible (this was the previous minimum ``PyTables`` required version). - -.. warning:: - - There is a ``PyTables`` indexing bug which may appear when querying stores using an index. If you see a subset of results being returned, upgrade to ``PyTables`` >= 3.2. Stores created previously will need to be rewritten using the updated version. - -.. warning:: - - As of version 0.17.0, ``HDFStore`` will not drop rows that have all missing values by default. Previously, if all values (except the index) were missing, ``HDFStore`` would not write those rows to disk. + pandas requires ``PyTables`` >= 3.0.0. + There is an indexing bug in ``PyTables`` < 3.2 which may appear when querying stores using an index. + If you see a subset of results being returned, upgrade to ``PyTables`` >= 3.2. + Stores created previously will need to be rewritten using the updated version. ..
ipython:: python :suppress: @@ -3079,7 +3353,7 @@ In a current or later Python session, you can retrieve stored objects: # dotted (attribute) access provides get as well store.df -Deletion of the object specified by the key +Deletion of the object specified by the key: .. ipython:: python @@ -3088,7 +3362,7 @@ Deletion of the object specified by the key store -Closing a Store, Context Manager +Closing a Store and using a context manager: .. ipython:: python @@ -3096,8 +3370,7 @@ Closing a Store, Context Manager store store.is_open - # Working with, and automatically closing the store with the context - # manager + # Working with, and automatically closing the store using a context manager with pd.HDFStore('store.h5') as store: store.keys() @@ -3114,7 +3387,7 @@ Read/Write API '''''''''''''' ``HDFStore`` supports an top-level API using ``read_hdf`` for reading and ``to_hdf`` for writing, -similar to how ``read_csv`` and ``to_csv`` work. (new in 0.11.0) +similar to how ``read_csv`` and ``to_csv`` work. .. ipython:: python @@ -3129,7 +3402,7 @@ similar to how ``read_csv`` and ``to_csv`` work. (new in 0.11.0) os.remove('store_tl.h5') -As of version 0.17.0, HDFStore will no longer drop rows that are all missing by default. This behavior can be enabled by setting ``dropna=True``. +HDFStore will by default not drop rows that are all missing. This behavior can be changed by setting ``dropna=True``. .. ipython:: python :suppress: @@ -3192,26 +3465,22 @@ This is also true for the major axis of a ``Panel``: Fixed Format '''''''''''' -.. note:: - - This was prior to 0.13.0 the ``Storer`` format. - The examples above show storing using ``put``, which write the HDF5 to ``PyTables`` in a fixed array format, called -the ``fixed`` format. These types of stores are are **not** appendable once written (though you can simply +the ``fixed`` format. These types of stores are **not** appendable once written (though you can simply remove them and rewrite). 
Nor are they **queryable**; they must be retrieved in their entirety. They also do not support dataframes with non-unique column names. The ``fixed`` format stores offer very fast writing and slightly faster reading than ``table`` stores. -This format is specified by default when using ``put`` or ``to_hdf`` or by ``format='fixed'`` or ``format='f'`` +This format is specified by default when using ``put`` or ``to_hdf`` or by ``format='fixed'`` or ``format='f'``. .. warning:: - A ``fixed`` format will raise a ``TypeError`` if you try to retrieve using a ``where`` . + A ``fixed`` format will raise a ``TypeError`` if you try to retrieve using a ``where``: .. code-block:: python - pd.DataFrame(randn(10,2)).to_hdf('test_fixed.h5','df') + pd.DataFrame(randn(10, 2)).to_hdf('test_fixed.h5', 'df') - pd.read_hdf('test_fixed.h5','df',where='index>5') + pd.read_hdf('test_fixed.h5', 'df', where='index>5') TypeError: cannot pass a where specification when reading a fixed format. this store must be selected in its entirety @@ -3224,11 +3493,9 @@ Table Format ``HDFStore`` supports another ``PyTables`` format on disk, the ``table`` format. Conceptually a ``table`` is shaped very much like a DataFrame, with rows and columns. A ``table`` may be appended to in the same or -other sessions. In addition, delete & query type operations are +other sessions. In addition, delete and query type operations are supported. This format is specified by ``format='table'`` or ``format='t'`` -to ``append`` or ``put`` or ``to_hdf`` - -.. versionadded:: 0.13 +to ``append`` or ``put`` or ``to_hdf``. This format can be set as an option as well ``pd.set_option('io.hdf.default_format','table')`` to enable ``put/append/to_hdf`` to by default store in the ``table`` format. @@ -3268,9 +3535,9 @@ Hierarchical Keys Keys to a store can be specified as a string. These can be in a hierarchical path-name like format (e.g. 
``foo/bar/bah``), which will generate a hierarchy of sub-stores (or ``Groups`` in PyTables -parlance). Keys can be specified with out the leading '/' and are ALWAYS +parlance). Keys can be specified without the leading '/' and are **always** absolute (e.g. 'foo' refers to '/foo'). Removal operations can remove -everything in the sub-store and BELOW, so be *careful*. +everything in the sub-store and **below**, so be *careful*. .. ipython:: python @@ -3301,7 +3568,7 @@ everything in the sub-store and BELOW, so be *careful*. /foo/bar/bah (Group) '' children := ['block0_items' (Array), 'block0_values' (Array), 'axis0' (Array), 'axis1' (Array)] - Instead, use explicit string based keys + Instead, use explicit string based keys: .. ipython:: python @@ -3350,8 +3617,8 @@ defaults to `nan`. Storing Multi-Index DataFrames ++++++++++++++++++++++++++++++ -Storing multi-index dataframes as tables is very similar to -storing/selecting from homogeneous index DataFrames. +Storing multi-index ``DataFrames`` as tables is very similar to +storing/selecting from homogeneous index ``DataFrames``. .. ipython:: python @@ -3379,12 +3646,6 @@ Querying Querying a Table ++++++++++++++++ -.. warning:: - - This query capabilities have changed substantially starting in ``0.13.0``. - Queries from prior version are accepted (with a ``DeprecationWarning``) printed - if its not string-like. - ``select`` and ``delete`` operations have an optional criterion that can be specified to select/delete only a subset of the data. This allows one to have a very large on-disk table and retrieve only a portion of the @@ -3392,10 +3653,10 @@ data. A query is specified using the ``Term`` class under the hood, as a boolean expression. -- ``index`` and ``columns`` are supported indexers of a DataFrame +- ``index`` and ``columns`` are supported indexers of a ``DataFrame``.
- ``major_axis``, ``minor_axis``, and ``items`` are supported indexers of - the Panel -- if ``data_columns`` are specified, these can be used as additional indexers + the Panel. +- if ``data_columns`` are specified, these can be used as additional indexers. Valid comparison operators are: @@ -3533,9 +3794,7 @@ space. These are in terms of the total number of rows in a table. Using timedelta64[ns] +++++++++++++++++++++ -.. versionadded:: 0.13 - -Beginning in 0.13.0, you can store and query using the ``timedelta64[ns]`` type. Terms can be +You can store and query using the ``timedelta64[ns]`` type. Terms can be specified in the format: ``()``, where float may be signed (and fractional), and unit can be ``D,s,ms,us,ns`` for the timedelta. Here's an example: @@ -3559,7 +3818,7 @@ indexed dimension as the ``where``. .. note:: - Indexes are automagically created (starting ``0.10.1``) on the indexables + Indexes are automagically created on the indexables and any data columns you specify. This behavior can be turned off by passing ``index=False`` to ``append``. @@ -3611,7 +3870,7 @@ to perform queries (other than the `indexable` columns, which you can always query). For instance say you want to perform this common operation, on-disk, and return just the frame that matches this query. You can specify ``data_columns = True`` to force all columns to -be data_columns +be ``data_columns``. .. ipython:: python @@ -3625,7 +3884,7 @@ be data_columns # on-disk operations store.append('df_dc', df_dc, data_columns = ['B', 'C', 'string', 'string2']) - store.select('df_dc', [ pd.Term('B>0') ]) + store.select('df_dc', where='B>0') # getting creative store.select('df_dc', 'B > 0 & C > 0 & string == foo') @@ -3641,12 +3900,12 @@ There is some performance degradation by making lots of columns into `data columns`, so it is up to the user to designate these. 
In addition, you cannot change data columns (nor indexables) after the first append/put operation (Of course you can simply read in the data and -create a new table!) +create a new table!). Iterator ++++++++ -Starting in ``0.11.0``, you can pass, ``iterator=True`` or ``chunksize=number_in_a_chunk`` +You can pass ``iterator=True`` or ``chunksize=number_in_a_chunk`` to ``select`` and ``select_as_multiple`` to return an iterator on the results. The default is 50,000 rows returned in a chunk. @@ -3657,8 +3916,6 @@ The default is 50,000 rows returned in a chunk. .. note:: - .. versionadded:: 0.12.0 - You can also use the iterator with ``read_hdf`` which will open, then automatically close the store when finished iterating. @@ -3676,7 +3933,7 @@ chunks. .. ipython:: python - dfeq = pd.DataFrame({'number': np.arange(1,11)}) + dfeq = pd.DataFrame({'number': np.arange(1, 11)}) dfeq store.append('dfeq', dfeq, data_columns=['number']) @@ -3685,9 +3942,9 @@ chunks. return [l[i:i+n] for i in range(0, len(l), n)] evens = [2,4,6,8,10] - coordinates = store.select_as_coordinates('dfeq','number=evens') + coordinates = store.select_as_coordinates('dfeq', 'number=evens') for c in chunks(coordinates, 2): - print store.select('dfeq',where=c) + print(store.select('dfeq', where=c)) Advanced Queries ++++++++++++++++ @@ -3754,8 +4011,8 @@ of rows in an object. Multiple Table Queries ++++++++++++++++++++++ -New in 0.10.1 are the methods ``append_to_multiple`` and -``select_as_multiple``, that can perform appending/selecting from +The methods ``append_to_multiple`` and +``select_as_multiple`` can perform appending/selecting from multiple tables at once. The idea is to have one table (call it the selector table) that you index most/all of the columns, and perform your queries. The other table(s) are data tables with an index matching the @@ -3769,7 +4026,7 @@ table names to a list of 'columns' you want in that table. 
If `None` is used in place of a list, that table will have the remaining unspecified columns of the given DataFrame. The argument ``selector`` defines which table is the selector table (which you can make queries from). -The argument ``dropna`` will drop rows from the input DataFrame to ensure +The argument ``dropna`` will drop rows from the input ``DataFrame`` to ensure tables are synchronized. This means that if a row for one of the tables being written to is entirely ``np.NaN``, that row will be dropped from all tables. @@ -3845,7 +4102,7 @@ the table using a ``where`` that selects all but the missing data. automatically. Thus, repeatedly deleting (or removing nodes) and adding again, **WILL TEND TO INCREASE THE FILE SIZE**. - To *repack and clean* the file, use :ref:`ptrepack ` + To *repack and clean* the file, use :ref:`ptrepack `. .. _io.hdf5-notes: @@ -3857,26 +4114,64 @@ Compression +++++++++++ ``PyTables`` allows the stored data to be compressed. This applies to -all kinds of stores, not just tables. +all kinds of stores, not just tables. Two parameters are used to +control compression: ``complevel`` and ``complib``. + +``complevel`` specifies if and how hard data is to be compressed. + ``complevel=0`` and ``complevel=None`` disables + compression and ``0<complevel<10`` enables compression. + +``complib`` specifies which compression library to use. If nothing is + specified the default library ``zlib`` is used. A + compression library usually optimizes for either good + compression rates or speed and the results will depend on + the type of data. Which type of + compression to choose depends on your specific needs and + data. The list of supported compression libraries: + + - `zlib <https://zlib.net/>`_: The default compression library. A classic in terms of compression, achieves good compression rates but is somewhat slow. + - `lzo `_: Fast compression and decompression. + - `bzip2 `_: Good compression rates. + - `blosc `_: Fast compression and decompression. + + .. versionadded:: 0.20.2 + + Support for alternative blosc compressors: + + - `blosc:blosclz `_ This is the + default compressor for ``blosc`` + - `blosc:lz4 + `_: + A compact, very popular and fast compressor. + - `blosc:lz4hc + `_: + A tweaked version of LZ4, produces better + compression ratios at the expense of speed. + - `blosc:snappy `_: + A popular compressor used in many places.
+ - `blosc:zlib `_: A classic; + somewhat slower than the previous ones, but + achieving better compression ratios. + - `blosc:zstd `_: An + extremely well balanced codec; it provides the best + compression ratios among the others above, and at + reasonably fast speed. + + If ``complib`` is defined as something other than the + listed libraries a ``ValueError`` exception is issued. -- Pass ``complevel=int`` for a compression level (1-9, with 0 being no - compression, and the default) -- Pass ``complib=lib`` where lib is any of ``zlib, bzip2, lzo, blosc`` for - whichever compression library you prefer. +.. note:: -``HDFStore`` will use the file based compression scheme if no overriding -``complib`` or ``complevel`` options are provided. ``blosc`` offers very -fast compression, and is my most used. Note that ``lzo`` and ``bzip2`` -may not be installed (by Python) by default. + If the library specified with the ``complib`` option is missing on your platform, + compression defaults to ``zlib`` without further ado. -Compression for all objects within the file +Enable compression for all objects within the file: .. code-block:: python - store_compressed = pd.HDFStore('store_compressed.h5', complevel=9, complib='blosc') + store_compressed = pd.HDFStore('store_compressed.h5', complevel=9, complib='blosc:blosclz') -Or on-the-fly compression (this only applies to tables). You can turn -off file compression for a specific table by passing ``complevel=0`` +Or on-the-fly compression (this only applies to tables) in stores where compression is not enabled: .. code-block:: python @@ -3929,7 +4224,7 @@ Caveats .. warning:: - ``PyTables`` will show a ``NaturalNameWarning`` if a column name + ``PyTables`` will show a ``NaturalNameWarning`` if a column name cannot be used as an attribute selector. *Natural* identifiers contain only letters, numbers, and underscores, and may not begin with a number. 
@@ -3963,10 +4258,8 @@ object : ``strings`` ``np.nan`` Categorical Data ++++++++++++++++ -.. versionadded:: 0.15.2 - -Writing data to a ``HDFStore`` that contains a ``category`` dtype was implemented -in 0.15.2. Queries work the same as if it was an object array. However, the ``category`` dtyped data is +You can write data that contains ``category`` dtypes to a ``HDFStore``. +Queries work the same as if it was an object array. However, the ``category`` dtyped data is stored in a more efficient manner. .. ipython:: python @@ -3981,21 +4274,6 @@ stored in a more efficient manner. result result.dtypes -.. warning:: - - The format of the ``Categorical`` is readable by prior versions of pandas (< 0.15.2), but will retrieve - the data as an integer based column (e.g. the ``codes``). However, the ``categories`` *can* be retrieved - but require the user to select them manually using the explicit meta path. - - The data is stored like so: - - .. ipython:: python - - cstore - - # to get the categories - cstore.select('dfcat/meta/A/meta') - .. ipython:: python :suppress: :okexcept: @@ -4021,7 +4299,7 @@ Pass ``min_itemsize`` on the first table creation to a-priori specify the minimu ``min_itemsize`` can be an integer, or a dict mapping a column name to an integer. You can pass ``values`` as a key to allow all *indexables* or *data_columns* to have this min_itemsize. -Starting in 0.11.0, passing a ``min_itemsize`` dict will cause all passed columns to be created as *data_columns* automatically. +Passing a ``min_itemsize`` dict will cause all passed columns to be created as *data_columns* automatically. .. note:: @@ -4149,44 +4427,6 @@ Now you can import the ``DataFrame`` into R: starting point if you have stored multiple ``DataFrame`` objects to a single HDF5 file. -Backwards Compatibility -''''''''''''''''''''''' - -0.10.1 of ``HDFStore`` can read tables created in a prior version of pandas, -however query terms using the -prior (undocumented) methodology are unsupported. 
``HDFStore`` will -issue a warning if you try to use a legacy-format file. You must -read in the entire file and write it out using the new format, using the -method ``copy`` to take advantage of the updates. The group attribute -``pandas_version`` contains the version information. ``copy`` takes a -number of options, please see the docstring. - - -.. ipython:: python - :suppress: - - import os - legacy_file_path = os.path.abspath('source/_static/legacy_0.10.h5') - -.. ipython:: python - :okwarning: - - # a legacy store - legacy_store = pd.HDFStore(legacy_file_path,'r') - legacy_store - - # copy (and return the new handle) - new_store = legacy_store.copy('store_new.h5') - new_store - new_store.close() - -.. ipython:: python - :suppress: - - legacy_store.close() - import os - os.remove('store_new.h5') - Performance ''''''''''' @@ -4211,32 +4451,6 @@ Performance `Here `__ for more information and some solutions. -Experimental -'''''''''''' - -HDFStore supports ``Panel4D`` storage. - -.. ipython:: python - :okwarning: - - p4d = pd.Panel4D({ 'l1' : wp }) - p4d - store.append('p4d', p4d) - store - -These, by default, index the three axes ``items, major_axis, -minor_axis``. On an ``AppendableTable`` it is possible to setup with the -first append a different indexing scheme, depending on how you want to -store your data. Pass the ``axes`` keyword with a list of dimensions -(currently must by exactly 1 less than the total dimensions of the -object). This cannot be changed after table creation. - -.. ipython:: python - :okwarning: - - store.append('p4d2', p4d, axes=['labels', 'major_axis', 'minor_axis']) - store - store.select('p4d2', [ pd.Term('labels=l1'), pd.Term('items=Item1'), pd.Term('minor_axis=A_big_strings') ]) .. ipython:: python :suppress: @@ -4263,13 +4477,15 @@ Several caveats. - This is a newer library, and the format, though stable, is not guaranteed to be backward compatible to the earlier versions. 
-- The format will NOT write an ``Index``, or ``MultiIndex`` for the ``DataFrame`` and will raise an - error if a non-default one is provided. You can simply ``.reset_index()`` in order to store the index. +- The format will NOT write an ``Index``, or ``MultiIndex`` for the + ``DataFrame`` and will raise an error if a non-default one is provided. You + can ``.reset_index()`` to store the index or ``.reset_index(drop=True)`` to + ignore it. - Duplicate column names and non-string columns names are not supported -- Non supported types include ``Period`` and actual python object types. These will raise a helpful error message +- Non supported types include ``Period`` and actual Python object types. These will raise a helpful error message on an attempt at serialization. -See the `Full Documentation `__ +See the `Full Documentation `__. .. ipython:: python @@ -4289,6 +4505,7 @@ See the `Full Documentation `__ Write to a feather file. .. ipython:: python + :okwarning: df.to_feather('example.feather') @@ -4308,6 +4525,87 @@ Read from a feather file. import os os.remove('example.feather') + +.. _io.parquet: + +Parquet +------- + +.. versionadded:: 0.21.0 + +`Apache Parquet `__ provides a partitioned binary columnar serialization for data frames. It is designed to +make reading and writing data frames efficient, and to make sharing data across data analysis +languages easy. Parquet can use a variety of compression techniques to shrink the file size as much as possible +while still maintaining good read performance. + +Parquet is designed to faithfully serialize and de-serialize ``DataFrame`` s, supporting all of the pandas +dtypes, including extension dtypes such as datetime with tz. + +Several caveats. + +- Duplicate column names and non-string columns names are not supported. +- Index level names, if specified, must be strings. +- Categorical dtypes can be serialized to parquet, but will de-serialize as ``object`` dtype. 
+- Unsupported types include ``Period`` and actual Python object types. These will raise a helpful error message + on an attempt at serialization. + +You can specify an ``engine`` to direct the serialization. This can be one of ``pyarrow``, ``fastparquet``, or ``auto``. +If the engine is NOT specified, then the ``pd.options.io.parquet.engine`` option is checked; if this is also ``auto``, +then ``pyarrow`` is tried, falling back to ``fastparquet``. + +See the documentation for `pyarrow `__ and `fastparquet `__. + +.. note:: + + These engines are very similar and should read/write nearly identical parquet format files. + Currently ``pyarrow`` does not support timedelta data; ``fastparquet>=0.1.4`` supports timezone aware datetimes. + These libraries differ by having different underlying dependencies (``fastparquet`` uses ``numba``, while ``pyarrow`` uses a C library). + +.. ipython:: python + + df = pd.DataFrame({'a': list('abc'), + 'b': list(range(1, 4)), + 'c': np.arange(3, 6).astype('u1'), + 'd': np.arange(4.0, 7.0, dtype='float64'), + 'e': [True, False, True], + 'f': pd.date_range('20130101', periods=3), + 'g': pd.date_range('20130101', periods=3, tz='US/Eastern')}) + + df + df.dtypes + +Write to a parquet file. + +.. ipython:: python + + df.to_parquet('example_pa.parquet', engine='pyarrow') + df.to_parquet('example_fp.parquet', engine='fastparquet') + +Read from a parquet file. + +.. ipython:: python + + result = pd.read_parquet('example_fp.parquet', engine='fastparquet') + result = pd.read_parquet('example_pa.parquet', engine='pyarrow') + + result.dtypes + +Read only certain columns of a parquet file. + +.. ipython:: python + + result = pd.read_parquet('example_fp.parquet', engine='fastparquet', columns=['a', 'b']) + + result.dtypes + + +.. ipython:: python + :suppress: + + import os + os.remove('example_pa.parquet') + os.remove('example_fp.parquet') + ..
_io.sql: SQL Queries @@ -4318,13 +4616,11 @@ facilitate data retrieval and to reduce dependency on DB-specific API. Database is provided by SQLAlchemy if installed. In addition you will need a driver library for your database. Examples of such drivers are `psycopg2 `__ for PostgreSQL or `pymysql `__ for MySQL. -For `SQLite `__ this is +For `SQLite `__ this is included in Python's standard library by default. You can find an overview of supported drivers for each SQL dialect in the `SQLAlchemy docs `__. -.. versionadded:: 0.14.0 - If SQLAlchemy is not installed, a fallback is only provided for sqlite (and for mysql for backwards compatibility, but this is deprecated and will be removed in a future version). @@ -4377,7 +4673,7 @@ If you want to manage your own connections you can pass one of those instead: Writing DataFrames '''''''''''''''''' -Assuming the following data is in a DataFrame ``data``, we can insert it into +Assuming the following data is in a ``DataFrame`` ``data``, we can insert it into the database using :func:`~pandas.DataFrame.to_sql`. +-----+------------+-------+-------+-------+ @@ -4415,6 +4711,12 @@ writes ``data`` to the database in batches of 1000 rows at a time: data.to_sql('data_chunked', engine, chunksize=1000) +.. note:: + + The function :func:`~pandas.DataFrame.to_sql` will perform a multivalue + insert if the engine dialect ``supports_multivalues_insert``. This will + greatly speed up the insert in some cases. + SQL data types ++++++++++++++ @@ -4463,7 +4765,7 @@ table name and optionally a subset of columns to read. pd.read_sql_table('data', engine) -You can also specify the name of the column as the DataFrame index, +You can also specify the name of the column as the ``DataFrame`` index, and specify a subset of columns to be read. .. ipython:: python @@ -4491,8 +4793,6 @@ You can check if a table exists using :func:`~pandas.io.sql.has_table` Schema support '''''''''''''' -.. 
versionadded:: 0.15.0 - Reading from and writing to different schema's is supported through the ``schema`` keyword in the :func:`~pandas.read_sql_table` and :func:`~pandas.DataFrame.to_sql` functions. Note however that this depends on the database flavor (sqlite does not @@ -4534,7 +4834,7 @@ Specifying this will return an iterator through chunks of the query result: for chunk in pd.read_sql_query("SELECT * FROM data_chunks", engine, chunksize=5): print(chunk) -You can also run a plain query without creating a dataframe with +You can also run a plain query without creating a ``DataFrame`` with :func:`~pandas.io.sql.execute`. This is useful for queries that don't return values, such as INSERT. This is functionally equivalent to calling ``execute`` on the SQLAlchemy engine or db connection object. Again, you must use the SQL syntax @@ -4639,301 +4939,24 @@ And then issue the following queries: Google BigQuery --------------- -.. versionadded:: 0.13.0 - -The :mod:`pandas.io.gbq` module provides a wrapper for Google's BigQuery -analytics web service to simplify retrieving results from BigQuery tables -using SQL-like queries. Result sets are parsed into a pandas -DataFrame with a shape and data types derived from the source table. -Additionally, DataFrames can be inserted into new BigQuery tables or appended -to existing tables. - .. warning:: - To use this module, you will need a valid BigQuery account. Refer to the - `BigQuery Documentation `__ - for details on the service itself. + Starting in 0.20.0, pandas has split off Google BigQuery support into the + separate package ``pandas-gbq``. You can ``pip install pandas-gbq`` to get it. -The key functions are: +The ``pandas-gbq`` package provides functionality to read/write from Google BigQuery. -.. currentmodule:: pandas.io.gbq - -.. autosummary:: - :toctree: generated/ - - read_gbq - to_gbq - -.. 
currentmodule:: pandas - - -Supported Data Types -++++++++++++++++++++ - -Pandas supports all these `BigQuery data types `__: -``STRING``, ``INTEGER`` (64bit), ``FLOAT`` (64 bit), ``BOOLEAN`` and -``TIMESTAMP`` (microsecond precision). Data types ``BYTES`` and ``RECORD`` -are not supported. - -Integer and boolean ``NA`` handling -+++++++++++++++++++++++++++++++++++ - -.. versionadded:: 0.20 - -Since all columns in BigQuery queries are nullable, and NumPy lacks of ``NA`` -support for integer and boolean types, this module will store ``INTEGER`` or -``BOOLEAN`` columns with at least one ``NULL`` value as ``dtype=object``. -Otherwise those columns will be stored as ``dtype=int64`` or ``dtype=bool`` -respectively. - -This is opposite to default pandas behaviour which will promote integer -type to float in order to store NAs. See the :ref:`gotchas` -for detailed explaination. - -While this trade-off works well for most cases, it breaks down for storing -values greater than 2**53. Such values in BigQuery can represent identifiers -and unnoticed precision lost for identifier is what we want to avoid. - -.. _io.bigquery_deps: - -Dependencies -++++++++++++ - -This module requires following additional dependencies: - -- `httplib2 `__: HTTP client -- `google-api-python-client `__: Google's API client -- `oauth2client `__: authentication and authorization for Google's API - -.. _io.bigquery_authentication: - -Authentication -'''''''''''''' - -.. versionadded:: 0.18.0 - -Authentication to the Google ``BigQuery`` service is via ``OAuth 2.0``. -Is possible to authenticate with either user account credentials or service account credentials. - -Authenticating with user account credentials is as simple as following the prompts in a browser window -which will be automatically opened for you. You will be authenticated to the specified -``BigQuery`` account using the product name ``pandas GBQ``. It is only possible on local host. 
-The remote authentication using user account credentials is not currently supported in pandas. -Additional information on the authentication mechanism can be found -`here `__. - -Authentication with service account credentials is possible via the `'private_key'` parameter. This method -is particularly useful when working on remote servers (eg. jupyter iPython notebook on remote host). -Additional information on service accounts can be found -`here `__. - -Authentication via ``application default credentials`` is also possible. This is only valid -if the parameter ``private_key`` is not provided. This method also requires that -the credentials can be fetched from the environment the code is running in. -Otherwise, the OAuth2 client-side authentication is used. -Additional information on -`application default credentials `__. - -.. versionadded:: 0.19.0 - -.. note:: - - The `'private_key'` parameter can be set to either the file path of the service account key - in JSON format, or key contents of the service account key in JSON format. - -.. note:: - - A private key can be obtained from the Google developers console by clicking - `here `__. Use JSON key type. - -.. _io.bigquery_reader: - -Querying -'''''''' - -Suppose you want to load all data from an existing BigQuery table : `test_dataset.test_table` -into a DataFrame using the :func:`~pandas.io.gbq.read_gbq` function. - -.. code-block:: python - - # Insert your BigQuery Project ID Here - # Can be found in the Google web console - projectid = "xxxxxxxx" - - data_frame = pd.read_gbq('SELECT * FROM test_dataset.test_table', projectid) - - -You can define which column from BigQuery to use as an index in the -destination DataFrame as well as a preferred column order as follows: - -.. 
code-block:: python - - data_frame = pd.read_gbq('SELECT * FROM test_dataset.test_table', - index_col='index_column_name', - col_order=['col1', 'col2', 'col3'], projectid) - - -Starting with 0.20.0, you can specify the query config as parameter to use additional options of your job. -For more information about query configuration parameters see -`here `__. - -.. code-block:: python - - configuration = { - 'query': { - "useQueryCache": False - } - } - data_frame = pd.read_gbq('SELECT * FROM test_dataset.test_table', - configuration=configuration, projectid) - - -.. note:: - - You can find your project id in the `Google developers console `__. - - -.. note:: - - You can toggle the verbose output via the ``verbose`` flag which defaults to ``True``. - -.. note:: - - The ``dialect`` argument can be used to indicate whether to use BigQuery's ``'legacy'`` SQL - or BigQuery's ``'standard'`` SQL (beta). The default value is ``'legacy'``. For more information - on BigQuery's standard SQL, see `BigQuery SQL Reference - `__ - -.. _io.bigquery_writer: - -Writing DataFrames -'''''''''''''''''' - -Assume we want to write a DataFrame ``df`` into a BigQuery table using :func:`~pandas.DataFrame.to_gbq`. - -.. ipython:: python - - df = pd.DataFrame({'my_string': list('abc'), - 'my_int64': list(range(1, 4)), - 'my_float64': np.arange(4.0, 7.0), - 'my_bool1': [True, False, True], - 'my_bool2': [False, True, False], - 'my_dates': pd.date_range('now', periods=3)}) - - df - df.dtypes - -.. code-block:: python - - df.to_gbq('my_dataset.my_table', projectid) - -.. note:: - - The destination table and destination dataset will automatically be created if they do not already exist. - -The ``if_exists`` argument can be used to dictate whether to ``'fail'``, ``'replace'`` -or ``'append'`` if the destination table already exists. The default value is ``'fail'``. - -For example, assume that ``if_exists`` is set to ``'fail'``. 
The following snippet will raise -a ``TableCreationError`` if the destination table already exists. - -.. code-block:: python - - df.to_gbq('my_dataset.my_table', projectid, if_exists='fail') - -.. note:: - - If the ``if_exists`` argument is set to ``'append'``, the destination dataframe will - be written to the table using the defined table schema and column types. The - dataframe must match the destination table in structure and data types. - If the ``if_exists`` argument is set to ``'replace'``, and the existing table has a - different schema, a delay of 2 minutes will be forced to ensure that the new schema - has propagated in the Google environment. See - `Google BigQuery issue 191 `__. - -Writing large DataFrames can result in errors due to size limitations being exceeded. -This can be avoided by setting the ``chunksize`` argument when calling :func:`~pandas.DataFrame.to_gbq`. -For example, the following writes ``df`` to a BigQuery table in batches of 10000 rows at a time: - -.. code-block:: python - - df.to_gbq('my_dataset.my_table', projectid, chunksize=10000) - -You can also see the progress of your post via the ``verbose`` flag which defaults to ``True``. -For example: - -.. code-block:: python - - In [8]: df.to_gbq('my_dataset.my_table', projectid, chunksize=10000, verbose=True) - - Streaming Insert is 10% Complete - Streaming Insert is 20% Complete - Streaming Insert is 30% Complete - Streaming Insert is 40% Complete - Streaming Insert is 50% Complete - Streaming Insert is 60% Complete - Streaming Insert is 70% Complete - Streaming Insert is 80% Complete - Streaming Insert is 90% Complete - Streaming Insert is 100% Complete - -.. note:: - - If an error occurs while streaming data to BigQuery, see - `Troubleshooting BigQuery Errors `__. - -.. note:: - - The BigQuery SQL query language has some oddities, see the - `BigQuery Query Reference Documentation `__. - -.. 
note:: - - While BigQuery uses SQL-like syntax, it has some important differences from traditional - databases both in functionality, API limitations (size and quantity of queries or uploads), - and how Google charges for use of the service. You should refer to `Google BigQuery documentation `__ - often as the service seems to be changing and evolving. BiqQuery is best for analyzing large - sets of data quickly, but it is not a direct replacement for a transactional database. - -.. _io.bigquery_create_tables: - -Creating BigQuery Tables -'''''''''''''''''''''''' - -.. warning:: - - As of 0.17, the function :func:`~pandas.io.gbq.generate_bq_schema` has been deprecated and will be - removed in a future version. - -As of 0.15.2, the gbq module has a function :func:`~pandas.io.gbq.generate_bq_schema` which will -produce the dictionary representation schema of the specified pandas DataFrame. - -.. code-block:: ipython - - In [10]: gbq.generate_bq_schema(df, default_type='STRING') - - Out[10]: {'fields': [{'name': 'my_bool1', 'type': 'BOOLEAN'}, - {'name': 'my_bool2', 'type': 'BOOLEAN'}, - {'name': 'my_dates', 'type': 'TIMESTAMP'}, - {'name': 'my_float64', 'type': 'FLOAT'}, - {'name': 'my_int64', 'type': 'INTEGER'}, - {'name': 'my_string', 'type': 'STRING'}]} - -.. note:: - - If you delete and re-create a BigQuery table with the same name, but different table schema, - you must wait 2 minutes before streaming data into the table. As a workaround, consider creating - the new table with a different name. Refer to - `Google BigQuery issue 191 `__. +pandas integrates with this external package. If ``pandas-gbq`` is installed, you can +use the pandas methods ``pd.read_gbq`` and ``DataFrame.to_gbq``, which will call the +respective functions from ``pandas-gbq``. +Full documentation can be found `here `__. .. _io.stata: Stata Format ------------ -.. versionadded:: 0.12.0 - .. 
_io.stata_writer: Writing to Stata format @@ -4990,15 +5013,13 @@ Reading from Stata format ''''''''''''''''''''''''' The top-level function ``read_stata`` will read a dta file and return -either a DataFrame or a :class:`~pandas.io.stata.StataReader` that can +either a ``DataFrame`` or a :class:`~pandas.io.stata.StataReader` that can be used to read the file incrementally. .. ipython:: python pd.read_stata('stata.dta') -.. versionadded:: 0.16.0 - Specifying a ``chunksize`` yields a :class:`~pandas.io.stata.StataReader` instance that can be used to read ``chunksize`` lines from the file at a time. The ``StataReader`` @@ -5056,8 +5077,6 @@ values will have ``object`` data type. Categorical Data ++++++++++++++++ -.. versionadded:: 0.15.2 - ``Categorical`` data can be exported to *Stata* data files as value labeled data. The exported data consists of the underlying category codes as integer data values and the categories as value labels. *Stata* does not have an explicit equivalent @@ -5092,7 +5111,7 @@ whether imported ``Categorical`` variables are ordered. .. note:: - *Stata* supports partially labeled series. These series have value labels for + *Stata* supports partially labeled series. These series have value labels for some but not all data values. Importing a partially labeled series will produce a ``Categorical`` with string categories for the values that are labeled and numeric categories for values with no label. @@ -5104,10 +5123,8 @@ whether imported ``Categorical`` variables are ordered. SAS Formats ----------- -.. versionadded:: 0.17.0 - The top-level function :func:`read_sas` can read (but not write) SAS -`xport` (.XPT) and `SAS7BDAT` (.sas7bdat) format files were added in *v0.18.0*. +`xport` (.XPT) and (since *v0.18.0*) `SAS7BDAT` (.sas7bdat) format files. SAS files only contain two value types: ASCII text and floating point values (usually 8 bytes but sometimes truncated). 
For xport files, @@ -5154,7 +5171,7 @@ into and from pandas, we recommend these packages from the broader community. netCDF '''''' -xarray_ provides data structures inspired by the pandas DataFrame for working +xarray_ provides data structures inspired by the pandas ``DataFrame`` for working with multi-dimensional datasets, with a focus on the netCDF file format and easy conversion to and from pandas. @@ -5165,85 +5182,114 @@ easy conversion to and from pandas. Performance Considerations -------------------------- -This is an informal comparison of various IO methods, using pandas 0.13.1. +This is an informal comparison of various IO methods, using pandas +0.20.3. Timings are machine dependent and small differences should be +ignored. .. code-block:: ipython - In [1]: df = pd.DataFrame(randn(1000000,2),columns=list('AB')) + In [1]: sz = 1000000 + In [2]: df = pd.DataFrame({'A': randn(sz), 'B': [1] * sz}) - In [2]: df.info() + In [3]: df.info() - Int64Index: 1000000 entries, 0 to 999999 + RangeIndex: 1000000 entries, 0 to 999999 Data columns (total 2 columns): A 1000000 non-null float64 - B 1000000 non-null float64 - dtypes: float64(2) - memory usage: 22.9 MB + B 1000000 non-null int64 + dtypes: float64(1), int64(1) + memory usage: 15.3 MB -Writing +When writing, the top three functions in terms of speed are +``test_pickle_write``, ``test_feather_write`` and ``test_hdf_fixed_write_compress``. .. code-block:: ipython In [14]: %timeit test_sql_write(df) - 1 loops, best of 3: 6.24 s per loop + 2.37 s ± 36.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) In [15]: %timeit test_hdf_fixed_write(df) - 1 loops, best of 3: 237 ms per loop + 194 ms ± 65.9 ms per loop (mean ± std. dev. of 7 runs, 10 loops each) In [26]: %timeit test_hdf_fixed_write_compress(df) - 1 loops, best of 3: 245 ms per loop + 119 ms ± 2.15 ms per loop (mean ± std. dev. 
of 7 runs, 10 loops each) In [16]: %timeit test_hdf_table_write(df) - 1 loops, best of 3: 901 ms per loop + 623 ms ± 125 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) In [27]: %timeit test_hdf_table_write_compress(df) - 1 loops, best of 3: 952 ms per loop + 563 ms ± 23.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) In [17]: %timeit test_csv_write(df) - 1 loops, best of 3: 3.44 s per loop + 3.13 s ± 49.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) + + In [30]: %timeit test_feather_write(df) + 103 ms ± 5.88 ms per loop (mean ± std. dev. of 7 runs, 10 loops each) + + In [31]: %timeit test_pickle_write(df) + 109 ms ± 3.72 ms per loop (mean ± std. dev. of 7 runs, 10 loops each) -Reading + In [32]: %timeit test_pickle_write_compress(df) + 3.33 s ± 55.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) + +When reading, the top three are ``test_feather_read``, ``test_pickle_read`` and +``test_hdf_fixed_read``. .. code-block:: ipython In [18]: %timeit test_sql_read() - 1 loops, best of 3: 766 ms per loop + 1.35 s ± 14.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) In [19]: %timeit test_hdf_fixed_read() - 10 loops, best of 3: 19.1 ms per loop + 14.3 ms ± 438 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) In [28]: %timeit test_hdf_fixed_read_compress() - 10 loops, best of 3: 36.3 ms per loop + 23.5 ms ± 672 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) In [20]: %timeit test_hdf_table_read() - 10 loops, best of 3: 39 ms per loop + 35.4 ms ± 314 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) In [29]: %timeit test_hdf_table_read_compress() - 10 loops, best of 3: 60.6 ms per loop + 42.6 ms ± 2.1 ms per loop (mean ± std. dev. of 7 runs, 10 loops each) In [22]: %timeit test_csv_read() - 1 loops, best of 3: 620 ms per loop + 516 ms ± 27.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) + + In [33]: %timeit test_feather_read() + 4.06 ms ± 115 µs per loop (mean ± std. dev. 
of 7 runs, 100 loops each) + + In [34]: %timeit test_pickle_read() + 6.5 ms ± 172 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) + + In [35]: %timeit test_pickle_read_compress() + 588 ms ± 3.57 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) Space on disk (in bytes) .. code-block:: none - 25843712 Apr 8 14:11 test.sql - 24007368 Apr 8 14:11 test_fixed.hdf - 15580682 Apr 8 14:11 test_fixed_compress.hdf - 24458444 Apr 8 14:11 test_table.hdf - 16797283 Apr 8 14:11 test_table_compress.hdf - 46152810 Apr 8 14:11 test.csv + 34816000 Aug 21 18:00 test.sql + 24009240 Aug 21 18:00 test_fixed.hdf + 7919610 Aug 21 18:00 test_fixed_compress.hdf + 24458892 Aug 21 18:00 test_table.hdf + 8657116 Aug 21 18:00 test_table_compress.hdf + 28520770 Aug 21 18:00 test.csv + 16000248 Aug 21 18:00 test.feather + 16000848 Aug 21 18:00 test.pkl + 7554108 Aug 21 18:00 test.pkl.compress -And here's the code +And here's the code: .. code-block:: python - import sqlite3 import os + import pandas as pd + import sqlite3 + from numpy.random import randn from pandas.io import sql - df = pd.DataFrame(randn(1000000,2),columns=list('AB')) + sz = 1000000 + df = pd.DataFrame({'A': randn(sz), 'B': [1] * sz}) def test_sql_write(df): if os.path.exists('test.sql'): @@ -5286,3 +5332,21 @@ And here's the code def test_csv_read(): pd.read_csv('test.csv',index_col=0) + + def test_feather_write(df): + df.to_feather('test.feather') + + def test_feather_read(): + pd.read_feather('test.feather') + + def test_pickle_write(df): + df.to_pickle('test.pkl') + + def test_pickle_read(): + pd.read_pickle('test.pkl') + + def test_pickle_write_compress(df): + df.to_pickle('test.pkl.compress', compression='xz') + + def test_pickle_read_compress(): + pd.read_pickle('test.pkl.compress', compression='xz') diff --git a/doc/source/merging.rst b/doc/source/merging.rst index f732f0a4cc749..aebbcee67ad48 100644 --- a/doc/source/merging.rst +++ b/doc/source/merging.rst @@ -13,7 +13,7 @@ import matplotlib.pyplot as plt 
plt.close('all') - import pandas.util.doctools as doctools + import pandas.util._doctools as doctools p = doctools.TablePlotter() @@ -31,11 +31,11 @@ operations. Concatenating objects --------------------- -The ``concat`` function (in the main pandas namespace) does all of the heavy -lifting of performing concatenation operations along an axis while performing -optional set logic (union or intersection) of the indexes (if any) on the other -axes. Note that I say "if any" because there is only a single possible axis of -concatenation for Series. +The :func:`~pandas.concat` function (in the main pandas namespace) does all of +the heavy lifting of performing concatenation operations along an axis while +performing optional set logic (union or intersection) of the indexes (if any) on +the other axes. Note that I say "if any" because there is only a single possible +axis of concatenation for Series. Before diving into all of the details of ``concat`` and what it can do, here is a simple example: @@ -109,10 +109,10 @@ some configurable handling of "what to do with the other axes": to the actual data concatenation. - ``copy`` : boolean, default True. If False, do not copy data unnecessarily. -Without a little bit of context and example many of these arguments don't make -much sense. Let's take the above example. Suppose we wanted to associate -specific keys with each of the pieces of the chopped up DataFrame. We can do -this using the ``keys`` argument: +Without a little bit of context many of these arguments don't make much sense. +Let's revisit the above example. Suppose we wanted to associate specific keys +with each of the pieces of the chopped up DataFrame. We can do this using the +``keys`` argument: .. ipython:: python @@ -128,7 +128,7 @@ this using the ``keys`` argument: As you can see (if you've read the rest of the documentation), the resulting object's index has a :ref:`hierarchical index `. 
This -means that we can now do stuff like select out each chunk by key: +means that we can now select out each chunk by key: .. ipython:: python @@ -138,10 +138,10 @@ It's not a stretch to see how this can be very useful. More detail on this functionality below. .. note:: - It is worth noting however, that ``concat`` (and therefore ``append``) makes - a full copy of the data, and that constantly reusing this function can - create a significant performance hit. If you need to use the operation over - several datasets, use a list comprehension. + It is worth noting that :func:`~pandas.concat` (and therefore + :func:`~pandas.append`) makes a full copy of the data, and that constantly + reusing this function can create a significant performance hit. If you need + to use the operation over several datasets, use a list comprehension. :: @@ -152,17 +152,16 @@ functionality below. Set logic on the other axes ~~~~~~~~~~~~~~~~~~~~~~~~~~~ -When gluing together multiple DataFrames (or Panels or...), for example, you -have a choice of how to handle the other axes (other than the one being -concatenated). This can be done in three ways: +When gluing together multiple DataFrames, you have a choice of how to handle +the other axes (other than the one being concatenated). This can be done in +the following three ways: - Take the (sorted) union of them all, ``join='outer'``. This is the default option as it results in zero information loss. - Take the intersection, ``join='inner'``. -- Use a specific index (in the case of DataFrame) or indexes (in the case of - Panel or future higher dimensional objects), i.e. the ``join_axes`` argument +- Use a specific index, as passed to the ``join_axes`` argument. -Here is a example of each of these methods. First, the default ``join='outer'`` +Here is an example of each of these methods. First, the default ``join='outer'`` behavior: .. 
ipython:: python @@ -217,9 +216,9 @@ DataFrame: Concatenating using ``append`` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -A useful shortcut to ``concat`` are the ``append`` instance methods on Series -and DataFrame. These methods actually predated ``concat``. They concatenate -along ``axis=0``, namely the index: +A useful shortcut to :func:`~pandas.concat` are the :meth:`~DataFrame.append` +instance methods on ``Series`` and ``DataFrame``. These methods actually predated +``concat``. They concatenate along ``axis=0``, namely the index: .. ipython:: python @@ -233,7 +232,7 @@ along ``axis=0``, namely the index: labels=['df1', 'df2'], vertical=True); plt.close('all'); -In the case of DataFrame, the indexes must be disjoint but the columns do not +In the case of ``DataFrame``, the indexes must be disjoint but the columns do not need to be: .. ipython:: python @@ -264,18 +263,17 @@ need to be: .. note:: - Unlike `list.append` method, which appends to the original list and - returns nothing, ``append`` here **does not** modify ``df1`` and - returns its copy with ``df2`` appended. + Unlike the :py:meth:`~list.append` method, which appends to the original list + and returns ``None``, :meth:`~DataFrame.append` here **does not** modify + ``df1`` and returns its copy with ``df2`` appended. .. _merging.ignore_index: Ignoring indexes on the concatenation axis ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -For DataFrames which don't have a meaningful index, you may wish to append them -and ignore the fact that they may have overlapping indexes: - -To do this, use the ``ignore_index`` argument: +For ``DataFrame``s which don't have a meaningful index, you may wish to append +them and ignore the fact that they may have overlapping indexes. To do this, use +the ``ignore_index`` argument: .. 
ipython:: python @@ -289,7 +287,7 @@ To do this, use the ``ignore_index`` argument: labels=['df1', 'df4'], vertical=True); plt.close('all'); -This is also a valid argument to ``DataFrame.append``: +This is also a valid argument to :meth:`DataFrame.append`: .. ipython:: python @@ -308,9 +306,9 @@ This is also a valid argument to ``DataFrame.append``: Concatenating with mixed ndims ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -You can concatenate a mix of Series and DataFrames. The -Series will be transformed to DataFrames with the column name as -the name of the Series. +You can concatenate a mix of ``Series`` and ``DataFrame`` objects. The +``Series`` will be transformed to ``DataFrame`` with the column name as +the name of the ``Series``. .. ipython:: python @@ -325,7 +323,14 @@ the name of the Series. labels=['df1', 's1'], vertical=False); plt.close('all'); -If unnamed Series are passed they will be numbered consecutively. +.. note:: + + Since we're concatenating a ``Series`` to a ``DataFrame``, we could have + achieved the same result with :meth:`DataFrame.assign`. To concatenate an + arbitrary number of pandas objects (``DataFrame`` or ``Series``), use + ``concat``. + +If unnamed ``Series`` are passed they will be numbered consecutively. .. ipython:: python @@ -357,8 +362,10 @@ Passing ``ignore_index=True`` will drop all name references. More concatenating with group keys ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -A fairly common use of the ``keys`` argument is to override the column names when creating a new DataFrame based on existing Series. -Notice how the default behaviour consists on letting the resulting DataFrame inherits the parent Series' name, when these existed. +A fairly common use of the ``keys`` argument is to override the column names +when creating a new ``DataFrame`` based on existing ``Series``. +Notice how the default behaviour consists in letting the resulting ``DataFrame`` +inherit the parent ``Series``' name, when these existed. .. 
ipython:: python @@ -374,7 +381,7 @@ Through the ``keys`` argument we can override the existing column names. pd.concat([s3, s4, s5], axis=1, keys=['red','blue','yellow']) -Let's consider now a variation on the very first example presented: +Let's consider a variation of the very first example presented: .. ipython:: python @@ -417,7 +424,7 @@ for the ``keys`` argument (unless other keys are specified): plt.close('all'); The MultiIndex created has levels that are constructed from the passed keys and -the index of the DataFrame pieces: +the index of the ``DataFrame`` pieces: .. ipython:: python @@ -444,7 +451,7 @@ do so using the ``levels`` argument: result.index.levels -Yes, this is fairly esoteric, but is actually necessary for implementing things +This is fairly esoteric, but it is actually necessary for implementing things like GroupBy where the order of a categorical variable is meaningful. .. _merging.append.row: @@ -453,8 +460,8 @@ Appending rows to a DataFrame ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ While not especially efficient (since a new object must be created), you can -append a single row to a DataFrame by passing a Series or dict to ``append``, -which returns a new DataFrame as above. +append a single row to a ``DataFrame`` by passing a ``Series`` or dict to +``append``, which returns a new ``DataFrame`` as above. .. ipython:: python @@ -498,43 +505,46 @@ pandas has full-featured, **high performance** in-memory join operations idiomatically very similar to relational databases like SQL. These methods perform significantly better (in some cases well over an order of magnitude better) than other open source implementations (like ``base::merge.data.frame`` -in R). The reason for this is careful algorithmic design and internal layout of -the data in DataFrame. +in R). The reason for this is careful algorithmic design and the internal layout +of the data in ``DataFrame``. See the :ref:`cookbook` for some advanced strategies. 
Users who are familiar with SQL but new to pandas might be interested in a :ref:`comparison with SQL`. -pandas provides a single function, ``merge``, as the entry point for all -standard database join operations between DataFrame objects: +pandas provides a single function, :func:`~pandas.merge`, as the entry point for +all standard database join operations between ``DataFrame`` objects: :: pd.merge(left, right, how='inner', on=None, left_on=None, right_on=None, left_index=False, right_index=False, sort=True, - suffixes=('_x', '_y'), copy=True, indicator=False) + suffixes=('_x', '_y'), copy=True, indicator=False, + validate=None) -- ``left``: A DataFrame object -- ``right``: Another DataFrame object -- ``on``: Columns (names) to join on. Must be found in both the left and - right DataFrame objects. If not passed and ``left_index`` and +- ``left``: A DataFrame object. +- ``right``: Another DataFrame object. +- ``on``: Column or index level names to join on. Must be found in both the left + and right DataFrame objects. If not passed and ``left_index`` and ``right_index`` are ``False``, the intersection of the columns in the - DataFrames will be inferred to be the join keys -- ``left_on``: Columns from the left DataFrame to use as keys. Can either be - column names or arrays with length equal to the length of the DataFrame -- ``right_on``: Columns from the right DataFrame to use as keys. Can either be - column names or arrays with length equal to the length of the DataFrame + DataFrames will be inferred to be the join keys. +- ``left_on``: Columns or index levels from the left DataFrame to use as + keys. Can either be column names, index level names, or arrays with length + equal to the length of the DataFrame. +- ``right_on``: Columns or index levels from the right DataFrame to use as + keys. Can either be column names, index level names, or arrays with length + equal to the length of the DataFrame. 
- ``left_index``: If ``True``, use the index (row labels) from the left DataFrame as its join key(s). In the case of a DataFrame with a MultiIndex (hierarchical), the number of levels must match the number of join keys - from the right DataFrame + from the right DataFrame. - ``right_index``: Same usage as ``left_index`` for the right DataFrame - ``how``: One of ``'left'``, ``'right'``, ``'outer'``, ``'inner'``. Defaults - to ``inner``. See below for more detailed description of each method + to ``inner``. See below for more detailed description of each method. - ``sort``: Sort the result DataFrame by the join keys in lexicographical order. Defaults to ``True``, setting to ``False`` will improve performance - substantially in many cases + substantially in many cases. - ``suffixes``: A tuple of string suffixes to apply to overlapping columns. Defaults to ``('_x', '_y')``. - ``copy``: Always copy data (default ``True``) from the passed DataFrame @@ -549,17 +559,33 @@ standard database join operations between DataFrame objects: merge key only appears in ``'right'`` DataFrame, and ``both`` if the observation's merge key is found in both. - .. versionadded:: 0.17.0 +- ``validate`` : string, default None. + If specified, checks if merge is of specified type. + + * "one_to_one" or "1:1": checks if merge keys are unique in both + left and right datasets. + * "one_to_many" or "1:m": checks if merge keys are unique in left + dataset. + * "many_to_one" or "m:1": checks if merge keys are unique in right + dataset. + * "many_to_many" or "m:m": allowed, but does not result in checks. + + .. versionadded:: 0.21.0 + +.. note:: + + Support for specifying index levels as the ``on``, ``left_on``, and + ``right_on`` parameters was added in version 0.23.0. The return type will be the same as ``left``. If ``left`` is a ``DataFrame`` and ``right`` is a subclass of DataFrame, the return type will still be ``DataFrame``. 
``merge`` is a function in the pandas namespace, and it is also available as a -DataFrame instance method, with the calling DataFrame being implicitly -considered the left object in the join. +``DataFrame`` instance method :meth:`~DataFrame.merge`, with the calling +``DataFrame`` being implicitly considered the left object in the join. -The related ``DataFrame.join`` method, uses ``merge`` internally for the +The related :meth:`~DataFrame.join` method uses ``merge`` internally for the index-on-index (by default) and column(s)-on-index join. If you are joining on index only, you may wish to use ``DataFrame.join`` to save yourself some typing. @@ -568,19 +594,19 @@ Brief primer on merge methods (relational algebra) Experienced users of relational databases like SQL will be familiar with the terminology used to describe join operations between two SQL-table like -structures (DataFrame objects). There are several cases to consider which are -very important to understand: +structures (``DataFrame`` objects). There are several cases to consider which +are very important to understand: -- **one-to-one** joins: for example when joining two DataFrame objects on - their indexes (which must contain unique values) +- **one-to-one** joins: for example when joining two ``DataFrame`` objects on + their indexes (which must contain unique values). - **many-to-one** joins: for example when joining an index (unique) to one or - more columns in a DataFrame + more columns in a different ``DataFrame``. - **many-to-many** joins: joining columns on columns. .. note:: When joining columns on columns (potentially a many-to-many join), any - indexes on the passed DataFrame objects **will be discarded**. + indexes on the passed ``DataFrame`` objects **will be discarded**. 
It is worth spending some time understanding the result of the **many-to-many** @@ -608,7 +634,9 @@ key combination: labels=['left', 'right'], vertical=False); plt.close('all'); -Here is a more complicated example with multiple join keys: +Here is a more complicated example with multiple join keys. Only the keys +appearing in ``left`` and ``right`` are present (the intersection), since +``how='inner'`` by default. .. ipython:: python @@ -711,19 +739,56 @@ Here is another example with duplicate join keys in DataFrames: labels=['left', 'right'], vertical=False); plt.close('all'); + .. warning:: - Joining / merging on duplicate keys can cause a returned frame that is the multiplication of the row dimensions, - may result in memory overflow. It is the user' s responsibility to manage duplicate values in keys before joining large DataFrames. + Joining / merging on duplicate keys can cause a returned frame that is the multiplication of the row dimensions, which may result in memory overflow. It is the user's responsibility to manage duplicate values in keys before joining large DataFrames. + +.. _merging.validation: + +Checking for duplicate keys +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 0.21.0 + +Users can use the ``validate`` argument to automatically check whether there +are unexpected duplicates in their merge keys. Key uniqueness is checked before +merge operations and so should protect against memory overflows. Checking key +uniqueness is also a good way to ensure user data structures are as expected. + +In the following example, there are duplicate values of ``B`` in the right +``DataFrame``. As this is not a one-to-one merge -- as specified in the +``validate`` argument -- an exception will be raised. + + +.. ipython:: python + + left = pd.DataFrame({'A' : [1,2], 'B' : [1, 2]}) + right = pd.DataFrame({'A' : [4,5,6], 'B': [2, 2, 2]}) + +.. code-block:: ipython + + In [53]: result = pd.merge(left, right, on='B', how='outer', validate="one_to_one") + ... 
+ MergeError: Merge keys are not unique in right dataset; not a one-to-one merge + +If the user is aware of the duplicates in the right ``DataFrame`` but wants to +ensure there are no duplicates in the left DataFrame, one can use the +``validate='one_to_many'`` argument instead, which will not raise an exception. + +.. ipython:: python + + pd.merge(left, right, on='B', how='outer', validate="one_to_many") + .. _merging.indicator: The merge indicator ~~~~~~~~~~~~~~~~~~~ -.. versionadded:: 0.17.0 - -``merge`` now accepts the argument ``indicator``. If ``True``, a Categorical-type column called ``_merge`` will be added to the output object that takes on values: +:func:`~pandas.merge` accepts the argument ``indicator``. If ``True``, a +Categorical-type column called ``_merge`` will be added to the output object +that takes on values: =================================== ================ Observation Origin ``_merge`` value @@ -746,14 +811,92 @@ The ``indicator`` argument will also accept string arguments, in which case the pd.merge(df1, df2, on='col1', how='outer', indicator='indicator_column') +.. _merging.dtypes: + +Merge Dtypes +~~~~~~~~~~~~ + +.. versionadded:: 0.19.0 + +Merging will preserve the dtype of the join keys. + +.. ipython:: python + + left = pd.DataFrame({'key': [1], 'v1': [10]}) + left + right = pd.DataFrame({'key': [1, 2], 'v1': [20, 30]}) + right + +We are able to preserve the join keys: + +.. ipython:: python + + pd.merge(left, right, how='outer') + pd.merge(left, right, how='outer').dtypes + +Of course if you have missing values that are introduced, then the +resulting dtype will be upcast. + +.. ipython:: python + + pd.merge(left, right, how='outer', on='key') + pd.merge(left, right, how='outer', on='key').dtypes + +.. versionadded:: 0.20.0 + +Merging will preserve ``category`` dtypes of the mergands. See also the section on :ref:`categoricals `. + +The left frame. + +.. 
ipython:: python + + from pandas.api.types import CategoricalDtype + + X = pd.Series(np.random.choice(['foo', 'bar'], size=(10,))) + X = X.astype(CategoricalDtype(categories=['foo', 'bar'])) + + left = pd.DataFrame({'X': X, + 'Y': np.random.choice(['one', 'two', 'three'], size=(10,))}) + left + left.dtypes + +The right frame. + +.. ipython:: python + + right = pd.DataFrame({ + 'X': pd.Series(['foo', 'bar'], + dtype=CategoricalDtype(['foo', 'bar'])), + 'Z': [1, 2] + }) + right + right.dtypes + +The merged result: + +.. ipython:: python + + result = pd.merge(left, right, how='outer') + result + result.dtypes + +.. note:: + + The category dtypes must be *exactly* the same, meaning the same categories and the ordered attribute. + Otherwise the result will coerce to ``object`` dtype. + +.. note:: + + Merging on ``category`` dtypes that are the same can be quite performant compared to ``object`` dtype merging. + .. _merging.join.index: Joining on index ~~~~~~~~~~~~~~~~ -``DataFrame.join`` is a convenient method for combining the columns of two -potentially differently-indexed DataFrames into a single result DataFrame. Here -is a very basic example: +:meth:`DataFrame.join` is a convenient method for combining the columns of two +potentially differently-indexed ``DataFrames`` into a single result +``DataFrame``. Here is a very basic example: .. ipython:: python @@ -787,6 +930,8 @@ is a very basic example: labels=['left', 'right'], vertical=False); plt.close('all'); +The same as above, but with ``how='inner'``. + .. ipython:: python result = left.join(right, how='inner') @@ -830,10 +975,10 @@ indexes: Joining key columns on an index ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -``join`` takes an optional ``on`` argument which may be a column or multiple -column names, which specifies that the passed DataFrame is to be aligned on -that column in the DataFrame. 
These two function calls are completely -equivalent: +:meth:`~DataFrame.join` takes an optional ``on`` argument which may be a column +or multiple column names, which specifies that the passed ``DataFrame`` is to be +aligned on that column in the ``DataFrame``. These two function calls are +completely equivalent: :: @@ -842,8 +987,8 @@ equivalent: how='left', sort=False) Obviously you can choose whichever form you find more convenient. For -many-to-one joins (where one of the DataFrame's is already indexed by the join -key), using ``join`` may be more convenient. Here is a simple example: +many-to-one joins (where one of the ``DataFrame``'s is already indexed by the +join key), using ``join`` may be more convenient. Here is a simple example: .. ipython:: python @@ -935,8 +1080,6 @@ As you can see, this drops any rows where there was no match. Joining a single Index to a Multi-index ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. versionadded:: 0.14.0 - You can join a singly-indexed ``DataFrame`` with a level of a multi-indexed ``DataFrame``. The level will match on the name of the index of the singly-indexed frame against a level name of the multi-indexed frame. @@ -982,19 +1125,25 @@ This is equivalent but less verbose and more memory efficient / faster than this Joining with two multi-indexes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -This is not Implemented via ``join`` at-the-moment, however it can be done using the following. +As of pandas 0.23.1, :meth:`DataFrame.join` can be used to join multi-indexed ``DataFrame`` instances on the overlapping index levels. .. 
ipython:: python - index = pd.MultiIndex.from_tuples([('K0', 'X0'), ('K0', 'X1'), + index_left = pd.MultiIndex.from_tuples([('K0', 'X0'), ('K0', 'X1'), ('K1', 'X2')], names=['key', 'X']) left = pd.DataFrame({'A': ['A0', 'A1', 'A2'], 'B': ['B0', 'B1', 'B2']}, - index=index) + index=index_left) - result = pd.merge(left.reset_index(), right.reset_index(), - on=['key'], how='inner').set_index(['key','X','Y']) + index_right = pd.MultiIndex.from_tuples([('K0', 'Y0'), ('K1', 'Y1'), + ('K2', 'Y2'), ('K2', 'Y3')], + names=['key', 'Y']) + right = pd.DataFrame({'C': ['C0', 'C1', 'C2', 'C3'], + 'D': ['D0', 'D1', 'D2', 'D3']}, + index=index_right) + + left.join(right) .. ipython:: python :suppress: @@ -1004,11 +1153,68 @@ This is not Implemented via ``join`` at-the-moment, however it can be done using labels=['left', 'right'], vertical=False); plt.close('all'); +For earlier versions it can be done using the following. + +.. ipython:: python + + pd.merge(left.reset_index(), right.reset_index(), + on=['key'], how='inner').set_index(['key','X','Y']) + +.. _merging.merge_on_columns_and_levels: + +Merging on a combination of columns and index levels +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 0.23.0 + +Strings passed as the ``on``, ``left_on``, and ``right_on`` parameters +may refer to either column names or index level names. This enables merging +``DataFrame`` instances on a combination of index levels and columns without +resetting indexes. + +.. ipython:: python + + left_index = pd.Index(['K0', 'K0', 'K1', 'K2'], name='key1') + + left = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'], + 'B': ['B0', 'B1', 'B2', 'B3'], + 'key2': ['K0', 'K1', 'K0', 'K1']}, + index=left_index) + + right_index = pd.Index(['K0', 'K1', 'K2', 'K2'], name='key1') + + right = pd.DataFrame({'C': ['C0', 'C1', 'C2', 'C3'], + 'D': ['D0', 'D1', 'D2', 'D3'], + 'key2': ['K0', 'K0', 'K0', 'K1']}, + index=right_index) + + result = left.merge(right, on=['key1', 'key2']) + +.. 
ipython:: python + :suppress: + + @savefig merge_on_index_and_column.png + p.plot([left, right], result, + labels=['left', 'right'], vertical=False); + plt.close('all'); + +.. note:: + + When DataFrames are merged on a string that matches an index level in both + frames, the index level is preserved as an index level in the resulting + DataFrame. + +.. note:: + + If a string matches both a column name and an index level name, then a + warning is issued and the column takes precedence. This will result in an + ambiguity error in a future version. + Overlapping value columns ~~~~~~~~~~~~~~~~~~~~~~~~~ The merge ``suffixes`` argument takes a tuple of list of strings to append to -overlapping column names in the input DataFrames to disambiguate the result +overlapping column names in the input ``DataFrame``s to disambiguate the result columns: .. ipython:: python @@ -1038,7 +1244,7 @@ columns: labels=['left', 'right'], vertical=False); plt.close('all'); -``DataFrame.join`` has ``lsuffix`` and ``rsuffix`` arguments which behave +:meth:`DataFrame.join` has ``lsuffix`` and ``rsuffix`` arguments which behave similarly. .. ipython:: python @@ -1060,8 +1266,8 @@ similarly. Joining multiple DataFrame or Panel objects ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -A list or tuple of DataFrames can also be passed to ``DataFrame.join`` to join -them together on their indexes. The same is true for ``Panel.join``. +A list or tuple of ``DataFrames`` can also be passed to :meth:`~DataFrame.join` +to join them together on their indexes. .. ipython:: python @@ -1082,8 +1288,8 @@ Merging together values within Series or DataFrame columns ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Another fairly common situation is to have two like-indexed (or similarly -indexed) Series or DataFrame objects and wanting to "patch" values in one -object from values for matching indices in the other. 
Here is an example: +indexed) ``Series`` or ``DataFrame`` objects and wanting to "patch" values in +one object from values for matching indices in the other. Here is an example: .. ipython:: python @@ -1092,7 +1298,7 @@ object from values for matching indices in the other. Here is an example: df2 = pd.DataFrame([[-42.6, np.nan, -8.2], [-5., 1.6, 4]], index=[1, 2]) -For this, use the ``combine_first`` method: +For this, use the :meth:`~DataFrame.combine_first` method: .. ipython:: python @@ -1106,9 +1312,9 @@ For this, use the ``combine_first`` method: labels=['df1', 'df2'], vertical=False); plt.close('all'); -Note that this method only takes values from the right DataFrame if they are -missing in the left DataFrame. A related method, ``update``, alters non-NA -values inplace: +Note that this method only takes values from the right ``DataFrame`` if they are +missing in the left ``DataFrame``. A related method, :meth:`~DataFrame.update`, +alters non-NA values inplace: .. ipython:: python :suppress: @@ -1159,12 +1365,16 @@ Merging AsOf .. versionadded:: 0.19.0 -A :func:`merge_asof` is similar to an ordered left-join except that we match on nearest key rather than equal keys. For each row in the ``left`` DataFrame, we select the last row in the ``right`` DataFrame whose ``on`` key is less than the left's key. Both DataFrames must be sorted by the key. +A :func:`merge_asof` is similar to an ordered left-join except that we match on +nearest key rather than equal keys. For each row in the ``left`` ``DataFrame``, +we select the last row in the ``right`` ``DataFrame`` whose ``on`` key is less +than the left's key. Both DataFrames must be sorted by the key. -Optionally an asof merge can perform a group-wise merge. This matches the ``by`` key equally, -in addition to the nearest match on the ``on`` key. +Optionally an asof merge can perform a group-wise merge. This matches the +``by`` key equally, in addition to the nearest match on the ``on`` key. 
-For example; we might have ``trades`` and ``quotes`` and we want to ``asof`` merge them. +For example; we might have ``trades`` and ``quotes`` and we want to ``asof`` +merge them. .. ipython:: python @@ -1213,7 +1423,7 @@ By default we are taking the asof of the quotes. on='time', by='ticker') -We only asof within ``2ms`` betwen the quote time and the trade time. +We only asof within ``2ms`` between the quote time and the trade time. .. ipython:: python @@ -1222,9 +1432,9 @@ We only asof within ``2ms`` betwen the quote time and the trade time. by='ticker', tolerance=pd.Timedelta('2ms')) -We only asof within ``10ms`` betwen the quote time and the trade time and we exclude exact matches on time. -Note that though we exclude the exact matches (of the quotes), prior quotes DO propogate to that point -in time. +We only asof within ``10ms`` between the quote time and the trade time and we +exclude exact matches on time. Note that though we exclude the exact matches +(of the quotes), prior quotes **do** propagate to that point in time. .. ipython:: python diff --git a/doc/source/missing_data.rst b/doc/source/missing_data.rst index 37930775885e3..ee0e2c7462f66 100644 --- a/doc/source/missing_data.rst +++ b/doc/source/missing_data.rst @@ -7,7 +7,7 @@ import pandas as pd pd.options.display.max_rows=15 import matplotlib - matplotlib.style.use('ggplot') + # matplotlib.style.use('default') import matplotlib.pyplot as plt .. _missing_data: @@ -27,7 +27,7 @@ pandas. NumPy will soon be able to provide a native NA type solution (similar to R) performant enough to be used in pandas. -See the :ref:`cookbook` for some advanced strategies +See the :ref:`cookbook` for some advanced strategies. Missing data basics ------------------- @@ -36,14 +36,14 @@ When / why does data become missing? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Some might quibble over our usage of *missing*. By "missing" we simply mean -**null** or "not present for whatever reason". 
Many data sets simply arrive with +**NA** ("not available") or "not present for whatever reason". Many data sets simply arrive with missing data, either because it exists and was not collected or it never existed. For example, in a collection of financial time series, some of the time series might start on different dates. Thus, values prior to the start date would generally be marked as missing. In pandas, one of the most common ways that missing data is **introduced** into -a data set is by reindexing. For example +a data set is by reindexing. For example: .. ipython:: python @@ -63,32 +63,31 @@ to handling missing data. While ``NaN`` is the default missing value marker for reasons of computational speed and convenience, we need to be able to easily detect this value with data of different types: floating point, integer, boolean, and general object. In many cases, however, the Python ``None`` will -arise and we wish to also consider that "missing" or "null". +arise and we wish to also consider that "missing" or "not available" or "NA". .. note:: - Prior to version v0.10.0 ``inf`` and ``-inf`` were also - considered to be "null" in computations. This is no longer the case by - default; use the ``mode.use_inf_as_null`` option to recover it. + If you want to consider ``inf`` and ``-inf`` to be "NA" in computations, + you can set ``pandas.options.mode.use_inf_as_na = True``. -.. _missing.isnull: +.. _missing.isna: To make detecting missing values easier (and across different array dtypes), -pandas provides the :func:`~pandas.core.common.isnull` and -:func:`~pandas.core.common.notnull` functions, which are also methods on +pandas provides the :func:`isna` and +:func:`notna` functions, which are also methods on ``Series`` and ``DataFrame`` objects: .. ipython:: python df2['one'] - pd.isnull(df2['one']) - df2['four'].notnull() - df2.isnull() + pd.isna(df2['one']) + df2['four'].notna() + df2.isna() .. 
warning:: - One has to be mindful that in python (and numpy), the ``nan's`` don't compare equal, but ``None's`` **do**. - Note that Pandas/numpy uses the fact that ``np.nan != np.nan``, and treats ``None`` like ``np.nan``. + One has to be mindful that in Python (and NumPy), the ``nan's`` don't compare equal, but ``None's`` **do**. + Note that pandas/NumPy uses the fact that ``np.nan != np.nan``, and treats ``None`` like ``np.nan``. .. ipython:: python @@ -105,7 +104,7 @@ Datetimes --------- For datetime64[ns] types, ``NaT`` represents missing values. This is a pseudo-native -sentinel value that can be represented by numpy in a singular dtype (datetime64[ns]). +sentinel value that can be represented by NumPy in a singular dtype (datetime64[ns]). pandas objects provide intercompatibility between ``NaT`` and ``NaN``. .. ipython:: python @@ -170,10 +169,10 @@ The descriptive statistics and computational methods discussed in the ` and :ref:`here `) are all written to account for missing data. For example: -* When summing data, NA (missing) values will be treated as zero -* If the data are all NA, the result will be NA +* When summing data, NA (missing) values will be treated as zero. +* If the data are all NA, the result will be NA. * Methods like **cumsum** and **cumprod** ignore NA values, but preserve them - in the resulting arrays + in the resulting arrays. .. ipython:: python @@ -182,6 +181,43 @@ account for missing data. For example: df.mean(1) df.cumsum() + +.. _missing_data.numeric_sum: + +Sum/Prod of Empties/Nans +~~~~~~~~~~~~~~~~~~~~~~~~ + +.. warning:: + + This behavior is now standard as of v0.21.0; previously sum/prod would give different + results if the ``bottleneck`` package was installed. + See the :ref:`v0.21.0 whatsnew `. + +With ``sum`` or ``prod`` on an empty or all-``NaN`` ``Series``, or columns of a ``DataFrame``, the result will be all-``NaN``. + +.. 
ipython:: python + + s = pd.Series([np.nan]) + + s.sum() + +Summing over an empty ``Series`` will return ``NaN``: + +.. ipython:: python + + pd.Series([]).sum() + +.. warning:: + + These behaviors differ from the default in ``numpy`` where an empty sum returns zero. + + .. ipython:: python + + np.nansum(np.array([np.nan])) + np.nansum(np.array([])) + + + NA values in GroupBy ~~~~~~~~~~~~~~~~~~~~ @@ -206,7 +242,7 @@ with missing data. Filling missing values: fillna ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The **fillna** function can "fill in" NA values with non-null data in a couple +The **fillna** function can "fill in" NA values with non-NA data in a couple of ways, which we illustrate: **Replace NA with a scalar value** @@ -215,12 +251,12 @@ of ways, which we illustrate: df2 df2.fillna(0) - df2['four'].fillna('missing') + df2['one'].fillna('missing') **Fill gaps forward or backward** Using the same filling arguments as :ref:`reindexing `, we -can propagate non-null values forward or backward: +can propagate non-NA values forward or backward: .. ipython:: python @@ -264,8 +300,6 @@ and ``bfill()`` is equivalent to ``fillna(method='bfill')`` Filling with a PandasObject ~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. versionadded:: 0.12 - You can also fillna using a dict or Series that is alignable. The labels of the dict or index of the Series must match the columns of the frame you wish to fill. The use case of this is to fill a DataFrame with the mean of that column. @@ -281,14 +315,12 @@ use case of this is to fill a DataFrame with the mean of that column. dff.fillna(dff.mean()) dff.fillna(dff.mean()['B':'C']) -.. versionadded:: 0.13 - Same result as above, but is aligning the 'fill' value which is a Series in this case. .. ipython:: python - dff.where(pd.notnull(dff), dff.mean(), axis='columns') + dff.where(pd.notna(dff), dff.mean(), axis='columns') .. 
_missing_data.dropna: @@ -297,7 +329,7 @@ Dropping axis labels with missing data: dropna ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ You may wish to simply exclude labels from a data set which refer to missing -data. To do this, use the **dropna** method: +data. To do this, use the :meth:`~DataFrame.dropna` method: .. ipython:: python :suppress: @@ -312,7 +344,7 @@ data. To do this, use the **dropna** method: df.dropna(axis=1) df['one'].dropna() -Series.dropna is a simpler method as it only has one axis to consider. +An equivalent :meth:`~Series.dropna` method is available for Series. DataFrame.dropna has considerably more options than Series.dropna, which can be examined :ref:`in the API `. @@ -321,17 +353,12 @@ examined :ref:`in the API `. Interpolation ~~~~~~~~~~~~~ -.. versionadded:: 0.13.0 - - :meth:`~pandas.DataFrame.interpolate`, and :meth:`~pandas.Series.interpolate` have - revamped interpolation methods and functionality. - -.. versionadded:: 0.17.0 +.. versionadded:: 0.21.0 - The ``limit_direction`` keyword argument was added. + The ``limit_area`` keyword argument was added. -Both Series and Dataframe objects have an ``interpolate`` method that, by default, -performs linear interpolation at missing datapoints. +Both Series and DataFrame objects have an :meth:`~DataFrame.interpolate` method +that, by default, performs linear interpolation at missing datapoints. .. ipython:: python :suppress: @@ -389,7 +416,7 @@ You can also interpolate with a DataFrame: df.interpolate() The ``method`` argument gives access to fancier interpolation methods. -If you have scipy_ installed, you can set pass the name of a 1-d interpolation routine to ``method``. +If you have scipy_ installed, you can pass the name of a 1-d interpolation routine to ``method``. You'll want to consult the full scipy interpolation documentation_ and reference guide_ for details. The appropriate interpolation method will depend on the type of data you are working with. 
@@ -397,7 +424,7 @@ The appropriate interpolation method will depend on the type of data you are wor ``method='quadratic'`` may be appropriate. * If you have values approximating a cumulative distribution function, then ``method='pchip'`` should work well. -* To fill missing values with goal of smooth plotting, use ``method='akima'``. +* To fill missing values with goal of smooth plotting, consider ``method='akima'``. .. warning:: @@ -454,39 +481,60 @@ at the new values. .. _documentation: http://docs.scipy.org/doc/scipy/reference/interpolate.html#univariate-interpolation .. _guide: http://docs.scipy.org/doc/scipy/reference/tutorial/interpolate.html +.. _missing_data.interp_limits: + Interpolation Limits ^^^^^^^^^^^^^^^^^^^^ Like other pandas fill methods, ``interpolate`` accepts a ``limit`` keyword -argument. Use this argument to limit the number of consecutive interpolations, -keeping ``NaN`` values for interpolations that are too far from the last valid -observation: +argument. Use this argument to limit the number of consecutive ``NaN`` values +filled since the last valid observation: .. ipython:: python - ser = pd.Series([np.nan, np.nan, 5, np.nan, np.nan, np.nan, 13]) - ser.interpolate(limit=2) + ser = pd.Series([np.nan, np.nan, 5, np.nan, np.nan, np.nan, 13, np.nan, np.nan]) + + # fill all consecutive values in a forward direction + ser.interpolate() -By default, ``limit`` applies in a forward direction, so that only ``NaN`` -values after a non-``NaN`` value can be filled. If you provide ``'backward'`` or -``'both'`` for the ``limit_direction`` keyword argument, you can fill ``NaN`` -values before non-``NaN`` values, or both before and after non-``NaN`` values, -respectively: + # fill one consecutive value in a forward direction + ser.interpolate(limit=1) -.. ipython:: python +By default, ``NaN`` values are filled in a ``forward`` direction. Use +``limit_direction`` parameter to fill ``backward`` or from ``both`` directions. 
- ser.interpolate(limit=1) # limit_direction == 'forward' +.. ipython:: python + # fill one consecutive value backwards ser.interpolate(limit=1, limit_direction='backward') + # fill one consecutive value in both directions ser.interpolate(limit=1, limit_direction='both') + # fill all consecutive values in both directions + ser.interpolate(limit_direction='both') + +By default, ``NaN`` values are filled whether they are inside (surrounded by) +existing valid values, or outside existing valid values. Introduced in v0.23 +the ``limit_area`` parameter restricts filling to either inside or outside values. + +.. ipython:: python + + # fill one consecutive inside value in both directions + ser.interpolate(limit_direction='both', limit_area='inside', limit=1) + + # fill all consecutive outside values backward + ser.interpolate(limit_direction='backward', limit_area='outside') + + # fill all consecutive outside values in both directions + ser.interpolate(limit_direction='both', limit_area='outside') + .. _missing_data.replace: Replacing Generic Values ~~~~~~~~~~~~~~~~~~~~~~~~ -Often times we want to replace arbitrary values with other values. New in v0.8 -is the ``replace`` method in Series/DataFrame that provides an efficient yet +Often times we want to replace arbitrary values with other values. The +``replace`` method in Series/DataFrame provides an efficient yet flexible way to perform such replacements. For a Series, you can replace a single value or a list of values by another @@ -537,10 +585,10 @@ String/Regular Expression Replacement backslashes than strings without this prefix. Backslashes in raw strings will be interpreted as an escaped backslash, e.g., ``r'\' == '\\'``. You should `read about them - `__ + `__ if this is unclear. -Replace the '.' with ``nan`` (str -> str) +Replace the '.' with ``NaN`` (str -> str): .. ipython:: python @@ -549,58 +597,58 @@ Replace the '.' 
with ``nan`` (str -> str) df.replace('.', np.nan) Now do it with a regular expression that removes surrounding whitespace -(regex -> regex) +(regex -> regex): .. ipython:: python df.replace(r'\s*\.\s*', np.nan, regex=True) -Replace a few different values (list -> list) +Replace a few different values (list -> list): .. ipython:: python df.replace(['a', '.'], ['b', np.nan]) -list of regex -> list of regex +list of regex -> list of regex: .. ipython:: python df.replace([r'\.', r'(a)'], ['dot', '\1stuff'], regex=True) -Only search in column ``'b'`` (dict -> dict) +Only search in column ``'b'`` (dict -> dict): .. ipython:: python df.replace({'b': '.'}, {'b': np.nan}) Same as the previous example, but use a regular expression for -searching instead (dict of regex -> dict) +searching instead (dict of regex -> dict): .. ipython:: python df.replace({'b': r'\s*\.\s*'}, {'b': np.nan}, regex=True) -You can pass nested dictionaries of regular expressions that use ``regex=True`` +You can pass nested dictionaries of regular expressions that use ``regex=True``: .. ipython:: python df.replace({'b': {'b': r''}}, regex=True) -or you can pass the nested dictionary like so +Alternatively, you can pass the nested dictionary like so: .. ipython:: python df.replace(regex={'b': {r'\s*\.\s*': np.nan}}) You can also use the group of a regular expression match when replacing (dict -of regex -> dict of regex), this works for lists as well +of regex -> dict of regex), this works for lists as well. .. ipython:: python df.replace({'b': r'\s*(\.)\s*'}, {'b': r'\1ty'}, regex=True) You can pass a list of regular expressions, of which those that match -will be replaced with a scalar (list of regex -> regex) +will be replaced with a scalar (list of regex -> regex). .. ipython:: python @@ -609,7 +657,7 @@ will be replaced with a scalar (list of regex -> regex) All of the regular expression examples can also be passed with the ``to_replace`` argument as the ``regex`` argument. 
In this case the ``value`` argument must be passed explicitly by name or ``regex`` must be a nested -dictionary. The previous example, in this case, would then be +dictionary. The previous example, in this case, would then be: .. ipython:: python @@ -626,7 +674,7 @@ want to use a regular expression. Numeric Replacement ~~~~~~~~~~~~~~~~~~~ -Similar to ``DataFrame.fillna`` +The :meth:`~DataFrame.replace` method is similar to :meth:`~DataFrame.fillna`. .. ipython:: python @@ -634,7 +682,7 @@ Similar to ``DataFrame.fillna`` df[np.random.rand(df.shape[0]) > 0.5] = 1.5 df.replace(1.5, np.nan) -Replacing more than one value via lists works as well +Replacing more than one value is possible by passing a list. .. ipython:: python @@ -642,7 +690,7 @@ Replacing more than one value via lists works as well df.replace([1.5, df00], [np.nan, 'a']) df[1].dtype -You can also operate on the DataFrame in place +You can also operate on the DataFrame in place: .. ipython:: python @@ -652,7 +700,7 @@ You can also operate on the DataFrame in place When replacing multiple ``bool`` or ``datetime64`` objects, the first argument to ``replace`` (``to_replace``) must match the type of the value - being replaced type. For example, + being replaced. For example, .. code-block:: python @@ -680,9 +728,9 @@ Missing data casting rules and indexing While pandas supports storing arrays of integer and boolean type, these types are not capable of storing missing data. Until we can switch to using a native -NA type in NumPy, we've established some "casting rules" when reindexing will -cause missing data to be introduced into, say, a Series or DataFrame. Here they -are: +NA type in NumPy, we've established some "casting rules". When a reindexing +operation introduces missing data, the Series will be cast according to the +rules introduced in the table below. .. 
csv-table:: :header: "data type", "Cast to" diff --git a/doc/source/options.rst b/doc/source/options.rst index 77cac6d495d13..a82be4d84bf3f 100644 --- a/doc/source/options.rst +++ b/doc/source/options.rst @@ -28,7 +28,7 @@ You can get/set options directly as attributes of the top-level ``options`` attr pd.options.display.max_rows = 999 pd.options.display.max_rows -There is also an API composed of 5 relevant functions, available directly from the ``pandas`` +The API is composed of 5 relevant functions, available directly from the ``pandas`` namespace: - :func:`~pandas.get_option` / :func:`~pandas.set_option` - get/set the value of a single option. @@ -37,10 +37,10 @@ namespace: - :func:`~pandas.option_context` - execute a codeblock with a set of options that revert to prior settings after execution. -**Note:** developers can check out pandas/core/config.py for more info. +**Note:** Developers can check out `pandas/core/config.py `_ for more information. All of the functions above accept a regexp pattern (``re.search`` style) as an argument, -and so passing in a substring will work - as long as it is unambiguous : +and so passing in a substring will work - as long as it is unambiguous: .. ipython:: python @@ -78,8 +78,9 @@ with no argument ``describe_option`` will print out the descriptions for all ava Getting and Setting Options --------------------------- -As described above, ``get_option()`` and ``set_option()`` are available from the -pandas namespace. To change an option, call ``set_option('option regex', new_value)`` +As described above, :func:`~pandas.get_option` and :func:`~pandas.set_option` +are available from the pandas namespace. To change an option, call +``set_option('option regex', new_value)``. .. ipython:: python @@ -87,7 +88,7 @@ pandas namespace. 
To change an option, call ``set_option('option regex', new_va pd.set_option('mode.sim_interactive', True) pd.get_option('mode.sim_interactive') -**Note:** that the option 'mode.sim_interactive' is mostly used for debugging purposes. +**Note:** The option 'mode.sim_interactive' is mostly used for debugging purposes. All options also have a default value, and you can use ``reset_option`` to do just that: @@ -163,7 +164,7 @@ lines are replaced by an ellipsis. df pd.reset_option('max_rows') -``display.expand_frame_repr`` allows for the the representation of +``display.expand_frame_repr`` allows for the representation of dataframes to stretch across pages, wrapped over the full column vs row-wise. .. ipython:: python @@ -221,7 +222,7 @@ can specify the option ``df.info(null_counts=True)`` to override on showing a pa .. ipython:: python - df =pd.DataFrame(np.random.choice([0,1,np.nan], size=(10,10))) + df = pd.DataFrame(np.random.choice([0,1,np.nan], size=(10,10))) df pd.set_option('max_info_rows', 11) df.info() @@ -229,8 +230,8 @@ can specify the option ``df.info(null_counts=True)`` to override on showing a pa df.info() pd.reset_option('max_info_rows') -``display.precision`` sets the output display precision in terms of decimal places. This is only a -suggestion. +``display.precision`` sets the output display precision in terms of decimal places. +This is only a suggestion. .. ipython:: python @@ -241,7 +242,7 @@ suggestion. df ``display.chop_threshold`` sets at what level pandas rounds to zero when -it displays a Series of DataFrame. Note, this does not effect the +it displays a Series or DataFrame. This setting does not change the precision at which the number is stored. .. ipython:: python @@ -254,7 +255,7 @@ precision at which the number is stored. pd.reset_option('chop_threshold') ``display.colheader_justify`` controls the justification of the headers. -Options are 'right', and 'left'. +The options are 'right' and 'left'. .. 
ipython:: python @@ -273,151 +274,172 @@ Options are 'right', and 'left'. Available Options ----------------- -========================== ============ ================================== -Option Default Function -========================== ============ ================================== -display.chop_threshold None If set to a float value, all float - values smaller then the given - threshold will be displayed as - exactly 0 by repr and friends. -display.colheader_justify right Controls the justification of - column headers. used by DataFrameFormatter. -display.column_space 12 No description available. -display.date_dayfirst False When True, prints and parses dates - with the day first, eg 20/01/2005 -display.date_yearfirst False When True, prints and parses dates - with the year first, eg 2005/01/20 -display.encoding UTF-8 Defaults to the detected encoding - of the console. Specifies the encoding - to be used for strings returned by - to_string, these are generally strings - meant to be displayed on the console. -display.expand_frame_repr True Whether to print out the full DataFrame - repr for wide DataFrames across - multiple lines, `max_columns` is - still respected, but the output will - wrap-around across multiple "pages" - if its width exceeds `display.width`. -display.float_format None The callable should accept a floating - point number and return a string with - the desired format of the number. - This is used in some places like - SeriesFormatter. - See core.format.EngFormatter for an example. -display.height 60 Deprecated. Use `display.max_rows` instead. -display.large_repr truncate For DataFrames exceeding max_rows/max_cols, - the repr (and HTML repr) can show - a truncated table (the default from 0.13), - or switch to the view from df.info() - (the behaviour in earlier versions of pandas). - allowable settings, ['truncate', 'info'] -display.latex.repr False Whether to produce a latex DataFrame - representation for jupyter frontends - that support it. 
-display.latex.escape True Escapes special caracters in Dataframes, when - using the to_latex method. -display.latex.longtable False Specifies if the to_latex method of a Dataframe - uses the longtable format. -display.line_width 80 Deprecated. Use `display.width` instead. -display.max_columns 20 max_rows and max_columns are used - in __repr__() methods to decide if - to_string() or info() is used to - render an object to a string. In - case python/IPython is running in - a terminal this can be set to 0 and - pandas will correctly auto-detect - the width the terminal and swap to - a smaller format in case all columns - would not fit vertically. The IPython - notebook, IPython qtconsole, or IDLE - do not run in a terminal and hence - it is not possible to do correct - auto-detection. 'None' value means - unlimited. -display.max_colwidth 50 The maximum width in characters of - a column in the repr of a pandas - data structure. When the column overflows, - a "..." placeholder is embedded in - the output. -display.max_info_columns 100 max_info_columns is used in DataFrame.info - method to decide if per column information - will be printed. -display.max_info_rows 1690785 df.info() will usually show null-counts - for each column. For large frames - this can be quite slow. max_info_rows - and max_info_cols limit this null - check only to frames with smaller - dimensions then specified. -display.max_rows 60 This sets the maximum number of rows - pandas should output when printing - out various output. For example, - this value determines whether the - repr() for a dataframe prints out - fully or just a summary repr. - 'None' value means unlimited. -display.max_seq_items 100 when pretty-printing a long sequence, - no more then `max_seq_items` will - be printed. If items are omitted, - they will be denoted by the addition - of "..." to the resulting string. - If set to None, the number of items - to be printed is unlimited. 
-display.memory_usage True This specifies if the memory usage of - a DataFrame should be displayed when the - df.info() method is invoked. -display.multi_sparse True "Sparsify" MultiIndex display (don't - display repeated elements in outer - levels within groups) -display.notebook_repr_html True When True, IPython notebook will - use html representation for - pandas objects (if it is available). -display.pprint_nest_depth 3 Controls the number of nested levels - to process when pretty-printing -display.precision 6 Floating point output precision in - terms of number of places after the - decimal, for regular formatting as well - as scientific notation. Similar to - numpy's ``precision`` print option -display.show_dimensions truncate Whether to print out dimensions - at the end of DataFrame repr. - If 'truncate' is specified, only - print out the dimensions if the - frame is truncated (e.g. not display - all rows and/or columns) -display.width 80 Width of the display in characters. - In case python/IPython is running in - a terminal this can be set to None - and pandas will correctly auto-detect - the width. Note that the IPython notebook, - IPython qtconsole, or IDLE do not run in a - terminal and hence it is not possible - to correctly detect the width. -html.border 1 A ``border=value`` attribute is - inserted in the ```` tag - for the DataFrame HTML repr. -io.excel.xls.writer xlwt The default Excel writer engine for - 'xls' files. -io.excel.xlsm.writer openpyxl The default Excel writer engine for - 'xlsm' files. Available options: - 'openpyxl' (the default). -io.excel.xlsx.writer openpyxl The default Excel writer engine for - 'xlsx' files. 
-io.hdf.default_format None default format writing format, if - None, then put will default to - 'fixed' and append will default to - 'table' -io.hdf.dropna_table True drop ALL nan rows when appending - to a table -mode.chained_assignment warn Raise an exception, warn, or no - action if trying to use chained - assignment, The default is warn -mode.sim_interactive False Whether to simulate interactive mode - for purposes of testing -mode.use_inf_as_null False True means treat None, NaN, -INF, - INF as null (old way), False means - None and NaN are null, but INF, -INF - are not null (new way). -========================== ============ ================================== +======================================= ============ ================================== +Option Default Function +======================================= ============ ================================== +display.chop_threshold None If set to a float value, all float + values smaller than the given + threshold will be displayed as + exactly 0 by repr and friends. +display.colheader_justify right Controls the justification of + column headers. Used by DataFrameFormatter. +display.column_space 12 No description available. +display.date_dayfirst False When True, prints and parses dates + with the day first, eg 20/01/2005 +display.date_yearfirst False When True, prints and parses dates + with the year first, eg 2005/01/20 +display.encoding UTF-8 Defaults to the detected encoding + of the console. Specifies the encoding + to be used for strings returned by + to_string, these are generally strings + meant to be displayed on the console. +display.expand_frame_repr True Whether to print out the full DataFrame + repr for wide DataFrames across + multiple lines, `max_columns` is + still respected, but the output will + wrap-around across multiple "pages" + if its width exceeds `display.width`. 
+display.float_format None The callable should accept a floating + point number and return a string with + the desired format of the number. + This is used in some places like + SeriesFormatter. + See core.format.EngFormatter for an example. +display.large_repr truncate For DataFrames exceeding max_rows/max_cols, + the repr (and HTML repr) can show + a truncated table (the default), + or switch to the view from df.info() + (the behaviour in earlier versions of pandas). + allowable settings, ['truncate', 'info'] +display.latex.repr False Whether to produce a latex DataFrame + representation for jupyter frontends + that support it. +display.latex.escape True Escapes special characters in DataFrames, when + using the to_latex method. +display.latex.longtable False Specifies if the to_latex method of a DataFrame + uses the longtable format. +display.latex.multicolumn True Combines columns when using a MultiIndex +display.latex.multicolumn_format 'l' Alignment of multicolumn labels +display.latex.multirow False Combines rows when using a MultiIndex. + Centered instead of top-aligned, + separated by clines. +display.max_columns 20 max_rows and max_columns are used + in __repr__() methods to decide if + to_string() or info() is used to + render an object to a string. In + case python/IPython is running in + a terminal this can be set to 0 and + pandas will correctly auto-detect + the width the terminal and swap to + a smaller format in case all columns + would not fit vertically. The IPython + notebook, IPython qtconsole, or IDLE + do not run in a terminal and hence + it is not possible to do correct + auto-detection. 'None' value means + unlimited. +display.max_colwidth 50 The maximum width in characters of + a column in the repr of a pandas + data structure. When the column overflows, + a "..." placeholder is embedded in + the output. +display.max_info_columns 100 max_info_columns is used in DataFrame.info + method to decide if per column information + will be printed. 
+display.max_info_rows 1690785 df.info() will usually show null-counts + for each column. For large frames + this can be quite slow. max_info_rows + and max_info_cols limit this null + check only to frames with smaller + dimensions than specified. +display.max_rows 60 This sets the maximum number of rows + pandas should output when printing + out various output. For example, + this value determines whether the + repr() for a dataframe prints out + fully or just a summary repr. + 'None' value means unlimited. +display.max_seq_items 100 When pretty-printing a long sequence, + no more than `max_seq_items` will + be printed. If items are omitted, + they will be denoted by the addition + of "..." to the resulting string. + If set to None, the number of items + to be printed is unlimited. +display.memory_usage True This specifies if the memory usage of + a DataFrame should be displayed when the + df.info() method is invoked. +display.multi_sparse True "Sparsify" MultiIndex display (don't + display repeated elements in outer + levels within groups) +display.notebook_repr_html True When True, IPython notebook will + use html representation for + pandas objects (if it is available). +display.pprint_nest_depth 3 Controls the number of nested levels + to process when pretty-printing +display.precision 6 Floating point output precision in + terms of number of places after the + decimal, for regular formatting as well + as scientific notation. Similar to + numpy's ``precision`` print option +display.show_dimensions truncate Whether to print out dimensions + at the end of DataFrame repr. + If 'truncate' is specified, only + print out the dimensions if the + frame is truncated (e.g. not display + all rows and/or columns) +display.width 80 Width of the display in characters. + In case python/IPython is running in + a terminal this can be set to None + and pandas will correctly auto-detect + the width. 
Note that the IPython notebook, + IPython qtconsole, or IDLE do not run in a + terminal and hence it is not possible + to correctly detect the width. +display.html.table_schema False Whether to publish a Table Schema + representation for frontends that + support it. +display.html.border 1 A ``border=value`` attribute is + inserted in the ``
`` tag + for the DataFrame HTML repr. +display.html.use_mathjax True When True, Jupyter notebook will process + table contents using MathJax, rendering + mathematical expressions enclosed by the + dollar symbol. +io.excel.xls.writer xlwt The default Excel writer engine for + 'xls' files. +io.excel.xlsm.writer openpyxl The default Excel writer engine for + 'xlsm' files. Available options: + 'openpyxl' (the default). +io.excel.xlsx.writer openpyxl The default Excel writer engine for + 'xlsx' files. +io.hdf.default_format None default format writing format, if + None, then put will default to + 'fixed' and append will default to + 'table' +io.hdf.dropna_table True drop ALL nan rows when appending + to a table +io.parquet.engine None The engine to use as a default for + parquet reading and writing. If None + then try 'pyarrow' and 'fastparquet' +mode.chained_assignment warn Controls ``SettingWithCopyWarning``: + 'raise', 'warn', or None. Raise an + exception, warn, or no action if + trying to use :ref:`chained assignment `. +mode.sim_interactive False Whether to simulate interactive mode + for purposes of testing. +mode.use_inf_as_na False True means treat None, NaN, -INF, + INF as NA (old way), False means + None and NaN are null, but INF, -INF + are not NA (new way). +compute.use_bottleneck True Use the bottleneck library to accelerate + computation if it is installed. +compute.use_numexpr True Use the numexpr library to accelerate + computation if it is installed. +plotting.matplotlib.register_converters True Register custom converters with + matplotlib. Set to False to de-register. +======================================= ============ ================================== + .. _basics.console_output: @@ -460,10 +482,10 @@ Unicode Formatting Enabling this option will affect the performance for printing of DataFrame and Series (about 2 times slower). Use only when it is actually required. 
-Some East Asian countries use Unicode characters its width is corresponding to 2 alphabets. -If DataFrame or Series contains these characters, default output cannot be aligned properly. +Some East Asian countries use Unicode characters whose width corresponds to two Latin characters. +If a DataFrame or Series contains these characters, the default output mode may not align them properly. -.. note:: Screen captures are attached for each outputs to show the actual results. +.. note:: Screen captures are attached for each output to show the actual results. .. ipython:: python @@ -472,8 +494,9 @@ If DataFrame or Series contains these characters, default output cannot be align .. image:: _static/option_unicode01.png -Enable ``display.unicode.east_asian_width`` allows pandas to check each character's "East Asian Width" property. -These characters can be aligned properly by checking this property, but it takes longer time than standard ``len`` function. +Enabling ``display.unicode.east_asian_width`` allows pandas to check each character's "East Asian Width" property. +These characters can be aligned properly by setting this option to ``True``. However, this will result in longer render +times than the standard ``len`` function. .. ipython:: python @@ -482,9 +505,10 @@ These characters can be aligned properly by checking this property, but it takes .. image:: _static/option_unicode02.png -In addition, Unicode contains characters which width is "Ambiguous". These character's width should be either 1 or 2 depending on terminal setting or encoding. Because this cannot be distinguished from Python, ``display.unicode.ambiguous_as_wide`` option is added to handle this. +In addition, Unicode characters whose width is "Ambiguous" can either be 1 or 2 characters wide depending on the +terminal setting or encoding. The option ``display.unicode.ambiguous_as_wide`` can be used to handle the ambiguity. 
-By default, "Ambiguous" character's width, "¡" (inverted exclamation) in below example, is regarded as 1. +By default, an "Ambiguous" character's width, such as "¡" (inverted exclamation) in the example below, is taken to be 1. .. ipython:: python @@ -493,7 +517,10 @@ By default, "Ambiguous" character's width, "¡" (inverted exclamation) in below .. image:: _static/option_unicode03.png -Enabling ``display.unicode.ambiguous_as_wide`` lets pandas to figure these character's width as 2. Note that this option will be effective only when ``display.unicode.east_asian_width`` is enabled. Confirm starting position has been changed, but is not aligned properly because the setting is mismatched with this environment. +Enabling ``display.unicode.ambiguous_as_wide`` makes pandas interpret these characters' widths to be 2. +(Note that this option will only be effective when ``display.unicode.east_asian_width`` is enabled.) + +However, setting this option incorrectly for your terminal will cause these characters to be aligned incorrectly: .. ipython:: python @@ -507,3 +534,26 @@ Enabling ``display.unicode.ambiguous_as_wide`` lets pandas to figure these chara pd.set_option('display.unicode.east_asian_width', False) pd.set_option('display.unicode.ambiguous_as_wide', False) + +.. _options.table_schema: + +Table Schema Display +-------------------- + +.. versionadded:: 0.20.0 + +``DataFrame`` and ``Series`` can publish a Table Schema representation. +This is disabled by default, but it can be enabled globally with the +``display.html.table_schema`` option: + +.. ipython:: python + + pd.set_option('display.html.table_schema', True) + +Only the first ``'display.max_rows'`` rows are serialized and published. + + +.. 
ipython:: python + :suppress: + + pd.reset_option('display.html.table_schema') diff --git a/doc/source/overview.rst b/doc/source/overview.rst index 92caeec319169..f86b1c67e6843 100644 --- a/doc/source/overview.rst +++ b/doc/source/overview.rst @@ -6,42 +6,42 @@ Package overview **************** -:mod:`pandas` consists of the following things +:mod:`pandas` is an open source, BSD-licensed library providing high-performance, +easy-to-use data structures and data analysis tools for the `Python `__ +programming language. + +:mod:`pandas` consists of the following elements: * A set of labeled array data structures, the primary of which are - Series and DataFrame + Series and DataFrame. * Index objects enabling both simple axis indexing and multi-level / - hierarchical axis indexing - * An integrated group by engine for aggregating and transforming data sets + hierarchical axis indexing. + * An integrated group by engine for aggregating and transforming data sets. * Date range generation (date_range) and custom date offsets enabling the - implementation of customized frequencies + implementation of customized frequencies. * Input/Output tools: loading tabular data from flat files (CSV, delimited, Excel 2003), and saving and loading pandas objects from the fast and efficient PyTables/HDF5 format. * Memory-efficient "sparse" versions of the standard data structures for storing - data that is mostly missing or mostly constant (some fixed value) - * Moving window statistics (rolling mean, rolling standard deviation, etc.) - * Static and moving window linear and `panel regression - `__ + data that is mostly missing or mostly constant (some fixed value). + * Moving window statistics (rolling mean, rolling standard deviation, etc.). -Data structures at a glance ---------------------------- +Data Structures +--------------- .. 
csv-table:: :header: "Dimensions", "Name", "Description" :widths: 15, 20, 50 - 1, Series, "1D labeled homogeneously-typed array" - 2, DataFrame, "General 2D labeled, size-mutable tabular structure with - potentially heterogeneously-typed columns" - 3, Panel, "General 3D labeled, also size-mutable array" + 1, "Series", "1D labeled homogeneously-typed array" + 2, "DataFrame", "General 2D labeled, size-mutable tabular structure with potentially heterogeneously-typed columns" -Why more than 1 data structure? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Why more than one data structure? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The best way to think about the pandas data structures is as flexible containers for lower dimensional data. For example, DataFrame is a container -for Series, and Panel is a container for DataFrame objects. We would like to be +for Series, and Series is a container for scalars. We would like to be able to insert and remove objects from these containers in a dictionary-like fashion. @@ -58,7 +58,7 @@ transformations in downstream functions. For example, with tabular data (DataFrame) it is more semantically helpful to think of the **index** (the rows) and the **columns** rather than axis 0 and -axis 1. And iterating through the columns of the DataFrame thus results in more +axis 1. Iterating through the columns of the DataFrame thus results in more readable code: :: @@ -74,8 +74,7 @@ All pandas data structures are value-mutable (the values they contain can be altered) but not always size-mutable. The length of a Series cannot be changed, but, for example, columns can be inserted into a DataFrame. However, the vast majority of methods produce new objects and leave the input data -untouched. In general, though, we like to **favor immutability** where -sensible. +untouched. In general we like to **favor immutability** where sensible. 
Getting Support --------------- @@ -85,36 +84,41 @@ The first stop for pandas issues and ideas is the `Github Issue Tracker pandas community experts can answer through `Stack Overflow `__. -Longer discussions occur on the `developer mailing list -`__, and commercial support -inquiries for Lambda Foundry should be sent to: support@lambdafoundry.com +Community +--------- -Credits -------- +pandas is actively supported today by a community of like-minded individuals around +the world who contribute their valuable time and energy to help make open source +pandas possible. Thanks to `all of our contributors `__. + +If you're interested in contributing, please +visit the `Contributing to pandas webpage `__. -pandas development began at `AQR Capital Management `__ in -April 2008. It was open-sourced at the end of 2009. AQR continued to provide -resources for development through the end of 2011, and continues to contribute -bug reports today. +pandas is a `NumFOCUS `__ sponsored project. +This helps ensure the success of development of pandas as a world-class open-source +project, and makes it possible to `donate `__ to the project. -Since January 2012, `Lambda Foundry `__, has -been providing development resources, as well as commercial support, -training, and consulting for pandas. +Project Governance +------------------ -pandas is only made possible by a group of people around the world like you -who have contributed new code, bug reports, fixes, comments and ideas. A -complete list can be found `on Github `__. +The governance process that the pandas project has used informally since its inception in 2008 is formalized in `Project Governance documents `__. +The documents clarify how decisions are made and how the various elements of our community interact, including the relationship between open source collaborative development and work that may be funded by for-profit or non-profit entities. + +Wes McKinney is the Benevolent Dictator for Life (BDFL). 
Development Team ----------------- +----------------- + +The list of the Core Team members and more detailed information can be found on the `people’s page `__ of the governance repo. + -pandas is a part of the PyData project. The PyData Development Team is a -collection of developers focused on the improvement of Python's data -libraries. The core team that coordinates development can be found on `Github -`__. If you're interested in contributing, please -visit the `project website `__. +Institutional Partners +---------------------- + +The information about current institutional partners can be found on `pandas website page `__. License ------- .. literalinclude:: ../../LICENSE + diff --git a/doc/source/r_interface.rst b/doc/source/r_interface.rst index b5d699cad69d5..88634d7f75c63 100644 --- a/doc/source/r_interface.rst +++ b/doc/source/r_interface.rst @@ -41,15 +41,17 @@ In the remainder of this page, a few examples of explicit conversion is given. T Transferring R data sets into Python ------------------------------------ -The ``pandas2ri.ri2py`` function retrieves an R data set and converts it to the -appropriate pandas object (most likely a DataFrame): +Once the pandas conversion is activated (``pandas2ri.activate()``), many conversions +of R to pandas objects will be done automatically. For example, to obtain the 'iris' dataset as a pandas DataFrame: .. ipython:: python r.data('iris') - df_iris = pandas2ri.ri2py(r['iris']) - df_iris.head() + r['iris'].head() +If the pandas conversion was not activated, the above could also be accomplished +by explicitly converting it with the ``pandas2ri.ri2py`` function +(``pandas2ri.ri2py(r['iris'])``). Converting DataFrames into R objects ------------------------------------ @@ -65,7 +67,6 @@ DataFrames into the equivalent R object (that is, **data.frame**): print(type(r_dataframe)) print(r_dataframe) - The DataFrame's index is stored as the ``rownames`` attribute of the data.frame instance. 
diff --git a/doc/source/release.rst b/doc/source/release.rst index f89fec9fb86e6..8e063116cbf07 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -37,6 +37,586 @@ analysis / manipulation tool available in any language. * Binary installers on PyPI: http://pypi.python.org/pypi/pandas * Documentation: http://pandas.pydata.org +pandas 0.22.0 +------------- + +**Release date:** December 29, 2017 + +This is a major release from 0.21.1 and includes a single, API-breaking change. +We recommend that all users upgrade to this version after carefully reading the +release note. + +The only changes are: + +- The sum of an empty or all-*NA* ``Series`` is now ``0`` +- The product of an empty or all-*NA* ``Series`` is now ``1`` +- We've added a ``min_count`` parameter to ``.sum()`` and ``.prod()`` controlling + the minimum number of valid values for the result to be valid. If fewer than + ``min_count`` non-*NA* values are present, the result is *NA*. The default is + ``0``. To return ``NaN``, the 0.21 behavior, use ``min_count=1``. + +See the :ref:`v0.22.0 Whatsnew ` overview for further explanation +of all the places in the library this affects. + +pandas 0.21.1 +------------- + +**Release date:** December 12, 2017 + +This is a minor bug-fix release in the 0.21.x series and includes some small +regression fixes, bug fixes and performance improvements. We recommend that all +users upgrade to this version. + +Highlights include: + +- Temporarily restore matplotlib datetime plotting functionality. This should + resolve issues for users who relied implicitly on pandas to plot datetimes + with matplotlib. See :ref:`here `. +- Improvements to the Parquet IO functions introduced in 0.21.0. See + :ref:`here `. + +See the :ref:`v0.21.1 Whatsnew ` overview for an extensive list +of all the changes for 0.21.1. + +Thanks +~~~~~~ + +A total of 46 people contributed to this release. People with a "+" by their +names contributed a patch for the first time. 
+ +Contributors +============ + +* Aaron Critchley + +* Alex Rychyk +* Alexander Buchkovsky + +* Alexander Michael Schade + +* Chris Mazzullo +* Cornelius Riemenschneider + +* Dave Hirschfeld + +* David Fischer + +* David Stansby + +* Dror Atariah + +* Eric Kisslinger + +* Hans + +* Ingolf Becker + +* Jan Werkmann + +* Jeff Reback +* Joris Van den Bossche +* Jörg Döpfert + +* Kevin Kuhl + +* Krzysztof Chomski + +* Leif Walsh +* Licht Takeuchi +* Manraj Singh + +* Matt Braymer-Hayes + +* Michael Waskom + +* Mie~~~ + +* Peter Hoffmann + +* Robert Meyer + +* Sam Cohan + +* Sietse Brouwer + +* Sven + +* Tim Swast +* Tom Augspurger +* Wes Turner +* William Ayd + +* Yee Mey + +* bolkedebruin + +* cgohlke +* derestle-htwg + +* fjdiod + +* gabrielclow + +* gfyoung +* ghasemnaddaf + +* jbrockmendel +* jschendel +* miker985 + +* topper-123 + +pandas 0.21.0 +------------- + +**Release date:** October 27, 2017 + +This is a major release from 0.20.3 and includes a number of API changes, +deprecations, new features, enhancements, and performance improvements along +with a large number of bug fixes. We recommend that all users upgrade to this +version. + +Highlights include: + +- Integration with `Apache Parquet `__, including a new top-level :func:`read_parquet` function and :meth:`DataFrame.to_parquet` method, see :ref:`here `. +- New user-facing :class:`pandas.api.types.CategoricalDtype` for specifying + categoricals independent of the data, see :ref:`here `. +- The behavior of ``sum`` and ``prod`` on all-NaN Series/DataFrames is now consistent and no longer depends on whether `bottleneck `__ is installed, and ``sum`` and ``prod`` on empty Series now return NaN instead of 0, see :ref:`here `. +- Compatibility fixes for pypy, see :ref:`here `. +- Additions to the ``drop``, ``reindex`` and ``rename`` API to make them more consistent, see :ref:`here `. +- Addition of the new methods ``DataFrame.infer_objects`` (see :ref:`here `) and ``GroupBy.pipe`` (see :ref:`here `). 
+- Indexing with a list of labels, where one or more of the labels is missing, is deprecated and will raise a KeyError in a future version, see :ref:`here `. + +See the :ref:`v0.21.0 Whatsnew ` overview for an extensive list +of all enhancements and bugs that have been fixed in 0.21.0 + +Thanks +~~~~~~ + +A total of 206 people contributed to this release. People with a "+" by their +names contributed a patch for the first time. + +Contributors +============ + +* 3553x + +* Aaron Barber +* Adam Gleave + +* Adam Smith + +* AdamShamlian + +* Adrian Liaw + +* Alan Velasco + +* Alan Yee + +* Alex B + +* Alex Lubbock + +* Alex Marchenko + +* Alex Rychyk + +* Amol K + +* Andreas Winkler +* Andrew + +* Andrew 亮 +* André Jonasson + +* Becky Sweger +* Berkay + +* Bob Haffner + +* Bran Yang +* Brian Tu + +* Brock Mendel + +* Carol Willing + +* Carter Green + +* Chankey Pathak + +* Chris +* Chris Billington +* Chris Filo Gorgolewski + +* Chris Kerr +* Chris M + +* Chris Mazzullo + +* Christian Prinoth +* Christian Stade-Schuldt +* Christoph Moehl + +* DSM +* Daniel Chen + +* Daniel Grady +* Daniel Himmelstein +* Dave Willmer +* David Cook +* David Gwynne +* David Read + +* Dillon Niederhut + +* Douglas Rudd +* Eric Stein + +* Eric Wieser + +* Erik Fredriksen +* Florian Wilhelm + +* Floris Kint + +* Forbidden Donut +* Gabe F + +* Giftlin + +* Giftlin Rajaiah + +* Giulio Pepe + +* Guilherme Beltramini +* Guillem Borrell + +* Hanmin Qin + +* Hendrik Makait + +* Hugues Valois +* Hussain Tamboli + +* Iva Miholic + +* Jan Novotný + +* Jan Rudolph +* Jean Helie + +* Jean-Baptiste Schiratti + +* Jean-Mathieu Deschenes +* Jeff Knupp + +* Jeff Reback +* Jeff Tratner +* JennaVergeynst +* JimStearns206 +* Joel Nothman +* John W. O'Brien +* Jon Crall + +* Jon Mease +* Jonathan J. 
Helmus + +* Joris Van den Bossche +* JosephWagner +* Juarez Bochi +* Julian Kuhlmann + +* Karel De Brabandere +* Kassandra Keeton + +* Keiron Pizzey + +* Keith Webber +* Kernc +* Kevin Sheppard +* Kirk Hansen + +* Licht Takeuchi + +* Lucas Kushner + +* Mahdi Ben Jelloul + +* Makarov Andrey + +* Malgorzata Turzanska + +* Marc Garcia + +* Margaret Sy + +* MarsGuy + +* Matt Bark + +* Matthew Roeschke +* Matti Picus +* Mehmet Ali "Mali" Akmanalp +* Michael Gasvoda + +* Michael Penkov + +* Milo + +* Morgan Stuart + +* Morgan243 + +* Nathan Ford + +* Nick Eubank +* Nick Garvey + +* Oleg Shteynbuk + +* P-Tillmann + +* Pankaj Pandey +* Patrick Luo +* Patrick O'Melveny +* Paul Reidy + +* Paula + +* Peter Quackenbush +* Peter Yanovich + +* Phillip Cloud +* Pierre Haessig +* Pietro Battiston +* Pradyumna Reddy Chinthala +* Prasanjit Prakash +* RobinFiveWords +* Ryan Hendrickson +* Sam Foo +* Sangwoong Yoon + +* Simon Gibbons + +* SimonBaron +* Steven Cutting + +* Sudeep + +* Sylvia + +* T N + +* Telt +* Thomas A Caswell +* Tim Swast + +* Tom Augspurger +* Tong SHEN +* Tuan + +* Utkarsh Upadhyay + +* Vincent La + +* Vivek + +* WANG Aiyong +* WBare +* Wes McKinney +* XF + +* Yi Liu + +* Yosuke Nakabayashi + +* aaron315 + +* abarber4gh + +* aernlund + +* agustín méndez + +* andymaheshw + +* ante328 + +* aviolov + +* bpraggastis +* cbertinato + +* cclauss + +* chernrick +* chris-b1 +* dkamm + +* dwkenefick +* economy +* faic + +* fding253 + +* gfyoung +* guygoldberg + +* hhuuggoo + +* huashuai + +* ian +* iulia + +* jaredsnyder +* jbrockmendel + +* jdeschenes +* jebob + +* jschendel + +* keitakurita +* kernc + +* kiwirob + +* kjford +* linebp +* lloydkirk +* louispotok + +* majiang + +* manikbhandari + +* matthiashuschle + +* mattip +* maxwasserman + +* mjlove12 + +* nmartensen + +* pandas-docs-bot + +* parchd-1 + +* philipphanemann + +* rdk1024 + +* reidy-p + +* ri938 +* ruiann + +* rvernica + +* s-weigand + +* scotthavard92 + +* skwbc + +* step4me + +* tobycheese + +* 
topper-123 + +* tsdlovell +* ysau + +* zzgao + + + +pandas 0.20.0 / 0.20.1 +---------------------- + +**Release date:** May 5, 2017 + + +This is a major release from 0.19.2 and includes a number of API changes, deprecations, new features, +enhancements, and performance improvements along with a large number of bug fixes. We recommend that all +users upgrade to this version. + +Highlights include: + +- New ``.agg()`` API for Series/DataFrame similar to the groupby-rolling-resample API's, see :ref:`here ` +- Integration with the ``feather-format``, including a new top-level ``pd.read_feather()`` and ``DataFrame.to_feather()`` method, see :ref:`here `. +- The ``.ix`` indexer has been deprecated, see :ref:`here ` +- ``Panel`` has been deprecated, see :ref:`here ` +- Addition of an ``IntervalIndex`` and ``Interval`` scalar type, see :ref:`here ` +- Improved user API when grouping by index levels in ``.groupby()``, see :ref:`here ` +- Improved support for ``UInt64`` dtypes, see :ref:`here ` +- A new orient for JSON serialization, ``orient='table'``, that uses the Table Schema spec and that gives the possibility for a more interactive repr in the Jupyter Notebook, see :ref:`here ` +- Experimental support for exporting styled DataFrames (``DataFrame.style``) to Excel, see :ref:`here ` +- Window binary corr/cov operations now return a MultiIndexed ``DataFrame`` rather than a ``Panel``, as ``Panel`` is now deprecated, see :ref:`here ` +- Support for S3 handling now uses ``s3fs``, see :ref:`here ` +- Google BigQuery support now uses the ``pandas-gbq`` library, see :ref:`here ` + +See the :ref:`v0.20.1 Whatsnew ` overview for an extensive list +of all enhancements and bugs that have been fixed in 0.20.1. + + +.. note:: + + This is a combined release for 0.20.0 and 0.20.1. + Version 0.20.1 contains one additional change for backwards-compatibility with downstream projects using pandas' ``utils`` routines. (:issue:`16250`) + +Thanks +~~~~~~ + +- abaldenko +- Adam J. 
Stewart +- Adrian +- adrian-stepien +- Ajay Saxena +- Akash Tandon +- Albert Villanova del Moral +- Aleksey Bilogur +- alexandercbooth +- Alexis Mignon +- Amol Kahat +- Andreas Winkler +- Andrew Kittredge +- Anthonios Partheniou +- Arco Bast +- Ashish Singal +- atbd +- bastewart +- Baurzhan Muftakhidinov +- Ben Kandel +- Ben Thayer +- Ben Welsh +- Bill Chambers +- bmagnusson +- Brandon M. Burroughs +- Brian +- Brian McFee +- carlosdanielcsantos +- Carlos Souza +- chaimdemulder +- Chris +- chris-b1 +- Chris Ham +- Christopher C. Aycock +- Christoph Gohlke +- Christoph Paulik +- Chris Warth +- Clemens Brunner +- DaanVanHauwermeiren +- Daniel Himmelstein +- Dave Willmer +- David Cook +- David Gwynne +- David Hoffman +- David Krych +- dickreuter +- Diego Fernandez +- Dimitris Spathis +- discort +- Dmitry L +- Dody Suria Wijaya +- Dominik Stanczak +- Dr-Irv +- Dr. Irv +- dr-leo +- D.S. McNeil +- dubourg +- dwkenefick +- Elliott Sales de Andrade +- Ennemoser Christoph +- Francesc Alted +- Fumito Hamamura +- funnycrab +- gfyoung +- Giacomo Ferroni +- goldenbull +- Graham R. Jeffries +- Greg Williams +- Guilherme Beltramini +- Guilherme Samora +- Hao Wu +- Harshit Patni +- hesham.shabana@hotmail.com +- Ilya V. Schurov +- Iván Vallés Pérez +- Jackie Leng +- Jaehoon Hwang +- James Draper +- James Goppert +- James McBride +- James Santucci +- Jan Schulz +- Jeff Carey +- Jeff Reback +- JennaVergeynst +- Jim +- Jim Crist +- Joe Jevnik +- Joel Nothman +- John +- John Tucker +- John W. O'Brien +- John Zwinck +- jojomdt +- Jonathan de Bruin +- Jonathan Whitmore +- Jon Mease +- Jon M. 
Mease +- Joost Kranendonk +- Joris Van den Bossche +- Joshua Bradt +- Julian Santander +- Julien Marrec +- Jun Kim +- Justin Solinsky +- Kacawi +- Kamal Kamalaldin +- Kerby Shedden +- Kernc +- Keshav Ramaswamy +- Kevin Sheppard +- Kyle Kelley +- Larry Ren +- Leon Yin +- linebp +- Line Pedersen +- Lorenzo Cestaro +- Luca Scarabello +- Lukasz +- Mahmoud Lababidi +- manu +- manuels +- Mark Mandel +- Matthew Brett +- Matthew Roeschke +- mattip +- Matti Picus +- Matt Roeschke +- maxalbert +- Maximilian Roos +- mcocdawc +- Michael Charlton +- Michael Felt +- Michael Lamparski +- Michiel Stock +- Mikolaj Chwalisz +- Min RK +- Miroslav Šedivý +- Mykola Golubyev +- Nate Yoder +- Nathalie Rud +- Nicholas Ver Halen +- Nick Chmura +- Nolan Nichols +- nuffe +- Pankaj Pandey +- paul-mannino +- Pawel Kordek +- pbreach +- Pete Huang +- Peter +- Peter Csizsek +- Petio Petrov +- Phil Ruffwind +- Pietro Battiston +- Piotr Chromiec +- Prasanjit Prakash +- Robert Bradshaw +- Rob Forgione +- Robin +- Rodolfo Fernandez +- Roger Thomas +- Rouz Azari +- Sahil Dua +- sakkemo +- Sam Foo +- Sami Salonen +- Sarah Bird +- Sarma Tangirala +- scls19fr +- Scott Sanderson +- Sebastian Bank +- Sebastian Gsänger +- Sébastien de Menten +- Shawn Heide +- Shyam Saladi +- sinhrks +- Sinhrks +- Stephen Rauch +- stijnvanhoey +- Tara Adiseshan +- themrmax +- the-nose-knows +- Thiago Serafim +- Thoralf Gutierrez +- Thrasibule +- Tobias Gustafsson +- Tom Augspurger +- tomrod +- Tong Shen +- Tong SHEN +- TrigonaMinima +- tzinckgraf +- Uwe +- wandersoncferreira +- watercrossing +- wcwagner +- Wes Turner +- Wiktor Tomczak +- WillAyd +- xgdgsc +- Yaroslav Halchenko +- Yimeng Zhang +- yui-knk + pandas 0.19.2 ------------- @@ -1055,7 +1635,7 @@ performance improvements along with a large number of bug fixes. 
Highlights include: -- Drop support for numpy < 1.7.0 (:issue:`7711`) +- Drop support for NumPy < 1.7.0 (:issue:`7711`) - The ``Categorical`` type was integrated as a first-class pandas type, see :ref:`here ` - New scalar type ``Timedelta``, and a new index type ``TimedeltaIndex``, see :ref:`here ` - New DataFrame default display for ``df.info()`` to include memory usage, see :ref:`Memory Usage ` @@ -1452,7 +2032,7 @@ Bug Fixes - Bug in Series.xs with a multi-index (:issue:`6018`) - Bug in Series construction of mixed type with datelike and an integer (which should result in object type and not automatic conversion) (:issue:`6028`) -- Possible segfault when chained indexing with an object array under numpy 1.7.1 (:issue:`6026`, :issue:`6056`) +- Possible segfault when chained indexing with an object array under NumPy 1.7.1 (:issue:`6026`, :issue:`6056`) - Bug in setting using fancy indexing a single element with a non-scalar (e.g. a list), (:issue:`6043`) - ``to_sql`` did not respect ``if_exists`` (:issue:`4110` :issue:`4304`) @@ -1463,7 +2043,7 @@ Bug Fixes - Fixed missing arg validation in get_options_data (:issue:`6105`) - Bug in assignment with duplicate columns in a frame where the locations are a slice (e.g. 
next to each other) (:issue:`6120`) -- Bug in propogating _ref_locs during construction of a DataFrame with dups +- Bug in propagating _ref_locs during construction of a DataFrame with dups index/columns (:issue:`6121`) - Bug in ``DataFrame.apply`` when using mixed datelike reductions (:issue:`6125`) - Bug in ``DataFrame.append`` when appending a row with different columns (:issue:`6129`) @@ -1476,7 +2056,7 @@ Bug Fixes - Bug in ``HDFStore`` on appending a dataframe with multi-indexed columns to an existing table (:issue:`6167`) - Consistency with dtypes in setting an empty DataFrame (:issue:`6171`) -- Bug in selecting on a multi-index ``HDFStore`` even in the presence of under +- Bug in selecting on a multi-index ``HDFStore`` even in the presence of under specified column spec (:issue:`6169`) - Bug in ``nanops.var`` with ``ddof=1`` and 1 elements would sometimes return ``inf`` rather than ``nan`` on some platforms (:issue:`6136`) @@ -1597,7 +2177,7 @@ Improvements to existing features - allow DataFrame constructor to accept more list-like objects, e.g. list of ``collections.Sequence`` and ``array.Array`` objects (:issue:`3783`, :issue:`4297`, :issue:`4851`), thanks @lgautier -- DataFrame constructor now accepts a numpy masked record array +- DataFrame constructor now accepts a NumPy masked record array (:issue:`3478`), thanks @jnothman - ``__getitem__`` with ``tuple`` key (e.g., ``[:, 2]``) on ``Series`` without ``MultiIndex`` raises ``ValueError`` (:issue:`4759`, :issue:`4837`) @@ -1817,8 +2397,8 @@ API Changes support ``pow`` or ``mod`` with non-scalars. 
(:issue:`3765`) - Arithmetic func factories are now passed real names (suitable for using with super) (:issue:`5240`) -- Provide numpy compatibility with 1.7 for a calling convention like - ``np.prod(pandas_object)`` as numpy call with additional keyword args +- Provide NumPy compatibility with 1.7 for a calling convention like + ``np.prod(pandas_object)`` as NumPy call with additional keyword args (:issue:`4435`) - Provide __dir__ method (and local context) for tab completion / remove ipython completers code (:issue:`4501`) @@ -1857,7 +2437,7 @@ API Changes - The refactoring involving``Series`` deriving from ``NDFrame`` breaks ``rpy2<=2.3.8``. an Issue has been opened against rpy2 and a workaround is detailed in :issue:`5698`. Thanks @JanSchulz. - ``Series.argmin`` and ``Series.argmax`` are now aliased to ``Series.idxmin`` and ``Series.idxmax``. - These return the *index* of the min or max element respectively. Prior to 0.13.0 these would return + These return the *index* of the min or max element respectively. Prior to 0.13.0 these would return the position of the min / max element (:issue:`6214`) Internal Refactoring @@ -1901,7 +2481,7 @@ See :ref:`Internal Refactoring` - Series now inherits from ``NDFrame`` rather than directly from ``ndarray``. There are several minor changes that affect the API. - - numpy functions that do not support the array interface will now return + - NumPy functions that do not support the array interface will now return ``ndarrays`` rather than series, e.g. 
``np.diff``, ``np.ones_like``, ``np.where`` - ``Series(0.5)`` would previously return the scalar ``0.5``, this is no @@ -2070,7 +2650,7 @@ Bug Fixes - Fix bug in having a rhs of ``np.timedelta64`` or ``np.offsets.DateOffset`` when operating with datetimes (:issue:`4532`) - Fix arithmetic with series/datetimeindex and ``np.timedelta64`` not working - the same (:issue:`4134`) and buggy timedelta in numpy 1.6 (:issue:`4135`) + the same (:issue:`4134`) and buggy timedelta in NumPy 1.6 (:issue:`4135`) - Fix bug in ``pd.read_clipboard`` on windows with PY3 (:issue:`4561`); not decoding properly - ``tslib.get_period_field()`` and ``tslib.get_period_field_arr()`` now raise @@ -2111,7 +2691,7 @@ Bug Fixes - Bug with reindexing on the index with a non-unique index will now raise ``ValueError`` (:issue:`4746`) - Bug in setting with ``loc/ix`` a single indexer with a multi-index axis and - a numpy array, related to (:issue:`3777`) + a NumPy array, related to (:issue:`3777`) - Bug in concatenation with duplicate columns across dtypes not merging with axis=0 (:issue:`4771`, :issue:`4975`) - Bug in ``iloc`` with a slice index failing (:issue:`4771`) @@ -2314,7 +2894,7 @@ Improvements to existing features ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Fixed various issues with internal pprinting code, the repr() for various objects - including TimeStamp and Index now produces valid python code strings and + including TimeStamp and Index now produces valid Python code strings and can be used to recreate the object, (:issue:`3038`, :issue:`3379`, :issue:`3251`, :issue:`3460`) - ``convert_objects`` now accepts a ``copy`` parameter (defaults to ``True``) - ``HDFStore`` @@ -2338,7 +2918,7 @@ Improvements to existing features - clipboard functions use pyperclip (no dependencies on Windows, alternative dependencies offered for Linux) (:issue:`3837`). 
- Plotting functions now raise a ``TypeError`` before trying to plot anything - if the associated objects have have a dtype of ``object`` (:issue:`1818`, + if the associated objects have a dtype of ``object`` (:issue:`1818`, :issue:`3572`, :issue:`3911`, :issue:`3912`), but they will try to convert object arrays to numeric arrays if possible so that you can still plot, for example, an object array with floats. This happens before any drawing takes place which @@ -2378,7 +2958,7 @@ API Changes to enable alternate encodings (:issue:`3750`) - enable support for ``iterator/chunksize`` with ``read_hdf`` - The repr() for (Multi)Index now obeys display.max_seq_items rather - then numpy threshold print options. (:issue:`3426`, :issue:`3466`) + than NumPy threshold print options. (:issue:`3426`, :issue:`3466`) - Added mangle_dupe_cols option to read_table/csv, allowing users to control legacy behaviour re dupe cols (A, A.1, A.2 vs A, A ) (:issue:`3468`) Note: The default value will change in 0.12 to the "no mangle" behaviour, @@ -2445,8 +3025,8 @@ API Changes as ``Index``, ``Categorical``, ``GroupBy``, ``SparseList``, and ``SparseArray`` (+ their base classes). Currently, ``PandasObject`` provides string methods (from ``StringMixin``). (:issue:`4090`, :issue:`4092`) -- New ``StringMixin`` that, given a ``__unicode__`` method, gets python 2 and - python 3 compatible string methods (``__str__``, ``__bytes__``, and +- New ``StringMixin`` that, given a ``__unicode__`` method, gets Python 2 and + Python 3 compatible string methods (``__str__``, ``__bytes__``, and ``__repr__``). Plus string safety throughout. Now employed in many places throughout the pandas library.
(:issue:`4090`, :issue:`4092`) @@ -2517,7 +3097,7 @@ Bug Fixes - Fixed bug where a time-series was being selected in preference to an actual column name in a frame (:issue:`3594`) - Make secondary_y work properly for bar plots (:issue:`3598`) -- Fix modulo and integer division on Series,DataFrames to act similary to ``float`` dtypes to return +- Fix modulo and integer division on Series,DataFrames to act similarly to ``float`` dtypes to return ``np.nan`` or ``np.inf`` as appropriate (:issue:`3590`) - Fix incorrect dtype on groupby with ``as_index=False`` (:issue:`3610`) - Fix ``read_csv/read_excel`` to correctly encode identical na_values, e.g. ``na_values=[-999.0,-999]`` @@ -2559,7 +3139,7 @@ Bug Fixes two integer arrays with at least 10000 cells total (:issue:`3764`) - Indexing with a string with seconds resolution not selecting from a time index (:issue:`3925`) - csv parsers would loop infinitely if ``iterator=True`` but no ``chunksize`` was - specified (:issue:`3967`), python parser failing with ``chunksize=1`` + specified (:issue:`3967`), Python parser failing with ``chunksize=1`` - Fix index name not propagating when using ``shift`` - Fixed dropna=False being ignored with multi-index stack (:issue:`3997`) - Fixed flattening of columns when renaming MultiIndex columns DataFrame (:issue:`4004`) @@ -2721,7 +3301,7 @@ API Changes - all timedelta like objects will be correctly assigned to ``timedelta64`` with mixed ``NaN`` and/or ``NaT`` allowed -- arguments to DataFrame.clip were inconsistent to numpy and Series clipping +- arguments to DataFrame.clip were inconsistent to NumPy and Series clipping (:issue:`2747`) - util.testing.assert_frame_equal now checks the column and index names (:issue:`2964`) - Constructors will now return a more informative ValueError on failures @@ -2780,7 +3360,7 @@ Bug Fixes - Series ops with a Timestamp on the rhs was throwing an exception (:issue:`2898`) added tests for Series ops with datetimes,timedeltas,Timestamps, and datelike 
Series on both lhs and rhs - - Fixed subtle timedelta64 inference issue on py3 & numpy 1.7.0 (:issue:`3094`) + - Fixed subtle timedelta64 inference issue on py3 & NumPy 1.7.0 (:issue:`3094`) - Fixed some formatting issues on timedelta when negative - Support null checking on timedelta64, representing (and formatting) with NaT - Support setitem with np.nan value, converts to NaT @@ -2820,11 +3400,11 @@ Bug Fixes - Fixed bug in reshape if not passed correct input, now raises TypeError (:issue:`2719`) - Fixed a bug where Series ctor did not respect ordering if OrderedDict passed in (:issue:`3282`) - Fix NameError issue on RESO_US (:issue:`2787`) -- Allow selection in an *unordered* timeseries to work similary +- Allow selection in an *unordered* timeseries to work similarly to an *ordered* timeseries (:issue:`2437`). - Fix implemented ``.xs`` when called with ``axes=1`` and a level parameter (:issue:`2903`) - Timestamp now supports the class method fromordinal similar to datetimes (:issue:`3042`) -- Fix issue with indexing a series with a boolean key and specifiying a 1-len list on the rhs (:issue:`2745`) +- Fix issue with indexing a series with a boolean key and specifying a 1-len list on the rhs (:issue:`2745`) or a list on the rhs (:issue:`3235`) - Fixed bug in groupby apply when kernel generate list of arrays having unequal len (:issue:`1738`) - fixed handling of rolling_corr with center=True which could produce corr>1 (:issue:`3155`) @@ -2975,7 +3555,7 @@ Bug Fixes - Upconvert datetime + datetime64 values when concatenating frames (:issue:`2624`) - Raise a more helpful error message in merge operations when one DataFrame has duplicate columns (:issue:`2649`) -- Fix partial date parsing issue occuring only when code is run at EOM +- Fix partial date parsing issue occurring only when code is run at EOM (:issue:`2618`) - Prevent MemoryError when using counting sort in sortlevel with high-cardinality MultiIndex objects (:issue:`2684`) @@ -3393,7 +3973,7 @@ Bug Fixes 
- Don't lose tzinfo when passing DatetimeIndex as DataFrame column (:issue:`1682`) - Fix tz conversion with time zones that haven't had any DST transitions since first date in the array (:issue:`1673`) -- Fix field access with UTC->local conversion on unsorted arrays (:issue:`1756`) +- Fix field access with UTC->local conversion on unsorted arrays (:issue:`1756`) - Fix isnull handling of array-like (list) inputs (:issue:`1755`) - Fix regression in handling of Series in Series constructor (:issue:`1671`) - Fix comparison of Int64Index with DatetimeIndex (:issue:`1681`) @@ -3502,7 +4082,7 @@ Bug Fixes columns (:issue:`1943`) - Fix time zone localization bug causing improper fields (e.g. hours) in time zones that have not had a UTC transition in a long time (:issue:`1946`) -- Fix errors when parsing and working with with fixed offset timezones +- Fix errors when parsing and working with fixed offset timezones (:issue:`1922`, :issue:`1928`) - Fix text parser bug when handling UTC datetime objects generated by dateutil (:issue:`1693`) @@ -3803,7 +4383,7 @@ Bug Fixes error (:issue:`1090`) - Consistently set name on groupby pieces (:issue:`184`) - Treat dict return values as Series in GroupBy.apply (:issue:`823`) -- Respect column selection for DataFrame in in GroupBy.transform (:issue:`1365`) +- Respect column selection for DataFrame in GroupBy.transform (:issue:`1365`) - Fix MultiIndex partial indexing bug (:issue:`1352`) - Enable assignment of rows in mixed-type DataFrame via .ix (:issue:`1432`) - Reset index mapping when grouping Series in Cython (:issue:`1423`) @@ -3945,7 +4525,7 @@ Bug Fixes - Fix na-filling handling in mixed-type DataFrame (:issue:`910`) - Fix to DataFrame.set_value with non-existant row/col (:issue:`911`) - Fix malformed block in groupby when excluding nuisance columns (:issue:`916`) -- Fix inconsistant NA handling in dtype=object arrays (:issue:`925`) +- Fix inconsistent NA handling in dtype=object arrays (:issue:`925`) - Fix missing 
center-of-mass computation in ewmcov (:issue:`862`) - Don't raise exception when opening read-only HDF5 file (:issue:`847`) - Fix possible out-of-bounds memory access in 0-length Series (:issue:`917`) @@ -3994,7 +4574,7 @@ Bug Fixes - Add clearer error message in csv parser (:issue:`835`) - Fix loss of fractional seconds in HDFStore (:issue:`513`) - Fix DataFrame join where columns have datetimes (:issue:`787`) -- Work around numpy performance issue in take (:issue:`817`) +- Work around NumPy performance issue in take (:issue:`817`) - Improve comparison operations for NA-friendliness (:issue:`801`) - Fix indexing operation for floating point values (:issue:`780`, :issue:`798`) - Fix groupby case resulting in malformed dataframe (:issue:`814`) @@ -4460,7 +5040,7 @@ New Features - Add `melt` function to `pandas.core.reshape` - Add `level` parameter to group by level in Series and DataFrame descriptive statistics (:issue:`313`) -- Add `head` and `tail` methods to Series, analogous to to DataFrame (PR +- Add `head` and `tail` methods to Series, analogous to DataFrame (PR :issue:`296`) - Add `Series.isin` function which checks if each value is contained in a passed sequence (:issue:`289`) @@ -4815,9 +5395,9 @@ pandas 0.4.3 **Release date:** 10/9/2011 -is is largely a bugfix release from 0.4.2 but also includes a handful of new -d enhanced features. Also, pandas can now be installed and used on Python 3 -hanks Thomas Kluyver!). +This is largely a bugfix release from 0.4.2 but also includes a handful of new +and enhanced features. Also, pandas can now be installed and used on Python 3 +(thanks Thomas Kluyver!). New Features ~~~~~~~~~~~~ @@ -4880,9 +5460,9 @@ pandas 0.4.2 **Release date:** 10/3/2011 -is is a performance optimization release with several bug fixes. The new -t64Index and new merging / joining Cython code and related Python -frastructure are the main new additions +This is a performance optimization release with several bug fixes. 
The new +Int64Index and new merging / joining Cython code and related Python +infrastructure are the main new additions New Features ~~~~~~~~~~~~ @@ -4957,7 +5537,7 @@ pandas 0.4.1 **Release date:** 9/25/2011 -is is primarily a bug fix release but includes some new features and +This is primarily a bug fix release but includes some new features and improvements New Features @@ -5242,7 +5822,7 @@ API Changes `offset` argument for everything. So you can still pass a time rule string to `offset` - Added optional `encoding` argument to `read_csv`, `read_table`, `to_csv`, - `from_csv` to handle unicode in python 2.x + `from_csv` to handle unicode in Python 2.x Bug Fixes ~~~~~~~~~ diff --git a/doc/source/remote_data.rst b/doc/source/remote_data.rst deleted file mode 100644 index 7980133582125..0000000000000 --- a/doc/source/remote_data.rst +++ /dev/null @@ -1,31 +0,0 @@ -.. _remote_data: - -.. currentmodule:: pandas - -****************** -Remote Data Access -****************** - -.. _remote_data.pandas_datareader: - -DataReader ----------- - -The sub-package ``pandas.io.data`` is removed in favor of a separately -installable `pandas-datareader package -`_. This will allow the data -modules to be independently updated to your pandas installation. The API for -``pandas-datareader v0.1.1`` is the same as in ``pandas v0.16.1``. -(:issue:`8961`) - - You should replace the imports of the following: - - .. code-block:: python - - from pandas.io import data, wb - - With: - - .. code-block:: python - - from pandas_datareader import data, wb diff --git a/doc/source/reshaping.rst b/doc/source/reshaping.rst index eccaa9474bf6d..71ddaa13fdd8a 100644 --- a/doc/source/reshaping.rst +++ b/doc/source/reshaping.rst @@ -41,7 +41,7 @@ Data is often stored in CSV files or databases in so-called "stacked" or df -For the curious here is how the above DataFrame was created: +For the curious here is how the above ``DataFrame`` was created: .. 
code-block:: python @@ -63,15 +63,16 @@ To select out everything for variable ``A`` we could do: But suppose we wish to do time series operations with the variables. A better representation would be where the ``columns`` are the unique variables and an ``index`` of dates identifies individual observations. To reshape the data into -this form, use the ``pivot`` function: +this form, we use the :meth:`DataFrame.pivot` method (also implemented as a +top level function :func:`~pandas.pivot`): .. ipython:: python df.pivot(index='date', columns='variable', values='value') -If the ``values`` argument is omitted, and the input DataFrame has more than +If the ``values`` argument is omitted, and the input ``DataFrame`` has more than one column of values which are not used as column or index inputs to ``pivot``, -then the resulting "pivoted" DataFrame will have :ref:`hierarchical columns +then the resulting "pivoted" ``DataFrame`` will have :ref:`hierarchical columns ` whose topmost level indicates the respective value column: @@ -81,7 +82,7 @@ column: pivoted = df.pivot('date', 'variable') pivoted -You of course can then select subsets from the pivoted DataFrame: +You can then select subsets from the pivoted ``DataFrame``: .. ipython:: python @@ -95,18 +96,18 @@ are homogeneously-typed. Reshaping by stacking and unstacking ------------------------------------ -Closely related to the ``pivot`` function are the related ``stack`` and -``unstack`` functions currently available on Series and DataFrame. These -functions are designed to work together with ``MultiIndex`` objects (see the -section on :ref:`hierarchical indexing `). Here are -essentially what these functions do: +Closely related to the :meth:`~DataFrame.pivot` method are the related +:meth:`~DataFrame.stack` and :meth:`~DataFrame.unstack` methods available on +``Series`` and ``DataFrame``. These methods are designed to work together with +``MultiIndex`` objects (see the section on :ref:`hierarchical indexing +`). 
Here is essentially what these methods do: - ``stack``: "pivot" a level of the (possibly hierarchical) column labels, - returning a DataFrame with an index with a new inner-most level of row + returning a ``DataFrame`` with an index with a new inner-most level of row labels. - - ``unstack``: inverse operation from ``stack``: "pivot" a level of the + - ``unstack``: (inverse operation of ``stack``) "pivot" a level of the (possibly hierarchical) row index to the column axis, producing a reshaped - DataFrame with a new inner-most level of column labels. + ``DataFrame`` with a new inner-most level of column labels. The clearest way to explain is by example. Let's take a prior example data set from the hierarchical indexing section: @@ -122,11 +123,11 @@ from the hierarchical indexing section: df2 = df[:4] df2 -The ``stack`` function "compresses" a level in the DataFrame's columns to +The ``stack`` function "compresses" a level in the ``DataFrame``'s columns to produce either: - - A Series, in the case of a simple column Index - - A DataFrame, in the case of a ``MultiIndex`` in the columns + - A ``Series``, in the case of a simple column Index. + - A ``DataFrame``, in the case of a ``MultiIndex`` in the columns. If the columns have a ``MultiIndex``, you can choose which level to stack. The stacked level becomes the new lowest level in a ``MultiIndex`` on the columns: @@ -136,7 +137,7 @@ stacked level becomes the new lowest level in a ``MultiIndex`` on the columns: stacked = df2.stack() stacked -With a "stacked" DataFrame or Series (having a ``MultiIndex`` as the +With a "stacked" ``DataFrame`` or ``Series`` (having a ``MultiIndex`` as the ``index``), the inverse operation of ``stack`` is ``unstack``, which by default unstacks the **last level**: @@ -156,8 +157,8 @@ the level numbers: stacked.unstack('second') Notice that the ``stack`` and ``unstack`` methods implicitly sort the index -levels involved.
Hence a call to ``stack`` and then ``unstack``, or viceversa, -will result in a **sorted** copy of the original DataFrame or Series: +levels involved. Hence a call to ``stack`` and then ``unstack``, or vice versa, +will result in a **sorted** copy of the original ``DataFrame`` or ``Series``: .. ipython:: python @@ -166,7 +167,7 @@ will result in a **sorted** copy of the original DataFrame or Series: df all(df.unstack().stack() == df.sort_index()) -while the above code will raise a ``TypeError`` if the call to ``sort_index`` is +The above code will raise a ``TypeError`` if the call to ``sort_index`` is removed. .. _reshaping.stack_multiple: @@ -240,7 +241,7 @@ values will be set to ``NaN``. df3 df3.unstack() -.. versionadded: 0.18.0 +.. versionadded:: 0.18.0 Alternatively, unstack takes an optional ``fill_value`` argument, for specifying the value of missing data. @@ -265,12 +266,12 @@ the right thing: Reshaping by Melt ----------------- -The :func:`~pandas.melt` function is useful to massage a -DataFrame into a format where one or more columns are identifier variables, -while all other columns, considered measured variables, are "unpivoted" to the -row axis, leaving just two non-identifier columns, "variable" and "value". The -names of those columns can be customized by supplying the ``var_name`` and -``value_name`` parameters. +The top-level :func:`~pandas.melt` function and the corresponding :meth:`DataFrame.melt` +are useful to massage a ``DataFrame`` into a format where one or more columns +are *identifier variables*, while all other columns, considered *measured +variables*, are "unpivoted" to the row axis, leaving just two non-identifier +columns, "variable" and "value". The names of those columns can be customized +by supplying the ``var_name`` and ``value_name`` parameters. 
For instance, @@ -281,10 +282,12 @@ For instance, 'height' : [5.5, 6.0], 'weight' : [130, 150]}) cheese - pd.melt(cheese, id_vars=['first', 'last']) - pd.melt(cheese, id_vars=['first', 'last'], var_name='quantity') + cheese.melt(id_vars=['first', 'last']) + cheese.melt(id_vars=['first', 'last'], var_name='quantity') -Another way to transform is to use the ``wide_to_long`` panel data convenience function. +Another way to transform is to use the :func:`~pandas.wide_to_long` panel data +convenience function. It is less flexible than :func:`~pandas.melt`, but more +user-friendly. .. ipython:: python @@ -323,22 +326,25 @@ Pivot tables .. _reshaping.pivot: -While ``pivot`` provides general purpose pivoting of DataFrames with various -data types (strings, numerics, etc.), Pandas also provides the ``pivot_table`` -function for pivoting with aggregation of numeric data. -The function ``pandas.pivot_table`` can be used to create spreadsheet-style pivot -tables. See the :ref:`cookbook` for some advanced strategies -It takes a number of arguments +While :meth:`~DataFrame.pivot` provides general purpose pivoting with various +data types (strings, numerics, etc.), pandas also provides :func:`~pandas.pivot_table` +for pivoting with aggregation of numeric data. + +The function :func:`~pandas.pivot_table` can be used to create spreadsheet-style +pivot tables. See the :ref:`cookbook` for some advanced +strategies. + +It takes a number of arguments: -- ``data``: A DataFrame object -- ``values``: a column or a list of columns to aggregate +- ``data``: a DataFrame object. +- ``values``: a column or a list of columns to aggregate. - ``index``: a column, Grouper, array which has the same length as data, or list of them. Keys to group by on the pivot table index. If an array is passed, it is being used as the same manner as column values. - ``columns``: a column, Grouper, array which has the same length as data, or list of them. Keys to group by on the pivot table column. 
If an array is passed, it is being used as the same manner as column values. -- ``aggfunc``: function to use for aggregation, defaulting to ``numpy.mean`` +- ``aggfunc``: function to use for aggregation, defaulting to ``numpy.mean``. Consider a data set like this: @@ -362,7 +368,7 @@ We can produce pivot tables from this data very easily: pd.pivot_table(df, values='D', index=['B'], columns=['A', 'C'], aggfunc=np.sum) pd.pivot_table(df, values=['D','E'], index=['B'], columns=['A', 'C'], aggfunc=np.sum) -The result object is a DataFrame having potentially hierarchical indexes on the +The result object is a ``DataFrame`` having potentially hierarchical indexes on the rows and columns. If the ``values`` column name is not given, the pivot table will include all of the data that can be aggregated in an additional level of hierarchy in the columns: @@ -385,7 +391,8 @@ calling ``to_string`` if you wish: table = pd.pivot_table(df, index=['A', 'B'], columns=['C']) print(table.to_string(na_rep='')) -Note that ``pivot_table`` is also available as an instance method on DataFrame. +Note that ``pivot_table`` is also available as an instance method on DataFrame, + i.e. :meth:`DataFrame.pivot_table`. .. _reshaping.pivot.margins: @@ -405,27 +412,27 @@ rows and columns: Cross tabulations ----------------- -Use the ``crosstab`` function to compute a cross-tabulation of two (or more) +Use :func:`~pandas.crosstab` to compute a cross-tabulation of two (or more) factors. By default ``crosstab`` computes a frequency table of the factors unless an array of values and an aggregation function are passed. It takes a number of arguments -- ``index``: array-like, values to group by in the rows -- ``columns``: array-like, values to group by in the columns +- ``index``: array-like, values to group by in the rows. +- ``columns``: array-like, values to group by in the columns. - ``values``: array-like, optional, array of values to aggregate according to - the factors + the factors. 
- ``aggfunc``: function, optional, If no values array is passed, computes a - frequency table -- ``rownames``: sequence, default ``None``, must match number of row arrays passed + frequency table. +- ``rownames``: sequence, default ``None``, must match number of row arrays passed. - ``colnames``: sequence, default ``None``, if passed, must match number of column - arrays passed + arrays passed. - ``margins``: boolean, default ``False``, Add row/column margins (subtotals) - ``normalize``: boolean, {'all', 'index', 'columns'}, or {0,1}, default ``False``. Normalize by dividing all values by the sum of values. -Any Series passed will have their name attributes used unless row or column +Any ``Series`` passed will have their name attributes used unless row or column names for the cross-tabulation are specified For example: @@ -477,9 +484,9 @@ using the ``normalize`` argument: pd.crosstab(df.A, df.B, normalize='columns') -``crosstab`` can also be passed a third Series and an aggregation function -(``aggfunc``) that will be applied to the values of the third Series within each -group defined by the first two Series: +``crosstab`` can also be passed a third ``Series`` and an aggregation function +(``aggfunc``) that will be applied to the values of the third ``Series`` within +each group defined by the first two ``Series``: .. ipython:: python @@ -501,9 +508,9 @@ Finally, one can also add margins or normalize this output. Tiling ------ -The ``cut`` function computes groupings for the values of the input array and -is often used to transform continuous variables to discrete or categorical -variables: +The :func:`~pandas.cut` function computes groupings for the values of the input +array and is often used to transform continuous variables to discrete or +categorical variables: .. ipython:: python @@ -516,7 +523,15 @@ Alternatively we can specify custom bin-edges: .. ipython:: python - pd.cut(ages, bins=[0, 18, 35, 70]) + c = pd.cut(ages, bins=[0, 18, 35, 70]) + c + +.. 
versionadded:: 0.20.0 + +If the ``bins`` keyword is an ``IntervalIndex``, then these will be +used to bin the passed data:: + + pd.cut([25, 20, 50], bins=c.categories) .. _reshaping.dummies: @@ -524,9 +539,10 @@ Alternatively we can specify custom bin-edges: Computing indicator / dummy variables ------------------------------------- -To convert a categorical variable into a "dummy" or "indicator" DataFrame, for example -a column in a DataFrame (a Series) which has ``k`` distinct values, can derive a DataFrame -containing ``k`` columns of 1s and 0s: +To convert a categorical variable into a "dummy" or "indicator" ``DataFrame``, +for example a column in a ``DataFrame`` (a ``Series``) which has ``k`` distinct +values, you can derive a ``DataFrame`` containing ``k`` columns of 1s and 0s using +:func:`~pandas.get_dummies`: .. ipython:: python @@ -535,7 +551,7 @@ containing ``k`` columns of 1s and 0s: pd.get_dummies(df['key']) Sometimes it's useful to prefix the column names, for example when merging the result -with the original DataFrame: +with the original ``DataFrame``: .. ipython:: python @@ -560,11 +576,9 @@ This function is often used along with discretization functions like ``cut``: See also :func:`Series.str.get_dummies `. -.. versionadded:: 0.15.0 - -:func:`get_dummies` also accepts a DataFrame. By default all categorical -variables (categorical in the statistical sense, -those with `object` or `categorical` dtype) are encoded as dummy variables. +:func:`get_dummies` also accepts a ``DataFrame``. By default all categorical +variables (categorical in the statistical sense, those with `object` or +`categorical` dtype) are encoded as dummy variables. .. ipython:: python @@ -573,9 +587,8 @@ those with `object` or `categorical` dtype) are encoded as dummy variables. 'C': [1, 2, 3]}) pd.get_dummies(df) -All non-object columns are included untouched in the output. - -You can control the columns that are encoded with the ``columns`` keyword.
+All non-object columns are included untouched in the output. You can control +the columns that are encoded with the ``columns`` keyword. .. ipython:: python @@ -585,14 +598,14 @@ Notice that the ``B`` column is still included in the output, it just hasn't been encoded. You can drop ``B`` before calling ``get_dummies`` if you don't want to include it in the output. -As with the Series version, you can pass values for the ``prefix`` and +As with the ``Series`` version, you can pass values for the ``prefix`` and ``prefix_sep``. By default the column name is used as the prefix, and '_' as -the prefix separator. You can specify ``prefix`` and ``prefix_sep`` in 3 ways +the prefix separator. You can specify ``prefix`` and ``prefix_sep`` in 3 ways: - string: Use the same value for ``prefix`` or ``prefix_sep`` for each column - to be encoded + to be encoded. - list: Must be the same length as the number of columns being encoded. -- dict: Mapping column name to prefix +- dict: Mapping column name to prefix. .. ipython:: python @@ -627,12 +640,24 @@ When a column contains only one level, it will be omitted in the result. pd.get_dummies(df, drop_first=True) +By default new columns will have ``np.uint8`` dtype. +To choose another dtype, use the ``dtype`` argument: + +.. ipython:: python + + df = pd.DataFrame({'A': list('abc'), 'B': [1.1, 2.2, 3.3]}) + + pd.get_dummies(df, dtype=bool).dtypes + +.. versionadded:: 0.23.0 + +.. _reshaping.factorize: Factorizing values ------------------ -To encode 1-d values as an enumerated type use ``factorize``: +To encode 1-d values as an enumerated type use :func:`~pandas.factorize`: .. ipython:: python @@ -648,7 +673,7 @@ handling of NaN: .. note:: The following ``numpy.unique`` will fail under Python 3 with a ``TypeError`` because of an ordering bug. See also - `Here `__ + `here `__. ..
code-block:: ipython @@ -666,4 +691,4 @@ handling of NaN: you can use ``df["cat_col"] = pd.Categorical(df["col"])`` or ``df["cat_col"] = df["col"].astype("category")``. For full docs on :class:`~pandas.Categorical`, see the :ref:`Categorical introduction ` and the - :ref:`API documentation `. This feature was introduced in version 0.15. + :ref:`API documentation `. diff --git a/doc/source/sparse.rst b/doc/source/sparse.rst index 2bc5d3f6dd0f5..260d8aa32ef52 100644 --- a/doc/source/sparse.rst +++ b/doc/source/sparse.rst @@ -17,11 +17,11 @@ Sparse data structures .. note:: The ``SparsePanel`` class has been removed in 0.19.0 -We have implemented "sparse" versions of Series and DataFrame. These are not sparse +We have implemented "sparse" versions of ``Series`` and ``DataFrame``. These are not sparse in the typical "mostly 0". Rather, you can view these objects as being "compressed" where any data matching a specific value (``NaN`` / missing value, though any value can be chosen) is omitted. A special ``SparseIndex`` object tracks where data has been -"sparsified". This will make much more sense in an example. All of the standard pandas +"sparsified". This will make much more sense with an example. All of the standard pandas data structures have a ``to_sparse`` method: .. ipython:: python @@ -32,7 +32,7 @@ data structures have a ``to_sparse`` method: sts The ``to_sparse`` method takes a ``kind`` argument (for the sparse index, see -below) and a ``fill_value``. So if we had a mostly zero Series, we could +below) and a ``fill_value``. So if we had a mostly zero ``Series``, we could convert it to sparse with ``fill_value=0``: .. ipython:: python @@ -40,7 +40,7 @@ convert it to sparse with ``fill_value=0``: ts.fillna(0).to_sparse(fill_value=0) The sparse objects exist for memory efficiency reasons. Suppose you had a -large, mostly NA DataFrame: +large, mostly NA ``DataFrame``: .. 
ipython:: python @@ -85,15 +85,6 @@ can be converted back to a regular ndarray by calling ``to_dense``: sparr.to_dense() -.. _sparse.list: - -SparseList ----------- - -The ``SparseList`` class has been deprecated and will be removed in a future version. -See the `docs of a previous version `__ -for documentation on ``SparseList``. - SparseIndex objects ------------------- @@ -132,7 +123,7 @@ dtype, ``fill_value`` default changes: s.to_sparse() You can change the dtype using ``.astype()``, the result is also sparse. Note that -``.astype()`` also affects to the ``fill_value`` to keep its dense represantation. +``.astype()`` also affects the ``fill_value`` to keep its dense representation. .. ipython:: python @@ -186,7 +177,35 @@ the correct dense result. Interaction with scipy.sparse ----------------------------- -Experimental api to transform between sparse pandas and scipy.sparse structures. +SparseDataFrame +~~~~~~~~~~~~~~~ + +.. versionadded:: 0.20.0 + +Pandas supports creating sparse dataframes directly from ``scipy.sparse`` matrices. + +.. ipython:: python + + from scipy.sparse import csr_matrix + + arr = np.random.random(size=(1000, 5)) + arr[arr < .9] = 0 + + sp_arr = csr_matrix(arr) + sp_arr + + sdf = pd.SparseDataFrame(sp_arr) + sdf + +All sparse formats are supported, but matrices that are not in :mod:`COOrdinate <scipy.sparse>` format will be converted, copying data as needed. +To convert a ``SparseDataFrame`` back to a sparse SciPy matrix in COO format, you can use the :meth:`SparseDataFrame.to_coo` method: + +.. ipython:: python + + sdf.to_coo() + +SparseSeries +~~~~~~~~~~~~ A :meth:`SparseSeries.to_coo` method is implemented for transforming a ``SparseSeries`` indexed by a ``MultiIndex`` to a ``scipy.sparse.coo_matrix``.
diff --git a/doc/source/html-styling.ipynb b/doc/source/style.ipynb similarity index 69% rename from doc/source/html-styling.ipynb rename to doc/source/style.ipynb index 1a97378fd30b1..152ca90049bf1 100644 --- a/doc/source/html-styling.ipynb +++ b/doc/source/style.ipynb @@ -2,45 +2,38 @@ "cells": [ { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "collapsed": true + }, "source": [ + "# Styling\n", + "\n", "*New in version 0.17.1*\n", "\n", - "

*Provisional: This is a new feature and still under development. We'll be adding features and possibly making breaking changes in future releases. We'd love to hear your [feedback](https://github.com/pandas-dev/pandas/issues).*

\n", + "*Provisional: This is a new feature and still under development. We'll be adding features and possibly making breaking changes in future releases. We'd love to hear your feedback.*\n", "\n", - "This document is written as a Jupyter Notebook, and can be viewed or downloaded [here](http://nbviewer.ipython.org/github/pandas-dev/pandas/blob/master/doc/source/html-styling.ipynb).\n", + "This document is written as a Jupyter Notebook, and can be viewed or downloaded [here](http://nbviewer.ipython.org/github/pandas-dev/pandas/blob/master/doc/source/style.ipynb).\n", "\n", "You can apply **conditional formatting**, the visual styling of a DataFrame\n", "depending on the data within, by using the ``DataFrame.style`` property.\n", - "This is a property that returns a ``pandas.Styler`` object, which has\n", + "This is a property that returns a ``Styler`` object, which has\n", "useful methods for formatting and displaying DataFrames.\n", "\n", "The styling is accomplished using CSS.\n", "You write \"style functions\" that take scalars, `DataFrame`s or `Series`, and return *like-indexed* DataFrames or Series with CSS `\"attribute: value\"` pairs for the values.\n", - "These functions can be incrementally passed to the `Styler` which collects the styles before rendering.\n", - "\n", - "### Contents\n", - "\n", - "- [Building Styles](#Building-Styles)\n", - "- [Finer Control: Slicing](#Finer-Control:-Slicing)\n", - "- [Builtin Styles](#Builtin-Styles)\n", - "- [Other options](#Other-options)\n", - "- [Sharing Styles](#Sharing-Styles)\n", - "- [Limitations](#Limitations)\n", - "- [Terms](#Terms)\n", - "- [Extensibility](#Extensibility)" + "These functions can be incrementally passed to the `Styler` which collects the styles before rendering." 
] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# Building Styles\n", + "## Building Styles\n", "\n", "Pass your style functions into one of the following methods:\n", "\n", - "- `Styler.applymap`: elementwise\n", - "- `Styler.apply`: column-/row-/table-wise\n", + "- ``Styler.applymap``: elementwise\n", + "- ``Styler.apply``: column-/row-/table-wise\n", "\n", "Both of those methods take a function (and some other keyword arguments) and applies your function to the DataFrame in a certain way.\n", "`Styler.applymap` works through the DataFrame elementwise.\n", @@ -58,7 +51,21 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": true, + "nbsphinx": "hidden" + }, + "outputs": [], + "source": [ + "import matplotlib.pyplot\n", + "# We have this here to trigger matplotlib's font cache stuff.\n", + "# This cell is hidden from the output" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true }, "outputs": [], "source": [ @@ -82,9 +89,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "df.style" @@ -94,7 +99,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "*Note*: The `DataFrame.style` attribute is a propetry that returns a `Styler` object. `Styler` has a `_repr_html_` method defined on it so they are rendered automatically. If you want the actual HTML back for further processing or for writing to file call the `.render()` method which returns a string.\n", + "*Note*: The `DataFrame.style` attribute is a property that returns a `Styler` object. `Styler` has a `_repr_html_` method defined on it so they are rendered automatically. If you want the actual HTML back for further processing or for writing to file call the `.render()` method which returns a string.\n", "\n", "The above output looks very similar to the standard DataFrame HTML representation. 
But we've done some work behind the scenes to attach CSS classes to each cell. We can view these by calling the `.render` method." ] @@ -102,9 +107,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "df.style.highlight_null().render().split('\\n')[:10]" @@ -155,9 +158,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ "s = df.style.applymap(color_negative_red)\n", @@ -168,7 +169,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Notice the similarity with the standard `df.applymap`, which operates on DataFrames elementwise. We want you to be able to resuse your existing knowledge of how to interact with DataFrames.\n", + "Notice the similarity with the standard `df.applymap`, which operates on DataFrames elementwise. We want you to be able to reuse your existing knowledge of how to interact with DataFrames.\n", "\n", "Notice also that our function returned a string containing the CSS attribute and value, separated by a colon just like in a `'.format(css))" ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python [default]", "language": "python", "name": "python3" }, @@ -961,9 +1227,16 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.1" + "version": "3.5.3" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 1, + "version_minor": 0 + } } }, "nbformat": 4, - "nbformat_minor": 0 + "nbformat_minor": 1 } diff --git a/doc/source/style.rst b/doc/source/style.rst deleted file mode 100644 index 506b38bf06e65..0000000000000 --- a/doc/source/style.rst +++ /dev/null @@ -1,10 +0,0 @@ -.. _style: - -.. currentmodule:: pandas - -***** -Style -***** - -.. 
raw:: html - :file: html-styling.html diff --git a/doc/source/template_structure.html b/doc/source/template_structure.html new file mode 100644 index 0000000000000..0778d8e2e6f18 --- /dev/null +++ b/doc/source/template_structure.html @@ -0,0 +1,57 @@ + + + +

before_style
+
style +
<style type="text/css">
+
table_styles
+
before_cellstyle
+
cellstyle
+
</style>
+
+ +
before_table
+ +
table +
<table ...>
+
caption
+ +
thead +
before_head_rows
+
head_tr (loop over headers)
+
after_head_rows
+
+ +
tbody +
before_rows
+
tr (loop over data rows)
+
after_rows
+
+
</table>
+
+ +
after_table
diff --git a/doc/source/text.rst b/doc/source/text.rst index 52e05c5d511bc..da8e40892716e 100644 --- a/doc/source/text.rst +++ b/doc/source/text.rst @@ -99,7 +99,7 @@ Elements in the split lists can be accessed using ``get`` or ``[]`` notation: s2.str.split('_').str.get(1) s2.str.split('_').str[1] -Easy to expand this to return a DataFrame using ``expand``. +It is easy to expand this to return a DataFrame using ``expand``. .. ipython:: python @@ -118,8 +118,8 @@ i.e., from the end of the string to the beginning of the string: s2.str.rsplit('_', expand=True, n=1) -Methods like ``replace`` and ``findall`` take `regular expressions -`__, too: +``replace`` by default replaces `regular expressions +`__: .. ipython:: python @@ -146,12 +146,25 @@ following code will cause trouble because of the regular expression meaning of # We need to escape the special character (for >1 len patterns) dollars.str.replace(r'-\$', '-') -The ``replace`` method can also take a callable as replacement. It is called -on every ``pat`` using :func:`re.sub`. The callable should expect one -positional argument (a regex object) and return a string. +.. versionadded:: 0.23.0 + +If you do want literal replacement of a string (equivalent to +:meth:`str.replace`), you can set the optional ``regex`` parameter to +``False``, rather than escaping each character. In this case both ``pat`` +and ``repl`` must be strings: + +.. ipython:: python + + # These lines are equivalent + dollars.str.replace(r'-\$', '-') + dollars.str.replace('-$', '-', regex=False) .. versionadded:: 0.20.0 +The ``replace`` method can also take a callable as replacement. It is called +on every ``pat`` using :func:`re.sub`. The callable should expect one +positional argument (a regex object) and return a string. + .. ipython:: python # Reverse every lowercase alphabetic word @@ -164,6 +177,28 @@ positional argument (a regex object) and return a string. 
repl = lambda m: m.group('two').swapcase() pd.Series(['Foo Bar Baz', np.nan]).str.replace(pat, repl) +.. versionadded:: 0.20.0 + +The ``replace`` method also accepts a compiled regular expression object +from :func:`re.compile` as a pattern. All flags should be included in the +compiled regular expression object. + +.. ipython:: python + + import re + regex_pat = re.compile(r'^.a|dog', flags=re.IGNORECASE) + s3.str.replace(regex_pat, 'XX-XX ') + +Including a ``flags`` argument when calling ``replace`` with a compiled +regular expression object will raise a ``ValueError``. + +.. ipython:: + + @verbatim + In [1]: s3.str.replace(regex_pat, 'XX-XX ', flags=re.IGNORECASE) + --------------------------------------------------------------------------- + ValueError: case and flags cannot be set when pat is a compiled regex + Indexing with ``.str`` ---------------------- @@ -190,8 +225,6 @@ Extracting Substrings Extract first match in each subject (extract) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -.. versionadded:: 0.13.0 - .. warning:: In version 0.18.0, ``extract`` gained the ``expand`` argument. When @@ -199,10 +232,11 @@ Extract first match in each subject (extract) ``DataFrame``, depending on the subject and regular expression pattern (same behavior as pre-0.18.0). When ``expand=True`` it always returns a ``DataFrame``, which is more consistent and less - confusing from the perspective of a user. + confusing from the perspective of a user. ``expand=True`` is the + default since version 0.23.0. The ``extract`` method accepts a `regular expression -`__ with at least one +`__ with at least one capture group. Extracting a regular expression with more than one group returns a @@ -249,7 +283,7 @@ It returns a Series if ``expand=False``. 
pd.Series(['a1', 'b2', 'c3']).str.extract('[ab](\d)', expand=False) Calling on an ``Index`` with a regex with exactly one capture group -returns a ``DataFrame`` with one column if ``expand=True``, +returns a ``DataFrame`` with one column if ``expand=True``. .. ipython:: python @@ -351,33 +385,20 @@ You can check whether elements contain a pattern: .. ipython:: python - pattern = r'[a-z][0-9]' + pattern = r'[0-9][a-z]' pd.Series(['1', '2', '3a', '3b', '03c']).str.contains(pattern) -or match a pattern: - +Or whether elements match a pattern: .. ipython:: python - pd.Series(['1', '2', '3a', '3b', '03c']).str.match(pattern, as_indexer=True) + pd.Series(['1', '2', '3a', '3b', '03c']).str.match(pattern) The distinction between ``match`` and ``contains`` is strictness: ``match`` relies on strict ``re.match``, while ``contains`` relies on ``re.search``. -.. warning:: - - In previous versions, ``match`` was for *extracting* groups, - returning a not-so-convenient Series of tuples. The new method ``extract`` - (described in the previous section) is now preferred. - - This old, deprecated behavior of ``match`` is still the default. As - demonstrated above, use the new behavior by setting ``as_indexer=True``. - In this mode, ``match`` is analogous to ``contains``, returning a boolean - Series. The new behavior will become the default behavior in a future - release. - Methods like ``match``, ``contains``, ``startswith``, and ``endswith`` take - an extra ``na`` argument so missing values can be considered True or False: +an extra ``na`` argument so missing values can be considered True or False: .. 
ipython:: python @@ -425,7 +446,7 @@ Method Summary :meth:`~Series.str.join`;Join strings in each element of the Series with passed separator :meth:`~Series.str.get_dummies`;Split strings on the delimiter returning DataFrame of dummy variables :meth:`~Series.str.contains`;Return boolean array if each string contains pattern/regex - :meth:`~Series.str.replace`;Replace occurrences of pattern/regex with some other string or the return value of a callable given the occurrence + :meth:`~Series.str.replace`;Replace occurrences of pattern/regex/string with some other string or the return value of a callable given the occurrence :meth:`~Series.str.repeat`;Duplicate values (``s.str.repeat(3)`` equivalent to ``x * 3``) :meth:`~Series.str.pad`;"Add whitespace to left, right, or both sides of strings" :meth:`~Series.str.center`;Equivalent to ``str.center`` diff --git a/doc/source/themes/nature_with_gtoc/layout.html b/doc/source/themes/nature_with_gtoc/layout.html index ddf1e861f5f81..a2106605c5562 100644 --- a/doc/source/themes/nature_with_gtoc/layout.html +++ b/doc/source/themes/nature_with_gtoc/layout.html @@ -94,4 +94,15 @@

{{ _('Search') }}

}); }); + {% endblock %} \ No newline at end of file diff --git a/doc/source/themes/nature_with_gtoc/static/nature.css_t b/doc/source/themes/nature_with_gtoc/static/nature.css_t index 2948f0d68b402..b61068ee28bef 100644 --- a/doc/source/themes/nature_with_gtoc/static/nature.css_t +++ b/doc/source/themes/nature_with_gtoc/static/nature.css_t @@ -299,20 +299,45 @@ td.field-body blockquote { padding-left: 30px; } -.rendered_html table { +// Adapted from the new Jupyter notebook style +// https://github.com/jupyter/notebook/blob/c8841b68c4c0739bbee1291e0214771f24194079/notebook/static/notebook/less/renderedhtml.less#L59 +table { margin-left: auto; margin-right: auto; - border-right: 1px solid #cbcbcb; - border-bottom: 1px solid #cbcbcb; + border: none; + border-collapse: collapse; + border-spacing: 0; + color: @rendered_html_border_color; + table-layout: fixed; +} +thead { + border-bottom: 1px solid @rendered_html_border_color; + vertical-align: bottom; +} +tr, th, td { + vertical-align: middle; + padding: 0.5em 0.5em; + line-height: normal; + white-space: normal; + max-width: none; + border: none; +} +th { + font-weight: bold; +} +th.col_heading { + text-align: right; +} +tbody tr:nth-child(odd) { + background: #f5f5f5; } -.rendered_html td, th { - border-left: 1px solid #cbcbcb; - border-top: 1px solid #cbcbcb; - margin: 0; - padding: 0.5em .75em; +table td.data, table th.row_heading table th.col_heading { + font-family: monospace; + text-align: right; } + /** * See also */ diff --git a/doc/source/timedeltas.rst b/doc/source/timedeltas.rst index 07effcfdff33b..5f3a01f0725d4 100644 --- a/doc/source/timedeltas.rst +++ b/doc/source/timedeltas.rst @@ -23,13 +23,12 @@ Time Deltas *********** -.. 
note:: - - Starting in v0.15.0, we introduce a new scalar type ``Timedelta``, which is a subclass of ``datetime.timedelta``, and behaves in a similar manner, - but allows compatibility with ``np.timedelta64`` types as well as a host of custom representation, parsing, and attributes. +Timedeltas are differences in times, expressed in different units, e.g. days, hours, minutes, +seconds. They can be both positive and negative. -Timedeltas are differences in times, expressed in difference units, e.g. days, hours, minutes, seconds. -They can be both positive and negative. +``Timedelta`` is a subclass of ``datetime.timedelta``, and behaves in a similar manner, +but allows compatibility with ``np.timedelta64`` types as well as a host of custom representation, +parsing, and attributes. Parsing ------- @@ -63,6 +62,14 @@ You can construct a ``Timedelta`` scalar through various arguments: pd.Timedelta('nan') pd.Timedelta('nat') + # ISO 8601 Duration strings + pd.Timedelta('P0DT0H1M0S') + pd.Timedelta('P0DT0H0M0.000000123S') + +.. versionadded:: 0.23.0 + + Added constructor for `ISO 8601 Duration`_ strings + :ref:`DateOffsets` (``Day, Hour, Minute, Second, Milli, Micro, Nano``) can also be used in construction. .. ipython:: python @@ -78,15 +85,10 @@ Further, operations among the scalars yield another scalar ``Timedelta``. to_timedelta ~~~~~~~~~~~~ -.. warning:: - - Prior to 0.15.0 ``pd.to_timedelta`` would return a ``Series`` for list-like/Series input, and a ``np.timedelta64`` for scalar input. - It will now return a ``TimedeltaIndex`` for list-like input, ``Series`` for Series input, and ``Timedelta`` for scalar input. - - The arguments to ``pd.to_timedelta`` are now ``(arg, unit='ns', box=True)``, previously were ``(arg, box=True, unit='ns')`` as these are more logical. - -Using the top-level ``pd.to_timedelta``, you can convert a scalar, array, list, or Series from a recognized timedelta format / value into a ``Timedelta`` type.
-It will construct Series if the input is a Series, a scalar if the input is scalar-like, otherwise will output a ``TimedeltaIndex``. +Using the top-level ``pd.to_timedelta``, you can convert a scalar, array, list, +or Series from a recognized timedelta format / value into a ``Timedelta`` type. +It will construct Series if the input is a Series, a scalar if the input is +scalar-like, otherwise it will output a ``TimedeltaIndex``. You can parse a single string to a Timedelta: @@ -242,11 +244,9 @@ Numeric reduction operation for ``timedelta64[ns]`` will return ``Timedelta`` ob Frequency Conversion -------------------- -.. versionadded:: 0.13 - Timedelta Series, ``TimedeltaIndex``, and ``Timedelta`` scalars can be converted to other 'frequencies' by dividing by another timedelta, or by astyping to a specific timedelta type. These operations yield Series and propagate ``NaT`` -> ``nan``. -Note that division by the numpy scalar is true division, while astyping is equivalent of floor division. +Note that division by the NumPy scalar is true division, while astyping is equivalent of floor division. .. ipython:: python @@ -275,6 +275,28 @@ yields another ``timedelta64[ns]`` dtypes Series. td * -1 td * pd.Series([1, 2, 3, 4]) +Rounded division (floor-division) of a ``timedelta64[ns]`` Series by a scalar +``Timedelta`` gives a series of integers. + +.. ipython:: python + + td // pd.Timedelta(days=3, hours=4) + pd.Timedelta(days=3, hours=4) // td + +.. _timedeltas.mod_divmod: + +The mod (%) and divmod operations are defined for ``Timedelta`` when operating with another timedelta-like or with a numeric argument. + +.. 
ipython:: python + + pd.Timedelta(hours=37) % datetime.timedelta(hours=2) + + # divmod against a timedelta-like returns a pair (int, Timedelta) + divmod(datetime.timedelta(hours=2), pd.Timedelta(minutes=11)) + + # divmod against a numeric returns a pair (Timedelta, Timedelta) + divmod(pd.Timedelta(hours=25), 86400000000000) + Attributes ---------- @@ -330,8 +352,6 @@ You can convert a ``Timedelta`` to an `ISO 8601 Duration`_ string with the TimedeltaIndex -------------- -.. versionadded:: 0.15.0 - To generate an index with time delta, you can use either the ``TimedeltaIndex`` or the ``timedelta_range`` constructor. diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index e09d240ed91b7..466c48b780861 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -60,7 +60,7 @@ Change frequency and fill gaps: converted = ts.asfreq('45Min', method='pad') converted.head() -Resample: +Resample the series to a daily frequency: .. ipython:: python @@ -73,24 +73,24 @@ Resample: Overview -------- -Following table shows the type of time-related classes pandas can handle and +The following table shows the type of time-related classes pandas can handle and how to create them. 
-================= =============================== ================================================== +================= =============================== =================================================================== Class Remarks How to create -================= =============================== ================================================== -``Timestamp`` Represents a single time stamp ``to_datetime``, ``Timestamp`` -``DatetimeIndex`` Index of ``Timestamp`` ``to_datetime``, ``date_range``, ``DatetimeIndex`` +================= =============================== =================================================================== +``Timestamp`` Represents a single timestamp ``to_datetime``, ``Timestamp`` +``DatetimeIndex`` Index of ``Timestamp`` ``to_datetime``, ``date_range``, ``bdate_range``, ``DatetimeIndex`` ``Period`` Represents a single time span ``Period`` ``PeriodIndex`` Index of ``Period`` ``period_range``, ``PeriodIndex`` -================= =============================== ================================================== +================= =============================== =================================================================== .. _timeseries.representation: -Time Stamps vs. Time Spans --------------------------- +Timestamps vs. Time Spans +------------------------- -Time-stamped data is the most basic type of timeseries data that associates +Timestamped data is the most basic type of time series data that associates values with points in time. For pandas objects it means using the points in time. @@ -112,9 +112,9 @@ For example: pd.Period('2012-05', freq='D') -``Timestamp`` and ``Period`` can be the index. Lists of ``Timestamp`` and -``Period`` are automatically coerce to ``DatetimeIndex`` and ``PeriodIndex`` -respectively. +:class:`Timestamp` and :class:`Period` can serve as an index. Lists of +``Timestamp`` and ``Period`` are automatically coerced to :class:`DatetimeIndex` +and :class:`PeriodIndex` respectively. .. 
ipython:: python @@ -149,10 +149,10 @@ future releases. Converting to Timestamps ------------------------ -To convert a Series or list-like object of date-like objects e.g. strings, +To convert a :class:`Series` or list-like object of date-like objects e.g. strings, epochs, or a mixture, you can use the ``to_datetime`` function. When passed -a Series, this returns a Series (with the same index), while a list-like -is converted to a DatetimeIndex: +a ``Series``, this returns a ``Series`` (with the same index), while a list-like +is converted to a ``DatetimeIndex``: .. ipython:: python @@ -175,15 +175,9 @@ you can pass the ``dayfirst`` flag: can't be parsed with the day being first it will be parsed as if ``dayfirst`` were False. -.. note:: - Specifying a ``format`` argument will potentially speed up the conversion - considerably and on versions later then 0.13.0 explicitly specifying - a format string of '%Y%m%d' takes a faster path still. - -If you pass a single string to ``to_datetime``, it returns single ``Timestamp``. -Also, ``Timestamp`` can accept the string input. -Note that ``Timestamp`` doesn't accept string parsing option like ``dayfirst`` -or ``format``, use ``to_datetime`` if these are required. +If you pass a single string to ``to_datetime``, it returns a single ``Timestamp``. +``Timestamp`` can also accept string input, but it doesn't accept string parsing +options like ``dayfirst`` or ``format``, so use ``to_datetime`` if these are required. .. ipython:: python @@ -191,6 +185,25 @@ or ``format``, use ``to_datetime`` if these are required. pd.Timestamp('2010/11/12') +Providing a Format Argument +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In addition to the required datetime string, a ``format`` argument can be passed to ensure specific parsing. +This could also potentially speed up the conversion considerably. + +.. 
ipython:: python + + pd.to_datetime('2010/11/12', format='%Y/%m/%d') + + pd.to_datetime('12-11-2010 00:00', format='%d-%m-%Y %H:%M') + +For more information on the choices available when specifying the ``format`` +option, see the Python `datetime documentation +` have about 15 digits precision in + decimal. Rounding during conversion from float to high precision ``Timestamp`` is + unavoidable. The only way to achieve exact precision is to use a fixed-width + type (e.g. an int64). + + .. ipython:: python + + pd.to_datetime([1490195805.433, 1490195805.433502912], unit='s') + pd.to_datetime(1490195805433502912, unit='ns') + +.. seealso:: + + :ref:`timeseries.origin` + +.. _timeseries.converting.epoch_inverse: + +From Timestamps to Epoch +~~~~~~~~~~~~~~~~~~~~~~~~ + +To invert the operation from above, namely, to convert from a ``Timestamp`` to a 'unix' epoch: .. ipython:: python - pd.to_datetime([1]) + stamps = pd.date_range('2012-10-08 18:15:05', periods=4, freq='D') + stamps - pd.to_datetime([1, 3.14], unit='s') +We convert the ``DatetimeIndex`` to an ``int64`` array, then divide by the conversion unit. -.. note:: +.. ipython:: python - Epoch times will be rounded to the nearest nanosecond. + stamps.view('int64') // pd.Timedelta(1, unit='s') + +.. _timeseries.origin: + +Using the ``origin`` Parameter +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 0.20.0 + +Using the ``origin`` parameter, one can specify an alternative starting point for creation +of a ``DatetimeIndex``. For example, to use 1960-01-01 as the starting date: + +.. ipython:: python + + pd.to_datetime([1, 2, 3], unit='D', origin=pd.Timestamp('1960-01-01')) + +The default is set at ``origin='unix'``, which defaults to ``1970-01-01 00:00:00``, +commonly called 'unix epoch' or POSIX time. + +.. ipython:: python + + pd.to_datetime([1, 2, 3], unit='D') .. 
_timeseries.daterange: Generating Ranges of Timestamps ------------------------------- -To generate an index with time stamps, you can use either the DatetimeIndex or -Index constructor and pass in a list of datetime objects: +To generate an index with timestamps, you can use either the ``DatetimeIndex`` or +``Index`` constructor and pass in a list of datetime objects: .. ipython:: python @@ -296,37 +350,36 @@ Index constructor and pass in a list of datetime objects: index = pd.Index(dates) index -Practically, this becomes very cumbersome because we often need a very long +In practice this becomes very cumbersome because we often need a very long index with a large number of timestamps. If we need timestamps on a regular -frequency, we can use the pandas functions ``date_range`` and ``bdate_range`` -to create timestamp indexes. +frequency, we can use the :func:`date_range` and :func:`bdate_range` functions +to create a ``DatetimeIndex``. The default frequency for ``date_range`` is a +**calendar day** while the default for ``bdate_range`` is a **business day**: .. ipython:: python - index = pd.date_range('2000-1-1', periods=1000, freq='M') + start = datetime(2011, 1, 1) + end = datetime(2012, 1, 1) + + index = pd.date_range(start, end) index - index = pd.bdate_range('2012-1-1', periods=250) + index = pd.bdate_range(start, end) index -Convenience functions like ``date_range`` and ``bdate_range`` utilize a -variety of frequency aliases. The default frequency for ``date_range`` is a -**calendar day** while the default for ``bdate_range`` is a **business day** +Convenience functions like ``date_range`` and ``bdate_range`` can utilize a +variety of :ref:`frequency aliases `: .. 
ipython:: python - start = datetime(2011, 1, 1) - end = datetime(2012, 1, 1) - - rng = pd.date_range(start, end) - rng + pd.date_range(start, periods=1000, freq='M') - rng = pd.bdate_range(start, end) - rng + pd.bdate_range(start, periods=250, freq='BQS') ``date_range`` and ``bdate_range`` make it easy to generate a range of dates -using various combinations of parameters like ``start``, ``end``, -``periods``, and ``freq``: +using various combinations of parameters like ``start``, ``end``, ``periods``, +and ``freq``. The start and end dates are strictly inclusive, so dates outside +of those specified will not be generated: .. ipython:: python @@ -338,15 +391,45 @@ using various combinations of parameters like ``start``, ``end``, pd.bdate_range(start=start, periods=20) -The start and end dates are strictly inclusive. So it will not generate any -dates outside of those dates if specified. +.. _timeseries.custom-freq-ranges: + +Custom Frequency Ranges +~~~~~~~~~~~~~~~~~~~~~~~ + +.. warning:: + + This functionality was originally exclusive to ``cdate_range``, which is + deprecated as of version 0.21.0 in favor of ``bdate_range``. Note that + ``cdate_range`` only utilizes the ``weekmask`` and ``holidays`` parameters + when custom business day, 'C', is passed as the frequency string. Support has + been expanded with ``bdate_range`` to work with any custom frequency string. + +.. versionadded:: 0.21.0 + +``bdate_range`` can also generate a range of custom frequency dates by using +the ``weekmask`` and ``holidays`` parameters. These parameters will only be +used if a custom frequency string is passed. + +.. ipython:: python + + weekmask = 'Mon Wed Fri' + + holidays = [datetime(2011, 1, 5), datetime(2011, 3, 14)] + + pd.bdate_range(start, end, freq='C', weekmask=weekmask, holidays=holidays) + + pd.bdate_range(start, end, freq='CBMS', weekmask=weekmask) + +.. seealso:: + + :ref:`timeseries.custombusinessdays` .. 
_timeseries.timestamp-limits: -Timestamp limitations +Timestamp Limitations --------------------- -Since pandas represents timestamps in nanosecond resolution, the timespan that +Since pandas represents timestamps in nanosecond resolution, the time span that can be represented using a 64-bit integer is limited to approximately 584 years: .. ipython:: python @@ -354,7 +437,9 @@ can be represented using a 64-bit integer is limited to approximately 584 years: pd.Timestamp.min pd.Timestamp.max -See :ref:`here ` for ways to represent data outside these bound. +.. seealso:: + + :ref:`timeseries.oob` .. _timeseries.datetimeindex: @@ -362,20 +447,20 @@ Indexing -------- One of the main uses for ``DatetimeIndex`` is as an index for pandas objects. -The ``DatetimeIndex`` class contains many timeseries related optimizations: +The ``DatetimeIndex`` class contains many time series related optimizations: - A large range of dates for various offsets are pre-computed and cached under the hood in order to make generating subsequent date ranges very fast - (just have to grab a slice) - - Fast shifting using the ``shift`` and ``tshift`` method on pandas objects - - Unioning of overlapping DatetimeIndex objects with the same frequency is - very fast (important for fast data alignment) + (just have to grab a slice). + - Fast shifting using the ``shift`` and ``tshift`` method on pandas objects. + - Unioning of overlapping ``DatetimeIndex`` objects with the same frequency is + very fast (important for fast data alignment). - Quick access to date fields via properties such as ``year``, ``month``, etc. - - Regularization functions like ``snap`` and very fast ``asof`` logic + - Regularization functions like ``snap`` and very fast ``asof`` logic. -DatetimeIndex objects has all the basic functionality of regular Index objects -and a smorgasbord of advanced timeseries-specific methods for easy frequency -processing. 
+``DatetimeIndex`` objects have all the basic functionality of regular ``Index`` +objects, and a smorgasbord of advanced time series specific methods for easy +frequency processing. .. seealso:: :ref:`Reindexing methods ` @@ -383,8 +468,7 @@ processing. .. note:: While pandas does not force you to have a sorted date index, some of these - methods may have unexpected or incorrect behavior if the dates are - unsorted. So please be careful. + methods may have unexpected or incorrect behavior if the dates are unsorted. ``DatetimeIndex`` can be used like a regular index and offers all of its intelligent functionality like selection, slicing, etc. @@ -402,7 +486,7 @@ intelligent functionality like selection, slicing, etc. Partial String Indexing ~~~~~~~~~~~~~~~~~~~~~~~ -You can pass in dates and strings that parse to dates as indexing parameters: +Dates and strings that parse to timestamps can be passed as indexing parameters: .. ipython:: python @@ -421,9 +505,9 @@ the year or year and month as strings: ts['2011-6'] -This type of slicing will work on a DataFrame with a ``DateTimeIndex`` as well. Since the +This type of slicing will work on a ``DataFrame`` with a ``DatetimeIndex`` as well. Since the partial string selection is a form of label slicing, the endpoints **will be** included. This -would include matching times on an included date. Here's an example: +would include matching times on an included date: .. ipython:: python @@ -433,25 +517,26 @@ would include matching times on an included date. Here's an example: dft dft['2013'] -This starts on the very first time in the month, and includes the last date & time for the month +This starts on the very first time in the month, and includes the last date and +time for the month: .. ipython:: python dft['2013-1':'2013-2'] -This specifies a stop time **that includes all of the times on the last day** +This specifies a stop time **that includes all of the times on the last day**: .. 
ipython:: python dft['2013-1':'2013-2-28'] -This specifies an **exact** stop time (and is not the same as the above) +This specifies an **exact** stop time (and is not the same as the above): .. ipython:: python dft['2013-1':'2013-2-28 00:00:00'] -We are stopping on the included end-point as it is part of the index +We are stopping on the included end-point as it is part of the index: .. ipython:: python @@ -459,7 +544,7 @@ We are stopping on the included end-point as it is part of the index .. versionadded:: 0.18.0 -DatetimeIndex Partial String Indexing also works on DataFrames with a ``MultiIndex``. For example: +``DatetimeIndex`` partial string indexing also works on a ``DataFrame`` with a ``MultiIndex``: .. ipython:: python @@ -477,14 +562,14 @@ DatetimeIndex Partial String Indexing also works on DataFrames with a ``MultiInd .. _timeseries.slice_vs_exact_match: -Slice vs. exact match +Slice vs. Exact Match ~~~~~~~~~~~~~~~~~~~~~ .. versionchanged:: 0.20.0 -The same string used as an indexing parameter can be treated either as a slice or as an exact match depending on the resolution of an index. If the string is less accurate than the index, it will be treated as a slice, otherwise as an exact match. +The same string used as an indexing parameter can be treated either as a slice or as an exact match depending on the resolution of the index. If the string is less accurate than the index, it will be treated as a slice, otherwise as an exact match. -For example, let us consider ``Series`` object which index has minute resolution. +Consider a ``Series`` object with a minute resolution index: .. ipython:: python @@ -507,7 +592,8 @@ A timestamp string with minute resolution (or more accurate), gives a scalar ins series_minute['2011-12-31 23:59'] series_minute['2011-12-31 23:59:00'] -If index resolution is second, then, the minute-accurate timestamp gives a ``Series``. +If index resolution is second, then the minute-accurate timestamp gives a +``Series``. .. 
ipython:: python @@ -529,7 +615,7 @@ If the timestamp string is treated as a slice, it can be used to index ``DataFra .. warning:: - However if the string is treated as an exact match, the selection in ``DataFrame``'s ``[]`` will be column-wise and not row-wise, see :ref:`Indexing Basics `. For example ``dft_minute['2011-12-31 23:59']`` will raise ``KeyError`` as ``'2012-12-31 23:59'`` has the same resolution as index and there is no column with such name: + However, if the string is treated as an exact match, the selection in ``DataFrame``'s ``[]`` will be column-wise and not row-wise, see :ref:`Indexing Basics `. For example ``dft_minute['2011-12-31 23:59']`` will raise ``KeyError`` as ``'2011-12-31 23:59'`` has the same resolution as the index and there is no column with such a name: To *always* have unambiguous selection, whether the row is treated as a slice or a single selection, use ``.loc``. @@ -552,7 +638,7 @@ Note also that ``DatetimeIndex`` resolution cannot be less precise than day. Exact Indexing ~~~~~~~~~~~~~~ -As discussed in previous section, indexing a ``DateTimeIndex`` with a partial string depends on the "accuracy" of the period, in other words how specific the interval is in relation to the resolution of the index. In contrast, indexing with ``Timestamp`` or ``datetime`` objects is exact, because the objects have exact meaning. These also follow the semantics of *including both endpoints*. +As discussed in the previous section, indexing a ``DatetimeIndex`` with a partial string depends on the "accuracy" of the period, in other words how specific the interval is in relation to the resolution of the index. In contrast, indexing with ``Timestamp`` or ``datetime`` objects is exact, because the objects have exact meaning. These also follow the semantics of *including both endpoints*. These ``Timestamp`` and ``datetime`` objects have exact ``hours, minutes,`` and ``seconds``, even though they were not explicitly specified (they are ``0``). 
@@ -570,25 +656,32 @@ With no defaults. Truncating & Fancy Indexing ~~~~~~~~~~~~~~~~~~~~~~~~~~~ -A ``truncate`` convenience function is provided that is equivalent to slicing: +A :meth:`~DataFrame.truncate` convenience function is provided that is similar +to slicing. Note that ``truncate`` assumes a 0 value for any unspecified date +component in a ``DatetimeIndex`` in contrast to slicing which returns any +partially matching dates: .. ipython:: python - ts.truncate(before='10/31/2011', after='12/31/2011') + rng2 = pd.date_range('2011-01-01', '2012-01-01', freq='W') + ts2 = pd.Series(np.random.randn(len(rng2)), index=rng2) + + ts2.truncate(before='2011-11', after='2011-12') + ts2['2011-11':'2011-12'] -Even complicated fancy indexing that breaks the DatetimeIndex's frequency -regularity will result in a ``DatetimeIndex`` (but frequency is lost): +Even complicated fancy indexing that breaks the ``DatetimeIndex`` frequency +regularity will result in a ``DatetimeIndex``, although frequency is lost: .. ipython:: python - ts[[0, 2, 6]].index + ts2[[0, 2, 6]].index .. _timeseries.components: Time/Date Components -------------------- -There are several time/date properties that one can access from ``Timestamp`` or a collection of timestamps like a ``DateTimeIndex``. +There are several time/date properties that one can access from ``Timestamp`` or a collection of timestamps like a ``DatetimeIndex``. .. 
csv-table:: :header: "Property", "Description" @@ -607,10 +700,10 @@ There are several time/date properties that one can access from ``Timestamp`` or dayofyear,"The ordinal day of year" weekofyear,"The week ordinal of the year" week,"The week ordinal of the year" - dayofweek,"The numer of the day of the week with Monday=0, Sunday=6" + dayofweek,"The number of the day of the week with Monday=0, Sunday=6" weekday,"The number of the day of the week with Monday=0, Sunday=6" weekday_name,"The name of the day in a week (ex: Friday)" - quarter,"Quarter of the date: Jan=Mar = 1, Apr-Jun = 2, etc." + quarter,"Quarter of the date: Jan-Mar = 1, Apr-Jun = 2, etc." days_in_month,"The number of days in the month of the datetime" is_month_start,"Logical indicating if first day of month (defined by frequency)" is_month_end,"Logical indicating if last day of month (defined by frequency)" @@ -620,17 +713,19 @@ There are several time/date properties that one can access from ``Timestamp`` or is_year_end,"Logical indicating if last day of year (defined by frequency)" is_leap_year,"Logical indicating if the date belongs to a leap year" -Furthermore, if you have a ``Series`` with datetimelike values, then you can access these properties via the ``.dt`` accessor, see the :ref:`docs ` +Furthermore, if you have a ``Series`` with datetimelike values, then you can +access these properties via the ``.dt`` accessor, as detailed in the section +on :ref:`.dt accessors`. .. _timeseries.offsets: -DateOffset objects +DateOffset Objects ------------------ -In the preceding examples, we created DatetimeIndex objects at various +In the preceding examples, we created ``DatetimeIndex`` objects at various frequencies by passing in :ref:`frequency strings ` -like 'M', 'W', and 'BM to the ``freq`` keyword. Under the hood, these frequency -strings are being translated into an instance of pandas ``DateOffset``, +like 'M', 'W', and 'BM' to the ``freq`` keyword. 
Under the hood, these frequency +strings are being translated into an instance of :class:`DateOffset`, which represents a regular frequency increment. Specific offset logic like "month", "business day", or "one hour" is represented in its various subclasses. @@ -640,7 +735,7 @@ which represents a regular frequency increment. Specific offset logic like DateOffset, "Generic offset class, defaults to 1 calendar day" BDay, "business day (weekday)" - CDay, "custom business day (experimental)" + CDay, "custom business day" Week, "one week, optionally anchored on a day of the week" WeekOfMonth, "the x-th day of the y-th week of each month" LastWeekOfMonth, "the x-th day of the last week of each month" @@ -672,7 +767,7 @@ which represents a regular frequency increment. Specific offset logic like Nano, "one nanosecond" The basic ``DateOffset`` takes the same arguments as -``dateutil.relativedelta``, which works like: +``dateutil.relativedelta``, which works as follows: .. ipython:: python @@ -688,12 +783,13 @@ We could have done the same thing with ``DateOffset``: The key features of a ``DateOffset`` object are: -- it can be added / subtracted to/from a datetime object to obtain a - shifted date -- it can be multiplied by an integer (positive or negative) so that the - increment will be applied multiple times -- it has ``rollforward`` and ``rollback`` methods for moving a date forward - or backward to the next or previous "offset date" +- It can be added / subtracted to/from a datetime object to obtain a + shifted date. +- It can be multiplied by an integer (positive or negative) so that the + increment will be applied multiple times. +- It has :meth:`~pandas.DateOffset.rollforward` and + :meth:`~pandas.DateOffset.rollback` methods for moving a date forward or + backward to the next or previous "offset date". 
Subclasses of ``DateOffset`` define the ``apply`` function which dictates custom date increment logic, such as adding business days: @@ -722,7 +818,10 @@ The ``rollforward`` and ``rollback`` methods do exactly what you would expect: It's definitely worth exploring the ``pandas.tseries.offsets`` module and the various docstrings for the classes. -These operations (``apply``, ``rollforward`` and ``rollback``) preserves time (hour, minute, etc) information by default. To reset time, use ``normalize=True`` keyword when creating the offset instance. If ``normalize=True``, result is normalized after the function is applied. +These operations (``apply``, ``rollforward`` and ``rollback``) preserve time +(hour, minute, etc) information by default. To reset time, use ``normalize=True`` +when creating the offset instance. If ``normalize=True``, the result is +normalized after the function is applied. .. ipython:: python @@ -741,7 +840,7 @@ These operations (``apply``, ``rollforward`` and ``rollback``) preserves time (h hour.apply(pd.Timestamp('2014-01-01 23:00')) -Parametric offsets +Parametric Offsets ~~~~~~~~~~~~~~~~~~ Some of the offsets can be "parameterized" when created to result in different @@ -758,7 +857,7 @@ particular day of the week: d - Week() -``normalize`` option will be effective for addition and subtraction. +The ``normalize`` option will be effective for addition and subtraction. .. ipython:: python @@ -776,7 +875,7 @@ Another example is parameterizing ``YearEnd`` with the specific ending month: .. _timeseries.offsetseries: -Using offsets with ``Series`` / ``DatetimeIndex`` +Using Offsets with ``Series`` / ``DatetimeIndex`` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Offsets can be used with either a ``Series`` or ``DatetimeIndex`` to @@ -837,7 +936,7 @@ As an interesting example, let's look at Egypt where a Friday-Saturday weekend i dt = datetime(2013, 4, 30) dt + 2 * bday_egypt -Let's map to the weekday names +Let's map to the weekday names: .. 
ipython:: python @@ -893,9 +992,10 @@ The ``BusinessHour`` class provides a business hour representation on ``Business allowing to use specific start and end times. By default, ``BusinessHour`` uses 9:00 - 17:00 as business hours. -Adding ``BusinessHour`` will increment ``Timestamp`` by hourly. -If target ``Timestamp`` is out of business hours, move to the next business hour then increment it. -If the result exceeds the business hours end, remaining is added to the next business day. +Adding ``BusinessHour`` will increment ``Timestamp`` by hourly frequency. +If target ``Timestamp`` is out of business hours, move to the next business hour +then increment it. If the result exceeds the business hours end, the remaining +hours are added to the next business day. .. ipython:: python @@ -921,9 +1021,10 @@ If the result exceeds the business hours end, remaining is added to the next bus # Subtracting 3 business hours pd.Timestamp('2014-08-01 10:00') + BusinessHour(-3) -Also, you can specify ``start`` and ``end`` time by keywords. -Argument must be ``str`` which has ``hour:minute`` representation or ``datetime.time`` instance. -Specifying seconds, microseconds and nanoseconds as business hour results in ``ValueError``. +You can also specify ``start`` and ``end`` time by keywords. The argument must +be a ``str`` with an ``hour:minute`` representation or a ``datetime.time`` +instance. Specifying seconds, microseconds and nanoseconds as business hour +results in ``ValueError``. .. ipython:: python @@ -979,8 +1080,9 @@ under the default business hours (9:00 - 17:00), there is no gap (0 minutes) bet # The result is the same as rollworward because BusinessDay never overlap. BusinessHour().apply(pd.Timestamp('2014-08-02')) -``BusinessHour`` regards Saturday and Sunday as holidays. To use arbitrary holidays, -you can use ``CustomBusinessHour`` offset, see :ref:`Custom Business Hour `: +``BusinessHour`` regards Saturday and Sunday as holidays. 
To use arbitrary +holidays, you can use the ``CustomBusinessHour`` offset, as explained in the +following subsection. .. _timeseries.custombusinesshour: @@ -1005,7 +1107,7 @@ as ``BusinessHour`` except that it skips specified custom holidays. # Tuesday after MLK Day (Monday is skipped because it's a holiday) dt + bhour_us * 2 -You can use keyword arguments suported by either ``BusinessHour`` and ``CustomBusinessDay``. +You can use keyword arguments supported by either ``BusinessHour`` or ``CustomBusinessDay``. .. ipython:: python @@ -1020,15 +1122,14 @@ Offset Aliases ~~~~~~~~~~~~~~ A number of string aliases are given to useful common time series -frequencies. We will refer to these aliases as *offset aliases* -(referred to as *time rules* prior to v0.8.0). +frequencies. We will refer to these aliases as *offset aliases*. .. csv-table:: :header: "Alias", "Description" :widths: 15, 100 "B", "business day frequency" - "C", "custom business day frequency (experimental)" + "C", "custom business day frequency" "D", "calendar day frequency" "W", "weekly frequency" "M", "month end frequency" @@ -1040,13 +1141,13 @@ frequencies. 
We will refer to these aliases as *offset aliases* "BMS", "business month start frequency" "CBMS", "custom business month start frequency" "Q", "quarter end frequency" - "BQ", "business quarter endfrequency" + "BQ", "business quarter end frequency" "QS", "quarter start frequency" "BQS", "business quarter start frequency" - "A", "year end frequency" - "BA", "business year end frequency" - "AS", "year start frequency" - "BAS", "business year start frequency" + "A, Y", "year end frequency" + "BA, BY", "business year end frequency" + "AS, YS", "year start frequency" + "BAS, BYS", "business year start frequency" "BH", "business hour frequency" "H", "hourly frequency" "T, min", "minutely frequency" @@ -1084,13 +1185,13 @@ For some frequencies you can specify an anchoring suffix: :header: "Alias", "Description" :widths: 15, 100 - "W\-SUN", "weekly frequency (sundays). Same as 'W'" - "W\-MON", "weekly frequency (mondays)" - "W\-TUE", "weekly frequency (tuesdays)" - "W\-WED", "weekly frequency (wednesdays)" - "W\-THU", "weekly frequency (thursdays)" - "W\-FRI", "weekly frequency (fridays)" - "W\-SAT", "weekly frequency (saturdays)" + "W\-SUN", "weekly frequency (Sundays). Same as 'W'" + "W\-MON", "weekly frequency (Mondays)" + "W\-TUE", "weekly frequency (Tuesdays)" + "W\-WED", "weekly frequency (Wednesdays)" + "W\-THU", "weekly frequency (Thursdays)" + "W\-FRI", "weekly frequency (Fridays)" + "W\-SAT", "weekly frequency (Saturdays)" "(B)Q(S)\-DEC", "quarterly frequency, year ends in December. Same as 'Q'" "(B)Q(S)\-JAN", "quarterly frequency, year ends in January" "(B)Q(S)\-FEB", "quarterly frequency, year ends in February" @@ -1124,7 +1225,7 @@ Anchored Offset Semantics ~~~~~~~~~~~~~~~~~~~~~~~~~ For those offsets that are anchored to the start or end of specific -frequency (``MonthEnd``, ``MonthBegin``, ``WeekEnd``, etc) the following +frequency (``MonthEnd``, ``MonthBegin``, ``WeekEnd``, etc), the following rules apply to rolling forward and backwards. 
When ``n`` is not 0, if the given date is not on an anchor point, it snapped to the next(previous) @@ -1175,7 +1276,7 @@ Holidays and calendars provide a simple way to define holiday rules to be used with ``CustomBusinessDay`` or in other analysis that requires a predefined set of holidays. The ``AbstractHolidayCalendar`` class provides all the necessary methods to return a list of holidays and only ``rules`` need to be defined -in a specific holiday calendar class. Further, ``start_date`` and ``end_date`` +in a specific holiday calendar class. Furthermore, the ``start_date`` and ``end_date`` class attributes determine over what date range holidays are generated. These should be overwritten on the ``AbstractHolidayCalendar`` class to have the range apply to all calendar subclasses. ``USFederalHolidayCalendar`` is the @@ -1230,7 +1331,7 @@ or ``Timestamp`` objects. datetime(2012, 7, 6) + offset Ranges are defined by the ``start_date`` and ``end_date`` class attributes -of ``AbstractHolidayCalendar``. The defaults are below. +of ``AbstractHolidayCalendar``. The defaults are shown below. .. ipython:: python @@ -1263,23 +1364,24 @@ or calendars with additional rules. .. _timeseries.advanced_datetime: -Time series-related instance methods +Time Series-Related Instance Methods ------------------------------------ -Shifting / lagging +Shifting / Lagging ~~~~~~~~~~~~~~~~~~ One may want to *shift* or *lag* the values in a time series back and forward in -time. The method for this is ``shift``, which is available on all of the pandas -objects. +time. The method for this is :meth:`~Series.shift`, which is available on all of +the pandas objects. .. 
ipython:: python ts = ts[:5] ts.shift(1) -The shift method accepts an ``freq`` argument which can accept a -``DateOffset`` class or other ``timedelta``-like object or also a :ref:`offset alias `: +The ``shift`` method accepts a ``freq`` argument which can accept a +``DateOffset`` class or other ``timedelta``-like object or also an +:ref:`offset alias `: .. ipython:: python @@ -1287,8 +1389,8 @@ The shift method accepts an ``freq`` argument which can accept a ts.shift(5, freq='BM') Rather than changing the alignment of the data and the index, ``DataFrame`` and -``Series`` objects also have a ``tshift`` convenience method that changes -all the dates in the index by a specified number of offsets: +``Series`` objects also have a :meth:`~Series.tshift` convenience method that +changes all the dates in the index by a specified number of offsets: .. ipython:: python @@ -1297,12 +1399,13 @@ all the dates in the index by a specified number of offsets: Note that with ``tshift``, the leading entry is no longer NaN because the data is not being realigned. -Frequency conversion +Frequency Conversion ~~~~~~~~~~~~~~~~~~~~ -The primary function for changing frequencies is the ``asfreq`` function. -For a ``DatetimeIndex``, this is basically just a thin, but convenient wrapper -around ``reindex`` which generates a ``date_range`` and calls ``reindex``. +The primary function for changing frequencies is the :meth:`~Series.asfreq` +method. For a ``DatetimeIndex``, this is basically just a thin, but convenient +wrapper around :meth:`~Series.reindex` which generates a ``date_range`` and +calls ``reindex``. .. ipython:: python @@ -1312,23 +1415,23 @@ around ``reindex`` which generates a ``date_range`` and calls ``reindex``. ts.asfreq(BDay()) ``asfreq`` provides a further convenience so you can specify an interpolation -method for any gaps that may appear after the frequency conversion +method for any gaps that may appear after the frequency conversion. .. 
ipython:: python ts.asfreq(BDay(), method='pad') -Filling forward / backward +Filling Forward / Backward ~~~~~~~~~~~~~~~~~~~~~~~~~~ -Related to ``asfreq`` and ``reindex`` is the ``fillna`` function documented in -the :ref:`missing data section `. +Related to ``asfreq`` and ``reindex`` is :meth:`~Series.fillna`, which is +documented in the :ref:`missing data section `. -Converting to Python datetimes +Converting to Python Datetimes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -``DatetimeIndex`` can be converted to an array of Python native datetime.datetime objects using the -``to_pydatetime`` method. +``DatetimeIndex`` can be converted to an array of Python native +:py:class:`datetime.datetime` objects using the ``to_pydatetime`` method. .. _timeseries.resampling: @@ -1340,20 +1443,22 @@ Resampling The interface to ``.resample`` has changed in 0.18.0 to be more groupby-like and hence more flexible. See the :ref:`whatsnew docs ` for a comparison with prior versions. -Pandas has a simple, powerful, and efficient functionality for -performing resampling operations during frequency conversion (e.g., converting -secondly data into 5-minutely data). This is extremely common in, but not -limited to, financial applications. +Pandas has a simple, powerful, and efficient functionality for performing +resampling operations during frequency conversion (e.g., converting secondly +data into 5-minutely data). This is extremely common in, but not limited to, +financial applications. -``.resample()`` is a time-based groupby, followed by a reduction method on each of its groups. -See some :ref:`cookbook examples ` for some advanced strategies +:meth:`~Series.resample` is a time-based groupby, followed by a reduction method +on each of its groups. See some :ref:`cookbook examples ` for +some advanced strategies. Starting in version 0.18.1, the ``resample()`` function can be used directly from ``DataFrameGroupBy`` objects, see the :ref:`groupby docs `. .. 
note:: - ``.resample()`` is similar to using a ``.rolling()`` operation with a time-based offset, see a discussion :ref:`here ` + ``.resample()`` is similar to using a :meth:`~Series.rolling` operation with + a time-based offset, see a discussion :ref:`here `. Basics ~~~~~~ @@ -1370,8 +1475,9 @@ The ``resample`` function is very flexible and allows you to specify many different parameters to control the frequency conversion and resampling operation. -The ``how`` parameter can be a function name or numpy array function that takes -an array and produces aggregated values: +Any function available via :ref:`dispatching ` is available as +a method of the returned object, including ``sum``, ``mean``, ``std``, ``sem``, +``max``, ``min``, ``median``, ``first``, ``last``, ``ohlc``: .. ipython:: python @@ -1381,9 +1487,6 @@ an array and produces aggregated values: ts.resample('5Min').max() -Any function available via :ref:`dispatching ` can be given to -the ``how`` parameter by name, including ``sum``, ``mean``, ``std``, ``sem``, -``max``, ``min``, ``median``, ``first``, ``last``, ``ohlc``. For downsampling, ``closed`` can be set to 'left' or 'right' to specify which end of the interval is closed: @@ -1401,17 +1504,36 @@ labels. .. ipython:: python - ts.resample('5Min').mean() # by default label='right' + ts.resample('5Min').mean() # by default label='left' ts.resample('5Min', label='left').mean() ts.resample('5Min', label='left', loffset='1s').mean() +.. note:: + + The default values for ``label`` and ``closed`` are 'left' for all + frequency offsets except for 'M', 'A', 'Q', 'BM', 'BA', 'BQ', and 'W' + which all have a default of 'right'. + + .. 
ipython:: python + + rng2 = pd.date_range('1/1/2012', end='3/31/2012', freq='D') + ts2 = pd.Series(range(len(rng2)), index=rng2) + + # default: label='right', closed='right' + ts2.resample('M').max() + + # default: label='left', closed='left' + ts2.resample('SM').max() + + ts2.resample('SM', label='right', closed='right').max() + The ``axis`` parameter can be set to 0 or 1 and allows you to resample the -specified axis for a DataFrame. +specified axis for a ``DataFrame``. ``kind`` can be set to 'timestamp' or 'period' to convert the resulting index -to/from time-stamp and time-span representations. By default ``resample`` +to/from timestamp and time span representations. By default ``resample`` retains the input representation. ``convention`` can be set to 'start' or 'end' when resampling period data @@ -1419,8 +1541,8 @@ retains the input representation. frequency periods. -Up Sampling -~~~~~~~~~~~ +Upsampling +~~~~~~~~~~ For upsampling, you can specify a way to upsample and the ``limit`` parameter to interpolate over the gaps that are created: @@ -1437,20 +1559,21 @@ For upsampling, you can specify a way to upsample and the ``limit`` parameter to Sparse Resampling ~~~~~~~~~~~~~~~~~ -Sparse timeseries are ones where you have a lot fewer points relative -to the amount of time you are looking to resample. Naively upsampling a sparse series can potentially -generate lots of intermediate values. When you don't want to use a method to fill these values, e.g. ``fill_method`` is ``None``, -then intermediate values will be filled with ``NaN``. +Sparse timeseries are the ones where you have a lot fewer points relative +to the amount of time you are looking to resample. Naively upsampling a sparse +series can potentially generate lots of intermediate values. When you don't want +to use a method to fill these values, e.g. ``fill_method`` is ``None``, then +intermediate values will be filled with ``NaN``. 
Since ``resample`` is a time-based groupby, the following is a method to efficiently -resample only the groups that are not all ``NaN`` +resample only the groups that are not all ``NaN``. .. ipython:: python rng = pd.date_range('2014-1-1', periods=100, freq='D') + pd.Timedelta('1s') ts = pd.Series(range(100), index=rng) -If we want to resample to the full range of the series +If we want to resample to the full range of the series: .. ipython:: python @@ -1470,11 +1593,13 @@ We can instead only resample those groups where we have points as follows: ts.groupby(partial(round, freq='3T')).sum() +.. _timeseries.aggregate: + Aggregation ~~~~~~~~~~~ -Similar to :ref:`groupby aggregates ` and the :ref:`window functions `, a ``Resampler`` can be selectively -resampled. +Similar to the :ref:`aggregating API `, :ref:`groupby API `, and the :ref:`window functions API `, +a ``Resampler`` can be selectively resampled. Resampling a ``DataFrame``, the default will be to act on all columns with the same function. @@ -1494,21 +1619,13 @@ We can select a specific column or columns using standard getitem. r[['A','B']].mean() -You can pass a list or dict of functions to do aggregation with, outputting a DataFrame: +You can pass a list or dict of functions to do aggregation with, outputting a ``DataFrame``: .. ipython:: python r['A'].agg([np.sum, np.mean, np.std]) -If a dict is passed, the keys will be used to name the columns. Otherwise the -function's name (stored in the function object) will be used. - -.. ipython:: python - - r['A'].agg({'result1' : np.sum, - 'result2' : np.mean}) - -On a resampled DataFrame, you can pass a list of functions to apply to each +On a resampled ``DataFrame``, you can pass a list of functions to apply to each column, which produces an aggregated result with a hierarchical index: .. 
ipython:: python @@ -1516,7 +1633,7 @@ column, which produces an aggregated result with a hierarchical index: r.agg([np.sum, np.mean]) By passing a dict to ``aggregate`` you can apply a different aggregation to the -columns of a DataFrame: +columns of a ``DataFrame``: .. ipython:: python :okexcept: @@ -1525,7 +1642,7 @@ columns of a DataFrame: 'B' : lambda x: np.std(x, ddof=1)}) The function names can also be strings. In order for a string to be valid it -must be implemented on the Resampled object +must be implemented on the resampled object: .. ipython:: python @@ -1663,6 +1780,15 @@ has multiplied span. pd.PeriodIndex(start='2014-01', freq='3M', periods=4) +If ``start`` or ``end`` are ``Period`` objects, they will be used as anchor +endpoints for a ``PeriodIndex`` with frequency matching that of the +``PeriodIndex`` constructor. + +.. ipython:: python + + pd.PeriodIndex(start=pd.Period('2017Q1', freq='Q'), + end=pd.Period('2017Q2', freq='Q'), freq='M') + Just like ``DatetimeIndex``, a ``PeriodIndex`` can also be used to index pandas objects: @@ -1824,7 +1950,7 @@ frequencies ``Q-JAN`` through ``Q-DEC``. .. _timeseries.interchange: -Converting between Representations +Converting Between Representations ---------------------------------- Timestamped data can be converted to PeriodIndex-ed data using ``to_period`` @@ -1868,7 +1994,7 @@ the quarter end: .. _timeseries.oob: -Representing out-of-bounds spans +Representing Out-of-Bounds Spans -------------------------------- If you have data that is outside of the ``Timestamp`` bounds, see :ref:`Timestamp limitations `, @@ -1879,7 +2005,7 @@ then you can use a ``PeriodIndex`` and/or ``Series`` of ``Periods`` to do comput span = pd.period_range('1215-01-01', '1381-01-01', freq='D') span -To convert from a ``int64`` based YYYYMMDD representation. +To convert from an ``int64`` based YYYYMMDD representation. .. ipython:: python @@ -1892,7 +2018,7 @@ To convert from a ``int64`` based YYYYMMDD representation. 
s.apply(conv) s.apply(conv)[2] -These can easily be converted to a ``PeriodIndex`` +These can easily be converted to a ``PeriodIndex``: .. ipython:: python @@ -1904,9 +2030,11 @@ These can easily be converted to a ``PeriodIndex`` Time Zone Handling ------------------ -Pandas provides rich support for working with timestamps in different time zones using ``pytz`` and ``dateutil`` libraries. -``dateutil`` support is new in 0.14.1 and currently only supported for fixed offset and tzfile zones. The default library is ``pytz``. -Support for ``dateutil`` is provided for compatibility with other applications e.g. if you use ``dateutil`` in other python packages. +Pandas provides rich support for working with timestamps in different time +zones using ``pytz`` and ``dateutil`` libraries. ``dateutil`` currently is only +supported for fixed offset and tzfile zones. The default library is ``pytz``. +Support for ``dateutil`` is provided for compatibility with other +applications e.g. if you use ``dateutil`` in other Python packages. Working with Time Zones ~~~~~~~~~~~~~~~~~~~~~~~ @@ -1963,7 +2091,7 @@ which gives you more control over which time zone is used: rng_dateutil.tz == tz_dateutil Timestamps, like Python's ``datetime.datetime`` object can be either time zone -naive or time zone aware. Naive time series and DatetimeIndex objects can be +naive or time zone aware. Naive time series and ``DatetimeIndex`` objects can be *localized* using ``tz_localize``: .. ipython:: python @@ -1998,7 +2126,7 @@ tz-aware data to another time zone: It is incorrect to pass a timezone directly into the ``datetime.datetime`` constructor (e.g., ``datetime.datetime(2011, 1, 1, tz=timezone('US/Eastern'))``. Instead, the datetime - needs to be localized using the the localize method on the timezone. + needs to be localized using the localize method on the timezone. Under the hood, all timestamps are stored in UTC. 
Scalar values from a ``DatetimeIndex`` with a time zone will have their fields (day, hour, minute) @@ -2031,8 +2159,8 @@ Localization of ``Timestamp`` functions just like ``DatetimeIndex`` and ``Series rng[5].tz_localize('Asia/Shanghai') -Operations between Series in different time zones will yield UTC -Series, aligning the data on the UTC timestamps: +Operations between ``Series`` in different time zones will yield UTC +``Series``, aligning the data on the UTC timestamps: .. ipython:: python @@ -2112,11 +2240,9 @@ constructor as well as ``tz_localize``. .. _timeseries.timezone_series: -TZ aware Dtypes +TZ Aware Dtypes ~~~~~~~~~~~~~~~ -.. versionadded:: 0.17.0 - ``Series/DatetimeIndex`` with a timezone **naive** value are represented with a dtype of ``datetime64[ns]``. .. ipython:: python @@ -2156,21 +2282,21 @@ a convert on an aware stamp. .. note:: - Using the ``.values`` accessor on a ``Series``, returns an numpy array of the data. - These values are converted to UTC, as numpy does not currently support timezones (even though it is *printing* in the local timezone!). + Using the ``.values`` accessor on a ``Series`` returns a NumPy array of the data. + These values are converted to UTC, as NumPy does not currently support timezones (even though it is *printing* in the local timezone!). .. ipython:: python s_naive.values s_aware.values - Further note that once converted to a numpy array these would lose the tz tenor. + Further note that once converted to a NumPy array these would lose the tz tenor. .. ipython:: python pd.Series(s_aware.values) - However, these can be easily converted + However, these can be easily converted: .. ipython:: python diff --git a/doc/source/tutorials.rst b/doc/source/tutorials.rst index 2489b787560d0..85e455de7d246 100644 --- a/doc/source/tutorials.rst +++ b/doc/source/tutorials.rst @@ -9,52 +9,54 @@ This is a guide to many pandas tutorials, geared mainly for new users. 
Internal Guides --------------- -pandas own :ref:`10 Minutes to pandas<10min>` +pandas' own :ref:`10 Minutes to pandas<10min>`. -More complex recipes are in the :ref:`Cookbook` +More complex recipes are in the :ref:`Cookbook`. pandas Cookbook --------------- -The goal of this cookbook (by `Julia Evans `_) is to +The goal of this 2015 cookbook (by `Julia Evans `_) is to give you some concrete examples for getting started with pandas. These are examples with real-world data, and all the bugs and weirdness that -that entails. +entails. -Here are links to the v0.1 release. For an up-to-date table of contents, see the `pandas-cookbook GitHub +Here are links to the v0.2 release. For an up-to-date table of contents, see the `pandas-cookbook GitHub repository `_. To run the examples in this tutorial, you'll need to clone the GitHub repository and get IPython Notebook running. See `How to use this cookbook `_. -- `A quick tour of the IPython Notebook: `_ +- `A quick tour of the IPython Notebook: `_ Shows off IPython's awesome tab completion and magic functions. -- `Chapter 1: `_ +- `Chapter 1: `_ Reading your data into pandas is pretty much the easiest thing. Even when the encoding is wrong! -- `Chapter 2: `_ +- `Chapter 2: `_ It's not totally obvious how to select data from a pandas dataframe. Here we explain the basics (how to take slices and get columns) -- `Chapter 3: `_ +- `Chapter 3: `_ Here we get into serious slicing and dicing and learn how to filter dataframes in complicated ways, really fast. -- `Chapter 4: `_ +- `Chapter 4: `_ Groupby/aggregate is seriously my favorite thing about pandas and I use it all the time. You should probably read this. -- `Chapter 5: `_ +- `Chapter 5: `_ Here you get to find out if it's cold in Montreal in the winter (spoiler: yes). Web scraping with pandas is fun! Here we combine dataframes. -- `Chapter 6: `_ +- `Chapter 6: `_ Strings with pandas are great. It has all these vectorized string operations and they're the best. 
We will turn a bunch of strings containing "Snow" into vectors of numbers in a trice. -- `Chapter 7: `_ +- `Chapter 7: `_ Cleaning up messy data is never a joy, but with pandas it's easier. -- `Chapter 8: `_ +- `Chapter 8: `_ Parsing Unix timestamps is confusing at first but it turns out to be really easy. +- `Chapter 9: `_ + Reading data from SQL databases. -Lessons for New pandas Users +Lessons for new pandas users ---------------------------- For more resources, please visit the main `repository `__. @@ -125,7 +127,7 @@ There are four sections covering selected topics as follows: .. _tutorial-exercises-new-users: -Exercises for New Users +Exercises for new users ----------------------- Practice your skills with real data sets and exercises. For more resources, please visit the main `repository `__. @@ -152,29 +154,50 @@ For more resources, please visit the main `repository `_ +Tutorial series written in 2016 by +`Tom Augspurger `_. +The source may be found in the GitHub repository +`TomAugspurger/effective-pandas `_. 
+ +- `Modern Pandas `_ - `Method Chaining `_ - `Indexes `_ - `Performance `_ - `Tidy Data `_ - `Visualization `_ +- `Timeseries `_ Excel charts with pandas, vincent and xlsxwriter ------------------------------------------------ - `Using Pandas and XlsxWriter to create Excel charts `_ +Video Tutorials +--------------- + +- `Pandas From The Ground Up `_ + (2015) (2:24) + `GitHub repo `__ +- `Introduction Into Pandas `_ + (2016) (1:28) + `GitHub repo `__ +- `Pandas: .head() to .tail() `_ + (2016) (1:26) + `GitHub repo `__ + + Various Tutorials ----------------- - `Wes McKinney's (pandas BDFL) blog `_ - `Statistical analysis made easy in Python with SciPy and pandas DataFrames, by Randal Olson `_ - `Statistical Data Analysis in Python, tutorial videos, by Christopher Fonnesbeck from SciPy 2013 `_ -- `Financial analysis in python, by Thomas Wiecki `_ +- `Financial analysis in Python, by Thomas Wiecki `_ - `Intro to pandas data structures, by Greg Reda `_ - `Pandas and Python: Top 10, by Manish Amde `_ - `Pandas Tutorial, by Mikhail Semeniuk `_ - `Pandas DataFrames Tutorial, by Karlijn Willems `_ +- `A concise tutorial with real life examples `_ diff --git a/doc/source/visualization.rst b/doc/source/visualization.rst index 2b2012dbf0b8a..09a52ee527cb5 100644 --- a/doc/source/visualization.rst +++ b/doc/source/visualization.rst @@ -10,7 +10,7 @@ np.set_printoptions(precision=4, suppress=True) pd.options.display.max_rows = 15 import matplotlib - matplotlib.style.use('ggplot') + # matplotlib.style.use('default') import matplotlib.pyplot as plt plt.close('all') @@ -24,13 +24,6 @@ We use the standard convention for referencing the matplotlib API: import matplotlib.pyplot as plt -The plots in this document are made using matplotlib's ``ggplot`` style (new in version 1.4): - -.. code-block:: python - - import matplotlib - matplotlib.style.use('ggplot') - We provide the basics in pandas to easily create decent looking plots. 
See the :ref:`ecosystem ` section for visualization libraries that go beyond the basics documented here. @@ -44,7 +37,8 @@ libraries that go beyond the basics documented here. Basic Plotting: ``plot`` ------------------------ -See the :ref:`cookbook` for some advanced strategies +We will demonstrate the basics, see the :ref:`cookbook` for +some advanced strategies. The ``plot`` method on Series and DataFrame is just a simple wrapper around :meth:`plt.plot() `: @@ -101,7 +95,8 @@ You can plot one column versus another using the `x` and `y` keywords in .. note:: - For more formatting and styling options, see :ref:`below `. + For more formatting and styling options, see + :ref:`formatting ` below. .. ipython:: python :suppress: @@ -114,14 +109,13 @@ Other Plots ----------- Plotting methods allow for a handful of plot styles other than the -default Line plot. These methods can be provided as the ``kind`` -keyword argument to :meth:`~DataFrame.plot`. -These include: +default line plot. These methods can be provided as the ``kind`` +keyword argument to :meth:`~DataFrame.plot`, and include: * :ref:`'bar' ` or :ref:`'barh' ` for bar plots * :ref:`'hist' ` for histogram * :ref:`'box' ` for boxplot -* :ref:`'kde' ` or ``'density'`` for density plots +* :ref:`'kde' ` or :ref:`'density' ` for density plots * :ref:`'area' ` for area plots * :ref:`'scatter' ` for scatter plots * :ref:`'hexbin' ` for hexagonal bin plots @@ -134,9 +128,7 @@ For example, a bar plot can be created the following way: plt.figure(); @savefig bar_plot_ex.png - df.iloc[5].plot(kind='bar'); plt.axhline(0, color='k') - -.. versionadded:: 0.17.0 + df.iloc[5].plot(kind='bar'); You can also create these other plots using the methods ``DataFrame.plot.`` instead of providing the ``kind`` keyword argument. 
This makes it easier to discover plot methods and the specific arguments they use: @@ -149,12 +141,12 @@ You can also create these other plots using the methods ``DataFrame.plot.` df.plot.area df.plot.barh df.plot.density df.plot.hist df.plot.line df.plot.scatter df.plot.bar df.plot.box df.plot.hexbin df.plot.kde df.plot.pie -In addition to these ``kind`` s, there are the :ref:`DataFrame.hist() `, +In addition to these ``kind`` s, there are the :ref:`DataFrame.hist() `, and :ref:`DataFrame.boxplot() ` methods, which use a separate interface. -Finally, there are several :ref:`plotting functions ` in ``pandas.tools.plotting`` +Finally, there are several :ref:`plotting functions ` in ``pandas.plotting`` that take a :class:`Series` or :class:`DataFrame` as an argument. These -include +include: * :ref:`Scatter Matrix ` * :ref:`Andrews Curves ` @@ -229,9 +221,7 @@ To get horizontal bar plots, use the ``barh`` method: Histograms ~~~~~~~~~~ -.. versionadded:: 0.15.0 - -Histogram can be drawn by using the :meth:`DataFrame.plot.hist` and :meth:`Series.plot.hist` methods. +Histograms can be drawn by using the :meth:`DataFrame.plot.hist` and :meth:`Series.plot.hist` methods. .. ipython:: python @@ -249,7 +239,8 @@ Histogram can be drawn by using the :meth:`DataFrame.plot.hist` and :meth:`Serie plt.close('all') -Histogram can be stacked by ``stacked=True``. Bin size can be changed by ``bins`` keyword. +A histogram can be stacked using ``stacked=True``. Bin size can be changed +using the ``bins`` keyword. .. ipython:: python @@ -263,7 +254,9 @@ Histogram can be stacked by ``stacked=True``. Bin size can be changed by ``bins` plt.close('all') -You can pass other keywords supported by matplotlib ``hist``. For example, horizontal and cumulative histgram can be drawn by ``orientation='horizontal'`` and ``cumulative='True'``. +You can pass other keywords supported by matplotlib ``hist``. 
For example, +horizontal and cumulative histograms can be drawn by +``orientation='horizontal'`` and ``cumulative=True``. .. ipython:: python @@ -306,8 +299,6 @@ subplots: df.diff().hist(color='k', alpha=0.5, bins=50) -.. versionadded:: 0.10.0 - The ``by`` keyword can be specified to plot grouped histograms: .. ipython:: python @@ -330,8 +321,6 @@ The ``by`` keyword can be specified to plot grouped histograms: Box Plots ~~~~~~~~~ -.. versionadded:: 0.15.0 - Boxplot can be drawn calling :meth:`Series.plot.box` and :meth:`DataFrame.plot.box`, or :meth:`DataFrame.boxplot` to visualize the distribution of values within each column. @@ -478,7 +467,7 @@ keyword, will affect the output type as well: ``'both'`` Yes Series of namedtuples ================ ======= ========================== -``Groupby.boxplot`` always returns a Series of ``return_type``. +``Groupby.boxplot`` always returns a ``Series`` of ``return_type``. .. ipython:: python :okwarning: @@ -496,7 +485,9 @@ keyword, will affect the output type as well: plt.close('all') -Compare to: +The subplots above are split by the numeric columns first, then the value of +the ``g`` column. Below the subplots are first split by the value of ``g``, +then by the numeric columns. .. ipython:: python :okwarning: @@ -514,8 +505,6 @@ Compare to: Area Plot ~~~~~~~~~ -.. versionadded:: 0.14 - You can create area plots with :meth:`Series.plot.area` and :meth:`DataFrame.plot.area`. Area plots are stacked by default. To produce stacked area plot, each column must be either all positive or all negative values. @@ -552,11 +541,9 @@ To produce an unstacked plot, pass ``stacked=False``. Alpha value is set to 0.5 Scatter Plot ~~~~~~~~~~~~ -.. versionadded:: 0.13 - Scatter plot can be drawn by using the :meth:`DataFrame.plot.scatter` method. -Scatter plot requires numeric columns for x and y axis. -These can be specified by ``x`` and ``y`` keywords each. +Scatter plot requires numeric columns for the x and y axes. 
+These can be specified by the ``x`` and ``y`` keywords. .. ipython:: python :suppress: @@ -600,8 +587,9 @@ each point: plt.close('all') -You can pass other keywords supported by matplotlib ``scatter``. -Below example shows a bubble chart using a dataframe column values as bubble size. +You can pass other keywords supported by matplotlib +:meth:`scatter `. The example below shows a +bubble chart using a column of the ``DataFrame`` as the bubble size. .. ipython:: python @@ -621,8 +609,6 @@ See the :meth:`scatter ` method and the Hexagonal Bin Plot ~~~~~~~~~~~~~~~~~~ -.. versionadded:: 0.14 - You can create hexagonal bin plots with :meth:`DataFrame.plot.hexbin`. Hexbin plots can be a useful alternative to scatter plots if your data are too dense to plot each point individually. @@ -652,7 +638,7 @@ You can specify alternative aggregations by passing values to the ``C`` and and ``reduce_C_function`` is a function of one argument that reduces all the values in a bin to a single number (e.g. ``mean``, ``max``, ``sum``, ``std``). In this example the positions are given by columns ``a`` and ``b``, while the value is -given by column ``z``. The bins are aggregated with numpy's ``max`` function. +given by column ``z``. The bins are aggregated with NumPy's ``max`` function. .. ipython:: python :suppress: @@ -684,8 +670,6 @@ See the :meth:`hexbin ` method and the Pie plot ~~~~~~~~ -.. versionadded:: 0.14 - You can create a pie plot with :meth:`DataFrame.plot.pie` or :meth:`Series.plot.pie`. If your data includes any ``NaN``, they will be automatically filled with 0. A ``ValueError`` will be raised if there are any negative values in your data. @@ -708,14 +692,16 @@ A ``ValueError`` will be raised if there are any negative values in your data. plt.close('all') -For pie plots it's best to use square figures, one's with an equal aspect ratio. 
You can create the -figure with equal width and height, or force the aspect ratio to be equal after plotting by -calling ``ax.set_aspect('equal')`` on the returned ``axes`` object. +For pie plots it's best to use square figures, i.e. a figure with an aspect ratio of 1. +You can create the figure with equal width and height, or force the aspect ratio +to be equal after plotting by calling ``ax.set_aspect('equal')`` on the returned +``axes`` object. -Note that pie plot with :class:`DataFrame` requires that you either specify a target column by the ``y`` -argument or ``subplots=True``. When ``y`` is specified, pie plot of selected column -will be drawn. If ``subplots=True`` is specified, pie plots for each column are drawn as subplots. -A legend will be drawn in each pie plots by default; specify ``legend=False`` to hide it. +Note that a pie plot with :class:`DataFrame` requires that you either specify a +target column by the ``y`` argument or ``subplots=True``. When ``y`` is +specified, a pie plot of the selected column will be drawn. If ``subplots=True`` is +specified, pie plots for each column are drawn as subplots. A legend will be +drawn in each pie plot by default; specify ``legend=False`` to hide it. .. ipython:: python :suppress: @@ -739,7 +725,7 @@ You can use the ``labels`` and ``colors`` keywords to specify the labels and col .. warning:: - Most pandas plots use the the ``label`` and ``color`` arguments (note the lack of "s" on those). + Most pandas plots use the ``label`` and ``color`` arguments (note the lack of "s" on those). To be consistent with :func:`matplotlib.pyplot.pie` you must use ``labels`` and ``colors``. If you want to hide wedge labels, specify ``labels=None``. @@ -785,7 +771,7 @@ See the `matplotlib pie documentation `__ +for more information. By coloring these curves differently for each class it is possible to visualize data clustering. Curves belonging to samples of the same class will usually be closer together and form larger structures. 
@@ -896,7 +879,7 @@ of the same class will usually be closer together and form larger structures. .. ipython:: python - from pandas.tools.plotting import andrews_curves + from pandas.plotting import andrews_curves data = pd.read_csv('data/iris.data') @@ -910,15 +893,17 @@ of the same class will usually be closer together and form larger structures. Parallel Coordinates ~~~~~~~~~~~~~~~~~~~~ -Parallel coordinates is a plotting technique for plotting multivariate data. -It allows one to see clusters in data and to estimate other statistics visually. +Parallel coordinates is a plotting technique for plotting multivariate data, +see the `Wikipedia entry `__ +for an introduction. +Parallel coordinates allows one to see clusters in data and to estimate other statistics visually. Using parallel coordinates points are represented as connected line segments. Each vertical line represents one attribute. One set of connected line segments represents one data point. Points that tend to cluster will appear closer together. .. ipython:: python - from pandas.tools.plotting import parallel_coordinates + from pandas.plotting import parallel_coordinates data = pd.read_csv('data/iris.data') @@ -939,7 +924,9 @@ Lag Plot Lag plots are used to check if a data set or time series is random. Random data should not exhibit any structure in the lag plot. Non-random structure -implies that the underlying data are not random. +implies that the underlying data are not random. The ``lag`` argument may +be passed, and when ``lag=1`` the plot is essentially ``data[:-1]`` vs. +``data[1:]``. .. ipython:: python :suppress: @@ -948,7 +935,7 @@ implies that the underlying data are not random. .. ipython:: python - from pandas.tools.plotting import lag_plot + from pandas.plotting import lag_plot plt.figure() @@ -974,7 +961,9 @@ If time series is random, such autocorrelations should be near zero for any and all time-lag separations. 
If time series is non-random then one or more of the autocorrelations will be significantly non-zero. The horizontal lines displayed in the plot correspond to 95% and 99% confidence bands. The dashed line is 99% -confidence band. +confidence band. See the +`Wikipedia entry `__ for more about +autocorrelation plots. .. ipython:: python :suppress: @@ -983,7 +972,7 @@ confidence band. .. ipython:: python - from pandas.tools.plotting import autocorrelation_plot + from pandas.plotting import autocorrelation_plot plt.figure() @@ -1016,7 +1005,7 @@ are what constitutes the bootstrap plot. .. ipython:: python - from pandas.tools.plotting import bootstrap_plot + from pandas.plotting import bootstrap_plot data = pd.Series(np.random.rand(1000)) @@ -1043,12 +1032,14 @@ unit interval). The point in the plane, where our sample settles to (where the forces acting on our sample are at an equilibrium) is where a dot representing our sample will be drawn. Depending on which class that sample belongs it will be colored differently. +See the R package `Radviz `__ +for more information. **Note**: The "Iris" dataset is available `here `__. .. ipython:: python - from pandas.tools.plotting import radviz + from pandas.plotting import radviz data = pd.read_csv('data/iris.data') @@ -1067,6 +1058,21 @@ be colored differently. Plot Formatting --------------- +Setting the plot style +~~~~~~~~~~~~~~~~~~~~~~ + +From version 1.5 and up, matplotlib offers a range of preconfigured plotting styles. Setting the +style can be used to easily give plots the general look that you want. +Setting the style is as easy as calling ``matplotlib.style.use(my_plot_style)`` before +creating your plot. For example you could write ``matplotlib.style.use('ggplot')`` for ggplot-style +plots. + +You can see the various available style names at ``matplotlib.style.available`` and it's very +easy to try them out. 
+ +General plot style arguments +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Most plotting methods have a set of keyword arguments that control the layout and formatting of the returned plot: @@ -1159,7 +1165,7 @@ To plot data on a secondary y-axis, use the ``secondary_y`` keyword: plt.close('all') -To plot some columns in a DataFrame, give the column names to the ``secondary_y`` +To plot some columns in a ``DataFrame``, give the column names to the ``secondary_y`` keyword: .. ipython:: python @@ -1199,7 +1205,7 @@ time-series data. For limited cases where pandas cannot infer the frequency information (e.g., in an externally created ``twinx``), you can choose to suppress this behavior for alignment purposes. -Here is the default behavior, notice how the x-axis tick labelling is performed: +Here is the default behavior; notice how the x-axis tick labeling is performed: .. ipython:: python @@ -1228,14 +1234,14 @@ Using the ``x_compat`` parameter, you can suppress this behavior: plt.close('all') If you have more than one plot that needs to be suppressed, the ``use`` method -in ``pandas.plot_params`` can be used in a `with statement`: +in ``pandas.plotting.plot_params`` can be used in a `with statement`: .. ipython:: python plt.figure() @savefig ser_plot_suppress_context.png - with pd.plot_params.use('x_compat', True): + with pd.plotting.plot_params.use('x_compat', True): df.A.plot(color='r') df.B.plot(color='g') df.C.plot(color='b') @@ -1245,10 +1251,22 @@ in ``pandas.plot_params`` can be used in a `with statement`: plt.close('all') +Automatic Date Tick Adjustment +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 0.20.0 + +``TimedeltaIndex`` now uses the native matplotlib +tick locator methods; it is useful to call the automatic +date tick adjustment from matplotlib for figures whose ticklabels overlap. + +See the :meth:`autofmt_xdate ` method and the +`matplotlib documentation `__ for more. 
+ Subplots ~~~~~~~~ -Each Series in a DataFrame can be plotted on a different axis +Each ``Series`` in a ``DataFrame`` can be plotted on a different axis with the ``subplots`` keyword: .. ipython:: python @@ -1264,13 +1282,13 @@ with the ``subplots`` keyword: Using Layout and Targeting Multiple Axes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The layout of subplots can be specified by ``layout`` keyword. It can accept +The layout of subplots can be specified by the ``layout`` keyword. It can accept ``(rows, columns)``. The ``layout`` keyword can be used in -``hist`` and ``boxplot`` also. If input is invalid, ``ValueError`` will be raised. +``hist`` and ``boxplot`` also. If the input is invalid, a ``ValueError`` will be raised. The number of axes which can be contained by rows x columns specified by ``layout`` must be larger than the number of required subplots. If layout can contain more axes than required, -blank axes are not drawn. Similar to a numpy array's ``reshape`` method, you +blank axes are not drawn. Similar to a NumPy array's ``reshape`` method, you can use ``-1`` for one dimension to automatically calculate the number of rows or columns needed, given the other. @@ -1284,7 +1302,7 @@ or columns needed, given the other. plt.close('all') -The above example is identical to using +The above example is identical to using: .. ipython:: python @@ -1298,11 +1316,11 @@ The above example is identical to using The required number of columns (3) is inferred from the number of series to plot and the given number of rows (2). -Also, you can pass multiple axes created beforehand as list-like via ``ax`` keyword. -This allows to use more complicated layout. +You can pass multiple axes created beforehand as list-like via ``ax`` keyword. +This allows more complicated layouts. The passed axes must be the same number as the subplots being drawn. 
-When multiple axes are passed via ``ax`` keyword, ``layout``, ``sharex`` and ``sharey`` keywords +When multiple axes are passed via the ``ax`` keyword, ``layout``, ``sharex`` and ``sharey`` keywords don't affect to the output. You should explicitly pass ``sharex=False`` and ``sharey=False``, otherwise you will see a warning. @@ -1359,15 +1377,13 @@ Another option is passing an ``ax`` argument to :meth:`Series.plot` to plot on a Plotting With Error Bars ~~~~~~~~~~~~~~~~~~~~~~~~ -.. versionadded:: 0.14 +Plotting with error bars is supported in :meth:`DataFrame.plot` and :meth:`Series.plot`. -Plotting with error bars is now supported in the :meth:`DataFrame.plot` and :meth:`Series.plot` +Horizontal and vertical error bars can be supplied to the ``xerr`` and ``yerr`` keyword arguments to :meth:`~DataFrame.plot()`. The error values can be specified using a variety of formats: -Horizontal and vertical errorbars can be supplied to the ``xerr`` and ``yerr`` keyword arguments to :meth:`~DataFrame.plot()`. The error values can be specified using a variety of formats. - -- As a :class:`DataFrame` or ``dict`` of errors with column names matching the ``columns`` attribute of the plotting :class:`DataFrame` or matching the ``name`` attribute of the :class:`Series` -- As a ``str`` indicating which of the columns of plotting :class:`DataFrame` contain the error values -- As raw values (``list``, ``tuple``, or ``np.ndarray``). Must be the same length as the plotting :class:`DataFrame`/:class:`Series` +- As a :class:`DataFrame` or ``dict`` of errors with column names matching the ``columns`` attribute of the plotting :class:`DataFrame` or matching the ``name`` attribute of the :class:`Series`. +- As a ``str`` indicating which of the columns of plotting :class:`DataFrame` contain the error values. +- As raw values (``list``, ``tuple``, or ``np.ndarray``). Must be the same length as the plotting :class:`DataFrame`/:class:`Series`. 
Asymmetrical error bars are also supported, however raw error values must be provided in this case. For a ``M`` length :class:`Series`, a ``Mx2`` array should be provided indicating lower and upper (or left and right) errors. For a ``MxN`` :class:`DataFrame`, asymmetrical errors should be in a ``Mx2xN`` array. @@ -1401,8 +1417,6 @@ Here is an example of one way to easily plot group means with standard deviation Plotting Tables ~~~~~~~~~~~~~~~ -.. versionadded:: 0.14 - Plotting with matplotlib table is now supported in :meth:`DataFrame.plot` and :meth:`Series.plot` with a ``table`` keyword. The ``table`` keyword can accept ``bool``, :class:`DataFrame` or :class:`Series`. The simple way to draw a table is to specify ``table=True``. Data will be transposed to meet matplotlib's default layout. .. ipython:: python @@ -1424,7 +1438,10 @@ Plotting with matplotlib table is now supported in :meth:`DataFrame.plot` and : plt.close('all') -Also, you can pass different :class:`DataFrame` or :class:`Series` for ``table`` keyword. The data will be drawn as displayed in print method (not transposed automatically). If required, it should be transposed manually as below example. +Also, you can pass a different :class:`DataFrame` or :class:`Series` to the +``table`` keyword. The data will be drawn as displayed in the print method +(not transposed automatically). If required, it should be transposed manually +as seen in the example below. .. ipython:: python @@ -1438,11 +1455,14 @@ Also, you can pass different :class:`DataFrame` or :class:`Series` for ``table`` plt.close('all') -Finally, there is a helper function ``pandas.tools.plotting.table`` to create a table from :class:`DataFrame` and :class:`Series`, and add it to an ``matplotlib.Axes``. This function can accept keywords which matplotlib table has. +There also exists a helper function ``pandas.plotting.table``, which creates a +table from :class:`DataFrame` or :class:`Series`, and adds it to a +``matplotlib.Axes`` instance. 
This function can accept keywords which the +matplotlib `table `__ has. .. ipython:: python - from pandas.tools.plotting import table + from pandas.plotting import table fig, ax = plt.subplots(1, 1) table(ax, np.round(df.describe(), 2), @@ -1465,18 +1485,18 @@ Colormaps A potential issue when plotting a large number of columns is that it can be difficult to distinguish some series due to repetition in the default colors. To -remedy this, DataFrame plotting supports the use of the ``colormap=`` argument, +remedy this, ``DataFrame`` plotting supports the use of the ``colormap`` argument, which accepts either a Matplotlib `colormap `__ or a string that is a name of a colormap registered with Matplotlib. A visualization of the default matplotlib colormaps is available `here -`__. +`__. As matplotlib does not directly support colormaps for line-based plots, the colors are selected based on an even spacing determined by the number of columns -in the DataFrame. There is no consideration made for background color, so some +in the ``DataFrame``. There is no consideration made for background color, so some colormaps will produce lines that are not easily visible. -To use the cubehelix colormap, we can simply pass ``'cubehelix'`` to ``colormap=`` +To use the cubehelix colormap, we can pass ``colormap='cubehelix'``. .. ipython:: python :suppress: @@ -1498,7 +1518,7 @@ To use the cubehelix colormap, we can simply pass ``'cubehelix'`` to ``colormap= plt.close('all') -or we can pass the colormap itself +Alternatively, we can pass the colormap itself: .. ipython:: python @@ -1569,9 +1589,9 @@ Plotting directly with matplotlib In some situations it may still be preferable or necessary to prepare plots directly with matplotlib, for instance when a certain type of plot or -customization is not (yet) supported by pandas. Series and DataFrame objects -behave like arrays and can therefore be passed directly to matplotlib functions -without explicit casts. 
+customization is not (yet) supported by pandas. ``Series`` and ``DataFrame`` +objects behave like arrays and can therefore be passed directly to +matplotlib functions without explicit casts. pandas also automatically registers formatters and locators that recognize date indices, thereby extending date and time support to practically all plot types @@ -1579,10 +1599,6 @@ available in matplotlib. Although this formatting does not provide the same level of refinement you would get when plotting via pandas, it can be faster when plotting a large number of points. -.. note:: - - The speed up for large data sets only applies to pandas 0.14.0 and later. - .. ipython:: python :suppress: diff --git a/doc/source/whatsnew.rst b/doc/source/whatsnew.rst index d6fb1c6a8f9cc..d61a98fe2dae4 100644 --- a/doc/source/whatsnew.rst +++ b/doc/source/whatsnew.rst @@ -18,6 +18,18 @@ What's New These are new features and improvements of note in each release. +.. include:: whatsnew/v0.23.0.txt + +.. include:: whatsnew/v0.22.0.txt + +.. include:: whatsnew/v0.21.1.txt + +.. include:: whatsnew/v0.21.0.txt + +.. include:: whatsnew/v0.20.3.txt + +.. include:: whatsnew/v0.20.2.txt + .. include:: whatsnew/v0.20.0.txt .. include:: whatsnew/v0.19.2.txt diff --git a/doc/source/whatsnew/v0.10.0.txt b/doc/source/whatsnew/v0.10.0.txt index fed3ba3ce3a84..222a2da23865c 100644 --- a/doc/source/whatsnew/v0.10.0.txt +++ b/doc/source/whatsnew/v0.10.0.txt @@ -128,15 +128,45 @@ labeled the aggregated group with the end of the interval: the next day). ``notnull``. That they ever were was a relic of early pandas. This behavior can be re-enabled globally by the ``mode.use_inf_as_null`` option: -.. ipython:: python +.. 
code-block:: ipython - s = pd.Series([1.5, np.inf, 3.4, -np.inf]) - pd.isnull(s) - s.fillna(0) - pd.set_option('use_inf_as_null', True) - pd.isnull(s) - s.fillna(0) - pd.reset_option('use_inf_as_null') + In [6]: s = pd.Series([1.5, np.inf, 3.4, -np.inf]) + + In [7]: pd.isnull(s) + Out[7]: + 0 False + 1 False + 2 False + 3 False + Length: 4, dtype: bool + + In [8]: s.fillna(0) + Out[8]: + 0 1.500000 + 1 inf + 2 3.400000 + 3 -inf + Length: 4, dtype: float64 + + In [9]: pd.set_option('use_inf_as_null', True) + + In [10]: pd.isnull(s) + Out[10]: + 0 False + 1 True + 2 False + 3 True + Length: 4, dtype: bool + + In [11]: s.fillna(0) + Out[11]: + 0 1.5 + 1 0.0 + 2 3.4 + 3 0.0 + Length: 4, dtype: float64 + + In [12]: pd.reset_option('use_inf_as_null') - Methods with the ``inplace`` option now all return ``None`` instead of the calling object. E.g. code written like ``df = df.fillna(0, inplace=True)`` @@ -303,11 +333,10 @@ Updated PyTables Support store.append('wp',wp) # selecting via A QUERY - store.select('wp', - [ Term('major_axis>20000102'), Term('minor_axis', '=', ['A','B']) ]) + store.select('wp', "major_axis>20000102 and minor_axis=['A','B']") # removing data from tables - store.remove('wp', Term('major_axis>20000103')) + store.remove('wp', "major_axis>20000103") store.select('wp') # deleting a store @@ -340,7 +369,7 @@ Updated PyTables Support df1 df1.get_dtype_counts() -- performance improvments on table writing +- performance improvements on table writing - support for arbitrarily indexed dimensions - ``SparseSeries`` now has a ``density`` property (:issue:`2384`) - enable ``Series.str.strip/lstrip/rstrip`` methods to take an input argument @@ -382,15 +411,23 @@ N Dimensional Panels (Experimental) Adding experimental support for Panel4D and factory functions to create n-dimensional named panels. :ref:`Docs ` for NDim. Here is a taste of what to expect. - .. ipython:: python - :okwarning: +.. 
code-block:: ipython - p4d = Panel4D(randn(2, 2, 5, 4), - labels=['Label1','Label2'], - items=['Item1', 'Item2'], - major_axis=date_range('1/1/2000', periods=5), - minor_axis=['A', 'B', 'C', 'D']) - p4d + In [58]: p4d = Panel4D(randn(2, 2, 5, 4), + ....: labels=['Label1','Label2'], + ....: items=['Item1', 'Item2'], + ....: major_axis=date_range('1/1/2000', periods=5), + ....: minor_axis=['A', 'B', 'C', 'D']) + ....: + + In [59]: p4d + Out[59]: + + Dimensions: 2 (labels) x 2 (items) x 5 (major_axis) x 4 (minor_axis) + Labels axis: Label1 to Label2 + Items axis: Item1 to Item2 + Major_axis axis: 2000-01-01 00:00:00 to 2000-01-05 00:00:00 + Minor_axis axis: A to D diff --git a/doc/source/whatsnew/v0.10.1.txt b/doc/source/whatsnew/v0.10.1.txt index edc628fe85027..2d5843101dec2 100644 --- a/doc/source/whatsnew/v0.10.1.txt +++ b/doc/source/whatsnew/v0.10.1.txt @@ -58,7 +58,7 @@ perform queries on a table, by passing a list to ``data_columns`` # on-disk operations store.append('df', df, data_columns = ['B','C','string','string2']) - store.select('df',[ 'B > 0', 'string == foo' ]) + store.select('df', "B>0 and string=='foo'") # this is in-memory version of this type of selection df[(df.B > 0) & (df.string == 'foo')] @@ -110,7 +110,7 @@ columns, this is equivalent to passing a store.select('mi') # the levels are automatically included as data columns - store.select('mi', Term('foo=bar')) + store.select('mi', "foo='bar'") Multi-table creation via ``append_to_multiple`` and selection via ``select_as_multiple`` can create/select from multiple tables and return a @@ -153,7 +153,7 @@ combined result, by using ``where`` on a selector table. table - You can pass ``chunksize=an integer`` to ``append``, to change the writing - chunksize (default is 50000). This will signficantly lower your memory usage + chunksize (default is 50000). This will significantly lower your memory usage on writing. 
- You can pass ``expectedrows=an integer`` to the first ``append``, to set the diff --git a/doc/source/whatsnew/v0.11.0.txt b/doc/source/whatsnew/v0.11.0.txt index ea149595e681f..b90a597815ec5 100644 --- a/doc/source/whatsnew/v0.11.0.txt +++ b/doc/source/whatsnew/v0.11.0.txt @@ -88,7 +88,7 @@ Numeric dtypes will propagate and can coexist in DataFrames. If a dtype is passe Dtype Conversion ~~~~~~~~~~~~~~~~ -This is lower-common-denomicator upcasting, meaning you get the dtype which can accomodate all of the types +This is lower-common-denominator upcasting, meaning you get the dtype which can accommodate all of the types .. ipython:: python @@ -193,7 +193,7 @@ Furthermore ``datetime64[ns]`` columns are created by default, when passed datet df.loc[df.index[2:4], ['A','timestamp']] = np.nan df -Astype conversion on ``datetime64[ns]`` to ``object``, implicity converts ``NaT`` to ``np.nan`` +Astype conversion on ``datetime64[ns]`` to ``object``, implicitly converts ``NaT`` to ``np.nan`` .. ipython:: python diff --git a/doc/source/whatsnew/v0.12.0.txt b/doc/source/whatsnew/v0.12.0.txt index c4188898bdf71..ad33c49792d9f 100644 --- a/doc/source/whatsnew/v0.12.0.txt +++ b/doc/source/whatsnew/v0.12.0.txt @@ -38,7 +38,7 @@ API changes * ``to_clipboard`` - - Fix modulo and integer division on Series,DataFrames to act similary to ``float`` dtypes to return + - Fix modulo and integer division on Series,DataFrames to act similarly to ``float`` dtypes to return ``np.nan`` or ``np.inf`` as appropriate (:issue:`3590`). This correct a numpy bug that treats ``integer`` and ``float`` dtypes differently. @@ -154,7 +154,7 @@ API changes - The behavior of ``datetime64`` dtypes has changed with respect to certain so-called reduction operations (:issue:`3726`). 
The following operations now - raise a ``TypeError`` when perfomed on a ``Series`` and return an *empty* + raise a ``TypeError`` when performed on a ``Series`` and return an *empty* ``Series`` when performed on a ``DataFrame`` similar to performing these operations on, for example, a ``DataFrame`` of ``slice`` objects: @@ -206,11 +206,11 @@ I/O Enhancements :ref:`See the installation docs` - Added module for reading and writing Stata files: ``pandas.io.stata`` (:issue:`1512`) - accessable via ``read_stata`` top-level function for reading, + accessible via ``read_stata`` top-level function for reading, and ``to_stata`` DataFrame method for writing, :ref:`See the docs` - Added module for reading and writing json format files: ``pandas.io.json`` - accessable via ``read_json`` top-level function for reading, + accessible via ``read_json`` top-level function for reading, and ``to_json`` DataFrame method for writing, :ref:`See the docs` various issues (:issue:`1226`, :issue:`3804`, :issue:`3876`, :issue:`3867`, :issue:`1305`) @@ -220,7 +220,7 @@ I/O Enhancements list of the rows from which to read the index. - The option, ``tupleize_cols`` can now be specified in both ``to_csv`` and - ``read_csv``, to provide compatiblity for the pre 0.12 behavior of + ``read_csv``, to provide compatibility for the pre 0.12 behavior of writing and reading ``MultIndex`` columns via a list of tuples. The default in 0.12 is to write lists of tuples and *not* interpret list of tuples as a ``MultiIndex`` column. @@ -236,10 +236,10 @@ I/O Enhancements .. ipython:: python from pandas.util.testing import makeCustomDataframe as mkdf - df = mkdf(5,3,r_idx_nlevels=2,c_idx_nlevels=4) - df.to_csv('mi.csv',tupleize_cols=False) + df = mkdf(5, 3, r_idx_nlevels=2, c_idx_nlevels=4) + df.to_csv('mi.csv') print(open('mi.csv').read()) - pd.read_csv('mi.csv',header=[0,1,2,3],index_col=[0,1],tupleize_cols=False) + pd.read_csv('mi.csv', header=[0,1,2,3], index_col=[0,1]) .. 
ipython:: python :suppress: diff --git a/doc/source/whatsnew/v0.13.0.txt b/doc/source/whatsnew/v0.13.0.txt index 118632cc2c0ee..f440be1ddd56e 100644 --- a/doc/source/whatsnew/v0.13.0.txt +++ b/doc/source/whatsnew/v0.13.0.txt @@ -357,11 +357,11 @@ HDFStore API Changes .. ipython:: python path = 'test.h5' - df = DataFrame(randn(10,2)) + df = pd.DataFrame(np.random.randn(10,2)) df.to_hdf(path,'df_table',format='table') df.to_hdf(path,'df_table2',append=True) df.to_hdf(path,'df_fixed') - with get_store(path) as store: + with pd.HDFStore(path) as store: print(store) .. ipython:: python @@ -790,7 +790,7 @@ Experimental .. ipython:: python for o in pd.read_msgpack('foo.msg',iterator=True): - print o + print(o) .. ipython:: python :suppress: diff --git a/doc/source/whatsnew/v0.13.1.txt b/doc/source/whatsnew/v0.13.1.txt index d5d54ba43b622..51ca6116d42ce 100644 --- a/doc/source/whatsnew/v0.13.1.txt +++ b/doc/source/whatsnew/v0.13.1.txt @@ -125,7 +125,7 @@ API changes df = DataFrame({'col':['foo', 0, np.nan]}) df2 = DataFrame({'col':[np.nan, 0, 'foo']}, index=[2,1,0]) df.equals(df2) - df.equals(df2.sort()) + df.equals(df2.sort_index()) import pandas.core.common as com com.array_equivalent(np.array([0, np.nan]), np.array([0, np.nan])) @@ -140,14 +140,21 @@ API changes applied would be called with an empty ``Series`` to guess whether a ``Series`` or ``DataFrame`` should be returned: - .. ipython:: python + .. 
code-block:: ipython + + In [32]: def applied_func(col): + ....: print("Apply function being called with: ", col) + ....: return col.sum() + ....: - def applied_func(col): - print("Apply function being called with: ", col) - return col.sum() + In [33]: empty = DataFrame(columns=['a', 'b']) - empty = DataFrame(columns=['a', 'b']) - empty.apply(applied_func) + In [34]: empty.apply(applied_func) + Apply function being called with: Series([], Length: 0, dtype: float64) + Out[34]: + a NaN + b NaN + Length: 2, dtype: float64 Now, when ``apply`` is called on an empty ``DataFrame``: if the ``reduce`` argument is ``True`` a ``Series`` will returned, if it is ``False`` a @@ -155,10 +162,22 @@ API changes function being applied will be called with an empty series to try and guess the return type. - .. ipython:: python + .. code-block:: ipython + + In [35]: empty.apply(applied_func, reduce=True) + Out[35]: + a NaN + b NaN + Length: 2, dtype: float64 + + In [36]: empty.apply(applied_func, reduce=False) + Out[36]: + Empty DataFrame + Columns: [a, b] + Index: [] + + [0 rows x 2 columns] - empty.apply(applied_func, reduce=True) - empty.apply(applied_func, reduce=False) Prior Version Deprecations/Changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/whatsnew/v0.14.0.txt b/doc/source/whatsnew/v0.14.0.txt index f1feab4b909dc..be962ceb181ff 100644 --- a/doc/source/whatsnew/v0.14.0.txt +++ b/doc/source/whatsnew/v0.14.0.txt @@ -83,7 +83,7 @@ API changes been removed, instead a header with the column names is returned (:issue:`6062`). - ``Series`` and ``Index`` now internall share more common operations, e.g. ``factorize(),nunique(),value_counts()`` are now supported on ``Index`` types as well. The ``Series.weekday`` property from is removed - from Series for API consistency. Using a ``DatetimeIndex/PeriodIndex`` method on a Series will now raise a ``TypeError``. + from Series for API consistency. 
Using a ``DatetimeIndex/PeriodIndex`` method on a Series will now raise a ``TypeError``. (:issue:`4551`, :issue:`4056`, :issue:`5519`, :issue:`6380`, :issue:`7206`). - Add ``is_month_start``, ``is_month_end``, ``is_quarter_start``, ``is_quarter_end``, ``is_year_start``, ``is_year_end`` accessors for ``DateTimeIndex`` / ``Timestamp`` which return a boolean array of whether the timestamp(s) are at the start/end of the month/quarter/year defined by the frequency of the ``DateTimeIndex`` / ``Timestamp`` (:issue:`4565`, :issue:`6998`) @@ -284,7 +284,7 @@ Display Changes `large_repr` set to 'info' (:issue:`7105`) - The `verbose` keyword in ``DataFrame.info()``, which controls whether to shorten the ``info`` representation, is now ``None`` by default. This will follow the global setting in - ``display.max_info_columns``. The global setting can be overriden with ``verbose=True`` or + ``display.max_info_columns``. The global setting can be overridden with ``verbose=True`` or ``verbose=False``. - Fixed a bug with the `info` repr not honoring the `display.max_info_columns` setting (:issue:`6939`) - Offset/freq info now in Timestamp __repr__ (:issue:`4553`) @@ -446,7 +446,7 @@ Some other enhancements to the sql functions include: - support for writing the index. This can be controlled with the ``index`` keyword (default is True). - specify the column label to use when writing the index with ``index_label``. -- specify string columns to parse as datetimes withh the ``parse_dates`` +- specify string columns to parse as datetimes with the ``parse_dates`` keyword in :func:`~pandas.read_sql_query` and :func:`~pandas.read_sql_table`. .. warning:: @@ -596,15 +596,15 @@ Plotting - `align`: Specify the bar alignment. Default is `center` (different from matplotlib). In previous versions, pandas passes `align='edge'` to matplotlib and adjust the location to `center` by itself, and it results `align` keyword is not applied as expected. 
(:issue:`4525`) - `position`: Specify relative alignments for bar plot layout. From 0 (left/bottom-end) to 1(right/top-end). Default is 0.5 (center). (:issue:`6604`) - Because of the default `align` value changes, coordinates of bar plots are now located on integer values (0.0, 1.0, 2.0 ...). This is intended to make bar plot be located on the same coodinates as line plot. However, bar plot may differs unexpectedly when you manually adjust the bar location or drawing area, such as using `set_xlim`, `set_ylim`, etc. In this cases, please modify your script to meet with new coordinates. + Because of the default `align` value changes, coordinates of bar plots are now located on integer values (0.0, 1.0, 2.0 ...). This is intended to make bar plot be located on the same coordinates as line plot. However, bar plot may differs unexpectedly when you manually adjust the bar location or drawing area, such as using `set_xlim`, `set_ylim`, etc. In this cases, please modify your script to meet with new coordinates. - The :func:`parallel_coordinates` function now takes argument ``color`` - instead of ``colors``. A ``FutureWarning`` is raised to alert that + instead of ``colors``. A ``FutureWarning`` is raised to alert that the old ``colors`` argument will not be supported in a future release. (:issue:`6956`) - The :func:`parallel_coordinates` and :func:`andrews_curves` functions now take positional argument ``frame`` instead of ``data``. A ``FutureWarning`` is - raised if the old ``data`` argument is used by name. (:issue:`6956`) + raised if the old ``data`` argument is used by name. (:issue:`6956`) - :meth:`DataFrame.boxplot` now supports ``layout`` keyword (:issue:`6769`) - :meth:`DataFrame.boxplot` has a new keyword argument, `return_type`. It accepts ``'dict'``, @@ -645,17 +645,17 @@ Deprecations - The :func:`pivot_table`/:meth:`DataFrame.pivot_table` and :func:`crosstab` functions now take arguments ``index`` and ``columns`` instead of ``rows`` and ``cols``. 
A - ``FutureWarning`` is raised to alert that the old ``rows`` and ``cols`` arguments + ``FutureWarning`` is raised to alert that the old ``rows`` and ``cols`` arguments will not be supported in a future release (:issue:`5505`) - The :meth:`DataFrame.drop_duplicates` and :meth:`DataFrame.duplicated` methods now take argument ``subset`` instead of ``cols`` to better align with - :meth:`DataFrame.dropna`. A ``FutureWarning`` is raised to alert that the old + :meth:`DataFrame.dropna`. A ``FutureWarning`` is raised to alert that the old ``cols`` arguments will not be supported in a future release (:issue:`6680`) - The :meth:`DataFrame.to_csv` and :meth:`DataFrame.to_excel` functions now takes argument ``columns`` instead of ``cols``. A - ``FutureWarning`` is raised to alert that the old ``cols`` arguments + ``FutureWarning`` is raised to alert that the old ``cols`` arguments will not be supported in a future release (:issue:`6645`) - Indexers will warn ``FutureWarning`` when used with a scalar indexer and @@ -698,12 +698,12 @@ Deprecations ALWAYS return a view. (:issue:`6894`) - The :func:`parallel_coordinates` function now takes argument ``color`` - instead of ``colors``. A ``FutureWarning`` is raised to alert that + instead of ``colors``. A ``FutureWarning`` is raised to alert that the old ``colors`` argument will not be supported in a future release. (:issue:`6956`) - The :func:`parallel_coordinates` and :func:`andrews_curves` functions now take positional argument ``frame`` instead of ``data``. A ``FutureWarning`` is - raised if the old ``data`` argument is used by name. (:issue:`6956`) + raised if the old ``data`` argument is used by name. (:issue:`6956`) - The support for the 'mysql' flavor when using DBAPI connection objects has been deprecated. MySQL will be further supported with SQLAlchemy engines (:issue:`6900`). 
@@ -899,7 +899,7 @@ Bug Fixes - Raise when trying to align on different levels of a multi-index assignment (:issue:`3738`) - Bug in setting complex dtypes via boolean indexing (:issue:`6345`) - Bug in TimeGrouper/resample when presented with a non-monotonic DatetimeIndex that would return invalid results. (:issue:`4161`) -- Bug in index name propogation in TimeGrouper/resample (:issue:`4161`) +- Bug in index name propagation in TimeGrouper/resample (:issue:`4161`) - TimeGrouper has a more compatible API to the rest of the groupers (e.g. ``groups`` was missing) (:issue:`3881`) - Bug in multiple grouping with a TimeGrouper depending on target column order (:issue:`6764`) - Bug in ``pd.eval`` when parsing strings with possible tokens like ``'&'`` @@ -976,7 +976,7 @@ Bug Fixes clean`` (:issue:`6768`) - Bug with numpy < 1.7.2 when reading long strings from ``HDFStore`` (:issue:`6166`) - Bug in ``DataFrame._reduce`` where non bool-like (0/1) integers were being - coverted into bools. (:issue:`6806`) + converted into bools. (:issue:`6806`) - Regression from 0.13 with ``fillna`` and a Series on datetime-like (:issue:`6344`) - Bug in adding ``np.timedelta64`` to ``DatetimeIndex`` with timezone outputs incorrect results (:issue:`6818`) - Bug in ``DataFrame.replace()`` where changing a dtype through replacement diff --git a/doc/source/whatsnew/v0.14.1.txt b/doc/source/whatsnew/v0.14.1.txt index 239d6c9c6e0d4..4674cbc846722 100644 --- a/doc/source/whatsnew/v0.14.1.txt +++ b/doc/source/whatsnew/v0.14.1.txt @@ -75,7 +75,7 @@ API changes Note that for the other offsets the default behaviour did not change. 
-- Add back ``#N/A N/A`` as a default NA value in text parsing, (regresion from 0.12) (:issue:`5521`) +- Add back ``#N/A N/A`` as a default NA value in text parsing, (regression from 0.12) (:issue:`5521`) - Raise a ``TypeError`` on inplace-setting with a ``.where`` and a non ``np.nan`` value as this is inconsistent with a set-item expression like ``df[mask] = None`` (:issue:`7656`) @@ -88,7 +88,7 @@ Enhancements - Add ``dropna`` argument to ``value_counts`` and ``nunique`` (:issue:`5569`). - Add :meth:`~pandas.DataFrame.select_dtypes` method to allow selection of columns based on dtype (:issue:`7316`). See :ref:`the docs `. -- All ``offsets`` suppports the ``normalize`` keyword to specify whether +- All ``offsets`` supports the ``normalize`` keyword to specify whether ``offsets.apply``, ``rollforward`` and ``rollback`` resets the time (hour, minute, etc) or not (default ``False``, preserves time) (:issue:`7156`): @@ -145,7 +145,7 @@ Performance ~~~~~~~~~~~ - Improvements in dtype inference for numeric operations involving yielding performance gains for dtypes: ``int64``, ``timedelta64``, ``datetime64`` (:issue:`7223`) - Improvements in Series.transform for significant performance gains (:issue:`6496`) -- Improvements in DataFrame.transform with ufuncs and built-in grouper functions for signifcant performance gains (:issue:`7383`) +- Improvements in DataFrame.transform with ufuncs and built-in grouper functions for significant performance gains (:issue:`7383`) - Regression in groupby aggregation of datetime64 dtypes (:issue:`7555`) - Improvements in `MultiIndex.from_product` for large iterables (:issue:`7627`) diff --git a/doc/source/whatsnew/v0.15.0.txt b/doc/source/whatsnew/v0.15.0.txt index aff8ec9092cdc..c5ef6c8c9d74a 100644 --- a/doc/source/whatsnew/v0.15.0.txt +++ b/doc/source/whatsnew/v0.15.0.txt @@ -22,7 +22,7 @@ users upgrade to this version. 
- ``read_csv`` will now by default ignore blank lines when parsing, see :ref:`here ` - API change in using Indexes in set operations, see :ref:`here ` - Enhancements in the handling of timezones, see :ref:`here ` - - A lot of improvements to the rolling and expanding moment funtions, see :ref:`here ` + - A lot of improvements to the rolling and expanding moment functions, see :ref:`here ` - Internal refactoring of the ``Index`` class to no longer sub-class ``ndarray``, see :ref:`Internal Refactoring ` - dropping support for ``PyTables`` less than version 3.0.0, and ``numexpr`` less than version 2.1 (:issue:`7990`) - Split indexing documentation into :ref:`Indexing and Selecting Data ` and :ref:`MultiIndex / Advanced Indexing ` @@ -80,7 +80,7 @@ For full docs, see the :ref:`categorical introduction ` and the # Reorder the categories and simultaneously add the missing categories df["grade"] = df["grade"].cat.set_categories(["very bad", "bad", "medium", "good", "very good"]) df["grade"] - df.sort("grade") + df.sort_values("grade") df.groupby("grade").size() - ``pandas.core.group_agg`` and ``pandas.core.factor_agg`` were removed. As an alternative, construct @@ -326,7 +326,7 @@ Timezone handling improvements - ``Timestamp.tz_localize`` and ``Timestamp.tz_convert`` now raise ``TypeError`` in error cases, rather than ``Exception`` (:issue:`8025`) -- a timeseries/index localized to UTC when inserted into a Series/DataFrame will preserve the UTC timezone (rather than being a naive ``datetime64[ns]``) as ``object`` dtype (:issue:`8411`) +- a timeseries/index localized to UTC when inserted into a Series/DataFrame will preserve the UTC timezone (rather than being a naive ``datetime64[ns]``) as ``object`` dtype (:issue:`8411`) - ``Timestamp.__repr__`` displays ``dateutil.tz.tzoffset`` info (:issue:`7907`) @@ -676,10 +676,19 @@ Other notable API changes: Both will now return a frame reindex by [1,3]. E.g. - .. ipython:: python + .. 
code-block:: ipython - df.loc[[1,3]] - df.loc[[1,3],:] + In [3]: df.loc[[1,3]] + Out[3]: + 0 + 1 a + 3 NaN + + In [4]: df.loc[[1,3],:] + Out[4]: + 0 + 1 a + 3 NaN This can also be seen in multi-axis indexing with a ``Panel``. @@ -693,9 +702,14 @@ Other notable API changes: The following would raise ``KeyError`` prior to 0.15.0: - .. ipython:: python + .. code-block:: ipython - p.loc[['ItemA','ItemD'],:,'D'] + In [5]: + Out[5]: + ItemA ItemD + 1 3 NaN + 2 7 NaN + 3 11 NaN Furthermore, ``.loc`` will raise If no values are found in a multi-index with a list-like indexer: @@ -823,7 +837,7 @@ Other notable API changes: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead - See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy + See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy - ``merge``, ``DataFrame.merge``, and ``ordered_merge`` now return the same type as the ``left`` argument (:issue:`7737`). @@ -864,7 +878,7 @@ a transparent change with only very limited API implications (:issue:`5080`, :is - you may need to unpickle pandas version < 0.15.0 pickles using ``pd.read_pickle`` rather than ``pickle.load``. See :ref:`pickle docs ` - when plotting with a ``PeriodIndex``, the matplotlib internal axes will now be arrays of ``Period`` rather than a ``PeriodIndex`` (this is similar to how a ``DatetimeIndex`` passes arrays of ``datetimes`` now) -- MultiIndexes will now raise similary to other pandas objects w.r.t. truth testing, see :ref:`here ` (:issue:`7897`). +- MultiIndexes will now raise similarly to other pandas objects w.r.t. truth testing, see :ref:`here ` (:issue:`7897`). 
- When plotting a DatetimeIndex directly with matplotlib's `plot` function, the axis labels will no longer be formatted as dates but as integers (the internal representation of a ``datetime64``). **UPDATE** This is fixed @@ -1030,7 +1044,7 @@ Other: idx = MultiIndex.from_product([['a'], range(3), list("pqr")], names=['foo', 'bar', 'baz']) idx.set_names('qux', level=0) - idx.set_names(['qux','baz'], level=[0,1]) + idx.set_names(['qux','corge'], level=[0,1]) idx.set_levels(['a','b','c'], level='bar') idx.set_levels([['a','b','c'],[1,2,3]], level=[1,2]) @@ -1104,7 +1118,7 @@ Bug Fixes - Bug in multi-index slicing with various edge cases (:issue:`8132`) - Regression in multi-index indexing with a non-scalar type object (:issue:`7914`) - Bug in ``Timestamp`` comparisons with ``==`` and ``int64`` dtype (:issue:`8058`) -- Bug in pickles contains ``DateOffset`` may raise ``AttributeError`` when ``normalize`` attribute is reffered internally (:issue:`7748`) +- Bug in pickles contains ``DateOffset`` may raise ``AttributeError`` when ``normalize`` attribute is referred internally (:issue:`7748`) - Bug in ``Panel`` when using ``major_xs`` and ``copy=False`` is passed (deprecation warning fails because of missing ``warnings``) (:issue:`8152`). - Bug in pickle deserialization that failed for pre-0.14.1 containers with dup items trying to avoid ambiguity when matching block and manager items, when there's only one block there's no ambiguity (:issue:`7794`) diff --git a/doc/source/whatsnew/v0.15.1.txt b/doc/source/whatsnew/v0.15.1.txt index cd9298c74539a..f84f25d3e906c 100644 --- a/doc/source/whatsnew/v0.15.1.txt +++ b/doc/source/whatsnew/v0.15.1.txt @@ -274,7 +274,7 @@ Enhancements Bug Fixes ~~~~~~~~~ -- Bug in unpickling of a ``CustomBusinessDay`` object (:issue:`8591`) +- Bug in unpickling of a ``CustomBusinessDay`` object (:issue:`8591`) - Bug in coercing ``Categorical`` to a records array, e.g. 
``df.to_records()`` (:issue:`8626`) - Bug in ``Categorical`` not created properly with ``Series.to_frame()`` (:issue:`8626`) - Bug in coercing in astype of a ``Categorical`` of a passed ``pd.Categorical`` (this now raises ``TypeError`` correctly), (:issue:`8626`) diff --git a/doc/source/whatsnew/v0.15.2.txt b/doc/source/whatsnew/v0.15.2.txt index feba3d6224e65..f1dfab0f57ed3 100644 --- a/doc/source/whatsnew/v0.15.2.txt +++ b/doc/source/whatsnew/v0.15.2.txt @@ -163,7 +163,7 @@ Other enhancements: p.all() - Added support for ``utcfromtimestamp()``, ``fromtimestamp()``, and ``combine()`` on `Timestamp` class (:issue:`5351`). -- Added Google Analytics (`pandas.io.ga`) basic documentation (:issue:`8835`). See `here`__. +- Added Google Analytics (`pandas.io.ga`) basic documentation (:issue:`8835`). See `here `__. - ``Timedelta`` arithmetic returns ``NotImplemented`` in unknown cases, allowing extensions by custom classes (:issue:`8813`). - ``Timedelta`` now supports arithemtic with ``numpy.ndarray`` objects of the appropriate dtype (numpy 1.8 or newer only) (:issue:`8884`). - Added ``Timedelta.to_timedelta64()`` method to the public API (:issue:`8884`). @@ -215,7 +215,7 @@ Bug Fixes - ``io.data.Options`` now raises ``RemoteDataError`` when no expiry dates are available from Yahoo and when it receives no data from Yahoo (:issue:`8761`), (:issue:`8783`). - Fix: The font size was only set on x axis if vertical or the y axis if horizontal. (:issue:`8765`) - Fixed division by 0 when reading big csv files in python 3 (:issue:`8621`) -- Bug in outputing a Multindex with ``to_html,index=False`` which would add an extra column (:issue:`8452`) +- Bug in outputting a Multindex with ``to_html,index=False`` which would add an extra column (:issue:`8452`) - Imported categorical variables from Stata files retain the ordinal information in the underlying data (:issue:`8836`). 
- Defined ``.size`` attribute across ``NDFrame`` objects to provide compat with numpy >= 1.9.1; buggy with ``np.array_split`` (:issue:`8846`) - Skip testing of histogram plots for matplotlib <= 1.2 (:issue:`8648`). diff --git a/doc/source/whatsnew/v0.16.0.txt b/doc/source/whatsnew/v0.16.0.txt index 4d43660960597..48af06d124f2e 100644 --- a/doc/source/whatsnew/v0.16.0.txt +++ b/doc/source/whatsnew/v0.16.0.txt @@ -56,7 +56,7 @@ and the entire DataFrame (with all original and new columns) is returned. iris.assign(sepal_ratio=iris['SepalWidth'] / iris['SepalLength']).head() Above was an example of inserting a precomputed value. We can also pass in -a function to be evalutated. +a function to be evaluated. .. ipython :: python @@ -380,12 +380,29 @@ New Behavior For ease of creation of series of categorical data, we have added the ability to pass keywords when calling ``.astype()``. These are passed directly to the constructor. -.. ipython:: python +.. code-block:: python - s = Series(["a","b","c","a"]).astype('category',ordered=True) - s - s = Series(["a","b","c","a"]).astype('category',categories=list('abcdef'),ordered=False) - s + In [54]: s = Series(["a","b","c","a"]).astype('category',ordered=True) + + In [55]: s + Out[55]: + 0 a + 1 b + 2 c + 3 a + dtype: category + Categories (3, object): [a < b < c] + + In [56]: s = Series(["a","b","c","a"]).astype('category',categories=list('abcdef'),ordered=False) + + In [57]: s + Out[57]: + 0 a + 1 b + 2 c + 3 a + dtype: category + Categories (6, object): [a, b, c, d, e, f] .. _whatsnew_0160.api_breaking.other: @@ -578,7 +595,7 @@ Bug Fixes - Bug in ``unstack`` with ``TimedeltaIndex`` or ``DatetimeIndex`` and nulls (:issue:`9491`). - Bug in ``rank`` where comparing floats with tolerance will cause inconsistent behaviour (:issue:`8365`). - Fixed character encoding bug in ``read_stata`` and ``StataReader`` when loading data from a URL (:issue:`9231`). 
-- Bug in adding ``offsets.Nano`` to other offets raises ``TypeError`` (:issue:`9284`) +- Bug in adding ``offsets.Nano`` to other offsets raises ``TypeError`` (:issue:`9284`) - Bug in ``DatetimeIndex`` iteration, related to (:issue:`8890`), fixed in (:issue:`9100`) - Bugs in ``resample`` around DST transitions. This required fixing offset classes so they behave correctly on DST transitions. (:issue:`5172`, :issue:`8744`, :issue:`8653`, :issue:`9173`, :issue:`9468`). - Bug in binary operator method (eg ``.mul()``) alignment with integer levels (:issue:`9463`). @@ -594,7 +611,7 @@ Bug Fixes - Accessing ``Series.str`` methods on with non-string values now raises ``TypeError`` instead of producing incorrect results (:issue:`9184`) - Bug in ``DatetimeIndex.__contains__`` when index has duplicates and is not monotonic increasing (:issue:`9512`) - Fixed division by zero error for ``Series.kurt()`` when all values are equal (:issue:`9197`) -- Fixed issue in the ``xlsxwriter`` engine where it added a default 'General' format to cells if no other format wass applied. This prevented other row or column formatting being applied. (:issue:`9167`) +- Fixed issue in the ``xlsxwriter`` engine where it added a default 'General' format to cells if no other format was applied. This prevented other row or column formatting being applied. (:issue:`9167`) - Fixes issue with ``index_col=False`` when ``usecols`` is also specified in ``read_csv``. (:issue:`9082`) - Bug where ``wide_to_long`` would modify the input stubnames list (:issue:`9204`) - Bug in ``to_sql`` not storing float64 values using double precision. (:issue:`9009`) diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt index 1a3b8319aeb59..9e1dc391d7ace 100644 --- a/doc/source/whatsnew/v0.16.1.txt +++ b/doc/source/whatsnew/v0.16.1.txt @@ -41,48 +41,94 @@ indexing with duplicates. 
This is a container around a ``Categorical`` (introduc and allows efficient indexing and storage of an index with a large number of duplicated elements. Prior to 0.16.1, setting the index of a ``DataFrame/Series`` with a ``category`` dtype would convert this to regular object-based ``Index``. -.. ipython :: python +.. code-block:: ipython + + In [1]: df = DataFrame({'A' : np.arange(6), + ...: 'B' : Series(list('aabbca')).astype('category', + ...: categories=list('cab')) + ...: }) + ...: + + In [2]: df + Out[2]: + A B + 0 0 a + 1 1 a + 2 2 b + 3 3 b + 4 4 c + 5 5 a + + In [3]: df.dtypes + Out[3]: + A int64 + B category + dtype: object + + In [4]: df.B.cat.categories + Out[4]: Index(['c', 'a', 'b'], dtype='object') - df = DataFrame({'A' : np.arange(6), - 'B' : Series(list('aabbca')).astype('category', - categories=list('cab')) - }) - df - df.dtypes - df.B.cat.categories setting the index, will create create a ``CategoricalIndex`` -.. ipython :: python +.. code-block:: ipython + + In [5]: df2 = df.set_index('B') - df2 = df.set_index('B') - df2.index + In [6]: df2.index + Out[6]: CategoricalIndex(['a', 'a', 'b', 'b', 'c', 'a'], categories=['c', 'a', 'b'], ordered=False, name='B', dtype='category') indexing with ``__getitem__/.iloc/.loc/.ix`` works similarly to an Index with duplicates. The indexers MUST be in the category or the operation will raise. -.. ipython :: python +.. code-block:: ipython - df2.loc['a'] + In [7]: df2.loc['a'] + Out[7]: + A + B + a 0 + a 1 + a 5 and preserves the ``CategoricalIndex`` -.. ipython :: python +.. code-block:: ipython + + In [8]: df2.loc['a'].index + Out[8]: CategoricalIndex(['a', 'a', 'a'], categories=['c', 'a', 'b'], ordered=False, name='B', dtype='category') - df2.loc['a'].index sorting will order by the order of the categories -.. ipython :: python +.. 
code-block:: ipython - df2.sort_index() + In [9]: df2.sort_index() + Out[9]: + A + B + c 4 + a 0 + a 1 + a 5 + b 2 + b 3 groupby operations on the index will preserve the index nature as well -.. ipython :: python +.. code-block:: ipython + + In [10]: df2.groupby(level=0).sum() + Out[10]: + A + B + c 4 + a 6 + b 5 + + In [11]: df2.groupby(level=0).sum().index + Out[11]: CategoricalIndex(['c', 'a', 'b'], categories=['c', 'a', 'b'], ordered=False, name='B', dtype='category') - df2.groupby(level=0).sum() - df2.groupby(level=0).sum().index reindexing operations, will return a resulting index based on the type of the passed indexer, meaning that passing a list will return a plain-old-``Index``; indexing with @@ -90,12 +136,31 @@ a ``Categorical`` will return a ``CategoricalIndex``, indexed according to the c of the PASSED ``Categorical`` dtype. This allows one to arbitrarly index these even with values NOT in the categories, similarly to how you can reindex ANY pandas index. -.. ipython :: python +.. code-block:: ipython - df2.reindex(['a','e']) - df2.reindex(['a','e']).index - df2.reindex(pd.Categorical(['a','e'],categories=list('abcde'))) - df2.reindex(pd.Categorical(['a','e'],categories=list('abcde'))).index + In [12]: df2.reindex(['a','e']) + Out[12]: + A + B + a 0.0 + a 1.0 + a 5.0 + e NaN + + In [13]: df2.reindex(['a','e']).index + Out[13]: Index(['a', 'a', 'a', 'e'], dtype='object', name='B') + + In [14]: df2.reindex(pd.Categorical(['a','e'],categories=list('abcde'))) + Out[14]: + A + B + a 0.0 + a 1.0 + a 5.0 + e NaN + + In [15]: df2.reindex(pd.Categorical(['a','e'],categories=list('abcde'))).index + Out[15]: CategoricalIndex(['a', 'a', 'a', 'e'], categories=['a', 'b', 'c', 'd', 'e'], ordered=False, name='B', dtype='category') See the :ref:`documentation ` for more. (:issue:`7629`, :issue:`10038`, :issue:`10039`) @@ -248,7 +313,7 @@ Other Enhancements - Add/delete ``str/dt/cat`` accessors dynamically from ``__dir__``. 
(:issue:`9910`) - Add ``normalize`` as a ``dt`` accessor method. (:issue:`10047`) -- ``DataFrame`` and ``Series`` now have ``_constructor_expanddim`` property as overridable constructor for one higher dimensionality data. This should be used only when it is really needed, see :ref:`here ` +- ``DataFrame`` and ``Series`` now have ``_constructor_expanddim`` property as overridable constructor for one higher dimensionality data. This should be used only when it is really needed, see :ref:`here ` - ``pd.lib.infer_dtype`` now returns ``'bytes'`` in Python 3 where appropriate. (:issue:`10032`) diff --git a/doc/source/whatsnew/v0.16.2.txt b/doc/source/whatsnew/v0.16.2.txt index bfe44290e49d2..91ec0c3038985 100644 --- a/doc/source/whatsnew/v0.16.2.txt +++ b/doc/source/whatsnew/v0.16.2.txt @@ -63,10 +63,10 @@ of ``(function, keyword)`` indicating where the DataFrame should flow. For examp bb = pd.read_csv('data/baseball.csv', index_col='id') - # sm.poisson takes (formula, data) + # sm.ols takes (formula, data) (bb.query('h > 0') .assign(ln_h = lambda df: np.log(df.h)) - .pipe((sm.poisson, 'data'), 'hr ~ ln_h + year + g + C(lg)') + .pipe((sm.ols, 'data'), 'hr ~ ln_h + year + g + C(lg)') .fit() .summary() ) diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index 9cb299593076d..239b2ba96404c 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -329,7 +329,7 @@ has been changed to make this keyword unnecessary - the change is shown below. Google BigQuery Enhancements ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - Added ability to automatically create a table/dataset using the :func:`pandas.io.gbq.to_gbq` function if the destination table/dataset does not exist. (:issue:`8325`, :issue:`11121`). -- Added ability to replace an existing table and schema when calling the :func:`pandas.io.gbq.to_gbq` function via the ``if_exists`` argument. See the :ref:`docs ` for more details (:issue:`8325`). 
+- Added ability to replace an existing table and schema when calling the :func:`pandas.io.gbq.to_gbq` function via the ``if_exists`` argument. See the `docs `__ for more details (:issue:`8325`). - ``InvalidColumnOrder`` and ``InvalidPageToken`` in the gbq module will raise ``ValueError`` instead of ``IOError``. - The ``generate_bq_schema()`` function is now deprecated and will be removed in a future version (:issue:`11121`) - The gbq module will now support Python 3 (:issue:`11094`). @@ -1157,7 +1157,7 @@ Bug Fixes - Bug in ``.var()`` causing roundoff errors for highly similar values (:issue:`10242`) - Bug in ``DataFrame.plot(subplots=True)`` with duplicated columns outputs incorrect result (:issue:`10962`) - Bug in ``Index`` arithmetic may result in incorrect class (:issue:`10638`) -- Bug in ``date_range`` results in empty if freq is negative annualy, quarterly and monthly (:issue:`11018`) +- Bug in ``date_range`` results in empty if freq is negative annually, quarterly and monthly (:issue:`11018`) - Bug in ``DatetimeIndex`` cannot infer negative freq (:issue:`11018`) - Remove use of some deprecated numpy comparison operations, mainly in tests. (:issue:`10569`) - Bug in ``Index`` dtype may not applied properly (:issue:`11017`) diff --git a/doc/source/whatsnew/v0.17.1.txt b/doc/source/whatsnew/v0.17.1.txt index 17496c84b7181..6e5e113e859d7 100644 --- a/doc/source/whatsnew/v0.17.1.txt +++ b/doc/source/whatsnew/v0.17.1.txt @@ -5,7 +5,7 @@ v0.17.1 (November 21, 2015) .. note:: - We are proud to announce that *pandas* has become a sponsored project of the (`NUMFocus organization`_). This will help ensure the success of development of *pandas* as a world-class open-source project. + We are proud to announce that *pandas* has become a sponsored project of the (`NumFOCUS organization`_). This will help ensure the success of development of *pandas* as a world-class open-source project. .. 
_numfocus organization: http://www.numfocus.org/blog/numfocus-announces-new-fiscally-sponsored-project-pandas @@ -58,7 +58,7 @@ We can render the HTML to get the following table. :file: whatsnew_0171_html_table.html :class:`~pandas.core.style.Styler` interacts nicely with the Jupyter Notebook. -See the :ref:`documentation - -
- {% if caption %} - - {% endif %} - - - {% for r in head %} - - {% for c in r %} - {% if c.is_visible != False %} - <{{c.type}} class="{{c.class}}" {{ c.attributes|join(" ") }}> - {{c.value}} - {% endif %} - {% endfor %} - - {% endfor %} - - - {% for r in body %} - - {% for c in r %} - {% if c.is_visible != False %} - <{{c.type}} id="T_{{uuid}}{{c.id}}" - class="{{c.class}}" {{ c.attributes|join(" ") }}> - {{ c.display_value }} - {% endif %} - {% endfor %} - - {% endfor %} - -
{{caption}}
- """) - - def __init__(self, data, precision=None, table_styles=None, uuid=None, - caption=None, table_attributes=None): - self.ctx = defaultdict(list) - self._todo = [] - - if not isinstance(data, (pd.Series, pd.DataFrame)): - raise TypeError("``data`` must be a Series or DataFrame") - if data.ndim == 1: - data = data.to_frame() - if not data.index.is_unique or not data.columns.is_unique: - raise ValueError("style is not supported for non-unique indicies.") - - self.data = data - self.index = data.index - self.columns = data.columns - - self.uuid = uuid - self.table_styles = table_styles - self.caption = caption - if precision is None: - precision = get_option('display.precision') - self.precision = precision - self.table_attributes = table_attributes - # display_funcs maps (row, col) -> formatting function - - def default_display_func(x): - if is_float(x): - return '{:>.{precision}g}'.format(x, precision=self.precision) - else: - return x - - self._display_funcs = defaultdict(lambda: default_display_func) - - def _repr_html_(self): - """Hooks into Jupyter notebook rich display system.""" - return self.render() - - def _translate(self): - """ - Convert the DataFrame in `self.data` and the attrs from `_build_styles` - into a dictionary of {head, body, uuid, cellstyle} - """ - table_styles = self.table_styles or [] - caption = self.caption - ctx = self.ctx - precision = self.precision - uuid = self.uuid or str(uuid1()).replace("-", "_") - ROW_HEADING_CLASS = "row_heading" - COL_HEADING_CLASS = "col_heading" - INDEX_NAME_CLASS = "index_name" - - DATA_CLASS = "data" - BLANK_CLASS = "blank" - BLANK_VALUE = "" - - def format_attr(pair): - return "{key}={value}".format(**pair) - - # for sparsifying a MultiIndex - idx_lengths = _get_level_lengths(self.index) - col_lengths = _get_level_lengths(self.columns) - - cell_context = dict() - - n_rlvls = self.data.index.nlevels - n_clvls = self.data.columns.nlevels - rlabels = self.data.index.tolist() - clabels = 
self.data.columns.tolist() - - if n_rlvls == 1: - rlabels = [[x] for x in rlabels] - if n_clvls == 1: - clabels = [[x] for x in clabels] - clabels = list(zip(*clabels)) - - cellstyle = [] - head = [] - - for r in range(n_clvls): - # Blank for Index columns... - row_es = [{"type": "th", - "value": BLANK_VALUE, - "display_value": BLANK_VALUE, - "is_visible": True, - "class": " ".join([BLANK_CLASS])}] * (n_rlvls - 1) - - # ... except maybe the last for columns.names - name = self.data.columns.names[r] - cs = [BLANK_CLASS if name is None else INDEX_NAME_CLASS, - "level%s" % r] - name = BLANK_VALUE if name is None else name - row_es.append({"type": "th", - "value": name, - "display_value": name, - "class": " ".join(cs), - "is_visible": True}) - - for c in range(len(clabels[0])): - cs = [COL_HEADING_CLASS, "level%s" % r, "col%s" % c] - cs.extend(cell_context.get( - "col_headings", {}).get(r, {}).get(c, [])) - value = clabels[r][c] - row_es.append({"type": "th", - "value": value, - "display_value": value, - "class": " ".join(cs), - "is_visible": _is_visible(c, r, col_lengths), - "attributes": [ - format_attr({"key": "colspan", - "value": col_lengths.get( - (r, c), 1)}) - ]}) - head.append(row_es) - - if self.data.index.names and not all(x is None - for x in self.data.index.names): - index_header_row = [] - - for c, name in enumerate(self.data.index.names): - cs = [INDEX_NAME_CLASS, - "level%s" % c] - name = '' if name is None else name - index_header_row.append({"type": "th", "value": name, - "class": " ".join(cs)}) - - index_header_row.extend( - [{"type": "th", - "value": BLANK_VALUE, - "class": " ".join([BLANK_CLASS]) - }] * len(clabels[0])) - - head.append(index_header_row) - - body = [] - for r, idx in enumerate(self.data.index): - # cs.extend( - # cell_context.get("row_headings", {}).get(r, {}).get(c, [])) - row_es = [{"type": "th", - "is_visible": _is_visible(r, c, idx_lengths), - "attributes": [ - format_attr({"key": "rowspan", - "value": idx_lengths.get((c, r), 
1)}) - ], - "value": rlabels[r][c], - "class": " ".join([ROW_HEADING_CLASS, "level%s" % c, - "row%s" % r]), - "display_value": rlabels[r][c]} - for c in range(len(rlabels[r]))] - - for c, col in enumerate(self.data.columns): - cs = [DATA_CLASS, "row%s" % r, "col%s" % c] - cs.extend(cell_context.get("data", {}).get(r, {}).get(c, [])) - formatter = self._display_funcs[(r, c)] - value = self.data.iloc[r, c] - row_es.append({ - "type": "td", - "value": value, - "class": " ".join(cs), - "id": "_".join(cs[1:]), - "display_value": formatter(value) - }) - props = [] - for x in ctx[r, c]: - # have to handle empty styles like [''] - if x.count(":"): - props.append(x.split(":")) - else: - props.append(['', '']) - cellstyle.append({'props': props, - 'selector': "row%s_col%s" % (r, c)}) - body.append(row_es) - - return dict(head=head, cellstyle=cellstyle, body=body, uuid=uuid, - precision=precision, table_styles=table_styles, - caption=caption, table_attributes=self.table_attributes) - - def format(self, formatter, subset=None): - """ - Format the text display value of cells. - - .. versionadded:: 0.18.0 - - Parameters - ---------- - formatter: str, callable, or dict - subset: IndexSlice - An argument to ``DataFrame.loc`` that restricts which elements - ``formatter`` is applied to. - - Returns - ------- - self : Styler - - Notes - ----- - - ``formatter`` is either an ``a`` or a dict ``{column name: a}`` where - ``a`` is one of - - - str: this will be wrapped in: ``a.format(x)`` - - callable: called with the value of an individual cell - - The default display value for numeric values is the "general" (``g``) - format with ``pd.options.display.precision`` precision. 
- - Examples - -------- - - >>> df = pd.DataFrame(np.random.randn(4, 2), columns=['a', 'b']) - >>> df.style.format("{:.2%}") - >>> df['c'] = ['a', 'b', 'c', 'd'] - >>> df.style.format({'C': str.upper}) - """ - if subset is None: - row_locs = range(len(self.data)) - col_locs = range(len(self.data.columns)) - else: - subset = _non_reducing_slice(subset) - if len(subset) == 1: - subset = subset, self.data.columns - - sub_df = self.data.loc[subset] - row_locs = self.data.index.get_indexer_for(sub_df.index) - col_locs = self.data.columns.get_indexer_for(sub_df.columns) - - if isinstance(formatter, MutableMapping): - for col, col_formatter in formatter.items(): - # formatter must be callable, so '{}' are converted to lambdas - col_formatter = _maybe_wrap_formatter(col_formatter) - col_num = self.data.columns.get_indexer_for([col])[0] - - for row_num in row_locs: - self._display_funcs[(row_num, col_num)] = col_formatter - else: - # single scalar to format all cells with - locs = product(*(row_locs, col_locs)) - for i, j in locs: - formatter = _maybe_wrap_formatter(formatter) - self._display_funcs[(i, j)] = formatter - return self - - def render(self): - """ - Render the built up styles to HTML - - .. versionadded:: 0.17.1 - - Returns - ------- - rendered: str - the rendered HTML - - Notes - ----- - ``Styler`` objects have defined the ``_repr_html_`` method - which automatically calls ``self.render()`` when it's the - last item in a Notebook cell. When calling ``Styler.render()`` - directly, wrap the result in ``IPython.display.HTML`` to view - the rendered HTML in the notebook. - """ - self._compute() - d = self._translate() - # filter out empty styles, every cell will have a class - # but the list of props may just be [['', '']]. 
- # so we have the neested anys below - trimmed = [x for x in d['cellstyle'] - if any(any(y) for y in x['props'])] - d['cellstyle'] = trimmed - return self.template.render(**d) - - def _update_ctx(self, attrs): - """ - update the state of the Styler. Collects a mapping - of {index_label: [': ']} - - attrs: Series or DataFrame - should contain strings of ': ;: ' - Whitespace shouldn't matter and the final trailing ';' shouldn't - matter. - """ - for row_label, v in attrs.iterrows(): - for col_label, col in v.iteritems(): - i = self.index.get_indexer([row_label])[0] - j = self.columns.get_indexer([col_label])[0] - for pair in col.rstrip(";").split(";"): - self.ctx[(i, j)].append(pair) - - def _copy(self, deepcopy=False): - styler = Styler(self.data, precision=self.precision, - caption=self.caption, uuid=self.uuid, - table_styles=self.table_styles) - if deepcopy: - styler.ctx = copy.deepcopy(self.ctx) - styler._todo = copy.deepcopy(self._todo) - else: - styler.ctx = self.ctx - styler._todo = self._todo - return styler - - def __copy__(self): - """ - Deep copy by default. - """ - return self._copy(deepcopy=False) - - def __deepcopy__(self, memo): - return self._copy(deepcopy=True) - - def clear(self): - """"Reset" the styler, removing any previously applied styles. - Returns None. - """ - self.ctx.clear() - self._todo = [] - - def _compute(self): - """ - Execute the style functions built up in `self._todo`. - - Relies on the conventions that all style functions go through - .apply or .applymap. 
The append styles to apply as tuples of - - (application method, *args, **kwargs) - """ - r = self - for func, args, kwargs in self._todo: - r = func(self)(*args, **kwargs) - return r - - def _apply(self, func, axis=0, subset=None, **kwargs): - subset = slice(None) if subset is None else subset - subset = _non_reducing_slice(subset) - data = self.data.loc[subset] - if axis is not None: - result = data.apply(func, axis=axis, **kwargs) - else: - result = func(data, **kwargs) - if not isinstance(result, pd.DataFrame): - raise TypeError( - "Function {!r} must return a DataFrame when " - "passed to `Styler.apply` with axis=None".format(func)) - if not (result.index.equals(data.index) and - result.columns.equals(data.columns)): - msg = ('Result of {!r} must have identical index and columns ' - 'as the input'.format(func)) - raise ValueError(msg) - - result_shape = result.shape - expected_shape = self.data.loc[subset].shape - if result_shape != expected_shape: - msg = ("Function {!r} returned the wrong shape.\n" - "Result has shape: {}\n" - "Expected shape: {}".format(func, - result.shape, - expected_shape)) - raise ValueError(msg) - self._update_ctx(result) - return self - - def apply(self, func, axis=0, subset=None, **kwargs): - """ - Apply a function column-wise, row-wise, or table-wase, - updating the HTML representation with the result. - - .. versionadded:: 0.17.1 - - Parameters - ---------- - func : function - ``func`` should take a Series or DataFrame (depending - on ``axis``), and return an object with the same shape. - Must return a DataFrame with identical index and - column labels when ``axis=None`` - axis : int, str or None - apply to each column (``axis=0`` or ``'index'``) - or to each row (``axis=1`` or ``'columns'``) or - to the entire DataFrame at once with ``axis=None`` - subset : IndexSlice - a valid indexer to limit ``data`` to *before* applying the - function. 
Consider using a pandas.IndexSlice - kwargs : dict - pass along to ``func`` - - Returns - ------- - self : Styler - - Notes - ----- - The output shape of ``func`` should match the input, i.e. if - ``x`` is the input row, column, or table (depending on ``axis``), - then ``func(x.shape) == x.shape`` should be true. - - This is similar to ``DataFrame.apply``, except that ``axis=None`` - applies the function to the entire DataFrame at once, - rather than column-wise or row-wise. - - Examples - -------- - >>> def highlight_max(x): - ... return ['background-color: yellow' if v == x.max() else '' - for v in x] - ... - >>> df = pd.DataFrame(np.random.randn(5, 2)) - >>> df.style.apply(highlight_max) - """ - self._todo.append((lambda instance: getattr(instance, '_apply'), - (func, axis, subset), kwargs)) - return self - - def _applymap(self, func, subset=None, **kwargs): - func = partial(func, **kwargs) # applymap doesn't take kwargs? - if subset is None: - subset = pd.IndexSlice[:] - subset = _non_reducing_slice(subset) - result = self.data.loc[subset].applymap(func) - self._update_ctx(result) - return self - - def applymap(self, func, subset=None, **kwargs): - """ - Apply a function elementwise, updating the HTML - representation with the result. - - .. versionadded:: 0.17.1 - - Parameters - ---------- - func : function - ``func`` should take a scalar and return a scalar - subset : IndexSlice - a valid indexer to limit ``data`` to *before* applying the - function. Consider using a pandas.IndexSlice - kwargs : dict - pass along to ``func`` - - Returns - ------- - self : Styler - - """ - self._todo.append((lambda instance: getattr(instance, '_applymap'), - (func, subset), kwargs)) - return self - - def set_precision(self, precision): - """ - Set the precision used to render. - - .. 
versionadded:: 0.17.1 - - Parameters - ---------- - precision: int - - Returns - ------- - self : Styler - """ - self.precision = precision - return self - - def set_table_attributes(self, attributes): - """ - Set the table attributes. These are the items - that show up in the opening ```` tag in addition - to to automatic (by default) id. - - .. versionadded:: 0.17.1 - - Parameters - ---------- - precision: int - - Returns - ------- - self : Styler - """ - self.table_attributes = attributes - return self - - def export(self): - """ - Export the styles to applied to the current Styler. - Can be applied to a second style with ``Styler.use``. - - .. versionadded:: 0.17.1 - - Returns - ------- - styles: list - - See Also - -------- - Styler.use - """ - return self._todo - - def use(self, styles): - """ - Set the styles on the current Styler, possibly using styles - from ``Styler.export``. - - .. versionadded:: 0.17.1 - - Parameters - ---------- - styles: list - list of style functions - - Returns - ------- - self : Styler - - See Also - -------- - Styler.export - """ - self._todo.extend(styles) - return self - - def set_uuid(self, uuid): - """ - Set the uuid for a Styler. - - .. versionadded:: 0.17.1 - - Parameters - ---------- - uuid: str - - Returns - ------- - self : Styler - """ - self.uuid = uuid - return self - - def set_caption(self, caption): - """ - Se the caption on a Styler - - .. versionadded:: 0.17.1 - - Parameters - ---------- - caption: str - - Returns - ------- - self : Styler - """ - self.caption = caption - return self - - def set_table_styles(self, table_styles): - """ - Set the table styles on a Styler. 
These are placed in a - ``""" + template_select = """\ + .dataframe %s { + %s: %s; + }""" + element_props = [('tbody tr th:only-of-type', + 'vertical-align', + 'middle'), + ('tbody tr th', + 'vertical-align', + 'top')] + if isinstance(self.columns, MultiIndex): + element_props.append(('thead tr th', + 'text-align', + 'left')) + if all((self.fmt.has_index_names, + self.fmt.index, + self.fmt.show_index_names)): + element_props.append(('thead tr:last-of-type th', + 'text-align', + 'right')) + else: + element_props.append(('thead th', + 'text-align', + 'right')) + template_mid = '\n\n'.join(map(lambda t: template_select % t, + element_props)) + template = dedent('\n'.join((template_first, + template_mid, + template_last))) + if self.notebook: + self.write(template) + def write_result(self, buf): indent = 0 + id_section = "" frame = self.frame _classes = ['dataframe'] # Default class. + use_mathjax = get_option("display.html.use_mathjax") + if not use_mathjax: + _classes.append('tex2jax_ignore') if self.classes is not None: if isinstance(self.classes, str): self.classes = self.classes.split() if not isinstance(self.classes, (list, tuple)): - raise AssertionError('classes must be list or tuple, ' - 'not %s' % type(self.classes)) + raise AssertionError('classes must be list or tuple, not {typ}' + .format(typ=type(self.classes))) _classes.extend(self.classes) if self.notebook: @@ -1017,11 +1229,15 @@ def write_result(self, buf): except (ImportError, AttributeError): pass - self.write(''.format(div_style)) + self.write(''.format(style=div_style)) - self.write('
' % (self.border, - ' '.join(_classes)), - indent) + self.write_style() + + if self.table_id is not None: + id_section = ' id="{table_id}"'.format(table_id=self.table_id) + self.write('
' + .format(border=self.border, cls=' '.join(_classes), + id_section=id_section), indent) indent += self.indent_delta indent = self._write_header(indent) @@ -1030,8 +1246,10 @@ def write_result(self, buf): self.write('
', indent) if self.should_show_dimensions: by = chr(215) if compat.PY3 else unichr(215) # × - self.write(u('

%d rows %s %d columns

') % - (len(frame), by, len(frame.columns))) + self.write(u('

{rows} rows {by} {cols} columns

') + .format(rows=len(frame), + by=by, + cols=len(frame.columns))) if self.notebook: self.write('') @@ -1056,7 +1274,7 @@ def _column_header(): row.append(single_column_table(self.columns.names)) else: row.append('') - style = "text-align: %s;" % self.fmt.justify + style = "text-align: {just};".format(just=self.fmt.justify) row.extend([single_column_table(c, self.fmt.justify, style) for c in self.columns]) else: @@ -1071,7 +1289,7 @@ def _column_header(): indent += self.indent_delta if isinstance(self.columns, MultiIndex): - template = 'colspan="%d" halign="left"' + template = 'colspan="{span:d}" halign="left"' if self.fmt.sparsify: # GH3547 @@ -1080,7 +1298,7 @@ def _column_header(): sentinel = None levels = self.columns.format(sparsify=sentinel, adjoin=False, names=False) - level_lengths = _get_level_lengths(levels, sentinel) + level_lengths = get_level_lengths(levels, sentinel) inner_lvl = len(level_lengths) - 1 for lnum, (records, values) in enumerate(zip(level_lengths, levels)): @@ -1139,7 +1357,7 @@ def _column_header(): for i, v in enumerate(values): if i in records: if records[i] > 1: - tags[j] = template % records[i] + tags[j] = template.format(span=records[i]) else: continue j += 1 @@ -1157,7 +1375,9 @@ def _column_header(): self.write_tr(col_row, indent, self.indent_delta, header=True, align=align) - if self.fmt.has_index_names and self.fmt.index: + if all((self.fmt.has_index_names, + self.fmt.index, + self.fmt.show_index_names)): row = ([x if x is not None else '' for x in self.frame.index.names] + [''] * min(len(self.columns), self.max_cols)) @@ -1227,7 +1447,7 @@ def _write_regular_rows(self, fmt_values, indent): nindex_levels=1) def _write_hierarchical_rows(self, fmt_values, indent): - template = 'rowspan="%d" valign="top"' + template = 'rowspan="{span}" valign="top"' truncate_h = self.fmt.truncate_h truncate_v = self.fmt.truncate_v @@ -1246,7 +1466,7 @@ def _write_hierarchical_rows(self, fmt_values, indent): levels = 
frame.index.format(sparsify=sentinel, adjoin=False, names=False) - level_lengths = _get_level_lengths(levels, sentinel) + level_lengths = get_level_lengths(levels, sentinel) inner_lvl = len(level_lengths) - 1 if truncate_v: # Insert ... row and adjust idx_values and @@ -1302,7 +1522,7 @@ def _write_hierarchical_rows(self, fmt_values, indent): for records, v in zip(level_lengths, idx_values[i]): if i in records: if records[i] > 1: - tags[j] = template % records[i] + tags[j] = template.format(span=records[i]) else: sparse_offset += 1 continue @@ -1329,46 +1549,6 @@ def _write_hierarchical_rows(self, fmt_values, indent): nindex_levels=frame.index.nlevels) -def _get_level_lengths(levels, sentinel=''): - """For each index in each level the function returns lengths of indexes. - - Parameters - ---------- - levels : list of lists - List of values on for level. - sentinel : string, optional - Value which states that no new index starts on there. - - Returns - ---------- - Returns list of maps. For each level returns map of indexes (key is index - in row and value is length of index). 
- """ - if len(levels) == 0: - return [] - - control = [True for x in levels[0]] - - result = [] - for level in levels: - last_index = 0 - - lengths = {} - for i, key in enumerate(level): - if control[i] and key == sentinel: - pass - else: - control[i] = False - lengths[last_index] = i - last_index - last_index = i - - lengths[last_index] = len(level) - last_index - - result.append(lengths) - - return result - - class CSVFormatter(object): def __init__(self, obj, path_or_buf=None, sep=",", na_rep='', @@ -1384,7 +1564,7 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='', if path_or_buf is None: path_or_buf = StringIO() - self.path_or_buf = _expand_user(path_or_buf) + self.path_or_buf = _expand_user(_stringify_path(path_or_buf)) self.sep = sep self.na_rep = na_rep self.float_format = float_format @@ -1457,12 +1637,9 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='', self.chunksize = int(chunksize) self.data_index = obj.index - if isinstance(obj.index, PeriodIndex): - self.data_index = obj.index.to_timestamp() - - if (isinstance(self.data_index, DatetimeIndex) and + if (isinstance(self.data_index, (DatetimeIndex, PeriodIndex)) and date_format is not None): - self.data_index = Index([x.strftime(date_format) if notnull(x) else + self.data_index = Index([x.strftime(date_format) if notna(x) else '' for x in self.data_index]) self.nlevels = getattr(self.data_index, 'nlevels', 1) @@ -1471,12 +1648,20 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='', def save(self): # create the writer & save + if self.encoding is None: + if compat.PY2: + encoding = 'ascii' + else: + encoding = 'utf-8' + else: + encoding = self.encoding + if hasattr(self.path_or_buf, 'write'): f = self.path_or_buf close = False else: f, handles = _get_handle(self.path_or_buf, self.mode, - encoding=self.encoding, + encoding=encoding, compression=self.compression) close = True @@ -1486,11 +1671,11 @@ def save(self): doublequote=self.doublequote, 
escapechar=self.escapechar, quotechar=self.quotechar) - if self.encoding is not None: - writer_kwargs['encoding'] = self.encoding - self.writer = UnicodeWriter(f, **writer_kwargs) - else: + if encoding == 'ascii': self.writer = csv.writer(f, **writer_kwargs) + else: + writer_kwargs['encoding'] = encoding + self.writer = UnicodeWriter(f, **writer_kwargs) self._save() @@ -1513,8 +1698,9 @@ def _save_header(self): return if has_aliases: if len(header) != len(cols): - raise ValueError(('Writing %d cols but got %d aliases' - % (len(cols), len(header)))) + raise ValueError(('Writing {ncols} cols but got {nalias} ' + 'aliases'.format(ncols=len(cols), + nalias=len(header)))) else: write_cols = header else: @@ -1545,7 +1731,7 @@ def _save_header(self): else: encoded_labels = [] - if not has_mi_columns: + if not has_mi_columns or has_aliases: encoded_labels += list(write_cols) writer.writerow(encoded_labels) else: @@ -1566,7 +1752,7 @@ def _save_header(self): if isinstance(index_label, list) and len(index_label) > 1: col_line.extend([''] * (len(index_label) - 1)) - col_line.extend(columns.get_level_values(i)) + col_line.extend(columns._get_level_values(i)) writer.writerow(col_line) @@ -1619,300 +1805,9 @@ def _save_chunk(self, start_i, end_i): date_format=self.date_format, quoting=self.quoting) - lib.write_csv_rows(self.data, ix, self.nlevels, self.cols, self.writer) + libwriters.write_csv_rows(self.data, ix, self.nlevels, + self.cols, self.writer) -# from collections import namedtuple -# ExcelCell = namedtuple("ExcelCell", -# 'row, col, val, style, mergestart, mergeend') - - -class ExcelCell(object): - __fields__ = ('row', 'col', 'val', 'style', 'mergestart', 'mergeend') - __slots__ = __fields__ - - def __init__(self, row, col, val, style=None, mergestart=None, - mergeend=None): - self.row = row - self.col = col - self.val = val - self.style = style - self.mergestart = mergestart - self.mergeend = mergeend - - -header_style = {"font": {"bold": True}, - "borders": {"top": 
"thin", - "right": "thin", - "bottom": "thin", - "left": "thin"}, - "alignment": {"horizontal": "center", - "vertical": "top"}} - - -class ExcelFormatter(object): - """ - Class for formatting a DataFrame to a list of ExcelCells, - - Parameters - ---------- - df : dataframe - na_rep: na representation - float_format : string, default None - Format string for floating point numbers - cols : sequence, optional - Columns to write - header : boolean or list of string, default True - Write out column names. If a list of string is given it is - assumed to be aliases for the column names - index : boolean, default True - output row names (index) - index_label : string or sequence, default None - Column label for index column(s) if desired. If None is given, and - `header` and `index` are True, then the index names are used. A - sequence should be given if the DataFrame uses MultiIndex. - merge_cells : boolean, default False - Format MultiIndex and Hierarchical Rows as merged cells. - inf_rep : string, default `'inf'` - representation for np.inf values (which aren't representable in Excel) - A `'-'` sign will be added in front of -inf. 
- """ - - def __init__(self, df, na_rep='', float_format=None, cols=None, - header=True, index=True, index_label=None, merge_cells=False, - inf_rep='inf'): - self.rowcounter = 0 - self.na_rep = na_rep - self.df = df - if cols is not None: - self.df = df.loc[:, cols] - self.columns = self.df.columns - self.float_format = float_format - self.index = index - self.index_label = index_label - self.header = header - self.merge_cells = merge_cells - self.inf_rep = inf_rep - - def _format_value(self, val): - if lib.checknull(val): - val = self.na_rep - elif is_float(val): - if lib.isposinf_scalar(val): - val = self.inf_rep - elif lib.isneginf_scalar(val): - val = '-%s' % self.inf_rep - elif self.float_format is not None: - val = float(self.float_format % val) - return val - - def _format_header_mi(self): - if self.columns.nlevels > 1: - if not self.index: - raise NotImplementedError("Writing to Excel with MultiIndex" - " columns and no index " - "('index'=False) is not yet " - "implemented.") - - has_aliases = isinstance(self.header, (tuple, list, np.ndarray, Index)) - if not (has_aliases or self.header): - return - - columns = self.columns - level_strs = columns.format(sparsify=self.merge_cells, adjoin=False, - names=False) - level_lengths = _get_level_lengths(level_strs) - coloffset = 0 - lnum = 0 - - if self.index and isinstance(self.df.index, MultiIndex): - coloffset = len(self.df.index[0]) - 1 - - if self.merge_cells: - # Format multi-index as a merged cells. 
- for lnum in range(len(level_lengths)): - name = columns.names[lnum] - yield ExcelCell(lnum, coloffset, name, header_style) - - for lnum, (spans, levels, labels) in enumerate(zip( - level_lengths, columns.levels, columns.labels)): - values = levels.take(labels) - for i in spans: - if spans[i] > 1: - yield ExcelCell(lnum, coloffset + i + 1, values[i], - header_style, lnum, - coloffset + i + spans[i]) - else: - yield ExcelCell(lnum, coloffset + i + 1, values[i], - header_style) - else: - # Format in legacy format with dots to indicate levels. - for i, values in enumerate(zip(*level_strs)): - v = ".".join(map(pprint_thing, values)) - yield ExcelCell(lnum, coloffset + i + 1, v, header_style) - - self.rowcounter = lnum - - def _format_header_regular(self): - has_aliases = isinstance(self.header, (tuple, list, np.ndarray, Index)) - if has_aliases or self.header: - coloffset = 0 - - if self.index: - coloffset = 1 - if isinstance(self.df.index, MultiIndex): - coloffset = len(self.df.index[0]) - - colnames = self.columns - if has_aliases: - if len(self.header) != len(self.columns): - raise ValueError('Writing %d cols but got %d aliases' % - (len(self.columns), len(self.header))) - else: - colnames = self.header - - for colindex, colname in enumerate(colnames): - yield ExcelCell(self.rowcounter, colindex + coloffset, colname, - header_style) - - def _format_header(self): - if isinstance(self.columns, MultiIndex): - gen = self._format_header_mi() - else: - gen = self._format_header_regular() - - gen2 = () - if self.df.index.names: - row = [x if x is not None else '' - for x in self.df.index.names] + [''] * len(self.columns) - if reduce(lambda x, y: x and y, map(lambda x: x != '', row)): - gen2 = (ExcelCell(self.rowcounter, colindex, val, header_style) - for colindex, val in enumerate(row)) - self.rowcounter += 1 - return itertools.chain(gen, gen2) - - def _format_body(self): - - if isinstance(self.df.index, MultiIndex): - return self._format_hierarchical_rows() - else: - 
return self._format_regular_rows() - - def _format_regular_rows(self): - has_aliases = isinstance(self.header, (tuple, list, np.ndarray, Index)) - if has_aliases or self.header: - self.rowcounter += 1 - - coloffset = 0 - # output index and index_label? - if self.index: - # chek aliases - # if list only take first as this is not a MultiIndex - if (self.index_label and - isinstance(self.index_label, (list, tuple, np.ndarray, - Index))): - index_label = self.index_label[0] - # if string good to go - elif self.index_label and isinstance(self.index_label, str): - index_label = self.index_label - else: - index_label = self.df.index.names[0] - - if isinstance(self.columns, MultiIndex): - self.rowcounter += 1 - - if index_label and self.header is not False: - yield ExcelCell(self.rowcounter - 1, 0, index_label, - header_style) - - # write index_values - index_values = self.df.index - if isinstance(self.df.index, PeriodIndex): - index_values = self.df.index.to_timestamp() - - coloffset = 1 - for idx, idxval in enumerate(index_values): - yield ExcelCell(self.rowcounter + idx, 0, idxval, header_style) - - # Write the body of the frame data series by series. 
- for colidx in range(len(self.columns)): - series = self.df.iloc[:, colidx] - for i, val in enumerate(series): - yield ExcelCell(self.rowcounter + i, colidx + coloffset, val) - - def _format_hierarchical_rows(self): - has_aliases = isinstance(self.header, (tuple, list, np.ndarray, Index)) - if has_aliases or self.header: - self.rowcounter += 1 - - gcolidx = 0 - - if self.index: - index_labels = self.df.index.names - # check for aliases - if (self.index_label and - isinstance(self.index_label, (list, tuple, np.ndarray, - Index))): - index_labels = self.index_label - - # MultiIndex columns require an extra row - # with index names (blank if None) for - # unambigous round-trip, unless not merging, - # in which case the names all go on one row Issue #11328 - if isinstance(self.columns, MultiIndex) and self.merge_cells: - self.rowcounter += 1 - - # if index labels are not empty go ahead and dump - if (any(x is not None for x in index_labels) and - self.header is not False): - - for cidx, name in enumerate(index_labels): - yield ExcelCell(self.rowcounter - 1, cidx, name, - header_style) - - if self.merge_cells: - # Format hierarchical rows as merged cells. - level_strs = self.df.index.format(sparsify=True, adjoin=False, - names=False) - level_lengths = _get_level_lengths(level_strs) - - for spans, levels, labels in zip(level_lengths, - self.df.index.levels, - self.df.index.labels): - - values = levels.take(labels, - allow_fill=levels._can_hold_na, - fill_value=True) - - for i in spans: - if spans[i] > 1: - yield ExcelCell(self.rowcounter + i, gcolidx, - values[i], header_style, - self.rowcounter + i + spans[i] - 1, - gcolidx) - else: - yield ExcelCell(self.rowcounter + i, gcolidx, - values[i], header_style) - gcolidx += 1 - - else: - # Format hierarchical rows with non-merged values. 
- for indexcolvals in zip(*self.df.index): - for idx, indexcolval in enumerate(indexcolvals): - yield ExcelCell(self.rowcounter + idx, gcolidx, - indexcolval, header_style) - gcolidx += 1 - - # Write the body of the frame data series by series. - for colidx in range(len(self.columns)): - series = self.df.iloc[:, colidx] - for i, val in enumerate(series): - yield ExcelCell(self.rowcounter + i, gcolidx + colidx, val) - - def get_formatted_cells(self): - for cell in itertools.chain(self._format_header(), - self._format_body()): - cell.val = self._format_value(cell.val) - yield cell # ---------------------------------------------------------------------- # Array formatters @@ -1923,6 +1818,8 @@ def format_array(values, formatter, float_format=None, na_rep='NaN', if is_categorical_dtype(values): fmt_klass = CategoricalArrayFormatter + elif is_interval_dtype(values): + fmt_klass = IntervalArrayFormatter elif is_float_dtype(values.dtype): fmt_klass = FloatArrayFormatter elif is_period_arraylike(values): @@ -1978,8 +1875,9 @@ def _format_strings(self): if self.float_format is None: float_format = get_option("display.float_format") if float_format is None: - fmt_str = '%% .%dg' % get_option("display.precision") - float_format = lambda x: fmt_str % x + fmt_str = ('{{x: .{prec:d}g}}' + .format(prec=get_option("display.precision"))) + float_format = lambda x: fmt_str.format(x=x) else: float_format = self.float_format @@ -1988,17 +1886,17 @@ def _format_strings(self): (lambda x: pprint_thing(x, escape_chars=('\t', '\r', '\n')))) def _format(x): - if self.na_rep is not None and lib.checknull(x): + if self.na_rep is not None and is_scalar(x) and isna(x): if x is None: return 'None' elif x is pd.NaT: return 'NaT' return self.na_rep elif isinstance(x, PandasObject): - return '%s' % x + return u'{x}'.format(x=x) else: # object dtype - return '%s' % formatter(x) + return u'{x}'.format(x=formatter(x)) vals = self.values if isinstance(vals, Index): @@ -2006,17 +1904,17 @@ def 
_format(x): elif isinstance(vals, ABCSparseArray): vals = vals.values - is_float_type = lib.map_infer(vals, is_float) & notnull(vals) + is_float_type = lib.map_infer(vals, is_float) & notna(vals) leading_space = is_float_type.any() fmt_values = [] for i, v in enumerate(vals): if not is_float_type[i] and leading_space: - fmt_values.append(' %s' % _format(v)) + fmt_values.append(u' {v}'.format(v=_format(v))) elif is_float_type[i]: fmt_values.append(float_format(v)) else: - fmt_values.append(' %s' % _format(v)) + fmt_values.append(u' {v}'.format(v=_format(v))) return fmt_values @@ -2052,10 +1950,10 @@ def _value_formatter(self, float_format=None, threshold=None): # because str(0.0) = '0.0' while '%g' % 0.0 = '0' if float_format: def base_formatter(v): - return (float_format % v) if notnull(v) else self.na_rep + return float_format(value=v) if notna(v) else self.na_rep else: def base_formatter(v): - return str(v) if notnull(v) else self.na_rep + return str(v) if notna(v) else self.na_rep if self.decimal != '.': def decimal_formatter(v): @@ -2067,7 +1965,7 @@ def decimal_formatter(v): return decimal_formatter def formatter(value): - if notnull(value): + if notna(value): if abs(value) > threshold: return decimal_formatter(value) else: @@ -2080,7 +1978,7 @@ def formatter(value): def get_result_as_array(self): """ Returns the float values converted into strings using - the parameters given at initalisation, as a numpy array + the parameters given at initialisation, as a numpy array """ if self.formatter is not None: @@ -2097,7 +1995,7 @@ def format_values_with(float_format): # separate the wheat from the chaff values = self.values - mask = isnull(values) + mask = isna(values) if hasattr(values, 'to_dense'): # sparse numpy ndarray values = values.to_dense() values = np.array(values, dtype='object') @@ -2113,10 +2011,14 @@ def format_values_with(float_format): # There is a special default string when we are fixed-width # The default is otherwise to use str instead of a 
formatting string - if self.float_format is None and self.fixed_width: - float_format = '%% .%df' % self.digits + if self.float_format is None: + if self.fixed_width: + float_format = partial('{value: .{digits:d}f}'.format, + digits=self.digits) + else: + float_format = self.float_format else: - float_format = self.float_format + float_format = lambda value: self.float_format % value formatted_values = format_values_with(float_format) @@ -2143,7 +2045,8 @@ def format_values_with(float_format): (abs_vals > 0)).any() if has_small_values or (too_long and has_large_values): - float_format = '%% .%de' % self.digits + float_format = partial('{value: .{digits:d}e}'.format, + digits=self.digits) formatted_values = format_values_with(float_format) return formatted_values @@ -2159,7 +2062,7 @@ def _format_strings(self): class IntArrayFormatter(GenericArrayFormatter): def _format_strings(self): - formatter = self.formatter or (lambda x: '% d' % x) + formatter = self.formatter or (lambda x: '{x: d}'.format(x=x)) fmt_values = [formatter(x) for x in self.values] return fmt_values @@ -2190,17 +2093,28 @@ def _format_strings(self): return fmt_values.tolist() +class IntervalArrayFormatter(GenericArrayFormatter): + + def __init__(self, values, *args, **kwargs): + GenericArrayFormatter.__init__(self, values, *args, **kwargs) + + def _format_strings(self): + formatter = self.formatter or str + fmt_values = np.array([formatter(x) for x in self.values]) + return fmt_values + + class PeriodArrayFormatter(IntArrayFormatter): def _format_strings(self): - from pandas.tseries.period import IncompatibleFrequency + from pandas.core.indexes.period import IncompatibleFrequency try: values = PeriodIndex(self.values).to_native_types() except IncompatibleFrequency: # periods may contains different freq values = Index(self.values, dtype='object').to_native_types() - formatter = self.formatter or (lambda x: '%s' % x) + formatter = self.formatter or (lambda x: '{x}'.format(x=x)) fmt_values = 
[formatter(x) for x in values] return fmt_values @@ -2291,14 +2205,14 @@ def _is_dates_only(values): consider_values = values_int != iNaT one_day_nanos = (86400 * 1e9) even_days = np.logical_and(consider_values, - values_int % one_day_nanos != 0).sum() == 0 + values_int % int(one_day_nanos) != 0).sum() == 0 if even_days: return True return False def _format_datetime64(x, tz=None, nat_rep='NaT'): - if x is None or lib.checknull(x): + if x is None or (is_scalar(x) and isna(x)): return nat_rep if tz is not None or not isinstance(x, Timestamp): @@ -2308,7 +2222,7 @@ def _format_datetime64(x, tz=None, nat_rep='NaT'): def _format_datetime64_dateonly(x, nat_rep='NaT', date_format=None): - if x is None or lib.checknull(x): + if x is None or (is_scalar(x) and isna(x)): return nat_rep if not isinstance(x, Timestamp): @@ -2342,7 +2256,7 @@ class Datetime64TZFormatter(Datetime64Formatter): def _format_strings(self): """ we by definition have a TZ """ - values = self.values.asobject + values = self.values.astype(object) is_dates_only = _is_dates_only(values) formatter = (self.formatter or _get_format_datetime64(is_dates_only, @@ -2386,21 +2300,21 @@ def _get_format_timedelta64(values, nat_rep='NaT', box=False): consider_values, np.abs(values_int) >= one_day_nanos).sum() == 0 if even_days: - format = 'even_day' + format = None elif all_sub_day: format = 'sub_day' else: format = 'long' def _formatter(x): - if x is None or lib.checknull(x): + if x is None or (is_scalar(x) and isna(x)): return nat_rep if not isinstance(x, Timedelta): x = Timedelta(x) result = x._repr_base(format=format) if box: - result = "'{0}'".format(result) + result = "'{res}'".format(res=result) return result return _formatter @@ -2414,7 +2328,7 @@ def _make_fixed_width(strings, justify='right', minimum=None, adj=None): if adj is None: adj = _get_adjustment() - max_len = np.max([adj.len(x) for x in strings]) + max_len = max(adj.len(x) for x in strings) if minimum is not None: max_len = max(minimum, max_len) @@ 
-2442,8 +2356,8 @@ def _trim_zeros(str_floats, na_rep='NaN'): def _cond(values): non_na = [x for x in values if x != na_rep] - return (len(non_na) > 0 and all([x.endswith('0') for x in non_na]) and - not (any([('e' in x) or ('E' in x) for x in non_na]))) + return (len(non_na) > 0 and all(x.endswith('0') for x in non_na) and + not (any(('e' in x) or ('E' in x) for x in non_na))) while _cond(trimmed): trimmed = [x[:-1] if x != na_rep else x for x in trimmed] @@ -2455,12 +2369,12 @@ def _cond(values): def single_column_table(column, align=None, style=None): table = '
%s
{i!s}
' return table @@ -2468,93 +2382,17 @@ def single_column_table(column, align=None, style=None): def single_row_table(row): # pragma: no cover table = '' for i in row: - table += ('' % str(i)) + table += (''.format(i=i)) table += '
%s{i!s}
' return table def _has_names(index): if isinstance(index, MultiIndex): - return any([x is not None for x in index.names]) + return com._any_not_none(*index.names) else: return index.name is not None -# ----------------------------------------------------------------------------- -# Global formatting options - -_initial_defencoding = None - - -def detect_console_encoding(): - """ - Try to find the most capable encoding supported by the console. - slighly modified from the way IPython handles the same issue. - """ - import locale - global _initial_defencoding - - encoding = None - try: - encoding = sys.stdout.encoding or sys.stdin.encoding - except AttributeError: - pass - - # try again for something better - if not encoding or 'ascii' in encoding.lower(): - try: - encoding = locale.getpreferredencoding() - except Exception: - pass - - # when all else fails. this will usually be "ascii" - if not encoding or 'ascii' in encoding.lower(): - encoding = sys.getdefaultencoding() - - # GH3360, save the reported defencoding at import time - # MPL backends may change it. Make available for debugging. - if not _initial_defencoding: - _initial_defencoding = sys.getdefaultencoding() - - return encoding - - -def get_console_size(): - """Return console size as tuple = (width, height). - - Returns (None,None) in non-interactive session. - """ - display_width = get_option('display.width') - # deprecated. - display_height = get_option('display.height', silent=True) - - # Consider - # interactive shell terminal, can detect term size - # interactive non-shell terminal (ipnb/ipqtconsole), cannot detect term - # size non-interactive script, should disregard term size - - # in addition - # width,height have default values, but setting to 'None' signals - # should use Auto-Detection, But only in interactive shell-terminal. - # Simple. yeah. 
- - if com.in_interactive_session(): - if com.in_ipython_frontend(): - # sane defaults for interactive non-shell terminal - # match default for width,height in config_init - from pandas.core.config import get_default_val - terminal_width = get_default_val('display.width') - terminal_height = get_default_val('display.height') - else: - # pure terminal - terminal_width, terminal_height = get_terminal_size() - else: - terminal_width, terminal_height = None, None - - # Note if the User sets width/Height to None (auto-detection) - # and we're in a script (non-inter), this will return (None,None) - # caller needs to deal. - return (display_width or terminal_width, display_height or terminal_height) - class EngFormatter(object): """ @@ -2638,18 +2476,19 @@ def __call__(self, num): prefix = self.ENG_PREFIXES[int_pow10] else: if int_pow10 < 0: - prefix = 'E-%02d' % (-int_pow10) + prefix = 'E-{pow10:02d}'.format(pow10=-int_pow10) else: - prefix = 'E+%02d' % int_pow10 + prefix = 'E+{pow10:02d}'.format(pow10=int_pow10) mant = sign * dnum / (10**pow10) if self.accuracy is None: # pragma: no cover - format_str = u("% g%s") + format_str = u("{mant: g}{prefix}") else: - format_str = (u("%% .%if%%s") % self.accuracy) + format_str = (u("{{mant: .{acc:d}f}}{{prefix}}") + .format(acc=self.accuracy)) - formatted = format_str % (mant, prefix) + formatted = format_str.format(mant=mant, prefix=prefix) return formatted # .strip() diff --git a/pandas/formats/printing.py b/pandas/io/formats/printing.py similarity index 80% rename from pandas/formats/printing.py rename to pandas/io/formats/printing.py index 37bd4b63d6f7a..e0f53f671017a 100644 --- a/pandas/formats/printing.py +++ b/pandas/io/formats/printing.py @@ -2,7 +2,8 @@ printing tools """ -from pandas.types.inference import is_sequence +import sys +from pandas.core.dtypes.inference import is_sequence from pandas import compat from pandas.compat import u from pandas.core.config import get_option @@ -101,9 +102,9 @@ def _pprint_seq(seq, 
_nest_lvl=0, max_seq_items=None, **kwds): bounds length of printed sequence, depending on options """ if isinstance(seq, set): - fmt = u("{%s}") + fmt = u("{{{body}}}") else: - fmt = u("[%s]") if hasattr(seq, '__setitem__') else u("(%s)") + fmt = u("[{body}]") if hasattr(seq, '__setitem__') else u("({body})") if max_seq_items is False: nitems = len(seq) @@ -122,7 +123,7 @@ def _pprint_seq(seq, _nest_lvl=0, max_seq_items=None, **kwds): elif isinstance(seq, tuple) and len(seq) == 1: body += ',' - return fmt % body + return fmt.format(body=body) def _pprint_dict(seq, _nest_lvl=0, max_seq_items=None, **kwds): @@ -130,10 +131,10 @@ def _pprint_dict(seq, _nest_lvl=0, max_seq_items=None, **kwds): internal. pprinter for iterables. you should probably use pprint_thing() rather then calling this directly. """ - fmt = u("{%s}") + fmt = u("{{{things}}}") pairs = [] - pfmt = u("%s: %s") + pfmt = u("{key}: {val}") if max_seq_items is False: nitems = len(seq) @@ -141,16 +142,17 @@ def _pprint_dict(seq, _nest_lvl=0, max_seq_items=None, **kwds): nitems = max_seq_items or get_option("max_seq_items") or len(seq) for k, v in list(seq.items())[:nitems]: - pairs.append(pfmt % - (pprint_thing(k, _nest_lvl + 1, - max_seq_items=max_seq_items, **kwds), - pprint_thing(v, _nest_lvl + 1, - max_seq_items=max_seq_items, **kwds))) + pairs.append( + pfmt.format( + key=pprint_thing(k, _nest_lvl + 1, + max_seq_items=max_seq_items, **kwds), + val=pprint_thing(v, _nest_lvl + 1, + max_seq_items=max_seq_items, **kwds))) if nitems < len(seq): - return fmt % (", ".join(pairs) + ", ...") + return fmt.format(things=", ".join(pairs) + ", ...") else: - return fmt % ", ".join(pairs) + return fmt.format(things=", ".join(pairs)) def pprint_thing(thing, _nest_lvl=0, escape_chars=None, default_escapes=False, @@ -220,10 +222,10 @@ def as_escaped_unicode(thing, escape_chars=escape_chars): max_seq_items=max_seq_items) elif isinstance(thing, compat.string_types) and quote_strings: if compat.PY3: - fmt = "'%s'" + fmt = 
u("'{thing}'") else: - fmt = "u'%s'" - result = fmt % as_escaped_unicode(thing) + fmt = u("u'{thing}'") + result = fmt.format(thing=as_escaped_unicode(thing)) else: result = as_escaped_unicode(thing) @@ -233,3 +235,34 @@ def as_escaped_unicode(thing, escape_chars=escape_chars): def pprint_thing_encoded(object, encoding='utf-8', errors='replace', **kwds): value = pprint_thing(object) # get unicode representation of object return value.encode(encoding, errors, **kwds) + + +def _enable_data_resource_formatter(enable): + if 'IPython' not in sys.modules: + # definitely not in IPython + return + from IPython import get_ipython + ip = get_ipython() + if ip is None: + # still not in IPython + return + + formatters = ip.display_formatter.formatters + mimetype = "application/vnd.dataresource+json" + + if enable: + if mimetype not in formatters: + # define tableschema formatter + from IPython.core.formatters import BaseFormatter + + class TableSchemaFormatter(BaseFormatter): + print_method = '_repr_data_resource_' + _return_type = (dict,) + # register it: + formatters[mimetype] = TableSchemaFormatter() + # enable it if it's been disabled: + formatters[mimetype].enabled = True + else: + # unregister tableschema mime-type + if mimetype in formatters: + formatters[mimetype].enabled = False diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py new file mode 100644 index 0000000000000..f876ceb8a26bf --- /dev/null +++ b/pandas/io/formats/style.py @@ -0,0 +1,1267 @@ +""" +Module for applying conditional formatting to +DataFrames and Series. +""" +from functools import partial +from itertools import product +from contextlib import contextmanager +from uuid import uuid1 +import copy +from collections import defaultdict, MutableMapping + +try: + from jinja2 import ( + PackageLoader, Environment, ChoiceLoader, FileSystemLoader + ) +except ImportError: + msg = "pandas.Styler requires jinja2. 
"\ + "Please install with `conda install Jinja2`\n"\ + "or `pip install Jinja2`" + raise ImportError(msg) + +from pandas.core.dtypes.common import is_float, is_string_like + +import numpy as np +import pandas as pd +from pandas.api.types import is_list_like +from pandas.compat import range +from pandas.core.config import get_option +from pandas.core.generic import _shared_docs +import pandas.core.common as com +from pandas.core.indexing import _maybe_numeric_slice, _non_reducing_slice +from pandas.util._decorators import Appender +try: + import matplotlib.pyplot as plt + from matplotlib import colors + has_mpl = True +except ImportError: + has_mpl = False + no_mpl_message = "{0} requires matplotlib." + + +@contextmanager +def _mpl(func): + if has_mpl: + yield plt, colors + else: + raise ImportError(no_mpl_message.format(func.__name__)) + + +class Styler(object): + """ + Helps style a DataFrame or Series according to the + data with HTML and CSS. + + Parameters + ---------- + data: Series or DataFrame + precision: int + precision to round floats to, defaults to pd.options.display.precision + table_styles: list-like, default None + list of {selector: (attr, value)} dicts; see Notes + uuid: str, default None + a unique identifier to avoid CSS collisons; generated automatically + caption: str, default None + caption to attach to the table + + Attributes + ---------- + env : Jinja2 Environment + template : Jinja2 Template + loader : Jinja2 Loader + + Notes + ----- + Most styling will be done by passing style functions into + ``Styler.apply`` or ``Styler.applymap``. Style functions should + return values with strings containing CSS ``'attr: value'`` that will + be applied to the indicated cells. + + If using in the Jupyter notebook, Styler has defined a ``_repr_html_`` + to automatically render itself. Otherwise call Styler.render to get + the genterated HTML. 
+ + CSS classes are attached to the generated HTML + + * Index and Column names include ``index_name`` and ``level`` + where `k` is its level in a MultiIndex + * Index label cells include + + * ``row_heading`` + * ``row`` where `n` is the numeric position of the row + * ``level`` where `k` is the level in a MultiIndex + + * Column label cells include + * ``col_heading`` + * ``col`` where `n` is the numeric position of the column + * ``evel`` where `k` is the level in a MultiIndex + + * Blank cells include ``blank`` + * Data cells include ``data`` + + See Also + -------- + pandas.DataFrame.style + """ + loader = PackageLoader("pandas", "io/formats/templates") + env = Environment( + loader=loader, + trim_blocks=True, + ) + template = env.get_template("html.tpl") + + def __init__(self, data, precision=None, table_styles=None, uuid=None, + caption=None, table_attributes=None): + self.ctx = defaultdict(list) + self._todo = [] + + if not isinstance(data, (pd.Series, pd.DataFrame)): + raise TypeError("``data`` must be a Series or DataFrame") + if data.ndim == 1: + data = data.to_frame() + if not data.index.is_unique or not data.columns.is_unique: + raise ValueError("style is not supported for non-unique indicies.") + + self.data = data + self.index = data.index + self.columns = data.columns + + self.uuid = uuid + self.table_styles = table_styles + self.caption = caption + if precision is None: + precision = get_option('display.precision') + self.precision = precision + self.table_attributes = table_attributes + self.hidden_index = False + self.hidden_columns = [] + + # display_funcs maps (row, col) -> formatting function + + def default_display_func(x): + if is_float(x): + return '{:>.{precision}g}'.format(x, precision=self.precision) + else: + return x + + self._display_funcs = defaultdict(lambda: default_display_func) + + def _repr_html_(self): + """Hooks into Jupyter notebook rich display system.""" + return self.render() + + @Appender(_shared_docs['to_excel'] % dict( 
+ axes='index, columns', klass='Styler', + axes_single_arg="{0 or 'index', 1 or 'columns'}", + optional_by=""" + by : str or list of str + Name or list of names which refer to the axis items.""", + versionadded_to_excel='\n .. versionadded:: 0.20')) + def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='', + float_format=None, columns=None, header=True, index=True, + index_label=None, startrow=0, startcol=0, engine=None, + merge_cells=True, encoding=None, inf_rep='inf', verbose=True, + freeze_panes=None): + + from pandas.io.formats.excel import ExcelFormatter + formatter = ExcelFormatter(self, na_rep=na_rep, cols=columns, + header=header, + float_format=float_format, index=index, + index_label=index_label, + merge_cells=merge_cells, + inf_rep=inf_rep) + formatter.write(excel_writer, sheet_name=sheet_name, startrow=startrow, + startcol=startcol, freeze_panes=freeze_panes, + engine=engine) + + def _translate(self): + """ + Convert the DataFrame in `self.data` and the attrs from `_build_styles` + into a dictionary of {head, body, uuid, cellstyle} + """ + table_styles = self.table_styles or [] + caption = self.caption + ctx = self.ctx + precision = self.precision + hidden_index = self.hidden_index + hidden_columns = self.hidden_columns + uuid = self.uuid or str(uuid1()).replace("-", "_") + ROW_HEADING_CLASS = "row_heading" + COL_HEADING_CLASS = "col_heading" + INDEX_NAME_CLASS = "index_name" + + DATA_CLASS = "data" + BLANK_CLASS = "blank" + BLANK_VALUE = "" + + def format_attr(pair): + return "{key}={value}".format(**pair) + + # for sparsifying a MultiIndex + idx_lengths = _get_level_lengths(self.index) + col_lengths = _get_level_lengths(self.columns, hidden_columns) + + cell_context = dict() + + n_rlvls = self.data.index.nlevels + n_clvls = self.data.columns.nlevels + rlabels = self.data.index.tolist() + clabels = self.data.columns.tolist() + + if n_rlvls == 1: + rlabels = [[x] for x in rlabels] + if n_clvls == 1: + clabels = [[x] for x in clabels] + clabels 
= list(zip(*clabels)) + + cellstyle = [] + head = [] + + for r in range(n_clvls): + # Blank for Index columns... + row_es = [{"type": "th", + "value": BLANK_VALUE, + "display_value": BLANK_VALUE, + "is_visible": not hidden_index, + "class": " ".join([BLANK_CLASS])}] * (n_rlvls - 1) + + # ... except maybe the last for columns.names + name = self.data.columns.names[r] + cs = [BLANK_CLASS if name is None else INDEX_NAME_CLASS, + "level{lvl}".format(lvl=r)] + name = BLANK_VALUE if name is None else name + row_es.append({"type": "th", + "value": name, + "display_value": name, + "class": " ".join(cs), + "is_visible": not hidden_index}) + + if clabels: + for c, value in enumerate(clabels[r]): + cs = [COL_HEADING_CLASS, "level{lvl}".format(lvl=r), + "col{col}".format(col=c)] + cs.extend(cell_context.get( + "col_headings", {}).get(r, {}).get(c, [])) + es = { + "type": "th", + "value": value, + "display_value": value, + "class": " ".join(cs), + "is_visible": _is_visible(c, r, col_lengths), + } + colspan = col_lengths.get((r, c), 0) + if colspan > 1: + es["attributes"] = [ + format_attr({"key": "colspan", "value": colspan}) + ] + row_es.append(es) + head.append(row_es) + + if (self.data.index.names and + com._any_not_none(*self.data.index.names) and + not hidden_index): + index_header_row = [] + + for c, name in enumerate(self.data.index.names): + cs = [INDEX_NAME_CLASS, + "level{lvl}".format(lvl=c)] + name = '' if name is None else name + index_header_row.append({"type": "th", "value": name, + "class": " ".join(cs)}) + + index_header_row.extend( + [{"type": "th", + "value": BLANK_VALUE, + "class": " ".join([BLANK_CLASS]) + }] * (len(clabels[0]) - len(hidden_columns))) + + head.append(index_header_row) + + body = [] + for r, idx in enumerate(self.data.index): + row_es = [] + for c, value in enumerate(rlabels[r]): + rid = [ROW_HEADING_CLASS, "level{lvl}".format(lvl=c), + "row{row}".format(row=r)] + es = { + "type": "th", + "is_visible": (_is_visible(r, c, idx_lengths) and + 
not hidden_index), + "value": value, + "display_value": value, + "id": "_".join(rid[1:]), + "class": " ".join(rid) + } + rowspan = idx_lengths.get((c, r), 0) + if rowspan > 1: + es["attributes"] = [ + format_attr({"key": "rowspan", "value": rowspan}) + ] + row_es.append(es) + + for c, col in enumerate(self.data.columns): + cs = [DATA_CLASS, "row{row}".format(row=r), + "col{col}".format(col=c)] + cs.extend(cell_context.get("data", {}).get(r, {}).get(c, [])) + formatter = self._display_funcs[(r, c)] + value = self.data.iloc[r, c] + row_es.append({ + "type": "td", + "value": value, + "class": " ".join(cs), + "id": "_".join(cs[1:]), + "display_value": formatter(value), + "is_visible": (c not in hidden_columns) + }) + props = [] + for x in ctx[r, c]: + # have to handle empty styles like [''] + if x.count(":"): + props.append(x.split(":")) + else: + props.append(['', '']) + cellstyle.append({'props': props, + 'selector': "row{row}_col{col}" + .format(row=r, col=c)}) + body.append(row_es) + + table_attr = self.table_attributes + use_mathjax = get_option("display.html.use_mathjax") + if not use_mathjax: + table_attr = table_attr or '' + if 'class="' in table_attr: + table_attr = table_attr.replace('class="', + 'class="tex2jax_ignore ') + else: + table_attr += ' class="tex2jax_ignore"' + + return dict(head=head, cellstyle=cellstyle, body=body, uuid=uuid, + precision=precision, table_styles=table_styles, + caption=caption, table_attributes=table_attr) + + def format(self, formatter, subset=None): + """ + Format the text display value of cells. + + .. versionadded:: 0.18.0 + + Parameters + ---------- + formatter: str, callable, or dict + subset: IndexSlice + An argument to ``DataFrame.loc`` that restricts which elements + ``formatter`` is applied to. 
+ + Returns + ------- + self : Styler + + Notes + ----- + + ``formatter`` is either an ``a`` or a dict ``{column name: a}`` where + ``a`` is one of + + - str: this will be wrapped in: ``a.format(x)`` + - callable: called with the value of an individual cell + + The default display value for numeric values is the "general" (``g``) + format with ``pd.options.display.precision`` precision. + + Examples + -------- + + >>> df = pd.DataFrame(np.random.randn(4, 2), columns=['a', 'b']) + >>> df.style.format("{:.2%}") + >>> df['c'] = ['a', 'b', 'c', 'd'] + >>> df.style.format({'c': str.upper}) + """ + if subset is None: + row_locs = range(len(self.data)) + col_locs = range(len(self.data.columns)) + else: + subset = _non_reducing_slice(subset) + if len(subset) == 1: + subset = subset, self.data.columns + + sub_df = self.data.loc[subset] + row_locs = self.data.index.get_indexer_for(sub_df.index) + col_locs = self.data.columns.get_indexer_for(sub_df.columns) + + if isinstance(formatter, MutableMapping): + for col, col_formatter in formatter.items(): + # formatter must be callable, so '{}' are converted to lambdas + col_formatter = _maybe_wrap_formatter(col_formatter) + col_num = self.data.columns.get_indexer_for([col])[0] + + for row_num in row_locs: + self._display_funcs[(row_num, col_num)] = col_formatter + else: + # single scalar to format all cells with + locs = product(*(row_locs, col_locs)) + for i, j in locs: + formatter = _maybe_wrap_formatter(formatter) + self._display_funcs[(i, j)] = formatter + return self + + def render(self, **kwargs): + """Render the built up styles to HTML + + Parameters + ---------- + `**kwargs`: + Any additional keyword arguments are passed through + to ``self.template.render``. This is useful when you + need to provide additional variables for a custom + template. + + .. 
versionadded:: 0.20 + + Returns + ------- + rendered: str + the rendered HTML + + Notes + ----- + ``Styler`` objects have defined the ``_repr_html_`` method + which automatically calls ``self.render()`` when it's the + last item in a Notebook cell. When calling ``Styler.render()`` + directly, wrap the result in ``IPython.display.HTML`` to view + the rendered HTML in the notebook. + + Pandas uses the following keys in render. Arguments passed + in ``**kwargs`` take precedence, so think carefully if you want + to override them: + + * head + * cellstyle + * body + * uuid + * precision + * table_styles + * caption + * table_attributes + """ + self._compute() + # TODO: namespace all the pandas keys + d = self._translate() + # filter out empty styles, every cell will have a class + # but the list of props may just be [['', '']]. + # so we have the neested anys below + trimmed = [x for x in d['cellstyle'] + if any(any(y) for y in x['props'])] + d['cellstyle'] = trimmed + d.update(kwargs) + return self.template.render(**d) + + def _update_ctx(self, attrs): + """ + update the state of the Styler. Collects a mapping + of {index_label: [': ']} + + attrs: Series or DataFrame + should contain strings of ': ;: ' + Whitespace shouldn't matter and the final trailing ';' shouldn't + matter. + """ + for row_label, v in attrs.iterrows(): + for col_label, col in v.iteritems(): + i = self.index.get_indexer([row_label])[0] + j = self.columns.get_indexer([col_label])[0] + for pair in col.rstrip(";").split(";"): + self.ctx[(i, j)].append(pair) + + def _copy(self, deepcopy=False): + styler = Styler(self.data, precision=self.precision, + caption=self.caption, uuid=self.uuid, + table_styles=self.table_styles) + if deepcopy: + styler.ctx = copy.deepcopy(self.ctx) + styler._todo = copy.deepcopy(self._todo) + else: + styler.ctx = self.ctx + styler._todo = self._todo + return styler + + def __copy__(self): + """ + Deep copy by default. 
+ """ + return self._copy(deepcopy=False) + + def __deepcopy__(self, memo): + return self._copy(deepcopy=True) + + def clear(self): + """"Reset" the styler, removing any previously applied styles. + Returns None. + """ + self.ctx.clear() + self._todo = [] + + def _compute(self): + """ + Execute the style functions built up in `self._todo`. + + Relies on the conventions that all style functions go through + .apply or .applymap. The append styles to apply as tuples of + + (application method, *args, **kwargs) + """ + r = self + for func, args, kwargs in self._todo: + r = func(self)(*args, **kwargs) + return r + + def _apply(self, func, axis=0, subset=None, **kwargs): + subset = slice(None) if subset is None else subset + subset = _non_reducing_slice(subset) + data = self.data.loc[subset] + if axis is not None: + result = data.apply(func, axis=axis, + result_type='expand', **kwargs) + result.columns = data.columns + else: + result = func(data, **kwargs) + if not isinstance(result, pd.DataFrame): + raise TypeError( + "Function {func!r} must return a DataFrame when " + "passed to `Styler.apply` with axis=None" + .format(func=func)) + if not (result.index.equals(data.index) and + result.columns.equals(data.columns)): + msg = ('Result of {func!r} must have identical index and ' + 'columns as the input'.format(func=func)) + raise ValueError(msg) + + result_shape = result.shape + expected_shape = self.data.loc[subset].shape + if result_shape != expected_shape: + msg = ("Function {func!r} returned the wrong shape.\n" + "Result has shape: {res}\n" + "Expected shape: {expect}".format(func=func, + res=result.shape, + expect=expected_shape)) + raise ValueError(msg) + self._update_ctx(result) + return self + + def apply(self, func, axis=0, subset=None, **kwargs): + """ + Apply a function column-wise, row-wise, or table-wase, + updating the HTML representation with the result. 
+ + Parameters + ---------- + func : function + ``func`` should take a Series or DataFrame (depending + on ``axis``), and return an object with the same shape. + Must return a DataFrame with identical index and + column labels when ``axis=None`` + axis : int, str or None + apply to each column (``axis=0`` or ``'index'``) + or to each row (``axis=1`` or ``'columns'``) or + to the entire DataFrame at once with ``axis=None`` + subset : IndexSlice + a valid indexer to limit ``data`` to *before* applying the + function. Consider using a pandas.IndexSlice + kwargs : dict + pass along to ``func`` + + Returns + ------- + self : Styler + + Notes + ----- + The output shape of ``func`` should match the input, i.e. if + ``x`` is the input row, column, or table (depending on ``axis``), + then ``func(x.shape) == x.shape`` should be true. + + This is similar to ``DataFrame.apply``, except that ``axis=None`` + applies the function to the entire DataFrame at once, + rather than column-wise or row-wise. + + Examples + -------- + >>> def highlight_max(x): + ... return ['background-color: yellow' if v == x.max() else '' + for v in x] + ... + >>> df = pd.DataFrame(np.random.randn(5, 2)) + >>> df.style.apply(highlight_max) + """ + self._todo.append((lambda instance: getattr(instance, '_apply'), + (func, axis, subset), kwargs)) + return self + + def _applymap(self, func, subset=None, **kwargs): + func = partial(func, **kwargs) # applymap doesn't take kwargs? + if subset is None: + subset = pd.IndexSlice[:] + subset = _non_reducing_slice(subset) + result = self.data.loc[subset].applymap(func) + self._update_ctx(result) + return self + + def applymap(self, func, subset=None, **kwargs): + """ + Apply a function elementwise, updating the HTML + representation with the result. + + Parameters + ---------- + func : function + ``func`` should take a scalar and return a scalar + subset : IndexSlice + a valid indexer to limit ``data`` to *before* applying the + function. 
Consider using a pandas.IndexSlice + kwargs : dict + pass along to ``func`` + + Returns + ------- + self : Styler + + See Also + -------- + Styler.where + + """ + self._todo.append((lambda instance: getattr(instance, '_applymap'), + (func, subset), kwargs)) + return self + + def where(self, cond, value, other=None, subset=None, **kwargs): + """ + Apply a function elementwise, updating the HTML + representation with a style which is selected in + accordance with the return value of a function. + + .. versionadded:: 0.21.0 + + Parameters + ---------- + cond : callable + ``cond`` should take a scalar and return a boolean + value : str + applied when ``cond`` returns true + other : str + applied when ``cond`` returns false + subset : IndexSlice + a valid indexer to limit ``data`` to *before* applying the + function. Consider using a pandas.IndexSlice + kwargs : dict + pass along to ``cond`` + + Returns + ------- + self : Styler + + See Also + -------- + Styler.applymap + + """ + + if other is None: + other = '' + + return self.applymap(lambda val: value if cond(val) else other, + subset=subset, **kwargs) + + def set_precision(self, precision): + """ + Set the precision used to render. + + Parameters + ---------- + precision: int + + Returns + ------- + self : Styler + """ + self.precision = precision + return self + + def set_table_attributes(self, attributes): + """ + Set the table attributes. These are the items + that show up in the opening ```` tag in addition + to to automatic (by default) id. + + Parameters + ---------- + attributes : string + + Returns + ------- + self : Styler + + Examples + -------- + >>> df = pd.DataFrame(np.random.randn(10, 4)) + >>> df.style.set_table_attributes('class="pure-table"') + # ...
... + """ + self.table_attributes = attributes + return self + + def export(self): + """ + Export the styles to applied to the current Styler. + Can be applied to a second style with ``Styler.use``. + + Returns + ------- + styles: list + + See Also + -------- + Styler.use + """ + return self._todo + + def use(self, styles): + """ + Set the styles on the current Styler, possibly using styles + from ``Styler.export``. + + Parameters + ---------- + styles: list + list of style functions + + Returns + ------- + self : Styler + + See Also + -------- + Styler.export + """ + self._todo.extend(styles) + return self + + def set_uuid(self, uuid): + """ + Set the uuid for a Styler. + + Parameters + ---------- + uuid: str + + Returns + ------- + self : Styler + """ + self.uuid = uuid + return self + + def set_caption(self, caption): + """ + Set the caption on a Styler + + Parameters + ---------- + caption: str + + Returns + ------- + self : Styler + """ + self.caption = caption + return self + + def set_table_styles(self, table_styles): + """ + Set the table styles on a Styler. These are placed in a + `` +{%- endblock style %} +{%- block before_table %}{% endblock before_table %} +{%- block table %} +
+{%- block caption %} +{%- if caption -%} + +{%- endif -%} +{%- endblock caption %} +{%- block thead %} + + {%- block before_head_rows %}{% endblock %} + {%- for r in head %} + {%- block head_tr scoped %} + + {%- for c in r %} + {%- if c.is_visible != False %} + <{{ c.type }} class="{{c.class}}" {{ c.attributes|join(" ") }}>{{c.value}} + {%- endif %} + {%- endfor %} + + {%- endblock head_tr %} + {%- endfor %} + {%- block after_head_rows %}{% endblock %} + +{%- endblock thead %} +{%- block tbody %} + + {%- block before_rows %}{%- endblock before_rows %} + {%- for r in body %} + {%- block tr scoped %} + + {%- for c in r %} + {%- if c.is_visible != False %} + <{{ c.type }} id="T_{{ uuid }}{{ c.id }}" class="{{ c.class }}" {{ c.attributes|join(" ") }}>{{ c.display_value }} + {%- endif %} + {%- endfor %} + + {%- endblock tr %} + {%- endfor %} + {%- block after_rows %}{%- endblock after_rows %} + +{%- endblock tbody %} +
{{caption}}
+{%- endblock table %} +{%- block after_table %}{% endblock after_table %} diff --git a/pandas/util/terminal.py b/pandas/io/formats/terminal.py similarity index 95% rename from pandas/util/terminal.py rename to pandas/io/formats/terminal.py index 6b8428ff75806..4bcb28fa59b86 100644 --- a/pandas/util/terminal.py +++ b/pandas/io/formats/terminal.py @@ -14,6 +14,8 @@ from __future__ import print_function import os +import sys +import shutil __all__ = ['get_terminal_size'] @@ -26,6 +28,10 @@ def get_terminal_size(): IPython zmq frontends, or IDLE do not run in a terminal, """ import platform + + if sys.version_info[0] >= 3: + return shutil.get_terminal_size() + current_os = platform.system() tuple_xy = None if current_os == 'Windows': @@ -115,6 +121,7 @@ def ioctl_GWINSZ(fd): return None return int(cr[1]), int(cr[0]) + if __name__ == "__main__": sizex, sizey = get_terminal_size() - print('width = %s height = %s' % (sizex, sizey)) + print('width = {w} height = {h}'.format(w=sizex, h=sizey)) diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index 169a2b1df9b4c..f9bc6ae1a5451 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -1,637 +1,24 @@ -import warnings -from datetime import datetime -import json -import logging -from time import sleep -import uuid -import time -import sys +""" Google BigQuery support """ -import numpy as np - -from distutils.version import StrictVersion -from pandas import compat, DataFrame, concat -from pandas.core.common import PandasError -from pandas.compat import lzip, bytes_to_str - - -def _check_google_client_version(): +def _try_import(): + # since pandas is a dependency of pandas-gbq + # we need to import on first use try: - import pkg_resources - + import pandas_gbq except ImportError: - raise ImportError('Could not import pkg_resources (setuptools).') - - if compat.PY3: - google_api_minimum_version = '1.4.1' - else: - google_api_minimum_version = '1.2.0' - - _GOOGLE_API_CLIENT_VERSION = pkg_resources.get_distribution( - 
'google-api-python-client').version - - if (StrictVersion(_GOOGLE_API_CLIENT_VERSION) < - StrictVersion(google_api_minimum_version)): - raise ImportError("pandas requires google-api-python-client >= {0} " - "for Google BigQuery support, " - "current version {1}" - .format(google_api_minimum_version, - _GOOGLE_API_CLIENT_VERSION)) - - -def _test_google_api_imports(): - - try: - import httplib2 # noqa - try: - from googleapiclient.discovery import build # noqa - from googleapiclient.errors import HttpError # noqa - except: - from apiclient.discovery import build # noqa - from apiclient.errors import HttpError # noqa - from oauth2client.client import AccessTokenRefreshError # noqa - from oauth2client.client import OAuth2WebServerFlow # noqa - from oauth2client.file import Storage # noqa - from oauth2client.tools import run_flow, argparser # noqa - except ImportError as e: - raise ImportError("Missing module required for Google BigQuery " - "support: {0}".format(str(e))) - -logger = logging.getLogger('pandas.io.gbq') -logger.setLevel(logging.ERROR) - - -class InvalidPrivateKeyFormat(PandasError, ValueError): - """ - Raised when provided private key has invalid format. - """ - pass - - -class AccessDenied(PandasError, ValueError): - """ - Raised when invalid credentials are provided, or tokens have expired. - """ - pass - - -class DatasetCreationError(PandasError, ValueError): - """ - Raised when the create dataset method fails - """ - pass - - -class GenericGBQException(PandasError, ValueError): - """ - Raised when an unrecognized Google API Error occurs. - """ - pass - - -class InvalidColumnOrder(PandasError, ValueError): - """ - Raised when the provided column order for output - results DataFrame does not match the schema - returned by BigQuery. - """ - pass - - -class InvalidPageToken(PandasError, ValueError): - """ - Raised when Google BigQuery fails to return, - or returns a duplicate page token. 
- """ - pass - - -class InvalidSchema(PandasError, ValueError): - """ - Raised when the provided DataFrame does - not match the schema of the destination - table in BigQuery. - """ - pass - - -class NotFoundException(PandasError, ValueError): - """ - Raised when the project_id, table or dataset provided in the query could - not be found. - """ - pass - - -class StreamingInsertError(PandasError, ValueError): - """ - Raised when BigQuery reports a streaming insert error. - For more information see `Streaming Data Into BigQuery - `__ - """ - - -class TableCreationError(PandasError, ValueError): - """ - Raised when the create table method fails - """ - pass - - -class GbqConnector(object): - scope = 'https://www.googleapis.com/auth/bigquery' - - def __init__(self, project_id, reauth=False, verbose=False, - private_key=None, dialect='legacy'): - _check_google_client_version() - _test_google_api_imports() - self.project_id = project_id - self.reauth = reauth - self.verbose = verbose - self.private_key = private_key - self.dialect = dialect - self.credentials = self.get_credentials() - self.service = self.get_service() - - def get_credentials(self): - if self.private_key: - return self.get_service_account_credentials() - else: - # Try to retrieve Application Default Credentials - credentials = self.get_application_default_credentials() - if not credentials: - credentials = self.get_user_account_credentials() - return credentials - - def get_application_default_credentials(self): - """ - This method tries to retrieve the "default application credentials". - This could be useful for running code on Google Cloud Platform. - - .. versionadded:: 0.19.0 - - Parameters - ---------- - None - - Returns - ------- - - GoogleCredentials, - If the default application credentials can be retrieved - from the environment. The retrieved credentials should also - have access to the project (self.project_id) on BigQuery. 
- - OR None, - If default application credentials can not be retrieved - from the environment. Or, the retrieved credentials do not - have access to the project (self.project_id) on BigQuery. - """ - import httplib2 - try: - from googleapiclient.discovery import build - except ImportError: - from apiclient.discovery import build - try: - from oauth2client.client import GoogleCredentials - except ImportError: - return None - - try: - credentials = GoogleCredentials.get_application_default() - except: - return None - - http = httplib2.Http() - try: - http = credentials.authorize(http) - bigquery_service = build('bigquery', 'v2', http=http) - # Check if the application has rights to the BigQuery project - jobs = bigquery_service.jobs() - job_data = {'configuration': {'query': {'query': 'SELECT 1'}}} - jobs.insert(projectId=self.project_id, body=job_data).execute() - return credentials - except: - return None - - def get_user_account_credentials(self): - from oauth2client.client import OAuth2WebServerFlow - from oauth2client.file import Storage - from oauth2client.tools import run_flow, argparser - - flow = OAuth2WebServerFlow( - client_id=('495642085510-k0tmvj2m941jhre2nbqka17vqpjfddtd' - '.apps.googleusercontent.com'), - client_secret='kOc9wMptUtxkcIFbtZCcrEAc', - scope=self.scope, - redirect_uri='urn:ietf:wg:oauth:2.0:oob') - - storage = Storage('bigquery_credentials.dat') - credentials = storage.get() - - if credentials is None or credentials.invalid or self.reauth: - credentials = run_flow(flow, storage, argparser.parse_args([])) - - return credentials - - def get_service_account_credentials(self): - # Bug fix for https://github.com/pandas-dev/pandas/issues/12572 - # We need to know that a supported version of oauth2client is installed - # Test that either of the following is installed: - # - SignedJwtAssertionCredentials from oauth2client.client - # - ServiceAccountCredentials from oauth2client.service_account - # SignedJwtAssertionCredentials is available in 
oauthclient < 2.0.0 - # ServiceAccountCredentials is available in oauthclient >= 2.0.0 - oauth2client_v1 = True - oauth2client_v2 = True - - try: - from oauth2client.client import SignedJwtAssertionCredentials - except ImportError: - oauth2client_v1 = False - - try: - from oauth2client.service_account import ServiceAccountCredentials - except ImportError: - oauth2client_v2 = False - - if not oauth2client_v1 and not oauth2client_v2: - raise ImportError("Missing oauth2client required for BigQuery " - "service account support") - - from os.path import isfile - - try: - if isfile(self.private_key): - with open(self.private_key) as f: - json_key = json.loads(f.read()) - else: - # ugly hack: 'private_key' field has new lines inside, - # they break json parser, but we need to preserve them - json_key = json.loads(self.private_key.replace('\n', ' ')) - json_key['private_key'] = json_key['private_key'].replace( - ' ', '\n') - - if compat.PY3: - json_key['private_key'] = bytes( - json_key['private_key'], 'UTF-8') - - if oauth2client_v1: - return SignedJwtAssertionCredentials( - json_key['client_email'], - json_key['private_key'], - self.scope, - ) - else: - return ServiceAccountCredentials.from_json_keyfile_dict( - json_key, - self.scope) - except (KeyError, ValueError, TypeError, AttributeError): - raise InvalidPrivateKeyFormat( - "Private key is missing or invalid. It should be service " - "account private key JSON (file path or string contents) " - "with at least two keys: 'client_email' and 'private_key'. " - "Can be obtained from: https://console.developers.google." 
- "com/permissions/serviceaccounts") - - def _print(self, msg, end='\n'): - if self.verbose: - sys.stdout.write(msg + end) - sys.stdout.flush() - - def _start_timer(self): - self.start = time.time() - - def get_elapsed_seconds(self): - return round(time.time() - self.start, 2) - - def print_elapsed_seconds(self, prefix='Elapsed', postfix='s.', - overlong=7): - sec = self.get_elapsed_seconds() - if sec > overlong: - self._print('{} {} {}'.format(prefix, sec, postfix)) - - # http://stackoverflow.com/questions/1094841/reusable-library-to-get-human-readable-version-of-file-size - @staticmethod - def sizeof_fmt(num, suffix='b'): - fmt = "%3.1f %s%s" - for unit in ['', 'k', 'M', 'G', 'T', 'P', 'E', 'Z']: - if abs(num) < 1024.0: - return fmt % (num, unit, suffix) - num /= 1024.0 - return fmt % (num, 'Y', suffix) - - def get_service(self): - import httplib2 - try: - from googleapiclient.discovery import build - except: - from apiclient.discovery import build - - http = httplib2.Http() - http = self.credentials.authorize(http) - bigquery_service = build('bigquery', 'v2', http=http) - - return bigquery_service - - @staticmethod - def process_http_error(ex): - # See `BigQuery Troubleshooting Errors - # `__ - - status = json.loads(bytes_to_str(ex.content))['error'] - errors = status.get('errors', None) - - if errors: - for error in errors: - reason = error['reason'] - message = error['message'] - - raise GenericGBQException( - "Reason: {0}, Message: {1}".format(reason, message)) - - raise GenericGBQException(errors) - - def process_insert_errors(self, insert_errors): - for insert_error in insert_errors: - row = insert_error['index'] - errors = insert_error.get('errors', None) - for error in errors: - reason = error['reason'] - message = error['message'] - location = error['location'] - error_message = ('Error at Row: {0}, Reason: {1}, ' - 'Location: {2}, Message: {3}' - .format(row, reason, location, message)) - - # Report all error messages if verbose is set - if 
self.verbose: - self._print(error_message) - else: - raise StreamingInsertError(error_message + - '\nEnable verbose logging to ' - 'see all errors') - - raise StreamingInsertError - - def run_query(self, query, **kwargs): - try: - from googleapiclient.errors import HttpError - except: - from apiclient.errors import HttpError - from oauth2client.client import AccessTokenRefreshError - - _check_google_client_version() - - job_collection = self.service.jobs() - - job_config = { - 'query': { - 'query': query, - 'useLegacySql': self.dialect == 'legacy' - # 'allowLargeResults', 'createDisposition', - # 'preserveNulls', destinationTable, useQueryCache - } - } - config = kwargs.get('configuration') - if config is not None: - if len(config) != 1: - raise ValueError("Only one job type must be specified, but " - "given {}".format(','.join(config.keys()))) - if 'query' in config: - if 'query' in config['query'] and query is not None: - raise ValueError("Query statement can't be specified " - "inside config while it is specified " - "as parameter") - - job_config['query'].update(config['query']) - else: - raise ValueError("Only 'query' job type is supported") - - job_data = { - 'configuration': job_config - } - - self._start_timer() - try: - self._print('Requesting query... ', end="") - query_reply = job_collection.insert( - projectId=self.project_id, body=job_data).execute() - self._print('ok.\nQuery running...') - except (AccessTokenRefreshError, ValueError): - if self.private_key: - raise AccessDenied( - "The service account credentials are not valid") - else: - raise AccessDenied( - "The credentials have been revoked or expired, " - "please re-run the application to re-authorize") - except HttpError as ex: - self.process_http_error(ex) - - job_reference = query_reply['jobReference'] - - while not query_reply.get('jobComplete', False): - self.print_elapsed_seconds(' Elapsed', 's. 
Waiting...') - try: - query_reply = job_collection.getQueryResults( - projectId=job_reference['projectId'], - jobId=job_reference['jobId']).execute() - except HttpError as ex: - self.process_http_error(ex) - - if self.verbose: - if query_reply['cacheHit']: - self._print('Query done.\nCache hit.\n') - else: - bytes_processed = int(query_reply.get( - 'totalBytesProcessed', '0')) - self._print('Query done.\nProcessed: {}\n'.format( - self.sizeof_fmt(bytes_processed))) - - self._print('Retrieving results...') - - total_rows = int(query_reply['totalRows']) - result_pages = list() - seen_page_tokens = list() - current_row = 0 - # Only read schema on first page - schema = query_reply['schema'] - - # Loop through each page of data - while 'rows' in query_reply and current_row < total_rows: - page = query_reply['rows'] - result_pages.append(page) - current_row += len(page) - - self.print_elapsed_seconds( - ' Got page: {}; {}% done. Elapsed'.format( - len(result_pages), - round(100.0 * current_row / total_rows))) - - if current_row == total_rows: - break - - page_token = query_reply.get('pageToken', None) - - if not page_token and current_row < total_rows: - raise InvalidPageToken("Required pageToken was missing. 
" - "Received {0} of {1} rows" - .format(current_row, total_rows)) - - elif page_token in seen_page_tokens: - raise InvalidPageToken("A duplicate pageToken was returned") - - seen_page_tokens.append(page_token) - - try: - query_reply = job_collection.getQueryResults( - projectId=job_reference['projectId'], - jobId=job_reference['jobId'], - pageToken=page_token).execute() - except HttpError as ex: - self.process_http_error(ex) - - if current_row < total_rows: - raise InvalidPageToken() - - # print basic query stats - self._print('Got {} rows.\n'.format(total_rows)) - - return schema, result_pages - - def load_data(self, dataframe, dataset_id, table_id, chunksize): - try: - from googleapiclient.errors import HttpError - except: - from apiclient.errors import HttpError - - job_id = uuid.uuid4().hex - rows = [] - remaining_rows = len(dataframe) - - total_rows = remaining_rows - self._print("\n\n") - - for index, row in dataframe.reset_index(drop=True).iterrows(): - row_dict = dict() - row_dict['json'] = json.loads(row.to_json(force_ascii=False, - date_unit='s', - date_format='iso')) - row_dict['insertId'] = job_id + str(index) - rows.append(row_dict) - remaining_rows -= 1 - - if (len(rows) % chunksize == 0) or (remaining_rows == 0): - self._print("\rStreaming Insert is {0}% Complete".format( - ((total_rows - remaining_rows) * 100) / total_rows)) - - body = {'rows': rows} - - try: - response = self.service.tabledata().insertAll( - projectId=self.project_id, - datasetId=dataset_id, - tableId=table_id, - body=body).execute() - except HttpError as ex: - self.process_http_error(ex) - - # For streaming inserts, even if you receive a success HTTP - # response code, you'll need to check the insertErrors property - # of the response to determine if the row insertions were - # successful, because it's possible that BigQuery was only - # partially successful at inserting the rows. 
See the `Success - # HTTP Response Codes - # `__ - # section - - insert_errors = response.get('insertErrors', None) - if insert_errors: - self.process_insert_errors(insert_errors) - - sleep(1) # Maintains the inserts "per second" rate per API - rows = [] - - self._print("\n") - - def verify_schema(self, dataset_id, table_id, schema): - try: - from googleapiclient.errors import HttpError - except: - from apiclient.errors import HttpError - - try: - remote_schema = self.service.tables().get( - projectId=self.project_id, - datasetId=dataset_id, - tableId=table_id).execute()['schema'] - - fields_remote = set([json.dumps(field_remote) - for field_remote in remote_schema['fields']]) - fields_local = set(json.dumps(field_local) - for field_local in schema['fields']) - - return fields_remote == fields_local - except HttpError as ex: - self.process_http_error(ex) - - def delete_and_recreate_table(self, dataset_id, table_id, table_schema): - delay = 0 - # Changes to table schema may take up to 2 minutes as of May 2015 See - # `Issue 191 - # `__ - # Compare previous schema with new schema to determine if there should - # be a 120 second delay + # give a nice error message + raise ImportError("Load data from Google BigQuery\n" + "\n" + "the pandas-gbq package is not installed\n" + "see the docs: https://pandas-gbq.readthedocs.io\n" + "\n" + "you can install via pip or conda:\n" + "pip install pandas-gbq\n" + "conda install pandas-gbq -c conda-forge\n") - if not self.verify_schema(dataset_id, table_id, table_schema): - self._print('The existing table has a different schema. Please ' - 'wait 2 minutes. 
See Google BigQuery issue #191') - delay = 120 - - table = _Table(self.project_id, dataset_id, - private_key=self.private_key) - table.delete(table_id) - table.create(table_id, table_schema) - sleep(delay) - - -def _parse_data(schema, rows): - # see: - # http://pandas.pydata.org/pandas-docs/dev/missing_data.html - # #missing-data-casting-rules-and-indexing - dtype_map = {'FLOAT': np.dtype(float), - 'TIMESTAMP': 'M8[ns]'} - - fields = schema['fields'] - col_types = [field['type'] for field in fields] - col_names = [str(field['name']) for field in fields] - col_dtypes = [dtype_map.get(field['type'], object) for field in fields] - page_array = np.zeros((len(rows),), dtype=lzip(col_names, col_dtypes)) - for row_num, raw_row in enumerate(rows): - entries = raw_row.get('f', []) - for col_num, field_type in enumerate(col_types): - field_value = _parse_entry(entries[col_num].get('v', ''), - field_type) - page_array[row_num][col_num] = field_value - - return DataFrame(page_array, columns=col_names) - - -def _parse_entry(field_value, field_type): - if field_value is None or field_value == 'null': - return None - if field_type == 'INTEGER': - return int(field_value) - elif field_type == 'FLOAT': - return float(field_value) - elif field_type == 'TIMESTAMP': - timestamp = datetime.utcfromtimestamp(float(field_value)) - return np.datetime64(timestamp) - elif field_type == 'BOOLEAN': - return field_value == 'true' - return field_value + return pandas_gbq def read_gbq(query, project_id=None, index_col=None, col_order=None, @@ -639,14 +26,11 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, **kwargs): r"""Load data from Google BigQuery. - THIS IS AN EXPERIMENTAL LIBRARY - The main method a user calls to execute a Query in Google BigQuery and read results into a pandas DataFrame. - Google BigQuery API Client Library v2 for Python is used. 
- Documentation is available at - https://developers.google.com/api-client-library/python/apis/bigquery/v2 + This function requires the `pandas-gbq package + `__. Authentication to the Google BigQuery service is via OAuth 2.0. @@ -654,8 +38,6 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, By default "application default credentials" are used. - .. versionadded:: 0.19.0 - If default application credentials are not found or are restrictive, user account credentials are used. In this case, you will be asked to grant permissions for product name 'pandas GBQ'. @@ -683,29 +65,23 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, private_key : str (optional) Service account private key in JSON format. Can be file path or string contents. This is useful for remote server - authentication (eg. jupyter iPython notebook on remote host) - - .. versionadded:: 0.18.1 + authentication (eg. Jupyter/IPython notebook on remote host) dialect : {'legacy', 'standard'}, default 'legacy' 'legacy' : Use BigQuery's legacy SQL dialect. - 'standard' : Use BigQuery's standard SQL (beta), which is + 'standard' : Use BigQuery's standard SQL, which is compliant with the SQL 2011 standard. For more information see `BigQuery SQL Reference `__ - .. versionadded:: 0.19.0 - - **kwargs : Arbitrary keyword arguments + `**kwargs` : Arbitrary keyword arguments configuration (dict): query config parameters for job processing. For example: configuration = {'query': {'useQueryCache': False}} For more information see `BigQuery SQL Reference - ` - - .. 
versionadded:: 0.20.0 + `__ Returns ------- @@ -713,446 +89,20 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, DataFrame representing results of query """ - - if not project_id: - raise TypeError("Missing required parameter: project_id") - - if dialect not in ('legacy', 'standard'): - raise ValueError("'{0}' is not valid for dialect".format(dialect)) - - connector = GbqConnector(project_id, reauth=reauth, verbose=verbose, - private_key=private_key, - dialect=dialect) - schema, pages = connector.run_query(query, **kwargs) - dataframe_list = [] - while len(pages) > 0: - page = pages.pop() - dataframe_list.append(_parse_data(schema, page)) - - if len(dataframe_list) > 0: - final_df = concat(dataframe_list, ignore_index=True) - else: - final_df = _parse_data(schema, []) - - # Reindex the DataFrame on the provided column - if index_col is not None: - if index_col in final_df.columns: - final_df.set_index(index_col, inplace=True) - else: - raise InvalidColumnOrder( - 'Index column "{0}" does not exist in DataFrame.' - .format(index_col) - ) - - # Change the order of columns in the DataFrame based on provided list - if col_order is not None: - if sorted(col_order) == sorted(final_df.columns): - final_df = final_df[col_order] - else: - raise InvalidColumnOrder( - 'Column order does not match this DataFrame.' 
- ) - - # cast BOOLEAN and INTEGER columns from object to bool/int - # if they dont have any nulls - type_map = {'BOOLEAN': bool, 'INTEGER': int} - for field in schema['fields']: - if field['type'] in type_map and \ - final_df[field['name']].notnull().all(): - final_df[field['name']] = \ - final_df[field['name']].astype(type_map[field['type']]) - - connector.print_elapsed_seconds( - 'Total time taken', - datetime.now().strftime('s.\nFinished at %Y-%m-%d %H:%M:%S.'), - 0 - ) - - return final_df + pandas_gbq = _try_import() + return pandas_gbq.read_gbq( + query, project_id=project_id, + index_col=index_col, col_order=col_order, + reauth=reauth, verbose=verbose, + private_key=private_key, + dialect=dialect, + **kwargs) def to_gbq(dataframe, destination_table, project_id, chunksize=10000, verbose=True, reauth=False, if_exists='fail', private_key=None): - """Write a DataFrame to a Google BigQuery table. - - THIS IS AN EXPERIMENTAL LIBRARY - - The main method a user calls to export pandas DataFrame contents to - Google BigQuery table. - - Google BigQuery API Client Library v2 for Python is used. - Documentation is available at - https://developers.google.com/api-client-library/python/apis/bigquery/v2 - - Authentication to the Google BigQuery service is via OAuth 2.0. - - - If "private_key" is not provided: - - By default "application default credentials" are used. - - .. versionadded:: 0.19.0 - - If default application credentials are not found or are restrictive, - user account credentials are used. In this case, you will be asked to - grant permissions for product name 'pandas GBQ'. - - - If "private_key" is provided: - - Service account credentials will be used to authenticate. - - Parameters - ---------- - dataframe : DataFrame - DataFrame to be written - destination_table : string - Name of table to be written, in the form 'dataset.tablename' - project_id : str - Google BigQuery Account project ID. 
- chunksize : int (default 10000) - Number of rows to be inserted in each chunk from the dataframe. - verbose : boolean (default True) - Show percentage complete - reauth : boolean (default False) - Force Google BigQuery to reauthenticate the user. This is useful - if multiple accounts are used. - if_exists : {'fail', 'replace', 'append'}, default 'fail' - 'fail': If table exists, do nothing. - 'replace': If table exists, drop it, recreate it, and insert data. - 'append': If table exists, insert data. Create if does not exist. - private_key : str (optional) - Service account private key in JSON format. Can be file path - or string contents. This is useful for remote server - authentication (eg. jupyter iPython notebook on remote host) - """ - - if if_exists not in ('fail', 'replace', 'append'): - raise ValueError("'{0}' is not valid for if_exists".format(if_exists)) - - if '.' not in destination_table: - raise NotFoundException( - "Invalid Table Name. Should be of the form 'datasetId.tableId' ") - - connector = GbqConnector(project_id, reauth=reauth, verbose=verbose, - private_key=private_key) - dataset_id, table_id = destination_table.rsplit('.', 1) - - table = _Table(project_id, dataset_id, reauth=reauth, - private_key=private_key) - - table_schema = _generate_bq_schema(dataframe) - - # If table exists, check if_exists parameter - if table.exists(table_id): - if if_exists == 'fail': - raise TableCreationError("Could not create the table because it " - "already exists. 
" - "Change the if_exists parameter to " - "append or replace data.") - elif if_exists == 'replace': - connector.delete_and_recreate_table( - dataset_id, table_id, table_schema) - elif if_exists == 'append': - if not connector.verify_schema(dataset_id, table_id, table_schema): - raise InvalidSchema("Please verify that the structure and " - "data types in the DataFrame match the " - "schema of the destination table.") - else: - table.create(table_id, table_schema) - - connector.load_data(dataframe, dataset_id, table_id, chunksize) - - -def generate_bq_schema(df, default_type='STRING'): - # deprecation TimeSeries, #11121 - warnings.warn("generate_bq_schema is deprecated and will be removed in " - "a future version", FutureWarning, stacklevel=2) - - return _generate_bq_schema(df, default_type=default_type) - - -def _generate_bq_schema(df, default_type='STRING'): - """ Given a passed df, generate the associated Google BigQuery schema. - - Parameters - ---------- - df : DataFrame - default_type : string - The default big query type in case the type of the column - does not exist in the schema. - """ - - type_mapping = { - 'i': 'INTEGER', - 'b': 'BOOLEAN', - 'f': 'FLOAT', - 'O': 'STRING', - 'S': 'STRING', - 'U': 'STRING', - 'M': 'TIMESTAMP' - } - - fields = [] - for column_name, dtype in df.dtypes.iteritems(): - fields.append({'name': column_name, - 'type': type_mapping.get(dtype.kind, default_type)}) - - return {'fields': fields} - - -class _Table(GbqConnector): - - def __init__(self, project_id, dataset_id, reauth=False, verbose=False, - private_key=None): - try: - from googleapiclient.errors import HttpError - except: - from apiclient.errors import HttpError - self.http_error = HttpError - self.dataset_id = dataset_id - super(_Table, self).__init__(project_id, reauth, verbose, private_key) - - def exists(self, table_id): - """ Check if a table exists in Google BigQuery - - .. 
versionadded:: 0.17.0 - - Parameters - ---------- - table : str - Name of table to be verified - - Returns - ------- - boolean - true if table exists, otherwise false - """ - - try: - self.service.tables().get( - projectId=self.project_id, - datasetId=self.dataset_id, - tableId=table_id).execute() - return True - except self.http_error as ex: - if ex.resp.status == 404: - return False - else: - self.process_http_error(ex) - - def create(self, table_id, schema): - """ Create a table in Google BigQuery given a table and schema - - .. versionadded:: 0.17.0 - - Parameters - ---------- - table : str - Name of table to be written - schema : str - Use the generate_bq_schema to generate your table schema from a - dataframe. - """ - - if self.exists(table_id): - raise TableCreationError( - "The table could not be created because it already exists") - - if not _Dataset(self.project_id, - private_key=self.private_key).exists(self.dataset_id): - _Dataset(self.project_id, - private_key=self.private_key).create(self.dataset_id) - - body = { - 'schema': schema, - 'tableReference': { - 'tableId': table_id, - 'projectId': self.project_id, - 'datasetId': self.dataset_id - } - } - - try: - self.service.tables().insert( - projectId=self.project_id, - datasetId=self.dataset_id, - body=body).execute() - except self.http_error as ex: - self.process_http_error(ex) - - def delete(self, table_id): - """ Delete a table in Google BigQuery - - .. 
versionadded:: 0.17.0 - - Parameters - ---------- - table : str - Name of table to be deleted - """ - - if not self.exists(table_id): - raise NotFoundException("Table does not exist") - - try: - self.service.tables().delete( - datasetId=self.dataset_id, - projectId=self.project_id, - tableId=table_id).execute() - except self.http_error as ex: - self.process_http_error(ex) - - -class _Dataset(GbqConnector): - - def __init__(self, project_id, reauth=False, verbose=False, - private_key=None): - try: - from googleapiclient.errors import HttpError - except: - from apiclient.errors import HttpError - self.http_error = HttpError - super(_Dataset, self).__init__(project_id, reauth, verbose, - private_key) - - def exists(self, dataset_id): - """ Check if a dataset exists in Google BigQuery - - .. versionadded:: 0.17.0 - - Parameters - ---------- - dataset_id : str - Name of dataset to be verified - - Returns - ------- - boolean - true if dataset exists, otherwise false - """ - - try: - self.service.datasets().get( - projectId=self.project_id, - datasetId=dataset_id).execute() - return True - except self.http_error as ex: - if ex.resp.status == 404: - return False - else: - self.process_http_error(ex) - - def datasets(self): - """ Return a list of datasets in Google BigQuery - - .. versionadded:: 0.17.0 - - Parameters - ---------- - None - - Returns - ------- - list - List of datasets under the specific project - """ - - try: - list_dataset_response = self.service.datasets().list( - projectId=self.project_id).execute().get('datasets', None) - - if not list_dataset_response: - return [] - - dataset_list = list() - - for row_num, raw_row in enumerate(list_dataset_response): - dataset_list.append(raw_row['datasetReference']['datasetId']) - - return dataset_list - except self.http_error as ex: - self.process_http_error(ex) - - def create(self, dataset_id): - """ Create a dataset in Google BigQuery - - .. 
versionadded:: 0.17.0 - - Parameters - ---------- - dataset : str - Name of dataset to be written - """ - - if self.exists(dataset_id): - raise DatasetCreationError( - "The dataset could not be created because it already exists") - - body = { - 'datasetReference': { - 'projectId': self.project_id, - 'datasetId': dataset_id - } - } - - try: - self.service.datasets().insert( - projectId=self.project_id, - body=body).execute() - except self.http_error as ex: - self.process_http_error(ex) - - def delete(self, dataset_id): - """ Delete a dataset in Google BigQuery - - .. versionadded:: 0.17.0 - - Parameters - ---------- - dataset : str - Name of dataset to be deleted - """ - - if not self.exists(dataset_id): - raise NotFoundException( - "Dataset {0} does not exist".format(dataset_id)) - - try: - self.service.datasets().delete( - datasetId=dataset_id, - projectId=self.project_id).execute() - - except self.http_error as ex: - self.process_http_error(ex) - - def tables(self, dataset_id): - """ List tables in the specific dataset in Google BigQuery - - .. 
versionadded:: 0.17.0 - - Parameters - ---------- - dataset : str - Name of dataset to list tables for - - Returns - ------- - list - List of tables under the specific dataset - """ - - try: - list_table_response = self.service.tables().list( - projectId=self.project_id, - datasetId=dataset_id).execute().get('tables', None) - - if not list_table_response: - return [] - - table_list = list() - - for row_num, raw_row in enumerate(list_table_response): - table_list.append(raw_row['tableReference']['tableId']) - - return table_list - except self.http_error as ex: - self.process_http_error(ex) + pandas_gbq = _try_import() + pandas_gbq.to_gbq(dataframe, destination_table, project_id, + chunksize=chunksize, + verbose=verbose, reauth=reauth, + if_exists=if_exists, private_key=private_key) diff --git a/pandas/io/html.py b/pandas/io/html.py index 3c38dae91eb89..300a5a151f5d2 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -12,15 +12,16 @@ import numpy as np -from pandas.types.common import is_list_like -from pandas.io.common import (EmptyDataError, _is_url, urlopen, +from pandas.core.dtypes.common import is_list_like +from pandas.errors import EmptyDataError +from pandas.io.common import (_is_url, urlopen, parse_url, _validate_header_arg) from pandas.io.parsers import TextParser from pandas.compat import (lrange, lmap, u, string_types, iteritems, raise_with_traceback, binary_type) from pandas import Series -from pandas.core.common import AbstractMethodError -from pandas.formats.printing import pprint_thing +import pandas.core.common as com +from pandas.io.formats.printing import pprint_thing _IMPORTS = False _HAS_BS4 = False @@ -36,8 +37,6 @@ def _importers(): if _IMPORTS: return - _IMPORTS = True - global _HAS_BS4, _HAS_LXML, _HAS_HTML5LIB try: @@ -58,6 +57,8 @@ def _importers(): except ImportError: pass + _IMPORTS = True + ############# # READ HTML # @@ -159,6 +160,14 @@ class _HtmlFrameParser(object): attrs : dict List of HTML element attributes to match. 
+ encoding : str + Encoding to be used by parser + + displayed_only : bool + Whether or not items with "display:none" should be ignored + + .. versionadded:: 0.23.0 + Attributes ---------- io : str or file-like @@ -171,6 +180,14 @@ class _HtmlFrameParser(object): A dictionary of valid table attributes to use to search for table elements. + encoding : str + Encoding to be used by parser + + displayed_only : bool + Whether or not items with "display:none" should be ignored + + .. versionadded:: 0.23.0 + Notes ----- To subclass this class effectively you must override the following methods: @@ -186,11 +203,12 @@ class _HtmlFrameParser(object): functionality. """ - def __init__(self, io, match, attrs, encoding): + def __init__(self, io, match, attrs, encoding, displayed_only): self.io = io self.match = match self.attrs = attrs self.encoding = encoding + self.displayed_only = displayed_only def parse_tables(self): tables = self._parse_tables(self._build_doc(), self.match, self.attrs) @@ -233,7 +251,7 @@ def _text_getter(self, obj): text : str or unicode The text from an individual DOM node. """ - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) def _parse_td(self, obj): """Return the td elements from a row element. @@ -247,7 +265,7 @@ def _parse_td(self, obj): columns : list of node-like These are the elements of each row, i.e., the columns. """ - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) def _parse_tables(self, doc, match, attrs): """Return all tables from the parsed DOM. @@ -262,7 +280,7 @@ def _parse_tables(self, doc, match, attrs): attrs : dict A dictionary of table attributes that can be used to disambiguate - mutliple tables on a page. + multiple tables on a page. Raises ------ @@ -274,7 +292,7 @@ def _parse_tables(self, doc, match, attrs): tables : list of node-like A list of
elements to be parsed into raw data. """ - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) def _parse_tr(self, table): """Return the list of row elements from the parsed table element. @@ -289,7 +307,7 @@ def _parse_tr(self, table): rows : list of node-like A list row elements of a table, usually or ... element. """ - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) def _parse_tbody(self, table): """Return the body of the table. @@ -319,7 +337,7 @@ def _parse_tbody(self, table): tbody : node-like A ... element. """ - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) def _parse_tfoot(self, table): """Return the footer of the table if any. @@ -334,7 +352,7 @@ def _parse_tfoot(self, table): tfoot : node-like A ... element. """ - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) def _build_doc(self): """Return a tree-like object that can be used to iterate over the DOM. @@ -343,7 +361,7 @@ def _build_doc(self): ------- obj : tree-like """ - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) def _build_table(self, table): header = self._parse_raw_thead(table) @@ -355,9 +373,12 @@ def _parse_raw_thead(self, table): thead = self._parse_thead(table) res = [] if thead: - res = lmap(self._text_getter, self._parse_th(thead[0])) - return np.atleast_1d( - np.array(res).squeeze()) if res and len(res) == 1 else res + trs = self._parse_tr(thead[0]) + for tr in trs: + cols = lmap(self._text_getter, self._parse_td(tr)) + if any(col != '' for col in cols): + res.append(cols) + return res def _parse_raw_tfoot(self, table): tfoot = self._parse_tfoot(table) @@ -376,6 +397,27 @@ def _parse_raw_tbody(self, table): res = self._parse_tr(table) return self._parse_raw_data(res) + def _handle_hidden_tables(self, tbl_list, attr_name): + """Returns list of tables, potentially removing hidden elements + + Parameters + ---------- + tbl_list : list of Tag or list of Element + Type of list elements 
will vary depending upon parser used + attr_name : str + Name of the accessor for retrieving HTML attributes + + Returns + ------- + list of Tag or list of Element + Return type matches `tbl_list` + """ + if not self.displayed_only: + return tbl_list + + return [x for x in tbl_list if "display:none" not in + getattr(x, attr_name).get('style', '').replace(" ", "")] + class _BeautifulSoupHtml5LibFrameParser(_HtmlFrameParser): """HTML to DataFrame parser that uses BeautifulSoup under the hood. @@ -427,22 +469,29 @@ def _parse_tables(self, doc, match, attrs): result = [] unique_tables = set() + tables = self._handle_hidden_tables(tables, "attrs") for table in tables: + if self.displayed_only: + for elem in table.find_all( + style=re.compile(r"display:\s*none")): + elem.decompose() + if (table not in unique_tables and table.find(text=match) is not None): result.append(table) unique_tables.add(table) if not result: - raise ValueError("No tables found matching pattern %r" % - match.pattern) + raise ValueError("No tables found matching pattern {patt!r}" + .format(patt=match.pattern)) return result def _setup_build_doc(self): raw_text = _read(self.io) if not raw_text: - raise ValueError('No text parsed from document: %s' % self.io) + raise ValueError('No text parsed from document: {doc}' + .format(doc=self.io)) return raw_text def _build_doc(self): @@ -469,8 +518,8 @@ def _build_xpath_expr(attrs): if 'class_' in attrs: attrs['class'] = attrs.pop('class_') - s = [u("@%s=%r") % (k, v) for k, v in iteritems(attrs)] - return u('[%s]') % ' and '.join(s) + s = [u("@{key}={val!r}").format(key=k, val=v) for k, v in iteritems(attrs)] + return u('[{expr}]').format(expr=' and '.join(s)) _re_namespace = {'re': 'http://exslt.org/regular-expressions'} @@ -513,8 +562,8 @@ def _parse_tables(self, doc, match, kwargs): # 1. check all descendants for the given pattern and only search tables # 2. 
go up the tree until we find a table - query = '//table//*[re:test(text(), %r)]/ancestor::table' - xpath_expr = u(query) % pattern + query = '//table//*[re:test(text(), {patt!r})]/ancestor::table' + xpath_expr = u(query).format(patt=pattern) # if any table attributes were given build an xpath expression to # search for them @@ -523,8 +572,20 @@ def _parse_tables(self, doc, match, kwargs): tables = doc.xpath(xpath_expr, namespaces=_re_namespace) + tables = self._handle_hidden_tables(tables, "attrib") + if self.displayed_only: + for table in tables: + # lxml utilizes XPATH 1.0 which does not have regex + # support. As a result, we find all elements with a style + # attribute and iterate them to check for display:none + for elem in table.xpath('.//*[@style]'): + if "display:none" in elem.attrib.get( + "style", "").replace(" ", ""): + elem.getparent().remove(elem) + if not tables: - raise ValueError("No tables found matching regex %r" % pattern) + raise ValueError("No tables found matching regex {patt!r}" + .format(patt=pattern)) return tables def _build_doc(self): @@ -570,8 +631,9 @@ def _build_doc(self): scheme = parse_url(self.io).scheme if scheme not in _valid_schemes: # lxml can't parse it - msg = ('%r is not a valid url scheme, valid schemes are ' - '%s') % (scheme, _valid_schemes) + msg = (('{invalid!r} is not a valid url scheme, valid ' + 'schemes are {valid}') + .format(invalid=scheme, valid=_valid_schemes)) raise ValueError(msg) else: # something else happened: maybe a faulty connection @@ -591,9 +653,17 @@ def _parse_tfoot(self, table): return table.xpath('.//tfoot') def _parse_raw_thead(self, table): - expr = './/thead//th' - return [_remove_whitespace(x.text_content()) for x in - table.xpath(expr)] + expr = './/thead' + thead = table.xpath(expr) + res = [] + if thead: + trs = self._parse_tr(thead[0]) + for tr in trs: + cols = [_remove_whitespace(x.text_content()) for x in + self._parse_td(tr)] + if any(col != '' for col in cols): + res.append(cols) + 
return res def _parse_raw_tfoot(self, table): expr = './/tfoot//th|//tfoot//td' @@ -615,19 +685,17 @@ def _data_to_frame(**kwargs): head, body, foot = kwargs.pop('data') header = kwargs.pop('header') kwargs['skiprows'] = _get_skiprows(kwargs['skiprows']) - if head: - body = [head] + body - + rows = lrange(len(head)) + body = head + body if header is None: # special case when a table has ', result) - - def test_to_html_multiindex(self): - columns = MultiIndex.from_tuples(list(zip(np.arange(2).repeat(2), - np.mod(lrange(4), 2))), - names=['CL0', 'CL1']) - df = DataFrame([list('abcd'), list('efgh')], columns=columns) - result = df.to_html(justify='left') - expected = ('
elements. """ - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) def _parse_thead(self, table): """Return the header of a table. @@ -304,7 +322,7 @@ def _parse_thead(self, table): thead : node-like A
elements - header = 0 + header = 0 if rows == [0] else rows if foot: body += [foot] # fill out elements of body that are "ragged" _expand_elements(body) - tp = TextParser(body, header=header, **kwargs) df = tp.read() return df @@ -660,8 +728,9 @@ def _parser_dispatch(flavor): """ valid_parsers = list(_valid_parsers.keys()) if flavor not in valid_parsers: - raise ValueError('%r is not a valid flavor, valid flavors are %s' % - (flavor, valid_parsers)) + raise ValueError('{invalid!r} is not a valid flavor, valid flavors ' + 'are {valid}' + .format(invalid=flavor, valid=valid_parsers)) if flavor in ('bs4', 'html5lib'): if not _HAS_HTML5LIB: @@ -670,7 +739,7 @@ def _parser_dispatch(flavor): raise ImportError( "BeautifulSoup4 (bs4) not found, please install it") import bs4 - if bs4.__version__ == LooseVersion('4.2.0'): + if LooseVersion(bs4.__version__) == LooseVersion('4.2.0'): raise ValueError("You're using a version" " of BeautifulSoup4 (4.2.0) that has been" " known to cause problems on certain" @@ -685,7 +754,7 @@ def _parser_dispatch(flavor): def _print_as_set(s): - return '{%s}' % ', '.join([pprint_thing(el) for el in s]) + return '{{arg}}'.format(arg=', '.join(pprint_thing(el) for el in s)) def _validate_flavor(flavor): @@ -695,25 +764,27 @@ def _validate_flavor(flavor): flavor = flavor, elif isinstance(flavor, collections.Iterable): if not all(isinstance(flav, string_types) for flav in flavor): - raise TypeError('Object of type %r is not an iterable of strings' % - type(flavor).__name__) + raise TypeError('Object of type {typ!r} is not an iterable of ' + 'strings' + .format(typ=type(flavor).__name__)) else: - fmt = '{0!r}' if isinstance(flavor, string_types) else '{0}' + fmt = '{flavor!r}' if isinstance(flavor, string_types) else '{flavor}' fmt += ' is not a valid flavor' - raise ValueError(fmt.format(flavor)) + raise ValueError(fmt.format(flavor=flavor)) flavor = tuple(flavor) valid_flavors = set(_valid_parsers) flavor_set = set(flavor) if not flavor_set & 
valid_flavors: - raise ValueError('%s is not a valid set of flavors, valid flavors are ' - '%s' % (_print_as_set(flavor_set), - _print_as_set(valid_flavors))) + raise ValueError('{invalid} is not a valid set of flavors, valid ' + 'flavors are {valid}' + .format(invalid=_print_as_set(flavor_set), + valid=_print_as_set(valid_flavors))) return flavor -def _parse(flavor, io, match, attrs, encoding, **kwargs): +def _parse(flavor, io, match, attrs, encoding, displayed_only, **kwargs): flavor = _validate_flavor(flavor) compiled_match = re.compile(match) # you can pass a compiled regex here @@ -721,11 +792,23 @@ def _parse(flavor, io, match, attrs, encoding, **kwargs): retained = None for flav in flavor: parser = _parser_dispatch(flav) - p = parser(io, compiled_match, attrs, encoding) + p = parser(io, compiled_match, attrs, encoding, displayed_only) try: tables = p.parse_tables() except Exception as caught: + # if `io` is an io-like object, check if it's seekable + # and try to rewind it before trying the next parser + if hasattr(io, 'seekable') and io.seekable(): + io.seek(0) + elif hasattr(io, 'seekable') and not io.seekable(): + # if we couldn't rewind it, let the user know + raise ValueError('The flavor {} failed to parse your input. ' + 'Since you passed a non-rewindable file ' + 'object, we can\'t rewind it to try ' + 'another parser. Try read_html() with a ' + 'different flavor.'.format(flav)) + retained = caught else: break @@ -743,9 +826,9 @@ def _parse(flavor, io, match, attrs, encoding, **kwargs): def read_html(io, match='.+', flavor=None, header=None, index_col=None, skiprows=None, attrs=None, parse_dates=False, - tupleize_cols=False, thousands=',', encoding=None, + tupleize_cols=None, thousands=',', encoding=None, decimal='.', converters=None, na_values=None, - keep_default_na=True): + keep_default_na=True, displayed_only=True): r"""Read HTML tables into a ``list`` of ``DataFrame`` objects. 
Parameters @@ -812,6 +895,9 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None, :class:`~pandas.MultiIndex`, otherwise return raw tuples. Defaults to ``False``. + .. deprecated:: 0.21.0 + This argument will be removed and will always convert to MultiIndex + thousands : str, optional Separator to use to parse thousands. Defaults to ``','``. @@ -846,6 +932,11 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None, .. versionadded:: 0.19.0 + display_only : bool, default True + Whether elements with "display: none" should be parsed + + .. versionadded:: 0.23.0 + Returns ------- dfs : list of DataFrames @@ -853,7 +944,7 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None, Notes ----- Before using this function you should read the :ref:`gotchas about the - HTML parsing libraries `. + HTML parsing libraries `. Expect to do some cleanup after you call this function. For example, you might need to manually assign column names if the column names are @@ -893,4 +984,5 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None, parse_dates=parse_dates, tupleize_cols=tupleize_cols, thousands=thousands, attrs=attrs, encoding=encoding, decimal=decimal, converters=converters, na_values=na_values, - keep_default_na=keep_default_na) + keep_default_na=keep_default_na, + displayed_only=displayed_only) diff --git a/pandas/io/json/__init__.py b/pandas/io/json/__init__.py index a9390a04cc2cd..32d110b3404a9 100644 --- a/pandas/io/json/__init__.py +++ b/pandas/io/json/__init__.py @@ -1,4 +1,5 @@ from .json import to_json, read_json, loads, dumps # noqa from .normalize import json_normalize # noqa +from .table_schema import build_table_schema # noqa -del json, normalize # noqa +del json, normalize, table_schema # noqa diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py index 6fc766081eefe..1627b2f4d3ec3 100644 --- a/pandas/io/json/json.py +++ b/pandas/io/json/json.py @@ -1,50 +1,71 @@ # pylint: 
disable-msg=E1101,W0613,W0603 - +from itertools import islice import os import numpy as np -import pandas.json as _json -from pandas.tslib import iNaT -from pandas.compat import StringIO, long, u -from pandas import compat, isnull -from pandas import Series, DataFrame, to_datetime -from pandas.io.common import get_filepath_or_buffer, _get_handle -from pandas.core.common import AbstractMethodError -from pandas.formats.printing import pprint_thing +import pandas._libs.json as json +from pandas._libs.tslib import iNaT +from pandas.compat import StringIO, long, u, to_str +from pandas import compat, isna +from pandas import Series, DataFrame, to_datetime, MultiIndex +from pandas.io.common import (get_filepath_or_buffer, _get_handle, + _infer_compression, _stringify_path, + BaseIterator) +from pandas.io.parsers import _validate_integer +import pandas.core.common as com +from pandas.core.reshape.concat import concat +from pandas.io.formats.printing import pprint_thing from .normalize import _convert_to_line_delimits +from .table_schema import build_table_schema, parse_table_schema +from pandas.core.dtypes.common import is_period_dtype + +loads = json.loads +dumps = json.dumps -loads = _json.loads -dumps = _json.dumps +TABLE_SCHEMA_VERSION = '0.20.0' # interface to/from def to_json(path_or_buf, obj, orient=None, date_format='epoch', double_precision=10, force_ascii=True, date_unit='ms', - default_handler=None, lines=False): + default_handler=None, lines=False, compression=None, + index=True): + + if not index and orient not in ['split', 'table']: + raise ValueError("'index=False' is only valid when 'orient' is " + "'split' or 'table'") + path_or_buf = _stringify_path(path_or_buf) if lines and orient != 'records': raise ValueError( "'lines' keyword only valid when 'orient' is records") - if isinstance(obj, Series): - s = SeriesWriter( - obj, orient=orient, date_format=date_format, - double_precision=double_precision, ensure_ascii=force_ascii, - date_unit=date_unit, 
default_handler=default_handler).write() + if orient == 'table' and isinstance(obj, Series): + obj = obj.to_frame(name=obj.name or 'values') + if orient == 'table' and isinstance(obj, DataFrame): + writer = JSONTableWriter + elif isinstance(obj, Series): + writer = SeriesWriter elif isinstance(obj, DataFrame): - s = FrameWriter( - obj, orient=orient, date_format=date_format, - double_precision=double_precision, ensure_ascii=force_ascii, - date_unit=date_unit, default_handler=default_handler).write() + writer = FrameWriter else: raise NotImplementedError("'obj' should be a Series or a DataFrame") + s = writer( + obj, orient=orient, date_format=date_format, + double_precision=double_precision, ensure_ascii=force_ascii, + date_unit=date_unit, default_handler=default_handler, + index=index).write() + if lines: s = _convert_to_line_delimits(s) if isinstance(path_or_buf, compat.string_types): - with open(path_or_buf, 'w') as fh: + fh, handles = _get_handle(path_or_buf, 'w', compression=compression) + try: fh.write(s) + finally: + fh.close() elif path_or_buf is None: return s else: @@ -54,7 +75,7 @@ def to_json(path_or_buf, obj, orient=None, date_format='epoch', class Writer(object): def __init__(self, obj, orient, date_format, double_precision, - ensure_ascii, date_unit, default_handler=None): + ensure_ascii, date_unit, index, default_handler=None): self.obj = obj if orient is None: @@ -66,22 +87,30 @@ def __init__(self, obj, orient, date_format, double_precision, self.ensure_ascii = ensure_ascii self.date_unit = date_unit self.default_handler = default_handler + self.index = index self.is_copy = None self._format_axes() def _format_axes(self): - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) def write(self): + return self._write(self.obj, self.orient, self.double_precision, + self.ensure_ascii, self.date_unit, + self.date_format == 'iso', self.default_handler) + + def _write(self, obj, orient, double_precision, ensure_ascii, + date_unit, 
iso_dates, default_handler): return dumps( - self.obj, - orient=self.orient, - double_precision=self.double_precision, - ensure_ascii=self.ensure_ascii, - date_unit=self.date_unit, - iso_dates=self.date_format == 'iso', - default_handler=self.default_handler) + obj, + orient=orient, + double_precision=double_precision, + ensure_ascii=ensure_ascii, + date_unit=date_unit, + iso_dates=iso_dates, + default_handler=default_handler + ) class SeriesWriter(Writer): @@ -90,7 +119,16 @@ class SeriesWriter(Writer): def _format_axes(self): if not self.obj.index.is_unique and self.orient == 'index': raise ValueError("Series index must be unique for orient=" - "'%s'" % self.orient) + "'{orient}'".format(orient=self.orient)) + + def _write(self, obj, orient, double_precision, ensure_ascii, + date_unit, iso_dates, default_handler): + if not self.index and orient == 'split': + obj = {"name": obj.name, "data": obj.values} + return super(SeriesWriter, self)._write(obj, orient, + double_precision, + ensure_ascii, date_unit, + iso_dates, default_handler) class FrameWriter(Writer): @@ -101,17 +139,92 @@ def _format_axes(self): if not self.obj.index.is_unique and self.orient in ( 'index', 'columns'): raise ValueError("DataFrame index must be unique for orient=" - "'%s'." % self.orient) + "'{orient}'.".format(orient=self.orient)) if not self.obj.columns.is_unique and self.orient in ( 'index', 'columns', 'records'): raise ValueError("DataFrame columns must be unique for orient=" - "'%s'." 
% self.orient) + "'{orient}'.".format(orient=self.orient)) + + def _write(self, obj, orient, double_precision, ensure_ascii, + date_unit, iso_dates, default_handler): + if not self.index and orient == 'split': + obj = obj.to_dict(orient='split') + del obj["index"] + return super(FrameWriter, self)._write(obj, orient, + double_precision, + ensure_ascii, date_unit, + iso_dates, default_handler) + + +class JSONTableWriter(FrameWriter): + _default_orient = 'records' + + def __init__(self, obj, orient, date_format, double_precision, + ensure_ascii, date_unit, index, default_handler=None): + """ + Adds a `schema` attribute with the Table Schema, resets + the index (can't do in caller, because the schema inference needs + to know what the index is, forces orient to records, and forces + date_format to 'iso'. + """ + super(JSONTableWriter, self).__init__( + obj, orient, date_format, double_precision, ensure_ascii, + date_unit, index, default_handler=default_handler) + + if date_format != 'iso': + msg = ("Trying to write with `orient='table'` and " + "`date_format='{fmt}'`. 
Table Schema requires dates " + "to be formatted with `date_format='iso'`" + .format(fmt=date_format)) + raise ValueError(msg) + + self.schema = build_table_schema(obj, index=self.index) + + # NotImplementd on a column MultiIndex + if obj.ndim == 2 and isinstance(obj.columns, MultiIndex): + raise NotImplementedError( + "orient='table' is not supported for MultiIndex") + + # TODO: Do this timedelta properly in objToJSON.c See GH #15137 + if ((obj.ndim == 1) and (obj.name in set(obj.index.names)) or + len(obj.columns & obj.index.names)): + msg = "Overlapping names between the index and columns" + raise ValueError(msg) + + obj = obj.copy() + timedeltas = obj.select_dtypes(include=['timedelta']).columns + if len(timedeltas): + obj[timedeltas] = obj[timedeltas].applymap( + lambda x: x.isoformat()) + # Convert PeriodIndex to datetimes before serialzing + if is_period_dtype(obj.index): + obj.index = obj.index.to_timestamp() + + # exclude index from obj if index=False + if not self.index: + self.obj = obj.reset_index(drop=True) + else: + self.obj = obj.reset_index(drop=False) + self.date_format = 'iso' + self.orient = 'records' + self.index = index + + def _write(self, obj, orient, double_precision, ensure_ascii, + date_unit, iso_dates, default_handler): + data = super(JSONTableWriter, self)._write(obj, orient, + double_precision, + ensure_ascii, date_unit, + iso_dates, + default_handler) + serialized = '{{"schema": {schema}, "data": {data}}}'.format( + schema=dumps(self.schema), data=data) + return serialized def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True, convert_axes=True, convert_dates=True, keep_default_dates=True, numpy=False, precise_float=False, date_unit=None, encoding=None, - lines=False): + lines=False, chunksize=None, compression='infer'): """ Convert a JSON string to pandas object @@ -148,13 +261,16 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True, * when ``typ == 'frame'``, - allowed orients are 
``{'split','records','index', - 'columns','values'}`` + 'columns','values', 'table'}`` - default is ``'columns'`` - The DataFrame index must be unique for orients ``'index'`` and ``'columns'``. - The DataFrame columns must be unique for orients ``'index'``, ``'columns'``, and ``'records'``. + .. versionadded:: 0.23.0 + 'table' as an allowed value for the ``orient`` argument + typ : type of object to recover (series or frame), default 'frame' dtype : boolean or dict, default True If True, infer dtypes, if a dict of column to dtype, then use those, @@ -200,10 +316,40 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True, .. versionadded:: 0.19.0 + chunksize: integer, default None + Return JsonReader object for iteration. + See the `line-delimted json docs + `_ + for more information on ``chunksize``. + This can only be passed if `lines=True`. + If this is None, the file will be read into memory all at once. + + .. versionadded:: 0.21.0 + + compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer' + For on-the-fly decompression of on-disk data. If 'infer', then use + gzip, bz2, zip or xz if path_or_buf is a string ending in + '.gz', '.bz2', '.zip', or 'xz', respectively, and no decompression + otherwise. If using 'zip', the ZIP file must contain only one data + file to be read in. Set to None for no decompression. + + .. versionadded:: 0.21.0 + Returns ------- result : Series or DataFrame, depending on the value of `typ`. + Notes + ----- + Specific to ``orient='table'``, if a :class:`DataFrame` with a literal + :class:`Index` name of `index` gets written with :func:`to_json`, the + subsequent read operation will incorrectly set the :class:`Index` name to + ``None``. This is because `index` is also used by :func:`DataFrame.to_json` + to denote a missing :class:`Index` name, and the subsequent + :func:`read_json` operation cannot distinguish between the two. 
The same + limitation is encountered with a :class:`MultiIndex` and any names + beginning with ``'level_'``. + See Also -------- DataFrame.to_json @@ -244,51 +390,193 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True, col 1 col 2 0 a b 1 c d + + Encoding with Table Schema + + >>> df.to_json(orient='table') + '{"schema": {"fields": [{"name": "index", "type": "string"}, + {"name": "col 1", "type": "string"}, + {"name": "col 2", "type": "string"}], + "primaryKey": "index", + "pandas_version": "0.20.0"}, + "data": [{"index": "row 1", "col 1": "a", "col 2": "b"}, + {"index": "row 2", "col 1": "c", "col 2": "d"}]}' """ - filepath_or_buffer, _, _ = get_filepath_or_buffer(path_or_buf, - encoding=encoding) - if isinstance(filepath_or_buffer, compat.string_types): + compression = _infer_compression(path_or_buf, compression) + filepath_or_buffer, _, compression, should_close = get_filepath_or_buffer( + path_or_buf, encoding=encoding, compression=compression, + ) + + json_reader = JsonReader( + filepath_or_buffer, orient=orient, typ=typ, dtype=dtype, + convert_axes=convert_axes, convert_dates=convert_dates, + keep_default_dates=keep_default_dates, numpy=numpy, + precise_float=precise_float, date_unit=date_unit, encoding=encoding, + lines=lines, chunksize=chunksize, compression=compression, + ) + + if chunksize: + return json_reader + + result = json_reader.read() + if should_close: try: - exists = os.path.exists(filepath_or_buffer) + filepath_or_buffer.close() + except: # noqa: flake8 + pass + return result - # if the filepath is too long will raise here - # 5874 - except (TypeError, ValueError): - exists = False - if exists: - fh, handles = _get_handle(filepath_or_buffer, 'r', - encoding=encoding) - json = fh.read() - fh.close() +class JsonReader(BaseIterator): + """ + JsonReader provides an interface for reading in a JSON file. + + If initialized with ``lines=True`` and ``chunksize``, can be iterated over + ``chunksize`` lines at a time. 
Otherwise, calling ``read`` reads in the + whole document. + """ + def __init__(self, filepath_or_buffer, orient, typ, dtype, convert_axes, + convert_dates, keep_default_dates, numpy, precise_float, + date_unit, encoding, lines, chunksize, compression): + + self.path_or_buf = filepath_or_buffer + self.orient = orient + self.typ = typ + self.dtype = dtype + self.convert_axes = convert_axes + self.convert_dates = convert_dates + self.keep_default_dates = keep_default_dates + self.numpy = numpy + self.precise_float = precise_float + self.date_unit = date_unit + self.encoding = encoding + self.compression = compression + self.lines = lines + self.chunksize = chunksize + self.nrows_seen = 0 + self.should_close = False + + if self.chunksize is not None: + self.chunksize = _validate_integer("chunksize", self.chunksize, 1) + if not self.lines: + raise ValueError("chunksize can only be passed if lines=True") + + data = self._get_data_from_filepath(filepath_or_buffer) + self.data = self._preprocess_data(data) + + def _preprocess_data(self, data): + """ + At this point, the data either has a `read` attribute (e.g. a file + object or a StringIO) or is a string that is a JSON document. + + If self.chunksize, we prepare the data for the `__next__` method. + Otherwise, we read it into memory for the `read` method. + """ + if hasattr(data, 'read') and not self.chunksize: + data = data.read() + if not hasattr(data, 'read') and self.chunksize: + data = StringIO(data) + + return data + + def _get_data_from_filepath(self, filepath_or_buffer): + """ + read_json accepts three input types: + 1. filepath (string-like) + 2. file-like object (e.g. open file object, StringIO) + 3. JSON string + + This method turns (1) into (2) to simplify the rest of the processing. + It returns input types (2) and (3) unchanged. 
+ """ + + data = filepath_or_buffer + + exists = False + if isinstance(data, compat.string_types): + try: + exists = os.path.exists(filepath_or_buffer) + # gh-5874: if the filepath is too long will raise here + except (TypeError, ValueError): + pass + + if exists or self.compression is not None: + data, _ = _get_handle(filepath_or_buffer, 'r', + encoding=self.encoding, + compression=self.compression) + self.should_close = True + self.open_stream = data + + return data + + def _combine_lines(self, lines): + """Combines a list of JSON objects into one JSON object""" + lines = filter(None, map(lambda x: x.strip(), lines)) + return '[' + ','.join(lines) + ']' + + def read(self): + """Read the whole JSON input into a pandas object""" + if self.lines and self.chunksize: + obj = concat(self) + elif self.lines: + + data = to_str(self.data) + obj = self._get_object_parser( + self._combine_lines(data.split('\n')) + ) else: - json = filepath_or_buffer - elif hasattr(filepath_or_buffer, 'read'): - json = filepath_or_buffer.read() - else: - json = filepath_or_buffer + obj = self._get_object_parser(self.data) + self.close() + return obj + + def _get_object_parser(self, json): + """parses a json document into a pandas object""" + typ = self.typ + dtype = self.dtype + kwargs = { + "orient": self.orient, "dtype": self.dtype, + "convert_axes": self.convert_axes, + "convert_dates": self.convert_dates, + "keep_default_dates": self.keep_default_dates, "numpy": self.numpy, + "precise_float": self.precise_float, "date_unit": self.date_unit + } + obj = None + if typ == 'frame': + obj = FrameParser(json, **kwargs).parse() + + if typ == 'series' or obj is None: + if not isinstance(dtype, bool): + dtype = dict(data=dtype) + obj = SeriesParser(json, **kwargs).parse() + + return obj + + def close(self): + """ + If we opened a stream earlier, in _get_data_from_filepath, we should + close it. If an open stream or file was passed, we leave it open. 
+ """ + if self.should_close: + try: + self.open_stream.close() + except (IOError, AttributeError): + pass - if lines: - # If given a json lines file, we break the string into lines, add - # commas and put it in a json list to make a valid json object. - lines = list(StringIO(json.strip())) - json = '[' + ','.join(lines) + ']' + def __next__(self): + lines = list(islice(self.data, self.chunksize)) + if lines: + lines_json = self._combine_lines(lines) + obj = self._get_object_parser(lines_json) - obj = None - if typ == 'frame': - obj = FrameParser(json, orient, dtype, convert_axes, convert_dates, - keep_default_dates, numpy, precise_float, - date_unit).parse() + # Make sure that the returned objects have the right index. + obj.index = range(self.nrows_seen, self.nrows_seen + len(obj)) + self.nrows_seen += len(obj) - if typ == 'series' or obj is None: - if not isinstance(dtype, bool): - dtype = dict(data=dtype) - obj = SeriesParser(json, orient, dtype, convert_axes, convert_dates, - keep_default_dates, numpy, precise_float, - date_unit).parse() + return obj - return obj + self.close() + raise StopIteration class Parser(object): @@ -317,8 +605,8 @@ def __init__(self, json, orient, dtype=True, convert_axes=True, if date_unit is not None: date_unit = date_unit.lower() if date_unit not in self._STAMP_UNITS: - raise ValueError('date_unit must be one of %s' % - (self._STAMP_UNITS,)) + raise ValueError('date_unit must be one of {units}' + .format(units=self._STAMP_UNITS)) self.min_stamp = self._MIN_STAMPS[date_unit] else: self.min_stamp = self._MIN_STAMPS['s'] @@ -336,8 +624,8 @@ def check_keys_split(self, decoded): bad_keys = set(decoded.keys()).difference(set(self._split_keys)) if bad_keys: bad_keys = ", ".join(bad_keys) - raise ValueError(u("JSON data had unexpected key(s): %s") % - pprint_thing(bad_keys)) + raise ValueError(u("JSON data had unexpected key(s): {bad_keys}") + .format(bad_keys=pprint_thing(bad_keys))) def parse(self): @@ -366,7 +654,7 @@ def 
_convert_axes(self): setattr(self.obj, axis, new_axis) def _try_convert_types(self): - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) def _try_convert_data(self, name, data, use_dtypes=True, convert_dates=True): @@ -388,7 +676,7 @@ def _try_convert_data(self, name, data, use_dtypes=True, try: dtype = np.dtype(dtype) return data.astype(dtype), True - except: + except (TypeError, ValueError): return data, False if convert_dates: @@ -404,7 +692,7 @@ def _try_convert_data(self, name, data, use_dtypes=True, try: data = data.astype('float64') result = True - except: + except (TypeError, ValueError): pass if data.dtype.kind == 'f': @@ -415,7 +703,7 @@ def _try_convert_data(self, name, data, use_dtypes=True, try: data = data.astype('float64') result = True - except: + except (TypeError, ValueError): pass # do't coerce 0-len data @@ -427,7 +715,7 @@ def _try_convert_data(self, name, data, use_dtypes=True, if (new_data == data).all(): data = new_data result = True - except: + except (TypeError, ValueError): pass # coerce ints to 64 @@ -437,7 +725,7 @@ def _try_convert_data(self, name, data, use_dtypes=True, try: data = data.astype('int64') result = True - except: + except (TypeError, ValueError): pass return data, result @@ -456,12 +744,12 @@ def _try_convert_to_date(self, data): if new_data.dtype == 'object': try: new_data = data.astype('int64') - except: + except (TypeError, ValueError, OverflowError): pass # ignore numbers that are out of range if issubclass(new_data.dtype.type, np.number): - in_range = (isnull(new_data.values) | (new_data > self.min_stamp) | + in_range = (isna(new_data.values) | (new_data > self.min_stamp) | (new_data.values == iNaT)) if not in_range.all(): return data, False @@ -473,13 +761,13 @@ def _try_convert_to_date(self, data): unit=date_unit) except ValueError: continue - except: + except Exception: break return new_data, True return data, False def _try_convert_dates(self): - raise AbstractMethodError(self) + raise 
com.AbstractMethodError(self) class SeriesParser(Parser): @@ -491,10 +779,8 @@ def _parse_no_numpy(self): json = self.json orient = self.orient if orient == "split": - decoded = dict((str(k), v) - for k, v in compat.iteritems(loads( - json, - precise_float=self.precise_float))) + decoded = {str(k): v for k, v in compat.iteritems( + loads(json, precise_float=self.precise_float))} self.check_keys_split(decoded) self.obj = Series(dtype=None, **decoded) else: @@ -508,7 +794,7 @@ def _parse_numpy(self): if orient == "split": decoded = loads(json, dtype=None, numpy=True, precise_float=self.precise_float) - decoded = dict((str(k), v) for k, v in compat.iteritems(decoded)) + decoded = {str(k): v for k, v in compat.iteritems(decoded)} self.check_keys_split(decoded) self.obj = Series(**decoded) elif orient == "columns" or orient == "index": @@ -540,13 +826,13 @@ def _parse_numpy(self): if orient == "columns": args = loads(json, dtype=None, numpy=True, labelled=True, precise_float=self.precise_float) - if args: + if len(args): args = (args[0].T, args[2], args[1]) self.obj = DataFrame(*args) elif orient == "split": decoded = loads(json, dtype=None, numpy=True, precise_float=self.precise_float) - decoded = dict((str(k), v) for k, v in compat.iteritems(decoded)) + decoded = {str(k): v for k, v in compat.iteritems(decoded)} self.check_keys_split(decoded) self.obj = DataFrame(**decoded) elif orient == "values": @@ -566,15 +852,16 @@ def _parse_no_numpy(self): self.obj = DataFrame( loads(json, precise_float=self.precise_float), dtype=None) elif orient == "split": - decoded = dict((str(k), v) - for k, v in compat.iteritems(loads( - json, - precise_float=self.precise_float))) + decoded = {str(k): v for k, v in compat.iteritems( + loads(json, precise_float=self.precise_float))} self.check_keys_split(decoded) self.obj = DataFrame(dtype=None, **decoded) elif orient == "index": self.obj = DataFrame( loads(json, precise_float=self.precise_float), dtype=None).T + elif orient == 'table': + 
self.obj = parse_table_schema(json, + precise_float=self.precise_float) else: self.obj = DataFrame( loads(json, precise_float=self.precise_float), dtype=None) diff --git a/pandas/io/json/normalize.py b/pandas/io/json/normalize.py index d684441c5974d..c7901f4352d00 100644 --- a/pandas/io/json/normalize.py +++ b/pandas/io/json/normalize.py @@ -5,7 +5,7 @@ from collections import defaultdict import numpy as np -from pandas.lib import convert_json_to_lines +from pandas._libs.writers import convert_json_to_lines from pandas import compat, DataFrame @@ -21,7 +21,7 @@ def _convert_to_line_delimits(s): return convert_json_to_lines(s) -def nested_to_record(ds, prefix="", level=0): +def nested_to_record(ds, prefix="", sep=".", level=0): """a simplified json_normalize converts a nested dict into a flat dict ("record"), unlike json_normalize, @@ -31,6 +31,12 @@ def nested_to_record(ds, prefix="", level=0): ---------- ds : dict or list of dicts prefix: the prefix, optional, default: "" + sep : string, default '.' + Nested records will generate names separated by sep, + e.g., for sep='.', { 'foo' : { 'bar' : 0 } } -> foo.bar + + .. versionadded:: 0.20.0 + level: the number of levels in the jason string, optional, default: 0 Returns @@ -66,7 +72,7 @@ def nested_to_record(ds, prefix="", level=0): if level == 0: newkey = k else: - newkey = prefix + '.' 
+ k + newkey = prefix + sep + k # only dicts gets recurse-flattend # only at level>1 do we rename the rest of the keys @@ -77,7 +83,7 @@ def nested_to_record(ds, prefix="", level=0): continue else: v = new_d.pop(k) - new_d.update(nested_to_record(v, newkey, level + 1)) + new_d.update(nested_to_record(v, newkey, sep, level + 1)) new_ds.append(new_d) if singleton: @@ -88,7 +94,8 @@ def nested_to_record(ds, prefix="", level=0): def json_normalize(data, record_path=None, meta=None, meta_prefix=None, record_prefix=None, - errors='raise'): + errors='raise', + sep='.'): """ "Normalize" semi-structured JSON data into a flat table @@ -107,13 +114,20 @@ def json_normalize(data, record_path=None, meta=None, meta_prefix : string, default None errors : {'raise', 'ignore'}, default 'raise' - * ignore : will ignore KeyError if keys listed in meta are not - always present - * raise : will raise KeyError if keys listed in meta are not - always present + * 'ignore' : will ignore KeyError if keys listed in meta are not + always present + * 'raise' : will raise KeyError if keys listed in meta are not + always present + + .. versionadded:: 0.20.0 + + sep : string, default '.' + Nested records will generate names separated by sep, + e.g., for sep='.', { 'foo' : { 'bar' : 0 } } -> foo.bar .. versionadded:: 0.20.0 + Returns ------- frame : DataFrame @@ -121,6 +135,16 @@ def json_normalize(data, record_path=None, meta=None, Examples -------- + >>> from pandas.io.json import json_normalize + >>> data = [{'id': 1, 'name': {'first': 'Coleen', 'last': 'Volk'}}, + ... {'name': {'given': 'Mose', 'family': 'Regner'}}, + ... {'id': 2, 'name': 'Faye Raker'}] + >>> json_normalize(data) + id name name.family name.first name.given name.last + 0 1.0 NaN NaN Coleen NaN Volk + 1 NaN NaN Regner NaN Mose NaN + 2 2.0 Faye Raker NaN NaN NaN NaN + >>> data = [{'state': 'Florida', ... 'shortname': 'FL', ... 'info': { @@ -136,7 +160,6 @@ def json_normalize(data, record_path=None, meta=None, ... }, ... 
'counties': [{'name': 'Summit', 'population': 1234}, ... {'name': 'Cuyahoga', 'population': 1337}]}] - >>> from pandas.io.json import json_normalize >>> result = json_normalize(data, 'counties', ['state', 'shortname', ... ['info', 'governor']]) >>> result @@ -158,12 +181,15 @@ def _pull_field(js, spec): return result + if isinstance(data, list) and not data: + return DataFrame() + # A bit of a hackjob if isinstance(data, dict): data = [data] if record_path is None: - if any([isinstance(x, dict) for x in compat.itervalues(data[0])]): + if any(isinstance(x, dict) for x in compat.itervalues(data[0])): # naive normalization, this is idempotent for flat records # and potentially will inflate the data considerably for # deeply nested structures: @@ -171,7 +197,7 @@ def _pull_field(js, spec): # # TODO: handle record value which are lists, at least error # reasonably - data = nested_to_record(data) + data = nested_to_record(data, sep=sep) return DataFrame(data) elif not isinstance(record_path, list): record_path = [record_path] @@ -181,16 +207,16 @@ def _pull_field(js, spec): elif not isinstance(meta, list): meta = [meta] - for i, x in enumerate(meta): - if not isinstance(x, list): - meta[i] = [x] + meta = [m if isinstance(m, list) else [m] for m in meta] # Disastrously inefficient for now records = [] lengths = [] meta_vals = defaultdict(list) - meta_keys = ['.'.join(val) for val in meta] + if not isinstance(sep, compat.string_types): + sep = str(sep) + meta_keys = [sep.join(val) for val in meta] def _recursive_extract(data, path, seen_meta, level=0): if len(path) > 1: @@ -221,7 +247,8 @@ def _recursive_extract(data, path, seen_meta, level=0): raise \ KeyError("Try running with " "errors='ignore' as key " - "%s is not always present", e) + "{err} is not always present" + .format(err=e)) meta_vals[key].append(meta_val) records.extend(recs) @@ -239,8 +266,8 @@ def _recursive_extract(data, path, seen_meta, level=0): k = meta_prefix + k if k in result: - raise 
ValueError('Conflicting metadata name %s, ' - 'need distinguishing prefix ' % k) + raise ValueError('Conflicting metadata name {name}, ' + 'need distinguishing prefix '.format(name=k)) result[k] = np.array(v).repeat(lengths) diff --git a/pandas/io/json/table_schema.py b/pandas/io/json/table_schema.py new file mode 100644 index 0000000000000..01f7db7d68664 --- /dev/null +++ b/pandas/io/json/table_schema.py @@ -0,0 +1,324 @@ +""" +Table Schema builders + +http://specs.frictionlessdata.io/json-table-schema/ +""" +import warnings + +import pandas._libs.json as json +from pandas import DataFrame +from pandas.api.types import CategoricalDtype +import pandas.core.common as com +from pandas.core.dtypes.common import ( + is_integer_dtype, is_timedelta64_dtype, is_numeric_dtype, + is_bool_dtype, is_datetime64_dtype, is_datetime64tz_dtype, + is_categorical_dtype, is_period_dtype, is_string_dtype +) + +loads = json.loads + + +def as_json_table_type(x): + """ + Convert a NumPy / pandas type to its corresponding json_table. + + Parameters + ---------- + x : array or dtype + + Returns + ------- + t : str + the Table Schema data types + + Notes + ----- + This table shows the relationship between NumPy / pandas dtypes, + and Table Schema dtypes. 
+ + ============== ================= + Pandas type Table Schema type + ============== ================= + int64 integer + float64 number + bool boolean + datetime64[ns] datetime + timedelta64[ns] duration + object str + categorical any + =============== ================= + """ + if is_integer_dtype(x): + return 'integer' + elif is_bool_dtype(x): + return 'boolean' + elif is_numeric_dtype(x): + return 'number' + elif (is_datetime64_dtype(x) or is_datetime64tz_dtype(x) or + is_period_dtype(x)): + return 'datetime' + elif is_timedelta64_dtype(x): + return 'duration' + elif is_categorical_dtype(x): + return 'any' + elif is_string_dtype(x): + return 'string' + else: + return 'any' + + +def set_default_names(data): + """Sets index names to 'index' for regular, or 'level_x' for Multi""" + if com._all_not_none(*data.index.names): + nms = data.index.names + if len(nms) == 1 and data.index.name == 'index': + warnings.warn("Index name of 'index' is not round-trippable") + elif len(nms) > 1 and any(x.startswith('level_') for x in nms): + warnings.warn("Index names beginning with 'level_' are not " + "round-trippable") + return data + + data = data.copy() + if data.index.nlevels > 1: + names = [name if name is not None else 'level_{}'.format(i) + for i, name in enumerate(data.index.names)] + data.index.names = names + else: + data.index.name = data.index.name or 'index' + return data + + +def convert_pandas_type_to_json_field(arr, dtype=None): + dtype = dtype or arr.dtype + if arr.name is None: + name = 'values' + else: + name = arr.name + field = {'name': name, + 'type': as_json_table_type(dtype)} + + if is_categorical_dtype(arr): + if hasattr(arr, 'categories'): + cats = arr.categories + ordered = arr.ordered + else: + cats = arr.cat.categories + ordered = arr.cat.ordered + field['constraints'] = {"enum": list(cats)} + field['ordered'] = ordered + elif is_period_dtype(arr): + field['freq'] = arr.freqstr + elif is_datetime64tz_dtype(arr): + if hasattr(arr, 'dt'): + field['tz'] 
= arr.dt.tz.zone + else: + field['tz'] = arr.tz.zone + return field + + +def convert_json_field_to_pandas_type(field): + """ + Converts a JSON field descriptor into its corresponding NumPy / pandas type + + Parameters + ---------- + field + A JSON field descriptor + + Returns + ------- + dtype + + Raises + ----- + ValueError + If the type of the provided field is unknown or currently unsupported + + Examples + -------- + >>> convert_json_field_to_pandas_type({'name': 'an_int', + 'type': 'integer'}) + 'int64' + >>> convert_json_field_to_pandas_type({'name': 'a_categorical', + 'type': 'any', + 'contraints': {'enum': [ + 'a', 'b', 'c']}, + 'ordered': True}) + 'CategoricalDtype(categories=['a', 'b', 'c'], ordered=True)' + >>> convert_json_field_to_pandas_type({'name': 'a_datetime', + 'type': 'datetime'}) + 'datetime64[ns]' + >>> convert_json_field_to_pandas_type({'name': 'a_datetime_with_tz', + 'type': 'datetime', + 'tz': 'US/Central'}) + 'datetime64[ns, US/Central]' + """ + typ = field['type'] + if typ == 'string': + return 'object' + elif typ == 'integer': + return 'int64' + elif typ == 'number': + return 'float64' + elif typ == 'boolean': + return 'bool' + elif typ == 'duration': + return 'timedelta64' + elif typ == 'datetime': + if field.get('tz'): + return 'datetime64[ns, {tz}]'.format(tz=field['tz']) + else: + return 'datetime64[ns]' + elif typ == 'any': + if 'constraints' in field and 'ordered' in field: + return CategoricalDtype(categories=field['constraints']['enum'], + ordered=field['ordered']) + else: + return 'object' + + raise ValueError("Unsupported or invalid field type: {}".format(typ)) + + +def build_table_schema(data, index=True, primary_key=None, version=True): + """ + Create a Table schema from ``data``. + + Parameters + ---------- + data : Series, DataFrame + index : bool, default True + Whether to include ``data.index`` in the schema. + primary_key : bool or None, default True + column names to designate as the primary key. 
+ The default `None` will set `'primaryKey'` to the index + level or levels if the index is unique. + version : bool, default True + Whether to include a field `pandas_version` with the version + of pandas that generated the schema. + + Returns + ------- + schema : dict + + Examples + -------- + >>> df = pd.DataFrame( + ... {'A': [1, 2, 3], + ... 'B': ['a', 'b', 'c'], + ... 'C': pd.date_range('2016-01-01', freq='d', periods=3), + ... }, index=pd.Index(range(3), name='idx')) + >>> build_table_schema(df) + {'fields': [{'name': 'idx', 'type': 'integer'}, + {'name': 'A', 'type': 'integer'}, + {'name': 'B', 'type': 'string'}, + {'name': 'C', 'type': 'datetime'}], + 'pandas_version': '0.20.0', + 'primaryKey': ['idx']} + + Notes + ----- + See `_as_json_table_type` for conversion types. + Timedeltas as converted to ISO8601 duration format with + 9 decimal places after the secnods field for nanosecond precision. + + Categoricals are converted to the `any` dtype, and use the `enum` field + constraint to list the allowed values. The `ordered` attribute is included + in an `ordered` field. 
+ """ + if index is True: + data = set_default_names(data) + + schema = {} + fields = [] + + if index: + if data.index.nlevels > 1: + for level in data.index.levels: + fields.append(convert_pandas_type_to_json_field(level)) + else: + fields.append(convert_pandas_type_to_json_field(data.index)) + + if data.ndim > 1: + for column, s in data.iteritems(): + fields.append(convert_pandas_type_to_json_field(s)) + else: + fields.append(convert_pandas_type_to_json_field(data)) + + schema['fields'] = fields + if index and data.index.is_unique and primary_key is None: + if data.index.nlevels == 1: + schema['primaryKey'] = [data.index.name] + else: + schema['primaryKey'] = data.index.names + elif primary_key is not None: + schema['primaryKey'] = primary_key + + if version: + schema['pandas_version'] = '0.20.0' + return schema + + +def parse_table_schema(json, precise_float): + """ + Builds a DataFrame from a given schema + + Parameters + ---------- + json : + A JSON table schema + precise_float : boolean + Flag controlling precision when decoding string to double values, as + dictated by ``read_json`` + + Returns + ------- + df : DataFrame + + Raises + ------ + NotImplementedError + If the JSON table schema contains either timezone or timedelta data + + Notes + ----- + Because :func:`DataFrame.to_json` uses the string 'index' to denote a + name-less :class:`Index`, this function sets the name of the returned + :class:`DataFrame` to ``None`` when said string is encountered with a + normal :class:`Index`. For a :class:`MultiIndex`, the same limitation + applies to any strings beginning with 'level_'. Therefore, an + :class:`Index` name of 'index' and :class:`MultiIndex` names starting + with 'level_' are not supported. 
+ + See also + -------- + build_table_schema : inverse function + pandas.read_json + """ + table = loads(json, precise_float=precise_float) + col_order = [field['name'] for field in table['schema']['fields']] + df = DataFrame(table['data'])[col_order] + + dtypes = {field['name']: convert_json_field_to_pandas_type(field) + for field in table['schema']['fields']} + + # Cannot directly use as_type with timezone data on object; raise for now + if any(str(x).startswith('datetime64[ns, ') for x in dtypes.values()): + raise NotImplementedError('table="orient" can not yet read timezone ' + 'data') + + # No ISO constructor for Timedelta as of yet, so need to raise + if 'timedelta64' in dtypes.values(): + raise NotImplementedError('table="orient" can not yet read ' + 'ISO-formatted Timedelta data') + + df = df.astype(dtypes) + + df = df.set_index(table['schema']['primaryKey']) + if len(df.index.names) == 1: + if df.index.name == 'index': + df.index.name = None + else: + df.index.names = [None if x.startswith('level_') else x for x in + df.index.names] + + return df diff --git a/pandas/msgpack/__init__.py b/pandas/io/msgpack/__init__.py similarity index 81% rename from pandas/msgpack/__init__.py rename to pandas/io/msgpack/__init__.py index 33d60a12ef0a3..984e90ee03e69 100644 --- a/pandas/msgpack/__init__.py +++ b/pandas/io/msgpack/__init__.py @@ -2,8 +2,8 @@ from collections import namedtuple -from pandas.msgpack.exceptions import * # noqa -from pandas.msgpack._version import version # noqa +from pandas.io.msgpack.exceptions import * # noqa +from pandas.io.msgpack._version import version # noqa class ExtType(namedtuple('ExtType', 'code data')): @@ -19,8 +19,8 @@ def __new__(cls, code, data): import os # noqa -from pandas.msgpack._packer import Packer # noqa -from pandas.msgpack._unpacker import unpack, unpackb, Unpacker # noqa +from pandas.io.msgpack._packer import Packer # noqa +from pandas.io.msgpack._unpacker import unpack, unpackb, Unpacker # noqa def pack(o, stream, 
**kwargs): @@ -41,6 +41,7 @@ def packb(o, **kwargs): """ return Packer(**kwargs).pack(o) + # alias for compatibility to simplejson/marshal/pickle. load = unpack loads = unpackb diff --git a/pandas/msgpack/_packer.pyx b/pandas/io/msgpack/_packer.pyx similarity index 97% rename from pandas/msgpack/_packer.pyx rename to pandas/io/msgpack/_packer.pyx index 008dbe5541d50..c81069c8e04c0 100644 --- a/pandas/msgpack/_packer.pyx +++ b/pandas/io/msgpack/_packer.pyx @@ -1,16 +1,17 @@ # coding: utf-8 -#cython: embedsignature=True +# cython: embedsignature=True from cpython cimport * from libc.stdlib cimport * from libc.string cimport * from libc.limits cimport * -from pandas.msgpack.exceptions import PackValueError -from pandas.msgpack import ExtType +from pandas.io.msgpack.exceptions import PackValueError +from pandas.io.msgpack import ExtType +import numpy as np -cdef extern from "../src/msgpack/pack.h": +cdef extern from "../../src/msgpack/pack.h": struct msgpack_packer: char* buf size_t length @@ -133,7 +134,7 @@ cdef class Packer(object): while True: if o is None: ret = msgpack_pack_nil(&self.pk) - elif isinstance(o, bool): + elif isinstance(o, (bool, np.bool_)): if o: ret = msgpack_pack_true(&self.pk) else: @@ -224,7 +225,7 @@ cdef class Packer(object): default_used = 1 continue else: - raise TypeError("can't serialize %r" % (o,)) + raise TypeError("can't serialize {thing!r}".format(thing=o)) return ret cpdef pack(self, object obj): diff --git a/pandas/msgpack/_unpacker.pyx b/pandas/io/msgpack/_unpacker.pyx similarity index 95% rename from pandas/msgpack/_unpacker.pyx rename to pandas/io/msgpack/_unpacker.pyx index 6f23a24adde6c..04bb330e595dd 100644 --- a/pandas/msgpack/_unpacker.pyx +++ b/pandas/io/msgpack/_unpacker.pyx @@ -1,5 +1,5 @@ # coding: utf-8 -#cython: embedsignature=True +# cython: embedsignature=True from cpython cimport * cdef extern from "Python.h": @@ -11,16 +11,16 @@ from libc.stdlib cimport * from libc.string cimport * from libc.limits cimport * -from 
pandas.msgpack.exceptions import (BufferFull, OutOfData, - UnpackValueError, ExtraData) -from pandas.msgpack import ExtType +from pandas.io.msgpack.exceptions import (BufferFull, OutOfData, + UnpackValueError, ExtraData) +from pandas.io.msgpack import ExtType -cdef extern from "../src/msgpack/unpack.h": +cdef extern from "../../src/msgpack/unpack.h": ctypedef struct msgpack_user: bint use_list PyObject* object_hook - bint has_pairs_hook # call object_hook with k-v pairs + bint has_pairs_hook # call object_hook with k-v pairs PyObject* list_hook PyObject* ext_hook char *encoding @@ -94,13 +94,13 @@ cdef inline init_ctx(unpack_context *ctx, def default_read_extended_type(typecode, data): raise NotImplementedError("Cannot decode extended type " - "with typecode=%d" % typecode) + "with typecode={code}".format(code=typecode)) def unpackb(object packed, object object_hook=None, object list_hook=None, bint use_list=1, encoding=None, unicode_errors="strict", object_pairs_hook=None, ext_hook=ExtType, - Py_ssize_t max_str_len=2147483647, # 2**32-1 + Py_ssize_t max_str_len=2147483647, # 2**32-1 Py_ssize_t max_bin_len=2147483647, Py_ssize_t max_array_len=2147483647, Py_ssize_t max_map_len=2147483647, @@ -144,7 +144,7 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, buf + off, buf_len - off)) return obj else: - raise UnpackValueError("Unpack failed: error = %d" % (ret,)) + raise UnpackValueError("Unpack failed: error = {ret}".format(ret=ret)) def unpack(object stream, object object_hook=None, object list_hook=None, @@ -202,7 +202,7 @@ cdef class Unpacker(object): :param int max_buffer_size: Limits size of data waiting unpacked. 0 means system's INT_MAX (default). Raises `BufferFull` exception when it - is insufficient. You shoud set this parameter when unpacking + is insufficient. You should set this parameter when unpacking data from untrasted source. 
:param int max_str_len: @@ -257,7 +257,7 @@ cdef class Unpacker(object): object object_hook=None, object object_pairs_hook=None, object list_hook=None, encoding=None, unicode_errors='strict', int max_buffer_size=0, object ext_hook=ExtType, - Py_ssize_t max_str_len=2147483647, # 2**32-1 + Py_ssize_t max_str_len=2147483647, # 2**32-1 Py_ssize_t max_bin_len=2147483647, Py_ssize_t max_array_len=2147483647, Py_ssize_t max_map_len=2147483647, @@ -411,7 +411,8 @@ cdef class Unpacker(object): else: raise OutOfData("No more data to unpack.") else: - raise ValueError("Unpack failed: error = %d" % (ret,)) + raise ValueError("Unpack failed: error = {ret}" + .format(ret=ret)) def read_bytes(self, Py_ssize_t nbytes): """Read a specified number of raw bytes from the stream""" @@ -466,8 +467,8 @@ cdef class Unpacker(object): return self._unpack(unpack_construct, None, 1) # for debug. - #def _buf(self): + # def _buf(self): # return PyString_FromStringAndSize(self.buf, self.buf_tail) - #def _off(self): + # def _off(self): # return self.buf_head diff --git a/pandas/msgpack/_version.py b/pandas/io/msgpack/_version.py similarity index 100% rename from pandas/msgpack/_version.py rename to pandas/io/msgpack/_version.py diff --git a/pandas/msgpack/exceptions.py b/pandas/io/msgpack/exceptions.py similarity index 100% rename from pandas/msgpack/exceptions.py rename to pandas/io/msgpack/exceptions.py diff --git a/pandas/io/packers.py b/pandas/io/packers.py index ab44e46c96b77..f9b1d1574d45c 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -48,29 +48,30 @@ from pandas import compat from pandas.compat import u, u_safe -from pandas.types.common import (is_categorical_dtype, is_object_dtype, - needs_i8_conversion, pandas_dtype) +from pandas.core.dtypes.common import ( + is_categorical_dtype, is_object_dtype, + needs_i8_conversion, pandas_dtype) from pandas import (Timestamp, Period, Series, DataFrame, # noqa Index, MultiIndex, Float64Index, Int64Index, Panel, RangeIndex, 
PeriodIndex, DatetimeIndex, NaT, - Categorical) -from pandas.tslib import NaTType -from pandas.sparse.api import SparseSeries, SparseDataFrame -from pandas.sparse.array import BlockIndex, IntIndex + Categorical, CategoricalIndex, IntervalIndex, Interval, + TimedeltaIndex) +from pandas.core.sparse.api import SparseSeries, SparseDataFrame +from pandas.core.sparse.array import BlockIndex, IntIndex from pandas.core.generic import NDFrame -from pandas.core.common import PerformanceWarning -from pandas.io.common import get_filepath_or_buffer +from pandas.errors import PerformanceWarning +from pandas.io.common import get_filepath_or_buffer, _stringify_path from pandas.core.internals import BlockManager, make_block, _safe_reshape import pandas.core.internals as internals -from pandas.msgpack import Unpacker as _Unpacker, Packer as _Packer, ExtType +from pandas.io.msgpack import Unpacker as _Unpacker, Packer as _Packer, ExtType from pandas.util._move import ( BadMove as _BadMove, move_into_mutable_buffer as _move_into_mutable_buffer, ) -# check whcih compression libs we have installed +# check which compression libs we have installed try: import zlib @@ -148,6 +149,7 @@ def writer(fh): for a in args: fh.write(pack(a, **kwargs)) + path_or_buf = _stringify_path(path_or_buf) if isinstance(path_or_buf, compat.string_types): with open(path_or_buf, mode) as fh: writer(fh) @@ -179,7 +181,7 @@ def read_msgpack(path_or_buf, encoding='utf-8', iterator=False, **kwargs): obj : type of object stored in file """ - path_or_buf, _, _ = get_filepath_or_buffer(path_or_buf) + path_or_buf, _, _, should_close = get_filepath_or_buffer(path_or_buf) if iterator: return Iterator(path_or_buf) @@ -187,11 +189,16 @@ def read(fh): l = list(unpack(fh, encoding=encoding, **kwargs)) if len(l) == 1: return l[0] + + if should_close: + try: + path_or_buf.close() + except: # noqa: flake8 + pass return l # see if we have an actual file if isinstance(path_or_buf, compat.string_types): - try: exists = 
os.path.exists(path_or_buf) except (TypeError, ValueError): @@ -201,22 +208,26 @@ def read(fh): with open(path_or_buf, 'rb') as fh: return read(fh) - # treat as a binary-like if isinstance(path_or_buf, compat.binary_type): + # treat as a binary-like fh = None try: - fh = compat.BytesIO(path_or_buf) - return read(fh) + # We can't distinguish between a path and a buffer of bytes in + # Python 2 so instead assume the first byte of a valid path is + # less than 0x80. + if compat.PY3 or ord(path_or_buf[0]) >= 0x80: + fh = compat.BytesIO(path_or_buf) + return read(fh) finally: if fh is not None: fh.close() - - # a buffer like - if hasattr(path_or_buf, 'read') and compat.callable(path_or_buf.read): + elif hasattr(path_or_buf, 'read') and compat.callable(path_or_buf.read): + # treat as a buffer like return read(path_or_buf) raise ValueError('path_or_buf needs to be a string file path or file-like') + dtype_dict = {21: np.dtype('M8[ns]'), u('datetime64[ns]'): np.dtype('M8[ns]'), u('datetime64[us]'): np.dtype('M8[us]'), @@ -237,6 +248,7 @@ def dtype_for(t): return dtype_dict[t] return np.typeDict.get(t, t) + c2f_dict = {'complex': np.float64, 'complex128': np.float64, 'complex64': np.float32} @@ -347,8 +359,11 @@ def unconvert(values, dtype, compress=None): ) # fall through to copying `np.fromstring` - # Copy the string into a numpy array. - return np.fromstring(values, dtype=dtype) + # Copy the bytes into a numpy array. 
+ buf = np.frombuffer(values, dtype=dtype) + buf = buf.copy() # required to not mutate the original data + buf.flags.writeable = True + return buf def encode(obj): @@ -387,6 +402,13 @@ def encode(obj): u'freq': u_safe(getattr(obj, 'freqstr', None)), u'tz': tz, u'compress': compressor} + elif isinstance(obj, IntervalIndex): + return {u'typ': u'interval_index', + u'klass': u(obj.__class__.__name__), + u'name': getattr(obj, 'name', None), + u'left': getattr(obj, '_left', None), + u'right': getattr(obj, '_right', None), + u'closed': getattr(obj, '_closed', None)} elif isinstance(obj, MultiIndex): return {u'typ': u'multi_index', u'klass': u(obj.__class__.__name__), @@ -466,7 +488,7 @@ def encode(obj): } elif isinstance(obj, (datetime, date, np.datetime64, timedelta, - np.timedelta64, NaTType)): + np.timedelta64)) or obj is NaT: if isinstance(obj, Timestamp): tz = obj.tzinfo if tz is not None: @@ -478,7 +500,7 @@ def encode(obj): u'value': obj.value, u'freq': freq, u'tz': tz} - if isinstance(obj, NaTType): + if obj is NaT: return {u'typ': u'nat'} elif isinstance(obj, np.timedelta64): return {u'typ': u'timedelta64', @@ -499,7 +521,12 @@ def encode(obj): elif isinstance(obj, Period): return {u'typ': u'period', u'ordinal': obj.ordinal, - u'freq': u(obj.freq)} + u'freq': u_safe(obj.freqstr)} + elif isinstance(obj, Interval): + return {u'typ': u'interval', + u'left': obj.left, + u'right': obj.right, + u'closed': obj.closed} elif isinstance(obj, BlockIndex): return {u'typ': u'block_index', u'klass': u(obj.__class__.__name__), @@ -571,7 +598,7 @@ def decode(obj): elif typ == u'period_index': data = unconvert(obj[u'data'], np.int64, obj.get(u'compress')) d = dict(name=obj[u'name'], freq=obj[u'freq']) - return globals()[obj[u'klass']](data, **d) + return globals()[obj[u'klass']]._from_ordinals(data, **d) elif typ == u'datetime_index': data = unconvert(obj[u'data'], np.int64, obj.get(u'compress')) d = dict(name=obj[u'name'], freq=obj[u'freq'], verify_integrity=False) @@ -583,13 
+610,19 @@ def decode(obj): result = result.tz_localize('UTC').tz_convert(tz) return result + elif typ == u'interval_index': + return globals()[obj[u'klass']].from_arrays(obj[u'left'], + obj[u'right'], + obj[u'closed'], + name=obj[u'name']) elif typ == u'category': from_codes = globals()[obj[u'klass']].from_codes return from_codes(codes=obj[u'codes'], categories=obj[u'categories'], - ordered=obj[u'ordered'], - name=obj[u'name']) + ordered=obj[u'ordered']) + elif typ == u'interval': + return Interval(obj[u'left'], obj[u'right'], obj[u'closed']) elif typ == u'series': dtype = dtype_for(obj[u'dtype']) pd_dtype = pandas_dtype(dtype) diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py new file mode 100644 index 0000000000000..a99014f07a6b3 --- /dev/null +++ b/pandas/io/parquet.py @@ -0,0 +1,288 @@ +""" parquet compat """ + +from warnings import catch_warnings +from distutils.version import LooseVersion +from pandas import DataFrame, RangeIndex, Int64Index, get_option +from pandas.compat import string_types +import pandas.core.common as com +from pandas.io.common import get_filepath_or_buffer, is_s3_url + + +def get_engine(engine): + """ return our implementation """ + + if engine == 'auto': + engine = get_option('io.parquet.engine') + + if engine == 'auto': + # try engines in this order + try: + return PyArrowImpl() + except ImportError: + pass + + try: + return FastParquetImpl() + except ImportError: + pass + + raise ImportError("Unable to find a usable engine; " + "tried using: 'pyarrow', 'fastparquet'.\n" + "pyarrow or fastparquet is required for parquet " + "support") + + if engine not in ['pyarrow', 'fastparquet']: + raise ValueError("engine must be one of 'pyarrow', 'fastparquet'") + + if engine == 'pyarrow': + return PyArrowImpl() + elif engine == 'fastparquet': + return FastParquetImpl() + + +class BaseImpl(object): + + api = None # module + + @staticmethod + def validate_dataframe(df): + + if not isinstance(df, DataFrame): + raise ValueError("to_parquet 
only supports IO with DataFrames") + + # must have value column names (strings only) + if df.columns.inferred_type not in {'string', 'unicode'}: + raise ValueError("parquet must have string column names") + + # index level names must be strings + valid_names = all( + isinstance(name, string_types) + for name in df.index.names + if name is not None + ) + if not valid_names: + raise ValueError("Index level names must be strings") + + def write(self, df, path, compression, **kwargs): + raise com.AbstractMethodError(self) + + def read(self, path, columns=None, **kwargs): + raise com.AbstractMethodError(self) + + +class PyArrowImpl(BaseImpl): + + def __init__(self): + # since pandas is a dependency of pyarrow + # we need to import on first use + try: + import pyarrow + import pyarrow.parquet + except ImportError: + raise ImportError( + "pyarrow is required for parquet support\n\n" + "you can install via conda\n" + "conda install pyarrow -c conda-forge\n" + "\nor via pip\n" + "pip install -U pyarrow\n" + ) + if LooseVersion(pyarrow.__version__) < '0.4.1': + raise ImportError( + "pyarrow >= 0.4.1 is required for parquet support\n\n" + "you can install via conda\n" + "conda install pyarrow -c conda-forge\n" + "\nor via pip\n" + "pip install -U pyarrow\n" + ) + + self._pyarrow_lt_060 = ( + LooseVersion(pyarrow.__version__) < LooseVersion('0.6.0')) + self._pyarrow_lt_070 = ( + LooseVersion(pyarrow.__version__) < LooseVersion('0.7.0')) + + self.api = pyarrow + + def write(self, df, path, compression='snappy', + coerce_timestamps='ms', **kwargs): + self.validate_dataframe(df) + if self._pyarrow_lt_070: + self._validate_write_lt_070(df) + path, _, _, _ = get_filepath_or_buffer(path, mode='wb') + + if self._pyarrow_lt_060: + table = self.api.Table.from_pandas(df, timestamps_to_ms=True) + self.api.parquet.write_table( + table, path, compression=compression, **kwargs) + + else: + table = self.api.Table.from_pandas(df) + self.api.parquet.write_table( + table, path, 
compression=compression, + coerce_timestamps=coerce_timestamps, **kwargs) + + def read(self, path, columns=None, **kwargs): + path, _, _, should_close = get_filepath_or_buffer(path) + if self._pyarrow_lt_070: + result = self.api.parquet.read_pandas(path, columns=columns, + **kwargs).to_pandas() + else: + kwargs['use_pandas_metadata'] = True + result = self.api.parquet.read_table(path, columns=columns, + **kwargs).to_pandas() + if should_close: + try: + path.close() + except: # noqa: flake8 + pass + + return result + + def _validate_write_lt_070(self, df): + # Compatibility shim for pyarrow < 0.7.0 + # TODO: Remove in pandas 0.23.0 + from pandas.core.indexes.multi import MultiIndex + if isinstance(df.index, MultiIndex): + msg = ( + "Multi-index DataFrames are only supported " + "with pyarrow >= 0.7.0" + ) + raise ValueError(msg) + # Validate index + if not isinstance(df.index, Int64Index): + msg = ( + "pyarrow < 0.7.0 does not support serializing {} for the " + "index; you can .reset_index() to make the index into " + "column(s), or install the latest version of pyarrow or " + "fastparquet." + ) + raise ValueError(msg.format(type(df.index))) + if not df.index.equals(RangeIndex(len(df))): + raise ValueError( + "pyarrow < 0.7.0 does not support serializing a non-default " + "index; you can .reset_index() to make the index into " + "column(s), or install the latest version of pyarrow or " + "fastparquet." + ) + if df.index.name is not None: + raise ValueError( + "pyarrow < 0.7.0 does not serialize indexes with a name; you " + "can set the index.name to None or install the latest version " + "of pyarrow or fastparquet." 
+ ) + + +class FastParquetImpl(BaseImpl): + + def __init__(self): + # since pandas is a dependency of fastparquet + # we need to import on first use + try: + import fastparquet + except ImportError: + raise ImportError( + "fastparquet is required for parquet support\n\n" + "you can install via conda\n" + "conda install fastparquet -c conda-forge\n" + "\nor via pip\n" + "pip install -U fastparquet" + ) + if LooseVersion(fastparquet.__version__) < '0.1.0': + raise ImportError( + "fastparquet >= 0.1.0 is required for parquet " + "support\n\n" + "you can install via conda\n" + "conda install fastparquet -c conda-forge\n" + "\nor via pip\n" + "pip install -U fastparquet" + ) + self.api = fastparquet + + def write(self, df, path, compression='snappy', **kwargs): + self.validate_dataframe(df) + # thriftpy/protocol/compact.py:339: + # DeprecationWarning: tostring() is deprecated. + # Use tobytes() instead. + + if is_s3_url(path): + # path is s3:// so we need to open the s3file in 'wb' mode. + # TODO: Support 'ab' + + path, _, _, _ = get_filepath_or_buffer(path, mode='wb') + # And pass the opened s3file to the fastparquet internal impl. + kwargs['open_with'] = lambda path, _: path + else: + path, _, _, _ = get_filepath_or_buffer(path) + + with catch_warnings(record=True): + self.api.write(path, df, + compression=compression, **kwargs) + + def read(self, path, columns=None, **kwargs): + if is_s3_url(path): + # When path is s3:// an S3File is returned. + # We need to retain the original path(str) while also + # pass the S3File().open function to fsatparquet impl. 
+ s3, _, _, should_close = get_filepath_or_buffer(path) + try: + parquet_file = self.api.ParquetFile(path, open_with=s3.s3.open) + finally: + s3.close() + else: + path, _, _, _ = get_filepath_or_buffer(path) + parquet_file = self.api.ParquetFile(path) + + return parquet_file.to_pandas(columns=columns, **kwargs) + + +def to_parquet(df, path, engine='auto', compression='snappy', **kwargs): + """ + Write a DataFrame to the parquet format. + + Parameters + ---------- + df : DataFrame + path : string + File path + engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto' + Parquet library to use. If 'auto', then the option + ``io.parquet.engine`` is used. The default ``io.parquet.engine`` + behavior is to try 'pyarrow', falling back to 'fastparquet' if + 'pyarrow' is unavailable. + compression : {'snappy', 'gzip', 'brotli', None}, default 'snappy' + Name of the compression to use. Use ``None`` for no compression. + kwargs + Additional keyword arguments passed to the engine + """ + impl = get_engine(engine) + return impl.write(df, path, compression=compression, **kwargs) + + +def read_parquet(path, engine='auto', columns=None, **kwargs): + """ + Load a parquet object from the file path, returning a DataFrame. + + .. versionadded 0.21.0 + + Parameters + ---------- + path : string + File path + columns: list, default=None + If not None, only these columns will be read from the file. + + .. versionadded 0.21.1 + engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto' + Parquet library to use. If 'auto', then the option + ``io.parquet.engine`` is used. The default ``io.parquet.engine`` + behavior is to try 'pyarrow', falling back to 'fastparquet' if + 'pyarrow' is unavailable. 
+ kwargs are passed to the engine + + Returns + ------- + DataFrame + + """ + + impl = get_engine(engine) + return impl.read(path, columns=columns, **kwargs) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index f8905dfa315c4..469cd6d82e4b4 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -13,32 +13,38 @@ import numpy as np from pandas import compat -from pandas.compat import (range, lrange, StringIO, lzip, +from pandas.compat import (range, lrange, PY3, StringIO, lzip, zip, string_types, map, u) -from pandas.types.common import (is_integer, _ensure_object, - is_list_like, is_integer_dtype, - is_float, is_dtype_equal, - is_object_dtype, is_string_dtype, - is_scalar, is_categorical_dtype) -from pandas.types.missing import isnull -from pandas.types.cast import _astype_nansafe -from pandas.core.index import Index, MultiIndex, RangeIndex +from pandas.core.dtypes.common import ( + is_integer, _ensure_object, + is_list_like, is_integer_dtype, + is_float, is_dtype_equal, + is_object_dtype, is_string_dtype, + is_scalar, is_categorical_dtype) +from pandas.core.dtypes.dtypes import CategoricalDtype +from pandas.core.dtypes.missing import isna +from pandas.core.dtypes.cast import astype_nansafe +from pandas.core.index import (Index, MultiIndex, RangeIndex, + _ensure_index_from_sequences) from pandas.core.series import Series from pandas.core.frame import DataFrame -from pandas.core.categorical import Categorical -from pandas.core.common import AbstractMethodError +from pandas.core.arrays import Categorical +from pandas.core import algorithms +import pandas.core.common as com from pandas.io.date_converters import generic_parser -from pandas.io.common import (get_filepath_or_buffer, _validate_header_arg, - _get_handle, UnicodeReader, UTF8Recoder, - BaseIterator, ParserError, EmptyDataError, - ParserWarning, _NA_VALUES, _infer_compression) -from pandas.tseries import tools +from pandas.errors import ParserWarning, ParserError, EmptyDataError +from 
pandas.io.common import (get_filepath_or_buffer, is_file_like, + _validate_header_arg, _get_handle, + UnicodeReader, UTF8Recoder, _NA_VALUES, + BaseIterator, _infer_compression) +from pandas.core.tools import datetimes as tools -from pandas.util.decorators import Appender - -import pandas.lib as lib -import pandas.parser as _parser +from pandas.util._decorators import Appender +import pandas._libs.lib as lib +import pandas._libs.parsers as parsers +import pandas._libs.ops as libops +from pandas._libs.tslibs import parsing # BOM character (byte order mark) # This exists at the beginning of a file to indicate endianness @@ -46,7 +52,7 @@ # so we need to remove it if we see it. _BOM = u('\ufeff') -_parser_params = """Also supports optionally iterating or breaking of the file +_parser_params = r"""Also supports optionally iterating or breaking of the file into chunks. Additional help can be found in the `online docs for IO Tools @@ -58,10 +64,8 @@ object with a read() method (such as a file handle or StringIO) The string could be a URL. Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is expected. For instance, a local file could - be file ://localhost/path/to/table.csv + be file://localhost/path/to/table.csv %s -delimiter : str, default ``None`` - Alternative argument name for sep. delim_whitespace : boolean, default False Specifies whether or not whitespace (e.g. ``' '`` or ``'\t'``) will be used as the sep. Equivalent to setting ``sep='\s+'``. If this option @@ -71,19 +75,23 @@ .. versionadded:: 0.18.1 support for the Python parser. header : int or list of ints, default 'infer' - Row number(s) to use as the column names, and the start of the data. - Default behavior is as if set to 0 if no ``names`` passed, otherwise - ``None``. Explicitly pass ``header=0`` to be able to replace existing - names. The header can be a list of integers that specify row locations for - a multi-index on the columns e.g. [0,1,3]. 
Intervening rows that are not - specified will be skipped (e.g. 2 in this example is skipped). Note that - this parameter ignores commented lines and empty lines if - ``skip_blank_lines=True``, so header=0 denotes the first line of data - rather than the first line of the file. + Row number(s) to use as the column names, and the start of the + data. Default behavior is to infer the column names: if no names + are passed the behavior is identical to ``header=0`` and column + names are inferred from the first line of the file, if column + names are passed explicitly then the behavior is identical to + ``header=None``. Explicitly pass ``header=0`` to be able to + replace existing names. The header can be a list of integers that + specify row locations for a multi-index on the columns + e.g. [0,1,3]. Intervening rows that are not specified will be + skipped (e.g. 2 in this example is skipped). Note that this + parameter ignores commented lines and empty lines if + ``skip_blank_lines=True``, so header=0 denotes the first line of + data rather than the first line of the file. names : array-like, default None List of column names to use. If file contains no header row, then you - should explicitly pass header=None. Duplicates in this list are not - allowed unless mangle_dupe_cols=True, which is the default. + should explicitly pass header=None. Duplicates in this list will cause + a ``UserWarning`` to be issued. index_col : int or sequence or False, default None Column to use as the row labels of the DataFrame. If a sequence is given, a MultiIndex is used. If you have a malformed file with delimiters at the end @@ -94,27 +102,25 @@ be positional (i.e. integer indices into the document columns) or strings that correspond to column names provided either by the user in `names` or inferred from the document header row(s). For example, a valid array-like - `usecols` parameter would be [0, 1, 2] or ['foo', 'bar', 'baz']. 
+ `usecols` parameter would be [0, 1, 2] or ['foo', 'bar', 'baz']. Element + order is ignored, so ``usecols=[0, 1]`` is the same as ``[1, 0]``. + To instantiate a DataFrame from ``data`` with element order preserved use + ``pd.read_csv(data, usecols=['foo', 'bar'])[['foo', 'bar']]`` for columns + in ``['foo', 'bar']`` order or + ``pd.read_csv(data, usecols=['foo', 'bar'])[['bar', 'foo']]`` + for ``['bar', 'foo']`` order. If callable, the callable function will be evaluated against the column names, returning names where the callable function evaluates to True. An example of a valid callable argument would be ``lambda x: x.upper() in ['AAA', 'BBB', 'DDD']``. Using this parameter results in much faster parsing time and lower memory usage. -as_recarray : boolean, default False - DEPRECATED: this argument will be removed in a future version. Please call - `pd.read_csv(...).to_records()` instead. - - Return a NumPy recarray instead of a DataFrame after parsing the data. - If set to True, this option takes precedence over the `squeeze` parameter. - In addition, as row indices are not available in such a format, the - `index_col` parameter will be ignored. squeeze : boolean, default False If the parsed data only contains one column then return a Series prefix : str, default None Prefix to add to column numbers when no header, e.g. 'X' for X0, X1, ... mangle_dupe_cols : boolean, default True - Duplicate columns will be specified as 'X.0'...'X.N', rather than + Duplicate columns will be specified as 'X', 'X.1', ...'X.N', rather than 'X'...'X'. Passing in False will cause data to be overwritten if there are duplicate names in the columns. dtype : Type name or dict of column -> type, default None @@ -141,18 +147,28 @@ An example of a valid callable argument would be ``lambda x: x in [0, 2]``. 
skipfooter : int, default 0 Number of lines at bottom of file to skip (Unsupported with engine='c') -skip_footer : int, default 0 - DEPRECATED: use the `skipfooter` parameter instead, as they are identical nrows : int, default None Number of rows of file to read. Useful for reading pieces of large files na_values : scalar, str, list-like, or dict, default None Additional strings to recognize as NA/NaN. If dict passed, specific per-column NA values. By default the following values are interpreted as NaN: '""" + fill("', '".join(sorted(_NA_VALUES)), - 70, subsequent_indent=" ") + """'`. + 70, subsequent_indent=" ") + """'. keep_default_na : bool, default True - If na_values are specified and keep_default_na is False the default NaN - values are overridden, otherwise they're appended to. + Whether or not to include the default NaN values when parsing the data. + Depending on whether `na_values` is passed in, the behavior is as follows: + + * If `keep_default_na` is True, and `na_values` are specified, `na_values` + is appended to the default NaN values used for parsing. + * If `keep_default_na` is True, and `na_values` are not specified, only + the default NaN values are used for parsing. + * If `keep_default_na` is False, and `na_values` are specified, only + the NaN values specified `na_values` are used for parsing. + * If `keep_default_na` is False, and `na_values` are not specified, no + strings will be parsed as NaN. + + Note that if `na_filter` is passed in as False, the `keep_default_na` and + `na_values` parameters will be ignored. na_filter : boolean, default True Detect missing value markers (empty strings and the value of na_values). In data without any NAs, passing na_filter=False can improve the performance @@ -168,7 +184,7 @@ * list of ints or names. e.g. If [1, 2, 3] -> try parsing columns 1, 2, 3 each as a separate date column. * list of lists. e.g. If [[1, 3]] -> combine columns 1 and 3 and parse as - a single date column. + a single date column. 
* dict, e.g. {'foo' : [1, 3]} -> parse columns 1, 3 as date and call result 'foo' @@ -178,38 +194,39 @@ Note: A fast-path exists for iso8601-formatted dates. infer_datetime_format : boolean, default False - If True and parse_dates is enabled, pandas will attempt to infer the format - of the datetime strings in the columns, and if it can be inferred, switch - to a faster method of parsing them. In some cases this can increase the - parsing speed by ~5-10x. + If True and `parse_dates` is enabled, pandas will attempt to infer the + format of the datetime strings in the columns, and if it can be inferred, + switch to a faster method of parsing them. In some cases this can increase + the parsing speed by 5-10x. keep_date_col : boolean, default False - If True and parse_dates specifies combining multiple columns then + If True and `parse_dates` specifies combining multiple columns then keep the original columns. date_parser : function, default None Function to use for converting a sequence of string columns to an array of datetime instances. The default uses ``dateutil.parser.parser`` to do the - conversion. Pandas will try to call date_parser in three different ways, + conversion. Pandas will try to call `date_parser` in three different ways, advancing to the next if an exception occurs: 1) Pass one or more arrays - (as defined by parse_dates) as arguments; 2) concatenate (row-wise) the - string values from the columns defined by parse_dates into a single array - and pass that; and 3) call date_parser once for each row using one or more - strings (corresponding to the columns defined by parse_dates) as arguments. + (as defined by `parse_dates`) as arguments; 2) concatenate (row-wise) the + string values from the columns defined by `parse_dates` into a single array + and pass that; and 3) call `date_parser` once for each row using one or + more strings (corresponding to the columns defined by `parse_dates`) as + arguments. 
dayfirst : boolean, default False DD/MM format dates, international and European format iterator : boolean, default False Return TextFileReader object for iteration or getting chunks with ``get_chunk()``. chunksize : int, default None - Return TextFileReader object for iteration. `See IO Tools docs for more - information - `_ on - ``iterator`` and ``chunksize``. + Return TextFileReader object for iteration. + See the `IO Tools docs + `_ + for more information on ``iterator`` and ``chunksize``. compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer' - For on-the-fly decompression of on-disk data. If 'infer', then use gzip, - bz2, zip or xz if filepath_or_buffer is a string ending in '.gz', '.bz2', - '.zip', or 'xz', respectively, and no decompression otherwise. If using - 'zip', the ZIP file must contain only one data file to be read in. - Set to None for no decompression. + For on-the-fly decompression of on-disk data. If 'infer' and + `filepath_or_buffer` is path-like, then detect compression from the + following extensions: '.gz', '.bz2', '.zip', or '.xz' (otherwise no + decompression). If using 'zip', the ZIP file must contain only one data + file to be read in. Set to None for no decompression. .. versionadded:: 0.18.1 support for 'zip' and 'xz' compression. @@ -241,8 +258,8 @@ of a line, the line will be ignored altogether. This parameter must be a single character. Like empty lines (as long as ``skip_blank_lines=True``), fully commented lines are ignored by the parameter `header` but not by - `skiprows`. For example, if comment='#', parsing '#empty\\na,b,c\\n1,2,3' - with `header=0` will result in 'a,b,c' being + `skiprows`. For example, if ``comment='#'``, parsing + ``#empty\\na,b,c\\n1,2,3`` with ``header=0`` will result in 'a,b,c' being treated as the header. encoding : str, default None Encoding to use for UTF when reading/writing (ex. 'utf-8'). `List of Python @@ -255,16 +272,19 @@ override values, a ParserWarning will be issued. 
See csv.Dialect documentation for more details. tupleize_cols : boolean, default False + .. deprecated:: 0.21.0 + This argument will be removed and will always convert to MultiIndex + Leave a list of tuples on columns as is (default is to convert to - a Multi Index on the columns) + a MultiIndex on the columns) error_bad_lines : boolean, default True Lines with too many fields (e.g. a csv line with too many commas) will by default cause an exception to be raised, and no DataFrame will be returned. If False, then these "bad lines" will dropped from the DataFrame that is - returned. (Only valid with C parser) + returned. warn_bad_lines : boolean, default True If error_bad_lines is False, and warn_bad_lines is True, a warning for each - "bad line" will be output. (Only valid with C parser). + "bad line" will be output. low_memory : boolean, default True Internally process the file in chunks, resulting in lower memory use while parsing, but possibly mixed type inference. To ensure no mixed @@ -272,22 +292,6 @@ Note that the entire file is read into a single DataFrame regardless, use the `chunksize` or `iterator` parameter to return the data in chunks. (Only valid with C parser) -buffer_lines : int, default None - DEPRECATED: this argument will be removed in a future version because its - value is not respected by the parser -compact_ints : boolean, default False - DEPRECATED: this argument will be removed in a future version - - If compact_ints is True, then for any column that is of integer dtype, - the parser will attempt to cast it as the smallest integer dtype possible, - either signed or unsigned depending on the specification from the - `use_unsigned` parameter. -use_unsigned : boolean, default False - DEPRECATED: this argument will be removed in a future version - - If integer columns are being compacted (i.e. `compact_ints=True`), specify - whether the column should be compacted to the smallest signed or unsigned - integer dtype. 
memory_map : boolean, default False If a filepath is provided for `filepath_or_buffer`, map the file object directly onto memory and access the data directly from there. Using this @@ -304,10 +308,15 @@ currently more feature-complete.""" _sep_doc = r"""sep : str, default {default} - Delimiter to use. If sep is None, will try to automatically determine - this. Separators longer than 1 character and different from ``'\s+'`` will - be interpreted as regular expressions, will force use of the python parsing - engine and will ignore quotes in the data. Regex example: ``'\r\t'``""" + Delimiter to use. If sep is None, the C engine cannot automatically detect + the separator, but the Python parsing engine can, meaning the latter will + be used and automatically detect the separator by Python's builtin sniffer + tool, ``csv.Sniffer``. In addition, separators longer than 1 character and + different from ``'\s+'`` will be interpreted as regular expressions and + will also force the use of the Python parsing engine. Note that regex + delimiters are prone to ignoring quoted data. Regex example: ``'\r\t'`` +delimiter : str, default ``None`` + Alternative argument name for sep.""" _read_csv_doc = """ Read CSV (comma-separated) file into DataFrame @@ -332,36 +341,73 @@ widths : list of ints. optional A list of field widths which can be used instead of 'colspecs' if the intervals are contiguous. +delimiter : str, default ``'\t' + ' '`` + Characters to consider as filler characters in the fixed-width file. + Can be used to specify the filler character of the fields + if it is not spaces (e.g., '~'). """ _read_fwf_doc = """ Read a table of fixed-width formatted lines into DataFrame %s - -Also, 'delimiter' is used to specify the filler character of the -fields if it is not spaces (e.g., '~'). 
""" % (_parser_params % (_fwf_widths, '')) -def _validate_nrows(nrows): +def _validate_integer(name, val, min_val=0): """ - Checks whether the 'nrows' parameter for parsing is either + Checks whether the 'name' parameter for parsing is either an integer OR float that can SAFELY be cast to an integer without losing accuracy. Raises a ValueError if that is not the case. + + Parameters + ---------- + name : string + Parameter name (used for error reporting) + val : int or float + The value to check + min_val : int + Minimum allowed value (val < min_val will result in a ValueError) """ - msg = "'nrows' must be an integer" + msg = "'{name:s}' must be an integer >={min_val:d}".format(name=name, + min_val=min_val) - if nrows is not None: - if is_float(nrows): - if int(nrows) != nrows: + if val is not None: + if is_float(val): + if int(val) != val: raise ValueError(msg) - nrows = int(nrows) - elif not is_integer(nrows): + val = int(val) + elif not (is_integer(val) and val >= min_val): raise ValueError(msg) - return nrows + return val + + +def _validate_names(names): + """ + Check if the `names` parameter contains duplicates. + + If duplicates are found, we issue a warning before returning. + + Parameters + ---------- + names : array-like or None + An array containing a list of the names used for the output DataFrame. + + Returns + ------- + names : array-like or None + The original `names` parameter. + """ + + if names is not None: + if len(names) != len(set(names)): + msg = ("Duplicate names specified. 
This " + "will raise an error in the future.") + warnings.warn(msg, UserWarning, stacklevel=3) + + return names def _read(filepath_or_buffer, kwds): @@ -373,7 +419,7 @@ def _read(filepath_or_buffer, kwds): compression = kwds.get('compression') compression = _infer_compression(filepath_or_buffer, compression) - filepath_or_buffer, _, compression = get_filepath_or_buffer( + filepath_or_buffer, _, compression, should_close = get_filepath_or_buffer( filepath_or_buffer, encoding, compression) kwds['compression'] = compression @@ -383,32 +429,32 @@ def _read(filepath_or_buffer, kwds): # Extract some of the arguments (pass chunksize on). iterator = kwds.get('iterator', False) - chunksize = kwds.get('chunksize', None) - nrows = _validate_nrows(kwds.pop('nrows', None)) + chunksize = _validate_integer('chunksize', kwds.get('chunksize', None), 1) + nrows = kwds.get('nrows', None) + + # Check for duplicates in names. + _validate_names(kwds.get("names", None)) # Create the parser. parser = TextFileReader(filepath_or_buffer, **kwds) - if (nrows is not None) and (chunksize is not None): - raise NotImplementedError("'nrows' and 'chunksize' cannot be used" - " together yet.") - elif nrows is not None: - try: - data = parser.read(nrows) - finally: - parser.close() - return data - - elif chunksize or iterator: + if chunksize or iterator: return parser try: - data = parser.read() + data = parser.read(nrows) finally: parser.close() + if should_close: + try: + filepath_or_buffer.close() + except: # noqa: flake8 + pass + return data + _parser_defaults = { 'delimiter': None, @@ -444,7 +490,7 @@ def _read(filepath_or_buffer, kwds): 'usecols': None, - # 'nrows': None, + 'nrows': None, # 'iterator': False, 'chunksize': None, 'verbose': False, @@ -460,15 +506,12 @@ def _read(filepath_or_buffer, kwds): _c_parser_defaults = { 'delim_whitespace': False, - 'as_recarray': False, 'na_filter': True, - 'compact_ints': False, - 'use_unsigned': False, 'low_memory': True, 'memory_map': False, - 
'buffer_lines': None, 'error_bad_lines': True, 'warn_bad_lines': True, + 'tupleize_cols': False, 'float_precision': None } @@ -477,20 +520,18 @@ def _read(filepath_or_buffer, kwds): 'widths': None, } -_c_unsupported = set(['skipfooter']) -_python_unsupported = set([ +_c_unsupported = {'skipfooter'} +_python_unsupported = { 'low_memory', - 'buffer_lines', - 'error_bad_lines', - 'warn_bad_lines', 'float_precision', -]) -_deprecated_args = set([ - 'as_recarray', - 'buffer_lines', - 'compact_ints', - 'use_unsigned', -]) +} + +_deprecated_defaults = { + 'tupleize_cols': None +} +_deprecated_args = { + 'tupleize_cols', +} def _make_parser_function(name, sep=','): @@ -549,23 +590,18 @@ def parser_f(filepath_or_buffer, comment=None, encoding=None, dialect=None, - tupleize_cols=False, + tupleize_cols=None, # Error Handling error_bad_lines=True, warn_bad_lines=True, skipfooter=0, - skip_footer=0, # deprecated # Internal doublequote=True, delim_whitespace=False, - as_recarray=False, - compact_ints=False, - use_unsigned=False, low_memory=_c_parser_defaults['low_memory'], - buffer_lines=None, memory_map=False, float_precision=None): @@ -584,13 +620,6 @@ def parser_f(filepath_or_buffer, engine = 'c' engine_specified = False - if skip_footer != 0: - warnings.warn("The 'skip_footer' argument has " - "been deprecated and will be removed " - "in a future version. 
Please use the " - "'skipfooter' argument instead.", - FutureWarning, stacklevel=2) - kwds = dict(delimiter=delimiter, engine=engine, dialect=dialect, @@ -625,7 +654,7 @@ def parser_f(filepath_or_buffer, nrows=nrows, iterator=iterator, chunksize=chunksize, - skipfooter=skipfooter or skip_footer, + skipfooter=skipfooter, converters=converters, dtype=dtype, usecols=usecols, @@ -636,14 +665,10 @@ def parser_f(filepath_or_buffer, float_precision=float_precision, na_filter=na_filter, - compact_ints=compact_ints, - use_unsigned=use_unsigned, delim_whitespace=delim_whitespace, - as_recarray=as_recarray, warn_bad_lines=warn_bad_lines, error_bad_lines=error_bad_lines, low_memory=low_memory, - buffer_lines=buffer_lines, mangle_dupe_cols=mangle_dupe_cols, tupleize_cols=tupleize_cols, infer_datetime_format=infer_datetime_format, @@ -655,6 +680,7 @@ def parser_f(filepath_or_buffer, return parser_f + read_csv = _make_parser_function('read_csv', sep=',') read_csv = Appender(_read_csv_doc)(read_csv) @@ -747,10 +773,13 @@ def __init__(self, f, engine=None, **kwds): options = self._get_options_with_defaults(engine) self.chunksize = options.pop('chunksize', None) + self.nrows = options.pop('nrows', None) self.squeeze = options.pop('squeeze', False) # might mutate self.engine + self.engine = self._check_file_or_buffer(f, engine) self.options, self.engine = self._clean_options(options, engine) + if 'has_index_names' in kwds: self.options['has_index_names'] = kwds['has_index_names'] @@ -782,12 +811,14 @@ def _get_options_with_defaults(self, engine): if ('python' in engine and argname not in _python_unsupported): pass + elif value == _deprecated_defaults.get(argname, default): + pass else: raise ValueError( 'The %r option is not supported with the' ' %r engine' % (argname, engine)) else: - value = default + value = _deprecated_defaults.get(argname, default) options[argname] = value if engine == 'python-fwf': @@ -796,6 +827,23 @@ def _get_options_with_defaults(self, engine): return 
options + def _check_file_or_buffer(self, f, engine): + # see gh-16530 + if is_file_like(f): + next_attr = "__next__" if PY3 else "next" + + # The C engine doesn't need the file-like to have the "next" or + # "__next__" attribute. However, the Python engine explicitly calls + # "next(...)" when iterating through such an object, meaning it + # needs to have that attribute ("next" for Python 2.x, "__next__" + # for Python 3.x) + if engine != "c" and not hasattr(f, next_attr): + msg = ("The 'python' engine cannot iterate " + "through this file buffer.") + raise ValueError(msg) + + return engine + def _clean_options(self, options, engine): result = options.copy() @@ -819,19 +867,19 @@ def _clean_options(self, options, engine): " sep=None with delim_whitespace=False" engine = 'python' elif sep is not None and len(sep) > 1: - if engine == 'c' and sep == '\s+': + if engine == 'c' and sep == r'\s+': result['delim_whitespace'] = True del result['delimiter'] elif engine not in ('python', 'python-fwf'): # wait until regex engine integrated fallback_reason = "the 'c' engine does not support"\ " regex separators (separators > 1 char and"\ - " different from '\s+' are"\ + r" different from '\s+' are"\ " interpreted as regex)" engine = 'python' elif delim_whitespace: if 'python' in engine: - result['delimiter'] = '\s+' + result['delimiter'] = r'\s+' elif sep is not None: encodeable = True try: @@ -887,24 +935,27 @@ def _clean_options(self, options, engine): na_values = options['na_values'] skiprows = options['skiprows'] - # really delete this one - keep_default_na = result.pop('keep_default_na') - _validate_header_arg(options['header']) depr_warning = '' for arg in _deprecated_args: parser_default = _c_parser_defaults[arg] + depr_default = _deprecated_defaults[arg] + msg = ("The '{arg}' argument has been deprecated " "and will be removed in a future version." .format(arg=arg)) - if arg == 'as_recarray': - msg += ' Please call pd.to_csv(...).to_records() instead.' 
+ if arg == 'tupleize_cols': + msg += (' Column tuples will then ' + 'always be converted to MultiIndex.') - if result.get(arg, parser_default) != parser_default: + if result.get(arg, depr_default) != depr_default: + # raise Exception(result.get(arg, depr_default), depr_default) depr_warning += msg + '\n\n' + else: + result[arg] = parser_default if depr_warning != '': warnings.warn(depr_warning, FutureWarning, stacklevel=2) @@ -928,6 +979,7 @@ def _clean_options(self, options, engine): converters = {} # Converting values to NA + keep_default_na = options['keep_default_na'] na_values, na_fvalues = _clean_na_values(na_values, keep_default_na) # handle skiprows; this is internally handled by the @@ -964,21 +1016,24 @@ def _make_engine(self, engine='c'): klass = PythonParser elif engine == 'python-fwf': klass = FixedWidthFieldParser + else: + raise ValueError('Unknown engine: {engine} (valid options are' + ' "c", "python", or' ' "python-fwf")'.format( + engine=engine)) self._engine = klass(self.f, **self.options) def _failover_to_python(self): - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) def read(self, nrows=None): + nrows = _validate_integer('nrows', nrows) + if nrows is not None: if self.options.get('skipfooter'): raise ValueError('skipfooter not supported for iteration') ret = self._engine.read(nrows) - if self.options.get('as_recarray'): - return ret - # May alter columns / col_dict index, columns, col_dict = self._create_index(ret) @@ -1007,6 +1062,10 @@ def _create_index(self, ret): def get_chunk(self, size=None): if size is None: size = self.chunksize + if self.nrows is not None: + if self._currow >= self.nrows: + raise StopIteration + size = min(size, self.nrows - self._currow) return self.read(nrows=size) @@ -1014,6 +1073,24 @@ def _is_index_col(col): return col is not None and col is not False +def _is_potential_multi_index(columns): + """ + Check whether or not the `columns` parameter + could be converted into a MultiIndex. 
+ + Parameters + ---------- + columns : array-like + Object which may or may not be convertible into a MultiIndex + + Returns + ------- + boolean : Whether or not columns could become a MultiIndex + """ + return (len(columns) and not isinstance(columns, MultiIndex) and + all(isinstance(c, tuple) for c in columns)) + + def _evaluate_usecols(usecols, names): """ Check whether or not the 'usecols' parameter @@ -1023,11 +1100,73 @@ def _evaluate_usecols(usecols, names): If not a callable, returns 'usecols'. """ if callable(usecols): - return set([i for i, name in enumerate(names) - if usecols(name)]) + return {i for i, name in enumerate(names) if usecols(name)} + return usecols + + +def _validate_usecols_names(usecols, names): + """ + Validates that all usecols are present in a given + list of names. If not, raise a ValueError that + shows what usecols are missing. + + Parameters + ---------- + usecols : iterable of usecols + The columns to validate are present in names. + names : iterable of names + The column names to check against. + + Returns + ------- + usecols : iterable of usecols + The `usecols` parameter if the validation succeeds. + + Raises + ------ + ValueError : Columns were missing. Error message will list them. + """ + missing = [c for c in usecols if c not in names] + if len(missing) > 0: + raise ValueError( + "Usecols do not match columns, " + "columns expected but not found: {missing}".format(missing=missing) + ) + return usecols +def _validate_skipfooter_arg(skipfooter): + """ + Validate the 'skipfooter' parameter. + + Checks whether 'skipfooter' is a non-negative integer. + Raises a ValueError if that is not the case. + + Parameters + ---------- + skipfooter : non-negative integer + The number of rows to skip at the end of the file. + + Returns + ------- + validated_skipfooter : non-negative integer + The original input if the validation succeeds. + + Raises + ------ + ValueError : 'skipfooter' was not a non-negative integer. 
+ """ + + if not is_integer(skipfooter): + raise ValueError("skipfooter must be an integer") + + if skipfooter < 0: + raise ValueError("skipfooter cannot be negative") + + return skipfooter + + def _validate_usecols_arg(usecols): """ Validate the 'usecols' parameter. @@ -1108,9 +1247,11 @@ def __init__(self, kwds): self.na_values = kwds.get('na_values') self.na_fvalues = kwds.get('na_fvalues') + self.na_filter = kwds.get('na_filter', False) + self.keep_default_na = kwds.get('keep_default_na', True) + self.true_values = kwds.get('true_values') self.false_values = kwds.get('false_values') - self.as_recarray = kwds.get('as_recarray', False) self.tupleize_cols = kwds.get('tupleize_cols', False) self.mangle_dupe_cols = kwds.get('mangle_dupe_cols', True) self.infer_datetime_format = kwds.pop('infer_datetime_format', False) @@ -1124,9 +1265,8 @@ def __init__(self, kwds): # validate header options for mi self.header = kwds.get('header') if isinstance(self.header, (list, tuple, np.ndarray)): - if kwds.get('as_recarray'): - raise ValueError("cannot specify as_recarray when " - "specifying a multi-index header") + if not all(map(is_integer, self.header)): + raise ValueError("header must be integer or list of integers") if kwds.get('usecols'): raise ValueError("cannot specify usecols when " "specifying a multi-index header") @@ -1144,6 +1284,10 @@ def __init__(self, kwds): raise ValueError("index_col must only contain row numbers " "when specifying a multi-index header") + # GH 16338 + elif self.header is not None and not is_integer(self.header): + raise ValueError("header must be integer or list of integers") + self._name_processed = False self._first_chunk = True @@ -1167,13 +1311,18 @@ def _should_parse_dates(self, i): if isinstance(self.parse_dates, bool): return self.parse_dates else: - name = self.index_names[i] + if self.index_names is not None: + name = self.index_names[i] + else: + name = None j = self.index_col[i] if is_scalar(self.parse_dates): - return (j == 
self.parse_dates) or (name == self.parse_dates) + return ((j == self.parse_dates) or + (name is not None and name == self.parse_dates)) else: - return (j in self.parse_dates) or (name in self.parse_dates) + return ((j in self.parse_dates) or + (name is not None and name in self.parse_dates)) def _extract_multi_indexer_columns(self, header, index_names, col_names, passed_names=False): @@ -1202,7 +1351,7 @@ def _extract_multi_indexer_columns(self, header, index_names, col_names, field_count = len(header[0]) def extract(r): - return tuple([r[i] for i in range(field_count) if i not in sic]) + return tuple(r[i] for i in range(field_count) if i not in sic) columns = lzip(*[extract(r) for r in header]) names = ic + columns @@ -1213,11 +1362,11 @@ def tostr(x): # if we find 'Unnamed' all of a single level, then our header was too # long for n in range(len(columns[0])): - if all(['Unnamed' in tostr(c[n]) for c in columns]): + if all('Unnamed' in tostr(c[n]) for c in columns): raise ParserError( "Passed header=[%s] are too many rows for this " "multi_index of columns" - % ','.join([str(x) for x in self.header]) + % ','.join(str(x) for x in self.header) ) # clean the column names (if we have an index_col) @@ -1239,23 +1388,29 @@ def _maybe_dedup_names(self, names): # would be nice! 
if self.mangle_dupe_cols: names = list(names) # so we can index - counts = {} + counts = defaultdict(int) + is_potential_mi = _is_potential_multi_index(names) for i, col in enumerate(names): - cur_count = counts.get(col, 0) + cur_count = counts[col] + + while cur_count > 0: + counts[col] = cur_count + 1 - if cur_count > 0: - names[i] = '%s.%d' % (col, cur_count) + if is_potential_mi: + col = col[:-1] + ('%s.%d' % (col[-1], cur_count),) + else: + col = '%s.%d' % (col, cur_count) + cur_count = counts[col] + names[i] = col counts[col] = cur_count + 1 return names def _maybe_make_multi_index_columns(self, columns, col_names=None): # possibly create a column mi here - if (not self.tupleize_cols and len(columns) and - not isinstance(columns, MultiIndex) and - all([isinstance(c, tuple) for c in columns])): + if _is_potential_multi_index(columns): columns = MultiIndex.from_tuples(columns, names=col_names) return columns @@ -1266,7 +1421,6 @@ def _make_index(self, data, alldata, columns, indexnamerow=False): elif not self._has_complex_date_col: index = self._get_simple_index(alldata, columns) index = self._agg_index(index) - elif self._has_complex_date_col: if not self._name_processed: (self.index_names, _, @@ -1293,7 +1447,6 @@ def ix(col): if not isinstance(col, compat.string_types): return col raise ValueError('Index %s invalid' % col) - index = None to_remove = [] index = [] @@ -1324,8 +1477,6 @@ def _get_name(icol): if i == icol: return c - index = None - to_remove = [] index = [] for idx in self.index_col: @@ -1343,24 +1494,31 @@ def _get_name(icol): def _agg_index(self, index, try_parse_dates=True): arrays = [] + for i, arr in enumerate(index): - if (try_parse_dates and self._should_parse_dates(i)): + if try_parse_dates and self._should_parse_dates(i): arr = self._date_conv(arr) - col_na_values = self.na_values - col_na_fvalues = self.na_fvalues + if self.na_filter: + col_na_values = self.na_values + col_na_fvalues = self.na_fvalues + else: + col_na_values = set() + 
col_na_fvalues = set() if isinstance(self.na_values, dict): col_name = self.index_names[i] if col_name is not None: col_na_values, col_na_fvalues = _get_na_values( - col_name, self.na_values, self.na_fvalues) + col_name, self.na_values, self.na_fvalues, + self.keep_default_na) arr, _ = self._infer_types(arr, col_na_values | col_na_fvalues) arrays.append(arr) - index = MultiIndex.from_arrays(arrays, names=self.index_names) + names = self.index_names + index = _ensure_index_from_sequences(arrays, names) return index @@ -1377,7 +1535,7 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False, if self.na_filter: col_na_values, col_na_fvalues = _get_na_values( - c, na_values, na_fvalues) + c, na_values, na_fvalues, self.keep_default_na) else: col_na_values, col_na_fvalues = set(), set() @@ -1392,7 +1550,8 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False, try: values = lib.map_infer(values, conv_f) except ValueError: - mask = lib.ismember(values, na_values).view(np.uint8) + mask = algorithms.isin( + values, list(na_values)).view(np.uint8) values = lib.map_infer_mask(values, conv_f, mask) cvals, na_count = self._infer_types( @@ -1407,15 +1566,10 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False, values, set(col_na_values) | col_na_fvalues, try_num_bool) - # type specificed in dtype param + # type specified in dtype param if cast_type and not is_dtype_equal(cvals, cast_type): cvals = self._cast_types(cvals, cast_type, c) - if issubclass(cvals.dtype.type, np.integer) and self.compact_ints: - cvals = lib.downcast_int64( - cvals, _parser.na_values, - self.use_unsigned) - result[c] = cvals if verbose and na_count: print('Filled %d NA values in column %s' % (na_count, str(c))) @@ -1440,7 +1594,7 @@ def _infer_types(self, values, na_values, try_num_bool=True): na_count = 0 if issubclass(values.dtype.type, (np.number, np.bool_)): - mask = lib.ismember(values, na_values) + mask = algorithms.isin(values, 
list(na_values)) na_count = mask.sum() if na_count > 0: if is_integer_dtype(values): @@ -1451,20 +1605,21 @@ def _infer_types(self, values, na_values, try_num_bool=True): if try_num_bool: try: result = lib.maybe_convert_numeric(values, na_values, False) - na_count = isnull(result).sum() + na_count = isna(result).sum() except Exception: result = values if values.dtype == np.object_: - na_count = lib.sanitize_objects(result, na_values, False) + na_count = parsers.sanitize_objects(result, na_values, + False) else: result = values if values.dtype == np.object_: - na_count = lib.sanitize_objects(values, na_values, False) + na_count = parsers.sanitize_objects(values, na_values, False) if result.dtype == np.object_ and try_num_bool: - result = lib.maybe_convert_bool(values, - true_values=self.true_values, - false_values=self.false_values) + result = libops.maybe_convert_bool(values, + true_values=self.true_values, + false_values=self.false_values) return result, na_count @@ -1486,15 +1641,23 @@ def _cast_types(self, values, cast_type, column): """ if is_categorical_dtype(cast_type): - # XXX this is for consistency with - # c-parser which parses all categories - # as strings - if not is_object_dtype(values): - values = _astype_nansafe(values, str) - values = Categorical(values) + known_cats = (isinstance(cast_type, CategoricalDtype) and + cast_type.categories is not None) + + if not is_object_dtype(values) and not known_cats: + # XXX this is for consistency with + # c-parser which parses all categories + # as strings + values = astype_nansafe(values, str) + + cats = Index(values).unique().dropna() + values = Categorical._from_inferred_categories( + cats, cats.get_indexer(values), cast_type + ) + else: try: - values = _astype_nansafe(values, cast_type, copy=True) + values = astype_nansafe(values, cast_type, copy=True) except ValueError: raise ValueError("Unable to convert column %s to " "type %s" % (column, cast_type)) @@ -1502,6 +1665,7 @@ def _cast_types(self, values, 
cast_type, column): def _do_date_conversions(self, names, data): # returns data, columns + if self.parse_dates is not None: data, names = _process_date_conversion( data, self._date_conv, self.parse_dates, self.index_col, @@ -1521,7 +1685,9 @@ def __init__(self, src, **kwds): ParserBase.__init__(self, kwds) - if 'utf-16' in (kwds.get('encoding') or ''): + if (kwds.get('compression') is None + and 'utf-16' in (kwds.get('encoding') or '')): + # if source is utf-16 plain text, convert source to utf-8 if isinstance(src, compat.string_types): src = open(src, 'rb') self.handles.append(src) @@ -1531,7 +1697,7 @@ def __init__(self, src, **kwds): # #2442 kwds['allow_leading_cols'] = self.index_col is not False - self._reader = _parser.TextReader(src, **kwds) + self._reader = parsers.TextReader(src, **kwds) # XXX self.usecols, self.usecols_dtype = _validate_usecols_arg( @@ -1572,12 +1738,18 @@ def __init__(self, src, **kwds): if self.usecols: usecols = _evaluate_usecols(self.usecols, self.orig_names) + + # GH 14671 + if (self.usecols_dtype == 'string' and + not set(usecols).issubset(self.orig_names)): + _validate_usecols_names(usecols, self.orig_names) + if len(self.names) > len(usecols): self.names = [n for i, n in enumerate(self.names) if (i in usecols or n in usecols)] if len(self.names) < len(usecols): - raise ValueError("Usecols do not match names.") + _validate_usecols_names(usecols, self.names) self._set_noconvert_columns() @@ -1622,6 +1794,7 @@ def _set_noconvert_columns(self): # A set of integers will be converted to a list in # the correct order every single time. usecols = list(self.usecols) + usecols.sort() elif (callable(self.usecols) or self.usecols_dtype not in ('empty', None)): # The names attribute should have the correct columns @@ -1693,10 +1866,6 @@ def read(self, nrows=None): # Done with first read, next time raise StopIteration self._first_chunk = False - if self.as_recarray: - # what to do if there are leading columns? 
- return data - names = self.names if self._reader.leading_cols: @@ -1716,7 +1885,7 @@ def read(self, nrows=None): try_parse_dates=True) arrays.append(values) - index = MultiIndex.from_arrays(arrays) + index = _ensure_index_from_sequences(arrays) if self.usecols is not None: names = self._filter_usecols(names) @@ -1725,7 +1894,7 @@ def read(self, nrows=None): # rename dict keys data = sorted(data.items()) - data = dict((k, v) for k, (i, v) in zip(names, data)) + data = {k: v for k, (i, v) in zip(names, data)} names, data = self._do_date_conversions(names, data) @@ -1743,7 +1912,7 @@ def read(self, nrows=None): # columns as list alldata = [x[1] for x in data] - data = dict((k, v) for k, (i, v) in zip(names, data)) + data = {k: v for k, (i, v) in zip(names, data)} names, data = self._do_date_conversions(names, data) index, names = self._make_index(data, alldata, names) @@ -1836,7 +2005,7 @@ def TextParser(*args, **kwds): def count_empty_vals(vals): - return sum([1 for v in vals if v == '' or v is None]) + return sum(1 for v in vals if v == '' or v is None) class PythonParser(ParserBase): @@ -1864,7 +2033,7 @@ def __init__(self, f, **kwds): else: self.skipfunc = lambda x: x in self.skiprows - self.skipfooter = kwds['skipfooter'] + self.skipfooter = _validate_skipfooter_arg(kwds['skipfooter']) self.delimiter = kwds['delimiter'] self.quotechar = kwds['quotechar'] @@ -1879,9 +2048,10 @@ def __init__(self, f, **kwds): self.usecols, _ = _validate_usecols_arg(kwds['usecols']) self.skip_blank_lines = kwds['skip_blank_lines'] - self.names_passed = kwds['names'] or None + self.warn_bad_lines = kwds['warn_bad_lines'] + self.error_bad_lines = kwds['error_bad_lines'] - self.na_filter = kwds['na_filter'] + self.names_passed = kwds['names'] or None self.has_index_names = False if 'has_index_names' in kwds: @@ -1891,15 +2061,14 @@ def __init__(self, f, **kwds): self.converters = kwds['converters'] self.dtype = kwds['dtype'] - self.compact_ints = kwds['compact_ints'] - 
self.use_unsigned = kwds['use_unsigned'] self.thousands = kwds['thousands'] self.decimal = kwds['decimal'] self.comment = kwds['comment'] self._comment_lines = [] - f, handles = _get_handle(f, 'r', encoding=self.encoding, + mode = 'r' if PY3 else 'rb' + f, handles = _get_handle(f, mode, encoding=self.encoding, compression=self.compression, memory_map=self.memory_map) self.handles.extend(handles) @@ -1911,7 +2080,7 @@ def __init__(self, f, **kwds): self.data = f # Get columns in two steps: infer from data, then - # infer column indices from self.usecols if is is specified. + # infer column indices from self.usecols if it is specified. self._col_indices = None self.columns, self.num_original_columns = self._infer_columns() @@ -2097,9 +2266,6 @@ def read(self, rows=None): columns, data = self._do_date_conversions(columns, data) data = self._convert_data(data) - if self.as_recarray: - return self._to_recarray(data, columns) - index, columns = self._make_index(data, alldata, columns, indexnamerow) return index, columns, data @@ -2117,7 +2283,7 @@ def _exclude_implicit_index(self, alldata): offset += 1 data[col] = alldata[i + offset] else: - data = dict((k, v) for k, v in zip(names, alldata)) + data = {k: v for k, v in zip(names, alldata)} return data @@ -2125,7 +2291,7 @@ def _exclude_implicit_index(self, alldata): def get_chunk(self, size=None): if size is None: size = self.chunksize - return self.read(nrows=size) + return self.read(rows=size) def _convert_data(self, data): # apply converters @@ -2167,19 +2333,6 @@ def _clean_mapping(mapping): clean_na_fvalues, self.verbose, clean_conv, clean_dtypes) - def _to_recarray(self, data, columns): - dtypes = [] - o = compat.OrderedDict() - - # use the columns to "order" the keys - # in the unordered 'data' dictionary - for col in columns: - dtypes.append((str(col), data[col].dtype)) - o[col] = data[col] - - tuples = lzip(*o.values()) - return np.array(tuples, dtypes) - def _infer_columns(self): names = self.names 
num_original_columns = 0 @@ -2187,10 +2340,11 @@ def _infer_columns(self): if self.header is not None: header = self.header - # we have a mi columns, so read an extra line if isinstance(header, (list, tuple, np.ndarray)): - have_mi_columns = True - header = list(header) + [header[-1] + 1] + have_mi_columns = len(header) > 1 + # we have a mi columns, so read an extra line + if have_mi_columns: + header = list(header) + [header[-1] + 1] else: have_mi_columns = False header = [header] @@ -2238,11 +2392,17 @@ def _infer_columns(self): this_columns.append(c) if not have_mi_columns and self.mangle_dupe_cols: - counts = {} + counts = defaultdict(int) + for i, col in enumerate(this_columns): - cur_count = counts.get(col, 0) - if cur_count > 0: - this_columns[i] = '%s.%d' % (col, cur_count) + cur_count = counts[col] + + while cur_count > 0: + counts[col] = cur_count + 1 + col = "%s.%d" % (col, cur_count) + cur_count = counts[col] + + this_columns[i] = col counts[col] = cur_count + 1 elif have_mi_columns: @@ -2334,14 +2494,18 @@ def _handle_usecols(self, columns, usecols_key): if self.usecols is not None: if callable(self.usecols): col_indices = _evaluate_usecols(self.usecols, usecols_key) - elif any([isinstance(u, string_types) for u in self.usecols]): + elif any(isinstance(u, string_types) for u in self.usecols): if len(columns) > 1: raise ValueError("If using multiple headers, usecols must " "be integers.") col_indices = [] + for col in self.usecols: if isinstance(col, string_types): - col_indices.append(usecols_key.index(col)) + try: + col_indices.append(usecols_key.index(col)) + except ValueError: + _validate_usecols_names(self.usecols, usecols_key) else: col_indices.append(col) else: @@ -2422,7 +2586,19 @@ def _check_for_bom(self, first_row): # return an empty string. return [""] - def _empty(self, line): + def _is_line_empty(self, line): + """ + Check if a line is empty or not. + + Parameters + ---------- + line : str, array-like + The line of data to check. 
+ + Returns + ------- + boolean : Whether or not the line is empty. + """ return not line or all(not x for x in line) def _next_line(self): @@ -2435,11 +2611,12 @@ def _next_line(self): line = self._check_comments([self.data[self.pos]])[0] self.pos += 1 # either uncommented or blank to begin with - if not self.skip_blank_lines and (self._empty(self.data[ - self.pos - 1]) or line): + if (not self.skip_blank_lines and + (self._is_line_empty( + self.data[self.pos - 1]) or line)): break elif self.skip_blank_lines: - ret = self._check_empty([line]) + ret = self._remove_empty_lines([line]) if ret: line = ret[0] break @@ -2451,35 +2628,19 @@ def _next_line(self): next(self.data) while True: - try: - orig_line = next(self.data) - except csv.Error as e: - msg = str(e) - - if 'NULL byte' in str(e): - msg = ('NULL byte detected. This byte ' - 'cannot be processed in Python\'s ' - 'native csv library at the moment, ' - 'so please pass in engine=\'c\' instead') - - if self.skipfooter > 0: - reason = ('Error could possibly be due to ' - 'parsing errors in the skipped footer rows ' - '(the skipfooter keyword is only applied ' - 'after Python\'s csv library has parsed ' - 'all rows).') - msg += '. ' + reason - - raise csv.Error(msg) - line = self._check_comments([orig_line])[0] + orig_line = self._next_iter_line(row_num=self.pos + 1) self.pos += 1 - if (not self.skip_blank_lines and - (self._empty(orig_line) or line)): - break - elif self.skip_blank_lines: - ret = self._check_empty([line]) - if ret: - line = ret[0] + + if orig_line is not None: + line = self._check_comments([orig_line])[0] + + if self.skip_blank_lines: + ret = self._remove_empty_lines([line]) + + if ret: + line = ret[0] + break + elif self._is_line_empty(orig_line) or line: break # This was the first line of the file, @@ -2492,6 +2653,66 @@ def _next_line(self): self.buf.append(line) return line + def _alert_malformed(self, msg, row_num): + """ + Alert a user about a malformed row. 
+ + If `self.error_bad_lines` is True, the alert will be `ParserError`. + If `self.warn_bad_lines` is True, the alert will be printed out. + + Parameters + ---------- + msg : The error message to display. + row_num : The row number where the parsing error occurred. + Because this row number is displayed, we 1-index, + even though we 0-index internally. + """ + + if self.error_bad_lines: + raise ParserError(msg) + elif self.warn_bad_lines: + base = 'Skipping line {row_num}: '.format(row_num=row_num) + sys.stderr.write(base + msg + '\n') + + def _next_iter_line(self, row_num): + """ + Wrapper around iterating through `self.data` (CSV source). + + When a CSV error is raised, we check for specific + error messages that allow us to customize the + error message displayed to the user. + + Parameters + ---------- + row_num : The row number of the line being parsed. + """ + + try: + return next(self.data) + except csv.Error as e: + if self.warn_bad_lines or self.error_bad_lines: + msg = str(e) + + if 'NULL byte' in msg: + msg = ('NULL byte detected. This byte ' + 'cannot be processed in Python\'s ' + 'native csv library at the moment, ' + 'so please pass in engine=\'c\' instead') + elif 'newline inside string' in msg: + msg = ('EOF inside string starting with ' + 'line ' + str(row_num)) + + if self.skipfooter > 0: + reason = ('Error could possibly be due to ' + 'parsing errors in the skipped footer rows ' + '(the skipfooter keyword is only applied ' + 'after Python\'s csv library has parsed ' + 'all rows).') + msg += '. 
' + reason + + self._alert_malformed(msg, row_num) + return None + def _check_comments(self, lines): if self.comment is None: return lines @@ -2510,7 +2731,22 @@ def _check_comments(self, lines): ret.append(rl) return ret - def _check_empty(self, lines): + def _remove_empty_lines(self, lines): + """ + Iterate through the lines and remove any that are + either empty or contain only one whitespace value + + Parameters + ---------- + lines : array-like + The array of lines that we are to filter. + + Returns + ------- + filtered_lines : array-like + The same array of lines with the "empty" ones removed. + """ + ret = [] for l in lines: # Remove empty lines and lines with only one whitespace value @@ -2626,37 +2862,51 @@ def _rows_to_cols(self, content): if self._implicit_index: col_len += len(self.index_col) - # see gh-13320 - zipped_content = list(lib.to_object_array( - content, min_width=col_len).T) - zip_len = len(zipped_content) - - if self.skipfooter < 0: - raise ValueError('skip footer cannot be negative') + max_len = max(len(row) for row in content) - # Loop through rows to verify lengths are correct. - if (col_len != zip_len and + # Check that there are no rows with too many + # elements in their row (rows with too few + # elements are padded with NaN). + if (max_len > col_len and self.index_col is not False and self.usecols is None): - i = 0 - for (i, l) in enumerate(content): - if len(l) != col_len: - break - footers = 0 - if self.skipfooter: - footers = self.skipfooter + footers = self.skipfooter if self.skipfooter else 0 + bad_lines = [] - row_num = self.pos - (len(content) - i + footers) + iter_content = enumerate(content) + content_len = len(content) + content = [] - msg = ('Expected %d fields in line %d, saw %d' % - (col_len, row_num + 1, zip_len)) - if len(self.delimiter) > 1 and self.quoting != csv.QUOTE_NONE: - # see gh-13374 - reason = ('Error could possibly be due to quotes being ' - 'ignored when a multi-char delimiter is used.') - msg += '. 
' + reason - raise ValueError(msg) + for (i, l) in iter_content: + actual_len = len(l) + + if actual_len > col_len: + if self.error_bad_lines or self.warn_bad_lines: + row_num = self.pos - (content_len - i + footers) + bad_lines.append((row_num, actual_len)) + + if self.error_bad_lines: + break + else: + content.append(l) + + for row_num, actual_len in bad_lines: + msg = ('Expected %d fields in line %d, saw %d' % + (col_len, row_num + 1, actual_len)) + if (self.delimiter and + len(self.delimiter) > 1 and + self.quoting != csv.QUOTE_NONE): + # see gh-13374 + reason = ('Error could possibly be due to quotes being ' + 'ignored when a multi-char delimiter is used.') + msg += '. ' + reason + + self._alert_malformed(msg, row_num + 1) + + # see gh-13320 + zipped_content = list(lib.to_object_array( + content, min_width=col_len).T) if self.usecols: if self._implicit_index: @@ -2670,7 +2920,6 @@ def _rows_to_cols(self, content): return zipped_content def _get_lines(self, rows=None): - source = self.data lines = self.buf new_rows = None @@ -2685,14 +2934,14 @@ def _get_lines(self, rows=None): rows -= len(self.buf) if new_rows is None: - if isinstance(source, list): - if self.pos > len(source): + if isinstance(self.data, list): + if self.pos > len(self.data): raise StopIteration if rows is None: - new_rows = source[self.pos:] - new_pos = len(source) + new_rows = self.data[self.pos:] + new_pos = len(self.data) else: - new_rows = source[self.pos:self.pos + rows] + new_rows = self.data[self.pos:self.pos + rows] new_pos = self.pos + rows # Check for stop rows. n.b.: self.skiprows is a set. 
@@ -2708,21 +2957,19 @@ def _get_lines(self, rows=None): try: if rows is not None: for _ in range(rows): - new_rows.append(next(source)) + new_rows.append(next(self.data)) lines.extend(new_rows) else: rows = 0 + while True: - try: - new_rows.append(next(source)) - rows += 1 - except csv.Error as inst: - if 'newline inside string' in str(inst): - row_num = str(self.pos + rows) - msg = ('EOF inside string starting with ' - 'line ' + row_num) - raise Exception(msg) - raise + new_row = self._next_iter_line( + row_num=self.pos + rows + 1) + rows += 1 + + if new_row is not None: + new_rows.append(new_row) + except StopIteration: if self.skiprows: new_rows = [row for i, row in enumerate(new_rows) @@ -2741,7 +2988,7 @@ def _get_lines(self, rows=None): lines = self._check_comments(lines) if self.skip_blank_lines: - lines = self._check_empty(lines) + lines = self._remove_empty_lines(lines) lines = self._check_thousands(lines) return self._check_decimal(lines) @@ -2763,7 +3010,7 @@ def converter(*date_cols): ) except: return tools.to_datetime( - lib.try_parse_dates(strs, dayfirst=dayfirst)) + parsing.try_parse_dates(strs, dayfirst=dayfirst)) else: try: result = tools.to_datetime( @@ -2774,9 +3021,9 @@ def converter(*date_cols): except Exception: try: return tools.to_datetime( - lib.try_parse_dates(_concat_date_cols(date_cols), - parser=date_parser, - dayfirst=dayfirst), + parsing.try_parse_dates(_concat_date_cols(date_cols), + parser=date_parser, + dayfirst=dayfirst), errors='ignore') except Exception: return generic_parser(date_parser, *date_cols) @@ -2856,11 +3103,11 @@ def _try_convert_dates(parser, colspec, data_dict, columns): if c in colset: colnames.append(c) elif isinstance(c, int) and c not in columns: - colnames.append(str(columns[c])) + colnames.append(columns[c]) else: colnames.append(c) - new_name = '_'.join([str(x) for x in colnames]) + new_name = '_'.join(str(x) for x in colnames) to_parse = [data_dict[c] for c in colnames if c in data_dict] new_col = 
parser(*to_parse) @@ -2873,19 +3120,26 @@ def _clean_na_values(na_values, keep_default_na=True): if keep_default_na: na_values = _NA_VALUES else: - na_values = [] + na_values = set() na_fvalues = set() elif isinstance(na_values, dict): - na_values = na_values.copy() # Prevent aliasing. - if keep_default_na: - for k, v in compat.iteritems(na_values): - if not is_list_like(v): - v = [v] + old_na_values = na_values.copy() + na_values = {} # Prevent aliasing. + + # Convert the values in the na_values dictionary + # into array-likes for further use. This is also + # where we append the default NaN values, provided + # that `keep_default_na=True`. + for k, v in compat.iteritems(old_na_values): + if not is_list_like(v): + v = [v] + + if keep_default_na: v = set(v) | _NA_VALUES - na_values[k] = v - na_fvalues = dict([ - (k, _floatify_na_values(v)) for k, v in na_values.items() # noqa - ]) + + na_values[k] = v + na_fvalues = dict((k, _floatify_na_values(v)) + for k, v in na_values.items()) else: if not is_list_like(na_values): na_values = [na_values] @@ -2954,16 +3208,14 @@ def _get_empty_meta(columns, index_col, index_names, dtype=None): if index_col is None or index_col is False: index = Index([]) else: - index = [Series([], dtype=dtype[index_name]) - for index_name in index_names] - index = MultiIndex.from_arrays(index, names=index_names) + data = [Series([], dtype=dtype[name]) for name in index_names] + index = _ensure_index_from_sequences(data, names=index_names) index_col.sort() for i, n in enumerate(index_col): columns.pop(n - i) - col_dict = dict((col_name, - Series([], dtype=dtype[col_name])) - for col_name in columns) + col_dict = {col_name: Series([], dtype=dtype[col_name]) + for col_name in columns} return index, columns, col_dict @@ -3006,12 +3258,38 @@ def _stringify_na_values(na_values): return set(result) -def _get_na_values(col, na_values, na_fvalues): +def _get_na_values(col, na_values, na_fvalues, keep_default_na): + """ + Get the NaN values for a given 
column. + + Parameters + ---------- + col : str + The name of the column. + na_values : array-like, dict + The object listing the NaN values as strings. + na_fvalues : array-like, dict + The object listing the NaN values as floats. + keep_default_na : bool + If `na_values` is a dict, and the column is not mapped in the + dictionary, whether to return the default NaN values or the empty set. + + Returns + ------- + nan_tuple : A length-two tuple composed of + + 1) na_values : the string NaN values for that column. + 2) na_fvalues : the float NaN values for that column. + """ + if isinstance(na_values, dict): if col in na_values: return na_values[col], na_fvalues[col] else: - return _NA_VALUES, set() + if keep_default_na: + return _NA_VALUES, set() + + return set(), set() else: return na_values, na_fvalues @@ -3038,7 +3316,7 @@ def _concat_date_cols(date_cols): for x in date_cols[0] ], dtype=object) - rs = np.array([' '.join([compat.text_type(y) for y in x]) + rs = np.array([' '.join(compat.text_type(y) for y in x) for x in zip(*date_cols)], dtype=object) return rs @@ -3109,7 +3387,7 @@ def get_rows(self, n, skiprows=None): def detect_colspecs(self, n=100, skiprows=None): # Regex escape the delimiters - delimiters = ''.join([r'\%s' % x for x in self.delimiter]) + delimiters = ''.join(r'\%s' % x for x in self.delimiter) pattern = re.compile('([^%s]+)' % delimiters) rows = self.get_rows(n, skiprows) if not rows: diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py index 2358c296f782e..756096dd0c9ce 100644 --- a/pandas/io/pickle.py +++ b/pandas/io/pickle.py @@ -1,12 +1,14 @@ """ pickle compat """ +import warnings import numpy as np from numpy.lib.format import read_array, write_array from pandas.compat import BytesIO, cPickle as pkl, pickle_compat as pc, PY3 -from pandas.types.common import is_datetime64_dtype, _NS_DTYPE +from pandas.core.dtypes.common import is_datetime64_dtype, _NS_DTYPE +from pandas.io.common import _get_handle, _infer_compression, _stringify_path 
-def to_pickle(obj, path): +def to_pickle(obj, path, compression='infer', protocol=pkl.HIGHEST_PROTOCOL): """ Pickle (serialize) object to input file path @@ -15,28 +17,75 @@ def to_pickle(obj, path): obj : any object path : string File path + compression : {'infer', 'gzip', 'bz2', 'xz', None}, default 'infer' + a string representing the compression to use in the output file + + .. versionadded:: 0.20.0 + protocol : int + Int which indicates which protocol should be used by the pickler, + default HIGHEST_PROTOCOL (see [1], paragraph 12.1.2). The possible + values for this parameter depend on the version of Python. For Python + 2.x, possible values are 0, 1, 2. For Python>=3.0, 3 is a valid value. + For Python >= 3.4, 4 is a valid value. A negative value for the + protocol parameter is equivalent to setting its value to + HIGHEST_PROTOCOL. + + .. [1] https://docs.python.org/3/library/pickle.html + .. versionadded:: 0.21.0 + + """ - with open(path, 'wb') as f: - pkl.dump(obj, f, protocol=pkl.HIGHEST_PROTOCOL) + path = _stringify_path(path) + inferred_compression = _infer_compression(path, compression) + f, fh = _get_handle(path, 'wb', + compression=inferred_compression, + is_text=False) + if protocol < 0: + protocol = pkl.HIGHEST_PROTOCOL + try: + pkl.dump(obj, f, protocol=protocol) + finally: + for _f in fh: + _f.close() -def read_pickle(path): +def read_pickle(path, compression='infer'): """ Load pickled pandas object (or any other pickled object) from the specified file path Warning: Loading pickled data received from untrusted sources can be - unsafe. See: http://docs.python.org/2.7/library/pickle.html + unsafe. See: https://docs.python.org/3/library/pickle.html Parameters ---------- path : string File path + compression : {'infer', 'gzip', 'bz2', 'xz', 'zip', None}, default 'infer' + For on-the-fly decompression of on-disk data. 
If 'infer', then use + gzip, bz2, xz or zip if path ends in '.gz', '.bz2', '.xz', + or '.zip' respectively, and no decompression otherwise. + Set to None for no decompression. + + .. versionadded:: 0.20.0 Returns ------- unpickled : type of object stored in file """ + path = _stringify_path(path) + inferred_compression = _infer_compression(path, compression) + + def read_wrapper(func): + # wrapper file handle open/close operation + f, fh = _get_handle(path, 'rb', + compression=inferred_compression, + is_text=False) + try: + return func(f) + finally: + for _f in fh: + _f.close() def try_read(path, encoding=None): # try with cPickle @@ -48,19 +97,18 @@ def try_read(path, encoding=None): # cpickle # GH 6899 try: - with open(path, 'rb') as fh: - return pkl.load(fh) + with warnings.catch_warnings(record=True): + # We want to silencce any warnings about, e.g. moved modules. + return read_wrapper(lambda f: pkl.load(f)) except Exception: # reg/patched pickle try: - with open(path, 'rb') as fh: - return pc.load(fh, encoding=encoding, compat=False) - + return read_wrapper( + lambda f: pc.load(f, encoding=encoding, compat=False)) # compat pickle except: - with open(path, 'rb') as fh: - return pc.load(fh, encoding=encoding, compat=True) - + return read_wrapper( + lambda f: pc.load(f, encoding=encoding, compat=True)) try: return try_read(path) except: @@ -68,6 +116,7 @@ def try_read(path, encoding=None): return try_read(path, encoding='latin1') raise + # compat with sparse pickle / unpickle diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 9224f7d3d9a94..2437b7d396e84 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -12,31 +12,32 @@ import warnings import os -from pandas.types.common import (is_list_like, - is_categorical_dtype, - is_timedelta64_dtype, - is_datetime64tz_dtype, - is_datetime64_dtype, - _ensure_object, - _ensure_int64, - _ensure_platform_int) -from pandas.types.missing import array_equivalent +from pandas.core.dtypes.common import 
( + is_list_like, + is_categorical_dtype, + is_timedelta64_dtype, + is_datetime64tz_dtype, + is_datetime64_dtype, + _ensure_object, + _ensure_int64, + _ensure_platform_int) +from pandas.core.dtypes.missing import array_equivalent import numpy as np - -import pandas as pd -from pandas import (Series, DataFrame, Panel, Panel4D, Index, - MultiIndex, Int64Index, isnull, concat, +from pandas import (Series, DataFrame, Panel, Index, + MultiIndex, Int64Index, isna, concat, to_datetime, SparseSeries, SparseDataFrame, PeriodIndex, DatetimeIndex, TimedeltaIndex) from pandas.core import config from pandas.io.common import _stringify_path -from pandas.sparse.array import BlockIndex, IntIndex +from pandas.core.sparse.array import BlockIndex, IntIndex from pandas.core.base import StringMixin -from pandas.formats.printing import adjoin, pprint_thing -from pandas.core.common import _asarray_tuplesafe, PerformanceWarning +from pandas.io.formats.printing import adjoin, pprint_thing +from pandas.errors import PerformanceWarning +import pandas.core.common as com from pandas.core.algorithms import match, unique -from pandas.core.categorical import Categorical, _factorize_from_iterables +from pandas.core.arrays.categorical import (Categorical, + _factorize_from_iterables) from pandas.core.internals import (BlockManager, make_block, _block2d_to_blocknd, _factor_indexer, _block_shape) @@ -44,11 +45,10 @@ from pandas import compat from pandas.compat import u_safe as u, PY3, range, lrange, string_types, filter from pandas.core.config import get_option -from pandas.computation.pytables import Expr, maybe_expression +from pandas.core.computation.pytables import Expr, maybe_expression -import pandas.lib as lib -import pandas.algos as algos -import pandas.tslib as tslib +from pandas._libs import algos, lib, writers as libwriters +from pandas._libs.tslibs import timezones from distutils.version import LooseVersion @@ -74,6 +74,19 @@ def _ensure_encoding(encoding): encoding = _default_encoding 
return encoding + +def _ensure_str(name): + """Ensure that an index / column name is a str (python 3) or + unicode (python 2); otherwise they may be np.string dtype. + Non-string dtypes are passed through unchanged. + + https://github.com/pandas-dev/pandas/issues/13492 + """ + if isinstance(name, compat.string_types): + name = compat.text_type(name) + return name + + Term = Expr @@ -85,7 +98,7 @@ def _ensure_term(where, scope_level): create the terms here with a frame_level=2 (we are 2 levels down) """ - # only consider list/tuple here as an ndarray is automaticaly a coordinate + # only consider list/tuple here as an ndarray is automatically a coordinate # list level = scope_level + 1 if isinstance(where, (list, tuple)): @@ -112,6 +125,7 @@ class ClosedFileError(Exception): class IncompatibilityWarning(Warning): pass + incompatibility_doc = """ where criteria is being ignored as this version [%s] is too old (or not-defined), read the file in and write it out to a new file to upgrade (with @@ -122,6 +136,7 @@ class IncompatibilityWarning(Warning): class AttributeConflictWarning(Warning): pass + attribute_conflict_doc = """ the [%s] attribute of the existing index is [%s] which conflicts with the new [%s], resetting the attribute to None @@ -131,6 +146,7 @@ class AttributeConflictWarning(Warning): class DuplicateWarning(Warning): pass + duplicate_doc = """ duplicate entries in table, taking most recently appended """ @@ -162,16 +178,13 @@ class DuplicateWarning(Warning): Series: u('series'), SparseSeries: u('sparse_series'), - pd.TimeSeries: u('series'), DataFrame: u('frame'), SparseDataFrame: u('sparse_frame'), Panel: u('wide'), - Panel4D: u('ndim'), } # storer class map _STORER_MAP = { - u('TimeSeries'): 'LegacySeriesFixed', u('Series'): 'LegacySeriesFixed', u('DataFrame'): 'LegacyFrameFixed', u('DataMatrix'): 'LegacyFrameFixed', @@ -190,7 +203,6 @@ class DuplicateWarning(Warning): u('appendable_frame'): 'AppendableFrameTable', u('appendable_multiframe'): 
'AppendableMultiFrameTable', u('appendable_panel'): 'AppendablePanelTable', - u('appendable_ndim'): 'AppendableNDimTable', u('worm'): 'WORMTable', u('legacy_frame'): 'LegacyFrameTable', u('legacy_panel'): 'LegacyPanelTable', @@ -199,8 +211,7 @@ class DuplicateWarning(Warning): # axes map _AXES_MAP = { DataFrame: [0], - Panel: [1, 2], - Panel4D: [1, 2, 3], + Panel: [1, 2] } # register our configuration options @@ -235,7 +246,7 @@ def _tables(): _table_mod = tables # version requirements - if LooseVersion(tables.__version__) < '3.0.0': + if LooseVersion(tables.__version__) < LooseVersion('3.0.0'): raise ImportError("PyTables version >= 3.0.0 is required") # set the file open policy @@ -270,7 +281,7 @@ def to_hdf(path_or_buf, key, value, mode=None, complevel=None, complib=None, f(path_or_buf) -def read_hdf(path_or_buf, key=None, **kwargs): +def read_hdf(path_or_buf, key=None, mode='r', **kwargs): """ read from the store, close it if we opened it Retrieve pandas object stored in file, optionally based on where @@ -278,14 +289,17 @@ def read_hdf(path_or_buf, key=None, **kwargs): Parameters ---------- - path_or_buf : path (string), buffer, or path object (pathlib.Path or - py._path.local.LocalPath) to read from + path_or_buf : path (string), buffer or path object (pathlib.Path or + py._path.local.LocalPath) designating the file to open, or an + already opened pd.HDFStore object .. versionadded:: 0.19.0 support for pathlib, py.path. key : group identifier in the store. Can be omitted if the HDF file contains a single pandas object. - where : list of Term (or convertable) objects, optional + mode : string, {'r', 'r+', 'a'}, default 'r'. Mode to use when opening + the file. Ignored if path_or_buf is a pd.HDFStore. 
+ where : list of Term (or convertible) objects, optional start : optional, integer (defaults to None), row number to start selection stop : optional, integer (defaults to None), row number to stop @@ -301,17 +315,24 @@ def read_hdf(path_or_buf, key=None, **kwargs): """ - if kwargs.get('mode', 'a') not in ['r', 'r+', 'a']: + if mode not in ['r', 'r+', 'a']: raise ValueError('mode {0} is not allowed while performing a read. ' - 'Allowed modes are r, r+ and a.' - .format(kwargs.get('mode'))) + 'Allowed modes are r, r+ and a.'.format(mode)) # grab the scope if 'where' in kwargs: kwargs['where'] = _ensure_term(kwargs['where'], scope_level=1) - path_or_buf = _stringify_path(path_or_buf) - if isinstance(path_or_buf, string_types): + if isinstance(path_or_buf, HDFStore): + if not path_or_buf.is_open: + raise IOError('The HDFStore must be open for reading.') + store = path_or_buf + auto_close = False + else: + path_or_buf = _stringify_path(path_or_buf) + if not isinstance(path_or_buf, string_types): + raise NotImplementedError('Support for generic buffers has not ' + 'been implemented.') try: exists = os.path.exists(path_or_buf) @@ -323,22 +344,11 @@ def read_hdf(path_or_buf, key=None, **kwargs): raise compat.FileNotFoundError( 'File %s does not exist' % path_or_buf) + store = HDFStore(path_or_buf, mode=mode, **kwargs) # can't auto open/close if we are using an iterator # so delegate to the iterator - store = HDFStore(path_or_buf, **kwargs) auto_close = True - elif isinstance(path_or_buf, HDFStore): - if not path_or_buf.is_open: - raise IOError('The HDFStore must be open for reading.') - - store = path_or_buf - auto_close = False - - else: - raise NotImplementedError('Support for generic buffers has not been ' - 'implemented.') - try: if key is None: groups = store.groups() @@ -402,12 +412,17 @@ class HDFStore(StringMixin): and if the file does not exist it is created. ``'r+'`` It is similar to ``'a'``, but the file must already exist. 
- complevel : int, 1-9, default 0 - If a complib is specified compression will be applied - where possible - complib : {'zlib', 'bzip2', 'lzo', 'blosc', None}, default None - If complevel is > 0 apply compression to objects written - in the store wherever possible + complevel : int, 0-9, default None + Specifies a compression level for data. + A value of 0 disables compression. + complib : {'zlib', 'lzo', 'bzip2', 'blosc'}, default 'zlib' + Specifies the compression library to be used. + As of v0.20.2 these additional compressors for Blosc are supported + (default if no compressor specified: 'blosc:blosclz'): + {'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy', + 'blosc:zlib', 'blosc:zstd'}. + Specifying a compression library which is not available issues + a ValueError. fletcher32 : bool, default False If applying compression use the fletcher32 checksum @@ -430,21 +445,28 @@ def __init__(self, path, mode=None, complevel=None, complib=None, raise ImportError('HDFStore requires PyTables, "{ex}" problem ' 'importing'.format(ex=str(ex))) - if complib not in (None, 'blosc', 'bzip2', 'lzo', 'zlib'): - raise ValueError("complib only supports 'blosc', 'bzip2', lzo' " - "or 'zlib' compression.") + if complib is not None and complib not in tables.filters.all_complibs: + raise ValueError( + "complib only supports {libs} compression.".format( + libs=tables.filters.all_complibs)) + + if complib is None and complevel is not None: + complib = tables.filters.default_complib - self._path = path + self._path = _stringify_path(path) if mode is None: mode = 'a' self._mode = mode self._handle = None - self._complevel = complevel + self._complevel = complevel if complevel else 0 self._complib = complib self._fletcher32 = fletcher32 self._filters = None self.open(mode=mode, **kwargs) + def __fspath__(self): + return self._path + @property def root(self): """ return the root node """ @@ -466,7 +488,6 @@ def __delitem__(self, key): def __getattr__(self, name): """ allow 
attribute access to get stores """ - self._check_if_open() try: return self.get(name) except: @@ -475,7 +496,7 @@ def __getattr__(self, name): (type(self).__name__, name)) def __contains__(self, key): - """ check for existance of this key + """ check for existence of this key can match the exact pathname or the pathnm w/o the leading '/' """ node = self.get_node(key) @@ -489,32 +510,7 @@ def __len__(self): return len(self.groups()) def __unicode__(self): - output = '%s\nFile path: %s\n' % (type(self), pprint_thing(self._path)) - if self.is_open: - lkeys = sorted(list(self.keys())) - if len(lkeys): - keys = [] - values = [] - - for k in lkeys: - try: - s = self.get_storer(k) - if s is not None: - keys.append(pprint_thing(s.pathname or k)) - values.append( - pprint_thing(s or 'invalid_HDFStore node')) - except Exception as detail: - keys.append(k) - values.append("[invalid_HDFStore node: %s]" - % pprint_thing(detail)) - - output += adjoin(12, keys, values) - else: - output += 'Empty' - else: - output += "File is CLOSED" - - return output + return '%s\nFile path: %s\n' % (type(self), pprint_thing(self._path)) def __enter__(self): return self @@ -574,11 +570,8 @@ def open(self, mode='a', **kwargs): if self.is_open: self.close() - if self._complib is not None: - if self._complevel is None: - self._complevel = 9 - self._filters = _tables().Filters(self._complevel, - self._complib, + if self._complevel and self._complevel > 0: + self._filters = _tables().Filters(self._complevel, self._complib, fletcher32=self._fletcher32) try: @@ -611,7 +604,7 @@ def open(self, mode='a', **kwargs): except (Exception) as e: - # trying to read from a non-existant file causes an error which + # trying to read from a non-existent file causes an error which # is not part of IOError, make it one if self._mode == 'r' and 'Unable to open/create file' in str(e): raise IOError(str(e)) @@ -684,7 +677,7 @@ def select(self, key, where=None, start=None, stop=None, columns=None, Parameters ---------- 
key : object - where : list of Term (or convertable) objects, optional + where : list of Term (or convertible) objects, optional start : integer (defaults to None), row number to start selection stop : integer (defaults to None), row number to stop selection columns : a list of columns that if not None, will limit the return @@ -729,7 +722,7 @@ def select_as_coordinates( Parameters ---------- key : object - where : list of Term (or convertable) objects, optional + where : list of Term (or convertible) objects, optional start : integer (defaults to None), row number to start selection stop : integer (defaults to None), row number to stop selection """ @@ -820,18 +813,18 @@ def select_as_multiple(self, keys, where=None, selector=None, columns=None, "all tables must have exactly the same nrows!") # axis is the concentation axes - axis = list(set([t.non_index_axes[0][0] for t in tbls]))[0] + axis = list({t.non_index_axes[0][0] for t in tbls})[0] def func(_start, _stop, _where): # retrieve the objs, _where is always passed as a set of # coordinates here - objs = [t.read(where=_where, columns=columns, **kwargs) - for t in tbls] + objs = [t.read(where=_where, columns=columns, start=_start, + stop=_stop, **kwargs) for t in tbls] # concat and return return concat(objs, axis=axis, - verify_integrity=False).consolidate() + verify_integrity=False)._consolidate() # create the iterator it = TableIterator(self, s, func, where=where, nrows=nrows, @@ -878,7 +871,7 @@ def remove(self, key, where=None, start=None, stop=None): ---------- key : string Node to remove or delete rows from - where : list of Term (or convertable) objects, optional + where : list of Term (or convertible) objects, optional start : integer (defaults to None), row number to start selection stop : integer (defaults to None), row number to stop selection @@ -910,7 +903,7 @@ def remove(self, key, where=None, start=None, stop=None): raise KeyError('No object named %s in the file' % key) # remove the node - if where 
is None and start is None and stop is None: + if com._all_none(where, start, stop): s.group._f_remove(recursive=True) # delete from the table @@ -929,7 +922,7 @@ def append(self, key, value, format=None, append=True, columns=None, Parameters ---------- key : object - value : {Series, DataFrame, Panel, Panel4D} + value : {Series, DataFrame, Panel} format: 'table' is the default table(t) : table format Write as a PyTables Table structure which may perform @@ -1045,7 +1038,7 @@ def append_to_multiple(self, d, value, selector, data_columns=None, dc = data_columns if k == selector else None # compute the val - val = value.reindex_axis(v, axis=axis) + val = value.reindex(v, axis=axis) self.append(k, val, data_columns=dc, **kwargs) @@ -1156,6 +1149,39 @@ def copy(self, file, mode='w', propindexes=True, keys=None, complib=None, return new_store + def info(self): + """ + print detailed information on the store + + .. versionadded:: 0.21.0 + """ + output = '%s\nFile path: %s\n' % (type(self), pprint_thing(self._path)) + if self.is_open: + lkeys = sorted(list(self.keys())) + if len(lkeys): + keys = [] + values = [] + + for k in lkeys: + try: + s = self.get_storer(k) + if s is not None: + keys.append(pprint_thing(s.pathname or k)) + values.append( + pprint_thing(s or 'invalid_HDFStore node')) + except Exception as detail: + keys.append(k) + values.append("[invalid_HDFStore node: %s]" + % pprint_thing(detail)) + + output += adjoin(12, keys, values) + else: + output += 'Empty' + else: + output += "File is CLOSED" + + return output + # private methods ###### def _check_if_open(self): if not self.is_open: @@ -1222,7 +1248,7 @@ def error(t): # existing node (and must be a table) if tt is None: - # if we are a writer, determin the tt + # if we are a writer, determine the tt if value is not None: if pt == u('series_table'): @@ -1324,6 +1350,13 @@ def _read_group(self, group, **kwargs): def get_store(path, **kwargs): """ Backwards compatible alias for ``HDFStore`` """ + warnings.warn( 
+ "get_store is deprecated and be " + "removed in a future version\n" + "HDFStore(path, **kwargs) is the replacement", + FutureWarning, + stacklevel=6) + return HDFStore(path, **kwargs) @@ -1335,7 +1368,7 @@ class TableIterator(object): ---------- store : the reference store - s : the refered storer + s : the referred storer func : the function to execute the query where : the where of the query nrows : the rows to iterate on @@ -1413,7 +1446,8 @@ def get_result(self, coordinates=False): # if specified read via coordinates (necessary for multiple selections if coordinates: - where = self.s.read_coordinates(where=self.where) + where = self.s.read_coordinates(where=self.where, start=self.start, + stop=self.stop) else: where = self.where @@ -1503,8 +1537,8 @@ def __unicode__(self): def __eq__(self, other): """ compare 2 col items """ - return all([getattr(self, a, None) == getattr(other, a, None) - for a in ['name', 'cname', 'axis', 'pos']]) + return all(getattr(self, a, None) == getattr(other, a, None) + for a in ['name', 'cname', 'axis', 'pos']) def __ne__(self, other): return not self.__eq__(other) @@ -1586,7 +1620,7 @@ def __iter__(self): def maybe_set_size(self, min_itemsize=None, **kwargs): """ maybe set a string col itemsize: - min_itemsize can be an interger or a dict with this columns name + min_itemsize can be an integer or a dict with this columns name with an integer size """ if _ensure_decoded(self.kind) == u('string'): @@ -1677,11 +1711,11 @@ def set_info(self, info): self.__dict__.update(idx) def get_attr(self): - """ set the kind for this colummn """ + """ set the kind for this column """ self.kind = getattr(self.attrs, self.kind_attr, None) def set_attr(self): - """ set the kind for this colummn """ + """ set the kind for this column """ setattr(self.attrs, self.kind_attr, self.kind) def read_metadata(self, handler): @@ -1756,7 +1790,7 @@ def create_for_block( # name values_0 try: if version[0] == 0 and version[1] <= 10 and version[2] == 0: - m = 
re.search("values_block_(\d+)", name) + m = re.search(r"values_block_(\d+)", name) if m: name = "values_%s" % m.groups()[0] except: @@ -1788,8 +1822,8 @@ def __unicode__(self): def __eq__(self, other): """ compare 2 col items """ - return all([getattr(self, a, None) == getattr(other, a, None) - for a in ['name', 'cname', 'dtype', 'pos']]) + return all(getattr(self, a, None) == getattr(other, a, None) + for a in ['name', 'cname', 'dtype', 'pos']) def set_data(self, data, dtype=None): self.data = data @@ -2096,7 +2130,24 @@ def convert(self, values, nan_rep, encoding): # we have a categorical categories = self.metadata - self.data = Categorical.from_codes(self.data.ravel(), + codes = self.data.ravel() + + # if we have stored a NaN in the categories + # then strip it; in theory we could have BOTH + # -1s in the codes and nulls :< + if categories is None: + # Handle case of NaN-only categorical columns in which case + # the categories are an empty array; when this is stored, + # pytables cannot write a zero-len array, so on readback + # the categories would be None and `read_hdf()` would fail. 
+ categories = Index([], dtype=np.float64) + else: + mask = isna(categories) + if mask.any(): + categories = categories[~mask] + codes[codes != -1] -= mask.astype(int).cumsum().values + + self.data = Categorical.from_codes(codes, categories=categories, ordered=self.ordered) @@ -2115,14 +2166,14 @@ def convert(self, values, nan_rep, encoding): return self def get_attr(self): - """ get the data for this colummn """ + """ get the data for this column """ self.values = getattr(self.attrs, self.kind_attr, None) self.dtype = getattr(self.attrs, self.dtype_attr, None) self.meta = getattr(self.attrs, self.meta_attr, None) self.set_kind() def set_attr(self): - """ set the data for this colummn """ + """ set the data for this column """ setattr(self.attrs, self.kind_attr, self.values) setattr(self.attrs, self.meta_attr, self.meta) if self.dtype is not None: @@ -2192,7 +2243,7 @@ def set_version(self): version = _ensure_decoded( getattr(self.group._v_attrs, 'pandas_version', None)) try: - self.version = tuple([int(x) for x in version.split('.')]) + self.version = tuple(int(x) for x in version.split('.')) if len(self.version) == 2: self.version = self.version + (0,) except: @@ -2213,7 +2264,7 @@ def __unicode__(self): s = self.shape if s is not None: if isinstance(s, (list, tuple)): - s = "[%s]" % ','.join([pprint_thing(x) for x in s]) + s = "[%s]" % ','.join(pprint_thing(x) for x in s) return "%-12.12s (shape->%s)" % (self.pandas_type, s) return self.pandas_type @@ -2317,7 +2368,7 @@ def delete(self, where=None, start=None, stop=None, **kwargs): support fully deleting the node in its entirety (only) - where specification must be None """ - if where is None and start is None and stop is None: + if com._all_none(where, start, stop): self._handle.remove_node(self.group, recursive=True) return None @@ -2328,8 +2379,7 @@ class GenericFixed(Fixed): """ a generified fixed version """ _index_type_map = {DatetimeIndex: 'datetime', PeriodIndex: 'period'} - _reverse_index_map = 
dict([(v, k) - for k, v in compat.iteritems(_index_type_map)]) + _reverse_index_map = {v: k for k, v in compat.iteritems(_index_type_map)} attributes = [] # indexer helpders @@ -2345,8 +2395,11 @@ def _alias_to_class(self, alias): def _get_index_factory(self, klass): if klass == DatetimeIndex: def f(values, freq=None, tz=None): - return DatetimeIndex._simple_new(values, None, freq=freq, - tz=tz) + # data are already in UTC, localize and convert if tz present + result = DatetimeIndex._simple_new(values, None, freq=freq) + if tz is not None: + result = result.tz_localize('UTC').tz_convert(tz) + return result return f elif klass == PeriodIndex: def f(values, freq=None, tz=None): @@ -2395,13 +2448,12 @@ def read_array(self, key, start=None, stop=None): """ read an array for the specified node (off of group """ import tables node = getattr(self.group, key) - data = node[start:stop] attrs = node._v_attrs transposed = getattr(attrs, 'transposed', False) if isinstance(node, tables.VLArray): - ret = data[0] + ret = node[0][start:stop] else: dtype = getattr(attrs, 'value_type', None) shape = getattr(attrs, 'shape', None) @@ -2410,7 +2462,7 @@ def read_array(self, key, start=None, stop=None): # length 0 axis ret = np.empty(shape, dtype=dtype) else: - ret = data + ret = node[start:stop] if dtype == u('datetime64'): @@ -2544,10 +2596,10 @@ def read_index_node(self, node, start=None, stop=None): name = None if 'name' in node._v_attrs: - name = node._v_attrs.name + name = _ensure_str(node._v_attrs.name) - index_class = self._alias_to_class(getattr(node._v_attrs, - 'index_class', '')) + index_class = self._alias_to_class(_ensure_decoded( + getattr(node._v_attrs, 'index_class', ''))) factory = self._get_index_factory(index_class) kwargs = {} @@ -2949,11 +3001,11 @@ def __unicode__(self): ver = '' if self.is_old_version: - ver = "[%s]" % '.'.join([str(x) for x in self.version]) + ver = "[%s]" % '.'.join(str(x) for x in self.version) return "%-12.12s%s 
(typ->%s,nrows->%s,ncols->%s,indexers->[%s]%s)" % ( self.pandas_type, ver, self.table_type_short, self.nrows, - self.ncols, ','.join([a.name for a in self.index_axes]), dc + self.ncols, ','.join(a.name for a in self.index_axes), dc ) def __getitem__(self, c): @@ -3046,7 +3098,7 @@ def axes(self): @property def ncols(self): """ the number of total columns in the values axes """ - return sum([len(a.values) for a in self.values_axes]) + return sum(len(a.values) for a in self.values_axes) @property def is_transposed(self): @@ -3406,10 +3458,12 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, if existing_table is not None: indexer = len(self.non_index_axes) exist_axis = existing_table.non_index_axes[indexer][1] - if append_axis != exist_axis: + if not array_equivalent(np.array(append_axis), + np.array(exist_axis)): # ahah! -> reindex - if sorted(append_axis) == sorted(exist_axis): + if array_equivalent(np.array(sorted(append_axis)), + np.array(sorted(exist_axis))): append_axis = exist_axis # the non_index_axes info @@ -3438,7 +3492,7 @@ def get_blk_items(mgr, blocks): return [mgr.items.take(blk.mgr_locs) for blk in blocks] # figure out data_columns and get out blocks - block_obj = self.get_object(obj).consolidate() + block_obj = self.get_object(obj)._consolidate() blocks = block_obj._data.blocks blk_items = get_blk_items(block_obj._data, blocks) if len(self.non_index_axes): @@ -3446,7 +3500,7 @@ def get_blk_items(mgr, blocks): data_columns = self.validate_data_columns( data_columns, min_itemsize) if len(data_columns): - mgr = block_obj.reindex_axis( + mgr = block_obj.reindex( Index(axis_labels).difference(Index(data_columns)), axis=axis )._data @@ -3454,14 +3508,14 @@ def get_blk_items(mgr, blocks): blocks = list(mgr.blocks) blk_items = get_blk_items(mgr, blocks) for c in data_columns: - mgr = block_obj.reindex_axis([c], axis=axis)._data + mgr = block_obj.reindex([c], axis=axis)._data blocks.extend(mgr.blocks) blk_items.extend(get_blk_items(mgr, 
mgr.blocks)) # reorder the blocks in the same order as the existing_table if we can if existing_table is not None: - by_items = dict([(tuple(b_items.tolist()), (b, b_items)) - for b, b_items in zip(blocks, blk_items)]) + by_items = {tuple(b_items.tolist()): (b, b_items) + for b, b_items in zip(blocks, blk_items)} new_blocks = [] new_blk_items = [] for ea in existing_table.values_axes: @@ -3609,7 +3663,7 @@ def create_description(self, complib=None, complevel=None, d = dict(name='table', expectedrows=expectedrows) # description from the axes & values - d['description'] = dict([(a.cname, a.typ) for a in self.axes]) + d['description'] = {a.cname: a.typ for a in self.axes} if complib: if complevel is None: @@ -3709,7 +3763,7 @@ def write(self, **kwargs): class LegacyTable(Table): """ an appendable table: allow append/query/delete operations to a - (possibily) already existing appendable table this table ALLOWS + (possibly) already existing appendable table this table ALLOWS append (but doesn't require them), and stores the data in a format that can be easily searched @@ -3787,10 +3841,10 @@ def read(self, where=None, columns=None, **kwargs): lp = DataFrame(c.data, index=long_index, columns=c.values) # need a better algorithm - tuple_index = long_index._tuple_index + tuple_index = long_index.values - unique_tuples = lib.fast_unique(tuple_index.values) - unique_tuples = _asarray_tuplesafe(unique_tuples) + unique_tuples = unique(tuple_index) + unique_tuples = com._asarray_tuplesafe(unique_tuples) indexer = match(unique_tuples, tuple_index) indexer = _ensure_platform_int(indexer) @@ -3805,7 +3859,7 @@ def read(self, where=None, columns=None, **kwargs): if len(objs) == 1: wp = objs[0] else: - wp = concat(objs, axis=0, verify_integrity=False).consolidate() + wp = concat(objs, axis=0, verify_integrity=False)._consolidate() # apply the selection filters & axis orderings wp = self.process_axes(wp, columns=columns) @@ -3894,7 +3948,7 @@ def write_data(self, chunksize, 
dropna=False): # figure the mask: only do if we can successfully process this # column, otherwise ignore the mask - mask = isnull(a.data).all(axis=0) + mask = isna(a.data).all(axis=0) if isinstance(mask, np.ndarray): masks.append(mask.astype('u1', copy=False)) @@ -4241,7 +4295,7 @@ class AppendableMultiFrameTable(AppendableFrameTable): table_type = u('appendable_multiframe') obj_type = DataFrame ndim = 2 - _re_levels = re.compile("^level_\d+$") + _re_levels = re.compile(r"^level_\d+$") @property def table_type_short(self): @@ -4290,14 +4344,6 @@ def is_transposed(self): return self.data_orientation != tuple(range(self.ndim)) -class AppendableNDimTable(AppendablePanelTable): - - """ suppor the new appendable table formats """ - table_type = u('appendable_ndim') - ndim = 4 - obj_type = Panel4D - - def _reindex_axis(obj, axis, labels, other=None): ax = obj._get_axis(axis) labels = _ensure_index(labels) @@ -4311,7 +4357,7 @@ def _reindex_axis(obj, axis, labels, other=None): labels = _ensure_index(labels.unique()) if other is not None: - labels = labels & _ensure_index(other.unique()) + labels = _ensure_index(other.unique()) & labels if not labels.equals(ax): slicer = [slice(None, None)] * obj.ndim slicer[axis] = labels @@ -4332,9 +4378,9 @@ def _get_info(info, name): def _get_tz(tz): """ for a tz-aware type, return an encoded zone """ - zone = tslib.get_timezone(tz) + zone = timezones.get_timezone(tz) if zone is None: - zone = tslib.tot_seconds(tz.utcoffset()) + zone = tz.utcoffset().total_seconds() return zone @@ -4354,7 +4400,7 @@ def _set_tz(values, tz, preserve_UTC=False, coerce=False): if tz is not None: name = getattr(values, 'name', None) values = values.ravel() - tz = tslib.get_timezone(_ensure_decoded(tz)) + tz = timezones.get_timezone(_ensure_decoded(tz)) values = DatetimeIndex(values, name=name) if values.tz is None: values = values.tz_localize('UTC').tz_convert(tz) @@ -4384,7 +4430,7 @@ def _convert_index(index, encoding=None, format_type=None): elif 
isinstance(index, (Int64Index, PeriodIndex)): atom = _tables().Int64Col() # avoid to store ndarray of Period objects - return IndexCol(index._values, 'integer', atom, + return IndexCol(index._ndarray_values, 'integer', atom, freq=getattr(index, 'freq', None), index_name=index_name) @@ -4482,7 +4528,7 @@ def _unconvert_index(data, kind, encoding=None): def _unconvert_index_legacy(data, kind, legacy=False, encoding=None): kind = _ensure_decoded(kind) if kind == u('datetime'): - index = lib.time64_to_datetime(data) + index = to_datetime(data) elif kind in (u('integer')): index = np.asarray(data, dtype=object) elif kind in (u('string')): @@ -4515,7 +4561,8 @@ def _convert_string_array(data, encoding, itemsize=None): # create the sized dtype if itemsize is None: - itemsize = lib.max_len_string_array(_ensure_object(data.ravel())) + ensured = _ensure_object(data.ravel()) + itemsize = libwriters.max_len_string_array(ensured) data = np.asarray(data, dtype="S%d" % itemsize) return data @@ -4544,7 +4591,7 @@ def _unconvert_string_array(data, nan_rep=None, encoding=None): encoding = _ensure_encoding(encoding) if encoding is not None and len(data): - itemsize = lib.max_len_string_array(_ensure_object(data)) + itemsize = libwriters.max_len_string_array(_ensure_object(data)) if compat.PY3: dtype = "U{0}".format(itemsize) else: @@ -4558,7 +4605,7 @@ def _unconvert_string_array(data, nan_rep=None, encoding=None): if nan_rep is None: nan_rep = 'nan' - data = lib.string_array_replace_from_nan_rep(data, nan_rep) + data = libwriters.string_array_replace_from_nan_rep(data, nan_rep) return data.reshape(shape) @@ -4575,7 +4622,7 @@ def _get_converter(kind, encoding): if kind == 'datetime64': return lambda x: np.asarray(x, dtype='M8[ns]') elif kind == 'datetime': - return lib.convert_timestamps + return lambda x: to_datetime(x, cache=True).to_pydatetime() elif kind == 'string': return lambda x: _unconvert_string_array(x, encoding=encoding) else: # pragma: no cover @@ -4597,7 +4644,7 @@ 
class Selection(object): Parameters ---------- table : a Table object - where : list of Terms (or convertable to) + where : list of Terms (or convertible to) start, stop: indicies to start and/or stop selection """ @@ -4662,7 +4709,7 @@ def generate(self, where): raise ValueError( "The passed where expression: {0}\n" " contains an invalid variable reference\n" - " all of the variable refrences must be a " + " all of the variable references must be a " "reference to\n" " an axis (e.g. 'index' or 'columns'), or a " "data_column\n" diff --git a/pandas/io/s3.py b/pandas/io/s3.py index 5e48de757d00e..bd2286c5c8569 100644 --- a/pandas/io/s3.py +++ b/pandas/io/s3.py @@ -19,11 +19,15 @@ def _strip_schema(url): def get_filepath_or_buffer(filepath_or_buffer, encoding=None, - compression=None): + compression=None, mode=None): + + if mode is None: + mode = 'rb' + fs = s3fs.S3FileSystem(anon=False) try: - filepath_or_buffer = fs.open(_strip_schema(filepath_or_buffer)) - except (OSError, NoCredentialsError): + filepath_or_buffer = fs.open(_strip_schema(filepath_or_buffer), mode) + except (compat.FileNotFoundError, NoCredentialsError): # boto3 has troubles when trying to access a public file # when credentialed... # An OSError is raised if you have credentials, but they @@ -31,5 +35,5 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None, # A NoCredentialsError is raised if you don't have creds # for that bucket. 
fs = s3fs.S3FileSystem(anon=True) - filepath_or_buffer = fs.open(_strip_schema(filepath_or_buffer)) - return filepath_or_buffer, None, compression + filepath_or_buffer = fs.open(_strip_schema(filepath_or_buffer), mode) + return filepath_or_buffer, None, compression, True diff --git a/pandas/io/sas/__init__.py b/pandas/io/sas/__init__.py index e69de29bb2d1d..fa6b29a1a3fcc 100644 --- a/pandas/io/sas/__init__.py +++ b/pandas/io/sas/__init__.py @@ -0,0 +1 @@ +from .sasreader import read_sas # noqa diff --git a/pandas/io/sas/saslib.pyx b/pandas/io/sas/sas.pyx similarity index 92% rename from pandas/io/sas/saslib.pyx rename to pandas/io/sas/sas.pyx index 4396180da44cb..e2a1107969990 100644 --- a/pandas/io/sas/saslib.pyx +++ b/pandas/io/sas/sas.pyx @@ -2,16 +2,16 @@ # cython: boundscheck=False, initializedcheck=False import numpy as np -cimport numpy as np -from numpy cimport uint8_t, uint16_t, int8_t, int64_t +cimport numpy as cnp +from numpy cimport uint8_t, uint16_t, int8_t, int64_t, ndarray import sas_constants as const # rle_decompress decompresses data using a Run Length Encoding # algorithm. 
It is partially documented here: # # https://cran.r-project.org/web/packages/sas7bdat/vignettes/sas7bdat.pdf -cdef np.ndarray[uint8_t, ndim=1] rle_decompress( - int result_length, np.ndarray[uint8_t, ndim=1] inbuff): +cdef ndarray[uint8_t, ndim=1] rle_decompress( + int result_length, ndarray[uint8_t, ndim=1] inbuff): cdef: uint8_t control_byte, x @@ -101,10 +101,12 @@ cdef np.ndarray[uint8_t, ndim=1] rle_decompress( result[rpos] = 0x00 rpos += 1 else: - raise ValueError("unknown control byte: %v", control_byte) + raise ValueError("unknown control byte: {byte}" + .format(byte=control_byte)) if len(result) != result_length: - raise ValueError("RLE: %v != %v", (len(result), result_length)) + raise ValueError("RLE: {got} != {expect}".format(got=len(result), + expect=result_length)) return np.asarray(result) @@ -112,8 +114,8 @@ cdef np.ndarray[uint8_t, ndim=1] rle_decompress( # rdc_decompress decompresses data using the Ross Data Compression algorithm: # # http://collaboration.cmc.ec.gc.ca/science/rpn/biblio/ddj/Website/articles/CUJ/1992/9210/ross/ross.htm -cdef np.ndarray[uint8_t, ndim=1] rdc_decompress( - int result_length, np.ndarray[uint8_t, ndim=1] inbuff): +cdef ndarray[uint8_t, ndim=1] rdc_decompress( + int result_length, ndarray[uint8_t, ndim=1] inbuff): cdef: uint8_t cmd @@ -185,7 +187,8 @@ cdef np.ndarray[uint8_t, ndim=1] rdc_decompress( raise ValueError("unknown RDC command") if len(outbuff) != result_length: - raise ValueError("RDC: %v != %v\n", len(outbuff), result_length) + raise ValueError("RDC: {got} != {expect}\n" + .format(got=len(outbuff), expect=result_length)) return np.asarray(outbuff) @@ -223,8 +226,8 @@ cdef class Parser(object): int subheader_pointer_length int current_page_type bint is_little_endian - np.ndarray[uint8_t, ndim=1] (*decompress)( - int result_length, np.ndarray[uint8_t, ndim=1] inbuff) + ndarray[uint8_t, ndim=1] (*decompress)( + int result_length, ndarray[uint8_t, ndim=1] inbuff) object parser def __init__(self, object parser): @@ 
-258,7 +261,8 @@ cdef class Parser(object): self.column_types[j] = column_type_string else: raise ValueError("unknown column type: " - "%s" % self.parser.columns[j].ctype) + "{typ}" + .format(typ=self.parser.columns[j].ctype)) # compression if parser.compression == const.rle_compression: @@ -378,8 +382,8 @@ cdef class Parser(object): return True return False else: - raise ValueError("unknown page type: %s", - self.current_page_type) + raise ValueError("unknown page type: {typ}" + .format(typ=self.current_page_type)) cdef void process_byte_array_with_data(self, int offset, int length): @@ -387,7 +391,7 @@ cdef class Parser(object): Py_ssize_t j int s, k, m, jb, js, current_row int64_t lngt, start, ct - np.ndarray[uint8_t, ndim=1] source + ndarray[uint8_t, ndim=1] source int64_t[:] column_types int64_t[:] lengths int64_t[:] offsets diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py index 91f417abc0502..806cbddaa2ee2 100644 --- a/pandas/io/sas/sas7bdat.py +++ b/pandas/io/sas/sas7bdat.py @@ -17,10 +17,11 @@ import pandas as pd from pandas import compat from pandas.io.common import get_filepath_or_buffer, BaseIterator +from pandas.errors import EmptyDataError import numpy as np import struct import pandas.io.sas.sas_constants as const -from pandas.io.sas.saslib import Parser +from pandas.io.sas._sas import Parser class _subheader_pointer(object): @@ -44,8 +45,8 @@ class SAS7BDATReader(BaseIterator): index : column identifier, defaults to None Column to use as index. convert_dates : boolean, defaults to True - Attempt to convert dates to Pandas datetime values. Note all - SAS date formats are supported. + Attempt to convert dates to Pandas datetime values. Note that + some rarely used SAS date formats may be unsupported. blank_missing : boolean, defaults to True Convert empty strings to missing values (SAS uses blanks to indicate missing character variables). 
@@ -89,7 +90,7 @@ def __init__(self, path_or_buf, index=None, convert_dates=True, self._current_row_on_page_index = 0 self._current_row_in_file_index = 0 - self._path_or_buf, _, _ = get_filepath_or_buffer(path_or_buf) + self._path_or_buf, _, _, _ = get_filepath_or_buffer(path_or_buf) if isinstance(self._path_or_buf, compat.string_types): self._path_or_buf = open(self._path_or_buf, 'rb') self.handle = self._path_or_buf @@ -594,6 +595,10 @@ def read(self, nrows=None): elif nrows is None: nrows = self.row_count + if len(self.column_types) == 0: + self.close() + raise EmptyDataError("No columns to parse from file") + if self._current_row_in_file_index >= self.row_count: return None @@ -655,9 +660,15 @@ def _chunk_to_dataframe(self): rslt[name] = self._byte_chunk[jb, :].view( dtype=self.byte_order + 'd') rslt[name] = np.asarray(rslt[name], dtype=np.float64) - if self.convert_dates and (self.column_formats[j] == "MMDDYY"): - epoch = pd.datetime(1960, 1, 1) - rslt[name] = epoch + pd.to_timedelta(rslt[name], unit='d') + if self.convert_dates: + unit = None + if self.column_formats[j] in const.sas_date_formats: + unit = 'd' + elif self.column_formats[j] in const.sas_datetime_formats: + unit = 's' + if unit: + rslt[name] = pd.to_datetime(rslt[name], unit=unit, + origin="1960-01-01") jb += 1 elif self.column_types[j] == b's': rslt[name] = self._string_chunk[js, :] diff --git a/pandas/io/sas/sas_constants.py b/pandas/io/sas/sas_constants.py index 65ae1e9102cb2..c4b3588164305 100644 --- a/pandas/io/sas/sas_constants.py +++ b/pandas/io/sas/sas_constants.py @@ -145,3 +145,27 @@ class index: b"\xFF\xFF\xFF\xFE": index.columnListIndex, b"\xFE\xFF\xFF\xFF\xFF\xFF\xFF\xFF": index.columnListIndex, b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFE": index.columnListIndex} + + +# List of frequently used SAS date and datetime formats +# http://support.sas.com/documentation/cdl/en/etsug/60372/HTML/default/viewer.htm#etsug_intervals_sect009.htm +# 
https://github.com/epam/parso/blob/master/src/main/java/com/epam/parso/impl/SasFileConstants.java +sas_date_formats = ("DATE", "DAY", "DDMMYY", "DOWNAME", "JULDAY", "JULIAN", + "MMDDYY", "MMYY", "MMYYC", "MMYYD", "MMYYP", "MMYYS", + "MMYYN", "MONNAME", "MONTH", "MONYY", "QTR", "QTRR", + "NENGO", "WEEKDATE", "WEEKDATX", "WEEKDAY", "WEEKV", + "WORDDATE", "WORDDATX", "YEAR", "YYMM", "YYMMC", "YYMMD", + "YYMMP", "YYMMS", "YYMMN", "YYMON", "YYMMDD", "YYQ", + "YYQC", "YYQD", "YYQP", "YYQS", "YYQN", "YYQR", "YYQRC", + "YYQRD", "YYQRP", "YYQRS", "YYQRN", + "YYMMDDP", "YYMMDDC", "E8601DA", "YYMMDDN", "MMDDYYC", + "MMDDYYS", "MMDDYYD", "YYMMDDS", "B8601DA", "DDMMYYN", + "YYMMDDD", "DDMMYYB", "DDMMYYP", "MMDDYYP", "YYMMDDB", + "MMDDYYN", "DDMMYYC", "DDMMYYD", "DDMMYYS", + "MINGUO") + +sas_datetime_formats = ("DATETIME", "DTWKDATX", + "B8601DN", "B8601DT", "B8601DX", "B8601DZ", "B8601LX", + "E8601DN", "E8601DT", "E8601DX", "E8601DZ", "E8601LX", + "DATEAMPM", "DTDATE", "DTMONYY", "DTMONYY", "DTWKDATX", + "DTYEAR", "TOD", "MDYAMPM") diff --git a/pandas/io/sas/sas_xport.py b/pandas/io/sas/sas_xport.py index 76fc55154bc49..7994517b9f303 100644 --- a/pandas/io/sas/sas_xport.py +++ b/pandas/io/sas/sas_xport.py @@ -14,7 +14,7 @@ from pandas import compat import struct import numpy as np -from pandas.util.decorators import Appender +from pandas.util._decorators import Appender import warnings _correct_line1 = ("HEADER RECORD*******LIBRARY HEADER RECORD!!!!!!!" @@ -76,7 +76,6 @@ >>> for chunk in itr: >>> do_something(chunk) -.. 
versionadded:: 0.17.0 """ % {"_base_params_doc": _base_params_doc, "_format_params_doc": _format_params_doc, "_params2_doc": _params2_doc, @@ -237,7 +236,8 @@ def __init__(self, filepath_or_buffer, index=None, encoding='ISO-8859-1', self._chunksize = chunksize if isinstance(filepath_or_buffer, str): - filepath_or_buffer, encoding, compression = get_filepath_or_buffer( + (filepath_or_buffer, encoding, + compression, should_close) = get_filepath_or_buffer( filepath_or_buffer, encoding=encoding) if isinstance(filepath_or_buffer, (str, compat.text_type, bytes)): diff --git a/pandas/io/sas/sasreader.py b/pandas/io/sas/sasreader.py index 3e4d9c9024dbd..b8a0bf5733158 100644 --- a/pandas/io/sas/sasreader.py +++ b/pandas/io/sas/sasreader.py @@ -2,6 +2,7 @@ Read SAS sas7bdat or xport files. """ from pandas import compat +from pandas.io.common import _stringify_path def read_sas(filepath_or_buffer, format=None, index=None, encoding=None, @@ -34,6 +35,7 @@ def read_sas(filepath_or_buffer, format=None, index=None, encoding=None, buffer_error_msg = ("If this is a buffer object rather " "than a string name, you must specify " "a format string") + filepath_or_buffer = _stringify_path(filepath_or_buffer) if not isinstance(filepath_or_buffer, compat.string_types): raise ValueError(buffer_error_msg) try: diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 9fa01c413aca8..ccb8d2d99d734 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -11,17 +11,18 @@ import re import numpy as np -import pandas.lib as lib -from pandas.types.missing import isnull -from pandas.types.dtypes import DatetimeTZDtype -from pandas.types.common import (is_list_like, is_dict_like, - is_datetime64tz_dtype) +import pandas._libs.lib as lib +from pandas.core.dtypes.missing import isna +from pandas.core.dtypes.dtypes import DatetimeTZDtype +from pandas.core.dtypes.common import ( + is_list_like, is_dict_like, + is_datetime64tz_dtype) from pandas.compat import (map, zip, raise_with_traceback, string_types, 
text_type) from pandas.core.api import DataFrame, Series from pandas.core.base import PandasObject -from pandas.tseries.tools import to_datetime +from pandas.core.tools.datetimes import to_datetime from contextlib import contextmanager @@ -40,24 +41,6 @@ class DatabaseError(IOError): _SQLALCHEMY_INSTALLED = None -def _validate_flavor_parameter(flavor): - """ - Checks whether a database 'flavor' was specified. - If not None, produces FutureWarning if 'sqlite' and - raises a ValueError if anything else. - """ - if flavor is not None: - if flavor == 'sqlite': - warnings.warn("the 'flavor' parameter is deprecated " - "and will be removed in a future version, " - "as 'sqlite' is the only supported option " - "when SQLAlchemy is not installed.", - FutureWarning, stacklevel=2) - else: - raise ValueError("database flavor {flavor} is not " - "supported".format(flavor=flavor)) - - def _is_sqlalchemy_connectable(con): global _SQLALCHEMY_INSTALLED if _SQLALCHEMY_INSTALLED is None: @@ -66,11 +49,11 @@ def _is_sqlalchemy_connectable(con): _SQLALCHEMY_INSTALLED = True from distutils.version import LooseVersion - ver = LooseVersion(sqlalchemy.__version__) + ver = sqlalchemy.__version__ # For sqlalchemy versions < 0.8.2, the BIGINT type is recognized # for a sqlite engine, which results in a warning when trying to # read/write a DataFrame with int64 values. 
(GH7433) - if ver < '0.8.2': + if LooseVersion(ver) < LooseVersion('0.8.2'): from sqlalchemy import BigInteger from sqlalchemy.ext.compiler import compiles @@ -88,7 +71,7 @@ def compile_big_int_sqlite(type_, compiler, **kw): def _convert_params(sql, params): - """convert sql and params args to DBAPI2.0 compliant format""" + """Convert SQL and params args to DBAPI2.0 compliant format.""" args = [sql] if params is not None: if hasattr(params, 'keys'): # test if params is a mapping @@ -98,30 +81,30 @@ def _convert_params(sql, params): return args -def _handle_date_column(col, format=None): +def _handle_date_column(col, utc=None, format=None): if isinstance(format, dict): return to_datetime(col, errors='ignore', **format) else: - if format in ['D', 's', 'ms', 'us', 'ns']: - return to_datetime(col, errors='coerce', unit=format, utc=True) - elif (issubclass(col.dtype.type, np.floating) or - issubclass(col.dtype.type, np.integer)): - # parse dates as timestamp - format = 's' if format is None else format - return to_datetime(col, errors='coerce', unit=format, utc=True) + # Allow passing of formatting string for integers + # GH17855 + if format is None and (issubclass(col.dtype.type, np.floating) or + issubclass(col.dtype.type, np.integer)): + format = 's' + if format in ['D', 'd', 'h', 'm', 's', 'ms', 'us', 'ns']: + return to_datetime(col, errors='coerce', unit=format, utc=utc) elif is_datetime64tz_dtype(col): # coerce to UTC timezone # GH11216 return (to_datetime(col, errors='coerce') .astype('datetime64[ns, UTC]')) else: - return to_datetime(col, errors='coerce', format=format, utc=True) + return to_datetime(col, errors='coerce', format=format, utc=utc) def _parse_date_columns(data_frame, parse_dates): """ Force non-datetime columns to be read as such. - Supports both string formatted and integer timestamp columns + Supports both string formatted and integer timestamp columns. 
""" # handle non-list entries for parse_dates gracefully if parse_dates is True or parse_dates is None or parse_dates is False: @@ -150,7 +133,7 @@ def _parse_date_columns(data_frame, parse_dates): def _wrap_result(data, columns, index_col=None, coerce_float=True, parse_dates=None): - """Wrap result set of query in a DataFrame """ + """Wrap result set of query in a DataFrame.""" frame = DataFrame.from_records(data, columns=columns, coerce_float=coerce_float) @@ -170,9 +153,9 @@ def execute(sql, con, cur=None, params=None): Parameters ---------- sql : string - Query to be executed + SQL query to be executed. con : SQLAlchemy connectable(engine/connection) or sqlite3 connection - Using SQLAlchemy makes it possible to use any DB supported by that + Using SQLAlchemy makes it possible to use any DB supported by the library. If a DBAPI2 object, only sqlite3 is supported. cur : deprecated, cursor is obtained from connection, default: None @@ -199,36 +182,36 @@ def read_sql_table(table_name, con, schema=None, index_col=None, chunksize=None): """Read SQL database table into a DataFrame. - Given a table name and an SQLAlchemy connectable, returns a DataFrame. + Given a table name and a SQLAlchemy connectable, returns a DataFrame. This function does not support DBAPI connections. Parameters ---------- table_name : string - Name of SQL table in database + Name of SQL table in database. con : SQLAlchemy connectable (or database string URI) - Sqlite DBAPI connection mode not supported + SQLite DBAPI connection mode not supported. schema : string, default None Name of SQL schema in database to query (if database flavor - supports this). If None, use default schema (default). + supports this). Uses default schema if None (default). index_col : string or list of strings, optional, default: None - Column(s) to set as index(MultiIndex) + Column(s) to set as index(MultiIndex). 
coerce_float : boolean, default True - Attempt to convert values of non-string, non-numeric objects (like + Attempts to convert values of non-string, non-numeric objects (like decimal.Decimal) to floating point. Can result in loss of Precision. parse_dates : list or dict, default: None - - List of column names to parse as dates + - List of column names to parse as dates. - Dict of ``{column_name: format string}`` where format string is strftime compatible in case of parsing string times or is one of - (D, s, ns, ms, us) in case of parsing integer timestamps + (D, s, ns, ms, us) in case of parsing integer timestamps. - Dict of ``{column_name: arg dict}``, where the arg dict corresponds to the keyword arguments of :func:`pandas.to_datetime` Especially useful with databases without native Datetime support, - such as SQLite + such as SQLite. columns : list, default: None - List of column names to select from sql table + List of column names to select from SQL table chunksize : int, default None - If specified, return an iterator where `chunksize` is the number of + If specified, returns an iterator where `chunksize` is the number of rows to include in each chunk. Returns @@ -237,7 +220,7 @@ def read_sql_table(table_name, con, schema=None, index_col=None, Notes ----- - Any datetime values with time zone information will be converted to UTC + Any datetime values with time zone information will be converted to UTC. See also -------- @@ -280,17 +263,17 @@ def read_sql_query(sql, con, index_col=None, coerce_float=True, params=None, Parameters ---------- sql : string SQL query or SQLAlchemy Selectable (select or text object) - to be executed. - con : SQLAlchemy connectable(engine/connection) or database string URI + SQL query to be executed. + con : SQLAlchemy connectable(engine/connection), database string URI, or sqlite3 DBAPI2 connection Using SQLAlchemy makes it possible to use any DB supported by that library. If a DBAPI2 object, only sqlite3 is supported. 
index_col : string or list of strings, optional, default: None - Column(s) to set as index(MultiIndex) + Column(s) to set as index(MultiIndex). coerce_float : boolean, default True - Attempt to convert values of non-string, non-numeric objects (like - decimal.Decimal) to floating point, useful for SQL result sets + Attempts to convert values of non-string, non-numeric objects (like + decimal.Decimal) to floating point. Useful for SQL result sets. params : list, tuple or dict, optional, default: None List of parameters to pass to execute method. The syntax used to pass parameters is database driver dependent. Check your @@ -298,14 +281,14 @@ def read_sql_query(sql, con, index_col=None, coerce_float=True, params=None, described in PEP 249's paramstyle, is supported. Eg. for psycopg2, uses %(name)s so use params={'name' : 'value'} parse_dates : list or dict, default: None - - List of column names to parse as dates + - List of column names to parse as dates. - Dict of ``{column_name: format string}`` where format string is - strftime compatible in case of parsing string times or is one of - (D, s, ns, ms, us) in case of parsing integer timestamps + strftime compatible in case of parsing string times, or is one of + (D, s, ns, ms, us) in case of parsing integer timestamps. - Dict of ``{column_name: arg dict}``, where the arg dict corresponds to the keyword arguments of :func:`pandas.to_datetime` Especially useful with databases without native Datetime support, - such as SQLite + such as SQLite. chunksize : int, default None If specified, return an iterator where `chunksize` is the number of rows to include in each chunk. @@ -317,11 +300,11 @@ def read_sql_query(sql, con, index_col=None, coerce_float=True, params=None, Notes ----- Any datetime values with time zone information parsed via the `parse_dates` - parameter will be converted to UTC + parameter will be converted to UTC. 
See also -------- - read_sql_table : Read SQL database table into a DataFrame + read_sql_table : Read SQL database table into a DataFrame. read_sql """ @@ -336,20 +319,27 @@ def read_sql(sql, con, index_col=None, coerce_float=True, params=None, """ Read SQL query or database table into a DataFrame. + This function is a convenience wrapper around ``read_sql_table`` and + ``read_sql_query`` (for backward compatibility). It will delegate + to the specific function depending on the provided input. A SQL query + will be routed to ``read_sql_query``, while a database table name will + be routed to ``read_sql_table``. Note that the delegated function might + have more specific notes about their functionality not listed here. + Parameters ---------- - sql : string SQL query or SQLAlchemy Selectable (select or text object) - to be executed, or database table name. - con : SQLAlchemy connectable(engine/connection) or database string URI + sql : string or SQLAlchemy Selectable (select or text object) + SQL query to be executed or a table name. + con : SQLAlchemy connectable (engine/connection) or database string URI or DBAPI2 connection (fallback mode) + Using SQLAlchemy makes it possible to use any DB supported by that - library. - If a DBAPI2 object, only sqlite3 is supported. + library. If a DBAPI2 object, only sqlite3 is supported. index_col : string or list of strings, optional, default: None - Column(s) to set as index(MultiIndex) + Column(s) to set as index(MultiIndex). coerce_float : boolean, default True - Attempt to convert values of non-string, non-numeric objects (like - decimal.Decimal) to floating point, useful for SQL result sets + Attempts to convert values of non-string, non-numeric objects (like + decimal.Decimal) to floating point, useful for SQL result sets. params : list, tuple or dict, optional, default: None List of parameters to pass to execute method. The syntax used to pass parameters is database driver dependent. 
Check your @@ -357,16 +347,16 @@ def read_sql(sql, con, index_col=None, coerce_float=True, params=None, described in PEP 249's paramstyle, is supported. Eg. for psycopg2, uses %(name)s so use params={'name' : 'value'} parse_dates : list or dict, default: None - - List of column names to parse as dates + - List of column names to parse as dates. - Dict of ``{column_name: format string}`` where format string is - strftime compatible in case of parsing string times or is one of - (D, s, ns, ms, us) in case of parsing integer timestamps + strftime compatible in case of parsing string times, or is one of + (D, s, ns, ms, us) in case of parsing integer timestamps. - Dict of ``{column_name: arg dict}``, where the arg dict corresponds to the keyword arguments of :func:`pandas.to_datetime` Especially useful with databases without native Datetime support, - such as SQLite + such as SQLite. columns : list, default: None - List of column names to select from sql table (only used when reading + List of column names to select from SQL table (only used when reading a table). chunksize : int, default None If specified, return an iterator where `chunksize` is the @@ -376,18 +366,10 @@ def read_sql(sql, con, index_col=None, coerce_float=True, params=None, ------- DataFrame - Notes - ----- - This function is a convenience wrapper around ``read_sql_table`` and - ``read_sql_query`` (and for backward compatibility) and will delegate - to the specific function depending on the provided input (database - table name or sql query). The delegated function might have more specific - notes about their functionality not listed here. - See also -------- - read_sql_table : Read SQL database table into a DataFrame - read_sql_query : Read SQL query into a DataFrame + read_sql_table : Read SQL database table into a DataFrame. + read_sql_query : Read SQL query into a DataFrame. 
""" pandas_sql = pandasSQL_builder(con) @@ -415,23 +397,21 @@ def read_sql(sql, con, index_col=None, coerce_float=True, params=None, chunksize=chunksize) -def to_sql(frame, name, con, flavor=None, schema=None, if_exists='fail', - index=True, index_label=None, chunksize=None, dtype=None): +def to_sql(frame, name, con, schema=None, if_exists='fail', index=True, + index_label=None, chunksize=None, dtype=None): """ Write records stored in a DataFrame to a SQL database. Parameters ---------- - frame : DataFrame + frame : DataFrame, Series name : string - Name of SQL table + Name of SQL table. con : SQLAlchemy connectable(engine/connection) or database string URI or sqlite3 DBAPI2 connection Using SQLAlchemy makes it possible to use any DB supported by that library. If a DBAPI2 object, only sqlite3 is supported. - flavor : 'sqlite', default None - DEPRECATED: this parameter will be removed in a future version schema : string, default None Name of SQL schema in database to write to (if database flavor supports this). If None, use default schema (default). @@ -440,7 +420,7 @@ def to_sql(frame, name, con, flavor=None, schema=None, if_exists='fail', - replace: If table exists, drop it, recreate it, and insert data. - append: If table exists, insert data. Create if does not exist. index : boolean, default True - Write DataFrame index as a column + Write DataFrame index as a column. index_label : string or sequence, default None Column label for index column(s). If None is given (default) and `index` is True, then the index names are used. 
@@ -457,7 +437,7 @@ def to_sql(frame, name, con, flavor=None, schema=None, if_exists='fail', if if_exists not in ('fail', 'replace', 'append'): raise ValueError("'{0}' is not valid for if_exists".format(if_exists)) - pandas_sql = pandasSQL_builder(con, schema=schema, flavor=flavor) + pandas_sql = pandasSQL_builder(con, schema=schema) if isinstance(frame, Series): frame = frame.to_frame() @@ -470,20 +450,18 @@ def to_sql(frame, name, con, flavor=None, schema=None, if_exists='fail', chunksize=chunksize, dtype=dtype) -def has_table(table_name, con, flavor=None, schema=None): +def has_table(table_name, con, schema=None): """ Check if DataBase has named table. Parameters ---------- table_name: string - Name of SQL table + Name of SQL table. con: SQLAlchemy connectable(engine/connection) or sqlite3 DBAPI2 connection Using SQLAlchemy makes it possible to use any DB supported by that library. If a DBAPI2 object, only sqlite3 is supported. - flavor : 'sqlite', default None - DEPRECATED: this parameter will be removed in a future version schema : string, default None Name of SQL schema in database to write to (if database flavor supports this). If None, use default schema (default). @@ -492,16 +470,17 @@ def has_table(table_name, con, flavor=None, schema=None): ------- boolean """ - pandas_sql = pandasSQL_builder(con, flavor=flavor, schema=schema) + pandas_sql = pandasSQL_builder(con, schema=schema) return pandas_sql.has_table(table_name) + table_exists = has_table def _engine_builder(con): """ Returns a SQLAlchemy engine from a URI (if con is a string) - else it just return con without modifying it + else it just return con without modifying it. 
""" global _SQLALCHEMY_INSTALLED if isinstance(con, string_types): @@ -516,14 +495,12 @@ def _engine_builder(con): return con -def pandasSQL_builder(con, flavor=None, schema=None, meta=None, +def pandasSQL_builder(con, schema=None, meta=None, is_cursor=False): """ Convenience function to return the correct PandasSQL subclass based on the - provided parameters + provided parameters. """ - _validate_flavor_parameter(flavor) - # When support for DBAPI connections is removed, # is_cursor should not be necessary. con = _engine_builder(con) @@ -539,7 +516,7 @@ class SQLTable(PandasObject): """ For mapping Pandas tables to SQL tables. Uses fact that table is reflected by SQLAlchemy to - do better type convertions. + do better type conversions. Also holds various flags needed to avoid having to pass them between functions all the time. """ @@ -595,8 +572,29 @@ def create(self): else: self._execute_create() - def insert_statement(self): - return self.table.insert() + def insert_statement(self, data, conn): + """ + Generate tuple of SQLAlchemy insert statement and any arguments + to be executed by connection (via `_execute_insert`). 
+ + Parameters + ---------- + conn : SQLAlchemy connectable(engine/connection) + Connection to recieve the data + data : list of dict + The data to be inserted + + Returns + ------- + SQLAlchemy statement + insert statement + *, optional + Additional parameters to be passed when executing insert statement + """ + dialect = getattr(conn, 'dialect', None) + if dialect and getattr(dialect, 'supports_multivalues_insert', False): + return self.table.insert(data), + return self.table.insert(), data def insert_data(self): if self.index is not None: @@ -626,7 +624,7 @@ def insert_data(self): # replace NaN with None if b._can_hold_na: - mask = isnull(d) + mask = isna(d) d[mask] = None for col_loc, col in zip(b.mgr_locs, d): @@ -635,8 +633,9 @@ def insert_data(self): return column_names, data_list def _execute_insert(self, conn, keys, data_iter): - data = [dict((k, v) for k, v in zip(keys, row)) for row in data_iter] - conn.execute(self.insert_statement(), data) + """Insert data into this table with database connection""" + data = [{k: v for k, v in zip(keys, row)} for row in data_iter] + conn.execute(*self.insert_statement(data, conn)) def insert(self, chunksize=None): keys, data_list = self.insert_data() @@ -665,7 +664,7 @@ def insert(self, chunksize=None): def _query_iterator(self, result, chunksize, columns, coerce_float=True, parse_dates=None): - """Return generator through chunked result set""" + """Return generator through chunked result set.""" while True: data = result.fetchmany(chunksize) @@ -748,8 +747,9 @@ def _get_column_names_and_types(self, dtype_mapper): if self.index is not None: for i, idx_label in enumerate(self.index): idx_type = dtype_mapper( - self.frame.index.get_level_values(i)) - column_names_and_types.append((idx_label, idx_type, True)) + self.frame.index._get_level_values(i)) + column_names_and_types.append((text_type(idx_label), + idx_type, True)) column_names_and_types += [ (text_type(self.frame.columns[i]), @@ -796,7 +796,7 @@ def 
_harmonize_columns(self, parse_dates=None): all Nones with false. Therefore only convert bool if there are no NA values. Datetimes should already be converted to np.datetime64 if supported, - but here we also force conversion if required + but here we also force conversion if required. """ # handle non-list entries for parse_dates gracefully if parse_dates is True or parse_dates is None or parse_dates is False: @@ -814,8 +814,9 @@ def _harmonize_columns(self, parse_dates=None): if (col_type is datetime or col_type is date or col_type is DatetimeTZDtype): - self.frame[col_name] = _handle_date_column(df_col) - + # Convert tz-aware Datetime SQL columns to UTC + utc = col_type is DatetimeTZDtype + self.frame[col_name] = _handle_date_column(df_col, utc=utc) elif col_type is float: # floats support NA, can always convert! self.frame[col_name] = df_col.astype(col_type, copy=False) @@ -838,7 +839,7 @@ def _harmonize_columns(self, parse_dates=None): except KeyError: pass # this column not in results - def _get_notnull_col_dtype(self, col): + def _get_notna_col_dtype(self, col): """ Infer datatype of the Series col. 
In case the dtype of col is 'object' and it contains NA values, this infers the datatype of the not-NA @@ -846,9 +847,9 @@ def _get_notnull_col_dtype(self, col): """ col_for_inference = col if col.dtype == 'object': - notnulldata = col[~isnull(col)] - if len(notnulldata): - col_for_inference = notnulldata + notnadata = col[~isna(col)] + if len(notnadata): + col_for_inference = notnadata return lib.infer_dtype(col_for_inference) @@ -858,7 +859,7 @@ def _sqlalchemy_type(self, col): if col.name in dtype: return self.dtype[col.name] - col_type = self._get_notnull_col_dtype(col) + col_type = self._get_notna_col_dtype(col) from sqlalchemy.types import (BigInteger, Integer, Float, Text, Boolean, @@ -922,7 +923,7 @@ def _get_dtype(self, sqltype): class PandasSQL(PandasObject): """ - Subclasses Should define read_sql and to_sql + Subclasses Should define read_sql and to_sql. """ def read_sql(self, *args, **kwargs): @@ -936,8 +937,8 @@ def to_sql(self, *args, **kwargs): class SQLDatabase(PandasSQL): """ - This class enables convertion between DataFrame and SQL databases - using SQLAlchemy to handle DataBase abstraction + This class enables conversion between DataFrame and SQL databases + using SQLAlchemy to handle DataBase abstraction. Parameters ---------- @@ -982,28 +983,28 @@ def read_table(self, table_name, index_col=None, coerce_float=True, Parameters ---------- table_name : string - Name of SQL table in database + Name of SQL table in database. index_col : string, optional, default: None - Column to set as index + Column to set as index. coerce_float : boolean, default True - Attempt to convert values of non-string, non-numeric objects + Attempts to convert values of non-string, non-numeric objects (like decimal.Decimal) to floating point. This can result in loss of precision. parse_dates : list or dict, default: None - - List of column names to parse as dates + - List of column names to parse as dates. 
- Dict of ``{column_name: format string}`` where format string is - strftime compatible in case of parsing string times or is one of - (D, s, ns, ms, us) in case of parsing integer timestamps + strftime compatible in case of parsing string times, or is one of + (D, s, ns, ms, us) in case of parsing integer timestamps. - Dict of ``{column_name: arg}``, where the arg corresponds to the keyword arguments of :func:`pandas.to_datetime`. Especially useful with databases without native Datetime support, - such as SQLite + such as SQLite. columns : list, default: None - List of column names to select from sql table + List of column names to select from SQL table. schema : string, default None Name of SQL schema in database to query (if database flavor supports this). If specified, this overwrites the default - schema of the SQLDatabase object. + schema of the SQL database object. chunksize : int, default None If specified, return an iterator where `chunksize` is the number of rows to include in each chunk. @@ -1044,12 +1045,12 @@ def read_query(self, sql, index_col=None, coerce_float=True, Parameters ---------- sql : string - SQL query to be executed + SQL query to be executed. index_col : string, optional, default: None Column name to use as index for the returned DataFrame object. coerce_float : boolean, default True Attempt to convert values of non-string, non-numeric objects (like - decimal.Decimal) to floating point, useful for SQL result sets + decimal.Decimal) to floating point, useful for SQL result sets. params : list, tuple or dict, optional, default: None List of parameters to pass to execute method. The syntax used to pass parameters is database driver dependent. Check your @@ -1057,14 +1058,14 @@ def read_query(self, sql, index_col=None, coerce_float=True, described in PEP 249's paramstyle, is supported. Eg. 
for psycopg2, uses %(name)s so use params={'name' : 'value'} parse_dates : list or dict, default: None - - List of column names to parse as dates + - List of column names to parse as dates. - Dict of ``{column_name: format string}`` where format string is - strftime compatible in case of parsing string times or is one of - (D, s, ns, ms, us) in case of parsing integer timestamps + strftime compatible in case of parsing string times, or is one of + (D, s, ns, ms, us) in case of parsing integer timestamps. - Dict of ``{column_name: arg dict}``, where the arg dict corresponds to the keyword arguments of :func:`pandas.to_datetime` Especially useful with databases - without native Datetime support, such as SQLite + without native Datetime support, such as SQLite. chunksize : int, default None If specified, return an iterator where `chunksize` is the number of rows to include in each chunk. @@ -1107,13 +1108,13 @@ def to_sql(self, frame, name, if_exists='fail', index=True, ---------- frame : DataFrame name : string - Name of SQL table + Name of SQL table. if_exists : {'fail', 'replace', 'append'}, default 'fail' - fail: If table exists, do nothing. - replace: If table exists, drop it, recreate it, and insert data. - append: If table exists, insert data. Create if does not exist. index : boolean, default True - Write DataFrame index as a column + Write DataFrame index as a column. index_label : string or sequence, default None Column label for index column(s). If None is given (default) and `index` is True, then the index names are used. 
@@ -1219,7 +1220,7 @@ def _create_sql_schema(self, frame, table_name, keys=None, dtype=None): def _get_unicode_name(name): try: - uname = name.encode("utf-8", "strict").decode("utf-8") + uname = text_type(name).encode("utf-8", "strict").decode("utf-8") except UnicodeError: raise ValueError("Cannot convert identifier to UTF-8: '%s'" % name) return uname @@ -1292,14 +1293,14 @@ def _execute_insert(self, conn, keys, data_iter): def _create_table_setup(self): """ - Return a list of SQL statement that create a table reflecting the + Return a list of SQL statements that creates a table reflecting the structure of a DataFrame. The first entry will be a CREATE TABLE - statement while the rest will be CREATE INDEX statements + statement while the rest will be CREATE INDEX statements. """ column_names_and_types = \ self._get_column_names_and_types(self._sql_type_name) - pat = re.compile('\s+') + pat = re.compile(r'\s+') column_names = [col_name for col_name, _, _ in column_names_and_types] if any(map(pat.search, column_names)): warnings.warn(_SAFE_NAMES_WARNING, stacklevel=6) @@ -1314,7 +1315,7 @@ def _create_table_setup(self): keys = [self.keys] else: keys = self.keys - cnames_br = ", ".join([escape(c) for c in keys]) + cnames_br = ", ".join(escape(c) for c in keys) create_tbl_stmts.append( "CONSTRAINT {tbl}_pk PRIMARY KEY ({cnames_br})".format( tbl=self.name, cnames_br=cnames_br)) @@ -1326,7 +1327,7 @@ def _create_table_setup(self): if is_index] if len(ix_cols): cnames = "_".join(ix_cols) - cnames_br = ",".join([escape(c) for c in ix_cols]) + cnames_br = ",".join(escape(c) for c in ix_cols) create_stmts.append( "CREATE INDEX " + escape("ix_" + self.name + "_" + cnames) + "ON " + escape(self.name) + " (" + cnames_br + ")") @@ -1338,7 +1339,7 @@ def _sql_type_name(self, col): if col.name in dtype: return dtype[col.name] - col_type = self._get_notnull_col_dtype(col) + col_type = self._get_notna_col_dtype(col) if col_type == 'timedelta64': warnings.warn("the 'timedelta' type 
is not supported, and will be " "written as integer values (ns frequency) to the " @@ -1362,8 +1363,8 @@ def _sql_type_name(self, col): class SQLiteDatabase(PandasSQL): """ - Version of SQLDatabase to support sqlite connections (fallback without - sqlalchemy). This should only be used internally. + Version of SQLDatabase to support SQLite connections (fallback without + SQLAlchemy). This should only be used internally. Parameters ---------- @@ -1371,9 +1372,7 @@ class SQLiteDatabase(PandasSQL): """ - def __init__(self, con, flavor=None, is_cursor=False): - _validate_flavor_parameter(flavor) - + def __init__(self, con, is_cursor=False): self.is_cursor = is_cursor self.con = con @@ -1464,11 +1463,12 @@ def to_sql(self, frame, name, if_exists='fail', index=True, Parameters ---------- frame: DataFrame - name: name of SQL table + name: string + Name of SQL table. if_exists: {'fail', 'replace', 'append'}, default 'fail' fail: If table exists, do nothing. replace: If table exists, drop it, recreate it, and insert data. - append: If table exists, insert data. Create if does not exist. + append: If table exists, insert data. Create if it does not exist. index : boolean, default True Write DataFrame index as a column index_label : string or sequence, default None @@ -1476,7 +1476,7 @@ def to_sql(self, frame, name, if_exists='fail', index=True, `index` is True, then the index names are used. A sequence should be given if the DataFrame uses MultiIndex. schema : string, default None - Ignored parameter included for compatability with SQLAlchemy + Ignored parameter included for compatibility with SQLAlchemy version of ``to_sql``. 
chunksize : int, default None If not None, then rows will be written in batches of this @@ -1526,7 +1526,7 @@ def _create_sql_schema(self, frame, table_name, keys=None, dtype=None): return str(table.sql_schema()) -def get_schema(frame, name, flavor=None, keys=None, con=None, dtype=None): +def get_schema(frame, name, keys=None, con=None, dtype=None): """ Get the SQL db table schema for the given frame. @@ -1541,13 +1541,11 @@ def get_schema(frame, name, flavor=None, keys=None, con=None, dtype=None): Using SQLAlchemy makes it possible to use any DB supported by that library, default: None If a DBAPI2 object, only sqlite3 is supported. - flavor : 'sqlite', default None - DEPRECATED: this parameter will be removed in a future version dtype : dict of column name to SQL type, default None Optional specifying the datatype for columns. The SQL type should be a SQLAlchemy type, or a string for sqlite3 fallback connection. """ - pandas_sql = pandasSQL_builder(con=con, flavor=flavor) + pandas_sql = pandasSQL_builder(con=con) return pandas_sql._create_sql_schema(frame, name, keys=keys, dtype=dtype) diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 2be7657883e88..9646831cb612c 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -9,29 +9,35 @@ You can find more information on http://presbrey.mit.edu/PyDTA and http://www.statsmodels.org/devel/ """ -import numpy as np -import sys +import datetime import struct -from dateutil.relativedelta import relativedelta +import sys +from collections import OrderedDict -from pandas.types.common import (is_categorical_dtype, is_datetime64_dtype, - _ensure_object) +import numpy as np +from dateutil.relativedelta import relativedelta +from pandas._libs.lib import infer_dtype +from pandas._libs.tslib import NaT, Timestamp +from pandas._libs.writers import max_len_string_array +import pandas as pd +from pandas import compat, to_timedelta, to_datetime, isna, DatetimeIndex +from pandas.compat import (lrange, lmap, lzip, text_type, 
string_types, range, + zip, BytesIO) from pandas.core.base import StringMixin -from pandas.core.categorical import Categorical +from pandas.core.arrays import Categorical +from pandas.core.dtypes.common import (is_categorical_dtype, _ensure_object, + is_datetime64_dtype) from pandas.core.frame import DataFrame from pandas.core.series import Series -import datetime -from pandas import compat, to_timedelta, to_datetime, isnull, DatetimeIndex -from pandas.compat import lrange, lmap, lzip, text_type, string_types, range, \ - zip, BytesIO -from pandas.util.decorators import Appender -import pandas as pd +from pandas.io.common import (get_filepath_or_buffer, BaseIterator, + _stringify_path) +from pandas.util._decorators import Appender +from pandas.util._decorators import deprecate_kwarg -from pandas.io.common import get_filepath_or_buffer, BaseIterator -from pandas.lib import max_len_string_array, infer_dtype -from pandas.tslib import NaT, Timestamp +VALID_ENCODINGS = ('ascii', 'us-ascii', 'latin-1', 'latin_1', 'iso-8859-1', + 'iso8859-1', '8859', 'cp819', 'latin', 'latin1', 'L1') _version_error = ("Version of given Stata file is not 104, 105, 108, " "111 (Stata 7SE), 113 (Stata 8/9), 114 (Stata 10/11), " @@ -45,14 +51,14 @@ _encoding_params = """\ encoding : string, None or encoding - Encoding used to parse the files. None defaults to iso-8859-1.""" + Encoding used to parse the files. None defaults to latin-1.""" _statafile_processing_params2 = """\ -index : identifier of index column - identifier of column that should be used as index of the DataFrame +index_col : string, optional, default: None + Column to set as index convert_missing : boolean, defaults to False Flag indicating whether to convert missing values to their Stata - representations. If False, missing values are replaced with nans. + representations. If False, missing values are replaced with nan. 
If True, columns containing missing values are returned with object data types and missing values are represented by StataMissingValue objects. @@ -105,9 +111,11 @@ _statafile_processing_params2, _chunksize_params, _iterator_params) -_data_method_doc = """Reads observations from Stata file, converting them into a dataframe +_data_method_doc = """\ +Reads observations from Stata file, converting them into a dataframe -This is a legacy method. Use `read` in new code. + .. deprecated:: + This is a legacy method. Use `read` in new code. Parameters ---------- @@ -152,15 +160,16 @@ @Appender(_read_stata_doc) +@deprecate_kwarg(old_arg_name='index', new_arg_name='index_col') def read_stata(filepath_or_buffer, convert_dates=True, - convert_categoricals=True, encoding=None, index=None, + convert_categoricals=True, encoding=None, index_col=None, convert_missing=False, preserve_dtypes=True, columns=None, order_categoricals=True, chunksize=None, iterator=False): reader = StataReader(filepath_or_buffer, convert_dates=convert_dates, convert_categoricals=convert_categoricals, - index=index, convert_missing=convert_missing, + index_col=index_col, convert_missing=convert_missing, preserve_dtypes=preserve_dtypes, columns=columns, order_categoricals=order_categoricals, @@ -241,8 +250,9 @@ def _stata_elapsed_date_to_datetime_vec(dates, fmt): def convert_year_month_safe(year, month): """ Convert year and month to datetimes, using pandas vectorized versions - when the date range falls within the range supported by pandas. Other - wise it falls back to a slower but more robust method using datetime. + when the date range falls within the range supported by pandas. + Otherwise it falls back to a slower but more robust method + using datetime. 
""" if year.max() < MAX_YEAR and year.min() > MIN_YEAR: return to_datetime(100 * year + month, format='%Y%m') @@ -298,11 +308,11 @@ def convert_delta_safe(base, deltas, unit): data_col[bad_locs] = 1.0 # Replace with NaT dates = dates.astype(np.int64) - if fmt in ["%tc", "tc"]: # Delta ms relative to base + if fmt.startswith(("%tc", "tc")): # Delta ms relative to base base = stata_epoch ms = dates conv_dates = convert_delta_safe(base, ms, 'ms') - elif fmt in ["%tC", "tC"]: + elif fmt.startswith(("%tC", "tC")): from warnings import warn warn("Encountered %tC format. Leaving in Stata Internal Format.") @@ -310,27 +320,30 @@ def convert_delta_safe(base, deltas, unit): if has_bad_values: conv_dates[bad_locs] = pd.NaT return conv_dates - elif fmt in ["%td", "td", "%d", "d"]: # Delta days relative to base + # Delta days relative to base + elif fmt.startswith(("%td", "td", "%d", "d")): base = stata_epoch days = dates conv_dates = convert_delta_safe(base, days, 'd') - elif fmt in ["%tw", "tw"]: # does not count leap days - 7 days is a week + # does not count leap days - 7 days is a week. 
+ # 52nd week may have more than 7 days + elif fmt.startswith(("%tw", "tw")): year = stata_epoch.year + dates // 52 days = (dates % 52) * 7 conv_dates = convert_year_days_safe(year, days) - elif fmt in ["%tm", "tm"]: # Delta months relative to base + elif fmt.startswith(("%tm", "tm")): # Delta months relative to base year = stata_epoch.year + dates // 12 month = (dates % 12) + 1 conv_dates = convert_year_month_safe(year, month) - elif fmt in ["%tq", "tq"]: # Delta quarters relative to base + elif fmt.startswith(("%tq", "tq")): # Delta quarters relative to base year = stata_epoch.year + dates // 4 month = (dates % 4) * 3 + 1 conv_dates = convert_year_month_safe(year, month) - elif fmt in ["%th", "th"]: # Delta half-years relative to base + elif fmt.startswith(("%th", "th")): # Delta half-years relative to base year = stata_epoch.year + dates // 2 month = (dates % 2) * 6 + 1 conv_dates = convert_year_month_safe(year, month) - elif fmt in ["%ty", "ty"]: # Years -- not delta + elif fmt.startswith(("%ty", "ty")): # Years -- not delta year = dates month = np.ones_like(dates) conv_dates = convert_year_month_safe(year, month) @@ -395,7 +408,7 @@ def parse_dates_safe(dates, delta=False, year=False, days=False): return DataFrame(d, index=index) - bad_loc = isnull(dates) + bad_loc = isna(dates) index = dates.index if bad_loc.any(): dates = Series(dates) @@ -459,6 +472,7 @@ class PossiblePrecisionLoss(Warning): class ValueLabelTypeMismatch(Warning): pass + value_label_mismatch_doc = """ Stata value labels (pandas categories) must be strings. Column {0} contains non-string labels which will be converted to strings. Please check that the @@ -502,8 +516,8 @@ def _cast_to_stata_types(data): this range. If the int64 values are outside of the range of those perfectly representable as float64 values, a warning is raised. - bool columns are cast to int8. 
uint colums are converted to int of the - same size if there is no loss in precision, other wise are upcast to a + bool columns are cast to int8. uint columns are converted to int of the + same size if there is no loss in precision, otherwise are upcast to a larger type. uint64 is currently not supported since it is concerted to object in a DataFrame. """ @@ -633,7 +647,7 @@ def __init__(self, catarray): def _encode(self, s): """ - Python 3 compatability shim + Python 3 compatibility shim """ if compat.PY3: return s.encode(self._encoding) @@ -815,9 +829,14 @@ def get_base_missing_value(cls, dtype): class StataParser(object): - _default_encoding = 'iso-8859-1' + _default_encoding = 'latin-1' def __init__(self, encoding): + if encoding is not None: + if encoding not in VALID_ENCODINGS: + raise ValueError('Unknown encoding. Only latin-1 and ascii ' + 'supported.') + self._encoding = encoding # type code. @@ -931,11 +950,12 @@ def __init__(self, encoding): class StataReader(StataParser, BaseIterator): __doc__ = _stata_reader_doc + @deprecate_kwarg(old_arg_name='index', new_arg_name='index_col') def __init__(self, path_or_buf, convert_dates=True, - convert_categoricals=True, index=None, + convert_categoricals=True, index_col=None, convert_missing=False, preserve_dtypes=True, columns=None, order_categoricals=True, - encoding='iso-8859-1', chunksize=None): + encoding='latin-1', chunksize=None): super(StataReader, self).__init__(encoding) self.col_sizes = () @@ -943,11 +963,15 @@ def __init__(self, path_or_buf, convert_dates=True, # calls to read). self._convert_dates = convert_dates self._convert_categoricals = convert_categoricals - self._index = index + self._index_col = index_col self._convert_missing = convert_missing self._preserve_dtypes = preserve_dtypes self._columns = columns self._order_categoricals = order_categoricals + if encoding is not None: + if encoding not in VALID_ENCODINGS: + raise ValueError('Unknown encoding. 
Only latin-1 and ascii ' + 'supported.') self._encoding = encoding self._chunksize = chunksize @@ -962,8 +986,9 @@ def __init__(self, path_or_buf, convert_dates=True, self._lines_read = 0 self._native_byteorder = _set_endianness(sys.byteorder) + path_or_buf = _stringify_path(path_or_buf) if isinstance(path_or_buf, str): - path_or_buf, encoding, _ = get_filepath_or_buffer( + path_or_buf, encoding, _, should_close = get_filepath_or_buffer( path_or_buf, encoding=self._default_encoding ) @@ -979,6 +1004,7 @@ def __init__(self, path_or_buf, convert_dates=True, self.path_or_buf = BytesIO(contents) self._read_header() + self._setup_dtype() def __enter__(self): """ enter context manager """ @@ -1008,10 +1034,6 @@ def _read_header(self): # calculate size of a data record self.col_sizes = lmap(lambda x: self._calcsize(x), self.typlist) - # remove format details from %td - self.fmtlist = ["%td" if x.startswith("%td") else x - for x in self.fmtlist] - def _read_new_header(self, first_char): # The first part of the header is common to 117 and 118. self.path_or_buf.read(27) # stata_dta>
@@ -1281,6 +1303,23 @@ def _read_old_header(self, first_char): # necessary data to continue parsing self.data_location = self.path_or_buf.tell() + def _setup_dtype(self): + """Map between numpy and state dtypes""" + if self._dtype is not None: + return self._dtype + + dtype = [] # Convert struct data types to numpy data type + for i, typ in enumerate(self.typlist): + if typ in self.NUMPY_TYPE_MAP: + dtype.append(('s' + str(i), self.byteorder + + self.NUMPY_TYPE_MAP[typ])) + else: + dtype.append(('s' + str(i), 'S' + str(typ))) + dtype = np.dtype(dtype) + self._dtype = dtype + + return self._dtype + def _calcsize(self, fmt): return (type(fmt) is int and fmt or struct.calcsize(self.byteorder + fmt)) @@ -1302,12 +1341,14 @@ def _null_terminate(self, s): return s def _read_value_labels(self): - if self.format_version <= 108: - # Value labels are not supported in version 108 and earlier. - return if self._value_labels_read: # Don't read twice return + if self.format_version <= 108: + # Value labels are not supported in version 108 and earlier. 
+ self._value_labels_read = True + self.value_label_dict = dict() + return if self.format_version >= 117: self.path_or_buf.seek(self.seek_value_labels) @@ -1361,7 +1402,8 @@ def _read_value_labels(self): def _read_strls(self): self.path_or_buf.seek(self.seek_strls) - self.GSO = {0: ''} + # Wrap v_o in a string to allow uint64 values as keys on 32bit OS + self.GSO = {'0': ''} while True: if self.path_or_buf.read(3) != b'GSO': break @@ -1386,10 +1428,11 @@ def _read_strls(self): if self.format_version == 117: encoding = self._encoding or self._default_encoding va = va[0:-1].decode(encoding) - self.GSO[v_o] = va + # Wrap v_o in a string to allow uint64 values as keys on 32bit OS + self.GSO[str(v_o)] = va # legacy - @Appender('DEPRECATED: ' + _data_method_doc) + @Appender(_data_method_doc) def data(self, **kwargs): import warnings @@ -1422,8 +1465,9 @@ def get_chunk(self, size=None): return self.read(nrows=size) @Appender(_read_method_doc) + @deprecate_kwarg(old_arg_name='index', new_arg_name='index_col') def read(self, nrows=None, convert_dates=None, - convert_categoricals=None, index=None, + convert_categoricals=None, index_col=None, convert_missing=None, preserve_dtypes=None, columns=None, order_categoricals=None): # Handle empty file or chunk. If reading incrementally raise @@ -1448,26 +1492,16 @@ def read(self, nrows=None, convert_dates=None, columns = self._columns if order_categoricals is None: order_categoricals = self._order_categoricals + if index_col is None: + index_col = self._index_col if nrows is None: nrows = self.nobs - if (self.format_version >= 117) and (self._dtype is None): + if (self.format_version >= 117) and (not self._value_labels_read): self._can_read_value_labels = True self._read_strls() - # Setup the dtype. 
- if self._dtype is None: - dtype = [] # Convert struct data types to numpy data type - for i, typ in enumerate(self.typlist): - if typ in self.NUMPY_TYPE_MAP: - dtype.append(('s' + str(i), self.byteorder + - self.NUMPY_TYPE_MAP[typ])) - else: - dtype.append(('s' + str(i), 'S' + str(typ))) - dtype = np.dtype(dtype) - self._dtype = dtype - # Read data dtype = self._dtype max_read_len = (self.nobs - self._lines_read) * dtype.itemsize @@ -1498,14 +1532,14 @@ def read(self, nrows=None, convert_dates=None, self._read_value_labels() if len(data) == 0: - data = DataFrame(columns=self.varlist, index=index) + data = DataFrame(columns=self.varlist) else: - data = DataFrame.from_records(data, index=index) + data = DataFrame.from_records(data) data.columns = self.varlist # If index is not specified, use actual row number rather than # restarting at 0 for each chunk. - if index is None: + if index_col is None: ix = np.arange(self._lines_read - read_lines, self._lines_read) data = data.set_index(ix) @@ -1527,7 +1561,7 @@ def read(self, nrows=None, convert_dates=None, cols_ = np.where(self.dtyplist)[0] # Convert columns (if needed) to match input type - index = data.index + ix = data.index requires_type_conversion = False data_formatted = [] for i in cols_: @@ -1537,17 +1571,18 @@ def read(self, nrows=None, convert_dates=None, if dtype != np.dtype(object) and dtype != self.dtyplist[i]: requires_type_conversion = True data_formatted.append( - (col, Series(data[col], index, self.dtyplist[i]))) + (col, Series(data[col], ix, self.dtyplist[i]))) else: data_formatted.append((col, data[col])) if requires_type_conversion: - data = DataFrame.from_items(data_formatted) + data = DataFrame.from_dict(OrderedDict(data_formatted)) del data_formatted self._do_convert_missing(data, convert_missing) if convert_dates: - cols = np.where(lmap(lambda x: x in _date_formats, + cols = np.where(lmap(lambda x: any(x.startswith(fmt) + for fmt in _date_formats), self.fmtlist))[0] for i in cols: col = 
data.columns[i] @@ -1578,7 +1613,10 @@ def read(self, nrows=None, convert_dates=None, convert = True retyped_data.append((col, data[col].astype(dtype))) if convert: - data = DataFrame.from_items(retyped_data) + data = DataFrame.from_dict(OrderedDict(retyped_data)) + + if index_col is not None: + data = data.set_index(data.pop(index_col)) return data @@ -1622,7 +1660,8 @@ def _insert_strls(self, data): for i, typ in enumerate(self.typlist): if typ != 'Q': continue - data.iloc[:, i] = [self.GSO[k] for k in data.iloc[:, i]] + # Wrap v_o in a string to allow uint64 values as keys on 32bit OS + data.iloc[:, i] = [self.GSO[str(k)] for k in data.iloc[:, i]] return data def _do_select_columns(self, data, columns): @@ -1687,7 +1726,7 @@ def _do_convert_categoricals(self, data, value_label_dict, lbllist, cat_converted_data.append((col, cat_data)) else: cat_converted_data.append((col, data[col])) - data = DataFrame.from_items(cat_converted_data) + data = DataFrame.from_dict(OrderedDict(cat_converted_data)) return data def data_label(self): @@ -1846,7 +1885,7 @@ class StataWriter(StataParser): Input to save convert_dates : dict Dictionary mapping columns containing datetime types to stata internal - format to use when wirting the dates. Options are 'tc', 'td', 'tm', + format to use when writing the dates. Options are 'tc', 'td', 'tm', 'tw', 'th', 'tq', 'ty'. Column can be either an integer or a name. Datetime columns that do not have a conversion type specified will be converted to 'tc'. Raises NotImplementedError if a datetime column has @@ -1854,7 +1893,7 @@ class StataWriter(StataParser): write_index : bool Write the index to Stata dataset. encoding : str - Default is latin-1. Unicode is not supported + Default is latin-1. Only latin-1 and ascii are supported. byteorder : str Can be ">", "<", "little", or "big". 
default is `sys.byteorder` time_stamp : datetime @@ -1878,7 +1917,7 @@ class StataWriter(StataParser): NotImplementedError * If datetimes contain timezone information ValueError - * Columns listed in convert_dates are noth either datetime64[ns] + * Columns listed in convert_dates are neither datetime64[ns] or datetime.datetime * Column dtype is not representable in Stata * Column listed in convert_dates is not in DataFrame @@ -1913,7 +1952,7 @@ def __init__(self, fname, data, convert_dates=None, write_index=True, if byteorder is None: byteorder = sys.byteorder self._byteorder = _set_endianness(byteorder) - self._fname = fname + self._fname = _stringify_path(fname) self.type_converters = {253: np.int32, 252: np.int16, 251: np.int8} def _write(self, to_write): @@ -1937,7 +1976,6 @@ def _prepare_categoricals(self, data): return data get_base_missing_value = StataMissingValue.get_base_missing_value - index = data.index data_formatted = [] for col, col_is_cat in zip(data, is_cat): if col_is_cat: @@ -1960,11 +1998,10 @@ def _prepare_categoricals(self, data): # Replace missing values with Stata missing value for type values[values == -1] = get_base_missing_value(dtype) - data_formatted.append((col, values, index)) - + data_formatted.append((col, values)) else: data_formatted.append((col, data[col])) - return DataFrame.from_items(data_formatted) + return DataFrame.from_dict(OrderedDict(data_formatted)) def _replace_nans(self, data): # return data diff --git a/pandas/io/tests/data/legacy_hdf/legacy.h5 b/pandas/io/tests/data/legacy_hdf/legacy.h5 deleted file mode 100644 index 38b822dd169945b5f5022a219cc784a4c9951320..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 14928 zcmeHN2|Sfq8^0(aNlls-)tzdjME0fVxU%M2vRxdOIp-4O%o+G)mJFVB)2b> zR!SU1C>2wKk%ViP>|4lp-|f3TrP24zeA85~pZ9;3=RNN^&w0-Cod0=msJ^!D6rnjn zB;*nhAWa~NqDkU5j9;cJu@WLr)H~n_9vt`K$l}EkJS2V+Qa1}P7scBr^I-)Ic)5j{ zjt+?xV&MLp&<5Sx#dQlO6X@v4 
zboJ-jvnC<%foN`QqRqpJUbwlE1SY({0m44XV(*tD`rjx1emOE;okBGHe({gCmWa=V zGq5h2;yva1qmRSi@o+pO>H~2{Nc3A2alo7lu`T4{<5&bDf)pd0oQZgzmzNiPQh?(x zkpv%)=wuFXuH|@(SZ+FQQAnYL8L#nZJ&g3@h=~&qQ_RdLHk>?H{%t>wm?$AHFQdT8 zjg%jsp3#RmH_ZNd|84&%#6lnhMj>IQ`*3}Ys{U-VOdXnvA;WUrCc2Wbs;$4VqShJ} zIjX5?fRhr{z{Fpkrf2DE%kXtIwAHtuu3=dC2T-?K(3q;$dQMs@6cq;3NZU8qc8i|5 zk-s5b(NLKYpcD|`tK{WG*L77=Hc{T>>`Hf)w=&Z6u-37qF^z1Tf&y*b6cv2dIT^?) zY+7UKq2;ckX1>nD*2%!%YK@Y=v5m67vBD->McqJy&1tzUG$3 zI*yK;186Q@pNf@cND*qJh?G2|6krD7KXgv^Qm=)}oqlUGwV+&J~O5cICj* zZ3za2T@7%CnqcS7NhPrKzVO7_;w)%hQd0SAPAL#ns~O-CZ-)!geN}8D?f{aDII|pC z85vqzQ52`|L~%>R9u%M62t~0$G7?2?lNc0ZnLndA7|lYFmUR(DPo*>z^VVmfxOjIi zin~|^D3(7eLa~ifg(B6x4n@|%W)vsfXhYHASvQIS#uJ|-{VNO*LUDQYR227R&OouI zem07Iwo6g8_)Z4Jqyz;Ng&(P+=-#D)Am6H*nxpxH@cG52z&*pcu*kXnY4D6bSjueJ zb|fepyf|qhq?%F&9d15}kIHxfLlQ#r{F|Zy%cxH*n%w{rCy*q>O5ecU*L-{u=)VDx z74z!y5*lE)Mc>BRU)O-C8fUdv);)s9&UA0DkS>FEGn4E#oahG^=1(r`yD$Xp?b3gU zF{^?_#mfd(@TCF%FvI%P^-(}Tl=n>X+-D#(M_R_ZI~59xSlOOl(+LNbP{rBOU2x@G z+bP~bselrebh>&OoVZ&L%t5fqn zyZNPPt|b3$z!~kUW{Ol4H5i_JP$EU5=nCbhFrcP zjJCsY^mcLd-xv_ga76`>u!K+vB0g$61~}^aIeKBWNcp?@3Rwngf4qF9bOk0)?DMeP zm;2|U0b~t~94imb_jz!`n~zt1sNHkrksYT1X)}lLAWb25J#wGuAK*v#c0=r05Vdtx z{JmrKgAd=YP9phZ(;jV#XpOmW^R zOp--MjPM3zKHlYTM$U`5s~;WXrSJALm0x2D9Iu}_F&mR7_T9Mpzrbgrfw9g-_ThF! 
zsNs)37a@3YtT^@t8vY{gy86(#kI9eZy(8oPe|vt(Lz;}mdffjJ{jQvYb(qWNMNv8A zT$1pAh{V`_Z}}9gg};~I8_nnU{mur3@GG>r*l54pF%66rN4fStzPQ4be>=G5fA2AhT7S)Q2Cm-{EDfzffDjE~R(wvRLg{NYV{=NJk7T1x{Vwdb9JTV4fdmIwU zfjou)UQGC(6N}$3_o>wWb#Yw?wNn}%SVs2)r=9=t-3bvLdw)8J+cEJZHbb8pFX#HD zxA#vYc{lew`c^5s8AQHx5|XJbfk|X(E&H4% zaQ^ZJT`QLs&e?tU9owU?p|l+TuR8(4zu83%;mnN(w*>z zaonK#&H->cQAuqmGXs{{x5*7|dkJ@_UcILGT_sd^lrpEfJq2+hZ#vKK?Sm}WBX{eL zS3>&4V;&tBs=!sZZjJ2kSztwE((`2YSs+81)*r!}1`gD0$l4{526@W;4d!_~ftUFQ zXY2F6fT;^3ykgqw;NI=5r$JAj1Njv$p356`EsK<~ot!xElTJ1qTrc8xes44MovI^g*j@^L zig-kQl2QQV*H;&a=rzFRy^^p_fQwL(Q+hY~rA_posTSZ9zgWq+Jd@pTpuLmc6>8NE#b;eRs2JS{dG#3a*_R4HB14?^t9~UoK>eO&|N1eI1pSuP@!0*L2D|tF*LH-l$TM=GQ2^FkY8tclco|j2e>lmA(87q>#g^u6I^Lnad?YafwghNv2`3pFSH*77_Tt zG`tdiZr=_G-d2fkbXd}uEBjF1hJ5!ia^xJ66B|XYNBoldqJhsw17q>bJbYadiukj5 zCT$t^1{(e%?tbp$=9y$Utiy!+_;8--z7%WW@69t$;CjHP?=2vBW*+|i{4D{z0NaIg zGT6`6#uEEv%184%f_HH3e|+bdT=}>8&%4i$|7f0xetPE23&EJMDf)l&9IhTd)NlOn zdVV+0#J;<0{+rKZ7XHWc%tOkU#{XKL`Dg3@lUev*2J)9^pky^Kxc}@KX#Tm8-d*E6 zUGwW;!?CP=Cpmnt9+t-J>}ml&9&epOmP-J!?hcGxk8+s4%P_<(z7x)EbW$*w*9aay zc~u>@{yw<+u(sc30vpEMuav1=R0XNM$>Jt^TEY3;_Wc*<6oPuWc7fyO9bms{_T7rs zSFlt3)PtNDrw9I9t3jdI$><+ezJ`19^|XfVs-V=JoCVvGdO?=N#YHyWzW{qpSiF2e zEszYk{b2A|Jx~ysrt3{@0z6UIQqGd^fmTJv-lMv$;2)i~X@{;S0K343@8m3dfpht` zqmifYf(=!V8`xXeFvF&g{5_=#lz7s7ww-SW`aAk}2(Ntw*{-HLX!onYy!Gl^4((_F zvZu~IIrQ=gbU&T_;^d)PxKvtVi|*lWu(@jcw~>!{GCDTNOpZVI3W$6iQhM064SYp) z6MQ;ltuv5t^$X0lVP*>1@ebc!d_0x?i9g773V>!UxKr z+ghXP*Y|Wo+5@RW^$%{t#i?E4TCg4%T=jc(di^y(qu5^d=^cQASsFlS#ubp@F!P>y zOB?JPvba}t^(;K2bP6O!6v81dp`FrlQK0X5wm_(CCS2^Uus7g!4M^O*Dk!XY7`UYQ z(+xY*;R*5liEW_|U_|hip)A!dC{etOA?QrXXg+Ls>e!2VsC{f1Fwd_52f{br-TpKa z9L;fy6mzWrb`Q7aR;yP7i+Hn+21Pd1F7mLf%pCyYB7&6zce0_Lg44AMhblOHxZ;5Q z<3iY4U(@HY_9f&yAtjX_(+FO=#b!&LZUzZ`hWl?l=mJ-7?Dd;5aR8V+HVqNqT?bko x8?i3QR6}yy+Rzlh2C-)qD{|JB!30;^z>@kW@LtB&8lx@sKrE{CS2Ol6@LyVSHP`?E diff --git a/pandas/io/tests/data/legacy_hdf/legacy_table_0.11.h5 b/pandas/io/tests/data/legacy_hdf/legacy_table_0.11.h5 deleted file mode 100644 
index 958effc2ce6f83dcc62caa866b7a7de7c98c4a30..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 293877 zcmeI531C#!_5bfo0t_L6fT+QxPTh!0LlVMft1}4+CK{5M37f6IFi9qmGMNmM2?Vqi zTq?C{u|@1tqWL1Y^h)s0n1aaY_ULW<`RVc$9nW*Me{~ggmE3^YA^bijk~RF-KSr`Zl!JY#yi?roepGnG7L)47p+LkJ`q zr$Oz%2}9W1;P>%^`j&dym6yn_dpk}&Rn?_7Zlq+t`5P>bfhyK1*{4I@)+|?zL%QFT zms87(YyEML{9V50Qls@|_@l3zzsr>VF6a$3dE4v5i-N+clwTdfQ_@;<$lK~OnOY`O ze$&Q=ne#93h1y%%0xG}gI_1yLE8zdles+k=Cym#rR{CdQGnCI84ElJ>xUtvECS`&bf1KvP= z$QO*cAM*1&w~#0O4&wU`=Jze|3C7}nrpCTo%8{OobcE8KJ~4nxi(Pw~I-SP(h~pEv zdpVh>l8r1$u@MvL?r<3s=Y*`2|A@Tga)vuy(Y6!~VMTlRJ(lLjbF;W0-ld{T2*cJ3=gkAkzXS_6Z3W( zr$hEe>9#$xn=v0bG?CI*gtHM_)ZwT@e1ZT7fB*=900{IW0(G6GfsP3^^TsZ`*HiKI zdn@eje=Q1+>Ad(O*S2Gx3*C3&eIZ4-{7h>8(dSbzxKI9TmR+yQ{}Bk zj4Iw0`Pui+sXA=ov#vQ!XV)~$Zw!oIbowE`{MQZhUvh2ufBAo1F!{5{s0Z)u8oTGI z$ocbzKC<|`yCU=3&YWLRd-kNNf|WyVx+Z)1mdM4MR{W#r$<3~t9%(%5{b7>C355y&)q$G`v%v5CCj%iKkrl5{Icc}Z&UtQ-=z6n&%X1kXa3{* z`DyKImU~}tP5Z~?K3{2wY~Z-92yO)Uq?1L(^w2?#L>*BmMDrUE@cr`@H;; zk6ds5A^nNZ@?MEFJhd`??c*Cdgi(-$6UP2wY6o_Nn7u}&t?1W`R|u)W?g@OZOXdi zs<*l7YR@StYVjhFVIF|3TKKPhGY@O@H9MMgNIB?6~^yoty58JQK|Pz`1Kvq-tz&<-CfL z(7drVlm0mGqUG1#!olA5-GUt;f;r%^|GsEj{n@b^QRWg?|lEl z&`)m~yC?F4A59tC^oxgF6S5z<(*3~)5%0Q*S30&0?3&d!XUe?NV()^65zm})>B%oY z6S?i_KVN(4celIVeEknsz54t+k-uI(ebTzKw?%?;&pmTdVe5=eU;Pz3cQ}9XTh|3w zef;Iz4cj7VPi%MW9QKmy?kDD#t-17VS73a@-02Gn&#hVX`^#^+z3ZBPyWDsA{4d?K z!*$dN8?PUG&4-btd28E#6517MuN^HLdB?~dD>Ei`ZjYQ^+`g#em!Cy$`dv=NgKaNH zp5A@M^&egJNu;xCMs>bt!mQ$slOEZ%>W*b^xn4Z%#~W{rL|muca@EU|8@5NL-+W?q zY1$UoxlmLFJ9ak8MeOgvn}7+?V5brYv14Y_G6Kw17lu2>5wlX zp0g(u&L7(_rMTdWOGp0Y`PIz*RNGNG?*DFd{paSNZobj6HS+kTJ5PFdM_SkX&gyB! 
zK2KS5@vS$Uxpl?2wz$q(^Tp3M4f)j7^vh>{SI=&fx>{;ct#NZ_n-RV8PZH&2{3 zXU^j9|L)a2uH!ov3_Nnn3z6rJ$h~0qm=7Y?|MtnV&;Q{^u72dU(G?H?0T2KI5C8!X z009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH z0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI z5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X z009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH z0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI z5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X z009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!XNErbe z8^CP5v>aVxHp`ZKYmX3T8Cxqq8LO);D`Q-8k7d)BS=6EQ zBLAWKD@1#I?as!wK-k+7@P(YsZ6Rma+u-*($22t;jFG#HWwKn^LMm@^WwFPj%NuAa zudbr1w2VVArXSu7v$yZa)-tphbxa!Ee+bGm>!4rgyS&hI3@2^ z;%rHQI-aPHk@Rl@G!9qmZqC(dO_8z#^6zY%I;QcO%wE%Z8A_TlvR5lbPgQlPjZ2aL zUw$Ozuu9@M83EEyaEjt5ohr|e$e$C&}8Zi4KU^5dh-0LP~K+J>tNabwqV%rTj2A{%={w}KXK0yn~`6&KjdrbXbiWsah1z0 zQGV)&jh7(z^@#zTme{qYsncotSll-=_>Yb5gLW!GRwHuA9hx4}@etlWA!*b1FS{(y z#@gq0G&lQwvP_Xtcn;w|ntaWEZ`ddEi1RcaW*@KW>k=pVHiFBGO`N^q+RM%{>@U{7 zhiEw=FK^GqwY9~w#F5xkIclFV<;MnSYi6wL6l*_8Ql+{2Fz4jta>5S9<~$*}9QkAS z*yM6WH&*?c{ohcN zyr(S`_BA=Dd;J~0c3BVkr9pChmCdXxt(dCDRs3?8vd>KdL!Pkddo8D zt=1cu<1>a?pY^n)$9l?#WCtZ}9f2lqXb}ya zzWO(e8+HFBzCD>QnEjY1>M?=J|P9pvj~S=!51@`%@or z?kmv&vhV&XOZmVa-skDLZ>C4xM zauc;&&3BT^b!+T?Jm~#=$}L+*Eu_amzCc3u(P&pp|f2*P%e-Ul+IJgIoX(6 zucr*w=R?x}&ZAX{#M`Eyi;ej!JyEE1Z}#1j=Rr{;w{Q0ux-&cvW$QM7?TdX|*G(i; z{-JO&##bN3LK>q9X{+8LN8;5xi*?$nQos={~IE0^iW|3qHjc2527o7_%je=}p%F(3c<(g%w^`1p=ruUUBB%)dYTz|H^M z?0fv^w;w$EvVv^3wBm^));yMV_(kizPyMRl!w+38Gw<7R?Tb%M^Vo(vmMmFgFTefD zamPJ*O4i9YZq9jg+Uj+8?%sa%qYIbcwsq2tOPOQYyc6y{b@+qd{&4HLukHGx^qCWH zxO(m5o?{&jer@w9PS2=sf6?{vGmqXf?K$71d$Jave%Q3b#-4cHnh%2m?ary$ z@ARqdhy9}9k?_--HdZZs{&yqlw%IRnv-Hmn&3$9{;{z9NSv=>SKTIA|)iUajCq6Xc z*~-qxx1WC5SiU?o{#O$s?`}Bl+ix7T?$;ed55H^7Mfd-9MehH)9qDd&{nF3xT0iQs zUo}q{a>lNPaX)_M_GO=3amkpiZ$EgGs(Ra?vOTSZ0IP&|q+e%!L-Y^AZ8BG{R4d;9%?V6~FxYRC2sBtUV?Gj-pcI$eEM z_mQu|@mYOKJ?%N6oS{OmAu{sq2J 
zy9^c;`OUw_ke^q;|BH6I`4!RMAu^wYZd5DlJyc$oV$2&1`gqIuuTRvscKE|BvK{=A z@%tF692uQl&!dy|!ndgZC{sDP{$N2yZbg2EXS5U@oe?a|2nWYzc=KdMnQXR9Mf|GZ zq2A9J1&Z><@*mQZEYCAqi^(YA`g~ca*E71o8{)gXY|rTCwl=<7MAh)p=jwVCw?JE< zUbWrJe{Do@KiyhSZUt|nul-14x-aviG7mWEm6n`~(OkO{c-{f<7y^^VHLHBnc-4&+ZI-Shra`=xZTHjvSaHos6S8Q%p zG=vrH;TL#|V2=|7yI=k09$DY7(fcNP zUuD>PY&@RXztt6&$lnJum}|7fhl&o?$N|Ah<1qHM(wI-bbFNZ=l20>h@pp!XgSf!bveSv zQ9145P)lG=QNCdZs(x-?J*hu>A4*ieC};2W7Zw@qR?nkVJgCRR+nLCI$^Rr;58t+j zXO6QiIgfg-E{dCq*rGQUb%;+8009sH0T2Lzen{ZTVdF9aXKamJedxeZcWwT+Ye(n* z4SN5%ovyY3U)SN27Mdj-dCiEQ{dnQ)k=AAPPoH(lGm+Ih{&CTgs^3S#3;FXbm!qf2 z6{mlBL*193MNXOOob%+CXI*ng9R2qJ?w4HQxgnpA%hB`h?`>!2JTY*CYx}(O|9NZC zhmotF`u)$ZF5jl=<+C~c_{%~o-dQ(f)aFR-_Q_eB7w?F?_Rss)Z1}+%m#mk|iDzoD zy&B%QF6Z)hB2V49xU21hZIPxsKYrYH&kmAXtfx2Z%oz<8-~ZG4$c1;LUvlW|HzObY zW<&mW#=RAh?Q8HZ7IRXu-8cD@dz*jziRHRgWixI;cITmPYI-~5hFdOr5v zyYV;dxEEbl&wBUnm%2W5jk@#Fzpb;srt0N#gH>_tq9ZpvebTPouJhgJ{demHpGGFU z^4k9mulZcn%jL8f5)c3Z5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI z5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X z009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH z0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI z5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X z009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH z0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI z5C8!X009sH0T2KI5C8!X009sH0T2KI5ctXo*w_GOSBm+4x4zOJ^c zjB&|5mQ7z~4>u}#2c1CrjUrH9R$L=GJX81Dsna*=m0jpV))Hv)b@GD-Z%CeMZfldN zutBj+R7sCNhiiLXd7i50VO?J)E0<;1*g)ps|LQ90D!Bo(L-@}L{D(_rDYs~ zAunoou<2#Bo{H)!DWAsWsQ!Yye3{S2hRSm}@`&+Xsd31Bs$W5kQH?`;!g07{t`c-p zISbWsZ@KF1NDeXrG!9qmcF)!6&PB=&$gb{Ktd6;<$}O3_rgI6az3Oby#8XvWYGY|+ zwQTkks{E*cWrys%{GsG>bU)dt??;wm`rRSvCtLHAPL=mEQ+r18KW2Y+7{5;Xk^W5j zFzIQzU-Ngg2F(5_)_)B!=U48l$NsA@uR!V`laU{D@VpK-rJ}NqyF8Z76{vLRmU7fi zn@#N*{Te3owRZ%4p$30j0`x z)zSQ_CZ{~6_A?egf8*SafNpy7{HjpiX62b}Y_M#9TQKbRE%5neX8w_gpSb6U&B(9X 
zAM!PIG=^K+xXR_0C_nYX#)c@{(I*CQZHZlbnmSGPxVA<0_hDVGjqZnbDoJ0^Txgm| zoQCjz3Q3zD?si#^jkVA1Xm0lVw2cTm7WLa$ldswD4f|vsQL;nNP}A&bRJ5#^W0dAK zLYE*;{^i=s&N1vU*4~GRc8EgxCU4ipwY9~w)Uono1GF{s`N(|TV0Ag>GgqHk)|OmO z*rC{#{4lv3`Qy!<$>rR#G{)wJlgnwb{ho+BfnaR4zuw+wta>#2y`j8((xay>6!tYa zr+fV!zIJ}z$TvuitFoDOr4>_Ed*Y90g~-M-WSOgV!PfB*?H3pCUXHQ^TZ+bmb`QKQ zy&exTYpTXW@Ac)DAmiPqM)518ud0sL20qztYj#SH>1+w8Nw>_*Kl-Y_ReRGP_Zrf1 zFt@BSkw2r+MNWr~zXE?I?scMg*{prl-{j)Sd=|p@gL{;a1fU#Zls%3>c#8_r}~;& zya8uOs3#SEFjeE|Le4;J1e1Ac70#W?eb{vVh0NF!HTSuVD8(Ks|sjq$yZrSvB&o@qfh3+?%-uAGw zwr!!aT|JmCkPoKLQ^do|L7)y`>sh0?v*xqLX3?M(b!sqK8j#afj- z4`utx>|8?S9|{)}?}@}h9HWVBtKK08u6q2X{oN)nymlS;vckSslHmJaSQ?=00JNY0wB;I2qc;> zoRpw*BR`8jUx-df6Xe5b5C8!X009sHfrFO-#rHbBY98R$>tV+F@<18iQCd-3Cqj6{ zI*56FgVsTa0Ij>xIw`H=IDf9n?`fRvs$b~#=z<-$sAF0OrTUQePXdYJY~Q#|*#X(r zvfI`1zOFZ0=~1~mA5`VmXaQp#FZR9WNG?O;phCpE%lJ?pfgg+glqt4Ead0#qHvdwMhb}4IyZ3SC z_siCJP+Eo8#YW%9skeKKmEWN1rR&yI(O_)Vb;;$jb;dhZB$p#Uvi8>Ga&BGT++Qb` zv$l7C9VdzBl`1#&q=0NcS7FzV>IF=*m9& zH5y&?-iwa+_8xX`el3pPuH#%p@iOba>g`~S4_(q@J*k;J#et|%V8k7!PA}28(6M8+ zPDEX?dPzL5IsoxG^g6xI@oaq^Hx&O#sZZ+uweKC@TS%+ioC@2Oc2|9!w~ zlgl}DrsY>Bmn+wDJ8w=dH&M&Iabt2hb6fZ4ekz|!oV?+5ABo2A^)bG4?Ny2!@!pT| zoonyaD&zL8mdttoV&|{>X5ZFz6Vmy?7T@XCPGF|)K(ck#p2T-r^-J;Tlp5u$^~SGz zoM#Ow){>r{AZ{b-NG1OX5L0T2KI5IFb=^fu1($^`R;=*0BkM}i)J00@8p2!H?x zSO`#@=Q91hhj-|8u~f%-I{r^Bdi6BUbJ?Rx-mTla?OAn9>z{50&m`X?s`wgQ5$7pvFrKJ@`IQ;w+a? 
zdh=c{**NLFzLbR?3~`>x{2GleidUoKy~P{s&9B87A9tOs;yh*6ebrlkj`LJ{%8?#y zU*kM`u9s+>=d@|sXNihq^+NN#uf_MU?bkR@rH}aY0gCISaYM&n!MG9rP89#O9XI0M zPw^uMBF+=zru*N4F~?`6`uje65?2U)bk~P@-f7bVXUSrxtv4i>3k%)T)CFp@R#Ir6SZ8|)5+ybP2I1%seCSR@|M$mI#qF=s~=X} zh_^KkkiNf@(!JR?y%$fkULu|=w0*CBU8{`ep=>F$@BR|!nX8?^Ol`HsdSL82>o+mZ zb8>>w{7{0p4dJ`7fdB}A00@8p2=q?^y^ZtSm0-RQotXAdOz0H|fB*=900?}e2~eD8 zQ2*WU<$7H#)p4HN-qVX-dLcs0Q^cwsy@#Zfl;QYL$Ta(j zJJQEtZv+2p)r{4*)YGoKLhHq}POioGn$UQdxk1@qr>@Uf&yHPx6m?O3RDo6A{C-0G z&Na!OSzA4=h94X9vGJ*5$dJ78eL1RUHu-J44i@zCKU>*eA6^s`G}QM$n&00whrF#m z6Cz`L598k$8O^`I7iyP(tXzD5r1+cWRBwJ>0sk-BN%anq`6P6sTKT<`=J)E}V9>`~ z#+6hrn~=%Wio5tb=b~Qz=azh(^^I+Q{fCo8y+@hq&GiQhGIA^OGd!cE=;(}KVMaJO zHp838r9^w&G9?b`?|K;pit@w{^+T5D8C^0u!|NGc%s*8zyd0qgA*Xl*+5+{eZQf4t zhg(G%-f6l1zwylsBoAXStq*3>vIXS6R%_P5C!_KMbZv<7&os@$HnFIR~3_9^=a zcmwqzUr@HoY#;o*lr!|J{@$PbBlf(;M1Plx_}JKCl9fZBh>bg4C9faGD}9;QBpJyX zlYf}&EzzRn+Y{v!TtwfBwSchaWSfv*s|DyiWzA+aj%!3gd^5d=S@kb*5%>JmZxTKO# zAo%#|J}#!0#s7P=g?Zy8b3?wTew^MjtMNOfqO7vCKEJ-Ic&eygjz4p|XgnC>&z!HI zTRveKH10}d>qLLz)mNA>AB_#eFK6*G9E{pIrJ}M@6m1@V;+X0c`(lenRwYuhebom-_zvD*i`E zJzPGK-e}w;TJO7P56>KDJ9c|_eKxe$Dy^&e-(khtp5a$)DY|#FjQNGvQAC(C@wTCYvD2u!+gmpI7auZXGsg zc&;VO(j7}}*?g|U1z&k_Fw1wOy_k2(PM4KV$aM?VI*Q z9{IyYH(Pq1eOURQhW$^E>mnoJf4_zQYA+of)G19;`%#x@*4vM|L<Rkpz(TYXaqE#WkXXGj%VWI?dE} zqz_q3pvl+C4@&fjVx0;b6kBiYWw^H2=RZ^Sx8-w{X0mcwo{bG;4*svMqOOt~Fgt|* zoWOsmz6#MEU%RuhEfDs$1biW9b6dz6_BQx^&M{5RW8@x}&t$p$Q{+?SO|C5Vc=)j> zZ=gKSKloNxQB_*TAsF(ab_bhYR_m##u9EVig;ak*UcSs{V?*V+9C^feuhcj!+O7H% zRKtm>acECC4jTJ=(Q&nARV`VL@B)X^AjY2zxFTcZ5b4;vezY)79Mz_lfI?P=;X+2d}_`(a(K zjqZnbDoJ0^Txgm|oQ81w6_Pf6AG6DPY^;55M{~2^r)@;ov8dn1ntaWEZ`ddEh>{)J zv!vP6sAyR+$0*Hfgf2mx{L8hMonzQzti2Br?GT0XP2R4HYio;Vsbl5G254($%sbM=`=2PT&jb||(7Y{})wA0L&ST+S^^W9-Hu$>p@zeoxZ)$Q6vO_Sf6{j8%_j zzc-YZPkQvUg~Glj=X9^X!`IHw8~FywaaA_6uC!vRYL6o3}jQgUp($@z8sHxh2YY_o-2mue{8vI$9g}WV@}|DLtmMC7>qV z>T2Khw`y?gGjXpI#mi>xtNtbzPv%oV!=LqDfBBk6 
z>~m8Vze3}!)*G1PGlpBA$J?@A>nlgKQ_myh0so+zL+bQnry`k?OorDJs~+T9rN*otxYqeMJ&#^7OwEv&i^0h^n|?l|b%mw+Ih4{= z+-F3S((Ly4N9f5pPt+8WpO2jnbe~6=*Ne^bD87tbS6e;9G-HbvDZ0hzdDJP77(0G| zu3R5m6E!PhES#2Hj%vC6%jD~7<+_5RtmNfQ)N<(=$>pr=-CxIKH$_@`^qg&spV)PJ z(UaKwhg&u|-t&#qBwy)m4?Any7CPJ2gXseKVCp=DoENS4Qw53ae1(3mMtZQ?Ijyfy zx;Hx~?Ie4y)OJ4dNG;40%J!AnxrE9;6fP#-6N!a5Mibdqy+aPht9KUbv{k1ne`PI| zr^|_+Abk=~lIv|s_H;#zJ*klp&pw9pN~r#*eNabr1ET8-e1P)*d~?fB>pEY+=l8Yx z0%18h;S$z$7OE#h=H`Bj)?@gwQl?X0>1(;-eUw@KZ?P~U0Yjn+wN9jER@m8++5wy&I|t@NneQ+0f8jTSK0@nYX=j^Hvh4k|>vyNnMl ziN=PCB8+wL*!M_?4;B1WeC0PUJ~UI=jy53cdx|u^N-ZuO!hgh@q(pI4^nQZYhv|54 z?&Z33kp@eblsZf>JzWz zCzs3Ca&PH=Sj(Yf@*}$^B`?pdGu${Xxg3?dztbe~yi(q*V* zDGr4E8tEr>dWpt`PMfA~eX^=py{J0?*||9m4lC{r=OU3 z(68;d5%+#=-mE0W@t_ztYCVAX9_Tnw=%e@gFwf6Z9sdb^^j;rve&0YxU-cy(H+kHr z^*NKC`$m3GJ>SgMH~LiEKZLK~ah}iXqQ!b5J%;w`xiVRtC$;|){T$b++fRu#?z41E z^5-};>*2o-_?_f(6le8hZgRPD-H=l!B$u10HT1f?+j}vFjIFR**a@a;ybPS zrTBD8jq=rcQVA;t)}A})G@7l z>eER2DS**dP_j7BwfuUj<2*%~V%uAsXZkcX&bp-Z{)_W$`hlufR~wV4 z(azYT=O&jEb||(dbwAWm>?iq=_ZyR!=hj&-sZTB!Tf_c26DN*M4n5vlsq)^xpHpuf zD4q&&o=T5NI7`Fm&3ip_yoEI$y7&5W%Lc~N*Vir1GnrqbxK4^!qvO5B8|=-m#nIbs z9GECxX5Cl4_2)QGrKcQaSNr(=pFP)0G|ux(?zkvhoH&r&Zgy^dPu}0-Je59l|1IBt zOmUqwZs_a=DdTz&zprGDUrF`%eJ;_8-upZn z`XGIn=N$(k&J*LK``<}A&~ctZAG8j#Td%Lo=I6wgiu;EcqCC#?foA>aM9(Cm{NCI@ z%zniBeV){QzkZJE6os2MYn3ikMHyBpN4>5fdB}A00@9U5&{(GdAt7p!YB2**uKVjcG~{EzQD%v9Z$ONx#*HF zjydh3%OVrkzTiK>o_=fk7bC|7u4PLXk3K5h(KPsiTQ?2;k&w5E1=~Nb+ELv)Y|!xB z%$)q$T}y0f1J%yb9ZPNeMV*ZczFfdpdK_s5uiXFMg^lT1%NMSHYg~KAaObwOR!#ri zPmVl$iTj@a9zN8b?#On!m+-%Aj2%FLC4A+hVCnhxtZxq)R?wTFynb*Nb7rQm{>Pjz mvu?Zi_G{KZaK@%LKb(GY_pu*!0;Zk%q2GJwO*Wy&mHmG_5(N4H diff --git a/pandas/io/tests/sas/data/productsales.csv b/pandas/io/tests/sas/data/productsales.csv deleted file mode 100644 index fea9b68912297..0000000000000 --- a/pandas/io/tests/sas/data/productsales.csv +++ /dev/null @@ -1,1441 +0,0 @@ -ACTUAL,PREDICT,COUNTRY,REGION,DIVISION,PRODTYPE,PRODUCT,QUARTER,YEAR,MONTH 
-925,850,CANADA,EAST,EDUCATION,FURNITURE,SOFA,1,1993,12054 -999,297,CANADA,EAST,EDUCATION,FURNITURE,SOFA,1,1993,12085 -608,846,CANADA,EAST,EDUCATION,FURNITURE,SOFA,1,1993,12113 -642,533,CANADA,EAST,EDUCATION,FURNITURE,SOFA,2,1993,12144 -656,646,CANADA,EAST,EDUCATION,FURNITURE,SOFA,2,1993,12174 -948,486,CANADA,EAST,EDUCATION,FURNITURE,SOFA,2,1993,12205 -612,717,CANADA,EAST,EDUCATION,FURNITURE,SOFA,3,1993,12235 -114,564,CANADA,EAST,EDUCATION,FURNITURE,SOFA,3,1993,12266 -685,230,CANADA,EAST,EDUCATION,FURNITURE,SOFA,3,1993,12297 -657,494,CANADA,EAST,EDUCATION,FURNITURE,SOFA,4,1993,12327 -608,903,CANADA,EAST,EDUCATION,FURNITURE,SOFA,4,1993,12358 -353,266,CANADA,EAST,EDUCATION,FURNITURE,SOFA,4,1993,12388 -107,190,CANADA,EAST,EDUCATION,FURNITURE,SOFA,1,1994,12419 -354,139,CANADA,EAST,EDUCATION,FURNITURE,SOFA,1,1994,12450 -101,217,CANADA,EAST,EDUCATION,FURNITURE,SOFA,1,1994,12478 -553,560,CANADA,EAST,EDUCATION,FURNITURE,SOFA,2,1994,12509 -877,148,CANADA,EAST,EDUCATION,FURNITURE,SOFA,2,1994,12539 -431,762,CANADA,EAST,EDUCATION,FURNITURE,SOFA,2,1994,12570 -511,457,CANADA,EAST,EDUCATION,FURNITURE,SOFA,3,1994,12600 -157,532,CANADA,EAST,EDUCATION,FURNITURE,SOFA,3,1994,12631 -520,629,CANADA,EAST,EDUCATION,FURNITURE,SOFA,3,1994,12662 -114,491,CANADA,EAST,EDUCATION,FURNITURE,SOFA,4,1994,12692 -277,0,CANADA,EAST,EDUCATION,FURNITURE,SOFA,4,1994,12723 -561,979,CANADA,EAST,EDUCATION,FURNITURE,SOFA,4,1994,12753 -220,585,CANADA,EAST,EDUCATION,FURNITURE,BED,1,1993,12054 -444,267,CANADA,EAST,EDUCATION,FURNITURE,BED,1,1993,12085 -178,487,CANADA,EAST,EDUCATION,FURNITURE,BED,1,1993,12113 -756,764,CANADA,EAST,EDUCATION,FURNITURE,BED,2,1993,12144 -329,312,CANADA,EAST,EDUCATION,FURNITURE,BED,2,1993,12174 -910,531,CANADA,EAST,EDUCATION,FURNITURE,BED,2,1993,12205 -530,536,CANADA,EAST,EDUCATION,FURNITURE,BED,3,1993,12235 -101,773,CANADA,EAST,EDUCATION,FURNITURE,BED,3,1993,12266 -515,143,CANADA,EAST,EDUCATION,FURNITURE,BED,3,1993,12297 -730,126,CANADA,EAST,EDUCATION,FURNITURE,BED,4,1993,12327 
-993,862,CANADA,EAST,EDUCATION,FURNITURE,BED,4,1993,12358 -954,754,CANADA,EAST,EDUCATION,FURNITURE,BED,4,1993,12388 -267,410,CANADA,EAST,EDUCATION,FURNITURE,BED,1,1994,12419 -347,701,CANADA,EAST,EDUCATION,FURNITURE,BED,1,1994,12450 -991,204,CANADA,EAST,EDUCATION,FURNITURE,BED,1,1994,12478 -923,509,CANADA,EAST,EDUCATION,FURNITURE,BED,2,1994,12509 -437,378,CANADA,EAST,EDUCATION,FURNITURE,BED,2,1994,12539 -737,507,CANADA,EAST,EDUCATION,FURNITURE,BED,2,1994,12570 -104,49,CANADA,EAST,EDUCATION,FURNITURE,BED,3,1994,12600 -840,876,CANADA,EAST,EDUCATION,FURNITURE,BED,3,1994,12631 -704,66,CANADA,EAST,EDUCATION,FURNITURE,BED,3,1994,12662 -889,819,CANADA,EAST,EDUCATION,FURNITURE,BED,4,1994,12692 -107,351,CANADA,EAST,EDUCATION,FURNITURE,BED,4,1994,12723 -571,201,CANADA,EAST,EDUCATION,FURNITURE,BED,4,1994,12753 -688,209,CANADA,EAST,EDUCATION,OFFICE,TABLE,1,1993,12054 -544,51,CANADA,EAST,EDUCATION,OFFICE,TABLE,1,1993,12085 -954,135,CANADA,EAST,EDUCATION,OFFICE,TABLE,1,1993,12113 -445,47,CANADA,EAST,EDUCATION,OFFICE,TABLE,2,1993,12144 -829,379,CANADA,EAST,EDUCATION,OFFICE,TABLE,2,1993,12174 -464,758,CANADA,EAST,EDUCATION,OFFICE,TABLE,2,1993,12205 -968,475,CANADA,EAST,EDUCATION,OFFICE,TABLE,3,1993,12235 -842,343,CANADA,EAST,EDUCATION,OFFICE,TABLE,3,1993,12266 -721,507,CANADA,EAST,EDUCATION,OFFICE,TABLE,3,1993,12297 -966,269,CANADA,EAST,EDUCATION,OFFICE,TABLE,4,1993,12327 -332,699,CANADA,EAST,EDUCATION,OFFICE,TABLE,4,1993,12358 -328,824,CANADA,EAST,EDUCATION,OFFICE,TABLE,4,1993,12388 -355,497,CANADA,EAST,EDUCATION,OFFICE,TABLE,1,1994,12419 -506,44,CANADA,EAST,EDUCATION,OFFICE,TABLE,1,1994,12450 -585,522,CANADA,EAST,EDUCATION,OFFICE,TABLE,1,1994,12478 -634,378,CANADA,EAST,EDUCATION,OFFICE,TABLE,2,1994,12509 -662,689,CANADA,EAST,EDUCATION,OFFICE,TABLE,2,1994,12539 -783,90,CANADA,EAST,EDUCATION,OFFICE,TABLE,2,1994,12570 -786,720,CANADA,EAST,EDUCATION,OFFICE,TABLE,3,1994,12600 -710,343,CANADA,EAST,EDUCATION,OFFICE,TABLE,3,1994,12631 
-950,457,CANADA,EAST,EDUCATION,OFFICE,TABLE,3,1994,12662 -274,947,CANADA,EAST,EDUCATION,OFFICE,TABLE,4,1994,12692 -406,834,CANADA,EAST,EDUCATION,OFFICE,TABLE,4,1994,12723 -515,71,CANADA,EAST,EDUCATION,OFFICE,TABLE,4,1994,12753 -35,282,CANADA,EAST,EDUCATION,OFFICE,CHAIR,1,1993,12054 -995,538,CANADA,EAST,EDUCATION,OFFICE,CHAIR,1,1993,12085 -670,679,CANADA,EAST,EDUCATION,OFFICE,CHAIR,1,1993,12113 -406,601,CANADA,EAST,EDUCATION,OFFICE,CHAIR,2,1993,12144 -825,577,CANADA,EAST,EDUCATION,OFFICE,CHAIR,2,1993,12174 -467,908,CANADA,EAST,EDUCATION,OFFICE,CHAIR,2,1993,12205 -709,819,CANADA,EAST,EDUCATION,OFFICE,CHAIR,3,1993,12235 -522,687,CANADA,EAST,EDUCATION,OFFICE,CHAIR,3,1993,12266 -688,157,CANADA,EAST,EDUCATION,OFFICE,CHAIR,3,1993,12297 -956,111,CANADA,EAST,EDUCATION,OFFICE,CHAIR,4,1993,12327 -129,31,CANADA,EAST,EDUCATION,OFFICE,CHAIR,4,1993,12358 -687,790,CANADA,EAST,EDUCATION,OFFICE,CHAIR,4,1993,12388 -877,795,CANADA,EAST,EDUCATION,OFFICE,CHAIR,1,1994,12419 -845,379,CANADA,EAST,EDUCATION,OFFICE,CHAIR,1,1994,12450 -425,114,CANADA,EAST,EDUCATION,OFFICE,CHAIR,1,1994,12478 -899,475,CANADA,EAST,EDUCATION,OFFICE,CHAIR,2,1994,12509 -987,747,CANADA,EAST,EDUCATION,OFFICE,CHAIR,2,1994,12539 -641,372,CANADA,EAST,EDUCATION,OFFICE,CHAIR,2,1994,12570 -448,415,CANADA,EAST,EDUCATION,OFFICE,CHAIR,3,1994,12600 -341,955,CANADA,EAST,EDUCATION,OFFICE,CHAIR,3,1994,12631 -137,356,CANADA,EAST,EDUCATION,OFFICE,CHAIR,3,1994,12662 -235,316,CANADA,EAST,EDUCATION,OFFICE,CHAIR,4,1994,12692 -482,351,CANADA,EAST,EDUCATION,OFFICE,CHAIR,4,1994,12723 -678,164,CANADA,EAST,EDUCATION,OFFICE,CHAIR,4,1994,12753 -240,386,CANADA,EAST,EDUCATION,OFFICE,DESK,1,1993,12054 -605,113,CANADA,EAST,EDUCATION,OFFICE,DESK,1,1993,12085 -274,68,CANADA,EAST,EDUCATION,OFFICE,DESK,1,1993,12113 -422,885,CANADA,EAST,EDUCATION,OFFICE,DESK,2,1993,12144 -763,575,CANADA,EAST,EDUCATION,OFFICE,DESK,2,1993,12174 -561,743,CANADA,EAST,EDUCATION,OFFICE,DESK,2,1993,12205 -339,816,CANADA,EAST,EDUCATION,OFFICE,DESK,3,1993,12235 
-877,203,CANADA,EAST,EDUCATION,OFFICE,DESK,3,1993,12266 -192,581,CANADA,EAST,EDUCATION,OFFICE,DESK,3,1993,12297 -604,815,CANADA,EAST,EDUCATION,OFFICE,DESK,4,1993,12327 -55,333,CANADA,EAST,EDUCATION,OFFICE,DESK,4,1993,12358 -87,40,CANADA,EAST,EDUCATION,OFFICE,DESK,4,1993,12388 -942,672,CANADA,EAST,EDUCATION,OFFICE,DESK,1,1994,12419 -912,23,CANADA,EAST,EDUCATION,OFFICE,DESK,1,1994,12450 -768,948,CANADA,EAST,EDUCATION,OFFICE,DESK,1,1994,12478 -951,291,CANADA,EAST,EDUCATION,OFFICE,DESK,2,1994,12509 -768,839,CANADA,EAST,EDUCATION,OFFICE,DESK,2,1994,12539 -978,864,CANADA,EAST,EDUCATION,OFFICE,DESK,2,1994,12570 -20,337,CANADA,EAST,EDUCATION,OFFICE,DESK,3,1994,12600 -298,95,CANADA,EAST,EDUCATION,OFFICE,DESK,3,1994,12631 -193,535,CANADA,EAST,EDUCATION,OFFICE,DESK,3,1994,12662 -336,191,CANADA,EAST,EDUCATION,OFFICE,DESK,4,1994,12692 -617,412,CANADA,EAST,EDUCATION,OFFICE,DESK,4,1994,12723 -709,711,CANADA,EAST,EDUCATION,OFFICE,DESK,4,1994,12753 -5,425,CANADA,EAST,CONSUMER,FURNITURE,SOFA,1,1993,12054 -164,215,CANADA,EAST,CONSUMER,FURNITURE,SOFA,1,1993,12085 -422,948,CANADA,EAST,CONSUMER,FURNITURE,SOFA,1,1993,12113 -424,544,CANADA,EAST,CONSUMER,FURNITURE,SOFA,2,1993,12144 -854,764,CANADA,EAST,CONSUMER,FURNITURE,SOFA,2,1993,12174 -168,446,CANADA,EAST,CONSUMER,FURNITURE,SOFA,2,1993,12205 -8,957,CANADA,EAST,CONSUMER,FURNITURE,SOFA,3,1993,12235 -748,967,CANADA,EAST,CONSUMER,FURNITURE,SOFA,3,1993,12266 -682,11,CANADA,EAST,CONSUMER,FURNITURE,SOFA,3,1993,12297 -300,110,CANADA,EAST,CONSUMER,FURNITURE,SOFA,4,1993,12327 -672,263,CANADA,EAST,CONSUMER,FURNITURE,SOFA,4,1993,12358 -894,215,CANADA,EAST,CONSUMER,FURNITURE,SOFA,4,1993,12388 -944,965,CANADA,EAST,CONSUMER,FURNITURE,SOFA,1,1994,12419 -403,423,CANADA,EAST,CONSUMER,FURNITURE,SOFA,1,1994,12450 -596,753,CANADA,EAST,CONSUMER,FURNITURE,SOFA,1,1994,12478 -481,770,CANADA,EAST,CONSUMER,FURNITURE,SOFA,2,1994,12509 -503,263,CANADA,EAST,CONSUMER,FURNITURE,SOFA,2,1994,12539 -126,79,CANADA,EAST,CONSUMER,FURNITURE,SOFA,2,1994,12570 
-721,441,CANADA,EAST,CONSUMER,FURNITURE,SOFA,3,1994,12600 -271,858,CANADA,EAST,CONSUMER,FURNITURE,SOFA,3,1994,12631 -721,667,CANADA,EAST,CONSUMER,FURNITURE,SOFA,3,1994,12662 -157,193,CANADA,EAST,CONSUMER,FURNITURE,SOFA,4,1994,12692 -991,394,CANADA,EAST,CONSUMER,FURNITURE,SOFA,4,1994,12723 -499,680,CANADA,EAST,CONSUMER,FURNITURE,SOFA,4,1994,12753 -284,414,CANADA,EAST,CONSUMER,FURNITURE,BED,1,1993,12054 -705,770,CANADA,EAST,CONSUMER,FURNITURE,BED,1,1993,12085 -737,679,CANADA,EAST,CONSUMER,FURNITURE,BED,1,1993,12113 -745,7,CANADA,EAST,CONSUMER,FURNITURE,BED,2,1993,12144 -633,713,CANADA,EAST,CONSUMER,FURNITURE,BED,2,1993,12174 -983,851,CANADA,EAST,CONSUMER,FURNITURE,BED,2,1993,12205 -591,944,CANADA,EAST,CONSUMER,FURNITURE,BED,3,1993,12235 -42,130,CANADA,EAST,CONSUMER,FURNITURE,BED,3,1993,12266 -771,485,CANADA,EAST,CONSUMER,FURNITURE,BED,3,1993,12297 -465,23,CANADA,EAST,CONSUMER,FURNITURE,BED,4,1993,12327 -296,193,CANADA,EAST,CONSUMER,FURNITURE,BED,4,1993,12358 -890,7,CANADA,EAST,CONSUMER,FURNITURE,BED,4,1993,12388 -312,919,CANADA,EAST,CONSUMER,FURNITURE,BED,1,1994,12419 -777,768,CANADA,EAST,CONSUMER,FURNITURE,BED,1,1994,12450 -364,854,CANADA,EAST,CONSUMER,FURNITURE,BED,1,1994,12478 -601,411,CANADA,EAST,CONSUMER,FURNITURE,BED,2,1994,12509 -823,736,CANADA,EAST,CONSUMER,FURNITURE,BED,2,1994,12539 -847,10,CANADA,EAST,CONSUMER,FURNITURE,BED,2,1994,12570 -490,311,CANADA,EAST,CONSUMER,FURNITURE,BED,3,1994,12600 -387,348,CANADA,EAST,CONSUMER,FURNITURE,BED,3,1994,12631 -688,458,CANADA,EAST,CONSUMER,FURNITURE,BED,3,1994,12662 -650,195,CANADA,EAST,CONSUMER,FURNITURE,BED,4,1994,12692 -447,658,CANADA,EAST,CONSUMER,FURNITURE,BED,4,1994,12723 -91,704,CANADA,EAST,CONSUMER,FURNITURE,BED,4,1994,12753 -197,807,CANADA,EAST,CONSUMER,OFFICE,TABLE,1,1993,12054 -51,861,CANADA,EAST,CONSUMER,OFFICE,TABLE,1,1993,12085 -570,873,CANADA,EAST,CONSUMER,OFFICE,TABLE,1,1993,12113 -423,933,CANADA,EAST,CONSUMER,OFFICE,TABLE,2,1993,12144 -524,355,CANADA,EAST,CONSUMER,OFFICE,TABLE,2,1993,12174 
-416,794,CANADA,EAST,CONSUMER,OFFICE,TABLE,2,1993,12205 -789,645,CANADA,EAST,CONSUMER,OFFICE,TABLE,3,1993,12235 -551,700,CANADA,EAST,CONSUMER,OFFICE,TABLE,3,1993,12266 -400,831,CANADA,EAST,CONSUMER,OFFICE,TABLE,3,1993,12297 -361,800,CANADA,EAST,CONSUMER,OFFICE,TABLE,4,1993,12327 -189,830,CANADA,EAST,CONSUMER,OFFICE,TABLE,4,1993,12358 -554,828,CANADA,EAST,CONSUMER,OFFICE,TABLE,4,1993,12388 -585,12,CANADA,EAST,CONSUMER,OFFICE,TABLE,1,1994,12419 -281,501,CANADA,EAST,CONSUMER,OFFICE,TABLE,1,1994,12450 -629,914,CANADA,EAST,CONSUMER,OFFICE,TABLE,1,1994,12478 -43,685,CANADA,EAST,CONSUMER,OFFICE,TABLE,2,1994,12509 -533,755,CANADA,EAST,CONSUMER,OFFICE,TABLE,2,1994,12539 -882,708,CANADA,EAST,CONSUMER,OFFICE,TABLE,2,1994,12570 -790,595,CANADA,EAST,CONSUMER,OFFICE,TABLE,3,1994,12600 -600,32,CANADA,EAST,CONSUMER,OFFICE,TABLE,3,1994,12631 -148,49,CANADA,EAST,CONSUMER,OFFICE,TABLE,3,1994,12662 -237,727,CANADA,EAST,CONSUMER,OFFICE,TABLE,4,1994,12692 -488,239,CANADA,EAST,CONSUMER,OFFICE,TABLE,4,1994,12723 -457,273,CANADA,EAST,CONSUMER,OFFICE,TABLE,4,1994,12753 -401,986,CANADA,EAST,CONSUMER,OFFICE,CHAIR,1,1993,12054 -181,544,CANADA,EAST,CONSUMER,OFFICE,CHAIR,1,1993,12085 -995,182,CANADA,EAST,CONSUMER,OFFICE,CHAIR,1,1993,12113 -120,197,CANADA,EAST,CONSUMER,OFFICE,CHAIR,2,1993,12144 -119,435,CANADA,EAST,CONSUMER,OFFICE,CHAIR,2,1993,12174 -319,974,CANADA,EAST,CONSUMER,OFFICE,CHAIR,2,1993,12205 -333,524,CANADA,EAST,CONSUMER,OFFICE,CHAIR,3,1993,12235 -923,688,CANADA,EAST,CONSUMER,OFFICE,CHAIR,3,1993,12266 -634,750,CANADA,EAST,CONSUMER,OFFICE,CHAIR,3,1993,12297 -493,155,CANADA,EAST,CONSUMER,OFFICE,CHAIR,4,1993,12327 -461,860,CANADA,EAST,CONSUMER,OFFICE,CHAIR,4,1993,12358 -304,102,CANADA,EAST,CONSUMER,OFFICE,CHAIR,4,1993,12388 -641,425,CANADA,EAST,CONSUMER,OFFICE,CHAIR,1,1994,12419 -992,224,CANADA,EAST,CONSUMER,OFFICE,CHAIR,1,1994,12450 -202,408,CANADA,EAST,CONSUMER,OFFICE,CHAIR,1,1994,12478 -770,524,CANADA,EAST,CONSUMER,OFFICE,CHAIR,2,1994,12509 
-202,816,CANADA,EAST,CONSUMER,OFFICE,CHAIR,2,1994,12539 -14,515,CANADA,EAST,CONSUMER,OFFICE,CHAIR,2,1994,12570 -134,793,CANADA,EAST,CONSUMER,OFFICE,CHAIR,3,1994,12600 -977,460,CANADA,EAST,CONSUMER,OFFICE,CHAIR,3,1994,12631 -174,732,CANADA,EAST,CONSUMER,OFFICE,CHAIR,3,1994,12662 -429,435,CANADA,EAST,CONSUMER,OFFICE,CHAIR,4,1994,12692 -514,38,CANADA,EAST,CONSUMER,OFFICE,CHAIR,4,1994,12723 -784,616,CANADA,EAST,CONSUMER,OFFICE,CHAIR,4,1994,12753 -973,225,CANADA,EAST,CONSUMER,OFFICE,DESK,1,1993,12054 -511,402,CANADA,EAST,CONSUMER,OFFICE,DESK,1,1993,12085 -30,697,CANADA,EAST,CONSUMER,OFFICE,DESK,1,1993,12113 -895,567,CANADA,EAST,CONSUMER,OFFICE,DESK,2,1993,12144 -557,231,CANADA,EAST,CONSUMER,OFFICE,DESK,2,1993,12174 -282,372,CANADA,EAST,CONSUMER,OFFICE,DESK,2,1993,12205 -909,15,CANADA,EAST,CONSUMER,OFFICE,DESK,3,1993,12235 -276,866,CANADA,EAST,CONSUMER,OFFICE,DESK,3,1993,12266 -234,452,CANADA,EAST,CONSUMER,OFFICE,DESK,3,1993,12297 -479,663,CANADA,EAST,CONSUMER,OFFICE,DESK,4,1993,12327 -782,982,CANADA,EAST,CONSUMER,OFFICE,DESK,4,1993,12358 -755,813,CANADA,EAST,CONSUMER,OFFICE,DESK,4,1993,12388 -689,523,CANADA,EAST,CONSUMER,OFFICE,DESK,1,1994,12419 -496,871,CANADA,EAST,CONSUMER,OFFICE,DESK,1,1994,12450 -24,511,CANADA,EAST,CONSUMER,OFFICE,DESK,1,1994,12478 -379,819,CANADA,EAST,CONSUMER,OFFICE,DESK,2,1994,12509 -441,525,CANADA,EAST,CONSUMER,OFFICE,DESK,2,1994,12539 -49,13,CANADA,EAST,CONSUMER,OFFICE,DESK,2,1994,12570 -243,694,CANADA,EAST,CONSUMER,OFFICE,DESK,3,1994,12600 -295,782,CANADA,EAST,CONSUMER,OFFICE,DESK,3,1994,12631 -395,839,CANADA,EAST,CONSUMER,OFFICE,DESK,3,1994,12662 -929,461,CANADA,EAST,CONSUMER,OFFICE,DESK,4,1994,12692 -997,303,CANADA,EAST,CONSUMER,OFFICE,DESK,4,1994,12723 -889,421,CANADA,EAST,CONSUMER,OFFICE,DESK,4,1994,12753 -72,421,CANADA,WEST,EDUCATION,FURNITURE,SOFA,1,1993,12054 -926,433,CANADA,WEST,EDUCATION,FURNITURE,SOFA,1,1993,12085 -850,394,CANADA,WEST,EDUCATION,FURNITURE,SOFA,1,1993,12113 -826,338,CANADA,WEST,EDUCATION,FURNITURE,SOFA,2,1993,12144 
-651,764,CANADA,WEST,EDUCATION,FURNITURE,SOFA,2,1993,12174 -854,216,CANADA,WEST,EDUCATION,FURNITURE,SOFA,2,1993,12205 -899,96,CANADA,WEST,EDUCATION,FURNITURE,SOFA,3,1993,12235 -309,550,CANADA,WEST,EDUCATION,FURNITURE,SOFA,3,1993,12266 -943,636,CANADA,WEST,EDUCATION,FURNITURE,SOFA,3,1993,12297 -138,427,CANADA,WEST,EDUCATION,FURNITURE,SOFA,4,1993,12327 -99,652,CANADA,WEST,EDUCATION,FURNITURE,SOFA,4,1993,12358 -270,478,CANADA,WEST,EDUCATION,FURNITURE,SOFA,4,1993,12388 -862,18,CANADA,WEST,EDUCATION,FURNITURE,SOFA,1,1994,12419 -574,40,CANADA,WEST,EDUCATION,FURNITURE,SOFA,1,1994,12450 -359,453,CANADA,WEST,EDUCATION,FURNITURE,SOFA,1,1994,12478 -958,987,CANADA,WEST,EDUCATION,FURNITURE,SOFA,2,1994,12509 -791,26,CANADA,WEST,EDUCATION,FURNITURE,SOFA,2,1994,12539 -284,101,CANADA,WEST,EDUCATION,FURNITURE,SOFA,2,1994,12570 -190,969,CANADA,WEST,EDUCATION,FURNITURE,SOFA,3,1994,12600 -527,492,CANADA,WEST,EDUCATION,FURNITURE,SOFA,3,1994,12631 -112,263,CANADA,WEST,EDUCATION,FURNITURE,SOFA,3,1994,12662 -271,593,CANADA,WEST,EDUCATION,FURNITURE,SOFA,4,1994,12692 -643,923,CANADA,WEST,EDUCATION,FURNITURE,SOFA,4,1994,12723 -554,146,CANADA,WEST,EDUCATION,FURNITURE,SOFA,4,1994,12753 -211,305,CANADA,WEST,EDUCATION,FURNITURE,BED,1,1993,12054 -368,318,CANADA,WEST,EDUCATION,FURNITURE,BED,1,1993,12085 -778,417,CANADA,WEST,EDUCATION,FURNITURE,BED,1,1993,12113 -808,623,CANADA,WEST,EDUCATION,FURNITURE,BED,2,1993,12144 -46,761,CANADA,WEST,EDUCATION,FURNITURE,BED,2,1993,12174 -466,272,CANADA,WEST,EDUCATION,FURNITURE,BED,2,1993,12205 -18,988,CANADA,WEST,EDUCATION,FURNITURE,BED,3,1993,12235 -87,821,CANADA,WEST,EDUCATION,FURNITURE,BED,3,1993,12266 -765,962,CANADA,WEST,EDUCATION,FURNITURE,BED,3,1993,12297 -62,615,CANADA,WEST,EDUCATION,FURNITURE,BED,4,1993,12327 -13,523,CANADA,WEST,EDUCATION,FURNITURE,BED,4,1993,12358 -775,806,CANADA,WEST,EDUCATION,FURNITURE,BED,4,1993,12388 -636,586,CANADA,WEST,EDUCATION,FURNITURE,BED,1,1994,12419 -458,520,CANADA,WEST,EDUCATION,FURNITURE,BED,1,1994,12450 
-206,908,CANADA,WEST,EDUCATION,FURNITURE,BED,1,1994,12478 -310,30,CANADA,WEST,EDUCATION,FURNITURE,BED,2,1994,12509 -813,247,CANADA,WEST,EDUCATION,FURNITURE,BED,2,1994,12539 -22,647,CANADA,WEST,EDUCATION,FURNITURE,BED,2,1994,12570 -742,55,CANADA,WEST,EDUCATION,FURNITURE,BED,3,1994,12600 -394,154,CANADA,WEST,EDUCATION,FURNITURE,BED,3,1994,12631 -957,344,CANADA,WEST,EDUCATION,FURNITURE,BED,3,1994,12662 -205,95,CANADA,WEST,EDUCATION,FURNITURE,BED,4,1994,12692 -198,665,CANADA,WEST,EDUCATION,FURNITURE,BED,4,1994,12723 -638,145,CANADA,WEST,EDUCATION,FURNITURE,BED,4,1994,12753 -155,925,CANADA,WEST,EDUCATION,OFFICE,TABLE,1,1993,12054 -688,395,CANADA,WEST,EDUCATION,OFFICE,TABLE,1,1993,12085 -730,749,CANADA,WEST,EDUCATION,OFFICE,TABLE,1,1993,12113 -208,279,CANADA,WEST,EDUCATION,OFFICE,TABLE,2,1993,12144 -525,288,CANADA,WEST,EDUCATION,OFFICE,TABLE,2,1993,12174 -483,509,CANADA,WEST,EDUCATION,OFFICE,TABLE,2,1993,12205 -748,255,CANADA,WEST,EDUCATION,OFFICE,TABLE,3,1993,12235 -6,214,CANADA,WEST,EDUCATION,OFFICE,TABLE,3,1993,12266 -168,473,CANADA,WEST,EDUCATION,OFFICE,TABLE,3,1993,12297 -301,702,CANADA,WEST,EDUCATION,OFFICE,TABLE,4,1993,12327 -9,814,CANADA,WEST,EDUCATION,OFFICE,TABLE,4,1993,12358 -778,231,CANADA,WEST,EDUCATION,OFFICE,TABLE,4,1993,12388 -799,422,CANADA,WEST,EDUCATION,OFFICE,TABLE,1,1994,12419 -309,572,CANADA,WEST,EDUCATION,OFFICE,TABLE,1,1994,12450 -433,363,CANADA,WEST,EDUCATION,OFFICE,TABLE,1,1994,12478 -969,919,CANADA,WEST,EDUCATION,OFFICE,TABLE,2,1994,12509 -181,355,CANADA,WEST,EDUCATION,OFFICE,TABLE,2,1994,12539 -787,992,CANADA,WEST,EDUCATION,OFFICE,TABLE,2,1994,12570 -971,147,CANADA,WEST,EDUCATION,OFFICE,TABLE,3,1994,12600 -440,183,CANADA,WEST,EDUCATION,OFFICE,TABLE,3,1994,12631 -209,375,CANADA,WEST,EDUCATION,OFFICE,TABLE,3,1994,12662 -537,77,CANADA,WEST,EDUCATION,OFFICE,TABLE,4,1994,12692 -364,308,CANADA,WEST,EDUCATION,OFFICE,TABLE,4,1994,12723 -377,660,CANADA,WEST,EDUCATION,OFFICE,TABLE,4,1994,12753 -251,555,CANADA,WEST,EDUCATION,OFFICE,CHAIR,1,1993,12054 
-607,455,CANADA,WEST,EDUCATION,OFFICE,CHAIR,1,1993,12085 -127,888,CANADA,WEST,EDUCATION,OFFICE,CHAIR,1,1993,12113 -513,652,CANADA,WEST,EDUCATION,OFFICE,CHAIR,2,1993,12144 -146,799,CANADA,WEST,EDUCATION,OFFICE,CHAIR,2,1993,12174 -917,249,CANADA,WEST,EDUCATION,OFFICE,CHAIR,2,1993,12205 -776,539,CANADA,WEST,EDUCATION,OFFICE,CHAIR,3,1993,12235 -330,198,CANADA,WEST,EDUCATION,OFFICE,CHAIR,3,1993,12266 -981,340,CANADA,WEST,EDUCATION,OFFICE,CHAIR,3,1993,12297 -862,152,CANADA,WEST,EDUCATION,OFFICE,CHAIR,4,1993,12327 -612,347,CANADA,WEST,EDUCATION,OFFICE,CHAIR,4,1993,12358 -607,565,CANADA,WEST,EDUCATION,OFFICE,CHAIR,4,1993,12388 -786,855,CANADA,WEST,EDUCATION,OFFICE,CHAIR,1,1994,12419 -160,87,CANADA,WEST,EDUCATION,OFFICE,CHAIR,1,1994,12450 -199,69,CANADA,WEST,EDUCATION,OFFICE,CHAIR,1,1994,12478 -972,807,CANADA,WEST,EDUCATION,OFFICE,CHAIR,2,1994,12509 -870,565,CANADA,WEST,EDUCATION,OFFICE,CHAIR,2,1994,12539 -494,798,CANADA,WEST,EDUCATION,OFFICE,CHAIR,2,1994,12570 -975,714,CANADA,WEST,EDUCATION,OFFICE,CHAIR,3,1994,12600 -760,17,CANADA,WEST,EDUCATION,OFFICE,CHAIR,3,1994,12631 -180,797,CANADA,WEST,EDUCATION,OFFICE,CHAIR,3,1994,12662 -256,422,CANADA,WEST,EDUCATION,OFFICE,CHAIR,4,1994,12692 -422,621,CANADA,WEST,EDUCATION,OFFICE,CHAIR,4,1994,12723 -859,661,CANADA,WEST,EDUCATION,OFFICE,CHAIR,4,1994,12753 -586,363,CANADA,WEST,EDUCATION,OFFICE,DESK,1,1993,12054 -441,910,CANADA,WEST,EDUCATION,OFFICE,DESK,1,1993,12085 -597,998,CANADA,WEST,EDUCATION,OFFICE,DESK,1,1993,12113 -717,95,CANADA,WEST,EDUCATION,OFFICE,DESK,2,1993,12144 -713,731,CANADA,WEST,EDUCATION,OFFICE,DESK,2,1993,12174 -591,718,CANADA,WEST,EDUCATION,OFFICE,DESK,2,1993,12205 -492,467,CANADA,WEST,EDUCATION,OFFICE,DESK,3,1993,12235 -170,126,CANADA,WEST,EDUCATION,OFFICE,DESK,3,1993,12266 -684,127,CANADA,WEST,EDUCATION,OFFICE,DESK,3,1993,12297 -981,746,CANADA,WEST,EDUCATION,OFFICE,DESK,4,1993,12327 -966,878,CANADA,WEST,EDUCATION,OFFICE,DESK,4,1993,12358 -439,27,CANADA,WEST,EDUCATION,OFFICE,DESK,4,1993,12388 
-151,569,CANADA,WEST,EDUCATION,OFFICE,DESK,1,1994,12419 -602,812,CANADA,WEST,EDUCATION,OFFICE,DESK,1,1994,12450 -187,603,CANADA,WEST,EDUCATION,OFFICE,DESK,1,1994,12478 -415,506,CANADA,WEST,EDUCATION,OFFICE,DESK,2,1994,12509 -61,185,CANADA,WEST,EDUCATION,OFFICE,DESK,2,1994,12539 -839,692,CANADA,WEST,EDUCATION,OFFICE,DESK,2,1994,12570 -596,565,CANADA,WEST,EDUCATION,OFFICE,DESK,3,1994,12600 -751,512,CANADA,WEST,EDUCATION,OFFICE,DESK,3,1994,12631 -460,86,CANADA,WEST,EDUCATION,OFFICE,DESK,3,1994,12662 -922,399,CANADA,WEST,EDUCATION,OFFICE,DESK,4,1994,12692 -153,672,CANADA,WEST,EDUCATION,OFFICE,DESK,4,1994,12723 -928,801,CANADA,WEST,EDUCATION,OFFICE,DESK,4,1994,12753 -951,730,CANADA,WEST,CONSUMER,FURNITURE,SOFA,1,1993,12054 -394,408,CANADA,WEST,CONSUMER,FURNITURE,SOFA,1,1993,12085 -615,982,CANADA,WEST,CONSUMER,FURNITURE,SOFA,1,1993,12113 -653,499,CANADA,WEST,CONSUMER,FURNITURE,SOFA,2,1993,12144 -180,307,CANADA,WEST,CONSUMER,FURNITURE,SOFA,2,1993,12174 -649,741,CANADA,WEST,CONSUMER,FURNITURE,SOFA,2,1993,12205 -921,640,CANADA,WEST,CONSUMER,FURNITURE,SOFA,3,1993,12235 -11,300,CANADA,WEST,CONSUMER,FURNITURE,SOFA,3,1993,12266 -696,929,CANADA,WEST,CONSUMER,FURNITURE,SOFA,3,1993,12297 -795,309,CANADA,WEST,CONSUMER,FURNITURE,SOFA,4,1993,12327 -550,340,CANADA,WEST,CONSUMER,FURNITURE,SOFA,4,1993,12358 -320,228,CANADA,WEST,CONSUMER,FURNITURE,SOFA,4,1993,12388 -845,1000,CANADA,WEST,CONSUMER,FURNITURE,SOFA,1,1994,12419 -245,21,CANADA,WEST,CONSUMER,FURNITURE,SOFA,1,1994,12450 -142,583,CANADA,WEST,CONSUMER,FURNITURE,SOFA,1,1994,12478 -717,506,CANADA,WEST,CONSUMER,FURNITURE,SOFA,2,1994,12509 -3,405,CANADA,WEST,CONSUMER,FURNITURE,SOFA,2,1994,12539 -790,556,CANADA,WEST,CONSUMER,FURNITURE,SOFA,2,1994,12570 -646,72,CANADA,WEST,CONSUMER,FURNITURE,SOFA,3,1994,12600 -230,103,CANADA,WEST,CONSUMER,FURNITURE,SOFA,3,1994,12631 -938,262,CANADA,WEST,CONSUMER,FURNITURE,SOFA,3,1994,12662 -629,102,CANADA,WEST,CONSUMER,FURNITURE,SOFA,4,1994,12692 -317,841,CANADA,WEST,CONSUMER,FURNITURE,SOFA,4,1994,12723 
-812,159,CANADA,WEST,CONSUMER,FURNITURE,SOFA,4,1994,12753 -141,570,CANADA,WEST,CONSUMER,FURNITURE,BED,1,1993,12054 -64,375,CANADA,WEST,CONSUMER,FURNITURE,BED,1,1993,12085 -207,298,CANADA,WEST,CONSUMER,FURNITURE,BED,1,1993,12113 -435,32,CANADA,WEST,CONSUMER,FURNITURE,BED,2,1993,12144 -96,760,CANADA,WEST,CONSUMER,FURNITURE,BED,2,1993,12174 -252,338,CANADA,WEST,CONSUMER,FURNITURE,BED,2,1993,12205 -956,149,CANADA,WEST,CONSUMER,FURNITURE,BED,3,1993,12235 -633,343,CANADA,WEST,CONSUMER,FURNITURE,BED,3,1993,12266 -190,151,CANADA,WEST,CONSUMER,FURNITURE,BED,3,1993,12297 -227,44,CANADA,WEST,CONSUMER,FURNITURE,BED,4,1993,12327 -24,583,CANADA,WEST,CONSUMER,FURNITURE,BED,4,1993,12358 -420,230,CANADA,WEST,CONSUMER,FURNITURE,BED,4,1993,12388 -910,907,CANADA,WEST,CONSUMER,FURNITURE,BED,1,1994,12419 -709,783,CANADA,WEST,CONSUMER,FURNITURE,BED,1,1994,12450 -810,117,CANADA,WEST,CONSUMER,FURNITURE,BED,1,1994,12478 -723,416,CANADA,WEST,CONSUMER,FURNITURE,BED,2,1994,12509 -911,318,CANADA,WEST,CONSUMER,FURNITURE,BED,2,1994,12539 -230,888,CANADA,WEST,CONSUMER,FURNITURE,BED,2,1994,12570 -448,60,CANADA,WEST,CONSUMER,FURNITURE,BED,3,1994,12600 -945,596,CANADA,WEST,CONSUMER,FURNITURE,BED,3,1994,12631 -508,576,CANADA,WEST,CONSUMER,FURNITURE,BED,3,1994,12662 -262,576,CANADA,WEST,CONSUMER,FURNITURE,BED,4,1994,12692 -441,280,CANADA,WEST,CONSUMER,FURNITURE,BED,4,1994,12723 -15,219,CANADA,WEST,CONSUMER,FURNITURE,BED,4,1994,12753 -795,133,CANADA,WEST,CONSUMER,OFFICE,TABLE,1,1993,12054 -301,273,CANADA,WEST,CONSUMER,OFFICE,TABLE,1,1993,12085 -304,86,CANADA,WEST,CONSUMER,OFFICE,TABLE,1,1993,12113 -49,400,CANADA,WEST,CONSUMER,OFFICE,TABLE,2,1993,12144 -576,364,CANADA,WEST,CONSUMER,OFFICE,TABLE,2,1993,12174 -669,63,CANADA,WEST,CONSUMER,OFFICE,TABLE,2,1993,12205 -325,929,CANADA,WEST,CONSUMER,OFFICE,TABLE,3,1993,12235 -272,344,CANADA,WEST,CONSUMER,OFFICE,TABLE,3,1993,12266 -80,768,CANADA,WEST,CONSUMER,OFFICE,TABLE,3,1993,12297 -46,668,CANADA,WEST,CONSUMER,OFFICE,TABLE,4,1993,12327 
-223,407,CANADA,WEST,CONSUMER,OFFICE,TABLE,4,1993,12358 -774,536,CANADA,WEST,CONSUMER,OFFICE,TABLE,4,1993,12388 -784,657,CANADA,WEST,CONSUMER,OFFICE,TABLE,1,1994,12419 -92,215,CANADA,WEST,CONSUMER,OFFICE,TABLE,1,1994,12450 -67,966,CANADA,WEST,CONSUMER,OFFICE,TABLE,1,1994,12478 -747,674,CANADA,WEST,CONSUMER,OFFICE,TABLE,2,1994,12509 -686,574,CANADA,WEST,CONSUMER,OFFICE,TABLE,2,1994,12539 -93,266,CANADA,WEST,CONSUMER,OFFICE,TABLE,2,1994,12570 -192,680,CANADA,WEST,CONSUMER,OFFICE,TABLE,3,1994,12600 -51,362,CANADA,WEST,CONSUMER,OFFICE,TABLE,3,1994,12631 -498,412,CANADA,WEST,CONSUMER,OFFICE,TABLE,3,1994,12662 -546,431,CANADA,WEST,CONSUMER,OFFICE,TABLE,4,1994,12692 -485,94,CANADA,WEST,CONSUMER,OFFICE,TABLE,4,1994,12723 -925,345,CANADA,WEST,CONSUMER,OFFICE,TABLE,4,1994,12753 -292,445,CANADA,WEST,CONSUMER,OFFICE,CHAIR,1,1993,12054 -540,632,CANADA,WEST,CONSUMER,OFFICE,CHAIR,1,1993,12085 -21,855,CANADA,WEST,CONSUMER,OFFICE,CHAIR,1,1993,12113 -100,36,CANADA,WEST,CONSUMER,OFFICE,CHAIR,2,1993,12144 -49,250,CANADA,WEST,CONSUMER,OFFICE,CHAIR,2,1993,12174 -353,427,CANADA,WEST,CONSUMER,OFFICE,CHAIR,2,1993,12205 -911,367,CANADA,WEST,CONSUMER,OFFICE,CHAIR,3,1993,12235 -823,245,CANADA,WEST,CONSUMER,OFFICE,CHAIR,3,1993,12266 -278,893,CANADA,WEST,CONSUMER,OFFICE,CHAIR,3,1993,12297 -576,490,CANADA,WEST,CONSUMER,OFFICE,CHAIR,4,1993,12327 -655,88,CANADA,WEST,CONSUMER,OFFICE,CHAIR,4,1993,12358 -763,964,CANADA,WEST,CONSUMER,OFFICE,CHAIR,4,1993,12388 -88,62,CANADA,WEST,CONSUMER,OFFICE,CHAIR,1,1994,12419 -746,506,CANADA,WEST,CONSUMER,OFFICE,CHAIR,1,1994,12450 -927,680,CANADA,WEST,CONSUMER,OFFICE,CHAIR,1,1994,12478 -297,153,CANADA,WEST,CONSUMER,OFFICE,CHAIR,2,1994,12509 -291,403,CANADA,WEST,CONSUMER,OFFICE,CHAIR,2,1994,12539 -838,98,CANADA,WEST,CONSUMER,OFFICE,CHAIR,2,1994,12570 -112,376,CANADA,WEST,CONSUMER,OFFICE,CHAIR,3,1994,12600 -509,477,CANADA,WEST,CONSUMER,OFFICE,CHAIR,3,1994,12631 -472,50,CANADA,WEST,CONSUMER,OFFICE,CHAIR,3,1994,12662 
-495,592,CANADA,WEST,CONSUMER,OFFICE,CHAIR,4,1994,12692 -1000,813,CANADA,WEST,CONSUMER,OFFICE,CHAIR,4,1994,12723 -241,740,CANADA,WEST,CONSUMER,OFFICE,CHAIR,4,1994,12753 -693,873,CANADA,WEST,CONSUMER,OFFICE,DESK,1,1993,12054 -903,459,CANADA,WEST,CONSUMER,OFFICE,DESK,1,1993,12085 -791,224,CANADA,WEST,CONSUMER,OFFICE,DESK,1,1993,12113 -108,562,CANADA,WEST,CONSUMER,OFFICE,DESK,2,1993,12144 -845,199,CANADA,WEST,CONSUMER,OFFICE,DESK,2,1993,12174 -452,275,CANADA,WEST,CONSUMER,OFFICE,DESK,2,1993,12205 -479,355,CANADA,WEST,CONSUMER,OFFICE,DESK,3,1993,12235 -410,947,CANADA,WEST,CONSUMER,OFFICE,DESK,3,1993,12266 -379,454,CANADA,WEST,CONSUMER,OFFICE,DESK,3,1993,12297 -740,450,CANADA,WEST,CONSUMER,OFFICE,DESK,4,1993,12327 -471,575,CANADA,WEST,CONSUMER,OFFICE,DESK,4,1993,12358 -325,6,CANADA,WEST,CONSUMER,OFFICE,DESK,4,1993,12388 -455,847,CANADA,WEST,CONSUMER,OFFICE,DESK,1,1994,12419 -563,338,CANADA,WEST,CONSUMER,OFFICE,DESK,1,1994,12450 -879,517,CANADA,WEST,CONSUMER,OFFICE,DESK,1,1994,12478 -312,630,CANADA,WEST,CONSUMER,OFFICE,DESK,2,1994,12509 -587,381,CANADA,WEST,CONSUMER,OFFICE,DESK,2,1994,12539 -628,864,CANADA,WEST,CONSUMER,OFFICE,DESK,2,1994,12570 -486,416,CANADA,WEST,CONSUMER,OFFICE,DESK,3,1994,12600 -811,852,CANADA,WEST,CONSUMER,OFFICE,DESK,3,1994,12631 -990,815,CANADA,WEST,CONSUMER,OFFICE,DESK,3,1994,12662 -35,23,CANADA,WEST,CONSUMER,OFFICE,DESK,4,1994,12692 -764,527,CANADA,WEST,CONSUMER,OFFICE,DESK,4,1994,12723 -619,693,CANADA,WEST,CONSUMER,OFFICE,DESK,4,1994,12753 -996,977,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,1,1993,12054 -554,549,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,1,1993,12085 -540,951,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,1,1993,12113 -140,390,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,2,1993,12144 -554,204,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,2,1993,12174 -724,78,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,2,1993,12205 -693,613,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,3,1993,12235 -866,745,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,3,1993,12266 
-833,56,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,3,1993,12297 -164,887,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,4,1993,12327 -753,651,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,4,1993,12358 -60,691,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,4,1993,12388 -688,767,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,1,1994,12419 -883,709,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,1,1994,12450 -109,417,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,1,1994,12478 -950,326,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,2,1994,12509 -438,599,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,2,1994,12539 -286,818,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,2,1994,12570 -342,13,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,3,1994,12600 -383,185,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,3,1994,12631 -80,140,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,3,1994,12662 -322,717,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,4,1994,12692 -749,852,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,4,1994,12723 -606,125,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,4,1994,12753 -641,325,GERMANY,EAST,EDUCATION,FURNITURE,BED,1,1993,12054 -494,648,GERMANY,EAST,EDUCATION,FURNITURE,BED,1,1993,12085 -428,365,GERMANY,EAST,EDUCATION,FURNITURE,BED,1,1993,12113 -936,120,GERMANY,EAST,EDUCATION,FURNITURE,BED,2,1993,12144 -597,347,GERMANY,EAST,EDUCATION,FURNITURE,BED,2,1993,12174 -728,638,GERMANY,EAST,EDUCATION,FURNITURE,BED,2,1993,12205 -933,732,GERMANY,EAST,EDUCATION,FURNITURE,BED,3,1993,12235 -663,465,GERMANY,EAST,EDUCATION,FURNITURE,BED,3,1993,12266 -394,262,GERMANY,EAST,EDUCATION,FURNITURE,BED,3,1993,12297 -334,947,GERMANY,EAST,EDUCATION,FURNITURE,BED,4,1993,12327 -114,694,GERMANY,EAST,EDUCATION,FURNITURE,BED,4,1993,12358 -89,482,GERMANY,EAST,EDUCATION,FURNITURE,BED,4,1993,12388 -874,600,GERMANY,EAST,EDUCATION,FURNITURE,BED,1,1994,12419 -674,94,GERMANY,EAST,EDUCATION,FURNITURE,BED,1,1994,12450 -347,323,GERMANY,EAST,EDUCATION,FURNITURE,BED,1,1994,12478 -105,49,GERMANY,EAST,EDUCATION,FURNITURE,BED,2,1994,12509 -286,70,GERMANY,EAST,EDUCATION,FURNITURE,BED,2,1994,12539 
-669,844,GERMANY,EAST,EDUCATION,FURNITURE,BED,2,1994,12570 -786,773,GERMANY,EAST,EDUCATION,FURNITURE,BED,3,1994,12600 -104,68,GERMANY,EAST,EDUCATION,FURNITURE,BED,3,1994,12631 -770,110,GERMANY,EAST,EDUCATION,FURNITURE,BED,3,1994,12662 -263,42,GERMANY,EAST,EDUCATION,FURNITURE,BED,4,1994,12692 -900,171,GERMANY,EAST,EDUCATION,FURNITURE,BED,4,1994,12723 -630,644,GERMANY,EAST,EDUCATION,FURNITURE,BED,4,1994,12753 -597,408,GERMANY,EAST,EDUCATION,OFFICE,TABLE,1,1993,12054 -185,45,GERMANY,EAST,EDUCATION,OFFICE,TABLE,1,1993,12085 -175,522,GERMANY,EAST,EDUCATION,OFFICE,TABLE,1,1993,12113 -576,166,GERMANY,EAST,EDUCATION,OFFICE,TABLE,2,1993,12144 -957,885,GERMANY,EAST,EDUCATION,OFFICE,TABLE,2,1993,12174 -993,713,GERMANY,EAST,EDUCATION,OFFICE,TABLE,2,1993,12205 -500,838,GERMANY,EAST,EDUCATION,OFFICE,TABLE,3,1993,12235 -410,267,GERMANY,EAST,EDUCATION,OFFICE,TABLE,3,1993,12266 -592,967,GERMANY,EAST,EDUCATION,OFFICE,TABLE,3,1993,12297 -64,529,GERMANY,EAST,EDUCATION,OFFICE,TABLE,4,1993,12327 -208,656,GERMANY,EAST,EDUCATION,OFFICE,TABLE,4,1993,12358 -273,665,GERMANY,EAST,EDUCATION,OFFICE,TABLE,4,1993,12388 -906,419,GERMANY,EAST,EDUCATION,OFFICE,TABLE,1,1994,12419 -429,776,GERMANY,EAST,EDUCATION,OFFICE,TABLE,1,1994,12450 -961,971,GERMANY,EAST,EDUCATION,OFFICE,TABLE,1,1994,12478 -338,248,GERMANY,EAST,EDUCATION,OFFICE,TABLE,2,1994,12509 -472,486,GERMANY,EAST,EDUCATION,OFFICE,TABLE,2,1994,12539 -903,674,GERMANY,EAST,EDUCATION,OFFICE,TABLE,2,1994,12570 -299,603,GERMANY,EAST,EDUCATION,OFFICE,TABLE,3,1994,12600 -948,492,GERMANY,EAST,EDUCATION,OFFICE,TABLE,3,1994,12631 -931,512,GERMANY,EAST,EDUCATION,OFFICE,TABLE,3,1994,12662 -570,391,GERMANY,EAST,EDUCATION,OFFICE,TABLE,4,1994,12692 -97,313,GERMANY,EAST,EDUCATION,OFFICE,TABLE,4,1994,12723 -674,758,GERMANY,EAST,EDUCATION,OFFICE,TABLE,4,1994,12753 -468,304,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,1,1993,12054 -430,846,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,1,1993,12085 -893,912,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,1,1993,12113 
-519,810,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,2,1993,12144 -267,122,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,2,1993,12174 -908,102,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,2,1993,12205 -176,161,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,3,1993,12235 -673,450,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,3,1993,12266 -798,215,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,3,1993,12297 -291,765,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,4,1993,12327 -583,557,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,4,1993,12358 -442,739,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,4,1993,12388 -951,811,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,1,1994,12419 -430,780,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,1,1994,12450 -559,645,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,1,1994,12478 -726,365,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,2,1994,12509 -944,597,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,2,1994,12539 -497,126,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,2,1994,12570 -388,655,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,3,1994,12600 -81,604,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,3,1994,12631 -111,280,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,3,1994,12662 -288,115,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,4,1994,12692 -845,205,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,4,1994,12723 -745,672,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,4,1994,12753 -352,339,GERMANY,EAST,EDUCATION,OFFICE,DESK,1,1993,12054 -234,70,GERMANY,EAST,EDUCATION,OFFICE,DESK,1,1993,12085 -167,528,GERMANY,EAST,EDUCATION,OFFICE,DESK,1,1993,12113 -606,220,GERMANY,EAST,EDUCATION,OFFICE,DESK,2,1993,12144 -670,691,GERMANY,EAST,EDUCATION,OFFICE,DESK,2,1993,12174 -764,197,GERMANY,EAST,EDUCATION,OFFICE,DESK,2,1993,12205 -659,239,GERMANY,EAST,EDUCATION,OFFICE,DESK,3,1993,12235 -996,50,GERMANY,EAST,EDUCATION,OFFICE,DESK,3,1993,12266 -424,135,GERMANY,EAST,EDUCATION,OFFICE,DESK,3,1993,12297 -899,972,GERMANY,EAST,EDUCATION,OFFICE,DESK,4,1993,12327 -392,475,GERMANY,EAST,EDUCATION,OFFICE,DESK,4,1993,12358 -555,868,GERMANY,EAST,EDUCATION,OFFICE,DESK,4,1993,12388 -860,451,GERMANY,EAST,EDUCATION,OFFICE,DESK,1,1994,12419 
-114,565,GERMANY,EAST,EDUCATION,OFFICE,DESK,1,1994,12450 -943,116,GERMANY,EAST,EDUCATION,OFFICE,DESK,1,1994,12478 -365,385,GERMANY,EAST,EDUCATION,OFFICE,DESK,2,1994,12509 -249,375,GERMANY,EAST,EDUCATION,OFFICE,DESK,2,1994,12539 -192,357,GERMANY,EAST,EDUCATION,OFFICE,DESK,2,1994,12570 -328,230,GERMANY,EAST,EDUCATION,OFFICE,DESK,3,1994,12600 -311,829,GERMANY,EAST,EDUCATION,OFFICE,DESK,3,1994,12631 -576,971,GERMANY,EAST,EDUCATION,OFFICE,DESK,3,1994,12662 -915,280,GERMANY,EAST,EDUCATION,OFFICE,DESK,4,1994,12692 -522,853,GERMANY,EAST,EDUCATION,OFFICE,DESK,4,1994,12723 -625,953,GERMANY,EAST,EDUCATION,OFFICE,DESK,4,1994,12753 -873,874,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,1,1993,12054 -498,578,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,1,1993,12085 -808,768,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,1,1993,12113 -742,178,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,2,1993,12144 -744,916,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,2,1993,12174 -30,917,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,2,1993,12205 -747,633,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,3,1993,12235 -672,107,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,3,1993,12266 -564,523,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,3,1993,12297 -785,924,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,4,1993,12327 -825,481,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,4,1993,12358 -243,240,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,4,1993,12388 -959,819,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,1,1994,12419 -123,602,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,1,1994,12450 -714,538,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,1,1994,12478 -252,632,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,2,1994,12509 -715,952,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,2,1994,12539 -670,480,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,2,1994,12570 -81,700,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,3,1994,12600 -653,726,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,3,1994,12631 -795,526,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,3,1994,12662 -182,410,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,4,1994,12692 -725,307,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,4,1994,12723 
-101,73,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,4,1994,12753 -143,232,GERMANY,EAST,CONSUMER,FURNITURE,BED,1,1993,12054 -15,993,GERMANY,EAST,CONSUMER,FURNITURE,BED,1,1993,12085 -742,652,GERMANY,EAST,CONSUMER,FURNITURE,BED,1,1993,12113 -339,761,GERMANY,EAST,CONSUMER,FURNITURE,BED,2,1993,12144 -39,428,GERMANY,EAST,CONSUMER,FURNITURE,BED,2,1993,12174 -465,4,GERMANY,EAST,CONSUMER,FURNITURE,BED,2,1993,12205 -889,101,GERMANY,EAST,CONSUMER,FURNITURE,BED,3,1993,12235 -856,869,GERMANY,EAST,CONSUMER,FURNITURE,BED,3,1993,12266 -358,271,GERMANY,EAST,CONSUMER,FURNITURE,BED,3,1993,12297 -452,633,GERMANY,EAST,CONSUMER,FURNITURE,BED,4,1993,12327 -387,481,GERMANY,EAST,CONSUMER,FURNITURE,BED,4,1993,12358 -824,302,GERMANY,EAST,CONSUMER,FURNITURE,BED,4,1993,12388 -185,245,GERMANY,EAST,CONSUMER,FURNITURE,BED,1,1994,12419 -151,941,GERMANY,EAST,CONSUMER,FURNITURE,BED,1,1994,12450 -419,721,GERMANY,EAST,CONSUMER,FURNITURE,BED,1,1994,12478 -643,893,GERMANY,EAST,CONSUMER,FURNITURE,BED,2,1994,12509 -63,898,GERMANY,EAST,CONSUMER,FURNITURE,BED,2,1994,12539 -202,94,GERMANY,EAST,CONSUMER,FURNITURE,BED,2,1994,12570 -332,962,GERMANY,EAST,CONSUMER,FURNITURE,BED,3,1994,12600 -723,71,GERMANY,EAST,CONSUMER,FURNITURE,BED,3,1994,12631 -148,108,GERMANY,EAST,CONSUMER,FURNITURE,BED,3,1994,12662 -840,71,GERMANY,EAST,CONSUMER,FURNITURE,BED,4,1994,12692 -601,767,GERMANY,EAST,CONSUMER,FURNITURE,BED,4,1994,12723 -962,323,GERMANY,EAST,CONSUMER,FURNITURE,BED,4,1994,12753 -166,982,GERMANY,EAST,CONSUMER,OFFICE,TABLE,1,1993,12054 -531,614,GERMANY,EAST,CONSUMER,OFFICE,TABLE,1,1993,12085 -963,839,GERMANY,EAST,CONSUMER,OFFICE,TABLE,1,1993,12113 -994,388,GERMANY,EAST,CONSUMER,OFFICE,TABLE,2,1993,12144 -978,296,GERMANY,EAST,CONSUMER,OFFICE,TABLE,2,1993,12174 -72,429,GERMANY,EAST,CONSUMER,OFFICE,TABLE,2,1993,12205 -33,901,GERMANY,EAST,CONSUMER,OFFICE,TABLE,3,1993,12235 -428,350,GERMANY,EAST,CONSUMER,OFFICE,TABLE,3,1993,12266 -413,581,GERMANY,EAST,CONSUMER,OFFICE,TABLE,3,1993,12297 
-737,583,GERMANY,EAST,CONSUMER,OFFICE,TABLE,4,1993,12327 -85,92,GERMANY,EAST,CONSUMER,OFFICE,TABLE,4,1993,12358 -916,647,GERMANY,EAST,CONSUMER,OFFICE,TABLE,4,1993,12388 -785,771,GERMANY,EAST,CONSUMER,OFFICE,TABLE,1,1994,12419 -302,26,GERMANY,EAST,CONSUMER,OFFICE,TABLE,1,1994,12450 -1000,598,GERMANY,EAST,CONSUMER,OFFICE,TABLE,1,1994,12478 -458,715,GERMANY,EAST,CONSUMER,OFFICE,TABLE,2,1994,12509 -896,74,GERMANY,EAST,CONSUMER,OFFICE,TABLE,2,1994,12539 -615,580,GERMANY,EAST,CONSUMER,OFFICE,TABLE,2,1994,12570 -174,848,GERMANY,EAST,CONSUMER,OFFICE,TABLE,3,1994,12600 -651,118,GERMANY,EAST,CONSUMER,OFFICE,TABLE,3,1994,12631 -784,54,GERMANY,EAST,CONSUMER,OFFICE,TABLE,3,1994,12662 -121,929,GERMANY,EAST,CONSUMER,OFFICE,TABLE,4,1994,12692 -341,393,GERMANY,EAST,CONSUMER,OFFICE,TABLE,4,1994,12723 -615,820,GERMANY,EAST,CONSUMER,OFFICE,TABLE,4,1994,12753 -697,336,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,1,1993,12054 -215,299,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,1,1993,12085 -197,747,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,1,1993,12113 -205,154,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,2,1993,12144 -256,486,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,2,1993,12174 -377,251,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,2,1993,12205 -577,225,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,3,1993,12235 -686,77,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,3,1993,12266 -332,74,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,3,1993,12297 -534,596,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,4,1993,12327 -485,493,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,4,1993,12358 -594,782,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,4,1993,12388 -413,487,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,1,1994,12419 -13,127,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,1,1994,12450 -483,538,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,1,1994,12478 -820,94,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,2,1994,12509 -745,252,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,2,1994,12539 -79,722,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,2,1994,12570 -36,536,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,3,1994,12600 -950,958,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,3,1994,12631 
-74,466,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,3,1994,12662 -458,309,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,4,1994,12692 -609,680,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,4,1994,12723 -429,539,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,4,1994,12753 -956,511,GERMANY,EAST,CONSUMER,OFFICE,DESK,1,1993,12054 -205,505,GERMANY,EAST,CONSUMER,OFFICE,DESK,1,1993,12085 -629,720,GERMANY,EAST,CONSUMER,OFFICE,DESK,1,1993,12113 -277,823,GERMANY,EAST,CONSUMER,OFFICE,DESK,2,1993,12144 -266,21,GERMANY,EAST,CONSUMER,OFFICE,DESK,2,1993,12174 -872,142,GERMANY,EAST,CONSUMER,OFFICE,DESK,2,1993,12205 -435,95,GERMANY,EAST,CONSUMER,OFFICE,DESK,3,1993,12235 -988,398,GERMANY,EAST,CONSUMER,OFFICE,DESK,3,1993,12266 -953,328,GERMANY,EAST,CONSUMER,OFFICE,DESK,3,1993,12297 -556,151,GERMANY,EAST,CONSUMER,OFFICE,DESK,4,1993,12327 -211,978,GERMANY,EAST,CONSUMER,OFFICE,DESK,4,1993,12358 -389,918,GERMANY,EAST,CONSUMER,OFFICE,DESK,4,1993,12388 -351,542,GERMANY,EAST,CONSUMER,OFFICE,DESK,1,1994,12419 -14,96,GERMANY,EAST,CONSUMER,OFFICE,DESK,1,1994,12450 -181,496,GERMANY,EAST,CONSUMER,OFFICE,DESK,1,1994,12478 -452,77,GERMANY,EAST,CONSUMER,OFFICE,DESK,2,1994,12509 -511,236,GERMANY,EAST,CONSUMER,OFFICE,DESK,2,1994,12539 -193,913,GERMANY,EAST,CONSUMER,OFFICE,DESK,2,1994,12570 -797,49,GERMANY,EAST,CONSUMER,OFFICE,DESK,3,1994,12600 -988,967,GERMANY,EAST,CONSUMER,OFFICE,DESK,3,1994,12631 -487,502,GERMANY,EAST,CONSUMER,OFFICE,DESK,3,1994,12662 -941,790,GERMANY,EAST,CONSUMER,OFFICE,DESK,4,1994,12692 -577,121,GERMANY,EAST,CONSUMER,OFFICE,DESK,4,1994,12723 -456,55,GERMANY,EAST,CONSUMER,OFFICE,DESK,4,1994,12753 -982,739,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,1,1993,12054 -593,683,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,1,1993,12085 -702,610,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,1,1993,12113 -528,248,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,2,1993,12144 -873,530,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,2,1993,12174 -301,889,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,2,1993,12205 -769,245,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,3,1993,12235 
-724,473,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,3,1993,12266 -466,938,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,3,1993,12297 -774,150,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,4,1993,12327 -111,772,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,4,1993,12358 -954,201,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,4,1993,12388 -780,945,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,1,1994,12419 -210,177,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,1,1994,12450 -93,378,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,1,1994,12478 -332,83,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,2,1994,12509 -186,803,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,2,1994,12539 -782,398,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,2,1994,12570 -41,215,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,3,1994,12600 -222,194,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,3,1994,12631 -992,287,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,3,1994,12662 -477,410,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,4,1994,12692 -948,50,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,4,1994,12723 -817,204,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,4,1994,12753 -597,239,GERMANY,WEST,EDUCATION,FURNITURE,BED,1,1993,12054 -649,637,GERMANY,WEST,EDUCATION,FURNITURE,BED,1,1993,12085 -3,938,GERMANY,WEST,EDUCATION,FURNITURE,BED,1,1993,12113 -731,788,GERMANY,WEST,EDUCATION,FURNITURE,BED,2,1993,12144 -181,399,GERMANY,WEST,EDUCATION,FURNITURE,BED,2,1993,12174 -468,576,GERMANY,WEST,EDUCATION,FURNITURE,BED,2,1993,12205 -891,187,GERMANY,WEST,EDUCATION,FURNITURE,BED,3,1993,12235 -226,703,GERMANY,WEST,EDUCATION,FURNITURE,BED,3,1993,12266 -28,455,GERMANY,WEST,EDUCATION,FURNITURE,BED,3,1993,12297 -609,244,GERMANY,WEST,EDUCATION,FURNITURE,BED,4,1993,12327 -224,868,GERMANY,WEST,EDUCATION,FURNITURE,BED,4,1993,12358 -230,353,GERMANY,WEST,EDUCATION,FURNITURE,BED,4,1993,12388 -216,101,GERMANY,WEST,EDUCATION,FURNITURE,BED,1,1994,12419 -282,924,GERMANY,WEST,EDUCATION,FURNITURE,BED,1,1994,12450 -501,144,GERMANY,WEST,EDUCATION,FURNITURE,BED,1,1994,12478 -320,0,GERMANY,WEST,EDUCATION,FURNITURE,BED,2,1994,12509 
-720,910,GERMANY,WEST,EDUCATION,FURNITURE,BED,2,1994,12539 -464,259,GERMANY,WEST,EDUCATION,FURNITURE,BED,2,1994,12570 -363,107,GERMANY,WEST,EDUCATION,FURNITURE,BED,3,1994,12600 -49,63,GERMANY,WEST,EDUCATION,FURNITURE,BED,3,1994,12631 -223,270,GERMANY,WEST,EDUCATION,FURNITURE,BED,3,1994,12662 -452,554,GERMANY,WEST,EDUCATION,FURNITURE,BED,4,1994,12692 -210,154,GERMANY,WEST,EDUCATION,FURNITURE,BED,4,1994,12723 -444,205,GERMANY,WEST,EDUCATION,FURNITURE,BED,4,1994,12753 -222,441,GERMANY,WEST,EDUCATION,OFFICE,TABLE,1,1993,12054 -678,183,GERMANY,WEST,EDUCATION,OFFICE,TABLE,1,1993,12085 -25,459,GERMANY,WEST,EDUCATION,OFFICE,TABLE,1,1993,12113 -57,810,GERMANY,WEST,EDUCATION,OFFICE,TABLE,2,1993,12144 -981,268,GERMANY,WEST,EDUCATION,OFFICE,TABLE,2,1993,12174 -740,916,GERMANY,WEST,EDUCATION,OFFICE,TABLE,2,1993,12205 -408,742,GERMANY,WEST,EDUCATION,OFFICE,TABLE,3,1993,12235 -966,522,GERMANY,WEST,EDUCATION,OFFICE,TABLE,3,1993,12266 -107,299,GERMANY,WEST,EDUCATION,OFFICE,TABLE,3,1993,12297 -488,677,GERMANY,WEST,EDUCATION,OFFICE,TABLE,4,1993,12327 -759,709,GERMANY,WEST,EDUCATION,OFFICE,TABLE,4,1993,12358 -504,310,GERMANY,WEST,EDUCATION,OFFICE,TABLE,4,1993,12388 -99,160,GERMANY,WEST,EDUCATION,OFFICE,TABLE,1,1994,12419 -503,698,GERMANY,WEST,EDUCATION,OFFICE,TABLE,1,1994,12450 -724,540,GERMANY,WEST,EDUCATION,OFFICE,TABLE,1,1994,12478 -309,901,GERMANY,WEST,EDUCATION,OFFICE,TABLE,2,1994,12509 -625,34,GERMANY,WEST,EDUCATION,OFFICE,TABLE,2,1994,12539 -294,536,GERMANY,WEST,EDUCATION,OFFICE,TABLE,2,1994,12570 -890,780,GERMANY,WEST,EDUCATION,OFFICE,TABLE,3,1994,12600 -501,716,GERMANY,WEST,EDUCATION,OFFICE,TABLE,3,1994,12631 -34,532,GERMANY,WEST,EDUCATION,OFFICE,TABLE,3,1994,12662 -203,871,GERMANY,WEST,EDUCATION,OFFICE,TABLE,4,1994,12692 -140,199,GERMANY,WEST,EDUCATION,OFFICE,TABLE,4,1994,12723 -845,845,GERMANY,WEST,EDUCATION,OFFICE,TABLE,4,1994,12753 -774,591,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,1,1993,12054 -645,378,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,1,1993,12085 
-986,942,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,1,1993,12113 -296,686,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,2,1993,12144 -936,720,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,2,1993,12174 -341,546,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,2,1993,12205 -32,845,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,3,1993,12235 -277,667,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,3,1993,12266 -548,627,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,3,1993,12297 -727,142,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,4,1993,12327 -812,655,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,4,1993,12358 -168,556,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,4,1993,12388 -150,459,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,1,1994,12419 -136,89,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,1,1994,12450 -695,726,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,1,1994,12478 -363,38,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,2,1994,12509 -853,60,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,2,1994,12539 -621,369,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,2,1994,12570 -764,381,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,3,1994,12600 -669,465,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,3,1994,12631 -772,981,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,3,1994,12662 -228,758,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,4,1994,12692 -261,31,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,4,1994,12723 -821,237,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,4,1994,12753 -100,285,GERMANY,WEST,EDUCATION,OFFICE,DESK,1,1993,12054 -465,94,GERMANY,WEST,EDUCATION,OFFICE,DESK,1,1993,12085 -350,561,GERMANY,WEST,EDUCATION,OFFICE,DESK,1,1993,12113 -991,143,GERMANY,WEST,EDUCATION,OFFICE,DESK,2,1993,12144 -910,95,GERMANY,WEST,EDUCATION,OFFICE,DESK,2,1993,12174 -206,341,GERMANY,WEST,EDUCATION,OFFICE,DESK,2,1993,12205 -263,388,GERMANY,WEST,EDUCATION,OFFICE,DESK,3,1993,12235 -374,272,GERMANY,WEST,EDUCATION,OFFICE,DESK,3,1993,12266 -875,890,GERMANY,WEST,EDUCATION,OFFICE,DESK,3,1993,12297 -810,734,GERMANY,WEST,EDUCATION,OFFICE,DESK,4,1993,12327 -398,364,GERMANY,WEST,EDUCATION,OFFICE,DESK,4,1993,12358 -565,619,GERMANY,WEST,EDUCATION,OFFICE,DESK,4,1993,12388 
-417,517,GERMANY,WEST,EDUCATION,OFFICE,DESK,1,1994,12419 -291,781,GERMANY,WEST,EDUCATION,OFFICE,DESK,1,1994,12450 -251,327,GERMANY,WEST,EDUCATION,OFFICE,DESK,1,1994,12478 -449,48,GERMANY,WEST,EDUCATION,OFFICE,DESK,2,1994,12509 -774,809,GERMANY,WEST,EDUCATION,OFFICE,DESK,2,1994,12539 -386,73,GERMANY,WEST,EDUCATION,OFFICE,DESK,2,1994,12570 -22,936,GERMANY,WEST,EDUCATION,OFFICE,DESK,3,1994,12600 -940,400,GERMANY,WEST,EDUCATION,OFFICE,DESK,3,1994,12631 -132,736,GERMANY,WEST,EDUCATION,OFFICE,DESK,3,1994,12662 -103,211,GERMANY,WEST,EDUCATION,OFFICE,DESK,4,1994,12692 -152,271,GERMANY,WEST,EDUCATION,OFFICE,DESK,4,1994,12723 -952,855,GERMANY,WEST,EDUCATION,OFFICE,DESK,4,1994,12753 -872,923,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,1,1993,12054 -748,854,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,1,1993,12085 -749,769,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,1,1993,12113 -876,271,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,2,1993,12144 -860,383,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,2,1993,12174 -900,29,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,2,1993,12205 -705,185,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,3,1993,12235 -913,351,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,3,1993,12266 -315,560,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,3,1993,12297 -466,840,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,4,1993,12327 -233,517,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,4,1993,12358 -906,949,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,4,1993,12388 -148,633,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,1,1994,12419 -661,636,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,1,1994,12450 -847,138,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,1,1994,12478 -768,481,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,2,1994,12509 -866,408,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,2,1994,12539 -475,130,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,2,1994,12570 -112,813,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,3,1994,12600 -136,661,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,3,1994,12631 -763,311,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,3,1994,12662 -388,872,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,4,1994,12692 
-996,643,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,4,1994,12723 -486,174,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,4,1994,12753 -494,528,GERMANY,WEST,CONSUMER,FURNITURE,BED,1,1993,12054 -771,124,GERMANY,WEST,CONSUMER,FURNITURE,BED,1,1993,12085 -49,126,GERMANY,WEST,CONSUMER,FURNITURE,BED,1,1993,12113 -322,440,GERMANY,WEST,CONSUMER,FURNITURE,BED,2,1993,12144 -878,881,GERMANY,WEST,CONSUMER,FURNITURE,BED,2,1993,12174 -827,292,GERMANY,WEST,CONSUMER,FURNITURE,BED,2,1993,12205 -852,873,GERMANY,WEST,CONSUMER,FURNITURE,BED,3,1993,12235 -716,357,GERMANY,WEST,CONSUMER,FURNITURE,BED,3,1993,12266 -81,247,GERMANY,WEST,CONSUMER,FURNITURE,BED,3,1993,12297 -916,18,GERMANY,WEST,CONSUMER,FURNITURE,BED,4,1993,12327 -673,395,GERMANY,WEST,CONSUMER,FURNITURE,BED,4,1993,12358 -242,620,GERMANY,WEST,CONSUMER,FURNITURE,BED,4,1993,12388 -914,946,GERMANY,WEST,CONSUMER,FURNITURE,BED,1,1994,12419 -902,72,GERMANY,WEST,CONSUMER,FURNITURE,BED,1,1994,12450 -707,691,GERMANY,WEST,CONSUMER,FURNITURE,BED,1,1994,12478 -223,95,GERMANY,WEST,CONSUMER,FURNITURE,BED,2,1994,12509 -619,878,GERMANY,WEST,CONSUMER,FURNITURE,BED,2,1994,12539 -254,757,GERMANY,WEST,CONSUMER,FURNITURE,BED,2,1994,12570 -688,898,GERMANY,WEST,CONSUMER,FURNITURE,BED,3,1994,12600 -477,172,GERMANY,WEST,CONSUMER,FURNITURE,BED,3,1994,12631 -280,419,GERMANY,WEST,CONSUMER,FURNITURE,BED,3,1994,12662 -546,849,GERMANY,WEST,CONSUMER,FURNITURE,BED,4,1994,12692 -630,807,GERMANY,WEST,CONSUMER,FURNITURE,BED,4,1994,12723 -455,599,GERMANY,WEST,CONSUMER,FURNITURE,BED,4,1994,12753 -505,59,GERMANY,WEST,CONSUMER,OFFICE,TABLE,1,1993,12054 -823,790,GERMANY,WEST,CONSUMER,OFFICE,TABLE,1,1993,12085 -891,574,GERMANY,WEST,CONSUMER,OFFICE,TABLE,1,1993,12113 -840,96,GERMANY,WEST,CONSUMER,OFFICE,TABLE,2,1993,12144 -436,376,GERMANY,WEST,CONSUMER,OFFICE,TABLE,2,1993,12174 -168,352,GERMANY,WEST,CONSUMER,OFFICE,TABLE,2,1993,12205 -177,741,GERMANY,WEST,CONSUMER,OFFICE,TABLE,3,1993,12235 -727,12,GERMANY,WEST,CONSUMER,OFFICE,TABLE,3,1993,12266 
-278,157,GERMANY,WEST,CONSUMER,OFFICE,TABLE,3,1993,12297 -443,10,GERMANY,WEST,CONSUMER,OFFICE,TABLE,4,1993,12327 -905,544,GERMANY,WEST,CONSUMER,OFFICE,TABLE,4,1993,12358 -881,817,GERMANY,WEST,CONSUMER,OFFICE,TABLE,4,1993,12388 -507,754,GERMANY,WEST,CONSUMER,OFFICE,TABLE,1,1994,12419 -363,425,GERMANY,WEST,CONSUMER,OFFICE,TABLE,1,1994,12450 -603,492,GERMANY,WEST,CONSUMER,OFFICE,TABLE,1,1994,12478 -473,485,GERMANY,WEST,CONSUMER,OFFICE,TABLE,2,1994,12509 -128,369,GERMANY,WEST,CONSUMER,OFFICE,TABLE,2,1994,12539 -105,560,GERMANY,WEST,CONSUMER,OFFICE,TABLE,2,1994,12570 -325,651,GERMANY,WEST,CONSUMER,OFFICE,TABLE,3,1994,12600 -711,326,GERMANY,WEST,CONSUMER,OFFICE,TABLE,3,1994,12631 -983,180,GERMANY,WEST,CONSUMER,OFFICE,TABLE,3,1994,12662 -241,935,GERMANY,WEST,CONSUMER,OFFICE,TABLE,4,1994,12692 -71,403,GERMANY,WEST,CONSUMER,OFFICE,TABLE,4,1994,12723 -395,345,GERMANY,WEST,CONSUMER,OFFICE,TABLE,4,1994,12753 -168,278,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,1,1993,12054 -512,376,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,1,1993,12085 -291,104,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,1,1993,12113 -776,543,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,2,1993,12144 -271,798,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,2,1993,12174 -946,333,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,2,1993,12205 -195,833,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,3,1993,12235 -165,132,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,3,1993,12266 -238,629,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,3,1993,12297 -409,337,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,4,1993,12327 -720,300,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,4,1993,12358 -309,470,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,4,1993,12388 -812,875,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,1,1994,12419 -441,237,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,1,1994,12450 -500,272,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,1,1994,12478 -517,860,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,2,1994,12509 -924,415,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,2,1994,12539 -572,140,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,2,1994,12570 -768,367,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,3,1994,12600 
-692,195,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,3,1994,12631 -28,245,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,3,1994,12662 -202,285,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,4,1994,12692 -76,98,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,4,1994,12723 -421,932,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,4,1994,12753 -636,898,GERMANY,WEST,CONSUMER,OFFICE,DESK,1,1993,12054 -52,330,GERMANY,WEST,CONSUMER,OFFICE,DESK,1,1993,12085 -184,603,GERMANY,WEST,CONSUMER,OFFICE,DESK,1,1993,12113 -739,280,GERMANY,WEST,CONSUMER,OFFICE,DESK,2,1993,12144 -841,507,GERMANY,WEST,CONSUMER,OFFICE,DESK,2,1993,12174 -65,202,GERMANY,WEST,CONSUMER,OFFICE,DESK,2,1993,12205 -623,513,GERMANY,WEST,CONSUMER,OFFICE,DESK,3,1993,12235 -517,132,GERMANY,WEST,CONSUMER,OFFICE,DESK,3,1993,12266 -636,21,GERMANY,WEST,CONSUMER,OFFICE,DESK,3,1993,12297 -845,657,GERMANY,WEST,CONSUMER,OFFICE,DESK,4,1993,12327 -232,195,GERMANY,WEST,CONSUMER,OFFICE,DESK,4,1993,12358 -26,323,GERMANY,WEST,CONSUMER,OFFICE,DESK,4,1993,12388 -680,299,GERMANY,WEST,CONSUMER,OFFICE,DESK,1,1994,12419 -364,811,GERMANY,WEST,CONSUMER,OFFICE,DESK,1,1994,12450 -572,739,GERMANY,WEST,CONSUMER,OFFICE,DESK,1,1994,12478 -145,889,GERMANY,WEST,CONSUMER,OFFICE,DESK,2,1994,12509 -644,189,GERMANY,WEST,CONSUMER,OFFICE,DESK,2,1994,12539 -87,698,GERMANY,WEST,CONSUMER,OFFICE,DESK,2,1994,12570 -620,646,GERMANY,WEST,CONSUMER,OFFICE,DESK,3,1994,12600 -535,562,GERMANY,WEST,CONSUMER,OFFICE,DESK,3,1994,12631 -661,753,GERMANY,WEST,CONSUMER,OFFICE,DESK,3,1994,12662 -884,425,GERMANY,WEST,CONSUMER,OFFICE,DESK,4,1994,12692 -689,693,GERMANY,WEST,CONSUMER,OFFICE,DESK,4,1994,12723 -646,941,GERMANY,WEST,CONSUMER,OFFICE,DESK,4,1994,12753 -4,975,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,1,1993,12054 -813,455,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,1,1993,12085 -773,260,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,1,1993,12113 -205,69,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,2,1993,12144 -657,147,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,2,1993,12174 -154,533,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,2,1993,12205 
-747,881,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,3,1993,12235 -787,457,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,3,1993,12266 -867,441,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,3,1993,12297 -307,859,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,4,1993,12327 -571,177,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,4,1993,12358 -92,633,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,4,1993,12388 -269,382,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,1,1994,12419 -764,707,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,1,1994,12450 -662,566,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,1,1994,12478 -818,349,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,2,1994,12509 -617,128,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,2,1994,12539 -649,231,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,2,1994,12570 -895,258,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,3,1994,12600 -750,812,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,3,1994,12631 -738,362,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,3,1994,12662 -107,133,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,4,1994,12692 -278,60,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,4,1994,12723 -32,88,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,4,1994,12753 -129,378,U.S.A.,EAST,EDUCATION,FURNITURE,BED,1,1993,12054 -187,569,U.S.A.,EAST,EDUCATION,FURNITURE,BED,1,1993,12085 -670,186,U.S.A.,EAST,EDUCATION,FURNITURE,BED,1,1993,12113 -678,875,U.S.A.,EAST,EDUCATION,FURNITURE,BED,2,1993,12144 -423,636,U.S.A.,EAST,EDUCATION,FURNITURE,BED,2,1993,12174 -389,360,U.S.A.,EAST,EDUCATION,FURNITURE,BED,2,1993,12205 -257,677,U.S.A.,EAST,EDUCATION,FURNITURE,BED,3,1993,12235 -780,708,U.S.A.,EAST,EDUCATION,FURNITURE,BED,3,1993,12266 -159,158,U.S.A.,EAST,EDUCATION,FURNITURE,BED,3,1993,12297 -97,384,U.S.A.,EAST,EDUCATION,FURNITURE,BED,4,1993,12327 -479,927,U.S.A.,EAST,EDUCATION,FURNITURE,BED,4,1993,12358 -9,134,U.S.A.,EAST,EDUCATION,FURNITURE,BED,4,1993,12388 -614,273,U.S.A.,EAST,EDUCATION,FURNITURE,BED,1,1994,12419 -261,27,U.S.A.,EAST,EDUCATION,FURNITURE,BED,1,1994,12450 -115,209,U.S.A.,EAST,EDUCATION,FURNITURE,BED,1,1994,12478 -358,470,U.S.A.,EAST,EDUCATION,FURNITURE,BED,2,1994,12509 
-133,219,U.S.A.,EAST,EDUCATION,FURNITURE,BED,2,1994,12539 -891,907,U.S.A.,EAST,EDUCATION,FURNITURE,BED,2,1994,12570 -702,778,U.S.A.,EAST,EDUCATION,FURNITURE,BED,3,1994,12600 -58,998,U.S.A.,EAST,EDUCATION,FURNITURE,BED,3,1994,12631 -606,194,U.S.A.,EAST,EDUCATION,FURNITURE,BED,3,1994,12662 -668,933,U.S.A.,EAST,EDUCATION,FURNITURE,BED,4,1994,12692 -813,708,U.S.A.,EAST,EDUCATION,FURNITURE,BED,4,1994,12723 -450,949,U.S.A.,EAST,EDUCATION,FURNITURE,BED,4,1994,12753 -956,579,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,1,1993,12054 -276,131,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,1,1993,12085 -889,689,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,1,1993,12113 -708,908,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,2,1993,12144 -14,524,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,2,1993,12174 -904,336,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,2,1993,12205 -272,916,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,3,1993,12235 -257,236,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,3,1993,12266 -343,965,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,3,1993,12297 -80,350,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,4,1993,12327 -530,599,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,4,1993,12358 -340,901,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,4,1993,12388 -595,935,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,1,1994,12419 -47,667,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,1,1994,12450 -279,104,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,1,1994,12478 -293,803,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,2,1994,12509 -162,64,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,2,1994,12539 -935,825,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,2,1994,12570 -689,839,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,3,1994,12600 -484,184,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,3,1994,12631 -230,348,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,3,1994,12662 -164,904,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,4,1994,12692 -401,219,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,4,1994,12723 -607,381,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,4,1994,12753 -229,524,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,1,1993,12054 -786,902,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,1,1993,12085 -92,212,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,1,1993,12113 
-455,762,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,2,1993,12144 -409,182,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,2,1993,12174 -166,442,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,2,1993,12205 -277,919,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,3,1993,12235 -92,67,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,3,1993,12266 -631,741,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,3,1993,12297 -390,617,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,4,1993,12327 -403,214,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,4,1993,12358 -964,202,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,4,1993,12388 -223,788,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,1,1994,12419 -684,639,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,1,1994,12450 -645,336,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,1,1994,12478 -470,937,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,2,1994,12509 -424,399,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,2,1994,12539 -862,21,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,2,1994,12570 -736,125,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,3,1994,12600 -554,635,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,3,1994,12631 -790,229,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,3,1994,12662 -115,770,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,4,1994,12692 -853,622,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,4,1994,12723 -643,109,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,4,1994,12753 -794,975,U.S.A.,EAST,EDUCATION,OFFICE,DESK,1,1993,12054 -892,820,U.S.A.,EAST,EDUCATION,OFFICE,DESK,1,1993,12085 -728,123,U.S.A.,EAST,EDUCATION,OFFICE,DESK,1,1993,12113 -744,135,U.S.A.,EAST,EDUCATION,OFFICE,DESK,2,1993,12144 -678,535,U.S.A.,EAST,EDUCATION,OFFICE,DESK,2,1993,12174 -768,971,U.S.A.,EAST,EDUCATION,OFFICE,DESK,2,1993,12205 -234,166,U.S.A.,EAST,EDUCATION,OFFICE,DESK,3,1993,12235 -333,814,U.S.A.,EAST,EDUCATION,OFFICE,DESK,3,1993,12266 -968,557,U.S.A.,EAST,EDUCATION,OFFICE,DESK,3,1993,12297 -119,820,U.S.A.,EAST,EDUCATION,OFFICE,DESK,4,1993,12327 -469,486,U.S.A.,EAST,EDUCATION,OFFICE,DESK,4,1993,12358 -261,429,U.S.A.,EAST,EDUCATION,OFFICE,DESK,4,1993,12388 -984,65,U.S.A.,EAST,EDUCATION,OFFICE,DESK,1,1994,12419 -845,977,U.S.A.,EAST,EDUCATION,OFFICE,DESK,1,1994,12450 
-374,410,U.S.A.,EAST,EDUCATION,OFFICE,DESK,1,1994,12478 -687,150,U.S.A.,EAST,EDUCATION,OFFICE,DESK,2,1994,12509 -157,630,U.S.A.,EAST,EDUCATION,OFFICE,DESK,2,1994,12539 -49,488,U.S.A.,EAST,EDUCATION,OFFICE,DESK,2,1994,12570 -817,112,U.S.A.,EAST,EDUCATION,OFFICE,DESK,3,1994,12600 -223,598,U.S.A.,EAST,EDUCATION,OFFICE,DESK,3,1994,12631 -433,705,U.S.A.,EAST,EDUCATION,OFFICE,DESK,3,1994,12662 -41,226,U.S.A.,EAST,EDUCATION,OFFICE,DESK,4,1994,12692 -396,979,U.S.A.,EAST,EDUCATION,OFFICE,DESK,4,1994,12723 -131,19,U.S.A.,EAST,EDUCATION,OFFICE,DESK,4,1994,12753 -521,204,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,1,1993,12054 -751,805,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,1,1993,12085 -45,549,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,1,1993,12113 -144,912,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,2,1993,12144 -119,427,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,2,1993,12174 -728,1,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,2,1993,12205 -120,540,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,3,1993,12235 -657,940,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,3,1993,12266 -409,644,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,3,1993,12297 -881,821,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,4,1993,12327 -113,560,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,4,1993,12358 -831,309,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,4,1993,12388 -129,1000,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,1,1994,12419 -76,945,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,1,1994,12450 -260,931,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,1,1994,12478 -882,504,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,2,1994,12509 -157,950,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,2,1994,12539 -443,278,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,2,1994,12570 -111,225,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,3,1994,12600 -497,6,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,3,1994,12631 -321,124,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,3,1994,12662 -194,206,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,4,1994,12692 -684,320,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,4,1994,12723 -634,270,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,4,1994,12753 
-622,278,U.S.A.,EAST,CONSUMER,FURNITURE,BED,1,1993,12054 -689,447,U.S.A.,EAST,CONSUMER,FURNITURE,BED,1,1993,12085 -120,170,U.S.A.,EAST,CONSUMER,FURNITURE,BED,1,1993,12113 -374,87,U.S.A.,EAST,CONSUMER,FURNITURE,BED,2,1993,12144 -926,384,U.S.A.,EAST,CONSUMER,FURNITURE,BED,2,1993,12174 -687,574,U.S.A.,EAST,CONSUMER,FURNITURE,BED,2,1993,12205 -600,585,U.S.A.,EAST,CONSUMER,FURNITURE,BED,3,1993,12235 -779,947,U.S.A.,EAST,CONSUMER,FURNITURE,BED,3,1993,12266 -223,984,U.S.A.,EAST,CONSUMER,FURNITURE,BED,3,1993,12297 -628,189,U.S.A.,EAST,CONSUMER,FURNITURE,BED,4,1993,12327 -326,364,U.S.A.,EAST,CONSUMER,FURNITURE,BED,4,1993,12358 -836,49,U.S.A.,EAST,CONSUMER,FURNITURE,BED,4,1993,12388 -361,851,U.S.A.,EAST,CONSUMER,FURNITURE,BED,1,1994,12419 -444,643,U.S.A.,EAST,CONSUMER,FURNITURE,BED,1,1994,12450 -501,143,U.S.A.,EAST,CONSUMER,FURNITURE,BED,1,1994,12478 -743,763,U.S.A.,EAST,CONSUMER,FURNITURE,BED,2,1994,12509 -861,987,U.S.A.,EAST,CONSUMER,FURNITURE,BED,2,1994,12539 -203,264,U.S.A.,EAST,CONSUMER,FURNITURE,BED,2,1994,12570 -762,439,U.S.A.,EAST,CONSUMER,FURNITURE,BED,3,1994,12600 -705,750,U.S.A.,EAST,CONSUMER,FURNITURE,BED,3,1994,12631 -153,37,U.S.A.,EAST,CONSUMER,FURNITURE,BED,3,1994,12662 -436,95,U.S.A.,EAST,CONSUMER,FURNITURE,BED,4,1994,12692 -428,79,U.S.A.,EAST,CONSUMER,FURNITURE,BED,4,1994,12723 -804,832,U.S.A.,EAST,CONSUMER,FURNITURE,BED,4,1994,12753 -805,649,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,1,1993,12054 -860,838,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,1,1993,12085 -104,439,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,1,1993,12113 -434,207,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,2,1993,12144 -912,804,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,2,1993,12174 -571,875,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,2,1993,12205 -267,473,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,3,1993,12235 -415,845,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,3,1993,12266 -261,91,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,3,1993,12297 -746,630,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,4,1993,12327 -30,185,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,4,1993,12358 
-662,317,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,4,1993,12388 -916,88,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,1,1994,12419 -415,607,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,1,1994,12450 -514,35,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,1,1994,12478 -756,680,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,2,1994,12509 -461,78,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,2,1994,12539 -460,117,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,2,1994,12570 -305,440,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,3,1994,12600 -198,652,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,3,1994,12631 -234,249,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,3,1994,12662 -638,658,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,4,1994,12692 -88,563,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,4,1994,12723 -751,737,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,4,1994,12753 -816,789,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,1,1993,12054 -437,988,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,1,1993,12085 -715,220,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,1,1993,12113 -780,946,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,2,1993,12144 -245,986,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,2,1993,12174 -201,129,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,2,1993,12205 -815,433,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,3,1993,12235 -865,492,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,3,1993,12266 -634,306,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,3,1993,12297 -901,154,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,4,1993,12327 -789,206,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,4,1993,12358 -882,81,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,4,1993,12388 -953,882,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,1,1994,12419 -862,848,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,1,1994,12450 -628,664,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,1,1994,12478 -765,389,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,2,1994,12509 -741,182,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,2,1994,12539 -61,505,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,2,1994,12570 -470,861,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,3,1994,12600 -869,263,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,3,1994,12631 -650,400,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,3,1994,12662 -750,556,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,4,1994,12692 
-602,497,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,4,1994,12723 -54,181,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,4,1994,12753 -384,619,U.S.A.,EAST,CONSUMER,OFFICE,DESK,1,1993,12054 -161,332,U.S.A.,EAST,CONSUMER,OFFICE,DESK,1,1993,12085 -977,669,U.S.A.,EAST,CONSUMER,OFFICE,DESK,1,1993,12113 -615,487,U.S.A.,EAST,CONSUMER,OFFICE,DESK,2,1993,12144 -783,994,U.S.A.,EAST,CONSUMER,OFFICE,DESK,2,1993,12174 -977,331,U.S.A.,EAST,CONSUMER,OFFICE,DESK,2,1993,12205 -375,739,U.S.A.,EAST,CONSUMER,OFFICE,DESK,3,1993,12235 -298,665,U.S.A.,EAST,CONSUMER,OFFICE,DESK,3,1993,12266 -104,921,U.S.A.,EAST,CONSUMER,OFFICE,DESK,3,1993,12297 -713,862,U.S.A.,EAST,CONSUMER,OFFICE,DESK,4,1993,12327 -556,662,U.S.A.,EAST,CONSUMER,OFFICE,DESK,4,1993,12358 -323,517,U.S.A.,EAST,CONSUMER,OFFICE,DESK,4,1993,12388 -391,352,U.S.A.,EAST,CONSUMER,OFFICE,DESK,1,1994,12419 -593,166,U.S.A.,EAST,CONSUMER,OFFICE,DESK,1,1994,12450 -906,859,U.S.A.,EAST,CONSUMER,OFFICE,DESK,1,1994,12478 -130,571,U.S.A.,EAST,CONSUMER,OFFICE,DESK,2,1994,12509 -613,976,U.S.A.,EAST,CONSUMER,OFFICE,DESK,2,1994,12539 -58,466,U.S.A.,EAST,CONSUMER,OFFICE,DESK,2,1994,12570 -314,79,U.S.A.,EAST,CONSUMER,OFFICE,DESK,3,1994,12600 -67,864,U.S.A.,EAST,CONSUMER,OFFICE,DESK,3,1994,12631 -654,623,U.S.A.,EAST,CONSUMER,OFFICE,DESK,3,1994,12662 -312,170,U.S.A.,EAST,CONSUMER,OFFICE,DESK,4,1994,12692 -349,662,U.S.A.,EAST,CONSUMER,OFFICE,DESK,4,1994,12723 -415,763,U.S.A.,EAST,CONSUMER,OFFICE,DESK,4,1994,12753 -404,896,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,1,1993,12054 -22,973,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,1,1993,12085 -744,161,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,1,1993,12113 -804,934,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,2,1993,12144 -101,697,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,2,1993,12174 -293,116,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,2,1993,12205 -266,84,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,3,1993,12235 -372,604,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,3,1993,12266 -38,371,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,3,1993,12297 
-385,783,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,4,1993,12327 -262,335,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,4,1993,12358 -961,321,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,4,1993,12388 -831,177,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,1,1994,12419 -579,371,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,1,1994,12450 -301,583,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,1,1994,12478 -693,364,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,2,1994,12509 -895,343,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,2,1994,12539 -320,854,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,2,1994,12570 -284,691,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,3,1994,12600 -362,387,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,3,1994,12631 -132,298,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,3,1994,12662 -42,635,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,4,1994,12692 -118,81,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,4,1994,12723 -42,375,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,4,1994,12753 -18,846,U.S.A.,WEST,EDUCATION,FURNITURE,BED,1,1993,12054 -512,933,U.S.A.,WEST,EDUCATION,FURNITURE,BED,1,1993,12085 -337,237,U.S.A.,WEST,EDUCATION,FURNITURE,BED,1,1993,12113 -167,964,U.S.A.,WEST,EDUCATION,FURNITURE,BED,2,1993,12144 -749,382,U.S.A.,WEST,EDUCATION,FURNITURE,BED,2,1993,12174 -890,610,U.S.A.,WEST,EDUCATION,FURNITURE,BED,2,1993,12205 -910,148,U.S.A.,WEST,EDUCATION,FURNITURE,BED,3,1993,12235 -403,837,U.S.A.,WEST,EDUCATION,FURNITURE,BED,3,1993,12266 -403,85,U.S.A.,WEST,EDUCATION,FURNITURE,BED,3,1993,12297 -661,425,U.S.A.,WEST,EDUCATION,FURNITURE,BED,4,1993,12327 -485,633,U.S.A.,WEST,EDUCATION,FURNITURE,BED,4,1993,12358 -789,515,U.S.A.,WEST,EDUCATION,FURNITURE,BED,4,1993,12388 -415,512,U.S.A.,WEST,EDUCATION,FURNITURE,BED,1,1994,12419 -418,156,U.S.A.,WEST,EDUCATION,FURNITURE,BED,1,1994,12450 -163,464,U.S.A.,WEST,EDUCATION,FURNITURE,BED,1,1994,12478 -298,813,U.S.A.,WEST,EDUCATION,FURNITURE,BED,2,1994,12509 -584,455,U.S.A.,WEST,EDUCATION,FURNITURE,BED,2,1994,12539 -797,366,U.S.A.,WEST,EDUCATION,FURNITURE,BED,2,1994,12570 -767,734,U.S.A.,WEST,EDUCATION,FURNITURE,BED,3,1994,12600 
-984,451,U.S.A.,WEST,EDUCATION,FURNITURE,BED,3,1994,12631 -388,134,U.S.A.,WEST,EDUCATION,FURNITURE,BED,3,1994,12662 -924,547,U.S.A.,WEST,EDUCATION,FURNITURE,BED,4,1994,12692 -566,802,U.S.A.,WEST,EDUCATION,FURNITURE,BED,4,1994,12723 -390,61,U.S.A.,WEST,EDUCATION,FURNITURE,BED,4,1994,12753 -608,556,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,1,1993,12054 -840,202,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,1,1993,12085 -112,964,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,1,1993,12113 -288,112,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,2,1993,12144 -408,445,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,2,1993,12174 -876,884,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,2,1993,12205 -224,348,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,3,1993,12235 -133,564,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,3,1993,12266 -662,568,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,3,1993,12297 -68,882,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,4,1993,12327 -626,542,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,4,1993,12358 -678,119,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,4,1993,12388 -361,248,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,1,1994,12419 -464,868,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,1,1994,12450 -681,841,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,1,1994,12478 -377,484,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,2,1994,12509 -222,986,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,2,1994,12539 -972,39,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,2,1994,12570 -56,930,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,3,1994,12600 -695,252,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,3,1994,12631 -908,794,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,3,1994,12662 -328,658,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,4,1994,12692 -891,139,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,4,1994,12723 -265,331,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,4,1994,12753 -251,261,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,1,1993,12054 -783,122,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,1,1993,12085 -425,296,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,1,1993,12113 -859,391,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,2,1993,12144 -314,75,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,2,1993,12174 -153,731,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,2,1993,12205 
-955,883,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,3,1993,12235 -654,707,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,3,1993,12266 -693,97,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,3,1993,12297 -757,390,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,4,1993,12327 -221,237,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,4,1993,12358 -942,496,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,4,1993,12388 -31,814,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,1,1994,12419 -540,765,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,1,1994,12450 -352,308,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,1,1994,12478 -904,327,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,2,1994,12509 -436,266,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,2,1994,12539 -281,699,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,2,1994,12570 -801,599,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,3,1994,12600 -273,950,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,3,1994,12631 -716,117,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,3,1994,12662 -902,632,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,4,1994,12692 -341,35,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,4,1994,12723 -155,562,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,4,1994,12753 -796,144,U.S.A.,WEST,EDUCATION,OFFICE,DESK,1,1993,12054 -257,142,U.S.A.,WEST,EDUCATION,OFFICE,DESK,1,1993,12085 -611,273,U.S.A.,WEST,EDUCATION,OFFICE,DESK,1,1993,12113 -6,915,U.S.A.,WEST,EDUCATION,OFFICE,DESK,2,1993,12144 -125,920,U.S.A.,WEST,EDUCATION,OFFICE,DESK,2,1993,12174 -745,294,U.S.A.,WEST,EDUCATION,OFFICE,DESK,2,1993,12205 -437,681,U.S.A.,WEST,EDUCATION,OFFICE,DESK,3,1993,12235 -906,86,U.S.A.,WEST,EDUCATION,OFFICE,DESK,3,1993,12266 -844,764,U.S.A.,WEST,EDUCATION,OFFICE,DESK,3,1993,12297 -413,269,U.S.A.,WEST,EDUCATION,OFFICE,DESK,4,1993,12327 -869,138,U.S.A.,WEST,EDUCATION,OFFICE,DESK,4,1993,12358 -403,834,U.S.A.,WEST,EDUCATION,OFFICE,DESK,4,1993,12388 -137,112,U.S.A.,WEST,EDUCATION,OFFICE,DESK,1,1994,12419 -922,921,U.S.A.,WEST,EDUCATION,OFFICE,DESK,1,1994,12450 -202,859,U.S.A.,WEST,EDUCATION,OFFICE,DESK,1,1994,12478 -955,442,U.S.A.,WEST,EDUCATION,OFFICE,DESK,2,1994,12509 -781,593,U.S.A.,WEST,EDUCATION,OFFICE,DESK,2,1994,12539 
-12,346,U.S.A.,WEST,EDUCATION,OFFICE,DESK,2,1994,12570 -931,312,U.S.A.,WEST,EDUCATION,OFFICE,DESK,3,1994,12600 -95,690,U.S.A.,WEST,EDUCATION,OFFICE,DESK,3,1994,12631 -795,344,U.S.A.,WEST,EDUCATION,OFFICE,DESK,3,1994,12662 -542,784,U.S.A.,WEST,EDUCATION,OFFICE,DESK,4,1994,12692 -935,639,U.S.A.,WEST,EDUCATION,OFFICE,DESK,4,1994,12723 -269,726,U.S.A.,WEST,EDUCATION,OFFICE,DESK,4,1994,12753 -197,596,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,1,1993,12054 -828,263,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,1,1993,12085 -461,194,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,1,1993,12113 -35,895,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,2,1993,12144 -88,502,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,2,1993,12174 -832,342,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,2,1993,12205 -900,421,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,3,1993,12235 -368,901,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,3,1993,12266 -201,474,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,3,1993,12297 -758,571,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,4,1993,12327 -504,511,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,4,1993,12358 -864,379,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,4,1993,12388 -574,68,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,1,1994,12419 -61,210,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,1,1994,12450 -565,478,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,1,1994,12478 -475,296,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,2,1994,12509 -44,664,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,2,1994,12539 -145,880,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,2,1994,12570 -813,607,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,3,1994,12600 -703,97,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,3,1994,12631 -757,908,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,3,1994,12662 -96,152,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,4,1994,12692 -860,622,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,4,1994,12723 -750,309,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,4,1994,12753 -585,912,U.S.A.,WEST,CONSUMER,FURNITURE,BED,1,1993,12054 -127,429,U.S.A.,WEST,CONSUMER,FURNITURE,BED,1,1993,12085 -669,580,U.S.A.,WEST,CONSUMER,FURNITURE,BED,1,1993,12113 
-708,179,U.S.A.,WEST,CONSUMER,FURNITURE,BED,2,1993,12144 -830,119,U.S.A.,WEST,CONSUMER,FURNITURE,BED,2,1993,12174 -550,369,U.S.A.,WEST,CONSUMER,FURNITURE,BED,2,1993,12205 -762,882,U.S.A.,WEST,CONSUMER,FURNITURE,BED,3,1993,12235 -468,727,U.S.A.,WEST,CONSUMER,FURNITURE,BED,3,1993,12266 -151,823,U.S.A.,WEST,CONSUMER,FURNITURE,BED,3,1993,12297 -103,783,U.S.A.,WEST,CONSUMER,FURNITURE,BED,4,1993,12327 -876,884,U.S.A.,WEST,CONSUMER,FURNITURE,BED,4,1993,12358 -881,891,U.S.A.,WEST,CONSUMER,FURNITURE,BED,4,1993,12388 -116,909,U.S.A.,WEST,CONSUMER,FURNITURE,BED,1,1994,12419 -677,765,U.S.A.,WEST,CONSUMER,FURNITURE,BED,1,1994,12450 -477,180,U.S.A.,WEST,CONSUMER,FURNITURE,BED,1,1994,12478 -154,712,U.S.A.,WEST,CONSUMER,FURNITURE,BED,2,1994,12509 -331,175,U.S.A.,WEST,CONSUMER,FURNITURE,BED,2,1994,12539 -784,869,U.S.A.,WEST,CONSUMER,FURNITURE,BED,2,1994,12570 -563,820,U.S.A.,WEST,CONSUMER,FURNITURE,BED,3,1994,12600 -229,554,U.S.A.,WEST,CONSUMER,FURNITURE,BED,3,1994,12631 -451,126,U.S.A.,WEST,CONSUMER,FURNITURE,BED,3,1994,12662 -974,760,U.S.A.,WEST,CONSUMER,FURNITURE,BED,4,1994,12692 -484,446,U.S.A.,WEST,CONSUMER,FURNITURE,BED,4,1994,12723 -69,254,U.S.A.,WEST,CONSUMER,FURNITURE,BED,4,1994,12753 -755,516,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,1,1993,12054 -331,779,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,1,1993,12085 -482,987,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,1,1993,12113 -632,318,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,2,1993,12144 -750,427,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,2,1993,12174 -618,86,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,2,1993,12205 -935,553,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,3,1993,12235 -716,315,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,3,1993,12266 -205,328,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,3,1993,12297 -215,521,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,4,1993,12327 -871,156,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,4,1993,12358 -552,841,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,4,1993,12388 -619,623,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,1,1994,12419 -701,849,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,1,1994,12450 
-104,438,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,1,1994,12478 -114,719,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,2,1994,12509 -854,906,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,2,1994,12539 -563,267,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,2,1994,12570 -73,542,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,3,1994,12600 -427,552,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,3,1994,12631 -348,428,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,3,1994,12662 -148,158,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,4,1994,12692 -895,379,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,4,1994,12723 -394,142,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,4,1994,12753 -792,588,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,1,1993,12054 -175,506,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,1,1993,12085 -208,382,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,1,1993,12113 -354,132,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,2,1993,12144 -163,652,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,2,1993,12174 -336,723,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,2,1993,12205 -804,682,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,3,1993,12235 -863,382,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,3,1993,12266 -326,125,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,3,1993,12297 -568,321,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,4,1993,12327 -691,922,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,4,1993,12358 -152,884,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,4,1993,12388 -565,38,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,1,1994,12419 -38,194,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,1,1994,12450 -185,996,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,1,1994,12478 -318,532,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,2,1994,12509 -960,391,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,2,1994,12539 -122,104,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,2,1994,12570 -400,22,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,3,1994,12600 -301,650,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,3,1994,12631 -909,143,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,3,1994,12662 -433,999,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,4,1994,12692 -508,415,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,4,1994,12723 -648,350,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,4,1994,12753 -793,342,U.S.A.,WEST,CONSUMER,OFFICE,DESK,1,1993,12054 
-129,215,U.S.A.,WEST,CONSUMER,OFFICE,DESK,1,1993,12085 -481,52,U.S.A.,WEST,CONSUMER,OFFICE,DESK,1,1993,12113 -406,292,U.S.A.,WEST,CONSUMER,OFFICE,DESK,2,1993,12144 -512,862,U.S.A.,WEST,CONSUMER,OFFICE,DESK,2,1993,12174 -668,309,U.S.A.,WEST,CONSUMER,OFFICE,DESK,2,1993,12205 -551,886,U.S.A.,WEST,CONSUMER,OFFICE,DESK,3,1993,12235 -124,172,U.S.A.,WEST,CONSUMER,OFFICE,DESK,3,1993,12266 -655,912,U.S.A.,WEST,CONSUMER,OFFICE,DESK,3,1993,12297 -523,666,U.S.A.,WEST,CONSUMER,OFFICE,DESK,4,1993,12327 -739,656,U.S.A.,WEST,CONSUMER,OFFICE,DESK,4,1993,12358 -87,145,U.S.A.,WEST,CONSUMER,OFFICE,DESK,4,1993,12388 -890,664,U.S.A.,WEST,CONSUMER,OFFICE,DESK,1,1994,12419 -665,639,U.S.A.,WEST,CONSUMER,OFFICE,DESK,1,1994,12450 -329,707,U.S.A.,WEST,CONSUMER,OFFICE,DESK,1,1994,12478 -417,891,U.S.A.,WEST,CONSUMER,OFFICE,DESK,2,1994,12509 -828,466,U.S.A.,WEST,CONSUMER,OFFICE,DESK,2,1994,12539 -298,451,U.S.A.,WEST,CONSUMER,OFFICE,DESK,2,1994,12570 -356,451,U.S.A.,WEST,CONSUMER,OFFICE,DESK,3,1994,12600 -909,874,U.S.A.,WEST,CONSUMER,OFFICE,DESK,3,1994,12631 -251,805,U.S.A.,WEST,CONSUMER,OFFICE,DESK,3,1994,12662 -526,426,U.S.A.,WEST,CONSUMER,OFFICE,DESK,4,1994,12692 -652,932,U.S.A.,WEST,CONSUMER,OFFICE,DESK,4,1994,12723 -573,581,U.S.A.,WEST,CONSUMER,OFFICE,DESK,4,1994,12753 diff --git a/pandas/io/tests/sas/test_sas.py b/pandas/io/tests/sas/test_sas.py deleted file mode 100644 index 237e3676c3b3d..0000000000000 --- a/pandas/io/tests/sas/test_sas.py +++ /dev/null @@ -1,13 +0,0 @@ -import pandas.util.testing as tm -from pandas.compat import StringIO -from pandas import read_sas - - -class TestSas(tm.TestCase): - - def test_sas_buffer_format(self): - - # GH14947 - b = StringIO("") - with self.assertRaises(ValueError): - read_sas(b) diff --git a/pandas/io/tests/test_common.py b/pandas/io/tests/test_common.py deleted file mode 100644 index 3c980cae3351a..0000000000000 --- a/pandas/io/tests/test_common.py +++ /dev/null @@ -1,159 +0,0 @@ -""" - Tests for the pandas.io.common functionalities -""" -import 
mmap -import os -from os.path import isabs - -import pandas.util.testing as tm - -from pandas.io import common -from pandas.compat import is_platform_windows, StringIO - -from pandas import read_csv, concat -import pandas as pd - -try: - from pathlib import Path -except ImportError: - pass - -try: - from py.path import local as LocalPath -except ImportError: - pass - - -class TestCommonIOCapabilities(tm.TestCase): - data1 = """index,A,B,C,D -foo,2,3,4,5 -bar,7,8,9,10 -baz,12,13,14,15 -qux,12,13,14,15 -foo2,12,13,14,15 -bar2,12,13,14,15 -""" - - def test_expand_user(self): - filename = '~/sometest' - expanded_name = common._expand_user(filename) - - self.assertNotEqual(expanded_name, filename) - self.assertTrue(isabs(expanded_name)) - self.assertEqual(os.path.expanduser(filename), expanded_name) - - def test_expand_user_normal_path(self): - filename = '/somefolder/sometest' - expanded_name = common._expand_user(filename) - - self.assertEqual(expanded_name, filename) - self.assertEqual(os.path.expanduser(filename), expanded_name) - - def test_stringify_path_pathlib(self): - tm._skip_if_no_pathlib() - - rel_path = common._stringify_path(Path('.')) - self.assertEqual(rel_path, '.') - redundant_path = common._stringify_path(Path('foo//bar')) - self.assertEqual(redundant_path, os.path.join('foo', 'bar')) - - def test_stringify_path_localpath(self): - tm._skip_if_no_localpath() - - path = os.path.join('foo', 'bar') - abs_path = os.path.abspath(path) - lpath = LocalPath(path) - self.assertEqual(common._stringify_path(lpath), abs_path) - - def test_get_filepath_or_buffer_with_path(self): - filename = '~/sometest' - filepath_or_buffer, _, _ = common.get_filepath_or_buffer(filename) - self.assertNotEqual(filepath_or_buffer, filename) - self.assertTrue(isabs(filepath_or_buffer)) - self.assertEqual(os.path.expanduser(filename), filepath_or_buffer) - - def test_get_filepath_or_buffer_with_buffer(self): - input_buffer = StringIO() - filepath_or_buffer, _, _ = 
common.get_filepath_or_buffer(input_buffer) - self.assertEqual(filepath_or_buffer, input_buffer) - - def test_iterator(self): - reader = read_csv(StringIO(self.data1), chunksize=1) - result = concat(reader, ignore_index=True) - expected = read_csv(StringIO(self.data1)) - tm.assert_frame_equal(result, expected) - - # GH12153 - it = read_csv(StringIO(self.data1), chunksize=1) - first = next(it) - tm.assert_frame_equal(first, expected.iloc[[0]]) - tm.assert_frame_equal(concat(it), expected.iloc[1:]) - - def test_error_rename(self): - # see gh-12665 - try: - raise common.CParserError() - except common.ParserError: - pass - - try: - raise common.ParserError() - except common.CParserError: - pass - - try: - raise common.ParserError() - except pd.parser.CParserError: - pass - - -class TestMMapWrapper(tm.TestCase): - - def setUp(self): - self.mmap_file = os.path.join(tm.get_data_path(), - 'test_mmap.csv') - - def test_constructor_bad_file(self): - non_file = StringIO('I am not a file') - non_file.fileno = lambda: -1 - - # the error raised is different on Windows - if is_platform_windows(): - msg = "The parameter is incorrect" - err = OSError - else: - msg = "[Errno 22]" - err = mmap.error - - tm.assertRaisesRegexp(err, msg, common.MMapWrapper, non_file) - - target = open(self.mmap_file, 'r') - target.close() - - msg = "I/O operation on closed file" - tm.assertRaisesRegexp(ValueError, msg, common.MMapWrapper, target) - - def test_get_attr(self): - with open(self.mmap_file, 'r') as target: - wrapper = common.MMapWrapper(target) - - attrs = dir(wrapper.mmap) - attrs = [attr for attr in attrs - if not attr.startswith('__')] - attrs.append('__next__') - - for attr in attrs: - self.assertTrue(hasattr(wrapper, attr)) - - self.assertFalse(hasattr(wrapper, 'foo')) - - def test_next(self): - with open(self.mmap_file, 'r') as target: - wrapper = common.MMapWrapper(target) - lines = target.readlines() - - for line in lines: - next_line = next(wrapper) - 
self.assertEqual(next_line.strip(), line.strip()) - - self.assertRaises(StopIteration, next, wrapper) diff --git a/pandas/io/tests/test_date_converters.py b/pandas/io/tests/test_date_converters.py deleted file mode 100644 index 5b54925c65fbd..0000000000000 --- a/pandas/io/tests/test_date_converters.py +++ /dev/null @@ -1,150 +0,0 @@ -from pandas.compat import StringIO -from datetime import date, datetime - -import numpy as np - -from pandas import DataFrame, MultiIndex -from pandas.io.parsers import (read_csv, read_table) -from pandas.util.testing import assert_frame_equal -import pandas.io.date_converters as conv -import pandas.util.testing as tm -from pandas.compat.numpy import np_array_datetime64_compat - - -class TestConverters(tm.TestCase): - - def setUp(self): - self.years = np.array([2007, 2008]) - self.months = np.array([1, 2]) - self.days = np.array([3, 4]) - self.hours = np.array([5, 6]) - self.minutes = np.array([7, 8]) - self.seconds = np.array([9, 0]) - self.dates = np.array(['2007/1/3', '2008/2/4'], dtype=object) - self.times = np.array(['05:07:09', '06:08:00'], dtype=object) - self.expected = np.array([datetime(2007, 1, 3, 5, 7, 9), - datetime(2008, 2, 4, 6, 8, 0)]) - - def test_parse_date_time(self): - result = conv.parse_date_time(self.dates, self.times) - self.assertTrue((result == self.expected).all()) - - data = """\ -date, time, a, b -2001-01-05, 10:00:00, 0.0, 10. -2001-01-05, 00:00:00, 1., 11. 
-""" - datecols = {'date_time': [0, 1]} - df = read_table(StringIO(data), sep=',', header=0, - parse_dates=datecols, date_parser=conv.parse_date_time) - self.assertIn('date_time', df) - self.assertEqual(df.date_time.loc[0], datetime(2001, 1, 5, 10, 0, 0)) - - data = ("KORD,19990127, 19:00:00, 18:56:00, 0.8100\n" - "KORD,19990127, 20:00:00, 19:56:00, 0.0100\n" - "KORD,19990127, 21:00:00, 20:56:00, -0.5900\n" - "KORD,19990127, 21:00:00, 21:18:00, -0.9900\n" - "KORD,19990127, 22:00:00, 21:56:00, -0.5900\n" - "KORD,19990127, 23:00:00, 22:56:00, -0.5900") - - date_spec = {'nominal': [1, 2], 'actual': [1, 3]} - df = read_csv(StringIO(data), header=None, parse_dates=date_spec, - date_parser=conv.parse_date_time) - - def test_parse_date_fields(self): - result = conv.parse_date_fields(self.years, self.months, self.days) - expected = np.array([datetime(2007, 1, 3), datetime(2008, 2, 4)]) - self.assertTrue((result == expected).all()) - - data = ("year, month, day, a\n 2001 , 01 , 10 , 10.\n" - "2001 , 02 , 1 , 11.") - datecols = {'ymd': [0, 1, 2]} - df = read_table(StringIO(data), sep=',', header=0, - parse_dates=datecols, - date_parser=conv.parse_date_fields) - self.assertIn('ymd', df) - self.assertEqual(df.ymd.loc[0], datetime(2001, 1, 10)) - - def test_datetime_six_col(self): - result = conv.parse_all_fields(self.years, self.months, self.days, - self.hours, self.minutes, self.seconds) - self.assertTrue((result == self.expected).all()) - - data = """\ -year, month, day, hour, minute, second, a, b -2001, 01, 05, 10, 00, 0, 0.0, 10. -2001, 01, 5, 10, 0, 00, 1., 11. -""" - datecols = {'ymdHMS': [0, 1, 2, 3, 4, 5]} - df = read_table(StringIO(data), sep=',', header=0, - parse_dates=datecols, - date_parser=conv.parse_all_fields) - self.assertIn('ymdHMS', df) - self.assertEqual(df.ymdHMS.loc[0], datetime(2001, 1, 5, 10, 0, 0)) - - def test_datetime_fractional_seconds(self): - data = """\ -year, month, day, hour, minute, second, a, b -2001, 01, 05, 10, 00, 0.123456, 0.0, 10. 
-2001, 01, 5, 10, 0, 0.500000, 1., 11. -""" - datecols = {'ymdHMS': [0, 1, 2, 3, 4, 5]} - df = read_table(StringIO(data), sep=',', header=0, - parse_dates=datecols, - date_parser=conv.parse_all_fields) - self.assertIn('ymdHMS', df) - self.assertEqual(df.ymdHMS.loc[0], datetime(2001, 1, 5, 10, 0, 0, - microsecond=123456)) - self.assertEqual(df.ymdHMS.loc[1], datetime(2001, 1, 5, 10, 0, 0, - microsecond=500000)) - - def test_generic(self): - data = "year, month, day, a\n 2001, 01, 10, 10.\n 2001, 02, 1, 11." - datecols = {'ym': [0, 1]} - dateconverter = lambda y, m: date(year=int(y), month=int(m), day=1) - df = read_table(StringIO(data), sep=',', header=0, - parse_dates=datecols, - date_parser=dateconverter) - self.assertIn('ym', df) - self.assertEqual(df.ym.loc[0], date(2001, 1, 1)) - - def test_dateparser_resolution_if_not_ns(self): - # issue 10245 - data = """\ -date,time,prn,rxstatus -2013-11-03,19:00:00,126,00E80000 -2013-11-03,19:00:00,23,00E80000 -2013-11-03,19:00:00,13,00E80000 -""" - - def date_parser(date, time): - datetime = np_array_datetime64_compat( - date + 'T' + time + 'Z', dtype='datetime64[s]') - return datetime - - df = read_csv(StringIO(data), date_parser=date_parser, - parse_dates={'datetime': ['date', 'time']}, - index_col=['datetime', 'prn']) - - datetimes = np_array_datetime64_compat(['2013-11-03T19:00:00Z'] * 3, - dtype='datetime64[s]') - df_correct = DataFrame(data={'rxstatus': ['00E80000'] * 3}, - index=MultiIndex.from_tuples( - [(datetimes[0], 126), - (datetimes[1], 23), - (datetimes[2], 13)], - names=['datetime', 'prn'])) - assert_frame_equal(df, df_correct) - - def test_parse_date_column_with_empty_string(self): - # GH 6428 - data = """case,opdate - 7,10/18/2006 - 7,10/18/2008 - 621, """ - result = read_csv(StringIO(data), parse_dates=['opdate']) - expected_data = [[7, '10/18/2006'], - [7, '10/18/2008'], - [621, ' ']] - expected = DataFrame(expected_data, columns=['case', 'opdate']) - assert_frame_equal(result, expected) diff --git 
a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py deleted file mode 100644 index a22c89184f20d..0000000000000 --- a/pandas/io/tests/test_excel.py +++ /dev/null @@ -1,2327 +0,0 @@ -# pylint: disable=E1101 - -from pandas.compat import u, range, map, openpyxl_compat, BytesIO, iteritems -from datetime import datetime, date, time -import sys -import os -from distutils.version import LooseVersion - -import warnings -import operator -import functools -import pytest - -from numpy import nan -import numpy as np - -import pandas as pd -from pandas import DataFrame, Index, MultiIndex -from pandas.io.parsers import read_csv -from pandas.io.excel import ( - ExcelFile, ExcelWriter, read_excel, _XlwtWriter, _Openpyxl1Writer, - _Openpyxl20Writer, _Openpyxl22Writer, register_writer, _XlsxWriter -) -from pandas.io.common import URLError -from pandas.util.testing import ensure_clean, makeCustomDataframe as mkdf -from pandas.core.config import set_option, get_option -import pandas.util.testing as tm - - -def _skip_if_no_xlrd(): - try: - import xlrd - ver = tuple(map(int, xlrd.__VERSION__.split(".")[:2])) - if ver < (0, 9): - pytest.skip('xlrd < 0.9, skipping') - except ImportError: - pytest.skip('xlrd not installed, skipping') - - -def _skip_if_no_xlwt(): - try: - import xlwt # NOQA - except ImportError: - pytest.skip('xlwt not installed, skipping') - - -def _skip_if_no_openpyxl(): - try: - import openpyxl # NOQA - except ImportError: - pytest.skip('openpyxl not installed, skipping') - - -def _skip_if_no_xlsxwriter(): - try: - import xlsxwriter # NOQA - except ImportError: - pytest.skip('xlsxwriter not installed, skipping') - - -def _skip_if_no_excelsuite(): - _skip_if_no_xlrd() - _skip_if_no_xlwt() - _skip_if_no_openpyxl() - - -def _skip_if_no_s3fs(): - try: - import s3fs # noqa - except ImportError: - pytest.skip('s3fs not installed, skipping') - - -_seriesd = tm.getSeriesData() -_tsd = tm.getTimeSeriesData() -_frame = DataFrame(_seriesd)[:10] -_frame2 = 
DataFrame(_seriesd, columns=['D', 'C', 'B', 'A'])[:10] -_tsframe = tm.makeTimeDataFrame()[:5] -_mixed_frame = _frame.copy() -_mixed_frame['foo'] = 'bar' - - -class SharedItems(object): - - def setUp(self): - self.dirpath = tm.get_data_path() - self.frame = _frame.copy() - self.frame2 = _frame2.copy() - self.tsframe = _tsframe.copy() - self.mixed_frame = _mixed_frame.copy() - - def get_csv_refdf(self, basename): - """ - Obtain the reference data from read_csv with the Python engine. - Test data path is defined by pandas.util.testing.get_data_path() - - Parameters - ---------- - - basename : str - File base name, excluding file extension. - - Returns - ------- - - dfref : DataFrame - """ - pref = os.path.join(self.dirpath, basename + '.csv') - dfref = read_csv(pref, index_col=0, parse_dates=True, engine='python') - return dfref - - def get_excelfile(self, basename): - """ - Return test data ExcelFile instance. Test data path is defined by - pandas.util.testing.get_data_path() - - Parameters - ---------- - - basename : str - File base name, excluding file extension. - - Returns - ------- - - excel : io.excel.ExcelFile - """ - return ExcelFile(os.path.join(self.dirpath, basename + self.ext)) - - def get_exceldf(self, basename, *args, **kwds): - """ - Return test data DataFrame. Test data path is defined by - pandas.util.testing.get_data_path() - - Parameters - ---------- - - basename : str - File base name, excluding file extension. - - Returns - ------- - - df : DataFrame - """ - pth = os.path.join(self.dirpath, basename + self.ext) - return read_excel(pth, *args, **kwds) - - -class ReadingTestsBase(SharedItems): - # This is based on ExcelWriterBase - # - # Base class for test cases to run with different Excel readers. - # To add a reader test, define the following: - # 1. A check_skip function that skips your tests if your reader isn't - # installed. - # 2. Add a property ext, which is the file extension that your reader - # reades from. (needs to start with '.' 
so it's a valid path) - # 3. Add a property engine_name, which is the name of the reader class. - # For the reader this is not used for anything at the moment. - - def setUp(self): - self.check_skip() - super(ReadingTestsBase, self).setUp() - - def test_parse_cols_int(self): - - dfref = self.get_csv_refdf('test1') - dfref = dfref.reindex(columns=['A', 'B', 'C']) - df1 = self.get_exceldf('test1', 'Sheet1', index_col=0, parse_cols=3) - df2 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0, - parse_cols=3) - # TODO add index to xls file) - tm.assert_frame_equal(df1, dfref, check_names=False) - tm.assert_frame_equal(df2, dfref, check_names=False) - - def test_parse_cols_list(self): - - dfref = self.get_csv_refdf('test1') - dfref = dfref.reindex(columns=['B', 'C']) - df1 = self.get_exceldf('test1', 'Sheet1', index_col=0, - parse_cols=[0, 2, 3]) - df2 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0, - parse_cols=[0, 2, 3]) - # TODO add index to xls file) - tm.assert_frame_equal(df1, dfref, check_names=False) - tm.assert_frame_equal(df2, dfref, check_names=False) - - def test_parse_cols_str(self): - - dfref = self.get_csv_refdf('test1') - - df1 = dfref.reindex(columns=['A', 'B', 'C']) - df2 = self.get_exceldf('test1', 'Sheet1', index_col=0, - parse_cols='A:D') - df3 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0, - parse_cols='A:D') - # TODO add index to xls, read xls ignores index name ? 
- tm.assert_frame_equal(df2, df1, check_names=False) - tm.assert_frame_equal(df3, df1, check_names=False) - - df1 = dfref.reindex(columns=['B', 'C']) - df2 = self.get_exceldf('test1', 'Sheet1', index_col=0, - parse_cols='A,C,D') - df3 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0, - parse_cols='A,C,D') - # TODO add index to xls file - tm.assert_frame_equal(df2, df1, check_names=False) - tm.assert_frame_equal(df3, df1, check_names=False) - - df1 = dfref.reindex(columns=['B', 'C']) - df2 = self.get_exceldf('test1', 'Sheet1', index_col=0, - parse_cols='A,C:D') - df3 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0, - parse_cols='A,C:D') - tm.assert_frame_equal(df2, df1, check_names=False) - tm.assert_frame_equal(df3, df1, check_names=False) - - def test_excel_stop_iterator(self): - - parsed = self.get_exceldf('test2', 'Sheet1') - expected = DataFrame([['aaaa', 'bbbbb']], columns=['Test', 'Test1']) - tm.assert_frame_equal(parsed, expected) - - def test_excel_cell_error_na(self): - - parsed = self.get_exceldf('test3', 'Sheet1') - expected = DataFrame([[np.nan]], columns=['Test']) - tm.assert_frame_equal(parsed, expected) - - def test_excel_passes_na(self): - - excel = self.get_excelfile('test4') - - parsed = read_excel(excel, 'Sheet1', keep_default_na=False, - na_values=['apple']) - expected = DataFrame([['NA'], [1], ['NA'], [np.nan], ['rabbit']], - columns=['Test']) - tm.assert_frame_equal(parsed, expected) - - parsed = read_excel(excel, 'Sheet1', keep_default_na=True, - na_values=['apple']) - expected = DataFrame([[np.nan], [1], [np.nan], [np.nan], ['rabbit']], - columns=['Test']) - tm.assert_frame_equal(parsed, expected) - - # 13967 - excel = self.get_excelfile('test5') - - parsed = read_excel(excel, 'Sheet1', keep_default_na=False, - na_values=['apple']) - expected = DataFrame([['1.#QNAN'], [1], ['nan'], [np.nan], ['rabbit']], - columns=['Test']) - tm.assert_frame_equal(parsed, expected) - - parsed = read_excel(excel, 'Sheet1', 
keep_default_na=True, - na_values=['apple']) - expected = DataFrame([[np.nan], [1], [np.nan], [np.nan], ['rabbit']], - columns=['Test']) - tm.assert_frame_equal(parsed, expected) - - def test_excel_table_sheet_by_index(self): - - excel = self.get_excelfile('test1') - dfref = self.get_csv_refdf('test1') - - df1 = read_excel(excel, 0, index_col=0) - df2 = read_excel(excel, 1, skiprows=[1], index_col=0) - tm.assert_frame_equal(df1, dfref, check_names=False) - tm.assert_frame_equal(df2, dfref, check_names=False) - - df1 = excel.parse(0, index_col=0) - df2 = excel.parse(1, skiprows=[1], index_col=0) - tm.assert_frame_equal(df1, dfref, check_names=False) - tm.assert_frame_equal(df2, dfref, check_names=False) - - df3 = read_excel(excel, 0, index_col=0, skipfooter=1) - df4 = read_excel(excel, 0, index_col=0, skip_footer=1) - tm.assert_frame_equal(df3, df1.iloc[:-1]) - tm.assert_frame_equal(df3, df4) - - df3 = excel.parse(0, index_col=0, skipfooter=1) - df4 = excel.parse(0, index_col=0, skip_footer=1) - tm.assert_frame_equal(df3, df1.iloc[:-1]) - tm.assert_frame_equal(df3, df4) - - import xlrd - with tm.assertRaises(xlrd.XLRDError): - read_excel(excel, 'asdf') - - def test_excel_table(self): - - dfref = self.get_csv_refdf('test1') - - df1 = self.get_exceldf('test1', 'Sheet1', index_col=0) - df2 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0) - # TODO add index to file - tm.assert_frame_equal(df1, dfref, check_names=False) - tm.assert_frame_equal(df2, dfref, check_names=False) - - df3 = self.get_exceldf('test1', 'Sheet1', index_col=0, - skipfooter=1) - df4 = self.get_exceldf('test1', 'Sheet1', index_col=0, - skip_footer=1) - tm.assert_frame_equal(df3, df1.iloc[:-1]) - tm.assert_frame_equal(df3, df4) - - def test_reader_special_dtypes(self): - - expected = DataFrame.from_items([ - ("IntCol", [1, 2, -3, 4, 0]), - ("FloatCol", [1.25, 2.25, 1.83, 1.92, 0.0000000005]), - ("BoolCol", [True, False, True, True, False]), - ("StrCol", [1, 2, 3, 4, 5]), - # GH5394 - 
this is why convert_float isn't vectorized - ("Str2Col", ["a", 3, "c", "d", "e"]), - ("DateCol", [datetime(2013, 10, 30), datetime(2013, 10, 31), - datetime(1905, 1, 1), datetime(2013, 12, 14), - datetime(2015, 3, 14)]) - ]) - - basename = 'test_types' - - # should read in correctly and infer types - actual = self.get_exceldf(basename, 'Sheet1') - tm.assert_frame_equal(actual, expected) - - # if not coercing number, then int comes in as float - float_expected = expected.copy() - float_expected["IntCol"] = float_expected["IntCol"].astype(float) - float_expected.loc[float_expected.index[1], "Str2Col"] = 3.0 - actual = self.get_exceldf(basename, 'Sheet1', convert_float=False) - tm.assert_frame_equal(actual, float_expected) - - # check setting Index (assuming xls and xlsx are the same here) - for icol, name in enumerate(expected.columns): - actual = self.get_exceldf(basename, 'Sheet1', index_col=icol) - exp = expected.set_index(name) - tm.assert_frame_equal(actual, exp) - - # convert_float and converters should be different but both accepted - expected["StrCol"] = expected["StrCol"].apply(str) - actual = self.get_exceldf( - basename, 'Sheet1', converters={"StrCol": str}) - tm.assert_frame_equal(actual, expected) - - no_convert_float = float_expected.copy() - no_convert_float["StrCol"] = no_convert_float["StrCol"].apply(str) - actual = self.get_exceldf(basename, 'Sheet1', convert_float=False, - converters={"StrCol": str}) - tm.assert_frame_equal(actual, no_convert_float) - - # GH8212 - support for converters and missing values - def test_reader_converters(self): - - basename = 'test_converters' - - expected = DataFrame.from_items([ - ("IntCol", [1, 2, -3, -1000, 0]), - ("FloatCol", [12.5, np.nan, 18.3, 19.2, 0.000000005]), - ("BoolCol", ['Found', 'Found', 'Found', 'Not found', 'Found']), - ("StrCol", ['1', np.nan, '3', '4', '5']), - ]) - - converters = {'IntCol': lambda x: int(x) if x != '' else -1000, - 'FloatCol': lambda x: 10 * x if x else np.nan, - 2: lambda x: 
'Found' if x != '' else 'Not found', - 3: lambda x: str(x) if x else '', - } - - # should read in correctly and set types of single cells (not array - # dtypes) - actual = self.get_exceldf(basename, 'Sheet1', converters=converters) - tm.assert_frame_equal(actual, expected) - - def test_reader_dtype(self): - # GH 8212 - basename = 'testdtype' - actual = self.get_exceldf(basename) - - expected = DataFrame({ - 'a': [1, 2, 3, 4], - 'b': [2.5, 3.5, 4.5, 5.5], - 'c': [1, 2, 3, 4], - 'd': [1.0, 2.0, np.nan, 4.0]}).reindex( - columns=['a', 'b', 'c', 'd']) - - tm.assert_frame_equal(actual, expected) - - actual = self.get_exceldf(basename, - dtype={'a': 'float64', - 'b': 'float32', - 'c': str}) - - expected['a'] = expected['a'].astype('float64') - expected['b'] = expected['b'].astype('float32') - expected['c'] = ['001', '002', '003', '004'] - tm.assert_frame_equal(actual, expected) - - with tm.assertRaises(ValueError): - actual = self.get_exceldf(basename, dtype={'d': 'int64'}) - - def test_reading_all_sheets(self): - # Test reading all sheetnames by setting sheetname to None, - # Ensure a dict is returned. - # See PR #9450 - basename = 'test_multisheet' - dfs = self.get_exceldf(basename, sheetname=None) - # ensure this is not alphabetical to test order preservation - expected_keys = ['Charlie', 'Alpha', 'Beta'] - tm.assert_contains_all(expected_keys, dfs.keys()) - # Issue 9930 - # Ensure sheet order is preserved - tm.assert_equal(expected_keys, list(dfs.keys())) - - def test_reading_multiple_specific_sheets(self): - # Test reading specific sheetnames by specifying a mixed list - # of integers and strings, and confirm that duplicated sheet - # references (positions/names) are removed properly. - # Ensure a dict is returned - # See PR #9450 - basename = 'test_multisheet' - # Explicitly request duplicates. Only the set should be returned. 
- expected_keys = [2, 'Charlie', 'Charlie'] - dfs = self.get_exceldf(basename, sheetname=expected_keys) - expected_keys = list(set(expected_keys)) - tm.assert_contains_all(expected_keys, dfs.keys()) - assert len(expected_keys) == len(dfs.keys()) - - def test_reading_all_sheets_with_blank(self): - # Test reading all sheetnames by setting sheetname to None, - # In the case where some sheets are blank. - # Issue #11711 - basename = 'blank_with_header' - dfs = self.get_exceldf(basename, sheetname=None) - expected_keys = ['Sheet1', 'Sheet2', 'Sheet3'] - tm.assert_contains_all(expected_keys, dfs.keys()) - - # GH6403 - def test_read_excel_blank(self): - actual = self.get_exceldf('blank', 'Sheet1') - tm.assert_frame_equal(actual, DataFrame()) - - def test_read_excel_blank_with_header(self): - expected = DataFrame(columns=['col_1', 'col_2']) - actual = self.get_exceldf('blank_with_header', 'Sheet1') - tm.assert_frame_equal(actual, expected) - - # GH 12292 : error when read one empty column from excel file - def test_read_one_empty_col_no_header(self): - _skip_if_no_xlwt() - _skip_if_no_openpyxl() - - df = pd.DataFrame( - [["", 1, 100], - ["", 2, 200], - ["", 3, 300], - ["", 4, 400]] - ) - with ensure_clean(self.ext) as path: - df.to_excel(path, 'no_header', index=False, header=False) - actual_header_none = read_excel( - path, - 'no_header', - parse_cols=[0], - header=None - ) - - actual_header_zero = read_excel( - path, - 'no_header', - parse_cols=[0], - header=0 - ) - expected = DataFrame() - tm.assert_frame_equal(actual_header_none, expected) - tm.assert_frame_equal(actual_header_zero, expected) - - def test_read_one_empty_col_with_header(self): - _skip_if_no_xlwt() - _skip_if_no_openpyxl() - - df = pd.DataFrame( - [["", 1, 100], - ["", 2, 200], - ["", 3, 300], - ["", 4, 400]] - ) - with ensure_clean(self.ext) as path: - df.to_excel(path, 'with_header', index=False, header=True) - actual_header_none = read_excel( - path, - 'with_header', - parse_cols=[0], - header=None - 
) - - actual_header_zero = read_excel( - path, - 'with_header', - parse_cols=[0], - header=0 - ) - expected_header_none = DataFrame(pd.Series([0], dtype='int64')) - tm.assert_frame_equal(actual_header_none, expected_header_none) - expected_header_zero = DataFrame(columns=[0], dtype='int64') - tm.assert_frame_equal(actual_header_zero, expected_header_zero) - - def test_set_column_names_in_parameter(self): - _skip_if_no_xlwt() - _skip_if_no_openpyxl() - - # GH 12870 : pass down column names associated with - # keyword argument names - refdf = pd.DataFrame([[1, 'foo'], [2, 'bar'], - [3, 'baz']], columns=['a', 'b']) - - with ensure_clean(self.ext) as pth: - with ExcelWriter(pth) as writer: - refdf.to_excel(writer, 'Data_no_head', - header=False, index=False) - refdf.to_excel(writer, 'Data_with_head', index=False) - - refdf.columns = ['A', 'B'] - - with ExcelFile(pth) as reader: - xlsdf_no_head = read_excel(reader, 'Data_no_head', - header=None, names=['A', 'B']) - xlsdf_with_head = read_excel(reader, 'Data_with_head', - index_col=None, names=['A', 'B']) - - tm.assert_frame_equal(xlsdf_no_head, refdf) - tm.assert_frame_equal(xlsdf_with_head, refdf) - - def test_date_conversion_overflow(self): - # GH 10001 : pandas.ExcelFile ignore parse_dates=False - expected = pd.DataFrame([[pd.Timestamp('2016-03-12'), 'Marc Johnson'], - [pd.Timestamp('2016-03-16'), 'Jack Black'], - [1e+20, 'Timothy Brown']], - columns=['DateColWithBigInt', 'StringCol']) - - result = self.get_exceldf('testdateoverflow') - tm.assert_frame_equal(result, expected) - - -class XlrdTests(ReadingTestsBase): - """ - This is the base class for the xlrd tests, and 3 different file formats - are supported: xls, xlsx, xlsm - """ - - def test_excel_read_buffer(self): - - pth = os.path.join(self.dirpath, 'test1' + self.ext) - expected = read_excel(pth, 'Sheet1', index_col=0) - with open(pth, 'rb') as f: - actual = read_excel(f, 'Sheet1', index_col=0) - tm.assert_frame_equal(expected, actual) - - with open(pth, 'rb') 
as f: - xls = ExcelFile(f) - actual = read_excel(xls, 'Sheet1', index_col=0) - tm.assert_frame_equal(expected, actual) - - def test_read_xlrd_Book(self): - _skip_if_no_xlwt() - - import xlrd - df = self.frame - with ensure_clean('.xls') as pth: - df.to_excel(pth, "SheetA") - book = xlrd.open_workbook(pth) - - with ExcelFile(book, engine="xlrd") as xl: - result = read_excel(xl, "SheetA") - tm.assert_frame_equal(df, result) - - result = read_excel(book, sheetname="SheetA", engine="xlrd") - tm.assert_frame_equal(df, result) - - @tm.network - def test_read_from_http_url(self): - url = ('https://raw.github.com/pandas-dev/pandas/master/' - 'pandas/io/tests/data/test1' + self.ext) - url_table = read_excel(url) - local_table = self.get_exceldf('test1') - tm.assert_frame_equal(url_table, local_table) - - @tm.network(check_before_test=True) - def test_read_from_s3_url(self): - _skip_if_no_s3fs() - - url = ('s3://pandas-test/test1' + self.ext) - url_table = read_excel(url) - local_table = self.get_exceldf('test1') - tm.assert_frame_equal(url_table, local_table) - - @tm.slow - def test_read_from_file_url(self): - - # FILE - if sys.version_info[:2] < (2, 6): - pytest.skip("file:// not supported with Python < 2.6") - - localtable = os.path.join(self.dirpath, 'test1' + self.ext) - local_table = read_excel(localtable) - - try: - url_table = read_excel('file://localhost/' + localtable) - except URLError: - # fails on some systems - import platform - pytest.skip("failing on %s" % - ' '.join(platform.uname()).strip()) - - tm.assert_frame_equal(url_table, local_table) - - def test_read_from_pathlib_path(self): - - # GH12655 - tm._skip_if_no_pathlib() - - from pathlib import Path - - str_path = os.path.join(self.dirpath, 'test1' + self.ext) - expected = read_excel(str_path, 'Sheet1', index_col=0) - - path_obj = Path(self.dirpath, 'test1' + self.ext) - actual = read_excel(path_obj, 'Sheet1', index_col=0) - - tm.assert_frame_equal(expected, actual) - - def 
test_read_from_py_localpath(self): - - # GH12655 - tm._skip_if_no_localpath() - - from py.path import local as LocalPath - - str_path = os.path.join(self.dirpath, 'test1' + self.ext) - expected = read_excel(str_path, 'Sheet1', index_col=0) - - abs_dir = os.path.abspath(self.dirpath) - path_obj = LocalPath(abs_dir).join('test1' + self.ext) - actual = read_excel(path_obj, 'Sheet1', index_col=0) - - tm.assert_frame_equal(expected, actual) - - def test_reader_closes_file(self): - - pth = os.path.join(self.dirpath, 'test1' + self.ext) - f = open(pth, 'rb') - with ExcelFile(f) as xlsx: - # parses okay - read_excel(xlsx, 'Sheet1', index_col=0) - - self.assertTrue(f.closed) - - def test_creating_and_reading_multiple_sheets(self): - # Test reading multiple sheets, from a runtime created excel file - # with multiple sheets. - # See PR #9450 - - _skip_if_no_xlwt() - _skip_if_no_openpyxl() - - def tdf(sheetname): - d, i = [11, 22, 33], [1, 2, 3] - return DataFrame(d, i, columns=[sheetname]) - - sheets = ['AAA', 'BBB', 'CCC'] - - dfs = [tdf(s) for s in sheets] - dfs = dict(zip(sheets, dfs)) - - with ensure_clean(self.ext) as pth: - with ExcelWriter(pth) as ew: - for sheetname, df in iteritems(dfs): - df.to_excel(ew, sheetname) - dfs_returned = read_excel(pth, sheetname=sheets) - for s in sheets: - tm.assert_frame_equal(dfs[s], dfs_returned[s]) - - def test_reader_seconds(self): - # Test reading times with and without milliseconds. GH5945. - import xlrd - - if LooseVersion(xlrd.__VERSION__) >= LooseVersion("0.9.3"): - # Xlrd >= 0.9.3 can handle Excel milliseconds. - expected = DataFrame.from_items([("Time", - [time(1, 2, 3), - time(2, 45, 56, 100000), - time(4, 29, 49, 200000), - time(6, 13, 42, 300000), - time(7, 57, 35, 400000), - time(9, 41, 28, 500000), - time(11, 25, 21, 600000), - time(13, 9, 14, 700000), - time(14, 53, 7, 800000), - time(16, 37, 0, 900000), - time(18, 20, 54)])]) - else: - # Xlrd < 0.9.3 rounds Excel milliseconds. 
- expected = DataFrame.from_items([("Time", - [time(1, 2, 3), - time(2, 45, 56), - time(4, 29, 49), - time(6, 13, 42), - time(7, 57, 35), - time(9, 41, 29), - time(11, 25, 22), - time(13, 9, 15), - time(14, 53, 8), - time(16, 37, 1), - time(18, 20, 54)])]) - - actual = self.get_exceldf('times_1900', 'Sheet1') - tm.assert_frame_equal(actual, expected) - - actual = self.get_exceldf('times_1904', 'Sheet1') - tm.assert_frame_equal(actual, expected) - - def test_read_excel_multiindex(self): - # GH 4679 - mi = MultiIndex.from_product([['foo', 'bar'], ['a', 'b']]) - mi_file = os.path.join(self.dirpath, 'testmultiindex' + self.ext) - - expected = DataFrame([[1, 2.5, pd.Timestamp('2015-01-01'), True], - [2, 3.5, pd.Timestamp('2015-01-02'), False], - [3, 4.5, pd.Timestamp('2015-01-03'), False], - [4, 5.5, pd.Timestamp('2015-01-04'), True]], - columns=mi) - - actual = read_excel(mi_file, 'mi_column', header=[0, 1]) - tm.assert_frame_equal(actual, expected) - actual = read_excel(mi_file, 'mi_column', header=[0, 1], index_col=0) - tm.assert_frame_equal(actual, expected) - - expected.columns = ['a', 'b', 'c', 'd'] - expected.index = mi - actual = read_excel(mi_file, 'mi_index', index_col=[0, 1]) - tm.assert_frame_equal(actual, expected, check_names=False) - - expected.columns = mi - actual = read_excel(mi_file, 'both', index_col=[0, 1], header=[0, 1]) - tm.assert_frame_equal(actual, expected, check_names=False) - - expected.index = mi.set_names(['ilvl1', 'ilvl2']) - expected.columns = ['a', 'b', 'c', 'd'] - actual = read_excel(mi_file, 'mi_index_name', index_col=[0, 1]) - tm.assert_frame_equal(actual, expected) - - expected.index = list(range(4)) - expected.columns = mi.set_names(['c1', 'c2']) - actual = read_excel(mi_file, 'mi_column_name', - header=[0, 1], index_col=0) - tm.assert_frame_equal(actual, expected) - - # Issue #11317 - expected.columns = mi.set_levels( - [1, 2], level=1).set_names(['c1', 'c2']) - actual = read_excel(mi_file, 'name_with_int', - index_col=0, 
header=[0, 1]) - tm.assert_frame_equal(actual, expected) - - expected.columns = mi.set_names(['c1', 'c2']) - expected.index = mi.set_names(['ilvl1', 'ilvl2']) - actual = read_excel(mi_file, 'both_name', - index_col=[0, 1], header=[0, 1]) - tm.assert_frame_equal(actual, expected) - - actual = read_excel(mi_file, 'both_name', - index_col=[0, 1], header=[0, 1]) - tm.assert_frame_equal(actual, expected) - - actual = read_excel(mi_file, 'both_name_skiprows', index_col=[0, 1], - header=[0, 1], skiprows=2) - tm.assert_frame_equal(actual, expected) - - def test_read_excel_multiindex_empty_level(self): - # GH 12453 - _skip_if_no_xlsxwriter() - with ensure_clean('.xlsx') as path: - df = DataFrame({ - ('Zero', ''): {0: 0}, - ('One', 'x'): {0: 1}, - ('Two', 'X'): {0: 3}, - ('Two', 'Y'): {0: 7} - }) - - expected = DataFrame({ - ('Zero', 'Unnamed: 3_level_1'): {0: 0}, - ('One', u'x'): {0: 1}, - ('Two', u'X'): {0: 3}, - ('Two', u'Y'): {0: 7} - }) - - df.to_excel(path) - actual = pd.read_excel(path, header=[0, 1]) - tm.assert_frame_equal(actual, expected) - - df = pd.DataFrame({ - ('Beg', ''): {0: 0}, - ('Middle', 'x'): {0: 1}, - ('Tail', 'X'): {0: 3}, - ('Tail', 'Y'): {0: 7} - }) - - expected = pd.DataFrame({ - ('Beg', 'Unnamed: 0_level_1'): {0: 0}, - ('Middle', u'x'): {0: 1}, - ('Tail', u'X'): {0: 3}, - ('Tail', u'Y'): {0: 7} - }) - - df.to_excel(path) - actual = pd.read_excel(path, header=[0, 1]) - tm.assert_frame_equal(actual, expected) - - def test_excel_multindex_roundtrip(self): - # GH 4679 - _skip_if_no_xlsxwriter() - with ensure_clean('.xlsx') as pth: - for c_idx_names in [True, False]: - for r_idx_names in [True, False]: - for c_idx_levels in [1, 3]: - for r_idx_levels in [1, 3]: - # column index name can't be serialized unless - # MultiIndex - if (c_idx_levels == 1 and c_idx_names): - continue - - # empty name case current read in as unamed levels, - # not Nones - check_names = True - if not r_idx_names and r_idx_levels > 1: - check_names = False - - df = mkdf(5, 5, 
c_idx_names, - r_idx_names, c_idx_levels, - r_idx_levels) - df.to_excel(pth) - act = pd.read_excel( - pth, index_col=list(range(r_idx_levels)), - header=list(range(c_idx_levels))) - tm.assert_frame_equal( - df, act, check_names=check_names) - - df.iloc[0, :] = np.nan - df.to_excel(pth) - act = pd.read_excel( - pth, index_col=list(range(r_idx_levels)), - header=list(range(c_idx_levels))) - tm.assert_frame_equal( - df, act, check_names=check_names) - - df.iloc[-1, :] = np.nan - df.to_excel(pth) - act = pd.read_excel( - pth, index_col=list(range(r_idx_levels)), - header=list(range(c_idx_levels))) - tm.assert_frame_equal( - df, act, check_names=check_names) - - def test_excel_oldindex_format(self): - # GH 4679 - data = np.array([['R0C0', 'R0C1', 'R0C2', 'R0C3', 'R0C4'], - ['R1C0', 'R1C1', 'R1C2', 'R1C3', 'R1C4'], - ['R2C0', 'R2C1', 'R2C2', 'R2C3', 'R2C4'], - ['R3C0', 'R3C1', 'R3C2', 'R3C3', 'R3C4'], - ['R4C0', 'R4C1', 'R4C2', 'R4C3', 'R4C4']]) - columns = ['C_l0_g0', 'C_l0_g1', 'C_l0_g2', 'C_l0_g3', 'C_l0_g4'] - mi = MultiIndex(levels=[['R_l0_g0', 'R_l0_g1', 'R_l0_g2', - 'R_l0_g3', 'R_l0_g4'], - ['R_l1_g0', 'R_l1_g1', 'R_l1_g2', - 'R_l1_g3', 'R_l1_g4']], - labels=[[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]], - names=['R0', 'R1']) - si = Index(['R_l0_g0', 'R_l0_g1', 'R_l0_g2', - 'R_l0_g3', 'R_l0_g4'], name='R0') - - in_file = os.path.join( - self.dirpath, 'test_index_name_pre17' + self.ext) - - expected = pd.DataFrame(data, index=si, columns=columns) - with tm.assert_produces_warning(FutureWarning): - actual = pd.read_excel( - in_file, 'single_names', has_index_names=True) - tm.assert_frame_equal(actual, expected) - - expected.index.name = None - actual = pd.read_excel(in_file, 'single_no_names') - tm.assert_frame_equal(actual, expected) - with tm.assert_produces_warning(FutureWarning): - actual = pd.read_excel( - in_file, 'single_no_names', has_index_names=False) - tm.assert_frame_equal(actual, expected) - - expected.index = mi - with tm.assert_produces_warning(FutureWarning): - 
actual = pd.read_excel( - in_file, 'multi_names', has_index_names=True) - tm.assert_frame_equal(actual, expected) - - expected.index.names = [None, None] - actual = pd.read_excel(in_file, 'multi_no_names', index_col=[0, 1]) - tm.assert_frame_equal(actual, expected, check_names=False) - with tm.assert_produces_warning(FutureWarning): - actual = pd.read_excel(in_file, 'multi_no_names', index_col=[0, 1], - has_index_names=False) - tm.assert_frame_equal(actual, expected, check_names=False) - - def test_read_excel_bool_header_arg(self): - # GH 6114 - for arg in [True, False]: - with tm.assertRaises(TypeError): - pd.read_excel(os.path.join(self.dirpath, 'test1' + self.ext), - header=arg) - - def test_read_excel_chunksize(self): - # GH 8011 - with tm.assertRaises(NotImplementedError): - pd.read_excel(os.path.join(self.dirpath, 'test1' + self.ext), - chunksize=100) - - def test_read_excel_parse_dates(self): - # GH 11544 - with tm.assertRaises(NotImplementedError): - pd.read_excel(os.path.join(self.dirpath, 'test1' + self.ext), - parse_dates=True) - - def test_read_excel_date_parser(self): - # GH 11544 - with tm.assertRaises(NotImplementedError): - dateparse = lambda x: pd.datetime.strptime(x, '%Y-%m-%d %H:%M:%S') - pd.read_excel(os.path.join(self.dirpath, 'test1' + self.ext), - date_parser=dateparse) - - def test_read_excel_skiprows_list(self): - # GH 4903 - actual = pd.read_excel(os.path.join(self.dirpath, - 'testskiprows' + self.ext), - 'skiprows_list', skiprows=[0, 2]) - expected = DataFrame([[1, 2.5, pd.Timestamp('2015-01-01'), True], - [2, 3.5, pd.Timestamp('2015-01-02'), False], - [3, 4.5, pd.Timestamp('2015-01-03'), False], - [4, 5.5, pd.Timestamp('2015-01-04'), True]], - columns=['a', 'b', 'c', 'd']) - tm.assert_frame_equal(actual, expected) - - actual = pd.read_excel(os.path.join(self.dirpath, - 'testskiprows' + self.ext), - 'skiprows_list', skiprows=np.array([0, 2])) - tm.assert_frame_equal(actual, expected) - - def test_read_excel_squeeze(self): - # GH 12157 - f 
= os.path.join(self.dirpath, 'test_squeeze' + self.ext) - - actual = pd.read_excel(f, 'two_columns', index_col=0, squeeze=True) - expected = pd.Series([2, 3, 4], [4, 5, 6], name='b') - expected.index.name = 'a' - tm.assert_series_equal(actual, expected) - - actual = pd.read_excel(f, 'two_columns', squeeze=True) - expected = pd.DataFrame({'a': [4, 5, 6], - 'b': [2, 3, 4]}) - tm.assert_frame_equal(actual, expected) - - actual = pd.read_excel(f, 'one_column', squeeze=True) - expected = pd.Series([1, 2, 3], name='a') - tm.assert_series_equal(actual, expected) - - -class XlsReaderTests(XlrdTests, tm.TestCase): - ext = '.xls' - engine_name = 'xlrd' - check_skip = staticmethod(_skip_if_no_xlrd) - - -class XlsxReaderTests(XlrdTests, tm.TestCase): - ext = '.xlsx' - engine_name = 'xlrd' - check_skip = staticmethod(_skip_if_no_xlrd) - - -class XlsmReaderTests(XlrdTests, tm.TestCase): - ext = '.xlsm' - engine_name = 'xlrd' - check_skip = staticmethod(_skip_if_no_xlrd) - - -class ExcelWriterBase(SharedItems): - # Base class for test cases to run with different Excel writers. - # To add a writer test, define the following: - # 1. A check_skip function that skips your tests if your writer isn't - # installed. - # 2. Add a property ext, which is the file extension that your writer - # writes to. (needs to start with '.' so it's a valid path) - # 3. Add a property engine_name, which is the name of the writer class. - - # Test with MultiIndex and Hierarchical Rows as merged cells. 
- merge_cells = True - - def setUp(self): - self.check_skip() - super(ExcelWriterBase, self).setUp() - self.option_name = 'io.excel.%s.writer' % self.ext.strip('.') - self.prev_engine = get_option(self.option_name) - set_option(self.option_name, self.engine_name) - - def tearDown(self): - set_option(self.option_name, self.prev_engine) - - def test_excel_sheet_by_name_raise(self): - _skip_if_no_xlrd() - import xlrd - - with ensure_clean(self.ext) as pth: - gt = DataFrame(np.random.randn(10, 2)) - gt.to_excel(pth) - xl = ExcelFile(pth) - df = read_excel(xl, 0) - tm.assert_frame_equal(gt, df) - - with tm.assertRaises(xlrd.XLRDError): - read_excel(xl, '0') - - def test_excelwriter_contextmanager(self): - _skip_if_no_xlrd() - - with ensure_clean(self.ext) as pth: - with ExcelWriter(pth) as writer: - self.frame.to_excel(writer, 'Data1') - self.frame2.to_excel(writer, 'Data2') - - with ExcelFile(pth) as reader: - found_df = read_excel(reader, 'Data1') - found_df2 = read_excel(reader, 'Data2') - tm.assert_frame_equal(found_df, self.frame) - tm.assert_frame_equal(found_df2, self.frame2) - - def test_roundtrip(self): - _skip_if_no_xlrd() - - with ensure_clean(self.ext) as path: - self.frame['A'][:5] = nan - - self.frame.to_excel(path, 'test1') - self.frame.to_excel(path, 'test1', columns=['A', 'B']) - self.frame.to_excel(path, 'test1', header=False) - self.frame.to_excel(path, 'test1', index=False) - - # test roundtrip - self.frame.to_excel(path, 'test1') - recons = read_excel(path, 'test1', index_col=0) - tm.assert_frame_equal(self.frame, recons) - - self.frame.to_excel(path, 'test1', index=False) - recons = read_excel(path, 'test1', index_col=None) - recons.index = self.frame.index - tm.assert_frame_equal(self.frame, recons) - - self.frame.to_excel(path, 'test1', na_rep='NA') - recons = read_excel(path, 'test1', index_col=0, na_values=['NA']) - tm.assert_frame_equal(self.frame, recons) - - # GH 3611 - self.frame.to_excel(path, 'test1', na_rep='88') - recons = 
read_excel(path, 'test1', index_col=0, na_values=['88']) - tm.assert_frame_equal(self.frame, recons) - - self.frame.to_excel(path, 'test1', na_rep='88') - recons = read_excel(path, 'test1', index_col=0, - na_values=[88, 88.0]) - tm.assert_frame_equal(self.frame, recons) - - # GH 6573 - self.frame.to_excel(path, 'Sheet1') - recons = read_excel(path, index_col=0) - tm.assert_frame_equal(self.frame, recons) - - self.frame.to_excel(path, '0') - recons = read_excel(path, index_col=0) - tm.assert_frame_equal(self.frame, recons) - - # GH 8825 Pandas Series should provide to_excel method - s = self.frame["A"] - s.to_excel(path) - recons = read_excel(path, index_col=0) - tm.assert_frame_equal(s.to_frame(), recons) - - def test_mixed(self): - _skip_if_no_xlrd() - - with ensure_clean(self.ext) as path: - self.mixed_frame.to_excel(path, 'test1') - reader = ExcelFile(path) - recons = read_excel(reader, 'test1', index_col=0) - tm.assert_frame_equal(self.mixed_frame, recons) - - def test_tsframe(self): - _skip_if_no_xlrd() - - df = tm.makeTimeDataFrame()[:5] - - with ensure_clean(self.ext) as path: - df.to_excel(path, 'test1') - reader = ExcelFile(path) - recons = read_excel(reader, 'test1') - tm.assert_frame_equal(df, recons) - - def test_basics_with_nan(self): - _skip_if_no_xlrd() - with ensure_clean(self.ext) as path: - self.frame['A'][:5] = nan - self.frame.to_excel(path, 'test1') - self.frame.to_excel(path, 'test1', columns=['A', 'B']) - self.frame.to_excel(path, 'test1', header=False) - self.frame.to_excel(path, 'test1', index=False) - - def test_int_types(self): - _skip_if_no_xlrd() - - for np_type in (np.int8, np.int16, np.int32, np.int64): - - with ensure_clean(self.ext) as path: - # Test np.int values read come back as int (rather than float - # which is Excel's format). 
- frame = DataFrame(np.random.randint(-10, 10, size=(10, 2)), - dtype=np_type) - frame.to_excel(path, 'test1') - reader = ExcelFile(path) - recons = read_excel(reader, 'test1') - int_frame = frame.astype(np.int64) - tm.assert_frame_equal(int_frame, recons) - recons2 = read_excel(path, 'test1') - tm.assert_frame_equal(int_frame, recons2) - - # test with convert_float=False comes back as float - float_frame = frame.astype(float) - recons = read_excel(path, 'test1', convert_float=False) - tm.assert_frame_equal(recons, float_frame, - check_index_type=False, - check_column_type=False) - - def test_float_types(self): - _skip_if_no_xlrd() - - for np_type in (np.float16, np.float32, np.float64): - with ensure_clean(self.ext) as path: - # Test np.float values read come back as float. - frame = DataFrame(np.random.random_sample(10), dtype=np_type) - frame.to_excel(path, 'test1') - reader = ExcelFile(path) - recons = read_excel(reader, 'test1').astype(np_type) - tm.assert_frame_equal(frame, recons, check_dtype=False) - - def test_bool_types(self): - _skip_if_no_xlrd() - - for np_type in (np.bool8, np.bool_): - with ensure_clean(self.ext) as path: - # Test np.bool values read come back as float. 
- frame = (DataFrame([1, 0, True, False], dtype=np_type)) - frame.to_excel(path, 'test1') - reader = ExcelFile(path) - recons = read_excel(reader, 'test1').astype(np_type) - tm.assert_frame_equal(frame, recons) - - def test_inf_roundtrip(self): - _skip_if_no_xlrd() - - frame = DataFrame([(1, np.inf), (2, 3), (5, -np.inf)]) - with ensure_clean(self.ext) as path: - frame.to_excel(path, 'test1') - reader = ExcelFile(path) - recons = read_excel(reader, 'test1') - tm.assert_frame_equal(frame, recons) - - def test_sheets(self): - _skip_if_no_xlrd() - - with ensure_clean(self.ext) as path: - self.frame['A'][:5] = nan - - self.frame.to_excel(path, 'test1') - self.frame.to_excel(path, 'test1', columns=['A', 'B']) - self.frame.to_excel(path, 'test1', header=False) - self.frame.to_excel(path, 'test1', index=False) - - # Test writing to separate sheets - writer = ExcelWriter(path) - self.frame.to_excel(writer, 'test1') - self.tsframe.to_excel(writer, 'test2') - writer.save() - reader = ExcelFile(path) - recons = read_excel(reader, 'test1', index_col=0) - tm.assert_frame_equal(self.frame, recons) - recons = read_excel(reader, 'test2', index_col=0) - tm.assert_frame_equal(self.tsframe, recons) - self.assertEqual(2, len(reader.sheet_names)) - self.assertEqual('test1', reader.sheet_names[0]) - self.assertEqual('test2', reader.sheet_names[1]) - - def test_colaliases(self): - _skip_if_no_xlrd() - - with ensure_clean(self.ext) as path: - self.frame['A'][:5] = nan - - self.frame.to_excel(path, 'test1') - self.frame.to_excel(path, 'test1', columns=['A', 'B']) - self.frame.to_excel(path, 'test1', header=False) - self.frame.to_excel(path, 'test1', index=False) - - # column aliases - col_aliases = Index(['AA', 'X', 'Y', 'Z']) - self.frame2.to_excel(path, 'test1', header=col_aliases) - reader = ExcelFile(path) - rs = read_excel(reader, 'test1', index_col=0) - xp = self.frame2.copy() - xp.columns = col_aliases - tm.assert_frame_equal(xp, rs) - - def test_roundtrip_indexlabels(self): - 
_skip_if_no_xlrd() - - with ensure_clean(self.ext) as path: - - self.frame['A'][:5] = nan - - self.frame.to_excel(path, 'test1') - self.frame.to_excel(path, 'test1', columns=['A', 'B']) - self.frame.to_excel(path, 'test1', header=False) - self.frame.to_excel(path, 'test1', index=False) - - # test index_label - frame = (DataFrame(np.random.randn(10, 2)) >= 0) - frame.to_excel(path, 'test1', - index_label=['test'], - merge_cells=self.merge_cells) - reader = ExcelFile(path) - recons = read_excel(reader, 'test1', - index_col=0, - ).astype(np.int64) - frame.index.names = ['test'] - self.assertEqual(frame.index.names, recons.index.names) - - frame = (DataFrame(np.random.randn(10, 2)) >= 0) - frame.to_excel(path, - 'test1', - index_label=['test', 'dummy', 'dummy2'], - merge_cells=self.merge_cells) - reader = ExcelFile(path) - recons = read_excel(reader, 'test1', - index_col=0, - ).astype(np.int64) - frame.index.names = ['test'] - self.assertEqual(frame.index.names, recons.index.names) - - frame = (DataFrame(np.random.randn(10, 2)) >= 0) - frame.to_excel(path, - 'test1', - index_label='test', - merge_cells=self.merge_cells) - reader = ExcelFile(path) - recons = read_excel(reader, 'test1', - index_col=0, - ).astype(np.int64) - frame.index.names = ['test'] - tm.assert_frame_equal(frame, recons.astype(bool)) - - with ensure_clean(self.ext) as path: - - self.frame.to_excel(path, - 'test1', - columns=['A', 'B', 'C', 'D'], - index=False, merge_cells=self.merge_cells) - # take 'A' and 'B' as indexes (same row as cols 'C', 'D') - df = self.frame.copy() - df = df.set_index(['A', 'B']) - - reader = ExcelFile(path) - recons = read_excel(reader, 'test1', index_col=[0, 1]) - tm.assert_frame_equal(df, recons, check_less_precise=True) - - def test_excel_roundtrip_indexname(self): - _skip_if_no_xlrd() - - df = DataFrame(np.random.randn(10, 4)) - df.index.name = 'foo' - - with ensure_clean(self.ext) as path: - df.to_excel(path, merge_cells=self.merge_cells) - - xf = ExcelFile(path) - 
result = read_excel(xf, xf.sheet_names[0], - index_col=0) - - tm.assert_frame_equal(result, df) - self.assertEqual(result.index.name, 'foo') - - def test_excel_roundtrip_datetime(self): - _skip_if_no_xlrd() - - # datetime.date, not sure what to test here exactly - tsf = self.tsframe.copy() - with ensure_clean(self.ext) as path: - - tsf.index = [x.date() for x in self.tsframe.index] - tsf.to_excel(path, 'test1', merge_cells=self.merge_cells) - reader = ExcelFile(path) - recons = read_excel(reader, 'test1') - tm.assert_frame_equal(self.tsframe, recons) - - # GH4133 - excel output format strings - def test_excel_date_datetime_format(self): - _skip_if_no_xlrd() - df = DataFrame([[date(2014, 1, 31), - date(1999, 9, 24)], - [datetime(1998, 5, 26, 23, 33, 4), - datetime(2014, 2, 28, 13, 5, 13)]], - index=['DATE', 'DATETIME'], columns=['X', 'Y']) - df_expected = DataFrame([[datetime(2014, 1, 31), - datetime(1999, 9, 24)], - [datetime(1998, 5, 26, 23, 33, 4), - datetime(2014, 2, 28, 13, 5, 13)]], - index=['DATE', 'DATETIME'], columns=['X', 'Y']) - - with ensure_clean(self.ext) as filename1: - with ensure_clean(self.ext) as filename2: - writer1 = ExcelWriter(filename1) - writer2 = ExcelWriter(filename2, - date_format='DD.MM.YYYY', - datetime_format='DD.MM.YYYY HH-MM-SS') - - df.to_excel(writer1, 'test1') - df.to_excel(writer2, 'test1') - - writer1.close() - writer2.close() - - reader1 = ExcelFile(filename1) - reader2 = ExcelFile(filename2) - - rs1 = read_excel(reader1, 'test1', index_col=None) - rs2 = read_excel(reader2, 'test1', index_col=None) - - tm.assert_frame_equal(rs1, rs2) - - # since the reader returns a datetime object for dates, we need - # to use df_expected to check the result - tm.assert_frame_equal(rs2, df_expected) - - def test_to_excel_periodindex(self): - _skip_if_no_xlrd() - - frame = self.tsframe - xp = frame.resample('M', kind='period').mean() - - with ensure_clean(self.ext) as path: - xp.to_excel(path, 'sht1') - - reader = ExcelFile(path) - rs = 
read_excel(reader, 'sht1', index_col=0) - tm.assert_frame_equal(xp, rs.to_period('M')) - - def test_to_excel_multiindex(self): - _skip_if_no_xlrd() - - frame = self.frame - arrays = np.arange(len(frame.index) * 2).reshape(2, -1) - new_index = MultiIndex.from_arrays(arrays, - names=['first', 'second']) - frame.index = new_index - - with ensure_clean(self.ext) as path: - frame.to_excel(path, 'test1', header=False) - frame.to_excel(path, 'test1', columns=['A', 'B']) - - # round trip - frame.to_excel(path, 'test1', merge_cells=self.merge_cells) - reader = ExcelFile(path) - df = read_excel(reader, 'test1', index_col=[0, 1], - parse_dates=False) - tm.assert_frame_equal(frame, df) - - # GH13511 - def test_to_excel_multiindex_nan_label(self): - _skip_if_no_xlrd() - - frame = pd.DataFrame({'A': [None, 2, 3], - 'B': [10, 20, 30], - 'C': np.random.sample(3)}) - frame = frame.set_index(['A', 'B']) - - with ensure_clean(self.ext) as path: - frame.to_excel(path, merge_cells=self.merge_cells) - df = read_excel(path, index_col=[0, 1]) - tm.assert_frame_equal(frame, df) - - # Test for Issue 11328. 
If column indices are integers, make - # sure they are handled correctly for either setting of - # merge_cells - def test_to_excel_multiindex_cols(self): - _skip_if_no_xlrd() - - frame = self.frame - arrays = np.arange(len(frame.index) * 2).reshape(2, -1) - new_index = MultiIndex.from_arrays(arrays, - names=['first', 'second']) - frame.index = new_index - - new_cols_index = MultiIndex.from_tuples([(40, 1), (40, 2), - (50, 1), (50, 2)]) - frame.columns = new_cols_index - header = [0, 1] - if not self.merge_cells: - header = 0 - - with ensure_clean(self.ext) as path: - # round trip - frame.to_excel(path, 'test1', merge_cells=self.merge_cells) - reader = ExcelFile(path) - df = read_excel(reader, 'test1', header=header, - index_col=[0, 1], - parse_dates=False) - if not self.merge_cells: - fm = frame.columns.format(sparsify=False, - adjoin=False, names=False) - frame.columns = [".".join(map(str, q)) for q in zip(*fm)] - tm.assert_frame_equal(frame, df) - - def test_to_excel_multiindex_dates(self): - _skip_if_no_xlrd() - - # try multiindex with dates - tsframe = self.tsframe.copy() - new_index = [tsframe.index, np.arange(len(tsframe.index))] - tsframe.index = MultiIndex.from_arrays(new_index) - - with ensure_clean(self.ext) as path: - tsframe.index.names = ['time', 'foo'] - tsframe.to_excel(path, 'test1', merge_cells=self.merge_cells) - reader = ExcelFile(path) - recons = read_excel(reader, 'test1', - index_col=[0, 1]) - - tm.assert_frame_equal(tsframe, recons) - self.assertEqual(recons.index.names, ('time', 'foo')) - - def test_to_excel_multiindex_no_write_index(self): - _skip_if_no_xlrd() - - # Test writing and re-reading a MI witout the index. GH 5616. - - # Initial non-MI frame. - frame1 = DataFrame({'a': [10, 20], 'b': [30, 40], 'c': [50, 60]}) - - # Add a MI. - frame2 = frame1.copy() - multi_index = MultiIndex.from_tuples([(70, 80), (90, 100)]) - frame2.index = multi_index - - with ensure_clean(self.ext) as path: - - # Write out to Excel without the index. 
- frame2.to_excel(path, 'test1', index=False) - - # Read it back in. - reader = ExcelFile(path) - frame3 = read_excel(reader, 'test1') - - # Test that it is the same as the initial frame. - tm.assert_frame_equal(frame1, frame3) - - def test_to_excel_float_format(self): - _skip_if_no_xlrd() - - df = DataFrame([[0.123456, 0.234567, 0.567567], - [12.32112, 123123.2, 321321.2]], - index=['A', 'B'], columns=['X', 'Y', 'Z']) - - with ensure_clean(self.ext) as filename: - df.to_excel(filename, 'test1', float_format='%.2f') - - reader = ExcelFile(filename) - rs = read_excel(reader, 'test1', index_col=None) - xp = DataFrame([[0.12, 0.23, 0.57], - [12.32, 123123.20, 321321.20]], - index=['A', 'B'], columns=['X', 'Y', 'Z']) - tm.assert_frame_equal(rs, xp) - - def test_to_excel_output_encoding(self): - _skip_if_no_xlrd() - - # avoid mixed inferred_type - df = DataFrame([[u'\u0192', u'\u0193', u'\u0194'], - [u'\u0195', u'\u0196', u'\u0197']], - index=[u'A\u0192', u'B'], - columns=[u'X\u0193', u'Y', u'Z']) - - with ensure_clean('__tmp_to_excel_float_format__.' 
+ self.ext)\ - as filename: - df.to_excel(filename, sheet_name='TestSheet', encoding='utf8') - result = read_excel(filename, 'TestSheet', encoding='utf8') - tm.assert_frame_equal(result, df) - - def test_to_excel_unicode_filename(self): - _skip_if_no_xlrd() - with ensure_clean(u('\u0192u.') + self.ext) as filename: - try: - f = open(filename, 'wb') - except UnicodeEncodeError: - pytest.skip('no unicode file names on this system') - else: - f.close() - - df = DataFrame([[0.123456, 0.234567, 0.567567], - [12.32112, 123123.2, 321321.2]], - index=['A', 'B'], columns=['X', 'Y', 'Z']) - - df.to_excel(filename, 'test1', float_format='%.2f') - - reader = ExcelFile(filename) - rs = read_excel(reader, 'test1', index_col=None) - xp = DataFrame([[0.12, 0.23, 0.57], - [12.32, 123123.20, 321321.20]], - index=['A', 'B'], columns=['X', 'Y', 'Z']) - tm.assert_frame_equal(rs, xp) - - # def test_to_excel_header_styling_xls(self): - - # import StringIO - # s = StringIO( - # """Date,ticker,type,value - # 2001-01-01,x,close,12.2 - # 2001-01-01,x,open ,12.1 - # 2001-01-01,y,close,12.2 - # 2001-01-01,y,open ,12.1 - # 2001-02-01,x,close,12.2 - # 2001-02-01,x,open ,12.1 - # 2001-02-01,y,close,12.2 - # 2001-02-01,y,open ,12.1 - # 2001-03-01,x,close,12.2 - # 2001-03-01,x,open ,12.1 - # 2001-03-01,y,close,12.2 - # 2001-03-01,y,open ,12.1""") - # df = read_csv(s, parse_dates=["Date"]) - # pdf = df.pivot_table(values="value", rows=["ticker"], - # cols=["Date", "type"]) - - # try: - # import xlwt - # import xlrd - # except ImportError: - # pytest.skip - - # filename = '__tmp_to_excel_header_styling_xls__.xls' - # pdf.to_excel(filename, 'test1') - - # wbk = xlrd.open_workbook(filename, - # formatting_info=True) - # self.assertEqual(["test1"], wbk.sheet_names()) - # ws = wbk.sheet_by_name('test1') - # self.assertEqual([(0, 1, 5, 7), (0, 1, 3, 5), (0, 1, 1, 3)], - # ws.merged_cells) - # for i in range(0, 2): - # for j in range(0, 7): - # xfx = ws.cell_xf_index(0, 0) - # cell_xf = wbk.xf_list[xfx] - 
# font = wbk.font_list - # self.assertEqual(1, font[cell_xf.font_index].bold) - # self.assertEqual(1, cell_xf.border.top_line_style) - # self.assertEqual(1, cell_xf.border.right_line_style) - # self.assertEqual(1, cell_xf.border.bottom_line_style) - # self.assertEqual(1, cell_xf.border.left_line_style) - # self.assertEqual(2, cell_xf.alignment.hor_align) - # os.remove(filename) - # def test_to_excel_header_styling_xlsx(self): - # import StringIO - # s = StringIO( - # """Date,ticker,type,value - # 2001-01-01,x,close,12.2 - # 2001-01-01,x,open ,12.1 - # 2001-01-01,y,close,12.2 - # 2001-01-01,y,open ,12.1 - # 2001-02-01,x,close,12.2 - # 2001-02-01,x,open ,12.1 - # 2001-02-01,y,close,12.2 - # 2001-02-01,y,open ,12.1 - # 2001-03-01,x,close,12.2 - # 2001-03-01,x,open ,12.1 - # 2001-03-01,y,close,12.2 - # 2001-03-01,y,open ,12.1""") - # df = read_csv(s, parse_dates=["Date"]) - # pdf = df.pivot_table(values="value", rows=["ticker"], - # cols=["Date", "type"]) - # try: - # import openpyxl - # from openpyxl.cell import get_column_letter - # except ImportError: - # pytest.skip - # if openpyxl.__version__ < '1.6.1': - # pytest.skip - # # test xlsx_styling - # filename = '__tmp_to_excel_header_styling_xlsx__.xlsx' - # pdf.to_excel(filename, 'test1') - # wbk = openpyxl.load_workbook(filename) - # self.assertEqual(["test1"], wbk.get_sheet_names()) - # ws = wbk.get_sheet_by_name('test1') - # xlsaddrs = ["%s2" % chr(i) for i in range(ord('A'), ord('H'))] - # xlsaddrs += ["A%s" % i for i in range(1, 6)] - # xlsaddrs += ["B1", "D1", "F1"] - # for xlsaddr in xlsaddrs: - # cell = ws.cell(xlsaddr) - # self.assertTrue(cell.style.font.bold) - # self.assertEqual(openpyxl.style.Border.BORDER_THIN, - # cell.style.borders.top.border_style) - # self.assertEqual(openpyxl.style.Border.BORDER_THIN, - # cell.style.borders.right.border_style) - # self.assertEqual(openpyxl.style.Border.BORDER_THIN, - # cell.style.borders.bottom.border_style) - # self.assertEqual(openpyxl.style.Border.BORDER_THIN, - 
# cell.style.borders.left.border_style) - # self.assertEqual(openpyxl.style.Alignment.HORIZONTAL_CENTER, - # cell.style.alignment.horizontal) - # mergedcells_addrs = ["C1", "E1", "G1"] - # for maddr in mergedcells_addrs: - # self.assertTrue(ws.cell(maddr).merged) - # os.remove(filename) - - def test_excel_010_hemstring(self): - _skip_if_no_xlrd() - - if self.merge_cells: - pytest.skip('Skip tests for merged MI format.') - - from pandas.util.testing import makeCustomDataframe as mkdf - # ensure limited functionality in 0.10 - # override of #2370 until sorted out in 0.11 - - def roundtrip(df, header=True, parser_hdr=0, index=True): - - with ensure_clean(self.ext) as path: - df.to_excel(path, header=header, - merge_cells=self.merge_cells, index=index) - xf = ExcelFile(path) - res = read_excel(xf, xf.sheet_names[0], header=parser_hdr) - return res - - nrows = 5 - ncols = 3 - for use_headers in (True, False): - for i in range(1, 4): # row multindex upto nlevel=3 - for j in range(1, 4): # col "" - df = mkdf(nrows, ncols, r_idx_nlevels=i, c_idx_nlevels=j) - - # this if will be removed once multi column excel writing - # is implemented for now fixing #9794 - if j > 1: - with tm.assertRaises(NotImplementedError): - res = roundtrip(df, use_headers, index=False) - else: - res = roundtrip(df, use_headers) - - if use_headers: - self.assertEqual(res.shape, (nrows, ncols + i)) - else: - # first row taken as columns - self.assertEqual(res.shape, (nrows - 1, ncols + i)) - - # no nans - for r in range(len(res.index)): - for c in range(len(res.columns)): - self.assertTrue(res.iloc[r, c] is not np.nan) - - res = roundtrip(DataFrame([0])) - self.assertEqual(res.shape, (1, 1)) - self.assertTrue(res.iloc[0, 0] is not np.nan) - - res = roundtrip(DataFrame([0]), False, None) - self.assertEqual(res.shape, (1, 2)) - self.assertTrue(res.iloc[0, 0] is not np.nan) - - def test_excel_010_hemstring_raises_NotImplementedError(self): - # This test was failing only for j>1 and header=False, - # So I 
reproduced a simple test. - _skip_if_no_xlrd() - - if self.merge_cells: - pytest.skip('Skip tests for merged MI format.') - - from pandas.util.testing import makeCustomDataframe as mkdf - # ensure limited functionality in 0.10 - # override of #2370 until sorted out in 0.11 - - def roundtrip2(df, header=True, parser_hdr=0, index=True): - - with ensure_clean(self.ext) as path: - df.to_excel(path, header=header, - merge_cells=self.merge_cells, index=index) - xf = ExcelFile(path) - res = read_excel(xf, xf.sheet_names[0], header=parser_hdr) - return res - - nrows = 5 - ncols = 3 - j = 2 - i = 1 - df = mkdf(nrows, ncols, r_idx_nlevels=i, c_idx_nlevels=j) - with tm.assertRaises(NotImplementedError): - roundtrip2(df, header=False, index=False) - - def test_duplicated_columns(self): - # Test for issue #5235 - _skip_if_no_xlrd() - - with ensure_clean(self.ext) as path: - write_frame = DataFrame([[1, 2, 3], [1, 2, 3], [1, 2, 3]]) - colnames = ['A', 'B', 'B'] - - write_frame.columns = colnames - write_frame.to_excel(path, 'test1') - - read_frame = read_excel(path, 'test1') - read_frame.columns = colnames - tm.assert_frame_equal(write_frame, read_frame) - - # 11007 / #10970 - write_frame = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], - columns=['A', 'B', 'A', 'B']) - write_frame.to_excel(path, 'test1') - read_frame = read_excel(path, 'test1') - read_frame.columns = ['A', 'B', 'A', 'B'] - tm.assert_frame_equal(write_frame, read_frame) - - # 10982 - write_frame.to_excel(path, 'test1', index=False, header=False) - read_frame = read_excel(path, 'test1', header=None) - write_frame.columns = [0, 1, 2, 3] - tm.assert_frame_equal(write_frame, read_frame) - - def test_swapped_columns(self): - # Test for issue #5427. 
- _skip_if_no_xlrd() - - with ensure_clean(self.ext) as path: - write_frame = DataFrame({'A': [1, 1, 1], - 'B': [2, 2, 2]}) - write_frame.to_excel(path, 'test1', columns=['B', 'A']) - - read_frame = read_excel(path, 'test1', header=0) - - tm.assert_series_equal(write_frame['A'], read_frame['A']) - tm.assert_series_equal(write_frame['B'], read_frame['B']) - - def test_invalid_columns(self): - # 10982 - _skip_if_no_xlrd() - - with ensure_clean(self.ext) as path: - write_frame = DataFrame({'A': [1, 1, 1], - 'B': [2, 2, 2]}) - - write_frame.to_excel(path, 'test1', columns=['B', 'C']) - expected = write_frame.loc[:, ['B', 'C']] - read_frame = read_excel(path, 'test1') - tm.assert_frame_equal(expected, read_frame) - - with tm.assertRaises(KeyError): - write_frame.to_excel(path, 'test1', columns=['C', 'D']) - - def test_datetimes(self): - - # Test writing and reading datetimes. For issue #9139. (xref #9185) - _skip_if_no_xlrd() - - datetimes = [datetime(2013, 1, 13, 1, 2, 3), - datetime(2013, 1, 13, 2, 45, 56), - datetime(2013, 1, 13, 4, 29, 49), - datetime(2013, 1, 13, 6, 13, 42), - datetime(2013, 1, 13, 7, 57, 35), - datetime(2013, 1, 13, 9, 41, 28), - datetime(2013, 1, 13, 11, 25, 21), - datetime(2013, 1, 13, 13, 9, 14), - datetime(2013, 1, 13, 14, 53, 7), - datetime(2013, 1, 13, 16, 37, 0), - datetime(2013, 1, 13, 18, 20, 52)] - - with ensure_clean(self.ext) as path: - write_frame = DataFrame.from_items([('A', datetimes)]) - write_frame.to_excel(path, 'Sheet1') - read_frame = read_excel(path, 'Sheet1', header=0) - - tm.assert_series_equal(write_frame['A'], read_frame['A']) - - # GH7074 - def test_bytes_io(self): - _skip_if_no_xlrd() - - bio = BytesIO() - df = DataFrame(np.random.randn(10, 2)) - # pass engine explicitly as there is no file path to infer from - writer = ExcelWriter(bio, engine=self.engine_name) - df.to_excel(writer) - writer.save() - bio.seek(0) - reread_df = read_excel(bio) - tm.assert_frame_equal(df, reread_df) - - # GH8188 - def 
test_write_lists_dict(self): - _skip_if_no_xlrd() - - df = DataFrame({'mixed': ['a', ['b', 'c'], {'d': 'e', 'f': 2}], - 'numeric': [1, 2, 3.0], - 'str': ['apple', 'banana', 'cherry']}) - expected = df.copy() - expected.mixed = expected.mixed.apply(str) - expected.numeric = expected.numeric.astype('int64') - with ensure_clean(self.ext) as path: - df.to_excel(path, 'Sheet1') - read = read_excel(path, 'Sheet1', header=0) - tm.assert_frame_equal(read, expected) - - # GH13347 - def test_true_and_false_value_options(self): - df = pd.DataFrame([['foo', 'bar']], columns=['col1', 'col2']) - expected = df.replace({'foo': True, - 'bar': False}) - with ensure_clean(self.ext) as path: - df.to_excel(path) - read_frame = read_excel(path, true_values=['foo'], - false_values=['bar']) - tm.assert_frame_equal(read_frame, expected) - - -def raise_wrapper(major_ver): - def versioned_raise_wrapper(orig_method): - @functools.wraps(orig_method) - def wrapped(self, *args, **kwargs): - _skip_if_no_openpyxl() - if openpyxl_compat.is_compat(major_ver=major_ver): - orig_method(self, *args, **kwargs) - else: - msg = (r'Installed openpyxl is not supported at this ' - r'time\. 
Use.+') - with tm.assertRaisesRegexp(ValueError, msg): - orig_method(self, *args, **kwargs) - return wrapped - return versioned_raise_wrapper - - -def raise_on_incompat_version(major_ver): - def versioned_raise_on_incompat_version(cls): - methods = filter(operator.methodcaller( - 'startswith', 'test_'), dir(cls)) - for method in methods: - setattr(cls, method, raise_wrapper( - major_ver)(getattr(cls, method))) - return cls - return versioned_raise_on_incompat_version - - -@raise_on_incompat_version(1) -class OpenpyxlTests(ExcelWriterBase, tm.TestCase): - ext = '.xlsx' - engine_name = 'openpyxl1' - check_skip = staticmethod(lambda *args, **kwargs: None) - - def test_to_excel_styleconverter(self): - _skip_if_no_openpyxl() - if not openpyxl_compat.is_compat(major_ver=1): - pytest.skip('incompatiable openpyxl version') - - import openpyxl - - hstyle = {"font": {"bold": True}, - "borders": {"top": "thin", - "right": "thin", - "bottom": "thin", - "left": "thin"}, - "alignment": {"horizontal": "center", "vertical": "top"}} - - xlsx_style = _Openpyxl1Writer._convert_to_style(hstyle) - self.assertTrue(xlsx_style.font.bold) - self.assertEqual(openpyxl.style.Border.BORDER_THIN, - xlsx_style.borders.top.border_style) - self.assertEqual(openpyxl.style.Border.BORDER_THIN, - xlsx_style.borders.right.border_style) - self.assertEqual(openpyxl.style.Border.BORDER_THIN, - xlsx_style.borders.bottom.border_style) - self.assertEqual(openpyxl.style.Border.BORDER_THIN, - xlsx_style.borders.left.border_style) - self.assertEqual(openpyxl.style.Alignment.HORIZONTAL_CENTER, - xlsx_style.alignment.horizontal) - self.assertEqual(openpyxl.style.Alignment.VERTICAL_TOP, - xlsx_style.alignment.vertical) - - -def skip_openpyxl_gt21(cls): - """Skip a TestCase instance if openpyxl >= 2.2""" - - @classmethod - def setUpClass(cls): - _skip_if_no_openpyxl() - import openpyxl - ver = openpyxl.__version__ - if (not (LooseVersion(ver) >= LooseVersion('2.0.0') and - LooseVersion(ver) < 
LooseVersion('2.2.0'))): - pytest.skip("openpyxl %s >= 2.2" % str(ver)) - - cls.setUpClass = setUpClass - return cls - - -@raise_on_incompat_version(2) -@skip_openpyxl_gt21 -class Openpyxl20Tests(ExcelWriterBase, tm.TestCase): - ext = '.xlsx' - engine_name = 'openpyxl20' - check_skip = staticmethod(lambda *args, **kwargs: None) - - def test_to_excel_styleconverter(self): - import openpyxl - from openpyxl import styles - - hstyle = { - "font": { - "color": '00FF0000', - "bold": True, - }, - "borders": { - "top": "thin", - "right": "thin", - "bottom": "thin", - "left": "thin", - }, - "alignment": { - "horizontal": "center", - "vertical": "top", - }, - "fill": { - "patternType": 'solid', - 'fgColor': { - 'rgb': '006666FF', - 'tint': 0.3, - }, - }, - "number_format": { - "format_code": "0.00" - }, - "protection": { - "locked": True, - "hidden": False, - }, - } - - font_color = styles.Color('00FF0000') - font = styles.Font(bold=True, color=font_color) - side = styles.Side(style=styles.borders.BORDER_THIN) - border = styles.Border(top=side, right=side, bottom=side, left=side) - alignment = styles.Alignment(horizontal='center', vertical='top') - fill_color = styles.Color(rgb='006666FF', tint=0.3) - fill = styles.PatternFill(patternType='solid', fgColor=fill_color) - - # ahh openpyxl API changes - ver = openpyxl.__version__ - if ver >= LooseVersion('2.0.0') and ver < LooseVersion('2.1.0'): - number_format = styles.NumberFormat(format_code='0.00') - else: - number_format = '0.00' # XXX: Only works with openpyxl-2.1.0 - - protection = styles.Protection(locked=True, hidden=False) - - kw = _Openpyxl20Writer._convert_to_style_kwargs(hstyle) - self.assertEqual(kw['font'], font) - self.assertEqual(kw['border'], border) - self.assertEqual(kw['alignment'], alignment) - self.assertEqual(kw['fill'], fill) - self.assertEqual(kw['number_format'], number_format) - self.assertEqual(kw['protection'], protection) - - def test_write_cells_merge_styled(self): - from pandas.formats.format 
import ExcelCell - from openpyxl import styles - - sheet_name = 'merge_styled' - - sty_b1 = {'font': {'color': '00FF0000'}} - sty_a2 = {'font': {'color': '0000FF00'}} - - initial_cells = [ - ExcelCell(col=1, row=0, val=42, style=sty_b1), - ExcelCell(col=0, row=1, val=99, style=sty_a2), - ] - - sty_merged = {'font': {'color': '000000FF', 'bold': True}} - sty_kwargs = _Openpyxl20Writer._convert_to_style_kwargs(sty_merged) - openpyxl_sty_merged = styles.Style(**sty_kwargs) - merge_cells = [ - ExcelCell(col=0, row=0, val='pandas', - mergestart=1, mergeend=1, style=sty_merged), - ] - - with ensure_clean('.xlsx') as path: - writer = _Openpyxl20Writer(path) - writer.write_cells(initial_cells, sheet_name=sheet_name) - writer.write_cells(merge_cells, sheet_name=sheet_name) - - wks = writer.sheets[sheet_name] - xcell_b1 = wks['B1'] - xcell_a2 = wks['A2'] - self.assertEqual(xcell_b1.style, openpyxl_sty_merged) - self.assertEqual(xcell_a2.style, openpyxl_sty_merged) - - -def skip_openpyxl_lt22(cls): - """Skip a TestCase instance if openpyxl < 2.2""" - - @classmethod - def setUpClass(cls): - _skip_if_no_openpyxl() - import openpyxl - ver = openpyxl.__version__ - if LooseVersion(ver) < LooseVersion('2.2.0'): - pytest.skip("openpyxl %s < 2.2" % str(ver)) - - cls.setUpClass = setUpClass - return cls - - -@raise_on_incompat_version(2) -@skip_openpyxl_lt22 -class Openpyxl22Tests(ExcelWriterBase, tm.TestCase): - ext = '.xlsx' - engine_name = 'openpyxl22' - check_skip = staticmethod(lambda *args, **kwargs: None) - - def test_to_excel_styleconverter(self): - from openpyxl import styles - - hstyle = { - "font": { - "color": '00FF0000', - "bold": True, - }, - "borders": { - "top": "thin", - "right": "thin", - "bottom": "thin", - "left": "thin", - }, - "alignment": { - "horizontal": "center", - "vertical": "top", - }, - "fill": { - "patternType": 'solid', - 'fgColor': { - 'rgb': '006666FF', - 'tint': 0.3, - }, - }, - "number_format": { - "format_code": "0.00" - }, - "protection": { - 
"locked": True, - "hidden": False, - }, - } - - font_color = styles.Color('00FF0000') - font = styles.Font(bold=True, color=font_color) - side = styles.Side(style=styles.borders.BORDER_THIN) - border = styles.Border(top=side, right=side, bottom=side, left=side) - alignment = styles.Alignment(horizontal='center', vertical='top') - fill_color = styles.Color(rgb='006666FF', tint=0.3) - fill = styles.PatternFill(patternType='solid', fgColor=fill_color) - - number_format = '0.00' - - protection = styles.Protection(locked=True, hidden=False) - - kw = _Openpyxl22Writer._convert_to_style_kwargs(hstyle) - self.assertEqual(kw['font'], font) - self.assertEqual(kw['border'], border) - self.assertEqual(kw['alignment'], alignment) - self.assertEqual(kw['fill'], fill) - self.assertEqual(kw['number_format'], number_format) - self.assertEqual(kw['protection'], protection) - - def test_write_cells_merge_styled(self): - if not openpyxl_compat.is_compat(major_ver=2): - pytest.skip('incompatiable openpyxl version') - - from pandas.formats.format import ExcelCell - - sheet_name = 'merge_styled' - - sty_b1 = {'font': {'color': '00FF0000'}} - sty_a2 = {'font': {'color': '0000FF00'}} - - initial_cells = [ - ExcelCell(col=1, row=0, val=42, style=sty_b1), - ExcelCell(col=0, row=1, val=99, style=sty_a2), - ] - - sty_merged = {'font': {'color': '000000FF', 'bold': True}} - sty_kwargs = _Openpyxl22Writer._convert_to_style_kwargs(sty_merged) - openpyxl_sty_merged = sty_kwargs['font'] - merge_cells = [ - ExcelCell(col=0, row=0, val='pandas', - mergestart=1, mergeend=1, style=sty_merged), - ] - - with ensure_clean('.xlsx') as path: - writer = _Openpyxl22Writer(path) - writer.write_cells(initial_cells, sheet_name=sheet_name) - writer.write_cells(merge_cells, sheet_name=sheet_name) - - wks = writer.sheets[sheet_name] - xcell_b1 = wks['B1'] - xcell_a2 = wks['A2'] - self.assertEqual(xcell_b1.font, openpyxl_sty_merged) - self.assertEqual(xcell_a2.font, openpyxl_sty_merged) - - -class 
XlwtTests(ExcelWriterBase, tm.TestCase): - ext = '.xls' - engine_name = 'xlwt' - check_skip = staticmethod(_skip_if_no_xlwt) - - def test_excel_raise_error_on_multiindex_columns_and_no_index(self): - _skip_if_no_xlwt() - # MultiIndex as columns is not yet implemented 9794 - cols = MultiIndex.from_tuples([('site', ''), - ('2014', 'height'), - ('2014', 'weight')]) - df = DataFrame(np.random.randn(10, 3), columns=cols) - with tm.assertRaises(NotImplementedError): - with ensure_clean(self.ext) as path: - df.to_excel(path, index=False) - - def test_excel_multiindex_columns_and_index_true(self): - _skip_if_no_xlwt() - cols = MultiIndex.from_tuples([('site', ''), - ('2014', 'height'), - ('2014', 'weight')]) - df = pd.DataFrame(np.random.randn(10, 3), columns=cols) - with ensure_clean(self.ext) as path: - df.to_excel(path, index=True) - - def test_excel_multiindex_index(self): - _skip_if_no_xlwt() - # MultiIndex as index works so assert no error #9794 - cols = MultiIndex.from_tuples([('site', ''), - ('2014', 'height'), - ('2014', 'weight')]) - df = DataFrame(np.random.randn(3, 10), index=cols) - with ensure_clean(self.ext) as path: - df.to_excel(path, index=False) - - def test_to_excel_styleconverter(self): - _skip_if_no_xlwt() - - import xlwt - - hstyle = {"font": {"bold": True}, - "borders": {"top": "thin", - "right": "thin", - "bottom": "thin", - "left": "thin"}, - "alignment": {"horizontal": "center", "vertical": "top"}} - - xls_style = _XlwtWriter._convert_to_style(hstyle) - self.assertTrue(xls_style.font.bold) - self.assertEqual(xlwt.Borders.THIN, xls_style.borders.top) - self.assertEqual(xlwt.Borders.THIN, xls_style.borders.right) - self.assertEqual(xlwt.Borders.THIN, xls_style.borders.bottom) - self.assertEqual(xlwt.Borders.THIN, xls_style.borders.left) - self.assertEqual(xlwt.Alignment.HORZ_CENTER, xls_style.alignment.horz) - self.assertEqual(xlwt.Alignment.VERT_TOP, xls_style.alignment.vert) - - -class XlsxWriterTests(ExcelWriterBase, tm.TestCase): - ext = 
'.xlsx' - engine_name = 'xlsxwriter' - check_skip = staticmethod(_skip_if_no_xlsxwriter) - - def test_column_format(self): - # Test that column formats are applied to cells. Test for issue #9167. - # Applicable to xlsxwriter only. - _skip_if_no_xlsxwriter() - - with warnings.catch_warnings(): - # Ignore the openpyxl lxml warning. - warnings.simplefilter("ignore") - _skip_if_no_openpyxl() - import openpyxl - - with ensure_clean(self.ext) as path: - frame = DataFrame({'A': [123456, 123456], - 'B': [123456, 123456]}) - - writer = ExcelWriter(path) - frame.to_excel(writer) - - # Add a number format to col B and ensure it is applied to cells. - num_format = '#,##0' - write_workbook = writer.book - write_worksheet = write_workbook.worksheets()[0] - col_format = write_workbook.add_format({'num_format': num_format}) - write_worksheet.set_column('B:B', None, col_format) - writer.save() - - read_workbook = openpyxl.load_workbook(path) - try: - read_worksheet = read_workbook['Sheet1'] - except TypeError: - # compat - read_worksheet = read_workbook.get_sheet_by_name(name='Sheet1') - - # Get the number format from the cell. - try: - cell = read_worksheet['B2'] - except TypeError: - # compat - cell = read_worksheet.cell('B2') - - try: - read_num_format = cell.number_format - except: - read_num_format = cell.style.number_format._format_code - - self.assertEqual(read_num_format, num_format) - - -class OpenpyxlTests_NoMerge(ExcelWriterBase, tm.TestCase): - ext = '.xlsx' - engine_name = 'openpyxl' - check_skip = staticmethod(_skip_if_no_openpyxl) - - # Test < 0.13 non-merge behaviour for MultiIndex and Hierarchical Rows. - merge_cells = False - - -class XlwtTests_NoMerge(ExcelWriterBase, tm.TestCase): - ext = '.xls' - engine_name = 'xlwt' - check_skip = staticmethod(_skip_if_no_xlwt) - - # Test < 0.13 non-merge behaviour for MultiIndex and Hierarchical Rows. 
- merge_cells = False - - -class XlsxWriterTests_NoMerge(ExcelWriterBase, tm.TestCase): - ext = '.xlsx' - engine_name = 'xlsxwriter' - check_skip = staticmethod(_skip_if_no_xlsxwriter) - - # Test < 0.13 non-merge behaviour for MultiIndex and Hierarchical Rows. - merge_cells = False - - -class ExcelWriterEngineTests(tm.TestCase): - - def test_ExcelWriter_dispatch(self): - with tm.assertRaisesRegexp(ValueError, 'No engine'): - ExcelWriter('nothing') - - try: - import xlsxwriter # noqa - writer_klass = _XlsxWriter - except ImportError: - _skip_if_no_openpyxl() - if not openpyxl_compat.is_compat(major_ver=1): - pytest.skip('incompatible openpyxl version') - writer_klass = _Openpyxl1Writer - - with ensure_clean('.xlsx') as path: - writer = ExcelWriter(path) - tm.assertIsInstance(writer, writer_klass) - - _skip_if_no_xlwt() - with ensure_clean('.xls') as path: - writer = ExcelWriter(path) - tm.assertIsInstance(writer, _XlwtWriter) - - def test_register_writer(self): - # some awkward mocking to test out dispatch and such actually works - called_save = [] - called_write_cells = [] - - class DummyClass(ExcelWriter): - called_save = False - called_write_cells = False - supported_extensions = ['test', 'xlsx', 'xls'] - engine = 'dummy' - - def save(self): - called_save.append(True) - - def write_cells(self, *args, **kwargs): - called_write_cells.append(True) - - def check_called(func): - func() - self.assertTrue(len(called_save) >= 1) - self.assertTrue(len(called_write_cells) >= 1) - del called_save[:] - del called_write_cells[:] - - with pd.option_context('io.excel.xlsx.writer', 'dummy'): - register_writer(DummyClass) - writer = ExcelWriter('something.test') - tm.assertIsInstance(writer, DummyClass) - df = tm.makeCustomDataframe(1, 1) - panel = tm.makePanel() - func = lambda: df.to_excel('something.test') - check_called(func) - check_called(lambda: panel.to_excel('something.test')) - check_called(lambda: df.to_excel('something.xlsx')) - check_called(lambda: 
df.to_excel('something.xls', engine='dummy')) diff --git a/pandas/io/tests/test_gbq.py b/pandas/io/tests/test_gbq.py deleted file mode 100644 index 0868edd2147b5..0000000000000 --- a/pandas/io/tests/test_gbq.py +++ /dev/null @@ -1,1330 +0,0 @@ -import re -from datetime import datetime -import pytest -import pytz -import platform -from time import sleep -import os -import logging - -import numpy as np - -from distutils.version import StrictVersion -from pandas import compat - -from pandas import NaT -from pandas.compat import u, range -from pandas.core.frame import DataFrame -import pandas.io.gbq as gbq -import pandas.util.testing as tm -from pandas.compat.numpy import np_datetime64_compat - -PROJECT_ID = None -PRIVATE_KEY_JSON_PATH = None -PRIVATE_KEY_JSON_CONTENTS = None - -if compat.PY3: - DATASET_ID = 'pydata_pandas_bq_testing_py3' -else: - DATASET_ID = 'pydata_pandas_bq_testing_py2' - -TABLE_ID = 'new_test' -DESTINATION_TABLE = "{0}.{1}".format(DATASET_ID + "1", TABLE_ID) - -VERSION = platform.python_version() - -_IMPORTS = False -_GOOGLE_API_CLIENT_INSTALLED = False -_GOOGLE_API_CLIENT_VALID_VERSION = False -_HTTPLIB2_INSTALLED = False -_SETUPTOOLS_INSTALLED = False - - -def _skip_if_no_project_id(): - if not _get_project_id(): - pytest.skip( - "Cannot run integration tests without a project id") - - -def _skip_local_auth_if_in_travis_env(): - if _in_travis_environment(): - pytest.skip("Cannot run local auth in travis environment") - - -def _skip_if_no_private_key_path(): - if not _get_private_key_path(): - pytest.skip("Cannot run integration tests without a " - "private key json file path") - - -def _skip_if_no_private_key_contents(): - if not _get_private_key_contents(): - pytest.skip("Cannot run integration tests without a " - "private key json contents") - - -def _in_travis_environment(): - return 'TRAVIS_BUILD_DIR' in os.environ and \ - 'GBQ_PROJECT_ID' in os.environ - - -def _get_project_id(): - if _in_travis_environment(): - return 
os.environ.get('GBQ_PROJECT_ID') - else: - return PROJECT_ID - - -def _get_private_key_path(): - if _in_travis_environment(): - return os.path.join(*[os.environ.get('TRAVIS_BUILD_DIR'), 'ci', - 'travis_gbq.json']) - else: - return PRIVATE_KEY_JSON_PATH - - -def _get_private_key_contents(): - if _in_travis_environment(): - with open(os.path.join(*[os.environ.get('TRAVIS_BUILD_DIR'), 'ci', - 'travis_gbq.json'])) as f: - return f.read() - else: - return PRIVATE_KEY_JSON_CONTENTS - - -def _test_imports(): - global _GOOGLE_API_CLIENT_INSTALLED, _GOOGLE_API_CLIENT_VALID_VERSION, \ - _HTTPLIB2_INSTALLED, _SETUPTOOLS_INSTALLED - - try: - import pkg_resources - _SETUPTOOLS_INSTALLED = True - except ImportError: - _SETUPTOOLS_INSTALLED = False - - if compat.PY3: - google_api_minimum_version = '1.4.1' - else: - google_api_minimum_version = '1.2.0' - - if _SETUPTOOLS_INSTALLED: - try: - try: - from googleapiclient.discovery import build # noqa - from googleapiclient.errors import HttpError # noqa - except: - from apiclient.discovery import build # noqa - from apiclient.errors import HttpError # noqa - - from oauth2client.client import OAuth2WebServerFlow # noqa - from oauth2client.client import AccessTokenRefreshError # noqa - - from oauth2client.file import Storage # noqa - from oauth2client.tools import run_flow # noqa - _GOOGLE_API_CLIENT_INSTALLED = True - _GOOGLE_API_CLIENT_VERSION = pkg_resources.get_distribution( - 'google-api-python-client').version - - if (StrictVersion(_GOOGLE_API_CLIENT_VERSION) >= - StrictVersion(google_api_minimum_version)): - _GOOGLE_API_CLIENT_VALID_VERSION = True - - except ImportError: - _GOOGLE_API_CLIENT_INSTALLED = False - - try: - import httplib2 # noqa - _HTTPLIB2_INSTALLED = True - except ImportError: - _HTTPLIB2_INSTALLED = False - - if not _SETUPTOOLS_INSTALLED: - raise ImportError('Could not import pkg_resources (setuptools).') - - if not _GOOGLE_API_CLIENT_INSTALLED: - raise ImportError('Could not import Google API Client.') - - if 
not _GOOGLE_API_CLIENT_VALID_VERSION: - raise ImportError("pandas requires google-api-python-client >= {0} " - "for Google BigQuery support, " - "current version {1}" - .format(google_api_minimum_version, - _GOOGLE_API_CLIENT_VERSION)) - - if not _HTTPLIB2_INSTALLED: - raise ImportError( - "pandas requires httplib2 for Google BigQuery support") - - # Bug fix for https://github.com/pandas-dev/pandas/issues/12572 - # We need to know that a supported version of oauth2client is installed - # Test that either of the following is installed: - # - SignedJwtAssertionCredentials from oauth2client.client - # - ServiceAccountCredentials from oauth2client.service_account - # SignedJwtAssertionCredentials is available in oauthclient < 2.0.0 - # ServiceAccountCredentials is available in oauthclient >= 2.0.0 - oauth2client_v1 = True - oauth2client_v2 = True - - try: - from oauth2client.client import SignedJwtAssertionCredentials # noqa - except ImportError: - oauth2client_v1 = False - - try: - from oauth2client.service_account import ServiceAccountCredentials # noqa - except ImportError: - oauth2client_v2 = False - - if not oauth2client_v1 and not oauth2client_v2: - raise ImportError("Missing oauth2client required for BigQuery " - "service account support") - - -def _setup_common(): - try: - _test_imports() - except (ImportError, NotImplementedError) as import_exception: - pytest.skip(import_exception) - - if _in_travis_environment(): - logging.getLogger('oauth2client').setLevel(logging.ERROR) - logging.getLogger('apiclient').setLevel(logging.ERROR) - - -def _check_if_can_get_correct_default_credentials(): - # Checks if "Application Default Credentials" can be fetched - # from the environment the tests are running in. 
- # See Issue #13577 - - import httplib2 - try: - from googleapiclient.discovery import build - except ImportError: - from apiclient.discovery import build - try: - from oauth2client.client import GoogleCredentials - credentials = GoogleCredentials.get_application_default() - http = httplib2.Http() - http = credentials.authorize(http) - bigquery_service = build('bigquery', 'v2', http=http) - jobs = bigquery_service.jobs() - job_data = {'configuration': {'query': {'query': 'SELECT 1'}}} - jobs.insert(projectId=_get_project_id(), body=job_data).execute() - return True - except: - return False - - -def clean_gbq_environment(private_key=None): - dataset = gbq._Dataset(_get_project_id(), private_key=private_key) - - for i in range(1, 10): - if DATASET_ID + str(i) in dataset.datasets(): - dataset_id = DATASET_ID + str(i) - table = gbq._Table(_get_project_id(), dataset_id, - private_key=private_key) - for j in range(1, 20): - if TABLE_ID + str(j) in dataset.tables(dataset_id): - table.delete(TABLE_ID + str(j)) - - dataset.delete(dataset_id) - - -def make_mixed_dataframe_v2(test_size): - # create df to test for all BQ datatypes except RECORD - bools = np.random.randint(2, size=(1, test_size)).astype(bool) - flts = np.random.randn(1, test_size) - ints = np.random.randint(1, 10, size=(1, test_size)) - strs = np.random.randint(1, 10, size=(1, test_size)).astype(str) - times = [datetime.now(pytz.timezone('US/Arizona')) - for t in range(test_size)] - return DataFrame({'bools': bools[0], - 'flts': flts[0], - 'ints': ints[0], - 'strs': strs[0], - 'times': times[0]}, - index=range(test_size)) - - -def test_generate_bq_schema_deprecated(): - # 11121 Deprecation of generate_bq_schema - with tm.assert_produces_warning(FutureWarning): - df = make_mixed_dataframe_v2(10) - gbq.generate_bq_schema(df) - - -class TestGBQConnectorIntegrationWithLocalUserAccountAuth(tm.TestCase): - - def setUp(self): - _setup_common() - _skip_if_no_project_id() - _skip_local_auth_if_in_travis_env() - - 
self.sut = gbq.GbqConnector(_get_project_id()) - - def test_should_be_able_to_make_a_connector(self): - self.assertTrue(self.sut is not None, - 'Could not create a GbqConnector') - - def test_should_be_able_to_get_valid_credentials(self): - credentials = self.sut.get_credentials() - self.assertFalse(credentials.invalid, 'Returned credentials invalid') - - def test_should_be_able_to_get_a_bigquery_service(self): - bigquery_service = self.sut.get_service() - self.assertTrue(bigquery_service is not None, 'No service returned') - - def test_should_be_able_to_get_schema_from_query(self): - schema, pages = self.sut.run_query('SELECT 1') - self.assertTrue(schema is not None) - - def test_should_be_able_to_get_results_from_query(self): - schema, pages = self.sut.run_query('SELECT 1') - self.assertTrue(pages is not None) - - def test_get_application_default_credentials_does_not_throw_error(self): - if _check_if_can_get_correct_default_credentials(): - pytest.skip("Can get default_credentials " - "from the environment!") - credentials = self.sut.get_application_default_credentials() - self.assertIsNone(credentials) - - def test_get_application_default_credentials_returns_credentials(self): - if not _check_if_can_get_correct_default_credentials(): - pytest.skip("Cannot get default_credentials " - "from the environment!") - from oauth2client.client import GoogleCredentials - credentials = self.sut.get_application_default_credentials() - self.assertTrue(isinstance(credentials, GoogleCredentials)) - - -class TestGBQConnectorIntegrationWithServiceAccountKeyPath(tm.TestCase): - def setUp(self): - _setup_common() - - _skip_if_no_project_id() - _skip_if_no_private_key_path() - - self.sut = gbq.GbqConnector(_get_project_id(), - private_key=_get_private_key_path()) - - def test_should_be_able_to_make_a_connector(self): - self.assertTrue(self.sut is not None, - 'Could not create a GbqConnector') - - def test_should_be_able_to_get_valid_credentials(self): - credentials = 
self.sut.get_credentials() - self.assertFalse(credentials.invalid, 'Returned credentials invalid') - - def test_should_be_able_to_get_a_bigquery_service(self): - bigquery_service = self.sut.get_service() - self.assertTrue(bigquery_service is not None, 'No service returned') - - def test_should_be_able_to_get_schema_from_query(self): - schema, pages = self.sut.run_query('SELECT 1') - self.assertTrue(schema is not None) - - def test_should_be_able_to_get_results_from_query(self): - schema, pages = self.sut.run_query('SELECT 1') - self.assertTrue(pages is not None) - - -class TestGBQConnectorIntegrationWithServiceAccountKeyContents(tm.TestCase): - def setUp(self): - _setup_common() - - _skip_if_no_project_id() - _skip_if_no_private_key_contents() - - self.sut = gbq.GbqConnector(_get_project_id(), - private_key=_get_private_key_contents()) - - def test_should_be_able_to_make_a_connector(self): - self.assertTrue(self.sut is not None, - 'Could not create a GbqConnector') - - def test_should_be_able_to_get_valid_credentials(self): - credentials = self.sut.get_credentials() - self.assertFalse(credentials.invalid, 'Returned credentials invalid') - - def test_should_be_able_to_get_a_bigquery_service(self): - bigquery_service = self.sut.get_service() - self.assertTrue(bigquery_service is not None, 'No service returned') - - def test_should_be_able_to_get_schema_from_query(self): - schema, pages = self.sut.run_query('SELECT 1') - self.assertTrue(schema is not None) - - def test_should_be_able_to_get_results_from_query(self): - schema, pages = self.sut.run_query('SELECT 1') - self.assertTrue(pages is not None) - - -class GBQUnitTests(tm.TestCase): - - def setUp(self): - _setup_common() - - def test_import_google_api_python_client(self): - if compat.PY2: - with tm.assertRaises(ImportError): - from googleapiclient.discovery import build # noqa - from googleapiclient.errors import HttpError # noqa - from apiclient.discovery import build # noqa - from apiclient.errors import 
HttpError # noqa - else: - from googleapiclient.discovery import build # noqa - from googleapiclient.errors import HttpError # noqa - - def test_should_return_bigquery_integers_as_python_ints(self): - result = gbq._parse_entry(1, 'INTEGER') - tm.assert_equal(result, int(1)) - - def test_should_return_bigquery_floats_as_python_floats(self): - result = gbq._parse_entry(1, 'FLOAT') - tm.assert_equal(result, float(1)) - - def test_should_return_bigquery_timestamps_as_numpy_datetime(self): - result = gbq._parse_entry('0e9', 'TIMESTAMP') - tm.assert_equal(result, np_datetime64_compat('1970-01-01T00:00:00Z')) - - def test_should_return_bigquery_booleans_as_python_booleans(self): - result = gbq._parse_entry('false', 'BOOLEAN') - tm.assert_equal(result, False) - - def test_should_return_bigquery_strings_as_python_strings(self): - result = gbq._parse_entry('STRING', 'STRING') - tm.assert_equal(result, 'STRING') - - def test_to_gbq_should_fail_if_invalid_table_name_passed(self): - with tm.assertRaises(gbq.NotFoundException): - gbq.to_gbq(DataFrame(), 'invalid_table_name', project_id="1234") - - def test_to_gbq_with_no_project_id_given_should_fail(self): - with tm.assertRaises(TypeError): - gbq.to_gbq(DataFrame(), 'dataset.tablename') - - def test_read_gbq_with_no_project_id_given_should_fail(self): - with tm.assertRaises(TypeError): - gbq.read_gbq('SELECT 1') - - def test_that_parse_data_works_properly(self): - test_schema = {'fields': [ - {'mode': 'NULLABLE', 'name': 'valid_string', 'type': 'STRING'}]} - test_page = [{'f': [{'v': 'PI'}]}] - - test_output = gbq._parse_data(test_schema, test_page) - correct_output = DataFrame({'valid_string': ['PI']}) - tm.assert_frame_equal(test_output, correct_output) - - def test_read_gbq_with_invalid_private_key_json_should_fail(self): - with tm.assertRaises(gbq.InvalidPrivateKeyFormat): - gbq.read_gbq('SELECT 1', project_id='x', private_key='y') - - def test_read_gbq_with_empty_private_key_json_should_fail(self): - with 
tm.assertRaises(gbq.InvalidPrivateKeyFormat): - gbq.read_gbq('SELECT 1', project_id='x', private_key='{}') - - def test_read_gbq_with_private_key_json_wrong_types_should_fail(self): - with tm.assertRaises(gbq.InvalidPrivateKeyFormat): - gbq.read_gbq( - 'SELECT 1', project_id='x', - private_key='{ "client_email" : 1, "private_key" : True }') - - def test_read_gbq_with_empty_private_key_file_should_fail(self): - with tm.ensure_clean() as empty_file_path: - with tm.assertRaises(gbq.InvalidPrivateKeyFormat): - gbq.read_gbq('SELECT 1', project_id='x', - private_key=empty_file_path) - - def test_read_gbq_with_corrupted_private_key_json_should_fail(self): - _skip_if_no_private_key_contents() - - with tm.assertRaises(gbq.InvalidPrivateKeyFormat): - gbq.read_gbq( - 'SELECT 1', project_id='x', - private_key=re.sub('[a-z]', '9', _get_private_key_contents())) - - -class TestReadGBQIntegration(tm.TestCase): - - @classmethod - def setUpClass(cls): - # - GLOBAL CLASS FIXTURES - - # put here any instruction you want to execute only *ONCE* *BEFORE* - # executing *ALL* tests described below. - - _skip_if_no_project_id() - - _setup_common() - - def setUp(self): - # - PER-TEST FIXTURES - - # put here any instruction you want to be run *BEFORE* *EVERY* test is - # executed. - pass - - @classmethod - def tearDownClass(cls): - # - GLOBAL CLASS FIXTURES - - # put here any instruction you want to execute only *ONCE* *AFTER* - # executing all tests. - pass - - def tearDown(self): - # - PER-TEST FIXTURES - - # put here any instructions you want to be run *AFTER* *EVERY* test is - # executed. 
- pass - - def test_should_read_as_user_account(self): - _skip_local_auth_if_in_travis_env() - - query = 'SELECT "PI" AS valid_string' - df = gbq.read_gbq(query, project_id=_get_project_id()) - tm.assert_frame_equal(df, DataFrame({'valid_string': ['PI']})) - - def test_should_read_as_service_account_with_key_path(self): - _skip_if_no_private_key_path() - query = 'SELECT "PI" AS valid_string' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'valid_string': ['PI']})) - - def test_should_read_as_service_account_with_key_contents(self): - _skip_if_no_private_key_contents() - query = 'SELECT "PI" AS valid_string' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_contents()) - tm.assert_frame_equal(df, DataFrame({'valid_string': ['PI']})) - - -class TestReadGBQIntegrationWithServiceAccountKeyPath(tm.TestCase): - - @classmethod - def setUpClass(cls): - # - GLOBAL CLASS FIXTURES - - # put here any instruction you want to execute only *ONCE* *BEFORE* - # executing *ALL* tests described below. - - _skip_if_no_project_id() - _skip_if_no_private_key_path() - - _setup_common() - - def setUp(self): - # - PER-TEST FIXTURES - - # put here any instruction you want to be run *BEFORE* *EVERY* test is - # executed. - pass - - @classmethod - def tearDownClass(cls): - # - GLOBAL CLASS FIXTURES - - # put here any instruction you want to execute only *ONCE* *AFTER* - # executing all tests. - pass - - def tearDown(self): - # - PER-TEST FIXTURES - - # put here any instructions you want to be run *AFTER* *EVERY* test is - # executed. 
- pass - - def test_should_properly_handle_valid_strings(self): - query = 'SELECT "PI" AS valid_string' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'valid_string': ['PI']})) - - def test_should_properly_handle_empty_strings(self): - query = 'SELECT "" AS empty_string' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'empty_string': [""]})) - - def test_should_properly_handle_null_strings(self): - query = 'SELECT STRING(NULL) AS null_string' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'null_string': [None]})) - - def test_should_properly_handle_valid_integers(self): - query = 'SELECT INTEGER(3) AS valid_integer' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'valid_integer': [3]})) - - def test_should_properly_handle_nullable_integers(self): - query = '''SELECT * FROM - (SELECT 1 AS nullable_integer), - (SELECT NULL AS nullable_integer)''' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal( - df, DataFrame({'nullable_integer': [1, None]}).astype(object)) - - def test_should_properly_handle_valid_longs(self): - query = 'SELECT 1 << 62 AS valid_long' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal( - df, DataFrame({'valid_long': [1 << 62]})) - - def test_should_properly_handle_nullable_longs(self): - query = '''SELECT * FROM - (SELECT 1 << 62 AS nullable_long), - (SELECT NULL AS nullable_long)''' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal( - df, DataFrame({'nullable_long': [1 << 62, 
None]}).astype(object)) - - def test_should_properly_handle_null_integers(self): - query = 'SELECT INTEGER(NULL) AS null_integer' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'null_integer': [None]})) - - def test_should_properly_handle_valid_floats(self): - from math import pi - query = 'SELECT PI() AS valid_float' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame( - {'valid_float': [pi]})) - - def test_should_properly_handle_nullable_floats(self): - from math import pi - query = '''SELECT * FROM - (SELECT PI() AS nullable_float), - (SELECT NULL AS nullable_float)''' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal( - df, DataFrame({'nullable_float': [pi, None]})) - - def test_should_properly_handle_valid_doubles(self): - from math import pi - query = 'SELECT PI() * POW(10, 307) AS valid_double' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame( - {'valid_double': [pi * 10 ** 307]})) - - def test_should_properly_handle_nullable_doubles(self): - from math import pi - query = '''SELECT * FROM - (SELECT PI() * POW(10, 307) AS nullable_double), - (SELECT NULL AS nullable_double)''' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal( - df, DataFrame({'nullable_double': [pi * 10 ** 307, None]})) - - def test_should_properly_handle_null_floats(self): - query = 'SELECT FLOAT(NULL) AS null_float' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'null_float': [np.nan]})) - - def test_should_properly_handle_timestamp_unix_epoch(self): - query = 'SELECT TIMESTAMP("1970-01-01 00:00:00") AS unix_epoch' 
- df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame( - {'unix_epoch': [np.datetime64('1970-01-01T00:00:00.000000Z')]})) - - def test_should_properly_handle_arbitrary_timestamp(self): - query = 'SELECT TIMESTAMP("2004-09-15 05:00:00") AS valid_timestamp' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({ - 'valid_timestamp': [np.datetime64('2004-09-15T05:00:00.000000Z')] - })) - - def test_should_properly_handle_null_timestamp(self): - query = 'SELECT TIMESTAMP(NULL) AS null_timestamp' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'null_timestamp': [NaT]})) - - def test_should_properly_handle_true_boolean(self): - query = 'SELECT BOOLEAN(TRUE) AS true_boolean' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'true_boolean': [True]})) - - def test_should_properly_handle_false_boolean(self): - query = 'SELECT BOOLEAN(FALSE) AS false_boolean' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'false_boolean': [False]})) - - def test_should_properly_handle_null_boolean(self): - query = 'SELECT BOOLEAN(NULL) AS null_boolean' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'null_boolean': [None]})) - - def test_should_properly_handle_nullable_booleans(self): - query = '''SELECT * FROM - (SELECT BOOLEAN(TRUE) AS nullable_boolean), - (SELECT NULL AS nullable_boolean)''' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal( - df, DataFrame({'nullable_boolean': [True, None]}).astype(object)) - - def 
test_unicode_string_conversion_and_normalization(self): - correct_test_datatype = DataFrame( - {'unicode_string': [u("\xe9\xfc")]} - ) - - unicode_string = "\xc3\xa9\xc3\xbc" - - if compat.PY3: - unicode_string = unicode_string.encode('latin-1').decode('utf8') - - query = 'SELECT "{0}" AS unicode_string'.format(unicode_string) - - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, correct_test_datatype) - - def test_index_column(self): - query = "SELECT 'a' AS string_1, 'b' AS string_2" - result_frame = gbq.read_gbq(query, project_id=_get_project_id(), - index_col="string_1", - private_key=_get_private_key_path()) - correct_frame = DataFrame( - {'string_1': ['a'], 'string_2': ['b']}).set_index("string_1") - tm.assert_equal(result_frame.index.name, correct_frame.index.name) - - def test_column_order(self): - query = "SELECT 'a' AS string_1, 'b' AS string_2, 'c' AS string_3" - col_order = ['string_3', 'string_1', 'string_2'] - result_frame = gbq.read_gbq(query, project_id=_get_project_id(), - col_order=col_order, - private_key=_get_private_key_path()) - correct_frame = DataFrame({'string_1': ['a'], 'string_2': [ - 'b'], 'string_3': ['c']})[col_order] - tm.assert_frame_equal(result_frame, correct_frame) - - def test_column_order_plus_index(self): - query = "SELECT 'a' AS string_1, 'b' AS string_2, 'c' AS string_3" - col_order = ['string_3', 'string_2'] - result_frame = gbq.read_gbq(query, project_id=_get_project_id(), - index_col='string_1', col_order=col_order, - private_key=_get_private_key_path()) - correct_frame = DataFrame( - {'string_1': ['a'], 'string_2': ['b'], 'string_3': ['c']}) - correct_frame.set_index('string_1', inplace=True) - correct_frame = correct_frame[col_order] - tm.assert_frame_equal(result_frame, correct_frame) - - def test_malformed_query(self): - with tm.assertRaises(gbq.GenericGBQException): - gbq.read_gbq("SELCET * FORM [publicdata:samples.shakespeare]", - 
project_id=_get_project_id(), - private_key=_get_private_key_path()) - - def test_bad_project_id(self): - with tm.assertRaises(gbq.GenericGBQException): - gbq.read_gbq("SELECT 1", project_id='001', - private_key=_get_private_key_path()) - - def test_bad_table_name(self): - with tm.assertRaises(gbq.GenericGBQException): - gbq.read_gbq("SELECT * FROM [publicdata:samples.nope]", - project_id=_get_project_id(), - private_key=_get_private_key_path()) - - def test_download_dataset_larger_than_200k_rows(self): - test_size = 200005 - # Test for known BigQuery bug in datasets larger than 100k rows - # http://stackoverflow.com/questions/19145587/bq-py-not-paging-results - df = gbq.read_gbq("SELECT id FROM [publicdata:samples.wikipedia] " - "GROUP EACH BY id ORDER BY id ASC LIMIT {0}" - .format(test_size), - project_id=_get_project_id(), - private_key=_get_private_key_path()) - self.assertEqual(len(df.drop_duplicates()), test_size) - - def test_zero_rows(self): - # Bug fix for https://github.com/pandas-dev/pandas/issues/10273 - df = gbq.read_gbq("SELECT title, id, is_bot, " - "SEC_TO_TIMESTAMP(timestamp) ts " - "FROM [publicdata:samples.wikipedia] " - "WHERE timestamp=-9999999", - project_id=_get_project_id(), - private_key=_get_private_key_path()) - page_array = np.zeros( - (0,), dtype=[('title', object), ('id', np.dtype(int)), - ('is_bot', np.dtype(bool)), ('ts', 'M8[ns]')]) - expected_result = DataFrame( - page_array, columns=['title', 'id', 'is_bot', 'ts']) - self.assert_frame_equal(df, expected_result) - - def test_legacy_sql(self): - legacy_sql = "SELECT id FROM [publicdata.samples.wikipedia] LIMIT 10" - - # Test that a legacy sql statement fails when - # setting dialect='standard' - with tm.assertRaises(gbq.GenericGBQException): - gbq.read_gbq(legacy_sql, project_id=_get_project_id(), - dialect='standard', - private_key=_get_private_key_path()) - - # Test that a legacy sql statement succeeds when - # setting dialect='legacy' - df = gbq.read_gbq(legacy_sql, 
project_id=_get_project_id(), - dialect='legacy', - private_key=_get_private_key_path()) - self.assertEqual(len(df.drop_duplicates()), 10) - - def test_standard_sql(self): - standard_sql = "SELECT DISTINCT id FROM " \ - "`publicdata.samples.wikipedia` LIMIT 10" - - # Test that a standard sql statement fails when using - # the legacy SQL dialect (default value) - with tm.assertRaises(gbq.GenericGBQException): - gbq.read_gbq(standard_sql, project_id=_get_project_id(), - private_key=_get_private_key_path()) - - # Test that a standard sql statement succeeds when - # setting dialect='standard' - df = gbq.read_gbq(standard_sql, project_id=_get_project_id(), - dialect='standard', - private_key=_get_private_key_path()) - self.assertEqual(len(df.drop_duplicates()), 10) - - def test_invalid_option_for_sql_dialect(self): - sql_statement = "SELECT DISTINCT id FROM " \ - "`publicdata.samples.wikipedia` LIMIT 10" - - # Test that an invalid option for `dialect` raises ValueError - with tm.assertRaises(ValueError): - gbq.read_gbq(sql_statement, project_id=_get_project_id(), - dialect='invalid', - private_key=_get_private_key_path()) - - # Test that a correct option for dialect succeeds - # to make sure ValueError was due to invalid dialect - gbq.read_gbq(sql_statement, project_id=_get_project_id(), - dialect='standard', private_key=_get_private_key_path()) - - def test_query_with_parameters(self): - sql_statement = "SELECT @param1 + @param2 AS valid_result" - config = { - 'query': { - "useLegacySql": False, - "parameterMode": "named", - "queryParameters": [ - { - "name": "param1", - "parameterType": { - "type": "INTEGER" - }, - "parameterValue": { - "value": 1 - } - }, - { - "name": "param2", - "parameterType": { - "type": "INTEGER" - }, - "parameterValue": { - "value": 2 - } - } - ] - } - } - # Test that a query that relies on parameters fails - # when parameters are not supplied via configuration - with tm.assertRaises(ValueError): - gbq.read_gbq(sql_statement, 
project_id=_get_project_id(), - private_key=_get_private_key_path()) - - # Test that the query is successful because we have supplied - # the correct query parameters via the 'config' option - df = gbq.read_gbq(sql_statement, project_id=_get_project_id(), - private_key=_get_private_key_path(), - configuration=config) - tm.assert_frame_equal(df, DataFrame({'valid_result': [3]})) - - def test_query_inside_configuration(self): - query_no_use = 'SELECT "PI_WRONG" AS valid_string' - query = 'SELECT "PI" AS valid_string' - config = { - 'query': { - "query": query, - "useQueryCache": False, - } - } - # Test that it can't pass query both - # inside config and as parameter - with tm.assertRaises(ValueError): - gbq.read_gbq(query_no_use, project_id=_get_project_id(), - private_key=_get_private_key_path(), - configuration=config) - - df = gbq.read_gbq(None, project_id=_get_project_id(), - private_key=_get_private_key_path(), - configuration=config) - tm.assert_frame_equal(df, DataFrame({'valid_string': ['PI']})) - - def test_configuration_without_query(self): - sql_statement = 'SELECT 1' - config = { - 'copy': { - "sourceTable": { - "projectId": _get_project_id(), - "datasetId": "publicdata:samples", - "tableId": "wikipedia" - }, - "destinationTable": { - "projectId": _get_project_id(), - "datasetId": "publicdata:samples", - "tableId": "wikipedia_copied" - }, - } - } - # Test that only 'query' configurations are supported - # nor 'copy','load','extract' - with tm.assertRaises(ValueError): - gbq.read_gbq(sql_statement, project_id=_get_project_id(), - private_key=_get_private_key_path(), - configuration=config) - - -class TestToGBQIntegrationWithServiceAccountKeyPath(tm.TestCase): - # Changes to BigQuery table schema may take up to 2 minutes as of May 2015 - # As a workaround to this issue, each test should use a unique table name. 
- # Make sure to modify the for loop range in the tearDownClass when a new - # test is added See `Issue 191 - # `__ - - @classmethod - def setUpClass(cls): - # - GLOBAL CLASS FIXTURES - - # put here any instruction you want to execute only *ONCE* *BEFORE* - # executing *ALL* tests described below. - - _skip_if_no_project_id() - _skip_if_no_private_key_path() - - _setup_common() - clean_gbq_environment(_get_private_key_path()) - - gbq._Dataset(_get_project_id(), - private_key=_get_private_key_path() - ).create(DATASET_ID + "1") - - def setUp(self): - # - PER-TEST FIXTURES - - # put here any instruction you want to be run *BEFORE* *EVERY* test is - # executed. - - self.dataset = gbq._Dataset(_get_project_id(), - private_key=_get_private_key_path()) - self.table = gbq._Table(_get_project_id(), DATASET_ID + "1", - private_key=_get_private_key_path()) - self.sut = gbq.GbqConnector(_get_project_id(), - private_key=_get_private_key_path()) - - @classmethod - def tearDownClass(cls): - # - GLOBAL CLASS FIXTURES - - # put here any instruction you want to execute only *ONCE* *AFTER* - # executing all tests. - - clean_gbq_environment(_get_private_key_path()) - - def tearDown(self): - # - PER-TEST FIXTURES - - # put here any instructions you want to be run *AFTER* *EVERY* test is - # executed. - pass - - def test_upload_data(self): - destination_table = DESTINATION_TABLE + "1" - - test_size = 20001 - df = make_mixed_dataframe_v2(test_size) - - gbq.to_gbq(df, destination_table, _get_project_id(), chunksize=10000, - private_key=_get_private_key_path()) - - sleep(30) # <- Curses Google!!! 
- - result = gbq.read_gbq("SELECT COUNT(*) AS num_rows FROM {0}" - .format(destination_table), - project_id=_get_project_id(), - private_key=_get_private_key_path()) - self.assertEqual(result['num_rows'][0], test_size) - - def test_upload_data_if_table_exists_fail(self): - destination_table = DESTINATION_TABLE + "2" - - test_size = 10 - df = make_mixed_dataframe_v2(test_size) - self.table.create(TABLE_ID + "2", gbq._generate_bq_schema(df)) - - # Test the default value of if_exists is 'fail' - with tm.assertRaises(gbq.TableCreationError): - gbq.to_gbq(df, destination_table, _get_project_id(), - private_key=_get_private_key_path()) - - # Test the if_exists parameter with value 'fail' - with tm.assertRaises(gbq.TableCreationError): - gbq.to_gbq(df, destination_table, _get_project_id(), - if_exists='fail', private_key=_get_private_key_path()) - - def test_upload_data_if_table_exists_append(self): - destination_table = DESTINATION_TABLE + "3" - - test_size = 10 - df = make_mixed_dataframe_v2(test_size) - df_different_schema = tm.makeMixedDataFrame() - - # Initialize table with sample data - gbq.to_gbq(df, destination_table, _get_project_id(), chunksize=10000, - private_key=_get_private_key_path()) - - # Test the if_exists parameter with value 'append' - gbq.to_gbq(df, destination_table, _get_project_id(), - if_exists='append', private_key=_get_private_key_path()) - - sleep(30) # <- Curses Google!!! 
- - result = gbq.read_gbq("SELECT COUNT(*) AS num_rows FROM {0}" - .format(destination_table), - project_id=_get_project_id(), - private_key=_get_private_key_path()) - self.assertEqual(result['num_rows'][0], test_size * 2) - - # Try inserting with a different schema, confirm failure - with tm.assertRaises(gbq.InvalidSchema): - gbq.to_gbq(df_different_schema, destination_table, - _get_project_id(), if_exists='append', - private_key=_get_private_key_path()) - - def test_upload_data_if_table_exists_replace(self): - - pytest.skip("buggy test") - - destination_table = DESTINATION_TABLE + "4" - - test_size = 10 - df = make_mixed_dataframe_v2(test_size) - df_different_schema = tm.makeMixedDataFrame() - - # Initialize table with sample data - gbq.to_gbq(df, destination_table, _get_project_id(), chunksize=10000, - private_key=_get_private_key_path()) - - # Test the if_exists parameter with the value 'replace'. - gbq.to_gbq(df_different_schema, destination_table, - _get_project_id(), if_exists='replace', - private_key=_get_private_key_path()) - - sleep(30) # <- Curses Google!!! 
- - result = gbq.read_gbq("SELECT COUNT(*) AS num_rows FROM {0}" - .format(destination_table), - project_id=_get_project_id(), - private_key=_get_private_key_path()) - self.assertEqual(result['num_rows'][0], 5) - - @tm.slow - def test_google_upload_errors_should_raise_exception(self): - destination_table = DESTINATION_TABLE + "5" - - test_timestamp = datetime.now(pytz.timezone('US/Arizona')) - bad_df = DataFrame({'bools': [False, False], 'flts': [0.0, 1.0], - 'ints': [0, '1'], 'strs': ['a', 1], - 'times': [test_timestamp, test_timestamp]}, - index=range(2)) - - with tm.assertRaises(gbq.StreamingInsertError): - gbq.to_gbq(bad_df, destination_table, _get_project_id(), - verbose=True, private_key=_get_private_key_path()) - - def test_generate_schema(self): - df = tm.makeMixedDataFrame() - schema = gbq._generate_bq_schema(df) - - test_schema = {'fields': [{'name': 'A', 'type': 'FLOAT'}, - {'name': 'B', 'type': 'FLOAT'}, - {'name': 'C', 'type': 'STRING'}, - {'name': 'D', 'type': 'TIMESTAMP'}]} - - self.assertEqual(schema, test_schema) - - def test_create_table(self): - destination_table = TABLE_ID + "6" - test_schema = {'fields': [{'name': 'A', 'type': 'FLOAT'}, - {'name': 'B', 'type': 'FLOAT'}, - {'name': 'C', 'type': 'STRING'}, - {'name': 'D', 'type': 'TIMESTAMP'}]} - self.table.create(destination_table, test_schema) - self.assertTrue(self.table.exists(destination_table), - 'Expected table to exist') - - def test_table_does_not_exist(self): - self.assertTrue(not self.table.exists(TABLE_ID + "7"), - 'Expected table not to exist') - - def test_delete_table(self): - destination_table = TABLE_ID + "8" - test_schema = {'fields': [{'name': 'A', 'type': 'FLOAT'}, - {'name': 'B', 'type': 'FLOAT'}, - {'name': 'C', 'type': 'STRING'}, - {'name': 'D', 'type': 'TIMESTAMP'}]} - self.table.create(destination_table, test_schema) - self.table.delete(destination_table) - self.assertTrue(not self.table.exists( - destination_table), 'Expected table not to exist') - - def 
test_list_table(self): - destination_table = TABLE_ID + "9" - test_schema = {'fields': [{'name': 'A', 'type': 'FLOAT'}, - {'name': 'B', 'type': 'FLOAT'}, - {'name': 'C', 'type': 'STRING'}, - {'name': 'D', 'type': 'TIMESTAMP'}]} - self.table.create(destination_table, test_schema) - self.assertTrue( - destination_table in self.dataset.tables(DATASET_ID + "1"), - 'Expected table list to contain table {0}' - .format(destination_table)) - - def test_verify_schema_allows_flexible_column_order(self): - destination_table = TABLE_ID + "10" - test_schema_1 = {'fields': [{'name': 'A', 'type': 'FLOAT'}, - {'name': 'B', 'type': 'FLOAT'}, - {'name': 'C', 'type': 'STRING'}, - {'name': 'D', 'type': 'TIMESTAMP'}]} - test_schema_2 = {'fields': [{'name': 'A', 'type': 'FLOAT'}, - {'name': 'C', 'type': 'STRING'}, - {'name': 'B', 'type': 'FLOAT'}, - {'name': 'D', 'type': 'TIMESTAMP'}]} - - self.table.create(destination_table, test_schema_1) - self.assertTrue(self.sut.verify_schema( - DATASET_ID + "1", destination_table, test_schema_2), - 'Expected schema to match') - - def test_verify_schema_fails_different_data_type(self): - destination_table = TABLE_ID + "11" - test_schema_1 = {'fields': [{'name': 'A', 'type': 'FLOAT'}, - {'name': 'B', 'type': 'FLOAT'}, - {'name': 'C', 'type': 'STRING'}, - {'name': 'D', 'type': 'TIMESTAMP'}]} - test_schema_2 = {'fields': [{'name': 'A', 'type': 'FLOAT'}, - {'name': 'B', 'type': 'STRING'}, - {'name': 'C', 'type': 'STRING'}, - {'name': 'D', 'type': 'TIMESTAMP'}]} - - self.table.create(destination_table, test_schema_1) - self.assertFalse(self.sut.verify_schema( - DATASET_ID + "1", destination_table, test_schema_2), - 'Expected different schema') - - def test_verify_schema_fails_different_structure(self): - destination_table = TABLE_ID + "12" - test_schema_1 = {'fields': [{'name': 'A', 'type': 'FLOAT'}, - {'name': 'B', 'type': 'FLOAT'}, - {'name': 'C', 'type': 'STRING'}, - {'name': 'D', 'type': 'TIMESTAMP'}]} - test_schema_2 = {'fields': [{'name': 'A', 
'type': 'FLOAT'}, - {'name': 'B2', 'type': 'FLOAT'}, - {'name': 'C', 'type': 'STRING'}, - {'name': 'D', 'type': 'TIMESTAMP'}]} - - self.table.create(destination_table, test_schema_1) - self.assertFalse(self.sut.verify_schema( - DATASET_ID + "1", destination_table, test_schema_2), - 'Expected different schema') - - def test_upload_data_flexible_column_order(self): - destination_table = DESTINATION_TABLE + "13" - - test_size = 10 - df = make_mixed_dataframe_v2(test_size) - - # Initialize table with sample data - gbq.to_gbq(df, destination_table, _get_project_id(), chunksize=10000, - private_key=_get_private_key_path()) - - df_columns_reversed = df[df.columns[::-1]] - - gbq.to_gbq(df_columns_reversed, destination_table, _get_project_id(), - if_exists='append', private_key=_get_private_key_path()) - - def test_list_dataset(self): - dataset_id = DATASET_ID + "1" - self.assertTrue(dataset_id in self.dataset.datasets(), - 'Expected dataset list to contain dataset {0}' - .format(dataset_id)) - - def test_list_table_zero_results(self): - dataset_id = DATASET_ID + "2" - self.dataset.create(dataset_id) - table_list = gbq._Dataset(_get_project_id(), - private_key=_get_private_key_path() - ).tables(dataset_id) - self.assertEqual(len(table_list), 0, - 'Expected gbq.list_table() to return 0') - - def test_create_dataset(self): - dataset_id = DATASET_ID + "3" - self.dataset.create(dataset_id) - self.assertTrue(dataset_id in self.dataset.datasets(), - 'Expected dataset to exist') - - def test_delete_dataset(self): - dataset_id = DATASET_ID + "4" - self.dataset.create(dataset_id) - self.dataset.delete(dataset_id) - self.assertTrue(dataset_id not in self.dataset.datasets(), - 'Expected dataset not to exist') - - def test_dataset_exists(self): - dataset_id = DATASET_ID + "5" - self.dataset.create(dataset_id) - self.assertTrue(self.dataset.exists(dataset_id), - 'Expected dataset to exist') - - def create_table_data_dataset_does_not_exist(self): - dataset_id = DATASET_ID + "6" - 
table_id = TABLE_ID + "1" - table_with_new_dataset = gbq._Table(_get_project_id(), dataset_id) - df = make_mixed_dataframe_v2(10) - table_with_new_dataset.create(table_id, gbq._generate_bq_schema(df)) - self.assertTrue(self.dataset.exists(dataset_id), - 'Expected dataset to exist') - self.assertTrue(table_with_new_dataset.exists( - table_id), 'Expected dataset to exist') - - def test_dataset_does_not_exist(self): - self.assertTrue(not self.dataset.exists( - DATASET_ID + "_not_found"), 'Expected dataset not to exist') - - -class TestToGBQIntegrationWithLocalUserAccountAuth(tm.TestCase): - # Changes to BigQuery table schema may take up to 2 minutes as of May 2015 - # As a workaround to this issue, each test should use a unique table name. - # Make sure to modify the for loop range in the tearDownClass when a new - # test is added - # See `Issue 191 - # `__ - - @classmethod - def setUpClass(cls): - # - GLOBAL CLASS FIXTURES - - # put here any instruction you want to execute only *ONCE* *BEFORE* - # executing *ALL* tests described below. - - _skip_if_no_project_id() - _skip_local_auth_if_in_travis_env() - - _setup_common() - clean_gbq_environment() - - def setUp(self): - # - PER-TEST FIXTURES - - # put here any instruction you want to be run *BEFORE* *EVERY* test - # is executed. - pass - - @classmethod - def tearDownClass(cls): - # - GLOBAL CLASS FIXTURES - - # put here any instruction you want to execute only *ONCE* *AFTER* - # executing all tests. - - clean_gbq_environment() - - def tearDown(self): - # - PER-TEST FIXTURES - - # put here any instructions you want to be run *AFTER* *EVERY* test - # is executed. - pass - - def test_upload_data(self): - destination_table = "{0}.{1}".format(DATASET_ID + "2", TABLE_ID + "1") - - test_size = 10 - df = make_mixed_dataframe_v2(test_size) - - gbq.to_gbq(df, destination_table, _get_project_id(), chunksize=10000) - - sleep(30) # <- Curses Google!!! 
- - result = gbq.read_gbq( - "SELECT COUNT(*) AS num_rows FROM {0}".format(destination_table), - project_id=_get_project_id()) - - self.assertEqual(result['num_rows'][0], test_size) - - -class TestToGBQIntegrationWithServiceAccountKeyContents(tm.TestCase): - # Changes to BigQuery table schema may take up to 2 minutes as of May 2015 - # As a workaround to this issue, each test should use a unique table name. - # Make sure to modify the for loop range in the tearDownClass when a new - # test is added - # See `Issue 191 - # `__ - - @classmethod - def setUpClass(cls): - # - GLOBAL CLASS FIXTURES - - # put here any instruction you want to execute only *ONCE* *BEFORE* - # executing *ALL* tests described below. - - _setup_common() - _skip_if_no_project_id() - _skip_if_no_private_key_contents() - - clean_gbq_environment(_get_private_key_contents()) - - def setUp(self): - # - PER-TEST FIXTURES - - # put here any instruction you want to be run *BEFORE* *EVERY* test - # is executed. - pass - - @classmethod - def tearDownClass(cls): - # - GLOBAL CLASS FIXTURES - - # put here any instruction you want to execute only *ONCE* *AFTER* - # executing all tests. - - clean_gbq_environment(_get_private_key_contents()) - - def tearDown(self): - # - PER-TEST FIXTURES - - # put here any instructions you want to be run *AFTER* *EVERY* test - # is executed. - pass - - def test_upload_data(self): - destination_table = "{0}.{1}".format(DATASET_ID + "3", TABLE_ID + "1") - - test_size = 10 - df = make_mixed_dataframe_v2(test_size) - - gbq.to_gbq(df, destination_table, _get_project_id(), chunksize=10000, - private_key=_get_private_key_contents()) - - sleep(30) # <- Curses Google!!! 
- - result = gbq.read_gbq( - "SELECT COUNT(*) AS num_rows FROM {0}".format(destination_table), - project_id=_get_project_id(), - private_key=_get_private_key_contents()) - self.assertEqual(result['num_rows'][0], test_size) diff --git a/pandas/io/tests/test_pickle.py b/pandas/io/tests/test_pickle.py deleted file mode 100644 index 588b2d5f04888..0000000000000 --- a/pandas/io/tests/test_pickle.py +++ /dev/null @@ -1,285 +0,0 @@ -# pylint: disable=E1101,E1103,W0232 - -""" manage legacy pickle tests """ - -import pytest -import os - -from distutils.version import LooseVersion - -import pandas as pd -from pandas import Index -from pandas.compat import u, is_platform_little_endian -import pandas -import pandas.util.testing as tm -from pandas.tseries.offsets import Day, MonthEnd - - -class TestPickle(): - """ - How to add pickle tests: - - 1. Install pandas version intended to output the pickle. - - 2. Execute "generate_legacy_storage_files.py" to create the pickle. - $ python generate_legacy_storage_files.py pickle - - 3. Move the created pickle to "data/legacy_pickle/" directory. - - NOTE: TestPickle can't be a subclass of tm.Testcase to use test generator. 
- http://stackoverflow.com/questions/6689537/ - nose-test-generators-inside-class - """ - - @classmethod - def setup_class(cls): - from pandas.io.tests.generate_legacy_storage_files import ( - create_pickle_data) - cls.data = create_pickle_data() - cls.path = u('__%s__.pickle' % tm.rands(10)) - - def compare_element(self, result, expected, typ, version=None): - if isinstance(expected, Index): - tm.assert_index_equal(expected, result) - return - - if typ.startswith('sp_'): - comparator = getattr(tm, "assert_%s_equal" % typ) - comparator(result, expected, exact_indices=False) - elif typ == 'timestamp': - if expected is pd.NaT: - assert result is pd.NaT - else: - tm.assert_equal(result, expected) - tm.assert_equal(result.freq, expected.freq) - else: - comparator = getattr(tm, "assert_%s_equal" % - typ, tm.assert_almost_equal) - comparator(result, expected) - - def compare(self, vf, version): - - # py3 compat when reading py2 pickle - try: - data = pandas.read_pickle(vf) - except (ValueError) as e: - if 'unsupported pickle protocol:' in str(e): - # trying to read a py3 pickle in py2 - return - else: - raise - - for typ, dv in data.items(): - for dt, result in dv.items(): - try: - expected = self.data[typ][dt] - except (KeyError): - if version in ('0.10.1', '0.11.0') and dt == 'reg': - break - else: - raise - - # use a specific comparator - # if available - comparator = "compare_{typ}_{dt}".format(typ=typ, dt=dt) - comparator = getattr(self, comparator, self.compare_element) - comparator(result, expected, typ, version) - return data - - def compare_sp_series_ts(self, res, exp, typ, version): - # SparseTimeSeries integrated into SparseSeries in 0.12.0 - # and deprecated in 0.17.0 - if version and LooseVersion(version) <= "0.12.0": - tm.assert_sp_series_equal(res, exp, check_series_type=False) - else: - tm.assert_sp_series_equal(res, exp) - - def compare_series_ts(self, result, expected, typ, version): - # GH 7748 - tm.assert_series_equal(result, expected) - 
tm.assert_equal(result.index.freq, expected.index.freq) - tm.assert_equal(result.index.freq.normalize, False) - tm.assert_series_equal(result > 0, expected > 0) - - # GH 9291 - freq = result.index.freq - tm.assert_equal(freq + Day(1), Day(2)) - - res = freq + pandas.Timedelta(hours=1) - tm.assert_equal(isinstance(res, pandas.Timedelta), True) - tm.assert_equal(res, pandas.Timedelta(days=1, hours=1)) - - res = freq + pandas.Timedelta(nanoseconds=1) - tm.assert_equal(isinstance(res, pandas.Timedelta), True) - tm.assert_equal(res, pandas.Timedelta(days=1, nanoseconds=1)) - - def compare_series_dt_tz(self, result, expected, typ, version): - # 8260 - # dtype is object < 0.17.0 - if LooseVersion(version) < '0.17.0': - expected = expected.astype(object) - tm.assert_series_equal(result, expected) - else: - tm.assert_series_equal(result, expected) - - def compare_series_cat(self, result, expected, typ, version): - # Categorical dtype is added in 0.15.0 - # ordered is changed in 0.16.0 - if LooseVersion(version) < '0.15.0': - tm.assert_series_equal(result, expected, check_dtype=False, - check_categorical=False) - elif LooseVersion(version) < '0.16.0': - tm.assert_series_equal(result, expected, check_categorical=False) - else: - tm.assert_series_equal(result, expected) - - def compare_frame_dt_mixed_tzs(self, result, expected, typ, version): - # 8260 - # dtype is object < 0.17.0 - if LooseVersion(version) < '0.17.0': - expected = expected.astype(object) - tm.assert_frame_equal(result, expected) - else: - tm.assert_frame_equal(result, expected) - - def compare_frame_cat_onecol(self, result, expected, typ, version): - # Categorical dtype is added in 0.15.0 - # ordered is changed in 0.16.0 - if LooseVersion(version) < '0.15.0': - tm.assert_frame_equal(result, expected, check_dtype=False, - check_categorical=False) - elif LooseVersion(version) < '0.16.0': - tm.assert_frame_equal(result, expected, check_categorical=False) - else: - tm.assert_frame_equal(result, expected) - - def 
compare_frame_cat_and_float(self, result, expected, typ, version): - self.compare_frame_cat_onecol(result, expected, typ, version) - - def compare_index_period(self, result, expected, typ, version): - tm.assert_index_equal(result, expected) - tm.assertIsInstance(result.freq, MonthEnd) - tm.assert_equal(result.freq, MonthEnd()) - tm.assert_equal(result.freqstr, 'M') - tm.assert_index_equal(result.shift(2), expected.shift(2)) - - def compare_sp_frame_float(self, result, expected, typ, version): - if LooseVersion(version) <= '0.18.1': - tm.assert_sp_frame_equal(result, expected, exact_indices=False, - check_dtype=False) - else: - tm.assert_sp_frame_equal(result, expected) - - def read_pickles(self, version): - if not is_platform_little_endian(): - pytest.skip("known failure on non-little endian") - - pth = tm.get_data_path('legacy_pickle/{0}'.format(str(version))) - n = 0 - for f in os.listdir(pth): - vf = os.path.join(pth, f) - data = self.compare(vf, version) - - if data is None: - continue - n += 1 - assert n > 0, 'Pickle files are not tested' - - def test_pickles(self): - pickle_path = tm.get_data_path('legacy_pickle') - n = 0 - for v in os.listdir(pickle_path): - pth = os.path.join(pickle_path, v) - if os.path.isdir(pth): - yield self.read_pickles, v - n += 1 - assert n > 0, 'Pickle files are not tested' - - def test_round_trip_current(self): - - try: - import cPickle as c_pickle - - def c_pickler(obj, path): - with open(path, 'wb') as fh: - c_pickle.dump(obj, fh, protocol=-1) - - def c_unpickler(path): - with open(path, 'rb') as fh: - fh.seek(0) - return c_pickle.load(fh) - except: - c_pickler = None - c_unpickler = None - - import pickle as python_pickle - - def python_pickler(obj, path): - with open(path, 'wb') as fh: - python_pickle.dump(obj, fh, protocol=-1) - - def python_unpickler(path): - with open(path, 'rb') as fh: - fh.seek(0) - return python_pickle.load(fh) - - for typ, dv in self.data.items(): - for dt, expected in dv.items(): - - for writer in 
[pd.to_pickle, c_pickler, python_pickler]: - if writer is None: - continue - - with tm.ensure_clean(self.path) as path: - - # test writing with each pickler - writer(expected, path) - - # test reading with each unpickler - result = pd.read_pickle(path) - self.compare_element(result, expected, typ) - - if c_unpickler is not None: - result = c_unpickler(path) - self.compare_element(result, expected, typ) - - result = python_unpickler(path) - self.compare_element(result, expected, typ) - - def test_pickle_v0_14_1(self): - - # we have the name warning - # 10482 - with tm.assert_produces_warning(UserWarning): - cat = pd.Categorical(values=['a', 'b', 'c'], - categories=['a', 'b', 'c', 'd'], - name='foobar', ordered=False) - pickle_path = os.path.join(tm.get_data_path(), - 'categorical_0_14_1.pickle') - # This code was executed once on v0.14.1 to generate the pickle: - # - # cat = Categorical(labels=np.arange(3), levels=['a', 'b', 'c', 'd'], - # name='foobar') - # with open(pickle_path, 'wb') as f: pickle.dump(cat, f) - # - tm.assert_categorical_equal(cat, pd.read_pickle(pickle_path)) - - def test_pickle_v0_15_2(self): - # ordered -> _ordered - # GH 9347 - - # we have the name warning - # 10482 - with tm.assert_produces_warning(UserWarning): - cat = pd.Categorical(values=['a', 'b', 'c'], - categories=['a', 'b', 'c', 'd'], - name='foobar', ordered=False) - pickle_path = os.path.join(tm.get_data_path(), - 'categorical_0_15_2.pickle') - # This code was executed once on v0.15.2 to generate the pickle: - # - # cat = Categorical(labels=np.arange(3), levels=['a', 'b', 'c', 'd'], - # name='foobar') - # with open(pickle_path, 'wb') as f: pickle.dump(cat, f) - # - tm.assert_categorical_equal(cat, pd.read_pickle(pickle_path)) diff --git a/pandas/io/tests/test_s3.py b/pandas/io/tests/test_s3.py deleted file mode 100644 index 2983fa647445c..0000000000000 --- a/pandas/io/tests/test_s3.py +++ /dev/null @@ -1,10 +0,0 @@ -from pandas.util import testing as tm - -from pandas.io.common 
import _is_s3_url - - -class TestS3URL(tm.TestCase): - - def test_is_s3_url(self): - self.assertTrue(_is_s3_url("s3://pandas/somethingelse.com")) - self.assertFalse(_is_s3_url("s4://pandas/somethingelse.com")) diff --git a/pandas/io/wb.py b/pandas/io/wb.py deleted file mode 100644 index 5dc4d9ce1adc4..0000000000000 --- a/pandas/io/wb.py +++ /dev/null @@ -1,6 +0,0 @@ -raise ImportError( - "The pandas.io.wb module is moved to a separate package " - "(pandas-datareader). After installing the pandas-datareader package " - "(https://github.com/pydata/pandas-datareader), you can change " - "the import ``from pandas.io import data, wb`` to " - "``from pandas_datareader import data, wb``.") diff --git a/pandas/json.py b/pandas/json.py new file mode 100644 index 0000000000000..16d6580c87951 --- /dev/null +++ b/pandas/json.py @@ -0,0 +1,7 @@ +# flake8: noqa + +import warnings +warnings.warn("The pandas.json module is deprecated and will be " + "removed in a future version. Please import from " + "pandas.io.json instead", FutureWarning, stacklevel=2) +from pandas._libs.json import dumps, loads diff --git a/pandas/lib.pxd b/pandas/lib.pxd deleted file mode 100644 index 554b0248e97ea..0000000000000 --- a/pandas/lib.pxd +++ /dev/null @@ -1,4 +0,0 @@ -# prototypes for sharing - -cdef bint is_null_datetimelike(v) -cpdef bint is_period(val) diff --git a/pandas/lib.py b/pandas/lib.py new file mode 100644 index 0000000000000..859a78060fcc1 --- /dev/null +++ b/pandas/lib.py @@ -0,0 +1,8 @@ +# flake8: noqa + +import warnings +warnings.warn("The pandas.lib module is deprecated and will be " + "removed in a future version. 
These are private functions " + "and can be accessed from pandas._libs.lib instead", + FutureWarning, stacklevel=2) +from pandas._libs.lib import * diff --git a/pandas/lib.pyx b/pandas/lib.pyx deleted file mode 100644 index b4724bc3dd59b..0000000000000 --- a/pandas/lib.pyx +++ /dev/null @@ -1,1968 +0,0 @@ -# cython: profile=False -cimport numpy as np -cimport cython -import numpy as np -import sys -cdef bint PY3 = (sys.version_info[0] >= 3) - -from numpy cimport * - -np.import_array() - -cdef extern from "numpy/arrayobject.h": - cdef enum NPY_TYPES: - NPY_intp "NPY_INTP" - - -from cpython cimport (PyDict_New, PyDict_GetItem, PyDict_SetItem, - PyDict_Contains, PyDict_Keys, - Py_INCREF, PyTuple_SET_ITEM, - PyList_Check, PyFloat_Check, - PyString_Check, - PyBytes_Check, - PyTuple_SetItem, - PyTuple_New, - PyObject_SetAttrString, - PyObject_RichCompareBool, - PyBytes_GET_SIZE, - PyUnicode_GET_SIZE) - -try: - from cpython cimport PyString_GET_SIZE -except ImportError: - from cpython cimport PyUnicode_GET_SIZE as PyString_GET_SIZE - -cdef extern from "Python.h": - Py_ssize_t PY_SSIZE_T_MAX - - ctypedef struct PySliceObject: - pass - - cdef int PySlice_GetIndicesEx( - PySliceObject* s, Py_ssize_t length, - Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t *step, - Py_ssize_t *slicelength) except -1 - -cimport cpython - -isnan = np.isnan -cdef double NaN = np.NaN -cdef double nan = NaN -cdef double NAN = nan - -from datetime import datetime as pydatetime - -# this is our tseries.pxd -from datetime cimport * - -from tslib cimport (convert_to_tsobject, convert_to_timedelta64, - _check_all_nulls) -import tslib -from tslib import NaT, Timestamp, Timedelta - -cdef int64_t NPY_NAT = util.get_nat() - -ctypedef unsigned char UChar - -cimport util -from util cimport (is_array, _checknull, _checknan, INT64_MAX, - INT64_MIN, UINT8_MAX) - -cdef extern from "math.h": - double sqrt(double x) - double fabs(double) - -# import datetime C API -PyDateTime_IMPORT - -# initialize numpy 
-import_array() -import_ufunc() - - -def values_from_object(object o): - """ return my values or the object if we are say an ndarray """ - cdef f - - f = getattr(o, 'get_values', None) - if f is not None: - o = f() - - return o - -cpdef map_indices_list(list index): - """ - Produce a dict mapping the values of the input array to their respective - locations. - - Example: - array(['hi', 'there']) --> {'hi' : 0 , 'there' : 1} - - Better to do this with Cython because of the enormous speed boost. - """ - cdef Py_ssize_t i, length - cdef dict result = {} - - length = len(index) - - for i from 0 <= i < length: - result[index[i]] = i - - return result - - -from libc.stdlib cimport malloc, free - - -def ismember_nans(float64_t[:] arr, set values, bint hasnans): - cdef: - Py_ssize_t i, n - ndarray[uint8_t] result - float64_t val - - n = len(arr) - result = np.empty(n, dtype=np.uint8) - for i in range(n): - val = arr[i] - result[i] = val in values or hasnans and isnan(val) - - return result.view(np.bool_) - - -def ismember(ndarray arr, set values): - """ - Checks whether - - Parameters - ---------- - arr : ndarray - values : set - - Returns - ------- - ismember : ndarray (boolean dtype) - """ - cdef: - Py_ssize_t i, n - ndarray[uint8_t] result - object val - - n = len(arr) - result = np.empty(n, dtype=np.uint8) - for i in range(n): - val = util.get_value_at(arr, i) - result[i] = val in values - - return result.view(np.bool_) - - -def ismember_int64(ndarray[int64_t] arr, set values): - """ - Checks whether - - Parameters - ---------- - arr : ndarray of int64 - values : set - - Returns - ------- - ismember : ndarray (boolean dtype) - """ - cdef: - Py_ssize_t i, n - ndarray[uint8_t] result - int64_t v - - n = len(arr) - result = np.empty(n, dtype=np.uint8) - for i in range(n): - result[i] = arr[i] in values - - return result.view(np.bool_) - - -@cython.wraparound(False) -@cython.boundscheck(False) -def memory_usage_of_objects(ndarray[object, ndim=1] arr): - """ return the 
memory usage of an object array in bytes, - does not include the actual bytes of the pointers """ - cdef Py_ssize_t i, n - cdef int64_t s = 0 - - n = len(arr) - for i from 0 <= i < n: - s += arr[i].__sizeof__() - return s - -#---------------------------------------------------------------------- -# datetime / io related - -cdef int _EPOCH_ORD = 719163 - -from datetime import date as pydate - -cdef inline int64_t gmtime(object date): - cdef int y, m, d, h, mn, s, days - - y = PyDateTime_GET_YEAR(date) - m = PyDateTime_GET_MONTH(date) - d = PyDateTime_GET_DAY(date) - h = PyDateTime_DATE_GET_HOUR(date) - mn = PyDateTime_DATE_GET_MINUTE(date) - s = PyDateTime_DATE_GET_SECOND(date) - - days = pydate(y, m, 1).toordinal() - _EPOCH_ORD + d - 1 - return (( (((days * 24 + h) * 60 + mn))) * 60 + s) * 1000 - - -cpdef object to_datetime(int64_t timestamp): - return pydatetime.utcfromtimestamp(timestamp / 1000.0) - - -cpdef object to_timestamp(object dt): - return gmtime(dt) - - -def array_to_timestamp(ndarray[object, ndim=1] arr): - cdef int i, n - cdef ndarray[int64_t, ndim=1] result - - n = len(arr) - result = np.empty(n, dtype=np.int64) - - for i from 0 <= i < n: - result[i] = gmtime(arr[i]) - - return result - - -def time64_to_datetime(ndarray[int64_t, ndim=1] arr): - cdef int i, n - cdef ndarray[object, ndim=1] result - - n = len(arr) - result = np.empty(n, dtype=object) - - for i from 0 <= i < n: - result[i] = to_datetime(arr[i]) - - return result - - -#---------------------------------------------------------------------- -# isnull / notnull related - -cdef double INF = np.inf -cdef double NEGINF = -INF - - -cpdef bint checknull(object val): - if util.is_float_object(val) or util.is_complex_object(val): - return val != val # and val != INF and val != NEGINF - elif util.is_datetime64_object(val): - return get_datetime64_value(val) == NPY_NAT - elif val is NaT: - return True - elif util.is_timedelta64_object(val): - return get_timedelta64_value(val) == NPY_NAT - elif 
is_array(val): - return False - else: - return _checknull(val) - - -cpdef bint checknull_old(object val): - if util.is_float_object(val) or util.is_complex_object(val): - return val != val or val == INF or val == NEGINF - elif util.is_datetime64_object(val): - return get_datetime64_value(val) == NPY_NAT - elif val is NaT: - return True - elif util.is_timedelta64_object(val): - return get_timedelta64_value(val) == NPY_NAT - elif is_array(val): - return False - else: - return util._checknull(val) - - -cpdef bint isposinf_scalar(object val): - if util.is_float_object(val) and val == INF: - return True - else: - return False - - -cpdef bint isneginf_scalar(object val): - if util.is_float_object(val) and val == NEGINF: - return True - else: - return False - - -cpdef bint isscalar(object val): - """ - Return True if given value is scalar. - - This includes: - - numpy array scalar (e.g. np.int64) - - Python builtin numerics - - Python builtin byte arrays and strings - - None - - instances of datetime.datetime - - instances of datetime.timedelta - - Period - - instances of decimal.Decimal - - """ - - return (np.PyArray_IsAnyScalar(val) - # As of numpy-1.9, PyArray_IsAnyScalar misses bytearrays on Py3. - or PyBytes_Check(val) - # We differ from numpy (as of 1.10), which claims that None is - # not scalar in np.isscalar(). - or val is None - or PyDate_Check(val) - or PyDelta_Check(val) - or PyTime_Check(val) - or util.is_period_object(val) - or is_decimal(val)) - - -def item_from_zerodim(object val): - """ - If the value is a zerodim array, return the item it contains. 
- - Examples - -------- - >>> item_from_zerodim(1) - 1 - >>> item_from_zerodim('foobar') - 'foobar' - >>> item_from_zerodim(np.array(1)) - 1 - >>> item_from_zerodim(np.array([1])) - array([1]) - - """ - return util.unbox_if_zerodim(val) - - -@cython.wraparound(False) -@cython.boundscheck(False) -def isnullobj(ndarray arr): - cdef Py_ssize_t i, n - cdef object val - cdef ndarray[uint8_t] result - - assert arr.ndim == 1, "'arr' must be 1-D." - - n = len(arr) - result = np.empty(n, dtype=np.uint8) - for i from 0 <= i < n: - val = arr[i] - result[i] = _check_all_nulls(val) - return result.view(np.bool_) - - -@cython.wraparound(False) -@cython.boundscheck(False) -def isnullobj_old(ndarray arr): - cdef Py_ssize_t i, n - cdef object val - cdef ndarray[uint8_t] result - - assert arr.ndim == 1, "'arr' must be 1-D." - - n = len(arr) - result = np.zeros(n, dtype=np.uint8) - for i from 0 <= i < n: - val = arr[i] - result[i] = val is NaT or util._checknull_old(val) - return result.view(np.bool_) - - -@cython.wraparound(False) -@cython.boundscheck(False) -def isnullobj2d(ndarray arr): - cdef Py_ssize_t i, j, n, m - cdef object val - cdef ndarray[uint8_t, ndim=2] result - - assert arr.ndim == 2, "'arr' must be 2-D." - - n, m = ( arr).shape - result = np.zeros((n, m), dtype=np.uint8) - for i from 0 <= i < n: - for j from 0 <= j < m: - val = arr[i, j] - if checknull(val): - result[i, j] = 1 - return result.view(np.bool_) - - -@cython.wraparound(False) -@cython.boundscheck(False) -def isnullobj2d_old(ndarray arr): - cdef Py_ssize_t i, j, n, m - cdef object val - cdef ndarray[uint8_t, ndim=2] result - - assert arr.ndim == 2, "'arr' must be 2-D." 
- - n, m = ( arr).shape - result = np.zeros((n, m), dtype=np.uint8) - for i from 0 <= i < n: - for j from 0 <= j < m: - val = arr[i, j] - if checknull_old(val): - result[i, j] = 1 - return result.view(np.bool_) - - -@cython.wraparound(False) -@cython.boundscheck(False) -cpdef ndarray[object] list_to_object_array(list obj): - """ - Convert list to object ndarray. Seriously can\'t believe - I had to write this function. - """ - cdef: - Py_ssize_t i, n = len(obj) - ndarray[object] arr = np.empty(n, dtype=object) - - for i in range(n): - arr[i] = obj[i] - - return arr - - -@cython.wraparound(False) -@cython.boundscheck(False) -def fast_unique(ndarray[object] values): - cdef: - Py_ssize_t i, n = len(values) - list uniques = [] - dict table = {} - object val, stub = 0 - - for i from 0 <= i < n: - val = values[i] - if val not in table: - table[val] = stub - uniques.append(val) - try: - uniques.sort() - except Exception: - pass - - return uniques - - -@cython.wraparound(False) -@cython.boundscheck(False) -def fast_unique_multiple(list arrays): - cdef: - ndarray[object] buf - Py_ssize_t k = len(arrays) - Py_ssize_t i, j, n - list uniques = [] - dict table = {} - object val, stub = 0 - - for i from 0 <= i < k: - buf = arrays[i] - n = len(buf) - for j from 0 <= j < n: - val = buf[j] - if val not in table: - table[val] = stub - uniques.append(val) - try: - uniques.sort() - except Exception: - pass - - return uniques - - -@cython.wraparound(False) -@cython.boundscheck(False) -def fast_unique_multiple_list(list lists): - cdef: - list buf - Py_ssize_t k = len(lists) - Py_ssize_t i, j, n - list uniques = [] - dict table = {} - object val, stub = 0 - - for i from 0 <= i < k: - buf = lists[i] - n = len(buf) - for j from 0 <= j < n: - val = buf[j] - if val not in table: - table[val] = stub - uniques.append(val) - try: - uniques.sort() - except Exception: - pass - - return uniques - - -@cython.wraparound(False) -@cython.boundscheck(False) -def fast_unique_multiple_list_gen(object gen, 
bint sort=True): - """ - Generate a list of unique values from a generator of lists. - - Parameters - ---------- - gen : generator object - A generator of lists from which the unique list is created - sort : boolean - Whether or not to sort the resulting unique list - - Returns - ------- - unique_list : list of unique values - """ - cdef: - list buf - Py_ssize_t j, n - list uniques = [] - dict table = {} - object val, stub = 0 - - for buf in gen: - n = len(buf) - for j from 0 <= j < n: - val = buf[j] - if val not in table: - table[val] = stub - uniques.append(val) - if sort: - try: - uniques.sort() - except Exception: - pass - - return uniques - - -@cython.wraparound(False) -@cython.boundscheck(False) -def dicts_to_array(list dicts, list columns): - cdef: - Py_ssize_t i, j, k, n - ndarray[object, ndim=2] result - dict row - object col, onan = np.nan - - k = len(columns) - n = len(dicts) - - result = np.empty((n, k), dtype='O') - - for i in range(n): - row = dicts[i] - for j in range(k): - col = columns[j] - if col in row: - result[i, j] = row[col] - else: - result[i, j] = onan - - return result - - -def fast_zip(list ndarrays): - """ - For zipping multiple ndarrays into an ndarray of tuples - """ - cdef: - Py_ssize_t i, j, k, n - ndarray[object] result - flatiter it - object val, tup - - k = len(ndarrays) - n = len(ndarrays[0]) - - result = np.empty(n, dtype=object) - - # initialize tuples on first pass - arr = ndarrays[0] - it = PyArray_IterNew(arr) - for i in range(n): - val = PyArray_GETITEM(arr, PyArray_ITER_DATA(it)) - tup = PyTuple_New(k) - - PyTuple_SET_ITEM(tup, 0, val) - Py_INCREF(val) - result[i] = tup - PyArray_ITER_NEXT(it) - - for j in range(1, k): - arr = ndarrays[j] - it = PyArray_IterNew(arr) - if len(arr) != n: - raise ValueError('all arrays must be same length') - - for i in range(n): - val = PyArray_GETITEM(arr, PyArray_ITER_DATA(it)) - PyTuple_SET_ITEM(result[i], j, val) - Py_INCREF(val) - PyArray_ITER_NEXT(it) - - return result - - -def 
get_reverse_indexer(ndarray[int64_t] indexer, Py_ssize_t length): - """ - Reverse indexing operation. - - Given `indexer`, make `indexer_inv` of it, such that:: - - indexer_inv[indexer[x]] = x - - .. note:: If indexer is not unique, only first occurrence is accounted. - - """ - - cdef: - Py_ssize_t i, n = len(indexer) - ndarray[int64_t] rev_indexer - int64_t idx - - rev_indexer = np.empty(length, dtype=np.int64) - rev_indexer.fill(-1) - for i in range(n): - idx = indexer[i] - if idx != -1: - rev_indexer[idx] = i - - return rev_indexer - - -def has_infs_f4(ndarray[float32_t] arr): - cdef: - Py_ssize_t i, n = len(arr) - float32_t inf, neginf, val - - inf = np.inf - neginf = -inf - - for i in range(n): - val = arr[i] - if val == inf or val == neginf: - return True - return False - - -def has_infs_f8(ndarray[float64_t] arr): - cdef: - Py_ssize_t i, n = len(arr) - float64_t inf, neginf, val - - inf = np.inf - neginf = -inf - - for i in range(n): - val = arr[i] - if val == inf or val == neginf: - return True - return False - - -def convert_timestamps(ndarray values): - cdef: - object val, f, result - dict cache = {} - Py_ssize_t i, n = len(values) - ndarray[object] out - - # for HDFStore, a bit temporary but... 
- - from datetime import datetime - f = datetime.fromtimestamp - - out = np.empty(n, dtype='O') - - for i in range(n): - val = util.get_value_1d(values, i) - if val in cache: - out[i] = cache[val] - else: - cache[val] = out[i] = f(val) - - return out - - -def maybe_indices_to_slice(ndarray[int64_t] indices, int max_len): - cdef: - Py_ssize_t i, n = len(indices) - int k, vstart, vlast, v - - if n == 0: - return slice(0, 0) - - vstart = indices[0] - if vstart < 0 or max_len <= vstart: - return indices - - if n == 1: - return slice(vstart, vstart + 1) - - vlast = indices[n - 1] - if vlast < 0 or max_len <= vlast: - return indices - - k = indices[1] - indices[0] - if k == 0: - return indices - else: - for i in range(2, n): - v = indices[i] - if v - indices[i - 1] != k: - return indices - - if k > 0: - return slice(vstart, vlast + 1, k) - else: - if vlast == 0: - return slice(vstart, None, k) - else: - return slice(vstart, vlast - 1, k) - - -def maybe_booleans_to_slice(ndarray[uint8_t] mask): - cdef: - Py_ssize_t i, n = len(mask) - Py_ssize_t start, end - bint started = 0, finished = 0 - - for i in range(n): - if mask[i]: - if finished: - return mask.view(np.bool_) - if not started: - started = 1 - start = i - else: - if finished: - continue - - if started: - end = i - finished = 1 - - if not started: - return slice(0, 0) - if not finished: - return slice(start, None) - else: - return slice(start, end) - - -@cython.wraparound(False) -@cython.boundscheck(False) -def scalar_compare(ndarray[object] values, object val, object op): - import operator - cdef: - Py_ssize_t i, n = len(values) - ndarray[uint8_t, cast=True] result - bint isnull_val - int flag - object x - - if op is operator.lt: - flag = cpython.Py_LT - elif op is operator.le: - flag = cpython.Py_LE - elif op is operator.gt: - flag = cpython.Py_GT - elif op is operator.ge: - flag = cpython.Py_GE - elif op is operator.eq: - flag = cpython.Py_EQ - elif op is operator.ne: - flag = cpython.Py_NE - else: - raise 
ValueError('Unrecognized operator') - - result = np.empty(n, dtype=bool).view(np.uint8) - isnull_val = checknull(val) - - if flag == cpython.Py_NE: - for i in range(n): - x = values[i] - if checknull(x): - result[i] = True - elif isnull_val: - result[i] = True - else: - try: - result[i] = cpython.PyObject_RichCompareBool(x, val, flag) - except (TypeError): - result[i] = True - elif flag == cpython.Py_EQ: - for i in range(n): - x = values[i] - if checknull(x): - result[i] = False - elif isnull_val: - result[i] = False - else: - try: - result[i] = cpython.PyObject_RichCompareBool(x, val, flag) - except (TypeError): - result[i] = False - - else: - for i in range(n): - x = values[i] - if checknull(x): - result[i] = False - elif isnull_val: - result[i] = False - else: - result[i] = cpython.PyObject_RichCompareBool(x, val, flag) - - return result.view(bool) - - -@cython.wraparound(False) -@cython.boundscheck(False) -cpdef bint array_equivalent_object(object[:] left, object[:] right): - """ perform an element by element comparion on 1-d object arrays - taking into account nan positions """ - cdef: - Py_ssize_t i, n = left.shape[0] - object x, y - - for i in range(n): - x = left[i] - y = right[i] - - # we are either not equal or both nan - # I think None == None will be true here - if not (PyObject_RichCompareBool(x, y, cpython.Py_EQ) or - _checknull(x) and _checknull(y)): - return False - return True - - -@cython.wraparound(False) -@cython.boundscheck(False) -def vec_compare(ndarray[object] left, ndarray[object] right, object op): - import operator - cdef: - Py_ssize_t i, n = len(left) - ndarray[uint8_t, cast=True] result - int flag - - if n != len(right): - raise ValueError('Arrays were different lengths: %d vs %d' - % (n, len(right))) - - if op is operator.lt: - flag = cpython.Py_LT - elif op is operator.le: - flag = cpython.Py_LE - elif op is operator.gt: - flag = cpython.Py_GT - elif op is operator.ge: - flag = cpython.Py_GE - elif op is operator.eq: - flag = 
cpython.Py_EQ - elif op is operator.ne: - flag = cpython.Py_NE - else: - raise ValueError('Unrecognized operator') - - result = np.empty(n, dtype=bool).view(np.uint8) - - if flag == cpython.Py_NE: - for i in range(n): - x = left[i] - y = right[i] - - if checknull(x) or checknull(y): - result[i] = True - else: - result[i] = cpython.PyObject_RichCompareBool(x, y, flag) - else: - for i in range(n): - x = left[i] - y = right[i] - - if checknull(x) or checknull(y): - result[i] = False - else: - result[i] = cpython.PyObject_RichCompareBool(x, y, flag) - - return result.view(bool) - - -@cython.wraparound(False) -@cython.boundscheck(False) -def scalar_binop(ndarray[object] values, object val, object op): - cdef: - Py_ssize_t i, n = len(values) - ndarray[object] result - object x - - result = np.empty(n, dtype=object) - if util._checknull(val): - result.fill(val) - return result - - for i in range(n): - x = values[i] - if util._checknull(x): - result[i] = x - else: - result[i] = op(x, val) - - return maybe_convert_bool(result) - - -@cython.wraparound(False) -@cython.boundscheck(False) -def vec_binop(ndarray[object] left, ndarray[object] right, object op): - cdef: - Py_ssize_t i, n = len(left) - ndarray[object] result - - if n != len(right): - raise ValueError('Arrays were different lengths: %d vs %d' - % (n, len(right))) - - result = np.empty(n, dtype=object) - - for i in range(n): - x = left[i] - y = right[i] - try: - result[i] = op(x, y) - except TypeError: - if util._checknull(x): - result[i] = x - elif util._checknull(y): - result[i] = y - else: - raise - - return maybe_convert_bool(result) - - -def astype_intsafe(ndarray[object] arr, new_dtype): - cdef: - Py_ssize_t i, n = len(arr) - object v - bint is_datelike - ndarray result - - # on 32-bit, 1.6.2 numpy M8[ns] is a subdtype of integer, which is weird - is_datelike = new_dtype in ['M8[ns]', 'm8[ns]'] - - result = np.empty(n, dtype=new_dtype) - for i in range(n): - v = arr[i] - if is_datelike and checknull(v): - 
result[i] = NPY_NAT - else: - # we can use the unsafe version because we know `result` is mutable - # since it was created from `np.empty` - util.set_value_at_unsafe(result, i, v) - - return result - - -cpdef ndarray[object] astype_unicode(ndarray arr): - cdef: - Py_ssize_t i, n = arr.size - ndarray[object] result = np.empty(n, dtype=object) - - for i in range(n): - # we can use the unsafe version because we know `result` is mutable - # since it was created from `np.empty` - util.set_value_at_unsafe(result, i, unicode(arr[i])) - - return result - - -cpdef ndarray[object] astype_str(ndarray arr): - cdef: - Py_ssize_t i, n = arr.size - ndarray[object] result = np.empty(n, dtype=object) - - for i in range(n): - # we can use the unsafe version because we know `result` is mutable - # since it was created from `np.empty` - util.set_value_at_unsafe(result, i, str(arr[i])) - - return result - - -def clean_index_list(list obj): - """ - Utility used in pandas.core.index._ensure_index - """ - cdef: - ndarray[object] converted - Py_ssize_t i, n = len(obj) - object v - bint all_arrays = 1 - - for i in range(n): - v = obj[i] - if not (PyList_Check(v) or np.PyArray_Check(v) or hasattr(v, '_data')): - all_arrays = 0 - break - - if all_arrays: - return obj, all_arrays - - converted = np.empty(n, dtype=object) - for i in range(n): - v = obj[i] - if PyList_Check(v) or np.PyArray_Check(v) or hasattr(v, '_data'): - converted[i] = tuple(v) - else: - converted[i] = v - - return maybe_convert_objects(converted), 0 - - -ctypedef fused pandas_string: - str - unicode - bytes - - -@cython.boundscheck(False) -@cython.wraparound(False) -cpdef Py_ssize_t max_len_string_array(pandas_string[:] arr): - """ return the maximum size of elements in a 1-dim string array """ - cdef: - Py_ssize_t i, m = 0, l = 0, length = arr.shape[0] - pandas_string v - - for i in range(length): - v = arr[i] - if PyString_Check(v): - l = PyString_GET_SIZE(v) - elif PyBytes_Check(v): - l = PyBytes_GET_SIZE(v) - elif 
PyUnicode_Check(v): - l = PyUnicode_GET_SIZE(v) - - if l > m: - m = l - - return m - - -@cython.boundscheck(False) -@cython.wraparound(False) -def string_array_replace_from_nan_rep( - ndarray[object, ndim=1] arr, object nan_rep, - object replace=None): - """ - Replace the values in the array with 'replacement' if - they are 'nan_rep'. Return the same array. - """ - - cdef int length = arr.shape[0], i = 0 - if replace is None: - replace = np.nan - - for i from 0 <= i < length: - if arr[i] == nan_rep: - arr[i] = replace - - return arr - - -@cython.boundscheck(False) -@cython.wraparound(False) -def convert_json_to_lines(object arr): - """ - replace comma separated json with line feeds, paying special attention - to quotes & brackets - """ - cdef: - Py_ssize_t i = 0, num_open_brackets_seen = 0, length - bint in_quotes = 0, is_escaping = 0 - ndarray[uint8_t] narr - unsigned char v, comma, left_bracket, right_brack, newline - - newline = ord('\n') - comma = ord(',') - left_bracket = ord('{') - right_bracket = ord('}') - quote = ord('"') - backslash = ord('\\') - - narr = np.frombuffer(arr.encode('utf-8'), dtype='u1').copy() - length = narr.shape[0] - for i in range(length): - v = narr[i] - if v == quote and i > 0 and not is_escaping: - in_quotes = ~in_quotes - if v == backslash or is_escaping: - is_escaping = ~is_escaping - if v == comma: # commas that should be \n - if num_open_brackets_seen == 0 and not in_quotes: - narr[i] = newline - elif v == left_bracket: - if not in_quotes: - num_open_brackets_seen += 1 - elif v == right_bracket: - if not in_quotes: - num_open_brackets_seen -= 1 - - return narr.tostring().decode('utf-8') - - -@cython.boundscheck(False) -@cython.wraparound(False) -def write_csv_rows(list data, ndarray data_index, - int nlevels, ndarray cols, object writer): - - cdef int N, j, i, ncols - cdef list rows - cdef object val - - # In crude testing, N>100 yields little marginal improvement - N=100 - - # pre-allocate rows - ncols = len(cols) - rows = 
[[None] * (nlevels + ncols) for x in range(N)] - - j = -1 - if nlevels == 1: - for j in range(len(data_index)): - row = rows[j % N] - row[0] = data_index[j] - for i in range(ncols): - row[1 + i] = data[i][j] - - if j >= N - 1 and j % N == N - 1: - writer.writerows(rows) - elif nlevels > 1: - for j in range(len(data_index)): - row = rows[j % N] - row[:nlevels] = list(data_index[j]) - for i in range(ncols): - row[nlevels + i] = data[i][j] - - if j >= N - 1 and j % N == N - 1: - writer.writerows(rows) - else: - for j in range(len(data_index)): - row = rows[j % N] - for i in range(ncols): - row[i] = data[i][j] - - if j >= N - 1 and j % N == N - 1: - writer.writerows(rows) - - if j >= 0 and (j < N - 1 or (j % N) != N - 1): - writer.writerows(rows[:((j + 1) % N)]) - - -#------------------------------------------------------------------------------ -# Groupby-related functions -@cython.boundscheck(False) -def arrmap(ndarray[object] index, object func): - cdef int length = index.shape[0] - cdef int i = 0 - - cdef ndarray[object] result = np.empty(length, dtype=np.object_) - - for i from 0 <= i < length: - result[i] = func(index[i]) - - return result - - -@cython.wraparound(False) -@cython.boundscheck(False) -def is_lexsorted(list list_of_arrays): - cdef: - int i - Py_ssize_t n, nlevels - int64_t k, cur, pre - ndarray arr - - nlevels = len(list_of_arrays) - n = len(list_of_arrays[0]) - - cdef int64_t **vecs = malloc(nlevels * sizeof(int64_t*)) - for i from 0 <= i < nlevels: - arr = list_of_arrays[i] - vecs[i] = arr.data - - # Assume uniqueness?? - for i from 1 <= i < n: - for k from 0 <= k < nlevels: - cur = vecs[k][i] - pre = vecs[k][i - 1] - if cur == pre: - continue - elif cur > pre: - break - else: - return False - free(vecs) - return True - - -# TODO: could do even better if we know something about the data. eg, index has -# 1-min data, binner has 5-min data, then bins are just strides in index. This -# is a general, O(max(len(values), len(binner))) method. 
-@cython.boundscheck(False) -@cython.wraparound(False) -def generate_bins_dt64(ndarray[int64_t] values, ndarray[int64_t] binner, - object closed='left', bint hasnans=0): - """ - Int64 (datetime64) version of generic python version in groupby.py - """ - cdef: - Py_ssize_t lenidx, lenbin, i, j, bc, vc - ndarray[int64_t] bins - int64_t l_bin, r_bin, nat_count - bint right_closed = closed == 'right' - - nat_count = 0 - if hasnans: - mask = values == iNaT - nat_count = np.sum(mask) - values = values[~mask] - - lenidx = len(values) - lenbin = len(binner) - - if lenidx <= 0 or lenbin <= 0: - raise ValueError("Invalid length for values or for binner") - - # check binner fits data - if values[0] < binner[0]: - raise ValueError("Values falls before first bin") - - if values[lenidx - 1] > binner[lenbin - 1]: - raise ValueError("Values falls after last bin") - - bins = np.empty(lenbin - 1, dtype=np.int64) - - j = 0 # index into values - bc = 0 # bin count - - # linear scan - if right_closed: - for i in range(0, lenbin - 1): - r_bin = binner[i + 1] - # count values in current bin, advance to next bin - while j < lenidx and values[j] <= r_bin: - j += 1 - bins[bc] = j - bc += 1 - else: - for i in range(0, lenbin - 1): - r_bin = binner[i + 1] - # count values in current bin, advance to next bin - while j < lenidx and values[j] < r_bin: - j += 1 - bins[bc] = j - bc += 1 - - if nat_count > 0: - # shift bins by the number of NaT - bins = bins + nat_count - bins = np.insert(bins, 0, nat_count) - - return bins - - -@cython.boundscheck(False) -@cython.wraparound(False) -def row_bool_subset(ndarray[float64_t, ndim=2] values, - ndarray[uint8_t, cast=True] mask): - cdef: - Py_ssize_t i, j, n, k, pos = 0 - ndarray[float64_t, ndim=2] out - - n, k = ( values).shape - assert(n == len(mask)) - - out = np.empty((mask.sum(), k), dtype=np.float64) - - for i in range(n): - if mask[i]: - for j in range(k): - out[pos, j] = values[i, j] - pos += 1 - - return out - - -@cython.boundscheck(False) 
-@cython.wraparound(False) -def row_bool_subset_object(ndarray[object, ndim=2] values, - ndarray[uint8_t, cast=True] mask): - cdef: - Py_ssize_t i, j, n, k, pos = 0 - ndarray[object, ndim=2] out - - n, k = ( values).shape - assert(n == len(mask)) - - out = np.empty((mask.sum(), k), dtype=object) - - for i in range(n): - if mask[i]: - for j in range(k): - out[pos, j] = values[i, j] - pos += 1 - - return out - - -@cython.boundscheck(False) -@cython.wraparound(False) -def get_level_sorter(ndarray[int64_t, ndim=1] label, - ndarray[int64_t, ndim=1] starts): - """ - argsort for a single level of a multi-index, keeping the order of higher - levels unchanged. `starts` points to starts of same-key indices w.r.t - to leading levels; equivalent to: - np.hstack([label[starts[i]:starts[i+1]].argsort(kind='mergesort') - + starts[i] for i in range(len(starts) - 1)]) - """ - cdef: - int64_t l, r - Py_ssize_t i - ndarray[int64_t, ndim=1] out = np.empty(len(label), dtype=np.int64) - - for i in range(len(starts) - 1): - l, r = starts[i], starts[i + 1] - out[l:r] = l + label[l:r].argsort(kind='mergesort') - - return out - - -def group_count(ndarray[int64_t] values, Py_ssize_t size): - cdef: - Py_ssize_t i, n = len(values) - ndarray[int64_t] counts - - counts = np.zeros(size, dtype=np.int64) - for i in range(n): - counts[values[i]] += 1 - return counts - - -def lookup_values(ndarray[object] values, dict mapping): - cdef: - Py_ssize_t i, n = len(values) - - result = np.empty(n, dtype='O') - for i in range(n): - result[i] = mapping[values[i]] - return maybe_convert_objects(result) - - -@cython.boundscheck(False) -@cython.wraparound(False) -def count_level_2d(ndarray[uint8_t, ndim=2, cast=True] mask, - ndarray[int64_t, ndim=1] labels, - Py_ssize_t max_bin, - int axis): - cdef: - Py_ssize_t i, j, k, n - ndarray[int64_t, ndim=2] counts - - assert(axis == 0 or axis == 1) - n, k = ( mask).shape - - if axis == 0: - counts = np.zeros((max_bin, k), dtype='i8') - with nogil: - for i from 0 <= i < 
n: - for j from 0 <= j < k: - counts[labels[i], j] += mask[i, j] - - else: # axis == 1 - counts = np.zeros((n, max_bin), dtype='i8') - with nogil: - for i from 0 <= i < n: - for j from 0 <= j < k: - counts[i, labels[j]] += mask[i, j] - - return counts - - -cdef class _PandasNull: - - def __richcmp__(_PandasNull self, object other, int op): - if op == 2: # == - return isinstance(other, _PandasNull) - elif op == 3: # != - return not isinstance(other, _PandasNull) - else: - return False - - def __hash__(self): - return 0 - -pandas_null = _PandasNull() - - -def fast_zip_fillna(list ndarrays, fill_value=pandas_null): - """ - For zipping multiple ndarrays into an ndarray of tuples - """ - cdef: - Py_ssize_t i, j, k, n - ndarray[object] result - flatiter it - object val, tup - - k = len(ndarrays) - n = len(ndarrays[0]) - - result = np.empty(n, dtype=object) - - # initialize tuples on first pass - arr = ndarrays[0] - it = PyArray_IterNew(arr) - for i in range(n): - val = PyArray_GETITEM(arr, PyArray_ITER_DATA(it)) - tup = PyTuple_New(k) - - if val != val: - val = fill_value - - PyTuple_SET_ITEM(tup, 0, val) - Py_INCREF(val) - result[i] = tup - PyArray_ITER_NEXT(it) - - for j in range(1, k): - arr = ndarrays[j] - it = PyArray_IterNew(arr) - if len(arr) != n: - raise ValueError('all arrays must be same length') - - for i in range(n): - val = PyArray_GETITEM(arr, PyArray_ITER_DATA(it)) - if val != val: - val = fill_value - - PyTuple_SET_ITEM(result[i], j, val) - Py_INCREF(val) - PyArray_ITER_NEXT(it) - - return result - - -def generate_slices(ndarray[int64_t] labels, Py_ssize_t ngroups): - cdef: - Py_ssize_t i, group_size, n, start - int64_t lab - object slobj - ndarray[int64_t] starts, ends - - n = len(labels) - - starts = np.zeros(ngroups, dtype=np.int64) - ends = np.zeros(ngroups, dtype=np.int64) - - start = 0 - group_size = 0 - for i in range(n): - lab = labels[i] - if lab < 0: - start += 1 - else: - group_size += 1 - if i == n - 1 or lab != labels[i + 1]: - starts[lab] = 
start - ends[lab] = start + group_size - start += group_size - group_size = 0 - - return starts, ends - - -def indices_fast(object index, ndarray[int64_t] labels, list keys, - list sorted_labels): - cdef: - Py_ssize_t i, j, k, lab, cur, start, n = len(labels) - dict result = {} - object tup - - k = len(keys) - - if n == 0: - return result - - start = 0 - cur = labels[0] - for i in range(1, n): - lab = labels[i] - - if lab != cur: - if lab != -1: - tup = PyTuple_New(k) - for j in range(k): - val = util.get_value_at(keys[j], - sorted_labels[j][i - 1]) - PyTuple_SET_ITEM(tup, j, val) - Py_INCREF(val) - - result[tup] = index[start:i] - start = i - cur = lab - - tup = PyTuple_New(k) - for j in range(k): - val = util.get_value_at(keys[j], - sorted_labels[j][n - 1]) - PyTuple_SET_ITEM(tup, j, val) - Py_INCREF(val) - result[tup] = index[start:] - - return result - - -@cython.boundscheck(False) -@cython.wraparound(False) -def get_blkno_indexers(int64_t[:] blknos, bint group=True): - """ - Enumerate contiguous runs of integers in ndarray. - - Iterate over elements of `blknos` yielding ``(blkno, slice(start, stop))`` - pairs for each contiguous run found. - - If `group` is True and there is more than one run for a certain blkno, - ``(blkno, array)`` with an array containing positions of all elements equal - to blkno. - - Returns - ------- - iter : iterator of (int, slice or array) - - """ - # There's blkno in this function's name because it's used in block & - # blockno handling. 
- cdef: - int64_t cur_blkno - Py_ssize_t i, start, stop, n, diff - - object blkno - list group_order - dict group_slices - int64_t[:] res_view - - n = blknos.shape[0] - - if n == 0: - return - - start = 0 - cur_blkno = blknos[start] - - if group == False: - for i in range(1, n): - if blknos[i] != cur_blkno: - yield cur_blkno, slice(start, i) - - start = i - cur_blkno = blknos[i] - - yield cur_blkno, slice(start, n) - else: - group_order = [] - group_dict = {} - - for i in range(1, n): - if blknos[i] != cur_blkno: - if cur_blkno not in group_dict: - group_order.append(cur_blkno) - group_dict[cur_blkno] = [(start, i)] - else: - group_dict[cur_blkno].append((start, i)) - - start = i - cur_blkno = blknos[i] - - if cur_blkno not in group_dict: - group_order.append(cur_blkno) - group_dict[cur_blkno] = [(start, n)] - else: - group_dict[cur_blkno].append((start, n)) - - for blkno in group_order: - slices = group_dict[blkno] - if len(slices) == 1: - yield blkno, slice(slices[0][0], slices[0][1]) - else: - tot_len = sum([stop - start for start, stop in slices]) - result = np.empty(tot_len, dtype=np.int64) - res_view = result - - i = 0 - for start, stop in slices: - for diff in range(start, stop): - res_view[i] = diff - i += 1 - - yield blkno, result - - -@cython.boundscheck(False) -@cython.wraparound(False) -cpdef slice indexer_as_slice(int64_t[:] vals): - cdef: - Py_ssize_t i, n, start, stop - int64_t d - - if vals is None: - raise TypeError("vals must be ndarray") - - n = vals.shape[0] - - if n == 0 or vals[0] < 0: - return None - - if n == 1: - return slice(vals[0], vals[0] + 1, 1) - - if vals[1] < 0: - return None - - # n > 2 - d = vals[1] - vals[0] - - if d == 0: - return None - - for i in range(2, n): - if vals[i] < 0 or vals[i] - vals[i - 1] != d: - return None - - start = vals[0] - stop = start + n * d - if stop < 0 and d < 0: - return slice(start, None, d) - else: - return slice(start, stop, d) - - -cpdef slice_canonize(slice s): - """ - Convert slice to canonical 
bounded form. - """ - cdef: - Py_ssize_t start = 0, stop = 0, step = 1, length - - if s.step is None: - step = 1 - else: - step = s.step - if step == 0: - raise ValueError("slice step cannot be zero") - - if step > 0: - if s.stop is None: - raise ValueError("unbounded slice") - - stop = s.stop - if s.start is None: - start = 0 - else: - start = s.start - if start > stop: - start = stop - elif step < 0: - if s.start is None: - raise ValueError("unbounded slice") - - start = s.start - if s.stop is None: - stop = -1 - else: - stop = s.stop - if stop > start: - stop = start - - if start < 0 or (stop < 0 and s.stop is not None): - raise ValueError("unbounded slice") - - if stop < 0: - return slice(start, None, step) - else: - return slice(start, stop, step) - - -cpdef slice_get_indices_ex(slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX): - """ - Get (start, stop, step, length) tuple for a slice. - - If `objlen` is not specified, slice must be bounded, otherwise the result - will be wrong. - - """ - cdef: - Py_ssize_t start, stop, step, length - - if slc is None: - raise TypeError("slc should be a slice") - - PySlice_GetIndicesEx(slc, objlen, - &start, &stop, &step, &length) - - return start, stop, step, length - - -cpdef Py_ssize_t slice_len( - slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX) except -1: - """ - Get length of a bounded slice. - - The slice must not have any "open" bounds that would create dependency on - container size, i.e.: - - if ``s.step is None or s.step > 0``, ``s.stop`` is not ``None`` - - if ``s.step < 0``, ``s.start`` is not ``None`` - - Otherwise, the result is unreliable. 
- - """ - cdef: - Py_ssize_t start, stop, step, length - - if slc is None: - raise TypeError("slc must be slice") - - PySlice_GetIndicesEx(slc, objlen, - &start, &stop, &step, &length) - - return length - - -def slice_getitem(slice slc not None, ind): - cdef: - Py_ssize_t s_start, s_stop, s_step, s_len - Py_ssize_t ind_start, ind_stop, ind_step, ind_len - - s_start, s_stop, s_step, s_len = slice_get_indices_ex(slc) - - if isinstance(ind, slice): - ind_start, ind_stop, ind_step, ind_len = slice_get_indices_ex(ind, - s_len) - - if ind_step > 0 and ind_len == s_len: - # short-cut for no-op slice - if ind_len == s_len: - return slc - - if ind_step < 0: - s_start = s_stop - s_step - ind_step = -ind_step - - s_step *= ind_step - s_stop = s_start + ind_stop * s_step - s_start = s_start + ind_start * s_step - - if s_step < 0 and s_stop < 0: - return slice(s_start, None, s_step) - else: - return slice(s_start, s_stop, s_step) - - else: - return np.arange(s_start, s_stop, s_step, dtype=np.int64)[ind] - - -cdef class BlockPlacement: - # __slots__ = '_as_slice', '_as_array', '_len' - cdef slice _as_slice - cdef object _as_array - - cdef bint _has_slice, _has_array, _is_known_slice_like - - def __init__(self, val): - cdef slice slc - - self._has_slice = False - self._has_array = False - - if isinstance(val, slice): - slc = slice_canonize(val) - - if slc.start != slc.stop: - self._as_slice = slc - self._has_slice = True - else: - arr = np.empty(0, dtype=np.int64) - self._as_array = arr - self._has_array = True - else: - # Cython memoryview interface requires ndarray to be writeable. 
- arr = np.require(val, dtype=np.int64, requirements='W') - assert arr.ndim == 1 - self._as_array = arr - self._has_array = True - - def __str__(self): - cdef slice s = self._ensure_has_slice() - if s is not None: - v = self._as_slice - else: - v = self._as_array - - return '%s(%r)' % (self.__class__.__name__, v) - - __repr__ = __str__ - - def __len__(self): - cdef slice s = self._ensure_has_slice() - if s is not None: - return slice_len(s) - else: - return len(self._as_array) - - def __iter__(self): - cdef slice s = self._ensure_has_slice() - cdef Py_ssize_t start, stop, step, _ - if s is not None: - start, stop, step, _ = slice_get_indices_ex(s) - return iter(range(start, stop, step)) - else: - return iter(self._as_array) - - @property - def as_slice(self): - cdef slice s = self._ensure_has_slice() - if s is None: - raise TypeError('Not slice-like') - else: - return s - - @property - def indexer(self): - cdef slice s = self._ensure_has_slice() - if s is not None: - return s - else: - return self._as_array - - def isin(self, arr): - from pandas.core.index import Int64Index - return Int64Index(self.as_array, copy=False).isin(arr) - - @property - def as_array(self): - cdef Py_ssize_t start, stop, end, _ - if not self._has_array: - start, stop, step, _ = slice_get_indices_ex(self._as_slice) - self._as_array = np.arange(start, stop, step, - dtype=np.int64) - self._has_array = True - return self._as_array - - @property - def is_slice_like(self): - cdef slice s = self._ensure_has_slice() - return s is not None - - def __getitem__(self, loc): - cdef slice s = self._ensure_has_slice() - if s is not None: - val = slice_getitem(s, loc) - else: - val = self._as_array[loc] - - if not isinstance(val, slice) and val.ndim == 0: - return val - - return BlockPlacement(val) - - def delete(self, loc): - return BlockPlacement(np.delete(self.as_array, loc, axis=0)) - - def append(self, others): - if len(others) == 0: - return self - - return 
BlockPlacement(np.concatenate([self.as_array] + - [o.as_array for o in others])) - - cdef iadd(self, other): - cdef slice s = self._ensure_has_slice() - cdef Py_ssize_t other_int, start, stop, step, l - - if isinstance(other, int) and s is not None: - other_int = other - - if other_int == 0: - return self - - start, stop, step, l = slice_get_indices_ex(s) - start += other_int - stop += other_int - - if ((step > 0 and start < 0) or - (step < 0 and stop < step)): - raise ValueError("iadd causes length change") - - if stop < 0: - self._as_slice = slice(start, None, step) - else: - self._as_slice = slice(start, stop, step) - - self._has_array = False - self._as_array = None - else: - newarr = self.as_array + other - if (newarr < 0).any(): - raise ValueError("iadd causes length change") - - self._as_array = newarr - self._has_array = True - self._has_slice = False - self._as_slice = None - - return self - - cdef BlockPlacement copy(self): - cdef slice s = self._ensure_has_slice() - if s is not None: - return BlockPlacement(s) - else: - return BlockPlacement(self._as_array) - - def add(self, other): - return self.copy().iadd(other) - - def sub(self, other): - return self.add(-other) - - cdef slice _ensure_has_slice(self): - if not self._has_slice: - self._as_slice = indexer_as_slice(self._as_array) - self._has_slice = True - return self._as_slice - - -include "reduce.pyx" -include "properties.pyx" -include "inference.pyx" diff --git a/pandas/parser.py b/pandas/parser.py new file mode 100644 index 0000000000000..f43a408c943d0 --- /dev/null +++ b/pandas/parser.py @@ -0,0 +1,8 @@ +# flake8: noqa + +import warnings +warnings.warn("The pandas.parser module is deprecated and will be " + "removed in a future version. 
Please import from " + "pandas.io.parser instead", FutureWarning, stacklevel=2) +from pandas._libs.parsers import na_values +from pandas.io.common import CParserError diff --git a/pandas/plotting/__init__.py b/pandas/plotting/__init__.py new file mode 100644 index 0000000000000..385d4d7f047c7 --- /dev/null +++ b/pandas/plotting/__init__.py @@ -0,0 +1,20 @@ +""" +Plotting api +""" + +# flake8: noqa + +from pandas.plotting._misc import (scatter_matrix, radviz, + andrews_curves, bootstrap_plot, + parallel_coordinates, lag_plot, + autocorrelation_plot) +from pandas.plotting._core import boxplot +from pandas.plotting._style import plot_params +from pandas.plotting._tools import table +try: + from pandas.plotting._converter import \ + register as register_matplotlib_converters + from pandas.plotting._converter import \ + deregister as deregister_matplotlib_converters +except ImportError: + pass diff --git a/pandas/plotting/_compat.py b/pandas/plotting/_compat.py new file mode 100644 index 0000000000000..0cc715eda2e18 --- /dev/null +++ b/pandas/plotting/_compat.py @@ -0,0 +1,76 @@ +# being a bit too dynamic +# pylint: disable=E1101 +from __future__ import division + +from distutils.version import LooseVersion + + +def _mpl_le_1_2_1(): + try: + import matplotlib as mpl + return (LooseVersion(mpl.__version__) <= LooseVersion('1.2.1') and + str(mpl.__version__)[0] != '0') + except ImportError: + return False + + +def _mpl_ge_1_3_1(): + try: + import matplotlib + # The or v[0] == '0' is because their versioneer is + # messed up on dev + return (LooseVersion(matplotlib.__version__) >= + LooseVersion('1.3.1') or + str(matplotlib.__version__)[0] == '0') + except ImportError: + return False + + +def _mpl_ge_1_4_0(): + try: + import matplotlib + return (LooseVersion(matplotlib.__version__) >= LooseVersion('1.4') or + str(matplotlib.__version__)[0] == '0') + except ImportError: + return False + + +def _mpl_ge_1_5_0(): + try: + import matplotlib + return 
(LooseVersion(matplotlib.__version__) >= LooseVersion('1.5') or + str(matplotlib.__version__)[0] == '0') + except ImportError: + return False + + +def _mpl_ge_2_0_0(): + try: + import matplotlib + return LooseVersion(matplotlib.__version__) >= LooseVersion('2.0') + except ImportError: + return False + + +def _mpl_le_2_0_0(): + try: + import matplotlib + return matplotlib.compare_versions('2.0.0', matplotlib.__version__) + except ImportError: + return False + + +def _mpl_ge_2_0_1(): + try: + import matplotlib + return LooseVersion(matplotlib.__version__) >= LooseVersion('2.0.1') + except ImportError: + return False + + +def _mpl_ge_2_1_0(): + try: + import matplotlib + return LooseVersion(matplotlib.__version__) >= LooseVersion('2.1') + except ImportError: + return False diff --git a/pandas/plotting/_converter.py b/pandas/plotting/_converter.py new file mode 100644 index 0000000000000..f413e4177b386 --- /dev/null +++ b/pandas/plotting/_converter.py @@ -0,0 +1,1163 @@ +import warnings +from datetime import datetime, timedelta +import datetime as pydt +import numpy as np + +from dateutil.relativedelta import relativedelta + +import matplotlib.units as units +import matplotlib.dates as dates + +from matplotlib.ticker import Formatter, AutoLocator, Locator +from matplotlib.transforms import nonsingular + +from pandas.core.dtypes.common import ( + is_float, is_integer, + is_integer_dtype, + is_float_dtype, + is_datetime64_ns_dtype, + is_period_arraylike, + is_nested_list_like +) +from pandas.core.dtypes.generic import ABCSeries + +from pandas.compat import lrange +import pandas.compat as compat +from pandas._libs import tslib +import pandas.core.common as com +from pandas.core.index import Index + +from pandas.core.indexes.datetimes import date_range +import pandas.core.tools.datetimes as tools +from pandas._libs.tslibs import resolution +import pandas.tseries.frequencies as frequencies +from pandas.tseries.frequencies import FreqGroup +from pandas.core.indexes.period 
import Period, PeriodIndex + +from pandas.plotting._compat import _mpl_le_2_0_0 + +# constants +HOURS_PER_DAY = 24. +MIN_PER_HOUR = 60. +SEC_PER_MIN = 60. + +SEC_PER_HOUR = SEC_PER_MIN * MIN_PER_HOUR +SEC_PER_DAY = SEC_PER_HOUR * HOURS_PER_DAY + +MUSEC_PER_DAY = 1e6 * SEC_PER_DAY + +_WARN = True # Global for whether pandas has registered the units explicitly +_mpl_units = {} # Cache for units overwritten by us + + +def get_pairs(): + pairs = [ + (tslib.Timestamp, DatetimeConverter), + (Period, PeriodConverter), + (pydt.datetime, DatetimeConverter), + (pydt.date, DatetimeConverter), + (pydt.time, TimeConverter), + (np.datetime64, DatetimeConverter), + ] + return pairs + + +def register(explicit=True): + """Register Pandas Formatters and Converters with matplotlib + + This function modifies the global ``matplotlib.units.registry`` + dictionary. Pandas adds custom converters for + + * pd.Timestamp + * pd.Period + * np.datetime64 + * datetime.datetime + * datetime.date + * datetime.time + + See Also + -------- + deregister_matplotlib_converter + """ + # Renamed in pandas.plotting.__init__ + global _WARN + + if explicit: + _WARN = False + + pairs = get_pairs() + for type_, cls in pairs: + converter = cls() + if type_ in units.registry: + previous = units.registry[type_] + _mpl_units[type_] = previous + units.registry[type_] = converter + + +def deregister(): + """Remove pandas' formatters and converters + + Removes the custom converters added by :func:`register`. This + attempts to set the state of the registry back to the state before + pandas registered its own units. Converters for pandas' own types like + Timestamp and Period are removed completely. Converters for types + pandas overwrites, like ``datetime.datetime``, are restored to their + original value. 
+ + See Also + -------- + deregister_matplotlib_converters + """ + # Renamed in pandas.plotting.__init__ + for type_, cls in get_pairs(): + # We use type to catch our classes directly, no inheritance + if type(units.registry.get(type_)) is cls: + units.registry.pop(type_) + + # restore the old keys + for unit, formatter in _mpl_units.items(): + if type(formatter) not in {DatetimeConverter, PeriodConverter, + TimeConverter}: + # make it idempotent by excluding ours. + units.registry[unit] = formatter + + +def _check_implicitly_registered(): + global _WARN + + if _WARN: + msg = ("Using an implicitly registered datetime converter for a " + "matplotlib plotting method. The converter was registered " + "by pandas on import. Future versions of pandas will require " + "you to explicitly register matplotlib converters.\n\n" + "To register the converters:\n\t" + ">>> from pandas.plotting import register_matplotlib_converters" + "\n\t" + ">>> register_matplotlib_converters()") + warnings.warn(msg, FutureWarning) + _WARN = False + + +def _to_ordinalf(tm): + tot_sec = (tm.hour * 3600 + tm.minute * 60 + tm.second + + float(tm.microsecond / 1e6)) + return tot_sec + + +def time2num(d): + if isinstance(d, compat.string_types): + parsed = tools.to_datetime(d) + if not isinstance(parsed, datetime): + raise ValueError('Could not parse time {d}'.format(d=d)) + return _to_ordinalf(parsed.time()) + if isinstance(d, pydt.time): + return _to_ordinalf(d) + return d + + +class TimeConverter(units.ConversionInterface): + + @staticmethod + def convert(value, unit, axis): + valid_types = (str, pydt.time) + if (isinstance(value, valid_types) or is_integer(value) or + is_float(value)): + return time2num(value) + if isinstance(value, Index): + return value.map(time2num) + if isinstance(value, (list, tuple, np.ndarray, Index)): + return [time2num(x) for x in value] + return value + + @staticmethod + def axisinfo(unit, axis): + if unit != 'time': + return None + + majloc = AutoLocator() + majfmt = 
TimeFormatter(majloc) + return units.AxisInfo(majloc=majloc, majfmt=majfmt, label='time') + + @staticmethod + def default_units(x, axis): + return 'time' + + +# time formatter +class TimeFormatter(Formatter): + + def __init__(self, locs): + self.locs = locs + + def __call__(self, x, pos=0): + """ + Return the time of day as a formatted string. + + Parameters + ---------- + x : float + The time of day specified as seconds since 00:00 (midnight), + with up to microsecond precision. + pos + Unused + + Returns + ------- + str + A string in HH:MM:SS.mmmuuu format. Microseconds, + milliseconds and seconds are only displayed if non-zero. + """ + fmt = '%H:%M:%S.%f' + s = int(x) + msus = int(round((x - s) * 1e6)) + ms = msus // 1000 + us = msus % 1000 + m, s = divmod(s, 60) + h, m = divmod(m, 60) + _, h = divmod(h, 24) + if us != 0: + return pydt.time(h, m, s, msus).strftime(fmt) + elif ms != 0: + return pydt.time(h, m, s, msus).strftime(fmt)[:-3] + elif s != 0: + return pydt.time(h, m, s).strftime('%H:%M:%S') + + return pydt.time(h, m).strftime('%H:%M') + + +# Period Conversion + + +class PeriodConverter(dates.DateConverter): + + @staticmethod + def convert(values, units, axis): + if is_nested_list_like(values): + values = [PeriodConverter._convert_1d(v, units, axis) + for v in values] + else: + values = PeriodConverter._convert_1d(values, units, axis) + return values + + @staticmethod + def _convert_1d(values, units, axis): + if not hasattr(axis, 'freq'): + raise TypeError('Axis must have `freq` set to convert to Periods') + valid_types = (compat.string_types, datetime, + Period, pydt.date, pydt.time, np.datetime64) + if (isinstance(values, valid_types) or is_integer(values) or + is_float(values)): + return get_datevalue(values, axis.freq) + if isinstance(values, PeriodIndex): + return values.asfreq(axis.freq)._ndarray_values + if isinstance(values, Index): + return values.map(lambda x: get_datevalue(x, axis.freq)) + if is_period_arraylike(values): + return 
PeriodIndex(values, freq=axis.freq)._ndarray_values + if isinstance(values, (list, tuple, np.ndarray, Index)): + return [get_datevalue(x, axis.freq) for x in values] + return values + + +def get_datevalue(date, freq): + if isinstance(date, Period): + return date.asfreq(freq).ordinal + elif isinstance(date, (compat.string_types, datetime, + pydt.date, pydt.time, np.datetime64)): + return Period(date, freq).ordinal + elif (is_integer(date) or is_float(date) or + (isinstance(date, (np.ndarray, Index)) and (date.size == 1))): + return date + elif date is None: + return None + raise ValueError("Unrecognizable date '{date}'".format(date=date)) + + +def _dt_to_float_ordinal(dt): + """ + Convert :mod:`datetime` to the Gregorian date as UTC float days, + preserving hours, minutes, seconds and microseconds. Return value + is a :func:`float`. + """ + if (isinstance(dt, (np.ndarray, Index, ABCSeries) + ) and is_datetime64_ns_dtype(dt)): + base = dates.epoch2num(dt.asi8 / 1.0E9) + else: + base = dates.date2num(dt) + return base + + +# Datetime Conversion +class DatetimeConverter(dates.DateConverter): + + @staticmethod + def convert(values, unit, axis): + # values might be a 1-d array, or a list-like of arrays. 
+ _check_implicitly_registered() + if is_nested_list_like(values): + values = [DatetimeConverter._convert_1d(v, unit, axis) + for v in values] + else: + values = DatetimeConverter._convert_1d(values, unit, axis) + return values + + @staticmethod + def _convert_1d(values, unit, axis): + def try_parse(values): + try: + return _dt_to_float_ordinal(tools.to_datetime(values)) + except Exception: + return values + + if isinstance(values, (datetime, pydt.date)): + return _dt_to_float_ordinal(values) + elif isinstance(values, np.datetime64): + return _dt_to_float_ordinal(tslib.Timestamp(values)) + elif isinstance(values, pydt.time): + return dates.date2num(values) + elif (is_integer(values) or is_float(values)): + return values + elif isinstance(values, compat.string_types): + return try_parse(values) + elif isinstance(values, (list, tuple, np.ndarray, Index)): + if isinstance(values, Index): + values = values.values + if not isinstance(values, np.ndarray): + values = com._asarray_tuplesafe(values) + + if is_integer_dtype(values) or is_float_dtype(values): + return values + + try: + values = tools.to_datetime(values) + if isinstance(values, Index): + values = _dt_to_float_ordinal(values) + else: + values = [_dt_to_float_ordinal(x) for x in values] + except Exception: + values = _dt_to_float_ordinal(values) + + return values + + @staticmethod + def axisinfo(unit, axis): + """ + Return the :class:`~matplotlib.units.AxisInfo` for *unit*. + + *unit* is a tzinfo instance or None. + The *axis* argument is required but not used. 
+ """ + tz = unit + + majloc = PandasAutoDateLocator(tz=tz) + majfmt = PandasAutoDateFormatter(majloc, tz=tz) + datemin = pydt.date(2000, 1, 1) + datemax = pydt.date(2010, 1, 1) + + return units.AxisInfo(majloc=majloc, majfmt=majfmt, label='', + default_limits=(datemin, datemax)) + + +class PandasAutoDateFormatter(dates.AutoDateFormatter): + + def __init__(self, locator, tz=None, defaultfmt='%Y-%m-%d'): + dates.AutoDateFormatter.__init__(self, locator, tz, defaultfmt) + # matplotlib.dates._UTC has no _utcoffset called by pandas + if self._tz is dates.UTC: + self._tz._utcoffset = self._tz.utcoffset(None) + + # For mpl > 2.0 the format strings are controlled via rcparams + # so do not mess with them. For mpl < 2.0 change the second + # break point and add a musec break point + if _mpl_le_2_0_0(): + self.scaled[1. / SEC_PER_DAY] = '%H:%M:%S' + self.scaled[1. / MUSEC_PER_DAY] = '%H:%M:%S.%f' + + +class PandasAutoDateLocator(dates.AutoDateLocator): + + def get_locator(self, dmin, dmax): + 'Pick the best locator based on a distance.' + _check_implicitly_registered() + delta = relativedelta(dmax, dmin) + + num_days = (delta.years * 12.0 + delta.months) * 31.0 + delta.days + num_sec = (delta.hours * 60.0 + delta.minutes) * 60.0 + delta.seconds + tot_sec = num_days * 86400. + num_sec + + if abs(tot_sec) < self.minticks: + self._freq = -1 + locator = MilliSecondLocator(self.tz) + locator.set_axis(self.axis) + + locator.set_view_interval(*self.axis.get_view_interval()) + locator.set_data_interval(*self.axis.get_data_interval()) + return locator + + return dates.AutoDateLocator.get_locator(self, dmin, dmax) + + def _get_unit(self): + return MilliSecondLocator.get_unit_generic(self._freq) + + +class MilliSecondLocator(dates.DateLocator): + + UNIT = 1. / (24 * 3600 * 1000) + + def __init__(self, tz): + dates.DateLocator.__init__(self, tz) + self._interval = 1. 
+ + def _get_unit(self): + return self.get_unit_generic(-1) + + @staticmethod + def get_unit_generic(freq): + unit = dates.RRuleLocator.get_unit_generic(freq) + if unit < 0: + return MilliSecondLocator.UNIT + return unit + + def __call__(self): + # if no data have been set, this will tank with a ValueError + _check_implicitly_registered() + try: + dmin, dmax = self.viewlim_to_dt() + except ValueError: + return [] + + if dmin > dmax: + dmax, dmin = dmin, dmax + # We need to cap at the endpoints of valid datetime + + # TODO(wesm) unused? + # delta = relativedelta(dmax, dmin) + # try: + # start = dmin - delta + # except ValueError: + # start = _from_ordinal(1.0) + + # try: + # stop = dmax + delta + # except ValueError: + # # The magic number! + # stop = _from_ordinal(3652059.9999999) + + nmax, nmin = dates.date2num((dmax, dmin)) + + num = (nmax - nmin) * 86400 * 1000 + max_millis_ticks = 6 + for interval in [1, 10, 50, 100, 200, 500]: + if num <= interval * (max_millis_ticks - 1): + self._interval = interval + break + else: + # We went through the whole loop without breaking, default to 1 + self._interval = 1000. 
+ + estimate = (nmax - nmin) / (self._get_unit() * self._get_interval()) + + if estimate > self.MAXTICKS * 2: + raise RuntimeError(('MillisecondLocator estimated to generate ' + '{estimate:d} ticks from {dmin} to {dmax}: ' + 'exceeds Locator.MAXTICKS' + '* 2 ({arg:d}) ').format( + estimate=estimate, dmin=dmin, dmax=dmax, + arg=self.MAXTICKS * 2)) + + freq = '%dL' % self._get_interval() + tz = self.tz.tzname(None) + st = _from_ordinal(dates.date2num(dmin)) # strip tz + ed = _from_ordinal(dates.date2num(dmax)) + all_dates = date_range(start=st, end=ed, + freq=freq, tz=tz).astype(object) + + try: + if len(all_dates) > 0: + locs = self.raise_if_exceeds(dates.date2num(all_dates)) + return locs + except Exception: # pragma: no cover + pass + + lims = dates.date2num([dmin, dmax]) + return lims + + def _get_interval(self): + return self._interval + + def autoscale(self): + """ + Set the view limits to include the data range. + """ + dmin, dmax = self.datalim_to_dt() + if dmin > dmax: + dmax, dmin = dmin, dmax + + # We need to cap at the endpoints of valid datetime + + # TODO(wesm): unused? + + # delta = relativedelta(dmax, dmin) + # try: + # start = dmin - delta + # except ValueError: + # start = _from_ordinal(1.0) + + # try: + # stop = dmax + delta + # except ValueError: + # # The magic number! 
+ # stop = _from_ordinal(3652059.9999999) + + dmin, dmax = self.datalim_to_dt() + + vmin = dates.date2num(dmin) + vmax = dates.date2num(dmax) + + return self.nonsingular(vmin, vmax) + + +def _from_ordinal(x, tz=None): + ix = int(x) + dt = datetime.fromordinal(ix) + remainder = float(x) - ix + hour, remainder = divmod(24 * remainder, 1) + minute, remainder = divmod(60 * remainder, 1) + second, remainder = divmod(60 * remainder, 1) + microsecond = int(1e6 * remainder) + if microsecond < 10: + microsecond = 0 # compensate for rounding errors + dt = datetime(dt.year, dt.month, dt.day, int(hour), int(minute), + int(second), microsecond) + if tz is not None: + dt = dt.astimezone(tz) + + if microsecond > 999990: # compensate for rounding errors + dt += timedelta(microseconds=1e6 - microsecond) + + return dt + +# Fixed frequency dynamic tick locators and formatters + +# ------------------------------------------------------------------------- +# --- Locators --- +# ------------------------------------------------------------------------- + + +def _get_default_annual_spacing(nyears): + """ + Returns a default spacing between consecutive ticks for annual data. + """ + if nyears < 11: + (min_spacing, maj_spacing) = (1, 1) + elif nyears < 20: + (min_spacing, maj_spacing) = (1, 2) + elif nyears < 50: + (min_spacing, maj_spacing) = (1, 5) + elif nyears < 100: + (min_spacing, maj_spacing) = (5, 10) + elif nyears < 200: + (min_spacing, maj_spacing) = (5, 25) + elif nyears < 600: + (min_spacing, maj_spacing) = (10, 50) + else: + factor = nyears // 1000 + 1 + (min_spacing, maj_spacing) = (factor * 20, factor * 100) + return (min_spacing, maj_spacing) + + +def period_break(dates, period): + """ + Returns the indices where the given period changes. + + Parameters + ---------- + dates : PeriodIndex + Array of intervals to monitor. + period : string + Name of the period to monitor. 
+ """ + current = getattr(dates, period) + previous = getattr(dates - 1, period) + return np.nonzero(current - previous)[0] + + +def has_level_label(label_flags, vmin): + """ + Returns true if the ``label_flags`` indicate there is at least one label + for this level. + + if the minimum view limit is not an exact integer, then the first tick + label won't be shown, so we must adjust for that. + """ + if label_flags.size == 0 or (label_flags.size == 1 and + label_flags[0] == 0 and + vmin % 1 > 0.0): + return False + else: + return True + + +def _daily_finder(vmin, vmax, freq): + periodsperday = -1 + + if freq >= FreqGroup.FR_HR: + if freq == FreqGroup.FR_NS: + periodsperday = 24 * 60 * 60 * 1000000000 + elif freq == FreqGroup.FR_US: + periodsperday = 24 * 60 * 60 * 1000000 + elif freq == FreqGroup.FR_MS: + periodsperday = 24 * 60 * 60 * 1000 + elif freq == FreqGroup.FR_SEC: + periodsperday = 24 * 60 * 60 + elif freq == FreqGroup.FR_MIN: + periodsperday = 24 * 60 + elif freq == FreqGroup.FR_HR: + periodsperday = 24 + else: # pragma: no cover + raise ValueError("unexpected frequency: {freq}".format(freq=freq)) + periodsperyear = 365 * periodsperday + periodspermonth = 28 * periodsperday + + elif freq == FreqGroup.FR_BUS: + periodsperyear = 261 + periodspermonth = 19 + elif freq == FreqGroup.FR_DAY: + periodsperyear = 365 + periodspermonth = 28 + elif resolution.get_freq_group(freq) == FreqGroup.FR_WK: + periodsperyear = 52 + periodspermonth = 3 + else: # pragma: no cover + raise ValueError("unexpected frequency") + + # save this for later usage + vmin_orig = vmin + + (vmin, vmax) = (Period(ordinal=int(vmin), freq=freq), + Period(ordinal=int(vmax), freq=freq)) + span = vmax.ordinal - vmin.ordinal + 1 + dates_ = PeriodIndex(start=vmin, end=vmax, freq=freq) + # Initialize the output + info = np.zeros(span, + dtype=[('val', np.int64), ('maj', bool), + ('min', bool), ('fmt', '|S20')]) + info['val'][:] = dates_._ndarray_values + info['fmt'][:] = '' + info['maj'][[0, -1]] = 
True + # .. and set some shortcuts + info_maj = info['maj'] + info_min = info['min'] + info_fmt = info['fmt'] + + def first_label(label_flags): + if (label_flags[0] == 0) and (label_flags.size > 1) and \ + ((vmin_orig % 1) > 0.0): + return label_flags[1] + else: + return label_flags[0] + + # Case 1. Less than a month + if span <= periodspermonth: + day_start = period_break(dates_, 'day') + month_start = period_break(dates_, 'month') + + def _hour_finder(label_interval, force_year_start): + _hour = dates_.hour + _prev_hour = (dates_ - 1).hour + hour_start = (_hour - _prev_hour) != 0 + info_maj[day_start] = True + info_min[hour_start & (_hour % label_interval == 0)] = True + year_start = period_break(dates_, 'year') + info_fmt[hour_start & (_hour % label_interval == 0)] = '%H:%M' + info_fmt[day_start] = '%H:%M\n%d-%b' + info_fmt[year_start] = '%H:%M\n%d-%b\n%Y' + if force_year_start and not has_level_label(year_start, vmin_orig): + info_fmt[first_label(day_start)] = '%H:%M\n%d-%b\n%Y' + + def _minute_finder(label_interval): + hour_start = period_break(dates_, 'hour') + _minute = dates_.minute + _prev_minute = (dates_ - 1).minute + minute_start = (_minute - _prev_minute) != 0 + info_maj[hour_start] = True + info_min[minute_start & (_minute % label_interval == 0)] = True + year_start = period_break(dates_, 'year') + info_fmt = info['fmt'] + info_fmt[minute_start & (_minute % label_interval == 0)] = '%H:%M' + info_fmt[day_start] = '%H:%M\n%d-%b' + info_fmt[year_start] = '%H:%M\n%d-%b\n%Y' + + def _second_finder(label_interval): + minute_start = period_break(dates_, 'minute') + _second = dates_.second + _prev_second = (dates_ - 1).second + second_start = (_second - _prev_second) != 0 + info['maj'][minute_start] = True + info['min'][second_start & (_second % label_interval == 0)] = True + year_start = period_break(dates_, 'year') + info_fmt = info['fmt'] + info_fmt[second_start & (_second % + label_interval == 0)] = '%H:%M:%S' + info_fmt[day_start] = '%H:%M:%S\n%d-%b' + 
info_fmt[year_start] = '%H:%M:%S\n%d-%b\n%Y' + + if span < periodsperday / 12000.0: + _second_finder(1) + elif span < periodsperday / 6000.0: + _second_finder(2) + elif span < periodsperday / 2400.0: + _second_finder(5) + elif span < periodsperday / 1200.0: + _second_finder(10) + elif span < periodsperday / 800.0: + _second_finder(15) + elif span < periodsperday / 400.0: + _second_finder(30) + elif span < periodsperday / 150.0: + _minute_finder(1) + elif span < periodsperday / 70.0: + _minute_finder(2) + elif span < periodsperday / 24.0: + _minute_finder(5) + elif span < periodsperday / 12.0: + _minute_finder(15) + elif span < periodsperday / 6.0: + _minute_finder(30) + elif span < periodsperday / 2.5: + _hour_finder(1, False) + elif span < periodsperday / 1.5: + _hour_finder(2, False) + elif span < periodsperday * 1.25: + _hour_finder(3, False) + elif span < periodsperday * 2.5: + _hour_finder(6, True) + elif span < periodsperday * 4: + _hour_finder(12, True) + else: + info_maj[month_start] = True + info_min[day_start] = True + year_start = period_break(dates_, 'year') + info_fmt = info['fmt'] + info_fmt[day_start] = '%d' + info_fmt[month_start] = '%d\n%b' + info_fmt[year_start] = '%d\n%b\n%Y' + if not has_level_label(year_start, vmin_orig): + if not has_level_label(month_start, vmin_orig): + info_fmt[first_label(day_start)] = '%d\n%b\n%Y' + else: + info_fmt[first_label(month_start)] = '%d\n%b\n%Y' + + # Case 2. 
Less than three months + elif span <= periodsperyear // 4: + month_start = period_break(dates_, 'month') + info_maj[month_start] = True + if freq < FreqGroup.FR_HR: + info['min'] = True + else: + day_start = period_break(dates_, 'day') + info['min'][day_start] = True + week_start = period_break(dates_, 'week') + year_start = period_break(dates_, 'year') + info_fmt[week_start] = '%d' + info_fmt[month_start] = '\n\n%b' + info_fmt[year_start] = '\n\n%b\n%Y' + if not has_level_label(year_start, vmin_orig): + if not has_level_label(month_start, vmin_orig): + info_fmt[first_label(week_start)] = '\n\n%b\n%Y' + else: + info_fmt[first_label(month_start)] = '\n\n%b\n%Y' + # Case 3. Less than 14 months ............... + elif span <= 1.15 * periodsperyear: + year_start = period_break(dates_, 'year') + month_start = period_break(dates_, 'month') + week_start = period_break(dates_, 'week') + info_maj[month_start] = True + info_min[week_start] = True + info_min[year_start] = False + info_min[month_start] = False + info_fmt[month_start] = '%b' + info_fmt[year_start] = '%b\n%Y' + if not has_level_label(year_start, vmin_orig): + info_fmt[first_label(month_start)] = '%b\n%Y' + # Case 4. Less than 2.5 years ............... + elif span <= 2.5 * periodsperyear: + year_start = period_break(dates_, 'year') + quarter_start = period_break(dates_, 'quarter') + month_start = period_break(dates_, 'month') + info_maj[quarter_start] = True + info_min[month_start] = True + info_fmt[quarter_start] = '%b' + info_fmt[year_start] = '%b\n%Y' + # Case 4. Less than 4 years ................. + elif span <= 4 * periodsperyear: + year_start = period_break(dates_, 'year') + month_start = period_break(dates_, 'month') + info_maj[year_start] = True + info_min[month_start] = True + info_min[year_start] = False + + month_break = dates_[month_start].month + jan_or_jul = month_start[(month_break == 1) | (month_break == 7)] + info_fmt[jan_or_jul] = '%b' + info_fmt[year_start] = '%b\n%Y' + # Case 5. 
Less than 11 years ................ + elif span <= 11 * periodsperyear: + year_start = period_break(dates_, 'year') + quarter_start = period_break(dates_, 'quarter') + info_maj[year_start] = True + info_min[quarter_start] = True + info_min[year_start] = False + info_fmt[year_start] = '%Y' + # Case 6. More than 12 years ................ + else: + year_start = period_break(dates_, 'year') + year_break = dates_[year_start].year + nyears = span / periodsperyear + (min_anndef, maj_anndef) = _get_default_annual_spacing(nyears) + major_idx = year_start[(year_break % maj_anndef == 0)] + info_maj[major_idx] = True + minor_idx = year_start[(year_break % min_anndef == 0)] + info_min[minor_idx] = True + info_fmt[major_idx] = '%Y' + + return info + + +def _monthly_finder(vmin, vmax, freq): + periodsperyear = 12 + + vmin_orig = vmin + (vmin, vmax) = (int(vmin), int(vmax)) + span = vmax - vmin + 1 + + # Initialize the output + info = np.zeros(span, + dtype=[('val', int), ('maj', bool), ('min', bool), + ('fmt', '|S8')]) + info['val'] = np.arange(vmin, vmax + 1) + dates_ = info['val'] + info['fmt'] = '' + year_start = (dates_ % 12 == 0).nonzero()[0] + info_maj = info['maj'] + info_fmt = info['fmt'] + + if span <= 1.15 * periodsperyear: + info_maj[year_start] = True + info['min'] = True + + info_fmt[:] = '%b' + info_fmt[year_start] = '%b\n%Y' + + if not has_level_label(year_start, vmin_orig): + if dates_.size > 1: + idx = 1 + else: + idx = 0 + info_fmt[idx] = '%b\n%Y' + + elif span <= 2.5 * periodsperyear: + quarter_start = (dates_ % 3 == 0).nonzero() + info_maj[year_start] = True + # TODO: Check the following : is it really info['fmt'] ? 
+ info['fmt'][quarter_start] = True + info['min'] = True + + info_fmt[quarter_start] = '%b' + info_fmt[year_start] = '%b\n%Y' + + elif span <= 4 * periodsperyear: + info_maj[year_start] = True + info['min'] = True + + jan_or_jul = (dates_ % 12 == 0) | (dates_ % 12 == 6) + info_fmt[jan_or_jul] = '%b' + info_fmt[year_start] = '%b\n%Y' + + elif span <= 11 * periodsperyear: + quarter_start = (dates_ % 3 == 0).nonzero() + info_maj[year_start] = True + info['min'][quarter_start] = True + + info_fmt[year_start] = '%Y' + + else: + nyears = span / periodsperyear + (min_anndef, maj_anndef) = _get_default_annual_spacing(nyears) + years = dates_[year_start] // 12 + 1 + major_idx = year_start[(years % maj_anndef == 0)] + info_maj[major_idx] = True + info['min'][year_start[(years % min_anndef == 0)]] = True + + info_fmt[major_idx] = '%Y' + + return info + + +def _quarterly_finder(vmin, vmax, freq): + periodsperyear = 4 + vmin_orig = vmin + (vmin, vmax) = (int(vmin), int(vmax)) + span = vmax - vmin + 1 + + info = np.zeros(span, + dtype=[('val', int), ('maj', bool), ('min', bool), + ('fmt', '|S8')]) + info['val'] = np.arange(vmin, vmax + 1) + info['fmt'] = '' + dates_ = info['val'] + info_maj = info['maj'] + info_fmt = info['fmt'] + year_start = (dates_ % 4 == 0).nonzero()[0] + + if span <= 3.5 * periodsperyear: + info_maj[year_start] = True + info['min'] = True + + info_fmt[:] = 'Q%q' + info_fmt[year_start] = 'Q%q\n%F' + if not has_level_label(year_start, vmin_orig): + if dates_.size > 1: + idx = 1 + else: + idx = 0 + info_fmt[idx] = 'Q%q\n%F' + + elif span <= 11 * periodsperyear: + info_maj[year_start] = True + info['min'] = True + info_fmt[year_start] = '%F' + + else: + years = dates_[year_start] // 4 + 1 + nyears = span / periodsperyear + (min_anndef, maj_anndef) = _get_default_annual_spacing(nyears) + major_idx = year_start[(years % maj_anndef == 0)] + info_maj[major_idx] = True + info['min'][year_start[(years % min_anndef == 0)]] = True + info_fmt[major_idx] = '%F' + + 
def _annual_finder(vmin, vmax, freq):
    """
    Build the tick-information record array for annual data.

    Parameters
    ----------
    vmin, vmax : number
        View limits; ``vmin`` is truncated to an int and ``vmax + 1`` is
        truncated to an int before use.
    freq : object
        Accepted for signature parity with the other finders; not used here.

    Returns
    -------
    numpy.ndarray
        Structured array with fields ``val`` (tick value), ``maj`` / ``min``
        (major / minor tick flags) and ``fmt`` (format string for the tick).
    """
    first = int(vmin)
    last = int(vmax + 1)
    n_years = last - first + 1

    record_dtype = [('val', int), ('maj', bool), ('min', bool),
                    ('fmt', '|S8')]
    info = np.zeros(n_years, dtype=record_dtype)
    info['val'] = np.arange(first, last + 1)
    info['fmt'] = ''
    years = info['val']

    minor_step, major_step = _get_default_annual_spacing(n_years)
    on_major = years % major_step == 0
    on_minor = years % minor_step == 0

    info['maj'][on_major] = True
    info['min'][on_minor] = True
    # only major ticks get a label
    info['fmt'][on_major] = '%Y'

    return info
def _get_default_locs(self, vmin, vmax):
    """
    Return the tick locations for the current view.

    The tick information is computed with ``self.finder`` only when
    ``self.plot_obj.date_axis_info`` is ``None``; the result is stored on
    ``self.plot_obj`` and reused afterwards.

    Returns the ``val`` entries flagged ``min`` for a minor locator and
    those flagged ``maj`` otherwise.
    """
    plot_obj = self.plot_obj
    if plot_obj.date_axis_info is None:
        plot_obj.date_axis_info = self.finder(vmin, vmax, self.freq)

    axis_info = plot_obj.date_axis_info
    flag_field = 'min' if self.isminor else 'maj'
    return np.compress(axis_info[flag_field], axis_info['val'])
def _set_default_format(self, vmin, vmax):
    """
    Build, store and return the tick-value -> format-string mapping.

    The tick information is computed with ``self.finder`` only when
    ``self.plot_obj.date_axis_info`` is ``None``; otherwise the cached
    record array is reused.

    Parameters
    ----------
    vmin, vmax : number
        View limits, forwarded to ``self.finder`` when the cache is empty.

    Returns
    -------
    dict
        Maps each selected record's first field (the tick value) to its
        fourth field (the format string). The same dict is stored in
        ``self.formatdict``.
    """
    if self.plot_obj.date_axis_info is None:
        self.plot_obj.date_axis_info = self.finder(vmin, vmax, self.freq)
    info = self.plot_obj.date_axis_info

    if self.isminor:
        # a minor formatter only handles ticks that are not also major
        selected = np.compress(info['min'] & np.logical_not(info['maj']),
                               info)
    else:
        selected = np.compress(info['maj'], info)

    # ``selected`` replaces the former local named ``format``, which
    # shadowed the builtin of the same name.
    self.formatdict = {x: f for (x, _, _, f) in selected}
    return self.formatdict
class TimeSeries_TimedeltaFormatter(Formatter):
    """
    Formats the ticks along an axis controlled by a :class:`TimedeltaIndex`.
    """

    @staticmethod
    def format_timedelta_ticks(x, pos, n_decimals):
        """
        Render a tick value as ``'D days HH:MM:SS.F'``.

        ``x`` is divided by 1e9 to obtain whole seconds, i.e. it is treated
        as a count of nanoseconds. The ``'D days '`` prefix is only added
        when the day count is non-zero, and the fractional part is only
        added when ``n_decimals`` is positive. ``pos`` is not used.
        """
        total_seconds, nanos = divmod(x, 1e9)
        total_minutes, seconds = divmod(total_seconds, 60)
        total_hours, minutes = divmod(total_minutes, 60)
        days, hours = divmod(total_hours, 24)

        # keep only the leading ``n_decimals`` digits of the remainder
        fraction = int(nanos * 10**(n_decimals - 9))

        label = r'{:02d}:{:02d}:{:02d}'.format(int(hours), int(minutes),
                                               int(seconds))
        if n_decimals > 0:
            label += '.{{:0{:0d}d}}'.format(n_decimals).format(fraction)
        if days != 0:
            label = '{:d} days '.format(int(days)) + label
        return label

    def __call__(self, x, pos=0):
        """
        Format tick ``x``, choosing the number of decimals from the width
        of the current view interval (never more than 9).
        """
        _check_implicitly_registered()
        (vmin, vmax) = tuple(self.axis.get_view_interval())
        wanted_decimals = int(np.ceil(np.log10(100 * 1e9 / (vmax - vmin))))
        n_decimals = min(wanted_decimals, 9)
        return self.format_timedelta_ticks(x, pos, n_decimals)
def _get_standard_kind(kind):
    """
    Translate a plot-kind alias into its standard name.

    ``'density'`` maps to ``'kde'``; any other value is returned unchanged.
    """
    aliases = {'density': 'kde'}
    try:
        return aliases[kind]
    except KeyError:
        return kind
Must be overridden in child class""" + raise NotImplementedError + + _layout_type = 'vertical' + _default_rot = 0 + orientation = None + _pop_attributes = ['label', 'style', 'logy', 'logx', 'loglog', + 'mark_right', 'stacked'] + _attr_defaults = {'logy': False, 'logx': False, 'loglog': False, + 'mark_right': True, 'stacked': False} + + def __init__(self, data, kind=None, by=None, subplots=False, sharex=None, + sharey=False, use_index=True, + figsize=None, grid=None, legend=True, rot=None, + ax=None, fig=None, title=None, xlim=None, ylim=None, + xticks=None, yticks=None, + sort_columns=False, fontsize=None, + secondary_y=False, colormap=None, + table=False, layout=None, **kwds): + + _converter._WARN = False + self.data = data + self.by = by + + self.kind = kind + + self.sort_columns = sort_columns + + self.subplots = subplots + + if sharex is None: + if ax is None: + self.sharex = True + else: + # if we get an axis, the users should do the visibility + # setting... + self.sharex = False + else: + self.sharex = sharex + + self.sharey = sharey + self.figsize = figsize + self.layout = layout + + self.xticks = xticks + self.yticks = yticks + self.xlim = xlim + self.ylim = ylim + self.title = title + self.use_index = use_index + + self.fontsize = fontsize + + if rot is not None: + self.rot = rot + # need to know for format_date_labels since it's rotated to 30 by + # default + self._rot_set = True + else: + self._rot_set = False + self.rot = self._default_rot + + if grid is None: + grid = False if secondary_y else self.plt.rcParams['axes.grid'] + + self.grid = grid + self.legend = legend + self.legend_handles = [] + self.legend_labels = [] + + for attr in self._pop_attributes: + value = kwds.pop(attr, self._attr_defaults.get(attr, None)) + setattr(self, attr, value) + + self.ax = ax + self.fig = fig + self.axes = None + + # parse errorbar input if given + xerr = kwds.pop('xerr', None) + yerr = kwds.pop('yerr', None) + self.errors = {} + for kw, err in zip(['xerr', 
def _validate_color_args(self):
    """
    Normalise the colour-related entries of ``self.kwds`` in place.

    * A ``'colors'`` keyword (without ``'color'``) is renamed to
      ``'color'`` and a deprecation warning is issued.
    * For a single series, a non-list-like colour, or a tuple of length
      3 or 4 (RGB / RGBA), is wrapped in a one-element list.
    * A warning is issued when a colour keyword and ``self.colormap``
      are both given.

    Raises
    ------
    ValueError
        If ``'color'`` is given and any entry of ``self.style`` starts
        with a lowercase letter (treated as a colour symbol).
    """
    if 'color' not in self.kwds and 'colors' in self.kwds:
        # The two literals are concatenated, so the space before
        # "instead" has to be part of one of them.
        warnings.warn(("'colors' is being deprecated. Please use 'color' "
                       "instead of 'colors'"))
        colors = self.kwds.pop('colors')
        self.kwds['color'] = colors

    if ('color' in self.kwds and self.nseries == 1 and
            not is_list_like(self.kwds['color'])):
        # support series.plot(color='green')
        self.kwds['color'] = [self.kwds['color']]

    if ('color' in self.kwds and isinstance(self.kwds['color'], tuple) and
            self.nseries == 1 and len(self.kwds['color']) in (3, 4)):
        # support RGB and RGBA tuples in series plot
        self.kwds['color'] = [self.kwds['color']]

    if ('color' in self.kwds or 'colors' in self.kwds) and \
            self.colormap is not None:
        warnings.warn("'color' and 'colormap' cannot be used "
                      "simultaneously. Using 'color'")

    if 'color' in self.kwds and self.style is not None:
        if is_list_like(self.style):
            styles = self.style
        else:
            styles = [self.style]
        # need only a single match
        for s in styles:
            if re.match('^[a-z]+?', s) is not None:
                raise ValueError(
                    "Cannot pass 'style' string with a color "
                    "symbol and 'color' keyword argument. Please"
                    " use one or the other or pass 'style' "
                    "without a color symbol")
@property
def result(self):
    """
    Return result axes

    With ``subplots`` set, all axes are returned, reshaped to
    ``self.layout`` when a layout is given and ``self.ax`` is not
    list-like. Otherwise the first axes is returned, or its secondary
    (right) layer when every series is plotted on the secondary y-axis.
    """
    if self.subplots:
        use_layout = (self.layout is not None and
                      not is_list_like(self.ax))
        if use_layout:
            return self.axes.reshape(*self.layout)
        return self.axes

    secondary = self.secondary_y
    # "all on secondary" is either a plain ``True`` or a list-like
    # naming as many entries as there are series
    everything_on_secondary = (
        (isinstance(secondary, bool) and secondary) or
        (is_list_like(secondary) and len(secondary) == self.nseries))

    first_ax = self.axes[0]
    if everything_on_secondary:
        return self._get_ax_layer(first_ax, primary=False)
    return first_ax
False: + return + elif self.table is True: + data = self.data.transpose() + else: + data = self.table + ax = self._get_ax(0) + table(ax, data) + + def _post_plot_logic_common(self, ax, data): + """Common post process for each axes""" + + def get_label(i): + try: + return pprint_thing(data.index[i]) + except Exception: + return '' + + if self.orientation == 'vertical' or self.orientation is None: + if self._need_to_set_index: + xticklabels = [get_label(x) for x in ax.get_xticks()] + ax.set_xticklabels(xticklabels) + self._apply_axis_properties(ax.xaxis, rot=self.rot, + fontsize=self.fontsize) + self._apply_axis_properties(ax.yaxis, fontsize=self.fontsize) + + if hasattr(ax, 'right_ax'): + self._apply_axis_properties(ax.right_ax.yaxis, + fontsize=self.fontsize) + + elif self.orientation == 'horizontal': + if self._need_to_set_index: + yticklabels = [get_label(y) for y in ax.get_yticks()] + ax.set_yticklabels(yticklabels) + self._apply_axis_properties(ax.yaxis, rot=self.rot, + fontsize=self.fontsize) + self._apply_axis_properties(ax.xaxis, fontsize=self.fontsize) + + if hasattr(ax, 'right_ax'): + self._apply_axis_properties(ax.right_ax.yaxis, + fontsize=self.fontsize) + else: # pragma no cover + raise ValueError + + def _post_plot_logic(self, ax, data): + """Post process for each axes. 
def _apply_axis_properties(self, axis, rot=None, fontsize=None):
    """
    Apply rotation and/or font size to every tick label of ``axis``.

    Both major and minor tick labels are visited; a property left as
    ``None`` is not touched.
    """
    tick_labels = axis.get_majorticklabels() + axis.get_minorticklabels()
    rotate = rot is not None
    resize = fontsize is not None
    for tick_label in tick_labels:
        if rotate:
            tick_label.set_rotation(rot)
        if resize:
            tick_label.set_fontsize(fontsize)
def _get_ax_legend(self, ax):
    """
    Return ``(axes, legend)`` for ``ax``, falling back to its twin axes.

    The twin is ``ax.left_ax`` or ``ax.right_ax`` when present. The twin
    and its legend are returned only when ``ax`` has no legend of its own
    and the twin does.
    """
    legend = ax.get_legend()
    twin = (getattr(ax, 'left_ax', None) or
            getattr(ax, 'right_ax', None))
    twin_legend = twin.get_legend() if twin is not None else None

    if legend is None and twin_legend is not None:
        return twin, twin_legend
    return ax, legend
def _get_ax(self, i):
    """
    Return the axes on which series ``i`` is drawn.

    With subplots the ``i``-th axes is used and the (possibly twinned)
    result is written back to ``self.axes[i]``; otherwise the first axes
    is used and ``self.axes`` is left alone. The returned axes always has
    its y-axis made visible.
    """
    # get the twinx ax if appropriate
    slot = i if self.subplots else 0
    ax = self._maybe_right_yaxis(self.axes[slot], i)
    if self.subplots:
        self.axes[i] = ax

    ax.get_yaxis().set_visible(True)
    return ax
Index)): + return self.data.columns[i] in self.secondary_y + + def _apply_style_colors(self, colors, kwds, col_num, label): + """ + Manage style and color based on column number and its label. + Returns tuple of appropriate style and kwds which "color" may be added. + """ + style = None + if self.style is not None: + if isinstance(self.style, list): + try: + style = self.style[col_num] + except IndexError: + pass + elif isinstance(self.style, dict): + style = self.style.get(label, style) + else: + style = self.style + + has_color = 'color' in kwds or self.colormap is not None + nocolor_style = style is None or re.match('[a-z]+', style) is None + if (has_color or self.subplots) and nocolor_style: + kwds['color'] = colors[col_num % len(colors)] + return style, kwds + + def _get_colors(self, num_colors=None, color_kwds='color'): + if num_colors is None: + num_colors = self.nseries + + return _get_standard_colors(num_colors=num_colors, + colormap=self.colormap, + color=self.kwds.get(color_kwds)) + + def _parse_errorbars(self, label, err): + """ + Look for error keyword arguments and return the actual errorbar data + or return the error DataFrame/dict + + Error bars can be specified in several ways: + Series: the user provides a pandas.Series object of the same + length as the data + ndarray: provides a np.ndarray of the same length as the data + DataFrame/dict: error values are paired with keys matching the + key in the plotted DataFrame + str: the name of the column within the plotted DataFrame + """ + + if err is None: + return None + + from pandas import DataFrame, Series + + def match_labels(data, e): + e = e.reindex(data.index) + return e + + # key-matched DataFrame + if isinstance(err, DataFrame): + + err = match_labels(self.data, err) + # key-matched dict + elif isinstance(err, dict): + pass + + # Series of error values + elif isinstance(err, Series): + # broadcast error series across data + err = match_labels(self.data, err) + err = np.atleast_2d(err) + err = 
np.tile(err, (self.nseries, 1)) + + # errors are a column in the dataframe + elif isinstance(err, string_types): + evalues = self.data[err].values + self.data = self.data[self.data.columns.drop(err)] + err = np.atleast_2d(evalues) + err = np.tile(err, (self.nseries, 1)) + + elif is_list_like(err): + if is_iterator(err): + err = np.atleast_2d(list(err)) + else: + # raw error values + err = np.atleast_2d(err) + + err_shape = err.shape + + # asymmetrical error bars + if err.ndim == 3: + if (err_shape[0] != self.nseries) or \ + (err_shape[1] != 2) or \ + (err_shape[2] != len(self.data)): + msg = "Asymmetrical error bars should be provided " + \ + "with the shape (%u, 2, %u)" % \ + (self.nseries, len(self.data)) + raise ValueError(msg) + + # broadcast errors to each data series + if len(err) == 1: + err = np.tile(err, (self.nseries, 1)) + + elif is_number(err): + err = np.tile([err], (self.nseries, len(self.data))) + + else: + msg = "No valid {label} detected".format(label=label) + raise ValueError(msg) + + return err + + def _get_errorbars(self, label=None, index=None, xerr=True, yerr=True): + from pandas import DataFrame + errors = {} + + for kw, flag in zip(['xerr', 'yerr'], [xerr, yerr]): + if flag: + err = self.errors[kw] + # user provided label-matched dataframe of errors + if isinstance(err, (DataFrame, dict)): + if label is not None and label in err.keys(): + err = err[label] + else: + err = None + elif index is not None and err is not None: + err = err[index] + + if err is not None: + errors[kw] = err + return errors + + def _get_subplots(self): + from matplotlib.axes import Subplot + return [ax for ax in self.axes[0].get_figure().get_axes() + if isinstance(ax, Subplot)] + + def _get_axes_layout(self): + axes = self._get_subplots() + x_set = set() + y_set = set() + for ax in axes: + # check axes coordinates to estimate layout + points = ax.get_position().get_points() + x_set.add(points[0][0]) + y_set.add(points[0][1]) + return (len(y_set), len(x_set)) + + 
+class PlanePlot(MPLPlot): + """ + Abstract class for plotting on plane, currently scatter and hexbin. + """ + + _layout_type = 'single' + + def __init__(self, data, x, y, **kwargs): + MPLPlot.__init__(self, data, **kwargs) + if x is None or y is None: + raise ValueError(self._kind + ' requires and x and y column') + if is_integer(x) and not self.data.columns.holds_integer(): + x = self.data.columns[x] + if is_integer(y) and not self.data.columns.holds_integer(): + y = self.data.columns[y] + if len(self.data[x]._get_numeric_data()) == 0: + raise ValueError(self._kind + ' requires x column to be numeric') + if len(self.data[y]._get_numeric_data()) == 0: + raise ValueError(self._kind + ' requires y column to be numeric') + + self.x = x + self.y = y + + @property + def nseries(self): + return 1 + + def _post_plot_logic(self, ax, data): + x, y = self.x, self.y + ax.set_ylabel(pprint_thing(y)) + ax.set_xlabel(pprint_thing(x)) + + +class ScatterPlot(PlanePlot): + _kind = 'scatter' + + def __init__(self, data, x, y, s=None, c=None, **kwargs): + if s is None: + # hide the matplotlib default for size, in case we want to change + # the handling of this argument later + s = 20 + super(ScatterPlot, self).__init__(data, x, y, s=s, **kwargs) + if is_integer(c) and not self.data.columns.holds_integer(): + c = self.data.columns[c] + self.c = c + + def _make_plot(self): + x, y, c, data = self.x, self.y, self.c, self.data + ax = self.axes[0] + + c_is_column = is_hashable(c) and c in self.data.columns + + # plot a colorbar only if a colormap is provided or necessary + cb = self.kwds.pop('colorbar', self.colormap or c_is_column) + + # pandas uses colormap, matplotlib uses cmap. 
+ cmap = self.colormap or 'Greys' + cmap = self.plt.cm.get_cmap(cmap) + color = self.kwds.pop("color", None) + if c is not None and color is not None: + raise TypeError('Specify exactly one of `c` and `color`') + elif c is None and color is None: + c_values = self.plt.rcParams['patch.facecolor'] + elif color is not None: + c_values = color + elif c_is_column: + c_values = self.data[c].values + else: + c_values = c + + if self.legend and hasattr(self, 'label'): + label = self.label + else: + label = None + scatter = ax.scatter(data[x].values, data[y].values, c=c_values, + label=label, cmap=cmap, **self.kwds) + if cb: + img = ax.collections[0] + kws = dict(ax=ax) + if self.mpl_ge_1_3_1(): + kws['label'] = c if c_is_column else '' + self.fig.colorbar(img, **kws) + + if label is not None: + self._add_legend_handle(scatter, label) + else: + self.legend = False + + errors_x = self._get_errorbars(label=x, index=0, yerr=False) + errors_y = self._get_errorbars(label=y, index=0, xerr=False) + if len(errors_x) > 0 or len(errors_y) > 0: + err_kwds = dict(errors_x, **errors_y) + err_kwds['ecolor'] = scatter.get_facecolor()[0] + ax.errorbar(data[x].values, data[y].values, + linestyle='none', **err_kwds) + + +class HexBinPlot(PlanePlot): + _kind = 'hexbin' + + def __init__(self, data, x, y, C=None, **kwargs): + super(HexBinPlot, self).__init__(data, x, y, **kwargs) + if is_integer(C) and not self.data.columns.holds_integer(): + C = self.data.columns[C] + self.C = C + + def _make_plot(self): + x, y, data, C = self.x, self.y, self.data, self.C + ax = self.axes[0] + # pandas uses colormap, matplotlib uses cmap. 
+ cmap = self.colormap or 'BuGn' + cmap = self.plt.cm.get_cmap(cmap) + cb = self.kwds.pop('colorbar', True) + + if C is None: + c_values = None + else: + c_values = data[C].values + + ax.hexbin(data[x].values, data[y].values, C=c_values, cmap=cmap, + **self.kwds) + if cb: + img = ax.collections[0] + self.fig.colorbar(img, ax=ax) + + def _make_legend(self): + pass + + +class LinePlot(MPLPlot): + _kind = 'line' + _default_rot = 0 + orientation = 'vertical' + + def __init__(self, data, **kwargs): + MPLPlot.__init__(self, data, **kwargs) + if self.stacked: + self.data = self.data.fillna(value=0) + self.x_compat = plot_params['x_compat'] + if 'x_compat' in self.kwds: + self.x_compat = bool(self.kwds.pop('x_compat')) + + def _is_ts_plot(self): + # this is slightly deceptive + return not self.x_compat and self.use_index and self._use_dynamic_x() + + def _use_dynamic_x(self): + from pandas.plotting._timeseries import _use_dynamic_x + return _use_dynamic_x(self._get_ax(0), self.data) + + def _make_plot(self): + if self._is_ts_plot(): + from pandas.plotting._timeseries import _maybe_convert_index + data = _maybe_convert_index(self._get_ax(0), self.data) + + x = data.index # dummy, not used + plotf = self._ts_plot + it = self._iter_data(data=data, keep_index=True) + else: + x = self._get_xticks(convert_period=True) + plotf = self._plot + it = self._iter_data() + + stacking_id = self._get_stacking_id() + is_errorbar = com._any_not_none(*self.errors.values()) + + colors = self._get_colors() + for i, (label, y) in enumerate(it): + ax = self._get_ax(i) + kwds = self.kwds.copy() + style, kwds = self._apply_style_colors(colors, kwds, i, label) + + errors = self._get_errorbars(label=label, index=i) + kwds = dict(kwds, **errors) + + label = pprint_thing(label) # .encode('utf-8') + kwds['label'] = label + + newlines = plotf(ax, x, y, style=style, column_num=i, + stacking_id=stacking_id, + is_errorbar=is_errorbar, + **kwds) + self._add_legend_handle(newlines[0], label, index=i) + + if 
not _mpl_ge_2_0_0(): + lines = _get_all_lines(ax) + left, right = _get_xlim(lines) + ax.set_xlim(left, right) + + @classmethod + def _plot(cls, ax, x, y, style=None, column_num=None, + stacking_id=None, **kwds): + # column_num is used to get the target column from protf in line and + # area plots + if column_num == 0: + cls._initialize_stacker(ax, stacking_id, len(y)) + y_values = cls._get_stacked_values(ax, stacking_id, y, kwds['label']) + lines = MPLPlot._plot(ax, x, y_values, style=style, **kwds) + cls._update_stacker(ax, stacking_id, y) + return lines + + @classmethod + def _ts_plot(cls, ax, x, data, style=None, **kwds): + from pandas.plotting._timeseries import (_maybe_resample, + _decorate_axes, + format_dateaxis) + # accept x to be consistent with normal plot func, + # x is not passed to tsplot as it uses data.index as x coordinate + # column_num must be in kwds for stacking purpose + freq, data = _maybe_resample(data, ax, kwds) + + # Set ax with freq info + _decorate_axes(ax, freq, kwds) + # digging deeper + if hasattr(ax, 'left_ax'): + _decorate_axes(ax.left_ax, freq, kwds) + if hasattr(ax, 'right_ax'): + _decorate_axes(ax.right_ax, freq, kwds) + ax._plot_data.append((data, cls._kind, kwds)) + + lines = cls._plot(ax, data.index, data.values, style=style, **kwds) + # set date formatter, locators and rescale limits + format_dateaxis(ax, ax.freq, data.index) + return lines + + def _get_stacking_id(self): + if self.stacked: + return id(self.data) + else: + return None + + @classmethod + def _initialize_stacker(cls, ax, stacking_id, n): + if stacking_id is None: + return + if not hasattr(ax, '_stacker_pos_prior'): + ax._stacker_pos_prior = {} + if not hasattr(ax, '_stacker_neg_prior'): + ax._stacker_neg_prior = {} + ax._stacker_pos_prior[stacking_id] = np.zeros(n) + ax._stacker_neg_prior[stacking_id] = np.zeros(n) + + @classmethod + def _get_stacked_values(cls, ax, stacking_id, values, label): + if stacking_id is None: + return values + if not hasattr(ax, 
'_stacker_pos_prior'): + # stacker may not be initialized for subplots + cls._initialize_stacker(ax, stacking_id, len(values)) + + if (values >= 0).all(): + return ax._stacker_pos_prior[stacking_id] + values + elif (values <= 0).all(): + return ax._stacker_neg_prior[stacking_id] + values + + raise ValueError('When stacked is True, each column must be either ' + 'all positive or negative.' + '{0} contains both positive and negative values' + .format(label)) + + @classmethod + def _update_stacker(cls, ax, stacking_id, values): + if stacking_id is None: + return + if (values >= 0).all(): + ax._stacker_pos_prior[stacking_id] += values + elif (values <= 0).all(): + ax._stacker_neg_prior[stacking_id] += values + + def _post_plot_logic(self, ax, data): + condition = (not self._use_dynamic_x() and + data.index.is_all_dates and + not self.subplots or + (self.subplots and self.sharex)) + + index_name = self._get_index_name() + + if condition: + # irregular TS rotated 30 deg. by default + # probably a better place to check / set this. 
+ if not self._rot_set: + self.rot = 30 + format_date_labels(ax, rot=self.rot) + + if index_name is not None and self.use_index: + ax.set_xlabel(index_name) + + +class AreaPlot(LinePlot): + _kind = 'area' + + def __init__(self, data, **kwargs): + kwargs.setdefault('stacked', True) + data = data.fillna(value=0) + LinePlot.__init__(self, data, **kwargs) + + if not self.stacked: + # use smaller alpha to distinguish overlap + self.kwds.setdefault('alpha', 0.5) + + if self.logy or self.loglog: + raise ValueError("Log-y scales are not supported in area plot") + + @classmethod + def _plot(cls, ax, x, y, style=None, column_num=None, + stacking_id=None, is_errorbar=False, **kwds): + + if column_num == 0: + cls._initialize_stacker(ax, stacking_id, len(y)) + y_values = cls._get_stacked_values(ax, stacking_id, y, kwds['label']) + + # need to remove label, because subplots uses mpl legend as it is + line_kwds = kwds.copy() + if cls.mpl_ge_1_5_0(): + line_kwds.pop('label') + lines = MPLPlot._plot(ax, x, y_values, style=style, **line_kwds) + + # get data from the line to get coordinates for fill_between + xdata, y_values = lines[0].get_data(orig=False) + + # unable to use ``_get_stacked_values`` here to get starting point + if stacking_id is None: + start = np.zeros(len(y)) + elif (y >= 0).all(): + start = ax._stacker_pos_prior[stacking_id] + elif (y <= 0).all(): + start = ax._stacker_neg_prior[stacking_id] + else: + start = np.zeros(len(y)) + + if 'color' not in kwds: + kwds['color'] = lines[0].get_color() + + rect = ax.fill_between(xdata, start, y_values, **kwds) + cls._update_stacker(ax, stacking_id, y) + + # LinePlot expects list of artists + res = [rect] if cls.mpl_ge_1_5_0() else lines + return res + + def _add_legend_handle(self, handle, label, index=None): + if not self.mpl_ge_1_5_0(): + from matplotlib.patches import Rectangle + # Because fill_between isn't supported in legend, + # specifically add Rectangle handle here + alpha = self.kwds.get('alpha', None) + handle = 
Rectangle((0, 0), 1, 1, fc=handle.get_color(), + alpha=alpha) + LinePlot._add_legend_handle(self, handle, label, index=index) + + def _post_plot_logic(self, ax, data): + LinePlot._post_plot_logic(self, ax, data) + + if self.ylim is None: + if (data >= 0).all().all(): + ax.set_ylim(0, None) + elif (data <= 0).all().all(): + ax.set_ylim(None, 0) + + +class BarPlot(MPLPlot): + _kind = 'bar' + _default_rot = 90 + orientation = 'vertical' + + def __init__(self, data, **kwargs): + # we have to treat a series differently than a + # 1-column DataFrame w.r.t. color handling + self._is_series = isinstance(data, ABCSeries) + self.bar_width = kwargs.pop('width', 0.5) + pos = kwargs.pop('position', 0.5) + kwargs.setdefault('align', 'center') + self.tick_pos = np.arange(len(data)) + + self.bottom = kwargs.pop('bottom', 0) + self.left = kwargs.pop('left', 0) + + self.log = kwargs.pop('log', False) + MPLPlot.__init__(self, data, **kwargs) + + if self.stacked or self.subplots: + self.tickoffset = self.bar_width * pos + if kwargs['align'] == 'edge': + self.lim_offset = self.bar_width / 2 + else: + self.lim_offset = 0 + else: + if kwargs['align'] == 'edge': + w = self.bar_width / self.nseries + self.tickoffset = self.bar_width * (pos - 0.5) + w * 0.5 + self.lim_offset = w * 0.5 + else: + self.tickoffset = self.bar_width * pos + self.lim_offset = 0 + + self.ax_pos = self.tick_pos - self.tickoffset + + def _args_adjust(self): + if is_list_like(self.bottom): + self.bottom = np.array(self.bottom) + if is_list_like(self.left): + self.left = np.array(self.left) + + @classmethod + def _plot(cls, ax, x, y, w, start=0, log=False, **kwds): + return ax.bar(x, y, w, bottom=start, log=log, **kwds) + + @property + def _start_base(self): + return self.bottom + + def _make_plot(self): + import matplotlib as mpl + + colors = self._get_colors() + ncolors = len(colors) + + pos_prior = neg_prior = np.zeros(len(self.data)) + K = self.nseries + + for i, (label, y) in enumerate(self._iter_data(fillna=0)): 
+ ax = self._get_ax(i) + kwds = self.kwds.copy() + if self._is_series: + kwds['color'] = colors + else: + kwds['color'] = colors[i % ncolors] + + errors = self._get_errorbars(label=label, index=i) + kwds = dict(kwds, **errors) + + label = pprint_thing(label) + + if (('yerr' in kwds) or ('xerr' in kwds)) \ + and (kwds.get('ecolor') is None): + kwds['ecolor'] = mpl.rcParams['xtick.color'] + + start = 0 + if self.log and (y >= 1).all(): + start = 1 + start = start + self._start_base + + if self.subplots: + w = self.bar_width / 2 + rect = self._plot(ax, self.ax_pos + w, y, self.bar_width, + start=start, label=label, + log=self.log, **kwds) + ax.set_title(label) + elif self.stacked: + mask = y > 0 + start = np.where(mask, pos_prior, neg_prior) + self._start_base + w = self.bar_width / 2 + rect = self._plot(ax, self.ax_pos + w, y, self.bar_width, + start=start, label=label, + log=self.log, **kwds) + pos_prior = pos_prior + np.where(mask, y, 0) + neg_prior = neg_prior + np.where(mask, 0, y) + else: + w = self.bar_width / K + rect = self._plot(ax, self.ax_pos + (i + 0.5) * w, y, w, + start=start, label=label, + log=self.log, **kwds) + self._add_legend_handle(rect, label, index=i) + + def _post_plot_logic(self, ax, data): + if self.use_index: + str_index = [pprint_thing(key) for key in data.index] + else: + str_index = [pprint_thing(key) for key in range(data.shape[0])] + name = self._get_index_name() + + s_edge = self.ax_pos[0] - 0.25 + self.lim_offset + e_edge = self.ax_pos[-1] + 0.25 + self.bar_width + self.lim_offset + + self._decorate_ticks(ax, name, str_index, s_edge, e_edge) + + def _decorate_ticks(self, ax, name, ticklabels, start_edge, end_edge): + ax.set_xlim((start_edge, end_edge)) + ax.set_xticks(self.tick_pos) + ax.set_xticklabels(ticklabels) + if name is not None and self.use_index: + ax.set_xlabel(name) + + +class BarhPlot(BarPlot): + _kind = 'barh' + _default_rot = 0 + orientation = 'horizontal' + + @property + def _start_base(self): + return self.left + + 
@classmethod + def _plot(cls, ax, x, y, w, start=0, log=False, **kwds): + return ax.barh(x, y, w, left=start, log=log, **kwds) + + def _decorate_ticks(self, ax, name, ticklabels, start_edge, end_edge): + # horizontal bars + ax.set_ylim((start_edge, end_edge)) + ax.set_yticks(self.tick_pos) + ax.set_yticklabels(ticklabels) + if name is not None and self.use_index: + ax.set_ylabel(name) + + +class HistPlot(LinePlot): + _kind = 'hist' + + def __init__(self, data, bins=10, bottom=0, **kwargs): + self.bins = bins # use mpl default + self.bottom = bottom + # Do not call LinePlot.__init__ which may fill nan + MPLPlot.__init__(self, data, **kwargs) + + def _args_adjust(self): + if is_integer(self.bins): + # create common bin edge + values = (self.data._convert(datetime=True)._get_numeric_data()) + values = np.ravel(values) + values = values[~isna(values)] + + hist, self.bins = np.histogram( + values, bins=self.bins, + range=self.kwds.get('range', None), + weights=self.kwds.get('weights', None)) + + if is_list_like(self.bottom): + self.bottom = np.array(self.bottom) + + @classmethod + def _plot(cls, ax, y, style=None, bins=None, bottom=0, column_num=0, + stacking_id=None, **kwds): + if column_num == 0: + cls._initialize_stacker(ax, stacking_id, len(bins) - 1) + y = y[~isna(y)] + + base = np.zeros(len(bins) - 1) + bottom = bottom + \ + cls._get_stacked_values(ax, stacking_id, base, kwds['label']) + # ignore style + n, bins, patches = ax.hist(y, bins=bins, bottom=bottom, **kwds) + cls._update_stacker(ax, stacking_id, n) + return patches + + def _make_plot(self): + colors = self._get_colors() + stacking_id = self._get_stacking_id() + + for i, (label, y) in enumerate(self._iter_data()): + ax = self._get_ax(i) + + kwds = self.kwds.copy() + + label = pprint_thing(label) + kwds['label'] = label + + style, kwds = self._apply_style_colors(colors, kwds, i, label) + if style is not None: + kwds['style'] = style + + kwds = self._make_plot_keywords(kwds, y) + artists = self._plot(ax, y, 
column_num=i, + stacking_id=stacking_id, **kwds) + self._add_legend_handle(artists[0], label, index=i) + + def _make_plot_keywords(self, kwds, y): + """merge BoxPlot/KdePlot properties to passed kwds""" + # y is required for KdePlot + kwds['bottom'] = self.bottom + kwds['bins'] = self.bins + return kwds + + def _post_plot_logic(self, ax, data): + if self.orientation == 'horizontal': + ax.set_xlabel('Frequency') + else: + ax.set_ylabel('Frequency') + + @property + def orientation(self): + if self.kwds.get('orientation', None) == 'horizontal': + return 'horizontal' + else: + return 'vertical' + + +class KdePlot(HistPlot): + _kind = 'kde' + orientation = 'vertical' + + def __init__(self, data, bw_method=None, ind=None, **kwargs): + MPLPlot.__init__(self, data, **kwargs) + self.bw_method = bw_method + self.ind = ind + + def _args_adjust(self): + pass + + def _get_ind(self, y): + if self.ind is None: + # np.nanmax() and np.nanmin() ignores the missing values + sample_range = np.nanmax(y) - np.nanmin(y) + ind = np.linspace(np.nanmin(y) - 0.5 * sample_range, + np.nanmax(y) + 0.5 * sample_range, 1000) + elif is_integer(self.ind): + sample_range = np.nanmax(y) - np.nanmin(y) + ind = np.linspace(np.nanmin(y) - 0.5 * sample_range, + np.nanmax(y) + 0.5 * sample_range, self.ind) + else: + ind = self.ind + return ind + + @classmethod + def _plot(cls, ax, y, style=None, bw_method=None, ind=None, + column_num=None, stacking_id=None, **kwds): + from scipy.stats import gaussian_kde + from scipy import __version__ as spv + + y = remove_na_arraylike(y) + + if LooseVersion(spv) >= '0.11.0': + gkde = gaussian_kde(y, bw_method=bw_method) + else: + gkde = gaussian_kde(y) + if bw_method is not None: + msg = ('bw_method was added in Scipy 0.11.0.' 
+ + ' Scipy version in use is {spv}.'.format(spv=spv)) + warnings.warn(msg) + + y = gkde.evaluate(ind) + lines = MPLPlot._plot(ax, ind, y, style=style, **kwds) + return lines + + def _make_plot_keywords(self, kwds, y): + kwds['bw_method'] = self.bw_method + kwds['ind'] = self._get_ind(y) + return kwds + + def _post_plot_logic(self, ax, data): + ax.set_ylabel('Density') + + +class PiePlot(MPLPlot): + _kind = 'pie' + _layout_type = 'horizontal' + + def __init__(self, data, kind=None, **kwargs): + data = data.fillna(value=0) + if (data < 0).any().any(): + raise ValueError("{0} doesn't allow negative values".format(kind)) + MPLPlot.__init__(self, data, kind=kind, **kwargs) + + def _args_adjust(self): + self.grid = False + self.logy = False + self.logx = False + self.loglog = False + + def _validate_color_args(self): + pass + + def _make_plot(self): + colors = self._get_colors( + num_colors=len(self.data), color_kwds='colors') + self.kwds.setdefault('colors', colors) + + for i, (label, y) in enumerate(self._iter_data()): + ax = self._get_ax(i) + if label is not None: + label = pprint_thing(label) + ax.set_ylabel(label) + + kwds = self.kwds.copy() + + def blank_labeler(label, value): + if value == 0: + return '' + else: + return label + + idx = [pprint_thing(v) for v in self.data.index] + labels = kwds.pop('labels', idx) + # labels is used for each wedge's labels + # Blank out labels for values of 0 so they don't overlap + # with nonzero wedges + if labels is not None: + blabels = [blank_labeler(l, value) for + l, value in zip(labels, y)] + else: + blabels = None + results = ax.pie(y, labels=blabels, **kwds) + + if kwds.get('autopct', None) is not None: + patches, texts, autotexts = results + else: + patches, texts = results + autotexts = [] + + if self.fontsize is not None: + for t in texts + autotexts: + t.set_fontsize(self.fontsize) + + # leglabels is used for legend labels + leglabels = labels if labels is not None else idx + for p, l in zip(patches, leglabels): + 
self._add_legend_handle(p, l) + + +class BoxPlot(LinePlot): + _kind = 'box' + _layout_type = 'horizontal' + + _valid_return_types = (None, 'axes', 'dict', 'both') + # namedtuple to hold results + BP = namedtuple("Boxplot", ['ax', 'lines']) + + def __init__(self, data, return_type='axes', **kwargs): + # Do not call LinePlot.__init__ which may fill nan + if return_type not in self._valid_return_types: + raise ValueError( + "return_type must be {None, 'axes', 'dict', 'both'}") + + self.return_type = return_type + MPLPlot.__init__(self, data, **kwargs) + + def _args_adjust(self): + if self.subplots: + # Disable label ax sharing. Otherwise, all subplots shows last + # column label + if self.orientation == 'vertical': + self.sharex = False + else: + self.sharey = False + + @classmethod + def _plot(cls, ax, y, column_num=None, return_type='axes', **kwds): + if y.ndim == 2: + y = [remove_na_arraylike(v) for v in y] + # Boxplot fails with empty arrays, so need to add a NaN + # if any cols are empty + # GH 8181 + y = [v if v.size > 0 else np.array([np.nan]) for v in y] + else: + y = remove_na_arraylike(y) + bp = ax.boxplot(y, **kwds) + + if return_type == 'dict': + return bp, bp + elif return_type == 'both': + return cls.BP(ax=ax, lines=bp), bp + else: + return ax, bp + + def _validate_color_args(self): + if 'color' in self.kwds: + if self.colormap is not None: + warnings.warn("'color' and 'colormap' cannot be used " + "simultaneously. 
Using 'color'") + self.color = self.kwds.pop('color') + + if isinstance(self.color, dict): + valid_keys = ['boxes', 'whiskers', 'medians', 'caps'] + for key, values in compat.iteritems(self.color): + if key not in valid_keys: + raise ValueError("color dict contains invalid " + "key '{0}' " + "The key must be either {1}" + .format(key, valid_keys)) + else: + self.color = None + + # get standard colors for default + colors = _get_standard_colors(num_colors=3, + colormap=self.colormap, + color=None) + # use 2 colors by default, for box/whisker and median + # flier colors isn't needed here + # because it can be specified by ``sym`` kw + self._boxes_c = colors[0] + self._whiskers_c = colors[0] + self._medians_c = colors[2] + self._caps_c = 'k' # mpl default + + def _get_colors(self, num_colors=None, color_kwds='color'): + pass + + def maybe_color_bp(self, bp): + if isinstance(self.color, dict): + boxes = self.color.get('boxes', self._boxes_c) + whiskers = self.color.get('whiskers', self._whiskers_c) + medians = self.color.get('medians', self._medians_c) + caps = self.color.get('caps', self._caps_c) + else: + # Other types are forwarded to matplotlib + # If None, use default colors + boxes = self.color or self._boxes_c + whiskers = self.color or self._whiskers_c + medians = self.color or self._medians_c + caps = self.color or self._caps_c + + from matplotlib.artist import setp + setp(bp['boxes'], color=boxes, alpha=1) + setp(bp['whiskers'], color=whiskers, alpha=1) + setp(bp['medians'], color=medians, alpha=1) + setp(bp['caps'], color=caps, alpha=1) + + def _make_plot(self): + if self.subplots: + from pandas.core.series import Series + self._return_obj = Series() + + for i, (label, y) in enumerate(self._iter_data()): + ax = self._get_ax(i) + kwds = self.kwds.copy() + + ret, bp = self._plot(ax, y, column_num=i, + return_type=self.return_type, **kwds) + self.maybe_color_bp(bp) + self._return_obj[label] = ret + + label = [pprint_thing(label)] + self._set_ticklabels(ax, 
label) + else: + y = self.data.values.T + ax = self._get_ax(0) + kwds = self.kwds.copy() + + ret, bp = self._plot(ax, y, column_num=0, + return_type=self.return_type, **kwds) + self.maybe_color_bp(bp) + self._return_obj = ret + + labels = [l for l, _ in self._iter_data()] + labels = [pprint_thing(l) for l in labels] + if not self.use_index: + labels = [pprint_thing(key) for key in range(len(labels))] + self._set_ticklabels(ax, labels) + + def _set_ticklabels(self, ax, labels): + if self.orientation == 'vertical': + ax.set_xticklabels(labels) + else: + ax.set_yticklabels(labels) + + def _make_legend(self): + pass + + def _post_plot_logic(self, ax, data): + pass + + @property + def orientation(self): + if self.kwds.get('vert', True): + return 'vertical' + else: + return 'horizontal' + + @property + def result(self): + if self.return_type is None: + return super(BoxPlot, self).result + else: + return self._return_obj + + +# kinds supported by both dataframe and series +_common_kinds = ['line', 'bar', 'barh', + 'kde', 'density', 'area', 'hist', 'box'] +# kinds supported by dataframe +_dataframe_kinds = ['scatter', 'hexbin'] +# kinds supported only by series or dataframe single column +_series_kinds = ['pie'] +_all_kinds = _common_kinds + _dataframe_kinds + _series_kinds + +_klasses = [LinePlot, BarPlot, BarhPlot, KdePlot, HistPlot, BoxPlot, + ScatterPlot, HexBinPlot, AreaPlot, PiePlot] + +_plot_klass = {} +for klass in _klasses: + _plot_klass[klass._kind] = klass + + +def _plot(data, x=None, y=None, subplots=False, + ax=None, kind='line', **kwds): + kind = _get_standard_kind(kind.lower().strip()) + if kind in _all_kinds: + klass = _plot_klass[kind] + else: + raise ValueError("%r is not a valid plot kind" % kind) + + if kind in _dataframe_kinds: + if isinstance(data, ABCDataFrame): + plot_obj = klass(data, x=x, y=y, subplots=subplots, ax=ax, + kind=kind, **kwds) + else: + raise ValueError("plot kind %r can only be used for data frames" + % kind) + + elif kind in 
_series_kinds: + if isinstance(data, ABCDataFrame): + if y is None and subplots is False: + msg = "{0} requires either y column or 'subplots=True'" + raise ValueError(msg.format(kind)) + elif y is not None: + if is_integer(y) and not data.columns.holds_integer(): + y = data.columns[y] + # converted to series actually. copy to not modify + data = data[y].copy() + data.index.name = y + plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds) + else: + if isinstance(data, ABCDataFrame): + if x is not None: + if is_integer(x) and not data.columns.holds_integer(): + x = data.columns[x] + elif not isinstance(data[x], ABCSeries): + raise ValueError("x must be a label or position") + data = data.set_index(x) + + if y is not None: + if is_integer(y) and not data.columns.holds_integer(): + y = data.columns[y] + elif not isinstance(data[y], ABCSeries): + raise ValueError("y must be a label or position") + label = kwds['label'] if 'label' in kwds else y + series = data[y].copy() # Don't modify + series.name = label + + for kw in ['xerr', 'yerr']: + if (kw in kwds) and \ + (isinstance(kwds[kw], string_types) or + is_integer(kwds[kw])): + try: + kwds[kw] = data[kwds[kw]] + except (IndexError, KeyError, TypeError): + pass + data = series + plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds) + + plot_obj.generate() + plot_obj.draw() + return plot_obj.result + + +df_kind = """- 'scatter' : scatter plot + - 'hexbin' : hexbin plot""" +series_kind = "" + +df_coord = """x : label or position, default None + y : label or position, default None + Allows plotting of one column versus another""" +series_coord = "" + +df_unique = """stacked : boolean, default False in line and + bar plots, and True in area plot. If True, create stacked plot. 
+ sort_columns : boolean, default False + Sort column names to determine plot ordering + secondary_y : boolean or sequence, default False + Whether to plot on the secondary y-axis + If a list/tuple, which columns to plot on secondary y-axis""" +series_unique = """label : label argument to provide to plot + secondary_y : boolean or sequence of ints, default False + If True then y-axis will be on the right""" + +df_ax = """ax : matplotlib axes object, default None + subplots : boolean, default False + Make separate subplots for each column + sharex : boolean, default True if ax is None else False + In case subplots=True, share x axis and set some x axis labels to + invisible; defaults to True if ax is None otherwise False if an ax + is passed in; Be aware, that passing in both an ax and sharex=True + will alter all x axis labels for all axis in a figure! + sharey : boolean, default False + In case subplots=True, share y axis and set some y axis labels to + invisible + layout : tuple (optional) + (rows, columns) for the layout of subplots""" +series_ax = """ax : matplotlib axes object + If not passed, uses gca()""" + +df_note = """- If `kind` = 'scatter' and the argument `c` is the name of a dataframe + column, the values of that column are used to color each point. + - If `kind` = 'hexbin', you can control the size of the bins with the + `gridsize` argument. By default, a histogram of the counts around each + `(x, y)` point is computed. You can specify alternative aggregations + by passing values to the `C` and `reduce_C_function` arguments. + `C` specifies the value at each `(x, y)` point and `reduce_C_function` + is a function of one argument that reduces all the values in a bin to + a single number (e.g. 
`mean`, `max`, `sum`, `std`).""" +series_note = "" + +_shared_doc_df_kwargs = dict(klass='DataFrame', klass_obj='df', + klass_kind=df_kind, klass_coord=df_coord, + klass_ax=df_ax, klass_unique=df_unique, + klass_note=df_note) +_shared_doc_series_kwargs = dict(klass='Series', klass_obj='s', + klass_kind=series_kind, + klass_coord=series_coord, klass_ax=series_ax, + klass_unique=series_unique, + klass_note=series_note) + +_shared_docs['plot'] = """ + Make plots of %(klass)s using matplotlib / pylab. + + *New in version 0.17.0:* Each plot kind has a corresponding method on the + ``%(klass)s.plot`` accessor: + ``%(klass_obj)s.plot(kind='line')`` is equivalent to + ``%(klass_obj)s.plot.line()``. + + Parameters + ---------- + data : %(klass)s + %(klass_coord)s + kind : str + - 'line' : line plot (default) + - 'bar' : vertical bar plot + - 'barh' : horizontal bar plot + - 'hist' : histogram + - 'box' : boxplot + - 'kde' : Kernel Density Estimation plot + - 'density' : same as 'kde' + - 'area' : area plot + - 'pie' : pie plot + %(klass_kind)s + %(klass_ax)s + figsize : a tuple (width, height) in inches + use_index : boolean, default True + Use index as ticks for x axis + title : string or list + Title to use for the plot. If a string is passed, print the string at + the top of the figure. If a list is passed and `subplots` is True, + print each item in the list above the corresponding subplot. 
+ grid : boolean, default None (matlab style default) + Axis grid lines + legend : False/True/'reverse' + Place legend on axis subplots + style : list or dict + matplotlib line style per column + logx : boolean, default False + Use log scaling on x axis + logy : boolean, default False + Use log scaling on y axis + loglog : boolean, default False + Use log scaling on both x and y axes + xticks : sequence + Values to use for the xticks + yticks : sequence + Values to use for the yticks + xlim : 2-tuple/list + ylim : 2-tuple/list + rot : int, default None + Rotation for ticks (xticks for vertical, yticks for horizontal plots) + fontsize : int, default None + Font size for xticks and yticks + colormap : str or matplotlib colormap object, default None + Colormap to select colors from. If string, load colormap with that name + from matplotlib. + colorbar : boolean, optional + If True, plot colorbar (only relevant for 'scatter' and 'hexbin' plots) + position : float + Specify relative alignments for bar plot layout. + From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 (center) + table : boolean, Series or DataFrame, default False + If True, draw a table using the data in the DataFrame and the data will + be transposed to meet matplotlib's default layout. + If a Series or DataFrame is passed, use passed data to draw a table. + yerr : DataFrame, Series, array-like, dict and str + See :ref:`Plotting with Error Bars ` for + detail. + xerr : same types as yerr. + %(klass_unique)s + mark_right : boolean, default True + When using a secondary_y axis, automatically mark the column + labels with "(right)" in the legend + `**kwds` : keywords + Options to pass to matplotlib plotting method + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + + Notes + ----- + + - See matplotlib documentation online for more on this subject + - If `kind` = 'bar' or 'barh', you can specify relative alignments + for bar plot layout by `position` keyword. 
+ From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 (center) + %(klass_note)s + + """ + + +@Appender(_shared_docs['plot'] % _shared_doc_df_kwargs) +def plot_frame(data, x=None, y=None, kind='line', ax=None, + subplots=False, sharex=None, sharey=False, layout=None, + figsize=None, use_index=True, title=None, grid=None, + legend=True, style=None, logx=False, logy=False, loglog=False, + xticks=None, yticks=None, xlim=None, ylim=None, + rot=None, fontsize=None, colormap=None, table=False, + yerr=None, xerr=None, + secondary_y=False, sort_columns=False, + **kwds): + return _plot(data, kind=kind, x=x, y=y, ax=ax, + subplots=subplots, sharex=sharex, sharey=sharey, + layout=layout, figsize=figsize, use_index=use_index, + title=title, grid=grid, legend=legend, + style=style, logx=logx, logy=logy, loglog=loglog, + xticks=xticks, yticks=yticks, xlim=xlim, ylim=ylim, + rot=rot, fontsize=fontsize, colormap=colormap, table=table, + yerr=yerr, xerr=xerr, + secondary_y=secondary_y, sort_columns=sort_columns, + **kwds) + + +@Appender(_shared_docs['plot'] % _shared_doc_series_kwargs) +def plot_series(data, kind='line', ax=None, # Series unique + figsize=None, use_index=True, title=None, grid=None, + legend=False, style=None, logx=False, logy=False, loglog=False, + xticks=None, yticks=None, xlim=None, ylim=None, + rot=None, fontsize=None, colormap=None, table=False, + yerr=None, xerr=None, + label=None, secondary_y=False, # Series unique + **kwds): + + import matplotlib.pyplot as plt + if ax is None and len(plt.get_fignums()) > 0: + ax = _gca() + ax = MPLPlot._get_ax_layer(ax) + return _plot(data, kind=kind, ax=ax, + figsize=figsize, use_index=use_index, title=title, + grid=grid, legend=legend, + style=style, logx=logx, logy=logy, loglog=loglog, + xticks=xticks, yticks=yticks, xlim=xlim, ylim=ylim, + rot=rot, fontsize=fontsize, colormap=colormap, table=table, + yerr=yerr, xerr=xerr, + label=label, secondary_y=secondary_y, + **kwds) + + +_shared_docs['boxplot'] = """ + 
Make a box plot from DataFrame column optionally grouped by some columns or + other inputs + + Parameters + ---------- + data : the pandas object holding the data + column : column name or list of names, or vector + Can be any valid input to groupby + by : string or sequence + Column in the DataFrame to group by + ax : Matplotlib axes object, optional + fontsize : int or string + rot : label rotation angle + figsize : A tuple (width, height) in inches + grid : Setting this to True will show the grid + layout : tuple (optional) + (rows, columns) for the layout of the plot + return_type : {None, 'axes', 'dict', 'both'}, default None + The kind of object to return. The default is ``axes`` + 'axes' returns the matplotlib axes the boxplot is drawn on; + 'dict' returns a dictionary whose values are the matplotlib + Lines of the boxplot; + 'both' returns a namedtuple with the axes and dict. + + When grouping with ``by``, a Series mapping columns to ``return_type`` + is returned, unless ``return_type`` is None, in which case a NumPy + array of axes is returned with the same shape as ``layout``. + See the prose documentation for more. + + `**kwds` : Keyword Arguments + All other plotting keyword arguments to be passed to + matplotlib's boxplot function + + Returns + ------- + lines : dict + ax : matplotlib Axes + (ax, lines): namedtuple + + Notes + ----- + Use ``return_type='dict'`` when you want to tweak the appearance + of the lines after plotting. In this case a dict containing the Lines + making up the boxes, caps, fliers, medians, and whiskers is returned. 
+ """ + + +@Appender(_shared_docs['boxplot'] % _shared_doc_kwargs) +def boxplot(data, column=None, by=None, ax=None, fontsize=None, + rot=0, grid=True, figsize=None, layout=None, return_type=None, + **kwds): + + # validate return_type: + if return_type not in BoxPlot._valid_return_types: + raise ValueError("return_type must be {'axes', 'dict', 'both'}") + + from pandas import Series, DataFrame + if isinstance(data, Series): + data = DataFrame({'x': data}) + column = 'x' + + def _get_colors(): + return _get_standard_colors(color=kwds.get('color'), num_colors=1) + + def maybe_color_bp(bp): + if 'color' not in kwds: + from matplotlib.artist import setp + setp(bp['boxes'], color=colors[0], alpha=1) + setp(bp['whiskers'], color=colors[0], alpha=1) + setp(bp['medians'], color=colors[2], alpha=1) + + def plot_group(keys, values, ax): + keys = [pprint_thing(x) for x in keys] + values = [np.asarray(remove_na_arraylike(v)) for v in values] + bp = ax.boxplot(values, **kwds) + if fontsize is not None: + ax.tick_params(axis='both', labelsize=fontsize) + if kwds.get('vert', 1): + ax.set_xticklabels(keys, rotation=rot) + else: + ax.set_yticklabels(keys, rotation=rot) + maybe_color_bp(bp) + + # Return axes in multiplot case, maybe revisit later # 985 + if return_type == 'dict': + return bp + elif return_type == 'both': + return BoxPlot.BP(ax=ax, lines=bp) + else: + return ax + + colors = _get_colors() + if column is None: + columns = None + else: + if isinstance(column, (list, tuple)): + columns = column + else: + columns = [column] + + if by is not None: + # Prefer array return type for 2-D plots to match the subplot layout + # https://github.com/pandas-dev/pandas/pull/12216#issuecomment-241175580 + result = _grouped_plot_by_column(plot_group, data, columns=columns, + by=by, grid=grid, figsize=figsize, + ax=ax, layout=layout, + return_type=return_type) + else: + if return_type is None: + return_type = 'axes' + if layout is not None: + raise ValueError("The 'layout' keyword is not 
supported when " + "'by' is None") + + if ax is None: + rc = {'figure.figsize': figsize} if figsize is not None else {} + ax = _gca(rc) + data = data._get_numeric_data() + if columns is None: + columns = data.columns + else: + data = data[columns] + + result = plot_group(columns, data.values.T, ax) + ax.grid(grid) + + return result + + +@Appender(_shared_docs['boxplot'] % _shared_doc_kwargs) +def boxplot_frame(self, column=None, by=None, ax=None, fontsize=None, rot=0, + grid=True, figsize=None, layout=None, + return_type=None, **kwds): + import matplotlib.pyplot as plt + _converter._WARN = False + ax = boxplot(self, column=column, by=by, ax=ax, fontsize=fontsize, + grid=grid, rot=rot, figsize=figsize, layout=layout, + return_type=return_type, **kwds) + plt.draw_if_interactive() + return ax + + +def scatter_plot(data, x, y, by=None, ax=None, figsize=None, grid=False, + **kwargs): + """ + Make a scatter plot from two DataFrame columns + + Parameters + ---------- + data : DataFrame + x : Column name for the x-axis values + y : Column name for the y-axis values + ax : Matplotlib axis object + figsize : A tuple (width, height) in inches + grid : Setting this to True will show the grid + kwargs : other plotting keyword arguments + To be passed to scatter function + + Returns + ------- + fig : matplotlib.Figure + """ + import matplotlib.pyplot as plt + + kwargs.setdefault('edgecolors', 'none') + + def plot_group(group, ax): + xvals = group[x].values + yvals = group[y].values + ax.scatter(xvals, yvals, **kwargs) + ax.grid(grid) + + if by is not None: + fig = _grouped_plot(plot_group, data, by=by, figsize=figsize, ax=ax) + else: + if ax is None: + fig = plt.figure() + ax = fig.add_subplot(111) + else: + fig = ax.get_figure() + plot_group(data, ax) + ax.set_ylabel(pprint_thing(y)) + ax.set_xlabel(pprint_thing(x)) + + ax.grid(grid) + + return fig + + +def hist_frame(data, column=None, by=None, grid=True, xlabelsize=None, + xrot=None, ylabelsize=None, yrot=None, ax=None, 
sharex=False, + sharey=False, figsize=None, layout=None, bins=10, **kwds): + """ + Draw histogram of the DataFrame's series using matplotlib / pylab. + + Parameters + ---------- + data : DataFrame + column : string or sequence + If passed, will be used to limit data to a subset of columns + by : object, optional + If passed, then used to form histograms for separate groups + grid : boolean, default True + Whether to show axis grid lines + xlabelsize : int, default None + If specified changes the x-axis label size + xrot : float, default None + rotation of x axis labels + ylabelsize : int, default None + If specified changes the y-axis label size + yrot : float, default None + rotation of y axis labels + ax : matplotlib axes object, default None + sharex : boolean, default True if ax is None else False + In case subplots=True, share x axis and set some x axis labels to + invisible; defaults to True if ax is None otherwise False if an ax + is passed in; Be aware, that passing in both an ax and sharex=True + will alter all x axis labels for all subplots in a figure! + sharey : boolean, default False + In case subplots=True, share y axis and set some y axis labels to + invisible + figsize : tuple + The size of the figure to create in inches by default + layout : tuple, optional + Tuple of (rows, columns) for the layout of the histograms + bins : integer or sequence, default 10 + Number of histogram bins to be used. If an integer is given, bins + 1 + bin edges are calculated and returned. If bins is a sequence, gives + bin edges, including left edge of first bin and right edge of last + bin. In this case, bins is returned unmodified. + `**kwds` : other plotting keyword arguments + To be passed to hist function + + See Also + -------- + matplotlib.axes.Axes.hist : Plot a histogram using matplotlib. 
+ + """ + _converter._WARN = False + if by is not None: + axes = grouped_hist(data, column=column, by=by, ax=ax, grid=grid, + figsize=figsize, sharex=sharex, sharey=sharey, + layout=layout, bins=bins, xlabelsize=xlabelsize, + xrot=xrot, ylabelsize=ylabelsize, + yrot=yrot, **kwds) + return axes + + if column is not None: + if not isinstance(column, (list, np.ndarray, Index)): + column = [column] + data = data[column] + data = data._get_numeric_data() + naxes = len(data.columns) + + fig, axes = _subplots(naxes=naxes, ax=ax, squeeze=False, + sharex=sharex, sharey=sharey, figsize=figsize, + layout=layout) + _axes = _flatten(axes) + + for i, col in enumerate(com._try_sort(data.columns)): + ax = _axes[i] + ax.hist(data[col].dropna().values, bins=bins, **kwds) + ax.set_title(col) + ax.grid(grid) + + _set_ticks_props(axes, xlabelsize=xlabelsize, xrot=xrot, + ylabelsize=ylabelsize, yrot=yrot) + fig.subplots_adjust(wspace=0.3, hspace=0.3) + + return axes + + +def hist_series(self, by=None, ax=None, grid=True, xlabelsize=None, + xrot=None, ylabelsize=None, yrot=None, figsize=None, + bins=10, **kwds): + """ + Draw histogram of the input series using matplotlib + + Parameters + ---------- + by : object, optional + If passed, then used to form histograms for separate groups + ax : matplotlib axis object + If not passed, uses gca() + grid : boolean, default True + Whether to show axis grid lines + xlabelsize : int, default None + If specified changes the x-axis label size + xrot : float, default None + rotation of x axis labels + ylabelsize : int, default None + If specified changes the y-axis label size + yrot : float, default None + rotation of y axis labels + figsize : tuple, default None + figure size in inches by default + bins : integer or sequence, default 10 + Number of histogram bins to be used. If an integer is given, bins + 1 + bin edges are calculated and returned. If bins is a sequence, gives + bin edges, including left edge of first bin and right edge of last + bin. 
In this case, bins is returned unmodified. + bins: integer, default 10 + Number of histogram bins to be used + `**kwds` : keywords + To be passed to the actual plotting function + + See Also + -------- + matplotlib.axes.Axes.hist : Plot a histogram using matplotlib. + + """ + import matplotlib.pyplot as plt + + if by is None: + if kwds.get('layout', None) is not None: + raise ValueError("The 'layout' keyword is not supported when " + "'by' is None") + # hack until the plotting interface is a bit more unified + fig = kwds.pop('figure', plt.gcf() if plt.get_fignums() else + plt.figure(figsize=figsize)) + if (figsize is not None and tuple(figsize) != + tuple(fig.get_size_inches())): + fig.set_size_inches(*figsize, forward=True) + if ax is None: + ax = fig.gca() + elif ax.get_figure() != fig: + raise AssertionError('passed axis not bound to passed figure') + values = self.dropna().values + + ax.hist(values, bins=bins, **kwds) + ax.grid(grid) + axes = np.array([ax]) + + _set_ticks_props(axes, xlabelsize=xlabelsize, xrot=xrot, + ylabelsize=ylabelsize, yrot=yrot) + + else: + if 'figure' in kwds: + raise ValueError("Cannot pass 'figure' when using the " + "'by' argument, since a new 'Figure' instance " + "will be created") + axes = grouped_hist(self, by=by, ax=ax, grid=grid, figsize=figsize, + bins=bins, xlabelsize=xlabelsize, xrot=xrot, + ylabelsize=ylabelsize, yrot=yrot, **kwds) + + if hasattr(axes, 'ndim'): + if axes.ndim == 1 and len(axes) == 1: + return axes[0] + return axes + + +def grouped_hist(data, column=None, by=None, ax=None, bins=50, figsize=None, + layout=None, sharex=False, sharey=False, rot=90, grid=True, + xlabelsize=None, xrot=None, ylabelsize=None, yrot=None, + **kwargs): + """ + Grouped histogram + + Parameters + ---------- + data: Series/DataFrame + column: object, optional + by: object, optional + ax: axes, optional + bins: int, default 50 + figsize: tuple, optional + layout: optional + sharex: boolean, default False + sharey: boolean, default False + 
rot: int, default 90 + grid: bool, default True + kwargs: dict, keyword arguments passed to matplotlib.Axes.hist + + Returns + ------- + axes: collection of Matplotlib Axes + """ + _converter._WARN = False + + def plot_group(group, ax): + ax.hist(group.dropna().values, bins=bins, **kwargs) + + xrot = xrot or rot + + fig, axes = _grouped_plot(plot_group, data, column=column, + by=by, sharex=sharex, sharey=sharey, ax=ax, + figsize=figsize, layout=layout, rot=rot) + + _set_ticks_props(axes, xlabelsize=xlabelsize, xrot=xrot, + ylabelsize=ylabelsize, yrot=yrot) + + fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9, + hspace=0.5, wspace=0.3) + return axes + + +def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None, + rot=0, grid=True, ax=None, figsize=None, + layout=None, **kwds): + """ + Make box plots from DataFrameGroupBy data. + + Parameters + ---------- + grouped : Grouped DataFrame + subplots : + * ``False`` - no subplots will be used + * ``True`` - create a subplot for each group + column : column name or list of names, or vector + Can be any valid input to groupby + fontsize : int or string + rot : label rotation angle + grid : Setting this to True will show the grid + ax : Matplotlib axis object, default None + figsize : A tuple (width, height) in inches + layout : tuple (optional) + (rows, columns) for the layout of the plot + `**kwds` : Keyword Arguments + All other plotting keyword arguments to be passed to + matplotlib's boxplot function + + Returns + ------- + dict of key/value = group key/DataFrame.boxplot return value + or DataFrame.boxplot return value in case subplots=figures=False + + Examples + -------- + >>> import pandas + >>> import numpy as np + >>> import itertools + >>> + >>> tuples = [t for t in itertools.product(range(1000), range(4))] + >>> index = pandas.MultiIndex.from_tuples(tuples, names=['lvl0', 'lvl1']) + >>> data = np.random.randn(len(index),4) + >>> df = pandas.DataFrame(data, columns=list('ABCD'), 
index=index) + >>> + >>> grouped = df.groupby(level='lvl1') + >>> boxplot_frame_groupby(grouped) + >>> + >>> grouped = df.unstack(level='lvl1').groupby(level=0, axis=1) + >>> boxplot_frame_groupby(grouped, subplots=False) + """ + _converter._WARN = False + if subplots is True: + naxes = len(grouped) + fig, axes = _subplots(naxes=naxes, squeeze=False, + ax=ax, sharex=False, sharey=True, + figsize=figsize, layout=layout) + axes = _flatten(axes) + + from pandas.core.series import Series + ret = Series() + for (key, group), ax in zip(grouped, axes): + d = group.boxplot(ax=ax, column=column, fontsize=fontsize, + rot=rot, grid=grid, **kwds) + ax.set_title(pprint_thing(key)) + ret.loc[key] = d + fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, + right=0.9, wspace=0.2) + else: + from pandas.core.reshape.concat import concat + keys, frames = zip(*grouped) + if grouped.axis == 0: + df = concat(frames, keys=keys, axis=1) + else: + if len(frames) > 1: + df = frames[0].join(frames[1::]) + else: + df = frames[0] + ret = df.boxplot(column=column, fontsize=fontsize, rot=rot, + grid=grid, ax=ax, figsize=figsize, + layout=layout, **kwds) + return ret + + +def _grouped_plot(plotf, data, column=None, by=None, numeric_only=True, + figsize=None, sharex=True, sharey=True, layout=None, + rot=0, ax=None, **kwargs): + from pandas import DataFrame + + if figsize == 'default': + # allowed to specify mpl default with 'default' + warnings.warn("figsize='default' is deprecated. 
Specify figure" + "size by tuple instead", FutureWarning, stacklevel=4) + figsize = None + + grouped = data.groupby(by) + if column is not None: + grouped = grouped[column] + + naxes = len(grouped) + fig, axes = _subplots(naxes=naxes, figsize=figsize, + sharex=sharex, sharey=sharey, ax=ax, + layout=layout) + + _axes = _flatten(axes) + + for i, (key, group) in enumerate(grouped): + ax = _axes[i] + if numeric_only and isinstance(group, DataFrame): + group = group._get_numeric_data() + plotf(group, ax, **kwargs) + ax.set_title(pprint_thing(key)) + + return fig, axes + + +def _grouped_plot_by_column(plotf, data, columns=None, by=None, + numeric_only=True, grid=False, + figsize=None, ax=None, layout=None, + return_type=None, **kwargs): + grouped = data.groupby(by) + if columns is None: + if not isinstance(by, (list, tuple)): + by = [by] + columns = data._get_numeric_data().columns.difference(by) + naxes = len(columns) + fig, axes = _subplots(naxes=naxes, sharex=True, sharey=True, + figsize=figsize, ax=ax, layout=layout) + + _axes = _flatten(axes) + + ax_values = [] + + for i, col in enumerate(columns): + ax = _axes[i] + gp_col = grouped[col] + keys, values = zip(*gp_col) + re_plotf = plotf(keys, values, ax, **kwargs) + ax.set_title(col) + ax.set_xlabel(pprint_thing(by)) + ax_values.append(re_plotf) + ax.grid(grid) + + from pandas.core.series import Series + result = Series(ax_values, index=columns) + + # Return axes in multiplot case, maybe revisit later # 985 + if return_type is None: + result = axes + + byline = by[0] if len(by) == 1 else by + fig.suptitle('Boxplot grouped by {byline}'.format(byline=byline)) + fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9, wspace=0.2) + + return result + + +class BasePlotMethods(PandasObject): + + def __init__(self, data): + self._data = data + + def __call__(self, *args, **kwargs): + raise NotImplementedError + + +class SeriesPlotMethods(BasePlotMethods): + """Series plotting accessor and method + + Examples + 
-------- + >>> s.plot.line() + >>> s.plot.bar() + >>> s.plot.hist() + + Plotting methods can also be accessed by calling the accessor as a method + with the ``kind`` argument: + ``s.plot(kind='line')`` is equivalent to ``s.plot.line()`` + """ + + def __call__(self, kind='line', ax=None, + figsize=None, use_index=True, title=None, grid=None, + legend=False, style=None, logx=False, logy=False, + loglog=False, xticks=None, yticks=None, + xlim=None, ylim=None, + rot=None, fontsize=None, colormap=None, table=False, + yerr=None, xerr=None, + label=None, secondary_y=False, **kwds): + return plot_series(self._data, kind=kind, ax=ax, figsize=figsize, + use_index=use_index, title=title, grid=grid, + legend=legend, style=style, logx=logx, logy=logy, + loglog=loglog, xticks=xticks, yticks=yticks, + xlim=xlim, ylim=ylim, rot=rot, fontsize=fontsize, + colormap=colormap, table=table, yerr=yerr, + xerr=xerr, label=label, secondary_y=secondary_y, + **kwds) + __call__.__doc__ = plot_series.__doc__ + + def line(self, **kwds): + """ + Line plot + + Parameters + ---------- + `**kwds` : optional + Additional keyword arguments are documented in + :meth:`pandas.Series.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + + Examples + -------- + + .. plot:: + :context: close-figs + + >>> s = pd.Series([1, 3, 2]) + >>> s.plot.line() + """ + return self(kind='line', **kwds) + + def bar(self, **kwds): + """ + Vertical bar plot + + Parameters + ---------- + `**kwds` : optional + Additional keyword arguments are documented in + :meth:`pandas.Series.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='bar', **kwds) + + def barh(self, **kwds): + """ + Horizontal bar plot + + Parameters + ---------- + `**kwds` : optional + Additional keyword arguments are documented in + :meth:`pandas.Series.plot`. 
+ + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='barh', **kwds) + + def box(self, **kwds): + """ + Boxplot + + Parameters + ---------- + `**kwds` : optional + Additional keyword arguments are documented in + :meth:`pandas.Series.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='box', **kwds) + + def hist(self, bins=10, **kwds): + """ + Histogram + + Parameters + ---------- + bins: integer, default 10 + Number of histogram bins to be used + `**kwds` : optional + Additional keyword arguments are documented in + :meth:`pandas.Series.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='hist', bins=bins, **kwds) + + def kde(self, bw_method=None, ind=None, **kwds): + """ + Kernel Density Estimate plot using Gaussian kernels. + + In statistics, kernel density estimation (KDE) is a non-parametric way + to estimate the probability density function (PDF) of a random + variable. This function uses Gaussian kernels and includes automatic + bandwith determination. + + Parameters + ---------- + bw_method : str, scalar or callable, optional + The method used to calculate the estimator bandwidth. This can be + 'scott', 'silverman', a scalar constant or a callable. + If None (default), 'scott' is used. + See :class:`scipy.stats.gaussian_kde` for more information. + ind : NumPy array or integer, optional + Evaluation points for the estimated PDF. If None (default), + 1000 equally spaced points are used. If `ind` is a NumPy array, the + kde is evaluated at the points passed. If `ind` is an integer, + `ind` number of equally spaced points are used. + kwds : optional + Additional keyword arguments are documented in + :meth:`pandas.Series.plot`. 
+ + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + + See also + -------- + scipy.stats.gaussian_kde : Representation of a kernel-density + estimate using Gaussian kernels. This is the function used + internally to estimate the PDF. + + Examples + -------- + Given a Series of points randomly sampled from an unknown + distribution, estimate this distribution using KDE with automatic + bandwidth determination and plot the results, evaluating them at + 1000 equally spaced points (default): + + .. plot:: + :context: close-figs + + >>> s = pd.Series([1, 2, 2.5, 3, 3.5, 4, 5]) + >>> ax = s.plot.kde() + + + An scalar fixed bandwidth can be specified. Using a too small bandwidth + can lead to overfitting, while a too large bandwidth can result in + underfitting: + + .. plot:: + :context: close-figs + + >>> ax = s.plot.kde(bw_method=0.3) + + .. plot:: + :context: close-figs + + >>> ax = s.plot.kde(bw_method=3) + + Finally, the `ind` parameter determines the evaluation points for the + plot of the estimated PDF: + + .. plot:: + :context: close-figs + + >>> ax = s.plot.kde(ind=[1, 2, 3, 4, 5]) + """ + return self(kind='kde', bw_method=bw_method, ind=ind, **kwds) + + density = kde + + def area(self, **kwds): + """ + Area plot + + Parameters + ---------- + `**kwds` : optional + Additional keyword arguments are documented in + :meth:`pandas.Series.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='area', **kwds) + + def pie(self, **kwds): + """ + Pie chart + + Parameters + ---------- + `**kwds` : optional + Additional keyword arguments are documented in + :meth:`pandas.Series.plot`. 
+ + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='pie', **kwds) + + +class FramePlotMethods(BasePlotMethods): + """DataFrame plotting accessor and method + + Examples + -------- + >>> df.plot.line() + >>> df.plot.scatter('x', 'y') + >>> df.plot.hexbin() + + These plotting methods can also be accessed by calling the accessor as a + method with the ``kind`` argument: + ``df.plot(kind='line')`` is equivalent to ``df.plot.line()`` + """ + + def __call__(self, x=None, y=None, kind='line', ax=None, + subplots=False, sharex=None, sharey=False, layout=None, + figsize=None, use_index=True, title=None, grid=None, + legend=True, style=None, logx=False, logy=False, loglog=False, + xticks=None, yticks=None, xlim=None, ylim=None, + rot=None, fontsize=None, colormap=None, table=False, + yerr=None, xerr=None, + secondary_y=False, sort_columns=False, **kwds): + return plot_frame(self._data, kind=kind, x=x, y=y, ax=ax, + subplots=subplots, sharex=sharex, sharey=sharey, + layout=layout, figsize=figsize, use_index=use_index, + title=title, grid=grid, legend=legend, style=style, + logx=logx, logy=logy, loglog=loglog, xticks=xticks, + yticks=yticks, xlim=xlim, ylim=ylim, rot=rot, + fontsize=fontsize, colormap=colormap, table=table, + yerr=yerr, xerr=xerr, secondary_y=secondary_y, + sort_columns=sort_columns, **kwds) + __call__.__doc__ = plot_frame.__doc__ + + def line(self, x=None, y=None, **kwds): + """ + Line plot + + Parameters + ---------- + x, y : label or position, optional + Coordinates for each point. + `**kwds` : optional + Additional keyword arguments are documented in + :meth:`pandas.DataFrame.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='line', x=x, y=y, **kwds) + + def bar(self, x=None, y=None, **kwds): + """ + Vertical bar plot + + Parameters + ---------- + x, y : label or position, optional + Coordinates for each point. 
+ `**kwds` : optional + Additional keyword arguments are documented in + :meth:`pandas.DataFrame.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='bar', x=x, y=y, **kwds) + + def barh(self, x=None, y=None, **kwds): + """ + Horizontal bar plot + + Parameters + ---------- + x, y : label or position, optional + Coordinates for each point. + `**kwds` : optional + Additional keyword arguments are documented in + :meth:`pandas.DataFrame.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='barh', x=x, y=y, **kwds) + + def box(self, by=None, **kwds): + r""" + Boxplot + + Parameters + ---------- + by : string or sequence + Column in the DataFrame to group by. + `**kwds` : optional + Additional keyword arguments are documented in + :meth:`pandas.DataFrame.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='box', by=by, **kwds) + + def hist(self, by=None, bins=10, **kwds): + """ + Histogram + + Parameters + ---------- + by : string or sequence + Column in the DataFrame to group by. + bins: integer, default 10 + Number of histogram bins to be used + `**kwds` : optional + Additional keyword arguments are documented in + :meth:`pandas.DataFrame.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='hist', by=by, bins=bins, **kwds) + + def kde(self, bw_method=None, ind=None, **kwds): + """ + Kernel Density Estimate plot + + Parameters + ---------- + bw_method: str, scalar or callable, optional + The method used to calculate the estimator bandwidth. This can be + 'scott', 'silverman', a scalar constant or a callable. + If None (default), 'scott' is used. + See :class:`scipy.stats.gaussian_kde` for more information. + ind : NumPy array or integer, optional + Evaluation points. If None (default), 1000 equally spaced points + are used. 
If `ind` is a NumPy array, the kde is evaluated at the + points passed. If `ind` is an integer, `ind` number of equally + spaced points are used. + `**kwds` : optional + Additional keyword arguments are documented in + :meth:`pandas.DataFrame.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='kde', bw_method=bw_method, ind=ind, **kwds) + + density = kde + + def area(self, x=None, y=None, **kwds): + """ + Area plot + + Parameters + ---------- + x, y : label or position, optional + Coordinates for each point. + `**kwds` : optional + Additional keyword arguments are documented in + :meth:`pandas.DataFrame.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='area', x=x, y=y, **kwds) + + def pie(self, y=None, **kwds): + """ + Pie chart + + Parameters + ---------- + y : label or position, optional + Column to plot. + `**kwds` : optional + Additional keyword arguments are documented in + :meth:`pandas.DataFrame.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='pie', y=y, **kwds) + + def scatter(self, x, y, s=None, c=None, **kwds): + """ + Scatter plot + + Parameters + ---------- + x, y : label or position, optional + Coordinates for each point. + s : scalar or array_like, optional + Size of each point. + c : label or position, optional + Color of each point. + `**kwds` : optional + Additional keyword arguments are documented in + :meth:`pandas.DataFrame.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='scatter', x=x, y=y, c=c, s=s, **kwds) + + def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, + **kwds): + """ + Hexbin plot + + Parameters + ---------- + x, y : label or position, optional + Coordinates for each point. + C : label or position, optional + The value at each `(x, y)` point. 
+ reduce_C_function : callable, optional + Function of one argument that reduces all the values in a bin to + a single number (e.g. `mean`, `max`, `sum`, `std`). + gridsize : int, optional + Number of bins. + `**kwds` : optional + Additional keyword arguments are documented in + :meth:`pandas.DataFrame.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + if reduce_C_function is not None: + kwds['reduce_C_function'] = reduce_C_function + if gridsize is not None: + kwds['gridsize'] = gridsize + return self(kind='hexbin', x=x, y=y, C=C, **kwds) diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py new file mode 100644 index 0000000000000..03a06169d46bc --- /dev/null +++ b/pandas/plotting/_misc.py @@ -0,0 +1,614 @@ +# being a bit too dynamic +# pylint: disable=E1101 +from __future__ import division + +import numpy as np + +from pandas.util._decorators import deprecate_kwarg +from pandas.core.dtypes.missing import notna +from pandas.compat import range, lrange, lmap, zip +from pandas.io.formats.printing import pprint_thing + + +from pandas.plotting._style import _get_standard_colors +from pandas.plotting._tools import _subplots, _set_ticks_props + + +def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False, + diagonal='hist', marker='.', density_kwds=None, + hist_kwds=None, range_padding=0.05, **kwds): + """ + Draw a matrix of scatter plots. + + Parameters + ---------- + frame : DataFrame + alpha : float, optional + amount of transparency applied + figsize : (float,float), optional + a tuple (width, height) in inches + ax : Matplotlib axis object, optional + grid : bool, optional + setting this to True will show the grid + diagonal : {'hist', 'kde'} + pick between 'kde' and 'hist' for + either Kernel Density Estimation or Histogram + plot in the diagonal + marker : str, optional + Matplotlib marker type, default '.' 
+ hist_kwds : other plotting keyword arguments + To be passed to hist function + density_kwds : other plotting keyword arguments + To be passed to kernel density estimate plot + range_padding : float, optional + relative extension of axis range in x and y + with respect to (x_max - x_min) or (y_max - y_min), + default 0.05 + kwds : other plotting keyword arguments + To be passed to scatter function + + Examples + -------- + >>> df = DataFrame(np.random.randn(1000, 4), columns=['A','B','C','D']) + >>> scatter_matrix(df, alpha=0.2) + """ + + df = frame._get_numeric_data() + n = df.columns.size + naxes = n * n + fig, axes = _subplots(naxes=naxes, figsize=figsize, ax=ax, + squeeze=False) + + # no gaps between subplots + fig.subplots_adjust(wspace=0, hspace=0) + + mask = notna(df) + + marker = _get_marker_compat(marker) + + hist_kwds = hist_kwds or {} + density_kwds = density_kwds or {} + + # GH 14855 + kwds.setdefault('edgecolors', 'none') + + boundaries_list = [] + for a in df.columns: + values = df[a].values[mask[a].values] + rmin_, rmax_ = np.min(values), np.max(values) + rdelta_ext = (rmax_ - rmin_) * range_padding / 2. + boundaries_list.append((rmin_ - rdelta_ext, rmax_ + rdelta_ext)) + + for i, a in zip(lrange(n), df.columns): + for j, b in zip(lrange(n), df.columns): + ax = axes[i, j] + + if i == j: + values = df[a].values[mask[a].values] + + # Deal with the diagonal by drawing a histogram there. 
+ if diagonal == 'hist': + ax.hist(values, **hist_kwds) + + elif diagonal in ('kde', 'density'): + from scipy.stats import gaussian_kde + y = values + gkde = gaussian_kde(y) + ind = np.linspace(y.min(), y.max(), 1000) + ax.plot(ind, gkde.evaluate(ind), **density_kwds) + + ax.set_xlim(boundaries_list[i]) + + else: + common = (mask[a] & mask[b]).values + + ax.scatter(df[b][common], df[a][common], + marker=marker, alpha=alpha, **kwds) + + ax.set_xlim(boundaries_list[j]) + ax.set_ylim(boundaries_list[i]) + + ax.set_xlabel(b) + ax.set_ylabel(a) + + if j != 0: + ax.yaxis.set_visible(False) + if i != n - 1: + ax.xaxis.set_visible(False) + + if len(df.columns) > 1: + lim1 = boundaries_list[0] + locs = axes[0][1].yaxis.get_majorticklocs() + locs = locs[(lim1[0] <= locs) & (locs <= lim1[1])] + adj = (locs - lim1[0]) / (lim1[1] - lim1[0]) + + lim0 = axes[0][0].get_ylim() + adj = adj * (lim0[1] - lim0[0]) + lim0[0] + axes[0][0].yaxis.set_ticks(adj) + + if np.all(locs == locs.astype(int)): + # if all ticks are int + locs = locs.astype(int) + axes[0][0].yaxis.set_ticklabels(locs) + + _set_ticks_props(axes, xlabelsize=8, xrot=90, ylabelsize=8, yrot=0) + + return axes + + +def _get_marker_compat(marker): + import matplotlib.lines as mlines + import matplotlib as mpl + if mpl.__version__ < '1.1.0' and marker == '.': + return 'o' + if marker not in mlines.lineMarkers: + return 'o' + return marker + + +def radviz(frame, class_column, ax=None, color=None, colormap=None, **kwds): + """ + Plot a multidimensional dataset in 2D. + + Each Series in the DataFrame is represented as a evenly distributed + slice on a circle. Each data point is rendered in the circle according to + the value on each Series. Highly correlated `Series` in the `DataFrame` + are placed closer on the unit circle. + + RadViz allow to project a N-dimensional data set into a 2D space where the + influence of each dimension can be interpreted as a balance between the + influence of all dimensions. 
+ + More info available at the `original article + `_ + describing RadViz. + + Parameters + ---------- + frame : `DataFrame` + Pandas object holding the data. + class_column : str + Column name containing the name of the data point category. + ax : :class:`matplotlib.axes.Axes`, optional + A plot instance to which to add the information. + color : list[str] or tuple[str], optional + Assign a color to each category. Example: ['blue', 'green']. + colormap : str or :class:`matplotlib.colors.Colormap`, default None + Colormap to select colors from. If string, load colormap with that + name from matplotlib. + kwds : optional + Options to pass to matplotlib scatter plotting method. + + Returns + ------- + axes : :class:`matplotlib.axes.Axes` + + See Also + -------- + pandas.plotting.andrews_curves : Plot clustering visualization + + Examples + -------- + .. plot:: + :context: close-figs + + >>> df = pd.DataFrame({ + ... 'SepalLength': [6.5, 7.7, 5.1, 5.8, 7.6, 5.0, 5.4, 4.6, + ... 6.7, 4.6], + ... 'SepalWidth': [3.0, 3.8, 3.8, 2.7, 3.0, 2.3, 3.0, 3.2, + ... 3.3, 3.6], + ... 'PetalLength': [5.5, 6.7, 1.9, 5.1, 6.6, 3.3, 4.5, 1.4, + ... 5.7, 1.0], + ... 'PetalWidth': [1.8, 2.2, 0.4, 1.9, 2.1, 1.0, 1.5, 0.2, + ... 2.1, 0.2], + ... 'Category': ['virginica', 'virginica', 'setosa', + ... 'virginica', 'virginica', 'versicolor', + ... 'versicolor', 'setosa', 'virginica', + ... 'setosa'] + ... 
}) + >>> rad_viz = pd.plotting.radviz(df, 'Category') + """ + import matplotlib.pyplot as plt + import matplotlib.patches as patches + + def normalize(series): + a = min(series) + b = max(series) + return (series - a) / (b - a) + + n = len(frame) + classes = frame[class_column].drop_duplicates() + class_col = frame[class_column] + df = frame.drop(class_column, axis=1).apply(normalize) + + if ax is None: + ax = plt.gca(xlim=[-1, 1], ylim=[-1, 1]) + + to_plot = {} + colors = _get_standard_colors(num_colors=len(classes), colormap=colormap, + color_type='random', color=color) + + for kls in classes: + to_plot[kls] = [[], []] + + m = len(frame.columns) - 1 + s = np.array([(np.cos(t), np.sin(t)) + for t in [2.0 * np.pi * (i / float(m)) + for i in range(m)]]) + + for i in range(n): + row = df.iloc[i].values + row_ = np.repeat(np.expand_dims(row, axis=1), 2, axis=1) + y = (s * row_).sum(axis=0) / row.sum() + kls = class_col.iat[i] + to_plot[kls][0].append(y[0]) + to_plot[kls][1].append(y[1]) + + for i, kls in enumerate(classes): + ax.scatter(to_plot[kls][0], to_plot[kls][1], color=colors[i], + label=pprint_thing(kls), **kwds) + ax.legend() + + ax.add_patch(patches.Circle((0.0, 0.0), radius=1.0, facecolor='none')) + + for xy, name in zip(s, df.columns): + + ax.add_patch(patches.Circle(xy, radius=0.025, facecolor='gray')) + + if xy[0] < 0.0 and xy[1] < 0.0: + ax.text(xy[0] - 0.025, xy[1] - 0.025, name, + ha='right', va='top', size='small') + elif xy[0] < 0.0 and xy[1] >= 0.0: + ax.text(xy[0] - 0.025, xy[1] + 0.025, name, + ha='right', va='bottom', size='small') + elif xy[0] >= 0.0 and xy[1] < 0.0: + ax.text(xy[0] + 0.025, xy[1] - 0.025, name, + ha='left', va='top', size='small') + elif xy[0] >= 0.0 and xy[1] >= 0.0: + ax.text(xy[0] + 0.025, xy[1] + 0.025, name, + ha='left', va='bottom', size='small') + + ax.axis('equal') + return ax + + +@deprecate_kwarg(old_arg_name='data', new_arg_name='frame') +def andrews_curves(frame, class_column, ax=None, samples=200, color=None, + 
colormap=None, **kwds): + """ + Generates a matplotlib plot of Andrews curves, for visualising clusters of + multivariate data. + + Andrews curves have the functional form: + + f(t) = x_1/sqrt(2) + x_2 sin(t) + x_3 cos(t) + + x_4 sin(2t) + x_5 cos(2t) + ... + + Where x coefficients correspond to the values of each dimension and t is + linearly spaced between -pi and +pi. Each row of frame then corresponds to + a single curve. + + Parameters + ---------- + frame : DataFrame + Data to be plotted, preferably normalized to (0.0, 1.0) + class_column : Name of the column containing class names + ax : matplotlib axes object, default None + samples : Number of points to plot in each curve + color: list or tuple, optional + Colors to use for the different classes + colormap : str or matplotlib colormap object, default None + Colormap to select colors from. If string, load colormap with that name + from matplotlib. + kwds: keywords + Options to pass to matplotlib plotting method + + Returns + ------- + ax: Matplotlib axis object + + """ + from math import sqrt, pi + import matplotlib.pyplot as plt + + def function(amplitudes): + def f(t): + x1 = amplitudes[0] + result = x1 / sqrt(2.0) + + # Take the rest of the coefficients and resize them + # appropriately. Take a copy of amplitudes as otherwise numpy + # deletes the element from amplitudes itself. + coeffs = np.delete(np.copy(amplitudes), 0) + coeffs.resize(int((coeffs.size + 1) / 2), 2) + + # Generate the harmonics and arguments for the sin and cos + # functions. 
+ harmonics = np.arange(0, coeffs.shape[0]) + 1 + trig_args = np.outer(harmonics, t) + + result += np.sum(coeffs[:, 0, np.newaxis] * np.sin(trig_args) + + coeffs[:, 1, np.newaxis] * np.cos(trig_args), + axis=0) + return result + return f + + n = len(frame) + class_col = frame[class_column] + classes = frame[class_column].drop_duplicates() + df = frame.drop(class_column, axis=1) + t = np.linspace(-pi, pi, samples) + used_legends = set([]) + + color_values = _get_standard_colors(num_colors=len(classes), + colormap=colormap, color_type='random', + color=color) + colors = dict(zip(classes, color_values)) + if ax is None: + ax = plt.gca(xlim=(-pi, pi)) + for i in range(n): + row = df.iloc[i].values + f = function(row) + y = f(t) + kls = class_col.iat[i] + label = pprint_thing(kls) + if label not in used_legends: + used_legends.add(label) + ax.plot(t, y, color=colors[kls], label=label, **kwds) + else: + ax.plot(t, y, color=colors[kls], **kwds) + + ax.legend(loc='upper right') + ax.grid() + return ax + + +def bootstrap_plot(series, fig=None, size=50, samples=500, **kwds): + """Bootstrap plot. 
+ + Parameters + ---------- + series: Time series + fig: matplotlib figure object, optional + size: number of data points to consider during each sampling + samples: number of times the bootstrap procedure is performed + kwds: optional keyword arguments for plotting commands, must be accepted + by both hist and plot + + Returns + ------- + fig: matplotlib figure + """ + import random + import matplotlib.pyplot as plt + + # random.sample(ndarray, int) fails on python 3.3, sigh + data = list(series.values) + samplings = [random.sample(data, size) for _ in range(samples)] + + means = np.array([np.mean(sampling) for sampling in samplings]) + medians = np.array([np.median(sampling) for sampling in samplings]) + midranges = np.array([(min(sampling) + max(sampling)) * 0.5 + for sampling in samplings]) + if fig is None: + fig = plt.figure() + x = lrange(samples) + axes = [] + ax1 = fig.add_subplot(2, 3, 1) + ax1.set_xlabel("Sample") + axes.append(ax1) + ax1.plot(x, means, **kwds) + ax2 = fig.add_subplot(2, 3, 2) + ax2.set_xlabel("Sample") + axes.append(ax2) + ax2.plot(x, medians, **kwds) + ax3 = fig.add_subplot(2, 3, 3) + ax3.set_xlabel("Sample") + axes.append(ax3) + ax3.plot(x, midranges, **kwds) + ax4 = fig.add_subplot(2, 3, 4) + ax4.set_xlabel("Mean") + axes.append(ax4) + ax4.hist(means, **kwds) + ax5 = fig.add_subplot(2, 3, 5) + ax5.set_xlabel("Median") + axes.append(ax5) + ax5.hist(medians, **kwds) + ax6 = fig.add_subplot(2, 3, 6) + ax6.set_xlabel("Midrange") + axes.append(ax6) + ax6.hist(midranges, **kwds) + for axis in axes: + plt.setp(axis.get_xticklabels(), fontsize=8) + plt.setp(axis.get_yticklabels(), fontsize=8) + return fig + + +@deprecate_kwarg(old_arg_name='colors', new_arg_name='color') +@deprecate_kwarg(old_arg_name='data', new_arg_name='frame', stacklevel=3) +def parallel_coordinates(frame, class_column, cols=None, ax=None, color=None, + use_columns=False, xticks=None, colormap=None, + axvlines=True, axvlines_kwds=None, sort_labels=False, + **kwds): + 
"""Parallel coordinates plotting. + + Parameters + ---------- + frame: DataFrame + class_column: str + Column name containing class names + cols: list, optional + A list of column names to use + ax: matplotlib.axis, optional + matplotlib axis object + color: list or tuple, optional + Colors to use for the different classes + use_columns: bool, optional + If true, columns will be used as xticks + xticks: list or tuple, optional + A list of values to use for xticks + colormap: str or matplotlib colormap, default None + Colormap to use for line colors. + axvlines: bool, optional + If true, vertical lines will be added at each xtick + axvlines_kwds: keywords, optional + Options to be passed to axvline method for vertical lines + sort_labels: bool, False + Sort class_column labels, useful when assigning colors + + .. versionadded:: 0.20.0 + + kwds: keywords + Options to pass to matplotlib plotting method + + Returns + ------- + ax: matplotlib axis object + + Examples + -------- + >>> from pandas import read_csv + >>> from pandas.tools.plotting import parallel_coordinates + >>> from matplotlib import pyplot as plt + >>> df = read_csv('https://raw.github.com/pandas-dev/pandas/master' + '/pandas/tests/data/iris.csv') + >>> parallel_coordinates(df, 'Name', color=('#556270', + '#4ECDC4', '#C7F464')) + >>> plt.show() + """ + if axvlines_kwds is None: + axvlines_kwds = {'linewidth': 1, 'color': 'black'} + import matplotlib.pyplot as plt + + n = len(frame) + classes = frame[class_column].drop_duplicates() + class_col = frame[class_column] + + if cols is None: + df = frame.drop(class_column, axis=1) + else: + df = frame[cols] + + used_legends = set([]) + + ncols = len(df.columns) + + # determine values to use for xticks + if use_columns is True: + if not np.all(np.isreal(list(df.columns))): + raise ValueError('Columns must be numeric to be used as xticks') + x = df.columns + elif xticks is not None: + if not np.all(np.isreal(xticks)): + raise ValueError('xticks specified must be 
numeric') + elif len(xticks) != ncols: + raise ValueError('Length of xticks must match number of columns') + x = xticks + else: + x = lrange(ncols) + + if ax is None: + ax = plt.gca() + + color_values = _get_standard_colors(num_colors=len(classes), + colormap=colormap, color_type='random', + color=color) + + if sort_labels: + classes = sorted(classes) + color_values = sorted(color_values) + colors = dict(zip(classes, color_values)) + + for i in range(n): + y = df.iloc[i].values + kls = class_col.iat[i] + label = pprint_thing(kls) + if label not in used_legends: + used_legends.add(label) + ax.plot(x, y, color=colors[kls], label=label, **kwds) + else: + ax.plot(x, y, color=colors[kls], **kwds) + + if axvlines: + for i in x: + ax.axvline(i, **axvlines_kwds) + + ax.set_xticks(x) + ax.set_xticklabels(df.columns) + ax.set_xlim(x[0], x[-1]) + ax.legend(loc='upper right') + ax.grid() + return ax + + +def lag_plot(series, lag=1, ax=None, **kwds): + """Lag plot for time series. + + Parameters + ---------- + series: Time series + lag: lag of the scatter plot, default 1 + ax: Matplotlib axis object, optional + kwds: Matplotlib scatter method keyword arguments, optional + + Returns + ------- + ax: Matplotlib axis object + """ + import matplotlib.pyplot as plt + + # workaround because `c='b'` is hardcoded in matplotlibs scatter method + kwds.setdefault('c', plt.rcParams['patch.facecolor']) + + data = series.values + y1 = data[:-lag] + y2 = data[lag:] + if ax is None: + ax = plt.gca() + ax.set_xlabel("y(t)") + ax.set_ylabel("y(t + {lag})".format(lag=lag)) + ax.scatter(y1, y2, **kwds) + return ax + + +def autocorrelation_plot(series, ax=None, **kwds): + """Autocorrelation plot for time series. 
+ + Parameters: + ----------- + series: Time series + ax: Matplotlib axis object, optional + kwds : keywords + Options to pass to matplotlib plotting method + + Returns: + ----------- + ax: Matplotlib axis object + """ + import matplotlib.pyplot as plt + n = len(series) + data = np.asarray(series) + if ax is None: + ax = plt.gca(xlim=(1, n), ylim=(-1.0, 1.0)) + mean = np.mean(data) + c0 = np.sum((data - mean) ** 2) / float(n) + + def r(h): + return ((data[:n - h] - mean) * + (data[h:] - mean)).sum() / float(n) / c0 + x = np.arange(n) + 1 + y = lmap(r, x) + z95 = 1.959963984540054 + z99 = 2.5758293035489004 + ax.axhline(y=z99 / np.sqrt(n), linestyle='--', color='grey') + ax.axhline(y=z95 / np.sqrt(n), color='grey') + ax.axhline(y=0.0, color='black') + ax.axhline(y=-z95 / np.sqrt(n), color='grey') + ax.axhline(y=-z99 / np.sqrt(n), linestyle='--', color='grey') + ax.set_xlabel("Lag") + ax.set_ylabel("Autocorrelation") + ax.plot(x, y, **kwds) + if 'label' in kwds: + ax.legend() + ax.grid() + return ax diff --git a/pandas/plotting/_style.py b/pandas/plotting/_style.py new file mode 100644 index 0000000000000..426b29a8840f4 --- /dev/null +++ b/pandas/plotting/_style.py @@ -0,0 +1,183 @@ +# being a bit too dynamic +# pylint: disable=E1101 +from __future__ import division + +import warnings +from contextlib import contextmanager +import re + +import numpy as np + +from pandas.core.dtypes.common import is_list_like +from pandas.compat import lrange, lmap +import pandas.compat as compat +from pandas.plotting._compat import _mpl_ge_2_0_0 + + +def _get_standard_colors(num_colors=None, colormap=None, color_type='default', + color=None): + import matplotlib.pyplot as plt + + if color is None and colormap is not None: + if isinstance(colormap, compat.string_types): + import matplotlib.cm as cm + cmap = colormap + colormap = cm.get_cmap(colormap) + if colormap is None: + raise ValueError("Colormap {0} is not recognized".format(cmap)) + colors = lmap(colormap, np.linspace(0, 1, 
num=num_colors)) + elif color is not None: + if colormap is not None: + warnings.warn("'color' and 'colormap' cannot be used " + "simultaneously. Using 'color'") + colors = list(color) if is_list_like(color) else color + else: + if color_type == 'default': + # need to call list() on the result to copy so we don't + # modify the global rcParams below + try: + colors = [c['color'] + for c in list(plt.rcParams['axes.prop_cycle'])] + except KeyError: + colors = list(plt.rcParams.get('axes.color_cycle', + list('bgrcmyk'))) + if isinstance(colors, compat.string_types): + colors = list(colors) + elif color_type == 'random': + import pandas.core.common as com + + def random_color(column): + """ Returns a random color represented as a list of length 3""" + # GH17525 use common._random_state to avoid resetting the seed + rs = com._random_state(column) + return rs.rand(3).tolist() + + colors = lmap(random_color, lrange(num_colors)) + else: + raise ValueError("color_type must be either 'default' or 'random'") + + if isinstance(colors, compat.string_types): + import matplotlib.colors + conv = matplotlib.colors.ColorConverter() + + def _maybe_valid_colors(colors): + try: + [conv.to_rgba(c) for c in colors] + return True + except ValueError: + return False + + # check whether the string can be convertible to single color + maybe_single_color = _maybe_valid_colors([colors]) + # check whether each character can be convertible to colors + maybe_color_cycle = _maybe_valid_colors(list(colors)) + if maybe_single_color and maybe_color_cycle and len(colors) > 1: + # Special case for single str 'CN' match and convert to hex + # for supporting matplotlib < 2.0.0 + if re.match(r'\AC[0-9]\Z', colors) and _mpl_ge_2_0_0(): + hex_color = [c['color'] + for c in list(plt.rcParams['axes.prop_cycle'])] + colors = [hex_color[int(colors[1])]] + else: + # this may no longer be required + msg = ("'{0}' can be parsed as both single color and " + "color cycle. 
Specify each color using a list " + "like ['{0}'] or {1}") + raise ValueError(msg.format(colors, list(colors))) + elif maybe_single_color: + colors = [colors] + else: + # ``colors`` is regarded as color cycle. + # mpl will raise error any of them is invalid + pass + + if len(colors) != num_colors: + try: + multiple = num_colors // len(colors) - 1 + except ZeroDivisionError: + raise ValueError("Invalid color argument: ''") + mod = num_colors % len(colors) + + colors += multiple * colors + colors += colors[:mod] + + return colors + + +class _Options(dict): + """ + Stores pandas plotting options. + Allows for parameter aliasing so you can just use parameter names that are + the same as the plot function parameters, but is stored in a canonical + format that makes it easy to breakdown into groups later + """ + + # alias so the names are same as plotting method parameter names + _ALIASES = {'x_compat': 'xaxis.compat'} + _DEFAULT_KEYS = ['xaxis.compat'] + + def __init__(self, deprecated=False): + self._deprecated = deprecated + # self['xaxis.compat'] = False + super(_Options, self).__setitem__('xaxis.compat', False) + + def _warn_if_deprecated(self): + if self._deprecated: + warnings.warn("'pandas.plot_params' is deprecated. 
Use " + "'pandas.plotting.plot_params' instead", + FutureWarning, stacklevel=3) + + def __getitem__(self, key): + self._warn_if_deprecated() + key = self._get_canonical_key(key) + if key not in self: + raise ValueError( + '{key} is not a valid pandas plotting option'.format(key=key)) + return super(_Options, self).__getitem__(key) + + def __setitem__(self, key, value): + self._warn_if_deprecated() + key = self._get_canonical_key(key) + return super(_Options, self).__setitem__(key, value) + + def __delitem__(self, key): + key = self._get_canonical_key(key) + if key in self._DEFAULT_KEYS: + raise ValueError( + 'Cannot remove default parameter {key}'.format(key=key)) + return super(_Options, self).__delitem__(key) + + def __contains__(self, key): + key = self._get_canonical_key(key) + return super(_Options, self).__contains__(key) + + def reset(self): + """ + Reset the option store to its initial state + + Returns + ------- + None + """ + self._warn_if_deprecated() + self.__init__() + + def _get_canonical_key(self, key): + return self._ALIASES.get(key, key) + + @contextmanager + def use(self, key, value): + """ + Temporarily set a parameter value using the with statement. + Aliasing allowed. 
+ """ + self._warn_if_deprecated() + old_value = self[key] + try: + self[key] = value + yield self + finally: + self[key] = old_value + + +plot_params = _Options() diff --git a/pandas/plotting/_timeseries.py b/pandas/plotting/_timeseries.py new file mode 100644 index 0000000000000..21a03ea388566 --- /dev/null +++ b/pandas/plotting/_timeseries.py @@ -0,0 +1,352 @@ +# TODO: Use the fact that axis can have units to simplify the process + +import functools + +import numpy as np + +from matplotlib import pylab +from pandas.core.indexes.period import Period +from pandas.tseries.offsets import DateOffset +import pandas.tseries.frequencies as frequencies +from pandas.core.indexes.datetimes import DatetimeIndex +from pandas.core.indexes.period import PeriodIndex +from pandas.core.indexes.timedeltas import TimedeltaIndex +from pandas.io.formats.printing import pprint_thing +import pandas.compat as compat + +from pandas.plotting._converter import (TimeSeries_DateLocator, + TimeSeries_DateFormatter, + TimeSeries_TimedeltaFormatter) + +# --------------------------------------------------------------------- +# Plotting functions and monkey patches + + +def tsplot(series, plotf, ax=None, **kwargs): + import warnings + """ + Plots a Series on the given Matplotlib axes or the current axes + + Parameters + ---------- + axes : Axes + series : Series + + Notes + _____ + Supports same kwargs as Axes.plot + + + .. deprecated:: 0.23.0 + Use Series.plot() instead + """ + warnings.warn("'tsplot' is deprecated and will be removed in a " + "future version. 
Please use Series.plot() instead.", + FutureWarning, stacklevel=2) + + # Used inferred freq is possible, need a test case for inferred + if ax is None: + import matplotlib.pyplot as plt + ax = plt.gca() + + freq, series = _maybe_resample(series, ax, kwargs) + + # Set ax with freq info + _decorate_axes(ax, freq, kwargs) + ax._plot_data.append((series, plotf, kwargs)) + lines = plotf(ax, series.index._mpl_repr(), series.values, **kwargs) + + # set date formatter, locators and rescale limits + format_dateaxis(ax, ax.freq, series.index) + return lines + + +def _maybe_resample(series, ax, kwargs): + # resample against axes freq if necessary + freq, ax_freq = _get_freq(ax, series) + + if freq is None: # pragma: no cover + raise ValueError('Cannot use dynamic axis without frequency info') + + # Convert DatetimeIndex to PeriodIndex + if isinstance(series.index, DatetimeIndex): + series = series.to_period(freq=freq) + + if ax_freq is not None and freq != ax_freq: + if frequencies.is_superperiod(freq, ax_freq): # upsample input + series = series.copy() + series.index = series.index.asfreq(ax_freq, how='s') + freq = ax_freq + elif _is_sup(freq, ax_freq): # one is weekly + how = kwargs.pop('how', 'last') + series = getattr(series.resample('D'), how)().dropna() + series = getattr(series.resample(ax_freq), how)().dropna() + freq = ax_freq + elif frequencies.is_subperiod(freq, ax_freq) or _is_sub(freq, ax_freq): + _upsample_others(ax, freq, kwargs) + ax_freq = freq + else: # pragma: no cover + raise ValueError('Incompatible frequency conversion') + return freq, series + + +def _is_sub(f1, f2): + return ((f1.startswith('W') and frequencies.is_subperiod('D', f2)) or + (f2.startswith('W') and frequencies.is_subperiod(f1, 'D'))) + + +def _is_sup(f1, f2): + return ((f1.startswith('W') and frequencies.is_superperiod('D', f2)) or + (f2.startswith('W') and frequencies.is_superperiod(f1, 'D'))) + + +def _upsample_others(ax, freq, kwargs): + legend = ax.get_legend() + lines, labels = 
_replot_ax(ax, freq, kwargs) + _replot_ax(ax, freq, kwargs) + + other_ax = None + if hasattr(ax, 'left_ax'): + other_ax = ax.left_ax + if hasattr(ax, 'right_ax'): + other_ax = ax.right_ax + + if other_ax is not None: + rlines, rlabels = _replot_ax(other_ax, freq, kwargs) + lines.extend(rlines) + labels.extend(rlabels) + + if (legend is not None and kwargs.get('legend', True) and + len(lines) > 0): + title = legend.get_title().get_text() + if title == 'None': + title = None + ax.legend(lines, labels, loc='best', title=title) + + +def _replot_ax(ax, freq, kwargs): + data = getattr(ax, '_plot_data', None) + + # clear current axes and data + ax._plot_data = [] + ax.clear() + + _decorate_axes(ax, freq, kwargs) + + lines = [] + labels = [] + if data is not None: + for series, plotf, kwds in data: + series = series.copy() + idx = series.index.asfreq(freq, how='S') + series.index = idx + ax._plot_data.append((series, plotf, kwds)) + + # for tsplot + if isinstance(plotf, compat.string_types): + from pandas.plotting._core import _plot_klass + plotf = _plot_klass[plotf]._plot + + lines.append(plotf(ax, series.index._mpl_repr(), + series.values, **kwds)[0]) + labels.append(pprint_thing(series.name)) + + return lines, labels + + +def _decorate_axes(ax, freq, kwargs): + """Initialize axes for time-series plotting""" + if not hasattr(ax, '_plot_data'): + ax._plot_data = [] + + ax.freq = freq + xaxis = ax.get_xaxis() + xaxis.freq = freq + if not hasattr(ax, 'legendlabels'): + ax.legendlabels = [kwargs.get('label', None)] + else: + ax.legendlabels.append(kwargs.get('label', None)) + ax.view_interval = None + ax.date_axis_info = None + + +def _get_ax_freq(ax): + """ + Get the freq attribute of the ax object if set. 
+ Also checks shared axes (eg when using secondary yaxis, sharex=True + or twinx) + """ + ax_freq = getattr(ax, 'freq', None) + if ax_freq is None: + # check for left/right ax in case of secondary yaxis + if hasattr(ax, 'left_ax'): + ax_freq = getattr(ax.left_ax, 'freq', None) + elif hasattr(ax, 'right_ax'): + ax_freq = getattr(ax.right_ax, 'freq', None) + if ax_freq is None: + # check if a shared ax (sharex/twinx) has already freq set + shared_axes = ax.get_shared_x_axes().get_siblings(ax) + if len(shared_axes) > 1: + for shared_ax in shared_axes: + ax_freq = getattr(shared_ax, 'freq', None) + if ax_freq is not None: + break + return ax_freq + + +def _get_freq(ax, series): + # get frequency from data + freq = getattr(series.index, 'freq', None) + if freq is None: + freq = getattr(series.index, 'inferred_freq', None) + + ax_freq = _get_ax_freq(ax) + + # use axes freq if no data freq + if freq is None: + freq = ax_freq + + # get the period frequency + if isinstance(freq, DateOffset): + freq = freq.rule_code + else: + freq = frequencies.get_base_alias(freq) + + freq = frequencies.get_period_alias(freq) + return freq, ax_freq + + +def _use_dynamic_x(ax, data): + freq = _get_index_freq(data) + ax_freq = _get_ax_freq(ax) + + if freq is None: # convert irregular if axes has freq info + freq = ax_freq + else: # do not use tsplot if irregular was plotted first + if (ax_freq is None) and (len(ax.get_lines()) > 0): + return False + + if freq is None: + return False + + if isinstance(freq, DateOffset): + freq = freq.rule_code + else: + freq = frequencies.get_base_alias(freq) + freq = frequencies.get_period_alias(freq) + + if freq is None: + return False + + # hack this for 0.10.1, creating more technical debt...sigh + if isinstance(data.index, DatetimeIndex): + base = frequencies.get_freq(freq) + x = data.index + if (base <= frequencies.FreqGroup.FR_DAY): + return x[:1].is_normalized + return Period(x[0], freq).to_timestamp(tz=x.tz) == x[0] + return True + + +def 
_get_index_freq(data): + freq = getattr(data.index, 'freq', None) + if freq is None: + freq = getattr(data.index, 'inferred_freq', None) + if freq == 'B': + weekdays = np.unique(data.index.dayofweek) + if (5 in weekdays) or (6 in weekdays): + freq = None + return freq + + +def _maybe_convert_index(ax, data): + # tsplot converts automatically, but don't want to convert index + # over and over for DataFrames + if isinstance(data.index, DatetimeIndex): + freq = getattr(data.index, 'freq', None) + + if freq is None: + freq = getattr(data.index, 'inferred_freq', None) + if isinstance(freq, DateOffset): + freq = freq.rule_code + + if freq is None: + freq = _get_ax_freq(ax) + + if freq is None: + raise ValueError('Could not get frequency alias for plotting') + + freq = frequencies.get_base_alias(freq) + freq = frequencies.get_period_alias(freq) + + data = data.to_period(freq=freq) + return data + + +# Patch methods for subplot. Only format_dateaxis is currently used. +# Do we need the rest for convenience? + +def format_timedelta_ticks(x, pos, n_decimals): + """ + Convert seconds to 'D days HH:MM:SS.F' + """ + s, ns = divmod(x, 1e9) + m, s = divmod(s, 60) + h, m = divmod(m, 60) + d, h = divmod(h, 24) + decimals = int(ns * 10**(n_decimals - 9)) + s = r'{:02d}:{:02d}:{:02d}'.format(int(h), int(m), int(s)) + if n_decimals > 0: + s += '.{{:0{:0d}d}}'.format(n_decimals).format(decimals) + if d != 0: + s = '{:d} days '.format(int(d)) + s + return s + + +def _format_coord(freq, t, y): + return "t = {0} y = {1:8f}".format(Period(ordinal=int(t), freq=freq), y) + + +def format_dateaxis(subplot, freq, index): + """ + Pretty-formats the date axis (x-axis). + + Major and minor ticks are automatically set for the frequency of the + current underlying series. As the dynamic mode is activated by + default, changing the limits of the x axis will intelligently change + the positions of the ticks. 
+ """ + + # handle index specific formatting + # Note: DatetimeIndex does not use this + # interface. DatetimeIndex uses matplotlib.date directly + if isinstance(index, PeriodIndex): + + majlocator = TimeSeries_DateLocator(freq, dynamic_mode=True, + minor_locator=False, + plot_obj=subplot) + minlocator = TimeSeries_DateLocator(freq, dynamic_mode=True, + minor_locator=True, + plot_obj=subplot) + subplot.xaxis.set_major_locator(majlocator) + subplot.xaxis.set_minor_locator(minlocator) + + majformatter = TimeSeries_DateFormatter(freq, dynamic_mode=True, + minor_locator=False, + plot_obj=subplot) + minformatter = TimeSeries_DateFormatter(freq, dynamic_mode=True, + minor_locator=True, + plot_obj=subplot) + subplot.xaxis.set_major_formatter(majformatter) + subplot.xaxis.set_minor_formatter(minformatter) + + # x and y coord info + subplot.format_coord = functools.partial(_format_coord, freq) + + elif isinstance(index, TimedeltaIndex): + subplot.xaxis.set_major_formatter( + TimeSeries_TimedeltaFormatter()) + else: + raise TypeError('index type not supported') + + pylab.draw_if_interactive() diff --git a/pandas/plotting/_tools.py b/pandas/plotting/_tools.py new file mode 100644 index 0000000000000..816586fbb82f5 --- /dev/null +++ b/pandas/plotting/_tools.py @@ -0,0 +1,383 @@ +# being a bit too dynamic +# pylint: disable=E1101 +from __future__ import division + +import warnings +from math import ceil + +import numpy as np + +from pandas.core.dtypes.common import is_list_like +from pandas.core.dtypes.generic import ABCSeries +from pandas.core.index import Index +from pandas.compat import range + + +def format_date_labels(ax, rot): + # mini version of autofmt_xdate + try: + for label in ax.get_xticklabels(): + label.set_ha('right') + label.set_rotation(rot) + fig = ax.get_figure() + fig.subplots_adjust(bottom=0.2) + except Exception: # pragma: no cover + pass + + +def table(ax, data, rowLabels=None, colLabels=None, **kwargs): + """ + Helper function to convert DataFrame and 
Series to matplotlib.table + + Parameters + ---------- + `ax`: Matplotlib axes object + `data`: DataFrame or Series + data for table contents + `kwargs`: keywords, optional + keyword arguments which passed to matplotlib.table.table. + If `rowLabels` or `colLabels` is not specified, data index or column + name will be used. + + Returns + ------- + matplotlib table object + """ + from pandas import DataFrame + if isinstance(data, ABCSeries): + data = DataFrame(data, columns=[data.name]) + elif isinstance(data, DataFrame): + pass + else: + raise ValueError('Input data must be DataFrame or Series') + + if rowLabels is None: + rowLabels = data.index + + if colLabels is None: + colLabels = data.columns + + cellText = data.values + + import matplotlib.table + table = matplotlib.table.table(ax, cellText=cellText, + rowLabels=rowLabels, + colLabels=colLabels, **kwargs) + return table + + +def _get_layout(nplots, layout=None, layout_type='box'): + if layout is not None: + if not isinstance(layout, (tuple, list)) or len(layout) != 2: + raise ValueError('Layout must be a tuple of (rows, columns)') + + nrows, ncols = layout + + # Python 2 compat + ceil_ = lambda x: int(ceil(x)) + if nrows == -1 and ncols > 0: + layout = nrows, ncols = (ceil_(float(nplots) / ncols), ncols) + elif ncols == -1 and nrows > 0: + layout = nrows, ncols = (nrows, ceil_(float(nplots) / nrows)) + elif ncols <= 0 and nrows <= 0: + msg = "At least one dimension of layout must be positive" + raise ValueError(msg) + + if nrows * ncols < nplots: + raise ValueError('Layout of {nrows}x{ncols} must be larger ' + 'than required size {nplots}'.format( + nrows=nrows, ncols=ncols, nplots=nplots)) + + return layout + + if layout_type == 'single': + return (1, 1) + elif layout_type == 'horizontal': + return (1, nplots) + elif layout_type == 'vertical': + return (nplots, 1) + + layouts = {1: (1, 1), 2: (1, 2), 3: (2, 2), 4: (2, 2)} + try: + return layouts[nplots] + except KeyError: + k = 1 + while k ** 2 < nplots: + k 
+= 1 + + if (k - 1) * k >= nplots: + return k, (k - 1) + else: + return k, k + +# copied from matplotlib/pyplot.py and modified for pandas.plotting + + +def _subplots(naxes=None, sharex=False, sharey=False, squeeze=True, + subplot_kw=None, ax=None, layout=None, layout_type='box', + **fig_kw): + """Create a figure with a set of subplots already made. + + This utility wrapper makes it convenient to create common layouts of + subplots, including the enclosing figure object, in a single call. + + Keyword arguments: + + naxes : int + Number of required axes. Exceeded axes are set invisible. Default is + nrows * ncols. + + sharex : bool + If True, the X axis will be shared amongst all subplots. + + sharey : bool + If True, the Y axis will be shared amongst all subplots. + + squeeze : bool + + If True, extra dimensions are squeezed out from the returned axis object: + - if only one subplot is constructed (nrows=ncols=1), the resulting + single Axis object is returned as a scalar. + - for Nx1 or 1xN subplots, the returned object is a 1-d numpy object + array of Axis objects are returned as numpy 1-d arrays. + - for NxM subplots with N>1 and M>1 are returned as a 2d array. + + If False, no squeezing is done: the returned axis object is always + a 2-d array containing Axis instances, even if it ends up being 1x1. + + subplot_kw : dict + Dict with keywords passed to the add_subplot() call used to create each + subplots. + + ax : Matplotlib axis object, optional + + layout : tuple + Number of rows and columns of the subplot grid. + If not specified, calculated from naxes and layout_type + + layout_type : {'box', 'horziontal', 'vertical'}, default 'box' + Specify how to layout the subplot grid. + + fig_kw : Other keyword arguments to be passed to the figure() call. + Note that all keywords not recognized above will be + automatically included here. 
+ + Returns: + + fig, ax : tuple + - fig is the Matplotlib Figure object + - ax can be either a single axis object or an array of axis objects if + more than one subplot was created. The dimensions of the resulting array + can be controlled with the squeeze keyword, see above. + + **Examples:** + + x = np.linspace(0, 2*np.pi, 400) + y = np.sin(x**2) + + # Just a figure and one subplot + f, ax = plt.subplots() + ax.plot(x, y) + ax.set_title('Simple plot') + + # Two subplots, unpack the output array immediately + f, (ax1, ax2) = plt.subplots(1, 2, sharey=True) + ax1.plot(x, y) + ax1.set_title('Sharing Y axis') + ax2.scatter(x, y) + + # Four polar axes + plt.subplots(2, 2, subplot_kw=dict(polar=True)) + """ + import matplotlib.pyplot as plt + + if subplot_kw is None: + subplot_kw = {} + + if ax is None: + fig = plt.figure(**fig_kw) + else: + if is_list_like(ax): + ax = _flatten(ax) + if layout is not None: + warnings.warn("When passing multiple axes, layout keyword is " + "ignored", UserWarning) + if sharex or sharey: + warnings.warn("When passing multiple axes, sharex and sharey " + "are ignored. These settings must be specified " + "when creating axes", UserWarning, + stacklevel=4) + if len(ax) == naxes: + fig = ax[0].get_figure() + return fig, ax + else: + raise ValueError("The number of passed axes must be {0}, the " + "same as the output plot".format(naxes)) + + fig = ax.get_figure() + # if ax is passed and a number of subplots is 1, return ax as it is + if naxes == 1: + if squeeze: + return fig, ax + else: + return fig, _flatten(ax) + else: + warnings.warn("To output multiple subplots, the figure containing " + "the passed axes is being cleared", UserWarning, + stacklevel=4) + fig.clear() + + nrows, ncols = _get_layout(naxes, layout=layout, layout_type=layout_type) + nplots = nrows * ncols + + # Create empty object array to hold all axes. 
It's easiest to make it 1-d + # so we can just append subplots upon creation, and then + axarr = np.empty(nplots, dtype=object) + + # Create first subplot separately, so we can share it if requested + ax0 = fig.add_subplot(nrows, ncols, 1, **subplot_kw) + + if sharex: + subplot_kw['sharex'] = ax0 + if sharey: + subplot_kw['sharey'] = ax0 + axarr[0] = ax0 + + # Note off-by-one counting because add_subplot uses the MATLAB 1-based + # convention. + for i in range(1, nplots): + kwds = subplot_kw.copy() + # Set sharex and sharey to None for blank/dummy axes, these can + # interfere with proper axis limits on the visible axes if + # they share axes e.g. issue #7528 + if i >= naxes: + kwds['sharex'] = None + kwds['sharey'] = None + ax = fig.add_subplot(nrows, ncols, i + 1, **kwds) + axarr[i] = ax + + if naxes != nplots: + for ax in axarr[naxes:]: + ax.set_visible(False) + + _handle_shared_axes(axarr, nplots, naxes, nrows, ncols, sharex, sharey) + + if squeeze: + # Reshape the array to have the final desired dimension (nrow,ncol), + # though discarding unneeded dimensions that equal 1. If we only have + # one subplot, just return it instead of a 1-element array. 
+ if nplots == 1: + axes = axarr[0] + else: + axes = axarr.reshape(nrows, ncols).squeeze() + else: + # returned axis array will be always 2-d, even if nrows=ncols=1 + axes = axarr.reshape(nrows, ncols) + + return fig, axes + + +def _remove_labels_from_axis(axis): + for t in axis.get_majorticklabels(): + t.set_visible(False) + + try: + # set_visible will not be effective if + # minor axis has NullLocator and NullFormattor (default) + import matplotlib.ticker as ticker + if isinstance(axis.get_minor_locator(), ticker.NullLocator): + axis.set_minor_locator(ticker.AutoLocator()) + if isinstance(axis.get_minor_formatter(), ticker.NullFormatter): + axis.set_minor_formatter(ticker.FormatStrFormatter('')) + for t in axis.get_minorticklabels(): + t.set_visible(False) + except Exception: # pragma no cover + raise + axis.get_label().set_visible(False) + + +def _handle_shared_axes(axarr, nplots, naxes, nrows, ncols, sharex, sharey): + if nplots > 1: + + if nrows > 1: + try: + # first find out the ax layout, + # so that we can correctly handle 'gaps" + layout = np.zeros((nrows + 1, ncols + 1), dtype=np.bool) + for ax in axarr: + layout[ax.rowNum, ax.colNum] = ax.get_visible() + + for ax in axarr: + # only the last row of subplots should get x labels -> all + # other off layout handles the case that the subplot is + # the last in the column, because below is no subplot/gap. + if not layout[ax.rowNum + 1, ax.colNum]: + continue + if sharex or len(ax.get_shared_x_axes() + .get_siblings(ax)) > 1: + _remove_labels_from_axis(ax.xaxis) + + except IndexError: + # if gridspec is used, ax.rowNum and ax.colNum may different + # from layout shape. 
in this case, use last_row logic + for ax in axarr: + if ax.is_last_row(): + continue + if sharex or len(ax.get_shared_x_axes() + .get_siblings(ax)) > 1: + _remove_labels_from_axis(ax.xaxis) + + if ncols > 1: + for ax in axarr: + # only the first column should get y labels -> set all other to + # off as we only have labels in the first column and we always + # have a subplot there, we can skip the layout test + if ax.is_first_col(): + continue + if sharey or len(ax.get_shared_y_axes().get_siblings(ax)) > 1: + _remove_labels_from_axis(ax.yaxis) + + +def _flatten(axes): + if not is_list_like(axes): + return np.array([axes]) + elif isinstance(axes, (np.ndarray, Index)): + return axes.ravel() + return np.array(axes) + + +def _get_all_lines(ax): + lines = ax.get_lines() + + if hasattr(ax, 'right_ax'): + lines += ax.right_ax.get_lines() + + if hasattr(ax, 'left_ax'): + lines += ax.left_ax.get_lines() + + return lines + + +def _get_xlim(lines): + left, right = np.inf, -np.inf + for l in lines: + x = l.get_xdata(orig=False) + left = min(np.nanmin(x), left) + right = max(np.nanmax(x), right) + return left, right + + +def _set_ticks_props(axes, xlabelsize=None, xrot=None, + ylabelsize=None, yrot=None): + import matplotlib.pyplot as plt + + for ax in _flatten(axes): + if xlabelsize is not None: + plt.setp(ax.get_xticklabels(), fontsize=xlabelsize) + if xrot is not None: + plt.setp(ax.get_xticklabels(), rotation=xrot) + if ylabelsize is not None: + plt.setp(ax.get_yticklabels(), fontsize=ylabelsize) + if yrot is not None: + plt.setp(ax.get_yticklabels(), rotation=yrot) + return axes diff --git a/pandas/sparse/api.py b/pandas/sparse/api.py deleted file mode 100644 index 55841fbeffa2d..0000000000000 --- a/pandas/sparse/api.py +++ /dev/null @@ -1,6 +0,0 @@ -# pylint: disable=W0611 -# flake8: noqa -from pandas.sparse.array import SparseArray -from pandas.sparse.list import SparseList -from pandas.sparse.series import SparseSeries, SparseTimeSeries -from pandas.sparse.frame import 
SparseDataFrame diff --git a/pandas/sparse/list.py b/pandas/sparse/list.py deleted file mode 100644 index d294e65bbf10c..0000000000000 --- a/pandas/sparse/list.py +++ /dev/null @@ -1,151 +0,0 @@ -import warnings -import numpy as np -from pandas.core.base import PandasObject -from pandas.formats.printing import pprint_thing - -from pandas.types.common import is_scalar -from pandas.sparse.array import SparseArray -from pandas.util.validators import validate_bool_kwarg -import pandas._sparse as splib - - -class SparseList(PandasObject): - - """ - Data structure for accumulating data to be converted into a - SparseArray. Has similar API to the standard Python list - - Parameters - ---------- - data : scalar or array-like - fill_value : scalar, default NaN - """ - - def __init__(self, data=None, fill_value=np.nan): - - # see gh-13784 - warnings.warn("SparseList is deprecated and will be removed " - "in a future version", FutureWarning, stacklevel=2) - - self.fill_value = fill_value - self._chunks = [] - - if data is not None: - self.append(data) - - def __unicode__(self): - contents = '\n'.join(repr(c) for c in self._chunks) - return '%s\n%s' % (object.__repr__(self), pprint_thing(contents)) - - def __len__(self): - return sum(len(c) for c in self._chunks) - - def __getitem__(self, i): - if i < 0: - if i + len(self) < 0: # pragma: no cover - raise ValueError('%d out of range' % i) - i += len(self) - - passed = 0 - j = 0 - while i >= passed + len(self._chunks[j]): - passed += len(self._chunks[j]) - j += 1 - return self._chunks[j][i - passed] - - def __setitem__(self, i, value): - raise NotImplementedError - - @property - def nchunks(self): - return len(self._chunks) - - @property - def is_consolidated(self): - return self.nchunks == 1 - - def consolidate(self, inplace=True): - """ - Internally consolidate chunks of data - - Parameters - ---------- - inplace : boolean, default True - Modify the calling object instead of constructing a new one - - Returns - ------- - 
splist : SparseList - If inplace=False, new object, otherwise reference to existing - object - """ - inplace = validate_bool_kwarg(inplace, 'inplace') - if not inplace: - result = self.copy() - else: - result = self - - if result.is_consolidated: - return result - - result._consolidate_inplace() - return result - - def _consolidate_inplace(self): - new_values = np.concatenate([c.sp_values for c in self._chunks]) - new_index = _concat_sparse_indexes([c.sp_index for c in self._chunks]) - new_arr = SparseArray(new_values, sparse_index=new_index, - fill_value=self.fill_value) - self._chunks = [new_arr] - - def copy(self): - """ - Return copy of the list - - Returns - ------- - new_list : SparseList - """ - new_splist = SparseList(fill_value=self.fill_value) - new_splist._chunks = list(self._chunks) - return new_splist - - def to_array(self): - """ - Return SparseArray from data stored in the SparseList - - Returns - ------- - sparr : SparseArray - """ - self.consolidate(inplace=True) - return self._chunks[0] - - def append(self, value): - """ - Append element or array-like chunk of data to the SparseList - - Parameters - ---------- - value: scalar or array-like - """ - if is_scalar(value): - value = [value] - - sparr = SparseArray(value, fill_value=self.fill_value) - self._chunks.append(sparr) - self._consolidated = False - - -def _concat_sparse_indexes(indexes): - all_indices = [] - total_length = 0 - - for index in indexes: - # increment by offset - inds = index.to_int_index().indices + total_length - - all_indices.append(inds) - total_length += index.length - - return splib.IntIndex(total_length, np.concatenate(all_indices)) diff --git a/pandas/sparse/tests/test_list.py b/pandas/sparse/tests/test_list.py deleted file mode 100644 index 8511cd5997368..0000000000000 --- a/pandas/sparse/tests/test_list.py +++ /dev/null @@ -1,112 +0,0 @@ -from pandas.compat import range -import unittest - -from numpy import nan -import numpy as np - -from pandas.sparse.api import 
SparseList, SparseArray -import pandas.util.testing as tm - - -class TestSparseList(unittest.TestCase): - - def setUp(self): - self.na_data = np.array([nan, nan, 1, 2, 3, nan, 4, 5, nan, 6]) - self.zero_data = np.array([0, 0, 1, 2, 3, 0, 4, 5, 0, 6]) - - def test_deprecation(self): - # see gh-13784 - with tm.assert_produces_warning(FutureWarning): - SparseList() - - def test_constructor(self): - with tm.assert_produces_warning(FutureWarning): - lst1 = SparseList(self.na_data[:5]) - with tm.assert_produces_warning(FutureWarning): - exp = SparseList() - - exp.append(self.na_data[:5]) - tm.assert_sp_list_equal(lst1, exp) - - def test_len(self): - with tm.assert_produces_warning(FutureWarning): - arr = self.na_data - splist = SparseList() - splist.append(arr[:5]) - self.assertEqual(len(splist), 5) - splist.append(arr[5]) - self.assertEqual(len(splist), 6) - splist.append(arr[6:]) - self.assertEqual(len(splist), 10) - - def test_append_na(self): - with tm.assert_produces_warning(FutureWarning): - arr = self.na_data - splist = SparseList() - splist.append(arr[:5]) - splist.append(arr[5]) - splist.append(arr[6:]) - - sparr = splist.to_array() - tm.assert_sp_array_equal(sparr, SparseArray(arr)) - - def test_append_zero(self): - with tm.assert_produces_warning(FutureWarning): - arr = self.zero_data - splist = SparseList(fill_value=0) - splist.append(arr[:5]) - splist.append(arr[5]) - splist.append(arr[6:]) - - # list always produces int64, but SA constructor - # is platform dtype aware - sparr = splist.to_array() - exp = SparseArray(arr, fill_value=0) - tm.assert_sp_array_equal(sparr, exp, check_dtype=False) - - def test_consolidate(self): - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - arr = self.na_data - exp_sparr = SparseArray(arr) - - splist = SparseList() - splist.append(arr[:5]) - splist.append(arr[5]) - splist.append(arr[6:]) - - consol = splist.consolidate(inplace=False) - self.assertEqual(consol.nchunks, 1) - 
self.assertEqual(splist.nchunks, 3) - tm.assert_sp_array_equal(consol.to_array(), exp_sparr) - - splist.consolidate() - self.assertEqual(splist.nchunks, 1) - tm.assert_sp_array_equal(splist.to_array(), exp_sparr) - - def test_copy(self): - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - arr = self.na_data - exp_sparr = SparseArray(arr) - - splist = SparseList() - splist.append(arr[:5]) - splist.append(arr[5]) - - cp = splist.copy() - cp.append(arr[6:]) - self.assertEqual(splist.nchunks, 2) - tm.assert_sp_array_equal(cp.to_array(), exp_sparr) - - def test_getitem(self): - with tm.assert_produces_warning(FutureWarning): - arr = self.na_data - splist = SparseList() - splist.append(arr[:5]) - splist.append(arr[5]) - splist.append(arr[6:]) - - for i in range(len(arr)): - tm.assert_almost_equal(splist[i], arr[i]) - tm.assert_almost_equal(splist[-i], arr[-i]) diff --git a/pandas/src/datetime.pxd b/pandas/src/datetime.pxd deleted file mode 100644 index 2267c8282ec14..0000000000000 --- a/pandas/src/datetime.pxd +++ /dev/null @@ -1,195 +0,0 @@ -# cython: profile=False -from numpy cimport int64_t, int32_t, npy_int64, npy_int32, ndarray -from cpython cimport PyObject - -from cpython cimport PyUnicode_Check, PyUnicode_AsASCIIString - - -cdef extern from "headers/stdint.h": - enum: INT64_MIN - enum: INT32_MIN - - - -cdef extern from "datetime.h": - - ctypedef class datetime.date [object PyDateTime_Date]: - pass - - ctypedef class datetime.datetime [object PyDateTime_DateTime]: - pass - - ctypedef class datetime.timedelta [object PyDateTime_Delta]: - pass - - void PyDateTime_IMPORT() - - int PyDateTime_GET_YEAR(date) - int PyDateTime_GET_MONTH(date) - int PyDateTime_GET_DAY(date) - int PyDateTime_DATE_GET_HOUR(object o) - int PyDateTime_DATE_GET_MINUTE(object o) - int PyDateTime_DATE_GET_SECOND(object o) - int PyDateTime_DATE_GET_MICROSECOND(object o) - int PyDateTime_TIME_GET_HOUR(object o) - int PyDateTime_TIME_GET_MINUTE(object o) - int 
PyDateTime_TIME_GET_SECOND(object o) - int PyDateTime_TIME_GET_MICROSECOND(object o) - bint PyDateTime_Check(object o) - bint PyDate_Check(object o) - bint PyTime_Check(object o) - bint PyDelta_Check(object o) - object PyDateTime_FromDateAndTime(int year, int month, int day, int hour, - int minute, int second, int us) - -cdef extern from "numpy/ndarrayobject.h": - - ctypedef int64_t npy_timedelta - ctypedef int64_t npy_datetime - - ctypedef enum NPY_CASTING: - NPY_NO_CASTING - NPY_EQUIV_CASTING - NPY_SAFE_CASTING - NPY_SAME_KIND_CASTING - NPY_UNSAFE_CASTING - - -cdef extern from "numpy_helper.h": - npy_datetime get_datetime64_value(object o) - npy_timedelta get_timedelta64_value(object o) - -cdef extern from "numpy/npy_common.h": - - ctypedef unsigned char npy_bool - -cdef extern from "datetime/np_datetime.h": - - ctypedef enum PANDAS_DATETIMEUNIT: - PANDAS_FR_Y - PANDAS_FR_M - PANDAS_FR_W - PANDAS_FR_D - PANDAS_FR_B - PANDAS_FR_h - PANDAS_FR_m - PANDAS_FR_s - PANDAS_FR_ms - PANDAS_FR_us - PANDAS_FR_ns - PANDAS_FR_ps - PANDAS_FR_fs - PANDAS_FR_as - - ctypedef struct pandas_datetimestruct: - npy_int64 year - npy_int32 month, day, hour, min, sec, us, ps, as - - int cmp_pandas_datetimestruct(pandas_datetimestruct *a, - pandas_datetimestruct *b) - - int convert_pydatetime_to_datetimestruct(PyObject *obj, - pandas_datetimestruct *out, - PANDAS_DATETIMEUNIT *out_bestunit, - int apply_tzinfo) - - npy_datetime pandas_datetimestruct_to_datetime(PANDAS_DATETIMEUNIT fr, - pandas_datetimestruct *d) nogil - void pandas_datetime_to_datetimestruct(npy_datetime val, - PANDAS_DATETIMEUNIT fr, - pandas_datetimestruct *result) nogil - int days_per_month_table[2][12] - - int dayofweek(int y, int m, int d) nogil - int is_leapyear(int64_t year) nogil - PANDAS_DATETIMEUNIT get_datetime64_unit(object o) - -cdef extern from "datetime/np_datetime_strings.h": - - int parse_iso_8601_datetime(char *str, int len, PANDAS_DATETIMEUNIT unit, - NPY_CASTING casting, pandas_datetimestruct *out, - int 
*out_local, int *out_tzoffset, - PANDAS_DATETIMEUNIT *out_bestunit, - npy_bool *out_special) - - int make_iso_8601_datetime(pandas_datetimestruct *dts, char *outstr, int outlen, - int local, PANDAS_DATETIMEUNIT base, int tzoffset, - NPY_CASTING casting) - - int get_datetime_iso_8601_strlen(int local, PANDAS_DATETIMEUNIT base) - - # int parse_python_string(object obj, pandas_datetimestruct *out) except -1 - - - - -cdef inline int _string_to_dts(object val, pandas_datetimestruct* dts, - int* out_local, int* out_tzoffset) except? -1: - cdef int result - cdef char *tmp - - if PyUnicode_Check(val): - val = PyUnicode_AsASCIIString(val); - - tmp = val - result = _cstring_to_dts(tmp, len(val), dts, out_local, out_tzoffset) - - if result == -1: - raise ValueError('Unable to parse %s' % str(val)) - return result - -cdef inline int _cstring_to_dts(char *val, int length, - pandas_datetimestruct* dts, - int* out_local, int* out_tzoffset) except? -1: - cdef: - npy_bool special - PANDAS_DATETIMEUNIT out_bestunit - int result - - result = parse_iso_8601_datetime(val, length, PANDAS_FR_ns, - NPY_UNSAFE_CASTING, - dts, out_local, out_tzoffset, &out_bestunit, &special) - return result - - -cdef inline object _datetime64_to_datetime(int64_t val): - cdef pandas_datetimestruct dts - pandas_datetime_to_datetimestruct(val, PANDAS_FR_ns, &dts) - return _dts_to_pydatetime(&dts) - -cdef inline object _dts_to_pydatetime(pandas_datetimestruct *dts): - return PyDateTime_FromDateAndTime(dts.year, dts.month, - dts.day, dts.hour, - dts.min, dts.sec, dts.us) - -cdef inline int64_t _pydatetime_to_dts(object val, pandas_datetimestruct *dts): - dts.year = PyDateTime_GET_YEAR(val) - dts.month = PyDateTime_GET_MONTH(val) - dts.day = PyDateTime_GET_DAY(val) - dts.hour = PyDateTime_DATE_GET_HOUR(val) - dts.min = PyDateTime_DATE_GET_MINUTE(val) - dts.sec = PyDateTime_DATE_GET_SECOND(val) - dts.us = PyDateTime_DATE_GET_MICROSECOND(val) - dts.ps = dts.as = 0 - return 
pandas_datetimestruct_to_datetime(PANDAS_FR_ns, dts) - -cdef inline int64_t _dtlike_to_datetime64(object val, - pandas_datetimestruct *dts): - dts.year = val.year - dts.month = val.month - dts.day = val.day - dts.hour = val.hour - dts.min = val.minute - dts.sec = val.second - dts.us = val.microsecond - dts.ps = dts.as = 0 - return pandas_datetimestruct_to_datetime(PANDAS_FR_ns, dts) - -cdef inline int64_t _date_to_datetime64(object val, - pandas_datetimestruct *dts): - dts.year = PyDateTime_GET_YEAR(val) - dts.month = PyDateTime_GET_MONTH(val) - dts.day = PyDateTime_GET_DAY(val) - dts.hour = dts.min = dts.sec = dts.us = 0 - dts.ps = dts.as = 0 - return pandas_datetimestruct_to_datetime(PANDAS_FR_ns, dts) diff --git a/pandas/src/datetime_helper.h b/pandas/src/datetime_helper.h deleted file mode 100644 index bef4b4266c824..0000000000000 --- a/pandas/src/datetime_helper.h +++ /dev/null @@ -1,36 +0,0 @@ -/* -Copyright (c) 2016, PyData Development Team -All rights reserved. - -Distributed under the terms of the BSD Simplified License. - -The full license is in the LICENSE file, distributed with this software. -*/ - -#ifndef PANDAS_SRC_DATETIME_HELPER_H_ -#define PANDAS_SRC_DATETIME_HELPER_H_ - -#include -#include "datetime.h" -#include "numpy/arrayobject.h" -#include "numpy/arrayscalars.h" - -npy_int64 get_long_attr(PyObject *o, const char *attr) { - npy_int64 long_val; - PyObject *value = PyObject_GetAttrString(o, attr); - long_val = (PyLong_Check(value) ? 
- PyLong_AsLongLong(value) : PyInt_AS_LONG(value)); - Py_DECREF(value); - return long_val; -} - -npy_float64 total_seconds(PyObject *td) { - // Python 2.6 compat - npy_int64 microseconds = get_long_attr(td, "microseconds"); - npy_int64 seconds = get_long_attr(td, "seconds"); - npy_int64 days = get_long_attr(td, "days"); - npy_int64 days_in_seconds = days * 24LL * 3600LL; - return (microseconds + (seconds + days_in_seconds) * 1000000.0) / 1000000.0; -} - -#endif // PANDAS_SRC_DATETIME_HELPER_H_ diff --git a/pandas/src/headers/math.h b/pandas/src/headers/math.h deleted file mode 100644 index 34ad9f24a58f9..0000000000000 --- a/pandas/src/headers/math.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef _PANDAS_MATH_H_ -#define _PANDAS_MATH_H_ - -#if defined(_MSC_VER) && (_MSC_VER < 1800) -#include -__inline int signbit(double num) { return _copysign(1.0, num) < 0; } -#else -#include -#endif - -#endif diff --git a/pandas/src/khash.pxd b/pandas/src/khash.pxd deleted file mode 100644 index adb0fe285dbb8..0000000000000 --- a/pandas/src/khash.pxd +++ /dev/null @@ -1,140 +0,0 @@ -from cpython cimport PyObject -from numpy cimport int64_t, uint64_t, int32_t, uint32_t, float64_t - -cdef extern from "khash_python.h": - ctypedef uint32_t khint_t - ctypedef khint_t khiter_t - - ctypedef struct kh_pymap_t: - khint_t n_buckets, size, n_occupied, upper_bound - uint32_t *flags - PyObject **keys - size_t *vals - - inline kh_pymap_t* kh_init_pymap() - inline void kh_destroy_pymap(kh_pymap_t*) - inline void kh_clear_pymap(kh_pymap_t*) - inline khint_t kh_get_pymap(kh_pymap_t*, PyObject*) - inline void kh_resize_pymap(kh_pymap_t*, khint_t) - inline khint_t kh_put_pymap(kh_pymap_t*, PyObject*, int*) - inline void kh_del_pymap(kh_pymap_t*, khint_t) - - bint kh_exist_pymap(kh_pymap_t*, khiter_t) - - ctypedef struct kh_pyset_t: - khint_t n_buckets, size, n_occupied, upper_bound - uint32_t *flags - PyObject **keys - size_t *vals - - inline kh_pyset_t* kh_init_pyset() - inline void 
kh_destroy_pyset(kh_pyset_t*) - inline void kh_clear_pyset(kh_pyset_t*) - inline khint_t kh_get_pyset(kh_pyset_t*, PyObject*) - inline void kh_resize_pyset(kh_pyset_t*, khint_t) - inline khint_t kh_put_pyset(kh_pyset_t*, PyObject*, int*) - inline void kh_del_pyset(kh_pyset_t*, khint_t) - - bint kh_exist_pyset(kh_pyset_t*, khiter_t) - - ctypedef char* kh_cstr_t - - ctypedef struct kh_str_t: - khint_t n_buckets, size, n_occupied, upper_bound - uint32_t *flags - kh_cstr_t *keys - size_t *vals - - inline kh_str_t* kh_init_str() nogil - inline void kh_destroy_str(kh_str_t*) nogil - inline void kh_clear_str(kh_str_t*) nogil - inline khint_t kh_get_str(kh_str_t*, kh_cstr_t) nogil - inline void kh_resize_str(kh_str_t*, khint_t) nogil - inline khint_t kh_put_str(kh_str_t*, kh_cstr_t, int*) nogil - inline void kh_del_str(kh_str_t*, khint_t) nogil - - bint kh_exist_str(kh_str_t*, khiter_t) nogil - - ctypedef struct kh_int64_t: - khint_t n_buckets, size, n_occupied, upper_bound - uint32_t *flags - int64_t *keys - size_t *vals - - inline kh_int64_t* kh_init_int64() nogil - inline void kh_destroy_int64(kh_int64_t*) nogil - inline void kh_clear_int64(kh_int64_t*) nogil - inline khint_t kh_get_int64(kh_int64_t*, int64_t) nogil - inline void kh_resize_int64(kh_int64_t*, khint_t) nogil - inline khint_t kh_put_int64(kh_int64_t*, int64_t, int*) nogil - inline void kh_del_int64(kh_int64_t*, khint_t) nogil - - bint kh_exist_int64(kh_int64_t*, khiter_t) nogil - - ctypedef uint64_t khuint64_t - - ctypedef struct kh_uint64_t: - khint_t n_buckets, size, n_occupied, upper_bound - uint32_t *flags - khuint64_t *keys - size_t *vals - - inline kh_uint64_t* kh_init_uint64() nogil - inline void kh_destroy_uint64(kh_uint64_t*) nogil - inline void kh_clear_uint64(kh_uint64_t*) nogil - inline khint_t kh_get_uint64(kh_uint64_t*, int64_t) nogil - inline void kh_resize_uint64(kh_uint64_t*, khint_t) nogil - inline khint_t kh_put_uint64(kh_uint64_t*, int64_t, int*) nogil - inline void 
kh_del_uint64(kh_uint64_t*, khint_t) nogil - - bint kh_exist_uint64(kh_uint64_t*, khiter_t) nogil - - ctypedef struct kh_float64_t: - khint_t n_buckets, size, n_occupied, upper_bound - uint32_t *flags - float64_t *keys - size_t *vals - - inline kh_float64_t* kh_init_float64() nogil - inline void kh_destroy_float64(kh_float64_t*) nogil - inline void kh_clear_float64(kh_float64_t*) nogil - inline khint_t kh_get_float64(kh_float64_t*, float64_t) nogil - inline void kh_resize_float64(kh_float64_t*, khint_t) nogil - inline khint_t kh_put_float64(kh_float64_t*, float64_t, int*) nogil - inline void kh_del_float64(kh_float64_t*, khint_t) nogil - - bint kh_exist_float64(kh_float64_t*, khiter_t) nogil - - ctypedef struct kh_int32_t: - khint_t n_buckets, size, n_occupied, upper_bound - uint32_t *flags - int32_t *keys - size_t *vals - - inline kh_int32_t* kh_init_int32() nogil - inline void kh_destroy_int32(kh_int32_t*) nogil - inline void kh_clear_int32(kh_int32_t*) nogil - inline khint_t kh_get_int32(kh_int32_t*, int32_t) nogil - inline void kh_resize_int32(kh_int32_t*, khint_t) nogil - inline khint_t kh_put_int32(kh_int32_t*, int32_t, int*) nogil - inline void kh_del_int32(kh_int32_t*, khint_t) nogil - - bint kh_exist_int32(kh_int32_t*, khiter_t) nogil - - # sweep factorize - - ctypedef struct kh_strbox_t: - khint_t n_buckets, size, n_occupied, upper_bound - uint32_t *flags - kh_cstr_t *keys - PyObject **vals - - inline kh_strbox_t* kh_init_strbox() nogil - inline void kh_destroy_strbox(kh_strbox_t*) nogil - inline void kh_clear_strbox(kh_strbox_t*) nogil - inline khint_t kh_get_strbox(kh_strbox_t*, kh_cstr_t) nogil - inline void kh_resize_strbox(kh_strbox_t*, khint_t) nogil - inline khint_t kh_put_strbox(kh_strbox_t*, kh_cstr_t, int*) nogil - inline void kh_del_strbox(kh_strbox_t*, khint_t) nogil - - bint kh_exist_strbox(kh_strbox_t*, khiter_t) nogil diff --git a/pandas/src/klib/ktypes.h b/pandas/src/klib/ktypes.h deleted file mode 100644 index 981f17372a2d5..0000000000000 
--- a/pandas/src/klib/ktypes.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __KTYPES_H -#define __KTYPES_H - -/* compipler specific configuration */ - -#endif /* __KTYPES_H */ diff --git a/pandas/src/klib/kvec.h b/pandas/src/klib/kvec.h deleted file mode 100644 index c5e6e6c407dfc..0000000000000 --- a/pandas/src/klib/kvec.h +++ /dev/null @@ -1,151 +0,0 @@ -/* The MIT License - - Copyright (c) 2008, by Attractive Chaos - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. -*/ - -/* - An example: - -#include "kvec.h" -int main() { - kvec_t(int) array; - kv_init(array); - kv_push(int, array, 10); // append - kv_a(int, array, 20) = 5; // dynamic - kv_A(array, 20) = 4; // static - kv_destroy(array); - return 0; -} -*/ - -/* - 2008-09-22 (0.1.0): - - * The initial version. 
- -*/ - -#ifndef AC_KVEC_H -#define AC_KVEC_H - -#include -#include -#include - -#ifndef PANDAS_INLINE - #if defined(__GNUC__) - #define PANDAS_INLINE static __inline__ - #elif defined(_MSC_VER) - #define PANDAS_INLINE static __inline - #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L - #define PANDAS_INLINE static inline - #else - #define PANDAS_INLINE - #endif -#endif - -#define kv_roundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) - -#define kvec_t(type) struct { size_t n, m; type *a; } -#define kv_init(v) ((v).n = (v).m = 0, (v).a = 0) -#define kv_destroy(v) free((v).a) -#define kv_A(v, i) ((v).a[(i)]) -#define kv_pop(v) ((v).a[--(v).n]) -#define kv_size(v) ((v).n) -#define kv_max(v) ((v).m) - -#define kv_resize(type, v, s) ((v).m = (s), (v).a = (type*)realloc((v).a, sizeof(type) * (v).m)) - -#define kv_copy(type, v1, v0) do { \ - if ((v1).m < (v0).n) kv_resize(type, v1, (v0).n); \ - (v1).n = (v0).n; \ - memcpy((v1).a, (v0).a, sizeof(type) * (v0).n); \ - } while (0) \ - -#define kv_push(type, v, x) do { \ - if ((v)->n == (v)->m) { \ - (v)->m = (v)->m? (v)->m<<1 : 2; \ - (v)->a = (type*)realloc((v)->a, sizeof(type) * (v)->m); \ - } \ - (v)->a[(v)->n++] = (x); \ - } while (0) - -#define kv_pushp(type, v) (((v).n == (v).m)? \ - ((v).m = ((v).m? (v).m<<1 : 2), \ - (v).a = (type*)realloc((v).a, sizeof(type) * (v).m), 0) \ - : 0), ((v).a + ((v).n++)) - -#define kv_a(type, v, i) ((v).m <= (size_t)(i)? \ - ((v).m = (v).n = (i) + 1, kv_roundup32((v).m), \ - (v).a = (type*)realloc((v).a, sizeof(type) * (v).m), 0) \ - : (v).n <= (size_t)(i)? 
(v).n = (i) \ - : 0), (v).a[(i)] - -// #define kv_int64_push(v, x) (kv_push(int64_t, (v), (x))) - -typedef struct { - size_t n, m; - int64_t* a; -} kv_int64_t; - -typedef struct { - size_t n, m; - double* a; -} kv_double; - -typedef struct { - size_t n, m; - PyObject** a; -} kv_object_t; - -void PANDAS_INLINE kv_object_push(kv_object_t *v, PyObject *x) { - do { - if (v->n == v->m) { - v->m = v->m? v->m<<1 : 2; - v->a = (PyObject**)realloc(v->a, sizeof(PyObject*) * v->m); - } - v->a[v->n++] = x; - } while (0); - // kv_push(PyObject*, v, x); - Py_INCREF(x); -} - -void PANDAS_INLINE kv_int64_push(kv_int64_t *v, int64_t x) { - kv_push(int64_t, v, x); -} - -void PANDAS_INLINE kv_double_push(kv_double *v, double x) { - kv_push(double, v, x); -} - -void PANDAS_INLINE kv_object_destroy(kv_object_t *v) { - int i; - for (i = 0; i < v->n; ++i) - { - Py_XDECREF(v->a[i]); - } - free(v->a); -} - - -#endif diff --git a/pandas/src/numpy.pxd b/pandas/src/numpy.pxd deleted file mode 100644 index 9ab3b9b1b81ae..0000000000000 --- a/pandas/src/numpy.pxd +++ /dev/null @@ -1,984 +0,0 @@ -# NumPy static imports for Cython -# -# If any of the PyArray_* functions are called, import_array must be -# called first. -# -# This also defines backwards-compatability buffer acquisition -# code for use in Python 2.x (or Python <= 2.5 when NumPy starts -# implementing PEP-3118 directly). -# -# Because of laziness, the format string of the buffer is statically -# allocated. Increase the size if this is not enough, or submit a -# patch to do this properly. 
-# -# Author: Dag Sverre Seljebotn -# - -DEF _buffer_format_string_len = 255 - -cimport cpython.buffer as pybuf -from cpython.ref cimport Py_INCREF, Py_XDECREF -from cpython.object cimport PyObject -cimport libc.stdlib as stdlib -cimport libc.stdio as stdio - -cdef extern from "Python.h": - ctypedef int Py_intptr_t - -cdef extern from "numpy/arrayobject.h": - ctypedef Py_intptr_t npy_intp - ctypedef size_t npy_uintp - - cdef enum NPY_TYPES: - NPY_BOOL - NPY_BYTE - NPY_UBYTE - NPY_SHORT - NPY_USHORT - NPY_INT - NPY_UINT - NPY_LONG - NPY_ULONG - NPY_LONGLONG - NPY_ULONGLONG - NPY_FLOAT - NPY_DOUBLE - NPY_LONGDOUBLE - NPY_CFLOAT - NPY_CDOUBLE - NPY_CLONGDOUBLE - NPY_OBJECT - NPY_STRING - NPY_UNICODE - NPY_VOID - NPY_NTYPES - NPY_NOTYPE - - NPY_INT8 - NPY_INT16 - NPY_INT32 - NPY_INT64 - NPY_INT128 - NPY_INT256 - NPY_UINT8 - NPY_UINT16 - NPY_UINT32 - NPY_UINT64 - NPY_UINT128 - NPY_UINT256 - NPY_FLOAT16 - NPY_FLOAT32 - NPY_FLOAT64 - NPY_FLOAT80 - NPY_FLOAT96 - NPY_FLOAT128 - NPY_FLOAT256 - NPY_COMPLEX32 - NPY_COMPLEX64 - NPY_COMPLEX128 - NPY_COMPLEX160 - NPY_COMPLEX192 - NPY_COMPLEX256 - NPY_COMPLEX512 - - NPY_DATETIME - NPY_TIMEDELTA - - NPY_INTP - - ctypedef enum NPY_ORDER: - NPY_ANYORDER - NPY_CORDER - NPY_FORTRANORDER - - ctypedef enum NPY_CLIPMODE: - NPY_CLIP - NPY_WRAP - NPY_RAISE - - ctypedef enum NPY_SCALARKIND: - NPY_NOSCALAR, - NPY_BOOL_SCALAR, - NPY_INTPOS_SCALAR, - NPY_INTNEG_SCALAR, - NPY_FLOAT_SCALAR, - NPY_COMPLEX_SCALAR, - NPY_OBJECT_SCALAR - - ctypedef enum NPY_SORTKIND: - NPY_QUICKSORT - NPY_HEAPSORT - NPY_MERGESORT - - ctypedef enum NPY_SEARCHSIDE: - NPY_SEARCHLEFT - NPY_SEARCHRIGHT - - enum: - NPY_C_CONTIGUOUS - NPY_F_CONTIGUOUS - NPY_CONTIGUOUS - NPY_FORTRAN - NPY_OWNDATA - NPY_FORCECAST - NPY_ENSURECOPY - NPY_ENSUREARRAY - NPY_ELEMENTSTRIDES - NPY_ALIGNED - NPY_NOTSWAPPED - NPY_WRITEABLE - NPY_UPDATEIFCOPY - NPY_ARR_HAS_DESCR - - NPY_BEHAVED - NPY_BEHAVED_NS - NPY_CARRAY - NPY_CARRAY_RO - NPY_FARRAY - NPY_FARRAY_RO - NPY_DEFAULT - - NPY_IN_ARRAY - 
NPY_OUT_ARRAY - NPY_INOUT_ARRAY - NPY_IN_FARRAY - NPY_OUT_FARRAY - NPY_INOUT_FARRAY - - NPY_UPDATE_ALL - - cdef enum: - NPY_MAXDIMS - - npy_intp NPY_MAX_ELSIZE - - ctypedef void (*PyArray_VectorUnaryFunc)(void *, void *, npy_intp, void *, void *) - - ctypedef class numpy.dtype [object PyArray_Descr]: - # Use PyDataType_* macros when possible, however there are no macros - # for accessing some of the fields, so some are defined. Please - # ask on cython-dev if you need more. - cdef int type_num - cdef int itemsize "elsize" - cdef char byteorder - cdef object fields - cdef tuple names - - ctypedef extern class numpy.flatiter [object PyArrayIterObject]: - # Use through macros - pass - - ctypedef extern class numpy.broadcast [object PyArrayMultiIterObject]: - # Use through macros - pass - - ctypedef struct PyArrayObject: - # For use in situations where ndarray can't replace PyArrayObject*, - # like PyArrayObject**. - pass - - ctypedef class numpy.ndarray [object PyArrayObject]: - cdef __cythonbufferdefaults__ = {"mode": "strided"} - - cdef: - # Only taking a few of the most commonly used and stable fields. - # One should use PyArray_* macros instead to access the C fields. - char *data - int ndim "nd" - npy_intp *shape "dimensions" - npy_intp *strides - dtype descr - PyObject* base - - # Note: This syntax (function definition in pxd files) is an - # experimental exception made for __getbuffer__ and __releasebuffer__ - # -- the details of this may change. - def __getbuffer__(ndarray self, Py_buffer* info, int flags): - # This implementation of getbuffer is geared towards Cython - # requirements, and does not yet fullfill the PEP. 
- # In particular strided access is always provided regardless - # of flags - - if info == NULL: return - - cdef int copy_shape, i, ndim - cdef int endian_detector = 1 - cdef bint little_endian = ((&endian_detector)[0] != 0) - - ndim = PyArray_NDIM(self) - - if sizeof(npy_intp) != sizeof(Py_ssize_t): - copy_shape = 1 - else: - copy_shape = 0 - - if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) - and not PyArray_CHKFLAGS(self, NPY_C_CONTIGUOUS)): - raise ValueError(u"ndarray is not C contiguous") - - if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) - and not PyArray_CHKFLAGS(self, NPY_F_CONTIGUOUS)): - raise ValueError(u"ndarray is not Fortran contiguous") - - info.buf = PyArray_DATA(self) - info.ndim = ndim - if copy_shape: - # Allocate new buffer for strides and shape info. - # This is allocated as one block, strides first. - info.strides = stdlib.malloc(sizeof(Py_ssize_t) * ndim * 2) - info.shape = info.strides + ndim - for i in range(ndim): - info.strides[i] = PyArray_STRIDES(self)[i] - info.shape[i] = PyArray_DIMS(self)[i] - else: - info.strides = PyArray_STRIDES(self) - info.shape = PyArray_DIMS(self) - info.suboffsets = NULL - info.itemsize = PyArray_ITEMSIZE(self) - info.readonly = not PyArray_ISWRITEABLE(self) - - cdef int t - cdef char* f = NULL - cdef dtype descr = self.descr - cdef list stack - cdef int offset - - cdef bint hasfields = PyDataType_HASFIELDS(descr) - - if not hasfields and not copy_shape: - # do not call releasebuffer - info.obj = None - else: - # need to call releasebuffer - info.obj = self - - if not hasfields: - t = descr.type_num - if ((descr.byteorder == '>' and little_endian) or - (descr.byteorder == '<' and not little_endian)): - raise ValueError(u"Non-native byte order not supported") - if t == NPY_BYTE: f = "b" - elif t == NPY_UBYTE: f = "B" - elif t == NPY_SHORT: f = "h" - elif t == NPY_USHORT: f = "H" - elif t == NPY_INT: f = "i" - elif t == NPY_UINT: f = "I" - elif t == NPY_LONG: f = "l" - 
elif t == NPY_ULONG: f = "L" - elif t == NPY_LONGLONG: f = "q" - elif t == NPY_ULONGLONG: f = "Q" - elif t == NPY_FLOAT: f = "f" - elif t == NPY_DOUBLE: f = "d" - elif t == NPY_LONGDOUBLE: f = "g" - elif t == NPY_CFLOAT: f = "Zf" - elif t == NPY_CDOUBLE: f = "Zd" - elif t == NPY_CLONGDOUBLE: f = "Zg" - elif t == NPY_OBJECT: f = "O" - else: - raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) - info.format = f - return - else: - info.format = stdlib.malloc(_buffer_format_string_len) - info.format[0] = '^' # Native data types, manual alignment - offset = 0 - f = _util_dtypestring(descr, info.format + 1, - info.format + _buffer_format_string_len, - &offset) - f[0] = 0 # Terminate format string - - def __releasebuffer__(ndarray self, Py_buffer* info): - if PyArray_HASFIELDS(self): - stdlib.free(info.format) - if sizeof(npy_intp) != sizeof(Py_ssize_t): - stdlib.free(info.strides) - # info.shape was stored after info.strides in the same block - - - ctypedef signed char npy_bool - - ctypedef signed char npy_byte - ctypedef signed short npy_short - ctypedef signed int npy_int - ctypedef signed long npy_long - ctypedef signed long long npy_longlong - - ctypedef unsigned char npy_ubyte - ctypedef unsigned short npy_ushort - ctypedef unsigned int npy_uint - ctypedef unsigned long npy_ulong - ctypedef unsigned long long npy_ulonglong - - ctypedef float npy_float - ctypedef double npy_double - ctypedef long double npy_longdouble - - ctypedef signed char npy_int8 - ctypedef signed short npy_int16 - ctypedef signed int npy_int32 - ctypedef signed long long npy_int64 - ctypedef signed long long npy_int96 - ctypedef signed long long npy_int128 - - ctypedef unsigned char npy_uint8 - ctypedef unsigned short npy_uint16 - ctypedef unsigned int npy_uint32 - ctypedef unsigned long long npy_uint64 - ctypedef unsigned long long npy_uint96 - ctypedef unsigned long long npy_uint128 - - ctypedef float npy_float16 - ctypedef float npy_float32 - ctypedef double npy_float64 - ctypedef 
long double npy_float80 - ctypedef long double npy_float96 - ctypedef long double npy_float128 - - ctypedef struct npy_cfloat: - double real - double imag - - ctypedef struct npy_cdouble: - double real - double imag - - ctypedef struct npy_clongdouble: - double real - double imag - - ctypedef struct npy_complex64: - double real - double imag - - ctypedef struct npy_complex128: - double real - double imag - - ctypedef struct npy_complex160: - double real - double imag - - ctypedef struct npy_complex192: - double real - double imag - - ctypedef struct npy_complex256: - double real - double imag - - ctypedef struct PyArray_Dims: - npy_intp *ptr - int len - - void import_array() - - # - # Macros from ndarrayobject.h - # - bint PyArray_CHKFLAGS(ndarray m, int flags) - bint PyArray_ISCONTIGUOUS(ndarray m) - bint PyArray_ISWRITEABLE(ndarray m) - bint PyArray_ISALIGNED(ndarray m) - - int PyArray_NDIM(ndarray) - bint PyArray_ISONESEGMENT(ndarray) - bint PyArray_ISFORTRAN(ndarray) - int PyArray_FORTRANIF(ndarray) - - void* PyArray_DATA(ndarray) - char* PyArray_BYTES(ndarray) - npy_intp* PyArray_DIMS(ndarray) - npy_intp* PyArray_STRIDES(ndarray) - npy_intp PyArray_DIM(ndarray, size_t) - npy_intp PyArray_STRIDE(ndarray, size_t) - - # object PyArray_BASE(ndarray) wrong refcount semantics - # dtype PyArray_DESCR(ndarray) wrong refcount semantics - int PyArray_FLAGS(ndarray) - npy_intp PyArray_ITEMSIZE(ndarray) - int PyArray_TYPE(ndarray arr) - - object PyArray_GETITEM(ndarray arr, void *itemptr) - int PyArray_SETITEM(ndarray arr, void *itemptr, object obj) - - bint PyTypeNum_ISBOOL(int) - bint PyTypeNum_ISUNSIGNED(int) - bint PyTypeNum_ISSIGNED(int) - bint PyTypeNum_ISINTEGER(int) - bint PyTypeNum_ISFLOAT(int) - bint PyTypeNum_ISNUMBER(int) - bint PyTypeNum_ISSTRING(int) - bint PyTypeNum_ISCOMPLEX(int) - bint PyTypeNum_ISPYTHON(int) - bint PyTypeNum_ISFLEXIBLE(int) - bint PyTypeNum_ISUSERDEF(int) - bint PyTypeNum_ISEXTENDED(int) - bint PyTypeNum_ISOBJECT(int) - - bint 
PyDataType_ISBOOL(dtype) - bint PyDataType_ISUNSIGNED(dtype) - bint PyDataType_ISSIGNED(dtype) - bint PyDataType_ISINTEGER(dtype) - bint PyDataType_ISFLOAT(dtype) - bint PyDataType_ISNUMBER(dtype) - bint PyDataType_ISSTRING(dtype) - bint PyDataType_ISCOMPLEX(dtype) - bint PyDataType_ISPYTHON(dtype) - bint PyDataType_ISFLEXIBLE(dtype) - bint PyDataType_ISUSERDEF(dtype) - bint PyDataType_ISEXTENDED(dtype) - bint PyDataType_ISOBJECT(dtype) - bint PyDataType_HASFIELDS(dtype) - - bint PyArray_ISBOOL(ndarray) - bint PyArray_ISUNSIGNED(ndarray) - bint PyArray_ISSIGNED(ndarray) - bint PyArray_ISINTEGER(ndarray) - bint PyArray_ISFLOAT(ndarray) - bint PyArray_ISNUMBER(ndarray) - bint PyArray_ISSTRING(ndarray) - bint PyArray_ISCOMPLEX(ndarray) - bint PyArray_ISPYTHON(ndarray) - bint PyArray_ISFLEXIBLE(ndarray) - bint PyArray_ISUSERDEF(ndarray) - bint PyArray_ISEXTENDED(ndarray) - bint PyArray_ISOBJECT(ndarray) - bint PyArray_HASFIELDS(ndarray) - - bint PyArray_ISVARIABLE(ndarray) - - bint PyArray_SAFEALIGNEDCOPY(ndarray) - bint PyArray_ISNBO(ndarray) - bint PyArray_IsNativeByteOrder(ndarray) - bint PyArray_ISNOTSWAPPED(ndarray) - bint PyArray_ISBYTESWAPPED(ndarray) - - bint PyArray_FLAGSWAP(ndarray, int) - - bint PyArray_ISCARRAY(ndarray) - bint PyArray_ISCARRAY_RO(ndarray) - bint PyArray_ISFARRAY(ndarray) - bint PyArray_ISFARRAY_RO(ndarray) - bint PyArray_ISBEHAVED(ndarray) - bint PyArray_ISBEHAVED_RO(ndarray) - - - bint PyDataType_ISNOTSWAPPED(dtype) - bint PyDataType_ISBYTESWAPPED(dtype) - - bint PyArray_DescrCheck(object) - - bint PyArray_Check(object) - bint PyArray_CheckExact(object) - - # Cannot be supported due to out arg: - # bint PyArray_HasArrayInterfaceType(object, dtype, object, object&) - # bint PyArray_HasArrayInterface(op, out) - - - bint PyArray_IsZeroDim(object) - # Cannot be supported due to ## ## in macro: - # bint PyArray_IsScalar(object, verbatim work) - bint PyArray_CheckScalar(object) - bint PyArray_IsPythonNumber(object) - bint 
PyArray_IsPythonScalar(object) - bint PyArray_IsAnyScalar(object) - bint PyArray_CheckAnyScalar(object) - ndarray PyArray_GETCONTIGUOUS(ndarray) - bint PyArray_SAMESHAPE(ndarray, ndarray) - npy_intp PyArray_SIZE(ndarray) - npy_intp PyArray_NBYTES(ndarray) - - object PyArray_FROM_O(object) - object PyArray_FROM_OF(object m, int flags) - bint PyArray_FROM_OT(object m, int type) - bint PyArray_FROM_OTF(object m, int type, int flags) - object PyArray_FROMANY(object m, int type, int min, int max, int flags) - object PyArray_ZEROS(int nd, npy_intp* dims, int type, int fortran) - object PyArray_EMPTY(int nd, npy_intp* dims, int type, int fortran) - void PyArray_FILLWBYTE(object, int val) - npy_intp PyArray_REFCOUNT(object) - object PyArray_ContiguousFromAny(op, int, int min_depth, int max_depth) - unsigned char PyArray_EquivArrTypes(ndarray a1, ndarray a2) - bint PyArray_EquivByteorders(int b1, int b2) - object PyArray_SimpleNew(int nd, npy_intp* dims, int typenum) - object PyArray_SimpleNewFromData(int nd, npy_intp* dims, int typenum, void* data) - #object PyArray_SimpleNewFromDescr(int nd, npy_intp* dims, dtype descr) - object PyArray_ToScalar(void* data, ndarray arr) - - void* PyArray_GETPTR1(ndarray m, npy_intp i) - void* PyArray_GETPTR2(ndarray m, npy_intp i, npy_intp j) - void* PyArray_GETPTR3(ndarray m, npy_intp i, npy_intp j, npy_intp k) - void* PyArray_GETPTR4(ndarray m, npy_intp i, npy_intp j, npy_intp k, npy_intp l) - - void PyArray_XDECREF_ERR(ndarray) - # Cannot be supported due to out arg - # void PyArray_DESCR_REPLACE(descr) - - - object PyArray_Copy(ndarray) - object PyArray_FromObject(object op, int type, int min_depth, int max_depth) - object PyArray_ContiguousFromObject(object op, int type, int min_depth, int max_depth) - object PyArray_CopyFromObject(object op, int type, int min_depth, int max_depth) - - object PyArray_Cast(ndarray mp, int type_num) - object PyArray_Take(ndarray ap, object items, int axis) - object PyArray_Put(ndarray ap, object items, 
object values) - - void PyArray_ITER_RESET(flatiter it) nogil - void PyArray_ITER_NEXT(flatiter it) nogil - void PyArray_ITER_GOTO(flatiter it, npy_intp* destination) nogil - void PyArray_ITER_GOTO1D(flatiter it, npy_intp ind) nogil - void* PyArray_ITER_DATA(flatiter it) nogil - bint PyArray_ITER_NOTDONE(flatiter it) nogil - - void PyArray_MultiIter_RESET(broadcast multi) nogil - void PyArray_MultiIter_NEXT(broadcast multi) nogil - void PyArray_MultiIter_GOTO(broadcast multi, npy_intp dest) nogil - void PyArray_MultiIter_GOTO1D(broadcast multi, npy_intp ind) nogil - void* PyArray_MultiIter_DATA(broadcast multi, npy_intp i) nogil - void PyArray_MultiIter_NEXTi(broadcast multi, npy_intp i) nogil - bint PyArray_MultiIter_NOTDONE(broadcast multi) nogil - - # Functions from __multiarray_api.h - - # Functions taking dtype and returning object/ndarray are disabled - # for now as they steal dtype references. I'm conservative and disable - # more than is probably needed until it can be checked further. 
- int PyArray_SetNumericOps (object) - object PyArray_GetNumericOps () - int PyArray_INCREF (ndarray) - int PyArray_XDECREF (ndarray) - void PyArray_SetStringFunction (object, int) - dtype PyArray_DescrFromType (int) - object PyArray_TypeObjectFromType (int) - char * PyArray_Zero (ndarray) - char * PyArray_One (ndarray) - #object PyArray_CastToType (ndarray, dtype, int) - int PyArray_CastTo (ndarray, ndarray) - int PyArray_CastAnyTo (ndarray, ndarray) - int PyArray_CanCastSafely (int, int) - npy_bool PyArray_CanCastTo (dtype, dtype) - int PyArray_ObjectType (object, int) - dtype PyArray_DescrFromObject (object, dtype) - #ndarray* PyArray_ConvertToCommonType (object, int *) - dtype PyArray_DescrFromScalar (object) - dtype PyArray_DescrFromTypeObject (object) - npy_intp PyArray_Size (object) - #object PyArray_Scalar (void *, dtype, object) - #object PyArray_FromScalar (object, dtype) - void PyArray_ScalarAsCtype (object, void *) - #int PyArray_CastScalarToCtype (object, void *, dtype) - #int PyArray_CastScalarDirect (object, dtype, void *, int) - object PyArray_ScalarFromObject (object) - #PyArray_VectorUnaryFunc * PyArray_GetCastFunc (dtype, int) - object PyArray_FromDims (int, int *, int) - #object PyArray_FromDimsAndDataAndDescr (int, int *, dtype, char *) - #object PyArray_FromAny (object, dtype, int, int, int, object) - object PyArray_EnsureArray (object) - object PyArray_EnsureAnyArray (object) - #object PyArray_FromFile (stdio.FILE *, dtype, npy_intp, char *) - #object PyArray_FromString (char *, npy_intp, dtype, npy_intp, char *) - #object PyArray_FromBuffer (object, dtype, npy_intp, npy_intp) - #object PyArray_FromIter (object, dtype, npy_intp) - object PyArray_Return (ndarray) - #object PyArray_GetField (ndarray, dtype, int) - #int PyArray_SetField (ndarray, dtype, int, object) - object PyArray_Byteswap (ndarray, npy_bool) - object PyArray_Resize (ndarray, PyArray_Dims *, int, NPY_ORDER) - int PyArray_MoveInto (ndarray, ndarray) - int PyArray_CopyInto 
(ndarray, ndarray) - int PyArray_CopyAnyInto (ndarray, ndarray) - int PyArray_CopyObject (ndarray, object) - object PyArray_NewCopy (ndarray, NPY_ORDER) - object PyArray_ToList (ndarray) - object PyArray_ToString (ndarray, NPY_ORDER) - int PyArray_ToFile (ndarray, stdio.FILE *, char *, char *) - int PyArray_Dump (object, object, int) - object PyArray_Dumps (object, int) - int PyArray_ValidType (int) - void PyArray_UpdateFlags (ndarray, int) - object PyArray_New (type, int, npy_intp *, int, npy_intp *, void *, int, int, object) - #object PyArray_NewFromDescr (type, dtype, int, npy_intp *, npy_intp *, void *, int, object) - #dtype PyArray_DescrNew (dtype) - dtype PyArray_DescrNewFromType (int) - double PyArray_GetPriority (object, double) - object PyArray_IterNew (object) - object PyArray_MultiIterNew (int, ...) - - int PyArray_PyIntAsInt (object) - npy_intp PyArray_PyIntAsIntp (object) - int PyArray_Broadcast (broadcast) - void PyArray_FillObjectArray (ndarray, object) - int PyArray_FillWithScalar (ndarray, object) - npy_bool PyArray_CheckStrides (int, int, npy_intp, npy_intp, npy_intp *, npy_intp *) - dtype PyArray_DescrNewByteorder (dtype, char) - object PyArray_IterAllButAxis (object, int *) - #object PyArray_CheckFromAny (object, dtype, int, int, int, object) - #object PyArray_FromArray (ndarray, dtype, int) - object PyArray_FromInterface (object) - object PyArray_FromStructInterface (object) - #object PyArray_FromArrayAttr (object, dtype, object) - #NPY_SCALARKIND PyArray_ScalarKind (int, ndarray*) - int PyArray_CanCoerceScalar (int, int, NPY_SCALARKIND) - object PyArray_NewFlagsObject (object) - npy_bool PyArray_CanCastScalar (type, type) - #int PyArray_CompareUCS4 (npy_ucs4 *, npy_ucs4 *, register size_t) - int PyArray_RemoveSmallest (broadcast) - int PyArray_ElementStrides (object) - void PyArray_Item_INCREF (char *, dtype) - void PyArray_Item_XDECREF (char *, dtype) - object PyArray_FieldNames (object) - object PyArray_Transpose (ndarray, PyArray_Dims *) - 
object PyArray_TakeFrom (ndarray, object, int, ndarray, NPY_CLIPMODE) - object PyArray_PutTo (ndarray, object, object, NPY_CLIPMODE) - object PyArray_PutMask (ndarray, object, object) - object PyArray_Repeat (ndarray, object, int) - object PyArray_Choose (ndarray, object, ndarray, NPY_CLIPMODE) - int PyArray_Sort (ndarray, int, NPY_SORTKIND) - object PyArray_ArgSort (ndarray, int, NPY_SORTKIND) - object PyArray_SearchSorted (ndarray, object, NPY_SEARCHSIDE) - object PyArray_ArgMax (ndarray, int, ndarray) - object PyArray_ArgMin (ndarray, int, ndarray) - object PyArray_Reshape (ndarray, object) - object PyArray_Newshape (ndarray, PyArray_Dims *, NPY_ORDER) - object PyArray_Squeeze (ndarray) - #object PyArray_View (ndarray, dtype, type) - object PyArray_SwapAxes (ndarray, int, int) - object PyArray_Max (ndarray, int, ndarray) - object PyArray_Min (ndarray, int, ndarray) - object PyArray_Ptp (ndarray, int, ndarray) - object PyArray_Mean (ndarray, int, int, ndarray) - object PyArray_Trace (ndarray, int, int, int, int, ndarray) - object PyArray_Diagonal (ndarray, int, int, int) - object PyArray_Clip (ndarray, object, object, ndarray) - object PyArray_Conjugate (ndarray, ndarray) - object PyArray_Nonzero (ndarray) - object PyArray_Std (ndarray, int, int, ndarray, int) - object PyArray_Sum (ndarray, int, int, ndarray) - object PyArray_CumSum (ndarray, int, int, ndarray) - object PyArray_Prod (ndarray, int, int, ndarray) - object PyArray_CumProd (ndarray, int, int, ndarray) - object PyArray_All (ndarray, int, ndarray) - object PyArray_Any (ndarray, int, ndarray) - object PyArray_Compress (ndarray, object, int, ndarray) - object PyArray_Flatten (ndarray, NPY_ORDER) - object PyArray_Ravel (ndarray, NPY_ORDER) - npy_intp PyArray_MultiplyList (npy_intp *, int) - int PyArray_MultiplyIntList (int *, int) - void * PyArray_GetPtr (ndarray, npy_intp*) - int PyArray_CompareLists (npy_intp *, npy_intp *, int) - #int PyArray_AsCArray (object*, void *, npy_intp *, int, dtype) - #int 
PyArray_As1D (object*, char **, int *, int) - #int PyArray_As2D (object*, char ***, int *, int *, int) - int PyArray_Free (object, void *) - #int PyArray_Converter (object, object*) - int PyArray_IntpFromSequence (object, npy_intp *, int) - object PyArray_Concatenate (object, int) - object PyArray_InnerProduct (object, object) - object PyArray_MatrixProduct (object, object) - object PyArray_CopyAndTranspose (object) - object PyArray_Correlate (object, object, int) - int PyArray_TypestrConvert (int, int) - #int PyArray_DescrConverter (object, dtype*) - #int PyArray_DescrConverter2 (object, dtype*) - int PyArray_IntpConverter (object, PyArray_Dims *) - #int PyArray_BufferConverter (object, chunk) - int PyArray_AxisConverter (object, int *) - int PyArray_BoolConverter (object, npy_bool *) - int PyArray_ByteorderConverter (object, char *) - int PyArray_OrderConverter (object, NPY_ORDER *) - unsigned char PyArray_EquivTypes (dtype, dtype) - #object PyArray_Zeros (int, npy_intp *, dtype, int) - #object PyArray_Empty (int, npy_intp *, dtype, int) - object PyArray_Where (object, object, object) - object PyArray_Arange (double, double, double, int) - #object PyArray_ArangeObj (object, object, object, dtype) - int PyArray_SortkindConverter (object, NPY_SORTKIND *) - object PyArray_LexSort (object, int) - object PyArray_Round (ndarray, int, ndarray) - unsigned char PyArray_EquivTypenums (int, int) - int PyArray_RegisterDataType (dtype) - int PyArray_RegisterCastFunc (dtype, int, PyArray_VectorUnaryFunc *) - int PyArray_RegisterCanCast (dtype, int, NPY_SCALARKIND) - #void PyArray_InitArrFuncs (PyArray_ArrFuncs *) - object PyArray_IntTupleFromIntp (int, npy_intp *) - int PyArray_TypeNumFromName (char *) - int PyArray_ClipmodeConverter (object, NPY_CLIPMODE *) - #int PyArray_OutputConverter (object, ndarray*) - object PyArray_BroadcastToShape (object, npy_intp *, int) - void _PyArray_SigintHandler (int) - void* _PyArray_GetSigintBuf () - #int PyArray_DescrAlignConverter (object, 
dtype*) - #int PyArray_DescrAlignConverter2 (object, dtype*) - int PyArray_SearchsideConverter (object, void *) - object PyArray_CheckAxis (ndarray, int *, int) - npy_intp PyArray_OverflowMultiplyList (npy_intp *, int) - int PyArray_CompareString (char *, char *, size_t) - - -# Typedefs that matches the runtime dtype objects in -# the numpy module. - -# The ones that are commented out needs an IFDEF function -# in Cython to enable them only on the right systems. - -ctypedef npy_int8 int8_t -ctypedef npy_int16 int16_t -ctypedef npy_int32 int32_t -ctypedef npy_int64 int64_t -#ctypedef npy_int96 int96_t -#ctypedef npy_int128 int128_t - -ctypedef npy_uint8 uint8_t -ctypedef npy_uint16 uint16_t -ctypedef npy_uint32 uint32_t -ctypedef npy_uint64 uint64_t -#ctypedef npy_uint96 uint96_t -#ctypedef npy_uint128 uint128_t - -ctypedef npy_float16 float16_t -ctypedef npy_float32 float32_t -ctypedef npy_float64 float64_t -#ctypedef npy_float80 float80_t -#ctypedef npy_float128 float128_t - -ctypedef float complex complex64_t -ctypedef double complex complex128_t - -# The int types are mapped a bit surprising -- -# numpy.int corresponds to 'l' and numpy.long to 'q' -ctypedef npy_long int_t -ctypedef npy_longlong long_t -ctypedef npy_longlong longlong_t - -ctypedef npy_ulong uint_t -ctypedef npy_ulonglong ulong_t -ctypedef npy_ulonglong ulonglong_t - -ctypedef npy_intp intp_t -ctypedef npy_uintp uintp_t - -ctypedef npy_double float_t -ctypedef npy_double double_t -ctypedef npy_longdouble longdouble_t - -ctypedef npy_cfloat cfloat_t -ctypedef npy_cdouble cdouble_t -ctypedef npy_clongdouble clongdouble_t - -ctypedef npy_cdouble complex_t - -cdef inline object PyArray_MultiIterNew1(a): - return PyArray_MultiIterNew(1, a) - -cdef inline object PyArray_MultiIterNew2(a, b): - return PyArray_MultiIterNew(2, a, b) - -cdef inline object PyArray_MultiIterNew3(a, b, c): - return PyArray_MultiIterNew(3, a, b, c) - -cdef inline object PyArray_MultiIterNew4(a, b, c, d): - return 
PyArray_MultiIterNew(4, a, b, c, d) - -cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): - return PyArray_MultiIterNew(5, a, b, c, d, e) - -cdef inline char* _util_dtypestring(dtype descr, char* f, char* end, int* offset) except NULL: - # Recursive utility function used in __getbuffer__ to get format - # string. The new location in the format string is returned. - - cdef dtype child - cdef int delta_offset - cdef tuple i - cdef int endian_detector = 1 - cdef bint little_endian = ((&endian_detector)[0] != 0) - cdef tuple fields - - for childname in descr.names: - fields = descr.fields[childname] - child, new_offset = fields - - if (end - f) - (new_offset - offset[0]) < 15: - raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd") - - if ((child.byteorder == '>' and little_endian) or - (child.byteorder == '<' and not little_endian)): - raise ValueError(u"Non-native byte order not supported") - # One could encode it in the format string and have Cython - # complain instead, BUT: < and > in format strings also imply - # standardized sizes for datatypes, and we rely on native in - # order to avoid reencoding data types based on their size. - # - # A proper PEP 3118 exporter for other clients than Cython - # must deal properly with this! 
- - # Output padding bytes - while offset[0] < new_offset: - f[0] = 120 # "x"; pad byte - f += 1 - offset[0] += 1 - - offset[0] += child.itemsize - - if not PyDataType_HASFIELDS(child): - t = child.type_num - if end - f < 5: - raise RuntimeError(u"Format string allocated too short.") - - # Until ticket #99 is fixed, use integers to avoid warnings - if t == NPY_BYTE: f[0] = 98 #"b" - elif t == NPY_UBYTE: f[0] = 66 #"B" - elif t == NPY_SHORT: f[0] = 104 #"h" - elif t == NPY_USHORT: f[0] = 72 #"H" - elif t == NPY_INT: f[0] = 105 #"i" - elif t == NPY_UINT: f[0] = 73 #"I" - elif t == NPY_LONG: f[0] = 108 #"l" - elif t == NPY_ULONG: f[0] = 76 #"L" - elif t == NPY_LONGLONG: f[0] = 113 #"q" - elif t == NPY_ULONGLONG: f[0] = 81 #"Q" - elif t == NPY_FLOAT: f[0] = 102 #"f" - elif t == NPY_DOUBLE: f[0] = 100 #"d" - elif t == NPY_LONGDOUBLE: f[0] = 103 #"g" - elif t == NPY_CFLOAT: f[0] = 90; f[1] = 102; f += 1 # Zf - elif t == NPY_CDOUBLE: f[0] = 90; f[1] = 100; f += 1 # Zd - elif t == NPY_CLONGDOUBLE: f[0] = 90; f[1] = 103; f += 1 # Zg - elif t == NPY_OBJECT: f[0] = 79 #"O" - else: - raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) - f += 1 - else: - # Cython ignores struct boundary information ("T{...}"), - # so don't output it - f = _util_dtypestring(child, f, end, offset) - return f - - -# -# ufunc API -# - -cdef extern from "numpy/ufuncobject.h": - - ctypedef void (*PyUFuncGenericFunction) (char **, npy_intp *, npy_intp *, void *) - - ctypedef extern class numpy.ufunc [object PyUFuncObject]: - cdef: - int nin, nout, nargs - int identity - PyUFuncGenericFunction *functions - void **data - int ntypes - int check_return - char *name - char *types - char *doc - void *ptr - PyObject *obj - PyObject *userloops - - cdef enum: - PyUFunc_Zero - PyUFunc_One - PyUFunc_None - UFUNC_ERR_IGNORE - UFUNC_ERR_WARN - UFUNC_ERR_RAISE - UFUNC_ERR_CALL - UFUNC_ERR_PRINT - UFUNC_ERR_LOG - UFUNC_MASK_DIVIDEBYZERO - UFUNC_MASK_OVERFLOW - UFUNC_MASK_UNDERFLOW - UFUNC_MASK_INVALID - 
UFUNC_SHIFT_DIVIDEBYZERO - UFUNC_SHIFT_OVERFLOW - UFUNC_SHIFT_UNDERFLOW - UFUNC_SHIFT_INVALID - UFUNC_FPE_DIVIDEBYZERO - UFUNC_FPE_OVERFLOW - UFUNC_FPE_UNDERFLOW - UFUNC_FPE_INVALID - UFUNC_ERR_DEFAULT - UFUNC_ERR_DEFAULT2 - - object PyUFunc_FromFuncAndData(PyUFuncGenericFunction *, - void **, char *, int, int, int, int, char *, char *, int) - int PyUFunc_RegisterLoopForType(ufunc, int, - PyUFuncGenericFunction, int *, void *) - int PyUFunc_GenericFunction \ - (ufunc, PyObject *, PyObject *, PyArrayObject **) - void PyUFunc_f_f_As_d_d \ - (char **, npy_intp *, npy_intp *, void *) - void PyUFunc_d_d \ - (char **, npy_intp *, npy_intp *, void *) - void PyUFunc_f_f \ - (char **, npy_intp *, npy_intp *, void *) - void PyUFunc_g_g \ - (char **, npy_intp *, npy_intp *, void *) - void PyUFunc_F_F_As_D_D \ - (char **, npy_intp *, npy_intp *, void *) - void PyUFunc_F_F \ - (char **, npy_intp *, npy_intp *, void *) - void PyUFunc_D_D \ - (char **, npy_intp *, npy_intp *, void *) - void PyUFunc_G_G \ - (char **, npy_intp *, npy_intp *, void *) - void PyUFunc_O_O \ - (char **, npy_intp *, npy_intp *, void *) - void PyUFunc_ff_f_As_dd_d \ - (char **, npy_intp *, npy_intp *, void *) - void PyUFunc_ff_f \ - (char **, npy_intp *, npy_intp *, void *) - void PyUFunc_dd_d \ - (char **, npy_intp *, npy_intp *, void *) - void PyUFunc_gg_g \ - (char **, npy_intp *, npy_intp *, void *) - void PyUFunc_FF_F_As_DD_D \ - (char **, npy_intp *, npy_intp *, void *) - void PyUFunc_DD_D \ - (char **, npy_intp *, npy_intp *, void *) - void PyUFunc_FF_F \ - (char **, npy_intp *, npy_intp *, void *) - void PyUFunc_GG_G \ - (char **, npy_intp *, npy_intp *, void *) - void PyUFunc_OO_O \ - (char **, npy_intp *, npy_intp *, void *) - void PyUFunc_O_O_method \ - (char **, npy_intp *, npy_intp *, void *) - void PyUFunc_OO_O_method \ - (char **, npy_intp *, npy_intp *, void *) - void PyUFunc_On_Om \ - (char **, npy_intp *, npy_intp *, void *) - int PyUFunc_GetPyValues \ - (char *, int *, int *, PyObject 
**) - int PyUFunc_checkfperr \ - (int, PyObject *, int *) - void PyUFunc_clearfperr() - int PyUFunc_getfperr() - int PyUFunc_handlefperr \ - (int, PyObject *, int, int *) - int PyUFunc_ReplaceLoopBySignature \ - (ufunc, PyUFuncGenericFunction, int *, PyUFuncGenericFunction *) - object PyUFunc_FromFuncAndDataAndSignature \ - (PyUFuncGenericFunction *, void **, char *, int, int, int, - int, char *, char *, int, char *) - - void import_ufunc() - - -cdef inline void set_array_base(ndarray arr, object base): - cdef PyObject* baseptr - if base is None: - baseptr = NULL - else: - Py_INCREF(base) # important to do this before decref below! - baseptr = base - Py_XDECREF(arr.base) - arr.base = baseptr - -cdef inline object get_array_base(ndarray arr): - if arr.base is NULL: - return None - else: - return arr.base diff --git a/pandas/src/numpy_helper.h b/pandas/src/numpy_helper.h deleted file mode 100644 index 809edb2e99fa2..0000000000000 --- a/pandas/src/numpy_helper.h +++ /dev/null @@ -1,162 +0,0 @@ -/* -Copyright (c) 2016, PyData Development Team -All rights reserved. - -Distributed under the terms of the BSD Simplified License. - -The full license is in the LICENSE file, distributed with this software. 
-*/ - -#ifndef PANDAS_SRC_NUMPY_HELPER_H_ -#define PANDAS_SRC_NUMPY_HELPER_H_ - -#include "Python.h" -#include "helper.h" -#include "numpy/arrayobject.h" -#include "numpy/arrayscalars.h" - -#define PANDAS_FLOAT 0 -#define PANDAS_INT 1 -#define PANDAS_BOOL 2 -#define PANDAS_STRING 3 -#define PANDAS_OBJECT 4 -#define PANDAS_DATETIME 5 - -PANDAS_INLINE int infer_type(PyObject* obj) { - if (PyBool_Check(obj)) { - return PANDAS_BOOL; - } else if (PyArray_IsIntegerScalar(obj)) { - return PANDAS_INT; - } else if (PyArray_IsScalar(obj, Datetime)) { - return PANDAS_DATETIME; - } else if (PyFloat_Check(obj) || PyArray_IsScalar(obj, Floating)) { - return PANDAS_FLOAT; - } else if (PyString_Check(obj) || PyUnicode_Check(obj)) { - return PANDAS_STRING; - } else { - return PANDAS_OBJECT; - } -} - -PANDAS_INLINE npy_int64 get_nat(void) { return NPY_MIN_INT64; } - -PANDAS_INLINE npy_datetime get_datetime64_value(PyObject* obj) { - return ((PyDatetimeScalarObject*)obj)->obval; -} - -PANDAS_INLINE npy_timedelta get_timedelta64_value(PyObject* obj) { - return ((PyTimedeltaScalarObject*)obj)->obval; -} - -PANDAS_INLINE int is_integer_object(PyObject* obj) { - return (!PyBool_Check(obj)) && PyArray_IsIntegerScalar(obj); -} - -PANDAS_INLINE int is_float_object(PyObject* obj) { - return (PyFloat_Check(obj) || PyArray_IsScalar(obj, Floating)); -} -PANDAS_INLINE int is_complex_object(PyObject* obj) { - return (PyComplex_Check(obj) || PyArray_IsScalar(obj, ComplexFloating)); -} - -PANDAS_INLINE int is_bool_object(PyObject* obj) { - return (PyBool_Check(obj) || PyArray_IsScalar(obj, Bool)); -} - -PANDAS_INLINE int is_string_object(PyObject* obj) { - return (PyString_Check(obj) || PyUnicode_Check(obj)); -} - -PANDAS_INLINE int is_datetime64_object(PyObject* obj) { - return PyArray_IsScalar(obj, Datetime); -} - -PANDAS_INLINE int is_timedelta64_object(PyObject* obj) { - return PyArray_IsScalar(obj, Timedelta); -} - -PANDAS_INLINE int assign_value_1d(PyArrayObject* ap, Py_ssize_t _i, - 
PyObject* v) { - npy_intp i = (npy_intp)_i; - char* item = (char*)PyArray_DATA(ap) + i * PyArray_STRIDE(ap, 0); - return PyArray_DESCR(ap)->f->setitem(v, item, ap); -} - -PANDAS_INLINE PyObject* get_value_1d(PyArrayObject* ap, Py_ssize_t i) { - char* item = (char*)PyArray_DATA(ap) + i * PyArray_STRIDE(ap, 0); - return PyArray_Scalar(item, PyArray_DESCR(ap), (PyObject*)ap); -} - -PANDAS_INLINE char* get_c_string(PyObject* obj) { -#if PY_VERSION_HEX >= 0x03000000 - PyObject* enc_str = PyUnicode_AsEncodedString(obj, "utf-8", "error"); - - char* ret; - ret = PyBytes_AS_STRING(enc_str); - - // TODO(general): memory leak here - - return ret; -#else - return PyString_AsString(obj); -#endif -} - -PANDAS_INLINE PyObject* char_to_string(char* data) { -#if PY_VERSION_HEX >= 0x03000000 - return PyUnicode_FromString(data); -#else - return PyString_FromString(data); -#endif -} - -PyObject* sarr_from_data(PyArray_Descr* descr, int length, void* data) { - PyArrayObject* result; - npy_intp dims[1] = {length}; - Py_INCREF(descr); // newfromdescr steals a reference to descr - result = (PyArrayObject*)PyArray_NewFromDescr(&PyArray_Type, descr, 1, dims, - NULL, data, 0, NULL); - - // Returned array doesn't own data by default - result->flags |= NPY_OWNDATA; - - return (PyObject*)result; -} - -void transfer_object_column(char* dst, char* src, size_t stride, - size_t length) { - size_t i; - size_t sz = sizeof(PyObject*); - - for (i = 0; i < length; ++i) { - // uninitialized data - - // Py_XDECREF(*((PyObject**) dst)); - - memcpy(dst, src, sz); - Py_INCREF(*((PyObject**)dst)); - src += sz; - dst += stride; - } -} - -void set_array_owndata(PyArrayObject* ao) { ao->flags |= NPY_OWNDATA; } - -void set_array_not_contiguous(PyArrayObject* ao) { - ao->flags &= ~(NPY_C_CONTIGUOUS | NPY_F_CONTIGUOUS); -} - -// If arr is zerodim array, return a proper array scalar (e.g. np.int64). -// Otherwise, return arr as is. 
-PANDAS_INLINE PyObject* unbox_if_zerodim(PyObject* arr) { - if (PyArray_IsZeroDim(arr)) { - PyObject* ret; - ret = PyArray_ToScalar(PyArray_DATA(arr), arr); - return ret; - } else { - Py_INCREF(arr); - return arr; - } -} - -#endif // PANDAS_SRC_NUMPY_HELPER_H_ diff --git a/pandas/src/offsets.pyx b/pandas/src/offsets.pyx deleted file mode 100644 index c963e256d0aa5..0000000000000 --- a/pandas/src/offsets.pyx +++ /dev/null @@ -1,367 +0,0 @@ - -ctypedef enum time_res: - r_min = 0 - r_microsecond - r_second - r_minute - r_hour - r_day - r_month - r_year - r_max = 98 - r_invalid = 99 - - -cdef conversion_factor(time_res res1, time_res res2): - cdef: - time_res min_res, max_res - int64_t factor - - min_res = min(res1, res2) - max_res = max(res1, res2) - factor = 1 - - if min_res == max_res: - return factor - - while min_res < max_res: - if min_res < r_microsecond: - raise "Cannot convert from less than us" - elif min_res == r_microsecond: - factor *= 1000000 - min_res = r_second - elif min_res == r_second: - factor *= 60 - min_res = r_minute - elif min_res == r_minute: - factor *= 60 - min_res = r_hour - elif min_res == r_hour: - factor *= 24 - min_res = r_day - else: - raise "Cannot convert to month or year" - - return factor - -# Logic to generate ranges -# ----------------------------------------------------------------------------- - -cdef inline int64_t weekend_adjustment(int64_t dow, int bkwd): - if dow > 4: # sat or sun? - if bkwd: # roll back 1 or 2 days - return (4 - dow) - else: # roll forward 2 or 1 days - return (7 - dow) - return 0 - -cdef int64_t us_in_day = conversion_factor(r_microsecond, r_day) - -cdef class _Offset: - """ - Base class to generate timestamps. Set the anchor, and then move offsets - with next & prev. Retrieve timestamp with ts attribute. 
- """ - cdef: - int64_t t, dow, biz, dayoffset - object start - _TSObject ts - - def __cinit__(self): - self.t=0 - self.dow=0 - self.biz=0 - self.dayoffset=0 - - cpdef anchor(self, object start=None): - if start is not None: - self.start = start - self.ts = convert_to_tsobject(self.start, None, None) - self._setup() - - cdef _setup(self): - pass - - cpdef next(self): - pass - - cpdef __next__(self): - """wrapper around next""" - return self.next() - - cpdef prev(self): - pass - - cdef int64_t _ts(self): - """ - Access the current timestamp value, with a possible weekday - adjustment. - """ - cdef int64_t adj - - if self.biz != 0: - adj = weekend_adjustment(self.dow, self.biz < 0) - return self.t + us_in_day * adj - else: - return self.t - - cdef int64_t _get_anchor(self): - """ - Retrieve an anchor relating to current offset we're on. - """ - return self.t - self.dayoffset * us_in_day - - property ts: - def __get__(self): - return self._ts() - -cdef class YearOffset(_Offset): - """ - Generate annual timestamps from provided start time; apply dayoffset to - each timestamp. If biz > 0, we choose the next business day at each time; - previous if < 0. 
- - Parameters - ---------- - dayoffset : int - biz : int - """ - cdef: - int64_t y, ly - - def __init__(self, int64_t dayoffset=0, int64_t biz=0, object anchor=None): - self.dayoffset = dayoffset - self.biz = biz - - if anchor is not None: - self.anchor(anchor) - - cdef _setup(self): - cdef _TSObject ts = self.ts - - self.t = ts.value + self.dayoffset * us_in_day - self.y = ts.dts.year - - self.ly = (ts.dts.month > 2 or - ts.dts.month == 2 and ts.dts.day == 29) - - if self.biz != 0: - self.dow = (ts_dayofweek(ts) + self.dayoffset) % 7 - - cpdef next(self): - cdef int64_t days - - days = 365 + is_leapyear(self.y + self.ly) - - self.t += days * us_in_day - self.y += 1 - - if self.biz != 0: - self.dow = (self.dow + days) % 7 - - cpdef prev(self): - cdef int64_t days - - days = 365 + is_leapyear(self.y - (1 - self.ly)) - - self.t -= days * us_in_day - self.y -= 1 - - if self.biz != 0: - self.dow = (self.dow - days) % 7 - -cdef class MonthOffset(_Offset): - """ - Generate monthly timestamps from provided start time, and apply dayoffset - to each timestamp. Stride to construct strided timestamps (eg quarterly). - If biz > 0, we choose the next business day at each time; previous if < 0. 
- - Parameters - ---------- - dayoffset : int - stride : int, > 0 - biz : int - """ - cdef: - Py_ssize_t stride, ly, m - int64_t y - - def __init__(self, int64_t dayoffset=0, Py_ssize_t stride=1, - int64_t biz=0, object anchor=None): - self.dayoffset = dayoffset - self.stride = stride - self.biz = biz - - if stride <= 0: - raise ValueError("Stride must be positive") - - if anchor is not None: - self.anchor(anchor) - - cdef _setup(self): - cdef _TSObject ts = self.ts - - self.t = ts.value + (self.dayoffset * us_in_day) - - # for day counting - self.m = ts.dts.month - 1 - self.y = ts.dts.year - self.ly = is_leapyear(self.y) - - if self.biz != 0: - self.dow = (ts_dayofweek(ts) + self.dayoffset) % 7 - - cpdef next(self): - cdef: - int64_t tmp, days - Py_ssize_t j - - days = 0 - for j in range(0, self.stride): - if self.m >= 12: - self.m -= 12 - self.y += 1 - self.ly = is_leapyear(self.y) - days += days_per_month_table[self.ly][self.m] - self.m += 1 - - self.t += days * us_in_day - - if self.biz != 0: - self.dow = (self.dow + days) % 7 - - cpdef prev(self): - cdef: - int64_t tmp, days - Py_ssize_t j - - days = 0 - for j in range(0, self.stride): - self.m -= 1 - if self.m < 0: - self.m += 12 - self.y -= 1 - self.ly = is_leapyear(self.y) - days += days_per_month_table[self.ly][self.m] - - self.t -= days * us_in_day - - if self.biz != 0: - self.dow = (self.dow - days) % 7 - -cdef class DayOfMonthOffset(_Offset): - """ - Generate relative monthly timestamps from month & year of provided start - time. For example, fridays of the third week of each month (week=3, day=4); - or, thursdays of the last week of each month (week=-1, day=3). 
- - Parameters - ---------- - week : int - day : int, 0 to 6 - """ - cdef: - Py_ssize_t ly, m - int64_t y, day, week - - def __init__(self, int64_t week=0, int64_t day=0, object anchor=None): - self.week = week - self.day = day - - if self.day < 0 or self.day > 6: - raise ValueError("Day offset must be 0 to 6") - - if anchor is not None: - self.anchor(anchor) - - cdef _setup(self): - cdef _TSObject ts = self.ts - - # rewind to beginning of month - self.t = ts.value - (ts.dts.day - 1) * us_in_day - self.dow = dayofweek(ts.dts.year, ts.dts.month, 1) - - # for day counting - self.m = ts.dts.month - 1 - self.y = ts.dts.year - self.ly = is_leapyear(self.y) - - cpdef next(self): - cdef: - int64_t tmp, days - - days = days_per_month_table[self.ly][self.m] - self.t += days * us_in_day - self.dow = (self.dow + days) % 7 - - self.m += 1 - if self.m >= 12: - self.m -= 12 - self.y += 1 - self.ly = is_leapyear(self.y) - - cpdef prev(self): - cdef: - int64_t tmp, days - - days = days_per_month_table[self.ly][(self.m - 1) % 12] - self.t -= days * us_in_day - self.dow = (self.dow - days) % 7 - - self.m -= 1 - if self.m < 0: - self.m += 12 - self.y -= 1 - self.ly = is_leapyear(self.y) - - cdef int64_t _ts(self): - """ - Overwrite default adjustment - """ - cdef int64_t adj = (self.week * 7) + (self.day - self.dow) % 7 - return self.t + us_in_day * adj - -cdef class DayOffset(_Offset): - """ - Generate daily timestamps beginning with first valid time >= start time. If - biz != 0, we skip weekends. Stride, to construct weekly timestamps. 
- - Parameters - ---------- - stride : int, > 0 - biz : boolean - """ - cdef: - Py_ssize_t stride - - def __init__(self, int64_t stride=1, int64_t biz=0, object anchor=None): - self.stride = stride - self.biz = biz - - if self.stride <= 0: - raise ValueError("Stride must be positive") - - if anchor is not None: - self.anchor(anchor) - - cdef _setup(self): - cdef _TSObject ts = self.ts - self.t = ts.value - if self.biz != 0: - self.dow = ts_dayofweek(ts) - - cpdef next(self): - self.t += (self.stride * us_in_day) - if self.biz != 0: - self.dow = (self.dow + self.stride) % 7 - if self.dow >= 5: - self.t += (7 - self.dow) * us_in_day - self.dow = 0 - - cpdef prev(self): - self.t -= (self.stride * us_in_day) - if self.biz != 0: - self.dow = (self.dow - self.stride) % 7 - if self.dow >= 5: - self.t += (4 - self.dow) * us_in_day - self.dow = 4 diff --git a/pandas/src/parser/.gitignore b/pandas/src/parser/.gitignore deleted file mode 100644 index f07e771a35eec..0000000000000 --- a/pandas/src/parser/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -!*.c -test* \ No newline at end of file diff --git a/pandas/src/parser/Makefile b/pandas/src/parser/Makefile deleted file mode 100644 index ec88eaf44ba15..0000000000000 --- a/pandas/src/parser/Makefile +++ /dev/null @@ -1,13 +0,0 @@ -PYTHONBASE = /Library/Frameworks/EPD64.framework/Versions/Current -NUMPY_INC = /Library/Frameworks/EPD64.framework/Versions/7.1/lib/python2.7/site-packages/numpy/core/include -PYTHON_INC = -I$(PYTHONBASE)/include/python2.7 -I$(NUMPY_INC) -PYTHON_LINK = -L$(PYTHONBASE)/lib -lpython - -SOURCES = conversions.c parser.c str_to.c - -check-syntax: - gcc -g $(PYTHON_INC) -o /dev/null -S ${CHK_SOURCES} - -test: $(SOURCES) - gcc $(PYTHON_INC) -o test $(SOURCES) - ./test \ No newline at end of file diff --git a/pandas/src/period_helper.c b/pandas/src/period_helper.c deleted file mode 100644 index 19f810eb54ea7..0000000000000 --- a/pandas/src/period_helper.c +++ /dev/null @@ -1,1518 +0,0 @@ -/* -Copyright (c) 2016, 
PyData Development Team -All rights reserved. - -Distributed under the terms of the BSD Simplified License. - -The full license is in the LICENSE file, distributed with this software. - -Borrowed and derived code from scikits.timeseries that we will expose via -Cython to pandas. This primarily concerns interval representation and -frequency conversion routines. - -See end of file for stuff pandas uses (search for 'pandas'). -*/ - -#include "period_helper.h" - -/* ------------------------------------------------------------------ - * Code derived from scikits.timeseries - * ------------------------------------------------------------------*/ - -static int mod_compat(int x, int m) { - int result = x % m; - if (result < 0) return result + m; - return result; -} - -static int floordiv(int x, int divisor) { - if (x < 0) { - if (mod_compat(x, divisor)) { - return x / divisor - 1; - } else { - return x / divisor; - } - } else { - return x / divisor; - } -} - -/* Table with day offsets for each month (0-based, without and with leap) */ -static int month_offset[2][13] = { - {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365}, - {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}}; - -/* Table of number of days in a month (0-based, without and with leap) */ -static int days_in_month[2][12] = { - {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}, - {31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}}; - -/* Return 1/0 iff year points to a leap year in calendar. */ -static int dInfoCalc_Leapyear(npy_int64 year, int calendar) { - if (calendar == GREGORIAN_CALENDAR) { - return (year % 4 == 0) && ((year % 100 != 0) || (year % 400 == 0)); - } else { - return (year % 4 == 0); - } -} - -/* Return the day of the week for the given absolute date. 
*/ -static int dInfoCalc_DayOfWeek(npy_int64 absdate) { - int day_of_week; - - if (absdate >= 1) { - day_of_week = (absdate - 1) % 7; - } else { - day_of_week = 6 - ((-absdate) % 7); - } - return day_of_week; -} - -static int monthToQuarter(int month) { return ((month - 1) / 3) + 1; } - -/* Return the year offset, that is the absolute date of the day - 31.12.(year-1) in the given calendar. - - Note: - For the Julian calendar we shift the absdate (which is measured - using the Gregorian Epoch) value by two days because the Epoch - (0001-01-01) in the Julian calendar lies 2 days before the Epoch in - the Gregorian calendar. */ -static int dInfoCalc_YearOffset(npy_int64 year, int calendar) { - year--; - if (calendar == GREGORIAN_CALENDAR) { - if (year >= 0 || -1 / 4 == -1) - return year * 365 + year / 4 - year / 100 + year / 400; - else - return year * 365 + (year - 3) / 4 - (year - 99) / 100 + - (year - 399) / 400; - } else if (calendar == JULIAN_CALENDAR) { - if (year >= 0 || -1 / 4 == -1) - return year * 365 + year / 4 - 2; - else - return year * 365 + (year - 3) / 4 - 2; - } - Py_Error(PyExc_ValueError, "unknown calendar"); -onError: - return INT_ERR_CODE; -} - -/* Set the instance's value using the given date and time. calendar may be set - * to the flags: GREGORIAN_CALENDAR, JULIAN_CALENDAR to indicate the calendar - * to be used. */ - -static int dInfoCalc_SetFromDateAndTime(struct date_info *dinfo, int year, - int month, int day, int hour, - int minute, double second, - int calendar) { - /* Calculate the absolute date */ - { - int leap; - npy_int64 absdate; - int yearoffset; - - /* Range check */ - Py_AssertWithArg(year > -(INT_MAX / 366) && year < (INT_MAX / 366), - PyExc_ValueError, "year out of range: %i", year); - - /* Is it a leap year ? 
*/ - leap = dInfoCalc_Leapyear(year, calendar); - - /* Negative month values indicate months relative to the years end */ - if (month < 0) month += 13; - Py_AssertWithArg(month >= 1 && month <= 12, PyExc_ValueError, - "month out of range (1-12): %i", month); - - /* Negative values indicate days relative to the months end */ - if (day < 0) day += days_in_month[leap][month - 1] + 1; - Py_AssertWithArg(day >= 1 && day <= days_in_month[leap][month - 1], - PyExc_ValueError, "day out of range: %i", day); - - yearoffset = dInfoCalc_YearOffset(year, calendar); - if (yearoffset == INT_ERR_CODE) goto onError; - - absdate = day + month_offset[leap][month - 1] + yearoffset; - - dinfo->absdate = absdate; - - dinfo->year = year; - dinfo->month = month; - dinfo->quarter = ((month - 1) / 3) + 1; - dinfo->day = day; - - dinfo->day_of_week = dInfoCalc_DayOfWeek(absdate); - dinfo->day_of_year = (short)(absdate - yearoffset); - - dinfo->calendar = calendar; - } - - /* Calculate the absolute time */ - { - Py_AssertWithArg(hour >= 0 && hour <= 23, PyExc_ValueError, - "hour out of range (0-23): %i", hour); - Py_AssertWithArg(minute >= 0 && minute <= 59, PyExc_ValueError, - "minute out of range (0-59): %i", minute); - Py_AssertWithArg( - second >= (double)0.0 && - (second < (double)60.0 || - (hour == 23 && minute == 59 && second < (double)61.0)), - PyExc_ValueError, - "second out of range (0.0 - <60.0; <61.0 for 23:59): %f", second); - - dinfo->abstime = (double)(hour * 3600 + minute * 60) + second; - - dinfo->hour = hour; - dinfo->minute = minute; - dinfo->second = second; - } - return 0; - -onError: - return INT_ERR_CODE; -} - -/* Sets the date part of the date_info struct using the indicated - calendar. - - XXX This could also be done using some integer arithmetics rather - than with this iterative approach... 
*/ -static int dInfoCalc_SetFromAbsDate(register struct date_info *dinfo, - npy_int64 absdate, int calendar) { - register npy_int64 year; - npy_int64 yearoffset; - int leap, dayoffset; - int *monthoffset; - - /* Approximate year */ - if (calendar == GREGORIAN_CALENDAR) { - year = (npy_int64)(((double)absdate) / 365.2425); - } else if (calendar == JULIAN_CALENDAR) { - year = (npy_int64)(((double)absdate) / 365.25); - } else { - Py_Error(PyExc_ValueError, "unknown calendar"); - } - - if (absdate > 0) year++; - - /* Apply corrections to reach the correct year */ - while (1) { - /* Calculate the year offset */ - yearoffset = dInfoCalc_YearOffset(year, calendar); - if (yearoffset == INT_ERR_CODE) goto onError; - - /* Backward correction: absdate must be greater than the - yearoffset */ - if (yearoffset >= absdate) { - year--; - continue; - } - - dayoffset = absdate - yearoffset; - leap = dInfoCalc_Leapyear(year, calendar); - - /* Forward correction: non leap years only have 365 days */ - if (dayoffset > 365 && !leap) { - year++; - continue; - } - break; - } - - dinfo->year = year; - dinfo->calendar = calendar; - - /* Now iterate to find the month */ - monthoffset = month_offset[leap]; - { - register int month; - - for (month = 1; month < 13; month++) { - if (monthoffset[month] >= dayoffset) break; - } - - dinfo->month = month; - dinfo->quarter = monthToQuarter(month); - dinfo->day = dayoffset - month_offset[leap][month - 1]; - } - - dinfo->day_of_week = dInfoCalc_DayOfWeek(absdate); - dinfo->day_of_year = dayoffset; - dinfo->absdate = absdate; - - return 0; - -onError: - return INT_ERR_CODE; -} - -/////////////////////////////////////////////// - -// frequency specifc conversion routines -// each function must take an integer fromDate and -// a char relation ('S' or 'E' for 'START' or 'END') -/////////////////////////////////////////////////////////////////////// - -// helpers for frequency conversion routines // - -static int daytime_conversion_factors[][2] = { - 
{FR_DAY, 1}, {FR_HR, 24}, {FR_MIN, 60}, {FR_SEC, 60}, - {FR_MS, 1000}, {FR_US, 1000}, {FR_NS, 1000}, {0, 0}}; - -static npy_int64 **daytime_conversion_factor_matrix = NULL; - -PANDAS_INLINE int max_value(int a, int b) { return a > b ? a : b; } - -PANDAS_INLINE int min_value(int a, int b) { return a < b ? a : b; } - -PANDAS_INLINE int get_freq_group(int freq) { return (freq / 1000) * 1000; } - -PANDAS_INLINE int get_freq_group_index(int freq) { return freq / 1000; } - -static int calc_conversion_factors_matrix_size(void) { - int matrix_size = 0; - int index; - for (index = 0;; index++) { - int period_value = - get_freq_group_index(daytime_conversion_factors[index][0]); - if (period_value == 0) { - break; - } - matrix_size = max_value(matrix_size, period_value); - } - return matrix_size + 1; -} - -static void alloc_conversion_factors_matrix(int matrix_size) { - int row_index; - int column_index; - daytime_conversion_factor_matrix = - malloc(matrix_size * sizeof(**daytime_conversion_factor_matrix)); - for (row_index = 0; row_index < matrix_size; row_index++) { - daytime_conversion_factor_matrix[row_index] = - malloc(matrix_size * sizeof(**daytime_conversion_factor_matrix)); - for (column_index = 0; column_index < matrix_size; column_index++) { - daytime_conversion_factor_matrix[row_index][column_index] = 0; - } - } -} - -static npy_int64 calculate_conversion_factor(int start_value, int end_value) { - npy_int64 conversion_factor = 0; - int index; - for (index = 0;; index++) { - int freq_group = daytime_conversion_factors[index][0]; - - if (freq_group == 0) { - conversion_factor = 0; - break; - } - - if (freq_group == start_value) { - conversion_factor = 1; - } else { - conversion_factor *= daytime_conversion_factors[index][1]; - } - - if (freq_group == end_value) { - break; - } - } - return conversion_factor; -} - -static void populate_conversion_factors_matrix(void) { - int row_index_index; - int row_value, row_index; - int column_index_index; - int column_value, 
column_index; - - for (row_index_index = 0;; row_index_index++) { - row_value = daytime_conversion_factors[row_index_index][0]; - if (row_value == 0) { - break; - } - row_index = get_freq_group_index(row_value); - for (column_index_index = row_index_index;; column_index_index++) { - column_value = daytime_conversion_factors[column_index_index][0]; - if (column_value == 0) { - break; - } - column_index = get_freq_group_index(column_value); - - daytime_conversion_factor_matrix[row_index][column_index] = - calculate_conversion_factor(row_value, column_value); - } - } -} - -void initialize_daytime_conversion_factor_matrix() { - if (daytime_conversion_factor_matrix == NULL) { - int matrix_size = calc_conversion_factors_matrix_size(); - alloc_conversion_factors_matrix(matrix_size); - populate_conversion_factors_matrix(); - } -} - -PANDAS_INLINE npy_int64 get_daytime_conversion_factor(int from_index, - int to_index) { - return daytime_conversion_factor_matrix[min_value(from_index, to_index)] - [max_value(from_index, to_index)]; -} - -PANDAS_INLINE npy_int64 upsample_daytime(npy_int64 ordinal, - asfreq_info *af_info, int atEnd) { - if (atEnd) { - return (ordinal + 1) * af_info->intraday_conversion_factor - 1; - } else { - return ordinal * af_info->intraday_conversion_factor; - } -} - -PANDAS_INLINE npy_int64 downsample_daytime(npy_int64 ordinal, - asfreq_info *af_info, int atEnd) { - return ordinal / (af_info->intraday_conversion_factor); -} - -PANDAS_INLINE npy_int64 transform_via_day(npy_int64 ordinal, char relation, - asfreq_info *af_info, - freq_conv_func first_func, - freq_conv_func second_func) { - // printf("transform_via_day(%ld, %ld, %d)\n", ordinal, - // af_info->intraday_conversion_factor, - // af_info->intraday_conversion_upsample); - npy_int64 result; - - result = (*first_func)(ordinal, relation, af_info); - result = (*second_func)(result, relation, af_info); - - return result; -} - -static npy_int64 DtoB_weekday(npy_int64 absdate) { - return (((absdate) / 7) 
* 5) + (absdate) % 7 - BDAY_OFFSET; -} - -static npy_int64 DtoB_WeekendToMonday(npy_int64 absdate, int day_of_week) { - if (day_of_week > 4) { - // change to Monday after weekend - absdate += (7 - day_of_week); - } - return DtoB_weekday(absdate); -} - -static npy_int64 DtoB_WeekendToFriday(npy_int64 absdate, int day_of_week) { - if (day_of_week > 4) { - // change to friday before weekend - absdate -= (day_of_week - 4); - } - return DtoB_weekday(absdate); -} - -static npy_int64 absdate_from_ymd(int y, int m, int d) { - struct date_info tempDate; - if (dInfoCalc_SetFromDateAndTime(&tempDate, y, m, d, 0, 0, 0, - GREGORIAN_CALENDAR)) { - return INT_ERR_CODE; - } - return tempDate.absdate; -} - -//************ FROM DAILY *************** - -static npy_int64 asfreq_DTtoA(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - struct date_info dinfo; - ordinal = downsample_daytime(ordinal, af_info, 0); - if (dInfoCalc_SetFromAbsDate(&dinfo, ordinal + ORD_OFFSET, - GREGORIAN_CALENDAR)) - return INT_ERR_CODE; - if (dinfo.month > af_info->to_a_year_end) { - return (npy_int64)(dinfo.year + 1 - BASE_YEAR); - } else { - return (npy_int64)(dinfo.year - BASE_YEAR); - } -} - -static npy_int64 DtoQ_yq(npy_int64 ordinal, asfreq_info *af_info, int *year, - int *quarter) { - struct date_info dinfo; - if (dInfoCalc_SetFromAbsDate(&dinfo, ordinal + ORD_OFFSET, - GREGORIAN_CALENDAR)) - return INT_ERR_CODE; - if (af_info->to_q_year_end != 12) { - dinfo.month -= af_info->to_q_year_end; - if (dinfo.month <= 0) { - dinfo.month += 12; - } else { - dinfo.year += 1; - } - dinfo.quarter = monthToQuarter(dinfo.month); - } - - *year = dinfo.year; - *quarter = dinfo.quarter; - - return 0; -} - -static npy_int64 asfreq_DTtoQ(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - int year, quarter; - - ordinal = downsample_daytime(ordinal, af_info, 0); - - if (DtoQ_yq(ordinal, af_info, &year, &quarter) == INT_ERR_CODE) { - return INT_ERR_CODE; - } - - return (npy_int64)((year - BASE_YEAR) 
* 4 + quarter - 1); -} - -static npy_int64 asfreq_DTtoM(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - struct date_info dinfo; - - ordinal = downsample_daytime(ordinal, af_info, 0); - - if (dInfoCalc_SetFromAbsDate(&dinfo, ordinal + ORD_OFFSET, - GREGORIAN_CALENDAR)) - return INT_ERR_CODE; - return (npy_int64)((dinfo.year - BASE_YEAR) * 12 + dinfo.month - 1); -} - -static npy_int64 asfreq_DTtoW(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - ordinal = downsample_daytime(ordinal, af_info, 0); - return (ordinal + ORD_OFFSET - (1 + af_info->to_week_end)) / 7 + 1 - - WEEK_OFFSET; -} - -static npy_int64 asfreq_DTtoB(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - struct date_info dinfo; - - ordinal = downsample_daytime(ordinal, af_info, 0); - - if (dInfoCalc_SetFromAbsDate(&dinfo, ordinal + ORD_OFFSET, - GREGORIAN_CALENDAR)) - return INT_ERR_CODE; - - if (relation == 'S') { - return DtoB_WeekendToFriday(dinfo.absdate, dinfo.day_of_week); - } else { - return DtoB_WeekendToMonday(dinfo.absdate, dinfo.day_of_week); - } -} - -// all intra day calculations are now done within one function -static npy_int64 asfreq_DownsampleWithinDay(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return downsample_daytime(ordinal, af_info, relation == 'E'); -} - -static npy_int64 asfreq_UpsampleWithinDay(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return upsample_daytime(ordinal, af_info, relation == 'E'); -} -//************ FROM BUSINESS *************** - -static npy_int64 asfreq_BtoDT(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - ordinal += BDAY_OFFSET; - ordinal = - (((ordinal - 1) / 5) * 7 + mod_compat(ordinal - 1, 5) + 1 - ORD_OFFSET); - - return upsample_daytime(ordinal, af_info, relation != 'S'); -} - -static npy_int64 asfreq_BtoA(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_BtoDT, - asfreq_DTtoA); -} - -static 
npy_int64 asfreq_BtoQ(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_BtoDT, - asfreq_DTtoQ); -} - -static npy_int64 asfreq_BtoM(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_BtoDT, - asfreq_DTtoM); -} - -static npy_int64 asfreq_BtoW(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_BtoDT, - asfreq_DTtoW); -} - -//************ FROM WEEKLY *************** - -static npy_int64 asfreq_WtoDT(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - ordinal += WEEK_OFFSET; - if (relation != 'S') { - ordinal += 1; - } - - ordinal = ordinal * 7 - 6 + af_info->from_week_end - ORD_OFFSET; - - if (relation != 'S') { - ordinal -= 1; - } - - return upsample_daytime(ordinal, af_info, relation != 'S'); -} - -static npy_int64 asfreq_WtoA(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_WtoDT, - asfreq_DTtoA); -} - -static npy_int64 asfreq_WtoQ(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_WtoDT, - asfreq_DTtoQ); -} - -static npy_int64 asfreq_WtoM(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_WtoDT, - asfreq_DTtoM); -} - -static npy_int64 asfreq_WtoW(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_WtoDT, - asfreq_DTtoW); -} - -static npy_int64 asfreq_WtoB(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - struct date_info dinfo; - if (dInfoCalc_SetFromAbsDate( - &dinfo, asfreq_WtoDT(ordinal, relation, af_info) + ORD_OFFSET, - GREGORIAN_CALENDAR)) - return INT_ERR_CODE; - - if (relation == 'S') { - return DtoB_WeekendToMonday(dinfo.absdate, 
dinfo.day_of_week); - } else { - return DtoB_WeekendToFriday(dinfo.absdate, dinfo.day_of_week); - } -} - -//************ FROM MONTHLY *************** -static void MtoD_ym(npy_int64 ordinal, int *y, int *m) { - *y = floordiv(ordinal, 12) + BASE_YEAR; - *m = mod_compat(ordinal, 12) + 1; -} - -static npy_int64 asfreq_MtoDT(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - npy_int64 absdate; - int y, m; - - if (relation == 'E') { - ordinal += 1; - } - MtoD_ym(ordinal, &y, &m); - if ((absdate = absdate_from_ymd(y, m, 1)) == INT_ERR_CODE) - return INT_ERR_CODE; - ordinal = absdate - ORD_OFFSET; - - if (relation == 'E') { - ordinal -= 1; - } - - return upsample_daytime(ordinal, af_info, relation != 'S'); -} - -static npy_int64 asfreq_MtoA(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_MtoDT, - asfreq_DTtoA); -} - -static npy_int64 asfreq_MtoQ(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_MtoDT, - asfreq_DTtoQ); -} - -static npy_int64 asfreq_MtoW(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_MtoDT, - asfreq_DTtoW); -} - -static npy_int64 asfreq_MtoB(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - struct date_info dinfo; - - if (dInfoCalc_SetFromAbsDate( - &dinfo, asfreq_MtoDT(ordinal, relation, af_info) + ORD_OFFSET, - GREGORIAN_CALENDAR)) - return INT_ERR_CODE; - - if (relation == 'S') { - return DtoB_WeekendToMonday(dinfo.absdate, dinfo.day_of_week); - } else { - return DtoB_WeekendToFriday(dinfo.absdate, dinfo.day_of_week); - } -} - -//************ FROM QUARTERLY *************** - -static void QtoD_ym(npy_int64 ordinal, int *y, int *m, asfreq_info *af_info) { - *y = floordiv(ordinal, 4) + BASE_YEAR; - *m = mod_compat(ordinal, 4) * 3 + 1; - - if (af_info->from_q_year_end != 12) { - *m += af_info->from_q_year_end; - if (*m 
> 12) { - *m -= 12; - } else { - *y -= 1; - } - } -} - -static npy_int64 asfreq_QtoDT(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - npy_int64 absdate; - int y, m; - - if (relation == 'E') { - ordinal += 1; - } - - QtoD_ym(ordinal, &y, &m, af_info); - - if ((absdate = absdate_from_ymd(y, m, 1)) == INT_ERR_CODE) - return INT_ERR_CODE; - - if (relation == 'E') { - absdate -= 1; - } - - return upsample_daytime(absdate - ORD_OFFSET, af_info, relation != 'S'); -} - -static npy_int64 asfreq_QtoQ(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_QtoDT, - asfreq_DTtoQ); -} - -static npy_int64 asfreq_QtoA(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_QtoDT, - asfreq_DTtoA); -} - -static npy_int64 asfreq_QtoM(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_QtoDT, - asfreq_DTtoM); -} - -static npy_int64 asfreq_QtoW(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_QtoDT, - asfreq_DTtoW); -} - -static npy_int64 asfreq_QtoB(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - struct date_info dinfo; - if (dInfoCalc_SetFromAbsDate( - &dinfo, asfreq_QtoDT(ordinal, relation, af_info) + ORD_OFFSET, - GREGORIAN_CALENDAR)) - return INT_ERR_CODE; - - if (relation == 'S') { - return DtoB_WeekendToMonday(dinfo.absdate, dinfo.day_of_week); - } else { - return DtoB_WeekendToFriday(dinfo.absdate, dinfo.day_of_week); - } -} - -//************ FROM ANNUAL *************** - -static npy_int64 asfreq_AtoDT(npy_int64 year, char relation, - asfreq_info *af_info) { - npy_int64 absdate; - int month = (af_info->from_a_year_end) % 12; - - // start from 1970 - year += BASE_YEAR; - - month += 1; - - if (af_info->from_a_year_end != 12) { - year -= 1; - } - - if (relation == 'E') { - year += 
1; - } - - absdate = absdate_from_ymd(year, month, 1); - - if (absdate == INT_ERR_CODE) { - return INT_ERR_CODE; - } - - if (relation == 'E') { - absdate -= 1; - } - - return upsample_daytime(absdate - ORD_OFFSET, af_info, relation != 'S'); -} - -static npy_int64 asfreq_AtoA(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_AtoDT, - asfreq_DTtoA); -} - -static npy_int64 asfreq_AtoQ(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_AtoDT, - asfreq_DTtoQ); -} - -static npy_int64 asfreq_AtoM(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_AtoDT, - asfreq_DTtoM); -} - -static npy_int64 asfreq_AtoW(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_AtoDT, - asfreq_DTtoW); -} - -static npy_int64 asfreq_AtoB(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - struct date_info dinfo; - if (dInfoCalc_SetFromAbsDate( - &dinfo, asfreq_AtoDT(ordinal, relation, af_info) + ORD_OFFSET, - GREGORIAN_CALENDAR)) - return INT_ERR_CODE; - - if (relation == 'S') { - return DtoB_WeekendToMonday(dinfo.absdate, dinfo.day_of_week); - } else { - return DtoB_WeekendToFriday(dinfo.absdate, dinfo.day_of_week); - } -} - -static npy_int64 nofunc(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return INT_ERR_CODE; -} -static npy_int64 no_op(npy_int64 ordinal, char relation, asfreq_info *af_info) { - return ordinal; -} - -// end of frequency specific conversion routines - -static int calc_a_year_end(int freq, int group) { - int result = (freq - group) % 12; - if (result == 0) { - return 12; - } else { - return result; - } -} - -static int calc_week_end(int freq, int group) { return freq - group; } - -void get_asfreq_info(int fromFreq, int toFreq, asfreq_info *af_info) { - int fromGroup = 
get_freq_group(fromFreq); - int toGroup = get_freq_group(toFreq); - - af_info->intraday_conversion_factor = get_daytime_conversion_factor( - get_freq_group_index(max_value(fromGroup, FR_DAY)), - get_freq_group_index(max_value(toGroup, FR_DAY))); - - // printf("get_asfreq_info(%d, %d) %ld, %d\n", fromFreq, toFreq, - // af_info->intraday_conversion_factor, - // af_info->intraday_conversion_upsample); - - switch (fromGroup) { - case FR_WK: - af_info->from_week_end = calc_week_end(fromFreq, fromGroup); - break; - case FR_ANN: - af_info->from_a_year_end = calc_a_year_end(fromFreq, fromGroup); - break; - case FR_QTR: - af_info->from_q_year_end = calc_a_year_end(fromFreq, fromGroup); - break; - } - - switch (toGroup) { - case FR_WK: - af_info->to_week_end = calc_week_end(toFreq, toGroup); - break; - case FR_ANN: - af_info->to_a_year_end = calc_a_year_end(toFreq, toGroup); - break; - case FR_QTR: - af_info->to_q_year_end = calc_a_year_end(toFreq, toGroup); - break; - } -} - -freq_conv_func get_asfreq_func(int fromFreq, int toFreq) { - int fromGroup = get_freq_group(fromFreq); - int toGroup = get_freq_group(toFreq); - - if (fromGroup == FR_UND) { - fromGroup = FR_DAY; - } - - switch (fromGroup) { - case FR_ANN: - switch (toGroup) { - case FR_ANN: - return &asfreq_AtoA; - case FR_QTR: - return &asfreq_AtoQ; - case FR_MTH: - return &asfreq_AtoM; - case FR_WK: - return &asfreq_AtoW; - case FR_BUS: - return &asfreq_AtoB; - case FR_DAY: - case FR_HR: - case FR_MIN: - case FR_SEC: - case FR_MS: - case FR_US: - case FR_NS: - return &asfreq_AtoDT; - - default: - return &nofunc; - } - - case FR_QTR: - switch (toGroup) { - case FR_ANN: - return &asfreq_QtoA; - case FR_QTR: - return &asfreq_QtoQ; - case FR_MTH: - return &asfreq_QtoM; - case FR_WK: - return &asfreq_QtoW; - case FR_BUS: - return &asfreq_QtoB; - case FR_DAY: - case FR_HR: - case FR_MIN: - case FR_SEC: - case FR_MS: - case FR_US: - case FR_NS: - return &asfreq_QtoDT; - default: - return &nofunc; - } - - case FR_MTH: - 
switch (toGroup) { - case FR_ANN: - return &asfreq_MtoA; - case FR_QTR: - return &asfreq_MtoQ; - case FR_MTH: - return &no_op; - case FR_WK: - return &asfreq_MtoW; - case FR_BUS: - return &asfreq_MtoB; - case FR_DAY: - case FR_HR: - case FR_MIN: - case FR_SEC: - case FR_MS: - case FR_US: - case FR_NS: - return &asfreq_MtoDT; - default: - return &nofunc; - } - - case FR_WK: - switch (toGroup) { - case FR_ANN: - return &asfreq_WtoA; - case FR_QTR: - return &asfreq_WtoQ; - case FR_MTH: - return &asfreq_WtoM; - case FR_WK: - return &asfreq_WtoW; - case FR_BUS: - return &asfreq_WtoB; - case FR_DAY: - case FR_HR: - case FR_MIN: - case FR_SEC: - case FR_MS: - case FR_US: - case FR_NS: - return &asfreq_WtoDT; - default: - return &nofunc; - } - - case FR_BUS: - switch (toGroup) { - case FR_ANN: - return &asfreq_BtoA; - case FR_QTR: - return &asfreq_BtoQ; - case FR_MTH: - return &asfreq_BtoM; - case FR_WK: - return &asfreq_BtoW; - case FR_BUS: - return &no_op; - case FR_DAY: - case FR_HR: - case FR_MIN: - case FR_SEC: - case FR_MS: - case FR_US: - case FR_NS: - return &asfreq_BtoDT; - default: - return &nofunc; - } - - case FR_DAY: - case FR_HR: - case FR_MIN: - case FR_SEC: - case FR_MS: - case FR_US: - case FR_NS: - switch (toGroup) { - case FR_ANN: - return &asfreq_DTtoA; - case FR_QTR: - return &asfreq_DTtoQ; - case FR_MTH: - return &asfreq_DTtoM; - case FR_WK: - return &asfreq_DTtoW; - case FR_BUS: - return &asfreq_DTtoB; - case FR_DAY: - case FR_HR: - case FR_MIN: - case FR_SEC: - case FR_MS: - case FR_US: - case FR_NS: - if (fromGroup > toGroup) { - return &asfreq_DownsampleWithinDay; - } else { - return &asfreq_UpsampleWithinDay; - } - default: - return &nofunc; - } - - default: - return &nofunc; - } -} - -double get_abs_time(int freq, npy_int64 date_ordinal, npy_int64 ordinal) { - // printf("get_abs_time %d %lld %lld\n", freq, date_ordinal, ordinal); - - int freq_index, day_index, base_index; - npy_int64 per_day, start_ord; - double unit, result; - - if (freq <= 
FR_DAY) { - return 0; - } - - freq_index = get_freq_group_index(freq); - day_index = get_freq_group_index(FR_DAY); - base_index = get_freq_group_index(FR_SEC); - - // printf(" indices: day %d, freq %d, base %d\n", day_index, freq_index, - // base_index); - - per_day = get_daytime_conversion_factor(day_index, freq_index); - unit = get_daytime_conversion_factor(freq_index, base_index); - - // printf(" per_day: %lld, unit: %f\n", per_day, unit); - - if (base_index < freq_index) { - unit = 1 / unit; - // printf(" corrected unit: %f\n", unit); - } - - start_ord = date_ordinal * per_day; - // printf("start_ord: %lld\n", start_ord); - result = (double)(unit * (ordinal - start_ord)); - // printf(" result: %f\n", result); - return result; -} - -/* Sets the time part of the DateTime object. */ -static int dInfoCalc_SetFromAbsTime(struct date_info *dinfo, double abstime) { - int inttime; - int hour, minute; - double second; - - inttime = (int)abstime; - hour = inttime / 3600; - minute = (inttime % 3600) / 60; - second = abstime - (double)(hour * 3600 + minute * 60); - - dinfo->hour = hour; - dinfo->minute = minute; - dinfo->second = second; - - dinfo->abstime = abstime; - - return 0; -} - -/* Set the instance's value using the given date and time. calendar - may be set to the flags: GREGORIAN_CALENDAR, JULIAN_CALENDAR to - indicate the calendar to be used. 
*/ -static int dInfoCalc_SetFromAbsDateTime(struct date_info *dinfo, - npy_int64 absdate, double abstime, - int calendar) { - /* Bounds check */ - Py_AssertWithArg(abstime >= 0.0 && abstime <= SECONDS_PER_DAY, - PyExc_ValueError, - "abstime out of range (0.0 - 86400.0): %f", abstime); - - /* Calculate the date */ - if (dInfoCalc_SetFromAbsDate(dinfo, absdate, calendar)) goto onError; - - /* Calculate the time */ - if (dInfoCalc_SetFromAbsTime(dinfo, abstime)) goto onError; - - return 0; -onError: - return INT_ERR_CODE; -} - -/* ------------------------------------------------------------------ - * New pandas API-helper code, to expose to cython - * ------------------------------------------------------------------*/ - -npy_int64 asfreq(npy_int64 period_ordinal, int freq1, int freq2, - char relation) { - npy_int64 val; - freq_conv_func func; - asfreq_info finfo; - - func = get_asfreq_func(freq1, freq2); - - get_asfreq_info(freq1, freq2, &finfo); - - // printf("\n%x %d %d %ld %ld\n", func, freq1, freq2, - // finfo.intraday_conversion_factor, -finfo.intraday_conversion_factor); - - val = (*func)(period_ordinal, relation, &finfo); - - if (val == INT_ERR_CODE) { - // Py_Error(PyExc_ValueError, "Unable to convert to desired - // frequency."); - goto onError; - } - return val; -onError: - return INT_ERR_CODE; -} - -/* generate an ordinal in period space */ -npy_int64 get_period_ordinal(int year, int month, int day, int hour, int minute, - int second, int microseconds, int picoseconds, - int freq) { - npy_int64 absdays, delta, seconds; - npy_int64 weeks, days; - npy_int64 ordinal, day_adj; - int freq_group, fmonth, mdiff; - freq_group = get_freq_group(freq); - - if (freq == FR_SEC || freq == FR_MS || freq == FR_US || freq == FR_NS) { - absdays = absdate_from_ymd(year, month, day); - delta = (absdays - ORD_OFFSET); - seconds = - (npy_int64)(delta * 86400 + hour * 3600 + minute * 60 + second); - - switch (freq) { - case FR_MS: - return seconds * 1000 + microseconds / 1000; - 
- case FR_US: - return seconds * 1000000 + microseconds; - - case FR_NS: - return seconds * 1000000000 + microseconds * 1000 + - picoseconds / 1000; - } - - return seconds; - } - - if (freq == FR_MIN) { - absdays = absdate_from_ymd(year, month, day); - delta = (absdays - ORD_OFFSET); - return (npy_int64)(delta * 1440 + hour * 60 + minute); - } - - if (freq == FR_HR) { - if ((absdays = absdate_from_ymd(year, month, day)) == INT_ERR_CODE) { - goto onError; - } - delta = (absdays - ORD_OFFSET); - return (npy_int64)(delta * 24 + hour); - } - - if (freq == FR_DAY) { - return (npy_int64)(absdate_from_ymd(year, month, day) - ORD_OFFSET); - } - - if (freq == FR_UND) { - return (npy_int64)(absdate_from_ymd(year, month, day) - ORD_OFFSET); - } - - if (freq == FR_BUS) { - if ((days = absdate_from_ymd(year, month, day)) == INT_ERR_CODE) { - goto onError; - } - // calculate the current week assuming sunday as last day of a week - weeks = (days - BASE_WEEK_TO_DAY_OFFSET) / DAYS_PER_WEEK; - // calculate the current weekday (in range 1 .. 7) - delta = (days - BASE_WEEK_TO_DAY_OFFSET) % DAYS_PER_WEEK + 1; - // return the number of business days in full weeks plus the business - // days in the last - possible partial - week - return (npy_int64)(weeks * BUSINESS_DAYS_PER_WEEK) + - (delta <= BUSINESS_DAYS_PER_WEEK ? 
delta - : BUSINESS_DAYS_PER_WEEK + 1) - - BDAY_OFFSET; - } - - if (freq_group == FR_WK) { - if ((ordinal = (npy_int64)absdate_from_ymd(year, month, day)) == - INT_ERR_CODE) { - goto onError; - } - day_adj = freq - FR_WK; - return (ordinal - (1 + day_adj)) / 7 + 1 - WEEK_OFFSET; - } - - if (freq == FR_MTH) { - return (year - BASE_YEAR) * 12 + month - 1; - } - - if (freq_group == FR_QTR) { - fmonth = freq - FR_QTR; - if (fmonth == 0) fmonth = 12; - - mdiff = month - fmonth; - if (mdiff < 0) mdiff += 12; - if (month >= fmonth) mdiff += 12; - - return (year - BASE_YEAR) * 4 + (mdiff - 1) / 3; - } - - if (freq_group == FR_ANN) { - fmonth = freq - FR_ANN; - if (fmonth == 0) fmonth = 12; - if (month <= fmonth) { - return year - BASE_YEAR; - } else { - return year - BASE_YEAR + 1; - } - } - - Py_Error(PyExc_RuntimeError, "Unable to generate frequency ordinal"); - -onError: - return INT_ERR_CODE; -} - -/* - Returns the proleptic Gregorian ordinal of the date, as an integer. - This corresponds to the number of days since Jan., 1st, 1AD. - When the instance has a frequency less than daily, the proleptic date - is calculated for the last day of the period. 
- */ - -npy_int64 get_python_ordinal(npy_int64 period_ordinal, int freq) { - asfreq_info af_info; - freq_conv_func toDaily = NULL; - - if (freq == FR_DAY) return period_ordinal + ORD_OFFSET; - - toDaily = get_asfreq_func(freq, FR_DAY); - get_asfreq_info(freq, FR_DAY, &af_info); - - return toDaily(period_ordinal, 'E', &af_info) + ORD_OFFSET; -} - -char *str_replace(const char *s, const char *old, const char *new) { - char *ret; - int i, count = 0; - size_t newlen = strlen(new); - size_t oldlen = strlen(old); - - for (i = 0; s[i] != '\0'; i++) { - if (strstr(&s[i], old) == &s[i]) { - count++; - i += oldlen - 1; - } - } - - ret = PyArray_malloc(i + 1 + count * (newlen - oldlen)); - if (ret == NULL) { - return (char *)PyErr_NoMemory(); - } - - i = 0; - while (*s) { - if (strstr(s, old) == s) { - strncpy(&ret[i], new, sizeof(char) * newlen); - i += newlen; - s += oldlen; - } else { - ret[i++] = *s++; - } - } - ret[i] = '\0'; - - return ret; -} - -// function to generate a nice string representation of the period -// object, originally from DateObject_strftime - -char *c_strftime(struct date_info *tmp, char *fmt) { - struct tm c_date; - char *result; - struct date_info dinfo = *tmp; - int result_len = strlen(fmt) + 50; - - c_date.tm_sec = (int)dinfo.second; - c_date.tm_min = dinfo.minute; - c_date.tm_hour = dinfo.hour; - c_date.tm_mday = dinfo.day; - c_date.tm_mon = dinfo.month - 1; - c_date.tm_year = dinfo.year - 1900; - c_date.tm_wday = (dinfo.day_of_week + 1) % 7; - c_date.tm_yday = dinfo.day_of_year - 1; - c_date.tm_isdst = -1; - - result = malloc(result_len * sizeof(char)); - - strftime(result, result_len, fmt, &c_date); - - return result; -} - -int get_yq(npy_int64 ordinal, int freq, int *quarter, int *year) { - asfreq_info af_info; - int qtr_freq; - npy_int64 daily_ord; - npy_int64 (*toDaily)(npy_int64, char, asfreq_info *) = NULL; - - toDaily = get_asfreq_func(freq, FR_DAY); - get_asfreq_info(freq, FR_DAY, &af_info); - - daily_ord = toDaily(ordinal, 'E', 
&af_info); - - if (get_freq_group(freq) == FR_QTR) { - qtr_freq = freq; - } else { - qtr_freq = FR_QTR; - } - get_asfreq_info(FR_DAY, qtr_freq, &af_info); - - if (DtoQ_yq(daily_ord, &af_info, year, quarter) == INT_ERR_CODE) return -1; - - return 0; -} - -static int _quarter_year(npy_int64 ordinal, int freq, int *year, int *quarter) { - asfreq_info af_info; - int qtr_freq; - - ordinal = get_python_ordinal(ordinal, freq) - ORD_OFFSET; - - if (get_freq_group(freq) == FR_QTR) - qtr_freq = freq; - else - qtr_freq = FR_QTR; - - get_asfreq_info(FR_DAY, qtr_freq, &af_info); - - if (DtoQ_yq(ordinal, &af_info, year, quarter) == INT_ERR_CODE) - return INT_ERR_CODE; - - if ((qtr_freq % 1000) > 12) *year -= 1; - - return 0; -} - -static int _ISOWeek(struct date_info *dinfo) { - int week; - - /* Estimate */ - week = (dinfo->day_of_year - 1) - dinfo->day_of_week + 3; - if (week >= 0) week = week / 7 + 1; - - /* Verify */ - if (week < 0) { - /* The day lies in last week of the previous year */ - if ((week > -2) || (week == -2 && dInfoCalc_Leapyear(dinfo->year - 1, - dinfo->calendar))) - week = 53; - else - week = 52; - } else if (week == 53) { - /* Check if the week belongs to year or year+1 */ - if (31 - dinfo->day + dinfo->day_of_week < 3) { - week = 1; - } - } - - return week; -} - -int get_date_info(npy_int64 ordinal, int freq, struct date_info *dinfo) { - npy_int64 absdate = get_python_ordinal(ordinal, freq); - double abstime = get_abs_time(freq, absdate - ORD_OFFSET, ordinal); - - while (abstime < 0) { - abstime += 86400; - absdate -= 1; - } - while (abstime >= 86400) { - abstime -= 86400; - absdate += 1; - } - - if (dInfoCalc_SetFromAbsDateTime(dinfo, absdate, abstime, - GREGORIAN_CALENDAR)) - return INT_ERR_CODE; - - return 0; -} - -int pyear(npy_int64 ordinal, int freq) { - struct date_info dinfo; - get_date_info(ordinal, freq, &dinfo); - return dinfo.year; -} - -int pqyear(npy_int64 ordinal, int freq) { - int year, quarter; - if (_quarter_year(ordinal, freq, &year, 
&quarter) == INT_ERR_CODE) - return INT_ERR_CODE; - return year; -} - -int pquarter(npy_int64 ordinal, int freq) { - int year, quarter; - if (_quarter_year(ordinal, freq, &year, &quarter) == INT_ERR_CODE) - return INT_ERR_CODE; - return quarter; -} - -int pmonth(npy_int64 ordinal, int freq) { - struct date_info dinfo; - if (get_date_info(ordinal, freq, &dinfo) == INT_ERR_CODE) - return INT_ERR_CODE; - return dinfo.month; -} - -int pday(npy_int64 ordinal, int freq) { - struct date_info dinfo; - if (get_date_info(ordinal, freq, &dinfo) == INT_ERR_CODE) - return INT_ERR_CODE; - return dinfo.day; -} - -int pweekday(npy_int64 ordinal, int freq) { - struct date_info dinfo; - if (get_date_info(ordinal, freq, &dinfo) == INT_ERR_CODE) - return INT_ERR_CODE; - return dinfo.day_of_week; -} - -int pday_of_week(npy_int64 ordinal, int freq) { - struct date_info dinfo; - if (get_date_info(ordinal, freq, &dinfo) == INT_ERR_CODE) - return INT_ERR_CODE; - return dinfo.day_of_week; -} - -int pday_of_year(npy_int64 ordinal, int freq) { - struct date_info dinfo; - if (get_date_info(ordinal, freq, &dinfo) == INT_ERR_CODE) - return INT_ERR_CODE; - return dinfo.day_of_year; -} - -int pweek(npy_int64 ordinal, int freq) { - struct date_info dinfo; - if (get_date_info(ordinal, freq, &dinfo) == INT_ERR_CODE) - return INT_ERR_CODE; - return _ISOWeek(&dinfo); -} - -int phour(npy_int64 ordinal, int freq) { - struct date_info dinfo; - if (get_date_info(ordinal, freq, &dinfo) == INT_ERR_CODE) - return INT_ERR_CODE; - return dinfo.hour; -} - -int pminute(npy_int64 ordinal, int freq) { - struct date_info dinfo; - if (get_date_info(ordinal, freq, &dinfo) == INT_ERR_CODE) - return INT_ERR_CODE; - return dinfo.minute; -} - -int psecond(npy_int64 ordinal, int freq) { - struct date_info dinfo; - if (get_date_info(ordinal, freq, &dinfo) == INT_ERR_CODE) - return INT_ERR_CODE; - return (int)dinfo.second; -} - -int pdays_in_month(npy_int64 ordinal, int freq) { - int days; - struct date_info dinfo; - if 
(get_date_info(ordinal, freq, &dinfo) == INT_ERR_CODE) - return INT_ERR_CODE; - - days = days_in_month[dInfoCalc_Leapyear(dinfo.year, dinfo.calendar)] - [dinfo.month - 1]; - return days; -} diff --git a/pandas/src/period_helper.h b/pandas/src/period_helper.h deleted file mode 100644 index 601717692ff6d..0000000000000 --- a/pandas/src/period_helper.h +++ /dev/null @@ -1,191 +0,0 @@ -/* -Copyright (c) 2016, PyData Development Team -All rights reserved. - -Distributed under the terms of the BSD Simplified License. - -The full license is in the LICENSE file, distributed with this software. - -Borrowed and derived code from scikits.timeseries that we will expose via -Cython to pandas. This primarily concerns interval representation and -frequency conversion routines. -*/ - -#ifndef PANDAS_SRC_PERIOD_HELPER_H_ -#define PANDAS_SRC_PERIOD_HELPER_H_ - -#include -#include "headers/stdint.h" -#include "helper.h" -#include "limits.h" -#include "numpy/ndarraytypes.h" - -/* - * declarations from period here - */ - -#define GREGORIAN_CALENDAR 0 -#define JULIAN_CALENDAR 1 - -#define SECONDS_PER_DAY ((double)86400.0) - -#define Py_AssertWithArg(x, errortype, errorstr, a1) \ - { \ - if (!(x)) { \ - PyErr_Format(errortype, errorstr, a1); \ - goto onError; \ - } \ - } -#define Py_Error(errortype, errorstr) \ - { \ - PyErr_SetString(errortype, errorstr); \ - goto onError; \ - } - -/*** FREQUENCY CONSTANTS ***/ - -// HIGHFREQ_ORIG is the datetime ordinal from which to begin the second -// frequency ordinal sequence - -// #define HIGHFREQ_ORIG 62135683200LL -#define BASE_YEAR 1970 -#define ORD_OFFSET 719163LL // days until 1970-01-01 -#define BDAY_OFFSET 513689LL // days until 1970-01-01 -#define WEEK_OFFSET 102737LL -#define BASE_WEEK_TO_DAY_OFFSET \ - 1 // difference between day 0 and end of week in days -#define DAYS_PER_WEEK 7 -#define BUSINESS_DAYS_PER_WEEK 5 -#define HIGHFREQ_ORIG 0 // ORD_OFFSET * 86400LL // days until 1970-01-01 - -#define FR_ANN 1000 /* Annual */ -#define 
FR_ANNDEC FR_ANN /* Annual - December year end*/ -#define FR_ANNJAN 1001 /* Annual - January year end*/ -#define FR_ANNFEB 1002 /* Annual - February year end*/ -#define FR_ANNMAR 1003 /* Annual - March year end*/ -#define FR_ANNAPR 1004 /* Annual - April year end*/ -#define FR_ANNMAY 1005 /* Annual - May year end*/ -#define FR_ANNJUN 1006 /* Annual - June year end*/ -#define FR_ANNJUL 1007 /* Annual - July year end*/ -#define FR_ANNAUG 1008 /* Annual - August year end*/ -#define FR_ANNSEP 1009 /* Annual - September year end*/ -#define FR_ANNOCT 1010 /* Annual - October year end*/ -#define FR_ANNNOV 1011 /* Annual - November year end*/ - -/* The standard quarterly frequencies with various fiscal year ends - eg, Q42005 for Q@OCT runs Aug 1, 2005 to Oct 31, 2005 */ -#define FR_QTR 2000 /* Quarterly - December year end (default quarterly) */ -#define FR_QTRDEC FR_QTR /* Quarterly - December year end */ -#define FR_QTRJAN 2001 /* Quarterly - January year end */ -#define FR_QTRFEB 2002 /* Quarterly - February year end */ -#define FR_QTRMAR 2003 /* Quarterly - March year end */ -#define FR_QTRAPR 2004 /* Quarterly - April year end */ -#define FR_QTRMAY 2005 /* Quarterly - May year end */ -#define FR_QTRJUN 2006 /* Quarterly - June year end */ -#define FR_QTRJUL 2007 /* Quarterly - July year end */ -#define FR_QTRAUG 2008 /* Quarterly - August year end */ -#define FR_QTRSEP 2009 /* Quarterly - September year end */ -#define FR_QTROCT 2010 /* Quarterly - October year end */ -#define FR_QTRNOV 2011 /* Quarterly - November year end */ - -#define FR_MTH 3000 /* Monthly */ - -#define FR_WK 4000 /* Weekly */ -#define FR_WKSUN FR_WK /* Weekly - Sunday end of week */ -#define FR_WKMON 4001 /* Weekly - Monday end of week */ -#define FR_WKTUE 4002 /* Weekly - Tuesday end of week */ -#define FR_WKWED 4003 /* Weekly - Wednesday end of week */ -#define FR_WKTHU 4004 /* Weekly - Thursday end of week */ -#define FR_WKFRI 4005 /* Weekly - Friday end of week */ -#define FR_WKSAT 4006 /* 
Weekly - Saturday end of week */ - -#define FR_BUS 5000 /* Business days */ -#define FR_DAY 6000 /* Daily */ -#define FR_HR 7000 /* Hourly */ -#define FR_MIN 8000 /* Minutely */ -#define FR_SEC 9000 /* Secondly */ -#define FR_MS 10000 /* Millisecondly */ -#define FR_US 11000 /* Microsecondly */ -#define FR_NS 12000 /* Nanosecondly */ - -#define FR_UND -10000 /* Undefined */ - -#define INT_ERR_CODE INT32_MIN - -#define MEM_CHECK(item) \ - if (item == NULL) { \ - return PyErr_NoMemory(); \ - } -#define ERR_CHECK(item) \ - if (item == NULL) { \ - return NULL; \ - } - -typedef struct asfreq_info { - int from_week_end; // day the week ends on in the "from" frequency - int to_week_end; // day the week ends on in the "to" frequency - - int from_a_year_end; // month the year ends on in the "from" frequency - int to_a_year_end; // month the year ends on in the "to" frequency - - int from_q_year_end; // month the year ends on in the "from" frequency - int to_q_year_end; // month the year ends on in the "to" frequency - - npy_int64 intraday_conversion_factor; -} asfreq_info; - -typedef struct date_info { - npy_int64 absdate; - double abstime; - - double second; - int minute; - int hour; - int day; - int month; - int quarter; - int year; - int day_of_week; - int day_of_year; - int calendar; -} date_info; - -typedef npy_int64 (*freq_conv_func)(npy_int64, char, asfreq_info *); - -/* - * new pandas API helper functions here - */ - -npy_int64 asfreq(npy_int64 period_ordinal, int freq1, int freq2, char relation); - -npy_int64 get_period_ordinal(int year, int month, int day, int hour, int minute, - int second, int microseconds, int picoseconds, - int freq); - -npy_int64 get_python_ordinal(npy_int64 period_ordinal, int freq); - -int get_date_info(npy_int64 ordinal, int freq, struct date_info *dinfo); -freq_conv_func get_asfreq_func(int fromFreq, int toFreq); -void get_asfreq_info(int fromFreq, int toFreq, asfreq_info *af_info); - -int pyear(npy_int64 ordinal, int freq); -int 
pqyear(npy_int64 ordinal, int freq); -int pquarter(npy_int64 ordinal, int freq); -int pmonth(npy_int64 ordinal, int freq); -int pday(npy_int64 ordinal, int freq); -int pweekday(npy_int64 ordinal, int freq); -int pday_of_week(npy_int64 ordinal, int freq); -int pday_of_year(npy_int64 ordinal, int freq); -int pweek(npy_int64 ordinal, int freq); -int phour(npy_int64 ordinal, int freq); -int pminute(npy_int64 ordinal, int freq); -int psecond(npy_int64 ordinal, int freq); -int pdays_in_month(npy_int64 ordinal, int freq); - -double getAbsTime(int freq, npy_int64 dailyDate, npy_int64 originalDate); -char *c_strftime(struct date_info *dinfo, char *fmt); -int get_yq(npy_int64 ordinal, int freq, int *quarter, int *year); - -void initialize_daytime_conversion_factor_matrix(void); - -#endif // PANDAS_SRC_PERIOD_HELPER_H_ diff --git a/pandas/src/properties.pyx b/pandas/src/properties.pyx deleted file mode 100644 index e619a3b6edd9a..0000000000000 --- a/pandas/src/properties.pyx +++ /dev/null @@ -1,65 +0,0 @@ -from cpython cimport PyDict_Contains, PyDict_GetItem, PyDict_GetItem - - -cdef class cache_readonly(object): - - cdef readonly: - object func, name, allow_setting - - def __init__(self, func=None, allow_setting=False): - if func is not None: - self.func = func - self.name = func.__name__ - self.allow_setting = allow_setting - - def __call__(self, func, doc=None): - self.func = func - self.name = func.__name__ - return self - - def __get__(self, obj, typ): - # Get the cache or set a default one if needed - - cache = getattr(obj, '_cache', None) - if cache is None: - try: - cache = obj._cache = {} - except (AttributeError): - return - - if PyDict_Contains(cache, self.name): - # not necessary to Py_INCREF - val = PyDict_GetItem(cache, self.name) - else: - val = self.func(obj) - PyDict_SetItem(cache, self.name, val) - return val - - def __set__(self, obj, value): - - if not self.allow_setting: - raise Exception("cannot set values for [%s]" % self.name) - - # Get the cache or 
set a default one if needed - cache = getattr(obj, '_cache', None) - if cache is None: - try: - cache = obj._cache = {} - except (AttributeError): - return - - PyDict_SetItem(cache, self.name, value) - -cdef class AxisProperty(object): - cdef: - Py_ssize_t axis - - def __init__(self, axis=0): - self.axis = axis - - def __get__(self, obj, type): - cdef list axes = obj._data.axes - return axes[self.axis] - - def __set__(self, obj, value): - obj._set_axis(self.axis, value) diff --git a/pandas/src/skiplist.pxd b/pandas/src/skiplist.pxd deleted file mode 100644 index 69e9df5b542aa..0000000000000 --- a/pandas/src/skiplist.pxd +++ /dev/null @@ -1,22 +0,0 @@ -cdef extern from "skiplist.h": - ctypedef struct node_t: - node_t **next - int *width - double value - int is_nil - int levels - int ref_count - - ctypedef struct skiplist_t: - node_t *head - node_t **tmp_chain - int *tmp_steps - int size - int maxlevels - - inline skiplist_t* skiplist_init(int) nogil - inline void skiplist_destroy(skiplist_t*) nogil - inline double skiplist_get(skiplist_t*, int, int*) nogil - inline int skiplist_insert(skiplist_t*, double) nogil - inline int skiplist_remove(skiplist_t*, double) nogil - diff --git a/pandas/stats/api.py b/pandas/stats/api.py deleted file mode 100644 index 2a11456d4f9e5..0000000000000 --- a/pandas/stats/api.py +++ /dev/null @@ -1,7 +0,0 @@ -""" -Common namespace of statistical functions -""" - -# flake8: noqa - -from pandas.stats.moments import * diff --git a/pandas/stats/moments.py b/pandas/stats/moments.py deleted file mode 100644 index 95b209aee0b0c..0000000000000 --- a/pandas/stats/moments.py +++ /dev/null @@ -1,851 +0,0 @@ -""" -Provides rolling statistical moments and related descriptive -statistics implemented in Cython -""" -from __future__ import division - -import warnings -import numpy as np -from pandas.types.common import is_scalar -from pandas.core.api import DataFrame, Series -from pandas.util.decorators import Substitution, Appender - -__all__ = 
['rolling_count', 'rolling_max', 'rolling_min', - 'rolling_sum', 'rolling_mean', 'rolling_std', 'rolling_cov', - 'rolling_corr', 'rolling_var', 'rolling_skew', 'rolling_kurt', - 'rolling_quantile', 'rolling_median', 'rolling_apply', - 'rolling_window', - 'ewma', 'ewmvar', 'ewmstd', 'ewmvol', 'ewmcorr', 'ewmcov', - 'expanding_count', 'expanding_max', 'expanding_min', - 'expanding_sum', 'expanding_mean', 'expanding_std', - 'expanding_cov', 'expanding_corr', 'expanding_var', - 'expanding_skew', 'expanding_kurt', 'expanding_quantile', - 'expanding_median', 'expanding_apply'] - -# ----------------------------------------------------------------------------- -# Docs - -# The order of arguments for the _doc_template is: -# (header, args, kwargs, returns, notes) - -_doc_template = """ -%s - -Parameters ----------- -%s%s -Returns -------- -%s -%s -""" - -_roll_kw = """window : int - Size of the moving window. This is the number of observations used for - calculating the statistic. -min_periods : int, default None - Minimum number of observations in window required to have a value - (otherwise result is NA). -freq : string or DateOffset object, optional (default None) - Frequency to conform the data to before computing the statistic. Specified - as a frequency string or DateOffset object. -center : boolean, default False - Set the labels at the center of the window. -how : string, default '%s' - Method for down- or re-sampling -""" - -_roll_notes = r""" -Notes ------ -By default, the result is set to the right edge of the window. This can be -changed to the center of the window by setting ``center=True``. - -The `freq` keyword is used to conform time series data to a specified -frequency by resampling the data. This is done with the default parameters -of :meth:`~pandas.Series.resample` (i.e. using the `mean`). 
-""" - - -_ewm_kw = r"""com : float, optional - Specify decay in terms of center of mass, - :math:`\alpha = 1 / (1 + com),\text{ for } com \geq 0` -span : float, optional - Specify decay in terms of span, - :math:`\alpha = 2 / (span + 1),\text{ for } span \geq 1` -halflife : float, optional - Specify decay in terms of half-life, - :math:`\alpha = 1 - exp(log(0.5) / halflife),\text{ for } halflife > 0` -alpha : float, optional - Specify smoothing factor :math:`\alpha` directly, - :math:`0 < \alpha \leq 1` - - .. versionadded:: 0.18.0 - -min_periods : int, default 0 - Minimum number of observations in window required to have a value - (otherwise result is NA). -freq : None or string alias / date offset object, default=None - Frequency to conform to before computing statistic -adjust : boolean, default True - Divide by decaying adjustment factor in beginning periods to account for - imbalance in relative weightings (viewing EWMA as a moving average) -how : string, default 'mean' - Method for down- or re-sampling -ignore_na : boolean, default False - Ignore missing values when calculating weights; - specify True to reproduce pre-0.15.0 behavior -""" - -_ewm_notes = r""" -Notes ------ -Exactly one of center of mass, span, half-life, and alpha must be provided. -Allowed values and relationship between the parameters are specified in the -parameter descriptions above; see the link at the end of this section for -a detailed explanation. - -When adjust is True (default), weighted averages are calculated using weights - (1-alpha)**(n-1), (1-alpha)**(n-2), ..., 1-alpha, 1. - -When adjust is False, weighted averages are calculated recursively as: - weighted_average[0] = arg[0]; - weighted_average[i] = (1-alpha)*weighted_average[i-1] + alpha*arg[i]. - -When ignore_na is False (default), weights are based on absolute positions. 
-For example, the weights of x and y used in calculating the final weighted -average of [x, None, y] are (1-alpha)**2 and 1 (if adjust is True), and -(1-alpha)**2 and alpha (if adjust is False). - -When ignore_na is True (reproducing pre-0.15.0 behavior), weights are based on -relative positions. For example, the weights of x and y used in calculating -the final weighted average of [x, None, y] are 1-alpha and 1 (if adjust is -True), and 1-alpha and alpha (if adjust is False). - -More details can be found at -http://pandas.pydata.org/pandas-docs/stable/computation.html#exponentially-weighted-windows -""" - -_expanding_kw = """min_periods : int, default None - Minimum number of observations in window required to have a value - (otherwise result is NA). -freq : string or DateOffset object, optional (default None) - Frequency to conform the data to before computing the statistic. Specified - as a frequency string or DateOffset object. -""" - - -_type_of_input_retval = "y : type of input argument" - -_flex_retval = """y : type depends on inputs - DataFrame / DataFrame -> DataFrame (matches on columns) or Panel (pairwise) - DataFrame / Series -> Computes result for each column - Series / Series -> Series""" - -_pairwise_retval = "y : Panel whose items are df1.index values" - -_unary_arg = "arg : Series, DataFrame\n" - -_binary_arg_flex = """arg1 : Series, DataFrame, or ndarray -arg2 : Series, DataFrame, or ndarray, optional - if not supplied then will default to arg1 and produce pairwise output -""" - -_binary_arg = """arg1 : Series, DataFrame, or ndarray -arg2 : Series, DataFrame, or ndarray -""" - -_pairwise_arg = """df1 : DataFrame -df2 : DataFrame -""" - -_pairwise_kw = """pairwise : bool, default False - If False then only matching columns between arg1 and arg2 will be used and - the output will be a DataFrame. - If True then all pairwise combinations will be calculated and the output - will be a Panel in the case of DataFrame inputs. 
In the case of missing - elements, only complete pairwise observations will be used. -""" - -_ddof_kw = """ddof : int, default 1 - Delta Degrees of Freedom. The divisor used in calculations - is ``N - ddof``, where ``N`` represents the number of elements. -""" - -_bias_kw = r"""bias : boolean, default False - Use a standard estimation bias correction -""" - - -def ensure_compat(dispatch, name, arg, func_kw=None, *args, **kwargs): - """ - wrapper function to dispatch to the appropriate window functions - wraps/unwraps ndarrays for compat - - can be removed when ndarray support is removed - """ - is_ndarray = isinstance(arg, np.ndarray) - if is_ndarray: - if arg.ndim == 1: - arg = Series(arg) - elif arg.ndim == 2: - arg = DataFrame(arg) - else: - raise AssertionError("cannot support ndim > 2 for ndarray compat") - - warnings.warn("pd.{dispatch}_{name} is deprecated for ndarrays and " - "will be removed " - "in a future version" - .format(dispatch=dispatch, name=name), - FutureWarning, stacklevel=3) - - # get the functional keywords here - if func_kw is None: - func_kw = [] - kwds = {} - for k in func_kw: - value = kwargs.pop(k, None) - if value is not None: - kwds[k] = value - - # how is a keyword that if not-None should be in kwds - how = kwargs.pop('how', None) - if how is not None: - kwds['how'] = how - - r = getattr(arg, dispatch)(**kwargs) - - if not is_ndarray: - - # give a helpful deprecation message - # with copy-pastable arguments - pargs = ','.join(["{a}={b}".format(a=a, b=b) - for a, b in kwargs.items() if b is not None]) - aargs = ','.join(args) - if len(aargs): - aargs += ',' - - def f(a, b): - if is_scalar(b): - return "{a}={b}".format(a=a, b=b) - return "{a}=<{b}>".format(a=a, b=type(b).__name__) - aargs = ','.join([f(a, b) for a, b in kwds.items() if b is not None]) - warnings.warn("pd.{dispatch}_{name} is deprecated for {klass} " - "and will be removed in a future version, replace with " - "\n\t{klass}.{dispatch}({pargs}).{name}({aargs})" - 
.format(klass=type(arg).__name__, pargs=pargs, - aargs=aargs, dispatch=dispatch, name=name), - FutureWarning, stacklevel=3) - - result = getattr(r, name)(*args, **kwds) - - if is_ndarray: - result = result.values - return result - - -def rolling_count(arg, window, **kwargs): - """ - Rolling count of number of non-NaN observations inside provided window. - - Parameters - ---------- - arg : DataFrame or numpy ndarray-like - window : int - Size of the moving window. This is the number of observations used for - calculating the statistic. - freq : string or DateOffset object, optional (default None) - Frequency to conform the data to before computing the - statistic. Specified as a frequency string or DateOffset object. - center : boolean, default False - Whether the label should correspond with center of window - how : string, default 'mean' - Method for down- or re-sampling - - Returns - ------- - rolling_count : type of caller - - Notes - ----- - The `freq` keyword is used to conform time series data to a specified - frequency by resampling the data. This is done with the default parameters - of :meth:`~pandas.Series.resample` (i.e. using the `mean`). - - To learn more about the frequency strings, please see `this link - `__. 
- """ - return ensure_compat('rolling', 'count', arg, window=window, **kwargs) - - -@Substitution("Unbiased moving covariance.", _binary_arg_flex, - _roll_kw % 'None' + _pairwise_kw + _ddof_kw, _flex_retval, - _roll_notes) -@Appender(_doc_template) -def rolling_cov(arg1, arg2=None, window=None, pairwise=None, **kwargs): - if window is None and isinstance(arg2, (int, float)): - window = arg2 - arg2 = arg1 - pairwise = True if pairwise is None else pairwise # only default unset - elif arg2 is None: - arg2 = arg1 - pairwise = True if pairwise is None else pairwise # only default unset - return ensure_compat('rolling', - 'cov', - arg1, - other=arg2, - window=window, - pairwise=pairwise, - func_kw=['other', 'pairwise', 'ddof'], - **kwargs) - - -@Substitution("Moving sample correlation.", _binary_arg_flex, - _roll_kw % 'None' + _pairwise_kw, _flex_retval, _roll_notes) -@Appender(_doc_template) -def rolling_corr(arg1, arg2=None, window=None, pairwise=None, **kwargs): - if window is None and isinstance(arg2, (int, float)): - window = arg2 - arg2 = arg1 - pairwise = True if pairwise is None else pairwise # only default unset - elif arg2 is None: - arg2 = arg1 - pairwise = True if pairwise is None else pairwise # only default unset - return ensure_compat('rolling', - 'corr', - arg1, - other=arg2, - window=window, - pairwise=pairwise, - func_kw=['other', 'pairwise'], - **kwargs) - - -# ----------------------------------------------------------------------------- -# Exponential moving moments - - -@Substitution("Exponentially-weighted moving average", _unary_arg, _ewm_kw, - _type_of_input_retval, _ewm_notes) -@Appender(_doc_template) -def ewma(arg, com=None, span=None, halflife=None, alpha=None, min_periods=0, - freq=None, adjust=True, how=None, ignore_na=False): - return ensure_compat('ewm', - 'mean', - arg, - com=com, - span=span, - halflife=halflife, - alpha=alpha, - min_periods=min_periods, - freq=freq, - adjust=adjust, - how=how, - ignore_na=ignore_na) - - 
-@Substitution("Exponentially-weighted moving variance", _unary_arg, - _ewm_kw + _bias_kw, _type_of_input_retval, _ewm_notes) -@Appender(_doc_template) -def ewmvar(arg, com=None, span=None, halflife=None, alpha=None, min_periods=0, - bias=False, freq=None, how=None, ignore_na=False, adjust=True): - return ensure_compat('ewm', - 'var', - arg, - com=com, - span=span, - halflife=halflife, - alpha=alpha, - min_periods=min_periods, - freq=freq, - adjust=adjust, - how=how, - ignore_na=ignore_na, - bias=bias, - func_kw=['bias']) - - -@Substitution("Exponentially-weighted moving std", _unary_arg, - _ewm_kw + _bias_kw, _type_of_input_retval, _ewm_notes) -@Appender(_doc_template) -def ewmstd(arg, com=None, span=None, halflife=None, alpha=None, min_periods=0, - bias=False, freq=None, how=None, ignore_na=False, adjust=True): - return ensure_compat('ewm', - 'std', - arg, - com=com, - span=span, - halflife=halflife, - alpha=alpha, - min_periods=min_periods, - freq=freq, - adjust=adjust, - how=how, - ignore_na=ignore_na, - bias=bias, - func_kw=['bias']) - -ewmvol = ewmstd - - -@Substitution("Exponentially-weighted moving covariance", _binary_arg_flex, - _ewm_kw + _pairwise_kw, _type_of_input_retval, _ewm_notes) -@Appender(_doc_template) -def ewmcov(arg1, arg2=None, com=None, span=None, halflife=None, alpha=None, - min_periods=0, bias=False, freq=None, pairwise=None, how=None, - ignore_na=False, adjust=True): - if arg2 is None: - arg2 = arg1 - pairwise = True if pairwise is None else pairwise - elif isinstance(arg2, (int, float)) and com is None: - com = arg2 - arg2 = arg1 - pairwise = True if pairwise is None else pairwise - - return ensure_compat('ewm', - 'cov', - arg1, - other=arg2, - com=com, - span=span, - halflife=halflife, - alpha=alpha, - min_periods=min_periods, - bias=bias, - freq=freq, - how=how, - ignore_na=ignore_na, - adjust=adjust, - pairwise=pairwise, - func_kw=['other', 'pairwise', 'bias']) - - -@Substitution("Exponentially-weighted moving correlation", 
_binary_arg_flex, - _ewm_kw + _pairwise_kw, _type_of_input_retval, _ewm_notes) -@Appender(_doc_template) -def ewmcorr(arg1, arg2=None, com=None, span=None, halflife=None, alpha=None, - min_periods=0, freq=None, pairwise=None, how=None, ignore_na=False, - adjust=True): - if arg2 is None: - arg2 = arg1 - pairwise = True if pairwise is None else pairwise - elif isinstance(arg2, (int, float)) and com is None: - com = arg2 - arg2 = arg1 - pairwise = True if pairwise is None else pairwise - return ensure_compat('ewm', - 'corr', - arg1, - other=arg2, - com=com, - span=span, - halflife=halflife, - alpha=alpha, - min_periods=min_periods, - freq=freq, - how=how, - ignore_na=ignore_na, - adjust=adjust, - pairwise=pairwise, - func_kw=['other', 'pairwise']) - -# --------------------------------------------------------------------- -# Python interface to Cython functions - - -def _rolling_func(name, desc, how=None, func_kw=None, additional_kw=''): - if how is None: - how_arg_str = 'None' - else: - how_arg_str = "'%s" % how - - @Substitution(desc, _unary_arg, _roll_kw % how_arg_str + additional_kw, - _type_of_input_retval, _roll_notes) - @Appender(_doc_template) - def f(arg, window, min_periods=None, freq=None, center=False, - **kwargs): - - return ensure_compat('rolling', - name, - arg, - window=window, - min_periods=min_periods, - freq=freq, - center=center, - func_kw=func_kw, - **kwargs) - return f - -rolling_max = _rolling_func('max', 'Moving maximum.', how='max') -rolling_min = _rolling_func('min', 'Moving minimum.', how='min') -rolling_sum = _rolling_func('sum', 'Moving sum.') -rolling_mean = _rolling_func('mean', 'Moving mean.') -rolling_median = _rolling_func('median', 'Moving median.', how='median') -rolling_std = _rolling_func('std', 'Moving standard deviation.', - func_kw=['ddof'], - additional_kw=_ddof_kw) -rolling_var = _rolling_func('var', 'Moving variance.', - func_kw=['ddof'], - additional_kw=_ddof_kw) -rolling_skew = _rolling_func('skew', 'Unbiased moving 
skewness.') -rolling_kurt = _rolling_func('kurt', 'Unbiased moving kurtosis.') - - -def rolling_quantile(arg, window, quantile, min_periods=None, freq=None, - center=False): - """Moving quantile. - - Parameters - ---------- - arg : Series, DataFrame - window : int - Size of the moving window. This is the number of observations used for - calculating the statistic. - quantile : float - 0 <= quantile <= 1 - min_periods : int, default None - Minimum number of observations in window required to have a value - (otherwise result is NA). - freq : string or DateOffset object, optional (default None) - Frequency to conform the data to before computing the - statistic. Specified as a frequency string or DateOffset object. - center : boolean, default False - Whether the label should correspond with center of window - - Returns - ------- - y : type of input argument - - Notes - ----- - By default, the result is set to the right edge of the window. This can be - changed to the center of the window by setting ``center=True``. - - The `freq` keyword is used to conform time series data to a specified - frequency by resampling the data. This is done with the default parameters - of :meth:`~pandas.Series.resample` (i.e. using the `mean`). - - To learn more about the frequency strings, please see `this link - `__. - """ - return ensure_compat('rolling', - 'quantile', - arg, - window=window, - freq=freq, - center=center, - min_periods=min_periods, - func_kw=['quantile'], - quantile=quantile) - - -def rolling_apply(arg, window, func, min_periods=None, freq=None, - center=False, args=(), kwargs={}): - """Generic moving function application. - - Parameters - ---------- - arg : Series, DataFrame - window : int - Size of the moving window. This is the number of observations used for - calculating the statistic. 
- func : function - Must produce a single value from an ndarray input - min_periods : int, default None - Minimum number of observations in window required to have a value - (otherwise result is NA). - freq : string or DateOffset object, optional (default None) - Frequency to conform the data to before computing the - statistic. Specified as a frequency string or DateOffset object. - center : boolean, default False - Whether the label should correspond with center of window - args : tuple - Passed on to func - kwargs : dict - Passed on to func - - Returns - ------- - y : type of input argument - - Notes - ----- - By default, the result is set to the right edge of the window. This can be - changed to the center of the window by setting ``center=True``. - - The `freq` keyword is used to conform time series data to a specified - frequency by resampling the data. This is done with the default parameters - of :meth:`~pandas.Series.resample` (i.e. using the `mean`). - - To learn more about the frequency strings, please see `this link - `__. - """ - return ensure_compat('rolling', - 'apply', - arg, - window=window, - freq=freq, - center=center, - min_periods=min_periods, - func_kw=['func', 'args', 'kwargs'], - func=func, - args=args, - kwargs=kwargs) - - -def rolling_window(arg, window=None, win_type=None, min_periods=None, - freq=None, center=False, mean=True, - axis=0, how=None, **kwargs): - """ - Applies a moving window of type ``window_type`` and size ``window`` - on the data. - - Parameters - ---------- - arg : Series, DataFrame - window : int or ndarray - Weighting window specification. If the window is an integer, then it is - treated as the window length and win_type is required - win_type : str, default None - Window type (see Notes) - min_periods : int, default None - Minimum number of observations in window required to have a value - (otherwise result is NA). 
- freq : string or DateOffset object, optional (default None) - Frequency to conform the data to before computing the - statistic. Specified as a frequency string or DateOffset object. - center : boolean, default False - Whether the label should correspond with center of window - mean : boolean, default True - If True computes weighted mean, else weighted sum - axis : {0, 1}, default 0 - how : string, default 'mean' - Method for down- or re-sampling - - Returns - ------- - y : type of input argument - - Notes - ----- - The recognized window types are: - - * ``boxcar`` - * ``triang`` - * ``blackman`` - * ``hamming`` - * ``bartlett`` - * ``parzen`` - * ``bohman`` - * ``blackmanharris`` - * ``nuttall`` - * ``barthann`` - * ``kaiser`` (needs beta) - * ``gaussian`` (needs std) - * ``general_gaussian`` (needs power, width) - * ``slepian`` (needs width). - - By default, the result is set to the right edge of the window. This can be - changed to the center of the window by setting ``center=True``. - - The `freq` keyword is used to conform time series data to a specified - frequency by resampling the data. This is done with the default parameters - of :meth:`~pandas.Series.resample` (i.e. using the `mean`). - - To learn more about the frequency strings, please see `this link - `__. 
- """ - func = 'mean' if mean else 'sum' - return ensure_compat('rolling', - func, - arg, - window=window, - win_type=win_type, - freq=freq, - center=center, - min_periods=min_periods, - axis=axis, - func_kw=kwargs.keys(), - **kwargs) - - -def _expanding_func(name, desc, func_kw=None, additional_kw=''): - @Substitution(desc, _unary_arg, _expanding_kw + additional_kw, - _type_of_input_retval, "") - @Appender(_doc_template) - def f(arg, min_periods=1, freq=None, **kwargs): - return ensure_compat('expanding', - name, - arg, - min_periods=min_periods, - freq=freq, - func_kw=func_kw, - **kwargs) - return f - -expanding_max = _expanding_func('max', 'Expanding maximum.') -expanding_min = _expanding_func('min', 'Expanding minimum.') -expanding_sum = _expanding_func('sum', 'Expanding sum.') -expanding_mean = _expanding_func('mean', 'Expanding mean.') -expanding_median = _expanding_func('median', 'Expanding median.') - -expanding_std = _expanding_func('std', 'Expanding standard deviation.', - func_kw=['ddof'], - additional_kw=_ddof_kw) -expanding_var = _expanding_func('var', 'Expanding variance.', - func_kw=['ddof'], - additional_kw=_ddof_kw) -expanding_skew = _expanding_func('skew', 'Unbiased expanding skewness.') -expanding_kurt = _expanding_func('kurt', 'Unbiased expanding kurtosis.') - - -def expanding_count(arg, freq=None): - """ - Expanding count of number of non-NaN observations. - - Parameters - ---------- - arg : DataFrame or numpy ndarray-like - freq : string or DateOffset object, optional (default None) - Frequency to conform the data to before computing the - statistic. Specified as a frequency string or DateOffset object. - - Returns - ------- - expanding_count : type of caller - - Notes - ----- - The `freq` keyword is used to conform time series data to a specified - frequency by resampling the data. This is done with the default parameters - of :meth:`~pandas.Series.resample` (i.e. using the `mean`). 
- - To learn more about the frequency strings, please see `this link - `__. - """ - return ensure_compat('expanding', 'count', arg, freq=freq) - - -def expanding_quantile(arg, quantile, min_periods=1, freq=None): - """Expanding quantile. - - Parameters - ---------- - arg : Series, DataFrame - quantile : float - 0 <= quantile <= 1 - min_periods : int, default None - Minimum number of observations in window required to have a value - (otherwise result is NA). - freq : string or DateOffset object, optional (default None) - Frequency to conform the data to before computing the - statistic. Specified as a frequency string or DateOffset object. - - Returns - ------- - y : type of input argument - - Notes - ----- - The `freq` keyword is used to conform time series data to a specified - frequency by resampling the data. This is done with the default parameters - of :meth:`~pandas.Series.resample` (i.e. using the `mean`). - - To learn more about the frequency strings, please see `this link - `__. 
- """ - return ensure_compat('expanding', - 'quantile', - arg, - freq=freq, - min_periods=min_periods, - func_kw=['quantile'], - quantile=quantile) - - -@Substitution("Unbiased expanding covariance.", _binary_arg_flex, - _expanding_kw + _pairwise_kw + _ddof_kw, _flex_retval, "") -@Appender(_doc_template) -def expanding_cov(arg1, arg2=None, min_periods=1, freq=None, - pairwise=None, ddof=1): - if arg2 is None: - arg2 = arg1 - pairwise = True if pairwise is None else pairwise - elif isinstance(arg2, (int, float)) and min_periods is None: - min_periods = arg2 - arg2 = arg1 - pairwise = True if pairwise is None else pairwise - return ensure_compat('expanding', - 'cov', - arg1, - other=arg2, - min_periods=min_periods, - pairwise=pairwise, - freq=freq, - ddof=ddof, - func_kw=['other', 'pairwise', 'ddof']) - - -@Substitution("Expanding sample correlation.", _binary_arg_flex, - _expanding_kw + _pairwise_kw, _flex_retval, "") -@Appender(_doc_template) -def expanding_corr(arg1, arg2=None, min_periods=1, freq=None, pairwise=None): - if arg2 is None: - arg2 = arg1 - pairwise = True if pairwise is None else pairwise - elif isinstance(arg2, (int, float)) and min_periods is None: - min_periods = arg2 - arg2 = arg1 - pairwise = True if pairwise is None else pairwise - return ensure_compat('expanding', - 'corr', - arg1, - other=arg2, - min_periods=min_periods, - pairwise=pairwise, - freq=freq, - func_kw=['other', 'pairwise', 'ddof']) - - -def expanding_apply(arg, func, min_periods=1, freq=None, - args=(), kwargs={}): - """Generic expanding function application. - - Parameters - ---------- - arg : Series, DataFrame - func : function - Must produce a single value from an ndarray input - min_periods : int, default None - Minimum number of observations in window required to have a value - (otherwise result is NA). - freq : string or DateOffset object, optional (default None) - Frequency to conform the data to before computing the - statistic. 
Specified as a frequency string or DateOffset object. - args : tuple - Passed on to func - kwargs : dict - Passed on to func - - Returns - ------- - y : type of input argument - - Notes - ----- - The `freq` keyword is used to conform time series data to a specified - frequency by resampling the data. This is done with the default parameters - of :meth:`~pandas.Series.resample` (i.e. using the `mean`). - - To learn more about the frequency strings, please see `this link - `__. - """ - return ensure_compat('expanding', - 'apply', - arg, - freq=freq, - min_periods=min_periods, - func_kw=['func', 'args', 'kwargs'], - func=func, - args=args, - kwargs=kwargs) diff --git a/pandas/testing.py b/pandas/testing.py new file mode 100644 index 0000000000000..3baf99957cb33 --- /dev/null +++ b/pandas/testing.py @@ -0,0 +1,8 @@ +# flake8: noqa + +""" +Public testing utility functions. +""" + +from pandas.util.testing import ( + assert_frame_equal, assert_series_equal, assert_index_equal) diff --git a/vb_suite/source/_static/stub b/pandas/tests/api/__init__.py similarity index 100% rename from vb_suite/source/_static/stub rename to pandas/tests/api/__init__.py diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py new file mode 100644 index 0000000000000..ea6c250420b13 --- /dev/null +++ b/pandas/tests/api/test_api.py @@ -0,0 +1,253 @@ +# -*- coding: utf-8 -*- +import sys +from warnings import catch_warnings + +import pytest +import pandas as pd +from pandas import api +from pandas.util import testing as tm + + +class Base(object): + + def check(self, namespace, expected, ignored=None): + # see which names are in the namespace, minus optional + # ignored ones + # compare vs the expected + + result = sorted([f for f in dir(namespace) if not f.startswith('_')]) + if ignored is not None: + result = sorted(list(set(result) - set(ignored))) + + expected = sorted(expected) + tm.assert_almost_equal(result, expected) + + +class TestPDApi(Base): + + # these are optionally 
imported based on testing + # & need to be ignored + ignored = ['tests', 'locale', 'conftest'] + + # top-level sub-packages + lib = ['api', 'compat', 'core', 'errors', 'pandas', + 'plotting', 'test', 'testing', 'tools', 'tseries', + 'util', 'options', 'io'] + + # these are already deprecated; awaiting removal + deprecated_modules = ['datetools', 'parser', 'json', 'lib', 'tslib'] + + # misc + misc = ['IndexSlice', 'NaT'] + + # top-level classes + classes = ['Categorical', 'CategoricalIndex', 'DataFrame', 'DateOffset', + 'DatetimeIndex', 'ExcelFile', 'ExcelWriter', 'Float64Index', + 'Grouper', 'HDFStore', 'Index', 'Int64Index', 'MultiIndex', + 'Period', 'PeriodIndex', 'RangeIndex', 'UInt64Index', + 'Series', 'SparseArray', 'SparseDataFrame', + 'SparseSeries', 'Timedelta', + 'TimedeltaIndex', 'Timestamp', 'Interval', 'IntervalIndex'] + + # these are already deprecated; awaiting removal + deprecated_classes = ['WidePanel', 'TimeGrouper', 'Expr', 'Term'] + + # these should be deprecated in the future + deprecated_classes_in_future = ['Panel'] + + # external modules exposed in pandas namespace + modules = ['np', 'datetime'] + + # top-level functions + funcs = ['bdate_range', 'concat', 'crosstab', 'cut', + 'date_range', 'interval_range', 'eval', + 'factorize', 'get_dummies', + 'infer_freq', 'isna', 'isnull', 'lreshape', + 'melt', 'notna', 'notnull', 'offsets', + 'merge', 'merge_ordered', 'merge_asof', + 'period_range', + 'pivot', 'pivot_table', 'qcut', + 'show_versions', 'timedelta_range', 'unique', + 'value_counts', 'wide_to_long'] + + # top-level option funcs + funcs_option = ['reset_option', 'describe_option', 'get_option', + 'option_context', 'set_option', + 'set_eng_float_format'] + + # top-level read_* funcs + funcs_read = ['read_clipboard', 'read_csv', 'read_excel', 'read_fwf', + 'read_gbq', 'read_hdf', 'read_html', 'read_json', + 'read_msgpack', 'read_pickle', 'read_sas', 'read_sql', + 'read_sql_query', 'read_sql_table', 'read_stata', + 'read_table', 
'read_feather', 'read_parquet'] + + # top-level to_* funcs + funcs_to = ['to_datetime', 'to_msgpack', + 'to_numeric', 'to_pickle', 'to_timedelta'] + + # top-level to deprecate in the future + deprecated_funcs_in_future = [] + + # these are already deprecated; awaiting removal + deprecated_funcs = ['pnow', 'match', 'groupby', 'get_store', + 'plot_params', 'scatter_matrix'] + + def test_api(self): + + self.check(pd, + self.lib + self.misc + + self.modules + self.deprecated_modules + + self.classes + self.deprecated_classes + + self.deprecated_classes_in_future + + self.funcs + self.funcs_option + + self.funcs_read + self.funcs_to + + self.deprecated_funcs_in_future + + self.deprecated_funcs, + self.ignored) + + +class TestApi(Base): + + allowed = ['types', 'extensions'] + + def test_api(self): + + self.check(api, self.allowed) + + +class TestTesting(Base): + + funcs = ['assert_frame_equal', 'assert_series_equal', + 'assert_index_equal'] + + def test_testing(self): + + from pandas import testing + self.check(testing, self.funcs) + + +class TestDatetoolsDeprecation(object): + + def test_deprecation_access_func(self): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + pd.datetools.to_datetime('2016-01-01') + + def test_deprecation_access_obj(self): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + pd.datetools.monthEnd + + +class TestTopLevelDeprecations(object): + + # top-level API deprecations + # GH 13790 + + def test_pnow(self): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + pd.pnow(freq='M') + + def test_term(self): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + pd.Term('index>=date') + + def test_expr(self): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + pd.Expr('2>1') + + def test_match(self): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + pd.match([1, 2, 3], [1]) + + def 
test_groupby(self): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + pd.groupby(pd.Series([1, 2, 3]), [1, 1, 1]) + + def test_TimeGrouper(self): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + pd.TimeGrouper(freq='D') + + # GH 15940 + + def test_get_store(self): + pytest.importorskip('tables') + with tm.ensure_clean() as path: + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + s = pd.get_store(path) + s.close() + + +class TestJson(object): + + def test_deprecation_access_func(self): + with catch_warnings(record=True): + pd.json.dumps([]) + + +class TestParser(object): + + def test_deprecation_access_func(self): + with catch_warnings(record=True): + pd.parser.na_values + + +class TestLib(object): + + def test_deprecation_access_func(self): + with catch_warnings(record=True): + pd.lib.infer_dtype('foo') + + +class TestTSLib(object): + + def test_deprecation_access_func(self): + with catch_warnings(record=True): + pd.tslib.Timestamp('20160101') + + +class TestTypes(object): + + def test_deprecation_access_func(self): + with tm.assert_produces_warning( + FutureWarning, check_stacklevel=False): + from pandas.types.concat import union_categoricals + c1 = pd.Categorical(list('aabc')) + c2 = pd.Categorical(list('abcd')) + union_categoricals( + [c1, c2], + sort_categories=True, + ignore_order=True) + + +class TestCDateRange(object): + + def test_deprecation_cdaterange(self): + # GH17596 + from pandas.core.indexes.datetimes import cdate_range + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + cdate_range('2017-01-01', '2017-12-31') + + +class TestCategoricalMove(object): + + def test_categorical_move(self): + # May have been cached by another import, e.g. pickle tests. 
+ sys.modules.pop("pandas.core.categorical", None) + + with tm.assert_produces_warning(FutureWarning): + from pandas.core.categorical import Categorical # noqa + + sys.modules.pop("pandas.core.categorical", None) + + with tm.assert_produces_warning(FutureWarning): + from pandas.core.categorical import CategoricalDtype # noqa diff --git a/pandas/tests/api/test_types.py b/pandas/tests/api/test_types.py new file mode 100644 index 0000000000000..bd4891326c751 --- /dev/null +++ b/pandas/tests/api/test_types.py @@ -0,0 +1,65 @@ +# -*- coding: utf-8 -*- + +import pytest + +from warnings import catch_warnings + +import pandas +from pandas.api import types +from pandas.util import testing as tm + +from .test_api import Base + + +class TestTypes(Base): + + allowed = ['is_bool', 'is_bool_dtype', + 'is_categorical', 'is_categorical_dtype', 'is_complex', + 'is_complex_dtype', 'is_datetime64_any_dtype', + 'is_datetime64_dtype', 'is_datetime64_ns_dtype', + 'is_datetime64tz_dtype', 'is_datetimetz', 'is_dtype_equal', + 'is_extension_type', 'is_float', 'is_float_dtype', + 'is_int64_dtype', 'is_integer', + 'is_integer_dtype', 'is_number', 'is_numeric_dtype', + 'is_object_dtype', 'is_scalar', 'is_sparse', + 'is_string_dtype', 'is_signed_integer_dtype', + 'is_timedelta64_dtype', 'is_timedelta64_ns_dtype', + 'is_unsigned_integer_dtype', 'is_period', + 'is_period_dtype', 'is_interval', 'is_interval_dtype', + 'is_re', 'is_re_compilable', + 'is_dict_like', 'is_iterator', 'is_file_like', + 'is_list_like', 'is_hashable', 'is_array_like', + 'is_named_tuple', + 'pandas_dtype', 'union_categoricals', 'infer_dtype'] + deprecated = ['is_any_int_dtype', 'is_floating_dtype', 'is_sequence'] + dtypes = ['CategoricalDtype', 'DatetimeTZDtype', + 'PeriodDtype', 'IntervalDtype'] + + def test_types(self): + + self.check(types, self.allowed + self.dtypes + self.deprecated) + + def check_deprecation(self, fold, fnew): + with tm.assert_produces_warning(DeprecationWarning): + try: + result = fold('foo') + 
expected = fnew('foo') + assert result == expected + except TypeError: + pytest.raises(TypeError, lambda: fnew('foo')) + except AttributeError: + pytest.raises(AttributeError, lambda: fnew('foo')) + + def test_deprecated_from_api_types(self): + + for t in self.deprecated: + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + getattr(types, t)(1) + + +def test_moved_infer_dtype(): + + with catch_warnings(record=True): + e = pandas.lib.infer_dtype('foo') + assert e is not None diff --git a/pandas/tests/categorical/__init__.py b/pandas/tests/categorical/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/categorical/common.py b/pandas/tests/categorical/common.py new file mode 100644 index 0000000000000..9462482553ed8 --- /dev/null +++ b/pandas/tests/categorical/common.py @@ -0,0 +1,10 @@ +# -*- coding: utf-8 -*- + +from pandas import Categorical + + +class TestCategorical(object): + + def setup_method(self, method): + self.factor = Categorical(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'], + ordered=True) diff --git a/pandas/tests/categorical/test_analytics.py b/pandas/tests/categorical/test_analytics.py new file mode 100644 index 0000000000000..53d0e596a1d99 --- /dev/null +++ b/pandas/tests/categorical/test_analytics.py @@ -0,0 +1,320 @@ +# -*- coding: utf-8 -*- + +import pytest +import sys + +import numpy as np + +import pandas.util.testing as tm +from pandas import Categorical, Index, Series + +from pandas.compat import PYPY + + +class TestCategoricalAnalytics(object): + + def test_min_max(self): + + # unordered cats have no min/max + cat = Categorical(["a", "b", "c", "d"], ordered=False) + pytest.raises(TypeError, lambda: cat.min()) + pytest.raises(TypeError, lambda: cat.max()) + + cat = Categorical(["a", "b", "c", "d"], ordered=True) + _min = cat.min() + _max = cat.max() + assert _min == "a" + assert _max == "d" + + cat = Categorical(["a", "b", "c", "d"], + categories=['d', 'c', 'b', 'a'], ordered=True) 
+ _min = cat.min() + _max = cat.max() + assert _min == "d" + assert _max == "a" + + cat = Categorical([np.nan, "b", "c", np.nan], + categories=['d', 'c', 'b', 'a'], ordered=True) + _min = cat.min() + _max = cat.max() + assert np.isnan(_min) + assert _max == "b" + + _min = cat.min(numeric_only=True) + assert _min == "c" + _max = cat.max(numeric_only=True) + assert _max == "b" + + cat = Categorical([np.nan, 1, 2, np.nan], categories=[5, 4, 3, 2, 1], + ordered=True) + _min = cat.min() + _max = cat.max() + assert np.isnan(_min) + assert _max == 1 + + _min = cat.min(numeric_only=True) + assert _min == 2 + _max = cat.max(numeric_only=True) + assert _max == 1 + + @pytest.mark.parametrize("values,categories,exp_mode", [ + ([1, 1, 2, 4, 5, 5, 5], [5, 4, 3, 2, 1], [5]), + ([1, 1, 1, 4, 5, 5, 5], [5, 4, 3, 2, 1], [5, 1]), + ([1, 2, 3, 4, 5], [5, 4, 3, 2, 1], [5, 4, 3, 2, 1]), + ([np.nan, np.nan, np.nan, 4, 5], [5, 4, 3, 2, 1], [5, 4]), + ([np.nan, np.nan, np.nan, 4, 5, 4], [5, 4, 3, 2, 1], [4]), + ([np.nan, np.nan, 4, 5, 4], [5, 4, 3, 2, 1], [4])]) + def test_mode(self, values, categories, exp_mode): + s = Categorical(values, categories=categories, ordered=True) + res = s.mode() + exp = Categorical(exp_mode, categories=categories, ordered=True) + tm.assert_categorical_equal(res, exp) + + def test_searchsorted(self): + # https://github.com/pandas-dev/pandas/issues/8420 + # https://github.com/pandas-dev/pandas/issues/14522 + + c1 = Categorical(['cheese', 'milk', 'apple', 'bread', 'bread'], + categories=['cheese', 'milk', 'apple', 'bread'], + ordered=True) + s1 = Series(c1) + c2 = Categorical(['cheese', 'milk', 'apple', 'bread', 'bread'], + categories=['cheese', 'milk', 'apple', 'bread'], + ordered=False) + s2 = Series(c2) + + # Searching for single item argument, side='left' (default) + res_cat = c1.searchsorted('apple') + res_ser = s1.searchsorted('apple') + exp = np.array([2], dtype=np.intp) + tm.assert_numpy_array_equal(res_cat, exp) + tm.assert_numpy_array_equal(res_ser, 
exp) + + # Searching for single item array, side='left' (default) + res_cat = c1.searchsorted(['bread']) + res_ser = s1.searchsorted(['bread']) + exp = np.array([3], dtype=np.intp) + tm.assert_numpy_array_equal(res_cat, exp) + tm.assert_numpy_array_equal(res_ser, exp) + + # Searching for several items array, side='right' + res_cat = c1.searchsorted(['apple', 'bread'], side='right') + res_ser = s1.searchsorted(['apple', 'bread'], side='right') + exp = np.array([3, 5], dtype=np.intp) + tm.assert_numpy_array_equal(res_cat, exp) + tm.assert_numpy_array_equal(res_ser, exp) + + # Searching for a single value that is not from the Categorical + pytest.raises(ValueError, lambda: c1.searchsorted('cucumber')) + pytest.raises(ValueError, lambda: s1.searchsorted('cucumber')) + + # Searching for multiple values one of each is not from the Categorical + pytest.raises(ValueError, + lambda: c1.searchsorted(['bread', 'cucumber'])) + pytest.raises(ValueError, + lambda: s1.searchsorted(['bread', 'cucumber'])) + + # searchsorted call for unordered Categorical + pytest.raises(ValueError, lambda: c2.searchsorted('apple')) + pytest.raises(ValueError, lambda: s2.searchsorted('apple')) + + with tm.assert_produces_warning(FutureWarning): + res = c1.searchsorted(v=['bread']) + exp = np.array([3], dtype=np.intp) + tm.assert_numpy_array_equal(res, exp) + + def test_unique(self): + # categories are reordered based on value when ordered=False + cat = Categorical(["a", "b"]) + exp = Index(["a", "b"]) + res = cat.unique() + tm.assert_index_equal(res.categories, exp) + tm.assert_categorical_equal(res, cat) + + cat = Categorical(["a", "b", "a", "a"], categories=["a", "b", "c"]) + res = cat.unique() + tm.assert_index_equal(res.categories, exp) + tm.assert_categorical_equal(res, Categorical(exp)) + + cat = Categorical(["c", "a", "b", "a", "a"], + categories=["a", "b", "c"]) + exp = Index(["c", "a", "b"]) + res = cat.unique() + tm.assert_index_equal(res.categories, exp) + exp_cat = Categorical(exp, 
categories=['c', 'a', 'b']) + tm.assert_categorical_equal(res, exp_cat) + + # nan must be removed + cat = Categorical(["b", np.nan, "b", np.nan, "a"], + categories=["a", "b", "c"]) + res = cat.unique() + exp = Index(["b", "a"]) + tm.assert_index_equal(res.categories, exp) + exp_cat = Categorical(["b", np.nan, "a"], categories=["b", "a"]) + tm.assert_categorical_equal(res, exp_cat) + + def test_unique_ordered(self): + # keep categories order when ordered=True + cat = Categorical(['b', 'a', 'b'], categories=['a', 'b'], ordered=True) + res = cat.unique() + exp_cat = Categorical(['b', 'a'], categories=['a', 'b'], ordered=True) + tm.assert_categorical_equal(res, exp_cat) + + cat = Categorical(['c', 'b', 'a', 'a'], categories=['a', 'b', 'c'], + ordered=True) + res = cat.unique() + exp_cat = Categorical(['c', 'b', 'a'], categories=['a', 'b', 'c'], + ordered=True) + tm.assert_categorical_equal(res, exp_cat) + + cat = Categorical(['b', 'a', 'a'], categories=['a', 'b', 'c'], + ordered=True) + res = cat.unique() + exp_cat = Categorical(['b', 'a'], categories=['a', 'b'], ordered=True) + tm.assert_categorical_equal(res, exp_cat) + + cat = Categorical(['b', 'b', np.nan, 'a'], categories=['a', 'b', 'c'], + ordered=True) + res = cat.unique() + exp_cat = Categorical(['b', np.nan, 'a'], categories=['a', 'b'], + ordered=True) + tm.assert_categorical_equal(res, exp_cat) + + def test_unique_index_series(self): + c = Categorical([3, 1, 2, 2, 1], categories=[3, 2, 1]) + # Categorical.unique sorts categories by appearance order + # if ordered=False + exp = Categorical([3, 1, 2], categories=[3, 1, 2]) + tm.assert_categorical_equal(c.unique(), exp) + + tm.assert_index_equal(Index(c).unique(), Index(exp)) + tm.assert_categorical_equal(Series(c).unique(), exp) + + c = Categorical([1, 1, 2, 2], categories=[3, 2, 1]) + exp = Categorical([1, 2], categories=[1, 2]) + tm.assert_categorical_equal(c.unique(), exp) + tm.assert_index_equal(Index(c).unique(), Index(exp)) + 
tm.assert_categorical_equal(Series(c).unique(), exp) + + c = Categorical([3, 1, 2, 2, 1], categories=[3, 2, 1], ordered=True) + # Categorical.unique keeps categories order if ordered=True + exp = Categorical([3, 1, 2], categories=[3, 2, 1], ordered=True) + tm.assert_categorical_equal(c.unique(), exp) + + tm.assert_index_equal(Index(c).unique(), Index(exp)) + tm.assert_categorical_equal(Series(c).unique(), exp) + + def test_shift(self): + # GH 9416 + cat = Categorical(['a', 'b', 'c', 'd', 'a']) + + # shift forward + sp1 = cat.shift(1) + xp1 = Categorical([np.nan, 'a', 'b', 'c', 'd']) + tm.assert_categorical_equal(sp1, xp1) + tm.assert_categorical_equal(cat[:-1], sp1[1:]) + + # shift back + sn2 = cat.shift(-2) + xp2 = Categorical(['c', 'd', 'a', np.nan, np.nan], + categories=['a', 'b', 'c', 'd']) + tm.assert_categorical_equal(sn2, xp2) + tm.assert_categorical_equal(cat[2:], sn2[:-2]) + + # shift by zero + tm.assert_categorical_equal(cat, cat.shift(0)) + + def test_nbytes(self): + cat = Categorical([1, 2, 3]) + exp = 3 + 3 * 8 # 3 int8s for values + 3 int64s for categories + assert cat.nbytes == exp + + def test_memory_usage(self): + cat = Categorical([1, 2, 3]) + + # .categories is an index, so we include the hashtable + assert 0 < cat.nbytes <= cat.memory_usage() + assert 0 < cat.nbytes <= cat.memory_usage(deep=True) + + cat = Categorical(['foo', 'foo', 'bar']) + assert cat.memory_usage(deep=True) > cat.nbytes + + if not PYPY: + # sys.getsizeof will call the .memory_usage with + # deep=True, and add on some GC overhead + diff = cat.memory_usage(deep=True) - sys.getsizeof(cat) + assert abs(diff) < 100 + + def test_map(self): + c = Categorical(list('ABABC'), categories=list('CBA'), ordered=True) + result = c.map(lambda x: x.lower()) + exp = Categorical(list('ababc'), categories=list('cba'), ordered=True) + tm.assert_categorical_equal(result, exp) + + c = Categorical(list('ABABC'), categories=list('ABC'), ordered=False) + result = c.map(lambda x: x.lower()) + exp = 
Categorical(list('ababc'), categories=list('abc'), ordered=False) + tm.assert_categorical_equal(result, exp) + + result = c.map(lambda x: 1) + # GH 12766: Return an index not an array + tm.assert_index_equal(result, Index(np.array([1] * 5, dtype=np.int64))) + + def test_validate_inplace(self): + cat = Categorical(['A', 'B', 'B', 'C', 'A']) + invalid_values = [1, "True", [1, 2, 3], 5.0] + + for value in invalid_values: + with pytest.raises(ValueError): + cat.set_ordered(value=True, inplace=value) + + with pytest.raises(ValueError): + cat.as_ordered(inplace=value) + + with pytest.raises(ValueError): + cat.as_unordered(inplace=value) + + with pytest.raises(ValueError): + cat.set_categories(['X', 'Y', 'Z'], rename=True, inplace=value) + + with pytest.raises(ValueError): + cat.rename_categories(['X', 'Y', 'Z'], inplace=value) + + with pytest.raises(ValueError): + cat.reorder_categories( + ['X', 'Y', 'Z'], ordered=True, inplace=value) + + with pytest.raises(ValueError): + cat.add_categories( + new_categories=['D', 'E', 'F'], inplace=value) + + with pytest.raises(ValueError): + cat.remove_categories(removals=['D', 'E', 'F'], inplace=value) + + with pytest.raises(ValueError): + cat.remove_unused_categories(inplace=value) + + with pytest.raises(ValueError): + cat.sort_values(inplace=value) + + def test_repeat(self): + # GH10183 + cat = Categorical(["a", "b"], categories=["a", "b"]) + exp = Categorical(["a", "a", "b", "b"], categories=["a", "b"]) + res = cat.repeat(2) + tm.assert_categorical_equal(res, exp) + + def test_numpy_repeat(self): + cat = Categorical(["a", "b"], categories=["a", "b"]) + exp = Categorical(["a", "a", "b", "b"], categories=["a", "b"]) + tm.assert_categorical_equal(np.repeat(cat, 2), exp) + + msg = "the 'axis' parameter is not supported" + tm.assert_raises_regex(ValueError, msg, np.repeat, cat, 2, axis=1) + + def test_isna(self): + exp = np.array([False, False, True]) + c = Categorical(["a", "b", np.nan]) + res = c.isna() + + 
tm.assert_numpy_array_equal(res, exp) diff --git a/pandas/tests/categorical/test_api.py b/pandas/tests/categorical/test_api.py new file mode 100644 index 0000000000000..ad5b78b36438b --- /dev/null +++ b/pandas/tests/categorical/test_api.py @@ -0,0 +1,518 @@ +# -*- coding: utf-8 -*- + +import pytest + +import numpy as np + +import pandas.util.testing as tm +from pandas import Categorical, CategoricalIndex, Index, Series, DataFrame + +from pandas.core.arrays.categorical import _recode_for_categories +from pandas.tests.categorical.common import TestCategorical + + +class TestCategoricalAPI(object): + + def test_ordered_api(self): + # GH 9347 + cat1 = Categorical(list('acb'), ordered=False) + tm.assert_index_equal(cat1.categories, Index(['a', 'b', 'c'])) + assert not cat1.ordered + + cat2 = Categorical(list('acb'), categories=list('bca'), ordered=False) + tm.assert_index_equal(cat2.categories, Index(['b', 'c', 'a'])) + assert not cat2.ordered + + cat3 = Categorical(list('acb'), ordered=True) + tm.assert_index_equal(cat3.categories, Index(['a', 'b', 'c'])) + assert cat3.ordered + + cat4 = Categorical(list('acb'), categories=list('bca'), ordered=True) + tm.assert_index_equal(cat4.categories, Index(['b', 'c', 'a'])) + assert cat4.ordered + + def test_set_ordered(self): + + cat = Categorical(["a", "b", "c", "a"], ordered=True) + cat2 = cat.as_unordered() + assert not cat2.ordered + cat2 = cat.as_ordered() + assert cat2.ordered + cat2.as_unordered(inplace=True) + assert not cat2.ordered + cat2.as_ordered(inplace=True) + assert cat2.ordered + + assert cat2.set_ordered(True).ordered + assert not cat2.set_ordered(False).ordered + cat2.set_ordered(True, inplace=True) + assert cat2.ordered + cat2.set_ordered(False, inplace=True) + assert not cat2.ordered + + # removed in 0.19.0 + msg = "can\'t set attribute" + with tm.assert_raises_regex(AttributeError, msg): + cat.ordered = True + with tm.assert_raises_regex(AttributeError, msg): + cat.ordered = False + + def 
test_rename_categories(self): + cat = Categorical(["a", "b", "c", "a"]) + + # inplace=False: the old one must not be changed + res = cat.rename_categories([1, 2, 3]) + tm.assert_numpy_array_equal(res.__array__(), np.array([1, 2, 3, 1], + dtype=np.int64)) + tm.assert_index_equal(res.categories, Index([1, 2, 3])) + + exp_cat = np.array(["a", "b", "c", "a"], dtype=np.object_) + tm.assert_numpy_array_equal(cat.__array__(), exp_cat) + + exp_cat = Index(["a", "b", "c"]) + tm.assert_index_equal(cat.categories, exp_cat) + + # GH18862 (let rename_categories take callables) + result = cat.rename_categories(lambda x: x.upper()) + expected = Categorical(["A", "B", "C", "A"]) + tm.assert_categorical_equal(result, expected) + + # and now inplace + res = cat.rename_categories([1, 2, 3], inplace=True) + assert res is None + tm.assert_numpy_array_equal(cat.__array__(), np.array([1, 2, 3, 1], + dtype=np.int64)) + tm.assert_index_equal(cat.categories, Index([1, 2, 3])) + + # Lengthen + with pytest.raises(ValueError): + cat.rename_categories([1, 2, 3, 4]) + + # Shorten + with pytest.raises(ValueError): + cat.rename_categories([1, 2]) + + def test_rename_categories_series(self): + # https://github.com/pandas-dev/pandas/issues/17981 + c = Categorical(['a', 'b']) + xpr = "Treating Series 'new_categories' as a list-like " + with tm.assert_produces_warning(FutureWarning) as rec: + result = c.rename_categories(Series([0, 1])) + + assert len(rec) == 1 + assert xpr in str(rec[0].message) + expected = Categorical([0, 1]) + tm.assert_categorical_equal(result, expected) + + def test_rename_categories_dict(self): + # GH 17336 + cat = Categorical(['a', 'b', 'c', 'd']) + res = cat.rename_categories({'a': 4, 'b': 3, 'c': 2, 'd': 1}) + expected = Index([4, 3, 2, 1]) + tm.assert_index_equal(res.categories, expected) + + # Test for inplace + res = cat.rename_categories({'a': 4, 'b': 3, 'c': 2, 'd': 1}, + inplace=True) + assert res is None + tm.assert_index_equal(cat.categories, expected) + + # Test for 
dicts of smaller length + cat = Categorical(['a', 'b', 'c', 'd']) + res = cat.rename_categories({'a': 1, 'c': 3}) + + expected = Index([1, 'b', 3, 'd']) + tm.assert_index_equal(res.categories, expected) + + # Test for dicts with bigger length + cat = Categorical(['a', 'b', 'c', 'd']) + res = cat.rename_categories({'a': 1, 'b': 2, 'c': 3, + 'd': 4, 'e': 5, 'f': 6}) + expected = Index([1, 2, 3, 4]) + tm.assert_index_equal(res.categories, expected) + + # Test for dicts with no items from old categories + cat = Categorical(['a', 'b', 'c', 'd']) + res = cat.rename_categories({'f': 1, 'g': 3}) + + expected = Index(['a', 'b', 'c', 'd']) + tm.assert_index_equal(res.categories, expected) + + def test_reorder_categories(self): + cat = Categorical(["a", "b", "c", "a"], ordered=True) + old = cat.copy() + new = Categorical(["a", "b", "c", "a"], categories=["c", "b", "a"], + ordered=True) + + # first inplace == False + res = cat.reorder_categories(["c", "b", "a"]) + # cat must be the same as before + tm.assert_categorical_equal(cat, old) + # only res is changed + tm.assert_categorical_equal(res, new) + + # inplace == True + res = cat.reorder_categories(["c", "b", "a"], inplace=True) + assert res is None + tm.assert_categorical_equal(cat, new) + + # not all "old" included in "new" + cat = Categorical(["a", "b", "c", "a"], ordered=True) + + def f(): + cat.reorder_categories(["a"]) + + pytest.raises(ValueError, f) + + # still not all "old" in "new" + def f(): + cat.reorder_categories(["a", "b", "d"]) + + pytest.raises(ValueError, f) + + # all "old" included in "new", but too long + def f(): + cat.reorder_categories(["a", "b", "c", "d"]) + + pytest.raises(ValueError, f) + + def test_add_categories(self): + cat = Categorical(["a", "b", "c", "a"], ordered=True) + old = cat.copy() + new = Categorical(["a", "b", "c", "a"], + categories=["a", "b", "c", "d"], ordered=True) + + # first inplace == False + res = cat.add_categories("d") + tm.assert_categorical_equal(cat, old) + 
tm.assert_categorical_equal(res, new) + + res = cat.add_categories(["d"]) + tm.assert_categorical_equal(cat, old) + tm.assert_categorical_equal(res, new) + + # inplace == True + res = cat.add_categories("d", inplace=True) + tm.assert_categorical_equal(cat, new) + assert res is None + + # new is in old categories + def f(): + cat.add_categories(["d"]) + + pytest.raises(ValueError, f) + + # GH 9927 + cat = Categorical(list("abc"), ordered=True) + expected = Categorical( + list("abc"), categories=list("abcde"), ordered=True) + # test with Series, np.array, index, list + res = cat.add_categories(Series(["d", "e"])) + tm.assert_categorical_equal(res, expected) + res = cat.add_categories(np.array(["d", "e"])) + tm.assert_categorical_equal(res, expected) + res = cat.add_categories(Index(["d", "e"])) + tm.assert_categorical_equal(res, expected) + res = cat.add_categories(["d", "e"]) + tm.assert_categorical_equal(res, expected) + + def test_set_categories(self): + cat = Categorical(["a", "b", "c", "a"], ordered=True) + exp_categories = Index(["c", "b", "a"]) + exp_values = np.array(["a", "b", "c", "a"], dtype=np.object_) + + res = cat.set_categories(["c", "b", "a"], inplace=True) + tm.assert_index_equal(cat.categories, exp_categories) + tm.assert_numpy_array_equal(cat.__array__(), exp_values) + assert res is None + + res = cat.set_categories(["a", "b", "c"]) + # cat must be the same as before + tm.assert_index_equal(cat.categories, exp_categories) + tm.assert_numpy_array_equal(cat.__array__(), exp_values) + # only res is changed + exp_categories_back = Index(["a", "b", "c"]) + tm.assert_index_equal(res.categories, exp_categories_back) + tm.assert_numpy_array_equal(res.__array__(), exp_values) + + # not all "old" included in "new" -> all not included ones are now + # np.nan + cat = Categorical(["a", "b", "c", "a"], ordered=True) + res = cat.set_categories(["a"]) + tm.assert_numpy_array_equal(res.codes, np.array([0, -1, -1, 0], + dtype=np.int8)) + + # still not all "old" in 
"new" + res = cat.set_categories(["a", "b", "d"]) + tm.assert_numpy_array_equal(res.codes, np.array([0, 1, -1, 0], + dtype=np.int8)) + tm.assert_index_equal(res.categories, Index(["a", "b", "d"])) + + # all "old" included in "new" + cat = cat.set_categories(["a", "b", "c", "d"]) + exp_categories = Index(["a", "b", "c", "d"]) + tm.assert_index_equal(cat.categories, exp_categories) + + # internals... + c = Categorical([1, 2, 3, 4, 1], categories=[1, 2, 3, 4], ordered=True) + tm.assert_numpy_array_equal(c._codes, np.array([0, 1, 2, 3, 0], + dtype=np.int8)) + tm.assert_index_equal(c.categories, Index([1, 2, 3, 4])) + + exp = np.array([1, 2, 3, 4, 1], dtype=np.int64) + tm.assert_numpy_array_equal(c.get_values(), exp) + + # all "pointers" to '4' must be changed from 3 to 0,... + c = c.set_categories([4, 3, 2, 1]) + + # positions are changed + tm.assert_numpy_array_equal(c._codes, np.array([3, 2, 1, 0, 3], + dtype=np.int8)) + + # categories are now in new order + tm.assert_index_equal(c.categories, Index([4, 3, 2, 1])) + + # output is the same + exp = np.array([1, 2, 3, 4, 1], dtype=np.int64) + tm.assert_numpy_array_equal(c.get_values(), exp) + assert c.min() == 4 + assert c.max() == 1 + + # set_categories should set the ordering if specified + c2 = c.set_categories([4, 3, 2, 1], ordered=False) + assert not c2.ordered + + tm.assert_numpy_array_equal(c.get_values(), c2.get_values()) + + # set_categories should pass thru the ordering + c2 = c.set_ordered(False).set_categories([4, 3, 2, 1]) + assert not c2.ordered + + tm.assert_numpy_array_equal(c.get_values(), c2.get_values()) + + @pytest.mark.parametrize('values, categories, new_categories', [ + # No NaNs, same cats, same order + (['a', 'b', 'a'], ['a', 'b'], ['a', 'b'],), + # No NaNs, same cats, different order + (['a', 'b', 'a'], ['a', 'b'], ['b', 'a'],), + # Same, unsorted + (['b', 'a', 'a'], ['a', 'b'], ['a', 'b'],), + # No NaNs, same cats, different order + (['b', 'a', 'a'], ['a', 'b'], ['b', 'a'],), + # NaNs + (['a', 
'b', 'c'], ['a', 'b'], ['a', 'b']), + (['a', 'b', 'c'], ['a', 'b'], ['b', 'a']), + (['b', 'a', 'c'], ['a', 'b'], ['a', 'b']), + (['b', 'a', 'c'], ['a', 'b'], ['a', 'b']), + # Introduce NaNs + (['a', 'b', 'c'], ['a', 'b'], ['a']), + (['a', 'b', 'c'], ['a', 'b'], ['b']), + (['b', 'a', 'c'], ['a', 'b'], ['a']), + (['b', 'a', 'c'], ['a', 'b'], ['a']), + # No overlap + (['a', 'b', 'c'], ['a', 'b'], ['d', 'e']), + ]) + @pytest.mark.parametrize('ordered', [True, False]) + def test_set_categories_many(self, values, categories, new_categories, + ordered): + c = Categorical(values, categories) + expected = Categorical(values, new_categories, ordered) + result = c.set_categories(new_categories, ordered=ordered) + tm.assert_categorical_equal(result, expected) + + def test_set_categories_private(self): + cat = Categorical(['a', 'b', 'c'], categories=['a', 'b', 'c', 'd']) + cat._set_categories(['a', 'c', 'd', 'e']) + expected = Categorical(['a', 'c', 'd'], categories=list('acde')) + tm.assert_categorical_equal(cat, expected) + + # fastpath + cat = Categorical(['a', 'b', 'c'], categories=['a', 'b', 'c', 'd']) + cat._set_categories(['a', 'c', 'd', 'e'], fastpath=True) + expected = Categorical(['a', 'c', 'd'], categories=list('acde')) + tm.assert_categorical_equal(cat, expected) + + def test_remove_categories(self): + cat = Categorical(["a", "b", "c", "a"], ordered=True) + old = cat.copy() + new = Categorical(["a", "b", np.nan, "a"], categories=["a", "b"], + ordered=True) + + # first inplace == False + res = cat.remove_categories("c") + tm.assert_categorical_equal(cat, old) + tm.assert_categorical_equal(res, new) + + res = cat.remove_categories(["c"]) + tm.assert_categorical_equal(cat, old) + tm.assert_categorical_equal(res, new) + + # inplace == True + res = cat.remove_categories("c", inplace=True) + tm.assert_categorical_equal(cat, new) + assert res is None + + # removal is not in categories + def f(): + cat.remove_categories(["c"]) + + pytest.raises(ValueError, f) + + def 
test_remove_unused_categories(self): + c = Categorical(["a", "b", "c", "d", "a"], + categories=["a", "b", "c", "d", "e"]) + exp_categories_all = Index(["a", "b", "c", "d", "e"]) + exp_categories_dropped = Index(["a", "b", "c", "d"]) + + tm.assert_index_equal(c.categories, exp_categories_all) + + res = c.remove_unused_categories() + tm.assert_index_equal(res.categories, exp_categories_dropped) + tm.assert_index_equal(c.categories, exp_categories_all) + + res = c.remove_unused_categories(inplace=True) + tm.assert_index_equal(c.categories, exp_categories_dropped) + assert res is None + + # with NaN values (GH11599) + c = Categorical(["a", "b", "c", np.nan], + categories=["a", "b", "c", "d", "e"]) + res = c.remove_unused_categories() + tm.assert_index_equal(res.categories, + Index(np.array(["a", "b", "c"]))) + exp_codes = np.array([0, 1, 2, -1], dtype=np.int8) + tm.assert_numpy_array_equal(res.codes, exp_codes) + tm.assert_index_equal(c.categories, exp_categories_all) + + val = ['F', np.nan, 'D', 'B', 'D', 'F', np.nan] + cat = Categorical(values=val, categories=list('ABCDEFG')) + out = cat.remove_unused_categories() + tm.assert_index_equal(out.categories, Index(['B', 'D', 'F'])) + exp_codes = np.array([2, -1, 1, 0, 1, 2, -1], dtype=np.int8) + tm.assert_numpy_array_equal(out.codes, exp_codes) + assert out.get_values().tolist() == val + + alpha = list('abcdefghijklmnopqrstuvwxyz') + val = np.random.choice(alpha[::2], 10000).astype('object') + val[np.random.choice(len(val), 100)] = np.nan + + cat = Categorical(values=val, categories=alpha) + out = cat.remove_unused_categories() + assert out.get_values().tolist() == val.tolist() + + +class TestCategoricalAPIWithFactor(TestCategorical): + + def test_describe(self): + # string type + desc = self.factor.describe() + assert self.factor.ordered + exp_index = CategoricalIndex(['a', 'b', 'c'], name='categories', + ordered=self.factor.ordered) + expected = DataFrame({'counts': [3, 2, 3], + 'freqs': [3 / 8., 2 / 8., 3 / 8.]}, + 
index=exp_index) + tm.assert_frame_equal(desc, expected) + + # check unused categories + cat = self.factor.copy() + cat.set_categories(["a", "b", "c", "d"], inplace=True) + desc = cat.describe() + + exp_index = CategoricalIndex( + list('abcd'), ordered=self.factor.ordered, name='categories') + expected = DataFrame({'counts': [3, 2, 3, 0], + 'freqs': [3 / 8., 2 / 8., 3 / 8., 0]}, + index=exp_index) + tm.assert_frame_equal(desc, expected) + + # check an integer one + cat = Categorical([1, 2, 3, 1, 2, 3, 3, 2, 1, 1, 1]) + desc = cat.describe() + exp_index = CategoricalIndex([1, 2, 3], ordered=cat.ordered, + name='categories') + expected = DataFrame({'counts': [5, 3, 3], + 'freqs': [5 / 11., 3 / 11., 3 / 11.]}, + index=exp_index) + tm.assert_frame_equal(desc, expected) + + # https://github.com/pandas-dev/pandas/issues/3678 + # describe should work with NaN + cat = Categorical([np.nan, 1, 2, 2]) + desc = cat.describe() + expected = DataFrame({'counts': [1, 2, 1], + 'freqs': [1 / 4., 2 / 4., 1 / 4.]}, + index=CategoricalIndex([1, 2, np.nan], + categories=[1, 2], + name='categories')) + tm.assert_frame_equal(desc, expected) + + def test_set_categories_inplace(self): + cat = self.factor.copy() + cat.set_categories(['a', 'b', 'c', 'd'], inplace=True) + tm.assert_index_equal(cat.categories, Index(['a', 'b', 'c', 'd'])) + + +class TestPrivateCategoricalAPI(object): + + def test_codes_immutable(self): + + # Codes should be read only + c = Categorical(["a", "b", "c", "a", np.nan]) + exp = np.array([0, 1, 2, 0, -1], dtype='int8') + tm.assert_numpy_array_equal(c.codes, exp) + + # Assignments to codes should raise + def f(): + c.codes = np.array([0, 1, 2, 0, 1], dtype='int8') + + pytest.raises(ValueError, f) + + # changes in the codes array should raise + # np 1.6.1 raises RuntimeError rather than ValueError + codes = c.codes + + def f(): + codes[4] = 1 + + pytest.raises(ValueError, f) + + # But even after getting the codes, the original array should still be + # writeable! 
+ c[4] = "a" + exp = np.array([0, 1, 2, 0, 0], dtype='int8') + tm.assert_numpy_array_equal(c.codes, exp) + c._codes[4] = 2 + exp = np.array([0, 1, 2, 0, 2], dtype='int8') + tm.assert_numpy_array_equal(c.codes, exp) + + @pytest.mark.parametrize('codes, old, new, expected', [ + ([0, 1], ['a', 'b'], ['a', 'b'], [0, 1]), + ([0, 1], ['b', 'a'], ['b', 'a'], [0, 1]), + ([0, 1], ['a', 'b'], ['b', 'a'], [1, 0]), + ([0, 1], ['b', 'a'], ['a', 'b'], [1, 0]), + ([0, 1, 0, 1], ['a', 'b'], ['a', 'b', 'c'], [0, 1, 0, 1]), + ([0, 1, 2, 2], ['a', 'b', 'c'], ['a', 'b'], [0, 1, -1, -1]), + ([0, 1, -1], ['a', 'b', 'c'], ['a', 'b', 'c'], [0, 1, -1]), + ([0, 1, -1], ['a', 'b', 'c'], ['b'], [-1, 0, -1]), + ([0, 1, -1], ['a', 'b', 'c'], ['d'], [-1, -1, -1]), + ([0, 1, -1], ['a', 'b', 'c'], [], [-1, -1, -1]), + ([-1, -1], [], ['a', 'b'], [-1, -1]), + ([1, 0], ['b', 'a'], ['a', 'b'], [0, 1]), + ]) + def test_recode_to_categories(self, codes, old, new, expected): + codes = np.asanyarray(codes, dtype=np.int8) + expected = np.asanyarray(expected, dtype=np.int8) + old = Index(old) + new = Index(new) + result = _recode_for_categories(codes, old, new) + tm.assert_numpy_array_equal(result, expected) + + def test_recode_to_categories_large(self): + N = 1000 + codes = np.arange(N) + old = Index(codes) + expected = np.arange(N - 1, -1, -1, dtype=np.int16) + new = Index(expected) + result = _recode_for_categories(codes, old, new) + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/categorical/test_constructors.py b/pandas/tests/categorical/test_constructors.py new file mode 100644 index 0000000000000..6cc34770a65e0 --- /dev/null +++ b/pandas/tests/categorical/test_constructors.py @@ -0,0 +1,515 @@ +# -*- coding: utf-8 -*- + +import pytest +from datetime import datetime + +import numpy as np + +import pandas as pd +import pandas.util.testing as tm +from pandas import (Categorical, Index, Series, Timestamp, + CategoricalIndex, date_range, DatetimeIndex, + period_range, 
timedelta_range, NaT, + Interval, IntervalIndex) +from pandas.core.dtypes.dtypes import CategoricalDtype +from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype + + +class TestCategoricalConstructors(object): + + def test_validate_ordered(self): + # see gh-14058 + exp_msg = "'ordered' must either be 'True' or 'False'" + exp_err = TypeError + + # This should be a boolean. + ordered = np.array([0, 1, 2]) + + with tm.assert_raises_regex(exp_err, exp_msg): + Categorical([1, 2, 3], ordered=ordered) + + with tm.assert_raises_regex(exp_err, exp_msg): + Categorical.from_codes([0, 0, 1], categories=['a', 'b', 'c'], + ordered=ordered) + + def test_constructor_empty(self): + # GH 17248 + c = Categorical([]) + expected = Index([]) + tm.assert_index_equal(c.categories, expected) + + c = Categorical([], categories=[1, 2, 3]) + expected = pd.Int64Index([1, 2, 3]) + tm.assert_index_equal(c.categories, expected) + + def test_constructor_tuples(self): + values = np.array([(1,), (1, 2), (1,), (1, 2)], dtype=object) + result = Categorical(values) + expected = Index([(1,), (1, 2)], tupleize_cols=False) + tm.assert_index_equal(result.categories, expected) + assert result.ordered is False + + def test_constructor_tuples_datetimes(self): + # numpy will auto reshape when all of the tuples are the + # same len, so add an extra one with 2 items and slice it off + values = np.array([(Timestamp('2010-01-01'),), + (Timestamp('2010-01-02'),), + (Timestamp('2010-01-01'),), + (Timestamp('2010-01-02'),), + ('a', 'b')], dtype=object)[:-1] + result = Categorical(values) + expected = Index([(Timestamp('2010-01-01'),), + (Timestamp('2010-01-02'),)], tupleize_cols=False) + tm.assert_index_equal(result.categories, expected) + + def test_constructor_unsortable(self): + + # it works! 
+ arr = np.array([1, 2, 3, datetime.now()], dtype='O') + factor = Categorical(arr, ordered=False) + assert not factor.ordered + + # this however will raise as cannot be sorted + pytest.raises( + TypeError, lambda: Categorical(arr, ordered=True)) + + def test_constructor_interval(self): + result = Categorical([Interval(1, 2), Interval(2, 3), Interval(3, 6)], + ordered=True) + ii = IntervalIndex([Interval(1, 2), Interval(2, 3), Interval(3, 6)]) + exp = Categorical(ii, ordered=True) + tm.assert_categorical_equal(result, exp) + tm.assert_index_equal(result.categories, ii) + + def test_constructor(self): + + exp_arr = np.array(["a", "b", "c", "a", "b", "c"], dtype=np.object_) + c1 = Categorical(exp_arr) + tm.assert_numpy_array_equal(c1.__array__(), exp_arr) + c2 = Categorical(exp_arr, categories=["a", "b", "c"]) + tm.assert_numpy_array_equal(c2.__array__(), exp_arr) + c2 = Categorical(exp_arr, categories=["c", "b", "a"]) + tm.assert_numpy_array_equal(c2.__array__(), exp_arr) + + # categories must be unique + def f(): + Categorical([1, 2], [1, 2, 2]) + + pytest.raises(ValueError, f) + + def f(): + Categorical(["a", "b"], ["a", "b", "b"]) + + pytest.raises(ValueError, f) + + # The default should be unordered + c1 = Categorical(["a", "b", "c", "a"]) + assert not c1.ordered + + # Categorical as input + c1 = Categorical(["a", "b", "c", "a"]) + c2 = Categorical(c1) + tm.assert_categorical_equal(c1, c2) + + c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) + c2 = Categorical(c1) + tm.assert_categorical_equal(c1, c2) + + c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"]) + c2 = Categorical(c1) + tm.assert_categorical_equal(c1, c2) + + c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"]) + c2 = Categorical(c1, categories=["a", "b", "c"]) + tm.assert_numpy_array_equal(c1.__array__(), c2.__array__()) + tm.assert_index_equal(c2.categories, Index(["a", "b", "c"])) + + # Series of dtype category + c1 = Categorical(["a", "b", "c", 
"a"], categories=["a", "b", "c", "d"]) + c2 = Categorical(Series(c1)) + tm.assert_categorical_equal(c1, c2) + + c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"]) + c2 = Categorical(Series(c1)) + tm.assert_categorical_equal(c1, c2) + + # Series + c1 = Categorical(["a", "b", "c", "a"]) + c2 = Categorical(Series(["a", "b", "c", "a"])) + tm.assert_categorical_equal(c1, c2) + + c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) + c2 = Categorical(Series(["a", "b", "c", "a"]), + categories=["a", "b", "c", "d"]) + tm.assert_categorical_equal(c1, c2) + + # This should result in integer categories, not float! + cat = Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) + assert is_integer_dtype(cat.categories) + + # https://github.com/pandas-dev/pandas/issues/3678 + cat = Categorical([np.nan, 1, 2, 3]) + assert is_integer_dtype(cat.categories) + + # this should result in floats + cat = Categorical([np.nan, 1, 2., 3]) + assert is_float_dtype(cat.categories) + + cat = Categorical([np.nan, 1., 2., 3.]) + assert is_float_dtype(cat.categories) + + # This doesn't work -> this would probably need some kind of "remember + # the original type" feature to try to cast the array interface result + # to... 
+ + # vals = np.asarray(cat[cat.notna()]) + # assert is_integer_dtype(vals) + + # corner cases + cat = Categorical([1]) + assert len(cat.categories) == 1 + assert cat.categories[0] == 1 + assert len(cat.codes) == 1 + assert cat.codes[0] == 0 + + cat = Categorical(["a"]) + assert len(cat.categories) == 1 + assert cat.categories[0] == "a" + assert len(cat.codes) == 1 + assert cat.codes[0] == 0 + + # Scalars should be converted to lists + cat = Categorical(1) + assert len(cat.categories) == 1 + assert cat.categories[0] == 1 + assert len(cat.codes) == 1 + assert cat.codes[0] == 0 + + # two arrays + # - when the first is an integer dtype and the second is not + # - when the resulting codes are all -1/NaN + with tm.assert_produces_warning(None): + c_old = Categorical([0, 1, 2, 0, 1, 2], + categories=["a", "b", "c"]) # noqa + + with tm.assert_produces_warning(None): + c_old = Categorical([0, 1, 2, 0, 1, 2], # noqa + categories=[3, 4, 5]) + + # the next one are from the old docs + with tm.assert_produces_warning(None): + c_old2 = Categorical([0, 1, 2, 0, 1, 2], [1, 2, 3]) # noqa + cat = Categorical([1, 2], categories=[1, 2, 3]) + + # this is a legitimate constructor + with tm.assert_produces_warning(None): + c = Categorical(np.array([], dtype='int64'), # noqa + categories=[3, 2, 1], ordered=True) + + def test_constructor_not_sequence(self): + # https://github.com/pandas-dev/pandas/issues/16022 + with pytest.raises(TypeError): + Categorical(['a', 'b'], categories='a') + + def test_constructor_with_null(self): + + # Cannot have NaN in categories + with pytest.raises(ValueError): + Categorical([np.nan, "a", "b", "c"], + categories=[np.nan, "a", "b", "c"]) + + with pytest.raises(ValueError): + Categorical([None, "a", "b", "c"], + categories=[None, "a", "b", "c"]) + + with pytest.raises(ValueError): + Categorical(DatetimeIndex(['nat', '20160101']), + categories=[NaT, Timestamp('20160101')]) + + def test_constructor_with_index(self): + ci = CategoricalIndex(list('aabbca'), 
categories=list('cab')) + tm.assert_categorical_equal(ci.values, Categorical(ci)) + + ci = CategoricalIndex(list('aabbca'), categories=list('cab')) + tm.assert_categorical_equal(ci.values, + Categorical(ci.astype(object), + categories=ci.categories)) + + def test_constructor_with_generator(self): + # This was raising an Error in isna(single_val).any() because isna + # returned a scalar for a generator + xrange = range + + exp = Categorical([0, 1, 2]) + cat = Categorical((x for x in [0, 1, 2])) + tm.assert_categorical_equal(cat, exp) + cat = Categorical(xrange(3)) + tm.assert_categorical_equal(cat, exp) + + # This uses xrange internally + from pandas.core.index import MultiIndex + MultiIndex.from_product([range(5), ['a', 'b', 'c']]) + + # check that categories accept generators and sequences + cat = Categorical([0, 1, 2], categories=(x for x in [0, 1, 2])) + tm.assert_categorical_equal(cat, exp) + cat = Categorical([0, 1, 2], categories=xrange(3)) + tm.assert_categorical_equal(cat, exp) + + def test_constructor_with_datetimelike(self): + + # 12077 + # constructor wwth a datetimelike and NaT + + for dtl in [date_range('1995-01-01 00:00:00', periods=5, freq='s'), + date_range('1995-01-01 00:00:00', periods=5, + freq='s', tz='US/Eastern'), + timedelta_range('1 day', periods=5, freq='s')]: + + s = Series(dtl) + c = Categorical(s) + expected = type(dtl)(s) + expected.freq = None + tm.assert_index_equal(c.categories, expected) + tm.assert_numpy_array_equal(c.codes, np.arange(5, dtype='int8')) + + # with NaT + s2 = s.copy() + s2.iloc[-1] = NaT + c = Categorical(s2) + expected = type(dtl)(s2.dropna()) + expected.freq = None + tm.assert_index_equal(c.categories, expected) + + exp = np.array([0, 1, 2, 3, -1], dtype=np.int8) + tm.assert_numpy_array_equal(c.codes, exp) + + result = repr(c) + assert 'NaT' in result + + def test_constructor_from_index_series_datetimetz(self): + idx = date_range('2015-01-01 10:00', freq='D', periods=3, + tz='US/Eastern') + result = 
Categorical(idx) + tm.assert_index_equal(result.categories, idx) + + result = Categorical(Series(idx)) + tm.assert_index_equal(result.categories, idx) + + def test_constructor_from_index_series_timedelta(self): + idx = timedelta_range('1 days', freq='D', periods=3) + result = Categorical(idx) + tm.assert_index_equal(result.categories, idx) + + result = Categorical(Series(idx)) + tm.assert_index_equal(result.categories, idx) + + def test_constructor_from_index_series_period(self): + idx = period_range('2015-01-01', freq='D', periods=3) + result = Categorical(idx) + tm.assert_index_equal(result.categories, idx) + + result = Categorical(Series(idx)) + tm.assert_index_equal(result.categories, idx) + + def test_constructor_invariant(self): + # GH 14190 + vals = [ + np.array([1., 1.2, 1.8, np.nan]), + np.array([1, 2, 3], dtype='int64'), + ['a', 'b', 'c', np.nan], + [pd.Period('2014-01'), pd.Period('2014-02'), NaT], + [Timestamp('2014-01-01'), Timestamp('2014-01-02'), NaT], + [Timestamp('2014-01-01', tz='US/Eastern'), + Timestamp('2014-01-02', tz='US/Eastern'), NaT], + ] + for val in vals: + c = Categorical(val) + c2 = Categorical(c) + tm.assert_categorical_equal(c, c2) + + @pytest.mark.parametrize('ordered', [True, False]) + def test_constructor_with_dtype(self, ordered): + categories = ['b', 'a', 'c'] + dtype = CategoricalDtype(categories, ordered=ordered) + result = Categorical(['a', 'b', 'a', 'c'], dtype=dtype) + expected = Categorical(['a', 'b', 'a', 'c'], categories=categories, + ordered=ordered) + tm.assert_categorical_equal(result, expected) + assert result.ordered is ordered + + def test_constructor_dtype_and_others_raises(self): + dtype = CategoricalDtype(['a', 'b'], ordered=True) + with tm.assert_raises_regex(ValueError, "Cannot"): + Categorical(['a', 'b'], categories=['a', 'b'], dtype=dtype) + + with tm.assert_raises_regex(ValueError, "Cannot"): + Categorical(['a', 'b'], ordered=True, dtype=dtype) + + with tm.assert_raises_regex(ValueError, "Cannot"): + 
Categorical(['a', 'b'], ordered=False, dtype=dtype) + + @pytest.mark.parametrize('categories', [ + None, ['a', 'b'], ['a', 'c'], + ]) + @pytest.mark.parametrize('ordered', [True, False]) + def test_constructor_str_category(self, categories, ordered): + result = Categorical(['a', 'b'], categories=categories, + ordered=ordered, dtype='category') + expected = Categorical(['a', 'b'], categories=categories, + ordered=ordered) + tm.assert_categorical_equal(result, expected) + + def test_constructor_str_unknown(self): + with tm.assert_raises_regex(ValueError, "Unknown `dtype`"): + Categorical([1, 2], dtype="foo") + + def test_constructor_from_categorical_with_dtype(self): + dtype = CategoricalDtype(['a', 'b', 'c'], ordered=True) + values = Categorical(['a', 'b', 'd']) + result = Categorical(values, dtype=dtype) + # We use dtype.categories, not values.categories + expected = Categorical(['a', 'b', 'd'], categories=['a', 'b', 'c'], + ordered=True) + tm.assert_categorical_equal(result, expected) + + def test_constructor_from_categorical_with_unknown_dtype(self): + dtype = CategoricalDtype(None, ordered=True) + values = Categorical(['a', 'b', 'd']) + result = Categorical(values, dtype=dtype) + # We use values.categories, not dtype.categories + expected = Categorical(['a', 'b', 'd'], categories=['a', 'b', 'd'], + ordered=True) + tm.assert_categorical_equal(result, expected) + + def test_constructor_from_categorical_string(self): + values = Categorical(['a', 'b', 'd']) + # use categories, ordered + result = Categorical(values, categories=['a', 'b', 'c'], ordered=True, + dtype='category') + expected = Categorical(['a', 'b', 'd'], categories=['a', 'b', 'c'], + ordered=True) + tm.assert_categorical_equal(result, expected) + + # No string + result = Categorical(values, categories=['a', 'b', 'c'], ordered=True) + tm.assert_categorical_equal(result, expected) + + def test_constructor_with_categorical_categories(self): + # GH17884 + expected = Categorical(['a', 'b'], categories=['a', 
'b', 'c']) + + result = Categorical( + ['a', 'b'], categories=Categorical(['a', 'b', 'c'])) + tm.assert_categorical_equal(result, expected) + + result = Categorical( + ['a', 'b'], categories=CategoricalIndex(['a', 'b', 'c'])) + tm.assert_categorical_equal(result, expected) + + def test_from_codes(self): + + # too few categories + def f(): + Categorical.from_codes([1, 2], [1, 2]) + + pytest.raises(ValueError, f) + + # no int codes + def f(): + Categorical.from_codes(["a"], [1, 2]) + + pytest.raises(ValueError, f) + + # no unique categories + def f(): + Categorical.from_codes([0, 1, 2], ["a", "a", "b"]) + + pytest.raises(ValueError, f) + + # NaN categories included + def f(): + Categorical.from_codes([0, 1, 2], ["a", "b", np.nan]) + + pytest.raises(ValueError, f) + + # too negative + def f(): + Categorical.from_codes([-2, 1, 2], ["a", "b", "c"]) + + pytest.raises(ValueError, f) + + exp = Categorical(["a", "b", "c"], ordered=False) + res = Categorical.from_codes([0, 1, 2], ["a", "b", "c"]) + tm.assert_categorical_equal(exp, res) + + # Not available in earlier numpy versions + if hasattr(np.random, "choice"): + codes = np.random.choice([0, 1], 5, p=[0.9, 0.1]) + Categorical.from_codes(codes, categories=["train", "test"]) + + def test_from_codes_with_categorical_categories(self): + # GH17884 + expected = Categorical(['a', 'b'], categories=['a', 'b', 'c']) + + result = Categorical.from_codes( + [0, 1], categories=Categorical(['a', 'b', 'c'])) + tm.assert_categorical_equal(result, expected) + + result = Categorical.from_codes( + [0, 1], categories=CategoricalIndex(['a', 'b', 'c'])) + tm.assert_categorical_equal(result, expected) + + # non-unique Categorical still raises + with pytest.raises(ValueError): + Categorical.from_codes([0, 1], Categorical(['a', 'b', 'a'])) + + @pytest.mark.parametrize('dtype', [None, 'category']) + def test_from_inferred_categories(self, dtype): + cats = ['a', 'b'] + codes = np.array([0, 0, 1, 1], dtype='i8') + result = 
Categorical._from_inferred_categories(cats, codes, dtype) + expected = Categorical.from_codes(codes, cats) + tm.assert_categorical_equal(result, expected) + + @pytest.mark.parametrize('dtype', [None, 'category']) + def test_from_inferred_categories_sorts(self, dtype): + cats = ['b', 'a'] + codes = np.array([0, 1, 1, 1], dtype='i8') + result = Categorical._from_inferred_categories(cats, codes, dtype) + expected = Categorical.from_codes([1, 0, 0, 0], ['a', 'b']) + tm.assert_categorical_equal(result, expected) + + def test_from_inferred_categories_dtype(self): + cats = ['a', 'b', 'd'] + codes = np.array([0, 1, 0, 2], dtype='i8') + dtype = CategoricalDtype(['c', 'b', 'a'], ordered=True) + result = Categorical._from_inferred_categories(cats, codes, dtype) + expected = Categorical(['a', 'b', 'a', 'd'], + categories=['c', 'b', 'a'], + ordered=True) + tm.assert_categorical_equal(result, expected) + + def test_from_inferred_categories_coerces(self): + cats = ['1', '2', 'bad'] + codes = np.array([0, 0, 1, 2], dtype='i8') + dtype = CategoricalDtype([1, 2]) + result = Categorical._from_inferred_categories(cats, codes, dtype) + expected = Categorical([1, 1, 2, np.nan]) + tm.assert_categorical_equal(result, expected) + + def test_construction_with_ordered(self): + # GH 9347, 9190 + cat = Categorical([0, 1, 2]) + assert not cat.ordered + cat = Categorical([0, 1, 2], ordered=False) + assert not cat.ordered + cat = Categorical([0, 1, 2], ordered=True) + assert cat.ordered + + @pytest.mark.xfail(reason="Imaginary values not supported in Categorical") + def test_constructor_imaginary(self): + values = [1, 2, 3 + 1j] + c1 = Categorical(values) + tm.assert_index_equal(c1.categories, Index(values)) + tm.assert_numpy_array_equal(np.array(c1), np.array(values)) diff --git a/pandas/tests/categorical/test_dtypes.py b/pandas/tests/categorical/test_dtypes.py new file mode 100644 index 0000000000000..8973d1196f6a9 --- /dev/null +++ b/pandas/tests/categorical/test_dtypes.py @@ -0,0 +1,163 @@ +# 
-*- coding: utf-8 -*- + +import pytest + +import numpy as np + +import pandas.util.testing as tm +from pandas.core.dtypes.dtypes import CategoricalDtype +from pandas import Categorical, Index, CategoricalIndex, Series + + +class TestCategoricalDtypes(object): + + def test_is_equal_dtype(self): + + # test dtype comparisons between cats + + c1 = Categorical(list('aabca'), categories=list('abc'), ordered=False) + c2 = Categorical(list('aabca'), categories=list('cab'), ordered=False) + c3 = Categorical(list('aabca'), categories=list('cab'), ordered=True) + assert c1.is_dtype_equal(c1) + assert c2.is_dtype_equal(c2) + assert c3.is_dtype_equal(c3) + assert c1.is_dtype_equal(c2) + assert not c1.is_dtype_equal(c3) + assert not c1.is_dtype_equal(Index(list('aabca'))) + assert not c1.is_dtype_equal(c1.astype(object)) + assert c1.is_dtype_equal(CategoricalIndex(c1)) + assert (c1.is_dtype_equal( + CategoricalIndex(c1, categories=list('cab')))) + assert not c1.is_dtype_equal(CategoricalIndex(c1, ordered=True)) + + # GH 16659 + s1 = Series(c1) + s2 = Series(c2) + s3 = Series(c3) + assert c1.is_dtype_equal(s1) + assert c2.is_dtype_equal(s2) + assert c3.is_dtype_equal(s3) + assert c1.is_dtype_equal(s2) + assert not c1.is_dtype_equal(s3) + assert not c1.is_dtype_equal(s1.astype(object)) + + def test_set_dtype_same(self): + c = Categorical(['a', 'b', 'c']) + result = c._set_dtype(CategoricalDtype(['a', 'b', 'c'])) + tm.assert_categorical_equal(result, c) + + def test_set_dtype_new_categories(self): + c = Categorical(['a', 'b', 'c']) + result = c._set_dtype(CategoricalDtype(list('abcd'))) + tm.assert_numpy_array_equal(result.codes, c.codes) + tm.assert_index_equal(result.dtype.categories, Index(list('abcd'))) + + @pytest.mark.parametrize('values, categories, new_categories', [ + # No NaNs, same cats, same order + (['a', 'b', 'a'], ['a', 'b'], ['a', 'b'],), + # No NaNs, same cats, different order + (['a', 'b', 'a'], ['a', 'b'], ['b', 'a'],), + # Same, unsorted + (['b', 'a', 'a'], 
['a', 'b'], ['a', 'b'],), + # No NaNs, same cats, different order + (['b', 'a', 'a'], ['a', 'b'], ['b', 'a'],), + # NaNs + (['a', 'b', 'c'], ['a', 'b'], ['a', 'b']), + (['a', 'b', 'c'], ['a', 'b'], ['b', 'a']), + (['b', 'a', 'c'], ['a', 'b'], ['a', 'b']), + (['b', 'a', 'c'], ['a', 'b'], ['a', 'b']), + # Introduce NaNs + (['a', 'b', 'c'], ['a', 'b'], ['a']), + (['a', 'b', 'c'], ['a', 'b'], ['b']), + (['b', 'a', 'c'], ['a', 'b'], ['a']), + (['b', 'a', 'c'], ['a', 'b'], ['a']), + # No overlap + (['a', 'b', 'c'], ['a', 'b'], ['d', 'e']), + ]) + @pytest.mark.parametrize('ordered', [True, False]) + def test_set_dtype_many(self, values, categories, new_categories, + ordered): + c = Categorical(values, categories) + expected = Categorical(values, new_categories, ordered) + result = c._set_dtype(expected.dtype) + tm.assert_categorical_equal(result, expected) + + def test_set_dtype_no_overlap(self): + c = Categorical(['a', 'b', 'c'], ['d', 'e']) + result = c._set_dtype(CategoricalDtype(['a', 'b'])) + expected = Categorical([None, None, None], categories=['a', 'b']) + tm.assert_categorical_equal(result, expected) + + def test_codes_dtypes(self): + + # GH 8453 + result = Categorical(['foo', 'bar', 'baz']) + assert result.codes.dtype == 'int8' + + result = Categorical(['foo%05d' % i for i in range(400)]) + assert result.codes.dtype == 'int16' + + result = Categorical(['foo%05d' % i for i in range(40000)]) + assert result.codes.dtype == 'int32' + + # adding cats + result = Categorical(['foo', 'bar', 'baz']) + assert result.codes.dtype == 'int8' + result = result.add_categories(['foo%05d' % i for i in range(400)]) + assert result.codes.dtype == 'int16' + + # removing cats + result = result.remove_categories(['foo%05d' % i for i in range(300)]) + assert result.codes.dtype == 'int8' + + @pytest.mark.parametrize('ordered', [True, False]) + def test_astype(self, ordered): + # string + cat = Categorical(list('abbaaccc'), ordered=ordered) + result = cat.astype(object) + expected = 
np.array(cat) + tm.assert_numpy_array_equal(result, expected) + + msg = 'could not convert string to float' + with tm.assert_raises_regex(ValueError, msg): + cat.astype(float) + + # numeric + cat = Categorical([0, 1, 2, 2, 1, 0, 1, 0, 2], ordered=ordered) + result = cat.astype(object) + expected = np.array(cat, dtype=object) + tm.assert_numpy_array_equal(result, expected) + + result = cat.astype(int) + expected = np.array(cat, dtype=np.int) + tm.assert_numpy_array_equal(result, expected) + + result = cat.astype(float) + expected = np.array(cat, dtype=np.float) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize('dtype_ordered', [True, False]) + @pytest.mark.parametrize('cat_ordered', [True, False]) + def test_astype_category(self, dtype_ordered, cat_ordered): + # GH 10696/18593 + data = list('abcaacbab') + cat = Categorical(data, categories=list('bac'), ordered=cat_ordered) + + # standard categories + dtype = CategoricalDtype(ordered=dtype_ordered) + result = cat.astype(dtype) + expected = Categorical( + data, categories=cat.categories, ordered=dtype_ordered) + tm.assert_categorical_equal(result, expected) + + # non-standard categories + dtype = CategoricalDtype(list('adc'), dtype_ordered) + result = cat.astype(dtype) + expected = Categorical(data, dtype=dtype) + tm.assert_categorical_equal(result, expected) + + if dtype_ordered is False: + # dtype='category' can't specify ordered, so only test once + result = cat.astype('category') + expected = cat + tm.assert_categorical_equal(result, expected) diff --git a/pandas/tests/categorical/test_indexing.py b/pandas/tests/categorical/test_indexing.py new file mode 100644 index 0000000000000..9c27b1101e5ca --- /dev/null +++ b/pandas/tests/categorical/test_indexing.py @@ -0,0 +1,105 @@ +# -*- coding: utf-8 -*- + +import pytest + +import numpy as np + +import pandas.util.testing as tm +from pandas import Categorical, Index, PeriodIndex +from pandas.tests.categorical.common import TestCategorical + + 
+class TestCategoricalIndexingWithFactor(TestCategorical): + + def test_getitem(self): + assert self.factor[0] == 'a' + assert self.factor[-1] == 'c' + + subf = self.factor[[0, 1, 2]] + tm.assert_numpy_array_equal(subf._codes, + np.array([0, 1, 1], dtype=np.int8)) + + subf = self.factor[np.asarray(self.factor) == 'c'] + tm.assert_numpy_array_equal(subf._codes, + np.array([2, 2, 2], dtype=np.int8)) + + def test_setitem(self): + + # int/positional + c = self.factor.copy() + c[0] = 'b' + assert c[0] == 'b' + c[-1] = 'a' + assert c[-1] == 'a' + + # boolean + c = self.factor.copy() + indexer = np.zeros(len(c), dtype='bool') + indexer[0] = True + indexer[-1] = True + c[indexer] = 'c' + expected = Categorical(['c', 'b', 'b', 'a', 'a', 'c', 'c', 'c'], + ordered=True) + + tm.assert_categorical_equal(c, expected) + + +class TestCategoricalIndexing(object): + + def test_getitem_listlike(self): + + # GH 9469 + # properly coerce the input indexers + np.random.seed(1) + c = Categorical(np.random.randint(0, 5, size=150000).astype(np.int8)) + result = c.codes[np.array([100000]).astype(np.int64)] + expected = c[np.array([100000]).astype(np.int64)].codes + tm.assert_numpy_array_equal(result, expected) + + def test_periodindex(self): + idx1 = PeriodIndex(['2014-01', '2014-01', '2014-02', '2014-02', + '2014-03', '2014-03'], freq='M') + + cat1 = Categorical(idx1) + str(cat1) + exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.int8) + exp_idx = PeriodIndex(['2014-01', '2014-02', '2014-03'], freq='M') + tm.assert_numpy_array_equal(cat1._codes, exp_arr) + tm.assert_index_equal(cat1.categories, exp_idx) + + idx2 = PeriodIndex(['2014-03', '2014-03', '2014-02', '2014-01', + '2014-03', '2014-01'], freq='M') + cat2 = Categorical(idx2, ordered=True) + str(cat2) + exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.int8) + exp_idx2 = PeriodIndex(['2014-01', '2014-02', '2014-03'], freq='M') + tm.assert_numpy_array_equal(cat2._codes, exp_arr) + tm.assert_index_equal(cat2.categories, exp_idx2) + + idx3 = 
PeriodIndex(['2013-12', '2013-11', '2013-10', '2013-09', + '2013-08', '2013-07', '2013-05'], freq='M') + cat3 = Categorical(idx3, ordered=True) + exp_arr = np.array([6, 5, 4, 3, 2, 1, 0], dtype=np.int8) + exp_idx = PeriodIndex(['2013-05', '2013-07', '2013-08', '2013-09', + '2013-10', '2013-11', '2013-12'], freq='M') + tm.assert_numpy_array_equal(cat3._codes, exp_arr) + tm.assert_index_equal(cat3.categories, exp_idx) + + def test_categories_assigments(self): + s = Categorical(["a", "b", "c", "a"]) + exp = np.array([1, 2, 3, 1], dtype=np.int64) + s.categories = [1, 2, 3] + tm.assert_numpy_array_equal(s.__array__(), exp) + tm.assert_index_equal(s.categories, Index([1, 2, 3])) + + # lengthen + def f(): + s.categories = [1, 2, 3, 4] + + pytest.raises(ValueError, f) + + # shorten + def f(): + s.categories = [1, 2] + + pytest.raises(ValueError, f) diff --git a/pandas/tests/categorical/test_missing.py b/pandas/tests/categorical/test_missing.py new file mode 100644 index 0000000000000..5133c97d8b590 --- /dev/null +++ b/pandas/tests/categorical/test_missing.py @@ -0,0 +1,70 @@ +# -*- coding: utf-8 -*- +import numpy as np +import pytest + +import pandas.util.testing as tm +from pandas import Categorical, Index, isna +from pandas.compat import lrange +from pandas.core.dtypes.dtypes import CategoricalDtype + + +class TestCategoricalMissing(object): + + def test_na_flags_int_categories(self): + # #1457 + + categories = lrange(10) + labels = np.random.randint(0, 10, 20) + labels[::5] = -1 + + cat = Categorical(labels, categories, fastpath=True) + repr(cat) + + tm.assert_numpy_array_equal(isna(cat), labels == -1) + + def test_nan_handling(self): + + # Nans are represented as -1 in codes + c = Categorical(["a", "b", np.nan, "a"]) + tm.assert_index_equal(c.categories, Index(["a", "b"])) + tm.assert_numpy_array_equal(c._codes, np.array([0, 1, -1, 0], + dtype=np.int8)) + c[1] = np.nan + tm.assert_index_equal(c.categories, Index(["a", "b"])) + tm.assert_numpy_array_equal(c._codes, 
np.array([0, -1, -1, 0], + dtype=np.int8)) + + # Adding nan to categories should make assigned nan point to the + # category! + c = Categorical(["a", "b", np.nan, "a"]) + tm.assert_index_equal(c.categories, Index(["a", "b"])) + tm.assert_numpy_array_equal(c._codes, np.array([0, 1, -1, 0], + dtype=np.int8)) + + def test_set_dtype_nans(self): + c = Categorical(['a', 'b', np.nan]) + result = c._set_dtype(CategoricalDtype(['a', 'c'])) + tm.assert_numpy_array_equal(result.codes, np.array([0, -1, -1], + dtype='int8')) + + def test_set_item_nan(self): + cat = Categorical([1, 2, 3]) + cat[1] = np.nan + + exp = Categorical([1, np.nan, 3], categories=[1, 2, 3]) + tm.assert_categorical_equal(cat, exp) + + @pytest.mark.parametrize('fillna_kwargs, msg', [ + (dict(value=1, method='ffill'), + "Cannot specify both 'value' and 'method'."), + (dict(), + "Must specify a fill 'value' or 'method'."), + (dict(method='bad'), + "Invalid fill method. Expecting .* bad"), + ]) + def test_fillna_raises(self, fillna_kwargs, msg): + # https://github.com/pandas-dev/pandas/issues/19682 + cat = Categorical([1, 2, 3]) + + with tm.assert_raises_regex(ValueError, msg): + cat.fillna(**fillna_kwargs) diff --git a/pandas/tests/categorical/test_operators.py b/pandas/tests/categorical/test_operators.py new file mode 100644 index 0000000000000..fa8bb817616e4 --- /dev/null +++ b/pandas/tests/categorical/test_operators.py @@ -0,0 +1,293 @@ +# -*- coding: utf-8 -*- + +import pytest + +import pandas as pd +import numpy as np + +import pandas.util.testing as tm +from pandas import Categorical, Series, DataFrame, date_range +from pandas.tests.categorical.common import TestCategorical + + +class TestCategoricalOpsWithFactor(TestCategorical): + + def test_categories_none_comparisons(self): + factor = Categorical(['a', 'b', 'b', 'a', + 'a', 'c', 'c', 'c'], ordered=True) + tm.assert_categorical_equal(factor, self.factor) + + def test_comparisons(self): + + result = self.factor[self.factor == 'a'] + expected = 
self.factor[np.asarray(self.factor) == 'a'] + tm.assert_categorical_equal(result, expected) + + result = self.factor[self.factor != 'a'] + expected = self.factor[np.asarray(self.factor) != 'a'] + tm.assert_categorical_equal(result, expected) + + result = self.factor[self.factor < 'c'] + expected = self.factor[np.asarray(self.factor) < 'c'] + tm.assert_categorical_equal(result, expected) + + result = self.factor[self.factor > 'a'] + expected = self.factor[np.asarray(self.factor) > 'a'] + tm.assert_categorical_equal(result, expected) + + result = self.factor[self.factor >= 'b'] + expected = self.factor[np.asarray(self.factor) >= 'b'] + tm.assert_categorical_equal(result, expected) + + result = self.factor[self.factor <= 'b'] + expected = self.factor[np.asarray(self.factor) <= 'b'] + tm.assert_categorical_equal(result, expected) + + n = len(self.factor) + + other = self.factor[np.random.permutation(n)] + result = self.factor == other + expected = np.asarray(self.factor) == np.asarray(other) + tm.assert_numpy_array_equal(result, expected) + + result = self.factor == 'd' + expected = np.repeat(False, len(self.factor)) + tm.assert_numpy_array_equal(result, expected) + + # comparisons with categoricals + cat_rev = Categorical( + ["a", "b", "c"], categories=["c", "b", "a"], ordered=True) + cat_rev_base = Categorical( + ["b", "b", "b"], categories=["c", "b", "a"], ordered=True) + cat = Categorical(["a", "b", "c"], ordered=True) + cat_base = Categorical( + ["b", "b", "b"], categories=cat.categories, ordered=True) + + # comparisons need to take categories ordering into account + res_rev = cat_rev > cat_rev_base + exp_rev = np.array([True, False, False]) + tm.assert_numpy_array_equal(res_rev, exp_rev) + + res_rev = cat_rev < cat_rev_base + exp_rev = np.array([False, False, True]) + tm.assert_numpy_array_equal(res_rev, exp_rev) + + res = cat > cat_base + exp = np.array([False, False, True]) + tm.assert_numpy_array_equal(res, exp) + + # Only categories with same categories can 
be compared + def f(): + cat > cat_rev + + pytest.raises(TypeError, f) + + cat_rev_base2 = Categorical( + ["b", "b", "b"], categories=["c", "b", "a", "d"]) + + def f(): + cat_rev > cat_rev_base2 + + pytest.raises(TypeError, f) + + # Only categories with same ordering information can be compared + cat_unorderd = cat.set_ordered(False) + assert not (cat > cat).any() + + def f(): + cat > cat_unorderd + + pytest.raises(TypeError, f) + + # comparison (in both directions) with Series will raise + s = Series(["b", "b", "b"]) + pytest.raises(TypeError, lambda: cat > s) + pytest.raises(TypeError, lambda: cat_rev > s) + pytest.raises(TypeError, lambda: s < cat) + pytest.raises(TypeError, lambda: s < cat_rev) + + # comparison with numpy.array will raise in both direction, but only on + # newer numpy versions + a = np.array(["b", "b", "b"]) + pytest.raises(TypeError, lambda: cat > a) + pytest.raises(TypeError, lambda: cat_rev > a) + + # Make sure that unequal comparison take the categories order in + # account + cat_rev = Categorical( + list("abc"), categories=list("cba"), ordered=True) + exp = np.array([True, False, False]) + res = cat_rev > "b" + tm.assert_numpy_array_equal(res, exp) + + +class TestCategoricalOps(object): + + def test_datetime_categorical_comparison(self): + dt_cat = Categorical(date_range('2014-01-01', periods=3), ordered=True) + tm.assert_numpy_array_equal(dt_cat > dt_cat[0], + np.array([False, True, True])) + tm.assert_numpy_array_equal(dt_cat[0] < dt_cat, + np.array([False, True, True])) + + def test_reflected_comparison_with_scalars(self): + # GH8658 + cat = Categorical([1, 2, 3], ordered=True) + tm.assert_numpy_array_equal(cat > cat[0], + np.array([False, True, True])) + tm.assert_numpy_array_equal(cat[0] < cat, + np.array([False, True, True])) + + def test_comparison_with_unknown_scalars(self): + # https://github.com/pandas-dev/pandas/issues/9836#issuecomment-92123057 + # and following comparisons with scalars not in categories should raise + # for 
unequal comps, but not for equal/not equal + cat = Categorical([1, 2, 3], ordered=True) + + pytest.raises(TypeError, lambda: cat < 4) + pytest.raises(TypeError, lambda: cat > 4) + pytest.raises(TypeError, lambda: 4 < cat) + pytest.raises(TypeError, lambda: 4 > cat) + + tm.assert_numpy_array_equal(cat == 4, + np.array([False, False, False])) + tm.assert_numpy_array_equal(cat != 4, + np.array([True, True, True])) + + @pytest.mark.parametrize('data,reverse,base', [ + (list("abc"), list("cba"), list("bbb")), + ([1, 2, 3], [3, 2, 1], [2, 2, 2])] + ) + def test_comparisons(self, data, reverse, base): + cat_rev = Series( + Categorical(data, categories=reverse, ordered=True)) + cat_rev_base = Series( + Categorical(base, categories=reverse, ordered=True)) + cat = Series(Categorical(data, ordered=True)) + cat_base = Series( + Categorical(base, categories=cat.cat.categories, ordered=True)) + s = Series(base) + a = np.array(base) + + # comparisons need to take categories ordering into account + res_rev = cat_rev > cat_rev_base + exp_rev = Series([True, False, False]) + tm.assert_series_equal(res_rev, exp_rev) + + res_rev = cat_rev < cat_rev_base + exp_rev = Series([False, False, True]) + tm.assert_series_equal(res_rev, exp_rev) + + res = cat > cat_base + exp = Series([False, False, True]) + tm.assert_series_equal(res, exp) + + scalar = base[1] + res = cat > scalar + exp = Series([False, False, True]) + exp2 = cat.values > scalar + tm.assert_series_equal(res, exp) + tm.assert_numpy_array_equal(res.values, exp2) + res_rev = cat_rev > scalar + exp_rev = Series([True, False, False]) + exp_rev2 = cat_rev.values > scalar + tm.assert_series_equal(res_rev, exp_rev) + tm.assert_numpy_array_equal(res_rev.values, exp_rev2) + + # Only categories with same categories can be compared + def f(): + cat > cat_rev + + pytest.raises(TypeError, f) + + # categorical cannot be compared to Series or numpy array, and also + # not the other way around + pytest.raises(TypeError, lambda: cat > s) + 
pytest.raises(TypeError, lambda: cat_rev > s) + pytest.raises(TypeError, lambda: cat > a) + pytest.raises(TypeError, lambda: cat_rev > a) + + pytest.raises(TypeError, lambda: s < cat) + pytest.raises(TypeError, lambda: s < cat_rev) + + pytest.raises(TypeError, lambda: a < cat) + pytest.raises(TypeError, lambda: a < cat_rev) + + @pytest.mark.parametrize('ctor', [ + lambda *args, **kwargs: Categorical(*args, **kwargs), + lambda *args, **kwargs: Series(Categorical(*args, **kwargs)), + ]) + def test_unordered_different_order_equal(self, ctor): + # https://github.com/pandas-dev/pandas/issues/16014 + c1 = ctor(['a', 'b'], categories=['a', 'b'], ordered=False) + c2 = ctor(['a', 'b'], categories=['b', 'a'], ordered=False) + assert (c1 == c2).all() + + c1 = ctor(['a', 'b'], categories=['a', 'b'], ordered=False) + c2 = ctor(['b', 'a'], categories=['b', 'a'], ordered=False) + assert (c1 != c2).all() + + c1 = ctor(['a', 'a'], categories=['a', 'b'], ordered=False) + c2 = ctor(['b', 'b'], categories=['b', 'a'], ordered=False) + assert (c1 != c2).all() + + c1 = ctor(['a', 'a'], categories=['a', 'b'], ordered=False) + c2 = ctor(['a', 'b'], categories=['b', 'a'], ordered=False) + result = c1 == c2 + tm.assert_numpy_array_equal(np.array(result), np.array([True, False])) + + def test_unordered_different_categories_raises(self): + c1 = Categorical(['a', 'b'], categories=['a', 'b'], ordered=False) + c2 = Categorical(['a', 'c'], categories=['c', 'a'], ordered=False) + with tm.assert_raises_regex(TypeError, + "Categoricals can only be compared"): + c1 == c2 + + def test_compare_different_lengths(self): + c1 = Categorical([], categories=['a', 'b']) + c2 = Categorical([], categories=['a']) + msg = "Categories are different lengths" + with tm.assert_raises_regex(TypeError, msg): + c1 == c2 + + def test_compare_unordered_different_order(self): + # https://github.com/pandas-dev/pandas/issues/16603#issuecomment- + # 349290078 + a = pd.Categorical(['a'], categories=['a', 'b']) + b = 
pd.Categorical(['b'], categories=['b', 'a']) + assert not a.equals(b) + + def test_numeric_like_ops(self): + + df = DataFrame({'value': np.random.randint(0, 10000, 100)}) + labels = ["{0} - {1}".format(i, i + 499) for i in range(0, 10000, 500)] + cat_labels = Categorical(labels, labels) + + df = df.sort_values(by=['value'], ascending=True) + df['value_group'] = pd.cut(df.value, range(0, 10500, 500), + right=False, labels=cat_labels) + + # numeric ops should not succeed + for op in ['__add__', '__sub__', '__mul__', '__truediv__']: + pytest.raises(TypeError, + lambda: getattr(df, op)(df)) + + # reduction ops should not succeed (unless specifically defined, e.g. + # min/max) + s = df['value_group'] + for op in ['kurt', 'skew', 'var', 'std', 'mean', 'sum', 'median']: + pytest.raises(TypeError, + lambda: getattr(s, op)(numeric_only=False)) + + # mad technically works because it takes always the numeric data + + # numpy ops + s = Series(Categorical([1, 2, 3, 4])) + pytest.raises(TypeError, lambda: np.sum(s)) + + # numeric ops on a Series + for op in ['__add__', '__sub__', '__mul__', '__truediv__']: + pytest.raises(TypeError, lambda: getattr(s, op)(2)) + + # invalid ufunc + pytest.raises(TypeError, lambda: np.log(s)) diff --git a/pandas/tests/categorical/test_repr.py b/pandas/tests/categorical/test_repr.py new file mode 100644 index 0000000000000..0cadf66b24d46 --- /dev/null +++ b/pandas/tests/categorical/test_repr.py @@ -0,0 +1,517 @@ +# -*- coding: utf-8 -*- + +import numpy as np + +from pandas import (Categorical, Series, CategoricalIndex, date_range, + period_range, timedelta_range) +from pandas.compat import u, PY3 +from pandas.core.config import option_context +from pandas.tests.categorical.common import TestCategorical + + +class TestCategoricalReprWithFactor(TestCategorical): + + def test_print(self): + expected = ["[a, b, b, a, a, c, c, c]", + "Categories (3, object): [a < b < c]"] + expected = "\n".join(expected) + actual = repr(self.factor) + assert actual == 
expected + + +class TestCategoricalRepr(object): + + def test_big_print(self): + factor = Categorical([0, 1, 2, 0, 1, 2] * 100, ['a', 'b', 'c'], + fastpath=True) + expected = ["[a, b, c, a, b, ..., b, c, a, b, c]", "Length: 600", + "Categories (3, object): [a, b, c]"] + expected = "\n".join(expected) + + actual = repr(factor) + + assert actual == expected + + def test_empty_print(self): + factor = Categorical([], ["a", "b", "c"]) + expected = ("[], Categories (3, object): [a, b, c]") + # hack because array_repr changed in numpy > 1.6.x + actual = repr(factor) + assert actual == expected + + assert expected == actual + factor = Categorical([], ["a", "b", "c"], ordered=True) + expected = ("[], Categories (3, object): [a < b < c]") + actual = repr(factor) + assert expected == actual + + factor = Categorical([], []) + expected = ("[], Categories (0, object): []") + assert expected == repr(factor) + + def test_print_none_width(self): + # GH10087 + a = Series(Categorical([1, 2, 3, 4])) + exp = u("0 1\n1 2\n2 3\n3 4\n" + + "dtype: category\nCategories (4, int64): [1, 2, 3, 4]") + + with option_context("display.width", None): + assert exp == repr(a) + + def test_unicode_print(self): + if PY3: + _rep = repr + else: + _rep = unicode # noqa + + c = Categorical(['aaaaa', 'bb', 'cccc'] * 20) + expected = u"""\ +[aaaaa, bb, cccc, aaaaa, bb, ..., bb, cccc, aaaaa, bb, cccc] +Length: 60 +Categories (3, object): [aaaaa, bb, cccc]""" + + assert _rep(c) == expected + + c = Categorical([u'ああああ', u'いいいいい', u'ううううううう'] * 20) + expected = u"""\ +[ああああ, いいいいい, ううううううう, ああああ, いいいいい, ..., いいいいい, ううううううう, ああああ, いいいいい, ううううううう] +Length: 60 +Categories (3, object): [ああああ, いいいいい, ううううううう]""" # noqa + + assert _rep(c) == expected + + # unicode option should not affect to Categorical, as it doesn't care + # the repr width + with option_context('display.unicode.east_asian_width', True): + + c = Categorical([u'ああああ', u'いいいいい', u'ううううううう'] * 20) + expected = u"""[ああああ, いいいいい, ううううううう, ああああ, いいいいい, 
..., いいいいい, ううううううう, ああああ, いいいいい, ううううううう] +Length: 60 +Categories (3, object): [ああああ, いいいいい, ううううううう]""" # noqa + + assert _rep(c) == expected + + def test_categorical_repr(self): + c = Categorical([1, 2, 3]) + exp = """[1, 2, 3] +Categories (3, int64): [1, 2, 3]""" + + assert repr(c) == exp + + c = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3]) + exp = """[1, 2, 3, 1, 2, 3] +Categories (3, int64): [1, 2, 3]""" + + assert repr(c) == exp + + c = Categorical([1, 2, 3, 4, 5] * 10) + exp = """[1, 2, 3, 4, 5, ..., 1, 2, 3, 4, 5] +Length: 50 +Categories (5, int64): [1, 2, 3, 4, 5]""" + + assert repr(c) == exp + + c = Categorical(np.arange(20)) + exp = """[0, 1, 2, 3, 4, ..., 15, 16, 17, 18, 19] +Length: 20 +Categories (20, int64): [0, 1, 2, 3, ..., 16, 17, 18, 19]""" + + assert repr(c) == exp + + def test_categorical_repr_ordered(self): + c = Categorical([1, 2, 3], ordered=True) + exp = """[1, 2, 3] +Categories (3, int64): [1 < 2 < 3]""" + + assert repr(c) == exp + + c = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3], ordered=True) + exp = """[1, 2, 3, 1, 2, 3] +Categories (3, int64): [1 < 2 < 3]""" + + assert repr(c) == exp + + c = Categorical([1, 2, 3, 4, 5] * 10, ordered=True) + exp = """[1, 2, 3, 4, 5, ..., 1, 2, 3, 4, 5] +Length: 50 +Categories (5, int64): [1 < 2 < 3 < 4 < 5]""" + + assert repr(c) == exp + + c = Categorical(np.arange(20), ordered=True) + exp = """[0, 1, 2, 3, 4, ..., 15, 16, 17, 18, 19] +Length: 20 +Categories (20, int64): [0 < 1 < 2 < 3 ... 
16 < 17 < 18 < 19]""" + + assert repr(c) == exp + + def test_categorical_repr_datetime(self): + idx = date_range('2011-01-01 09:00', freq='H', periods=5) + c = Categorical(idx) + + # TODO(wesm): exceeding 80 characters in the console is not good + # behavior + exp = ( + "[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, " + "2011-01-01 12:00:00, 2011-01-01 13:00:00]\n" + "Categories (5, datetime64[ns]): [2011-01-01 09:00:00, " + "2011-01-01 10:00:00, 2011-01-01 11:00:00,\n" + " 2011-01-01 12:00:00, " + "2011-01-01 13:00:00]""") + assert repr(c) == exp + + c = Categorical(idx.append(idx), categories=idx) + exp = ( + "[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, " + "2011-01-01 12:00:00, 2011-01-01 13:00:00, 2011-01-01 09:00:00, " + "2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, " + "2011-01-01 13:00:00]\n" + "Categories (5, datetime64[ns]): [2011-01-01 09:00:00, " + "2011-01-01 10:00:00, 2011-01-01 11:00:00,\n" + " 2011-01-01 12:00:00, " + "2011-01-01 13:00:00]") + + assert repr(c) == exp + + idx = date_range('2011-01-01 09:00', freq='H', periods=5, + tz='US/Eastern') + c = Categorical(idx) + exp = ( + "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, " + "2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, " + "2011-01-01 13:00:00-05:00]\n" + "Categories (5, datetime64[ns, US/Eastern]): " + "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00,\n" + " " + "2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00,\n" + " " + "2011-01-01 13:00:00-05:00]") + + assert repr(c) == exp + + c = Categorical(idx.append(idx), categories=idx) + exp = ( + "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, " + "2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, " + "2011-01-01 13:00:00-05:00, 2011-01-01 09:00:00-05:00, " + "2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, " + "2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00]\n" + "Categories (5, datetime64[ns, US/Eastern]): " + "[2011-01-01 09:00:00-05:00, 
2011-01-01 10:00:00-05:00,\n" + " " + "2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00,\n" + " " + "2011-01-01 13:00:00-05:00]") + + assert repr(c) == exp + + def test_categorical_repr_datetime_ordered(self): + idx = date_range('2011-01-01 09:00', freq='H', periods=5) + c = Categorical(idx, ordered=True) + exp = """[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00] +Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 < + 2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" # noqa + + assert repr(c) == exp + + c = Categorical(idx.append(idx), categories=idx, ordered=True) + exp = """[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00, 2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00] +Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 < + 2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" # noqa + + assert repr(c) == exp + + idx = date_range('2011-01-01 09:00', freq='H', periods=5, + tz='US/Eastern') + c = Categorical(idx, ordered=True) + exp = """[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00] +Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 < + 2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 < + 2011-01-01 13:00:00-05:00]""" # noqa + + assert repr(c) == exp + + c = Categorical(idx.append(idx), categories=idx, ordered=True) + exp = """[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00, 2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00] +Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 
2011-01-01 10:00:00-05:00 < + 2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 < + 2011-01-01 13:00:00-05:00]""" # noqa + + assert repr(c) == exp + + def test_categorical_repr_period(self): + idx = period_range('2011-01-01 09:00', freq='H', periods=5) + c = Categorical(idx) + exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] +Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, + 2011-01-01 13:00]""" # noqa + + assert repr(c) == exp + + c = Categorical(idx.append(idx), categories=idx) + exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] +Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, + 2011-01-01 13:00]""" # noqa + + assert repr(c) == exp + + idx = period_range('2011-01', freq='M', periods=5) + c = Categorical(idx) + exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05] +Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]""" + + assert repr(c) == exp + + c = Categorical(idx.append(idx), categories=idx) + exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01, 2011-02, 2011-03, 2011-04, 2011-05] +Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]""" # noqa + + assert repr(c) == exp + + def test_categorical_repr_period_ordered(self): + idx = period_range('2011-01-01 09:00', freq='H', periods=5) + c = Categorical(idx, ordered=True) + exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] +Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 < + 2011-01-01 13:00]""" # noqa + + assert repr(c) == exp + + c = Categorical(idx.append(idx), categories=idx, ordered=True) + exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 
2011-01-01 12:00, 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] +Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 < + 2011-01-01 13:00]""" # noqa + + assert repr(c) == exp + + idx = period_range('2011-01', freq='M', periods=5) + c = Categorical(idx, ordered=True) + exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05] +Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]""" + + assert repr(c) == exp + + c = Categorical(idx.append(idx), categories=idx, ordered=True) + exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01, 2011-02, 2011-03, 2011-04, 2011-05] +Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]""" # noqa + + assert repr(c) == exp + + def test_categorical_repr_timedelta(self): + idx = timedelta_range('1 days', periods=5) + c = Categorical(idx) + exp = """[1 days, 2 days, 3 days, 4 days, 5 days] +Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]""" + + assert repr(c) == exp + + c = Categorical(idx.append(idx), categories=idx) + exp = """[1 days, 2 days, 3 days, 4 days, 5 days, 1 days, 2 days, 3 days, 4 days, 5 days] +Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]""" # noqa + + assert repr(c) == exp + + idx = timedelta_range('1 hours', periods=20) + c = Categorical(idx) + exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00] +Length: 20 +Categories (20, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, + 3 days 01:00:00, ..., 16 days 01:00:00, 17 days 01:00:00, + 18 days 01:00:00, 19 days 01:00:00]""" # noqa + + assert repr(c) == exp + + c = Categorical(idx.append(idx), categories=idx) + exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, 
..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00] +Length: 40 +Categories (20, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, + 3 days 01:00:00, ..., 16 days 01:00:00, 17 days 01:00:00, + 18 days 01:00:00, 19 days 01:00:00]""" # noqa + + assert repr(c) == exp + + def test_categorical_repr_timedelta_ordered(self): + idx = timedelta_range('1 days', periods=5) + c = Categorical(idx, ordered=True) + exp = """[1 days, 2 days, 3 days, 4 days, 5 days] +Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" # noqa + + assert repr(c) == exp + + c = Categorical(idx.append(idx), categories=idx, ordered=True) + exp = """[1 days, 2 days, 3 days, 4 days, 5 days, 1 days, 2 days, 3 days, 4 days, 5 days] +Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" # noqa + + assert repr(c) == exp + + idx = timedelta_range('1 hours', periods=20) + c = Categorical(idx, ordered=True) + exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00] +Length: 20 +Categories (20, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 < + 3 days 01:00:00 ... 16 days 01:00:00 < 17 days 01:00:00 < + 18 days 01:00:00 < 19 days 01:00:00]""" # noqa + + assert repr(c) == exp + + c = Categorical(idx.append(idx), categories=idx, ordered=True) + exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00] +Length: 40 +Categories (20, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 < + 3 days 01:00:00 ... 
16 days 01:00:00 < 17 days 01:00:00 < + 18 days 01:00:00 < 19 days 01:00:00]""" # noqa + + assert repr(c) == exp + + def test_categorical_index_repr(self): + idx = CategoricalIndex(Categorical([1, 2, 3])) + exp = """CategoricalIndex([1, 2, 3], categories=[1, 2, 3], ordered=False, dtype='category')""" # noqa + assert repr(idx) == exp + + i = CategoricalIndex(Categorical(np.arange(10))) + exp = """CategoricalIndex([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], categories=[0, 1, 2, 3, 4, 5, 6, 7, ...], ordered=False, dtype='category')""" # noqa + assert repr(i) == exp + + def test_categorical_index_repr_ordered(self): + i = CategoricalIndex(Categorical([1, 2, 3], ordered=True)) + exp = """CategoricalIndex([1, 2, 3], categories=[1, 2, 3], ordered=True, dtype='category')""" # noqa + assert repr(i) == exp + + i = CategoricalIndex(Categorical(np.arange(10), ordered=True)) + exp = """CategoricalIndex([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], categories=[0, 1, 2, 3, 4, 5, 6, 7, ...], ordered=True, dtype='category')""" # noqa + assert repr(i) == exp + + def test_categorical_index_repr_datetime(self): + idx = date_range('2011-01-01 09:00', freq='H', periods=5) + i = CategoricalIndex(Categorical(idx)) + exp = """CategoricalIndex(['2011-01-01 09:00:00', '2011-01-01 10:00:00', + '2011-01-01 11:00:00', '2011-01-01 12:00:00', + '2011-01-01 13:00:00'], + categories=[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00], ordered=False, dtype='category')""" # noqa + + assert repr(i) == exp + + idx = date_range('2011-01-01 09:00', freq='H', periods=5, + tz='US/Eastern') + i = CategoricalIndex(Categorical(idx)) + exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00', + '2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00', + '2011-01-01 13:00:00-05:00'], + categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=False, 
dtype='category')""" # noqa + + assert repr(i) == exp + + def test_categorical_index_repr_datetime_ordered(self): + idx = date_range('2011-01-01 09:00', freq='H', periods=5) + i = CategoricalIndex(Categorical(idx, ordered=True)) + exp = """CategoricalIndex(['2011-01-01 09:00:00', '2011-01-01 10:00:00', + '2011-01-01 11:00:00', '2011-01-01 12:00:00', + '2011-01-01 13:00:00'], + categories=[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00], ordered=True, dtype='category')""" # noqa + + assert repr(i) == exp + + idx = date_range('2011-01-01 09:00', freq='H', periods=5, + tz='US/Eastern') + i = CategoricalIndex(Categorical(idx, ordered=True)) + exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00', + '2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00', + '2011-01-01 13:00:00-05:00'], + categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=True, dtype='category')""" # noqa + + assert repr(i) == exp + + i = CategoricalIndex(Categorical(idx.append(idx), ordered=True)) + exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00', + '2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00', + '2011-01-01 13:00:00-05:00', '2011-01-01 09:00:00-05:00', + '2011-01-01 10:00:00-05:00', '2011-01-01 11:00:00-05:00', + '2011-01-01 12:00:00-05:00', '2011-01-01 13:00:00-05:00'], + categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=True, dtype='category')""" # noqa + + assert repr(i) == exp + + def test_categorical_index_repr_period(self): + # test all length + idx = period_range('2011-01-01 09:00', freq='H', periods=1) + i = CategoricalIndex(Categorical(idx)) + exp = """CategoricalIndex(['2011-01-01 09:00'], categories=[2011-01-01 09:00], ordered=False, 
dtype='category')""" # noqa + assert repr(i) == exp + + idx = period_range('2011-01-01 09:00', freq='H', periods=2) + i = CategoricalIndex(Categorical(idx)) + exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00], ordered=False, dtype='category')""" # noqa + assert repr(i) == exp + + idx = period_range('2011-01-01 09:00', freq='H', periods=3) + i = CategoricalIndex(Categorical(idx)) + exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00], ordered=False, dtype='category')""" # noqa + assert repr(i) == exp + + idx = period_range('2011-01-01 09:00', freq='H', periods=5) + i = CategoricalIndex(Categorical(idx)) + exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00', + '2011-01-01 12:00', '2011-01-01 13:00'], + categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=False, dtype='category')""" # noqa + + assert repr(i) == exp + + i = CategoricalIndex(Categorical(idx.append(idx))) + exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00', + '2011-01-01 12:00', '2011-01-01 13:00', '2011-01-01 09:00', + '2011-01-01 10:00', '2011-01-01 11:00', '2011-01-01 12:00', + '2011-01-01 13:00'], + categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=False, dtype='category')""" # noqa + + assert repr(i) == exp + + idx = period_range('2011-01', freq='M', periods=5) + i = CategoricalIndex(Categorical(idx)) + exp = """CategoricalIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05'], categories=[2011-01, 2011-02, 2011-03, 2011-04, 2011-05], ordered=False, dtype='category')""" # noqa + assert repr(i) == exp + + def test_categorical_index_repr_period_ordered(self): + idx = period_range('2011-01-01 09:00', freq='H', periods=5) + i = 
CategoricalIndex(Categorical(idx, ordered=True)) + exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00', + '2011-01-01 12:00', '2011-01-01 13:00'], + categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=True, dtype='category')""" # noqa + + assert repr(i) == exp + + idx = period_range('2011-01', freq='M', periods=5) + i = CategoricalIndex(Categorical(idx, ordered=True)) + exp = """CategoricalIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05'], categories=[2011-01, 2011-02, 2011-03, 2011-04, 2011-05], ordered=True, dtype='category')""" # noqa + assert repr(i) == exp + + def test_categorical_index_repr_timedelta(self): + idx = timedelta_range('1 days', periods=5) + i = CategoricalIndex(Categorical(idx)) + exp = """CategoricalIndex(['1 days', '2 days', '3 days', '4 days', '5 days'], categories=[1 days 00:00:00, 2 days 00:00:00, 3 days 00:00:00, 4 days 00:00:00, 5 days 00:00:00], ordered=False, dtype='category')""" # noqa + assert repr(i) == exp + + idx = timedelta_range('1 hours', periods=10) + i = CategoricalIndex(Categorical(idx)) + exp = """CategoricalIndex(['0 days 01:00:00', '1 days 01:00:00', '2 days 01:00:00', + '3 days 01:00:00', '4 days 01:00:00', '5 days 01:00:00', + '6 days 01:00:00', '7 days 01:00:00', '8 days 01:00:00', + '9 days 01:00:00'], + categories=[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, 5 days 01:00:00, 6 days 01:00:00, 7 days 01:00:00, ...], ordered=False, dtype='category')""" # noqa + + assert repr(i) == exp + + def test_categorical_index_repr_timedelta_ordered(self): + idx = timedelta_range('1 days', periods=5) + i = CategoricalIndex(Categorical(idx, ordered=True)) + exp = """CategoricalIndex(['1 days', '2 days', '3 days', '4 days', '5 days'], categories=[1 days 00:00:00, 2 days 00:00:00, 3 days 00:00:00, 4 days 00:00:00, 5 days 00:00:00], ordered=True, dtype='category')""" # noqa + assert repr(i) == 
exp + + idx = timedelta_range('1 hours', periods=10) + i = CategoricalIndex(Categorical(idx, ordered=True)) + exp = """CategoricalIndex(['0 days 01:00:00', '1 days 01:00:00', '2 days 01:00:00', + '3 days 01:00:00', '4 days 01:00:00', '5 days 01:00:00', + '6 days 01:00:00', '7 days 01:00:00', '8 days 01:00:00', + '9 days 01:00:00'], + categories=[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, 5 days 01:00:00, 6 days 01:00:00, 7 days 01:00:00, ...], ordered=True, dtype='category')""" # noqa + + assert repr(i) == exp diff --git a/pandas/tests/categorical/test_sorting.py b/pandas/tests/categorical/test_sorting.py new file mode 100644 index 0000000000000..88edb6c8f1348 --- /dev/null +++ b/pandas/tests/categorical/test_sorting.py @@ -0,0 +1,123 @@ +# -*- coding: utf-8 -*- + +import numpy as np + +import pandas.util.testing as tm +from pandas import Categorical, Index + + +class TestCategoricalSort(object): + + def test_argsort(self): + c = Categorical([5, 3, 1, 4, 2], ordered=True) + + expected = np.array([2, 4, 1, 3, 0]) + tm.assert_numpy_array_equal(c.argsort(ascending=True), expected, + check_dtype=False) + + expected = expected[::-1] + tm.assert_numpy_array_equal(c.argsort(ascending=False), expected, + check_dtype=False) + + def test_numpy_argsort(self): + c = Categorical([5, 3, 1, 4, 2], ordered=True) + + expected = np.array([2, 4, 1, 3, 0]) + tm.assert_numpy_array_equal(np.argsort(c), expected, + check_dtype=False) + + tm.assert_numpy_array_equal(np.argsort(c, kind='mergesort'), expected, + check_dtype=False) + + msg = "the 'axis' parameter is not supported" + tm.assert_raises_regex(ValueError, msg, np.argsort, + c, axis=0) + + msg = "the 'order' parameter is not supported" + tm.assert_raises_regex(ValueError, msg, np.argsort, + c, order='C') + + def test_sort_values(self): + + # unordered cats are sortable + cat = Categorical(["a", "b", "b", "a"], ordered=False) + cat.sort_values() + + cat = Categorical(["a", "c", "b", "d"], 
ordered=True) + + # sort_values + res = cat.sort_values() + exp = np.array(["a", "b", "c", "d"], dtype=object) + tm.assert_numpy_array_equal(res.__array__(), exp) + tm.assert_index_equal(res.categories, cat.categories) + + cat = Categorical(["a", "c", "b", "d"], + categories=["a", "b", "c", "d"], ordered=True) + res = cat.sort_values() + exp = np.array(["a", "b", "c", "d"], dtype=object) + tm.assert_numpy_array_equal(res.__array__(), exp) + tm.assert_index_equal(res.categories, cat.categories) + + res = cat.sort_values(ascending=False) + exp = np.array(["d", "c", "b", "a"], dtype=object) + tm.assert_numpy_array_equal(res.__array__(), exp) + tm.assert_index_equal(res.categories, cat.categories) + + # sort (inplace order) + cat1 = cat.copy() + cat1.sort_values(inplace=True) + exp = np.array(["a", "b", "c", "d"], dtype=object) + tm.assert_numpy_array_equal(cat1.__array__(), exp) + tm.assert_index_equal(res.categories, cat.categories) + + # reverse + cat = Categorical(["a", "c", "c", "b", "d"], ordered=True) + res = cat.sort_values(ascending=False) + exp_val = np.array(["d", "c", "c", "b", "a"], dtype=object) + exp_categories = Index(["a", "b", "c", "d"]) + tm.assert_numpy_array_equal(res.__array__(), exp_val) + tm.assert_index_equal(res.categories, exp_categories) + + def test_sort_values_na_position(self): + # see gh-12882 + cat = Categorical([5, 2, np.nan, 2, np.nan], ordered=True) + exp_categories = Index([2, 5]) + + exp = np.array([2.0, 2.0, 5.0, np.nan, np.nan]) + res = cat.sort_values() # default arguments + tm.assert_numpy_array_equal(res.__array__(), exp) + tm.assert_index_equal(res.categories, exp_categories) + + exp = np.array([np.nan, np.nan, 2.0, 2.0, 5.0]) + res = cat.sort_values(ascending=True, na_position='first') + tm.assert_numpy_array_equal(res.__array__(), exp) + tm.assert_index_equal(res.categories, exp_categories) + + exp = np.array([np.nan, np.nan, 5.0, 2.0, 2.0]) + res = cat.sort_values(ascending=False, na_position='first') + 
tm.assert_numpy_array_equal(res.__array__(), exp) + tm.assert_index_equal(res.categories, exp_categories) + + exp = np.array([2.0, 2.0, 5.0, np.nan, np.nan]) + res = cat.sort_values(ascending=True, na_position='last') + tm.assert_numpy_array_equal(res.__array__(), exp) + tm.assert_index_equal(res.categories, exp_categories) + + exp = np.array([5.0, 2.0, 2.0, np.nan, np.nan]) + res = cat.sort_values(ascending=False, na_position='last') + tm.assert_numpy_array_equal(res.__array__(), exp) + tm.assert_index_equal(res.categories, exp_categories) + + cat = Categorical(["a", "c", "b", "d", np.nan], ordered=True) + res = cat.sort_values(ascending=False, na_position='last') + exp_val = np.array(["d", "c", "b", "a", np.nan], dtype=object) + exp_categories = Index(["a", "b", "c", "d"]) + tm.assert_numpy_array_equal(res.__array__(), exp_val) + tm.assert_index_equal(res.categories, exp_categories) + + cat = Categorical(["a", "c", "b", "d", np.nan], ordered=True) + res = cat.sort_values(ascending=False, na_position='first') + exp_val = np.array([np.nan, "d", "c", "b", "a"], dtype=object) + exp_categories = Index(["a", "b", "c", "d"]) + tm.assert_numpy_array_equal(res.__array__(), exp_val) + tm.assert_index_equal(res.categories, exp_categories) diff --git a/pandas/tests/categorical/test_subclass.py b/pandas/tests/categorical/test_subclass.py new file mode 100644 index 0000000000000..4060d2ebf633a --- /dev/null +++ b/pandas/tests/categorical/test_subclass.py @@ -0,0 +1,26 @@ +# -*- coding: utf-8 -*- + +from pandas import Categorical + +import pandas.util.testing as tm + + +class TestCategoricalSubclassing(object): + + def test_constructor(self): + sc = tm.SubclassedCategorical(['a', 'b', 'c']) + assert isinstance(sc, tm.SubclassedCategorical) + tm.assert_categorical_equal(sc, Categorical(['a', 'b', 'c'])) + + def test_from_codes(self): + sc = tm.SubclassedCategorical.from_codes([1, 0, 2], ['a', 'b', 'c']) + assert isinstance(sc, tm.SubclassedCategorical) + exp = 
Categorical.from_codes([1, 0, 2], ['a', 'b', 'c']) + tm.assert_categorical_equal(sc, exp) + + def test_map(self): + sc = tm.SubclassedCategorical(['a', 'b', 'c']) + res = sc.map(lambda x: x.upper()) + assert isinstance(res, tm.SubclassedCategorical) + exp = Categorical(['A', 'B', 'C']) + tm.assert_categorical_equal(res, exp) diff --git a/pandas/tests/categorical/test_warnings.py b/pandas/tests/categorical/test_warnings.py new file mode 100644 index 0000000000000..91278580254aa --- /dev/null +++ b/pandas/tests/categorical/test_warnings.py @@ -0,0 +1,18 @@ +# -*- coding: utf-8 -*- + +import pytest + +import pandas.util.testing as tm + + +class TestCategoricalWarnings(object): + def test_tab_complete_warning(self, ip): + # https://github.com/pandas-dev/pandas/issues/16409 + pytest.importorskip('IPython', minversion="6.0.0") + from IPython.core.completer import provisionalcompleter + + code = "import pandas as pd; c = Categorical([])" + ip.run_code(code) + with tm.assert_produces_warning(None): + with provisionalcompleter('ignore'): + list(ip.Completer.completions('c.', 1)) diff --git a/pandas/tests/computation/__init__.py b/pandas/tests/computation/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/computation/test_compat.py b/pandas/tests/computation/test_compat.py new file mode 100644 index 0000000000000..c25ef4bf38cab --- /dev/null +++ b/pandas/tests/computation/test_compat.py @@ -0,0 +1,47 @@ +import pytest +from distutils.version import LooseVersion + +import pandas as pd + +from pandas.core.computation.engines import _engines +import pandas.core.computation.expr as expr +from pandas.core.computation.check import _MIN_NUMEXPR_VERSION + + +def test_compat(): + # test we have compat with our version of nu + + from pandas.core.computation.check import _NUMEXPR_INSTALLED + try: + import numexpr as ne + ver = ne.__version__ + if LooseVersion(ver) < LooseVersion(_MIN_NUMEXPR_VERSION): + assert not _NUMEXPR_INSTALLED + else: + 
assert _NUMEXPR_INSTALLED + except ImportError: + pytest.skip("not testing numexpr version compat") + + +@pytest.mark.parametrize('engine', _engines) +@pytest.mark.parametrize('parser', expr._parsers) +def test_invalid_numexpr_version(engine, parser): + def testit(): + a, b = 1, 2 # noqa + res = pd.eval('a + b', engine=engine, parser=parser) + assert res == 3 + + if engine == 'numexpr': + try: + import numexpr as ne + except ImportError: + pytest.skip("no numexpr") + else: + if (LooseVersion(ne.__version__) < + LooseVersion(_MIN_NUMEXPR_VERSION)): + with pytest.raises(ImportError): + testit() + else: + testit() + else: + testit() diff --git a/pandas/computation/tests/test_eval.py b/pandas/tests/computation/test_eval.py similarity index 71% rename from pandas/computation/tests/test_eval.py rename to pandas/tests/computation/test_eval.py index ada714c8ac52e..07ba0b681418e 100644 --- a/pandas/computation/tests/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -1,43 +1,61 @@ - -# flake8: noqa - import warnings +from warnings import catch_warnings import operator from itertools import product -from distutils.version import LooseVersion import pytest from numpy.random import randn, rand, randint import numpy as np -from pandas.types.common import is_list_like, is_scalar +from pandas.core.dtypes.common import is_bool, is_list_like, is_scalar import pandas as pd from pandas.core import common as com +from pandas.errors import PerformanceWarning from pandas import DataFrame, Series, Panel, date_range from pandas.util.testing import makeCustomDataframe as mkdf -from pandas.computation import pytables -from pandas.computation.engines import _engines, NumExprClobberingError -from pandas.computation.expr import PythonExprVisitor, PandasExprVisitor -from pandas.computation.ops import (_binary_ops_dict, - _special_case_arith_ops_syms, - _arith_ops_syms, _bool_ops_syms, - _unary_math_ops, _binary_math_ops) - -import pandas.computation.expr as expr +from 
pandas.core.computation import pytables +from pandas.core.computation.engines import _engines, NumExprClobberingError +from pandas.core.computation.expr import PythonExprVisitor, PandasExprVisitor +from pandas.core.computation.expressions import ( + _USE_NUMEXPR, _NUMEXPR_INSTALLED) +from pandas.core.computation.ops import ( + _binary_ops_dict, + _special_case_arith_ops_syms, + _arith_ops_syms, _bool_ops_syms, + _unary_math_ops, _binary_math_ops) + +import pandas.core.computation.expr as expr import pandas.util.testing as tm -import pandas.lib as lib +import pandas.util._test_decorators as td from pandas.util.testing import (assert_frame_equal, randbool, - assertRaisesRegexp, assert_numpy_array_equal, - assert_produces_warning, assert_series_equal, - slow) -from pandas.compat import PY3, u, reduce + assert_numpy_array_equal, assert_series_equal, + assert_produces_warning) +from pandas.compat import PY3, reduce _series_frame_incompatible = _bool_ops_syms _scalar_skip = 'in', 'not in' +@pytest.fixture(params=( + pytest.param(engine, + marks=pytest.mark.skipif( + engine == 'numexpr' and not _USE_NUMEXPR, + reason='numexpr enabled->{enabled}, ' + 'installed->{installed}'.format( + enabled=_USE_NUMEXPR, + installed=_NUMEXPR_INSTALLED))) + for engine in _engines)) # noqa +def engine(request): + return request.param + + +@pytest.fixture(params=expr._parsers) +def parser(request): + return request.param + + def engine_has_neg_frac(engine): return _engines[engine].has_neg_frac @@ -48,7 +66,8 @@ def _eval_single_bin(lhs, cmp1, rhs, engine): try: return c(lhs, rhs) except ValueError as e: - if str(e).startswith('negative number cannot be raised to a fractional power'): + if str(e).startswith('negative number cannot be ' + 'raised to a fractional power'): return np.nan raise return c(lhs, rhs) @@ -56,14 +75,14 @@ def _eval_single_bin(lhs, cmp1, rhs, engine): def _series_and_2d_ndarray(lhs, rhs): return ((isinstance(lhs, Series) and - isinstance(rhs, np.ndarray) and rhs.ndim > 
1) - or (isinstance(rhs, Series) and - isinstance(lhs, np.ndarray) and lhs.ndim > 1)) + isinstance(rhs, np.ndarray) and rhs.ndim > 1) or + (isinstance(rhs, Series) and + isinstance(lhs, np.ndarray) and lhs.ndim > 1)) def _series_and_frame(lhs, rhs): - return ((isinstance(lhs, Series) and isinstance(rhs, DataFrame)) - or (isinstance(rhs, Series) and isinstance(lhs, DataFrame))) + return ((isinstance(lhs, Series) and isinstance(rhs, DataFrame)) or + (isinstance(rhs, Series) and isinstance(lhs, DataFrame))) def _bool_and_frame(lhs, rhs): @@ -78,20 +97,18 @@ def _is_py3_complex_incompat(result, expected): _good_arith_ops = com.difference(_arith_ops_syms, _special_case_arith_ops_syms) -class TestEvalNumexprPandas(tm.TestCase): +@td.skip_if_no_ne +class TestEvalNumexprPandas(object): @classmethod - def setUpClass(cls): - super(TestEvalNumexprPandas, cls).setUpClass() - tm.skip_if_no_ne() + def setup_class(cls): import numexpr as ne cls.ne = ne cls.engine = 'numexpr' cls.parser = 'pandas' @classmethod - def tearDownClass(cls): - super(TestEvalNumexprPandas, cls).tearDownClass() + def teardown_class(cls): del cls.engine, cls.parser if hasattr(cls, 'ne'): del cls.ne @@ -120,16 +137,16 @@ def setup_ops(self): self.arith_ops = _good_arith_ops self.unary_ops = '-', '~', 'not ' - def setUp(self): + def setup_method(self, method): self.setup_ops() self.setup_data() self.current_engines = filter(lambda x: x != self.engine, _engines) - def tearDown(self): + def teardown_method(self, method): del self.lhses, self.rhses, self.scalar_rhses, self.scalar_lhses del self.pandas_rhses, self.pandas_lhses, self.current_engines - @slow + @pytest.mark.slow def test_complex_cmp_ops(self): cmp_ops = ('!=', '==', '<=', '>=', '<', '>') cmp2_ops = ('>', '<') @@ -146,7 +163,7 @@ def test_simple_cmp_ops(self): for lhs, rhs, cmp_op in product(bool_lhses, bool_rhses, self.cmp_ops): self.check_simple_cmp_op(lhs, cmp_op, rhs) - @slow + @pytest.mark.slow def test_binary_arith_ops(self): for lhs, op, rhs 
in product(self.lhses, self.arith_ops, self.rhses): self.check_binary_arith_op(lhs, op, rhs) @@ -159,24 +176,23 @@ def test_floor_division(self): for lhs, rhs in product(self.lhses, self.rhses): self.check_floor_division(lhs, '//', rhs) + @td.skip_if_windows def test_pow(self): - tm._skip_if_windows() - # odd failure on win32 platform, so skip for lhs, rhs in product(self.lhses, self.rhses): self.check_pow(lhs, '**', rhs) - @slow + @pytest.mark.slow def test_single_invert_op(self): for lhs, op, rhs in product(self.lhses, self.cmp_ops, self.rhses): self.check_single_invert_op(lhs, op, rhs) - @slow + @pytest.mark.slow def test_compound_invert_op(self): for lhs, op, rhs in product(self.lhses, self.cmp_ops, self.rhses): self.check_compound_invert_op(lhs, op, rhs) - @slow + @pytest.mark.slow def test_chained_cmp_op(self): mids = self.lhses cmp_ops = '<', '>' @@ -192,7 +208,7 @@ def check_equal(self, result, expected): elif isinstance(result, np.ndarray): tm.assert_numpy_array_equal(result, expected) else: - self.assertEqual(result, expected) + assert result == expected def check_complex_cmp_op(self, lhs, cmp1, rhs, binop, cmp2): skip_these = _scalar_skip @@ -202,27 +218,30 @@ def check_complex_cmp_op(self, lhs, cmp1, rhs, binop, cmp2): scalar_with_in_notin = (is_scalar(rhs) and (cmp1 in skip_these or cmp2 in skip_these)) if scalar_with_in_notin: - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): pd.eval(ex, engine=self.engine, parser=self.parser) - self.assertRaises(TypeError, pd.eval, ex, engine=self.engine, - parser=self.parser, local_dict={'lhs': lhs, - 'rhs': rhs}) + with pytest.raises(TypeError): + pd.eval(ex, engine=self.engine, parser=self.parser, + local_dict={'lhs': lhs, 'rhs': rhs}) else: lhs_new = _eval_single_bin(lhs, cmp1, rhs, self.engine) rhs_new = _eval_single_bin(lhs, cmp2, rhs, self.engine) - if (isinstance(lhs_new, Series) and isinstance(rhs_new, DataFrame) - and binop in _series_frame_incompatible): + if (isinstance(lhs_new, Series) 
and + isinstance(rhs_new, DataFrame) and + binop in _series_frame_incompatible): pass # TODO: the code below should be added back when left and right # hand side bool ops are fixed. - + # # try: - # self.assertRaises(Exception, pd.eval, ex, - #local_dict={'lhs': lhs, 'rhs': rhs}, - # engine=self.engine, parser=self.parser) + # pytest.raises(Exception, pd.eval, ex, + # local_dict={'lhs': lhs, 'rhs': rhs}, + # engine=self.engine, parser=self.parser) # except AssertionError: - #import ipdb; ipdb.set_trace() - # raise + # import ipdb + # + # ipdb.set_trace() + # raise else: expected = _eval_single_bin( lhs_new, binop, rhs_new, self.engine) @@ -230,7 +249,6 @@ def check_complex_cmp_op(self, lhs, cmp1, rhs, binop, cmp2): self.check_equal(result, expected) def check_chained_cmp_op(self, lhs, cmp1, mid, cmp2, rhs): - skip_these = _scalar_skip def check_operands(left, right, cmp_op): return _eval_single_bin(left, cmp_op, right, self.engine) @@ -253,9 +271,9 @@ def check_operands(left, right, cmp_op): def check_simple_cmp_op(self, lhs, cmp1, rhs): ex = 'lhs {0} rhs'.format(cmp1) if cmp1 in ('in', 'not in') and not is_list_like(rhs): - self.assertRaises(TypeError, pd.eval, ex, engine=self.engine, - parser=self.parser, local_dict={'lhs': lhs, - 'rhs': rhs}) + pytest.raises(TypeError, pd.eval, ex, engine=self.engine, + parser=self.parser, local_dict={'lhs': lhs, + 'rhs': rhs}) else: expected = _eval_single_bin(lhs, cmp1, rhs, self.engine) result = pd.eval(ex, engine=self.engine, parser=self.parser) @@ -308,15 +326,16 @@ def check_floor_division(self, lhs, arith1, rhs): expected = lhs // rhs self.check_equal(res, expected) else: - self.assertRaises(TypeError, pd.eval, ex, local_dict={'lhs': lhs, - 'rhs': rhs}, - engine=self.engine, parser=self.parser) + pytest.raises(TypeError, pd.eval, ex, + local_dict={'lhs': lhs, 'rhs': rhs}, + engine=self.engine, parser=self.parser) def get_expected_pow_result(self, lhs, rhs): try: expected = _eval_single_bin(lhs, '**', rhs, self.engine) 
except ValueError as e: - if str(e).startswith('negative number cannot be raised to a fractional power'): + if str(e).startswith('negative number cannot be ' + 'raised to a fractional power'): if self.engine == 'python': pytest.skip(str(e)) else: @@ -332,8 +351,8 @@ def check_pow(self, lhs, arith1, rhs): if (is_scalar(lhs) and is_scalar(rhs) and _is_py3_complex_incompat(result, expected)): - self.assertRaises(AssertionError, tm.assert_numpy_array_equal, - result, expected) + pytest.raises(AssertionError, tm.assert_numpy_array_equal, + result, expected) else: tm.assert_almost_equal(result, expected) @@ -355,7 +374,6 @@ def check_single_invert_op(self, lhs, cmp1, rhs): tm.assert_almost_equal(expected, result) for engine in self.current_engines: - tm.skip_if_no_ne(engine) tm.assert_almost_equal(result, pd.eval('~elb', engine=engine, parser=self.parser)) @@ -364,9 +382,9 @@ def check_compound_invert_op(self, lhs, cmp1, rhs): ex = '~(lhs {0} rhs)'.format(cmp1) if is_scalar(rhs) and cmp1 in skip_these: - self.assertRaises(TypeError, pd.eval, ex, engine=self.engine, - parser=self.parser, local_dict={'lhs': lhs, - 'rhs': rhs}) + pytest.raises(TypeError, pd.eval, ex, engine=self.engine, + parser=self.parser, local_dict={'lhs': lhs, + 'rhs': rhs}) else: # compound if is_scalar(lhs) and is_scalar(rhs): @@ -381,7 +399,6 @@ def check_compound_invert_op(self, lhs, cmp1, rhs): # make sure the other engines work the same as this one for engine in self.current_engines: - tm.skip_if_no_ne(engine) ev = pd.eval(ex, engine=self.engine, parser=self.parser) tm.assert_almost_equal(ev, result) @@ -396,16 +413,16 @@ def test_frame_invert(self): # float always raises lhs = DataFrame(randn(5, 2)) if self.engine == 'numexpr': - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): result = pd.eval(expr, engine=self.engine, parser=self.parser) else: - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): result = pd.eval(expr, engine=self.engine, 
parser=self.parser) # int raises on numexpr lhs = DataFrame(randint(5, size=(5, 2))) if self.engine == 'numexpr': - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): result = pd.eval(expr, engine=self.engine, parser=self.parser) else: expect = ~lhs @@ -421,10 +438,10 @@ def test_frame_invert(self): # object raises lhs = DataFrame({'b': ['a', 1, 2.0], 'c': rand(3) > 0.5}) if self.engine == 'numexpr': - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): result = pd.eval(expr, engine=self.engine, parser=self.parser) else: - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): result = pd.eval(expr, engine=self.engine, parser=self.parser) def test_series_invert(self): @@ -435,16 +452,16 @@ def test_series_invert(self): # float raises lhs = Series(randn(5)) if self.engine == 'numexpr': - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): result = pd.eval(expr, engine=self.engine, parser=self.parser) else: - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): result = pd.eval(expr, engine=self.engine, parser=self.parser) # int raises on numexpr lhs = Series(randint(5, size=5)) if self.engine == 'numexpr': - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): result = pd.eval(expr, engine=self.engine, parser=self.parser) else: expect = ~lhs @@ -464,10 +481,10 @@ def test_series_invert(self): # object lhs = Series(['a', 1, 2.0]) if self.engine == 'numexpr': - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): result = pd.eval(expr, engine=self.engine, parser=self.parser) else: - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): result = pd.eval(expr, engine=self.engine, parser=self.parser) def test_frame_negate(self): @@ -488,7 +505,7 @@ def test_frame_negate(self): # bool doesn't work with numexpr but works elsewhere lhs = DataFrame(rand(5, 2) > 0.5) if self.engine == 'numexpr': - with 
tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): result = pd.eval(expr, engine=self.engine, parser=self.parser) else: expect = -lhs @@ -513,7 +530,7 @@ def test_series_negate(self): # bool doesn't work with numexpr but works elsewhere lhs = Series(rand(5) > 0.5) if self.engine == 'numexpr': - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): result = pd.eval(expr, engine=self.engine, parser=self.parser) else: expect = -lhs @@ -525,95 +542,69 @@ def test_frame_pos(self): # float lhs = DataFrame(randn(5, 2)) - if self.engine == 'python': - with tm.assertRaises(TypeError): - result = pd.eval(expr, engine=self.engine, parser=self.parser) - else: - expect = lhs - result = pd.eval(expr, engine=self.engine, parser=self.parser) - assert_frame_equal(expect, result) + expect = lhs + result = pd.eval(expr, engine=self.engine, parser=self.parser) + assert_frame_equal(expect, result) # int lhs = DataFrame(randint(5, size=(5, 2))) - if self.engine == 'python': - with tm.assertRaises(TypeError): - result = pd.eval(expr, engine=self.engine, parser=self.parser) - else: - expect = lhs - result = pd.eval(expr, engine=self.engine, parser=self.parser) - assert_frame_equal(expect, result) + expect = lhs + result = pd.eval(expr, engine=self.engine, parser=self.parser) + assert_frame_equal(expect, result) # bool doesn't work with numexpr but works elsewhere lhs = DataFrame(rand(5, 2) > 0.5) - if self.engine == 'python': - with tm.assertRaises(TypeError): - result = pd.eval(expr, engine=self.engine, parser=self.parser) - else: - expect = lhs - result = pd.eval(expr, engine=self.engine, parser=self.parser) - assert_frame_equal(expect, result) + expect = lhs + result = pd.eval(expr, engine=self.engine, parser=self.parser) + assert_frame_equal(expect, result) def test_series_pos(self): expr = self.ex('+') # float lhs = Series(randn(5)) - if self.engine == 'python': - with tm.assertRaises(TypeError): - result = pd.eval(expr, 
engine=self.engine, parser=self.parser) - else: - expect = lhs - result = pd.eval(expr, engine=self.engine, parser=self.parser) - assert_series_equal(expect, result) + expect = lhs + result = pd.eval(expr, engine=self.engine, parser=self.parser) + assert_series_equal(expect, result) # int lhs = Series(randint(5, size=5)) - if self.engine == 'python': - with tm.assertRaises(TypeError): - result = pd.eval(expr, engine=self.engine, parser=self.parser) - else: - expect = lhs - result = pd.eval(expr, engine=self.engine, parser=self.parser) - assert_series_equal(expect, result) + expect = lhs + result = pd.eval(expr, engine=self.engine, parser=self.parser) + assert_series_equal(expect, result) # bool doesn't work with numexpr but works elsewhere lhs = Series(rand(5) > 0.5) - if self.engine == 'python': - with tm.assertRaises(TypeError): - result = pd.eval(expr, engine=self.engine, parser=self.parser) - else: - expect = lhs - result = pd.eval(expr, engine=self.engine, parser=self.parser) - assert_series_equal(expect, result) + expect = lhs + result = pd.eval(expr, engine=self.engine, parser=self.parser) + assert_series_equal(expect, result) def test_scalar_unary(self): - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): pd.eval('~1.0', engine=self.engine, parser=self.parser) - self.assertEqual( - pd.eval('-1.0', parser=self.parser, engine=self.engine), -1.0) - self.assertEqual( - pd.eval('+1.0', parser=self.parser, engine=self.engine), +1.0) - - self.assertEqual( - pd.eval('~1', parser=self.parser, engine=self.engine), ~1) - self.assertEqual( - pd.eval('-1', parser=self.parser, engine=self.engine), -1) - self.assertEqual( - pd.eval('+1', parser=self.parser, engine=self.engine), +1) - - self.assertEqual( - pd.eval('~True', parser=self.parser, engine=self.engine), ~True) - self.assertEqual( - pd.eval('~False', parser=self.parser, engine=self.engine), ~False) - self.assertEqual( - pd.eval('-True', parser=self.parser, engine=self.engine), -True) - 
self.assertEqual( - pd.eval('-False', parser=self.parser, engine=self.engine), -False) - self.assertEqual( - pd.eval('+True', parser=self.parser, engine=self.engine), +True) - self.assertEqual( - pd.eval('+False', parser=self.parser, engine=self.engine), +False) + assert pd.eval('-1.0', parser=self.parser, + engine=self.engine) == -1.0 + assert pd.eval('+1.0', parser=self.parser, + engine=self.engine) == +1.0 + assert pd.eval('~1', parser=self.parser, + engine=self.engine) == ~1 + assert pd.eval('-1', parser=self.parser, + engine=self.engine) == -1 + assert pd.eval('+1', parser=self.parser, + engine=self.engine) == +1 + assert pd.eval('~True', parser=self.parser, + engine=self.engine) == ~True + assert pd.eval('~False', parser=self.parser, + engine=self.engine) == ~False + assert pd.eval('-True', parser=self.parser, + engine=self.engine) == -True + assert pd.eval('-False', parser=self.parser, + engine=self.engine) == -False + assert pd.eval('+True', parser=self.parser, + engine=self.engine) == +True + assert pd.eval('+False', parser=self.parser, + engine=self.engine) == +False def test_unary_in_array(self): # GH 11235 @@ -632,63 +623,64 @@ def test_disallow_scalar_bool_ops(self): exprs += '2 * x > 2 or 1 and 2', exprs += '2 * df > 3 and 1 or a', - x, a, b, df = np.random.randn(3), 1, 2, DataFrame(randn(3, 2)) + x, a, b, df = np.random.randn(3), 1, 2, DataFrame(randn(3, 2)) # noqa for ex in exprs: - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): pd.eval(ex, engine=self.engine, parser=self.parser) def test_identical(self): - # GH 10546 + # see gh-10546 x = 1 result = pd.eval('x', engine=self.engine, parser=self.parser) - self.assertEqual(result, 1) - self.assertTrue(is_scalar(result)) + assert result == 1 + assert is_scalar(result) x = 1.5 result = pd.eval('x', engine=self.engine, parser=self.parser) - self.assertEqual(result, 1.5) - self.assertTrue(is_scalar(result)) + assert result == 1.5 + assert is_scalar(result) x = False 
result = pd.eval('x', engine=self.engine, parser=self.parser) - self.assertEqual(result, False) - self.assertTrue(is_scalar(result)) + assert not result + assert is_bool(result) + assert is_scalar(result) x = np.array([1]) result = pd.eval('x', engine=self.engine, parser=self.parser) tm.assert_numpy_array_equal(result, np.array([1])) - self.assertEqual(result.shape, (1, )) + assert result.shape == (1, ) x = np.array([1.5]) result = pd.eval('x', engine=self.engine, parser=self.parser) tm.assert_numpy_array_equal(result, np.array([1.5])) - self.assertEqual(result.shape, (1, )) + assert result.shape == (1, ) - x = np.array([False]) + x = np.array([False]) # noqa result = pd.eval('x', engine=self.engine, parser=self.parser) tm.assert_numpy_array_equal(result, np.array([False])) - self.assertEqual(result.shape, (1, )) + assert result.shape == (1, ) def test_line_continuation(self): # GH 11149 exp = """1 + 2 * \ 5 - 1 + 2 """ result = pd.eval(exp, engine=self.engine, parser=self.parser) - self.assertEqual(result, 12) + assert result == 12 def test_float_truncation(self): # GH 14241 exp = '1000000000.006' result = pd.eval(exp, engine=self.engine, parser=self.parser) expected = np.float64(exp) - self.assertEqual(result, expected) + assert result == expected df = pd.DataFrame({'A': [1000000000.0009, 1000000000.0011, 1000000000.0015]}) cutoff = 1000000000.0006 result = df.query("A < %.4f" % cutoff) - self.assertTrue(result.empty) + assert result.empty cutoff = 1000000000.0010 result = df.query("A > %.4f" % cutoff) @@ -700,13 +692,25 @@ def test_float_truncation(self): expected = df.loc[[1], :] tm.assert_frame_equal(expected, result) + def test_disallow_python_keywords(self): + # GH 18221 + df = pd.DataFrame([[0, 0, 0]], columns=['foo', 'bar', 'class']) + msg = "Python keyword not valid identifier in numexpr query" + with tm.assert_raises_regex(SyntaxError, msg): + df.query('class == 0') + df = pd.DataFrame() + df.index.name = 'lambda' + with 
tm.assert_raises_regex(SyntaxError, msg): + df.query('lambda == 0') + + +@td.skip_if_no_ne class TestEvalNumexprPython(TestEvalNumexprPandas): @classmethod - def setUpClass(cls): - super(TestEvalNumexprPython, cls).setUpClass() - tm.skip_if_no_ne() + def setup_class(cls): + super(TestEvalNumexprPython, cls).setup_class() import numexpr as ne cls.ne = ne cls.engine = 'numexpr' @@ -724,15 +728,15 @@ def setup_ops(self): def check_chained_cmp_op(self, lhs, cmp1, mid, cmp2, rhs): ex1 = 'lhs {0} mid {1} rhs'.format(cmp1, cmp2) - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): pd.eval(ex1, engine=self.engine, parser=self.parser) class TestEvalPythonPython(TestEvalNumexprPython): @classmethod - def setUpClass(cls): - super(TestEvalPythonPython, cls).setUpClass() + def setup_class(cls): + super(TestEvalPythonPython, cls).setup_class() cls.engine = 'python' cls.parser = 'python' @@ -761,8 +765,8 @@ def check_alignment(self, result, nlhs, ghs, op): class TestEvalPythonPandas(TestEvalPythonPython): @classmethod - def setUpClass(cls): - super(TestEvalPythonPandas, cls).setUpClass() + def setup_class(cls): + super(TestEvalPythonPandas, cls).setup_class() cls.engine = 'python' cls.parser = 'pandas' @@ -774,17 +778,16 @@ def check_chained_cmp_op(self, lhs, cmp1, mid, cmp2, rhs): f = lambda *args, **kwargs: np.random.randn() -ENGINES_PARSERS = list(product(_engines, expr._parsers)) - -#------------------------------------- -# typecasting rules consistency with python -# issue #12388 +# ------------------------------------- +# gh-12388: Typecasting rules consistency with python class TestTypeCasting(object): - - def check_binop_typecasting(self, engine, parser, op, dt): - tm.skip_if_no_ne(engine) + @pytest.mark.parametrize('op', ['+', '-', '*', '**', '/']) + # maybe someday... 
numexpr has too many upcasting rules now + # chain(*(np.sctypes[x] for x in ['uint', 'int', 'float'])) + @pytest.mark.parametrize('dt', [np.float32, np.float64]) + def test_binop_typecasting(self, engine, parser, op, dt): df = mkdf(5, 3, data_gen_f=f, dtype=dt) s = 'df {} 3'.format(op) res = pd.eval(s, engine=engine, parser=parser) @@ -798,18 +801,9 @@ def check_binop_typecasting(self, engine, parser, op, dt): assert res.values.dtype == dt assert_frame_equal(res, eval(s)) - def test_binop_typecasting(self): - for engine, parser in ENGINES_PARSERS: - for op in ['+', '-', '*', '**', '/']: - # maybe someday... numexpr has too many upcasting rules now - # for dt in chain(*(np.sctypes[x] for x in ['uint', 'int', - # 'float'])): - for dt in [np.float32, np.float64]: - yield self.check_binop_typecasting, engine, parser, op, dt - -#------------------------------------- -# basic and complex alignment +# ------------------------------------- +# Basic and complex alignment def _is_datetime(x): return issubclass(x.dtype.type, np.datetime64) @@ -826,19 +820,13 @@ class TestAlignment(object): index_types = 'i', 'u', 'dt' lhs_index_types = index_types + ('s',) # 'p' - def check_align_nested_unary_op(self, engine, parser): - tm.skip_if_no_ne(engine) + def test_align_nested_unary_op(self, engine, parser): s = 'df * ~2' df = mkdf(5, 3, data_gen_f=f) res = pd.eval(s, engine=engine, parser=parser) assert_frame_equal(res, df * ~2) - def test_align_nested_unary_op(self): - for engine, parser in ENGINES_PARSERS: - yield self.check_align_nested_unary_op, engine, parser - - def check_basic_frame_alignment(self, engine, parser): - tm.skip_if_no_ne(engine) + def test_basic_frame_alignment(self, engine, parser): args = product(self.lhs_index_types, self.index_types, self.index_types) with warnings.catch_warnings(record=True): @@ -856,12 +844,7 @@ def check_basic_frame_alignment(self, engine, parser): res = pd.eval('df + df2', engine=engine, parser=parser) assert_frame_equal(res, df + df2) - 
def test_basic_frame_alignment(self): - for engine, parser in ENGINES_PARSERS: - yield self.check_basic_frame_alignment, engine, parser - - def check_frame_comparison(self, engine, parser): - tm.skip_if_no_ne(engine) + def test_frame_comparison(self, engine, parser): args = product(self.lhs_index_types, repeat=2) for r_idx_type, c_idx_type in args: df = mkdf(10, 10, data_gen_f=f, r_idx_type=r_idx_type, @@ -874,12 +857,8 @@ def check_frame_comparison(self, engine, parser): res = pd.eval('df < df3', engine=engine, parser=parser) assert_frame_equal(res, df < df3) - def test_frame_comparison(self): - for engine, parser in ENGINES_PARSERS: - yield self.check_frame_comparison, engine, parser - - def check_medium_complex_frame_alignment(self, engine, parser): - tm.skip_if_no_ne(engine) + @pytest.mark.slow + def test_medium_complex_frame_alignment(self, engine, parser): args = product(self.lhs_index_types, self.index_types, self.index_types, self.index_types) @@ -899,14 +878,7 @@ def check_medium_complex_frame_alignment(self, engine, parser): engine=engine, parser=parser) assert_frame_equal(res, df + df2 + df3) - @slow - def test_medium_complex_frame_alignment(self): - for engine, parser in ENGINES_PARSERS: - yield self.check_medium_complex_frame_alignment, engine, parser - - def check_basic_frame_series_alignment(self, engine, parser): - tm.skip_if_no_ne(engine) - + def test_basic_frame_series_alignment(self, engine, parser): def testit(r_idx_type, c_idx_type, index_name): df = mkdf(10, 10, data_gen_f=f, r_idx_type=r_idx_type, c_idx_type=c_idx_type) @@ -932,13 +904,7 @@ def testit(r_idx_type, c_idx_type, index_name): for r_idx_type, c_idx_type, index_name in args: testit(r_idx_type, c_idx_type, index_name) - def test_basic_frame_series_alignment(self): - for engine, parser in ENGINES_PARSERS: - yield self.check_basic_frame_series_alignment, engine, parser - - def check_basic_series_frame_alignment(self, engine, parser): - tm.skip_if_no_ne(engine) - + def 
test_basic_series_frame_alignment(self, engine, parser): def testit(r_idx_type, c_idx_type, index_name): df = mkdf(10, 7, data_gen_f=f, r_idx_type=r_idx_type, c_idx_type=c_idx_type) @@ -968,12 +934,7 @@ def testit(r_idx_type, c_idx_type, index_name): for r_idx_type, c_idx_type, index_name in args: testit(r_idx_type, c_idx_type, index_name) - def test_basic_series_frame_alignment(self): - for engine, parser in ENGINES_PARSERS: - yield self.check_basic_series_frame_alignment, engine, parser - - def check_series_frame_commutativity(self, engine, parser): - tm.skip_if_no_ne(engine) + def test_series_frame_commutativity(self, engine, parser): args = product(self.lhs_index_types, self.index_types, ('+', '*'), ('index', 'columns')) @@ -1000,13 +961,8 @@ def check_series_frame_commutativity(self, engine, parser): if engine == 'numexpr': assert_frame_equal(a, b) - def test_series_frame_commutativity(self): - for engine, parser in ENGINES_PARSERS: - yield self.check_series_frame_commutativity, engine, parser - - def check_complex_series_frame_alignment(self, engine, parser): - tm.skip_if_no_ne(engine) - + @pytest.mark.slow + def test_complex_series_frame_alignment(self, engine, parser): import random args = product(self.lhs_index_types, self.index_types, self.index_types, self.index_types) @@ -1047,20 +1003,14 @@ def check_complex_series_frame_alignment(self, engine, parser): parser=parser) else: res = pd.eval('df2 + s + df', engine=engine, parser=parser) - tm.assert_equal(res.shape, expected.shape) + assert res.shape == expected.shape assert_frame_equal(res, expected) - @slow - def test_complex_series_frame_alignment(self): - for engine, parser in ENGINES_PARSERS: - yield self.check_complex_series_frame_alignment, engine, parser - - def check_performance_warning_for_poor_alignment(self, engine, parser): - tm.skip_if_no_ne(engine) + def test_performance_warning_for_poor_alignment(self, engine, parser): df = DataFrame(randn(1000, 10)) s = Series(randn(10000)) if engine == 
'numexpr': - seen = pd.core.common.PerformanceWarning + seen = PerformanceWarning else: seen = False @@ -1082,7 +1032,7 @@ def check_performance_warning_for_poor_alignment(self, engine, parser): is_python_engine = engine == 'python' if not is_python_engine: - wrn = pd.core.common.PerformanceWarning + wrn = PerformanceWarning else: wrn = False @@ -1090,36 +1040,29 @@ def check_performance_warning_for_poor_alignment(self, engine, parser): pd.eval('df + s', engine=engine, parser=parser) if not is_python_engine: - tm.assert_equal(len(w), 1) + assert len(w) == 1 msg = str(w[0].message) expected = ("Alignment difference on axis {0} is larger" " than an order of magnitude on term {1!r}, " "by more than {2:.4g}; performance may suffer" "".format(1, 'df', np.log10(s.size - df.shape[1]))) - tm.assert_equal(msg, expected) - - def test_performance_warning_for_poor_alignment(self): - for engine, parser in ENGINES_PARSERS: - yield (self.check_performance_warning_for_poor_alignment, engine, - parser) + assert msg == expected -#------------------------------------ -# slightly more complex ops +# ------------------------------------ +# Slightly more complex ops -class TestOperationsNumExprPandas(tm.TestCase): +@td.skip_if_no_ne +class TestOperationsNumExprPandas(object): @classmethod - def setUpClass(cls): - super(TestOperationsNumExprPandas, cls).setUpClass() - tm.skip_if_no_ne() + def setup_class(cls): cls.engine = 'numexpr' cls.parser = 'pandas' cls.arith_ops = expr._arith_ops_syms + expr._cmp_ops_syms @classmethod - def tearDownClass(cls): - super(TestOperationsNumExprPandas, cls).tearDownClass() + def teardown_class(cls): del cls.engine, cls.parser def eval(self, *args, **kwargs): @@ -1137,22 +1080,22 @@ def test_simple_arith_ops(self): ex3 = '1 {0} (x + 1)'.format(op) if op in ('in', 'not in'): - self.assertRaises(TypeError, pd.eval, ex, - engine=self.engine, parser=self.parser) + pytest.raises(TypeError, pd.eval, ex, + engine=self.engine, parser=self.parser) else: expec = 
_eval_single_bin(1, op, 1, self.engine) x = self.eval(ex, engine=self.engine, parser=self.parser) - tm.assert_equal(x, expec) + assert x == expec expec = _eval_single_bin(x, op, 1, self.engine) y = self.eval(ex2, local_dict={'x': x}, engine=self.engine, parser=self.parser) - tm.assert_equal(y, expec) + assert y == expec expec = _eval_single_bin(1, op, x + 1, self.engine) y = self.eval(ex3, local_dict={'x': x}, engine=self.engine, parser=self.parser) - tm.assert_equal(y, expec) + assert y == expec def test_simple_bool_ops(self): for op, lhs, rhs in product(expr._bool_ops_syms, (True, False), @@ -1160,7 +1103,7 @@ def test_simple_bool_ops(self): ex = '{0} {1} {2}'.format(lhs, op, rhs) res = self.eval(ex) exp = eval(ex) - self.assertEqual(res, exp) + assert res == exp def test_bool_ops_with_constants(self): for op, lhs, rhs in product(expr._bool_ops_syms, ('True', 'False'), @@ -1168,14 +1111,15 @@ def test_bool_ops_with_constants(self): ex = '{0} {1} {2}'.format(lhs, op, rhs) res = self.eval(ex) exp = eval(ex) - self.assertEqual(res, exp) + assert res == exp def test_panel_fails(self): - x = Panel(randn(3, 4, 5)) - y = Series(randn(10)) - with pytest.raises(NotImplementedError): - self.eval('x + y', - local_dict={'x': x, 'y': y}) + with catch_warnings(record=True): + x = Panel(randn(3, 4, 5)) + y = Series(randn(10)) + with pytest.raises(NotImplementedError): + self.eval('x + y', + local_dict={'x': x, 'y': y}) def test_4d_ndarray_fails(self): x = randn(3, 4, 5, 6) @@ -1186,7 +1130,7 @@ def test_4d_ndarray_fails(self): def test_constant(self): x = self.eval('1') - tm.assert_equal(x, 1) + assert x == 1 def test_single_variable(self): df = DataFrame(randn(10, 2)) @@ -1196,7 +1140,7 @@ def test_single_variable(self): def test_truediv(self): s = np.array([1]) ex = 's / 1' - d = {'s': s} + d = {'s': s} # noqa if PY3: res = self.eval(ex, truediv=False) @@ -1207,19 +1151,19 @@ def test_truediv(self): res = self.eval('1 / 2', truediv=True) expec = 0.5 - self.assertEqual(res, 
expec) + assert res == expec res = self.eval('1 / 2', truediv=False) expec = 0.5 - self.assertEqual(res, expec) + assert res == expec res = self.eval('s / 2', truediv=False) expec = 0.5 - self.assertEqual(res, expec) + assert res == expec res = self.eval('s / 2', truediv=True) expec = 0.5 - self.assertEqual(res, expec) + assert res == expec else: res = self.eval(ex, truediv=False) tm.assert_numpy_array_equal(res, np.array([1])) @@ -1229,23 +1173,23 @@ def test_truediv(self): res = self.eval('1 / 2', truediv=True) expec = 0.5 - self.assertEqual(res, expec) + assert res == expec res = self.eval('1 / 2', truediv=False) expec = 0 - self.assertEqual(res, expec) + assert res == expec res = self.eval('s / 2', truediv=False) expec = 0 - self.assertEqual(res, expec) + assert res == expec res = self.eval('s / 2', truediv=True) expec = 0.5 - self.assertEqual(res, expec) + assert res == expec def test_failing_subscript_with_name_error(self): - df = DataFrame(np.random.randn(5, 3)) - with tm.assertRaises(NameError): + df = DataFrame(np.random.randn(5, 3)) # noqa + with pytest.raises(NameError): self.eval('df[x > 2] > 2') def test_lhs_expression_subscript(self): @@ -1271,20 +1215,19 @@ def test_assignment_fails(self): df = DataFrame(np.random.randn(5, 3), columns=list('abc')) df2 = DataFrame(np.random.randn(5, 3)) expr1 = 'df = df2' - self.assertRaises(ValueError, self.eval, expr1, - local_dict={'df': df, 'df2': df2}) + pytest.raises(ValueError, self.eval, expr1, + local_dict={'df': df, 'df2': df2}) def test_assignment_column(self): df = DataFrame(np.random.randn(5, 2), columns=list('ab')) orig_df = df.copy() # multiple assignees - self.assertRaises(SyntaxError, df.eval, 'd c = a + b') + pytest.raises(SyntaxError, df.eval, 'd c = a + b') # invalid assignees - self.assertRaises(SyntaxError, df.eval, 'd,c = a + b') - self.assertRaises( - SyntaxError, df.eval, 'Timestamp("20131001") = a + b') + pytest.raises(SyntaxError, df.eval, 'd,c = a + b') + pytest.raises(SyntaxError, df.eval, 
'Timestamp("20131001") = a + b') # single assignment - existing variable expected = orig_df.copy() @@ -1320,14 +1263,14 @@ def f(): df.eval('a = a + b', inplace=True) result = old_a + df.b assert_series_equal(result, df.a, check_names=False) - self.assertTrue(result.name is None) + assert result.name is None f() # multiple assignment df = orig_df.copy() df.eval('c = a + b', inplace=True) - self.assertRaises(SyntaxError, df.eval, 'c = a = b') + pytest.raises(SyntaxError, df.eval, 'c = a = b') # explicit targets df = orig_df.copy() @@ -1345,22 +1288,15 @@ def test_column_in(self): assert_series_equal(result, expected) def assignment_not_inplace(self): - # GH 9297 + # see gh-9297 df = DataFrame(np.random.randn(5, 2), columns=list('ab')) actual = df.eval('c = a + b', inplace=False) - self.assertIsNotNone(actual) + assert actual is not None + expected = df.copy() expected['c'] = expected['a'] + expected['b'] - assert_frame_equal(df, expected) - - # default for inplace will change - with tm.assert_produces_warnings(FutureWarning): - df.eval('c = a + b') - - # but don't warn without assignment - with tm.assert_produces_warnings(None): - df.eval('a + b') + tm.assert_frame_equal(df, expected) def test_multi_line_expression(self): # GH 11149 @@ -1373,7 +1309,7 @@ def test_multi_line_expression(self): c = a + b d = c + b""", inplace=True) assert_frame_equal(expected, df) - self.assertIsNone(ans) + assert ans is None expected['a'] = expected['a'] - 1 expected['e'] = expected['a'] + 2 @@ -1381,10 +1317,10 @@ def test_multi_line_expression(self): a = a - 1 e = a + 2""", inplace=True) assert_frame_equal(expected, df) - self.assertIsNone(ans) + assert ans is None # multi-line not valid if not all assignments - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df.eval(""" a = b + 2 b - 2""", inplace=False) @@ -1421,24 +1357,62 @@ def test_multi_line_expression_local_variable(self): d = c + @local_var """, inplace=True) assert_frame_equal(expected, df) - 
self.assertIsNone(ans) + assert ans is None def test_assignment_in_query(self): # GH 8664 df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}) df_orig = df.copy() - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df.query('a = 1') assert_frame_equal(df, df_orig) - def query_inplace(self): - # GH 11149 + def test_query_inplace(self): + # see gh-11149 df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}) expected = df.copy() expected = expected[expected['a'] == 2] df.query('a == 2', inplace=True) assert_frame_equal(expected, df) + df = {} + expected = {"a": 3} + + self.eval("a = 1 + 2", target=df, inplace=True) + tm.assert_dict_equal(df, expected) + + @pytest.mark.parametrize("invalid_target", [1, "cat", [1, 2], + np.array([]), (1, 3)]) + def test_cannot_item_assign(self, invalid_target): + msg = "Cannot assign expression output to target" + expression = "a = 1 + 2" + + with tm.assert_raises_regex(ValueError, msg): + self.eval(expression, target=invalid_target, inplace=True) + + if hasattr(invalid_target, "copy"): + with tm.assert_raises_regex(ValueError, msg): + self.eval(expression, target=invalid_target, inplace=False) + + @pytest.mark.parametrize("invalid_target", [1, "cat", (1, 3)]) + def test_cannot_copy_item(self, invalid_target): + msg = "Cannot return a copy of the target" + expression = "a = 1 + 2" + + with tm.assert_raises_regex(ValueError, msg): + self.eval(expression, target=invalid_target, inplace=False) + + @pytest.mark.parametrize("target", [1, "cat", [1, 2], + np.array([]), (1, 3), {1: 2}]) + def test_inplace_no_assignment(self, target): + expression = "1 + 2" + + assert self.eval(expression, target=target, inplace=False) == 3 + + msg = "Cannot operate inplace if there is no assignment" + with tm.assert_raises_regex(ValueError, msg): + self.eval(expression, target=target, inplace=True) + def test_basic_period_index_boolean_expression(self): df = mkdf(2, 2, data_gen_f=f, c_idx_type='p', r_idx_type='i') @@ -1473,108 +1447,108 @@ def 
test_simple_in_ops(self): if self.parser != 'python': res = pd.eval('1 in [1, 2]', engine=self.engine, parser=self.parser) - self.assertTrue(res) + assert res res = pd.eval('2 in (1, 2)', engine=self.engine, parser=self.parser) - self.assertTrue(res) + assert res res = pd.eval('3 in (1, 2)', engine=self.engine, parser=self.parser) - self.assertFalse(res) + assert not res res = pd.eval('3 not in (1, 2)', engine=self.engine, parser=self.parser) - self.assertTrue(res) + assert res res = pd.eval('[3] not in (1, 2)', engine=self.engine, parser=self.parser) - self.assertTrue(res) + assert res res = pd.eval('[3] in ([3], 2)', engine=self.engine, parser=self.parser) - self.assertTrue(res) + assert res res = pd.eval('[[3]] in [[[3]], 2]', engine=self.engine, parser=self.parser) - self.assertTrue(res) + assert res res = pd.eval('(3,) in [(3,), 2]', engine=self.engine, parser=self.parser) - self.assertTrue(res) + assert res res = pd.eval('(3,) not in [(3,), 2]', engine=self.engine, parser=self.parser) - self.assertFalse(res) + assert not res res = pd.eval('[(3,)] in [[(3,)], 2]', engine=self.engine, parser=self.parser) - self.assertTrue(res) + assert res else: - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): pd.eval('1 in [1, 2]', engine=self.engine, parser=self.parser) - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): pd.eval('2 in (1, 2)', engine=self.engine, parser=self.parser) - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): pd.eval('3 in (1, 2)', engine=self.engine, parser=self.parser) - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): pd.eval('3 not in (1, 2)', engine=self.engine, parser=self.parser) - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): pd.eval('[(3,)] in (1, 2, [(3,)])', engine=self.engine, parser=self.parser) - with tm.assertRaises(NotImplementedError): + with 
pytest.raises(NotImplementedError): pd.eval('[3] not in (1, 2, [[3]])', engine=self.engine, parser=self.parser) +@td.skip_if_no_ne class TestOperationsNumExprPython(TestOperationsNumExprPandas): @classmethod - def setUpClass(cls): - super(TestOperationsNumExprPython, cls).setUpClass() + def setup_class(cls): + super(TestOperationsNumExprPython, cls).setup_class() cls.engine = 'numexpr' cls.parser = 'python' - tm.skip_if_no_ne(cls.engine) cls.arith_ops = expr._arith_ops_syms + expr._cmp_ops_syms cls.arith_ops = filter(lambda x: x not in ('in', 'not in'), cls.arith_ops) def test_check_many_exprs(self): - a = 1 + a = 1 # noqa expr = ' * '.join('a' * 33) expected = 1 res = pd.eval(expr, engine=self.engine, parser=self.parser) - tm.assert_equal(res, expected) + assert res == expected def test_fails_and(self): df = DataFrame(np.random.randn(5, 3)) - self.assertRaises(NotImplementedError, pd.eval, 'df > 2 and df > 3', - local_dict={'df': df}, parser=self.parser, - engine=self.engine) + pytest.raises(NotImplementedError, pd.eval, 'df > 2 and df > 3', + local_dict={'df': df}, parser=self.parser, + engine=self.engine) def test_fails_or(self): df = DataFrame(np.random.randn(5, 3)) - self.assertRaises(NotImplementedError, pd.eval, 'df > 2 or df > 3', - local_dict={'df': df}, parser=self.parser, - engine=self.engine) + pytest.raises(NotImplementedError, pd.eval, 'df > 2 or df > 3', + local_dict={'df': df}, parser=self.parser, + engine=self.engine) def test_fails_not(self): df = DataFrame(np.random.randn(5, 3)) - self.assertRaises(NotImplementedError, pd.eval, 'not df > 2', - local_dict={'df': df}, parser=self.parser, - engine=self.engine) + pytest.raises(NotImplementedError, pd.eval, 'not df > 2', + local_dict={'df': df}, parser=self.parser, + engine=self.engine) def test_fails_ampersand(self): - df = DataFrame(np.random.randn(5, 3)) + df = DataFrame(np.random.randn(5, 3)) # noqa ex = '(df + 2)[df > 1] > 0 & (df > 0)' - with tm.assertRaises(NotImplementedError): + with 
pytest.raises(NotImplementedError): pd.eval(ex, parser=self.parser, engine=self.engine) def test_fails_pipe(self): - df = DataFrame(np.random.randn(5, 3)) + df = DataFrame(np.random.randn(5, 3)) # noqa ex = '(df + 2)[df > 1] > 0 | (df > 0)' - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): pd.eval(ex, parser=self.parser, engine=self.engine) def test_bool_ops_with_constants(self): @@ -1582,31 +1556,31 @@ def test_bool_ops_with_constants(self): ('True', 'False')): ex = '{0} {1} {2}'.format(lhs, op, rhs) if op in ('and', 'or'): - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): self.eval(ex) else: res = self.eval(ex) exp = eval(ex) - self.assertEqual(res, exp) + assert res == exp def test_simple_bool_ops(self): for op, lhs, rhs in product(expr._bool_ops_syms, (True, False), (True, False)): ex = 'lhs {0} rhs'.format(op) if op in ('and', 'or'): - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): pd.eval(ex, engine=self.engine, parser=self.parser) else: res = pd.eval(ex, engine=self.engine, parser=self.parser) exp = eval(ex) - self.assertEqual(res, exp) + assert res == exp class TestOperationsPythonPython(TestOperationsNumExprPython): @classmethod - def setUpClass(cls): - super(TestOperationsPythonPython, cls).setUpClass() + def setup_class(cls): + super(TestOperationsPythonPython, cls).setup_class() cls.engine = cls.parser = 'python' cls.arith_ops = expr._arith_ops_syms + expr._cmp_ops_syms cls.arith_ops = filter(lambda x: x not in ('in', 'not in'), @@ -1616,26 +1590,25 @@ def setUpClass(cls): class TestOperationsPythonPandas(TestOperationsNumExprPandas): @classmethod - def setUpClass(cls): - super(TestOperationsPythonPandas, cls).setUpClass() + def setup_class(cls): + super(TestOperationsPythonPandas, cls).setup_class() cls.engine = 'python' cls.parser = 'pandas' cls.arith_ops = expr._arith_ops_syms + expr._cmp_ops_syms -class 
TestMathPythonPython(tm.TestCase): +@td.skip_if_no_ne +class TestMathPythonPython(object): @classmethod - def setUpClass(cls): - super(TestMathPythonPython, cls).setUpClass() - tm.skip_if_no_ne() + def setup_class(cls): cls.engine = 'python' cls.parser = 'pandas' cls.unary_fns = _unary_math_ops cls.binary_fns = _binary_math_ops @classmethod - def tearDownClass(cls): + def teardown_class(cls): del cls.engine, cls.parser def eval(self, *args, **kwargs): @@ -1688,14 +1661,14 @@ def test_df_arithmetic_subexpression(self): def check_result_type(self, dtype, expect_dtype): df = DataFrame({'a': np.random.randn(10).astype(dtype)}) - self.assertEqual(df.a.dtype, dtype) + assert df.a.dtype == dtype df.eval("b = sin(a)", engine=self.engine, parser=self.parser, inplace=True) got = df.b expect = np.sin(df.a) - self.assertEqual(expect.dtype, got.dtype) - self.assertEqual(expect_dtype, got.dtype) + assert expect.dtype == got.dtype + assert expect_dtype == got.dtype tm.assert_series_equal(got, expect, check_names=False) def test_result_types(self): @@ -1714,17 +1687,17 @@ def test_result_types2(self): def test_undefined_func(self): df = DataFrame({'a': np.random.randn(10)}) - with tm.assertRaisesRegexp(ValueError, - "\"mysin\" is not a supported function"): + with tm.assert_raises_regex( + ValueError, "\"mysin\" is not a supported function"): df.eval("mysin(a)", engine=self.engine, parser=self.parser) def test_keyword_arg(self): df = DataFrame({'a': np.random.randn(10)}) - with tm.assertRaisesRegexp(TypeError, - "Function \"sin\" does not support " - "keyword arguments"): + with tm.assert_raises_regex(TypeError, + "Function \"sin\" does not support " + "keyword arguments"): df.eval("sin(x=a)", engine=self.engine, parser=self.parser) @@ -1733,8 +1706,8 @@ def test_keyword_arg(self): class TestMathPythonPandas(TestMathPythonPython): @classmethod - def setUpClass(cls): - super(TestMathPythonPandas, cls).setUpClass() + def setup_class(cls): + super(TestMathPythonPandas, 
cls).setup_class() cls.engine = 'python' cls.parser = 'pandas' @@ -1742,8 +1715,8 @@ def setUpClass(cls): class TestMathNumExprPandas(TestMathPythonPython): @classmethod - def setUpClass(cls): - super(TestMathNumExprPandas, cls).setUpClass() + def setup_class(cls): + super(TestMathNumExprPandas, cls).setup_class() cls.engine = 'numexpr' cls.parser = 'pandas' @@ -1751,8 +1724,8 @@ def setUpClass(cls): class TestMathNumExprPython(TestMathPythonPython): @classmethod - def setUpClass(cls): - super(TestMathNumExprPython, cls).setUpClass() + def setup_class(cls): + super(TestMathNumExprPython, cls).setup_class() cls.engine = 'numexpr' cls.parser = 'python' @@ -1762,62 +1735,48 @@ def setUpClass(cls): class TestScope(object): - def check_global_scope(self, e, engine, parser): - tm.skip_if_no_ne(engine) + def test_global_scope(self, engine, parser): + e = '_var_s * 2' tm.assert_numpy_array_equal(_var_s * 2, pd.eval(e, engine=engine, parser=parser)) - def test_global_scope(self): - e = '_var_s * 2' - for engine, parser in product(_engines, expr._parsers): - yield self.check_global_scope, e, engine, parser - - def check_no_new_locals(self, engine, parser): - tm.skip_if_no_ne(engine) - x = 1 + def test_no_new_locals(self, engine, parser): + x = 1 # noqa lcls = locals().copy() pd.eval('x + 1', local_dict=lcls, engine=engine, parser=parser) lcls2 = locals().copy() lcls2.pop('lcls') - tm.assert_equal(lcls, lcls2) - - def test_no_new_locals(self): - for engine, parser in product(_engines, expr._parsers): - yield self.check_no_new_locals, engine, parser + assert lcls == lcls2 - def check_no_new_globals(self, engine, parser): - tm.skip_if_no_ne(engine) - x = 1 + def test_no_new_globals(self, engine, parser): + x = 1 # noqa gbls = globals().copy() pd.eval('x + 1', engine=engine, parser=parser) gbls2 = globals().copy() - tm.assert_equal(gbls, gbls2) - - def test_no_new_globals(self): - for engine, parser in product(_engines, expr._parsers): - yield self.check_no_new_globals, engine, 
parser + assert gbls == gbls2 +@td.skip_if_no_ne def test_invalid_engine(): - tm.skip_if_no_ne() - assertRaisesRegexp(KeyError, 'Invalid engine \'asdf\' passed', - pd.eval, 'x + y', local_dict={'x': 1, 'y': 2}, - engine='asdf') + tm.assert_raises_regex(KeyError, 'Invalid engine \'asdf\' passed', + pd.eval, 'x + y', local_dict={'x': 1, 'y': 2}, + engine='asdf') +@td.skip_if_no_ne def test_invalid_parser(): - tm.skip_if_no_ne() - assertRaisesRegexp(KeyError, 'Invalid parser \'asdf\' passed', - pd.eval, 'x + y', local_dict={'x': 1, 'y': 2}, - parser='asdf') + tm.assert_raises_regex(KeyError, 'Invalid parser \'asdf\' passed', + pd.eval, 'x + y', local_dict={'x': 1, 'y': 2}, + parser='asdf') _parsers = {'python': PythonExprVisitor, 'pytables': pytables.ExprVisitor, 'pandas': PandasExprVisitor} -def check_disallowed_nodes(engine, parser): - tm.skip_if_no_ne(engine) +@pytest.mark.parametrize('engine', _engines) +@pytest.mark.parametrize('parser', _parsers) +def test_disallowed_nodes(engine, parser): VisitorClass = _parsers[parser] uns_ops = VisitorClass.unsupported_nodes inst = VisitorClass('x + 1', engine, parser) @@ -1827,145 +1786,90 @@ def check_disallowed_nodes(engine, parser): getattr(inst, ops)() -def test_disallowed_nodes(): - for engine, visitor in product(_parsers, repeat=2): - yield check_disallowed_nodes, engine, visitor - - -def check_syntax_error_exprs(engine, parser): - tm.skip_if_no_ne(engine) +def test_syntax_error_exprs(engine, parser): e = 's +' with pytest.raises(SyntaxError): pd.eval(e, engine=engine, parser=parser) -def test_syntax_error_exprs(): - for engine, parser in ENGINES_PARSERS: - yield check_syntax_error_exprs, engine, parser - - -def check_name_error_exprs(engine, parser): - tm.skip_if_no_ne(engine) +def test_name_error_exprs(engine, parser): e = 's + t' - with tm.assertRaises(NameError): + with pytest.raises(NameError): pd.eval(e, engine=engine, parser=parser) -def test_name_error_exprs(): - for engine, parser in ENGINES_PARSERS: - yield 
check_name_error_exprs, engine, parser - - -def check_invalid_local_variable_reference(engine, parser): - tm.skip_if_no_ne(engine) - - a, b = 1, 2 +def test_invalid_local_variable_reference(engine, parser): + a, b = 1, 2 # noqa exprs = 'a + @b', '@a + b', '@a + @b' - for expr in exprs: + + for _expr in exprs: if parser != 'pandas': - with tm.assertRaisesRegexp(SyntaxError, "The '@' prefix is only"): - pd.eval(exprs, engine=engine, parser=parser) + with tm.assert_raises_regex(SyntaxError, + "The '@' prefix is only"): + pd.eval(_expr, engine=engine, parser=parser) else: - with tm.assertRaisesRegexp(SyntaxError, "The '@' prefix is not"): - pd.eval(exprs, engine=engine, parser=parser) - - -def test_invalid_local_variable_reference(): - for engine, parser in ENGINES_PARSERS: - yield check_invalid_local_variable_reference, engine, parser + with tm.assert_raises_regex(SyntaxError, + "The '@' prefix is not"): + pd.eval(_expr, engine=engine, parser=parser) -def check_numexpr_builtin_raises(engine, parser): - tm.skip_if_no_ne(engine) +def test_numexpr_builtin_raises(engine, parser): sin, dotted_line = 1, 2 if engine == 'numexpr': - with tm.assertRaisesRegexp(NumExprClobberingError, - 'Variables in expression .+'): + with tm.assert_raises_regex(NumExprClobberingError, + 'Variables in expression .+'): pd.eval('sin + dotted_line', engine=engine, parser=parser) else: res = pd.eval('sin + dotted_line', engine=engine, parser=parser) - tm.assert_equal(res, sin + dotted_line) + assert res == sin + dotted_line -def test_numexpr_builtin_raises(): - for engine, parser in ENGINES_PARSERS: - yield check_numexpr_builtin_raises, engine, parser - - -def check_bad_resolver_raises(engine, parser): - tm.skip_if_no_ne(engine) +def test_bad_resolver_raises(engine, parser): cannot_resolve = 42, 3.0 - with tm.assertRaisesRegexp(TypeError, 'Resolver of type .+'): + with tm.assert_raises_regex(TypeError, 'Resolver of type .+'): pd.eval('1 + 2', resolvers=cannot_resolve, engine=engine, parser=parser) 
-def test_bad_resolver_raises(): - for engine, parser in ENGINES_PARSERS: - yield check_bad_resolver_raises, engine, parser - - -def check_empty_string_raises(engine, parser): +def test_empty_string_raises(engine, parser): # GH 13139 - tm.skip_if_no_ne(engine) - with tm.assertRaisesRegexp(ValueError, 'expr cannot be an empty string'): + with tm.assert_raises_regex(ValueError, + 'expr cannot be an empty string'): pd.eval('', engine=engine, parser=parser) -def test_empty_string_raises(): - for engine, parser in ENGINES_PARSERS: - yield check_empty_string_raises, engine, parser - - -def check_more_than_one_expression_raises(engine, parser): - tm.skip_if_no_ne(engine) - with tm.assertRaisesRegexp(SyntaxError, - 'only a single expression is allowed'): +def test_more_than_one_expression_raises(engine, parser): + with tm.assert_raises_regex(SyntaxError, + 'only a single expression is allowed'): pd.eval('1 + 1; 2 + 2', engine=engine, parser=parser) -def test_more_than_one_expression_raises(): - for engine, parser in ENGINES_PARSERS: - yield check_more_than_one_expression_raises, engine, parser +@pytest.mark.parametrize('cmp', ('and', 'or')) +@pytest.mark.parametrize('lhs', (int, float)) +@pytest.mark.parametrize('rhs', (int, float)) +def test_bool_ops_fails_on_scalars(lhs, cmp, rhs, engine, parser): + gen = {int: lambda: np.random.randint(10), float: np.random.randn} + mid = gen[lhs]() # noqa + lhs = gen[lhs]() # noqa + rhs = gen[rhs]() # noqa -def check_bool_ops_fails_on_scalars(gen, lhs, cmp, rhs, engine, parser): - tm.skip_if_no_ne(engine) - mid = gen[type(lhs)]() ex1 = 'lhs {0} mid {1} rhs'.format(cmp, cmp) ex2 = 'lhs {0} mid and mid {1} rhs'.format(cmp, cmp) ex3 = '(lhs {0} mid) & (mid {1} rhs)'.format(cmp, cmp) for ex in (ex1, ex2, ex3): - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): pd.eval(ex, engine=engine, parser=parser) -def test_bool_ops_fails_on_scalars(): - _bool_ops_syms = 'and', 'or' - dtypes = int, float - gen = 
{int: lambda: np.random.randint(10), float: np.random.randn} - for engine, parser, dtype1, cmp, dtype2 in product(_engines, expr._parsers, - dtypes, _bool_ops_syms, - dtypes): - yield (check_bool_ops_fails_on_scalars, gen, gen[dtype1](), cmp, - gen[dtype2](), engine, parser) - - -def check_inf(engine, parser): - tm.skip_if_no_ne(engine) +def test_inf(engine, parser): s = 'inf + 1' expected = np.inf result = pd.eval(s, engine=engine, parser=parser) - tm.assert_equal(result, expected) - + assert result == expected -def test_inf(): - for engine, parser in ENGINES_PARSERS: - yield check_inf, engine, parser - -def check_negate_lt_eq_le(engine, parser): - tm.skip_if_no_ne(engine) +def test_negate_lt_eq_le(engine, parser): df = pd.DataFrame([[0, 10], [1, 20]], columns=['cat', 'count']) expected = df[~(df.cat > 0)] @@ -1973,23 +1877,18 @@ def check_negate_lt_eq_le(engine, parser): tm.assert_frame_equal(result, expected) if parser == 'python': - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): df.query('not (cat > 0)', engine=engine, parser=parser) else: result = df.query('not (cat > 0)', engine=engine, parser=parser) tm.assert_frame_equal(result, expected) -def test_negate_lt_eq_le(): - for engine, parser in product(_engines, expr._parsers): - yield check_negate_lt_eq_le, engine, parser - - -class TestValidate(tm.TestCase): +class TestValidate(object): def test_validate_bool_args(self): invalid_values = [1, "True", [1, 2, 3], 5.0] for value in invalid_values: - with self.assertRaises(ValueError): + with pytest.raises(ValueError): pd.eval("2+2", inplace=value) diff --git a/pandas/tests/dtypes/__init__.py b/pandas/tests/dtypes/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/dtypes/test_cast.py b/pandas/tests/dtypes/test_cast.py new file mode 100644 index 0000000000000..96a9e3227b40b --- /dev/null +++ b/pandas/tests/dtypes/test_cast.py @@ -0,0 +1,441 @@ +# -*- coding: utf-8 -*- + +""" +These 
test the private routines in types/cast.py + +""" + +import pytest +from datetime import datetime, timedelta, date +import numpy as np + +import pandas as pd +from pandas import (Timedelta, Timestamp, DatetimeIndex, + DataFrame, NaT, Period, Series) + +from pandas.core.dtypes.cast import ( + maybe_downcast_to_dtype, + maybe_convert_objects, + cast_scalar_to_array, + infer_dtype_from_scalar, + infer_dtype_from_array, + maybe_convert_string_to_object, + maybe_convert_scalar, + find_common_type, + construct_1d_object_array_from_listlike, + construct_1d_arraylike_from_scalar) +from pandas.core.dtypes.dtypes import ( + CategoricalDtype, + DatetimeTZDtype, + PeriodDtype) +from pandas.core.dtypes.common import ( + is_dtype_equal) +from pandas.util import testing as tm + + +class TestMaybeDowncast(object): + + def test_downcast_conv(self): + # test downcasting + + arr = np.array([8.5, 8.6, 8.7, 8.8, 8.9999999999995]) + result = maybe_downcast_to_dtype(arr, 'infer') + tm.assert_numpy_array_equal(result, arr) + + arr = np.array([8., 8., 8., 8., 8.9999999999995]) + result = maybe_downcast_to_dtype(arr, 'infer') + expected = np.array([8, 8, 8, 8, 9], dtype=np.int64) + tm.assert_numpy_array_equal(result, expected) + + arr = np.array([8., 8., 8., 8., 9.0000000000005]) + result = maybe_downcast_to_dtype(arr, 'infer') + expected = np.array([8, 8, 8, 8, 9], dtype=np.int64) + tm.assert_numpy_array_equal(result, expected) + + # GH16875 coercing of bools + ser = Series([True, True, False]) + result = maybe_downcast_to_dtype(ser, np.dtype(np.float64)) + expected = ser + tm.assert_series_equal(result, expected) + + # conversions + + expected = np.array([1, 2]) + for dtype in [np.float64, object, np.int64]: + arr = np.array([1.0, 2.0], dtype=dtype) + result = maybe_downcast_to_dtype(arr, 'infer') + tm.assert_almost_equal(result, expected, check_dtype=False) + + for dtype in [np.float64, object]: + expected = np.array([1.0, 2.0, np.nan], dtype=dtype) + arr = np.array([1.0, 2.0, np.nan], 
dtype=dtype) + result = maybe_downcast_to_dtype(arr, 'infer') + tm.assert_almost_equal(result, expected) + + # empties + for dtype in [np.int32, np.float64, np.float32, np.bool_, + np.int64, object]: + arr = np.array([], dtype=dtype) + result = maybe_downcast_to_dtype(arr, 'int64') + tm.assert_almost_equal(result, np.array([], dtype=np.int64)) + assert result.dtype == np.int64 + + def test_datetimelikes_nan(self): + arr = np.array([1, 2, np.nan]) + exp = np.array([1, 2, np.datetime64('NaT')], dtype='datetime64[ns]') + res = maybe_downcast_to_dtype(arr, 'datetime64[ns]') + tm.assert_numpy_array_equal(res, exp) + + exp = np.array([1, 2, np.timedelta64('NaT')], dtype='timedelta64[ns]') + res = maybe_downcast_to_dtype(arr, 'timedelta64[ns]') + tm.assert_numpy_array_equal(res, exp) + + def test_datetime_with_timezone(self): + # GH 15426 + ts = Timestamp("2016-01-01 12:00:00", tz='US/Pacific') + exp = DatetimeIndex([ts, ts]) + res = maybe_downcast_to_dtype(exp, exp.dtype) + tm.assert_index_equal(res, exp) + + res = maybe_downcast_to_dtype(exp.asi8, exp.dtype) + tm.assert_index_equal(res, exp) + + +class TestInferDtype(object): + + def testinfer_dtype_from_scalar(self): + # Test that infer_dtype_from_scalar is returning correct dtype for int + # and float. 
+ + for dtypec in [np.uint8, np.int8, np.uint16, np.int16, np.uint32, + np.int32, np.uint64, np.int64]: + data = dtypec(12) + dtype, val = infer_dtype_from_scalar(data) + assert dtype == type(data) + + data = 12 + dtype, val = infer_dtype_from_scalar(data) + assert dtype == np.int64 + + for dtypec in [np.float16, np.float32, np.float64]: + data = dtypec(12) + dtype, val = infer_dtype_from_scalar(data) + assert dtype == dtypec + + data = np.float(12) + dtype, val = infer_dtype_from_scalar(data) + assert dtype == np.float64 + + for data in [True, False]: + dtype, val = infer_dtype_from_scalar(data) + assert dtype == np.bool_ + + for data in [np.complex64(1), np.complex128(1)]: + dtype, val = infer_dtype_from_scalar(data) + assert dtype == np.complex_ + + for data in [np.datetime64(1, 'ns'), Timestamp(1), + datetime(2000, 1, 1, 0, 0)]: + dtype, val = infer_dtype_from_scalar(data) + assert dtype == 'M8[ns]' + + for data in [np.timedelta64(1, 'ns'), Timedelta(1), + timedelta(1)]: + dtype, val = infer_dtype_from_scalar(data) + assert dtype == 'm8[ns]' + + for tz in ['UTC', 'US/Eastern', 'Asia/Tokyo']: + dt = Timestamp(1, tz=tz) + dtype, val = infer_dtype_from_scalar(dt, pandas_dtype=True) + assert dtype == 'datetime64[ns, {0}]'.format(tz) + assert val == dt.value + + dtype, val = infer_dtype_from_scalar(dt) + assert dtype == np.object_ + assert val == dt + + for freq in ['M', 'D']: + p = Period('2011-01-01', freq=freq) + dtype, val = infer_dtype_from_scalar(p, pandas_dtype=True) + assert dtype == 'period[{0}]'.format(freq) + assert val == p.ordinal + + dtype, val = infer_dtype_from_scalar(p) + dtype == np.object_ + assert val == p + + # misc + for data in [date(2000, 1, 1), + Timestamp(1, tz='US/Eastern'), 'foo']: + + dtype, val = infer_dtype_from_scalar(data) + assert dtype == np.object_ + + def testinfer_dtype_from_scalar_errors(self): + with pytest.raises(ValueError): + infer_dtype_from_scalar(np.array([1])) + + @pytest.mark.parametrize( + "arr, expected, 
pandas_dtype", + [('foo', np.object_, False), + (b'foo', np.object_, False), + (1, np.int_, False), + (1.5, np.float_, False), + ([1], np.int_, False), + (np.array([1], dtype=np.int64), np.int64, False), + ([np.nan, 1, ''], np.object_, False), + (np.array([[1.0, 2.0]]), np.float_, False), + (pd.Categorical(list('aabc')), np.object_, False), + (pd.Categorical([1, 2, 3]), np.int64, False), + (pd.Categorical(list('aabc')), 'category', True), + (pd.Categorical([1, 2, 3]), 'category', True), + (Timestamp('20160101'), np.object_, False), + (np.datetime64('2016-01-01'), np.dtype(' df.two.sum() + + with catch_warnings(record=True) as w: + # successfully modify column in place + # this should not raise a warning + df.one += 1 + assert len(w) == 0 + assert df.one.iloc[0] == 2 + + with catch_warnings(record=True) as w: + # successfully add an attribute to a series + # this should not raise a warning + df.two.not_an_index = [1, 2] + assert len(w) == 0 + + with tm.assert_produces_warning(UserWarning): + # warn when setting column to nonexistent name + df.four = df.two + 2 + assert df.four.sum() > df.two.sum() diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py new file mode 100644 index 0000000000000..b4f5d67530fbd --- /dev/null +++ b/pandas/tests/dtypes/test_inference.py @@ -0,0 +1,1237 @@ +# -*- coding: utf-8 -*- + +""" +These the test the public routines exposed in types/common.py +related to inference and not otherwise tested in types/test_common.py + +""" +from warnings import catch_warnings +import collections +import re +from datetime import datetime, date, timedelta, time +from decimal import Decimal +import numpy as np +import pytz +import pytest + +import pandas as pd +from pandas._libs import tslib, lib, missing as libmissing +from pandas import (Series, Index, DataFrame, Timedelta, + DatetimeIndex, TimedeltaIndex, Timestamp, + Panel, Period, Categorical, isna, Interval, + DateOffset) +from pandas.compat import u, PY2, PY3, 
StringIO, lrange +from pandas.core.dtypes import inference +from pandas.core.dtypes.common import ( + is_timedelta64_dtype, + is_timedelta64_ns_dtype, + is_datetime64_dtype, + is_datetime64_ns_dtype, + is_datetime64_any_dtype, + is_datetime64tz_dtype, + is_number, + is_integer, + is_float, + is_bool, + is_scalar, + is_scipy_sparse, + _ensure_int32, + _ensure_categorical) +from pandas.util import testing as tm +import pandas.util._test_decorators as td + + +@pytest.fixture(params=[True, False], ids=str) +def coerce(request): + return request.param + + +def test_is_sequence(): + is_seq = inference.is_sequence + assert (is_seq((1, 2))) + assert (is_seq([1, 2])) + assert (not is_seq("abcd")) + assert (not is_seq(u("abcd"))) + assert (not is_seq(np.int64)) + + class A(object): + + def __getitem__(self): + return 1 + + assert (not is_seq(A())) + + +@pytest.mark.parametrize( + "ll", + [ + [], [1], (1, ), (1, 2), {'a': 1}, + set([1, 'a']), Series([1]), + Series([]), Series(['a']).str]) +def test_is_list_like_passes(ll): + assert inference.is_list_like(ll) + + +@pytest.mark.parametrize( + "ll", [1, '2', object(), str]) +def test_is_list_like_fails(ll): + assert not inference.is_list_like(ll) + + +def test_is_array_like(): + assert inference.is_array_like(Series([])) + assert inference.is_array_like(Series([1, 2])) + assert inference.is_array_like(np.array(["a", "b"])) + assert inference.is_array_like(Index(["2016-01-01"])) + + class DtypeList(list): + dtype = "special" + + assert inference.is_array_like(DtypeList()) + + assert not inference.is_array_like([1, 2, 3]) + assert not inference.is_array_like(tuple()) + assert not inference.is_array_like("foo") + assert not inference.is_array_like(123) + + +@pytest.mark.parametrize('inner', [ + [], [1], (1, ), (1, 2), {'a': 1}, set([1, 'a']), Series([1]), + Series([]), Series(['a']).str, (x for x in range(5)) +]) +@pytest.mark.parametrize('outer', [ + list, Series, np.array, tuple +]) +def test_is_nested_list_like_passes(inner, 
outer): + result = outer([inner for _ in range(5)]) + assert inference.is_list_like(result) + + +@pytest.mark.parametrize('obj', [ + 'abc', [], [1], (1,), ['a'], 'a', {'a'}, + [1, 2, 3], Series([1]), DataFrame({"A": [1]}), + ([1, 2] for _ in range(5)), +]) +def test_is_nested_list_like_fails(obj): + assert not inference.is_nested_list_like(obj) + + +@pytest.mark.parametrize( + "ll", [{}, {'A': 1}, Series([1])]) +def test_is_dict_like_passes(ll): + assert inference.is_dict_like(ll) + + +@pytest.mark.parametrize( + "ll", ['1', 1, [1, 2], (1, 2), range(2), Index([1])]) +def test_is_dict_like_fails(ll): + assert not inference.is_dict_like(ll) + + +def test_is_file_like(): + class MockFile(object): + pass + + is_file = inference.is_file_like + + data = StringIO("data") + assert is_file(data) + + # No read / write attributes + # No iterator attributes + m = MockFile() + assert not is_file(m) + + MockFile.write = lambda self: 0 + + # Write attribute but not an iterator + m = MockFile() + assert not is_file(m) + + # gh-16530: Valid iterator just means we have the + # __iter__ attribute for our purposes. 
+ MockFile.__iter__ = lambda self: self + + # Valid write-only file + m = MockFile() + assert is_file(m) + + del MockFile.write + MockFile.read = lambda self: 0 + + # Valid read-only file + m = MockFile() + assert is_file(m) + + # Iterator but no read / write attributes + data = [1, 2, 3] + assert not is_file(data) + + if PY3: + from unittest import mock + assert not is_file(mock.Mock()) + + +@pytest.mark.parametrize( + "ll", [collections.namedtuple('Test', list('abc'))(1, 2, 3)]) +def test_is_names_tuple_passes(ll): + assert inference.is_named_tuple(ll) + + +@pytest.mark.parametrize( + "ll", [(1, 2, 3), 'a', Series({'pi': 3.14})]) +def test_is_names_tuple_fails(ll): + assert not inference.is_named_tuple(ll) + + +def test_is_hashable(): + + # all new-style classes are hashable by default + class HashableClass(object): + pass + + class UnhashableClass1(object): + __hash__ = None + + class UnhashableClass2(object): + + def __hash__(self): + raise TypeError("Not hashable") + + hashable = (1, + 3.14, + np.float64(3.14), + 'a', + tuple(), + (1, ), + HashableClass(), ) + not_hashable = ([], UnhashableClass1(), ) + abc_hashable_not_really_hashable = (([], ), UnhashableClass2(), ) + + for i in hashable: + assert inference.is_hashable(i) + for i in not_hashable: + assert not inference.is_hashable(i) + for i in abc_hashable_not_really_hashable: + assert not inference.is_hashable(i) + + # numpy.array is no longer collections.Hashable as of + # https://github.com/numpy/numpy/pull/5326, just test + # is_hashable() + assert not inference.is_hashable(np.array([])) + + # old-style classes in Python 2 don't appear hashable to + # collections.Hashable but also seem to support hash() by default + if PY2: + + class OldStyleClass(): + pass + + c = OldStyleClass() + assert not isinstance(c, collections.Hashable) + assert inference.is_hashable(c) + hash(c) # this will not raise + + +@pytest.mark.parametrize( + "ll", [re.compile('ad')]) +def test_is_re_passes(ll): + assert 
inference.is_re(ll) + + +@pytest.mark.parametrize( + "ll", ['x', 2, 3, object()]) +def test_is_re_fails(ll): + assert not inference.is_re(ll) + + +@pytest.mark.parametrize( + "ll", [r'a', u('x'), + r'asdf', + re.compile('adsf'), + u(r'\u2233\s*'), + re.compile(r'')]) +def test_is_recompilable_passes(ll): + assert inference.is_re_compilable(ll) + + +@pytest.mark.parametrize( + "ll", [1, [], object()]) +def test_is_recompilable_fails(ll): + assert not inference.is_re_compilable(ll) + + +class TestInference(object): + + def test_infer_dtype_bytes(self): + compare = 'string' if PY2 else 'bytes' + + # string array of bytes + arr = np.array(list('abc'), dtype='S1') + assert lib.infer_dtype(arr) == compare + + # object array of bytes + arr = arr.astype(object) + assert lib.infer_dtype(arr) == compare + + # object array of bytes with missing values + assert lib.infer_dtype([b'a', np.nan, b'c'], skipna=True) == compare + + def test_isinf_scalar(self): + # GH 11352 + assert libmissing.isposinf_scalar(float('inf')) + assert libmissing.isposinf_scalar(np.inf) + assert not libmissing.isposinf_scalar(-np.inf) + assert not libmissing.isposinf_scalar(1) + assert not libmissing.isposinf_scalar('a') + + assert libmissing.isneginf_scalar(float('-inf')) + assert libmissing.isneginf_scalar(-np.inf) + assert not libmissing.isneginf_scalar(np.inf) + assert not libmissing.isneginf_scalar(1) + assert not libmissing.isneginf_scalar('a') + + def test_maybe_convert_numeric_infinities(self): + # see gh-13274 + infinities = ['inf', 'inF', 'iNf', 'Inf', + 'iNF', 'InF', 'INf', 'INF'] + na_values = set(['', 'NULL', 'nan']) + + pos = np.array(['inf'], dtype=np.float64) + neg = np.array(['-inf'], dtype=np.float64) + + msg = "Unable to parse string" + + for infinity in infinities: + for maybe_int in (True, False): + out = lib.maybe_convert_numeric( + np.array([infinity], dtype=object), + na_values, maybe_int) + tm.assert_numpy_array_equal(out, pos) + + out = lib.maybe_convert_numeric( + np.array(['-' 
+ infinity], dtype=object), + na_values, maybe_int) + tm.assert_numpy_array_equal(out, neg) + + out = lib.maybe_convert_numeric( + np.array([u(infinity)], dtype=object), + na_values, maybe_int) + tm.assert_numpy_array_equal(out, pos) + + out = lib.maybe_convert_numeric( + np.array(['+' + infinity], dtype=object), + na_values, maybe_int) + tm.assert_numpy_array_equal(out, pos) + + # too many characters + with tm.assert_raises_regex(ValueError, msg): + lib.maybe_convert_numeric( + np.array(['foo_' + infinity], dtype=object), + na_values, maybe_int) + + def test_maybe_convert_numeric_post_floatify_nan(self, coerce): + # see gh-13314 + data = np.array(['1.200', '-999.000', '4.500'], dtype=object) + expected = np.array([1.2, np.nan, 4.5], dtype=np.float64) + nan_values = set([-999, -999.0]) + + out = lib.maybe_convert_numeric(data, nan_values, coerce) + tm.assert_numpy_array_equal(out, expected) + + def test_convert_infs(self): + arr = np.array(['inf', 'inf', 'inf'], dtype='O') + result = lib.maybe_convert_numeric(arr, set(), False) + assert result.dtype == np.float64 + + arr = np.array(['-inf', '-inf', '-inf'], dtype='O') + result = lib.maybe_convert_numeric(arr, set(), False) + assert result.dtype == np.float64 + + def test_scientific_no_exponent(self): + # See PR 12215 + arr = np.array(['42E', '2E', '99e', '6e'], dtype='O') + result = lib.maybe_convert_numeric(arr, set(), False, True) + assert np.all(np.isnan(result)) + + def test_convert_non_hashable(self): + # GH13324 + # make sure that we are handing non-hashables + arr = np.array([[10.0, 2], 1.0, 'apple']) + result = lib.maybe_convert_numeric(arr, set(), False, True) + tm.assert_numpy_array_equal(result, np.array([np.nan, 1.0, np.nan])) + + def test_convert_numeric_uint64(self): + arr = np.array([2**63], dtype=object) + exp = np.array([2**63], dtype=np.uint64) + tm.assert_numpy_array_equal(lib.maybe_convert_numeric(arr, set()), exp) + + arr = np.array([str(2**63)], dtype=object) + exp = np.array([2**63], 
dtype=np.uint64) + tm.assert_numpy_array_equal(lib.maybe_convert_numeric(arr, set()), exp) + + arr = np.array([np.uint64(2**63)], dtype=object) + exp = np.array([2**63], dtype=np.uint64) + tm.assert_numpy_array_equal(lib.maybe_convert_numeric(arr, set()), exp) + + @pytest.mark.parametrize("arr", [ + np.array([2**63, np.nan], dtype=object), + np.array([str(2**63), np.nan], dtype=object), + np.array([np.nan, 2**63], dtype=object), + np.array([np.nan, str(2**63)], dtype=object)]) + def test_convert_numeric_uint64_nan(self, coerce, arr): + expected = arr.astype(float) if coerce else arr.copy() + result = lib.maybe_convert_numeric(arr, set(), + coerce_numeric=coerce) + tm.assert_almost_equal(result, expected) + + def test_convert_numeric_uint64_nan_values(self, coerce): + arr = np.array([2**63, 2**63 + 1], dtype=object) + na_values = set([2**63]) + + expected = (np.array([np.nan, 2**63 + 1], dtype=float) + if coerce else arr.copy()) + result = lib.maybe_convert_numeric(arr, na_values, + coerce_numeric=coerce) + tm.assert_almost_equal(result, expected) + + @pytest.mark.parametrize("case", [ + np.array([2**63, -1], dtype=object), + np.array([str(2**63), -1], dtype=object), + np.array([str(2**63), str(-1)], dtype=object), + np.array([-1, 2**63], dtype=object), + np.array([-1, str(2**63)], dtype=object), + np.array([str(-1), str(2**63)], dtype=object)]) + def test_convert_numeric_int64_uint64(self, case, coerce): + expected = case.astype(float) if coerce else case.copy() + result = lib.maybe_convert_numeric(case, set(), coerce_numeric=coerce) + tm.assert_almost_equal(result, expected) + + @pytest.mark.parametrize("value", [-2**63 - 1, 2**64]) + def test_convert_int_overflow(self, value): + # see gh-18584 + arr = np.array([value], dtype=object) + result = lib.maybe_convert_objects(arr) + tm.assert_numpy_array_equal(arr, result) + + def test_maybe_convert_objects_uint64(self): + # see gh-4471 + arr = np.array([2**63], dtype=object) + exp = np.array([2**63], dtype=np.uint64) + 
tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp) + + # NumPy bug: can't compare uint64 to int64, as that + # results in both casting to float64, so we should + # make sure that this function is robust against it + arr = np.array([np.uint64(2**63)], dtype=object) + exp = np.array([2**63], dtype=np.uint64) + tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp) + + arr = np.array([2, -1], dtype=object) + exp = np.array([2, -1], dtype=np.int64) + tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp) + + arr = np.array([2**63, -1], dtype=object) + exp = np.array([2**63, -1], dtype=object) + tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp) + + def test_mixed_dtypes_remain_object_array(self): + # GH14956 + array = np.array([datetime(2015, 1, 1, tzinfo=pytz.utc), 1], + dtype=object) + result = lib.maybe_convert_objects(array, convert_datetime=1) + tm.assert_numpy_array_equal(result, array) + + +class TestTypeInference(object): + + # Dummy class used for testing with Python objects + class Dummy(): + pass + + def test_length_zero(self): + result = lib.infer_dtype(np.array([], dtype='i4')) + assert result == 'integer' + + result = lib.infer_dtype([]) + assert result == 'empty' + + # GH 18004 + arr = np.array([np.array([], dtype=object), + np.array([], dtype=object)]) + result = lib.infer_dtype(arr) + assert result == 'empty' + + def test_integers(self): + arr = np.array([1, 2, 3, np.int64(4), np.int32(5)], dtype='O') + result = lib.infer_dtype(arr) + assert result == 'integer' + + arr = np.array([1, 2, 3, np.int64(4), np.int32(5), 'foo'], dtype='O') + result = lib.infer_dtype(arr) + assert result == 'mixed-integer' + + arr = np.array([1, 2, 3, 4, 5], dtype='i4') + result = lib.infer_dtype(arr) + assert result == 'integer' + + def test_bools(self): + arr = np.array([True, False, True, True, True], dtype='O') + result = lib.infer_dtype(arr) + assert result == 'boolean' + + arr = np.array([np.bool_(True), np.bool_(False)], 
dtype='O') + result = lib.infer_dtype(arr) + assert result == 'boolean' + + arr = np.array([True, False, True, 'foo'], dtype='O') + result = lib.infer_dtype(arr) + assert result == 'mixed' + + arr = np.array([True, False, True], dtype=bool) + result = lib.infer_dtype(arr) + assert result == 'boolean' + + arr = np.array([True, np.nan, False], dtype='O') + result = lib.infer_dtype(arr, skipna=True) + assert result == 'boolean' + + def test_floats(self): + arr = np.array([1., 2., 3., np.float64(4), np.float32(5)], dtype='O') + result = lib.infer_dtype(arr) + assert result == 'floating' + + arr = np.array([1, 2, 3, np.float64(4), np.float32(5), 'foo'], + dtype='O') + result = lib.infer_dtype(arr) + assert result == 'mixed-integer' + + arr = np.array([1, 2, 3, 4, 5], dtype='f4') + result = lib.infer_dtype(arr) + assert result == 'floating' + + arr = np.array([1, 2, 3, 4, 5], dtype='f8') + result = lib.infer_dtype(arr) + assert result == 'floating' + + def test_decimals(self): + # GH15690 + arr = np.array([Decimal(1), Decimal(2), Decimal(3)]) + result = lib.infer_dtype(arr) + assert result == 'decimal' + + arr = np.array([1.0, 2.0, Decimal(3)]) + result = lib.infer_dtype(arr) + assert result == 'mixed' + + arr = np.array([Decimal(1), Decimal('NaN'), Decimal(3)]) + result = lib.infer_dtype(arr) + assert result == 'decimal' + + arr = np.array([Decimal(1), np.nan, Decimal(3)], dtype='O') + result = lib.infer_dtype(arr) + assert result == 'decimal' + + def test_string(self): + pass + + def test_unicode(self): + arr = [u'a', np.nan, u'c'] + result = lib.infer_dtype(arr) + assert result == 'mixed' + + arr = [u'a', np.nan, u'c'] + result = lib.infer_dtype(arr, skipna=True) + expected = 'unicode' if PY2 else 'string' + assert result == expected + + def test_datetime(self): + + dates = [datetime(2012, 1, x) for x in range(1, 20)] + index = Index(dates) + assert index.inferred_type == 'datetime64' + + def test_infer_dtype_datetime(self): + + arr = 
np.array([Timestamp('2011-01-01'), + Timestamp('2011-01-02')]) + assert lib.infer_dtype(arr) == 'datetime' + + arr = np.array([np.datetime64('2011-01-01'), + np.datetime64('2011-01-01')], dtype=object) + assert lib.infer_dtype(arr) == 'datetime64' + + arr = np.array([datetime(2011, 1, 1), datetime(2012, 2, 1)]) + assert lib.infer_dtype(arr) == 'datetime' + + # starts with nan + for n in [pd.NaT, np.nan]: + arr = np.array([n, pd.Timestamp('2011-01-02')]) + assert lib.infer_dtype(arr) == 'datetime' + + arr = np.array([n, np.datetime64('2011-01-02')]) + assert lib.infer_dtype(arr) == 'datetime64' + + arr = np.array([n, datetime(2011, 1, 1)]) + assert lib.infer_dtype(arr) == 'datetime' + + arr = np.array([n, pd.Timestamp('2011-01-02'), n]) + assert lib.infer_dtype(arr) == 'datetime' + + arr = np.array([n, np.datetime64('2011-01-02'), n]) + assert lib.infer_dtype(arr) == 'datetime64' + + arr = np.array([n, datetime(2011, 1, 1), n]) + assert lib.infer_dtype(arr) == 'datetime' + + # different type of nat + arr = np.array([np.timedelta64('nat'), + np.datetime64('2011-01-02')], dtype=object) + assert lib.infer_dtype(arr) == 'mixed' + + arr = np.array([np.datetime64('2011-01-02'), + np.timedelta64('nat')], dtype=object) + assert lib.infer_dtype(arr) == 'mixed' + + # mixed datetime + arr = np.array([datetime(2011, 1, 1), + pd.Timestamp('2011-01-02')]) + assert lib.infer_dtype(arr) == 'datetime' + + # should be datetime? 
+ arr = np.array([np.datetime64('2011-01-01'), + pd.Timestamp('2011-01-02')]) + assert lib.infer_dtype(arr) == 'mixed' + + arr = np.array([pd.Timestamp('2011-01-02'), + np.datetime64('2011-01-01')]) + assert lib.infer_dtype(arr) == 'mixed' + + arr = np.array([np.nan, pd.Timestamp('2011-01-02'), 1]) + assert lib.infer_dtype(arr) == 'mixed-integer' + + arr = np.array([np.nan, pd.Timestamp('2011-01-02'), 1.1]) + assert lib.infer_dtype(arr) == 'mixed' + + arr = np.array([np.nan, '2011-01-01', pd.Timestamp('2011-01-02')]) + assert lib.infer_dtype(arr) == 'mixed' + + def test_infer_dtype_timedelta(self): + + arr = np.array([pd.Timedelta('1 days'), + pd.Timedelta('2 days')]) + assert lib.infer_dtype(arr) == 'timedelta' + + arr = np.array([np.timedelta64(1, 'D'), + np.timedelta64(2, 'D')], dtype=object) + assert lib.infer_dtype(arr) == 'timedelta' + + arr = np.array([timedelta(1), timedelta(2)]) + assert lib.infer_dtype(arr) == 'timedelta' + + # starts with nan + for n in [pd.NaT, np.nan]: + arr = np.array([n, Timedelta('1 days')]) + assert lib.infer_dtype(arr) == 'timedelta' + + arr = np.array([n, np.timedelta64(1, 'D')]) + assert lib.infer_dtype(arr) == 'timedelta' + + arr = np.array([n, timedelta(1)]) + assert lib.infer_dtype(arr) == 'timedelta' + + arr = np.array([n, pd.Timedelta('1 days'), n]) + assert lib.infer_dtype(arr) == 'timedelta' + + arr = np.array([n, np.timedelta64(1, 'D'), n]) + assert lib.infer_dtype(arr) == 'timedelta' + + arr = np.array([n, timedelta(1), n]) + assert lib.infer_dtype(arr) == 'timedelta' + + # different type of nat + arr = np.array([np.datetime64('nat'), np.timedelta64(1, 'D')], + dtype=object) + assert lib.infer_dtype(arr) == 'mixed' + + arr = np.array([np.timedelta64(1, 'D'), np.datetime64('nat')], + dtype=object) + assert lib.infer_dtype(arr) == 'mixed' + + def test_infer_dtype_period(self): + # GH 13664 + arr = np.array([pd.Period('2011-01', freq='D'), + pd.Period('2011-02', freq='D')]) + assert lib.infer_dtype(arr) == 'period' + + arr 
= np.array([pd.Period('2011-01', freq='D'), + pd.Period('2011-02', freq='M')]) + assert lib.infer_dtype(arr) == 'period' + + # starts with nan + for n in [pd.NaT, np.nan]: + arr = np.array([n, pd.Period('2011-01', freq='D')]) + assert lib.infer_dtype(arr) == 'period' + + arr = np.array([n, pd.Period('2011-01', freq='D'), n]) + assert lib.infer_dtype(arr) == 'period' + + # different type of nat + arr = np.array([np.datetime64('nat'), pd.Period('2011-01', freq='M')], + dtype=object) + assert lib.infer_dtype(arr) == 'mixed' + + arr = np.array([pd.Period('2011-01', freq='M'), np.datetime64('nat')], + dtype=object) + assert lib.infer_dtype(arr) == 'mixed' + + @pytest.mark.parametrize( + "data", + [ + [datetime(2017, 6, 12, 19, 30), datetime(2017, 3, 11, 1, 15)], + [Timestamp("20170612"), Timestamp("20170311")], + [Timestamp("20170612", tz='US/Eastern'), + Timestamp("20170311", tz='US/Eastern')], + [date(2017, 6, 12), + Timestamp("20170311", tz='US/Eastern')], + [np.datetime64("2017-06-12"), np.datetime64("2017-03-11")], + [np.datetime64("2017-06-12"), datetime(2017, 3, 11, 1, 15)] + ] + ) + def test_infer_datetimelike_array_datetime(self, data): + assert lib.infer_datetimelike_array(data) == "datetime" + + @pytest.mark.parametrize( + "data", + [ + [timedelta(2017, 6, 12), timedelta(2017, 3, 11)], + [timedelta(2017, 6, 12), date(2017, 3, 11)], + [np.timedelta64(2017, "D"), np.timedelta64(6, "s")], + [np.timedelta64(2017, "D"), timedelta(2017, 3, 11)] + ] + ) + def test_infer_datetimelike_array_timedelta(self, data): + assert lib.infer_datetimelike_array(data) == "timedelta" + + def test_infer_datetimelike_array_date(self): + arr = [date(2017, 6, 12), date(2017, 3, 11)] + assert lib.infer_datetimelike_array(arr) == "date" + + @pytest.mark.parametrize( + "data", + [ + ["2017-06-12", "2017-03-11"], + [20170612, 20170311], + [20170612.5, 20170311.8], + [Dummy(), Dummy()], + [Timestamp("20170612"), Timestamp("20170311", tz='US/Eastern')], + [Timestamp("20170612"), 20170311], 
+ [timedelta(2017, 6, 12), Timestamp("20170311", tz='US/Eastern')] + ] + ) + def test_infer_datetimelike_array_mixed(self, data): + assert lib.infer_datetimelike_array(data) == "mixed" + + @pytest.mark.parametrize( + "first, expected", + [ + [[None], "mixed"], + [[np.nan], "mixed"], + [[pd.NaT], "nat"], + [[datetime(2017, 6, 12, 19, 30), pd.NaT], "datetime"], + [[np.datetime64("2017-06-12"), pd.NaT], "datetime"], + [[date(2017, 6, 12), pd.NaT], "date"], + [[timedelta(2017, 6, 12), pd.NaT], "timedelta"], + [[np.timedelta64(2017, "D"), pd.NaT], "timedelta"] + ] + ) + @pytest.mark.parametrize("second", [None, np.nan]) + def test_infer_datetimelike_array_nan_nat_like(self, first, second, + expected): + first.append(second) + assert lib.infer_datetimelike_array(first) == expected + + def test_infer_dtype_all_nan_nat_like(self): + arr = np.array([np.nan, np.nan]) + assert lib.infer_dtype(arr) == 'floating' + + # nan and None mix are result in mixed + arr = np.array([np.nan, np.nan, None]) + assert lib.infer_dtype(arr) == 'mixed' + + arr = np.array([None, np.nan, np.nan]) + assert lib.infer_dtype(arr) == 'mixed' + + # pd.NaT + arr = np.array([pd.NaT]) + assert lib.infer_dtype(arr) == 'datetime' + + arr = np.array([pd.NaT, np.nan]) + assert lib.infer_dtype(arr) == 'datetime' + + arr = np.array([np.nan, pd.NaT]) + assert lib.infer_dtype(arr) == 'datetime' + + arr = np.array([np.nan, pd.NaT, np.nan]) + assert lib.infer_dtype(arr) == 'datetime' + + arr = np.array([None, pd.NaT, None]) + assert lib.infer_dtype(arr) == 'datetime' + + # np.datetime64(nat) + arr = np.array([np.datetime64('nat')]) + assert lib.infer_dtype(arr) == 'datetime64' + + for n in [np.nan, pd.NaT, None]: + arr = np.array([n, np.datetime64('nat'), n]) + assert lib.infer_dtype(arr) == 'datetime64' + + arr = np.array([pd.NaT, n, np.datetime64('nat'), n]) + assert lib.infer_dtype(arr) == 'datetime64' + + arr = np.array([np.timedelta64('nat')], dtype=object) + assert lib.infer_dtype(arr) == 'timedelta' + + for 
n in [np.nan, pd.NaT, None]: + arr = np.array([n, np.timedelta64('nat'), n]) + assert lib.infer_dtype(arr) == 'timedelta' + + arr = np.array([pd.NaT, n, np.timedelta64('nat'), n]) + assert lib.infer_dtype(arr) == 'timedelta' + + # datetime / timedelta mixed + arr = np.array([pd.NaT, np.datetime64('nat'), + np.timedelta64('nat'), np.nan]) + assert lib.infer_dtype(arr) == 'mixed' + + arr = np.array([np.timedelta64('nat'), np.datetime64('nat')], + dtype=object) + assert lib.infer_dtype(arr) == 'mixed' + + def test_is_datetimelike_array_all_nan_nat_like(self): + arr = np.array([np.nan, pd.NaT, np.datetime64('nat')]) + assert lib.is_datetime_array(arr) + assert lib.is_datetime64_array(arr) + assert not lib.is_timedelta_array(arr) + assert not lib.is_timedelta64_array(arr) + assert not lib.is_timedelta_or_timedelta64_array(arr) + + arr = np.array([np.nan, pd.NaT, np.timedelta64('nat')]) + assert not lib.is_datetime_array(arr) + assert not lib.is_datetime64_array(arr) + assert lib.is_timedelta_array(arr) + assert lib.is_timedelta64_array(arr) + assert lib.is_timedelta_or_timedelta64_array(arr) + + arr = np.array([np.nan, pd.NaT, np.datetime64('nat'), + np.timedelta64('nat')]) + assert not lib.is_datetime_array(arr) + assert not lib.is_datetime64_array(arr) + assert not lib.is_timedelta_array(arr) + assert not lib.is_timedelta64_array(arr) + assert not lib.is_timedelta_or_timedelta64_array(arr) + + arr = np.array([np.nan, pd.NaT]) + assert lib.is_datetime_array(arr) + assert lib.is_datetime64_array(arr) + assert lib.is_timedelta_array(arr) + assert lib.is_timedelta64_array(arr) + assert lib.is_timedelta_or_timedelta64_array(arr) + + arr = np.array([np.nan, np.nan], dtype=object) + assert not lib.is_datetime_array(arr) + assert not lib.is_datetime64_array(arr) + assert not lib.is_timedelta_array(arr) + assert not lib.is_timedelta64_array(arr) + assert not lib.is_timedelta_or_timedelta64_array(arr) + + assert lib.is_datetime_with_singletz_array( + 
np.array([pd.Timestamp('20130101', tz='US/Eastern'), + pd.Timestamp('20130102', tz='US/Eastern')], + dtype=object)) + assert not lib.is_datetime_with_singletz_array( + np.array([pd.Timestamp('20130101', tz='US/Eastern'), + pd.Timestamp('20130102', tz='CET')], + dtype=object)) + + @pytest.mark.parametrize( + "func", + [ + 'is_datetime_array', + 'is_datetime64_array', + 'is_bool_array', + 'is_timedelta_array', + 'is_timedelta64_array', + 'is_timedelta_or_timedelta64_array', + 'is_date_array', + 'is_time_array', + 'is_interval_array', + 'is_period_array']) + def test_other_dtypes_for_array(self, func): + func = getattr(lib, func) + arr = np.array(['foo', 'bar']) + assert not func(arr) + + arr = np.array([1, 2]) + assert not func(arr) + + def test_date(self): + + dates = [date(2012, 1, day) for day in range(1, 20)] + index = Index(dates) + assert index.inferred_type == 'date' + + dates = [date(2012, 1, day) for day in range(1, 20)] + [np.nan] + result = lib.infer_dtype(dates) + assert result == 'mixed' + + result = lib.infer_dtype(dates, skipna=True) + assert result == 'date' + + def test_is_numeric_array(self): + + assert lib.is_float_array(np.array([1, 2.0])) + assert lib.is_float_array(np.array([1, 2.0, np.nan])) + assert not lib.is_float_array(np.array([1, 2])) + + assert lib.is_integer_array(np.array([1, 2])) + assert not lib.is_integer_array(np.array([1, 2.0])) + + def test_is_string_array(self): + + assert lib.is_string_array(np.array(['foo', 'bar'])) + assert not lib.is_string_array( + np.array(['foo', 'bar', np.nan], dtype=object), skipna=False) + assert lib.is_string_array( + np.array(['foo', 'bar', np.nan], dtype=object), skipna=True) + assert not lib.is_string_array(np.array([1, 2])) + + def test_to_object_array_tuples(self): + r = (5, 6) + values = [r] + result = lib.to_object_array_tuples(values) + + try: + # make sure record array works + from collections import namedtuple + record = namedtuple('record', 'x y') + r = record(5, 6) + values = [r] + result 
= lib.to_object_array_tuples(values) # noqa + except ImportError: + pass + + def test_object(self): + + # GH 7431 + # cannot infer more than this as only a single element + arr = np.array([None], dtype='O') + result = lib.infer_dtype(arr) + assert result == 'mixed' + + def test_to_object_array_width(self): + # see gh-13320 + rows = [[1, 2, 3], [4, 5, 6]] + + expected = np.array(rows, dtype=object) + out = lib.to_object_array(rows) + tm.assert_numpy_array_equal(out, expected) + + expected = np.array(rows, dtype=object) + out = lib.to_object_array(rows, min_width=1) + tm.assert_numpy_array_equal(out, expected) + + expected = np.array([[1, 2, 3, None, None], + [4, 5, 6, None, None]], dtype=object) + out = lib.to_object_array(rows, min_width=5) + tm.assert_numpy_array_equal(out, expected) + + def test_is_period(self): + assert lib.is_period(pd.Period('2011-01', freq='M')) + assert not lib.is_period(pd.PeriodIndex(['2011-01'], freq='M')) + assert not lib.is_period(pd.Timestamp('2011-01')) + assert not lib.is_period(1) + assert not lib.is_period(np.nan) + + def test_categorical(self): + + # GH 8974 + from pandas import Categorical, Series + arr = Categorical(list('abc')) + result = lib.infer_dtype(arr) + assert result == 'categorical' + + result = lib.infer_dtype(Series(arr)) + assert result == 'categorical' + + arr = Categorical(list('abc'), categories=['cegfab'], ordered=True) + result = lib.infer_dtype(arr) + assert result == 'categorical' + + result = lib.infer_dtype(Series(arr)) + assert result == 'categorical' + + +class TestNumberScalar(object): + + def test_is_number(self): + + assert is_number(True) + assert is_number(1) + assert is_number(1.1) + assert is_number(1 + 3j) + assert is_number(np.bool(False)) + assert is_number(np.int64(1)) + assert is_number(np.float64(1.1)) + assert is_number(np.complex128(1 + 3j)) + assert is_number(np.nan) + + assert not is_number(None) + assert not is_number('x') + assert not is_number(datetime(2011, 1, 1)) + assert not 
is_number(np.datetime64('2011-01-01')) + assert not is_number(Timestamp('2011-01-01')) + assert not is_number(Timestamp('2011-01-01', tz='US/Eastern')) + assert not is_number(timedelta(1000)) + assert not is_number(Timedelta('1 days')) + + # questionable + assert not is_number(np.bool_(False)) + assert is_number(np.timedelta64(1, 'D')) + + def test_is_bool(self): + assert is_bool(True) + assert is_bool(np.bool(False)) + assert is_bool(np.bool_(False)) + + assert not is_bool(1) + assert not is_bool(1.1) + assert not is_bool(1 + 3j) + assert not is_bool(np.int64(1)) + assert not is_bool(np.float64(1.1)) + assert not is_bool(np.complex128(1 + 3j)) + assert not is_bool(np.nan) + assert not is_bool(None) + assert not is_bool('x') + assert not is_bool(datetime(2011, 1, 1)) + assert not is_bool(np.datetime64('2011-01-01')) + assert not is_bool(Timestamp('2011-01-01')) + assert not is_bool(Timestamp('2011-01-01', tz='US/Eastern')) + assert not is_bool(timedelta(1000)) + assert not is_bool(np.timedelta64(1, 'D')) + assert not is_bool(Timedelta('1 days')) + + def test_is_integer(self): + assert is_integer(1) + assert is_integer(np.int64(1)) + + assert not is_integer(True) + assert not is_integer(1.1) + assert not is_integer(1 + 3j) + assert not is_integer(np.bool(False)) + assert not is_integer(np.bool_(False)) + assert not is_integer(np.float64(1.1)) + assert not is_integer(np.complex128(1 + 3j)) + assert not is_integer(np.nan) + assert not is_integer(None) + assert not is_integer('x') + assert not is_integer(datetime(2011, 1, 1)) + assert not is_integer(np.datetime64('2011-01-01')) + assert not is_integer(Timestamp('2011-01-01')) + assert not is_integer(Timestamp('2011-01-01', tz='US/Eastern')) + assert not is_integer(timedelta(1000)) + assert not is_integer(Timedelta('1 days')) + + # questionable + assert is_integer(np.timedelta64(1, 'D')) + + def test_is_float(self): + assert is_float(1.1) + assert is_float(np.float64(1.1)) + assert is_float(np.nan) + + assert not 
is_float(True) + assert not is_float(1) + assert not is_float(1 + 3j) + assert not is_float(np.bool(False)) + assert not is_float(np.bool_(False)) + assert not is_float(np.int64(1)) + assert not is_float(np.complex128(1 + 3j)) + assert not is_float(None) + assert not is_float('x') + assert not is_float(datetime(2011, 1, 1)) + assert not is_float(np.datetime64('2011-01-01')) + assert not is_float(Timestamp('2011-01-01')) + assert not is_float(Timestamp('2011-01-01', tz='US/Eastern')) + assert not is_float(timedelta(1000)) + assert not is_float(np.timedelta64(1, 'D')) + assert not is_float(Timedelta('1 days')) + + def test_is_datetime_dtypes(self): + + ts = pd.date_range('20130101', periods=3) + tsa = pd.date_range('20130101', periods=3, tz='US/Eastern') + + assert is_datetime64_dtype('datetime64') + assert is_datetime64_dtype('datetime64[ns]') + assert is_datetime64_dtype(ts) + assert not is_datetime64_dtype(tsa) + + assert not is_datetime64_ns_dtype('datetime64') + assert is_datetime64_ns_dtype('datetime64[ns]') + assert is_datetime64_ns_dtype(ts) + assert is_datetime64_ns_dtype(tsa) + + assert is_datetime64_any_dtype('datetime64') + assert is_datetime64_any_dtype('datetime64[ns]') + assert is_datetime64_any_dtype(ts) + assert is_datetime64_any_dtype(tsa) + + assert not is_datetime64tz_dtype('datetime64') + assert not is_datetime64tz_dtype('datetime64[ns]') + assert not is_datetime64tz_dtype(ts) + assert is_datetime64tz_dtype(tsa) + + for tz in ['US/Eastern', 'UTC']: + dtype = 'datetime64[ns, {}]'.format(tz) + assert not is_datetime64_dtype(dtype) + assert is_datetime64tz_dtype(dtype) + assert is_datetime64_ns_dtype(dtype) + assert is_datetime64_any_dtype(dtype) + + def test_is_timedelta(self): + assert is_timedelta64_dtype('timedelta64') + assert is_timedelta64_dtype('timedelta64[ns]') + assert not is_timedelta64_ns_dtype('timedelta64') + assert is_timedelta64_ns_dtype('timedelta64[ns]') + + tdi = TimedeltaIndex([1e14, 2e14], dtype='timedelta64') + assert 
is_timedelta64_dtype(tdi) + assert is_timedelta64_ns_dtype(tdi) + assert is_timedelta64_ns_dtype(tdi.astype('timedelta64[ns]')) + + # Conversion to Int64Index: + assert not is_timedelta64_ns_dtype(tdi.astype('timedelta64')) + assert not is_timedelta64_ns_dtype(tdi.astype('timedelta64[h]')) + + +class TestIsScalar(object): + + def test_is_scalar_builtin_scalars(self): + assert is_scalar(None) + assert is_scalar(True) + assert is_scalar(False) + assert is_scalar(0.) + assert is_scalar(np.nan) + assert is_scalar('foobar') + assert is_scalar(b'foobar') + assert is_scalar(u('efoobar')) + assert is_scalar(datetime(2014, 1, 1)) + assert is_scalar(date(2014, 1, 1)) + assert is_scalar(time(12, 0)) + assert is_scalar(timedelta(hours=1)) + assert is_scalar(pd.NaT) + + def test_is_scalar_builtin_nonscalars(self): + assert not is_scalar({}) + assert not is_scalar([]) + assert not is_scalar([1]) + assert not is_scalar(()) + assert not is_scalar((1, )) + assert not is_scalar(slice(None)) + assert not is_scalar(Ellipsis) + + def test_is_scalar_numpy_array_scalars(self): + assert is_scalar(np.int64(1)) + assert is_scalar(np.float64(1.)) + assert is_scalar(np.int32(1)) + assert is_scalar(np.object_('foobar')) + assert is_scalar(np.str_('foobar')) + assert is_scalar(np.unicode_(u('foobar'))) + assert is_scalar(np.bytes_(b'foobar')) + assert is_scalar(np.datetime64('2014-01-01')) + assert is_scalar(np.timedelta64(1, 'h')) + + def test_is_scalar_numpy_zerodim_arrays(self): + for zerodim in [np.array(1), np.array('foobar'), + np.array(np.datetime64('2014-01-01')), + np.array(np.timedelta64(1, 'h')), + np.array(np.datetime64('NaT'))]: + assert not is_scalar(zerodim) + assert is_scalar(lib.item_from_zerodim(zerodim)) + + def test_is_scalar_numpy_arrays(self): + assert not is_scalar(np.array([])) + assert not is_scalar(np.array([[]])) + assert not is_scalar(np.matrix('1; 2')) + + def test_is_scalar_pandas_scalars(self): + assert is_scalar(Timestamp('2014-01-01')) + assert 
is_scalar(Timedelta(hours=1)) + assert is_scalar(Period('2014-01-01')) + assert is_scalar(Interval(left=0, right=1)) + assert is_scalar(DateOffset(days=1)) + + def test_is_scalar_pandas_containers(self): + assert not is_scalar(Series()) + assert not is_scalar(Series([1])) + assert not is_scalar(DataFrame()) + assert not is_scalar(DataFrame([[1]])) + with catch_warnings(record=True): + assert not is_scalar(Panel()) + assert not is_scalar(Panel([[[1]]])) + assert not is_scalar(Index([])) + assert not is_scalar(Index([1])) + + +def test_datetimeindex_from_empty_datetime64_array(): + for unit in ['ms', 'us', 'ns']: + idx = DatetimeIndex(np.array([], dtype='datetime64[%s]' % unit)) + assert (len(idx) == 0) + + +def test_nan_to_nat_conversions(): + + df = DataFrame(dict({ + 'A': np.asarray( + lrange(10), dtype='float64'), + 'B': Timestamp('20010101') + })) + df.iloc[3:6, :] = np.nan + result = df.loc[4, 'B'].value + assert (result == tslib.iNaT) + + s = df['B'].copy() + s._data = s._data.setitem(indexer=tuple([slice(8, 9)]), value=np.nan) + assert (isna(s[8])) + + # numpy < 1.7.0 is wrong + from distutils.version import LooseVersion + if LooseVersion(np.__version__) >= LooseVersion('1.7.0'): + assert (s[8].value == np.datetime64('NaT').astype(np.int64)) + + +@td.skip_if_no_scipy +def test_is_scipy_sparse(spmatrix): # noqa: F811 + assert is_scipy_sparse(spmatrix([[0, 1]])) + assert not is_scipy_sparse(np.array([1])) + + +def test_ensure_int32(): + values = np.arange(10, dtype=np.int32) + result = _ensure_int32(values) + assert (result.dtype == np.int32) + + values = np.arange(10, dtype=np.int64) + result = _ensure_int32(values) + assert (result.dtype == np.int32) + + +def test_ensure_categorical(): + values = np.arange(10, dtype=np.int32) + result = _ensure_categorical(values) + assert (result.dtype == 'category') + + values = Categorical(values) + result = _ensure_categorical(values) + tm.assert_categorical_equal(result, values) diff --git 
a/pandas/tests/types/test_missing.py b/pandas/tests/dtypes/test_missing.py similarity index 54% rename from pandas/tests/types/test_missing.py rename to pandas/tests/dtypes/test_missing.py index cab44f1122ae1..4f208bc352c70 100644 --- a/pandas/tests/types/test_missing.py +++ b/pandas/tests/dtypes/test_missing.py @@ -1,5 +1,7 @@ # -*- coding: utf-8 -*- +import pytest +from warnings import catch_warnings import numpy as np from datetime import datetime from pandas.util import testing as tm @@ -7,156 +9,181 @@ import pandas as pd from pandas.core import config as cf from pandas.compat import u -from pandas.tslib import iNaT + +from pandas._libs import missing as libmissing +from pandas._libs.tslib import iNaT from pandas import (NaT, Float64Index, Series, DatetimeIndex, TimedeltaIndex, date_range) -from pandas.types.dtypes import DatetimeTZDtype -from pandas.types.missing import (array_equivalent, isnull, notnull, - na_value_for_dtype) +from pandas.core.dtypes.common import is_scalar +from pandas.core.dtypes.dtypes import DatetimeTZDtype +from pandas.core.dtypes.missing import ( + array_equivalent, isna, notna, isnull, notnull, + na_value_for_dtype) -def test_notnull(): - assert notnull(1.) - assert not notnull(None) - assert not notnull(np.NaN) +@pytest.mark.parametrize('notna_f', [notna, notnull]) +def test_notna_notnull(notna_f): + assert notna_f(1.) 
+ assert not notna_f(None) + assert not notna_f(np.NaN) - with cf.option_context("mode.use_inf_as_null", False): - assert notnull(np.inf) - assert notnull(-np.inf) + with cf.option_context("mode.use_inf_as_na", False): + assert notna_f(np.inf) + assert notna_f(-np.inf) arr = np.array([1.5, np.inf, 3.5, -np.inf]) - result = notnull(arr) + result = notna_f(arr) assert result.all() - with cf.option_context("mode.use_inf_as_null", True): - assert not notnull(np.inf) - assert not notnull(-np.inf) + with cf.option_context("mode.use_inf_as_na", True): + assert not notna_f(np.inf) + assert not notna_f(-np.inf) arr = np.array([1.5, np.inf, 3.5, -np.inf]) - result = notnull(arr) + result = notna_f(arr) assert result.sum() == 2 - with cf.option_context("mode.use_inf_as_null", False): + with cf.option_context("mode.use_inf_as_na", False): for s in [tm.makeFloatSeries(), tm.makeStringSeries(), tm.makeObjectSeries(), tm.makeTimeSeries(), tm.makePeriodSeries()]: - assert (isinstance(isnull(s), Series)) + assert (isinstance(notna_f(s), Series)) -class TestIsNull(tm.TestCase): +class TestIsNA(object): def test_0d_array(self): - self.assertTrue(isnull(np.array(np.nan))) - self.assertFalse(isnull(np.array(0.0))) - self.assertFalse(isnull(np.array(0))) + assert isna(np.array(np.nan)) + assert not isna(np.array(0.0)) + assert not isna(np.array(0)) # test object dtype - self.assertTrue(isnull(np.array(np.nan, dtype=object))) - self.assertFalse(isnull(np.array(0.0, dtype=object))) - self.assertFalse(isnull(np.array(0, dtype=object))) - - def test_isnull(self): - self.assertFalse(isnull(1.)) - self.assertTrue(isnull(None)) - self.assertTrue(isnull(np.NaN)) - self.assertTrue(float('nan')) - self.assertFalse(isnull(np.inf)) - self.assertFalse(isnull(-np.inf)) + assert isna(np.array(np.nan, dtype=object)) + assert not isna(np.array(0.0, dtype=object)) + assert not isna(np.array(0, dtype=object)) + + def test_empty_object(self): + + for shape in [(4, 0), (4,)]: + arr = np.empty(shape=shape, 
dtype=object) + result = isna(arr) + expected = np.ones(shape=shape, dtype=bool) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize('isna_f', [isna, isnull]) + def test_isna_isnull(self, isna_f): + assert not isna_f(1.) + assert isna_f(None) + assert isna_f(np.NaN) + assert float('nan') + assert not isna_f(np.inf) + assert not isna_f(-np.inf) # series for s in [tm.makeFloatSeries(), tm.makeStringSeries(), tm.makeObjectSeries(), tm.makeTimeSeries(), tm.makePeriodSeries()]: - self.assertIsInstance(isnull(s), Series) + assert isinstance(isna_f(s), Series) # frame for df in [tm.makeTimeDataFrame(), tm.makePeriodFrame(), tm.makeMixedDataFrame()]: - result = isnull(df) - expected = df.apply(isnull) + result = isna_f(df) + expected = df.apply(isna_f) tm.assert_frame_equal(result, expected) # panel - for p in [tm.makePanel(), tm.makePeriodPanel(), - tm.add_nans(tm.makePanel())]: - result = isnull(p) - expected = p.apply(isnull) - tm.assert_panel_equal(result, expected) - - # panel 4d - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - for p in [tm.makePanel4D(), tm.add_nans_panel4d(tm.makePanel4D())]: - result = isnull(p) - expected = p.apply(isnull) - tm.assert_panel4d_equal(result, expected) - - def test_isnull_lists(self): - result = isnull([[False]]) + with catch_warnings(record=True): + for p in [tm.makePanel(), tm.makePeriodPanel(), + tm.add_nans(tm.makePanel())]: + result = isna_f(p) + expected = p.apply(isna_f) + tm.assert_panel_equal(result, expected) + + def test_isna_lists(self): + result = isna([[False]]) exp = np.array([[False]]) tm.assert_numpy_array_equal(result, exp) - result = isnull([[1], [2]]) + result = isna([[1], [2]]) exp = np.array([[False], [False]]) tm.assert_numpy_array_equal(result, exp) # list of strings / unicode - result = isnull(['foo', 'bar']) + result = isna(['foo', 'bar']) exp = np.array([False, False]) tm.assert_numpy_array_equal(result, exp) - result = isnull([u('foo'), u('bar')]) + 
result = isna([u('foo'), u('bar')]) exp = np.array([False, False]) tm.assert_numpy_array_equal(result, exp) - def test_isnull_nat(self): - result = isnull([NaT]) + def test_isna_nat(self): + result = isna([NaT]) exp = np.array([True]) tm.assert_numpy_array_equal(result, exp) - result = isnull(np.array([NaT], dtype=object)) + result = isna(np.array([NaT], dtype=object)) exp = np.array([True]) tm.assert_numpy_array_equal(result, exp) - def test_isnull_numpy_nat(self): + def test_isna_numpy_nat(self): arr = np.array([NaT, np.datetime64('NaT'), np.timedelta64('NaT'), np.datetime64('NaT', 's')]) - result = isnull(arr) + result = isna(arr) expected = np.array([True] * 4) tm.assert_numpy_array_equal(result, expected) - def test_isnull_datetime(self): - self.assertFalse(isnull(datetime.now())) - self.assertTrue(notnull(datetime.now())) + def test_isna_datetime(self): + assert not isna(datetime.now()) + assert notna(datetime.now()) idx = date_range('1/1/1990', periods=20) exp = np.ones(len(idx), dtype=bool) - tm.assert_numpy_array_equal(notnull(idx), exp) + tm.assert_numpy_array_equal(notna(idx), exp) idx = np.asarray(idx) idx[0] = iNaT idx = DatetimeIndex(idx) - mask = isnull(idx) - self.assertTrue(mask[0]) + mask = isna(idx) + assert mask[0] exp = np.array([True] + [False] * (len(idx) - 1), dtype=bool) - self.assert_numpy_array_equal(mask, exp) + tm.assert_numpy_array_equal(mask, exp) # GH 9129 pidx = idx.to_period(freq='M') - mask = isnull(pidx) - self.assertTrue(mask[0]) + mask = isna(pidx) + assert mask[0] exp = np.array([True] + [False] * (len(idx) - 1), dtype=bool) - self.assert_numpy_array_equal(mask, exp) + tm.assert_numpy_array_equal(mask, exp) - mask = isnull(pidx[1:]) + mask = isna(pidx[1:]) exp = np.zeros(len(mask), dtype=bool) - self.assert_numpy_array_equal(mask, exp) + tm.assert_numpy_array_equal(mask, exp) + + @pytest.mark.parametrize( + "value, expected", + [(np.complex128(np.nan), True), + (np.float64(1), False), + (np.array([1, 1 + 0j, np.nan, 3]), + 
np.array([False, False, True, False])), + (np.array([1, 1 + 0j, np.nan, 3], dtype=object), + np.array([False, False, True, False])), + (np.array([1, 1 + 0j, np.nan, 3]).astype(object), + np.array([False, False, True, False]))]) + def test_complex(self, value, expected): + result = isna(value) + if is_scalar(result): + assert result is expected + else: + tm.assert_numpy_array_equal(result, expected) def test_datetime_other_units(self): idx = pd.DatetimeIndex(['2011-01-01', 'NaT', '2011-01-02']) exp = np.array([False, True, False]) - tm.assert_numpy_array_equal(isnull(idx), exp) - tm.assert_numpy_array_equal(notnull(idx), ~exp) - tm.assert_numpy_array_equal(isnull(idx.values), exp) - tm.assert_numpy_array_equal(notnull(idx.values), ~exp) + tm.assert_numpy_array_equal(isna(idx), exp) + tm.assert_numpy_array_equal(notna(idx), ~exp) + tm.assert_numpy_array_equal(isna(idx.values), exp) + tm.assert_numpy_array_equal(notna(idx.values), ~exp) for dtype in ['datetime64[D]', 'datetime64[h]', 'datetime64[m]', 'datetime64[s]', 'datetime64[ms]', 'datetime64[us]', @@ -164,24 +191,24 @@ def test_datetime_other_units(self): values = idx.values.astype(dtype) exp = np.array([False, True, False]) - tm.assert_numpy_array_equal(isnull(values), exp) - tm.assert_numpy_array_equal(notnull(values), ~exp) + tm.assert_numpy_array_equal(isna(values), exp) + tm.assert_numpy_array_equal(notna(values), ~exp) exp = pd.Series([False, True, False]) s = pd.Series(values) - tm.assert_series_equal(isnull(s), exp) - tm.assert_series_equal(notnull(s), ~exp) + tm.assert_series_equal(isna(s), exp) + tm.assert_series_equal(notna(s), ~exp) s = pd.Series(values, dtype=object) - tm.assert_series_equal(isnull(s), exp) - tm.assert_series_equal(notnull(s), ~exp) + tm.assert_series_equal(isna(s), exp) + tm.assert_series_equal(notna(s), ~exp) def test_timedelta_other_units(self): idx = pd.TimedeltaIndex(['1 days', 'NaT', '2 days']) exp = np.array([False, True, False]) - tm.assert_numpy_array_equal(isnull(idx), exp) 
- tm.assert_numpy_array_equal(notnull(idx), ~exp) - tm.assert_numpy_array_equal(isnull(idx.values), exp) - tm.assert_numpy_array_equal(notnull(idx.values), ~exp) + tm.assert_numpy_array_equal(isna(idx), exp) + tm.assert_numpy_array_equal(notna(idx), ~exp) + tm.assert_numpy_array_equal(isna(idx.values), exp) + tm.assert_numpy_array_equal(notna(idx.values), ~exp) for dtype in ['timedelta64[D]', 'timedelta64[h]', 'timedelta64[m]', 'timedelta64[s]', 'timedelta64[ms]', 'timedelta64[us]', @@ -189,30 +216,30 @@ def test_timedelta_other_units(self): values = idx.values.astype(dtype) exp = np.array([False, True, False]) - tm.assert_numpy_array_equal(isnull(values), exp) - tm.assert_numpy_array_equal(notnull(values), ~exp) + tm.assert_numpy_array_equal(isna(values), exp) + tm.assert_numpy_array_equal(notna(values), ~exp) exp = pd.Series([False, True, False]) s = pd.Series(values) - tm.assert_series_equal(isnull(s), exp) - tm.assert_series_equal(notnull(s), ~exp) + tm.assert_series_equal(isna(s), exp) + tm.assert_series_equal(notna(s), ~exp) s = pd.Series(values, dtype=object) - tm.assert_series_equal(isnull(s), exp) - tm.assert_series_equal(notnull(s), ~exp) + tm.assert_series_equal(isna(s), exp) + tm.assert_series_equal(notna(s), ~exp) def test_period(self): idx = pd.PeriodIndex(['2011-01', 'NaT', '2012-01'], freq='M') exp = np.array([False, True, False]) - tm.assert_numpy_array_equal(isnull(idx), exp) - tm.assert_numpy_array_equal(notnull(idx), ~exp) + tm.assert_numpy_array_equal(isna(idx), exp) + tm.assert_numpy_array_equal(notna(idx), ~exp) exp = pd.Series([False, True, False]) s = pd.Series(idx) - tm.assert_series_equal(isnull(s), exp) - tm.assert_series_equal(notnull(s), ~exp) + tm.assert_series_equal(isna(s), exp) + tm.assert_series_equal(notna(s), ~exp) s = pd.Series(idx, dtype=object) - tm.assert_series_equal(isnull(s), exp) - tm.assert_series_equal(notnull(s), ~exp) + tm.assert_series_equal(isna(s), exp) + tm.assert_series_equal(notna(s), ~exp) def 
test_array_equivalent(): @@ -301,3 +328,52 @@ def test_na_value_for_dtype(): for dtype in ['O']: assert np.isnan(na_value_for_dtype(np.dtype(dtype))) + + +class TestNAObj(object): + + _1d_methods = ['isnaobj', 'isnaobj_old'] + _2d_methods = ['isnaobj2d', 'isnaobj2d_old'] + + def _check_behavior(self, arr, expected): + for method in TestNAObj._1d_methods: + result = getattr(libmissing, method)(arr) + tm.assert_numpy_array_equal(result, expected) + + arr = np.atleast_2d(arr) + expected = np.atleast_2d(expected) + + for method in TestNAObj._2d_methods: + result = getattr(libmissing, method)(arr) + tm.assert_numpy_array_equal(result, expected) + + def test_basic(self): + arr = np.array([1, None, 'foo', -5.1, pd.NaT, np.nan]) + expected = np.array([False, True, False, False, True, True]) + + self._check_behavior(arr, expected) + + def test_non_obj_dtype(self): + arr = np.array([1, 3, np.nan, 5], dtype=float) + expected = np.array([False, False, True, False]) + + self._check_behavior(arr, expected) + + def test_empty_arr(self): + arr = np.array([]) + expected = np.array([], dtype=bool) + + self._check_behavior(arr, expected) + + def test_empty_str_inp(self): + arr = np.array([""]) # empty but not na + expected = np.array([False]) + + self._check_behavior(arr, expected) + + def test_empty_like(self): + # see gh-13717: no segfaults! + arr = np.empty_like([None]) + expected = np.array([True]) + + self._check_behavior(arr, expected) diff --git a/pandas/tests/extension/__init__.py b/pandas/tests/extension/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/extension/base/__init__.py b/pandas/tests/extension/base/__init__.py new file mode 100644 index 0000000000000..27c106efd0524 --- /dev/null +++ b/pandas/tests/extension/base/__init__.py @@ -0,0 +1,50 @@ +"""Base test suite for extension arrays. + +These tests are intended for third-party libraries to subclass to validate +that their extension arrays and dtypes satisfy the interface. 
Moving or +renaming the tests should not be done lightly. + +Libraries are expected to implement a few pytest fixtures to provide data +for the tests. The fixtures may be located in either + +* The same module as your test class. +* A ``conftest.py`` in the same directory as your test class. + +The full list of fixtures may be found in the ``conftest.py`` next to this +file. + +.. code-block:: python + + import pytest + from pandas.tests.extension.base import BaseDtypeTests + + + @pytest.fixture + def dtype(): + return MyDtype() + + + class TestMyDtype(BaseDtypeTests): + pass + + +Your class ``TestMyDtype`` will inherit all the tests defined on +``BaseDtypeTests``. pytest's fixture discovery will supply your ``dtype`` +wherever the test requires it. You're free to implement additional tests. + +All the tests in these modules use ``self.assert_frame_equal`` or +``self.assert_series_equal`` for dataframe or series comparisons. By default, +they use the usual ``pandas.testing.assert_frame_equal`` and +``pandas.testing.assert_series_equal``. You can override the checks used +by defining the staticmethods ``assert_frame_equal`` and +``assert_series_equal`` on your base test class.
+ +""" +from .casting import BaseCastingTests # noqa +from .constructors import BaseConstructorsTests # noqa +from .dtype import BaseDtypeTests # noqa +from .getitem import BaseGetitemTests # noqa +from .interface import BaseInterfaceTests # noqa +from .methods import BaseMethodsTests # noqa +from .missing import BaseMissingTests # noqa +from .reshaping import BaseReshapingTests # noqa diff --git a/pandas/tests/extension/base/base.py b/pandas/tests/extension/base/base.py new file mode 100644 index 0000000000000..d29587e635ebd --- /dev/null +++ b/pandas/tests/extension/base/base.py @@ -0,0 +1,6 @@ +import pandas.util.testing as tm + + +class BaseExtensionTests(object): + assert_series_equal = staticmethod(tm.assert_series_equal) + assert_frame_equal = staticmethod(tm.assert_frame_equal) diff --git a/pandas/tests/extension/base/casting.py b/pandas/tests/extension/base/casting.py new file mode 100644 index 0000000000000..adc690939b36c --- /dev/null +++ b/pandas/tests/extension/base/casting.py @@ -0,0 +1,13 @@ +import pandas as pd +from pandas.core.internals import ObjectBlock + +from .base import BaseExtensionTests + + +class BaseCastingTests(BaseExtensionTests): + """Casting to and from ExtensionDtypes""" + + def test_astype_object_series(self, all_data): + ser = pd.Series({"A": all_data}) + result = ser.astype(object) + assert isinstance(result._data.blocks[0], ObjectBlock) diff --git a/pandas/tests/extension/base/constructors.py b/pandas/tests/extension/base/constructors.py new file mode 100644 index 0000000000000..4ac04d71338fd --- /dev/null +++ b/pandas/tests/extension/base/constructors.py @@ -0,0 +1,50 @@ +import pytest + +import pandas as pd +import pandas.util.testing as tm +from pandas.core.internals import ExtensionBlock + +from .base import BaseExtensionTests + + +class BaseConstructorsTests(BaseExtensionTests): + + def test_array_from_scalars(self, data): + scalars = [data[0], data[1], data[2]] + result = data._constructor_from_sequence(scalars) + assert 
isinstance(result, type(data)) + + def test_series_constructor(self, data): + result = pd.Series(data) + assert result.dtype == data.dtype + assert len(result) == len(data) + assert isinstance(result._data.blocks[0], ExtensionBlock) + assert result._data.blocks[0].values is data + + # Series[EA] is unboxed / boxed correctly + result2 = pd.Series(result) + assert result2.dtype == data.dtype + assert isinstance(result2._data.blocks[0], ExtensionBlock) + + @pytest.mark.parametrize("from_series", [True, False]) + def test_dataframe_constructor_from_dict(self, data, from_series): + if from_series: + data = pd.Series(data) + result = pd.DataFrame({"A": data}) + assert result.dtypes['A'] == data.dtype + assert result.shape == (len(data), 1) + assert isinstance(result._data.blocks[0], ExtensionBlock) + + def test_dataframe_from_series(self, data): + result = pd.DataFrame(pd.Series(data)) + assert result.dtypes[0] == data.dtype + assert result.shape == (len(data), 1) + assert isinstance(result._data.blocks[0], ExtensionBlock) + + @pytest.mark.xfail(reason="GH-19342") + def test_series_given_mismatched_index_raises(self, data): + msg = 'Wrong number of items passed 3, placement implies 4' + with tm.assert_raises_regex(ValueError, None) as m: + pd.Series(data[:3], index=[0, 1, 2, 3, 4]) + + assert m.match(msg) diff --git a/pandas/tests/extension/base/dtype.py b/pandas/tests/extension/base/dtype.py new file mode 100644 index 0000000000000..63d3d807c270c --- /dev/null +++ b/pandas/tests/extension/base/dtype.py @@ -0,0 +1,48 @@ +import numpy as np +import pandas as pd + +from .base import BaseExtensionTests + + +class BaseDtypeTests(BaseExtensionTests): + """Base class for ExtensionDtype classes""" + + def test_name(self, dtype): + assert isinstance(dtype.name, str) + + def test_kind(self, dtype): + valid = set('biufcmMOSUV') + if dtype.kind is not None: + assert dtype.kind in valid + + def test_construct_from_string_own_name(self, dtype): + result = 
dtype.construct_from_string(dtype.name) + assert type(result) is type(dtype) + + # check OK as classmethod + result = type(dtype).construct_from_string(dtype.name) + assert type(result) is type(dtype) + + def test_is_dtype_from_name(self, dtype): + result = type(dtype).is_dtype(dtype.name) + assert result is True + + def test_is_dtype_unboxes_dtype(self, data, dtype): + assert dtype.is_dtype(data) is True + + def test_is_dtype_from_self(self, dtype): + result = type(dtype).is_dtype(dtype) + assert result is True + + def test_is_not_string_type(self, dtype): + return not pd.api.types.is_string_dtype(dtype) + + def test_is_not_object_type(self, dtype): + return not pd.api.types.is_object_dtype(dtype) + + def test_eq_with_str(self, dtype): + assert dtype == dtype.name + assert dtype != dtype.name + '-suffix' + + def test_eq_with_numpy_object(self, dtype): + assert dtype != np.dtype('object') diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py new file mode 100644 index 0000000000000..566ba1721d13c --- /dev/null +++ b/pandas/tests/extension/base/getitem.py @@ -0,0 +1,122 @@ +import numpy as np + +import pandas as pd + +from .base import BaseExtensionTests + + +class BaseGetitemTests(BaseExtensionTests): + """Tests for ExtensionArray.__getitem__.""" + + def test_iloc_series(self, data): + ser = pd.Series(data) + result = ser.iloc[:4] + expected = pd.Series(data[:4]) + self.assert_series_equal(result, expected) + + result = ser.iloc[[0, 1, 2, 3]] + self.assert_series_equal(result, expected) + + def test_iloc_frame(self, data): + df = pd.DataFrame({"A": data, 'B': + np.arange(len(data), dtype='int64')}) + expected = pd.DataFrame({"A": data[:4]}) + + # slice -> frame + result = df.iloc[:4, [0]] + self.assert_frame_equal(result, expected) + + # sequence -> frame + result = df.iloc[[0, 1, 2, 3], [0]] + self.assert_frame_equal(result, expected) + + expected = pd.Series(data[:4], name='A') + + # slice -> series + result = df.iloc[:4, 0] 
+ self.assert_series_equal(result, expected) + + # sequence -> series + result = df.iloc[:4, 0] + self.assert_series_equal(result, expected) + + def test_loc_series(self, data): + ser = pd.Series(data) + result = ser.loc[:3] + expected = pd.Series(data[:4]) + self.assert_series_equal(result, expected) + + result = ser.loc[[0, 1, 2, 3]] + self.assert_series_equal(result, expected) + + def test_loc_frame(self, data): + df = pd.DataFrame({"A": data, + 'B': np.arange(len(data), dtype='int64')}) + expected = pd.DataFrame({"A": data[:4]}) + + # slice -> frame + result = df.loc[:3, ['A']] + self.assert_frame_equal(result, expected) + + # sequence -> frame + result = df.loc[[0, 1, 2, 3], ['A']] + self.assert_frame_equal(result, expected) + + expected = pd.Series(data[:4], name='A') + + # slice -> series + result = df.loc[:3, 'A'] + self.assert_series_equal(result, expected) + + # sequence -> series + result = df.loc[:3, 'A'] + self.assert_series_equal(result, expected) + + def test_getitem_scalar(self, data): + result = data[0] + assert isinstance(result, data.dtype.type) + + result = pd.Series(data)[0] + assert isinstance(result, data.dtype.type) + + def test_getitem_scalar_na(self, data_missing, na_cmp, na_value): + result = data_missing[0] + assert na_cmp(result, na_value) + + def test_getitem_mask(self, data): + # Empty mask, raw array + mask = np.zeros(len(data), dtype=bool) + result = data[mask] + assert len(result) == 0 + assert isinstance(result, type(data)) + + # Empty mask, in series + mask = np.zeros(len(data), dtype=bool) + result = pd.Series(data)[mask] + assert len(result) == 0 + assert result.dtype == data.dtype + + # non-empty mask, raw array + mask[0] = True + result = data[mask] + assert len(result) == 1 + assert isinstance(result, type(data)) + + # non-empty mask, in series + result = pd.Series(data)[mask] + assert len(result) == 1 + assert result.dtype == data.dtype + + def test_getitem_slice(self, data): + # getitem[slice] should return an array + 
result = data[slice(0)] # empty + assert isinstance(result, type(data)) + + result = data[slice(1)] # scalar + assert isinstance(result, type(data)) + + def test_take_sequence(self, data): + result = pd.Series(data)[[0, 1, 3]] + assert result.iloc[0] == data[0] + assert result.iloc[1] == data[1] + assert result.iloc[2] == data[3] diff --git a/pandas/tests/extension/base/interface.py b/pandas/tests/extension/base/interface.py new file mode 100644 index 0000000000000..e1596f0675f32 --- /dev/null +++ b/pandas/tests/extension/base/interface.py @@ -0,0 +1,55 @@ +import numpy as np + +import pandas as pd +from pandas.compat import StringIO +from pandas.core.dtypes.common import is_extension_array_dtype +from pandas.core.dtypes.dtypes import ExtensionDtype + +from .base import BaseExtensionTests + + +class BaseInterfaceTests(BaseExtensionTests): + """Tests that the basic interface is satisfied.""" + # ------------------------------------------------------------------------ + # Interface + # ------------------------------------------------------------------------ + + def test_len(self, data): + assert len(data) == 100 + + def test_ndim(self, data): + assert data.ndim == 1 + + def test_can_hold_na_valid(self, data): + assert data._can_hold_na in {True, False} + + def test_memory_usage(self, data): + s = pd.Series(data) + result = s.memory_usage(index=False) + assert result == s.nbytes + + def test_array_interface(self, data): + result = np.array(data) + assert result[0] == data[0] + + def test_as_ndarray_with_dtype_kind(self, data): + np.array(data, dtype=data.dtype.kind) + + def test_repr(self, data): + ser = pd.Series(data) + assert data.dtype.name in repr(ser) + + df = pd.DataFrame({"A": data}) + repr(df) + + def test_dtype_name_in_info(self, data): + buf = StringIO() + pd.DataFrame({"A": data}).info(buf=buf) + result = buf.getvalue() + assert data.dtype.name in result + + def test_is_extension_array_dtype(self, data): + assert is_extension_array_dtype(data) + assert 
is_extension_array_dtype(data.dtype) + assert is_extension_array_dtype(pd.Series(data)) + assert isinstance(data.dtype, ExtensionDtype) diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py new file mode 100644 index 0000000000000..74e5d180b1aa3 --- /dev/null +++ b/pandas/tests/extension/base/methods.py @@ -0,0 +1,33 @@ +import pytest +import numpy as np + +import pandas as pd + +from .base import BaseExtensionTests + + +class BaseMethodsTests(BaseExtensionTests): + """Various Series and DataFrame methods.""" + + @pytest.mark.parametrize('dropna', [True, False]) + def test_value_counts(self, all_data, dropna): + all_data = all_data[:10] + if dropna: + other = np.array(all_data[~all_data.isna()]) + else: + other = all_data + + result = pd.Series(all_data).value_counts(dropna=dropna).sort_index() + expected = pd.Series(other).value_counts(dropna=dropna).sort_index() + + self.assert_series_equal(result, expected) + + def test_count(self, data_missing): + df = pd.DataFrame({"A": data_missing}) + result = df.count(axis='columns') + expected = pd.Series([0, 1]) + self.assert_series_equal(result, expected) + + def test_apply_simple_series(self, data): + result = pd.Series(data).apply(id) + assert isinstance(result, pd.Series) diff --git a/pandas/tests/extension/base/missing.py b/pandas/tests/extension/base/missing.py new file mode 100644 index 0000000000000..3ae82fa1ca432 --- /dev/null +++ b/pandas/tests/extension/base/missing.py @@ -0,0 +1,47 @@ +import numpy as np + +import pandas as pd +import pandas.util.testing as tm + +from .base import BaseExtensionTests + + +class BaseMissingTests(BaseExtensionTests): + def test_isna(self, data_missing): + if data_missing._can_hold_na: + expected = np.array([True, False]) + else: + expected = np.array([False, False]) + + result = pd.isna(data_missing) + tm.assert_numpy_array_equal(result, expected) + + result = pd.Series(data_missing).isna() + expected = pd.Series(expected) + 
self.assert_series_equal(result, expected) + + def test_dropna_series(self, data_missing): + ser = pd.Series(data_missing) + result = ser.dropna() + expected = ser.iloc[[1]] + self.assert_series_equal(result, expected) + + def test_dropna_frame(self, data_missing): + df = pd.DataFrame({"A": data_missing}) + + # defaults + result = df.dropna() + expected = df.iloc[[1]] + self.assert_frame_equal(result, expected) + + # axis = 1 + result = df.dropna(axis='columns') + expected = pd.DataFrame(index=[0, 1]) + self.assert_frame_equal(result, expected) + + # multiple + df = pd.DataFrame({"A": data_missing, + "B": [1, np.nan]}) + result = df.dropna() + expected = df.iloc[:0] + self.assert_frame_equal(result, expected) diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py new file mode 100644 index 0000000000000..cfb70f2291555 --- /dev/null +++ b/pandas/tests/extension/base/reshaping.py @@ -0,0 +1,62 @@ +import pytest + +import pandas as pd +from pandas.core.internals import ExtensionBlock + +from .base import BaseExtensionTests + + +class BaseReshapingTests(BaseExtensionTests): + """Tests for reshaping and concatenation.""" + @pytest.mark.parametrize('in_frame', [True, False]) + def test_concat(self, data, in_frame): + wrapped = pd.Series(data) + if in_frame: + wrapped = pd.DataFrame(wrapped) + result = pd.concat([wrapped, wrapped], ignore_index=True) + + assert len(result) == len(data) * 2 + + if in_frame: + dtype = result.dtypes[0] + else: + dtype = result.dtype + + assert dtype == data.dtype + assert isinstance(result._data.blocks[0], ExtensionBlock) + + def test_align(self, data, na_value): + a = data[:3] + b = data[2:5] + r1, r2 = pd.Series(a).align(pd.Series(b, index=[1, 2, 3])) + + # Assumes that the ctor can take a list of scalars of the type + e1 = pd.Series(type(data)(list(a) + [na_value])) + e2 = pd.Series(type(data)([na_value] + list(b))) + self.assert_series_equal(r1, e1) + self.assert_series_equal(r2, e2) + + def 
test_align_frame(self, data, na_value): + a = data[:3] + b = data[2:5] + r1, r2 = pd.DataFrame({'A': a}).align( + pd.DataFrame({'A': b}, index=[1, 2, 3]) + ) + + # Assumes that the ctor can take a list of scalars of the type + e1 = pd.DataFrame({'A': type(data)(list(a) + [na_value])}) + e2 = pd.DataFrame({'A': type(data)([na_value] + list(b))}) + self.assert_frame_equal(r1, e1) + self.assert_frame_equal(r2, e2) + + def test_set_frame_expand_regular_with_extension(self, data): + df = pd.DataFrame({"A": [1] * len(data)}) + df['B'] = data + expected = pd.DataFrame({"A": [1] * len(data), "B": data}) + self.assert_frame_equal(df, expected) + + def test_set_frame_expand_extension_with_regular(self, data): + df = pd.DataFrame({'A': data}) + df['B'] = [1] * len(data) + expected = pd.DataFrame({"A": data, "B": [1] * len(data)}) + self.assert_frame_equal(df, expected) diff --git a/pandas/tests/extension/category/__init__.py b/pandas/tests/extension/category/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/extension/category/test_categorical.py b/pandas/tests/extension/category/test_categorical.py new file mode 100644 index 0000000000000..8f413b4a19730 --- /dev/null +++ b/pandas/tests/extension/category/test_categorical.py @@ -0,0 +1,84 @@ +import string + +import pytest +import numpy as np + +from pandas.api.types import CategoricalDtype +from pandas import Categorical +from pandas.tests.extension import base + + +def make_data(): + return np.random.choice(list(string.ascii_letters), size=100) + + +@pytest.fixture +def dtype(): + return CategoricalDtype() + + +@pytest.fixture +def data(): + """Length-100 PeriodArray for semantics test.""" + return Categorical(make_data()) + + +@pytest.fixture +def data_missing(): + """Length 2 array with [NA, Valid]""" + return Categorical([np.nan, 'A']) + + +@pytest.fixture +def na_value(): + return np.nan + + +class TestDtype(base.BaseDtypeTests): + pass + + +class 
TestInterface(base.BaseInterfaceTests): + @pytest.mark.skip(reason="Memory usage doesn't match") + def test_memory_usage(self): + # Is this deliberate? + pass + + +class TestConstructors(base.BaseConstructorsTests): + pass + + +class TestReshaping(base.BaseReshapingTests): + @pytest.mark.skip(reason="Unobserved categories preseved in concat.") + def test_align(self, data, na_value): + pass + + @pytest.mark.skip(reason="Unobserved categories preseved in concat.") + def test_align_frame(self, data, na_value): + pass + + +class TestGetitem(base.BaseGetitemTests): + @pytest.mark.skip(reason="Backwards compatibility") + def test_getitem_scalar(self): + # CategoricalDtype.type isn't "correct" since it should + # be a parent of the elements (object). But don't want + # to break things by changing. + pass + + +class TestMissing(base.BaseMissingTests): + pass + + +class TestMethods(base.BaseMethodsTests): + pass + + @pytest.mark.skip(reason="Unobserved categories included") + def test_value_counts(self, all_data, dropna): + pass + + +class TestCasting(base.BaseCastingTests): + pass diff --git a/pandas/tests/extension/conftest.py b/pandas/tests/extension/conftest.py new file mode 100644 index 0000000000000..21ed8894e8ebb --- /dev/null +++ b/pandas/tests/extension/conftest.py @@ -0,0 +1,48 @@ +import operator + +import pytest + + +@pytest.fixture +def dtype(): + """A fixture providing the ExtensionDtype to validate.""" + raise NotImplementedError + + +@pytest.fixture +def data(): + """Length-100 array for this type.""" + raise NotImplementedError + + +@pytest.fixture +def data_missing(): + """Length-2 array with [NA, Valid]""" + raise NotImplementedError + + +@pytest.fixture(params=['data', 'data_missing']) +def all_data(request, data, data_missing): + """Parametrized fixture giving 'data' and 'data_missing'""" + if request.param == 'data': + return data + elif request.param == 'data_missing': + return data_missing + + +@pytest.fixture +def na_cmp(): + """Binary operator for 
comparing NA values. + + Should return a function of two arguments that returns + True if both arguments are (scalar) NA for your type. + + By default, uses ``operator.or`` + """ + return operator.is_ + + +@pytest.fixture +def na_value(): + """The scalar missing value for this type. Default 'None'""" + return None diff --git a/pandas/tests/extension/decimal/__init__.py b/pandas/tests/extension/decimal/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py new file mode 100644 index 0000000000000..736556e4be20d --- /dev/null +++ b/pandas/tests/extension/decimal/array.py @@ -0,0 +1,92 @@ +import decimal +import numbers +import random +import sys + +import numpy as np + +import pandas as pd +from pandas.core.arrays import ExtensionArray +from pandas.core.dtypes.base import ExtensionDtype +from pandas.core.dtypes.common import _ensure_platform_int + + +class DecimalDtype(ExtensionDtype): + type = decimal.Decimal + name = 'decimal' + + @classmethod + def construct_from_string(cls, string): + if string == cls.name: + return cls() + else: + raise TypeError("Cannot construct a '{}' from " + "'{}'".format(cls, string)) + + +class DecimalArray(ExtensionArray): + dtype = DecimalDtype() + + def __init__(self, values): + values = np.asarray(values, dtype=object) + + self.values = values + + @classmethod + def _constructor_from_sequence(cls, scalars): + return cls(scalars) + + def __getitem__(self, item): + if isinstance(item, numbers.Integral): + return self.values[item] + else: + return type(self)(self.values[item]) + + def copy(self, deep=False): + if deep: + return type(self)(self.values.copy()) + return type(self)(self) + + def __setitem__(self, key, value): + if pd.api.types.is_list_like(value): + value = [decimal.Decimal(v) for v in value] + else: + value = decimal.Decimal(value) + self.values[key] = value + + def __len__(self): + return len(self.values) + + def 
__repr__(self): + return repr(self.values) + + @property + def nbytes(self): + n = len(self) + if n: + return n * sys.getsizeof(self[0]) + return 0 + + def isna(self): + return np.array([x.is_nan() for x in self.values]) + + def take(self, indexer, allow_fill=True, fill_value=None): + mask = indexer == -1 + + indexer = _ensure_platform_int(indexer) + out = self.values.take(indexer) + out[mask] = self._na_value + + return type(self)(out) + + @property + def _na_value(self): + return decimal.Decimal('NaN') + + @classmethod + def _concat_same_type(cls, to_concat): + return cls(np.concatenate([x.values for x in to_concat])) + + +def make_data(): + return [decimal.Decimal(random.random()) for _ in range(100)] diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py new file mode 100644 index 0000000000000..7b4d079ecad87 --- /dev/null +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -0,0 +1,154 @@ +import decimal + +import numpy as np +import pandas as pd +import pandas.util.testing as tm +import pytest + +from pandas.tests.extension import base + +from .array import DecimalDtype, DecimalArray, make_data + + +@pytest.fixture +def dtype(): + return DecimalDtype() + + +@pytest.fixture +def data(): + return DecimalArray(make_data()) + + +@pytest.fixture +def data_missing(): + return DecimalArray([decimal.Decimal('NaN'), decimal.Decimal(1)]) + + +@pytest.fixture +def na_cmp(): + return lambda x, y: x.is_nan() and y.is_nan() + + +@pytest.fixture +def na_value(): + return decimal.Decimal("NaN") + + +class TestDtype(base.BaseDtypeTests): + pass + + +class TestInterface(base.BaseInterfaceTests): + pass + + +class TestConstructors(base.BaseConstructorsTests): + pass + + +class TestReshaping(base.BaseReshapingTests): + + def test_align(self, data, na_value): + # Have to override since assert_series_equal doesn't + # compare Decimal(NaN) properly. 
+ a = data[:3] + b = data[2:5] + r1, r2 = pd.Series(a).align(pd.Series(b, index=[1, 2, 3])) + + # NaN handling + e1 = pd.Series(type(data)(list(a) + [na_value])) + e2 = pd.Series(type(data)([na_value] + list(b))) + tm.assert_series_equal(r1.iloc[:3], e1.iloc[:3]) + assert r1[3].is_nan() + assert e1[3].is_nan() + + tm.assert_series_equal(r2.iloc[1:], e2.iloc[1:]) + assert r2[0].is_nan() + assert e2[0].is_nan() + + def test_align_frame(self, data, na_value): + # Override for Decimal(NaN) comparison + a = data[:3] + b = data[2:5] + r1, r2 = pd.DataFrame({'A': a}).align( + pd.DataFrame({'A': b}, index=[1, 2, 3]) + ) + + # Assumes that the ctor can take a list of scalars of the type + e1 = pd.DataFrame({'A': type(data)(list(a) + [na_value])}) + e2 = pd.DataFrame({'A': type(data)([na_value] + list(b))}) + + tm.assert_frame_equal(r1.iloc[:3], e1.iloc[:3]) + assert r1.loc[3, 'A'].is_nan() + assert e1.loc[3, 'A'].is_nan() + + tm.assert_frame_equal(r2.iloc[1:], e2.iloc[1:]) + assert r2.loc[0, 'A'].is_nan() + assert e2.loc[0, 'A'].is_nan() + + +class TestGetitem(base.BaseGetitemTests): + pass + + +class TestMissing(base.BaseMissingTests): + pass + + +class TestMethods(base.BaseMethodsTests): + @pytest.mark.parametrize('dropna', [True, False]) + @pytest.mark.xfail(reason="value_counts not implemented yet.") + def test_value_counts(self, all_data, dropna): + all_data = all_data[:10] + if dropna: + other = np.array(all_data[~all_data.isna()]) + else: + other = all_data + + result = pd.Series(all_data).value_counts(dropna=dropna).sort_index() + expected = pd.Series(other).value_counts(dropna=dropna).sort_index() + + tm.assert_series_equal(result, expected) + + +class TestCasting(base.BaseCastingTests): + pass + + +def test_series_constructor_coerce_data_to_extension_dtype_raises(): + xpr = ("Cannot cast data to extension dtype 'decimal'. 
Pass the " + "extension array directly.") + with tm.assert_raises_regex(ValueError, xpr): + pd.Series([0, 1, 2], dtype=DecimalDtype()) + + +def test_series_constructor_with_same_dtype_ok(): + arr = DecimalArray([decimal.Decimal('10.0')]) + result = pd.Series(arr, dtype=DecimalDtype()) + expected = pd.Series(arr) + tm.assert_series_equal(result, expected) + + +def test_series_constructor_coerce_extension_array_to_dtype_raises(): + arr = DecimalArray([decimal.Decimal('10.0')]) + xpr = "Cannot specify a dtype 'int64' .* \('decimal'\)." + + with tm.assert_raises_regex(ValueError, xpr): + pd.Series(arr, dtype='int64') + + +def test_dataframe_constructor_with_same_dtype_ok(): + arr = DecimalArray([decimal.Decimal('10.0')]) + + result = pd.DataFrame({"A": arr}, dtype=DecimalDtype()) + expected = pd.DataFrame({"A": arr}) + tm.assert_frame_equal(result, expected) + + +def test_dataframe_constructor_with_different_dtype_raises(): + arr = DecimalArray([decimal.Decimal('10.0')]) + + xpr = "Cannot coerce extension array to dtype 'int64'. 
" + with tm.assert_raises_regex(ValueError, xpr): + pd.DataFrame({"A": arr}, dtype='int64') diff --git a/pandas/tests/extension/json/__init__.py b/pandas/tests/extension/json/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py new file mode 100644 index 0000000000000..21addf9d1549f --- /dev/null +++ b/pandas/tests/extension/json/array.py @@ -0,0 +1,105 @@ +import collections +import itertools +import numbers +import random +import string +import sys + +import numpy as np + +from pandas.core.dtypes.base import ExtensionDtype +from pandas.core.arrays import ExtensionArray + + +class JSONDtype(ExtensionDtype): + type = collections.Mapping + name = 'json' + + @classmethod + def construct_from_string(cls, string): + if string == cls.name: + return cls() + else: + raise TypeError("Cannot construct a '{}' from " + "'{}'".format(cls, string)) + + +class JSONArray(ExtensionArray): + dtype = JSONDtype() + + def __init__(self, values): + for val in values: + if not isinstance(val, self.dtype.type): + raise TypeError + self.data = values + + @classmethod + def _constructor_from_sequence(cls, scalars): + return cls(scalars) + + def __getitem__(self, item): + if isinstance(item, numbers.Integral): + return self.data[item] + elif isinstance(item, np.ndarray) and item.dtype == 'bool': + return self._constructor_from_sequence([ + x for x, m in zip(self, item) if m + ]) + else: + return type(self)(self.data[item]) + + def __setitem__(self, key, value): + if isinstance(key, numbers.Integral): + self.data[key] = value + else: + if not isinstance(value, (type(self), + collections.Sequence)): + # broadcast value + value = itertools.cycle([value]) + + if isinstance(key, np.ndarray) and key.dtype == 'bool': + # masking + for i, (k, v) in enumerate(zip(key, value)): + if k: + assert isinstance(v, self.dtype.type) + self.data[i] = v + else: + for k, v in zip(key, value): + assert 
isinstance(v, self.dtype.type) + self.data[k] = v + + def __len__(self): + return len(self.data) + + def __repr__(self): + return 'JSONArary({!r})'.format(self.data) + + @property + def nbytes(self): + return sys.getsizeof(self.data) + + def isna(self): + return np.array([x == self._na_value for x in self.data]) + + def take(self, indexer, allow_fill=True, fill_value=None): + output = [self.data[loc] if loc != -1 else self._na_value + for loc in indexer] + return self._constructor_from_sequence(output) + + def copy(self, deep=False): + return type(self)(self.data[:]) + + @property + def _na_value(self): + return {} + + @classmethod + def _concat_same_type(cls, to_concat): + data = list(itertools.chain.from_iterable([x.data for x in to_concat])) + return cls(data) + + +def make_data(): + # TODO: Use a regular dict. See _NDFrameIndexer._setitem_with_indexer + return [collections.UserDict([ + (random.choice(string.ascii_letters), random.randint(0, 100)) + for _ in range(random.randint(0, 10))]) for _ in range(100)] diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py new file mode 100644 index 0000000000000..e0721bb1d8d1a --- /dev/null +++ b/pandas/tests/extension/json/test_json.py @@ -0,0 +1,73 @@ +import operator +import sys + +import pytest + + +from pandas.tests.extension import base + +from .array import JSONArray, JSONDtype, make_data + +pytestmark = pytest.mark.skipif(sys.version_info[0] == 2, + reason="Py2 doesn't have a UserDict") + + +@pytest.fixture +def dtype(): + return JSONDtype() + + +@pytest.fixture +def data(): + """Length-100 JSONArray for semantics test.""" + return JSONArray(make_data()) + + +@pytest.fixture +def data_missing(): + """Length 2 array with [NA, Valid]""" + return JSONArray([{}, {'a': 10}]) + + +@pytest.fixture +def na_value(): + return {} + + +@pytest.fixture +def na_cmp(): + return operator.eq + + +class 
TestInterface(base.BaseInterfaceTests): + pass + + +class TestConstructors(base.BaseConstructorsTests): + pass + + +class TestReshaping(base.BaseReshapingTests): + pass + + +class TestGetitem(base.BaseGetitemTests): + pass + + +class TestMissing(base.BaseMissingTests): + pass + + +class TestMethods(base.BaseMethodsTests): + @pytest.mark.skip(reason="Unhashable") + def test_value_counts(self, all_data, dropna): + pass + + +class TestCasting(base.BaseCastingTests): + pass diff --git a/pandas/tests/extension/test_common.py b/pandas/tests/extension/test_common.py new file mode 100644 index 0000000000000..1f4582f687415 --- /dev/null +++ b/pandas/tests/extension/test_common.py @@ -0,0 +1,67 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas.util.testing as tm +from pandas.core.arrays import ExtensionArray +from pandas.core.dtypes.common import is_extension_array_dtype +from pandas.core.dtypes.dtypes import ExtensionDtype + + +class DummyDtype(ExtensionDtype): + pass + + +class DummyArray(ExtensionArray): + + def __init__(self, data): + self.data = data + + def __array__(self, dtype): + return self.data + + @property + def dtype(self): + return self.data.dtype + + +class TestExtensionArrayDtype(object): + + @pytest.mark.parametrize('values', [ + pd.Categorical([]), + pd.Categorical([]).dtype, + pd.Series(pd.Categorical([])), + DummyDtype(), + DummyArray(np.array([1, 2])), + ]) + def test_is_extension_array_dtype(self, values): + assert is_extension_array_dtype(values) + + @pytest.mark.parametrize('values', [ + np.array([]), + pd.Series(np.array([])), + ]) + def test_is_not_extension_array_dtype(self, values): + assert not is_extension_array_dtype(values) + + +def test_astype(): + + arr = DummyArray(np.array([1, 2, 3])) + expected = np.array([1, 2, 3], dtype=object) + + result = arr.astype(object) + tm.assert_numpy_array_equal(result, expected) + + result = arr.astype('object') + tm.assert_numpy_array_equal(result, expected) + + +def 
test_astype_no_copy(): + arr = DummyArray(np.array([1, 2, 3], dtype=np.int64)) + result = arr.astype(arr.dtype, copy=False) + + assert arr.data is result + + result = arr.astype(arr.dtype) + assert arr.data is not result diff --git a/pandas/tests/extension/test_external_block.py b/pandas/tests/extension/test_external_block.py new file mode 100644 index 0000000000000..991da41168aa0 --- /dev/null +++ b/pandas/tests/extension/test_external_block.py @@ -0,0 +1,77 @@ +# -*- coding: utf-8 -*- +# pylint: disable=W0102 + +import numpy as np + +import pandas as pd +from pandas.core.internals import ( + BlockManager, SingleBlockManager, NonConsolidatableMixIn, Block) + +import pytest + + +class CustomBlock(NonConsolidatableMixIn, Block): + + _holder = np.ndarray + + def formatting_values(self): + return np.array(["Val: {}".format(i) for i in self.values]) + + def concat_same_type(self, to_concat, placement=None): + """ + Always concatenate disregarding self.ndim as the values are + always 1D in this custom Block + """ + values = np.concatenate([blk.values for blk in to_concat]) + return self.make_block_same_class( + values, placement=placement or slice(0, len(values), 1)) + + +@pytest.fixture +def df(): + df1 = pd.DataFrame({'a': [1, 2, 3]}) + blocks = df1._data.blocks + values = np.arange(3, dtype='int64') + custom_block = CustomBlock(values, placement=slice(1, 2)) + blocks = blocks + (custom_block,) + block_manager = BlockManager(blocks, [pd.Index(['a', 'b']), df1.index]) + return pd.DataFrame(block_manager) + + +def test_custom_repr(): + values = np.arange(3, dtype='int64') + + # series + block = CustomBlock(values, placement=slice(0, 3)) + + s = pd.Series(SingleBlockManager(block, pd.RangeIndex(3))) + assert repr(s) == '0 Val: 0\n1 Val: 1\n2 Val: 2\ndtype: int64' + + # dataframe + block = CustomBlock(values, placement=slice(0, 1)) + blk_mgr = BlockManager([block], [['col'], range(3)]) + df = pd.DataFrame(blk_mgr) + assert repr(df) == ' col\n0 Val: 0\n1 Val: 1\n2 Val: 2' 
+ + +def test_concat_series(): + # GH17728 + values = np.arange(3, dtype='int64') + block = CustomBlock(values, placement=slice(0, 3)) + s = pd.Series(block, pd.RangeIndex(3), fastpath=True) + + res = pd.concat([s, s]) + assert isinstance(res._data.blocks[0], CustomBlock) + + +def test_concat_dataframe(df): + # GH17728 + res = pd.concat([df, df]) + assert isinstance(res._data.blocks[1], CustomBlock) + + +def test_concat_axis1(df): + # GH17954 + df2 = pd.DataFrame({'c': [.1, .2, .3]}) + res = pd.concat([df, df2], axis=1) + assert isinstance(res._data.blocks[1], CustomBlock) diff --git a/pandas/tests/formats/test_format.py b/pandas/tests/formats/test_format.py deleted file mode 100644 index 476c6a636ae5a..0000000000000 --- a/pandas/tests/formats/test_format.py +++ /dev/null @@ -1,4985 +0,0 @@ -# -*- coding: utf-8 -*- - -# TODO(wesm): lots of issues making flake8 hard -# flake8: noqa - -from __future__ import print_function -from distutils.version import LooseVersion -import re - -from pandas.compat import (range, zip, lrange, StringIO, PY3, - u, lzip, is_platform_windows, - is_platform_32bit) -import pandas.compat as compat -import itertools -from operator import methodcaller -import os -import sys -from textwrap import dedent -import warnings - -from numpy import nan -from numpy.random import randn -import numpy as np - -import codecs - -div_style = '' -try: - import IPython - if IPython.__version__ < LooseVersion('3.0.0'): - div_style = ' style="max-width:1500px;overflow:auto;"' -except (ImportError, AttributeError): - pass - -from pandas import DataFrame, Series, Index, Timestamp, MultiIndex, date_range, NaT - -import pandas.formats.format as fmt -import pandas.util.testing as tm -import pandas.core.common as com -import pandas.formats.printing as printing -from pandas.util.terminal import get_terminal_size -import pandas as pd -from pandas.core.config import (set_option, get_option, option_context, - reset_option) -from datetime import datetime - -import pytest - 
-use_32bit_repr = is_platform_windows() or is_platform_32bit() - -_frame = DataFrame(tm.getSeriesData()) - - -def curpath(): - pth, _ = os.path.split(os.path.abspath(__file__)) - return pth - - -def has_info_repr(df): - r = repr(df) - c1 = r.split('\n')[0].startswith(", 2. Index, 3. Columns, 4. dtype, 5. memory usage, 6. trailing newline - return has_info and nv - - -def has_horizontally_truncated_repr(df): - try: # Check header row - fst_line = np.array(repr(df).splitlines()[0].split()) - cand_col = np.where(fst_line == '...')[0][0] - except: - return False - # Make sure each row has this ... in the same place - r = repr(df) - for ix, l in enumerate(r.splitlines()): - if not r.split()[cand_col] == '...': - return False - return True - - -def has_vertically_truncated_repr(df): - r = repr(df) - only_dot_row = False - for row in r.splitlines(): - if re.match(r'^[\.\ ]+$', row): - only_dot_row = True - return only_dot_row - - -def has_truncated_repr(df): - return has_horizontally_truncated_repr( - df) or has_vertically_truncated_repr(df) - - -def has_doubly_truncated_repr(df): - return has_horizontally_truncated_repr( - df) and has_vertically_truncated_repr(df) - - -def has_expanded_repr(df): - r = repr(df) - for line in r.split('\n'): - if line.endswith('\\'): - return True - return False - - -class TestDataFrameFormatting(tm.TestCase): - - def setUp(self): - self.warn_filters = warnings.filters - warnings.filterwarnings('ignore', category=FutureWarning, - module=".*format") - - self.frame = _frame.copy() - - def tearDown(self): - warnings.filters = self.warn_filters - - def test_repr_embedded_ndarray(self): - arr = np.empty(10, dtype=[('err', object)]) - for i in range(len(arr)): - arr['err'][i] = np.random.randn(i) - - df = DataFrame(arr) - repr(df['err']) - repr(df) - df.to_string() - - def test_eng_float_formatter(self): - self.frame.loc[5] = 0 - - fmt.set_eng_float_format() - repr(self.frame) - - fmt.set_eng_float_format(use_eng_prefix=True) - repr(self.frame) - 
- fmt.set_eng_float_format(accuracy=0) - repr(self.frame) - self.reset_display_options() - - def test_show_null_counts(self): - - df = DataFrame(1, columns=range(10), index=range(10)) - df.iloc[1, 1] = np.nan - - def check(null_counts, result): - buf = StringIO() - df.info(buf=buf, null_counts=null_counts) - self.assertTrue(('non-null' in buf.getvalue()) is result) - - with option_context('display.max_info_rows', 20, - 'display.max_info_columns', 20): - check(None, True) - check(True, True) - check(False, False) - - with option_context('display.max_info_rows', 5, - 'display.max_info_columns', 5): - check(None, False) - check(True, False) - check(False, False) - - def test_repr_tuples(self): - buf = StringIO() - - df = DataFrame({'tups': lzip(range(10), range(10))}) - repr(df) - df.to_string(col_space=10, buf=buf) - - def test_repr_truncation(self): - max_len = 20 - with option_context("display.max_colwidth", max_len): - df = DataFrame({'A': np.random.randn(10), - 'B': [tm.rands(np.random.randint( - max_len - 1, max_len + 1)) for i in range(10) - ]}) - r = repr(df) - r = r[r.find('\n') + 1:] - - adj = fmt._get_adjustment() - - for line, value in lzip(r.split('\n'), df['B']): - if adj.len(value) + 1 > max_len: - self.assertIn('...', line) - else: - self.assertNotIn('...', line) - - with option_context("display.max_colwidth", 999999): - self.assertNotIn('...', repr(df)) - - with option_context("display.max_colwidth", max_len + 2): - self.assertNotIn('...', repr(df)) - - def test_repr_chop_threshold(self): - df = DataFrame([[0.1, 0.5], [0.5, -0.1]]) - pd.reset_option("display.chop_threshold") # default None - self.assertEqual(repr(df), ' 0 1\n0 0.1 0.5\n1 0.5 -0.1') - - with option_context("display.chop_threshold", 0.2): - self.assertEqual(repr(df), ' 0 1\n0 0.0 0.5\n1 0.5 0.0') - - with option_context("display.chop_threshold", 0.6): - self.assertEqual(repr(df), ' 0 1\n0 0.0 0.0\n1 0.0 0.0') - - with option_context("display.chop_threshold", None): - 
self.assertEqual(repr(df), ' 0 1\n0 0.1 0.5\n1 0.5 -0.1') - - def test_repr_obeys_max_seq_limit(self): - with option_context("display.max_seq_items", 2000): - self.assertTrue(len(printing.pprint_thing(lrange(1000))) > 1000) - - with option_context("display.max_seq_items", 5): - self.assertTrue(len(printing.pprint_thing(lrange(1000))) < 100) - - def test_repr_set(self): - self.assertEqual(printing.pprint_thing(set([1])), '{1}') - - def test_repr_is_valid_construction_code(self): - # for the case of Index, where the repr is traditional rather then - # stylized - idx = Index(['a', 'b']) - res = eval("pd." + repr(idx)) - tm.assert_series_equal(Series(res), Series(idx)) - - def test_repr_should_return_str(self): - # http://docs.python.org/py3k/reference/datamodel.html#object.__repr__ - # http://docs.python.org/reference/datamodel.html#object.__repr__ - # "...The return value must be a string object." - - # (str on py2.x, str (unicode) on py3) - - data = [8, 5, 3, 5] - index1 = [u("\u03c3"), u("\u03c4"), u("\u03c5"), u("\u03c6")] - cols = [u("\u03c8")] - df = DataFrame(data, columns=cols, index=index1) - self.assertTrue(type(df.__repr__()) == str) # both py2 / 3 - - def test_repr_no_backslash(self): - with option_context('mode.sim_interactive', True): - df = DataFrame(np.random.randn(10, 4)) - self.assertTrue('\\' not in repr(df)) - - def test_expand_frame_repr(self): - df_small = DataFrame('hello', [0], [0]) - df_wide = DataFrame('hello', [0], lrange(10)) - df_tall = DataFrame('hello', lrange(30), lrange(5)) - - with option_context('mode.sim_interactive', True): - with option_context('display.max_columns', 10, 'display.width', 20, - 'display.max_rows', 20, - 'display.show_dimensions', True): - with option_context('display.expand_frame_repr', True): - self.assertFalse(has_truncated_repr(df_small)) - self.assertFalse(has_expanded_repr(df_small)) - self.assertFalse(has_truncated_repr(df_wide)) - self.assertTrue(has_expanded_repr(df_wide)) - 
self.assertTrue(has_vertically_truncated_repr(df_tall)) - self.assertTrue(has_expanded_repr(df_tall)) - - with option_context('display.expand_frame_repr', False): - self.assertFalse(has_truncated_repr(df_small)) - self.assertFalse(has_expanded_repr(df_small)) - self.assertFalse(has_horizontally_truncated_repr(df_wide)) - self.assertFalse(has_expanded_repr(df_wide)) - self.assertTrue(has_vertically_truncated_repr(df_tall)) - self.assertFalse(has_expanded_repr(df_tall)) - - def test_repr_non_interactive(self): - # in non interactive mode, there can be no dependency on the - # result of terminal auto size detection - df = DataFrame('hello', lrange(1000), lrange(5)) - - with option_context('mode.sim_interactive', False, 'display.width', 0, - 'display.height', 0, 'display.max_rows', 5000): - self.assertFalse(has_truncated_repr(df)) - self.assertFalse(has_expanded_repr(df)) - - def test_repr_max_columns_max_rows(self): - term_width, term_height = get_terminal_size() - if term_width < 10 or term_height < 10: - pytest.skip("terminal size too small, " - "{0} x {1}".format(term_width, term_height)) - - def mkframe(n): - index = ['%05d' % i for i in range(n)] - return DataFrame(0, index, index) - - df6 = mkframe(6) - df10 = mkframe(10) - with option_context('mode.sim_interactive', True): - with option_context('display.width', term_width * 2): - with option_context('display.max_rows', 5, - 'display.max_columns', 5): - self.assertFalse(has_expanded_repr(mkframe(4))) - self.assertFalse(has_expanded_repr(mkframe(5))) - self.assertFalse(has_expanded_repr(df6)) - self.assertTrue(has_doubly_truncated_repr(df6)) - - with option_context('display.max_rows', 20, - 'display.max_columns', 10): - # Out off max_columns boundary, but no extending - # since not exceeding width - self.assertFalse(has_expanded_repr(df6)) - self.assertFalse(has_truncated_repr(df6)) - - with option_context('display.max_rows', 9, - 'display.max_columns', 10): - # out vertical bounds can not result in exanded repr 
- self.assertFalse(has_expanded_repr(df10)) - self.assertTrue(has_vertically_truncated_repr(df10)) - - # width=None in terminal, auto detection - with option_context('display.max_columns', 100, 'display.max_rows', - term_width * 20, 'display.width', None): - df = mkframe((term_width // 7) - 2) - self.assertFalse(has_expanded_repr(df)) - df = mkframe((term_width // 7) + 2) - printing.pprint_thing(df._repr_fits_horizontal_()) - self.assertTrue(has_expanded_repr(df)) - - def test_str_max_colwidth(self): - # GH 7856 - df = pd.DataFrame([{'a': 'foo', - 'b': 'bar', - 'c': 'uncomfortably long line with lots of stuff', - 'd': 1}, {'a': 'foo', - 'b': 'bar', - 'c': 'stuff', - 'd': 1}]) - df.set_index(['a', 'b', 'c']) - self.assertTrue( - str(df) == - ' a b c d\n' - '0 foo bar uncomfortably long line with lots of stuff 1\n' - '1 foo bar stuff 1') - with option_context('max_colwidth', 20): - self.assertTrue(str(df) == ' a b c d\n' - '0 foo bar uncomfortably lo... 1\n' - '1 foo bar stuff 1') - - def test_auto_detect(self): - term_width, term_height = get_terminal_size() - fac = 1.05 # Arbitrary large factor to exceed term widht - cols = range(int(term_width * fac)) - index = range(10) - df = DataFrame(index=index, columns=cols) - with option_context('mode.sim_interactive', True): - with option_context('max_rows', None): - with option_context('max_columns', None): - # Wrap around with None - self.assertTrue(has_expanded_repr(df)) - with option_context('max_rows', 0): - with option_context('max_columns', 0): - # Truncate with auto detection. 
- self.assertTrue(has_horizontally_truncated_repr(df)) - - index = range(int(term_height * fac)) - df = DataFrame(index=index, columns=cols) - with option_context('max_rows', 0): - with option_context('max_columns', None): - # Wrap around with None - self.assertTrue(has_expanded_repr(df)) - # Truncate vertically - self.assertTrue(has_vertically_truncated_repr(df)) - - with option_context('max_rows', None): - with option_context('max_columns', 0): - self.assertTrue(has_horizontally_truncated_repr(df)) - - def test_to_string_repr_unicode(self): - buf = StringIO() - - unicode_values = [u('\u03c3')] * 10 - unicode_values = np.array(unicode_values, dtype=object) - df = DataFrame({'unicode': unicode_values}) - df.to_string(col_space=10, buf=buf) - - # it works! - repr(df) - - idx = Index(['abc', u('\u03c3a'), 'aegdvg']) - ser = Series(np.random.randn(len(idx)), idx) - rs = repr(ser).split('\n') - line_len = len(rs[0]) - for line in rs[1:]: - try: - line = line.decode(get_option("display.encoding")) - except: - pass - if not line.startswith('dtype:'): - self.assertEqual(len(line), line_len) - - # it works even if sys.stdin in None - _stdin = sys.stdin - try: - sys.stdin = None - repr(df) - finally: - sys.stdin = _stdin - - def test_to_string_unicode_columns(self): - df = DataFrame({u('\u03c3'): np.arange(10.)}) - - buf = StringIO() - df.to_string(buf=buf) - buf.getvalue() - - buf = StringIO() - df.info(buf=buf) - buf.getvalue() - - result = self.frame.to_string() - tm.assertIsInstance(result, compat.text_type) - - def test_to_string_utf8_columns(self): - n = u("\u05d0").encode('utf-8') - - with option_context('display.max_rows', 1): - df = DataFrame([1, 2], columns=[n]) - repr(df) - - def test_to_string_unicode_two(self): - dm = DataFrame({u('c/\u03c3'): []}) - buf = StringIO() - dm.to_string(buf) - - def test_to_string_unicode_three(self): - dm = DataFrame(['\xc2']) - buf = StringIO() - dm.to_string(buf) - - def test_to_string_with_formatters(self): - df = 
DataFrame({'int': [1, 2, 3], - 'float': [1.0, 2.0, 3.0], - 'object': [(1, 2), True, False]}, - columns=['int', 'float', 'object']) - - formatters = [('int', lambda x: '0x%x' % x), - ('float', lambda x: '[% 4.1f]' % x), - ('object', lambda x: '-%s-' % str(x))] - result = df.to_string(formatters=dict(formatters)) - result2 = df.to_string(formatters=lzip(*formatters)[1]) - self.assertEqual(result, (' int float object\n' - '0 0x1 [ 1.0] -(1, 2)-\n' - '1 0x2 [ 2.0] -True-\n' - '2 0x3 [ 3.0] -False-')) - self.assertEqual(result, result2) - - def test_to_string_with_datetime64_monthformatter(self): - months = [datetime(2016, 1, 1), datetime(2016, 2, 2)] - x = DataFrame({'months': months}) - - def format_func(x): - return x.strftime('%Y-%m') - result = x.to_string(formatters={'months': format_func}) - expected = 'months\n0 2016-01\n1 2016-02' - self.assertEqual(result.strip(), expected) - - def test_to_string_with_datetime64_hourformatter(self): - - x = DataFrame({'hod': pd.to_datetime(['10:10:10.100', '12:12:12.120'], - format='%H:%M:%S.%f')}) - - def format_func(x): - return x.strftime('%H:%M') - - result = x.to_string(formatters={'hod': format_func}) - expected = 'hod\n0 10:10\n1 12:12' - self.assertEqual(result.strip(), expected) - - def test_to_string_with_formatters_unicode(self): - df = DataFrame({u('c/\u03c3'): [1, 2, 3]}) - result = df.to_string(formatters={u('c/\u03c3'): lambda x: '%s' % x}) - self.assertEqual(result, u(' c/\u03c3\n') + '0 1\n1 2\n2 3') - - def test_east_asian_unicode_frame(self): - if PY3: - _rep = repr - else: - _rep = unicode - - # not alighned properly because of east asian width - - # mid col - df = DataFrame({'a': [u'あ', u'いいい', u'う', u'ええええええ'], - 'b': [1, 222, 33333, 4]}, - index=['a', 'bb', 'c', 'ddd']) - expected = (u" a b\na あ 1\n" - u"bb いいい 222\nc う 33333\n" - u"ddd ええええええ 4") - self.assertEqual(_rep(df), expected) - - # last col - df = DataFrame({'a': [1, 222, 33333, 4], - 'b': [u'あ', u'いいい', u'う', u'ええええええ']}, - index=['a', 'bb', 
'c', 'ddd']) - expected = (u" a b\na 1 あ\n" - u"bb 222 いいい\nc 33333 う\n" - u"ddd 4 ええええええ") - self.assertEqual(_rep(df), expected) - - # all col - df = DataFrame({'a': [u'あああああ', u'い', u'う', u'えええ'], - 'b': [u'あ', u'いいい', u'う', u'ええええええ']}, - index=['a', 'bb', 'c', 'ddd']) - expected = (u" a b\na あああああ あ\n" - u"bb い いいい\nc う う\n" - u"ddd えええ ええええええ") - self.assertEqual(_rep(df), expected) - - # column name - df = DataFrame({u'あああああ': [1, 222, 33333, 4], - 'b': [u'あ', u'いいい', u'う', u'ええええええ']}, - index=['a', 'bb', 'c', 'ddd']) - expected = (u" b あああああ\na あ 1\n" - u"bb いいい 222\nc う 33333\n" - u"ddd ええええええ 4") - self.assertEqual(_rep(df), expected) - - # index - df = DataFrame({'a': [u'あああああ', u'い', u'う', u'えええ'], - 'b': [u'あ', u'いいい', u'う', u'ええええええ']}, - index=[u'あああ', u'いいいいいい', u'うう', u'え']) - expected = (u" a b\nあああ あああああ あ\n" - u"いいいいいい い いいい\nうう う う\n" - u"え えええ ええええええ") - self.assertEqual(_rep(df), expected) - - # index name - df = DataFrame({'a': [u'あああああ', u'い', u'う', u'えええ'], - 'b': [u'あ', u'いいい', u'う', u'ええええええ']}, - index=pd.Index([u'あ', u'い', u'うう', u'え'], name=u'おおおお')) - expected = (u" a b\nおおおお \nあ あああああ あ\n" - u"い い いいい\nうう う う\nえ えええ ええええええ" - ) - self.assertEqual(_rep(df), expected) - - # all - df = DataFrame({u'あああ': [u'あああ', u'い', u'う', u'えええええ'], - u'いいいいい': [u'あ', u'いいい', u'う', u'ええ']}, - index=pd.Index([u'あ', u'いいい', u'うう', u'え'], name=u'お')) - expected = (u" あああ いいいいい\nお \nあ あああ あ\n" - u"いいい い いいい\nうう う う\nえ えええええ ええ") - self.assertEqual(_rep(df), expected) - - # MultiIndex - idx = pd.MultiIndex.from_tuples([(u'あ', u'いい'), (u'う', u'え'), ( - u'おおお', u'かかかか'), (u'き', u'くく')]) - df = DataFrame({'a': [u'あああああ', u'い', u'う', u'えええ'], - 'b': [u'あ', u'いいい', u'う', u'ええええええ']}, index=idx) - expected = (u" a b\nあ いい あああああ あ\n" - u"う え い いいい\nおおお かかかか う う\n" - u"き くく えええ ええええええ") - self.assertEqual(_rep(df), expected) - - # truncate - with option_context('display.max_rows', 3, 'display.max_columns', 3): - df = pd.DataFrame({'a': [u'あああああ', u'い', u'う', 
u'えええ'], - 'b': [u'あ', u'いいい', u'う', u'ええええええ'], - 'c': [u'お', u'か', u'ききき', u'くくくくくく'], - u'ああああ': [u'さ', u'し', u'す', u'せ']}, - columns=['a', 'b', 'c', u'ああああ']) - - expected = (u" a ... ああああ\n0 あああああ ... さ\n" - u".. ... ... ...\n3 えええ ... せ\n" - u"\n[4 rows x 4 columns]") - self.assertEqual(_rep(df), expected) - - df.index = [u'あああ', u'いいいい', u'う', 'aaa'] - expected = (u" a ... ああああ\nあああ あああああ ... さ\n" - u".. ... ... ...\naaa えええ ... せ\n" - u"\n[4 rows x 4 columns]") - self.assertEqual(_rep(df), expected) - - # Emable Unicode option ----------------------------------------- - with option_context('display.unicode.east_asian_width', True): - - # mid col - df = DataFrame({'a': [u'あ', u'いいい', u'う', u'ええええええ'], - 'b': [1, 222, 33333, 4]}, - index=['a', 'bb', 'c', 'ddd']) - expected = (u" a b\na あ 1\n" - u"bb いいい 222\nc う 33333\n" - u"ddd ええええええ 4") - self.assertEqual(_rep(df), expected) - - # last col - df = DataFrame({'a': [1, 222, 33333, 4], - 'b': [u'あ', u'いいい', u'う', u'ええええええ']}, - index=['a', 'bb', 'c', 'ddd']) - expected = (u" a b\na 1 あ\n" - u"bb 222 いいい\nc 33333 う\n" - u"ddd 4 ええええええ") - self.assertEqual(_rep(df), expected) - - # all col - df = DataFrame({'a': [u'あああああ', u'い', u'う', u'えええ'], - 'b': [u'あ', u'いいい', u'う', u'ええええええ']}, - index=['a', 'bb', 'c', 'ddd']) - expected = (u" a b\na あああああ あ\n" - u"bb い いいい\nc う う\n" - u"ddd えええ ええええええ" - "") - self.assertEqual(_rep(df), expected) - - # column name - df = DataFrame({u'あああああ': [1, 222, 33333, 4], - 'b': [u'あ', u'いいい', u'う', u'ええええええ']}, - index=['a', 'bb', 'c', 'ddd']) - expected = (u" b あああああ\na あ 1\n" - u"bb いいい 222\nc う 33333\n" - u"ddd ええええええ 4") - self.assertEqual(_rep(df), expected) - - # index - df = DataFrame({'a': [u'あああああ', u'い', u'う', u'えええ'], - 'b': [u'あ', u'いいい', u'う', u'ええええええ']}, - index=[u'あああ', u'いいいいいい', u'うう', u'え']) - expected = (u" a b\nあああ あああああ あ\n" - u"いいいいいい い いいい\nうう う う\n" - u"え えええ ええええええ") - self.assertEqual(_rep(df), expected) - - # index name - df = DataFrame({'a': [u'あああああ', 
u'い', u'う', u'えええ'], - 'b': [u'あ', u'いいい', u'う', u'ええええええ']}, - index=pd.Index([u'あ', u'い', u'うう', u'え'], name=u'おおおお')) - expected = (u" a b\nおおおお \n" - u"あ あああああ あ\nい い いいい\n" - u"うう う う\nえ えええ ええええええ" - ) - self.assertEqual(_rep(df), expected) - - # all - df = DataFrame({u'あああ': [u'あああ', u'い', u'う', u'えええええ'], - u'いいいいい': [u'あ', u'いいい', u'う', u'ええ']}, - index=pd.Index([u'あ', u'いいい', u'うう', u'え'], name=u'お')) - expected = (u" あああ いいいいい\nお \n" - u"あ あああ あ\nいいい い いいい\n" - u"うう う う\nえ えええええ ええ") - self.assertEqual(_rep(df), expected) - - # MultiIndex - idx = pd.MultiIndex.from_tuples([(u'あ', u'いい'), (u'う', u'え'), ( - u'おおお', u'かかかか'), (u'き', u'くく')]) - df = DataFrame({'a': [u'あああああ', u'い', u'う', u'えええ'], - 'b': [u'あ', u'いいい', u'う', u'ええええええ']}, index=idx) - expected = (u" a b\nあ いい あああああ あ\n" - u"う え い いいい\nおおお かかかか う う\n" - u"き くく えええ ええええええ") - self.assertEqual(_rep(df), expected) - - # truncate - with option_context('display.max_rows', 3, 'display.max_columns', - 3): - - df = pd.DataFrame({'a': [u'あああああ', u'い', u'う', u'えええ'], - 'b': [u'あ', u'いいい', u'う', u'ええええええ'], - 'c': [u'お', u'か', u'ききき', u'くくくくくく'], - u'ああああ': [u'さ', u'し', u'す', u'せ']}, - columns=['a', 'b', 'c', u'ああああ']) - - expected = (u" a ... ああああ\n0 あああああ ... さ\n" - u".. ... ... ...\n3 えええ ... せ\n" - u"\n[4 rows x 4 columns]") - self.assertEqual(_rep(df), expected) - - df.index = [u'あああ', u'いいいい', u'う', 'aaa'] - expected = (u" a ... ああああ\nあああ あああああ ... さ\n" - u"... ... ... ...\naaa えええ ... 
せ\n" - u"\n[4 rows x 4 columns]") - self.assertEqual(_rep(df), expected) - - # ambiguous unicode - df = DataFrame({u'あああああ': [1, 222, 33333, 4], - 'b': [u'あ', u'いいい', u'¡¡', u'ええええええ']}, - index=['a', 'bb', 'c', '¡¡¡']) - expected = (u" b あああああ\na あ 1\n" - u"bb いいい 222\nc ¡¡ 33333\n" - u"¡¡¡ ええええええ 4") - self.assertEqual(_rep(df), expected) - - def test_to_string_buffer_all_unicode(self): - buf = StringIO() - - empty = DataFrame({u('c/\u03c3'): Series()}) - nonempty = DataFrame({u('c/\u03c3'): Series([1, 2, 3])}) - - print(empty, file=buf) - print(nonempty, file=buf) - - # this should work - buf.getvalue() - - def test_to_string_with_col_space(self): - df = DataFrame(np.random.random(size=(1, 3))) - c10 = len(df.to_string(col_space=10).split("\n")[1]) - c20 = len(df.to_string(col_space=20).split("\n")[1]) - c30 = len(df.to_string(col_space=30).split("\n")[1]) - self.assertTrue(c10 < c20 < c30) - - # GH 8230 - # col_space wasn't being applied with header=False - with_header = df.to_string(col_space=20) - with_header_row1 = with_header.splitlines()[1] - no_header = df.to_string(col_space=20, header=False) - self.assertEqual(len(with_header_row1), len(no_header)) - - def test_to_string_truncate_indices(self): - for index in [tm.makeStringIndex, tm.makeUnicodeIndex, tm.makeIntIndex, - tm.makeDateIndex, tm.makePeriodIndex]: - for column in [tm.makeStringIndex]: - for h in [10, 20]: - for w in [10, 20]: - with option_context("display.expand_frame_repr", - False): - df = DataFrame(index=index(h), columns=column(w)) - with option_context("display.max_rows", 15): - if h == 20: - self.assertTrue( - has_vertically_truncated_repr(df)) - else: - self.assertFalse( - has_vertically_truncated_repr(df)) - with option_context("display.max_columns", 15): - if w == 20: - self.assertTrue( - has_horizontally_truncated_repr(df)) - else: - self.assertFalse( - has_horizontally_truncated_repr(df)) - with option_context("display.max_rows", 15, - "display.max_columns", 15): - if h == 20 and w 
== 20: - self.assertTrue(has_doubly_truncated_repr( - df)) - else: - self.assertFalse(has_doubly_truncated_repr( - df)) - - def test_to_string_truncate_multilevel(self): - arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], - ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] - df = DataFrame(index=arrays, columns=arrays) - with option_context("display.max_rows", 7, "display.max_columns", 7): - self.assertTrue(has_doubly_truncated_repr(df)) - - def test_truncate_with_different_dtypes(self): - - # 11594, 12045 - # when truncated the dtypes of the splits can differ - - # 11594 - import datetime - s = Series([datetime.datetime(2012, 1, 1)] * 10 + - [datetime.datetime(1012, 1, 2)] + [datetime.datetime(2012, 1, 3)] * 10) - - with pd.option_context('display.max_rows', 8): - result = str(s) - self.assertTrue('object' in result) - - # 12045 - df = DataFrame({'text': ['some words'] + [None] * 9}) - - with pd.option_context('display.max_rows', 8, 'display.max_columns', 3): - result = str(df) - self.assertTrue('None' in result) - self.assertFalse('NaN' in result) - - def test_datetimelike_frame(self): - - # GH 12211 - df = DataFrame( - {'date': [pd.Timestamp('20130101').tz_localize('UTC')] + [pd.NaT] * 5}) - - with option_context("display.max_rows", 5): - result = str(df) - self.assertTrue('2013-01-01 00:00:00+00:00' in result) - self.assertTrue('NaT' in result) - self.assertTrue('...' in result) - self.assertTrue('[6 rows x 1 columns]' in result) - - dts = [pd.Timestamp('2011-01-01', tz='US/Eastern')] * 5 + [pd.NaT] * 5 - df = pd.DataFrame({"dt": dts, - "x": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}) - with option_context('display.max_rows', 5): - expected = (' dt x\n' - '0 2011-01-01 00:00:00-05:00 1\n' - '1 2011-01-01 00:00:00-05:00 2\n' - '.. ... 
..\n' - '8 NaT 9\n' - '9 NaT 10\n\n' - '[10 rows x 2 columns]') - self.assertEqual(repr(df), expected) - - dts = [pd.NaT] * 5 + [pd.Timestamp('2011-01-01', tz='US/Eastern')] * 5 - df = pd.DataFrame({"dt": dts, - "x": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}) - with option_context('display.max_rows', 5): - expected = (' dt x\n' - '0 NaT 1\n' - '1 NaT 2\n' - '.. ... ..\n' - '8 2011-01-01 00:00:00-05:00 9\n' - '9 2011-01-01 00:00:00-05:00 10\n\n' - '[10 rows x 2 columns]') - self.assertEqual(repr(df), expected) - - dts = ([pd.Timestamp('2011-01-01', tz='Asia/Tokyo')] * 5 + - [pd.Timestamp('2011-01-01', tz='US/Eastern')] * 5) - df = pd.DataFrame({"dt": dts, - "x": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}) - with option_context('display.max_rows', 5): - expected = (' dt x\n' - '0 2011-01-01 00:00:00+09:00 1\n' - '1 2011-01-01 00:00:00+09:00 2\n' - '.. ... ..\n' - '8 2011-01-01 00:00:00-05:00 9\n' - '9 2011-01-01 00:00:00-05:00 10\n\n' - '[10 rows x 2 columns]') - self.assertEqual(repr(df), expected) - - def test_to_html_with_col_space(self): - def check_with_width(df, col_space): - import re - # check that col_space affects HTML generation - # and be very brittle about it. 
- html = df.to_html(col_space=col_space) - hdrs = [x for x in html.split(r"\n") if re.search(r"\s]", x)] - self.assertTrue(len(hdrs) > 0) - for h in hdrs: - self.assertTrue("min-width" in h) - self.assertTrue(str(col_space) in h) - - df = DataFrame(np.random.random(size=(1, 3))) - - check_with_width(df, 30) - check_with_width(df, 50) - - def test_to_html_with_empty_string_label(self): - # GH3547, to_html regards empty string labels as repeated labels - data = {'c1': ['a', 'b'], 'c2': ['a', ''], 'data': [1, 2]} - df = DataFrame(data).set_index(['c1', 'c2']) - res = df.to_html() - self.assertTrue("rowspan" not in res) - - def test_to_html_unicode(self): - df = DataFrame({u('\u03c3'): np.arange(10.)}) - expected = u'\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
\u03c3
00.0
11.0
22.0
33.0
44.0
55.0
66.0
77.0
88.0
99.0
' - self.assertEqual(df.to_html(), expected) - df = DataFrame({'A': [u('\u03c3')]}) - expected = u'\n \n \n \n \n \n \n \n \n \n \n \n \n
A
0\u03c3
' - self.assertEqual(df.to_html(), expected) - - def test_to_html_decimal(self): - # GH 12031 - df = DataFrame({'A': [6.0, 3.1, 2.2]}) - result = df.to_html(decimal=',') - expected = ('\n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - '
A
06,0
13,1
22,2
') - self.assertEqual(result, expected) - - def test_to_html_escaped(self): - a = 'str", - b: ""}, - 'co>l2': {a: "", - b: ""}} - rs = DataFrame(test_dict).to_html() - xp = """ - - - - - - - - - - - - - - - - - - - -
co<l1co>l2
str<ing1 &amp;<type 'str'><type 'str'>
stri>ng2 &amp;<type 'str'><type 'str'>
""" - - self.assertEqual(xp, rs) - - def test_to_html_escape_disabled(self): - a = 'strbold", - b: "bold"}, - 'co>l2': {a: "bold", - b: "bold"}} - rs = DataFrame(test_dict).to_html(escape=False) - xp = """ - - - - - - - - - - - - - - - - - -
co - co>l2
str - boldbold
stri>ng2 &boldbold
""" - - self.assertEqual(xp, rs) - - def test_to_html_multiindex_index_false(self): - # issue 8452 - df = DataFrame({ - 'a': range(2), - 'b': range(3, 5), - 'c': range(5, 7), - 'd': range(3, 5) - }) - df.columns = MultiIndex.from_product([['a', 'b'], ['c', 'd']]) - result = df.to_html(index=False) - expected = """\ - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ab
cdcd
0353
1464
""" - - self.assertEqual(result, expected) - - df.index = Index(df.index.values, name='idx') - result = df.to_html(index=False) - self.assertEqual(result, expected) - - def test_to_html_multiindex_sparsify_false_multi_sparse(self): - with option_context('display.multi_sparse', False): - index = MultiIndex.from_arrays([[0, 0, 1, 1], [0, 1, 0, 1]], - names=['foo', None]) - - df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], index=index) - - result = df.to_html() - expected = """\ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
01
foo
0001
0123
1045
1167
""" - - self.assertEqual(result, expected) - - df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], - columns=index[::2], index=index) - - result = df.to_html() - expected = """\ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
foo01
00
foo
0001
0123
1045
1167
""" - - self.assertEqual(result, expected) - - def test_to_html_multiindex_sparsify(self): - index = MultiIndex.from_arrays([[0, 0, 1, 1], [0, 1, 0, 1]], - names=['foo', None]) - - df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], index=index) - - result = df.to_html() - expected = """ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
01
foo
0001
123
1045
167
""" - - self.assertEqual(result, expected) - - df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], columns=index[::2], - index=index) - - result = df.to_html() - expected = """\ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
foo01
00
foo
0001
123
1045
167
""" - - self.assertEqual(result, expected) - - def test_to_html_multiindex_odd_even_truncate(self): - # GH 14882 - Issue on truncation with odd length DataFrame - mi = MultiIndex.from_product([[100, 200, 300], - [10, 20, 30], - [1, 2, 3, 4, 5, 6, 7]], - names=['a', 'b', 'c']) - df = DataFrame({'n': range(len(mi))}, index=mi) - result = df.to_html(max_rows=60) - expected
n
abc
1001010
21
32
43
54
65
76
2017
28
39
410
511
612
713
30114
215
316
417
518
619
720
20010121
222
323
424
525
626
727
20128
229
......
633
734
30135
236
337
438
539
640
741
30010142
243
344
445
546
647
748
20149
250
351
452
553
654
755
30156
257
358
459
560
661
762
""" - self.assertEqual(result, expected) - - # Test that ... appears in a middle level - result = df.to_html(max_rows=56) - expected
n
abc
1001010
21
32
43
54
65
76
2017
28
39
410
511
612
713
30114
215
316
417
518
619
720
20010121
222
323
424
525
626
727
.........
30135
236
337
438
539
640
741
30010142
243
344
445
546
647
748
20149
250
351
452
553
654
755
30156
257
358
459
560
661
762
""" - self.assertEqual(result, expected) - - def test_to_html_index_formatter(self): - df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], columns=['foo', None], - index=lrange(4)) - - f = lambda x: 'abcd' [x] - result = df.to_html(formatters={'__index__': f}) - expected = """\ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
fooNone
a01
b23
c45
d67
""" - - self.assertEqual(result, expected) - - def test_to_html_datetime64_monthformatter(self): - months = [datetime(2016, 1, 1), datetime(2016, 2, 2)] - x = DataFrame({'months': months}) - - def format_func(x): - return x.strftime('%Y-%m') - result = x.to_html(formatters={'months': format_func}) - expected = """\ - - - - - - - - - - - - - - - - - -
months
02016-01
12016-02
""" - self.assertEqual(result, expected) - - def test_to_html_datetime64_hourformatter(self): - - x = DataFrame({'hod': pd.to_datetime(['10:10:10.100', '12:12:12.120'], - format='%H:%M:%S.%f')}) - - def format_func(x): - return x.strftime('%H:%M') - result = x.to_html(formatters={'hod': format_func}) - expected = """\ - - - - - - - - - - - - - - - - - -
hod
010:10
112:12
""" - self.assertEqual(result, expected) - - def test_to_html_regression_GH6098(self): - df = DataFrame({u('clé1'): [u('a'), u('a'), u('b'), u('b'), u('a')], - u('clé2'): [u('1er'), u('2ème'), u('1er'), u('2ème'), - u('1er')], - 'données1': np.random.randn(5), - 'données2': np.random.randn(5)}) - # it works - df.pivot_table(index=[u('clé1')], columns=[u('clé2')])._repr_html_() - - def test_to_html_truncate(self): - pytest.skip("unreliable on travis") - index = pd.DatetimeIndex(start='20010101', freq='D', periods=20) - df = DataFrame(index=index, columns=range(20)) - fmt.set_option('display.max_rows', 8) - fmt.set_option('display.max_columns', 4) - result = df._repr_html_() - expected = '''\ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
01...1819
2001-01-01NaNNaN...NaNNaN
2001-01-02NaNNaN...NaNNaN
2001-01-03NaNNaN...NaNNaN
2001-01-04NaNNaN...NaNNaN
..................
2001-01-17NaNNaN...NaNNaN
2001-01-18NaNNaN...NaNNaN
2001-01-19NaNNaN...NaNNaN
2001-01-20NaNNaN...NaNNaN
-

20 rows × 20 columns

-'''.format(div_style) - if compat.PY2: - expected = expected.decode('utf-8') - self.assertEqual(result, expected) - - def test_to_html_truncate_multi_index(self): - pytest.skip("unreliable on travis") - arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], - ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] - df = DataFrame(index=arrays, columns=arrays) - fmt.set_option('display.max_rows', 7) - fmt.set_option('display.max_columns', 7) - result = df._repr_html_() - expected = '''\ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
barbaz...fooqux
onetwoone...twoonetwo
baroneNaNNaNNaN...NaNNaNNaN
twoNaNNaNNaN...NaNNaNNaN
bazoneNaNNaNNaN...NaNNaNNaN
...........................
footwoNaNNaNNaN...NaNNaNNaN
quxoneNaNNaNNaN...NaNNaNNaN
twoNaNNaNNaN...NaNNaNNaN
-

8 rows × 8 columns

-'''.format(div_style) - if compat.PY2: - expected = expected.decode('utf-8') - self.assertEqual(result, expected) - - def test_to_html_truncate_multi_index_sparse_off(self): - pytest.skip("unreliable on travis") - arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], - ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] - df = DataFrame(index=arrays, columns=arrays) - fmt.set_option('display.max_rows', 7) - fmt.set_option('display.max_columns', 7) - fmt.set_option('display.multi_sparse', False) - result = df._repr_html_() - expected = '''\ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
barbarbaz...fooquxqux
onetwoone...twoonetwo
baroneNaNNaNNaN...NaNNaNNaN
bartwoNaNNaNNaN...NaNNaNNaN
bazoneNaNNaNNaN...NaNNaNNaN
footwoNaNNaNNaN...NaNNaNNaN
quxoneNaNNaNNaN...NaNNaNNaN
quxtwoNaNNaNNaN...NaNNaNNaN
-

8 rows × 8 columns

-'''.format(div_style) - if compat.PY2: - expected = expected.decode('utf-8') - self.assertEqual(result, expected) - - def test_to_html_border(self): - df = DataFrame({'A': [1, 2]}) - result = df.to_html() - assert 'border="1"' in result - - def test_to_html_border_option(self): - df = DataFrame({'A': [1, 2]}) - with pd.option_context('html.border', 0): - result = df.to_html() - self.assertTrue('border="0"' in result) - self.assertTrue('border="0"' in df._repr_html_()) - - def test_to_html_border_zero(self): - df = DataFrame({'A': [1, 2]}) - result = df.to_html(border=0) - self.assertTrue('border="0"' in result) - - def test_nonunicode_nonascii_alignment(self): - df = DataFrame([["aa\xc3\xa4\xc3\xa4", 1], ["bbbb", 2]]) - rep_str = df.to_string() - lines = rep_str.split('\n') - self.assertEqual(len(lines[1]), len(lines[2])) - - def test_unicode_problem_decoding_as_ascii(self): - dm = DataFrame({u('c/\u03c3'): Series({'test': np.NaN})}) - compat.text_type(dm.to_string()) - - def test_string_repr_encoding(self): - filepath = tm.get_data_path('unicode_series.csv') - df = pd.read_csv(filepath, header=None, encoding='latin1') - repr(df) - repr(df[1]) - - def test_repr_corner(self): - # representing infs poses no problems - df = DataFrame({'foo': [-np.inf, np.inf]}) - repr(df) - - def test_frame_info_encoding(self): - index = ['\'Til There Was You (1997)', - 'ldum klaka (Cold Fever) (1994)'] - fmt.set_option('display.max_rows', 1) - df = DataFrame(columns=['a', 'b', 'c'], index=index) - repr(df) - repr(df.T) - fmt.set_option('display.max_rows', 200) - - def test_pprint_thing(self): - from pandas.formats.printing import pprint_thing as pp_t - - if PY3: - pytest.skip("doesn't work on Python 3") - - self.assertEqual(pp_t('a'), u('a')) - self.assertEqual(pp_t(u('a')), u('a')) - self.assertEqual(pp_t(None), 'None') - self.assertEqual(pp_t(u('\u05d0'), quote_strings=True), u("u'\u05d0'")) - self.assertEqual(pp_t(u('\u05d0'), quote_strings=False), u('\u05d0')) - 
self.assertEqual(pp_t((u('\u05d0'), - u('\u05d1')), quote_strings=True), - u("(u'\u05d0', u'\u05d1')")) - self.assertEqual(pp_t((u('\u05d0'), (u('\u05d1'), - u('\u05d2'))), - quote_strings=True), - u("(u'\u05d0', (u'\u05d1', u'\u05d2'))")) - self.assertEqual(pp_t(('foo', u('\u05d0'), (u('\u05d0'), - u('\u05d0'))), - quote_strings=True), - u("(u'foo', u'\u05d0', (u'\u05d0', u'\u05d0'))")) - - # escape embedded tabs in string - # GH #2038 - self.assertTrue(not "\t" in pp_t("a\tb", escape_chars=("\t", ))) - - def test_wide_repr(self): - with option_context('mode.sim_interactive', True, - 'display.show_dimensions', True): - max_cols = get_option('display.max_columns') - df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1))) - set_option('display.expand_frame_repr', False) - rep_str = repr(df) - - assert "10 rows x %d columns" % (max_cols - 1) in rep_str - set_option('display.expand_frame_repr', True) - wide_repr = repr(df) - self.assertNotEqual(rep_str, wide_repr) - - with option_context('display.width', 120): - wider_repr = repr(df) - self.assertTrue(len(wider_repr) < len(wide_repr)) - - reset_option('display.expand_frame_repr') - - def test_wide_repr_wide_columns(self): - with option_context('mode.sim_interactive', True): - df = DataFrame(randn(5, 3), columns=['a' * 90, 'b' * 90, 'c' * 90]) - rep_str = repr(df) - - self.assertEqual(len(rep_str.splitlines()), 20) - - def test_wide_repr_named(self): - with option_context('mode.sim_interactive', True): - max_cols = get_option('display.max_columns') - df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1))) - df.index.name = 'DataFrame Index' - set_option('display.expand_frame_repr', False) - - rep_str = repr(df) - set_option('display.expand_frame_repr', True) - wide_repr = repr(df) - self.assertNotEqual(rep_str, wide_repr) - - with option_context('display.width', 150): - wider_repr = repr(df) - self.assertTrue(len(wider_repr) < len(wide_repr)) - - for line in wide_repr.splitlines()[1::13]: - 
self.assertIn('DataFrame Index', line) - - reset_option('display.expand_frame_repr') - - def test_wide_repr_multiindex(self): - with option_context('mode.sim_interactive', True): - midx = MultiIndex.from_arrays(tm.rands_array(5, size=(2, 10))) - max_cols = get_option('display.max_columns') - df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1)), - index=midx) - df.index.names = ['Level 0', 'Level 1'] - set_option('display.expand_frame_repr', False) - rep_str = repr(df) - set_option('display.expand_frame_repr', True) - wide_repr = repr(df) - self.assertNotEqual(rep_str, wide_repr) - - with option_context('display.width', 150): - wider_repr = repr(df) - self.assertTrue(len(wider_repr) < len(wide_repr)) - - for line in wide_repr.splitlines()[1::13]: - self.assertIn('Level 0 Level 1', line) - - reset_option('display.expand_frame_repr') - - def test_wide_repr_multiindex_cols(self): - with option_context('mode.sim_interactive', True): - max_cols = get_option('display.max_columns') - midx = MultiIndex.from_arrays(tm.rands_array(5, size=(2, 10))) - mcols = MultiIndex.from_arrays(tm.rands_array(3, size=(2, max_cols - - 1))) - df = DataFrame(tm.rands_array(25, (10, max_cols - 1)), - index=midx, columns=mcols) - df.index.names = ['Level 0', 'Level 1'] - set_option('display.expand_frame_repr', False) - rep_str = repr(df) - set_option('display.expand_frame_repr', True) - wide_repr = repr(df) - self.assertNotEqual(rep_str, wide_repr) - - with option_context('display.width', 150): - wider_repr = repr(df) - self.assertTrue(len(wider_repr) < len(wide_repr)) - - reset_option('display.expand_frame_repr') - - def test_wide_repr_unicode(self): - with option_context('mode.sim_interactive', True): - max_cols = get_option('display.max_columns') - df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1))) - set_option('display.expand_frame_repr', False) - rep_str = repr(df) - set_option('display.expand_frame_repr', True) - wide_repr = repr(df) - self.assertNotEqual(rep_str, 
wide_repr) - - with option_context('display.width', 150): - wider_repr = repr(df) - self.assertTrue(len(wider_repr) < len(wide_repr)) - - reset_option('display.expand_frame_repr') - - def test_wide_repr_wide_long_columns(self): - with option_context('mode.sim_interactive', True): - df = DataFrame({'a': ['a' * 30, 'b' * 30], - 'b': ['c' * 70, 'd' * 80]}) - - result = repr(df) - self.assertTrue('ccccc' in result) - self.assertTrue('ddddd' in result) - - def test_long_series(self): - n = 1000 - s = Series( - np.random.randint(-50, 50, n), - index=['s%04d' % x for x in range(n)], dtype='int64') - - import re - str_rep = str(s) - nmatches = len(re.findall('dtype', str_rep)) - self.assertEqual(nmatches, 1) - - def test_index_with_nan(self): - # GH 2850 - df = DataFrame({'id1': {0: '1a3', - 1: '9h4'}, - 'id2': {0: np.nan, - 1: 'd67'}, - 'id3': {0: '78d', - 1: '79d'}, - 'value': {0: 123, - 1: 64}}) - - # multi-index - y = df.set_index(['id1', 'id2', 'id3']) - result = y.to_string() - expected = u( - ' value\nid1 id2 id3 \n1a3 NaN 78d 123\n9h4 d67 79d 64') - self.assertEqual(result, expected) - - # index - y = df.set_index('id2') - result = y.to_string() - expected = u( - ' id1 id3 value\nid2 \nNaN 1a3 78d 123\nd67 9h4 79d 64') - self.assertEqual(result, expected) - - # with append (this failed in 0.12) - y = df.set_index(['id1', 'id2']).set_index('id3', append=True) - result = y.to_string() - expected = u( - ' value\nid1 id2 id3 \n1a3 NaN 78d 123\n9h4 d67 79d 64') - self.assertEqual(result, expected) - - # all-nan in mi - df2 = df.copy() - df2.loc[:, 'id2'] = np.nan - y = df2.set_index('id2') - result = y.to_string() - expected = u( - ' id1 id3 value\nid2 \nNaN 1a3 78d 123\nNaN 9h4 79d 64') - self.assertEqual(result, expected) - - # partial nan in mi - df2 = df.copy() - df2.loc[:, 'id2'] = np.nan - y = df2.set_index(['id2', 'id3']) - result = y.to_string() - expected = u( - ' id1 value\nid2 id3 \nNaN 78d 1a3 123\n 79d 9h4 64') - self.assertEqual(result, expected) - - df = 
DataFrame({'id1': {0: np.nan, - 1: '9h4'}, - 'id2': {0: np.nan, - 1: 'd67'}, - 'id3': {0: np.nan, - 1: '79d'}, - 'value': {0: 123, - 1: 64}}) - - y = df.set_index(['id1', 'id2', 'id3']) - result = y.to_string() - expected = u( - ' value\nid1 id2 id3 \nNaN NaN NaN 123\n9h4 d67 79d 64') - self.assertEqual(result, expected) - - def test_to_string(self): - from pandas import read_table - import re - - # big mixed - biggie = DataFrame({'A': randn(200), - 'B': tm.makeStringIndex(200)}, - index=lrange(200)) - - biggie.loc[:20, 'A'] = nan - biggie.loc[:20, 'B'] = nan - s = biggie.to_string() - - buf = StringIO() - retval = biggie.to_string(buf=buf) - self.assertIsNone(retval) - self.assertEqual(buf.getvalue(), s) - - tm.assertIsInstance(s, compat.string_types) - - # print in right order - result = biggie.to_string(columns=['B', 'A'], col_space=17, - float_format='%.5f'.__mod__) - lines = result.split('\n') - header = lines[0].strip().split() - joined = '\n'.join([re.sub(r'\s+', ' ', x).strip() for x in lines[1:]]) - recons = read_table(StringIO(joined), names=header, - header=None, sep=' ') - tm.assert_series_equal(recons['B'], biggie['B']) - self.assertEqual(recons['A'].count(), biggie['A'].count()) - self.assertTrue((np.abs(recons['A'].dropna() - biggie['A'].dropna()) < - 0.1).all()) - - # expected = ['B', 'A'] - # self.assertEqual(header, expected) - - result = biggie.to_string(columns=['A'], col_space=17) - header = result.split('\n')[0].strip().split() - expected = ['A'] - self.assertEqual(header, expected) - - biggie.to_string(columns=['B', 'A'], - formatters={'A': lambda x: '%.1f' % x}) - - biggie.to_string(columns=['B', 'A'], float_format=str) - biggie.to_string(columns=['B', 'A'], col_space=12, float_format=str) - - frame = DataFrame(index=np.arange(200)) - frame.to_string() - - def test_to_string_no_header(self): - df = DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]}) - - df_s = df.to_string(header=False) - expected = "0 1 4\n1 2 5\n2 3 6" - - self.assertEqual(df_s, 
expected) - - def test_to_string_no_index(self): - df = DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]}) - - df_s = df.to_string(index=False) - expected = "x y\n1 4\n2 5\n3 6" - - self.assertEqual(df_s, expected) - - def test_to_string_line_width_no_index(self): - df = DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]}) - - df_s = df.to_string(line_width=1, index=False) - expected = "x \\\n1 \n2 \n3 \n\ny \n4 \n5 \n6" - - self.assertEqual(df_s, expected) - - def test_to_string_float_formatting(self): - self.reset_display_options() - fmt.set_option('display.precision', 5, 'display.column_space', 12, - 'display.notebook_repr_html', False) - - df = DataFrame({'x': [0, 0.25, 3456.000, 12e+45, 1.64e+6, 1.7e+8, - 1.253456, np.pi, -1e6]}) - - df_s = df.to_string() - - # Python 2.5 just wants me to be sad. And debian 32-bit - # sys.version_info[0] == 2 and sys.version_info[1] < 6: - if _three_digit_exp(): - expected = (' x\n0 0.00000e+000\n1 2.50000e-001\n' - '2 3.45600e+003\n3 1.20000e+046\n4 1.64000e+006\n' - '5 1.70000e+008\n6 1.25346e+000\n7 3.14159e+000\n' - '8 -1.00000e+006') - else: - expected = (' x\n0 0.00000e+00\n1 2.50000e-01\n' - '2 3.45600e+03\n3 1.20000e+46\n4 1.64000e+06\n' - '5 1.70000e+08\n6 1.25346e+00\n7 3.14159e+00\n' - '8 -1.00000e+06') - self.assertEqual(df_s, expected) - - df = DataFrame({'x': [3234, 0.253]}) - df_s = df.to_string() - - expected = (' x\n' '0 3234.000\n' '1 0.253') - self.assertEqual(df_s, expected) - - self.reset_display_options() - self.assertEqual(get_option("display.precision"), 6) - - df = DataFrame({'x': [1e9, 0.2512]}) - df_s = df.to_string() - # Python 2.5 just wants me to be sad. 
And debian 32-bit - # sys.version_info[0] == 2 and sys.version_info[1] < 6: - if _three_digit_exp(): - expected = (' x\n' - '0 1.000000e+009\n' - '1 2.512000e-001') - else: - expected = (' x\n' - '0 1.000000e+09\n' - '1 2.512000e-01') - self.assertEqual(df_s, expected) - - def test_to_string_small_float_values(self): - df = DataFrame({'a': [1.5, 1e-17, -5.5e-7]}) - - result = df.to_string() - # sadness per above - if '%.4g' % 1.7e8 == '1.7e+008': - expected = (' a\n' - '0 1.500000e+000\n' - '1 1.000000e-017\n' - '2 -5.500000e-007') - else: - expected = (' a\n' - '0 1.500000e+00\n' - '1 1.000000e-17\n' - '2 -5.500000e-07') - self.assertEqual(result, expected) - - # but not all exactly zero - df = df * 0 - result = df.to_string() - expected = (' 0\n' '0 0\n' '1 0\n' '2 -0') - - def test_to_string_float_index(self): - index = Index([1.5, 2, 3, 4, 5]) - df = DataFrame(lrange(5), index=index) - - result = df.to_string() - expected = (' 0\n' - '1.5 0\n' - '2.0 1\n' - '3.0 2\n' - '4.0 3\n' - '5.0 4') - self.assertEqual(result, expected) - - def test_to_string_ascii_error(self): - data = [('0 ', u(' .gitignore '), u(' 5 '), - ' \xe2\x80\xa2\xe2\x80\xa2\xe2\x80' - '\xa2\xe2\x80\xa2\xe2\x80\xa2')] - df = DataFrame(data) - - # it works! 
- repr(df) - - def test_to_string_int_formatting(self): - df = DataFrame({'x': [-15, 20, 25, -35]}) - self.assertTrue(issubclass(df['x'].dtype.type, np.integer)) - - output = df.to_string() - expected = (' x\n' '0 -15\n' '1 20\n' '2 25\n' '3 -35') - self.assertEqual(output, expected) - - def test_to_string_index_formatter(self): - df = DataFrame([lrange(5), lrange(5, 10), lrange(10, 15)]) - - rs = df.to_string(formatters={'__index__': lambda x: 'abc' [x]}) - - xp = """\ - 0 1 2 3 4 -a 0 1 2 3 4 -b 5 6 7 8 9 -c 10 11 12 13 14\ -""" - - self.assertEqual(rs, xp) - - def test_to_string_left_justify_cols(self): - self.reset_display_options() - df = DataFrame({'x': [3234, 0.253]}) - df_s = df.to_string(justify='left') - expected = (' x \n' '0 3234.000\n' '1 0.253') - self.assertEqual(df_s, expected) - - def test_to_string_format_na(self): - self.reset_display_options() - df = DataFrame({'A': [np.nan, -1, -2.1234, 3, 4], - 'B': [np.nan, 'foo', 'foooo', 'fooooo', 'bar']}) - result = df.to_string() - - expected = (' A B\n' - '0 NaN NaN\n' - '1 -1.0000 foo\n' - '2 -2.1234 foooo\n' - '3 3.0000 fooooo\n' - '4 4.0000 bar') - self.assertEqual(result, expected) - - df = DataFrame({'A': [np.nan, -1., -2., 3., 4.], - 'B': [np.nan, 'foo', 'foooo', 'fooooo', 'bar']}) - result = df.to_string() - - expected = (' A B\n' - '0 NaN NaN\n' - '1 -1.0 foo\n' - '2 -2.0 foooo\n' - '3 3.0 fooooo\n' - '4 4.0 bar') - self.assertEqual(result, expected) - - def test_to_string_line_width(self): - df = DataFrame(123, lrange(10, 15), lrange(30)) - s = df.to_string(line_width=80) - self.assertEqual(max(len(l) for l in s.split('\n')), 80) - - def test_show_dimensions(self): - df = DataFrame(123, lrange(10, 15), lrange(30)) - - with option_context('display.max_rows', 10, 'display.max_columns', 40, - 'display.width', 500, 'display.expand_frame_repr', - 'info', 'display.show_dimensions', True): - self.assertTrue('5 rows' in str(df)) - self.assertTrue('5 rows' in df._repr_html_()) - with 
option_context('display.max_rows', 10, 'display.max_columns', 40, - 'display.width', 500, 'display.expand_frame_repr', - 'info', 'display.show_dimensions', False): - self.assertFalse('5 rows' in str(df)) - self.assertFalse('5 rows' in df._repr_html_()) - with option_context('display.max_rows', 2, 'display.max_columns', 2, - 'display.width', 500, 'display.expand_frame_repr', - 'info', 'display.show_dimensions', 'truncate'): - self.assertTrue('5 rows' in str(df)) - self.assertTrue('5 rows' in df._repr_html_()) - with option_context('display.max_rows', 10, 'display.max_columns', 40, - 'display.width', 500, 'display.expand_frame_repr', - 'info', 'display.show_dimensions', 'truncate'): - self.assertFalse('5 rows' in str(df)) - self.assertFalse('5 rows' in df._repr_html_()) - - def test_to_html(self): - # big mixed - biggie = DataFrame({'A': randn(200), - 'B': tm.makeStringIndex(200)}, - index=lrange(200)) - - biggie.loc[:20, 'A'] = nan - biggie.loc[:20, 'B'] = nan - s = biggie.to_html() - - buf = StringIO() - retval = biggie.to_html(buf=buf) - self.assertIsNone(retval) - self.assertEqual(buf.getvalue(), s) - - tm.assertIsInstance(s, compat.string_types) - - biggie.to_html(columns=['B', 'A'], col_space=17) - biggie.to_html(columns=['B', 'A'], - formatters={'A': lambda x: '%.1f' % x}) - - biggie.to_html(columns=['B', 'A'], float_format=str) - biggie.to_html(columns=['B', 'A'], col_space=12, float_format=str) - - frame = DataFrame(index=np.arange(200)) - frame.to_html() - - def test_to_html_filename(self): - biggie = DataFrame({'A': randn(200), - 'B': tm.makeStringIndex(200)}, - index=lrange(200)) - - biggie.loc[:20, 'A'] = nan - biggie.loc[:20, 'B'] = nan - with tm.ensure_clean('test.html') as path: - biggie.to_html(path) - with open(path, 'r') as f: - s = biggie.to_html() - s2 = f.read() - self.assertEqual(s, s2) - - frame = DataFrame(index=np.arange(200)) - with tm.ensure_clean('test.html') as path: - frame.to_html(path) - with open(path, 'r') as f: - 
self.assertEqual(frame.to_html(), f.read()) - - def test_to_html_with_no_bold(self): - x = DataFrame({'x': randn(5)}) - ashtml = x.to_html(bold_rows=False) - self.assertFalse('")]) - - def test_to_html_columns_arg(self): - result = self.frame.to_html(columns=['A']) - self.assertNotIn('
B
\n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - '
CL001
CL10101
0abcd
1efgh
') - - self.assertEqual(result, expected) - - columns = MultiIndex.from_tuples(list(zip( - range(4), np.mod( - lrange(4), 2)))) - df = DataFrame([list('abcd'), list('efgh')], columns=columns) - - result = df.to_html(justify='right') - expected = ('\n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - '
0123
0101
0abcd
1efgh
') - - self.assertEqual(result, expected) - - def test_to_html_justify(self): - df = DataFrame({'A': [6, 30000, 2], - 'B': [1, 2, 70000], - 'C': [223442, 0, 1]}, - columns=['A', 'B', 'C']) - result = df.to_html(justify='left') - expected = ('\n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - '
ABC
061223442
13000020
22700001
') - self.assertEqual(result, expected) - - result = df.to_html(justify='right') - expected = ('\n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - '
ABC
061223442
13000020
22700001
') - self.assertEqual(result, expected) - - def test_to_html_index(self): - index = ['foo', 'bar', 'baz'] - df = DataFrame({'A': [1, 2, 3], - 'B': [1.2, 3.4, 5.6], - 'C': ['one', 'two', np.NaN]}, - columns=['A', 'B', 'C'], - index=index) - expected_with_index = ('\n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - '
ABC
foo11.2one
bar23.4two
baz35.6NaN
') - self.assertEqual(df.to_html(), expected_with_index) - - expected_without_index = ('\n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - '
ABC
11.2one
23.4two
35.6NaN
') - result = df.to_html(index=False) - for i in index: - self.assertNotIn(i, result) - self.assertEqual(result, expected_without_index) - df.index = Index(['foo', 'bar', 'baz'], name='idx') - expected_with_index = ('\n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - '
ABC
idx
foo11.2one
bar23.4two
baz35.6NaN
') - self.assertEqual(df.to_html(), expected_with_index) - self.assertEqual(df.to_html(index=False), expected_without_index) - - tuples = [('foo', 'car'), ('foo', 'bike'), ('bar', 'car')] - df.index = MultiIndex.from_tuples(tuples) - - expected_with_index = ('\n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - '
ABC
foocar11.2one
bike23.4two
barcar35.6NaN
') - self.assertEqual(df.to_html(), expected_with_index) - - result = df.to_html(index=False) - for i in ['foo', 'bar', 'car', 'bike']: - self.assertNotIn(i, result) - # must be the same result as normal index - self.assertEqual(result, expected_without_index) - - df.index = MultiIndex.from_tuples(tuples, names=['idx1', 'idx2']) - expected_with_index = ('\n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - '
ABC
idx1idx2
foocar11.2one
bike23.4two
barcar35.6NaN
') - self.assertEqual(df.to_html(), expected_with_index) - self.assertEqual(df.to_html(index=False), expected_without_index) - - def test_repr_html(self): - self.frame._repr_html_() - - fmt.set_option('display.max_rows', 1, 'display.max_columns', 1) - self.frame._repr_html_() - - fmt.set_option('display.notebook_repr_html', False) - self.frame._repr_html_() - - self.reset_display_options() - - df = DataFrame([[1, 2], [3, 4]]) - fmt.set_option('display.show_dimensions', True) - self.assertTrue('2 rows' in df._repr_html_()) - fmt.set_option('display.show_dimensions', False) - self.assertFalse('2 rows' in df._repr_html_()) - - self.reset_display_options() - - def test_repr_html_wide(self): - max_cols = get_option('display.max_columns') - df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1))) - reg_repr = df._repr_html_() - assert "..." not in reg_repr - - wide_df = DataFrame(tm.rands_array(25, size=(10, max_cols + 1))) - wide_repr = wide_df._repr_html_() - assert "..." in wide_repr - - def test_repr_html_wide_multiindex_cols(self): - max_cols = get_option('display.max_columns') - - mcols = MultiIndex.from_product([np.arange(max_cols // 2), - ['foo', 'bar']], - names=['first', 'second']) - df = DataFrame(tm.rands_array(25, size=(10, len(mcols))), - columns=mcols) - reg_repr = df._repr_html_() - assert '...' not in reg_repr - - mcols = MultiIndex.from_product((np.arange(1 + (max_cols // 2)), - ['foo', 'bar']), - names=['first', 'second']) - df = DataFrame(tm.rands_array(25, size=(10, len(mcols))), - columns=mcols) - wide_repr = df._repr_html_() - assert '...' in wide_repr - - def test_repr_html_long(self): - max_rows = get_option('display.max_rows') - h = max_rows - 1 - df = DataFrame({'A': np.arange(1, 1 + h), 'B': np.arange(41, 41 + h)}) - reg_repr = df._repr_html_() - assert '..' 
not in reg_repr - assert str(41 + max_rows // 2) in reg_repr - - h = max_rows + 1 - df = DataFrame({'A': np.arange(1, 1 + h), 'B': np.arange(41, 41 + h)}) - long_repr = df._repr_html_() - assert '..' in long_repr - assert str(41 + max_rows // 2) not in long_repr - assert u('%d rows ') % h in long_repr - assert u('2 columns') in long_repr - - def test_repr_html_float(self): - max_rows = get_option('display.max_rows') - h = max_rows - 1 - df = DataFrame({'idx': np.linspace(-10, 10, h), - 'A': np.arange(1, 1 + h), - 'B': np.arange(41, 41 + h)}).set_index('idx') - reg_repr = df._repr_html_() - assert '..' not in reg_repr - assert str(40 + h) in reg_repr - - h = max_rows + 1 - df = DataFrame({'idx': np.linspace(-10, 10, h), - 'A': np.arange(1, 1 + h), - 'B': np.arange(41, 41 + h)}).set_index('idx') - long_repr = df._repr_html_() - assert '..' in long_repr - assert '31' not in long_repr - assert u('%d rows ') % h in long_repr - assert u('2 columns') in long_repr - - def test_repr_html_long_multiindex(self): - max_rows = get_option('display.max_rows') - max_L1 = max_rows // 2 - - tuples = list(itertools.product(np.arange(max_L1), ['foo', 'bar'])) - idx = MultiIndex.from_tuples(tuples, names=['first', 'second']) - df = DataFrame(np.random.randn(max_L1 * 2, 2), index=idx, - columns=['A', 'B']) - reg_repr = df._repr_html_() - assert '...' not in reg_repr - - tuples = list(itertools.product(np.arange(max_L1 + 1), ['foo', 'bar'])) - idx = MultiIndex.from_tuples(tuples, names=['first', 'second']) - df = DataFrame(np.random.randn((max_L1 + 1) * 2, 2), index=idx, - columns=['A', 'B']) - long_repr = df._repr_html_() - assert '...' in long_repr - - def test_repr_html_long_and_wide(self): - max_cols = get_option('display.max_columns') - max_rows = get_option('display.max_rows') - - h, w = max_rows - 1, max_cols - 1 - df = DataFrame(dict((k, np.arange(1, 1 + h)) for k in np.arange(w))) - assert '...' 
not in df._repr_html_() - - h, w = max_rows + 1, max_cols + 1 - df = DataFrame(dict((k, np.arange(1, 1 + h)) for k in np.arange(w))) - assert '...' in df._repr_html_() - - def test_info_repr(self): - max_rows = get_option('display.max_rows') - max_cols = get_option('display.max_columns') - # Long - h, w = max_rows + 1, max_cols - 1 - df = DataFrame(dict((k, np.arange(1, 1 + h)) for k in np.arange(w))) - assert has_vertically_truncated_repr(df) - with option_context('display.large_repr', 'info'): - assert has_info_repr(df) - - # Wide - h, w = max_rows - 1, max_cols + 1 - df = DataFrame(dict((k, np.arange(1, 1 + h)) for k in np.arange(w))) - assert has_horizontally_truncated_repr(df) - with option_context('display.large_repr', 'info'): - assert has_info_repr(df) - - def test_info_repr_max_cols(self): - # GH #6939 - df = DataFrame(randn(10, 5)) - with option_context('display.large_repr', 'info', - 'display.max_columns', 1, - 'display.max_info_columns', 4): - self.assertTrue(has_non_verbose_info_repr(df)) - - with option_context('display.large_repr', 'info', - 'display.max_columns', 1, - 'display.max_info_columns', 5): - self.assertFalse(has_non_verbose_info_repr(df)) - - # test verbose overrides - # fmt.set_option('display.max_info_columns', 4) # exceeded - - def test_info_repr_html(self): - max_rows = get_option('display.max_rows') - max_cols = get_option('display.max_columns') - # Long - h, w = max_rows + 1, max_cols - 1 - df = DataFrame(dict((k, np.arange(1, 1 + h)) for k in np.arange(w))) - assert r'<class' not in df._repr_html_() - with option_context('display.large_repr', 'info'): - assert r'<class' in df._repr_html_() - - # Wide - h, w = max_rows - 1, max_cols + 1 - df = DataFrame(dict((k, np.arange(1, 1 + h)) for k in np.arange(w))) - assert ' - - - - - - - - - - """).strip() - self.assertEqual(result, expected) - - result = df.to_html(classes=["sortable", "draggable"]) - self.assertEqual(result, expected) - - def test_to_html_no_index_max_rows(self): - # GH 
https://github.com/pandas-dev/pandas/issues/14998 - df = DataFrame({"A": [1, 2, 3, 4]}) - result = df.to_html(index=False, max_rows=1) - expected = dedent("""\ - - - - - - - - - - - -
A
1
""") - self.assertEqual(result, expected) - - def test_pprint_pathological_object(self): - """ - if the test fails, the stack will overflow and nose crash, - but it won't hang. - """ - - class A: - - def __getitem__(self, key): - return 3 # obviously simplified - - df = DataFrame([A()]) - repr(df) # just don't dine - - def test_float_trim_zeros(self): - vals = [2.08430917305e+10, 3.52205017305e+10, 2.30674817305e+10, - 2.03954217305e+10, 5.59897817305e+10] - skip = True - for line in repr(DataFrame({'A': vals})).split('\n')[:-2]: - if line.startswith('dtype:'): - continue - if _three_digit_exp(): - self.assertTrue(('+010' in line) or skip) - else: - self.assertTrue(('+10' in line) or skip) - skip = False - - def test_dict_entries(self): - df = DataFrame({'A': [{'a': 1, 'b': 2}]}) - - val = df.to_string() - self.assertTrue("'a': 1" in val) - self.assertTrue("'b': 2" in val) - - def test_to_latex_filename(self): - with tm.ensure_clean('test.tex') as path: - self.frame.to_latex(path) - - with open(path, 'r') as f: - self.assertEqual(self.frame.to_latex(), f.read()) - - # test with utf-8 and encoding option (GH 7061) - df = DataFrame([[u'au\xdfgangen']]) - with tm.ensure_clean('test.tex') as path: - df.to_latex(path, encoding='utf-8') - with codecs.open(path, 'r', encoding='utf-8') as f: - self.assertEqual(df.to_latex(), f.read()) - - # test with utf-8 without encoding option - if compat.PY3: # python3: pandas default encoding is utf-8 - with tm.ensure_clean('test.tex') as path: - df.to_latex(path) - with codecs.open(path, 'r', encoding='utf-8') as f: - self.assertEqual(df.to_latex(), f.read()) - else: - # python2 default encoding is ascii, so an error should be raised - with tm.ensure_clean('test.tex') as path: - self.assertRaises(UnicodeEncodeError, df.to_latex, path) - - def test_to_latex(self): - # it works! 
- self.frame.to_latex() - - df = DataFrame({'a': [1, 2], 'b': ['b1', 'b2']}) - withindex_result = df.to_latex() - withindex_expected = r"""\begin{tabular}{lrl} -\toprule -{} & a & b \\ -\midrule -0 & 1 & b1 \\ -1 & 2 & b2 \\ -\bottomrule -\end{tabular} -""" - - self.assertEqual(withindex_result, withindex_expected) - - withoutindex_result = df.to_latex(index=False) - withoutindex_expected = r"""\begin{tabular}{rl} -\toprule - a & b \\ -\midrule - 1 & b1 \\ - 2 & b2 \\ -\bottomrule -\end{tabular} -""" - - self.assertEqual(withoutindex_result, withoutindex_expected) - - def test_to_latex_format(self): - # GH Bug #9402 - self.frame.to_latex(column_format='ccc') - - df = DataFrame({'a': [1, 2], 'b': ['b1', 'b2']}) - withindex_result = df.to_latex(column_format='ccc') - withindex_expected = r"""\begin{tabular}{ccc} -\toprule -{} & a & b \\ -\midrule -0 & 1 & b1 \\ -1 & 2 & b2 \\ -\bottomrule -\end{tabular} -""" - - self.assertEqual(withindex_result, withindex_expected) - - def test_to_latex_with_formatters(self): - df = DataFrame({'int': [1, 2, 3], - 'float': [1.0, 2.0, 3.0], - 'object': [(1, 2), True, False], - 'datetime64': [datetime(2016, 1, 1), - datetime(2016, 2, 5), - datetime(2016, 3, 3)]}) - - formatters = {'int': lambda x: '0x%x' % x, - 'float': lambda x: '[% 4.1f]' % x, - 'object': lambda x: '-%s-' % str(x), - 'datetime64': lambda x: x.strftime('%Y-%m'), - '__index__': lambda x: 'index: %s' % x} - result = df.to_latex(formatters=dict(formatters)) - - expected = r"""\begin{tabular}{llrrl} -\toprule -{} & datetime64 & float & int & object \\ -\midrule -index: 0 & 2016-01 & [ 1.0] & 0x1 & -(1, 2)- \\ -index: 1 & 2016-02 & [ 2.0] & 0x2 & -True- \\ -index: 2 & 2016-03 & [ 3.0] & 0x3 & -False- \\ -\bottomrule -\end{tabular} -""" - self.assertEqual(result, expected) - - def test_to_latex_multiindex(self): - df = DataFrame({('x', 'y'): ['a']}) - result = df.to_latex() - expected = r"""\begin{tabular}{ll} -\toprule -{} & x \\ -{} & y \\ -\midrule -0 & a \\ -\bottomrule 
-\end{tabular} -""" - - self.assertEqual(result, expected) - - result = df.T.to_latex() - expected = r"""\begin{tabular}{lll} -\toprule - & & 0 \\ -\midrule -x & y & a \\ -\bottomrule -\end{tabular} -""" - - self.assertEqual(result, expected) - - df = DataFrame.from_dict({ - ('c1', 0): pd.Series(dict((x, x) for x in range(4))), - ('c1', 1): pd.Series(dict((x, x + 4) for x in range(4))), - ('c2', 0): pd.Series(dict((x, x) for x in range(4))), - ('c2', 1): pd.Series(dict((x, x + 4) for x in range(4))), - ('c3', 0): pd.Series(dict((x, x) for x in range(4))), - }).T - result = df.to_latex() - expected = r"""\begin{tabular}{llrrrr} -\toprule - & & 0 & 1 & 2 & 3 \\ -\midrule -c1 & 0 & 0 & 1 & 2 & 3 \\ - & 1 & 4 & 5 & 6 & 7 \\ -c2 & 0 & 0 & 1 & 2 & 3 \\ - & 1 & 4 & 5 & 6 & 7 \\ -c3 & 0 & 0 & 1 & 2 & 3 \\ -\bottomrule -\end{tabular} -""" - - self.assertEqual(result, expected) - - # GH 10660 - df = pd.DataFrame({'a': [0, 0, 1, 1], - 'b': list('abab'), - 'c': [1, 2, 3, 4]}) - result = df.set_index(['a', 'b']).to_latex() - expected = r"""\begin{tabular}{llr} -\toprule - & & c \\ -a & b & \\ -\midrule -0 & a & 1 \\ - & b & 2 \\ -1 & a & 3 \\ - & b & 4 \\ -\bottomrule -\end{tabular} -""" - - self.assertEqual(result, expected) - - result = df.groupby('a').describe().to_latex() - expected = ('\\begin{tabular}{lrrrrrrrr}\n\\toprule\n{} & c & ' - ' & & & & & & ' - '\\\\\n{} & count & mean & std & min & 25\\% & ' - '50\\% & 75\\% & max \\\\\na & & & ' - ' & & & & & \\\\\n\\midrule\n0 ' - '& 2.0 & 1.5 & 0.707107 & 1.0 & 1.25 & 1.5 & 1.75 ' - '& 2.0 \\\\\n1 & 2.0 & 3.5 & 0.707107 & 3.0 & 3.25 ' - '& 3.5 & 3.75 & 4.0 ' - '\\\\\n\\bottomrule\n\\end{tabular}\n') - - self.assertEqual(result, expected) - - def test_to_latex_escape(self): - a = 'a' - b = 'b' - - test_dict = {u('co^l1'): {a: "a", - b: "b"}, - u('co$e^x$'): {a: "a", - b: "b"}} - - unescaped_result = DataFrame(test_dict).to_latex(escape=False) - escaped_result = DataFrame(test_dict).to_latex( - ) # default: escape=True - - 
unescaped_expected = r'''\begin{tabular}{lll} -\toprule -{} & co$e^x$ & co^l1 \\ -\midrule -a & a & a \\ -b & b & b \\ -\bottomrule -\end{tabular} -''' - - escaped_expected = r'''\begin{tabular}{lll} -\toprule -{} & co\$e\textasciicircumx\$ & co\textasciicircuml1 \\ -\midrule -a & a & a \\ -b & b & b \\ -\bottomrule -\end{tabular} -''' - - self.assertEqual(unescaped_result, unescaped_expected) - self.assertEqual(escaped_result, escaped_expected) - - def test_to_latex_longtable(self): - self.frame.to_latex(longtable=True) - - df = DataFrame({'a': [1, 2], 'b': ['b1', 'b2']}) - withindex_result = df.to_latex(longtable=True) - withindex_expected = r"""\begin{longtable}{lrl} -\toprule -{} & a & b \\ -\midrule -\endhead -\midrule -\multicolumn{3}{r}{{Continued on next page}} \\ -\midrule -\endfoot - -\bottomrule -\endlastfoot -0 & 1 & b1 \\ -1 & 2 & b2 \\ -\end{longtable} -""" - - self.assertEqual(withindex_result, withindex_expected) - - withoutindex_result = df.to_latex(index=False, longtable=True) - withoutindex_expected = r"""\begin{longtable}{rl} -\toprule - a & b \\ -\midrule -\endhead -\midrule -\multicolumn{3}{r}{{Continued on next page}} \\ -\midrule -\endfoot - -\bottomrule -\endlastfoot - 1 & b1 \\ - 2 & b2 \\ -\end{longtable} -""" - - self.assertEqual(withoutindex_result, withoutindex_expected) - - def test_to_latex_escape_special_chars(self): - special_characters = ['&', '%', '$', '#', '_', '{', '}', '~', '^', - '\\'] - df = DataFrame(data=special_characters) - observed = df.to_latex() - expected = r"""\begin{tabular}{ll} -\toprule -{} & 0 \\ -\midrule -0 & \& \\ -1 & \% \\ -2 & \$ \\ -3 & \# \\ -4 & \_ \\ -5 & \{ \\ -6 & \} \\ -7 & \textasciitilde \\ -8 & \textasciicircum \\ -9 & \textbackslash \\ -\bottomrule -\end{tabular} -""" - - self.assertEqual(observed, expected) - - def test_to_latex_no_header(self): - # GH 7124 - df = DataFrame({'a': [1, 2], 'b': ['b1', 'b2']}) - withindex_result = df.to_latex(header=False) - withindex_expected = 
r"""\begin{tabular}{lrl} -\toprule -0 & 1 & b1 \\ -1 & 2 & b2 \\ -\bottomrule -\end{tabular} -""" - - self.assertEqual(withindex_result, withindex_expected) - - withoutindex_result = df.to_latex(index=False, header=False) - withoutindex_expected = r"""\begin{tabular}{rl} -\toprule - 1 & b1 \\ - 2 & b2 \\ -\bottomrule -\end{tabular} -""" - - self.assertEqual(withoutindex_result, withoutindex_expected) - - def test_to_latex_decimal(self): - # GH 12031 - self.frame.to_latex() - df = DataFrame({'a': [1.0, 2.1], 'b': ['b1', 'b2']}) - withindex_result = df.to_latex(decimal=',') - print("WHAT THE") - withindex_expected = r"""\begin{tabular}{lrl} -\toprule -{} & a & b \\ -\midrule -0 & 1,0 & b1 \\ -1 & 2,1 & b2 \\ -\bottomrule -\end{tabular} -""" - - self.assertEqual(withindex_result, withindex_expected) - - def test_to_csv_quotechar(self): - df = DataFrame({'col': [1, 2]}) - expected = """\ -"","col" -"0","1" -"1","2" -""" - - with tm.ensure_clean('test.csv') as path: - df.to_csv(path, quoting=1) # 1=QUOTE_ALL - with open(path, 'r') as f: - self.assertEqual(f.read(), expected) - - expected = """\ -$$,$col$ -$0$,$1$ -$1$,$2$ -""" - - with tm.ensure_clean('test.csv') as path: - df.to_csv(path, quoting=1, quotechar="$") - with open(path, 'r') as f: - self.assertEqual(f.read(), expected) - - with tm.ensure_clean('test.csv') as path: - with tm.assertRaisesRegexp(TypeError, 'quotechar'): - df.to_csv(path, quoting=1, quotechar=None) - - def test_to_csv_doublequote(self): - df = DataFrame({'col': ['a"a', '"bb"']}) - expected = '''\ -"","col" -"0","a""a" -"1","""bb""" -''' - - with tm.ensure_clean('test.csv') as path: - df.to_csv(path, quoting=1, doublequote=True) # QUOTE_ALL - with open(path, 'r') as f: - self.assertEqual(f.read(), expected) - - from _csv import Error - with tm.ensure_clean('test.csv') as path: - with tm.assertRaisesRegexp(Error, 'escapechar'): - df.to_csv(path, doublequote=False) # no escapechar set - - def test_to_csv_escapechar(self): - df = DataFrame({'col': 
['a"a', '"bb"']}) - expected = '''\ -"","col" -"0","a\\"a" -"1","\\"bb\\"" -''' - - with tm.ensure_clean('test.csv') as path: # QUOTE_ALL - df.to_csv(path, quoting=1, doublequote=False, escapechar='\\') - with open(path, 'r') as f: - self.assertEqual(f.read(), expected) - - df = DataFrame({'col': ['a,a', ',bb,']}) - expected = """\ -,col -0,a\\,a -1,\\,bb\\, -""" - - with tm.ensure_clean('test.csv') as path: - df.to_csv(path, quoting=3, escapechar='\\') # QUOTE_NONE - with open(path, 'r') as f: - self.assertEqual(f.read(), expected) - - def test_csv_to_string(self): - df = DataFrame({'col': [1, 2]}) - expected = ',col\n0,1\n1,2\n' - self.assertEqual(df.to_csv(), expected) - - def test_to_csv_decimal(self): - # GH 781 - df = DataFrame({'col1': [1], 'col2': ['a'], 'col3': [10.1]}) - - expected_default = ',col1,col2,col3\n0,1,a,10.1\n' - self.assertEqual(df.to_csv(), expected_default) - - expected_european_excel = ';col1;col2;col3\n0;1;a;10,1\n' - self.assertEqual( - df.to_csv(decimal=',', sep=';'), expected_european_excel) - - expected_float_format_default = ',col1,col2,col3\n0,1,a,10.10\n' - self.assertEqual( - df.to_csv(float_format='%.2f'), expected_float_format_default) - - expected_float_format = ';col1;col2;col3\n0;1;a;10,10\n' - self.assertEqual( - df.to_csv(decimal=',', sep=';', - float_format='%.2f'), expected_float_format) - - # GH 11553: testing if decimal is taken into account for '0.0' - df = pd.DataFrame({'a': [0, 1.1], 'b': [2.2, 3.3], 'c': 1}) - expected = 'a,b,c\n0^0,2^2,1\n1^1,3^3,1\n' - self.assertEqual(df.to_csv(index=False, decimal='^'), expected) - - # same but for an index - self.assertEqual(df.set_index('a').to_csv(decimal='^'), expected) - - # same for a multi-index - self.assertEqual( - df.set_index(['a', 'b']).to_csv(decimal="^"), expected) - - def test_to_csv_float_format(self): - # testing if float_format is taken into account for the index - # GH 11553 - df = pd.DataFrame({'a': [0, 1], 'b': [2.2, 3.3], 'c': 1}) - expected = 
'a,b,c\n0,2.20,1\n1,3.30,1\n' - self.assertEqual( - df.set_index('a').to_csv(float_format='%.2f'), expected) - - # same for a multi-index - self.assertEqual( - df.set_index(['a', 'b']).to_csv(float_format='%.2f'), expected) - - def test_to_csv_na_rep(self): - # testing if NaN values are correctly represented in the index - # GH 11553 - df = DataFrame({'a': [0, np.NaN], 'b': [0, 1], 'c': [2, 3]}) - expected = "a,b,c\n0.0,0,2\n_,1,3\n" - self.assertEqual(df.set_index('a').to_csv(na_rep='_'), expected) - self.assertEqual(df.set_index(['a', 'b']).to_csv(na_rep='_'), expected) - - # now with an index containing only NaNs - df = DataFrame({'a': np.NaN, 'b': [0, 1], 'c': [2, 3]}) - expected = "a,b,c\n_,0,2\n_,1,3\n" - self.assertEqual(df.set_index('a').to_csv(na_rep='_'), expected) - self.assertEqual(df.set_index(['a', 'b']).to_csv(na_rep='_'), expected) - - # check if na_rep parameter does not break anything when no NaN - df = DataFrame({'a': 0, 'b': [0, 1], 'c': [2, 3]}) - expected = "a,b,c\n0,0,2\n0,1,3\n" - self.assertEqual(df.set_index('a').to_csv(na_rep='_'), expected) - self.assertEqual(df.set_index(['a', 'b']).to_csv(na_rep='_'), expected) - - def test_to_csv_date_format(self): - # GH 10209 - df_sec = DataFrame({'A': pd.date_range('20130101', periods=5, freq='s') - }) - df_day = DataFrame({'A': pd.date_range('20130101', periods=5, freq='d') - }) - - expected_default_sec = ',A\n0,2013-01-01 00:00:00\n1,2013-01-01 00:00:01\n2,2013-01-01 00:00:02' + \ - '\n3,2013-01-01 00:00:03\n4,2013-01-01 00:00:04\n' - self.assertEqual(df_sec.to_csv(), expected_default_sec) - - expected_ymdhms_day = ',A\n0,2013-01-01 00:00:00\n1,2013-01-02 00:00:00\n2,2013-01-03 00:00:00' + \ - '\n3,2013-01-04 00:00:00\n4,2013-01-05 00:00:00\n' - self.assertEqual( - df_day.to_csv( - date_format='%Y-%m-%d %H:%M:%S'), expected_ymdhms_day) - - expected_ymd_sec = ',A\n0,2013-01-01\n1,2013-01-01\n2,2013-01-01\n3,2013-01-01\n4,2013-01-01\n' - self.assertEqual( - df_sec.to_csv(date_format='%Y-%m-%d'), 
expected_ymd_sec) - - expected_default_day = ',A\n0,2013-01-01\n1,2013-01-02\n2,2013-01-03\n3,2013-01-04\n4,2013-01-05\n' - self.assertEqual(df_day.to_csv(), expected_default_day) - self.assertEqual( - df_day.to_csv(date_format='%Y-%m-%d'), expected_default_day) - - # testing if date_format parameter is taken into account for - # multi-indexed dataframes (GH 7791) - df_sec['B'] = 0 - df_sec['C'] = 1 - expected_ymd_sec = 'A,B,C\n2013-01-01,0,1\n' - df_sec_grouped = df_sec.groupby([pd.Grouper(key='A', freq='1h'), 'B']) - self.assertEqual(df_sec_grouped.mean().to_csv(date_format='%Y-%m-%d'), - expected_ymd_sec) - - def test_to_csv_multi_index(self): - # see gh-6618 - df = DataFrame([1], columns=pd.MultiIndex.from_arrays([[1], [2]])) - - exp = ",1\n,2\n0,1\n" - self.assertEqual(df.to_csv(), exp) - - exp = "1\n2\n1\n" - self.assertEqual(df.to_csv(index=False), exp) - - df = DataFrame([1], columns=pd.MultiIndex.from_arrays([[1], [2]]), - index=pd.MultiIndex.from_arrays([[1], [2]])) - - exp = ",,1\n,,2\n1,2,1\n" - self.assertEqual(df.to_csv(), exp) - - exp = "1\n2\n1\n" - self.assertEqual(df.to_csv(index=False), exp) - - df = DataFrame( - [1], columns=pd.MultiIndex.from_arrays([['foo'], ['bar']])) - - exp = ",foo\n,bar\n0,1\n" - self.assertEqual(df.to_csv(), exp) - - exp = "foo\nbar\n1\n" - self.assertEqual(df.to_csv(index=False), exp) - - def test_period(self): - # GH 12615 - df = pd.DataFrame({'A': pd.period_range('2013-01', - periods=4, freq='M'), - 'B': [pd.Period('2011-01', freq='M'), - pd.Period('2011-02-01', freq='D'), - pd.Period('2011-03-01 09:00', freq='H'), - pd.Period('2011-04', freq='M')], - 'C': list('abcd')}) - exp = (" A B C\n0 2013-01 2011-01 a\n" - "1 2013-02 2011-02-01 b\n2 2013-03 2011-03-01 09:00 c\n" - "3 2013-04 2011-04 d") - self.assertEqual(str(df), exp) - - -def gen_series_formatting(): - s1 = pd.Series(['a'] * 100) - s2 = pd.Series(['ab'] * 100) - s3 = pd.Series(['a', 'ab', 'abc', 'abcd', 'abcde', 'abcdef']) - s4 = s3[::-1] - test_sers = 
{'onel': s1, 'twol': s2, 'asc': s3, 'desc': s4} - return test_sers - - -class TestSeriesFormatting(tm.TestCase): - - def setUp(self): - self.ts = tm.makeTimeSeries() - - def test_repr_unicode(self): - s = Series([u('\u03c3')] * 10) - repr(s) - - a = Series([u("\u05d0")] * 1000) - a.name = 'title1' - repr(a) - - def test_to_string(self): - buf = StringIO() - - s = self.ts.to_string() - - retval = self.ts.to_string(buf=buf) - self.assertIsNone(retval) - self.assertEqual(buf.getvalue().strip(), s) - - # pass float_format - format = '%.4f'.__mod__ - result = self.ts.to_string(float_format=format) - result = [x.split()[1] for x in result.split('\n')[:-1]] - expected = [format(x) for x in self.ts] - self.assertEqual(result, expected) - - # empty string - result = self.ts[:0].to_string() - self.assertEqual(result, 'Series([], Freq: B)') - - result = self.ts[:0].to_string(length=0) - self.assertEqual(result, 'Series([], Freq: B)') - - # name and length - cp = self.ts.copy() - cp.name = 'foo' - result = cp.to_string(length=True, name=True, dtype=True) - last_line = result.split('\n')[-1].strip() - self.assertEqual(last_line, - "Freq: B, Name: foo, Length: %d, dtype: float64" % - len(cp)) - - def test_freq_name_separation(self): - s = Series(np.random.randn(10), - index=date_range('1/1/2000', periods=10), name=0) - - result = repr(s) - self.assertTrue('Freq: D, Name: 0' in result) - - def test_to_string_mixed(self): - s = Series(['foo', np.nan, -1.23, 4.56]) - result = s.to_string() - expected = (u('0 foo\n') + u('1 NaN\n') + u('2 -1.23\n') + - u('3 4.56')) - self.assertEqual(result, expected) - - # but don't count NAs as floats - s = Series(['foo', np.nan, 'bar', 'baz']) - result = s.to_string() - expected = (u('0 foo\n') + '1 NaN\n' + '2 bar\n' + '3 baz') - self.assertEqual(result, expected) - - s = Series(['foo', 5, 'bar', 'baz']) - result = s.to_string() - expected = (u('0 foo\n') + '1 5\n' + '2 bar\n' + '3 baz') - self.assertEqual(result, expected) - - def 
test_to_string_float_na_spacing(self): - s = Series([0., 1.5678, 2., -3., 4.]) - s[::2] = np.nan - - result = s.to_string() - expected = (u('0 NaN\n') + '1 1.5678\n' + '2 NaN\n' + - '3 -3.0000\n' + '4 NaN') - self.assertEqual(result, expected) - - def test_to_string_without_index(self): - # GH 11729 Test index=False option - s = Series([1, 2, 3, 4]) - result = s.to_string(index=False) - expected = (u('1\n') + '2\n' + '3\n' + '4') - self.assertEqual(result, expected) - - def test_unicode_name_in_footer(self): - s = Series([1, 2], name=u('\u05e2\u05d1\u05e8\u05d9\u05ea')) - sf = fmt.SeriesFormatter(s, name=u('\u05e2\u05d1\u05e8\u05d9\u05ea')) - sf._get_footer() # should not raise exception - - def test_east_asian_unicode_series(self): - if PY3: - _rep = repr - else: - _rep = unicode - # not alighned properly because of east asian width - - # unicode index - s = Series(['a', 'bb', 'CCC', 'D'], - index=[u'あ', u'いい', u'ううう', u'ええええ']) - expected = (u"あ a\nいい bb\nううう CCC\n" - u"ええええ D\ndtype: object") - self.assertEqual(_rep(s), expected) - - # unicode values - s = Series([u'あ', u'いい', u'ううう', u'ええええ'], - index=['a', 'bb', 'c', 'ddd']) - expected = (u"a あ\nbb いい\nc ううう\n" - u"ddd ええええ\ndtype: object") - self.assertEqual(_rep(s), expected) - - # both - s = Series([u'あ', u'いい', u'ううう', u'ええええ'], - index=[u'ああ', u'いいいい', u'う', u'えええ']) - expected = (u"ああ あ\nいいいい いい\nう ううう\n" - u"えええ ええええ\ndtype: object") - self.assertEqual(_rep(s), expected) - - # unicode footer - s = Series([u'あ', u'いい', u'ううう', u'ええええ'], - index=[u'ああ', u'いいいい', u'う', u'えええ'], name=u'おおおおおおお') - expected = (u"ああ あ\nいいいい いい\nう ううう\n" - u"えええ ええええ\nName: おおおおおおお, dtype: object") - self.assertEqual(_rep(s), expected) - - # MultiIndex - idx = pd.MultiIndex.from_tuples([(u'あ', u'いい'), (u'う', u'え'), ( - u'おおお', u'かかかか'), (u'き', u'くく')]) - s = Series([1, 22, 3333, 44444], index=idx) - expected = (u"あ いい 1\nう え 22\nおおお かかかか 3333\n" - u"き くく 44444\ndtype: int64") - self.assertEqual(_rep(s), expected) - - # object 
dtype, shorter than unicode repr - s = Series([1, 22, 3333, 44444], index=[1, 'AB', np.nan, u'あああ']) - expected = (u"1 1\nAB 22\nNaN 3333\n" - u"あああ 44444\ndtype: int64") - self.assertEqual(_rep(s), expected) - - # object dtype, longer than unicode repr - s = Series([1, 22, 3333, 44444], - index=[1, 'AB', pd.Timestamp('2011-01-01'), u'あああ']) - expected = (u"1 1\nAB 22\n" - u"2011-01-01 00:00:00 3333\nあああ 44444\ndtype: int64" - ) - self.assertEqual(_rep(s), expected) - - # truncate - with option_context('display.max_rows', 3): - s = Series([u'あ', u'いい', u'ううう', u'ええええ'], name=u'おおおおおおお') - - expected = (u"0 あ\n ... \n" - u"3 ええええ\nName: おおおおおおお, dtype: object") - self.assertEqual(_rep(s), expected) - - s.index = [u'ああ', u'いいいい', u'う', u'えええ'] - expected = (u"ああ あ\n ... \n" - u"えええ ええええ\nName: おおおおおおお, dtype: object") - self.assertEqual(_rep(s), expected) - - # Emable Unicode option ----------------------------------------- - with option_context('display.unicode.east_asian_width', True): - - # unicode index - s = Series(['a', 'bb', 'CCC', 'D'], - index=[u'あ', u'いい', u'ううう', u'ええええ']) - expected = (u"あ a\nいい bb\nううう CCC\n" - u"ええええ D\ndtype: object") - self.assertEqual(_rep(s), expected) - - # unicode values - s = Series([u'あ', u'いい', u'ううう', u'ええええ'], - index=['a', 'bb', 'c', 'ddd']) - expected = (u"a あ\nbb いい\nc ううう\n" - u"ddd ええええ\ndtype: object") - self.assertEqual(_rep(s), expected) - - # both - s = Series([u'あ', u'いい', u'ううう', u'ええええ'], - index=[u'ああ', u'いいいい', u'う', u'えええ']) - expected = (u"ああ あ\nいいいい いい\nう ううう\n" - u"えええ ええええ\ndtype: object") - self.assertEqual(_rep(s), expected) - - # unicode footer - s = Series([u'あ', u'いい', u'ううう', u'ええええ'], - index=[u'ああ', u'いいいい', u'う', u'えええ'], name=u'おおおおおおお') - expected = (u"ああ あ\nいいいい いい\nう ううう\n" - u"えええ ええええ\nName: おおおおおおお, dtype: object") - self.assertEqual(_rep(s), expected) - - # MultiIndex - idx = pd.MultiIndex.from_tuples([(u'あ', u'いい'), (u'う', u'え'), ( - u'おおお', u'かかかか'), (u'き', u'くく')]) - s = Series([1, 22, 
3333, 44444], index=idx) - expected = (u"あ いい 1\nう え 22\nおおお かかかか 3333\n" - u"き くく 44444\ndtype: int64") - self.assertEqual(_rep(s), expected) - - # object dtype, shorter than unicode repr - s = Series([1, 22, 3333, 44444], index=[1, 'AB', np.nan, u'あああ']) - expected = (u"1 1\nAB 22\nNaN 3333\n" - u"あああ 44444\ndtype: int64") - self.assertEqual(_rep(s), expected) - - # object dtype, longer than unicode repr - s = Series([1, 22, 3333, 44444], - index=[1, 'AB', pd.Timestamp('2011-01-01'), u'あああ']) - expected = (u"1 1\nAB 22\n" - u"2011-01-01 00:00:00 3333\nあああ 44444\ndtype: int64" - ) - self.assertEqual(_rep(s), expected) - - # truncate - with option_context('display.max_rows', 3): - s = Series([u'あ', u'いい', u'ううう', u'ええええ'], name=u'おおおおおおお') - expected = (u"0 あ\n ... \n" - u"3 ええええ\nName: おおおおおおお, dtype: object") - self.assertEqual(_rep(s), expected) - - s.index = [u'ああ', u'いいいい', u'う', u'えええ'] - expected = (u"ああ あ\n ... \n" - u"えええ ええええ\nName: おおおおおおお, dtype: object") - self.assertEqual(_rep(s), expected) - - # ambiguous unicode - s = Series([u'¡¡', u'い¡¡', u'ううう', u'ええええ'], - index=[u'ああ', u'¡¡¡¡いい', u'¡¡', u'えええ']) - expected = (u"ああ ¡¡\n¡¡¡¡いい い¡¡\n¡¡ ううう\n" - u"えええ ええええ\ndtype: object") - self.assertEqual(_rep(s), expected) - - def test_float_trim_zeros(self): - vals = [2.08430917305e+10, 3.52205017305e+10, 2.30674817305e+10, - 2.03954217305e+10, 5.59897817305e+10] - for line in repr(Series(vals)).split('\n'): - if line.startswith('dtype:'): - continue - if _three_digit_exp(): - self.assertIn('+010', line) - else: - self.assertIn('+10', line) - - def test_datetimeindex(self): - - index = date_range('20130102', periods=6) - s = Series(1, index=index) - result = s.to_string() - self.assertTrue('2013-01-02' in result) - - # nat in index - s2 = Series(2, index=[Timestamp('20130111'), NaT]) - s = s2.append(s) - result = s.to_string() - self.assertTrue('NaT' in result) - - # nat in summary - result = str(s2.index) - self.assertTrue('NaT' in result) - - def 
test_timedelta64(self): - - from datetime import datetime, timedelta - - Series(np.array([1100, 20], dtype='timedelta64[ns]')).to_string() - - s = Series(date_range('2012-1-1', periods=3, freq='D')) - - # GH2146 - - # adding NaTs - y = s - s.shift(1) - result = y.to_string() - self.assertTrue('1 days' in result) - self.assertTrue('00:00:00' not in result) - self.assertTrue('NaT' in result) - - # with frac seconds - o = Series([datetime(2012, 1, 1, microsecond=150)] * 3) - y = s - o - result = y.to_string() - self.assertTrue('-1 days +23:59:59.999850' in result) - - # rounding? - o = Series([datetime(2012, 1, 1, 1)] * 3) - y = s - o - result = y.to_string() - self.assertTrue('-1 days +23:00:00' in result) - self.assertTrue('1 days 23:00:00' in result) - - o = Series([datetime(2012, 1, 1, 1, 1)] * 3) - y = s - o - result = y.to_string() - self.assertTrue('-1 days +22:59:00' in result) - self.assertTrue('1 days 22:59:00' in result) - - o = Series([datetime(2012, 1, 1, 1, 1, microsecond=150)] * 3) - y = s - o - result = y.to_string() - self.assertTrue('-1 days +22:58:59.999850' in result) - self.assertTrue('0 days 22:58:59.999850' in result) - - # neg time - td = timedelta(minutes=5, seconds=3) - s2 = Series(date_range('2012-1-1', periods=3, freq='D')) + td - y = s - s2 - result = y.to_string() - self.assertTrue('-1 days +23:54:57' in result) - - td = timedelta(microseconds=550) - s2 = Series(date_range('2012-1-1', periods=3, freq='D')) + td - y = s - td - result = y.to_string() - self.assertTrue('2012-01-01 23:59:59.999450' in result) - - # no boxing of the actual elements - td = Series(pd.timedelta_range('1 days', periods=3)) - result = td.to_string() - self.assertEqual(result, u("0 1 days\n1 2 days\n2 3 days")) - - def test_mixed_datetime64(self): - df = DataFrame({'A': [1, 2], 'B': ['2012-01-01', '2012-01-02']}) - df['B'] = pd.to_datetime(df.B) - - result = repr(df.loc[0]) - self.assertTrue('2012-01-01' in result) - - def test_period(self): - # GH 12615 - index = 
pd.period_range('2013-01', periods=6, freq='M') - s = Series(np.arange(6, dtype='int64'), index=index) - exp = ("2013-01 0\n2013-02 1\n2013-03 2\n2013-04 3\n" - "2013-05 4\n2013-06 5\nFreq: M, dtype: int64") - self.assertEqual(str(s), exp) - - s = Series(index) - exp = ("0 2013-01\n1 2013-02\n2 2013-03\n3 2013-04\n" - "4 2013-05\n5 2013-06\ndtype: object") - self.assertEqual(str(s), exp) - - # periods with mixed freq - s = Series([pd.Period('2011-01', freq='M'), - pd.Period('2011-02-01', freq='D'), - pd.Period('2011-03-01 09:00', freq='H')]) - exp = ("0 2011-01\n1 2011-02-01\n" - "2 2011-03-01 09:00\ndtype: object") - self.assertEqual(str(s), exp) - - def test_max_multi_index_display(self): - # GH 7101 - - # doc example (indexing.rst) - - # multi-index - arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], - ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] - tuples = list(zip(*arrays)) - index = MultiIndex.from_tuples(tuples, names=['first', 'second']) - s = Series(randn(8), index=index) - - with option_context("display.max_rows", 10): - self.assertEqual(len(str(s).split('\n')), 10) - with option_context("display.max_rows", 3): - self.assertEqual(len(str(s).split('\n')), 5) - with option_context("display.max_rows", 2): - self.assertEqual(len(str(s).split('\n')), 5) - with option_context("display.max_rows", 1): - self.assertEqual(len(str(s).split('\n')), 4) - with option_context("display.max_rows", 0): - self.assertEqual(len(str(s).split('\n')), 10) - - # index - s = Series(randn(8), None) - - with option_context("display.max_rows", 10): - self.assertEqual(len(str(s).split('\n')), 9) - with option_context("display.max_rows", 3): - self.assertEqual(len(str(s).split('\n')), 4) - with option_context("display.max_rows", 2): - self.assertEqual(len(str(s).split('\n')), 4) - with option_context("display.max_rows", 1): - self.assertEqual(len(str(s).split('\n')), 3) - with option_context("display.max_rows", 0): - 
self.assertEqual(len(str(s).split('\n')), 9) - - # Make sure #8532 is fixed - def test_consistent_format(self): - s = pd.Series([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.9999, 1, 1] * 10) - with option_context("display.max_rows", 10): - res = repr(s) - exp = ('0 1.0000\n1 1.0000\n2 1.0000\n3 ' - '1.0000\n4 1.0000\n ... \n125 ' - '1.0000\n126 1.0000\n127 0.9999\n128 ' - '1.0000\n129 1.0000\ndtype: float64') - self.assertEqual(res, exp) - - def chck_ncols(self, s): - with option_context("display.max_rows", 10): - res = repr(s) - lines = res.split('\n') - lines = [line for line in repr(s).split('\n') - if not re.match(r'[^\.]*\.+', line)][:-1] - ncolsizes = len(set(len(line.strip()) for line in lines)) - self.assertEqual(ncolsizes, 1) - - def test_format_explicit(self): - test_sers = gen_series_formatting() - with option_context("display.max_rows", 4): - res = repr(test_sers['onel']) - exp = '0 a\n1 a\n ..\n98 a\n99 a\ndtype: object' - self.assertEqual(exp, res) - res = repr(test_sers['twol']) - exp = ('0 ab\n1 ab\n ..\n98 ab\n99 ab\ndtype:' - ' object') - self.assertEqual(exp, res) - res = repr(test_sers['asc']) - exp = ('0 a\n1 ab\n ... \n4 abcde\n5' - ' abcdef\ndtype: object') - self.assertEqual(exp, res) - res = repr(test_sers['desc']) - exp = ('5 abcdef\n4 abcde\n ... 
\n1 ab\n0' - ' a\ndtype: object') - self.assertEqual(exp, res) - - def test_ncols(self): - test_sers = gen_series_formatting() - for s in test_sers.values(): - self.chck_ncols(s) - - def test_max_rows_eq_one(self): - s = Series(range(10), dtype='int64') - with option_context("display.max_rows", 1): - strrepr = repr(s).split('\n') - exp1 = ['0', '0'] - res1 = strrepr[0].split() - self.assertEqual(exp1, res1) - exp2 = ['..'] - res2 = strrepr[1].split() - self.assertEqual(exp2, res2) - - def test_truncate_ndots(self): - def getndots(s): - return len(re.match(r'[^\.]*(\.*)', s).groups()[0]) - - s = Series([0, 2, 3, 6]) - with option_context("display.max_rows", 2): - strrepr = repr(s).replace('\n', '') - self.assertEqual(getndots(strrepr), 2) - - s = Series([0, 100, 200, 400]) - with option_context("display.max_rows", 2): - strrepr = repr(s).replace('\n', '') - self.assertEqual(getndots(strrepr), 3) - - def test_to_string_name(self): - s = Series(range(100), dtype='int64') - s.name = 'myser' - res = s.to_string(max_rows=2, name=True) - exp = '0 0\n ..\n99 99\nName: myser' - self.assertEqual(res, exp) - res = s.to_string(max_rows=2, name=False) - exp = '0 0\n ..\n99 99' - self.assertEqual(res, exp) - - def test_to_string_dtype(self): - s = Series(range(100), dtype='int64') - res = s.to_string(max_rows=2, dtype=True) - exp = '0 0\n ..\n99 99\ndtype: int64' - self.assertEqual(res, exp) - res = s.to_string(max_rows=2, dtype=False) - exp = '0 0\n ..\n99 99' - self.assertEqual(res, exp) - - def test_to_string_length(self): - s = Series(range(100), dtype='int64') - res = s.to_string(max_rows=2, length=True) - exp = '0 0\n ..\n99 99\nLength: 100' - self.assertEqual(res, exp) - - def test_to_string_na_rep(self): - s = pd.Series(index=range(100)) - res = s.to_string(na_rep='foo', max_rows=2) - exp = '0 foo\n ..\n99 foo' - self.assertEqual(res, exp) - - def test_to_string_float_format(self): - s = pd.Series(range(10), dtype='float64') - res = s.to_string(float_format=lambda x: 
'{0:2.1f}'.format(x), - max_rows=2) - exp = '0 0.0\n ..\n9 9.0' - self.assertEqual(res, exp) - - def test_to_string_header(self): - s = pd.Series(range(10), dtype='int64') - s.index.name = 'foo' - res = s.to_string(header=True, max_rows=2) - exp = 'foo\n0 0\n ..\n9 9' - self.assertEqual(res, exp) - res = s.to_string(header=False, max_rows=2) - exp = '0 0\n ..\n9 9' - self.assertEqual(res, exp) - - -class TestEngFormatter(tm.TestCase): - - def test_eng_float_formatter(self): - df = DataFrame({'A': [1.41, 141., 14100, 1410000.]}) - - fmt.set_eng_float_format() - result = df.to_string() - expected = (' A\n' - '0 1.410E+00\n' - '1 141.000E+00\n' - '2 14.100E+03\n' - '3 1.410E+06') - self.assertEqual(result, expected) - - fmt.set_eng_float_format(use_eng_prefix=True) - result = df.to_string() - expected = (' A\n' - '0 1.410\n' - '1 141.000\n' - '2 14.100k\n' - '3 1.410M') - self.assertEqual(result, expected) - - fmt.set_eng_float_format(accuracy=0) - result = df.to_string() - expected = (' A\n' - '0 1E+00\n' - '1 141E+00\n' - '2 14E+03\n' - '3 1E+06') - self.assertEqual(result, expected) - - self.reset_display_options() - - def compare(self, formatter, input, output): - formatted_input = formatter(input) - msg = ("formatting of %s results in '%s', expected '%s'" % - (str(input), formatted_input, output)) - self.assertEqual(formatted_input, output, msg) - - def compare_all(self, formatter, in_out): - """ - Parameters: - ----------- - formatter: EngFormatter under test - in_out: list of tuples. Each tuple = (number, expected_formatting) - - It is tested if 'formatter(number) == expected_formatting'. - *number* should be >= 0 because formatter(-number) == fmt is also - tested. 
*fmt* is derived from *expected_formatting* - """ - for input, output in in_out: - self.compare(formatter, input, output) - self.compare(formatter, -input, "-" + output[1:]) - - def test_exponents_with_eng_prefix(self): - formatter = fmt.EngFormatter(accuracy=3, use_eng_prefix=True) - f = np.sqrt(2) - in_out = [(f * 10 ** -24, " 1.414y"), (f * 10 ** -23, " 14.142y"), - (f * 10 ** -22, " 141.421y"), (f * 10 ** -21, " 1.414z"), - (f * 10 ** -20, " 14.142z"), (f * 10 ** -19, " 141.421z"), - (f * 10 ** -18, " 1.414a"), (f * 10 ** -17, " 14.142a"), - (f * 10 ** -16, " 141.421a"), (f * 10 ** -15, " 1.414f"), - (f * 10 ** -14, " 14.142f"), (f * 10 ** -13, " 141.421f"), - (f * 10 ** -12, " 1.414p"), (f * 10 ** -11, " 14.142p"), - (f * 10 ** -10, " 141.421p"), (f * 10 ** -9, " 1.414n"), - (f * 10 ** -8, " 14.142n"), (f * 10 ** -7, " 141.421n"), - (f * 10 ** -6, " 1.414u"), (f * 10 ** -5, " 14.142u"), - (f * 10 ** -4, " 141.421u"), (f * 10 ** -3, " 1.414m"), - (f * 10 ** -2, " 14.142m"), (f * 10 ** -1, " 141.421m"), - (f * 10 ** 0, " 1.414"), (f * 10 ** 1, " 14.142"), - (f * 10 ** 2, " 141.421"), (f * 10 ** 3, " 1.414k"), - (f * 10 ** 4, " 14.142k"), (f * 10 ** 5, " 141.421k"), - (f * 10 ** 6, " 1.414M"), (f * 10 ** 7, " 14.142M"), - (f * 10 ** 8, " 141.421M"), (f * 10 ** 9, " 1.414G"), ( - f * 10 ** 10, " 14.142G"), (f * 10 ** 11, " 141.421G"), - (f * 10 ** 12, " 1.414T"), (f * 10 ** 13, " 14.142T"), ( - f * 10 ** 14, " 141.421T"), (f * 10 ** 15, " 1.414P"), ( - f * 10 ** 16, " 14.142P"), (f * 10 ** 17, " 141.421P"), ( - f * 10 ** 18, " 1.414E"), (f * 10 ** 19, " 14.142E"), - (f * 10 ** 20, " 141.421E"), (f * 10 ** 21, " 1.414Z"), ( - f * 10 ** 22, " 14.142Z"), (f * 10 ** 23, " 141.421Z"), ( - f * 10 ** 24, " 1.414Y"), (f * 10 ** 25, " 14.142Y"), ( - f * 10 ** 26, " 141.421Y")] - self.compare_all(formatter, in_out) - - def test_exponents_without_eng_prefix(self): - formatter = fmt.EngFormatter(accuracy=4, use_eng_prefix=False) - f = np.pi - in_out = [(f * 10 ** -24, " 
3.1416E-24"), - (f * 10 ** -23, " 31.4159E-24"), - (f * 10 ** -22, " 314.1593E-24"), - (f * 10 ** -21, " 3.1416E-21"), - (f * 10 ** -20, " 31.4159E-21"), - (f * 10 ** -19, " 314.1593E-21"), - (f * 10 ** -18, " 3.1416E-18"), - (f * 10 ** -17, " 31.4159E-18"), - (f * 10 ** -16, " 314.1593E-18"), - (f * 10 ** -15, " 3.1416E-15"), - (f * 10 ** -14, " 31.4159E-15"), - (f * 10 ** -13, " 314.1593E-15"), - (f * 10 ** -12, " 3.1416E-12"), - (f * 10 ** -11, " 31.4159E-12"), - (f * 10 ** -10, " 314.1593E-12"), - (f * 10 ** -9, " 3.1416E-09"), (f * 10 ** -8, " 31.4159E-09"), - (f * 10 ** -7, " 314.1593E-09"), (f * 10 ** -6, " 3.1416E-06"), - (f * 10 ** -5, " 31.4159E-06"), (f * 10 ** -4, - " 314.1593E-06"), - (f * 10 ** -3, " 3.1416E-03"), (f * 10 ** -2, " 31.4159E-03"), - (f * 10 ** -1, " 314.1593E-03"), (f * 10 ** 0, " 3.1416E+00"), ( - f * 10 ** 1, " 31.4159E+00"), (f * 10 ** 2, " 314.1593E+00"), - (f * 10 ** 3, " 3.1416E+03"), (f * 10 ** 4, " 31.4159E+03"), ( - f * 10 ** 5, " 314.1593E+03"), (f * 10 ** 6, " 3.1416E+06"), - (f * 10 ** 7, " 31.4159E+06"), (f * 10 ** 8, " 314.1593E+06"), ( - f * 10 ** 9, " 3.1416E+09"), (f * 10 ** 10, " 31.4159E+09"), - (f * 10 ** 11, " 314.1593E+09"), (f * 10 ** 12, " 3.1416E+12"), - (f * 10 ** 13, " 31.4159E+12"), (f * 10 ** 14, " 314.1593E+12"), - (f * 10 ** 15, " 3.1416E+15"), (f * 10 ** 16, " 31.4159E+15"), - (f * 10 ** 17, " 314.1593E+15"), (f * 10 ** 18, " 3.1416E+18"), - (f * 10 ** 19, " 31.4159E+18"), (f * 10 ** 20, " 314.1593E+18"), - (f * 10 ** 21, " 3.1416E+21"), (f * 10 ** 22, " 31.4159E+21"), - (f * 10 ** 23, " 314.1593E+21"), (f * 10 ** 24, " 3.1416E+24"), - (f * 10 ** 25, " 31.4159E+24"), (f * 10 ** 26, " 314.1593E+24")] - self.compare_all(formatter, in_out) - - def test_rounding(self): - formatter = fmt.EngFormatter(accuracy=3, use_eng_prefix=True) - in_out = [(5.55555, ' 5.556'), (55.5555, ' 55.556'), - (555.555, ' 555.555'), (5555.55, ' 5.556k'), - (55555.5, ' 55.556k'), (555555, ' 555.555k')] - self.compare_all(formatter, 
in_out) - - formatter = fmt.EngFormatter(accuracy=1, use_eng_prefix=True) - in_out = [(5.55555, ' 5.6'), (55.5555, ' 55.6'), (555.555, ' 555.6'), - (5555.55, ' 5.6k'), (55555.5, ' 55.6k'), (555555, ' 555.6k')] - self.compare_all(formatter, in_out) - - formatter = fmt.EngFormatter(accuracy=0, use_eng_prefix=True) - in_out = [(5.55555, ' 6'), (55.5555, ' 56'), (555.555, ' 556'), - (5555.55, ' 6k'), (55555.5, ' 56k'), (555555, ' 556k')] - self.compare_all(formatter, in_out) - - formatter = fmt.EngFormatter(accuracy=3, use_eng_prefix=True) - result = formatter(0) - self.assertEqual(result, u(' 0.000')) - - def test_nan(self): - # Issue #11981 - - formatter = fmt.EngFormatter(accuracy=1, use_eng_prefix=True) - result = formatter(np.nan) - self.assertEqual(result, u('NaN')) - - df = pd.DataFrame({'a': [1.5, 10.3, 20.5], - 'b': [50.3, 60.67, 70.12], - 'c': [100.2, 101.33, 120.33]}) - pt = df.pivot_table(values='a', index='b', columns='c') - fmt.set_eng_float_format(accuracy=1) - result = pt.to_string() - self.assertTrue('NaN' in result) - self.reset_display_options() - - def test_inf(self): - # Issue #11981 - - formatter = fmt.EngFormatter(accuracy=1, use_eng_prefix=True) - result = formatter(np.inf) - self.assertEqual(result, u('inf')) - - -def _three_digit_exp(): - return '%.4g' % 1.7e8 == '1.7e+008' - - -class TestFloatArrayFormatter(tm.TestCase): - - def test_misc(self): - obj = fmt.FloatArrayFormatter(np.array([], dtype=np.float64)) - result = obj.get_result() - self.assertTrue(len(result) == 0) - - def test_format(self): - obj = fmt.FloatArrayFormatter(np.array([12, 0], dtype=np.float64)) - result = obj.get_result() - self.assertEqual(result[0], " 12.0") - self.assertEqual(result[1], " 0.0") - - def test_output_significant_digits(self): - # Issue #9764 - - # In case default display precision changes: - with pd.option_context('display.precision', 6): - # DataFrame example from issue #9764 - d = pd.DataFrame( - {'col1': [9.999e-8, 1e-7, 1.0001e-7, 2e-7, 4.999e-7, 
5e-7, - 5.0001e-7, 6e-7, 9.999e-7, 1e-6, 1.0001e-6, 2e-6, - 4.999e-6, 5e-6, 5.0001e-6, 6e-6]}) - - expected_output = { - (0, 6): - ' col1\n0 9.999000e-08\n1 1.000000e-07\n2 1.000100e-07\n3 2.000000e-07\n4 4.999000e-07\n5 5.000000e-07', - (1, 6): - ' col1\n1 1.000000e-07\n2 1.000100e-07\n3 2.000000e-07\n4 4.999000e-07\n5 5.000000e-07', - (1, 8): - ' col1\n1 1.000000e-07\n2 1.000100e-07\n3 2.000000e-07\n4 4.999000e-07\n5 5.000000e-07\n6 5.000100e-07\n7 6.000000e-07', - (8, 16): - ' col1\n8 9.999000e-07\n9 1.000000e-06\n10 1.000100e-06\n11 2.000000e-06\n12 4.999000e-06\n13 5.000000e-06\n14 5.000100e-06\n15 6.000000e-06', - (9, 16): - ' col1\n9 0.000001\n10 0.000001\n11 0.000002\n12 0.000005\n13 0.000005\n14 0.000005\n15 0.000006' - } - - for (start, stop), v in expected_output.items(): - self.assertEqual(str(d[start:stop]), v) - - def test_too_long(self): - # GH 10451 - with pd.option_context('display.precision', 4): - # need both a number > 1e6 and something that normally formats to - # having length > display.precision + 6 - df = pd.DataFrame(dict(x=[12345.6789])) - self.assertEqual(str(df), ' x\n0 12345.6789') - df = pd.DataFrame(dict(x=[2e6])) - self.assertEqual(str(df), ' x\n0 2000000.0') - df = pd.DataFrame(dict(x=[12345.6789, 2e6])) - self.assertEqual( - str(df), ' x\n0 1.2346e+04\n1 2.0000e+06') - - -class TestRepr_timedelta64(tm.TestCase): - - def test_none(self): - delta_1d = pd.to_timedelta(1, unit='D') - delta_0d = pd.to_timedelta(0, unit='D') - delta_1s = pd.to_timedelta(1, unit='s') - delta_500ms = pd.to_timedelta(500, unit='ms') - - drepr = lambda x: x._repr_base() - self.assertEqual(drepr(delta_1d), "1 days") - self.assertEqual(drepr(-delta_1d), "-1 days") - self.assertEqual(drepr(delta_0d), "0 days") - self.assertEqual(drepr(delta_1s), "0 days 00:00:01") - self.assertEqual(drepr(delta_500ms), "0 days 00:00:00.500000") - self.assertEqual(drepr(delta_1d + delta_1s), "1 days 00:00:01") - self.assertEqual( - drepr(delta_1d + delta_500ms), "1 days 
00:00:00.500000") - - def test_even_day(self): - delta_1d = pd.to_timedelta(1, unit='D') - delta_0d = pd.to_timedelta(0, unit='D') - delta_1s = pd.to_timedelta(1, unit='s') - delta_500ms = pd.to_timedelta(500, unit='ms') - - drepr = lambda x: x._repr_base(format='even_day') - self.assertEqual(drepr(delta_1d), "1 days") - self.assertEqual(drepr(-delta_1d), "-1 days") - self.assertEqual(drepr(delta_0d), "0 days") - self.assertEqual(drepr(delta_1s), "0 days 00:00:01") - self.assertEqual(drepr(delta_500ms), "0 days 00:00:00.500000") - self.assertEqual(drepr(delta_1d + delta_1s), "1 days 00:00:01") - self.assertEqual( - drepr(delta_1d + delta_500ms), "1 days 00:00:00.500000") - - def test_sub_day(self): - delta_1d = pd.to_timedelta(1, unit='D') - delta_0d = pd.to_timedelta(0, unit='D') - delta_1s = pd.to_timedelta(1, unit='s') - delta_500ms = pd.to_timedelta(500, unit='ms') - - drepr = lambda x: x._repr_base(format='sub_day') - self.assertEqual(drepr(delta_1d), "1 days") - self.assertEqual(drepr(-delta_1d), "-1 days") - self.assertEqual(drepr(delta_0d), "00:00:00") - self.assertEqual(drepr(delta_1s), "00:00:01") - self.assertEqual(drepr(delta_500ms), "00:00:00.500000") - self.assertEqual(drepr(delta_1d + delta_1s), "1 days 00:00:01") - self.assertEqual( - drepr(delta_1d + delta_500ms), "1 days 00:00:00.500000") - - def test_long(self): - delta_1d = pd.to_timedelta(1, unit='D') - delta_0d = pd.to_timedelta(0, unit='D') - delta_1s = pd.to_timedelta(1, unit='s') - delta_500ms = pd.to_timedelta(500, unit='ms') - - drepr = lambda x: x._repr_base(format='long') - self.assertEqual(drepr(delta_1d), "1 days 00:00:00") - self.assertEqual(drepr(-delta_1d), "-1 days +00:00:00") - self.assertEqual(drepr(delta_0d), "0 days 00:00:00") - self.assertEqual(drepr(delta_1s), "0 days 00:00:01") - self.assertEqual(drepr(delta_500ms), "0 days 00:00:00.500000") - self.assertEqual(drepr(delta_1d + delta_1s), "1 days 00:00:01") - self.assertEqual( - drepr(delta_1d + delta_500ms), "1 days 
00:00:00.500000") - - def test_all(self): - delta_1d = pd.to_timedelta(1, unit='D') - delta_0d = pd.to_timedelta(0, unit='D') - delta_1ns = pd.to_timedelta(1, unit='ns') - - drepr = lambda x: x._repr_base(format='all') - self.assertEqual(drepr(delta_1d), "1 days 00:00:00.000000000") - self.assertEqual(drepr(delta_0d), "0 days 00:00:00.000000000") - self.assertEqual(drepr(delta_1ns), "0 days 00:00:00.000000001") - - -class TestTimedelta64Formatter(tm.TestCase): - - def test_days(self): - x = pd.to_timedelta(list(range(5)) + [pd.NaT], unit='D') - result = fmt.Timedelta64Formatter(x, box=True).get_result() - self.assertEqual(result[0].strip(), "'0 days'") - self.assertEqual(result[1].strip(), "'1 days'") - - result = fmt.Timedelta64Formatter(x[1:2], box=True).get_result() - self.assertEqual(result[0].strip(), "'1 days'") - - result = fmt.Timedelta64Formatter(x, box=False).get_result() - self.assertEqual(result[0].strip(), "0 days") - self.assertEqual(result[1].strip(), "1 days") - - result = fmt.Timedelta64Formatter(x[1:2], box=False).get_result() - self.assertEqual(result[0].strip(), "1 days") - - def test_days_neg(self): - x = pd.to_timedelta(list(range(5)) + [pd.NaT], unit='D') - result = fmt.Timedelta64Formatter(-x, box=True).get_result() - self.assertEqual(result[0].strip(), "'0 days'") - self.assertEqual(result[1].strip(), "'-1 days'") - - def test_subdays(self): - y = pd.to_timedelta(list(range(5)) + [pd.NaT], unit='s') - result = fmt.Timedelta64Formatter(y, box=True).get_result() - self.assertEqual(result[0].strip(), "'00:00:00'") - self.assertEqual(result[1].strip(), "'00:00:01'") - - def test_subdays_neg(self): - y = pd.to_timedelta(list(range(5)) + [pd.NaT], unit='s') - result = fmt.Timedelta64Formatter(-y, box=True).get_result() - self.assertEqual(result[0].strip(), "'00:00:00'") - self.assertEqual(result[1].strip(), "'-1 days +23:59:59'") - - def test_zero(self): - x = pd.to_timedelta(list(range(1)) + [pd.NaT], unit='D') - result = 
fmt.Timedelta64Formatter(x, box=True).get_result() - self.assertEqual(result[0].strip(), "'0 days'") - - x = pd.to_timedelta(list(range(1)), unit='D') - result = fmt.Timedelta64Formatter(x, box=True).get_result() - self.assertEqual(result[0].strip(), "'0 days'") - - -class TestDatetime64Formatter(tm.TestCase): - - def test_mixed(self): - x = Series([datetime(2013, 1, 1), datetime(2013, 1, 1, 12), pd.NaT]) - result = fmt.Datetime64Formatter(x).get_result() - self.assertEqual(result[0].strip(), "2013-01-01 00:00:00") - self.assertEqual(result[1].strip(), "2013-01-01 12:00:00") - - def test_dates(self): - x = Series([datetime(2013, 1, 1), datetime(2013, 1, 2), pd.NaT]) - result = fmt.Datetime64Formatter(x).get_result() - self.assertEqual(result[0].strip(), "2013-01-01") - self.assertEqual(result[1].strip(), "2013-01-02") - - def test_date_nanos(self): - x = Series([Timestamp(200)]) - result = fmt.Datetime64Formatter(x).get_result() - self.assertEqual(result[0].strip(), "1970-01-01 00:00:00.000000200") - - def test_dates_display(self): - - # 10170 - # make sure that we are consistently display date formatting - x = Series(date_range('20130101 09:00:00', periods=5, freq='D')) - x.iloc[1] = np.nan - result = fmt.Datetime64Formatter(x).get_result() - self.assertEqual(result[0].strip(), "2013-01-01 09:00:00") - self.assertEqual(result[1].strip(), "NaT") - self.assertEqual(result[4].strip(), "2013-01-05 09:00:00") - - x = Series(date_range('20130101 09:00:00', periods=5, freq='s')) - x.iloc[1] = np.nan - result = fmt.Datetime64Formatter(x).get_result() - self.assertEqual(result[0].strip(), "2013-01-01 09:00:00") - self.assertEqual(result[1].strip(), "NaT") - self.assertEqual(result[4].strip(), "2013-01-01 09:00:04") - - x = Series(date_range('20130101 09:00:00', periods=5, freq='ms')) - x.iloc[1] = np.nan - result = fmt.Datetime64Formatter(x).get_result() - self.assertEqual(result[0].strip(), "2013-01-01 09:00:00.000") - self.assertEqual(result[1].strip(), "NaT") - 
self.assertEqual(result[4].strip(), "2013-01-01 09:00:00.004") - - x = Series(date_range('20130101 09:00:00', periods=5, freq='us')) - x.iloc[1] = np.nan - result = fmt.Datetime64Formatter(x).get_result() - self.assertEqual(result[0].strip(), "2013-01-01 09:00:00.000000") - self.assertEqual(result[1].strip(), "NaT") - self.assertEqual(result[4].strip(), "2013-01-01 09:00:00.000004") - - x = Series(date_range('20130101 09:00:00', periods=5, freq='N')) - x.iloc[1] = np.nan - result = fmt.Datetime64Formatter(x).get_result() - self.assertEqual(result[0].strip(), "2013-01-01 09:00:00.000000000") - self.assertEqual(result[1].strip(), "NaT") - self.assertEqual(result[4].strip(), "2013-01-01 09:00:00.000000004") - - def test_datetime64formatter_yearmonth(self): - x = Series([datetime(2016, 1, 1), datetime(2016, 2, 2)]) - - def format_func(x): - return x.strftime('%Y-%m') - - formatter = fmt.Datetime64Formatter(x, formatter=format_func) - result = formatter.get_result() - self.assertEqual(result, ['2016-01', '2016-02']) - - def test_datetime64formatter_hoursecond(self): - - x = Series(pd.to_datetime(['10:10:10.100', '12:12:12.120'], - format='%H:%M:%S.%f')) - - def format_func(x): - return x.strftime('%H:%M') - - formatter = fmt.Datetime64Formatter(x, formatter=format_func) - result = formatter.get_result() - self.assertEqual(result, ['10:10', '12:12']) - - -class TestNaTFormatting(tm.TestCase): - - def test_repr(self): - self.assertEqual(repr(pd.NaT), "NaT") - - def test_str(self): - self.assertEqual(str(pd.NaT), "NaT") - - -class TestDatetimeIndexFormat(tm.TestCase): - - def test_datetime(self): - formatted = pd.to_datetime([datetime(2003, 1, 1, 12), pd.NaT]).format() - self.assertEqual(formatted[0], "2003-01-01 12:00:00") - self.assertEqual(formatted[1], "NaT") - - def test_date(self): - formatted = pd.to_datetime([datetime(2003, 1, 1), pd.NaT]).format() - self.assertEqual(formatted[0], "2003-01-01") - self.assertEqual(formatted[1], "NaT") - - def test_date_tz(self): - 
formatted = pd.to_datetime([datetime(2013, 1, 1)], utc=True).format() - self.assertEqual(formatted[0], "2013-01-01 00:00:00+00:00") - - formatted = pd.to_datetime( - [datetime(2013, 1, 1), pd.NaT], utc=True).format() - self.assertEqual(formatted[0], "2013-01-01 00:00:00+00:00") - - def test_date_explict_date_format(self): - formatted = pd.to_datetime([datetime(2003, 2, 1), pd.NaT]).format( - date_format="%m-%d-%Y", na_rep="UT") - self.assertEqual(formatted[0], "02-01-2003") - self.assertEqual(formatted[1], "UT") - - -class TestDatetimeIndexUnicode(tm.TestCase): - - def test_dates(self): - text = str(pd.to_datetime([datetime(2013, 1, 1), datetime(2014, 1, 1) - ])) - self.assertTrue("['2013-01-01'," in text) - self.assertTrue(", '2014-01-01']" in text) - - def test_mixed(self): - text = str(pd.to_datetime([datetime(2013, 1, 1), datetime( - 2014, 1, 1, 12), datetime(2014, 1, 1)])) - self.assertTrue("'2013-01-01 00:00:00'," in text) - self.assertTrue("'2014-01-01 00:00:00']" in text) - - -class TestStringRepTimestamp(tm.TestCase): - - def test_no_tz(self): - dt_date = datetime(2013, 1, 2) - self.assertEqual(str(dt_date), str(Timestamp(dt_date))) - - dt_datetime = datetime(2013, 1, 2, 12, 1, 3) - self.assertEqual(str(dt_datetime), str(Timestamp(dt_datetime))) - - dt_datetime_us = datetime(2013, 1, 2, 12, 1, 3, 45) - self.assertEqual(str(dt_datetime_us), str(Timestamp(dt_datetime_us))) - - ts_nanos_only = Timestamp(200) - self.assertEqual(str(ts_nanos_only), "1970-01-01 00:00:00.000000200") - - ts_nanos_micros = Timestamp(1200) - self.assertEqual(str(ts_nanos_micros), "1970-01-01 00:00:00.000001200") - - def test_tz_pytz(self): - tm._skip_if_no_pytz() - - import pytz - - dt_date = datetime(2013, 1, 2, tzinfo=pytz.utc) - self.assertEqual(str(dt_date), str(Timestamp(dt_date))) - - dt_datetime = datetime(2013, 1, 2, 12, 1, 3, tzinfo=pytz.utc) - self.assertEqual(str(dt_datetime), str(Timestamp(dt_datetime))) - - dt_datetime_us = datetime(2013, 1, 2, 12, 1, 3, 45, 
tzinfo=pytz.utc) - self.assertEqual(str(dt_datetime_us), str(Timestamp(dt_datetime_us))) - - def test_tz_dateutil(self): - tm._skip_if_no_dateutil() - import dateutil - utc = dateutil.tz.tzutc() - - dt_date = datetime(2013, 1, 2, tzinfo=utc) - self.assertEqual(str(dt_date), str(Timestamp(dt_date))) - - dt_datetime = datetime(2013, 1, 2, 12, 1, 3, tzinfo=utc) - self.assertEqual(str(dt_datetime), str(Timestamp(dt_datetime))) - - dt_datetime_us = datetime(2013, 1, 2, 12, 1, 3, 45, tzinfo=utc) - self.assertEqual(str(dt_datetime_us), str(Timestamp(dt_datetime_us))) - - def test_nat_representations(self): - for f in (str, repr, methodcaller('isoformat')): - self.assertEqual(f(pd.NaT), 'NaT') - - -def test_format_percentiles(): - result = fmt.format_percentiles([0.01999, 0.02001, 0.5, 0.666666, 0.9999]) - expected = ['1.999%', '2.001%', '50%', '66.667%', '99.99%'] - tm.assert_equal(result, expected) - - result = fmt.format_percentiles([0, 0.5, 0.02001, 0.5, 0.666666, 0.9999]) - expected = ['0%', '50%', '2.0%', '50%', '66.67%', '99.99%'] - tm.assert_equal(result, expected) - - tm.assertRaises(ValueError, fmt.format_percentiles, [0.1, np.nan, 0.5]) - tm.assertRaises(ValueError, fmt.format_percentiles, [-0.001, 0.1, 0.5]) - tm.assertRaises(ValueError, fmt.format_percentiles, [2, 0.1, 0.5]) - tm.assertRaises(ValueError, fmt.format_percentiles, [0.1, 0.5, 'a']) diff --git a/pandas/tests/formats/test_printing.py b/pandas/tests/formats/test_printing.py deleted file mode 100644 index 52f3e06c6cbd0..0000000000000 --- a/pandas/tests/formats/test_printing.py +++ /dev/null @@ -1,134 +0,0 @@ -# -*- coding: utf-8 -*- -from pandas import compat -import pandas.formats.printing as printing -import pandas.formats.format as fmt -import pandas.util.testing as tm -import pandas.core.config as cf - - -def test_adjoin(): - data = [['a', 'b', 'c'], ['dd', 'ee', 'ff'], ['ggg', 'hhh', 'iii']] - expected = 'a dd ggg\nb ee hhh\nc ff iii' - - adjoined = printing.adjoin(2, *data) - - assert (adjoined 
== expected) - - -def test_repr_binary_type(): - import string - letters = string.ascii_letters - btype = compat.binary_type - try: - raw = btype(letters, encoding=cf.get_option('display.encoding')) - except TypeError: - raw = btype(letters) - b = compat.text_type(compat.bytes_to_str(raw)) - res = printing.pprint_thing(b, quote_strings=True) - tm.assert_equal(res, repr(b)) - res = printing.pprint_thing(b, quote_strings=False) - tm.assert_equal(res, b) - - -class TestFormattBase(tm.TestCase): - - def test_adjoin(self): - data = [['a', 'b', 'c'], ['dd', 'ee', 'ff'], ['ggg', 'hhh', 'iii']] - expected = 'a dd ggg\nb ee hhh\nc ff iii' - - adjoined = printing.adjoin(2, *data) - - self.assertEqual(adjoined, expected) - - def test_adjoin_unicode(self): - data = [[u'あ', 'b', 'c'], ['dd', u'ええ', 'ff'], ['ggg', 'hhh', u'いいい']] - expected = u'あ dd ggg\nb ええ hhh\nc ff いいい' - adjoined = printing.adjoin(2, *data) - self.assertEqual(adjoined, expected) - - adj = fmt.EastAsianTextAdjustment() - - expected = u"""あ dd ggg -b ええ hhh -c ff いいい""" - - adjoined = adj.adjoin(2, *data) - self.assertEqual(adjoined, expected) - cols = adjoined.split('\n') - self.assertEqual(adj.len(cols[0]), 13) - self.assertEqual(adj.len(cols[1]), 13) - self.assertEqual(adj.len(cols[2]), 16) - - expected = u"""あ dd ggg -b ええ hhh -c ff いいい""" - - adjoined = adj.adjoin(7, *data) - self.assertEqual(adjoined, expected) - cols = adjoined.split('\n') - self.assertEqual(adj.len(cols[0]), 23) - self.assertEqual(adj.len(cols[1]), 23) - self.assertEqual(adj.len(cols[2]), 26) - - def test_justify(self): - adj = fmt.EastAsianTextAdjustment() - - def just(x, *args, **kwargs): - # wrapper to test single str - return adj.justify([x], *args, **kwargs)[0] - - self.assertEqual(just('abc', 5, mode='left'), 'abc ') - self.assertEqual(just('abc', 5, mode='center'), ' abc ') - self.assertEqual(just('abc', 5, mode='right'), ' abc') - self.assertEqual(just(u'abc', 5, mode='left'), 'abc ') - self.assertEqual(just(u'abc', 5, 
mode='center'), ' abc ') - self.assertEqual(just(u'abc', 5, mode='right'), ' abc') - - self.assertEqual(just(u'パンダ', 5, mode='left'), u'パンダ') - self.assertEqual(just(u'パンダ', 5, mode='center'), u'パンダ') - self.assertEqual(just(u'パンダ', 5, mode='right'), u'パンダ') - - self.assertEqual(just(u'パンダ', 10, mode='left'), u'パンダ ') - self.assertEqual(just(u'パンダ', 10, mode='center'), u' パンダ ') - self.assertEqual(just(u'パンダ', 10, mode='right'), u' パンダ') - - def test_east_asian_len(self): - adj = fmt.EastAsianTextAdjustment() - - self.assertEqual(adj.len('abc'), 3) - self.assertEqual(adj.len(u'abc'), 3) - - self.assertEqual(adj.len(u'パンダ'), 6) - self.assertEqual(adj.len(u'パンダ'), 5) - self.assertEqual(adj.len(u'パンダpanda'), 11) - self.assertEqual(adj.len(u'パンダpanda'), 10) - - def test_ambiguous_width(self): - adj = fmt.EastAsianTextAdjustment() - self.assertEqual(adj.len(u'¡¡ab'), 4) - - with cf.option_context('display.unicode.ambiguous_as_wide', True): - adj = fmt.EastAsianTextAdjustment() - self.assertEqual(adj.len(u'¡¡ab'), 6) - - data = [[u'あ', 'b', 'c'], ['dd', u'ええ', 'ff'], - ['ggg', u'¡¡ab', u'いいい']] - expected = u'あ dd ggg \nb ええ ¡¡ab\nc ff いいい' - adjoined = adj.adjoin(2, *data) - self.assertEqual(adjoined, expected) - - -# TODO: fix this broken test - -# def test_console_encode(): -# """ -# On Python 2, if sys.stdin.encoding is None (IPython with zmq frontend) -# common.console_encode should encode things as utf-8. 
-# """ -# if compat.PY3: -# pytest.skip - -# with tm.stdin_encoding(encoding=None): -# result = printing.console_encode(u"\u05d0") -# expected = u"\u05d0".encode('utf-8') -# assert (result == expected) diff --git a/pandas/tests/frame/common.py b/pandas/tests/frame/common.py index b9cd764c8704c..c85fea3c3d71b 100644 --- a/pandas/tests/frame/common.py +++ b/pandas/tests/frame/common.py @@ -1,7 +1,7 @@ import numpy as np from pandas import compat -from pandas.util.decorators import cache_readonly +from pandas.util._decorators import cache_readonly import pandas.util.testing as tm import pandas as pd @@ -10,8 +10,8 @@ _frame = pd.DataFrame(_seriesd) _frame2 = pd.DataFrame(_seriesd, columns=['D', 'C', 'B', 'A']) -_intframe = pd.DataFrame(dict((k, v.astype(int)) - for k, v in compat.iteritems(_seriesd))) +_intframe = pd.DataFrame({k: v.astype(int) + for k, v in compat.iteritems(_seriesd)}) _tsframe = pd.DataFrame(_tsd) @@ -32,8 +32,7 @@ def frame2(self): @cache_readonly def intframe(self): # force these all to int64 to avoid platform testing issues - return pd.DataFrame(dict([(c, s) for c, s in - compat.iteritems(_intframe)]), + return pd.DataFrame({c: s for c, s in compat.iteritems(_intframe)}, dtype=np.int64) @cache_readonly @@ -112,7 +111,7 @@ def _check_mixed_float(df, dtype=None): # float16 are most likely to be upcasted to float32 dtypes = dict(A='float32', B='float32', C='float16', D='float64') if isinstance(dtype, compat.string_types): - dtypes = dict([(k, dtype) for k, v in dtypes.items()]) + dtypes = {k: dtype for k, v in dtypes.items()} elif isinstance(dtype, dict): dtypes.update(dtype) if dtypes.get('A'): @@ -128,7 +127,7 @@ def _check_mixed_float(df, dtype=None): def _check_mixed_int(df, dtype=None): dtypes = dict(A='int32', B='uint64', C='uint8', D='int64') if isinstance(dtype, compat.string_types): - dtypes = dict([(k, dtype) for k, v in dtypes.items()]) + dtypes = {k: dtype for k, v in dtypes.items()} elif isinstance(dtype, dict): dtypes.update(dtype) if 
dtypes.get('A'): diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index e84bb6407fafc..3e0ba26c20eb0 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -2,25 +2,31 @@ from __future__ import print_function +import inspect +import pytest + from datetime import datetime, timedelta import numpy as np -from pandas.compat import lrange +from pandas.compat import lrange, PY2 from pandas import (DataFrame, Series, Index, MultiIndex, - RangeIndex, date_range) + RangeIndex, date_range, IntervalIndex, + to_datetime) +from pandas.core.dtypes.common import ( + is_object_dtype, + is_categorical_dtype, + is_interval_dtype) import pandas as pd -from pandas.util.testing import (assert_series_equal, - assert_frame_equal, - assertRaisesRegexp) +from pandas.util.testing import assert_series_equal, assert_frame_equal import pandas.util.testing as tm from pandas.tests.frame.common import TestData -class TestDataFrameAlterAxes(tm.TestCase, TestData): +class TestDataFrameAlterAxes(TestData): def test_set_index(self): idx = Index(np.arange(len(self.mixed_frame))) @@ -28,8 +34,8 @@ def test_set_index(self): # cache it _ = self.mixed_frame['foo'] # noqa self.mixed_frame.index = idx - self.assertIs(self.mixed_frame['foo'].index, idx) - with assertRaisesRegexp(ValueError, 'Length mismatch'): + assert self.mixed_frame['foo'].index is idx + with tm.assert_raises_regex(ValueError, 'Length mismatch'): self.mixed_frame.index = idx[::2] def test_set_index_cast(self): @@ -64,7 +70,7 @@ def test_set_index2(self): assert_frame_equal(result, expected) assert_frame_equal(result_nodrop, expected_nodrop) - self.assertEqual(result.index.name, index.name) + assert result.index.name == index.name # inplace, single df2 = df.copy() @@ -92,7 +98,7 @@ def test_set_index2(self): assert_frame_equal(result, expected) assert_frame_equal(result_nodrop, expected_nodrop) - self.assertEqual(result.index.names, index.names) + assert 
result.index.names == index.names # inplace df2 = df.copy() @@ -104,7 +110,8 @@ def test_set_index2(self): assert_frame_equal(df3, expected_nodrop) # corner case - with assertRaisesRegexp(ValueError, 'Index has duplicate keys'): + with tm.assert_raises_regex(ValueError, + 'Index has duplicate keys'): df.set_index('A', verify_integrity=True) # append @@ -121,7 +128,21 @@ def test_set_index2(self): # Series result = df.set_index(df.C) - self.assertEqual(result.index.name, 'C') + assert result.index.name == 'C' + + @pytest.mark.parametrize('level', ['a', pd.Series(range(3), name='a')]) + def test_set_index_duplicate_names(self, level): + # GH18872 + df = pd.DataFrame(np.arange(8).reshape(4, 2), columns=['a', 'b']) + + # Pass an existing level name: + df.index.name = 'a' + pytest.raises(ValueError, df.set_index, level, append=True) + pytest.raises(ValueError, df.set_index, [level], append=True) + + # Pass twice the same level name: + df.index.name = 'c' + pytest.raises(ValueError, df.set_index, [level, level]) def test_set_index_nonuniq(self): df = DataFrame({'A': ['foo', 'foo', 'foo', 'bar', 'bar'], @@ -129,17 +150,19 @@ def test_set_index_nonuniq(self): 'C': ['a', 'b', 'c', 'd', 'e'], 'D': np.random.randn(5), 'E': np.random.randn(5)}) - with assertRaisesRegexp(ValueError, 'Index has duplicate keys'): + with tm.assert_raises_regex(ValueError, + 'Index has duplicate keys'): df.set_index('A', verify_integrity=True, inplace=True) - self.assertIn('A', df) + assert 'A' in df def test_set_index_bug(self): # GH1590 df = DataFrame({'val': [0, 1, 2], 'key': ['a', 'b', 'c']}) - df2 = df.select(lambda indx: indx >= 1) - rs = df2.set_index('key') xp = DataFrame({'val': [1, 2]}, Index(['b', 'c'], name='key')) + + df2 = df.loc[df.index.map(lambda indx: indx >= 1)] + rs = df2.set_index('key') assert_frame_equal(rs, xp) def test_set_index_pass_arrays(self): @@ -167,7 +190,7 @@ def test_construction_with_categorical_index(self): idf = df.set_index('B') str(idf) 
tm.assert_index_equal(idf.index, ci, check_names=False) - self.assertEqual(idf.index.name, 'B') + assert idf.index.name == 'B' # from a CategoricalIndex df = DataFrame({'A': np.random.randn(10), @@ -175,17 +198,17 @@ def test_construction_with_categorical_index(self): idf = df.set_index('B') str(idf) tm.assert_index_equal(idf.index, ci, check_names=False) - self.assertEqual(idf.index.name, 'B') + assert idf.index.name == 'B' idf = df.set_index('B').reset_index().set_index('B') str(idf) tm.assert_index_equal(idf.index, ci, check_names=False) - self.assertEqual(idf.index.name, 'B') + assert idf.index.name == 'B' new_df = idf.reset_index() new_df.index = df.B tm.assert_index_equal(new_df.index, ci, check_names=False) - self.assertEqual(idf.index.name, 'B') + assert idf.index.name == 'B' def test_set_index_cast_datetimeindex(self): df = DataFrame({'A': [datetime(2000, 1, 1) + timedelta(i) @@ -193,13 +216,13 @@ def test_set_index_cast_datetimeindex(self): 'B': np.random.randn(1000)}) idf = df.set_index('A') - tm.assertIsInstance(idf.index, pd.DatetimeIndex) + assert isinstance(idf.index, pd.DatetimeIndex) # don't cast a DatetimeIndex WITH a tz, leave as object # GH 6032 i = (pd.DatetimeIndex( - pd.tseries.tools.to_datetime(['2013-1-1 13:00', - '2013-1-2 14:00'], errors="raise")) + to_datetime(['2013-1-1 13:00', + '2013-1-2 14:00'], errors="raise")) .tz_localize('US/Pacific')) df = DataFrame(np.random.randn(2, 1), columns=['A']) @@ -217,7 +240,7 @@ def test_set_index_cast_datetimeindex(self): df['B'] = i result = df['B'] assert_series_equal(result, expected, check_names=False) - self.assertEqual(result.name, 'B') + assert result.name == 'B' # keep the timezone result = i.to_series(keep_tz=True) @@ -228,13 +251,13 @@ def test_set_index_cast_datetimeindex(self): result = df['C'] comp = pd.DatetimeIndex(expected.values).copy() comp.tz = None - self.assert_numpy_array_equal(result.values, comp.values) + tm.assert_numpy_array_equal(result.values, comp.values) # list of 
datetimes with a tz df['D'] = i.to_pydatetime() result = df['D'] assert_series_equal(result, expected, check_names=False) - self.assertEqual(result.name, 'D') + assert result.name == 'D' # GH 6785 # set the index manually @@ -272,13 +295,13 @@ def test_set_index_timezone(self): i = pd.to_datetime(["2014-01-01 10:10:10"], utc=True).tz_convert('Europe/Rome') df = DataFrame({'i': i}) - self.assertEqual(df.set_index(i).index[0].hour, 11) - self.assertEqual(pd.DatetimeIndex(pd.Series(df.i))[0].hour, 11) - self.assertEqual(df.set_index(df.i).index[0].hour, 11) + assert df.set_index(i).index[0].hour == 11 + assert pd.DatetimeIndex(pd.Series(df.i))[0].hour == 11 + assert df.set_index(df.i).index[0].hour == 11 def test_set_index_dst(self): di = pd.date_range('2006-10-29 00:00:00', periods=3, - req='H', tz='US/Pacific') + freq='H', tz='US/Pacific') df = pd.DataFrame(data={'a': [0, 1, 2], 'b': [3, 4, 5]}, index=di).reset_index() @@ -295,6 +318,17 @@ def test_set_index_dst(self): exp = pd.DataFrame({'b': [3, 4, 5]}, index=exp_index) tm.assert_frame_equal(res, exp) + def test_reset_index_with_intervals(self): + idx = pd.IntervalIndex.from_breaks(np.arange(11), name='x') + original = pd.DataFrame({'x': idx, 'y': np.arange(10)})[['x', 'y']] + + result = original.set_index('x') + expected = pd.DataFrame({'y': np.arange(10)}, index=idx) + assert_frame_equal(result, expected) + + result2 = result.reset_index() + assert_frame_equal(result2, original) + def test_set_index_multiindexcolumns(self): columns = MultiIndex.from_tuples([('foo', 1), ('foo', 2), ('bar', 1)]) df = DataFrame(np.random.randn(3, 3), columns=columns) @@ -320,7 +354,7 @@ def test_set_index_empty_column(self): def test_set_columns(self): cols = Index(np.arange(len(self.mixed_frame.columns))) self.mixed_frame.columns = cols - with assertRaisesRegexp(ValueError, 'Length mismatch'): + with tm.assert_raises_regex(ValueError, 'Length mismatch'): self.mixed_frame.columns = cols[::2] def test_dti_set_index_reindex(self): @@ 
-347,7 +381,7 @@ def test_dti_set_index_reindex(self): # TODO: unused? result = df.set_index(new_index) # noqa - self.assertEqual(new_index.freq, index.freq) + assert new_index.freq == index.freq # Renaming @@ -380,7 +414,7 @@ def test_rename(self): tm.assert_index_equal(renamed.index, pd.Index(['BAR', 'FOO'])) # have to pass something - self.assertRaises(TypeError, self.frame.rename) + pytest.raises(TypeError, self.frame.rename) # partial columns renamed = self.frame.rename(columns={'C': 'foo', 'D': 'bar'}) @@ -398,45 +432,136 @@ def test_rename(self): renamed = renamer.rename(index={'foo': 'bar', 'bar': 'foo'}) tm.assert_index_equal(renamed.index, pd.Index(['bar', 'foo'], name='name')) - self.assertEqual(renamed.index.name, renamer.index.name) + assert renamed.index.name == renamer.index.name + + def test_rename_axis_inplace(self): + # GH 15704 + frame = self.frame.copy() + expected = frame.rename_axis('foo') + result = frame.copy() + no_return = result.rename_axis('foo', inplace=True) + + assert no_return is None + assert_frame_equal(result, expected) + + expected = frame.rename_axis('bar', axis=1) + result = frame.copy() + no_return = result.rename_axis('bar', axis=1, inplace=True) + + assert no_return is None + assert_frame_equal(result, expected) + + def test_rename_axis_warns(self): + # https://github.com/pandas-dev/pandas/issues/17833 + df = pd.DataFrame({"A": [1, 2], "B": [1, 2]}) + with tm.assert_produces_warning(FutureWarning) as w: + df.rename_axis(id, axis=0) + assert 'rename' in str(w[0].message) + + with tm.assert_produces_warning(FutureWarning) as w: + df.rename_axis({0: 10, 1: 20}, axis=0) + assert 'rename' in str(w[0].message) + + with tm.assert_produces_warning(FutureWarning) as w: + df.rename_axis(id, axis=1) + assert 'rename' in str(w[0].message) + + with tm.assert_produces_warning(FutureWarning) as w: + df['A'].rename_axis(id) + assert 'rename' in str(w[0].message) + + def test_rename_multiindex(self): - # MultiIndex tuples_index = [('foo1', 
'bar1'), ('foo2', 'bar2')] tuples_columns = [('fizz1', 'buzz1'), ('fizz2', 'buzz2')] index = MultiIndex.from_tuples(tuples_index, names=['foo', 'bar']) columns = MultiIndex.from_tuples( tuples_columns, names=['fizz', 'buzz']) - renamer = DataFrame([(0, 0), (1, 1)], index=index, columns=columns) - renamed = renamer.rename(index={'foo1': 'foo3', 'bar2': 'bar3'}, - columns={'fizz1': 'fizz3', 'buzz2': 'buzz3'}) + df = DataFrame([(0, 0), (1, 1)], index=index, columns=columns) + + # + # without specifying level -> across all levels + + renamed = df.rename(index={'foo1': 'foo3', 'bar2': 'bar3'}, + columns={'fizz1': 'fizz3', 'buzz2': 'buzz3'}) new_index = MultiIndex.from_tuples([('foo3', 'bar1'), ('foo2', 'bar3')], names=['foo', 'bar']) new_columns = MultiIndex.from_tuples([('fizz3', 'buzz1'), ('fizz2', 'buzz3')], names=['fizz', 'buzz']) - self.assert_index_equal(renamed.index, new_index) - self.assert_index_equal(renamed.columns, new_columns) - self.assertEqual(renamed.index.names, renamer.index.names) - self.assertEqual(renamed.columns.names, renamer.columns.names) + tm.assert_index_equal(renamed.index, new_index) + tm.assert_index_equal(renamed.columns, new_columns) + assert renamed.index.names == df.index.names + assert renamed.columns.names == df.columns.names + + # + # with specifying a level (GH13766) + + # dict + new_columns = MultiIndex.from_tuples([('fizz3', 'buzz1'), + ('fizz2', 'buzz2')], + names=['fizz', 'buzz']) + renamed = df.rename(columns={'fizz1': 'fizz3', 'buzz2': 'buzz3'}, + level=0) + tm.assert_index_equal(renamed.columns, new_columns) + renamed = df.rename(columns={'fizz1': 'fizz3', 'buzz2': 'buzz3'}, + level='fizz') + tm.assert_index_equal(renamed.columns, new_columns) + + new_columns = MultiIndex.from_tuples([('fizz1', 'buzz1'), + ('fizz2', 'buzz3')], + names=['fizz', 'buzz']) + renamed = df.rename(columns={'fizz1': 'fizz3', 'buzz2': 'buzz3'}, + level=1) + tm.assert_index_equal(renamed.columns, new_columns) + renamed = df.rename(columns={'fizz1': 
'fizz3', 'buzz2': 'buzz3'}, + level='buzz') + tm.assert_index_equal(renamed.columns, new_columns) + + # function + func = str.upper + new_columns = MultiIndex.from_tuples([('FIZZ1', 'buzz1'), + ('FIZZ2', 'buzz2')], + names=['fizz', 'buzz']) + renamed = df.rename(columns=func, level=0) + tm.assert_index_equal(renamed.columns, new_columns) + renamed = df.rename(columns=func, level='fizz') + tm.assert_index_equal(renamed.columns, new_columns) + + new_columns = MultiIndex.from_tuples([('fizz1', 'BUZZ1'), + ('fizz2', 'BUZZ2')], + names=['fizz', 'buzz']) + renamed = df.rename(columns=func, level=1) + tm.assert_index_equal(renamed.columns, new_columns) + renamed = df.rename(columns=func, level='buzz') + tm.assert_index_equal(renamed.columns, new_columns) + + # index + new_index = MultiIndex.from_tuples([('foo3', 'bar1'), + ('foo2', 'bar2')], + names=['foo', 'bar']) + renamed = df.rename(index={'foo1': 'foo3', 'bar2': 'bar3'}, + level=0) + tm.assert_index_equal(renamed.index, new_index) def test_rename_nocopy(self): renamed = self.frame.rename(columns={'C': 'foo'}, copy=False) renamed['foo'] = 1. 
- self.assertTrue((self.frame['C'] == 1.).all()) + assert (self.frame['C'] == 1.).all() def test_rename_inplace(self): self.frame.rename(columns={'C': 'foo'}) - self.assertIn('C', self.frame) - self.assertNotIn('foo', self.frame) + assert 'C' in self.frame + assert 'foo' not in self.frame c_id = id(self.frame['C']) frame = self.frame.copy() frame.rename(columns={'C': 'foo'}, inplace=True) - self.assertNotIn('C', frame) - self.assertIn('foo', frame) - self.assertNotEqual(id(frame['foo']), c_id) + assert 'C' not in frame + assert 'foo' in frame + assert id(frame['foo']) != c_id def test_rename_bug(self): # GH 5344 @@ -480,19 +605,6 @@ def test_reorder_levels(self): index=e_idx) assert_frame_equal(result, expected) - result = df.reorder_levels([0, 0, 0]) - e_idx = MultiIndex(levels=[['bar'], ['bar'], ['bar']], - labels=[[0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0]], - names=['L0', 'L0', 'L0']) - expected = DataFrame({'A': np.arange(6), 'B': np.arange(6)}, - index=e_idx) - assert_frame_equal(result, expected) - - result = df.reorder_levels(['L0', 'L0', 'L0']) - assert_frame_equal(result, expected) - def test_reset_index(self): stacked = self.frame.stack()[::2] stacked = DataFrame({'foo': stacked, 'bar': stacked}) @@ -516,27 +628,27 @@ def test_reset_index(self): # default name assigned rdf = self.frame.reset_index() exp = pd.Series(self.frame.index.values, name='index') - self.assert_series_equal(rdf['index'], exp) + tm.assert_series_equal(rdf['index'], exp) # default name assigned, corner case df = self.frame.copy() df['index'] = 'foo' rdf = df.reset_index() exp = pd.Series(self.frame.index.values, name='level_0') - self.assert_series_equal(rdf['level_0'], exp) + tm.assert_series_equal(rdf['level_0'], exp) # but this is ok self.frame.index.name = 'index' deleveled = self.frame.reset_index() - self.assert_series_equal(deleveled['index'], - pd.Series(self.frame.index)) - self.assert_index_equal(deleveled.index, - pd.Index(np.arange(len(deleveled)))) + 
tm.assert_series_equal(deleveled['index'], + pd.Series(self.frame.index)) + tm.assert_index_equal(deleveled.index, + pd.Index(np.arange(len(deleveled)))) # preserve column names self.frame.columns.name = 'columns' resetted = self.frame.reset_index() - self.assertEqual(resetted.columns.name, 'columns') + assert resetted.columns.name == 'columns' # only remove certain columns frame = self.frame.reset_index().set_index(['index', 'A', 'B']) @@ -568,6 +680,43 @@ def test_reset_index(self): xp = xp.set_index(['B'], append=True) assert_frame_equal(rs, xp, check_names=False) + def test_reset_index_level(self): + df = pd.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], + columns=['A', 'B', 'C', 'D']) + + for levels in ['A', 'B'], [0, 1]: + # With MultiIndex + result = df.set_index(['A', 'B']).reset_index(level=levels[0]) + tm.assert_frame_equal(result, df.set_index('B')) + + result = df.set_index(['A', 'B']).reset_index(level=levels[:1]) + tm.assert_frame_equal(result, df.set_index('B')) + + result = df.set_index(['A', 'B']).reset_index(level=levels) + tm.assert_frame_equal(result, df) + + result = df.set_index(['A', 'B']).reset_index(level=levels, + drop=True) + tm.assert_frame_equal(result, df[['C', 'D']]) + + # With single-level Index (GH 16263) + result = df.set_index('A').reset_index(level=levels[0]) + tm.assert_frame_equal(result, df) + + result = df.set_index('A').reset_index(level=levels[:1]) + tm.assert_frame_equal(result, df) + + result = df.set_index(['A']).reset_index(level=levels[0], + drop=True) + tm.assert_frame_equal(result, df[['B', 'C', 'D']]) + + # Missing levels - for both MultiIndex and single-level Index: + for idx_lev in ['A', 'B'], ['A']: + with tm.assert_raises_regex(KeyError, 'Level E '): + df.set_index(idx_lev).reset_index(level=['A', 'E']) + with tm.assert_raises_regex(IndexError, 'Too many levels'): + df.set_index(idx_lev).reset_index(level=[0, 1, 2]) + def test_reset_index_right_dtype(self): time = np.arange(0.0, 10, np.sqrt(2) / 2) s1 = Series((9.81 * 
time ** 2) / 2, @@ -576,10 +725,10 @@ def test_reset_index_right_dtype(self): df = DataFrame(s1) resetted = s1.reset_index() - self.assertEqual(resetted['time'].dtype, np.float64) + assert resetted['time'].dtype == np.float64 resetted = df.reset_index() - self.assertEqual(resetted['time'].dtype, np.float64) + assert resetted['time'].dtype == np.float64 def test_reset_index_multiindex_col(self): vals = np.random.randn(3, 3).astype(object) @@ -624,6 +773,33 @@ def test_reset_index_multiindex_col(self): ['a', 'mean', 'median', 'mean']]) assert_frame_equal(rs, xp) + def test_reset_index_multiindex_nan(self): + # GH6322, testing reset_index on MultiIndexes + # when we have a nan or all nan + df = pd.DataFrame({'A': ['a', 'b', 'c'], + 'B': [0, 1, np.nan], + 'C': np.random.rand(3)}) + rs = df.set_index(['A', 'B']).reset_index() + assert_frame_equal(rs, df) + + df = pd.DataFrame({'A': [np.nan, 'b', 'c'], + 'B': [0, 1, 2], + 'C': np.random.rand(3)}) + rs = df.set_index(['A', 'B']).reset_index() + assert_frame_equal(rs, df) + + df = pd.DataFrame({'A': ['a', 'b', 'c'], + 'B': [0, 1, 2], + 'C': [np.nan, 1.1, 2.2]}) + rs = df.set_index(['A', 'B']).reset_index() + assert_frame_equal(rs, df) + + df = pd.DataFrame({'A': ['a', 'b', 'c'], + 'B': [np.nan, np.nan, np.nan], + 'C': np.random.rand(3)}) + rs = df.set_index(['A', 'B']).reset_index() + assert_frame_equal(rs, df) + def test_reset_index_with_datetimeindex_cols(self): # GH5818 # @@ -642,7 +818,7 @@ def test_reset_index_range(self): df = pd.DataFrame([[0, 0], [1, 1]], columns=['A', 'B'], index=RangeIndex(stop=2)) result = df.reset_index() - tm.assertIsInstance(result.index, RangeIndex) + assert isinstance(result.index, RangeIndex) expected = pd.DataFrame([[0, 0, 0], [1, 1, 1]], columns=['index', 'A', 'B'], index=RangeIndex(stop=2)) @@ -652,34 +828,154 @@ def test_set_index_names(self): df = pd.util.testing.makeDataFrame() df.index.name = 'name' - self.assertEqual(df.set_index(df.index).index.names, ['name']) + assert 
df.set_index(df.index).index.names == ['name'] mi = MultiIndex.from_arrays(df[['A', 'B']].T.values, names=['A', 'B']) mi2 = MultiIndex.from_arrays(df[['A', 'B', 'A', 'B']].T.values, - names=['A', 'B', 'A', 'B']) + names=['A', 'B', 'C', 'D']) df = df.set_index(['A', 'B']) - self.assertEqual(df.set_index(df.index).index.names, ['A', 'B']) + assert df.set_index(df.index).index.names == ['A', 'B'] # Check that set_index isn't converting a MultiIndex into an Index - self.assertTrue(isinstance(df.set_index(df.index).index, MultiIndex)) + assert isinstance(df.set_index(df.index).index, MultiIndex) # Check actual equality tm.assert_index_equal(df.set_index(df.index).index, mi) + idx2 = df.index.rename(['C', 'D']) + # Check that [MultiIndex, MultiIndex] yields a MultiIndex rather # than a pair of tuples - self.assertTrue(isinstance(df.set_index( - [df.index, df.index]).index, MultiIndex)) + assert isinstance(df.set_index([df.index, idx2]).index, MultiIndex) # Check equality - tm.assert_index_equal(df.set_index([df.index, df.index]).index, mi2) + tm.assert_index_equal(df.set_index([df.index, idx2]).index, mi2) def test_rename_objects(self): renamed = self.mixed_frame.rename(columns=str.upper) - self.assertIn('FOO', renamed) - self.assertNotIn('foo', renamed) + + assert 'FOO' in renamed + assert 'foo' not in renamed + + def test_rename_axis_style(self): + # https://github.com/pandas-dev/pandas/issues/12392 + df = pd.DataFrame({"A": [1, 2], "B": [1, 2]}, index=['X', 'Y']) + expected = pd.DataFrame({"a": [1, 2], "b": [1, 2]}, index=['X', 'Y']) + + result = df.rename(str.lower, axis=1) + assert_frame_equal(result, expected) + + result = df.rename(str.lower, axis='columns') + assert_frame_equal(result, expected) + + result = df.rename({"A": 'a', 'B': 'b'}, axis=1) + assert_frame_equal(result, expected) + + result = df.rename({"A": 'a', 'B': 'b'}, axis='columns') + assert_frame_equal(result, expected) + + # Index + expected = pd.DataFrame({"A": [1, 2], "B": [1, 2]}, index=['x', 
'y']) + result = df.rename(str.lower, axis=0) + assert_frame_equal(result, expected) + + result = df.rename(str.lower, axis='index') + assert_frame_equal(result, expected) + + result = df.rename({'X': 'x', 'Y': 'y'}, axis=0) + assert_frame_equal(result, expected) + + result = df.rename({'X': 'x', 'Y': 'y'}, axis='index') + assert_frame_equal(result, expected) + + result = df.rename(mapper=str.lower, axis='index') + assert_frame_equal(result, expected) + + def test_rename_mapper_multi(self): + df = pd.DataFrame({"A": ['a', 'b'], "B": ['c', 'd'], + 'C': [1, 2]}).set_index(["A", "B"]) + result = df.rename(str.upper) + expected = df.rename(index=str.upper) + assert_frame_equal(result, expected) + + def test_rename_positional_named(self): + # https://github.com/pandas-dev/pandas/issues/12392 + df = pd.DataFrame({"a": [1, 2], "b": [1, 2]}, index=['X', 'Y']) + result = df.rename(str.lower, columns=str.upper) + expected = pd.DataFrame({"A": [1, 2], "B": [1, 2]}, index=['x', 'y']) + assert_frame_equal(result, expected) + + def test_rename_axis_style_raises(self): + # https://github.com/pandas-dev/pandas/issues/12392 + df = pd.DataFrame({"A": [1, 2], "B": [1, 2]}, index=['0', '1']) + + # Named target and axis + with tm.assert_raises_regex(TypeError, None): + df.rename(index=str.lower, axis=1) + + with tm.assert_raises_regex(TypeError, None): + df.rename(index=str.lower, axis='columns') + + with tm.assert_raises_regex(TypeError, None): + df.rename(index=str.lower, axis='columns') + + with tm.assert_raises_regex(TypeError, None): + df.rename(columns=str.lower, axis='columns') + + with tm.assert_raises_regex(TypeError, None): + df.rename(index=str.lower, axis=0) + + # Multiple targets and axis + with tm.assert_raises_regex(TypeError, None): + df.rename(str.lower, str.lower, axis='columns') + + # Too many targets + with tm.assert_raises_regex(TypeError, None): + df.rename(str.lower, str.lower, str.lower) + + # Duplicates + with tm.assert_raises_regex(TypeError, "multiple 
values"): + df.rename(id, mapper=id) + + def test_reindex_api_equivalence(self): + # equivalence of the labels/axis and index/columns API's + df = DataFrame([[1, 2, 3], [3, 4, 5], [5, 6, 7]], + index=['a', 'b', 'c'], + columns=['d', 'e', 'f']) + + res1 = df.reindex(['b', 'a']) + res2 = df.reindex(index=['b', 'a']) + res3 = df.reindex(labels=['b', 'a']) + res4 = df.reindex(labels=['b', 'a'], axis=0) + res5 = df.reindex(['b', 'a'], axis=0) + for res in [res2, res3, res4, res5]: + tm.assert_frame_equal(res1, res) + + res1 = df.reindex(columns=['e', 'd']) + res2 = df.reindex(['e', 'd'], axis=1) + res3 = df.reindex(labels=['e', 'd'], axis=1) + for res in [res2, res3]: + tm.assert_frame_equal(res1, res) + + res1 = df.reindex(index=['b', 'a'], columns=['e', 'd']) + res2 = df.reindex(columns=['e', 'd'], index=['b', 'a']) + res3 = df.reindex(labels=['b', 'a'], axis=0).reindex(labels=['e', 'd'], + axis=1) + for res in [res2, res3]: + tm.assert_frame_equal(res1, res) + + def test_rename_positional(self): + df = pd.DataFrame(columns=['A', 'B']) + with tm.assert_produces_warning(FutureWarning) as rec: + result = df.rename(None, str.lower) + expected = pd.DataFrame(columns=['a', 'b']) + assert_frame_equal(result, expected) + assert len(rec) == 1 + message = str(rec[0].message) + assert 'rename' in message + assert 'Use named arguments' in message def test_assign_columns(self): self.frame['hi'] = 'there' @@ -703,3 +999,135 @@ def test_set_index_preserve_categorical_dtype(self): result = df.set_index(cols).reset_index() result = result.reindex(columns=df.columns) tm.assert_frame_equal(result, df) + + def test_ambiguous_warns(self): + df = pd.DataFrame({"A": [1, 2]}) + with tm.assert_produces_warning(FutureWarning): + df.rename(id, id) + + with tm.assert_produces_warning(FutureWarning): + df.rename({0: 10}, {"A": "B"}) + + @pytest.mark.skipif(PY2, reason="inspect.signature") + def test_rename_signature(self): + sig = inspect.signature(pd.DataFrame.rename) + parameters = 
set(sig.parameters) + assert parameters == {"self", "mapper", "index", "columns", "axis", + "inplace", "copy", "level"} + + @pytest.mark.skipif(PY2, reason="inspect.signature") + def test_reindex_signature(self): + sig = inspect.signature(pd.DataFrame.reindex) + parameters = set(sig.parameters) + assert parameters == {"self", "labels", "index", "columns", "axis", + "limit", "copy", "level", "method", + "fill_value", "tolerance"} + + +class TestIntervalIndex(object): + + def test_setitem(self): + + df = DataFrame({'A': range(10)}) + s = pd.cut(df.A, 5) + assert isinstance(s.cat.categories, IntervalIndex) + + # B & D end up as Categoricals + # the remainer are converted to in-line objects + # contining an IntervalIndex.values + df['B'] = s + df['C'] = np.array(s) + df['D'] = s.values + df['E'] = np.array(s.values) + + assert is_categorical_dtype(df['B']) + assert is_interval_dtype(df['B'].cat.categories) + assert is_categorical_dtype(df['D']) + assert is_interval_dtype(df['D'].cat.categories) + + assert is_object_dtype(df['C']) + assert is_object_dtype(df['E']) + + # they compare equal as Index + # when converted to numpy objects + c = lambda x: Index(np.array(x)) + tm.assert_index_equal(c(df.B), c(df.B), check_names=False) + tm.assert_index_equal(c(df.B), c(df.C), check_names=False) + tm.assert_index_equal(c(df.B), c(df.D), check_names=False) + tm.assert_index_equal(c(df.B), c(df.D), check_names=False) + + # B & D are the same Series + tm.assert_series_equal(df['B'], df['B'], check_names=False) + tm.assert_series_equal(df['B'], df['D'], check_names=False) + + # C & E are the same Series + tm.assert_series_equal(df['C'], df['C'], check_names=False) + tm.assert_series_equal(df['C'], df['E'], check_names=False) + + def test_set_reset_index(self): + + df = DataFrame({'A': range(10)}) + s = pd.cut(df.A, 5) + df['B'] = s + df = df.set_index('B') + + df = df.reset_index() + + def test_set_axis_inplace(self): + # GH14636 + df = DataFrame({'A': [1.1, 2.2, 3.3], + 'B': [5.0, 
6.1, 7.2], + 'C': [4.4, 5.5, 6.6]}, + index=[2010, 2011, 2012]) + + expected = {0: df.copy(), + 1: df.copy()} + expected[0].index = list('abc') + expected[1].columns = list('abc') + expected['index'] = expected[0] + expected['columns'] = expected[1] + + for axis in expected: + # inplace=True + # The FutureWarning comes from the fact that we would like to have + # inplace default to False some day + for inplace, warn in (None, FutureWarning), (True, None): + kwargs = {'inplace': inplace} + + result = df.copy() + with tm.assert_produces_warning(warn): + result.set_axis(list('abc'), axis=axis, **kwargs) + tm.assert_frame_equal(result, expected[axis]) + + # inplace=False + result = df.set_axis(list('abc'), axis=axis, inplace=False) + tm.assert_frame_equal(expected[axis], result) + + # omitting the "axis" parameter + with tm.assert_produces_warning(None): + result = df.set_axis(list('abc'), inplace=False) + tm.assert_frame_equal(result, expected[0]) + + # wrong values for the "axis" parameter + for axis in 3, 'foo': + with tm.assert_raises_regex(ValueError, 'No axis named'): + df.set_axis(list('abc'), axis=axis, inplace=False) + + def test_set_axis_prior_to_deprecation_signature(self): + df = DataFrame({'A': [1.1, 2.2, 3.3], + 'B': [5.0, 6.1, 7.2], + 'C': [4.4, 5.5, 6.6]}, + index=[2010, 2011, 2012]) + + expected = {0: df.copy(), + 1: df.copy()} + expected[0].index = list('abc') + expected[1].columns = list('abc') + expected['index'] = expected[0] + expected['columns'] = expected[1] + + # old signature + for axis in expected: + with tm.assert_produces_warning(FutureWarning): + result = df.set_axis(axis, list('abc'), inplace=False) + tm.assert_frame_equal(result, expected[axis]) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 1f0d16e959cd7..59a30fc69905f 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -2,48 +2,52 @@ from __future__ import print_function -from datetime import 
timedelta, datetime +import warnings +from datetime import timedelta from distutils.version import LooseVersion import sys import pytest +from string import ascii_lowercase from numpy import nan from numpy.random import randn import numpy as np -from pandas.compat import lrange -from pandas import (compat, isnull, notnull, DataFrame, Series, - MultiIndex, date_range, Timestamp) +from pandas.compat import lrange, product +from pandas import (compat, isna, notna, DataFrame, Series, + MultiIndex, date_range, Timestamp, Categorical, + _np_version_under1p15) import pandas as pd import pandas.core.nanops as nanops import pandas.core.algorithms as algorithms -import pandas.formats.printing as printing +import pandas.io.formats.printing as printing import pandas.util.testing as tm +import pandas.util._test_decorators as td from pandas.tests.frame.common import TestData -class TestDataFrameAnalytics(tm.TestCase, TestData): +class TestDataFrameAnalytics(TestData): # ---------------------------------------------------------------------= # Correlation and covariance + @td.skip_if_no_scipy def test_corr_pearson(self): - tm._skip_if_no_scipy() self.frame['A'][:5] = nan self.frame['B'][5:10] = nan self._check_method('pearson') + @td.skip_if_no_scipy def test_corr_kendall(self): - tm._skip_if_no_scipy() self.frame['A'][:5] = nan self.frame['B'][5:10] = nan self._check_method('kendall') + @td.skip_if_no_scipy def test_corr_spearman(self): - tm._skip_if_no_scipy() self.frame['A'][:5] = nan self.frame['B'][5:10] = nan @@ -60,8 +64,8 @@ def _check_method(self, method='pearson', check_minp=False): expected.loc['A', 'B'] = expected.loc['B', 'A'] = nan tm.assert_frame_equal(result, expected) + @td.skip_if_no_scipy def test_corr_non_numeric(self): - tm._skip_if_no_scipy() self.frame['A'][:5] = nan self.frame['B'][5:10] = nan @@ -70,9 +74,8 @@ def test_corr_non_numeric(self): expected = self.mixed_frame.loc[:, ['A', 'B', 'C', 'D']].corr() tm.assert_frame_equal(result, expected) + 
@td.skip_if_no_scipy def test_corr_nooverlap(self): - tm._skip_if_no_scipy() - # nothing in common for meth in ['pearson', 'kendall', 'spearman']: df = DataFrame({'A': [1, 1.5, 1, np.nan, np.nan, np.nan], @@ -80,34 +83,31 @@ def test_corr_nooverlap(self): 'C': [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan]}) rs = df.corr(meth) - self.assertTrue(isnull(rs.loc['A', 'B'])) - self.assertTrue(isnull(rs.loc['B', 'A'])) - self.assertEqual(rs.loc['A', 'A'], 1) - self.assertEqual(rs.loc['B', 'B'], 1) - self.assertTrue(isnull(rs.loc['C', 'C'])) + assert isna(rs.loc['A', 'B']) + assert isna(rs.loc['B', 'A']) + assert rs.loc['A', 'A'] == 1 + assert rs.loc['B', 'B'] == 1 + assert isna(rs.loc['C', 'C']) + @td.skip_if_no_scipy def test_corr_constant(self): - tm._skip_if_no_scipy() - # constant --> all NA for meth in ['pearson', 'spearman']: df = DataFrame({'A': [1, 1, 1, np.nan, np.nan, np.nan], 'B': [np.nan, np.nan, np.nan, 1, 1, 1]}) rs = df.corr(meth) - self.assertTrue(isnull(rs.values).all()) + assert isna(rs.values).all() def test_corr_int(self): # dtypes other than float64 #1761 df3 = DataFrame({"a": [1, 2, 3, 4], "b": [1, 2, 3, 4]}) - # it works! 
df3.cov() df3.corr() + @td.skip_if_no_scipy def test_corr_int_and_boolean(self): - tm._skip_if_no_scipy() - # when dtypes of pandas series are different # then ndarray will have dtype=object, # so it need to be properly handled @@ -116,7 +116,20 @@ def test_corr_int_and_boolean(self): expected = DataFrame(np.ones((2, 2)), index=[ 'a', 'b'], columns=['a', 'b']) for meth in ['pearson', 'kendall', 'spearman']: - tm.assert_frame_equal(df.corr(meth), expected) + + # RuntimeWarning + with warnings.catch_warnings(record=True): + result = df.corr(meth) + tm.assert_frame_equal(result, expected) + + def test_corr_cov_independent_index_column(self): + # GH 14617 + df = pd.DataFrame(np.random.randn(4 * 10).reshape(10, 4), + columns=list("abcd")) + for method in ['cov', 'corr']: + result = getattr(df, method)() + assert result.index is not result.columns + assert result.index.equals(result.columns) def test_cov(self): # min_periods no NAs (corner case) @@ -126,7 +139,7 @@ def test_cov(self): tm.assert_frame_equal(expected, result) result = self.frame.cov(min_periods=len(self.frame) + 1) - self.assertTrue(isnull(result.values).all()) + assert isna(result.values).all() # with NAs frame = self.frame.copy() @@ -180,10 +193,10 @@ def test_corrwith(self): dropped = a.corrwith(b, axis=0, drop=True) tm.assert_almost_equal(dropped['A'], a['A'].corr(b['A'])) - self.assertNotIn('B', dropped) + assert 'B' not in dropped dropped = a.corrwith(b, axis=1, drop=True) - self.assertNotIn(a.index[-1], dropped.index) + assert a.index[-1] not in dropped.index # non time-series data index = ['a', 'b', 'c', 'd', 'e'] @@ -224,7 +237,17 @@ def test_corrwith_matches_corrcoef(self): c2 = np.corrcoef(df1['a'], df2['a'])[0][1] tm.assert_almost_equal(c1, c2) - self.assertTrue(c1 < 1) + assert c1 < 1 + + def test_corrwith_mixed_dtypes(self): + # GH 18570 + df = pd.DataFrame({'a': [1, 4, 3, 2], 'b': [4, 6, 7, 3], + 'c': ['a', 'b', 'c', 'd']}) + s = pd.Series([0, 6, 7, 3]) + result = df.corrwith(s) + corrs = 
[df['a'].corr(s), df['b'].corr(s)] + expected = pd.Series(data=corrs, index=['a', 'b']) + tm.assert_series_equal(result, expected) def test_bool_describe_in_mixed_frame(self): df = DataFrame({ @@ -282,6 +305,36 @@ def test_describe_bool_frame(self): index=['count', 'unique', 'top', 'freq']) tm.assert_frame_equal(result, expected) + def test_describe_categorical(self): + df = DataFrame({'value': np.random.randint(0, 10000, 100)}) + labels = ["{0} - {1}".format(i, i + 499) for i in range(0, 10000, 500)] + cat_labels = Categorical(labels, labels) + + df = df.sort_values(by=['value'], ascending=True) + df['value_group'] = pd.cut(df.value, range(0, 10500, 500), + right=False, labels=cat_labels) + cat = df + + # Categoricals should not show up together with numerical columns + result = cat.describe() + assert len(result.columns) == 1 + + # In a frame, describe() for the cat should be the same as for string + # arrays (count, unique, top, freq) + + cat = Categorical(["a", "b", "b", "b"], categories=['a', 'b', 'c'], + ordered=True) + s = Series(cat) + result = s.describe() + expected = Series([4, 2, "b", 3], + index=['count', 'unique', 'top', 'freq']) + tm.assert_series_equal(result, expected) + + cat = Series(Categorical(["a", "b", "c", "c"])) + df3 = DataFrame({"cat": cat, "s": ["a", "b", "c", "c"]}) + res = df3.describe() + tm.assert_numpy_array_equal(res["cat"].values, res["s"].values) + def test_describe_categorical_columns(self): # GH 11558 columns = pd.CategoricalIndex(['int1', 'int2', 'obj'], @@ -325,8 +378,8 @@ def test_describe_datetime_columns(self): '50%', '75%', 'max']) expected.columns = exp_columns tm.assert_frame_equal(result, expected) - self.assertEqual(result.columns.freq, 'MS') - self.assertEqual(result.columns.tz, expected.columns.tz) + assert result.columns.freq == 'MS' + assert result.columns.tz == expected.columns.tz def test_describe_timedelta_values(self): # GH 6145 @@ -363,7 +416,7 @@ def test_describe_timedelta_values(self): "50% 3 days 00:00:00 
0 days 03:00:00\n" "75% 4 days 00:00:00 0 days 04:00:00\n" "max 5 days 00:00:00 0 days 05:00:00") - self.assertEqual(repr(res), exp_repr) + assert repr(res) == exp_repr def test_reduce_mixed_frame(self): # GH 6806 @@ -379,7 +432,7 @@ def test_reduce_mixed_frame(self): tm.assert_series_equal(test, df.T.sum(axis=1)) def test_count(self): - f = lambda s: notnull(s).sum() + f = lambda s: notna(s).sum() self._check_stat_op('count', f, has_skipna=False, has_numeric_only=True, @@ -389,10 +442,10 @@ def test_count(self): # corner case frame = DataFrame() ct1 = frame.count(1) - tm.assertIsInstance(ct1, Series) + assert isinstance(ct1, Series) ct2 = frame.count(0) - tm.assertIsInstance(ct2, Series) + assert isinstance(ct2, Series) # GH #423 df = DataFrame(index=lrange(10)) @@ -426,7 +479,8 @@ def test_nunique(self): Series({0: 1, 1: 3, 2: 2})) def test_sum(self): - self._check_stat_op('sum', np.sum, has_numeric_only=True) + self._check_stat_op('sum', np.sum, has_numeric_only=True, + skipna_alternative=np.nansum) # mixed types (with upcasting happening) self._check_stat_op('sum', np.sum, @@ -434,7 +488,11 @@ def test_sum(self): has_numeric_only=True, check_dtype=False, check_less_precise=True) - def test_stat_operators_attempt_obj_array(self): + @pytest.mark.parametrize( + "method", ['sum', 'mean', 'prod', 'var', + 'std', 'skew', 'min', 'max']) + def test_stat_operators_attempt_obj_array(self, method): + # GH #676 data = { 'a': [-0.00049987540199591344, -0.0016467257772919831, 0.00067695870775883013], @@ -444,20 +502,17 @@ def test_stat_operators_attempt_obj_array(self): } df1 = DataFrame(data, index=['foo', 'bar', 'baz'], dtype='O') - methods = ['sum', 'mean', 'prod', 'var', 'std', 'skew', 'min', 'max'] - # GH #676 df2 = DataFrame({0: [np.nan, 2], 1: [np.nan, 3], 2: [np.nan, 4]}, dtype=object) for df in [df1, df2]: - for meth in methods: - self.assertEqual(df.values.dtype, np.object_) - result = getattr(df, meth)(1) - expected = getattr(df.astype('f8'), meth)(1) + assert 
df.values.dtype == np.object_ + result = getattr(df, method)(1) + expected = getattr(df.astype('f8'), method)(1) - if not tm._incompat_bottleneck_version(meth): - tm.assert_series_equal(result, expected) + if method in ['sum', 'prod']: + tm.assert_series_equal(result, expected) def test_mean(self): self._check_stat_op('mean', np.mean, check_dates=True) @@ -467,7 +522,7 @@ def test_product(self): def test_median(self): def wrapper(x): - if isnull(x).any(): + if isna(x).any(): return np.nan return np.median(x) @@ -498,7 +553,7 @@ def test_cummin(self): # fix issue cummin_xs = self.tsframe.cummin(axis=1) - self.assertEqual(np.shape(cummin_xs), np.shape(self.tsframe)) + assert np.shape(cummin_xs) == np.shape(self.tsframe) def test_cummax(self): self.tsframe.loc[5:10, 0] = nan @@ -521,7 +576,7 @@ def test_cummax(self): # fix issue cummax_xs = self.tsframe.cummax(axis=1) - self.assertEqual(np.shape(cummax_xs), np.shape(self.tsframe)) + assert np.shape(cummax_xs) == np.shape(self.tsframe) def test_max(self): self._check_stat_op('max', np.max, check_dates=True) @@ -548,16 +603,16 @@ def test_var_std(self): arr = np.repeat(np.random.random((1, 1000)), 1000, 0) result = nanops.nanvar(arr, axis=0) - self.assertFalse((result < 0).any()) - if nanops._USE_BOTTLENECK: - nanops._USE_BOTTLENECK = False + assert not (result < 0).any() + + with pd.option_context('use_bottleneck', False): result = nanops.nanvar(arr, axis=0) - self.assertFalse((result < 0).any()) - nanops._USE_BOTTLENECK = True + assert not (result < 0).any() - def test_numeric_only_flag(self): + @pytest.mark.parametrize( + "meth", ['sem', 'var', 'std']) + def test_numeric_only_flag(self, meth): # GH #9201 - methods = ['sem', 'var', 'std'] df1 = DataFrame(np.random.randn(5, 3), columns=['foo', 'bar', 'baz']) # set one entry to a number in str format df1.loc[0, 'foo'] = '100' @@ -566,20 +621,34 @@ def test_numeric_only_flag(self): # set one entry to a non-number str df2.loc[0, 'foo'] = 'a' - for meth in methods: - 
result = getattr(df1, meth)(axis=1, numeric_only=True) - expected = getattr(df1[['bar', 'baz']], meth)(axis=1) - tm.assert_series_equal(expected, result) + result = getattr(df1, meth)(axis=1, numeric_only=True) + expected = getattr(df1[['bar', 'baz']], meth)(axis=1) + tm.assert_series_equal(expected, result) + + result = getattr(df2, meth)(axis=1, numeric_only=True) + expected = getattr(df2[['bar', 'baz']], meth)(axis=1) + tm.assert_series_equal(expected, result) + + # df1 has all numbers, df2 has a letter inside + pytest.raises(TypeError, lambda: getattr(df1, meth)( + axis=1, numeric_only=False)) + pytest.raises(TypeError, lambda: getattr(df2, meth)( + axis=1, numeric_only=False)) - result = getattr(df2, meth)(axis=1, numeric_only=True) - expected = getattr(df2[['bar', 'baz']], meth)(axis=1) - tm.assert_series_equal(expected, result) + def test_mixed_ops(self): + # GH 16116 + df = DataFrame({'int': [1, 2, 3, 4], + 'float': [1., 2., 3., 4.], + 'str': ['a', 'b', 'c', 'd']}) - # df1 has all numbers, df2 has a letter inside - self.assertRaises(TypeError, lambda: getattr(df1, meth) - (axis=1, numeric_only=False)) - self.assertRaises(TypeError, lambda: getattr(df2, meth) - (axis=1, numeric_only=False)) + for op in ['mean', 'std', 'var', 'skew', + 'kurt', 'sem']: + result = getattr(df, op)() + assert len(result) == 2 + + with pd.option_context('use_bottleneck', False): + result = getattr(df, op)() + assert len(result) == 2 def test_cumsum(self): self.tsframe.loc[5:10, 0] = nan @@ -602,7 +671,7 @@ def test_cumsum(self): # fix issue cumsum_xs = self.tsframe.cumsum(axis=1) - self.assertEqual(np.shape(cumsum_xs), np.shape(self.tsframe)) + assert np.shape(cumsum_xs) == np.shape(self.tsframe) def test_cumprod(self): self.tsframe.loc[5:10, 0] = nan @@ -621,7 +690,7 @@ def test_cumprod(self): # fix issue cumprod_xs = self.tsframe.cumprod(axis=1) - self.assertEqual(np.shape(cumprod_xs), np.shape(self.tsframe)) + assert np.shape(cumprod_xs) == np.shape(self.tsframe) # ints df = 
self.tsframe.fillna(0).astype(int) @@ -633,173 +702,6 @@ def test_cumprod(self): df.cumprod(0) df.cumprod(1) - def test_rank(self): - tm._skip_if_no_scipy() - from scipy.stats import rankdata - - self.frame['A'][::2] = np.nan - self.frame['B'][::3] = np.nan - self.frame['C'][::4] = np.nan - self.frame['D'][::5] = np.nan - - ranks0 = self.frame.rank() - ranks1 = self.frame.rank(1) - mask = np.isnan(self.frame.values) - - fvals = self.frame.fillna(np.inf).values - - exp0 = np.apply_along_axis(rankdata, 0, fvals) - exp0[mask] = np.nan - - exp1 = np.apply_along_axis(rankdata, 1, fvals) - exp1[mask] = np.nan - - tm.assert_almost_equal(ranks0.values, exp0) - tm.assert_almost_equal(ranks1.values, exp1) - - # integers - df = DataFrame(np.random.randint(0, 5, size=40).reshape((10, 4))) - - result = df.rank() - exp = df.astype(float).rank() - tm.assert_frame_equal(result, exp) - - result = df.rank(1) - exp = df.astype(float).rank(1) - tm.assert_frame_equal(result, exp) - - def test_rank2(self): - df = DataFrame([[1, 3, 2], [1, 2, 3]]) - expected = DataFrame([[1.0, 3.0, 2.0], [1, 2, 3]]) / 3.0 - result = df.rank(1, pct=True) - tm.assert_frame_equal(result, expected) - - df = DataFrame([[1, 3, 2], [1, 2, 3]]) - expected = df.rank(0) / 2.0 - result = df.rank(0, pct=True) - tm.assert_frame_equal(result, expected) - - df = DataFrame([['b', 'c', 'a'], ['a', 'c', 'b']]) - expected = DataFrame([[2.0, 3.0, 1.0], [1, 3, 2]]) - result = df.rank(1, numeric_only=False) - tm.assert_frame_equal(result, expected) - - expected = DataFrame([[2.0, 1.5, 1.0], [1, 1.5, 2]]) - result = df.rank(0, numeric_only=False) - tm.assert_frame_equal(result, expected) - - df = DataFrame([['b', np.nan, 'a'], ['a', 'c', 'b']]) - expected = DataFrame([[2.0, nan, 1.0], [1.0, 3.0, 2.0]]) - result = df.rank(1, numeric_only=False) - tm.assert_frame_equal(result, expected) - - expected = DataFrame([[2.0, nan, 1.0], [1.0, 1.0, 2.0]]) - result = df.rank(0, numeric_only=False) - tm.assert_frame_equal(result, expected) 
- - # f7u12, this does not work without extensive workaround - data = [[datetime(2001, 1, 5), nan, datetime(2001, 1, 2)], - [datetime(2000, 1, 2), datetime(2000, 1, 3), - datetime(2000, 1, 1)]] - df = DataFrame(data) - - # check the rank - expected = DataFrame([[2., nan, 1.], - [2., 3., 1.]]) - result = df.rank(1, numeric_only=False, ascending=True) - tm.assert_frame_equal(result, expected) - - expected = DataFrame([[1., nan, 2.], - [2., 1., 3.]]) - result = df.rank(1, numeric_only=False, ascending=False) - tm.assert_frame_equal(result, expected) - - # mixed-type frames - self.mixed_frame['datetime'] = datetime.now() - self.mixed_frame['timedelta'] = timedelta(days=1, seconds=1) - - result = self.mixed_frame.rank(1) - expected = self.mixed_frame.rank(1, numeric_only=True) - tm.assert_frame_equal(result, expected) - - df = DataFrame({"a": [1e-20, -5, 1e-20 + 1e-40, 10, - 1e60, 1e80, 1e-30]}) - exp = DataFrame({"a": [3.5, 1., 3.5, 5., 6., 7., 2.]}) - tm.assert_frame_equal(df.rank(), exp) - - def test_rank_na_option(self): - tm._skip_if_no_scipy() - from scipy.stats import rankdata - - self.frame['A'][::2] = np.nan - self.frame['B'][::3] = np.nan - self.frame['C'][::4] = np.nan - self.frame['D'][::5] = np.nan - - # bottom - ranks0 = self.frame.rank(na_option='bottom') - ranks1 = self.frame.rank(1, na_option='bottom') - - fvals = self.frame.fillna(np.inf).values - - exp0 = np.apply_along_axis(rankdata, 0, fvals) - exp1 = np.apply_along_axis(rankdata, 1, fvals) - - tm.assert_almost_equal(ranks0.values, exp0) - tm.assert_almost_equal(ranks1.values, exp1) - - # top - ranks0 = self.frame.rank(na_option='top') - ranks1 = self.frame.rank(1, na_option='top') - - fval0 = self.frame.fillna((self.frame.min() - 1).to_dict()).values - fval1 = self.frame.T - fval1 = fval1.fillna((fval1.min() - 1).to_dict()).T - fval1 = fval1.fillna(np.inf).values - - exp0 = np.apply_along_axis(rankdata, 0, fval0) - exp1 = np.apply_along_axis(rankdata, 1, fval1) - - 
tm.assert_almost_equal(ranks0.values, exp0) - tm.assert_almost_equal(ranks1.values, exp1) - - # descending - - # bottom - ranks0 = self.frame.rank(na_option='top', ascending=False) - ranks1 = self.frame.rank(1, na_option='top', ascending=False) - - fvals = self.frame.fillna(np.inf).values - - exp0 = np.apply_along_axis(rankdata, 0, -fvals) - exp1 = np.apply_along_axis(rankdata, 1, -fvals) - - tm.assert_almost_equal(ranks0.values, exp0) - tm.assert_almost_equal(ranks1.values, exp1) - - # descending - - # top - ranks0 = self.frame.rank(na_option='bottom', ascending=False) - ranks1 = self.frame.rank(1, na_option='bottom', ascending=False) - - fval0 = self.frame.fillna((self.frame.min() - 1).to_dict()).values - fval1 = self.frame.T - fval1 = fval1.fillna((fval1.min() - 1).to_dict()).T - fval1 = fval1.fillna(np.inf).values - - exp0 = np.apply_along_axis(rankdata, 0, -fval0) - exp1 = np.apply_along_axis(rankdata, 1, -fval1) - - tm.assert_numpy_array_equal(ranks0.values, exp0) - tm.assert_numpy_array_equal(ranks1.values, exp1) - - def test_rank_axis(self): - # check if using axes' names gives the same result - df = pd.DataFrame([[2, 1], [4, 3]]) - tm.assert_frame_equal(df.rank(axis=0), df.rank(axis='index')) - tm.assert_frame_equal(df.rank(axis=1), df.rank(axis='columns')) - def test_sem(self): alt = lambda x: np.std(x, ddof=1) / np.sqrt(len(x)) self._check_stat_op('sem', alt) @@ -811,35 +713,14 @@ def test_sem(self): arr = np.repeat(np.random.random((1, 1000)), 1000, 0) result = nanops.nansem(arr, axis=0) - self.assertFalse((result < 0).any()) - if nanops._USE_BOTTLENECK: - nanops._USE_BOTTLENECK = False - result = nanops.nansem(arr, axis=0) - self.assertFalse((result < 0).any()) - nanops._USE_BOTTLENECK = True - - def test_sort_invalid_kwargs(self): - df = DataFrame([1, 2, 3], columns=['a']) + assert not (result < 0).any() - msg = r"sort\(\) got an unexpected keyword argument 'foo'" - tm.assertRaisesRegexp(TypeError, msg, df.sort, foo=2) - - # Neither of these should 
raise an error because they - # are explicit keyword arguments in the signature and - # hence should not be swallowed by the kwargs parameter - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - df.sort(axis=1) - - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - df.sort(kind='mergesort') - - msg = "the 'order' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, df.sort, order=2) + with pd.option_context('use_bottleneck', False): + result = nanops.nansem(arr, axis=0) + assert not (result < 0).any() + @td.skip_if_no_scipy def test_skew(self): - tm._skip_if_no_scipy() from scipy.stats import skew def alt(x): @@ -849,9 +730,8 @@ def alt(x): self._check_stat_op('skew', alt) + @td.skip_if_no_scipy def test_kurt(self): - tm._skip_if_no_scipy() - from scipy.stats import kurtosis def alt(x): @@ -870,12 +750,13 @@ def alt(x): kurt = df.kurt() kurt2 = df.kurt(level=0).xs('bar') tm.assert_series_equal(kurt, kurt2, check_names=False) - self.assertTrue(kurt.name is None) - self.assertEqual(kurt2.name, 'bar') + assert kurt.name is None + assert kurt2.name == 'bar' def _check_stat_op(self, name, alternative, frame=None, has_skipna=True, has_numeric_only=False, check_dtype=True, - check_dates=False, check_less_precise=False): + check_dates=False, check_less_precise=False, + skipna_alternative=None): if frame is None: frame = self.frame # set some NAs @@ -888,23 +769,19 @@ def _check_stat_op(self, name, alternative, frame=None, has_skipna=True, df = DataFrame({'b': date_range('1/1/2001', periods=2)}) _f = getattr(df, name) result = _f() - self.assertIsInstance(result, Series) + assert isinstance(result, Series) df['a'] = lrange(len(df)) result = getattr(df, name)() - self.assertIsInstance(result, Series) - self.assertTrue(len(result)) + assert isinstance(result, Series) + assert len(result) if has_skipna: - def skipna_wrapper(x): - nona = x.dropna() - if len(nona) == 0: - return np.nan - return alternative(nona) - 
def wrapper(x): return alternative(x.values) + skipna_wrapper = tm._make_skipna_wrapper(alternative, + skipna_alternative) result0 = f(axis=0, skipna=False) result1 = f(axis=1, skipna=False) tm.assert_series_equal(result0, frame.apply(wrapper), @@ -923,7 +800,7 @@ def wrapper(x): tm.assert_series_equal(result0, frame.apply(skipna_wrapper), check_dtype=check_dtype, check_less_precise=check_less_precise) - if not tm._incompat_bottleneck_version(name): + if name in ['sum', 'prod']: exp = frame.apply(skipna_wrapper, axis=1) tm.assert_series_equal(result1, exp, check_dtype=False, check_less_precise=check_less_precise) @@ -931,15 +808,15 @@ def wrapper(x): # check dtypes if check_dtype: lcd_dtype = frame.values.dtype - self.assertEqual(lcd_dtype, result0.dtype) - self.assertEqual(lcd_dtype, result1.dtype) + assert lcd_dtype == result0.dtype + assert lcd_dtype == result1.dtype # result = f(axis=1) # comp = frame.apply(alternative, axis=1).reindex(result.index) # assert_series_equal(result, comp) # bad axis - tm.assertRaisesRegexp(ValueError, 'No axis named 2', f, axis=2) + tm.assert_raises_regex(ValueError, 'No axis named 2', f, axis=2) # make sure works on mixed-type frame getattr(self.mixed_frame, name)(axis=0) getattr(self.mixed_frame, name)(axis=1) @@ -955,9 +832,12 @@ def wrapper(x): all_na = self.frame * np.NaN r0 = getattr(all_na, name)(axis=0) r1 = getattr(all_na, name)(axis=1) - if not tm._incompat_bottleneck_version(name): - self.assertTrue(np.isnan(r0).all()) - self.assertTrue(np.isnan(r1).all()) + if name in ['sum', 'prod']: + unit = int(name == 'prod') + expected = pd.Series(unit, index=r0.index, dtype=r0.dtype) + tm.assert_series_equal(r0, expected) + expected = pd.Series(unit, index=r1.index, dtype=r1.dtype) + tm.assert_series_equal(r1, expected) def test_mode(self): df = pd.DataFrame({"A": [12, 12, 11, 12, 19, 11], @@ -967,18 +847,23 @@ def test_mode(self): "E": [8, 8, 1, 1, 3, 3]}) tm.assert_frame_equal(df[["A"]].mode(), pd.DataFrame({"A": [12]})) - 
expected = pd.Series([], dtype='int64', name='D').to_frame() + expected = pd.Series([0, 1, 2, 3, 4, 5], dtype='int64', name='D').\ + to_frame() tm.assert_frame_equal(df[["D"]].mode(), expected) expected = pd.Series([1, 3, 8], dtype='int64', name='E').to_frame() tm.assert_frame_equal(df[["E"]].mode(), expected) tm.assert_frame_equal(df[["A", "B"]].mode(), pd.DataFrame({"A": [12], "B": [10.]})) tm.assert_frame_equal(df.mode(), - pd.DataFrame({"A": [12, np.nan, np.nan], - "B": [10, np.nan, np.nan], - "C": [8, 9, np.nan], - "D": [np.nan, np.nan, np.nan], - "E": [1, 3, 8]})) + pd.DataFrame({"A": [12, np.nan, np.nan, np.nan, + np.nan, np.nan], + "B": [10, np.nan, np.nan, np.nan, + np.nan, np.nan], + "C": [8, 9, np.nan, np.nan, np.nan, + np.nan], + "D": [0, 1, 2, 3, 4, 5], + "E": [1, 3, 8, np.nan, np.nan, + np.nan]})) # outputs in sorted order df["C"] = list(reversed(df["C"])) @@ -995,20 +880,12 @@ def test_mode(self): df = pd.DataFrame({"A": np.arange(6, dtype='int64'), "B": pd.date_range('2011', periods=6), "C": list('abcdef')}) - exp = pd.DataFrame({"A": pd.Series([], dtype=df["A"].dtype), - "B": pd.Series([], dtype=df["B"].dtype), - "C": pd.Series([], dtype=df["C"].dtype)}) - tm.assert_frame_equal(df.mode(), exp) - - # and also when not empty - df.loc[1, "A"] = 0 - df.loc[4, "B"] = df.loc[3, "B"] - df.loc[5, "C"] = 'e' - exp = pd.DataFrame({"A": pd.Series([0], dtype=df["A"].dtype), - "B": pd.Series([df.loc[3, "B"]], + exp = pd.DataFrame({"A": pd.Series(np.arange(6, dtype='int64'), + dtype=df["A"].dtype), + "B": pd.Series(pd.date_range('2011', periods=6), dtype=df["B"].dtype), - "C": pd.Series(['e'], dtype=df["C"].dtype)}) - + "C": pd.Series(list('abcdef'), + dtype=df["C"].dtype)}) tm.assert_frame_equal(df.mode(), exp) def test_operators_timedelta64(self): @@ -1023,19 +900,19 @@ def test_operators_timedelta64(self): # min result = diffs.min() - self.assertEqual(result[0], diffs.loc[0, 'A']) - self.assertEqual(result[1], diffs.loc[0, 'B']) + assert result[0] == 
diffs.loc[0, 'A'] + assert result[1] == diffs.loc[0, 'B'] result = diffs.min(axis=1) - self.assertTrue((result == diffs.loc[0, 'B']).all()) + assert (result == diffs.loc[0, 'B']).all() # max result = diffs.max() - self.assertEqual(result[0], diffs.loc[2, 'A']) - self.assertEqual(result[1], diffs.loc[2, 'B']) + assert result[0] == diffs.loc[2, 'A'] + assert result[1] == diffs.loc[2, 'B'] result = diffs.max(axis=1) - self.assertTrue((result == diffs['A']).all()) + assert (result == diffs['A']).all() # abs result = diffs.abs() @@ -1053,7 +930,7 @@ def test_operators_timedelta64(self): mixed['F'] = Timestamp('20130101') # results in an object array - from pandas.tseries.timedeltas import ( + from pandas.core.tools.timedeltas import ( _coerce_scalar_to_timedelta_type as _coerce) result = mixed.min() @@ -1083,20 +960,80 @@ def test_operators_timedelta64(self): df = DataFrame({'time': date_range('20130102', periods=5), 'time2': date_range('20130105', periods=5)}) df['off1'] = df['time2'] - df['time'] - self.assertEqual(df['off1'].dtype, 'timedelta64[ns]') + assert df['off1'].dtype == 'timedelta64[ns]' df['off2'] = df['time'] - df['time2'] df._consolidate_inplace() - self.assertTrue(df['off1'].dtype == 'timedelta64[ns]') - self.assertTrue(df['off2'].dtype == 'timedelta64[ns]') + assert df['off1'].dtype == 'timedelta64[ns]' + assert df['off2'].dtype == 'timedelta64[ns]' def test_sum_corner(self): axis0 = self.empty.sum(0) axis1 = self.empty.sum(1) - tm.assertIsInstance(axis0, Series) - tm.assertIsInstance(axis1, Series) - self.assertEqual(len(axis0), 0) - self.assertEqual(len(axis1), 0) + assert isinstance(axis0, Series) + assert isinstance(axis1, Series) + assert len(axis0) == 0 + assert len(axis1) == 0 + + @pytest.mark.parametrize('method, unit', [ + ('sum', 0), + ('prod', 1), + ]) + def test_sum_prod_nanops(self, method, unit): + idx = ['a', 'b', 'c'] + df = pd.DataFrame({"a": [unit, unit], + "b": [unit, np.nan], + "c": [np.nan, np.nan]}) + # The default + result = 
getattr(df, method) + expected = pd.Series([unit, unit, unit], index=idx, dtype='float64') + + # min_count=1 + result = getattr(df, method)(min_count=1) + expected = pd.Series([unit, unit, np.nan], index=idx) + tm.assert_series_equal(result, expected) + + # min_count=0 + result = getattr(df, method)(min_count=0) + expected = pd.Series([unit, unit, unit], index=idx, dtype='float64') + tm.assert_series_equal(result, expected) + + result = getattr(df.iloc[1:], method)(min_count=1) + expected = pd.Series([unit, np.nan, np.nan], index=idx) + tm.assert_series_equal(result, expected) + + # min_count > 1 + df = pd.DataFrame({"A": [unit] * 10, "B": [unit] * 5 + [np.nan] * 5}) + result = getattr(df, method)(min_count=5) + expected = pd.Series(result, index=['A', 'B']) + tm.assert_series_equal(result, expected) + + result = getattr(df, method)(min_count=6) + expected = pd.Series(result, index=['A', 'B']) + tm.assert_series_equal(result, expected) + + def test_sum_nanops_timedelta(self): + # prod isn't defined on timedeltas + idx = ['a', 'b', 'c'] + df = pd.DataFrame({"a": [0, 0], + "b": [0, np.nan], + "c": [np.nan, np.nan]}) + + df2 = df.apply(pd.to_timedelta) + + # 0 by default + result = df2.sum() + expected = pd.Series([0, 0, 0], dtype='m8[ns]', index=idx) + tm.assert_series_equal(result, expected) + + # min_count=0 + result = df2.sum(min_count=0) + tm.assert_series_equal(result, expected) + + # min_count=1 + result = df2.sum(min_count=1) + expected = pd.Series([0, 0, np.nan], dtype='m8[ns]', index=idx) + tm.assert_series_equal(result, expected) def test_sum_object(self): values = self.frame.values.astype(int) @@ -1115,18 +1052,18 @@ def test_mean_corner(self): # unit test when have object data the_mean = self.mixed_frame.mean(axis=0) the_sum = self.mixed_frame.sum(axis=0, numeric_only=True) - self.assert_index_equal(the_sum.index, the_mean.index) - self.assertTrue(len(the_mean.index) < len(self.mixed_frame.columns)) + tm.assert_index_equal(the_sum.index, the_mean.index) + 
assert len(the_mean.index) < len(self.mixed_frame.columns) # xs sum mixed type, just want to know it works... the_mean = self.mixed_frame.mean(axis=1) the_sum = self.mixed_frame.sum(axis=1, numeric_only=True) - self.assert_index_equal(the_sum.index, the_mean.index) + tm.assert_index_equal(the_sum.index, the_mean.index) # take mean of boolean column self.frame['bool'] = self.frame['A'] > 0 means = self.frame.mean(0) - self.assertEqual(means['bool'], self.frame['bool'].values.mean()) + assert means['bool'] == self.frame['bool'].values.mean() def test_stats_mixed_type(self): # don't blow up @@ -1137,7 +1074,7 @@ def test_stats_mixed_type(self): def test_median_corner(self): def wrapper(x): - if isnull(x).any(): + if isna(x).any(): return np.nan return np.median(x) @@ -1161,8 +1098,8 @@ def test_cumsum_corner(self): def test_sum_bools(self): df = DataFrame(index=lrange(1), columns=lrange(10)) - bools = isnull(df) - self.assertEqual(bools.sum(axis=1)[0], 10) + bools = isna(df) + assert bools.sum(axis=1)[0] == 10 # Index of max / min @@ -1178,7 +1115,7 @@ def test_idxmin(self): skipna=skipna) tm.assert_series_equal(result, expected) - self.assertRaises(ValueError, frame.idxmin, axis=2) + pytest.raises(ValueError, frame.idxmin, axis=2) def test_idxmax(self): frame = self.frame @@ -1192,7 +1129,7 @@ def test_idxmax(self): skipna=skipna) tm.assert_series_equal(result, expected) - self.assertRaises(ValueError, frame.idxmax, axis=2) + pytest.raises(ValueError, frame.idxmax, axis=2) # ---------------------------------------------------------------------- # Logical reductions @@ -1267,7 +1204,7 @@ def wrapper(x): # assert_series_equal(result, comp) # bad axis - self.assertRaises(ValueError, f, axis=2) + pytest.raises(ValueError, f, axis=2) # make sure works on mixed-type frame mixed = self.mixed_frame @@ -1294,80 +1231,13 @@ def __nonzero__(self): r0 = getattr(all_na, name)(axis=0) r1 = getattr(all_na, name)(axis=1) if name == 'any': - self.assertFalse(r0.any()) - 
self.assertFalse(r1.any()) + assert not r0.any() + assert not r1.any() else: - self.assertTrue(r0.all()) - self.assertTrue(r1.all()) + assert r0.all() + assert r1.all() # ---------------------------------------------------------------------- - # Top / bottom - - def test_nlargest(self): - # GH10393 - from string import ascii_lowercase - df = pd.DataFrame({'a': np.random.permutation(10), - 'b': list(ascii_lowercase[:10])}) - result = df.nlargest(5, 'a') - expected = df.sort_values('a', ascending=False).head(5) - tm.assert_frame_equal(result, expected) - - def test_nlargest_multiple_columns(self): - from string import ascii_lowercase - df = pd.DataFrame({'a': np.random.permutation(10), - 'b': list(ascii_lowercase[:10]), - 'c': np.random.permutation(10).astype('float64')}) - result = df.nlargest(5, ['a', 'b']) - expected = df.sort_values(['a', 'b'], ascending=False).head(5) - tm.assert_frame_equal(result, expected) - - def test_nsmallest(self): - from string import ascii_lowercase - df = pd.DataFrame({'a': np.random.permutation(10), - 'b': list(ascii_lowercase[:10])}) - result = df.nsmallest(5, 'a') - expected = df.sort_values('a').head(5) - tm.assert_frame_equal(result, expected) - - def test_nsmallest_multiple_columns(self): - from string import ascii_lowercase - df = pd.DataFrame({'a': np.random.permutation(10), - 'b': list(ascii_lowercase[:10]), - 'c': np.random.permutation(10).astype('float64')}) - result = df.nsmallest(5, ['a', 'c']) - expected = df.sort_values(['a', 'c']).head(5) - tm.assert_frame_equal(result, expected) - - def test_nsmallest_nlargest_duplicate_index(self): - # GH 13412 - df = pd.DataFrame({'a': [1, 2, 3, 4], - 'b': [4, 3, 2, 1], - 'c': [0, 1, 2, 3]}, - index=[0, 0, 1, 1]) - result = df.nsmallest(4, 'a') - expected = df.sort_values('a').head(4) - tm.assert_frame_equal(result, expected) - - result = df.nlargest(4, 'a') - expected = df.sort_values('a', ascending=False).head(4) - tm.assert_frame_equal(result, expected) - - result = 
df.nsmallest(4, ['a', 'c']) - expected = df.sort_values(['a', 'c']).head(4) - tm.assert_frame_equal(result, expected) - - result = df.nsmallest(4, ['c', 'a']) - expected = df.sort_values(['c', 'a']).head(4) - tm.assert_frame_equal(result, expected) - - result = df.nlargest(4, ['a', 'c']) - expected = df.sort_values(['a', 'c'], ascending=False).head(4) - tm.assert_frame_equal(result, expected) - - result = df.nlargest(4, ['c', 'a']) - expected = df.sort_values(['c', 'a'], ascending=False).head(4) - tm.assert_frame_equal(result, expected) - # ---------------------------------------------------------------------- # Isin def test_isin(self): @@ -1381,10 +1251,13 @@ def test_isin(self): expected = DataFrame([df.loc[s].isin(other) for s in df.index]) tm.assert_frame_equal(result, expected) - def test_isin_empty(self): + @pytest.mark.parametrize("empty", [[], Series(), np.array([])]) + def test_isin_empty(self, empty): + # see gh-16991 df = DataFrame({'A': ['a', 'b', 'c'], 'B': ['a', 'e', 'f']}) - result = df.isin([]) - expected = pd.DataFrame(False, df.index, df.columns) + expected = DataFrame(False, df.index, df.columns) + + result = df.isin(empty) tm.assert_frame_equal(result, expected) def test_isin_dict(self): @@ -1410,10 +1283,10 @@ def test_isin_with_string_scalar(self): df = DataFrame({'vals': [1, 2, 3, 4], 'ids': ['a', 'b', 'f', 'n'], 'ids2': ['a', 'n', 'c', 'n']}, index=['foo', 'bar', 'baz', 'qux']) - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): df.isin('a') - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): df.isin('aaa') def test_isin_df(self): @@ -1431,23 +1304,31 @@ def test_isin_df(self): expected['B'] = False tm.assert_frame_equal(result, expected) + def test_isin_tuples(self): + # GH16394 + df = pd.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'f']}) + df['C'] = list(zip(df['A'], df['B'])) + result = df['C'].isin([(1, 'a')]) + tm.assert_series_equal(result, + Series([True, False, False], name="C")) + def 
test_isin_df_dupe_values(self): df1 = DataFrame({'A': [1, 2, 3, 4], 'B': [2, np.nan, 4, 4]}) # just cols duped df2 = DataFrame([[0, 2], [12, 4], [2, np.nan], [4, 5]], columns=['B', 'B']) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df1.isin(df2) # just index duped df2 = DataFrame([[0, 2], [12, 4], [2, np.nan], [4, 5]], columns=['A', 'B'], index=[0, 0, 1, 1]) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df1.isin(df2) # cols and index: df2.columns = ['B', 'B'] - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df1.isin(df2) def test_isin_dupe_self(self): @@ -1493,6 +1374,27 @@ def test_isin_multiIndex(self): result = df1.isin(df2) tm.assert_frame_equal(result, expected) + def test_isin_empty_datetimelike(self): + # GH 15473 + df1_ts = DataFrame({'date': + pd.to_datetime(['2014-01-01', '2014-01-02'])}) + df1_td = DataFrame({'date': + [pd.Timedelta(1, 's'), pd.Timedelta(2, 's')]}) + df2 = DataFrame({'date': []}) + df3 = DataFrame() + + expected = DataFrame({'date': [False, False]}) + + result = df1_ts.isin(df2) + tm.assert_frame_equal(result, expected) + result = df1_ts.isin(df3) + tm.assert_frame_equal(result, expected) + + result = df1_td.isin(df2) + tm.assert_frame_equal(result, expected) + result = df1_td.isin(df3) + tm.assert_frame_equal(result, expected) + # ---------------------------------------------------------------------- # Row deduplication @@ -1516,13 +1418,7 @@ def test_drop_duplicates(self): result = df.drop_duplicates('AAA', keep=False) expected = df.loc[[]] tm.assert_frame_equal(result, expected) - self.assertEqual(len(result), 0) - - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - result = df.drop_duplicates('AAA', take_last=True) - expected = df.loc[[6, 7]] - tm.assert_frame_equal(result, expected) + assert len(result) == 0 # multi column expected = df.loc[[0, 1, 2, 3]] @@ -1539,12 +1435,6 @@ def test_drop_duplicates(self): expected = df.loc[[0]] 
tm.assert_frame_equal(result, expected) - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - result = df.drop_duplicates(('AAA', 'B'), take_last=True) - expected = df.loc[[0, 5, 6, 7]] - tm.assert_frame_equal(result, expected) - # consider everything df2 = df.loc[:, ['AAA', 'B', 'C']] @@ -1561,13 +1451,6 @@ def test_drop_duplicates(self): expected = df2.drop_duplicates(['AAA', 'B'], keep=False) tm.assert_frame_equal(result, expected) - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - result = df2.drop_duplicates(take_last=True) - with tm.assert_produces_warning(FutureWarning): - expected = df2.drop_duplicates(['AAA', 'B'], take_last=True) - tm.assert_frame_equal(result, expected) - # integers result = df.drop_duplicates('C') expected = df.iloc[[0, 2]] @@ -1608,7 +1491,35 @@ def test_drop_duplicates(self): df = df.append([[1] + [0] * 8], ignore_index=True) for keep in ['first', 'last', False]: - self.assertEqual(df.duplicated(keep=keep).sum(), 0) + assert df.duplicated(keep=keep).sum() == 0 + + @pytest.mark.parametrize('subset', ['a', ['a'], ['a', 'B']]) + def test_duplicated_with_misspelled_column_name(self, subset): + # GH 19730 + df = pd.DataFrame({'A': [0, 0, 1], + 'B': [0, 0, 1], + 'C': [0, 0, 1]}) + + with pytest.raises(KeyError): + df.duplicated(subset) + + with pytest.raises(KeyError): + df.drop_duplicates(subset) + + def test_drop_duplicates_with_duplicate_column_names(self): + # GH17836 + df = DataFrame([ + [1, 2, 5], + [3, 4, 6], + [3, 4, 7] + ], columns=['a', 'a', 'b']) + + result0 = df.drop_duplicates() + tm.assert_frame_equal(result0, df) + + result1 = df.drop_duplicates('a') + expected1 = df[:2] + tm.assert_frame_equal(result1, expected1) def test_drop_duplicates_for_take_all(self): df = DataFrame({'AAA': ['foo', 'bar', 'baz', 'bar', @@ -1663,13 +1574,7 @@ def test_drop_duplicates_tuple(self): result = df.drop_duplicates(('AA', 'AB'), keep=False) expected = df.loc[[]] # empty df - 
self.assertEqual(len(result), 0) - tm.assert_frame_equal(result, expected) - - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - result = df.drop_duplicates(('AA', 'AB'), take_last=True) - expected = df.loc[[6, 7]] + assert len(result) == 0 tm.assert_frame_equal(result, expected) # multi column @@ -1698,13 +1603,7 @@ def test_drop_duplicates_NA(self): result = df.drop_duplicates('A', keep=False) expected = df.loc[[]] # empty df tm.assert_frame_equal(result, expected) - self.assertEqual(len(result), 0) - - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - result = df.drop_duplicates('A', take_last=True) - expected = df.loc[[1, 6, 7]] - tm.assert_frame_equal(result, expected) + assert len(result) == 0 # multi column result = df.drop_duplicates(['A', 'B']) @@ -1719,12 +1618,6 @@ def test_drop_duplicates_NA(self): expected = df.loc[[6]] tm.assert_frame_equal(result, expected) - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - result = df.drop_duplicates(['A', 'B'], take_last=True) - expected = df.loc[[1, 5, 6, 7]] - tm.assert_frame_equal(result, expected) - # nan df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'bar', 'foo'], @@ -1745,13 +1638,7 @@ def test_drop_duplicates_NA(self): result = df.drop_duplicates('C', keep=False) expected = df.loc[[]] # empty df tm.assert_frame_equal(result, expected) - self.assertEqual(len(result), 0) - - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - result = df.drop_duplicates('C', take_last=True) - expected = df.loc[[3, 7]] - tm.assert_frame_equal(result, expected) + assert len(result) == 0 # multi column result = df.drop_duplicates(['C', 'B']) @@ -1766,12 +1653,6 @@ def test_drop_duplicates_NA(self): expected = df.loc[[1]] tm.assert_frame_equal(result, expected) - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - result = df.drop_duplicates(['C', 'B'], take_last=True) - expected = df.loc[[1, 3, 6, 
7]] - tm.assert_frame_equal(result, expected) - def test_drop_duplicates_NA_for_take_all(self): # none df = DataFrame({'A': [None, None, 'foo', 'bar', @@ -1832,15 +1713,7 @@ def test_drop_duplicates_inplace(self): expected = orig.loc[[]] result = df tm.assert_frame_equal(result, expected) - self.assertEqual(len(df), 0) - - # deprecate take_last - df = orig.copy() - with tm.assert_produces_warning(FutureWarning): - df.drop_duplicates('A', take_last=True, inplace=True) - expected = orig.loc[[6, 7]] - result = df - tm.assert_frame_equal(result, expected) + assert len(df) == 0 # multi column df = orig.copy() @@ -1861,14 +1734,6 @@ def test_drop_duplicates_inplace(self): result = df tm.assert_frame_equal(result, expected) - # deprecate take_last - df = orig.copy() - with tm.assert_produces_warning(FutureWarning): - df.drop_duplicates(['A', 'B'], take_last=True, inplace=True) - expected = orig.loc[[0, 5, 6, 7]] - result = df - tm.assert_frame_equal(result, expected) - # consider everything orig2 = orig.loc[:, ['A', 'B', 'C']].copy() @@ -1891,17 +1756,7 @@ def test_drop_duplicates_inplace(self): result = df2 tm.assert_frame_equal(result, expected) - # deprecate take_last - df2 = orig2.copy() - with tm.assert_produces_warning(FutureWarning): - df2.drop_duplicates(take_last=True, inplace=True) - with tm.assert_produces_warning(FutureWarning): - expected = orig2.drop_duplicates(['A', 'B'], take_last=True) - result = df2 - tm.assert_frame_equal(result, expected) - # Rounding - def test_round(self): # GH 2665 @@ -1930,7 +1785,7 @@ def test_round(self): # Round with a list round_list = [1, 2] - with self.assertRaises(TypeError): + with pytest.raises(TypeError): df.round(round_list) # Round with a dictionary @@ -1953,34 +1808,34 @@ def test_round(self): # float input to `decimals` non_int_round_dict = {'col1': 1, 'col2': 0.5} - with self.assertRaises(TypeError): + with pytest.raises(TypeError): df.round(non_int_round_dict) # String input non_int_round_dict = {'col1': 1, 'col2': 
'foo'} - with self.assertRaises(TypeError): + with pytest.raises(TypeError): df.round(non_int_round_dict) non_int_round_Series = Series(non_int_round_dict) - with self.assertRaises(TypeError): + with pytest.raises(TypeError): df.round(non_int_round_Series) # List input non_int_round_dict = {'col1': 1, 'col2': [1, 2]} - with self.assertRaises(TypeError): + with pytest.raises(TypeError): df.round(non_int_round_dict) non_int_round_Series = Series(non_int_round_dict) - with self.assertRaises(TypeError): + with pytest.raises(TypeError): df.round(non_int_round_Series) # Non integer Series inputs non_int_round_Series = Series(non_int_round_dict) - with self.assertRaises(TypeError): + with pytest.raises(TypeError): df.round(non_int_round_Series) non_int_round_Series = Series(non_int_round_dict) - with self.assertRaises(TypeError): + with pytest.raises(TypeError): df.round(non_int_round_Series) # Negative numbers @@ -1999,12 +1854,12 @@ def test_round(self): 'col1': [1.123, 2.123, 3.123], 'col2': [1.2, 2.2, 3.2]}) - if sys.version < LooseVersion('2.7'): + if LooseVersion(sys.version) < LooseVersion('2.7'): # Rounding with decimal is a ValueError in Python < 2.7 - with self.assertRaises(ValueError): + with pytest.raises(ValueError): df.round(nan_round_Series) else: - with self.assertRaises(TypeError): + with pytest.raises(TypeError): df.round(nan_round_Series) # Make sure this doesn't break existing Series.round @@ -2033,7 +1888,7 @@ def test_numpy_round(self): tm.assert_frame_equal(out, expected) msg = "the 'out' parameter is not supported" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): np.round(df, decimals=0, out=df) def test_round_mixed_type(self): @@ -2059,14 +1914,14 @@ def test_round_issue(self): dfs = pd.concat((df, df), axis=1) rounded = dfs.round() - self.assert_index_equal(rounded.index, dfs.index) + tm.assert_index_equal(rounded.index, dfs.index) decimals = pd.Series([1, 0, 2], index=['A', 'B', 'A']) - 
self.assertRaises(ValueError, df.round, decimals) + pytest.raises(ValueError, df.round, decimals) def test_built_in_round(self): if not compat.PY3: - pytest.skip("build in round cannot be overriden " + pytest.skip("build in round cannot be overridden " "prior to Python 3") # GH11763 @@ -2079,19 +1934,54 @@ def test_built_in_round(self): {'col1': [1., 2., 3.], 'col2': [1., 2., 3.]}) tm.assert_frame_equal(round(df), expected_rounded) + def test_pct_change(self): + # GH 11150 + pnl = DataFrame([np.arange(0, 40, 10), np.arange(0, 40, 10), np.arange( + 0, 40, 10)]).astype(np.float64) + pnl.iat[1, 0] = np.nan + pnl.iat[1, 1] = np.nan + pnl.iat[2, 3] = 60 + + for axis in range(2): + expected = pnl.ffill(axis=axis) / pnl.ffill(axis=axis).shift( + axis=axis) - 1 + result = pnl.pct_change(axis=axis, fill_method='pad') + + tm.assert_frame_equal(result, expected) + # Clip def test_clip(self): median = self.frame.median().median() + original = self.frame.copy() capped = self.frame.clip_upper(median) - self.assertFalse((capped.values > median).any()) + assert not (capped.values > median).any() floored = self.frame.clip_lower(median) - self.assertFalse((floored.values < median).any()) + assert not (floored.values < median).any() double = self.frame.clip(upper=median, lower=median) - self.assertFalse((double.values != median).any()) + assert not (double.values != median).any() + + # Verify that self.frame was not changed inplace + assert (self.frame.values == original.values).all() + + def test_inplace_clip(self): + # GH #15388 + median = self.frame.median().median() + frame_copy = self.frame.copy() + + frame_copy.clip_upper(median, inplace=True) + assert not (frame_copy.values > median).any() + frame_copy = self.frame.copy() + + frame_copy.clip_lower(median, inplace=True) + assert not (frame_copy.values < median).any() + frame_copy = self.frame.copy() + + frame_copy.clip(upper=median, lower=median, inplace=True) + assert not (frame_copy.values != median).any() def 
test_dataframe_clip(self): # GH #2747 @@ -2104,41 +1994,80 @@ def test_dataframe_clip(self): lb_mask = df.values <= lb ub_mask = df.values >= ub mask = ~lb_mask & ~ub_mask - self.assertTrue((clipped_df.values[lb_mask] == lb).all()) - self.assertTrue((clipped_df.values[ub_mask] == ub).all()) - self.assertTrue((clipped_df.values[mask] == - df.values[mask]).all()) - - def test_clip_against_series(self): + assert (clipped_df.values[lb_mask] == lb).all() + assert (clipped_df.values[ub_mask] == ub).all() + assert (clipped_df.values[mask] == df.values[mask]).all() + + def test_clip_mixed_numeric(self): + # TODO(jreback) + # clip on mixed integer or floats + # with integer clippers coerces to float + df = DataFrame({'A': [1, 2, 3], + 'B': [1., np.nan, 3.]}) + result = df.clip(1, 2) + expected = DataFrame({'A': [1, 2, 2.], + 'B': [1., np.nan, 2.]}) + tm.assert_frame_equal(result, expected, check_like=True) + + @pytest.mark.parametrize("inplace", [True, False]) + def test_clip_against_series(self, inplace): # GH #6966 df = DataFrame(np.random.randn(1000, 2)) lb = Series(np.random.randn(1000)) ub = lb + 1 - clipped_df = df.clip(lb, ub, axis=0) + original = df.copy() + clipped_df = df.clip(lb, ub, axis=0, inplace=inplace) + + if inplace: + clipped_df = df for i in range(2): - lb_mask = df.iloc[:, i] <= lb - ub_mask = df.iloc[:, i] >= ub + lb_mask = original.iloc[:, i] <= lb + ub_mask = original.iloc[:, i] >= ub mask = ~lb_mask & ~ub_mask result = clipped_df.loc[lb_mask, i] tm.assert_series_equal(result, lb[lb_mask], check_names=False) - self.assertEqual(result.name, i) + assert result.name == i result = clipped_df.loc[ub_mask, i] tm.assert_series_equal(result, ub[ub_mask], check_names=False) - self.assertEqual(result.name, i) + assert result.name == i tm.assert_series_equal(clipped_df.loc[mask, i], df.loc[mask, i]) - def test_clip_against_frame(self): + @pytest.mark.parametrize("inplace", [True, False]) + @pytest.mark.parametrize("lower", [[2, 3, 4], np.asarray([2, 3, 4])]) + 
@pytest.mark.parametrize("axis,res", [ + (0, [[2., 2., 3.], [4., 5., 6.], [7., 7., 7.]]), + (1, [[2., 3., 4.], [4., 5., 6.], [5., 6., 7.]]) + ]) + def test_clip_against_list_like(self, inplace, lower, axis, res): + # GH #15390 + original = self.simple.copy(deep=True) + + result = original.clip(lower=lower, upper=[5, 6, 7], + axis=axis, inplace=inplace) + + expected = pd.DataFrame(res, + columns=original.columns, + index=original.index) + if inplace: + result = original + tm.assert_frame_equal(result, expected, check_exact=True) + + @pytest.mark.xfail( + not _np_version_under1p15, + reason="failing under numpy-dev gh-19976") + @pytest.mark.parametrize("axis", [0, 1, None]) + def test_clip_against_frame(self, axis): df = DataFrame(np.random.randn(1000, 2)) lb = DataFrame(np.random.randn(1000, 2)) ub = lb + 1 - clipped_df = df.clip(lb, ub) + clipped_df = df.clip(lb, ub, axis=axis) lb_mask = df <= lb ub_mask = df >= ub @@ -2148,6 +2077,17 @@ def test_clip_against_frame(self): tm.assert_frame_equal(clipped_df[ub_mask], ub[ub_mask]) tm.assert_frame_equal(clipped_df[mask], df[mask]) + def test_clip_with_na_args(self): + """Should process np.nan argument as None """ + # GH # 17276 + tm.assert_frame_equal(self.frame.clip(np.nan), self.frame) + tm.assert_frame_equal(self.frame.clip(upper=[1, 2, np.nan]), + self.frame) + tm.assert_frame_equal(self.frame.clip(lower=[1, np.nan, 3]), + self.frame) + tm.assert_frame_equal(self.frame.clip(upper=np.nan, lower=np.nan), + self.frame) + # Matrix-like def test_dot(self): @@ -2168,11 +2108,11 @@ def test_dot(self): # Check series argument result = a.dot(b['one']) tm.assert_series_equal(result, expected['one'], check_names=False) - self.assertTrue(result.name is None) + assert result.name is None result = a.dot(b1['one']) tm.assert_series_equal(result, expected['one'], check_names=False) - self.assertTrue(result.name is None) + assert result.name is None # can pass correct-length arrays row = a.iloc[0].values @@ -2181,7 +2121,8 @@ def 
test_dot(self): exp = a.dot(a.iloc[0]) tm.assert_series_equal(result, exp) - with tm.assertRaisesRegexp(ValueError, 'Dot product shape mismatch'): + with tm.assert_raises_regex(ValueError, + 'Dot product shape mismatch'): a.dot(row[:-1]) a = np.random.rand(1, 5) @@ -2198,5 +2139,159 @@ def test_dot(self): df = DataFrame(randn(3, 4), index=[1, 2, 3], columns=lrange(4)) df2 = DataFrame(randn(5, 3), index=lrange(5), columns=[1, 2, 3]) - with tm.assertRaisesRegexp(ValueError, 'aligned'): + with tm.assert_raises_regex(ValueError, 'aligned'): df.dot(df2) + + +@pytest.fixture +def df_duplicates(): + return pd.DataFrame({'a': [1, 2, 3, 4, 4], + 'b': [1, 1, 1, 1, 1], + 'c': [0, 1, 2, 5, 4]}, + index=[0, 0, 1, 1, 1]) + + +@pytest.fixture +def df_strings(): + return pd.DataFrame({'a': np.random.permutation(10), + 'b': list(ascii_lowercase[:10]), + 'c': np.random.permutation(10).astype('float64')}) + + +@pytest.fixture +def df_main_dtypes(): + return pd.DataFrame( + {'group': [1, 1, 2], + 'int': [1, 2, 3], + 'float': [4., 5., 6.], + 'string': list('abc'), + 'category_string': pd.Series(list('abc')).astype('category'), + 'category_int': [7, 8, 9], + 'datetime': pd.date_range('20130101', periods=3), + 'datetimetz': pd.date_range('20130101', + periods=3, + tz='US/Eastern'), + 'timedelta': pd.timedelta_range('1 s', periods=3, freq='s')}, + columns=['group', 'int', 'float', 'string', + 'category_string', 'category_int', + 'datetime', 'datetimetz', + 'timedelta']) + + +class TestNLargestNSmallest(object): + + dtype_error_msg_template = ("Column {column!r} has dtype {dtype}, cannot " + "use method {method!r} with this dtype") + + # ---------------------------------------------------------------------- + # Top / bottom + @pytest.mark.parametrize( + 'method, n, order', + product(['nsmallest', 'nlargest'], range(1, 11), + [['a'], + ['c'], + ['a', 'b'], + ['a', 'c'], + ['b', 'a'], + ['b', 'c'], + ['a', 'b', 'c'], + ['c', 'a', 'b'], + ['c', 'b', 'a'], + ['b', 'c', 'a'], + ['b', 'a', 'c'], 
+ + # dups! + ['b', 'c', 'c'], + + ])) + def test_n(self, df_strings, method, n, order): + # GH10393 + df = df_strings + if 'b' in order: + + error_msg = self.dtype_error_msg_template.format( + column='b', method=method, dtype='object') + with tm.assert_raises_regex(TypeError, error_msg): + getattr(df, method)(n, order) + else: + ascending = method == 'nsmallest' + result = getattr(df, method)(n, order) + expected = df.sort_values(order, ascending=ascending).head(n) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + 'method, columns', + product(['nsmallest', 'nlargest'], + product(['group'], ['category_string', 'string']) + )) + def test_n_error(self, df_main_dtypes, method, columns): + df = df_main_dtypes + error_msg = self.dtype_error_msg_template.format( + column=columns[1], method=method, dtype=df[columns[1]].dtype) + # escape some characters that may be in the repr + error_msg = (error_msg.replace('(', '\\(').replace(")", "\\)") + .replace("[", "\\[").replace("]", "\\]")) + with tm.assert_raises_regex(TypeError, error_msg): + getattr(df, method)(2, columns) + + def test_n_all_dtypes(self, df_main_dtypes): + df = df_main_dtypes + df.nsmallest(2, list(set(df) - {'category_string', 'string'})) + df.nlargest(2, list(set(df) - {'category_string', 'string'})) + + def test_n_identical_values(self): + # GH15297 + df = pd.DataFrame({'a': [1] * 5, 'b': [1, 2, 3, 4, 5]}) + + result = df.nlargest(3, 'a') + expected = pd.DataFrame( + {'a': [1] * 3, 'b': [1, 2, 3]}, index=[0, 1, 2] + ) + tm.assert_frame_equal(result, expected) + + result = df.nsmallest(3, 'a') + expected = pd.DataFrame({'a': [1] * 3, 'b': [1, 2, 3]}) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + 'n, order', + product([1, 2, 3, 4, 5], + [['a', 'b', 'c'], + ['c', 'b', 'a'], + ['a'], + ['b'], + ['a', 'b'], + ['c', 'b']])) + def test_n_duplicate_index(self, df_duplicates, n, order): + # GH 13412 + + df = df_duplicates + result = df.nsmallest(n, order) + 
expected = df.sort_values(order).head(n) + tm.assert_frame_equal(result, expected) + + result = df.nlargest(n, order) + expected = df.sort_values(order, ascending=False).head(n) + tm.assert_frame_equal(result, expected) + + def test_series_broadcasting(self): + # smoke test for numpy warnings + # GH 16378, GH 16306 + df = DataFrame([1.0, 1.0, 1.0]) + df_nan = DataFrame({'A': [np.nan, 2.0, np.nan]}) + s = Series([1, 1, 1]) + s_nan = Series([np.nan, np.nan, 1]) + + with tm.assert_produces_warning(None): + df_nan.clip_lower(s, axis=0) + for op in ['lt', 'le', 'gt', 'ge', 'eq', 'ne']: + getattr(df, op)(s_nan, axis=0) + + def test_series_nat_conversion(self): + # GH 18521 + # Check rank does not mutate DataFrame + df = DataFrame(np.random.randn(10, 3), dtype='float64') + expected = df.copy() + df.rank() + result = df + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py new file mode 100644 index 0000000000000..8ba5469480e64 --- /dev/null +++ b/pandas/tests/frame/test_api.py @@ -0,0 +1,519 @@ +# -*- coding: utf-8 -*- + +from __future__ import print_function + +import pytest + +# pylint: disable-msg=W0612,E1101 +from copy import deepcopy +import sys +from distutils.version import LooseVersion + +from pandas.compat import range, lrange, long +from pandas import compat + +from numpy.random import randn +import numpy as np + +from pandas import (DataFrame, Series, date_range, timedelta_range, + Categorical, SparseDataFrame) +import pandas as pd + +from pandas.util.testing import (assert_almost_equal, + assert_series_equal, + assert_frame_equal) + +import pandas.util.testing as tm + +from pandas.tests.frame.common import TestData + + +class SharedWithSparse(object): + """ + A collection of tests DataFrame and SparseDataFrame can share. + + In generic tests on this class, use ``self._assert_frame_equal()`` and + ``self._assert_series_equal()`` which are implemented in sub-classes + and dispatch correctly. 
+ """ + def _assert_frame_equal(self, left, right): + """Dispatch to frame class dependent assertion""" + raise NotImplementedError + + def _assert_series_equal(self, left, right): + """Dispatch to series class dependent assertion""" + raise NotImplementedError + + def test_copy_index_name_checking(self): + # don't want to be able to modify the index stored elsewhere after + # making a copy + for attr in ('index', 'columns'): + ind = getattr(self.frame, attr) + ind.name = None + cp = self.frame.copy() + getattr(cp, attr).name = 'foo' + assert getattr(self.frame, attr).name is None + + def test_getitem_pop_assign_name(self): + s = self.frame['A'] + assert s.name == 'A' + + s = self.frame.pop('A') + assert s.name == 'A' + + s = self.frame.loc[:, 'B'] + assert s.name == 'B' + + s2 = s.loc[:] + assert s2.name == 'B' + + def test_get_value(self): + for idx in self.frame.index: + for col in self.frame.columns: + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = self.frame.get_value(idx, col) + expected = self.frame[col][idx] + tm.assert_almost_equal(result, expected) + + def test_add_prefix_suffix(self): + with_prefix = self.frame.add_prefix('foo#') + expected = pd.Index(['foo#%s' % c for c in self.frame.columns]) + tm.assert_index_equal(with_prefix.columns, expected) + + with_suffix = self.frame.add_suffix('#foo') + expected = pd.Index(['%s#foo' % c for c in self.frame.columns]) + tm.assert_index_equal(with_suffix.columns, expected) + + with_pct_prefix = self.frame.add_prefix('%') + expected = pd.Index(['%{}'.format(c) for c in self.frame.columns]) + tm.assert_index_equal(with_pct_prefix.columns, expected) + + with_pct_suffix = self.frame.add_suffix('%') + expected = pd.Index(['{}%'.format(c) for c in self.frame.columns]) + tm.assert_index_equal(with_pct_suffix.columns, expected) + + def test_get_axis(self): + f = self.frame + assert f._get_axis_number(0) == 0 + assert f._get_axis_number(1) == 1 + assert f._get_axis_number('index') == 
0 + assert f._get_axis_number('rows') == 0 + assert f._get_axis_number('columns') == 1 + + assert f._get_axis_name(0) == 'index' + assert f._get_axis_name(1) == 'columns' + assert f._get_axis_name('index') == 'index' + assert f._get_axis_name('rows') == 'index' + assert f._get_axis_name('columns') == 'columns' + + assert f._get_axis(0) is f.index + assert f._get_axis(1) is f.columns + + tm.assert_raises_regex( + ValueError, 'No axis named', f._get_axis_number, 2) + tm.assert_raises_regex( + ValueError, 'No axis.*foo', f._get_axis_name, 'foo') + tm.assert_raises_regex( + ValueError, 'No axis.*None', f._get_axis_name, None) + tm.assert_raises_regex(ValueError, 'No axis named', + f._get_axis_number, None) + + def test_keys(self): + getkeys = self.frame.keys + assert getkeys() is self.frame.columns + + def test_column_contains_typeerror(self): + try: + self.frame.columns in self.frame + except TypeError: + pass + + def test_tab_completion(self): + # DataFrame whose columns are identifiers shall have them in __dir__. + df = pd.DataFrame([list('abcd'), list('efgh')], columns=list('ABCD')) + for key in list('ABCD'): + assert key in dir(df) + assert isinstance(df.__getitem__('A'), pd.Series) + + # DataFrame whose first-level columns are identifiers shall have + # them in __dir__. 
+ df = pd.DataFrame( + [list('abcd'), list('efgh')], + columns=pd.MultiIndex.from_tuples(list(zip('ABCD', 'EFGH')))) + for key in list('ABCD'): + assert key in dir(df) + for key in list('EFGH'): + assert key not in dir(df) + assert isinstance(df.__getitem__('A'), pd.DataFrame) + + def test_not_hashable(self): + df = self.klass([1]) + pytest.raises(TypeError, hash, df) + pytest.raises(TypeError, hash, self.empty) + + def test_new_empty_index(self): + df1 = self.klass(randn(0, 3)) + df2 = self.klass(randn(0, 3)) + df1.index.name = 'foo' + assert df2.index.name is None + + def test_array_interface(self): + with np.errstate(all='ignore'): + result = np.sqrt(self.frame) + assert isinstance(result, type(self.frame)) + assert result.index is self.frame.index + assert result.columns is self.frame.columns + + self._assert_frame_equal(result, self.frame.apply(np.sqrt)) + + def test_get_agg_axis(self): + cols = self.frame._get_agg_axis(0) + assert cols is self.frame.columns + + idx = self.frame._get_agg_axis(1) + assert idx is self.frame.index + + pytest.raises(ValueError, self.frame._get_agg_axis, 2) + + def test_nonzero(self): + assert self.empty.empty + + assert not self.frame.empty + assert not self.mixed_frame.empty + + # corner case + df = DataFrame({'A': [1., 2., 3.], + 'B': ['a', 'b', 'c']}, + index=np.arange(3)) + del df['A'] + assert not df.empty + + def test_iteritems(self): + df = self.klass([[1, 2, 3], [4, 5, 6]], columns=['a', 'a', 'b']) + for k, v in compat.iteritems(df): + assert isinstance(v, self.klass._constructor_sliced) + + def test_items(self): + # issue #17213, #13918 + cols = ['a', 'b', 'c'] + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=cols) + for c, (k, v) in zip(cols, df.items()): + assert c == k + assert isinstance(v, Series) + assert (df[k] == v).all() + + def test_iter(self): + assert tm.equalContents(list(self.frame), self.frame.columns) + + def test_iterrows(self): + for k, v in self.frame.iterrows(): + exp = self.frame.loc[k] + 
self._assert_series_equal(v, exp) + + for k, v in self.mixed_frame.iterrows(): + exp = self.mixed_frame.loc[k] + self._assert_series_equal(v, exp) + + def test_iterrows_iso8601(self): + # GH19671 + if self.klass == SparseDataFrame: + pytest.xfail(reason='SparseBlock datetime type not implemented.') + + s = self.klass( + {'non_iso8601': ['M1701', 'M1802', 'M1903', 'M2004'], + 'iso8601': date_range('2000-01-01', periods=4, freq='M')}) + for k, v in s.iterrows(): + exp = s.loc[k] + self._assert_series_equal(v, exp) + + def test_itertuples(self): + for i, tup in enumerate(self.frame.itertuples()): + s = self.klass._constructor_sliced(tup[1:]) + s.name = tup[0] + expected = self.frame.iloc[i, :].reset_index(drop=True) + self._assert_series_equal(s, expected) + + df = self.klass({'floats': np.random.randn(5), + 'ints': lrange(5)}, columns=['floats', 'ints']) + + for tup in df.itertuples(index=False): + assert isinstance(tup[1], (int, long)) + + df = self.klass(data={"a": [1, 2, 3], "b": [4, 5, 6]}) + dfaa = df[['a', 'a']] + + assert (list(dfaa.itertuples()) == + [(0, 1, 1), (1, 2, 2), (2, 3, 3)]) + + # repr with be int/long on 32-bit/windows + if not (compat.is_platform_windows() or compat.is_platform_32bit()): + assert (repr(list(df.itertuples(name=None))) == + '[(0, 1, 4), (1, 2, 5), (2, 3, 6)]') + + tup = next(df.itertuples(name='TestName')) + + if LooseVersion(sys.version) >= LooseVersion('2.7'): + assert tup._fields == ('Index', 'a', 'b') + assert (tup.Index, tup.a, tup.b) == tup + assert type(tup).__name__ == 'TestName' + + df.columns = ['def', 'return'] + tup2 = next(df.itertuples(name='TestName')) + assert tup2 == (0, 1, 4) + + if LooseVersion(sys.version) >= LooseVersion('2.7'): + assert tup2._fields == ('Index', '_1', '_2') + + df3 = DataFrame({'f' + str(i): [i] for i in range(1024)}) + # will raise SyntaxError if trying to create namedtuple + tup3 = next(df3.itertuples()) + assert not hasattr(tup3, '_fields') + assert isinstance(tup3, tuple) + + def 
test_sequence_like_with_categorical(self): + + # GH 7839 + # make sure can iterate + df = DataFrame({"id": [1, 2, 3, 4, 5, 6], + "raw_grade": ['a', 'b', 'b', 'a', 'a', 'e']}) + df['grade'] = Categorical(df['raw_grade']) + + # basic sequencing testing + result = list(df.grade.values) + expected = np.array(df.grade.values).tolist() + tm.assert_almost_equal(result, expected) + + # iteration + for t in df.itertuples(index=False): + str(t) + + for row, s in df.iterrows(): + str(s) + + for c, col in df.iteritems(): + str(s) + + def test_len(self): + assert len(self.frame) == len(self.frame.index) + + def test_values(self): + frame = self.frame + arr = frame.values + + frame_cols = frame.columns + for i, row in enumerate(arr): + for j, value in enumerate(row): + col = frame_cols[j] + if np.isnan(value): + assert np.isnan(frame[col][i]) + else: + assert value == frame[col][i] + + # mixed type + arr = self.mixed_frame[['foo', 'A']].values + assert arr[0, 0] == 'bar' + + df = self.klass({'real': [1, 2, 3], 'complex': [1j, 2j, 3j]}) + arr = df.values + assert arr[0, 0] == 1j + + # single block corner case + arr = self.frame[['A', 'B']].values + expected = self.frame.reindex(columns=['A', 'B']).values + assert_almost_equal(arr, expected) + + def test_transpose(self): + frame = self.frame + dft = frame.T + for idx, series in compat.iteritems(dft): + for col, value in compat.iteritems(series): + if np.isnan(value): + assert np.isnan(frame[col][idx]) + else: + assert value == frame[col][idx] + + # mixed type + index, data = tm.getMixedTypeDict() + mixed = self.klass(data, index=index) + + mixed_T = mixed.T + for col, s in compat.iteritems(mixed_T): + assert s.dtype == np.object_ + + def test_swapaxes(self): + df = self.klass(np.random.randn(10, 5)) + self._assert_frame_equal(df.T, df.swapaxes(0, 1)) + self._assert_frame_equal(df.T, df.swapaxes(1, 0)) + self._assert_frame_equal(df, df.swapaxes(0, 0)) + pytest.raises(ValueError, df.swapaxes, 2, 5) + + def test_axis_aliases(self): + 
f = self.frame + + # reg name + expected = f.sum(axis=0) + result = f.sum(axis='index') + assert_series_equal(result, expected) + + expected = f.sum(axis=1) + result = f.sum(axis='columns') + assert_series_equal(result, expected) + + def test_class_axis(self): + # https://github.com/pandas-dev/pandas/issues/18147 + DataFrame.index # no exception! + DataFrame.columns # no exception! + + def test_more_values(self): + values = self.mixed_frame.values + assert values.shape[1] == len(self.mixed_frame.columns) + + def test_repr_with_mi_nat(self): + df = self.klass({'X': [1, 2]}, + index=[[pd.NaT, pd.Timestamp('20130101')], ['a', 'b']]) + res = repr(df) + exp = ' X\nNaT a 1\n2013-01-01 b 2' + assert res == exp + + def test_iteritems_names(self): + for k, v in compat.iteritems(self.mixed_frame): + assert v.name == k + + def test_series_put_names(self): + series = self.mixed_frame._series + for k, v in compat.iteritems(series): + assert v.name == k + + def test_empty_nonzero(self): + df = self.klass([1, 2, 3]) + assert not df.empty + df = self.klass(index=[1], columns=[1]) + assert not df.empty + df = self.klass(index=['a', 'b'], columns=['c', 'd']).dropna() + assert df.empty + assert df.T.empty + empty_frames = [self.klass(), + self.klass(index=[1]), + self.klass(columns=[1]), + self.klass({1: []})] + for df in empty_frames: + assert df.empty + assert df.T.empty + + def test_with_datetimelikes(self): + + df = self.klass({'A': date_range('20130101', periods=10), + 'B': timedelta_range('1 day', periods=10)}) + t = df.T + + result = t.get_dtype_counts() + expected = Series({'object': 10}) + tm.assert_series_equal(result, expected) + + +class TestDataFrameMisc(SharedWithSparse, TestData): + + klass = DataFrame + # SharedWithSparse tests use generic, klass-agnostic assertion + _assert_frame_equal = staticmethod(assert_frame_equal) + _assert_series_equal = staticmethod(assert_series_equal) + + def test_values(self): + self.frame.values[:, 0] = 5. 
+ assert (self.frame.values[:, 0] == 5).all() + + def test_as_matrix_deprecated(self): + # GH18458 + with tm.assert_produces_warning(FutureWarning): + result = self.frame.as_matrix(columns=self.frame.columns.tolist()) + expected = self.frame.values + tm.assert_numpy_array_equal(result, expected) + + def test_deepcopy(self): + cp = deepcopy(self.frame) + series = cp['A'] + series[:] = 10 + for idx, value in compat.iteritems(series): + assert self.frame['A'][idx] != value + + def test_transpose_get_view(self): + dft = self.frame.T + dft.values[:, 5:10] = 5 + + assert (self.frame.values[5:10] == 5).all() + + def test_inplace_return_self(self): + # re #1893 + + data = DataFrame({'a': ['foo', 'bar', 'baz', 'qux'], + 'b': [0, 0, 1, 1], + 'c': [1, 2, 3, 4]}) + + def _check_f(base, f): + result = f(base) + assert result is None + + # -----DataFrame----- + + # set_index + f = lambda x: x.set_index('a', inplace=True) + _check_f(data.copy(), f) + + # reset_index + f = lambda x: x.reset_index(inplace=True) + _check_f(data.set_index('a'), f) + + # drop_duplicates + f = lambda x: x.drop_duplicates(inplace=True) + _check_f(data.copy(), f) + + # sort + f = lambda x: x.sort_values('b', inplace=True) + _check_f(data.copy(), f) + + # sort_index + f = lambda x: x.sort_index(inplace=True) + _check_f(data.copy(), f) + + # fillna + f = lambda x: x.fillna(0, inplace=True) + _check_f(data.copy(), f) + + # replace + f = lambda x: x.replace(1, 0, inplace=True) + _check_f(data.copy(), f) + + # rename + f = lambda x: x.rename({1: 'foo'}, inplace=True) + _check_f(data.copy(), f) + + # -----Series----- + d = data.copy()['c'] + + # reset_index + f = lambda x: x.reset_index(inplace=True, drop=True) + _check_f(data.set_index('a')['c'], f) + + # fillna + f = lambda x: x.fillna(0, inplace=True) + _check_f(d.copy(), f) + + # replace + f = lambda x: x.replace(1, 0, inplace=True) + _check_f(d.copy(), f) + + # rename + f = lambda x: x.rename({1: 'foo'}, inplace=True) + _check_f(d.copy(), f) + + def 
test_tab_complete_warning(self, ip): + # https://github.com/pandas-dev/pandas/issues/16409 + pytest.importorskip('IPython', minversion="6.0.0") + from IPython.core.completer import provisionalcompleter + + code = "import pandas as pd; df = pd.DataFrame()" + ip.run_code(code) + with tm.assert_produces_warning(None): + with provisionalcompleter('ignore'): + list(ip.Completer.completions('df.', 1)) diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index 30fde4b5b78d8..a057ca0879cac 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -2,50 +2,54 @@ from __future__ import print_function +import pytest + from datetime import datetime import warnings import numpy as np -from pandas import (notnull, DataFrame, Series, MultiIndex, date_range, +from pandas import (notna, DataFrame, Series, MultiIndex, date_range, Timestamp, compat) import pandas as pd -from pandas.types.dtypes import CategoricalDtype +from pandas.core.dtypes.dtypes import CategoricalDtype +from pandas.core.apply import frame_apply from pandas.util.testing import (assert_series_equal, assert_frame_equal) import pandas.util.testing as tm from pandas.tests.frame.common import TestData -class TestDataFrameApply(tm.TestCase, TestData): +class TestDataFrameApply(TestData): def test_apply(self): with np.errstate(all='ignore'): # ufunc applied = self.frame.apply(np.sqrt) - assert_series_equal(np.sqrt(self.frame['A']), applied['A']) + tm.assert_series_equal(np.sqrt(self.frame['A']), applied['A']) # aggregator applied = self.frame.apply(np.mean) - self.assertEqual(applied['A'], np.mean(self.frame['A'])) + assert applied['A'] == np.mean(self.frame['A']) d = self.frame.index[0] applied = self.frame.apply(np.mean, axis=1) - self.assertEqual(applied[d], np.mean(self.frame.xs(d))) - self.assertIs(applied.index, self.frame.index) # want this + assert applied[d] == np.mean(self.frame.xs(d)) + assert applied.index is self.frame.index # want this # invalid axis 
df = DataFrame( [[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=['a', 'a', 'c']) - self.assertRaises(ValueError, df.apply, lambda x: x, 2) + pytest.raises(ValueError, df.apply, lambda x: x, 2) - # GH9573 + # see gh-9573 df = DataFrame({'c0': ['A', 'A', 'B', 'B'], 'c1': ['C', 'C', 'D', 'D']}) df = df.apply(lambda ts: ts.astype('category')) - self.assertEqual(df.shape, (4, 2)) - self.assertTrue(isinstance(df['c0'].dtype, CategoricalDtype)) - self.assertTrue(isinstance(df['c1'].dtype, CategoricalDtype)) + + assert df.shape == (4, 2) + assert isinstance(df['c0'].dtype, CategoricalDtype) + assert isinstance(df['c1'].dtype, CategoricalDtype) def test_apply_mixed_datetimelike(self): # mixed datetimelike @@ -58,10 +62,10 @@ def test_apply_mixed_datetimelike(self): def test_apply_empty(self): # empty applied = self.empty.apply(np.sqrt) - self.assertTrue(applied.empty) + assert applied.empty applied = self.empty.apply(np.mean) - self.assertTrue(applied.empty) + assert applied.empty no_rows = self.frame[:0] result = no_rows.apply(lambda x: x.mean()) @@ -78,23 +82,29 @@ def test_apply_empty(self): rs = xp.apply(lambda x: x['a'], axis=1) assert_frame_equal(xp, rs) + def test_apply_with_reduce_empty(self): # reduce with an empty DataFrame x = [] - result = self.empty.apply(x.append, axis=1, reduce=False) + result = self.empty.apply(x.append, axis=1, result_type='expand') assert_frame_equal(result, self.empty) - result = self.empty.apply(x.append, axis=1, reduce=True) + result = self.empty.apply(x.append, axis=1, result_type='reduce') assert_series_equal(result, Series( [], index=pd.Index([], dtype=object))) empty_with_cols = DataFrame(columns=['a', 'b', 'c']) - result = empty_with_cols.apply(x.append, axis=1, reduce=False) + result = empty_with_cols.apply(x.append, axis=1, result_type='expand') assert_frame_equal(result, empty_with_cols) - result = empty_with_cols.apply(x.append, axis=1, reduce=True) + result = empty_with_cols.apply(x.append, axis=1, result_type='reduce') 
assert_series_equal(result, Series( [], index=pd.Index([], dtype=object))) # Ensure that x.append hasn't been called - self.assertEqual(x, []) + assert x == [] + + def test_apply_deprecate_reduce(self): + with warnings.catch_warnings(record=True): + x = [] + self.empty.apply(x.append, axis=1, result_type='reduce') def test_apply_standard_nonunique(self): df = DataFrame( @@ -106,17 +116,90 @@ def test_apply_standard_nonunique(self): rs = df.T.apply(lambda s: s[0], axis=0) assert_series_equal(rs, xp) - def test_apply_broadcast(self): - broadcasted = self.frame.apply(np.mean, broadcast=True) - agged = self.frame.apply(np.mean) + def test_with_string_args(self): - for col, ts in compat.iteritems(broadcasted): - self.assertTrue((ts == agged[col]).all()) + for arg in ['sum', 'mean', 'min', 'max', 'std']: + result = self.frame.apply(arg) + expected = getattr(self.frame, arg)() + tm.assert_series_equal(result, expected) + + result = self.frame.apply(arg, axis=1) + expected = getattr(self.frame, arg)(axis=1) + tm.assert_series_equal(result, expected) + + def test_apply_broadcast_deprecated(self): + with tm.assert_produces_warning(FutureWarning): + self.frame.apply(np.mean, broadcast=True) + + def test_apply_broadcast(self): - broadcasted = self.frame.apply(np.mean, axis=1, broadcast=True) - agged = self.frame.apply(np.mean, axis=1) - for idx in broadcasted.index: - self.assertTrue((broadcasted.xs(idx) == agged[idx]).all()) + # scalars + result = self.frame.apply(np.mean, result_type='broadcast') + expected = DataFrame([self.frame.mean()], index=self.frame.index) + tm.assert_frame_equal(result, expected) + + result = self.frame.apply(np.mean, axis=1, result_type='broadcast') + m = self.frame.mean(axis=1) + expected = DataFrame({c: m for c in self.frame.columns}) + tm.assert_frame_equal(result, expected) + + # lists + result = self.frame.apply( + lambda x: list(range(len(self.frame.columns))), + axis=1, + result_type='broadcast') + m = list(range(len(self.frame.columns))) + 
expected = DataFrame([m] * len(self.frame.index), + dtype='float64', + index=self.frame.index, + columns=self.frame.columns) + tm.assert_frame_equal(result, expected) + + result = self.frame.apply(lambda x: list(range(len(self.frame.index))), + result_type='broadcast') + m = list(range(len(self.frame.index))) + expected = DataFrame({c: m for c in self.frame.columns}, + dtype='float64', + index=self.frame.index) + tm.assert_frame_equal(result, expected) + + # preserve columns + df = DataFrame(np.tile(np.arange(3), 6).reshape(6, -1) + 1, + columns=list('ABC')) + result = df.apply(lambda x: [1, 2, 3], + axis=1, + result_type='broadcast') + tm.assert_frame_equal(result, df) + + df = DataFrame(np.tile(np.arange(3), 6).reshape(6, -1) + 1, + columns=list('ABC')) + result = df.apply(lambda x: Series([1, 2, 3], index=list('abc')), + axis=1, + result_type='broadcast') + expected = df.copy() + tm.assert_frame_equal(result, expected) + + def test_apply_broadcast_error(self): + df = DataFrame( + np.tile(np.arange(3, dtype='int64'), 6).reshape(6, -1) + 1, + columns=['A', 'B', 'C']) + + # > 1 ndim + with pytest.raises(ValueError): + df.apply(lambda x: np.array([1, 2]).reshape(-1, 2), + axis=1, + result_type='broadcast') + + # cannot broadcast + with pytest.raises(ValueError): + df.apply(lambda x: [1, 2], + axis=1, + result_type='broadcast') + + with pytest.raises(ValueError): + df.apply(lambda x: Series([1, 2]), + axis=1, + result_type='broadcast') def test_apply_raw(self): result0 = self.frame.apply(np.mean, raw=True) @@ -136,11 +219,12 @@ def test_apply_raw(self): def test_apply_axis1(self): d = self.frame.index[0] tapplied = self.frame.apply(np.mean, axis=1) - self.assertEqual(tapplied[d], np.mean(self.frame.xs(d))) + assert tapplied[d] == np.mean(self.frame.xs(d)) def test_apply_ignore_failures(self): - result = self.mixed_frame._apply_standard(np.mean, 0, - ignore_failures=True) + result = frame_apply(self.mixed_frame, + np.mean, 0, + ignore_failures=True).apply_standard() 
expected = self.mixed_frame._get_numeric_data().apply(np.mean) assert_series_equal(result, expected) @@ -176,10 +260,10 @@ def _checkit(axis=0, raw=False): res = df.apply(f, axis=axis, raw=raw) if is_reduction: agg_axis = df._get_agg_axis(axis) - tm.assertIsInstance(res, Series) - self.assertIs(res.index, agg_axis) + assert isinstance(res, Series) + assert res.index is agg_axis else: - tm.assertIsInstance(res, DataFrame) + assert isinstance(res, DataFrame) _checkit() _checkit(axis=1) @@ -192,8 +276,8 @@ def _checkit(axis=0, raw=False): _check(no_index, lambda x: x) _check(no_index, lambda x: x.mean()) - result = no_cols.apply(lambda x: x.mean(), broadcast=True) - tm.assertIsInstance(result, DataFrame) + result = no_cols.apply(lambda x: x.mean(), result_type='broadcast') + assert isinstance(result, DataFrame) def test_apply_with_args_kwds(self): def add_some(x, howmuch=0): @@ -264,18 +348,17 @@ def transform(row): return row def transform2(row): - if (notnull(row['C']) and row['C'].startswith('shin') and + if (notna(row['C']) and row['C'].startswith('shin') and row['A'] == 'foo'): row['D'] = 7 return row try: - transformed = data.apply(transform, axis=1) # noqa + data.apply(transform, axis=1) except AttributeError as e: - self.assertEqual(len(e.args), 2) - self.assertEqual(e.args[1], 'occurred at index 4') - self.assertEqual( - e.args[0], "'float' object has no attribute 'startswith'") + assert len(e.args) == 2 + assert e.args[1] == 'occurred at index 4' + assert e.args[0] == "'float' object has no attribute 'startswith'" def test_apply_bug(self): @@ -335,33 +418,37 @@ def test_apply_attach_name(self): result = self.frame.apply(lambda x: np.repeat(x.name, len(x)), axis=1) - expected = DataFrame(np.tile(self.frame.index, - (len(self.frame.columns), 1)).T, - index=self.frame.index, - columns=self.frame.columns) - assert_frame_equal(result, expected) + expected = Series(np.repeat(t[0], len(self.frame.columns)) + for t in self.frame.itertuples()) + expected.index = 
self.frame.index + assert_series_equal(result, expected) def test_apply_multi_index(self): - s = DataFrame([[1, 2], [3, 4], [5, 6]]) - s.index = MultiIndex.from_arrays([['a', 'a', 'b'], ['c', 'd', 'd']]) - s.columns = ['col1', 'col2'] - res = s.apply(lambda x: Series({'min': min(x), 'max': max(x)}), 1) - tm.assertIsInstance(res.index, MultiIndex) + index = MultiIndex.from_arrays([['a', 'a', 'b'], ['c', 'd', 'd']]) + s = DataFrame([[1, 2], [3, 4], [5, 6]], + index=index, + columns=['col1', 'col2']) + result = s.apply( + lambda x: Series({'min': min(x), 'max': max(x)}), 1) + expected = DataFrame([[1, 2], [3, 4], [5, 6]], + index=index, + columns=['min', 'max']) + assert_frame_equal(result, expected, check_like=True) def test_apply_dict(self): # GH 8735 A = DataFrame([['foo', 'bar'], ['spam', 'eggs']]) - A_dicts = pd.Series([dict([(0, 'foo'), (1, 'spam')]), - dict([(0, 'bar'), (1, 'eggs')])]) + A_dicts = Series([dict([(0, 'foo'), (1, 'spam')]), + dict([(0, 'bar'), (1, 'eggs')])]) B = DataFrame([[0, 1], [2, 3]]) - B_dicts = pd.Series([dict([(0, 0), (1, 2)]), dict([(0, 1), (1, 3)])]) + B_dicts = Series([dict([(0, 0), (1, 2)]), dict([(0, 1), (1, 3)])]) fn = lambda x: x.to_dict() for df, dicts in [(A, A_dicts), (B, B_dicts)]: - reduce_true = df.apply(fn, reduce=True) - reduce_false = df.apply(fn, reduce=False) - reduce_none = df.apply(fn, reduce=None) + reduce_true = df.apply(fn, result_type='reduce') + reduce_false = df.apply(fn, result_type='expand') + reduce_none = df.apply(fn) assert_series_equal(reduce_true, dicts) assert_frame_equal(reduce_false, df) @@ -369,23 +456,23 @@ def test_apply_dict(self): def test_applymap(self): applied = self.frame.applymap(lambda x: x * 2) - assert_frame_equal(applied, self.frame * 2) - result = self.frame.applymap(type) + tm.assert_frame_equal(applied, self.frame * 2) + self.frame.applymap(type) - # GH #465, function returning tuples + # gh-465: function returning tuples result = self.frame.applymap(lambda x: (x, x)) - 
tm.assertIsInstance(result['A'][0], tuple) + assert isinstance(result['A'][0], tuple) - # GH 2909, object conversion to float in constructor? + # gh-2909: object conversion to float in constructor? df = DataFrame(data=[1, 'a']) result = df.applymap(lambda x: x) - self.assertEqual(result.dtypes[0], object) + assert result.dtypes[0] == object df = DataFrame(data=[1., 'a']) result = df.applymap(lambda x: x) - self.assertEqual(result.dtypes[0], object) + assert result.dtypes[0] == object - # GH2786 + # see gh-2786 df = DataFrame(np.random.random((3, 4))) df2 = df.copy() cols = ['a', 'a', 'a', 'a'] @@ -394,16 +481,16 @@ def test_applymap(self): expected = df2.applymap(str) expected.columns = cols result = df.applymap(str) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # datetime/timedelta df['datetime'] = Timestamp('20130101') df['timedelta'] = pd.Timedelta('1 min') result = df.applymap(str) for f in ['datetime', 'timedelta']: - self.assertEqual(result.loc[0, f], str(df.loc[0, f])) + assert result.loc[0, f] == str(df.loc[0, f]) - # GH 8222 + # see gh-8222 empty_frames = [pd.DataFrame(), pd.DataFrame(columns=list('ABC')), pd.DataFrame(index=list('ABC')), @@ -413,6 +500,16 @@ def test_applymap(self): result = frame.applymap(func) tm.assert_frame_equal(result, frame) + def test_applymap_box_timestamps(self): + # #2689, #2627 + ser = pd.Series(date_range('1/1/2000', periods=10)) + + def func(x): + return (x.hour, x.day, x.month) + + # it works! + pd.DataFrame(ser).applymap(func) + def test_applymap_box(self): # ufunc will not be boxed. 
Same test cases as the test_map_box df = pd.DataFrame({'a': [pd.Timestamp('2011-01-01'), @@ -438,10 +535,10 @@ def test_frame_apply_dont_convert_datetime64(self): df = df.applymap(lambda x: x + BDay()) df = df.applymap(lambda x: x + BDay()) - self.assertTrue(df.x1.dtype == 'M8[ns]') + assert df.x1.dtype == 'M8[ns]' - # See gh-12244 def test_apply_non_numpy_dtype(self): + # See gh-12244 df = DataFrame({'dt': pd.date_range( "2015-01-01", periods=3, tz='Europe/Brussels')}) result = df.apply(lambda x: x) @@ -455,3 +552,463 @@ def test_apply_non_numpy_dtype(self): df = DataFrame({'dt': ['a', 'b', 'c', 'a']}, dtype='category') result = df.apply(lambda x: x) assert_frame_equal(result, df) + + +class TestInferOutputShape(object): + # the user has supplied an opaque UDF where + # they are transforming the input that requires + # us to infer the output + + def test_infer_row_shape(self): + # gh-17437 + # if row shape is changing, infer it + df = pd.DataFrame(np.random.rand(10, 2)) + result = df.apply(np.fft.fft, axis=0) + assert result.shape == (10, 2) + + result = df.apply(np.fft.rfft, axis=0) + assert result.shape == (6, 2) + + def test_with_dictlike_columns(self): + # gh 17602 + df = DataFrame([[1, 2], [1, 2]], columns=['a', 'b']) + result = df.apply(lambda x: {'s': x['a'] + x['b']}, + axis=1) + expected = Series([{'s': 3} for t in df.itertuples()]) + assert_series_equal(result, expected) + + df['tm'] = [pd.Timestamp('2017-05-01 00:00:00'), + pd.Timestamp('2017-05-02 00:00:00')] + result = df.apply(lambda x: {'s': x['a'] + x['b']}, + axis=1) + assert_series_equal(result, expected) + + # compose a series + result = (df['a'] + df['b']).apply(lambda x: {'s': x}) + expected = Series([{'s': 3}, {'s': 3}]) + assert_series_equal(result, expected) + + # gh-18775 + df = DataFrame() + df["author"] = ["X", "Y", "Z"] + df["publisher"] = ["BBC", "NBC", "N24"] + df["date"] = pd.to_datetime(['17-10-2010 07:15:30', + '13-05-2011 08:20:35', + '15-01-2013 09:09:09']) + result = 
df.apply(lambda x: {}, axis=1) + expected = Series([{}, {}, {}]) + assert_series_equal(result, expected) + + def test_with_dictlike_columns_with_infer(self): + # gh 17602 + df = DataFrame([[1, 2], [1, 2]], columns=['a', 'b']) + result = df.apply(lambda x: {'s': x['a'] + x['b']}, + axis=1, result_type='expand') + expected = DataFrame({'s': [3, 3]}) + assert_frame_equal(result, expected) + + df['tm'] = [pd.Timestamp('2017-05-01 00:00:00'), + pd.Timestamp('2017-05-02 00:00:00')] + result = df.apply(lambda x: {'s': x['a'] + x['b']}, + axis=1, result_type='expand') + assert_frame_equal(result, expected) + + def test_with_listlike_columns(self): + # gh-17348 + df = DataFrame({'a': Series(np.random.randn(4)), + 'b': ['a', 'list', 'of', 'words'], + 'ts': date_range('2016-10-01', periods=4, freq='H')}) + + result = df[['a', 'b']].apply(tuple, axis=1) + expected = Series([t[1:] for t in df[['a', 'b']].itertuples()]) + assert_series_equal(result, expected) + + result = df[['a', 'ts']].apply(tuple, axis=1) + expected = Series([t[1:] for t in df[['a', 'ts']].itertuples()]) + assert_series_equal(result, expected) + + # gh-18919 + df = DataFrame({'x': Series([['a', 'b'], ['q']]), + 'y': Series([['z'], ['q', 't']])}) + df.index = MultiIndex.from_tuples([('i0', 'j0'), ('i1', 'j1')]) + + result = df.apply( + lambda row: [el for el in row['x'] if el in row['y']], + axis=1) + expected = Series([[], ['q']], index=df.index) + assert_series_equal(result, expected) + + def test_infer_output_shape_columns(self): + # gh-18573 + + df = DataFrame({'number': [1., 2.], + 'string': ['foo', 'bar'], + 'datetime': [pd.Timestamp('2017-11-29 03:30:00'), + pd.Timestamp('2017-11-29 03:45:00')]}) + result = df.apply(lambda row: (row.number, row.string), axis=1) + expected = Series([(t.number, t.string) for t in df.itertuples()]) + assert_series_equal(result, expected) + + def test_infer_output_shape_listlike_columns(self): + # gh-16353 + + df = DataFrame(np.random.randn(6, 3), columns=['A', 'B', 'C']) + 
+ result = df.apply(lambda x: [1, 2, 3], axis=1) + expected = Series([[1, 2, 3] for t in df.itertuples()]) + assert_series_equal(result, expected) + + result = df.apply(lambda x: [1, 2], axis=1) + expected = Series([[1, 2] for t in df.itertuples()]) + assert_series_equal(result, expected) + + # gh-17970 + df = DataFrame({"a": [1, 2, 3]}, index=list('abc')) + + result = df.apply(lambda row: np.ones(1), axis=1) + expected = Series([np.ones(1) for t in df.itertuples()], + index=df.index) + assert_series_equal(result, expected) + + result = df.apply(lambda row: np.ones(2), axis=1) + expected = Series([np.ones(2) for t in df.itertuples()], + index=df.index) + assert_series_equal(result, expected) + + # gh-17892 + df = pd.DataFrame({'a': [pd.Timestamp('2010-02-01'), + pd.Timestamp('2010-02-04'), + pd.Timestamp('2010-02-05'), + pd.Timestamp('2010-02-06')], + 'b': [9, 5, 4, 3], + 'c': [5, 3, 4, 2], + 'd': [1, 2, 3, 4]}) + + def fun(x): + return (1, 2) + + result = df.apply(fun, axis=1) + expected = Series([(1, 2) for t in df.itertuples()]) + assert_series_equal(result, expected) + + def test_consistent_coerce_for_shapes(self): + # we want column names to NOT be propagated + # just because the shape matches the input shape + df = DataFrame(np.random.randn(4, 3), columns=['A', 'B', 'C']) + + result = df.apply(lambda x: [1, 2, 3], axis=1) + expected = Series([[1, 2, 3] for t in df.itertuples()]) + assert_series_equal(result, expected) + + result = df.apply(lambda x: [1, 2], axis=1) + expected = Series([[1, 2] for t in df.itertuples()]) + assert_series_equal(result, expected) + + def test_consistent_names(self): + # if a Series is returned, we should use the resulting index names + df = DataFrame( + np.tile(np.arange(3, dtype='int64'), 6).reshape(6, -1) + 1, + columns=['A', 'B', 'C']) + + result = df.apply(lambda x: Series([1, 2, 3], + index=['test', 'other', 'cols']), + axis=1) + expected = DataFrame( + np.tile(np.arange(3, dtype='int64'), 6).reshape(6, -1) + 1, + 
columns=['test', 'other', 'cols']) + assert_frame_equal(result, expected) + + result = df.apply( + lambda x: pd.Series([1, 2], index=['test', 'other']), axis=1) + expected = DataFrame( + np.tile(np.arange(2, dtype='int64'), 6).reshape(6, -1) + 1, + columns=['test', 'other']) + assert_frame_equal(result, expected) + + def test_result_type(self): + # result_type should be consistent no matter which + # path we take in the code + df = DataFrame( + np.tile(np.arange(3, dtype='int64'), 6).reshape(6, -1) + 1, + columns=['A', 'B', 'C']) + + result = df.apply(lambda x: [1, 2, 3], axis=1, result_type='expand') + expected = df.copy() + expected.columns = [0, 1, 2] + assert_frame_equal(result, expected) + + result = df.apply(lambda x: [1, 2], axis=1, result_type='expand') + expected = df[['A', 'B']].copy() + expected.columns = [0, 1] + assert_frame_equal(result, expected) + + # broadcast result + result = df.apply(lambda x: [1, 2, 3], axis=1, result_type='broadcast') + expected = df.copy() + assert_frame_equal(result, expected) + + columns = ['other', 'col', 'names'] + result = df.apply( + lambda x: pd.Series([1, 2, 3], + index=columns), + axis=1, + result_type='broadcast') + expected = df.copy() + assert_frame_equal(result, expected) + + # series result + result = df.apply(lambda x: Series([1, 2, 3], index=x.index), axis=1) + expected = df.copy() + assert_frame_equal(result, expected) + + # series result with other index + columns = ['other', 'col', 'names'] + result = df.apply( + lambda x: pd.Series([1, 2, 3], index=columns), + axis=1) + expected = df.copy() + expected.columns = columns + assert_frame_equal(result, expected) + + @pytest.mark.parametrize("result_type", ['foo', 1]) + def test_result_type_error(self, result_type): + # allowed result_type + df = DataFrame( + np.tile(np.arange(3, dtype='int64'), 6).reshape(6, -1) + 1, + columns=['A', 'B', 'C']) + + with pytest.raises(ValueError): + df.apply(lambda x: [1, 2, 3], + axis=1, + result_type=result_type) + + 
@pytest.mark.parametrize( + "box", + [lambda x: list(x), + lambda x: tuple(x), + lambda x: np.array(x, dtype='int64')], + ids=['list', 'tuple', 'array']) + def test_consistency_for_boxed(self, box): + # passing an array or list should not affect the output shape + df = DataFrame( + np.tile(np.arange(3, dtype='int64'), 6).reshape(6, -1) + 1, + columns=['A', 'B', 'C']) + + result = df.apply(lambda x: box([1, 2]), axis=1) + expected = Series([box([1, 2]) for t in df.itertuples()]) + assert_series_equal(result, expected) + + result = df.apply(lambda x: box([1, 2]), axis=1, result_type='expand') + expected = DataFrame( + np.tile(np.arange(2, dtype='int64'), 6).reshape(6, -1) + 1) + assert_frame_equal(result, expected) + + +def zip_frames(*frames): + """ + take a list of frames, zip the columns together for each + assume that these all have the first frame columns + + return a new frame + """ + columns = frames[0].columns + zipped = [f[c] for c in columns for f in frames] + return pd.concat(zipped, axis=1) + + +class TestDataFrameAggregate(TestData): + + def test_agg_transform(self): + + with np.errstate(all='ignore'): + + f_sqrt = np.sqrt(self.frame) + f_abs = np.abs(self.frame) + + # ufunc + result = self.frame.transform(np.sqrt) + expected = f_sqrt.copy() + assert_frame_equal(result, expected) + + result = self.frame.apply(np.sqrt) + assert_frame_equal(result, expected) + + result = self.frame.transform(np.sqrt) + assert_frame_equal(result, expected) + + # list-like + result = self.frame.apply([np.sqrt]) + expected = f_sqrt.copy() + expected.columns = pd.MultiIndex.from_product( + [self.frame.columns, ['sqrt']]) + assert_frame_equal(result, expected) + + result = self.frame.transform([np.sqrt]) + assert_frame_equal(result, expected) + + # multiple items in list + # these are in the order as if we are applying both + # functions per series and then concatting + expected = zip_frames(f_sqrt, f_abs) + expected.columns = pd.MultiIndex.from_product( + [self.frame.columns, 
['sqrt', 'absolute']]) + result = self.frame.apply([np.sqrt, np.abs]) + assert_frame_equal(result, expected) + + result = self.frame.transform(['sqrt', np.abs]) + assert_frame_equal(result, expected) + + def test_transform_and_agg_err(self): + # cannot both transform and agg + def f(): + self.frame.transform(['max', 'min']) + pytest.raises(ValueError, f) + + def f(): + with np.errstate(all='ignore'): + self.frame.agg(['max', 'sqrt']) + pytest.raises(ValueError, f) + + def f(): + with np.errstate(all='ignore'): + self.frame.transform(['max', 'sqrt']) + pytest.raises(ValueError, f) + + df = pd.DataFrame({'A': range(5), 'B': 5}) + + def f(): + with np.errstate(all='ignore'): + df.agg({'A': ['abs', 'sum'], 'B': ['mean', 'max']}) + + def test_demo(self): + # demonstration tests + df = pd.DataFrame({'A': range(5), 'B': 5}) + + result = df.agg(['min', 'max']) + expected = DataFrame({'A': [0, 4], 'B': [5, 5]}, + columns=['A', 'B'], + index=['min', 'max']) + tm.assert_frame_equal(result, expected) + + result = df.agg({'A': ['min', 'max'], 'B': ['sum', 'max']}) + expected = DataFrame({'A': [4.0, 0.0, np.nan], + 'B': [5.0, np.nan, 25.0]}, + columns=['A', 'B'], + index=['max', 'min', 'sum']) + tm.assert_frame_equal(result.reindex_like(expected), expected) + + def test_agg_dict_nested_renaming_depr(self): + + df = pd.DataFrame({'A': range(5), 'B': 5}) + + # nested renaming + with tm.assert_produces_warning(FutureWarning): + df.agg({'A': {'foo': 'min'}, + 'B': {'bar': 'max'}}) + + def test_agg_reduce(self): + # all reducers + expected = zip_frames(self.frame.mean().to_frame(), + self.frame.max().to_frame(), + self.frame.sum().to_frame()).T + expected.index = ['mean', 'max', 'sum'] + result = self.frame.agg(['mean', 'max', 'sum']) + assert_frame_equal(result, expected) + + # dict input with scalars + result = self.frame.agg({'A': 'mean', 'B': 'sum'}) + expected = Series([self.frame.A.mean(), self.frame.B.sum()], + index=['A', 'B']) + 
assert_series_equal(result.reindex_like(expected), expected) + + # dict input with lists + result = self.frame.agg({'A': ['mean'], 'B': ['sum']}) + expected = DataFrame({'A': Series([self.frame.A.mean()], + index=['mean']), + 'B': Series([self.frame.B.sum()], + index=['sum'])}) + assert_frame_equal(result.reindex_like(expected), expected) + + # dict input with lists with multiple + result = self.frame.agg({'A': ['mean', 'sum'], + 'B': ['sum', 'max']}) + expected = DataFrame({'A': Series([self.frame.A.mean(), + self.frame.A.sum()], + index=['mean', 'sum']), + 'B': Series([self.frame.B.sum(), + self.frame.B.max()], + index=['sum', 'max'])}) + assert_frame_equal(result.reindex_like(expected), expected) + + def test_nuiscance_columns(self): + + # GH 15015 + df = DataFrame({'A': [1, 2, 3], + 'B': [1., 2., 3.], + 'C': ['foo', 'bar', 'baz'], + 'D': pd.date_range('20130101', periods=3)}) + + result = df.agg('min') + expected = Series([1, 1., 'bar', pd.Timestamp('20130101')], + index=df.columns) + assert_series_equal(result, expected) + + result = df.agg(['min']) + expected = DataFrame([[1, 1., 'bar', pd.Timestamp('20130101')]], + index=['min'], columns=df.columns) + assert_frame_equal(result, expected) + + result = df.agg('sum') + expected = Series([6, 6., 'foobarbaz'], + index=['A', 'B', 'C']) + assert_series_equal(result, expected) + + result = df.agg(['sum']) + expected = DataFrame([[6, 6., 'foobarbaz']], + index=['sum'], columns=['A', 'B', 'C']) + assert_frame_equal(result, expected) + + def test_non_callable_aggregates(self): + + # GH 16405 + # 'size' is a property of frame/series + # validate that this is working + df = DataFrame({'A': [None, 2, 3], + 'B': [1.0, np.nan, 3.0], + 'C': ['foo', None, 'bar']}) + + # Function aggregate + result = df.agg({'A': 'count'}) + expected = Series({'A': 2}) + + assert_series_equal(result, expected) + + # Non-function aggregate + result = df.agg({'A': 'size'}) + expected = Series({'A': 3}) + + assert_series_equal(result, expected) + 
+ # Mix function and non-function aggs + result1 = df.agg(['count', 'size']) + result2 = df.agg({'A': ['count', 'size'], + 'B': ['count', 'size'], + 'C': ['count', 'size']}) + expected = pd.DataFrame({'A': {'count': 2, 'size': 3}, + 'B': {'count': 2, 'size': 3}, + 'C': {'count': 2, 'size': 3}}) + + assert_frame_equal(result1, result2, check_like=True) + assert_frame_equal(result2, expected, check_like=True) + + # Just functional string arg is same as calling df.arg() + result = df.agg('count') + expected = df.count() + + assert_series_equal(result, expected) + + # Just a string attribute arg same as calling df.arg + result = df.agg('size') + expected = df.size + + assert result == expected diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py new file mode 100644 index 0000000000000..65afe85628f8e --- /dev/null +++ b/pandas/tests/frame/test_arithmetic.py @@ -0,0 +1,277 @@ +# -*- coding: utf-8 -*- +import pytest +import numpy as np + +from pandas.compat import range + +import pandas as pd +import pandas.util.testing as tm + + +# ------------------------------------------------------------------- +# Comparisons + +class TestFrameComparisons(object): + def test_df_boolean_comparison_error(self): + # GH#4576 + # boolean comparisons with a tuple/list give unexpected results + df = pd.DataFrame(np.arange(6).reshape((3, 2))) + + # not shape compatible + with pytest.raises(ValueError): + df == (2, 2) + with pytest.raises(ValueError): + df == [2, 2] + + def test_df_float_none_comparison(self): + df = pd.DataFrame(np.random.randn(8, 3), index=range(8), + columns=['A', 'B', 'C']) + + with pytest.raises(TypeError): + df.__eq__(None) + + def test_df_string_comparison(self): + df = pd.DataFrame([{"a": 1, "b": "foo"}, {"a": 2, "b": "bar"}]) + mask_a = df.a > 1 + tm.assert_frame_equal(df[mask_a], df.loc[1:1, :]) + tm.assert_frame_equal(df[-mask_a], df.loc[0:0, :]) + + mask_b = df.b == "foo" + tm.assert_frame_equal(df[mask_b], df.loc[0:0, :]) + 
tm.assert_frame_equal(df[-mask_b], df.loc[1:1, :]) + + @pytest.mark.parametrize('opname', ['eq', 'ne', 'gt', 'lt', 'ge', 'le']) + def test_df_flex_cmp_constant_return_types(self, opname): + # GH#15077, non-empty DataFrame + df = pd.DataFrame({'x': [1, 2, 3], 'y': [1., 2., 3.]}) + const = 2 + + result = getattr(df, opname)(const).get_dtype_counts() + tm.assert_series_equal(result, pd.Series([2], ['bool'])) + + @pytest.mark.parametrize('opname', ['eq', 'ne', 'gt', 'lt', 'ge', 'le']) + def test_df_flex_cmp_constant_return_types_empty(self, opname): + # GH#15077 empty DataFrame + df = pd.DataFrame({'x': [1, 2, 3], 'y': [1., 2., 3.]}) + const = 2 + + empty = df.iloc[:0] + result = getattr(empty, opname)(const).get_dtype_counts() + tm.assert_series_equal(result, pd.Series([2], ['bool'])) + + @pytest.mark.parametrize('timestamps', [ + [pd.Timestamp('2012-01-01 13:00:00+00:00')] * 2, + [pd.Timestamp('2012-01-01 13:00:00')] * 2]) + def test_tz_aware_scalar_comparison(self, timestamps): + # Test for issue #15966 + df = pd.DataFrame({'test': timestamps}) + expected = pd.DataFrame({'test': [False, False]}) + tm.assert_frame_equal(df == -1, expected) + + +# ------------------------------------------------------------------- +# Arithmetic + +class TestFrameFlexArithmetic(object): + def test_df_add_flex_filled_mixed_dtypes(self): + # GH#19611 + dti = pd.date_range('2016-01-01', periods=3) + ser = pd.Series(['1 Day', 'NaT', '2 Days'], dtype='timedelta64[ns]') + df = pd.DataFrame({'A': dti, 'B': ser}) + other = pd.DataFrame({'A': ser, 'B': ser}) + fill = pd.Timedelta(days=1).to_timedelta64() + result = df.add(other, fill_value=fill) + + expected = pd.DataFrame( + {'A': pd.Series(['2016-01-02', '2016-01-03', '2016-01-05'], + dtype='datetime64[ns]'), + 'B': ser * 2}) + tm.assert_frame_equal(result, expected) + + +class TestFrameMulDiv(object): + """Tests for DataFrame multiplication and division""" + # ------------------------------------------------------------------ + # Mod By Zero 
+ + def test_df_mod_zero_df(self): + # GH#3590, modulo as ints + df = pd.DataFrame({'first': [3, 4, 5, 8], 'second': [0, 0, 0, 3]}) + + # this is technically wrong, as the integer portion is coerced to float + # ### + first = pd.Series([0, 0, 0, 0], dtype='float64') + second = pd.Series([np.nan, np.nan, np.nan, 0]) + expected = pd.DataFrame({'first': first, 'second': second}) + result = df % df + tm.assert_frame_equal(result, expected) + + def test_df_mod_zero_array(self): + # GH#3590, modulo as ints + df = pd.DataFrame({'first': [3, 4, 5, 8], 'second': [0, 0, 0, 3]}) + + # this is technically wrong, as the integer portion is coerced to float + # ### + first = pd.Series([0, 0, 0, 0], dtype='float64') + second = pd.Series([np.nan, np.nan, np.nan, 0]) + expected = pd.DataFrame({'first': first, 'second': second}) + + # numpy has a slightly different (wrong) treatment + with np.errstate(all='ignore'): + arr = df.values % df.values + result2 = pd.DataFrame(arr, index=df.index, + columns=df.columns, dtype='float64') + result2.iloc[0:3, 1] = np.nan + tm.assert_frame_equal(result2, expected) + + def test_df_mod_zero_int(self): + # GH#3590, modulo as ints + df = pd.DataFrame({'first': [3, 4, 5, 8], 'second': [0, 0, 0, 3]}) + + result = df % 0 + expected = pd.DataFrame(np.nan, index=df.index, columns=df.columns) + tm.assert_frame_equal(result, expected) + + # numpy has a slightly different (wrong) treatment + with np.errstate(all='ignore'): + arr = df.values.astype('float64') % 0 + result2 = pd.DataFrame(arr, index=df.index, columns=df.columns) + tm.assert_frame_equal(result2, expected) + + def test_df_mod_zero_series_does_not_commute(self): + # GH#3590, modulo as ints + # not commutative with series + df = pd.DataFrame(np.random.randn(10, 5)) + ser = df[0] + res = ser % df + res2 = df % ser + assert not res.fillna(0).equals(res2.fillna(0)) + + # ------------------------------------------------------------------ + # Division By Zero + + def test_df_div_zero_df(self): + # 
integer div, but deal with the 0's (GH#9144) + df = pd.DataFrame({'first': [3, 4, 5, 8], 'second': [0, 0, 0, 3]}) + result = df / df + + first = pd.Series([1.0, 1.0, 1.0, 1.0]) + second = pd.Series([np.nan, np.nan, np.nan, 1]) + expected = pd.DataFrame({'first': first, 'second': second}) + tm.assert_frame_equal(result, expected) + + def test_df_div_zero_array(self): + # integer div, but deal with the 0's (GH#9144) + df = pd.DataFrame({'first': [3, 4, 5, 8], 'second': [0, 0, 0, 3]}) + + first = pd.Series([1.0, 1.0, 1.0, 1.0]) + second = pd.Series([np.nan, np.nan, np.nan, 1]) + expected = pd.DataFrame({'first': first, 'second': second}) + + with np.errstate(all='ignore'): + arr = df.values.astype('float') / df.values + result = pd.DataFrame(arr, index=df.index, + columns=df.columns) + tm.assert_frame_equal(result, expected) + + def test_df_div_zero_int(self): + # integer div, but deal with the 0's (GH#9144) + df = pd.DataFrame({'first': [3, 4, 5, 8], 'second': [0, 0, 0, 3]}) + + result = df / 0 + expected = pd.DataFrame(np.inf, index=df.index, columns=df.columns) + expected.iloc[0:3, 1] = np.nan + tm.assert_frame_equal(result, expected) + + # numpy has a slightly different (wrong) treatment + with np.errstate(all='ignore'): + arr = df.values.astype('float64') / 0 + result2 = pd.DataFrame(arr, index=df.index, + columns=df.columns) + tm.assert_frame_equal(result2, expected) + + def test_df_div_zero_series_does_not_commute(self): + # integer div, but deal with the 0's (GH#9144) + df = pd.DataFrame(np.random.randn(10, 5)) + ser = df[0] + res = ser / df + res2 = df / ser + assert not res.fillna(0).equals(res2.fillna(0)) + + +class TestFrameArithmetic(object): + + @pytest.mark.xfail(reason='GH#7996 datetime64 units not converted to nano') + def test_df_sub_datetime64_not_ns(self): + df = pd.DataFrame(pd.date_range('20130101', periods=3)) + dt64 = np.datetime64('2013-01-01') + assert dt64.dtype == 'datetime64[D]' + res = df - dt64 + expected = 
pd.DataFrame([pd.Timedelta(days=0), pd.Timedelta(days=1), + pd.Timedelta(days=2)]) + tm.assert_frame_equal(res, expected) + + @pytest.mark.parametrize('data', [ + [1, 2, 3], + [1.1, 2.2, 3.3], + [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02'), pd.NaT], + ['x', 'y', 1]]) + @pytest.mark.parametrize('dtype', [None, object]) + def test_df_radd_str_invalid(self, dtype, data): + df = pd.DataFrame(data, dtype=dtype) + with pytest.raises(TypeError): + 'foo_' + df + + @pytest.mark.parametrize('dtype', [None, object]) + def test_df_with_dtype_radd_int(self, dtype): + df = pd.DataFrame([1, 2, 3], dtype=dtype) + expected = pd.DataFrame([2, 3, 4], dtype=dtype) + result = 1 + df + tm.assert_frame_equal(result, expected) + result = df + 1 + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize('dtype', [None, object]) + def test_df_with_dtype_radd_nan(self, dtype): + df = pd.DataFrame([1, 2, 3], dtype=dtype) + expected = pd.DataFrame([np.nan, np.nan, np.nan], dtype=dtype) + result = np.nan + df + tm.assert_frame_equal(result, expected) + result = df + np.nan + tm.assert_frame_equal(result, expected) + + def test_df_radd_str(self): + df = pd.DataFrame(['x', np.nan, 'x']) + tm.assert_frame_equal('a' + df, pd.DataFrame(['ax', np.nan, 'ax'])) + tm.assert_frame_equal(df + 'a', pd.DataFrame(['xa', np.nan, 'xa'])) + + +class TestPeriodFrameArithmetic(object): + + def test_ops_frame_period(self): + # GH 13043 + df = pd.DataFrame({'A': [pd.Period('2015-01', freq='M'), + pd.Period('2015-02', freq='M')], + 'B': [pd.Period('2014-01', freq='M'), + pd.Period('2014-02', freq='M')]}) + assert df['A'].dtype == object + assert df['B'].dtype == object + + p = pd.Period('2015-03', freq='M') + # dtype will be object because of original dtype + exp = pd.DataFrame({'A': np.array([2, 1], dtype=object), + 'B': np.array([14, 13], dtype=object)}) + tm.assert_frame_equal(p - df, exp) + tm.assert_frame_equal(df - p, -1 * exp) + + df2 = pd.DataFrame({'A': [pd.Period('2015-05', 
freq='M'), + pd.Period('2015-06', freq='M')], + 'B': [pd.Period('2015-05', freq='M'), + pd.Period('2015-06', freq='M')]}) + assert df2['A'].dtype == object + assert df2['B'].dtype == object + + exp = pd.DataFrame({'A': np.array([4, 4], dtype=object), + 'B': np.array([16, 16], dtype=object)}) + tm.assert_frame_equal(df2 - df, exp) + tm.assert_frame_equal(df - df2, -1 * exp) diff --git a/pandas/tests/frame/test_asof.py b/pandas/tests/frame/test_asof.py index 8bb26d3d7474c..fea6a5370109e 100644 --- a/pandas/tests/frame/test_asof.py +++ b/pandas/tests/frame/test_asof.py @@ -4,42 +4,38 @@ from pandas import (DataFrame, date_range, Timestamp, Series, to_datetime) -from pandas.util.testing import assert_frame_equal, assert_series_equal import pandas.util.testing as tm from .common import TestData -class TestFrameAsof(TestData, tm.TestCase): - - def setUp(self): +class TestFrameAsof(TestData): + def setup_method(self, method): self.N = N = 50 - rng = date_range('1/1/1990', periods=N, freq='53s') + self.rng = date_range('1/1/1990', periods=N, freq='53s') self.df = DataFrame({'A': np.arange(N), 'B': np.arange(N)}, - index=rng) + index=self.rng) def test_basic(self): - df = self.df.copy() df.loc[15:30, 'A'] = np.nan dates = date_range('1/1/1990', periods=self.N * 3, freq='25s') result = df.asof(dates) - self.assertTrue(result.notnull().all(1).all()) + assert result.notna().all(1).all() lb = df.index[14] ub = df.index[30] dates = list(dates) result = df.asof(dates) - self.assertTrue(result.notnull().all(1).all()) + assert result.notna().all(1).all() mask = (result.index >= lb) & (result.index < ub) rs = result[mask] - self.assertTrue((rs == 14).all(1).all()) + assert (rs == 14).all(1).all() def test_subset(self): - N = 10 rng = date_range('1/1/1990', periods=N, freq='53s') df = DataFrame({'A': np.arange(N), 'B': np.arange(N)}, @@ -51,19 +47,19 @@ def test_subset(self): # with a subset of A should be the same result = df.asof(dates, subset='A') expected = df.asof(dates) - 
assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # same with A/B result = df.asof(dates, subset=['A', 'B']) expected = df.asof(dates) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # B gives self.df.asof result = df.asof(dates, subset='B') expected = df.resample('25s', closed='right').ffill().reindex(dates) expected.iloc[20:] = 9 - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_missing(self): # GH 15118 @@ -75,9 +71,38 @@ def test_missing(self): result = df.asof('1989-12-31') expected = Series(index=['A', 'B'], name=Timestamp('1989-12-31')) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) result = df.asof(to_datetime(['1989-12-31'])) expected = DataFrame(index=to_datetime(['1989-12-31']), columns=['A', 'B'], dtype='float64') - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) + + def test_all_nans(self): + # GH 15713 + # DataFrame is all nans + result = DataFrame([np.nan]).asof([0]) + expected = DataFrame([np.nan]) + tm.assert_frame_equal(result, expected) + + # testing non-default indexes, multiple inputs + dates = date_range('1/1/1990', periods=self.N * 3, freq='25s') + result = DataFrame(np.nan, index=self.rng, columns=['A']).asof(dates) + expected = DataFrame(np.nan, index=dates, columns=['A']) + tm.assert_frame_equal(result, expected) + + # testing multiple columns + dates = date_range('1/1/1990', periods=self.N * 3, freq='25s') + result = DataFrame(np.nan, index=self.rng, + columns=['A', 'B', 'C']).asof(dates) + expected = DataFrame(np.nan, index=dates, columns=['A', 'B', 'C']) + tm.assert_frame_equal(result, expected) + + # testing scalar input + result = DataFrame(np.nan, index=[1, 2], columns=['A', 'B']).asof([3]) + expected = DataFrame(np.nan, index=[3], columns=['A', 'B']) + tm.assert_frame_equal(result, expected) + + result = DataFrame(np.nan, index=[1, 2], columns=['A', 'B']).asof(3) + 
expected = Series(np.nan, index=['A', 'B'], name=3) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index 839ceb5368240..28e82f7585850 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -2,6 +2,8 @@ from __future__ import print_function +import pytest + from datetime import datetime from numpy import random @@ -9,20 +11,18 @@ from pandas.compat import lrange, lzip, u from pandas import (compat, DataFrame, Series, Index, MultiIndex, - date_range, isnull) + date_range, isna) import pandas as pd -from pandas.util.testing import (assert_series_equal, - assert_frame_equal, - assertRaisesRegexp) +from pandas.util.testing import assert_frame_equal -from pandas.core.common import PerformanceWarning +from pandas.errors import PerformanceWarning import pandas.util.testing as tm from pandas.tests.frame.common import TestData -class TestDataFrameSelectReindex(tm.TestCase, TestData): +class TestDataFrameSelectReindex(TestData): # These are specific reindex-based tests; other indexing tests should go in # test_indexing @@ -37,29 +37,34 @@ def test_drop_names(self): df_inplace_b.drop('b', inplace=True) df_inplace_e.drop('e', axis=1, inplace=True) for obj in (df_dropped_b, df_dropped_e, df_inplace_b, df_inplace_e): - self.assertEqual(obj.index.name, 'first') - self.assertEqual(obj.columns.name, 'second') - self.assertEqual(list(df.columns), ['d', 'e', 'f']) + assert obj.index.name == 'first' + assert obj.columns.name == 'second' + assert list(df.columns) == ['d', 'e', 'f'] - self.assertRaises(ValueError, df.drop, ['g']) - self.assertRaises(ValueError, df.drop, ['g'], 1) + pytest.raises(KeyError, df.drop, ['g']) + pytest.raises(KeyError, df.drop, ['g'], 1) # errors = 'ignore' dropped = df.drop(['g'], errors='ignore') expected = Index(['a', 'b', 'c'], name='first') - self.assert_index_equal(dropped.index, expected) + 
tm.assert_index_equal(dropped.index, expected) dropped = df.drop(['b', 'g'], errors='ignore') expected = Index(['a', 'c'], name='first') - self.assert_index_equal(dropped.index, expected) + tm.assert_index_equal(dropped.index, expected) dropped = df.drop(['g'], axis=1, errors='ignore') expected = Index(['d', 'e', 'f'], name='second') - self.assert_index_equal(dropped.columns, expected) + tm.assert_index_equal(dropped.columns, expected) dropped = df.drop(['d', 'g'], axis=1, errors='ignore') expected = Index(['e', 'f'], name='second') - self.assert_index_equal(dropped.columns, expected) + tm.assert_index_equal(dropped.columns, expected) + + # GH 16398 + dropped = df.drop([], errors='ignore') + expected = Index(['a', 'b', 'c'], name='first') + tm.assert_index_equal(dropped.index, expected) def test_drop_col_still_multiindex(self): arrays = [['a', 'b', 'c', 'top'], @@ -82,10 +87,10 @@ def test_drop(self): assert_frame_equal(simple.drop( [0, 3], axis='index'), simple.loc[[1, 2], :]) - self.assertRaises(ValueError, simple.drop, 5) - self.assertRaises(ValueError, simple.drop, 'C', 1) - self.assertRaises(ValueError, simple.drop, [1, 5]) - self.assertRaises(ValueError, simple.drop, ['A', 'C'], 1) + pytest.raises(KeyError, simple.drop, 5) + pytest.raises(KeyError, simple.drop, 'C', 1) + pytest.raises(KeyError, simple.drop, [1, 5]) + pytest.raises(KeyError, simple.drop, ['A', 'C'], 1) # errors = 'ignore' assert_frame_equal(simple.drop(5, errors='ignore'), simple) @@ -100,6 +105,7 @@ def test_drop(self): columns=['a', 'a', 'b']) assert_frame_equal(nu_df.drop('a', axis=1), nu_df[['b']]) assert_frame_equal(nu_df.drop('b', axis='columns'), nu_df['a']) + assert_frame_equal(nu_df.drop([]), nu_df) # GH 16398 nu_df = nu_df.set_index(pd.Index(['X', 'Y', 'X'])) nu_df.columns = list('abc') @@ -120,7 +126,7 @@ def test_drop_multiindex_not_lexsorted(self): lexsorted_mi = MultiIndex.from_tuples( [('a', ''), ('b1', 'c1'), ('b2', 'c2')], names=['b', 'c']) lexsorted_df = DataFrame([[1, 3, 
4]], columns=lexsorted_mi) - self.assertTrue(lexsorted_df.columns.is_lexsorted()) + assert lexsorted_df.columns.is_lexsorted() # define the non-lexsorted version not_lexsorted_df = DataFrame(columns=['a', 'b', 'c', 'd'], @@ -129,7 +135,7 @@ def test_drop_multiindex_not_lexsorted(self): not_lexsorted_df = not_lexsorted_df.pivot_table( index='a', columns=['b', 'c'], values='d') not_lexsorted_df = not_lexsorted_df.reset_index() - self.assertFalse(not_lexsorted_df.columns.is_lexsorted()) + assert not not_lexsorted_df.columns.is_lexsorted() # compare the results tm.assert_frame_equal(lexsorted_df, not_lexsorted_df) @@ -140,6 +146,41 @@ def test_drop_multiindex_not_lexsorted(self): tm.assert_frame_equal(result, expected) + def test_drop_api_equivalence(self): + # equivalence of the labels/axis and index/columns API's (GH12392) + df = DataFrame([[1, 2, 3], [3, 4, 5], [5, 6, 7]], + index=['a', 'b', 'c'], + columns=['d', 'e', 'f']) + + res1 = df.drop('a') + res2 = df.drop(index='a') + tm.assert_frame_equal(res1, res2) + + res1 = df.drop('d', 1) + res2 = df.drop(columns='d') + tm.assert_frame_equal(res1, res2) + + res1 = df.drop(labels='e', axis=1) + res2 = df.drop(columns='e') + tm.assert_frame_equal(res1, res2) + + res1 = df.drop(['a'], axis=0) + res2 = df.drop(index=['a']) + tm.assert_frame_equal(res1, res2) + + res1 = df.drop(['a'], axis=0).drop(['d'], axis=1) + res2 = df.drop(index=['a'], columns=['d']) + tm.assert_frame_equal(res1, res2) + + with pytest.raises(ValueError): + df.drop(labels='a', index='b') + + with pytest.raises(ValueError): + df.drop(labels='a', columns='b') + + with pytest.raises(ValueError): + df.drop(axis=1) + def test_merge_join_different_levels(self): # GH 9455 @@ -172,16 +213,16 @@ def test_reindex(self): for idx, val in compat.iteritems(newFrame[col]): if idx in self.frame.index: if np.isnan(val): - self.assertTrue(np.isnan(self.frame[col][idx])) + assert np.isnan(self.frame[col][idx]) else: - self.assertEqual(val, self.frame[col][idx]) + assert 
val == self.frame[col][idx] else: - self.assertTrue(np.isnan(val)) + assert np.isnan(val) for col, series in compat.iteritems(newFrame): - self.assertTrue(tm.equalContents(series.index, newFrame.index)) + assert tm.equalContents(series.index, newFrame.index) emptyFrame = self.frame.reindex(Index([])) - self.assertEqual(len(emptyFrame.index), 0) + assert len(emptyFrame.index) == 0 # Cython code should be unit-tested directly nonContigFrame = self.frame.reindex(self.ts1.index[::2]) @@ -190,41 +231,40 @@ def test_reindex(self): for idx, val in compat.iteritems(nonContigFrame[col]): if idx in self.frame.index: if np.isnan(val): - self.assertTrue(np.isnan(self.frame[col][idx])) + assert np.isnan(self.frame[col][idx]) else: - self.assertEqual(val, self.frame[col][idx]) + assert val == self.frame[col][idx] else: - self.assertTrue(np.isnan(val)) + assert np.isnan(val) for col, series in compat.iteritems(nonContigFrame): - self.assertTrue(tm.equalContents(series.index, - nonContigFrame.index)) + assert tm.equalContents(series.index, nonContigFrame.index) # corner cases # Same index, copies values but not index if copy=False newFrame = self.frame.reindex(self.frame.index, copy=False) - self.assertIs(newFrame.index, self.frame.index) + assert newFrame.index is self.frame.index # length zero newFrame = self.frame.reindex([]) - self.assertTrue(newFrame.empty) - self.assertEqual(len(newFrame.columns), len(self.frame.columns)) + assert newFrame.empty + assert len(newFrame.columns) == len(self.frame.columns) # length zero with columns reindexed with non-empty index newFrame = self.frame.reindex([]) newFrame = newFrame.reindex(self.frame.index) - self.assertEqual(len(newFrame.index), len(self.frame.index)) - self.assertEqual(len(newFrame.columns), len(self.frame.columns)) + assert len(newFrame.index) == len(self.frame.index) + assert len(newFrame.columns) == len(self.frame.columns) # pass non-Index newFrame = self.frame.reindex(list(self.ts1.index)) - 
self.assert_index_equal(newFrame.index, self.ts1.index) + tm.assert_index_equal(newFrame.index, self.ts1.index) # copy with no axes result = self.frame.reindex() assert_frame_equal(result, self.frame) - self.assertFalse(result is self.frame) + assert result is not self.frame def test_reindex_nan(self): df = pd.DataFrame([[1, 2], [3, 5], [7, 11], [9, 23]], @@ -256,27 +296,27 @@ def test_reindex_name_remains(self): i = Series(np.arange(10), name='iname') df = df.reindex(i) - self.assertEqual(df.index.name, 'iname') + assert df.index.name == 'iname' df = df.reindex(Index(np.arange(10), name='tmpname')) - self.assertEqual(df.index.name, 'tmpname') + assert df.index.name == 'tmpname' s = Series(random.rand(10)) df = DataFrame(s.T, index=np.arange(len(s))) i = Series(np.arange(10), name='iname') df = df.reindex(columns=i) - self.assertEqual(df.columns.name, 'iname') + assert df.columns.name == 'iname' def test_reindex_int(self): smaller = self.intframe.reindex(self.intframe.index[::2]) - self.assertEqual(smaller['A'].dtype, np.int64) + assert smaller['A'].dtype == np.int64 bigger = smaller.reindex(self.intframe.index) - self.assertEqual(bigger['A'].dtype, np.float64) + assert bigger['A'].dtype == np.float64 smaller = self.intframe.reindex(columns=['A', 'B']) - self.assertEqual(smaller['A'].dtype, np.int64) + assert smaller['A'].dtype == np.int64 def test_reindex_like(self): other = self.frame.reindex(index=self.frame.index[:10], @@ -285,15 +325,15 @@ def test_reindex_like(self): assert_frame_equal(other, self.frame.reindex_like(other)) def test_reindex_columns(self): - newFrame = self.frame.reindex(columns=['A', 'B', 'E']) + new_frame = self.frame.reindex(columns=['A', 'B', 'E']) - assert_series_equal(newFrame['B'], self.frame['B']) - self.assertTrue(np.isnan(newFrame['E']).all()) - self.assertNotIn('C', newFrame) + tm.assert_series_equal(new_frame['B'], self.frame['B']) + assert np.isnan(new_frame['E']).all() + assert 'C' not in new_frame - # length zero - newFrame = 
self.frame.reindex(columns=[]) - self.assertTrue(newFrame.empty) + # Length zero + new_frame = self.frame.reindex(columns=[]) + assert new_frame.empty def test_reindex_columns_method(self): @@ -347,15 +387,15 @@ def test_reindex_axes(self): both_freq = df.reindex(index=time_freq, columns=some_cols).index.freq seq_freq = df.reindex(index=time_freq).reindex( columns=some_cols).index.freq - self.assertEqual(index_freq, both_freq) - self.assertEqual(index_freq, seq_freq) + assert index_freq == both_freq + assert index_freq == seq_freq def test_reindex_fill_value(self): df = DataFrame(np.random.randn(10, 4)) # axis=0 result = df.reindex(lrange(15)) - self.assertTrue(np.isnan(result.values[-5:]).all()) + assert np.isnan(result.values[-5:]).all() result = df.reindex(lrange(15), fill_value=0) expected = df.reindex(lrange(15)).fillna(0) @@ -378,11 +418,13 @@ def test_reindex_fill_value(self): assert_frame_equal(result, expected) # reindex_axis - result = df.reindex_axis(lrange(15), fill_value=0., axis=0) + with tm.assert_produces_warning(FutureWarning): + result = df.reindex_axis(lrange(15), fill_value=0., axis=0) expected = df.reindex(lrange(15)).fillna(0) assert_frame_equal(result, expected) - result = df.reindex_axis(lrange(5), fill_value=0., axis=1) + with tm.assert_produces_warning(FutureWarning): + result = df.reindex_axis(lrange(5), fill_value=0., axis=1) expected = df.reindex(columns=lrange(5)).fillna(0) assert_frame_equal(result, expected) @@ -405,37 +447,135 @@ def test_reindex_dups(self): assert_frame_equal(result, expected) # reindex fails - self.assertRaises(ValueError, df.reindex, index=list(range(len(df)))) + pytest.raises(ValueError, df.reindex, index=list(range(len(df)))) + + def test_reindex_axis_style(self): + # https://github.com/pandas-dev/pandas/issues/12392 + df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + expected = pd.DataFrame({"A": [1, 2, np.nan], "B": [4, 5, np.nan]}, + index=[0, 1, 3]) + result = df.reindex([0, 1, 3]) + 
assert_frame_equal(result, expected) + + result = df.reindex([0, 1, 3], axis=0) + assert_frame_equal(result, expected) + + result = df.reindex([0, 1, 3], axis='index') + assert_frame_equal(result, expected) + + def test_reindex_positional_warns(self): + # https://github.com/pandas-dev/pandas/issues/12392 + df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + expected = pd.DataFrame({"A": [1., 2], 'B': [4., 5], + "C": [np.nan, np.nan]}) + with tm.assert_produces_warning(FutureWarning): + result = df.reindex([0, 1], ['A', 'B', 'C']) + + assert_frame_equal(result, expected) + + def test_reindex_axis_style_raises(self): + # https://github.com/pandas-dev/pandas/issues/12392 + df = pd.DataFrame({"A": [1, 2, 3], 'B': [4, 5, 6]}) + with tm.assert_raises_regex(TypeError, "Cannot specify both 'axis'"): + df.reindex([0, 1], ['A'], axis=1) + + with tm.assert_raises_regex(TypeError, "Cannot specify both 'axis'"): + df.reindex([0, 1], ['A'], axis='index') + + with tm.assert_raises_regex(TypeError, "Cannot specify both 'axis'"): + df.reindex(index=[0, 1], axis='index') + + with tm.assert_raises_regex(TypeError, "Cannot specify both 'axis'"): + df.reindex(index=[0, 1], axis='columns') + + with tm.assert_raises_regex(TypeError, "Cannot specify both 'axis'"): + df.reindex(columns=[0, 1], axis='columns') + + with tm.assert_raises_regex(TypeError, "Cannot specify both 'axis'"): + df.reindex(index=[0, 1], columns=[0, 1], axis='columns') + + with tm.assert_raises_regex(TypeError, 'Cannot specify all'): + df.reindex([0, 1], [0], ['A']) + + # Mixing styles + with tm.assert_raises_regex(TypeError, "Cannot specify both 'axis'"): + df.reindex(index=[0, 1], axis='index') + + with tm.assert_raises_regex(TypeError, "Cannot specify both 'axis'"): + df.reindex(index=[0, 1], axis='columns') + + # Duplicates + with tm.assert_raises_regex(TypeError, "multiple values"): + df.reindex([0, 1], labels=[0, 1]) + + def test_reindex_single_named_indexer(self): + # 
https://github.com/pandas-dev/pandas/issues/12392 + df = pd.DataFrame({"A": [1, 2, 3], "B": [1, 2, 3]}) + result = df.reindex([0, 1], columns=['A']) + expected = pd.DataFrame({"A": [1, 2]}) + assert_frame_equal(result, expected) + + def test_reindex_api_equivalence(self): + # https://github.com/pandas-dev/pandas/issues/12392 + # equivalence of the labels/axis and index/columns API's + df = DataFrame([[1, 2, 3], [3, 4, 5], [5, 6, 7]], + index=['a', 'b', 'c'], + columns=['d', 'e', 'f']) + + res1 = df.reindex(['b', 'a']) + res2 = df.reindex(index=['b', 'a']) + res3 = df.reindex(labels=['b', 'a']) + res4 = df.reindex(labels=['b', 'a'], axis=0) + res5 = df.reindex(['b', 'a'], axis=0) + for res in [res2, res3, res4, res5]: + tm.assert_frame_equal(res1, res) + + res1 = df.reindex(columns=['e', 'd']) + res2 = df.reindex(['e', 'd'], axis=1) + res3 = df.reindex(labels=['e', 'd'], axis=1) + for res in [res2, res3]: + tm.assert_frame_equal(res1, res) + + with tm.assert_produces_warning(FutureWarning) as m: + res1 = df.reindex(['b', 'a'], ['e', 'd']) + assert 'reindex' in str(m[0].message) + res2 = df.reindex(columns=['e', 'd'], index=['b', 'a']) + res3 = df.reindex(labels=['b', 'a'], axis=0).reindex(labels=['e', 'd'], + axis=1) + for res in [res2, res3]: + tm.assert_frame_equal(res1, res) def test_align(self): af, bf = self.frame.align(self.frame) - self.assertIsNot(af._data, self.frame._data) + assert af._data is not self.frame._data af, bf = self.frame.align(self.frame, copy=False) - self.assertIs(af._data, self.frame._data) + assert af._data is self.frame._data # axis = 0 other = self.frame.iloc[:-5, :3] af, bf = self.frame.align(other, axis=0, fill_value=-1) - self.assert_index_equal(bf.columns, other.columns) + + tm.assert_index_equal(bf.columns, other.columns) + # test fill value join_idx = self.frame.index.join(other.index) diff_a = self.frame.index.difference(join_idx) diff_b = other.index.difference(join_idx) diff_a_vals = af.reindex(diff_a).values diff_b_vals = 
bf.reindex(diff_b).values - self.assertTrue((diff_a_vals == -1).all()) + assert (diff_a_vals == -1).all() af, bf = self.frame.align(other, join='right', axis=0) - self.assert_index_equal(bf.columns, other.columns) - self.assert_index_equal(bf.index, other.index) - self.assert_index_equal(af.index, other.index) + tm.assert_index_equal(bf.columns, other.columns) + tm.assert_index_equal(bf.index, other.index) + tm.assert_index_equal(af.index, other.index) # axis = 1 other = self.frame.iloc[:-5, :3].copy() af, bf = self.frame.align(other, axis=1) - self.assert_index_equal(bf.columns, self.frame.columns) - self.assert_index_equal(bf.index, other.index) + tm.assert_index_equal(bf.columns, self.frame.columns) + tm.assert_index_equal(bf.index, other.index) # test fill value join_idx = self.frame.index.join(other.index) @@ -446,42 +586,42 @@ def test_align(self): # TODO(wesm): unused? diff_b_vals = bf.reindex(diff_b).values # noqa - self.assertTrue((diff_a_vals == -1).all()) + assert (diff_a_vals == -1).all() af, bf = self.frame.align(other, join='inner', axis=1) - self.assert_index_equal(bf.columns, other.columns) + tm.assert_index_equal(bf.columns, other.columns) af, bf = self.frame.align(other, join='inner', axis=1, method='pad') - self.assert_index_equal(bf.columns, other.columns) + tm.assert_index_equal(bf.columns, other.columns) # test other non-float types af, bf = self.intframe.align(other, join='inner', axis=1, method='pad') - self.assert_index_equal(bf.columns, other.columns) + tm.assert_index_equal(bf.columns, other.columns) af, bf = self.mixed_frame.align(self.mixed_frame, join='inner', axis=1, method='pad') - self.assert_index_equal(bf.columns, self.mixed_frame.columns) + tm.assert_index_equal(bf.columns, self.mixed_frame.columns) af, bf = self.frame.align(other.iloc[:, 0], join='inner', axis=1, method=None, fill_value=None) - self.assert_index_equal(bf.index, Index([])) + tm.assert_index_equal(bf.index, Index([])) af, bf = self.frame.align(other.iloc[:, 0], 
join='inner', axis=1, method=None, fill_value=0) - self.assert_index_equal(bf.index, Index([])) + tm.assert_index_equal(bf.index, Index([])) # mixed floats/ints af, bf = self.mixed_float.align(other.iloc[:, 0], join='inner', axis=1, method=None, fill_value=0) - self.assert_index_equal(bf.index, Index([])) + tm.assert_index_equal(bf.index, Index([])) af, bf = self.mixed_int.align(other.iloc[:, 0], join='inner', axis=1, method=None, fill_value=0) - self.assert_index_equal(bf.index, Index([])) + tm.assert_index_equal(bf.index, Index([])) - # try to align dataframe to series along bad axis - self.assertRaises(ValueError, self.frame.align, af.iloc[0, :3], - join='inner', axis=2) + # Try to align DataFrame to Series along bad axis + with pytest.raises(ValueError): + self.frame.align(af.iloc[0, :3], join='inner', axis=2) # align dataframe to series with broadcast or not idx = self.frame.index @@ -490,7 +630,7 @@ def test_align(self): left, right = self.frame.align(s, axis=0) tm.assert_index_equal(left.index, self.frame.index) tm.assert_index_equal(right.index, self.frame.index) - self.assertTrue(isinstance(right, Series)) + assert isinstance(right, Series) left, right = self.frame.align(s, broadcast_axis=1) tm.assert_index_equal(left.index, self.frame.index) @@ -499,17 +639,17 @@ def test_align(self): expected[c] = s expected = DataFrame(expected, index=self.frame.index, columns=self.frame.columns) - assert_frame_equal(right, expected) + tm.assert_frame_equal(right, expected) - # GH 9558 + # see gh-9558 df = DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}) result = df[df['a'] == 2] expected = DataFrame([[2, 5]], index=[1], columns=['a', 'b']) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = df.where(df['a'] == 2, 0) expected = DataFrame({'a': [0, 2, 0], 'b': [0, 5, 0]}) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def _check_align(self, a, b, axis, fill_axis, how, method, limit=None): aa, ab = 
a.align(b, axis=axis, join=how, method=method, limit=limit, @@ -655,33 +795,33 @@ def test_align_series_combinations(self): tm.assert_frame_equal(res2, exp1) def test_filter(self): - # items + # Items filtered = self.frame.filter(['A', 'B', 'E']) - self.assertEqual(len(filtered.columns), 2) - self.assertNotIn('E', filtered) + assert len(filtered.columns) == 2 + assert 'E' not in filtered filtered = self.frame.filter(['A', 'B', 'E'], axis='columns') - self.assertEqual(len(filtered.columns), 2) - self.assertNotIn('E', filtered) + assert len(filtered.columns) == 2 + assert 'E' not in filtered - # other axis + # Other axis idx = self.frame.index[0:4] filtered = self.frame.filter(idx, axis='index') expected = self.frame.reindex(index=idx) - assert_frame_equal(filtered, expected) + tm.assert_frame_equal(filtered, expected) # like fcopy = self.frame.copy() fcopy['AA'] = 1 filtered = fcopy.filter(like='A') - self.assertEqual(len(filtered.columns), 2) - self.assertIn('AA', filtered) + assert len(filtered.columns) == 2 + assert 'AA' in filtered # like with ints in column names df = DataFrame(0., index=[0, 1, 2], columns=[0, 1, '_A', '_B']) filtered = df.filter(like='_') - self.assertEqual(len(filtered.columns), 2) + assert len(filtered.columns) == 2 # regex with ints in column names # from PR #10384 @@ -689,41 +829,41 @@ def test_filter(self): expected = DataFrame( 0., index=[0, 1, 2], columns=pd.Index([1, 2], dtype=object)) filtered = df.filter(regex='^[0-9]+$') - assert_frame_equal(filtered, expected) + tm.assert_frame_equal(filtered, expected) expected = DataFrame(0., index=[0, 1, 2], columns=[0, '0', 1, '1']) # shouldn't remove anything filtered = expected.filter(regex='^[0-9]+$') - assert_frame_equal(filtered, expected) + tm.assert_frame_equal(filtered, expected) # pass in None - with assertRaisesRegexp(TypeError, 'Must pass'): + with tm.assert_raises_regex(TypeError, 'Must pass'): self.frame.filter() - with assertRaisesRegexp(TypeError, 'Must pass'): + with 
tm.assert_raises_regex(TypeError, 'Must pass'): self.frame.filter(items=None) - with assertRaisesRegexp(TypeError, 'Must pass'): + with tm.assert_raises_regex(TypeError, 'Must pass'): self.frame.filter(axis=1) # test mutually exclusive arguments - with assertRaisesRegexp(TypeError, 'mutually exclusive'): + with tm.assert_raises_regex(TypeError, 'mutually exclusive'): self.frame.filter(items=['one', 'three'], regex='e$', like='bbi') - with assertRaisesRegexp(TypeError, 'mutually exclusive'): + with tm.assert_raises_regex(TypeError, 'mutually exclusive'): self.frame.filter(items=['one', 'three'], regex='e$', axis=1) - with assertRaisesRegexp(TypeError, 'mutually exclusive'): + with tm.assert_raises_regex(TypeError, 'mutually exclusive'): self.frame.filter(items=['one', 'three'], regex='e$') - with assertRaisesRegexp(TypeError, 'mutually exclusive'): + with tm.assert_raises_regex(TypeError, 'mutually exclusive'): self.frame.filter(items=['one', 'three'], like='bbi', axis=0) - with assertRaisesRegexp(TypeError, 'mutually exclusive'): + with tm.assert_raises_regex(TypeError, 'mutually exclusive'): self.frame.filter(items=['one', 'three'], like='bbi') # objects filtered = self.mixed_frame.filter(like='foo') - self.assertIn('foo', filtered) + assert 'foo' in filtered # unicode columns, won't ascii-encode df = self.frame.rename(columns={'B': u('\u2202')}) filtered = df.filter(like='C') - self.assertTrue('C' in filtered) + assert 'C' in filtered def test_filter_regex_search(self): fcopy = self.frame.copy() @@ -731,8 +871,8 @@ def test_filter_regex_search(self): # regex filtered = fcopy.filter(regex='[A]+') - self.assertEqual(len(filtered.columns), 2) - self.assertIn('AA', filtered) + assert len(filtered.columns) == 2 + assert 'AA' in filtered # doesn't have to be at beginning df = DataFrame({'aBBa': [1, 2], @@ -744,6 +884,27 @@ def test_filter_regex_search(self): exp = df[[x for x in df.columns if 'BB' in x]] assert_frame_equal(result, exp) + 
@pytest.mark.parametrize('name,expected', [ + ('a', DataFrame({u'a': [1, 2]})), + (u'a', DataFrame({u'a': [1, 2]})), + (u'あ', DataFrame({u'あ': [3, 4]})) + ]) + def test_filter_unicode(self, name, expected): + # GH13101 + df = DataFrame({u'a': [1, 2], u'あ': [3, 4]}) + + assert_frame_equal(df.filter(like=name), expected) + assert_frame_equal(df.filter(regex=name), expected) + + @pytest.mark.parametrize('name', ['a', u'a']) + def test_filter_bytestring(self, name): + # GH13101 + df = DataFrame({b'a': [1, 2], b'b': [3, 4]}) + expected = DataFrame({b'a': [1, 2]}) + + assert_frame_equal(df.filter(like=name), expected) + assert_frame_equal(df.filter(regex=name), expected) + def test_filter_corner(self): empty = DataFrame() @@ -754,16 +915,38 @@ def test_filter_corner(self): assert_frame_equal(result, empty) def test_select(self): + + # deprecated: gh-12410 f = lambda x: x.weekday() == 2 - result = self.tsframe.select(f, axis=0) - expected = self.tsframe.reindex( - index=self.tsframe.index[[f(x) for x in self.tsframe.index]]) - assert_frame_equal(result, expected) + index = self.tsframe.index[[f(x) for x in self.tsframe.index]] + expected_weekdays = self.tsframe.reindex(index=index) - result = self.frame.select(lambda x: x in ('B', 'D'), axis=1) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = self.tsframe.select(f, axis=0) + assert_frame_equal(result, expected_weekdays) + + result = self.frame.select(lambda x: x in ('B', 'D'), axis=1) + expected = self.frame.reindex(columns=['B', 'D']) + assert_frame_equal(result, expected, check_names=False) + + # replacement + f = lambda x: x.weekday == 2 + result = self.tsframe.loc(axis=0)[f(self.tsframe.index)] + assert_frame_equal(result, expected_weekdays) + + crit = lambda x: x in ['B', 'D'] + result = self.frame.loc(axis=1)[(self.frame.columns.map(crit))] expected = self.frame.reindex(columns=['B', 'D']) + assert_frame_equal(result, expected, check_names=False) + + # doc example + df = 
DataFrame({'A': [1, 2, 3]}, index=['foo', 'bar', 'baz']) - # TODO should reindex check_names? + crit = lambda x: x in ['bar', 'baz'] + with tm.assert_produces_warning(FutureWarning): + expected = df.select(crit) + result = df.loc[df.index.map(crit)] assert_frame_equal(result, expected, check_names=False) def test_take(self): @@ -780,7 +963,7 @@ def test_take(self): expected = df.loc[:, ['D', 'B', 'C', 'A']] assert_frame_equal(result, expected, check_names=False) - # neg indicies + # negative indices order = [2, 1, -1] for df in [self.frame]: @@ -788,16 +971,24 @@ def test_take(self): expected = df.reindex(df.index.take(order)) assert_frame_equal(result, expected) + with tm.assert_produces_warning(FutureWarning): + result = df.take(order, convert=True, axis=0) + assert_frame_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning): + result = df.take(order, convert=False, axis=0) + assert_frame_equal(result, expected) + # axis = 1 result = df.take(order, axis=1) expected = df.loc[:, ['C', 'B', 'D']] assert_frame_equal(result, expected, check_names=False) # illegal indices - self.assertRaises(IndexError, df.take, [3, 1, 2, 30], axis=0) - self.assertRaises(IndexError, df.take, [3, 1, 2, -31], axis=0) - self.assertRaises(IndexError, df.take, [3, 1, 2, 5], axis=1) - self.assertRaises(IndexError, df.take, [3, 1, 2, -5], axis=1) + pytest.raises(IndexError, df.take, [3, 1, 2, 30], axis=0) + pytest.raises(IndexError, df.take, [3, 1, 2, -31], axis=0) + pytest.raises(IndexError, df.take, [3, 1, 2, 5], axis=1) + pytest.raises(IndexError, df.take, [3, 1, 2, -5], axis=1) # mixed-dtype order = [4, 1, 2, 0, 3] @@ -812,7 +1003,7 @@ def test_take(self): expected = df.loc[:, ['foo', 'B', 'C', 'A', 'D']] assert_frame_equal(result, expected) - # neg indicies + # negative indices order = [4, 1, -2] for df in [self.mixed_frame]: @@ -844,46 +1035,52 @@ def test_reindex_boolean(self): columns=[0, 2]) reindexed = frame.reindex(np.arange(10)) - 
self.assertEqual(reindexed.values.dtype, np.object_) - self.assertTrue(isnull(reindexed[0][1])) + assert reindexed.values.dtype == np.object_ + assert isna(reindexed[0][1]) reindexed = frame.reindex(columns=lrange(3)) - self.assertEqual(reindexed.values.dtype, np.object_) - self.assertTrue(isnull(reindexed[1]).all()) + assert reindexed.values.dtype == np.object_ + assert isna(reindexed[1]).all() def test_reindex_objects(self): reindexed = self.mixed_frame.reindex(columns=['foo', 'A', 'B']) - self.assertIn('foo', reindexed) + assert 'foo' in reindexed reindexed = self.mixed_frame.reindex(columns=['A', 'B']) - self.assertNotIn('foo', reindexed) + assert 'foo' not in reindexed def test_reindex_corner(self): index = Index(['a', 'b', 'c']) dm = self.empty.reindex(index=[1, 2, 3]) reindexed = dm.reindex(columns=index) - self.assert_index_equal(reindexed.columns, index) + tm.assert_index_equal(reindexed.columns, index) # ints are weird smaller = self.intframe.reindex(columns=['A', 'B', 'E']) - self.assertEqual(smaller['E'].dtype, np.float64) + assert smaller['E'].dtype == np.float64 def test_reindex_axis(self): cols = ['A', 'B', 'E'] - reindexed1 = self.intframe.reindex_axis(cols, axis=1) + with tm.assert_produces_warning(FutureWarning) as m: + reindexed1 = self.intframe.reindex_axis(cols, axis=1) + assert 'reindex' in str(m[0].message) reindexed2 = self.intframe.reindex(columns=cols) assert_frame_equal(reindexed1, reindexed2) rows = self.intframe.index[0:5] - reindexed1 = self.intframe.reindex_axis(rows, axis=0) + with tm.assert_produces_warning(FutureWarning) as m: + reindexed1 = self.intframe.reindex_axis(rows, axis=0) + assert 'reindex' in str(m[0].message) reindexed2 = self.intframe.reindex(index=rows) assert_frame_equal(reindexed1, reindexed2) - self.assertRaises(ValueError, self.intframe.reindex_axis, rows, axis=2) + pytest.raises(ValueError, self.intframe.reindex_axis, rows, axis=2) # no-op case cols = self.frame.columns.copy() - newFrame = 
self.frame.reindex_axis(cols, axis=1) + with tm.assert_produces_warning(FutureWarning) as m: + newFrame = self.frame.reindex_axis(cols, axis=1) + assert 'reindex' in str(m[0].message) assert_frame_equal(newFrame, self.frame) def test_reindex_with_nans(self): @@ -906,21 +1103,21 @@ def test_reindex_with_nans(self): def test_reindex_multi(self): df = DataFrame(np.random.randn(3, 3)) - result = df.reindex(lrange(4), lrange(4)) + result = df.reindex(index=lrange(4), columns=lrange(4)) expected = df.reindex(lrange(4)).reindex(columns=lrange(4)) assert_frame_equal(result, expected) df = DataFrame(np.random.randint(0, 10, (3, 3))) - result = df.reindex(lrange(4), lrange(4)) + result = df.reindex(index=lrange(4), columns=lrange(4)) expected = df.reindex(lrange(4)).reindex(columns=lrange(4)) assert_frame_equal(result, expected) df = DataFrame(np.random.randint(0, 10, (3, 3))) - result = df.reindex(lrange(2), lrange(2)) + result = df.reindex(index=lrange(2), columns=lrange(2)) expected = df.reindex(lrange(2)).reindex(columns=lrange(2)) assert_frame_equal(result, expected) @@ -931,3 +1128,26 @@ def test_reindex_multi(self): expected = df.reindex([0, 1]).reindex(columns=['a', 'b']) assert_frame_equal(result, expected) + + data = [[1, 2, 3], [1, 2, 3]] + + @pytest.mark.parametrize('actual', [ + DataFrame(data=data, index=['a', 'a']), + DataFrame(data=data, index=['a', 'b']), + DataFrame(data=data, index=['a', 'b']).set_index([0, 1]), + DataFrame(data=data, index=['a', 'a']).set_index([0, 1]) + ]) + def test_raise_on_drop_duplicate_index(self, actual): + + # issue 19186 + level = 0 if isinstance(actual.index, MultiIndex) else None + with pytest.raises(KeyError): + actual.drop('c', level=level, axis=0) + with pytest.raises(KeyError): + actual.T.drop('c', level=level, axis=1) + expected_no_err = actual.drop('c', axis=0, level=level, + errors='ignore') + assert_frame_equal(expected_no_err, actual) + expected_no_err = actual.T.drop('c', axis=1, level=level, + errors='ignore') + 
assert_frame_equal(expected_no_err.T, actual) diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 7b64dea8c102d..8e012922d25f1 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -2,6 +2,8 @@ from __future__ import print_function +import pytest + from datetime import datetime, timedelta import itertools @@ -15,8 +17,7 @@ from pandas.util.testing import (assert_almost_equal, assert_series_equal, - assert_frame_equal, - assertRaisesRegexp) + assert_frame_equal) import pandas.util.testing as tm @@ -27,7 +28,7 @@ # structure -class TestDataFrameBlockInternals(tm.TestCase, TestData): +class TestDataFrameBlockInternals(TestData): def test_cast_internals(self): casted = DataFrame(self.frame._data, dtype=int) @@ -40,18 +41,24 @@ def test_cast_internals(self): def test_consolidate(self): self.frame['E'] = 7. - consolidated = self.frame.consolidate() - self.assertEqual(len(consolidated._data.blocks), 1) + consolidated = self.frame._consolidate() + assert len(consolidated._data.blocks) == 1 # Ensure copy, do I want this? - recons = consolidated.consolidate() - self.assertIsNot(recons, consolidated) - assert_frame_equal(recons, consolidated) + recons = consolidated._consolidate() + assert recons is not consolidated + tm.assert_frame_equal(recons, consolidated) self.frame['F'] = 8. 
- self.assertEqual(len(self.frame._data.blocks), 3) - self.frame.consolidate(inplace=True) - self.assertEqual(len(self.frame._data.blocks), 1) + assert len(self.frame._data.blocks) == 3 + + self.frame._consolidate(inplace=True) + assert len(self.frame._data.blocks) == 1 + + def test_consolidate_deprecation(self): + self.frame['E'] = 7 + with tm.assert_produces_warning(FutureWarning): + self.frame.consolidate() def test_consolidate_inplace(self): frame = self.frame.copy() # noqa @@ -60,20 +67,20 @@ def test_consolidate_inplace(self): for letter in range(ord('A'), ord('Z')): self.frame[chr(letter)] = chr(letter) - def test_as_matrix_consolidate(self): + def test_values_consolidate(self): self.frame['E'] = 7. - self.assertFalse(self.frame._data.is_consolidated()) - _ = self.frame.as_matrix() # noqa - self.assertTrue(self.frame._data.is_consolidated()) + assert not self.frame._data.is_consolidated() + _ = self.frame.values # noqa + assert self.frame._data.is_consolidated() def test_modify_values(self): self.frame.values[5] = 5 - self.assertTrue((self.frame.values[5] == 5).all()) + assert (self.frame.values[5] == 5).all() # unconsolidated self.frame['E'] = 7. self.frame.values[6] = 6 - self.assertTrue((self.frame.values[6] == 6).all()) + assert (self.frame.values[6] == 6).all() def test_boolean_set_uncons(self): self.frame['E'] = 7. 
@@ -84,51 +91,51 @@ def test_boolean_set_uncons(self): self.frame[self.frame > 1] = 2 assert_almost_equal(expected, self.frame.values) - def test_as_matrix_numeric_cols(self): + def test_values_numeric_cols(self): self.frame['foo'] = 'bar' - values = self.frame.as_matrix(['A', 'B', 'C', 'D']) - self.assertEqual(values.dtype, np.float64) + values = self.frame[['A', 'B', 'C', 'D']].values + assert values.dtype == np.float64 - def test_as_matrix_lcd(self): + def test_values_lcd(self): # mixed lcd - values = self.mixed_float.as_matrix(['A', 'B', 'C', 'D']) - self.assertEqual(values.dtype, np.float64) + values = self.mixed_float[['A', 'B', 'C', 'D']].values + assert values.dtype == np.float64 - values = self.mixed_float.as_matrix(['A', 'B', 'C']) - self.assertEqual(values.dtype, np.float32) + values = self.mixed_float[['A', 'B', 'C']].values + assert values.dtype == np.float32 - values = self.mixed_float.as_matrix(['C']) - self.assertEqual(values.dtype, np.float16) + values = self.mixed_float[['C']].values + assert values.dtype == np.float16 # GH 10364 # B uint64 forces float because there are other signed int types - values = self.mixed_int.as_matrix(['A', 'B', 'C', 'D']) - self.assertEqual(values.dtype, np.float64) + values = self.mixed_int[['A', 'B', 'C', 'D']].values + assert values.dtype == np.float64 - values = self.mixed_int.as_matrix(['A', 'D']) - self.assertEqual(values.dtype, np.int64) + values = self.mixed_int[['A', 'D']].values + assert values.dtype == np.int64 # B uint64 forces float because there are other signed int types - values = self.mixed_int.as_matrix(['A', 'B', 'C']) - self.assertEqual(values.dtype, np.float64) + values = self.mixed_int[['A', 'B', 'C']].values + assert values.dtype == np.float64 # as B and C are both unsigned, no forcing to float is needed - values = self.mixed_int.as_matrix(['B', 'C']) - self.assertEqual(values.dtype, np.uint64) + values = self.mixed_int[['B', 'C']].values + assert values.dtype == np.uint64 - values = 
self.mixed_int.as_matrix(['A', 'C']) - self.assertEqual(values.dtype, np.int32) + values = self.mixed_int[['A', 'C']].values + assert values.dtype == np.int32 - values = self.mixed_int.as_matrix(['C', 'D']) - self.assertEqual(values.dtype, np.int64) + values = self.mixed_int[['C', 'D']].values + assert values.dtype == np.int64 - values = self.mixed_int.as_matrix(['A']) - self.assertEqual(values.dtype, np.int32) + values = self.mixed_int[['A']].values + assert values.dtype == np.int32 - values = self.mixed_int.as_matrix(['C']) - self.assertEqual(values.dtype, np.uint8) + values = self.mixed_int[['C']].values + assert values.dtype == np.uint8 def test_constructor_with_convert(self): # this is actually mostly a test of lib.maybe_convert_objects @@ -213,8 +220,8 @@ def test_construction_with_mixed(self): # mixed-type frames self.mixed_frame['datetime'] = datetime.now() self.mixed_frame['timedelta'] = timedelta(days=1, seconds=1) - self.assertEqual(self.mixed_frame['datetime'].dtype, 'M8[ns]') - self.assertEqual(self.mixed_frame['timedelta'].dtype, 'm8[ns]') + assert self.mixed_frame['datetime'].dtype == 'M8[ns]' + assert self.mixed_frame['timedelta'].dtype == 'm8[ns]' result = self.mixed_frame.get_dtype_counts().sort_values() expected = Series({'float64': 4, 'object': 1, @@ -226,10 +233,6 @@ def test_construction_with_conversions(self): # convert from a numpy array of non-ns timedelta64 arr = np.array([1, 2, 3], dtype='timedelta64[s]') - s = Series(arr) - expected = Series(pd.timedelta_range('00:00:01', periods=3, freq='s')) - assert_series_equal(s, expected) - df = DataFrame(index=range(3)) df['A'] = arr expected = DataFrame({'A': pd.timedelta_range('00:00:01', periods=3, @@ -237,21 +240,6 @@ def test_construction_with_conversions(self): index=range(3)) assert_frame_equal(df, expected) - # convert from a numpy array of non-ns datetime64 - # note that creating a numpy datetime64 is in LOCAL time!!!! 
- # seems to work for M8[D], but not for M8[s] - - s = Series(np.array(['2013-01-01', '2013-01-02', - '2013-01-03'], dtype='datetime64[D]')) - assert_series_equal(s, Series(date_range('20130101', periods=3, - freq='D'))) - - # s = Series(np.array(['2013-01-01 00:00:01','2013-01-01 - # 00:00:02','2013-01-01 00:00:03'],dtype='datetime64[s]')) - - # assert_series_equal(s,date_range('20130101 - # 00:00:01',period=3,freq='s')) - expected = DataFrame({ 'dt1': Timestamp('20130101'), 'dt2': date_range('20130101', periods=3), @@ -279,10 +267,10 @@ def f(dtype): columns=["A", "B", "C"], dtype=dtype) - self.assertRaises(NotImplementedError, f, - [("A", "datetime64[h]"), - ("B", "str"), - ("C", "int32")]) + pytest.raises(NotImplementedError, f, + [("A", "datetime64[h]"), + ("B", "str"), + ("C", "int32")]) # these work (though results may be unexpected) f('int64') @@ -300,12 +288,12 @@ def test_equals_different_blocks(self): df1 = df0.reset_index()[["A", "B", "C"]] # this assert verifies that the above operations have # induced a block rearrangement - self.assertTrue(df0._data.blocks[0].dtype != - df1._data.blocks[0].dtype) + assert (df0._data.blocks[0].dtype != df1._data.blocks[0].dtype) + # do the real tests assert_frame_equal(df0, df1) - self.assertTrue(df0.equals(df1)) - self.assertTrue(df1.equals(df0)) + assert df0.equals(df1) + assert df1.equals(df0) def test_copy_blocks(self): # API/ENH 9607 @@ -313,13 +301,17 @@ def test_copy_blocks(self): column = df.columns[0] # use the default copy=True, change a column - blocks = df.as_blocks() + + # deprecated 0.21.0 + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + blocks = df.as_blocks() for dtype, _df in blocks.items(): if column in _df: _df.loc[:, column] = _df[column] + 1 # make sure we did not change the original DataFrame - self.assertFalse(_df[column].equals(df[column])) + assert not _df[column].equals(df[column]) def test_no_copy_blocks(self): # API/ENH 9607 @@ -327,36 +319,40 @@ def 
test_no_copy_blocks(self): column = df.columns[0] # use the copy=False, change a column - blocks = df.as_blocks(copy=False) + + # deprecated 0.21.0 + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + blocks = df.as_blocks(copy=False) for dtype, _df in blocks.items(): if column in _df: _df.loc[:, column] = _df[column] + 1 # make sure we did change the original DataFrame - self.assertTrue(_df[column].equals(df[column])) + assert _df[column].equals(df[column]) def test_copy(self): cop = self.frame.copy() cop['E'] = cop['A'] - self.assertNotIn('E', self.frame) + assert 'E' not in self.frame # copy objects copy = self.mixed_frame.copy() - self.assertIsNot(copy._data, self.mixed_frame._data) + assert copy._data is not self.mixed_frame._data def test_pickle(self): - unpickled = self.round_trip_pickle(self.mixed_frame) + unpickled = tm.round_trip_pickle(self.mixed_frame) assert_frame_equal(self.mixed_frame, unpickled) # buglet self.mixed_frame._data.ndim # empty - unpickled = self.round_trip_pickle(self.empty) + unpickled = tm.round_trip_pickle(self.empty) repr(unpickled) # tz frame - unpickled = self.round_trip_pickle(self.tzframe) + unpickled = tm.round_trip_pickle(self.tzframe) assert_frame_equal(self.tzframe, unpickled) def test_consolidate_datetime64(self): @@ -392,8 +388,8 @@ def test_consolidate_datetime64(self): tm.assert_index_equal(pd.DatetimeIndex(df.ending), ser_ending.index) def test_is_mixed_type(self): - self.assertFalse(self.frame._is_mixed_type) - self.assertTrue(self.mixed_frame._is_mixed_type) + assert not self.frame._is_mixed_type + assert self.mixed_frame._is_mixed_type def test_get_numeric_data(self): # TODO(wesm): unused? 
@@ -409,8 +405,8 @@ def test_get_numeric_data(self): result = df.get_dtype_counts() expected = Series({'int64': 1, 'float64': 1, datetime64name: 1, objectname: 1}) - result.sort_index() - expected.sort_index() + result = result.sort_index() + expected = expected.sort_index() assert_series_equal(result, expected) df = DataFrame({'a': 1., 'b': 2, 'c': 'foo', @@ -445,35 +441,35 @@ def test_convert_objects(self): oops = self.mixed_frame.T.T converted = oops._convert(datetime=True) assert_frame_equal(converted, self.mixed_frame) - self.assertEqual(converted['A'].dtype, np.float64) + assert converted['A'].dtype == np.float64 # force numeric conversion self.mixed_frame['H'] = '1.' self.mixed_frame['I'] = '1' # add in some items that will be nan - l = len(self.mixed_frame) + length = len(self.mixed_frame) self.mixed_frame['J'] = '1.' self.mixed_frame['K'] = '1' self.mixed_frame.loc[0:5, ['J', 'K']] = 'garbled' converted = self.mixed_frame._convert(datetime=True, numeric=True) - self.assertEqual(converted['H'].dtype, 'float64') - self.assertEqual(converted['I'].dtype, 'int64') - self.assertEqual(converted['J'].dtype, 'float64') - self.assertEqual(converted['K'].dtype, 'float64') - self.assertEqual(len(converted['J'].dropna()), l - 5) - self.assertEqual(len(converted['K'].dropna()), l - 5) + assert converted['H'].dtype == 'float64' + assert converted['I'].dtype == 'int64' + assert converted['J'].dtype == 'float64' + assert converted['K'].dtype == 'float64' + assert len(converted['J'].dropna()) == length - 5 + assert len(converted['K'].dropna()) == length - 5 # via astype converted = self.mixed_frame.copy() converted['H'] = converted['H'].astype('float64') converted['I'] = converted['I'].astype('int64') - self.assertEqual(converted['H'].dtype, 'float64') - self.assertEqual(converted['I'].dtype, 'int64') + assert converted['H'].dtype == 'float64' + assert converted['I'].dtype == 'int64' # via astype, but errors converted = self.mixed_frame.copy() - with 
assertRaisesRegexp(ValueError, 'invalid literal'): + with tm.assert_raises_regex(ValueError, 'invalid literal'): converted['H'].astype('int32') # mixed in a single column @@ -488,6 +484,32 @@ def test_convert_objects_no_conversion(self): mixed2 = mixed1._convert(datetime=True) assert_frame_equal(mixed1, mixed2) + def test_infer_objects(self): + # GH 11221 + df = DataFrame({'a': ['a', 1, 2, 3], + 'b': ['b', 2.0, 3.0, 4.1], + 'c': ['c', datetime(2016, 1, 1), + datetime(2016, 1, 2), + datetime(2016, 1, 3)], + 'd': [1, 2, 3, 'd']}, + columns=['a', 'b', 'c', 'd']) + df = df.iloc[1:].infer_objects() + + assert df['a'].dtype == 'int64' + assert df['b'].dtype == 'float64' + assert df['c'].dtype == 'M8[ns]' + assert df['d'].dtype == 'object' + + expected = DataFrame({'a': [1, 2, 3], + 'b': [2.0, 3.0, 4.1], + 'c': [datetime(2016, 1, 1), + datetime(2016, 1, 2), + datetime(2016, 1, 3)], + 'd': [2, 3, 'd']}, + columns=['a', 'b', 'c', 'd']) + # reconstruct frame to verify inference is same + tm.assert_frame_equal(df.reset_index(drop=True), expected) + def test_stale_cached_series_bug_473(self): # this is chained, but ok @@ -500,7 +522,7 @@ def test_stale_cached_series_bug_473(self): repr(Y) result = Y.sum() # noqa exp = Y['g'].sum() # noqa - self.assertTrue(pd.isnull(Y['g']['c'])) + assert pd.isna(Y['g']['c']) def test_get_X_columns(self): # numeric and object columns @@ -511,8 +533,8 @@ def test_get_X_columns(self): 'd': [None, None, None], 'e': [3.14, 0.577, 2.773]}) - self.assert_index_equal(df._get_numeric_data().columns, - pd.Index(['a', 'b', 'e'])) + tm.assert_index_equal(df._get_numeric_data().columns, + pd.Index(['a', 'b', 'e'])) def test_strange_column_corruption_issue(self): # (wesm) Unclear how exactly this is related to internal matters @@ -533,6 +555,6 @@ def test_strange_column_corruption_issue(self): myid = 100 - first = len(df.loc[pd.isnull(df[myid]), [myid]]) - second = len(df.loc[pd.isnull(df[myid]), [myid]]) - self.assertTrue(first == second == 0) + first = 
len(df.loc[pd.isna(df[myid]), [myid]]) + second = len(df.loc[pd.isna(df[myid]), [myid]]) + assert first == second == 0 diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index eed4d6261d6e8..e82faaeef2986 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -15,12 +15,10 @@ from pandas.tests.frame.common import TestData import pandas.util.testing as tm -from pandas.util.testing import (assertRaisesRegexp, - assert_frame_equal, - assert_series_equal) +from pandas.util.testing import assert_frame_equal, assert_series_equal -class TestDataFrameConcatCommon(tm.TestCase, TestData): +class TestDataFrameConcatCommon(TestData): def test_concat_multiple_frames_dtypes(self): @@ -78,11 +76,13 @@ def test_append_series_dict(self): columns=['foo', 'bar', 'baz', 'qux']) series = df.loc[4] - with assertRaisesRegexp(ValueError, 'Indexes have overlapping values'): + with tm.assert_raises_regex(ValueError, + 'Indexes have overlapping values'): df.append(series, verify_integrity=True) series.name = None - with assertRaisesRegexp(TypeError, 'Can only append a Series if ' - 'ignore_index=True'): + with tm.assert_raises_regex(TypeError, + 'Can only append a Series if ' + 'ignore_index=True'): df.append(series, verify_integrity=True) result = df.append(series[::-1], ignore_index=True) @@ -270,7 +270,7 @@ def test_update_raise(self): other = DataFrame([[2., nan], [nan, 7]], index=[1, 3], columns=[1, 2]) - with assertRaisesRegexp(ValueError, "Data overlaps"): + with tm.assert_raises_regex(ValueError, "Data overlaps"): df.update(other, raise_conflict=True) def test_update_from_non_df(self): @@ -303,7 +303,7 @@ def test_join_str_datetime(self): tst = A.join(C, on='aa') - self.assertEqual(len(tst.columns), 3) + assert len(tst.columns) == 3 def test_join_multiindex_leftright(self): # GH 10741 @@ -419,11 +419,29 @@ def test_concat_axis_parameter(self): assert_frame_equal(concatted_1_series, 
expected_columns_series) # Testing ValueError - with assertRaisesRegexp(ValueError, 'No axis named'): + with tm.assert_raises_regex(ValueError, 'No axis named'): pd.concat([series1, series2], axis='something') - -class TestDataFrameCombineFirst(tm.TestCase, TestData): + def test_concat_numerical_names(self): + # #15262 # #12223 + df = pd.DataFrame({'col': range(9)}, + dtype='int32', + index=(pd.MultiIndex + .from_product([['A0', 'A1', 'A2'], + ['B0', 'B1', 'B2']], + names=[1, 2]))) + result = pd.concat((df.iloc[:2, :], df.iloc[-2:, :])) + expected = pd.DataFrame({'col': [0, 1, 7, 8]}, + dtype='int32', + index=pd.MultiIndex.from_tuples([('A0', 'B0'), + ('A0', 'B1'), + ('A2', 'B1'), + ('A2', 'B2')], + names=[1, 2])) + tm.assert_frame_equal(result, expected) + + +class TestDataFrameCombineFirst(TestData): def test_combine_first_mixed(self): a = Series(['a', 'b'], index=lrange(2)) @@ -446,7 +464,7 @@ def test_combine_first(self): combined = head.combine_first(tail) reordered_frame = self.frame.reindex(combined.index) assert_frame_equal(combined, reordered_frame) - self.assertTrue(tm.equalContents(combined.columns, self.frame.columns)) + assert tm.equalContents(combined.columns, self.frame.columns) assert_series_equal(combined['A'], reordered_frame['A']) # same index @@ -460,7 +478,7 @@ def test_combine_first(self): combined = fcopy.combine_first(fcopy2) - self.assertTrue((combined['A'] == 1).all()) + assert (combined['A'] == 1).all() assert_series_equal(combined['B'], fcopy['B']) assert_series_equal(combined['C'], fcopy2['C']) assert_series_equal(combined['D'], fcopy['D']) @@ -470,12 +488,12 @@ def test_combine_first(self): head['A'] = 1 combined = head.combine_first(tail) - self.assertTrue((combined['A'][:10] == 1).all()) + assert (combined['A'][:10] == 1).all() # reverse overlap tail['A'][:10] = 0 combined = tail.combine_first(head) - self.assertTrue((combined['A'][:10] == 0).all()) + assert (combined['A'][:10] == 0).all() # no overlap f = self.frame[:10] @@ -492,13 
+510,13 @@ def test_combine_first(self): assert_frame_equal(comb, self.frame) comb = self.frame.combine_first(DataFrame(index=["faz", "boo"])) - self.assertTrue("faz" in comb.index) + assert "faz" in comb.index # #2525 df = DataFrame({'a': [1]}, index=[datetime(2012, 1, 1)]) df2 = DataFrame({}, columns=['b']) result = df.combine_first(df2) - self.assertTrue('b' in result) + assert 'b' in result def test_combine_first_mixed_bug(self): idx = Index(['a', 'b', 'c', 'e']) @@ -520,7 +538,7 @@ def test_combine_first_mixed_bug(self): "col5": ser3}) combined = frame1.combine_first(frame2) - self.assertEqual(len(combined.columns), 5) + assert len(combined.columns) == 5 # gh 3016 (same as in update) df = DataFrame([[1., 2., False, True], [4., 5., True, False]], @@ -585,28 +603,28 @@ def test_combine_first_align_nan(self): dfa = pd.DataFrame([[pd.Timestamp('2011-01-01'), 2]], columns=['a', 'b']) dfb = pd.DataFrame([[4], [5]], columns=['b']) - self.assertEqual(dfa['a'].dtype, 'datetime64[ns]') - self.assertEqual(dfa['b'].dtype, 'int64') + assert dfa['a'].dtype == 'datetime64[ns]' + assert dfa['b'].dtype == 'int64' res = dfa.combine_first(dfb) exp = pd.DataFrame({'a': [pd.Timestamp('2011-01-01'), pd.NaT], 'b': [2., 5.]}, columns=['a', 'b']) tm.assert_frame_equal(res, exp) - self.assertEqual(res['a'].dtype, 'datetime64[ns]') + assert res['a'].dtype == 'datetime64[ns]' # ToDo: this must be int64 - self.assertEqual(res['b'].dtype, 'float64') + assert res['b'].dtype == 'float64' res = dfa.iloc[:0].combine_first(dfb) exp = pd.DataFrame({'a': [np.nan, np.nan], 'b': [4, 5]}, columns=['a', 'b']) tm.assert_frame_equal(res, exp) # ToDo: this must be datetime64 - self.assertEqual(res['a'].dtype, 'float64') + assert res['a'].dtype == 'float64' # ToDo: this must be int64 - self.assertEqual(res['b'].dtype, 'int64') + assert res['b'].dtype == 'int64' def test_combine_first_timezone(self): - # GH 7630 + # see gh-7630 data1 = pd.to_datetime('20100101 01:01').tz_localize('UTC') df1 = 
pd.DataFrame(columns=['UTCdatetime', 'abc'], data=data1, @@ -626,10 +644,10 @@ def test_combine_first_timezone(self): index=pd.date_range('20140627', periods=2, freq='D')) tm.assert_frame_equal(res, exp) - self.assertEqual(res['UTCdatetime'].dtype, 'datetime64[ns, UTC]') - self.assertEqual(res['abc'].dtype, 'datetime64[ns, UTC]') + assert res['UTCdatetime'].dtype == 'datetime64[ns, UTC]' + assert res['abc'].dtype == 'datetime64[ns, UTC]' - # GH 10567 + # see gh-10567 dts1 = pd.date_range('2015-01-01', '2015-01-05', tz='UTC') df1 = pd.DataFrame({'DATE': dts1}) dts2 = pd.date_range('2015-01-03', '2015-01-05', tz='UTC') @@ -637,7 +655,7 @@ def test_combine_first_timezone(self): res = df1.combine_first(df2) tm.assert_frame_equal(res, df1) - self.assertEqual(res['DATE'].dtype, 'datetime64[ns, UTC]') + assert res['DATE'].dtype == 'datetime64[ns, UTC]' dts1 = pd.DatetimeIndex(['2011-01-01', 'NaT', '2011-01-03', '2011-01-04'], tz='US/Eastern') @@ -662,7 +680,7 @@ def test_combine_first_timezone(self): # if df1 doesn't have NaN, keep its dtype res = df1.combine_first(df2) tm.assert_frame_equal(res, df1) - self.assertEqual(res['DATE'].dtype, 'datetime64[ns, US/Eastern]') + assert res['DATE'].dtype == 'datetime64[ns, US/Eastern]' dts1 = pd.date_range('2015-01-01', '2015-01-02', tz='US/Eastern') df1 = pd.DataFrame({'DATE': dts1}) @@ -675,7 +693,7 @@ def test_combine_first_timezone(self): pd.Timestamp('2015-01-03')] exp = pd.DataFrame({'DATE': exp_dts}) tm.assert_frame_equal(res, exp) - self.assertEqual(res['DATE'].dtype, 'object') + assert res['DATE'].dtype == 'object' def test_combine_first_timedelta(self): data1 = pd.TimedeltaIndex(['1 day', 'NaT', '3 day', '4day']) @@ -688,7 +706,7 @@ def test_combine_first_timedelta(self): '11 day', '3 day', '4 day']) exp = pd.DataFrame({'TD': exp_dts}, index=[1, 2, 3, 4, 5, 7]) tm.assert_frame_equal(res, exp) - self.assertEqual(res['TD'].dtype, 'timedelta64[ns]') + assert res['TD'].dtype == 'timedelta64[ns]' def 
test_combine_first_period(self): data1 = pd.PeriodIndex(['2011-01', 'NaT', '2011-03', @@ -704,7 +722,7 @@ def test_combine_first_period(self): freq='M') exp = pd.DataFrame({'P': exp_dts}, index=[1, 2, 3, 4, 5, 7]) tm.assert_frame_equal(res, exp) - self.assertEqual(res['P'].dtype, 'object') + assert res['P'].dtype == 'object' # different freq dts2 = pd.PeriodIndex(['2012-01-01', '2012-01-02', @@ -720,7 +738,7 @@ def test_combine_first_period(self): pd.Period('2011-04', freq='M')] exp = pd.DataFrame({'P': exp_dts}, index=[1, 2, 3, 4, 5, 7]) tm.assert_frame_equal(res, exp) - self.assertEqual(res['P'].dtype, 'object') + assert res['P'].dtype == 'object' def test_combine_first_int(self): # GH14687 - integer series that do no align exactly @@ -730,7 +748,7 @@ def test_combine_first_int(self): res = df1.combine_first(df2) tm.assert_frame_equal(res, df1) - self.assertEqual(res['a'].dtype, 'int64') + assert res['a'].dtype == 'int64' def test_concat_datetime_datetime64_frame(self): # #2624 @@ -745,3 +763,26 @@ def test_concat_datetime_datetime64_frame(self): # it works! 
pd.concat([df1, df2_obj]) + + +class TestDataFrameUpdate(TestData): + + def test_update_nan(self): + # #15593 #15617 + # test 1 + df1 = DataFrame({'A': [1.0, 2, 3], 'B': date_range('2000', periods=3)}) + df2 = DataFrame({'A': [None, 2, 3]}) + expected = df1.copy() + df1.update(df2, overwrite=False) + + tm.assert_frame_equal(df1, expected) + + # test 2 + df1 = DataFrame({'A': [1.0, None, 3], + 'B': date_range('2000', periods=3)}) + df2 = DataFrame({'A': [None, 2, 3]}) + expected = DataFrame({'A': [1.0, 2, 3], + 'B': date_range('2000', periods=3)}) + df1.update(df2, overwrite=False) + + tm.assert_frame_equal(df1, expected) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 76eb61bd81110..499751e864331 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -13,18 +13,16 @@ import numpy.ma as ma import numpy.ma.mrecords as mrecords -from pandas.types.common import is_integer_dtype +from pandas.core.dtypes.common import is_integer_dtype from pandas.compat import (lmap, long, zip, range, lrange, lzip, - OrderedDict, is_platform_little_endian) + OrderedDict, is_platform_little_endian, PY36) from pandas import compat -from pandas import (DataFrame, Index, Series, isnull, +from pandas import (DataFrame, Index, Series, isna, MultiIndex, Timedelta, Timestamp, - date_range) -from pandas.core.common import PandasError + date_range, Categorical) import pandas as pd -import pandas.core.common as com -import pandas.lib as lib import pandas.util.testing as tm +from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike from pandas.tests.frame.common import TestData @@ -34,14 +32,14 @@ 'int32', 'int64'] -class TestDataFrameConstructors(tm.TestCase, TestData): +class TestDataFrameConstructors(TestData): def test_constructor(self): df = DataFrame() - self.assertEqual(len(df.index), 0) + assert len(df.index) == 0 df = DataFrame(data={}) - self.assertEqual(len(df.index), 0) + 
assert len(df.index) == 0 def test_constructor_mixed(self): index, data = tm.getMixedTypeDict() @@ -50,11 +48,11 @@ def test_constructor_mixed(self): indexed_frame = DataFrame(data, index=index) # noqa unindexed_frame = DataFrame(data) # noqa - self.assertEqual(self.mixed_frame['foo'].dtype, np.object_) + assert self.mixed_frame['foo'].dtype == np.object_ def test_constructor_cast_failure(self): foo = DataFrame({'a': ['a', 'b', 'c']}, dtype=np.float64) - self.assertEqual(foo['a'].dtype, object) + assert foo['a'].dtype == object # GH 3010, constructing with odd arrays df = DataFrame(np.ones((4, 2))) @@ -63,8 +61,8 @@ def test_constructor_cast_failure(self): df['foo'] = np.ones((4, 2)).tolist() # this is not ok - self.assertRaises(ValueError, df.__setitem__, tuple(['test']), - np.ones((4, 2))) + pytest.raises(ValueError, df.__setitem__, tuple(['test']), + np.ones((4, 2))) # this is ok df['foo2'] = np.ones((4, 2)).tolist() @@ -78,32 +76,31 @@ def test_constructor_dtype_copy(self): new_df = pd.DataFrame(orig_df, dtype=float, copy=True) new_df['col1'] = 200. - self.assertEqual(orig_df['col1'][0], 1.) + assert orig_df['col1'][0] == 1. 
def test_constructor_dtype_nocast_view(self): df = DataFrame([[1, 2]]) should_be_view = DataFrame(df, dtype=df[0].dtype) should_be_view[0][0] = 99 - self.assertEqual(df.values[0, 0], 99) + assert df.values[0, 0] == 99 should_be_view = DataFrame(df.values, dtype=df[0].dtype) should_be_view[0][0] = 97 - self.assertEqual(df.values[0, 0], 97) + assert df.values[0, 0] == 97 def test_constructor_dtype_list_data(self): df = DataFrame([[1, '2'], [None, 'a']], dtype=object) - self.assertIsNone(df.loc[1, 0]) - self.assertEqual(df.loc[0, 1], '2') + assert df.loc[1, 0] is None + assert df.loc[0, 1] == '2' def test_constructor_list_frames(self): - - # GH 3243 + # see gh-3243 result = DataFrame([DataFrame([])]) - self.assertEqual(result.shape, (1, 0)) + assert result.shape == (1, 0) result = DataFrame([DataFrame(dict(A=lrange(5)))]) - tm.assertIsInstance(result.iloc[0, 0], DataFrame) + assert isinstance(result.iloc[0, 0], DataFrame) def test_constructor_mixed_dtypes(self): @@ -123,7 +120,7 @@ def _make_mixed_dtypes_df(typ, ad=None): assert(a.dtype == d) if ad is None: ad = dict() - ad.update(dict([(d, a) for d, a in zipper])) + ad.update({d: a for d, a in zipper}) return DataFrame(ad) def _check_mixed_dtypes(df, dtypes=None): @@ -151,8 +148,8 @@ def test_constructor_complex_dtypes(self): b = np.random.rand(10).astype(np.complex128) df = DataFrame({'a': a, 'b': b}) - self.assertEqual(a.dtype, df.a.dtype) - self.assertEqual(b.dtype, df.b.dtype) + assert a.dtype == df.a.dtype + assert b.dtype == df.b.dtype def test_constructor_rec(self): rec = self.frame.to_records(index=False) @@ -163,11 +160,11 @@ def test_constructor_rec(self): index = self.frame.index df = DataFrame(rec) - self.assert_index_equal(df.columns, pd.Index(rec.dtype.names)) + tm.assert_index_equal(df.columns, pd.Index(rec.dtype.names)) df2 = DataFrame(rec, index=index) - self.assert_index_equal(df2.columns, pd.Index(rec.dtype.names)) - self.assert_index_equal(df2.index, index) + tm.assert_index_equal(df2.columns, 
pd.Index(rec.dtype.names)) + tm.assert_index_equal(df2.index, index) rng = np.arange(len(rec))[::-1] df3 = DataFrame(rec, index=rng, columns=['C', 'B']) @@ -177,7 +174,7 @@ def test_constructor_rec(self): def test_constructor_bool(self): df = DataFrame({0: np.ones(10, dtype=bool), 1: np.zeros(10, dtype=bool)}) - self.assertEqual(df.values.dtype, np.bool_) + assert df.values.dtype == np.bool_ def test_constructor_overflow_int64(self): # see gh-14881 @@ -185,7 +182,7 @@ def test_constructor_overflow_int64(self): dtype=np.uint64) result = DataFrame({'a': values}) - self.assertEqual(result['a'].dtype, np.uint64) + assert result['a'].dtype == np.uint64 # see gh-2355 data_scores = [(6311132704823138710, 273), (2685045978526272070, 23), @@ -196,7 +193,19 @@ def test_constructor_overflow_int64(self): data = np.zeros((len(data_scores),), dtype=dtype) data[:] = data_scores df_crawls = DataFrame(data) - self.assertEqual(df_crawls['uid'].dtype, np.uint64) + assert df_crawls['uid'].dtype == np.uint64 + + @pytest.mark.parametrize("values", [np.array([2**64], dtype=object), + np.array([2**65]), [2**64 + 1], + np.array([-2**63 - 4], dtype=object), + np.array([-2**64 - 1]), [-2**65 - 2]]) + def test_constructor_int_overflow(self, values): + # see gh-18584 + value = values[0] + result = DataFrame(values) + + assert result[0].dtype == object + assert result[0][0] == value def test_constructor_ordereddict(self): import random @@ -205,15 +214,15 @@ def test_constructor_ordereddict(self): random.shuffle(nums) expected = ['A%d' % i for i in nums] df = DataFrame(OrderedDict(zip(expected, [[0]] * nitems))) - self.assertEqual(expected, list(df.columns)) + assert expected == list(df.columns) def test_constructor_dict(self): frame = DataFrame({'col1': self.ts1, 'col2': self.ts2}) # col2 is padded with NaN - self.assertEqual(len(self.ts1), 30) - self.assertEqual(len(self.ts2), 25) + assert len(self.ts1) == 30 + assert len(self.ts2) == 25 tm.assert_series_equal(self.ts1, frame['col1'], 
check_names=False) @@ -225,104 +234,130 @@ def test_constructor_dict(self): 'col2': self.ts2}, columns=['col2', 'col3', 'col4']) - self.assertEqual(len(frame), len(self.ts2)) - self.assertNotIn('col1', frame) - self.assertTrue(isnull(frame['col3']).all()) + assert len(frame) == len(self.ts2) + assert 'col1' not in frame + assert isna(frame['col3']).all() # Corner cases - self.assertEqual(len(DataFrame({})), 0) + assert len(DataFrame({})) == 0 # mix dict and array, wrong size - no spec for which error should raise # first - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): DataFrame({'A': {'a': 'a', 'b': 'b'}, 'B': ['a', 'b', 'c']}) # Length-one dict micro-optimization frame = DataFrame({'A': {'1': 1, '2': 2}}) - self.assert_index_equal(frame.index, pd.Index(['1', '2'])) + tm.assert_index_equal(frame.index, pd.Index(['1', '2'])) # empty dict plus index idx = Index([0, 1, 2]) frame = DataFrame({}, index=idx) - self.assertIs(frame.index, idx) + assert frame.index is idx # empty with index and columns idx = Index([0, 1, 2]) frame = DataFrame({}, index=idx, columns=idx) - self.assertIs(frame.index, idx) - self.assertIs(frame.columns, idx) - self.assertEqual(len(frame._series), 3) + assert frame.index is idx + assert frame.columns is idx + assert len(frame._series) == 3 # with dict of empty list and Series frame = DataFrame({'A': [], 'B': []}, columns=['A', 'B']) - self.assert_index_equal(frame.index, Index([], dtype=np.int64)) + tm.assert_index_equal(frame.index, Index([], dtype=np.int64)) # GH 14381 # Dict with None value frame_none = DataFrame(dict(a=None), index=[0]) frame_none_list = DataFrame(dict(a=[None]), index=[0]) - tm.assert_equal(frame_none.get_value(0, 'a'), None) - tm.assert_equal(frame_none_list.get_value(0, 'a'), None) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + assert frame_none.get_value(0, 'a') is None + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + assert 
frame_none_list.get_value(0, 'a') is None tm.assert_frame_equal(frame_none, frame_none_list) # GH10856 # dict with scalar values should raise error, even if columns passed - with tm.assertRaises(ValueError): + msg = 'If using all scalar values, you must pass an index' + with tm.assert_raises_regex(ValueError, msg): DataFrame({'a': 0.7}) - with tm.assertRaises(ValueError): + with tm.assert_raises_regex(ValueError, msg): DataFrame({'a': 0.7}, columns=['a']) - with tm.assertRaises(ValueError): + with tm.assert_raises_regex(ValueError, msg): DataFrame({'a': 0.7}, columns=['b']) + @pytest.mark.skipif(not PY36, reason='Insertion order for Python>=3.6') + def test_constructor_dict_order_insertion(self): + # GH19018 + # initialization ordering: by insertion order if python>= 3.6 + d = {'b': self.ts2, 'a': self.ts1} + frame = DataFrame(data=d) + expected = DataFrame(data=d, columns=list('ba')) + tm.assert_frame_equal(frame, expected) + + @pytest.mark.skipif(PY36, reason='order by value for Python<3.6') + def test_constructor_dict_order_by_values(self): + # GH19018 + # initialization ordering: by value if python<3.6 + d = {'b': self.ts2, 'a': self.ts1} + frame = DataFrame(data=d) + expected = DataFrame(data=d, columns=list('ab')) + tm.assert_frame_equal(frame, expected) + def test_constructor_multi_index(self): # GH 4078 # construction error with mi and all-nan frame tuples = [(2, 3), (3, 3), (3, 3)] mi = MultiIndex.from_tuples(tuples) df = DataFrame(index=mi, columns=mi) - self.assertTrue(pd.isnull(df).values.ravel().all()) + assert pd.isna(df).values.ravel().all() tuples = [(3, 3), (2, 3), (3, 3)] mi = MultiIndex.from_tuples(tuples) df = DataFrame(index=mi, columns=mi) - self.assertTrue(pd.isnull(df).values.ravel().all()) + assert pd.isna(df).values.ravel().all() def test_constructor_error_msgs(self): msg = "Empty data passed with indices specified." # passing an empty array with columns specified. 
- with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): DataFrame(np.empty(0), columns=list('abc')) msg = "Mixing dicts with non-Series may lead to ambiguous ordering." # mix dict and array, wrong size - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): DataFrame({'A': {'a': 'a', 'b': 'b'}, 'B': ['a', 'b', 'c']}) # wrong size ndarray, GH 3105 msg = r"Shape of passed values is \(3, 4\), indices imply \(3, 3\)" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): DataFrame(np.arange(12).reshape((4, 3)), columns=['foo', 'bar', 'baz'], index=pd.date_range('2000-01-01', periods=3)) # higher dim raise exception - with tm.assertRaisesRegexp(ValueError, 'Must pass 2-d input'): + with tm.assert_raises_regex(ValueError, 'Must pass 2-d input'): DataFrame(np.zeros((3, 3, 3)), columns=['A', 'B', 'C'], index=[1]) # wrong size axis labels - with tm.assertRaisesRegexp(ValueError, "Shape of passed values is " - r"\(3, 2\), indices imply \(3, 1\)"): + with tm.assert_raises_regex(ValueError, "Shape of passed values " + r"is \(3, 2\), indices " + r"imply \(3, 1\)"): DataFrame(np.random.rand(2, 3), columns=['A', 'B', 'C'], index=[1]) - with tm.assertRaisesRegexp(ValueError, "Shape of passed values is " - r"\(3, 2\), indices imply \(2, 2\)"): + with tm.assert_raises_regex(ValueError, "Shape of passed values " + r"is \(3, 2\), indices " + r"imply \(2, 2\)"): DataFrame(np.random.rand(2, 3), columns=['A', 'B'], index=[1, 2]) - with tm.assertRaisesRegexp(ValueError, 'If using all scalar values, ' - 'you must pass an index'): + with tm.assert_raises_regex(ValueError, "If using all scalar " + "values, you must pass " + "an index"): DataFrame({'a': False, 'b': True}) def test_constructor_with_embedded_frames(self): @@ -345,8 +380,8 @@ def test_constructor_subclass_dict(self): data = {'col1': tm.TestSubDict((x, 10.0 * x) for x in range(10)), 'col2': tm.TestSubDict((x, 20.0 
* x) for x in range(10))} df = DataFrame(data) - refdf = DataFrame(dict((col, dict(compat.iteritems(val))) - for col, val in compat.iteritems(data))) + refdf = DataFrame({col: dict(compat.iteritems(val)) + for col, val in compat.iteritems(data)}) tm.assert_frame_equal(refdf, df) data = tm.TestSubDict(compat.iteritems(data)) @@ -377,14 +412,14 @@ def test_constructor_dict_cast(self): 'B': {'1': '1', '2': '2', '3': '3'}, } frame = DataFrame(test_data, dtype=float) - self.assertEqual(len(frame), 3) - self.assertEqual(frame['B'].dtype, np.float64) - self.assertEqual(frame['A'].dtype, np.float64) + assert len(frame) == 3 + assert frame['B'].dtype == np.float64 + assert frame['A'].dtype == np.float64 frame = DataFrame(test_data) - self.assertEqual(len(frame), 3) - self.assertEqual(frame['B'].dtype, np.object_) - self.assertEqual(frame['A'].dtype, np.float64) + assert len(frame) == 3 + assert frame['B'].dtype == np.object_ + assert frame['A'].dtype == np.float64 # can't cast to float test_data = { @@ -392,25 +427,24 @@ def test_constructor_dict_cast(self): 'B': dict(zip(range(15), randn(15))) } frame = DataFrame(test_data, dtype=float) - self.assertEqual(len(frame), 20) - self.assertEqual(frame['A'].dtype, np.object_) - self.assertEqual(frame['B'].dtype, np.float64) + assert len(frame) == 20 + assert frame['A'].dtype == np.object_ + assert frame['B'].dtype == np.float64 def test_constructor_dict_dont_upcast(self): d = {'Col1': {'Row1': 'A String', 'Row2': np.nan}} df = DataFrame(d) - tm.assertIsInstance(df['Col1']['Row2'], float) + assert isinstance(df['Col1']['Row2'], float) dm = DataFrame([[1, 2], ['a', 'b']], index=[1, 2], columns=[1, 2]) - tm.assertIsInstance(dm[1][1], int) + assert isinstance(dm[1][1], int) def test_constructor_dict_of_tuples(self): # GH #1491 data = {'a': (1, 2, 3), 'b': (4, 5, 6)} result = DataFrame(data) - expected = DataFrame(dict((k, list(v)) - for k, v in compat.iteritems(data))) + expected = DataFrame({k: list(v) for k, v in 
compat.iteritems(data)}) tm.assert_frame_equal(result, expected, check_dtype=False) def test_constructor_dict_multiindex(self): @@ -443,8 +477,8 @@ def test_constructor_dict_datetime64_index(self): dates_as_str = ['1984-02-19', '1988-11-06', '1989-12-03', '1990-03-15'] def create_data(constructor): - return dict((i, {constructor(s): 2 * i}) - for i, s in enumerate(dates_as_str)) + return {i: {constructor(s): 2 * i} + for i, s in enumerate(dates_as_str)} data_datetime64 = create_data(np.datetime64) data_datetime = create_data(lambda x: datetime.strptime(x, '%Y-%m-%d')) @@ -468,8 +502,8 @@ def test_constructor_dict_timedelta64_index(self): td_as_int = [1, 2, 3, 4] def create_data(constructor): - return dict((i, {constructor(s): 2 * i}) - for i, s in enumerate(td_as_int)) + return {i: {constructor(s): 2 * i} + for i, s in enumerate(td_as_int)} data_timedelta64 = create_data(lambda x: np.timedelta64(x, 'D')) data_timedelta = create_data(lambda x: timedelta(days=x)) @@ -493,14 +527,14 @@ def test_constructor_period(self): a = pd.PeriodIndex(['2012-01', 'NaT', '2012-04'], freq='M') b = pd.PeriodIndex(['2012-02-01', '2012-03-01', 'NaT'], freq='D') df = pd.DataFrame({'a': a, 'b': b}) - self.assertEqual(df['a'].dtype, 'object') - self.assertEqual(df['b'].dtype, 'object') + assert df['a'].dtype == 'object' + assert df['b'].dtype == 'object' # list of periods - df = pd.DataFrame({'a': a.asobject.tolist(), - 'b': b.asobject.tolist()}) - self.assertEqual(df['a'].dtype, 'object') - self.assertEqual(df['b'].dtype, 'object') + df = pd.DataFrame({'a': a.astype(object).tolist(), + 'b': b.astype(object).tolist()}) + assert df['a'].dtype == 'object' + assert df['b'].dtype == 'object' def test_nested_dict_frame_constructor(self): rng = pd.period_range('1/1/2000', periods=5) @@ -509,7 +543,9 @@ def test_nested_dict_frame_constructor(self): data = {} for col in df.columns: for row in df.index: - data.setdefault(col, {})[row] = df.get_value(row, col) + with 
tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + data.setdefault(col, {})[row] = df.get_value(row, col) result = DataFrame(data, columns=rng) tm.assert_frame_equal(result, df) @@ -517,67 +553,69 @@ def test_nested_dict_frame_constructor(self): data = {} for col in df.columns: for row in df.index: - data.setdefault(row, {})[col] = df.get_value(row, col) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + data.setdefault(row, {})[col] = df.get_value(row, col) result = DataFrame(data, index=rng).T tm.assert_frame_equal(result, df) def _check_basic_constructor(self, empty): - # mat: 2d matrix with shpae (3, 2) to input. empty - makes sized + # mat: 2d matrix with shape (3, 2) to input. empty - makes sized # objects mat = empty((2, 3), dtype=float) # 2-D input frame = DataFrame(mat, columns=['A', 'B', 'C'], index=[1, 2]) - self.assertEqual(len(frame.index), 2) - self.assertEqual(len(frame.columns), 3) + assert len(frame.index) == 2 + assert len(frame.columns) == 3 # 1-D input frame = DataFrame(empty((3,)), columns=['A'], index=[1, 2, 3]) - self.assertEqual(len(frame.index), 3) - self.assertEqual(len(frame.columns), 1) + assert len(frame.index) == 3 + assert len(frame.columns) == 1 # cast type frame = DataFrame(mat, columns=['A', 'B', 'C'], index=[1, 2], dtype=np.int64) - self.assertEqual(frame.values.dtype, np.int64) + assert frame.values.dtype == np.int64 # wrong size axis labels msg = r'Shape of passed values is \(3, 2\), indices imply \(3, 1\)' - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): DataFrame(mat, columns=['A', 'B', 'C'], index=[1]) msg = r'Shape of passed values is \(3, 2\), indices imply \(2, 2\)' - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): DataFrame(mat, columns=['A', 'B'], index=[1, 2]) # higher dim raise exception - with tm.assertRaisesRegexp(ValueError, 'Must pass 2-d input'): + with 
tm.assert_raises_regex(ValueError, 'Must pass 2-d input'): DataFrame(empty((3, 3, 3)), columns=['A', 'B', 'C'], index=[1]) # automatic labeling frame = DataFrame(mat) - self.assert_index_equal(frame.index, pd.Index(lrange(2))) - self.assert_index_equal(frame.columns, pd.Index(lrange(3))) + tm.assert_index_equal(frame.index, pd.Index(lrange(2))) + tm.assert_index_equal(frame.columns, pd.Index(lrange(3))) frame = DataFrame(mat, index=[1, 2]) - self.assert_index_equal(frame.columns, pd.Index(lrange(3))) + tm.assert_index_equal(frame.columns, pd.Index(lrange(3))) frame = DataFrame(mat, columns=['A', 'B', 'C']) - self.assert_index_equal(frame.index, pd.Index(lrange(2))) + tm.assert_index_equal(frame.index, pd.Index(lrange(2))) # 0-length axis frame = DataFrame(empty((0, 3))) - self.assertEqual(len(frame.index), 0) + assert len(frame.index) == 0 frame = DataFrame(empty((3, 0))) - self.assertEqual(len(frame.columns), 0) + assert len(frame.columns) == 0 def test_constructor_ndarray(self): self._check_basic_constructor(np.ones) frame = DataFrame(['foo', 'bar'], index=[0, 1], columns=['A']) - self.assertEqual(len(frame), 2) + assert len(frame) == 2 def test_constructor_maskedarray(self): self._check_basic_constructor(ma.masked_all) @@ -587,13 +625,13 @@ def test_constructor_maskedarray(self): mat[0, 0] = 1.0 mat[1, 2] = 2.0 frame = DataFrame(mat, columns=['A', 'B', 'C'], index=[1, 2]) - self.assertEqual(1.0, frame['A'][1]) - self.assertEqual(2.0, frame['C'][2]) + assert 1.0 == frame['A'][1] + assert 2.0 == frame['C'][2] # what is this even checking?? 
mat = ma.masked_all((2, 3), dtype=float) frame = DataFrame(mat, columns=['A', 'B', 'C'], index=[1, 2]) - self.assertTrue(np.all(~np.asarray(frame == frame))) + assert np.all(~np.asarray(frame == frame)) def test_constructor_maskedarray_nonfloat(self): # masked int promoted to float @@ -601,66 +639,66 @@ def test_constructor_maskedarray_nonfloat(self): # 2-D input frame = DataFrame(mat, columns=['A', 'B', 'C'], index=[1, 2]) - self.assertEqual(len(frame.index), 2) - self.assertEqual(len(frame.columns), 3) - self.assertTrue(np.all(~np.asarray(frame == frame))) + assert len(frame.index) == 2 + assert len(frame.columns) == 3 + assert np.all(~np.asarray(frame == frame)) # cast type frame = DataFrame(mat, columns=['A', 'B', 'C'], index=[1, 2], dtype=np.float64) - self.assertEqual(frame.values.dtype, np.float64) + assert frame.values.dtype == np.float64 # Check non-masked values mat2 = ma.copy(mat) mat2[0, 0] = 1 mat2[1, 2] = 2 frame = DataFrame(mat2, columns=['A', 'B', 'C'], index=[1, 2]) - self.assertEqual(1, frame['A'][1]) - self.assertEqual(2, frame['C'][2]) + assert 1 == frame['A'][1] + assert 2 == frame['C'][2] # masked np.datetime64 stays (use lib.NaT as null) mat = ma.masked_all((2, 3), dtype='M8[ns]') # 2-D input frame = DataFrame(mat, columns=['A', 'B', 'C'], index=[1, 2]) - self.assertEqual(len(frame.index), 2) - self.assertEqual(len(frame.columns), 3) - self.assertTrue(isnull(frame).values.all()) + assert len(frame.index) == 2 + assert len(frame.columns) == 3 + assert isna(frame).values.all() # cast type frame = DataFrame(mat, columns=['A', 'B', 'C'], index=[1, 2], dtype=np.int64) - self.assertEqual(frame.values.dtype, np.int64) + assert frame.values.dtype == np.int64 # Check non-masked values mat2 = ma.copy(mat) mat2[0, 0] = 1 mat2[1, 2] = 2 frame = DataFrame(mat2, columns=['A', 'B', 'C'], index=[1, 2]) - self.assertEqual(1, frame['A'].view('i8')[1]) - self.assertEqual(2, frame['C'].view('i8')[2]) + assert 1 == frame['A'].view('i8')[1] + assert 2 == 
frame['C'].view('i8')[2] # masked bool promoted to object mat = ma.masked_all((2, 3), dtype=bool) # 2-D input frame = DataFrame(mat, columns=['A', 'B', 'C'], index=[1, 2]) - self.assertEqual(len(frame.index), 2) - self.assertEqual(len(frame.columns), 3) - self.assertTrue(np.all(~np.asarray(frame == frame))) + assert len(frame.index) == 2 + assert len(frame.columns) == 3 + assert np.all(~np.asarray(frame == frame)) # cast type frame = DataFrame(mat, columns=['A', 'B', 'C'], index=[1, 2], dtype=object) - self.assertEqual(frame.values.dtype, object) + assert frame.values.dtype == object # Check non-masked values mat2 = ma.copy(mat) mat2[0, 0] = True mat2[1, 2] = False frame = DataFrame(mat2, columns=['A', 'B', 'C'], index=[1, 2]) - self.assertEqual(True, frame['A'][1]) - self.assertEqual(False, frame['C'][2]) + assert frame['A'][1] is True + assert frame['C'][2] is False def test_constructor_mrecarray(self): # Ensure mrecarray produces frame identical to dict of masked arrays @@ -688,8 +726,8 @@ def test_constructor_mrecarray(self): mrecs = mrecords.fromarrays(data, names=names) # fill the comb - comb = dict([(k, v.filled()) if hasattr( - v, 'filled') else (k, v) for k, v in comb]) + comb = {k: (v.filled() if hasattr(v, 'filled') else v) + for k, v in comb} expected = DataFrame(comb, columns=names) result = DataFrame(mrecs) @@ -707,41 +745,41 @@ def test_constructor_mrecarray(self): def test_constructor_corner(self): df = DataFrame(index=[]) - self.assertEqual(df.values.shape, (0, 0)) + assert df.values.shape == (0, 0) # empty but with specified dtype df = DataFrame(index=lrange(10), columns=['a', 'b'], dtype=object) - self.assertEqual(df.values.dtype, np.object_) + assert df.values.dtype == np.object_ # does not error but ends up float df = DataFrame(index=lrange(10), columns=['a', 'b'], dtype=int) - self.assertEqual(df.values.dtype, np.object_) + assert df.values.dtype == np.object_ # #1783 empty dtype object df = DataFrame({}, columns=['foo', 'bar']) - 
self.assertEqual(df.values.dtype, np.object_) + assert df.values.dtype == np.object_ df = DataFrame({'b': 1}, index=lrange(10), columns=list('abc'), dtype=int) - self.assertEqual(df.values.dtype, np.object_) + assert df.values.dtype == np.object_ def test_constructor_scalar_inference(self): data = {'int': 1, 'bool': True, 'float': 3., 'complex': 4j, 'object': 'foo'} df = DataFrame(data, index=np.arange(10)) - self.assertEqual(df['int'].dtype, np.int64) - self.assertEqual(df['bool'].dtype, np.bool_) - self.assertEqual(df['float'].dtype, np.float64) - self.assertEqual(df['complex'].dtype, np.complex128) - self.assertEqual(df['object'].dtype, np.object_) + assert df['int'].dtype == np.int64 + assert df['bool'].dtype == np.bool_ + assert df['float'].dtype == np.float64 + assert df['complex'].dtype == np.complex128 + assert df['object'].dtype == np.object_ def test_constructor_arrays_and_scalars(self): df = DataFrame({'a': randn(10), 'b': True}) exp = DataFrame({'a': df['a'].values, 'b': [True] * 10}) tm.assert_frame_equal(df, exp) - with tm.assertRaisesRegexp(ValueError, 'must pass an index'): + with tm.assert_raises_regex(ValueError, 'must pass an index'): DataFrame({'a': False, 'b': True}) def test_constructor_DataFrame(self): @@ -749,38 +787,38 @@ def test_constructor_DataFrame(self): tm.assert_frame_equal(df, self.frame) df_casted = DataFrame(self.frame, dtype=np.int64) - self.assertEqual(df_casted.values.dtype, np.int64) + assert df_casted.values.dtype == np.int64 def test_constructor_more(self): # used to be in test_matrix.py arr = randn(10) dm = DataFrame(arr, columns=['A'], index=np.arange(10)) - self.assertEqual(dm.values.ndim, 2) + assert dm.values.ndim == 2 arr = randn(0) dm = DataFrame(arr) - self.assertEqual(dm.values.ndim, 2) - self.assertEqual(dm.values.ndim, 2) + assert dm.values.ndim == 2 + assert dm.values.ndim == 2 # no data specified dm = DataFrame(columns=['A', 'B'], index=np.arange(10)) - self.assertEqual(dm.values.shape, (10, 2)) + assert 
dm.values.shape == (10, 2) dm = DataFrame(columns=['A', 'B']) - self.assertEqual(dm.values.shape, (0, 2)) + assert dm.values.shape == (0, 2) dm = DataFrame(index=np.arange(10)) - self.assertEqual(dm.values.shape, (10, 0)) + assert dm.values.shape == (10, 0) # corner, silly # TODO: Fix this Exception to be better... - with tm.assertRaisesRegexp(PandasError, 'constructor not ' - 'properly called'): + with tm.assert_raises_regex(ValueError, 'constructor not ' + 'properly called'): DataFrame((1, 2, 3)) # can't cast mat = np.array(['foo', 'bar'], dtype=object).reshape(2, 1) - with tm.assertRaisesRegexp(ValueError, 'cast'): + with tm.assert_raises_regex(ValueError, 'cast'): DataFrame(mat, index=[0, 1], columns=[0], dtype=float) dm = DataFrame(DataFrame(self.frame._series)) @@ -791,8 +829,8 @@ def test_constructor_more(self): 'B': np.ones(10, dtype=np.float64)}, index=np.arange(10)) - self.assertEqual(len(dm.columns), 2) - self.assertEqual(dm.values.dtype, np.float64) + assert len(dm.columns) == 2 + assert dm.values.dtype == np.float64 def test_constructor_empty_list(self): df = DataFrame([], index=[]) @@ -816,12 +854,12 @@ def test_constructor_list_of_lists(self): # GH #484 l = [[1, 'a'], [2, 'b']] df = DataFrame(data=l, columns=["num", "str"]) - self.assertTrue(is_integer_dtype(df['num'])) - self.assertEqual(df['str'].dtype, np.object_) + assert is_integer_dtype(df['num']) + assert df['str'].dtype == np.object_ # GH 4851 # list of 0-dim ndarrays - expected = DataFrame({0: range(10)}) + expected = DataFrame({0: np.arange(10)}) data = [np.array(x) for x in range(10)] result = DataFrame(data) tm.assert_frame_equal(result, expected) @@ -851,7 +889,7 @@ def __len__(self, n): # GH 4297 # support Array import array - result = DataFrame.from_items([('A', array.array('i', range(10)))]) + result = DataFrame({'A': array.array('i', range(10))}) expected = DataFrame({'A': list(range(10))}) tm.assert_frame_equal(result, expected, check_dtype=False) @@ -1006,8 +1044,8 @@ class 
CustomDict(dict): def test_constructor_ragged(self): data = {'A': randn(10), 'B': randn(8)} - with tm.assertRaisesRegexp(ValueError, - 'arrays must all be same length'): + with tm.assert_raises_regex(ValueError, + 'arrays must all be same length'): DataFrame(data) def test_constructor_scalar(self): @@ -1026,10 +1064,10 @@ def test_constructor_mixed_dict_and_Series(self): data['B'] = Series([4, 3, 2, 1], index=['bar', 'qux', 'baz', 'foo']) result = DataFrame(data) - self.assertTrue(result.index.is_monotonic) + assert result.index.is_monotonic # ordering ambiguous, raise exception - with tm.assertRaisesRegexp(ValueError, 'ambiguous ordering'): + with tm.assert_raises_regex(ValueError, 'ambiguous ordering'): DataFrame({'A': ['a', 'b'], 'B': {'a': 'a', 'b': 'b'}}) # this is OK though @@ -1071,11 +1109,30 @@ def test_constructor_orient(self): xp = DataFrame.from_dict(a).T.reindex(list(a.keys())) tm.assert_frame_equal(rs, xp) + def test_from_dict_columns_parameter(self): + # GH 18529 + # Test new columns parameter for from_dict that was added to make + # from_items(..., orient='index', columns=[...]) easier to replicate + result = DataFrame.from_dict(OrderedDict([('A', [1, 2]), + ('B', [4, 5])]), + orient='index', columns=['one', 'two']) + expected = DataFrame([[1, 2], [4, 5]], index=['A', 'B'], + columns=['one', 'two']) + tm.assert_frame_equal(result, expected) + + msg = "cannot use columns parameter with orient='columns'" + with tm.assert_raises_regex(ValueError, msg): + DataFrame.from_dict(dict([('A', [1, 2]), ('B', [4, 5])]), + orient='columns', columns=['one', 'two']) + with tm.assert_raises_regex(ValueError, msg): + DataFrame.from_dict(dict([('A', [1, 2]), ('B', [4, 5])]), + columns=['one', 'two']) + def test_constructor_Series_named(self): a = Series([1, 2, 3], index=['a', 'b', 'c'], name='x') df = DataFrame(a) - self.assertEqual(df.columns[0], 'x') - self.assert_index_equal(df.index, a.index) + assert df.columns[0] == 'x' + tm.assert_index_equal(df.index, 
a.index) # ndarray like arr = np.random.randn(10) @@ -1089,12 +1146,12 @@ def test_constructor_Series_named(self): expected = DataFrame({0: s}) tm.assert_frame_equal(df, expected) - self.assertRaises(ValueError, DataFrame, s, columns=[1, 2]) + pytest.raises(ValueError, DataFrame, s, columns=[1, 2]) # #2234 a = Series([], name='x') df = DataFrame(a) - self.assertEqual(df.columns[0], 'x') + assert df.columns[0] == 'x' # series with name and w/o s1 = Series(arr, name='x') @@ -1108,6 +1165,22 @@ def test_constructor_Series_named(self): expected = DataFrame({1: s1, 0: arr}, columns=[0, 1]) tm.assert_frame_equal(df, expected) + def test_constructor_Series_named_and_columns(self): + # GH 9232 validation + + s0 = Series(range(5), name=0) + s1 = Series(range(5), name=1) + + # matching name and column gives standard frame + tm.assert_frame_equal(pd.DataFrame(s0, columns=[0]), + s0.to_frame()) + tm.assert_frame_equal(pd.DataFrame(s1, columns=[1]), + s1.to_frame()) + + # non-matching produces empty frame + assert pd.DataFrame(s0, columns=[1]).empty + assert pd.DataFrame(s1, columns=[0]).empty + def test_constructor_Series_differently_indexed(self): # name s1 = Series([1, 2, 3], index=['a', 'b', 'c'], name='x') @@ -1119,13 +1192,13 @@ def test_constructor_Series_differently_indexed(self): df1 = DataFrame(s1, index=other_index) exp1 = DataFrame(s1.reindex(other_index)) - self.assertEqual(df1.columns[0], 'x') + assert df1.columns[0] == 'x' tm.assert_frame_equal(df1, exp1) df2 = DataFrame(s2, index=other_index) exp2 = DataFrame(s2.reindex(other_index)) - self.assertEqual(df2.columns[0], 0) - self.assert_index_equal(df2.index, other_index) + assert df2.columns[0] == 0 + tm.assert_index_equal(df2.index, other_index) tm.assert_frame_equal(df2, exp2) def test_constructor_manager_resize(self): @@ -1134,58 +1207,100 @@ def test_constructor_manager_resize(self): result = DataFrame(self.frame._data, index=index, columns=columns) - self.assert_index_equal(result.index, Index(index)) - 
self.assert_index_equal(result.columns, Index(columns)) + tm.assert_index_equal(result.index, Index(index)) + tm.assert_index_equal(result.columns, Index(columns)) def test_constructor_from_items(self): items = [(c, self.frame[c]) for c in self.frame.columns] - recons = DataFrame.from_items(items) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + recons = DataFrame.from_items(items) tm.assert_frame_equal(recons, self.frame) # pass some columns - recons = DataFrame.from_items(items, columns=['C', 'B', 'A']) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + recons = DataFrame.from_items(items, columns=['C', 'B', 'A']) tm.assert_frame_equal(recons, self.frame.loc[:, ['C', 'B', 'A']]) # orient='index' row_items = [(idx, self.mixed_frame.xs(idx)) for idx in self.mixed_frame.index] - - recons = DataFrame.from_items(row_items, - columns=self.mixed_frame.columns, - orient='index') + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + recons = DataFrame.from_items(row_items, + columns=self.mixed_frame.columns, + orient='index') tm.assert_frame_equal(recons, self.mixed_frame) - self.assertEqual(recons['A'].dtype, np.float64) + assert recons['A'].dtype == np.float64 - with tm.assertRaisesRegexp(TypeError, - "Must pass columns with orient='index'"): - DataFrame.from_items(row_items, orient='index') + with tm.assert_raises_regex(TypeError, + "Must pass columns with " + "orient='index'"): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + DataFrame.from_items(row_items, orient='index') # orient='index', but thar be tuples - arr = lib.list_to_object_array( + arr = construct_1d_object_array_from_listlike( [('bar', 'baz')] * len(self.mixed_frame)) self.mixed_frame['foo'] = arr row_items = [(idx, list(self.mixed_frame.xs(idx))) for idx in self.mixed_frame.index] - recons = DataFrame.from_items(row_items, - columns=self.mixed_frame.columns, - orient='index') + with 
tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + recons = DataFrame.from_items(row_items, + columns=self.mixed_frame.columns, + orient='index') tm.assert_frame_equal(recons, self.mixed_frame) - tm.assertIsInstance(recons['foo'][0], tuple) + assert isinstance(recons['foo'][0], tuple) - rs = DataFrame.from_items([('A', [1, 2, 3]), ('B', [4, 5, 6])], - orient='index', - columns=['one', 'two', 'three']) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + rs = DataFrame.from_items([('A', [1, 2, 3]), ('B', [4, 5, 6])], + orient='index', + columns=['one', 'two', 'three']) xp = DataFrame([[1, 2, 3], [4, 5, 6]], index=['A', 'B'], columns=['one', 'two', 'three']) tm.assert_frame_equal(rs, xp) + def test_constructor_from_items_scalars(self): + # GH 17312 + with tm.assert_raises_regex(ValueError, + r'The value in each \(key, value\) ' + 'pair must be an array, Series, or dict'): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + DataFrame.from_items([('A', 1), ('B', 4)]) + + with tm.assert_raises_regex(ValueError, + r'The value in each \(key, value\) ' + 'pair must be an array, Series, or dict'): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + DataFrame.from_items([('A', 1), ('B', 2)], columns=['col1'], + orient='index') + + def test_from_items_deprecation(self): + # GH 17320 + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + DataFrame.from_items([('A', [1, 2, 3]), ('B', [4, 5, 6])]) + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + DataFrame.from_items([('A', [1, 2, 3]), ('B', [4, 5, 6])], + columns=['col1', 'col2', 'col3'], + orient='index') + def test_constructor_mix_series_nonseries(self): df = DataFrame({'A': self.frame['A'], 'B': list(self.frame['B'])}, columns=['A', 'B']) tm.assert_frame_equal(df, self.frame.loc[:, ['A', 'B']]) - with tm.assertRaisesRegexp(ValueError, 'does not match index length'): + with 
tm.assert_raises_regex(ValueError, 'does not match ' + 'index length'): DataFrame({'A': self.frame['A'], 'B': list(self.frame['B'])[:-2]}) def test_constructor_miscast_na_int_dtype(self): @@ -1194,8 +1309,8 @@ def test_constructor_miscast_na_int_dtype(self): tm.assert_frame_equal(df, expected) def test_constructor_iterator_failure(self): - with tm.assertRaisesRegexp(TypeError, 'iterator'): - df = DataFrame(iter([1, 2, 3])) # noqa + with tm.assert_raises_regex(TypeError, 'iterator'): + DataFrame(iter([1, 2, 3])) def test_constructor_column_duplicates(self): # it works! #2079 @@ -1205,13 +1320,13 @@ def test_constructor_column_duplicates(self): tm.assert_frame_equal(df, edf) - idf = DataFrame.from_items( - [('a', [8]), ('a', [5])], columns=['a', 'a']) + idf = DataFrame.from_records([(8, 5)], + columns=['a', 'a']) + tm.assert_frame_equal(idf, edf) - self.assertRaises(ValueError, DataFrame.from_items, - [('a', [8]), ('a', [5]), ('b', [6])], - columns=['b', 'a', 'a']) + pytest.raises(ValueError, DataFrame.from_dict, + OrderedDict([('b', 8), ('a', 5), ('a', 6)])) def test_constructor_empty_with_string_dtype(self): # GH 9428 @@ -1242,9 +1357,10 @@ def test_constructor_single_value(self): dtype=object), index=[1, 2], columns=['a', 'c'])) - self.assertRaises(com.PandasError, DataFrame, 'a', [1, 2]) - self.assertRaises(com.PandasError, DataFrame, 'a', columns=['a', 'c']) - with tm.assertRaisesRegexp(TypeError, 'incompatible data and dtype'): + pytest.raises(ValueError, DataFrame, 'a', [1, 2]) + pytest.raises(ValueError, DataFrame, 'a', columns=['a', 'c']) + with tm.assert_raises_regex(TypeError, 'incompatible data ' + 'and dtype'): DataFrame('a', [1, 2], ['a', 'c'], float) def test_constructor_with_datetimes(self): @@ -1283,9 +1399,8 @@ def test_constructor_with_datetimes(self): expected['float64'] = 1 expected[floatname] = 1 - result.sort_index() - expected = Series(expected) - expected.sort_index() + result = result.sort_index() + expected = Series(expected).sort_index() 
tm.assert_series_equal(result, expected) # check with ndarray construction ndim>0 @@ -1294,19 +1409,19 @@ def test_constructor_with_datetimes(self): intname: np.array([1] * 10, dtype=intname)}, index=np.arange(10)) result = df.get_dtype_counts() - result.sort_index() + result = result.sort_index() tm.assert_series_equal(result, expected) # GH 2809 ind = date_range(start="2000-01-01", freq="D", periods=10) datetimes = [ts.to_pydatetime() for ts in ind] datetime_s = Series(datetimes) - self.assertEqual(datetime_s.dtype, 'M8[ns]') + assert datetime_s.dtype == 'M8[ns]' df = DataFrame({'datetime_s': datetime_s}) result = df.get_dtype_counts() expected = Series({datetime64name: 1}) - result.sort_index() - expected.sort_index() + result = result.sort_index() + expected = expected.sort_index() tm.assert_series_equal(result, expected) # GH 2810 @@ -1316,8 +1431,8 @@ def test_constructor_with_datetimes(self): df = DataFrame({'datetimes': datetimes, 'dates': dates}) result = df.get_dtype_counts() expected = Series({datetime64name: 1, objectname: 1}) - result.sort_index() - expected.sort_index() + result = result.sort_index() + expected = expected.sort_index() tm.assert_series_equal(result, expected) # GH 7594 @@ -1327,12 +1442,12 @@ def test_constructor_with_datetimes(self): dt = tz.localize(datetime(2012, 1, 1)) df = DataFrame({'End Date': dt}, index=[0]) - self.assertEqual(df.iat[0, 0], dt) + assert df.iat[0, 0] == dt tm.assert_series_equal(df.dtypes, Series( {'End Date': 'datetime64[ns, US/Eastern]'})) df = DataFrame([{'End Date': dt}]) - self.assertEqual(df.iat[0, 0], dt) + assert df.iat[0, 0] == dt tm.assert_series_equal(df.dtypes, Series( {'End Date': 'datetime64[ns, US/Eastern]'})) @@ -1340,13 +1455,13 @@ def test_constructor_with_datetimes(self): # GH 8411 dr = date_range('20130101', periods=3) df = DataFrame({'value': dr}) - self.assertTrue(df.iat[0, 0].tz is None) + assert df.iat[0, 0].tz is None dr = date_range('20130101', periods=3, tz='UTC') df = 
DataFrame({'value': dr}) - self.assertTrue(str(df.iat[0, 0].tz) == 'UTC') + assert str(df.iat[0, 0].tz) == 'UTC' dr = date_range('20130101', periods=3, tz='US/Eastern') df = DataFrame({'value': dr}) - self.assertTrue(str(df.iat[0, 0].tz) == 'US/Eastern') + assert str(df.iat[0, 0].tz) == 'US/Eastern' # GH 7822 # preserver an index with a tz on dict construction @@ -1368,6 +1483,15 @@ def test_constructor_with_datetimes(self): .reset_index(drop=True), 'b': i_no_tz}) tm.assert_frame_equal(df, expected) + def test_constructor_datetimes_with_nulls(self): + # gh-15869 + for arr in [np.array([None, None, None, None, + datetime.now(), None]), + np.array([None, None, datetime.now(), None])]: + result = DataFrame(arr).get_dtype_counts() + expected = Series({'datetime64[ns]': 1}) + tm.assert_series_equal(result, expected) + def test_constructor_for_list_with_dtypes(self): # TODO(wesm): unused intname = np.dtype(np.int_).name # noqa @@ -1431,25 +1555,25 @@ def test_constructor_for_list_with_dtypes(self): result = df.get_dtype_counts() expected = Series( {'int64': 1, 'float64': 2, datetime64name: 1, objectname: 1}) - result.sort_index() - expected.sort_index() + result = result.sort_index() + expected = expected.sort_index() tm.assert_series_equal(result, expected) def test_constructor_frame_copy(self): cop = DataFrame(self.frame, copy=True) cop['A'] = 5 - self.assertTrue((cop['A'] == 5).all()) - self.assertFalse((self.frame['A'] == 5).all()) + assert (cop['A'] == 5).all() + assert not (self.frame['A'] == 5).all() def test_constructor_ndarray_copy(self): df = DataFrame(self.frame.values) self.frame.values[5] = 5 - self.assertTrue((df.values[5] == 5).all()) + assert (df.values[5] == 5).all() df = DataFrame(self.frame.values, copy=True) self.frame.values[6] = 6 - self.assertFalse((df.values[6] == 6).all()) + assert not (df.values[6] == 6).all() def test_constructor_series_copy(self): series = self.frame._series @@ -1457,7 +1581,7 @@ def test_constructor_series_copy(self): df = 
DataFrame({'A': series['A']}) df['A'][:] = 5 - self.assertFalse((series['A'] == 5).all()) + assert not (series['A'] == 5).all() def test_constructor_with_nas(self): # GH 5016 @@ -1468,7 +1592,7 @@ def check(df): df.iloc[:, i] # allow single nans to succeed - indexer = np.arange(len(df.columns))[isnull(df.columns)] + indexer = np.arange(len(df.columns))[isna(df.columns)] if len(indexer) == 1: tm.assert_series_equal(df.iloc[:, indexer[0]], @@ -1479,7 +1603,7 @@ def check(df): def f(): df.loc[:, np.nan] - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) df = DataFrame([[1, 2, 3], [4, 5, 6]], index=[1, np.nan]) check(df) @@ -1498,8 +1622,81 @@ def f(): def test_constructor_lists_to_object_dtype(self): # from #1074 d = DataFrame({'a': [np.nan, False]}) - self.assertEqual(d['a'].dtype, np.object_) - self.assertFalse(d['a'][1]) + assert d['a'].dtype == np.object_ + assert not d['a'][1] + + def test_constructor_categorical(self): + + # GH8626 + + # dict creation + df = DataFrame({'A': list('abc')}, dtype='category') + expected = Series(list('abc'), dtype='category', name='A') + tm.assert_series_equal(df['A'], expected) + + # to_frame + s = Series(list('abc'), dtype='category') + result = s.to_frame() + expected = Series(list('abc'), dtype='category', name=0) + tm.assert_series_equal(result[0], expected) + result = s.to_frame(name='foo') + expected = Series(list('abc'), dtype='category', name='foo') + tm.assert_series_equal(result['foo'], expected) + + # list-like creation + df = DataFrame(list('abc'), dtype='category') + expected = Series(list('abc'), dtype='category', name=0) + tm.assert_series_equal(df[0], expected) + + # ndim != 1 + df = DataFrame([Categorical(list('abc'))]) + expected = DataFrame({0: Series(list('abc'), dtype='category')}) + tm.assert_frame_equal(df, expected) + + df = DataFrame([Categorical(list('abc')), Categorical(list('abd'))]) + expected = DataFrame({0: Series(list('abc'), dtype='category'), + 1: Series(list('abd'), 
dtype='category')}, + columns=[0, 1]) + tm.assert_frame_equal(df, expected) + + # mixed + df = DataFrame([Categorical(list('abc')), list('def')]) + expected = DataFrame({0: Series(list('abc'), dtype='category'), + 1: list('def')}, columns=[0, 1]) + tm.assert_frame_equal(df, expected) + + # invalid (shape) + pytest.raises(ValueError, + lambda: DataFrame([Categorical(list('abc')), + Categorical(list('abdefg'))])) + + # ndim > 1 + pytest.raises(NotImplementedError, + lambda: Categorical(np.array([list('abcd')]))) + + def test_constructor_categorical_series(self): + + l = [1, 2, 3, 1] + exp = Series(l).astype('category') + res = Series(l, dtype='category') + tm.assert_series_equal(res, exp) + + l = ["a", "b", "c", "a"] + exp = Series(l).astype('category') + res = Series(l, dtype='category') + tm.assert_series_equal(res, exp) + + # insert into frame with different index + # GH 8076 + index = date_range('20000101', periods=3) + expected = Series(Categorical(values=[np.nan, np.nan, np.nan], + categories=['a', 'b', 'c'])) + expected.index = index + + expected = DataFrame({'x': expected}) + df = DataFrame( + {'x': Series(['a', 'b', 'c'], dtype='category')}, index=index) + tm.assert_frame_equal(df, expected) def test_from_records_to_records(self): # from numpy documentation @@ -1511,7 +1708,7 @@ def test_from_records_to_records(self): index = pd.Index(np.arange(len(arr))[::-1]) indexed_frame = DataFrame.from_records(arr, index=index) - self.assert_index_equal(indexed_frame.index, index) + tm.assert_index_equal(indexed_frame.index, index) # without names, it should go to last ditch arr2 = np.zeros((2, 3)) @@ -1519,18 +1716,18 @@ def test_from_records_to_records(self): # wrong length msg = r'Shape of passed values is \(3, 2\), indices imply \(3, 1\)' - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): DataFrame.from_records(arr, index=index[:-1]) indexed_frame = DataFrame.from_records(arr, index='f1') # what to do? 
records = indexed_frame.to_records() - self.assertEqual(len(records.dtype.names), 3) + assert len(records.dtype.names) == 3 records = indexed_frame.to_records(index=False) - self.assertEqual(len(records.dtype.names), 2) - self.assertNotIn('index', records.dtype.names) + assert len(records.dtype.names) == 2 + assert 'index' not in records.dtype.names def test_from_records_nones(self): tuples = [(1, 2, None, 3), @@ -1538,7 +1735,7 @@ def test_from_records_nones(self): (None, 2, 5, 3)] df = DataFrame.from_records(tuples, columns=['a', 'b', 'c', 'd']) - self.assertTrue(np.isnan(df['c'][0])) + assert np.isnan(df['c'][0]) def test_from_records_iterator(self): arr = np.array([(1.0, 1.0, 2, 2), (3.0, 3.0, 4, 4), (5., 5., 6, 6), @@ -1603,7 +1800,7 @@ def test_from_records_columns_not_modified(self): df = DataFrame.from_records(tuples, columns=columns, index='a') # noqa - self.assertEqual(columns, original_columns) + assert columns == original_columns def test_from_records_decimal(self): from decimal import Decimal @@ -1611,11 +1808,11 @@ def test_from_records_decimal(self): tuples = [(Decimal('1.5'),), (Decimal('2.5'),), (None,)] df = DataFrame.from_records(tuples, columns=['a']) - self.assertEqual(df['a'].dtype, object) + assert df['a'].dtype == object df = DataFrame.from_records(tuples, columns=['a'], coerce_float=True) - self.assertEqual(df['a'].dtype, np.float64) - self.assertTrue(np.isnan(df['a'].values[-1])) + assert df['a'].dtype == np.float64 + assert np.isnan(df['a'].values[-1]) def test_from_records_duplicates(self): result = DataFrame.from_records([(1, 2, 3), (4, 5, 6)], @@ -1635,12 +1832,12 @@ def create_dict(order_id): documents.append({'order_id': 10, 'quantity': 5}) result = DataFrame.from_records(documents, index='order_id') - self.assertEqual(result.index.name, 'order_id') + assert result.index.name == 'order_id' # MultiIndex result = DataFrame.from_records(documents, index=['order_id', 'quantity']) - self.assertEqual(result.index.names, ('order_id', 
'quantity')) + assert result.index.names == ('order_id', 'quantity') def test_from_records_misc_brokenness(self): # #2179 @@ -1671,7 +1868,7 @@ def test_from_records_misc_brokenness(self): rows.append([datetime(2010, 1, 1), 1]) rows.append([datetime(2010, 1, 2), 1]) df2_obj = DataFrame.from_records(rows, columns=['date', 'test']) - results = df2_obj.get_dtype_counts() + results = df2_obj.get_dtype_counts().sort_index() expected = Series({'datetime64[ns]': 1, 'int64': 1}) tm.assert_series_equal(results, expected) @@ -1689,13 +1886,13 @@ def test_from_records_empty_with_nonempty_fields_gh3682(self): a = np.array([(1, 2)], dtype=[('id', np.int64), ('value', np.int64)]) df = DataFrame.from_records(a, index='id') tm.assert_index_equal(df.index, Index([1], name='id')) - self.assertEqual(df.index.name, 'id') + assert df.index.name == 'id' tm.assert_index_equal(df.columns, Index(['value'])) b = np.array([], dtype=[('id', np.int64), ('value', np.int64)]) df = DataFrame.from_records(b, index='id') tm.assert_index_equal(df.index, Index([], name='id')) - self.assertEqual(df.index.name, 'id') + assert df.index.name == 'id' def test_from_records_with_datetimes(self): @@ -1738,7 +1935,7 @@ def test_from_records_sequencelike(self): # this is actually tricky to create the recordlike arrays and # have the dtypes be intact - blocks = df.blocks + blocks = df._to_dict_of_blocks() tuples = [] columns = [] dtypes = [] @@ -1791,13 +1988,13 @@ def test_from_records_sequencelike(self): # empty case result = DataFrame.from_records([], columns=['foo', 'bar', 'baz']) - self.assertEqual(len(result), 0) - self.assert_index_equal(result.columns, - pd.Index(['foo', 'bar', 'baz'])) + assert len(result) == 0 + tm.assert_index_equal(result.columns, + pd.Index(['foo', 'bar', 'baz'])) result = DataFrame.from_records([]) - self.assertEqual(len(result), 0) - self.assertEqual(len(result.columns), 0) + assert len(result) == 0 + assert len(result.columns) == 0 def test_from_records_dictlike(self): @@ 
-1813,12 +2010,13 @@ def test_from_records_dictlike(self): # columns is in a different order here than the actual items iterated # from the dict + blocks = df._to_dict_of_blocks() columns = [] - for dtype, b in compat.iteritems(df.blocks): + for dtype, b in compat.iteritems(blocks): columns.extend(b.columns) - asdict = dict((x, y) for x, y in compat.iteritems(df)) - asdict2 = dict((x, y.values) for x, y in compat.iteritems(df)) + asdict = {x: y for x, y in compat.iteritems(df)} + asdict2 = {x: y.values for x, y in compat.iteritems(df)} # dict of series & dict of ndarrays (have dtype info) results = [] @@ -1850,8 +2048,8 @@ def test_from_records_bad_index_column(self): tm.assert_index_equal(df1.index, Index(df.C)) # should fail - self.assertRaises(ValueError, DataFrame.from_records, df, index=[2]) - self.assertRaises(KeyError, DataFrame.from_records, df, index=2) + pytest.raises(ValueError, DataFrame.from_records, df, index=[2]) + pytest.raises(KeyError, DataFrame.from_records, df, index=2) def test_from_records_non_tuple(self): class Record(object): @@ -1876,13 +2074,30 @@ def test_from_records_len0_with_columns(self): # #2633 result = DataFrame.from_records([], index='foo', columns=['foo', 'bar']) + expected = Index(['bar']) + + assert len(result) == 0 + assert result.index.name == 'foo' + tm.assert_index_equal(result.columns, expected) + + def test_to_frame_with_falsey_names(self): + # GH 16114 + result = Series(name=0).to_frame().dtypes + expected = Series({0: np.float64}) + tm.assert_series_equal(result, expected) - self.assertTrue(np.array_equal(result.columns, ['bar'])) - self.assertEqual(len(result), 0) - self.assertEqual(result.index.name, 'foo') + result = DataFrame(Series(name=0)).dtypes + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('dtype', [None, 'uint8', 'category']) + def test_constructor_range_dtype(self, dtype): + # GH 16804 + expected = DataFrame({'A': [0, 1, 2, 3, 4]}, dtype=dtype or 'int64') + result = DataFrame({'A': 
range(5)}, dtype=dtype) + tm.assert_frame_equal(result, expected) -class TestDataFrameConstructorWithDatetimeTZ(tm.TestCase, TestData): +class TestDataFrameConstructorWithDatetimeTZ(TestData): def test_from_dict(self): @@ -1895,8 +2110,8 @@ def test_from_dict(self): # construction df = DataFrame({'A': idx, 'B': dr}) - self.assertTrue(df['A'].dtype, 'M8[ns, US/Eastern') - self.assertTrue(df['A'].name == 'A') + assert df['A'].dtype, 'M8[ns, US/Eastern' + assert df['A'].name == 'A' tm.assert_series_equal(df['A'], Series(idx, name='A')) tm.assert_series_equal(df['B'], Series(dr, name='B')) @@ -1929,7 +2144,7 @@ def test_frame_datetime64_mixed_index_ctor_1681(self): # it works! d = DataFrame({'A': 'foo', 'B': ts}, index=dr) - self.assertTrue(d['B'].isnull().all()) + assert d['B'].isna().all() def test_frame_timeseries_to_records(self): index = date_range('1/1/2000', periods=10) @@ -1940,3 +2155,14 @@ def test_frame_timeseries_to_records(self): result['index'].dtype == 'M8[ns]' result = df.to_records(index=False) + + def test_frame_timeseries_column(self): + # GH19157 + dr = date_range(start='20130101T10:00:00', periods=3, freq='T', + tz='US/Eastern') + result = DataFrame(dr, columns=['timestamps']) + expected = DataFrame({'timestamps': [ + Timestamp('20130101T10:00:00', tz='US/Eastern'), + Timestamp('20130101T10:01:00', tz='US/Eastern'), + Timestamp('20130101T10:02:00', tz='US/Eastern')]}) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_convert_to.py b/pandas/tests/frame/test_convert_to.py index 1bc8313726d0c..024de8bc13f72 100644 --- a/pandas/tests/frame/test_convert_to.py +++ b/pandas/tests/frame/test_convert_to.py @@ -1,64 +1,22 @@ # -*- coding: utf-8 -*- -from __future__ import print_function +from datetime import datetime -from numpy import nan +import pytest +import pytz +import collections import numpy as np from pandas import compat +from pandas.compat import long from pandas import (DataFrame, Series, MultiIndex, Timestamp, 
date_range) import pandas.util.testing as tm - from pandas.tests.frame.common import TestData -class TestDataFrameConvertTo(tm.TestCase, TestData): - - def test_to_dict(self): - test_data = { - 'A': {'1': 1, '2': 2}, - 'B': {'1': '1', '2': '2', '3': '3'}, - } - recons_data = DataFrame(test_data).to_dict() - - for k, v in compat.iteritems(test_data): - for k2, v2 in compat.iteritems(v): - self.assertEqual(v2, recons_data[k][k2]) - - recons_data = DataFrame(test_data).to_dict("l") - - for k, v in compat.iteritems(test_data): - for k2, v2 in compat.iteritems(v): - self.assertEqual(v2, recons_data[k][int(k2) - 1]) - - recons_data = DataFrame(test_data).to_dict("s") - - for k, v in compat.iteritems(test_data): - for k2, v2 in compat.iteritems(v): - self.assertEqual(v2, recons_data[k][k2]) - - recons_data = DataFrame(test_data).to_dict("sp") - expected_split = {'columns': ['A', 'B'], 'index': ['1', '2', '3'], - 'data': [[1.0, '1'], [2.0, '2'], [nan, '3']]} - tm.assert_dict_equal(recons_data, expected_split) - - recons_data = DataFrame(test_data).to_dict("r") - expected_records = [{'A': 1.0, 'B': '1'}, - {'A': 2.0, 'B': '2'}, - {'A': nan, 'B': '3'}] - tm.assertIsInstance(recons_data, list) - self.assertEqual(len(recons_data), 3) - for l, r in zip(recons_data, expected_records): - tm.assert_dict_equal(l, r) - - # GH10844 - recons_data = DataFrame(test_data).to_dict("i") - - for k, v in compat.iteritems(test_data): - for k2, v2 in compat.iteritems(v): - self.assertEqual(v2, recons_data[k2][k]) +class TestDataFrameConvertTo(TestData): def test_to_dict_timestamp(self): @@ -75,10 +33,10 @@ def test_to_dict_timestamp(self): expected_records_mixed = [{'A': tsmp, 'B': 1}, {'A': tsmp, 'B': 2}] - self.assertEqual(test_data.to_dict(orient='records'), - expected_records) - self.assertEqual(test_data_mixed.to_dict(orient='records'), - expected_records_mixed) + assert (test_data.to_dict(orient='records') == + expected_records) + assert (test_data_mixed.to_dict(orient='records') == + 
expected_records_mixed) expected_series = { 'A': Series([tsmp, tsmp], name='A'), @@ -114,16 +72,16 @@ def test_to_dict_timestamp(self): def test_to_dict_invalid_orient(self): df = DataFrame({'A': [0, 1]}) - self.assertRaises(ValueError, df.to_dict, orient='xinvalid') + pytest.raises(ValueError, df.to_dict, orient='xinvalid') def test_to_records_dt64(self): df = DataFrame([["one", "two", "three"], ["four", "five", "six"]], index=date_range("2012-01-01", "2012-01-02")) - self.assertEqual(df.to_records()['index'][0], df.index[0]) + assert df.to_records()['index'][0] == df.index[0] rs = df.to_records(convert_datetime64=False) - self.assertEqual(rs['index'][0], df.index.values[0]) + assert rs['index'][0] == df.index.values[0] def test_to_records_with_multindex(self): # GH3189 @@ -132,8 +90,8 @@ def test_to_records_with_multindex(self): data = np.zeros((8, 4)) df = DataFrame(data, index=index) r = df.to_records(index=True)['level_0'] - self.assertTrue('bar' in r) - self.assertTrue('one' not in r) + assert 'bar' in r + assert 'one' not in r def test_to_records_with_Mapping_type(self): import email @@ -159,16 +117,16 @@ def test_to_records_index_name(self): df = DataFrame(np.random.randn(3, 3)) df.index.name = 'X' rs = df.to_records() - self.assertIn('X', rs.dtype.fields) + assert 'X' in rs.dtype.fields df = DataFrame(np.random.randn(3, 3)) rs = df.to_records() - self.assertIn('index', rs.dtype.fields) + assert 'index' in rs.dtype.fields df.index = MultiIndex.from_tuples([('a', 'x'), ('a', 'y'), ('b', 'z')]) df.index.names = ['A', None] rs = df.to_records() - self.assertIn('level_0', rs.dtype.fields) + assert 'level_0' in rs.dtype.fields def test_to_records_with_unicode_index(self): # GH13172 @@ -177,3 +135,156 @@ def test_to_records_with_unicode_index(self): .to_records() expected = np.rec.array([('x', 'y')], dtype=[('a', 'O'), ('b', 'O')]) tm.assert_almost_equal(result, expected) + + def test_to_records_with_unicode_column_names(self): + # xref issue: 
https://github.com/numpy/numpy/issues/2407 + # Issue #11879. to_records used to raise an exception when used + # with column names containing non-ascii characters in Python 2 + result = DataFrame(data={u"accented_name_é": [1.0]}).to_records() + + # Note that numpy allows for unicode field names but dtypes need + # to be specified using dictionary instead of list of tuples. + expected = np.rec.array( + [(0, 1.0)], + dtype={"names": ["index", u"accented_name_é"], + "formats": ['= 1.14 preserves the full repr + expected = DataFrame(['1.1234567890123457']) assert_frame_equal(result, expected) - def test_astype_dict(self): - # GH7271 + @pytest.mark.parametrize("dtype_class", [dict, Series]) + def test_astype_dict_like(self, dtype_class): + # GH7271 & GH16717 a = Series(date_range('2010-01-04', periods=5)) b = Series(range(5)) c = Series([0.0, 0.2, 0.4, 0.6, 0.8]) @@ -445,7 +552,8 @@ def test_astype_dict(self): original = df.copy(deep=True) # change type of a subset of columns - result = df.astype({'b': 'str', 'd': 'float32'}) + dt1 = dtype_class({'b': 'str', 'd': 'float32'}) + result = df.astype(dt1) expected = DataFrame({ 'a': a, 'b': Series(['0', '1', '2', '3', '4']), @@ -454,7 +562,8 @@ def test_astype_dict(self): assert_frame_equal(result, expected) assert_frame_equal(df, original) - result = df.astype({'b': np.float32, 'c': 'float32', 'd': np.float64}) + dt2 = dtype_class({'b': np.float32, 'c': 'float32', 'd': np.float64}) + result = df.astype(dt2) expected = DataFrame({ 'a': a, 'b': Series([0.0, 1.0, 2.0, 3.0, 4.0], dtype='float32'), @@ -464,19 +573,31 @@ def test_astype_dict(self): assert_frame_equal(df, original) # change all columns - assert_frame_equal(df.astype({'a': str, 'b': str, 'c': str, 'd': str}), + dt3 = dtype_class({'a': str, 'b': str, 'c': str, 'd': str}) + assert_frame_equal(df.astype(dt3), df.astype(str)) assert_frame_equal(df, original) # error should be raised when using something other than column labels # in the keys of the dtype dict - 
self.assertRaises(KeyError, df.astype, {'b': str, 2: str}) - self.assertRaises(KeyError, df.astype, {'e': str}) + dt4 = dtype_class({'b': str, 2: str}) + dt5 = dtype_class({'e': str}) + pytest.raises(KeyError, df.astype, dt4) + pytest.raises(KeyError, df.astype, dt5) assert_frame_equal(df, original) # if the dtypes provided are the same as the original dtypes, the # resulting DataFrame should be the same as the original DataFrame - equiv = df.astype({col: df[col].dtype for col in df.columns}) + dt6 = dtype_class({col: df[col].dtype for col in df.columns}) + equiv = df.astype(dt6) + assert_frame_equal(df, equiv) + assert_frame_equal(df, original) + + # GH 16717 + # if dtypes provided is empty, the resulting DataFrame + # should be the same as the original DataFrame + dt7 = dtype_class({}) + result = df.astype(dt7) assert_frame_equal(df, equiv) assert_frame_equal(df, original) @@ -498,13 +619,133 @@ def test_astype_duplicate_col(self): expected = concat([a1_str, b, a2_str], axis=1) assert_frame_equal(result, expected) + @pytest.mark.parametrize('dtype', [ + 'category', + CategoricalDtype(), + CategoricalDtype(ordered=True), + CategoricalDtype(ordered=False), + CategoricalDtype(categories=list('abcdef')), + CategoricalDtype(categories=list('edba'), ordered=False), + CategoricalDtype(categories=list('edcb'), ordered=True)], ids=repr) + def test_astype_categorical(self, dtype): + # GH 18099 + d = {'A': list('abbc'), 'B': list('bccd'), 'C': list('cdde')} + df = DataFrame(d) + result = df.astype(dtype) + expected = DataFrame({k: Categorical(d[k], dtype=dtype) for k in d}) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("cls", [ + pd.api.types.CategoricalDtype, + pd.api.types.DatetimeTZDtype, + pd.api.types.IntervalDtype + ]) + def test_astype_categoricaldtype_class_raises(self, cls): + df = DataFrame({"A": ['a', 'a', 'b', 'c']}) + xpr = "Expected an instance of {}".format(cls.__name__) + with tm.assert_raises_regex(TypeError, xpr): + df.astype({"A": 
cls}) + + with tm.assert_raises_regex(TypeError, xpr): + df['A'].astype(cls) + + @pytest.mark.parametrize('dtype', [ + {100: 'float64', 200: 'uint64'}, 'category', 'float64']) + def test_astype_column_metadata(self, dtype): + # GH 19920 + columns = pd.UInt64Index([100, 200, 300], name='foo') + df = DataFrame(np.arange(15).reshape(5, 3), columns=columns) + df = df.astype(dtype) + tm.assert_index_equal(df.columns, columns) + + @pytest.mark.parametrize("dtype", ["M8", "m8"]) + @pytest.mark.parametrize("unit", ['ns', 'us', 'ms', 's', 'h', 'm', 'D']) + def test_astype_from_datetimelike_to_objectt(self, dtype, unit): + # tests astype to object dtype + # gh-19223 / gh-12425 + dtype = "{}[{}]".format(dtype, unit) + arr = np.array([[1, 2, 3]], dtype=dtype) + df = DataFrame(arr) + result = df.astype(object) + assert (result.dtypes == object).all() + + if dtype.startswith('M8'): + assert result.iloc[0, 0] == pd.to_datetime(1, unit=unit) + else: + assert result.iloc[0, 0] == pd.to_timedelta(1, unit=unit) + + @pytest.mark.parametrize("arr_dtype", [np.int64, np.float64]) + @pytest.mark.parametrize("dtype", ["M8", "m8"]) + @pytest.mark.parametrize("unit", ['ns', 'us', 'ms', 's', 'h', 'm', 'D']) + def test_astype_to_datetimelike_unit(self, arr_dtype, dtype, unit): + # tests all units from numeric origination + # gh-19223 / gh-12425 + dtype = "{}[{}]".format(dtype, unit) + arr = np.array([[1, 2, 3]], dtype=arr_dtype) + df = DataFrame(arr) + result = df.astype(dtype) + expected = DataFrame(arr.astype(dtype)) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("unit", ['ns', 'us', 'ms', 's', 'h', 'm', 'D']) + def test_astype_to_datetime_unit(self, unit): + # tests all units from datetime origination + # gh-19223 + dtype = "M8[{}]".format(unit) + arr = np.array([[1, 2, 3]], dtype=dtype) + df = DataFrame(arr) + result = df.astype(dtype) + expected = DataFrame(arr.astype(dtype)) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("unit", ['ns']) 
+ def test_astype_to_timedelta_unit_ns(self, unit): + # preserver the timedelta conversion + # gh-19223 + dtype = "m8[{}]".format(unit) + arr = np.array([[1, 2, 3]], dtype=dtype) + df = DataFrame(arr) + result = df.astype(dtype) + expected = DataFrame(arr.astype(dtype)) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("unit", ['us', 'ms', 's', 'h', 'm', 'D']) + def test_astype_to_timedelta_unit(self, unit): + # coerce to float + # gh-19223 + dtype = "m8[{}]".format(unit) + arr = np.array([[1, 2, 3]], dtype=dtype) + df = DataFrame(arr) + result = df.astype(dtype) + expected = DataFrame(df.values.astype(dtype).astype(float)) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("unit", ['ns', 'us', 'ms', 's', 'h', 'm', 'D']) + def test_astype_to_incorrect_datetimelike(self, unit): + # trying to astype a m to a M, or vice-versa + # gh-19224 + dtype = "M8[{}]".format(unit) + other = "m8[{}]".format(unit) + + df = DataFrame(np.array([[1, 2, 3]], dtype=dtype)) + with pytest.raises(TypeError): + df.astype(other) + + df = DataFrame(np.array([[1, 2, 3]], dtype=other)) + with pytest.raises(TypeError): + df.astype(dtype) + def test_timedeltas(self): df = DataFrame(dict(A=Series(date_range('2012-1-1', periods=3, freq='D')), B=Series([timedelta(days=i) for i in range(3)]))) - result = df.get_dtype_counts().sort_values() + result = df.get_dtype_counts().sort_index() expected = Series( - {'datetime64[ns]': 1, 'timedelta64[ns]': 1}).sort_values() + {'datetime64[ns]': 1, 'timedelta64[ns]': 1}).sort_index() assert_series_equal(result, expected) df['C'] = df['A'] + df['B'] @@ -526,7 +767,7 @@ def test_arg_for_errors_in_astype(self): df = DataFrame([1, 2, 3]) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): df.astype(np.float64, errors=True) with tm.assert_produces_warning(FutureWarning): @@ -534,8 +775,27 @@ def test_arg_for_errors_in_astype(self): df.astype(np.int8, errors='ignore') + 
@pytest.mark.parametrize('input_vals', [ + ([1, 2]), + ([1.0, 2.0, np.nan]), + (['1', '2']), + (list(pd.date_range('1/1/2011', periods=2, freq='H'))), + (list(pd.date_range('1/1/2011', periods=2, freq='H', + tz='US/Eastern'))), + ([pd.Interval(left=0, right=5)]), + ]) + def test_constructor_list_str(self, input_vals): + # GH 16605 + # Ensure that data elements are converted to strings when + # dtype is str, 'str', or 'U' + + for dtype in ['str', str, 'U']: + result = DataFrame({'A': input_vals}, dtype=dtype) + expected = DataFrame({'A': input_vals}).astype({'A': dtype}) + assert_frame_equal(result, expected) + -class TestDataFrameDatetimeWithTZ(tm.TestCase, TestData): +class TestDataFrameDatetimeWithTZ(TestData): def test_interleave(self): @@ -553,7 +813,7 @@ def test_interleave(self): pd.NaT, Timestamp('2013-01-03 00:00:00+0100', tz='CET')], ['foo', 'foo', 'foo']], dtype=object).T - self.assert_numpy_array_equal(result, expected) + tm.assert_numpy_array_equal(result, expected) # interleave with only datetime64[ns] result = self.tzframe.values @@ -569,7 +829,7 @@ def test_interleave(self): pd.NaT, Timestamp('2013-01-03 00:00:00+0100', tz='CET')]], dtype=object).T - self.assert_numpy_array_equal(result, expected) + tm.assert_numpy_array_equal(result, expected) def test_astype(self): # astype @@ -613,12 +873,12 @@ def test_astype_str(self): ['2013-01-03', '2013-01-03 00:00:00-05:00', '2013-01-03 00:00:00+01:00']], columns=self.tzframe.columns) - self.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = str(self.tzframe) - self.assertTrue('0 2013-01-01 2013-01-01 00:00:00-05:00 ' - '2013-01-01 00:00:00+01:00' in result) - self.assertTrue('1 2013-01-02 ' - 'NaT NaT' in result) - self.assertTrue('2 2013-01-03 2013-01-03 00:00:00-05:00 ' - '2013-01-03 00:00:00+01:00' in result) + assert ('0 2013-01-01 2013-01-01 00:00:00-05:00 ' + '2013-01-01 00:00:00+01:00') in result + assert ('1 2013-01-02 ' + 'NaT NaT') in result + assert ('2 
2013-01-03 2013-01-03 00:00:00-05:00 ' + '2013-01-03 00:00:00+01:00') in result diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index c06faa75ed346..a8b81b1b03552 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -10,24 +10,27 @@ from numpy import nan from numpy.random import randn + +import pytest import numpy as np import pandas.core.common as com -from pandas import (DataFrame, Index, Series, notnull, isnull, +from pandas import (DataFrame, Index, Series, notna, isna, MultiIndex, DatetimeIndex, Timestamp, - date_range) + date_range, Categorical) +from pandas.core.dtypes.dtypes import CategoricalDtype + import pandas as pd +from pandas._libs.tslib import iNaT from pandas.tseries.offsets import BDay -from pandas.types.common import (is_float_dtype, - is_integer, - is_scalar) +from pandas.core.dtypes.common import ( + is_float_dtype, + is_integer, + is_scalar) from pandas.util.testing import (assert_almost_equal, - assert_numpy_array_equal, assert_series_equal, - assert_frame_equal, - assertRaisesRegexp, - assertRaises) + assert_frame_equal) from pandas.core.indexing import IndexingError import pandas.util.testing as tm @@ -35,33 +38,36 @@ from pandas.tests.frame.common import TestData -class TestDataFrameIndexing(tm.TestCase, TestData): +class TestDataFrameIndexing(TestData): def test_getitem(self): - # slicing + # Slicing sl = self.frame[:20] - self.assertEqual(20, len(sl.index)) - - # column access + assert len(sl.index) == 20 + # Column access for _, series in compat.iteritems(sl): - self.assertEqual(20, len(series.index)) - self.assertTrue(tm.equalContents(series.index, sl.index)) + assert len(series.index) == 20 + assert tm.equalContents(series.index, sl.index) for key, _ in compat.iteritems(self.frame._series): - self.assertIsNotNone(self.frame[key]) + assert self.frame[key] is not None - self.assertNotIn('random', self.frame) - with assertRaisesRegexp(KeyError, 'random'): + 
assert 'random' not in self.frame + with tm.assert_raises_regex(KeyError, 'random'): self.frame['random'] df = self.frame.copy() df['$10'] = randn(len(df)) + ad = randn(len(df)) df['@awesome_domain'] = ad - self.assertRaises(KeyError, df.__getitem__, 'df["$10"]') + + with pytest.raises(KeyError): + df.__getitem__('df["$10"]') + res = df['@awesome_domain'] - assert_numpy_array_equal(ad, res.values) + tm.assert_numpy_array_equal(ad, res.values) def test_getitem_dupe_cols(self): df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=['a', 'a', 'b']) @@ -76,7 +82,7 @@ def test_get(self): b = self.frame.get('B') assert_series_equal(b, self.frame['B']) - self.assertIsNone(self.frame.get('foo')) + assert self.frame.get('foo') is None assert_series_equal(self.frame.get('foo', self.frame['B']), self.frame['B']) # None @@ -84,7 +90,7 @@ def test_get(self): for df in [DataFrame(), DataFrame(columns=list('AB')), DataFrame(columns=list('AB'), index=range(3))]: result = df.get(None) - self.assertIsNone(result) + assert result is None def test_getitem_iterator(self): idx = iter(['A', 'B', 'C']) @@ -109,11 +115,11 @@ def test_getitem_list(self): assert_frame_equal(result, expected) assert_frame_equal(result2, expected) - self.assertEqual(result.columns.name, 'foo') + assert result.columns.name == 'foo' - with assertRaisesRegexp(KeyError, 'not in index'): + with tm.assert_raises_regex(KeyError, 'not in index'): self.frame[['B', 'A', 'food']] - with assertRaisesRegexp(KeyError, 'not in index'): + with tm.assert_raises_regex(KeyError, 'not in index'): self.frame[Index(['B', 'A', 'foo'])] # tuples @@ -124,7 +130,7 @@ def test_getitem_list(self): result = df[[('foo', 'bar'), ('baz', 'qux')]] expected = df.iloc[:, :2] assert_frame_equal(result, expected) - self.assertEqual(result.columns.names, ['sth', 'sth2']) + assert result.columns.names == ['sth', 'sth2'] def test_getitem_callable(self): # GH 12533 @@ -147,12 +153,13 @@ def test_setitem_list(self): assert_series_equal(self.frame['B'], 
data['A'], check_names=False) assert_series_equal(self.frame['A'], data['B'], check_names=False) - with assertRaisesRegexp(ValueError, - 'Columns must be same length as key'): + with tm.assert_raises_regex(ValueError, + 'Columns must be same length as key'): data[['A']] = self.frame[['A', 'B']] - with assertRaisesRegexp(ValueError, 'Length of values does not match ' - 'length of index'): + with tm.assert_raises_regex(ValueError, 'Length of values ' + 'does not match ' + 'length of index'): data['A'] = range(len(data.index) - 1) df = DataFrame(0, lrange(3), ['tt1', 'tt2'], dtype=np.int_) @@ -233,14 +240,14 @@ def test_getitem_boolean(self): subindex = self.tsframe.index[indexer] subframe = self.tsframe[indexer] - self.assert_index_equal(subindex, subframe.index) - with assertRaisesRegexp(ValueError, 'Item wrong length'): + tm.assert_index_equal(subindex, subframe.index) + with tm.assert_raises_regex(ValueError, 'Item wrong length'): self.tsframe[indexer[:-1]] subframe_obj = self.tsframe[indexer_obj] assert_frame_equal(subframe_obj, subframe) - with tm.assertRaisesRegexp(ValueError, 'boolean values only'): + with tm.assert_raises_regex(ValueError, 'boolean values only'): self.tsframe[self.tsframe] # test that Series work @@ -264,8 +271,8 @@ def test_getitem_boolean(self): data = df._get_numeric_data() bif = df[df > 0] - bifw = DataFrame(dict([(c, np.where(data[c] > 0, data[c], np.nan)) - for c in data.columns]), + bifw = DataFrame(dict((c, np.where(data[c] > 0, data[c], np.nan)) + for c in data.columns), index=data.index, columns=data.columns) # add back other columns to compare @@ -277,7 +284,7 @@ def test_getitem_boolean(self): assert_frame_equal(bif, bifw, check_dtype=False) for c in df.columns: if bif[c].dtype != bifw[c].dtype: - self.assertEqual(bif[c].dtype, df[c].dtype) + assert bif[c].dtype == df[c].dtype def test_getitem_boolean_casting(self): @@ -307,7 +314,7 @@ def test_getitem_boolean_casting(self): df = DataFrame(data=np.random.randn(100, 50)) df = 
df.where(df > 0) # create nans bools = df > 0 - mask = isnull(df) + mask = isna(df) expected = bools.astype(float).mask(mask) result = bools.mask(mask) assert_frame_equal(result, expected) @@ -386,11 +393,11 @@ def test_getitem_setitem_ix_negative_integers(self): with catch_warnings(record=True): self.frame.ix[:, [-1]] = 0 - self.assertTrue((self.frame['D'] == 0).all()) + assert (self.frame['D'] == 0).all() df = DataFrame(np.random.randn(8, 4)) with catch_warnings(record=True): - self.assertTrue(isnull(df.ix[:, [-1]].values).all()) + assert isna(df.ix[:, [-1]].values).all() # #1942 a = DataFrame(randn(20, 2), index=[chr(x + 65) for x in range(20)]) @@ -399,28 +406,28 @@ def test_getitem_setitem_ix_negative_integers(self): with catch_warnings(record=True): assert_series_equal(a.ix[-1], a.ix[-2], check_names=False) - self.assertEqual(a.ix[-1].name, 'T') - self.assertEqual(a.ix[-2].name, 'S') + assert a.ix[-1].name == 'T' + assert a.ix[-2].name == 'S' def test_getattr(self): assert_series_equal(self.frame.A, self.frame['A']) - self.assertRaises(AttributeError, getattr, self.frame, - 'NONEXISTENT_NAME') + pytest.raises(AttributeError, getattr, self.frame, + 'NONEXISTENT_NAME') def test_setattr_column(self): df = DataFrame({'foobar': 1}, index=lrange(10)) df.foobar = 5 - self.assertTrue((df.foobar == 5).all()) + assert (df.foobar == 5).all() def test_setitem(self): # not sure what else to do here series = self.frame['A'][::2] self.frame['col5'] = series - self.assertIn('col5', self.frame) + assert 'col5' in self.frame - self.assertEqual(len(series), 15) - self.assertEqual(len(self.frame), 30) + assert len(series) == 15 + assert len(self.frame) == 30 exp = np.ravel(np.column_stack((series.values, [np.nan] * 15))) exp = Series(exp, index=self.frame.index, name='col5') @@ -430,13 +437,13 @@ def test_setitem(self): self.frame['col6'] = series tm.assert_series_equal(series, self.frame['col6'], check_names=False) - with tm.assertRaises(KeyError): + with 
pytest.raises(KeyError): self.frame[randn(len(self.frame) + 1)] = 1 # set ndarray arr = randn(len(self.frame)) self.frame['col9'] = arr - self.assertTrue((self.frame['col9'] == arr).all()) + assert (self.frame['col9'] == arr).all() self.frame['col7'] = 5 assert((self.frame['col7'] == 5).all()) @@ -453,14 +460,14 @@ def test_setitem(self): def f(): smaller['col10'] = ['1', '2'] - self.assertRaises(com.SettingWithCopyError, f) - self.assertEqual(smaller['col10'].dtype, np.object_) - self.assertTrue((smaller['col10'] == ['1', '2']).all()) + pytest.raises(com.SettingWithCopyError, f) + assert smaller['col10'].dtype == np.object_ + assert (smaller['col10'] == ['1', '2']).all() # with a dtype for dtype in ['int32', 'int64', 'float32', 'float64']: self.frame[dtype] = np.array(arr, dtype=dtype) - self.assertEqual(self.frame[dtype].dtype.name, dtype) + assert self.frame[dtype].dtype.name == dtype # dtype changing GH4204 df = DataFrame([[0, 0]]) @@ -482,7 +489,7 @@ def test_setitem_always_copy(self): self.frame['E'] = s self.frame['E'][5:10] = nan - self.assertTrue(notnull(s[5:10]).all()) + assert notna(s[5:10]).all() def test_setitem_boolean(self): df = self.frame.copy() @@ -517,8 +524,8 @@ def test_setitem_boolean(self): values[values == 2] = 3 assert_almost_equal(df.values, values) - with assertRaisesRegexp(TypeError, 'Must pass DataFrame with boolean ' - 'values only'): + msg = "Must pass DataFrame or 2-d ndarray with boolean values only" + with tm.assert_raises_regex(TypeError, msg): df[df * 0] = 2 # index with DataFrame @@ -534,34 +541,53 @@ def test_setitem_boolean(self): np.putmask(expected.values, mask.values, df.values * 2) assert_frame_equal(df, expected) + @pytest.mark.parametrize( + "mask_type", + [lambda df: df > np.abs(df) / 2, + lambda df: (df > np.abs(df) / 2).values], + ids=['dataframe', 'array']) + def test_setitem_boolean_mask(self, mask_type): + + # Test for issue #18582 + df = self.frame.copy() + mask = mask_type(df) + + # index with boolean mask + 
result = df.copy() + result[mask] = np.nan + + expected = df.copy() + expected.values[np.array(mask)] = np.nan + assert_frame_equal(result, expected) + def test_setitem_cast(self): self.frame['D'] = self.frame['D'].astype('i8') - self.assertEqual(self.frame['D'].dtype, np.int64) + assert self.frame['D'].dtype == np.int64 # #669, should not cast? # this is now set to int64, which means a replacement of the column to # the value dtype (and nothing to do with the existing dtype) self.frame['B'] = 0 - self.assertEqual(self.frame['B'].dtype, np.int64) + assert self.frame['B'].dtype == np.int64 # cast if pass array of course self.frame['B'] = np.arange(len(self.frame)) - self.assertTrue(issubclass(self.frame['B'].dtype.type, np.integer)) + assert issubclass(self.frame['B'].dtype.type, np.integer) self.frame['foo'] = 'bar' self.frame['foo'] = 0 - self.assertEqual(self.frame['foo'].dtype, np.int64) + assert self.frame['foo'].dtype == np.int64 self.frame['foo'] = 'bar' self.frame['foo'] = 2.5 - self.assertEqual(self.frame['foo'].dtype, np.float64) + assert self.frame['foo'].dtype == np.float64 self.frame['something'] = 0 - self.assertEqual(self.frame['something'].dtype, np.int64) + assert self.frame['something'].dtype == np.int64 self.frame['something'] = 2 - self.assertEqual(self.frame['something'].dtype, np.int64) + assert self.frame['something'].dtype == np.int64 self.frame['something'] = 2.5 - self.assertEqual(self.frame['something'].dtype, np.float64) + assert self.frame['something'].dtype == np.float64 # GH 7704 # dtype conversion on setting @@ -575,9 +601,9 @@ def test_setitem_cast(self): # Test that data type is preserved . 
#5782 df = DataFrame({'one': np.arange(6, dtype=np.int8)}) df.loc[1, 'one'] = 6 - self.assertEqual(df.dtypes.one, np.dtype(np.int8)) + assert df.dtypes.one == np.dtype(np.int8) df.one = np.int8(7) - self.assertEqual(df.dtypes.one, np.dtype(np.int8)) + assert df.dtypes.one == np.dtype(np.int8) def test_setitem_boolean_column(self): expected = self.frame.copy() @@ -588,6 +614,16 @@ def test_setitem_boolean_column(self): assert_frame_equal(self.frame, expected) + def test_frame_setitem_timestamp(self): + # GH#2155 + columns = DatetimeIndex(start='1/1/2012', end='2/1/2012', freq=BDay()) + index = lrange(10) + data = DataFrame(columns=columns, index=index) + t = datetime(2012, 11, 1) + ts = Timestamp(t) + data[ts] = np.nan # works, mostly a smoke-test + assert np.isnan(data[ts]).all() + def test_setitem_corner(self): # corner case df = DataFrame({'B': [1., 2., 3.], @@ -595,8 +631,8 @@ def test_setitem_corner(self): index=np.arange(3)) del df['B'] df['B'] = [1., 2., 3.] - self.assertIn('B', df) - self.assertEqual(len(df.columns), 2) + assert 'B' in df + assert len(df.columns) == 2 df['A'] = 'beginning' df['E'] = 'foo' @@ -608,29 +644,29 @@ def test_setitem_corner(self): dm = DataFrame(index=self.frame.index) dm['A'] = 'foo' dm['B'] = 'bar' - self.assertEqual(len(dm.columns), 2) - self.assertEqual(dm.values.dtype, np.object_) + assert len(dm.columns) == 2 + assert dm.values.dtype == np.object_ # upcast dm['C'] = 1 - self.assertEqual(dm['C'].dtype, np.int64) + assert dm['C'].dtype == np.int64 dm['E'] = 1. 
- self.assertEqual(dm['E'].dtype, np.float64) + assert dm['E'].dtype == np.float64 # set existing column dm['A'] = 'bar' - self.assertEqual('bar', dm['A'][0]) + assert 'bar' == dm['A'][0] dm = DataFrame(index=np.arange(3)) dm['A'] = 1 dm['foo'] = 'bar' del dm['foo'] dm['foo'] = 'bar' - self.assertEqual(dm['foo'].dtype, np.object_) + assert dm['foo'].dtype == np.object_ dm['coercable'] = ['1', '2', '3'] - self.assertEqual(dm['coercable'].dtype, np.object_) + assert dm['coercable'].dtype == np.object_ def test_setitem_corner2(self): data = {"title": ['foobar', 'bar', 'foobar'] + ['foobar'] * 17, @@ -642,14 +678,14 @@ def test_setitem_corner2(self): df.loc[ix, ['title']] = 'foobar' df.loc[ix, ['cruft']] = 0 - self.assertEqual(df.loc[1, 'title'], 'foobar') - self.assertEqual(df.loc[1, 'cruft'], 0) + assert df.loc[1, 'title'] == 'foobar' + assert df.loc[1, 'cruft'] == 0 def test_setitem_ambig(self): - # difficulties with mixed-type data + # Difficulties with mixed-type data from decimal import Decimal - # created as float type + # Created as float type dm = DataFrame(index=lrange(3), columns=lrange(3)) coercable_series = Series([Decimal(1) for _ in range(3)], @@ -657,32 +693,29 @@ def test_setitem_ambig(self): uncoercable_series = Series(['foo', 'bzr', 'baz'], index=lrange(3)) dm[0] = np.ones(3) - self.assertEqual(len(dm.columns), 3) - # self.assertIsNone(dm.objects) + assert len(dm.columns) == 3 dm[1] = coercable_series - self.assertEqual(len(dm.columns), 3) - # self.assertIsNone(dm.objects) + assert len(dm.columns) == 3 dm[2] = uncoercable_series - self.assertEqual(len(dm.columns), 3) - # self.assertIsNotNone(dm.objects) - self.assertEqual(dm[2].dtype, np.object_) + assert len(dm.columns) == 3 + assert dm[2].dtype == np.object_ def test_setitem_clear_caches(self): - # GH #304 + # see gh-304 df = DataFrame({'x': [1.1, 2.1, 3.1, 4.1], 'y': [5.1, 6.1, 7.1, 8.1]}, index=[0, 1, 2, 3]) df.insert(2, 'z', np.nan) # cache it foo = df['z'] - df.loc[df.index[2:], 'z'] = 42 
expected = Series([np.nan, np.nan, 42, 42], index=df.index, name='z') - self.assertIsNot(df['z'], foo) - assert_series_equal(df['z'], expected) + + assert df['z'] is not foo + tm.assert_series_equal(df['z'], expected) def test_setitem_None(self): # GH #766 @@ -702,7 +735,7 @@ def test_setitem_empty(self): 'c': ['111', '222', '333']}) result = df.copy() - result.loc[result.b.isnull(), 'a'] = result.a + result.loc[result.b.isna(), 'a'] = result.a assert_frame_equal(result, df) def test_setitem_empty_frame_with_boolean(self): @@ -718,6 +751,13 @@ def test_setitem_empty_frame_with_boolean(self): df[df > df2] = 47 assert_frame_equal(df, df2) + def test_setitem_scalars_no_index(self): + # GH16823 / 17894 + df = DataFrame() + df['foo'] = 1 + expected = DataFrame(columns=['foo']).astype(np.int64) + assert_frame_equal(df, expected) + def test_getitem_empty_frame_with_boolean(self): # Test for issue #11859 @@ -728,10 +768,10 @@ def test_getitem_empty_frame_with_boolean(self): def test_delitem_corner(self): f = self.frame.copy() del f['D'] - self.assertEqual(len(f.columns), 3) - self.assertRaises(KeyError, f.__delitem__, 'D') + assert len(f.columns) == 3 + pytest.raises(KeyError, f.__delitem__, 'D') del f['B'] - self.assertEqual(len(f.columns), 2) + assert len(f.columns) == 2 def test_getitem_fancy_2d(self): f = self.frame @@ -771,20 +811,20 @@ def test_getitem_fancy_2d(self): assert_frame_equal(f, exp) with catch_warnings(record=True): - self.assertRaises(ValueError, f.ix.__getitem__, f > 0.5) + pytest.raises(ValueError, f.ix.__getitem__, f > 0.5) def test_slice_floats(self): index = [52195.504153, 52196.303147, 52198.369883] df = DataFrame(np.random.rand(3, 2), index=index) s1 = df.loc[52195.1:52196.5] - self.assertEqual(len(s1), 2) + assert len(s1) == 2 s1 = df.loc[52195.1:52196.6] - self.assertEqual(len(s1), 2) + assert len(s1) == 2 s1 = df.loc[52195.1:52198.9] - self.assertEqual(len(s1), 3) + assert len(s1) == 3 def test_getitem_fancy_slice_integers_step(self): df = 
DataFrame(np.random.randn(10, 5)) @@ -792,7 +832,7 @@ def test_getitem_fancy_slice_integers_step(self): # this is OK result = df.iloc[:8:2] # noqa df.iloc[:8:2] = np.nan - self.assertTrue(isnull(df.iloc[:8:2]).values.all()) + assert isna(df.iloc[:8:2]).values.all() def test_getitem_setitem_integer_slice_keyerrors(self): df = DataFrame(np.random.randn(10, 5), index=lrange(0, 20, 2)) @@ -800,12 +840,12 @@ def test_getitem_setitem_integer_slice_keyerrors(self): # this is OK cp = df.copy() cp.iloc[4:10] = 0 - self.assertTrue((cp.iloc[4:10] == 0).values.all()) + assert (cp.iloc[4:10] == 0).values.all() # so is this cp = df.copy() cp.iloc[3:11] = 0 - self.assertTrue((cp.iloc[3:11] == 0).values.all()) + assert (cp.iloc[3:11] == 0).values.all() result = df.iloc[2:6] result2 = df.loc[3:11] @@ -816,8 +856,8 @@ def test_getitem_setitem_integer_slice_keyerrors(self): # non-monotonic, raise KeyError df2 = df.iloc[lrange(5) + lrange(5, 10)[::-1]] - self.assertRaises(KeyError, df2.loc.__getitem__, slice(3, 11)) - self.assertRaises(KeyError, df2.loc.__setitem__, slice(3, 11), 0) + pytest.raises(KeyError, df2.loc.__getitem__, slice(3, 11)) + pytest.raises(KeyError, df2.loc.__setitem__, slice(3, 11), 0) def test_setitem_fancy_2d(self): @@ -927,7 +967,7 @@ def test_setitem_fancy_2d(self): def test_fancy_getitem_slice_mixed(self): sliced = self.mixed_frame.iloc[:, -3:] - self.assertEqual(sliced['D'].dtype, np.float64) + assert sliced['D'].dtype == np.float64 # get view with single block # setting it triggers setting with copy @@ -935,8 +975,8 @@ def test_fancy_getitem_slice_mixed(self): def f(): sliced['C'] = 4. 
- self.assertRaises(com.SettingWithCopyError, f) - self.assertTrue((self.frame['C'] == 4).all()) + pytest.raises(com.SettingWithCopyError, f) + assert (self.frame['C'] == 4).all() def test_fancy_setitem_int_labels(self): # integer index defers to label-based indexing @@ -996,31 +1036,28 @@ def test_fancy_index_int_labels_exceptions(self): with catch_warnings(record=True): # labels that aren't contained - self.assertRaises(KeyError, df.ix.__setitem__, - ([0, 1, 2], [2, 3, 4]), 5) + pytest.raises(KeyError, df.ix.__setitem__, + ([0, 1, 2], [2, 3, 4]), 5) # try to set indices not contained in frame - self.assertRaises(KeyError, - self.frame.ix.__setitem__, - ['foo', 'bar', 'baz'], 1) - self.assertRaises(KeyError, - self.frame.ix.__setitem__, - (slice(None, None), ['E']), 1) + pytest.raises(KeyError, self.frame.ix.__setitem__, + ['foo', 'bar', 'baz'], 1) + pytest.raises(KeyError, self.frame.ix.__setitem__, + (slice(None, None), ['E']), 1) # partial setting now allows this GH2578 - # self.assertRaises(KeyError, - # self.frame.ix.__setitem__, - # (slice(None, None), 'E'), 1) + # pytest.raises(KeyError, self.frame.ix.__setitem__, + # (slice(None, None), 'E'), 1) def test_setitem_fancy_mixed_2d(self): with catch_warnings(record=True): self.mixed_frame.ix[:5, ['C', 'B', 'A']] = 5 result = self.mixed_frame.ix[:5, ['C', 'B', 'A']] - self.assertTrue((result.values == 5).all()) + assert (result.values == 5).all() self.mixed_frame.ix[5] = np.nan - self.assertTrue(isnull(self.mixed_frame.ix[5]).all()) + assert isna(self.mixed_frame.ix[5]).all() self.mixed_frame.ix[5] = self.mixed_frame.ix[6] assert_series_equal(self.mixed_frame.ix[5], self.mixed_frame.ix[6], @@ -1030,7 +1067,7 @@ def test_setitem_fancy_mixed_2d(self): with catch_warnings(record=True): df = DataFrame({1: [1., 2., 3.], 2: [3, 4, 5]}) - self.assertTrue(df._is_mixed_type) + assert df._is_mixed_type df.ix[1] = [5, 10] @@ -1172,29 +1209,29 @@ def test_getitem_fancy_1d(self): # return self if no slicing...for now with 
catch_warnings(record=True): - self.assertIs(f.ix[:, :], f) + assert f.ix[:, :] is f # low dimensional slice with catch_warnings(record=True): xs1 = f.ix[2, ['C', 'B', 'A']] xs2 = f.xs(f.index[2]).reindex(['C', 'B', 'A']) - assert_series_equal(xs1, xs2) + tm.assert_series_equal(xs1, xs2) with catch_warnings(record=True): ts1 = f.ix[5:10, 2] ts2 = f[f.columns[2]][5:10] - assert_series_equal(ts1, ts2) + tm.assert_series_equal(ts1, ts2) # positional xs with catch_warnings(record=True): xs1 = f.ix[0] xs2 = f.xs(f.index[0]) - assert_series_equal(xs1, xs2) + tm.assert_series_equal(xs1, xs2) with catch_warnings(record=True): xs1 = f.ix[f.index[5]] xs2 = f.xs(f.index[5]) - assert_series_equal(xs1, xs2) + tm.assert_series_equal(xs1, xs2) # single column with catch_warnings(record=True): @@ -1205,18 +1242,18 @@ def test_getitem_fancy_1d(self): exp = f.copy() exp.values[5] = 4 f.ix[5][:] = 4 - assert_frame_equal(exp, f) + tm.assert_frame_equal(exp, f) with catch_warnings(record=True): exp.values[:, 1] = 6 f.ix[:, 1][:] = 6 - assert_frame_equal(exp, f) + tm.assert_frame_equal(exp, f) # slice of mixed-frame with catch_warnings(record=True): xs = self.mixed_frame.ix[5] exp = self.mixed_frame.xs(self.mixed_frame.index[5]) - assert_series_equal(xs, exp) + tm.assert_series_equal(xs, exp) def test_setitem_fancy_1d(self): @@ -1282,7 +1319,7 @@ def test_getitem_fancy_scalar(self): for col in f.columns: ts = f[col] for idx in f.index[::5]: - self.assertEqual(ix[idx, col], ts[idx]) + assert ix[idx, col] == ts[idx] def test_setitem_fancy_scalar(self): f = self.frame @@ -1351,10 +1388,10 @@ def test_getitem_fancy_ints(self): def test_getitem_setitem_fancy_exceptions(self): ix = self.frame.iloc - with assertRaisesRegexp(IndexingError, 'Too many indexers'): + with tm.assert_raises_regex(IndexingError, 'Too many indexers'): ix[:, :, :] - with assertRaises(IndexingError): + with pytest.raises(IndexingError): ix[:, :, :] = 1 def test_getitem_setitem_boolean_misaligned(self): @@ -1394,17 
+1431,17 @@ def test_getitem_setitem_float_labels(self): result = df.loc[1.5:4] expected = df.reindex([1.5, 2, 3, 4]) assert_frame_equal(result, expected) - self.assertEqual(len(result), 4) + assert len(result) == 4 result = df.loc[4:5] expected = df.reindex([4, 5]) # reindex with int assert_frame_equal(result, expected, check_index_type=False) - self.assertEqual(len(result), 2) + assert len(result) == 2 result = df.loc[4:5] expected = df.reindex([4.0, 5.0]) # reindex with float assert_frame_equal(result, expected) - self.assertEqual(len(result), 2) + assert len(result) == 2 # loc_float changes this to work properly result = df.loc[1:2] @@ -1413,63 +1450,63 @@ def test_getitem_setitem_float_labels(self): df.loc[1:2] = 0 result = df[1:2] - self.assertTrue((result == 0).all().all()) + assert (result == 0).all().all() # #2727 index = Index([1.0, 2.5, 3.5, 4.5, 5.0]) df = DataFrame(np.random.randn(5, 5), index=index) # positional slicing only via iloc! - self.assertRaises(TypeError, lambda: df.iloc[1.0:5]) + pytest.raises(TypeError, lambda: df.iloc[1.0:5]) result = df.iloc[4:5] expected = df.reindex([5.0]) assert_frame_equal(result, expected) - self.assertEqual(len(result), 1) + assert len(result) == 1 cp = df.copy() def f(): cp.iloc[1.0:5] = 0 - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) def f(): result = cp.iloc[1.0:5] == 0 # noqa - self.assertRaises(TypeError, f) - self.assertTrue(result.values.all()) - self.assertTrue((cp.iloc[0:1] == df.iloc[0:1]).values.all()) + pytest.raises(TypeError, f) + assert result.values.all() + assert (cp.iloc[0:1] == df.iloc[0:1]).values.all() cp = df.copy() cp.iloc[4:5] = 0 - self.assertTrue((cp.iloc[4:5] == 0).values.all()) - self.assertTrue((cp.iloc[0:4] == df.iloc[0:4]).values.all()) + assert (cp.iloc[4:5] == 0).values.all() + assert (cp.iloc[0:4] == df.iloc[0:4]).values.all() # float slicing result = df.loc[1.0:5] expected = df assert_frame_equal(result, expected) - self.assertEqual(len(result), 5) + assert 
len(result) == 5 result = df.loc[1.1:5] expected = df.reindex([2.5, 3.5, 4.5, 5.0]) assert_frame_equal(result, expected) - self.assertEqual(len(result), 4) + assert len(result) == 4 result = df.loc[4.51:5] expected = df.reindex([5.0]) assert_frame_equal(result, expected) - self.assertEqual(len(result), 1) + assert len(result) == 1 result = df.loc[1.0:5.0] expected = df.reindex([1.0, 2.5, 3.5, 4.5, 5.0]) assert_frame_equal(result, expected) - self.assertEqual(len(result), 5) + assert len(result) == 5 cp = df.copy() cp.loc[1.0:5.0] = 0 result = cp.loc[1.0:5.0] - self.assertTrue((result == 0).values.all()) + assert (result == 0).values.all() def test_setitem_single_column_mixed(self): df = DataFrame(randn(5, 3), index=['a', 'b', 'c', 'd', 'e'], @@ -1491,21 +1528,20 @@ def test_setitem_single_column_mixed_datetime(self): assert_series_equal(result, expected) # set an allowable datetime64 type - from pandas import tslib - df.loc['b', 'timestamp'] = tslib.iNaT - self.assertTrue(isnull(df.loc['b', 'timestamp'])) + df.loc['b', 'timestamp'] = iNaT + assert isna(df.loc['b', 'timestamp']) # allow this syntax df.loc['c', 'timestamp'] = nan - self.assertTrue(isnull(df.loc['c', 'timestamp'])) + assert isna(df.loc['c', 'timestamp']) # allow this syntax df.loc['d', :] = nan - self.assertTrue(isnull(df.loc['c', :]).all() == False) # noqa + assert not isna(df.loc['c', :]).all() # as of GH 3216 this will now work! 
# try to set with a list like item - # self.assertRaises( + # pytest.raises( # Exception, df.loc.__setitem__, ('d', 'timestamp'), [nan]) def test_setitem_frame(self): @@ -1610,11 +1646,11 @@ def test_getitem_setitem_ix_bool_keyerror(self): # #2199 df = DataFrame({'a': [1, 2, 3]}) - self.assertRaises(KeyError, df.loc.__getitem__, False) - self.assertRaises(KeyError, df.loc.__getitem__, True) + pytest.raises(KeyError, df.loc.__getitem__, False) + pytest.raises(KeyError, df.loc.__getitem__, True) - self.assertRaises(KeyError, df.loc.__setitem__, False, 0) - self.assertRaises(KeyError, df.loc.__setitem__, True, 0) + pytest.raises(KeyError, df.loc.__setitem__, False, 0) + pytest.raises(KeyError, df.loc.__setitem__, True, 0) def test_getitem_list_duplicates(self): # #1943 @@ -1622,7 +1658,7 @@ def test_getitem_list_duplicates(self): df.columns.name = 'foo' result = df[['B', 'C']] - self.assertEqual(result.columns.name, 'foo') + assert result.columns.name == 'foo' expected = df.iloc[:, 2:] assert_frame_equal(result, expected) @@ -1630,15 +1666,19 @@ def test_getitem_list_duplicates(self): def test_get_value(self): for idx in self.frame.index: for col in self.frame.columns: - result = self.frame.get_value(idx, col) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = self.frame.get_value(idx, col) expected = self.frame[col][idx] - self.assertEqual(result, expected) + assert result == expected def test_lookup(self): def alt(df, rows, cols, dtype): result = [] for r, c in zip(rows, cols): - result.append(df.get_value(r, c)) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result.append(df.get_value(r, c)) return np.array(result, dtype=dtype) def testit(df): @@ -1658,46 +1698,62 @@ def testit(df): df['mask'] = df.lookup(df.index, 'mask_' + df['label']) exp_mask = alt(df, df.index, 'mask_' + df['label'], dtype=np.bool_) tm.assert_series_equal(df['mask'], pd.Series(exp_mask, name='mask')) - 
self.assertEqual(df['mask'].dtype, np.bool_) + assert df['mask'].dtype == np.bool_ - with tm.assertRaises(KeyError): + with pytest.raises(KeyError): self.frame.lookup(['xyz'], ['A']) - with tm.assertRaises(KeyError): + with pytest.raises(KeyError): self.frame.lookup([self.frame.index[0]], ['xyz']) - with tm.assertRaisesRegexp(ValueError, 'same size'): + with tm.assert_raises_regex(ValueError, 'same size'): self.frame.lookup(['a', 'b', 'c'], ['a']) def test_set_value(self): for idx in self.frame.index: for col in self.frame.columns: - self.frame.set_value(idx, col, 1) - self.assertEqual(self.frame[col][idx], 1) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + self.frame.set_value(idx, col, 1) + assert self.frame[col][idx] == 1 def test_set_value_resize(self): - res = self.frame.set_value('foobar', 'B', 0) - self.assertIs(res, self.frame) - self.assertEqual(res.index[-1], 'foobar') - self.assertEqual(res.get_value('foobar', 'B'), 0) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + res = self.frame.set_value('foobar', 'B', 0) + assert res is self.frame + assert res.index[-1] == 'foobar' + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + assert res.get_value('foobar', 'B') == 0 self.frame.loc['foobar', 'qux'] = 0 - self.assertEqual(self.frame.get_value('foobar', 'qux'), 0) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + assert self.frame.get_value('foobar', 'qux') == 0 res = self.frame.copy() - res3 = res.set_value('foobar', 'baz', 'sam') - self.assertEqual(res3['baz'].dtype, np.object_) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + res3 = res.set_value('foobar', 'baz', 'sam') + assert res3['baz'].dtype == np.object_ res = self.frame.copy() - res3 = res.set_value('foobar', 'baz', True) - self.assertEqual(res3['baz'].dtype, np.object_) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + res3 = 
res.set_value('foobar', 'baz', True) + assert res3['baz'].dtype == np.object_ res = self.frame.copy() - res3 = res.set_value('foobar', 'baz', 5) - self.assertTrue(is_float_dtype(res3['baz'])) - self.assertTrue(isnull(res3['baz'].drop(['foobar'])).all()) - self.assertRaises(ValueError, res3.set_value, 'foobar', 'baz', 'sam') + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + res3 = res.set_value('foobar', 'baz', 5) + assert is_float_dtype(res3['baz']) + assert isna(res3['baz'].drop(['foobar'])).all() + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + pytest.raises(ValueError, res3.set_value, 'foobar', 'baz', 'sam') def test_set_value_with_index_dtype_change(self): df_orig = DataFrame(randn(3, 3), index=lrange(3), columns=list('ABC')) @@ -1705,44 +1761,48 @@ def test_set_value_with_index_dtype_change(self): # this is actually ambiguous as the 2 is interpreted as a positional # so column is not created df = df_orig.copy() - df.set_value('C', 2, 1.0) - self.assertEqual(list(df.index), list(df_orig.index) + ['C']) - # self.assertEqual(list(df.columns), list(df_orig.columns) + [2]) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + df.set_value('C', 2, 1.0) + assert list(df.index) == list(df_orig.index) + ['C'] + # assert list(df.columns) == list(df_orig.columns) + [2] df = df_orig.copy() df.loc['C', 2] = 1.0 - self.assertEqual(list(df.index), list(df_orig.index) + ['C']) - # self.assertEqual(list(df.columns), list(df_orig.columns) + [2]) + assert list(df.index) == list(df_orig.index) + ['C'] + # assert list(df.columns) == list(df_orig.columns) + [2] # create both new df = df_orig.copy() - df.set_value('C', 'D', 1.0) - self.assertEqual(list(df.index), list(df_orig.index) + ['C']) - self.assertEqual(list(df.columns), list(df_orig.columns) + ['D']) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + df.set_value('C', 'D', 1.0) + assert list(df.index) == 
list(df_orig.index) + ['C'] + assert list(df.columns) == list(df_orig.columns) + ['D'] df = df_orig.copy() df.loc['C', 'D'] = 1.0 - self.assertEqual(list(df.index), list(df_orig.index) + ['C']) - self.assertEqual(list(df.columns), list(df_orig.columns) + ['D']) + assert list(df.index) == list(df_orig.index) + ['C'] + assert list(df.columns) == list(df_orig.columns) + ['D'] def test_get_set_value_no_partial_indexing(self): # partial w/ MultiIndex raise exception index = MultiIndex.from_tuples([(0, 1), (0, 2), (1, 1), (1, 2)]) df = DataFrame(index=index, columns=lrange(4)) - self.assertRaises(KeyError, df.get_value, 0, 1) - # self.assertRaises(KeyError, df.set_value, 0, 1, 0) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + pytest.raises(KeyError, df.get_value, 0, 1) def test_single_element_ix_dont_upcast(self): self.frame['E'] = 1 - self.assertTrue(issubclass(self.frame['E'].dtype.type, - (int, np.integer))) + assert issubclass(self.frame['E'].dtype.type, (int, np.integer)) with catch_warnings(record=True): result = self.frame.ix[self.frame.index[5], 'E'] - self.assertTrue(is_integer(result)) + assert is_integer(result) result = self.frame.loc[self.frame.index[5], 'E'] - self.assertTrue(is_integer(result)) + assert is_integer(result) # GH 11617 df = pd.DataFrame(dict(a=[1.23])) @@ -1750,9 +1810,9 @@ def test_single_element_ix_dont_upcast(self): with catch_warnings(record=True): result = df.ix[0, "b"] - self.assertTrue(is_integer(result)) + assert is_integer(result) result = df.loc[0, "b"] - self.assertTrue(is_integer(result)) + assert is_integer(result) expected = Series([666], [0], name='b') with catch_warnings(record=True): @@ -1761,13 +1821,9 @@ def test_single_element_ix_dont_upcast(self): result = df.loc[[0], "b"] assert_series_equal(result, expected) - def test_irow(self): + def test_iloc_row(self): df = DataFrame(np.random.randn(10, 4), index=lrange(0, 20, 2)) - # 10711, deprecated - with tm.assert_produces_warning(FutureWarning): 
- df.irow(1) - result = df.iloc[1] exp = df.loc[2] assert_series_equal(result, exp) @@ -1785,7 +1841,7 @@ def test_irow(self): # setting it makes it raise/warn def f(): result[2] = 0. - self.assertRaises(com.SettingWithCopyError, f) + pytest.raises(com.SettingWithCopyError, f) exp_col = df[2].copy() exp_col[4:8] = 0. assert_series_equal(df[2], exp_col) @@ -1795,14 +1851,10 @@ def f(): expected = df.reindex(df.index[[1, 2, 4, 6]]) assert_frame_equal(result, expected) - def test_icol(self): + def test_iloc_col(self): df = DataFrame(np.random.randn(4, 10), columns=lrange(0, 20, 2)) - # 10711, deprecated - with tm.assert_produces_warning(FutureWarning): - df.icol(1) - result = df.iloc[:, 1] exp = df.loc[:, 2] assert_series_equal(result, exp) @@ -1820,16 +1872,15 @@ def test_icol(self): # and that we are setting a copy def f(): result[8] = 0. - self.assertRaises(com.SettingWithCopyError, f) - self.assertTrue((df[8] == 0).all()) + pytest.raises(com.SettingWithCopyError, f) + assert (df[8] == 0).all() # list of integers result = df.iloc[:, [1, 2, 4, 6]] expected = df.reindex(columns=df.columns[[1, 2, 4, 6]]) assert_frame_equal(result, expected) - def test_irow_icol_duplicates(self): - # 10711, deprecated + def test_iloc_duplicates(self): df = DataFrame(np.random.rand(3, 3), columns=list('ABC'), index=list('aab')) @@ -1837,14 +1888,14 @@ def test_irow_icol_duplicates(self): result = df.iloc[0] with catch_warnings(record=True): result2 = df.ix[0] - tm.assertIsInstance(result, Series) + assert isinstance(result, Series) assert_almost_equal(result.values, df.values[0]) assert_series_equal(result, result2) with catch_warnings(record=True): result = df.T.iloc[:, 0] result2 = df.T.ix[:, 0] - tm.assertIsInstance(result, Series) + assert isinstance(result, Series) assert_almost_equal(result.values, df.values[0]) assert_series_equal(result, result2) @@ -1874,22 +1925,18 @@ def test_irow_icol_duplicates(self): expected = df.take([0], axis=1) assert_frame_equal(result, expected) - 
def test_icol_sparse_propegate_fill_value(self): - from pandas.sparse.api import SparseDataFrame + def test_iloc_sparse_propegate_fill_value(self): + from pandas.core.sparse.api import SparseDataFrame df = SparseDataFrame({'A': [999, 1]}, default_fill_value=999) - self.assertTrue(len(df['A'].sp_values) == len(df.iloc[:, 0].sp_values)) - - def test_iget_value(self): - # 10711 deprecated + assert len(df['A'].sp_values) == len(df.iloc[:, 0].sp_values) - with tm.assert_produces_warning(FutureWarning): - self.frame.iget_value(0, 0) + def test_iat(self): for i, row in enumerate(self.frame.index): for j, col in enumerate(self.frame.columns): result = self.frame.iat[i, j] expected = self.frame.at[row, col] - self.assertEqual(result, expected) + assert result == expected def test_nested_exception(self): # Ignore the strange way of triggering the problem @@ -1905,7 +1952,7 @@ def test_nested_exception(self): try: repr(df) except Exception as e: - self.assertNotEqual(type(e), UnboundLocalError) + assert type(e) != UnboundLocalError def test_reindex_methods(self): df = pd.DataFrame({'x': list(range(5))}) @@ -1920,9 +1967,13 @@ def test_reindex_methods(self): actual = df.reindex_like(df, method=method, tolerance=0) assert_frame_equal(df, actual) + actual = df.reindex_like(df, method=method, tolerance=[0, 0, 0, 0]) + assert_frame_equal(df, actual) actual = df.reindex(target, method=method, tolerance=1) assert_frame_equal(expected, actual) + actual = df.reindex(target, method=method, tolerance=[1, 1, 1, 1]) + assert_frame_equal(expected, actual) e2 = expected[::-1] actual = df.reindex(target[::-1], method=method) @@ -1943,20 +1994,25 @@ def test_reindex_methods(self): actual = df.reindex(target, method='nearest', tolerance=0.2) assert_frame_equal(expected, actual) + expected = pd.DataFrame({'x': [0, np.nan, 1, np.nan]}, index=target) + actual = df.reindex(target, method='nearest', + tolerance=[0.5, 0.01, 0.4, 0.1]) + assert_frame_equal(expected, actual) + def 
test_reindex_frame_add_nat(self): rng = date_range('1/1/2000 00:00:00', periods=10, freq='10s') df = DataFrame({'A': np.random.randn(len(rng)), 'B': rng}) result = df.reindex(lrange(15)) - self.assertTrue(np.issubdtype(result['B'].dtype, np.dtype('M8[ns]'))) + assert np.issubdtype(result['B'].dtype, np.dtype('M8[ns]')) - mask = com.isnull(result)['B'] - self.assertTrue(mask[-5:].all()) - self.assertFalse(mask[:-5].any()) + mask = com.isna(result)['B'] + assert mask[-5:].all() + assert not mask[:-5].any() def test_set_dataframe_column_ns_dtype(self): x = DataFrame([datetime.now(), datetime.now()]) - self.assertEqual(x[0].dtype, np.dtype('M8[ns]')) + assert x[0].dtype == np.dtype('M8[ns]') def test_non_monotonic_reindex_methods(self): dr = pd.date_range('2013-08-01', periods=6, freq='B') @@ -1965,11 +2021,10 @@ def test_non_monotonic_reindex_methods(self): df_rev = pd.DataFrame(data, index=dr[[3, 4, 5] + [0, 1, 2]], columns=list('A')) # index is not monotonic increasing or decreasing - self.assertRaises(ValueError, df_rev.reindex, df.index, method='pad') - self.assertRaises(ValueError, df_rev.reindex, df.index, method='ffill') - self.assertRaises(ValueError, df_rev.reindex, df.index, method='bfill') - self.assertRaises(ValueError, df_rev.reindex, - df.index, method='nearest') + pytest.raises(ValueError, df_rev.reindex, df.index, method='pad') + pytest.raises(ValueError, df_rev.reindex, df.index, method='ffill') + pytest.raises(ValueError, df_rev.reindex, df.index, method='bfill') + pytest.raises(ValueError, df_rev.reindex, df.index, method='nearest') def test_reindex_level(self): from itertools import permutations @@ -2111,13 +2166,13 @@ def test_setitem_with_unaligned_tz_aware_datetime_column(self): assert_series_equal(df['dates'], column) def test_setitem_datetime_coercion(self): - # GH 1048 + # gh-1048 df = pd.DataFrame({'c': [pd.Timestamp('2010-10-01')] * 3}) df.loc[0:1, 'c'] = np.datetime64('2008-08-08') - self.assertEqual(pd.Timestamp('2008-08-08'), df.loc[0, 
'c']) - self.assertEqual(pd.Timestamp('2008-08-08'), df.loc[1, 'c']) + assert pd.Timestamp('2008-08-08') == df.loc[0, 'c'] + assert pd.Timestamp('2008-08-08') == df.loc[1, 'c'] df.loc[2, 'c'] = date(2005, 5, 5) - self.assertEqual(pd.Timestamp('2005-05-05'), df.loc[2, 'c']) + assert pd.Timestamp('2005-05-05') == df.loc[2, 'c'] def test_setitem_datetimelike_with_inference(self): # GH 7592 @@ -2155,14 +2210,14 @@ def test_at_time_between_time_datetimeindex(self): expected2 = df.iloc[ainds] assert_frame_equal(result, expected) assert_frame_equal(result, expected2) - self.assertEqual(len(result), 4) + assert len(result) == 4 result = df.between_time(bkey.start, bkey.stop) expected = df.loc[bkey] expected2 = df.iloc[binds] assert_frame_equal(result, expected) assert_frame_equal(result, expected2) - self.assertEqual(len(result), 12) + assert len(result) == 12 result = df.copy() result.loc[akey] = 0 @@ -2193,9 +2248,9 @@ def test_xs(self): xs = self.frame.xs(idx) for item, value in compat.iteritems(xs): if np.isnan(value): - self.assertTrue(np.isnan(self.frame[item][idx])) + assert np.isnan(self.frame[item][idx]) else: - self.assertEqual(value, self.frame[item][idx]) + assert value == self.frame[item][idx] # mixed-type xs test_data = { @@ -2204,11 +2259,11 @@ def test_xs(self): } frame = DataFrame(test_data) xs = frame.xs('1') - self.assertEqual(xs.dtype, np.object_) - self.assertEqual(xs['A'], 1) - self.assertEqual(xs['B'], '1') + assert xs.dtype == np.object_ + assert xs['A'] == 1 + assert xs['B'] == '1' - with tm.assertRaises(KeyError): + with pytest.raises(KeyError): self.tsframe.xs(self.tsframe.index[0] - BDay()) # xs get column @@ -2219,7 +2274,7 @@ def test_xs(self): # view is returned if possible series = self.frame.xs('A', axis=1) series[:] = 5 - self.assertTrue((expected == 5).all()) + assert (expected == 5).all() def test_xs_corner(self): # pathological mixed-type reordering case @@ -2269,7 +2324,7 @@ def test_xs_view(self): index=lrange(4), columns=lrange(5)) 
dm.xs(2)[:] = 10 - self.assertTrue((dm.xs(2) == 10).all()) + assert (dm.xs(2) == 10).all() def test_index_namedtuple(self): from collections import namedtuple @@ -2282,10 +2337,10 @@ def test_index_namedtuple(self): with catch_warnings(record=True): result = df.ix[IndexType("foo", "bar")]["A"] - self.assertEqual(result, 1) + assert result == 1 result = df.loc[IndexType("foo", "bar")]["A"] - self.assertEqual(result, 1) + assert result == 1 def test_boolean_indexing(self): idx = lrange(3) @@ -2305,7 +2360,7 @@ def test_boolean_indexing(self): df1[df1 > 2.0 * df2] = -1 assert_frame_equal(df1, expected) - with assertRaisesRegexp(ValueError, 'Item wrong length'): + with tm.assert_raises_regex(ValueError, 'Item wrong length'): df1[df1.index[:-1] > 2] = -1 def test_boolean_indexing_mixed(self): @@ -2336,7 +2391,8 @@ def test_boolean_indexing_mixed(self): assert_frame_equal(df2, expected) df['foo'] = 'test' - with tm.assertRaisesRegexp(TypeError, 'boolean setting on mixed-type'): + with tm.assert_raises_regex(TypeError, 'boolean setting ' + 'on mixed-type'): df[df > 0.3] = 1 def test_where(self): @@ -2349,8 +2405,8 @@ def is_ok(s): return (issubclass(s.dtype.type, (np.integer, np.floating)) and s.dtype != 'uint8') - return DataFrame(dict([(c, s + 1) if is_ok(s) else (c, s) - for c, s in compat.iteritems(df)])) + return DataFrame(dict((c, s + 1) if is_ok(s) else (c, s) + for c, s in compat.iteritems(df))) def _check_get(df, cond, check_dtypes=True): other1 = _safe_add(df) @@ -2364,7 +2420,7 @@ def _check_get(df, cond, check_dtypes=True): # dtypes if check_dtypes: - self.assertTrue((rs.dtypes == df.dtypes).all()) + assert (rs.dtypes == df.dtypes).all() # check getting for df in [default_frame, self.mixed_frame, @@ -2373,9 +2429,9 @@ def _check_get(df, cond, check_dtypes=True): _check_get(df, cond) # upcasting case (GH # 2794) - df = DataFrame(dict([(c, Series([1] * 3, dtype=c)) - for c in ['int64', 'int32', - 'float32', 'float64']])) + df = DataFrame(dict((c, Series([1] * 3, 
dtype=c)) + for c in ['float32', 'float64', + 'int32', 'int64'])) df.iloc[1, :] = 0 result = df.where(df >= 0).get_dtype_counts() @@ -2413,7 +2469,7 @@ def _check_align(df, cond, other, check_dtypes=True): # can't check dtype when other is an ndarray if check_dtypes and not isinstance(other, np.ndarray): - self.assertTrue((rs.dtypes == df.dtypes).all()) + assert (rs.dtypes == df.dtypes).all() for df in [self.mixed_frame, self.mixed_float, self.mixed_int]: @@ -2427,21 +2483,21 @@ def _check_align(df, cond, other, check_dtypes=True): # integers are upcast, so don't check the dtypes cond = df > 0 - check_dtypes = all([not issubclass(s.type, np.integer) - for s in df.dtypes]) + check_dtypes = all(not issubclass(s.type, np.integer) + for s in df.dtypes) _check_align(df, cond, np.nan, check_dtypes=check_dtypes) # invalid conditions df = default_frame err1 = (df + 1).values[0:2, :] - self.assertRaises(ValueError, df.where, cond, err1) + pytest.raises(ValueError, df.where, cond, err1) err2 = cond.iloc[:2, :].values other1 = _safe_add(df) - self.assertRaises(ValueError, df.where, err2, other1) + pytest.raises(ValueError, df.where, err2, other1) - self.assertRaises(ValueError, df.mask, True) - self.assertRaises(ValueError, df.mask, 0) + pytest.raises(ValueError, df.mask, True) + pytest.raises(ValueError, df.mask, 0) # where inplace def _check_set(df, cond, check_dtypes=True): @@ -2457,7 +2513,7 @@ def _check_set(df, cond, check_dtypes=True): for k, v in compat.iteritems(df.dtypes): if issubclass(v.type, np.integer) and not cond[k].all(): v = np.dtype('float64') - self.assertEqual(dfi[k].dtype, v) + assert dfi[k].dtype == v for df in [default_frame, self.mixed_frame, self.mixed_float, self.mixed_int]: @@ -2479,6 +2535,96 @@ def _check_set(df, cond, check_dtypes=True): expected = df[df['a'] == 1].reindex(df.index) assert_frame_equal(result, expected) + def test_where_array_like(self): + # see gh-15414 + klasses = [list, tuple, np.array] + + df = DataFrame({'a': [1, 2, 3]}) + 
cond = [[False], [True], [True]] + expected = DataFrame({'a': [np.nan, 2, 3]}) + + for klass in klasses: + result = df.where(klass(cond)) + assert_frame_equal(result, expected) + + df['b'] = 2 + expected['b'] = [2, np.nan, 2] + cond = [[False, True], [True, False], [True, True]] + + for klass in klasses: + result = df.where(klass(cond)) + assert_frame_equal(result, expected) + + def test_where_invalid_input(self): + # see gh-15414: only boolean arrays accepted + df = DataFrame({'a': [1, 2, 3]}) + msg = "Boolean array expected for the condition" + + conds = [ + [[1], [0], [1]], + Series([[2], [5], [7]]), + DataFrame({'a': [2, 5, 7]}), + [["True"], ["False"], ["True"]], + [[Timestamp("2017-01-01")], + [pd.NaT], [Timestamp("2017-01-02")]] + ] + + for cond in conds: + with tm.assert_raises_regex(ValueError, msg): + df.where(cond) + + df['b'] = 2 + conds = [ + [[0, 1], [1, 0], [1, 1]], + Series([[0, 2], [5, 0], [4, 7]]), + [["False", "True"], ["True", "False"], + ["True", "True"]], + DataFrame({'a': [2, 5, 7], 'b': [4, 8, 9]}), + [[pd.NaT, Timestamp("2017-01-01")], + [Timestamp("2017-01-02"), pd.NaT], + [Timestamp("2017-01-03"), Timestamp("2017-01-03")]] + ] + + for cond in conds: + with tm.assert_raises_regex(ValueError, msg): + df.where(cond) + + def test_where_dataframe_col_match(self): + df = DataFrame([[1, 2, 3], [4, 5, 6]]) + cond = DataFrame([[True, False, True], [False, False, True]]) + + result = df.where(cond) + expected = DataFrame([[1.0, np.nan, 3], [np.nan, np.nan, 6]]) + tm.assert_frame_equal(result, expected) + + # this *does* align, though has no matching columns + cond.columns = ["a", "b", "c"] + result = df.where(cond) + expected = DataFrame(np.nan, index=df.index, columns=df.columns) + tm.assert_frame_equal(result, expected) + + def test_where_ndframe_align(self): + msg = "Array conditional must be same shape as self" + df = DataFrame([[1, 2, 3], [4, 5, 6]]) + + cond = [True] + with tm.assert_raises_regex(ValueError, msg): + df.where(cond) + + 
expected = DataFrame([[1, 2, 3], [np.nan, np.nan, np.nan]]) + + out = df.where(Series(cond)) + tm.assert_frame_equal(out, expected) + + cond = np.array([False, True, False, True]) + with tm.assert_raises_regex(ValueError, msg): + df.where(cond) + + expected = DataFrame([[np.nan, np.nan, np.nan], [4, 5, 6]]) + + out = df.where(Series(cond)) + tm.assert_frame_equal(out, expected) + def test_where_bug(self): # GH 2793 @@ -2515,7 +2661,7 @@ def test_where_bug(self): # GH7506 a = DataFrame({0: [1, 2], 1: [3, 4], 2: [5, 6]}) b = DataFrame({0: [np.nan, 8], 1: [9, np.nan], 2: [np.nan, np.nan]}) - do_not_replace = b.isnull() | (a > b) + do_not_replace = b.isna() | (a > b) expected = a.copy() expected[~do_not_replace] = b @@ -2525,7 +2671,7 @@ def test_where_bug(self): a = DataFrame({0: [4, 6], 1: [1, 0]}) b = DataFrame({0: [np.nan, 3], 1: [3, np.nan]}) - do_not_replace = b.isnull() | (a > b) + do_not_replace = b.isna() | (a > b) expected = a.copy() expected[~do_not_replace] = b @@ -2558,9 +2704,10 @@ def test_where_none(self): # GH 7656 df = DataFrame([{'A': 1, 'B': np.nan, 'C': 'Test'}, { 'A': np.nan, 'B': 'Test', 'C': np.nan}]) - expected = df.where(~isnull(df), None) - with tm.assertRaisesRegexp(TypeError, 'boolean setting on mixed-type'): - df.where(~isnull(df), None, inplace=True) + expected = df.where(~isna(df), None) + with tm.assert_raises_regex(TypeError, 'boolean setting ' + 'on mixed-type'): + df.where(~isna(df), None, inplace=True) def test_where_align(self): @@ -2574,10 +2721,10 @@ def create(): # series df = create() expected = df.fillna(df.mean()) - result = df.where(pd.notnull(df), df.mean(), axis='columns') + result = df.where(pd.notna(df), df.mean(), axis='columns') assert_frame_equal(result, expected) - df.where(pd.notnull(df), df.mean(), inplace=True, axis='columns') + df.where(pd.notna(df), df.mean(), inplace=True, axis='columns') assert_frame_equal(df, expected) df = create().fillna(0) @@ -2590,7 +2737,7 @@ def create(): # frame df = create() expected 
= df.fillna(1) - result = df.where(pd.notnull(df), DataFrame( + result = df.where(pd.notna(df), DataFrame( 1, index=df.index, columns=df.columns)) assert_frame_equal(result, expected) @@ -2637,7 +2784,7 @@ def test_where_axis(self): result.where(mask, s, axis='index', inplace=True) assert_frame_equal(result, expected) - expected = DataFrame([[0, np.nan], [0, np.nan]], dtype='float64') + expected = DataFrame([[0, np.nan], [0, np.nan]]) result = df.where(mask, s, axis='columns') assert_frame_equal(result, expected) @@ -2648,17 +2795,18 @@ def test_where_axis(self): assert_frame_equal(result, expected) # Multiple dtypes (=> multiple Blocks) - df = pd.concat([DataFrame(np.random.randn(10, 2)), - DataFrame(np.random.randint(0, 10, size=(10, 2)))], - ignore_index=True, axis=1) + df = pd.concat([ + DataFrame(np.random.randn(10, 2)), + DataFrame(np.random.randint(0, 10, size=(10, 2)), dtype='int64')], + ignore_index=True, axis=1) mask = DataFrame(False, columns=df.columns, index=df.index) s1 = Series(1, index=df.columns) s2 = Series(2, index=df.index) result = df.where(mask, s1, axis='columns') expected = DataFrame(1.0, columns=df.columns, index=df.index) - expected[2] = expected[2].astype(int) - expected[3] = expected[3].astype(int) + expected[2] = expected[2].astype('int64') + expected[3] = expected[3].astype('int64') assert_frame_equal(result, expected) result = df.copy() @@ -2667,8 +2815,8 @@ def test_where_axis(self): result = df.where(mask, s2, axis='index') expected = DataFrame(2.0, columns=df.columns, index=df.index) - expected[2] = expected[2].astype(int) - expected[3] = expected[3].astype(int) + expected[2] = expected[2].astype('int64') + expected[3] = expected[3].astype('int64') assert_frame_equal(result, expected) result = df.copy() @@ -2817,7 +2965,7 @@ def test_type_error_multiindex(self): dg = df.pivot_table(index='i', columns='c', values=['x', 'y']) - with assertRaisesRegexp(TypeError, "is an invalid key"): + with tm.assert_raises_regex(TypeError, "is an 
invalid key"): str(dg[:, 0]) index = Index(range(2), name='i') @@ -2837,9 +2985,9 @@ def test_type_error_multiindex(self): assert_series_equal(result, expected) -class TestDataFrameIndexingDatetimeWithTZ(tm.TestCase, TestData): +class TestDataFrameIndexingDatetimeWithTZ(TestData): - def setUp(self): + def setup_method(self, method): self.idx = Index(date_range('20130101', periods=3, tz='US/Eastern'), name='foo') self.dr = date_range('20130110', periods=3) @@ -2863,16 +3011,15 @@ def test_setitem(self): # are copies) b1 = df._data.blocks[1] b2 = df._data.blocks[2] - self.assertTrue(b1.values.equals(b2.values)) - self.assertFalse(id(b1.values.values.base) == - id(b2.values.values.base)) + assert b1.values.equals(b2.values) + assert id(b1.values.values.base) != id(b2.values.values.base) # with nan df2 = df.copy() df2.iloc[1, 1] = pd.NaT df2.iloc[1, 2] = pd.NaT result = df2['B'] - assert_series_equal(notnull(result), Series( + assert_series_equal(notna(result), Series( [True, False, True], name='B')) assert_series_equal(df2.dtypes, df.dtypes) @@ -2883,7 +3030,7 @@ def test_set_reset(self): # set/reset df = DataFrame({'A': [0, 1, 2]}, index=idx) result = df.reset_index() - self.assertTrue(result['foo'].dtype, 'M8[ns, US/Eastern') + assert result['foo'].dtype, 'M8[ns, US/Eastern' df = result.set_index('foo') tm.assert_index_equal(df.index, idx) @@ -2896,9 +3043,9 @@ def test_transpose(self): assert_frame_equal(result, expected) -class TestDataFrameIndexingUInt64(tm.TestCase, TestData): +class TestDataFrameIndexingUInt64(TestData): - def setUp(self): + def setup_method(self, method): self.ir = Index(np.arange(3), dtype=np.uint64) self.idx = Index([2**63, 2**63 + 5, 2**63 + 10], name='foo') @@ -2924,7 +3071,7 @@ def test_setitem(self): df2.iloc[1, 1] = pd.NaT df2.iloc[1, 2] = pd.NaT result = df2['B'] - assert_series_equal(notnull(result), Series( + assert_series_equal(notna(result), Series( [True, False, True], name='B')) assert_series_equal(df2.dtypes, 
Series([np.dtype('uint64'), np.dtype('O'), np.dtype('O')], @@ -2937,7 +3084,7 @@ def test_set_reset(self): # set/reset df = DataFrame({'A': [0, 1, 2]}, index=idx) result = df.reset_index() - self.assertEqual(result['foo'].dtype, np.dtype('uint64')) + assert result['foo'].dtype == np.dtype('uint64') df = result.set_index('foo') tm.assert_index_equal(df.index, idx) @@ -2948,3 +3095,372 @@ def test_transpose(self): expected = DataFrame(self.df.values.T) expected.index = ['A', 'B'] assert_frame_equal(result, expected) + + +class TestDataFrameIndexingCategorical(object): + + def test_assignment(self): + # assignment + df = DataFrame({'value': np.array( + np.random.randint(0, 10000, 100), dtype='int32')}) + labels = Categorical(["{0} - {1}".format(i, i + 499) + for i in range(0, 10000, 500)]) + + df = df.sort_values(by=['value'], ascending=True) + s = pd.cut(df.value, range(0, 10500, 500), right=False, labels=labels) + d = s.values + df['D'] = d + str(df) + + result = df.dtypes + expected = Series( + [np.dtype('int32'), CategoricalDtype(categories=labels, + ordered=False)], + index=['value', 'D']) + tm.assert_series_equal(result, expected) + + df['E'] = s + str(df) + + result = df.dtypes + expected = Series([np.dtype('int32'), + CategoricalDtype(categories=labels, ordered=False), + CategoricalDtype(categories=labels, ordered=False)], + index=['value', 'D', 'E']) + tm.assert_series_equal(result, expected) + + result1 = df['D'] + result2 = df['E'] + tm.assert_categorical_equal(result1._data._block.values, d) + + # sorting + s.name = 'E' + tm.assert_series_equal(result2.sort_index(), s.sort_index()) + + cat = Categorical([1, 2, 3, 10], categories=[1, 2, 3, 4, 10]) + df = DataFrame(Series(cat)) + + def test_assigning_ops(self): + # systematically test the assigning operations: + # for all slicing ops: + # for value in categories and value not in categories: + + # - assign a single value -> exp_single_cats_value + + # - assign a complete row (mixed values) -> exp_single_row + 
+ # assign multiple rows (mixed values) (-> array) -> exp_multi_row + + # assign a part of a column with dtype == categorical -> + # exp_parts_cats_col + + # assign a part of a column with dtype != categorical -> + # exp_parts_cats_col + + cats = Categorical(["a", "a", "a", "a", "a", "a", "a"], + categories=["a", "b"]) + idx = Index(["h", "i", "j", "k", "l", "m", "n"]) + values = [1, 1, 1, 1, 1, 1, 1] + orig = DataFrame({"cats": cats, "values": values}, index=idx) + + # the expected values + # changed single row + cats1 = Categorical(["a", "a", "b", "a", "a", "a", "a"], + categories=["a", "b"]) + idx1 = Index(["h", "i", "j", "k", "l", "m", "n"]) + values1 = [1, 1, 2, 1, 1, 1, 1] + exp_single_row = DataFrame({"cats": cats1, + "values": values1}, index=idx1) + + # changed multiple rows + cats2 = Categorical(["a", "a", "b", "b", "a", "a", "a"], + categories=["a", "b"]) + idx2 = Index(["h", "i", "j", "k", "l", "m", "n"]) + values2 = [1, 1, 2, 2, 1, 1, 1] + exp_multi_row = DataFrame({"cats": cats2, + "values": values2}, index=idx2) + + # changed part of the cats column + cats3 = Categorical( + ["a", "a", "b", "b", "a", "a", "a"], categories=["a", "b"]) + idx3 = Index(["h", "i", "j", "k", "l", "m", "n"]) + values3 = [1, 1, 1, 1, 1, 1, 1] + exp_parts_cats_col = DataFrame({"cats": cats3, + "values": values3}, index=idx3) + + # changed single value in cats col + cats4 = Categorical( + ["a", "a", "b", "a", "a", "a", "a"], categories=["a", "b"]) + idx4 = Index(["h", "i", "j", "k", "l", "m", "n"]) + values4 = [1, 1, 1, 1, 1, 1, 1] + exp_single_cats_value = DataFrame({"cats": cats4, + "values": values4}, index=idx4) + + # iloc + # ############### + # - assign a single value -> exp_single_cats_value + df = orig.copy() + df.iloc[2, 0] = "b" + tm.assert_frame_equal(df, exp_single_cats_value) + + df = orig.copy() + df.iloc[df.index == "j", 0] = "b" + tm.assert_frame_equal(df, exp_single_cats_value) + + # - assign a single value not in the current categories set + def f(): + df = 
orig.copy() + df.iloc[2, 0] = "c" + + pytest.raises(ValueError, f) + + # - assign a complete row (mixed values) -> exp_single_row + df = orig.copy() + df.iloc[2, :] = ["b", 2] + tm.assert_frame_equal(df, exp_single_row) + + # - assign a complete row (mixed values) not in categories set + def f(): + df = orig.copy() + df.iloc[2, :] = ["c", 2] + + pytest.raises(ValueError, f) + + # - assign multiple rows (mixed values) -> exp_multi_row + df = orig.copy() + df.iloc[2:4, :] = [["b", 2], ["b", 2]] + tm.assert_frame_equal(df, exp_multi_row) + + def f(): + df = orig.copy() + df.iloc[2:4, :] = [["c", 2], ["c", 2]] + + pytest.raises(ValueError, f) + + # assign a part of a column with dtype == categorical -> + # exp_parts_cats_col + df = orig.copy() + df.iloc[2:4, 0] = Categorical(["b", "b"], categories=["a", "b"]) + tm.assert_frame_equal(df, exp_parts_cats_col) + + with pytest.raises(ValueError): + # different categories -> not sure if this should fail or pass + df = orig.copy() + df.iloc[2:4, 0] = Categorical(list('bb'), categories=list('abc')) + + with pytest.raises(ValueError): + # different values + df = orig.copy() + df.iloc[2:4, 0] = Categorical(list('cc'), categories=list('abc')) + + # assign a part of a column with dtype != categorical -> + # exp_parts_cats_col + df = orig.copy() + df.iloc[2:4, 0] = ["b", "b"] + tm.assert_frame_equal(df, exp_parts_cats_col) + + with pytest.raises(ValueError): + df.iloc[2:4, 0] = ["c", "c"] + + # loc + # ############## + # - assign a single value -> exp_single_cats_value + df = orig.copy() + df.loc["j", "cats"] = "b" + tm.assert_frame_equal(df, exp_single_cats_value) + + df = orig.copy() + df.loc[df.index == "j", "cats"] = "b" + tm.assert_frame_equal(df, exp_single_cats_value) + + # - assign a single value not in the current categories set + def f(): + df = orig.copy() + df.loc["j", "cats"] = "c" + + pytest.raises(ValueError, f) + + # - assign a complete row (mixed values) -> exp_single_row + df = orig.copy() + df.loc["j", :] = ["b", 
2] + tm.assert_frame_equal(df, exp_single_row) + + # - assign a complete row (mixed values) not in categories set + def f(): + df = orig.copy() + df.loc["j", :] = ["c", 2] + + pytest.raises(ValueError, f) + + # - assign multiple rows (mixed values) -> exp_multi_row + df = orig.copy() + df.loc["j":"k", :] = [["b", 2], ["b", 2]] + tm.assert_frame_equal(df, exp_multi_row) + + def f(): + df = orig.copy() + df.loc["j":"k", :] = [["c", 2], ["c", 2]] + + pytest.raises(ValueError, f) + + # assign a part of a column with dtype == categorical -> + # exp_parts_cats_col + df = orig.copy() + df.loc["j":"k", "cats"] = Categorical( + ["b", "b"], categories=["a", "b"]) + tm.assert_frame_equal(df, exp_parts_cats_col) + + with pytest.raises(ValueError): + # different categories -> not sure if this should fail or pass + df = orig.copy() + df.loc["j":"k", "cats"] = Categorical( + ["b", "b"], categories=["a", "b", "c"]) + + with pytest.raises(ValueError): + # different values + df = orig.copy() + df.loc["j":"k", "cats"] = Categorical( + ["c", "c"], categories=["a", "b", "c"]) + + # assign a part of a column with dtype != categorical -> + # exp_parts_cats_col + df = orig.copy() + df.loc["j":"k", "cats"] = ["b", "b"] + tm.assert_frame_equal(df, exp_parts_cats_col) + + with pytest.raises(ValueError): + df.loc["j":"k", "cats"] = ["c", "c"] + + # loc + # ############## + # - assign a single value -> exp_single_cats_value + df = orig.copy() + df.loc["j", df.columns[0]] = "b" + tm.assert_frame_equal(df, exp_single_cats_value) + + df = orig.copy() + df.loc[df.index == "j", df.columns[0]] = "b" + tm.assert_frame_equal(df, exp_single_cats_value) + + # - assign a single value not in the current categories set + def f(): + df = orig.copy() + df.loc["j", df.columns[0]] = "c" + + pytest.raises(ValueError, f) + + # - assign a complete row (mixed values) -> exp_single_row + df = orig.copy() + df.loc["j", :] = ["b", 2] + tm.assert_frame_equal(df, exp_single_row) + + # - assign a complete row (mixed 
values) not in categories set + def f(): + df = orig.copy() + df.loc["j", :] = ["c", 2] + + pytest.raises(ValueError, f) + + # - assign multiple rows (mixed values) -> exp_multi_row + df = orig.copy() + df.loc["j":"k", :] = [["b", 2], ["b", 2]] + tm.assert_frame_equal(df, exp_multi_row) + + def f(): + df = orig.copy() + df.loc["j":"k", :] = [["c", 2], ["c", 2]] + + pytest.raises(ValueError, f) + + # assign a part of a column with dtype == categorical -> + # exp_parts_cats_col + df = orig.copy() + df.loc["j":"k", df.columns[0]] = Categorical( + ["b", "b"], categories=["a", "b"]) + tm.assert_frame_equal(df, exp_parts_cats_col) + + with pytest.raises(ValueError): + # different categories -> not sure if this should fail or pass + df = orig.copy() + df.loc["j":"k", df.columns[0]] = Categorical( + ["b", "b"], categories=["a", "b", "c"]) + + with pytest.raises(ValueError): + # different values + df = orig.copy() + df.loc["j":"k", df.columns[0]] = Categorical( + ["c", "c"], categories=["a", "b", "c"]) + + # assign a part of a column with dtype != categorical -> + # exp_parts_cats_col + df = orig.copy() + df.loc["j":"k", df.columns[0]] = ["b", "b"] + tm.assert_frame_equal(df, exp_parts_cats_col) + + with pytest.raises(ValueError): + df.loc["j":"k", df.columns[0]] = ["c", "c"] + + # iat + df = orig.copy() + df.iat[2, 0] = "b" + tm.assert_frame_equal(df, exp_single_cats_value) + + # - assign a single value not in the current categories set + def f(): + df = orig.copy() + df.iat[2, 0] = "c" + + pytest.raises(ValueError, f) + + # at + # - assign a single value -> exp_single_cats_value + df = orig.copy() + df.at["j", "cats"] = "b" + tm.assert_frame_equal(df, exp_single_cats_value) + + # - assign a single value not in the current categories set + def f(): + df = orig.copy() + df.at["j", "cats"] = "c" + + pytest.raises(ValueError, f) + + # fancy indexing + catsf = Categorical(["a", "a", "c", "c", "a", "a", "a"], + categories=["a", "b", "c"]) + idxf = Index(["h", "i", "j", "k", 
"l", "m", "n"]) + valuesf = [1, 1, 3, 3, 1, 1, 1] + df = DataFrame({"cats": catsf, "values": valuesf}, index=idxf) + + exp_fancy = exp_multi_row.copy() + exp_fancy["cats"].cat.set_categories(["a", "b", "c"], inplace=True) + + df[df["cats"] == "c"] = ["b", 2] + # category c is kept in .categories + tm.assert_frame_equal(df, exp_fancy) + + # set_value + df = orig.copy() + df.at["j", "cats"] = "b" + tm.assert_frame_equal(df, exp_single_cats_value) + + def f(): + df = orig.copy() + df.at["j", "cats"] = "c" + + pytest.raises(ValueError, f) + + # Assigning a Category to parts of a int/... column uses the values of + # the Catgorical + df = DataFrame({"a": [1, 1, 1, 1, 1], "b": list("aaaaa")}) + exp = DataFrame({"a": [1, "b", "b", 1, 1], "b": list("aabba")}) + df.loc[1:2, "a"] = Categorical(["b", "b"], categories=["a", "b"]) + df.loc[2:3, "b"] = Categorical(["b", "b"], categories=["a", "b"]) + tm.assert_frame_equal(df, exp) + + def test_functions_no_warnings(self): + df = DataFrame({'value': np.random.randint(0, 100, 20)}) + labels = ["{0} - {1}".format(i, i + 9) for i in range(0, 100, 10)] + with tm.assert_produces_warning(False): + df['group'] = pd.cut(df.value, range(0, 105, 10), right=False, + labels=labels) diff --git a/pandas/tests/frame/test_join.py b/pandas/tests/frame/test_join.py new file mode 100644 index 0000000000000..ccdba6df2521a --- /dev/null +++ b/pandas/tests/frame/test_join.py @@ -0,0 +1,184 @@ +# -*- coding: utf-8 -*- + +import pytest +import numpy as np + +from pandas import DataFrame, Index, PeriodIndex +from pandas.tests.frame.common import TestData +import pandas.util.testing as tm + + +@pytest.fixture +def frame_with_period_index(): + return DataFrame( + data=np.arange(20).reshape(4, 5), + columns=list('abcde'), + index=PeriodIndex(start='2000', freq='A', periods=4)) + + +@pytest.fixture +def frame(): + return TestData().frame + + +@pytest.fixture +def left(): + return DataFrame({'a': [20, 10, 0]}, index=[2, 1, 0]) + + +@pytest.fixture +def 
right(): + return DataFrame({'b': [300, 100, 200]}, index=[3, 1, 2]) + + +@pytest.mark.parametrize( + "how, sort, expected", + [('inner', False, DataFrame({'a': [20, 10], + 'b': [200, 100]}, + index=[2, 1])), + ('inner', True, DataFrame({'a': [10, 20], + 'b': [100, 200]}, + index=[1, 2])), + ('left', False, DataFrame({'a': [20, 10, 0], + 'b': [200, 100, np.nan]}, + index=[2, 1, 0])), + ('left', True, DataFrame({'a': [0, 10, 20], + 'b': [np.nan, 100, 200]}, + index=[0, 1, 2])), + ('right', False, DataFrame({'a': [np.nan, 10, 20], + 'b': [300, 100, 200]}, + index=[3, 1, 2])), + ('right', True, DataFrame({'a': [10, 20, np.nan], + 'b': [100, 200, 300]}, + index=[1, 2, 3])), + ('outer', False, DataFrame({'a': [0, 10, 20, np.nan], + 'b': [np.nan, 100, 200, 300]}, + index=[0, 1, 2, 3])), + ('outer', True, DataFrame({'a': [0, 10, 20, np.nan], + 'b': [np.nan, 100, 200, 300]}, + index=[0, 1, 2, 3]))]) +def test_join(left, right, how, sort, expected): + + result = left.join(right, how=how, sort=sort) + tm.assert_frame_equal(result, expected) + + +def test_join_index(frame): + # left / right + + f = frame.loc[frame.index[:10], ['A', 'B']] + f2 = frame.loc[frame.index[5:], ['C', 'D']].iloc[::-1] + + joined = f.join(f2) + tm.assert_index_equal(f.index, joined.index) + expected_columns = Index(['A', 'B', 'C', 'D']) + tm.assert_index_equal(joined.columns, expected_columns) + + joined = f.join(f2, how='left') + tm.assert_index_equal(joined.index, f.index) + tm.assert_index_equal(joined.columns, expected_columns) + + joined = f.join(f2, how='right') + tm.assert_index_equal(joined.index, f2.index) + tm.assert_index_equal(joined.columns, expected_columns) + + # inner + + joined = f.join(f2, how='inner') + tm.assert_index_equal(joined.index, f.index[5:10]) + tm.assert_index_equal(joined.columns, expected_columns) + + # outer + + joined = f.join(f2, how='outer') + tm.assert_index_equal(joined.index, frame.index.sort_values()) + tm.assert_index_equal(joined.columns, expected_columns) + + 
tm.assert_raises_regex( + ValueError, 'join method', f.join, f2, how='foo') + + # corner case - overlapping columns + for how in ('outer', 'left', 'inner'): + with tm.assert_raises_regex(ValueError, 'columns overlap but ' + 'no suffix'): + frame.join(frame, how=how) + + +def test_join_index_more(frame): + af = frame.loc[:, ['A', 'B']] + bf = frame.loc[::2, ['C', 'D']] + + expected = af.copy() + expected['C'] = frame['C'][::2] + expected['D'] = frame['D'][::2] + + result = af.join(bf) + tm.assert_frame_equal(result, expected) + + result = af.join(bf, how='right') + tm.assert_frame_equal(result, expected[::2]) + + result = bf.join(af, how='right') + tm.assert_frame_equal(result, expected.loc[:, result.columns]) + + +def test_join_index_series(frame): + df = frame.copy() + s = df.pop(frame.columns[-1]) + joined = df.join(s) + + # TODO should this check_names ? + tm.assert_frame_equal(joined, frame, check_names=False) + + s.name = None + tm.assert_raises_regex(ValueError, 'must have a name', df.join, s) + + +def test_join_overlap(frame): + df1 = frame.loc[:, ['A', 'B', 'C']] + df2 = frame.loc[:, ['B', 'C', 'D']] + + joined = df1.join(df2, lsuffix='_df1', rsuffix='_df2') + df1_suf = df1.loc[:, ['B', 'C']].add_suffix('_df1') + df2_suf = df2.loc[:, ['B', 'C']].add_suffix('_df2') + + no_overlap = frame.loc[:, ['A', 'D']] + expected = df1_suf.join(df2_suf).join(no_overlap) + + # column order not necessarily sorted + tm.assert_frame_equal(joined, expected.loc[:, joined.columns]) + + +def test_join_period_index(frame_with_period_index): + other = frame_with_period_index.rename( + columns=lambda x: '{key}{key}'.format(key=x)) + + joined_values = np.concatenate( + [frame_with_period_index.values] * 2, axis=1) + + joined_cols = frame_with_period_index.columns.append(other.columns) + + joined = frame_with_period_index.join(other) + expected = DataFrame( + data=joined_values, + columns=joined_cols, + index=frame_with_period_index.index) + + tm.assert_frame_equal(joined, expected) 
+ + +def test_join_left_sequence_non_unique_index(): + # https://github.com/pandas-dev/pandas/issues/19607 + df1 = DataFrame({'a': [0, 10, 20]}, index=[1, 2, 3]) + df2 = DataFrame({'b': [100, 200, 300]}, index=[4, 3, 2]) + df3 = DataFrame({'c': [400, 500, 600]}, index=[2, 2, 4]) + + joined = df1.join([df2, df3], how='left') + + expected = DataFrame({ + 'a': [0, 10, 10, 20], + 'b': [np.nan, 300, 300, 200], + 'c': [np.nan, 400, 500, np.nan] + }, index=[1, 2, 2, 3]) + + tm.assert_frame_equal(joined, expected) diff --git a/pandas/tests/frame/test_misc_api.py b/pandas/tests/frame/test_misc_api.py deleted file mode 100644 index 674202980807a..0000000000000 --- a/pandas/tests/frame/test_misc_api.py +++ /dev/null @@ -1,483 +0,0 @@ -# -*- coding: utf-8 -*- - -from __future__ import print_function -# pylint: disable-msg=W0612,E1101 -from copy import deepcopy -import sys -from distutils.version import LooseVersion - -from pandas.compat import range, lrange -from pandas import compat - -from numpy.random import randn -import numpy as np - -from pandas import DataFrame, Series -import pandas as pd - -from pandas.util.testing import (assert_almost_equal, - assert_series_equal, - assert_frame_equal, - assertRaisesRegexp) - -import pandas.util.testing as tm - -from pandas.tests.frame.common import TestData - - -class SharedWithSparse(object): - - def test_copy_index_name_checking(self): - # don't want to be able to modify the index stored elsewhere after - # making a copy - for attr in ('index', 'columns'): - ind = getattr(self.frame, attr) - ind.name = None - cp = self.frame.copy() - getattr(cp, attr).name = 'foo' - self.assertIsNone(getattr(self.frame, attr).name) - - def test_getitem_pop_assign_name(self): - s = self.frame['A'] - self.assertEqual(s.name, 'A') - - s = self.frame.pop('A') - self.assertEqual(s.name, 'A') - - s = self.frame.loc[:, 'B'] - self.assertEqual(s.name, 'B') - - s2 = s.loc[:] - self.assertEqual(s2.name, 'B') - - def test_get_value(self): - for idx in 
self.frame.index: - for col in self.frame.columns: - result = self.frame.get_value(idx, col) - expected = self.frame[col][idx] - tm.assert_almost_equal(result, expected) - - def test_join_index(self): - # left / right - - f = self.frame.reindex(columns=['A', 'B'])[:10] - f2 = self.frame.reindex(columns=['C', 'D']) - - joined = f.join(f2) - self.assert_index_equal(f.index, joined.index) - self.assertEqual(len(joined.columns), 4) - - joined = f.join(f2, how='left') - self.assert_index_equal(joined.index, f.index) - self.assertEqual(len(joined.columns), 4) - - joined = f.join(f2, how='right') - self.assert_index_equal(joined.index, f2.index) - self.assertEqual(len(joined.columns), 4) - - # inner - - f = self.frame.reindex(columns=['A', 'B'])[:10] - f2 = self.frame.reindex(columns=['C', 'D']) - - joined = f.join(f2, how='inner') - self.assert_index_equal(joined.index, f.index.intersection(f2.index)) - self.assertEqual(len(joined.columns), 4) - - # outer - - f = self.frame.reindex(columns=['A', 'B'])[:10] - f2 = self.frame.reindex(columns=['C', 'D']) - - joined = f.join(f2, how='outer') - self.assertTrue(tm.equalContents(self.frame.index, joined.index)) - self.assertEqual(len(joined.columns), 4) - - assertRaisesRegexp(ValueError, 'join method', f.join, f2, how='foo') - - # corner case - overlapping columns - for how in ('outer', 'left', 'inner'): - with assertRaisesRegexp(ValueError, 'columns overlap but ' - 'no suffix'): - self.frame.join(self.frame, how=how) - - def test_join_index_more(self): - af = self.frame.loc[:, ['A', 'B']] - bf = self.frame.loc[::2, ['C', 'D']] - - expected = af.copy() - expected['C'] = self.frame['C'][::2] - expected['D'] = self.frame['D'][::2] - - result = af.join(bf) - assert_frame_equal(result, expected) - - result = af.join(bf, how='right') - assert_frame_equal(result, expected[::2]) - - result = bf.join(af, how='right') - assert_frame_equal(result, expected.loc[:, result.columns]) - - def test_join_index_series(self): - df = 
self.frame.copy() - s = df.pop(self.frame.columns[-1]) - joined = df.join(s) - - # TODO should this check_names ? - assert_frame_equal(joined, self.frame, check_names=False) - - s.name = None - assertRaisesRegexp(ValueError, 'must have a name', df.join, s) - - def test_join_overlap(self): - df1 = self.frame.loc[:, ['A', 'B', 'C']] - df2 = self.frame.loc[:, ['B', 'C', 'D']] - - joined = df1.join(df2, lsuffix='_df1', rsuffix='_df2') - df1_suf = df1.loc[:, ['B', 'C']].add_suffix('_df1') - df2_suf = df2.loc[:, ['B', 'C']].add_suffix('_df2') - - no_overlap = self.frame.loc[:, ['A', 'D']] - expected = df1_suf.join(df2_suf).join(no_overlap) - - # column order not necessarily sorted - assert_frame_equal(joined, expected.loc[:, joined.columns]) - - def test_add_prefix_suffix(self): - with_prefix = self.frame.add_prefix('foo#') - expected = pd.Index(['foo#%s' % c for c in self.frame.columns]) - self.assert_index_equal(with_prefix.columns, expected) - - with_suffix = self.frame.add_suffix('#foo') - expected = pd.Index(['%s#foo' % c for c in self.frame.columns]) - self.assert_index_equal(with_suffix.columns, expected) - - -class TestDataFrameMisc(tm.TestCase, SharedWithSparse, TestData): - - klass = DataFrame - - def test_get_axis(self): - f = self.frame - self.assertEqual(f._get_axis_number(0), 0) - self.assertEqual(f._get_axis_number(1), 1) - self.assertEqual(f._get_axis_number('index'), 0) - self.assertEqual(f._get_axis_number('rows'), 0) - self.assertEqual(f._get_axis_number('columns'), 1) - - self.assertEqual(f._get_axis_name(0), 'index') - self.assertEqual(f._get_axis_name(1), 'columns') - self.assertEqual(f._get_axis_name('index'), 'index') - self.assertEqual(f._get_axis_name('rows'), 'index') - self.assertEqual(f._get_axis_name('columns'), 'columns') - - self.assertIs(f._get_axis(0), f.index) - self.assertIs(f._get_axis(1), f.columns) - - assertRaisesRegexp(ValueError, 'No axis named', f._get_axis_number, 2) - assertRaisesRegexp(ValueError, 'No axis.*foo', 
f._get_axis_name, 'foo') - assertRaisesRegexp(ValueError, 'No axis.*None', f._get_axis_name, None) - assertRaisesRegexp(ValueError, 'No axis named', f._get_axis_number, - None) - - def test_keys(self): - getkeys = self.frame.keys - self.assertIs(getkeys(), self.frame.columns) - - def test_column_contains_typeerror(self): - try: - self.frame.columns in self.frame - except TypeError: - pass - - def test_not_hashable(self): - df = pd.DataFrame([1]) - self.assertRaises(TypeError, hash, df) - self.assertRaises(TypeError, hash, self.empty) - - def test_new_empty_index(self): - df1 = DataFrame(randn(0, 3)) - df2 = DataFrame(randn(0, 3)) - df1.index.name = 'foo' - self.assertIsNone(df2.index.name) - - def test_array_interface(self): - with np.errstate(all='ignore'): - result = np.sqrt(self.frame) - tm.assertIsInstance(result, type(self.frame)) - self.assertIs(result.index, self.frame.index) - self.assertIs(result.columns, self.frame.columns) - - assert_frame_equal(result, self.frame.apply(np.sqrt)) - - def test_get_agg_axis(self): - cols = self.frame._get_agg_axis(0) - self.assertIs(cols, self.frame.columns) - - idx = self.frame._get_agg_axis(1) - self.assertIs(idx, self.frame.index) - - self.assertRaises(ValueError, self.frame._get_agg_axis, 2) - - def test_nonzero(self): - self.assertTrue(self.empty.empty) - - self.assertFalse(self.frame.empty) - self.assertFalse(self.mixed_frame.empty) - - # corner case - df = DataFrame({'A': [1., 2., 3.], - 'B': ['a', 'b', 'c']}, - index=np.arange(3)) - del df['A'] - self.assertFalse(df.empty) - - def test_iteritems(self): - df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=['a', 'a', 'b']) - for k, v in compat.iteritems(df): - self.assertEqual(type(v), Series) - - def test_iter(self): - self.assertTrue(tm.equalContents(list(self.frame), self.frame.columns)) - - def test_iterrows(self): - for i, (k, v) in enumerate(self.frame.iterrows()): - exp = self.frame.xs(self.frame.index[i]) - assert_series_equal(v, exp) - - for i, (k, v) in 
enumerate(self.mixed_frame.iterrows()): - exp = self.mixed_frame.xs(self.mixed_frame.index[i]) - assert_series_equal(v, exp) - - def test_itertuples(self): - for i, tup in enumerate(self.frame.itertuples()): - s = Series(tup[1:]) - s.name = tup[0] - expected = self.frame.iloc[i, :].reset_index(drop=True) - assert_series_equal(s, expected) - - df = DataFrame({'floats': np.random.randn(5), - 'ints': lrange(5)}, columns=['floats', 'ints']) - - for tup in df.itertuples(index=False): - tm.assertIsInstance(tup[1], np.integer) - - df = DataFrame(data={"a": [1, 2, 3], "b": [4, 5, 6]}) - dfaa = df[['a', 'a']] - self.assertEqual(list(dfaa.itertuples()), [ - (0, 1, 1), (1, 2, 2), (2, 3, 3)]) - - self.assertEqual(repr(list(df.itertuples(name=None))), - '[(0, 1, 4), (1, 2, 5), (2, 3, 6)]') - - tup = next(df.itertuples(name='TestName')) - - # no support for field renaming in Python 2.6, regular tuples are - # returned - if sys.version >= LooseVersion('2.7'): - self.assertEqual(tup._fields, ('Index', 'a', 'b')) - self.assertEqual((tup.Index, tup.a, tup.b), tup) - self.assertEqual(type(tup).__name__, 'TestName') - - df.columns = ['def', 'return'] - tup2 = next(df.itertuples(name='TestName')) - self.assertEqual(tup2, (0, 1, 4)) - - if sys.version >= LooseVersion('2.7'): - self.assertEqual(tup2._fields, ('Index', '_1', '_2')) - - df3 = DataFrame(dict(('f' + str(i), [i]) for i in range(1024))) - # will raise SyntaxError if trying to create namedtuple - tup3 = next(df3.itertuples()) - self.assertFalse(hasattr(tup3, '_fields')) - self.assertIsInstance(tup3, tuple) - - def test_len(self): - self.assertEqual(len(self.frame), len(self.frame.index)) - - def test_as_matrix(self): - frame = self.frame - mat = frame.as_matrix() - - frameCols = frame.columns - for i, row in enumerate(mat): - for j, value in enumerate(row): - col = frameCols[j] - if np.isnan(value): - self.assertTrue(np.isnan(frame[col][i])) - else: - self.assertEqual(value, frame[col][i]) - - # mixed type - mat = 
self.mixed_frame.as_matrix(['foo', 'A']) - self.assertEqual(mat[0, 0], 'bar') - - df = DataFrame({'real': [1, 2, 3], 'complex': [1j, 2j, 3j]}) - mat = df.as_matrix() - self.assertEqual(mat[0, 0], 1j) - - # single block corner case - mat = self.frame.as_matrix(['A', 'B']) - expected = self.frame.reindex(columns=['A', 'B']).values - assert_almost_equal(mat, expected) - - def test_values(self): - self.frame.values[:, 0] = 5. - self.assertTrue((self.frame.values[:, 0] == 5).all()) - - def test_deepcopy(self): - cp = deepcopy(self.frame) - series = cp['A'] - series[:] = 10 - for idx, value in compat.iteritems(series): - self.assertNotEqual(self.frame['A'][idx], value) - - # --------------------------------------------------------------------- - # Transposing - - def test_transpose(self): - frame = self.frame - dft = frame.T - for idx, series in compat.iteritems(dft): - for col, value in compat.iteritems(series): - if np.isnan(value): - self.assertTrue(np.isnan(frame[col][idx])) - else: - self.assertEqual(value, frame[col][idx]) - - # mixed type - index, data = tm.getMixedTypeDict() - mixed = DataFrame(data, index=index) - - mixed_T = mixed.T - for col, s in compat.iteritems(mixed_T): - self.assertEqual(s.dtype, np.object_) - - def test_transpose_get_view(self): - dft = self.frame.T - dft.values[:, 5:10] = 5 - - self.assertTrue((self.frame.values[5:10] == 5).all()) - - def test_swapaxes(self): - df = DataFrame(np.random.randn(10, 5)) - assert_frame_equal(df.T, df.swapaxes(0, 1)) - assert_frame_equal(df.T, df.swapaxes(1, 0)) - assert_frame_equal(df, df.swapaxes(0, 0)) - self.assertRaises(ValueError, df.swapaxes, 2, 5) - - def test_axis_aliases(self): - f = self.frame - - # reg name - expected = f.sum(axis=0) - result = f.sum(axis='index') - assert_series_equal(result, expected) - - expected = f.sum(axis=1) - result = f.sum(axis='columns') - assert_series_equal(result, expected) - - def test_more_asMatrix(self): - values = self.mixed_frame.as_matrix() - 
self.assertEqual(values.shape[1], len(self.mixed_frame.columns)) - - def test_repr_with_mi_nat(self): - df = DataFrame({'X': [1, 2]}, - index=[[pd.NaT, pd.Timestamp('20130101')], ['a', 'b']]) - res = repr(df) - exp = ' X\nNaT a 1\n2013-01-01 b 2' - self.assertEqual(res, exp) - - def test_iterkv_deprecation(self): - with tm.assert_produces_warning(FutureWarning): - self.mixed_float.iterkv() - - def test_iterkv_names(self): - for k, v in compat.iteritems(self.mixed_frame): - self.assertEqual(v.name, k) - - def test_series_put_names(self): - series = self.mixed_frame._series - for k, v in compat.iteritems(series): - self.assertEqual(v.name, k) - - def test_empty_nonzero(self): - df = DataFrame([1, 2, 3]) - self.assertFalse(df.empty) - df = pd.DataFrame(index=[1], columns=[1]) - self.assertFalse(df.empty) - df = DataFrame(index=['a', 'b'], columns=['c', 'd']).dropna() - self.assertTrue(df.empty) - self.assertTrue(df.T.empty) - empty_frames = [pd.DataFrame(), - pd.DataFrame(index=[1]), - pd.DataFrame(columns=[1]), - pd.DataFrame({1: []})] - for df in empty_frames: - self.assertTrue(df.empty) - self.assertTrue(df.T.empty) - - def test_inplace_return_self(self): - # re #1893 - - data = DataFrame({'a': ['foo', 'bar', 'baz', 'qux'], - 'b': [0, 0, 1, 1], - 'c': [1, 2, 3, 4]}) - - def _check_f(base, f): - result = f(base) - self.assertTrue(result is None) - - # -----DataFrame----- - - # set_index - f = lambda x: x.set_index('a', inplace=True) - _check_f(data.copy(), f) - - # reset_index - f = lambda x: x.reset_index(inplace=True) - _check_f(data.set_index('a'), f) - - # drop_duplicates - f = lambda x: x.drop_duplicates(inplace=True) - _check_f(data.copy(), f) - - # sort - f = lambda x: x.sort_values('b', inplace=True) - _check_f(data.copy(), f) - - # sort_index - f = lambda x: x.sort_index(inplace=True) - _check_f(data.copy(), f) - - # fillna - f = lambda x: x.fillna(0, inplace=True) - _check_f(data.copy(), f) - - # replace - f = lambda x: x.replace(1, 0, inplace=True) - 
_check_f(data.copy(), f) - - # rename - f = lambda x: x.rename({1: 'foo'}, inplace=True) - _check_f(data.copy(), f) - - # -----Series----- - d = data.copy()['c'] - - # reset_index - f = lambda x: x.reset_index(inplace=True, drop=True) - _check_f(data.set_index('a')['c'], f) - - # fillna - f = lambda x: x.fillna(0, inplace=True) - _check_f(d.copy(), f) - - # replace - f = lambda x: x.replace(1, 0, inplace=True) - _check_f(d.copy(), f) - - # rename - f = lambda x: x.rename({1: 'foo'}, inplace=True) - _check_f(d.copy(), f) diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index 80ea01d3a05aa..2e4e8b9582cf6 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -2,23 +2,32 @@ from __future__ import print_function +import pytest + from distutils.version import LooseVersion from numpy import nan, random import numpy as np from pandas.compat import lrange from pandas import (DataFrame, Series, Timestamp, - date_range) + date_range, Categorical) import pandas as pd -from pandas.util.testing import (assert_series_equal, - assert_frame_equal, - assertRaisesRegexp) +from pandas.util.testing import assert_series_equal, assert_frame_equal import pandas.util.testing as tm +import pandas.util._test_decorators as td from pandas.tests.frame.common import TestData, _check_mixed_float +try: + import scipy + _is_scipy_ge_0190 = (LooseVersion(scipy.__version__) >= + LooseVersion('0.19.0')) +except: + _is_scipy_ge_0190 = False + + def _skip_if_no_pchip(): try: from scipy.interpolate import pchip_interpolate # noqa @@ -27,7 +36,7 @@ def _skip_if_no_pchip(): pytest.skip('scipy.interpolate.pchip missing') -class TestDataFrameMissingData(tm.TestCase, TestData): +class TestDataFrameMissingData(TestData): def test_dropEmptyRows(self): N = len(self.frame.index) @@ -71,10 +80,10 @@ def test_dropIncompleteRows(self): samesize_frame = frame.dropna(subset=['bar']) assert_series_equal(frame['foo'], original) - 
self.assertTrue((frame['bar'] == 5).all()) + assert (frame['bar'] == 5).all() inp_frame2.dropna(subset=['bar'], inplace=True) - self.assert_index_equal(samesize_frame.index, self.frame.index) - self.assert_index_equal(inp_frame2.index, self.frame.index) + tm.assert_index_equal(samesize_frame.index, self.frame.index) + tm.assert_index_equal(inp_frame2.index, self.frame.index) def test_dropna(self): df = DataFrame(np.random.randn(6, 4)) @@ -132,7 +141,7 @@ def test_dropna(self): assert_frame_equal(dropped, expected) # bad input - self.assertRaises(ValueError, df.dropna, axis=3) + pytest.raises(ValueError, df.dropna, axis=3) def test_drop_and_dropna_caching(self): # tst that cacher updates @@ -151,10 +160,10 @@ def test_drop_and_dropna_caching(self): def test_dropna_corner(self): # bad input - self.assertRaises(ValueError, self.frame.dropna, how='foo') - self.assertRaises(TypeError, self.frame.dropna, how=None) + pytest.raises(ValueError, self.frame.dropna, how='foo') + pytest.raises(TypeError, self.frame.dropna, how=None) # non-existent column - 8303 - self.assertRaises(KeyError, self.frame.dropna, subset=['A', 'X']) + pytest.raises(KeyError, self.frame.dropna, subset=['A', 'X']) def test_dropna_multiple_axes(self): df = DataFrame([[1, np.nan, 2, 3], @@ -180,13 +189,12 @@ def test_fillna(self): tf.loc[tf.index[-5:], 'A'] = nan zero_filled = self.tsframe.fillna(0) - self.assertTrue((zero_filled.loc[zero_filled.index[:5], 'A'] == 0 - ).all()) + assert (zero_filled.loc[zero_filled.index[:5], 'A'] == 0).all() padded = self.tsframe.fillna(method='pad') - self.assertTrue(np.isnan(padded.loc[padded.index[:5], 'A']).all()) - self.assertTrue((padded.loc[padded.index[-5:], 'A'] == - padded.loc[padded.index[-5], 'A']).all()) + assert np.isnan(padded.loc[padded.index[:5], 'A']).all() + assert (padded.loc[padded.index[-5:], 'A'] == + padded.loc[padded.index[-5], 'A']).all() # mixed type mf = self.mixed_frame @@ -195,8 +203,8 @@ def test_fillna(self): result = 
self.mixed_frame.fillna(value=0) result = self.mixed_frame.fillna(method='pad') - self.assertRaises(ValueError, self.tsframe.fillna) - self.assertRaises(ValueError, self.tsframe.fillna, 5, method='ffill') + pytest.raises(ValueError, self.tsframe.fillna) + pytest.raises(ValueError, self.tsframe.fillna, 5, method='ffill') # mixed numeric (but no float16) mf = self.mixed_float.reindex(columns=['A', 'B', 'D']) @@ -250,6 +258,95 @@ def test_fillna(self): result = df.fillna(value={'Date': df['Date2']}) assert_frame_equal(result, expected) + # with timezone + # GH 15855 + df = pd.DataFrame({'A': [pd.Timestamp('2012-11-11 00:00:00+01:00'), + pd.NaT]}) + exp = pd.DataFrame({'A': [pd.Timestamp('2012-11-11 00:00:00+01:00'), + pd.Timestamp('2012-11-11 00:00:00+01:00')]}) + assert_frame_equal(df.fillna(method='pad'), exp) + + df = pd.DataFrame({'A': [pd.NaT, + pd.Timestamp('2012-11-11 00:00:00+01:00')]}) + exp = pd.DataFrame({'A': [pd.Timestamp('2012-11-11 00:00:00+01:00'), + pd.Timestamp('2012-11-11 00:00:00+01:00')]}) + assert_frame_equal(df.fillna(method='bfill'), exp) + + def test_na_actions_categorical(self): + + cat = Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) + vals = ["a", "b", np.nan, "d"] + df = DataFrame({"cats": cat, "vals": vals}) + cat2 = Categorical([1, 2, 3, 3], categories=[1, 2, 3]) + vals2 = ["a", "b", "b", "d"] + df_exp_fill = DataFrame({"cats": cat2, "vals": vals2}) + cat3 = Categorical([1, 2, 3], categories=[1, 2, 3]) + vals3 = ["a", "b", np.nan] + df_exp_drop_cats = DataFrame({"cats": cat3, "vals": vals3}) + cat4 = Categorical([1, 2], categories=[1, 2, 3]) + vals4 = ["a", "b"] + df_exp_drop_all = DataFrame({"cats": cat4, "vals": vals4}) + + # fillna + res = df.fillna(value={"cats": 3, "vals": "b"}) + tm.assert_frame_equal(res, df_exp_fill) + + with tm.assert_raises_regex(ValueError, "fill value must be " + "in categories"): + df.fillna(value={"cats": 4, "vals": "c"}) + + res = df.fillna(method='pad') + tm.assert_frame_equal(res, df_exp_fill) + + 
# dropna + res = df.dropna(subset=["cats"]) + tm.assert_frame_equal(res, df_exp_drop_cats) + + res = df.dropna() + tm.assert_frame_equal(res, df_exp_drop_all) + + # make sure that fillna takes missing values into account + c = Categorical([np.nan, "b", np.nan], categories=["a", "b"]) + df = pd.DataFrame({"cats": c, "vals": [1, 2, 3]}) + + cat_exp = Categorical(["a", "b", "a"], categories=["a", "b"]) + df_exp = DataFrame({"cats": cat_exp, "vals": [1, 2, 3]}) + + res = df.fillna("a") + tm.assert_frame_equal(res, df_exp) + + def test_fillna_categorical_nan(self): + # GH 14021 + # np.nan should always be a valid filler + cat = Categorical([np.nan, 2, np.nan]) + val = Categorical([np.nan, np.nan, np.nan]) + df = DataFrame({"cats": cat, "vals": val}) + res = df.fillna(df.median()) + v_exp = [np.nan, np.nan, np.nan] + df_exp = DataFrame({"cats": [2, 2, 2], "vals": v_exp}, + dtype='category') + tm.assert_frame_equal(res, df_exp) + + result = df.cats.fillna(np.nan) + tm.assert_series_equal(result, df.cats) + result = df.vals.fillna(np.nan) + tm.assert_series_equal(result, df.vals) + + idx = pd.DatetimeIndex(['2011-01-01 09:00', '2016-01-01 23:45', + '2011-01-01 09:00', pd.NaT, pd.NaT]) + df = DataFrame({'a': Categorical(idx)}) + tm.assert_frame_equal(df.fillna(value=pd.NaT), df) + + idx = pd.PeriodIndex(['2011-01', '2011-01', '2011-01', + pd.NaT, pd.NaT], freq='M') + df = DataFrame({'a': Categorical(idx)}) + tm.assert_frame_equal(df.fillna(value=pd.NaT), df) + + idx = pd.TimedeltaIndex(['1 days', '2 days', + '1 days', pd.NaT, pd.NaT]) + df = DataFrame({'a': Categorical(idx)}) + tm.assert_frame_equal(df.fillna(value=pd.NaT), df) + def test_fillna_downcast(self): # GH 15277 # infer int64 from float64 @@ -303,7 +400,7 @@ def test_fillna_datetime_columns(self): 'C': ['foo', 'bar', '?'], 'D': ['foo2', 'bar2', '?']}, index=date_range('20130110', periods=3)) - self.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) df = pd.DataFrame({'A': [-1, -2, 
np.nan], 'B': [pd.Timestamp('2013-01-01'), @@ -318,7 +415,7 @@ def test_fillna_datetime_columns(self): 'C': ['foo', 'bar', '?'], 'D': ['foo2', 'bar2', '?']}, index=pd.date_range('20130110', periods=3)) - self.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_ffill(self): self.tsframe['A'][:5] = nan @@ -382,18 +479,21 @@ def test_fillna_inplace(self): df[3][-4:] = np.nan expected = df.fillna(value=0) - self.assertIsNot(expected, df) + assert expected is not df df.fillna(value=0, inplace=True) - assert_frame_equal(df, expected) + tm.assert_frame_equal(df, expected) + + expected = df.fillna(value={0: 0}, inplace=True) + assert expected is None df[1][:4] = np.nan df[3][-4:] = np.nan expected = df.fillna(method='ffill') - self.assertIsNot(expected, df) + assert expected is not df df.fillna(method='ffill', inplace=True) - assert_frame_equal(df, expected) + tm.assert_frame_equal(df, expected) def test_fillna_dict_series(self): df = DataFrame({'a': [nan, 1, 2, nan, nan], @@ -416,7 +516,8 @@ def test_fillna_dict_series(self): assert_frame_equal(result, expected) # disable this for now - with assertRaisesRegexp(NotImplementedError, 'column by column'): + with tm.assert_raises_regex(NotImplementedError, + 'column by column'): df.fillna(df.max(1), axis=1) def test_fillna_dataframe(self): @@ -456,24 +557,23 @@ def test_fillna_columns(self): assert_frame_equal(result, expected) def test_fillna_invalid_method(self): - with assertRaisesRegexp(ValueError, 'ffil'): + with tm.assert_raises_regex(ValueError, 'ffil'): self.frame.fillna(method='ffil') def test_fillna_invalid_value(self): # list - self.assertRaises(TypeError, self.frame.fillna, [1, 2]) + pytest.raises(TypeError, self.frame.fillna, [1, 2]) # tuple - self.assertRaises(TypeError, self.frame.fillna, (1, 2)) + pytest.raises(TypeError, self.frame.fillna, (1, 2)) # frame with series - self.assertRaises(ValueError, self.frame.iloc[:, 0].fillna, - self.frame) + pytest.raises(TypeError, 
self.frame.iloc[:, 0].fillna, self.frame) def test_fillna_col_reordering(self): cols = ["COL." + str(i) for i in range(5, 0, -1)] data = np.random.rand(20, 5) df = DataFrame(index=lrange(20), columns=cols, data=data) filled = df.fillna(method='ffill') - self.assertEqual(df.columns.tolist(), filled.columns.tolist()) + assert df.columns.tolist() == filled.columns.tolist() def test_fill_corner(self): mf = self.mixed_frame @@ -481,7 +581,7 @@ def test_fill_corner(self): mf.loc[mf.index[-10:], 'A'] = nan filled = self.mixed_frame.fillna(value=0) - self.assertTrue((filled.loc[filled.index[5:20], 'foo'] == 0).all()) + assert (filled.loc[filled.index[5:20], 'foo'] == 0).all() del self.mixed_frame['foo'] empty_float = self.frame.reindex(columns=[]) @@ -499,7 +599,7 @@ def test_fill_value_when_combine_const(self): assert_frame_equal(res, exp) -class TestDataFrameInterpolate(tm.TestCase, TestData): +class TestDataFrameInterpolate(TestData): def test_interp_basic(self): df = DataFrame({'A': [1, 2, np.nan, 4], @@ -524,7 +624,7 @@ def test_interp_bad_method(self): 'B': [1, 4, 9, np.nan], 'C': [1, 2, 3, 5], 'D': list('abcd')}) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df.interpolate(method='not_a_method') def test_interp_combo(self): @@ -544,11 +644,11 @@ def test_interp_combo(self): def test_interp_nan_idx(self): df = DataFrame({'A': [1, 2, np.nan, 4], 'B': [np.nan, 2, 3, 4]}) df = df.set_index('A') - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): df.interpolate(method='values') + @td.skip_if_no_scipy def test_interp_various(self): - tm._skip_if_no_scipy() df = DataFrame({'A': [1, 2, np.nan, 4, 5, np.nan, 7], 'C': [1, 2, 3, 5, 8, 13, 21]}) df = df.set_index('C') @@ -560,8 +660,15 @@ def test_interp_various(self): assert_frame_equal(result, expected) result = df.interpolate(method='cubic') - expected.A.loc[3] = 2.81621174 - expected.A.loc[13] = 5.64146581 + # GH #15662. 
+ # new cubic and quadratic interpolation algorithms from scipy 0.19.0. + # previously `splmake` was used. See scipy/scipy#6710 + if _is_scipy_ge_0190: + expected.A.loc[3] = 2.81547781 + expected.A.loc[13] = 5.52964175 + else: + expected.A.loc[3] = 2.81621174 + expected.A.loc[13] = 5.64146581 assert_frame_equal(result, expected) result = df.interpolate(method='nearest') @@ -570,8 +677,12 @@ def test_interp_various(self): assert_frame_equal(result, expected, check_dtype=False) result = df.interpolate(method='quadratic') - expected.A.loc[3] = 2.82533638 - expected.A.loc[13] = 6.02817974 + if _is_scipy_ge_0190: + expected.A.loc[3] = 2.82150771 + expected.A.loc[13] = 6.12648668 + else: + expected.A.loc[3] = 2.82533638 + expected.A.loc[13] = 6.02817974 assert_frame_equal(result, expected) result = df.interpolate(method='slinear') @@ -584,13 +695,8 @@ def test_interp_various(self): expected.A.loc[13] = 5 assert_frame_equal(result, expected, check_dtype=False) - result = df.interpolate(method='quadratic') - expected.A.loc[3] = 2.82533638 - expected.A.loc[13] = 6.02817974 - assert_frame_equal(result, expected) - + @td.skip_if_no_scipy def test_interp_alt_scipy(self): - tm._skip_if_no_scipy() df = DataFrame({'A': [1, 2, np.nan, 4, 5, np.nan, 7], 'C': [1, 2, 3, 5, 8, 13, 21]}) result = df.interpolate(method='barycentric') @@ -612,7 +718,7 @@ def test_interp_alt_scipy(self): result = df.interpolate(method='pchip') expected.loc[2, 'A'] = 3 - if LooseVersion(scipy.__version__) >= '0.17.0': + if LooseVersion(scipy.__version__) >= LooseVersion('0.17.0'): expected.loc[5, 'A'] = 6.0 else: expected.loc[5, 'A'] = 6.125 @@ -633,8 +739,6 @@ def test_interp_rowwise(self): expected[4] = expected[4].astype(np.float64) assert_frame_equal(result, expected) - # scipy route - tm._skip_if_no_scipy() result = df.interpolate(axis=1, method='values') assert_frame_equal(result, expected) @@ -647,7 +751,10 @@ def test_rowwise_alt(self): 1: [1, 2, 3, 4, 3, 2, 1, 0, -1]}) df.interpolate(axis=0) - def 
test_interp_leading_nans(self): + @pytest.mark.parametrize("check_scipy", [ + False, pytest.param(True, marks=td.skip_if_no_scipy) + ]) + def test_interp_leading_nans(self, check_scipy): df = DataFrame({"A": [np.nan, np.nan, .5, .25, 0], "B": [np.nan, -3, -3.5, np.nan, -4]}) result = df.interpolate() @@ -655,9 +762,9 @@ def test_interp_leading_nans(self): expected['B'].loc[3] = -3.75 assert_frame_equal(result, expected) - tm._skip_if_no_scipy() - result = df.interpolate(method='polynomial', order=1) - assert_frame_equal(result, expected) + if check_scipy: + result = df.interpolate(method='polynomial', order=1) + assert_frame_equal(result, expected) def test_interp_raise_on_only_mixed(self): df = DataFrame({'A': [1, 2, np.nan, 4], @@ -665,7 +772,7 @@ def test_interp_raise_on_only_mixed(self): 'C': [np.nan, 2, 5, 7], 'D': [np.nan, np.nan, 9, 9], 'E': [1, 2, 3, 4]}) - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): df.interpolate(axis=1) def test_interp_inplace(self): diff --git a/pandas/tests/frame/test_mutate_columns.py b/pandas/tests/frame/test_mutate_columns.py index 6b4c56747c981..51ffe2966b4e5 100644 --- a/pandas/tests/frame/test_mutate_columns.py +++ b/pandas/tests/frame/test_mutate_columns.py @@ -1,15 +1,14 @@ # -*- coding: utf-8 -*- from __future__ import print_function - +import pytest from pandas.compat import range, lrange import numpy as np +from pandas.compat import PY36 -from pandas import DataFrame, Series, Index +from pandas import DataFrame, Series, Index, MultiIndex -from pandas.util.testing import (assert_series_equal, - assert_frame_equal, - assertRaisesRegexp) +from pandas.util.testing import assert_frame_equal import pandas.util.testing as tm @@ -19,7 +18,7 @@ # Column add, remove, delete. 
-class TestDataFrameMutateColumns(tm.TestCase, TestData): +class TestDataFrameMutateColumns(TestData): def test_assign(self): df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}) @@ -63,28 +62,62 @@ def test_assign_multiple(self): [3, 6, 9, 3, 6]], columns=list('ABCDE')) assert_frame_equal(result, expected) - def test_assign_alphabetical(self): + def test_assign_order(self): # GH 9818 df = DataFrame([[1, 2], [3, 4]], columns=['A', 'B']) result = df.assign(D=df.A + df.B, C=df.A - df.B) - expected = DataFrame([[1, 2, -1, 3], [3, 4, -1, 7]], - columns=list('ABCD')) + + if PY36: + expected = DataFrame([[1, 2, 3, -1], [3, 4, 7, -1]], + columns=list('ABDC')) + else: + expected = DataFrame([[1, 2, -1, 3], [3, 4, -1, 7]], + columns=list('ABCD')) assert_frame_equal(result, expected) result = df.assign(C=df.A - df.B, D=df.A + df.B) + + expected = DataFrame([[1, 2, -1, 3], [3, 4, -1, 7]], + columns=list('ABCD')) + assert_frame_equal(result, expected) def test_assign_bad(self): df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}) + # non-keyword argument - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): df.assign(lambda x: x.A) - with tm.assertRaises(AttributeError): + with pytest.raises(AttributeError): df.assign(C=df.A, D=df.A + df.C) - with tm.assertRaises(KeyError): - df.assign(C=lambda df: df.A, D=lambda df: df['A'] + df['C']) - with tm.assertRaises(KeyError): + + @pytest.mark.skipif(PY36, reason="""Issue #14207: valid for python + 3.6 and above""") + def test_assign_dependent_old_python(self): + df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}) + + # Key C does not exist at definition time of df + with pytest.raises(KeyError): + df.assign(C=lambda df: df.A, + D=lambda df: df['A'] + df['C']) + with pytest.raises(KeyError): df.assign(C=df.A, D=lambda x: x['A'] + x['C']) + @pytest.mark.skipif(not PY36, reason="""Issue #14207: not valid for + python 3.5 and below""") + def test_assign_dependent(self): + df = DataFrame({'A': [1, 2], 'B': [3, 4]}) + + result = 
df.assign(C=df.A, D=lambda x: x['A'] + x['C']) + expected = DataFrame([[1, 3, 1, 2], [2, 4, 2, 4]], + columns=list('ABCD')) + assert_frame_equal(result, expected) + + result = df.assign(C=lambda df: df.A, + D=lambda df: df['A'] + df['C']) + expected = DataFrame([[1, 3, 1, 2], [2, 4, 2, 4]], + columns=list('ABCD')) + assert_frame_equal(result, expected) + def test_insert_error_msmgs(self): # GH 7432 @@ -93,7 +126,7 @@ def test_insert_error_msmgs(self): s = DataFrame({'foo': ['a', 'b', 'c', 'a'], 'fiz': [ 'g', 'h', 'i', 'j']}).set_index('foo') msg = 'cannot reindex from a duplicate axis' - with assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): df['newcol'] = s # GH 4107, more descriptive error message @@ -101,7 +134,7 @@ def test_insert_error_msmgs(self): columns=['a', 'b', 'c', 'd']) msg = 'incompatible index of inserted column with frame index' - with assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): df['gr'] = df.groupby(['b', 'c']).count() def test_insert_benchmark(self): @@ -121,38 +154,38 @@ def test_insert(self): columns=['c', 'b', 'a']) df.insert(0, 'foo', df['a']) - self.assert_index_equal(df.columns, Index(['foo', 'c', 'b', 'a'])) + tm.assert_index_equal(df.columns, Index(['foo', 'c', 'b', 'a'])) tm.assert_series_equal(df['a'], df['foo'], check_names=False) df.insert(2, 'bar', df['c']) - self.assert_index_equal(df.columns, - Index(['foo', 'c', 'bar', 'b', 'a'])) + tm.assert_index_equal(df.columns, + Index(['foo', 'c', 'bar', 'b', 'a'])) tm.assert_almost_equal(df['c'], df['bar'], check_names=False) # diff dtype # new item df['x'] = df['a'].astype('float32') - result = Series(dict(float64=5, float32=1)) - self.assertTrue((df.get_dtype_counts() == result).all()) + result = Series(dict(float32=1, float64=5)) + assert (df.get_dtype_counts().sort_index() == result).all() # replacing current (in different block) df['a'] = df['a'].astype('float32') - result = Series(dict(float64=4, float32=2)) - 
self.assertTrue((df.get_dtype_counts() == result).all()) + result = Series(dict(float32=2, float64=4)) + assert (df.get_dtype_counts().sort_index() == result).all() df['y'] = df['a'].astype('int32') - result = Series(dict(float64=4, float32=2, int32=1)) - self.assertTrue((df.get_dtype_counts() == result).all()) + result = Series(dict(float32=2, float64=4, int32=1)) + assert (df.get_dtype_counts().sort_index() == result).all() - with assertRaisesRegexp(ValueError, 'already exists'): + with tm.assert_raises_regex(ValueError, 'already exists'): df.insert(1, 'a', df['b']) - self.assertRaises(ValueError, df.insert, 1, 'c', df['b']) + pytest.raises(ValueError, df.insert, 1, 'c', df['b']) df.columns.name = 'some_name' # preserve columns name field df.insert(0, 'baz', df['c']) - self.assertEqual(df.columns.name, 'some_name') + assert df.columns.name == 'some_name' # GH 13522 df = DataFrame(index=['A', 'B', 'C']) @@ -163,21 +196,46 @@ def test_insert(self): def test_delitem(self): del self.frame['A'] - self.assertNotIn('A', self.frame) + assert 'A' not in self.frame + + def test_delitem_multiindex(self): + midx = MultiIndex.from_product([['A', 'B'], [1, 2]]) + df = DataFrame(np.random.randn(4, 4), columns=midx) + assert len(df.columns) == 4 + assert ('A', ) in df.columns + assert 'A' in df.columns + + result = df['A'] + assert isinstance(result, DataFrame) + del df['A'] + + assert len(df.columns) == 2 + + # A still in the levels, BUT get a KeyError if trying + # to delete + assert ('A', ) not in df.columns + with pytest.raises(KeyError): + del df[('A',)] + + # behavior of dropped/deleted MultiIndex levels changed from + # GH 2770 to GH 19027: MultiIndex no longer '.__contains__' + # levels which are dropped/deleted + assert 'A' not in df.columns + with pytest.raises(KeyError): + del df['A'] def test_pop(self): self.frame.columns.name = 'baz' self.frame.pop('A') - self.assertNotIn('A', self.frame) + assert 'A' not in self.frame self.frame['foo'] = 'bar' self.frame.pop('foo') 
- self.assertNotIn('foo', self.frame) - # TODO self.assertEqual(self.frame.columns.name, 'baz') + assert 'foo' not in self.frame + # TODO assert self.frame.columns.name == 'baz' - # 10912 - # inplace ops cause caching issue + # gh-10912: inplace ops cause caching issue a = DataFrame([[1, 2, 3], [4, 5, 6]], columns=[ 'A', 'B', 'C'], index=['X', 'Y']) b = a.pop('B') @@ -186,23 +244,23 @@ def test_pop(self): # original frame expected = DataFrame([[1, 3], [4, 6]], columns=[ 'A', 'C'], index=['X', 'Y']) - assert_frame_equal(a, expected) + tm.assert_frame_equal(a, expected) # result expected = Series([2, 5], index=['X', 'Y'], name='B') + 1 - assert_series_equal(b, expected) + tm.assert_series_equal(b, expected) def test_pop_non_unique_cols(self): df = DataFrame({0: [0, 1], 1: [0, 1], 2: [4, 5]}) df.columns = ["a", "b", "a"] res = df.pop("a") - self.assertEqual(type(res), DataFrame) - self.assertEqual(len(res), 2) - self.assertEqual(len(df.columns), 1) - self.assertTrue("b" in df.columns) - self.assertFalse("a" in df.columns) - self.assertEqual(len(df.index), 2) + assert type(res) == DataFrame + assert len(res) == 2 + assert len(df.columns) == 1 + assert "b" in df.columns + assert "a" not in df.columns + assert len(df.index) == 2 def test_insert_column_bug_4032(self): diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index 4ad88a12a2625..0b32ec89d3909 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -2,22 +2,21 @@ from __future__ import print_function +import pytest import numpy as np from pandas.compat import lrange, u from pandas import DataFrame, Series, MultiIndex, date_range import pandas as pd -from pandas.util.testing import (assert_series_equal, - assert_frame_equal, - assertRaisesRegexp) +from pandas.util.testing import assert_series_equal, assert_frame_equal import pandas.util.testing as tm from pandas.tests.frame.common import TestData -class 
TestDataFrameNonuniqueIndexes(tm.TestCase, TestData): +class TestDataFrameNonuniqueIndexes(TestData): def test_column_dups_operations(self): @@ -52,7 +51,7 @@ def check(result, expected=None): [2, 1, 3, 5, 'bah']], columns=['foo', 'bar', 'foo', 'hello', 'string']) check(df, expected) - with assertRaisesRegexp(ValueError, 'Length of value'): + with tm.assert_raises_regex(ValueError, 'Length of value'): df.insert(0, 'AnotherColumn', range(len(df.index) - 1)) # insert same dtype @@ -87,7 +86,7 @@ def check(result, expected=None): check(df, expected) # consolidate - df = df.consolidate() + df = df._consolidate() expected = DataFrame([[1, 1, 'bah', 3], [1, 2, 'bah', 3], [2, 3, 'bah', 3]], columns=['foo', 'foo', 'string', 'foo2']) @@ -102,8 +101,8 @@ def check(result, expected=None): check(df, expected) # insert a dup - assertRaisesRegexp(ValueError, 'cannot insert', - df.insert, 2, 'new_col', 4.) + tm.assert_raises_regex(ValueError, 'cannot insert', + df.insert, 2, 'new_col', 4.) df.insert(2, 'new_col', 4., allow_duplicates=True) expected = DataFrame([[1, 1, 4., 5., 'bah', 3], [1, 2, 4., 5., 'bah', 3], @@ -152,18 +151,18 @@ def check(result, expected=None): df = DataFrame([[1, 2.5], [3, 4.5]], index=[1, 2], columns=['x', 'x']) result = df.values expected = np.array([[1, 2.5], [3, 4.5]]) - self.assertTrue((result == expected).all().all()) + assert (result == expected).all().all() # rename, GH 4403 df4 = DataFrame( - {'TClose': [22.02], - 'RT': [0.0454], + {'RT': [0.0454], + 'TClose': [22.02], 'TExg': [0.0422]}, index=MultiIndex.from_tuples([(600809, 20130331)], names=['STK_ID', 'RPT_Date'])) - df5 = DataFrame({'STK_ID': [600809] * 3, - 'RPT_Date': [20120930, 20121231, 20130331], + df5 = DataFrame({'RPT_Date': [20120930, 20121231, 20130331], + 'STK_ID': [600809] * 3, 'STK_Name': [u('饡驦'), u('饡驦'), u('饡驦')], 'TClose': [38.05, 41.66, 30.01]}, index=MultiIndex.from_tuples( @@ -189,8 +188,8 @@ def check(result, expected=None): # reindex is invalid! 
df = DataFrame([[1, 5, 7.], [1, 5, 7.], [1, 5, 7.]], columns=['bar', 'a', 'a']) - self.assertRaises(ValueError, df.reindex, columns=['bar']) - self.assertRaises(ValueError, df.reindex, columns=['bar', 'foo']) + pytest.raises(ValueError, df.reindex, columns=['bar']) + pytest.raises(ValueError, df.reindex, columns=['bar', 'foo']) # drop df = DataFrame([[1, 5, 7.], [1, 5, 7.], [1, 5, 7.]], @@ -215,9 +214,10 @@ def check(result, expected=None): for index in [df.index, pd.Index(list('edcba'))]: this_df = df.copy() expected_ser = pd.Series(index.values, index=this_df.index) - expected_df = DataFrame.from_items([('A', expected_ser), - ('B', this_df['B']), - ('A', expected_ser)]) + expected_df = DataFrame({'A': expected_ser, + 'B': this_df['B'], + 'A': expected_ser}, + columns=['A', 'B', 'A']) this_df['A'] = index check(this_df, expected_df) @@ -307,7 +307,7 @@ def check(result, expected=None): # boolean with the duplicate raises df = DataFrame(np.arange(12).reshape(3, 4), columns=dups, dtype='float64') - self.assertRaises(ValueError, lambda: df[df.A > 6]) + pytest.raises(ValueError, lambda: df[df.A > 6]) # dup aligining operations should work # GH 5185 @@ -324,7 +324,7 @@ def check(result, expected=None): columns=['A', 'A']) # not-comparing like-labelled - self.assertRaises(ValueError, lambda: df1 == df2) + pytest.raises(ValueError, lambda: df1 == df2) df1r = df1.reindex_like(df2) result = df1r == df2 @@ -411,7 +411,7 @@ def test_columns_with_dups(self): assert_frame_equal(df, expected) # this is an error because we cannot disambiguate the dup columns - self.assertRaises(Exception, lambda x: DataFrame( + pytest.raises(Exception, lambda x: DataFrame( [[1, 2, 'foo', 'bar']], columns=['a', 'a', 'a', 'a'])) # dups across blocks @@ -426,10 +426,10 @@ def test_columns_with_dups(self): columns=df_float.columns) df = pd.concat([df_float, df_int, df_bool, df_object, df_dt], axis=1) - self.assertEqual(len(df._data._blknos), len(df.columns)) - 
self.assertEqual(len(df._data._blklocs), len(df.columns)) + assert len(df._data._blknos) == len(df.columns) + assert len(df._data._blklocs) == len(df.columns) - # testing iget + # testing iloc for i in range(len(df.columns)): df.iloc[:, i] @@ -440,7 +440,7 @@ def test_columns_with_dups(self): xp.columns = ['A', 'A', 'B'] assert_frame_equal(rs, xp) - def test_as_matrix_duplicates(self): + def test_values_duplicates(self): df = DataFrame([[1, 2, 'a', 'b'], [1, 2, 'a', 'b']], columns=['one', 'one', 'two', 'two']) @@ -449,7 +449,7 @@ def test_as_matrix_duplicates(self): expected = np.array([[1, 2, 'a', 'b'], [1, 2, 'a', 'b']], dtype=object) - self.assertTrue(np.array_equal(result, expected)) + tm.assert_numpy_array_equal(result, expected) def test_set_value_by_index(self): # See gh-12344 diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index d6a3592446fd5..5df50f3d7835b 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import print_function - +from collections import deque from datetime import datetime import operator @@ -10,18 +10,17 @@ from numpy import nan, random import numpy as np -from pandas.compat import lrange +from pandas.compat import range from pandas import compat from pandas import (DataFrame, Series, MultiIndex, Timestamp, date_range) import pandas.core.common as com -import pandas.formats.printing as printing +import pandas.io.formats.printing as printing import pandas as pd from pandas.util.testing import (assert_numpy_array_equal, assert_series_equal, - assert_frame_equal, - assertRaisesRegexp) + assert_frame_equal) import pandas.util.testing as tm @@ -29,7 +28,7 @@ _check_mixed_int) -class TestDataFrameOperators(tm.TestCase, TestData): +class TestDataFrameOperators(TestData): def test_operators(self): garbage = random.random(4) @@ -42,17 +41,17 @@ def test_operators(self): for idx, val in compat.iteritems(series): 
origVal = self.frame[col][idx] * 2 if not np.isnan(val): - self.assertEqual(val, origVal) + assert val == origVal else: - self.assertTrue(np.isnan(origVal)) + assert np.isnan(origVal) for col, series in compat.iteritems(seriesSum): for idx, val in compat.iteritems(series): origVal = self.frame[col][idx] + colSeries[col] if not np.isnan(val): - self.assertEqual(val, origVal) + assert val == origVal else: - self.assertTrue(np.isnan(origVal)) + assert np.isnan(origVal) added = self.frame2 + self.frame2 expected = self.frame2 * 2 @@ -69,7 +68,7 @@ def test_operators(self): DataFrame(index=[0], dtype=dtype), ] for df in frames: - self.assertTrue((df + df).equals(df)) + assert (df + df).equals(df) assert_frame_equal(df + df, df) def test_ops_np_scalar(self): @@ -119,12 +118,12 @@ def test_operators_boolean(self): def f(): DataFrame(1.0, index=[1], columns=['A']) | DataFrame( True, index=[1], columns=['A']) - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) def f(): DataFrame('foo', index=[1], columns=['A']) | DataFrame( True, index=[1], columns=['A']) - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) def test_operators_none_as_na(self): df = DataFrame({"col1": [2, 5.0, 123, None], @@ -138,12 +137,12 @@ def test_operators_none_as_na(self): filled = df.fillna(np.nan) result = op(df, 3) expected = op(filled, 3).astype(object) - expected[com.isnull(expected)] = None + expected[com.isna(expected)] = None assert_frame_equal(result, expected) result = op(df, df) expected = op(filled, filled).astype(object) - expected[com.isnull(expected)] = None + expected[com.isna(expected)] = None assert_frame_equal(result, expected) result = op(df, df.fillna(7)) @@ -157,12 +156,12 @@ def test_comparison_invalid(self): def check(df, df2): for (x, y) in [(df, df2), (df2, df)]: - self.assertRaises(TypeError, lambda: x == y) - self.assertRaises(TypeError, lambda: x != y) - self.assertRaises(TypeError, lambda: x >= y) - self.assertRaises(TypeError, lambda: x > y) - 
self.assertRaises(TypeError, lambda: x < y) - self.assertRaises(TypeError, lambda: x <= y) + pytest.raises(TypeError, lambda: x == y) + pytest.raises(TypeError, lambda: x != y) + pytest.raises(TypeError, lambda: x >= y) + pytest.raises(TypeError, lambda: x > y) + pytest.raises(TypeError, lambda: x < y) + pytest.raises(TypeError, lambda: x <= y) # GH4968 # invalid date/int comparisons @@ -189,6 +188,7 @@ def test_timestamp_compare(self): df.loc[np.random.rand(len(df)) > 0.5, 'dates2'] = pd.NaT ops = {'gt': 'lt', 'lt': 'gt', 'ge': 'le', 'le': 'ge', 'eq': 'eq', 'ne': 'ne'} + for left, right in ops.items(): left_f = getattr(operator, left) right_f = getattr(operator, right) @@ -203,90 +203,20 @@ def test_timestamp_compare(self): result = right_f(Timestamp('nat'), df) assert_frame_equal(result, expected) - def test_modulo(self): - # GH3590, modulo as ints - p = DataFrame({'first': [3, 4, 5, 8], 'second': [0, 0, 0, 3]}) - - # this is technically wrong as the integer portion is coerced to float - # ### - expected = DataFrame({'first': Series([0, 0, 0, 0], dtype='float64'), - 'second': Series([np.nan, np.nan, np.nan, 0])}) - result = p % p - assert_frame_equal(result, expected) - - # numpy has a slightly different (wrong) treatement - with np.errstate(all='ignore'): - arr = p.values % p.values - result2 = DataFrame(arr, index=p.index, - columns=p.columns, dtype='float64') - result2.iloc[0:3, 1] = np.nan - assert_frame_equal(result2, expected) - - result = p % 0 - expected = DataFrame(np.nan, index=p.index, columns=p.columns) - assert_frame_equal(result, expected) - - # numpy has a slightly different (wrong) treatement - with np.errstate(all='ignore'): - arr = p.values.astype('float64') % 0 - result2 = DataFrame(arr, index=p.index, columns=p.columns) - assert_frame_equal(result2, expected) - - # not commutative with series - p = DataFrame(np.random.randn(10, 5)) - s = p[0] - res = s % p - res2 = p % s - self.assertFalse(np.array_equal(res.fillna(0), res2.fillna(0))) - - def 
test_div(self): - - # integer div, but deal with the 0's (GH 9144) - p = DataFrame({'first': [3, 4, 5, 8], 'second': [0, 0, 0, 3]}) - result = p / p - - expected = DataFrame({'first': Series([1.0, 1.0, 1.0, 1.0]), - 'second': Series([nan, nan, nan, 1])}) - assert_frame_equal(result, expected) - - with np.errstate(all='ignore'): - arr = p.values.astype('float') / p.values - result2 = DataFrame(arr, index=p.index, - columns=p.columns) - assert_frame_equal(result2, expected) - - result = p / 0 - expected = DataFrame(np.inf, index=p.index, columns=p.columns) - expected.iloc[0:3, 1] = nan - assert_frame_equal(result, expected) - - # numpy has a slightly different (wrong) treatement - with np.errstate(all='ignore'): - arr = p.values.astype('float64') / 0 - result2 = DataFrame(arr, index=p.index, - columns=p.columns) - assert_frame_equal(result2, expected) - - p = DataFrame(np.random.randn(10, 5)) - s = p[0] - res = s / p - res2 = p / s - self.assertFalse(np.array_equal(res.fillna(0), res2.fillna(0))) - def test_logical_operators(self): def _check_bin_op(op): result = op(df1, df2) expected = DataFrame(op(df1.values, df2.values), index=df1.index, columns=df1.columns) - self.assertEqual(result.values.dtype, np.bool_) + assert result.values.dtype == np.bool_ assert_frame_equal(result, expected) def _check_unary_op(op): result = op(df1) expected = DataFrame(op(df1.values), index=df1.index, columns=df1.columns) - self.assertEqual(result.values.dtype, np.bool_) + assert result.values.dtype == np.bool_ assert_frame_equal(result, expected) df1 = {'a': {'a': True, 'b': False, 'c': False, 'd': True, 'e': True}, @@ -316,14 +246,12 @@ def _check_unary_op(op): # operator.neg is deprecated in numpy >= 1.9 _check_unary_op(operator.inv) - def test_logical_typeerror(self): - if not compat.PY3: - self.assertRaises(TypeError, self.frame.__eq__, 'foo') - self.assertRaises(TypeError, self.frame.__lt__, 'foo') - self.assertRaises(TypeError, self.frame.__gt__, 'foo') - 
self.assertRaises(TypeError, self.frame.__ne__, 'foo') - else: - pytest.skip('test_logical_typeerror not tested on PY3') + @pytest.mark.parametrize('op,res', [('__eq__', False), + ('__ne__', True)]) + def test_logical_typeerror_with_non_valid(self, op, res): + # we are comparing floats vs a string + result = getattr(self.frame, op)('foo') + assert bool(result.all().all()) is res def test_logical_with_nas(self): d = DataFrame({'a': [np.nan, False], 'b': [True, True]}) @@ -343,13 +271,50 @@ def test_logical_with_nas(self): expected = Series([True, True]) assert_series_equal(result, expected) - def test_neg(self): - # what to do? - assert_frame_equal(-self.frame, -1 * self.frame) + @pytest.mark.parametrize('df,expected', [ + (pd.DataFrame({'a': [-1, 1]}), pd.DataFrame({'a': [1, -1]})), + (pd.DataFrame({'a': [False, True]}), + pd.DataFrame({'a': [True, False]})), + (pd.DataFrame({'a': pd.Series(pd.to_timedelta([-1, 1]))}), + pd.DataFrame({'a': pd.Series(pd.to_timedelta([1, -1]))})) + ]) + def test_neg_numeric(self, df, expected): + assert_frame_equal(-df, expected) + assert_series_equal(-df['a'], expected['a']) + + @pytest.mark.parametrize('df', [ + pd.DataFrame({'a': ['a', 'b']}), + pd.DataFrame({'a': pd.to_datetime(['2017-01-22', '1970-01-01'])}), + ]) + def test_neg_raises(self, df): + with pytest.raises(TypeError): + (- df) + with pytest.raises(TypeError): + (- df['a']) def test_invert(self): assert_frame_equal(-(self.frame < 0), ~(self.frame < 0)) + @pytest.mark.parametrize('df', [ + pd.DataFrame({'a': [-1, 1]}), + pd.DataFrame({'a': [False, True]}), + pd.DataFrame({'a': pd.Series(pd.to_timedelta([-1, 1]))}), + ]) + def test_pos_numeric(self, df): + # GH 16073 + assert_frame_equal(+df, df) + assert_series_equal(+df['a'], df['a']) + + @pytest.mark.parametrize('df', [ + pd.DataFrame({'a': ['a', 'b']}), + pd.DataFrame({'a': pd.to_datetime(['2017-01-22', '1970-01-01'])}), + ]) + def test_pos_raises(self, df): + with pytest.raises(TypeError): + (+ df) + with 
pytest.raises(TypeError): + (+ df['a']) + def test_arith_flex_frame(self): ops = ['add', 'sub', 'mul', 'div', 'truediv', 'pow', 'floordiv', 'mod'] if not compat.PY3: @@ -423,10 +388,10 @@ def test_arith_flex_frame(self): # ndim >= 3 ndim_5 = np.ones(self.frame.shape + (3, 4, 5)) msg = "Unable to coerce to Series/DataFrame" - with assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): f(self.frame, ndim_5) - with assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): getattr(self.frame, op)(ndim_5) # res_add = self.frame.add(self.frame) @@ -448,11 +413,24 @@ def test_arith_flex_frame(self): result = self.frame[:0].add(self.frame) assert_frame_equal(result, self.frame * np.nan) - with assertRaisesRegexp(NotImplementedError, 'fill_value'): + with tm.assert_raises_regex(NotImplementedError, 'fill_value'): self.frame.add(self.frame.iloc[0], fill_value=3) - with assertRaisesRegexp(NotImplementedError, 'fill_value'): + with tm.assert_raises_regex(NotImplementedError, 'fill_value'): self.frame.add(self.frame.iloc[0], axis='index', fill_value=3) + def test_arith_flex_zero_len_raises(self): + # GH#19522 passing fill_value to frame flex arith methods should + # raise even in the zero-length special cases + ser_len0 = pd.Series([]) + df_len0 = pd.DataFrame([], columns=['A', 'B']) + df = pd.DataFrame([[1, 2], [3, 4]], columns=['A', 'B']) + + with tm.assert_raises_regex(NotImplementedError, 'fill_value'): + df.add(ser_len0, fill_value='E') + + with tm.assert_raises_regex(NotImplementedError, 'fill_value'): + df_len0.sub(df['A'], axis=None, fill_value=3) + def test_binary_ops_align(self): # test aligning binary ops @@ -574,8 +552,8 @@ def _check_unaligned_frame(meth, op, df, other): assert_frame_equal(rs, xp) # DataFrame - self.assertTrue(df.eq(df).values.all()) - self.assertFalse(df.ne(df).values.any()) + assert df.eq(df).values.all() + assert not df.ne(df).values.any() for op in ['eq', 'ne', 'gt', 'lt', 'ge', 'le']: f 
= getattr(df, op) o = getattr(operator, op) @@ -589,7 +567,7 @@ def _check_unaligned_frame(meth, op, df, other): # NAs msg = "Unable to coerce to Series/DataFrame" assert_frame_equal(f(np.nan), o(df, np.nan)) - with assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): f(ndim_5) # Series @@ -635,17 +613,17 @@ def _test_seq(df, idx_ser, col_ser): # NA df.loc[0, 0] = np.nan rs = df.eq(df) - self.assertFalse(rs.loc[0, 0]) + assert not rs.loc[0, 0] rs = df.ne(df) - self.assertTrue(rs.loc[0, 0]) + assert rs.loc[0, 0] rs = df.gt(df) - self.assertFalse(rs.loc[0, 0]) + assert not rs.loc[0, 0] rs = df.lt(df) - self.assertFalse(rs.loc[0, 0]) + assert not rs.loc[0, 0] rs = df.ge(df) - self.assertFalse(rs.loc[0, 0]) + assert not rs.loc[0, 0] rs = df.le(df) - self.assertFalse(rs.loc[0, 0]) + assert not rs.loc[0, 0] # complex arr = np.array([np.nan, 1, 6, np.nan]) @@ -653,14 +631,14 @@ def _test_seq(df, idx_ser, col_ser): df = DataFrame({'a': arr}) df2 = DataFrame({'a': arr2}) rs = df.gt(df2) - self.assertFalse(rs.values.any()) + assert not rs.values.any() rs = df.ne(df2) - self.assertTrue(rs.values.all()) + assert rs.values.all() arr3 = np.array([2j, np.nan, None]) df3 = DataFrame({'a': arr3}) rs = df3.gt(2j) - self.assertFalse(rs.values.any()) + assert not rs.values.any() # corner, dtype=object df1 = DataFrame({'col': ['foo', np.nan, 'bar']}) @@ -669,22 +647,6 @@ def _test_seq(df, idx_ser, col_ser): exp = DataFrame({'col': [False, True, False]}) assert_frame_equal(result, exp) - def test_return_dtypes_bool_op_costant(self): - # GH15077 - df = DataFrame({'x': [1, 2, 3], 'y': [1., 2., 3.]}) - const = 2 - - # not empty DataFrame - for op in ['eq', 'ne', 'gt', 'lt', 'ge', 'le']: - result = getattr(df, op)(const).get_dtype_counts() - self.assert_series_equal(result, Series([2], ['bool'])) - - # empty DataFrame - empty = df.iloc[:0] - for op in ['eq', 'ne', 'gt', 'lt', 'ge', 'le']: - result = getattr(empty, op)(const).get_dtype_counts() - 
self.assert_series_equal(result, Series([2], ['bool'])) - def test_dti_tz_convert_to_utc(self): base = pd.DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], tz='UTC') @@ -751,6 +713,15 @@ def test_arith_non_pandas_object(self): added = DataFrame(df.values + val3, index=df.index, columns=df.columns) assert_frame_equal(df.add(val3), added) + @pytest.mark.parametrize('values', [[1, 2], (1, 2), np.array([1, 2]), + range(1, 3), deque([1, 2])]) + def test_arith_alignment_non_pandas_object(self, values): + # GH 17901 + df = DataFrame({'A': [1, 1], 'B': [1, 1]}) + expected = DataFrame({'A': [2, 2], 'B': [3, 3]}) + result = df + values + assert_frame_equal(result, expected) + def test_combineFrame(self): frame_copy = self.frame.reindex(self.frame.index[::2]) @@ -759,39 +730,38 @@ def test_combineFrame(self): added = self.frame + frame_copy - indexer = added['A'].valid().index + indexer = added['A'].dropna().index exp = (self.frame['A'] * 2).copy() - tm.assert_series_equal(added['A'].valid(), exp.loc[indexer]) + tm.assert_series_equal(added['A'].dropna(), exp.loc[indexer]) exp.loc[~exp.index.isin(indexer)] = np.nan tm.assert_series_equal(added['A'], exp.loc[added['A'].index]) - self.assertTrue( - np.isnan(added['C'].reindex(frame_copy.index)[:5]).all()) + assert np.isnan(added['C'].reindex(frame_copy.index)[:5]).all() # assert(False) - self.assertTrue(np.isnan(added['D']).all()) + assert np.isnan(added['D']).all() self_added = self.frame + self.frame - self.assert_index_equal(self_added.index, self.frame.index) + tm.assert_index_equal(self_added.index, self.frame.index) added_rev = frame_copy + self.frame - self.assertTrue(np.isnan(added['D']).all()) - self.assertTrue(np.isnan(added_rev['D']).all()) + assert np.isnan(added['D']).all() + assert np.isnan(added_rev['D']).all() # corner cases # empty plus_empty = self.frame + self.empty - self.assertTrue(np.isnan(plus_empty.values).all()) + assert np.isnan(plus_empty.values).all() empty_plus = self.empty + self.frame - 
self.assertTrue(np.isnan(empty_plus.values).all()) + assert np.isnan(empty_plus.values).all() empty_empty = self.empty + self.empty - self.assertTrue(empty_empty.empty) + assert empty_empty.empty # out of order reverse = self.frame.reindex(columns=self.frame.columns[::-1]) @@ -831,12 +801,14 @@ def test_combineSeries(self): for key, s in compat.iteritems(self.frame): assert_series_equal(larger_added[key], s + series[key]) - self.assertIn('E', larger_added) - self.assertTrue(np.isnan(larger_added['E']).all()) + assert 'E' in larger_added + assert np.isnan(larger_added['E']).all() - # vs mix (upcast) as needed + # no upcast needed added = self.mixed_float + series - _check_mixed_float(added, dtype='float64') + _check_mixed_float(added) + + # vs mix (upcast) as needed added = self.mixed_float + series.astype('float32') _check_mixed_float(added, dtype=dict(C=None)) added = self.mixed_float + series.astype('float16') @@ -857,22 +829,22 @@ def test_combineSeries(self): # 10890 # we no longer allow auto timeseries broadcasting - # and require explict broadcasting + # and require explicit broadcasting added = self.tsframe.add(ts, axis='index') for key, col in compat.iteritems(self.tsframe): result = col + ts assert_series_equal(added[key], result, check_names=False) - self.assertEqual(added[key].name, key) + assert added[key].name == key if col.name == ts.name: - self.assertEqual(result.name, 'A') + assert result.name == 'A' else: - self.assertTrue(result.name is None) + assert result.name is None smaller_frame = self.tsframe[:-5] smaller_added = smaller_frame.add(ts, axis='index') - self.assert_index_equal(smaller_added.index, self.tsframe.index) + tm.assert_index_equal(smaller_added.index, self.tsframe.index) smaller_ts = ts[:-5] smaller_added2 = self.tsframe.add(smaller_ts, axis='index') @@ -893,22 +865,22 @@ def test_combineSeries(self): # empty but with non-empty index frame = self.tsframe[:1].reindex(columns=[]) result = frame.mul(ts, axis='index') - 
self.assertEqual(len(result), len(ts)) + assert len(result) == len(ts) def test_combineFunc(self): result = self.frame * 2 - self.assert_numpy_array_equal(result.values, self.frame.values * 2) + tm.assert_numpy_array_equal(result.values, self.frame.values * 2) # vs mix result = self.mixed_float * 2 for c, s in compat.iteritems(result): - self.assert_numpy_array_equal( + tm.assert_numpy_array_equal( s.values, self.mixed_float[c].values * 2) _check_mixed_float(result, dtype=dict(C=None)) result = self.empty * 2 - self.assertIs(result.index, self.empty.index) - self.assertEqual(len(result.columns), 0) + assert result.index is self.empty.index + assert len(result.columns) == 0 def test_comparisons(self): df1 = tm.makeTimeDataFrame() @@ -919,21 +891,23 @@ def test_comparisons(self): def test_comp(func): result = func(df1, df2) - self.assert_numpy_array_equal(result.values, - func(df1.values, df2.values)) - with assertRaisesRegexp(ValueError, 'Wrong number of dimensions'): + tm.assert_numpy_array_equal(result.values, + func(df1.values, df2.values)) + with tm.assert_raises_regex(ValueError, + 'Wrong number of dimensions'): func(df1, ndim_5) result2 = func(self.simple, row) - self.assert_numpy_array_equal(result2.values, - func(self.simple.values, row.values)) + tm.assert_numpy_array_equal(result2.values, + func(self.simple.values, row.values)) result3 = func(self.frame, 0) - self.assert_numpy_array_equal(result3.values, - func(self.frame.values, 0)) + tm.assert_numpy_array_equal(result3.values, + func(self.frame.values, 0)) - with assertRaisesRegexp(ValueError, 'Can only compare ' - 'identically-labeled DataFrame'): + with tm.assert_raises_regex(ValueError, + 'Can only compare identically' + '-labeled DataFrame'): func(self.simple, self.simple[:2]) test_comp(operator.eq) @@ -950,23 +924,7 @@ def test_comparison_protected_from_errstate(self): expected = missing_df.values < 0 with np.errstate(invalid='raise'): result = (missing_df < 0).values - 
self.assert_numpy_array_equal(result, expected) - - def test_string_comparison(self): - df = DataFrame([{"a": 1, "b": "foo"}, {"a": 2, "b": "bar"}]) - mask_a = df.a > 1 - assert_frame_equal(df[mask_a], df.loc[1:1, :]) - assert_frame_equal(df[-mask_a], df.loc[0:0, :]) - - mask_b = df.b == "foo" - assert_frame_equal(df[mask_b], df.loc[0:0, :]) - assert_frame_equal(df[-mask_b], df.loc[1:1, :]) - - def test_float_none_comparison(self): - df = DataFrame(np.random.randn(8, 3), index=lrange(8), - columns=['A', 'B', 'C']) - - self.assertRaises(TypeError, df.__eq__, None) + tm.assert_numpy_array_equal(result, expected) def test_boolean_comparison(self): @@ -999,8 +957,8 @@ def test_boolean_comparison(self): result = df.values > b_r assert_numpy_array_equal(result, expected.values) - self.assertRaises(ValueError, df.__gt__, b_c) - self.assertRaises(ValueError, df.values.__gt__, b_c) + pytest.raises(ValueError, df.__gt__, b_c) + pytest.raises(ValueError, df.values.__gt__, b_c) # == expected = DataFrame([[False, False], [True, False], [False, False]]) @@ -1019,8 +977,8 @@ def test_boolean_comparison(self): result = df.values == b_r assert_numpy_array_equal(result, expected.values) - self.assertRaises(ValueError, lambda: df == b_c) - self.assertFalse(np.array_equal(df.values, b_c)) + pytest.raises(ValueError, lambda: df == b_c) + assert df.values.shape != b_c.shape # with alignment df = DataFrame(np.arange(6).reshape((3, 2)), @@ -1034,86 +992,14 @@ def test_boolean_comparison(self): result = df == tup assert_frame_equal(result, expected) - # not shape compatible - self.assertRaises(ValueError, lambda: df == (2, 2)) - self.assertRaises(ValueError, lambda: df == [2, 2]) - - def test_combineAdd(self): - - with tm.assert_produces_warning(FutureWarning): - # trivial - comb = self.frame.combineAdd(self.frame) - assert_frame_equal(comb, self.frame * 2) - - # more rigorous - a = DataFrame([[1., nan, nan, 2., nan]], - columns=np.arange(5)) - b = DataFrame([[2., 3., nan, 2., 6., nan]], - 
columns=np.arange(6)) - expected = DataFrame([[3., 3., nan, 4., 6., nan]], - columns=np.arange(6)) - - with tm.assert_produces_warning(FutureWarning): - result = a.combineAdd(b) - assert_frame_equal(result, expected) - - with tm.assert_produces_warning(FutureWarning): - result2 = a.T.combineAdd(b.T) - assert_frame_equal(result2, expected.T) - - expected2 = a.combine(b, operator.add, fill_value=0.) - assert_frame_equal(expected, expected2) - - # corner cases - with tm.assert_produces_warning(FutureWarning): - comb = self.frame.combineAdd(self.empty) - assert_frame_equal(comb, self.frame) - - with tm.assert_produces_warning(FutureWarning): - comb = self.empty.combineAdd(self.frame) - assert_frame_equal(comb, self.frame) - - # integer corner case - df1 = DataFrame({'x': [5]}) - df2 = DataFrame({'x': [1]}) - df3 = DataFrame({'x': [6]}) - - with tm.assert_produces_warning(FutureWarning): - comb = df1.combineAdd(df2) - assert_frame_equal(comb, df3) - - # mixed type GH2191 - df1 = DataFrame({'A': [1, 2], 'B': [3, 4]}) - df2 = DataFrame({'A': [1, 2], 'C': [5, 6]}) - with tm.assert_produces_warning(FutureWarning): - rs = df1.combineAdd(df2) - xp = DataFrame({'A': [2, 4], 'B': [3, 4.], 'C': [5, 6.]}) - assert_frame_equal(xp, rs) - - # TODO: test integer fill corner? 
- - def test_combineMult(self): - with tm.assert_produces_warning(FutureWarning): - # trivial - comb = self.frame.combineMult(self.frame) - - assert_frame_equal(comb, self.frame ** 2) - - # corner cases - comb = self.frame.combineMult(self.empty) - assert_frame_equal(comb, self.frame) - - comb = self.empty.combineMult(self.frame) - assert_frame_equal(comb, self.frame) - def test_combine_generic(self): df1 = self.frame df2 = self.frame.loc[self.frame.index[:-5], ['A', 'B', 'C']] combined = df1.combine(df2, np.add) combined2 = df2.combine(df1, np.add) - self.assertTrue(combined['D'].isnull().all()) - self.assertTrue(combined2['D'].isnull().all()) + assert combined['D'].isna().all() + assert combined2['D'].isna().all() chunk = combined.loc[combined.index[:-5], ['A', 'B', 'C']] chunk2 = combined2.loc[combined2.index[:-5], ['A', 'B', 'C']] @@ -1183,16 +1069,16 @@ def test_inplace_ops_identity(self): s += 1 assert_series_equal(s, s2) assert_series_equal(s_orig + 1, s) - self.assertIs(s, s2) - self.assertIs(s._data, s2._data) + assert s is s2 + assert s._data is s2._data df = df_orig.copy() df2 = df df += 1 assert_frame_equal(df, df2) assert_frame_equal(df_orig + 1, df) - self.assertIs(df, df2) - self.assertIs(df._data, df2._data) + assert df is df2 + assert df._data is df2._data # dtype change s = s_orig.copy() @@ -1206,8 +1092,8 @@ def test_inplace_ops_identity(self): df += 1.5 assert_frame_equal(df, df2) assert_frame_equal(df_orig + 1.5, df) - self.assertIs(df, df2) - self.assertIs(df._data, df2._data) + assert df is df2 + assert df._data is df2._data # mixed dtype arr = np.random.randint(0, 10, size=5) @@ -1218,7 +1104,7 @@ def test_inplace_ops_identity(self): expected = DataFrame({'A': arr.copy() + 1, 'B': 'foo'}) assert_frame_equal(df, expected) assert_frame_equal(df2, expected) - self.assertIs(df._data, df2._data) + assert df._data is df2._data df = df_orig.copy() df2 = df @@ -1226,7 +1112,34 @@ def test_inplace_ops_identity(self): expected = DataFrame({'A': 
arr.copy() + 1.5, 'B': 'foo'}) assert_frame_equal(df, expected) assert_frame_equal(df2, expected) - self.assertIs(df._data, df2._data) + assert df._data is df2._data + + @pytest.mark.parametrize('op', ['add', 'and', 'div', 'floordiv', 'mod', + 'mul', 'or', 'pow', 'sub', 'truediv', + 'xor']) + def test_inplace_ops_identity2(self, op): + + if compat.PY3 and op == 'div': + return + + df = DataFrame({'a': [1., 2., 3.], + 'b': [1, 2, 3]}) + + operand = 2 + if op in ('and', 'or', 'xor'): + # cannot use floats for boolean ops + df['a'] = [True, False, True] + + df_copy = df.copy() + iop = '__i{}__'.format(op) + op = '__{}__'.format(op) + + # no id change and value is correct + getattr(df, iop)(operand) + expected = getattr(df_copy, op)(operand) + assert_frame_equal(df, expected) + expected = id(df) + assert id(df) == expected def test_alignment_non_pandas(self): index = ['A', 'B', 'C'] @@ -1234,8 +1147,8 @@ def test_alignment_non_pandas(self): df = pd.DataFrame(np.random.randn(3, 3), index=index, columns=columns) align = pd.core.ops._align_method_FRAME - - for val in [[1, 2, 3], (1, 2, 3), np.array([1, 2, 3], dtype=np.int64)]: + for val in [[1, 2, 3], (1, 2, 3), np.array([1, 2, 3], dtype=np.int64), + range(1, 4)]: tm.assert_series_equal(align(df, val, 'index'), Series([1, 2, 3], index=df.index)) @@ -1244,11 +1157,12 @@ def test_alignment_non_pandas(self): # length mismatch msg = 'Unable to coerce to Series, length must be 3: given 2' - for val in [[1, 2], (1, 2), np.array([1, 2])]: - with tm.assertRaisesRegexp(ValueError, msg): + for val in [[1, 2], (1, 2), np.array([1, 2]), range(1, 3)]: + + with tm.assert_raises_regex(ValueError, msg): align(df, val, 'index') - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): align(df, val, 'columns') val = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) @@ -1262,14 +1176,14 @@ def test_alignment_non_pandas(self): # shape mismatch msg = 'Unable to coerce to DataFrame, shape must be' val = 
np.array([[1, 2, 3], [4, 5, 6]]) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): align(df, val, 'index') - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): align(df, val, 'columns') val = np.zeros((3, 3, 3)) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): align(df, val, 'index') - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): align(df, val, 'columns') diff --git a/pandas/tests/frame/test_period.py b/pandas/tests/frame/test_period.py index 84d10a2e78d28..482210966fe6b 100644 --- a/pandas/tests/frame/test_period.py +++ b/pandas/tests/frame/test_period.py @@ -12,9 +12,9 @@ def _permute(obj): return obj.take(np.random.permutation(len(obj))) -class TestPeriodIndex(tm.TestCase): +class TestPeriodIndex(object): - def setUp(self): + def setup_method(self, method): pass def test_as_frame_columns(self): @@ -37,11 +37,11 @@ def test_frame_setitem(self): df['Index'] = rng rs = Index(df['Index']) tm.assert_index_equal(rs, rng, check_names=False) - self.assertEqual(rs.name, 'Index') - self.assertEqual(rng.name, 'index') + assert rs.name == 'Index' + assert rng.name == 'index' rs = df.reset_index().set_index('index') - tm.assertIsInstance(rs.index, PeriodIndex) + assert isinstance(rs.index, PeriodIndex) tm.assert_index_equal(rs.index, rng) def test_frame_to_time_stamp(self): @@ -106,18 +106,19 @@ def _get_with_delta(delta, freq='A-DEC'): tm.assert_index_equal(result.columns, exp_index) # invalid axis - tm.assertRaisesRegexp(ValueError, 'axis', df.to_timestamp, axis=2) + tm.assert_raises_regex( + ValueError, 'axis', df.to_timestamp, axis=2) result1 = df.to_timestamp('5t', axis=1) result2 = df.to_timestamp('t', axis=1) expected = pd.date_range('2001-01-01', '2009-01-01', freq='AS') - self.assertTrue(isinstance(result1.columns, DatetimeIndex)) - self.assertTrue(isinstance(result2.columns, DatetimeIndex)) - 
self.assert_numpy_array_equal(result1.columns.asi8, expected.asi8) - self.assert_numpy_array_equal(result2.columns.asi8, expected.asi8) + assert isinstance(result1.columns, DatetimeIndex) + assert isinstance(result2.columns, DatetimeIndex) + tm.assert_numpy_array_equal(result1.columns.asi8, expected.asi8) + tm.assert_numpy_array_equal(result2.columns.asi8, expected.asi8) # PeriodIndex.to_timestamp always use 'infer' - self.assertEqual(result1.columns.freqstr, 'AS-JAN') - self.assertEqual(result2.columns.freqstr, 'AS-JAN') + assert result1.columns.freqstr == 'AS-JAN' + assert result2.columns.freqstr == 'AS-JAN' def test_frame_index_to_string(self): index = PeriodIndex(['2011-1', '2011-2', '2011-3'], freq='M') diff --git a/pandas/tests/frame/test_quantile.py b/pandas/tests/frame/test_quantile.py index 909a1a6a4c917..2f264874378bc 100644 --- a/pandas/tests/frame/test_quantile.py +++ b/pandas/tests/frame/test_quantile.py @@ -9,28 +9,25 @@ from pandas import (DataFrame, Series, Timestamp, _np_version_under1p11) import pandas as pd -from pandas.util.testing import (assert_series_equal, - assert_frame_equal, - assertRaisesRegexp) +from pandas.util.testing import assert_series_equal, assert_frame_equal import pandas.util.testing as tm -from pandas import _np_version_under1p9 from pandas.tests.frame.common import TestData -class TestDataFrameQuantile(tm.TestCase, TestData): +class TestDataFrameQuantile(TestData): def test_quantile(self): from numpy import percentile q = self.tsframe.quantile(0.1, axis=0) - self.assertEqual(q['A'], percentile(self.tsframe['A'], 10)) + assert q['A'] == percentile(self.tsframe['A'], 10) tm.assert_index_equal(q.index, self.tsframe.columns) q = self.tsframe.quantile(0.9, axis=1) - self.assertEqual(q['2000-01-17'], - percentile(self.tsframe.loc['2000-01-17'], 90)) + assert (q['2000-01-17'] == + percentile(self.tsframe.loc['2000-01-17'], 90)) tm.assert_index_equal(q.index, self.tsframe.index) # test degenerate case @@ -77,7 +74,7 @@ def 
test_quantile_axis_mixed(self): # must raise def f(): df.quantile(.5, axis=1, numeric_only=False) - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) def test_quantile_axis_parameter(self): # GH 9543/9544 @@ -100,44 +97,41 @@ def test_quantile_axis_parameter(self): result = df.quantile(.5, axis="columns") assert_series_equal(result, expected) - self.assertRaises(ValueError, df.quantile, 0.1, axis=-1) - self.assertRaises(ValueError, df.quantile, 0.1, axis="column") + pytest.raises(ValueError, df.quantile, 0.1, axis=-1) + pytest.raises(ValueError, df.quantile, 0.1, axis="column") def test_quantile_interpolation(self): - # GH #10174 - if _np_version_under1p9: - pytest.skip("Numpy version under 1.9") - + # see gh-10174 from numpy import percentile # interpolation = linear (default case) q = self.tsframe.quantile(0.1, axis=0, interpolation='linear') - self.assertEqual(q['A'], percentile(self.tsframe['A'], 10)) + assert q['A'] == percentile(self.tsframe['A'], 10) q = self.intframe.quantile(0.1) - self.assertEqual(q['A'], percentile(self.intframe['A'], 10)) + assert q['A'] == percentile(self.intframe['A'], 10) # test with and without interpolation keyword q1 = self.intframe.quantile(0.1) - self.assertEqual(q1['A'], np.percentile(self.intframe['A'], 10)) - assert_series_equal(q, q1) + assert q1['A'] == np.percentile(self.intframe['A'], 10) + tm.assert_series_equal(q, q1) # interpolation method other than default linear df = DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]}, index=[1, 2, 3]) result = df.quantile(.5, axis=1, interpolation='nearest') expected = Series([1, 2, 3], index=[1, 2, 3], name=0.5) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) # cross-check interpolation=nearest results in original dtype exp = np.percentile(np.array([[1, 2, 3], [2, 3, 4]]), .5, axis=0, interpolation='nearest') expected = Series(exp, index=[1, 2, 3], name=0.5, dtype='int64') - assert_series_equal(result, expected) + 
tm.assert_series_equal(result, expected) # float df = DataFrame({"A": [1., 2., 3.], "B": [2., 3., 4.]}, index=[1, 2, 3]) result = df.quantile(.5, axis=1, interpolation='nearest') expected = Series([1., 2., 3.], index=[1, 2, 3], name=0.5) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) exp = np.percentile(np.array([[1., 2., 3.], [2., 3., 4.]]), .5, axis=0, interpolation='nearest') expected = Series(exp, index=[1, 2, 3], name=0.5, dtype='float64') @@ -168,44 +162,6 @@ def test_quantile_interpolation(self): index=[.25, .5], columns=['a', 'b', 'c']) assert_frame_equal(result, expected) - def test_quantile_interpolation_np_lt_1p9(self): - # GH #10174 - if not _np_version_under1p9: - pytest.skip("Numpy version is greater than 1.9") - - from numpy import percentile - - # interpolation = linear (default case) - q = self.tsframe.quantile(0.1, axis=0, interpolation='linear') - self.assertEqual(q['A'], percentile(self.tsframe['A'], 10)) - q = self.intframe.quantile(0.1) - self.assertEqual(q['A'], percentile(self.intframe['A'], 10)) - - # test with and without interpolation keyword - q1 = self.intframe.quantile(0.1) - self.assertEqual(q1['A'], np.percentile(self.intframe['A'], 10)) - assert_series_equal(q, q1) - - # interpolation method other than default linear - expErrMsg = "Interpolation methods other than linear" - df = DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]}, index=[1, 2, 3]) - with assertRaisesRegexp(ValueError, expErrMsg): - df.quantile(.5, axis=1, interpolation='nearest') - - with assertRaisesRegexp(ValueError, expErrMsg): - df.quantile([.5, .75], axis=1, interpolation='lower') - - # test degenerate case - df = DataFrame({'x': [], 'y': []}) - with assertRaisesRegexp(ValueError, expErrMsg): - q = df.quantile(0.1, axis=0, interpolation='higher') - - # multi - df = DataFrame([[1, 1, 1], [2, 2, 2], [3, 3, 3]], - columns=['a', 'b', 'c']) - with assertRaisesRegexp(ValueError, expErrMsg): - df.quantile([.25, .5], interpolation='midpoint') 
- def test_quantile_multi(self): df = DataFrame([[1, 1, 1], [2, 2, 2], [3, 3, 3]], columns=['a', 'b', 'c']) @@ -268,7 +224,7 @@ def test_quantile_datetime(self): def test_quantile_invalid(self): msg = 'percentiles should all be in the interval \\[0, 1\\]' for invalid in [-1, 2, [0.5, -1], [0.5, 2]]: - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): self.tsframe.quantile(invalid) def test_quantile_box(self): @@ -431,7 +387,7 @@ def test_quantile_empty(self): # res = df.quantile(0.5) # datetimes - df = DataFrame(columns=['a', 'b'], dtype='datetime64') + df = DataFrame(columns=['a', 'b'], dtype='datetime64[ns]') # FIXME (gives NaNs instead of NaT in 0.18.1 or 0.19.0) # res = df.quantile(0.5, numeric_only=False) diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index 647af92b42273..a226f8de3c8bd 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -4,7 +4,6 @@ import operator import pytest -from itertools import product from pandas.compat import (zip, range, lrange, StringIO) from pandas import DataFrame, Series, Index, MultiIndex, date_range @@ -15,17 +14,27 @@ from pandas.util.testing import (assert_series_equal, assert_frame_equal, - assertRaises, makeCustomDataframe as mkdf) import pandas.util.testing as tm -from pandas.computation import _NUMEXPR_INSTALLED +import pandas.util._test_decorators as td +from pandas.core.computation.check import _NUMEXPR_INSTALLED from pandas.tests.frame.common import TestData PARSERS = 'python', 'pandas' -ENGINES = 'python', 'numexpr' +ENGINES = 'python', pytest.param('numexpr', marks=td.skip_if_no_ne) + + +@pytest.fixture(params=PARSERS, ids=lambda x: x) +def parser(request): + return request.param + + +@pytest.fixture(params=ENGINES, ids=lambda x: x) +def engine(request): + return request.param def skip_if_no_pandas_parser(parser): @@ -33,16 +42,9 @@ def skip_if_no_pandas_parser(parser): 
pytest.skip("cannot evaluate with parser {0!r}".format(parser)) -def skip_if_no_ne(engine='numexpr'): - if engine == 'numexpr': - if not _NUMEXPR_INSTALLED: - pytest.skip("cannot query engine numexpr when numexpr not " - "installed") +class TestCompat(object): - -class TestCompat(tm.TestCase): - - def setUp(self): + def setup_method(self, method): self.df = DataFrame({'A': [1, 2, 3]}) self.expected1 = self.df[self.df.A > 0] self.expected2 = self.df.A + 1 @@ -82,13 +84,13 @@ def test_query_numexpr(self): result = df.eval('A+1', engine='numexpr') assert_series_equal(result, self.expected2, check_names=False) else: - self.assertRaises(ImportError, - lambda: df.query('A>0', engine='numexpr')) - self.assertRaises(ImportError, - lambda: df.eval('A+1', engine='numexpr')) + pytest.raises(ImportError, + lambda: df.query('A>0', engine='numexpr')) + pytest.raises(ImportError, + lambda: df.eval('A+1', engine='numexpr')) -class TestDataFrameEval(tm.TestCase, TestData): +class TestDataFrameEval(TestData): def test_ops(self): @@ -139,10 +141,10 @@ def test_query_non_str(self): df = pd.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'b']}) msg = "expr must be a string to be evaluated" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): df.query(lambda x: x.B == "b") - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): df.query(111) def test_query_empty_string(self): @@ -150,7 +152,7 @@ def test_query_empty_string(self): df = pd.DataFrame({'A': [1, 2, 3]}) msg = "expr cannot be an empty string" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): df.query('') def test_eval_resolvers_as_list(self): @@ -158,16 +160,16 @@ def test_eval_resolvers_as_list(self): df = DataFrame(randn(10, 2), columns=list('ab')) dict1 = {'a': 1} dict2 = {'b': 2} - self.assertTrue(df.eval('a + b', resolvers=[dict1, dict2]) == - dict1['a'] + dict2['b']) - self.assertTrue(pd.eval('a + b', 
resolvers=[dict1, dict2]) == - dict1['a'] + dict2['b']) + assert (df.eval('a + b', resolvers=[dict1, dict2]) == + dict1['a'] + dict2['b']) + assert (pd.eval('a + b', resolvers=[dict1, dict2]) == + dict1['a'] + dict2['b']) -class TestDataFrameQueryWithMultiIndex(tm.TestCase): +class TestDataFrameQueryWithMultiIndex(object): - def check_query_with_named_multiindex(self, parser, engine): - tm.skip_if_no_ne(engine) + def test_query_with_named_multiindex(self, parser, engine): + skip_if_no_pandas_parser(parser) a = np.random.choice(['red', 'green'], size=10) b = np.random.choice(['eggs', 'ham'], size=10) index = MultiIndex.from_arrays([a, b], names=['color', 'food']) @@ -215,12 +217,8 @@ def check_query_with_named_multiindex(self, parser, engine): assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) - def test_query_with_named_multiindex(self): - for parser, engine in product(['pandas'], ENGINES): - yield self.check_query_with_named_multiindex, parser, engine - - def check_query_with_unnamed_multiindex(self, parser, engine): - tm.skip_if_no_ne(engine) + def test_query_with_unnamed_multiindex(self, parser, engine): + skip_if_no_pandas_parser(parser) a = np.random.choice(['red', 'green'], size=10) b = np.random.choice(['eggs', 'ham'], size=10) index = MultiIndex.from_arrays([a, b]) @@ -309,12 +307,8 @@ def check_query_with_unnamed_multiindex(self, parser, engine): assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) - def test_query_with_unnamed_multiindex(self): - for parser, engine in product(['pandas'], ENGINES): - yield self.check_query_with_unnamed_multiindex, parser, engine - - def check_query_with_partially_named_multiindex(self, parser, engine): - tm.skip_if_no_ne(engine) + def test_query_with_partially_named_multiindex(self, parser, engine): + skip_if_no_pandas_parser(parser) a = np.random.choice(['red', 'green'], size=10) b = np.arange(10) index = MultiIndex.from_arrays([a, b]) @@ -342,17 +336,7 @@ def 
check_query_with_partially_named_multiindex(self, parser, engine): exp = df[ind != "red"] assert_frame_equal(res, exp) - def test_query_with_partially_named_multiindex(self): - for parser, engine in product(['pandas'], ENGINES): - yield (self.check_query_with_partially_named_multiindex, - parser, engine) - def test_query_multiindex_get_index_resolvers(self): - for parser, engine in product(['pandas'], ENGINES): - yield (self.check_query_multiindex_get_index_resolvers, parser, - engine) - - def check_query_multiindex_get_index_resolvers(self, parser, engine): df = mkdf(10, 3, r_idx_nlevels=2, r_idx_names=['spam', 'eggs']) resolvers = df._get_index_resolvers() @@ -376,41 +360,23 @@ def to_series(mi, level): else: raise AssertionError("object must be a Series or Index") - def test_raise_on_panel_with_multiindex(self): - for parser, engine in product(PARSERS, ENGINES): - yield self.check_raise_on_panel_with_multiindex, parser, engine - - def check_raise_on_panel_with_multiindex(self, parser, engine): - tm.skip_if_no_ne() + def test_raise_on_panel_with_multiindex(self, parser, engine): p = tm.makePanel(7) p.items = tm.makeCustomIndex(len(p.items), nlevels=2) - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): pd.eval('p + 1', parser=parser, engine=engine) - def test_raise_on_panel4d_with_multiindex(self): - for parser, engine in product(PARSERS, ENGINES): - yield self.check_raise_on_panel4d_with_multiindex, parser, engine - - def check_raise_on_panel4d_with_multiindex(self, parser, engine): - tm.skip_if_no_ne() - p4d = tm.makePanel4D(7) - p4d.items = tm.makeCustomIndex(len(p4d.items), nlevels=2) - with tm.assertRaises(NotImplementedError): - pd.eval('p4d + 1', parser=parser, engine=engine) - -class TestDataFrameQueryNumExprPandas(tm.TestCase): +@td.skip_if_no_ne +class TestDataFrameQueryNumExprPandas(object): @classmethod - def setUpClass(cls): - super(TestDataFrameQueryNumExprPandas, cls).setUpClass() + def setup_class(cls): 
cls.engine = 'numexpr' cls.parser = 'pandas' - tm.skip_if_no_ne(cls.engine) @classmethod - def tearDownClass(cls): - super(TestDataFrameQueryNumExprPandas, cls).tearDownClass() + def teardown_class(cls): del cls.engine, cls.parser def test_date_query_with_attribute_access(self): @@ -484,7 +450,7 @@ def test_date_index_query_with_NaT_duplicates(self): df = DataFrame(d) df.loc[np.random.rand(n) > 0.5, 'dates1'] = pd.NaT df.set_index('dates1', inplace=True, drop=True) - res = df.query('index < 20130101 < dates3', engine=engine, + res = df.query('dates1 < 20130101 < dates3', engine=engine, parser=parser) expec = df[(df.index.to_series() < '20130101') & ('20130101' < df.dates3)] @@ -500,18 +466,18 @@ def test_date_query_with_non_date(self): ops = '==', '!=', '<', '>', '<=', '>=' for op in ops: - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): df.query('dates %s nondate' % op, parser=parser, engine=engine) def test_query_syntax_error(self): engine, parser = self.engine, self.parser df = DataFrame({"i": lrange(10), "+": lrange(3, 13), "r": lrange(4, 14)}) - with tm.assertRaises(SyntaxError): + with pytest.raises(SyntaxError): df.query('i - +', engine=engine, parser=parser) def test_query_scope(self): - from pandas.computation.ops import UndefinedVariableError + from pandas.core.computation.ops import UndefinedVariableError engine, parser = self.engine, self.parser skip_if_no_pandas_parser(parser) @@ -527,34 +493,34 @@ def test_query_scope(self): assert_frame_equal(res, expected) # no local variable c - with tm.assertRaises(UndefinedVariableError): + with pytest.raises(UndefinedVariableError): df.query('@a > b > @c', engine=engine, parser=parser) # no column named 'c' - with tm.assertRaises(UndefinedVariableError): + with pytest.raises(UndefinedVariableError): df.query('@a > b > c', engine=engine, parser=parser) def test_query_doesnt_pickup_local(self): - from pandas.computation.ops import UndefinedVariableError + from pandas.core.computation.ops import 
UndefinedVariableError engine, parser = self.engine, self.parser n = m = 10 df = DataFrame(np.random.randint(m, size=(n, 3)), columns=list('abc')) # we don't pick up the local 'sin' - with tm.assertRaises(UndefinedVariableError): + with pytest.raises(UndefinedVariableError): df.query('sin > 5', engine=engine, parser=parser) def test_query_builtin(self): - from pandas.computation.engines import NumExprClobberingError + from pandas.core.computation.engines import NumExprClobberingError engine, parser = self.engine, self.parser n = m = 10 df = DataFrame(np.random.randint(m, size=(n, 3)), columns=list('abc')) df.index.name = 'sin' - with tm.assertRaisesRegexp(NumExprClobberingError, - 'Variables in expression.+'): + with tm.assert_raises_regex(NumExprClobberingError, + 'Variables in expression.+'): df.query('sin > 5', engine=engine, parser=parser) def test_query(self): @@ -624,12 +590,12 @@ def test_nested_scope(self): assert_frame_equal(result, expected) def test_nested_raises_on_local_self_reference(self): - from pandas.computation.ops import UndefinedVariableError + from pandas.core.computation.ops import UndefinedVariableError df = DataFrame(np.random.randn(5, 3)) # can't reference ourself b/c we're a local so @ is necessary - with tm.assertRaises(UndefinedVariableError): + with pytest.raises(UndefinedVariableError): df.query('df > 0', engine=self.engine, parser=self.parser) def test_local_syntax(self): @@ -683,12 +649,12 @@ def test_at_inside_string(self): assert_frame_equal(result, expected) def test_query_undefined_local(self): - from pandas.computation.ops import UndefinedVariableError + from pandas.core.computation.ops import UndefinedVariableError engine, parser = self.engine, self.parser skip_if_no_pandas_parser(parser) df = DataFrame(np.random.rand(10, 2), columns=list('ab')) - with tm.assertRaisesRegexp(UndefinedVariableError, - "local variable 'c' is not defined"): + with tm.assert_raises_regex(UndefinedVariableError, + "local variable 'c' is not 
defined"): df.query('a == @c', engine=engine, parser=parser) def test_index_resolvers_come_after_columns_with_the_same_name(self): @@ -731,14 +697,14 @@ def test_inf(self): assert_frame_equal(result, expected) +@td.skip_if_no_ne class TestDataFrameQueryNumExprPython(TestDataFrameQueryNumExprPandas): @classmethod - def setUpClass(cls): - super(TestDataFrameQueryNumExprPython, cls).setUpClass() + def setup_class(cls): + super(TestDataFrameQueryNumExprPython, cls).setup_class() cls.engine = 'numexpr' cls.parser = 'python' - tm.skip_if_no_ne(cls.engine) cls.frame = TestData().frame def test_date_query_no_attribute_access(self): @@ -799,26 +765,26 @@ def test_date_index_query_with_NaT_duplicates(self): df['dates3'] = date_range('1/1/2014', periods=n) df.loc[np.random.rand(n) > 0.5, 'dates1'] = pd.NaT df.set_index('dates1', inplace=True, drop=True) - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): df.query('index < 20130101 < dates3', engine=engine, parser=parser) def test_nested_scope(self): - from pandas.computation.ops import UndefinedVariableError + from pandas.core.computation.ops import UndefinedVariableError engine = self.engine parser = self.parser # smoke test x = 1 # noqa result = pd.eval('x + 1', engine=engine, parser=parser) - self.assertEqual(result, 2) + assert result == 2 df = DataFrame(np.random.randn(5, 3)) df2 = DataFrame(np.random.randn(5, 3)) # don't have the pandas parser - with tm.assertRaises(SyntaxError): + with pytest.raises(SyntaxError): df.query('(@df>0) & (@df2>0)', engine=engine, parser=parser) - with tm.assertRaises(UndefinedVariableError): + with pytest.raises(UndefinedVariableError): df.query('(df>0) & (df2>0)', engine=engine, parser=parser) expected = df[(df > 0) & (df2 > 0)] @@ -835,8 +801,8 @@ def test_nested_scope(self): class TestDataFrameQueryPythonPandas(TestDataFrameQueryNumExprPandas): @classmethod - def setUpClass(cls): - super(TestDataFrameQueryPythonPandas, cls).setUpClass() + def 
setup_class(cls): + super(TestDataFrameQueryPythonPandas, cls).setup_class() cls.engine = 'python' cls.parser = 'pandas' cls.frame = TestData().frame @@ -856,8 +822,8 @@ def test_query_builtin(self): class TestDataFrameQueryPythonPython(TestDataFrameQueryNumExprPython): @classmethod - def setUpClass(cls): - super(TestDataFrameQueryPythonPython, cls).setUpClass() + def setup_class(cls): + super(TestDataFrameQueryPythonPython, cls).setup_class() cls.engine = cls.parser = 'python' cls.frame = TestData().frame @@ -873,10 +839,9 @@ def test_query_builtin(self): assert_frame_equal(expected, result) -class TestDataFrameQueryStrings(tm.TestCase): +class TestDataFrameQueryStrings(object): - def check_str_query_method(self, parser, engine): - tm.skip_if_no_ne(engine) + def test_str_query_method(self, parser, engine): df = DataFrame(randn(10, 1), columns=['b']) df['strings'] = Series(list('aabbccddee')) expect = df[df.strings == 'a'] @@ -893,8 +858,9 @@ def check_str_query_method(self, parser, engine): for lhs, op, rhs in zip(lhs, ops, rhs): ex = '{lhs} {op} {rhs}'.format(lhs=lhs, op=op, rhs=rhs) - assertRaises(NotImplementedError, df.query, ex, engine=engine, - parser=parser, local_dict={'strings': df.strings}) + pytest.raises(NotImplementedError, df.query, ex, + engine=engine, parser=parser, + local_dict={'strings': df.strings}) else: res = df.query('"a" == strings', engine=engine, parser=parser) assert_frame_equal(res, expect) @@ -911,16 +877,7 @@ def check_str_query_method(self, parser, engine): assert_frame_equal(res, expect) assert_frame_equal(res, df[~df.strings.isin(['a'])]) - def test_str_query_method(self): - for parser, engine in product(PARSERS, ENGINES): - yield self.check_str_query_method, parser, engine - - def test_str_list_query_method(self): - for parser, engine in product(PARSERS, ENGINES): - yield self.check_str_list_query_method, parser, engine - - def check_str_list_query_method(self, parser, engine): - tm.skip_if_no_ne(engine) + def 
test_str_list_query_method(self, parser, engine): df = DataFrame(randn(10, 1), columns=['b']) df['strings'] = Series(list('aabbccddee')) expect = df[df.strings.isin(['a', 'b'])] @@ -937,7 +894,7 @@ def check_str_list_query_method(self, parser, engine): for lhs, op, rhs in zip(lhs, ops, rhs): ex = '{lhs} {op} {rhs}'.format(lhs=lhs, op=op, rhs=rhs) - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): df.query(ex, engine=engine, parser=parser) else: res = df.query('strings == ["a", "b"]', engine=engine, @@ -958,8 +915,7 @@ def check_str_list_query_method(self, parser, engine): parser=parser) assert_frame_equal(res, expect) - def check_query_with_string_columns(self, parser, engine): - tm.skip_if_no_ne(engine) + def test_query_with_string_columns(self, parser, engine): df = DataFrame({'a': list('aaaabbbbcccc'), 'b': list('aabbccddeeff'), 'c': np.random.randint(5, size=12), @@ -973,18 +929,13 @@ def check_query_with_string_columns(self, parser, engine): expec = df[df.a.isin(df.b) & (df.c < df.d)] assert_frame_equal(res, expec) else: - with assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): df.query('a in b', parser=parser, engine=engine) - with assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): df.query('a in b and c < d', parser=parser, engine=engine) - def test_query_with_string_columns(self): - for parser, engine in product(PARSERS, ENGINES): - yield self.check_query_with_string_columns, parser, engine - - def check_object_array_eq_ne(self, parser, engine): - tm.skip_if_no_ne(engine) + def test_object_array_eq_ne(self, parser, engine): df = DataFrame({'a': list('aaaabbbbcccc'), 'b': list('aabbccddeeff'), 'c': np.random.randint(5, size=12), @@ -997,12 +948,7 @@ def check_object_array_eq_ne(self, parser, engine): exp = df[df.a != df.b] assert_frame_equal(res, exp) - def test_object_array_eq_ne(self): - for parser, engine in product(PARSERS, ENGINES): - yield 
self.check_object_array_eq_ne, parser, engine - - def check_query_with_nested_strings(self, parser, engine): - tm.skip_if_no_ne(engine) + def test_query_with_nested_strings(self, parser, engine): skip_if_no_pandas_parser(parser) raw = """id event timestamp 1 "page 1 load" 1/1/2014 0:00:01 @@ -1025,26 +971,15 @@ def check_query_with_nested_strings(self, parser, engine): engine=engine) assert_frame_equal(expected, res) - def test_query_with_nested_string(self): - for parser, engine in product(PARSERS, ENGINES): - yield self.check_query_with_nested_strings, parser, engine - - def check_query_with_nested_special_character(self, parser, engine): + def test_query_with_nested_special_character(self, parser, engine): skip_if_no_pandas_parser(parser) - tm.skip_if_no_ne(engine) df = DataFrame({'a': ['a', 'b', 'test & test'], 'b': [1, 2, 3]}) res = df.query('a == "test & test"', parser=parser, engine=engine) expec = df[df.a == 'test & test'] assert_frame_equal(res, expec) - def test_query_with_nested_special_character(self): - for parser, engine in product(PARSERS, ENGINES): - yield (self.check_query_with_nested_special_character, - parser, engine) - - def check_query_lex_compare_strings(self, parser, engine): - tm.skip_if_no_ne(engine=engine) + def test_query_lex_compare_strings(self, parser, engine): import operator as opr a = Series(np.random.choice(list('abcde'), 20)) @@ -1058,12 +993,7 @@ def check_query_lex_compare_strings(self, parser, engine): expected = df[func(df.X, 'd')] assert_frame_equal(res, expected) - def test_query_lex_compare_strings(self): - for parser, engine in product(PARSERS, ENGINES): - yield self.check_query_lex_compare_strings, parser, engine - - def check_query_single_element_booleans(self, parser, engine): - tm.skip_if_no_ne(engine) + def test_query_single_element_booleans(self, parser, engine): columns = 'bid', 'bidsize', 'ask', 'asksize' data = np.random.randint(2, size=(1, len(columns))).astype(bool) df = DataFrame(data, columns=columns) @@ 
-1071,12 +1001,8 @@ def check_query_single_element_booleans(self, parser, engine): expected = df[df.bid & df.ask] assert_frame_equal(res, expected) - def test_query_single_element_booleans(self): - for parser, engine in product(PARSERS, ENGINES): - yield self.check_query_single_element_booleans, parser, engine - - def check_query_string_scalar_variable(self, parser, engine): - tm.skip_if_no_ne(engine) + def test_query_string_scalar_variable(self, parser, engine): + skip_if_no_pandas_parser(parser) df = pd.DataFrame({'Symbol': ['BUD US', 'BUD US', 'IBM US', 'IBM US'], 'Price': [109.70, 109.72, 183.30, 183.35]}) e = df[df.Symbol == 'BUD US'] @@ -1084,70 +1010,30 @@ def check_query_string_scalar_variable(self, parser, engine): r = df.query('Symbol == @symb', parser=parser, engine=engine) assert_frame_equal(e, r) - def test_query_string_scalar_variable(self): - for parser, engine in product(['pandas'], ENGINES): - yield self.check_query_string_scalar_variable, parser, engine - -class TestDataFrameEvalNumExprPandas(tm.TestCase): - - @classmethod - def setUpClass(cls): - super(TestDataFrameEvalNumExprPandas, cls).setUpClass() - cls.engine = 'numexpr' - cls.parser = 'pandas' - tm.skip_if_no_ne() +class TestDataFrameEvalWithFrame(object): - def setUp(self): + def setup_method(self, method): self.frame = DataFrame(randn(10, 3), columns=list('abc')) - def tearDown(self): + def teardown_method(self, method): del self.frame - def test_simple_expr(self): - res = self.frame.eval('a + b', engine=self.engine, parser=self.parser) + def test_simple_expr(self, parser, engine): + res = self.frame.eval('a + b', engine=engine, parser=parser) expect = self.frame.a + self.frame.b assert_series_equal(res, expect) - def test_bool_arith_expr(self): - res = self.frame.eval('a[a < 1] + b', engine=self.engine, - parser=self.parser) + def test_bool_arith_expr(self, parser, engine): + res = self.frame.eval('a[a < 1] + b', engine=engine, parser=parser) expect = self.frame.a[self.frame.a < 1] + 
self.frame.b assert_series_equal(res, expect) - def test_invalid_type_for_operator_raises(self): + def test_invalid_type_for_operator_raises(self, parser, engine): df = DataFrame({'a': [1, 2], 'b': ['c', 'd']}) ops = '+', '-', '*', '/' for op in ops: - with tm.assertRaisesRegexp(TypeError, - r"unsupported operand type\(s\) for " - r".+: '.+' and '.+'"): - df.eval('a {0} b'.format(op), engine=self.engine, - parser=self.parser) - - -class TestDataFrameEvalNumExprPython(TestDataFrameEvalNumExprPandas): - - @classmethod - def setUpClass(cls): - super(TestDataFrameEvalNumExprPython, cls).setUpClass() - cls.engine = 'numexpr' - cls.parser = 'python' - tm.skip_if_no_ne(cls.engine) - - -class TestDataFrameEvalPythonPandas(TestDataFrameEvalNumExprPandas): - - @classmethod - def setUpClass(cls): - super(TestDataFrameEvalPythonPandas, cls).setUpClass() - cls.engine = 'python' - cls.parser = 'pandas' - - -class TestDataFrameEvalPythonPython(TestDataFrameEvalNumExprPython): - - @classmethod - def setUpClass(cls): - super(TestDataFrameEvalPythonPython, cls).tearDownClass() - cls.engine = cls.parser = 'python' + with tm.assert_raises_regex(TypeError, + r"unsupported operand type\(s\) " + "for .+: '.+' and '.+'"): + df.eval('a {0} b'.format(op), engine=engine, parser=parser) diff --git a/pandas/tests/frame/test_rank.py b/pandas/tests/frame/test_rank.py new file mode 100644 index 0000000000000..b8ba408b54715 --- /dev/null +++ b/pandas/tests/frame/test_rank.py @@ -0,0 +1,299 @@ +# -*- coding: utf-8 -*- +import pytest +import numpy as np +import pandas.util.testing as tm + +from distutils.version import LooseVersion +from datetime import timedelta, datetime +from numpy import nan + +from pandas.util.testing import assert_frame_equal +from pandas.tests.frame.common import TestData +from pandas import Series, DataFrame +from pandas.compat import product + + +class TestRank(TestData): + s = Series([1, 3, 4, 2, nan, 2, 1, 5, nan, 3]) + df = DataFrame({'A': s, 'B': s}) + + results = { + 
'average': np.array([1.5, 5.5, 7.0, 3.5, nan, + 3.5, 1.5, 8.0, nan, 5.5]), + 'min': np.array([1, 5, 7, 3, nan, 3, 1, 8, nan, 5]), + 'max': np.array([2, 6, 7, 4, nan, 4, 2, 8, nan, 6]), + 'first': np.array([1, 5, 7, 3, nan, 4, 2, 8, nan, 6]), + 'dense': np.array([1, 3, 4, 2, nan, 2, 1, 5, nan, 3]), + } + + def test_rank(self): + rankdata = pytest.importorskip('scipy.stats.rankdata') + + self.frame['A'][::2] = np.nan + self.frame['B'][::3] = np.nan + self.frame['C'][::4] = np.nan + self.frame['D'][::5] = np.nan + + ranks0 = self.frame.rank() + ranks1 = self.frame.rank(1) + mask = np.isnan(self.frame.values) + + fvals = self.frame.fillna(np.inf).values + + exp0 = np.apply_along_axis(rankdata, 0, fvals) + exp0[mask] = np.nan + + exp1 = np.apply_along_axis(rankdata, 1, fvals) + exp1[mask] = np.nan + + tm.assert_almost_equal(ranks0.values, exp0) + tm.assert_almost_equal(ranks1.values, exp1) + + # integers + df = DataFrame(np.random.randint(0, 5, size=40).reshape((10, 4))) + + result = df.rank() + exp = df.astype(float).rank() + tm.assert_frame_equal(result, exp) + + result = df.rank(1) + exp = df.astype(float).rank(1) + tm.assert_frame_equal(result, exp) + + def test_rank2(self): + df = DataFrame([[1, 3, 2], [1, 2, 3]]) + expected = DataFrame([[1.0, 3.0, 2.0], [1, 2, 3]]) / 3.0 + result = df.rank(1, pct=True) + tm.assert_frame_equal(result, expected) + + df = DataFrame([[1, 3, 2], [1, 2, 3]]) + expected = df.rank(0) / 2.0 + result = df.rank(0, pct=True) + tm.assert_frame_equal(result, expected) + + df = DataFrame([['b', 'c', 'a'], ['a', 'c', 'b']]) + expected = DataFrame([[2.0, 3.0, 1.0], [1, 3, 2]]) + result = df.rank(1, numeric_only=False) + tm.assert_frame_equal(result, expected) + + expected = DataFrame([[2.0, 1.5, 1.0], [1, 1.5, 2]]) + result = df.rank(0, numeric_only=False) + tm.assert_frame_equal(result, expected) + + df = DataFrame([['b', np.nan, 'a'], ['a', 'c', 'b']]) + expected = DataFrame([[2.0, nan, 1.0], [1.0, 3.0, 2.0]]) + result = df.rank(1, 
numeric_only=False) + tm.assert_frame_equal(result, expected) + + expected = DataFrame([[2.0, nan, 1.0], [1.0, 1.0, 2.0]]) + result = df.rank(0, numeric_only=False) + tm.assert_frame_equal(result, expected) + + # f7u12, this does not work without extensive workaround + data = [[datetime(2001, 1, 5), nan, datetime(2001, 1, 2)], + [datetime(2000, 1, 2), datetime(2000, 1, 3), + datetime(2000, 1, 1)]] + df = DataFrame(data) + + # check the rank + expected = DataFrame([[2., nan, 1.], + [2., 3., 1.]]) + result = df.rank(1, numeric_only=False, ascending=True) + tm.assert_frame_equal(result, expected) + + expected = DataFrame([[1., nan, 2.], + [2., 1., 3.]]) + result = df.rank(1, numeric_only=False, ascending=False) + tm.assert_frame_equal(result, expected) + + # mixed-type frames + self.mixed_frame['datetime'] = datetime.now() + self.mixed_frame['timedelta'] = timedelta(days=1, seconds=1) + + result = self.mixed_frame.rank(1) + expected = self.mixed_frame.rank(1, numeric_only=True) + tm.assert_frame_equal(result, expected) + + df = DataFrame({"a": [1e-20, -5, 1e-20 + 1e-40, 10, + 1e60, 1e80, 1e-30]}) + exp = DataFrame({"a": [3.5, 1., 3.5, 5., 6., 7., 2.]}) + tm.assert_frame_equal(df.rank(), exp) + + def test_rank_na_option(self): + rankdata = pytest.importorskip('scipy.stats.rankdata') + + self.frame['A'][::2] = np.nan + self.frame['B'][::3] = np.nan + self.frame['C'][::4] = np.nan + self.frame['D'][::5] = np.nan + + # bottom + ranks0 = self.frame.rank(na_option='bottom') + ranks1 = self.frame.rank(1, na_option='bottom') + + fvals = self.frame.fillna(np.inf).values + + exp0 = np.apply_along_axis(rankdata, 0, fvals) + exp1 = np.apply_along_axis(rankdata, 1, fvals) + + tm.assert_almost_equal(ranks0.values, exp0) + tm.assert_almost_equal(ranks1.values, exp1) + + # top + ranks0 = self.frame.rank(na_option='top') + ranks1 = self.frame.rank(1, na_option='top') + + fval0 = self.frame.fillna((self.frame.min() - 1).to_dict()).values + fval1 = self.frame.T + fval1 = 
fval1.fillna((fval1.min() - 1).to_dict()).T + fval1 = fval1.fillna(np.inf).values + + exp0 = np.apply_along_axis(rankdata, 0, fval0) + exp1 = np.apply_along_axis(rankdata, 1, fval1) + + tm.assert_almost_equal(ranks0.values, exp0) + tm.assert_almost_equal(ranks1.values, exp1) + + # descending + + # bottom + ranks0 = self.frame.rank(na_option='top', ascending=False) + ranks1 = self.frame.rank(1, na_option='top', ascending=False) + + fvals = self.frame.fillna(np.inf).values + + exp0 = np.apply_along_axis(rankdata, 0, -fvals) + exp1 = np.apply_along_axis(rankdata, 1, -fvals) + + tm.assert_almost_equal(ranks0.values, exp0) + tm.assert_almost_equal(ranks1.values, exp1) + + # descending + + # top + ranks0 = self.frame.rank(na_option='bottom', ascending=False) + ranks1 = self.frame.rank(1, na_option='bottom', ascending=False) + + fval0 = self.frame.fillna((self.frame.min() - 1).to_dict()).values + fval1 = self.frame.T + fval1 = fval1.fillna((fval1.min() - 1).to_dict()).T + fval1 = fval1.fillna(np.inf).values + + exp0 = np.apply_along_axis(rankdata, 0, -fval0) + exp1 = np.apply_along_axis(rankdata, 1, -fval1) + + tm.assert_numpy_array_equal(ranks0.values, exp0) + tm.assert_numpy_array_equal(ranks1.values, exp1) + + def test_rank_axis(self): + # check if using axes' names gives the same result + df = DataFrame([[2, 1], [4, 3]]) + tm.assert_frame_equal(df.rank(axis=0), df.rank(axis='index')) + tm.assert_frame_equal(df.rank(axis=1), df.rank(axis='columns')) + + def test_rank_methods_frame(self): + pytest.importorskip('scipy.stats.special') + rankdata = pytest.importorskip('scipy.stats.rankdata') + import scipy + + xs = np.random.randint(0, 21, (100, 26)) + xs = (xs - 10.0) / 10.0 + cols = [chr(ord('z') - i) for i in range(xs.shape[1])] + + for vals in [xs, xs + 1e6, xs * 1e-6]: + df = DataFrame(vals, columns=cols) + + for ax in [0, 1]: + for m in ['average', 'min', 'max', 'first', 'dense']: + result = df.rank(axis=ax, method=m) + sprank = np.apply_along_axis( + rankdata, ax, 
vals, + m if m != 'first' else 'ordinal') + sprank = sprank.astype(np.float64) + expected = DataFrame(sprank, columns=cols) + + if (LooseVersion(scipy.__version__) >= + LooseVersion('0.17.0')): + expected = expected.astype('float64') + tm.assert_frame_equal(result, expected) + + def test_rank_descending(self): + dtypes = ['O', 'f8', 'i8'] + + for dtype, method in product(dtypes, self.results): + if 'i' in dtype: + df = self.df.dropna() + else: + df = self.df.astype(dtype) + + res = df.rank(ascending=False) + expected = (df.max() - df).rank() + assert_frame_equal(res, expected) + + if method == 'first' and dtype == 'O': + continue + + expected = (df.max() - df).rank(method=method) + + if dtype != 'O': + res2 = df.rank(method=method, ascending=False, + numeric_only=True) + assert_frame_equal(res2, expected) + + res3 = df.rank(method=method, ascending=False, + numeric_only=False) + assert_frame_equal(res3, expected) + + def test_rank_2d_tie_methods(self): + df = self.df + + def _check2d(df, expected, method='average', axis=0): + exp_df = DataFrame({'A': expected, 'B': expected}) + + if axis == 1: + df = df.T + exp_df = exp_df.T + + result = df.rank(method=method, axis=axis) + assert_frame_equal(result, exp_df) + + dtypes = [None, object] + disabled = set([(object, 'first')]) + results = self.results + + for method, axis, dtype in product(results, [0, 1], dtypes): + if (dtype, method) in disabled: + continue + frame = df if dtype is None else df.astype(dtype) + _check2d(frame, results[method], method=method, axis=axis) + + +@pytest.mark.parametrize( + "method,exp", [("dense", + [[1., 1., 1.], + [1., 0.5, 2. / 3], + [1., 0.5, 1. / 3]]), + ("min", + [[1. / 3, 1., 1.], + [1. / 3, 1. / 3, 2. / 3], + [1. / 3, 1. / 3, 1. / 3]]), + ("max", + [[1., 1., 1.], + [1., 2. / 3, 2. / 3], + [1., 2. / 3, 1. / 3]]), + ("average", + [[2. / 3, 1., 1.], + [2. / 3, 0.5, 2. / 3], + [2. / 3, 0.5, 1. / 3]]), + ("first", + [[1. / 3, 1., 1.], + [2. / 3, 1. / 3, 2. / 3], + [3. / 3, 2. / 3, 1. 
/ 3]])]) +def test_rank_pct_true(method, exp): + # see gh-15630. + + df = DataFrame([[2012, 66, 3], [2012, 65, 2], [2012, 65, 1]]) + result = df.rank(method=method, pct=True) + + expected = DataFrame(exp) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_replace.py b/pandas/tests/frame/test_replace.py index 8b50036cd50f8..dd83a94b7062a 100644 --- a/pandas/tests/frame/test_replace.py +++ b/pandas/tests/frame/test_replace.py @@ -2,6 +2,8 @@ from __future__ import print_function +import pytest + from datetime import datetime import re @@ -21,7 +23,7 @@ from pandas.tests.frame.common import TestData -class TestDataFrameReplace(tm.TestCase, TestData): +class TestDataFrameReplace(TestData): def test_replace_inplace(self): self.tsframe['A'][:5] = nan @@ -31,9 +33,6 @@ def test_replace_inplace(self): tsframe.replace(nan, 0, inplace=True) assert_frame_equal(tsframe, self.tsframe.fillna(0)) - self.assertRaises(TypeError, self.tsframe.replace, nan, inplace=True) - self.assertRaises(TypeError, self.tsframe.replace, nan) - # mixed type mf = self.mixed_frame mf.iloc[5:20, mf.columns.get_loc('foo')] = nan @@ -546,7 +545,7 @@ def test_regex_replace_numeric_to_object_conversion(self): expec = DataFrame({'a': ['a', 1, 2, 3], 'b': mix['b'], 'c': mix['c']}) res = df.replace(0, 'a') assert_frame_equal(res, expec) - self.assertEqual(res.a.dtype, np.object_) + assert res.a.dtype == np.object_ def test_replace_regex_metachar(self): metachars = '[]', '()', r'\d', r'\w', r'\s' @@ -718,7 +717,6 @@ def test_replace_simple_nested_dict_with_nonexistent_value(self): assert_frame_equal(expected, result) def test_replace_value_is_none(self): - self.assertRaises(TypeError, self.tsframe.replace, nan) orig_value = self.tsframe.iloc[0, 0] orig2 = self.tsframe.iloc[1, 0] @@ -779,7 +777,7 @@ def test_replace_dtypes(self): # bools df = DataFrame({'bools': [True, False, True]}) result = df.replace(False, True) - self.assertTrue(result.values.all()) + assert result.values.all() 
# complex blocks df = DataFrame({'complex': [1j, 2j, 3j]}) @@ -795,7 +793,7 @@ def test_replace_dtypes(self): expected = DataFrame({'datetime64': Index([now] * 3)}) assert_frame_equal(result, expected) - def test_replace_input_formats(self): + def test_replace_input_formats_listlike(self): # both dicts to_rep = {'A': np.nan, 'B': 0, 'C': ''} values = {'A': 0, 'B': -1, 'C': 'missing'} @@ -812,15 +810,6 @@ def test_replace_input_formats(self): 'C': ['', 'asdf', 'fd']}) assert_frame_equal(result, expected) - # dict to scalar - filled = df.replace(to_rep, 0) - expected = {} - for k, v in compat.iteritems(df): - expected[k] = v.replace(to_rep[k], 0) - assert_frame_equal(filled, DataFrame(expected)) - - self.assertRaises(TypeError, df.replace, to_rep, [np.nan, 0, '']) - # scalar to dict values = {'A': 0, 'B': -1, 'C': 'missing'} df = DataFrame({'A': [np.nan, 0, np.nan], 'B': [0, 2, 5], @@ -840,7 +829,21 @@ def test_replace_input_formats(self): expected.replace(to_rep[i], values[i], inplace=True) assert_frame_equal(result, expected) - self.assertRaises(ValueError, df.replace, to_rep, values[1:]) + pytest.raises(ValueError, df.replace, to_rep, values[1:]) + + def test_replace_input_formats_scalar(self): + df = DataFrame({'A': [np.nan, 0, np.inf], 'B': [0, 2, 5], + 'C': ['', 'asdf', 'fd']}) + + # dict to scalar + to_rep = {'A': np.nan, 'B': 0, 'C': ''} + filled = df.replace(to_rep, 0) + expected = {} + for k, v in compat.iteritems(df): + expected[k] = v.replace(to_rep[k], 0) + assert_frame_equal(filled, DataFrame(expected)) + + pytest.raises(TypeError, df.replace, to_rep, [np.nan, 0, '']) # list to scalar to_rep = [np.nan, 0, ''] @@ -911,7 +914,7 @@ def test_replace_bool_with_bool(self): def test_replace_with_dict_with_bool_keys(self): df = DataFrame({0: [True, False], 1: [False, True]}) - with tm.assertRaisesRegexp(TypeError, 'Cannot compare types .+'): + with tm.assert_raises_regex(TypeError, 'Cannot compare types .+'): df.replace({'asdf': 'asdb', True: 'yes'}) def 
test_replace_truthy(self): @@ -922,7 +925,8 @@ def test_replace_truthy(self): def test_replace_int_to_int_chain(self): df = DataFrame({'a': lrange(1, 5)}) - with tm.assertRaisesRegexp(ValueError, "Replacement not allowed .+"): + with tm.assert_raises_regex(ValueError, + "Replacement not allowed .+"): df.replace({'a': dict(zip(range(1, 5), range(2, 6)))}) def test_replace_str_to_str_chain(self): @@ -930,7 +934,8 @@ def test_replace_str_to_str_chain(self): astr = a.astype(str) bstr = np.arange(2, 6).astype(str) df = DataFrame({'a': astr}) - with tm.assertRaisesRegexp(ValueError, "Replacement not allowed .+"): + with tm.assert_raises_regex(ValueError, + "Replacement not allowed .+"): df.replace({'a': dict(zip(astr, bstr))}) def test_replace_swapping_bug(self): @@ -969,7 +974,7 @@ def test_replace_period(self): 'out_augmented_MAY_2011.json', 'out_augmented_AUG_2011.json', 'out_augmented_JAN_2011.json'], columns=['fname']) - tm.assert_equal(set(df.fname.values), set(d['fname'].keys())) + assert set(df.fname.values) == set(d['fname'].keys()) expected = DataFrame({'fname': [d['fname'][k] for k in df.fname.values]}) result = df.replace(d) @@ -992,7 +997,7 @@ def test_replace_datetime(self): 'out_augmented_MAY_2011.json', 'out_augmented_AUG_2011.json', 'out_augmented_JAN_2011.json'], columns=['fname']) - tm.assert_equal(set(df.fname.values), set(d['fname'].keys())) + assert set(df.fname.values) == set(d['fname'].keys()) expected = DataFrame({'fname': [d['fname'][k] for k in df.fname.values]}) result = df.replace(d) @@ -1063,3 +1068,36 @@ def test_replace_with_empty_dictlike(self): assert_frame_equal(df, df.replace({'b': {}})) assert_frame_equal(df, df.replace(Series({'b': {}}))) + + @pytest.mark.parametrize("to_replace, method, expected", [ + (0, 'bfill', {'A': [1, 1, 2], + 'B': [5, nan, 7], + 'C': ['a', 'b', 'c']}), + (nan, 'bfill', {'A': [0, 1, 2], + 'B': [5.0, 7.0, 7.0], + 'C': ['a', 'b', 'c']}), + ('d', 'ffill', {'A': [0, 1, 2], + 'B': [5, nan, 7], + 'C': ['a', 'b', 
'c']}), + ([0, 2], 'bfill', {'A': [1, 1, 2], + 'B': [5, nan, 7], + 'C': ['a', 'b', 'c']}), + ([1, 2], 'pad', {'A': [0, 0, 0], + 'B': [5, nan, 7], + 'C': ['a', 'b', 'c']}), + ((1, 2), 'bfill', {'A': [0, 2, 2], + 'B': [5, nan, 7], + 'C': ['a', 'b', 'c']}), + (['b', 'c'], 'ffill', {'A': [0, 1, 2], + 'B': [5, nan, 7], + 'C': ['a', 'a', 'a']}), + ]) + def test_replace_method(self, to_replace, method, expected): + # GH 19632 + df = DataFrame({'A': [0, 1, 2], + 'B': [5, nan, 7], + 'C': ['a', 'b', 'c']}) + + result = df.replace(to_replace=to_replace, value=None, method=method) + expected = DataFrame(expected) + assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index 2df297d03bcdf..3e5aae10618e9 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -8,10 +8,12 @@ from numpy import nan import numpy as np +import pytest -from pandas import (DataFrame, compat, option_context) -from pandas.compat import StringIO, lrange, u -import pandas.formats.format as fmt +from pandas import (DataFrame, Series, compat, option_context, + date_range, period_range, Categorical) +from pandas.compat import StringIO, lrange, u, PYPY +import pandas.io.formats.format as fmt import pandas as pd import pandas.util.testing as tm @@ -23,7 +25,7 @@ # structure -class TestDataFrameReprInfoEtc(tm.TestCase, TestData): +class TestDataFrameReprInfoEtc(TestData): def test_repr_empty(self): # empty @@ -40,7 +42,7 @@ def test_repr_mixed(self): foo = repr(self.mixed_frame) # noqa self.mixed_frame.info(verbose=False, buf=buf) - @tm.slow + @pytest.mark.slow def test_repr_mixed_big(self): # big mixed biggie = DataFrame({'A': np.random.randn(200), @@ -72,22 +74,22 @@ def test_repr(self): self.empty.info(buf=buf) df = DataFrame(["a\n\r\tb"], columns=["a\n\r\td"], index=["a\n\r\tf"]) - self.assertFalse("\t" in repr(df)) - self.assertFalse("\r" in repr(df)) - self.assertFalse("a\n" in repr(df)) + assert 
"\t" not in repr(df) + assert "\r" not in repr(df) + assert "a\n" not in repr(df) def test_repr_dimensions(self): df = DataFrame([[1, 2, ], [3, 4]]) with option_context('display.show_dimensions', True): - self.assertTrue("2 rows x 2 columns" in repr(df)) + assert "2 rows x 2 columns" in repr(df) with option_context('display.show_dimensions', False): - self.assertFalse("2 rows x 2 columns" in repr(df)) + assert "2 rows x 2 columns" not in repr(df) with option_context('display.show_dimensions', 'truncate'): - self.assertFalse("2 rows x 2 columns" in repr(df)) + assert "2 rows x 2 columns" not in repr(df) - @tm.slow + @pytest.mark.slow def test_repr_big(self): # big one biggie = DataFrame(np.zeros((200, 4)), columns=lrange(4), @@ -118,7 +120,7 @@ def test_repr_unsortable(self): fmt.set_option('display.max_rows', 1000, 'display.max_columns', 1000) repr(self.frame) - self.reset_display_options() + tm.reset_display_options() warnings.filters = warn_filters @@ -132,11 +134,11 @@ def test_repr_unicode(self): result = repr(df) ex_top = ' A' - self.assertEqual(result.split('\n')[0].rstrip(), ex_top) + assert result.split('\n')[0].rstrip() == ex_top df = DataFrame({'A': [uval, uval]}) result = repr(df) - self.assertEqual(result.split('\n')[0].rstrip(), ex_top) + assert result.split('\n')[0].rstrip() == ex_top def test_unicode_string_with_unicode(self): df = DataFrame({'A': [u("\u05d0")]}) @@ -171,7 +173,7 @@ def test_repr_column_name_unicode_truncation_bug(self): ' the File through the code..')}) result = repr(df) - self.assertIn('StringCol', result) + assert 'StringCol' in result def test_latex_repr(self): result = r"""\begin{tabular}{llll} @@ -186,11 +188,12 @@ def test_latex_repr(self): with option_context("display.latex.escape", False, 'display.latex.repr', True): df = DataFrame([[r'$\alpha$', 'b', 'c'], [1, 2, 3]]) - self.assertEqual(result, df._repr_latex_()) + assert result == df._repr_latex_() # GH 12182 - self.assertIsNone(df._repr_latex_()) + assert 
df._repr_latex_() is None + @tm.capture_stdout def test_info(self): io = StringIO() self.frame.info(buf=io) @@ -198,11 +201,8 @@ def test_info(self): frame = DataFrame(np.random.randn(5, 3)) - import sys - sys.stdout = StringIO() frame.info() frame.info(verbose=False) - sys.stdout = sys.__stdout__ def test_info_wide(self): from pandas import set_option, reset_option @@ -213,13 +213,13 @@ def test_info_wide(self): io = StringIO() df.info(buf=io, max_cols=101) rs = io.getvalue() - self.assertTrue(len(rs.splitlines()) > 100) + assert len(rs.splitlines()) > 100 xp = rs set_option('display.max_info_columns', 101) io = StringIO() df.info(buf=io) - self.assertEqual(rs, xp) + assert rs == xp reset_option('display.max_info_columns') def test_info_duplicate_columns(self): @@ -239,8 +239,8 @@ def test_info_duplicate_columns_shows_correct_dtypes(self): frame.info(buf=io) io.seek(0) lines = io.readlines() - self.assertEqual('a 1 non-null int64\n', lines[3]) - self.assertEqual('a 1 non-null float64\n', lines[4]) + assert 'a 1 non-null int64\n' == lines[3] + assert 'a 1 non-null float64\n' == lines[4] def test_info_shows_column_dtypes(self): dtypes = ['int64', 'float64', 'datetime64[ns]', 'timedelta64[ns]', @@ -265,7 +265,7 @@ def test_info_max_cols(self): buf = StringIO() df.info(buf=buf, verbose=verbose) res = buf.getvalue() - self.assertEqual(len(res.strip().split('\n')), len_) + assert len(res.strip().split('\n')) == len_ for len_, verbose in [(10, None), (5, False), (10, True)]: @@ -274,7 +274,7 @@ def test_info_max_cols(self): buf = StringIO() df.info(buf=buf, verbose=verbose) res = buf.getvalue() - self.assertEqual(len(res.strip().split('\n')), len_) + assert len(res.strip().split('\n')) == len_ for len_, max_cols in [(10, 5), (5, 4)]: # setting truncates @@ -282,14 +282,14 @@ def test_info_max_cols(self): buf = StringIO() df.info(buf=buf, max_cols=max_cols) res = buf.getvalue() - self.assertEqual(len(res.strip().split('\n')), len_) + assert len(res.strip().split('\n')) == 
len_ # setting wouldn't truncate with option_context('max_info_columns', 5): buf = StringIO() df.info(buf=buf, max_cols=max_cols) res = buf.getvalue() - self.assertEqual(len(res.strip().split('\n')), len_) + assert len(res.strip().split('\n')) == len_ def test_info_memory_usage(self): # Ensure memory usage is displayed, when asserted, on the last line @@ -301,41 +301,28 @@ def test_info_memory_usage(self): data[i] = np.random.randint(2, size=n).astype(dtype) df = DataFrame(data) buf = StringIO() + # display memory usage case df.info(buf=buf, memory_usage=True) res = buf.getvalue().splitlines() - self.assertTrue("memory usage: " in res[-1]) - # do not display memory usage cas + assert "memory usage: " in res[-1] + + # do not display memory usage case df.info(buf=buf, memory_usage=False) res = buf.getvalue().splitlines() - self.assertTrue("memory usage: " not in res[-1]) + assert "memory usage: " not in res[-1] df.info(buf=buf, memory_usage=True) res = buf.getvalue().splitlines() + # memory usage is a lower bound, so print it as XYZ+ MB - self.assertTrue(re.match(r"memory usage: [^+]+\+", res[-1])) + assert re.match(r"memory usage: [^+]+\+", res[-1]) df.iloc[:, :5].info(buf=buf, memory_usage=True) res = buf.getvalue().splitlines() - # excluded column with object dtype, so estimate is accurate - self.assertFalse(re.match(r"memory usage: [^+]+\+", res[-1])) - - df_with_object_index = pd.DataFrame({'a': [1]}, index=['foo']) - df_with_object_index.info(buf=buf, memory_usage=True) - res = buf.getvalue().splitlines() - self.assertTrue(re.match(r"memory usage: [^+]+\+", res[-1])) - - df_with_object_index.info(buf=buf, memory_usage='deep') - res = buf.getvalue().splitlines() - self.assertTrue(re.match(r"memory usage: [^+]+$", res[-1])) - self.assertGreater(df_with_object_index.memory_usage(index=True, - deep=True).sum(), - df_with_object_index.memory_usage(index=True).sum()) - - df_object = pd.DataFrame({'a': ['a']}) - 
self.assertGreater(df_object.memory_usage(deep=True).sum(), - df_object.memory_usage().sum()) + # excluded column with object dtype, so estimate is accurate + assert not re.match(r"memory usage: [^+]+\+", res[-1]) # Test a DataFrame with duplicate columns dtypes = ['int64', 'int64', 'int64', 'float64'] @@ -346,19 +333,27 @@ def test_info_memory_usage(self): df = DataFrame(data) df.columns = dtypes + df_with_object_index = pd.DataFrame({'a': [1]}, index=['foo']) + df_with_object_index.info(buf=buf, memory_usage=True) + res = buf.getvalue().splitlines() + assert re.match(r"memory usage: [^+]+\+", res[-1]) + + df_with_object_index.info(buf=buf, memory_usage='deep') + res = buf.getvalue().splitlines() + assert re.match(r"memory usage: [^+]+$", res[-1]) + # Ensure df size is as expected # (cols * rows * bytes) + index size df_size = df.memory_usage().sum() exp_size = len(dtypes) * n * 8 + df.index.nbytes - self.assertEqual(df_size, exp_size) + assert df_size == exp_size # Ensure number of cols in memory_usage is the same as df size_df = np.size(df.columns.values) + 1 # index=True; default - self.assertEqual(size_df, np.size(df.memory_usage())) + assert size_df == np.size(df.memory_usage()) # assert deep works only on object - self.assertEqual(df.memory_usage().sum(), - df.memory_usage(deep=True).sum()) + assert df.memory_usage().sum() == df.memory_usage(deep=True).sum() # test for validity DataFrame(1, index=['a'], columns=['A'] @@ -375,10 +370,76 @@ def test_info_memory_usage(self): df.memory_usage(index=True) df.index.values.nbytes + mem = df.memory_usage(deep=True).sum() + assert mem > 0 + + @pytest.mark.skipif(PYPY, + reason="on PyPy deep=True doesn't change result") + def test_info_memory_usage_deep_not_pypy(self): + df_with_object_index = pd.DataFrame({'a': [1]}, index=['foo']) + assert (df_with_object_index.memory_usage( + index=True, deep=True).sum() > + df_with_object_index.memory_usage( + index=True).sum()) + + df_object = pd.DataFrame({'a': ['a']}) + assert 
(df_object.memory_usage(deep=True).sum() > + df_object.memory_usage().sum()) + + @pytest.mark.skipif(not PYPY, + reason="on PyPy deep=True does not change result") + def test_info_memory_usage_deep_pypy(self): + df_with_object_index = pd.DataFrame({'a': [1]}, index=['foo']) + assert (df_with_object_index.memory_usage( + index=True, deep=True).sum() == + df_with_object_index.memory_usage( + index=True).sum()) + + df_object = pd.DataFrame({'a': ['a']}) + assert (df_object.memory_usage(deep=True).sum() == + df_object.memory_usage().sum()) + + @pytest.mark.skipif(PYPY, reason="PyPy getsizeof() fails by design") + def test_usage_via_getsizeof(self): + df = DataFrame( + data=1, + index=pd.MultiIndex.from_product( + [['a'], range(1000)]), + columns=['A'] + ) + mem = df.memory_usage(deep=True).sum() # sys.getsizeof will call the .memory_usage with # deep=True, and add on some GC overhead - diff = df.memory_usage(deep=True).sum() - sys.getsizeof(df) - self.assertTrue(abs(diff) < 100) + diff = mem - sys.getsizeof(df) + assert abs(diff) < 100 + + def test_info_memory_usage_qualified(self): + + buf = StringIO() + df = DataFrame(1, columns=list('ab'), + index=[1, 2, 3]) + df.info(buf=buf) + assert '+' not in buf.getvalue() + + buf = StringIO() + df = DataFrame(1, columns=list('ab'), + index=list('ABC')) + df.info(buf=buf) + assert '+' in buf.getvalue() + + buf = StringIO() + df = DataFrame(1, columns=list('ab'), + index=pd.MultiIndex.from_product( + [range(3), range(3)])) + df.info(buf=buf) + assert '+' not in buf.getvalue() + + buf = StringIO() + df = DataFrame(1, columns=list('ab'), + index=pd.MultiIndex.from_product( + [range(3), ['foo', 'bar']])) + df.info(buf=buf) + assert '+' in buf.getvalue() def test_info_memory_usage_bug_on_multiindex(self): # GH 14308 @@ -398,11 +459,11 @@ def memory_usage(f): df = DataFrame({'value': np.random.randn(N * M)}, index=index) unstacked = df.unstack('id') - self.assertEqual(df.values.nbytes, unstacked.values.nbytes) - 
self.assertTrue(memory_usage(df) > memory_usage(unstacked)) + assert df.values.nbytes == unstacked.values.nbytes + assert memory_usage(df) > memory_usage(unstacked) # high upper bound - self.assertTrue(memory_usage(unstacked) - memory_usage(df) < 2000) + assert memory_usage(unstacked) - memory_usage(df) < 2000 def test_info_categorical(self): # GH14298 @@ -411,3 +472,34 @@ def test_info_categorical(self): buf = StringIO() df.info(buf=buf) + + def test_info_categorical_column(self): + + # make sure it works + n = 2500 + df = DataFrame({'int64': np.random.randint(100, size=n)}) + df['category'] = Series(np.array(list('abcdefghij')).take( + np.random.randint(0, 10, size=n))).astype('category') + df.isna() + buf = StringIO() + df.info(buf=buf) + + df2 = df[df['category'] == 'd'] + buf = compat.StringIO() + df2.info(buf=buf) + + def test_repr_categorical_dates_periods(self): + # normal DataFrame + dt = date_range('2011-01-01 09:00', freq='H', periods=5, + tz='US/Eastern') + p = period_range('2011-01', freq='M', periods=5) + df = DataFrame({'dt': dt, 'p': p}) + exp = """ dt p +0 2011-01-01 09:00:00-05:00 2011-01 +1 2011-01-01 10:00:00-05:00 2011-02 +2 2011-01-01 11:00:00-05:00 2011-03 +3 2011-01-01 12:00:00-05:00 2011-04 +4 2011-01-01 13:00:00-05:00 2011-05""" + + df = DataFrame({'dt': Categorical(dt), 'p': Categorical(p)}) + assert repr(df) == exp diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index 1890b33e3dbaa..68df0982a1e3e 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -2,8 +2,11 @@ from __future__ import print_function +from warnings import catch_warnings from datetime import datetime + import itertools +import pytest from numpy.random import randn from numpy import nan @@ -14,16 +17,14 @@ Timedelta, Period) import pandas as pd -from pandas.util.testing import (assert_series_equal, - assert_frame_equal, - assertRaisesRegexp) +from pandas.util.testing import assert_series_equal, 
assert_frame_equal import pandas.util.testing as tm from pandas.tests.frame.common import TestData -class TestDataFrameReshape(tm.TestCase, TestData): +class TestDataFrameReshape(TestData): def test_pivot(self): data = { @@ -40,44 +41,45 @@ def test_pivot(self): 'One': {'A': 1., 'B': 2., 'C': 3.}, 'Two': {'A': 1., 'B': 2., 'C': 3.} }) - expected.index.name, expected.columns.name = 'index', 'columns' - assert_frame_equal(pivoted, expected) + expected.index.name, expected.columns.name = 'index', 'columns' + tm.assert_frame_equal(pivoted, expected) # name tracking - self.assertEqual(pivoted.index.name, 'index') - self.assertEqual(pivoted.columns.name, 'columns') + assert pivoted.index.name == 'index' + assert pivoted.columns.name == 'columns' # don't specify values pivoted = frame.pivot(index='index', columns='columns') - self.assertEqual(pivoted.index.name, 'index') - self.assertEqual(pivoted.columns.names, (None, 'columns')) + assert pivoted.index.name == 'index' + assert pivoted.columns.names == (None, 'columns') - # pivot multiple columns - wp = tm.makePanel() - lp = wp.to_frame() - df = lp.reset_index() - assert_frame_equal(df.pivot('major', 'minor'), lp.unstack()) + with catch_warnings(record=True): + # pivot multiple columns + wp = tm.makePanel() + lp = wp.to_frame() + df = lp.reset_index() + tm.assert_frame_equal(df.pivot('major', 'minor'), lp.unstack()) def test_pivot_duplicates(self): data = DataFrame({'a': ['bar', 'bar', 'foo', 'foo', 'foo'], 'b': ['one', 'two', 'one', 'one', 'two'], 'c': [1., 2., 3., 3., 4.]}) - with assertRaisesRegexp(ValueError, 'duplicate entries'): + with tm.assert_raises_regex(ValueError, 'duplicate entries'): data.pivot('a', 'b', 'c') def test_pivot_empty(self): df = DataFrame({}, columns=['a', 'b', 'c']) result = df.pivot('a', 'b', 'c') expected = DataFrame({}) - assert_frame_equal(result, expected, check_names=False) + tm.assert_frame_equal(result, expected, check_names=False) def test_pivot_integer_bug(self): df = 
DataFrame(data=[("A", "1", "A1"), ("B", "2", "B2")]) result = df.pivot(index=1, columns=0, values=2) repr(result) - self.assert_index_equal(result.columns, Index(['A', 'B'], name=0)) + tm.assert_index_equal(result.columns, Index(['A', 'B'], name=0)) def test_pivot_index_none(self): # gh-3962 @@ -104,36 +106,56 @@ def test_pivot_index_none(self): ('values', 'Two')], names=[None, 'columns']) expected.index.name = 'index' - assert_frame_equal(result, expected, check_names=False) - self.assertEqual(result.index.name, 'index',) - self.assertEqual(result.columns.names, (None, 'columns')) + tm.assert_frame_equal(result, expected, check_names=False) + assert result.index.name == 'index' + assert result.columns.names == (None, 'columns') expected.columns = expected.columns.droplevel(0) - - data = { - 'index': range(7), - 'columns': ['One', 'One', 'One', 'Two', 'Two', 'Two'], - 'values': [1., 2., 3., 3., 2., 1.] - } - result = frame.pivot(columns='columns', values='values') expected.columns.name = 'columns' - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_stack_unstack(self): - stacked = self.frame.stack() + df = self.frame.copy() + df[:] = np.arange(np.prod(df.shape)).reshape(df.shape) + + stacked = df.stack() stacked_df = DataFrame({'foo': stacked, 'bar': stacked}) unstacked = stacked.unstack() unstacked_df = stacked_df.unstack() - assert_frame_equal(unstacked, self.frame) - assert_frame_equal(unstacked_df['bar'], self.frame) + assert_frame_equal(unstacked, df) + assert_frame_equal(unstacked_df['bar'], df) unstacked_cols = stacked.unstack(0) unstacked_cols_df = stacked_df.unstack(0) - assert_frame_equal(unstacked_cols.T, self.frame) - assert_frame_equal(unstacked_cols_df['bar'].T, self.frame) + assert_frame_equal(unstacked_cols.T, df) + assert_frame_equal(unstacked_cols_df['bar'].T, df) + + def test_stack_mixed_level(self): + # GH 18310 + levels = [range(3), [3, 'a', 'b'], [1, 2]] + + # flat columns: + df = DataFrame(1, 
index=levels[0], columns=levels[1]) + result = df.stack() + expected = Series(1, index=MultiIndex.from_product(levels[:2])) + assert_series_equal(result, expected) + + # MultiIndex columns: + df = DataFrame(1, index=levels[0], + columns=MultiIndex.from_product(levels[1:])) + result = df.stack(1) + expected = DataFrame(1, index=MultiIndex.from_product([levels[0], + levels[2]]), + columns=levels[1]) + assert_frame_equal(result, expected) + + # as above, but used labels in level are actually of homogeneous type + result = df[['a', 'b']].stack(1) + expected = expected[['a', 'b']] + assert_frame_equal(result, expected) def test_unstack_fill(self): @@ -156,6 +178,30 @@ def test_unstack_fill(self): index=['x', 'y', 'z'], dtype=np.float) assert_frame_equal(result, expected) + # GH #13971: fill_value when unstacking multiple levels: + df = DataFrame({'x': ['a', 'a', 'b'], + 'y': ['j', 'k', 'j'], + 'z': [0, 1, 2], + 'w': [0, 1, 2]}).set_index(['x', 'y', 'z']) + unstacked = df.unstack(['x', 'y'], fill_value=0) + key = ('w', 'b', 'j') + expected = unstacked[key] + result = pd.Series([0, 0, 2], index=unstacked.index, name=key) + assert_series_equal(result, expected) + + stacked = unstacked.stack(['x', 'y']) + stacked.index = stacked.index.reorder_levels(df.index.names) + # Workaround for GH #17886 (unnecessarily casts to float): + stacked = stacked.astype(np.int64) + result = stacked.loc[df.index] + assert_frame_equal(result, df) + + # From a series + s = df['w'] + result = s.unstack(['x', 'y'], fill_value=0) + expected = unstacked['w'] + assert_frame_equal(result, expected) + def test_unstack_fill_frame(self): # From a dataframe @@ -359,7 +405,7 @@ def test_stack_mixed_levels(self): # When mixed types are passed and the ints are not level # names, raise - self.assertRaises(ValueError, df2.stack, level=['animal', 0]) + pytest.raises(ValueError, df2.stack, level=['animal', 0]) # GH #8584: Having 0 in the level names could raise a # strange error about lexsort depth @@ -440,7 
+486,7 @@ def test_unstack_to_series(self): # check reversibility data = self.frame.unstack() - self.assertTrue(isinstance(data, Series)) + assert isinstance(data, Series) undo = data.unstack().T assert_frame_equal(undo, self.frame) @@ -511,18 +557,76 @@ def test_unstack_dtypes(self): right = right.set_index(['A', 'B']).unstack(0) right[('D', 'a')] = right[('D', 'a')].astype('int64') - self.assertEqual(left.shape, (3, 2)) - assert_frame_equal(left, right) - - def test_unstack_non_unique_index_names(self): - idx = MultiIndex.from_tuples([('a', 'b'), ('c', 'd')], - names=['c1', 'c1']) - df = DataFrame([1, 2], index=idx) - with tm.assertRaises(ValueError): - df.unstack('c1') - - with tm.assertRaises(ValueError): - df.T.stack('c1') + assert left.shape == (3, 2) + tm.assert_frame_equal(left, right) + + def test_unstack_unused_levels(self): + # GH 17845: unused labels in index make unstack() cast int to float + idx = pd.MultiIndex.from_product([['a'], ['A', 'B', 'C', 'D']])[:-1] + df = pd.DataFrame([[1, 0]] * 3, index=idx) + + result = df.unstack() + exp_col = pd.MultiIndex.from_product([[0, 1], ['A', 'B', 'C']]) + expected = pd.DataFrame([[1, 1, 1, 0, 0, 0]], index=['a'], + columns=exp_col) + tm.assert_frame_equal(result, expected) + assert((result.columns.levels[1] == idx.levels[1]).all()) + + # Unused items on both levels + levels = [[0, 1, 7], [0, 1, 2, 3]] + labels = [[0, 0, 1, 1], [0, 2, 0, 2]] + idx = pd.MultiIndex(levels, labels) + block = np.arange(4).reshape(2, 2) + df = pd.DataFrame(np.concatenate([block, block + 4]), index=idx) + result = df.unstack() + expected = pd.DataFrame(np.concatenate([block * 2, block * 2 + 1], + axis=1), + columns=idx) + tm.assert_frame_equal(result, expected) + assert((result.columns.levels[1] == idx.levels[1]).all()) + + # With mixed dtype and NaN + levels = [['a', 2, 'c'], [1, 3, 5, 7]] + labels = [[0, -1, 1, 1], [0, 2, -1, 2]] + idx = pd.MultiIndex(levels, labels) + data = np.arange(8) + df = pd.DataFrame(data.reshape(4, 2), 
index=idx) + + cases = ((0, [13, 16, 6, 9, 2, 5, 8, 11], + [np.nan, 'a', 2], [np.nan, 5, 1]), + (1, [8, 11, 1, 4, 12, 15, 13, 16], + [np.nan, 5, 1], [np.nan, 'a', 2])) + for level, idces, col_level, idx_level in cases: + result = df.unstack(level=level) + exp_data = np.zeros(18) * np.nan + exp_data[idces] = data + cols = pd.MultiIndex.from_product([[0, 1], col_level]) + expected = pd.DataFrame(exp_data.reshape(3, 6), + index=idx_level, columns=cols) + # Broken (GH 18455): + # tm.assert_frame_equal(result, expected) + diff = result - expected + assert(diff.sum().sum() == 0) + assert((diff + 1).sum().sum() == 8) + + assert((result.columns.levels[1] == idx.levels[level]).all()) + + @pytest.mark.parametrize("cols", [['A', 'C'], slice(None)]) + def test_unstack_unused_level(self, cols): + # GH 18562 : unused labels on the unstacked level + df = pd.DataFrame([[2010, 'a', 'I'], + [2011, 'b', 'II']], + columns=['A', 'B', 'C']) + + ind = df.set_index(['A', 'B', 'C'], drop=False) + selection = ind.loc[(slice(None), slice(None), 'I'), cols] + result = selection.unstack() + + expected = ind.iloc[[0]][cols] + expected.columns = MultiIndex.from_product([expected.columns, ['I']], + names=[None, 'C']) + expected.index = expected.index.droplevel('C') + tm.assert_frame_equal(result, expected) def test_unstack_nan_index(self): # GH7466 cast = lambda val: '{0:1}'.format('' if val != val else val) @@ -530,12 +634,12 @@ def test_unstack_nan_index(self): # GH7466 def verify(df): mk_list = lambda a: list(a) if isinstance(a, tuple) else [a] - rows, cols = df.notnull().values.nonzero() + rows, cols = df.notna().values.nonzero() for i, j in zip(rows, cols): left = sorted(df.iloc[i, j].split('.')) right = mk_list(df.index[i]) + mk_list(df.columns[j]) right = sorted(list(map(cast, right))) - self.assertEqual(left, right) + assert left == right df = DataFrame({'jim': ['a', 'b', nan, 'd'], 'joe': ['w', 'x', 'y', 'z'], @@ -549,7 +653,7 @@ def verify(df): mi = df.set_index(list(idx)) for lev in 
range(2): udf = mi.unstack(level=lev) - self.assertEqual(udf.notnull().values.sum(), len(df)) + assert udf.notna().values.sum() == len(df) verify(udf['jolie']) df = DataFrame({'1st': ['d'] * 3 + [nan] * 5 + ['a'] * 2 + @@ -567,7 +671,7 @@ def verify(df): mi = df.set_index(list(idx)) for lev in range(3): udf = mi.unstack(level=lev) - self.assertEqual(udf.notnull().values.sum(), 2 * len(df)) + assert udf.notna().values.sum() == 2 * len(df) for col in ['4th', '5th']: verify(udf[col]) @@ -615,9 +719,10 @@ def verify(df): assert_frame_equal(left, right) # GH7401 - df = pd.DataFrame({'A': list('aaaaabbbbb'), 'C': np.arange(10), + df = pd.DataFrame({'A': list('aaaaabbbbb'), 'B': (date_range('2012-01-01', periods=5) - .tolist() * 2)}) + .tolist() * 2), + 'C': np.arange(10)}) df.iloc[3, 1] = np.NaN left = df.set_index(['A', 'B']).unstack() @@ -672,12 +777,12 @@ def verify(df): df.loc[1, '3rd'] = df.loc[4, '3rd'] = nan left = df.set_index(['1st', '2nd', '3rd']).unstack(['2nd', '3rd']) - self.assertEqual(left.notnull().values.sum(), 2 * len(df)) + assert left.notna().values.sum() == 2 * len(df) for col in ['jim', 'joe']: for _, r in df.iterrows(): key = r['1st'], (col, r['2nd'], r['3rd']) - self.assertEqual(r[col], left.loc[key]) + assert r[col] == left.loc[key] def test_stack_datetime_column_multiIndex(self): # GH 8039 @@ -761,3 +866,26 @@ def test_stack_preserve_categorical_dtype(self): expected = Series([10, 11, 12], index=midx) tm.assert_series_equal(result, expected) + + +def test_unstack_fill_frame_object(): + # GH12815 Test unstacking with object. 
+ data = pd.Series(['a', 'b', 'c', 'a'], dtype='object') + data.index = pd.MultiIndex.from_tuples( + [('x', 'a'), ('x', 'b'), ('y', 'b'), ('z', 'a')]) + + # By default missing values will be NaN + result = data.unstack() + expected = pd.DataFrame( + {'a': ['a', np.nan, 'a'], 'b': ['b', 'c', np.nan]}, + index=list('xyz') + ) + assert_frame_equal(result, expected) + + # Fill with any value replaces missing values as expected + result = data.unstack(fill_value='d') + expected = pd.DataFrame( + {'a': ['a', 'd', 'a'], 'b': ['b', 'c', 'd']}, + index=list('xyz') + ) + assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_sort_values_level_as_str.py b/pandas/tests/frame/test_sort_values_level_as_str.py new file mode 100644 index 0000000000000..3b4eadfce81cd --- /dev/null +++ b/pandas/tests/frame/test_sort_values_level_as_str.py @@ -0,0 +1,126 @@ +import numpy as np +import pytest + +from pandas import DataFrame, Index +from pandas.errors import PerformanceWarning +from pandas.util import testing as tm +from pandas.util.testing import assert_frame_equal + + +@pytest.fixture +def df_none(): + return DataFrame({ + 'outer': ['a', 'a', 'a', 'b', 'b', 'b'], + 'inner': [1, 2, 2, 2, 1, 1], + 'A': np.arange(6, 0, -1), + ('B', 5): ['one', 'one', 'two', 'two', 'one', 'one']}) + + +@pytest.fixture(params=[ + ['outer'], + ['outer', 'inner'] +]) +def df_idx(request, df_none): + levels = request.param + return df_none.set_index(levels) + + +@pytest.fixture(params=[ + 'inner', # index level + ['outer'], # list of index level + 'A', # column + [('B', 5)], # list of column + ['inner', 'outer'], # two index levels + [('B', 5), 'outer'], # index level and column + ['A', ('B', 5)], # Two columns + ['inner', 'outer'] # two index levels and column +]) +def sort_names(request): + return request.param + + +@pytest.fixture(params=[True, False]) +def ascending(request): + return request.param + + +def test_sort_index_level_and_column_label( + df_none, df_idx, sort_names, 
ascending): + + # GH 14353 + + # Get index levels from df_idx + levels = df_idx.index.names + + # Compute expected by sorting on columns and the setting index + expected = df_none.sort_values(by=sort_names, + ascending=ascending, + axis=0).set_index(levels) + + # Compute result sorting on mix on columns and index levels + result = df_idx.sort_values(by=sort_names, + ascending=ascending, + axis=0) + + assert_frame_equal(result, expected) + + +def test_sort_column_level_and_index_label( + df_none, df_idx, sort_names, ascending): + + # GH 14353 + + # Get levels from df_idx + levels = df_idx.index.names + + # Compute expected by sorting on axis=0, setting index levels, and then + # transposing. For some cases this will result in a frame with + # multiple column levels + expected = df_none.sort_values(by=sort_names, + ascending=ascending, + axis=0).set_index(levels).T + + # Compute result by transposing and sorting on axis=1. + result = df_idx.T.sort_values(by=sort_names, + ascending=ascending, + axis=1) + + if len(levels) > 1: + # Accessing multi-level columns that are not lexsorted raises a + # performance warning + with tm.assert_produces_warning(PerformanceWarning, + check_stacklevel=False): + assert_frame_equal(result, expected) + else: + assert_frame_equal(result, expected) + + +def test_sort_values_column_index_level_precedence(): + # GH 14353, when a string passed as the `by` parameter + # matches a column and an index level the column takes + # precedence + + # Construct DataFrame with index and column named 'idx' + idx = Index(np.arange(1, 7), name='idx') + df = DataFrame({'A': np.arange(11, 17), + 'idx': np.arange(6, 0, -1)}, + index=idx) + + # Sorting by 'idx' should sort by the idx column and raise a + # FutureWarning + with tm.assert_produces_warning(FutureWarning): + result = df.sort_values(by='idx') + + # This should be equivalent to sorting by the 'idx' index level in + # descending order + expected = df.sort_index(level='idx', ascending=False) + 
assert_frame_equal(result, expected) + + # Perform same test with MultiIndex + df_multi = df.set_index('A', append=True) + + with tm.assert_produces_warning(FutureWarning): + result = df_multi.sort_values(by='idx') + + expected = df_multi.sort_index(level='idx', ascending=False) + assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_sorting.py b/pandas/tests/frame/test_sorting.py index 7779afdc47b48..5bd239f8a3034 100644 --- a/pandas/tests/frame/test_sorting.py +++ b/pandas/tests/frame/test_sorting.py @@ -2,71 +2,30 @@ from __future__ import print_function +import pytest +import random import numpy as np +import pandas as pd from pandas.compat import lrange +from pandas.api.types import CategoricalDtype from pandas import (DataFrame, Series, MultiIndex, Timestamp, - date_range, NaT) + date_range, NaT, IntervalIndex) -from pandas.util.testing import (assert_series_equal, - assert_frame_equal, - assertRaisesRegexp) +from pandas.util.testing import assert_series_equal, assert_frame_equal import pandas.util.testing as tm from pandas.tests.frame.common import TestData -class TestDataFrameSorting(tm.TestCase, TestData): - - def test_sort_index(self): - # GH13496 - - frame = DataFrame(np.arange(16).reshape(4, 4), index=[1, 2, 3, 4], - columns=['A', 'B', 'C', 'D']) - - # axis=0 : sort rows by index labels - unordered = frame.loc[[3, 2, 4, 1]] - result = unordered.sort_index(axis=0) - expected = frame - assert_frame_equal(result, expected) - - result = unordered.sort_index(ascending=False) - expected = frame[::-1] - assert_frame_equal(result, expected) - - # axis=1 : sort columns by column names - unordered = frame.iloc[:, [2, 1, 3, 0]] - result = unordered.sort_index(axis=1) - assert_frame_equal(result, frame) - - result = unordered.sort_index(axis=1, ascending=False) - expected = frame.iloc[:, ::-1] - assert_frame_equal(result, expected) - - def test_sort_index_multiindex(self): - # GH13496 - - # sort rows by specified level of multi-index - mi = 
MultiIndex.from_tuples([[2, 1, 3], [1, 1, 1]], names=list('ABC')) - df = DataFrame([[1, 2], [3, 4]], mi) - - # MI sort, but no level: sort_level has no effect - mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC')) - df = DataFrame([[1, 2], [3, 4]], mi) - result = df.sort_index(sort_remaining=False) - expected = df.sort_index() - assert_frame_equal(result, expected) +class TestDataFrameSorting(TestData): def test_sort(self): frame = DataFrame(np.arange(16).reshape(4, 4), index=[1, 2, 3, 4], columns=['A', 'B', 'C', 'D']) - # 9816 deprecated - with tm.assert_produces_warning(FutureWarning): - frame.sort(columns='A') - with tm.assert_produces_warning(FutureWarning): - frame.sort() + # see gh-9816 with tm.assert_produces_warning(FutureWarning): frame.sortlevel() @@ -103,7 +62,7 @@ def test_sort_values(self): sorted_df = frame.sort_values(by=['B', 'A'], ascending=[True, False]) assert_frame_equal(sorted_df, expected) - self.assertRaises(ValueError, lambda: frame.sort_values( + pytest.raises(ValueError, lambda: frame.sort_values( by=['A', 'B'], axis=2, inplace=True)) # by row (axis=1): GH 10806 @@ -128,7 +87,7 @@ def test_sort_values(self): assert_frame_equal(sorted_df, expected) msg = r'Length of ascending \(5\) != length of by \(2\)' - with assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): frame.sort_values(by=['A', 'B'], axis=0, ascending=[True] * 5) def test_sort_values_inplace(self): @@ -155,21 +114,6 @@ def test_sort_values_inplace(self): expected = frame.sort_values(by=['A', 'B'], ascending=False) assert_frame_equal(sorted_df, expected) - def test_sort_index_categorical_index(self): - - df = (DataFrame({'A': np.arange(6, dtype='int64'), - 'B': Series(list('aabbca')) - .astype('category', categories=list('cab'))}) - .set_index('B')) - - result = df.sort_index() - expected = df.iloc[[4, 0, 1, 5, 2, 3]] - assert_frame_equal(result, expected) - - result = df.sort_index(ascending=False) - expected = df.iloc[[3, 2, 5, 
1, 0, 4]] - assert_frame_equal(result, expected) - def test_sort_nan(self): # GH3917 nan = np.nan @@ -295,8 +239,122 @@ def test_stable_descending_multicolumn_sort(self): kind='mergesort') assert_frame_equal(sorted_df, expected) + def test_stable_categorial(self): + # GH 16793 + df = DataFrame({ + 'x': pd.Categorical(np.repeat([1, 2, 3, 4], 5), ordered=True) + }) + expected = df.copy() + sorted_df = df.sort_values('x', kind='mergesort') + assert_frame_equal(sorted_df, expected) + + def test_sort_datetimes(self): + + # GH 3461, argsort / lexsort differences for a datetime column + df = DataFrame(['a', 'a', 'a', 'b', 'c', 'd', 'e', 'f', 'g'], + columns=['A'], + index=date_range('20130101', periods=9)) + dts = [Timestamp(x) + for x in ['2004-02-11', '2004-01-21', '2004-01-26', + '2005-09-20', '2010-10-04', '2009-05-12', + '2008-11-12', '2010-09-28', '2010-09-28']] + df['B'] = dts[::2] + dts[1::2] + df['C'] = 2. + df['A1'] = 3. + + df1 = df.sort_values(by='A') + df2 = df.sort_values(by=['A']) + assert_frame_equal(df1, df2) + + df1 = df.sort_values(by='B') + df2 = df.sort_values(by=['B']) + assert_frame_equal(df1, df2) + + df1 = df.sort_values(by='B') + + df2 = df.sort_values(by=['C', 'B']) + assert_frame_equal(df1, df2) + + def test_frame_column_inplace_sort_exception(self): + s = self.frame['A'] + with tm.assert_raises_regex(ValueError, "This Series is a view"): + s.sort_values(inplace=True) + + cp = s.copy() + cp.sort_values() # it works! + + def test_sort_nat_values_in_int_column(self): + + # GH 14922: "sorting with large float and multiple columns incorrect" + + # cause was that the int64 value NaT was considered as "na". Which is + # only correct for datetime64 columns. 
+ + int_values = (2, int(NaT)) + float_values = (2.0, -1.797693e308) + + df = DataFrame(dict(int=int_values, float=float_values), + columns=["int", "float"]) + + df_reversed = DataFrame(dict(int=int_values[::-1], + float=float_values[::-1]), + columns=["int", "float"], + index=[1, 0]) + + # NaT is not a "na" for int64 columns, so na_position must not + # influence the result: + df_sorted = df.sort_values(["int", "float"], na_position="last") + assert_frame_equal(df_sorted, df_reversed) + + df_sorted = df.sort_values(["int", "float"], na_position="first") + assert_frame_equal(df_sorted, df_reversed) + + # reverse sorting order + df_sorted = df.sort_values(["int", "float"], ascending=False) + assert_frame_equal(df_sorted, df) + + # and now check if NaT is still considered as "na" for datetime64 + # columns: + df = DataFrame(dict(datetime=[Timestamp("2016-01-01"), NaT], + float=float_values), columns=["datetime", "float"]) + + df_reversed = DataFrame(dict(datetime=[NaT, Timestamp("2016-01-01")], + float=float_values[::-1]), + columns=["datetime", "float"], + index=[1, 0]) + + df_sorted = df.sort_values(["datetime", "float"], na_position="first") + assert_frame_equal(df_sorted, df_reversed) + + df_sorted = df.sort_values(["datetime", "float"], na_position="last") + assert_frame_equal(df_sorted, df) + + # Ascending should not affect the results. 
+ df_sorted = df.sort_values(["datetime", "float"], ascending=False) + assert_frame_equal(df_sorted, df) + + def test_sort_nat(self): + + # GH 16836 + + d1 = [Timestamp(x) for x in ['2016-01-01', '2015-01-01', + np.nan, '2016-01-01']] + d2 = [Timestamp(x) for x in ['2017-01-01', '2014-01-01', + '2016-01-01', '2015-01-01']] + df = pd.DataFrame({'a': d1, 'b': d2}, index=[0, 1, 2, 3]) + + d3 = [Timestamp(x) for x in ['2015-01-01', '2016-01-01', + '2016-01-01', np.nan]] + d4 = [Timestamp(x) for x in ['2014-01-01', '2015-01-01', + '2017-01-01', '2016-01-01']] + expected = pd.DataFrame({'a': d3, 'b': d4}, index=[1, 3, 0, 2]) + sorted_df = df.sort_values(by=['a', 'b'], ) + tm.assert_frame_equal(sorted_df, expected) + + +class TestDataFrameSortIndexKinds(TestData): + def test_sort_index_multicolumn(self): - import random A = np.arange(5).repeat(20) B = np.tile(np.arange(5), 20) random.shuffle(A) @@ -340,7 +398,7 @@ def test_sort_index_inplace(self): df.sort_index(inplace=True) expected = frame assert_frame_equal(df, expected) - self.assertNotEqual(a_id, id(df['A'])) + assert a_id != id(df['A']) df = unordered.copy() df.sort_index(ascending=False, inplace=True) @@ -397,26 +455,26 @@ def test_sort_index_duplicates(self): df = DataFrame([lrange(5, 9), lrange(4)], columns=['a', 'a', 'b', 'b']) - with assertRaisesRegexp(ValueError, 'duplicate'): + with tm.assert_raises_regex(ValueError, 'not unique'): # use .sort_values #9816 with tm.assert_produces_warning(FutureWarning): df.sort_index(by='a') - with assertRaisesRegexp(ValueError, 'duplicate'): + with tm.assert_raises_regex(ValueError, 'not unique'): df.sort_values(by='a') - with assertRaisesRegexp(ValueError, 'duplicate'): + with tm.assert_raises_regex(ValueError, 'not unique'): # use .sort_values #9816 with tm.assert_produces_warning(FutureWarning): df.sort_index(by=['a']) - with assertRaisesRegexp(ValueError, 'duplicate'): + with tm.assert_raises_regex(ValueError, 'not unique'): df.sort_values(by=['a']) - with 
assertRaisesRegexp(ValueError, 'duplicate'): + with tm.assert_raises_regex(ValueError, 'not unique'): # use .sort_values #9816 with tm.assert_produces_warning(FutureWarning): # multi-column 'by' is separate codepath df.sort_index(by=['a', 'b']) - with assertRaisesRegexp(ValueError, 'duplicate'): + with tm.assert_raises_regex(ValueError, 'not unique'): # multi-column 'by' is separate codepath df.sort_values(by=['a', 'b']) @@ -424,11 +482,11 @@ def test_sort_index_duplicates(self): # GH4370 df = DataFrame(np.random.randn(4, 2), columns=MultiIndex.from_tuples([('a', 0), ('a', 1)])) - with assertRaisesRegexp(ValueError, 'levels'): + with tm.assert_raises_regex(ValueError, 'level'): # use .sort_values #9816 with tm.assert_produces_warning(FutureWarning): df.sort_index(by='a') - with assertRaisesRegexp(ValueError, 'levels'): + with tm.assert_raises_regex(ValueError, 'level'): df.sort_values(by='a') # convert tuples to a list of tuples @@ -452,78 +510,73 @@ def test_sort_index_level(self): res = df.sort_index(level=['A', 'B'], sort_remaining=False) assert_frame_equal(df, res) - def test_sort_datetimes(self): - - # GH 3461, argsort / lexsort differences for a datetime column - df = DataFrame(['a', 'a', 'a', 'b', 'c', 'd', 'e', 'f', 'g'], - columns=['A'], - index=date_range('20130101', periods=9)) - dts = [Timestamp(x) - for x in ['2004-02-11', '2004-01-21', '2004-01-26', - '2005-09-20', '2010-10-04', '2009-05-12', - '2008-11-12', '2010-09-28', '2010-09-28']] - df['B'] = dts[::2] + dts[1::2] - df['C'] = 2. - df['A1'] = 3. - - df1 = df.sort_values(by='A') - df2 = df.sort_values(by=['A']) - assert_frame_equal(df1, df2) - - df1 = df.sort_values(by='B') - df2 = df.sort_values(by=['B']) - assert_frame_equal(df1, df2) - - def test_frame_column_inplace_sort_exception(self): - s = self.frame['A'] - with assertRaisesRegexp(ValueError, "This Series is a view"): - s.sort_values(inplace=True) - - cp = s.copy() - cp.sort_values() # it works! 
+ def test_sort_index_categorical_index(self): - def test_sort_nat_values_in_int_column(self): + df = (DataFrame({'A': np.arange(6, dtype='int64'), + 'B': Series(list('aabbca')) + .astype(CategoricalDtype(list('cab')))}) + .set_index('B')) - # GH 14922: "sorting with large float and multiple columns incorrect" + result = df.sort_index() + expected = df.iloc[[4, 0, 1, 5, 2, 3]] + assert_frame_equal(result, expected) - # cause was that the int64 value NaT was considered as "na". Which is - # only correct for datetime64 columns. + result = df.sort_index(ascending=False) + expected = df.iloc[[3, 2, 5, 1, 0, 4]] + assert_frame_equal(result, expected) - int_values = (2, int(NaT)) - float_values = (2.0, -1.797693e308) + def test_sort_index(self): + # GH13496 - df = DataFrame(dict(int=int_values, float=float_values), - columns=["int", "float"]) + frame = DataFrame(np.arange(16).reshape(4, 4), index=[1, 2, 3, 4], + columns=['A', 'B', 'C', 'D']) - df_reversed = DataFrame(dict(int=int_values[::-1], - float=float_values[::-1]), - columns=["int", "float"], - index=[1, 0]) + # axis=0 : sort rows by index labels + unordered = frame.loc[[3, 2, 4, 1]] + result = unordered.sort_index(axis=0) + expected = frame + assert_frame_equal(result, expected) - # NaT is not a "na" for int64 columns, so na_position must not - # influence the result: - df_sorted = df.sort_values(["int", "float"], na_position="last") - assert_frame_equal(df_sorted, df_reversed) + result = unordered.sort_index(ascending=False) + expected = frame[::-1] + assert_frame_equal(result, expected) - df_sorted = df.sort_values(["int", "float"], na_position="first") - assert_frame_equal(df_sorted, df_reversed) + # axis=1 : sort columns by column names + unordered = frame.iloc[:, [2, 1, 3, 0]] + result = unordered.sort_index(axis=1) + assert_frame_equal(result, frame) - # reverse sorting order - df_sorted = df.sort_values(["int", "float"], ascending=False) - assert_frame_equal(df_sorted, df) + result = 
unordered.sort_index(axis=1, ascending=False) + expected = frame.iloc[:, ::-1] + assert_frame_equal(result, expected) - # and now check if NaT is still considered as "na" for datetime64 - # columns: - df = DataFrame(dict(datetime=[Timestamp("2016-01-01"), NaT], - float=float_values), columns=["datetime", "float"]) + def test_sort_index_multiindex(self): + # GH13496 - df_reversed = DataFrame(dict(datetime=[NaT, Timestamp("2016-01-01")], - float=float_values[::-1]), - columns=["datetime", "float"], - index=[1, 0]) + # sort rows by specified level of multi-index + mi = MultiIndex.from_tuples([[2, 1, 3], [1, 1, 1]], names=list('ABC')) + df = DataFrame([[1, 2], [3, 4]], mi) - df_sorted = df.sort_values(["datetime", "float"], na_position="first") - assert_frame_equal(df_sorted, df_reversed) + # MI sort, but no level: sort_level has no effect + mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC')) + df = DataFrame([[1, 2], [3, 4]], mi) + result = df.sort_index(sort_remaining=False) + expected = df.sort_index() + assert_frame_equal(result, expected) - df_sorted = df.sort_values(["datetime", "float"], na_position="last") - assert_frame_equal(df_sorted, df_reversed) + def test_sort_index_intervalindex(self): + # this is a de-facto sort via unstack + # confirming that we sort in the order of the bins + y = Series(np.random.randn(100)) + x1 = Series(np.sign(np.random.randn(100))) + x2 = pd.cut(Series(np.random.randn(100)), + bins=[-3, -0.5, 0, 0.5, 3]) + model = pd.concat([y, x1, x2], axis=1, keys=['Y', 'X1', 'X2']) + + result = model.groupby(['X1', 'X2']).mean().unstack() + expected = IntervalIndex.from_tuples( + [(-3.0, -0.5), (-0.5, 0.0), + (0.0, 0.5), (0.5, 3.0)], + closed='right') + result = result.columns.levels[1].categories + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index 9052a16bf973c..caaa311e9ee96 100644 --- a/pandas/tests/frame/test_subclass.py +++ 
b/pandas/tests/frame/test_subclass.py @@ -2,16 +2,17 @@ from __future__ import print_function +from warnings import catch_warnings import numpy as np -from pandas import DataFrame, Series, MultiIndex, Panel +from pandas import DataFrame, Series, MultiIndex, Panel, Index import pandas as pd import pandas.util.testing as tm from pandas.tests.frame.common import TestData -class TestDataFrameSubclassing(tm.TestCase, TestData): +class TestDataFrameSubclassing(TestData): def test_frame_subclassing_and_slicing(self): # Subclass frame and ensure it returns the right class on slicing it @@ -49,45 +50,45 @@ def custom_frame_function(self): cdf = CustomDataFrame(data) # Did we get back our own DF class? - self.assertTrue(isinstance(cdf, CustomDataFrame)) + assert isinstance(cdf, CustomDataFrame) # Do we get back our own Series class after selecting a column? cdf_series = cdf.col1 - self.assertTrue(isinstance(cdf_series, CustomSeries)) - self.assertEqual(cdf_series.custom_series_function(), 'OK') + assert isinstance(cdf_series, CustomSeries) + assert cdf_series.custom_series_function() == 'OK' # Do we get back our own DF class after slicing row-wise? 
cdf_rows = cdf[1:5] - self.assertTrue(isinstance(cdf_rows, CustomDataFrame)) - self.assertEqual(cdf_rows.custom_frame_function(), 'OK') + assert isinstance(cdf_rows, CustomDataFrame) + assert cdf_rows.custom_frame_function() == 'OK' # Make sure sliced part of multi-index frame is custom class mcol = pd.MultiIndex.from_tuples([('A', 'A'), ('A', 'B')]) cdf_multi = CustomDataFrame([[0, 1], [2, 3]], columns=mcol) - self.assertTrue(isinstance(cdf_multi['A'], CustomDataFrame)) + assert isinstance(cdf_multi['A'], CustomDataFrame) mcol = pd.MultiIndex.from_tuples([('A', ''), ('B', '')]) cdf_multi2 = CustomDataFrame([[0, 1], [2, 3]], columns=mcol) - self.assertTrue(isinstance(cdf_multi2['A'], CustomSeries)) + assert isinstance(cdf_multi2['A'], CustomSeries) def test_dataframe_metadata(self): df = tm.SubclassedDataFrame({'X': [1, 2, 3], 'Y': [1, 2, 3]}, index=['a', 'b', 'c']) df.testattr = 'XXX' - self.assertEqual(df.testattr, 'XXX') - self.assertEqual(df[['X']].testattr, 'XXX') - self.assertEqual(df.loc[['a', 'b'], :].testattr, 'XXX') - self.assertEqual(df.iloc[[0, 1], :].testattr, 'XXX') + assert df.testattr == 'XXX' + assert df[['X']].testattr == 'XXX' + assert df.loc[['a', 'b'], :].testattr == 'XXX' + assert df.iloc[[0, 1], :].testattr == 'XXX' - # GH9776 - self.assertEqual(df.iloc[0:1, :].testattr, 'XXX') + # see gh-9776 + assert df.iloc[0:1, :].testattr == 'XXX' - # GH10553 - unpickled = self.round_trip_pickle(df) + # see gh-10553 + unpickled = tm.round_trip_pickle(df) tm.assert_frame_equal(df, unpickled) - self.assertEqual(df._metadata, unpickled._metadata) - self.assertEqual(df.testattr, unpickled.testattr) + assert df._metadata == unpickled._metadata + assert df.testattr == unpickled.testattr def test_indexing_sliced(self): # GH 11559 @@ -98,54 +99,55 @@ def test_indexing_sliced(self): res = df.loc[:, 'X'] exp = tm.SubclassedSeries([1, 2, 3], index=list('abc'), name='X') tm.assert_series_equal(res, exp) - tm.assertIsInstance(res, tm.SubclassedSeries) + assert 
isinstance(res, tm.SubclassedSeries) res = df.iloc[:, 1] exp = tm.SubclassedSeries([4, 5, 6], index=list('abc'), name='Y') tm.assert_series_equal(res, exp) - tm.assertIsInstance(res, tm.SubclassedSeries) + assert isinstance(res, tm.SubclassedSeries) res = df.loc[:, 'Z'] exp = tm.SubclassedSeries([7, 8, 9], index=list('abc'), name='Z') tm.assert_series_equal(res, exp) - tm.assertIsInstance(res, tm.SubclassedSeries) + assert isinstance(res, tm.SubclassedSeries) res = df.loc['a', :] exp = tm.SubclassedSeries([1, 4, 7], index=list('XYZ'), name='a') tm.assert_series_equal(res, exp) - tm.assertIsInstance(res, tm.SubclassedSeries) + assert isinstance(res, tm.SubclassedSeries) res = df.iloc[1, :] exp = tm.SubclassedSeries([2, 5, 8], index=list('XYZ'), name='b') tm.assert_series_equal(res, exp) - tm.assertIsInstance(res, tm.SubclassedSeries) + assert isinstance(res, tm.SubclassedSeries) res = df.loc['c', :] exp = tm.SubclassedSeries([3, 6, 9], index=list('XYZ'), name='c') tm.assert_series_equal(res, exp) - tm.assertIsInstance(res, tm.SubclassedSeries) + assert isinstance(res, tm.SubclassedSeries) def test_to_panel_expanddim(self): # GH 9762 - class SubclassedFrame(DataFrame): + with catch_warnings(record=True): + class SubclassedFrame(DataFrame): - @property - def _constructor_expanddim(self): - return SubclassedPanel - - class SubclassedPanel(Panel): - pass - - index = MultiIndex.from_tuples([(0, 0), (0, 1), (0, 2)]) - df = SubclassedFrame({'X': [1, 2, 3], 'Y': [4, 5, 6]}, index=index) - result = df.to_panel() - self.assertTrue(isinstance(result, SubclassedPanel)) - expected = SubclassedPanel([[[1, 2, 3]], [[4, 5, 6]]], - items=['X', 'Y'], major_axis=[0], - minor_axis=[0, 1, 2], - dtype='int64') - tm.assert_panel_equal(result, expected) + @property + def _constructor_expanddim(self): + return SubclassedPanel + + class SubclassedPanel(Panel): + pass + + index = MultiIndex.from_tuples([(0, 0), (0, 1), (0, 2)]) + df = SubclassedFrame({'X': [1, 2, 3], 'Y': [4, 5, 6]}, 
index=index) + result = df.to_panel() + assert isinstance(result, SubclassedPanel) + expected = SubclassedPanel([[[1, 2, 3]], [[4, 5, 6]]], + items=['X', 'Y'], major_axis=[0], + minor_axis=[0, 1, 2], + dtype='int64') + tm.assert_panel_equal(result, expected) def test_subclass_attr_err_propagation(self): # GH 11808 @@ -154,7 +156,7 @@ class A(DataFrame): @property def bar(self): return self.i_dont_exist - with tm.assertRaisesRegexp(AttributeError, '.*i_dont_exist.*'): + with tm.assert_raises_regex(AttributeError, '.*i_dont_exist.*'): A().bar def test_subclass_align(self): @@ -171,15 +173,15 @@ def test_subclass_align(self): exp2 = tm.SubclassedDataFrame({'c': [1, 2, np.nan, 4, np.nan], 'd': [1, 2, np.nan, 4, np.nan]}, index=list('ABCDE')) - tm.assertIsInstance(res1, tm.SubclassedDataFrame) + assert isinstance(res1, tm.SubclassedDataFrame) tm.assert_frame_equal(res1, exp1) - tm.assertIsInstance(res2, tm.SubclassedDataFrame) + assert isinstance(res2, tm.SubclassedDataFrame) tm.assert_frame_equal(res2, exp2) res1, res2 = df1.a.align(df2.c) - tm.assertIsInstance(res1, tm.SubclassedSeries) + assert isinstance(res1, tm.SubclassedSeries) tm.assert_series_equal(res1, exp1.a) - tm.assertIsInstance(res2, tm.SubclassedSeries) + assert isinstance(res2, tm.SubclassedSeries) tm.assert_series_equal(res2, exp2.c) def test_subclass_align_combinations(self): @@ -197,23 +199,23 @@ def test_subclass_align_combinations(self): exp2 = pd.Series([1, 2, np.nan, 4, np.nan], index=list('ABCDE'), name='x') - tm.assertIsInstance(res1, tm.SubclassedDataFrame) + assert isinstance(res1, tm.SubclassedDataFrame) tm.assert_frame_equal(res1, exp1) - tm.assertIsInstance(res2, tm.SubclassedSeries) + assert isinstance(res2, tm.SubclassedSeries) tm.assert_series_equal(res2, exp2) # series + frame res1, res2 = s.align(df) - tm.assertIsInstance(res1, tm.SubclassedSeries) + assert isinstance(res1, tm.SubclassedSeries) tm.assert_series_equal(res1, exp2) - tm.assertIsInstance(res2, tm.SubclassedDataFrame) + 
assert isinstance(res2, tm.SubclassedDataFrame) tm.assert_frame_equal(res2, exp1) def test_subclass_iterrows(self): # GH 13977 df = tm.SubclassedDataFrame({'a': [1]}) for i, row in df.iterrows(): - tm.assertIsInstance(row, tm.SubclassedSeries) + assert isinstance(row, tm.SubclassedSeries) tm.assert_series_equal(row, df.loc[i]) def test_subclass_sparse_slice(self): @@ -227,9 +229,9 @@ def test_subclass_sparse_slice(self): tm.SubclassedSparseDataFrame(rows[:2])) tm.assert_sp_frame_equal(ssdf[:2], tm.SubclassedSparseDataFrame(rows[:2])) - tm.assert_equal(ssdf.loc[:2].testattr, "testattr") - tm.assert_equal(ssdf.iloc[:2].testattr, "testattr") - tm.assert_equal(ssdf[:2].testattr, "testattr") + assert ssdf.loc[:2].testattr == "testattr" + assert ssdf.iloc[:2].testattr == "testattr" + assert ssdf[:2].testattr == "testattr" tm.assert_sp_series_equal(ssdf.loc[1], tm.SubclassedSparseSeries(rows[1]), @@ -245,3 +247,326 @@ def test_subclass_sparse_transpose(self): [2, 5], [3, 6]]) tm.assert_sp_frame_equal(ossdf.T, essdf) + + def test_subclass_stack(self): + # GH 15564 + df = tm.SubclassedDataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], + index=['a', 'b', 'c'], + columns=['X', 'Y', 'Z']) + + res = df.stack() + exp = tm.SubclassedSeries( + [1, 2, 3, 4, 5, 6, 7, 8, 9], + index=[list('aaabbbccc'), list('XYZXYZXYZ')]) + + tm.assert_series_equal(res, exp) + + def test_subclass_stack_multi(self): + # GH 15564 + df = tm.SubclassedDataFrame([ + [10, 11, 12, 13], + [20, 21, 22, 23], + [30, 31, 32, 33], + [40, 41, 42, 43]], + index=MultiIndex.from_tuples( + list(zip(list('AABB'), list('cdcd'))), + names=['aaa', 'ccc']), + columns=MultiIndex.from_tuples( + list(zip(list('WWXX'), list('yzyz'))), + names=['www', 'yyy'])) + + exp = tm.SubclassedDataFrame([ + [10, 12], + [11, 13], + [20, 22], + [21, 23], + [30, 32], + [31, 33], + [40, 42], + [41, 43]], + index=MultiIndex.from_tuples(list(zip( + list('AAAABBBB'), list('ccddccdd'), list('yzyzyzyz'))), + names=['aaa', 'ccc', 'yyy']), + 
columns=Index(['W', 'X'], name='www')) + + res = df.stack() + tm.assert_frame_equal(res, exp) + + res = df.stack('yyy') + tm.assert_frame_equal(res, exp) + + exp = tm.SubclassedDataFrame([ + [10, 11], + [12, 13], + [20, 21], + [22, 23], + [30, 31], + [32, 33], + [40, 41], + [42, 43]], + index=MultiIndex.from_tuples(list(zip( + list('AAAABBBB'), list('ccddccdd'), list('WXWXWXWX'))), + names=['aaa', 'ccc', 'www']), + columns=Index(['y', 'z'], name='yyy')) + + res = df.stack('www') + tm.assert_frame_equal(res, exp) + + def test_subclass_stack_multi_mixed(self): + # GH 15564 + df = tm.SubclassedDataFrame([ + [10, 11, 12.0, 13.0], + [20, 21, 22.0, 23.0], + [30, 31, 32.0, 33.0], + [40, 41, 42.0, 43.0]], + index=MultiIndex.from_tuples( + list(zip(list('AABB'), list('cdcd'))), + names=['aaa', 'ccc']), + columns=MultiIndex.from_tuples( + list(zip(list('WWXX'), list('yzyz'))), + names=['www', 'yyy'])) + + exp = tm.SubclassedDataFrame([ + [10, 12.0], + [11, 13.0], + [20, 22.0], + [21, 23.0], + [30, 32.0], + [31, 33.0], + [40, 42.0], + [41, 43.0]], + index=MultiIndex.from_tuples(list(zip( + list('AAAABBBB'), list('ccddccdd'), list('yzyzyzyz'))), + names=['aaa', 'ccc', 'yyy']), + columns=Index(['W', 'X'], name='www')) + + res = df.stack() + tm.assert_frame_equal(res, exp) + + res = df.stack('yyy') + tm.assert_frame_equal(res, exp) + + exp = tm.SubclassedDataFrame([ + [10.0, 11.0], + [12.0, 13.0], + [20.0, 21.0], + [22.0, 23.0], + [30.0, 31.0], + [32.0, 33.0], + [40.0, 41.0], + [42.0, 43.0]], + index=MultiIndex.from_tuples(list(zip( + list('AAAABBBB'), list('ccddccdd'), list('WXWXWXWX'))), + names=['aaa', 'ccc', 'www']), + columns=Index(['y', 'z'], name='yyy')) + + res = df.stack('www') + tm.assert_frame_equal(res, exp) + + def test_subclass_unstack(self): + # GH 15564 + df = tm.SubclassedDataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], + index=['a', 'b', 'c'], + columns=['X', 'Y', 'Z']) + + res = df.unstack() + exp = tm.SubclassedSeries( + [1, 4, 7, 2, 5, 8, 3, 6, 9], + 
index=[list('XXXYYYZZZ'), list('abcabcabc')]) + + tm.assert_series_equal(res, exp) + + def test_subclass_unstack_multi(self): + # GH 15564 + df = tm.SubclassedDataFrame([ + [10, 11, 12, 13], + [20, 21, 22, 23], + [30, 31, 32, 33], + [40, 41, 42, 43]], + index=MultiIndex.from_tuples( + list(zip(list('AABB'), list('cdcd'))), + names=['aaa', 'ccc']), + columns=MultiIndex.from_tuples( + list(zip(list('WWXX'), list('yzyz'))), + names=['www', 'yyy'])) + + exp = tm.SubclassedDataFrame([ + [10, 20, 11, 21, 12, 22, 13, 23], + [30, 40, 31, 41, 32, 42, 33, 43]], + index=Index(['A', 'B'], name='aaa'), + columns=MultiIndex.from_tuples(list(zip( + list('WWWWXXXX'), list('yyzzyyzz'), list('cdcdcdcd'))), + names=['www', 'yyy', 'ccc'])) + + res = df.unstack() + tm.assert_frame_equal(res, exp) + + res = df.unstack('ccc') + tm.assert_frame_equal(res, exp) + + exp = tm.SubclassedDataFrame([ + [10, 30, 11, 31, 12, 32, 13, 33], + [20, 40, 21, 41, 22, 42, 23, 43]], + index=Index(['c', 'd'], name='ccc'), + columns=MultiIndex.from_tuples(list(zip( + list('WWWWXXXX'), list('yyzzyyzz'), list('ABABABAB'))), + names=['www', 'yyy', 'aaa'])) + + res = df.unstack('aaa') + tm.assert_frame_equal(res, exp) + + def test_subclass_unstack_multi_mixed(self): + # GH 15564 + df = tm.SubclassedDataFrame([ + [10, 11, 12.0, 13.0], + [20, 21, 22.0, 23.0], + [30, 31, 32.0, 33.0], + [40, 41, 42.0, 43.0]], + index=MultiIndex.from_tuples( + list(zip(list('AABB'), list('cdcd'))), + names=['aaa', 'ccc']), + columns=MultiIndex.from_tuples( + list(zip(list('WWXX'), list('yzyz'))), + names=['www', 'yyy'])) + + exp = tm.SubclassedDataFrame([ + [10, 20, 11, 21, 12.0, 22.0, 13.0, 23.0], + [30, 40, 31, 41, 32.0, 42.0, 33.0, 43.0]], + index=Index(['A', 'B'], name='aaa'), + columns=MultiIndex.from_tuples(list(zip( + list('WWWWXXXX'), list('yyzzyyzz'), list('cdcdcdcd'))), + names=['www', 'yyy', 'ccc'])) + + res = df.unstack() + tm.assert_frame_equal(res, exp) + + res = df.unstack('ccc') + tm.assert_frame_equal(res, exp) + + 
exp = tm.SubclassedDataFrame([ + [10, 30, 11, 31, 12.0, 32.0, 13.0, 33.0], + [20, 40, 21, 41, 22.0, 42.0, 23.0, 43.0]], + index=Index(['c', 'd'], name='ccc'), + columns=MultiIndex.from_tuples(list(zip( + list('WWWWXXXX'), list('yyzzyyzz'), list('ABABABAB'))), + names=['www', 'yyy', 'aaa'])) + + res = df.unstack('aaa') + tm.assert_frame_equal(res, exp) + + def test_subclass_pivot(self): + # GH 15564 + df = tm.SubclassedDataFrame({ + 'index': ['A', 'B', 'C', 'C', 'B', 'A'], + 'columns': ['One', 'One', 'One', 'Two', 'Two', 'Two'], + 'values': [1., 2., 3., 3., 2., 1.]}) + + pivoted = df.pivot( + index='index', columns='columns', values='values') + + expected = tm.SubclassedDataFrame({ + 'One': {'A': 1., 'B': 2., 'C': 3.}, + 'Two': {'A': 1., 'B': 2., 'C': 3.}}) + + expected.index.name, expected.columns.name = 'index', 'columns' + + tm.assert_frame_equal(pivoted, expected) + + def test_subclassed_melt(self): + # GH 15564 + cheese = tm.SubclassedDataFrame({ + 'first': ['John', 'Mary'], + 'last': ['Doe', 'Bo'], + 'height': [5.5, 6.0], + 'weight': [130, 150]}) + + melted = pd.melt(cheese, id_vars=['first', 'last']) + + expected = tm.SubclassedDataFrame([ + ['John', 'Doe', 'height', 5.5], + ['Mary', 'Bo', 'height', 6.0], + ['John', 'Doe', 'weight', 130], + ['Mary', 'Bo', 'weight', 150]], + columns=['first', 'last', 'variable', 'value']) + + tm.assert_frame_equal(melted, expected) + + def test_subclassed_wide_to_long(self): + # GH 9762 + + np.random.seed(123) + x = np.random.randn(3) + df = tm.SubclassedDataFrame({ + "A1970": {0: "a", 1: "b", 2: "c"}, + "A1980": {0: "d", 1: "e", 2: "f"}, + "B1970": {0: 2.5, 1: 1.2, 2: .7}, + "B1980": {0: 3.2, 1: 1.3, 2: .1}, + "X": dict(zip(range(3), x))}) + + df["id"] = df.index + exp_data = {"X": x.tolist() + x.tolist(), + "A": ['a', 'b', 'c', 'd', 'e', 'f'], + "B": [2.5, 1.2, 0.7, 3.2, 1.3, 0.1], + "year": [1970, 1970, 1970, 1980, 1980, 1980], + "id": [0, 1, 2, 0, 1, 2]} + expected = tm.SubclassedDataFrame(exp_data) + expected = 
expected.set_index(['id', 'year'])[["X", "A", "B"]] + long_frame = pd.wide_to_long(df, ["A", "B"], i="id", j="year") + + tm.assert_frame_equal(long_frame, expected) + + def test_subclassed_apply(self): + # GH 19822 + + def check_row_subclass(row): + assert isinstance(row, tm.SubclassedSeries) + + def strech(row): + if row["variable"] == "height": + row["value"] += 0.5 + return row + + df = tm.SubclassedDataFrame([ + ['John', 'Doe', 'height', 5.5], + ['Mary', 'Bo', 'height', 6.0], + ['John', 'Doe', 'weight', 130], + ['Mary', 'Bo', 'weight', 150]], + columns=['first', 'last', 'variable', 'value']) + + df.apply(lambda x: check_row_subclass(x)) + df.apply(lambda x: check_row_subclass(x), axis=1) + + expected = tm.SubclassedDataFrame([ + ['John', 'Doe', 'height', 6.0], + ['Mary', 'Bo', 'height', 6.5], + ['John', 'Doe', 'weight', 130], + ['Mary', 'Bo', 'weight', 150]], + columns=['first', 'last', 'variable', 'value']) + + result = df.apply(lambda x: strech(x), axis=1) + assert isinstance(result, tm.SubclassedDataFrame) + tm.assert_frame_equal(result, expected) + + expected = tm.SubclassedDataFrame([ + [1, 2, 3], + [1, 2, 3], + [1, 2, 3], + [1, 2, 3]]) + + result = df.apply(lambda x: tm.SubclassedSeries([1, 2, 3]), axis=1) + assert isinstance(result, tm.SubclassedDataFrame) + tm.assert_frame_equal(result, expected) + + result = df.apply(lambda x: [1, 2, 3], axis=1, result_type="expand") + assert isinstance(result, tm.SubclassedDataFrame) + tm.assert_frame_equal(result, expected) + + expected = tm.SubclassedSeries([ + [1, 2, 3], + [1, 2, 3], + [1, 2, 3], + [1, 2, 3]]) + + result = df.apply(lambda x: [1, 2, 3], axis=1) + assert not isinstance(result, tm.SubclassedDataFrame) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index 862f76b4ecc05..ceb6c942c81b1 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -4,20 +4,22 @@ from datetime import 
datetime, time +import pytest + from numpy import nan from numpy.random import randn import numpy as np from pandas import (DataFrame, Series, Index, - Timestamp, DatetimeIndex, - to_datetime, date_range) + Timestamp, DatetimeIndex, MultiIndex, + to_datetime, date_range, period_range) import pandas as pd import pandas.tseries.offsets as offsets -from pandas.util.testing import (assert_almost_equal, - assert_series_equal, +from pandas.util.testing import (assert_series_equal, assert_frame_equal, - assertRaisesRegexp) + assert_index_equal, + assert_raises_regex) import pandas.util.testing as tm from pandas.compat import product @@ -25,7 +27,7 @@ from pandas.tests.frame.common import TestData -class TestDataFrameTimeSeriesMethods(tm.TestCase, TestData): +class TestDataFrameTimeSeriesMethods(TestData): def test_diff(self): the_diff = self.tsframe.diff(1) @@ -39,7 +41,7 @@ def test_diff(self): s = Series([a, b]) rs = DataFrame({'s': s}).diff() - self.assertEqual(rs.s[1], 1) + assert rs.s[1] == 1 # mixed numeric tf = self.tsframe.astype('float32') @@ -55,6 +57,32 @@ def test_diff(self): 1), 'z': pd.Series(1)}).astype('float64') assert_frame_equal(result, expected) + @pytest.mark.parametrize('tz', [None, 'UTC']) + def test_diff_datetime_axis0(self, tz): + # GH 18578 + df = DataFrame({0: date_range('2010', freq='D', periods=2, tz=tz), + 1: date_range('2010', freq='D', periods=2, tz=tz)}) + + result = df.diff(axis=0) + expected = DataFrame({0: pd.TimedeltaIndex(['NaT', '1 days']), + 1: pd.TimedeltaIndex(['NaT', '1 days'])}) + assert_frame_equal(result, expected) + + @pytest.mark.parametrize('tz', [None, 'UTC']) + def test_diff_datetime_axis1(self, tz): + # GH 18578 + df = DataFrame({0: date_range('2010', freq='D', periods=2, tz=tz), + 1: date_range('2010', freq='D', periods=2, tz=tz)}) + if tz is None: + result = df.diff(axis=1) + expected = DataFrame({0: pd.TimedeltaIndex(['NaT', 'NaT']), + 1: pd.TimedeltaIndex(['0 days', + '0 days'])}) + assert_frame_equal(result, 
expected) + else: + with pytest.raises(NotImplementedError): + result = df.diff(axis=1) + def test_diff_timedelta(self): # GH 4533 df = DataFrame(dict(time=[Timestamp('20130101 9:01'), @@ -72,7 +100,7 @@ def test_diff_mixed_dtype(self): df['A'] = np.array([1, 2, 3, 4, 5], dtype=object) result = df.diff() - self.assertEqual(result[0].dtype, np.float64) + assert result[0].dtype == np.float64 def test_diff_neg_n(self): rs = self.tsframe.diff(-1) @@ -106,7 +134,9 @@ def test_pct_change(self): rs = self.tsframe.pct_change(freq='5D') filled = self.tsframe.fillna(method='pad') - assert_frame_equal(rs, filled / filled.shift(freq='5D') - 1) + assert_frame_equal(rs, + (filled / filled.shift(freq='5D') - 1) + .reindex_like(filled)) def test_pct_change_shift_over_nas(self): s = Series([1., 1.5, np.nan, 2.5, 3.]) @@ -114,23 +144,50 @@ def test_pct_change_shift_over_nas(self): df = DataFrame({'a': s, 'b': s}) chg = df.pct_change() - expected = Series([np.nan, 0.5, np.nan, 2.5 / 1.5 - 1, .2]) + expected = Series([np.nan, 0.5, 0., 2.5 / 1.5 - 1, .2]) edf = DataFrame({'a': expected, 'b': expected}) assert_frame_equal(chg, edf) + @pytest.mark.parametrize("freq, periods, fill_method, limit", + [('5B', 5, None, None), + ('3B', 3, None, None), + ('3B', 3, 'bfill', None), + ('7B', 7, 'pad', 1), + ('7B', 7, 'bfill', 3), + ('14B', 14, None, None)]) + def test_pct_change_periods_freq(self, freq, periods, fill_method, limit): + # GH 7292 + rs_freq = self.tsframe.pct_change(freq=freq, + fill_method=fill_method, + limit=limit) + rs_periods = self.tsframe.pct_change(periods, + fill_method=fill_method, + limit=limit) + assert_frame_equal(rs_freq, rs_periods) + + empty_ts = DataFrame(index=self.tsframe.index, + columns=self.tsframe.columns) + rs_freq = empty_ts.pct_change(freq=freq, + fill_method=fill_method, + limit=limit) + rs_periods = empty_ts.pct_change(periods, + fill_method=fill_method, + limit=limit) + assert_frame_equal(rs_freq, rs_periods) + def test_frame_ctor_datetime64_column(self): 
rng = date_range('1/1/2000 00:00:00', '1/1/2000 1:59:50', freq='10s') dates = np.asarray(rng) df = DataFrame({'A': np.random.randn(len(rng)), 'B': dates}) - self.assertTrue(np.issubdtype(df['B'].dtype, np.dtype('M8[ns]'))) + assert np.issubdtype(df['B'].dtype, np.dtype('M8[ns]')) def test_frame_add_datetime64_column(self): rng = date_range('1/1/2000 00:00:00', '1/1/2000 1:59:50', freq='10s') df = DataFrame(index=np.arange(len(rng))) df['A'] = rng - self.assertTrue(np.issubdtype(df['A'].dtype, np.dtype('M8[ns]'))) + assert np.issubdtype(df['A'].dtype, np.dtype('M8[ns]')) def test_frame_datetime64_pre1900_repr(self): df = DataFrame({'year': date_range('1/1/1700', periods=50, @@ -154,8 +211,8 @@ def test_frame_add_datetime64_col_other_units(self): ex_vals = to_datetime(vals.astype('O')).values - self.assertEqual(df[unit].dtype, ns_dtype) - self.assertTrue((df[unit].values == ex_vals).all()) + assert df[unit].dtype == ns_dtype + assert (df[unit].values == ex_vals).all() # Test insertion into existing datetime64 column df = DataFrame({'ints': np.arange(n)}, index=np.arange(n)) @@ -170,18 +227,18 @@ def test_frame_add_datetime64_col_other_units(self): tmp['dates'] = vals ex_vals = to_datetime(vals.astype('O')).values - self.assertTrue((tmp['dates'].values == ex_vals).all()) + assert (tmp['dates'].values == ex_vals).all() def test_shift(self): # naive shift shiftedFrame = self.tsframe.shift(5) - self.assert_index_equal(shiftedFrame.index, self.tsframe.index) + tm.assert_index_equal(shiftedFrame.index, self.tsframe.index) shiftedSeries = self.tsframe['A'].shift(5) assert_series_equal(shiftedFrame['A'], shiftedSeries) shiftedFrame = self.tsframe.shift(-5) - self.assert_index_equal(shiftedFrame.index, self.tsframe.index) + tm.assert_index_equal(shiftedFrame.index, self.tsframe.index) shiftedSeries = self.tsframe['A'].shift(-5) assert_series_equal(shiftedFrame['A'], shiftedSeries) @@ -192,7 +249,7 @@ def test_shift(self): # shift by DateOffset shiftedFrame = 
self.tsframe.shift(5, freq=offsets.BDay()) - self.assertEqual(len(shiftedFrame), len(self.tsframe)) + assert len(shiftedFrame) == len(self.tsframe) shiftedFrame2 = self.tsframe.shift(5, freq='B') assert_frame_equal(shiftedFrame, shiftedFrame2) @@ -209,9 +266,9 @@ def test_shift(self): ps = tm.makePeriodFrame() shifted = ps.shift(1) unshifted = shifted.shift(-1) - self.assert_index_equal(shifted.index, ps.index) - self.assert_index_equal(unshifted.index, ps.index) - tm.assert_numpy_array_equal(unshifted.iloc[:, 0].valid().values, + tm.assert_index_equal(shifted.index, ps.index) + tm.assert_index_equal(unshifted.index, ps.index) + tm.assert_numpy_array_equal(unshifted.iloc[:, 0].dropna().values, ps.iloc[:-1, 0].values) shifted2 = ps.shift(1, 'B') @@ -219,8 +276,9 @@ def test_shift(self): assert_frame_equal(shifted2, shifted3) assert_frame_equal(ps, shifted2.shift(-1, 'B')) - assertRaisesRegexp(ValueError, 'does not match PeriodIndex freq', - ps.shift, freq='D') + tm.assert_raises_regex(ValueError, + 'does not match PeriodIndex freq', + ps.shift, freq='D') # shift other axis # GH 6371 @@ -266,6 +324,28 @@ def test_shift_empty(self): assert_frame_equal(df, rs) + def test_shift_duplicate_columns(self): + # GH 9092; verify that position-based shifting works + # in the presence of duplicate columns + column_lists = [list(range(5)), [1] * 5, [1, 1, 2, 2, 1]] + data = np.random.randn(20, 5) + + shifted = [] + for columns in column_lists: + df = pd.DataFrame(data.copy(), columns=columns) + for s in range(5): + df.iloc[:, s] = df.iloc[:, s].shift(s + 1) + df.columns = range(5) + shifted.append(df) + + # sanity check the base case + nulls = shifted[0].isna().sum() + assert_series_equal(nulls, Series(range(1, 6), dtype='int64')) + + # check all answers are the same + assert_frame_equal(shifted[0], shifted[1]) + assert_frame_equal(shifted[0], shifted[2]) + def test_tshift(self): # PeriodIndex ps = tm.makePeriodFrame() @@ -280,7 +360,8 @@ def test_tshift(self): shifted3 = 
ps.tshift(freq=offsets.BDay()) assert_frame_equal(shifted, shifted3) - assertRaisesRegexp(ValueError, 'does not match', ps.tshift, freq='M') + tm.assert_raises_regex( + ValueError, 'does not match', ps.tshift, freq='M') # DatetimeIndex shifted = self.tsframe.tshift(1) @@ -300,7 +381,7 @@ def test_tshift(self): assert_frame_equal(unshifted, inferred_ts) no_freq = self.tsframe.iloc[[0, 5, 7], :] - self.assertRaises(ValueError, no_freq.tshift) + pytest.raises(ValueError, no_freq.tshift) def test_truncate(self): ts = self.tsframe[::3] @@ -341,21 +422,48 @@ def test_truncate(self): truncated = ts.truncate(after=end_missing) assert_frame_equal(truncated, expected) - self.assertRaises(ValueError, ts.truncate, - before=ts.index[-1] - 1, - after=ts.index[0] + 1) + pytest.raises(ValueError, ts.truncate, + before=ts.index[-1] - 1, + after=ts.index[0] + 1) def test_truncate_copy(self): index = self.tsframe.index truncated = self.tsframe.truncate(index[5], index[10]) truncated.values[:] = 5. - self.assertFalse((self.tsframe.values[5:11] == 5).any()) + assert not (self.tsframe.values[5:11] == 5).any() + + def test_truncate_nonsortedindex(self): + # GH 17935 + + df = pd.DataFrame({'A': ['a', 'b', 'c', 'd', 'e']}, + index=[5, 3, 2, 9, 0]) + with tm.assert_raises_regex(ValueError, + 'truncate requires a sorted index'): + df.truncate(before=3, after=9) + + rng = pd.date_range('2011-01-01', '2012-01-01', freq='W') + ts = pd.DataFrame({'A': np.random.randn(len(rng)), + 'B': np.random.randn(len(rng))}, + index=rng) + with tm.assert_raises_regex(ValueError, + 'truncate requires a sorted index'): + ts.sort_values('A', ascending=False).truncate(before='2011-11', + after='2011-12') + + df = pd.DataFrame({3: np.random.randn(5), + 20: np.random.randn(5), + 2: np.random.randn(5), + 0: np.random.randn(5)}, + columns=[3, 20, 2, 0]) + with tm.assert_raises_regex(ValueError, + 'truncate requires a sorted index'): + df.truncate(before=2, after=20, axis=1) def test_asfreq(self): offset_monthly = 
self.tsframe.asfreq(offsets.BMonthEnd()) rule_monthly = self.tsframe.asfreq('BM') - assert_almost_equal(offset_monthly['A'], rule_monthly['A']) + tm.assert_almost_equal(offset_monthly['A'], rule_monthly['A']) filled = rule_monthly.asfreq('B', method='pad') # noqa # TODO: actually check that this worked. @@ -366,17 +474,17 @@ def test_asfreq(self): # test does not blow up on length-0 DataFrame zero_length = self.tsframe.reindex([]) result = zero_length.asfreq('BM') - self.assertIsNot(result, zero_length) + assert result is not zero_length def test_asfreq_datetimeindex(self): df = DataFrame({'A': [1, 2, 3]}, index=[datetime(2011, 11, 1), datetime(2011, 11, 2), datetime(2011, 11, 3)]) df = df.asfreq('B') - tm.assertIsInstance(df.index, DatetimeIndex) + assert isinstance(df.index, DatetimeIndex) ts = df['A'].asfreq('B') - tm.assertIsInstance(ts.index, DatetimeIndex) + assert isinstance(ts.index, DatetimeIndex) def test_asfreq_fillvalue(self): # test for fill value during upsampling, related to issue 3715 @@ -407,23 +515,28 @@ def test_first_last_valid(self): frame = DataFrame({'foo': mat}, index=self.frame.index) index = frame.first_valid_index() - self.assertEqual(index, frame.index[5]) + assert index == frame.index[5] index = frame.last_valid_index() - self.assertEqual(index, frame.index[-6]) + assert index == frame.index[-6] # GH12800 empty = DataFrame() - self.assertIsNone(empty.last_valid_index()) - self.assertIsNone(empty.first_valid_index()) + assert empty.last_valid_index() is None + assert empty.first_valid_index() is None + + # GH17400: no valid entries + frame[:] = nan + assert frame.last_valid_index() is None + assert frame.first_valid_index() is None def test_at_time_frame(self): rng = date_range('1/1/2000', '1/5/2000', freq='5min') ts = DataFrame(np.random.randn(len(rng), 2), index=rng) rs = ts.at_time(rng[1]) - self.assertTrue((rs.index.hour == rng[1].hour).all()) - self.assertTrue((rs.index.minute == rng[1].minute).all()) - 
self.assertTrue((rs.index.second == rng[1].second).all()) + assert (rs.index.hour == rng[1].hour).all() + assert (rs.index.minute == rng[1].minute).all() + assert (rs.index.second == rng[1].second).all() result = ts.at_time('9:30') expected = ts.at_time(time(9, 30)) @@ -445,7 +558,7 @@ def test_at_time_frame(self): rng = date_range('1/1/2012', freq='23Min', periods=384) ts = DataFrame(np.random.randn(len(rng), 2), rng) rs = ts.at_time('16:00') - self.assertEqual(len(rs), 0) + assert len(rs) == 0 def test_between_time_frame(self): rng = date_range('1/1/2000', '1/5/2000', freq='5min') @@ -462,18 +575,18 @@ def test_between_time_frame(self): if not inc_end: exp_len -= 4 - self.assertEqual(len(filtered), exp_len) + assert len(filtered) == exp_len for rs in filtered.index: t = rs.time() if inc_start: - self.assertTrue(t >= stime) + assert t >= stime else: - self.assertTrue(t > stime) + assert t > stime if inc_end: - self.assertTrue(t <= etime) + assert t <= etime else: - self.assertTrue(t < etime) + assert t < etime result = ts.between_time('00:00', '01:00') expected = ts.between_time(stime, etime) @@ -494,18 +607,18 @@ def test_between_time_frame(self): if not inc_end: exp_len -= 4 - self.assertEqual(len(filtered), exp_len) + assert len(filtered) == exp_len for rs in filtered.index: t = rs.time() if inc_start: - self.assertTrue((t >= stime) or (t <= etime)) + assert (t >= stime) or (t <= etime) else: - self.assertTrue((t > stime) or (t <= etime)) + assert (t > stime) or (t <= etime) if inc_end: - self.assertTrue((t <= etime) or (t >= stime)) + assert (t <= etime) or (t >= stime) else: - self.assertTrue((t < etime) or (t >= stime)) + assert (t < etime) or (t >= stime) def test_operation_on_NaT(self): # Both NaT and Timestamp are in DataFrame. 
@@ -548,7 +661,7 @@ def test_datetime_assignment_with_NaT_and_diff_time_units(self): def test_frame_to_period(self): K = 5 - from pandas.tseries.period import period_range + from pandas.core.indexes.period import period_range dr = date_range('1/1/2000', '1/1/2001') pr = period_range('1/1/2000', '1/1/2001') @@ -572,4 +685,77 @@ def test_frame_to_period(self): pts = df.to_period('M', axis=1) tm.assert_index_equal(pts.columns, exp.columns.asfreq('M')) - self.assertRaises(ValueError, df.to_period, axis=2) + pytest.raises(ValueError, df.to_period, axis=2) + + @pytest.mark.parametrize("fn", ['tz_localize', 'tz_convert']) + def test_tz_convert_and_localize(self, fn): + l0 = date_range('20140701', periods=5, freq='D') + + # TODO: l1 should be a PeriodIndex for testing + # after GH2106 is addressed + with pytest.raises(NotImplementedError): + period_range('20140701', periods=1).tz_convert('UTC') + with pytest.raises(NotImplementedError): + period_range('20140701', periods=1).tz_localize('UTC') + # l1 = period_range('20140701', periods=5, freq='D') + l1 = date_range('20140701', periods=5, freq='D') + + int_idx = Index(range(5)) + + if fn == 'tz_convert': + l0 = l0.tz_localize('UTC') + l1 = l1.tz_localize('UTC') + + for idx in [l0, l1]: + + l0_expected = getattr(idx, fn)('US/Pacific') + l1_expected = getattr(idx, fn)('US/Pacific') + + df1 = DataFrame(np.ones(5), index=l0) + df1 = getattr(df1, fn)('US/Pacific') + assert_index_equal(df1.index, l0_expected) + + # MultiIndex + # GH7846 + df2 = DataFrame(np.ones(5), MultiIndex.from_arrays([l0, l1])) + + df3 = getattr(df2, fn)('US/Pacific', level=0) + assert not df3.index.levels[0].equals(l0) + assert_index_equal(df3.index.levels[0], l0_expected) + assert_index_equal(df3.index.levels[1], l1) + assert not df3.index.levels[1].equals(l1_expected) + + df3 = getattr(df2, fn)('US/Pacific', level=1) + assert_index_equal(df3.index.levels[0], l0) + assert not df3.index.levels[0].equals(l0_expected) + assert_index_equal(df3.index.levels[1], 
l1_expected) + assert not df3.index.levels[1].equals(l1) + + df4 = DataFrame(np.ones(5), + MultiIndex.from_arrays([int_idx, l0])) + + # TODO: untested + df5 = getattr(df4, fn)('US/Pacific', level=1) # noqa + + assert_index_equal(df3.index.levels[0], l0) + assert not df3.index.levels[0].equals(l0_expected) + assert_index_equal(df3.index.levels[1], l1_expected) + assert not df3.index.levels[1].equals(l1) + + # Bad Inputs + + # Not DatetimeIndex / PeriodIndex + with assert_raises_regex(TypeError, 'DatetimeIndex'): + df = DataFrame(index=int_idx) + df = getattr(df, fn)('US/Pacific') + + # Not DatetimeIndex / PeriodIndex + with assert_raises_regex(TypeError, 'DatetimeIndex'): + df = DataFrame(np.ones(5), + MultiIndex.from_arrays([int_idx, l0])) + df = getattr(df, fn)('US/Pacific', level=0) + + # Invalid level + with assert_raises_regex(ValueError, 'not valid'): + df = DataFrame(index=l0) + df = getattr(df, fn)('US/Pacific', level=1) diff --git a/pandas/tests/frame/test_timezones.py b/pandas/tests/frame/test_timezones.py new file mode 100644 index 0000000000000..fa589a0aa4817 --- /dev/null +++ b/pandas/tests/frame/test_timezones.py @@ -0,0 +1,135 @@ +# -*- coding: utf-8 -*- +""" +Tests for DataFrame timezone-related methods +""" +from datetime import datetime + +import pytest +import pytz +import numpy as np + +import pandas.util.testing as tm +from pandas.compat import lrange +from pandas.core.indexes.datetimes import date_range +from pandas.core.dtypes.dtypes import DatetimeTZDtype +from pandas import Series, DataFrame + + +class TestDataFrameTimezones(object): + def test_frame_from_records_utc(self): + rec = {'datum': 1.5, + 'begin_time': datetime(2006, 4, 27, tzinfo=pytz.utc)} + + # it works + DataFrame.from_records([rec], index='begin_time') + + def test_frame_tz_localize(self): + rng = date_range('1/1/2011', periods=100, freq='H') + + df = DataFrame({'a': 1}, index=rng) + result = df.tz_localize('utc') + expected = DataFrame({'a': 1}, rng.tz_localize('UTC')) + 
assert result.index.tz.zone == 'UTC' + tm.assert_frame_equal(result, expected) + + df = df.T + result = df.tz_localize('utc', axis=1) + assert result.columns.tz.zone == 'UTC' + tm.assert_frame_equal(result, expected.T) + + def test_frame_tz_convert(self): + rng = date_range('1/1/2011', periods=200, freq='D', tz='US/Eastern') + + df = DataFrame({'a': 1}, index=rng) + result = df.tz_convert('Europe/Berlin') + expected = DataFrame({'a': 1}, rng.tz_convert('Europe/Berlin')) + assert result.index.tz.zone == 'Europe/Berlin' + tm.assert_frame_equal(result, expected) + + df = df.T + result = df.tz_convert('Europe/Berlin', axis=1) + assert result.columns.tz.zone == 'Europe/Berlin' + tm.assert_frame_equal(result, expected.T) + + def test_frame_join_tzaware(self): + test1 = DataFrame(np.zeros((6, 3)), + index=date_range("2012-11-15 00:00:00", periods=6, + freq="100L", tz="US/Central")) + test2 = DataFrame(np.zeros((3, 3)), + index=date_range("2012-11-15 00:00:00", periods=3, + freq="250L", tz="US/Central"), + columns=lrange(3, 6)) + + result = test1.join(test2, how='outer') + ex_index = test1.index.union(test2.index) + + tm.assert_index_equal(result.index, ex_index) + assert result.index.tz.zone == 'US/Central' + + def test_frame_add_tz_mismatch_converts_to_utc(self): + rng = date_range('1/1/2011', periods=10, freq='H', tz='US/Eastern') + df = DataFrame(np.random.randn(len(rng)), index=rng, columns=['a']) + + df_moscow = df.tz_convert('Europe/Moscow') + result = df + df_moscow + assert result.index.tz is pytz.utc + + result = df_moscow + df + assert result.index.tz is pytz.utc + + def test_frame_align_aware(self): + idx1 = date_range('2001', periods=5, freq='H', tz='US/Eastern') + idx2 = date_range('2001', periods=5, freq='2H', tz='US/Eastern') + df1 = DataFrame(np.random.randn(len(idx1), 3), idx1) + df2 = DataFrame(np.random.randn(len(idx2), 3), idx2) + new1, new2 = df1.align(df2) + assert df1.index.tz == new1.index.tz + assert df2.index.tz == new2.index.tz + + # different 
timezones convert to UTC + + # frame with frame + df1_central = df1.tz_convert('US/Central') + new1, new2 = df1.align(df1_central) + assert new1.index.tz == pytz.UTC + assert new2.index.tz == pytz.UTC + + # frame with Series + new1, new2 = df1.align(df1_central[0], axis=0) + assert new1.index.tz == pytz.UTC + assert new2.index.tz == pytz.UTC + + df1[0].align(df1_central, axis=0) + assert new1.index.tz == pytz.UTC + assert new2.index.tz == pytz.UTC + + @pytest.mark.parametrize('tz', ['US/Eastern', 'dateutil/US/Eastern']) + def test_frame_no_datetime64_dtype(self, tz): + # after GH#7822 + # these retain the timezones on dict construction + dr = date_range('2011/1/1', '2012/1/1', freq='W-FRI') + dr_tz = dr.tz_localize(tz) + df = DataFrame({'A': 'foo', 'B': dr_tz}, index=dr) + tz_expected = DatetimeTZDtype('ns', dr_tz.tzinfo) + assert df['B'].dtype == tz_expected + + # GH#2810 (with timezones) + datetimes_naive = [ts.to_pydatetime() for ts in dr] + datetimes_with_tz = [ts.to_pydatetime() for ts in dr_tz] + df = DataFrame({'dr': dr, + 'dr_tz': dr_tz, + 'datetimes_naive': datetimes_naive, + 'datetimes_with_tz': datetimes_with_tz}) + result = df.get_dtype_counts().sort_index() + expected = Series({'datetime64[ns]': 2, + str(tz_expected): 2}).sort_index() + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('tz', ['US/Eastern', 'dateutil/US/Eastern']) + def test_frame_reset_index(self, tz): + dr = date_range('2012-06-02', periods=10, tz=tz) + df = DataFrame(np.random.randn(len(dr)), dr) + roundtripped = df.reset_index().set_index('index') + xp = df.index.tz + rs = roundtripped.index.tz + assert xp == rs diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index 471fc536a90f6..dda5cdea52cac 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -3,12 +3,14 @@ from __future__ import print_function import csv +import pytest from numpy import nan import numpy as np from pandas.compat import 
(lmap, range, lrange, StringIO, u) -from pandas.parser import ParserError +import pandas.core.common as com +from pandas.errors import ParserError from pandas import (DataFrame, Index, Series, MultiIndex, Timestamp, date_range, read_csv, compat, to_datetime) import pandas as pd @@ -17,8 +19,7 @@ assert_series_equal, assert_frame_equal, ensure_clean, - makeCustomDataframe as mkdf, - assertRaisesRegexp, slow) + makeCustomDataframe as mkdf) import pandas.util.testing as tm from pandas.tests.frame.common import TestData @@ -29,7 +30,22 @@ 'int32', 'int64'] -class TestDataFrameToCSV(tm.TestCase, TestData): +class TestDataFrameToCSV(TestData): + + def read_csv(self, path, **kwargs): + params = dict(index_col=0, parse_dates=True) + params.update(**kwargs) + + return pd.read_csv(path, **params) + + def test_from_csv_deprecation(self): + # see gh-17812 + with ensure_clean('__tmp_from_csv_deprecation__') as path: + self.tsframe.to_csv(path) + + with tm.assert_produces_warning(FutureWarning): + depr_recons = DataFrame.from_csv(path) + assert_frame_equal(self.tsframe, depr_recons) def test_to_csv_from_csv1(self): @@ -43,24 +59,25 @@ def test_to_csv_from_csv1(self): # test roundtrip self.tsframe.to_csv(path) - recons = DataFrame.from_csv(path) - + recons = self.read_csv(path) assert_frame_equal(self.tsframe, recons) self.tsframe.to_csv(path, index_label='index') - recons = DataFrame.from_csv(path, index_col=None) + recons = self.read_csv(path, index_col=None) + assert(len(recons.columns) == len(self.tsframe.columns) + 1) # no index self.tsframe.to_csv(path, index=False) - recons = DataFrame.from_csv(path, index_col=None) + recons = self.read_csv(path, index_col=None) assert_almost_equal(self.tsframe.values, recons.values) # corner case dm = DataFrame({'s1': Series(lrange(3), lrange(3)), 's2': Series(lrange(2), lrange(2))}) dm.to_csv(path) - recons = DataFrame.from_csv(path) + + recons = self.read_csv(path) assert_frame_equal(dm, recons) def test_to_csv_from_csv2(self): @@ 
-71,31 +88,30 @@ def test_to_csv_from_csv2(self): df = DataFrame(np.random.randn(3, 3), index=['a', 'a', 'b'], columns=['x', 'y', 'z']) df.to_csv(path) - result = DataFrame.from_csv(path) + result = self.read_csv(path) assert_frame_equal(result, df) midx = MultiIndex.from_tuples( [('A', 1, 2), ('A', 1, 2), ('B', 1, 2)]) df = DataFrame(np.random.randn(3, 3), index=midx, columns=['x', 'y', 'z']) + df.to_csv(path) - result = DataFrame.from_csv(path, index_col=[0, 1, 2], - parse_dates=False) - # TODO from_csv names index ['Unnamed: 1', 'Unnamed: 2'] should it - # ? + result = self.read_csv(path, index_col=[0, 1, 2], + parse_dates=False) assert_frame_equal(result, df, check_names=False) # column aliases col_aliases = Index(['AA', 'X', 'Y', 'Z']) self.frame2.to_csv(path, header=col_aliases) - rs = DataFrame.from_csv(path) + + rs = self.read_csv(path) xp = self.frame2.copy() xp.columns = col_aliases - assert_frame_equal(xp, rs) - self.assertRaises(ValueError, self.frame2.to_csv, path, - header=['AA', 'X']) + pytest.raises(ValueError, self.frame2.to_csv, path, + header=['AA', 'X']) def test_to_csv_from_csv3(self): @@ -205,7 +221,7 @@ def _check_df(df, cols=None): cols = ['b', 'a'] _check_df(df, cols) - @slow + @pytest.mark.slow def test_to_csv_dtnat(self): # GH3437 from pandas import NaT @@ -231,12 +247,13 @@ def make_dtnat_arr(n, nnat=None): with ensure_clean('1.csv') as pth: df = DataFrame(dict(a=s1, b=s2)) df.to_csv(pth, chunksize=chunksize) - recons = DataFrame.from_csv(pth)._convert(datetime=True, - coerce=True) + + recons = self.read_csv(pth)._convert(datetime=True, + coerce=True) assert_frame_equal(df, recons, check_names=False, check_less_precise=True) - @slow + @pytest.mark.slow def test_to_csv_moar(self): def _do_test(df, r_dtype=None, c_dtype=None, @@ -247,16 +264,17 @@ def _do_test(df, r_dtype=None, c_dtype=None, if rnlvl is not None: kwargs['index_col'] = lrange(rnlvl) kwargs['header'] = lrange(cnlvl) + with ensure_clean('__tmp_to_csv_moar__') as path: 
df.to_csv(path, encoding='utf8', - chunksize=chunksize, tupleize_cols=False) - recons = DataFrame.from_csv( - path, tupleize_cols=False, **kwargs) + chunksize=chunksize) + recons = self.read_csv(path, **kwargs) else: kwargs['header'] = 0 + with ensure_clean('__tmp_to_csv_moar__') as path: df.to_csv(path, encoding='utf8', chunksize=chunksize) - recons = DataFrame.from_csv(path, **kwargs) + recons = self.read_csv(path, **kwargs) def _to_uni(x): if not isinstance(x, compat.text_type): @@ -398,7 +416,7 @@ def test_to_csv_from_csv_w_some_infs(self): with ensure_clean() as path: self.frame.to_csv(path) - recons = DataFrame.from_csv(path) + recons = self.read_csv(path) # TODO to_csv drops column name assert_frame_equal(self.frame, recons, check_names=False) @@ -413,7 +431,7 @@ def test_to_csv_from_csv_w_all_infs(self): with ensure_clean() as path: self.frame.to_csv(path) - recons = DataFrame.from_csv(path) + recons = self.read_csv(path) # TODO to_csv drops column name assert_frame_equal(self.frame, recons, check_names=False) @@ -433,13 +451,13 @@ def test_to_csv_no_index(self): assert_frame_equal(df, result) def test_to_csv_with_mix_columns(self): - # GH11637, incorrect output when a mix of integer and string column + # gh-11637: incorrect output when a mix of integer and string column # names passed as columns parameter in to_csv df = DataFrame({0: ['a', 'b', 'c'], 1: ['aa', 'bb', 'cc']}) df['test'] = 'txt' - self.assertEqual(df.to_csv(), df.to_csv(columns=[0, 1, 'test'])) + assert df.to_csv() == df.to_csv(columns=[0, 1, 'test']) def test_to_csv_headers(self): # GH6186, the presence or absence of `index` incorrectly @@ -448,11 +466,13 @@ def test_to_csv_headers(self): to_df = DataFrame([[1, 2], [3, 4]], columns=['X', 'Y']) with ensure_clean('__tmp_to_csv_headers__') as path: from_df.to_csv(path, header=['X', 'Y']) - recons = DataFrame.from_csv(path) + recons = self.read_csv(path) + assert_frame_equal(to_df, recons) from_df.to_csv(path, index=False, header=['X', 'Y']) - 
recons = DataFrame.from_csv(path) + recons = self.read_csv(path) + recons.reset_index(inplace=True) assert_frame_equal(to_df, recons) @@ -471,13 +491,15 @@ def test_to_csv_multiindex(self): # round trip frame.to_csv(path) - df = DataFrame.from_csv(path, index_col=[0, 1], parse_dates=False) + + df = self.read_csv(path, index_col=[0, 1], + parse_dates=False) # TODO to_csv drops column name assert_frame_equal(frame, df, check_names=False) - self.assertEqual(frame.index.names, df.index.names) + assert frame.index.names == df.index.names - # needed if setUP becomes a classmethod + # needed if setUp becomes a class method self.frame.index = old_index # try multiindex with dates @@ -487,21 +509,22 @@ def test_to_csv_multiindex(self): tsframe.index = MultiIndex.from_arrays(new_index) tsframe.to_csv(path, index_label=['time', 'foo']) - recons = DataFrame.from_csv(path, index_col=[0, 1]) + recons = self.read_csv(path, index_col=[0, 1]) + # TODO to_csv drops column name assert_frame_equal(tsframe, recons, check_names=False) # do not load index tsframe.to_csv(path) - recons = DataFrame.from_csv(path, index_col=None) - self.assertEqual(len(recons.columns), len(tsframe.columns) + 2) + recons = self.read_csv(path, index_col=None) + assert len(recons.columns) == len(tsframe.columns) + 2 # no index tsframe.to_csv(path, index=False) - recons = DataFrame.from_csv(path, index_col=None) + recons = self.read_csv(path, index_col=None) assert_almost_equal(recons.values, self.tsframe.values) - # needed if setUP becomes classmethod + # needed if setUp becomes class method self.tsframe.index = old_index with ensure_clean('__tmp_to_csv_multiindex__') as path: @@ -519,92 +542,101 @@ def _make_frame(names=None): # column & index are multi-index df = mkdf(5, 3, r_idx_nlevels=2, c_idx_nlevels=4) - df.to_csv(path, tupleize_cols=False) - result = read_csv(path, header=[0, 1, 2, 3], index_col=[ - 0, 1], tupleize_cols=False) + df.to_csv(path) + result = read_csv(path, header=[0, 1, 2, 3], + 
index_col=[0, 1]) assert_frame_equal(df, result) # column is mi df = mkdf(5, 3, r_idx_nlevels=1, c_idx_nlevels=4) - df.to_csv(path, tupleize_cols=False) + df.to_csv(path) result = read_csv( - path, header=[0, 1, 2, 3], index_col=0, tupleize_cols=False) + path, header=[0, 1, 2, 3], index_col=0) assert_frame_equal(df, result) # dup column names? df = mkdf(5, 3, r_idx_nlevels=3, c_idx_nlevels=4) - df.to_csv(path, tupleize_cols=False) - result = read_csv(path, header=[0, 1, 2, 3], index_col=[ - 0, 1, 2], tupleize_cols=False) + df.to_csv(path) + result = read_csv(path, header=[0, 1, 2, 3], + index_col=[0, 1, 2]) assert_frame_equal(df, result) # writing with no index df = _make_frame() - df.to_csv(path, tupleize_cols=False, index=False) - result = read_csv(path, header=[0, 1], tupleize_cols=False) + df.to_csv(path, index=False) + result = read_csv(path, header=[0, 1]) assert_frame_equal(df, result) # we lose the names here df = _make_frame(True) - df.to_csv(path, tupleize_cols=False, index=False) - result = read_csv(path, header=[0, 1], tupleize_cols=False) - self.assertTrue(all([x is None for x in result.columns.names])) + df.to_csv(path, index=False) + result = read_csv(path, header=[0, 1]) + assert com._all_none(*result.columns.names) result.columns.names = df.columns.names assert_frame_equal(df, result) # tupleize_cols=True and index=False df = _make_frame(True) - df.to_csv(path, tupleize_cols=True, index=False) - result = read_csv( - path, header=0, tupleize_cols=True, index_col=None) + with tm.assert_produces_warning(FutureWarning): + df.to_csv(path, tupleize_cols=True, index=False) + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = read_csv(path, header=0, + tupleize_cols=True, + index_col=None) result.columns = df.columns assert_frame_equal(df, result) # whatsnew example df = _make_frame() - df.to_csv(path, tupleize_cols=False) - result = read_csv(path, header=[0, 1], index_col=[ - 0], tupleize_cols=False) + df.to_csv(path) + 
result = read_csv(path, header=[0, 1], + index_col=[0]) assert_frame_equal(df, result) df = _make_frame(True) - df.to_csv(path, tupleize_cols=False) - result = read_csv(path, header=[0, 1], index_col=[ - 0], tupleize_cols=False) + df.to_csv(path) + result = read_csv(path, header=[0, 1], + index_col=[0]) assert_frame_equal(df, result) # column & index are multi-index (compatibility) df = mkdf(5, 3, r_idx_nlevels=2, c_idx_nlevels=4) - df.to_csv(path, tupleize_cols=True) - result = read_csv(path, header=0, index_col=[ - 0, 1], tupleize_cols=True) + with tm.assert_produces_warning(FutureWarning): + df.to_csv(path, tupleize_cols=True) + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = read_csv(path, header=0, index_col=[0, 1], + tupleize_cols=True) result.columns = df.columns assert_frame_equal(df, result) # invalid options df = _make_frame(True) - df.to_csv(path, tupleize_cols=False) + df.to_csv(path) for i in [6, 7]: msg = 'len of {i}, but only 5 lines in file'.format(i=i) - with assertRaisesRegexp(ParserError, msg): - read_csv(path, tupleize_cols=False, - header=lrange(i), index_col=0) + with tm.assert_raises_regex(ParserError, msg): + read_csv(path, header=lrange(i), index_col=0) # write with cols - with assertRaisesRegexp(TypeError, 'cannot specify cols with a ' - 'MultiIndex'): - df.to_csv(path, tupleize_cols=False, columns=['foo', 'bar']) + with tm.assert_raises_regex(TypeError, 'cannot specify cols ' + 'with a MultiIndex'): + df.to_csv(path, columns=['foo', 'bar']) with ensure_clean('__tmp_to_csv_multiindex__') as path: # empty tsframe[:0].to_csv(path) - recons = DataFrame.from_csv(path) + recons = self.read_csv(path) + exp = tsframe[:0] exp.index = [] - self.assert_index_equal(recons.columns, exp.columns) - self.assertEqual(len(recons), 0) + tm.assert_index_equal(recons.columns, exp.columns) + assert len(recons) == 0 def test_to_csv_float32_nanrep(self): df = DataFrame(np.random.randn(1, 4).astype(np.float32)) @@ -615,7 
+647,7 @@ def test_to_csv_float32_nanrep(self): with open(path) as f: lines = f.readlines() - self.assertEqual(lines[1].split(',')[2], '999') + assert lines[1].split(',')[2] == '999' def test_to_csv_withcommas(self): @@ -624,7 +656,7 @@ def test_to_csv_withcommas(self): with ensure_clean('__tmp_to_csv_withcommas__.csv') as path: df.to_csv(path) - df2 = DataFrame.from_csv(path) + df2 = self.read_csv(path) assert_frame_equal(df2, df) def test_to_csv_mixed(self): @@ -728,7 +760,7 @@ def test_to_csv_chunking(self): rs = read_csv(filename, index_col=0) assert_frame_equal(rs, aa) - @slow + @pytest.mark.slow def test_to_csv_wide_frame_formatting(self): # Issue #8621 df = DataFrame(np.random.randn(1, 100010), columns=None, index=None) @@ -739,7 +771,7 @@ def test_to_csv_wide_frame_formatting(self): def test_to_csv_bug(self): f1 = StringIO('a,1.0\nb,2.0') - df = DataFrame.from_csv(f1, header=None) + df = self.read_csv(f1, header=None) newdf = DataFrame({'t': df[df.columns[0]]}) with ensure_clean() as path: @@ -813,7 +845,7 @@ def test_to_csv_unicodewriter_quoting(self): '2,"bar"\n' '3,"baz"\n') - self.assertEqual(result, expected) + assert result == expected def test_to_csv_quote_none(self): # GH4328 @@ -824,7 +856,7 @@ def test_to_csv_quote_none(self): encoding=encoding, index=False) result = buf.getvalue() expected = 'A\nhello\n{"hello"}\n' - self.assertEqual(result, expected) + assert result == expected def test_to_csv_index_no_leading_comma(self): df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, @@ -836,7 +868,7 @@ def test_to_csv_index_no_leading_comma(self): 'one,1,4\n' 'two,2,5\n' 'three,3,6\n') - self.assertEqual(buf.getvalue(), expected) + assert buf.getvalue() == expected def test_to_csv_line_terminators(self): df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, @@ -848,7 +880,7 @@ def test_to_csv_line_terminators(self): 'one,1,4\r\n' 'two,2,5\r\n' 'three,3,6\r\n') - self.assertEqual(buf.getvalue(), expected) + assert buf.getvalue() == expected buf = StringIO() 
df.to_csv(buf) # The default line terminator remains \n @@ -856,7 +888,7 @@ def test_to_csv_line_terminators(self): 'one,1,4\n' 'two,2,5\n' 'three,3,6\n') - self.assertEqual(buf.getvalue(), expected) + assert buf.getvalue() == expected def test_to_csv_from_csv_categorical(self): @@ -868,7 +900,7 @@ def test_to_csv_from_csv_categorical(self): s.to_csv(res) exp = StringIO() s2.to_csv(exp) - self.assertEqual(res.getvalue(), exp.getvalue()) + assert res.getvalue() == exp.getvalue() df = DataFrame({"s": s}) df2 = DataFrame({"s": s2}) @@ -876,84 +908,40 @@ def test_to_csv_from_csv_categorical(self): df.to_csv(res) exp = StringIO() df2.to_csv(exp) - self.assertEqual(res.getvalue(), exp.getvalue()) + assert res.getvalue() == exp.getvalue() def test_to_csv_path_is_none(self): # GH 8215 # Make sure we return string for consistency with # Series.to_csv() csv_str = self.frame.to_csv(path_or_buf=None) - self.assertIsInstance(csv_str, str) + assert isinstance(csv_str, str) recons = pd.read_csv(StringIO(csv_str), index_col=0) assert_frame_equal(self.frame, recons) - def test_to_csv_compression_gzip(self): - # GH7615 - # use the compression kw in to_csv - df = DataFrame([[0.123456, 0.234567, 0.567567], - [12.32112, 123123.2, 321321.2]], - index=['A', 'B'], columns=['X', 'Y', 'Z']) - - with ensure_clean() as filename: - - df.to_csv(filename, compression="gzip") + def test_to_csv_compression(self, compression_no_zip): - # test the round trip - to_csv -> read_csv - rs = read_csv(filename, compression="gzip", index_col=0) - assert_frame_equal(df, rs) - - # explicitly make sure file is gziped - import gzip - f = gzip.open(filename, 'rb') - text = f.read().decode('utf8') - f.close() - for col in df.columns: - self.assertIn(col, text) - - def test_to_csv_compression_bz2(self): - # GH7615 - # use the compression kw in to_csv df = DataFrame([[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]], index=['A', 'B'], columns=['X', 'Y', 'Z']) with ensure_clean() as filename: - 
df.to_csv(filename, compression="bz2") + df.to_csv(filename, compression=compression_no_zip) # test the round trip - to_csv -> read_csv - rs = read_csv(filename, compression="bz2", index_col=0) + rs = read_csv(filename, compression=compression_no_zip, + index_col=0) assert_frame_equal(df, rs) - # explicitly make sure file is bz2ed - import bz2 - f = bz2.BZ2File(filename, 'rb') - text = f.read().decode('utf8') - f.close() - for col in df.columns: - self.assertIn(col, text) + # explicitly make sure file is compressed + with tm.decompress_file(filename, compression_no_zip) as fh: + text = fh.read().decode('utf8') + for col in df.columns: + assert col in text - def test_to_csv_compression_xz(self): - # GH11852 - # use the compression kw in to_csv - tm._skip_if_no_lzma() - df = DataFrame([[0.123456, 0.234567, 0.567567], - [12.32112, 123123.2, 321321.2]], - index=['A', 'B'], columns=['X', 'Y', 'Z']) - - with ensure_clean() as filename: - - df.to_csv(filename, compression="xz") - - # test the round trip - to_csv -> read_csv - rs = read_csv(filename, compression="xz", index_col=0) - assert_frame_equal(df, rs) - - # explicitly make sure file is xzipped - lzma = compat.import_lzma() - f = lzma.open(filename, 'rb') - assert_frame_equal(df, read_csv(f, index_col=0)) - f.close() + with tm.decompress_file(filename, compression_no_zip) as fh: + assert_frame_equal(df, read_csv(fh, index_col=0)) def test_to_csv_compression_value_error(self): # GH7615 @@ -965,8 +953,8 @@ def test_to_csv_compression_value_error(self): with ensure_clean() as filename: # zip compression is not supported and should raise ValueError import zipfile - self.assertRaises(zipfile.BadZipfile, df.to_csv, - filename, compression="zip") + pytest.raises(zipfile.BadZipfile, df.to_csv, + filename, compression="zip") def test_to_csv_date_format(self): with ensure_clean('__tmp_to_csv_date_format__') as path: @@ -1066,10 +1054,10 @@ def test_to_csv_with_dst_transitions(self): def test_to_csv_quoting(self): df = 
DataFrame({ - 'c_string': ['a', 'b,c'], - 'c_int': [42, np.nan], - 'c_float': [1.0, 3.2], 'c_bool': [True, False], + 'c_float': [1.0, 3.2], + 'c_int': [42, np.nan], + 'c_string': ['a', 'b,c'], }) expected = """\ @@ -1078,13 +1066,13 @@ def test_to_csv_quoting(self): 1,False,3.2,,"b,c" """ result = df.to_csv() - self.assertEqual(result, expected) + assert result == expected result = df.to_csv(quoting=None) - self.assertEqual(result, expected) + assert result == expected result = df.to_csv(quoting=csv.QUOTE_MINIMAL) - self.assertEqual(result, expected) + assert result == expected expected = """\ "","c_bool","c_float","c_int","c_string" @@ -1092,7 +1080,7 @@ def test_to_csv_quoting(self): "1","False","3.2","","b,c" """ result = df.to_csv(quoting=csv.QUOTE_ALL) - self.assertEqual(result, expected) + assert result == expected # see gh-12922, gh-13259: make sure changes to # the formatters do not break this behaviour @@ -1102,14 +1090,14 @@ def test_to_csv_quoting(self): 1,False,3.2,"","b,c" """ result = df.to_csv(quoting=csv.QUOTE_NONNUMERIC) - self.assertEqual(result, expected) + assert result == expected msg = "need to escape, but no escapechar set" - tm.assertRaisesRegexp(csv.Error, msg, df.to_csv, - quoting=csv.QUOTE_NONE) - tm.assertRaisesRegexp(csv.Error, msg, df.to_csv, - quoting=csv.QUOTE_NONE, - escapechar=None) + tm.assert_raises_regex(csv.Error, msg, df.to_csv, + quoting=csv.QUOTE_NONE) + tm.assert_raises_regex(csv.Error, msg, df.to_csv, + quoting=csv.QUOTE_NONE, + escapechar=None) expected = """\ ,c_bool,c_float,c_int,c_string @@ -1118,7 +1106,7 @@ def test_to_csv_quoting(self): """ result = df.to_csv(quoting=csv.QUOTE_NONE, escapechar='!') - self.assertEqual(result, expected) + assert result == expected expected = """\ ,c_bool,c_ffloat,c_int,c_string @@ -1127,7 +1115,7 @@ def test_to_csv_quoting(self): """ result = df.to_csv(quoting=csv.QUOTE_NONE, escapechar='f') - self.assertEqual(result, expected) + assert result == expected # see gh-3503: quoting 
Windows line terminators # presents with encoding? @@ -1135,11 +1123,52 @@ def test_to_csv_quoting(self): df = pd.read_csv(StringIO(text)) buf = StringIO() df.to_csv(buf, encoding='utf-8', index=False) - self.assertEqual(buf.getvalue(), text) + assert buf.getvalue() == text # xref gh-7791: make sure the quoting parameter is passed through # with multi-indexes df = pd.DataFrame({'a': [1, 2], 'b': [3, 4], 'c': [5, 6]}) df = df.set_index(['a', 'b']) expected = '"a","b","c"\n"1","3","5"\n"2","4","6"\n' - self.assertEqual(df.to_csv(quoting=csv.QUOTE_ALL), expected) + assert df.to_csv(quoting=csv.QUOTE_ALL) == expected + + def test_period_index_date_overflow(self): + # see gh-15982 + + dates = ["1990-01-01", "2000-01-01", "3005-01-01"] + index = pd.PeriodIndex(dates, freq="D") + + df = pd.DataFrame([4, 5, 6], index=index) + result = df.to_csv() + + expected = ',0\n1990-01-01,4\n2000-01-01,5\n3005-01-01,6\n' + assert result == expected + + date_format = "%m-%d-%Y" + result = df.to_csv(date_format=date_format) + + expected = ',0\n01-01-1990,4\n01-01-2000,5\n01-01-3005,6\n' + assert result == expected + + # Overflow with pd.NaT + dates = ["1990-01-01", pd.NaT, "3005-01-01"] + index = pd.PeriodIndex(dates, freq="D") + + df = pd.DataFrame([4, 5, 6], index=index) + result = df.to_csv() + + expected = ',0\n1990-01-01,4\n,5\n3005-01-01,6\n' + assert result == expected + + def test_multi_index_header(self): + # see gh-5539 + columns = pd.MultiIndex.from_tuples([("a", 1), ("a", 2), + ("b", 1), ("b", 2)]) + df = pd.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]]) + df.columns = columns + + header = ["a", "b", "c", "d"] + result = df.to_csv(header=header) + + expected = ",a,b,c,d\n0,1,2,3,4\n1,5,6,7,8\n" + assert result == expected diff --git a/pandas/tests/frame/test_validate.py b/pandas/tests/frame/test_validate.py index e1ef87bb3271a..2de0e866f6e70 100644 --- a/pandas/tests/frame/test_validate.py +++ b/pandas/tests/frame/test_validate.py @@ -1,33 +1,33 @@ -from unittest import TestCase 
from pandas.core.frame import DataFrame +import pytest +import pandas.util.testing as tm -class TestDataFrameValidate(TestCase): - """Tests for error handling related to data types of method arguments.""" - df = DataFrame({'a': [1, 2], 'b': [3, 4]}) - - def test_validate_bool_args(self): - # Tests for error handling related to boolean arguments. - invalid_values = [1, "True", [1, 2, 3], 5.0] - - for value in invalid_values: - with self.assertRaises(ValueError): - self.df.query('a > b', inplace=value) - with self.assertRaises(ValueError): - self.df.eval('a + b', inplace=value) +@pytest.fixture +def dataframe(): + return DataFrame({'a': [1, 2], 'b': [3, 4]}) - with self.assertRaises(ValueError): - self.df.set_index(keys=['a'], inplace=value) - with self.assertRaises(ValueError): - self.df.reset_index(inplace=value) - - with self.assertRaises(ValueError): - self.df.dropna(inplace=value) - - with self.assertRaises(ValueError): - self.df.drop_duplicates(inplace=value) +class TestDataFrameValidate(object): + """Tests for error handling related to data types of method arguments.""" - with self.assertRaises(ValueError): - self.df.sort_values(by=['a'], inplace=value) + @pytest.mark.parametrize("func", ["query", "eval", "set_index", + "reset_index", "dropna", + "drop_duplicates", "sort_values"]) + @pytest.mark.parametrize("inplace", [1, "True", [1, 2, 3], 5.0]) + def test_validate_bool_args(self, dataframe, func, inplace): + msg = "For argument \"inplace\" expected type bool" + kwargs = dict(inplace=inplace) + + if func == "query": + kwargs["expr"] = "a > b" + elif func == "eval": + kwargs["expr"] = "a + b" + elif func == "set_index": + kwargs["keys"] = ["a"] + elif func == "sort_values": + kwargs["by"] = ["a"] + + with tm.assert_raises_regex(ValueError, msg): + getattr(dataframe, func)(**kwargs) diff --git a/pandas/tests/generic/__init__.py b/pandas/tests/generic/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git 
a/pandas/tests/generic/test_frame.py b/pandas/tests/generic/test_frame.py new file mode 100644 index 0000000000000..9da59ca77d862 --- /dev/null +++ b/pandas/tests/generic/test_frame.py @@ -0,0 +1,270 @@ +# -*- coding: utf-8 -*- +# pylint: disable-msg=E1101,W0612 + +from operator import methodcaller +from copy import deepcopy +from distutils.version import LooseVersion + +import pytest +import numpy as np +import pandas as pd + +from pandas import Series, DataFrame, date_range, MultiIndex + +from pandas.compat import range +from pandas.util.testing import (assert_series_equal, + assert_frame_equal, + assert_almost_equal) + +import pandas.util.testing as tm +import pandas.util._test_decorators as td +from .test_generic import Generic + +try: + import xarray + _XARRAY_INSTALLED = True +except ImportError: + _XARRAY_INSTALLED = False + + +class TestDataFrame(Generic): + _typ = DataFrame + _comparator = lambda self, x, y: assert_frame_equal(x, y) + + def test_rename_mi(self): + df = DataFrame([ + 11, 21, 31 + ], index=MultiIndex.from_tuples([("A", x) for x in ["a", "B", "c"]])) + df.rename(str.lower) + + def test_set_axis_name(self): + df = pd.DataFrame([[1, 2], [3, 4]]) + funcs = ['_set_axis_name', 'rename_axis'] + for func in funcs: + result = methodcaller(func, 'foo')(df) + assert df.index.name is None + assert result.index.name == 'foo' + + result = methodcaller(func, 'cols', axis=1)(df) + assert df.columns.name is None + assert result.columns.name == 'cols' + + def test_set_axis_name_mi(self): + df = DataFrame( + np.empty((3, 3)), + index=MultiIndex.from_tuples([("A", x) for x in list('aBc')]), + columns=MultiIndex.from_tuples([('C', x) for x in list('xyz')]) + ) + + level_names = ['L1', 'L2'] + funcs = ['_set_axis_name', 'rename_axis'] + for func in funcs: + result = methodcaller(func, level_names)(df) + assert result.index.names == level_names + assert result.columns.names == [None, None] + + result = methodcaller(func, level_names, axis=1)(df) + assert 
result.columns.names == ["L1", "L2"] + assert result.index.names == [None, None] + + def test_nonzero_single_element(self): + + # allow single item via bool method + df = DataFrame([[True]]) + assert df.bool() + + df = DataFrame([[False]]) + assert not df.bool() + + df = DataFrame([[False, False]]) + pytest.raises(ValueError, lambda: df.bool()) + pytest.raises(ValueError, lambda: bool(df)) + + def test_get_numeric_data_preserve_dtype(self): + + # get the numeric data + o = DataFrame({'A': [1, '2', 3.]}) + result = o._get_numeric_data() + expected = DataFrame(index=[0, 1, 2], dtype=object) + self._compare(result, expected) + + def test_metadata_propagation_indiv(self): + + # groupby + df = DataFrame( + {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], + 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], + 'C': np.random.randn(8), + 'D': np.random.randn(8)}) + result = df.groupby('A').sum() + self.check_metadata(df, result) + + # resample + df = DataFrame(np.random.randn(1000, 2), + index=date_range('20130101', periods=1000, freq='s')) + result = df.resample('1T') + self.check_metadata(df, result) + + # merging with override + # GH 6923 + _metadata = DataFrame._metadata + _finalize = DataFrame.__finalize__ + + np.random.seed(10) + df1 = DataFrame(np.random.randint(0, 4, (3, 2)), columns=['a', 'b']) + df2 = DataFrame(np.random.randint(0, 4, (3, 2)), columns=['c', 'd']) + DataFrame._metadata = ['filename'] + df1.filename = 'fname1.csv' + df2.filename = 'fname2.csv' + + def finalize(self, other, method=None, **kwargs): + + for name in self._metadata: + if method == 'merge': + left, right = other.left, other.right + value = getattr(left, name, '') + '|' + getattr(right, + name, '') + object.__setattr__(self, name, value) + else: + object.__setattr__(self, name, getattr(other, name, '')) + + return self + + DataFrame.__finalize__ = finalize + result = df1.merge(df2, left_on=['a'], right_on=['c'], how='inner') + assert result.filename == 
'fname1.csv|fname2.csv' + + # concat + # GH 6927 + DataFrame._metadata = ['filename'] + df1 = DataFrame(np.random.randint(0, 4, (3, 2)), columns=list('ab')) + df1.filename = 'foo' + + def finalize(self, other, method=None, **kwargs): + for name in self._metadata: + if method == 'concat': + value = '+'.join([getattr( + o, name) for o in other.objs if getattr(o, name, None) + ]) + object.__setattr__(self, name, value) + else: + object.__setattr__(self, name, getattr(other, name, None)) + + return self + + DataFrame.__finalize__ = finalize + + result = pd.concat([df1, df1]) + assert result.filename == 'foo+foo' + + # reset + DataFrame._metadata = _metadata + DataFrame.__finalize__ = _finalize + + def test_set_attribute(self): + # Test for consistent setattr behavior when an attribute and a column + # have the same name (Issue #8994) + df = DataFrame({'x': [1, 2, 3]}) + + df.y = 2 + df['y'] = [2, 4, 6] + df.y = 5 + + assert df.y == 5 + assert_series_equal(df['y'], Series([2, 4, 6], name='y')) + + @pytest.mark.skipif(not _XARRAY_INSTALLED or _XARRAY_INSTALLED and + LooseVersion(xarray.__version__) < + LooseVersion('0.10.0'), + reason='xarray >= 0.10.0 required') + @pytest.mark.parametrize( + "index", ['FloatIndex', 'IntIndex', + 'StringIndex', 'UnicodeIndex', + 'DateIndex', 'PeriodIndex', + 'CategoricalIndex', 'TimedeltaIndex']) + def test_to_xarray_index_types(self, index): + from xarray import Dataset + + index = getattr(tm, 'make{}'.format(index)) + df = DataFrame({'a': list('abc'), + 'b': list(range(1, 4)), + 'c': np.arange(3, 6).astype('u1'), + 'd': np.arange(4.0, 7.0, dtype='float64'), + 'e': [True, False, True], + 'f': pd.Categorical(list('abc')), + 'g': pd.date_range('20130101', periods=3), + 'h': pd.date_range('20130101', + periods=3, + tz='US/Eastern')} + ) + + df.index = index(3) + df.index.name = 'foo' + df.columns.name = 'bar' + result = df.to_xarray() + assert result.dims['foo'] == 3 + assert len(result.coords) == 1 + assert len(result.data_vars) == 8 + 
assert_almost_equal(list(result.coords.keys()), ['foo']) + assert isinstance(result, Dataset) + + # idempotency + # categoricals are not preserved + # datetimes w/tz are not preserved + # column names are lost + expected = df.copy() + expected['f'] = expected['f'].astype(object) + expected['h'] = expected['h'].astype('datetime64[ns]') + expected.columns.name = None + assert_frame_equal(result.to_dataframe(), expected, + check_index_type=False, check_categorical=False) + + @td.skip_if_no('xarray', min_version='0.7.0') + def test_to_xarray(self): + from xarray import Dataset + + df = DataFrame({'a': list('abc'), + 'b': list(range(1, 4)), + 'c': np.arange(3, 6).astype('u1'), + 'd': np.arange(4.0, 7.0, dtype='float64'), + 'e': [True, False, True], + 'f': pd.Categorical(list('abc')), + 'g': pd.date_range('20130101', periods=3), + 'h': pd.date_range('20130101', + periods=3, + tz='US/Eastern')} + ) + + df.index.name = 'foo' + result = df[0:0].to_xarray() + assert result.dims['foo'] == 0 + assert isinstance(result, Dataset) + + # available in 0.7.1 + # MultiIndex + df.index = pd.MultiIndex.from_product([['a'], range(3)], + names=['one', 'two']) + result = df.to_xarray() + assert result.dims['one'] == 1 + assert result.dims['two'] == 3 + assert len(result.coords) == 2 + assert len(result.data_vars) == 8 + assert_almost_equal(list(result.coords.keys()), ['one', 'two']) + assert isinstance(result, Dataset) + + result = result.to_dataframe() + expected = df.copy() + expected['f'] = expected['f'].astype(object) + expected['h'] = expected['h'].astype('datetime64[ns]') + expected.columns.name = None + assert_frame_equal(result, + expected, + check_index_type=False) + + def test_deepcopy_empty(self): + # This test covers empty frame copying with non-empty column sets + # as reported in issue GH15370 + empty_frame = DataFrame(data=[], index=[], columns=['A']) + empty_frame_copy = deepcopy(empty_frame) + + self._compare(empty_frame_copy, empty_frame) diff --git 
a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py new file mode 100644 index 0000000000000..311c71f734945 --- /dev/null +++ b/pandas/tests/generic/test_generic.py @@ -0,0 +1,1010 @@ +# -*- coding: utf-8 -*- +# pylint: disable-msg=E1101,W0612 + +from copy import copy, deepcopy +from warnings import catch_warnings + +import pytest +import numpy as np +import pandas as pd + +from pandas.core.dtypes.common import is_scalar +from pandas import (Series, DataFrame, Panel, + date_range, MultiIndex) + +import pandas.io.formats.printing as printing + +from pandas.compat import range, zip, PY3 +from pandas.util.testing import (assert_raises_regex, + assert_series_equal, + assert_panel_equal, + assert_frame_equal) + +import pandas.util.testing as tm + + +# ---------------------------------------------------------------------- +# Generic types test cases + +class Generic(object): + + @property + def _ndim(self): + return self._typ._AXIS_LEN + + def _axes(self): + """ return the axes for my object typ """ + return self._typ._AXIS_ORDERS + + def _construct(self, shape, value=None, dtype=None, **kwargs): + """ construct an object for the given shape + if value is specified use that if its a scalar + if value is an array, repeat it as needed """ + + if isinstance(shape, int): + shape = tuple([shape] * self._ndim) + if value is not None: + if is_scalar(value): + if value == 'empty': + arr = None + + # remove the info axis + kwargs.pop(self._typ._info_axis_name, None) + else: + arr = np.empty(shape, dtype=dtype) + arr.fill(value) + else: + fshape = np.prod(shape) + arr = value.ravel() + new_shape = fshape / arr.shape[0] + if fshape % arr.shape[0] != 0: + raise Exception("invalid value passed in _construct") + + arr = np.repeat(arr, new_shape).reshape(shape) + else: + arr = np.random.randn(*shape) + return self._typ(arr, dtype=dtype, **kwargs) + + def _compare(self, result, expected): + self._comparator(result, expected) + + def test_rename(self): + + # 
single axis + idx = list('ABCD') + # relabeling values passed into self.rename + args = [ + str.lower, + {x: x.lower() for x in idx}, + Series({x: x.lower() for x in idx}), + ] + + for axis in self._axes(): + kwargs = {axis: idx} + obj = self._construct(4, **kwargs) + + for arg in args: + # rename a single axis + result = obj.rename(**{axis: arg}) + expected = obj.copy() + setattr(expected, axis, list('abcd')) + self._compare(result, expected) + + # multiple axes at once + + def test_get_numeric_data(self): + + n = 4 + kwargs = {} + for i in range(self._ndim): + kwargs[self._typ._AXIS_NAMES[i]] = list(range(n)) + + # get the numeric data + o = self._construct(n, **kwargs) + result = o._get_numeric_data() + self._compare(result, o) + + # non-inclusion + result = o._get_bool_data() + expected = self._construct(n, value='empty', **kwargs) + self._compare(result, expected) + + # get the bool data + arr = np.array([True, True, False, True]) + o = self._construct(n, value=arr, **kwargs) + result = o._get_numeric_data() + self._compare(result, o) + + # _get_numeric_data is includes _get_bool_data, so can't test for + # non-inclusion + + def test_get_default(self): + + # GH 7725 + d0 = "a", "b", "c", "d" + d1 = np.arange(4, dtype='int64') + others = "e", 10 + + for data, index in ((d0, d1), (d1, d0)): + s = Series(data, index=index) + for i, d in zip(index, data): + assert s.get(i) == d + assert s.get(i, d) == d + assert s.get(i, "z") == d + for other in others: + assert s.get(other, "z") == "z" + assert s.get(other, other) == other + + def test_nonzero(self): + + # GH 4633 + # look at the boolean/nonzero behavior for objects + obj = self._construct(shape=4) + pytest.raises(ValueError, lambda: bool(obj == 0)) + pytest.raises(ValueError, lambda: bool(obj == 1)) + pytest.raises(ValueError, lambda: bool(obj)) + + obj = self._construct(shape=4, value=1) + pytest.raises(ValueError, lambda: bool(obj == 0)) + pytest.raises(ValueError, lambda: bool(obj == 1)) + 
pytest.raises(ValueError, lambda: bool(obj)) + + obj = self._construct(shape=4, value=np.nan) + pytest.raises(ValueError, lambda: bool(obj == 0)) + pytest.raises(ValueError, lambda: bool(obj == 1)) + pytest.raises(ValueError, lambda: bool(obj)) + + # empty + obj = self._construct(shape=0) + pytest.raises(ValueError, lambda: bool(obj)) + + # invalid behaviors + + obj1 = self._construct(shape=4, value=1) + obj2 = self._construct(shape=4, value=1) + + def f(): + if obj1: + printing.pprint_thing("this works and shouldn't") + + pytest.raises(ValueError, f) + pytest.raises(ValueError, lambda: obj1 and obj2) + pytest.raises(ValueError, lambda: obj1 or obj2) + pytest.raises(ValueError, lambda: not obj1) + + def test_downcast(self): + # test close downcasting + + o = self._construct(shape=4, value=9, dtype=np.int64) + result = o.copy() + result._data = o._data.downcast(dtypes='infer') + self._compare(result, o) + + o = self._construct(shape=4, value=9.) + expected = o.astype(np.int64) + result = o.copy() + result._data = o._data.downcast(dtypes='infer') + self._compare(result, expected) + + o = self._construct(shape=4, value=9.5) + result = o.copy() + result._data = o._data.downcast(dtypes='infer') + self._compare(result, o) + + # are close + o = self._construct(shape=4, value=9.000000000005) + result = o.copy() + result._data = o._data.downcast(dtypes='infer') + expected = o.astype(np.int64) + self._compare(result, expected) + + def test_constructor_compound_dtypes(self): + # GH 5191 + # compound dtypes should raise not-implementederror + + def f(dtype): + return self._construct(shape=3, dtype=dtype) + + pytest.raises(NotImplementedError, f, [("A", "datetime64[h]"), + ("B", "str"), + ("C", "int32")]) + + # these work (though results may be unexpected) + f('int64') + f('float64') + f('M8[ns]') + + def check_metadata(self, x, y=None): + for m in x._metadata: + v = getattr(x, m, None) + if y is None: + assert v is None + else: + assert v == getattr(y, m, None) + + def 
test_metadata_propagation(self): + # check that the metadata matches up on the resulting ops + + o = self._construct(shape=3) + o.name = 'foo' + o2 = self._construct(shape=3) + o2.name = 'bar' + + # TODO + # Once panel can do non-trivial combine operations + # (currently there is an a raise in the Panel arith_ops to prevent + # this, though it actually does work) + # can remove all of these try: except: blocks on the actual operations + + # ---------- + # preserving + # ---------- + + # simple ops with scalars + for op in ['__add__', '__sub__', '__truediv__', '__mul__']: + result = getattr(o, op)(1) + self.check_metadata(o, result) + + # ops with like + for op in ['__add__', '__sub__', '__truediv__', '__mul__']: + try: + result = getattr(o, op)(o) + self.check_metadata(o, result) + except (ValueError, AttributeError): + pass + + # simple boolean + for op in ['__eq__', '__le__', '__ge__']: + v1 = getattr(o, op)(o) + self.check_metadata(o, v1) + + try: + self.check_metadata(o, v1 & v1) + except (ValueError): + pass + + try: + self.check_metadata(o, v1 | v1) + except (ValueError): + pass + + # combine_first + try: + result = o.combine_first(o2) + self.check_metadata(o, result) + except (AttributeError): + pass + + # --------------------------- + # non-preserving (by default) + # --------------------------- + + # add non-like + try: + result = o + o2 + self.check_metadata(result) + except (ValueError, AttributeError): + pass + + # simple boolean + for op in ['__eq__', '__le__', '__ge__']: + + # this is a name matching op + v1 = getattr(o, op)(o) + + v2 = getattr(o, op)(o2) + self.check_metadata(v2) + + try: + self.check_metadata(v1 & v2) + except (ValueError): + pass + + try: + self.check_metadata(v1 | v2) + except (ValueError): + pass + + def test_head_tail(self): + # GH5370 + + o = self._construct(shape=10) + + # check all index types + for index in [tm.makeFloatIndex, tm.makeIntIndex, tm.makeStringIndex, + tm.makeUnicodeIndex, tm.makeDateIndex, + 
tm.makePeriodIndex]: + axis = o._get_axis_name(0) + setattr(o, axis, index(len(getattr(o, axis)))) + + # Panel + dims + try: + o.head() + except (NotImplementedError): + pytest.skip('not implemented on {0}'.format( + o.__class__.__name__)) + + self._compare(o.head(), o.iloc[:5]) + self._compare(o.tail(), o.iloc[-5:]) + + # 0-len + self._compare(o.head(0), o.iloc[0:0]) + self._compare(o.tail(0), o.iloc[0:0]) + + # bounded + self._compare(o.head(len(o) + 1), o) + self._compare(o.tail(len(o) + 1), o) + + # neg index + self._compare(o.head(-3), o.head(7)) + self._compare(o.tail(-3), o.tail(7)) + + def test_sample(self): + # Fixes issue: 2419 + + o = self._construct(shape=10) + + ### + # Check behavior of random_state argument + ### + + # Check for stability when receives seed or random state -- run 10 + # times. + for test in range(10): + seed = np.random.randint(0, 100) + self._compare( + o.sample(n=4, random_state=seed), o.sample(n=4, + random_state=seed)) + self._compare( + o.sample(frac=0.7, random_state=seed), o.sample( + frac=0.7, random_state=seed)) + + self._compare( + o.sample(n=4, random_state=np.random.RandomState(test)), + o.sample(n=4, random_state=np.random.RandomState(test))) + + self._compare( + o.sample(frac=0.7, random_state=np.random.RandomState(test)), + o.sample(frac=0.7, random_state=np.random.RandomState(test))) + + os1, os2 = [], [] + for _ in range(2): + np.random.seed(test) + os1.append(o.sample(n=4)) + os2.append(o.sample(frac=0.7)) + self._compare(*os1) + self._compare(*os2) + + # Check for error when random_state argument invalid. 
+ with pytest.raises(ValueError): + o.sample(random_state='astring!') + + ### + # Check behavior of `frac` and `N` + ### + + # Giving both frac and N throws error + with pytest.raises(ValueError): + o.sample(n=3, frac=0.3) + + # Check that raises right error for negative lengths + with pytest.raises(ValueError): + o.sample(n=-3) + with pytest.raises(ValueError): + o.sample(frac=-0.3) + + # Make sure float values of `n` give error + with pytest.raises(ValueError): + o.sample(n=3.2) + + # Check lengths are right + assert len(o.sample(n=4) == 4) + assert len(o.sample(frac=0.34) == 3) + assert len(o.sample(frac=0.36) == 4) + + ### + # Check weights + ### + + # Weight length must be right + with pytest.raises(ValueError): + o.sample(n=3, weights=[0, 1]) + + with pytest.raises(ValueError): + bad_weights = [0.5] * 11 + o.sample(n=3, weights=bad_weights) + + with pytest.raises(ValueError): + bad_weight_series = Series([0, 0, 0.2]) + o.sample(n=4, weights=bad_weight_series) + + # Check won't accept negative weights + with pytest.raises(ValueError): + bad_weights = [-0.1] * 10 + o.sample(n=3, weights=bad_weights) + + # Check inf and -inf throw errors: + with pytest.raises(ValueError): + weights_with_inf = [0.1] * 10 + weights_with_inf[0] = np.inf + o.sample(n=3, weights=weights_with_inf) + + with pytest.raises(ValueError): + weights_with_ninf = [0.1] * 10 + weights_with_ninf[0] = -np.inf + o.sample(n=3, weights=weights_with_ninf) + + # All zeros raises errors + zero_weights = [0] * 10 + with pytest.raises(ValueError): + o.sample(n=3, weights=zero_weights) + + # All missing weights + nan_weights = [np.nan] * 10 + with pytest.raises(ValueError): + o.sample(n=3, weights=nan_weights) + + # Check np.nan are replaced by zeros. + weights_with_nan = [np.nan] * 10 + weights_with_nan[5] = 0.5 + self._compare( + o.sample(n=1, axis=0, weights=weights_with_nan), o.iloc[5:6]) + + # Check None are also replaced by zeros. 
+ weights_with_None = [None] * 10 + weights_with_None[5] = 0.5 + self._compare( + o.sample(n=1, axis=0, weights=weights_with_None), o.iloc[5:6]) + + def test_size_compat(self): + # GH8846 + # size property should be defined + + o = self._construct(shape=10) + assert o.size == np.prod(o.shape) + assert o.size == 10 ** len(o.axes) + + def test_split_compat(self): + # xref GH8846 + o = self._construct(shape=10) + assert len(np.array_split(o, 5)) == 5 + assert len(np.array_split(o, 2)) == 2 + + def test_unexpected_keyword(self): # GH8597 + df = DataFrame(np.random.randn(5, 2), columns=['jim', 'joe']) + ca = pd.Categorical([0, 0, 2, 2, 3, np.nan]) + ts = df['joe'].copy() + ts[2] = np.nan + + with assert_raises_regex(TypeError, 'unexpected keyword'): + df.drop('joe', axis=1, in_place=True) + + with assert_raises_regex(TypeError, 'unexpected keyword'): + df.reindex([1, 0], inplace=True) + + with assert_raises_regex(TypeError, 'unexpected keyword'): + ca.fillna(0, inplace=True) + + with assert_raises_regex(TypeError, 'unexpected keyword'): + ts.fillna(0, in_place=True) + + # See gh-12301 + def test_stat_unexpected_keyword(self): + obj = self._construct(5) + starwars = 'Star Wars' + errmsg = 'unexpected keyword' + + with assert_raises_regex(TypeError, errmsg): + obj.max(epic=starwars) # stat_function + with assert_raises_regex(TypeError, errmsg): + obj.var(epic=starwars) # stat_function_ddof + with assert_raises_regex(TypeError, errmsg): + obj.sum(epic=starwars) # cum_function + with assert_raises_regex(TypeError, errmsg): + obj.any(epic=starwars) # logical_function + + def test_api_compat(self): + + # GH 12021 + # compat for __name__, __qualname__ + + obj = self._construct(5) + for func in ['sum', 'cumsum', 'any', 'var']: + f = getattr(obj, func) + assert f.__name__ == func + if PY3: + assert f.__qualname__.endswith(func) + + def test_stat_non_defaults_args(self): + obj = self._construct(5) + out = np.array([0]) + errmsg = "the 'out' parameter is not supported" + + with 
assert_raises_regex(ValueError, errmsg): + obj.max(out=out) # stat_function + with assert_raises_regex(ValueError, errmsg): + obj.var(out=out) # stat_function_ddof + with assert_raises_regex(ValueError, errmsg): + obj.sum(out=out) # cum_function + with assert_raises_regex(ValueError, errmsg): + obj.any(out=out) # logical_function + + def test_truncate_out_of_bounds(self): + # GH11382 + + # small + shape = [int(2e3)] + ([1] * (self._ndim - 1)) + small = self._construct(shape, dtype='int8') + self._compare(small.truncate(), small) + self._compare(small.truncate(before=0, after=3e3), small) + self._compare(small.truncate(before=-1, after=2e3), small) + + # big + shape = [int(2e6)] + ([1] * (self._ndim - 1)) + big = self._construct(shape, dtype='int8') + self._compare(big.truncate(), big) + self._compare(big.truncate(before=0, after=3e6), big) + self._compare(big.truncate(before=-1, after=2e6), big) + + def test_validate_bool_args(self): + df = DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}) + invalid_values = [1, "True", [1, 2, 3], 5.0] + + for value in invalid_values: + with pytest.raises(ValueError): + super(DataFrame, df).rename_axis(mapper={'a': 'x', 'b': 'y'}, + axis=1, inplace=value) + + with pytest.raises(ValueError): + super(DataFrame, df).drop('a', axis=1, inplace=value) + + with pytest.raises(ValueError): + super(DataFrame, df).sort_index(inplace=value) + + with pytest.raises(ValueError): + super(DataFrame, df)._consolidate(inplace=value) + + with pytest.raises(ValueError): + super(DataFrame, df).fillna(value=0, inplace=value) + + with pytest.raises(ValueError): + super(DataFrame, df).replace(to_replace=1, value=7, + inplace=value) + + with pytest.raises(ValueError): + super(DataFrame, df).interpolate(inplace=value) + + with pytest.raises(ValueError): + super(DataFrame, df)._where(cond=df.a > 2, inplace=value) + + with pytest.raises(ValueError): + super(DataFrame, df).mask(cond=df.a > 2, inplace=value) + + def test_copy_and_deepcopy(self): + # GH 15444 + for 
shape in [0, 1, 2]: + obj = self._construct(shape) + for func in [copy, + deepcopy, + lambda x: x.copy(deep=False), + lambda x: x.copy(deep=True)]: + obj_copy = func(obj) + assert obj_copy is not obj + self._compare(obj_copy, obj) + + @pytest.mark.parametrize("periods,fill_method,limit,exp", [ + (1, "ffill", None, [np.nan, np.nan, np.nan, 1, 1, 1.5, 0, 0]), + (1, "ffill", 1, [np.nan, np.nan, np.nan, 1, 1, 1.5, 0, np.nan]), + (1, "bfill", None, [np.nan, 0, 0, 1, 1, 1.5, np.nan, np.nan]), + (1, "bfill", 1, [np.nan, np.nan, 0, 1, 1, 1.5, np.nan, np.nan]), + (-1, "ffill", None, [np.nan, np.nan, -.5, -.5, -.6, 0, 0, np.nan]), + (-1, "ffill", 1, [np.nan, np.nan, -.5, -.5, -.6, 0, np.nan, np.nan]), + (-1, "bfill", None, [0, 0, -.5, -.5, -.6, np.nan, np.nan, np.nan]), + (-1, "bfill", 1, [np.nan, 0, -.5, -.5, -.6, np.nan, np.nan, np.nan]) + ]) + def test_pct_change(self, periods, fill_method, limit, exp): + vals = [np.nan, np.nan, 1, 2, 4, 10, np.nan, np.nan] + obj = self._typ(vals) + func = getattr(obj, 'pct_change') + res = func(periods=periods, fill_method=fill_method, limit=limit) + if type(obj) is DataFrame: + tm.assert_frame_equal(res, DataFrame(exp)) + else: + tm.assert_series_equal(res, Series(exp)) + + +class TestNDFrame(object): + # tests that don't fit elsewhere + + def test_sample(sel): + # Fixes issue: 2419 + # additional specific object based tests + + # A few dataframe test with degenerate weights. + easy_weight_list = [0] * 10 + easy_weight_list[5] = 1 + + df = pd.DataFrame({'col1': range(10, 20), + 'col2': range(20, 30), + 'colString': ['a'] * 10, + 'easyweights': easy_weight_list}) + sample1 = df.sample(n=1, weights='easyweights') + assert_frame_equal(sample1, df.iloc[5:6]) + + # Ensure proper error if string given as weight for Series, panel, or + # DataFrame with axis = 1. 
+ s = Series(range(10)) + with pytest.raises(ValueError): + s.sample(n=3, weights='weight_column') + + with catch_warnings(record=True): + panel = Panel(items=[0, 1, 2], major_axis=[2, 3, 4], + minor_axis=[3, 4, 5]) + with pytest.raises(ValueError): + panel.sample(n=1, weights='weight_column') + + with pytest.raises(ValueError): + df.sample(n=1, weights='weight_column', axis=1) + + # Check weighting key error + with pytest.raises(KeyError): + df.sample(n=3, weights='not_a_real_column_name') + + # Check that re-normalizes weights that don't sum to one. + weights_less_than_1 = [0] * 10 + weights_less_than_1[0] = 0.5 + tm.assert_frame_equal( + df.sample(n=1, weights=weights_less_than_1), df.iloc[:1]) + + ### + # Test axis argument + ### + + # Test axis argument + df = pd.DataFrame({'col1': range(10), 'col2': ['a'] * 10}) + second_column_weight = [0, 1] + assert_frame_equal( + df.sample(n=1, axis=1, weights=second_column_weight), df[['col2']]) + + # Different axis arg types + assert_frame_equal(df.sample(n=1, axis='columns', + weights=second_column_weight), + df[['col2']]) + + weight = [0] * 10 + weight[5] = 0.5 + assert_frame_equal(df.sample(n=1, axis='rows', weights=weight), + df.iloc[5:6]) + assert_frame_equal(df.sample(n=1, axis='index', weights=weight), + df.iloc[5:6]) + + # Check out of range axis values + with pytest.raises(ValueError): + df.sample(n=1, axis=2) + + with pytest.raises(ValueError): + df.sample(n=1, axis='not_a_name') + + with pytest.raises(ValueError): + s = pd.Series(range(10)) + s.sample(n=1, axis=1) + + # Test weight length compared to correct axis + with pytest.raises(ValueError): + df.sample(n=1, axis=1, weights=[0.5] * 10) + + # Check weights with axis = 1 + easy_weight_list = [0] * 3 + easy_weight_list[2] = 1 + + df = pd.DataFrame({'col1': range(10, 20), + 'col2': range(20, 30), + 'colString': ['a'] * 10}) + sample1 = df.sample(n=1, axis=1, weights=easy_weight_list) + assert_frame_equal(sample1, df[['colString']]) + + # Test default axes + 
with catch_warnings(record=True): + p = Panel(items=['a', 'b', 'c'], major_axis=[2, 4, 6], + minor_axis=[1, 3, 5]) + assert_panel_equal( + p.sample(n=3, random_state=42), p.sample(n=3, axis=1, + random_state=42)) + assert_frame_equal( + df.sample(n=3, random_state=42), df.sample(n=3, axis=0, + random_state=42)) + + # Test that function aligns weights with frame + df = DataFrame( + {'col1': [5, 6, 7], + 'col2': ['a', 'b', 'c'], }, index=[9, 5, 3]) + s = Series([1, 0, 0], index=[3, 5, 9]) + assert_frame_equal(df.loc[[3]], df.sample(1, weights=s)) + + # Weights have index values to be dropped because not in + # sampled DataFrame + s2 = Series([0.001, 0, 10000], index=[3, 5, 10]) + assert_frame_equal(df.loc[[3]], df.sample(1, weights=s2)) + + # Weights have empty values to be filed with zeros + s3 = Series([0.01, 0], index=[3, 5]) + assert_frame_equal(df.loc[[3]], df.sample(1, weights=s3)) + + # No overlap in weight and sampled DataFrame indices + s4 = Series([1, 0], index=[1, 2]) + with pytest.raises(ValueError): + df.sample(1, weights=s4) + + def test_squeeze(self): + # noop + for s in [tm.makeFloatSeries(), tm.makeStringSeries(), + tm.makeObjectSeries()]: + tm.assert_series_equal(s.squeeze(), s) + for df in [tm.makeTimeDataFrame()]: + tm.assert_frame_equal(df.squeeze(), df) + with catch_warnings(record=True): + for p in [tm.makePanel()]: + tm.assert_panel_equal(p.squeeze(), p) + + # squeezing + df = tm.makeTimeDataFrame().reindex(columns=['A']) + tm.assert_series_equal(df.squeeze(), df['A']) + + with catch_warnings(record=True): + p = tm.makePanel().reindex(items=['ItemA']) + tm.assert_frame_equal(p.squeeze(), p['ItemA']) + + p = tm.makePanel().reindex(items=['ItemA'], minor_axis=['A']) + tm.assert_series_equal(p.squeeze(), p.loc['ItemA', :, 'A']) + + # don't fail with 0 length dimensions GH11229 & GH8999 + empty_series = Series([], name='five') + empty_frame = DataFrame([empty_series]) + with catch_warnings(record=True): + empty_panel = Panel({'six': empty_frame}) 
+ + [tm.assert_series_equal(empty_series, higher_dim.squeeze()) + for higher_dim in [empty_series, empty_frame, empty_panel]] + + # axis argument + df = tm.makeTimeDataFrame(nper=1).iloc[:, :1] + assert df.shape == (1, 1) + tm.assert_series_equal(df.squeeze(axis=0), df.iloc[0]) + tm.assert_series_equal(df.squeeze(axis='index'), df.iloc[0]) + tm.assert_series_equal(df.squeeze(axis=1), df.iloc[:, 0]) + tm.assert_series_equal(df.squeeze(axis='columns'), df.iloc[:, 0]) + assert df.squeeze() == df.iloc[0, 0] + pytest.raises(ValueError, df.squeeze, axis=2) + pytest.raises(ValueError, df.squeeze, axis='x') + + df = tm.makeTimeDataFrame(3) + tm.assert_frame_equal(df.squeeze(axis=0), df) + + def test_numpy_squeeze(self): + s = tm.makeFloatSeries() + tm.assert_series_equal(np.squeeze(s), s) + + df = tm.makeTimeDataFrame().reindex(columns=['A']) + tm.assert_series_equal(np.squeeze(df), df['A']) + + def test_transpose(self): + msg = (r"transpose\(\) got multiple values for " + r"keyword argument 'axes'") + for s in [tm.makeFloatSeries(), tm.makeStringSeries(), + tm.makeObjectSeries()]: + # calls implementation in pandas/core/base.py + tm.assert_series_equal(s.transpose(), s) + for df in [tm.makeTimeDataFrame()]: + tm.assert_frame_equal(df.transpose().transpose(), df) + + with catch_warnings(record=True): + for p in [tm.makePanel()]: + tm.assert_panel_equal(p.transpose(2, 0, 1) + .transpose(1, 2, 0), p) + tm.assert_raises_regex(TypeError, msg, p.transpose, + 2, 0, 1, axes=(2, 0, 1)) + + def test_numpy_transpose(self): + msg = "the 'axes' parameter is not supported" + + s = tm.makeFloatSeries() + tm.assert_series_equal( + np.transpose(s), s) + tm.assert_raises_regex(ValueError, msg, + np.transpose, s, axes=1) + + df = tm.makeTimeDataFrame() + tm.assert_frame_equal(np.transpose( + np.transpose(df)), df) + tm.assert_raises_regex(ValueError, msg, + np.transpose, df, axes=1) + + with catch_warnings(record=True): + p = tm.makePanel() + tm.assert_panel_equal(np.transpose( + 
np.transpose(p, axes=(2, 0, 1)), + axes=(1, 2, 0)), p) + + def test_take(self): + indices = [1, 5, -2, 6, 3, -1] + for s in [tm.makeFloatSeries(), tm.makeStringSeries(), + tm.makeObjectSeries()]: + out = s.take(indices) + expected = Series(data=s.values.take(indices), + index=s.index.take(indices), dtype=s.dtype) + tm.assert_series_equal(out, expected) + for df in [tm.makeTimeDataFrame()]: + out = df.take(indices) + expected = DataFrame(data=df.values.take(indices, axis=0), + index=df.index.take(indices), + columns=df.columns) + tm.assert_frame_equal(out, expected) + + indices = [-3, 2, 0, 1] + with catch_warnings(record=True): + for p in [tm.makePanel()]: + out = p.take(indices) + expected = Panel(data=p.values.take(indices, axis=0), + items=p.items.take(indices), + major_axis=p.major_axis, + minor_axis=p.minor_axis) + tm.assert_panel_equal(out, expected) + + def test_take_invalid_kwargs(self): + indices = [-3, 2, 0, 1] + s = tm.makeFloatSeries() + df = tm.makeTimeDataFrame() + + with catch_warnings(record=True): + p = tm.makePanel() + + for obj in (s, df, p): + msg = r"take\(\) got an unexpected keyword argument 'foo'" + tm.assert_raises_regex(TypeError, msg, obj.take, + indices, foo=2) + + msg = "the 'out' parameter is not supported" + tm.assert_raises_regex(ValueError, msg, obj.take, + indices, out=indices) + + msg = "the 'mode' parameter is not supported" + tm.assert_raises_regex(ValueError, msg, obj.take, + indices, mode='clip') + + def test_equals(self): + s1 = pd.Series([1, 2, 3], index=[0, 2, 1]) + s2 = s1.copy() + assert s1.equals(s2) + + s1[1] = 99 + assert not s1.equals(s2) + + # NaNs compare as equal + s1 = pd.Series([1, np.nan, 3, np.nan], index=[0, 2, 1, 3]) + s2 = s1.copy() + assert s1.equals(s2) + + s2[0] = 9.9 + assert not s1.equals(s2) + + idx = MultiIndex.from_tuples([(0, 'a'), (1, 'b'), (2, 'c')]) + s1 = Series([1, 2, np.nan], index=idx) + s2 = s1.copy() + assert s1.equals(s2) + + # Add object dtype column with nans + index = 
np.random.random(10) + df1 = DataFrame( + np.random.random(10, ), index=index, columns=['floats']) + df1['text'] = 'the sky is so blue. we could use more chocolate.'.split( + ) + df1['start'] = date_range('2000-1-1', periods=10, freq='T') + df1['end'] = date_range('2000-1-1', periods=10, freq='D') + df1['diff'] = df1['end'] - df1['start'] + df1['bool'] = (np.arange(10) % 3 == 0) + df1.loc[::2] = np.nan + df2 = df1.copy() + assert df1['text'].equals(df2['text']) + assert df1['start'].equals(df2['start']) + assert df1['end'].equals(df2['end']) + assert df1['diff'].equals(df2['diff']) + assert df1['bool'].equals(df2['bool']) + assert df1.equals(df2) + assert not df1.equals(object) + + # different dtype + different = df1.copy() + different['floats'] = different['floats'].astype('float32') + assert not df1.equals(different) + + # different index + different_index = -index + different = df2.set_index(different_index) + assert not df1.equals(different) + + # different columns + different = df2.copy() + different.columns = df2.columns[::-1] + assert not df1.equals(different) + + # DatetimeIndex + index = pd.date_range('2000-1-1', periods=10, freq='T') + df1 = df1.set_index(index) + df2 = df1.copy() + assert df1.equals(df2) + + # MultiIndex + df3 = df1.set_index(['text'], append=True) + df2 = df1.set_index(['text'], append=True) + assert df3.equals(df2) + + df2 = df1.set_index(['floats'], append=True) + assert not df3.equals(df2) + + # NaN in index + df3 = df1.set_index(['floats'], append=True) + df2 = df1.set_index(['floats'], append=True) + assert df3.equals(df2) + + # GH 8437 + a = pd.Series([False, np.nan]) + b = pd.Series([False, np.nan]) + c = pd.Series(index=range(2)) + d = pd.Series(index=range(2)) + e = pd.Series(index=range(2)) + f = pd.Series(index=range(2)) + c[:-1] = d[:-1] = e[0] = f[0] = False + assert a.equals(a) + assert a.equals(b) + assert a.equals(c) + assert a.equals(d) + assert a.equals(e) + assert e.equals(f) + + def test_describe_raises(self): + with 
catch_warnings(record=True): + with pytest.raises(NotImplementedError): + tm.makePanel().describe() + + def test_pipe(self): + df = DataFrame({'A': [1, 2, 3]}) + f = lambda x, y: x ** y + result = df.pipe(f, 2) + expected = DataFrame({'A': [1, 4, 9]}) + assert_frame_equal(result, expected) + + result = df.A.pipe(f, 2) + assert_series_equal(result, expected.A) + + def test_pipe_tuple(self): + df = DataFrame({'A': [1, 2, 3]}) + f = lambda x, y: y + result = df.pipe((f, 'y'), 0) + assert_frame_equal(result, df) + + result = df.A.pipe((f, 'y'), 0) + assert_series_equal(result, df.A) + + def test_pipe_tuple_error(self): + df = DataFrame({"A": [1, 2, 3]}) + f = lambda x, y: y + with pytest.raises(ValueError): + df.pipe((f, 'y'), x=1, y=0) + + with pytest.raises(ValueError): + df.A.pipe((f, 'y'), x=1, y=0) + + def test_pipe_panel(self): + with catch_warnings(record=True): + wp = Panel({'r1': DataFrame({"A": [1, 2, 3]})}) + f = lambda x, y: x + y + result = wp.pipe(f, 2) + expected = wp + 2 + assert_panel_equal(result, expected) + + result = wp.pipe((f, 'y'), x=1) + expected = wp + 1 + assert_panel_equal(result, expected) + + with pytest.raises(ValueError): + result = wp.pipe((f, 'y'), x=1, y=1) diff --git a/pandas/tests/generic/test_label_or_level_utils.py b/pandas/tests/generic/test_label_or_level_utils.py new file mode 100644 index 0000000000000..8b133e654a869 --- /dev/null +++ b/pandas/tests/generic/test_label_or_level_utils.py @@ -0,0 +1,430 @@ +import pytest +import pandas as pd +import pandas.util.testing as tm +from pandas.core.dtypes.missing import array_equivalent + + +# Fixtures +# ======== +@pytest.fixture +def df(): + """DataFrame with columns 'L1', 'L2', and 'L3' """ + return pd.DataFrame({'L1': [1, 2, 3], + 'L2': [11, 12, 13], + 'L3': ['A', 'B', 'C']}) + + +@pytest.fixture(params=[[], ['L1'], ['L1', 'L2'], ['L1', 'L2', 'L3']]) +def df_levels(request, df): + """DataFrame with columns or index levels 'L1', 'L2', and 'L3' """ + levels = request.param + + if 
levels: + df = df.set_index(levels) + + return df + + +@pytest.fixture +def df_ambig(df): + """DataFrame with levels 'L1' and 'L2' and labels 'L1' and 'L3' """ + df = df.set_index(['L1', 'L2']) + + df['L1'] = df['L3'] + + return df + + +@pytest.fixture +def df_duplabels(df): + """DataFrame with level 'L1' and labels 'L2', 'L3', and 'L2' """ + df = df.set_index(['L1']) + df = pd.concat([df, df['L2']], axis=1) + + return df + + +@pytest.fixture +def panel(): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + return pd.Panel() + + +# Test is label/level reference +# ============================= +def get_labels_levels(df_levels): + expected_labels = list(df_levels.columns) + expected_levels = [name for name in df_levels.index.names + if name is not None] + return expected_labels, expected_levels + + +def assert_label_reference(frame, labels, axis): + for label in labels: + assert frame._is_label_reference(label, axis=axis) + assert not frame._is_level_reference(label, axis=axis) + assert frame._is_label_or_level_reference(label, axis=axis) + + +def assert_level_reference(frame, levels, axis): + for level in levels: + assert frame._is_level_reference(level, axis=axis) + assert not frame._is_label_reference(level, axis=axis) + assert frame._is_label_or_level_reference(level, axis=axis) + + +# DataFrame +# --------- +@pytest.mark.parametrize('axis', [0, 1]) +def test_is_level_or_label_reference_df_simple(df_levels, axis): + + # Compute expected labels and levels + expected_labels, expected_levels = get_labels_levels(df_levels) + + # Transpose frame if axis == 1 + if axis == 1: + df_levels = df_levels.T + + # Perform checks + assert_level_reference(df_levels, expected_levels, axis=axis) + assert_label_reference(df_levels, expected_labels, axis=axis) + + +@pytest.mark.parametrize('axis', [0, 1]) +def test_is_level_reference_df_ambig(df_ambig, axis): + + # Transpose frame if axis == 1 + if axis == 1: + df_ambig = df_ambig.T + + # df has both an 
on-axis level and off-axis label named L1 + # Therefore L1 should reference the label, not the level + assert_label_reference(df_ambig, ['L1'], axis=axis) + + # df has an on-axis level named L2 and it is not ambiguous + # Therefore L2 is an level reference + assert_level_reference(df_ambig, ['L2'], axis=axis) + + # df has a column named L3 and it not an level reference + assert_label_reference(df_ambig, ['L3'], axis=axis) + + +# Series +# ------ +def test_is_level_reference_series_simple_axis0(df): + + # Make series with L1 as index + s = df.set_index('L1').L2 + assert_level_reference(s, ['L1'], axis=0) + assert not s._is_level_reference('L2') + + # Make series with L1 and L2 as index + s = df.set_index(['L1', 'L2']).L3 + assert_level_reference(s, ['L1', 'L2'], axis=0) + assert not s._is_level_reference('L3') + + +def test_is_level_reference_series_axis1_error(df): + + # Make series with L1 as index + s = df.set_index('L1').L2 + + with tm.assert_raises_regex(ValueError, "No axis named 1"): + s._is_level_reference('L1', axis=1) + + +# Panel +# ----- +def test_is_level_reference_panel_error(panel): + msg = ("_is_level_reference is not implemented for {type}" + .format(type=type(panel))) + + with tm.assert_raises_regex(NotImplementedError, msg): + panel._is_level_reference('L1', axis=0) + + +def test_is_label_reference_panel_error(panel): + msg = ("_is_label_reference is not implemented for {type}" + .format(type=type(panel))) + + with tm.assert_raises_regex(NotImplementedError, msg): + panel._is_label_reference('L1', axis=0) + + +def test_is_label_or_level_reference_panel_error(panel): + msg = ("_is_label_or_level_reference is not implemented for {type}" + .format(type=type(panel))) + + with tm.assert_raises_regex(NotImplementedError, msg): + panel._is_label_or_level_reference('L1', axis=0) + + +# Test _check_label_or_level_ambiguity_df +# ======================================= + +# DataFrame +# --------- +@pytest.mark.parametrize('axis', [0, 1]) +def 
test_check_label_or_level_ambiguity_df(df_ambig, axis): + + # Transpose frame if axis == 1 + if axis == 1: + df_ambig = df_ambig.T + + # df_ambig has both an on-axis level and off-axis label named L1 + # Therefore L1 is ambiguous + with tm.assert_produces_warning(FutureWarning, + clear=True) as w: + + assert df_ambig._check_label_or_level_ambiguity('L1', axis=axis) + warning_msg = w[0].message.args[0] + if axis == 0: + assert warning_msg.startswith("'L1' is both an index level " + "and a column label") + else: + assert warning_msg.startswith("'L1' is both a column level " + "and an index label") + + # df_ambig has an on-axis level named L2 and it is not ambiguous + # No warning should be raised + with tm.assert_produces_warning(None): + assert not df_ambig._check_label_or_level_ambiguity('L2', axis=axis) + + # df_ambig has an off-axis label named L3 and it is not ambiguous + with tm.assert_produces_warning(None): + assert not df_ambig._is_level_reference('L3', axis=axis) + + +# Series +# ------ +def test_check_label_or_level_ambiguity_series(df): + + # A series has no columns and therefore references are never ambiguous + + # Make series with L1 as index + s = df.set_index('L1').L2 + with tm.assert_produces_warning(None): + assert not s._check_label_or_level_ambiguity('L1', axis=0) + assert not s._check_label_or_level_ambiguity('L2', axis=0) + + # Make series with L1 and L2 as index + s = df.set_index(['L1', 'L2']).L3 + with tm.assert_produces_warning(None): + assert not s._check_label_or_level_ambiguity('L1', axis=0) + assert not s._check_label_or_level_ambiguity('L2', axis=0) + assert not s._check_label_or_level_ambiguity('L3', axis=0) + + +def test_check_label_or_level_ambiguity_series_axis1_error(df): + + # Make series with L1 as index + s = df.set_index('L1').L2 + + with tm.assert_raises_regex(ValueError, "No axis named 1"): + s._check_label_or_level_ambiguity('L1', axis=1) + + +# Panel +# ----- +def test_check_label_or_level_ambiguity_panel_error(panel): + 
msg = ("_check_label_or_level_ambiguity is not implemented for {type}" + .format(type=type(panel))) + + with tm.assert_raises_regex(NotImplementedError, msg): + panel._check_label_or_level_ambiguity('L1', axis=0) + + +# Test _get_label_or_level_values +# =============================== +def assert_label_values(frame, labels, axis): + for label in labels: + if axis == 0: + expected = frame[label]._values + else: + expected = frame.loc[label]._values + + result = frame._get_label_or_level_values(label, axis=axis, + stacklevel=2) + assert array_equivalent(expected, result) + + +def assert_level_values(frame, levels, axis): + for level in levels: + if axis == 0: + expected = frame.index.get_level_values(level=level)._values + else: + expected = (frame.columns + .get_level_values(level=level) + ._values) + + result = frame._get_label_or_level_values(level, axis=axis) + assert array_equivalent(expected, result) + + +# DataFrame +# --------- +@pytest.mark.parametrize('axis', [0, 1]) +def test_get_label_or_level_values_df_simple(df_levels, axis): + + # Compute expected labels and levels + expected_labels, expected_levels = get_labels_levels(df_levels) + + # Transpose frame if axis == 1 + if axis == 1: + df_levels = df_levels.T + + # Perform checks + assert_label_values(df_levels, expected_labels, axis=axis) + assert_level_values(df_levels, expected_levels, axis=axis) + + +@pytest.mark.parametrize('axis', [0, 1]) +def test_get_label_or_level_values_df_ambig(df_ambig, axis): + + # Transpose frame if axis == 1 + if axis == 1: + df_ambig = df_ambig.T + + # df has both an on-axis level and off-axis label named L1 + # Therefore L1 is ambiguous but will default to label + with tm.assert_produces_warning(FutureWarning): + assert_label_values(df_ambig, ['L1'], axis=axis) + + # df has an on-axis level named L2 and it is not ambiguous + with tm.assert_produces_warning(None): + assert_level_values(df_ambig, ['L2'], axis=axis) + + # df has an off-axis label named L3 and it is not 
ambiguous + with tm.assert_produces_warning(None): + assert_label_values(df_ambig, ['L3'], axis=axis) + + +@pytest.mark.parametrize('axis', [0, 1]) +def test_get_label_or_level_values_df_duplabels(df_duplabels, axis): + + # Transpose frame if axis == 1 + if axis == 1: + df_duplabels = df_duplabels.T + + # df has unambiguous level 'L1' + assert_level_values(df_duplabels, ['L1'], axis=axis) + + # df has unique label 'L3' + assert_label_values(df_duplabels, ['L3'], axis=axis) + + # df has duplicate labels 'L2' + if axis == 0: + expected_msg = "The column label 'L2' is not unique" + else: + expected_msg = "The index label 'L2' is not unique" + + with tm.assert_raises_regex(ValueError, expected_msg): + assert_label_values(df_duplabels, ['L2'], axis=axis) + + +# Series +# ------ +def test_get_label_or_level_values_series_axis0(df): + + # Make series with L1 as index + s = df.set_index('L1').L2 + assert_level_values(s, ['L1'], axis=0) + + # Make series with L1 and L2 as index + s = df.set_index(['L1', 'L2']).L3 + assert_level_values(s, ['L1', 'L2'], axis=0) + + +def test_get_label_or_level_values_series_axis1_error(df): + + # Make series with L1 as index + s = df.set_index('L1').L2 + + with tm.assert_raises_regex(ValueError, "No axis named 1"): + s._get_label_or_level_values('L1', axis=1) + + +# Panel +# ----- +def test_get_label_or_level_values_panel_error(panel): + msg = ("_get_label_or_level_values is not implemented for {type}" + .format(type=type(panel))) + + with tm.assert_raises_regex(NotImplementedError, msg): + panel._get_label_or_level_values('L1', axis=0) + + +# Test _drop_labels_or_levels +# =========================== +def assert_labels_dropped(frame, labels, axis): + for label in labels: + df_dropped = frame._drop_labels_or_levels(label, axis=axis) + + if axis == 0: + assert label in frame.columns + assert label not in df_dropped.columns + else: + assert label in frame.index + assert label not in df_dropped.index + + +def assert_levels_dropped(frame, levels, 
axis): + for level in levels: + df_dropped = frame._drop_labels_or_levels(level, axis=axis) + + if axis == 0: + assert level in frame.index.names + assert level not in df_dropped.index.names + else: + assert level in frame.columns.names + assert level not in df_dropped.columns.names + + +# DataFrame +# --------- +@pytest.mark.parametrize('axis', [0, 1]) +def test_drop_labels_or_levels_df(df_levels, axis): + + # Compute expected labels and levels + expected_labels, expected_levels = get_labels_levels(df_levels) + + # Transpose frame if axis == 1 + if axis == 1: + df_levels = df_levels.T + + # Perform checks + assert_labels_dropped(df_levels, expected_labels, axis=axis) + assert_levels_dropped(df_levels, expected_levels, axis=axis) + + with tm.assert_raises_regex(ValueError, "not valid labels or levels"): + df_levels._drop_labels_or_levels('L4', axis=axis) + + +# Series +# ------ +def test_drop_labels_or_levels_series(df): + + # Make series with L1 as index + s = df.set_index('L1').L2 + assert_levels_dropped(s, ['L1'], axis=0) + + with tm.assert_raises_regex(ValueError, "not valid labels or levels"): + s._drop_labels_or_levels('L4', axis=0) + + # Make series with L1 and L2 as index + s = df.set_index(['L1', 'L2']).L3 + assert_levels_dropped(s, ['L1', 'L2'], axis=0) + + with tm.assert_raises_regex(ValueError, "not valid labels or levels"): + s._drop_labels_or_levels('L4', axis=0) + + +# Panel +# ----- +def test_drop_labels_or_levels_panel_error(panel): + msg = ("_drop_labels_or_levels is not implemented for {type}" + .format(type=type(panel))) + + with tm.assert_raises_regex(NotImplementedError, msg): + panel._drop_labels_or_levels('L1', axis=0) diff --git a/pandas/tests/generic/test_panel.py b/pandas/tests/generic/test_panel.py new file mode 100644 index 0000000000000..49cb773a1bd10 --- /dev/null +++ b/pandas/tests/generic/test_panel.py @@ -0,0 +1,57 @@ +# -*- coding: utf-8 -*- +# pylint: disable-msg=E1101,W0612 + +from warnings import catch_warnings + +from pandas 
import Panel +from pandas.util.testing import (assert_panel_equal, + assert_almost_equal) + +import pandas.util.testing as tm +import pandas.util._test_decorators as td +from .test_generic import Generic + + +class TestPanel(Generic): + _typ = Panel + _comparator = lambda self, x, y: assert_panel_equal(x, y, by_blocks=True) + + @td.skip_if_no('xarray', min_version='0.7.0') + def test_to_xarray(self): + from xarray import DataArray + + with catch_warnings(record=True): + p = tm.makePanel() + + result = p.to_xarray() + assert isinstance(result, DataArray) + assert len(result.coords) == 3 + assert_almost_equal(list(result.coords.keys()), + ['items', 'major_axis', 'minor_axis']) + assert len(result.dims) == 3 + + # idempotency + assert_panel_equal(result.to_pandas(), p) + + +# run all the tests, but wrap each in a warning catcher +for t in ['test_rename', 'test_get_numeric_data', + 'test_get_default', 'test_nonzero', + 'test_downcast', 'test_constructor_compound_dtypes', + 'test_head_tail', + 'test_size_compat', 'test_split_compat', + 'test_unexpected_keyword', + 'test_stat_unexpected_keyword', 'test_api_compat', + 'test_stat_non_defaults_args', + 'test_truncate_out_of_bounds', + 'test_metadata_propagation', 'test_copy_and_deepcopy', + 'test_pct_change', 'test_sample']: + + def f(): + def tester(self): + f = getattr(super(TestPanel, self), t) + with catch_warnings(record=True): + f() + return tester + + setattr(TestPanel, t, f()) diff --git a/pandas/tests/generic/test_series.py b/pandas/tests/generic/test_series.py new file mode 100644 index 0000000000000..3393d7704e411 --- /dev/null +++ b/pandas/tests/generic/test_series.py @@ -0,0 +1,229 @@ +# -*- coding: utf-8 -*- +# pylint: disable-msg=E1101,W0612 + +from operator import methodcaller + +import pytest +import numpy as np +import pandas as pd + +from distutils.version import LooseVersion +from pandas import Series, date_range, MultiIndex + +from pandas.compat import range +from pandas.util.testing import 
(assert_series_equal, + assert_almost_equal) + +import pandas.util.testing as tm +import pandas.util._test_decorators as td +from .test_generic import Generic + +try: + import xarray + _XARRAY_INSTALLED = True +except ImportError: + _XARRAY_INSTALLED = False + + +class TestSeries(Generic): + _typ = Series + _comparator = lambda self, x, y: assert_series_equal(x, y) + + def setup_method(self): + self.ts = tm.makeTimeSeries() # Was at top level in test_series + self.ts.name = 'ts' + + self.series = tm.makeStringSeries() + self.series.name = 'series' + + def test_rename_mi(self): + s = Series([11, 21, 31], + index=MultiIndex.from_tuples( + [("A", x) for x in ["a", "B", "c"]])) + s.rename(str.lower) + + def test_set_axis_name(self): + s = Series([1, 2, 3], index=['a', 'b', 'c']) + funcs = ['rename_axis', '_set_axis_name'] + name = 'foo' + for func in funcs: + result = methodcaller(func, name)(s) + assert s.index.name is None + assert result.index.name == name + + def test_set_axis_name_mi(self): + s = Series([11, 21, 31], index=MultiIndex.from_tuples( + [("A", x) for x in ["a", "B", "c"]], + names=['l1', 'l2']) + ) + funcs = ['rename_axis', '_set_axis_name'] + for func in funcs: + result = methodcaller(func, ['L1', 'L2'])(s) + assert s.index.name is None + assert s.index.names == ['l1', 'l2'] + assert result.index.name is None + assert result.index.names, ['L1', 'L2'] + + def test_set_axis_name_raises(self): + s = pd.Series([1]) + with pytest.raises(ValueError): + s._set_axis_name(name='a', axis=1) + + def test_get_numeric_data_preserve_dtype(self): + + # get the numeric data + o = Series([1, 2, 3]) + result = o._get_numeric_data() + self._compare(result, o) + + o = Series([1, '2', 3.]) + result = o._get_numeric_data() + expected = Series([], dtype=object, index=pd.Index([], dtype=object)) + self._compare(result, expected) + + o = Series([True, False, True]) + result = o._get_numeric_data() + self._compare(result, o) + + o = Series([True, False, True]) + result = 
o._get_bool_data() + self._compare(result, o) + + o = Series(date_range('20130101', periods=3)) + result = o._get_numeric_data() + expected = Series([], dtype='M8[ns]', index=pd.Index([], dtype=object)) + self._compare(result, expected) + + def test_nonzero_single_element(self): + + # allow single item via bool method + s = Series([True]) + assert s.bool() + + s = Series([False]) + assert not s.bool() + + # single item nan to raise + for s in [Series([np.nan]), Series([pd.NaT]), Series([True]), + Series([False])]: + pytest.raises(ValueError, lambda: bool(s)) + + for s in [Series([np.nan]), Series([pd.NaT])]: + pytest.raises(ValueError, lambda: s.bool()) + + # multiple bool are still an error + for s in [Series([True, True]), Series([False, False])]: + pytest.raises(ValueError, lambda: bool(s)) + pytest.raises(ValueError, lambda: s.bool()) + + # single non-bool are an error + for s in [Series([1]), Series([0]), Series(['a']), Series([0.0])]: + pytest.raises(ValueError, lambda: bool(s)) + pytest.raises(ValueError, lambda: s.bool()) + + def test_metadata_propagation_indiv(self): + # check that the metadata matches up on the resulting ops + + o = Series(range(3), range(3)) + o.name = 'foo' + o2 = Series(range(3), range(3)) + o2.name = 'bar' + + result = o.T + self.check_metadata(o, result) + + # resample + ts = Series(np.random.rand(1000), + index=date_range('20130101', periods=1000, freq='s'), + name='foo') + result = ts.resample('1T').mean() + self.check_metadata(ts, result) + + result = ts.resample('1T').min() + self.check_metadata(ts, result) + + result = ts.resample('1T').apply(lambda x: x.sum()) + self.check_metadata(ts, result) + + _metadata = Series._metadata + _finalize = Series.__finalize__ + Series._metadata = ['name', 'filename'] + o.filename = 'foo' + o2.filename = 'bar' + + def finalize(self, other, method=None, **kwargs): + for name in self._metadata: + if method == 'concat' and name == 'filename': + value = '+'.join([getattr( + o, name) for o in 
other.objs if getattr(o, name, None) + ]) + object.__setattr__(self, name, value) + else: + object.__setattr__(self, name, getattr(other, name, None)) + + return self + + Series.__finalize__ = finalize + + result = pd.concat([o, o2]) + assert result.filename == 'foo+bar' + assert result.name is None + + # reset + Series._metadata = _metadata + Series.__finalize__ = _finalize + + @pytest.mark.skipif(not _XARRAY_INSTALLED or _XARRAY_INSTALLED and + LooseVersion(xarray.__version__) < + LooseVersion('0.10.0'), + reason='xarray >= 0.10.0 required') + @pytest.mark.parametrize( + "index", + ['FloatIndex', 'IntIndex', + 'StringIndex', 'UnicodeIndex', + 'DateIndex', 'PeriodIndex', + 'TimedeltaIndex', 'CategoricalIndex']) + def test_to_xarray_index_types(self, index): + from xarray import DataArray + + index = getattr(tm, 'make{}'.format(index)) + s = Series(range(6), index=index(6)) + s.index.name = 'foo' + result = s.to_xarray() + repr(result) + assert len(result) == 6 + assert len(result.coords) == 1 + assert_almost_equal(list(result.coords.keys()), ['foo']) + assert isinstance(result, DataArray) + + # idempotency + assert_series_equal(result.to_series(), s, + check_index_type=False, + check_categorical=True) + + @td.skip_if_no('xarray', min_version='0.7.0') + def test_to_xarray(self): + from xarray import DataArray + + s = Series([]) + s.index.name = 'foo' + result = s.to_xarray() + assert len(result) == 0 + assert len(result.coords) == 1 + assert_almost_equal(list(result.coords.keys()), ['foo']) + assert isinstance(result, DataArray) + + s = Series(range(6)) + s.index.name = 'foo' + s.index = pd.MultiIndex.from_product([['a', 'b'], range(3)], + names=['one', 'two']) + result = s.to_xarray() + assert len(result) == 2 + assert_almost_equal(list(result.coords.keys()), ['one', 'two']) + assert isinstance(result, DataArray) + assert_series_equal(result.to_series(), s) + + def test_valid_deprecated(self): + # GH18800 + with tm.assert_produces_warning(FutureWarning): + 
pd.Series([]).valid() diff --git a/pandas/tests/groupby/aggregate/__init__.py b/pandas/tests/groupby/aggregate/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py new file mode 100644 index 0000000000000..7cc6c2fa7b88c --- /dev/null +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -0,0 +1,307 @@ +# -*- coding: utf-8 -*- + +""" +test .agg behavior / note that .apply is tested generally in test_groupby.py +""" + +import pytest + +import numpy as np +import pandas as pd + +from pandas import concat, DataFrame, Index, MultiIndex, Series +from pandas.core.groupby import Grouping, SpecificationError +from pandas.compat import OrderedDict +import pandas.util.testing as tm + + +@pytest.fixture +def ts(): + return tm.makeTimeSeries() + + +@pytest.fixture +def tsframe(): + return DataFrame(tm.getTimeSeriesData()) + + +@pytest.fixture +def df(): + return DataFrame( + {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], + 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], + 'C': np.random.randn(8), + 'D': np.random.randn(8)}) + + +@pytest.fixture +def mframe(): + index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], + ['one', 'two', 'three']], + labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=['first', 'second']) + return DataFrame(np.random.randn(10, 3), + index=index, + columns=['A', 'B', 'C']) + + +@pytest.fixture +def three_group(): + return DataFrame( + {'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', + 'bar', 'bar', 'foo', 'foo', 'foo'], + 'B': ['one', 'one', 'one', 'two', 'one', 'one', + 'one', 'two', 'two', 'two', 'one'], + 'C': ['dull', 'dull', 'shiny', 'dull', 'dull', 'shiny', + 'shiny', 'dull', 'shiny', 'shiny', 'shiny'], + 'D': np.random.randn(11), + 'E': np.random.randn(11), + 'F': np.random.randn(11)}) + + +def test_agg_regression1(tsframe): + grouped = 
tsframe.groupby([lambda x: x.year, lambda x: x.month]) + result = grouped.agg(np.mean) + expected = grouped.mean() + tm.assert_frame_equal(result, expected) + + +def test_agg_must_agg(df): + grouped = df.groupby('A')['C'] + + msg = "Must produce aggregated value" + with tm.assert_raises_regex(Exception, msg): + grouped.agg(lambda x: x.describe()) + with tm.assert_raises_regex(Exception, msg): + grouped.agg(lambda x: x.index[:2]) + + +def test_agg_ser_multi_key(df): + # TODO(wesm): unused + ser = df.C # noqa + + f = lambda x: x.sum() + results = df.C.groupby([df.A, df.B]).aggregate(f) + expected = df.groupby(['A', 'B']).sum()['C'] + tm.assert_series_equal(results, expected) + + +def test_agg_apply_corner(ts, tsframe): + # nothing to group, all NA + grouped = ts.groupby(ts * np.nan) + assert ts.dtype == np.float64 + + # groupby float64 values results in Float64Index + exp = Series([], dtype=np.float64, + index=pd.Index([], dtype=np.float64)) + tm.assert_series_equal(grouped.sum(), exp) + tm.assert_series_equal(grouped.agg(np.sum), exp) + tm.assert_series_equal(grouped.apply(np.sum), exp, + check_index_type=False) + + # DataFrame + grouped = tsframe.groupby(tsframe['A'] * np.nan) + exp_df = DataFrame(columns=tsframe.columns, dtype=float, + index=pd.Index([], dtype=np.float64)) + tm.assert_frame_equal(grouped.sum(), exp_df, check_names=False) + tm.assert_frame_equal(grouped.agg(np.sum), exp_df, check_names=False) + tm.assert_frame_equal(grouped.apply(np.sum), exp_df.iloc[:, :0], + check_names=False) + + +def test_agg_grouping_is_list_tuple(ts): + df = tm.makeTimeDataFrame() + + grouped = df.groupby(lambda x: x.year) + grouper = grouped.grouper.groupings[0].grouper + grouped.grouper.groupings[0] = Grouping(ts.index, list(grouper)) + + result = grouped.agg(np.mean) + expected = grouped.mean() + tm.assert_frame_equal(result, expected) + + grouped.grouper.groupings[0] = Grouping(ts.index, tuple(grouper)) + + result = grouped.agg(np.mean) + expected = grouped.mean() + 
tm.assert_frame_equal(result, expected) + + +def test_agg_python_multiindex(mframe): + grouped = mframe.groupby(['A', 'B']) + + result = grouped.agg(np.mean) + expected = grouped.mean() + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize('groupbyfunc', [ + lambda x: x.weekday(), + [lambda x: x.month, lambda x: x.weekday()], +]) +def test_aggregate_str_func(tsframe, groupbyfunc): + grouped = tsframe.groupby(groupbyfunc) + + # single series + result = grouped['A'].agg('std') + expected = grouped['A'].std() + tm.assert_series_equal(result, expected) + + # group frame by function name + result = grouped.aggregate('var') + expected = grouped.var() + tm.assert_frame_equal(result, expected) + + # group frame by function dict + result = grouped.agg(OrderedDict([['A', 'var'], + ['B', 'std'], + ['C', 'mean'], + ['D', 'sem']])) + expected = DataFrame(OrderedDict([['A', grouped['A'].var()], + ['B', grouped['B'].std()], + ['C', grouped['C'].mean()], + ['D', grouped['D'].sem()]])) + tm.assert_frame_equal(result, expected) + + +def test_aggregate_item_by_item(df): + grouped = df.groupby('A') + + aggfun = lambda ser: ser.size + result = grouped.agg(aggfun) + foo = (df.A == 'foo').sum() + bar = (df.A == 'bar').sum() + K = len(result.columns) + + # GH5782 + # odd comparisons can result here, so cast to make easy + exp = pd.Series(np.array([foo] * K), index=list('BCD'), + dtype=np.float64, name='foo') + tm.assert_series_equal(result.xs('foo'), exp) + + exp = pd.Series(np.array([bar] * K), index=list('BCD'), + dtype=np.float64, name='bar') + tm.assert_almost_equal(result.xs('bar'), exp) + + def aggfun(ser): + return ser.size + + result = DataFrame().groupby(df.A).agg(aggfun) + assert isinstance(result, DataFrame) + assert len(result) == 0 + + +def test_wrap_agg_out(three_group): + grouped = three_group.groupby(['A', 'B']) + + def func(ser): + if ser.dtype == np.object: + raise TypeError + else: + return ser.sum() + + result = grouped.aggregate(func) + exp_grouped = 
three_group.loc[:, three_group.columns != 'C'] + expected = exp_grouped.groupby(['A', 'B']).aggregate(func) + tm.assert_frame_equal(result, expected) + + +def test_agg_multiple_functions_maintain_order(df): + # GH #610 + funcs = [('mean', np.mean), ('max', np.max), ('min', np.min)] + result = df.groupby('A')['C'].agg(funcs) + exp_cols = Index(['mean', 'max', 'min']) + + tm.assert_index_equal(result.columns, exp_cols) + + +def test_multiple_functions_tuples_and_non_tuples(df): + # #1359 + funcs = [('foo', 'mean'), 'std'] + ex_funcs = [('foo', 'mean'), ('std', 'std')] + + result = df.groupby('A')['C'].agg(funcs) + expected = df.groupby('A')['C'].agg(ex_funcs) + tm.assert_frame_equal(result, expected) + + result = df.groupby('A').agg(funcs) + expected = df.groupby('A').agg(ex_funcs) + tm.assert_frame_equal(result, expected) + + +def test_agg_multiple_functions_too_many_lambdas(df): + grouped = df.groupby('A') + funcs = ['mean', lambda x: x.mean(), lambda x: x.std()] + + msg = 'Function names must be unique, found multiple named ' + with tm.assert_raises_regex(SpecificationError, msg): + grouped.agg(funcs) + + +def test_more_flexible_frame_multi_function(df): + grouped = df.groupby('A') + + exmean = grouped.agg(OrderedDict([['C', np.mean], ['D', np.mean]])) + exstd = grouped.agg(OrderedDict([['C', np.std], ['D', np.std]])) + + expected = concat([exmean, exstd], keys=['mean', 'std'], axis=1) + expected = expected.swaplevel(0, 1, axis=1).sort_index(level=0, axis=1) + + d = OrderedDict([['C', [np.mean, np.std]], ['D', [np.mean, np.std]]]) + result = grouped.aggregate(d) + + tm.assert_frame_equal(result, expected) + + # be careful + result = grouped.aggregate(OrderedDict([['C', np.mean], + ['D', [np.mean, np.std]]])) + expected = grouped.aggregate(OrderedDict([['C', np.mean], + ['D', [np.mean, np.std]]])) + tm.assert_frame_equal(result, expected) + + def foo(x): + return np.mean(x) + + def bar(x): + return np.std(x, ddof=1) + + # this uses column selection & renaming + 
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + d = OrderedDict([['C', np.mean], + ['D', OrderedDict([['foo', np.mean], + ['bar', np.std]])]]) + result = grouped.aggregate(d) + + d = OrderedDict([['C', [np.mean]], ['D', [foo, bar]]]) + expected = grouped.aggregate(d) + + tm.assert_frame_equal(result, expected) + + +def test_multi_function_flexible_mix(df): + # GH #1268 + grouped = df.groupby('A') + + # Expected + d = OrderedDict([['C', OrderedDict([['foo', 'mean'], ['bar', 'std']])], + ['D', {'sum': 'sum'}]]) + # this uses column selection & renaming + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + expected = grouped.aggregate(d) + + # Test 1 + d = OrderedDict([['C', OrderedDict([['foo', 'mean'], ['bar', 'std']])], + ['D', 'sum']]) + # this uses column selection & renaming + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = grouped.aggregate(d) + tm.assert_frame_equal(result, expected) + + # Test 2 + d = OrderedDict([['C', OrderedDict([['foo', 'mean'], ['bar', 'std']])], + ['D', ['sum']]]) + # this uses column selection & renaming + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = grouped.aggregate(d) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py new file mode 100644 index 0000000000000..cef3a699ed24b --- /dev/null +++ b/pandas/tests/groupby/aggregate/test_cython.py @@ -0,0 +1,206 @@ +# -*- coding: utf-8 -*- + +""" +test cython .agg behavior +""" + +from __future__ import print_function + +import pytest + +import numpy as np +from numpy import nan +import pandas as pd + +from pandas import (bdate_range, DataFrame, Index, Series, Timestamp, + Timedelta, NaT) +from pandas.core.groupby import DataError +import pandas.util.testing as tm + + +@pytest.mark.parametrize('op_name', [ + 'count', + 'sum', + 'std', + 'var', + 'sem', + 'mean', + 'median', + 
'prod', + 'min', + 'max', +]) +def test_cythonized_aggers(op_name): + data = {'A': [0, 0, 0, 0, 1, 1, 1, 1, 1, 1., nan, nan], + 'B': ['A', 'B'] * 6, + 'C': np.random.randn(12)} + df = DataFrame(data) + df.loc[2:10:2, 'C'] = nan + + op = lambda x: getattr(x, op_name)() + + # single column + grouped = df.drop(['B'], axis=1).groupby('A') + exp = {} + for cat, group in grouped: + exp[cat] = op(group['C']) + exp = DataFrame({'C': exp}) + exp.index.name = 'A' + result = op(grouped) + tm.assert_frame_equal(result, exp) + + # multiple columns + grouped = df.groupby(['A', 'B']) + expd = {} + for (cat1, cat2), group in grouped: + expd.setdefault(cat1, {})[cat2] = op(group['C']) + exp = DataFrame(expd).T.stack(dropna=False) + exp.index.names = ['A', 'B'] + exp.name = 'C' + + result = op(grouped)['C'] + if op_name in ['sum', 'prod']: + tm.assert_series_equal(result, exp) + + +def test_cython_agg_boolean(): + frame = DataFrame({'a': np.random.randint(0, 5, 50), + 'b': np.random.randint(0, 2, 50).astype('bool')}) + result = frame.groupby('a')['b'].mean() + expected = frame.groupby('a')['b'].agg(np.mean) + + tm.assert_series_equal(result, expected) + + +def test_cython_agg_nothing_to_agg(): + frame = DataFrame({'a': np.random.randint(0, 5, 50), + 'b': ['foo', 'bar'] * 25}) + msg = "No numeric types to aggregate" + + with tm.assert_raises_regex(DataError, msg): + frame.groupby('a')['b'].mean() + + frame = DataFrame({'a': np.random.randint(0, 5, 50), + 'b': ['foo', 'bar'] * 25}) + with tm.assert_raises_regex(DataError, msg): + frame[['b']].groupby(frame['a']).mean() + + +def test_cython_agg_nothing_to_agg_with_dates(): + frame = DataFrame({'a': np.random.randint(0, 5, 50), + 'b': ['foo', 'bar'] * 25, + 'dates': pd.date_range('now', periods=50, freq='T')}) + msg = "No numeric types to aggregate" + with tm.assert_raises_regex(DataError, msg): + frame.groupby('b').dates.mean() + + +def test_cython_agg_frame_columns(): + # #2113 + df = DataFrame({'x': [1, 2, 3], 'y': [3, 4, 5]}) + + 
df.groupby(level=0, axis='columns').mean() + df.groupby(level=0, axis='columns').mean() + df.groupby(level=0, axis='columns').mean() + df.groupby(level=0, axis='columns').mean() + + +def test_cython_agg_return_dict(): + # GH 16741 + df = DataFrame( + {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], + 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], + 'C': np.random.randn(8), + 'D': np.random.randn(8)}) + + ts = df.groupby('A')['B'].agg(lambda x: x.value_counts().to_dict()) + expected = Series([{'two': 1, 'one': 1, 'three': 1}, + {'two': 2, 'one': 2, 'three': 1}], + index=Index(['bar', 'foo'], name='A'), + name='B') + tm.assert_series_equal(ts, expected) + + +def test_cython_fail_agg(): + dr = bdate_range('1/1/2000', periods=50) + ts = Series(['A', 'B', 'C', 'D', 'E'] * 10, index=dr) + + grouped = ts.groupby(lambda x: x.month) + summed = grouped.sum() + expected = grouped.agg(np.sum) + tm.assert_series_equal(summed, expected) + + +@pytest.mark.parametrize('op, targop', [ + ('mean', np.mean), + ('median', np.median), + ('var', np.var), + ('add', np.sum), + ('prod', np.prod), + ('min', np.min), + ('max', np.max), + ('first', lambda x: x.iloc[0]), + ('last', lambda x: x.iloc[-1]), +]) +def test__cython_agg_general(op, targop): + df = DataFrame(np.random.randn(1000)) + labels = np.random.randint(0, 50, size=1000).astype(float) + + result = df.groupby(labels)._cython_agg_general(op) + expected = df.groupby(labels).agg(targop) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize('op, targop', [ + ('mean', np.mean), + ('median', lambda x: np.median(x) if len(x) > 0 else np.nan), + ('var', lambda x: np.var(x, ddof=1)), + ('min', np.min), + ('max', np.max), ] +) +def test_cython_agg_empty_buckets(op, targop): + df = pd.DataFrame([11, 12, 13]) + grps = range(0, 55, 5) + + # calling _cython_agg_general directly, instead of via the user API + # which sets different values for min_count, so do that here. 
+ result = df.groupby(pd.cut(df[0], grps))._cython_agg_general(op) + expected = df.groupby(pd.cut(df[0], grps)).agg(lambda x: targop(x)) + tm.assert_frame_equal(result, expected) + + +def test_cython_agg_empty_buckets_nanops(): + # GH-18869 can't call nanops on empty groups, so hardcode expected + # for these + df = pd.DataFrame([11, 12, 13], columns=['a']) + grps = range(0, 25, 5) + # add / sum + result = df.groupby(pd.cut(df['a'], grps))._cython_agg_general('add') + intervals = pd.interval_range(0, 20, freq=5) + expected = pd.DataFrame( + {"a": [0, 0, 36, 0]}, + index=pd.CategoricalIndex(intervals, name='a', ordered=True)) + tm.assert_frame_equal(result, expected) + + # prod + result = df.groupby(pd.cut(df['a'], grps))._cython_agg_general('prod') + expected = pd.DataFrame( + {"a": [1, 1, 1716, 1]}, + index=pd.CategoricalIndex(intervals, name='a', ordered=True)) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize('op', ['first', 'last', 'max', 'min']) +@pytest.mark.parametrize('data', [ + Timestamp('2016-10-14 21:00:44.557'), + Timedelta('17088 days 21:00:44.557'), ]) +def test_cython_with_timestamp_and_nat(op, data): + # https://github.com/pandas-dev/pandas/issues/19526 + df = DataFrame({'a': [0, 1], 'b': [data, NaT]}) + index = Index([0, 1], name='a') + + # We will group by a and test the cython aggregations + expected = DataFrame({'b': [data, NaT]}, index=index) + + result = df.groupby('a').aggregate(op) + tm.assert_frame_equal(expected, result) diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py new file mode 100644 index 0000000000000..4c407ad8a0d93 --- /dev/null +++ b/pandas/tests/groupby/aggregate/test_other.py @@ -0,0 +1,502 @@ +# -*- coding: utf-8 -*- + +""" +test all other .agg behavior +""" + +from __future__ import print_function + +import pytest +from collections import OrderedDict + +import datetime as dt +from functools import partial + +import numpy as np +import pandas as pd 
+ +from pandas import ( + date_range, DataFrame, Index, MultiIndex, PeriodIndex, period_range, Series +) +from pandas.core.groupby import SpecificationError +from pandas.io.formats.printing import pprint_thing +import pandas.util.testing as tm + + +def test_agg_api(): + # GH 6337 + # http://stackoverflow.com/questions/21706030/pandas-groupby-agg-function-column-dtype-error + # different api for agg when passed custom function with mixed frame + + df = DataFrame({'data1': np.random.randn(5), + 'data2': np.random.randn(5), + 'key1': ['a', 'a', 'b', 'b', 'a'], + 'key2': ['one', 'two', 'one', 'two', 'one']}) + grouped = df.groupby('key1') + + def peak_to_peak(arr): + return arr.max() - arr.min() + + expected = grouped.agg([peak_to_peak]) + expected.columns = ['data1', 'data2'] + result = grouped.agg(peak_to_peak) + tm.assert_frame_equal(result, expected) + + +def test_agg_datetimes_mixed(): + data = [[1, '2012-01-01', 1.0], + [2, '2012-01-02', 2.0], + [3, None, 3.0]] + + df1 = DataFrame({'key': [x[0] for x in data], + 'date': [x[1] for x in data], + 'value': [x[2] for x in data]}) + + data = [[row[0], + (dt.datetime.strptime(row[1], '%Y-%m-%d').date() + if row[1] else None), + row[2]] + for row in data] + + df2 = DataFrame({'key': [x[0] for x in data], + 'date': [x[1] for x in data], + 'value': [x[2] for x in data]}) + + df1['weights'] = df1['value'] / df1['value'].sum() + gb1 = df1.groupby('date').aggregate(np.sum) + + df2['weights'] = df1['value'] / df1['value'].sum() + gb2 = df2.groupby('date').aggregate(np.sum) + + assert (len(gb1) == len(gb2)) + + +def test_agg_period_index(): + prng = period_range('2012-1-1', freq='M', periods=3) + df = DataFrame(np.random.randn(3, 2), index=prng) + rs = df.groupby(level=0).sum() + assert isinstance(rs.index, PeriodIndex) + + # GH 3579 + index = period_range(start='1999-01', periods=5, freq='M') + s1 = Series(np.random.rand(len(index)), index=index) + s2 = Series(np.random.rand(len(index)), index=index) + series = [('s1', s1), 
('s2', s2)] + df = DataFrame.from_dict(OrderedDict(series)) + grouped = df.groupby(df.index.month) + list(grouped) + + +def test_agg_dict_parameter_cast_result_dtypes(): + # GH 12821 + + df = DataFrame({'class': ['A', 'A', 'B', 'B', 'C', 'C', 'D', 'D'], + 'time': date_range('1/1/2011', periods=8, freq='H')}) + df.loc[[0, 1, 2, 5], 'time'] = None + + # test for `first` function + exp = df.loc[[0, 3, 4, 6]].set_index('class') + grouped = df.groupby('class') + tm.assert_frame_equal(grouped.first(), exp) + tm.assert_frame_equal(grouped.agg('first'), exp) + tm.assert_frame_equal(grouped.agg({'time': 'first'}), exp) + tm.assert_series_equal(grouped.time.first(), exp['time']) + tm.assert_series_equal(grouped.time.agg('first'), exp['time']) + + # test for `last` function + exp = df.loc[[0, 3, 4, 7]].set_index('class') + grouped = df.groupby('class') + tm.assert_frame_equal(grouped.last(), exp) + tm.assert_frame_equal(grouped.agg('last'), exp) + tm.assert_frame_equal(grouped.agg({'time': 'last'}), exp) + tm.assert_series_equal(grouped.time.last(), exp['time']) + tm.assert_series_equal(grouped.time.agg('last'), exp['time']) + + # count + exp = pd.Series([2, 2, 2, 2], + index=Index(list('ABCD'), name='class'), + name='time') + tm.assert_series_equal(grouped.time.agg(len), exp) + tm.assert_series_equal(grouped.time.size(), exp) + + exp = pd.Series([0, 1, 1, 2], + index=Index(list('ABCD'), name='class'), + name='time') + tm.assert_series_equal(grouped.time.count(), exp) + + +def test_agg_cast_results_dtypes(): + # similar to GH12821 + # xref #11444 + u = [dt.datetime(2015, x + 1, 1) for x in range(12)] + v = list('aaabbbbbbccd') + df = pd.DataFrame({'X': v, 'Y': u}) + + result = df.groupby('X')['Y'].agg(len) + expected = df.groupby('X')['Y'].count() + tm.assert_series_equal(result, expected) + + +def test_aggregate_float64_no_int64(): + # see gh-11199 + df = DataFrame({"a": [1, 2, 3, 4, 5], + "b": [1, 2, 2, 4, 5], + "c": [1, 2, 3, 4, 5]}) + + expected = DataFrame({"a": [1, 2.5, 
4, 5]}, index=[1, 2, 4, 5]) + expected.index.name = "b" + + result = df.groupby("b")[["a"]].mean() + tm.assert_frame_equal(result, expected) + + expected = DataFrame({"a": [1, 2.5, 4, 5], "c": [1, 2.5, 4, 5]}, + index=[1, 2, 4, 5]) + expected.index.name = "b" + + result = df.groupby("b")[["a", "c"]].mean() + tm.assert_frame_equal(result, expected) + + +def test_aggregate_api_consistency(): + # GH 9052 + # make sure that the aggregates via dict + # are consistent + df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar', + 'foo', 'bar', 'foo', 'foo'], + 'B': ['one', 'one', 'two', 'two', + 'two', 'two', 'one', 'two'], + 'C': np.random.randn(8) + 1.0, + 'D': np.arange(8)}) + + grouped = df.groupby(['A', 'B']) + c_mean = grouped['C'].mean() + c_sum = grouped['C'].sum() + d_mean = grouped['D'].mean() + d_sum = grouped['D'].sum() + + result = grouped['D'].agg(['sum', 'mean']) + expected = pd.concat([d_sum, d_mean], axis=1) + expected.columns = ['sum', 'mean'] + tm.assert_frame_equal(result, expected, check_like=True) + + result = grouped.agg([np.sum, np.mean]) + expected = pd.concat([c_sum, c_mean, d_sum, d_mean], axis=1) + expected.columns = MultiIndex.from_product([['C', 'D'], + ['sum', 'mean']]) + tm.assert_frame_equal(result, expected, check_like=True) + + result = grouped[['D', 'C']].agg([np.sum, np.mean]) + expected = pd.concat([d_sum, d_mean, c_sum, c_mean], axis=1) + expected.columns = MultiIndex.from_product([['D', 'C'], + ['sum', 'mean']]) + tm.assert_frame_equal(result, expected, check_like=True) + + result = grouped.agg({'C': 'mean', 'D': 'sum'}) + expected = pd.concat([d_sum, c_mean], axis=1) + tm.assert_frame_equal(result, expected, check_like=True) + + result = grouped.agg({'C': ['mean', 'sum'], + 'D': ['mean', 'sum']}) + expected = pd.concat([c_mean, c_sum, d_mean, d_sum], axis=1) + expected.columns = MultiIndex.from_product([['C', 'D'], + ['mean', 'sum']]) + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = grouped[['D', 
'C']].agg({'r': np.sum, + 'r2': np.mean}) + expected = pd.concat([d_sum, c_sum, d_mean, c_mean], axis=1) + expected.columns = MultiIndex.from_product([['r', 'r2'], + ['D', 'C']]) + tm.assert_frame_equal(result, expected, check_like=True) + + +def test_agg_dict_renaming_deprecation(): + # 15931 + df = pd.DataFrame({'A': [1, 1, 1, 2, 2], + 'B': range(5), + 'C': range(5)}) + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False) as w: + df.groupby('A').agg({'B': {'foo': ['sum', 'max']}, + 'C': {'bar': ['count', 'min']}}) + assert "using a dict with renaming" in str(w[0].message) + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + df.groupby('A')[['B', 'C']].agg({'ma': 'max'}) + + with tm.assert_produces_warning(FutureWarning) as w: + df.groupby('A').B.agg({'foo': 'count'}) + assert "using a dict on a Series for aggregation" in str(w[0].message) + + +def test_agg_compat(): + # GH 12334 + df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar', + 'foo', 'bar', 'foo', 'foo'], + 'B': ['one', 'one', 'two', 'two', + 'two', 'two', 'one', 'two'], + 'C': np.random.randn(8) + 1.0, + 'D': np.arange(8)}) + + g = df.groupby(['A', 'B']) + + expected = pd.concat([g['D'].sum(), g['D'].std()], axis=1) + expected.columns = MultiIndex.from_tuples([('C', 'sum'), + ('C', 'std')]) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = g['D'].agg({'C': ['sum', 'std']}) + tm.assert_frame_equal(result, expected, check_like=True) + + expected = pd.concat([g['D'].sum(), g['D'].std()], axis=1) + expected.columns = ['C', 'D'] + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = g['D'].agg({'C': 'sum', 'D': 'std'}) + tm.assert_frame_equal(result, expected, check_like=True) + + +def test_agg_nested_dicts(): + # API change for disallowing these types of nested dicts + df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar', + 'foo', 'bar', 'foo', 'foo'], + 'B': ['one', 'one', 'two', 'two', + 'two', 'two', 
'one', 'two'], + 'C': np.random.randn(8) + 1.0, + 'D': np.arange(8)}) + + g = df.groupby(['A', 'B']) + + msg = r'cannot perform renaming for r[1-2] with a nested dictionary' + with tm.assert_raises_regex(SpecificationError, msg): + g.aggregate({'r1': {'C': ['mean', 'sum']}, + 'r2': {'D': ['mean', 'sum']}}) + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = g.agg({'C': {'ra': ['mean', 'std']}, + 'D': {'rb': ['mean', 'std']}}) + expected = pd.concat([g['C'].mean(), g['C'].std(), + g['D'].mean(), g['D'].std()], + axis=1) + expected.columns = pd.MultiIndex.from_tuples( + [('ra', 'mean'), ('ra', 'std'), + ('rb', 'mean'), ('rb', 'std')]) + tm.assert_frame_equal(result, expected, check_like=True) + + # same name as the original column + # GH9052 + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + expected = g['D'].agg({'result1': np.sum, 'result2': np.mean}) + expected = expected.rename(columns={'result1': 'D'}) + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = g['D'].agg({'D': np.sum, 'result2': np.mean}) + tm.assert_frame_equal(result, expected, check_like=True) + + +def test_agg_item_by_item_raise_typeerror(): + df = DataFrame(np.random.randint(10, size=(20, 10))) + + def raiseException(df): + pprint_thing('----------------------------------------') + pprint_thing(df.to_string()) + raise TypeError('test') + + with tm.assert_raises_regex(TypeError, 'test'): + df.groupby(0).agg(raiseException) + + +def test_series_agg_multikey(): + ts = tm.makeTimeSeries() + grouped = ts.groupby([lambda x: x.year, lambda x: x.month]) + + result = grouped.agg(np.sum) + expected = grouped.sum() + tm.assert_series_equal(result, expected) + + +def test_series_agg_multi_pure_python(): + data = DataFrame( + {'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar', + 'foo', 'foo', 'foo'], + 'B': ['one', 'one', 'one', 'two', 'one', 'one', 'one', 'two', + 'two', 'two', 'one'], + 'C': ['dull', 
'dull', 'shiny', 'dull', 'dull', 'shiny', 'shiny', + 'dull', 'shiny', 'shiny', 'shiny'], + 'D': np.random.randn(11), + 'E': np.random.randn(11), + 'F': np.random.randn(11)}) + + def bad(x): + assert (len(x.base) > 0) + return 'foo' + + result = data.groupby(['A', 'B']).agg(bad) + expected = data.groupby(['A', 'B']).agg(lambda x: 'foo') + tm.assert_frame_equal(result, expected) + + +def test_agg_consistency(): + # agg with ([]) and () not consistent + # GH 6715 + def P1(a): + try: + return np.percentile(a.dropna(), q=1) + except Exception: + return np.nan + + df = DataFrame({'col1': [1, 2, 3, 4], + 'col2': [10, 25, 26, 31], + 'date': [dt.date(2013, 2, 10), dt.date(2013, 2, 10), + dt.date(2013, 2, 11), dt.date(2013, 2, 11)]}) + + g = df.groupby('date') + + expected = g.agg([P1]) + expected.columns = expected.columns.levels[0] + + result = g.agg(P1) + tm.assert_frame_equal(result, expected) + + +def test_agg_callables(): + # GH 7929 + df = DataFrame({'foo': [1, 2], 'bar': [3, 4]}).astype(np.int64) + + class fn_class(object): + + def __call__(self, x): + return sum(x) + + equiv_callables = [sum, + np.sum, + lambda x: sum(x), + lambda x: x.sum(), + partial(sum), + fn_class(), ] + + expected = df.groupby("foo").agg(sum) + for ecall in equiv_callables: + result = df.groupby('foo').agg(ecall) + tm.assert_frame_equal(result, expected) + + +def test_agg_over_numpy_arrays(): + # GH 3788 + df = pd.DataFrame([[1, np.array([10, 20, 30])], + [1, np.array([40, 50, 60])], + [2, np.array([20, 30, 40])]], + columns=['category', 'arraydata']) + result = df.groupby('category').agg(sum) + + expected_data = [[np.array([50, 70, 90])], [np.array([20, 30, 40])]] + expected_index = pd.Index([1, 2], name='category') + expected_column = ['arraydata'] + expected = pd.DataFrame(expected_data, + index=expected_index, + columns=expected_column) + + tm.assert_frame_equal(result, expected) + + +def test_agg_timezone_round_trip(): + # GH 15426 + ts = pd.Timestamp("2016-01-01 12:00:00", 
tz='US/Pacific') + df = pd.DataFrame({'a': 1, + 'b': [ts + dt.timedelta(minutes=nn) + for nn in range(10)]}) + + result1 = df.groupby('a')['b'].agg(np.min).iloc[0] + result2 = df.groupby('a')['b'].agg(lambda x: np.min(x)).iloc[0] + result3 = df.groupby('a')['b'].min().iloc[0] + + assert result1 == ts + assert result2 == ts + assert result3 == ts + + dates = [pd.Timestamp("2016-01-0%d 12:00:00" % i, tz='US/Pacific') + for i in range(1, 5)] + df = pd.DataFrame({'A': ['a', 'b'] * 2, 'B': dates}) + grouped = df.groupby('A') + + ts = df['B'].iloc[0] + assert ts == grouped.nth(0)['B'].iloc[0] + assert ts == grouped.head(1)['B'].iloc[0] + assert ts == grouped.first()['B'].iloc[0] + assert ts == grouped.apply(lambda x: x.iloc[0])[0] + + ts = df['B'].iloc[2] + assert ts == grouped.last()['B'].iloc[0] + assert ts == grouped.apply(lambda x: x.iloc[-1])[0] + + +def test_sum_uint64_overflow(): + # see gh-14758 + # Convert to uint64 and don't overflow + df = pd.DataFrame([[1, 2], [3, 4], [5, 6]], dtype=object) + df = df + 9223372036854775807 + + index = pd.Index([9223372036854775808, + 9223372036854775810, + 9223372036854775812], + dtype=np.uint64) + expected = pd.DataFrame({1: [9223372036854775809, + 9223372036854775811, + 9223372036854775813]}, + index=index) + + expected.index.name = 0 + result = df.groupby(0).sum() + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("structure, expected", [ + (tuple, pd.DataFrame({'C': {(1, 1): (1, 1, 1), (3, 4): (3, 4, 4)}})), + (list, pd.DataFrame({'C': {(1, 1): [1, 1, 1], (3, 4): [3, 4, 4]}})), + (lambda x: tuple(x), pd.DataFrame({'C': {(1, 1): (1, 1, 1), + (3, 4): (3, 4, 4)}})), + (lambda x: list(x), pd.DataFrame({'C': {(1, 1): [1, 1, 1], + (3, 4): [3, 4, 4]}})) +]) +def test_agg_structs_dataframe(structure, expected): + df = pd.DataFrame({'A': [1, 1, 1, 3, 3, 3], + 'B': [1, 1, 1, 4, 4, 4], + 'C': [1, 1, 1, 3, 4, 4]}) + + result = df.groupby(['A', 'B']).aggregate(structure) + expected.index.names = ['A', 'B'] + 
tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("structure, expected", [ + (tuple, pd.Series([(1, 1, 1), (3, 4, 4)], index=[1, 3], name='C')), + (list, pd.Series([[1, 1, 1], [3, 4, 4]], index=[1, 3], name='C')), + (lambda x: tuple(x), pd.Series([(1, 1, 1), (3, 4, 4)], + index=[1, 3], name='C')), + (lambda x: list(x), pd.Series([[1, 1, 1], [3, 4, 4]], + index=[1, 3], name='C')) +]) +def test_agg_structs_series(structure, expected): + # Issue #18079 + df = pd.DataFrame({'A': [1, 1, 1, 3, 3, 3], + 'B': [1, 1, 1, 4, 4, 4], + 'C': [1, 1, 1, 3, 4, 4]}) + + result = df.groupby('A')['C'].aggregate(structure) + expected.index.name = 'A' + tm.assert_series_equal(result, expected) + + +@pytest.mark.xfail(reason="GH-18869: agg func not called on empty groups.") +def test_agg_category_nansum(): + categories = ['a', 'b', 'c'] + df = pd.DataFrame({"A": pd.Categorical(['a', 'a', 'b'], + categories=categories), + 'B': [1, 2, 3]}) + result = df.groupby("A").B.agg(np.nansum) + expected = pd.Series([3, 3, 0], + index=pd.CategoricalIndex(['a', 'b', 'c'], + categories=categories, + name='A'), + name='B') + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/common.py b/pandas/tests/groupby/common.py index 8a70777d08682..3e99e8211b4f8 100644 --- a/pandas/tests/groupby/common.py +++ b/pandas/tests/groupby/common.py @@ -1,13 +1,34 @@ """ Base setup """ +import pytest import numpy as np from pandas.util import testing as tm from pandas import DataFrame, MultiIndex +@pytest.fixture +def mframe(): + index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', + 'three']], + labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=['first', 'second']) + return DataFrame(np.random.randn(10, 3), index=index, + columns=['A', 'B', 'C']) + + +@pytest.fixture +def df(): + return DataFrame( + {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], + 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 
'three'], + 'C': np.random.randn(8), + 'D': np.random.randn(8)}) + + class MixIn(object): - def setUp(self): + def setup_method(self, method): self.ts = tm.makeTimeSeries() self.seriesd = tm.getSeriesData() @@ -15,12 +36,7 @@ def setUp(self): self.frame = DataFrame(self.seriesd) self.tsframe = DataFrame(self.tsd) - self.df = DataFrame( - {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], - 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], - 'C': np.random.randn(8), - 'D': np.random.randn(8)}) - + self.df = df() self.df_mixed_floats = DataFrame( {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], @@ -28,13 +44,7 @@ def setUp(self): 'D': np.array( np.random.randn(8), dtype='float32')}) - index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', - 'three']], - labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], - [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], - names=['first', 'second']) - self.mframe = DataFrame(np.random.randn(10, 3), index=index, - columns=['A', 'B', 'C']) + self.mframe = mframe() self.three_group = DataFrame( {'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar', diff --git a/pandas/tests/groupby/test_aggregate.py b/pandas/tests/groupby/test_aggregate.py deleted file mode 100644 index a1fc97eb8d780..0000000000000 --- a/pandas/tests/groupby/test_aggregate.py +++ /dev/null @@ -1,740 +0,0 @@ -# -*- coding: utf-8 -*- - -""" -we test .agg behavior / note that .apply is tested -generally in test_groupby.py -""" - -from __future__ import print_function -from datetime import datetime -from functools import partial - -import numpy as np -from numpy import nan -import pandas as pd - -from pandas import (date_range, MultiIndex, DataFrame, - Series, Index, bdate_range) -from pandas.util.testing import assert_frame_equal, assert_series_equal -from pandas.core.groupby import SpecificationError, DataError -from pandas.compat import OrderedDict -from 
pandas.formats.printing import pprint_thing -import pandas.util.testing as tm - - -class TestGroupByAggregate(tm.TestCase): - - def setUp(self): - self.ts = tm.makeTimeSeries() - - self.seriesd = tm.getSeriesData() - self.tsd = tm.getTimeSeriesData() - self.frame = DataFrame(self.seriesd) - self.tsframe = DataFrame(self.tsd) - - self.df = DataFrame( - {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], - 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], - 'C': np.random.randn(8), - 'D': np.random.randn(8)}) - - self.df_mixed_floats = DataFrame( - {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], - 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], - 'C': np.random.randn(8), - 'D': np.array( - np.random.randn(8), dtype='float32')}) - - index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', - 'three']], - labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], - [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], - names=['first', 'second']) - self.mframe = DataFrame(np.random.randn(10, 3), index=index, - columns=['A', 'B', 'C']) - - self.three_group = DataFrame( - {'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar', - 'foo', 'foo', 'foo'], - 'B': ['one', 'one', 'one', 'two', 'one', 'one', 'one', 'two', - 'two', 'two', 'one'], - 'C': ['dull', 'dull', 'shiny', 'dull', 'dull', 'shiny', 'shiny', - 'dull', 'shiny', 'shiny', 'shiny'], - 'D': np.random.randn(11), - 'E': np.random.randn(11), - 'F': np.random.randn(11)}) - - def test_agg_api(self): - - # GH 6337 - # http://stackoverflow.com/questions/21706030/pandas-groupby-agg-function-column-dtype-error - # different api for agg when passed custom function with mixed frame - - df = DataFrame({'data1': np.random.randn(5), - 'data2': np.random.randn(5), - 'key1': ['a', 'a', 'b', 'b', 'a'], - 'key2': ['one', 'two', 'one', 'two', 'one']}) - grouped = df.groupby('key1') - - def peak_to_peak(arr): - return arr.max() - arr.min() - - expected = grouped.agg([peak_to_peak]) - 
expected.columns = ['data1', 'data2'] - result = grouped.agg(peak_to_peak) - assert_frame_equal(result, expected) - - def test_agg_regression1(self): - grouped = self.tsframe.groupby([lambda x: x.year, lambda x: x.month]) - result = grouped.agg(np.mean) - expected = grouped.mean() - assert_frame_equal(result, expected) - - def test_agg_datetimes_mixed(self): - data = [[1, '2012-01-01', 1.0], [2, '2012-01-02', 2.0], [3, None, 3.0]] - - df1 = DataFrame({'key': [x[0] for x in data], - 'date': [x[1] for x in data], - 'value': [x[2] for x in data]}) - - data = [[row[0], datetime.strptime(row[1], '%Y-%m-%d').date() if row[1] - else None, row[2]] for row in data] - - df2 = DataFrame({'key': [x[0] for x in data], - 'date': [x[1] for x in data], - 'value': [x[2] for x in data]}) - - df1['weights'] = df1['value'] / df1['value'].sum() - gb1 = df1.groupby('date').aggregate(np.sum) - - df2['weights'] = df1['value'] / df1['value'].sum() - gb2 = df2.groupby('date').aggregate(np.sum) - - assert (len(gb1) == len(gb2)) - - def test_agg_period_index(self): - from pandas import period_range, PeriodIndex - prng = period_range('2012-1-1', freq='M', periods=3) - df = DataFrame(np.random.randn(3, 2), index=prng) - rs = df.groupby(level=0).sum() - tm.assertIsInstance(rs.index, PeriodIndex) - - # GH 3579 - index = period_range(start='1999-01', periods=5, freq='M') - s1 = Series(np.random.rand(len(index)), index=index) - s2 = Series(np.random.rand(len(index)), index=index) - series = [('s1', s1), ('s2', s2)] - df = DataFrame.from_items(series) - grouped = df.groupby(df.index.month) - list(grouped) - - def test_agg_dict_parameter_cast_result_dtypes(self): - # GH 12821 - - df = DataFrame( - {'class': ['A', 'A', 'B', 'B', 'C', 'C', 'D', 'D'], - 'time': date_range('1/1/2011', periods=8, freq='H')}) - df.loc[[0, 1, 2, 5], 'time'] = None - - # test for `first` function - exp = df.loc[[0, 3, 4, 6]].set_index('class') - grouped = df.groupby('class') - assert_frame_equal(grouped.first(), exp) - 
assert_frame_equal(grouped.agg('first'), exp) - assert_frame_equal(grouped.agg({'time': 'first'}), exp) - assert_series_equal(grouped.time.first(), exp['time']) - assert_series_equal(grouped.time.agg('first'), exp['time']) - - # test for `last` function - exp = df.loc[[0, 3, 4, 7]].set_index('class') - grouped = df.groupby('class') - assert_frame_equal(grouped.last(), exp) - assert_frame_equal(grouped.agg('last'), exp) - assert_frame_equal(grouped.agg({'time': 'last'}), exp) - assert_series_equal(grouped.time.last(), exp['time']) - assert_series_equal(grouped.time.agg('last'), exp['time']) - - def test_agg_must_agg(self): - grouped = self.df.groupby('A')['C'] - self.assertRaises(Exception, grouped.agg, lambda x: x.describe()) - self.assertRaises(Exception, grouped.agg, lambda x: x.index[:2]) - - def test_agg_ser_multi_key(self): - # TODO(wesm): unused - ser = self.df.C # noqa - - f = lambda x: x.sum() - results = self.df.C.groupby([self.df.A, self.df.B]).aggregate(f) - expected = self.df.groupby(['A', 'B']).sum()['C'] - assert_series_equal(results, expected) - - def test_agg_apply_corner(self): - # nothing to group, all NA - grouped = self.ts.groupby(self.ts * np.nan) - self.assertEqual(self.ts.dtype, np.float64) - - # groupby float64 values results in Float64Index - exp = Series([], dtype=np.float64, index=pd.Index( - [], dtype=np.float64)) - assert_series_equal(grouped.sum(), exp) - assert_series_equal(grouped.agg(np.sum), exp) - assert_series_equal(grouped.apply(np.sum), exp, check_index_type=False) - - # DataFrame - grouped = self.tsframe.groupby(self.tsframe['A'] * np.nan) - exp_df = DataFrame(columns=self.tsframe.columns, dtype=float, - index=pd.Index([], dtype=np.float64)) - assert_frame_equal(grouped.sum(), exp_df, check_names=False) - assert_frame_equal(grouped.agg(np.sum), exp_df, check_names=False) - assert_frame_equal(grouped.apply(np.sum), exp_df.iloc[:, :0], - check_names=False) - - def test_agg_grouping_is_list_tuple(self): - from pandas.core.groupby 
import Grouping - - df = tm.makeTimeDataFrame() - - grouped = df.groupby(lambda x: x.year) - grouper = grouped.grouper.groupings[0].grouper - grouped.grouper.groupings[0] = Grouping(self.ts.index, list(grouper)) - - result = grouped.agg(np.mean) - expected = grouped.mean() - tm.assert_frame_equal(result, expected) - - grouped.grouper.groupings[0] = Grouping(self.ts.index, tuple(grouper)) - - result = grouped.agg(np.mean) - expected = grouped.mean() - tm.assert_frame_equal(result, expected) - - def test_aggregate_api_consistency(self): - # GH 9052 - # make sure that the aggregates via dict - # are consistent - - df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar', - 'foo', 'bar', 'foo', 'foo'], - 'B': ['one', 'one', 'two', 'two', - 'two', 'two', 'one', 'two'], - 'C': np.random.randn(8) + 1.0, - 'D': np.arange(8)}) - - grouped = df.groupby(['A', 'B']) - c_mean = grouped['C'].mean() - c_sum = grouped['C'].sum() - d_mean = grouped['D'].mean() - d_sum = grouped['D'].sum() - - result = grouped['D'].agg(['sum', 'mean']) - expected = pd.concat([d_sum, d_mean], - axis=1) - expected.columns = ['sum', 'mean'] - assert_frame_equal(result, expected, check_like=True) - - result = grouped.agg([np.sum, np.mean]) - expected = pd.concat([c_sum, - c_mean, - d_sum, - d_mean], - axis=1) - expected.columns = MultiIndex.from_product([['C', 'D'], - ['sum', 'mean']]) - assert_frame_equal(result, expected, check_like=True) - - result = grouped[['D', 'C']].agg([np.sum, np.mean]) - expected = pd.concat([d_sum, - d_mean, - c_sum, - c_mean], - axis=1) - expected.columns = MultiIndex.from_product([['D', 'C'], - ['sum', 'mean']]) - assert_frame_equal(result, expected, check_like=True) - - result = grouped.agg({'C': 'mean', 'D': 'sum'}) - expected = pd.concat([d_sum, - c_mean], - axis=1) - assert_frame_equal(result, expected, check_like=True) - - result = grouped.agg({'C': ['mean', 'sum'], - 'D': ['mean', 'sum']}) - expected = pd.concat([c_mean, - c_sum, - d_mean, - d_sum], - axis=1) - 
expected.columns = MultiIndex.from_product([['C', 'D'], - ['mean', 'sum']]) - - result = grouped[['D', 'C']].agg({'r': np.sum, - 'r2': np.mean}) - expected = pd.concat([d_sum, - c_sum, - d_mean, - c_mean], - axis=1) - expected.columns = MultiIndex.from_product([['r', 'r2'], - ['D', 'C']]) - assert_frame_equal(result, expected, check_like=True) - - def test_agg_compat(self): - - # GH 12334 - - df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar', - 'foo', 'bar', 'foo', 'foo'], - 'B': ['one', 'one', 'two', 'two', - 'two', 'two', 'one', 'two'], - 'C': np.random.randn(8) + 1.0, - 'D': np.arange(8)}) - - g = df.groupby(['A', 'B']) - - expected = pd.concat([g['D'].sum(), - g['D'].std()], - axis=1) - expected.columns = MultiIndex.from_tuples([('C', 'sum'), - ('C', 'std')]) - result = g['D'].agg({'C': ['sum', 'std']}) - assert_frame_equal(result, expected, check_like=True) - - expected = pd.concat([g['D'].sum(), - g['D'].std()], - axis=1) - expected.columns = ['C', 'D'] - result = g['D'].agg({'C': 'sum', 'D': 'std'}) - assert_frame_equal(result, expected, check_like=True) - - def test_agg_nested_dicts(self): - - # API change for disallowing these types of nested dicts - df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar', - 'foo', 'bar', 'foo', 'foo'], - 'B': ['one', 'one', 'two', 'two', - 'two', 'two', 'one', 'two'], - 'C': np.random.randn(8) + 1.0, - 'D': np.arange(8)}) - - g = df.groupby(['A', 'B']) - - def f(): - g.aggregate({'r1': {'C': ['mean', 'sum']}, - 'r2': {'D': ['mean', 'sum']}}) - - self.assertRaises(SpecificationError, f) - - result = g.agg({'C': {'ra': ['mean', 'std']}, - 'D': {'rb': ['mean', 'std']}}) - expected = pd.concat([g['C'].mean(), g['C'].std(), g['D'].mean(), - g['D'].std()], axis=1) - expected.columns = pd.MultiIndex.from_tuples([('ra', 'mean'), ( - 'ra', 'std'), ('rb', 'mean'), ('rb', 'std')]) - assert_frame_equal(result, expected, check_like=True) - - # same name as the original column - # GH9052 - expected = g['D'].agg({'result1': np.sum, 'result2': 
np.mean}) - expected = expected.rename(columns={'result1': 'D'}) - result = g['D'].agg({'D': np.sum, 'result2': np.mean}) - assert_frame_equal(result, expected, check_like=True) - - def test_agg_python_multiindex(self): - grouped = self.mframe.groupby(['A', 'B']) - - result = grouped.agg(np.mean) - expected = grouped.mean() - tm.assert_frame_equal(result, expected) - - def test_aggregate_str_func(self): - def _check_results(grouped): - # single series - result = grouped['A'].agg('std') - expected = grouped['A'].std() - assert_series_equal(result, expected) - - # group frame by function name - result = grouped.aggregate('var') - expected = grouped.var() - assert_frame_equal(result, expected) - - # group frame by function dict - result = grouped.agg(OrderedDict([['A', 'var'], ['B', 'std'], - ['C', 'mean'], ['D', 'sem']])) - expected = DataFrame(OrderedDict([['A', grouped['A'].var( - )], ['B', grouped['B'].std()], ['C', grouped['C'].mean()], - ['D', grouped['D'].sem()]])) - assert_frame_equal(result, expected) - - by_weekday = self.tsframe.groupby(lambda x: x.weekday()) - _check_results(by_weekday) - - by_mwkday = self.tsframe.groupby([lambda x: x.month, - lambda x: x.weekday()]) - _check_results(by_mwkday) - - def test_aggregate_item_by_item(self): - - df = self.df.copy() - df['E'] = ['a'] * len(self.df) - grouped = self.df.groupby('A') - - # API change in 0.11 - # def aggfun(ser): - # return len(ser + 'a') - # result = grouped.agg(aggfun) - # self.assertEqual(len(result.columns), 1) - - aggfun = lambda ser: ser.size - result = grouped.agg(aggfun) - foo = (self.df.A == 'foo').sum() - bar = (self.df.A == 'bar').sum() - K = len(result.columns) - - # GH5782 - # odd comparisons can result here, so cast to make easy - exp = pd.Series(np.array([foo] * K), index=list('BCD'), - dtype=np.float64, name='foo') - tm.assert_series_equal(result.xs('foo'), exp) - - exp = pd.Series(np.array([bar] * K), index=list('BCD'), - dtype=np.float64, name='bar') - 
tm.assert_almost_equal(result.xs('bar'), exp) - - def aggfun(ser): - return ser.size - - result = DataFrame().groupby(self.df.A).agg(aggfun) - tm.assertIsInstance(result, DataFrame) - self.assertEqual(len(result), 0) - - def test_agg_item_by_item_raise_typeerror(self): - from numpy.random import randint - - df = DataFrame(randint(10, size=(20, 10))) - - def raiseException(df): - pprint_thing('----------------------------------------') - pprint_thing(df.to_string()) - raise TypeError - - self.assertRaises(TypeError, df.groupby(0).agg, raiseException) - - def test_series_agg_multikey(self): - ts = tm.makeTimeSeries() - grouped = ts.groupby([lambda x: x.year, lambda x: x.month]) - - result = grouped.agg(np.sum) - expected = grouped.sum() - assert_series_equal(result, expected) - - def test_series_agg_multi_pure_python(self): - data = DataFrame( - {'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar', - 'foo', 'foo', 'foo'], - 'B': ['one', 'one', 'one', 'two', 'one', 'one', 'one', 'two', - 'two', 'two', 'one'], - 'C': ['dull', 'dull', 'shiny', 'dull', 'dull', 'shiny', 'shiny', - 'dull', 'shiny', 'shiny', 'shiny'], - 'D': np.random.randn(11), - 'E': np.random.randn(11), - 'F': np.random.randn(11)}) - - def bad(x): - assert (len(x.base) > 0) - return 'foo' - - result = data.groupby(['A', 'B']).agg(bad) - expected = data.groupby(['A', 'B']).agg(lambda x: 'foo') - assert_frame_equal(result, expected) - - def test_cythonized_aggers(self): - data = {'A': [0, 0, 0, 0, 1, 1, 1, 1, 1, 1., nan, nan], - 'B': ['A', 'B'] * 6, - 'C': np.random.randn(12)} - df = DataFrame(data) - df.loc[2:10:2, 'C'] = nan - - def _testit(name): - - op = lambda x: getattr(x, name)() - - # single column - grouped = df.drop(['B'], axis=1).groupby('A') - exp = {} - for cat, group in grouped: - exp[cat] = op(group['C']) - exp = DataFrame({'C': exp}) - exp.index.name = 'A' - result = op(grouped) - assert_frame_equal(result, exp) - - # multiple columns - grouped = df.groupby(['A', 'B']) - expd = {} 
- for (cat1, cat2), group in grouped: - expd.setdefault(cat1, {})[cat2] = op(group['C']) - exp = DataFrame(expd).T.stack(dropna=False) - exp.index.names = ['A', 'B'] - exp.name = 'C' - - result = op(grouped)['C'] - if not tm._incompat_bottleneck_version(name): - assert_series_equal(result, exp) - - _testit('count') - _testit('sum') - _testit('std') - _testit('var') - _testit('sem') - _testit('mean') - _testit('median') - _testit('prod') - _testit('min') - _testit('max') - - def test_cython_agg_boolean(self): - frame = DataFrame({'a': np.random.randint(0, 5, 50), - 'b': np.random.randint(0, 2, 50).astype('bool')}) - result = frame.groupby('a')['b'].mean() - expected = frame.groupby('a')['b'].agg(np.mean) - - assert_series_equal(result, expected) - - def test_cython_agg_nothing_to_agg(self): - frame = DataFrame({'a': np.random.randint(0, 5, 50), - 'b': ['foo', 'bar'] * 25}) - self.assertRaises(DataError, frame.groupby('a')['b'].mean) - - frame = DataFrame({'a': np.random.randint(0, 5, 50), - 'b': ['foo', 'bar'] * 25}) - self.assertRaises(DataError, frame[['b']].groupby(frame['a']).mean) - - def test_cython_agg_nothing_to_agg_with_dates(self): - frame = DataFrame({'a': np.random.randint(0, 5, 50), - 'b': ['foo', 'bar'] * 25, - 'dates': pd.date_range('now', periods=50, - freq='T')}) - with tm.assertRaisesRegexp(DataError, "No numeric types to aggregate"): - frame.groupby('b').dates.mean() - - def test_cython_agg_frame_columns(self): - # #2113 - df = DataFrame({'x': [1, 2, 3], 'y': [3, 4, 5]}) - - df.groupby(level=0, axis='columns').mean() - df.groupby(level=0, axis='columns').mean() - df.groupby(level=0, axis='columns').mean() - df.groupby(level=0, axis='columns').mean() - - def test_cython_fail_agg(self): - dr = bdate_range('1/1/2000', periods=50) - ts = Series(['A', 'B', 'C', 'D', 'E'] * 10, index=dr) - - grouped = ts.groupby(lambda x: x.month) - summed = grouped.sum() - expected = grouped.agg(np.sum) - assert_series_equal(summed, expected) - - def 
test_agg_consistency(self): - # agg with ([]) and () not consistent - # GH 6715 - - def P1(a): - try: - return np.percentile(a.dropna(), q=1) - except: - return np.nan - - import datetime as dt - df = DataFrame({'col1': [1, 2, 3, 4], - 'col2': [10, 25, 26, 31], - 'date': [dt.date(2013, 2, 10), dt.date(2013, 2, 10), - dt.date(2013, 2, 11), dt.date(2013, 2, 11)]}) - - g = df.groupby('date') - - expected = g.agg([P1]) - expected.columns = expected.columns.levels[0] - - result = g.agg(P1) - assert_frame_equal(result, expected) - - def test_wrap_agg_out(self): - grouped = self.three_group.groupby(['A', 'B']) - - def func(ser): - if ser.dtype == np.object: - raise TypeError - else: - return ser.sum() - - result = grouped.aggregate(func) - exp_grouped = self.three_group.loc[:, self.three_group.columns != 'C'] - expected = exp_grouped.groupby(['A', 'B']).aggregate(func) - assert_frame_equal(result, expected) - - def test_agg_multiple_functions_maintain_order(self): - # GH #610 - funcs = [('mean', np.mean), ('max', np.max), ('min', np.min)] - result = self.df.groupby('A')['C'].agg(funcs) - exp_cols = Index(['mean', 'max', 'min']) - - self.assert_index_equal(result.columns, exp_cols) - - def test_multiple_functions_tuples_and_non_tuples(self): - # #1359 - - funcs = [('foo', 'mean'), 'std'] - ex_funcs = [('foo', 'mean'), ('std', 'std')] - - result = self.df.groupby('A')['C'].agg(funcs) - expected = self.df.groupby('A')['C'].agg(ex_funcs) - assert_frame_equal(result, expected) - - result = self.df.groupby('A').agg(funcs) - expected = self.df.groupby('A').agg(ex_funcs) - assert_frame_equal(result, expected) - - def test_agg_multiple_functions_too_many_lambdas(self): - grouped = self.df.groupby('A') - funcs = ['mean', lambda x: x.mean(), lambda x: x.std()] - - self.assertRaises(SpecificationError, grouped.agg, funcs) - - def test_more_flexible_frame_multi_function(self): - from pandas import concat - - grouped = self.df.groupby('A') - - exmean = grouped.agg(OrderedDict([['C', 
np.mean], ['D', np.mean]])) - exstd = grouped.agg(OrderedDict([['C', np.std], ['D', np.std]])) - - expected = concat([exmean, exstd], keys=['mean', 'std'], axis=1) - expected = expected.swaplevel(0, 1, axis=1).sort_index(level=0, axis=1) - - d = OrderedDict([['C', [np.mean, np.std]], ['D', [np.mean, np.std]]]) - result = grouped.aggregate(d) - - assert_frame_equal(result, expected) - - # be careful - result = grouped.aggregate(OrderedDict([['C', np.mean], - ['D', [np.mean, np.std]]])) - expected = grouped.aggregate(OrderedDict([['C', np.mean], - ['D', [np.mean, np.std]]])) - assert_frame_equal(result, expected) - - def foo(x): - return np.mean(x) - - def bar(x): - return np.std(x, ddof=1) - - d = OrderedDict([['C', np.mean], ['D', OrderedDict( - [['foo', np.mean], ['bar', np.std]])]]) - result = grouped.aggregate(d) - - d = OrderedDict([['C', [np.mean]], ['D', [foo, bar]]]) - expected = grouped.aggregate(d) - - assert_frame_equal(result, expected) - - def test_multi_function_flexible_mix(self): - # GH #1268 - grouped = self.df.groupby('A') - - d = OrderedDict([['C', OrderedDict([['foo', 'mean'], [ - 'bar', 'std' - ]])], ['D', 'sum']]) - result = grouped.aggregate(d) - d2 = OrderedDict([['C', OrderedDict([['foo', 'mean'], [ - 'bar', 'std' - ]])], ['D', ['sum']]]) - result2 = grouped.aggregate(d2) - - d3 = OrderedDict([['C', OrderedDict([['foo', 'mean'], [ - 'bar', 'std' - ]])], ['D', {'sum': 'sum'}]]) - expected = grouped.aggregate(d3) - - assert_frame_equal(result, expected) - assert_frame_equal(result2, expected) - - def test_agg_callables(self): - # GH 7929 - df = DataFrame({'foo': [1, 2], 'bar': [3, 4]}).astype(np.int64) - - class fn_class(object): - - def __call__(self, x): - return sum(x) - - equiv_callables = [sum, np.sum, lambda x: sum(x), lambda x: x.sum(), - partial(sum), fn_class()] - - expected = df.groupby("foo").agg(sum) - for ecall in equiv_callables: - result = df.groupby('foo').agg(ecall) - assert_frame_equal(result, expected) - - def 
test__cython_agg_general(self): - ops = [('mean', np.mean), - ('median', np.median), - ('var', np.var), - ('add', np.sum), - ('prod', np.prod), - ('min', np.min), - ('max', np.max), - ('first', lambda x: x.iloc[0]), - ('last', lambda x: x.iloc[-1]), ] - df = DataFrame(np.random.randn(1000)) - labels = np.random.randint(0, 50, size=1000).astype(float) - - for op, targop in ops: - result = df.groupby(labels)._cython_agg_general(op) - expected = df.groupby(labels).agg(targop) - try: - tm.assert_frame_equal(result, expected) - except BaseException as exc: - exc.args += ('operation: %s' % op, ) - raise - - def test_cython_agg_empty_buckets(self): - ops = [('mean', np.mean), - ('median', lambda x: np.median(x) if len(x) > 0 else np.nan), - ('var', lambda x: np.var(x, ddof=1)), - ('add', lambda x: np.sum(x) if len(x) > 0 else np.nan), - ('prod', np.prod), - ('min', np.min), - ('max', np.max), ] - - df = pd.DataFrame([11, 12, 13]) - grps = range(0, 55, 5) - - for op, targop in ops: - result = df.groupby(pd.cut(df[0], grps))._cython_agg_general(op) - expected = df.groupby(pd.cut(df[0], grps)).agg(lambda x: targop(x)) - try: - tm.assert_frame_equal(result, expected) - except BaseException as exc: - exc.args += ('operation: %s' % op,) - raise - - def test_agg_over_numpy_arrays(self): - # GH 3788 - df = pd.DataFrame([[1, np.array([10, 20, 30])], - [1, np.array([40, 50, 60])], - [2, np.array([20, 30, 40])]], - columns=['category', 'arraydata']) - result = df.groupby('category').agg(sum) - - expected_data = [[np.array([50, 70, 90])], [np.array([20, 30, 40])]] - expected_index = pd.Index([1, 2], name='category') - expected_column = ['arraydata'] - expected = pd.DataFrame(expected_data, - index=expected_index, - columns=expected_column) - - assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py index 51a10f4141ab5..979b2f7a539af 100644 --- a/pandas/tests/groupby/test_bin_groupby.py +++ 
b/pandas/tests/groupby/test_bin_groupby.py @@ -1,14 +1,15 @@ # -*- coding: utf-8 -*- +import pytest + from numpy import nan import numpy as np -from pandas.types.common import _ensure_int64 -from pandas import Index, isnull +from pandas.core.dtypes.common import _ensure_int64 +from pandas import Index, isna from pandas.util.testing import assert_almost_equal import pandas.util.testing as tm -import pandas.lib as lib -import pandas.algos as algos +from pandas._libs import lib, groupby, reduction def test_series_grouper(): @@ -18,7 +19,7 @@ def test_series_grouper(): labels = np.array([-1, -1, -1, 0, 0, 0, 1, 1, 1, 1], dtype=np.int64) - grouper = lib.SeriesGrouper(obj, np.mean, labels, 2, dummy) + grouper = reduction.SeriesGrouper(obj, np.mean, labels, 2, dummy) result, counts = grouper.get_result() expected = np.array([obj[3:6].mean(), obj[6:].mean()]) @@ -35,7 +36,7 @@ def test_series_bin_grouper(): bins = np.array([3, 6]) - grouper = lib.SeriesBinGrouper(obj, np.mean, bins, dummy) + grouper = reduction.SeriesBinGrouper(obj, np.mean, bins, dummy) result, counts = grouper.get_result() expected = np.array([obj[:3].mean(), obj[3:6].mean(), obj[6:].mean()]) @@ -45,9 +46,9 @@ def test_series_bin_grouper(): assert_almost_equal(counts, exp_counts) -class TestBinGroupers(tm.TestCase): +class TestBinGroupers(object): - def setUp(self): + def setup_method(self, method): self.obj = np.random.randn(10, 1) self.labels = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2, 2], dtype=np.int64) self.bins = np.array([3, 6], dtype=np.int64) @@ -71,15 +72,15 @@ def test_generate_bins(self): bins = func(values, binner, closed='right') assert ((bins == np.array([3, 6])).all()) - self.assertRaises(ValueError, generate_bins_generic, values, [], - 'right') - self.assertRaises(ValueError, generate_bins_generic, values[:0], - binner, 'right') + pytest.raises(ValueError, generate_bins_generic, values, [], + 'right') + pytest.raises(ValueError, generate_bins_generic, values[:0], + binner, 'right') - 
self.assertRaises(ValueError, generate_bins_generic, values, [4], - 'right') - self.assertRaises(ValueError, generate_bins_generic, values, [-3, -1], - 'right') + pytest.raises(ValueError, generate_bins_generic, values, [4], + 'right') + pytest.raises(ValueError, generate_bins_generic, values, [-3, -1], + 'right') def test_group_ohlc(): @@ -92,11 +93,11 @@ def _check(dtype): labels = _ensure_int64(np.repeat(np.arange(3), np.diff(np.r_[0, bins]))) - func = getattr(algos, 'group_ohlc_%s' % dtype) + func = getattr(groupby, 'group_ohlc_%s' % dtype) func(out, counts, obj[:, None], labels) def _ohlc(group): - if isnull(group).all(): + if isna(group).all(): return np.repeat(nan, 4) return [group[0], group.max(), group.min(), group[-1]] @@ -116,36 +117,37 @@ def _ohlc(group): _check('float64') -class TestMoments(tm.TestCase): +class TestMoments(object): pass -class TestReducer(tm.TestCase): +class TestReducer(object): def test_int_index(self): from pandas.core.series import Series arr = np.random.randn(100, 4) - result = lib.reduce(arr, np.sum, labels=Index(np.arange(4))) + result = reduction.reduce(arr, np.sum, labels=Index(np.arange(4))) expected = arr.sum(0) assert_almost_equal(result, expected) - result = lib.reduce(arr, np.sum, axis=1, labels=Index(np.arange(100))) + result = reduction.reduce(arr, np.sum, axis=1, + labels=Index(np.arange(100))) expected = arr.sum(1) assert_almost_equal(result, expected) dummy = Series(0., index=np.arange(100)) - result = lib.reduce(arr, np.sum, dummy=dummy, - labels=Index(np.arange(4))) + result = reduction.reduce(arr, np.sum, dummy=dummy, + labels=Index(np.arange(4))) expected = arr.sum(0) assert_almost_equal(result, expected) dummy = Series(0., index=np.arange(4)) - result = lib.reduce(arr, np.sum, axis=1, dummy=dummy, - labels=Index(np.arange(100))) + result = reduction.reduce(arr, np.sum, axis=1, dummy=dummy, + labels=Index(np.arange(100))) expected = arr.sum(1) assert_almost_equal(result, expected) - result = lib.reduce(arr, 
np.sum, axis=1, dummy=dummy, - labels=Index(np.arange(100))) + result = reduction.reduce(arr, np.sum, axis=1, dummy=dummy, + labels=Index(np.arange(100))) assert_almost_equal(result, expected) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index eebd0e0f490c1..bcd0da28b5a34 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -2,18 +2,156 @@ from __future__ import print_function from datetime import datetime +import pytest + import numpy as np from numpy import nan import pandas as pd from pandas import (Index, MultiIndex, CategoricalIndex, - DataFrame, Categorical, Series) + DataFrame, Categorical, Series, Interval) from pandas.util.testing import assert_frame_equal, assert_series_equal import pandas.util.testing as tm from .common import MixIn -class TestGroupByCategorical(MixIn, tm.TestCase): +class TestGroupByCategorical(MixIn): + + def test_groupby(self): + + cats = Categorical(["a", "a", "a", "b", "b", "b", "c", "c", "c"], + categories=["a", "b", "c", "d"], ordered=True) + data = DataFrame({"a": [1, 1, 1, 2, 2, 2, 3, 4, 5], "b": cats}) + + exp_index = CategoricalIndex(list('abcd'), name='b', ordered=True) + expected = DataFrame({'a': [1, 2, 4, np.nan]}, index=exp_index) + result = data.groupby("b").mean() + tm.assert_frame_equal(result, expected) + + raw_cat1 = Categorical(["a", "a", "b", "b"], + categories=["a", "b", "z"], ordered=True) + raw_cat2 = Categorical(["c", "d", "c", "d"], + categories=["c", "d", "y"], ordered=True) + df = DataFrame({"A": raw_cat1, "B": raw_cat2, "values": [1, 2, 3, 4]}) + + # single grouper + gb = df.groupby("A") + exp_idx = CategoricalIndex(['a', 'b', 'z'], name='A', ordered=True) + expected = DataFrame({'values': Series([3, 7, 0], index=exp_idx)}) + result = gb.sum() + tm.assert_frame_equal(result, expected) + + # multiple groupers + gb = df.groupby(['A', 'B']) + exp_index = pd.MultiIndex.from_product( + [Categorical(["a", "b", 
"z"], ordered=True), + Categorical(["c", "d", "y"], ordered=True)], + names=['A', 'B']) + expected = DataFrame({'values': [1, 2, np.nan, 3, 4, np.nan, + np.nan, np.nan, np.nan]}, + index=exp_index) + result = gb.sum() + tm.assert_frame_equal(result, expected) + + # multiple groupers with a non-cat + df = df.copy() + df['C'] = ['foo', 'bar'] * 2 + gb = df.groupby(['A', 'B', 'C']) + exp_index = pd.MultiIndex.from_product( + [Categorical(["a", "b", "z"], ordered=True), + Categorical(["c", "d", "y"], ordered=True), + ['foo', 'bar']], + names=['A', 'B', 'C']) + expected = DataFrame({'values': Series( + np.nan, index=exp_index)}).sort_index() + expected.iloc[[1, 2, 7, 8], 0] = [1, 2, 3, 4] + result = gb.sum() + tm.assert_frame_equal(result, expected) + + # GH 8623 + x = DataFrame([[1, 'John P. Doe'], [2, 'Jane Dove'], + [1, 'John P. Doe']], + columns=['person_id', 'person_name']) + x['person_name'] = Categorical(x.person_name) + + g = x.groupby(['person_id']) + result = g.transform(lambda x: x) + tm.assert_frame_equal(result, x[['person_name']]) + + result = x.drop_duplicates('person_name') + expected = x.iloc[[0, 1]] + tm.assert_frame_equal(result, expected) + + def f(x): + return x.drop_duplicates('person_name').iloc[0] + + result = g.apply(f) + expected = x.iloc[[0, 1]].copy() + expected.index = Index([1, 2], name='person_id') + expected['person_name'] = expected['person_name'].astype('object') + tm.assert_frame_equal(result, expected) + + # GH 9921 + # Monotonic + df = DataFrame({"a": [5, 15, 25]}) + c = pd.cut(df.a, bins=[0, 10, 20, 30, 40]) + + result = df.a.groupby(c).transform(sum) + tm.assert_series_equal(result, df['a']) + + tm.assert_series_equal( + df.a.groupby(c).transform(lambda xs: np.sum(xs)), df['a']) + tm.assert_frame_equal(df.groupby(c).transform(sum), df[['a']]) + tm.assert_frame_equal( + df.groupby(c).transform(lambda xs: np.max(xs)), df[['a']]) + + # Filter + tm.assert_series_equal(df.a.groupby(c).filter(np.all), df['a']) + 
tm.assert_frame_equal(df.groupby(c).filter(np.all), df) + + # Non-monotonic + df = DataFrame({"a": [5, 15, 25, -5]}) + c = pd.cut(df.a, bins=[-10, 0, 10, 20, 30, 40]) + + result = df.a.groupby(c).transform(sum) + tm.assert_series_equal(result, df['a']) + + tm.assert_series_equal( + df.a.groupby(c).transform(lambda xs: np.sum(xs)), df['a']) + tm.assert_frame_equal(df.groupby(c).transform(sum), df[['a']]) + tm.assert_frame_equal( + df.groupby(c).transform(lambda xs: np.sum(xs)), df[['a']]) + + # GH 9603 + df = DataFrame({'a': [1, 0, 0, 0]}) + c = pd.cut(df.a, [0, 1, 2, 3, 4], labels=Categorical(list('abcd'))) + result = df.groupby(c).apply(len) + + exp_index = CategoricalIndex( + c.values.categories, ordered=c.values.ordered) + expected = Series([1, 0, 0, 0], index=exp_index) + expected.index.name = 'a' + tm.assert_series_equal(result, expected) + + def test_groupby_sort(self): + + # http://stackoverflow.com/questions/23814368/sorting-pandas-categorical-labels-after-groupby + # This should result in a properly sorted Series so that the plot + # has a sorted x axis + # self.cat.groupby(['value_group'])['value_group'].count().plot(kind='bar') + + df = DataFrame({'value': np.random.randint(0, 10000, 100)}) + labels = ["{0} - {1}".format(i, i + 499) for i in range(0, 10000, 500)] + cat_labels = Categorical(labels, labels) + + df = df.sort_values(by=['value'], ascending=True) + df['value_group'] = pd.cut(df.value, range(0, 10500, 500), + right=False, labels=cat_labels) + + res = df.groupby(['value_group'])['value_group'].count() + exp = res[sorted(res.index, key=lambda x: float(x.split()[0]))] + exp.index = CategoricalIndex(exp.index, name=exp.index.name) + tm.assert_series_equal(res, exp) def test_level_groupby_get_group(self): # GH15155 @@ -46,7 +184,7 @@ def get_stats(group): 'mean': group.mean()} result = self.df.groupby(cats).D.apply(get_stats) - self.assertEqual(result.index.names[0], 'C') + assert result.index.names[0] == 'C' def test_apply_categorical_data(self): 
# GH 10138 @@ -113,14 +251,12 @@ def test_groupby_categorical(self): expc = Categorical.from_codes(np.arange(4).repeat(8), levels, ordered=True) exp = CategoricalIndex(expc) - self.assert_index_equal((desc_result.stack() - .index - .get_level_values(0)), exp) + tm.assert_index_equal((desc_result.stack().index + .get_level_values(0)), exp) exp = Index(['count', 'mean', 'std', 'min', '25%', '50%', '75%', 'max'] * 4) - self.assert_index_equal((desc_result.stack() - .index - .get_level_values(1)), exp) + tm.assert_index_equal((desc_result.stack().index + .get_level_values(1)), exp) def test_groupby_datetime_categorical(self): # GH9049: ensure backward compatibility @@ -157,14 +293,12 @@ def test_groupby_datetime_categorical(self): expc = Categorical.from_codes( np.arange(4).repeat(8), levels, ordered=True) exp = CategoricalIndex(expc) - self.assert_index_equal((desc_result.stack() - .index - .get_level_values(0)), exp) + tm.assert_index_equal((desc_result.stack().index + .get_level_values(0)), exp) exp = Index(['count', 'mean', 'std', 'min', '25%', '50%', '75%', 'max'] * 4) - self.assert_index_equal((desc_result.stack() - .index - .get_level_values(1)), exp) + tm.assert_index_equal((desc_result.stack().index + .get_level_values(1)), exp) def test_groupby_categorical_index(self): @@ -231,7 +365,7 @@ def test_groupby_bins_unequal_len(self): # len(bins) != len(series) here def f(): series.groupby(bins).mean() - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) def test_groupby_multi_categorical_as_index(self): # GH13204 @@ -254,8 +388,8 @@ def test_groupby_multi_categorical_as_index(self): columns=['cat', 'A', 'B']) tm.assert_frame_equal(result, expected) - # another not in-axis grouper (conflicting names in index) - s = Series(['a', 'b', 'b'], name='cat') + # another not in-axis grouper + s = Series(['a', 'b', 'b'], name='cat2') result = df.groupby(['cat', s], as_index=False).sum() expected = DataFrame({'cat': Categorical([1, 1, 2, 2, 3, 3]), 'A': [10.0, 
nan, nan, 22.0, nan, nan], @@ -263,6 +397,10 @@ def test_groupby_multi_categorical_as_index(self): columns=['cat', 'A', 'B']) tm.assert_frame_equal(result, expected) + # GH18872: conflicting names in desired index + pytest.raises(ValueError, lambda: df.groupby(['cat', + s.rename('cat')]).sum()) + # is original index dropped? expected = DataFrame({'cat': Categorical([1, 1, 2, 2, 3, 3]), 'A': [10, 11, 10, 11, 10, 11], @@ -284,6 +422,30 @@ def test_groupby_multi_categorical_as_index(self): tm.assert_frame_equal(result, expected, check_index_type=True) + def test_groupby_preserve_categories(self): + # GH-13179 + categories = list('abc') + + # ordered=True + df = DataFrame({'A': pd.Categorical(list('ba'), + categories=categories, + ordered=True)}) + index = pd.CategoricalIndex(categories, categories, ordered=True) + tm.assert_index_equal(df.groupby('A', sort=True).first().index, index) + tm.assert_index_equal(df.groupby('A', sort=False).first().index, index) + + # ordered=False + df = DataFrame({'A': pd.Categorical(list('ba'), + categories=categories, + ordered=False)}) + sort_index = pd.CategoricalIndex(categories, categories, ordered=False) + nosort_index = pd.CategoricalIndex(list('bac'), list('bac'), + ordered=False) + tm.assert_index_equal(df.groupby('A', sort=True).first().index, + sort_index) + tm.assert_index_equal(df.groupby('A', sort=False).first().index, + nosort_index) + def test_groupby_preserve_categorical_dtype(self): # GH13743, GH13854 df = DataFrame({'A': [1, 2, 1, 1, 2], @@ -356,7 +518,7 @@ def test_groupby_categorical_no_compress(self): result = data.groupby("b").mean() result = result["a"].values exp = np.array([1, 2, 4, np.nan]) - self.assert_numpy_array_equal(result, exp) + tm.assert_numpy_array_equal(result, exp) def test_groupby_sort_categorical(self): # dataframe groupby sort was being ignored # GH 8868 @@ -495,7 +657,8 @@ def test_groupby_categorical_two_columns(self): res = groups_double_key.agg('mean') nan = np.nan idx = 
MultiIndex.from_product( - [Categorical(["(1, 2]", "(2, 3]", "(3, 6]"], ordered=True), + [Categorical([Interval(1, 2), Interval(2, 3), + Interval(3, 6)], ordered=True), [1, 2, 3, 4]], names=["cat", "C2"]) exp = DataFrame({"C1": [nan, nan, nan, nan, 3, 3, @@ -503,3 +666,53 @@ def test_groupby_categorical_two_columns(self): "C3": [nan, nan, nan, nan, 10, 100, nan, nan, nan, nan, 200, 34]}, index=idx) tm.assert_frame_equal(res, exp) + + def test_empty_sum(self): + # https://github.com/pandas-dev/pandas/issues/18678 + df = pd.DataFrame({"A": pd.Categorical(['a', 'a', 'b'], + categories=['a', 'b', 'c']), + 'B': [1, 2, 1]}) + expected_idx = pd.CategoricalIndex(['a', 'b', 'c'], name='A') + + # 0 by default + result = df.groupby("A").B.sum() + expected = pd.Series([3, 1, 0], expected_idx, name='B') + tm.assert_series_equal(result, expected) + + # min_count=0 + result = df.groupby("A").B.sum(min_count=0) + expected = pd.Series([3, 1, 0], expected_idx, name='B') + tm.assert_series_equal(result, expected) + + # min_count=1 + result = df.groupby("A").B.sum(min_count=1) + expected = pd.Series([3, 1, np.nan], expected_idx, name='B') + tm.assert_series_equal(result, expected) + + # min_count>1 + result = df.groupby("A").B.sum(min_count=2) + expected = pd.Series([3, np.nan, np.nan], expected_idx, name='B') + tm.assert_series_equal(result, expected) + + def test_empty_prod(self): + # https://github.com/pandas-dev/pandas/issues/18678 + df = pd.DataFrame({"A": pd.Categorical(['a', 'a', 'b'], + categories=['a', 'b', 'c']), + 'B': [1, 2, 1]}) + + expected_idx = pd.CategoricalIndex(['a', 'b', 'c'], name='A') + + # 1 by default + result = df.groupby("A").B.prod() + expected = pd.Series([2, 1, 1], expected_idx, name='B') + tm.assert_series_equal(result, expected) + + # min_count=0 + result = df.groupby("A").B.prod(min_count=0) + expected = pd.Series([2, 1, 1], expected_idx, name='B') + tm.assert_series_equal(result, expected) + + # min_count=1 + result = 
df.groupby("A").B.prod(min_count=1) + expected = pd.Series([2, 1, np.nan], expected_idx, name='B') + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/test_counting.py b/pandas/tests/groupby/test_counting.py new file mode 100644 index 0000000000000..787d99086873e --- /dev/null +++ b/pandas/tests/groupby/test_counting.py @@ -0,0 +1,214 @@ +# -*- coding: utf-8 -*- +from __future__ import print_function + +import numpy as np +import pytest + +from pandas import (DataFrame, Series, MultiIndex, Timestamp, Timedelta, + Period) +from pandas.util.testing import (assert_series_equal, assert_frame_equal) +from pandas.compat import (range, product as cart_product) + + +class TestCounting(object): + + def test_cumcount(self): + df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A']) + g = df.groupby('A') + sg = g.A + + expected = Series([0, 1, 2, 0, 3]) + + assert_series_equal(expected, g.cumcount()) + assert_series_equal(expected, sg.cumcount()) + + def test_cumcount_empty(self): + ge = DataFrame().groupby(level=0) + se = Series().groupby(level=0) + + # edge case, as this is usually considered float + e = Series(dtype='int64') + + assert_series_equal(e, ge.cumcount()) + assert_series_equal(e, se.cumcount()) + + def test_cumcount_dupe_index(self): + df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A'], + index=[0] * 5) + g = df.groupby('A') + sg = g.A + + expected = Series([0, 1, 2, 0, 3], index=[0] * 5) + + assert_series_equal(expected, g.cumcount()) + assert_series_equal(expected, sg.cumcount()) + + def test_cumcount_mi(self): + mi = MultiIndex.from_tuples([[0, 1], [1, 2], [2, 2], [2, 2], [1, 0]]) + df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A'], + index=mi) + g = df.groupby('A') + sg = g.A + + expected = Series([0, 1, 2, 0, 3], index=mi) + + assert_series_equal(expected, g.cumcount()) + assert_series_equal(expected, sg.cumcount()) + + def test_cumcount_groupby_not_col(self): + df = DataFrame([['a'], ['a'], 
['a'], ['b'], ['a']], columns=['A'], + index=[0] * 5) + g = df.groupby([0, 0, 0, 1, 0]) + sg = g.A + + expected = Series([0, 1, 2, 0, 3], index=[0] * 5) + + assert_series_equal(expected, g.cumcount()) + assert_series_equal(expected, sg.cumcount()) + + def test_ngroup(self): + df = DataFrame({'A': list('aaaba')}) + g = df.groupby('A') + sg = g.A + + expected = Series([0, 0, 0, 1, 0]) + + assert_series_equal(expected, g.ngroup()) + assert_series_equal(expected, sg.ngroup()) + + def test_ngroup_distinct(self): + df = DataFrame({'A': list('abcde')}) + g = df.groupby('A') + sg = g.A + + expected = Series(range(5), dtype='int64') + + assert_series_equal(expected, g.ngroup()) + assert_series_equal(expected, sg.ngroup()) + + def test_ngroup_one_group(self): + df = DataFrame({'A': [0] * 5}) + g = df.groupby('A') + sg = g.A + + expected = Series([0] * 5) + + assert_series_equal(expected, g.ngroup()) + assert_series_equal(expected, sg.ngroup()) + + def test_ngroup_empty(self): + ge = DataFrame().groupby(level=0) + se = Series().groupby(level=0) + + # edge case, as this is usually considered float + e = Series(dtype='int64') + + assert_series_equal(e, ge.ngroup()) + assert_series_equal(e, se.ngroup()) + + def test_ngroup_series_matches_frame(self): + df = DataFrame({'A': list('aaaba')}) + s = Series(list('aaaba')) + + assert_series_equal(df.groupby(s).ngroup(), + s.groupby(s).ngroup()) + + def test_ngroup_dupe_index(self): + df = DataFrame({'A': list('aaaba')}, index=[0] * 5) + g = df.groupby('A') + sg = g.A + + expected = Series([0, 0, 0, 1, 0], index=[0] * 5) + + assert_series_equal(expected, g.ngroup()) + assert_series_equal(expected, sg.ngroup()) + + def test_ngroup_mi(self): + mi = MultiIndex.from_tuples([[0, 1], [1, 2], [2, 2], [2, 2], [1, 0]]) + df = DataFrame({'A': list('aaaba')}, index=mi) + g = df.groupby('A') + sg = g.A + expected = Series([0, 0, 0, 1, 0], index=mi) + + assert_series_equal(expected, g.ngroup()) + assert_series_equal(expected, sg.ngroup()) + + def 
test_ngroup_groupby_not_col(self): + df = DataFrame({'A': list('aaaba')}, index=[0] * 5) + g = df.groupby([0, 0, 0, 1, 0]) + sg = g.A + + expected = Series([0, 0, 0, 1, 0], index=[0] * 5) + + assert_series_equal(expected, g.ngroup()) + assert_series_equal(expected, sg.ngroup()) + + def test_ngroup_descending(self): + df = DataFrame(['a', 'a', 'b', 'a', 'b'], columns=['A']) + g = df.groupby(['A']) + + ascending = Series([0, 0, 1, 0, 1]) + descending = Series([1, 1, 0, 1, 0]) + + assert_series_equal(descending, (g.ngroups - 1) - ascending) + assert_series_equal(ascending, g.ngroup(ascending=True)) + assert_series_equal(descending, g.ngroup(ascending=False)) + + def test_ngroup_matches_cumcount(self): + # verify one manually-worked out case works + df = DataFrame([['a', 'x'], ['a', 'y'], ['b', 'x'], + ['a', 'x'], ['b', 'y']], columns=['A', 'X']) + g = df.groupby(['A', 'X']) + g_ngroup = g.ngroup() + g_cumcount = g.cumcount() + expected_ngroup = Series([0, 1, 2, 0, 3]) + expected_cumcount = Series([0, 0, 0, 1, 0]) + + assert_series_equal(g_ngroup, expected_ngroup) + assert_series_equal(g_cumcount, expected_cumcount) + + def test_ngroup_cumcount_pair(self): + # brute force comparison for all small series + for p in cart_product(range(3), repeat=4): + df = DataFrame({'a': p}) + g = df.groupby(['a']) + + order = sorted(set(p)) + ngroupd = [order.index(val) for val in p] + cumcounted = [p[:i].count(val) for i, val in enumerate(p)] + + assert_series_equal(g.ngroup(), Series(ngroupd)) + assert_series_equal(g.cumcount(), Series(cumcounted)) + + def test_ngroup_respects_groupby_order(self): + np.random.seed(0) + df = DataFrame({'a': np.random.choice(list('abcdef'), 100)}) + for sort_flag in (False, True): + g = df.groupby(['a'], sort=sort_flag) + df['group_id'] = -1 + df['group_index'] = -1 + + for i, (_, group) in enumerate(g): + df.loc[group.index, 'group_id'] = i + for j, ind in enumerate(group.index): + df.loc[ind, 'group_index'] = j + + 
assert_series_equal(Series(df['group_id'].values), + g.ngroup()) + assert_series_equal(Series(df['group_index'].values), + g.cumcount()) + + @pytest.mark.parametrize('datetimelike', [ + [Timestamp('2016-05-%02d 20:09:25+00:00' % i) for i in range(1, 4)], + [Timestamp('2016-05-%02d 20:09:25' % i) for i in range(1, 4)], + [Timedelta(x, unit="h") for x in range(1, 4)], + [Period(freq="2W", year=2017, month=x) for x in range(1, 4)]]) + def test_count_with_datetimelike(self, datetimelike): + # test for #13393, where DataframeGroupBy.count() fails + # when counting a datetimelike column. + + df = DataFrame({'x': ['a', 'a', 'b'], 'y': datetimelike}) + res = df.groupby('x').count() + expected = DataFrame({'y': [2, 1]}, index=['a', 'b']) + expected.index.name = "x" + assert_frame_equal(expected, res) diff --git a/pandas/tests/groupby/test_filters.py b/pandas/tests/groupby/test_filters.py index 1640858802047..cac6b46af8f87 100644 --- a/pandas/tests/groupby/test_filters.py +++ b/pandas/tests/groupby/test_filters.py @@ -2,6 +2,7 @@ from __future__ import print_function from numpy import nan +import pytest from pandas import Timestamp from pandas.core.index import MultiIndex @@ -22,9 +23,9 @@ import pandas as pd -class TestGroupByFilter(tm.TestCase): +class TestGroupByFilter(object): - def setUp(self): + def setup_method(self, method): self.ts = tm.makeTimeSeries() self.seriesd = tm.getSeriesData() @@ -164,8 +165,8 @@ def raise_if_sum_is_zero(x): s = pd.Series([-1, 0, 1, 2]) grouper = s.apply(lambda x: x % 2) grouped = s.groupby(grouper) - self.assertRaises(TypeError, - lambda: grouped.filter(raise_if_sum_is_zero)) + pytest.raises(TypeError, + lambda: grouped.filter(raise_if_sum_is_zero)) def test_filter_with_axis_in_groupby(self): # issue 11041 @@ -186,16 +187,16 @@ def test_filter_bad_shapes(self): g_s = s.groupby(s) f = lambda x: x - self.assertRaises(TypeError, lambda: g_df.filter(f)) - self.assertRaises(TypeError, lambda: g_s.filter(f)) + pytest.raises(TypeError, lambda: 
g_df.filter(f)) + pytest.raises(TypeError, lambda: g_s.filter(f)) f = lambda x: x == 1 - self.assertRaises(TypeError, lambda: g_df.filter(f)) - self.assertRaises(TypeError, lambda: g_s.filter(f)) + pytest.raises(TypeError, lambda: g_df.filter(f)) + pytest.raises(TypeError, lambda: g_s.filter(f)) f = lambda x: np.outer(x, x) - self.assertRaises(TypeError, lambda: g_df.filter(f)) - self.assertRaises(TypeError, lambda: g_s.filter(f)) + pytest.raises(TypeError, lambda: g_df.filter(f)) + pytest.raises(TypeError, lambda: g_s.filter(f)) def test_filter_nan_is_false(self): df = DataFrame({'A': np.arange(8), @@ -216,6 +217,7 @@ def test_filter_against_workaround(self): grouper = s.apply(lambda x: np.round(x, -1)) grouped = s.groupby(grouper) f = lambda x: x.mean() > 10 + old_way = s[grouped.transform(f).astype('bool')] new_way = grouped.filter(f) assert_series_equal(new_way.sort_values(), old_way.sort_values()) @@ -576,7 +578,8 @@ def test_filter_enforces_scalarness(self): ['worst', 'd', 'y'], ['best', 'd', 'z'], ], columns=['a', 'b', 'c']) - with tm.assertRaisesRegexp(TypeError, 'filter function returned a.*'): + with tm.assert_raises_regex(TypeError, + 'filter function returned a.*'): df.groupby('c').filter(lambda g: g['a'] == 'best') def test_filter_non_bool_raises(self): @@ -589,7 +592,8 @@ def test_filter_non_bool_raises(self): ['worst', 'd', 1], ['best', 'd', 1], ], columns=['a', 'b', 'c']) - with tm.assertRaisesRegexp(TypeError, 'filter function returned a.*'): + with tm.assert_raises_regex(TypeError, + 'filter function returned a.*'): df.groupby('a').filter(lambda g: g.c.mean()) def test_filter_dropna_with_empty_groups(self): @@ -616,24 +620,3 @@ def _check_groupby(df, result, keys, field, f=lambda x: x.sum()): expected = f(df.groupby(tups)[field]) for k, v in compat.iteritems(expected): assert (result[k] == v) - - -def test_decons(): - from pandas.core.groupby import decons_group_index, get_group_index - - def testit(label_list, shape): - group_index = 
get_group_index(label_list, shape, sort=True, xnull=True) - label_list2 = decons_group_index(group_index, shape) - - for a, b in zip(label_list, label_list2): - assert (np.array_equal(a, b)) - - shape = (4, 5, 6) - label_list = [np.tile([0, 1, 2, 3, 0, 1, 2, 3], 100), np.tile( - [0, 2, 4, 3, 0, 1, 2, 3], 100), np.tile( - [5, 1, 0, 2, 3, 0, 5, 4], 100)] - testit(label_list, shape) - - shape = (10000, 10000) - label_list = [np.tile(np.arange(10000), 5), np.tile(np.arange(10000), 5)] - testit(label_list, shape) diff --git a/pandas/tests/groupby/test_functional.py b/pandas/tests/groupby/test_functional.py new file mode 100644 index 0000000000000..b9718663570bd --- /dev/null +++ b/pandas/tests/groupby/test_functional.py @@ -0,0 +1,372 @@ +# -*- coding: utf-8 -*- + +""" test function application """ + +import pytest + +from string import ascii_lowercase +from pandas import (date_range, Timestamp, + Index, MultiIndex, DataFrame, Series) +from pandas.util.testing import assert_frame_equal, assert_series_equal +from pandas.compat import product as cart_product + +import numpy as np + +import pandas.util.testing as tm +import pandas as pd +from .common import MixIn + + +# describe +# -------------------------------- + +class TestDescribe(MixIn): + + def test_apply_describe_bug(self): + grouped = self.mframe.groupby(level='first') + grouped.describe() # it works! 
+ + def test_series_describe_multikey(self): + ts = tm.makeTimeSeries() + grouped = ts.groupby([lambda x: x.year, lambda x: x.month]) + result = grouped.describe() + assert_series_equal(result['mean'], grouped.mean(), check_names=False) + assert_series_equal(result['std'], grouped.std(), check_names=False) + assert_series_equal(result['min'], grouped.min(), check_names=False) + + def test_series_describe_single(self): + ts = tm.makeTimeSeries() + grouped = ts.groupby(lambda x: x.month) + result = grouped.apply(lambda x: x.describe()) + expected = grouped.describe().stack() + assert_series_equal(result, expected) + + def test_series_index_name(self): + grouped = self.df.loc[:, ['C']].groupby(self.df['A']) + result = grouped.agg(lambda x: x.mean()) + assert result.index.name == 'A' + + def test_frame_describe_multikey(self): + grouped = self.tsframe.groupby([lambda x: x.year, lambda x: x.month]) + result = grouped.describe() + desc_groups = [] + for col in self.tsframe: + group = grouped[col].describe() + # GH 17464 - Remove duplicate MultiIndex levels + group_col = pd.MultiIndex( + levels=[[col], group.columns], + labels=[[0] * len(group.columns), range(len(group.columns))]) + group = pd.DataFrame(group.values, + columns=group_col, + index=group.index) + desc_groups.append(group) + expected = pd.concat(desc_groups, axis=1) + tm.assert_frame_equal(result, expected) + + groupedT = self.tsframe.groupby({'A': 0, 'B': 0, + 'C': 1, 'D': 1}, axis=1) + result = groupedT.describe() + expected = self.tsframe.describe().T + expected.index = pd.MultiIndex( + levels=[[0, 1], expected.index], + labels=[[0, 0, 1, 1], range(len(expected.index))]) + tm.assert_frame_equal(result, expected) + + def test_frame_describe_tupleindex(self): + + # GH 14848 - regression from 0.19.0 to 0.19.1 + df1 = DataFrame({'x': [1, 2, 3, 4, 5] * 3, + 'y': [10, 20, 30, 40, 50] * 3, + 'z': [100, 200, 300, 400, 500] * 3}) + df1['k'] = [(0, 0, 1), (0, 1, 0), (1, 0, 0)] * 5 + df2 = df1.rename(columns={'k': 
'key'}) + pytest.raises(ValueError, lambda: df1.groupby('k').describe()) + pytest.raises(ValueError, lambda: df2.groupby('key').describe()) + + def test_frame_describe_unstacked_format(self): + # GH 4792 + prices = {pd.Timestamp('2011-01-06 10:59:05', tz=None): 24990, + pd.Timestamp('2011-01-06 12:43:33', tz=None): 25499, + pd.Timestamp('2011-01-06 12:54:09', tz=None): 25499} + volumes = {pd.Timestamp('2011-01-06 10:59:05', tz=None): 1500000000, + pd.Timestamp('2011-01-06 12:43:33', tz=None): 5000000000, + pd.Timestamp('2011-01-06 12:54:09', tz=None): 100000000} + df = pd.DataFrame({'PRICE': prices, + 'VOLUME': volumes}) + result = df.groupby('PRICE').VOLUME.describe() + data = [df[df.PRICE == 24990].VOLUME.describe().values.tolist(), + df[df.PRICE == 25499].VOLUME.describe().values.tolist()] + expected = pd.DataFrame(data, + index=pd.Index([24990, 25499], name='PRICE'), + columns=['count', 'mean', 'std', 'min', + '25%', '50%', '75%', 'max']) + tm.assert_frame_equal(result, expected) + + +# nunique +# -------------------------------- + +class TestNUnique(MixIn): + + def test_series_groupby_nunique(self): + + def check_nunique(df, keys, as_index=True): + for sort, dropna in cart_product((False, True), repeat=2): + gr = df.groupby(keys, as_index=as_index, sort=sort) + left = gr['julie'].nunique(dropna=dropna) + + gr = df.groupby(keys, as_index=as_index, sort=sort) + right = gr['julie'].apply(Series.nunique, dropna=dropna) + if not as_index: + right = right.reset_index(drop=True) + + assert_series_equal(left, right, check_names=False) + + days = date_range('2015-08-23', periods=10) + + for n, m in cart_product(10 ** np.arange(2, 6), (10, 100, 1000)): + frame = DataFrame({ + 'jim': np.random.choice( + list(ascii_lowercase), n), + 'joe': np.random.choice(days, n), + 'julie': np.random.randint(0, m, n) + }) + + check_nunique(frame, ['jim']) + check_nunique(frame, ['jim', 'joe']) + + frame.loc[1::17, 'jim'] = None + frame.loc[3::37, 'joe'] = None + frame.loc[7::19, 
'julie'] = None + frame.loc[8::19, 'julie'] = None + frame.loc[9::19, 'julie'] = None + + check_nunique(frame, ['jim']) + check_nunique(frame, ['jim', 'joe']) + check_nunique(frame, ['jim'], as_index=False) + check_nunique(frame, ['jim', 'joe'], as_index=False) + + def test_nunique(self): + df = DataFrame({ + 'A': list('abbacc'), + 'B': list('abxacc'), + 'C': list('abbacx'), + }) + + expected = DataFrame({'A': [1] * 3, 'B': [1, 2, 1], 'C': [1, 1, 2]}) + result = df.groupby('A', as_index=False).nunique() + tm.assert_frame_equal(result, expected) + + # as_index + expected.index = list('abc') + expected.index.name = 'A' + result = df.groupby('A').nunique() + tm.assert_frame_equal(result, expected) + + # with na + result = df.replace({'x': None}).groupby('A').nunique(dropna=False) + tm.assert_frame_equal(result, expected) + + # dropna + expected = DataFrame({'A': [1] * 3, 'B': [1] * 3, 'C': [1] * 3}, + index=list('abc')) + expected.index.name = 'A' + result = df.replace({'x': None}).groupby('A').nunique() + tm.assert_frame_equal(result, expected) + + def test_nunique_with_object(self): + # GH 11077 + data = pd.DataFrame( + [[100, 1, 'Alice'], + [200, 2, 'Bob'], + [300, 3, 'Charlie'], + [-400, 4, 'Dan'], + [500, 5, 'Edith']], + columns=['amount', 'id', 'name'] + ) + + result = data.groupby(['id', 'amount'])['name'].nunique() + index = MultiIndex.from_arrays([data.id, data.amount]) + expected = pd.Series([1] * 5, name='name', index=index) + tm.assert_series_equal(result, expected) + + def test_nunique_with_empty_series(self): + # GH 12553 + data = pd.Series(name='name') + result = data.groupby(level=0).nunique() + expected = pd.Series(name='name', dtype='int64') + tm.assert_series_equal(result, expected) + + def test_nunique_with_timegrouper(self): + # GH 13453 + test = pd.DataFrame({ + 'time': [Timestamp('2016-06-28 09:35:35'), + Timestamp('2016-06-28 16:09:30'), + Timestamp('2016-06-28 16:46:28')], + 'data': ['1', '2', '3']}).set_index('time') + result = 
test.groupby(pd.Grouper(freq='h'))['data'].nunique() + expected = test.groupby( + pd.Grouper(freq='h') + )['data'].apply(pd.Series.nunique) + tm.assert_series_equal(result, expected) + + +# count +# -------------------------------- + +class TestCount(MixIn): + + def test_groupby_timedelta_cython_count(self): + df = DataFrame({'g': list('ab' * 2), + 'delt': np.arange(4).astype('timedelta64[ns]')}) + expected = Series([ + 2, 2 + ], index=pd.Index(['a', 'b'], name='g'), name='delt') + result = df.groupby('g').delt.count() + tm.assert_series_equal(expected, result) + + def test_count(self): + n = 1 << 15 + dr = date_range('2015-08-30', periods=n // 10, freq='T') + + df = DataFrame({ + '1st': np.random.choice( + list(ascii_lowercase), n), + '2nd': np.random.randint(0, 5, n), + '3rd': np.random.randn(n).round(3), + '4th': np.random.randint(-10, 10, n), + '5th': np.random.choice(dr, n), + '6th': np.random.randn(n).round(3), + '7th': np.random.randn(n).round(3), + '8th': np.random.choice(dr, n) - np.random.choice(dr, 1), + '9th': np.random.choice( + list(ascii_lowercase), n) + }) + + for col in df.columns.drop(['1st', '2nd', '4th']): + df.loc[np.random.choice(n, n // 10), col] = np.nan + + df['9th'] = df['9th'].astype('category') + + for key in '1st', '2nd', ['1st', '2nd']: + left = df.groupby(key).count() + right = df.groupby(key).apply(DataFrame.count).drop(key, axis=1) + assert_frame_equal(left, right) + + # GH5610 + # count counts non-nulls + df = pd.DataFrame([[1, 2, 'foo'], + [1, np.nan, 'bar'], + [3, np.nan, np.nan]], + columns=['A', 'B', 'C']) + + count_as = df.groupby('A').count() + count_not_as = df.groupby('A', as_index=False).count() + + expected = DataFrame([[1, 2], [0, 0]], columns=['B', 'C'], + index=[1, 3]) + expected.index.name = 'A' + assert_frame_equal(count_not_as, expected.reset_index()) + assert_frame_equal(count_as, expected) + + count_B = df.groupby('A')['B'].count() + assert_series_equal(count_B, expected['B']) + + def test_count_object(self): + df 
= pd.DataFrame({'a': ['a'] * 3 + ['b'] * 3, 'c': [2] * 3 + [3] * 3}) + result = df.groupby('c').a.count() + expected = pd.Series([ + 3, 3 + ], index=pd.Index([2, 3], name='c'), name='a') + tm.assert_series_equal(result, expected) + + df = pd.DataFrame({'a': ['a', np.nan, np.nan] + ['b'] * 3, + 'c': [2] * 3 + [3] * 3}) + result = df.groupby('c').a.count() + expected = pd.Series([ + 1, 3 + ], index=pd.Index([2, 3], name='c'), name='a') + tm.assert_series_equal(result, expected) + + def test_count_cross_type(self): # GH8169 + vals = np.hstack((np.random.randint(0, 5, (100, 2)), np.random.randint( + 0, 2, (100, 2)))) + + df = pd.DataFrame(vals, columns=['a', 'b', 'c', 'd']) + df[df == 2] = np.nan + expected = df.groupby(['c', 'd']).count() + + for t in ['float32', 'object']: + df['a'] = df['a'].astype(t) + df['b'] = df['b'].astype(t) + result = df.groupby(['c', 'd']).count() + tm.assert_frame_equal(result, expected) + + def test_lower_int_prec_count(self): + df = DataFrame({'a': np.array( + [0, 1, 2, 100], np.int8), + 'b': np.array( + [1, 2, 3, 6], np.uint32), + 'c': np.array( + [4, 5, 6, 8], np.int16), + 'grp': list('ab' * 2)}) + result = df.groupby('grp').count() + expected = DataFrame({'a': [2, 2], + 'b': [2, 2], + 'c': [2, 2]}, index=pd.Index(list('ab'), + name='grp')) + tm.assert_frame_equal(result, expected) + + def test_count_uses_size_on_exception(self): + class RaisingObjectException(Exception): + pass + + class RaisingObject(object): + + def __init__(self, msg='I will raise inside Cython'): + super(RaisingObject, self).__init__() + self.msg = msg + + def __eq__(self, other): + # gets called in Cython to check that raising calls the method + raise RaisingObjectException(self.msg) + + df = DataFrame({'a': [RaisingObject() for _ in range(4)], + 'grp': list('ab' * 2)}) + result = df.groupby('grp').count() + expected = DataFrame({'a': [2, 2]}, index=pd.Index( + list('ab'), name='grp')) + tm.assert_frame_equal(result, expected) + + +# size +# 
-------------------------------- + +class TestSize(MixIn): + + def test_size(self): + grouped = self.df.groupby(['A', 'B']) + result = grouped.size() + for key, group in grouped: + assert result[key] == len(group) + + grouped = self.df.groupby('A') + result = grouped.size() + for key, group in grouped: + assert result[key] == len(group) + + grouped = self.df.groupby('B') + result = grouped.size() + for key, group in grouped: + assert result[key] == len(group) + + df = DataFrame(np.random.choice(20, (1000, 3)), columns=list('abc')) + for sort, key in cart_product((False, True), ('a', 'b', ['a', 'b'])): + left = df.groupby(key, sort=sort).size() + right = df.groupby(key, sort=sort)['c'].apply(lambda a: a.shape[0]) + assert_series_equal(left, right, check_names=False) + + # GH11699 + df = DataFrame([], columns=['A', 'B']) + out = Series([], dtype='int64', index=Index([], name='A')) + assert_series_equal(df.groupby('A').size(), out) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index d625fa07d932c..be0c32cefa6ff 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1,19 +1,20 @@ # -*- coding: utf-8 -*- from __future__ import print_function -from string import ascii_lowercase +import pytest + +from warnings import catch_warnings from datetime import datetime -from numpy import nan from pandas import (date_range, bdate_range, Timestamp, - isnull, Index, MultiIndex, DataFrame, Series, - concat, Panel) -from pandas.core.common import UnsupportedFunctionCall -from pandas.util.testing import (assert_panel_equal, assert_frame_equal, - assert_series_equal, assert_almost_equal, - assert_index_equal, assertRaisesRegexp) -from pandas.compat import (range, long, lrange, StringIO, lmap, lzip, map, zip, - builtins, OrderedDict, product as cart_product) + Index, MultiIndex, DataFrame, Series, + concat, Panel, DatetimeIndex, read_csv) +from pandas.core.dtypes.missing import isna +from pandas.errors 
import UnsupportedFunctionCall, PerformanceWarning +from pandas.util.testing import (assert_frame_equal, assert_index_equal, + assert_series_equal, assert_almost_equal) +from pandas.compat import (range, lrange, StringIO, lmap, lzip, map, zip, + builtins, OrderedDict) from pandas import compat from collections import defaultdict import pandas.core.common as com @@ -25,7 +26,16 @@ from .common import MixIn -class TestGroupBy(MixIn, tm.TestCase): +class TestGrouper(object): + + def test_repr(self): + # GH18203 + result = repr(pd.Grouper(key='A', level='B')) + expected = "Grouper(key='A', level='B', axis=0, sort=False)" + assert result == expected + + +class TestGroupBy(MixIn): def test_basic(self): def checkit(dtype): @@ -38,10 +48,10 @@ def checkit(dtype): grouped = data.groupby(lambda x: x // 3) for k, v in grouped: - self.assertEqual(len(v), 3) + assert len(v) == 3 agged = grouped.aggregate(np.mean) - self.assertEqual(agged[1], 1) + assert agged[1] == 1 assert_series_equal(agged, grouped.agg(np.mean)) # shorthand assert_series_equal(agged, grouped.mean()) @@ -49,7 +59,7 @@ def checkit(dtype): expected = grouped.apply(lambda x: x * x.sum()) transformed = grouped.transform(lambda x: x * x.sum()) - self.assertEqual(transformed[7], 12) + assert transformed[7] == 12 assert_series_equal(transformed, expected) value_grouped = data.groupby(data) @@ -58,648 +68,21 @@ def checkit(dtype): # complex agg agged = grouped.aggregate([np.mean, np.std]) - agged = grouped.aggregate({'one': np.mean, 'two': np.std}) + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + agged = grouped.aggregate({'one': np.mean, 'two': np.std}) group_constants = {0: 10, 1: 20, 2: 30} agged = grouped.agg(lambda x: group_constants[x.name] + x.mean()) - self.assertEqual(agged[1], 21) + assert agged[1] == 21 # corner cases - self.assertRaises(Exception, grouped.aggregate, lambda x: x * 2) + pytest.raises(Exception, grouped.aggregate, lambda x: x * 2) for dtype in ['int64', 
'int32', 'float64', 'float32']: checkit(dtype) - def test_select_bad_cols(self): - df = DataFrame([[1, 2]], columns=['A', 'B']) - g = df.groupby('A') - self.assertRaises(KeyError, g.__getitem__, ['C']) # g[['C']] - - self.assertRaises(KeyError, g.__getitem__, ['A', 'C']) # g[['A', 'C']] - with assertRaisesRegexp(KeyError, '^[^A]+$'): - # A should not be referenced as a bad column... - # will have to rethink regex if you change message! - g[['A', 'C']] - - def test_first_last_nth(self): - # tests for first / last / nth - grouped = self.df.groupby('A') - first = grouped.first() - expected = self.df.loc[[1, 0], ['B', 'C', 'D']] - expected.index = Index(['bar', 'foo'], name='A') - expected = expected.sort_index() - assert_frame_equal(first, expected) - - nth = grouped.nth(0) - assert_frame_equal(nth, expected) - - last = grouped.last() - expected = self.df.loc[[5, 7], ['B', 'C', 'D']] - expected.index = Index(['bar', 'foo'], name='A') - assert_frame_equal(last, expected) - - nth = grouped.nth(-1) - assert_frame_equal(nth, expected) - - nth = grouped.nth(1) - expected = self.df.loc[[2, 3], ['B', 'C', 'D']].copy() - expected.index = Index(['foo', 'bar'], name='A') - expected = expected.sort_index() - assert_frame_equal(nth, expected) - - # it works! 
- grouped['B'].first() - grouped['B'].last() - grouped['B'].nth(0) - - self.df.loc[self.df['A'] == 'foo', 'B'] = np.nan - self.assertTrue(isnull(grouped['B'].first()['foo'])) - self.assertTrue(isnull(grouped['B'].last()['foo'])) - self.assertTrue(isnull(grouped['B'].nth(0)['foo'])) - - # v0.14.0 whatsnew - df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B']) - g = df.groupby('A') - result = g.first() - expected = df.iloc[[1, 2]].set_index('A') - assert_frame_equal(result, expected) - - expected = df.iloc[[1, 2]].set_index('A') - result = g.nth(0, dropna='any') - assert_frame_equal(result, expected) - - def test_first_last_nth_dtypes(self): - - df = self.df_mixed_floats.copy() - df['E'] = True - df['F'] = 1 - - # tests for first / last / nth - grouped = df.groupby('A') - first = grouped.first() - expected = df.loc[[1, 0], ['B', 'C', 'D', 'E', 'F']] - expected.index = Index(['bar', 'foo'], name='A') - expected = expected.sort_index() - assert_frame_equal(first, expected) - - last = grouped.last() - expected = df.loc[[5, 7], ['B', 'C', 'D', 'E', 'F']] - expected.index = Index(['bar', 'foo'], name='A') - expected = expected.sort_index() - assert_frame_equal(last, expected) - - nth = grouped.nth(1) - expected = df.loc[[3, 2], ['B', 'C', 'D', 'E', 'F']] - expected.index = Index(['bar', 'foo'], name='A') - expected = expected.sort_index() - assert_frame_equal(nth, expected) - - # GH 2763, first/last shifting dtypes - idx = lrange(10) - idx.append(9) - s = Series(data=lrange(11), index=idx, name='IntCol') - self.assertEqual(s.dtype, 'int64') - f = s.groupby(level=0).first() - self.assertEqual(f.dtype, 'int64') - - def test_nth(self): - df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B']) - g = df.groupby('A') - - assert_frame_equal(g.nth(0), df.iloc[[0, 2]].set_index('A')) - assert_frame_equal(g.nth(1), df.iloc[[1]].set_index('A')) - assert_frame_equal(g.nth(2), df.loc[[]].set_index('A')) - assert_frame_equal(g.nth(-1), df.iloc[[1, 
2]].set_index('A')) - assert_frame_equal(g.nth(-2), df.iloc[[0]].set_index('A')) - assert_frame_equal(g.nth(-3), df.loc[[]].set_index('A')) - assert_series_equal(g.B.nth(0), df.set_index('A').B.iloc[[0, 2]]) - assert_series_equal(g.B.nth(1), df.set_index('A').B.iloc[[1]]) - assert_frame_equal(g[['B']].nth(0), - df.loc[[0, 2], ['A', 'B']].set_index('A')) - - exp = df.set_index('A') - assert_frame_equal(g.nth(0, dropna='any'), exp.iloc[[1, 2]]) - assert_frame_equal(g.nth(-1, dropna='any'), exp.iloc[[1, 2]]) - - exp['B'] = np.nan - assert_frame_equal(g.nth(7, dropna='any'), exp.iloc[[1, 2]]) - assert_frame_equal(g.nth(2, dropna='any'), exp.iloc[[1, 2]]) - - # out of bounds, regression from 0.13.1 - # GH 6621 - df = DataFrame({'color': {0: 'green', - 1: 'green', - 2: 'red', - 3: 'red', - 4: 'red'}, - 'food': {0: 'ham', - 1: 'eggs', - 2: 'eggs', - 3: 'ham', - 4: 'pork'}, - 'two': {0: 1.5456590000000001, - 1: -0.070345000000000005, - 2: -2.4004539999999999, - 3: 0.46206000000000003, - 4: 0.52350799999999997}, - 'one': {0: 0.56573799999999996, - 1: -0.9742360000000001, - 2: 1.033801, - 3: -0.78543499999999999, - 4: 0.70422799999999997}}).set_index(['color', - 'food']) - - result = df.groupby(level=0, as_index=False).nth(2) - expected = df.iloc[[-1]] - assert_frame_equal(result, expected) - - result = df.groupby(level=0, as_index=False).nth(3) - expected = df.loc[[]] - assert_frame_equal(result, expected) - - # GH 7559 - # from the vbench - df = DataFrame(np.random.randint(1, 10, (100, 2)), dtype='int64') - s = df[1] - g = df[0] - expected = s.groupby(g).first() - expected2 = s.groupby(g).apply(lambda x: x.iloc[0]) - assert_series_equal(expected2, expected, check_names=False) - self.assertTrue(expected.name, 0) - self.assertEqual(expected.name, 1) - - # validate first - v = s[g == 1].iloc[0] - self.assertEqual(expected.iloc[0], v) - self.assertEqual(expected2.iloc[0], v) - - # this is NOT the same as .first (as sorted is default!) 
- # as it keeps the order in the series (and not the group order) - # related GH 7287 - expected = s.groupby(g, sort=False).first() - result = s.groupby(g, sort=False).nth(0, dropna='all') - assert_series_equal(result, expected) - - # doc example - df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B']) - g = df.groupby('A') - result = g.B.nth(0, dropna=True) - expected = g.B.first() - assert_series_equal(result, expected) - - # test multiple nth values - df = DataFrame([[1, np.nan], [1, 3], [1, 4], [5, 6], [5, 7]], - columns=['A', 'B']) - g = df.groupby('A') - - assert_frame_equal(g.nth(0), df.iloc[[0, 3]].set_index('A')) - assert_frame_equal(g.nth([0]), df.iloc[[0, 3]].set_index('A')) - assert_frame_equal(g.nth([0, 1]), df.iloc[[0, 1, 3, 4]].set_index('A')) - assert_frame_equal( - g.nth([0, -1]), df.iloc[[0, 2, 3, 4]].set_index('A')) - assert_frame_equal( - g.nth([0, 1, 2]), df.iloc[[0, 1, 2, 3, 4]].set_index('A')) - assert_frame_equal( - g.nth([0, 1, -1]), df.iloc[[0, 1, 2, 3, 4]].set_index('A')) - assert_frame_equal(g.nth([2]), df.iloc[[2]].set_index('A')) - assert_frame_equal(g.nth([3, 4]), df.loc[[]].set_index('A')) - - business_dates = pd.date_range(start='4/1/2014', end='6/30/2014', - freq='B') - df = DataFrame(1, index=business_dates, columns=['a', 'b']) - # get the first, fourth and last two business days for each month - key = (df.index.year, df.index.month) - result = df.groupby(key, as_index=False).nth([0, 3, -2, -1]) - expected_dates = pd.to_datetime( - ['2014/4/1', '2014/4/4', '2014/4/29', '2014/4/30', '2014/5/1', - '2014/5/6', '2014/5/29', '2014/5/30', '2014/6/2', '2014/6/5', - '2014/6/27', '2014/6/30']) - expected = DataFrame(1, columns=['a', 'b'], index=expected_dates) - assert_frame_equal(result, expected) - - def test_nth_multi_index(self): - # PR 9090, related to issue 8979 - # test nth on MultiIndex, should match .first() - grouped = self.three_group.groupby(['A', 'B']) - result = grouped.nth(0) - expected = grouped.first() - 
assert_frame_equal(result, expected) - - def test_nth_multi_index_as_expected(self): - # PR 9090, related to issue 8979 - # test nth on MultiIndex - three_group = DataFrame( - {'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar', - 'foo', 'foo', 'foo'], - 'B': ['one', 'one', 'one', 'two', 'one', 'one', 'one', 'two', - 'two', 'two', 'one'], - 'C': ['dull', 'dull', 'shiny', 'dull', 'dull', 'shiny', 'shiny', - 'dull', 'shiny', 'shiny', 'shiny']}) - grouped = three_group.groupby(['A', 'B']) - result = grouped.nth(0) - expected = DataFrame( - {'C': ['dull', 'dull', 'dull', 'dull']}, - index=MultiIndex.from_arrays([['bar', 'bar', 'foo', 'foo'], - ['one', 'two', 'one', 'two']], - names=['A', 'B'])) - assert_frame_equal(result, expected) - - def test_group_selection_cache(self): - # GH 12839 nth, head, and tail should return same result consistently - df = DataFrame([[1, 2], [1, 4], [5, 6]], columns=['A', 'B']) - expected = df.iloc[[0, 2]].set_index('A') - - g = df.groupby('A') - result1 = g.head(n=2) - result2 = g.nth(0) - assert_frame_equal(result1, df) - assert_frame_equal(result2, expected) - - g = df.groupby('A') - result1 = g.tail(n=2) - result2 = g.nth(0) - assert_frame_equal(result1, df) - assert_frame_equal(result2, expected) - - g = df.groupby('A') - result1 = g.nth(0) - result2 = g.head(n=2) - assert_frame_equal(result1, expected) - assert_frame_equal(result2, df) - - g = df.groupby('A') - result1 = g.nth(0) - result2 = g.tail(n=2) - assert_frame_equal(result1, expected) - assert_frame_equal(result2, df) - - def test_grouper_index_types(self): - # related GH5375 - # groupby misbehaving when using a Floatlike index - df = DataFrame(np.arange(10).reshape(5, 2), columns=list('AB')) - for index in [tm.makeFloatIndex, tm.makeStringIndex, - tm.makeUnicodeIndex, tm.makeIntIndex, tm.makeDateIndex, - tm.makePeriodIndex]: - - df.index = index(len(df)) - df.groupby(list('abcde')).apply(lambda x: x) - - df.index = list(reversed(df.index.tolist())) - 
df.groupby(list('abcde')).apply(lambda x: x) - - def test_grouper_multilevel_freq(self): - - # GH 7885 - # with level and freq specified in a pd.Grouper - from datetime import date, timedelta - d0 = date.today() - timedelta(days=14) - dates = date_range(d0, date.today()) - date_index = pd.MultiIndex.from_product( - [dates, dates], names=['foo', 'bar']) - df = pd.DataFrame(np.random.randint(0, 100, 225), index=date_index) - - # Check string level - expected = df.reset_index().groupby([pd.Grouper( - key='foo', freq='W'), pd.Grouper(key='bar', freq='W')]).sum() - # reset index changes columns dtype to object - expected.columns = pd.Index([0], dtype='int64') - - result = df.groupby([pd.Grouper(level='foo', freq='W'), pd.Grouper( - level='bar', freq='W')]).sum() - assert_frame_equal(result, expected) - - # Check integer level - result = df.groupby([pd.Grouper(level=0, freq='W'), pd.Grouper( - level=1, freq='W')]).sum() - assert_frame_equal(result, expected) - - def test_grouper_creation_bug(self): - - # GH 8795 - df = DataFrame({'A': [0, 0, 1, 1, 2, 2], 'B': [1, 2, 3, 4, 5, 6]}) - g = df.groupby('A') - expected = g.sum() - - g = df.groupby(pd.Grouper(key='A')) - result = g.sum() - assert_frame_equal(result, expected) - - result = g.apply(lambda x: x.sum()) - assert_frame_equal(result, expected) - - g = df.groupby(pd.Grouper(key='A', axis=0)) - result = g.sum() - assert_frame_equal(result, expected) - - # GH14334 - # pd.Grouper(key=...) 
may be passed in a list - df = DataFrame({'A': [0, 0, 0, 1, 1, 1], - 'B': [1, 1, 2, 2, 3, 3], - 'C': [1, 2, 3, 4, 5, 6]}) - # Group by single column - expected = df.groupby('A').sum() - g = df.groupby([pd.Grouper(key='A')]) - result = g.sum() - assert_frame_equal(result, expected) - - # Group by two columns - # using a combination of strings and Grouper objects - expected = df.groupby(['A', 'B']).sum() - - # Group with two Grouper objects - g = df.groupby([pd.Grouper(key='A'), pd.Grouper(key='B')]) - result = g.sum() - assert_frame_equal(result, expected) - - # Group with a string and a Grouper object - g = df.groupby(['A', pd.Grouper(key='B')]) - result = g.sum() - assert_frame_equal(result, expected) - - # Group with a Grouper object and a string - g = df.groupby([pd.Grouper(key='A'), 'B']) - result = g.sum() - assert_frame_equal(result, expected) - - # GH8866 - s = Series(np.arange(8, dtype='int64'), - index=pd.MultiIndex.from_product( - [list('ab'), range(2), - date_range('20130101', periods=2)], - names=['one', 'two', 'three'])) - result = s.groupby(pd.Grouper(level='three', freq='M')).sum() - expected = Series([28], index=Index( - [Timestamp('2013-01-31')], freq='M', name='three')) - assert_series_equal(result, expected) - - # just specifying a level breaks - result = s.groupby(pd.Grouper(level='one')).sum() - expected = s.groupby(level='one').sum() - assert_series_equal(result, expected) - - def test_grouper_column_and_index(self): - # GH 14327 - - # Grouping a multi-index frame by a column and an index level should - # be equivalent to resetting the index and grouping by two columns - idx = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('a', 3), - ('b', 1), ('b', 2), ('b', 3)]) - idx.names = ['outer', 'inner'] - df_multi = pd.DataFrame({"A": np.arange(6), - 'B': ['one', 'one', 'two', - 'two', 'one', 'one']}, - index=idx) - result = df_multi.groupby(['B', pd.Grouper(level='inner')]).mean() - expected = df_multi.reset_index().groupby(['B', 'inner']).mean() - 
assert_frame_equal(result, expected) - - # Test the reverse grouping order - result = df_multi.groupby([pd.Grouper(level='inner'), 'B']).mean() - expected = df_multi.reset_index().groupby(['inner', 'B']).mean() - assert_frame_equal(result, expected) - - # Grouping a single-index frame by a column and the index should - # be equivalent to resetting the index and grouping by two columns - df_single = df_multi.reset_index('outer') - result = df_single.groupby(['B', pd.Grouper(level='inner')]).mean() - expected = df_single.reset_index().groupby(['B', 'inner']).mean() - assert_frame_equal(result, expected) - - # Test the reverse grouping order - result = df_single.groupby([pd.Grouper(level='inner'), 'B']).mean() - expected = df_single.reset_index().groupby(['inner', 'B']).mean() - assert_frame_equal(result, expected) - - def test_grouper_index_level_as_string(self): - # GH 5677, allow strings passed as the `by` parameter to reference - # columns or index levels - - idx = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('a', 3), - ('b', 1), ('b', 2), ('b', 3)]) - idx.names = ['outer', 'inner'] - df_multi = pd.DataFrame({"A": np.arange(6), - 'B': ['one', 'one', 'two', - 'two', 'one', 'one']}, - index=idx) - - df_single = df_multi.reset_index('outer') - - # Column and Index on MultiIndex - result = df_multi.groupby(['B', 'inner']).mean() - expected = df_multi.groupby(['B', pd.Grouper(level='inner')]).mean() - assert_frame_equal(result, expected) - - # Index and Column on MultiIndex - result = df_multi.groupby(['inner', 'B']).mean() - expected = df_multi.groupby([pd.Grouper(level='inner'), 'B']).mean() - assert_frame_equal(result, expected) - - # Column and Index on single Index - result = df_single.groupby(['B', 'inner']).mean() - expected = df_single.groupby(['B', pd.Grouper(level='inner')]).mean() - assert_frame_equal(result, expected) - - # Index and Column on single Index - result = df_single.groupby(['inner', 'B']).mean() - expected = 
df_single.groupby([pd.Grouper(level='inner'), 'B']).mean() - assert_frame_equal(result, expected) - - # Single element list of Index on MultiIndex - result = df_multi.groupby(['inner']).mean() - expected = df_multi.groupby(pd.Grouper(level='inner')).mean() - assert_frame_equal(result, expected) - - # Single element list of Index on single Index - result = df_single.groupby(['inner']).mean() - expected = df_single.groupby(pd.Grouper(level='inner')).mean() - assert_frame_equal(result, expected) - - # Index on MultiIndex - result = df_multi.groupby('inner').mean() - expected = df_multi.groupby(pd.Grouper(level='inner')).mean() - assert_frame_equal(result, expected) - - # Index on single Index - result = df_single.groupby('inner').mean() - expected = df_single.groupby(pd.Grouper(level='inner')).mean() - assert_frame_equal(result, expected) - - def test_grouper_column_index_level_precedence(self): - # GH 5677, when a string passed as the `by` parameter - # matches a column and an index level the column takes - # precedence - - idx = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('a', 3), - ('b', 1), ('b', 2), ('b', 3)]) - idx.names = ['outer', 'inner'] - df_multi_both = pd.DataFrame({"A": np.arange(6), - 'B': ['one', 'one', 'two', - 'two', 'one', 'one'], - 'inner': [1, 1, 1, 1, 1, 1]}, - index=idx) - - df_single_both = df_multi_both.reset_index('outer') - - # Group MultiIndex by single key - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = df_multi_both.groupby('inner').mean() - - expected = df_multi_both.groupby([pd.Grouper(key='inner')]).mean() - assert_frame_equal(result, expected) - not_expected = df_multi_both.groupby(pd.Grouper(level='inner')).mean() - self.assertFalse(result.index.equals(not_expected.index)) - - # Group single Index by single key - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = df_single_both.groupby('inner').mean() - - expected = 
df_single_both.groupby([pd.Grouper(key='inner')]).mean() - assert_frame_equal(result, expected) - not_expected = df_single_both.groupby(pd.Grouper(level='inner')).mean() - self.assertFalse(result.index.equals(not_expected.index)) - - # Group MultiIndex by single key list - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = df_multi_both.groupby(['inner']).mean() - - expected = df_multi_both.groupby([pd.Grouper(key='inner')]).mean() - assert_frame_equal(result, expected) - not_expected = df_multi_both.groupby(pd.Grouper(level='inner')).mean() - self.assertFalse(result.index.equals(not_expected.index)) - - # Group single Index by single key list - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = df_single_both.groupby(['inner']).mean() - - expected = df_single_both.groupby([pd.Grouper(key='inner')]).mean() - assert_frame_equal(result, expected) - not_expected = df_single_both.groupby(pd.Grouper(level='inner')).mean() - self.assertFalse(result.index.equals(not_expected.index)) - - # Group MultiIndex by two keys (1) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = df_multi_both.groupby(['B', 'inner']).mean() - - expected = df_multi_both.groupby(['B', - pd.Grouper(key='inner')]).mean() - assert_frame_equal(result, expected) - not_expected = df_multi_both.groupby(['B', - pd.Grouper(level='inner') - ]).mean() - self.assertFalse(result.index.equals(not_expected.index)) - - # Group MultiIndex by two keys (2) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = df_multi_both.groupby(['inner', 'B']).mean() - - expected = df_multi_both.groupby([pd.Grouper(key='inner'), - 'B']).mean() - assert_frame_equal(result, expected) - not_expected = df_multi_both.groupby([pd.Grouper(level='inner'), - 'B']).mean() - self.assertFalse(result.index.equals(not_expected.index)) - - # Group single Index by two keys (1) - with 
tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = df_single_both.groupby(['B', 'inner']).mean() - - expected = df_single_both.groupby(['B', - pd.Grouper(key='inner')]).mean() - assert_frame_equal(result, expected) - not_expected = df_single_both.groupby(['B', - pd.Grouper(level='inner') - ]).mean() - self.assertFalse(result.index.equals(not_expected.index)) - - # Group single Index by two keys (2) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = df_single_both.groupby(['inner', 'B']).mean() - - expected = df_single_both.groupby([pd.Grouper(key='inner'), - 'B']).mean() - assert_frame_equal(result, expected) - not_expected = df_single_both.groupby([pd.Grouper(level='inner'), - 'B']).mean() - self.assertFalse(result.index.equals(not_expected.index)) - - def test_grouper_getting_correct_binner(self): - - # GH 10063 - # using a non-time-based grouper and a time-based grouper - # and specifying levels - df = DataFrame({'A': 1}, index=pd.MultiIndex.from_product( - [list('ab'), date_range('20130101', periods=80)], names=['one', - 'two'])) - result = df.groupby([pd.Grouper(level='one'), pd.Grouper( - level='two', freq='M')]).sum() - expected = DataFrame({'A': [31, 28, 21, 31, 28, 21]}, - index=MultiIndex.from_product( - [list('ab'), - date_range('20130101', freq='M', periods=3)], - names=['one', 'two'])) - assert_frame_equal(result, expected) - - def test_grouper_iter(self): - self.assertEqual(sorted(self.df.groupby('A').grouper), ['bar', 'foo']) - - def test_empty_groups(self): - # GH # 1048 - self.assertRaises(ValueError, self.df.groupby, []) - - def test_groupby_grouper(self): - grouped = self.df.groupby('A') - - result = self.df.groupby(grouped.grouper).mean() - expected = grouped.mean() - assert_frame_equal(result, expected) - - def test_groupby_duplicated_column_errormsg(self): - # GH7511 - df = DataFrame(columns=['A', 'B', 'A', 'C'], - data=[range(4), range(2, 6), range(0, 8, 2)]) - - 
self.assertRaises(ValueError, df.groupby, 'A') - self.assertRaises(ValueError, df.groupby, ['A', 'B']) - - grouped = df.groupby('B') - c = grouped.count() - self.assertTrue(c.columns.nlevels == 1) - self.assertTrue(c.columns.size == 3) - - def test_groupby_dict_mapping(self): - # GH #679 - from pandas import Series - s = Series({'T1': 5}) - result = s.groupby({'T1': 'T2'}).agg(sum) - expected = s.groupby(['T2']).agg(sum) - assert_series_equal(result, expected) - - s = Series([1., 2., 3., 4.], index=list('abcd')) - mapping = {'a': 0, 'b': 0, 'c': 1, 'd': 1} - - result = s.groupby(mapping).mean() - result2 = s.groupby(mapping).agg(np.mean) - expected = s.groupby([0, 0, 1, 1]).mean() - expected2 = s.groupby([0, 0, 1, 1]).mean() - assert_series_equal(result, expected) - assert_series_equal(result, result2) - assert_series_equal(result, expected2) - - def test_groupby_grouper_f_sanity_checked(self): - dates = date_range('01-Jan-2013', periods=12, freq='MS') - ts = Series(np.random.randn(12), index=dates) - - # GH3035 - # index.map is used to apply grouper to the index - # if it fails on the elements, map tries it on the entire index as - # a sequence. That can yield invalid results that cause trouble - # down the line. - # the surprise comes from using key[0:6] rather then str(key)[0:6] - # when the elements are Timestamp. - # the result is Index[0:6], very confusing. 
- - self.assertRaises(AssertionError, ts.groupby, lambda key: key[0:6]) - def test_groupby_nonobject_dtype(self): key = self.mframe.index.labels[0] grouped = self.mframe.groupby(key) @@ -717,9 +100,9 @@ def max_value(group): applied = df.groupby('A').apply(max_value) result = applied.get_dtype_counts().sort_values() - expected = Series({'object': 2, - 'float64': 2, - 'int64': 1}).sort_values() + expected = Series({'float64': 2, + 'int64': 1, + 'object': 2}).sort_values() assert_series_equal(result, expected) def test_groupby_return_type(self): @@ -736,7 +119,7 @@ def func(dataf): return dataf["val2"] - dataf["val2"].mean() result = df1.groupby("val1", squeeze=True).apply(func) - tm.assertIsInstance(result, Series) + assert isinstance(result, Series) df2 = DataFrame( [{"val1": 1, "val2": 20}, @@ -749,12 +132,12 @@ def func(dataf): return dataf["val2"] - dataf["val2"].mean() result = df2.groupby("val1", squeeze=True).apply(func) - tm.assertIsInstance(result, Series) + assert isinstance(result, Series) # GH3596, return a consistent type (regression in 0.11 from 0.10.1) df = DataFrame([[1, 1], [1, 1]], columns=['X', 'Y']) result = df.groupby('X', squeeze=False).count() - tm.assertIsInstance(result, DataFrame) + assert isinstance(result, DataFrame) # GH5592 # inconcistent return type @@ -813,82 +196,6 @@ def f(grp): e.name = None assert_series_equal(result, e) - def test_get_group(self): - wp = tm.makePanel() - grouped = wp.groupby(lambda x: x.month, axis='major') - - gp = grouped.get_group(1) - expected = wp.reindex(major=[x for x in wp.major_axis if x.month == 1]) - assert_panel_equal(gp, expected) - - # GH 5267 - # be datelike friendly - df = DataFrame({'DATE': pd.to_datetime( - ['10-Oct-2013', '10-Oct-2013', '10-Oct-2013', '11-Oct-2013', - '11-Oct-2013', '11-Oct-2013']), - 'label': ['foo', 'foo', 'bar', 'foo', 'foo', 'bar'], - 'VAL': [1, 2, 3, 4, 5, 6]}) - - g = df.groupby('DATE') - key = list(g.groups)[0] - result1 = g.get_group(key) - result2 = 
g.get_group(Timestamp(key).to_pydatetime()) - result3 = g.get_group(str(Timestamp(key))) - assert_frame_equal(result1, result2) - assert_frame_equal(result1, result3) - - g = df.groupby(['DATE', 'label']) - - key = list(g.groups)[0] - result1 = g.get_group(key) - result2 = g.get_group((Timestamp(key[0]).to_pydatetime(), key[1])) - result3 = g.get_group((str(Timestamp(key[0])), key[1])) - assert_frame_equal(result1, result2) - assert_frame_equal(result1, result3) - - # must pass a same-length tuple with multiple keys - self.assertRaises(ValueError, lambda: g.get_group('foo')) - self.assertRaises(ValueError, lambda: g.get_group(('foo'))) - self.assertRaises(ValueError, - lambda: g.get_group(('foo', 'bar', 'baz'))) - - def test_get_group_empty_bins(self): - - d = pd.DataFrame([3, 1, 7, 6]) - bins = [0, 5, 10, 15] - g = d.groupby(pd.cut(d[0], bins)) - - result = g.get_group('(0, 5]') - expected = DataFrame([3, 1], index=[0, 1]) - assert_frame_equal(result, expected) - - self.assertRaises(KeyError, lambda: g.get_group('(10, 15]')) - - def test_get_group_grouped_by_tuple(self): - # GH 8121 - df = DataFrame([[(1, ), (1, 2), (1, ), (1, 2)]], index=['ids']).T - gr = df.groupby('ids') - expected = DataFrame({'ids': [(1, ), (1, )]}, index=[0, 2]) - result = gr.get_group((1, )) - assert_frame_equal(result, expected) - - dt = pd.to_datetime(['2010-01-01', '2010-01-02', '2010-01-01', - '2010-01-02']) - df = DataFrame({'ids': [(x, ) for x in dt]}) - gr = df.groupby('ids') - result = gr.get_group(('2010-01-01', )) - expected = DataFrame({'ids': [(dt[0], ), (dt[0], )]}, index=[0, 2]) - assert_frame_equal(result, expected) - - def test_grouping_error_on_multidim_input(self): - from pandas.core.groupby import Grouping - self.assertRaises(ValueError, - Grouping, self.df.index, self.df[['A', 'A']]) - - def test_apply_describe_bug(self): - grouped = self.mframe.groupby(level='first') - grouped.describe() # it works! 
- def test_apply_issues(self): # GH 5788 @@ -938,7 +245,7 @@ def func_with_no_date(batch): return pd.Series({'c': 2}) def func_with_date(batch): - return pd.Series({'c': 2, 'b': datetime(2015, 1, 1)}) + return pd.Series({'b': datetime(2015, 1, 1), 'c': 2}) dfg_no_conversion = df.groupby(by=['a']).apply(func_with_no_date) dfg_no_conversion_expected = pd.DataFrame({'c': 2}, index=[1]) @@ -950,39 +257,24 @@ def func_with_date(batch): 'c': 2}, index=[1]) dfg_conversion_expected.index.name = 'a' - self.assert_frame_equal(dfg_no_conversion, dfg_no_conversion_expected) - self.assert_frame_equal(dfg_conversion, dfg_conversion_expected) + tm.assert_frame_equal(dfg_no_conversion, dfg_no_conversion_expected) + tm.assert_frame_equal(dfg_conversion, dfg_conversion_expected) def test_len(self): df = tm.makeTimeDataFrame() grouped = df.groupby([lambda x: x.year, lambda x: x.month, lambda x: x.day]) - self.assertEqual(len(grouped), len(df)) + assert len(grouped) == len(df) grouped = df.groupby([lambda x: x.year, lambda x: x.month]) - expected = len(set([(x.year, x.month) for x in df.index])) - self.assertEqual(len(grouped), expected) + expected = len({(x.year, x.month) for x in df.index}) + assert len(grouped) == expected # issue 11016 df = pd.DataFrame(dict(a=[np.nan] * 3, b=[1, 2, 3])) - self.assertEqual(len(df.groupby(('a'))), 0) - self.assertEqual(len(df.groupby(('b'))), 3) - self.assertEqual(len(df.groupby(('a', 'b'))), 3) - - def test_groups(self): - grouped = self.df.groupby(['A']) - groups = grouped.groups - self.assertIs(groups, grouped.groups) # caching works - - for k, v in compat.iteritems(grouped.groups): - self.assertTrue((self.df.loc[v]['A'] == k).all()) - - grouped = self.df.groupby(['A', 'B']) - groups = grouped.groups - self.assertIs(groups, grouped.groups) # caching works - for k, v in compat.iteritems(grouped.groups): - self.assertTrue((self.df.loc[v]['A'] == k[0]).all()) - self.assertTrue((self.df.loc[v]['B'] == k[1]).all()) + assert len(df.groupby(('a'))) == 
0 + assert len(df.groupby(('b'))) == 3 + assert len(df.groupby(['a', 'b'])) == 3 def test_basic_regression(self): # regression @@ -995,13 +287,13 @@ def test_basic_regression(self): grouped = result.groupby(groupings) grouped.mean() - def test_with_na(self): + def test_with_na_groups(self): index = Index(np.arange(10)) for dtype in ['float64', 'float32', 'int64', 'int32', 'int16', 'int8']: values = Series(np.ones(10), index, dtype=dtype) - labels = Series([nan, 'foo', 'bar', 'bar', nan, nan, 'bar', - 'bar', nan, 'foo'], index=index) + labels = Series([np.nan, 'foo', 'bar', 'bar', np.nan, np.nan, + 'bar', 'bar', np.nan, 'foo'], index=index) # this SHOULD be an int grouped = values.groupby(labels) @@ -1010,9 +302,9 @@ def test_with_na(self): assert_series_equal(agged, expected, check_dtype=False) - # self.assertTrue(issubclass(agged.dtype.type, np.integer)) + # assert issubclass(agged.dtype.type, np.integer) - # explicity return a float from my function + # explicitly return a float from my function def f(x): return float(len(x)) @@ -1020,7 +312,7 @@ def f(x): expected = Series([4, 2], index=['bar', 'foo']) assert_series_equal(agged, expected, check_dtype=False) - self.assertTrue(issubclass(agged.dtype.type, np.dtype(dtype).type)) + assert issubclass(agged.dtype.type, np.dtype(dtype).type) def test_indices_concatenation_order(self): @@ -1065,12 +357,12 @@ def f3(x): assert_frame_equal(result1, result2) # should fail (not the same number of levels) - self.assertRaises(AssertionError, df.groupby('a').apply, f2) - self.assertRaises(AssertionError, df2.groupby('a').apply, f2) + pytest.raises(AssertionError, df.groupby('a').apply, f2) + pytest.raises(AssertionError, df2.groupby('a').apply, f2) # should fail (incorrect shape) - self.assertRaises(AssertionError, df.groupby('a').apply, f3) - self.assertRaises(AssertionError, df2.groupby('a').apply, f3) + pytest.raises(AssertionError, df.groupby('a').apply, f3) + pytest.raises(AssertionError, df2.groupby('a').apply, f3) def 
test_attr_wrapper(self): grouped = self.ts.groupby(lambda x: x.weekday()) @@ -1092,90 +384,15 @@ def test_attr_wrapper(self): expected = grouped.agg(lambda x: x.dtype) # make sure raises error - self.assertRaises(AttributeError, getattr, grouped, 'foo') - - def test_series_describe_multikey(self): - ts = tm.makeTimeSeries() - grouped = ts.groupby([lambda x: x.year, lambda x: x.month]) - result = grouped.describe() - assert_series_equal(result['mean'], grouped.mean(), check_names=False) - assert_series_equal(result['std'], grouped.std(), check_names=False) - assert_series_equal(result['min'], grouped.min(), check_names=False) - - def test_series_describe_single(self): - ts = tm.makeTimeSeries() - grouped = ts.groupby(lambda x: x.month) - result = grouped.apply(lambda x: x.describe()) - expected = grouped.describe().stack() - assert_series_equal(result, expected) - - def test_series_index_name(self): - grouped = self.df.loc[:, ['C']].groupby(self.df['A']) - result = grouped.agg(lambda x: x.mean()) - self.assertEqual(result.index.name, 'A') - - def test_frame_describe_multikey(self): - grouped = self.tsframe.groupby([lambda x: x.year, lambda x: x.month]) - result = grouped.describe() - desc_groups = [] - for col in self.tsframe: - group = grouped[col].describe() - group_col = pd.MultiIndex([[col] * len(group.columns), - group.columns], - [[0] * len(group.columns), - range(len(group.columns))]) - group = pd.DataFrame(group.values, - columns=group_col, - index=group.index) - desc_groups.append(group) - expected = pd.concat(desc_groups, axis=1) - tm.assert_frame_equal(result, expected) - - groupedT = self.tsframe.groupby({'A': 0, 'B': 0, - 'C': 1, 'D': 1}, axis=1) - result = groupedT.describe() - expected = self.tsframe.describe().T - expected.index = pd.MultiIndex([[0, 0, 1, 1], expected.index], - [range(4), range(len(expected.index))]) - tm.assert_frame_equal(result, expected) - - def test_frame_describe_tupleindex(self): - - # GH 14848 - regression from 0.19.0 to 
0.19.1 - df1 = DataFrame({'x': [1, 2, 3, 4, 5] * 3, - 'y': [10, 20, 30, 40, 50] * 3, - 'z': [100, 200, 300, 400, 500] * 3}) - df1['k'] = [(0, 0, 1), (0, 1, 0), (1, 0, 0)] * 5 - df2 = df1.rename(columns={'k': 'key'}) - tm.assertRaises(ValueError, lambda: df1.groupby('k').describe()) - tm.assertRaises(ValueError, lambda: df2.groupby('key').describe()) - - def test_frame_describe_unstacked_format(self): - # GH 4792 - prices = {pd.Timestamp('2011-01-06 10:59:05', tz=None): 24990, - pd.Timestamp('2011-01-06 12:43:33', tz=None): 25499, - pd.Timestamp('2011-01-06 12:54:09', tz=None): 25499} - volumes = {pd.Timestamp('2011-01-06 10:59:05', tz=None): 1500000000, - pd.Timestamp('2011-01-06 12:43:33', tz=None): 5000000000, - pd.Timestamp('2011-01-06 12:54:09', tz=None): 100000000} - df = pd.DataFrame({'PRICE': prices, - 'VOLUME': volumes}) - result = df.groupby('PRICE').VOLUME.describe() - data = [df[df.PRICE == 24990].VOLUME.describe().values.tolist(), - df[df.PRICE == 25499].VOLUME.describe().values.tolist()] - expected = pd.DataFrame(data, - index=pd.Index([24990, 25499], name='PRICE'), - columns=['count', 'mean', 'std', 'min', - '25%', '50%', '75%', 'max']) - tm.assert_frame_equal(result, expected) + pytest.raises(AttributeError, getattr, grouped, 'foo') def test_frame_groupby(self): grouped = self.tsframe.groupby(lambda x: x.weekday()) # aggregate aggregated = grouped.aggregate(np.mean) - self.assertEqual(len(aggregated), 5) - self.assertEqual(len(aggregated.columns), 4) + assert len(aggregated) == 5 + assert len(aggregated.columns) == 4 # by string tscopy = self.tsframe.copy() @@ -1186,8 +403,8 @@ def test_frame_groupby(self): # transform grouped = self.tsframe.head(30).groupby(lambda x: x.weekday()) transformed = grouped.transform(lambda x: x - x.mean()) - self.assertEqual(len(transformed), 30) - self.assertEqual(len(transformed.columns), 4) + assert len(transformed) == 30 + assert len(transformed.columns) == 4 # transform propagate transformed = 
grouped.transform(lambda x: x.mean()) @@ -1199,7 +416,7 @@ def test_frame_groupby(self): # iterate for weekday, group in grouped: - self.assertEqual(group.index[0].weekday(), weekday) + assert group.index[0].weekday() == weekday # groups / group_indices groups = grouped.groups @@ -1207,17 +424,7 @@ def test_frame_groupby(self): for k, v in compat.iteritems(groups): samething = self.tsframe.index.take(indices[k]) - self.assertTrue((samething == v).all()) - - def test_grouping_is_iterable(self): - # this code path isn't used anywhere else - # not sure it's useful - grouped = self.tsframe.groupby([lambda x: x.weekday(), lambda x: x.year - ]) - - # test it works - for g in grouped.grouper.groupings[0]: - pass + assert (samething == v).all() def test_frame_groupby_columns(self): mapping = {'A': 0, 'B': 0, 'C': 1, 'D': 1} @@ -1225,8 +432,8 @@ def test_frame_groupby_columns(self): # aggregate aggregated = grouped.aggregate(np.mean) - self.assertEqual(len(aggregated), len(self.tsframe)) - self.assertEqual(len(aggregated.columns), 2) + assert len(aggregated) == len(self.tsframe) + assert len(aggregated.columns) == 2 # transform tf = lambda x: x - x.mean() @@ -1235,98 +442,34 @@ def test_frame_groupby_columns(self): # iterate for k, v in grouped: - self.assertEqual(len(v.columns), 2) + assert len(v.columns) == 2 def test_frame_set_name_single(self): grouped = self.df.groupby('A') result = grouped.mean() - self.assertEqual(result.index.name, 'A') + assert result.index.name == 'A' result = self.df.groupby('A', as_index=False).mean() - self.assertNotEqual(result.index.name, 'A') + assert result.index.name != 'A' result = grouped.agg(np.mean) - self.assertEqual(result.index.name, 'A') + assert result.index.name == 'A' result = grouped.agg({'C': np.mean, 'D': np.std}) - self.assertEqual(result.index.name, 'A') + assert result.index.name == 'A' result = grouped['C'].mean() - self.assertEqual(result.index.name, 'A') + assert result.index.name == 'A' result = 
grouped['C'].agg(np.mean) - self.assertEqual(result.index.name, 'A') + assert result.index.name == 'A' result = grouped['C'].agg([np.mean, np.std]) - self.assertEqual(result.index.name, 'A') - - result = grouped['C'].agg({'foo': np.mean, 'bar': np.std}) - self.assertEqual(result.index.name, 'A') - - def test_multi_iter(self): - s = Series(np.arange(6)) - k1 = np.array(['a', 'a', 'a', 'b', 'b', 'b']) - k2 = np.array(['1', '2', '1', '2', '1', '2']) - - grouped = s.groupby([k1, k2]) - - iterated = list(grouped) - expected = [('a', '1', s[[0, 2]]), ('a', '2', s[[1]]), - ('b', '1', s[[4]]), ('b', '2', s[[3, 5]])] - for i, ((one, two), three) in enumerate(iterated): - e1, e2, e3 = expected[i] - self.assertEqual(e1, one) - self.assertEqual(e2, two) - assert_series_equal(three, e3) - - def test_multi_iter_frame(self): - k1 = np.array(['b', 'b', 'b', 'a', 'a', 'a']) - k2 = np.array(['1', '2', '1', '2', '1', '2']) - df = DataFrame({'v1': np.random.randn(6), - 'v2': np.random.randn(6), - 'k1': k1, 'k2': k2}, - index=['one', 'two', 'three', 'four', 'five', 'six']) + assert result.index.name == 'A' - grouped = df.groupby(['k1', 'k2']) - - # things get sorted! 
- iterated = list(grouped) - idx = df.index - expected = [('a', '1', df.loc[idx[[4]]]), - ('a', '2', df.loc[idx[[3, 5]]]), - ('b', '1', df.loc[idx[[0, 2]]]), - ('b', '2', df.loc[idx[[1]]])] - for i, ((one, two), three) in enumerate(iterated): - e1, e2, e3 = expected[i] - self.assertEqual(e1, one) - self.assertEqual(e2, two) - assert_frame_equal(three, e3) - - # don't iterate through groups with no data - df['k1'] = np.array(['b', 'b', 'b', 'a', 'a', 'a']) - df['k2'] = np.array(['1', '1', '1', '2', '2', '2']) - grouped = df.groupby(['k1', 'k2']) - groups = {} - for key, gp in grouped: - groups[key] = gp - self.assertEqual(len(groups), 2) - - # axis = 1 - three_levels = self.three_group.groupby(['A', 'B', 'C']).mean() - grouped = three_levels.T.groupby(axis=1, level=(1, 2)) - for key, group in grouped: - pass - - def test_multi_iter_panel(self): - wp = tm.makePanel() - grouped = wp.groupby([lambda x: x.month, lambda x: x.weekday()], - axis=1) - - for (month, wd), group in grouped: - exp_axis = [x - for x in wp.major_axis - if x.month == month and x.weekday() == wd] - expected = wp.reindex(major=exp_axis) - assert_panel_equal(group, expected) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = grouped['C'].agg({'foo': np.mean, 'bar': np.std}) + assert result.index.name == 'A' def test_multi_func(self): col1 = self.df['A'] @@ -1387,25 +530,26 @@ def test_groupby_multiple_columns(self): def _check_op(op): - result1 = op(grouped) - - expected = defaultdict(dict) - for n1, gp1 in data.groupby('A'): - for n2, gp2 in gp1.groupby('B'): - expected[n1][n2] = op(gp2.loc[:, ['C', 'D']]) - expected = dict((k, DataFrame(v)) - for k, v in compat.iteritems(expected)) - expected = Panel.fromDict(expected).swapaxes(0, 1) - expected.major_axis.name, expected.minor_axis.name = 'A', 'B' - - # a little bit crude - for col in ['C', 'D']: - result_col = op(grouped[col]) - exp = expected[col] - pivoted = result1[col].unstack() - pivoted2 = 
result_col.unstack() - assert_frame_equal(pivoted.reindex_like(exp), exp) - assert_frame_equal(pivoted2.reindex_like(exp), exp) + with catch_warnings(record=True): + result1 = op(grouped) + + expected = defaultdict(dict) + for n1, gp1 in data.groupby('A'): + for n2, gp2 in gp1.groupby('B'): + expected[n1][n2] = op(gp2.loc[:, ['C', 'D']]) + expected = dict((k, DataFrame(v)) + for k, v in compat.iteritems(expected)) + expected = Panel.fromDict(expected).swapaxes(0, 1) + expected.major_axis.name, expected.minor_axis.name = 'A', 'B' + + # a little bit crude + for col in ['C', 'D']: + result_col = op(grouped[col]) + exp = expected[col] + pivoted = result1[col].unstack() + pivoted2 = result_col.unstack() + assert_frame_equal(pivoted.reindex_like(exp), exp) + assert_frame_equal(pivoted2.reindex_like(exp), exp) _check_op(lambda x: x.sum()) _check_op(lambda x: x.mean()) @@ -1433,7 +577,10 @@ def test_groupby_as_index_agg(self): grouped = self.df.groupby('A', as_index=True) expected3 = grouped['C'].sum() expected3 = DataFrame(expected3).rename(columns={'C': 'Q'}) - result3 = grouped['C'].agg({'Q': np.sum}) + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result3 = grouped['C'].agg({'Q': np.sum}) assert_frame_equal(result3, expected3) # multi-key @@ -1472,158 +619,32 @@ def test_groupby_as_index_agg(self): assert_frame_equal(left, right) - def test_series_groupby_nunique(self): - - def check_nunique(df, keys, as_index=True): - for sort, dropna in cart_product((False, True), repeat=2): - gr = df.groupby(keys, as_index=as_index, sort=sort) - left = gr['julie'].nunique(dropna=dropna) - - gr = df.groupby(keys, as_index=as_index, sort=sort) - right = gr['julie'].apply(Series.nunique, dropna=dropna) - if not as_index: - right = right.reset_index(drop=True) - - assert_series_equal(left, right, check_names=False) - - days = date_range('2015-08-23', periods=10) - - for n, m in cart_product(10 ** np.arange(2, 6), (10, 100, 1000)): - frame = DataFrame({ - 
'jim': np.random.choice( - list(ascii_lowercase), n), - 'joe': np.random.choice(days, n), - 'julie': np.random.randint(0, m, n) - }) - - check_nunique(frame, ['jim']) - check_nunique(frame, ['jim', 'joe']) - - frame.loc[1::17, 'jim'] = None - frame.loc[3::37, 'joe'] = None - frame.loc[7::19, 'julie'] = None - frame.loc[8::19, 'julie'] = None - frame.loc[9::19, 'julie'] = None - - check_nunique(frame, ['jim']) - check_nunique(frame, ['jim', 'joe']) - check_nunique(frame, ['jim'], as_index=False) - check_nunique(frame, ['jim', 'joe'], as_index=False) - - def test_series_groupby_value_counts(self): - from itertools import product - np.random.seed(1234) - - def rebuild_index(df): - arr = list(map(df.index.get_level_values, range(df.index.nlevels))) - df.index = MultiIndex.from_arrays(arr, names=df.index.names) - return df - - def check_value_counts(df, keys, bins): - for isort, normalize, sort, ascending, dropna \ - in product((False, True), repeat=5): - - kwargs = dict(normalize=normalize, sort=sort, - ascending=ascending, dropna=dropna, bins=bins) - - gr = df.groupby(keys, sort=isort) - left = gr['3rd'].value_counts(**kwargs) - - gr = df.groupby(keys, sort=isort) - right = gr['3rd'].apply(Series.value_counts, **kwargs) - right.index.names = right.index.names[:-1] + ['3rd'] - - # have to sort on index because of unstable sort on values - left, right = map(rebuild_index, (left, right)) # xref GH9212 - assert_series_equal(left.sort_index(), right.sort_index()) - - def loop(df): - bins = None, np.arange(0, max(5, df['3rd'].max()) + 1, 2) - keys = '1st', '2nd', ('1st', '2nd') - for k, b in product(keys, bins): - check_value_counts(df, k, b) - - days = date_range('2015-08-24', periods=10) - - for n, m in product((100, 1000), (5, 20)): - frame = DataFrame({ - '1st': np.random.choice( - list('abcd'), n), - '2nd': np.random.choice(days, n), - '3rd': np.random.randint(1, m + 1, n) - }) - - loop(frame) - - frame.loc[1::11, '1st'] = nan - frame.loc[3::17, '2nd'] = nan - 
frame.loc[7::19, '3rd'] = nan - frame.loc[8::19, '3rd'] = nan - frame.loc[9::19, '3rd'] = nan - - loop(frame) - - def test_multiindex_passthru(self): - - # GH 7997 - # regression from 0.14.1 - df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) - df.columns = pd.MultiIndex.from_tuples([(0, 1), (1, 1), (2, 1)]) - - result = df.groupby(axis=1, level=[0, 1]).first() - assert_frame_equal(result, df) - - def test_multiindex_negative_level(self): - # GH 13901 - result = self.mframe.groupby(level=-1).sum() - expected = self.mframe.groupby(level='second').sum() - assert_frame_equal(result, expected) - - result = self.mframe.groupby(level=-2).sum() - expected = self.mframe.groupby(level='first').sum() - assert_frame_equal(result, expected) - - result = self.mframe.groupby(level=[-2, -1]).sum() - expected = self.mframe - assert_frame_equal(result, expected) - - result = self.mframe.groupby(level=[-1, 'first']).sum() - expected = self.mframe.groupby(level=['second', 'first']).sum() - assert_frame_equal(result, expected) - - def test_multifunc_select_col_integer_cols(self): - df = self.df - df.columns = np.arange(len(df.columns)) - - # it works! 
- df.groupby(1, as_index=False)[2].agg({'Q': np.mean}) - def test_as_index_series_return_frame(self): grouped = self.df.groupby('A', as_index=False) grouped2 = self.df.groupby(['A', 'B'], as_index=False) result = grouped['C'].agg(np.sum) expected = grouped.agg(np.sum).loc[:, ['A', 'C']] - tm.assertIsInstance(result, DataFrame) + assert isinstance(result, DataFrame) assert_frame_equal(result, expected) result2 = grouped2['C'].agg(np.sum) expected2 = grouped2.agg(np.sum).loc[:, ['A', 'B', 'C']] - tm.assertIsInstance(result2, DataFrame) + assert isinstance(result2, DataFrame) assert_frame_equal(result2, expected2) result = grouped['C'].sum() expected = grouped.sum().loc[:, ['A', 'C']] - tm.assertIsInstance(result, DataFrame) + assert isinstance(result, DataFrame) assert_frame_equal(result, expected) result2 = grouped2['C'].sum() expected2 = grouped2.sum().loc[:, ['A', 'B', 'C']] - tm.assertIsInstance(result2, DataFrame) + assert isinstance(result2, DataFrame) assert_frame_equal(result2, expected2) # corner case - self.assertRaises(Exception, grouped['C'].__getitem__, 'D') + pytest.raises(Exception, grouped['C'].__getitem__, 'D') def test_groupby_as_index_cython(self): data = self.df @@ -1641,7 +662,7 @@ def test_groupby_as_index_cython(self): result = grouped.mean() expected = data.groupby(['A', 'B']).mean() - arrays = lzip(*expected.index._tuple_index) + arrays = lzip(*expected.index.values) expected.insert(0, 'A', arrays[0]) expected.insert(1, 'B', arrays[1]) expected.index = np.arange(len(expected)) @@ -1657,11 +678,11 @@ def test_groupby_as_index_series_scalar(self): assert_frame_equal(result, expected) def test_groupby_as_index_corner(self): - self.assertRaises(TypeError, self.ts.groupby, lambda x: x.weekday(), - as_index=False) + pytest.raises(TypeError, self.ts.groupby, lambda x: x.weekday(), + as_index=False) - self.assertRaises(ValueError, self.df.groupby, lambda x: x.lower(), - as_index=False, axis=1) + pytest.raises(ValueError, self.df.groupby, lambda x: 
x.lower(), + as_index=False, axis=1) def test_groupby_as_index_apply(self): # GH #4648 and #3417 @@ -1696,55 +717,6 @@ def test_groupby_as_index_apply(self): res = df.groupby(0, as_index=False).apply(lambda x: x).index assert_index_equal(res, ind) - def test_groupby_head_tail(self): - df = DataFrame([[1, 2], [1, 4], [5, 6]], columns=['A', 'B']) - g_as = df.groupby('A', as_index=True) - g_not_as = df.groupby('A', as_index=False) - - # as_index= False, much easier - assert_frame_equal(df.loc[[0, 2]], g_not_as.head(1)) - assert_frame_equal(df.loc[[1, 2]], g_not_as.tail(1)) - - empty_not_as = DataFrame(columns=df.columns, - index=pd.Index([], dtype=df.index.dtype)) - empty_not_as['A'] = empty_not_as['A'].astype(df.A.dtype) - empty_not_as['B'] = empty_not_as['B'].astype(df.B.dtype) - assert_frame_equal(empty_not_as, g_not_as.head(0)) - assert_frame_equal(empty_not_as, g_not_as.tail(0)) - assert_frame_equal(empty_not_as, g_not_as.head(-1)) - assert_frame_equal(empty_not_as, g_not_as.tail(-1)) - - assert_frame_equal(df, g_not_as.head(7)) # contains all - assert_frame_equal(df, g_not_as.tail(7)) - - # as_index=True, (used to be different) - df_as = df - - assert_frame_equal(df_as.loc[[0, 2]], g_as.head(1)) - assert_frame_equal(df_as.loc[[1, 2]], g_as.tail(1)) - - empty_as = DataFrame(index=df_as.index[:0], columns=df.columns) - empty_as['A'] = empty_not_as['A'].astype(df.A.dtype) - empty_as['B'] = empty_not_as['B'].astype(df.B.dtype) - assert_frame_equal(empty_as, g_as.head(0)) - assert_frame_equal(empty_as, g_as.tail(0)) - assert_frame_equal(empty_as, g_as.head(-1)) - assert_frame_equal(empty_as, g_as.tail(-1)) - - assert_frame_equal(df_as, g_as.head(7)) # contains all - assert_frame_equal(df_as, g_as.tail(7)) - - # test with selection - assert_frame_equal(g_as[[]].head(1), df_as.loc[[0, 2], []]) - assert_frame_equal(g_as[['A']].head(1), df_as.loc[[0, 2], ['A']]) - assert_frame_equal(g_as[['B']].head(1), df_as.loc[[0, 2], ['B']]) - assert_frame_equal(g_as[['A', 
'B']].head(1), df_as.loc[[0, 2]]) - - assert_frame_equal(g_not_as[[]].head(1), df_as.loc[[0, 2], []]) - assert_frame_equal(g_not_as[['A']].head(1), df_as.loc[[0, 2], ['A']]) - assert_frame_equal(g_not_as[['B']].head(1), df_as.loc[[0, 2], ['B']]) - assert_frame_equal(g_not_as[['A', 'B']].head(1), df_as.loc[[0, 2]]) - def test_groupby_multiple_key(self): df = tm.makeTimeDataFrame() grouped = df.groupby([lambda x: x.year, lambda x: x.month, @@ -1757,7 +729,7 @@ def test_groupby_multiple_key(self): lambda x: x.day], axis=1) agged = grouped.agg(lambda x: x.sum()) - self.assert_index_equal(agged.index, df.columns) + tm.assert_index_equal(agged.index, df.columns) assert_almost_equal(df.T.values, agged.values) agged = grouped.agg(lambda x: x.sum()) @@ -1794,8 +766,8 @@ def test_omit_nuisance(self): # won't work with axis = 1 grouped = df.groupby({'A': 0, 'C': 0, 'D': 1, 'E': 1}, axis=1) - result = self.assertRaises(TypeError, grouped.agg, - lambda x: x.sum(0, numeric_only=False)) + result = pytest.raises(TypeError, grouped.agg, + lambda x: x.sum(0, numeric_only=False)) def test_omit_nuisance_python_multiple(self): grouped = self.three_group.groupby(['A', 'B']) @@ -1821,7 +793,7 @@ def test_empty_groups_corner(self): agged = grouped.apply(lambda x: x.mean()) agged_A = grouped['A'].apply(np.mean) assert_series_equal(agged['A'], agged_A) - self.assertEqual(agged.index.name, 'first') + assert agged.index.name == 'first' def test_apply_concat_preserve_names(self): grouped = self.three_group.groupby(['A', 'B']) @@ -1849,17 +821,17 @@ def desc3(group): return result result = grouped.apply(desc) - self.assertEqual(result.index.names, ('A', 'B', 'stat')) + assert result.index.names == ('A', 'B', 'stat') result2 = grouped.apply(desc2) - self.assertEqual(result2.index.names, ('A', 'B', 'stat')) + assert result2.index.names == ('A', 'B', 'stat') result3 = grouped.apply(desc3) - self.assertEqual(result3.index.names, ('A', 'B', None)) + assert result3.index.names == ('A', 'B', None) def 
test_nonsense_func(self): df = DataFrame([0]) - self.assertRaises(Exception, df.groupby, lambda x: x + 'foo') + pytest.raises(Exception, df.groupby, lambda x: x + 'foo') def test_builtins_apply(self): # GH8155 df = pd.DataFrame(np.random.randint(1, 50, (1000, 2)), @@ -1895,16 +867,16 @@ def test_max_min_non_numeric(self): 'ss': 4 * ['mama']}) result = aa.groupby('nn').max() - self.assertTrue('ss' in result) + assert 'ss' in result result = aa.groupby('nn').max(numeric_only=False) - self.assertTrue('ss' in result) + assert 'ss' in result result = aa.groupby('nn').min() - self.assertTrue('ss' in result) + assert 'ss' in result result = aa.groupby('nn').min(numeric_only=False) - self.assertTrue('ss' in result) + assert 'ss' in result def test_arg_passthru(self): # make sure that we are passing thru kwargs @@ -2007,7 +979,8 @@ def test_arg_passthru(self): for attr in ['cummin', 'cummax']: f = getattr(df.groupby('group'), attr) result = f() - tm.assert_index_equal(result.columns, expected_columns_numeric) + # GH 15561: numeric_only=False set by default like min/max + tm.assert_index_equal(result.columns, expected_columns) result = f(numeric_only=False) tm.assert_index_equal(result.columns, expected_columns) @@ -2022,22 +995,13 @@ def test_arg_passthru(self): result = f(numeric_only=False) tm.assert_index_equal(result.columns, expected_columns) - def test_groupby_timedelta_cython_count(self): - df = DataFrame({'g': list('ab' * 2), - 'delt': np.arange(4).astype('timedelta64[ns]')}) - expected = Series([ - 2, 2 - ], index=pd.Index(['a', 'b'], name='g'), name='delt') - result = df.groupby('g').delt.count() - tm.assert_series_equal(expected, result) - def test_wrap_aggregated_output_multindex(self): df = self.mframe.T df['baz', 'two'] = 'peekaboo' keys = [np.array([0, 0, 1]), np.array([0, 0, 1])] agged = df.groupby(keys).agg(np.mean) - tm.assertIsInstance(agged.columns, MultiIndex) + assert isinstance(agged.columns, MultiIndex) def aggfun(ser): if ser.name == ('foo', 'one'): 
@@ -2046,101 +1010,18 @@ def aggfun(ser): return ser.sum() agged2 = df.groupby(keys).aggregate(aggfun) - self.assertEqual(len(agged2.columns) + 1, len(df.columns)) - - def test_groupby_level(self): - frame = self.mframe - deleveled = frame.reset_index() - - result0 = frame.groupby(level=0).sum() - result1 = frame.groupby(level=1).sum() - - expected0 = frame.groupby(deleveled['first'].values).sum() - expected1 = frame.groupby(deleveled['second'].values).sum() - - expected0 = expected0.reindex(frame.index.levels[0]) - expected1 = expected1.reindex(frame.index.levels[1]) - - self.assertEqual(result0.index.name, 'first') - self.assertEqual(result1.index.name, 'second') - - assert_frame_equal(result0, expected0) - assert_frame_equal(result1, expected1) - self.assertEqual(result0.index.name, frame.index.names[0]) - self.assertEqual(result1.index.name, frame.index.names[1]) - - # groupby level name - result0 = frame.groupby(level='first').sum() - result1 = frame.groupby(level='second').sum() - assert_frame_equal(result0, expected0) - assert_frame_equal(result1, expected1) - - # axis=1 - - result0 = frame.T.groupby(level=0, axis=1).sum() - result1 = frame.T.groupby(level=1, axis=1).sum() - assert_frame_equal(result0, expected0.T) - assert_frame_equal(result1, expected1.T) - - # raise exception for non-MultiIndex - self.assertRaises(ValueError, self.df.groupby, level=1) - - def test_groupby_level_index_names(self): - # GH4014 this used to raise ValueError since 'exp'>1 (in py2) - df = DataFrame({'exp': ['A'] * 3 + ['B'] * 3, - 'var1': lrange(6), }).set_index('exp') - df.groupby(level='exp') - self.assertRaises(ValueError, df.groupby, level='foo') - - def test_groupby_level_with_nas(self): - index = MultiIndex(levels=[[1, 0], [0, 1, 2, 3]], - labels=[[1, 1, 1, 1, 0, 0, 0, 0], [0, 1, 2, 3, 0, 1, - 2, 3]]) - - # factorizing doesn't confuse things - s = Series(np.arange(8.), index=index) - result = s.groupby(level=0).sum() - expected = Series([22., 6.], index=[1, 0]) - 
assert_series_equal(result, expected) - - index = MultiIndex(levels=[[1, 0], [0, 1, 2, 3]], - labels=[[1, 1, 1, 1, -1, 0, 0, 0], [0, 1, 2, 3, 0, - 1, 2, 3]]) - - # factorizing doesn't confuse things - s = Series(np.arange(8.), index=index) - result = s.groupby(level=0).sum() - expected = Series([18., 6.], index=[1, 0]) - assert_series_equal(result, expected) + assert len(agged2.columns) + 1 == len(df.columns) def test_groupby_level_apply(self): frame = self.mframe result = frame.groupby(level=0).count() - self.assertEqual(result.index.name, 'first') + assert result.index.name == 'first' result = frame.groupby(level=1).count() - self.assertEqual(result.index.name, 'second') + assert result.index.name == 'second' result = frame['A'].groupby(level=0).count() - self.assertEqual(result.index.name, 'first') - - def test_groupby_args(self): - # PR8618 and issue 8015 - frame = self.mframe - - def j(): - frame.groupby() - - self.assertRaisesRegexp(TypeError, - "You have to supply one of 'by' and 'level'", - j) - - def k(): - frame.groupby(by=None, level=None) - - self.assertRaisesRegexp(TypeError, - "You have to supply one of 'by' and 'level'", - k) + assert result.index.name == 'first' def test_groupby_level_mapper(self): frame = self.mframe @@ -2169,20 +1050,20 @@ def test_groupby_level_nonmulti(self): Index(range(1, 7), name='foo')) result = s.groupby(level=0).sum() - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) result = s.groupby(level=[0]).sum() - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) result = s.groupby(level=-1).sum() - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) result = s.groupby(level=[-1]).sum() - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) - tm.assertRaises(ValueError, s.groupby, level=1) - tm.assertRaises(ValueError, s.groupby, level=-2) - tm.assertRaises(ValueError, s.groupby, level=[]) - 
tm.assertRaises(ValueError, s.groupby, level=[0, 0]) - tm.assertRaises(ValueError, s.groupby, level=[0, 1]) - tm.assertRaises(ValueError, s.groupby, level=[1]) + pytest.raises(ValueError, s.groupby, level=1) + pytest.raises(ValueError, s.groupby, level=-2) + pytest.raises(ValueError, s.groupby, level=[]) + pytest.raises(ValueError, s.groupby, level=[0, 0]) + pytest.raises(ValueError, s.groupby, level=[0, 1]) + pytest.raises(ValueError, s.groupby, level=[1]) def test_groupby_complex(self): # GH 12902 @@ -2195,16 +1076,6 @@ def test_groupby_complex(self): result = a.sum(level=0) assert_series_equal(result, expected) - def test_level_preserve_order(self): - grouped = self.mframe.groupby(level=0) - exp_labels = np.array([0, 0, 0, 1, 1, 2, 2, 3, 3, 3], np.intp) - assert_almost_equal(grouped.grouper.labels[0], exp_labels) - - def test_grouping_labels(self): - grouped = self.mframe.groupby(self.mframe.index.get_level_values(0)) - exp_labels = np.array([2, 2, 2, 0, 0, 1, 1, 3, 3, 3], dtype=np.intp) - assert_almost_equal(grouped.grouper.labels[0], exp_labels) - def test_apply_series_to_frame(self): def f(piece): with np.errstate(invalid='ignore'): @@ -2219,29 +1090,29 @@ def f(piece): grouped = ts.groupby(lambda x: x.month) result = grouped.apply(f) - tm.assertIsInstance(result, DataFrame) - self.assert_index_equal(result.index, ts.index) + assert isinstance(result, DataFrame) + tm.assert_index_equal(result.index, ts.index) def test_apply_series_yield_constant(self): result = self.df.groupby(['A', 'B'])['C'].apply(len) - self.assertEqual(result.index.names[:2], ('A', 'B')) + assert result.index.names[:2] == ('A', 'B') def test_apply_frame_yield_constant(self): # GH13568 result = self.df.groupby(['A', 'B']).apply(len) - self.assertTrue(isinstance(result, Series)) - self.assertIsNone(result.name) + assert isinstance(result, Series) + assert result.name is None result = self.df.groupby(['A', 'B'])[['C', 'D']].apply(len) - self.assertTrue(isinstance(result, Series)) - 
self.assertIsNone(result.name) + assert isinstance(result, Series) + assert result.name is None def test_apply_frame_to_series(self): grouped = self.df.groupby(['A', 'B']) result = grouped.apply(len) expected = grouped.count()['C'] - self.assert_index_equal(result.index, expected.index) - self.assert_numpy_array_equal(result.values, expected.values) + tm.assert_index_equal(result.index, expected.index) + tm.assert_numpy_array_equal(result.values, expected.values) def test_apply_frame_concat_series(self): def trans(group): @@ -2258,7 +1129,7 @@ def trans2(group): result = df.groupby('A').apply(trans) exp = df.groupby('A')['C'].apply(trans2) assert_series_equal(result, exp, check_names=False) - self.assertEqual(result.name, 'C') + assert result.name == 'C' def test_apply_transform(self): grouped = self.ts.groupby(lambda x: x.month) @@ -2354,26 +1225,26 @@ def test_groupby_with_hier_columns(self): df = DataFrame(np.random.randn(8, 4), index=index, columns=columns) result = df.groupby(level=0).mean() - self.assert_index_equal(result.columns, columns) + tm.assert_index_equal(result.columns, columns) result = df.groupby(level=0, axis=1).mean() - self.assert_index_equal(result.index, df.index) + tm.assert_index_equal(result.index, df.index) result = df.groupby(level=0).agg(np.mean) - self.assert_index_equal(result.columns, columns) + tm.assert_index_equal(result.columns, columns) result = df.groupby(level=0).apply(lambda x: x.mean()) - self.assert_index_equal(result.columns, columns) + tm.assert_index_equal(result.columns, columns) result = df.groupby(level=0, axis=1).agg(lambda x: x.mean(1)) - self.assert_index_equal(result.columns, Index(['A', 'B'])) - self.assert_index_equal(result.index, df.index) + tm.assert_index_equal(result.columns, Index(['A', 'B'])) + tm.assert_index_equal(result.index, df.index) # add a nuisance column sorted_columns, _ = columns.sortlevel(0) df['A', 'foo'] = 'bar' result = df.groupby(level=0).mean() - self.assert_index_equal(result.columns, 
df.columns[:-1]) + tm.assert_index_equal(result.columns, df.columns[:-1]) def test_pass_args_kwargs(self): from numpy import percentile @@ -2416,157 +1287,26 @@ def f(x, q=None, axis=0): assert_frame_equal(agg_result, expected, check_names=False) assert_frame_equal(apply_result, expected) - def test_size(self): - grouped = self.df.groupby(['A', 'B']) - result = grouped.size() - for key, group in grouped: - self.assertEqual(result[key], len(group)) - - grouped = self.df.groupby('A') - result = grouped.size() - for key, group in grouped: - self.assertEqual(result[key], len(group)) - - grouped = self.df.groupby('B') - result = grouped.size() - for key, group in grouped: - self.assertEqual(result[key], len(group)) - - df = DataFrame(np.random.choice(20, (1000, 3)), columns=list('abc')) - for sort, key in cart_product((False, True), ('a', 'b', ['a', 'b'])): - left = df.groupby(key, sort=sort).size() - right = df.groupby(key, sort=sort)['c'].apply(lambda a: a.shape[0]) - assert_series_equal(left, right, check_names=False) - - # GH11699 - df = DataFrame([], columns=['A', 'B']) - out = Series([], dtype='int64', index=Index([], name='A')) - assert_series_equal(df.groupby('A').size(), out) - - def test_count(self): - from string import ascii_lowercase - n = 1 << 15 - dr = date_range('2015-08-30', periods=n // 10, freq='T') - - df = DataFrame({ - '1st': np.random.choice( - list(ascii_lowercase), n), - '2nd': np.random.randint(0, 5, n), - '3rd': np.random.randn(n).round(3), - '4th': np.random.randint(-10, 10, n), - '5th': np.random.choice(dr, n), - '6th': np.random.randn(n).round(3), - '7th': np.random.randn(n).round(3), - '8th': np.random.choice(dr, n) - np.random.choice(dr, 1), - '9th': np.random.choice( - list(ascii_lowercase), n) - }) - - for col in df.columns.drop(['1st', '2nd', '4th']): - df.loc[np.random.choice(n, n // 10), col] = np.nan - - df['9th'] = df['9th'].astype('category') - - for key in '1st', '2nd', ['1st', '2nd']: - left = df.groupby(key).count() - right = 
df.groupby(key).apply(DataFrame.count).drop(key, axis=1) - assert_frame_equal(left, right) - - # GH5610 - # count counts non-nulls - df = pd.DataFrame([[1, 2, 'foo'], [1, nan, 'bar'], [3, nan, nan]], - columns=['A', 'B', 'C']) - - count_as = df.groupby('A').count() - count_not_as = df.groupby('A', as_index=False).count() - - expected = DataFrame([[1, 2], [0, 0]], columns=['B', 'C'], - index=[1, 3]) - expected.index.name = 'A' - assert_frame_equal(count_not_as, expected.reset_index()) - assert_frame_equal(count_as, expected) - - count_B = df.groupby('A')['B'].count() - assert_series_equal(count_B, expected['B']) - - def test_count_object(self): - df = pd.DataFrame({'a': ['a'] * 3 + ['b'] * 3, 'c': [2] * 3 + [3] * 3}) - result = df.groupby('c').a.count() - expected = pd.Series([ - 3, 3 - ], index=pd.Index([2, 3], name='c'), name='a') - tm.assert_series_equal(result, expected) - - df = pd.DataFrame({'a': ['a', np.nan, np.nan] + ['b'] * 3, - 'c': [2] * 3 + [3] * 3}) - result = df.groupby('c').a.count() - expected = pd.Series([ - 1, 3 - ], index=pd.Index([2, 3], name='c'), name='a') - tm.assert_series_equal(result, expected) - - def test_count_cross_type(self): # GH8169 - vals = np.hstack((np.random.randint(0, 5, (100, 2)), np.random.randint( - 0, 2, (100, 2)))) - - df = pd.DataFrame(vals, columns=['a', 'b', 'c', 'd']) - df[df == 2] = np.nan - expected = df.groupby(['c', 'd']).count() - - for t in ['float32', 'object']: - df['a'] = df['a'].astype(t) - df['b'] = df['b'].astype(t) - result = df.groupby(['c', 'd']).count() - tm.assert_frame_equal(result, expected) - - def test_nunique(self): - df = DataFrame({ - 'A': list('abbacc'), - 'B': list('abxacc'), - 'C': list('abbacx'), - }) - - expected = DataFrame({'A': [1] * 3, 'B': [1, 2, 1], 'C': [1, 1, 2]}) - result = df.groupby('A', as_index=False).nunique() - tm.assert_frame_equal(result, expected) - - # as_index - expected.index = list('abc') - expected.index.name = 'A' - result = df.groupby('A').nunique() - 
tm.assert_frame_equal(result, expected) - - # with na - result = df.replace({'x': None}).groupby('A').nunique(dropna=False) - tm.assert_frame_equal(result, expected) - - # dropna - expected = DataFrame({'A': [1] * 3, 'B': [1] * 3, 'C': [1] * 3}, - index=list('abc')) - expected.index.name = 'A' - result = df.replace({'x': None}).groupby('A').nunique() - tm.assert_frame_equal(result, expected) - def test_non_cython_api(self): # GH5610 # non-cython calls should not include the grouper df = DataFrame( - [[1, 2, 'foo'], [1, - nan, - 'bar', ], [3, nan, 'baz'] - ], columns=['A', 'B', 'C']) + [[1, 2, 'foo'], + [1, np.nan, 'bar'], + [3, np.nan, 'baz']], + columns=['A', 'B', 'C']) g = df.groupby('A') gni = df.groupby('A', as_index=False) # mad - expected = DataFrame([[0], [nan]], columns=['B'], index=[1, 3]) + expected = DataFrame([[0], [np.nan]], columns=['B'], index=[1, 3]) expected.index.name = 'A' result = g.mad() assert_frame_equal(result, expected) - expected = DataFrame([[0., 0.], [0, nan]], columns=['A', 'B'], + expected = DataFrame([[0., 0.], [0, np.nan]], columns=['A', 'B'], index=[0, 1]) result = gni.mad() assert_frame_equal(result, expected) @@ -2577,8 +1317,9 @@ def test_non_cython_api(self): ['count', 'mean', 'std', 'min', '25%', '50%', '75%', 'max']], labels=[[0] * 8, list(range(8))]) - expected = pd.DataFrame([[1.0, 2.0, nan, 2.0, 2.0, 2.0, 2.0, 2.0], - [0.0, nan, nan, nan, nan, nan, nan, nan]], + expected = pd.DataFrame([[1.0, 2.0, np.nan, 2.0, 2.0, 2.0, 2.0, 2.0], + [0.0, np.nan, np.nan, np.nan, np.nan, np.nan, + np.nan, np.nan]], index=expected_index, columns=expected_col) result = g.describe() @@ -2598,7 +1339,7 @@ def test_non_cython_api(self): assert_frame_equal(result, expected) # idxmax - expected = DataFrame([[0], [nan]], columns=['B'], index=[1, 3]) + expected = DataFrame([[0.0], [np.nan]], columns=['B'], index=[1, 3]) expected.index.name = 'A' result = g.idxmax() assert_frame_equal(result, expected) @@ -2718,10 +1459,9 @@ def f(g): return g result 
= grouped.apply(f) - self.assertTrue('value3' in result) + assert 'value3' in result def test_groupby_wrong_multi_labels(self): - from pandas import read_csv data = """index,foo,bar,baz,spam,data 0,foo1,bar1,baz1,spam2,20 1,foo1,bar2,baz1,spam3,30 @@ -2740,24 +1480,24 @@ def test_groupby_wrong_multi_labels(self): def test_groupby_series_with_name(self): result = self.df.groupby(self.df['A']).mean() result2 = self.df.groupby(self.df['A'], as_index=False).mean() - self.assertEqual(result.index.name, 'A') - self.assertIn('A', result2) + assert result.index.name == 'A' + assert 'A' in result2 result = self.df.groupby([self.df['A'], self.df['B']]).mean() result2 = self.df.groupby([self.df['A'], self.df['B']], as_index=False).mean() - self.assertEqual(result.index.names, ('A', 'B')) - self.assertIn('A', result2) - self.assertIn('B', result2) + assert result.index.names == ('A', 'B') + assert 'A' in result2 + assert 'B' in result2 def test_seriesgroupby_name_attr(self): # GH 6265 result = self.df.groupby('A')['C'] - self.assertEqual(result.count().name, 'C') - self.assertEqual(result.mean().name, 'C') + assert result.count().name == 'C' + assert result.mean().name == 'C' testFunc = lambda x: np.sum(x) * 2 - self.assertEqual(result.agg(testFunc).name, 'C') + assert result.agg(testFunc).name == 'C' def test_consistency_name(self): # GH 12363 @@ -2789,11 +1529,11 @@ def summarize_random_name(df): }, name=df.iloc[0]['A']) metrics = self.df.groupby('A').apply(summarize) - self.assertEqual(metrics.columns.name, None) + assert metrics.columns.name is None metrics = self.df.groupby('A').apply(summarize, 'metrics') - self.assertEqual(metrics.columns.name, 'metrics') + assert metrics.columns.name == 'metrics' metrics = self.df.groupby('A').apply(summarize_random_name) - self.assertEqual(metrics.columns.name, None) + assert metrics.columns.name is None def test_groupby_nonstring_columns(self): df = DataFrame([np.arange(10) for x in range(10)]) @@ -2821,7 +1561,7 @@ def 
test_cython_grouper_series_bug_noncontig(self): inds = np.tile(lrange(10), 10) result = obj.groupby(inds).agg(Series.median) - self.assertTrue(result.isnull().all()) + assert result.isna().all() def test_series_grouper_noncontig_index(self): index = Index(tm.rands_array(10, 100)) @@ -2854,12 +1594,12 @@ def convert_force_pure(x): grouped = s.groupby(labels) result = grouped.agg(convert_fast) - self.assertEqual(result.dtype, np.object_) - tm.assertIsInstance(result[0], Decimal) + assert result.dtype == np.object_ + assert isinstance(result[0], Decimal) result = grouped.agg(convert_force_pure) - self.assertEqual(result.dtype, np.object_) - tm.assertIsInstance(result[0], Decimal) + assert result.dtype == np.object_ + assert isinstance(result[0], Decimal) def test_fast_apply(self): # make sure that fast apply is correctly called @@ -2885,12 +1625,12 @@ def f(g): group_keys = grouper._get_group_keys() values, mutated = splitter.fast_apply(f, group_keys) - self.assertFalse(mutated) + assert not mutated def test_apply_with_mixed_dtype(self): # GH3480, apply with mixed dtype on axis=1 breaks in 0.11 - df = DataFrame({'foo1': ['one', 'two', 'two', 'three', 'one', 'two'], - 'foo2': np.random.randn(6)}) + df = DataFrame({'foo1': np.random.randn(6), + 'foo2': ['one', 'two', 'two', 'three', 'one', 'two']}) result = df.apply(lambda x: x, axis=1) assert_series_equal(df.get_dtype_counts(), result.get_dtype_counts()) @@ -2929,7 +1669,7 @@ def test_groupby_aggregation_mixed_dtype(self): def test_groupby_dtype_inference_empty(self): # GH 6733 df = DataFrame({'x': [], 'range': np.arange(0, dtype='int64')}) - self.assertEqual(df['x'].dtype, np.float64) + assert df['x'].dtype == np.float64 result = df.groupby('x').first() exp_index = Index([], name='x', dtype=np.float64) @@ -2942,7 +1682,7 @@ def test_groupby_list_infer_array_like(self): expected = self.df.groupby(self.df['A']).mean() assert_frame_equal(result, expected, check_names=False) - self.assertRaises(Exception, self.df.groupby, 
list(self.df['A'][:-1])) + pytest.raises(Exception, self.df.groupby, list(self.df['A'][:-1])) # pathological case of ambiguity df = DataFrame({'foo': [0, 1], @@ -2968,9 +1708,9 @@ def test_groupby_keys_same_size_as_index(self): def test_groupby_one_row(self): # GH 11741 df1 = pd.DataFrame(np.random.randn(1, 4), columns=list('ABCD')) - self.assertRaises(KeyError, df1.groupby, 'Z') + pytest.raises(KeyError, df1.groupby, 'Z') df2 = pd.DataFrame(np.random.randn(2, 4), columns=list('ABCD')) - self.assertRaises(KeyError, df2.groupby, 'Z') + pytest.raises(KeyError, df2.groupby, 'Z') def test_groupby_nat_exclude(self): # GH 6992 @@ -2984,7 +1724,7 @@ def test_groupby_nat_exclude(self): expected = [pd.Index([1, 7]), pd.Index([3, 5])] keys = sorted(grouped.groups.keys()) - self.assertEqual(len(keys), 2) + assert len(keys) == 2 for k, e in zip(keys, expected): # grouped.groups keys are np.datetime64 with system tz # not to be affected by tz, only compare values @@ -2992,7 +1732,7 @@ def test_groupby_nat_exclude(self): # confirm obj is not filtered tm.assert_frame_equal(grouped.grouper.groupings[0].obj, df) - self.assertEqual(grouped.ngroups, 2) + assert grouped.ngroups == 2 expected = { Timestamp('2013-01-01 00:00:00'): np.array([1, 7], dtype=np.int64), @@ -3000,40 +1740,33 @@ def test_groupby_nat_exclude(self): } for k in grouped.indices: - self.assert_numpy_array_equal(grouped.indices[k], expected[k]) + tm.assert_numpy_array_equal(grouped.indices[k], expected[k]) tm.assert_frame_equal( grouped.get_group(Timestamp('2013-01-01')), df.iloc[[1, 7]]) tm.assert_frame_equal( grouped.get_group(Timestamp('2013-02-01')), df.iloc[[3, 5]]) - self.assertRaises(KeyError, grouped.get_group, pd.NaT) + pytest.raises(KeyError, grouped.get_group, pd.NaT) nan_df = DataFrame({'nan': [np.nan, np.nan, np.nan], 'nat': [pd.NaT, pd.NaT, pd.NaT]}) - self.assertEqual(nan_df['nan'].dtype, 'float64') - self.assertEqual(nan_df['nat'].dtype, 'datetime64[ns]') + assert nan_df['nan'].dtype == 'float64' + 
assert nan_df['nat'].dtype == 'datetime64[ns]' for key in ['nan', 'nat']: grouped = nan_df.groupby(key) - self.assertEqual(grouped.groups, {}) - self.assertEqual(grouped.ngroups, 0) - self.assertEqual(grouped.indices, {}) - self.assertRaises(KeyError, grouped.get_group, np.nan) - self.assertRaises(KeyError, grouped.get_group, pd.NaT) - - def test_dictify(self): - dict(iter(self.df.groupby('A'))) - dict(iter(self.df.groupby(['A', 'B']))) - dict(iter(self.df['C'].groupby(self.df['A']))) - dict(iter(self.df['C'].groupby([self.df['A'], self.df['B']]))) - dict(iter(self.df.groupby('A')['C'])) - dict(iter(self.df.groupby(['A', 'B'])['C'])) + assert grouped.groups == {} + assert grouped.ngroups == 0 + assert grouped.indices == {} + pytest.raises(KeyError, grouped.get_group, np.nan) + pytest.raises(KeyError, grouped.get_group, pd.NaT) def test_sparse_friendly(self): sdf = self.df[['C', 'D']].to_sparse() - panel = tm.makePanel() - tm.add_nans(panel) + with catch_warnings(record=True): + panel = tm.makePanel() + tm.add_nans(panel) def _check_work(gp): gp.mean() @@ -3049,43 +1782,28 @@ def _check_work(gp): # _check_work(panel.groupby(lambda x: x.month, axis=1)) def test_panel_groupby(self): - self.panel = tm.makePanel() - tm.add_nans(self.panel) - grouped = self.panel.groupby({'ItemA': 0, 'ItemB': 0, 'ItemC': 1}, - axis='items') - agged = grouped.mean() - agged2 = grouped.agg(lambda x: x.mean('items')) + with catch_warnings(record=True): + self.panel = tm.makePanel() + tm.add_nans(self.panel) + grouped = self.panel.groupby({'ItemA': 0, 'ItemB': 0, 'ItemC': 1}, + axis='items') + agged = grouped.mean() + agged2 = grouped.agg(lambda x: x.mean('items')) - tm.assert_panel_equal(agged, agged2) + tm.assert_panel_equal(agged, agged2) - self.assert_index_equal(agged.items, Index([0, 1])) + tm.assert_index_equal(agged.items, Index([0, 1])) - grouped = self.panel.groupby(lambda x: x.month, axis='major') - agged = grouped.mean() + grouped = self.panel.groupby(lambda x: x.month, 
axis='major') + agged = grouped.mean() - exp = Index(sorted(list(set(self.panel.major_axis.month)))) - self.assert_index_equal(agged.major_axis, exp) + exp = Index(sorted(list(set(self.panel.major_axis.month)))) + tm.assert_index_equal(agged.major_axis, exp) - grouped = self.panel.groupby({'A': 0, 'B': 0, 'C': 1, 'D': 1}, - axis='minor') - agged = grouped.mean() - self.assert_index_equal(agged.minor_axis, Index([0, 1])) - - def test_numpy_groupby(self): - from pandas.core.groupby import numpy_groupby - - data = np.random.randn(100, 100) - labels = np.random.randint(0, 10, size=100) - - df = DataFrame(data) - - result = df.groupby(labels).sum().values - expected = numpy_groupby(data, labels) - assert_almost_equal(result, expected) - - result = df.groupby(labels, axis=1).sum().values - expected = numpy_groupby(data, labels, axis=1) - assert_almost_equal(result, expected) + grouped = self.panel.groupby({'A': 0, 'B': 0, 'C': 1, 'D': 1}, + axis='minor') + agged = grouped.mean() + tm.assert_index_equal(agged.minor_axis, Index([0, 1])) def test_groupby_2d_malformed(self): d = DataFrame(index=lrange(2)) @@ -3095,8 +1813,8 @@ def test_groupby_2d_malformed(self): d['label'] = ['l1', 'l2'] tmp = d.groupby(['group']).mean() res_values = np.array([[0, 1], [0, 1]], dtype=np.int64) - self.assert_index_equal(tmp.columns, Index(['zeros', 'ones'])) - self.assert_numpy_array_equal(tmp.values, res_values) + tm.assert_index_equal(tmp.columns, Index(['zeros', 'ones'])) + tm.assert_numpy_array_equal(tmp.values, res_values) def test_int32_overflow(self): B = np.concatenate((np.arange(10000), np.arange(10000), np.arange(5000) @@ -3110,86 +1828,7 @@ def test_int32_overflow(self): left = df.groupby(['A', 'B', 'C', 'D']).sum() right = df.groupby(['D', 'C', 'B', 'A']).sum() - self.assertEqual(len(left), len(right)) - - def test_int64_overflow(self): - from pandas.core.groupby import _int64_overflow_possible - - B = np.concatenate((np.arange(1000), np.arange(1000), np.arange(500))) - A = 
np.arange(2500) - df = DataFrame({'A': A, - 'B': B, - 'C': A, - 'D': B, - 'E': A, - 'F': B, - 'G': A, - 'H': B, - 'values': np.random.randn(2500)}) - - lg = df.groupby(['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H']) - rg = df.groupby(['H', 'G', 'F', 'E', 'D', 'C', 'B', 'A']) - - left = lg.sum()['values'] - right = rg.sum()['values'] - - exp_index, _ = left.index.sortlevel() - self.assert_index_equal(left.index, exp_index) - - exp_index, _ = right.index.sortlevel(0) - self.assert_index_equal(right.index, exp_index) - - tups = list(map(tuple, df[['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H' - ]].values)) - tups = com._asarray_tuplesafe(tups) - - expected = df.groupby(tups).sum()['values'] - - for k, v in compat.iteritems(expected): - self.assertEqual(left[k], right[k[::-1]]) - self.assertEqual(left[k], v) - self.assertEqual(len(left), len(right)) - - # GH9096 - values = range(55109) - data = pd.DataFrame.from_dict({'a': values, - 'b': values, - 'c': values, - 'd': values}) - grouped = data.groupby(['a', 'b', 'c', 'd']) - self.assertEqual(len(grouped), len(values)) - - arr = np.random.randint(-1 << 12, 1 << 12, (1 << 15, 5)) - i = np.random.choice(len(arr), len(arr) * 4) - arr = np.vstack((arr, arr[i])) # add sume duplicate rows - - i = np.random.permutation(len(arr)) - arr = arr[i] # shuffle rows - - df = DataFrame(arr, columns=list('abcde')) - df['jim'], df['joe'] = np.random.randn(2, len(df)) * 10 - gr = df.groupby(list('abcde')) - - # verify this is testing what it is supposed to test! 
- self.assertTrue(_int64_overflow_possible(gr.grouper.shape)) - - # mannually compute groupings - jim, joe = defaultdict(list), defaultdict(list) - for key, a, b in zip(map(tuple, arr), df['jim'], df['joe']): - jim[key].append(a) - joe[key].append(b) - - self.assertEqual(len(gr), len(jim)) - mi = MultiIndex.from_tuples(jim.keys(), names=list('abcde')) - - def aggr(func): - f = lambda a: np.fromiter(map(func, a), dtype='f8') - arr = np.vstack((f(jim.values()), f(joe.values()))).T - res = DataFrame(arr, columns=['jim', 'joe'], index=mi) - return res.sort_index() - - assert_frame_equal(gr.mean(), aggr(np.mean)) - assert_frame_equal(gr.median(), aggr(np.median)) + assert len(left) == len(right) def test_groupby_sort_multi(self): df = DataFrame({'a': ['foo', 'bar', 'baz'], @@ -3200,17 +1839,17 @@ def test_groupby_sort_multi(self): tups = lmap(tuple, df[['a', 'b', 'c']].values) tups = com._asarray_tuplesafe(tups) result = df.groupby(['a', 'b', 'c'], sort=True).sum() - self.assert_numpy_array_equal(result.index.values, tups[[1, 2, 0]]) + tm.assert_numpy_array_equal(result.index.values, tups[[1, 2, 0]]) tups = lmap(tuple, df[['c', 'a', 'b']].values) tups = com._asarray_tuplesafe(tups) result = df.groupby(['c', 'a', 'b'], sort=True).sum() - self.assert_numpy_array_equal(result.index.values, tups) + tm.assert_numpy_array_equal(result.index.values, tups) tups = lmap(tuple, df[['b', 'c', 'a']].values) tups = com._asarray_tuplesafe(tups) result = df.groupby(['b', 'c', 'a'], sort=True).sum() - self.assert_numpy_array_equal(result.index.values, tups[[2, 1, 0]]) + tm.assert_numpy_array_equal(result.index.values, tups[[2, 1, 0]]) df = DataFrame({'a': [0, 1, 2, 0, 1, 2], 'b': [0, 0, 0, 1, 1, 1], @@ -3229,16 +1868,6 @@ def test_intercept_builtin_sum(self): assert_series_equal(result, expected) assert_series_equal(result2, expected) - def test_column_select_via_attr(self): - result = self.df.groupby('A').C.sum() - expected = self.df.groupby('A')['C'].sum() - 
assert_series_equal(result, expected) - - self.df['mean'] = 1.5 - result = self.df.groupby('A').mean() - expected = self.df.groupby('A').agg(np.mean) - assert_frame_equal(result, expected) - def test_rank_apply(self): lev1 = tm.rands_array(10, 100) lev2 = tm.rands_array(10, 130) @@ -3267,6 +1896,196 @@ def test_rank_apply(self): expected = expected.reindex(result.index) assert_series_equal(result, expected) + @pytest.mark.parametrize("grps", [ + ['qux'], ['qux', 'quux']]) + @pytest.mark.parametrize("vals", [ + [2, 2, 8, 2, 6], + [pd.Timestamp('2018-01-02'), pd.Timestamp('2018-01-02'), + pd.Timestamp('2018-01-08'), pd.Timestamp('2018-01-02'), + pd.Timestamp('2018-01-06')]]) + @pytest.mark.parametrize("ties_method,ascending,pct,exp", [ + ('average', True, False, [2., 2., 5., 2., 4.]), + ('average', True, True, [0.4, 0.4, 1.0, 0.4, 0.8]), + ('average', False, False, [4., 4., 1., 4., 2.]), + ('average', False, True, [.8, .8, .2, .8, .4]), + ('min', True, False, [1., 1., 5., 1., 4.]), + ('min', True, True, [0.2, 0.2, 1.0, 0.2, 0.8]), + ('min', False, False, [3., 3., 1., 3., 2.]), + ('min', False, True, [.6, .6, .2, .6, .4]), + ('max', True, False, [3., 3., 5., 3., 4.]), + ('max', True, True, [0.6, 0.6, 1.0, 0.6, 0.8]), + ('max', False, False, [5., 5., 1., 5., 2.]), + ('max', False, True, [1., 1., .2, 1., .4]), + ('first', True, False, [1., 2., 5., 3., 4.]), + ('first', True, True, [0.2, 0.4, 1.0, 0.6, 0.8]), + ('first', False, False, [3., 4., 1., 5., 2.]), + ('first', False, True, [.6, .8, .2, 1., .4]), + ('dense', True, False, [1., 1., 3., 1., 2.]), + ('dense', True, True, [0.2, 0.2, 0.6, 0.2, 0.4]), + ('dense', False, False, [3., 3., 1., 3., 2.]), + ('dense', False, True, [.6, .6, .2, .6, .4]), + ]) + def test_rank_args(self, grps, vals, ties_method, ascending, pct, exp): + key = np.repeat(grps, len(vals)) + vals = vals * len(grps) + df = DataFrame({'key': key, 'val': vals}) + result = df.groupby('key').rank(method=ties_method, + ascending=ascending, pct=pct) + + 
exp_df = DataFrame(exp * len(grps), columns=['val']) + assert_frame_equal(result, exp_df) + + @pytest.mark.parametrize("grps", [ + ['qux'], ['qux', 'quux']]) + @pytest.mark.parametrize("vals", [ + [2, 2, np.nan, 8, 2, 6, np.nan, np.nan], # floats + [pd.Timestamp('2018-01-02'), pd.Timestamp('2018-01-02'), np.nan, + pd.Timestamp('2018-01-08'), pd.Timestamp('2018-01-02'), + pd.Timestamp('2018-01-06'), np.nan, np.nan] + ]) + @pytest.mark.parametrize("ties_method,ascending,na_option,pct,exp", [ + ('average', True, 'keep', False, + [2., 2., np.nan, 5., 2., 4., np.nan, np.nan]), + ('average', True, 'keep', True, + [0.4, 0.4, np.nan, 1.0, 0.4, 0.8, np.nan, np.nan]), + ('average', False, 'keep', False, + [4., 4., np.nan, 1., 4., 2., np.nan, np.nan]), + ('average', False, 'keep', True, + [.8, 0.8, np.nan, 0.2, 0.8, 0.4, np.nan, np.nan]), + ('min', True, 'keep', False, + [1., 1., np.nan, 5., 1., 4., np.nan, np.nan]), + ('min', True, 'keep', True, + [0.2, 0.2, np.nan, 1.0, 0.2, 0.8, np.nan, np.nan]), + ('min', False, 'keep', False, + [3., 3., np.nan, 1., 3., 2., np.nan, np.nan]), + ('min', False, 'keep', True, + [.6, 0.6, np.nan, 0.2, 0.6, 0.4, np.nan, np.nan]), + ('max', True, 'keep', False, + [3., 3., np.nan, 5., 3., 4., np.nan, np.nan]), + ('max', True, 'keep', True, + [0.6, 0.6, np.nan, 1.0, 0.6, 0.8, np.nan, np.nan]), + ('max', False, 'keep', False, + [5., 5., np.nan, 1., 5., 2., np.nan, np.nan]), + ('max', False, 'keep', True, + [1., 1., np.nan, 0.2, 1., 0.4, np.nan, np.nan]), + ('first', True, 'keep', False, + [1., 2., np.nan, 5., 3., 4., np.nan, np.nan]), + ('first', True, 'keep', True, + [0.2, 0.4, np.nan, 1.0, 0.6, 0.8, np.nan, np.nan]), + ('first', False, 'keep', False, + [3., 4., np.nan, 1., 5., 2., np.nan, np.nan]), + ('first', False, 'keep', True, + [.6, 0.8, np.nan, 0.2, 1., 0.4, np.nan, np.nan]), + ('dense', True, 'keep', False, + [1., 1., np.nan, 3., 1., 2., np.nan, np.nan]), + ('dense', True, 'keep', True, + [0.2, 0.2, np.nan, 0.6, 0.2, 0.4, np.nan, np.nan]), 
+ ('dense', False, 'keep', False, + [3., 3., np.nan, 1., 3., 2., np.nan, np.nan]), + ('dense', False, 'keep', True, + [.6, 0.6, np.nan, 0.2, 0.6, 0.4, np.nan, np.nan]), + ('average', True, 'no_na', False, [2., 2., 7., 5., 2., 4., 7., 7.]), + ('average', True, 'no_na', True, + [0.25, 0.25, 0.875, 0.625, 0.25, 0.5, 0.875, 0.875]), + ('average', False, 'no_na', False, [4., 4., 7., 1., 4., 2., 7., 7.]), + ('average', False, 'no_na', True, + [0.5, 0.5, 0.875, 0.125, 0.5, 0.25, 0.875, 0.875]), + ('min', True, 'no_na', False, [1., 1., 6., 5., 1., 4., 6., 6.]), + ('min', True, 'no_na', True, + [0.125, 0.125, 0.75, 0.625, 0.125, 0.5, 0.75, 0.75]), + ('min', False, 'no_na', False, [3., 3., 6., 1., 3., 2., 6., 6.]), + ('min', False, 'no_na', True, + [0.375, 0.375, 0.75, 0.125, 0.375, 0.25, 0.75, 0.75]), + ('max', True, 'no_na', False, [3., 3., 8., 5., 3., 4., 8., 8.]), + ('max', True, 'no_na', True, + [0.375, 0.375, 1., 0.625, 0.375, 0.5, 1., 1.]), + ('max', False, 'no_na', False, [5., 5., 8., 1., 5., 2., 8., 8.]), + ('max', False, 'no_na', True, + [0.625, 0.625, 1., 0.125, 0.625, 0.25, 1., 1.]), + ('first', True, 'no_na', False, [1., 2., 6., 5., 3., 4., 7., 8.]), + ('first', True, 'no_na', True, + [0.125, 0.25, 0.75, 0.625, 0.375, 0.5, 0.875, 1.]), + ('first', False, 'no_na', False, [3., 4., 6., 1., 5., 2., 7., 8.]), + ('first', False, 'no_na', True, + [0.375, 0.5, 0.75, 0.125, 0.625, 0.25, 0.875, 1.]), + ('dense', True, 'no_na', False, [1., 1., 4., 3., 1., 2., 4., 4.]), + ('dense', True, 'no_na', True, + [0.125, 0.125, 0.5, 0.375, 0.125, 0.25, 0.5, 0.5]), + ('dense', False, 'no_na', False, [3., 3., 4., 1., 3., 2., 4., 4.]), + ('dense', False, 'no_na', True, + [0.375, 0.375, 0.5, 0.125, 0.375, 0.25, 0.5, 0.5]) + ]) + def test_rank_args_missing(self, grps, vals, ties_method, ascending, + na_option, pct, exp): + key = np.repeat(grps, len(vals)) + vals = vals * len(grps) + df = DataFrame({'key': key, 'val': vals}) + result = df.groupby('key').rank(method=ties_method, + 
ascending=ascending, + na_option=na_option, pct=pct) + + exp_df = DataFrame(exp * len(grps), columns=['val']) + assert_frame_equal(result, exp_df) + + @pytest.mark.parametrize("pct,exp", [ + (False, [3., 3., 3., 3., 3.]), + (True, [.6, .6, .6, .6, .6])]) + def test_rank_resets_each_group(self, pct, exp): + df = DataFrame( + {'key': ['a', 'a', 'a', 'a', 'a', 'b', 'b', 'b', 'b', 'b'], + 'val': [1] * 10} + ) + result = df.groupby('key').rank(pct=pct) + exp_df = DataFrame(exp * 2, columns=['val']) + assert_frame_equal(result, exp_df) + + def test_rank_avg_even_vals(self): + df = DataFrame({'key': ['a'] * 4, 'val': [1] * 4}) + result = df.groupby('key').rank() + exp_df = DataFrame([2.5, 2.5, 2.5, 2.5], columns=['val']) + assert_frame_equal(result, exp_df) + + @pytest.mark.parametrize("ties_method", [ + 'average', 'min', 'max', 'first', 'dense']) + @pytest.mark.parametrize("ascending", [True, False]) + @pytest.mark.parametrize("na_option", ["keep", "top", "bottom"]) + @pytest.mark.parametrize("pct", [True, False]) + @pytest.mark.parametrize("vals", [ + ['bar', 'bar', 'foo', 'bar', 'baz'], + ['bar', np.nan, 'foo', np.nan, 'baz'] + ]) + def test_rank_object_raises(self, ties_method, ascending, na_option, + pct, vals): + df = DataFrame({'key': ['foo'] * 5, 'val': vals}) + with tm.assert_raises_regex(TypeError, "not callable"): + df.groupby('key').rank(method=ties_method, + ascending=ascending, + na_option=na_option, pct=pct) + + @pytest.mark.parametrize("agg_func", ['any', 'all']) + @pytest.mark.parametrize("skipna", [True, False]) + @pytest.mark.parametrize("vals", [ + ['foo', 'bar', 'baz'], ['foo', '', ''], ['', '', ''], + [1, 2, 3], [1, 0, 0], [0, 0, 0], + [1., 2., 3.], [1., 0., 0.], [0., 0., 0.], + [True, True, True], [True, False, False], [False, False, False], + [np.nan, np.nan, np.nan] + ]) + def test_groupby_bool_aggs(self, agg_func, skipna, vals): + df = DataFrame({'key': ['a'] * 3 + ['b'] * 3, 'val': vals * 2}) + + # Figure out expectation using Python builtin + 
exp = getattr(compat.builtins, agg_func)(vals) + + # edge case for missing data with skipna and 'any' + if skipna and all(isna(vals)) and agg_func == 'any': + exp = False + + exp_df = DataFrame([exp] * 2, columns=['val'], index=pd.Index( + ['a', 'b'], name='key')) + result = getattr(df.groupby('key'), agg_func)(skipna=skipna) + assert_frame_equal(result, exp_df) + def test_dont_clobber_name_column(self): df = DataFrame({'key': ['a', 'a', 'a', 'b', 'b', 'b'], 'name': ['foo', 'bar', 'baz'] * 2}) @@ -3305,7 +2124,7 @@ def test_no_nonsense_name(self): s.name = None result = s.groupby(self.frame['A']).agg(np.sum) - self.assertIsNone(result.name) + assert result.name is None def test_multifunc_sum_bug(self): # GH #1065 @@ -3315,55 +2134,21 @@ def test_multifunc_sum_bug(self): grouped = x.groupby('test') result = grouped.agg({'fl': 'sum', 2: 'size'}) - self.assertEqual(result['fl'].dtype, np.float64) + assert result['fl'].dtype == np.float64 def test_handle_dict_return_value(self): def f(group): - return {'min': group.min(), 'max': group.max()} + return {'max': group.max(), 'min': group.min()} def g(group): - return Series({'min': group.min(), 'max': group.max()}) + return Series({'max': group.max(), 'min': group.min()}) result = self.df.groupby('A')['C'].apply(f) expected = self.df.groupby('A')['C'].apply(g) - tm.assertIsInstance(result, Series) + assert isinstance(result, Series) assert_series_equal(result, expected) - def test_getitem_list_of_columns(self): - df = DataFrame( - {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], - 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], - 'C': np.random.randn(8), - 'D': np.random.randn(8), - 'E': np.random.randn(8)}) - - result = df.groupby('A')[['C', 'D']].mean() - result2 = df.groupby('A')['C', 'D'].mean() - result3 = df.groupby('A')[df.columns[2:4]].mean() - - expected = df.loc[:, ['A', 'C', 'D']].groupby('A').mean() - - assert_frame_equal(result, expected) - assert_frame_equal(result2, 
expected) - assert_frame_equal(result3, expected) - - def test_getitem_numeric_column_names(self): - # GH #13731 - df = DataFrame({0: list('abcd') * 2, - 2: np.random.randn(8), - 4: np.random.randn(8), - 6: np.random.randn(8)}) - result = df.groupby(0)[df.columns[1:3]].mean() - result2 = df.groupby(0)[2, 4].mean() - result3 = df.groupby(0)[[2, 4]].mean() - - expected = df.loc[:, [0, 2, 4]].groupby(0).mean() - - assert_frame_equal(result, expected) - assert_frame_equal(result2, expected) - assert_frame_equal(result3, expected) - def test_set_group_name(self): def f(group): assert group.name is not None @@ -3391,15 +2176,30 @@ def _check_all(grouped): _check_all(self.df.groupby('A')) _check_all(self.df.groupby(['A', 'B'])) - def test_no_dummy_key_names(self): - # GH #1291 + def test_group_name_available_in_inference_pass(self): + # gh-15062 + df = pd.DataFrame({'a': [0, 0, 1, 1, 2, 2], 'b': np.arange(6)}) + names = [] + + def f(group): + names.append(group.name) + return group.copy() + + df.groupby('a', sort=False, group_keys=False).apply(f) + # we expect 2 zeros because we call ``f`` once to see if a faster route + # can be used. 
+ expected_names = [0, 0, 1, 2] + assert names == expected_names + + def test_no_dummy_key_names(self): + # see gh-1291 result = self.df.groupby(self.df['A'].values).sum() - self.assertIsNone(result.index.name) + assert result.index.name is None result = self.df.groupby([self.df['A'].values, self.df['B'].values ]).sum() - self.assertEqual(result.index.names, (None, None)) + assert result.index.names == (None, None) def test_groupby_sort_multiindex_series(self): # series multiindex groupby sort argument was not being passed through @@ -3419,7 +2219,6 @@ def test_groupby_sort_multiindex_series(self): assert_series_equal(result, mseries_result.sort_index()) def test_groupby_reindex_inside_function(self): - from pandas.tseries.api import DatetimeIndex periods = 1000 ind = DatetimeIndex(start='2012/1/1', freq='5min', periods=periods) @@ -3432,7 +2231,8 @@ def agg_before(hour, func, fix=False): """ def _func(data): - d = data.select(lambda x: x.hour < 11).dropna() + d = data.loc[data.index.map( + lambda x: x.hour < 11)].dropna() if fix: data[data.index[0]] if len(d) == 0: @@ -3451,30 +2251,6 @@ def afunc(data): assert_frame_equal(closure_bad, closure_good) - def test_multiindex_columns_empty_level(self): - l = [['count', 'values'], ['to filter', '']] - midx = MultiIndex.from_tuples(l) - - df = DataFrame([[long(1), 'A']], columns=midx) - - grouped = df.groupby('to filter').groups - self.assertEqual(grouped['A'], [0]) - - grouped = df.groupby([('to filter', '')]).groups - self.assertEqual(grouped['A'], [0]) - - df = DataFrame([[long(1), 'A'], [long(2), 'B']], columns=midx) - - expected = df.groupby('to filter').groups - result = df.groupby([('to filter', '')]).groups - self.assertEqual(result, expected) - - df = DataFrame([[long(1), 'A'], [long(2), 'A']], columns=midx) - - expected = df.groupby('to filter').groups - result = df.groupby([('to filter', '')]).groups - tm.assert_dict_equal(result, expected) - def test_cython_median(self): df = DataFrame(np.random.randn(1000)) 
df.values[::2] = np.nan @@ -3501,7 +2277,19 @@ def test_median_empty_bins(self): expected = df.groupby(bins).agg(lambda x: x.median()) assert_frame_equal(result, expected) - def test_groupby_non_arithmetic_agg_types(self): + @pytest.mark.parametrize("dtype", [ + 'int8', 'int16', 'int32', 'int64', 'float32', 'float64']) + @pytest.mark.parametrize("method,data", [ + ('first', {'df': [{'a': 1, 'b': 1}, {'a': 2, 'b': 3}]}), + ('last', {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}]}), + ('min', {'df': [{'a': 1, 'b': 1}, {'a': 2, 'b': 3}]}), + ('max', {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}]}), + ('nth', {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}], + 'args': [1]}), + ('count', {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 2}], + 'out_type': 'int64'}) + ]) + def test_groupby_non_arithmetic_agg_types(self, dtype, method, data): # GH9311, GH6620 df = pd.DataFrame( [{'a': 1, 'b': 1}, @@ -3509,39 +2297,25 @@ def test_groupby_non_arithmetic_agg_types(self): {'a': 2, 'b': 3}, {'a': 2, 'b': 4}]) - dtypes = ['int8', 'int16', 'int32', 'int64', 'float32', 'float64'] - - grp_exp = {'first': {'df': [{'a': 1, 'b': 1}, {'a': 2, 'b': 3}]}, - 'last': {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}]}, - 'min': {'df': [{'a': 1, 'b': 1}, {'a': 2, 'b': 3}]}, - 'max': {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}]}, - 'nth': {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}], - 'args': [1]}, - 'count': {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 2}], - 'out_type': 'int64'}} + df['b'] = df.b.astype(dtype) - for dtype in dtypes: - df_in = df.copy() - df_in['b'] = df_in.b.astype(dtype) - - for method, data in compat.iteritems(grp_exp): - if 'args' not in data: - data['args'] = [] + if 'args' not in data: + data['args'] = [] - if 'out_type' in data: - out_type = data['out_type'] - else: - out_type = dtype + if 'out_type' in data: + out_type = data['out_type'] + else: + out_type = dtype - exp = data['df'] - df_out = pd.DataFrame(exp) + exp = data['df'] + df_out = pd.DataFrame(exp) - df_out['b'] = df_out.b.astype(out_type) 
- df_out.set_index('a', inplace=True) + df_out['b'] = df_out.b.astype(out_type) + df_out.set_index('a', inplace=True) - grpd = df_in.groupby('a') - t = getattr(grpd, method)(*data['args']) - assert_frame_equal(t, df_out) + grpd = df.groupby('a') + t = getattr(grpd, method)(*data['args']) + assert_frame_equal(t, df_out) def test_groupby_non_arithmetic_agg_intlike_precision(self): # GH9311, GH6620 @@ -3567,7 +2341,7 @@ def test_groupby_non_arithmetic_agg_intlike_precision(self): grpd = df.groupby('a') res = getattr(grpd, method)(*data['args']) - self.assertEqual(res.iloc[0].b, data['expected']) + assert res.iloc[0].b == data['expected'] def test_groupby_multiindex_missing_pair(self): # GH9049 @@ -3591,7 +2365,7 @@ def test_groupby_multiindex_not_lexsorted(self): lexsorted_mi = MultiIndex.from_tuples( [('a', ''), ('b1', 'c1'), ('b2', 'c2')], names=['b', 'c']) lexsorted_df = DataFrame([[1, 3, 4]], columns=lexsorted_mi) - self.assertTrue(lexsorted_df.columns.is_lexsorted()) + assert lexsorted_df.columns.is_lexsorted() # define the non-lexsorted version not_lexsorted_df = DataFrame(columns=['a', 'b', 'c', 'd'], @@ -3600,13 +2374,13 @@ def test_groupby_multiindex_not_lexsorted(self): not_lexsorted_df = not_lexsorted_df.pivot_table( index='a', columns=['b', 'c'], values='d') not_lexsorted_df = not_lexsorted_df.reset_index() - self.assertFalse(not_lexsorted_df.columns.is_lexsorted()) + assert not not_lexsorted_df.columns.is_lexsorted() # compare the results tm.assert_frame_equal(lexsorted_df, not_lexsorted_df) expected = lexsorted_df.groupby('a').mean() - with tm.assert_produces_warning(com.PerformanceWarning): + with tm.assert_produces_warning(PerformanceWarning): result = not_lexsorted_df.groupby('a').mean() tm.assert_frame_equal(expected, result) @@ -3615,7 +2389,7 @@ def test_groupby_multiindex_not_lexsorted(self): df = DataFrame({'x': ['a', 'a', 'b', 'a'], 'y': [1, 1, 2, 2], 'z': [1, 2, 3, 4]}).set_index(['x', 'y']) - self.assertFalse(df.index.is_lexsorted()) + assert 
not df.index.is_lexsorted() for level in [0, 1, [0, 1]]: for sort in [False, True]: @@ -3629,22 +2403,6 @@ def test_groupby_multiindex_not_lexsorted(self): expected = df.sort_index() tm.assert_frame_equal(expected, result) - def test_groupby_levels_and_columns(self): - # GH9344, GH9049 - idx_names = ['x', 'y'] - idx = pd.MultiIndex.from_tuples( - [(1, 1), (1, 2), (3, 4), (5, 6)], names=idx_names) - df = pd.DataFrame(np.arange(12).reshape(-1, 3), index=idx) - - by_levels = df.groupby(level=idx_names).mean() - # reset_index changes columns dtype to object - by_columns = df.reset_index().groupby(idx_names).mean() - - tm.assert_frame_equal(by_levels, by_columns, check_column_type=False) - - by_columns.columns = pd.Index(by_columns.columns, dtype=np.int64) - tm.assert_frame_equal(by_levels, by_columns) - def test_gb_apply_list_of_unequal_len_arrays(self): # GH1738 @@ -3669,128 +2427,6 @@ def noddy(value, weight): # don't die df_grouped.apply(lambda x: noddy(x.value, x.weight)) - def test_groupby_with_empty(self): - index = pd.DatetimeIndex(()) - data = () - series = pd.Series(data, index) - grouper = pd.tseries.resample.TimeGrouper('D') - grouped = series.groupby(grouper) - assert next(iter(grouped), None) is None - - def test_groupby_with_single_column(self): - df = pd.DataFrame({'a': list('abssbab')}) - tm.assert_frame_equal(df.groupby('a').get_group('a'), df.iloc[[0, 5]]) - # GH 13530 - exp = pd.DataFrame([], index=pd.Index(['a', 'b', 's'], name='a')) - tm.assert_frame_equal(df.groupby('a').count(), exp) - tm.assert_frame_equal(df.groupby('a').sum(), exp) - tm.assert_frame_equal(df.groupby('a').nth(1), exp) - - def test_groupby_with_small_elem(self): - # GH 8542 - # length=2 - df = pd.DataFrame({'event': ['start', 'start'], - 'change': [1234, 5678]}, - index=pd.DatetimeIndex(['2014-09-10', '2013-10-10'])) - grouped = df.groupby([pd.TimeGrouper(freq='M'), 'event']) - self.assertEqual(len(grouped.groups), 2) - self.assertEqual(grouped.ngroups, 2) - 
self.assertIn((pd.Timestamp('2014-09-30'), 'start'), grouped.groups) - self.assertIn((pd.Timestamp('2013-10-31'), 'start'), grouped.groups) - - res = grouped.get_group((pd.Timestamp('2014-09-30'), 'start')) - tm.assert_frame_equal(res, df.iloc[[0], :]) - res = grouped.get_group((pd.Timestamp('2013-10-31'), 'start')) - tm.assert_frame_equal(res, df.iloc[[1], :]) - - df = pd.DataFrame({'event': ['start', 'start', 'start'], - 'change': [1234, 5678, 9123]}, - index=pd.DatetimeIndex(['2014-09-10', '2013-10-10', - '2014-09-15'])) - grouped = df.groupby([pd.TimeGrouper(freq='M'), 'event']) - self.assertEqual(len(grouped.groups), 2) - self.assertEqual(grouped.ngroups, 2) - self.assertIn((pd.Timestamp('2014-09-30'), 'start'), grouped.groups) - self.assertIn((pd.Timestamp('2013-10-31'), 'start'), grouped.groups) - - res = grouped.get_group((pd.Timestamp('2014-09-30'), 'start')) - tm.assert_frame_equal(res, df.iloc[[0, 2], :]) - res = grouped.get_group((pd.Timestamp('2013-10-31'), 'start')) - tm.assert_frame_equal(res, df.iloc[[1], :]) - - # length=3 - df = pd.DataFrame({'event': ['start', 'start', 'start'], - 'change': [1234, 5678, 9123]}, - index=pd.DatetimeIndex(['2014-09-10', '2013-10-10', - '2014-08-05'])) - grouped = df.groupby([pd.TimeGrouper(freq='M'), 'event']) - self.assertEqual(len(grouped.groups), 3) - self.assertEqual(grouped.ngroups, 3) - self.assertIn((pd.Timestamp('2014-09-30'), 'start'), grouped.groups) - self.assertIn((pd.Timestamp('2013-10-31'), 'start'), grouped.groups) - self.assertIn((pd.Timestamp('2014-08-31'), 'start'), grouped.groups) - - res = grouped.get_group((pd.Timestamp('2014-09-30'), 'start')) - tm.assert_frame_equal(res, df.iloc[[0], :]) - res = grouped.get_group((pd.Timestamp('2013-10-31'), 'start')) - tm.assert_frame_equal(res, df.iloc[[1], :]) - res = grouped.get_group((pd.Timestamp('2014-08-31'), 'start')) - tm.assert_frame_equal(res, df.iloc[[2], :]) - - def test_cumcount(self): - df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], 
columns=['A']) - g = df.groupby('A') - sg = g.A - - expected = Series([0, 1, 2, 0, 3]) - - assert_series_equal(expected, g.cumcount()) - assert_series_equal(expected, sg.cumcount()) - - def test_cumcount_empty(self): - ge = DataFrame().groupby(level=0) - se = Series().groupby(level=0) - - # edge case, as this is usually considered float - e = Series(dtype='int64') - - assert_series_equal(e, ge.cumcount()) - assert_series_equal(e, se.cumcount()) - - def test_cumcount_dupe_index(self): - df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A'], - index=[0] * 5) - g = df.groupby('A') - sg = g.A - - expected = Series([0, 1, 2, 0, 3], index=[0] * 5) - - assert_series_equal(expected, g.cumcount()) - assert_series_equal(expected, sg.cumcount()) - - def test_cumcount_mi(self): - mi = MultiIndex.from_tuples([[0, 1], [1, 2], [2, 2], [2, 2], [1, 0]]) - df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A'], - index=mi) - g = df.groupby('A') - sg = g.A - - expected = Series([0, 1, 2, 0, 3], index=mi) - - assert_series_equal(expected, g.cumcount()) - assert_series_equal(expected, sg.cumcount()) - - def test_cumcount_groupby_not_col(self): - df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A'], - index=[0] * 5) - g = df.groupby([0, 0, 0, 1, 0]) - sg = g.A - - expected = Series([0, 1, 2, 0, 3], index=[0] * 5) - - assert_series_equal(expected, g.cumcount()) - assert_series_equal(expected, sg.cumcount()) - def test_fill_constistency(self): # GH9221 @@ -3837,278 +2473,6 @@ def test_index_label_overlaps_location(self): expected = ser.take([1, 3, 4]) assert_series_equal(actual, expected) - def test_groupby_selection_with_methods(self): - # some methods which require DatetimeIndex - rng = pd.date_range('2014', periods=len(self.df)) - self.df.index = rng - - g = self.df.groupby(['A'])[['C']] - g_exp = self.df[['C']].groupby(self.df['A']) - # TODO check groupby with > 1 col ? 
- - # methods which are called as .foo() - methods = ['count', - 'corr', - 'cummax', - 'cummin', - 'cumprod', - 'describe', - 'rank', - 'quantile', - 'diff', - 'shift', - 'all', - 'any', - 'idxmin', - 'idxmax', - 'ffill', - 'bfill', - 'pct_change', - 'tshift'] - - for m in methods: - res = getattr(g, m)() - exp = getattr(g_exp, m)() - assert_frame_equal(res, exp) # should always be frames! - - # methods which aren't just .foo() - assert_frame_equal(g.fillna(0), g_exp.fillna(0)) - assert_frame_equal(g.dtypes, g_exp.dtypes) - assert_frame_equal(g.apply(lambda x: x.sum()), - g_exp.apply(lambda x: x.sum())) - - assert_frame_equal(g.resample('D').mean(), g_exp.resample('D').mean()) - assert_frame_equal(g.resample('D').ohlc(), - g_exp.resample('D').ohlc()) - - assert_frame_equal(g.filter(lambda x: len(x) == 3), - g_exp.filter(lambda x: len(x) == 3)) - - def test_groupby_whitelist(self): - from string import ascii_lowercase - letters = np.array(list(ascii_lowercase)) - N = 10 - random_letters = letters.take(np.random.randint(0, 26, N)) - df = DataFrame({'floats': N / 10 * Series(np.random.random(N)), - 'letters': Series(random_letters)}) - s = df.floats - - df_whitelist = frozenset([ - 'last', - 'first', - 'mean', - 'sum', - 'min', - 'max', - 'head', - 'tail', - 'cumcount', - 'resample', - 'rank', - 'quantile', - 'fillna', - 'mad', - 'any', - 'all', - 'take', - 'idxmax', - 'idxmin', - 'shift', - 'tshift', - 'ffill', - 'bfill', - 'pct_change', - 'skew', - 'plot', - 'boxplot', - 'hist', - 'median', - 'dtypes', - 'corrwith', - 'corr', - 'cov', - 'diff', - ]) - s_whitelist = frozenset([ - 'last', - 'first', - 'mean', - 'sum', - 'min', - 'max', - 'head', - 'tail', - 'cumcount', - 'resample', - 'rank', - 'quantile', - 'fillna', - 'mad', - 'any', - 'all', - 'take', - 'idxmax', - 'idxmin', - 'shift', - 'tshift', - 'ffill', - 'bfill', - 'pct_change', - 'skew', - 'plot', - 'hist', - 'median', - 'dtype', - 'corr', - 'cov', - 'diff', - 'unique', - # 'nlargest', 'nsmallest', - ]) - - 
for obj, whitelist in zip((df, s), (df_whitelist, s_whitelist)): - gb = obj.groupby(df.letters) - self.assertEqual(whitelist, gb._apply_whitelist) - for m in whitelist: - getattr(type(gb), m) - - AGG_FUNCTIONS = ['sum', 'prod', 'min', 'max', 'median', 'mean', 'skew', - 'mad', 'std', 'var', 'sem'] - AGG_FUNCTIONS_WITH_SKIPNA = ['skew', 'mad'] - - def test_groupby_whitelist_deprecations(self): - from string import ascii_lowercase - letters = np.array(list(ascii_lowercase)) - N = 10 - random_letters = letters.take(np.random.randint(0, 26, N)) - df = DataFrame({'floats': N / 10 * Series(np.random.random(N)), - 'letters': Series(random_letters)}) - - # 10711 deprecated - with tm.assert_produces_warning(FutureWarning): - df.groupby('letters').irow(0) - with tm.assert_produces_warning(FutureWarning): - df.groupby('letters').floats.irow(0) - - def test_regression_whitelist_methods(self): - - # GH6944 - # explicity test the whitelest methods - index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', - 'three']], - labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], - [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], - names=['first', 'second']) - raw_frame = DataFrame(np.random.randn(10, 3), index=index, - columns=Index(['A', 'B', 'C'], name='exp')) - raw_frame.iloc[1, [1, 2]] = np.nan - raw_frame.iloc[7, [0, 1]] = np.nan - - for op, level, axis, skipna in cart_product(self.AGG_FUNCTIONS, - lrange(2), lrange(2), - [True, False]): - - if axis == 0: - frame = raw_frame - else: - frame = raw_frame.T - - if op in self.AGG_FUNCTIONS_WITH_SKIPNA: - grouped = frame.groupby(level=level, axis=axis) - result = getattr(grouped, op)(skipna=skipna) - expected = getattr(frame, op)(level=level, axis=axis, - skipna=skipna) - assert_frame_equal(result, expected) - else: - grouped = frame.groupby(level=level, axis=axis) - result = getattr(grouped, op)() - expected = getattr(frame, op)(level=level, axis=axis) - assert_frame_equal(result, expected) - - def test_groupby_blacklist(self): - from string import 
ascii_lowercase - letters = np.array(list(ascii_lowercase)) - N = 10 - random_letters = letters.take(np.random.randint(0, 26, N)) - df = DataFrame({'floats': N / 10 * Series(np.random.random(N)), - 'letters': Series(random_letters)}) - s = df.floats - - blacklist = [ - 'eval', 'query', 'abs', 'where', - 'mask', 'align', 'groupby', 'clip', 'astype', - 'at', 'combine', 'consolidate', 'convert_objects', - ] - to_methods = [method for method in dir(df) if method.startswith('to_')] - - blacklist.extend(to_methods) - - # e.g., to_csv - defined_but_not_allowed = ("(?:^Cannot.+{0!r}.+{1!r}.+try using the " - "'apply' method$)") - - # e.g., query, eval - not_defined = "(?:^{1!r} object has no attribute {0!r}$)" - fmt = defined_but_not_allowed + '|' + not_defined - for bl in blacklist: - for obj in (df, s): - gb = obj.groupby(df.letters) - msg = fmt.format(bl, type(gb).__name__) - with tm.assertRaisesRegexp(AttributeError, msg): - getattr(gb, bl) - - def test_tab_completion(self): - grp = self.mframe.groupby(level='second') - results = set([v for v in dir(grp) if not v.startswith('_')]) - expected = set( - ['A', 'B', 'C', 'agg', 'aggregate', 'apply', 'boxplot', 'filter', - 'first', 'get_group', 'groups', 'hist', 'indices', 'last', 'max', - 'mean', 'median', 'min', 'name', 'ngroups', 'nth', 'ohlc', 'plot', - 'prod', 'size', 'std', 'sum', 'transform', 'var', 'sem', 'count', - 'nunique', 'head', 'irow', 'describe', 'cummax', 'quantile', - 'rank', 'cumprod', 'tail', 'resample', 'cummin', 'fillna', - 'cumsum', 'cumcount', 'all', 'shift', 'skew', 'bfill', 'ffill', - 'take', 'tshift', 'pct_change', 'any', 'mad', 'corr', 'corrwith', - 'cov', 'dtypes', 'ndim', 'diff', 'idxmax', 'idxmin', - 'ffill', 'bfill', 'pad', 'backfill', 'rolling', 'expanding']) - self.assertEqual(results, expected) - - def test_lower_int_prec_count(self): - df = DataFrame({'a': np.array( - [0, 1, 2, 100], np.int8), - 'b': np.array( - [1, 2, 3, 6], np.uint32), - 'c': np.array( - [4, 5, 6, 8], np.int16), - 'grp': 
list('ab' * 2)}) - result = df.groupby('grp').count() - expected = DataFrame({'a': [2, 2], - 'b': [2, 2], - 'c': [2, 2]}, index=pd.Index(list('ab'), - name='grp')) - tm.assert_frame_equal(result, expected) - - def test_count_uses_size_on_exception(self): - class RaisingObjectException(Exception): - pass - - class RaisingObject(object): - - def __init__(self, msg='I will raise inside Cython'): - super(RaisingObject, self).__init__() - self.msg = msg - - def __eq__(self, other): - # gets called in Cython to check that raising calls the method - raise RaisingObjectException(self.msg) - - df = DataFrame({'a': [RaisingObject() for _ in range(4)], - 'grp': list('ab' * 2)}) - result = df.groupby('grp').count() - expected = DataFrame({'a': [2, 2]}, index=pd.Index( - list('ab'), name='grp')) - tm.assert_frame_equal(result, expected) - def test_groupby_cumprod(self): # GH 4095 df = pd.DataFrame({'key': ['b'] * 10, 'value': 2}) @@ -4168,7 +2532,7 @@ def test_max_nan_bug(self): r = gb[['File']].max() e = gb['File'].max().to_frame() tm.assert_frame_equal(r, e) - self.assertFalse(r['File'].isnull().any()) + assert not r['File'].isna().any() def test_nlargest(self): a = Series([1, 3, 5, 7, 2, 9, 0, 4, 6, 10]) @@ -4186,8 +2550,6 @@ def test_nlargest(self): 3, 2, 1, 3, 3, 2 ], index=MultiIndex.from_arrays([list('aaabbb'), [2, 3, 1, 6, 5, 7]])) assert_series_equal(gb.nlargest(3, keep='last'), e) - with tm.assert_produces_warning(FutureWarning): - assert_series_equal(gb.nlargest(3, take_last=True), e) def test_nsmallest(self): a = Series([1, 3, 5, 7, 2, 9, 0, 4, 6, 10]) @@ -4205,8 +2567,6 @@ def test_nsmallest(self): 0, 1, 1, 0, 1, 2 ], index=MultiIndex.from_arrays([list('aaabbb'), [4, 1, 0, 9, 8, 7]])) assert_series_equal(gb.nsmallest(3, keep='last'), e) - with tm.assert_produces_warning(FutureWarning): - assert_series_equal(gb.nsmallest(3, take_last=True), e) def test_transform_doesnt_clobber_ints(self): # GH 7972 @@ -4284,29 +2644,6 @@ def test_sort(x): g.apply(test_sort) - def 
test_nunique_with_object(self): - # GH 11077 - data = pd.DataFrame( - [[100, 1, 'Alice'], - [200, 2, 'Bob'], - [300, 3, 'Charlie'], - [-400, 4, 'Dan'], - [500, 5, 'Edith']], - columns=['amount', 'id', 'name'] - ) - - result = data.groupby(['id', 'amount'])['name'].nunique() - index = MultiIndex.from_arrays([data.id, data.amount]) - expected = pd.Series([1] * 5, name='name', index=index) - tm.assert_series_equal(result, expected) - - def test_nunique_with_empty_series(self): - # GH 12553 - data = pd.Series(name='name') - result = data.groupby(level=0).nunique() - expected = pd.Series(name='name', dtype='int64') - tm.assert_series_equal(result, expected) - def test_numpy_compat(self): # see gh-12811 df = pd.DataFrame({'A': [1, 2, 1], 'B': [1, 2, 3]}) @@ -4315,20 +2652,10 @@ def test_numpy_compat(self): msg = "numpy operations are not valid with groupby" for func in ('mean', 'var', 'std', 'cumprod', 'cumsum'): - tm.assertRaisesRegexp(UnsupportedFunctionCall, msg, - getattr(g, func), 1, 2, 3) - tm.assertRaisesRegexp(UnsupportedFunctionCall, msg, - getattr(g, func), foo=1) - - def test_grouping_string_repr(self): - # GH 13394 - mi = MultiIndex.from_arrays([list("AAB"), list("aba")]) - df = DataFrame([[1, 2, 3]], columns=mi) - gr = df.groupby(df[('A', 'a')]) - - result = gr.grouper.groupings[0].__repr__() - expected = "Grouping(('A', 'a'))" - tm.assert_equal(result, expected) + tm.assert_raises_regex(UnsupportedFunctionCall, msg, + getattr(g, func), 1, 2, 3) + tm.assert_raises_regex(UnsupportedFunctionCall, msg, + getattr(g, func), foo=1) def test_group_shift_with_null_key(self): # This test is designed to replicate the segfault in issue #13813. @@ -4337,7 +2664,7 @@ def test_group_shift_with_null_key(self): # Generate a moderately large dataframe with occasional missing # values in column `B`, and then group by [`A`, `B`]. This should # force `-1` in `labels` array of `g.grouper.group_info` exactly - # at those places, where the group-by key is partilly missing. 
+ # at those places, where the group-by key is partially missing. df = DataFrame([(i % 12, i % 3 if i % 3 else np.nan, i) for i in range(n_rows)], dtype=float, columns=["A", "B", "Z"], index=None) @@ -4361,7 +2688,7 @@ def test_pivot_table_values_key_error(self): df['year'] = df.set_index('eventDate').index.year df['month'] = df.set_index('eventDate').index.month - with self.assertRaises(KeyError): + with pytest.raises(KeyError): df.reset_index().pivot_table(index='year', columns='month', values='badname', aggfunc='count') @@ -4444,31 +2771,245 @@ def test_cummin_cummax(self): result = base_df.groupby('A').B.apply(lambda x: x.cummax()).to_frame() tm.assert_frame_equal(expected, result) + # GH 15561 + df = pd.DataFrame(dict(a=[1], b=pd.to_datetime(['2001']))) + expected = pd.Series(pd.to_datetime('2001'), index=[0], name='b') + for method in ['cummax', 'cummin']: + result = getattr(df.groupby('a')['b'], method)() + tm.assert_series_equal(expected, result) + + # GH 15635 + df = pd.DataFrame(dict(a=[1, 2, 1], b=[2, 1, 1])) + result = df.groupby('a').b.cummax() + expected = pd.Series([2, 1, 2], name='b') + tm.assert_series_equal(result, expected) -def _check_groupby(df, result, keys, field, f=lambda x: x.sum()): - tups = lmap(tuple, df[keys].values) - tups = com._asarray_tuplesafe(tups) - expected = f(df.groupby(tups)[field]) - for k, v in compat.iteritems(expected): - assert (result[k] == v) + df = pd.DataFrame(dict(a=[1, 2, 1], b=[1, 2, 2])) + result = df.groupby('a').b.cummin() + expected = pd.Series([1, 2, 1], name='b') + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('in_vals, out_vals', [ + + # Basics: strictly increasing (T), strictly decreasing (F), + # abs val increasing (F), non-strictly increasing (T) + ([1, 2, 5, 3, 2, 0, 4, 5, -6, 1, 1], + [True, False, False, True]), + + # Test with inf vals + ([1, 2.1, np.inf, 3, 2, np.inf, -np.inf, 5, 11, 1, -np.inf], + [True, False, True, False]), + + # Test with nan vals; should always be False 
+ ([1, 2, np.nan, 3, 2, np.nan, np.nan, 5, -np.inf, 1, np.nan], + [False, False, False, False]), + ]) + def test_is_monotonic_increasing(self, in_vals, out_vals): + # GH 17015 + source_dict = { + 'A': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11'], + 'B': ['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'c', 'd', 'd'], + 'C': in_vals} + df = pd.DataFrame(source_dict) + result = df.groupby('B').C.is_monotonic_increasing + index = Index(list('abcd'), name='B') + expected = pd.Series(index=index, data=out_vals, name='C') + tm.assert_series_equal(result, expected) + + # Also check result equal to manually taking x.is_monotonic_increasing. + expected = ( + df.groupby(['B']).C.apply(lambda x: x.is_monotonic_increasing)) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('in_vals, out_vals', [ + # Basics: strictly decreasing (T), strictly increasing (F), + # abs val decreasing (F), non-strictly decreasing (T) + ([10, 9, 7, 3, 4, 5, -3, 2, 0, 1, 1], + [True, False, False, True]), + + # Test with inf vals + ([np.inf, 1, -np.inf, np.inf, 2, -3, -np.inf, 5, -3, -np.inf, -np.inf], + [True, True, False, True]), + + # Test with nan vals; should always be False + ([1, 2, np.nan, 3, 2, np.nan, np.nan, 5, -np.inf, 1, np.nan], + [False, False, False, False]), + ]) + def test_is_monotonic_decreasing(self, in_vals, out_vals): + # GH 17015 + source_dict = { + 'A': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11'], + 'B': ['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'c', 'd', 'd'], + 'C': in_vals} + + df = pd.DataFrame(source_dict) + result = df.groupby('B').C.is_monotonic_decreasing + index = Index(list('abcd'), name='B') + expected = pd.Series(index=index, data=out_vals, name='C') + tm.assert_series_equal(result, expected) + + def test_apply_numeric_coercion_when_datetime(self): + # In the past, group-by/apply operations have been over-eager + # in converting dtypes to numeric, in the presence of datetime + # columns.
Various GH issues were filed, the reproductions + # for which are here. + + # GH 15670 + df = pd.DataFrame({'Number': [1, 2], + 'Date': ["2017-03-02"] * 2, + 'Str': ["foo", "inf"]}) + expected = df.groupby(['Number']).apply(lambda x: x.iloc[0]) + df.Date = pd.to_datetime(df.Date) + result = df.groupby(['Number']).apply(lambda x: x.iloc[0]) + tm.assert_series_equal(result['Str'], expected['Str']) + + # GH 15421 + df = pd.DataFrame({'A': [10, 20, 30], + 'B': ['foo', '3', '4'], + 'T': [pd.Timestamp("12:31:22")] * 3}) + + def get_B(g): + return g.iloc[0][['B']] + result = df.groupby('A').apply(get_B)['B'] + expected = df.B + expected.index = df.A + tm.assert_series_equal(result, expected) + + # GH 14423 + def predictions(tool): + out = pd.Series(index=['p1', 'p2', 'useTime'], dtype=object) + if 'step1' in list(tool.State): + out['p1'] = str(tool[tool.State == 'step1'].Machine.values[0]) + if 'step2' in list(tool.State): + out['p2'] = str(tool[tool.State == 'step2'].Machine.values[0]) + out['useTime'] = str( + tool[tool.State == 'step2'].oTime.values[0]) + return out + df1 = pd.DataFrame({'Key': ['B', 'B', 'A', 'A'], + 'State': ['step1', 'step2', 'step1', 'step2'], + 'oTime': ['', '2016-09-19 05:24:33', + '', '2016-09-19 23:59:04'], + 'Machine': ['23', '36L', '36R', '36R']}) + df2 = df1.copy() + df2.oTime = pd.to_datetime(df2.oTime) + expected = df1.groupby('Key').apply(predictions).p1 + result = df2.groupby('Key').apply(predictions).p1 + tm.assert_series_equal(expected, result) + + def test_pipe(self): + # Test the pipe method of DataFrameGroupBy. 
+ # Issue #17871 + + random_state = np.random.RandomState(1234567890) + + df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar', + 'foo', 'bar', 'foo', 'foo'], + 'B': random_state.randn(8), + 'C': random_state.randn(8)}) + + def f(dfgb): + return dfgb.B.max() - dfgb.C.min().min() + + def square(srs): + return srs ** 2 + # Note that the transformations are + # GroupBy -> Series + # Series -> Series + # This then chains the GroupBy.pipe and the + # NDFrame.pipe methods + result = df.groupby('A').pipe(f).pipe(square) -def test_decons(): - from pandas.core.groupby import decons_group_index, get_group_index + index = Index([u'bar', u'foo'], dtype='object', name=u'A') + expected = pd.Series([8.99110003361, 8.17516964785], name='B', + index=index) - def testit(label_list, shape): - group_index = get_group_index(label_list, shape, sort=True, xnull=True) - label_list2 = decons_group_index(group_index, shape) + assert_series_equal(expected, result) - for a, b in zip(label_list, label_list2): - assert (np.array_equal(a, b)) + def test_pipe_args(self): + # Test passing args to the pipe method of DataFrameGroupBy. 
+ # Issue #17871 - shape = (4, 5, 6) - label_list = [np.tile([0, 1, 2, 3, 0, 1, 2, 3], 100), np.tile( - [0, 2, 4, 3, 0, 1, 2, 3], 100), np.tile( - [5, 1, 0, 2, 3, 0, 5, 4], 100)] - testit(label_list, shape) + df = pd.DataFrame({'group': ['A', 'A', 'B', 'B', 'C'], + 'x': [1.0, 2.0, 3.0, 2.0, 5.0], + 'y': [10.0, 100.0, 1000.0, -100.0, -1000.0]}) - shape = (10000, 10000) - label_list = [np.tile(np.arange(10000), 5), np.tile(np.arange(10000), 5)] - testit(label_list, shape) + def f(dfgb, arg1): + return (dfgb.filter(lambda grp: grp.y.mean() > arg1, dropna=False) + .groupby(dfgb.grouper)) + + def g(dfgb, arg2): + return dfgb.sum() / dfgb.sum().sum() + arg2 + + def h(df, arg3): + return df.x + df.y - arg3 + + result = (df + .groupby('group') + .pipe(f, 0) + .pipe(g, 10) + .pipe(h, 100)) + + # Assert the results here + index = pd.Index(['A', 'B', 'C'], name='group') + expected = pd.Series([-79.5160891089, -78.4839108911, -80], + index=index) + + assert_series_equal(expected, result) + + # test SeriesGroupby.pipe + ser = pd.Series([1, 1, 2, 2, 3, 3]) + result = ser.groupby(ser).pipe(lambda grp: grp.sum() * grp.count()) + + expected = pd.Series([4, 8, 12], index=pd.Int64Index([1, 2, 3])) + + assert_series_equal(result, expected) + + def test_empty_dataframe_groupby(self): + # GH8093 + df = DataFrame(columns=['A', 'B', 'C']) + + result = df.groupby('A').sum() + expected = DataFrame(columns=['B', 'C'], dtype=np.float64) + expected.index.name = 'A' + + assert_frame_equal(result, expected) + + def test_tuple_warns(self): + # https://github.com/pandas-dev/pandas/issues/18314 + df = pd.DataFrame({('a', 'b'): [1, 1, 2, 2], 'a': [1, 1, 1, 2], + 'b': [1, 2, 2, 2], 'c': [1, 1, 1, 1]}) + with tm.assert_produces_warning(FutureWarning) as w: + df[['a', 'b', 'c']].groupby(('a', 'b')).c.mean() + + assert "Interpreting tuple 'by' as a list" in str(w[0].message) + + with tm.assert_produces_warning(None): + df.groupby(('a', 'b')).c.mean() + + def test_tuple_warns_unhashable(self): + # 
https://github.com/pandas-dev/pandas/issues/18314 + business_dates = date_range(start='4/1/2014', end='6/30/2014', + freq='B') + df = DataFrame(1, index=business_dates, columns=['a', 'b']) + + with tm.assert_produces_warning(FutureWarning) as w: + df.groupby((df.index.year, df.index.month)).nth([0, 3, -1]) + + assert "Interpreting tuple 'by' as a list" in str(w[0].message) + + def test_tuple_correct_keyerror(self): + # https://github.com/pandas-dev/pandas/issues/18798 + df = pd.DataFrame(1, index=range(3), + columns=pd.MultiIndex.from_product([[1, 2], + [3, 4]])) + with tm.assert_raises_regex(KeyError, "(7, 8)"): + df.groupby((7, 8)).mean() + + +def _check_groupby(df, result, keys, field, f=lambda x: x.sum()): + tups = lmap(tuple, df[keys].values) + tups = com._asarray_tuplesafe(tups) + expected = f(df.groupby(tups)[field]) + for k, v in compat.iteritems(expected): + assert (result[k] == v) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py new file mode 100644 index 0000000000000..8702062e9cd0a --- /dev/null +++ b/pandas/tests/groupby/test_grouping.py @@ -0,0 +1,803 @@ +# -*- coding: utf-8 -*- + +""" test where we are determining what we are grouping, or getting groups """ + +import pytest + +from warnings import catch_warnings +from pandas import (date_range, Timestamp, + Index, MultiIndex, DataFrame, Series, CategoricalIndex) +from pandas.util.testing import (assert_panel_equal, assert_frame_equal, + assert_series_equal, assert_almost_equal) +from pandas.compat import lrange, long + +from pandas import compat +import numpy as np + +import pandas.util.testing as tm +import pandas as pd +from .common import MixIn + + +# selection +# -------------------------------- + +class TestSelection(MixIn): + + def test_select_bad_cols(self): + df = DataFrame([[1, 2]], columns=['A', 'B']) + g = df.groupby('A') + pytest.raises(KeyError, g.__getitem__, ['C']) # g[['C']] + + pytest.raises(KeyError, g.__getitem__, ['A', 'C']) # g[['A', 
'C']] + with tm.assert_raises_regex(KeyError, '^[^A]+$'): + # A should not be referenced as a bad column... + # will have to rethink regex if you change message! + g[['A', 'C']] + + def test_groupby_duplicated_column_errormsg(self): + # GH7511 + df = DataFrame(columns=['A', 'B', 'A', 'C'], + data=[range(4), range(2, 6), range(0, 8, 2)]) + + pytest.raises(ValueError, df.groupby, 'A') + pytest.raises(ValueError, df.groupby, ['A', 'B']) + + grouped = df.groupby('B') + c = grouped.count() + assert c.columns.nlevels == 1 + assert c.columns.size == 3 + + def test_column_select_via_attr(self): + result = self.df.groupby('A').C.sum() + expected = self.df.groupby('A')['C'].sum() + assert_series_equal(result, expected) + + self.df['mean'] = 1.5 + result = self.df.groupby('A').mean() + expected = self.df.groupby('A').agg(np.mean) + assert_frame_equal(result, expected) + + def test_getitem_list_of_columns(self): + df = DataFrame( + {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], + 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], + 'C': np.random.randn(8), + 'D': np.random.randn(8), + 'E': np.random.randn(8)}) + + result = df.groupby('A')[['C', 'D']].mean() + result2 = df.groupby('A')['C', 'D'].mean() + result3 = df.groupby('A')[df.columns[2:4]].mean() + + expected = df.loc[:, ['A', 'C', 'D']].groupby('A').mean() + + assert_frame_equal(result, expected) + assert_frame_equal(result2, expected) + assert_frame_equal(result3, expected) + + def test_getitem_numeric_column_names(self): + # GH #13731 + df = DataFrame({0: list('abcd') * 2, + 2: np.random.randn(8), + 4: np.random.randn(8), + 6: np.random.randn(8)}) + result = df.groupby(0)[df.columns[1:3]].mean() + result2 = df.groupby(0)[2, 4].mean() + result3 = df.groupby(0)[[2, 4]].mean() + + expected = df.loc[:, [0, 2, 4]].groupby(0).mean() + + assert_frame_equal(result, expected) + assert_frame_equal(result2, expected) + assert_frame_equal(result3, expected) + + +# grouping +# 
-------------------------------- + +class TestGrouping(MixIn): + + def test_grouper_index_types(self): + # related GH5375 + # groupby misbehaving when using a Floatlike index + df = DataFrame(np.arange(10).reshape(5, 2), columns=list('AB')) + for index in [tm.makeFloatIndex, tm.makeStringIndex, + tm.makeUnicodeIndex, tm.makeIntIndex, tm.makeDateIndex, + tm.makePeriodIndex]: + + df.index = index(len(df)) + df.groupby(list('abcde')).apply(lambda x: x) + + df.index = list(reversed(df.index.tolist())) + df.groupby(list('abcde')).apply(lambda x: x) + + def test_grouper_multilevel_freq(self): + + # GH 7885 + # with level and freq specified in a pd.Grouper + from datetime import date, timedelta + d0 = date.today() - timedelta(days=14) + dates = date_range(d0, date.today()) + date_index = pd.MultiIndex.from_product( + [dates, dates], names=['foo', 'bar']) + df = pd.DataFrame(np.random.randint(0, 100, 225), index=date_index) + + # Check string level + expected = df.reset_index().groupby([pd.Grouper( + key='foo', freq='W'), pd.Grouper(key='bar', freq='W')]).sum() + # reset index changes columns dtype to object + expected.columns = pd.Index([0], dtype='int64') + + result = df.groupby([pd.Grouper(level='foo', freq='W'), pd.Grouper( + level='bar', freq='W')]).sum() + assert_frame_equal(result, expected) + + # Check integer level + result = df.groupby([pd.Grouper(level=0, freq='W'), pd.Grouper( + level=1, freq='W')]).sum() + assert_frame_equal(result, expected) + + def test_grouper_creation_bug(self): + + # GH 8795 + df = DataFrame({'A': [0, 0, 1, 1, 2, 2], 'B': [1, 2, 3, 4, 5, 6]}) + g = df.groupby('A') + expected = g.sum() + + g = df.groupby(pd.Grouper(key='A')) + result = g.sum() + assert_frame_equal(result, expected) + + result = g.apply(lambda x: x.sum()) + assert_frame_equal(result, expected) + + g = df.groupby(pd.Grouper(key='A', axis=0)) + result = g.sum() + assert_frame_equal(result, expected) + + # GH14334 + # pd.Grouper(key=...) 
may be passed in a list + df = DataFrame({'A': [0, 0, 0, 1, 1, 1], + 'B': [1, 1, 2, 2, 3, 3], + 'C': [1, 2, 3, 4, 5, 6]}) + # Group by single column + expected = df.groupby('A').sum() + g = df.groupby([pd.Grouper(key='A')]) + result = g.sum() + assert_frame_equal(result, expected) + + # Group by two columns + # using a combination of strings and Grouper objects + expected = df.groupby(['A', 'B']).sum() + + # Group with two Grouper objects + g = df.groupby([pd.Grouper(key='A'), pd.Grouper(key='B')]) + result = g.sum() + assert_frame_equal(result, expected) + + # Group with a string and a Grouper object + g = df.groupby(['A', pd.Grouper(key='B')]) + result = g.sum() + assert_frame_equal(result, expected) + + # Group with a Grouper object and a string + g = df.groupby([pd.Grouper(key='A'), 'B']) + result = g.sum() + assert_frame_equal(result, expected) + + # GH8866 + s = Series(np.arange(8, dtype='int64'), + index=pd.MultiIndex.from_product( + [list('ab'), range(2), + date_range('20130101', periods=2)], + names=['one', 'two', 'three'])) + result = s.groupby(pd.Grouper(level='three', freq='M')).sum() + expected = Series([28], index=Index( + [Timestamp('2013-01-31')], freq='M', name='three')) + assert_series_equal(result, expected) + + # just specifying a level breaks + result = s.groupby(pd.Grouper(level='one')).sum() + expected = s.groupby(level='one').sum() + assert_series_equal(result, expected) + + def test_grouper_column_and_index(self): + # GH 14327 + + # Grouping a multi-index frame by a column and an index level should + # be equivalent to resetting the index and grouping by two columns + idx = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('a', 3), + ('b', 1), ('b', 2), ('b', 3)]) + idx.names = ['outer', 'inner'] + df_multi = pd.DataFrame({"A": np.arange(6), + 'B': ['one', 'one', 'two', + 'two', 'one', 'one']}, + index=idx) + result = df_multi.groupby(['B', pd.Grouper(level='inner')]).mean() + expected = df_multi.reset_index().groupby(['B', 'inner']).mean() + 
assert_frame_equal(result, expected) + + # Test the reverse grouping order + result = df_multi.groupby([pd.Grouper(level='inner'), 'B']).mean() + expected = df_multi.reset_index().groupby(['inner', 'B']).mean() + assert_frame_equal(result, expected) + + # Grouping a single-index frame by a column and the index should + # be equivalent to resetting the index and grouping by two columns + df_single = df_multi.reset_index('outer') + result = df_single.groupby(['B', pd.Grouper(level='inner')]).mean() + expected = df_single.reset_index().groupby(['B', 'inner']).mean() + assert_frame_equal(result, expected) + + # Test the reverse grouping order + result = df_single.groupby([pd.Grouper(level='inner'), 'B']).mean() + expected = df_single.reset_index().groupby(['inner', 'B']).mean() + assert_frame_equal(result, expected) + + def test_groupby_levels_and_columns(self): + # GH9344, GH9049 + idx_names = ['x', 'y'] + idx = pd.MultiIndex.from_tuples( + [(1, 1), (1, 2), (3, 4), (5, 6)], names=idx_names) + df = pd.DataFrame(np.arange(12).reshape(-1, 3), index=idx) + + by_levels = df.groupby(level=idx_names).mean() + # reset_index changes columns dtype to object + by_columns = df.reset_index().groupby(idx_names).mean() + + tm.assert_frame_equal(by_levels, by_columns, check_column_type=False) + + by_columns.columns = pd.Index(by_columns.columns, dtype=np.int64) + tm.assert_frame_equal(by_levels, by_columns) + + def test_groupby_categorical_index_and_columns(self): + # GH18432 + columns = ['A', 'B', 'A', 'B'] + categories = ['B', 'A'] + data = np.ones((5, 4), int) + cat_columns = CategoricalIndex(columns, + categories=categories, + ordered=True) + df = DataFrame(data=data, columns=cat_columns) + result = df.groupby(axis=1, level=0).sum() + expected_data = 2 * np.ones((5, 2), int) + expected_columns = CategoricalIndex(categories, + categories=categories, + ordered=True) + expected = DataFrame(data=expected_data, columns=expected_columns) + assert_frame_equal(result, expected) + + # 
test transposed version + df = DataFrame(data.T, index=cat_columns) + result = df.groupby(axis=0, level=0).sum() + expected = DataFrame(data=expected_data.T, index=expected_columns) + assert_frame_equal(result, expected) + + def test_grouper_getting_correct_binner(self): + + # GH 10063 + # using a non-time-based grouper and a time-based grouper + # and specifying levels + df = DataFrame({'A': 1}, index=pd.MultiIndex.from_product( + [list('ab'), date_range('20130101', periods=80)], names=['one', + 'two'])) + result = df.groupby([pd.Grouper(level='one'), pd.Grouper( + level='two', freq='M')]).sum() + expected = DataFrame({'A': [31, 28, 21, 31, 28, 21]}, + index=MultiIndex.from_product( + [list('ab'), + date_range('20130101', freq='M', periods=3)], + names=['one', 'two'])) + assert_frame_equal(result, expected) + + def test_grouper_iter(self): + assert sorted(self.df.groupby('A').grouper) == ['bar', 'foo'] + + def test_empty_groups(self): + # see gh-1048 + pytest.raises(ValueError, self.df.groupby, []) + + def test_groupby_grouper(self): + grouped = self.df.groupby('A') + + result = self.df.groupby(grouped.grouper).mean() + expected = grouped.mean() + tm.assert_frame_equal(result, expected) + + def test_groupby_dict_mapping(self): + # GH #679 + from pandas import Series + s = Series({'T1': 5}) + result = s.groupby({'T1': 'T2'}).agg(sum) + expected = s.groupby(['T2']).agg(sum) + assert_series_equal(result, expected) + + s = Series([1., 2., 3., 4.], index=list('abcd')) + mapping = {'a': 0, 'b': 0, 'c': 1, 'd': 1} + + result = s.groupby(mapping).mean() + result2 = s.groupby(mapping).agg(np.mean) + expected = s.groupby([0, 0, 1, 1]).mean() + expected2 = s.groupby([0, 0, 1, 1]).mean() + assert_series_equal(result, expected) + assert_series_equal(result, result2) + assert_series_equal(result, expected2) + + def test_groupby_grouper_f_sanity_checked(self): + dates = date_range('01-Jan-2013', periods=12, freq='MS') + ts = Series(np.random.randn(12), index=dates) + + # GH3035 
+ # index.map is used to apply grouper to the index + # if it fails on the elements, map tries it on the entire index as + # a sequence. That can yield invalid results that cause trouble + # down the line. + # the surprise comes from using key[0:6] rather than str(key)[0:6] + # when the elements are Timestamp. + # the result is Index[0:6], very confusing. + + pytest.raises(AssertionError, ts.groupby, lambda key: key[0:6]) + + def test_grouping_error_on_multidim_input(self): + from pandas.core.groupby import Grouping + pytest.raises(ValueError, + Grouping, self.df.index, self.df[['A', 'A']]) + + def test_multiindex_passthru(self): + + # GH 7997 + # regression from 0.14.1 + df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + df.columns = pd.MultiIndex.from_tuples([(0, 1), (1, 1), (2, 1)]) + + result = df.groupby(axis=1, level=[0, 1]).first() + assert_frame_equal(result, df) + + def test_multiindex_negative_level(self): + # GH 13901 + result = self.mframe.groupby(level=-1).sum() + expected = self.mframe.groupby(level='second').sum() + assert_frame_equal(result, expected) + + result = self.mframe.groupby(level=-2).sum() + expected = self.mframe.groupby(level='first').sum() + assert_frame_equal(result, expected) + + result = self.mframe.groupby(level=[-2, -1]).sum() + expected = self.mframe + assert_frame_equal(result, expected) + + result = self.mframe.groupby(level=[-1, 'first']).sum() + expected = self.mframe.groupby(level=['second', 'first']).sum() + assert_frame_equal(result, expected) + + def test_multifunc_select_col_integer_cols(self): + df = self.df + df.columns = np.arange(len(df.columns)) + + # it works!
+ df.groupby(1, as_index=False)[2].agg({'Q': np.mean}) + + def test_multiindex_columns_empty_level(self): + lst = [['count', 'values'], ['to filter', '']] + midx = MultiIndex.from_tuples(lst) + + df = DataFrame([[long(1), 'A']], columns=midx) + + grouped = df.groupby('to filter').groups + assert grouped['A'] == [0] + + grouped = df.groupby([('to filter', '')]).groups + assert grouped['A'] == [0] + + df = DataFrame([[long(1), 'A'], [long(2), 'B']], columns=midx) + + expected = df.groupby('to filter').groups + result = df.groupby([('to filter', '')]).groups + assert result == expected + + df = DataFrame([[long(1), 'A'], [long(2), 'A']], columns=midx) + + expected = df.groupby('to filter').groups + result = df.groupby([('to filter', '')]).groups + tm.assert_dict_equal(result, expected) + + def test_groupby_multiindex_tuple(self): + # GH 17979 + df = pd.DataFrame([[1, 2, 3, 4], [3, 4, 5, 6], [1, 4, 2, 3]], + columns=pd.MultiIndex.from_arrays( + [['a', 'b', 'b', 'c'], + [1, 1, 2, 2]])) + expected = df.groupby([('b', 1)]).groups + result = df.groupby(('b', 1)).groups + tm.assert_dict_equal(expected, result) + + df2 = pd.DataFrame(df.values, + columns=pd.MultiIndex.from_arrays( + [['a', 'b', 'b', 'c'], + ['d', 'd', 'e', 'e']])) + expected = df2.groupby([('b', 'd')]).groups + result = df.groupby(('b', 1)).groups + tm.assert_dict_equal(expected, result) + + df3 = pd.DataFrame(df.values, + columns=[('a', 'd'), ('b', 'd'), ('b', 'e'), 'c']) + expected = df3.groupby([('b', 'd')]).groups + result = df.groupby(('b', 1)).groups + tm.assert_dict_equal(expected, result) + + @pytest.mark.parametrize('sort', [True, False]) + def test_groupby_level(self, sort): + # GH 17537 + frame = self.mframe + deleveled = frame.reset_index() + + result0 = frame.groupby(level=0, sort=sort).sum() + result1 = frame.groupby(level=1, sort=sort).sum() + + expected0 = frame.groupby(deleveled['first'].values, sort=sort).sum() + expected1 = frame.groupby(deleveled['second'].values, sort=sort).sum() + + 
expected0.index.name = 'first' + expected1.index.name = 'second' + + assert result0.index.name == 'first' + assert result1.index.name == 'second' + + assert_frame_equal(result0, expected0) + assert_frame_equal(result1, expected1) + assert result0.index.name == frame.index.names[0] + assert result1.index.name == frame.index.names[1] + + # groupby level name + result0 = frame.groupby(level='first', sort=sort).sum() + result1 = frame.groupby(level='second', sort=sort).sum() + assert_frame_equal(result0, expected0) + assert_frame_equal(result1, expected1) + + # axis=1 + + result0 = frame.T.groupby(level=0, axis=1, sort=sort).sum() + result1 = frame.T.groupby(level=1, axis=1, sort=sort).sum() + assert_frame_equal(result0, expected0.T) + assert_frame_equal(result1, expected1.T) + + # raise exception for non-MultiIndex + pytest.raises(ValueError, self.df.groupby, level=1) + + def test_groupby_level_index_names(self): + # GH4014 this used to raise ValueError since 'exp'>1 (in py2) + df = DataFrame({'exp': ['A'] * 3 + ['B'] * 3, + 'var1': lrange(6), }).set_index('exp') + df.groupby(level='exp') + pytest.raises(ValueError, df.groupby, level='foo') + + @pytest.mark.parametrize('sort', [True, False]) + def test_groupby_level_with_nas(self, sort): + # GH 17537 + index = MultiIndex(levels=[[1, 0], [0, 1, 2, 3]], + labels=[[1, 1, 1, 1, 0, 0, 0, 0], [0, 1, 2, 3, 0, 1, + 2, 3]]) + + # factorizing doesn't confuse things + s = Series(np.arange(8.), index=index) + result = s.groupby(level=0, sort=sort).sum() + expected = Series([6., 22.], index=[0, 1]) + assert_series_equal(result, expected) + + index = MultiIndex(levels=[[1, 0], [0, 1, 2, 3]], + labels=[[1, 1, 1, 1, -1, 0, 0, 0], [0, 1, 2, 3, 0, + 1, 2, 3]]) + + # factorizing doesn't confuse things + s = Series(np.arange(8.), index=index) + result = s.groupby(level=0, sort=sort).sum() + expected = Series([6., 18.], index=[0.0, 1.0]) + assert_series_equal(result, expected) + + def test_groupby_args(self): + # PR8618 and issue 8015 + 
frame = self.mframe + + def j(): + frame.groupby() + + tm.assert_raises_regex(TypeError, "You have to supply one of " + "'by' and 'level'", j) + + def k(): + frame.groupby(by=None, level=None) + + tm.assert_raises_regex(TypeError, "You have to supply one of " + "'by' and 'level'", k) + + @pytest.mark.parametrize('sort,labels', [ + [True, [2, 2, 2, 0, 0, 1, 1, 3, 3, 3]], + [False, [0, 0, 0, 1, 1, 2, 2, 3, 3, 3]] + ]) + def test_level_preserve_order(self, sort, labels): + # GH 17537 + grouped = self.mframe.groupby(level=0, sort=sort) + exp_labels = np.array(labels, np.intp) + assert_almost_equal(grouped.grouper.labels[0], exp_labels) + + def test_grouping_labels(self): + grouped = self.mframe.groupby(self.mframe.index.get_level_values(0)) + exp_labels = np.array([2, 2, 2, 0, 0, 1, 1, 3, 3, 3], dtype=np.intp) + assert_almost_equal(grouped.grouper.labels[0], exp_labels) + + +# get_group +# -------------------------------- + +class TestGetGroup(MixIn): + + def test_get_group(self): + with catch_warnings(record=True): + wp = tm.makePanel() + grouped = wp.groupby(lambda x: x.month, axis='major') + + gp = grouped.get_group(1) + expected = wp.reindex( + major=[x for x in wp.major_axis if x.month == 1]) + assert_panel_equal(gp, expected) + + # GH 5267 + # be datelike friendly + df = DataFrame({'DATE': pd.to_datetime( + ['10-Oct-2013', '10-Oct-2013', '10-Oct-2013', '11-Oct-2013', + '11-Oct-2013', '11-Oct-2013']), + 'label': ['foo', 'foo', 'bar', 'foo', 'foo', 'bar'], + 'VAL': [1, 2, 3, 4, 5, 6]}) + + g = df.groupby('DATE') + key = list(g.groups)[0] + result1 = g.get_group(key) + result2 = g.get_group(Timestamp(key).to_pydatetime()) + result3 = g.get_group(str(Timestamp(key))) + assert_frame_equal(result1, result2) + assert_frame_equal(result1, result3) + + g = df.groupby(['DATE', 'label']) + + key = list(g.groups)[0] + result1 = g.get_group(key) + result2 = g.get_group((Timestamp(key[0]).to_pydatetime(), key[1])) + result3 = g.get_group((str(Timestamp(key[0])), key[1])) + 
assert_frame_equal(result1, result2) + assert_frame_equal(result1, result3) + + # must pass a same-length tuple with multiple keys + pytest.raises(ValueError, lambda: g.get_group('foo')) + pytest.raises(ValueError, lambda: g.get_group(('foo'))) + pytest.raises(ValueError, + lambda: g.get_group(('foo', 'bar', 'baz'))) + + def test_get_group_empty_bins(self): + + d = pd.DataFrame([3, 1, 7, 6]) + bins = [0, 5, 10, 15] + g = d.groupby(pd.cut(d[0], bins)) + + # TODO: should prob allow a str of Interval work as well + # IOW '(0, 5]' + result = g.get_group(pd.Interval(0, 5)) + expected = DataFrame([3, 1], index=[0, 1]) + assert_frame_equal(result, expected) + + pytest.raises(KeyError, lambda: g.get_group(pd.Interval(10, 15))) + + def test_get_group_grouped_by_tuple(self): + # GH 8121 + df = DataFrame([[(1, ), (1, 2), (1, ), (1, 2)]], index=['ids']).T + gr = df.groupby('ids') + expected = DataFrame({'ids': [(1, ), (1, )]}, index=[0, 2]) + result = gr.get_group((1, )) + assert_frame_equal(result, expected) + + dt = pd.to_datetime(['2010-01-01', '2010-01-02', '2010-01-01', + '2010-01-02']) + df = DataFrame({'ids': [(x, ) for x in dt]}) + gr = df.groupby('ids') + result = gr.get_group(('2010-01-01', )) + expected = DataFrame({'ids': [(dt[0], ), (dt[0], )]}, index=[0, 2]) + assert_frame_equal(result, expected) + + def test_groupby_with_empty(self): + index = pd.DatetimeIndex(()) + data = () + series = pd.Series(data, index) + grouper = pd.Grouper(freq='D') + grouped = series.groupby(grouper) + assert next(iter(grouped), None) is None + + def test_groupby_with_single_column(self): + df = pd.DataFrame({'a': list('abssbab')}) + tm.assert_frame_equal(df.groupby('a').get_group('a'), df.iloc[[0, 5]]) + # GH 13530 + exp = pd.DataFrame([], index=pd.Index(['a', 'b', 's'], name='a')) + tm.assert_frame_equal(df.groupby('a').count(), exp) + tm.assert_frame_equal(df.groupby('a').sum(), exp) + tm.assert_frame_equal(df.groupby('a').nth(1), exp) + + def test_gb_key_len_equal_axis_len(self): + 
# GH16843 + # test ensures that index and column keys are recognized correctly + # when number of keys equals axis length of groupby + df = pd.DataFrame([['foo', 'bar', 'B', 1], + ['foo', 'bar', 'B', 2], + ['foo', 'baz', 'C', 3]], + columns=['first', 'second', 'third', 'one']) + df = df.set_index(['first', 'second']) + df = df.groupby(['first', 'second', 'third']).size() + assert df.loc[('foo', 'bar', 'B')] == 2 + assert df.loc[('foo', 'baz', 'C')] == 1 + + +# groups & iteration +# -------------------------------- + +class TestIteration(MixIn): + + def test_groups(self): + grouped = self.df.groupby(['A']) + groups = grouped.groups + assert groups is grouped.groups # caching works + + for k, v in compat.iteritems(grouped.groups): + assert (self.df.loc[v]['A'] == k).all() + + grouped = self.df.groupby(['A', 'B']) + groups = grouped.groups + assert groups is grouped.groups # caching works + + for k, v in compat.iteritems(grouped.groups): + assert (self.df.loc[v]['A'] == k[0]).all() + assert (self.df.loc[v]['B'] == k[1]).all() + + def test_grouping_is_iterable(self): + # this code path isn't used anywhere else + # not sure it's useful + grouped = self.tsframe.groupby([lambda x: x.weekday(), lambda x: x.year + ]) + + # test it works + for g in grouped.grouper.groupings[0]: + pass + + def test_multi_iter(self): + s = Series(np.arange(6)) + k1 = np.array(['a', 'a', 'a', 'b', 'b', 'b']) + k2 = np.array(['1', '2', '1', '2', '1', '2']) + + grouped = s.groupby([k1, k2]) + + iterated = list(grouped) + expected = [('a', '1', s[[0, 2]]), ('a', '2', s[[1]]), + ('b', '1', s[[4]]), ('b', '2', s[[3, 5]])] + for i, ((one, two), three) in enumerate(iterated): + e1, e2, e3 = expected[i] + assert e1 == one + assert e2 == two + assert_series_equal(three, e3) + + def test_multi_iter_frame(self): + k1 = np.array(['b', 'b', 'b', 'a', 'a', 'a']) + k2 = np.array(['1', '2', '1', '2', '1', '2']) + df = DataFrame({'v1': np.random.randn(6), + 'v2': np.random.randn(6), + 'k1': k1, 'k2': k2}, + 
index=['one', 'two', 'three', 'four', 'five', 'six']) + + grouped = df.groupby(['k1', 'k2']) + + # things get sorted! + iterated = list(grouped) + idx = df.index + expected = [('a', '1', df.loc[idx[[4]]]), + ('a', '2', df.loc[idx[[3, 5]]]), + ('b', '1', df.loc[idx[[0, 2]]]), + ('b', '2', df.loc[idx[[1]]])] + for i, ((one, two), three) in enumerate(iterated): + e1, e2, e3 = expected[i] + assert e1 == one + assert e2 == two + assert_frame_equal(three, e3) + + # don't iterate through groups with no data + df['k1'] = np.array(['b', 'b', 'b', 'a', 'a', 'a']) + df['k2'] = np.array(['1', '1', '1', '2', '2', '2']) + grouped = df.groupby(['k1', 'k2']) + groups = {} + for key, gp in grouped: + groups[key] = gp + assert len(groups) == 2 + + # axis = 1 + three_levels = self.three_group.groupby(['A', 'B', 'C']).mean() + grouped = three_levels.T.groupby(axis=1, level=(1, 2)) + for key, group in grouped: + pass + + def test_multi_iter_panel(self): + with catch_warnings(record=True): + wp = tm.makePanel() + grouped = wp.groupby([lambda x: x.month, lambda x: x.weekday()], + axis=1) + + for (month, wd), group in grouped: + exp_axis = [x + for x in wp.major_axis + if x.month == month and x.weekday() == wd] + expected = wp.reindex(major=exp_axis) + assert_panel_equal(group, expected) + + def test_dictify(self): + dict(iter(self.df.groupby('A'))) + dict(iter(self.df.groupby(['A', 'B']))) + dict(iter(self.df['C'].groupby(self.df['A']))) + dict(iter(self.df['C'].groupby([self.df['A'], self.df['B']]))) + dict(iter(self.df.groupby('A')['C'])) + dict(iter(self.df.groupby(['A', 'B'])['C'])) + + def test_groupby_with_small_elem(self): + # GH 8542 + # length=2 + df = pd.DataFrame({'event': ['start', 'start'], + 'change': [1234, 5678]}, + index=pd.DatetimeIndex(['2014-09-10', '2013-10-10'])) + grouped = df.groupby([pd.Grouper(freq='M'), 'event']) + assert len(grouped.groups) == 2 + assert grouped.ngroups == 2 + assert (pd.Timestamp('2014-09-30'), 'start') in grouped.groups + assert 
(pd.Timestamp('2013-10-31'), 'start') in grouped.groups + + res = grouped.get_group((pd.Timestamp('2014-09-30'), 'start')) + tm.assert_frame_equal(res, df.iloc[[0], :]) + res = grouped.get_group((pd.Timestamp('2013-10-31'), 'start')) + tm.assert_frame_equal(res, df.iloc[[1], :]) + + df = pd.DataFrame({'event': ['start', 'start', 'start'], + 'change': [1234, 5678, 9123]}, + index=pd.DatetimeIndex(['2014-09-10', '2013-10-10', + '2014-09-15'])) + grouped = df.groupby([pd.Grouper(freq='M'), 'event']) + assert len(grouped.groups) == 2 + assert grouped.ngroups == 2 + assert (pd.Timestamp('2014-09-30'), 'start') in grouped.groups + assert (pd.Timestamp('2013-10-31'), 'start') in grouped.groups + + res = grouped.get_group((pd.Timestamp('2014-09-30'), 'start')) + tm.assert_frame_equal(res, df.iloc[[0, 2], :]) + res = grouped.get_group((pd.Timestamp('2013-10-31'), 'start')) + tm.assert_frame_equal(res, df.iloc[[1], :]) + + # length=3 + df = pd.DataFrame({'event': ['start', 'start', 'start'], + 'change': [1234, 5678, 9123]}, + index=pd.DatetimeIndex(['2014-09-10', '2013-10-10', + '2014-08-05'])) + grouped = df.groupby([pd.Grouper(freq='M'), 'event']) + assert len(grouped.groups) == 3 + assert grouped.ngroups == 3 + assert (pd.Timestamp('2014-09-30'), 'start') in grouped.groups + assert (pd.Timestamp('2013-10-31'), 'start') in grouped.groups + assert (pd.Timestamp('2014-08-31'), 'start') in grouped.groups + + res = grouped.get_group((pd.Timestamp('2014-09-30'), 'start')) + tm.assert_frame_equal(res, df.iloc[[0], :]) + res = grouped.get_group((pd.Timestamp('2013-10-31'), 'start')) + tm.assert_frame_equal(res, df.iloc[[1], :]) + res = grouped.get_group((pd.Timestamp('2014-08-31'), 'start')) + tm.assert_frame_equal(res, df.iloc[[2], :]) + + def test_grouping_string_repr(self): + # GH 13394 + mi = MultiIndex.from_arrays([list("AAB"), list("aba")]) + df = DataFrame([[1, 2, 3]], columns=mi) + gr = df.groupby(df[('A', 'a')]) + + result = gr.grouper.groupings[0].__repr__() + expected 
= "Grouping(('A', 'a'))" + assert result == expected diff --git a/pandas/tests/groupby/test_index_as_string.py b/pandas/tests/groupby/test_index_as_string.py new file mode 100644 index 0000000000000..9fe677664049e --- /dev/null +++ b/pandas/tests/groupby/test_index_as_string.py @@ -0,0 +1,116 @@ +import pytest +import pandas as pd +import numpy as np + +from pandas.util.testing import assert_frame_equal, assert_series_equal +import pandas.util.testing as tm + + +@pytest.fixture(params=[['inner'], ['inner', 'outer']]) +def frame(request): + levels = request.param + df = pd.DataFrame({'outer': ['a', 'a', 'a', 'b', 'b', 'b'], + 'inner': [1, 2, 3, 1, 2, 3], + 'A': np.arange(6), + 'B': ['one', 'one', 'two', 'two', 'one', 'one']}) + if levels: + df = df.set_index(levels) + + return df + + +@pytest.fixture() +def series(): + df = pd.DataFrame({'outer': ['a', 'a', 'a', 'b', 'b', 'b'], + 'inner': [1, 2, 3, 1, 2, 3], + 'A': np.arange(6), + 'B': ['one', 'one', 'two', 'two', 'one', 'one']}) + s = df.set_index(['outer', 'inner', 'B'])['A'] + + return s + + +@pytest.mark.parametrize('key_strs,groupers', [ + ('inner', # Index name + pd.Grouper(level='inner') + ), + (['inner'], # List of index name + [pd.Grouper(level='inner')] + ), + (['B', 'inner'], # Column and index + ['B', pd.Grouper(level='inner')] + ), + (['inner', 'B'], # Index and column + [pd.Grouper(level='inner'), 'B'])]) +def test_grouper_index_level_as_string(frame, key_strs, groupers): + result = frame.groupby(key_strs).mean() + expected = frame.groupby(groupers).mean() + assert_frame_equal(result, expected) + + +@pytest.mark.parametrize('levels', [ + 'inner', 'outer', 'B', + ['inner'], ['outer'], ['B'], + ['inner', 'outer'], ['outer', 'inner'], + ['inner', 'outer', 'B'], ['B', 'outer', 'inner'] +]) +def test_grouper_index_level_as_string_series(series, levels): + + # Compute expected result + if isinstance(levels, list): + groupers = [pd.Grouper(level=lv) for lv in levels] + else: + groupers = 
pd.Grouper(level=levels) + + expected = series.groupby(groupers).mean() + + # Compute and check result + result = series.groupby(levels).mean() + assert_series_equal(result, expected) + + +@pytest.mark.parametrize('key_strs,key_groupers,level_groupers', [ + ('inner', # Index name + pd.Grouper(key='inner'), + pd.Grouper(level='inner'), + ), + (['inner'], # List of index name + [pd.Grouper(key='inner')], + [pd.Grouper(level='inner')] + ), + (['B', 'inner'], # Column and index + ['B', pd.Grouper(key='inner')], + ['B', pd.Grouper(level='inner')] + ), + (['inner', 'B'], # Index and column + [pd.Grouper(key='inner'), 'B'], + [pd.Grouper(level='inner'), 'B'])]) +def test_grouper_column_index_level_precedence(frame, + key_strs, + key_groupers, + level_groupers): + + # GH 5677, when a string passed as the `by` parameter + # matches a column and an index level the column takes + # precedence and a FutureWarning is raised + + # Add 'inner' column to frame + # (frame already has an 'inner' index) + frame['inner'] = [1, 1, 1, 1, 1, 1] + + # Performing a groupby with strings should produce warning + with tm.assert_produces_warning(FutureWarning): + result = frame.groupby(key_strs).mean() + + # Grouping with key Grouper should produce the same result and no warning + with tm.assert_produces_warning(False): + expected = frame.groupby(key_groupers).mean() + + assert_frame_equal(result, expected) + + # Grouping with level Grouper should produce a different result but + # still no warning + with tm.assert_produces_warning(False): + not_expected = frame.groupby(level_groupers).mean() + + assert not result.index.equals(not_expected.index) diff --git a/pandas/tests/groupby/test_misc.py b/pandas/tests/groupby/test_misc.py deleted file mode 100644 index 9395304385681..0000000000000 --- a/pandas/tests/groupby/test_misc.py +++ /dev/null @@ -1,101 +0,0 @@ -""" misc non-groupby routines, as they are defined in core/groupby.py """ - -import pytest -import numpy as np -from numpy import nan 
-from pandas.util import testing as tm -from pandas.core.groupby import _nargsort, _lexsort_indexer - - -class TestSorting(tm.TestCase): - - def test_lexsort_indexer(self): - keys = [[nan] * 5 + list(range(100)) + [nan] * 5] - # orders=True, na_position='last' - result = _lexsort_indexer(keys, orders=True, na_position='last') - exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) - tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) - - # orders=True, na_position='first' - result = _lexsort_indexer(keys, orders=True, na_position='first') - exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) - tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) - - # orders=False, na_position='last' - result = _lexsort_indexer(keys, orders=False, na_position='last') - exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) - tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) - - # orders=False, na_position='first' - result = _lexsort_indexer(keys, orders=False, na_position='first') - exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) - tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) - - def test_nargsort(self): - # np.argsort(items) places NaNs last - items = [nan] * 5 + list(range(100)) + [nan] * 5 - # np.argsort(items2) may not place NaNs first - items2 = np.array(items, dtype='O') - - try: - # GH 2785; due to a regression in NumPy1.6.2 - np.argsort(np.array([[1, 2], [1, 3], [1, 2]], dtype='i')) - np.argsort(items2, kind='mergesort') - except TypeError: - pytest.skip('requested sort not available for type') - - # mergesort is the most difficult to get right because we want it to be - # stable. 
- - # According to numpy/core/tests/test_multiarray, """The number of - # sorted items must be greater than ~50 to check the actual algorithm - # because quick and merge sort fall over to insertion sort for small - # arrays.""" - - # mergesort, ascending=True, na_position='last' - result = _nargsort(items, kind='mergesort', ascending=True, - na_position='last') - exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) - - # mergesort, ascending=True, na_position='first' - result = _nargsort(items, kind='mergesort', ascending=True, - na_position='first') - exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) - - # mergesort, ascending=False, na_position='last' - result = _nargsort(items, kind='mergesort', ascending=False, - na_position='last') - exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) - - # mergesort, ascending=False, na_position='first' - result = _nargsort(items, kind='mergesort', ascending=False, - na_position='first') - exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) - - # mergesort, ascending=True, na_position='last' - result = _nargsort(items2, kind='mergesort', ascending=True, - na_position='last') - exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) - - # mergesort, ascending=True, na_position='first' - result = _nargsort(items2, kind='mergesort', ascending=True, - na_position='first') - exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) - - # mergesort, ascending=False, na_position='last' - result = _nargsort(items2, 
kind='mergesort', ascending=False, - na_position='last') - exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) - - # mergesort, ascending=False, na_position='first' - result = _nargsort(items2, kind='mergesort', ascending=False, - na_position='first') - exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) diff --git a/pandas/tests/groupby/test_nth.py b/pandas/tests/groupby/test_nth.py new file mode 100644 index 0000000000000..ccde545b5b8e9 --- /dev/null +++ b/pandas/tests/groupby/test_nth.py @@ -0,0 +1,331 @@ +import numpy as np +import pandas as pd +from pandas import DataFrame, MultiIndex, Index, Series, isna +from pandas.compat import lrange +from pandas.util.testing import ( + assert_frame_equal, + assert_produces_warning, + assert_series_equal) + +from .common import MixIn + + +class TestNth(MixIn): + + def test_first_last_nth(self): + # tests for first / last / nth + grouped = self.df.groupby('A') + first = grouped.first() + expected = self.df.loc[[1, 0], ['B', 'C', 'D']] + expected.index = Index(['bar', 'foo'], name='A') + expected = expected.sort_index() + assert_frame_equal(first, expected) + + nth = grouped.nth(0) + assert_frame_equal(nth, expected) + + last = grouped.last() + expected = self.df.loc[[5, 7], ['B', 'C', 'D']] + expected.index = Index(['bar', 'foo'], name='A') + assert_frame_equal(last, expected) + + nth = grouped.nth(-1) + assert_frame_equal(nth, expected) + + nth = grouped.nth(1) + expected = self.df.loc[[2, 3], ['B', 'C', 'D']].copy() + expected.index = Index(['foo', 'bar'], name='A') + expected = expected.sort_index() + assert_frame_equal(nth, expected) + + # it works! 
+ grouped['B'].first() + grouped['B'].last() + grouped['B'].nth(0) + + self.df.loc[self.df['A'] == 'foo', 'B'] = np.nan + assert isna(grouped['B'].first()['foo']) + assert isna(grouped['B'].last()['foo']) + assert isna(grouped['B'].nth(0)['foo']) + + # v0.14.0 whatsnew + df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B']) + g = df.groupby('A') + result = g.first() + expected = df.iloc[[1, 2]].set_index('A') + assert_frame_equal(result, expected) + + expected = df.iloc[[1, 2]].set_index('A') + result = g.nth(0, dropna='any') + assert_frame_equal(result, expected) + + def test_first_last_nth_dtypes(self): + + df = self.df_mixed_floats.copy() + df['E'] = True + df['F'] = 1 + + # tests for first / last / nth + grouped = df.groupby('A') + first = grouped.first() + expected = df.loc[[1, 0], ['B', 'C', 'D', 'E', 'F']] + expected.index = Index(['bar', 'foo'], name='A') + expected = expected.sort_index() + assert_frame_equal(first, expected) + + last = grouped.last() + expected = df.loc[[5, 7], ['B', 'C', 'D', 'E', 'F']] + expected.index = Index(['bar', 'foo'], name='A') + expected = expected.sort_index() + assert_frame_equal(last, expected) + + nth = grouped.nth(1) + expected = df.loc[[3, 2], ['B', 'C', 'D', 'E', 'F']] + expected.index = Index(['bar', 'foo'], name='A') + expected = expected.sort_index() + assert_frame_equal(nth, expected) + + # GH 2763, first/last shifting dtypes + idx = lrange(10) + idx.append(9) + s = Series(data=lrange(11), index=idx, name='IntCol') + assert s.dtype == 'int64' + f = s.groupby(level=0).first() + assert f.dtype == 'int64' + + def test_nth(self): + df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B']) + g = df.groupby('A') + + assert_frame_equal(g.nth(0), df.iloc[[0, 2]].set_index('A')) + assert_frame_equal(g.nth(1), df.iloc[[1]].set_index('A')) + assert_frame_equal(g.nth(2), df.loc[[]].set_index('A')) + assert_frame_equal(g.nth(-1), df.iloc[[1, 2]].set_index('A')) + assert_frame_equal(g.nth(-2), 
df.iloc[[0]].set_index('A')) + assert_frame_equal(g.nth(-3), df.loc[[]].set_index('A')) + assert_series_equal(g.B.nth(0), df.set_index('A').B.iloc[[0, 2]]) + assert_series_equal(g.B.nth(1), df.set_index('A').B.iloc[[1]]) + assert_frame_equal(g[['B']].nth(0), + df.loc[[0, 2], ['A', 'B']].set_index('A')) + + exp = df.set_index('A') + assert_frame_equal(g.nth(0, dropna='any'), exp.iloc[[1, 2]]) + assert_frame_equal(g.nth(-1, dropna='any'), exp.iloc[[1, 2]]) + + exp['B'] = np.nan + assert_frame_equal(g.nth(7, dropna='any'), exp.iloc[[1, 2]]) + assert_frame_equal(g.nth(2, dropna='any'), exp.iloc[[1, 2]]) + + # out of bounds, regression from 0.13.1 + # GH 6621 + df = DataFrame({'color': {0: 'green', + 1: 'green', + 2: 'red', + 3: 'red', + 4: 'red'}, + 'food': {0: 'ham', + 1: 'eggs', + 2: 'eggs', + 3: 'ham', + 4: 'pork'}, + 'two': {0: 1.5456590000000001, + 1: -0.070345000000000005, + 2: -2.4004539999999999, + 3: 0.46206000000000003, + 4: 0.52350799999999997}, + 'one': {0: 0.56573799999999996, + 1: -0.9742360000000001, + 2: 1.033801, + 3: -0.78543499999999999, + 4: 0.70422799999999997}}).set_index(['color', + 'food']) + + result = df.groupby(level=0, as_index=False).nth(2) + expected = df.iloc[[-1]] + assert_frame_equal(result, expected) + + result = df.groupby(level=0, as_index=False).nth(3) + expected = df.loc[[]] + assert_frame_equal(result, expected) + + # GH 7559 + # from the vbench + df = DataFrame(np.random.randint(1, 10, (100, 2)), dtype='int64') + s = df[1] + g = df[0] + expected = s.groupby(g).first() + expected2 = s.groupby(g).apply(lambda x: x.iloc[0]) + assert_series_equal(expected2, expected, check_names=False) + assert expected.name == 1 + assert expected2.name == 1 + + # validate first + v = s[g == 1].iloc[0] + assert expected.iloc[0] == v + assert expected2.iloc[0] == v + + # this is NOT the same as .first (as sorted is default!) 
+ # as it keeps the order in the series (and not the group order) + # related GH 7287 + expected = s.groupby(g, sort=False).first() + result = s.groupby(g, sort=False).nth(0, dropna='all') + assert_series_equal(result, expected) + + # doc example + df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B']) + g = df.groupby('A') + # PR 17493, related to issue 11038 + # test Series.nth with True for dropna produces FutureWarning + with assert_produces_warning(FutureWarning): + result = g.B.nth(0, dropna=True) + expected = g.B.first() + assert_series_equal(result, expected) + + # test multiple nth values + df = DataFrame([[1, np.nan], [1, 3], [1, 4], [5, 6], [5, 7]], + columns=['A', 'B']) + g = df.groupby('A') + + assert_frame_equal(g.nth(0), df.iloc[[0, 3]].set_index('A')) + assert_frame_equal(g.nth([0]), df.iloc[[0, 3]].set_index('A')) + assert_frame_equal(g.nth([0, 1]), df.iloc[[0, 1, 3, 4]].set_index('A')) + assert_frame_equal( + g.nth([0, -1]), df.iloc[[0, 2, 3, 4]].set_index('A')) + assert_frame_equal( + g.nth([0, 1, 2]), df.iloc[[0, 1, 2, 3, 4]].set_index('A')) + assert_frame_equal( + g.nth([0, 1, -1]), df.iloc[[0, 1, 2, 3, 4]].set_index('A')) + assert_frame_equal(g.nth([2]), df.iloc[[2]].set_index('A')) + assert_frame_equal(g.nth([3, 4]), df.loc[[]].set_index('A')) + + business_dates = pd.date_range(start='4/1/2014', end='6/30/2014', + freq='B') + df = DataFrame(1, index=business_dates, columns=['a', 'b']) + # get the first, fourth and last two business days for each month + key = [df.index.year, df.index.month] + result = df.groupby(key, as_index=False).nth([0, 3, -2, -1]) + expected_dates = pd.to_datetime( + ['2014/4/1', '2014/4/4', '2014/4/29', '2014/4/30', '2014/5/1', + '2014/5/6', '2014/5/29', '2014/5/30', '2014/6/2', '2014/6/5', + '2014/6/27', '2014/6/30']) + expected = DataFrame(1, columns=['a', 'b'], index=expected_dates) + assert_frame_equal(result, expected) + + def test_nth_multi_index(self): + # PR 9090, related to issue 8979 + # test nth on 
MultiIndex, should match .first() + grouped = self.three_group.groupby(['A', 'B']) + result = grouped.nth(0) + expected = grouped.first() + assert_frame_equal(result, expected) + + def test_nth_multi_index_as_expected(self): + # PR 9090, related to issue 8979 + # test nth on MultiIndex + three_group = DataFrame( + {'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar', + 'foo', 'foo', 'foo'], + 'B': ['one', 'one', 'one', 'two', 'one', 'one', 'one', 'two', + 'two', 'two', 'one'], + 'C': ['dull', 'dull', 'shiny', 'dull', 'dull', 'shiny', 'shiny', + 'dull', 'shiny', 'shiny', 'shiny']}) + grouped = three_group.groupby(['A', 'B']) + result = grouped.nth(0) + expected = DataFrame( + {'C': ['dull', 'dull', 'dull', 'dull']}, + index=MultiIndex.from_arrays([['bar', 'bar', 'foo', 'foo'], + ['one', 'two', 'one', 'two']], + names=['A', 'B'])) + assert_frame_equal(result, expected) + + def test_groupby_head_tail(self): + df = DataFrame([[1, 2], [1, 4], [5, 6]], columns=['A', 'B']) + g_as = df.groupby('A', as_index=True) + g_not_as = df.groupby('A', as_index=False) + + # as_index= False, much easier + assert_frame_equal(df.loc[[0, 2]], g_not_as.head(1)) + assert_frame_equal(df.loc[[1, 2]], g_not_as.tail(1)) + + empty_not_as = DataFrame(columns=df.columns, + index=pd.Index([], dtype=df.index.dtype)) + empty_not_as['A'] = empty_not_as['A'].astype(df.A.dtype) + empty_not_as['B'] = empty_not_as['B'].astype(df.B.dtype) + assert_frame_equal(empty_not_as, g_not_as.head(0)) + assert_frame_equal(empty_not_as, g_not_as.tail(0)) + assert_frame_equal(empty_not_as, g_not_as.head(-1)) + assert_frame_equal(empty_not_as, g_not_as.tail(-1)) + + assert_frame_equal(df, g_not_as.head(7)) # contains all + assert_frame_equal(df, g_not_as.tail(7)) + + # as_index=True, (used to be different) + df_as = df + + assert_frame_equal(df_as.loc[[0, 2]], g_as.head(1)) + assert_frame_equal(df_as.loc[[1, 2]], g_as.tail(1)) + + empty_as = DataFrame(index=df_as.index[:0], columns=df.columns) + empty_as['A'] 
= empty_not_as['A'].astype(df.A.dtype) + empty_as['B'] = empty_not_as['B'].astype(df.B.dtype) + assert_frame_equal(empty_as, g_as.head(0)) + assert_frame_equal(empty_as, g_as.tail(0)) + assert_frame_equal(empty_as, g_as.head(-1)) + assert_frame_equal(empty_as, g_as.tail(-1)) + + assert_frame_equal(df_as, g_as.head(7)) # contains all + assert_frame_equal(df_as, g_as.tail(7)) + + # test with selection + assert_frame_equal(g_as[[]].head(1), df_as.loc[[0, 2], []]) + assert_frame_equal(g_as[['A']].head(1), df_as.loc[[0, 2], ['A']]) + assert_frame_equal(g_as[['B']].head(1), df_as.loc[[0, 2], ['B']]) + assert_frame_equal(g_as[['A', 'B']].head(1), df_as.loc[[0, 2]]) + + assert_frame_equal(g_not_as[[]].head(1), df_as.loc[[0, 2], []]) + assert_frame_equal(g_not_as[['A']].head(1), df_as.loc[[0, 2], ['A']]) + assert_frame_equal(g_not_as[['B']].head(1), df_as.loc[[0, 2], ['B']]) + assert_frame_equal(g_not_as[['A', 'B']].head(1), df_as.loc[[0, 2]]) + + def test_group_selection_cache(self): + # GH 12839 nth, head, and tail should return same result consistently + df = DataFrame([[1, 2], [1, 4], [5, 6]], columns=['A', 'B']) + expected = df.iloc[[0, 2]].set_index('A') + + g = df.groupby('A') + result1 = g.head(n=2) + result2 = g.nth(0) + assert_frame_equal(result1, df) + assert_frame_equal(result2, expected) + + g = df.groupby('A') + result1 = g.tail(n=2) + result2 = g.nth(0) + assert_frame_equal(result1, df) + assert_frame_equal(result2, expected) + + g = df.groupby('A') + result1 = g.nth(0) + result2 = g.head(n=2) + assert_frame_equal(result1, expected) + assert_frame_equal(result2, df) + + g = df.groupby('A') + result1 = g.nth(0) + result2 = g.tail(n=2) + assert_frame_equal(result1, expected) + assert_frame_equal(result2, df) + + +def test_nth_empty(): + # GH 16064 + df = DataFrame(index=[0], columns=['a', 'b', 'c']) + result = df.groupby('a').nth(10) + expected = DataFrame(index=Index([], name='a'), columns=['b', 'c']) + assert_frame_equal(result, expected) + + result = 
df.groupby(['a', 'b']).nth(10) + expected = DataFrame(index=MultiIndex([[], []], [[], []], + names=['a', 'b']), + columns=['c']) + assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index 3142b74b56778..d359bfa5351a9 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -1,17 +1,21 @@ """ test with the TimeGrouper / grouping with datetimes """ +import pytest +import pytz + from datetime import datetime import numpy as np from numpy import nan import pandas as pd -from pandas import DataFrame, date_range, Index, Series, MultiIndex, Timestamp +from pandas import (DataFrame, date_range, Index, + Series, MultiIndex, Timestamp, DatetimeIndex) from pandas.compat import StringIO from pandas.util import testing as tm from pandas.util.testing import assert_frame_equal, assert_series_equal -class TestGroupBy(tm.TestCase): +class TestGroupBy(object): def test_groupby_with_timegrouper(self): # GH 4161 @@ -37,21 +41,20 @@ def test_groupby_with_timegrouper(self): df = df.set_index(['Date']) expected = DataFrame( - {'Quantity': np.nan}, + {'Quantity': 0}, index=date_range('20130901 13:00:00', '20131205 13:00:00', freq='5D', name='Date', closed='left')) - expected.iloc[[0, 6, 18], 0] = np.array( - [24., 6., 9.], dtype='float64') + expected.iloc[[0, 6, 18], 0] = np.array([24, 6, 9], dtype='int64') result1 = df.resample('5D') .sum() assert_frame_equal(result1, expected) df_sorted = df.sort_index() - result2 = df_sorted.groupby(pd.TimeGrouper(freq='5D')).sum() + result2 = df_sorted.groupby(pd.Grouper(freq='5D')).sum() assert_frame_equal(result2, expected) - result3 = df.groupby(pd.TimeGrouper(freq='5D')).sum() + result3 = df.groupby(pd.Grouper(freq='5D')).sum() assert_frame_equal(result3, expected) def test_groupby_with_timegrouper_methods(self): @@ -76,12 +79,12 @@ def test_groupby_with_timegrouper_methods(self): for df in [df_original, df_sorted]: df 
= df.set_index('Date', drop=False) - g = df.groupby(pd.TimeGrouper('6M')) - self.assertTrue(g.group_keys) - self.assertTrue(isinstance(g.grouper, pd.core.groupby.BinGrouper)) + g = df.groupby(pd.Grouper(freq='6M')) + assert g.group_keys + assert isinstance(g.grouper, pd.core.groupby.BinGrouper) groups = g.groups - self.assertTrue(isinstance(groups, dict)) - self.assertTrue(len(groups) == 3) + assert isinstance(groups, dict) + assert len(groups) == 3 def test_timegrouper_with_reg_groups(self): @@ -185,7 +188,7 @@ def test_timegrouper_with_reg_groups(self): ]).sum() assert_frame_equal(result, expected) - with self.assertRaises(KeyError): + with pytest.raises(KeyError): df.groupby([pd.Grouper(freq='1M', key='foo'), 'Buyer']).sum() # passing the level @@ -197,7 +200,7 @@ def test_timegrouper_with_reg_groups(self): ) assert_frame_equal(result, expected) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): df.groupby([pd.Grouper(freq='1M', level='foo'), 'Buyer']).sum() @@ -218,7 +221,7 @@ def test_timegrouper_with_reg_groups(self): assert_frame_equal(result, expected) # error as we have both a level and a name! 
- with self.assertRaises(ValueError): + with pytest.raises(ValueError): df.groupby([pd.Grouper(freq='1M', key='Date', level='Date'), 'Buyer']).sum() @@ -241,6 +244,8 @@ def test_timegrouper_with_reg_groups(self): result = df.groupby([pd.Grouper(freq='1M', key='Date')]).sum() assert_frame_equal(result, expected) + @pytest.mark.parametrize('freq', ['D', 'M', 'A', 'Q-APR']) + def test_timegrouper_with_reg_groups_freq(self, freq): # GH 6764 multiple grouping with/without sort df = DataFrame({ 'date': pd.to_datetime([ @@ -254,20 +259,24 @@ def test_timegrouper_with_reg_groups(self): 'cost1': [12, 15, 10, 24, 39, 1, 0, 90, 45, 34, 1, 12] }).set_index('date') - for freq in ['D', 'M', 'A', 'Q-APR']: - expected = df.groupby('user_id')[ - 'whole_cost'].resample( - freq).sum().dropna().reorder_levels( - ['date', 'user_id']).sort_index().astype('int64') - expected.name = 'whole_cost' - - result1 = df.sort_index().groupby([pd.TimeGrouper(freq=freq), - 'user_id'])['whole_cost'].sum() - assert_series_equal(result1, expected) - - result2 = df.groupby([pd.TimeGrouper(freq=freq), 'user_id'])[ - 'whole_cost'].sum() - assert_series_equal(result2, expected) + expected = ( + df.groupby('user_id')['whole_cost'] + .resample(freq) + .sum(min_count=1) # XXX + .dropna() + .reorder_levels(['date', 'user_id']) + .sort_index() + .astype('int64') + ) + expected.name = 'whole_cost' + + result1 = df.sort_index().groupby([pd.Grouper(freq=freq), + 'user_id'])['whole_cost'].sum() + assert_series_equal(result1, expected) + + result2 = df.groupby([pd.Grouper(freq=freq), 'user_id'])[ + 'whole_cost'].sum() + assert_series_equal(result2, expected) def test_timegrouper_get_group(self): # GH 6914 @@ -336,7 +345,7 @@ def sumfunc_series(x): return pd.Series([x['value'].sum()], ('sum',)) expected = df.groupby(pd.Grouper(key='date')).apply(sumfunc_series) - result = (df_dt.groupby(pd.TimeGrouper(freq='M', key='date')) + result = (df_dt.groupby(pd.Grouper(freq='M', key='date')) .apply(sumfunc_series)) 
assert_frame_equal(result.reset_index(drop=True), expected.reset_index(drop=True)) @@ -354,14 +363,15 @@ def sumfunc_value(x): return x.value.sum() expected = df.groupby(pd.Grouper(key='date')).apply(sumfunc_value) - result = (df_dt.groupby(pd.TimeGrouper(freq='M', key='date')) - .apply(sumfunc_value)) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = (df_dt.groupby(pd.TimeGrouper(freq='M', key='date')) + .apply(sumfunc_value)) assert_series_equal(result.reset_index(drop=True), expected.reset_index(drop=True)) def test_groupby_groups_datetimeindex(self): # #1430 - from pandas.tseries.api import DatetimeIndex periods = 1000 ind = DatetimeIndex(start='2012/1/1', freq='5min', periods=periods) df = DataFrame({'high': np.arange(periods), @@ -370,7 +380,7 @@ def test_groupby_groups_datetimeindex(self): # it works! groups = grouped.groups - tm.assertIsInstance(list(groups.keys())[0], datetime) + assert isinstance(list(groups.keys())[0], datetime) # GH 11442 index = pd.date_range('2015/01/01', periods=5, name='date') @@ -442,7 +452,7 @@ def test_frame_datetime64_handling_groupby(self): (3, np.datetime64('2012-07-04'))], columns=['a', 'date']) result = df.groupby('a').first() - self.assertEqual(result['date'][3], Timestamp('2012-07-03')) + assert result['date'][3] == Timestamp('2012-07-03') def test_groupby_multi_timezone(self): @@ -526,15 +536,15 @@ def test_groupby_first_datetime64(self): df = DataFrame([(1, 1351036800000000000), (2, 1351036800000000000)]) df[1] = df[1].view('M8[ns]') - self.assertTrue(issubclass(df[1].dtype.type, np.datetime64)) + assert issubclass(df[1].dtype.type, np.datetime64) result = df.groupby(level=0).first() got_dt = result[1].dtype - self.assertTrue(issubclass(got_dt.type, np.datetime64)) + assert issubclass(got_dt.type, np.datetime64) result = df[1].groupby(level=0).first() got_dt = result.dtype - self.assertTrue(issubclass(got_dt.type, np.datetime64)) + assert issubclass(got_dt.type, np.datetime64) def 
test_groupby_max_datetime64(self): # GH 5869 @@ -567,16 +577,14 @@ def test_groupby_with_timezone_selection(self): tm.assert_series_equal(df1, df2) def test_timezone_info(self): - # GH 11682 - # Timezone info lost when broadcasting scalar datetime to DataFrame - tm._skip_if_no_pytz() - import pytz + # see gh-11682: Timezone info lost when broadcasting + # scalar datetime to DataFrame df = pd.DataFrame({'a': [1], 'b': [datetime.now(pytz.utc)]}) - self.assertEqual(df['b'][0].tzinfo, pytz.utc) + assert df['b'][0].tzinfo == pytz.utc df = pd.DataFrame({'a': [1, 2, 3]}) df['b'] = datetime.now(pytz.utc) - self.assertEqual(df['b'][0].tzinfo, pytz.utc) + assert df['b'][0].tzinfo == pytz.utc def test_datetime_count(self): df = DataFrame({'a': [1, 2, 3] * 2, @@ -598,7 +606,7 @@ def test_first_last_max_min_on_time_data(self): 'td': [nan, td(days=1), td(days=2), td(days=3), nan]}) df_test.dt = pd.to_datetime(df_test.dt) df_test['group'] = 'A' - df_ref = df_test[df_test.dt.notnull()] + df_ref = df_test[df_test.dt.notna()] grouped_test = df_test.groupby('group') grouped_ref = df_ref.groupby('group') @@ -607,3 +615,35 @@ def test_first_last_max_min_on_time_data(self): assert_frame_equal(grouped_ref.min(), grouped_test.min()) assert_frame_equal(grouped_ref.first(), grouped_test.first()) assert_frame_equal(grouped_ref.last(), grouped_test.last()) + + def test_nunique_with_timegrouper_and_nat(self): + # GH 17575 + test = pd.DataFrame({ + 'time': [Timestamp('2016-06-28 09:35:35'), + pd.NaT, + Timestamp('2016-06-28 16:46:28')], + 'data': ['1', '2', '3']}) + + grouper = pd.Grouper(key='time', freq='h') + result = test.groupby(grouper)['data'].nunique() + expected = test[test.time.notnull()].groupby(grouper)['data'].nunique() + tm.assert_series_equal(result, expected) + + def test_scalar_call_versus_list_call(self): + # Issue: 17530 + data_frame = { + 'location': ['shanghai', 'beijing', 'shanghai'], + 'time': pd.Series(['2017-08-09 13:32:23', '2017-08-11 23:23:15', + '2017-08-11 
22:23:15'], + dtype='datetime64[ns]'), + 'value': [1, 2, 3] + } + data_frame = pd.DataFrame(data_frame).set_index('time') + grouper = pd.Grouper(freq='D') + + grouped = data_frame.groupby(grouper) + result = grouped.count() + grouped = data_frame.groupby([grouper]) + expected = grouped.count() + + assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index cf5e9eb26ff13..bce38b8cf9eed 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -1,10 +1,15 @@ """ test with the .transform """ +import pytest + import numpy as np import pandas as pd from pandas.util import testing as tm -from pandas import Series, DataFrame, Timestamp, MultiIndex, concat -from pandas.types.common import _ensure_platform_int +from pandas import Series, DataFrame, Timestamp, MultiIndex, concat, date_range +from pandas.core.dtypes.common import ( + _ensure_platform_int, is_timedelta64_dtype) +from pandas.compat import StringIO +from pandas._libs import groupby from .common import MixIn, assert_fp_equal from pandas.util.testing import assert_frame_equal, assert_series_equal @@ -12,7 +17,7 @@ from pandas.core.config import option_context -class TestGroupBy(MixIn, tm.TestCase): +class TestGroupBy(MixIn): def test_transform(self): data = Series(np.arange(9) // 3, index=np.arange(9)) @@ -24,7 +29,7 @@ def test_transform(self): grouped = data.groupby(lambda x: x // 3) transformed = grouped.transform(lambda x: x * x.sum()) - self.assertEqual(transformed[7], 12) + assert transformed[7] == 12 # GH 8046 # make sure that we preserve the input order @@ -52,7 +57,7 @@ def demean(arr): # GH 8430 df = tm.makeTimeDataFrame() - g = df.groupby(pd.TimeGrouper('M')) + g = df.groupby(pd.Grouper(freq='M')) g.transform(lambda x: x - 1) # GH 9700 @@ -108,13 +113,13 @@ def test_transform_broadcast(self): grouped = self.ts.groupby(lambda x: x.month) result = grouped.transform(np.mean) - 
self.assert_index_equal(result.index, self.ts.index) + tm.assert_index_equal(result.index, self.ts.index) for _, gp in grouped: assert_fp_equal(result.reindex(gp.index), gp.mean()) grouped = self.tsframe.groupby(lambda x: x.month) result = grouped.transform(np.mean) - self.assert_index_equal(result.index, self.tsframe.index) + tm.assert_index_equal(result.index, self.tsframe.index) for _, gp in grouped: agged = gp.mean() res = result.reindex(gp.index) @@ -125,8 +130,8 @@ def test_transform_broadcast(self): grouped = self.tsframe.groupby({'A': 0, 'B': 0, 'C': 1, 'D': 1}, axis=1) result = grouped.transform(np.mean) - self.assert_index_equal(result.index, self.tsframe.index) - self.assert_index_equal(result.columns, self.tsframe.columns) + tm.assert_index_equal(result.index, self.tsframe.index) + tm.assert_index_equal(result.columns, self.tsframe.columns) for _, gp in grouped: agged = gp.mean(1) res = result.reindex(columns=gp.columns) @@ -190,6 +195,82 @@ def test_transform_bug(self): expected = Series(np.arange(5, 0, step=-1), name='B') assert_series_equal(result, expected) + def test_transform_numeric_to_boolean(self): + # GH 16875 + # inconsistency in transforming boolean values + expected = pd.Series([True, True], name='A') + + df = pd.DataFrame({'A': [1.1, 2.2], 'B': [1, 2]}) + result = df.groupby('B').A.transform(lambda x: True) + assert_series_equal(result, expected) + + df = pd.DataFrame({'A': [1, 2], 'B': [1, 2]}) + result = df.groupby('B').A.transform(lambda x: True) + assert_series_equal(result, expected) + + def test_transform_datetime_to_timedelta(self): + # GH 15429 + # transforming a datetime to timedelta + df = DataFrame(dict(A=Timestamp('20130101'), B=np.arange(5))) + expected = pd.Series([ + Timestamp('20130101') - Timestamp('20130101')] * 5, name='A') + + # this does date math without changing result type in transform + base_time = df['A'][0] + result = df.groupby('A')['A'].transform( + lambda x: x.max() - x.min() + base_time) - base_time + 
assert_series_equal(result, expected) + + # this does date math and causes the transform to return timedelta + result = df.groupby('A')['A'].transform(lambda x: x.max() - x.min()) + assert_series_equal(result, expected) + + def test_transform_datetime_to_numeric(self): + # GH 10972 + # convert dt to float + df = DataFrame({ + 'a': 1, 'b': date_range('2015-01-01', periods=2, freq='D')}) + result = df.groupby('a').b.transform( + lambda x: x.dt.dayofweek - x.dt.dayofweek.mean()) + + expected = Series([-0.5, 0.5], name='b') + assert_series_equal(result, expected) + + # convert dt to int + df = DataFrame({ + 'a': 1, 'b': date_range('2015-01-01', periods=2, freq='D')}) + result = df.groupby('a').b.transform( + lambda x: x.dt.dayofweek - x.dt.dayofweek.min()) + + expected = Series([0, 1], name='b') + assert_series_equal(result, expected) + + def test_transform_casting(self): + # 13046 + data = """ + idx A ID3 DATETIME + 0 B-028 b76cd912ff "2014-10-08 13:43:27" + 1 B-054 4a57ed0b02 "2014-10-08 14:26:19" + 2 B-076 1a682034f8 "2014-10-08 14:29:01" + 3 B-023 b76cd912ff "2014-10-08 18:39:34" + 4 B-023 f88g8d7sds "2014-10-08 18:40:18" + 5 B-033 b76cd912ff "2014-10-08 18:44:30" + 6 B-032 b76cd912ff "2014-10-08 18:46:00" + 7 B-037 b76cd912ff "2014-10-08 18:52:15" + 8 B-046 db959faf02 "2014-10-08 18:59:59" + 9 B-053 b76cd912ff "2014-10-08 19:17:48" + 10 B-065 b76cd912ff "2014-10-08 19:21:38" + """ + df = pd.read_csv(StringIO(data), sep=r'\s+', + index_col=[0], parse_dates=['DATETIME']) + + result = df.groupby('ID3')['DATETIME'].transform(lambda x: x.diff()) + assert is_timedelta64_dtype(result.dtype) + + result = df[['ID3', 'DATETIME']].groupby('ID3').transform( + lambda x: x.diff()) + assert is_timedelta64_dtype(result.DATETIME.dtype) + def test_transform_multiple(self): grouped = self.ts.groupby([lambda x: x.year, lambda x: x.month]) @@ -340,7 +421,7 @@ def f(group): grouped = df.groupby('c') result = grouped.apply(f) - self.assertEqual(result['d'].dtype, np.float64) + assert 
result['d'].dtype == np.float64 # this is by definition a mutating operation! with option_context('mode.chained_assignment', None): @@ -353,8 +434,8 @@ def test_cython_group_transform_algos(self): dtypes = [np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint32, np.uint64, np.float32, np.float64] - ops = [(pd.algos.group_cumprod_float64, np.cumproduct, [np.float64]), - (pd.algos.group_cumsum, np.cumsum, dtypes)] + ops = [(groupby.group_cumprod_float64, np.cumproduct, [np.float64]), + (groupby.group_cumsum, np.cumsum, dtypes)] is_datetimelike = False for pd_op, np_op, dtypes in ops: @@ -363,8 +444,8 @@ def test_cython_group_transform_algos(self): ans = np.zeros_like(data) labels = np.array([0, 0, 0, 0], dtype=np.int64) pd_op(ans, data, labels, is_datetimelike) - self.assert_numpy_array_equal(np_op(data), ans[:, 0], - check_dtype=False) + tm.assert_numpy_array_equal(np_op(data), ans[:, 0], + check_dtype=False) # with nans labels = np.array([0, 0, 0, 0, 0], dtype=np.int64) @@ -372,53 +453,87 @@ def test_cython_group_transform_algos(self): data = np.array([[1], [2], [3], [np.nan], [4]], dtype='float64') actual = np.zeros_like(data) actual.fill(np.nan) - pd.algos.group_cumprod_float64(actual, data, labels, is_datetimelike) + groupby.group_cumprod_float64(actual, data, labels, is_datetimelike) expected = np.array([1, 2, 6, np.nan, 24], dtype='float64') - self.assert_numpy_array_equal(actual[:, 0], expected) + tm.assert_numpy_array_equal(actual[:, 0], expected) actual = np.zeros_like(data) actual.fill(np.nan) - pd.algos.group_cumsum(actual, data, labels, is_datetimelike) + groupby.group_cumsum(actual, data, labels, is_datetimelike) expected = np.array([1, 3, 6, np.nan, 10], dtype='float64') - self.assert_numpy_array_equal(actual[:, 0], expected) + tm.assert_numpy_array_equal(actual[:, 0], expected) # timedelta is_datetimelike = True data = np.array([np.timedelta64(1, 'ns')] * 5, dtype='m8[ns]')[:, None] actual = np.zeros_like(data, dtype='int64') - 
pd.algos.group_cumsum(actual, data.view('int64'), labels, - is_datetimelike) + groupby.group_cumsum(actual, data.view('int64'), labels, + is_datetimelike) expected = np.array([np.timedelta64(1, 'ns'), np.timedelta64( 2, 'ns'), np.timedelta64(3, 'ns'), np.timedelta64(4, 'ns'), np.timedelta64(5, 'ns')]) - self.assert_numpy_array_equal(actual[:, 0].view('m8[ns]'), expected) - - def test_cython_transform(self): + tm.assert_numpy_array_equal(actual[:, 0].view('m8[ns]'), expected) + + @pytest.mark.parametrize( + "op, args, targop", + [('cumprod', (), lambda x: x.cumprod()), + ('cumsum', (), lambda x: x.cumsum()), + ('shift', (-1, ), lambda x: x.shift(-1)), + ('shift', (1, ), lambda x: x.shift())]) + def test_cython_transform_series(self, op, args, targop): # GH 4095 - ops = [(('cumprod', - ()), lambda x: x.cumprod()), (('cumsum', ()), - lambda x: x.cumsum()), - (('shift', (-1, )), - lambda x: x.shift(-1)), (('shift', - (1, )), lambda x: x.shift())] - s = Series(np.random.randn(1000)) s_missing = s.copy() s_missing.iloc[2:10] = np.nan labels = np.random.randint(0, 50, size=1000).astype(float) # series - for (op, args), targop in ops: - for data in [s, s_missing]: - # print(data.head()) - expected = data.groupby(labels).transform(targop) - - tm.assert_series_equal(expected, - data.groupby(labels).transform(op, - *args)) - tm.assert_series_equal(expected, getattr( - data.groupby(labels), op)(*args)) - + for data in [s, s_missing]: + # print(data.head()) + expected = data.groupby(labels).transform(targop) + + tm.assert_series_equal( + expected, + data.groupby(labels).transform(op, *args)) + tm.assert_series_equal(expected, getattr( + data.groupby(labels), op)(*args)) + + @pytest.mark.parametrize("op", ['cumprod', 'cumsum']) + @pytest.mark.parametrize("skipna", [False, True]) + @pytest.mark.parametrize('input, exp', [ + # When everything is NaN + ({'key': ['b'] * 10, 'value': np.nan}, + pd.Series([np.nan] * 10, name='value')), + # When there is a single NaN + ({'key': ['b'] * 
10 + ['a'] * 2, + 'value': [3] * 3 + [np.nan] + [3] * 8}, + {('cumprod', False): [3.0, 9.0, 27.0] + [np.nan] * 7 + [3.0, 9.0], + ('cumprod', True): [3.0, 9.0, 27.0, np.nan, 81., 243., 729., + 2187., 6561., 19683., 3.0, 9.0], + ('cumsum', False): [3.0, 6.0, 9.0] + [np.nan] * 7 + [3.0, 6.0], + ('cumsum', True): [3.0, 6.0, 9.0, np.nan, 12., 15., 18., + 21., 24., 27., 3.0, 6.0]})]) + def test_groupby_cum_skipna(self, op, skipna, input, exp): + df = pd.DataFrame(input) + result = df.groupby('key')['value'].transform(op, skipna=skipna) + if isinstance(exp, dict): + expected = exp[(op, skipna)] + else: + expected = exp + expected = pd.Series(expected, name='value') + tm.assert_series_equal(expected, result) + + @pytest.mark.parametrize( + "op, args, targop", + [('cumprod', (), lambda x: x.cumprod()), + ('cumsum', (), lambda x: x.cumsum()), + ('shift', (-1, ), lambda x: x.shift(-1)), + ('shift', (1, ), lambda x: x.shift())]) + def test_cython_transform_frame(self, op, args, targop): + s = Series(np.random.randn(1000)) + s_missing = s.copy() + s_missing.iloc[2:10] = np.nan + labels = np.random.randint(0, 50, size=1000).astype(float) strings = list('qwertyuiopasdfghjklz') strings_missing = strings[:] strings_missing[5] = np.nan @@ -429,7 +544,9 @@ def test_cython_transform(self): 'timedelta': pd.timedelta_range(1, freq='s', periods=1000), 'string': strings * 50, - 'string_missing': strings_missing * 50}) + 'string_missing': strings_missing * 50}, + columns=['float', 'float_missing', 'int', 'datetime', + 'timedelta', 'string', 'string_missing']) df['cat'] = df['string'].astype('category') df2 = df.copy() @@ -449,34 +566,35 @@ def test_cython_transform(self): if op == 'shift': gb._set_group_selection() - for (op, args), targop in ops: - if op != 'shift' and 'int' not in gb_target: - # numeric apply fastpath promotes dtype so have - # to apply seperately and concat - i = gb[['int']].apply(targop) - f = gb[['float', 'float_missing']].apply(targop) - expected = pd.concat([f, i], 
axis=1) + if op != 'shift' and 'int' not in gb_target: + # numeric apply fastpath promotes dtype so have + # to apply separately and concat + i = gb[['int']].apply(targop) + f = gb[['float', 'float_missing']].apply(targop) + expected = pd.concat([f, i], axis=1) + else: + expected = gb.apply(targop) + + expected = expected.sort_index(axis=1) + tm.assert_frame_equal(expected, + gb.transform(op, *args).sort_index( + axis=1)) + tm.assert_frame_equal( + expected, + getattr(gb, op)(*args).sort_index(axis=1)) + # individual columns + for c in df: + if c not in ['float', 'int', 'float_missing' + ] and op != 'shift': + pytest.raises(DataError, gb[c].transform, op) + pytest.raises(DataError, getattr(gb[c], op)) else: - expected = gb.apply(targop) - - expected = expected.sort_index(axis=1) - tm.assert_frame_equal(expected, - gb.transform(op, *args).sort_index( - axis=1)) - tm.assert_frame_equal(expected, getattr(gb, op)(*args)) - # individual columns - for c in df: - if c not in ['float', 'int', 'float_missing' - ] and op != 'shift': - self.assertRaises(DataError, gb[c].transform, op) - self.assertRaises(DataError, getattr(gb[c], op)) - else: - expected = gb[c].apply(targop) - expected.name = c - tm.assert_series_equal(expected, - gb[c].transform(op, *args)) - tm.assert_series_equal(expected, - getattr(gb[c], op)(*args)) + expected = gb[c].apply(targop) + expected.name = c + tm.assert_series_equal(expected, + gb[c].transform(op, *args)) + tm.assert_series_equal(expected, + getattr(gb[c], op)(*args)) def test_transform_with_non_scalar_group(self): # GH 10165 @@ -488,7 +606,120 @@ def test_transform_with_non_scalar_group(self): df = pd.DataFrame(np.random.randint(1, 10, (4, 12)), columns=cols, index=['A', 'C', 'G', 'T']) - self.assertRaisesRegexp(ValueError, 'transform must return a scalar ' - 'value for each group.*', df.groupby - (axis=1, level=1).transform, - lambda z: z.div(z.sum(axis=1), axis=0)) + tm.assert_raises_regex(ValueError, 'transform must return ' + 'a scalar 
value for each ' + 'group.*', + df.groupby(axis=1, level=1).transform, + lambda z: z.div(z.sum(axis=1), axis=0)) + + @pytest.mark.parametrize('cols,exp,comp_func', [ + ('a', pd.Series([1, 1, 1], name='a'), tm.assert_series_equal), + (['a', 'c'], pd.DataFrame({'a': [1, 1, 1], 'c': [1, 1, 1]}), + tm.assert_frame_equal) + ]) + @pytest.mark.parametrize('agg_func', [ + 'count', 'rank', 'size']) + def test_transform_numeric_ret(self, cols, exp, comp_func, agg_func): + if agg_func == 'size' and isinstance(cols, list): + pytest.xfail("'size' transformation not supported with " + "NDFrameGroupy") + + # GH 19200 + df = pd.DataFrame( + {'a': pd.date_range('2018-01-01', periods=3), + 'b': range(3), + 'c': range(7, 10)}) + + result = df.groupby('b')[cols].transform(agg_func) + + if agg_func == 'rank': + exp = exp.astype('float') + + comp_func(result, exp) + + @pytest.mark.parametrize("mix_groupings", [True, False]) + @pytest.mark.parametrize("as_series", [True, False]) + @pytest.mark.parametrize("val1,val2", [ + ('foo', 'bar'), (1, 2), (1., 2.)]) + @pytest.mark.parametrize("fill_method,limit,exp_vals", [ + ("ffill", None, + [np.nan, np.nan, 'val1', 'val1', 'val1', 'val2', 'val2', 'val2']), + ("ffill", 1, + [np.nan, np.nan, 'val1', 'val1', np.nan, 'val2', 'val2', np.nan]), + ("bfill", None, + ['val1', 'val1', 'val1', 'val2', 'val2', 'val2', np.nan, np.nan]), + ("bfill", 1, + [np.nan, 'val1', 'val1', np.nan, 'val2', 'val2', np.nan, np.nan]) + ]) + def test_group_fill_methods(self, mix_groupings, as_series, val1, val2, + fill_method, limit, exp_vals): + vals = [np.nan, np.nan, val1, np.nan, np.nan, val2, np.nan, np.nan] + _exp_vals = list(exp_vals) + # Overwrite placeholder values + for index, exp_val in enumerate(_exp_vals): + if exp_val == 'val1': + _exp_vals[index] = val1 + elif exp_val == 'val2': + _exp_vals[index] = val2 + + # Need to modify values and expectations depending on the + # Series / DataFrame that we ultimately want to generate + if mix_groupings: # ['a', 'b', 'a, 
'b', ...] + keys = ['a', 'b'] * len(vals) + + def interweave(list_obj): + temp = list() + for x in list_obj: + temp.extend([x, x]) + + return temp + + _exp_vals = interweave(_exp_vals) + vals = interweave(vals) + else: # ['a', 'a', 'a', ... 'b', 'b', 'b'] + keys = ['a'] * len(vals) + ['b'] * len(vals) + _exp_vals = _exp_vals * 2 + vals = vals * 2 + + df = DataFrame({'key': keys, 'val': vals}) + if as_series: + result = getattr( + df.groupby('key')['val'], fill_method)(limit=limit) + exp = Series(_exp_vals, name='val') + assert_series_equal(result, exp) + else: + result = getattr(df.groupby('key'), fill_method)(limit=limit) + exp = DataFrame({'key': keys, 'val': _exp_vals}) + assert_frame_equal(result, exp) + + @pytest.mark.parametrize("test_series", [True, False]) + @pytest.mark.parametrize("periods,fill_method,limit", [ + (1, 'ffill', None), (1, 'ffill', 1), + (1, 'bfill', None), (1, 'bfill', 1), + (-1, 'ffill', None), (-1, 'ffill', 1), + (-1, 'bfill', None), (-1, 'bfill', 1)]) + def test_pct_change(self, test_series, periods, fill_method, limit): + vals = [np.nan, np.nan, 1, 2, 4, 10, np.nan, np.nan] + exp_vals = Series(vals).pct_change(periods=periods, + fill_method=fill_method, + limit=limit).tolist() + + df = DataFrame({'key': ['a'] * len(vals) + ['b'] * len(vals), + 'vals': vals * 2}) + grp = df.groupby('key') + + def get_result(grp_obj): + return grp_obj.pct_change(periods=periods, + fill_method=fill_method, + limit=limit) + + if test_series: + exp = pd.Series(exp_vals * 2) + exp.name = 'vals' + grp = grp['vals'] + result = get_result(grp) + tm.assert_series_equal(result, exp) + else: + exp = DataFrame({'vals': exp_vals * 2}) + result = get_result(grp) + tm.assert_frame_equal(result, exp) diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py new file mode 100644 index 0000000000000..1434656115d18 --- /dev/null +++ b/pandas/tests/groupby/test_value_counts.py @@ -0,0 +1,76 @@ +""" +these are systematically testing 
all of the args to value_counts +with different size combinations. This is to ensure stability of the sorting +and proper parameter handling +""" + +import pytest + +from itertools import product +import numpy as np + +from pandas.util import testing as tm +from pandas import MultiIndex, DataFrame, Series, date_range + + +# our starting frame +def seed_df(seed_nans, n, m): + np.random.seed(1234) + days = date_range('2015-08-24', periods=10) + + frame = DataFrame({ + '1st': np.random.choice( + list('abcd'), n), + '2nd': np.random.choice(days, n), + '3rd': np.random.randint(1, m + 1, n) + }) + + if seed_nans: + frame.loc[1::11, '1st'] = np.nan + frame.loc[3::17, '2nd'] = np.nan + frame.loc[7::19, '3rd'] = np.nan + frame.loc[8::19, '3rd'] = np.nan + frame.loc[9::19, '3rd'] = np.nan + + return frame + + +# create input df, keys, and the bins +binned = [] +ids = [] +for seed_nans in [True, False]: + for n, m in product((100, 1000), (5, 20)): + + df = seed_df(seed_nans, n, m) + bins = None, np.arange(0, max(5, df['3rd'].max()) + 1, 2) + keys = '1st', '2nd', ['1st', '2nd'] + for k, b in product(keys, bins): + binned.append((df, k, b, n, m)) + ids.append("{}-{}-{}".format(k, n, m)) + + +@pytest.mark.slow +@pytest.mark.parametrize("df, keys, bins, n, m", binned, ids=ids) +def test_series_groupby_value_counts(df, keys, bins, n, m): + + def rebuild_index(df): + arr = list(map(df.index.get_level_values, range(df.index.nlevels))) + df.index = MultiIndex.from_arrays(arr, names=df.index.names) + return df + + for isort, normalize, sort, ascending, dropna \ + in product((False, True), repeat=5): + + kwargs = dict(normalize=normalize, sort=sort, + ascending=ascending, dropna=dropna, bins=bins) + + gr = df.groupby(keys, sort=isort) + left = gr['3rd'].value_counts(**kwargs) + + gr = df.groupby(keys, sort=isort) + right = gr['3rd'].apply(Series.value_counts, **kwargs) + right.index.names = right.index.names[:-1] + ['3rd'] + + # have to sort on index because of unstable sort on values 
+ left, right = map(rebuild_index, (left, right)) # xref GH9212 + tm.assert_series_equal(left.sort_index(), right.sort_index()) diff --git a/pandas/tests/groupby/test_whitelist.py b/pandas/tests/groupby/test_whitelist.py new file mode 100644 index 0000000000000..8d6e074881cbb --- /dev/null +++ b/pandas/tests/groupby/test_whitelist.py @@ -0,0 +1,313 @@ +""" +test methods relating to generic function evaluation +the so-called white/black lists +""" + +import pytest +from string import ascii_lowercase +import numpy as np +from pandas import DataFrame, Series, compat, date_range, Index, MultiIndex +from pandas.util import testing as tm +from pandas.compat import lrange, product + +AGG_FUNCTIONS = ['sum', 'prod', 'min', 'max', 'median', 'mean', 'skew', + 'mad', 'std', 'var', 'sem'] +AGG_FUNCTIONS_WITH_SKIPNA = ['skew', 'mad'] + +df_whitelist = frozenset([ + 'last', + 'first', + 'mean', + 'sum', + 'min', + 'max', + 'head', + 'tail', + 'cumcount', + 'ngroup', + 'resample', + 'rank', + 'quantile', + 'fillna', + 'mad', + 'any', + 'all', + 'take', + 'idxmax', + 'idxmin', + 'shift', + 'tshift', + 'ffill', + 'bfill', + 'pct_change', + 'skew', + 'plot', + 'hist', + 'median', + 'dtypes', + 'corrwith', + 'corr', + 'cov', + 'diff', +]) + +s_whitelist = frozenset([ + 'last', + 'first', + 'mean', + 'sum', + 'min', + 'max', + 'head', + 'tail', + 'cumcount', + 'ngroup', + 'resample', + 'rank', + 'quantile', + 'fillna', + 'mad', + 'any', + 'all', + 'take', + 'idxmax', + 'idxmin', + 'shift', + 'tshift', + 'ffill', + 'bfill', + 'pct_change', + 'skew', + 'plot', + 'hist', + 'median', + 'dtype', + 'corr', + 'cov', + 'diff', + 'unique', + 'nlargest', + 'nsmallest', + 'is_monotonic_increasing', + 'is_monotonic_decreasing', +]) + + +@pytest.fixture +def mframe(): + index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', + 'three']], + labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=['first', 'second']) + return DataFrame(np.random.randn(10, 
3), index=index, + columns=['A', 'B', 'C']) + + +@pytest.fixture +def df(): + return DataFrame( + {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], + 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], + 'C': np.random.randn(8), + 'D': np.random.randn(8)}) + + +@pytest.fixture +def df_letters(): + letters = np.array(list(ascii_lowercase)) + N = 10 + random_letters = letters.take(np.random.randint(0, 26, N)) + df = DataFrame({'floats': N / 10 * Series(np.random.random(N)), + 'letters': Series(random_letters)}) + return df + + +@pytest.mark.parametrize( + "obj, whitelist", zip((df_letters(), df_letters().floats), + (df_whitelist, s_whitelist))) +def test_groupby_whitelist(df_letters, obj, whitelist): + df = df_letters + + # these are aliases so ok to have the alias __name__ + alias = {'bfill': 'backfill', + 'ffill': 'pad', + 'boxplot': None} + + gb = obj.groupby(df.letters) + + assert whitelist == gb._apply_whitelist + for m in whitelist: + + m = alias.get(m, m) + if m is None: + continue + + f = getattr(type(gb), m) + + # name + try: + n = f.__name__ + except AttributeError: + continue + assert n == m + + # qualname + if compat.PY3: + try: + n = f.__qualname__ + except AttributeError: + continue + assert n.endswith(m) + + +@pytest.fixture +def raw_frame(): + index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', + 'three']], + labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=['first', 'second']) + raw_frame = DataFrame(np.random.randn(10, 3), index=index, + columns=Index(['A', 'B', 'C'], name='exp')) + raw_frame.iloc[1, [1, 2]] = np.nan + raw_frame.iloc[7, [0, 1]] = np.nan + return raw_frame + + +@pytest.mark.parametrize( + "op, level, axis, skipna, sort", + product(AGG_FUNCTIONS, + lrange(2), lrange(2), + [True, False], + [True, False])) +def test_regression_whitelist_methods( + raw_frame, op, level, + axis, skipna, sort): + # GH6944 + # GH 17537 + # explicitly test the whitelist 
methods + + if axis == 0: + frame = raw_frame + else: + frame = raw_frame.T + + if op in AGG_FUNCTIONS_WITH_SKIPNA: + grouped = frame.groupby(level=level, axis=axis, sort=sort) + result = getattr(grouped, op)(skipna=skipna) + expected = getattr(frame, op)(level=level, axis=axis, + skipna=skipna) + if sort: + expected = expected.sort_index(axis=axis, level=level) + tm.assert_frame_equal(result, expected) + else: + grouped = frame.groupby(level=level, axis=axis, sort=sort) + result = getattr(grouped, op)() + expected = getattr(frame, op)(level=level, axis=axis) + if sort: + expected = expected.sort_index(axis=axis, level=level) + tm.assert_frame_equal(result, expected) + + +def test_groupby_blacklist(df_letters): + df = df_letters + s = df_letters.floats + + blacklist = [ + 'eval', 'query', 'abs', 'where', + 'mask', 'align', 'groupby', 'clip', 'astype', + 'at', 'combine', 'consolidate', 'convert_objects', + ] + to_methods = [method for method in dir(df) if method.startswith('to_')] + + blacklist.extend(to_methods) + + # e.g., to_csv + defined_but_not_allowed = ("(?:^Cannot.+{0!r}.+{1!r}.+try using the " + "'apply' method$)") + + # e.g., query, eval + not_defined = "(?:^{1!r} object has no attribute {0!r}$)" + fmt = defined_but_not_allowed + '|' + not_defined + for bl in blacklist: + for obj in (df, s): + gb = obj.groupby(df.letters) + msg = fmt.format(bl, type(gb).__name__) + with tm.assert_raises_regex(AttributeError, msg): + getattr(gb, bl) + + +def test_tab_completion(mframe): + grp = mframe.groupby(level='second') + results = {v for v in dir(grp) if not v.startswith('_')} + expected = { + 'A', 'B', 'C', 'agg', 'aggregate', 'apply', 'boxplot', 'filter', + 'first', 'get_group', 'groups', 'hist', 'indices', 'last', 'max', + 'mean', 'median', 'min', 'ngroups', 'nth', 'ohlc', 'plot', + 'prod', 'size', 'std', 'sum', 'transform', 'var', 'sem', 'count', + 'nunique', 'head', 'describe', 'cummax', 'quantile', + 'rank', 'cumprod', 'tail', 'resample', 'cummin', 'fillna', + 
'cumsum', 'cumcount', 'ngroup', 'all', 'shift', 'skew', + 'take', 'tshift', 'pct_change', 'any', 'mad', 'corr', 'corrwith', + 'cov', 'dtypes', 'ndim', 'diff', 'idxmax', 'idxmin', + 'ffill', 'bfill', 'pad', 'backfill', 'rolling', 'expanding', 'pipe', + } + assert results == expected + + +def test_groupby_function_rename(mframe): + grp = mframe.groupby(level='second') + for name in ['sum', 'prod', 'min', 'max', 'first', 'last']: + f = getattr(grp, name) + assert f.__name__ == name + + +def test_groupby_selection_with_methods(df): + # some methods which require DatetimeIndex + rng = date_range('2014', periods=len(df)) + df.index = rng + + g = df.groupby(['A'])[['C']] + g_exp = df[['C']].groupby(df['A']) + # TODO check groupby with > 1 col ? + + # methods which are called as .foo() + methods = ['count', + 'corr', + 'cummax', + 'cummin', + 'cumprod', + 'describe', + 'rank', + 'quantile', + 'diff', + 'shift', + 'all', + 'any', + 'idxmin', + 'idxmax', + 'ffill', + 'bfill', + 'pct_change', + 'tshift'] + + for m in methods: + res = getattr(g, m)() + exp = getattr(g_exp, m)() + + # should always be frames! 
+ tm.assert_frame_equal(res, exp) + + # methods which aren't just .foo() + tm.assert_frame_equal(g.fillna(0), g_exp.fillna(0)) + tm.assert_frame_equal(g.dtypes, g_exp.dtypes) + tm.assert_frame_equal(g.apply(lambda x: x.sum()), + g_exp.apply(lambda x: x.sum())) + + tm.assert_frame_equal(g.resample('D').mean(), g_exp.resample('D').mean()) + tm.assert_frame_equal(g.resample('D').ohlc(), + g_exp.resample('D').ohlc()) + + tm.assert_frame_equal(g.filter(lambda x: len(x) == 3), + g_exp.filter(lambda x: len(x) == 3)) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 81ad0524807f3..8f51dbabd5b71 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -1,5 +1,7 @@ # -*- coding: utf-8 -*- +import pytest + from pandas import compat from pandas.compat import PY3 @@ -7,9 +9,12 @@ from pandas import (Series, Index, Float64Index, Int64Index, UInt64Index, RangeIndex, MultiIndex, CategoricalIndex, DatetimeIndex, - TimedeltaIndex, PeriodIndex, notnull, isnull) -from pandas.types.common import needs_i8_conversion -from pandas.util.testing import assertRaisesRegexp + TimedeltaIndex, PeriodIndex, IntervalIndex, isna) +from pandas.core.indexes.base import InvalidIndexError +from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin +from pandas.core.dtypes.common import needs_i8_conversion +from pandas.core.dtypes.dtypes import CategoricalDtype +from pandas._libs.tslib import iNaT import pandas.util.testing as tm @@ -25,9 +30,9 @@ def setup_indices(self): for name, idx in self.indices.items(): setattr(self, name, idx) - def verify_pickle(self, index): - unpickled = self.round_trip_pickle(index) - self.assertTrue(index.equals(unpickled)) + def verify_pickle(self, indices): + unpickled = tm.round_trip_pickle(indices) + assert indices.equals(unpickled) def test_pickle_compat_construction(self): # this is testing for pickle compat @@ -35,14 +40,57 @@ def test_pickle_compat_construction(self): return # need an object to 
create with - self.assertRaises(TypeError, self._holder) + pytest.raises(TypeError, self._holder) + + def test_to_series(self): + # assert that we are creating a copy of the index + + idx = self.create_index() + s = idx.to_series() + assert s.values is not idx.values + assert s.index is not idx + assert s.name == idx.name + + def test_to_series_with_arguments(self): + # GH18699 + + # index kwarg + idx = self.create_index() + s = idx.to_series(index=idx) + + assert s.values is not idx.values + assert s.index is idx + assert s.name == idx.name + + # name kwarg + idx = self.create_index() + s = idx.to_series(name='__test') + + assert s.values is not idx.values + assert s.index is not idx + assert s.name != idx.name + + def test_to_frame(self): + # see gh-15230 + idx = self.create_index() + name = idx.name or 0 + + df = idx.to_frame() + + assert df.index is idx + assert len(df.columns) == 1 + assert df.columns[0] == name + assert df[name].values is not idx.values + + df = idx.to_frame(index=False) + assert df.index is not idx def test_shift(self): # GH8083 test the base class for shift idx = self.create_index() - self.assertRaises(NotImplementedError, idx.shift, 1) - self.assertRaises(NotImplementedError, idx.shift, 1, 2) + pytest.raises(NotImplementedError, idx.shift, 1) + pytest.raises(NotImplementedError, idx.shift, 1, 2) def test_create_index_existing_name(self): @@ -77,26 +125,27 @@ def test_create_index_existing_name(self): def test_numeric_compat(self): idx = self.create_index() - tm.assertRaisesRegexp(TypeError, "cannot perform __mul__", - lambda: idx * 1) - tm.assertRaisesRegexp(TypeError, "cannot perform __mul__", - lambda: 1 * idx) + tm.assert_raises_regex(TypeError, "cannot perform __mul__", + lambda: idx * 1) + tm.assert_raises_regex(TypeError, "cannot perform __rmul__", + lambda: 1 * idx) div_err = "cannot perform __truediv__" if PY3 \ - else "cannot perform __div__" - tm.assertRaisesRegexp(TypeError, div_err, lambda: idx / 1) - 
tm.assertRaisesRegexp(TypeError, div_err, lambda: 1 / idx) - tm.assertRaisesRegexp(TypeError, "cannot perform __floordiv__", - lambda: idx // 1) - tm.assertRaisesRegexp(TypeError, "cannot perform __floordiv__", - lambda: 1 // idx) + else "cannot perform __div__" + tm.assert_raises_regex(TypeError, div_err, lambda: idx / 1) + div_err = div_err.replace(' __', ' __r') + tm.assert_raises_regex(TypeError, div_err, lambda: 1 / idx) + tm.assert_raises_regex(TypeError, "cannot perform __floordiv__", + lambda: idx // 1) + tm.assert_raises_regex(TypeError, "cannot perform __rfloordiv__", + lambda: 1 // idx) def test_logical_compat(self): idx = self.create_index() - tm.assertRaisesRegexp(TypeError, 'cannot perform all', - lambda: idx.all()) - tm.assertRaisesRegexp(TypeError, 'cannot perform any', - lambda: idx.any()) + tm.assert_raises_regex(TypeError, 'cannot perform all', + lambda: idx.all()) + tm.assert_raises_regex(TypeError, 'cannot perform any', + lambda: idx.any()) def test_boolean_context_compat(self): @@ -107,7 +156,7 @@ def f(): if idx: pass - tm.assertRaisesRegexp(ValueError, 'The truth value of a', f) + tm.assert_raises_regex(ValueError, 'The truth value of a', f) def test_reindex_base(self): idx = self.create_index() @@ -116,18 +165,36 @@ def test_reindex_base(self): actual = idx.get_indexer(idx) tm.assert_numpy_array_equal(expected, actual) - with tm.assertRaisesRegexp(ValueError, 'Invalid fill method'): + with tm.assert_raises_regex(ValueError, 'Invalid fill method'): idx.get_indexer(idx, method='invalid') - def test_ndarray_compat_properties(self): + def test_get_indexer_consistency(self): + # See GH 16819 + for name, index in self.indices.items(): + if isinstance(index, IntervalIndex): + continue + + if index.is_unique or isinstance(index, CategoricalIndex): + indexer = index.get_indexer(index[0:2]) + assert isinstance(indexer, np.ndarray) + assert indexer.dtype == np.intp + else: + e = "Reindexing only valid with uniquely valued Index objects" + with 
tm.assert_raises_regex(InvalidIndexError, e): + indexer = index.get_indexer(index[0:2]) + indexer, _ = index.get_indexer_non_unique(index[0:2]) + assert isinstance(indexer, np.ndarray) + assert indexer.dtype == np.intp + + def test_ndarray_compat_properties(self): idx = self.create_index() - self.assertTrue(idx.T.equals(idx)) - self.assertTrue(idx.transpose().equals(idx)) + assert idx.T.equals(idx) + assert idx.transpose().equals(idx) values = idx.values for prop in self._compat_props: - self.assertEqual(getattr(idx, prop), getattr(values, prop)) + assert getattr(idx, prop) == getattr(values, prop) # test for validity idx.nbytes @@ -143,14 +210,13 @@ def test_str(self): # test the string repr idx = self.create_index() idx.name = 'foo' - self.assertTrue("'foo'" in str(idx)) - self.assertTrue(idx.__class__.__name__ in str(idx)) + assert "'foo'" in str(idx) + assert idx.__class__.__name__ in str(idx) - def test_dtype_str(self): - for idx in self.indices.values(): - dtype = idx.dtype_str - self.assertIsInstance(dtype, compat.string_types) - self.assertEqual(dtype, str(idx.dtype)) + def test_dtype_str(self, indices): + dtype = indices.dtype_str + assert isinstance(dtype, compat.string_types) + assert dtype == str(indices.dtype) def test_repr_max_seq_item_setting(self): # GH10182 @@ -158,54 +224,50 @@ def test_repr_max_seq_item_setting(self): idx = idx.repeat(50) with pd.option_context("display.max_seq_items", None): repr(idx) - self.assertFalse('...' in str(idx)) + assert '...' 
not in str(idx) - def test_wrong_number_names(self): + def test_wrong_number_names(self, indices): def testit(ind): ind.names = ["apple", "banana", "carrot"] + tm.assert_raises_regex(ValueError, "^Length", testit, indices) - for ind in self.indices.values(): - assertRaisesRegexp(ValueError, "^Length", testit, ind) - - def test_set_name_methods(self): + def test_set_name_methods(self, indices): new_name = "This is the new name for this index" - for ind in self.indices.values(): - # don't tests a MultiIndex here (as its tested separated) - if isinstance(ind, MultiIndex): - continue - - original_name = ind.name - new_ind = ind.set_names([new_name]) - self.assertEqual(new_ind.name, new_name) - self.assertEqual(ind.name, original_name) - res = ind.rename(new_name, inplace=True) - - # should return None - self.assertIsNone(res) - self.assertEqual(ind.name, new_name) - self.assertEqual(ind.names, [new_name]) - # with assertRaisesRegexp(TypeError, "list-like"): - # # should still fail even if it would be the right length - # ind.set_names("a") - with assertRaisesRegexp(ValueError, "Level must be None"): - ind.set_names("a", level=0) - - # rename in place just leaves tuples and other containers alone - name = ('A', 'B') - ind.rename(name, inplace=True) - self.assertEqual(ind.name, name) - self.assertEqual(ind.names, [name]) - - def test_hash_error(self): - for ind in self.indices.values(): - with tm.assertRaisesRegexp(TypeError, "unhashable type: %r" % - type(ind).__name__): - hash(ind) + # don't tests a MultiIndex here (as its tested separated) + if isinstance(indices, MultiIndex): + return + original_name = indices.name + new_ind = indices.set_names([new_name]) + assert new_ind.name == new_name + assert indices.name == original_name + res = indices.rename(new_name, inplace=True) + + # should return None + assert res is None + assert indices.name == new_name + assert indices.names == [new_name] + # with tm.assert_raises_regex(TypeError, "list-like"): + # # should still 
fail even if it would be the right length + # ind.set_names("a") + with tm.assert_raises_regex(ValueError, "Level must be None"): + indices.set_names("a", level=0) + + # rename in place just leaves tuples and other containers alone + name = ('A', 'B') + indices.rename(name, inplace=True) + assert indices.name == name + assert indices.names == [name] + + def test_hash_error(self, indices): + index = indices + tm.assert_raises_regex(TypeError, "unhashable type: %r" % + type(index).__name__, hash, indices) def test_copy_name(self): - # Check that "name" argument passed at initialization is honoured - # GH12309 + # gh-12309: Check that the "name" argument + # passed at initialization is honored. + for name, index in compat.iteritems(self.indices): if isinstance(index, MultiIndex): continue @@ -214,18 +276,21 @@ def test_copy_name(self): second = first.__class__(first, copy=False) # Even though "copy=False", we want a new object. - self.assertIsNot(first, second) - # Not using tm.assert_index_equal() since names differ: - self.assertTrue(index.equals(first)) + assert first is not second + + # Not using tm.assert_index_equal() since names differ. 
+ assert index.equals(first) - self.assertEqual(first.name, 'mario') - self.assertEqual(second.name, 'mario') + assert first.name == 'mario' + assert second.name == 'mario' s1 = Series(2, index=first) s2 = Series(3, index=second[:-1]) - if not isinstance(index, CategoricalIndex): # See GH13365 + + if not isinstance(index, CategoricalIndex): + # See gh-13365 s3 = s1 * s2 - self.assertEqual(s3.index.name, 'mario') + assert s3.index.name == 'mario' def test_ensure_copied_data(self): # Check the "copy" argument of each Index.__new__ is honoured @@ -246,125 +311,126 @@ def test_ensure_copied_data(self): tm.assert_numpy_array_equal(index.values, result.values, check_same='copy') - if not isinstance(index, PeriodIndex): - result = index_type(index.values, copy=False, **init_kwargs) - tm.assert_numpy_array_equal(index.values, result.values, - check_same='same') - tm.assert_numpy_array_equal(index._values, result._values, - check_same='same') - else: + if isinstance(index, PeriodIndex): # .values an object array of Period, thus copied result = index_type(ordinal=index.asi8, copy=False, **init_kwargs) - tm.assert_numpy_array_equal(index._values, result._values, + tm.assert_numpy_array_equal(index._ndarray_values, + result._ndarray_values, + check_same='same') + elif isinstance(index, IntervalIndex): + # checked in test_interval.py + pass + else: + result = index_type(index.values, copy=False, **init_kwargs) + tm.assert_numpy_array_equal(index.values, result.values, + check_same='same') + tm.assert_numpy_array_equal(index._ndarray_values, + result._ndarray_values, check_same='same') - def test_copy_and_deepcopy(self): + def test_copy_and_deepcopy(self, indices): from copy import copy, deepcopy - for ind in self.indices.values(): + if isinstance(indices, MultiIndex): + return + for func in (copy, deepcopy): + idx_copy = func(indices) + assert idx_copy is not indices + assert idx_copy.equals(indices) - # don't tests a MultiIndex here (as its tested separated) - if 
isinstance(ind, MultiIndex): - continue + new_copy = indices.copy(deep=True, name="banana") + assert new_copy.name == "banana" - for func in (copy, deepcopy): - idx_copy = func(ind) - self.assertIsNot(idx_copy, ind) - self.assertTrue(idx_copy.equals(ind)) + def test_duplicates(self, indices): + if type(indices) is not self._holder: + return + if not len(indices) or isinstance(indices, MultiIndex): + return + idx = self._holder([indices[0]] * 5) + assert not idx.is_unique + assert idx.has_duplicates + + def test_unique(self, indices): + # don't test a MultiIndex here (as its tested separated) + # don't test a CategoricalIndex because categories change (GH 18291) + if isinstance(indices, (MultiIndex, CategoricalIndex)): + return - new_copy = ind.copy(deep=True, name="banana") - self.assertEqual(new_copy.name, "banana") + # GH 17896 + expected = indices.drop_duplicates() + for level in 0, indices.name, None: + result = indices.unique(level=level) + tm.assert_index_equal(result, expected) - def test_duplicates(self): - for ind in self.indices.values(): + for level in 3, 'wrong': + pytest.raises((IndexError, KeyError), indices.unique, level=level) - if not len(ind): - continue - if isinstance(ind, MultiIndex): - continue - idx = self._holder([ind[0]] * 5) - self.assertFalse(idx.is_unique) - self.assertTrue(idx.has_duplicates) - - # GH 10115 - # preserve names - idx.name = 'foo' - result = idx.drop_duplicates() - self.assertEqual(result.name, 'foo') - self.assert_index_equal(result, Index([ind[0]], name='foo')) - - def test_get_unique_index(self): - for ind in self.indices.values(): - - # MultiIndex tested separately - if not len(ind) or isinstance(ind, MultiIndex): - continue + def test_unique_na(self): + idx = pd.Index([2, np.nan, 2, 1], name='my_index') + expected = pd.Index([2, np.nan, 1], name='my_index') + result = idx.unique() + tm.assert_index_equal(result, expected) - idx = ind[[0] * 5] - idx_unique = ind[[0]] - # We test against `idx_unique`, so first we make 
sure it's unique - # and doesn't contain nans. - self.assertTrue(idx_unique.is_unique) - try: - self.assertFalse(idx_unique.hasnans) - except NotImplementedError: - pass + def test_get_unique_index(self, indices): + # MultiIndex tested separately + if not len(indices) or isinstance(indices, MultiIndex): + return - for dropna in [False, True]: - result = idx._get_unique_index(dropna=dropna) - self.assert_index_equal(result, idx_unique) + idx = indices[[0] * 5] + idx_unique = indices[[0]] - # nans: + # We test against `idx_unique`, so first we make sure it's unique + # and doesn't contain nans. + assert idx_unique.is_unique + try: + assert not idx_unique.hasnans + except NotImplementedError: + pass - if not ind._can_hold_na: - continue + for dropna in [False, True]: + result = idx._get_unique_index(dropna=dropna) + tm.assert_index_equal(result, idx_unique) - if needs_i8_conversion(ind): - vals = ind.asi8[[0] * 5] - vals[0] = pd.tslib.iNaT - else: - vals = ind.values[[0] * 5] - vals[0] = np.nan - - vals_unique = vals[:2] - idx_nan = ind._shallow_copy(vals) - idx_unique_nan = ind._shallow_copy(vals_unique) - self.assertTrue(idx_unique_nan.is_unique) - - self.assertEqual(idx_nan.dtype, ind.dtype) - self.assertEqual(idx_unique_nan.dtype, ind.dtype) - - for dropna, expected in zip([False, True], - [idx_unique_nan, idx_unique]): - for i in [idx_nan, idx_unique_nan]: - result = i._get_unique_index(dropna=dropna) - self.assert_index_equal(result, expected) - - def test_sort(self): - for ind in self.indices.values(): - self.assertRaises(TypeError, ind.sort) - - def test_order(self): - for ind in self.indices.values(): - # 9816 deprecated - with tm.assert_produces_warning(FutureWarning): - ind.order() - - def test_mutability(self): - for ind in self.indices.values(): - if not len(ind): - continue - self.assertRaises(TypeError, ind.__setitem__, 0, ind[0]) + # nans: + if not indices._can_hold_na: + return + + if needs_i8_conversion(indices): + vals = indices.asi8[[0] * 5] + 
vals[0] = iNaT + else: + vals = indices.values[[0] * 5] + vals[0] = np.nan + + vals_unique = vals[:2] + idx_nan = indices._shallow_copy(vals) + idx_unique_nan = indices._shallow_copy(vals_unique) + assert idx_unique_nan.is_unique - def test_view(self): - for ind in self.indices.values(): - i_view = ind.view() - self.assertEqual(i_view.name, ind.name) + assert idx_nan.dtype == indices.dtype + assert idx_unique_nan.dtype == indices.dtype - def test_compat(self): - for ind in self.indices.values(): - self.assertEqual(ind.tolist(), list(ind)) + for dropna, expected in zip([False, True], + [idx_unique_nan, + idx_unique]): + for i in [idx_nan, idx_unique_nan]: + result = i._get_unique_index(dropna=dropna) + tm.assert_index_equal(result, expected) + + def test_sort(self, indices): + pytest.raises(TypeError, indices.sort) + + def test_mutability(self, indices): + if not len(indices): + return + pytest.raises(TypeError, indices.__setitem__, 0, indices[0]) + + def test_view(self, indices): + assert indices.view().name == indices.name + + def test_compat(self, indices): + assert indices.tolist() == list(indices) def test_memory_usage(self): for name, index in compat.iteritems(self.indices): @@ -374,17 +440,18 @@ def test_memory_usage(self): result2 = index.memory_usage() result3 = index.memory_usage(deep=True) - # RangeIndex doesn't use a hashtable engine - if not isinstance(index, RangeIndex): - self.assertTrue(result2 > result) + # RangeIndex, IntervalIndex + # don't have engines + if not isinstance(index, (RangeIndex, IntervalIndex)): + assert result2 > result if index.inferred_type == 'object': - self.assertTrue(result3 > result2) + assert result3 > result2 else: # we report 0 for no-length - self.assertEqual(result, 0) + assert result == 0 def test_argsort(self): for k, ind in self.indices.items(): @@ -407,27 +474,27 @@ def test_numpy_argsort(self): # pandas compatibility input validation - the # rest already perform separate (or no) such # validation via their 'values' 
attribute as - # defined in pandas/indexes/base.py - they + # defined in pandas.core.indexes/base.py - they # cannot be changed at the moment due to # backwards compatibility concerns if isinstance(type(ind), (CategoricalIndex, RangeIndex)): msg = "the 'axis' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, - np.argsort, ind, axis=1) + tm.assert_raises_regex(ValueError, msg, + np.argsort, ind, axis=1) msg = "the 'kind' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.argsort, - ind, kind='mergesort') + tm.assert_raises_regex(ValueError, msg, np.argsort, + ind, kind='mergesort') msg = "the 'order' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.argsort, - ind, order=('a', 'b')) + tm.assert_raises_regex(ValueError, msg, np.argsort, + ind, order=('a', 'b')) - def test_pickle(self): - for ind in self.indices.values(): - self.verify_pickle(ind) - ind.name = 'foo' - self.verify_pickle(ind) + def test_pickle(self, indices): + self.verify_pickle(indices) + original_name, indices.name = indices.name, 'foo' + self.verify_pickle(indices) + indices.name = original_name def test_take(self): indexer = [4, 3, 0, 2] @@ -439,12 +506,12 @@ def test_take(self): result = ind.take(indexer) expected = ind[indexer] - self.assertTrue(result.equals(expected)) + assert result.equals(expected) if not isinstance(ind, (DatetimeIndex, PeriodIndex, TimedeltaIndex)): # GH 10791 - with tm.assertRaises(AttributeError): + with pytest.raises(AttributeError): ind.freq def test_take_invalid_kwargs(self): @@ -452,16 +519,16 @@ def test_take_invalid_kwargs(self): indices = [1, 2] msg = r"take\(\) got an unexpected keyword argument 'foo'" - tm.assertRaisesRegexp(TypeError, msg, idx.take, - indices, foo=2) + tm.assert_raises_regex(TypeError, msg, idx.take, + indices, foo=2) msg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, idx.take, - indices, out=indices) + tm.assert_raises_regex(ValueError, msg, 
idx.take, + indices, out=indices) msg = "the 'mode' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, idx.take, - indices, mode='clip') + tm.assert_raises_regex(ValueError, msg, idx.take, + indices, mode='clip') def test_repeat(self): rep = 2 @@ -481,20 +548,21 @@ def test_numpy_repeat(self): tm.assert_index_equal(np.repeat(i, rep), expected) msg = "the 'axis' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.repeat, - i, rep, axis=0) + tm.assert_raises_regex(ValueError, msg, np.repeat, + i, rep, axis=0) - def test_where(self): + @pytest.mark.parametrize('klass', [list, tuple, np.array, Series]) + def test_where(self, klass): i = self.create_index() - result = i.where(notnull(i)) + + cond = [True] * len(i) + result = i.where(klass(cond)) expected = i tm.assert_index_equal(result, expected) - _nan = i._na_value cond = [False] + [True] * len(i[1:]) - expected = pd.Index([_nan] + i[1:].tolist(), dtype=i.dtype) - - result = i.where(cond) + expected = pd.Index([i._na_value] + i[1:].tolist(), dtype=i.dtype) + result = i.where(klass(cond)) tm.assert_index_equal(result, expected) def test_setops_errorcases(self): @@ -506,9 +574,10 @@ def test_setops_errorcases(self): for method in methods: for case in cases: - assertRaisesRegexp(TypeError, - "Input must be Index or array-like", - method, case) + tm.assert_raises_regex(TypeError, + "Input must be Index " + "or array-like", + method, case) def test_intersection_base(self): for name, idx in compat.iteritems(self.indices): @@ -519,7 +588,7 @@ def test_intersection_base(self): if isinstance(idx, CategoricalIndex): pass else: - self.assertTrue(tm.equalContents(intersect, second)) + assert tm.equalContents(intersect, second) # GH 10149 cases = [klass(second.values) @@ -527,17 +596,17 @@ def test_intersection_base(self): for case in cases: if isinstance(idx, PeriodIndex): msg = "can only call with other PeriodIndex-ed objects" - with tm.assertRaisesRegexp(ValueError, msg): + with 
tm.assert_raises_regex(ValueError, msg): result = first.intersection(case) elif isinstance(idx, CategoricalIndex): pass else: result = first.intersection(case) - self.assertTrue(tm.equalContents(result, second)) + assert tm.equalContents(result, second) if isinstance(idx, MultiIndex): msg = "other must be a MultiIndex or a list of tuples" - with tm.assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): result = first.intersection([1, 2, 3]) def test_union_base(self): @@ -546,7 +615,7 @@ def test_union_base(self): second = idx[:5] everything = idx union = first.union(second) - self.assertTrue(tm.equalContents(union, everything)) + assert tm.equalContents(union, everything) # GH 10149 cases = [klass(second.values) @@ -554,17 +623,17 @@ def test_union_base(self): for case in cases: if isinstance(idx, PeriodIndex): msg = "can only call with other PeriodIndex-ed objects" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): result = first.union(case) elif isinstance(idx, CategoricalIndex): pass else: result = first.union(case) - self.assertTrue(tm.equalContents(result, everything)) + assert tm.equalContents(result, everything) if isinstance(idx, MultiIndex): msg = "other must be a MultiIndex or a list of tuples" - with tm.assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): result = first.union([1, 2, 3]) def test_difference_base(self): @@ -577,7 +646,7 @@ def test_difference_base(self): if isinstance(idx, CategoricalIndex): pass else: - self.assertTrue(tm.equalContents(result, answer)) + assert tm.equalContents(result, answer) # GH 10149 cases = [klass(second.values) @@ -585,20 +654,21 @@ def test_difference_base(self): for case in cases: if isinstance(idx, PeriodIndex): msg = "can only call with other PeriodIndex-ed objects" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): result = first.difference(case) elif isinstance(idx, 
CategoricalIndex): pass elif isinstance(idx, (DatetimeIndex, TimedeltaIndex)): - self.assertEqual(result.__class__, answer.__class__) - tm.assert_numpy_array_equal(result.asi8, answer.asi8) + assert result.__class__ == answer.__class__ + tm.assert_numpy_array_equal(result.sort_values().asi8, + answer.sort_values().asi8) else: result = first.difference(case) - self.assertTrue(tm.equalContents(result, answer)) + assert tm.equalContents(result, answer) if isinstance(idx, MultiIndex): msg = "other must be a MultiIndex or a list of tuples" - with tm.assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): result = first.difference([1, 2, 3]) def test_symmetric_difference(self): @@ -610,7 +680,7 @@ def test_symmetric_difference(self): else: answer = idx[[0, -1]] result = first.symmetric_difference(second) - self.assertTrue(tm.equalContents(result, answer)) + assert tm.equalContents(result, answer) # GH 10149 cases = [klass(second.values) @@ -618,22 +688,18 @@ def test_symmetric_difference(self): for case in cases: if isinstance(idx, PeriodIndex): msg = "can only call with other PeriodIndex-ed objects" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): result = first.symmetric_difference(case) elif isinstance(idx, CategoricalIndex): pass else: result = first.symmetric_difference(case) - self.assertTrue(tm.equalContents(result, answer)) + assert tm.equalContents(result, answer) if isinstance(idx, MultiIndex): msg = "other must be a MultiIndex or a list of tuples" - with tm.assertRaisesRegexp(TypeError, msg): - result = first.symmetric_difference([1, 2, 3]) - - # 12591 deprecated - with tm.assert_produces_warning(FutureWarning): - first.sym_diff(second) + with tm.assert_raises_regex(TypeError, msg): + first.symmetric_difference([1, 2, 3]) def test_insert_base(self): @@ -644,7 +710,7 @@ def test_insert_base(self): continue # test 0th element - self.assertTrue(idx[0:4].equals(result.insert(0, idx[0]))) + 
assert idx[0:4].equals(result.insert(0, idx[0])) def test_delete_base(self): @@ -659,37 +725,37 @@ def test_delete_base(self): expected = idx[1:] result = idx.delete(0) - self.assertTrue(result.equals(expected)) - self.assertEqual(result.name, expected.name) + assert result.equals(expected) + assert result.name == expected.name expected = idx[:-1] result = idx.delete(-1) - self.assertTrue(result.equals(expected)) - self.assertEqual(result.name, expected.name) + assert result.equals(expected) + assert result.name == expected.name - with tm.assertRaises((IndexError, ValueError)): + with pytest.raises((IndexError, ValueError)): # either depending on numpy version result = idx.delete(len(idx)) def test_equals(self): for name, idx in compat.iteritems(self.indices): - self.assertTrue(idx.equals(idx)) - self.assertTrue(idx.equals(idx.copy())) - self.assertTrue(idx.equals(idx.astype(object))) + assert idx.equals(idx) + assert idx.equals(idx.copy()) + assert idx.equals(idx.astype(object)) - self.assertFalse(idx.equals(list(idx))) - self.assertFalse(idx.equals(np.array(idx))) + assert not idx.equals(list(idx)) + assert not idx.equals(np.array(idx)) # Cannot pass in non-int64 dtype to RangeIndex if not isinstance(idx, RangeIndex): same_values = Index(idx, dtype=object) - self.assertTrue(idx.equals(same_values)) - self.assertTrue(same_values.equals(idx)) + assert idx.equals(same_values) + assert same_values.equals(idx) if idx.nlevels == 1: # do not test MultiIndex - self.assertFalse(idx.equals(pd.Series(idx))) + assert not idx.equals(pd.Series(idx)) def test_equals_op(self): # GH9947, GH10637 @@ -701,7 +767,7 @@ def test_equals_op(self): index_b = index_a[0:-1] index_c = index_a[0:-1].append(index_a[-2:-1]) index_d = index_a[0:1] - with tm.assertRaisesRegexp(ValueError, "Lengths must match"): + with tm.assert_raises_regex(ValueError, "Lengths must match"): index_a == index_b expected1 = np.array([True] * n) expected2 = np.array([True] * (n - 1) + [False]) @@ -713,7 +779,7 @@ 
def test_equals_op(self): array_b = np.array(index_a[0:-1]) array_c = np.array(index_a[0:-1].append(index_a[-2:-1])) array_d = np.array(index_a[0:1]) - with tm.assertRaisesRegexp(ValueError, "Lengths must match"): + with tm.assert_raises_regex(ValueError, "Lengths must match"): index_a == array_b tm.assert_numpy_array_equal(index_a == array_a, expected1) tm.assert_numpy_array_equal(index_a == array_c, expected2) @@ -723,22 +789,23 @@ def test_equals_op(self): series_b = Series(array_b) series_c = Series(array_c) series_d = Series(array_d) - with tm.assertRaisesRegexp(ValueError, "Lengths must match"): + with tm.assert_raises_regex(ValueError, "Lengths must match"): index_a == series_b + tm.assert_numpy_array_equal(index_a == series_a, expected1) tm.assert_numpy_array_equal(index_a == series_c, expected2) # cases where length is 1 for one of them - with tm.assertRaisesRegexp(ValueError, "Lengths must match"): + with tm.assert_raises_regex(ValueError, "Lengths must match"): index_a == index_d - with tm.assertRaisesRegexp(ValueError, "Lengths must match"): + with tm.assert_raises_regex(ValueError, "Lengths must match"): index_a == series_d - with tm.assertRaisesRegexp(ValueError, "Lengths must match"): + with tm.assert_raises_regex(ValueError, "Lengths must match"): index_a == array_d msg = "Can only compare identically-labeled Series objects" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): series_a == series_d - with tm.assertRaisesRegexp(ValueError, "Lengths must match"): + with tm.assert_raises_regex(ValueError, "Lengths must match"): series_a == array_d # comparing with a scalar should broadcast; note that we are excluding @@ -765,10 +832,10 @@ def test_numpy_ufuncs(self): np.arccos, np.arctan, np.sinh, np.cosh, np.tanh, np.arcsinh, np.arccosh, np.arctanh, np.deg2rad, np.rad2deg]: - if isinstance(idx, pd.tseries.base.DatetimeIndexOpsMixin): + if isinstance(idx, DatetimeIndexOpsMixin): # raise TypeError or ValueError 
(PeriodIndex) # PeriodIndex behavior should be changed in future version - with tm.assertRaises(Exception): + with pytest.raises(Exception): with np.errstate(all='ignore'): func(idx) elif isinstance(idx, (Float64Index, Int64Index, UInt64Index)): @@ -776,33 +843,33 @@ def test_numpy_ufuncs(self): with np.errstate(all='ignore'): result = func(idx) exp = Index(func(idx.values), name=idx.name) - self.assert_index_equal(result, exp) - self.assertIsInstance(result, pd.Float64Index) + + tm.assert_index_equal(result, exp) + assert isinstance(result, pd.Float64Index) else: # raise AttributeError or TypeError if len(idx) == 0: continue else: - with tm.assertRaises(Exception): + with pytest.raises(Exception): with np.errstate(all='ignore'): func(idx) for func in [np.isfinite, np.isinf, np.isnan, np.signbit]: - if isinstance(idx, pd.tseries.base.DatetimeIndexOpsMixin): + if isinstance(idx, DatetimeIndexOpsMixin): # raise TypeError or ValueError (PeriodIndex) - with tm.assertRaises(Exception): + with pytest.raises(Exception): func(idx) elif isinstance(idx, (Float64Index, Int64Index, UInt64Index)): - # results in bool array + # Results in bool array result = func(idx) - exp = func(idx.values) - self.assertIsInstance(result, np.ndarray) - tm.assertNotIsInstance(result, Index) + assert isinstance(result, np.ndarray) + assert not isinstance(result, Index) else: if len(idx) == 0: continue else: - with tm.assertRaises(Exception): + with pytest.raises(Exception): func(idx) def test_hasnans_isnans(self): @@ -815,16 +882,16 @@ def test_hasnans_isnans(self): # cases in indices doesn't include NaN expected = np.array([False] * len(idx), dtype=bool) - self.assert_numpy_array_equal(idx._isnan, expected) - self.assertFalse(idx.hasnans) + tm.assert_numpy_array_equal(idx._isnan, expected) + assert not idx.hasnans idx = index.copy() values = idx.values if len(index) == 0: continue - elif isinstance(index, pd.tseries.base.DatetimeIndexOpsMixin): - values[1] = pd.tslib.iNaT + elif 
isinstance(index, DatetimeIndexOpsMixin): + values[1] = iNaT elif isinstance(index, (Int64Index, UInt64Index)): continue else: @@ -837,8 +904,8 @@ def test_hasnans_isnans(self): expected = np.array([False] * len(idx), dtype=bool) expected[1] = True - self.assert_numpy_array_equal(idx._isnan, expected) - self.assertTrue(idx.hasnans) + tm.assert_numpy_array_equal(idx._isnan, expected) + assert idx.hasnans def test_fillna(self): # GH 11343 @@ -847,24 +914,24 @@ def test_fillna(self): pass elif isinstance(index, MultiIndex): idx = index.copy() - msg = "isnull is not defined for MultiIndex" - with self.assertRaisesRegexp(NotImplementedError, msg): + msg = "isna is not defined for MultiIndex" + with tm.assert_raises_regex(NotImplementedError, msg): idx.fillna(idx[0]) else: idx = index.copy() result = idx.fillna(idx[0]) - self.assert_index_equal(result, idx) - self.assertFalse(result is idx) + tm.assert_index_equal(result, idx) + assert result is not idx msg = "'value' must be a scalar, passed: " - with self.assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): idx.fillna([idx[0]]) idx = index.copy() values = idx.values - if isinstance(index, pd.tseries.base.DatetimeIndexOpsMixin): - values[1] = pd.tslib.iNaT + if isinstance(index, DatetimeIndexOpsMixin): + values[1] = iNaT elif isinstance(index, (Int64Index, UInt64Index)): continue else: @@ -877,30 +944,168 @@ def test_fillna(self): expected = np.array([False] * len(idx), dtype=bool) expected[1] = True - self.assert_numpy_array_equal(idx._isnan, expected) - self.assertTrue(idx.hasnans) + tm.assert_numpy_array_equal(idx._isnan, expected) + assert idx.hasnans def test_nulls(self): # this is really a smoke test for the methods - # as these are adequantely tested for function elsewhere + # as these are adequately tested for function elsewhere for name, index in self.indices.items(): if len(index) == 0: - self.assert_numpy_array_equal( - index.isnull(), np.array([], dtype=bool)) + 
tm.assert_numpy_array_equal( + index.isna(), np.array([], dtype=bool)) elif isinstance(index, MultiIndex): idx = index.copy() - msg = "isnull is not defined for MultiIndex" - with self.assertRaisesRegexp(NotImplementedError, msg): - idx.isnull() + msg = "isna is not defined for MultiIndex" + with tm.assert_raises_regex(NotImplementedError, msg): + idx.isna() else: if not index.hasnans: - self.assert_numpy_array_equal( - index.isnull(), np.zeros(len(index), dtype=bool)) - self.assert_numpy_array_equal( - index.notnull(), np.ones(len(index), dtype=bool)) + tm.assert_numpy_array_equal( + index.isna(), np.zeros(len(index), dtype=bool)) + tm.assert_numpy_array_equal( + index.notna(), np.ones(len(index), dtype=bool)) else: - result = isnull(index) - self.assert_numpy_array_equal(index.isnull(), result) - self.assert_numpy_array_equal(index.notnull(), ~result) + result = isna(index) + tm.assert_numpy_array_equal(index.isna(), result) + tm.assert_numpy_array_equal(index.notna(), ~result) + + def test_empty(self): + # GH 15270 + index = self.create_index() + assert not index.empty + assert index[:0].empty + + @pytest.mark.parametrize('how', ['outer', 'inner', 'left', 'right']) + def test_join_self_unique(self, how): + index = self.create_index() + if index.is_unique: + joined = index.join(index, how=how) + assert (index == joined).all() + + def test_searchsorted_monotonic(self, indices): + # GH17271 + # not implemented for tuple searches in MultiIndex + # or Intervals searches in IntervalIndex + if isinstance(indices, (MultiIndex, IntervalIndex)): + return + + # nothing to test if the index is empty + if indices.empty: + return + value = indices[0] + + # determine the expected results (handle dupes for 'right') + expected_left, expected_right = 0, (indices == value).argmin() + if expected_right == 0: + # all values are the same, expected_right should be length + expected_right = len(indices) + + # test _searchsorted_monotonic in all cases + # test searchsorted only for 
increasing + if indices.is_monotonic_increasing: + ssm_left = indices._searchsorted_monotonic(value, side='left') + assert expected_left == ssm_left + + ssm_right = indices._searchsorted_monotonic(value, side='right') + assert expected_right == ssm_right + + ss_left = indices.searchsorted(value, side='left') + assert expected_left == ss_left + + ss_right = indices.searchsorted(value, side='right') + assert expected_right == ss_right + + elif indices.is_monotonic_decreasing: + ssm_left = indices._searchsorted_monotonic(value, side='left') + assert expected_left == ssm_left + + ssm_right = indices._searchsorted_monotonic(value, side='right') + assert expected_right == ssm_right + + else: + # non-monotonic should raise. + with pytest.raises(ValueError): + indices._searchsorted_monotonic(value, side='left') + + def test_map(self): + # callable + index = self.create_index() + + # we don't infer UInt64 + if isinstance(index, pd.UInt64Index): + expected = index.astype('int64') + else: + expected = index + + result = index.map(lambda x: x) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "mapper", + [ + lambda values, index: {i: e for e, i in zip(values, index)}, + lambda values, index: pd.Series(values, index)]) + def test_map_dictlike(self, mapper): + + index = self.create_index() + if isinstance(index, (pd.CategoricalIndex, pd.IntervalIndex)): + pytest.skip("skipping tests for {}".format(type(index))) + + identity = mapper(index.values, index) + + # we don't infer to UInt64 for a dict + if isinstance(index, pd.UInt64Index) and isinstance(identity, dict): + expected = index.astype('int64') + else: + expected = index + + result = index.map(identity) + tm.assert_index_equal(result, expected) + + # empty mappable + expected = pd.Index([np.nan] * len(index)) + result = index.map(mapper(expected, index)) + tm.assert_index_equal(result, expected) + + def test_putmask_with_wrong_mask(self): + # GH18368 + index = self.create_index() + + with 
pytest.raises(ValueError): + index.putmask(np.ones(len(index) + 1, np.bool), 1) + + with pytest.raises(ValueError): + index.putmask(np.ones(len(index) - 1, np.bool), 1) + + with pytest.raises(ValueError): + index.putmask('foo', 1) + + @pytest.mark.parametrize('copy', [True, False]) + @pytest.mark.parametrize('name', [None, 'foo']) + @pytest.mark.parametrize('ordered', [True, False]) + def test_astype_category(self, copy, name, ordered): + # GH 18630 + index = self.create_index() + if name: + index = index.rename(name) + + # standard categories + dtype = CategoricalDtype(ordered=ordered) + result = index.astype(dtype, copy=copy) + expected = CategoricalIndex(index.values, name=name, ordered=ordered) + tm.assert_index_equal(result, expected) + + # non-standard categories + dtype = CategoricalDtype(index.unique().tolist()[:-1], ordered) + result = index.astype(dtype, copy=copy) + expected = CategoricalIndex(index.values, name=name, dtype=dtype) + tm.assert_index_equal(result, expected) + + if ordered is False: + # dtype='category' defaults to ordered=False, so only test once + result = index.astype('category', copy=copy) + expected = CategoricalIndex(index.values, name=name) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/conftest.py b/pandas/tests/indexes/conftest.py new file mode 100644 index 0000000000000..6d88ef0cfa6c5 --- /dev/null +++ b/pandas/tests/indexes/conftest.py @@ -0,0 +1,47 @@ +import pytest +import numpy as np +import pandas as pd + +import pandas.util.testing as tm +from pandas.core.indexes.api import Index, MultiIndex +from pandas.compat import lzip, long + + +@pytest.fixture(params=[tm.makeUnicodeIndex(100), + tm.makeStringIndex(100), + tm.makeDateIndex(100), + tm.makePeriodIndex(100), + tm.makeTimedeltaIndex(100), + tm.makeIntIndex(100), + tm.makeUIntIndex(100), + tm.makeFloatIndex(100), + Index([True, False]), + tm.makeCategoricalIndex(100), + Index([]), + MultiIndex.from_tuples(lzip( + ['foo', 'bar', 'baz'], [1, 2, 
3])), + Index([0, 0, 1, 1, 2, 2])], + ids=lambda x: type(x).__name__) +def indices(request): + return request.param + + +@pytest.fixture(params=[1, np.array(1, dtype=np.int64)]) +def one(request): + # zero-dim integer array behaves like an integer + return request.param + + +zeros = [box([0] * 5, dtype=dtype) + for box in [pd.Index, np.array] + for dtype in [np.int64, np.uint64, np.float64]] +zeros.extend([np.array(0, dtype=dtype) + for dtype in [np.int64, np.uint64, np.float64]]) +zeros.extend([0, 0.0, long(0)]) + + +@pytest.fixture(params=zeros) +def zero(request): + # For testing division by (or of) zero for Index with length 5, this + # gives several scalar-zeros and length-5 vector-zeros + return request.param diff --git a/pandas/tests/indexes/data/s1-0.12.0.pickle b/pandas/tests/indexes/data/s1-0.12.0.pickle deleted file mode 100644 index 0ce9cfdf3aa94fdfd9f8ad6ea00e72fa7eda6552..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 862 zcmZo*O3o|IEvVE>&M!*U%Pq|*$xJLNO049HFG@|$&nqq|DorloDr8J9NX$z~EQTm6 zPA$qzE#?Zz%uNl3FbkQy8CpXbliGs{nKir_z2#aV#&V^UR2HOi6|#gfrCE40cryYO zuxfZShcepu`T2SM2LdqR%}|om8T!FSUEoT>tcUxr)s_8Ijs3LWI_;WmV#&?@vK#N0 zoL>KQf8a;{X^E~M_s?K#x%;8zsr`i_ZuK`-fA@cJS*24K`*Oe8y*l?PA3p8(`ei62 zbnS%w5$TfaS1jJ`*NZZ%)|&BS|JvJ~j)$H<-hVEx)^DEoqx~v>jx4H;Il6z+xzj(_ zHvYH2f1A@KgW>9a)gQa(NFF=8|Ec5GEr(Bh*?<3;k-CNYJNvscUe7tHch>&PvcCaG zj~}sj+&X>D?sxa?tt`$aH9h;b|8i~p|IC=r_VS!ZE_W{fxSv5`anj?~XZ!izWbc?U z_u77j@IL~N8t(6RE7W>y+IiN#Bm*9>C6GYX%gjqjt>AJ=EJ-Z^2CpZWSI7nrU3N(5 zW&i`!7Z#u#8s03J0a~jO%9K|Oj0+$vX#)o@1H;3TA1{@885j)MdM#9dbp3S2b`X6| zW7|~_ExXH0MHNVM<#7n80qMy9bK*esiV22mLG*XUW4}SP+vX@Qbs%k=E@7qtq?>-M zX#vp}I~^~BXa(tOikd(=e@#{ah>l6nh|vPlt3U2p1)_g71^xulo+8iOwSn}ErB%~F zbZewVlMax+^6Jb55UpI3D6b2oi+I1~g6Mh<-JF)+2r15n#=YH_g(Fr^oAYBm;f)fRGVgmUKP z7v(1AWLBjX@^pr>K~xv=CZ|B7g9{WVDXBRniCl$zz~l^szy!@7%9354Qd}rd8_JmH R%~+b`*WQ-o*VdM#2LML=S0Vra diff --git a/pandas/tests/indexes/data/s2-0.12.0.pickle b/pandas/tests/indexes/data/s2-0.12.0.pickle 
deleted file mode 100644 index 2318be2d9978bd2edefc4fe1afb2244e0f4c4601..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 814 zcmZY7Z%7ki9KiA0G;?Q5kzN!cDrru$iA6FbI2KcLS0k+>lx5wvGnekR`!h;wro&Cv zs|e9i=8K{d(<1sqr0`HpGi+k`r@(CBaFL0)GE-C5b1r?;J#e2p`0>2>@~ao?X{*Un zBiEZ*N^Y`N^G1?o$r|(wOX9fc?+Ad{*{T=rTjKyZwHh~7?*j!)rvISJi}9740r_w|xsf(d z7f}_-Q#{OAwEo9LZC2bGu2>1f9oq;OEyE-K4`-7RVw!`^;o+U@84+Yi_IxZ=iXe$E z)v0Mvl#Y(u11$};l?!%U%jp*UoznkDu;59VIvsl8+|FDX)V|c!DEPsp91%=T7*EN7 zn!Cn+Dzz!U~i}3ioJMQuxLl1a4*z-x3)#lQL3nVSi_Bq=86iA;yLuNl{3;5 z$Eo1vGLno22DLFUGD1R|Srb`ptfiQ3E+Q&C%}Dizf7wx?y@9IcNtD?R*ApJNps_?` Y)dd5`#MuZDjf<>0O_NinaXNMKziLWDZU6uP diff --git a/pandas/tests/indexes/datetimelike.py b/pandas/tests/indexes/datetimelike.py index 964511a2e9d5b..7d01a2a70145d 100644 --- a/pandas/tests/indexes/datetimelike.py +++ b/pandas/tests/indexes/datetimelike.py @@ -1,5 +1,7 @@ """ generic datetimelike tests """ - +import pytest +import numpy as np +import pandas as pd from .common import Base import pandas.util.testing as tm @@ -9,25 +11,25 @@ class DatetimeLike(Base): def test_shift_identity(self): idx = self.create_index() - self.assert_index_equal(idx, idx.shift(0)) + tm.assert_index_equal(idx, idx.shift(0)) def test_str(self): # test the string repr idx = self.create_index() idx.name = 'foo' - self.assertFalse("length=%s" % len(idx) in str(idx)) - self.assertTrue("'foo'" in str(idx)) - self.assertTrue(idx.__class__.__name__ in str(idx)) + assert not "length=%s" % len(idx) in str(idx) + assert "'foo'" in str(idx) + assert idx.__class__.__name__ in str(idx) if hasattr(idx, 'tz'): if idx.tz is not None: - self.assertTrue(idx.tz in str(idx)) + assert idx.tz in str(idx) if hasattr(idx, 'freq'): - self.assertTrue("freq='%s'" % idx.freqstr in str(idx)) + assert "freq='%s'" % idx.freqstr in str(idx) - def test_view(self): - super(DatetimeLike, self).test_view() + def test_view(self, indices): + super(DatetimeLike, self).test_view(indices) i = 
self.create_index() @@ -38,3 +40,46 @@ def test_view(self): i_view = i.view(self._holder) result = self._holder(i) tm.assert_index_equal(result, i_view) + + def test_map_callable(self): + + expected = self.index + 1 + result = self.index.map(lambda x: x + 1) + tm.assert_index_equal(result, expected) + + # map to NaT + result = self.index.map(lambda x: pd.NaT if x == self.index[0] else x) + expected = pd.Index([pd.NaT] + self.index[1:].tolist()) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "mapper", + [ + lambda values, index: {i: e for e, i in zip(values, index)}, + lambda values, index: pd.Series(values, index)]) + def test_map_dictlike(self, mapper): + expected = self.index + 1 + + # don't compare the freqs + if isinstance(expected, pd.DatetimeIndex): + expected.freq = None + + result = self.index.map(mapper(expected, self.index)) + tm.assert_index_equal(result, expected) + + expected = pd.Index([pd.NaT] + self.index[1:].tolist()) + result = self.index.map(mapper(expected, self.index)) + tm.assert_index_equal(result, expected) + + # empty map; these map to np.nan because we cannot know + # to re-infer things + expected = pd.Index([np.nan] * len(self.index)) + result = self.index.map(mapper([], [])) + tm.assert_index_equal(result, expected) + + def test_asobject_deprecated(self): + # GH18572 + d = self.create_index() + with tm.assert_produces_warning(FutureWarning): + i = d.asobject + assert isinstance(i, pd.Index) diff --git a/pandas/tests/indexes/datetimes/test_arithmetic.py b/pandas/tests/indexes/datetimes/test_arithmetic.py new file mode 100644 index 0000000000000..8f259a7e78897 --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_arithmetic.py @@ -0,0 +1,1052 @@ +# -*- coding: utf-8 -*- +import warnings +from datetime import datetime, timedelta +import operator + +import pytest + +import numpy as np + +import pandas as pd +from pandas.compat.numpy import np_datetime64_compat +import pandas.util.testing as tm +from pandas.errors 
import PerformanceWarning, NullFrequencyError +from pandas import (Timestamp, Timedelta, Series, + DatetimeIndex, TimedeltaIndex, + date_range) +from pandas.core import ops +from pandas._libs import tslib +from pandas._libs.tslibs.offsets import shift_months + + +@pytest.fixture(params=[None, 'UTC', 'Asia/Tokyo', + 'US/Eastern', 'dateutil/Asia/Singapore', + 'dateutil/US/Pacific']) +def tz(request): + return request.param + + +@pytest.fixture(params=[pd.offsets.Hour(2), timedelta(hours=2), + np.timedelta64(2, 'h'), Timedelta(hours=2)], + ids=str) +def delta(request): + # Several ways of representing two hours + return request.param + + +@pytest.fixture( + params=[ + datetime(2011, 1, 1), + DatetimeIndex(['2011-01-01', '2011-01-02']), + DatetimeIndex(['2011-01-01', '2011-01-02']).tz_localize('US/Eastern'), + np.datetime64('2011-01-01'), + Timestamp('2011-01-01')], + ids=lambda x: type(x).__name__) +def addend(request): + return request.param + + +class TestDatetimeIndexComparisons(object): + @pytest.mark.parametrize('other', [datetime(2016, 1, 1), + Timestamp('2016-01-01'), + np.datetime64('2016-01-01')]) + def test_dti_cmp_datetimelike(self, other, tz): + dti = pd.date_range('2016-01-01', periods=2, tz=tz) + if tz is not None: + if isinstance(other, np.datetime64): + # no tzaware version available + return + elif isinstance(other, Timestamp): + other = other.tz_localize(dti.tzinfo) + else: + other = tslib._localize_pydatetime(other, dti.tzinfo) + + result = dti == other + expected = np.array([True, False]) + tm.assert_numpy_array_equal(result, expected) + + result = dti > other + expected = np.array([False, True]) + tm.assert_numpy_array_equal(result, expected) + + result = dti >= other + expected = np.array([True, True]) + tm.assert_numpy_array_equal(result, expected) + + result = dti < other + expected = np.array([False, False]) + tm.assert_numpy_array_equal(result, expected) + + result = dti <= other + expected = np.array([True, False]) + 
tm.assert_numpy_array_equal(result, expected) + + def dti_cmp_non_datetime(self, tz): + # GH#19301 by convention datetime.date is not considered comparable + # to Timestamp or DatetimeIndex. This may change in the future. + dti = pd.date_range('2016-01-01', periods=2, tz=tz) + + other = datetime(2016, 1, 1).date() + assert not (dti == other).any() + assert (dti != other).all() + with pytest.raises(TypeError): + dti < other + with pytest.raises(TypeError): + dti <= other + with pytest.raises(TypeError): + dti > other + with pytest.raises(TypeError): + dti >= other + + @pytest.mark.parametrize('other', [None, np.nan, pd.NaT]) + def test_dti_eq_null_scalar(self, other, tz): + # GH#19301 + dti = pd.date_range('2016-01-01', periods=2, tz=tz) + assert not (dti == other).any() + + @pytest.mark.parametrize('other', [None, np.nan, pd.NaT]) + def test_dti_ne_null_scalar(self, other, tz): + # GH#19301 + dti = pd.date_range('2016-01-01', periods=2, tz=tz) + assert (dti != other).all() + + @pytest.mark.parametrize('other', [None, np.nan]) + def test_dti_cmp_null_scalar_inequality(self, tz, other): + # GH#19301 + dti = pd.date_range('2016-01-01', periods=2, tz=tz) + + with pytest.raises(TypeError): + dti < other + with pytest.raises(TypeError): + dti <= other + with pytest.raises(TypeError): + dti > other + with pytest.raises(TypeError): + dti >= other + + def test_dti_cmp_nat(self): + left = pd.DatetimeIndex([pd.Timestamp('2011-01-01'), pd.NaT, + pd.Timestamp('2011-01-03')]) + right = pd.DatetimeIndex([pd.NaT, pd.NaT, pd.Timestamp('2011-01-03')]) + + for lhs, rhs in [(left, right), + (left.astype(object), right.astype(object))]: + result = rhs == lhs + expected = np.array([False, False, True]) + tm.assert_numpy_array_equal(result, expected) + + result = lhs != rhs + expected = np.array([True, True, False]) + tm.assert_numpy_array_equal(result, expected) + + expected = np.array([False, False, False]) + tm.assert_numpy_array_equal(lhs == pd.NaT, expected) + 
tm.assert_numpy_array_equal(pd.NaT == rhs, expected) + + expected = np.array([True, True, True]) + tm.assert_numpy_array_equal(lhs != pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT != lhs, expected) + + expected = np.array([False, False, False]) + tm.assert_numpy_array_equal(lhs < pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT > lhs, expected) + + def test_dti_cmp_nat_behaves_like_float_cmp_nan(self): + fidx1 = pd.Index([1.0, np.nan, 3.0, np.nan, 5.0, 7.0]) + fidx2 = pd.Index([2.0, 3.0, np.nan, np.nan, 6.0, 7.0]) + + didx1 = pd.DatetimeIndex(['2014-01-01', pd.NaT, '2014-03-01', pd.NaT, + '2014-05-01', '2014-07-01']) + didx2 = pd.DatetimeIndex(['2014-02-01', '2014-03-01', pd.NaT, pd.NaT, + '2014-06-01', '2014-07-01']) + darr = np.array([np_datetime64_compat('2014-02-01 00:00Z'), + np_datetime64_compat('2014-03-01 00:00Z'), + np_datetime64_compat('nat'), np.datetime64('nat'), + np_datetime64_compat('2014-06-01 00:00Z'), + np_datetime64_compat('2014-07-01 00:00Z')]) + + cases = [(fidx1, fidx2), (didx1, didx2), (didx1, darr)] + + # Check pd.NaT is handles as the same as np.nan + with tm.assert_produces_warning(None): + for idx1, idx2 in cases: + + result = idx1 < idx2 + expected = np.array([True, False, False, False, True, False]) + tm.assert_numpy_array_equal(result, expected) + + result = idx2 > idx1 + expected = np.array([True, False, False, False, True, False]) + tm.assert_numpy_array_equal(result, expected) + + result = idx1 <= idx2 + expected = np.array([True, False, False, False, True, True]) + tm.assert_numpy_array_equal(result, expected) + + result = idx2 >= idx1 + expected = np.array([True, False, False, False, True, True]) + tm.assert_numpy_array_equal(result, expected) + + result = idx1 == idx2 + expected = np.array([False, False, False, False, False, True]) + tm.assert_numpy_array_equal(result, expected) + + result = idx1 != idx2 + expected = np.array([True, True, True, True, True, False]) + tm.assert_numpy_array_equal(result, expected) + + 
with tm.assert_produces_warning(None): + for idx1, val in [(fidx1, np.nan), (didx1, pd.NaT)]: + result = idx1 < val + expected = np.array([False, False, False, False, False, False]) + tm.assert_numpy_array_equal(result, expected) + result = idx1 > val + tm.assert_numpy_array_equal(result, expected) + + result = idx1 <= val + tm.assert_numpy_array_equal(result, expected) + result = idx1 >= val + tm.assert_numpy_array_equal(result, expected) + + result = idx1 == val + tm.assert_numpy_array_equal(result, expected) + + result = idx1 != val + expected = np.array([True, True, True, True, True, True]) + tm.assert_numpy_array_equal(result, expected) + + # Check pd.NaT is handles as the same as np.nan + with tm.assert_produces_warning(None): + for idx1, val in [(fidx1, 3), (didx1, datetime(2014, 3, 1))]: + result = idx1 < val + expected = np.array([True, False, False, False, False, False]) + tm.assert_numpy_array_equal(result, expected) + result = idx1 > val + expected = np.array([False, False, False, False, True, True]) + tm.assert_numpy_array_equal(result, expected) + + result = idx1 <= val + expected = np.array([True, False, True, False, False, False]) + tm.assert_numpy_array_equal(result, expected) + result = idx1 >= val + expected = np.array([False, False, True, False, True, True]) + tm.assert_numpy_array_equal(result, expected) + + result = idx1 == val + expected = np.array([False, False, True, False, False, False]) + tm.assert_numpy_array_equal(result, expected) + + result = idx1 != val + expected = np.array([True, True, False, True, True, True]) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize('op', [operator.eq, operator.ne, + operator.gt, operator.ge, + operator.lt, operator.le]) + def test_comparison_tzawareness_compat(self, op): + # GH#18162 + dr = pd.date_range('2016-01-01', periods=6) + dz = dr.tz_localize('US/Pacific') + + with pytest.raises(TypeError): + op(dr, dz) + with pytest.raises(TypeError): + op(dr, list(dz)) + with 
pytest.raises(TypeError): + op(dz, dr) + with pytest.raises(TypeError): + op(dz, list(dr)) + + # Check that there isn't a problem aware-aware and naive-naive do not + # raise + assert (dr == dr).all() + assert (dr == list(dr)).all() + assert (dz == dz).all() + assert (dz == list(dz)).all() + + # Check comparisons against scalar Timestamps + ts = pd.Timestamp('2000-03-14 01:59') + ts_tz = pd.Timestamp('2000-03-14 01:59', tz='Europe/Amsterdam') + + assert (dr > ts).all() + with pytest.raises(TypeError): + op(dr, ts_tz) + + assert (dz > ts_tz).all() + with pytest.raises(TypeError): + op(dz, ts) + + @pytest.mark.parametrize('op', [operator.eq, operator.ne, + operator.gt, operator.ge, + operator.lt, operator.le]) + def test_nat_comparison_tzawareness(self, op): + # GH#19276 + # tzaware DatetimeIndex should not raise when compared to NaT + dti = pd.DatetimeIndex(['2014-01-01', pd.NaT, '2014-03-01', pd.NaT, + '2014-05-01', '2014-07-01']) + expected = np.array([op == operator.ne] * len(dti)) + result = op(dti, pd.NaT) + tm.assert_numpy_array_equal(result, expected) + + result = op(dti.tz_localize('US/Pacific'), pd.NaT) + tm.assert_numpy_array_equal(result, expected) + + def test_dti_cmp_int_raises(self): + rng = date_range('1/1/2000', periods=10) + + # raise TypeError for now + with pytest.raises(TypeError): + rng < rng[3].value + + def test_dti_cmp_list(self): + rng = date_range('1/1/2000', periods=10) + + result = rng == list(rng) + expected = rng == rng + tm.assert_numpy_array_equal(result, expected) + + +class TestDatetimeIndexArithmetic(object): + + # ------------------------------------------------------------- + # Invalid Operations + + @pytest.mark.parametrize('other', [3.14, np.array([2.0, 3.0])]) + @pytest.mark.parametrize('op', [operator.add, ops.radd, + operator.sub, ops.rsub]) + def test_dti_add_sub_float(self, op, other): + dti = DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D') + with pytest.raises(TypeError): + op(dti, other) + + def 
test_dti_add_timestamp_raises(self): + idx = DatetimeIndex(['2011-01-01', '2011-01-02']) + msg = "cannot add DatetimeIndex and Timestamp" + with tm.assert_raises_regex(TypeError, msg): + idx + Timestamp('2011-01-01') + + def test_dti_radd_timestamp_raises(self): + idx = DatetimeIndex(['2011-01-01', '2011-01-02']) + msg = "cannot add DatetimeIndex and Timestamp" + with tm.assert_raises_regex(TypeError, msg): + Timestamp('2011-01-01') + idx + + # ------------------------------------------------------------- + # Binary operations DatetimeIndex and int + + def test_dti_add_int(self, tz, one): + # Variants of `one` for #19012 + rng = pd.date_range('2000-01-01 09:00', freq='H', + periods=10, tz=tz) + result = rng + one + expected = pd.date_range('2000-01-01 10:00', freq='H', + periods=10, tz=tz) + tm.assert_index_equal(result, expected) + + def test_dti_iadd_int(self, tz, one): + rng = pd.date_range('2000-01-01 09:00', freq='H', + periods=10, tz=tz) + expected = pd.date_range('2000-01-01 10:00', freq='H', + periods=10, tz=tz) + rng += one + tm.assert_index_equal(rng, expected) + + def test_dti_sub_int(self, tz, one): + rng = pd.date_range('2000-01-01 09:00', freq='H', + periods=10, tz=tz) + result = rng - one + expected = pd.date_range('2000-01-01 08:00', freq='H', + periods=10, tz=tz) + tm.assert_index_equal(result, expected) + + def test_dti_isub_int(self, tz, one): + rng = pd.date_range('2000-01-01 09:00', freq='H', + periods=10, tz=tz) + expected = pd.date_range('2000-01-01 08:00', freq='H', + periods=10, tz=tz) + rng -= one + tm.assert_index_equal(rng, expected) + + # ------------------------------------------------------------- + # DatetimeIndex.shift is used in integer addition + + def test_dti_shift_tzaware(self, tz): + # GH#9903 + idx = pd.DatetimeIndex([], name='xxx', tz=tz) + tm.assert_index_equal(idx.shift(0, freq='H'), idx) + tm.assert_index_equal(idx.shift(3, freq='H'), idx) + + idx = pd.DatetimeIndex(['2011-01-01 10:00', '2011-01-01 11:00' + '2011-01-01 
12:00'], name='xxx', tz=tz) + tm.assert_index_equal(idx.shift(0, freq='H'), idx) + exp = pd.DatetimeIndex(['2011-01-01 13:00', '2011-01-01 14:00' + '2011-01-01 15:00'], name='xxx', tz=tz) + tm.assert_index_equal(idx.shift(3, freq='H'), exp) + exp = pd.DatetimeIndex(['2011-01-01 07:00', '2011-01-01 08:00' + '2011-01-01 09:00'], name='xxx', tz=tz) + tm.assert_index_equal(idx.shift(-3, freq='H'), exp) + + def test_dti_shift_freqs(self): + # test shift for DatetimeIndex and non DatetimeIndex + # GH#8083 + drange = pd.date_range('20130101', periods=5) + result = drange.shift(1) + expected = pd.DatetimeIndex(['2013-01-02', '2013-01-03', '2013-01-04', + '2013-01-05', + '2013-01-06'], freq='D') + tm.assert_index_equal(result, expected) + + result = drange.shift(-1) + expected = pd.DatetimeIndex(['2012-12-31', '2013-01-01', '2013-01-02', + '2013-01-03', '2013-01-04'], + freq='D') + tm.assert_index_equal(result, expected) + + result = drange.shift(3, freq='2D') + expected = pd.DatetimeIndex(['2013-01-07', '2013-01-08', '2013-01-09', + '2013-01-10', + '2013-01-11'], freq='D') + tm.assert_index_equal(result, expected) + + def test_dti_shift_int(self): + rng = date_range('1/1/2000', periods=20) + + result = rng + 5 + expected = rng.shift(5) + tm.assert_index_equal(result, expected) + + result = rng - 5 + expected = rng.shift(-5) + tm.assert_index_equal(result, expected) + + def test_dti_shift_no_freq(self): + # GH#19147 + dti = pd.DatetimeIndex(['2011-01-01 10:00', '2011-01-01'], freq=None) + with pytest.raises(NullFrequencyError): + dti.shift(2) + + @pytest.mark.parametrize('tzstr', ['US/Eastern', 'dateutil/US/Eastern']) + def test_dti_shift_localized(self, tzstr): + dr = date_range('2011/1/1', '2012/1/1', freq='W-FRI') + dr_tz = dr.tz_localize(tzstr) + + result = dr_tz.shift(1, '10T') + assert result.tz == dr_tz.tz + + # ------------------------------------------------------------- + # Binary operations DatetimeIndex and timedelta-like + + def test_dti_add_timedeltalike(self, 
tz, delta): + rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz) + result = rng + delta + expected = pd.date_range('2000-01-01 02:00', + '2000-02-01 02:00', tz=tz) + tm.assert_index_equal(result, expected) + + def test_dti_iadd_timedeltalike(self, tz, delta): + rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz) + expected = pd.date_range('2000-01-01 02:00', + '2000-02-01 02:00', tz=tz) + rng += delta + tm.assert_index_equal(rng, expected) + + def test_dti_sub_timedeltalike(self, tz, delta): + rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz) + expected = pd.date_range('1999-12-31 22:00', + '2000-01-31 22:00', tz=tz) + result = rng - delta + tm.assert_index_equal(result, expected) + + def test_dti_isub_timedeltalike(self, tz, delta): + rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz) + expected = pd.date_range('1999-12-31 22:00', + '2000-01-31 22:00', tz=tz) + rng -= delta + tm.assert_index_equal(rng, expected) + + # ------------------------------------------------------------- + # Binary operations DatetimeIndex and TimedeltaIndex/array + def test_dti_add_tdi(self, tz): + # GH 17558 + dti = DatetimeIndex([Timestamp('2017-01-01', tz=tz)] * 10) + tdi = pd.timedelta_range('0 days', periods=10) + expected = pd.date_range('2017-01-01', periods=10, tz=tz) + + # add with TimdeltaIndex + result = dti + tdi + tm.assert_index_equal(result, expected) + + result = tdi + dti + tm.assert_index_equal(result, expected) + + # add with timedelta64 array + result = dti + tdi.values + tm.assert_index_equal(result, expected) + + result = tdi.values + dti + tm.assert_index_equal(result, expected) + + def test_dti_iadd_tdi(self, tz): + # GH 17558 + dti = DatetimeIndex([Timestamp('2017-01-01', tz=tz)] * 10) + tdi = pd.timedelta_range('0 days', periods=10) + expected = pd.date_range('2017-01-01', periods=10, tz=tz) + + # iadd with TimdeltaIndex + result = DatetimeIndex([Timestamp('2017-01-01', tz=tz)] * 10) + result += tdi + tm.assert_index_equal(result, expected) + + 
result = pd.timedelta_range('0 days', periods=10) + result += dti + tm.assert_index_equal(result, expected) + + # iadd with timedelta64 array + result = DatetimeIndex([Timestamp('2017-01-01', tz=tz)] * 10) + result += tdi.values + tm.assert_index_equal(result, expected) + + result = pd.timedelta_range('0 days', periods=10) + result += dti + tm.assert_index_equal(result, expected) + + def test_dti_sub_tdi(self, tz): + # GH 17558 + dti = DatetimeIndex([Timestamp('2017-01-01', tz=tz)] * 10) + tdi = pd.timedelta_range('0 days', periods=10) + expected = pd.date_range('2017-01-01', periods=10, tz=tz, freq='-1D') + + # sub with TimedeltaIndex + result = dti - tdi + tm.assert_index_equal(result, expected) + + msg = 'cannot subtract .*TimedeltaIndex' + with tm.assert_raises_regex(TypeError, msg): + tdi - dti + + # sub with timedelta64 array + result = dti - tdi.values + tm.assert_index_equal(result, expected) + + msg = 'cannot perform __neg__ with this index type:' + with tm.assert_raises_regex(TypeError, msg): + tdi.values - dti + + def test_dti_isub_tdi(self, tz): + # GH 17558 + dti = DatetimeIndex([Timestamp('2017-01-01', tz=tz)] * 10) + tdi = pd.timedelta_range('0 days', periods=10) + expected = pd.date_range('2017-01-01', periods=10, tz=tz, freq='-1D') + + # isub with TimedeltaIndex + result = DatetimeIndex([Timestamp('2017-01-01', tz=tz)] * 10) + result -= tdi + tm.assert_index_equal(result, expected) + + msg = 'cannot subtract .*TimedeltaIndex' + with tm.assert_raises_regex(TypeError, msg): + tdi -= dti + + # isub with timedelta64 array + result = DatetimeIndex([Timestamp('2017-01-01', tz=tz)] * 10) + result -= tdi.values + tm.assert_index_equal(result, expected) + + msg = '|'.join(['cannot perform __neg__ with this index type:', + 'ufunc subtract cannot use operands with types']) + with tm.assert_raises_regex(TypeError, msg): + tdi.values -= dti + + # ------------------------------------------------------------- + # Binary Operations DatetimeIndex and datetime-like 
+ # TODO: A couple other tests belong in this section. Move them in + # A PR where there isn't already a giant diff. + + def test_add_datetimelike_and_dti(self, addend): + # GH#9631 + dti = DatetimeIndex(['2011-01-01', '2011-01-02']) + msg = 'cannot add DatetimeIndex and {0}'.format( + type(addend).__name__) + with tm.assert_raises_regex(TypeError, msg): + dti + addend + with tm.assert_raises_regex(TypeError, msg): + addend + dti + + def test_add_datetimelike_and_dti_tz(self, addend): + # GH#9631 + dti_tz = DatetimeIndex(['2011-01-01', + '2011-01-02']).tz_localize('US/Eastern') + msg = 'cannot add DatetimeIndex and {0}'.format( + type(addend).__name__) + with tm.assert_raises_regex(TypeError, msg): + dti_tz + addend + with tm.assert_raises_regex(TypeError, msg): + addend + dti_tz + + # ------------------------------------------------------------- + # __add__/__sub__ with ndarray[datetime64] and ndarray[timedelta64] + + def test_dti_add_dt64_array_raises(self, tz): + dti = pd.date_range('2016-01-01', periods=3, tz=tz) + dtarr = dti.values + + with pytest.raises(TypeError): + dti + dtarr + with pytest.raises(TypeError): + dtarr + dti + + def test_dti_sub_dt64_array_naive(self): + dti = pd.date_range('2016-01-01', periods=3, tz=None) + dtarr = dti.values + + expected = dti - dti + result = dti - dtarr + tm.assert_index_equal(result, expected) + result = dtarr - dti + tm.assert_index_equal(result, expected) + + def test_dti_sub_dt64_array_aware_raises(self, tz): + if tz is None: + return + dti = pd.date_range('2016-01-01', periods=3, tz=tz) + dtarr = dti.values + + with pytest.raises(TypeError): + dti - dtarr + with pytest.raises(TypeError): + dtarr - dti + + def test_dti_add_td64_array(self, tz): + dti = pd.date_range('2016-01-01', periods=3, tz=tz) + tdi = dti - dti.shift(1) + tdarr = tdi.values + + expected = dti + tdi + result = dti + tdarr + tm.assert_index_equal(result, expected) + result = tdarr + dti + tm.assert_index_equal(result, expected) + + def 
test_dti_sub_td64_array(self, tz): + dti = pd.date_range('2016-01-01', periods=3, tz=tz) + tdi = dti - dti.shift(1) + tdarr = tdi.values + + expected = dti - tdi + result = dti - tdarr + tm.assert_index_equal(result, expected) + + with pytest.raises(TypeError): + tdarr - dti + + # ------------------------------------------------------------- + + def test_sub_dti_dti(self): + # previously performed setop (deprecated in 0.16.0), now changed to + # return subtraction -> TimeDeltaIndex (GH ...) + + dti = date_range('20130101', periods=3) + dti_tz = date_range('20130101', periods=3).tz_localize('US/Eastern') + dti_tz2 = date_range('20130101', periods=3).tz_localize('UTC') + expected = TimedeltaIndex([0, 0, 0]) + + result = dti - dti + tm.assert_index_equal(result, expected) + + result = dti_tz - dti_tz + tm.assert_index_equal(result, expected) + + with pytest.raises(TypeError): + dti_tz - dti + + with pytest.raises(TypeError): + dti - dti_tz + + with pytest.raises(TypeError): + dti_tz - dti_tz2 + + # isub + dti -= dti + tm.assert_index_equal(dti, expected) + + # different length raises ValueError + dti1 = date_range('20130101', periods=3) + dti2 = date_range('20130101', periods=4) + with pytest.raises(ValueError): + dti1 - dti2 + + # NaN propagation + dti1 = DatetimeIndex(['2012-01-01', np.nan, '2012-01-03']) + dti2 = DatetimeIndex(['2012-01-02', '2012-01-03', np.nan]) + expected = TimedeltaIndex(['1 days', np.nan, np.nan]) + result = dti2 - dti1 + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize('freq', [None, 'D']) + def test_sub_period(self, freq): + # GH#13078 + # not supported, check TypeError + p = pd.Period('2011-01-01', freq='D') + + idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], freq=freq) + + with pytest.raises(TypeError): + idx - p + + with pytest.raises(TypeError): + p - idx + + def test_ufunc_coercions(self): + idx = date_range('2011-01-01', periods=3, freq='2D', name='x') + + delta = np.timedelta64(1, 'D') + for result in [idx + 
delta, np.add(idx, delta)]: + assert isinstance(result, DatetimeIndex) + exp = date_range('2011-01-02', periods=3, freq='2D', name='x') + tm.assert_index_equal(result, exp) + assert result.freq == '2D' + + for result in [idx - delta, np.subtract(idx, delta)]: + assert isinstance(result, DatetimeIndex) + exp = date_range('2010-12-31', periods=3, freq='2D', name='x') + tm.assert_index_equal(result, exp) + assert result.freq == '2D' + + delta = np.array([np.timedelta64(1, 'D'), np.timedelta64(2, 'D'), + np.timedelta64(3, 'D')]) + for result in [idx + delta, np.add(idx, delta)]: + assert isinstance(result, DatetimeIndex) + exp = DatetimeIndex(['2011-01-02', '2011-01-05', '2011-01-08'], + freq='3D', name='x') + tm.assert_index_equal(result, exp) + assert result.freq == '3D' + + for result in [idx - delta, np.subtract(idx, delta)]: + assert isinstance(result, DatetimeIndex) + exp = DatetimeIndex(['2010-12-31', '2011-01-01', '2011-01-02'], + freq='D', name='x') + tm.assert_index_equal(result, exp) + assert result.freq == 'D' + + def test_datetimeindex_sub_timestamp_overflow(self): + dtimax = pd.to_datetime(['now', pd.Timestamp.max]) + dtimin = pd.to_datetime(['now', pd.Timestamp.min]) + + tsneg = Timestamp('1950-01-01') + ts_neg_variants = [tsneg, + tsneg.to_pydatetime(), + tsneg.to_datetime64().astype('datetime64[ns]'), + tsneg.to_datetime64().astype('datetime64[D]')] + + tspos = Timestamp('1980-01-01') + ts_pos_variants = [tspos, + tspos.to_pydatetime(), + tspos.to_datetime64().astype('datetime64[ns]'), + tspos.to_datetime64().astype('datetime64[D]')] + + for variant in ts_neg_variants: + with pytest.raises(OverflowError): + dtimax - variant + + expected = pd.Timestamp.max.value - tspos.value + for variant in ts_pos_variants: + res = dtimax - variant + assert res[1].value == expected + + expected = pd.Timestamp.min.value - tsneg.value + for variant in ts_neg_variants: + res = dtimin - variant + assert res[1].value == expected + + for variant in ts_pos_variants: + with 
pytest.raises(OverflowError): + dtimin - variant + + @pytest.mark.parametrize('names', [('foo', None, None), + ('baz', 'bar', None), + ('bar', 'bar', 'bar')]) + @pytest.mark.parametrize('tz', [None, 'America/Chicago']) + def test_dti_add_series(self, tz, names): + # GH#13905 + index = DatetimeIndex(['2016-06-28 05:30', '2016-06-28 05:31'], + tz=tz, name=names[0]) + ser = Series([Timedelta(seconds=5)] * 2, + index=index, name=names[1]) + expected = Series(index + Timedelta(seconds=5), + index=index, name=names[2]) + + # passing name arg isn't enough when names[2] is None + expected.name = names[2] + assert expected.dtype == index.dtype + result = ser + index + tm.assert_series_equal(result, expected) + result2 = index + ser + tm.assert_series_equal(result2, expected) + + expected = index + Timedelta(seconds=5) + result3 = ser.values + index + tm.assert_index_equal(result3, expected) + result4 = index + ser.values + tm.assert_index_equal(result4, expected) + + def test_dti_add_offset_array(self, tz): + # GH#18849 + dti = pd.date_range('2017-01-01', periods=2, tz=tz) + other = np.array([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)]) + + with tm.assert_produces_warning(PerformanceWarning): + res = dti + other + expected = DatetimeIndex([dti[n] + other[n] for n in range(len(dti))], + name=dti.name, freq='infer') + tm.assert_index_equal(res, expected) + + with tm.assert_produces_warning(PerformanceWarning): + res2 = other + dti + tm.assert_index_equal(res2, expected) + + @pytest.mark.parametrize('names', [(None, None, None), + ('foo', 'bar', None), + ('foo', 'foo', 'foo')]) + def test_dti_add_offset_index(self, tz, names): + # GH#18849, GH#19744 + dti = pd.date_range('2017-01-01', periods=2, tz=tz, name=names[0]) + other = pd.Index([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)], + name=names[1]) + + with tm.assert_produces_warning(PerformanceWarning): + res = dti + other + expected = DatetimeIndex([dti[n] + other[n] for n in range(len(dti))], + name=names[2], freq='infer') + 
tm.assert_index_equal(res, expected) + + with tm.assert_produces_warning(PerformanceWarning): + res2 = other + dti + tm.assert_index_equal(res2, expected) + + def test_dti_sub_offset_array(self, tz): + # GH#18824 + dti = pd.date_range('2017-01-01', periods=2, tz=tz) + other = np.array([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)]) + + with tm.assert_produces_warning(PerformanceWarning): + res = dti - other + expected = DatetimeIndex([dti[n] - other[n] for n in range(len(dti))], + name=dti.name, freq='infer') + tm.assert_index_equal(res, expected) + + @pytest.mark.parametrize('names', [(None, None, None), + ('foo', 'bar', None), + ('foo', 'foo', 'foo')]) + def test_dti_sub_offset_index(self, tz, names): + # GH#18824, GH#19744 + dti = pd.date_range('2017-01-01', periods=2, tz=tz, name=names[0]) + other = pd.Index([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)], + name=names[1]) + + with tm.assert_produces_warning(PerformanceWarning): + res = dti - other + expected = DatetimeIndex([dti[n] - other[n] for n in range(len(dti))], + name=names[2], freq='infer') + tm.assert_index_equal(res, expected) + + @pytest.mark.parametrize('names', [(None, None, None), + ('foo', 'bar', None), + ('foo', 'foo', 'foo')]) + def test_dti_with_offset_series(self, tz, names): + # GH#18849 + dti = pd.date_range('2017-01-01', periods=2, tz=tz, name=names[0]) + other = Series([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)], + name=names[1]) + + expected_add = Series([dti[n] + other[n] for n in range(len(dti))], + name=names[2]) + + with tm.assert_produces_warning(PerformanceWarning): + res = dti + other + tm.assert_series_equal(res, expected_add) + + with tm.assert_produces_warning(PerformanceWarning): + res2 = other + dti + tm.assert_series_equal(res2, expected_add) + + expected_sub = Series([dti[n] - other[n] for n in range(len(dti))], + name=names[2]) + + with tm.assert_produces_warning(PerformanceWarning): + res3 = dti - other + tm.assert_series_equal(res3, expected_sub) + + def 
test_dti_add_offset_tzaware(self): + dates = date_range('2012-11-01', periods=3, tz='US/Pacific') + offset = dates + pd.offsets.Hour(5) + assert dates[0] + pd.offsets.Hour(5) == offset[0] + + # GH#6818 + for tz in ['UTC', 'US/Pacific', 'Asia/Tokyo']: + dates = date_range('2010-11-01 00:00', periods=3, tz=tz, freq='H') + expected = DatetimeIndex(['2010-11-01 05:00', '2010-11-01 06:00', + '2010-11-01 07:00'], freq='H', tz=tz) + + offset = dates + pd.offsets.Hour(5) + tm.assert_index_equal(offset, expected) + offset = dates + np.timedelta64(5, 'h') + tm.assert_index_equal(offset, expected) + offset = dates + timedelta(hours=5) + tm.assert_index_equal(offset, expected) + + +@pytest.mark.parametrize('klass,assert_func', [ + (Series, tm.assert_series_equal), + (DatetimeIndex, tm.assert_index_equal)]) +def test_dt64_with_offset_array(klass, assert_func): + # GH#10699 + # array of offsets + box = Series if klass is Series else pd.Index + with tm.assert_produces_warning(PerformanceWarning): + s = klass([Timestamp('2000-1-1'), Timestamp('2000-2-1')]) + result = s + box([pd.offsets.DateOffset(years=1), + pd.offsets.MonthEnd()]) + exp = klass([Timestamp('2001-1-1'), Timestamp('2000-2-29')]) + assert_func(result, exp) + + # same offset + result = s + box([pd.offsets.DateOffset(years=1), + pd.offsets.DateOffset(years=1)]) + exp = klass([Timestamp('2001-1-1'), Timestamp('2001-2-1')]) + assert_func(result, exp) + + +@pytest.mark.parametrize('klass,assert_func', [ + (Series, tm.assert_series_equal), + (DatetimeIndex, tm.assert_index_equal)]) +def test_dt64_with_DateOffsets_relativedelta(klass, assert_func): + # GH#10699 + vec = klass([Timestamp('2000-01-05 00:15:00'), + Timestamp('2000-01-31 00:23:00'), + Timestamp('2000-01-01'), + Timestamp('2000-03-31'), + Timestamp('2000-02-29'), + Timestamp('2000-12-31'), + Timestamp('2000-05-15'), + Timestamp('2001-06-15')]) + + # DateOffset relativedelta fastpath + relative_kwargs = [('years', 2), ('months', 5), ('days', 3), + ('hours', 5), 
('minutes', 10), ('seconds', 2), + ('microseconds', 5)] + for i, kwd in enumerate(relative_kwargs): + op = pd.DateOffset(**dict([kwd])) + assert_func(klass([x + op for x in vec]), vec + op) + assert_func(klass([x - op for x in vec]), vec - op) + op = pd.DateOffset(**dict(relative_kwargs[:i + 1])) + assert_func(klass([x + op for x in vec]), vec + op) + assert_func(klass([x - op for x in vec]), vec - op) + + +@pytest.mark.parametrize('cls_and_kwargs', [ + 'YearBegin', ('YearBegin', {'month': 5}), + 'YearEnd', ('YearEnd', {'month': 5}), + 'MonthBegin', 'MonthEnd', + 'SemiMonthEnd', 'SemiMonthBegin', + 'Week', ('Week', {'weekday': 3}), + 'BusinessDay', 'BDay', 'QuarterEnd', 'QuarterBegin', + 'CustomBusinessDay', 'CDay', 'CBMonthEnd', + 'CBMonthBegin', 'BMonthBegin', 'BMonthEnd', + 'BusinessHour', 'BYearBegin', 'BYearEnd', + 'BQuarterBegin', ('LastWeekOfMonth', {'weekday': 2}), + ('FY5253Quarter', {'qtr_with_extra_week': 1, + 'startingMonth': 1, + 'weekday': 2, + 'variation': 'nearest'}), + ('FY5253', {'weekday': 0, 'startingMonth': 2, 'variation': 'nearest'}), + ('WeekOfMonth', {'weekday': 2, 'week': 2}), + 'Easter', ('DateOffset', {'day': 4}), + ('DateOffset', {'month': 5})]) +@pytest.mark.parametrize('normalize', [True, False]) +@pytest.mark.parametrize('klass,assert_func', [ + (Series, tm.assert_series_equal), + (DatetimeIndex, tm.assert_index_equal)]) +def test_dt64_with_DateOffsets(klass, assert_func, normalize, cls_and_kwargs): + # GH#10699 + # assert these are equal on a piecewise basis + vec = klass([Timestamp('2000-01-05 00:15:00'), + Timestamp('2000-01-31 00:23:00'), + Timestamp('2000-01-01'), + Timestamp('2000-03-31'), + Timestamp('2000-02-29'), + Timestamp('2000-12-31'), + Timestamp('2000-05-15'), + Timestamp('2001-06-15')]) + + if isinstance(cls_and_kwargs, tuple): + # If cls_name param is a tuple, then 2nd entry is kwargs for + # the offset constructor + cls_name, kwargs = cls_and_kwargs + else: + cls_name = cls_and_kwargs + kwargs = {} + + offset_cls = 
getattr(pd.offsets, cls_name) + + with warnings.catch_warnings(record=True): + for n in [0, 5]: + if (cls_name in ['WeekOfMonth', 'LastWeekOfMonth', + 'FY5253Quarter', 'FY5253'] and n == 0): + # passing n = 0 is invalid for these offset classes + continue + + offset = offset_cls(n, normalize=normalize, **kwargs) + assert_func(klass([x + offset for x in vec]), vec + offset) + assert_func(klass([x - offset for x in vec]), vec - offset) + assert_func(klass([offset + x for x in vec]), offset + vec) + + +# GH 10699 +@pytest.mark.parametrize('klass,assert_func', zip([Series, DatetimeIndex], + [tm.assert_series_equal, + tm.assert_index_equal])) +def test_datetime64_with_DateOffset(klass, assert_func): + s = klass(date_range('2000-01-01', '2000-01-31'), name='a') + result = s + pd.DateOffset(years=1) + result2 = pd.DateOffset(years=1) + s + exp = klass(date_range('2001-01-01', '2001-01-31'), name='a') + assert_func(result, exp) + assert_func(result2, exp) + + result = s - pd.DateOffset(years=1) + exp = klass(date_range('1999-01-01', '1999-01-31'), name='a') + assert_func(result, exp) + + s = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'), + pd.Timestamp('2000-02-15', tz='US/Central')], name='a') + result = s + pd.offsets.Day() + result2 = pd.offsets.Day() + s + exp = klass([Timestamp('2000-01-16 00:15:00', tz='US/Central'), + Timestamp('2000-02-16', tz='US/Central')], name='a') + assert_func(result, exp) + assert_func(result2, exp) + + s = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'), + pd.Timestamp('2000-02-15', tz='US/Central')], name='a') + result = s + pd.offsets.MonthEnd() + result2 = pd.offsets.MonthEnd() + s + exp = klass([Timestamp('2000-01-31 00:15:00', tz='US/Central'), + Timestamp('2000-02-29', tz='US/Central')], name='a') + assert_func(result, exp) + assert_func(result2, exp) + + +@pytest.mark.parametrize('years', [-1, 0, 1]) +@pytest.mark.parametrize('months', [-2, 0, 2]) +def test_shift_months(years, months): + s = 
DatetimeIndex([Timestamp('2000-01-05 00:15:00'), + Timestamp('2000-01-31 00:23:00'), + Timestamp('2000-01-01'), + Timestamp('2000-02-29'), + Timestamp('2000-12-31')]) + actual = DatetimeIndex(shift_months(s.asi8, years * 12 + months)) + + raw = [x + pd.offsets.DateOffset(years=years, months=months) + for x in s] + expected = DatetimeIndex(raw) + tm.assert_index_equal(actual, expected) diff --git a/pandas/tests/indexes/datetimes/test_astype.py b/pandas/tests/indexes/datetimes/test_astype.py index c9a695ee8db3b..8acdd301f241a 100644 --- a/pandas/tests/indexes/datetimes/test_astype.py +++ b/pandas/tests/indexes/datetimes/test_astype.py @@ -1,13 +1,19 @@ +import pytest + +import pytz +import dateutil import numpy as np from datetime import datetime +from dateutil.tz import tzlocal + import pandas as pd import pandas.util.testing as tm from pandas import (DatetimeIndex, date_range, Series, NaT, Index, Timestamp, Int64Index, Period) -class TestDatetimeIndex(tm.TestCase): +class TestDatetimeIndex(object): def test_astype(self): # GH 13149, GH 13209 @@ -24,8 +30,8 @@ def test_astype(self): rng = date_range('1/1/2000', periods=10) result = rng.astype('i8') - self.assert_index_equal(result, Index(rng.asi8)) - self.assert_numpy_array_equal(result.values, rng.asi8) + tm.assert_index_equal(result, Index(rng.asi8)) + tm.assert_numpy_array_equal(result.values, rng.asi8) def test_astype_with_tz(self): @@ -51,9 +57,21 @@ def test_astype_with_tz(self): dtype=object) tm.assert_series_equal(result, expected) + # GH 18951: tz-aware to tz-aware + idx = date_range('20170101', periods=4, tz='US/Pacific') + result = idx.astype('datetime64[ns, US/Eastern]') + expected = date_range('20170101 03:00:00', periods=4, tz='US/Eastern') + tm.assert_index_equal(result, expected) + + # GH 18951: tz-naive to tz-aware + idx = date_range('20170101', periods=4) + result = idx.astype('datetime64[ns, US/Eastern]') + expected = date_range('20170101', periods=4, tz='US/Eastern') + 
tm.assert_index_equal(result, expected) + def test_astype_str_compat(self): # GH 13149, GH 13209 - # verify that we are returing NaT as a string (and not unicode) + # verify that we are returning NaT as a string (and not unicode) idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN]) result = idx.astype(str) @@ -99,11 +117,11 @@ def test_astype_datetime64(self): result = idx.astype('datetime64[ns]') tm.assert_index_equal(result, idx) - self.assertFalse(result is idx) + assert result is not idx result = idx.astype('datetime64[ns]', copy=False) tm.assert_index_equal(result, idx) - self.assertTrue(result is idx) + assert result is idx idx_tz = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN], tz='EST') result = idx_tz.astype('datetime64[ns]') @@ -111,26 +129,57 @@ def test_astype_datetime64(self): dtype='datetime64[ns]') tm.assert_index_equal(result, expected) - def test_astype_raises(self): + def test_astype_object(self): + rng = date_range('1/1/2000', periods=20) + + casted = rng.astype('O') + exp_values = list(rng) + + tm.assert_index_equal(casted, Index(exp_values, dtype=np.object_)) + assert casted.tolist() == exp_values + + @pytest.mark.parametrize('tz', [None, 'Asia/Tokyo']) + def test_astype_object_tz(self, tz): + idx = pd.date_range(start='2013-01-01', periods=4, freq='M', + name='idx', tz=tz) + expected_list = [Timestamp('2013-01-31', tz=tz), + Timestamp('2013-02-28', tz=tz), + Timestamp('2013-03-31', tz=tz), + Timestamp('2013-04-30', tz=tz)] + expected = pd.Index(expected_list, dtype=object, name='idx') + result = idx.astype(object) + tm.assert_index_equal(result, expected) + assert idx.tolist() == expected_list + + def test_astype_object_with_nat(self): + idx = DatetimeIndex([datetime(2013, 1, 1), datetime(2013, 1, 2), + pd.NaT, datetime(2013, 1, 4)], name='idx') + expected_list = [Timestamp('2013-01-01'), + Timestamp('2013-01-02'), pd.NaT, + Timestamp('2013-01-04')] + expected = pd.Index(expected_list, dtype=object, name='idx') + result = 
idx.astype(object) + tm.assert_index_equal(result, expected) + assert idx.tolist() == expected_list + + @pytest.mark.parametrize('dtype', [ + float, 'timedelta64', 'timedelta64[ns]', 'datetime64', + 'datetime64[D]']) + def test_astype_raises(self, dtype): # GH 13149, GH 13209 idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN]) - - self.assertRaises(ValueError, idx.astype, float) - self.assertRaises(ValueError, idx.astype, 'timedelta64') - self.assertRaises(ValueError, idx.astype, 'timedelta64[ns]') - self.assertRaises(ValueError, idx.astype, 'datetime64') - self.assertRaises(ValueError, idx.astype, 'datetime64[D]') + msg = 'Cannot cast DatetimeIndex to dtype' + with tm.assert_raises_regex(TypeError, msg): + idx.astype(dtype) def test_index_convert_to_datetime_array(self): - tm._skip_if_no_pytz() - def _check_rng(rng): converted = rng.to_pydatetime() - tm.assertIsInstance(converted, np.ndarray) + assert isinstance(converted, np.ndarray) for x, stamp in zip(converted, rng): - tm.assertIsInstance(x, datetime) - self.assertEqual(x, stamp.to_pydatetime()) - self.assertEqual(x.tzinfo, stamp.tzinfo) + assert isinstance(x, datetime) + assert x == stamp.to_pydatetime() + assert x.tzinfo == stamp.tzinfo rng = date_range('20090415', '20090519') rng_eastern = date_range('20090415', '20090519', tz='US/Eastern') @@ -141,16 +190,13 @@ def _check_rng(rng): _check_rng(rng_utc) def test_index_convert_to_datetime_array_explicit_pytz(self): - tm._skip_if_no_pytz() - import pytz - def _check_rng(rng): converted = rng.to_pydatetime() - tm.assertIsInstance(converted, np.ndarray) + assert isinstance(converted, np.ndarray) for x, stamp in zip(converted, rng): - tm.assertIsInstance(x, datetime) - self.assertEqual(x, stamp.to_pydatetime()) - self.assertEqual(x.tzinfo, stamp.tzinfo) + assert isinstance(x, datetime) + assert x == stamp.to_pydatetime() + assert x.tzinfo == stamp.tzinfo rng = date_range('20090415', '20090519') rng_eastern = date_range('20090415', '20090519', @@ -162,16 
+208,13 @@ def _check_rng(rng): _check_rng(rng_utc) def test_index_convert_to_datetime_array_dateutil(self): - tm._skip_if_no_dateutil() - import dateutil - def _check_rng(rng): converted = rng.to_pydatetime() - tm.assertIsInstance(converted, np.ndarray) + assert isinstance(converted, np.ndarray) for x, stamp in zip(converted, rng): - tm.assertIsInstance(x, datetime) - self.assertEqual(x, stamp.to_pydatetime()) - self.assertEqual(x.tzinfo, stamp.tzinfo) + assert isinstance(x, datetime) + assert x == stamp.to_pydatetime() + assert x.tzinfo == stamp.tzinfo rng = date_range('20090415', '20090519') rng_eastern = date_range('20090415', '20090519', @@ -183,9 +226,9 @@ def _check_rng(rng): _check_rng(rng_utc) -class TestToPeriod(tm.TestCase): +class TestToPeriod(object): - def setUp(self): + def setup_method(self, method): data = [Timestamp('2007-01-01 10:11:12.123456Z'), Timestamp('2007-01-01 10:11:13.789123Z')] self.index = DatetimeIndex(data) @@ -194,21 +237,19 @@ def test_to_period_millisecond(self): index = self.index period = index.to_period(freq='L') - self.assertEqual(2, len(period)) - self.assertEqual(period[0], Period('2007-01-01 10:11:12.123Z', 'L')) - self.assertEqual(period[1], Period('2007-01-01 10:11:13.789Z', 'L')) + assert 2 == len(period) + assert period[0] == Period('2007-01-01 10:11:12.123Z', 'L') + assert period[1] == Period('2007-01-01 10:11:13.789Z', 'L') def test_to_period_microsecond(self): index = self.index period = index.to_period(freq='U') - self.assertEqual(2, len(period)) - self.assertEqual(period[0], Period('2007-01-01 10:11:12.123456Z', 'U')) - self.assertEqual(period[1], Period('2007-01-01 10:11:13.789123Z', 'U')) + assert 2 == len(period) + assert period[0] == Period('2007-01-01 10:11:12.123456Z', 'U') + assert period[1] == Period('2007-01-01 10:11:13.789123Z', 'U') def test_to_period_tz_pytz(self): - tm._skip_if_no_pytz() - from dateutil.tz import tzlocal from pytz import utc as UTC xp = date_range('1/1/2000', '4/1/2000').to_period() @@ 
-218,7 +259,7 @@ def test_to_period_tz_pytz(self): result = ts.to_period()[0] expected = ts[0].to_period() - self.assertEqual(result, expected) + assert result == expected tm.assert_index_equal(ts.to_period(), xp) ts = date_range('1/1/2000', '4/1/2000', tz=UTC) @@ -226,7 +267,7 @@ def test_to_period_tz_pytz(self): result = ts.to_period()[0] expected = ts[0].to_period() - self.assertEqual(result, expected) + assert result == expected tm.assert_index_equal(ts.to_period(), xp) ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal()) @@ -234,14 +275,10 @@ def test_to_period_tz_pytz(self): result = ts.to_period()[0] expected = ts[0].to_period() - self.assertEqual(result, expected) + assert result == expected tm.assert_index_equal(ts.to_period(), xp) def test_to_period_tz_explicit_pytz(self): - tm._skip_if_no_pytz() - import pytz - from dateutil.tz import tzlocal - xp = date_range('1/1/2000', '4/1/2000').to_period() ts = date_range('1/1/2000', '4/1/2000', tz=pytz.timezone('US/Eastern')) @@ -249,7 +286,7 @@ def test_to_period_tz_explicit_pytz(self): result = ts.to_period()[0] expected = ts[0].to_period() - self.assertTrue(result == expected) + assert result == expected tm.assert_index_equal(ts.to_period(), xp) ts = date_range('1/1/2000', '4/1/2000', tz=pytz.utc) @@ -257,7 +294,7 @@ def test_to_period_tz_explicit_pytz(self): result = ts.to_period()[0] expected = ts[0].to_period() - self.assertTrue(result == expected) + assert result == expected tm.assert_index_equal(ts.to_period(), xp) ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal()) @@ -265,14 +302,10 @@ def test_to_period_tz_explicit_pytz(self): result = ts.to_period()[0] expected = ts[0].to_period() - self.assertTrue(result == expected) + assert result == expected tm.assert_index_equal(ts.to_period(), xp) def test_to_period_tz_dateutil(self): - tm._skip_if_no_dateutil() - import dateutil - from dateutil.tz import tzlocal - xp = date_range('1/1/2000', '4/1/2000').to_period() ts = date_range('1/1/2000', '4/1/2000', 
tz='dateutil/US/Eastern') @@ -280,7 +313,7 @@ def test_to_period_tz_dateutil(self): result = ts.to_period()[0] expected = ts[0].to_period() - self.assertTrue(result == expected) + assert result == expected tm.assert_index_equal(ts.to_period(), xp) ts = date_range('1/1/2000', '4/1/2000', tz=dateutil.tz.tzutc()) @@ -288,7 +321,7 @@ def test_to_period_tz_dateutil(self): result = ts.to_period()[0] expected = ts[0].to_period() - self.assertTrue(result == expected) + assert result == expected tm.assert_index_equal(ts.to_period(), xp) ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal()) @@ -296,15 +329,21 @@ def test_to_period_tz_dateutil(self): result = ts.to_period()[0] expected = ts[0].to_period() - self.assertTrue(result == expected) + assert result == expected tm.assert_index_equal(ts.to_period(), xp) - def test_astype_object(self): - # NumPy 1.6.1 weak ns support - rng = date_range('1/1/2000', periods=20) - - casted = rng.astype('O') - exp_values = list(rng) - - tm.assert_index_equal(casted, Index(exp_values, dtype=np.object_)) - self.assertEqual(casted.tolist(), exp_values) + def test_to_period_nofreq(self): + idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-04']) + pytest.raises(ValueError, idx.to_period) + + idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03'], + freq='infer') + assert idx.freqstr == 'D' + expected = pd.PeriodIndex(['2000-01-01', '2000-01-02', + '2000-01-03'], freq='D') + tm.assert_index_equal(idx.to_period(), expected) + + # GH 7606 + idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03']) + assert idx.freqstr is None + tm.assert_index_equal(idx.to_period(), expected) diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index 772d76305cff2..176f5bd0c1a2a 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -1,51 +1,69 @@ +import pytest + +import pytz import numpy as np from 
datetime import timedelta import pandas as pd -from pandas import tslib, offsets, lib +from pandas import offsets import pandas.util.testing as tm -from pandas.tslib import OutOfBoundsDatetime +from pandas._libs.tslib import OutOfBoundsDatetime +from pandas._libs.tslibs import conversion from pandas import (DatetimeIndex, Index, Timestamp, datetime, date_range, to_datetime) -class TestDatetimeIndex(tm.TestCase): +class TestDatetimeIndex(object): + + def test_construction_caching(self): + + df = pd.DataFrame({'dt': pd.date_range('20130101', periods=3), + 'dttz': pd.date_range('20130101', periods=3, + tz='US/Eastern'), + 'dt_with_null': [pd.Timestamp('20130101'), pd.NaT, + pd.Timestamp('20130103')], + 'dtns': pd.date_range('20130101', periods=3, + freq='ns')}) + assert df.dttz.dtype.tz.zone == 'US/Eastern' def test_construction_with_alt(self): i = pd.date_range('20130101', periods=5, freq='H', tz='US/Eastern') i2 = DatetimeIndex(i, dtype=i.dtype) - self.assert_index_equal(i, i2) + tm.assert_index_equal(i, i2) + assert i.tz.zone == 'US/Eastern' i2 = DatetimeIndex(i.tz_localize(None).asi8, tz=i.dtype.tz) - self.assert_index_equal(i, i2) + tm.assert_index_equal(i, i2) + assert i.tz.zone == 'US/Eastern' i2 = DatetimeIndex(i.tz_localize(None).asi8, dtype=i.dtype) - self.assert_index_equal(i, i2) + tm.assert_index_equal(i, i2) + assert i.tz.zone == 'US/Eastern' i2 = DatetimeIndex( i.tz_localize(None).asi8, dtype=i.dtype, tz=i.dtype.tz) - self.assert_index_equal(i, i2) + tm.assert_index_equal(i, i2) + assert i.tz.zone == 'US/Eastern' # localize into the provided tz i2 = DatetimeIndex(i.tz_localize(None).asi8, tz='UTC') expected = i.tz_localize(None).tz_localize('UTC') - self.assert_index_equal(i2, expected) + tm.assert_index_equal(i2, expected) # incompat tz/dtype - self.assertRaises(ValueError, lambda: DatetimeIndex( + pytest.raises(ValueError, lambda: DatetimeIndex( i.tz_localize(None).asi8, dtype=i.dtype, tz='US/Pacific')) def 
test_construction_index_with_mixed_timezones(self): - # GH 11488 - # no tz results in DatetimeIndex + # gh-11488: no tz results in DatetimeIndex result = Index([Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx') exp = DatetimeIndex([Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNone(result.tz) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is None # same tz results in DatetimeIndex result = Index([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), @@ -54,10 +72,10 @@ def test_construction_index_with_mixed_timezones(self): exp = DatetimeIndex( [Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00') ], tz='Asia/Tokyo', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNotNone(result.tz) - self.assertEqual(result.tz, exp.tz) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is not None + assert result.tz == exp.tz # same tz results in DatetimeIndex (DST) result = Index([Timestamp('2011-01-01 10:00', tz='US/Eastern'), @@ -66,20 +84,20 @@ def test_construction_index_with_mixed_timezones(self): exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), Timestamp('2011-08-01 10:00')], tz='US/Eastern', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNotNone(result.tz) - self.assertEqual(result.tz, exp.tz) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is not None + assert result.tz == exp.tz - # different tz results in Index(dtype=object) + # Different tz results in Index(dtype=object) result = Index([Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00', tz='US/Eastern')], 
name='idx') exp = Index([Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00', tz='US/Eastern')], dtype='object', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertFalse(isinstance(result, DatetimeIndex)) + tm.assert_index_equal(result, exp, exact=True) + assert not isinstance(result, DatetimeIndex) result = Index([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), Timestamp('2011-01-02 10:00', tz='US/Eastern')], @@ -87,37 +105,37 @@ def test_construction_index_with_mixed_timezones(self): exp = Index([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), Timestamp('2011-01-02 10:00', tz='US/Eastern')], dtype='object', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertFalse(isinstance(result, DatetimeIndex)) + tm.assert_index_equal(result, exp, exact=True) + assert not isinstance(result, DatetimeIndex) # length = 1 result = Index([Timestamp('2011-01-01')], name='idx') exp = DatetimeIndex([Timestamp('2011-01-01')], name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNone(result.tz) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is None # length = 1 with tz result = Index( [Timestamp('2011-01-01 10:00', tz='Asia/Tokyo')], name='idx') exp = DatetimeIndex([Timestamp('2011-01-01 10:00')], tz='Asia/Tokyo', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNotNone(result.tz) - self.assertEqual(result.tz, exp.tz) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is not None + assert result.tz == exp.tz def test_construction_index_with_mixed_timezones_with_NaT(self): - # GH 11488 + # see gh-11488 result = Index([pd.NaT, Timestamp('2011-01-01'), pd.NaT, Timestamp('2011-01-02')], name='idx') exp = DatetimeIndex([pd.NaT, Timestamp('2011-01-01'), 
pd.NaT, Timestamp('2011-01-02')], name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNone(result.tz) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is None - # same tz results in DatetimeIndex + # Same tz results in DatetimeIndex result = Index([pd.NaT, Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), pd.NaT, Timestamp('2011-01-02 10:00', tz='Asia/Tokyo')], @@ -125,10 +143,10 @@ def test_construction_index_with_mixed_timezones_with_NaT(self): exp = DatetimeIndex([pd.NaT, Timestamp('2011-01-01 10:00'), pd.NaT, Timestamp('2011-01-02 10:00')], tz='Asia/Tokyo', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNotNone(result.tz) - self.assertEqual(result.tz, exp.tz) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is not None + assert result.tz == exp.tz # same tz results in DatetimeIndex (DST) result = Index([Timestamp('2011-01-01 10:00', tz='US/Eastern'), @@ -138,10 +156,10 @@ def test_construction_index_with_mixed_timezones_with_NaT(self): exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), pd.NaT, Timestamp('2011-08-01 10:00')], tz='US/Eastern', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNotNone(result.tz) - self.assertEqual(result.tz, exp.tz) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is not None + assert result.tz == exp.tz # different tz results in Index(dtype=object) result = Index([pd.NaT, Timestamp('2011-01-01 10:00'), @@ -151,8 +169,8 @@ def test_construction_index_with_mixed_timezones_with_NaT(self): exp = Index([pd.NaT, Timestamp('2011-01-01 10:00'), pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')], dtype='object', 
name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertFalse(isinstance(result, DatetimeIndex)) + tm.assert_index_equal(result, exp, exact=True) + assert not isinstance(result, DatetimeIndex) result = Index([pd.NaT, Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), pd.NaT, Timestamp('2011-01-02 10:00', @@ -160,23 +178,24 @@ def test_construction_index_with_mixed_timezones_with_NaT(self): exp = Index([pd.NaT, Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')], dtype='object', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertFalse(isinstance(result, DatetimeIndex)) + tm.assert_index_equal(result, exp, exact=True) + assert not isinstance(result, DatetimeIndex) # all NaT result = Index([pd.NaT, pd.NaT], name='idx') exp = DatetimeIndex([pd.NaT, pd.NaT], name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNone(result.tz) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is None # all NaT with tz result = Index([pd.NaT, pd.NaT], tz='Asia/Tokyo', name='idx') exp = DatetimeIndex([pd.NaT, pd.NaT], tz='Asia/Tokyo', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNotNone(result.tz) - self.assertEqual(result.tz, exp.tz) + + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is not None + assert result.tz == exp.tz def test_construction_dti_with_mixed_timezones(self): # GH 11488 (not changed, added explicit tests) @@ -186,8 +205,8 @@ def test_construction_dti_with_mixed_timezones(self): [Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx') exp = DatetimeIndex( [Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx') - self.assert_index_equal(result, exp, exact=True) - 
self.assertTrue(isinstance(result, DatetimeIndex)) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) # same tz results in DatetimeIndex result = DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), @@ -197,8 +216,8 @@ def test_construction_dti_with_mixed_timezones(self): exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00')], tz='Asia/Tokyo', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) # same tz results in DatetimeIndex (DST) result = DatetimeIndex([Timestamp('2011-01-01 10:00', tz='US/Eastern'), @@ -208,8 +227,8 @@ def test_construction_dti_with_mixed_timezones(self): exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), Timestamp('2011-08-01 10:00')], tz='US/Eastern', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) # different tz coerces tz-naive to tz-awareIndex(dtype=object) result = DatetimeIndex([Timestamp('2011-01-01 10:00'), @@ -218,27 +237,29 @@ def test_construction_dti_with_mixed_timezones(self): exp = DatetimeIndex([Timestamp('2011-01-01 05:00'), Timestamp('2011-01-02 10:00')], tz='US/Eastern', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) # tz mismatch affecting to tz-aware raises TypeError/ValueError - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), Timestamp('2011-01-02 10:00', tz='US/Eastern')], name='idx') - with tm.assertRaisesRegexp(TypeError, 'data is already tz-aware'): + with tm.assert_raises_regex(TypeError, 
+ 'data is already tz-aware'): DatetimeIndex([Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00', tz='US/Eastern')], tz='Asia/Tokyo', name='idx') - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), Timestamp('2011-01-02 10:00', tz='US/Eastern')], tz='US/Eastern', name='idx') - with tm.assertRaisesRegexp(TypeError, 'data is already tz-aware'): + with tm.assert_raises_regex(TypeError, + 'data is already tz-aware'): # passing tz should results in DatetimeIndex, then mismatch raises # TypeError Index([pd.NaT, Timestamp('2011-01-01 10:00'), @@ -264,7 +285,7 @@ def test_construction_outofbounds(self): # coerces to object tm.assert_index_equal(Index(dates), exp) - with tm.assertRaises(OutOfBoundsDatetime): + with pytest.raises(OutOfBoundsDatetime): # can't create DatetimeIndex DatetimeIndex(dates) @@ -286,13 +307,14 @@ def test_constructor_coverage(self): exp = date_range('1/1/2000', periods=10) tm.assert_index_equal(rng, exp) - self.assertRaises(ValueError, DatetimeIndex, start='1/1/2000', - periods='foo', freq='D') + msg = 'periods must be a number, got foo' + with tm.assert_raises_regex(TypeError, msg): + DatetimeIndex(start='1/1/2000', periods='foo', freq='D') - self.assertRaises(ValueError, DatetimeIndex, start='1/1/2000', - end='1/10/2000') + pytest.raises(ValueError, DatetimeIndex, start='1/1/2000', + end='1/10/2000') - self.assertRaises(ValueError, DatetimeIndex, '1/1/2000') + pytest.raises(ValueError, DatetimeIndex, '1/1/2000') # generator expression gen = (datetime(2000, 1, 1) + timedelta(i) for i in range(10)) @@ -320,66 +342,60 @@ def test_constructor_coverage(self): tm.assert_index_equal(from_ints, expected) # non-conforming - self.assertRaises(ValueError, DatetimeIndex, - ['2000-01-01', '2000-01-02', '2000-01-04'], freq='D') - - self.assertRaises(ValueError, DatetimeIndex, start='2011-01-01', - freq='b') - self.assertRaises(ValueError, DatetimeIndex, end='2011-01-01', - 
freq='B') - self.assertRaises(ValueError, DatetimeIndex, periods=10, freq='D') - - def test_constructor_datetime64_tzformat(self): - # GH 6572 - tm._skip_if_no_pytz() - import pytz - # ISO 8601 format results in pytz.FixedOffset - for freq in ['AS', 'W-SUN']: - idx = date_range('2013-01-01T00:00:00-05:00', - '2016-01-01T23:59:59-05:00', freq=freq) - expected = date_range('2013-01-01T00:00:00', '2016-01-01T23:59:59', - freq=freq, tz=pytz.FixedOffset(-300)) - tm.assert_index_equal(idx, expected) - # Unable to use `US/Eastern` because of DST - expected_i8 = date_range('2013-01-01T00:00:00', - '2016-01-01T23:59:59', freq=freq, - tz='America/Lima') - self.assert_numpy_array_equal(idx.asi8, expected_i8.asi8) - - idx = date_range('2013-01-01T00:00:00+09:00', - '2016-01-01T23:59:59+09:00', freq=freq) - expected = date_range('2013-01-01T00:00:00', '2016-01-01T23:59:59', - freq=freq, tz=pytz.FixedOffset(540)) - tm.assert_index_equal(idx, expected) - expected_i8 = date_range('2013-01-01T00:00:00', - '2016-01-01T23:59:59', freq=freq, - tz='Asia/Tokyo') - self.assert_numpy_array_equal(idx.asi8, expected_i8.asi8) - - tm._skip_if_no_dateutil() + pytest.raises(ValueError, DatetimeIndex, + ['2000-01-01', '2000-01-02', '2000-01-04'], freq='D') + + pytest.raises(ValueError, DatetimeIndex, start='2011-01-01', + freq='b') + pytest.raises(ValueError, DatetimeIndex, end='2011-01-01', + freq='B') + pytest.raises(ValueError, DatetimeIndex, periods=10, freq='D') + + @pytest.mark.parametrize('freq', ['AS', 'W-SUN']) + def test_constructor_datetime64_tzformat(self, freq): + # see GH#6572: ISO 8601 format results in pytz.FixedOffset + idx = date_range('2013-01-01T00:00:00-05:00', + '2016-01-01T23:59:59-05:00', freq=freq) + expected = date_range('2013-01-01T00:00:00', '2016-01-01T23:59:59', + freq=freq, tz=pytz.FixedOffset(-300)) + tm.assert_index_equal(idx, expected) + # Unable to use `US/Eastern` because of DST + expected_i8 = date_range('2013-01-01T00:00:00', + '2016-01-01T23:59:59', 
freq=freq, + tz='America/Lima') + tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8) + + idx = date_range('2013-01-01T00:00:00+09:00', + '2016-01-01T23:59:59+09:00', freq=freq) + expected = date_range('2013-01-01T00:00:00', '2016-01-01T23:59:59', + freq=freq, tz=pytz.FixedOffset(540)) + tm.assert_index_equal(idx, expected) + expected_i8 = date_range('2013-01-01T00:00:00', + '2016-01-01T23:59:59', freq=freq, + tz='Asia/Tokyo') + tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8) # Non ISO 8601 format results in dateutil.tz.tzoffset - for freq in ['AS', 'W-SUN']: - idx = date_range('2013/1/1 0:00:00-5:00', '2016/1/1 23:59:59-5:00', - freq=freq) - expected = date_range('2013-01-01T00:00:00', '2016-01-01T23:59:59', - freq=freq, tz=pytz.FixedOffset(-300)) - tm.assert_index_equal(idx, expected) - # Unable to use `US/Eastern` because of DST - expected_i8 = date_range('2013-01-01T00:00:00', - '2016-01-01T23:59:59', freq=freq, - tz='America/Lima') - self.assert_numpy_array_equal(idx.asi8, expected_i8.asi8) - - idx = date_range('2013/1/1 0:00:00+9:00', - '2016/1/1 23:59:59+09:00', freq=freq) - expected = date_range('2013-01-01T00:00:00', '2016-01-01T23:59:59', - freq=freq, tz=pytz.FixedOffset(540)) - tm.assert_index_equal(idx, expected) - expected_i8 = date_range('2013-01-01T00:00:00', - '2016-01-01T23:59:59', freq=freq, - tz='Asia/Tokyo') - self.assert_numpy_array_equal(idx.asi8, expected_i8.asi8) + idx = date_range('2013/1/1 0:00:00-5:00', '2016/1/1 23:59:59-5:00', + freq=freq) + expected = date_range('2013-01-01T00:00:00', '2016-01-01T23:59:59', + freq=freq, tz=pytz.FixedOffset(-300)) + tm.assert_index_equal(idx, expected) + # Unable to use `US/Eastern` because of DST + expected_i8 = date_range('2013-01-01T00:00:00', + '2016-01-01T23:59:59', freq=freq, + tz='America/Lima') + tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8) + + idx = date_range('2013/1/1 0:00:00+9:00', + '2016/1/1 23:59:59+09:00', freq=freq) + expected = date_range('2013-01-01T00:00:00', 
'2016-01-01T23:59:59', + freq=freq, tz=pytz.FixedOffset(540)) + tm.assert_index_equal(idx, expected) + expected_i8 = date_range('2013-01-01T00:00:00', + '2016-01-01T23:59:59', freq=freq, + tz='Asia/Tokyo') + tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8) def test_constructor_dtype(self): @@ -398,120 +414,117 @@ def test_constructor_dtype(self): idx = DatetimeIndex(['2013-01-01', '2013-01-02'], dtype='datetime64[ns, US/Eastern]') - self.assertRaises(ValueError, - lambda: DatetimeIndex(idx, - dtype='datetime64[ns]')) + pytest.raises(ValueError, + lambda: DatetimeIndex(idx, + dtype='datetime64[ns]')) # this is effectively trying to convert tz's - self.assertRaises(TypeError, - lambda: DatetimeIndex(idx, - dtype='datetime64[ns, CET]')) - self.assertRaises(ValueError, - lambda: DatetimeIndex( - idx, tz='CET', - dtype='datetime64[ns, US/Eastern]')) + pytest.raises(TypeError, + lambda: DatetimeIndex(idx, + dtype='datetime64[ns, CET]')) + pytest.raises(ValueError, + lambda: DatetimeIndex( + idx, tz='CET', + dtype='datetime64[ns, US/Eastern]')) result = DatetimeIndex(idx, dtype='datetime64[ns, US/Eastern]') tm.assert_index_equal(idx, result) def test_constructor_name(self): idx = DatetimeIndex(start='2000-01-01', periods=1, freq='A', name='TEST') - self.assertEqual(idx.name, 'TEST') + assert idx.name == 'TEST' def test_000constructor_resolution(self): # 2252 t1 = Timestamp((1352934390 * 1000000000) + 1000000 + 1000 + 1) idx = DatetimeIndex([t1]) - self.assertEqual(idx.nanosecond[0], t1.nanosecond) + assert idx.nanosecond[0] == t1.nanosecond -class TestTimeSeries(tm.TestCase): +class TestTimeSeries(object): def test_dti_constructor_preserve_dti_freq(self): rng = date_range('1/1/2000', '1/2/2000', freq='5min') rng2 = DatetimeIndex(rng) - self.assertEqual(rng.freq, rng2.freq) + assert rng.freq == rng2.freq - def test_dti_constructor_years_only(self): + @pytest.mark.parametrize('tz', [None, 'UTC', 'Asia/Tokyo', + 'dateutil/US/Pacific']) + def 
test_dti_constructor_years_only(self, tz): # GH 6961 - for tz in [None, 'UTC', 'Asia/Tokyo', 'dateutil/US/Pacific']: - rng1 = date_range('2014', '2015', freq='M', tz=tz) - expected1 = date_range('2014-01-31', '2014-12-31', freq='M', tz=tz) + rng1 = date_range('2014', '2015', freq='M', tz=tz) + expected1 = date_range('2014-01-31', '2014-12-31', freq='M', tz=tz) - rng2 = date_range('2014', '2015', freq='MS', tz=tz) - expected2 = date_range('2014-01-01', '2015-01-01', freq='MS', - tz=tz) + rng2 = date_range('2014', '2015', freq='MS', tz=tz) + expected2 = date_range('2014-01-01', '2015-01-01', freq='MS', tz=tz) - rng3 = date_range('2014', '2020', freq='A', tz=tz) - expected3 = date_range('2014-12-31', '2019-12-31', freq='A', tz=tz) + rng3 = date_range('2014', '2020', freq='A', tz=tz) + expected3 = date_range('2014-12-31', '2019-12-31', freq='A', tz=tz) - rng4 = date_range('2014', '2020', freq='AS', tz=tz) - expected4 = date_range('2014-01-01', '2020-01-01', freq='AS', - tz=tz) + rng4 = date_range('2014', '2020', freq='AS', tz=tz) + expected4 = date_range('2014-01-01', '2020-01-01', freq='AS', tz=tz) - for rng, expected in [(rng1, expected1), (rng2, expected2), - (rng3, expected3), (rng4, expected4)]: - tm.assert_index_equal(rng, expected) + for rng, expected in [(rng1, expected1), (rng2, expected2), + (rng3, expected3), (rng4, expected4)]: + tm.assert_index_equal(rng, expected) - def test_dti_constructor_small_int(self): + @pytest.mark.parametrize('dtype', [np.int64, np.int32, np.int16, np.int8]) + def test_dti_constructor_small_int(self, dtype): # GH 13721 exp = DatetimeIndex(['1970-01-01 00:00:00.00000000', '1970-01-01 00:00:00.00000001', '1970-01-01 00:00:00.00000002']) - for dtype in [np.int64, np.int32, np.int16, np.int8]: - arr = np.array([0, 10, 20], dtype=dtype) - tm.assert_index_equal(DatetimeIndex(arr), exp) + arr = np.array([0, 10, 20], dtype=dtype) + tm.assert_index_equal(DatetimeIndex(arr), exp) def test_ctor_str_intraday(self): rng = 
DatetimeIndex(['1-1-2000 00:00:01']) - self.assertEqual(rng[0].second, 1) + assert rng[0].second == 1 def test_is_(self): dti = DatetimeIndex(start='1/1/2005', end='12/1/2005', freq='M') - self.assertTrue(dti.is_(dti)) - self.assertTrue(dti.is_(dti.view())) - self.assertFalse(dti.is_(dti.copy())) + assert dti.is_(dti) + assert dti.is_(dti.view()) + assert not dti.is_(dti.copy()) def test_index_cast_datetime64_other_units(self): arr = np.arange(0, 100, 10, dtype=np.int64).view('M8[D]') idx = Index(arr) - self.assertTrue((idx.values == tslib.cast_to_nanoseconds(arr)).all()) + assert (idx.values == conversion.ensure_datetime64ns(arr)).all() def test_constructor_int64_nocopy(self): - # #1624 + # GH#1624 arr = np.arange(1000, dtype=np.int64) index = DatetimeIndex(arr) arr[50:100] = -1 - self.assertTrue((index.asi8[50:100] == -1).all()) + assert (index.asi8[50:100] == -1).all() arr = np.arange(1000, dtype=np.int64) index = DatetimeIndex(arr, copy=True) arr[50:100] = -1 - self.assertTrue((index.asi8[50:100] != -1).all()) - - def test_from_freq_recreate_from_data(self): - freqs = ['M', 'Q', 'A', 'D', 'B', 'BH', 'T', 'S', 'L', 'U', 'H', 'N', - 'C'] + assert (index.asi8[50:100] != -1).all() - for f in freqs: - org = DatetimeIndex(start='2001/02/01 09:00', freq=f, periods=1) - idx = DatetimeIndex(org, freq=f) - tm.assert_index_equal(idx, org) + @pytest.mark.parametrize('freq', ['M', 'Q', 'A', 'D', 'B', 'BH', + 'T', 'S', 'L', 'U', 'H', 'N', 'C']) + def test_from_freq_recreate_from_data(self, freq): + org = DatetimeIndex(start='2001/02/01 09:00', freq=freq, periods=1) + idx = DatetimeIndex(org, freq=freq) + tm.assert_index_equal(idx, org) - org = DatetimeIndex(start='2001/02/01 09:00', freq=f, - tz='US/Pacific', periods=1) - idx = DatetimeIndex(org, freq=f, tz='US/Pacific') - tm.assert_index_equal(idx, org) + org = DatetimeIndex(start='2001/02/01 09:00', freq=freq, + tz='US/Pacific', periods=1) + idx = DatetimeIndex(org, freq=freq, tz='US/Pacific') + tm.assert_index_equal(idx, 
org) def test_datetimeindex_constructor_misc(self): arr = ['1/1/2005', '1/2/2005', 'Jn 3, 2005', '2005-01-04'] - self.assertRaises(Exception, DatetimeIndex, arr) + pytest.raises(Exception, DatetimeIndex, arr) arr = ['1/1/2005', '1/2/2005', '1/3/2005', '2005-01-04'] idx1 = DatetimeIndex(arr) @@ -519,7 +532,7 @@ def test_datetimeindex_constructor_misc(self): arr = [datetime(2005, 1, 1), '1/2/2005', '1/3/2005', '2005-01-04'] idx2 = DatetimeIndex(arr) - arr = [lib.Timestamp(datetime(2005, 1, 1)), '1/2/2005', '1/3/2005', + arr = [Timestamp(datetime(2005, 1, 1)), '1/2/2005', '1/3/2005', '2005-01-04'] idx3 = DatetimeIndex(arr) @@ -540,34 +553,34 @@ def test_datetimeindex_constructor_misc(self): tm.assert_index_equal(idx7, idx8) for other in [idx2, idx3, idx4, idx5, idx6]: - self.assertTrue((idx1.values == other.values).all()) + assert (idx1.values == other.values).all() sdate = datetime(1999, 12, 25) edate = datetime(2000, 1, 1) idx = DatetimeIndex(start=sdate, freq='1B', periods=20) - self.assertEqual(len(idx), 20) - self.assertEqual(idx[0], sdate + 0 * offsets.BDay()) - self.assertEqual(idx.freq, 'B') + assert len(idx) == 20 + assert idx[0] == sdate + 0 * offsets.BDay() + assert idx.freq == 'B' idx = DatetimeIndex(end=edate, freq=('D', 5), periods=20) - self.assertEqual(len(idx), 20) - self.assertEqual(idx[-1], edate) - self.assertEqual(idx.freq, '5D') + assert len(idx) == 20 + assert idx[-1] == edate + assert idx.freq == '5D' idx1 = DatetimeIndex(start=sdate, end=edate, freq='W-SUN') idx2 = DatetimeIndex(start=sdate, end=edate, freq=offsets.Week(weekday=6)) - self.assertEqual(len(idx1), len(idx2)) - self.assertEqual(idx1.offset, idx2.offset) + assert len(idx1) == len(idx2) + assert idx1.offset == idx2.offset idx1 = DatetimeIndex(start=sdate, end=edate, freq='QS') idx2 = DatetimeIndex(start=sdate, end=edate, freq=offsets.QuarterBegin(startingMonth=1)) - self.assertEqual(len(idx1), len(idx2)) - self.assertEqual(idx1.offset, idx2.offset) + assert len(idx1) == len(idx2) + 
assert idx1.offset == idx2.offset idx1 = DatetimeIndex(start=sdate, end=edate, freq='BQ') idx2 = DatetimeIndex(start=sdate, end=edate, freq=offsets.BQuarterEnd(startingMonth=12)) - self.assertEqual(len(idx1), len(idx2)) - self.assertEqual(idx1.offset, idx2.offset) + assert len(idx1) == len(idx2) + assert idx1.offset == idx2.offset diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 80664ce246bf8..d2ec465468dfb 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -1,34 +1,112 @@ """ -test date_range, bdate_range, cdate_range -construction from the convenience range functions +test date_range, bdate_range construction from the convenience range functions """ +import pytest + import numpy as np +import pytz +from pytz import timezone from datetime import datetime, timedelta, time import pandas as pd import pandas.util.testing as tm +import pandas.util._test_decorators as td from pandas import compat -from pandas.tseries.index import bdate_range, cdate_range -from pandas import date_range, offsets, DatetimeIndex, Timestamp -from pandas.tseries.offsets import (generate_range, CDay, BDay, - DateOffset, MonthEnd) +from pandas import date_range, bdate_range, offsets, DatetimeIndex, Timestamp +from pandas.tseries.offsets import (generate_range, CDay, BDay, DateOffset, + MonthEnd, prefix_mapping) from pandas.tests.series.common import TestData START, END = datetime(2009, 1, 1), datetime(2010, 1, 1) -def eq_gen_range(kwargs, expected): - rng = generate_range(**kwargs) - assert (np.array_equal(list(rng), expected)) +class TestTimestampEquivDateRange(object): + # Older tests in TestTimeSeries constructed their `stamp` objects + # using `date_range` instead of the `Timestamp` constructor. + # TestTimestampEquivDateRange checks that these are equivalent in the + # pertinent cases. 
+ + def test_date_range_timestamp_equiv(self): + rng = date_range('20090415', '20090519', tz='US/Eastern') + stamp = rng[0] + + ts = Timestamp('20090415', tz='US/Eastern', freq='D') + assert ts == stamp + + def test_date_range_timestamp_equiv_dateutil(self): + rng = date_range('20090415', '20090519', tz='dateutil/US/Eastern') + stamp = rng[0] + + ts = Timestamp('20090415', tz='dateutil/US/Eastern', freq='D') + assert ts == stamp + + def test_date_range_timestamp_equiv_explicit_pytz(self): + rng = date_range('20090415', '20090519', + tz=pytz.timezone('US/Eastern')) + stamp = rng[0] + + ts = Timestamp('20090415', tz=pytz.timezone('US/Eastern'), freq='D') + assert ts == stamp + + @td.skip_if_windows_python_3 + def test_date_range_timestamp_equiv_explicit_dateutil(self): + from pandas._libs.tslibs.timezones import dateutil_gettz as gettz + + rng = date_range('20090415', '20090519', tz=gettz('US/Eastern')) + stamp = rng[0] + + ts = Timestamp('20090415', tz=gettz('US/Eastern'), freq='D') + assert ts == stamp + + def test_date_range_timestamp_equiv_from_datetime_instance(self): + datetime_instance = datetime(2014, 3, 4) + # build a timestamp with a frequency, since then it supports + # addition/subtraction of integers + timestamp_instance = date_range(datetime_instance, periods=1, + freq='D')[0] + + ts = Timestamp(datetime_instance, freq='D') + assert ts == timestamp_instance + + def test_date_range_timestamp_equiv_preserve_frequency(self): + timestamp_instance = date_range('2014-03-05', periods=1, freq='D')[0] + ts = Timestamp('2014-03-05', freq='D') + + assert timestamp_instance == ts -class TestDateRanges(TestData, tm.TestCase): +class TestDateRanges(TestData): def test_date_range_gen_error(self): rng = date_range('1/1/2000 00:00', '1/1/2000 00:18', freq='5min') - self.assertEqual(len(rng), 4) + assert len(rng) == 4 + + @pytest.mark.parametrize("freq", ["AS", "YS"]) + def test_begin_year_alias(self, freq): + # see gh-9313 + rng = date_range("1/1/2013", "7/1/2017", 
freq=freq) + exp = pd.DatetimeIndex(["2013-01-01", "2014-01-01", + "2015-01-01", "2016-01-01", + "2017-01-01"], freq=freq) + tm.assert_index_equal(rng, exp) + + @pytest.mark.parametrize("freq", ["A", "Y"]) + def test_end_year_alias(self, freq): + # see gh-9313 + rng = date_range("1/1/2013", "7/1/2017", freq=freq) + exp = pd.DatetimeIndex(["2013-12-31", "2014-12-31", + "2015-12-31", "2016-12-31"], freq=freq) + tm.assert_index_equal(rng, exp) + + @pytest.mark.parametrize("freq", ["BA", "BY"]) + def test_business_end_year_alias(self, freq): + # see gh-9313 + rng = date_range("1/1/2013", "7/1/2017", freq=freq) + exp = pd.DatetimeIndex(["2013-12-31", "2014-12-31", + "2015-12-31", "2016-12-30"], freq=freq) + tm.assert_index_equal(rng, exp) def test_date_range_negative_freq(self): # GH 11018 @@ -36,20 +114,20 @@ def test_date_range_negative_freq(self): exp = pd.DatetimeIndex(['2011-12-31', '2009-12-31', '2007-12-31'], freq='-2A') tm.assert_index_equal(rng, exp) - self.assertEqual(rng.freq, '-2A') + assert rng.freq == '-2A' rng = date_range('2011-01-31', freq='-2M', periods=3) exp = pd.DatetimeIndex(['2011-01-31', '2010-11-30', '2010-09-30'], freq='-2M') tm.assert_index_equal(rng, exp) - self.assertEqual(rng.freq, '-2M') + assert rng.freq == '-2M' def test_date_range_bms_bug(self): # #1645 rng = date_range('1/1/2000', periods=10, freq='BMS') ex_first = Timestamp('2000-01-03') - self.assertEqual(rng[0], ex_first) + assert rng[0] == ex_first def test_date_range_normalize(self): snap = datetime.today() @@ -66,21 +144,23 @@ def test_date_range_normalize(self): freq='B') the_time = time(8, 15) for val in rng: - self.assertEqual(val.time(), the_time) + assert val.time() == the_time def test_date_range_fy5252(self): dr = date_range(start="2013-01-01", periods=2, freq=offsets.FY5253( startingMonth=1, weekday=3, variation="nearest")) - self.assertEqual(dr[0], Timestamp('2013-01-31')) - self.assertEqual(dr[1], Timestamp('2014-01-30')) + assert dr[0] == Timestamp('2013-01-31') + 
assert dr[1] == Timestamp('2014-01-30') def test_date_range_ambiguous_arguments(self): # #2538 start = datetime(2011, 1, 1, 5, 3, 40) end = datetime(2011, 1, 1, 8, 9, 40) - self.assertRaises(ValueError, date_range, start, end, freq='s', - periods=10) + msg = ('Of the three parameters: start, end, and periods, ' + 'exactly two must be specified') + with tm.assert_raises_regex(ValueError, msg): + date_range(start, end, periods=10, freq='s') def test_date_range_businesshour(self): idx = DatetimeIndex(['2014-07-04 09:00', '2014-07-04 10:00', @@ -118,60 +198,75 @@ def test_date_range_businesshour(self): def test_range_misspecified(self): # GH #1095 + msg = ('Of the three parameters: start, end, and periods, ' + 'exactly two must be specified') + + with tm.assert_raises_regex(ValueError, msg): + date_range(start='1/1/2000') + + with tm.assert_raises_regex(ValueError, msg): + date_range(end='1/1/2000') - self.assertRaises(ValueError, date_range, '1/1/2000') - self.assertRaises(ValueError, date_range, end='1/1/2000') - self.assertRaises(ValueError, date_range, periods=10) + with tm.assert_raises_regex(ValueError, msg): + date_range(periods=10) - self.assertRaises(ValueError, date_range, '1/1/2000', freq='H') - self.assertRaises(ValueError, date_range, end='1/1/2000', freq='H') - self.assertRaises(ValueError, date_range, periods=10, freq='H') + with tm.assert_raises_regex(ValueError, msg): + date_range(start='1/1/2000', freq='H') - def test_compat_replace(self): + with tm.assert_raises_regex(ValueError, msg): + date_range(end='1/1/2000', freq='H') + + with tm.assert_raises_regex(ValueError, msg): + date_range(periods=10, freq='H') + + with tm.assert_raises_regex(ValueError, msg): + date_range() + + @pytest.mark.parametrize('f', [compat.long, int]) + def test_compat_replace(self, f): # https://github.com/statsmodels/statsmodels/issues/3349 # replace should take ints/longs for compat - - for f in [compat.long, int]: - result = date_range(Timestamp('1960-04-01 00:00:00', - 
freq='QS-JAN'), - periods=f(76), - freq='QS-JAN') - self.assertEqual(len(result), 76) + result = date_range(Timestamp('1960-04-01 00:00:00', freq='QS-JAN'), + periods=f(76), freq='QS-JAN') + assert len(result) == 76 def test_catch_infinite_loop(self): offset = offsets.DateOffset(minute=5) # blow up, don't loop forever - self.assertRaises(Exception, date_range, datetime(2011, 11, 11), - datetime(2011, 11, 12), freq=offset) + pytest.raises(Exception, date_range, datetime(2011, 11, 11), + datetime(2011, 11, 12), freq=offset) -class TestGenRangeGeneration(tm.TestCase): +class TestGenRangeGeneration(object): def test_generate(self): rng1 = list(generate_range(START, END, offset=BDay())) rng2 = list(generate_range(START, END, time_rule='B')) - self.assertEqual(rng1, rng2) + assert rng1 == rng2 def test_generate_cday(self): rng1 = list(generate_range(START, END, offset=CDay())) rng2 = list(generate_range(START, END, time_rule='C')) - self.assertEqual(rng1, rng2) + assert rng1 == rng2 def test_1(self): - eq_gen_range(dict(start=datetime(2009, 3, 25), periods=2), - [datetime(2009, 3, 25), datetime(2009, 3, 26)]) + rng = list(generate_range(start=datetime(2009, 3, 25), periods=2)) + expected = [datetime(2009, 3, 25), datetime(2009, 3, 26)] + assert rng == expected def test_2(self): - eq_gen_range(dict(start=datetime(2008, 1, 1), - end=datetime(2008, 1, 3)), - [datetime(2008, 1, 1), - datetime(2008, 1, 2), - datetime(2008, 1, 3)]) + rng = list(generate_range(start=datetime(2008, 1, 1), + end=datetime(2008, 1, 3))) + expected = [datetime(2008, 1, 1), + datetime(2008, 1, 2), + datetime(2008, 1, 3)] + assert rng == expected def test_3(self): - eq_gen_range(dict(start=datetime(2008, 1, 5), - end=datetime(2008, 1, 6)), - []) + rng = list(generate_range(start=datetime(2008, 1, 5), + end=datetime(2008, 1, 6))) + expected = [] + assert rng == expected def test_precision_finer_than_offset(self): # GH 9907 @@ -190,68 +285,85 @@ def test_precision_finer_than_offset(self): freq='Q-DEC', 
tz=None) expected2 = DatetimeIndex(expected2_list, dtype='datetime64[ns]', freq='W-SUN', tz=None) - self.assert_index_equal(result1, expected1) - self.assert_index_equal(result2, expected2) + tm.assert_index_equal(result1, expected1) + tm.assert_index_equal(result2, expected2) + + dt1, dt2 = '2017-01-01', '2017-01-01' + tz1, tz2 = 'US/Eastern', 'Europe/London' + @pytest.mark.parametrize("start,end", [ + (pd.Timestamp(dt1, tz=tz1), pd.Timestamp(dt2)), + (pd.Timestamp(dt1), pd.Timestamp(dt2, tz=tz2)), + (pd.Timestamp(dt1, tz=tz1), pd.Timestamp(dt2, tz=tz2)), + (pd.Timestamp(dt1, tz=tz2), pd.Timestamp(dt2, tz=tz1)) + ]) + def test_mismatching_tz_raises_err(self, start, end): + # issue 18488 + with pytest.raises(TypeError): + pd.date_range(start, end) + with pytest.raises(TypeError): + pd.DatetimeIndex(start, end, freq=BDay()) -class TestBusinessDateRange(tm.TestCase): - def setUp(self): - self.rng = bdate_range(START, END) +class TestBusinessDateRange(object): def test_constructor(self): bdate_range(START, END, freq=BDay()) bdate_range(START, periods=20, freq=BDay()) bdate_range(end=START, periods=20, freq=BDay()) - self.assertRaises(ValueError, date_range, '2011-1-1', '2012-1-1', 'B') - self.assertRaises(ValueError, bdate_range, '2011-1-1', '2012-1-1', 'B') + + msg = 'periods must be a number, got B' + with tm.assert_raises_regex(TypeError, msg): + date_range('2011-1-1', '2012-1-1', 'B') + + with tm.assert_raises_regex(TypeError, msg): + bdate_range('2011-1-1', '2012-1-1', 'B') def test_naive_aware_conflicts(self): naive = bdate_range(START, END, freq=BDay(), tz=None) - aware = bdate_range(START, END, freq=BDay(), - tz="Asia/Hong_Kong") - self.assertRaisesRegexp(TypeError, "tz-naive.*tz-aware", - naive.join, aware) - self.assertRaisesRegexp(TypeError, "tz-naive.*tz-aware", - aware.join, naive) + aware = bdate_range(START, END, freq=BDay(), tz="Asia/Hong_Kong") + + msg = 'tz-naive.*tz-aware' + with tm.assert_raises_regex(TypeError, msg): + naive.join(aware) + + with 
tm.assert_raises_regex(TypeError, msg): + aware.join(naive) def test_cached_range(self): DatetimeIndex._cached_range(START, END, offset=BDay()) DatetimeIndex._cached_range(START, periods=20, offset=BDay()) DatetimeIndex._cached_range(end=START, periods=20, offset=BDay()) - self.assertRaisesRegexp(TypeError, "offset", - DatetimeIndex._cached_range, - START, END) + with tm.assert_raises_regex(TypeError, "offset"): + DatetimeIndex._cached_range(START, END) - self.assertRaisesRegexp(TypeError, "specify period", - DatetimeIndex._cached_range, START, - offset=BDay()) + with tm.assert_raises_regex(TypeError, "specify period"): + DatetimeIndex._cached_range(START, offset=BDay()) - self.assertRaisesRegexp(TypeError, "specify period", - DatetimeIndex._cached_range, end=END, - offset=BDay()) + with tm.assert_raises_regex(TypeError, "specify period"): + DatetimeIndex._cached_range(end=END, offset=BDay()) - self.assertRaisesRegexp(TypeError, "start or end", - DatetimeIndex._cached_range, periods=20, - offset=BDay()) + with tm.assert_raises_regex(TypeError, "start or end"): + DatetimeIndex._cached_range(periods=20, offset=BDay()) def test_cached_range_bug(self): rng = date_range('2010-09-01 05:00:00', periods=50, freq=DateOffset(hours=6)) - self.assertEqual(len(rng), 50) - self.assertEqual(rng[0], datetime(2010, 9, 1, 5)) + assert len(rng) == 50 + assert rng[0] == datetime(2010, 9, 1, 5) def test_timezone_comparaison_bug(self): # smoke test start = Timestamp('20130220 10:00', tz='US/Eastern') result = date_range(start, periods=2, tz='US/Eastern') - self.assertEqual(len(result), 2) + assert len(result) == 2 def test_timezone_comparaison_assert(self): start = Timestamp('20130220 10:00', tz='US/Eastern') - self.assertRaises(AssertionError, date_range, start, periods=2, - tz='Europe/Berlin') + msg = 'Inferred time zone not equal to passed time zone' + with tm.assert_raises_regex(AssertionError, msg): + date_range(start, periods=2, tz='Europe/Berlin') def test_misc(self): end = 
datetime(2009, 5, 13) @@ -265,14 +377,17 @@ def test_misc(self): def test_date_parse_failure(self): badly_formed_date = '2007/100/1' - self.assertRaises(ValueError, Timestamp, badly_formed_date) + with pytest.raises(ValueError): + Timestamp(badly_formed_date) + + with pytest.raises(ValueError): + bdate_range(start=badly_formed_date, periods=10) - self.assertRaises(ValueError, bdate_range, start=badly_formed_date, - periods=10) - self.assertRaises(ValueError, bdate_range, end=badly_formed_date, - periods=10) - self.assertRaises(ValueError, bdate_range, badly_formed_date, - badly_formed_date) + with pytest.raises(ValueError): + bdate_range(end=badly_formed_date, periods=10) + + with pytest.raises(ValueError): + bdate_range(badly_formed_date, badly_formed_date) def test_daterange_bug_456(self): # GH #456 @@ -281,11 +396,12 @@ def test_daterange_bug_456(self): rng2.offset = BDay() result = rng1.union(rng2) - tm.assertIsInstance(result, DatetimeIndex) + assert isinstance(result, DatetimeIndex) def test_error_with_zero_monthends(self): - self.assertRaises(ValueError, date_range, '1/1/2000', '1/1/2001', - freq=MonthEnd(0)) + msg = r'Offset <0 \* MonthEnds> did not increment date' + with tm.assert_raises_regex(ValueError, msg): + date_range('1/1/2000', '1/1/2001', freq=MonthEnd(0)) def test_range_bug(self): # GH #770 @@ -293,37 +409,31 @@ def test_range_bug(self): result = date_range("2011-1-1", "2012-1-31", freq=offset) start = datetime(2011, 1, 1) - exp_values = [start + i * offset for i in range(5)] - tm.assert_index_equal(result, DatetimeIndex(exp_values)) + expected = DatetimeIndex([start + i * offset for i in range(5)]) + tm.assert_index_equal(result, expected) def test_range_tz_pytz(self): - # GH 2906 - tm._skip_if_no_pytz() - from pytz import timezone - + # see gh-2906 tz = timezone('US/Eastern') start = tz.localize(datetime(2011, 1, 1)) end = tz.localize(datetime(2011, 1, 3)) dr = date_range(start=start, periods=3) - self.assertEqual(dr.tz.zone, tz.zone) - 
self.assertEqual(dr[0], start) - self.assertEqual(dr[2], end) + assert dr.tz.zone == tz.zone + assert dr[0] == start + assert dr[2] == end dr = date_range(end=end, periods=3) - self.assertEqual(dr.tz.zone, tz.zone) - self.assertEqual(dr[0], start) - self.assertEqual(dr[2], end) + assert dr.tz.zone == tz.zone + assert dr[0] == start + assert dr[2] == end dr = date_range(start=start, end=end) - self.assertEqual(dr.tz.zone, tz.zone) - self.assertEqual(dr[0], start) - self.assertEqual(dr[2], end) + assert dr.tz.zone == tz.zone + assert dr[0] == start + assert dr[2] == end def test_range_tz_dst_straddle_pytz(self): - - tm._skip_if_no_pytz() - from pytz import timezone tz = timezone('US/Eastern') dates = [(tz.localize(datetime(2014, 3, 6)), tz.localize(datetime(2014, 3, 12))), @@ -331,64 +441,64 @@ def test_range_tz_dst_straddle_pytz(self): tz.localize(datetime(2013, 11, 6)))] for (start, end) in dates: dr = date_range(start, end, freq='D') - self.assertEqual(dr[0], start) - self.assertEqual(dr[-1], end) - self.assertEqual(np.all(dr.hour == 0), True) + assert dr[0] == start + assert dr[-1] == end + assert np.all(dr.hour == 0) dr = date_range(start, end, freq='D', tz='US/Eastern') - self.assertEqual(dr[0], start) - self.assertEqual(dr[-1], end) - self.assertEqual(np.all(dr.hour == 0), True) + assert dr[0] == start + assert dr[-1] == end + assert np.all(dr.hour == 0) dr = date_range(start.replace(tzinfo=None), end.replace( tzinfo=None), freq='D', tz='US/Eastern') - self.assertEqual(dr[0], start) - self.assertEqual(dr[-1], end) - self.assertEqual(np.all(dr.hour == 0), True) + assert dr[0] == start + assert dr[-1] == end + assert np.all(dr.hour == 0) def test_range_tz_dateutil(self): - # GH 2906 - tm._skip_if_no_dateutil() + # see gh-2906 + # Use maybe_get_tz to fix filename in tz under dateutil. 
- from pandas.tslib import maybe_get_tz + from pandas._libs.tslibs.timezones import maybe_get_tz tz = lambda x: maybe_get_tz('dateutil/' + x) start = datetime(2011, 1, 1, tzinfo=tz('US/Eastern')) end = datetime(2011, 1, 3, tzinfo=tz('US/Eastern')) dr = date_range(start=start, periods=3) - self.assertTrue(dr.tz == tz('US/Eastern')) - self.assertTrue(dr[0] == start) - self.assertTrue(dr[2] == end) + assert dr.tz == tz('US/Eastern') + assert dr[0] == start + assert dr[2] == end dr = date_range(end=end, periods=3) - self.assertTrue(dr.tz == tz('US/Eastern')) - self.assertTrue(dr[0] == start) - self.assertTrue(dr[2] == end) + assert dr.tz == tz('US/Eastern') + assert dr[0] == start + assert dr[2] == end dr = date_range(start=start, end=end) - self.assertTrue(dr.tz == tz('US/Eastern')) - self.assertTrue(dr[0] == start) - self.assertTrue(dr[2] == end) + assert dr.tz == tz('US/Eastern') + assert dr[0] == start + assert dr[2] == end - def test_range_closed(self): + @pytest.mark.parametrize('freq', ["1D", "3D", "2M", "7W", "3H", "A"]) + def test_range_closed(self, freq): begin = datetime(2011, 1, 1) end = datetime(2014, 1, 1) - for freq in ["1D", "3D", "2M", "7W", "3H", "A"]: - closed = date_range(begin, end, closed=None, freq=freq) - left = date_range(begin, end, closed="left", freq=freq) - right = date_range(begin, end, closed="right", freq=freq) - expected_left = left - expected_right = right + closed = date_range(begin, end, closed=None, freq=freq) + left = date_range(begin, end, closed="left", freq=freq) + right = date_range(begin, end, closed="right", freq=freq) + expected_left = left + expected_right = right - if end == closed[-1]: - expected_left = closed[:-1] - if begin == closed[0]: - expected_right = closed[1:] + if end == closed[-1]: + expected_left = closed[:-1] + if begin == closed[0]: + expected_right = closed[1:] - self.assert_index_equal(expected_left, left) - self.assert_index_equal(expected_right, right) + tm.assert_index_equal(expected_left, left) + 
tm.assert_index_equal(expected_right, right) def test_range_closed_with_tz_aware_start_end(self): # GH12409, GH12684 @@ -407,8 +517,8 @@ def test_range_closed_with_tz_aware_start_end(self): if begin == closed[0]: expected_right = closed[1:] - self.assert_index_equal(expected_left, left) - self.assert_index_equal(expected_right, right) + tm.assert_index_equal(expected_left, left) + tm.assert_index_equal(expected_right, right) begin = Timestamp('2011/1/1') end = Timestamp('2014/1/1') @@ -430,37 +540,37 @@ def test_range_closed_with_tz_aware_start_end(self): if begintz == closed[0]: expected_right = closed[1:] - self.assert_index_equal(expected_left, left) - self.assert_index_equal(expected_right, right) - - def test_range_closed_boundary(self): - # GH 11804 - for closed in ['right', 'left', None]: - right_boundary = date_range('2015-09-12', '2015-12-01', - freq='QS-MAR', closed=closed) - left_boundary = date_range('2015-09-01', '2015-09-12', - freq='QS-MAR', closed=closed) - both_boundary = date_range('2015-09-01', '2015-12-01', - freq='QS-MAR', closed=closed) - expected_right = expected_left = expected_both = both_boundary - - if closed == 'right': - expected_left = both_boundary[1:] - if closed == 'left': - expected_right = both_boundary[:-1] - if closed is None: - expected_right = both_boundary[1:] - expected_left = both_boundary[:-1] - - self.assert_index_equal(right_boundary, expected_right) - self.assert_index_equal(left_boundary, expected_left) - self.assert_index_equal(both_boundary, expected_both) + tm.assert_index_equal(expected_left, left) + tm.assert_index_equal(expected_right, right) + + @pytest.mark.parametrize('closed', ['right', 'left', None]) + def test_range_closed_boundary(self, closed): + # GH#11804 + right_boundary = date_range('2015-09-12', '2015-12-01', + freq='QS-MAR', closed=closed) + left_boundary = date_range('2015-09-01', '2015-09-12', + freq='QS-MAR', closed=closed) + both_boundary = date_range('2015-09-01', '2015-12-01', + freq='QS-MAR', 
closed=closed) + expected_right = expected_left = expected_both = both_boundary + + if closed == 'right': + expected_left = both_boundary[1:] + if closed == 'left': + expected_right = both_boundary[:-1] + if closed is None: + expected_right = both_boundary[1:] + expected_left = both_boundary[:-1] + + tm.assert_index_equal(right_boundary, expected_right) + tm.assert_index_equal(left_boundary, expected_left) + tm.assert_index_equal(both_boundary, expected_both) def test_years_only(self): # GH 6961 dr = date_range('2014', '2015', freq='M') - self.assertEqual(dr[0], datetime(2014, 1, 31)) - self.assertEqual(dr[-1], datetime(2014, 12, 31)) + assert dr[0] == datetime(2014, 1, 31) + assert dr[-1] == datetime(2014, 12, 31) def test_freq_divides_end_in_nanos(self): # GH 10885 @@ -476,20 +586,23 @@ def test_freq_divides_end_in_nanos(self): '2005-01-13 15:45:00'], dtype='datetime64[ns]', freq='345T', tz=None) - self.assert_index_equal(result_1, expected_1) - self.assert_index_equal(result_2, expected_2) + tm.assert_index_equal(result_1, expected_1) + tm.assert_index_equal(result_2, expected_2) -class TestCustomDateRange(tm.TestCase): - def setUp(self): - self.rng = cdate_range(START, END) +class TestCustomDateRange(object): def test_constructor(self): - cdate_range(START, END, freq=CDay()) - cdate_range(START, periods=20, freq=CDay()) - cdate_range(end=START, periods=20, freq=CDay()) - self.assertRaises(ValueError, date_range, '2011-1-1', '2012-1-1', 'C') - self.assertRaises(ValueError, cdate_range, '2011-1-1', '2012-1-1', 'C') + bdate_range(START, END, freq=CDay()) + bdate_range(START, periods=20, freq=CDay()) + bdate_range(end=START, periods=20, freq=CDay()) + + msg = 'periods must be a number, got C' + with tm.assert_raises_regex(TypeError, msg): + date_range('2011-1-1', '2012-1-1', 'C') + + with tm.assert_raises_regex(TypeError, msg): + bdate_range('2011-1-1', '2012-1-1', 'C') def test_cached_range(self): DatetimeIndex._cached_range(START, END, offset=CDay()) @@ -498,66 
+611,93 @@ def test_cached_range(self): DatetimeIndex._cached_range(end=START, periods=20, offset=CDay()) - self.assertRaises(Exception, DatetimeIndex._cached_range, START, END) + # with pytest.raises(TypeError): + with tm.assert_raises_regex(TypeError, "offset"): + DatetimeIndex._cached_range(START, END) - self.assertRaises(Exception, DatetimeIndex._cached_range, START, - freq=CDay()) + # with pytest.raises(TypeError): + with tm.assert_raises_regex(TypeError, "specify period"): + DatetimeIndex._cached_range(START, offset=CDay()) - self.assertRaises(Exception, DatetimeIndex._cached_range, end=END, - freq=CDay()) + # with pytest.raises(TypeError): + with tm.assert_raises_regex(TypeError, "specify period"): + DatetimeIndex._cached_range(end=END, offset=CDay()) - self.assertRaises(Exception, DatetimeIndex._cached_range, periods=20, - freq=CDay()) + # with pytest.raises(TypeError): + with tm.assert_raises_regex(TypeError, "start or end"): + DatetimeIndex._cached_range(periods=20, offset=CDay()) def test_misc(self): end = datetime(2009, 5, 13) - dr = cdate_range(end=end, periods=20) + dr = bdate_range(end=end, periods=20, freq='C') firstDate = end - 19 * CDay() assert len(dr) == 20 assert dr[0] == firstDate assert dr[-1] == end - def test_date_parse_failure(self): - badly_formed_date = '2007/100/1' - - self.assertRaises(ValueError, Timestamp, badly_formed_date) - - self.assertRaises(ValueError, cdate_range, start=badly_formed_date, - periods=10) - self.assertRaises(ValueError, cdate_range, end=badly_formed_date, - periods=10) - self.assertRaises(ValueError, cdate_range, badly_formed_date, - badly_formed_date) - def test_daterange_bug_456(self): # GH #456 - rng1 = cdate_range('12/5/2011', '12/5/2011') - rng2 = cdate_range('12/2/2011', '12/5/2011') + rng1 = bdate_range('12/5/2011', '12/5/2011', freq='C') + rng2 = bdate_range('12/2/2011', '12/5/2011', freq='C') rng2.offset = CDay() result = rng1.union(rng2) - tm.assertIsInstance(result, DatetimeIndex) + assert 
isinstance(result, DatetimeIndex) def test_cdaterange(self): - rng = cdate_range('2013-05-01', periods=3) - xp = DatetimeIndex(['2013-05-01', '2013-05-02', '2013-05-03']) - self.assert_index_equal(xp, rng) + result = bdate_range('2013-05-01', periods=3, freq='C') + expected = DatetimeIndex(['2013-05-01', '2013-05-02', '2013-05-03']) + tm.assert_index_equal(result, expected) def test_cdaterange_weekmask(self): - rng = cdate_range('2013-05-01', periods=3, - weekmask='Sun Mon Tue Wed Thu') - xp = DatetimeIndex(['2013-05-01', '2013-05-02', '2013-05-05']) - self.assert_index_equal(xp, rng) + result = bdate_range('2013-05-01', periods=3, freq='C', + weekmask='Sun Mon Tue Wed Thu') + expected = DatetimeIndex(['2013-05-01', '2013-05-02', '2013-05-05']) + tm.assert_index_equal(result, expected) + + # raise with non-custom freq + msg = ('a custom frequency string is required when holidays or ' + 'weekmask are passed, got frequency B') + with tm.assert_raises_regex(ValueError, msg): + bdate_range('2013-05-01', periods=3, + weekmask='Sun Mon Tue Wed Thu') def test_cdaterange_holidays(self): - rng = cdate_range('2013-05-01', periods=3, holidays=['2013-05-01']) - xp = DatetimeIndex(['2013-05-02', '2013-05-03', '2013-05-06']) - self.assert_index_equal(xp, rng) + result = bdate_range('2013-05-01', periods=3, freq='C', + holidays=['2013-05-01']) + expected = DatetimeIndex(['2013-05-02', '2013-05-03', '2013-05-06']) + tm.assert_index_equal(result, expected) + + # raise with non-custom freq + msg = ('a custom frequency string is required when holidays or ' + 'weekmask are passed, got frequency B') + with tm.assert_raises_regex(ValueError, msg): + bdate_range('2013-05-01', periods=3, holidays=['2013-05-01']) def test_cdaterange_weekmask_and_holidays(self): - rng = cdate_range('2013-05-01', periods=3, - weekmask='Sun Mon Tue Wed Thu', - holidays=['2013-05-01']) - xp = DatetimeIndex(['2013-05-02', '2013-05-05', '2013-05-06']) - self.assert_index_equal(xp, rng) + result = 
bdate_range('2013-05-01', periods=3, freq='C', + weekmask='Sun Mon Tue Wed Thu', + holidays=['2013-05-01']) + expected = DatetimeIndex(['2013-05-02', '2013-05-05', '2013-05-06']) + tm.assert_index_equal(result, expected) + + # raise with non-custom freq + msg = ('a custom frequency string is required when holidays or ' + 'weekmask are passed, got frequency B') + with tm.assert_raises_regex(ValueError, msg): + bdate_range('2013-05-01', periods=3, + weekmask='Sun Mon Tue Wed Thu', + holidays=['2013-05-01']) + + @pytest.mark.parametrize('freq', [freq for freq in prefix_mapping + if freq.startswith('C')]) + def test_all_custom_freq(self, freq): + # should not raise + bdate_range(START, END, freq=freq, weekmask='Mon Wed Fri', + holidays=['2009-03-14']) + + bad_freq = freq + 'FOO' + msg = 'invalid custom frequency string: {freq}' + with tm.assert_raises_regex(ValueError, msg.format(freq=bad_freq)): + bdate_range(START, END, freq=bad_freq) diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index 2c87c48bcda11..b685584a29fb9 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -1,123 +1,40 @@ + +import pytest + import numpy as np -from datetime import date, timedelta, time +from datetime import date +import dateutil import pandas as pd import pandas.util.testing as tm from pandas.compat import lrange -from pandas.compat.numpy import np_datetime64_compat -from pandas import (DatetimeIndex, Index, date_range, Series, DataFrame, - Timestamp, datetime, offsets, _np_version_under1p8) +from pandas import (DatetimeIndex, Index, date_range, DataFrame, + Timestamp, offsets) -from pandas.util.testing import assert_series_equal, assert_almost_equal +from pandas.util.testing import assert_almost_equal randn = np.random.randn -class TestDatetimeIndex(tm.TestCase): - - def test_get_loc(self): - idx = pd.date_range('2000-01-01', periods=3) - - for method in [None, 
'pad', 'backfill', 'nearest']: - self.assertEqual(idx.get_loc(idx[1], method), 1) - self.assertEqual(idx.get_loc(idx[1].to_pydatetime(), method), 1) - self.assertEqual(idx.get_loc(str(idx[1]), method), 1) - if method is not None: - self.assertEqual(idx.get_loc(idx[1], method, - tolerance=pd.Timedelta('0 days')), - 1) - - self.assertEqual(idx.get_loc('2000-01-01', method='nearest'), 0) - self.assertEqual(idx.get_loc('2000-01-01T12', method='nearest'), 1) - - self.assertEqual(idx.get_loc('2000-01-01T12', method='nearest', - tolerance='1 day'), 1) - self.assertEqual(idx.get_loc('2000-01-01T12', method='nearest', - tolerance=pd.Timedelta('1D')), 1) - self.assertEqual(idx.get_loc('2000-01-01T12', method='nearest', - tolerance=np.timedelta64(1, 'D')), 1) - self.assertEqual(idx.get_loc('2000-01-01T12', method='nearest', - tolerance=timedelta(1)), 1) - with tm.assertRaisesRegexp(ValueError, 'must be convertible'): - idx.get_loc('2000-01-01T12', method='nearest', tolerance='foo') - with tm.assertRaises(KeyError): - idx.get_loc('2000-01-01T03', method='nearest', tolerance='2 hours') - - self.assertEqual(idx.get_loc('2000', method='nearest'), slice(0, 3)) - self.assertEqual(idx.get_loc('2000-01', method='nearest'), slice(0, 3)) - - self.assertEqual(idx.get_loc('1999', method='nearest'), 0) - self.assertEqual(idx.get_loc('2001', method='nearest'), 2) - - with tm.assertRaises(KeyError): - idx.get_loc('1999', method='pad') - with tm.assertRaises(KeyError): - idx.get_loc('2001', method='backfill') - - with tm.assertRaises(KeyError): - idx.get_loc('foobar') - with tm.assertRaises(TypeError): - idx.get_loc(slice(2)) - - idx = pd.to_datetime(['2000-01-01', '2000-01-04']) - self.assertEqual(idx.get_loc('2000-01-02', method='nearest'), 0) - self.assertEqual(idx.get_loc('2000-01-03', method='nearest'), 1) - self.assertEqual(idx.get_loc('2000-01', method='nearest'), slice(0, 2)) - - # time indexing - idx = pd.date_range('2000-01-01', periods=24, freq='H') - 
tm.assert_numpy_array_equal(idx.get_loc(time(12)), - np.array([12]), check_dtype=False) - tm.assert_numpy_array_equal(idx.get_loc(time(12, 30)), - np.array([]), check_dtype=False) - with tm.assertRaises(NotImplementedError): - idx.get_loc(time(12, 30), method='pad') - - def test_get_indexer(self): - idx = pd.date_range('2000-01-01', periods=3) - exp = np.array([0, 1, 2], dtype=np.intp) - tm.assert_numpy_array_equal(idx.get_indexer(idx), exp) - - target = idx[0] + pd.to_timedelta(['-1 hour', '12 hours', - '1 day 1 hour']) - tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), - np.array([-1, 0, 1], dtype=np.intp)) - tm.assert_numpy_array_equal(idx.get_indexer(target, 'backfill'), - np.array([0, 1, 2], dtype=np.intp)) - tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest'), - np.array([0, 1, 1], dtype=np.intp)) - tm.assert_numpy_array_equal( - idx.get_indexer(target, 'nearest', - tolerance=pd.Timedelta('1 hour')), - np.array([0, -1, 1], dtype=np.intp)) - with tm.assertRaises(ValueError): - idx.get_indexer(idx[[0]], method='nearest', tolerance='foo') - - def test_reasonable_keyerror(self): - # GH #1062 - index = DatetimeIndex(['1/3/2000']) - try: - index.get_loc('1/1/2000') - except KeyError as e: - self.assertIn('2000', str(e)) +class TestDatetimeIndex(object): def test_roundtrip_pickle_with_tz(self): # GH 8367 # round-trip of timezone index = date_range('20130101', periods=3, tz='US/Eastern', name='foo') - unpickled = self.round_trip_pickle(index) - self.assert_index_equal(index, unpickled) + unpickled = tm.round_trip_pickle(index) + tm.assert_index_equal(index, unpickled) def test_reindex_preserves_tz_if_target_is_empty_list_or_array(self): # GH7774 index = date_range('20130101', periods=3, tz='US/Eastern') - self.assertEqual(str(index.reindex([])[0].tz), 'US/Eastern') - self.assertEqual(str(index.reindex(np.array([]))[0].tz), 'US/Eastern') + assert str(index.reindex([])[0].tz) == 'US/Eastern' + assert str(index.reindex(np.array([]))[0].tz) == 
'US/Eastern' def test_time_loc(self): # GH8667 from datetime import time - from pandas.index import _SIZE_CUTOFF + from pandas._libs.index import _SIZE_CUTOFF ns = _SIZE_CUTOFF + np.array([-100, 100], dtype=np.int64) key = time(15, 11, 30) @@ -148,45 +65,13 @@ def test_time_overflow_for_32bit_machines(self): periods = np.int_(1000) idx1 = pd.date_range(start='2000', periods=periods, freq='S') - self.assertEqual(len(idx1), periods) + assert len(idx1) == periods idx2 = pd.date_range(end='2000', periods=periods, freq='S') - self.assertEqual(len(idx2), periods) + assert len(idx2) == periods def test_nat(self): - self.assertIs(DatetimeIndex([np.nan])[0], pd.NaT) - - def test_ufunc_coercions(self): - idx = date_range('2011-01-01', periods=3, freq='2D', name='x') - - delta = np.timedelta64(1, 'D') - for result in [idx + delta, np.add(idx, delta)]: - tm.assertIsInstance(result, DatetimeIndex) - exp = date_range('2011-01-02', periods=3, freq='2D', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, '2D') - - for result in [idx - delta, np.subtract(idx, delta)]: - tm.assertIsInstance(result, DatetimeIndex) - exp = date_range('2010-12-31', periods=3, freq='2D', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, '2D') - - delta = np.array([np.timedelta64(1, 'D'), np.timedelta64(2, 'D'), - np.timedelta64(3, 'D')]) - for result in [idx + delta, np.add(idx, delta)]: - tm.assertIsInstance(result, DatetimeIndex) - exp = DatetimeIndex(['2011-01-02', '2011-01-05', '2011-01-08'], - freq='3D', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, '3D') - - for result in [idx - delta, np.subtract(idx, delta)]: - tm.assertIsInstance(result, DatetimeIndex) - exp = DatetimeIndex(['2010-12-31', '2011-01-01', '2011-01-02'], - freq='D', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, 'D') + assert DatetimeIndex([np.nan])[0] is pd.NaT def test_week_of_month_frequency(self): # GH 5348: 
"ValueError: Could not evaluate WOM-1SUN" shouldn't raise @@ -210,8 +95,8 @@ def test_week_of_month_frequency(self): def test_hash_error(self): index = date_range('20010101', periods=10) - with tm.assertRaisesRegexp(TypeError, "unhashable type: %r" % - type(index).__name__): + with tm.assert_raises_regex(TypeError, "unhashable type: %r" % + type(index).__name__): hash(index) def test_stringified_slice_with_tz(self): @@ -227,131 +112,11 @@ def test_append_join_nondatetimeindex(self): idx = Index(['a', 'b', 'c', 'd']) result = rng.append(idx) - tm.assertIsInstance(result[0], Timestamp) + assert isinstance(result[0], Timestamp) # it works rng.join(idx, how='outer') - def test_to_period_nofreq(self): - idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-04']) - self.assertRaises(ValueError, idx.to_period) - - idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03'], - freq='infer') - self.assertEqual(idx.freqstr, 'D') - expected = pd.PeriodIndex(['2000-01-01', '2000-01-02', - '2000-01-03'], freq='D') - tm.assert_index_equal(idx.to_period(), expected) - - # GH 7606 - idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03']) - self.assertEqual(idx.freqstr, None) - tm.assert_index_equal(idx.to_period(), expected) - - def test_comparisons_coverage(self): - rng = date_range('1/1/2000', periods=10) - - # raise TypeError for now - self.assertRaises(TypeError, rng.__lt__, rng[3].value) - - result = rng == list(rng) - exp = rng == rng - self.assert_numpy_array_equal(result, exp) - - def test_comparisons_nat(self): - - fidx1 = pd.Index([1.0, np.nan, 3.0, np.nan, 5.0, 7.0]) - fidx2 = pd.Index([2.0, 3.0, np.nan, np.nan, 6.0, 7.0]) - - didx1 = pd.DatetimeIndex(['2014-01-01', pd.NaT, '2014-03-01', pd.NaT, - '2014-05-01', '2014-07-01']) - didx2 = pd.DatetimeIndex(['2014-02-01', '2014-03-01', pd.NaT, pd.NaT, - '2014-06-01', '2014-07-01']) - darr = np.array([np_datetime64_compat('2014-02-01 00:00Z'), - np_datetime64_compat('2014-03-01 00:00Z'), - 
np_datetime64_compat('nat'), np.datetime64('nat'), - np_datetime64_compat('2014-06-01 00:00Z'), - np_datetime64_compat('2014-07-01 00:00Z')]) - - if _np_version_under1p8: - # cannot test array because np.datetime('nat') returns today's date - cases = [(fidx1, fidx2), (didx1, didx2)] - else: - cases = [(fidx1, fidx2), (didx1, didx2), (didx1, darr)] - - # Check pd.NaT is handles as the same as np.nan - with tm.assert_produces_warning(None): - for idx1, idx2 in cases: - - result = idx1 < idx2 - expected = np.array([True, False, False, False, True, False]) - self.assert_numpy_array_equal(result, expected) - - result = idx2 > idx1 - expected = np.array([True, False, False, False, True, False]) - self.assert_numpy_array_equal(result, expected) - - result = idx1 <= idx2 - expected = np.array([True, False, False, False, True, True]) - self.assert_numpy_array_equal(result, expected) - - result = idx2 >= idx1 - expected = np.array([True, False, False, False, True, True]) - self.assert_numpy_array_equal(result, expected) - - result = idx1 == idx2 - expected = np.array([False, False, False, False, False, True]) - self.assert_numpy_array_equal(result, expected) - - result = idx1 != idx2 - expected = np.array([True, True, True, True, True, False]) - self.assert_numpy_array_equal(result, expected) - - with tm.assert_produces_warning(None): - for idx1, val in [(fidx1, np.nan), (didx1, pd.NaT)]: - result = idx1 < val - expected = np.array([False, False, False, False, False, False]) - self.assert_numpy_array_equal(result, expected) - result = idx1 > val - self.assert_numpy_array_equal(result, expected) - - result = idx1 <= val - self.assert_numpy_array_equal(result, expected) - result = idx1 >= val - self.assert_numpy_array_equal(result, expected) - - result = idx1 == val - self.assert_numpy_array_equal(result, expected) - - result = idx1 != val - expected = np.array([True, True, True, True, True, True]) - self.assert_numpy_array_equal(result, expected) - - # Check pd.NaT is handles 
as the same as np.nan - with tm.assert_produces_warning(None): - for idx1, val in [(fidx1, 3), (didx1, datetime(2014, 3, 1))]: - result = idx1 < val - expected = np.array([True, False, False, False, False, False]) - self.assert_numpy_array_equal(result, expected) - result = idx1 > val - expected = np.array([False, False, False, False, True, True]) - self.assert_numpy_array_equal(result, expected) - - result = idx1 <= val - expected = np.array([True, False, True, False, False, False]) - self.assert_numpy_array_equal(result, expected) - result = idx1 >= val - expected = np.array([False, False, True, False, True, True]) - self.assert_numpy_array_equal(result, expected) - - result = idx1 == val - expected = np.array([False, False, True, False, False, False]) - self.assert_numpy_array_equal(result, expected) - - result = idx1 != val - expected = np.array([True, True, False, True, True, True]) - self.assert_numpy_array_equal(result, expected) - def test_map(self): rng = date_range('1/1/2000', periods=10) @@ -361,17 +126,13 @@ def test_map(self): tm.assert_index_equal(result, exp) def test_iteration_preserves_tz(self): - - tm._skip_if_no_dateutil() - - # GH 8890 - import dateutil + # see gh-8890 index = date_range("2012-01-01", periods=3, freq='H', tz='US/Eastern') for i, ts in enumerate(index): result = ts expected = index[i] - self.assertEqual(result, expected) + assert result == expected index = date_range("2012-01-01", periods=3, freq='H', tz=dateutil.tz.tzoffset(None, -28800)) @@ -379,8 +140,8 @@ def test_iteration_preserves_tz(self): for i, ts in enumerate(index): result = ts expected = index[i] - self.assertEqual(result._repr_base, expected._repr_base) - self.assertEqual(result, expected) + assert result._repr_base == expected._repr_base + assert result == expected # 9100 index = pd.DatetimeIndex(['2014-12-01 03:32:39.987000-08:00', @@ -388,19 +149,19 @@ def test_iteration_preserves_tz(self): for i, ts in enumerate(index): result = ts expected = index[i] - 
self.assertEqual(result._repr_base, expected._repr_base) - self.assertEqual(result, expected) + assert result._repr_base == expected._repr_base + assert result == expected def test_misc_coverage(self): rng = date_range('1/1/2000', periods=5) result = rng.groupby(rng.day) - tm.assertIsInstance(list(result.values())[0][0], Timestamp) + assert isinstance(list(result.values())[0][0], Timestamp) idx = DatetimeIndex(['2000-01-03', '2000-01-01', '2000-01-02']) - self.assertFalse(idx.equals(list(idx))) + assert not idx.equals(list(idx)) non_datetime = Index(list('abc')) - self.assertFalse(idx.equals(list(non_datetime))) + assert not idx.equals(list(non_datetime)) def test_string_index_series_name_converted(self): # #1644 @@ -408,29 +169,10 @@ def test_string_index_series_name_converted(self): index=date_range('1/1/2000', periods=10)) result = df.loc['1/3/2000'] - self.assertEqual(result.name, df.index[2]) + assert result.name == df.index[2] result = df.T['1/3/2000'] - self.assertEqual(result.name, df.index[2]) - - def test_overflow_offset(self): - # xref https://github.com/statsmodels/statsmodels/issues/3374 - # ends up multiplying really large numbers which overflow - - t = Timestamp('2017-01-13 00:00:00', freq='D') - offset = 20169940 * pd.offsets.Day(1) - - def f(): - t + offset - self.assertRaises(OverflowError, f) - - def f(): - offset + t - self.assertRaises(OverflowError, f) - - def f(): - t - offset - self.assertRaises(OverflowError, f) + assert result.name == df.index[2] def test_get_duplicates(self): idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-02', @@ -442,110 +184,25 @@ def test_get_duplicates(self): def test_argmin_argmax(self): idx = DatetimeIndex(['2000-01-04', '2000-01-01', '2000-01-02']) - self.assertEqual(idx.argmin(), 1) - self.assertEqual(idx.argmax(), 0) + assert idx.argmin() == 1 + assert idx.argmax() == 0 def test_sort_values(self): idx = DatetimeIndex(['2000-01-04', '2000-01-01', '2000-01-02']) ordered = idx.sort_values() - 
self.assertTrue(ordered.is_monotonic) + assert ordered.is_monotonic ordered = idx.sort_values(ascending=False) - self.assertTrue(ordered[::-1].is_monotonic) + assert ordered[::-1].is_monotonic ordered, dexer = idx.sort_values(return_indexer=True) - self.assertTrue(ordered.is_monotonic) - self.assert_numpy_array_equal(dexer, - np.array([1, 2, 0], dtype=np.intp)) + assert ordered.is_monotonic + tm.assert_numpy_array_equal(dexer, np.array([1, 2, 0], dtype=np.intp)) ordered, dexer = idx.sort_values(return_indexer=True, ascending=False) - self.assertTrue(ordered[::-1].is_monotonic) - self.assert_numpy_array_equal(dexer, - np.array([0, 2, 1], dtype=np.intp)) - - def test_take(self): - dates = [datetime(2010, 1, 1, 14), datetime(2010, 1, 1, 15), - datetime(2010, 1, 1, 17), datetime(2010, 1, 1, 21)] - - for tz in [None, 'US/Eastern', 'Asia/Tokyo']: - idx = DatetimeIndex(start='2010-01-01 09:00', - end='2010-02-01 09:00', freq='H', tz=tz, - name='idx') - expected = DatetimeIndex(dates, freq=None, name='idx', tz=tz) - - taken1 = idx.take([5, 6, 8, 12]) - taken2 = idx[[5, 6, 8, 12]] - - for taken in [taken1, taken2]: - tm.assert_index_equal(taken, expected) - tm.assertIsInstance(taken, DatetimeIndex) - self.assertIsNone(taken.freq) - self.assertEqual(taken.tz, expected.tz) - self.assertEqual(taken.name, expected.name) - - def test_take_fill_value(self): - # GH 12631 - idx = pd.DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01'], - name='xxx') - result = idx.take(np.array([1, 0, -1])) - expected = pd.DatetimeIndex(['2011-02-01', '2011-01-01', '2011-03-01'], - name='xxx') - tm.assert_index_equal(result, expected) - - # fill_value - result = idx.take(np.array([1, 0, -1]), fill_value=True) - expected = pd.DatetimeIndex(['2011-02-01', '2011-01-01', 'NaT'], - name='xxx') - tm.assert_index_equal(result, expected) - - # allow_fill=False - result = idx.take(np.array([1, 0, -1]), allow_fill=False, - fill_value=True) - expected = pd.DatetimeIndex(['2011-02-01', '2011-01-01', 
'2011-03-01'], - name='xxx') - tm.assert_index_equal(result, expected) - - msg = ('When allow_fill=True and fill_value is not None, ' - 'all indices must be >= -1') - with tm.assertRaisesRegexp(ValueError, msg): - idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assertRaisesRegexp(ValueError, msg): - idx.take(np.array([1, 0, -5]), fill_value=True) - - with tm.assertRaises(IndexError): - idx.take(np.array([1, -5])) - - def test_take_fill_value_with_timezone(self): - idx = pd.DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01'], - name='xxx', tz='US/Eastern') - result = idx.take(np.array([1, 0, -1])) - expected = pd.DatetimeIndex(['2011-02-01', '2011-01-01', '2011-03-01'], - name='xxx', tz='US/Eastern') - tm.assert_index_equal(result, expected) - - # fill_value - result = idx.take(np.array([1, 0, -1]), fill_value=True) - expected = pd.DatetimeIndex(['2011-02-01', '2011-01-01', 'NaT'], - name='xxx', tz='US/Eastern') - tm.assert_index_equal(result, expected) - - # allow_fill=False - result = idx.take(np.array([1, 0, -1]), allow_fill=False, - fill_value=True) - expected = pd.DatetimeIndex(['2011-02-01', '2011-01-01', '2011-03-01'], - name='xxx', tz='US/Eastern') - tm.assert_index_equal(result, expected) - - msg = ('When allow_fill=True and fill_value is not None, ' - 'all indices must be >= -1') - with tm.assertRaisesRegexp(ValueError, msg): - idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assertRaisesRegexp(ValueError, msg): - idx.take(np.array([1, 0, -5]), fill_value=True) - - with tm.assertRaises(IndexError): - idx.take(np.array([1, -5])) + assert ordered[::-1].is_monotonic + tm.assert_numpy_array_equal(dexer, np.array([0, 2, 1], dtype=np.intp)) def test_map_bug_1677(self): index = DatetimeIndex(['2012-04-25 09:30:00.393000']) @@ -561,7 +218,7 @@ def test_groupby_function_tuple_1677(self): monthly_group = df.groupby(lambda x: (x.year, x.month)) result = monthly_group.mean() - tm.assertIsInstance(result.index[0], tuple) + assert 
isinstance(result.index[0], tuple) def test_append_numpy_bug_1681(self): # another datetime64 bug @@ -570,54 +227,34 @@ def test_append_numpy_bug_1681(self): c = DataFrame({'A': 'foo', 'B': dr}, index=dr) result = a.append(c) - self.assertTrue((result['B'] == dr).all()) + assert (result['B'] == dr).all() def test_isin(self): index = tm.makeDateIndex(4) result = index.isin(index) - self.assertTrue(result.all()) + assert result.all() result = index.isin(list(index)) - self.assertTrue(result.all()) + assert result.all() assert_almost_equal(index.isin([index[2], 5]), np.array([False, False, True, False])) - def test_time(self): - rng = pd.date_range('1/1/2000', freq='12min', periods=10) - result = pd.Index(rng).time - expected = [t.time() for t in rng] - self.assertTrue((result == expected).all()) - - def test_date(self): - rng = pd.date_range('1/1/2000', freq='12H', periods=10) - result = pd.Index(rng).date - expected = [t.date() for t in rng] - self.assertTrue((result == expected).all()) - def test_does_not_convert_mixed_integer(self): df = tm.makeCustomDataframe(10, 10, data_gen_f=lambda *args, **kwargs: randn(), r_idx_type='i', c_idx_type='dt') cols = df.columns.join(df.index, how='outer') joined = cols.join(df.columns) - self.assertEqual(cols.dtype, np.dtype('O')) - self.assertEqual(cols.dtype, joined.dtype) + assert cols.dtype == np.dtype('O') + assert cols.dtype == joined.dtype tm.assert_numpy_array_equal(cols.values, joined.values) - def test_slice_keeps_name(self): - # GH4226 - st = pd.Timestamp('2013-07-01 00:00:00', tz='America/Los_Angeles') - et = pd.Timestamp('2013-07-02 00:00:00', tz='America/Los_Angeles') - dr = pd.date_range(st, et, freq='H', name='timebucket') - self.assertEqual(dr[1:].name, dr.name) - - def test_join_self(self): + @pytest.mark.parametrize('how', ['outer', 'inner', 'left', 'right']) + def test_join_self(self, how): index = date_range('1/1/2000', periods=10) - kinds = 'outer', 'inner', 'left', 'right' - for kind in kinds: - joined = 
index.join(index, how=kind) - self.assertIs(index, joined) + joined = index.join(index, how=how) + assert index is joined def assert_index_parameters(self, index): assert index.freq == '40960N' @@ -637,17 +274,17 @@ def test_ns_index(self): freq=index.freq) self.assert_index_parameters(new_index) - def test_join_with_period_index(self): + @pytest.mark.parametrize('how', ['left', 'right', 'inner', 'outer']) + def test_join_with_period_index(self, how): df = tm.makeCustomDataframe( 10, 10, data_gen_f=lambda *args: np.random.randint(2), c_idx_type='p', r_idx_type='dt') s = df.iloc[:5, 0] - joins = 'left', 'right', 'inner', 'outer' - for join in joins: - with tm.assertRaisesRegexp(ValueError, 'can only call with other ' - 'PeriodIndex-ed objects'): - df.columns.join(s.index, how=join) + with tm.assert_raises_regex(ValueError, + 'can only call with other ' + 'PeriodIndex-ed objects'): + df.columns.join(s.index, how=how) def test_factorize(self): idx1 = DatetimeIndex(['2014-01', '2014-01', '2014-02', '2014-02', @@ -657,11 +294,11 @@ def test_factorize(self): exp_idx = DatetimeIndex(['2014-01', '2014-02', '2014-03']) arr, idx = idx1.factorize() - self.assert_numpy_array_equal(arr, exp_arr) + tm.assert_numpy_array_equal(arr, exp_arr) tm.assert_index_equal(idx, exp_idx) arr, idx = idx1.factorize(sort=True) - self.assert_numpy_array_equal(arr, exp_arr) + tm.assert_numpy_array_equal(arr, exp_arr) tm.assert_index_equal(idx, exp_idx) # tz must be preserved @@ -669,7 +306,7 @@ def test_factorize(self): exp_idx = exp_idx.tz_localize('Asia/Tokyo') arr, idx = idx1.factorize() - self.assert_numpy_array_equal(arr, exp_arr) + tm.assert_numpy_array_equal(arr, exp_arr) tm.assert_index_equal(idx, exp_idx) idx2 = pd.DatetimeIndex(['2014-03', '2014-03', '2014-02', '2014-01', @@ -678,34 +315,34 @@ def test_factorize(self): exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.intp) exp_idx = DatetimeIndex(['2014-01', '2014-02', '2014-03']) arr, idx = idx2.factorize(sort=True) - 
self.assert_numpy_array_equal(arr, exp_arr) + tm.assert_numpy_array_equal(arr, exp_arr) tm.assert_index_equal(idx, exp_idx) exp_arr = np.array([0, 0, 1, 2, 0, 2], dtype=np.intp) exp_idx = DatetimeIndex(['2014-03', '2014-02', '2014-01']) arr, idx = idx2.factorize() - self.assert_numpy_array_equal(arr, exp_arr) + tm.assert_numpy_array_equal(arr, exp_arr) tm.assert_index_equal(idx, exp_idx) # freq must be preserved idx3 = date_range('2000-01', periods=4, freq='M', tz='Asia/Tokyo') exp_arr = np.array([0, 1, 2, 3], dtype=np.intp) arr, idx = idx3.factorize() - self.assert_numpy_array_equal(arr, exp_arr) + tm.assert_numpy_array_equal(arr, exp_arr) tm.assert_index_equal(idx, idx3) - def test_factorize_tz(self): - # GH 13750 - for tz in [None, 'UTC', 'US/Eastern', 'Asia/Tokyo']: - base = pd.date_range('2016-11-05', freq='H', periods=100, tz=tz) - idx = base.repeat(5) + @pytest.mark.parametrize('tz', [None, 'UTC', 'US/Eastern', 'Asia/Tokyo']) + def test_factorize_tz(self, tz): + # GH#13750 + base = pd.date_range('2016-11-05', freq='H', periods=100, tz=tz) + idx = base.repeat(5) - exp_arr = np.arange(100, dtype=np.intp).repeat(5) + exp_arr = np.arange(100, dtype=np.intp).repeat(5) - for obj in [idx, pd.Series(idx)]: - arr, res = obj.factorize() - self.assert_numpy_array_equal(arr, exp_arr) - tm.assert_index_equal(res, base) + for obj in [idx, pd.Series(idx)]: + arr, res = obj.factorize() + tm.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(res, base) def test_factorize_dst(self): # GH 13750 @@ -714,7 +351,7 @@ def test_factorize_dst(self): for obj in [idx, pd.Series(idx)]: arr, res = obj.factorize() - self.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp)) + tm.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp)) tm.assert_index_equal(res, idx) idx = pd.date_range('2016-06-13', freq='H', periods=12, @@ -722,54 +359,14 @@ def test_factorize_dst(self): for obj in [idx, pd.Series(idx)]: arr, res = obj.factorize() - 
self.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp)) + tm.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp)) tm.assert_index_equal(res, idx) - def test_slice_with_negative_step(self): - ts = Series(np.arange(20), - date_range('2014-01-01', periods=20, freq='MS')) - SLC = pd.IndexSlice - - def assert_slices_equivalent(l_slc, i_slc): - assert_series_equal(ts[l_slc], ts.iloc[i_slc]) - assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc]) - assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc]) - - assert_slices_equivalent(SLC[Timestamp('2014-10-01')::-1], SLC[9::-1]) - assert_slices_equivalent(SLC['2014-10-01'::-1], SLC[9::-1]) - - assert_slices_equivalent(SLC[:Timestamp('2014-10-01'):-1], SLC[:8:-1]) - assert_slices_equivalent(SLC[:'2014-10-01':-1], SLC[:8:-1]) - - assert_slices_equivalent(SLC['2015-02-01':'2014-10-01':-1], - SLC[13:8:-1]) - assert_slices_equivalent(SLC[Timestamp('2015-02-01'):Timestamp( - '2014-10-01'):-1], SLC[13:8:-1]) - assert_slices_equivalent(SLC['2015-02-01':Timestamp('2014-10-01'):-1], - SLC[13:8:-1]) - assert_slices_equivalent(SLC[Timestamp('2015-02-01'):'2014-10-01':-1], - SLC[13:8:-1]) - - assert_slices_equivalent(SLC['2014-10-01':'2015-02-01':-1], SLC[:0]) - - def test_slice_with_zero_step_raises(self): - ts = Series(np.arange(20), - date_range('2014-01-01', periods=20, freq='MS')) - self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: ts[::0]) - self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: ts.loc[::0]) - self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: ts.loc[::0]) - - def test_slice_bounds_empty(self): - # GH 14354 - empty_idx = DatetimeIndex(freq='1H', periods=0, end='2015') - - right = empty_idx._maybe_cast_slice_bound('2015-01-02', 'right', 'loc') - exp = Timestamp('2015-01-02 23:59:59.999999999') - self.assertEqual(right, exp) - - left = empty_idx._maybe_cast_slice_bound('2015-01-02', 'left', 'loc') - exp = Timestamp('2015-01-02 00:00:00') - 
self.assertEqual(left, exp) + @pytest.mark.parametrize('arr, expected', [ + (pd.DatetimeIndex(['2017', '2017']), pd.DatetimeIndex(['2017'])), + (pd.DatetimeIndex(['2017', '2017'], tz='US/Eastern'), + pd.DatetimeIndex(['2017'], tz='US/Eastern')), + ]) + def test_unique(self, arr, expected): + result = arr.unique() + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/datetimes/test_datetimelike.py b/pandas/tests/indexes/datetimes/test_datetimelike.py index 2b254bc8be931..c6b3a77773dc7 100644 --- a/pandas/tests/indexes/datetimes/test_datetimelike.py +++ b/pandas/tests/indexes/datetimes/test_datetimelike.py @@ -1,76 +1,31 @@ """ generic tests from the Datetimelike class """ -import numpy as np -import pandas as pd from pandas.util import testing as tm -from pandas import Series, Index, DatetimeIndex, date_range +from pandas import DatetimeIndex, date_range from ..datetimelike import DatetimeLike -class TestDatetimeIndex(DatetimeLike, tm.TestCase): +class TestDatetimeIndex(DatetimeLike): _holder = DatetimeIndex - def setUp(self): - self.indices = dict(index=tm.makeDateIndex(10)) + def setup_method(self, method): + self.indices = dict(index=tm.makeDateIndex(10), + index_dec=date_range('20130110', periods=10, + freq='-1D')) self.setup_indices() def create_index(self): return date_range('20130101', periods=5) def test_shift(self): - - # test shift for datetimeIndex and non datetimeIndex - # GH8083 - - drange = self.create_index() - result = drange.shift(1) - expected = DatetimeIndex(['2013-01-02', '2013-01-03', '2013-01-04', - '2013-01-05', - '2013-01-06'], freq='D') - self.assert_index_equal(result, expected) - - result = drange.shift(-1) - expected = DatetimeIndex(['2012-12-31', '2013-01-01', '2013-01-02', - '2013-01-03', '2013-01-04'], - freq='D') - self.assert_index_equal(result, expected) - - result = drange.shift(3, freq='2D') - expected = DatetimeIndex(['2013-01-07', '2013-01-08', '2013-01-09', - '2013-01-10', - '2013-01-11'], freq='D') - 
self.assert_index_equal(result, expected) + pass # handled in test_ops def test_pickle_compat_construction(self): pass def test_intersection(self): - first = self.index - second = self.index[5:] - intersect = first.intersection(second) - self.assertTrue(tm.equalContents(intersect, second)) - - # GH 10149 - cases = [klass(second.values) for klass in [np.array, Series, list]] - for case in cases: - result = first.intersection(case) - self.assertTrue(tm.equalContents(result, second)) - - third = Index(['a', 'b', 'c']) - result = first.intersection(third) - expected = pd.Index([], dtype=object) - self.assert_index_equal(result, expected) + pass # handled in test_setops def test_union(self): - first = self.index[:5] - second = self.index[5:] - everything = self.index - union = first.union(second) - self.assertTrue(tm.equalContents(union, everything)) - - # GH 10149 - cases = [klass(second.values) for klass in [np.array, Series, list]] - for case in cases: - result = first.union(case) - self.assertTrue(tm.equalContents(result, everything)) + pass # handled in test_setops diff --git a/pandas/tests/indexes/datetimes/test_formats.py b/pandas/tests/indexes/datetimes/test_formats.py new file mode 100644 index 0000000000000..0d1a9e65ce6c6 --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_formats.py @@ -0,0 +1,220 @@ +from datetime import datetime +from pandas import DatetimeIndex, Series + +import numpy as np +import dateutil.tz +import pytz +import pytest + +import pandas.util.testing as tm +import pandas as pd + + +def test_to_native_types(): + index = DatetimeIndex(freq='1D', periods=3, start='2017-01-01') + + # First, with no arguments. 
+ expected = np.array(['2017-01-01', '2017-01-02', + '2017-01-03'], dtype=object) + + result = index.to_native_types() + tm.assert_numpy_array_equal(result, expected) + + # No NaN values, so na_rep has no effect + result = index.to_native_types(na_rep='pandas') + tm.assert_numpy_array_equal(result, expected) + + # Make sure slicing works + expected = np.array(['2017-01-01', '2017-01-03'], dtype=object) + + result = index.to_native_types([0, 2]) + tm.assert_numpy_array_equal(result, expected) + + # Make sure date formatting works + expected = np.array(['01-2017-01', '01-2017-02', + '01-2017-03'], dtype=object) + + result = index.to_native_types(date_format='%m-%Y-%d') + tm.assert_numpy_array_equal(result, expected) + + # NULL object handling should work + index = DatetimeIndex(['2017-01-01', pd.NaT, '2017-01-03']) + expected = np.array(['2017-01-01', 'NaT', '2017-01-03'], dtype=object) + + result = index.to_native_types() + tm.assert_numpy_array_equal(result, expected) + + expected = np.array(['2017-01-01', 'pandas', + '2017-01-03'], dtype=object) + + result = index.to_native_types(na_rep='pandas') + tm.assert_numpy_array_equal(result, expected) + + +class TestDatetimeIndexRendering(object): + def test_dti_repr_short(self): + dr = pd.date_range(start='1/1/2012', periods=1) + repr(dr) + + dr = pd.date_range(start='1/1/2012', periods=2) + repr(dr) + + dr = pd.date_range(start='1/1/2012', periods=3) + repr(dr) + + @pytest.mark.parametrize('method', ['__repr__', '__unicode__', '__str__']) + def test_dti_representation(self, method): + idxs = [] + idxs.append(DatetimeIndex([], freq='D')) + idxs.append(DatetimeIndex(['2011-01-01'], freq='D')) + idxs.append(DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D')) + idxs.append(DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], + freq='D')) + idxs.append(DatetimeIndex( + ['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00' + ], freq='H', tz='Asia/Tokyo')) + idxs.append(DatetimeIndex( + ['2011-01-01 09:00', 
'2011-01-01 10:00', pd.NaT], tz='US/Eastern')) + idxs.append(DatetimeIndex( + ['2011-01-01 09:00', '2011-01-01 10:00', pd.NaT], tz='UTC')) + + exp = [] + exp.append("""DatetimeIndex([], dtype='datetime64[ns]', freq='D')""") + exp.append("DatetimeIndex(['2011-01-01'], dtype='datetime64[ns]', " + "freq='D')") + exp.append("DatetimeIndex(['2011-01-01', '2011-01-02'], " + "dtype='datetime64[ns]', freq='D')") + exp.append("DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], " + "dtype='datetime64[ns]', freq='D')") + exp.append("DatetimeIndex(['2011-01-01 09:00:00+09:00', " + "'2011-01-01 10:00:00+09:00', '2011-01-01 11:00:00+09:00']" + ", dtype='datetime64[ns, Asia/Tokyo]', freq='H')") + exp.append("DatetimeIndex(['2011-01-01 09:00:00-05:00', " + "'2011-01-01 10:00:00-05:00', 'NaT'], " + "dtype='datetime64[ns, US/Eastern]', freq=None)") + exp.append("DatetimeIndex(['2011-01-01 09:00:00+00:00', " + "'2011-01-01 10:00:00+00:00', 'NaT'], " + "dtype='datetime64[ns, UTC]', freq=None)""") + + with pd.option_context('display.width', 300): + for indx, expected in zip(idxs, exp): + result = getattr(indx, method)() + assert result == expected + + def test_dti_representation_to_series(self): + idx1 = DatetimeIndex([], freq='D') + idx2 = DatetimeIndex(['2011-01-01'], freq='D') + idx3 = DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D') + idx4 = DatetimeIndex( + ['2011-01-01', '2011-01-02', '2011-01-03'], freq='D') + idx5 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', + '2011-01-01 11:00'], freq='H', tz='Asia/Tokyo') + idx6 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', pd.NaT], + tz='US/Eastern') + idx7 = DatetimeIndex(['2011-01-01 09:00', '2011-01-02 10:15']) + + exp1 = """Series([], dtype: datetime64[ns])""" + + exp2 = ("0 2011-01-01\n" + "dtype: datetime64[ns]") + + exp3 = ("0 2011-01-01\n" + "1 2011-01-02\n" + "dtype: datetime64[ns]") + + exp4 = ("0 2011-01-01\n" + "1 2011-01-02\n" + "2 2011-01-03\n" + "dtype: datetime64[ns]") + + exp5 = ("0 
2011-01-01 09:00:00+09:00\n" + "1 2011-01-01 10:00:00+09:00\n" + "2 2011-01-01 11:00:00+09:00\n" + "dtype: datetime64[ns, Asia/Tokyo]") + + exp6 = ("0 2011-01-01 09:00:00-05:00\n" + "1 2011-01-01 10:00:00-05:00\n" + "2 NaT\n" + "dtype: datetime64[ns, US/Eastern]") + + exp7 = ("0 2011-01-01 09:00:00\n" + "1 2011-01-02 10:15:00\n" + "dtype: datetime64[ns]") + + with pd.option_context('display.width', 300): + for idx, expected in zip([idx1, idx2, idx3, idx4, + idx5, idx6, idx7], + [exp1, exp2, exp3, exp4, + exp5, exp6, exp7]): + result = repr(Series(idx)) + assert result == expected + + def test_dti_summary(self): + # GH#9116 + idx1 = DatetimeIndex([], freq='D') + idx2 = DatetimeIndex(['2011-01-01'], freq='D') + idx3 = DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D') + idx4 = DatetimeIndex( + ['2011-01-01', '2011-01-02', '2011-01-03'], freq='D') + idx5 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', + '2011-01-01 11:00'], + freq='H', tz='Asia/Tokyo') + idx6 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', pd.NaT], + tz='US/Eastern') + + exp1 = ("DatetimeIndex: 0 entries\n" + "Freq: D") + + exp2 = ("DatetimeIndex: 1 entries, 2011-01-01 to 2011-01-01\n" + "Freq: D") + + exp3 = ("DatetimeIndex: 2 entries, 2011-01-01 to 2011-01-02\n" + "Freq: D") + + exp4 = ("DatetimeIndex: 3 entries, 2011-01-01 to 2011-01-03\n" + "Freq: D") + + exp5 = ("DatetimeIndex: 3 entries, 2011-01-01 09:00:00+09:00 " + "to 2011-01-01 11:00:00+09:00\n" + "Freq: H") + + exp6 = """DatetimeIndex: 3 entries, 2011-01-01 09:00:00-05:00 to NaT""" + + for idx, expected in zip([idx1, idx2, idx3, idx4, idx5, idx6], + [exp1, exp2, exp3, exp4, exp5, exp6]): + result = idx.summary() + assert result == expected + + def test_dti_business_repr(self): + # only really care that it works + repr(pd.bdate_range(datetime(2009, 1, 1), datetime(2010, 1, 1))) + + def test_dti_business_summary(self): + rng = pd.bdate_range(datetime(2009, 1, 1), datetime(2010, 1, 1)) + rng.summary() + rng[2:2].summary() 
+ + def test_dti_business_summary_pytz(self): + pd.bdate_range('1/1/2005', '1/1/2009', tz=pytz.utc).summary() + + def test_dti_business_summary_dateutil(self): + pd.bdate_range('1/1/2005', '1/1/2009', + tz=dateutil.tz.tzutc()).summary() + + def test_dti_custom_business_repr(self): + # only really care that it works + repr(pd.bdate_range(datetime(2009, 1, 1), datetime(2010, 1, 1), + freq='C')) + + def test_dti_custom_business_summary(self): + rng = pd.bdate_range(datetime(2009, 1, 1), datetime(2010, 1, 1), + freq='C') + rng.summary() + rng[2:2].summary() + + def test_dti_custom_business_summary_pytz(self): + pd.bdate_range('1/1/2005', '1/1/2009', freq='C', tz=pytz.utc).summary() + + def test_dti_custom_business_summary_dateutil(self): + pd.bdate_range('1/1/2005', '1/1/2009', freq='C', + tz=dateutil.tz.tzutc()).summary() diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index 23271a8d45499..af65a8618d30f 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -1,45 +1,289 @@ -import numpy as np +from datetime import datetime, timedelta, time +import pytest +import pytz +import numpy as np import pandas as pd import pandas.util.testing as tm import pandas.compat as compat -from pandas import notnull, Index, DatetimeIndex, datetime, date_range +from pandas import notna, Index, DatetimeIndex, date_range, Timestamp +from pandas.tseries.offsets import CDay, BDay +START, END = datetime(2009, 1, 1), datetime(2010, 1, 1) -class TestDatetimeIndex(tm.TestCase): - def test_where_other(self): +class TestGetItem(object): + def test_getitem(self): + idx1 = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx') + idx2 = pd.date_range('2011-01-01', '2011-01-31', freq='D', + tz='Asia/Tokyo', name='idx') + + for idx in [idx1, idx2]: + result = idx[0] + assert result == Timestamp('2011-01-01', tz=idx.tz) + + result = idx[0:5] + expected = 
pd.date_range('2011-01-01', '2011-01-05', freq='D', + tz=idx.tz, name='idx') + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + result = idx[0:10:2] + expected = pd.date_range('2011-01-01', '2011-01-09', freq='2D', + tz=idx.tz, name='idx') + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + result = idx[-20:-5:3] + expected = pd.date_range('2011-01-12', '2011-01-24', freq='3D', + tz=idx.tz, name='idx') + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + result = idx[4::-1] + expected = DatetimeIndex(['2011-01-05', '2011-01-04', '2011-01-03', + '2011-01-02', '2011-01-01'], + freq='-1D', tz=idx.tz, name='idx') + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + def test_dti_business_getitem(self): + rng = pd.bdate_range(START, END) + smaller = rng[:5] + exp = DatetimeIndex(rng.view(np.ndarray)[:5]) + tm.assert_index_equal(smaller, exp) + + assert smaller.offset == rng.offset + sliced = rng[::5] + assert sliced.offset == BDay() * 5 + + fancy_indexed = rng[[4, 3, 2, 1, 0]] + assert len(fancy_indexed) == 5 + assert isinstance(fancy_indexed, DatetimeIndex) + assert fancy_indexed.freq is None + + # 32-bit vs. 64-bit platforms + assert rng[4] == rng[np.int_(4)] + + def test_dti_business_getitem_matplotlib_hackaround(self): + rng = pd.bdate_range(START, END) + values = rng[:, None] + expected = rng.values[:, None] + tm.assert_numpy_array_equal(values, expected) + + def test_dti_custom_getitem(self): + rng = pd.bdate_range(START, END, freq='C') + smaller = rng[:5] + exp = DatetimeIndex(rng.view(np.ndarray)[:5]) + tm.assert_index_equal(smaller, exp) + assert smaller.offset == rng.offset + + sliced = rng[::5] + assert sliced.offset == CDay() * 5 + + fancy_indexed = rng[[4, 3, 2, 1, 0]] + assert len(fancy_indexed) == 5 + assert isinstance(fancy_indexed, DatetimeIndex) + assert fancy_indexed.freq is None + + # 32-bit vs. 
64-bit platforms + assert rng[4] == rng[np.int_(4)] + + def test_dti_custom_getitem_matplotlib_hackaround(self): + rng = pd.bdate_range(START, END, freq='C') + values = rng[:, None] + expected = rng.values[:, None] + tm.assert_numpy_array_equal(values, expected) + + +class TestWhere(object): + def test_where_other(self): # other is ndarray or Index i = pd.date_range('20130101', periods=3, tz='US/Eastern') for arr in [np.nan, pd.NaT]: - result = i.where(notnull(i), other=np.nan) + result = i.where(notna(i), other=np.nan) expected = i tm.assert_index_equal(result, expected) i2 = i.copy() i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist()) - result = i.where(notnull(i2), i2) + result = i.where(notna(i2), i2) tm.assert_index_equal(result, i2) i2 = i.copy() i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist()) - result = i.where(notnull(i2), i2.values) + result = i.where(notna(i2), i2.values) tm.assert_index_equal(result, i2) def test_where_tz(self): i = pd.date_range('20130101', periods=3, tz='US/Eastern') - result = i.where(notnull(i)) + result = i.where(notna(i)) expected = i tm.assert_index_equal(result, expected) i2 = i.copy() i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist()) - result = i.where(notnull(i2)) + result = i.where(notna(i2)) expected = i2 tm.assert_index_equal(result, expected) + +class TestTake(object): + def test_take(self): + # GH#10295 + idx1 = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx') + idx2 = pd.date_range('2011-01-01', '2011-01-31', freq='D', + tz='Asia/Tokyo', name='idx') + + for idx in [idx1, idx2]: + result = idx.take([0]) + assert result == Timestamp('2011-01-01', tz=idx.tz) + + result = idx.take([0, 1, 2]) + expected = pd.date_range('2011-01-01', '2011-01-03', freq='D', + tz=idx.tz, name='idx') + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + result = idx.take([0, 2, 4]) + expected = pd.date_range('2011-01-01', '2011-01-05', freq='2D', + tz=idx.tz, name='idx') + tm.assert_index_equal(result, expected) + 
assert result.freq == expected.freq + + result = idx.take([7, 4, 1]) + expected = pd.date_range('2011-01-08', '2011-01-02', freq='-3D', + tz=idx.tz, name='idx') + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + result = idx.take([3, 2, 5]) + expected = DatetimeIndex(['2011-01-04', '2011-01-03', + '2011-01-06'], + freq=None, tz=idx.tz, name='idx') + tm.assert_index_equal(result, expected) + assert result.freq is None + + result = idx.take([-3, 2, 5]) + expected = DatetimeIndex(['2011-01-29', '2011-01-03', + '2011-01-06'], + freq=None, tz=idx.tz, name='idx') + tm.assert_index_equal(result, expected) + assert result.freq is None + + def test_take_invalid_kwargs(self): + idx = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx') + indices = [1, 6, 5, 9, 10, 13, 15, 3] + + msg = r"take\(\) got an unexpected keyword argument 'foo'" + tm.assert_raises_regex(TypeError, msg, idx.take, + indices, foo=2) + + msg = "the 'out' parameter is not supported" + tm.assert_raises_regex(ValueError, msg, idx.take, + indices, out=indices) + + msg = "the 'mode' parameter is not supported" + tm.assert_raises_regex(ValueError, msg, idx.take, + indices, mode='clip') + + # TODO: This method came from test_datetime; de-dup with version above + @pytest.mark.parametrize('tz', [None, 'US/Eastern', 'Asia/Tokyo']) + def test_take2(self, tz): + dates = [datetime(2010, 1, 1, 14), datetime(2010, 1, 1, 15), + datetime(2010, 1, 1, 17), datetime(2010, 1, 1, 21)] + + idx = DatetimeIndex(start='2010-01-01 09:00', + end='2010-02-01 09:00', freq='H', tz=tz, + name='idx') + expected = DatetimeIndex(dates, freq=None, name='idx', tz=tz) + + taken1 = idx.take([5, 6, 8, 12]) + taken2 = idx[[5, 6, 8, 12]] + + for taken in [taken1, taken2]: + tm.assert_index_equal(taken, expected) + assert isinstance(taken, DatetimeIndex) + assert taken.freq is None + assert taken.tz == expected.tz + assert taken.name == expected.name + + def test_take_fill_value(self): + # GH#12631 + 
idx = pd.DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01'], + name='xxx') + result = idx.take(np.array([1, 0, -1])) + expected = pd.DatetimeIndex(['2011-02-01', '2011-01-01', '2011-03-01'], + name='xxx') + tm.assert_index_equal(result, expected) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + expected = pd.DatetimeIndex(['2011-02-01', '2011-01-01', 'NaT'], + name='xxx') + tm.assert_index_equal(result, expected) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, + fill_value=True) + expected = pd.DatetimeIndex(['2011-02-01', '2011-01-01', '2011-03-01'], + name='xxx') + tm.assert_index_equal(result, expected) + + msg = ('When allow_fill=True and fill_value is not None, ' + 'all indices must be >= -1') + with tm.assert_raises_regex(ValueError, msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with tm.assert_raises_regex(ValueError, msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + with pytest.raises(IndexError): + idx.take(np.array([1, -5])) + + def test_take_fill_value_with_timezone(self): + idx = pd.DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01'], + name='xxx', tz='US/Eastern') + result = idx.take(np.array([1, 0, -1])) + expected = pd.DatetimeIndex(['2011-02-01', '2011-01-01', '2011-03-01'], + name='xxx', tz='US/Eastern') + tm.assert_index_equal(result, expected) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + expected = pd.DatetimeIndex(['2011-02-01', '2011-01-01', 'NaT'], + name='xxx', tz='US/Eastern') + tm.assert_index_equal(result, expected) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, + fill_value=True) + expected = pd.DatetimeIndex(['2011-02-01', '2011-01-01', '2011-03-01'], + name='xxx', tz='US/Eastern') + tm.assert_index_equal(result, expected) + + msg = ('When allow_fill=True and fill_value is not None, ' + 'all indices must be >= -1') + with tm.assert_raises_regex(ValueError, msg): + 
idx.take(np.array([1, 0, -2]), fill_value=True) + with tm.assert_raises_regex(ValueError, msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + with pytest.raises(IndexError): + idx.take(np.array([1, -5])) + + +class TestDatetimeIndex(object): + @pytest.mark.parametrize('null', [None, np.nan, pd.NaT]) + @pytest.mark.parametrize('tz', [None, 'UTC', 'US/Eastern']) + def test_insert_nat(self, tz, null): + # GH#16537, GH#18295 (test missing) + idx = pd.DatetimeIndex(['2017-01-01'], tz=tz) + expected = pd.DatetimeIndex(['NaT', '2017-01-01'], tz=tz) + res = idx.insert(0, null) + tm.assert_index_equal(res, expected) + def test_insert(self): idx = DatetimeIndex( ['2000-01-04', '2000-01-01', '2000-01-02'], name='idx') @@ -54,9 +298,9 @@ def test_insert(self): expected = Index([datetime(2000, 1, 4), 'inserted', datetime(2000, 1, 1), datetime(2000, 1, 2)], name='idx') - self.assertNotIsInstance(result, DatetimeIndex) + assert not isinstance(result, DatetimeIndex) tm.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) + assert result.name == expected.name idx = date_range('1/1/2000', periods=3, freq='M', name='idx') @@ -85,33 +329,29 @@ def test_insert(self): for n, d, expected in cases: result = idx.insert(n, d) tm.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(result.freq, expected.freq) + assert result.name == expected.name + assert result.freq == expected.freq # reset freq to None result = idx.insert(3, datetime(2000, 1, 2)) expected = DatetimeIndex(['2000-01-31', '2000-02-29', '2000-03-31', '2000-01-02'], name='idx', freq=None) tm.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertTrue(result.freq is None) - - # GH 7299 - tm._skip_if_no_pytz() - import pytz + assert result.name == expected.name + assert result.freq is None + # see gh-7299 idx = date_range('1/1/2000', periods=3, freq='D', tz='Asia/Tokyo', name='idx') - with 
tm.assertRaises(ValueError): - result = idx.insert(3, pd.Timestamp('2000-01-04')) - with tm.assertRaises(ValueError): - result = idx.insert(3, datetime(2000, 1, 4)) - with tm.assertRaises(ValueError): - result = idx.insert(3, pd.Timestamp('2000-01-04', tz='US/Eastern')) - with tm.assertRaises(ValueError): - result = idx.insert(3, - datetime(2000, 1, 4, - tzinfo=pytz.timezone('US/Eastern'))) + with pytest.raises(ValueError): + idx.insert(3, pd.Timestamp('2000-01-04')) + with pytest.raises(ValueError): + idx.insert(3, datetime(2000, 1, 4)) + with pytest.raises(ValueError): + idx.insert(3, pd.Timestamp('2000-01-04', tz='US/Eastern')) + with pytest.raises(ValueError): + idx.insert(3, datetime(2000, 1, 4, + tzinfo=pytz.timezone('US/Eastern'))) for tz in ['US/Pacific', 'Asia/Singapore']: idx = date_range('1/1/2000 09:00', periods=6, freq='H', tz=tz, @@ -124,9 +364,9 @@ def test_insert(self): result = idx.insert(6, d) tm.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(result.freq, expected.freq) - self.assertEqual(result.tz, expected.tz) + assert result.name == expected.name + assert result.freq == expected.freq + assert result.tz == expected.tz expected = DatetimeIndex(['2000-01-01 09:00', '2000-01-01 10:00', '2000-01-01 11:00', @@ -139,9 +379,9 @@ def test_insert(self): pytz.timezone(tz).localize(datetime(2000, 1, 1, 10))]: result = idx.insert(6, d) tm.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertTrue(result.freq is None) - self.assertEqual(result.tz, expected.tz) + assert result.name == expected.name + assert result.tz == expected.tz + assert result.freq is None def test_delete(self): idx = date_range(start='2000-01-01', periods=5, freq='M', name='idx') @@ -164,10 +404,10 @@ def test_delete(self): for n, expected in compat.iteritems(cases): result = idx.delete(n) tm.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - 
self.assertEqual(result.freq, expected.freq) + assert result.name == expected.name + assert result.freq == expected.freq - with tm.assertRaises((IndexError, ValueError)): + with pytest.raises((IndexError, ValueError)): # either depeidnig on numpy version result = idx.delete(5) @@ -179,17 +419,17 @@ def test_delete(self): freq='H', name='idx', tz=tz) result = idx.delete(0) tm.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(result.freqstr, 'H') - self.assertEqual(result.tz, expected.tz) + assert result.name == expected.name + assert result.freqstr == 'H' + assert result.tz == expected.tz expected = date_range(start='2000-01-01 09:00', periods=9, freq='H', name='idx', tz=tz) result = idx.delete(-1) tm.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(result.freqstr, 'H') - self.assertEqual(result.tz, expected.tz) + assert result.name == expected.name + assert result.freqstr == 'H' + assert result.tz == expected.tz def test_delete_slice(self): idx = date_range(start='2000-01-01', periods=10, freq='D', name='idx') @@ -211,13 +451,13 @@ def test_delete_slice(self): for n, expected in compat.iteritems(cases): result = idx.delete(n) tm.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(result.freq, expected.freq) + assert result.name == expected.name + assert result.freq == expected.freq result = idx.delete(slice(n[0], n[-1] + 1)) tm.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(result.freq, expected.freq) + assert result.name == expected.name + assert result.freq == expected.freq for tz in [None, 'Asia/Tokyo', 'US/Pacific']: ts = pd.Series(1, index=pd.date_range( @@ -227,9 +467,9 @@ def test_delete_slice(self): expected = pd.date_range('2000-01-01 14:00', periods=5, freq='H', name='idx', tz=tz) tm.assert_index_equal(result, expected) - 
self.assertEqual(result.name, expected.name) - self.assertEqual(result.freq, expected.freq) - self.assertEqual(result.tz, expected.tz) + assert result.name == expected.name + assert result.freq == expected.freq + assert result.tz == expected.tz # reset freq to None result = ts.drop(ts.index[[1, 3, 5, 7, 9]]).index @@ -238,6 +478,112 @@ def test_delete_slice(self): '2000-01-01 15:00', '2000-01-01 17:00'], freq=None, name='idx', tz=tz) tm.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(result.freq, expected.freq) - self.assertEqual(result.tz, expected.tz) + assert result.name == expected.name + assert result.freq == expected.freq + assert result.tz == expected.tz + + def test_get_loc(self): + idx = pd.date_range('2000-01-01', periods=3) + + for method in [None, 'pad', 'backfill', 'nearest']: + assert idx.get_loc(idx[1], method) == 1 + assert idx.get_loc(idx[1].to_pydatetime(), method) == 1 + assert idx.get_loc(str(idx[1]), method) == 1 + + if method is not None: + assert idx.get_loc(idx[1], method, + tolerance=pd.Timedelta('0 days')) == 1 + + assert idx.get_loc('2000-01-01', method='nearest') == 0 + assert idx.get_loc('2000-01-01T12', method='nearest') == 1 + + assert idx.get_loc('2000-01-01T12', method='nearest', + tolerance='1 day') == 1 + assert idx.get_loc('2000-01-01T12', method='nearest', + tolerance=pd.Timedelta('1D')) == 1 + assert idx.get_loc('2000-01-01T12', method='nearest', + tolerance=np.timedelta64(1, 'D')) == 1 + assert idx.get_loc('2000-01-01T12', method='nearest', + tolerance=timedelta(1)) == 1 + with tm.assert_raises_regex(ValueError, + 'unit abbreviation w/o a number'): + idx.get_loc('2000-01-01T12', method='nearest', tolerance='foo') + with pytest.raises(KeyError): + idx.get_loc('2000-01-01T03', method='nearest', tolerance='2 hours') + with pytest.raises( + ValueError, + match='tolerance size must match target index size'): + idx.get_loc('2000-01-01', method='nearest', + 
tolerance=[pd.Timedelta('1day').to_timedelta64(), + pd.Timedelta('1day').to_timedelta64()]) + + assert idx.get_loc('2000', method='nearest') == slice(0, 3) + assert idx.get_loc('2000-01', method='nearest') == slice(0, 3) + + assert idx.get_loc('1999', method='nearest') == 0 + assert idx.get_loc('2001', method='nearest') == 2 + + with pytest.raises(KeyError): + idx.get_loc('1999', method='pad') + with pytest.raises(KeyError): + idx.get_loc('2001', method='backfill') + + with pytest.raises(KeyError): + idx.get_loc('foobar') + with pytest.raises(TypeError): + idx.get_loc(slice(2)) + + idx = pd.to_datetime(['2000-01-01', '2000-01-04']) + assert idx.get_loc('2000-01-02', method='nearest') == 0 + assert idx.get_loc('2000-01-03', method='nearest') == 1 + assert idx.get_loc('2000-01', method='nearest') == slice(0, 2) + + # time indexing + idx = pd.date_range('2000-01-01', periods=24, freq='H') + tm.assert_numpy_array_equal(idx.get_loc(time(12)), + np.array([12]), check_dtype=False) + tm.assert_numpy_array_equal(idx.get_loc(time(12, 30)), + np.array([]), check_dtype=False) + with pytest.raises(NotImplementedError): + idx.get_loc(time(12, 30), method='pad') + + def test_get_indexer(self): + idx = pd.date_range('2000-01-01', periods=3) + exp = np.array([0, 1, 2], dtype=np.intp) + tm.assert_numpy_array_equal(idx.get_indexer(idx), exp) + + target = idx[0] + pd.to_timedelta(['-1 hour', '12 hours', + '1 day 1 hour']) + tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), + np.array([-1, 0, 1], dtype=np.intp)) + tm.assert_numpy_array_equal(idx.get_indexer(target, 'backfill'), + np.array([0, 1, 2], dtype=np.intp)) + tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest'), + np.array([0, 1, 1], dtype=np.intp)) + tm.assert_numpy_array_equal( + idx.get_indexer(target, 'nearest', + tolerance=pd.Timedelta('1 hour')), + np.array([0, -1, 1], dtype=np.intp)) + tol_raw = [pd.Timedelta('1 hour'), + pd.Timedelta('1 hour'), + pd.Timedelta('1 hour').to_timedelta64(), ] + 
tm.assert_numpy_array_equal( + idx.get_indexer(target, 'nearest', + tolerance=[np.timedelta64(x) for x in tol_raw]), + np.array([0, -1, 1], dtype=np.intp)) + tol_bad = [pd.Timedelta('2 hour').to_timedelta64(), + pd.Timedelta('1 hour').to_timedelta64(), + 'foo', ] + with pytest.raises( + ValueError, match='abbreviation w/o a number'): + idx.get_indexer(target, 'nearest', tolerance=tol_bad) + with pytest.raises(ValueError): + idx.get_indexer(idx[[0]], method='nearest', tolerance='foo') + + def test_reasonable_keyerror(self): + # GH#1062 + index = DatetimeIndex(['1/3/2000']) + try: + index.get_loc('1/1/2000') + except KeyError as e: + assert '2000' in str(e) diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py index 6b0191edbda5a..056924f2c6663 100644 --- a/pandas/tests/indexes/datetimes/test_misc.py +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -1,58 +1,16 @@ -import numpy as np +import locale +import calendar + +import pytest +import numpy as np import pandas as pd import pandas.util.testing as tm from pandas import (Index, DatetimeIndex, datetime, offsets, - Float64Index, date_range, Timestamp) - - -class TestDateTimeIndexToJulianDate(tm.TestCase): - - def test_1700(self): - r1 = Float64Index([2345897.5, 2345898.5, 2345899.5, 2345900.5, - 2345901.5]) - r2 = date_range(start=Timestamp('1710-10-01'), periods=5, - freq='D').to_julian_date() - self.assertIsInstance(r2, Float64Index) - tm.assert_index_equal(r1, r2) - - def test_2000(self): - r1 = Float64Index([2451601.5, 2451602.5, 2451603.5, 2451604.5, - 2451605.5]) - r2 = date_range(start=Timestamp('2000-02-27'), periods=5, - freq='D').to_julian_date() - self.assertIsInstance(r2, Float64Index) - tm.assert_index_equal(r1, r2) - - def test_hour(self): - r1 = Float64Index( - [2451601.5, 2451601.5416666666666666, 2451601.5833333333333333, - 2451601.625, 2451601.6666666666666666]) - r2 = date_range(start=Timestamp('2000-02-27'), periods=5, - 
freq='H').to_julian_date() - self.assertIsInstance(r2, Float64Index) - tm.assert_index_equal(r1, r2) - - def test_minute(self): - r1 = Float64Index( - [2451601.5, 2451601.5006944444444444, 2451601.5013888888888888, - 2451601.5020833333333333, 2451601.5027777777777777]) - r2 = date_range(start=Timestamp('2000-02-27'), periods=5, - freq='T').to_julian_date() - self.assertIsInstance(r2, Float64Index) - tm.assert_index_equal(r1, r2) - - def test_second(self): - r1 = Float64Index( - [2451601.5, 2451601.500011574074074, 2451601.5000231481481481, - 2451601.5000347222222222, 2451601.5000462962962962]) - r2 = date_range(start=Timestamp('2000-02-27'), periods=5, - freq='S').to_julian_date() - self.assertIsInstance(r2, Float64Index) - tm.assert_index_equal(r1, r2) - - -class TestTimeSeries(tm.TestCase): + date_range, Timestamp) + + +class TestTimeSeries(object): def test_pass_datetimeindex_to_index(self): # Bugs in #1396 @@ -61,7 +19,7 @@ def test_pass_datetimeindex_to_index(self): expected = Index(rng.to_pydatetime(), dtype=object) - self.assert_numpy_array_equal(idx.values, expected.values) + tm.assert_numpy_array_equal(idx.values, expected.values) def test_range_edges(self): # GH 13672 @@ -128,144 +86,115 @@ def test_range_edges(self): '1970-01-03', '1970-01-04']) tm.assert_index_equal(idx, exp) - def test_datetimeindex_integers_shift(self): - rng = date_range('1/1/2000', periods=20) - - result = rng + 5 - expected = rng.shift(5) - tm.assert_index_equal(result, expected) - - result = rng - 5 - expected = rng.shift(-5) - tm.assert_index_equal(result, expected) - - def test_datetimeindex_repr_short(self): - dr = date_range(start='1/1/2012', periods=1) - repr(dr) - - dr = date_range(start='1/1/2012', periods=2) - repr(dr) - - dr = date_range(start='1/1/2012', periods=3) - repr(dr) - def test_normalize(self): - rng = date_range('1/1/2000 9:30', periods=10, freq='D') - - result = rng.normalize() - expected = date_range('1/1/2000', periods=10, freq='D') - 
tm.assert_index_equal(result, expected) - - rng_ns = pd.DatetimeIndex(np.array([1380585623454345752, - 1380585612343234312]).astype( - "datetime64[ns]")) - rng_ns_normalized = rng_ns.normalize() - expected = pd.DatetimeIndex(np.array([1380585600000000000, - 1380585600000000000]).astype( - "datetime64[ns]")) - tm.assert_index_equal(rng_ns_normalized, expected) - - self.assertTrue(result.is_normalized) - self.assertFalse(rng.is_normalized) - - -class TestDatetime64(tm.TestCase): +class TestDatetime64(object): def test_datetimeindex_accessors(self): - dti = DatetimeIndex(freq='D', start=datetime(1998, 1, 1), periods=365) - - self.assertEqual(dti.year[0], 1998) - self.assertEqual(dti.month[0], 1) - self.assertEqual(dti.day[0], 1) - self.assertEqual(dti.hour[0], 0) - self.assertEqual(dti.minute[0], 0) - self.assertEqual(dti.second[0], 0) - self.assertEqual(dti.microsecond[0], 0) - self.assertEqual(dti.dayofweek[0], 3) - - self.assertEqual(dti.dayofyear[0], 1) - self.assertEqual(dti.dayofyear[120], 121) - - self.assertEqual(dti.weekofyear[0], 1) - self.assertEqual(dti.weekofyear[120], 18) - - self.assertEqual(dti.quarter[0], 1) - self.assertEqual(dti.quarter[120], 2) - - self.assertEqual(dti.days_in_month[0], 31) - self.assertEqual(dti.days_in_month[90], 30) - - self.assertEqual(dti.is_month_start[0], True) - self.assertEqual(dti.is_month_start[1], False) - self.assertEqual(dti.is_month_start[31], True) - self.assertEqual(dti.is_quarter_start[0], True) - self.assertEqual(dti.is_quarter_start[90], True) - self.assertEqual(dti.is_year_start[0], True) - self.assertEqual(dti.is_year_start[364], False) - self.assertEqual(dti.is_month_end[0], False) - self.assertEqual(dti.is_month_end[30], True) - self.assertEqual(dti.is_month_end[31], False) - self.assertEqual(dti.is_month_end[364], True) - self.assertEqual(dti.is_quarter_end[0], False) - self.assertEqual(dti.is_quarter_end[30], False) - self.assertEqual(dti.is_quarter_end[89], True) - 
self.assertEqual(dti.is_quarter_end[364], True) - self.assertEqual(dti.is_year_end[0], False) - self.assertEqual(dti.is_year_end[364], True) - - # GH 11128 - self.assertEqual(dti.weekday_name[4], u'Monday') - self.assertEqual(dti.weekday_name[5], u'Tuesday') - self.assertEqual(dti.weekday_name[6], u'Wednesday') - self.assertEqual(dti.weekday_name[7], u'Thursday') - self.assertEqual(dti.weekday_name[8], u'Friday') - self.assertEqual(dti.weekday_name[9], u'Saturday') - self.assertEqual(dti.weekday_name[10], u'Sunday') - - self.assertEqual(Timestamp('2016-04-04').weekday_name, u'Monday') - self.assertEqual(Timestamp('2016-04-05').weekday_name, u'Tuesday') - self.assertEqual(Timestamp('2016-04-06').weekday_name, u'Wednesday') - self.assertEqual(Timestamp('2016-04-07').weekday_name, u'Thursday') - self.assertEqual(Timestamp('2016-04-08').weekday_name, u'Friday') - self.assertEqual(Timestamp('2016-04-09').weekday_name, u'Saturday') - self.assertEqual(Timestamp('2016-04-10').weekday_name, u'Sunday') - - self.assertEqual(len(dti.year), 365) - self.assertEqual(len(dti.month), 365) - self.assertEqual(len(dti.day), 365) - self.assertEqual(len(dti.hour), 365) - self.assertEqual(len(dti.minute), 365) - self.assertEqual(len(dti.second), 365) - self.assertEqual(len(dti.microsecond), 365) - self.assertEqual(len(dti.dayofweek), 365) - self.assertEqual(len(dti.dayofyear), 365) - self.assertEqual(len(dti.weekofyear), 365) - self.assertEqual(len(dti.quarter), 365) - self.assertEqual(len(dti.is_month_start), 365) - self.assertEqual(len(dti.is_month_end), 365) - self.assertEqual(len(dti.is_quarter_start), 365) - self.assertEqual(len(dti.is_quarter_end), 365) - self.assertEqual(len(dti.is_year_start), 365) - self.assertEqual(len(dti.is_year_end), 365) - self.assertEqual(len(dti.weekday_name), 365) + dti_naive = DatetimeIndex(freq='D', start=datetime(1998, 1, 1), + periods=365) + # GH 13303 + dti_tz = DatetimeIndex(freq='D', start=datetime(1998, 1, 1), + periods=365, tz='US/Eastern') + 
for dti in [dti_naive, dti_tz]: + + assert dti.year[0] == 1998 + assert dti.month[0] == 1 + assert dti.day[0] == 1 + assert dti.hour[0] == 0 + assert dti.minute[0] == 0 + assert dti.second[0] == 0 + assert dti.microsecond[0] == 0 + assert dti.dayofweek[0] == 3 + + assert dti.dayofyear[0] == 1 + assert dti.dayofyear[120] == 121 + + assert dti.weekofyear[0] == 1 + assert dti.weekofyear[120] == 18 + + assert dti.quarter[0] == 1 + assert dti.quarter[120] == 2 + + assert dti.days_in_month[0] == 31 + assert dti.days_in_month[90] == 30 + + assert dti.is_month_start[0] + assert not dti.is_month_start[1] + assert dti.is_month_start[31] + assert dti.is_quarter_start[0] + assert dti.is_quarter_start[90] + assert dti.is_year_start[0] + assert not dti.is_year_start[364] + assert not dti.is_month_end[0] + assert dti.is_month_end[30] + assert not dti.is_month_end[31] + assert dti.is_month_end[364] + assert not dti.is_quarter_end[0] + assert not dti.is_quarter_end[30] + assert dti.is_quarter_end[89] + assert dti.is_quarter_end[364] + assert not dti.is_year_end[0] + assert dti.is_year_end[364] + + assert len(dti.year) == 365 + assert len(dti.month) == 365 + assert len(dti.day) == 365 + assert len(dti.hour) == 365 + assert len(dti.minute) == 365 + assert len(dti.second) == 365 + assert len(dti.microsecond) == 365 + assert len(dti.dayofweek) == 365 + assert len(dti.dayofyear) == 365 + assert len(dti.weekofyear) == 365 + assert len(dti.quarter) == 365 + assert len(dti.is_month_start) == 365 + assert len(dti.is_month_end) == 365 + assert len(dti.is_quarter_start) == 365 + assert len(dti.is_quarter_end) == 365 + assert len(dti.is_year_start) == 365 + assert len(dti.is_year_end) == 365 + assert len(dti.weekday_name) == 365 + + dti.name = 'name' + + # non boolean accessors -> return Index + for accessor in DatetimeIndex._field_ops: + res = getattr(dti, accessor) + assert len(res) == 365 + assert isinstance(res, Index) + assert res.name == 'name' + + # boolean accessors -> return array + 
for accessor in DatetimeIndex._bool_ops: + res = getattr(dti, accessor) + assert len(res) == 365 + assert isinstance(res, np.ndarray) + + # test boolean indexing + res = dti[dti.is_quarter_start] + exp = dti[[0, 90, 181, 273]] + tm.assert_index_equal(res, exp) + res = dti[dti.is_leap_year] + exp = DatetimeIndex([], freq='D', tz=dti.tz, name='name') + tm.assert_index_equal(res, exp) dti = DatetimeIndex(freq='BQ-FEB', start=datetime(1998, 1, 1), periods=4) - self.assertEqual(sum(dti.is_quarter_start), 0) - self.assertEqual(sum(dti.is_quarter_end), 4) - self.assertEqual(sum(dti.is_year_start), 0) - self.assertEqual(sum(dti.is_year_end), 1) + assert sum(dti.is_quarter_start) == 0 + assert sum(dti.is_quarter_end) == 4 + assert sum(dti.is_year_start) == 0 + assert sum(dti.is_year_end) == 1 # Ensure is_start/end accessors throw ValueError for CustomBusinessDay, # CBD requires np >= 1.7 bday_egypt = offsets.CustomBusinessDay(weekmask='Sun Mon Tue Wed Thu') dti = date_range(datetime(2013, 4, 30), periods=5, freq=bday_egypt) - self.assertRaises(ValueError, lambda: dti.is_month_start) + pytest.raises(ValueError, lambda: dti.is_month_start) dti = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03']) - self.assertEqual(dti.is_month_start[0], 1) + assert dti.is_month_start[0] == 1 tests = [ (Timestamp('2013-06-01', freq='M').is_month_start, 1), @@ -302,10 +231,68 @@ def test_datetimeindex_accessors(self): (Timestamp('2013-02-01').days_in_month, 28)] for ts, value in tests: - self.assertEqual(ts, value) + assert ts == value + + # GH 6538: Check that DatetimeIndex and its TimeStamp elements + # return the same weekofyear accessor close to new year w/ tz + dates = ["2013/12/29", "2013/12/30", "2013/12/31"] + dates = DatetimeIndex(dates, tz="Europe/Brussels") + expected = [52, 1, 1] + assert dates.weekofyear.tolist() == expected + assert [d.weekofyear for d in dates] == expected + + # GH 12806 + @pytest.mark.parametrize('time_locale', [ + None] if tm.get_locales() is None else 
[None] + tm.get_locales()) + def test_datetime_name_accessors(self, time_locale): + # Test Monday -> Sunday and January -> December, in that sequence + if time_locale is None: + # If the time_locale is None, day-name and month_name should + # return the english attributes + expected_days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', + 'Friday', 'Saturday', 'Sunday'] + expected_months = ['January', 'February', 'March', 'April', 'May', + 'June', 'July', 'August', 'September', + 'October', 'November', 'December'] + else: + with tm.set_locale(time_locale, locale.LC_TIME): + expected_days = calendar.day_name[:] + expected_months = calendar.month_name[1:] + + # GH 11128 + dti = DatetimeIndex(freq='D', start=datetime(1998, 1, 1), + periods=365) + english_days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', + 'Friday', 'Saturday', 'Sunday'] + for day, name, eng_name in zip(range(4, 11), + expected_days, + english_days): + name = name.capitalize() + assert dti.weekday_name[day] == eng_name + assert dti.day_name(locale=time_locale)[day] == name + ts = Timestamp(datetime(2016, 4, day)) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + assert ts.weekday_name == eng_name + assert ts.day_name(locale=time_locale) == name + dti = dti.append(DatetimeIndex([pd.NaT])) + assert np.isnan(dti.day_name(locale=time_locale)[-1]) + ts = Timestamp(pd.NaT) + assert np.isnan(ts.day_name(locale=time_locale)) + + # GH 12805 + dti = DatetimeIndex(freq='M', start='2012', end='2013') + result = dti.month_name(locale=time_locale) + expected = Index([month.capitalize() for month in expected_months]) + tm.assert_index_equal(result, expected) + for date, expected in zip(dti, expected_months): + result = date.month_name(locale=time_locale) + assert result == expected.capitalize() + dti = dti.append(DatetimeIndex([pd.NaT])) + assert np.isnan(dti.month_name(locale=time_locale)[-1]) def test_nanosecond_field(self): dti = DatetimeIndex(np.arange(10)) - 
self.assert_numpy_array_equal(dti.nanosecond, - np.arange(10, dtype=np.int32)) + tm.assert_index_equal(dti.nanosecond, + pd.Index(np.arange(10, dtype=np.int64))) diff --git a/pandas/tests/indexes/datetimes/test_missing.py b/pandas/tests/indexes/datetimes/test_missing.py index 8f3752227b6d0..c8d47caa7e947 100644 --- a/pandas/tests/indexes/datetimes/test_missing.py +++ b/pandas/tests/indexes/datetimes/test_missing.py @@ -1,50 +1,52 @@ +import pytest + import pandas as pd import pandas.util.testing as tm -class TestDatetimeIndex(tm.TestCase): +class TestDatetimeIndex(object): - def test_fillna_datetime64(self): + @pytest.mark.parametrize('tz', ['US/Eastern', 'Asia/Tokyo']) + def test_fillna_datetime64(self, tz): # GH 11343 - for tz in ['US/Eastern', 'Asia/Tokyo']: - idx = pd.DatetimeIndex(['2011-01-01 09:00', pd.NaT, - '2011-01-01 11:00']) - - exp = pd.DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', - '2011-01-01 11:00']) - self.assert_index_equal( - idx.fillna(pd.Timestamp('2011-01-01 10:00')), exp) - - # tz mismatch - exp = pd.Index([pd.Timestamp('2011-01-01 09:00'), - pd.Timestamp('2011-01-01 10:00', tz=tz), - pd.Timestamp('2011-01-01 11:00')], dtype=object) - self.assert_index_equal( - idx.fillna(pd.Timestamp('2011-01-01 10:00', tz=tz)), exp) - - # object - exp = pd.Index([pd.Timestamp('2011-01-01 09:00'), 'x', - pd.Timestamp('2011-01-01 11:00')], dtype=object) - self.assert_index_equal(idx.fillna('x'), exp) - - idx = pd.DatetimeIndex(['2011-01-01 09:00', pd.NaT, - '2011-01-01 11:00'], tz=tz) - - exp = pd.DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', - '2011-01-01 11:00'], tz=tz) - self.assert_index_equal( - idx.fillna(pd.Timestamp('2011-01-01 10:00', tz=tz)), exp) - - exp = pd.Index([pd.Timestamp('2011-01-01 09:00', tz=tz), - pd.Timestamp('2011-01-01 10:00'), - pd.Timestamp('2011-01-01 11:00', tz=tz)], - dtype=object) - self.assert_index_equal( - idx.fillna(pd.Timestamp('2011-01-01 10:00')), exp) - - # object - exp = 
pd.Index([pd.Timestamp('2011-01-01 09:00', tz=tz), - 'x', - pd.Timestamp('2011-01-01 11:00', tz=tz)], - dtype=object) - self.assert_index_equal(idx.fillna('x'), exp) + idx = pd.DatetimeIndex(['2011-01-01 09:00', pd.NaT, + '2011-01-01 11:00']) + + exp = pd.DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', + '2011-01-01 11:00']) + tm.assert_index_equal( + idx.fillna(pd.Timestamp('2011-01-01 10:00')), exp) + + # tz mismatch + exp = pd.Index([pd.Timestamp('2011-01-01 09:00'), + pd.Timestamp('2011-01-01 10:00', tz=tz), + pd.Timestamp('2011-01-01 11:00')], dtype=object) + tm.assert_index_equal( + idx.fillna(pd.Timestamp('2011-01-01 10:00', tz=tz)), exp) + + # object + exp = pd.Index([pd.Timestamp('2011-01-01 09:00'), 'x', + pd.Timestamp('2011-01-01 11:00')], dtype=object) + tm.assert_index_equal(idx.fillna('x'), exp) + + idx = pd.DatetimeIndex(['2011-01-01 09:00', pd.NaT, + '2011-01-01 11:00'], tz=tz) + + exp = pd.DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', + '2011-01-01 11:00'], tz=tz) + tm.assert_index_equal( + idx.fillna(pd.Timestamp('2011-01-01 10:00', tz=tz)), exp) + + exp = pd.Index([pd.Timestamp('2011-01-01 09:00', tz=tz), + pd.Timestamp('2011-01-01 10:00'), + pd.Timestamp('2011-01-01 11:00', tz=tz)], + dtype=object) + tm.assert_index_equal( + idx.fillna(pd.Timestamp('2011-01-01 10:00')), exp) + + # object + exp = pd.Index([pd.Timestamp('2011-01-01 09:00', tz=tz), + 'x', + pd.Timestamp('2011-01-01 11:00', tz=tz)], + dtype=object) + tm.assert_index_equal(idx.fillna('x'), exp) diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 63bf07ec041d3..ed7e425924097 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -1,15 +1,14 @@ +import pytest import warnings import numpy as np -from datetime import timedelta +from datetime import datetime import pandas as pd -import pandas.tslib as tslib +import pandas._libs.tslib as tslib import 
pandas.util.testing as tm -from pandas.core.common import PerformanceWarning -from pandas.tseries.index import cdate_range -from pandas import (DatetimeIndex, PeriodIndex, Series, Timestamp, Timedelta, - date_range, TimedeltaIndex, _np_version_under1p10, Index, - datetime, Float64Index, offsets, bdate_range) +from pandas import (DatetimeIndex, PeriodIndex, Series, Timestamp, + date_range, _np_version_under1p10, Index, + bdate_range) from pandas.tseries.offsets import BMonthEnd, CDay, BDay from pandas.tests.test_base import Ops @@ -21,598 +20,194 @@ class TestDatetimeIndexOps(Ops): tz = [None, 'UTC', 'Asia/Tokyo', 'US/Eastern', 'dateutil/Asia/Singapore', 'dateutil/US/Pacific'] - def setUp(self): - super(TestDatetimeIndexOps, self).setUp() + def setup_method(self, method): + super(TestDatetimeIndexOps, self).setup_method(method) mask = lambda x: (isinstance(x, DatetimeIndex) or isinstance(x, PeriodIndex)) self.is_valid_objs = [o for o in self.objs if mask(o)] self.not_valid_objs = [o for o in self.objs if not mask(o)] def test_ops_properties(self): - self.check_ops_properties( - ['year', 'month', 'day', 'hour', 'minute', 'second', 'weekofyear', - 'week', 'dayofweek', 'dayofyear', 'quarter']) - self.check_ops_properties(['date', 'time', 'microsecond', 'nanosecond', - 'is_month_start', 'is_month_end', - 'is_quarter_start', - 'is_quarter_end', 'is_year_start', - 'is_year_end', 'weekday_name'], - lambda x: isinstance(x, DatetimeIndex)) + f = lambda x: isinstance(x, DatetimeIndex) + self.check_ops_properties(DatetimeIndex._field_ops, f) + self.check_ops_properties(DatetimeIndex._object_ops, f) + self.check_ops_properties(DatetimeIndex._bool_ops, f) def test_ops_properties_basic(self): # sanity check that the behavior didn't change # GH7206 for op in ['year', 'day', 'second', 'weekday']: - self.assertRaises(TypeError, lambda x: getattr(self.dt_series, op)) + pytest.raises(TypeError, lambda x: getattr(self.dt_series, op)) # attribute access should still work! 
s = Series(dict(year=2000, month=1, day=10)) - self.assertEqual(s.year, 2000) - self.assertEqual(s.month, 1) - self.assertEqual(s.day, 10) - self.assertRaises(AttributeError, lambda: s.weekday) - - def test_asobject_tolist(self): - idx = pd.date_range(start='2013-01-01', periods=4, freq='M', - name='idx') - expected_list = [Timestamp('2013-01-31'), - Timestamp('2013-02-28'), - Timestamp('2013-03-31'), - Timestamp('2013-04-30')] - expected = pd.Index(expected_list, dtype=object, name='idx') - result = idx.asobject - self.assertTrue(isinstance(result, Index)) - - self.assertEqual(result.dtype, object) - self.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(idx.tolist(), expected_list) - - idx = pd.date_range(start='2013-01-01', periods=4, freq='M', - name='idx', tz='Asia/Tokyo') - expected_list = [Timestamp('2013-01-31', tz='Asia/Tokyo'), - Timestamp('2013-02-28', tz='Asia/Tokyo'), - Timestamp('2013-03-31', tz='Asia/Tokyo'), - Timestamp('2013-04-30', tz='Asia/Tokyo')] - expected = pd.Index(expected_list, dtype=object, name='idx') - result = idx.asobject - self.assertTrue(isinstance(result, Index)) - self.assertEqual(result.dtype, object) - self.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(idx.tolist(), expected_list) - - idx = DatetimeIndex([datetime(2013, 1, 1), datetime(2013, 1, 2), - pd.NaT, datetime(2013, 1, 4)], name='idx') - expected_list = [Timestamp('2013-01-01'), - Timestamp('2013-01-02'), pd.NaT, - Timestamp('2013-01-04')] - expected = pd.Index(expected_list, dtype=object, name='idx') - result = idx.asobject - self.assertTrue(isinstance(result, Index)) - self.assertEqual(result.dtype, object) - self.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(idx.tolist(), expected_list) + assert s.year == 2000 + assert s.month == 1 + assert s.day == 10 + pytest.raises(AttributeError, lambda: s.weekday) 
def test_minmax(self): for tz in self.tz: # monotonic idx1 = pd.DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], tz=tz) - self.assertTrue(idx1.is_monotonic) + assert idx1.is_monotonic # non-monotonic idx2 = pd.DatetimeIndex(['2011-01-01', pd.NaT, '2011-01-03', '2011-01-02', pd.NaT], tz=tz) - self.assertFalse(idx2.is_monotonic) + assert not idx2.is_monotonic for idx in [idx1, idx2]: - self.assertEqual(idx.min(), Timestamp('2011-01-01', tz=tz)) - self.assertEqual(idx.max(), Timestamp('2011-01-03', tz=tz)) - self.assertEqual(idx.argmin(), 0) - self.assertEqual(idx.argmax(), 2) + assert idx.min() == Timestamp('2011-01-01', tz=tz) + assert idx.max() == Timestamp('2011-01-03', tz=tz) + assert idx.argmin() == 0 + assert idx.argmax() == 2 for op in ['min', 'max']: # Return NaT obj = DatetimeIndex([]) - self.assertTrue(pd.isnull(getattr(obj, op)())) + assert pd.isna(getattr(obj, op)()) obj = DatetimeIndex([pd.NaT]) - self.assertTrue(pd.isnull(getattr(obj, op)())) + assert pd.isna(getattr(obj, op)()) obj = DatetimeIndex([pd.NaT, pd.NaT, pd.NaT]) - self.assertTrue(pd.isnull(getattr(obj, op)())) + assert pd.isna(getattr(obj, op)()) def test_numpy_minmax(self): dr = pd.date_range(start='2016-01-15', end='2016-01-20') - self.assertEqual(np.min(dr), - Timestamp('2016-01-15 00:00:00', freq='D')) - self.assertEqual(np.max(dr), - Timestamp('2016-01-20 00:00:00', freq='D')) + assert np.min(dr) == Timestamp('2016-01-15 00:00:00', freq='D') + assert np.max(dr) == Timestamp('2016-01-20 00:00:00', freq='D') errmsg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, errmsg, np.min, dr, out=0) - tm.assertRaisesRegexp(ValueError, errmsg, np.max, dr, out=0) + tm.assert_raises_regex(ValueError, errmsg, np.min, dr, out=0) + tm.assert_raises_regex(ValueError, errmsg, np.max, dr, out=0) - self.assertEqual(np.argmin(dr), 0) - self.assertEqual(np.argmax(dr), 5) + assert np.argmin(dr) == 0 + assert np.argmax(dr) == 5 if not _np_version_under1p10: errmsg = "the 
'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, errmsg, np.argmin, dr, out=0) - tm.assertRaisesRegexp(ValueError, errmsg, np.argmax, dr, out=0) + tm.assert_raises_regex( + ValueError, errmsg, np.argmin, dr, out=0) + tm.assert_raises_regex( + ValueError, errmsg, np.argmax, dr, out=0) - def test_round(self): - for tz in self.tz: - rng = pd.date_range(start='2016-01-01', periods=5, - freq='30Min', tz=tz) - elt = rng[1] - - expected_rng = DatetimeIndex([ - Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'), - Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'), - Timestamp('2016-01-01 01:00:00', tz=tz, freq='30T'), - Timestamp('2016-01-01 02:00:00', tz=tz, freq='30T'), - Timestamp('2016-01-01 02:00:00', tz=tz, freq='30T'), - ]) - expected_elt = expected_rng[1] - - tm.assert_index_equal(rng.round(freq='H'), expected_rng) - self.assertEqual(elt.round(freq='H'), expected_elt) - - msg = pd.tseries.frequencies._INVALID_FREQ_ERROR - with tm.assertRaisesRegexp(ValueError, msg): - rng.round(freq='foo') - with tm.assertRaisesRegexp(ValueError, msg): - elt.round(freq='foo') - - msg = " is a non-fixed frequency" - tm.assertRaisesRegexp(ValueError, msg, rng.round, freq='M') - tm.assertRaisesRegexp(ValueError, msg, elt.round, freq='M') - - def test_repeat_range(self): + @pytest.mark.parametrize('tz', tz) + def test_repeat_range(self, tz): rng = date_range('1/1/2000', '1/1/2001') result = rng.repeat(5) - self.assertIsNone(result.freq) - self.assertEqual(len(result), 5 * len(rng)) - - for tz in self.tz: - index = pd.date_range('2001-01-01', periods=2, freq='D', tz=tz) - exp = pd.DatetimeIndex(['2001-01-01', '2001-01-01', - '2001-01-02', '2001-01-02'], tz=tz) - for res in [index.repeat(2), np.repeat(index, 2)]: - tm.assert_index_equal(res, exp) - self.assertIsNone(res.freq) - - index = pd.date_range('2001-01-01', periods=2, freq='2D', tz=tz) - exp = pd.DatetimeIndex(['2001-01-01', '2001-01-01', - '2001-01-03', '2001-01-03'], tz=tz) - for res in [index.repeat(2), 
np.repeat(index, 2)]: - tm.assert_index_equal(res, exp) - self.assertIsNone(res.freq) - - index = pd.DatetimeIndex(['2001-01-01', 'NaT', '2003-01-01'], - tz=tz) - exp = pd.DatetimeIndex(['2001-01-01', '2001-01-01', '2001-01-01', - 'NaT', 'NaT', 'NaT', - '2003-01-01', '2003-01-01', '2003-01-01'], - tz=tz) - for res in [index.repeat(3), np.repeat(index, 3)]: - tm.assert_index_equal(res, exp) - self.assertIsNone(res.freq) - - def test_repeat(self): + assert result.freq is None + assert len(result) == 5 * len(rng) + + index = pd.date_range('2001-01-01', periods=2, freq='D', tz=tz) + exp = pd.DatetimeIndex(['2001-01-01', '2001-01-01', + '2001-01-02', '2001-01-02'], tz=tz) + for res in [index.repeat(2), np.repeat(index, 2)]: + tm.assert_index_equal(res, exp) + assert res.freq is None + + index = pd.date_range('2001-01-01', periods=2, freq='2D', tz=tz) + exp = pd.DatetimeIndex(['2001-01-01', '2001-01-01', + '2001-01-03', '2001-01-03'], tz=tz) + for res in [index.repeat(2), np.repeat(index, 2)]: + tm.assert_index_equal(res, exp) + assert res.freq is None + + index = pd.DatetimeIndex(['2001-01-01', 'NaT', '2003-01-01'], + tz=tz) + exp = pd.DatetimeIndex(['2001-01-01', '2001-01-01', '2001-01-01', + 'NaT', 'NaT', 'NaT', + '2003-01-01', '2003-01-01', '2003-01-01'], + tz=tz) + for res in [index.repeat(3), np.repeat(index, 3)]: + tm.assert_index_equal(res, exp) + assert res.freq is None + + @pytest.mark.parametrize('tz', tz) + def test_repeat(self, tz): reps = 2 msg = "the 'axis' parameter is not supported" - for tz in self.tz: - rng = pd.date_range(start='2016-01-01', periods=2, - freq='30Min', tz=tz) - - expected_rng = DatetimeIndex([ - Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'), - Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'), - Timestamp('2016-01-01 00:30:00', tz=tz, freq='30T'), - Timestamp('2016-01-01 00:30:00', tz=tz, freq='30T'), - ]) - - res = rng.repeat(reps) - tm.assert_index_equal(res, expected_rng) - self.assertIsNone(res.freq) - - 
tm.assert_index_equal(np.repeat(rng, reps), expected_rng) - tm.assertRaisesRegexp(ValueError, msg, np.repeat, - rng, reps, axis=1) - - def test_representation(self): - - idx = [] - idx.append(DatetimeIndex([], freq='D')) - idx.append(DatetimeIndex(['2011-01-01'], freq='D')) - idx.append(DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D')) - idx.append(DatetimeIndex( - ['2011-01-01', '2011-01-02', '2011-01-03'], freq='D')) - idx.append(DatetimeIndex( - ['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00' - ], freq='H', tz='Asia/Tokyo')) - idx.append(DatetimeIndex( - ['2011-01-01 09:00', '2011-01-01 10:00', pd.NaT], tz='US/Eastern')) - idx.append(DatetimeIndex( - ['2011-01-01 09:00', '2011-01-01 10:00', pd.NaT], tz='UTC')) - - exp = [] - exp.append("""DatetimeIndex([], dtype='datetime64[ns]', freq='D')""") - exp.append("DatetimeIndex(['2011-01-01'], dtype='datetime64[ns]', " - "freq='D')") - exp.append("DatetimeIndex(['2011-01-01', '2011-01-02'], " - "dtype='datetime64[ns]', freq='D')") - exp.append("DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], " - "dtype='datetime64[ns]', freq='D')") - exp.append("DatetimeIndex(['2011-01-01 09:00:00+09:00', " - "'2011-01-01 10:00:00+09:00', '2011-01-01 11:00:00+09:00']" - ", dtype='datetime64[ns, Asia/Tokyo]', freq='H')") - exp.append("DatetimeIndex(['2011-01-01 09:00:00-05:00', " - "'2011-01-01 10:00:00-05:00', 'NaT'], " - "dtype='datetime64[ns, US/Eastern]', freq=None)") - exp.append("DatetimeIndex(['2011-01-01 09:00:00+00:00', " - "'2011-01-01 10:00:00+00:00', 'NaT'], " - "dtype='datetime64[ns, UTC]', freq=None)""") - - with pd.option_context('display.width', 300): - for indx, expected in zip(idx, exp): - for func in ['__repr__', '__unicode__', '__str__']: - result = getattr(indx, func)() - self.assertEqual(result, expected) - - def test_representation_to_series(self): - idx1 = DatetimeIndex([], freq='D') - idx2 = DatetimeIndex(['2011-01-01'], freq='D') - idx3 = DatetimeIndex(['2011-01-01', '2011-01-02'], 
freq='D') - idx4 = DatetimeIndex( - ['2011-01-01', '2011-01-02', '2011-01-03'], freq='D') - idx5 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', - '2011-01-01 11:00'], freq='H', tz='Asia/Tokyo') - idx6 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', pd.NaT], - tz='US/Eastern') - idx7 = DatetimeIndex(['2011-01-01 09:00', '2011-01-02 10:15']) - - exp1 = """Series([], dtype: datetime64[ns])""" - - exp2 = """0 2011-01-01 -dtype: datetime64[ns]""" - - exp3 = """0 2011-01-01 -1 2011-01-02 -dtype: datetime64[ns]""" - - exp4 = """0 2011-01-01 -1 2011-01-02 -2 2011-01-03 -dtype: datetime64[ns]""" - - exp5 = """0 2011-01-01 09:00:00+09:00 -1 2011-01-01 10:00:00+09:00 -2 2011-01-01 11:00:00+09:00 -dtype: datetime64[ns, Asia/Tokyo]""" - - exp6 = """0 2011-01-01 09:00:00-05:00 -1 2011-01-01 10:00:00-05:00 -2 NaT -dtype: datetime64[ns, US/Eastern]""" - - exp7 = """0 2011-01-01 09:00:00 -1 2011-01-02 10:15:00 -dtype: datetime64[ns]""" - - with pd.option_context('display.width', 300): - for idx, expected in zip([idx1, idx2, idx3, idx4, - idx5, idx6, idx7], - [exp1, exp2, exp3, exp4, - exp5, exp6, exp7]): - result = repr(Series(idx)) - self.assertEqual(result, expected) - - def test_summary(self): - # GH9116 - idx1 = DatetimeIndex([], freq='D') - idx2 = DatetimeIndex(['2011-01-01'], freq='D') - idx3 = DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D') - idx4 = DatetimeIndex( - ['2011-01-01', '2011-01-02', '2011-01-03'], freq='D') - idx5 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', - '2011-01-01 11:00'], - freq='H', tz='Asia/Tokyo') - idx6 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', pd.NaT], - tz='US/Eastern') - - exp1 = """DatetimeIndex: 0 entries -Freq: D""" - - exp2 = """DatetimeIndex: 1 entries, 2011-01-01 to 2011-01-01 -Freq: D""" - - exp3 = """DatetimeIndex: 2 entries, 2011-01-01 to 2011-01-02 -Freq: D""" - - exp4 = """DatetimeIndex: 3 entries, 2011-01-01 to 2011-01-03 -Freq: D""" - - exp5 = ("DatetimeIndex: 3 entries, 2011-01-01 
09:00:00+09:00 " - "to 2011-01-01 11:00:00+09:00\n" - "Freq: H") - - exp6 = """DatetimeIndex: 3 entries, 2011-01-01 09:00:00-05:00 to NaT""" - - for idx, expected in zip([idx1, idx2, idx3, idx4, idx5, idx6], - [exp1, exp2, exp3, exp4, exp5, exp6]): - result = idx.summary() - self.assertEqual(result, expected) - - def test_resolution(self): + rng = pd.date_range(start='2016-01-01', periods=2, + freq='30Min', tz=tz) + + expected_rng = DatetimeIndex([ + Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 00:30:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 00:30:00', tz=tz, freq='30T'), + ]) + + res = rng.repeat(reps) + tm.assert_index_equal(res, expected_rng) + assert res.freq is None + + tm.assert_index_equal(np.repeat(rng, reps), expected_rng) + tm.assert_raises_regex(ValueError, msg, np.repeat, + rng, reps, axis=1) + + @pytest.mark.parametrize('tz', tz) + def test_resolution(self, tz): for freq, expected in zip(['A', 'Q', 'M', 'D', 'H', 'T', 'S', 'L', 'U'], ['day', 'day', 'day', 'day', 'hour', 'minute', 'second', 'millisecond', 'microsecond']): - for tz in self.tz: - idx = pd.date_range(start='2013-04-01', periods=30, freq=freq, - tz=tz) - self.assertEqual(idx.resolution, expected) - - def test_union(self): - for tz in self.tz: - # union - rng1 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) - other1 = pd.date_range('1/6/2000', freq='D', periods=5, tz=tz) - expected1 = pd.date_range('1/1/2000', freq='D', periods=10, tz=tz) - - rng2 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) - other2 = pd.date_range('1/4/2000', freq='D', periods=5, tz=tz) - expected2 = pd.date_range('1/1/2000', freq='D', periods=8, tz=tz) - - rng3 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) - other3 = pd.DatetimeIndex([], tz=tz) - expected3 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) - - for rng, other, expected in [(rng1, other1, expected1), - (rng2, other2, expected2), - 
(rng3, other3, expected3)]: - - result_union = rng.union(other) - tm.assert_index_equal(result_union, expected) - - def test_add_iadd(self): - for tz in self.tz: - - # offset - offsets = [pd.offsets.Hour(2), timedelta(hours=2), - np.timedelta64(2, 'h'), Timedelta(hours=2)] - - for delta in offsets: - rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz) - result = rng + delta - expected = pd.date_range('2000-01-01 02:00', - '2000-02-01 02:00', tz=tz) - tm.assert_index_equal(result, expected) - rng += delta - tm.assert_index_equal(rng, expected) - - # int - rng = pd.date_range('2000-01-01 09:00', freq='H', periods=10, + idx = pd.date_range(start='2013-04-01', periods=30, freq=freq, tz=tz) - result = rng + 1 - expected = pd.date_range('2000-01-01 10:00', freq='H', periods=10, - tz=tz) - tm.assert_index_equal(result, expected) - rng += 1 - tm.assert_index_equal(rng, expected) - - idx = DatetimeIndex(['2011-01-01', '2011-01-02']) - msg = "cannot add a datelike to a DatetimeIndex" - with tm.assertRaisesRegexp(TypeError, msg): - idx + Timestamp('2011-01-01') - - with tm.assertRaisesRegexp(TypeError, msg): - Timestamp('2011-01-01') + idx + assert idx.resolution == expected - def test_add_dti_dti(self): - # previously performed setop (deprecated in 0.16.0), now raises - # TypeError (GH14164) - - dti = date_range('20130101', periods=3) - dti_tz = date_range('20130101', periods=3).tz_localize('US/Eastern') - - with tm.assertRaises(TypeError): - dti + dti - - with tm.assertRaises(TypeError): - dti_tz + dti_tz - - with tm.assertRaises(TypeError): - dti_tz + dti - - with tm.assertRaises(TypeError): - dti + dti_tz - - def test_difference(self): - for tz in self.tz: - # diff - rng1 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) - other1 = pd.date_range('1/6/2000', freq='D', periods=5, tz=tz) - expected1 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) - - rng2 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) - other2 = pd.date_range('1/4/2000', freq='D', 
periods=5, tz=tz) - expected2 = pd.date_range('1/1/2000', freq='D', periods=3, tz=tz) - - rng3 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) - other3 = pd.DatetimeIndex([], tz=tz) - expected3 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) - - for rng, other, expected in [(rng1, other1, expected1), - (rng2, other2, expected2), - (rng3, other3, expected3)]: - result_diff = rng.difference(other) - tm.assert_index_equal(result_diff, expected) - - def test_sub_isub(self): - for tz in self.tz: - - # offset - offsets = [pd.offsets.Hour(2), timedelta(hours=2), - np.timedelta64(2, 'h'), Timedelta(hours=2)] - - for delta in offsets: - rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz) - expected = pd.date_range('1999-12-31 22:00', - '2000-01-31 22:00', tz=tz) - - result = rng - delta - tm.assert_index_equal(result, expected) - rng -= delta - tm.assert_index_equal(rng, expected) - - # int - rng = pd.date_range('2000-01-01 09:00', freq='H', periods=10, - tz=tz) - result = rng - 1 - expected = pd.date_range('2000-01-01 08:00', freq='H', periods=10, - tz=tz) - tm.assert_index_equal(result, expected) - rng -= 1 - tm.assert_index_equal(rng, expected) - - def test_sub_dti_dti(self): - # previously performed setop (deprecated in 0.16.0), now changed to - # return subtraction -> TimeDeltaIndex (GH ...) 
- - dti = date_range('20130101', periods=3) - dti_tz = date_range('20130101', periods=3).tz_localize('US/Eastern') - dti_tz2 = date_range('20130101', periods=3).tz_localize('UTC') - expected = TimedeltaIndex([0, 0, 0]) - - result = dti - dti - tm.assert_index_equal(result, expected) - - result = dti_tz - dti_tz - tm.assert_index_equal(result, expected) - - with tm.assertRaises(TypeError): - dti_tz - dti - - with tm.assertRaises(TypeError): - dti - dti_tz - - with tm.assertRaises(TypeError): - dti_tz - dti_tz2 - - # isub - dti -= dti - tm.assert_index_equal(dti, expected) - - # different length raises ValueError - dti1 = date_range('20130101', periods=3) - dti2 = date_range('20130101', periods=4) - with tm.assertRaises(ValueError): - dti1 - dti2 - - # NaN propagation - dti1 = DatetimeIndex(['2012-01-01', np.nan, '2012-01-03']) - dti2 = DatetimeIndex(['2012-01-02', '2012-01-03', np.nan]) - expected = TimedeltaIndex(['1 days', np.nan, np.nan]) - result = dti2 - dti1 - tm.assert_index_equal(result, expected) - - def test_sub_period(self): - # GH 13078 - # not supported, check TypeError - p = pd.Period('2011-01-01', freq='D') - - for freq in [None, 'D']: - idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], freq=freq) - - with tm.assertRaises(TypeError): - idx - p - - with tm.assertRaises(TypeError): - p - idx - - def test_comp_nat(self): - left = pd.DatetimeIndex([pd.Timestamp('2011-01-01'), pd.NaT, - pd.Timestamp('2011-01-03')]) - right = pd.DatetimeIndex([pd.NaT, pd.NaT, pd.Timestamp('2011-01-03')]) - - for l, r in [(left, right), (left.asobject, right.asobject)]: - result = l == r - expected = np.array([False, False, True]) - tm.assert_numpy_array_equal(result, expected) - - result = l != r - expected = np.array([True, True, False]) - tm.assert_numpy_array_equal(result, expected) - - expected = np.array([False, False, False]) - tm.assert_numpy_array_equal(l == pd.NaT, expected) - tm.assert_numpy_array_equal(pd.NaT == r, expected) - - expected = np.array([True, 
True, True]) - tm.assert_numpy_array_equal(l != pd.NaT, expected) - tm.assert_numpy_array_equal(pd.NaT != l, expected) - - expected = np.array([False, False, False]) - tm.assert_numpy_array_equal(l < pd.NaT, expected) - tm.assert_numpy_array_equal(pd.NaT > l, expected) - - def test_value_counts_unique(self): + @pytest.mark.parametrize('tz', tz) + def test_value_counts_unique(self, tz): # GH 7735 - for tz in self.tz: - idx = pd.date_range('2011-01-01 09:00', freq='H', periods=10) - # create repeated values, 'n'th element is repeated by n+1 times - idx = DatetimeIndex(np.repeat(idx.values, range(1, len(idx) + 1)), - tz=tz) + idx = pd.date_range('2011-01-01 09:00', freq='H', periods=10) + # create repeated values, 'n'th element is repeated by n+1 times + idx = DatetimeIndex(np.repeat(idx.values, range(1, len(idx) + 1)), + tz=tz) - exp_idx = pd.date_range('2011-01-01 18:00', freq='-1H', periods=10, - tz=tz) - expected = Series(range(10, 0, -1), index=exp_idx, dtype='int64') + exp_idx = pd.date_range('2011-01-01 18:00', freq='-1H', periods=10, + tz=tz) + expected = Series(range(10, 0, -1), index=exp_idx, dtype='int64') - for obj in [idx, Series(idx)]: - tm.assert_series_equal(obj.value_counts(), expected) + for obj in [idx, Series(idx)]: + tm.assert_series_equal(obj.value_counts(), expected) - expected = pd.date_range('2011-01-01 09:00', freq='H', periods=10, - tz=tz) - tm.assert_index_equal(idx.unique(), expected) + expected = pd.date_range('2011-01-01 09:00', freq='H', periods=10, + tz=tz) + tm.assert_index_equal(idx.unique(), expected) - idx = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 09:00', - '2013-01-01 09:00', '2013-01-01 08:00', - '2013-01-01 08:00', pd.NaT], tz=tz) + idx = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 09:00', + '2013-01-01 09:00', '2013-01-01 08:00', + '2013-01-01 08:00', pd.NaT], tz=tz) - exp_idx = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 08:00'], - tz=tz) - expected = Series([3, 2], index=exp_idx) + exp_idx = 
DatetimeIndex(['2013-01-01 09:00', '2013-01-01 08:00'], + tz=tz) + expected = Series([3, 2], index=exp_idx) - for obj in [idx, Series(idx)]: - tm.assert_series_equal(obj.value_counts(), expected) + for obj in [idx, Series(idx)]: + tm.assert_series_equal(obj.value_counts(), expected) - exp_idx = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 08:00', - pd.NaT], tz=tz) - expected = Series([3, 2, 1], index=exp_idx) + exp_idx = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 08:00', + pd.NaT], tz=tz) + expected = Series([3, 2, 1], index=exp_idx) - for obj in [idx, Series(idx)]: - tm.assert_series_equal(obj.value_counts(dropna=False), - expected) + for obj in [idx, Series(idx)]: + tm.assert_series_equal(obj.value_counts(dropna=False), + expected) - tm.assert_index_equal(idx.unique(), exp_idx) + tm.assert_index_equal(idx.unique(), exp_idx) def test_nonunique_contains(self): # GH 9512 for idx in map(DatetimeIndex, ([0, 1, 0], [0, 0, -1], [0, -1, -1], ['2015', '2015', '2016'], ['2015', '2015', '2014'])): - tm.assertIn(idx[0], idx) + assert idx[0] in idx def test_order(self): # with freq @@ -624,31 +219,30 @@ def test_order(self): for idx in [idx1, idx2]: ordered = idx.sort_values() - self.assert_index_equal(ordered, idx) - self.assertEqual(ordered.freq, idx.freq) + tm.assert_index_equal(ordered, idx) + assert ordered.freq == idx.freq ordered = idx.sort_values(ascending=False) expected = idx[::-1] - self.assert_index_equal(ordered, expected) - self.assertEqual(ordered.freq, expected.freq) - self.assertEqual(ordered.freq.n, -1) + tm.assert_index_equal(ordered, expected) + assert ordered.freq == expected.freq + assert ordered.freq.n == -1 ordered, indexer = idx.sort_values(return_indexer=True) - self.assert_index_equal(ordered, idx) - self.assert_numpy_array_equal(indexer, - np.array([0, 1, 2]), - check_dtype=False) - self.assertEqual(ordered.freq, idx.freq) + tm.assert_index_equal(ordered, idx) + tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), + check_dtype=False) + 
assert ordered.freq == idx.freq ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) expected = idx[::-1] - self.assert_index_equal(ordered, expected) - self.assert_numpy_array_equal(indexer, - np.array([2, 1, 0]), - check_dtype=False) - self.assertEqual(ordered.freq, expected.freq) - self.assertEqual(ordered.freq.n, -1) + tm.assert_index_equal(ordered, expected) + tm.assert_numpy_array_equal(indexer, + np.array([2, 1, 0]), + check_dtype=False) + assert ordered.freq == expected.freq + assert ordered.freq.n == -1 # without freq for tz in self.tz: @@ -674,74 +268,40 @@ def test_order(self): for idx, expected in [(idx1, exp1), (idx2, exp2), (idx3, exp3)]: ordered = idx.sort_values() - self.assert_index_equal(ordered, expected) - self.assertIsNone(ordered.freq) + tm.assert_index_equal(ordered, expected) + assert ordered.freq is None ordered = idx.sort_values(ascending=False) - self.assert_index_equal(ordered, expected[::-1]) - self.assertIsNone(ordered.freq) + tm.assert_index_equal(ordered, expected[::-1]) + assert ordered.freq is None ordered, indexer = idx.sort_values(return_indexer=True) - self.assert_index_equal(ordered, expected) + tm.assert_index_equal(ordered, expected) exp = np.array([0, 4, 3, 1, 2]) - self.assert_numpy_array_equal(indexer, exp, check_dtype=False) - self.assertIsNone(ordered.freq) + tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) + assert ordered.freq is None ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) - self.assert_index_equal(ordered, expected[::-1]) + tm.assert_index_equal(ordered, expected[::-1]) exp = np.array([2, 1, 3, 4, 0]) - self.assert_numpy_array_equal(indexer, exp, check_dtype=False) - self.assertIsNone(ordered.freq) - - def test_getitem(self): - idx1 = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx') - idx2 = pd.date_range('2011-01-01', '2011-01-31', freq='D', - tz='Asia/Tokyo', name='idx') - - for idx in [idx1, idx2]: - result = idx[0] - 
self.assertEqual(result, Timestamp('2011-01-01', tz=idx.tz)) - - result = idx[0:5] - expected = pd.date_range('2011-01-01', '2011-01-05', freq='D', - tz=idx.tz, name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - - result = idx[0:10:2] - expected = pd.date_range('2011-01-01', '2011-01-09', freq='2D', - tz=idx.tz, name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - - result = idx[-20:-5:3] - expected = pd.date_range('2011-01-12', '2011-01-24', freq='3D', - tz=idx.tz, name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - - result = idx[4::-1] - expected = DatetimeIndex(['2011-01-05', '2011-01-04', '2011-01-03', - '2011-01-02', '2011-01-01'], - freq='-1D', tz=idx.tz, name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) + tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) + assert ordered.freq is None def test_drop_duplicates_metadata(self): # GH 10115 idx = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx') result = idx.drop_duplicates() - self.assert_index_equal(idx, result) - self.assertEqual(idx.freq, result.freq) + tm.assert_index_equal(idx, result) + assert idx.freq == result.freq idx_dup = idx.append(idx) - self.assertIsNone(idx_dup.freq) # freq is reset + assert idx_dup.freq is None # freq is reset result = idx_dup.drop_duplicates() - self.assert_index_equal(idx, result) - self.assertIsNone(result.freq) + tm.assert_index_equal(idx, result) + assert result.freq is None def test_drop_duplicates(self): # to check Index/Series compat @@ -764,73 +324,16 @@ def test_drop_duplicates(self): res = Series(idx).drop_duplicates(keep=False) tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31))) - def test_take(self): - # GH 10295 - idx1 = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx') - idx2 = 
pd.date_range('2011-01-01', '2011-01-31', freq='D', - tz='Asia/Tokyo', name='idx') - - for idx in [idx1, idx2]: - result = idx.take([0]) - self.assertEqual(result, Timestamp('2011-01-01', tz=idx.tz)) - - result = idx.take([0, 1, 2]) - expected = pd.date_range('2011-01-01', '2011-01-03', freq='D', - tz=idx.tz, name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - - result = idx.take([0, 2, 4]) - expected = pd.date_range('2011-01-01', '2011-01-05', freq='2D', - tz=idx.tz, name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - - result = idx.take([7, 4, 1]) - expected = pd.date_range('2011-01-08', '2011-01-02', freq='-3D', - tz=idx.tz, name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - - result = idx.take([3, 2, 5]) - expected = DatetimeIndex(['2011-01-04', '2011-01-03', - '2011-01-06'], - freq=None, tz=idx.tz, name='idx') - self.assert_index_equal(result, expected) - self.assertIsNone(result.freq) - - result = idx.take([-3, 2, 5]) - expected = DatetimeIndex(['2011-01-29', '2011-01-03', - '2011-01-06'], - freq=None, tz=idx.tz, name='idx') - self.assert_index_equal(result, expected) - self.assertIsNone(result.freq) - - def test_take_invalid_kwargs(self): - idx = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx') - indices = [1, 6, 5, 9, 10, 13, 15, 3] - - msg = r"take\(\) got an unexpected keyword argument 'foo'" - tm.assertRaisesRegexp(TypeError, msg, idx.take, - indices, foo=2) - - msg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, idx.take, - indices, out=indices) - - msg = "the 'mode' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, idx.take, - indices, mode='clip') - - def test_infer_freq(self): + @pytest.mark.parametrize('freq', [ + 'A', '2A', '-2A', 'Q', '-1Q', 'M', '-1M', 'D', '3D', + '-3D', 'W', '-1W', 'H', '2H', '-2H', 'T', '2T', 'S', + 
'-3S']) + def test_infer_freq(self, freq): # GH 11018 - for freq in ['A', '2A', '-2A', 'Q', '-1Q', 'M', '-1M', 'D', '3D', - '-3D', 'W', '-1W', 'H', '2H', '-2H', 'T', '2T', 'S', - '-3S']: - idx = pd.date_range('2011-01-01 09:00:00', freq=freq, periods=10) - result = pd.DatetimeIndex(idx.asi8, freq='infer') - tm.assert_index_equal(idx, result) - self.assertEqual(result.freq, freq) + idx = pd.date_range('2011-01-01 09:00:00', freq=freq, periods=10) + result = pd.DatetimeIndex(idx.asi8, freq='infer') + tm.assert_index_equal(idx, result) + assert result.freq == freq def test_nat_new(self): idx = pd.date_range('2011-01-01', freq='D', periods=5, name='x') @@ -842,431 +345,157 @@ def test_nat_new(self): exp = np.array([tslib.iNaT] * 5, dtype=np.int64) tm.assert_numpy_array_equal(result, exp) - def test_shift(self): - # GH 9903 - for tz in self.tz: - idx = pd.DatetimeIndex([], name='xxx', tz=tz) - tm.assert_index_equal(idx.shift(0, freq='H'), idx) - tm.assert_index_equal(idx.shift(3, freq='H'), idx) - - idx = pd.DatetimeIndex(['2011-01-01 10:00', '2011-01-01 11:00' - '2011-01-01 12:00'], name='xxx', tz=tz) - tm.assert_index_equal(idx.shift(0, freq='H'), idx) - exp = pd.DatetimeIndex(['2011-01-01 13:00', '2011-01-01 14:00' - '2011-01-01 15:00'], name='xxx', tz=tz) - tm.assert_index_equal(idx.shift(3, freq='H'), exp) - exp = pd.DatetimeIndex(['2011-01-01 07:00', '2011-01-01 08:00' - '2011-01-01 09:00'], name='xxx', tz=tz) - tm.assert_index_equal(idx.shift(-3, freq='H'), exp) - - def test_nat(self): - self.assertIs(pd.DatetimeIndex._na_value, pd.NaT) - self.assertIs(pd.DatetimeIndex([])._na_value, pd.NaT) - - for tz in [None, 'US/Eastern', 'UTC']: - idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], tz=tz) - self.assertTrue(idx._can_hold_na) - - tm.assert_numpy_array_equal(idx._isnan, np.array([False, False])) - self.assertFalse(idx.hasnans) - tm.assert_numpy_array_equal(idx._nan_idxs, - np.array([], dtype=np.intp)) - - idx = pd.DatetimeIndex(['2011-01-01', 'NaT'], tz=tz) - 
self.assertTrue(idx._can_hold_na) - - tm.assert_numpy_array_equal(idx._isnan, np.array([False, True])) - self.assertTrue(idx.hasnans) - tm.assert_numpy_array_equal(idx._nan_idxs, - np.array([1], dtype=np.intp)) + @pytest.mark.parametrize('tz', [None, 'US/Eastern', 'UTC']) + def test_nat(self, tz): + assert pd.DatetimeIndex._na_value is pd.NaT + assert pd.DatetimeIndex([])._na_value is pd.NaT - def test_equals(self): + idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], tz=tz) + assert idx._can_hold_na + + tm.assert_numpy_array_equal(idx._isnan, np.array([False, False])) + assert not idx.hasnans + tm.assert_numpy_array_equal(idx._nan_idxs, + np.array([], dtype=np.intp)) + + idx = pd.DatetimeIndex(['2011-01-01', 'NaT'], tz=tz) + assert idx._can_hold_na + + tm.assert_numpy_array_equal(idx._isnan, np.array([False, True])) + assert idx.hasnans + tm.assert_numpy_array_equal(idx._nan_idxs, + np.array([1], dtype=np.intp)) + + @pytest.mark.parametrize('tz', [None, 'UTC', 'US/Eastern', 'Asia/Tokyo']) + def test_equals(self, tz): # GH 13107 - for tz in [None, 'UTC', 'US/Eastern', 'Asia/Tokyo']: - idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02', 'NaT']) - self.assertTrue(idx.equals(idx)) - self.assertTrue(idx.equals(idx.copy())) - self.assertTrue(idx.equals(idx.asobject)) - self.assertTrue(idx.asobject.equals(idx)) - self.assertTrue(idx.asobject.equals(idx.asobject)) - self.assertFalse(idx.equals(list(idx))) - self.assertFalse(idx.equals(pd.Series(idx))) - - idx2 = pd.DatetimeIndex(['2011-01-01', '2011-01-02', 'NaT'], - tz='US/Pacific') - self.assertFalse(idx.equals(idx2)) - self.assertFalse(idx.equals(idx2.copy())) - self.assertFalse(idx.equals(idx2.asobject)) - self.assertFalse(idx.asobject.equals(idx2)) - self.assertFalse(idx.equals(list(idx2))) - self.assertFalse(idx.equals(pd.Series(idx2))) - - # same internal, different tz - idx3 = pd.DatetimeIndex._simple_new(idx.asi8, tz='US/Pacific') - tm.assert_numpy_array_equal(idx.asi8, idx3.asi8) - 
self.assertFalse(idx.equals(idx3)) - self.assertFalse(idx.equals(idx3.copy())) - self.assertFalse(idx.equals(idx3.asobject)) - self.assertFalse(idx.asobject.equals(idx3)) - self.assertFalse(idx.equals(list(idx3))) - self.assertFalse(idx.equals(pd.Series(idx3))) - - -class TestDateTimeIndexToJulianDate(tm.TestCase): - - def test_1700(self): - r1 = Float64Index([2345897.5, 2345898.5, 2345899.5, 2345900.5, - 2345901.5]) - r2 = date_range(start=Timestamp('1710-10-01'), periods=5, - freq='D').to_julian_date() - self.assertIsInstance(r2, Float64Index) - tm.assert_index_equal(r1, r2) - - def test_2000(self): - r1 = Float64Index([2451601.5, 2451602.5, 2451603.5, 2451604.5, - 2451605.5]) - r2 = date_range(start=Timestamp('2000-02-27'), periods=5, - freq='D').to_julian_date() - self.assertIsInstance(r2, Float64Index) - tm.assert_index_equal(r1, r2) - - def test_hour(self): - r1 = Float64Index( - [2451601.5, 2451601.5416666666666666, 2451601.5833333333333333, - 2451601.625, 2451601.6666666666666666]) - r2 = date_range(start=Timestamp('2000-02-27'), periods=5, - freq='H').to_julian_date() - self.assertIsInstance(r2, Float64Index) - tm.assert_index_equal(r1, r2) - - def test_minute(self): - r1 = Float64Index( - [2451601.5, 2451601.5006944444444444, 2451601.5013888888888888, - 2451601.5020833333333333, 2451601.5027777777777777]) - r2 = date_range(start=Timestamp('2000-02-27'), periods=5, - freq='T').to_julian_date() - self.assertIsInstance(r2, Float64Index) - tm.assert_index_equal(r1, r2) - - def test_second(self): - r1 = Float64Index( - [2451601.5, 2451601.500011574074074, 2451601.5000231481481481, - 2451601.5000347222222222, 2451601.5000462962962962]) - r2 = date_range(start=Timestamp('2000-02-27'), periods=5, - freq='S').to_julian_date() - self.assertIsInstance(r2, Float64Index) - tm.assert_index_equal(r1, r2) - - -class TestDatetimeIndex(tm.TestCase): - - # GH 10699 - def test_datetime64_with_DateOffset(self): - for klass, assert_func in zip([Series, DatetimeIndex], - 
[self.assert_series_equal, - tm.assert_index_equal]): - s = klass(date_range('2000-01-01', '2000-01-31'), name='a') - result = s + pd.DateOffset(years=1) - result2 = pd.DateOffset(years=1) + s - exp = klass(date_range('2001-01-01', '2001-01-31'), name='a') - assert_func(result, exp) - assert_func(result2, exp) - - result = s - pd.DateOffset(years=1) - exp = klass(date_range('1999-01-01', '1999-01-31'), name='a') - assert_func(result, exp) - - s = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'), - pd.Timestamp('2000-02-15', tz='US/Central')], name='a') - result = s + pd.offsets.Day() - result2 = pd.offsets.Day() + s - exp = klass([Timestamp('2000-01-16 00:15:00', tz='US/Central'), - Timestamp('2000-02-16', tz='US/Central')], name='a') - assert_func(result, exp) - assert_func(result2, exp) - - s = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'), - pd.Timestamp('2000-02-15', tz='US/Central')], name='a') - result = s + pd.offsets.MonthEnd() - result2 = pd.offsets.MonthEnd() + s - exp = klass([Timestamp('2000-01-31 00:15:00', tz='US/Central'), - Timestamp('2000-02-29', tz='US/Central')], name='a') - assert_func(result, exp) - assert_func(result2, exp) - - # array of offsets - valid for Series only - if klass is Series: - with tm.assert_produces_warning(PerformanceWarning): - s = klass([Timestamp('2000-1-1'), Timestamp('2000-2-1')]) - result = s + Series([pd.offsets.DateOffset(years=1), - pd.offsets.MonthEnd()]) - exp = klass([Timestamp('2001-1-1'), Timestamp('2000-2-29') - ]) - assert_func(result, exp) - - # same offset - result = s + Series([pd.offsets.DateOffset(years=1), - pd.offsets.DateOffset(years=1)]) - exp = klass([Timestamp('2001-1-1'), Timestamp('2001-2-1')]) - assert_func(result, exp) - - s = klass([Timestamp('2000-01-05 00:15:00'), - Timestamp('2000-01-31 00:23:00'), - Timestamp('2000-01-01'), - Timestamp('2000-03-31'), - Timestamp('2000-02-29'), - Timestamp('2000-12-31'), - Timestamp('2000-05-15'), - Timestamp('2001-06-15')]) - - # 
DateOffset relativedelta fastpath - relative_kwargs = [('years', 2), ('months', 5), ('days', 3), - ('hours', 5), ('minutes', 10), ('seconds', 2), - ('microseconds', 5)] - for i, kwd in enumerate(relative_kwargs): - op = pd.DateOffset(**dict([kwd])) - assert_func(klass([x + op for x in s]), s + op) - assert_func(klass([x - op for x in s]), s - op) - op = pd.DateOffset(**dict(relative_kwargs[:i + 1])) - assert_func(klass([x + op for x in s]), s + op) - assert_func(klass([x - op for x in s]), s - op) - - # assert these are equal on a piecewise basis - offsets = ['YearBegin', ('YearBegin', {'month': 5}), 'YearEnd', - ('YearEnd', {'month': 5}), 'MonthBegin', 'MonthEnd', - 'SemiMonthEnd', 'SemiMonthBegin', - 'Week', ('Week', { - 'weekday': 3 - }), 'BusinessDay', 'BDay', 'QuarterEnd', 'QuarterBegin', - 'CustomBusinessDay', 'CDay', 'CBMonthEnd', - 'CBMonthBegin', 'BMonthBegin', 'BMonthEnd', - 'BusinessHour', 'BYearBegin', 'BYearEnd', - 'BQuarterBegin', ('LastWeekOfMonth', { - 'weekday': 2 - }), ('FY5253Quarter', {'qtr_with_extra_week': 1, - 'startingMonth': 1, - 'weekday': 2, - 'variation': 'nearest'}), - ('FY5253', {'weekday': 0, - 'startingMonth': 2, - 'variation': - 'nearest'}), ('WeekOfMonth', {'weekday': 2, - 'week': 2}), - 'Easter', ('DateOffset', {'day': 4}), - ('DateOffset', {'month': 5})] - - with warnings.catch_warnings(record=True): - for normalize in (True, False): - for do in offsets: - if isinstance(do, tuple): - do, kwargs = do - else: - do = do - kwargs = {} - - for n in [0, 5]: - if (do in ['WeekOfMonth', 'LastWeekOfMonth', - 'FY5253Quarter', 'FY5253'] and n == 0): - continue - op = getattr(pd.offsets, do)(n, - normalize=normalize, - **kwargs) - assert_func(klass([x + op for x in s]), s + op) - assert_func(klass([x - op for x in s]), s - op) - assert_func(klass([op + x for x in s]), op + s) - - def test_shift_months(self): - s = DatetimeIndex([Timestamp('2000-01-05 00:15:00'), Timestamp( - '2000-01-31 00:23:00'), Timestamp('2000-01-01'), Timestamp( - 
'2000-02-29'), Timestamp('2000-12-31')]) - for years in [-1, 0, 1]: - for months in [-2, 0, 2]: - actual = DatetimeIndex(tslib.shift_months(s.asi8, years * 12 + - months)) - expected = DatetimeIndex([x + offsets.DateOffset( - years=years, months=months) for x in s]) - tm.assert_index_equal(actual, expected) - - -class TestBusinessDatetimeIndex(tm.TestCase): - - def setUp(self): + idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02', 'NaT']) + assert idx.equals(idx) + assert idx.equals(idx.copy()) + assert idx.equals(idx.astype(object)) + assert idx.astype(object).equals(idx) + assert idx.astype(object).equals(idx.astype(object)) + assert not idx.equals(list(idx)) + assert not idx.equals(pd.Series(idx)) + + idx2 = pd.DatetimeIndex(['2011-01-01', '2011-01-02', 'NaT'], + tz='US/Pacific') + assert not idx.equals(idx2) + assert not idx.equals(idx2.copy()) + assert not idx.equals(idx2.astype(object)) + assert not idx.astype(object).equals(idx2) + assert not idx.equals(list(idx2)) + assert not idx.equals(pd.Series(idx2)) + + # same internal, different tz + idx3 = pd.DatetimeIndex._simple_new(idx.asi8, tz='US/Pacific') + tm.assert_numpy_array_equal(idx.asi8, idx3.asi8) + assert not idx.equals(idx3) + assert not idx.equals(idx3.copy()) + assert not idx.equals(idx3.astype(object)) + assert not idx.astype(object).equals(idx3) + assert not idx.equals(list(idx3)) + assert not idx.equals(pd.Series(idx3)) + + +class TestBusinessDatetimeIndex(object): + + def setup_method(self, method): self.rng = bdate_range(START, END) def test_comparison(self): d = self.rng[10] comp = self.rng > d - self.assertTrue(comp[11]) - self.assertFalse(comp[9]) + assert comp[11] + assert not comp[9] def test_pickle_unpickle(self): - unpickled = self.round_trip_pickle(self.rng) - self.assertIsNotNone(unpickled.offset) + unpickled = tm.round_trip_pickle(self.rng) + assert unpickled.offset is not None def test_copy(self): cp = self.rng.copy() repr(cp) - self.assert_index_equal(cp, self.rng) - - def 
test_repr(self): - # only really care that it works - repr(self.rng) - - def test_getitem(self): - smaller = self.rng[:5] - exp = DatetimeIndex(self.rng.view(np.ndarray)[:5]) - self.assert_index_equal(smaller, exp) - - self.assertEqual(smaller.offset, self.rng.offset) - - sliced = self.rng[::5] - self.assertEqual(sliced.offset, BDay() * 5) - - fancy_indexed = self.rng[[4, 3, 2, 1, 0]] - self.assertEqual(len(fancy_indexed), 5) - tm.assertIsInstance(fancy_indexed, DatetimeIndex) - self.assertIsNone(fancy_indexed.freq) - - # 32-bit vs. 64-bit platforms - self.assertEqual(self.rng[4], self.rng[np.int_(4)]) - - def test_getitem_matplotlib_hackaround(self): - values = self.rng[:, None] - expected = self.rng.values[:, None] - self.assert_numpy_array_equal(values, expected) + tm.assert_index_equal(cp, self.rng) def test_shift(self): shifted = self.rng.shift(5) - self.assertEqual(shifted[0], self.rng[5]) - self.assertEqual(shifted.offset, self.rng.offset) + assert shifted[0] == self.rng[5] + assert shifted.offset == self.rng.offset shifted = self.rng.shift(-5) - self.assertEqual(shifted[5], self.rng[0]) - self.assertEqual(shifted.offset, self.rng.offset) + assert shifted[5] == self.rng[0] + assert shifted.offset == self.rng.offset shifted = self.rng.shift(0) - self.assertEqual(shifted[0], self.rng[0]) - self.assertEqual(shifted.offset, self.rng.offset) + assert shifted[0] == self.rng[0] + assert shifted.offset == self.rng.offset rng = date_range(START, END, freq=BMonthEnd()) shifted = rng.shift(1, freq=BDay()) - self.assertEqual(shifted[0], rng[0] + BDay()) - - def test_summary(self): - self.rng.summary() - self.rng[2:2].summary() - - def test_summary_pytz(self): - tm._skip_if_no_pytz() - import pytz - bdate_range('1/1/2005', '1/1/2009', tz=pytz.utc).summary() - - def test_summary_dateutil(self): - tm._skip_if_no_dateutil() - import dateutil - bdate_range('1/1/2005', '1/1/2009', tz=dateutil.tz.tzutc()).summary() + assert shifted[0] == rng[0] + BDay() def test_equals(self): 
- self.assertFalse(self.rng.equals(list(self.rng))) + assert not self.rng.equals(list(self.rng)) def test_identical(self): t1 = self.rng.copy() t2 = self.rng.copy() - self.assertTrue(t1.identical(t2)) + assert t1.identical(t2) # name t1 = t1.rename('foo') - self.assertTrue(t1.equals(t2)) - self.assertFalse(t1.identical(t2)) + assert t1.equals(t2) + assert not t1.identical(t2) t2 = t2.rename('foo') - self.assertTrue(t1.identical(t2)) + assert t1.identical(t2) # freq t2v = Index(t2.values) - self.assertTrue(t1.equals(t2v)) - self.assertFalse(t1.identical(t2v)) - + assert t1.equals(t2v) + assert not t1.identical(t2v) -class TestCustomDatetimeIndex(tm.TestCase): - def setUp(self): - self.rng = cdate_range(START, END) +class TestCustomDatetimeIndex(object): + def setup_method(self, method): + self.rng = bdate_range(START, END, freq='C') def test_comparison(self): d = self.rng[10] comp = self.rng > d - self.assertTrue(comp[11]) - self.assertFalse(comp[9]) + assert comp[11] + assert not comp[9] def test_copy(self): cp = self.rng.copy() repr(cp) - self.assert_index_equal(cp, self.rng) - - def test_repr(self): - # only really care that it works - repr(self.rng) - - def test_getitem(self): - smaller = self.rng[:5] - exp = DatetimeIndex(self.rng.view(np.ndarray)[:5]) - self.assert_index_equal(smaller, exp) - self.assertEqual(smaller.offset, self.rng.offset) - - sliced = self.rng[::5] - self.assertEqual(sliced.offset, CDay() * 5) - - fancy_indexed = self.rng[[4, 3, 2, 1, 0]] - self.assertEqual(len(fancy_indexed), 5) - tm.assertIsInstance(fancy_indexed, DatetimeIndex) - self.assertIsNone(fancy_indexed.freq) - - # 32-bit vs. 
64-bit platforms - self.assertEqual(self.rng[4], self.rng[np.int_(4)]) - - def test_getitem_matplotlib_hackaround(self): - values = self.rng[:, None] - expected = self.rng.values[:, None] - self.assert_numpy_array_equal(values, expected) + tm.assert_index_equal(cp, self.rng) def test_shift(self): shifted = self.rng.shift(5) - self.assertEqual(shifted[0], self.rng[5]) - self.assertEqual(shifted.offset, self.rng.offset) + assert shifted[0] == self.rng[5] + assert shifted.offset == self.rng.offset shifted = self.rng.shift(-5) - self.assertEqual(shifted[5], self.rng[0]) - self.assertEqual(shifted.offset, self.rng.offset) + assert shifted[5] == self.rng[0] + assert shifted.offset == self.rng.offset shifted = self.rng.shift(0) - self.assertEqual(shifted[0], self.rng[0]) - self.assertEqual(shifted.offset, self.rng.offset) + assert shifted[0] == self.rng[0] + assert shifted.offset == self.rng.offset - with tm.assert_produces_warning(PerformanceWarning): + # PerformanceWarning + with warnings.catch_warnings(record=True): rng = date_range(START, END, freq=BMonthEnd()) shifted = rng.shift(1, freq=CDay()) - self.assertEqual(shifted[0], rng[0] + CDay()) + assert shifted[0] == rng[0] + CDay() def test_pickle_unpickle(self): - unpickled = self.round_trip_pickle(self.rng) - self.assertIsNotNone(unpickled.offset) - - def test_summary(self): - self.rng.summary() - self.rng[2:2].summary() - - def test_summary_pytz(self): - tm._skip_if_no_pytz() - import pytz - cdate_range('1/1/2005', '1/1/2009', tz=pytz.utc).summary() - - def test_summary_dateutil(self): - tm._skip_if_no_dateutil() - import dateutil - cdate_range('1/1/2005', '1/1/2009', tz=dateutil.tz.tzutc()).summary() + unpickled = tm.round_trip_pickle(self.rng) + assert unpickled.offset is not None def test_equals(self): - self.assertFalse(self.rng.equals(list(self.rng))) + assert not self.rng.equals(list(self.rng)) diff --git a/pandas/tests/indexes/datetimes/test_partial_slcing.py 
b/pandas/tests/indexes/datetimes/test_partial_slicing.py similarity index 57% rename from pandas/tests/indexes/datetimes/test_partial_slcing.py rename to pandas/tests/indexes/datetimes/test_partial_slicing.py index a960f5cf9235a..f263ac78cd343 100644 --- a/pandas/tests/indexes/datetimes/test_partial_slcing.py +++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py @@ -1,14 +1,95 @@ """ test partial slicing on Series/Frame """ + +import pytest + from datetime import datetime import numpy as np import pandas as pd +import operator as op from pandas import (DatetimeIndex, Series, DataFrame, date_range, Index, Timedelta, Timestamp) from pandas.util import testing as tm -class TestSlicing(tm.TestCase): +class TestSlicing(object): + def test_dti_slicing(self): + dti = DatetimeIndex(start='1/1/2005', end='12/1/2005', freq='M') + dti2 = dti[[1, 3, 5]] + + v1 = dti2[0] + v2 = dti2[1] + v3 = dti2[2] + + assert v1 == Timestamp('2/28/2005') + assert v2 == Timestamp('4/30/2005') + assert v3 == Timestamp('6/30/2005') + + # don't carry freq through irregular slicing + assert dti2.freq is None + + def test_slice_keeps_name(self): + # GH4226 + st = pd.Timestamp('2013-07-01 00:00:00', tz='America/Los_Angeles') + et = pd.Timestamp('2013-07-02 00:00:00', tz='America/Los_Angeles') + dr = pd.date_range(st, et, freq='H', name='timebucket') + assert dr[1:].name == dr.name + + def test_slice_with_negative_step(self): + ts = Series(np.arange(20), + date_range('2014-01-01', periods=20, freq='MS')) + SLC = pd.IndexSlice + + def assert_slices_equivalent(l_slc, i_slc): + tm.assert_series_equal(ts[l_slc], ts.iloc[i_slc]) + tm.assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc]) + tm.assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc]) + + assert_slices_equivalent(SLC[Timestamp('2014-10-01')::-1], SLC[9::-1]) + assert_slices_equivalent(SLC['2014-10-01'::-1], SLC[9::-1]) + + assert_slices_equivalent(SLC[:Timestamp('2014-10-01'):-1], SLC[:8:-1]) + assert_slices_equivalent(SLC[:'2014-10-01':-1], 
SLC[:8:-1]) + + assert_slices_equivalent(SLC['2015-02-01':'2014-10-01':-1], + SLC[13:8:-1]) + assert_slices_equivalent(SLC[Timestamp('2015-02-01'):Timestamp( + '2014-10-01'):-1], SLC[13:8:-1]) + assert_slices_equivalent(SLC['2015-02-01':Timestamp('2014-10-01'):-1], + SLC[13:8:-1]) + assert_slices_equivalent(SLC[Timestamp('2015-02-01'):'2014-10-01':-1], + SLC[13:8:-1]) + + assert_slices_equivalent(SLC['2014-10-01':'2015-02-01':-1], SLC[:0]) + + def test_slice_with_zero_step_raises(self): + ts = Series(np.arange(20), + date_range('2014-01-01', periods=20, freq='MS')) + tm.assert_raises_regex(ValueError, 'slice step cannot be zero', + lambda: ts[::0]) + tm.assert_raises_regex(ValueError, 'slice step cannot be zero', + lambda: ts.loc[::0]) + tm.assert_raises_regex(ValueError, 'slice step cannot be zero', + lambda: ts.loc[::0]) + + def test_slice_bounds_empty(self): + # GH 14354 + empty_idx = DatetimeIndex(freq='1H', periods=0, end='2015') + + right = empty_idx._maybe_cast_slice_bound('2015-01-02', 'right', 'loc') + exp = Timestamp('2015-01-02 23:59:59.999999999') + assert right == exp + + left = empty_idx._maybe_cast_slice_bound('2015-01-02', 'left', 'loc') + exp = Timestamp('2015-01-02 00:00:00') + assert left == exp + + def test_slice_duplicate_monotonic(self): + # https://github.com/pandas-dev/pandas/issues/16515 + idx = pd.DatetimeIndex(['2017', '2017']) + result = idx._maybe_cast_slice_bound('2017-01-01', 'left', 'loc') + expected = Timestamp('2017-01-01') + assert result == expected def test_slice_year(self): dti = DatetimeIndex(freq='B', start=datetime(2005, 1, 1), periods=500) @@ -27,24 +108,24 @@ def test_slice_year(self): result = rng.get_loc('2009') expected = slice(3288, 3653) - self.assertEqual(result, expected) + assert result == expected def test_slice_quarter(self): dti = DatetimeIndex(freq='D', start=datetime(2000, 6, 1), periods=500) s = Series(np.arange(len(dti)), index=dti) - self.assertEqual(len(s['2001Q1']), 90) + assert len(s['2001Q1']) == 90 df 
= DataFrame(np.random.rand(len(dti), 5), index=dti) - self.assertEqual(len(df.loc['1Q01']), 90) + assert len(df.loc['1Q01']) == 90 def test_slice_month(self): dti = DatetimeIndex(freq='D', start=datetime(2005, 1, 1), periods=500) s = Series(np.arange(len(dti)), index=dti) - self.assertEqual(len(s['2005-11']), 30) + assert len(s['2005-11']) == 30 df = DataFrame(np.random.rand(len(dti), 5), index=dti) - self.assertEqual(len(df.loc['2005-11']), 30) + assert len(df.loc['2005-11']) == 30 tm.assert_series_equal(s['2005-11'], s['11-2005']) @@ -65,9 +146,9 @@ def test_partial_slice(self): tm.assert_series_equal(result, expected) result = s['2005-1-1'] - self.assertEqual(result, s.iloc[0]) + assert result == s.iloc[0] - self.assertRaises(Exception, s.__getitem__, '2004-12-31') + pytest.raises(Exception, s.__getitem__, '2004-12-31') def test_partial_slice_daily(self): rng = DatetimeIndex(freq='H', start=datetime(2005, 1, 31), periods=500) @@ -76,7 +157,7 @@ def test_partial_slice_daily(self): result = s['2005-1-31'] tm.assert_series_equal(result, s.iloc[:24]) - self.assertRaises(Exception, s.__getitem__, '2004-12-31 00') + pytest.raises(Exception, s.__getitem__, '2004-12-31 00') def test_partial_slice_hourly(self): rng = DatetimeIndex(freq='T', start=datetime(2005, 1, 1, 20, 0, 0), @@ -89,8 +170,8 @@ def test_partial_slice_hourly(self): result = s['2005-1-1 20'] tm.assert_series_equal(result, s.iloc[:60]) - self.assertEqual(s['2005-1-1 20:00'], s.iloc[0]) - self.assertRaises(Exception, s.__getitem__, '2004-12-31 00:15') + assert s['2005-1-1 20:00'] == s.iloc[0] + pytest.raises(Exception, s.__getitem__, '2004-12-31 00:15') def test_partial_slice_minutely(self): rng = DatetimeIndex(freq='S', start=datetime(2005, 1, 1, 23, 59, 0), @@ -103,8 +184,8 @@ def test_partial_slice_minutely(self): result = s['2005-1-1'] tm.assert_series_equal(result, s.iloc[:60]) - self.assertEqual(s[Timestamp('2005-1-1 23:59:00')], s.iloc[0]) - self.assertRaises(Exception, s.__getitem__, '2004-12-31 
00:00:00') + assert s[Timestamp('2005-1-1 23:59:00')] == s.iloc[0] + pytest.raises(Exception, s.__getitem__, '2004-12-31 00:00:00') def test_partial_slice_second_precision(self): rng = DatetimeIndex(start=datetime(2005, 1, 1, 0, 0, 59, @@ -118,9 +199,9 @@ def test_partial_slice_second_precision(self): tm.assert_series_equal(s['2005-1-1 00:01'], s.iloc[10:]) tm.assert_series_equal(s['2005-1-1 00:01:00'], s.iloc[10:]) - self.assertEqual(s[Timestamp('2005-1-1 00:00:59.999990')], s.iloc[0]) - self.assertRaisesRegexp(KeyError, '2005-1-1 00:00:00', - lambda: s['2005-1-1 00:00:00']) + assert s[Timestamp('2005-1-1 00:00:59.999990')] == s.iloc[0] + tm.assert_raises_regex(KeyError, '2005-1-1 00:00:00', + lambda: s['2005-1-1 00:00:00']) def test_partial_slicing_dataframe(self): # GH14856 @@ -141,7 +222,7 @@ def test_partial_slicing_dataframe(self): middate, middate + unit]) values = [1, 2, 3] df = DataFrame({'a': values}, index, dtype=np.int64) - self.assertEqual(df.index.resolution, resolution) + assert df.index.resolution == resolution # Timestamp with the same resolution as index # Should be exact match for Series (return scalar) @@ -150,9 +231,9 @@ def test_partial_slicing_dataframe(self): ts_string = timestamp.strftime(formats[rnum]) # make ts_string as precise as index result = df['a'][ts_string] - self.assertIsInstance(result, np.int64) - self.assertEqual(result, expected) - self.assertRaises(KeyError, df.__getitem__, ts_string) + assert isinstance(result, np.int64) + assert result == expected + pytest.raises(KeyError, df.__getitem__, ts_string) # Timestamp with resolution less precise than index for fmt in formats[:rnum]: @@ -177,17 +258,17 @@ def test_partial_slicing_dataframe(self): for fmt in formats[rnum + 1:]: ts_string = index[1].strftime(fmt) result = df['a'][ts_string] - self.assertIsInstance(result, np.int64) - self.assertEqual(result, 2) - self.assertRaises(KeyError, df.__getitem__, ts_string) + assert isinstance(result, np.int64) + assert result == 2 + 
pytest.raises(KeyError, df.__getitem__, ts_string) # Not compatible with existing key # Should raise KeyError for fmt, res in list(zip(formats, resolutions))[rnum + 1:]: ts = index[1] + Timedelta("1 " + res) ts_string = ts.strftime(fmt) - self.assertRaises(KeyError, df['a'].__getitem__, ts_string) - self.assertRaises(KeyError, df.__getitem__, ts_string) + pytest.raises(KeyError, df['a'].__getitem__, ts_string) + pytest.raises(KeyError, df.__getitem__, ts_string) def test_partial_slicing_with_multiindex(self): @@ -216,7 +297,7 @@ def test_partial_slicing_with_multiindex(self): def f(): df_multi.loc[('2013-06-19', 'ACCT1', 'ABC')] - self.assertRaises(KeyError, f) + pytest.raises(KeyError, f) # GH 4294 # partial slice on a series mi @@ -246,11 +327,40 @@ def test_partial_slice_doesnt_require_monotonicity(self): timestamp = pd.Timestamp('2014-01-10') tm.assert_series_equal(nonmonotonic['2014-01-10':], expected) - self.assertRaisesRegexp(KeyError, - r"Timestamp\('2014-01-10 00:00:00'\)", - lambda: nonmonotonic[timestamp:]) + tm.assert_raises_regex(KeyError, + r"Timestamp\('2014-01-10 00:00:00'\)", + lambda: nonmonotonic[timestamp:]) tm.assert_series_equal(nonmonotonic.loc['2014-01-10':], expected) - self.assertRaisesRegexp(KeyError, - r"Timestamp\('2014-01-10 00:00:00'\)", - lambda: nonmonotonic.loc[timestamp:]) + tm.assert_raises_regex(KeyError, + r"Timestamp\('2014-01-10 00:00:00'\)", + lambda: nonmonotonic.loc[timestamp:]) + + def test_loc_datetime_length_one(self): + # GH16071 + df = pd.DataFrame(columns=['1'], + index=pd.date_range('2016-10-01T00:00:00', + '2016-10-01T23:59:59')) + result = df.loc[datetime(2016, 10, 1):] + tm.assert_frame_equal(result, df) + + result = df.loc['2016-10-01T00:00:00':] + tm.assert_frame_equal(result, df) + + @pytest.mark.parametrize('datetimelike', [ + Timestamp('20130101'), datetime(2013, 1, 1), + np.datetime64('2013-01-01T00:00', 'ns')]) + @pytest.mark.parametrize('op,expected', [ + (op.lt, [True, False, False, False]), + (op.le, 
[True, True, False, False]), + (op.eq, [False, True, False, False]), + (op.gt, [False, False, False, True])]) + def test_selection_by_datetimelike(self, datetimelike, op, expected): + # GH issue #17965, test for ability to compare datetime64[ns] columns + # to datetimelike + df = DataFrame({'A': [pd.Timestamp('20120101'), + pd.Timestamp('20130101'), + np.nan, pd.Timestamp('20130103')]}) + result = op(df.A, datetimelike) + expected = Series(expected, name='A') + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/indexes/datetimes/test_scalar_compat.py b/pandas/tests/indexes/datetimes/test_scalar_compat.py new file mode 100644 index 0000000000000..9180bb0af3af3 --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_scalar_compat.py @@ -0,0 +1,217 @@ +# -*- coding: utf-8 -*- +""" +Tests for DatetimeIndex methods behaving like their Timestamp counterparts +""" +from datetime import datetime + +import numpy as np +import pytest + +import pandas.util.testing as tm +import pandas as pd + +from pandas import date_range, Timestamp, DatetimeIndex + + +@pytest.fixture(params=[None, 'UTC', 'Asia/Tokyo', + 'US/Eastern', 'dateutil/Asia/Singapore', + 'dateutil/US/Pacific']) +def tz(request): + return request.param + + +class TestDatetimeIndexOps(object): + def test_dti_time(self): + rng = date_range('1/1/2000', freq='12min', periods=10) + result = pd.Index(rng).time + expected = [t.time() for t in rng] + assert (result == expected).all() + + def test_dti_date(self): + rng = date_range('1/1/2000', freq='12H', periods=10) + result = pd.Index(rng).date + expected = [t.date() for t in rng] + assert (result == expected).all() + + def test_dti_date_out_of_range(self): + # GH#1475 + pytest.raises(ValueError, DatetimeIndex, ['1400-01-01']) + pytest.raises(ValueError, DatetimeIndex, [datetime(1400, 1, 1)]) + + @pytest.mark.parametrize('field', [ + 'dayofweek', 'dayofyear', 'week', 'weekofyear', 'quarter', + 'days_in_month', 'is_month_start', 'is_month_end', + 
'is_quarter_start', 'is_quarter_end', 'is_year_start', + 'is_year_end', 'weekday_name']) + def test_dti_timestamp_fields(self, field): + # extra fields from DatetimeIndex like quarter and week + idx = tm.makeDateIndex(100) + expected = getattr(idx, field)[-1] + if field == 'weekday_name': + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = getattr(Timestamp(idx[-1]), field) + else: + result = getattr(Timestamp(idx[-1]), field) + assert result == expected + + def test_dti_timestamp_freq_fields(self): + # extra fields from DatetimeIndex like quarter and week + idx = tm.makeDateIndex(100) + + assert idx.freq == Timestamp(idx[-1], idx.freq).freq + assert idx.freqstr == Timestamp(idx[-1], idx.freq).freqstr + + # ---------------------------------------------------------------- + # DatetimeIndex.round + + def test_round_daily(self): + dti = date_range('20130101 09:10:11', periods=5) + result = dti.round('D') + expected = date_range('20130101', periods=5) + tm.assert_index_equal(result, expected) + + dti = dti.tz_localize('UTC').tz_convert('US/Eastern') + result = dti.round('D') + expected = date_range('20130101', + periods=5).tz_localize('US/Eastern') + tm.assert_index_equal(result, expected) + + result = dti.round('s') + tm.assert_index_equal(result, dti) + + # invalid + for freq in ['Y', 'M', 'foobar']: + pytest.raises(ValueError, lambda: dti.round(freq)) + + def test_round(self, tz): + rng = date_range(start='2016-01-01', periods=5, + freq='30Min', tz=tz) + elt = rng[1] + + expected_rng = DatetimeIndex([ + Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 01:00:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 02:00:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 02:00:00', tz=tz, freq='30T'), + ]) + expected_elt = expected_rng[1] + + tm.assert_index_equal(rng.round(freq='H'), expected_rng) + assert elt.round(freq='H') == expected_elt + + msg = 
pd._libs.tslibs.frequencies._INVALID_FREQ_ERROR + with tm.assert_raises_regex(ValueError, msg): + rng.round(freq='foo') + with tm.assert_raises_regex(ValueError, msg): + elt.round(freq='foo') + + msg = " is a non-fixed frequency" + tm.assert_raises_regex(ValueError, msg, rng.round, freq='M') + tm.assert_raises_regex(ValueError, msg, elt.round, freq='M') + + # GH#14440 & GH#15578 + index = DatetimeIndex(['2016-10-17 12:00:00.0015'], tz=tz) + result = index.round('ms') + expected = DatetimeIndex(['2016-10-17 12:00:00.002000'], tz=tz) + tm.assert_index_equal(result, expected) + + for freq in ['us', 'ns']: + tm.assert_index_equal(index, index.round(freq)) + + index = DatetimeIndex(['2016-10-17 12:00:00.00149'], tz=tz) + result = index.round('ms') + expected = DatetimeIndex(['2016-10-17 12:00:00.001000'], tz=tz) + tm.assert_index_equal(result, expected) + + index = DatetimeIndex(['2016-10-17 12:00:00.001501031']) + result = index.round('10ns') + expected = DatetimeIndex(['2016-10-17 12:00:00.001501030']) + tm.assert_index_equal(result, expected) + + with tm.assert_produces_warning(): + ts = '2016-10-17 12:00:00.001501031' + DatetimeIndex([ts]).round('1010ns') + + @pytest.mark.parametrize('test_input, rounder, freq, expected', [ + (['2117-01-01 00:00:45'], 'floor', '15s', ['2117-01-01 00:00:45']), + (['2117-01-01 00:00:45'], 'ceil', '15s', ['2117-01-01 00:00:45']), + (['2117-01-01 00:00:45.000000012'], 'floor', '10ns', + ['2117-01-01 00:00:45.000000010']), + (['1823-01-01 00:00:01.000000012'], 'ceil', '10ns', + ['1823-01-01 00:00:01.000000020']), + (['1823-01-01 00:00:01'], 'floor', '1s', ['1823-01-01 00:00:01']), + (['1823-01-01 00:00:01'], 'ceil', '1s', ['1823-01-01 00:00:01']), + (('NaT', '1823-01-01 00:00:01'), 'floor', '1s', + ('NaT', '1823-01-01 00:00:01')), + (('NaT', '1823-01-01 00:00:01'), 'ceil', '1s', + ('NaT', '1823-01-01 00:00:01')) + ]) + def test_ceil_floor_edge(self, tz, test_input, rounder, freq, expected): + dt = DatetimeIndex(list(test_input)) + func = 
getattr(dt, rounder) + result = func(freq) + expected = DatetimeIndex(list(expected)) + assert expected.equals(result) + + # ---------------------------------------------------------------- + # DatetimeIndex.normalize + + def test_normalize(self): + rng = date_range('1/1/2000 9:30', periods=10, freq='D') + + result = rng.normalize() + expected = date_range('1/1/2000', periods=10, freq='D') + tm.assert_index_equal(result, expected) + + arr_ns = np.array([1380585623454345752, + 1380585612343234312]).astype("datetime64[ns]") + rng_ns = DatetimeIndex(arr_ns) + rng_ns_normalized = rng_ns.normalize() + + arr_ns = np.array([1380585600000000000, + 1380585600000000000]).astype("datetime64[ns]") + expected = DatetimeIndex(arr_ns) + tm.assert_index_equal(rng_ns_normalized, expected) + + assert result.is_normalized + assert not rng.is_normalized + + +class TestDateTimeIndexToJulianDate(object): + + def test_1700(self): + dr = date_range(start=Timestamp('1710-10-01'), periods=5, freq='D') + r1 = pd.Index([x.to_julian_date() for x in dr]) + r2 = dr.to_julian_date() + assert isinstance(r2, pd.Float64Index) + tm.assert_index_equal(r1, r2) + + def test_2000(self): + dr = date_range(start=Timestamp('2000-02-27'), periods=5, freq='D') + r1 = pd.Index([x.to_julian_date() for x in dr]) + r2 = dr.to_julian_date() + assert isinstance(r2, pd.Float64Index) + tm.assert_index_equal(r1, r2) + + def test_hour(self): + dr = date_range(start=Timestamp('2000-02-27'), periods=5, freq='H') + r1 = pd.Index([x.to_julian_date() for x in dr]) + r2 = dr.to_julian_date() + assert isinstance(r2, pd.Float64Index) + tm.assert_index_equal(r1, r2) + + def test_minute(self): + dr = date_range(start=Timestamp('2000-02-27'), periods=5, freq='T') + r1 = pd.Index([x.to_julian_date() for x in dr]) + r2 = dr.to_julian_date() + assert isinstance(r2, pd.Float64Index) + tm.assert_index_equal(r1, r2) + + def test_second(self): + dr = date_range(start=Timestamp('2000-02-27'), periods=5, freq='S') + r1 = 
pd.Index([x.to_julian_date() for x in dr]) + r2 = dr.to_julian_date() + assert isinstance(r2, pd.Float64Index) + tm.assert_index_equal(r1, r2) diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index 8d05a4016ba45..84632e59e2bfb 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -1,10 +1,11 @@ from datetime import datetime +import pytest import numpy as np import pandas as pd import pandas.util.testing as tm -from pandas.tseries.index import cdate_range +import pandas.util._test_decorators as td from pandas import (DatetimeIndex, date_range, Series, bdate_range, DataFrame, Int64Index, Index, to_datetime) from pandas.tseries.offsets import Minute, BMonthEnd, MonthEnd @@ -12,14 +13,44 @@ START, END = datetime(2009, 1, 1), datetime(2010, 1, 1) -class TestDatetimeIndex(tm.TestCase): +class TestDatetimeIndexSetOps(object): + tz = [None, 'UTC', 'Asia/Tokyo', 'US/Eastern', 'dateutil/Asia/Singapore', + 'dateutil/US/Pacific'] - def test_union(self): - i1 = Int64Index(np.arange(0, 20, 2)) - i2 = Int64Index(np.arange(10, 30, 2)) - result = i1.union(i2) - expected = Int64Index(np.arange(0, 30, 2)) - tm.assert_index_equal(result, expected) + # TODO: moved from test_datetimelike; dedup with version below + def test_union2(self): + everything = tm.makeDateIndex(10) + first = everything[:5] + second = everything[5:] + union = first.union(second) + assert tm.equalContents(union, everything) + + # GH 10149 + cases = [klass(second.values) for klass in [np.array, Series, list]] + for case in cases: + result = first.union(case) + assert tm.equalContents(result, everything) + + @pytest.mark.parametrize("tz", tz) + def test_union(self, tz): + rng1 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + other1 = pd.date_range('1/6/2000', freq='D', periods=5, tz=tz) + expected1 = pd.date_range('1/1/2000', freq='D', periods=10, tz=tz) + + rng2 = pd.date_range('1/1/2000', 
freq='D', periods=5, tz=tz) + other2 = pd.date_range('1/4/2000', freq='D', periods=5, tz=tz) + expected2 = pd.date_range('1/1/2000', freq='D', periods=8, tz=tz) + + rng3 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + other3 = pd.DatetimeIndex([], tz=tz) + expected3 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + + for rng, other, expected in [(rng1, other1, expected1), + (rng2, other2, expected2), + (rng3, other3, expected3)]: + + result_union = rng.union(other) + tm.assert_index_equal(result_union, expected) def test_union_coverage(self): idx = DatetimeIndex(['2000-01-03', '2000-01-01', '2000-01-02']) @@ -29,7 +60,7 @@ def test_union_coverage(self): result = ordered[:0].union(ordered) tm.assert_index_equal(result, ordered) - self.assertEqual(result.freq, ordered.freq) + assert result.freq == ordered.freq def test_union_bug_1730(self): rng_a = date_range('1/1/2012', periods=4, freq='3H') @@ -65,7 +96,7 @@ def test_union_freq_both_none(self): result = expected.union(expected) tm.assert_index_equal(result, expected) - self.assertIsNone(result.freq) + assert result.freq is None def test_union_dataframe_index(self): rng1 = date_range('1/1/1999', '1/1/2012', freq='MS') @@ -84,69 +115,87 @@ def test_union_with_DatetimeIndex(self): i1.union(i2) # Works i2.union(i1) # Fails with "AttributeError: can't set attribute" - def test_intersection(self): - # GH 4690 (with tz) - for tz in [None, 'Asia/Tokyo', 'US/Eastern', 'dateutil/US/Pacific']: - base = date_range('6/1/2000', '6/30/2000', freq='D', name='idx') - - # if target has the same name, it is preserved - rng2 = date_range('5/15/2000', '6/20/2000', freq='D', name='idx') - expected2 = date_range('6/1/2000', '6/20/2000', freq='D', - name='idx') - - # if target name is different, it will be reset - rng3 = date_range('5/15/2000', '6/20/2000', freq='D', name='other') - expected3 = date_range('6/1/2000', '6/20/2000', freq='D', - name=None) - - rng4 = date_range('7/1/2000', '7/31/2000', freq='D', name='idx') 
- expected4 = DatetimeIndex([], name='idx') - - for (rng, expected) in [(rng2, expected2), (rng3, expected3), - (rng4, expected4)]: - result = base.intersection(rng) - tm.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(result.freq, expected.freq) - self.assertEqual(result.tz, expected.tz) - - # non-monotonic - base = DatetimeIndex(['2011-01-05', '2011-01-04', - '2011-01-02', '2011-01-03'], - tz=tz, name='idx') - - rng2 = DatetimeIndex(['2011-01-04', '2011-01-02', - '2011-02-02', '2011-02-03'], - tz=tz, name='idx') - expected2 = DatetimeIndex( - ['2011-01-04', '2011-01-02'], tz=tz, name='idx') - - rng3 = DatetimeIndex(['2011-01-04', '2011-01-02', - '2011-02-02', '2011-02-03'], - tz=tz, name='other') - expected3 = DatetimeIndex( - ['2011-01-04', '2011-01-02'], tz=tz, name=None) - - # GH 7880 - rng4 = date_range('7/1/2000', '7/31/2000', freq='D', tz=tz, - name='idx') - expected4 = DatetimeIndex([], tz=tz, name='idx') - - for (rng, expected) in [(rng2, expected2), (rng3, expected3), - (rng4, expected4)]: - result = base.intersection(rng) - tm.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertIsNone(result.freq) - self.assertEqual(result.tz, expected.tz) + # TODO: moved from test_datetimelike; de-duplicate with version below + def test_intersection2(self): + first = tm.makeDateIndex(10) + second = first[5:] + intersect = first.intersection(second) + assert tm.equalContents(intersect, second) + + # GH 10149 + cases = [klass(second.values) for klass in [np.array, Series, list]] + for case in cases: + result = first.intersection(case) + assert tm.equalContents(result, second) + + third = Index(['a', 'b', 'c']) + result = first.intersection(third) + expected = pd.Index([], dtype=object) + tm.assert_index_equal(result, expected) + @pytest.mark.parametrize("tz", [None, 'Asia/Tokyo', 'US/Eastern', + 'dateutil/US/Pacific']) + def test_intersection(self, tz): + # GH 4690 (with 
tz) + base = date_range('6/1/2000', '6/30/2000', freq='D', name='idx') + + # if target has the same name, it is preserved + rng2 = date_range('5/15/2000', '6/20/2000', freq='D', name='idx') + expected2 = date_range('6/1/2000', '6/20/2000', freq='D', name='idx') + + # if target name is different, it will be reset + rng3 = date_range('5/15/2000', '6/20/2000', freq='D', name='other') + expected3 = date_range('6/1/2000', '6/20/2000', freq='D', name=None) + + rng4 = date_range('7/1/2000', '7/31/2000', freq='D', name='idx') + expected4 = DatetimeIndex([], name='idx') + + for (rng, expected) in [(rng2, expected2), (rng3, expected3), + (rng4, expected4)]: + result = base.intersection(rng) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + assert result.tz == expected.tz + + # non-monotonic + base = DatetimeIndex(['2011-01-05', '2011-01-04', + '2011-01-02', '2011-01-03'], + tz=tz, name='idx') + + rng2 = DatetimeIndex(['2011-01-04', '2011-01-02', + '2011-02-02', '2011-02-03'], + tz=tz, name='idx') + expected2 = DatetimeIndex(['2011-01-04', '2011-01-02'], + tz=tz, name='idx') + + rng3 = DatetimeIndex(['2011-01-04', '2011-01-02', + '2011-02-02', '2011-02-03'], + tz=tz, name='other') + expected3 = DatetimeIndex(['2011-01-04', '2011-01-02'], + tz=tz, name=None) + + # GH 7880 + rng4 = date_range('7/1/2000', '7/31/2000', freq='D', tz=tz, + name='idx') + expected4 = DatetimeIndex([], tz=tz, name='idx') + + for (rng, expected) in [(rng2, expected2), (rng3, expected3), + (rng4, expected4)]: + result = base.intersection(rng) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq is None + assert result.tz == expected.tz + + def test_intersection_empty(self): # empty same freq GH2129 rng = date_range('6/1/2000', '6/15/2000', freq='T') result = rng[0:0].intersection(rng) - self.assertEqual(len(result), 0) + assert len(result) == 0 result = rng.intersection(rng[0:0]) - 
self.assertEqual(len(result), 0) + assert len(result) == 0 def test_intersection_bug_1708(self): from pandas import DateOffset @@ -154,7 +203,27 @@ def test_intersection_bug_1708(self): index_2 = index_1 + DateOffset(hours=1) result = index_1 & index_2 - self.assertEqual(len(result), 0) + assert len(result) == 0 + + @pytest.mark.parametrize("tz", tz) + def test_difference(self, tz): + rng1 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + other1 = pd.date_range('1/6/2000', freq='D', periods=5, tz=tz) + expected1 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + + rng2 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + other2 = pd.date_range('1/4/2000', freq='D', periods=5, tz=tz) + expected2 = pd.date_range('1/1/2000', freq='D', periods=3, tz=tz) + + rng3 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + other3 = pd.DatetimeIndex([], tz=tz) + expected3 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + + for rng, other, expected in [(rng1, other1, expected1), + (rng2, other2, expected2), + (rng3, other3, expected3)]: + result_diff = rng.difference(other) + tm.assert_index_equal(result_diff, expected) def test_difference_freq(self): # GH14323: difference of DatetimeIndex should not preserve frequency @@ -177,18 +246,18 @@ def test_datetimeindex_diff(self): periods=100) dti2 = DatetimeIndex(freq='Q-JAN', start=datetime(1997, 12, 31), periods=98) - self.assertEqual(len(dti1.difference(dti2)), 2) + assert len(dti1.difference(dti2)) == 2 def test_datetimeindex_union_join_empty(self): dti = DatetimeIndex(start='1/1/2001', end='2/1/2001', freq='D') empty = Index([]) result = dti.union(empty) - tm.assertIsInstance(result, DatetimeIndex) - self.assertIs(result, result) + assert isinstance(result, DatetimeIndex) + assert result is result result = dti.join(empty) - tm.assertIsInstance(result, DatetimeIndex) + assert isinstance(result, DatetimeIndex) def test_join_nonunique(self): idx1 = to_datetime(['2012-11-06 16:00:11.477563', @@ 
-196,12 +265,12 @@ def test_join_nonunique(self): idx2 = to_datetime(['2012-11-06 15:11:09.006507', '2012-11-06 15:11:09.006507']) rs = idx1.join(idx2, how='outer') - self.assertTrue(rs.is_monotonic) + assert rs.is_monotonic -class TestBusinessDatetimeIndex(tm.TestCase): +class TestBusinessDatetimeIndex(object): - def setUp(self): + def setup_method(self, method): self.rng = bdate_range(START, END) def test_union(self): @@ -210,21 +279,21 @@ def test_union(self): right = self.rng[5:10] the_union = left.union(right) - tm.assertIsInstance(the_union, DatetimeIndex) + assert isinstance(the_union, DatetimeIndex) # non-overlapping, gap in middle left = self.rng[:5] right = self.rng[10:] the_union = left.union(right) - tm.assertIsInstance(the_union, Index) + assert isinstance(the_union, Index) # non-overlapping, no gap left = self.rng[:5] right = self.rng[5:10] the_union = left.union(right) - tm.assertIsInstance(the_union, DatetimeIndex) + assert isinstance(the_union, DatetimeIndex) # order does not matter tm.assert_index_equal(right.union(left), the_union) @@ -233,7 +302,7 @@ def test_union(self): rng = date_range(START, END, freq=BMonthEnd()) the_union = self.rng.union(rng) - tm.assertIsInstance(the_union, DatetimeIndex) + assert isinstance(the_union, DatetimeIndex) def test_outer_join(self): # should just behave as union @@ -243,42 +312,42 @@ def test_outer_join(self): right = self.rng[5:10] the_join = left.join(right, how='outer') - tm.assertIsInstance(the_join, DatetimeIndex) + assert isinstance(the_join, DatetimeIndex) # non-overlapping, gap in middle left = self.rng[:5] right = self.rng[10:] the_join = left.join(right, how='outer') - tm.assertIsInstance(the_join, DatetimeIndex) - self.assertIsNone(the_join.freq) + assert isinstance(the_join, DatetimeIndex) + assert the_join.freq is None # non-overlapping, no gap left = self.rng[:5] right = self.rng[5:10] the_join = left.join(right, how='outer') - tm.assertIsInstance(the_join, DatetimeIndex) + assert 
isinstance(the_join, DatetimeIndex) # overlapping, but different offset rng = date_range(START, END, freq=BMonthEnd()) the_join = self.rng.join(rng, how='outer') - tm.assertIsInstance(the_join, DatetimeIndex) - self.assertIsNone(the_join.freq) + assert isinstance(the_join, DatetimeIndex) + assert the_join.freq is None def test_union_not_cacheable(self): rng = date_range('1/1/2000', periods=50, freq=Minute()) rng1 = rng[10:] rng2 = rng[:25] the_union = rng1.union(rng2) - self.assert_index_equal(the_union, rng) + tm.assert_index_equal(the_union, rng) rng1 = rng[10:] rng2 = rng[15:35] the_union = rng1.union(rng2) expected = rng[10:] - self.assert_index_equal(the_union, expected) + tm.assert_index_equal(the_union, expected) def test_intersection(self): rng = date_range('1/1/2000', periods=50, freq=Minute()) @@ -286,27 +355,26 @@ def test_intersection(self): rng2 = rng[:25] the_int = rng1.intersection(rng2) expected = rng[10:25] - self.assert_index_equal(the_int, expected) - tm.assertIsInstance(the_int, DatetimeIndex) - self.assertEqual(the_int.offset, rng.offset) + tm.assert_index_equal(the_int, expected) + assert isinstance(the_int, DatetimeIndex) + assert the_int.offset == rng.offset the_int = rng1.intersection(rng2.view(DatetimeIndex)) - self.assert_index_equal(the_int, expected) + tm.assert_index_equal(the_int, expected) # non-overlapping the_int = rng[:10].intersection(rng[10:]) expected = DatetimeIndex([]) - self.assert_index_equal(the_int, expected) + tm.assert_index_equal(the_int, expected) def test_intersection_bug(self): # GH #771 a = bdate_range('11/30/2011', '12/31/2011') b = bdate_range('12/10/2011', '12/20/2011') result = a.intersection(b) - self.assert_index_equal(result, b) + tm.assert_index_equal(result, b) def test_month_range_union_tz_pytz(self): - tm._skip_if_no_pytz() from pytz import timezone tz = timezone('US/Eastern') @@ -323,11 +391,10 @@ def test_month_range_union_tz_pytz(self): early_dr.union(late_dr) + @td.skip_if_windows_python_3 def 
test_month_range_union_tz_dateutil(self): - tm._skip_if_windows_python_3() - tm._skip_if_no_dateutil() - from pandas.tslib import _dateutil_gettz as timezone - tz = timezone('US/Eastern') + from pandas._libs.tslibs.timezones import dateutil_gettz + tz = dateutil_gettz('US/Eastern') early_start = datetime(2011, 1, 1) early_end = datetime(2011, 3, 1) @@ -343,10 +410,10 @@ def test_month_range_union_tz_dateutil(self): early_dr.union(late_dr) -class TestCustomDatetimeIndex(tm.TestCase): +class TestCustomDatetimeIndex(object): - def setUp(self): - self.rng = cdate_range(START, END) + def setup_method(self, method): + self.rng = bdate_range(START, END, freq='C') def test_union(self): # overlapping @@ -354,30 +421,30 @@ def test_union(self): right = self.rng[5:10] the_union = left.union(right) - tm.assertIsInstance(the_union, DatetimeIndex) + assert isinstance(the_union, DatetimeIndex) # non-overlapping, gap in middle left = self.rng[:5] right = self.rng[10:] the_union = left.union(right) - tm.assertIsInstance(the_union, Index) + assert isinstance(the_union, Index) # non-overlapping, no gap left = self.rng[:5] right = self.rng[5:10] the_union = left.union(right) - tm.assertIsInstance(the_union, DatetimeIndex) + assert isinstance(the_union, DatetimeIndex) # order does not matter - self.assert_index_equal(right.union(left), the_union) + tm.assert_index_equal(right.union(left), the_union) # overlapping, but different offset rng = date_range(START, END, freq=BMonthEnd()) the_union = self.rng.union(rng) - tm.assertIsInstance(the_union, DatetimeIndex) + assert isinstance(the_union, DatetimeIndex) def test_outer_join(self): # should just behave as union @@ -387,33 +454,33 @@ def test_outer_join(self): right = self.rng[5:10] the_join = left.join(right, how='outer') - tm.assertIsInstance(the_join, DatetimeIndex) + assert isinstance(the_join, DatetimeIndex) # non-overlapping, gap in middle left = self.rng[:5] right = self.rng[10:] the_join = left.join(right, how='outer') - 
tm.assertIsInstance(the_join, DatetimeIndex) - self.assertIsNone(the_join.freq) + assert isinstance(the_join, DatetimeIndex) + assert the_join.freq is None # non-overlapping, no gap left = self.rng[:5] right = self.rng[5:10] the_join = left.join(right, how='outer') - tm.assertIsInstance(the_join, DatetimeIndex) + assert isinstance(the_join, DatetimeIndex) # overlapping, but different offset rng = date_range(START, END, freq=BMonthEnd()) the_join = self.rng.join(rng, how='outer') - tm.assertIsInstance(the_join, DatetimeIndex) - self.assertIsNone(the_join.freq) + assert isinstance(the_join, DatetimeIndex) + assert the_join.freq is None def test_intersection_bug(self): # GH #771 - a = cdate_range('11/30/2011', '12/31/2011') - b = cdate_range('12/10/2011', '12/20/2011') + a = bdate_range('11/30/2011', '12/31/2011', freq='C') + b = bdate_range('12/10/2011', '12/20/2011', freq='C') result = a.intersection(b) - self.assert_index_equal(result, b) + tm.assert_index_equal(result, b) diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py new file mode 100644 index 0000000000000..217610b76cf0f --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -0,0 +1,1029 @@ +# -*- coding: utf-8 -*- +""" +Tests for DatetimeIndex timezone-related methods +""" +from datetime import datetime, timedelta, tzinfo +from distutils.version import LooseVersion + +import pytest +import pytz +import dateutil +from dateutil.tz import gettz, tzlocal +import numpy as np + +import pandas.util.testing as tm +import pandas.util._test_decorators as td + +import pandas as pd +from pandas._libs import tslib +from pandas._libs.tslibs import timezones +from pandas.compat import lrange, zip, PY3 +from pandas import (DatetimeIndex, date_range, bdate_range, + Timestamp, isna, to_datetime, Index) + + +class FixedOffset(tzinfo): + """Fixed offset in minutes east from UTC.""" + + def __init__(self, offset, name): + self.__offset = 
timedelta(minutes=offset) + self.__name = name + + def utcoffset(self, dt): + return self.__offset + + def tzname(self, dt): + return self.__name + + def dst(self, dt): + return timedelta(0) + + +fixed_off = FixedOffset(-420, '-07:00') +fixed_off_no_name = FixedOffset(-330, None) + + +class TestDatetimeIndexTimezones(object): + # ------------------------------------------------------------- + # DatetimeIndex.tz_convert + def test_tz_convert_nat(self): + # GH#5546 + dates = [pd.NaT] + idx = DatetimeIndex(dates) + idx = idx.tz_localize('US/Pacific') + tm.assert_index_equal(idx, DatetimeIndex(dates, tz='US/Pacific')) + idx = idx.tz_convert('US/Eastern') + tm.assert_index_equal(idx, DatetimeIndex(dates, tz='US/Eastern')) + idx = idx.tz_convert('UTC') + tm.assert_index_equal(idx, DatetimeIndex(dates, tz='UTC')) + + dates = ['2010-12-01 00:00', '2010-12-02 00:00', pd.NaT] + idx = DatetimeIndex(dates) + idx = idx.tz_localize('US/Pacific') + tm.assert_index_equal(idx, DatetimeIndex(dates, tz='US/Pacific')) + idx = idx.tz_convert('US/Eastern') + expected = ['2010-12-01 03:00', '2010-12-02 03:00', pd.NaT] + tm.assert_index_equal(idx, DatetimeIndex(expected, tz='US/Eastern')) + + idx = idx + pd.offsets.Hour(5) + expected = ['2010-12-01 08:00', '2010-12-02 08:00', pd.NaT] + tm.assert_index_equal(idx, DatetimeIndex(expected, tz='US/Eastern')) + idx = idx.tz_convert('US/Pacific') + expected = ['2010-12-01 05:00', '2010-12-02 05:00', pd.NaT] + tm.assert_index_equal(idx, DatetimeIndex(expected, tz='US/Pacific')) + + idx = idx + np.timedelta64(3, 'h') + expected = ['2010-12-01 08:00', '2010-12-02 08:00', pd.NaT] + tm.assert_index_equal(idx, DatetimeIndex(expected, tz='US/Pacific')) + + idx = idx.tz_convert('US/Eastern') + expected = ['2010-12-01 11:00', '2010-12-02 11:00', pd.NaT] + tm.assert_index_equal(idx, DatetimeIndex(expected, tz='US/Eastern')) + + @pytest.mark.parametrize('prefix', ['', 'dateutil/']) + def test_dti_tz_convert_compat_timestamp(self, prefix): + strdates = 
['1/1/2012', '3/1/2012', '4/1/2012'] + idx = DatetimeIndex(strdates, tz=prefix + 'US/Eastern') + + conv = idx[0].tz_convert(prefix + 'US/Pacific') + expected = idx.tz_convert(prefix + 'US/Pacific')[0] + + assert conv == expected + + def test_dti_tz_convert_hour_overflow_dst(self): + # Regression test for: + # https://github.com/pandas-dev/pandas/issues/13306 + + # sorted case US/Eastern -> UTC + ts = ['2008-05-12 09:50:00', + '2008-12-12 09:50:35', + '2009-05-12 09:50:32'] + tt = DatetimeIndex(ts).tz_localize('US/Eastern') + ut = tt.tz_convert('UTC') + expected = Index([13, 14, 13]) + tm.assert_index_equal(ut.hour, expected) + + # sorted case UTC -> US/Eastern + ts = ['2008-05-12 13:50:00', + '2008-12-12 14:50:35', + '2009-05-12 13:50:32'] + tt = DatetimeIndex(ts).tz_localize('UTC') + ut = tt.tz_convert('US/Eastern') + expected = Index([9, 9, 9]) + tm.assert_index_equal(ut.hour, expected) + + # unsorted case US/Eastern -> UTC + ts = ['2008-05-12 09:50:00', + '2008-12-12 09:50:35', + '2008-05-12 09:50:32'] + tt = DatetimeIndex(ts).tz_localize('US/Eastern') + ut = tt.tz_convert('UTC') + expected = Index([13, 14, 13]) + tm.assert_index_equal(ut.hour, expected) + + # unsorted case UTC -> US/Eastern + ts = ['2008-05-12 13:50:00', + '2008-12-12 14:50:35', + '2008-05-12 13:50:32'] + tt = DatetimeIndex(ts).tz_localize('UTC') + ut = tt.tz_convert('US/Eastern') + expected = Index([9, 9, 9]) + tm.assert_index_equal(ut.hour, expected) + + @pytest.mark.parametrize('tz', ['US/Eastern', 'dateutil/US/Eastern']) + def test_dti_tz_convert_hour_overflow_dst_timestamps(self, tz): + # Regression test for GH#13306 + + # sorted case US/Eastern -> UTC + ts = [Timestamp('2008-05-12 09:50:00', tz=tz), + Timestamp('2008-12-12 09:50:35', tz=tz), + Timestamp('2009-05-12 09:50:32', tz=tz)] + tt = DatetimeIndex(ts) + ut = tt.tz_convert('UTC') + expected = Index([13, 14, 13]) + tm.assert_index_equal(ut.hour, expected) + + # sorted case UTC -> US/Eastern + ts = [Timestamp('2008-05-12 13:50:00', 
tz='UTC'), + Timestamp('2008-12-12 14:50:35', tz='UTC'), + Timestamp('2009-05-12 13:50:32', tz='UTC')] + tt = DatetimeIndex(ts) + ut = tt.tz_convert('US/Eastern') + expected = Index([9, 9, 9]) + tm.assert_index_equal(ut.hour, expected) + + # unsorted case US/Eastern -> UTC + ts = [Timestamp('2008-05-12 09:50:00', tz=tz), + Timestamp('2008-12-12 09:50:35', tz=tz), + Timestamp('2008-05-12 09:50:32', tz=tz)] + tt = DatetimeIndex(ts) + ut = tt.tz_convert('UTC') + expected = Index([13, 14, 13]) + tm.assert_index_equal(ut.hour, expected) + + # unsorted case UTC -> US/Eastern + ts = [Timestamp('2008-05-12 13:50:00', tz='UTC'), + Timestamp('2008-12-12 14:50:35', tz='UTC'), + Timestamp('2008-05-12 13:50:32', tz='UTC')] + tt = DatetimeIndex(ts) + ut = tt.tz_convert('US/Eastern') + expected = Index([9, 9, 9]) + tm.assert_index_equal(ut.hour, expected) + + @pytest.mark.parametrize('freq, n', [('H', 1), ('T', 60), ('S', 3600)]) + def test_dti_tz_convert_trans_pos_plus_1__bug(self, freq, n): + # Regression test for tslib.tz_convert(vals, tz1, tz2). + # See https://github.com/pandas-dev/pandas/issues/4496 for details. 
+ idx = date_range(datetime(2011, 3, 26, 23), + datetime(2011, 3, 27, 1), freq=freq) + idx = idx.tz_localize('UTC') + idx = idx.tz_convert('Europe/Moscow') + + expected = np.repeat(np.array([3, 4, 5]), np.array([n, n, 1])) + tm.assert_index_equal(idx.hour, Index(expected)) + + def test_dti_tz_convert_dst(self): + for freq, n in [('H', 1), ('T', 60), ('S', 3600)]: + # Start DST + idx = date_range('2014-03-08 23:00', '2014-03-09 09:00', freq=freq, + tz='UTC') + idx = idx.tz_convert('US/Eastern') + expected = np.repeat(np.array([18, 19, 20, 21, 22, 23, + 0, 1, 3, 4, 5]), + np.array([n, n, n, n, n, n, n, n, n, n, 1])) + tm.assert_index_equal(idx.hour, Index(expected)) + + idx = date_range('2014-03-08 18:00', '2014-03-09 05:00', freq=freq, + tz='US/Eastern') + idx = idx.tz_convert('UTC') + expected = np.repeat(np.array([23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), + np.array([n, n, n, n, n, n, n, n, n, n, 1])) + tm.assert_index_equal(idx.hour, Index(expected)) + + # End DST + idx = date_range('2014-11-01 23:00', '2014-11-02 09:00', freq=freq, + tz='UTC') + idx = idx.tz_convert('US/Eastern') + expected = np.repeat(np.array([19, 20, 21, 22, 23, + 0, 1, 1, 2, 3, 4]), + np.array([n, n, n, n, n, n, n, n, n, n, 1])) + tm.assert_index_equal(idx.hour, Index(expected)) + + idx = date_range('2014-11-01 18:00', '2014-11-02 05:00', freq=freq, + tz='US/Eastern') + idx = idx.tz_convert('UTC') + expected = np.repeat(np.array([22, 23, 0, 1, 2, 3, 4, 5, 6, + 7, 8, 9, 10]), + np.array([n, n, n, n, n, n, n, n, n, + n, n, n, 1])) + tm.assert_index_equal(idx.hour, Index(expected)) + + # daily + # Start DST + idx = date_range('2014-03-08 00:00', '2014-03-09 00:00', freq='D', + tz='UTC') + idx = idx.tz_convert('US/Eastern') + tm.assert_index_equal(idx.hour, Index([19, 19])) + + idx = date_range('2014-03-08 00:00', '2014-03-09 00:00', freq='D', + tz='US/Eastern') + idx = idx.tz_convert('UTC') + tm.assert_index_equal(idx.hour, Index([5, 5])) + + # End DST + idx = date_range('2014-11-01 00:00', 
'2014-11-02 00:00', freq='D', + tz='UTC') + idx = idx.tz_convert('US/Eastern') + tm.assert_index_equal(idx.hour, Index([20, 20])) + + idx = date_range('2014-11-01 00:00', '2014-11-02 000:00', freq='D', + tz='US/Eastern') + idx = idx.tz_convert('UTC') + tm.assert_index_equal(idx.hour, Index([4, 4])) + + @pytest.mark.parametrize('tz', ['UTC', 'Asia/Tokyo', 'US/Eastern', + 'dateutil/US/Pacific']) + def test_tz_convert_roundtrip(self, tz): + idx1 = date_range(start='2014-01-01', end='2014-12-31', freq='M', + tz='UTC') + exp1 = date_range(start='2014-01-01', end='2014-12-31', freq='M') + + idx2 = date_range(start='2014-01-01', end='2014-12-31', freq='D', + tz='UTC') + exp2 = date_range(start='2014-01-01', end='2014-12-31', freq='D') + + idx3 = date_range(start='2014-01-01', end='2014-03-01', freq='H', + tz='UTC') + exp3 = date_range(start='2014-01-01', end='2014-03-01', freq='H') + + idx4 = date_range(start='2014-08-01', end='2014-10-31', freq='T', + tz='UTC') + exp4 = date_range(start='2014-08-01', end='2014-10-31', freq='T') + + for idx, expected in [(idx1, exp1), (idx2, exp2), (idx3, exp3), + (idx4, exp4)]: + converted = idx.tz_convert(tz) + reset = converted.tz_convert(None) + tm.assert_index_equal(reset, expected) + assert reset.tzinfo is None + expected = converted.tz_convert('UTC').tz_localize(None) + tm.assert_index_equal(reset, expected) + + def test_dti_tz_convert_tzlocal(self): + # GH#13583 + # tz_convert doesn't affect to internal + dti = date_range(start='2001-01-01', end='2001-03-01', tz='UTC') + dti2 = dti.tz_convert(dateutil.tz.tzlocal()) + tm.assert_numpy_array_equal(dti2.asi8, dti.asi8) + + dti = date_range(start='2001-01-01', end='2001-03-01', + tz=dateutil.tz.tzlocal()) + dti2 = dti.tz_convert(None) + tm.assert_numpy_array_equal(dti2.asi8, dti.asi8) + + @pytest.mark.parametrize('tz', ['US/Eastern', 'dateutil/US/Eastern', + pytz.timezone('US/Eastern'), + gettz('US/Eastern')]) + def test_dti_tz_convert_utc_to_local_no_modify(self, tz): + rng = 
date_range('3/11/2012', '3/12/2012', freq='H', tz='utc') + rng_eastern = rng.tz_convert(tz) + + # Values are unmodified + tm.assert_numpy_array_equal(rng.asi8, rng_eastern.asi8) + + assert timezones.tz_compare(rng_eastern.tz, timezones.maybe_get_tz(tz)) + + @pytest.mark.parametrize('tzstr', ['US/Eastern', 'dateutil/US/Eastern']) + def test_tz_convert_unsorted(self, tzstr): + dr = date_range('2012-03-09', freq='H', periods=100, tz='utc') + dr = dr.tz_convert(tzstr) + + result = dr[::-1].hour + exp = dr.hour[::-1] + tm.assert_almost_equal(result, exp) + + # ------------------------------------------------------------- + # DatetimeIndex.tz_localize + + def test_dti_tz_localize_nonexistent_raise_coerce(self): + # GH#13057 + times = ['2015-03-08 01:00', '2015-03-08 02:00', '2015-03-08 03:00'] + index = DatetimeIndex(times) + tz = 'US/Eastern' + with pytest.raises(pytz.NonExistentTimeError): + index.tz_localize(tz=tz) + + with pytest.raises(pytz.NonExistentTimeError): + index.tz_localize(tz=tz, errors='raise') + + result = index.tz_localize(tz=tz, errors='coerce') + test_times = ['2015-03-08 01:00-05:00', 'NaT', + '2015-03-08 03:00-04:00'] + dti = DatetimeIndex(test_times) + expected = dti.tz_localize('UTC').tz_convert('US/Eastern') + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize('tz', [pytz.timezone('US/Eastern'), + gettz('US/Eastern')]) + def test_dti_tz_localize_ambiguous_infer(self, tz): + # November 6, 2011, fall back, repeat 2 AM hour + # With no repeated hours, we cannot infer the transition + dr = date_range(datetime(2011, 11, 6, 0), periods=5, + freq=pd.offsets.Hour()) + with pytest.raises(pytz.AmbiguousTimeError): + dr.tz_localize(tz) + + # With repeated hours, we can infer the transition + dr = date_range(datetime(2011, 11, 6, 0), periods=5, + freq=pd.offsets.Hour(), tz=tz) + times = ['11/06/2011 00:00', '11/06/2011 01:00', '11/06/2011 01:00', + '11/06/2011 02:00', '11/06/2011 03:00'] + di = DatetimeIndex(times) + localized = 
di.tz_localize(tz, ambiguous='infer') + tm.assert_index_equal(dr, localized) + with tm.assert_produces_warning(FutureWarning): + localized_old = di.tz_localize(tz, infer_dst=True) + tm.assert_index_equal(dr, localized_old) + tm.assert_index_equal(dr, DatetimeIndex(times, tz=tz, + ambiguous='infer')) + + # When there is no dst transition, nothing special happens + dr = date_range(datetime(2011, 6, 1, 0), periods=10, + freq=pd.offsets.Hour()) + localized = dr.tz_localize(tz) + localized_infer = dr.tz_localize(tz, ambiguous='infer') + tm.assert_index_equal(localized, localized_infer) + with tm.assert_produces_warning(FutureWarning): + localized_infer_old = dr.tz_localize(tz, infer_dst=True) + tm.assert_index_equal(localized, localized_infer_old) + + @pytest.mark.parametrize('tz', [pytz.timezone('US/Eastern'), + gettz('US/Eastern')]) + def test_dti_tz_localize_ambiguous_times(self, tz): + # March 13, 2011, spring forward, skip from 2 AM to 3 AM + dr = date_range(datetime(2011, 3, 13, 1, 30), periods=3, + freq=pd.offsets.Hour()) + with pytest.raises(pytz.NonExistentTimeError): + dr.tz_localize(tz) + + # after dst transition, it works + dr = date_range(datetime(2011, 3, 13, 3, 30), periods=3, + freq=pd.offsets.Hour(), tz=tz) + + # November 6, 2011, fall back, repeat 2 AM hour + dr = date_range(datetime(2011, 11, 6, 1, 30), periods=3, + freq=pd.offsets.Hour()) + with pytest.raises(pytz.AmbiguousTimeError): + dr.tz_localize(tz) + + # UTC is OK + dr = date_range(datetime(2011, 3, 13), periods=48, + freq=pd.offsets.Minute(30), tz=pytz.utc) + + @pytest.mark.parametrize('tzstr', ['US/Eastern', 'dateutil/US/Eastern']) + def test_dti_tz_localize_pass_dates_to_utc(self, tzstr): + strdates = ['1/1/2012', '3/1/2012', '4/1/2012'] + + idx = DatetimeIndex(strdates) + conv = idx.tz_localize(tzstr) + + fromdates = DatetimeIndex(strdates, tz=tzstr) + + assert conv.tz == fromdates.tz + tm.assert_numpy_array_equal(conv.values, fromdates.values) + + @pytest.mark.parametrize('prefix', ['', 
'dateutil/']) + def test_dti_tz_localize(self, prefix): + tzstr = prefix + 'US/Eastern' + dti = DatetimeIndex(start='1/1/2005', end='1/1/2005 0:00:30.256', + freq='L') + dti2 = dti.tz_localize(tzstr) + + dti_utc = DatetimeIndex(start='1/1/2005 05:00', + end='1/1/2005 5:00:30.256', freq='L', tz='utc') + + tm.assert_numpy_array_equal(dti2.values, dti_utc.values) + + dti3 = dti2.tz_convert(prefix + 'US/Pacific') + tm.assert_numpy_array_equal(dti3.values, dti_utc.values) + + dti = DatetimeIndex(start='11/6/2011 1:59', end='11/6/2011 2:00', + freq='L') + with pytest.raises(pytz.AmbiguousTimeError): + dti.tz_localize(tzstr) + + dti = DatetimeIndex(start='3/13/2011 1:59', end='3/13/2011 2:00', + freq='L') + with pytest.raises(pytz.NonExistentTimeError): + dti.tz_localize(tzstr) + + @pytest.mark.parametrize('tz', ['US/Eastern', 'dateutil/US/Eastern', + pytz.timezone('US/Eastern'), + gettz('US/Eastern')]) + def test_dti_tz_localize_utc_conversion(self, tz): + # Localizing to time zone should: + # 1) check for DST ambiguities + # 2) convert to UTC + + rng = date_range('3/10/2012', '3/11/2012', freq='30T') + + converted = rng.tz_localize(tz) + expected_naive = rng + pd.offsets.Hour(5) + tm.assert_numpy_array_equal(converted.asi8, expected_naive.asi8) + + # DST ambiguity, this should fail + rng = date_range('3/11/2012', '3/12/2012', freq='30T') + # Is this really how it should fail?? 
+ with pytest.raises(pytz.NonExistentTimeError): + rng.tz_localize(tz) + + @pytest.mark.parametrize('tz', ['UTC', 'Asia/Tokyo', 'US/Eastern', + 'dateutil/US/Pacific']) + def test_dti_tz_localize_roundtrip(self, tz): + idx1 = date_range(start='2014-01-01', end='2014-12-31', freq='M') + idx2 = date_range(start='2014-01-01', end='2014-12-31', freq='D') + idx3 = date_range(start='2014-01-01', end='2014-03-01', freq='H') + idx4 = date_range(start='2014-08-01', end='2014-10-31', freq='T') + for idx in [idx1, idx2, idx3, idx4]: + localized = idx.tz_localize(tz) + expected = date_range(start=idx[0], end=idx[-1], freq=idx.freq, + tz=tz) + tm.assert_index_equal(localized, expected) + + with pytest.raises(TypeError): + localized.tz_localize(tz) + + reset = localized.tz_localize(None) + tm.assert_index_equal(reset, idx) + assert reset.tzinfo is None + + def test_dti_tz_localize_naive(self): + rng = date_range('1/1/2011', periods=100, freq='H') + + conv = rng.tz_localize('US/Pacific') + exp = date_range('1/1/2011', periods=100, freq='H', tz='US/Pacific') + + tm.assert_index_equal(conv, exp) + + def test_dti_tz_localize_tzlocal(self): + # GH#13583 + offset = dateutil.tz.tzlocal().utcoffset(datetime(2011, 1, 1)) + offset = int(offset.total_seconds() * 1000000000) + + dti = date_range(start='2001-01-01', end='2001-03-01') + dti2 = dti.tz_localize(dateutil.tz.tzlocal()) + tm.assert_numpy_array_equal(dti2.asi8 + offset, dti.asi8) + + dti = date_range(start='2001-01-01', end='2001-03-01', + tz=dateutil.tz.tzlocal()) + dti2 = dti.tz_localize(None) + tm.assert_numpy_array_equal(dti2.asi8 - offset, dti.asi8) + + @pytest.mark.parametrize('tz', [pytz.timezone('US/Eastern'), + gettz('US/Eastern')]) + def test_dti_tz_localize_ambiguous_nat(self, tz): + times = ['11/06/2011 00:00', '11/06/2011 01:00', '11/06/2011 01:00', + '11/06/2011 02:00', '11/06/2011 03:00'] + di = DatetimeIndex(times) + localized = di.tz_localize(tz, ambiguous='NaT') + + times = ['11/06/2011 00:00', np.NaN, np.NaN, 
'11/06/2011 02:00', + '11/06/2011 03:00'] + di_test = DatetimeIndex(times, tz='US/Eastern') + + # left dtype is datetime64[ns, US/Eastern] + # right is datetime64[ns, tzfile('/usr/share/zoneinfo/US/Eastern')] + tm.assert_numpy_array_equal(di_test.values, localized.values) + + @pytest.mark.parametrize('tz', [pytz.timezone('US/Eastern'), + gettz('US/Eastern')]) + def test_dti_tz_localize_ambiguous_flags(self, tz): + # November 6, 2011, fall back, repeat 2 AM hour + + # Pass in flags to determine right dst transition + dr = date_range(datetime(2011, 11, 6, 0), periods=5, + freq=pd.offsets.Hour(), tz=tz) + times = ['11/06/2011 00:00', '11/06/2011 01:00', '11/06/2011 01:00', + '11/06/2011 02:00', '11/06/2011 03:00'] + + # Test tz_localize + di = DatetimeIndex(times) + is_dst = [1, 1, 0, 0, 0] + localized = di.tz_localize(tz, ambiguous=is_dst) + tm.assert_index_equal(dr, localized) + tm.assert_index_equal(dr, DatetimeIndex(times, tz=tz, + ambiguous=is_dst)) + + localized = di.tz_localize(tz, ambiguous=np.array(is_dst)) + tm.assert_index_equal(dr, localized) + + localized = di.tz_localize(tz, + ambiguous=np.array(is_dst).astype('bool')) + tm.assert_index_equal(dr, localized) + + # Test constructor + localized = DatetimeIndex(times, tz=tz, ambiguous=is_dst) + tm.assert_index_equal(dr, localized) + + # Test duplicate times where infer_dst fails + times += times + di = DatetimeIndex(times) + + # When the sizes are incompatible, make sure error is raised + with pytest.raises(Exception): + di.tz_localize(tz, ambiguous=is_dst) + + # When sizes are compatible and there are repeats ('infer' won't work) + is_dst = np.hstack((is_dst, is_dst)) + localized = di.tz_localize(tz, ambiguous=is_dst) + dr = dr.append(dr) + tm.assert_index_equal(dr, localized) + + # When there is no dst transition, nothing special happens + dr = date_range(datetime(2011, 6, 1, 0), periods=10, + freq=pd.offsets.Hour()) + is_dst = np.array([1] * 10) + localized = dr.tz_localize(tz) + localized_is_dst = 
dr.tz_localize(tz, ambiguous=is_dst) + tm.assert_index_equal(localized, localized_is_dst) + + # TODO: belongs outside tz_localize tests? + @pytest.mark.parametrize('tz', ['Europe/London', 'dateutil/Europe/London']) + def test_dti_construction_ambiguous_endpoint(self, tz): + # construction with an ambiguous end-point + # GH#11626 + + # FIXME: This next block fails to raise; it was taken from an older + # version of this test that had an indention mistake that caused it + # to not get executed. + # with pytest.raises(pytz.AmbiguousTimeError): + # date_range("2013-10-26 23:00", "2013-10-27 01:00", + # tz="Europe/London", freq="H") + + times = date_range("2013-10-26 23:00", "2013-10-27 01:00", freq="H", + tz=tz, ambiguous='infer') + assert times[0] == Timestamp('2013-10-26 23:00', tz=tz, freq="H") + + if str(tz).startswith('dateutil'): + if LooseVersion(dateutil.__version__) < LooseVersion('2.6.0'): + # see GH#14621 + assert times[-1] == Timestamp('2013-10-27 01:00:00+0000', + tz=tz, freq="H") + elif LooseVersion(dateutil.__version__) > LooseVersion('2.6.0'): + # fixed ambiguous behavior + assert times[-1] == Timestamp('2013-10-27 01:00:00+0100', + tz=tz, freq="H") + else: + assert times[-1] == Timestamp('2013-10-27 01:00:00+0000', + tz=tz, freq="H") + + def test_dti_tz_localize_bdate_range(self): + dr = pd.bdate_range('1/1/2009', '1/1/2010') + dr_utc = pd.bdate_range('1/1/2009', '1/1/2010', tz=pytz.utc) + localized = dr.tz_localize(pytz.utc) + tm.assert_index_equal(dr_utc, localized) + + # ------------------------------------------------------------- + # DatetimeIndex.normalize + + def test_normalize_tz(self): + rng = date_range('1/1/2000 9:30', periods=10, freq='D', + tz='US/Eastern') + + result = rng.normalize() + expected = date_range('1/1/2000', periods=10, freq='D', + tz='US/Eastern') + tm.assert_index_equal(result, expected) + + assert result.is_normalized + assert not rng.is_normalized + + rng = date_range('1/1/2000 9:30', periods=10, freq='D', tz='UTC') + + 
result = rng.normalize() + expected = date_range('1/1/2000', periods=10, freq='D', tz='UTC') + tm.assert_index_equal(result, expected) + + assert result.is_normalized + assert not rng.is_normalized + + rng = date_range('1/1/2000 9:30', periods=10, freq='D', tz=tzlocal()) + result = rng.normalize() + expected = date_range('1/1/2000', periods=10, freq='D', tz=tzlocal()) + tm.assert_index_equal(result, expected) + + assert result.is_normalized + assert not rng.is_normalized + + @td.skip_if_windows + @pytest.mark.parametrize('timezone', ['US/Pacific', 'US/Eastern', 'UTC', + 'Asia/Kolkata', 'Asia/Shanghai', + 'Australia/Canberra']) + def test_normalize_tz_local(self, timezone): + # GH#13459 + with tm.set_timezone(timezone): + rng = date_range('1/1/2000 9:30', periods=10, freq='D', + tz=tzlocal()) + + result = rng.normalize() + expected = date_range('1/1/2000', periods=10, freq='D', + tz=tzlocal()) + tm.assert_index_equal(result, expected) + + assert result.is_normalized + assert not rng.is_normalized + + # ------------------------------------------------------------ + # DatetimeIndex.__new__ + + @pytest.mark.parametrize('prefix', ['', 'dateutil/']) + def test_dti_constructor_static_tzinfo(self, prefix): + # it works! 
+ index = DatetimeIndex([datetime(2012, 1, 1)], tz=prefix + 'EST') + index.hour + index[0] + + def test_dti_constructor_with_fixed_tz(self): + off = FixedOffset(420, '+07:00') + start = datetime(2012, 3, 11, 5, 0, 0, tzinfo=off) + end = datetime(2012, 6, 11, 5, 0, 0, tzinfo=off) + rng = date_range(start=start, end=end) + assert off == rng.tz + + rng2 = date_range(start, periods=len(rng), tz=off) + tm.assert_index_equal(rng, rng2) + + rng3 = date_range('3/11/2012 05:00:00+07:00', + '6/11/2012 05:00:00+07:00') + assert (rng.values == rng3.values).all() + + @pytest.mark.parametrize('tzstr', ['US/Eastern', 'dateutil/US/Eastern']) + def test_dti_convert_datetime_list(self, tzstr): + dr = date_range('2012-06-02', periods=10, + tz=tzstr, name='foo') + dr2 = DatetimeIndex(list(dr), name='foo') + tm.assert_index_equal(dr, dr2) + assert dr.tz == dr2.tz + assert dr2.name == 'foo' + + def test_dti_construction_univalent(self): + rng = date_range('03/12/2012 00:00', periods=10, freq='W-FRI', + tz='US/Eastern') + rng2 = DatetimeIndex(data=rng, tz='US/Eastern') + tm.assert_index_equal(rng, rng2) + + @pytest.mark.parametrize('tz', [pytz.timezone('US/Eastern'), + gettz('US/Eastern')]) + def test_dti_from_tzaware_datetime(self, tz): + d = [datetime(2012, 8, 19, tzinfo=tz)] + + index = DatetimeIndex(d) + assert timezones.tz_compare(index.tz, tz) + + @pytest.mark.parametrize('tzstr', ['US/Eastern', 'dateutil/US/Eastern']) + def test_dti_tz_constructors(self, tzstr): + """ Test different DatetimeIndex constructions with timezone + Follow-up of GH#4229 + """ + + arr = ['11/10/2005 08:00:00', '11/10/2005 09:00:00'] + + idx1 = to_datetime(arr).tz_localize(tzstr) + idx2 = DatetimeIndex(start="2005-11-10 08:00:00", freq='H', periods=2, + tz=tzstr) + idx3 = DatetimeIndex(arr, tz=tzstr) + idx4 = DatetimeIndex(np.array(arr), tz=tzstr) + + for other in [idx2, idx3, idx4]: + tm.assert_index_equal(idx1, other) + + # ------------------------------------------------------------- + # Unsorted + + 
@pytest.mark.parametrize('how', ['inner', 'outer', 'left', 'right']) + def test_join_utc_convert(self, how): + rng = date_range('1/1/2011', periods=100, freq='H', tz='utc') + + left = rng.tz_convert('US/Eastern') + right = rng.tz_convert('Europe/Berlin') + + result = left.join(left[:-5], how=how) + assert isinstance(result, DatetimeIndex) + assert result.tz == left.tz + + result = left.join(right[:-5], how=how) + assert isinstance(result, DatetimeIndex) + assert result.tz.zone == 'UTC' + + def test_dti_drop_dont_lose_tz(self): + # GH#2621 + ind = date_range("2012-12-01", periods=10, tz="utc") + ind = ind.drop(ind[-1]) + + assert ind.tz is not None + + def test_date_range_localize(self): + rng = date_range('3/11/2012 03:00', periods=15, freq='H', + tz='US/Eastern') + rng2 = DatetimeIndex(['3/11/2012 03:00', '3/11/2012 04:00'], + tz='US/Eastern') + rng3 = date_range('3/11/2012 03:00', periods=15, freq='H') + rng3 = rng3.tz_localize('US/Eastern') + + tm.assert_index_equal(rng, rng3) + + # DST transition time + val = rng[0] + exp = Timestamp('3/11/2012 03:00', tz='US/Eastern') + + assert val.hour == 3 + assert exp.hour == 3 + assert val == exp # same UTC value + tm.assert_index_equal(rng[:2], rng2) + + # Right before the DST transition + rng = date_range('3/11/2012 00:00', periods=2, freq='H', + tz='US/Eastern') + rng2 = DatetimeIndex(['3/11/2012 00:00', '3/11/2012 01:00'], + tz='US/Eastern') + tm.assert_index_equal(rng, rng2) + exp = Timestamp('3/11/2012 00:00', tz='US/Eastern') + assert exp.hour == 0 + assert rng[0] == exp + exp = Timestamp('3/11/2012 01:00', tz='US/Eastern') + assert exp.hour == 1 + assert rng[1] == exp + + rng = date_range('3/11/2012 00:00', periods=10, freq='H', + tz='US/Eastern') + assert rng[2].hour == 3 + + def test_timestamp_equality_different_timezones(self): + utc_range = date_range('1/1/2000', periods=20, tz='UTC') + eastern_range = utc_range.tz_convert('US/Eastern') + berlin_range = utc_range.tz_convert('Europe/Berlin') + + for a, b, c in 
zip(utc_range, eastern_range, berlin_range): + assert a == b + assert b == c + assert a == c + + assert (utc_range == eastern_range).all() + assert (utc_range == berlin_range).all() + assert (berlin_range == eastern_range).all() + + def test_dti_intersection(self): + rng = date_range('1/1/2011', periods=100, freq='H', tz='utc') + + left = rng[10:90][::-1] + right = rng[20:80][::-1] + + assert left.tz == rng.tz + result = left.intersection(right) + assert result.tz == left.tz + + def test_dti_equals_with_tz(self): + left = date_range('1/1/2011', periods=100, freq='H', tz='utc') + right = date_range('1/1/2011', periods=100, freq='H', tz='US/Eastern') + + assert not left.equals(right) + + @pytest.mark.parametrize('tzstr', ['US/Eastern', 'dateutil/US/Eastern']) + def test_dti_tz_nat(self, tzstr): + idx = DatetimeIndex([Timestamp("2013-1-1", tz=tzstr), pd.NaT]) + + assert isna(idx[1]) + assert idx[0].tzinfo is not None + + @pytest.mark.parametrize('tzstr', ['US/Eastern', 'dateutil/US/Eastern']) + def test_dti_astype_asobject_tzinfos(self, tzstr): + # GH#1345 + + # dates around a dst transition + rng = date_range('2/13/2010', '5/6/2010', tz=tzstr) + + objs = rng.astype(object) + for i, x in enumerate(objs): + exval = rng[i] + assert x == exval + assert x.tzinfo == exval.tzinfo + + objs = rng.astype(object) + for i, x in enumerate(objs): + exval = rng[i] + assert x == exval + assert x.tzinfo == exval.tzinfo + + @pytest.mark.parametrize('tzstr', ['US/Eastern', 'dateutil/US/Eastern']) + def test_dti_with_timezone_repr(self, tzstr): + rng = date_range('4/13/2010', '5/6/2010') + + rng_eastern = rng.tz_localize(tzstr) + + rng_repr = repr(rng_eastern) + assert '2010-04-13 00:00:00' in rng_repr + + @pytest.mark.parametrize('tzstr', ['US/Eastern', 'dateutil/US/Eastern']) + def test_dti_take_dont_lose_meta(self, tzstr): + rng = date_range('1/1/2000', periods=20, tz=tzstr) + + result = rng.take(lrange(5)) + assert result.tz == rng.tz + assert result.freq == rng.freq + + 
@pytest.mark.parametrize('tzstr', ['US/Eastern', 'dateutil/US/Eastern']) + def test_utc_box_timestamp_and_localize(self, tzstr): + tz = timezones.maybe_get_tz(tzstr) + + rng = date_range('3/11/2012', '3/12/2012', freq='H', tz='utc') + rng_eastern = rng.tz_convert(tzstr) + + expected = rng[-1].astimezone(tz) + + stamp = rng_eastern[-1] + assert stamp == expected + assert stamp.tzinfo == expected.tzinfo + + # right tzinfo + rng = date_range('3/13/2012', '3/14/2012', freq='H', tz='utc') + rng_eastern = rng.tz_convert(tzstr) + # test not valid for dateutil timezones. + # assert 'EDT' in repr(rng_eastern[0].tzinfo) + assert ('EDT' in repr(rng_eastern[0].tzinfo) or + 'tzfile' in repr(rng_eastern[0].tzinfo)) + + def test_dti_to_pydatetime(self): + dt = dateutil.parser.parse('2012-06-13T01:39:00Z') + dt = dt.replace(tzinfo=tzlocal()) + + arr = np.array([dt], dtype=object) + + result = to_datetime(arr, utc=True) + assert result.tz is pytz.utc + + rng = date_range('2012-11-03 03:00', '2012-11-05 03:00', tz=tzlocal()) + arr = rng.to_pydatetime() + result = to_datetime(arr, utc=True) + assert result.tz is pytz.utc + + def test_dti_to_pydatetime_fizedtz(self): + dates = np.array([datetime(2000, 1, 1, tzinfo=fixed_off), + datetime(2000, 1, 2, tzinfo=fixed_off), + datetime(2000, 1, 3, tzinfo=fixed_off)]) + dti = DatetimeIndex(dates) + + result = dti.to_pydatetime() + tm.assert_numpy_array_equal(dates, result) + + result = dti._mpl_repr() + tm.assert_numpy_array_equal(dates, result) + + @pytest.mark.parametrize('tz', [pytz.timezone('US/Central'), + gettz('US/Central')]) + def test_with_tz(self, tz): + # just want it to work + start = datetime(2011, 3, 12, tzinfo=pytz.utc) + dr = bdate_range(start, periods=50, freq=pd.offsets.Hour()) + assert dr.tz is pytz.utc + + # DateRange with naive datetimes + dr = bdate_range('1/1/2005', '1/1/2009', tz=pytz.utc) + dr = bdate_range('1/1/2005', '1/1/2009', tz=tz) + + # normalized + central = dr.tz_convert(tz) + assert central.tz is tz + naive = 
central[0].to_pydatetime().replace(tzinfo=None) + comp = tslib._localize_pydatetime(naive, tz).tzinfo + assert central[0].tz is comp + + # compare vs a localized tz + naive = dr[0].to_pydatetime().replace(tzinfo=None) + comp = tslib._localize_pydatetime(naive, tz).tzinfo + assert central[0].tz is comp + + # datetimes with tzinfo set + dr = bdate_range(datetime(2005, 1, 1, tzinfo=pytz.utc), + datetime(2009, 1, 1, tzinfo=pytz.utc)) + with pytest.raises(Exception): + bdate_range(datetime(2005, 1, 1, tzinfo=pytz.utc), '1/1/2009', + tz=tz) + + @pytest.mark.parametrize('prefix', ['', 'dateutil/']) + def test_field_access_localize(self, prefix): + strdates = ['1/1/2012', '3/1/2012', '4/1/2012'] + rng = DatetimeIndex(strdates, tz=prefix + 'US/Eastern') + assert (rng.hour == 0).all() + + # a more unusual time zone, #1946 + dr = date_range('2011-10-02 00:00', freq='h', periods=10, + tz=prefix + 'America/Atikokan') + + expected = Index(np.arange(10, dtype=np.int64)) + tm.assert_index_equal(dr.hour, expected) + + @pytest.mark.parametrize('tz', [pytz.timezone('US/Eastern'), + gettz('US/Eastern')]) + def test_dti_convert_tz_aware_datetime_datetime(self, tz): + # GH#1581 + dates = [datetime(2000, 1, 1), datetime(2000, 1, 2), + datetime(2000, 1, 3)] + + dates_aware = [tslib._localize_pydatetime(x, tz) for x in dates] + result = DatetimeIndex(dates_aware) + assert timezones.tz_compare(result.tz, tz) + + converted = to_datetime(dates_aware, utc=True) + ex_vals = np.array([Timestamp(x).value for x in dates_aware]) + tm.assert_numpy_array_equal(converted.asi8, ex_vals) + assert converted.tz is pytz.utc + + def test_dti_union_aware(self): + # non-overlapping + rng = date_range("2012-11-15 00:00:00", periods=6, freq="H", + tz="US/Central") + + rng2 = date_range("2012-11-15 12:00:00", periods=6, freq="H", + tz="US/Eastern") + + result = rng.union(rng2) + assert result.tz.zone == 'UTC' + + @pytest.mark.parametrize('tz', [None, 'UTC', "US/Central", + dateutil.tz.tzoffset(None, -28800)]) + 
@pytest.mark.usefixtures("datetime_tz_utc") + @pytest.mark.skipif(not PY3, reason="datetime.timezone not in PY2") + def test_iteration_preserves_nanoseconds(self, tz): + # GH 19603 + index = DatetimeIndex(["2018-02-08 15:00:00.168456358", + "2018-02-08 15:00:00.168456359"], tz=tz) + for i, ts in enumerate(index): + assert ts == index[i] + + +class TestDateRange(object): + """Tests for date_range with timezones""" + def test_hongkong_tz_convert(self): + # GH#1673 smoke test + dr = date_range('2012-01-01', '2012-01-10', freq='D', tz='Hongkong') + + # it works! + dr.hour + + @pytest.mark.parametrize('tzstr', ['US/Eastern', 'dateutil/US/Eastern']) + def test_date_range_span_dst_transition(self, tzstr): + # GH#1778 + + # Standard -> Daylight Savings Time + dr = date_range('03/06/2012 00:00', periods=200, freq='W-FRI', + tz='US/Eastern') + + assert (dr.hour == 0).all() + + dr = date_range('2012-11-02', periods=10, tz=tzstr) + assert (dr.hour == 0).all() + + @pytest.mark.parametrize('tzstr', ['US/Eastern', 'dateutil/US/Eastern']) + def test_date_range_timezone_str_argument(self, tzstr): + tz = timezones.maybe_get_tz(tzstr) + result = date_range('1/1/2000', periods=10, tz=tzstr) + expected = date_range('1/1/2000', periods=10, tz=tz) + + tm.assert_index_equal(result, expected) + + def test_date_range_with_fixedoffset_noname(self): + off = fixed_off_no_name + start = datetime(2012, 3, 11, 5, 0, 0, tzinfo=off) + end = datetime(2012, 6, 11, 5, 0, 0, tzinfo=off) + rng = date_range(start=start, end=end) + assert off == rng.tz + + idx = Index([start, end]) + assert off == idx.tz + + @pytest.mark.parametrize('tzstr', ['US/Eastern', 'dateutil/US/Eastern']) + def test_date_range_with_tz(self, tzstr): + stamp = Timestamp('3/11/2012 05:00', tz=tzstr) + assert stamp.hour == 5 + + rng = date_range('3/11/2012 04:00', periods=10, freq='H', + tz=tzstr) + + assert stamp == rng[1] + + +class TestToDatetime(object): + """Tests for the to_datetime constructor with timezones""" + def 
test_to_datetime_utc(self): + arr = np.array([dateutil.parser.parse('2012-06-13T01:39:00Z')], + dtype=object) + + result = to_datetime(arr, utc=True) + assert result.tz is pytz.utc + + def test_to_datetime_fixed_offset(self): + dates = [datetime(2000, 1, 1, tzinfo=fixed_off), + datetime(2000, 1, 2, tzinfo=fixed_off), + datetime(2000, 1, 3, tzinfo=fixed_off)] + result = to_datetime(dates) + assert result.tz == fixed_off diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 1b67ffce63b10..0d42b6e9692fe 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -1,30 +1,36 @@ """ test to_datetime """ import sys +import pytz import pytest import locale import calendar +import dateutil import numpy as np +from dateutil.parser import parse from datetime import datetime, date, time from distutils.version import LooseVersion import pandas as pd -from pandas import tslib -from pandas.tseries import tools -from pandas.tseries.tools import normalize_date -from pandas.compat import lmap -from pandas.compat.numpy import np_array_datetime64_compat -from pandas.types.common import is_datetime64_ns_dtype +from pandas.conftest import is_dateutil_le_261, is_dateutil_gt_261 +from pandas._libs import tslib +from pandas._libs.tslibs import parsing +from pandas.core.tools import datetimes as tools + +from pandas.errors import OutOfBoundsDatetime +from pandas.compat import lmap, PY3 +from pandas.core.dtypes.common import is_datetime64_ns_dtype from pandas.util import testing as tm -from pandas.util.testing import assert_series_equal, _skip_if_has_locale -from pandas import (isnull, to_datetime, Timestamp, Series, DataFrame, - Index, DatetimeIndex, NaT, date_range, bdate_range, - compat, lib) +import pandas.util._test_decorators as td +from pandas.util.testing import assert_series_equal +from pandas import (isna, to_datetime, Timestamp, Series, DataFrame, + Index, DatetimeIndex, 
NaT, date_range, compat) -class TimeConversionFormats(tm.TestCase): +class TestTimeConversionFormats(object): - def test_to_datetime_format(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_format(self, cache): values = ['1/1/2000', '1/2/2000', '1/3/2000'] results1 = [Timestamp('20000101'), Timestamp('20000201'), @@ -39,24 +45,25 @@ def test_to_datetime_format(self): (values[2], (results1[2], results2[2]))]: for i, fmt in enumerate(['%d/%m/%Y', '%m/%d/%Y']): - result = to_datetime(vals, format=fmt) + result = to_datetime(vals, format=fmt, cache=cache) expected = expecteds[i] if isinstance(expected, Series): assert_series_equal(result, Series(expected)) elif isinstance(expected, Timestamp): - self.assertEqual(result, expected) + assert result == expected else: tm.assert_index_equal(result, expected) - def test_to_datetime_format_YYYYMMDD(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_format_YYYYMMDD(self, cache): s = Series([19801222, 19801222] + [19810105] * 5) expected = Series([Timestamp(x) for x in s.apply(str)]) - result = to_datetime(s, format='%Y%m%d') + result = to_datetime(s, format='%Y%m%d', cache=cache) assert_series_equal(result, expected) - result = to_datetime(s.apply(str), format='%Y%m%d') + result = to_datetime(s.apply(str), format='%Y%m%d', cache=cache) assert_series_equal(result, expected) # with NaT @@ -65,44 +72,48 @@ def test_to_datetime_format_YYYYMMDD(self): expected[2] = np.nan s[2] = np.nan - result = to_datetime(s, format='%Y%m%d') + result = to_datetime(s, format='%Y%m%d', cache=cache) assert_series_equal(result, expected) # string with NaT s = s.apply(str) s[2] = 'nat' - result = to_datetime(s, format='%Y%m%d') + result = to_datetime(s, format='%Y%m%d', cache=cache) assert_series_equal(result, expected) # coercion # GH 7930 s = Series([20121231, 20141231, 99991231]) - result = pd.to_datetime(s, format='%Y%m%d', errors='ignore') + result = pd.to_datetime(s, format='%Y%m%d', 
errors='ignore', + cache=cache) expected = Series([datetime(2012, 12, 31), datetime(2014, 12, 31), datetime(9999, 12, 31)], dtype=object) - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) - result = pd.to_datetime(s, format='%Y%m%d', errors='coerce') + result = pd.to_datetime(s, format='%Y%m%d', errors='coerce', + cache=cache) expected = Series(['20121231', '20141231', 'NaT'], dtype='M8[ns]') assert_series_equal(result, expected) - # GH 10178 - def test_to_datetime_format_integer(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_format_integer(self, cache): + # GH 10178 s = Series([2000, 2001, 2002]) expected = Series([Timestamp(x) for x in s.apply(str)]) - result = to_datetime(s, format='%Y') + result = to_datetime(s, format='%Y', cache=cache) assert_series_equal(result, expected) s = Series([200001, 200105, 200206]) expected = Series([Timestamp(x[:4] + '-' + x[4:]) for x in s.apply(str) ]) - result = to_datetime(s, format='%Y%m') + result = to_datetime(s, format='%Y%m', cache=cache) assert_series_equal(result, expected) - def test_to_datetime_format_microsecond(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_format_microsecond(self, cache): # these are locale dependent lang, _ = locale.getlocale() @@ -110,11 +121,12 @@ def test_to_datetime_format_microsecond(self): val = '01-{}-2011 00:00:01.978'.format(month_abbr) format = '%d-%b-%Y %H:%M:%S.%f' - result = to_datetime(val, format=format) + result = to_datetime(val, format=format, cache=cache) exp = datetime.strptime(val, format) - self.assertEqual(result, exp) + assert result == exp - def test_to_datetime_format_time(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_format_time(self, cache): data = [ ['01/10/2010 15:20', '%m/%d/%Y %H:%M', Timestamp('2010-01-10 15:20')], @@ -130,12 +142,12 @@ def test_to_datetime_format_time(self): # Timestamp('2010-01-10 09:12:56')] ] for s, 
format, dt in data: - self.assertEqual(to_datetime(s, format=format), dt) + assert to_datetime(s, format=format, cache=cache) == dt - def test_to_datetime_with_non_exact(self): + @td.skip_if_has_locale + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_with_non_exact(self, cache): # GH 10834 - tm._skip_if_has_locale() - # 8904 # exact kw if sys.version_info < (2, 7): @@ -143,12 +155,13 @@ def test_to_datetime_with_non_exact(self): s = Series(['19MAY11', 'foobar19MAY11', '19MAY11:00:00:00', '19MAY11 00:00:00Z']) - result = to_datetime(s, format='%d%b%y', exact=False) + result = to_datetime(s, format='%d%b%y', exact=False, cache=cache) expected = to_datetime(s.str.extract(r'(\d+\w+\d+)', expand=False), - format='%d%b%y') + format='%d%b%y', cache=cache) assert_series_equal(result, expected) - def test_parse_nanoseconds_with_formula(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_parse_nanoseconds_with_formula(self, cache): # GH8989 # trunctaing the nanoseconds when a format was provided @@ -157,55 +170,136 @@ def test_parse_nanoseconds_with_formula(self): "2012-01-01 09:00:00.001", "2012-01-01 09:00:00.001000", "2012-01-01 09:00:00.001000000", ]: - expected = pd.to_datetime(v) - result = pd.to_datetime(v, format="%Y-%m-%d %H:%M:%S.%f") - self.assertEqual(result, expected) + expected = pd.to_datetime(v, cache=cache) + result = pd.to_datetime(v, format="%Y-%m-%d %H:%M:%S.%f", + cache=cache) + assert result == expected - def test_to_datetime_format_weeks(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_format_weeks(self, cache): data = [ ['2009324', '%Y%W%w', Timestamp('2009-08-13')], ['2013020', '%Y%U%w', Timestamp('2013-01-13')] ] for s, format, dt in data: - self.assertEqual(to_datetime(s, format=format), dt) - - -class TestToDatetime(tm.TestCase): - - def test_to_datetime_dt64s(self): + assert to_datetime(s, format=format, cache=cache) == dt + + +class TestToDatetime(object): + def 
test_to_datetime_pydatetime(self): + actual = pd.to_datetime(datetime(2008, 1, 15)) + assert actual == datetime(2008, 1, 15) + + def test_to_datetime_YYYYMMDD(self): + actual = pd.to_datetime('20080115') + assert actual == datetime(2008, 1, 15) + + def test_to_datetime_unparseable_ignore(self): + # unparseable + s = 'Month 1, 1999' + assert pd.to_datetime(s, errors='ignore') == s + + @td.skip_if_windows # `tm.set_timezone` does not work in windows + def test_to_datetime_now(self): + # See GH#18666 + with tm.set_timezone('US/Eastern'): + npnow = np.datetime64('now').astype('datetime64[ns]') + pdnow = pd.to_datetime('now') + pdnow2 = pd.to_datetime(['now'])[0] + + # These should all be equal with infinite perf; this gives + # a generous margin of 10 seconds + assert abs(pdnow.value - npnow.astype(np.int64)) < 1e10 + assert abs(pdnow2.value - npnow.astype(np.int64)) < 1e10 + + assert pdnow.tzinfo is None + assert pdnow2.tzinfo is None + + @td.skip_if_windows # `tm.set_timezone` does not work in windows + def test_to_datetime_today(self): + # See GH#18666 + # Test with one timezone far ahead of UTC and another far behind, so + # one of these will _almost_ alawys be in a different day from UTC. + # Unfortunately this test between 12 and 1 AM Samoa time + # this both of these timezones _and_ UTC will all be in the same day, + # so this test will not detect the regression introduced in #18666. 
+ with tm.set_timezone('Pacific/Auckland'): # 12-13 hours ahead of UTC + nptoday = np.datetime64('today')\ + .astype('datetime64[ns]').astype(np.int64) + pdtoday = pd.to_datetime('today') + pdtoday2 = pd.to_datetime(['today'])[0] + + tstoday = pd.Timestamp('today') + tstoday2 = pd.Timestamp.today() + + # These should all be equal with infinite perf; this gives + # a generous margin of 10 seconds + assert abs(pdtoday.normalize().value - nptoday) < 1e10 + assert abs(pdtoday2.normalize().value - nptoday) < 1e10 + assert abs(pdtoday.value - tstoday.value) < 1e10 + assert abs(pdtoday.value - tstoday2.value) < 1e10 + + assert pdtoday.tzinfo is None + assert pdtoday2.tzinfo is None + + with tm.set_timezone('US/Samoa'): # 11 hours behind UTC + nptoday = np.datetime64('today')\ + .astype('datetime64[ns]').astype(np.int64) + pdtoday = pd.to_datetime('today') + pdtoday2 = pd.to_datetime(['today'])[0] + + # These should all be equal with infinite perf; this gives + # a generous margin of 10 seconds + assert abs(pdtoday.normalize().value - nptoday) < 1e10 + assert abs(pdtoday2.normalize().value - nptoday) < 1e10 + + assert pdtoday.tzinfo is None + assert pdtoday2.tzinfo is None + + def test_to_datetime_today_now_unicode_bytes(self): + to_datetime([u'now']) + to_datetime([u'today']) + if not PY3: + to_datetime(['now']) + to_datetime(['today']) + + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_dt64s(self, cache): in_bound_dts = [ np.datetime64('2000-01-01'), np.datetime64('2000-01-02'), ] for dt in in_bound_dts: - self.assertEqual(pd.to_datetime(dt), Timestamp(dt)) + assert pd.to_datetime(dt, cache=cache) == Timestamp(dt) oob_dts = [np.datetime64('1000-01-01'), np.datetime64('5000-01-02'), ] for dt in oob_dts: - self.assertRaises(ValueError, pd.to_datetime, dt, errors='raise') - self.assertRaises(ValueError, Timestamp, dt) - self.assertIs(pd.to_datetime(dt, errors='coerce'), NaT) + pytest.raises(ValueError, pd.to_datetime, dt, errors='raise') + 
pytest.raises(ValueError, Timestamp, dt) + assert pd.to_datetime(dt, errors='coerce', cache=cache) is NaT - def test_to_datetime_array_of_dt64s(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_array_of_dt64s(self, cache): dts = [np.datetime64('2000-01-01'), np.datetime64('2000-01-02'), ] # Assuming all datetimes are in bounds, to_datetime() returns # an array that is equal to Timestamp() parsing - self.assert_numpy_array_equal( - pd.to_datetime(dts, box=False), + tm.assert_numpy_array_equal( + pd.to_datetime(dts, box=False, cache=cache), np.array([Timestamp(x).asm8 for x in dts]) ) # A list of datetimes where the last one is out of bounds dts_with_oob = dts + [np.datetime64('9999-01-01')] - self.assertRaises(ValueError, pd.to_datetime, dts_with_oob, - errors='raise') + pytest.raises(ValueError, pd.to_datetime, dts_with_oob, + errors='raise') - self.assert_numpy_array_equal( - pd.to_datetime(dts_with_oob, box=False, errors='coerce'), + tm.assert_numpy_array_equal( + pd.to_datetime(dts_with_oob, box=False, errors='coerce', + cache=cache), np.array( [ Timestamp(dts_with_oob[0]).asm8, @@ -219,21 +313,23 @@ def test_to_datetime_array_of_dt64s(self): # With errors='ignore', out of bounds datetime64s # are converted to their .item(), which depending on the version of # numpy is either a python datetime.datetime or datetime.date - self.assert_numpy_array_equal( - pd.to_datetime(dts_with_oob, box=False, errors='ignore'), + tm.assert_numpy_array_equal( + pd.to_datetime(dts_with_oob, box=False, errors='ignore', + cache=cache), np.array( [dt.item() for dt in dts_with_oob], dtype='O' ) ) - def test_to_datetime_tz(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_tz(self, cache): # xref 8260 # uniform returns a DatetimeIndex arr = [pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'), pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Pacific')] - result = pd.to_datetime(arr) + result = pd.to_datetime(arr, 
cache=cache) expected = DatetimeIndex( ['2013-01-01 13:00:00', '2013-01-02 14:00:00'], tz='US/Pacific') tm.assert_index_equal(result, expected) @@ -241,37 +337,82 @@ def test_to_datetime_tz(self): # mixed tzs will raise arr = [pd.Timestamp('2013-01-01 13:00:00', tz='US/Pacific'), pd.Timestamp('2013-01-02 14:00:00', tz='US/Eastern')] - self.assertRaises(ValueError, lambda: pd.to_datetime(arr)) - - def test_to_datetime_tz_pytz(self): - - # xref 8260 - tm._skip_if_no_pytz() - import pytz + pytest.raises(ValueError, lambda: pd.to_datetime(arr, cache=cache)) + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_tz_pytz(self, cache): + # see gh-8260 us_eastern = pytz.timezone('US/Eastern') arr = np.array([us_eastern.localize(datetime(year=2000, month=1, day=1, hour=3, minute=0)), us_eastern.localize(datetime(year=2000, month=6, day=1, hour=3, minute=0))], dtype=object) - result = pd.to_datetime(arr, utc=True) + result = pd.to_datetime(arr, utc=True, cache=cache) expected = DatetimeIndex(['2000-01-01 08:00:00+00:00', '2000-06-01 07:00:00+00:00'], dtype='datetime64[ns, UTC]', freq=None) tm.assert_index_equal(result, expected) - def test_to_datetime_utc_is_true(self): - # See gh-11934 - start = pd.Timestamp('2014-01-01', tz='utc') - end = pd.Timestamp('2014-01-03', tz='utc') - date_range = pd.bdate_range(start, end) - - result = pd.to_datetime(date_range, utc=True) - expected = pd.DatetimeIndex(data=date_range) - tm.assert_index_equal(result, expected) - - def test_to_datetime_tz_psycopg2(self): + @pytest.mark.parametrize('cache', [True, False]) + @pytest.mark.parametrize("init_constructor, end_constructor, test_method", + [(Index, DatetimeIndex, tm.assert_index_equal), + (list, DatetimeIndex, tm.assert_index_equal), + (np.array, DatetimeIndex, tm.assert_index_equal), + (Series, Series, tm.assert_series_equal)]) + def test_to_datetime_utc_true(self, + cache, + init_constructor, + end_constructor, + test_method): + # See gh-11934 & gh-6415 + data = 
['20100102 121314', '20100102 121315'] + expected_data = [pd.Timestamp('2010-01-02 12:13:14', tz='utc'), + pd.Timestamp('2010-01-02 12:13:15', tz='utc')] + + result = pd.to_datetime(init_constructor(data), + format='%Y%m%d %H%M%S', + utc=True, + cache=cache) + expected = end_constructor(expected_data) + test_method(result, expected) + + # Test scalar case as well + for scalar, expected in zip(data, expected_data): + result = pd.to_datetime(scalar, format='%Y%m%d %H%M%S', utc=True, + cache=cache) + assert result == expected + + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_utc_true_with_series_single_value(self, cache): + # GH 15760 UTC=True with Series + ts = 1.5e18 + result = pd.to_datetime(pd.Series([ts]), utc=True, cache=cache) + expected = pd.Series([pd.Timestamp(ts, tz='utc')]) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_utc_true_with_series_tzaware_string(self, cache): + ts = '2013-01-01 00:00:00-01:00' + expected_ts = '2013-01-01 01:00:00' + data = pd.Series([ts] * 3) + result = pd.to_datetime(data, utc=True, cache=cache) + expected = pd.Series([pd.Timestamp(expected_ts, tz='utc')] * 3) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('cache', [True, False]) + @pytest.mark.parametrize('date, dtype', + [('2013-01-01 01:00:00', 'datetime64[ns]'), + ('2013-01-01 01:00:00', 'datetime64[ns, UTC]')]) + def test_to_datetime_utc_true_with_series_datetime_ns(self, cache, date, + dtype): + expected = pd.Series([pd.Timestamp('2013-01-01 01:00:00', tz='UTC')]) + result = pd.to_datetime(pd.Series([date], dtype=dtype), utc=True, + cache=cache) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_tz_psycopg2(self, cache): # xref 8260 try: @@ -286,7 +427,7 @@ def test_to_datetime_tz_psycopg2(self): datetime(2000, 6, 1, 3, 0, tzinfo=tz2)], dtype=object) - result = pd.to_datetime(arr, 
errors='coerce', utc=True) + result = pd.to_datetime(arr, errors='coerce', utc=True, cache=cache) expected = DatetimeIndex(['2000-01-01 08:00:00+00:00', '2000-06-01 07:00:00+00:00'], dtype='datetime64[ns, UTC]', freq=None) @@ -296,112 +437,171 @@ def test_to_datetime_tz_psycopg2(self): i = pd.DatetimeIndex([ '2000-01-01 08:00:00+00:00' ], tz=psycopg2.tz.FixedOffsetTimezone(offset=-300, name=None)) - self.assertTrue(is_datetime64_ns_dtype(i)) + assert is_datetime64_ns_dtype(i) # tz coerceion - result = pd.to_datetime(i, errors='coerce') + result = pd.to_datetime(i, errors='coerce', cache=cache) tm.assert_index_equal(result, i) - result = pd.to_datetime(i, errors='coerce', utc=True) + result = pd.to_datetime(i, errors='coerce', utc=True, cache=cache) expected = pd.DatetimeIndex(['2000-01-01 13:00:00'], dtype='datetime64[ns, UTC]') tm.assert_index_equal(result, expected) - def test_datetime_bool(self): + @pytest.mark.parametrize( + 'cache', + [pytest.param(True, + marks=pytest.mark.skipif(True, reason="GH 18111")), + False]) + def test_datetime_bool(self, cache): # GH13176 - with self.assertRaises(TypeError): + with pytest.raises(TypeError): to_datetime(False) - self.assertTrue(to_datetime(False, errors="coerce") is NaT) - self.assertEqual(to_datetime(False, errors="ignore"), False) - with self.assertRaises(TypeError): + assert to_datetime(False, errors="coerce", cache=cache) is NaT + assert to_datetime(False, errors="ignore", cache=cache) is False + with pytest.raises(TypeError): to_datetime(True) - self.assertTrue(to_datetime(True, errors="coerce") is NaT) - self.assertEqual(to_datetime(True, errors="ignore"), True) - with self.assertRaises(TypeError): - to_datetime([False, datetime.today()]) - with self.assertRaises(TypeError): - to_datetime(['20130101', True]) + assert to_datetime(True, errors="coerce", cache=cache) is NaT + assert to_datetime(True, errors="ignore", cache=cache) is True + with pytest.raises(TypeError): + to_datetime([False, datetime.today()], 
cache=cache) + with pytest.raises(TypeError): + to_datetime(['20130101', True], cache=cache) tm.assert_index_equal(to_datetime([0, False, NaT, 0.0], - errors="coerce"), - DatetimeIndex([to_datetime(0), NaT, - NaT, to_datetime(0)])) + errors="coerce", cache=cache), + DatetimeIndex([to_datetime(0, cache=cache), + NaT, + NaT, + to_datetime(0, cache=cache)])) def test_datetime_invalid_datatype(self): # GH13176 - with self.assertRaises(TypeError): + with pytest.raises(TypeError): pd.to_datetime(bool) - with self.assertRaises(TypeError): + with pytest.raises(TypeError): pd.to_datetime(pd.to_datetime) - -class ToDatetimeUnit(tm.TestCase): - - def test_unit(self): + @pytest.mark.parametrize("utc", [True, None]) + @pytest.mark.parametrize("format", ['%Y%m%d %H:%M:%S', None]) + @pytest.mark.parametrize("box", [True, False]) + @pytest.mark.parametrize("constructor", [list, tuple, np.array, pd.Index]) + def test_to_datetime_cache(self, utc, format, box, constructor): + date = '20130101 00:00:00' + test_dates = [date] * 10**5 + data = constructor(test_dates) + result = pd.to_datetime(data, utc=utc, format=format, box=box, + cache=True) + expected = pd.to_datetime(data, utc=utc, format=format, box=box, + cache=False) + if box: + tm.assert_index_equal(result, expected) + else: + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("utc", [True, None]) + @pytest.mark.parametrize("format", ['%Y%m%d %H:%M:%S', None]) + def test_to_datetime_cache_series(self, utc, format): + date = '20130101 00:00:00' + test_dates = [date] * 10**5 + data = pd.Series(test_dates) + result = pd.to_datetime(data, utc=utc, format=format, cache=True) + expected = pd.to_datetime(data, utc=utc, format=format, cache=False) + tm.assert_series_equal(result, expected) + + def test_to_datetime_cache_scalar(self): + date = '20130101 00:00:00' + result = pd.to_datetime(date, cache=True) + expected = pd.Timestamp('20130101 00:00:00') + assert result == expected + + 
@pytest.mark.parametrize('date, format', + [('2017-20', '%Y-%W'), + ('20 Sunday', '%W %A'), + ('20 Sun', '%W %a'), + ('2017-21', '%Y-%U'), + ('20 Sunday', '%U %A'), + ('20 Sun', '%U %a')]) + def test_week_without_day_and_calendar_year(self, date, format): + # GH16774 + + msg = "Cannot use '%W' or '%U' without day and year" + with tm.assert_raises_regex(ValueError, msg): + pd.to_datetime(date, format=format) + + +class TestToDatetimeUnit(object): + @pytest.mark.parametrize('cache', [True, False]) + def test_unit(self, cache): # GH 11758 # test proper behavior with erros - with self.assertRaises(ValueError): - to_datetime([1], unit='D', format='%Y%m%d') + with pytest.raises(ValueError): + to_datetime([1], unit='D', format='%Y%m%d', cache=cache) values = [11111111, 1, 1.0, tslib.iNaT, NaT, np.nan, 'NaT', ''] - result = to_datetime(values, unit='D', errors='ignore') + result = to_datetime(values, unit='D', errors='ignore', cache=cache) expected = Index([11111111, Timestamp('1970-01-02'), Timestamp('1970-01-02'), NaT, NaT, NaT, NaT, NaT], dtype=object) tm.assert_index_equal(result, expected) - result = to_datetime(values, unit='D', errors='coerce') + result = to_datetime(values, unit='D', errors='coerce', cache=cache) expected = DatetimeIndex(['NaT', '1970-01-02', '1970-01-02', 'NaT', 'NaT', 'NaT', 'NaT', 'NaT']) tm.assert_index_equal(result, expected) - with self.assertRaises(tslib.OutOfBoundsDatetime): - to_datetime(values, unit='D', errors='raise') + with pytest.raises(tslib.OutOfBoundsDatetime): + to_datetime(values, unit='D', errors='raise', cache=cache) values = [1420043460000, tslib.iNaT, NaT, np.nan, 'NaT'] - result = to_datetime(values, errors='ignore', unit='s') + result = to_datetime(values, errors='ignore', unit='s', cache=cache) expected = Index([1420043460000, NaT, NaT, NaT, NaT], dtype=object) tm.assert_index_equal(result, expected) - result = to_datetime(values, errors='coerce', unit='s') + result = to_datetime(values, errors='coerce', unit='s', 
cache=cache) expected = DatetimeIndex(['NaT', 'NaT', 'NaT', 'NaT', 'NaT']) tm.assert_index_equal(result, expected) - with self.assertRaises(tslib.OutOfBoundsDatetime): - to_datetime(values, errors='raise', unit='s') + with pytest.raises(tslib.OutOfBoundsDatetime): + to_datetime(values, errors='raise', unit='s', cache=cache) # if we have a string, then we raise a ValueError # and NOT an OutOfBoundsDatetime for val in ['foo', Timestamp('20130101')]: try: - to_datetime(val, errors='raise', unit='s') + to_datetime(val, errors='raise', unit='s', cache=cache) except tslib.OutOfBoundsDatetime: raise AssertionError("incorrect exception raised") except ValueError: pass - def test_unit_consistency(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_unit_consistency(self, cache): # consistency of conversions expected = Timestamp('1970-05-09 14:25:11') - result = pd.to_datetime(11111111, unit='s', errors='raise') - self.assertEqual(result, expected) - self.assertIsInstance(result, Timestamp) + result = pd.to_datetime(11111111, unit='s', errors='raise', + cache=cache) + assert result == expected + assert isinstance(result, Timestamp) - result = pd.to_datetime(11111111, unit='s', errors='coerce') - self.assertEqual(result, expected) - self.assertIsInstance(result, Timestamp) + result = pd.to_datetime(11111111, unit='s', errors='coerce', + cache=cache) + assert result == expected + assert isinstance(result, Timestamp) - result = pd.to_datetime(11111111, unit='s', errors='ignore') - self.assertEqual(result, expected) - self.assertIsInstance(result, Timestamp) + result = pd.to_datetime(11111111, unit='s', errors='ignore', + cache=cache) + assert result == expected + assert isinstance(result, Timestamp) - def test_unit_with_numeric(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_unit_with_numeric(self, cache): # GH 13180 # coercions from floats/ints are ok @@ -410,10 +610,10 @@ def test_unit_with_numeric(self): arr1 = [1.434692e+18, 
1.432766e+18] arr2 = np.array(arr1).astype('int64') for errors in ['ignore', 'raise', 'coerce']: - result = pd.to_datetime(arr1, errors=errors) + result = pd.to_datetime(arr1, errors=errors, cache=cache) tm.assert_index_equal(result, expected) - result = pd.to_datetime(arr2, errors=errors) + result = pd.to_datetime(arr2, errors=errors, cache=cache) tm.assert_index_equal(result, expected) # but we want to make sure that we are coercing @@ -422,7 +622,7 @@ def test_unit_with_numeric(self): '2015-06-19 05:33:20', '2015-05-27 22:33:20']) arr = ['foo', 1.434692e+18, 1.432766e+18] - result = pd.to_datetime(arr, errors='coerce') + result = pd.to_datetime(arr, errors='coerce', cache=cache) tm.assert_index_equal(result, expected) expected = DatetimeIndex(['2015-06-19 05:33:20', @@ -430,31 +630,33 @@ def test_unit_with_numeric(self): 'NaT', 'NaT']) arr = [1.434692e+18, 1.432766e+18, 'foo', 'NaT'] - result = pd.to_datetime(arr, errors='coerce') + result = pd.to_datetime(arr, errors='coerce', cache=cache) tm.assert_index_equal(result, expected) - def test_unit_mixed(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_unit_mixed(self, cache): # mixed integers/datetimes expected = DatetimeIndex(['2013-01-01', 'NaT', 'NaT']) arr = [pd.Timestamp('20130101'), 1.434692e+18, 1.432766e+18] - result = pd.to_datetime(arr, errors='coerce') + result = pd.to_datetime(arr, errors='coerce', cache=cache) tm.assert_index_equal(result, expected) - with self.assertRaises(ValueError): - pd.to_datetime(arr, errors='raise') + with pytest.raises(ValueError): + pd.to_datetime(arr, errors='raise', cache=cache) expected = DatetimeIndex(['NaT', 'NaT', '2013-01-01']) arr = [1.434692e+18, 1.432766e+18, pd.Timestamp('20130101')] - result = pd.to_datetime(arr, errors='coerce') + result = pd.to_datetime(arr, errors='coerce', cache=cache) tm.assert_index_equal(result, expected) - with self.assertRaises(ValueError): - pd.to_datetime(arr, errors='raise') + with pytest.raises(ValueError): + 
pd.to_datetime(arr, errors='raise', cache=cache) - def test_dataframe(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_dataframe(self, cache): df = DataFrame({'year': [2015, 2016], 'month': [2, 3], @@ -468,19 +670,20 @@ def test_dataframe(self): result = to_datetime({'year': df['year'], 'month': df['month'], - 'day': df['day']}) + 'day': df['day']}, cache=cache) expected = Series([Timestamp('20150204 00:00:00'), Timestamp('20160305 00:0:00')]) assert_series_equal(result, expected) # dict-like - result = to_datetime(df[['year', 'month', 'day']].to_dict()) + result = to_datetime(df[['year', 'month', 'day']].to_dict(), + cache=cache) assert_series_equal(result, expected) # dict but with constructable df2 = df[['year', 'month', 'day']].to_dict() df2['month'] = 2 - result = to_datetime(df2) + result = to_datetime(df2, cache=cache) expected2 = Series([Timestamp('20150204 00:00:00'), Timestamp('20160205 00:0:00')]) assert_series_equal(result, expected2) @@ -501,7 +704,8 @@ def test_dataframe(self): ] for d in units: - result = to_datetime(df[list(d.keys())].rename(columns=d)) + result = to_datetime(df[list(d.keys())].rename(columns=d), + cache=cache) expected = Series([Timestamp('20150204 06:58:10'), Timestamp('20160305 07:59:11')]) assert_series_equal(result, expected) @@ -516,65 +720,74 @@ def test_dataframe(self): 'us': 'us', 'ns': 'ns'} - result = to_datetime(df.rename(columns=d)) + result = to_datetime(df.rename(columns=d), cache=cache) expected = Series([Timestamp('20150204 06:58:10.001002003'), Timestamp('20160305 07:59:11.001002003')]) assert_series_equal(result, expected) # coerce back to int - result = to_datetime(df.astype(str)) + result = to_datetime(df.astype(str), cache=cache) assert_series_equal(result, expected) # passing coerce df2 = DataFrame({'year': [2015, 2016], 'month': [2, 20], 'day': [4, 5]}) - with self.assertRaises(ValueError): - to_datetime(df2) - result = to_datetime(df2, errors='coerce') + + msg = ("cannot assemble the 
datetimes: time data .+ does not " + r"match format '%Y%m%d' \(match\)") + with tm.assert_raises_regex(ValueError, msg): + to_datetime(df2, cache=cache) + result = to_datetime(df2, errors='coerce', cache=cache) expected = Series([Timestamp('20150204 00:00:00'), NaT]) assert_series_equal(result, expected) # extra columns - with self.assertRaises(ValueError): + msg = ("extra keys have been passed to the datetime assemblage: " + r"\[foo\]") + with tm.assert_raises_regex(ValueError, msg): df2 = df.copy() df2['foo'] = 1 - to_datetime(df2) + to_datetime(df2, cache=cache) # not enough + msg = (r'to assemble mappings requires at least that \[year, month, ' + r'day\] be specified: \[.+\] is missing') for c in [['year'], ['year', 'month'], ['year', 'month', 'second'], ['month', 'day'], ['year', 'day', 'second']]: - with self.assertRaises(ValueError): - to_datetime(df[c]) + with tm.assert_raises_regex(ValueError, msg): + to_datetime(df[c], cache=cache) # duplicates + msg = 'cannot assemble with duplicate keys' df2 = DataFrame({'year': [2015, 2016], 'month': [2, 20], 'day': [4, 5]}) df2.columns = ['year', 'year', 'day'] - with self.assertRaises(ValueError): - to_datetime(df2) + with tm.assert_raises_regex(ValueError, msg): + to_datetime(df2, cache=cache) df2 = DataFrame({'year': [2015, 2016], 'month': [2, 20], 'day': [4, 5], 'hour': [4, 5]}) df2.columns = ['year', 'month', 'day', 'day'] - with self.assertRaises(ValueError): - to_datetime(df2) + with tm.assert_raises_regex(ValueError, msg): + to_datetime(df2, cache=cache) - def test_dataframe_dtypes(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_dataframe_dtypes(self, cache): # #13451 df = DataFrame({'year': [2015, 2016], 'month': [2, 3], 'day': [4, 5]}) # int16 - result = to_datetime(df.astype('int16')) + result = to_datetime(df.astype('int16'), cache=cache) expected = Series([Timestamp('20150204 00:00:00'), Timestamp('20160305 00:00:00')]) assert_series_equal(result, expected) @@ -582,7 +795,7 @@ def 
test_dataframe_dtypes(self): # mixed dtypes df['month'] = df['month'].astype('int8') df['day'] = df['day'].astype('int8') - result = to_datetime(df) + result = to_datetime(df, cache=cache) expected = Series([Timestamp('20150204 00:00:00'), Timestamp('20160305 00:00:00')]) assert_series_equal(result, expected) @@ -591,129 +804,132 @@ def test_dataframe_dtypes(self): df = DataFrame({'year': [2000, 2001], 'month': [1.5, 1], 'day': [1, 1]}) - with self.assertRaises(ValueError): - to_datetime(df) + with pytest.raises(ValueError): + to_datetime(df, cache=cache) -class ToDatetimeMisc(tm.TestCase): +class TestToDatetimeMisc(object): + def test_to_datetime_barely_out_of_bounds(self): + # GH#19529 + # GH#19382 close enough to bounds that dropping nanos would result + # in an in-bounds datetime + arr = np.array(['2262-04-11 23:47:16.854775808'], dtype=object) - def test_index_to_datetime(self): - idx = Index(['1/1/2000', '1/2/2000', '1/3/2000']) + with pytest.raises(OutOfBoundsDatetime): + to_datetime(arr) - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - result = idx.to_datetime() - expected = DatetimeIndex(pd.to_datetime(idx.values)) - tm.assert_index_equal(result, expected) - - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - today = datetime.today() - idx = Index([today], dtype=object) - result = idx.to_datetime() - expected = DatetimeIndex([today]) - tm.assert_index_equal(result, expected) - - def test_to_datetime_iso8601(self): - result = to_datetime(["2012-01-01 00:00:00"]) + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_iso8601(self, cache): + result = to_datetime(["2012-01-01 00:00:00"], cache=cache) exp = Timestamp("2012-01-01 00:00:00") - self.assertEqual(result[0], exp) + assert result[0] == exp - result = to_datetime(['20121001']) # bad iso 8601 + result = to_datetime(['20121001'], cache=cache) # bad iso 8601 exp = Timestamp('2012-10-01') - self.assertEqual(result[0], exp) + assert 
result[0] == exp - def test_to_datetime_default(self): - rs = to_datetime('2001') + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_default(self, cache): + rs = to_datetime('2001', cache=cache) xp = datetime(2001, 1, 1) - self.assertTrue(rs, xp) + assert rs == xp # dayfirst is essentially broken # to_datetime('01-13-2012', dayfirst=True) - # self.assertRaises(ValueError, to_datetime('01-13-2012', + # pytest.raises(ValueError, to_datetime('01-13-2012', # dayfirst=True)) - def test_to_datetime_on_datetime64_series(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_on_datetime64_series(self, cache): # #2699 s = Series(date_range('1/1/2000', periods=10)) - result = to_datetime(s) - self.assertEqual(result[0], s[0]) + result = to_datetime(s, cache=cache) + assert result[0] == s[0] - def test_to_datetime_with_space_in_series(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_with_space_in_series(self, cache): # GH 6428 s = Series(['10/18/2006', '10/18/2008', ' ']) - tm.assertRaises(ValueError, lambda: to_datetime(s, errors='raise')) - result_coerce = to_datetime(s, errors='coerce') + pytest.raises(ValueError, lambda: to_datetime(s, + errors='raise', + cache=cache)) + result_coerce = to_datetime(s, errors='coerce', cache=cache) expected_coerce = Series([datetime(2006, 10, 18), datetime(2008, 10, 18), NaT]) tm.assert_series_equal(result_coerce, expected_coerce) - result_ignore = to_datetime(s, errors='ignore') + result_ignore = to_datetime(s, errors='ignore', cache=cache) tm.assert_series_equal(result_ignore, s) - def test_to_datetime_with_apply(self): + @td.skip_if_has_locale + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_with_apply(self, cache): # this is only locale tested with US/None locales - tm._skip_if_has_locale() - # GH 5195 # with a format and coerce a single item to_datetime fails td = Series(['May 04', 'Jun 02', 'Dec 11'], index=[1, 2, 3]) - expected 
= pd.to_datetime(td, format='%b %y') - result = td.apply(pd.to_datetime, format='%b %y') + expected = pd.to_datetime(td, format='%b %y', cache=cache) + result = td.apply(pd.to_datetime, format='%b %y', cache=cache) assert_series_equal(result, expected) td = pd.Series(['May 04', 'Jun 02', ''], index=[1, 2, 3]) - self.assertRaises(ValueError, - lambda: pd.to_datetime(td, format='%b %y', - errors='raise')) - self.assertRaises(ValueError, - lambda: td.apply(pd.to_datetime, format='%b %y', - errors='raise')) - expected = pd.to_datetime(td, format='%b %y', errors='coerce') + pytest.raises(ValueError, + lambda: pd.to_datetime(td, format='%b %y', + errors='raise', + cache=cache)) + pytest.raises(ValueError, + lambda: td.apply(pd.to_datetime, format='%b %y', + errors='raise', cache=cache)) + expected = pd.to_datetime(td, format='%b %y', errors='coerce', + cache=cache) result = td.apply( - lambda x: pd.to_datetime(x, format='%b %y', errors='coerce')) + lambda x: pd.to_datetime(x, format='%b %y', errors='coerce', + cache=cache)) assert_series_equal(result, expected) - def test_to_datetime_types(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_types(self, cache): # empty string - result = to_datetime('') - self.assertIs(result, NaT) + result = to_datetime('', cache=cache) + assert result is NaT - result = to_datetime(['', '']) - self.assertTrue(isnull(result).all()) + result = to_datetime(['', ''], cache=cache) + assert isna(result).all() # ints result = Timestamp(0) - expected = to_datetime(0) - self.assertEqual(result, expected) + expected = to_datetime(0, cache=cache) + assert result == expected # GH 3888 (strings) - expected = to_datetime(['2012'])[0] - result = to_datetime('2012') - self.assertEqual(result, expected) + expected = to_datetime(['2012'], cache=cache)[0] + result = to_datetime('2012', cache=cache) + assert result == expected # array = ['2012','20120101','20120101 12:01:01'] array = ['20120101', '20120101 12:01:01'] - expected = 
list(to_datetime(array)) + expected = list(to_datetime(array, cache=cache)) result = lmap(Timestamp, array) tm.assert_almost_equal(result, expected) # currently fails ### # result = Timestamp('2012') # expected = to_datetime('2012') - # self.assertEqual(result, expected) + # assert result == expected - def test_to_datetime_unprocessable_input(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_unprocessable_input(self, cache): # GH 4928 - self.assert_numpy_array_equal( - to_datetime([1, '1'], errors='ignore'), + tm.assert_numpy_array_equal( + to_datetime([1, '1'], errors='ignore', cache=cache), np.array([1, '1'], dtype='O') ) - self.assertRaises(TypeError, to_datetime, [1, '1'], errors='raise') + pytest.raises(TypeError, to_datetime, [1, '1'], errors='raise', + cache=cache) def test_to_datetime_other_datetime64_units(self): # 5/25/2012 @@ -721,10 +937,10 @@ def test_to_datetime_other_datetime64_units(self): as_obj = scalar.astype('O') index = DatetimeIndex([scalar]) - self.assertEqual(index[0], scalar.astype('O')) + assert index[0] == scalar.astype('O') value = Timestamp(scalar) - self.assertEqual(value, as_obj) + assert value == as_obj def test_to_datetime_list_of_integers(self): rng = date_range('1/1/2000', periods=20) @@ -736,13 +952,15 @@ def test_to_datetime_list_of_integers(self): tm.assert_index_equal(rng, result) - def test_to_datetime_freq(self): - xp = bdate_range('2000-1-1', periods=10, tz='UTC') - rs = xp.to_datetime() - self.assertEqual(xp.freq, rs.freq) - self.assertEqual(xp.tzinfo, rs.tzinfo) + def test_to_datetime_overflow(self): + # gh-17637 + # we are overflowing Timedelta range here + + with pytest.raises(OverflowError): + date_range(start='1/1/1700', freq='B', periods=100000) - def test_string_na_nat_conversion(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_string_na_nat_conversion(self, cache): # GH #999, #858 from pandas.compat import parse_date @@ -752,7 +970,7 @@ def 
test_string_na_nat_conversion(self): expected = np.empty(4, dtype='M8[ns]') for i, val in enumerate(strings): - if isnull(val): + if isna(val): expected[i] = tslib.iNaT else: expected[i] = parse_date(val) @@ -760,65 +978,73 @@ def test_string_na_nat_conversion(self): result = tslib.array_to_datetime(strings) tm.assert_almost_equal(result, expected) - result2 = to_datetime(strings) - tm.assertIsInstance(result2, DatetimeIndex) + result2 = to_datetime(strings, cache=cache) + assert isinstance(result2, DatetimeIndex) tm.assert_numpy_array_equal(result, result2.values) malformed = np.array(['1/100/2000', np.nan], dtype=object) # GH 10636, default is now 'raise' - self.assertRaises(ValueError, - lambda: to_datetime(malformed, errors='raise')) + pytest.raises(ValueError, + lambda: to_datetime(malformed, errors='raise', + cache=cache)) - result = to_datetime(malformed, errors='ignore') + result = to_datetime(malformed, errors='ignore', cache=cache) tm.assert_numpy_array_equal(result, malformed) - self.assertRaises(ValueError, to_datetime, malformed, errors='raise') + pytest.raises(ValueError, to_datetime, malformed, errors='raise', + cache=cache) idx = ['a', 'b', 'c', 'd', 'e'] series = Series(['1/1/2000', np.nan, '1/3/2000', np.nan, '1/5/2000'], index=idx, name='foo') - dseries = Series([to_datetime('1/1/2000'), np.nan, - to_datetime('1/3/2000'), np.nan, - to_datetime('1/5/2000')], index=idx, name='foo') + dseries = Series([to_datetime('1/1/2000', cache=cache), np.nan, + to_datetime('1/3/2000', cache=cache), np.nan, + to_datetime('1/5/2000', cache=cache)], + index=idx, name='foo') - result = to_datetime(series) - dresult = to_datetime(dseries) + result = to_datetime(series, cache=cache) + dresult = to_datetime(dseries, cache=cache) expected = Series(np.empty(5, dtype='M8[ns]'), index=idx) for i in range(5): x = series[i] - if isnull(x): + if isna(x): expected[i] = tslib.iNaT else: - expected[i] = to_datetime(x) + expected[i] = to_datetime(x, cache=cache) 
assert_series_equal(result, expected, check_names=False) - self.assertEqual(result.name, 'foo') + assert result.name == 'foo' assert_series_equal(dresult, expected, check_names=False) - self.assertEqual(dresult.name, 'foo') - - def test_dti_constructor_numpy_timeunits(self): + assert dresult.name == 'foo' + + @pytest.mark.parametrize('dtype', [ + 'datetime64[h]', 'datetime64[m]', + 'datetime64[s]', 'datetime64[ms]', + 'datetime64[us]', 'datetime64[ns]']) + @pytest.mark.parametrize('cache', [True, False]) + def test_dti_constructor_numpy_timeunits(self, cache, dtype): # GH 9114 - base = pd.to_datetime(['2000-01-01T00:00', '2000-01-02T00:00', 'NaT']) + base = pd.to_datetime(['2000-01-01T00:00', '2000-01-02T00:00', 'NaT'], + cache=cache) - for dtype in ['datetime64[h]', 'datetime64[m]', 'datetime64[s]', - 'datetime64[ms]', 'datetime64[us]', 'datetime64[ns]']: - values = base.values.astype(dtype) + values = base.values.astype(dtype) - tm.assert_index_equal(DatetimeIndex(values), base) - tm.assert_index_equal(to_datetime(values), base) + tm.assert_index_equal(DatetimeIndex(values), base) + tm.assert_index_equal(to_datetime(values, cache=cache), base) - def test_dayfirst(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_dayfirst(self, cache): # GH 5917 arr = ['10/02/2014', '11/02/2014', '12/02/2014'] expected = DatetimeIndex([datetime(2014, 2, 10), datetime(2014, 2, 11), datetime(2014, 2, 12)]) idx1 = DatetimeIndex(arr, dayfirst=True) idx2 = DatetimeIndex(np.array(arr), dayfirst=True) - idx3 = to_datetime(arr, dayfirst=True) - idx4 = to_datetime(np.array(arr), dayfirst=True) + idx3 = to_datetime(arr, dayfirst=True, cache=cache) + idx4 = to_datetime(np.array(arr), dayfirst=True, cache=cache) idx5 = DatetimeIndex(Index(arr), dayfirst=True) idx6 = DatetimeIndex(Series(arr), dayfirst=True) tm.assert_index_equal(expected, idx1) @@ -829,85 +1055,32 @@ def test_dayfirst(self): tm.assert_index_equal(expected, idx6) -class TestGuessDatetimeFormat(tm.TestCase): 
- - def test_guess_datetime_format_with_parseable_formats(self): - tm._skip_if_not_us_locale() - dt_string_to_format = (('20111230', '%Y%m%d'), - ('2011-12-30', '%Y-%m-%d'), - ('30-12-2011', '%d-%m-%Y'), - ('2011-12-30 00:00:00', '%Y-%m-%d %H:%M:%S'), - ('2011-12-30T00:00:00', '%Y-%m-%dT%H:%M:%S'), - ('2011-12-30 00:00:00.000000', - '%Y-%m-%d %H:%M:%S.%f'), ) - - for dt_string, dt_format in dt_string_to_format: - self.assertEqual( - tools._guess_datetime_format(dt_string), - dt_format - ) - - def test_guess_datetime_format_with_dayfirst(self): - ambiguous_string = '01/01/2011' - self.assertEqual( - tools._guess_datetime_format(ambiguous_string, dayfirst=True), - '%d/%m/%Y' - ) - self.assertEqual( - tools._guess_datetime_format(ambiguous_string, dayfirst=False), - '%m/%d/%Y' - ) - - def test_guess_datetime_format_with_locale_specific_formats(self): - # The month names will vary depending on the locale, in which - # case these wont be parsed properly (dateutil can't parse them) - tm._skip_if_has_locale() - - dt_string_to_format = (('30/Dec/2011', '%d/%b/%Y'), - ('30/December/2011', '%d/%B/%Y'), - ('30/Dec/2011 00:00:00', '%d/%b/%Y %H:%M:%S'), ) +class TestGuessDatetimeFormat(object): - for dt_string, dt_format in dt_string_to_format: - self.assertEqual( - tools._guess_datetime_format(dt_string), - dt_format - ) + @td.skip_if_not_us_locale + @is_dateutil_le_261 + def test_guess_datetime_format_for_array(self): + expected_format = '%Y-%m-%d %H:%M:%S.%f' + dt_string = datetime(2011, 12, 30, 0, 0, 0).strftime(expected_format) - def test_guess_datetime_format_invalid_inputs(self): - # A datetime string must include a year, month and a day for it - # to be guessable, in addition to being a string that looks like - # a datetime - invalid_dts = [ - '2013', - '01/2013', - '12:00:00', - '1/1/1/1', - 'this_is_not_a_datetime', - '51a', - 9, - datetime(2011, 1, 1), + test_arrays = [ + np.array([dt_string, dt_string, dt_string], dtype='O'), + np.array([np.nan, np.nan, dt_string], 
dtype='O'), + np.array([dt_string, 'random_string'], dtype='O'), ] - for invalid_dt in invalid_dts: - self.assertTrue(tools._guess_datetime_format(invalid_dt) is None) - - def test_guess_datetime_format_nopadding(self): - # GH 11142 - dt_string_to_format = (('2011-1-1', '%Y-%m-%d'), - ('30-1-2011', '%d-%m-%Y'), - ('1/1/2011', '%m/%d/%Y'), - ('2011-1-1 00:00:00', '%Y-%m-%d %H:%M:%S'), - ('2011-1-1 0:0:0', '%Y-%m-%d %H:%M:%S'), - ('2011-1-3T00:00:0', '%Y-%m-%dT%H:%M:%S')) - - for dt_string, dt_format in dt_string_to_format: - self.assertEqual( - tools._guess_datetime_format(dt_string), - dt_format - ) + for test_array in test_arrays: + assert tools._guess_datetime_format_for_array( + test_array) == expected_format - def test_guess_datetime_format_for_array(self): - tm._skip_if_not_us_locale() + format_for_string_of_nans = tools._guess_datetime_format_for_array( + np.array( + [np.nan, np.nan, np.nan], dtype='O')) + assert format_for_string_of_nans is None + + @td.skip_if_not_us_locale + @is_dateutil_gt_261 + def test_guess_datetime_format_for_array_gt_261(self): expected_format = '%Y-%m-%d %H:%M:%S.%f' dt_string = datetime(2011, 12, 30, 0, 0, 0).strftime(expected_format) @@ -918,20 +1091,19 @@ def test_guess_datetime_format_for_array(self): ] for test_array in test_arrays: - self.assertEqual( - tools._guess_datetime_format_for_array(test_array), - expected_format - ) + assert tools._guess_datetime_format_for_array( + test_array) is None format_for_string_of_nans = tools._guess_datetime_format_for_array( np.array( [np.nan, np.nan, np.nan], dtype='O')) - self.assertTrue(format_for_string_of_nans is None) + assert format_for_string_of_nans is None -class TestToDatetimeInferFormat(tm.TestCase): +class TestToDatetimeInferFormat(object): - def test_to_datetime_infer_datetime_format_consistent_format(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_infer_datetime_format_consistent_format(self, cache): s = pd.Series(pd.date_range('20000101', 
periods=50, freq='H')) test_formats = ['%m-%d-%Y', '%m/%d/%Y %H:%M:%S.%f', @@ -940,112 +1112,117 @@ def test_to_datetime_infer_datetime_format_consistent_format(self): for test_format in test_formats: s_as_dt_strings = s.apply(lambda x: x.strftime(test_format)) - with_format = pd.to_datetime(s_as_dt_strings, format=test_format) + with_format = pd.to_datetime(s_as_dt_strings, format=test_format, + cache=cache) no_infer = pd.to_datetime(s_as_dt_strings, - infer_datetime_format=False) + infer_datetime_format=False, + cache=cache) yes_infer = pd.to_datetime(s_as_dt_strings, - infer_datetime_format=True) + infer_datetime_format=True, + cache=cache) # Whether the format is explicitly passed, it is inferred, or # it is not inferred, the results should all be the same - self.assert_series_equal(with_format, no_infer) - self.assert_series_equal(no_infer, yes_infer) + tm.assert_series_equal(with_format, no_infer) + tm.assert_series_equal(no_infer, yes_infer) - def test_to_datetime_infer_datetime_format_inconsistent_format(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_infer_datetime_format_inconsistent_format(self, + cache): s = pd.Series(np.array(['01/01/2011 00:00:00', '01-02-2011 00:00:00', '2011-01-03T00:00:00'])) # When the format is inconsistent, infer_datetime_format should just # fallback to the default parsing - tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False), - pd.to_datetime(s, infer_datetime_format=True)) + tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False, + cache=cache), + pd.to_datetime(s, infer_datetime_format=True, + cache=cache)) s = pd.Series(np.array(['Jan/01/2011', 'Feb/01/2011', 'Mar/01/2011'])) - tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False), - pd.to_datetime(s, infer_datetime_format=True)) + tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False, + cache=cache), + pd.to_datetime(s, infer_datetime_format=True, + cache=cache)) - def 
test_to_datetime_infer_datetime_format_series_with_nans(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_infer_datetime_format_series_with_nans(self, cache): s = pd.Series(np.array(['01/01/2011 00:00:00', np.nan, '01/03/2011 00:00:00', np.nan])) - tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False), - pd.to_datetime(s, infer_datetime_format=True)) - - def test_to_datetime_infer_datetime_format_series_starting_with_nans(self): + tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False, + cache=cache), + pd.to_datetime(s, infer_datetime_format=True, + cache=cache)) + + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_infer_datetime_format_series_start_with_nans(self, + cache): s = pd.Series(np.array([np.nan, np.nan, '01/01/2011 00:00:00', '01/02/2011 00:00:00', '01/03/2011 00:00:00'])) - tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False), - pd.to_datetime(s, infer_datetime_format=True)) + tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False, + cache=cache), + pd.to_datetime(s, infer_datetime_format=True, + cache=cache)) - def test_to_datetime_iso8601_noleading_0s(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_iso8601_noleading_0s(self, cache): # GH 11871 s = pd.Series(['2014-1-1', '2014-2-2', '2015-3-3']) expected = pd.Series([pd.Timestamp('2014-01-01'), pd.Timestamp('2014-02-02'), pd.Timestamp('2015-03-03')]) - tm.assert_series_equal(pd.to_datetime(s), expected) - tm.assert_series_equal(pd.to_datetime(s, format='%Y-%m-%d'), expected) + tm.assert_series_equal(pd.to_datetime(s, cache=cache), expected) + tm.assert_series_equal(pd.to_datetime(s, format='%Y-%m-%d', + cache=cache), expected) -class TestDaysInMonth(tm.TestCase): +class TestDaysInMonth(object): # tests for issue #10154 - def test_day_not_in_month_coerce(self): - self.assertTrue(isnull(to_datetime('2015-02-29', errors='coerce'))) - 
self.assertTrue(isnull(to_datetime('2015-02-29', format="%Y-%m-%d", - errors='coerce'))) - self.assertTrue(isnull(to_datetime('2015-02-32', format="%Y-%m-%d", - errors='coerce'))) - self.assertTrue(isnull(to_datetime('2015-04-31', format="%Y-%m-%d", - errors='coerce'))) - - def test_day_not_in_month_raise(self): - self.assertRaises(ValueError, to_datetime, '2015-02-29', - errors='raise') - self.assertRaises(ValueError, to_datetime, '2015-02-29', - errors='raise', format="%Y-%m-%d") - self.assertRaises(ValueError, to_datetime, '2015-02-32', - errors='raise', format="%Y-%m-%d") - self.assertRaises(ValueError, to_datetime, '2015-04-31', - errors='raise', format="%Y-%m-%d") - - def test_day_not_in_month_ignore(self): - self.assertEqual(to_datetime( - '2015-02-29', errors='ignore'), '2015-02-29') - self.assertEqual(to_datetime( - '2015-02-29', errors='ignore', format="%Y-%m-%d"), '2015-02-29') - self.assertEqual(to_datetime( - '2015-02-32', errors='ignore', format="%Y-%m-%d"), '2015-02-32') - self.assertEqual(to_datetime( - '2015-04-31', errors='ignore', format="%Y-%m-%d"), '2015-04-31') - - -class TestDatetimeParsingWrappers(tm.TestCase): - def test_does_not_convert_mixed_integer(self): - bad_date_strings = ('-50000', '999', '123.1234', 'm', 'T') - - for bad_date_string in bad_date_strings: - self.assertFalse(tslib._does_string_look_like_datetime( - bad_date_string)) - - good_date_strings = ('2012-01-01', - '01/01/2012', - 'Mon Sep 16, 2013', - '01012012', - '0101', - '1-1', ) - - for good_date_string in good_date_strings: - self.assertTrue(tslib._does_string_look_like_datetime( - good_date_string)) - - def test_parsers(self): - + @pytest.mark.parametrize('cache', [True, False]) + def test_day_not_in_month_coerce(self, cache): + assert isna(to_datetime('2015-02-29', errors='coerce', cache=cache)) + assert isna(to_datetime('2015-02-29', format="%Y-%m-%d", + errors='coerce', cache=cache)) + assert isna(to_datetime('2015-02-32', format="%Y-%m-%d", + errors='coerce', 
cache=cache)) + assert isna(to_datetime('2015-04-31', format="%Y-%m-%d", + errors='coerce', cache=cache)) + + @pytest.mark.parametrize('cache', [True, False]) + def test_day_not_in_month_raise(self, cache): + pytest.raises(ValueError, to_datetime, '2015-02-29', + errors='raise', cache=cache) + pytest.raises(ValueError, to_datetime, '2015-02-29', + errors='raise', format="%Y-%m-%d", cache=cache) + pytest.raises(ValueError, to_datetime, '2015-02-32', + errors='raise', format="%Y-%m-%d", cache=cache) + pytest.raises(ValueError, to_datetime, '2015-04-31', + errors='raise', format="%Y-%m-%d", cache=cache) + + @pytest.mark.parametrize('cache', [True, False]) + def test_day_not_in_month_ignore(self, cache): + assert to_datetime('2015-02-29', errors='ignore', + cache=cache) == '2015-02-29' + assert to_datetime('2015-02-29', errors='ignore', + format="%Y-%m-%d", cache=cache) == '2015-02-29' + assert to_datetime('2015-02-32', errors='ignore', + format="%Y-%m-%d", cache=cache) == '2015-02-32' + assert to_datetime('2015-04-31', errors='ignore', + format="%Y-%m-%d", cache=cache) == '2015-04-31' + + +class TestDatetimeParsingWrappers(object): + + @pytest.mark.parametrize('cache', [True, False]) + def test_parsers(self, cache): + + # dateutil >= 2.5.0 defaults to yearfirst=True # https://github.com/dateutil/dateutil/issues/217 - import dateutil - yearfirst = dateutil.__version__ >= LooseVersion('2.5.0') + yearfirst = True cases = {'2011-01-01': datetime(2011, 1, 1), '2Q2005': datetime(2005, 4, 1), @@ -1099,51 +1276,44 @@ def test_parsers(self): } for date_str, expected in compat.iteritems(cases): - result1, _, _ = tools.parse_time_string(date_str, - yearfirst=yearfirst) + result1, _, _ = parsing.parse_time_string(date_str, + yearfirst=yearfirst) result2 = to_datetime(date_str, yearfirst=yearfirst) result3 = to_datetime([date_str], yearfirst=yearfirst) # result5 is used below result4 = to_datetime(np.array([date_str], dtype=object), - yearfirst=yearfirst) + yearfirst=yearfirst, 
cache=cache) result6 = DatetimeIndex([date_str], yearfirst=yearfirst) # result7 is used below result8 = DatetimeIndex(Index([date_str]), yearfirst=yearfirst) result9 = DatetimeIndex(Series([date_str]), yearfirst=yearfirst) for res in [result1, result2]: - self.assertEqual(res, expected) + assert res == expected for res in [result3, result4, result6, result8, result9]: exp = DatetimeIndex([pd.Timestamp(expected)]) tm.assert_index_equal(res, exp) - # these really need to have yearfist, but we don't support + # these really need to have yearfirst, but we don't support if not yearfirst: result5 = Timestamp(date_str) - self.assertEqual(result5, expected) + assert result5 == expected result7 = date_range(date_str, freq='S', periods=1, yearfirst=yearfirst) - self.assertEqual(result7, expected) + assert result7 == expected # NaT - result1, _, _ = tools.parse_time_string('NaT') + result1, _, _ = parsing.parse_time_string('NaT') result2 = to_datetime('NaT') result3 = Timestamp('NaT') result4 = DatetimeIndex(['NaT'])[0] - self.assertTrue(result1 is tslib.NaT) - self.assertTrue(result1 is tslib.NaT) - self.assertTrue(result1 is tslib.NaT) - self.assertTrue(result1 is tslib.NaT) - - def test_parsers_quarter_invalid(self): - - cases = ['2Q 2005', '2Q-200A', '2Q-200', '22Q2005', '6Q-20', '2Q200.'] - for case in cases: - self.assertRaises(ValueError, tools.parse_time_string, case) - - def test_parsers_dayfirst_yearfirst(self): - tm._skip_if_no_dateutil() + assert result1 is tslib.NaT + assert result2 is tslib.NaT + assert result3 is tslib.NaT + assert result4 is tslib.NaT + @pytest.mark.parametrize('cache', [True, False]) + def test_parsers_dayfirst_yearfirst(self, cache): # OK # 2.5.1 10-11-12 [dayfirst=0, yearfirst=0] -> 2012-10-11 00:00:00 # 2.5.2 10-11-12 [dayfirst=0, yearfirst=1] -> 2012-10-11 00:00:00 @@ -1184,8 +1354,7 @@ def test_parsers_dayfirst_yearfirst(self): # 2.5.2 20/12/21 [dayfirst=1, yearfirst=0] -> 2021-12-20 00:00:00 # 2.5.3 20/12/21 [dayfirst=1, yearfirst=0] -> 
2021-12-20 00:00:00 - import dateutil - is_lt_253 = dateutil.__version__ < LooseVersion('2.5.3') + is_lt_253 = LooseVersion(dateutil.__version__) < LooseVersion('2.5.3') # str : dayfirst, yearfirst, expected cases = {'10-11-12': [(False, False, @@ -1205,7 +1374,6 @@ def test_parsers_dayfirst_yearfirst(self): (True, True, datetime(2020, 12, 21))]} - from dateutil.parser import parse for date_str, values in compat.iteritems(cases): for dayfirst, yearfirst, expected in values: @@ -1217,37 +1385,35 @@ def test_parsers_dayfirst_yearfirst(self): # compare with dateutil result dateutil_result = parse(date_str, dayfirst=dayfirst, yearfirst=yearfirst) - self.assertEqual(dateutil_result, expected) + assert dateutil_result == expected - result1, _, _ = tools.parse_time_string(date_str, - dayfirst=dayfirst, - yearfirst=yearfirst) + result1, _, _ = parsing.parse_time_string(date_str, + dayfirst=dayfirst, + yearfirst=yearfirst) # we don't support dayfirst/yearfirst here: if not dayfirst and not yearfirst: result2 = Timestamp(date_str) - self.assertEqual(result2, expected) + assert result2 == expected result3 = to_datetime(date_str, dayfirst=dayfirst, - yearfirst=yearfirst) + yearfirst=yearfirst, cache=cache) result4 = DatetimeIndex([date_str], dayfirst=dayfirst, yearfirst=yearfirst)[0] - self.assertEqual(result1, expected) - self.assertEqual(result3, expected) - self.assertEqual(result4, expected) - - def test_parsers_timestring(self): - tm._skip_if_no_dateutil() - from dateutil.parser import parse + assert result1 == expected + assert result3 == expected + assert result4 == expected + @pytest.mark.parametrize('cache', [True, False]) + def test_parsers_timestring(self, cache): # must be the same as dateutil result cases = {'10:15': (parse('10:15'), datetime(1, 1, 1, 10, 15)), '9:05': (parse('9:05'), datetime(1, 1, 1, 9, 5))} for date_str, (exp_now, exp_def) in compat.iteritems(cases): - result1, _, _ = tools.parse_time_string(date_str) + result1, _, _ = 
parsing.parse_time_string(date_str) result2 = to_datetime(date_str) result3 = to_datetime([date_str]) result4 = Timestamp(date_str) @@ -1255,79 +1421,51 @@ def test_parsers_timestring(self): # parse time string return time string based on default date # others are not, and can't be changed because it is used in # time series plot - self.assertEqual(result1, exp_def) - self.assertEqual(result2, exp_now) - self.assertEqual(result3, exp_now) - self.assertEqual(result4, exp_now) - self.assertEqual(result5, exp_now) + assert result1 == exp_def + assert result2 == exp_now + assert result3 == exp_now + assert result4 == exp_now + assert result5 == exp_now + @td.skip_if_has_locale def test_parsers_time(self): # GH11818 - _skip_if_has_locale() strings = ["14:15", "1415", "2:15pm", "0215pm", "14:15:00", "141500", "2:15:00pm", "021500pm", time(14, 15)] expected = time(14, 15) for time_string in strings: - self.assertEqual(tools.to_time(time_string), expected) + assert tools.to_time(time_string) == expected new_string = "14.15" - self.assertRaises(ValueError, tools.to_time, new_string) - self.assertEqual(tools.to_time(new_string, format="%H.%M"), expected) + pytest.raises(ValueError, tools.to_time, new_string) + assert tools.to_time(new_string, format="%H.%M") == expected arg = ["14:15", "20:20"] expected_arr = [time(14, 15), time(20, 20)] - self.assertEqual(tools.to_time(arg), expected_arr) - self.assertEqual(tools.to_time(arg, format="%H:%M"), expected_arr) - self.assertEqual(tools.to_time(arg, infer_time_format=True), - expected_arr) - self.assertEqual(tools.to_time(arg, format="%I:%M%p", errors="coerce"), - [None, None]) + assert tools.to_time(arg) == expected_arr + assert tools.to_time(arg, format="%H:%M") == expected_arr + assert tools.to_time(arg, infer_time_format=True) == expected_arr + assert tools.to_time(arg, format="%I:%M%p", + errors="coerce") == [None, None] res = tools.to_time(arg, format="%I:%M%p", errors="ignore") - self.assert_numpy_array_equal(res, 
np.array(arg, dtype=np.object_)) + tm.assert_numpy_array_equal(res, np.array(arg, dtype=np.object_)) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): tools.to_time(arg, format="%I:%M%p", errors="raise") - self.assert_series_equal(tools.to_time(Series(arg, name="test")), - Series(expected_arr, name="test")) + tm.assert_series_equal(tools.to_time(Series(arg, name="test")), + Series(expected_arr, name="test")) res = tools.to_time(np.array(arg)) - self.assertIsInstance(res, list) - self.assert_equal(res, expected_arr) + assert isinstance(res, list) + assert res == expected_arr - def test_parsers_monthfreq(self): - cases = {'201101': datetime(2011, 1, 1, 0, 0), - '200005': datetime(2000, 5, 1, 0, 0)} - - for date_str, expected in compat.iteritems(cases): - result1, _, _ = tools.parse_time_string(date_str, freq='M') - self.assertEqual(result1, expected) - - def test_parsers_quarterly_with_freq(self): - msg = ('Incorrect quarterly string is given, quarter ' - 'must be between 1 and 4: 2013Q5') - with tm.assertRaisesRegexp(tslib.DateParseError, msg): - tools.parse_time_string('2013Q5') - - # GH 5418 - msg = ('Unable to retrieve month information from given freq: ' - 'INVLD-L-DEC-SAT') - with tm.assertRaisesRegexp(tslib.DateParseError, msg): - tools.parse_time_string('2013Q1', freq='INVLD-L-DEC-SAT') - - cases = {('2013Q2', None): datetime(2013, 4, 1), - ('2013Q2', 'A-APR'): datetime(2012, 8, 1), - ('2013-Q2', 'A-DEC'): datetime(2013, 4, 1)} - - for (date_str, freq), exp in compat.iteritems(cases): - result, _, _ = tools.parse_time_string(date_str, freq=freq) - self.assertEqual(result, exp) - - def test_parsers_timezone_minute_offsets_roundtrip(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_parsers_timezone_minute_offsets_roundtrip(self, cache): # GH11708 - base = to_datetime("2013-01-01 00:00:00") + base = to_datetime("2013-01-01 00:00:00", cache=cache) dt_strings = [ ('2013-01-01 05:45+0545', "Asia/Katmandu", @@ -1338,180 
+1476,143 @@ def test_parsers_timezone_minute_offsets_roundtrip(self): ] for dt_string, tz, dt_string_repr in dt_strings: - dt_time = to_datetime(dt_string) - self.assertEqual(base, dt_time) + dt_time = to_datetime(dt_string, cache=cache) + assert base == dt_time converted_time = dt_time.tz_localize('UTC').tz_convert(tz) - self.assertEqual(dt_string_repr, repr(converted_time)) - - def test_parsers_iso8601(self): - # GH 12060 - # test only the iso parser - flexibility to different - # separators and leadings 0s - # Timestamp construction falls back to dateutil - cases = {'2011-01-02': datetime(2011, 1, 2), - '2011-1-2': datetime(2011, 1, 2), - '2011-01': datetime(2011, 1, 1), - '2011-1': datetime(2011, 1, 1), - '2011 01 02': datetime(2011, 1, 2), - '2011.01.02': datetime(2011, 1, 2), - '2011/01/02': datetime(2011, 1, 2), - '2011\\01\\02': datetime(2011, 1, 2), - '2013-01-01 05:30:00': datetime(2013, 1, 1, 5, 30), - '2013-1-1 5:30:00': datetime(2013, 1, 1, 5, 30)} - for date_str, exp in compat.iteritems(cases): - actual = tslib._test_parse_iso8601(date_str) - self.assertEqual(actual, exp) - - # seperators must all match - YYYYMM not valid - invalid_cases = ['2011-01/02', '2011^11^11', - '201401', '201111', '200101', - # mixed separated and unseparated - '2005-0101', '200501-01', - '20010101 12:3456', '20010101 1234:56', - # HHMMSS must have two digits in each component - # if unseparated - '20010101 1', '20010101 123', '20010101 12345', - '20010101 12345Z', - # wrong separator for HHMMSS - '2001-01-01 12-34-56'] - for date_str in invalid_cases: - with tm.assertRaises(ValueError): - tslib._test_parse_iso8601(date_str) - # If no ValueError raised, let me know which case failed. 
- raise Exception(date_str) - - -class TestArrayToDatetime(tm.TestCase): - - def test_try_parse_dates(self): - from dateutil.parser import parse - arr = np.array(['5/1/2000', '6/1/2000', '7/1/2000'], dtype=object) - - result = lib.try_parse_dates(arr, dayfirst=True) - expected = [parse(d, dayfirst=True) for d in arr] - self.assertTrue(np.array_equal(result, expected)) - - def test_parsing_valid_dates(self): - arr = np.array(['01-01-2013', '01-02-2013'], dtype=object) - self.assert_numpy_array_equal( - tslib.array_to_datetime(arr), - np_array_datetime64_compat( - [ - '2013-01-01T00:00:00.000000000-0000', - '2013-01-02T00:00:00.000000000-0000' - ], - dtype='M8[ns]' - ) - ) + assert dt_string_repr == repr(converted_time) - arr = np.array(['Mon Sep 16 2013', 'Tue Sep 17 2013'], dtype=object) - self.assert_numpy_array_equal( - tslib.array_to_datetime(arr), - np_array_datetime64_compat( - [ - '2013-09-16T00:00:00.000000000-0000', - '2013-09-17T00:00:00.000000000-0000' - ], - dtype='M8[ns]' - ) - ) - def test_parsing_timezone_offsets(self): - # All of these datetime strings with offsets are equivalent - # to the same datetime after the timezone offset is added - dt_strings = [ - '01-01-2013 08:00:00+08:00', - '2013-01-01T08:00:00.000000000+0800', - '2012-12-31T16:00:00.000000000-0800', - '12-31-2012 23:00:00-01:00' - ] +def test_normalize_date(): + value = date(2012, 9, 7) - expected_output = tslib.array_to_datetime(np.array( - ['01-01-2013 00:00:00'], dtype=object)) + result = tslib.normalize_date(value) + assert (result == datetime(2012, 9, 7)) - for dt_string in dt_strings: - self.assert_numpy_array_equal( - tslib.array_to_datetime( - np.array([dt_string], dtype=object) - ), - expected_output - ) + value = datetime(2012, 9, 7, 12) - def test_number_looking_strings_not_into_datetime(self): - # #4601 - # These strings don't look like datetimes so they shouldn't be - # attempted to be converted - arr = np.array(['-352.737091', '183.575577'], dtype=object) - 
self.assert_numpy_array_equal( - tslib.array_to_datetime(arr, errors='ignore'), arr) - - arr = np.array(['1', '2', '3', '4', '5'], dtype=object) - self.assert_numpy_array_equal( - tslib.array_to_datetime(arr, errors='ignore'), arr) - - def test_coercing_dates_outside_of_datetime64_ns_bounds(self): - invalid_dates = [ - date(1000, 1, 1), - datetime(1000, 1, 1), - '1000-01-01', - 'Jan 1, 1000', - np.datetime64('1000-01-01'), - ] + result = tslib.normalize_date(value) + assert (result == datetime(2012, 9, 7)) - for invalid_date in invalid_dates: - self.assertRaises(ValueError, - tslib.array_to_datetime, - np.array( - [invalid_date], dtype='object'), - errors='raise', ) - self.assert_numpy_array_equal( - tslib.array_to_datetime( - np.array([invalid_date], dtype='object'), - errors='coerce'), - np.array([tslib.iNaT], dtype='M8[ns]') - ) - arr = np.array(['1/1/1000', '1/1/2000'], dtype=object) - self.assert_numpy_array_equal( - tslib.array_to_datetime(arr, errors='coerce'), - np_array_datetime64_compat( - [ - tslib.iNaT, - '2000-01-01T00:00:00.000000000-0000' - ], - dtype='M8[ns]' - ) - ) +@pytest.fixture(params=['D', 's', 'ms', 'us', 'ns']) +def units(request): + return request.param - def test_coerce_of_invalid_datetimes(self): - arr = np.array(['01-01-2013', 'not_a_date', '1'], dtype=object) - # Without coercing, the presence of any invalid dates prevents - # any values from being converted - self.assert_numpy_array_equal( - tslib.array_to_datetime(arr, errors='ignore'), arr) +@pytest.fixture +def epoch_1960(): + # for origin as 1960-01-01 + return Timestamp('1960-01-01') - # With coercing, the invalid dates becomes iNaT - self.assert_numpy_array_equal( - tslib.array_to_datetime(arr, errors='coerce'), - np_array_datetime64_compat( - [ - '2013-01-01T00:00:00.000000000-0000', - tslib.iNaT, - tslib.iNaT - ], - dtype='M8[ns]' - ) - ) +@pytest.fixture +def units_from_epochs(): + return list(range(5)) -def test_normalize_date(): - value = date(2012, 9, 7) - result = 
normalize_date(value) - assert (result == datetime(2012, 9, 7)) +@pytest.fixture(params=[epoch_1960(), + epoch_1960().to_pydatetime(), + epoch_1960().to_datetime64(), + str(epoch_1960())]) +def epochs(request): + return request.param - value = datetime(2012, 9, 7, 12) - result = normalize_date(value) - assert (result == datetime(2012, 9, 7)) +@pytest.fixture +def julian_dates(): + return pd.date_range('2014-1-1', periods=10).to_julian_date().values + + +class TestOrigin(object): + + def test_to_basic(self, julian_dates): + # gh-11276, gh-11745 + # for origin as julian + + result = Series(pd.to_datetime( + julian_dates, unit='D', origin='julian')) + expected = Series(pd.to_datetime( + julian_dates - pd.Timestamp(0).to_julian_date(), unit='D')) + assert_series_equal(result, expected) + + result = Series(pd.to_datetime( + [0, 1, 2], unit='D', origin='unix')) + expected = Series([Timestamp('1970-01-01'), + Timestamp('1970-01-02'), + Timestamp('1970-01-03')]) + assert_series_equal(result, expected) + + # default + result = Series(pd.to_datetime( + [0, 1, 2], unit='D')) + expected = Series([Timestamp('1970-01-01'), + Timestamp('1970-01-02'), + Timestamp('1970-01-03')]) + assert_series_equal(result, expected) + + def test_julian_round_trip(self): + result = pd.to_datetime(2456658, origin='julian', unit='D') + assert result.to_julian_date() == 2456658 + + # out-of-bounds + with pytest.raises(ValueError): + pd.to_datetime(1, origin="julian", unit='D') + + def test_invalid_unit(self, units, julian_dates): + + # checking for invalid combination of origin='julian' and unit != D + if units != 'D': + with pytest.raises(ValueError): + pd.to_datetime(julian_dates, unit=units, origin='julian') + + def test_invalid_origin(self): + + # need to have a numeric specified + with pytest.raises(ValueError): + pd.to_datetime("2005-01-01", origin="1960-01-01") + + with pytest.raises(ValueError): + pd.to_datetime("2005-01-01", origin="1960-01-01", unit='D') + + def test_epoch(self, units, 
epochs, epoch_1960, units_from_epochs): + + expected = Series( + [pd.Timedelta(x, unit=units) + + epoch_1960 for x in units_from_epochs]) + + result = Series(pd.to_datetime( + units_from_epochs, unit=units, origin=epochs)) + assert_series_equal(result, expected) + + @pytest.mark.parametrize("origin, exc", + [('random_string', ValueError), + ('epoch', ValueError), + ('13-24-1990', ValueError), + (datetime(1, 1, 1), tslib.OutOfBoundsDatetime)]) + def test_invalid_origins(self, origin, exc, units, units_from_epochs): + + with pytest.raises(exc): + pd.to_datetime(units_from_epochs, unit=units, + origin=origin) + + def test_invalid_origins_tzinfo(self): + # GH16842 + with pytest.raises(ValueError): + pd.to_datetime(1, unit='D', + origin=datetime(2000, 1, 1, tzinfo=pytz.utc)) + + def test_processing_order(self): + # make sure we handle out-of-bounds *before* + # constructing the dates + + result = pd.to_datetime(200 * 365, unit='D') + expected = Timestamp('2169-11-13 00:00:00') + assert result == expected + + result = pd.to_datetime(200 * 365, unit='D', origin='1870-01-01') + expected = Timestamp('2069-11-13 00:00:00') + assert result == expected + + result = pd.to_datetime(300 * 365, unit='D', origin='1870-01-01') + expected = Timestamp('2169-10-20 00:00:00') + assert result == expected diff --git a/pandas/tests/indexes/interval/__init__.py b/pandas/tests/indexes/interval/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/indexes/interval/test_astype.py b/pandas/tests/indexes/interval/test_astype.py new file mode 100644 index 0000000000000..b3a4bfa878c3f --- /dev/null +++ b/pandas/tests/indexes/interval/test_astype.py @@ -0,0 +1,209 @@ +from __future__ import division + +import pytest +import numpy as np +from pandas import ( + Index, + IntervalIndex, + interval_range, + CategoricalIndex, + Timestamp, + Timedelta, + NaT) +from pandas.core.dtypes.dtypes import CategoricalDtype, IntervalDtype +import pandas.util.testing as tm + + 
+class Base(object): + """Tests common to IntervalIndex with any subtype""" + + def test_astype_idempotent(self, index): + result = index.astype('interval') + tm.assert_index_equal(result, index) + + result = index.astype(index.dtype) + tm.assert_index_equal(result, index) + + def test_astype_object(self, index): + result = index.astype(object) + expected = Index(index.values, dtype='object') + tm.assert_index_equal(result, expected) + assert not result.equals(index) + + def test_astype_category(self, index): + result = index.astype('category') + expected = CategoricalIndex(index.values) + tm.assert_index_equal(result, expected) + + result = index.astype(CategoricalDtype()) + tm.assert_index_equal(result, expected) + + # non-default params + categories = index.dropna().unique().values[:-1] + dtype = CategoricalDtype(categories=categories, ordered=True) + result = index.astype(dtype) + expected = CategoricalIndex( + index.values, categories=categories, ordered=True) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize('dtype', [ + 'int64', 'uint64', 'float64', 'complex128', 'period[M]', + 'timedelta64', 'timedelta64[ns]', 'datetime64', 'datetime64[ns]', + 'datetime64[ns, US/Eastern]']) + def test_astype_cannot_cast(self, index, dtype): + msg = 'Cannot cast IntervalIndex to dtype' + with tm.assert_raises_regex(TypeError, msg): + index.astype(dtype) + + def test_astype_invalid_dtype(self, index): + msg = 'data type "fake_dtype" not understood' + with tm.assert_raises_regex(TypeError, msg): + index.astype('fake_dtype') + + +class TestIntSubtype(Base): + """Tests specific to IntervalIndex with integer-like subtype""" + + indexes = [ + IntervalIndex.from_breaks(np.arange(-10, 11, dtype='int64')), + IntervalIndex.from_breaks( + np.arange(100, dtype='uint64'), closed='left'), + ] + + @pytest.fixture(params=indexes) + def index(self, request): + return request.param + + @pytest.mark.parametrize('subtype', [ + 'float64', 'datetime64[ns]', 
'timedelta64[ns]']) + def test_subtype_conversion(self, index, subtype): + dtype = IntervalDtype(subtype) + result = index.astype(dtype) + expected = IntervalIndex.from_arrays(index.left.astype(subtype), + index.right.astype(subtype), + closed=index.closed) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize('subtype_start, subtype_end', [ + ('int64', 'uint64'), ('uint64', 'int64')]) + def test_subtype_integer(self, subtype_start, subtype_end): + index = IntervalIndex.from_breaks(np.arange(100, dtype=subtype_start)) + dtype = IntervalDtype(subtype_end) + result = index.astype(dtype) + expected = IntervalIndex.from_arrays(index.left.astype(subtype_end), + index.right.astype(subtype_end), + closed=index.closed) + tm.assert_index_equal(result, expected) + + @pytest.mark.xfail(reason='GH 15832') + def test_subtype_integer_errors(self): + # int64 -> uint64 fails with negative values + index = interval_range(-10, 10) + dtype = IntervalDtype('uint64') + with pytest.raises(ValueError): + index.astype(dtype) + + +class TestFloatSubtype(Base): + """Tests specific to IntervalIndex with float subtype""" + + indexes = [ + interval_range(-10.0, 10.0, closed='neither'), + IntervalIndex.from_arrays([-1.5, np.nan, 0., 0., 1.5], + [-0.5, np.nan, 1., 1., 3.], + closed='both'), + ] + + @pytest.fixture(params=indexes) + def index(self, request): + return request.param + + @pytest.mark.parametrize('subtype', ['int64', 'uint64']) + def test_subtype_integer(self, subtype): + index = interval_range(0.0, 10.0) + dtype = IntervalDtype(subtype) + result = index.astype(dtype) + expected = IntervalIndex.from_arrays(index.left.astype(subtype), + index.right.astype(subtype), + closed=index.closed) + tm.assert_index_equal(result, expected) + + # raises with NA + msg = 'Cannot convert NA to integer' + with tm.assert_raises_regex(ValueError, msg): + index.insert(0, np.nan).astype(dtype) + + @pytest.mark.xfail(reason='GH 15832') + def test_subtype_integer_errors(self): + # float64 
-> uint64 fails with negative values + index = interval_range(-10.0, 10.0) + dtype = IntervalDtype('uint64') + with pytest.raises(ValueError): + index.astype(dtype) + + # float64 -> integer-like fails with non-integer valued floats + index = interval_range(0.0, 10.0, freq=0.25) + dtype = IntervalDtype('int64') + with pytest.raises(ValueError): + index.astype(dtype) + + dtype = IntervalDtype('uint64') + with pytest.raises(ValueError): + index.astype(dtype) + + @pytest.mark.parametrize('subtype', ['datetime64[ns]', 'timedelta64[ns]']) + def test_subtype_datetimelike(self, index, subtype): + dtype = IntervalDtype(subtype) + msg = 'Cannot convert .* to .*; subtypes are incompatible' + with tm.assert_raises_regex(TypeError, msg): + index.astype(dtype) + + +class TestDatetimelikeSubtype(Base): + """Tests specific to IntervalIndex with datetime-like subtype""" + + indexes = [ + interval_range(Timestamp('2018-01-01'), periods=10, closed='neither'), + interval_range(Timestamp('2018-01-01'), periods=10).insert(2, NaT), + interval_range(Timestamp('2018-01-01', tz='US/Eastern'), periods=10), + interval_range(Timedelta('0 days'), periods=10, closed='both'), + interval_range(Timedelta('0 days'), periods=10).insert(2, NaT), + ] + + @pytest.fixture(params=indexes) + def index(self, request): + return request.param + + @pytest.mark.parametrize('subtype', ['int64', 'uint64']) + def test_subtype_integer(self, index, subtype): + dtype = IntervalDtype(subtype) + result = index.astype(dtype) + expected = IntervalIndex.from_arrays(index.left.astype(subtype), + index.right.astype(subtype), + closed=index.closed) + tm.assert_index_equal(result, expected) + + def test_subtype_float(self, index): + dtype = IntervalDtype('float64') + msg = 'Cannot convert .* to .*; subtypes are incompatible' + with tm.assert_raises_regex(TypeError, msg): + index.astype(dtype) + + def test_subtype_datetimelike(self): + # datetime -> timedelta raises + dtype = IntervalDtype('timedelta64[ns]') + msg = 'Cannot 
convert .* to .*; subtypes are incompatible' + + index = interval_range(Timestamp('2018-01-01'), periods=10) + with tm.assert_raises_regex(TypeError, msg): + index.astype(dtype) + + index = interval_range(Timestamp('2018-01-01', tz='CET'), periods=10) + with tm.assert_raises_regex(TypeError, msg): + index.astype(dtype) + + # timedelta -> datetime raises + dtype = IntervalDtype('datetime64[ns]') + index = interval_range(Timedelta('0 days'), periods=10) + with tm.assert_raises_regex(TypeError, msg): + index.astype(dtype) diff --git a/pandas/tests/indexes/interval/test_construction.py b/pandas/tests/indexes/interval/test_construction.py new file mode 100644 index 0000000000000..5fdf92dcb2044 --- /dev/null +++ b/pandas/tests/indexes/interval/test_construction.py @@ -0,0 +1,342 @@ +from __future__ import division + +import pytest +import numpy as np +from functools import partial + +from pandas import ( + Interval, IntervalIndex, Index, Int64Index, Float64Index, Categorical, + date_range, timedelta_range, period_range, notna) +from pandas.compat import lzip +from pandas.core.dtypes.dtypes import IntervalDtype +import pandas.core.common as com +import pandas.util.testing as tm + + +@pytest.fixture(params=['left', 'right', 'both', 'neither']) +def closed(request): + return request.param + + +@pytest.fixture(params=[None, 'foo']) +def name(request): + return request.param + + +class Base(object): + """ + Common tests for all variations of IntervalIndex construction. Input data + to be supplied in breaks format, then converted by the subclass method + get_kwargs_from_breaks to the expected format. 
+ """ + + @pytest.mark.parametrize('breaks', [ + [3, 14, 15, 92, 653], + np.arange(10, dtype='int64'), + Int64Index(range(-10, 11)), + Float64Index(np.arange(20, 30, 0.5)), + date_range('20180101', periods=10), + date_range('20180101', periods=10, tz='US/Eastern'), + timedelta_range('1 day', periods=10)]) + def test_constructor(self, constructor, breaks, closed, name): + result_kwargs = self.get_kwargs_from_breaks(breaks, closed) + result = constructor(closed=closed, name=name, **result_kwargs) + + assert result.closed == closed + assert result.name == name + assert result.dtype.subtype == getattr(breaks, 'dtype', 'int64') + tm.assert_index_equal(result.left, Index(breaks[:-1])) + tm.assert_index_equal(result.right, Index(breaks[1:])) + + @pytest.mark.parametrize('breaks, subtype', [ + (Int64Index([0, 1, 2, 3, 4]), 'float64'), + (Int64Index([0, 1, 2, 3, 4]), 'datetime64[ns]'), + (Int64Index([0, 1, 2, 3, 4]), 'timedelta64[ns]'), + (Float64Index([0, 1, 2, 3, 4]), 'int64'), + (date_range('2017-01-01', periods=5), 'int64'), + (timedelta_range('1 day', periods=5), 'int64')]) + def test_constructor_dtype(self, constructor, breaks, subtype): + # GH 19262: conversion via dtype parameter + expected_kwargs = self.get_kwargs_from_breaks(breaks.astype(subtype)) + expected = constructor(**expected_kwargs) + + result_kwargs = self.get_kwargs_from_breaks(breaks) + iv_dtype = IntervalDtype(subtype) + for dtype in (iv_dtype, str(iv_dtype)): + result = constructor(dtype=dtype, **result_kwargs) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize('breaks', [ + [np.nan] * 2, [np.nan] * 4, [np.nan] * 50]) + def test_constructor_nan(self, constructor, breaks, closed): + # GH 18421 + result_kwargs = self.get_kwargs_from_breaks(breaks) + result = constructor(closed=closed, **result_kwargs) + + expected_subtype = np.float64 + expected_values = np.array(breaks[:-1], dtype=object) + + assert result.closed == closed + assert result.dtype.subtype == expected_subtype + 
tm.assert_numpy_array_equal(result.values, expected_values) + + @pytest.mark.parametrize('breaks', [ + [], + np.array([], dtype='int64'), + np.array([], dtype='float64'), + np.array([], dtype='datetime64[ns]'), + np.array([], dtype='timedelta64[ns]')]) + def test_constructor_empty(self, constructor, breaks, closed): + # GH 18421 + result_kwargs = self.get_kwargs_from_breaks(breaks) + result = constructor(closed=closed, **result_kwargs) + + expected_values = np.array([], dtype=object) + expected_subtype = getattr(breaks, 'dtype', np.int64) + + assert result.empty + assert result.closed == closed + assert result.dtype.subtype == expected_subtype + tm.assert_numpy_array_equal(result.values, expected_values) + + @pytest.mark.parametrize('breaks', [ + tuple('0123456789'), + list('abcdefghij'), + np.array(list('abcdefghij'), dtype=object), + np.array(list('abcdefghij'), dtype=' with value 0 " + "is not an interval") + with tm.assert_raises_regex(TypeError, msg): + constructor([0, 1]) + + +class TestFromIntervals(TestClassConstructors): + """ + Tests for IntervalIndex.from_intervals, which is deprecated in favor of the + IntervalIndex constructor. Same tests as the IntervalIndex constructor, + plus deprecation test. Should only need to delete this class when removed. 
+ """ + + @pytest.fixture + def constructor(self): + def from_intervals_ignore_warnings(*args, **kwargs): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + return IntervalIndex.from_intervals(*args, **kwargs) + return from_intervals_ignore_warnings + + def test_deprecated(self): + ivs = [Interval(0, 1), Interval(1, 2)] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + IntervalIndex.from_intervals(ivs) diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py new file mode 100644 index 0000000000000..71a6f78125004 --- /dev/null +++ b/pandas/tests/indexes/interval/test_interval.py @@ -0,0 +1,966 @@ +from __future__ import division + +import pytest +import numpy as np +from pandas import ( + Interval, IntervalIndex, Index, isna, notna, interval_range, Timestamp, + Timedelta, date_range, timedelta_range) +from pandas.compat import lzip +import pandas.core.common as com +from pandas.tests.indexes.common import Base +import pandas.util.testing as tm +import pandas as pd + + +@pytest.fixture(scope='class', params=['left', 'right', 'both', 'neither']) +def closed(request): + return request.param + + +@pytest.fixture(scope='class', params=[None, 'foo']) +def name(request): + return request.param + + +class TestIntervalIndex(Base): + _holder = IntervalIndex + + def setup_method(self, method): + self.index = IntervalIndex.from_arrays([0, 1], [1, 2]) + self.index_with_nan = IntervalIndex.from_tuples( + [(0, 1), np.nan, (1, 2)]) + self.indices = dict(intervalIndex=tm.makeIntervalIndex(10)) + + def create_index(self, closed='right'): + return IntervalIndex.from_breaks(range(11), closed=closed) + + def create_index_with_nan(self, closed='right'): + mask = [True, False] + [True] * 8 + return IntervalIndex.from_arrays( + np.where(mask, np.arange(10), np.nan), + np.where(mask, np.arange(1, 11), np.nan), closed=closed) + + def test_properties(self, closed): + index = 
self.create_index(closed=closed) + assert len(index) == 10 + assert index.size == 10 + assert index.shape == (10, ) + + tm.assert_index_equal(index.left, Index(np.arange(10))) + tm.assert_index_equal(index.right, Index(np.arange(1, 11))) + tm.assert_index_equal(index.mid, Index(np.arange(0.5, 10.5))) + + assert index.closed == closed + + ivs = [Interval(l, r, closed) for l, r in zip(range(10), range(1, 11))] + expected = np.array(ivs, dtype=object) + tm.assert_numpy_array_equal(np.asarray(index), expected) + tm.assert_numpy_array_equal(index.values, expected) + + # with nans + index = self.create_index_with_nan(closed=closed) + assert len(index) == 10 + assert index.size == 10 + assert index.shape == (10, ) + + expected_left = Index([0, np.nan, 2, 3, 4, 5, 6, 7, 8, 9]) + expected_right = expected_left + 1 + expected_mid = expected_left + 0.5 + tm.assert_index_equal(index.left, expected_left) + tm.assert_index_equal(index.right, expected_right) + tm.assert_index_equal(index.mid, expected_mid) + + assert index.closed == closed + + ivs = [Interval(l, r, closed) if notna(l) else np.nan + for l, r in zip(expected_left, expected_right)] + expected = np.array(ivs, dtype=object) + tm.assert_numpy_array_equal(np.asarray(index), expected) + tm.assert_numpy_array_equal(index.values, expected) + + @pytest.mark.parametrize('breaks', [ + [1, 1, 2, 5, 15, 53, 217, 1014, 5335, 31240, 201608], + [-np.inf, -100, -10, 0.5, 1, 1.5, 3.8, 101, 202, np.inf], + pd.to_datetime(['20170101', '20170202', '20170303', '20170404']), + pd.to_timedelta(['1ns', '2ms', '3s', '4M', '5H', '6D'])]) + def test_length(self, closed, breaks): + # GH 18789 + index = IntervalIndex.from_breaks(breaks, closed=closed) + result = index.length + expected = Index(iv.length for iv in index) + tm.assert_index_equal(result, expected) + + # with NA + index = index.insert(1, np.nan) + result = index.length + expected = Index(iv.length if notna(iv) else iv for iv in index) + tm.assert_index_equal(result, expected) + + 
def test_with_nans(self, closed): + index = self.create_index(closed=closed) + assert not index.hasnans + + result = index.isna() + expected = np.repeat(False, len(index)) + tm.assert_numpy_array_equal(result, expected) + + result = index.notna() + expected = np.repeat(True, len(index)) + tm.assert_numpy_array_equal(result, expected) + + index = self.create_index_with_nan(closed=closed) + assert index.hasnans + + result = index.isna() + expected = np.array([False, True] + [False] * (len(index) - 2)) + tm.assert_numpy_array_equal(result, expected) + + result = index.notna() + expected = np.array([True, False] + [True] * (len(index) - 2)) + tm.assert_numpy_array_equal(result, expected) + + def test_copy(self, closed): + expected = self.create_index(closed=closed) + + result = expected.copy() + assert result.equals(expected) + + result = expected.copy(deep=True) + assert result.equals(expected) + assert result.left is not expected.left + + def test_ensure_copied_data(self, closed): + # exercise the copy flag in the constructor + + # not copying + index = self.create_index(closed=closed) + result = IntervalIndex(index, copy=False) + tm.assert_numpy_array_equal(index.left.values, result.left.values, + check_same='same') + tm.assert_numpy_array_equal(index.right.values, result.right.values, + check_same='same') + + # by-definition make a copy + result = IntervalIndex(index.values, copy=False) + tm.assert_numpy_array_equal(index.left.values, result.left.values, + check_same='copy') + tm.assert_numpy_array_equal(index.right.values, result.right.values, + check_same='copy') + + def test_equals(self, closed): + expected = IntervalIndex.from_breaks(np.arange(5), closed=closed) + assert expected.equals(expected) + assert expected.equals(expected.copy()) + + assert not expected.equals(expected.astype(object)) + assert not expected.equals(np.array(expected)) + assert not expected.equals(list(expected)) + + assert not expected.equals([1, 2]) + assert not 
expected.equals(np.array([1, 2])) + assert not expected.equals(pd.date_range('20130101', periods=2)) + + expected_name1 = IntervalIndex.from_breaks( + np.arange(5), closed=closed, name='foo') + expected_name2 = IntervalIndex.from_breaks( + np.arange(5), closed=closed, name='bar') + assert expected.equals(expected_name1) + assert expected_name1.equals(expected_name2) + + for other_closed in {'left', 'right', 'both', 'neither'} - {closed}: + expected_other_closed = IntervalIndex.from_breaks( + np.arange(5), closed=other_closed) + assert not expected.equals(expected_other_closed) + + @pytest.mark.parametrize('klass', [list, tuple, np.array, pd.Series]) + def test_where(self, closed, klass): + idx = self.create_index(closed=closed) + cond = [True] * len(idx) + expected = idx + result = expected.where(klass(cond)) + tm.assert_index_equal(result, expected) + + cond = [False] + [True] * len(idx[1:]) + expected = IntervalIndex([np.nan] + idx[1:].tolist()) + result = idx.where(klass(cond)) + tm.assert_index_equal(result, expected) + + def test_delete(self, closed): + expected = IntervalIndex.from_breaks(np.arange(1, 11), closed=closed) + result = self.create_index(closed=closed).delete(0) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize('data', [ + interval_range(0, periods=10, closed='neither'), + interval_range(1.7, periods=8, freq=2.5, closed='both'), + interval_range(Timestamp('20170101'), periods=12, closed='left'), + interval_range(Timedelta('1 day'), periods=6, closed='right')]) + def test_insert(self, data): + item = data[0] + idx_item = IntervalIndex([item]) + + # start + expected = idx_item.append(data) + result = data.insert(0, item) + tm.assert_index_equal(result, expected) + + # end + expected = data.append(idx_item) + result = data.insert(len(data), item) + tm.assert_index_equal(result, expected) + + # mid + expected = data[:3].append(idx_item).append(data[3:]) + result = data.insert(3, item) + tm.assert_index_equal(result, expected) + + 
# invalid type + msg = 'can only insert Interval objects and NA into an IntervalIndex' + with tm.assert_raises_regex(ValueError, msg): + data.insert(1, 'foo') + + # invalid closed + msg = 'inserted item must be closed on the same side as the index' + for closed in {'left', 'right', 'both', 'neither'} - {item.closed}: + with tm.assert_raises_regex(ValueError, msg): + bad_item = Interval(item.left, item.right, closed=closed) + data.insert(1, bad_item) + + # GH 18295 (test missing) + na_idx = IntervalIndex([np.nan], closed=data.closed) + for na in (np.nan, pd.NaT, None): + expected = data[:1].append(na_idx).append(data[1:]) + result = data.insert(1, na) + tm.assert_index_equal(result, expected) + + def test_take(self, closed): + index = self.create_index(closed=closed) + + result = index.take(range(10)) + tm.assert_index_equal(result, index) + + result = index.take([0, 0, 1]) + expected = IntervalIndex.from_arrays( + [0, 0, 1], [1, 1, 2], closed=closed) + tm.assert_index_equal(result, expected) + + def test_unique(self, closed): + # unique non-overlapping + idx = IntervalIndex.from_tuples( + [(0, 1), (2, 3), (4, 5)], closed=closed) + assert idx.is_unique + + # unique overlapping - distinct endpoints + idx = IntervalIndex.from_tuples([(0, 1), (0.5, 1.5)], closed=closed) + assert idx.is_unique + + # unique overlapping - shared endpoints + idx = pd.IntervalIndex.from_tuples( + [(1, 2), (1, 3), (2, 3)], closed=closed) + assert idx.is_unique + + # unique nested + idx = IntervalIndex.from_tuples([(-1, 1), (-2, 2)], closed=closed) + assert idx.is_unique + + # duplicate + idx = IntervalIndex.from_tuples( + [(0, 1), (0, 1), (2, 3)], closed=closed) + assert not idx.is_unique + + # empty + idx = IntervalIndex([], closed=closed) + assert idx.is_unique + + def test_monotonic(self, closed): + # increasing non-overlapping + idx = IntervalIndex.from_tuples( + [(0, 1), (2, 3), (4, 5)], closed=closed) + assert idx.is_monotonic + assert idx._is_strictly_monotonic_increasing + assert not 
idx.is_monotonic_decreasing + assert not idx._is_strictly_monotonic_decreasing + + # decreasing non-overlapping + idx = IntervalIndex.from_tuples( + [(4, 5), (2, 3), (1, 2)], closed=closed) + assert not idx.is_monotonic + assert not idx._is_strictly_monotonic_increasing + assert idx.is_monotonic_decreasing + assert idx._is_strictly_monotonic_decreasing + + # unordered non-overlapping + idx = IntervalIndex.from_tuples( + [(0, 1), (4, 5), (2, 3)], closed=closed) + assert not idx.is_monotonic + assert not idx._is_strictly_monotonic_increasing + assert not idx.is_monotonic_decreasing + assert not idx._is_strictly_monotonic_decreasing + + # increasing overlapping + idx = IntervalIndex.from_tuples( + [(0, 2), (0.5, 2.5), (1, 3)], closed=closed) + assert idx.is_monotonic + assert idx._is_strictly_monotonic_increasing + assert not idx.is_monotonic_decreasing + assert not idx._is_strictly_monotonic_decreasing + + # decreasing overlapping + idx = IntervalIndex.from_tuples( + [(1, 3), (0.5, 2.5), (0, 2)], closed=closed) + assert not idx.is_monotonic + assert not idx._is_strictly_monotonic_increasing + assert idx.is_monotonic_decreasing + assert idx._is_strictly_monotonic_decreasing + + # unordered overlapping + idx = IntervalIndex.from_tuples( + [(0.5, 2.5), (0, 2), (1, 3)], closed=closed) + assert not idx.is_monotonic + assert not idx._is_strictly_monotonic_increasing + assert not idx.is_monotonic_decreasing + assert not idx._is_strictly_monotonic_decreasing + + # increasing overlapping shared endpoints + idx = pd.IntervalIndex.from_tuples( + [(1, 2), (1, 3), (2, 3)], closed=closed) + assert idx.is_monotonic + assert idx._is_strictly_monotonic_increasing + assert not idx.is_monotonic_decreasing + assert not idx._is_strictly_monotonic_decreasing + + # decreasing overlapping shared endpoints + idx = pd.IntervalIndex.from_tuples( + [(2, 3), (1, 3), (1, 2)], closed=closed) + assert not idx.is_monotonic + assert not idx._is_strictly_monotonic_increasing + assert 
idx.is_monotonic_decreasing + assert idx._is_strictly_monotonic_decreasing + + # stationary + idx = IntervalIndex.from_tuples([(0, 1), (0, 1)], closed=closed) + assert idx.is_monotonic + assert not idx._is_strictly_monotonic_increasing + assert idx.is_monotonic_decreasing + assert not idx._is_strictly_monotonic_decreasing + + # empty + idx = IntervalIndex([], closed=closed) + assert idx.is_monotonic + assert idx._is_strictly_monotonic_increasing + assert idx.is_monotonic_decreasing + assert idx._is_strictly_monotonic_decreasing + + @pytest.mark.skip(reason='not a valid repr as we use interval notation') + def test_repr(self): + i = IntervalIndex.from_tuples([(0, 1), (1, 2)], closed='right') + expected = ("IntervalIndex(left=[0, 1]," + "\n right=[1, 2]," + "\n closed='right'," + "\n dtype='interval[int64]')") + assert repr(i) == expected + + i = IntervalIndex.from_tuples((Timestamp('20130101'), + Timestamp('20130102')), + (Timestamp('20130102'), + Timestamp('20130103')), + closed='right') + expected = ("IntervalIndex(left=['2013-01-01', '2013-01-02']," + "\n right=['2013-01-02', '2013-01-03']," + "\n closed='right'," + "\n dtype='interval[datetime64[ns]]')") + assert repr(i) == expected + + @pytest.mark.skip(reason='not a valid repr as we use interval notation') + def test_repr_max_seq_item_setting(self): + super(TestIntervalIndex, self).test_repr_max_seq_item_setting() + + @pytest.mark.skip(reason='not a valid repr as we use interval notation') + def test_repr_roundtrip(self): + super(TestIntervalIndex, self).test_repr_roundtrip() + + # TODO: check this behavior is consistent with test_interval_new.py + def test_get_item(self, closed): + i = IntervalIndex.from_arrays((0, 1, np.nan), (1, 2, np.nan), + closed=closed) + assert i[0] == Interval(0.0, 1.0, closed=closed) + assert i[1] == Interval(1.0, 2.0, closed=closed) + assert isna(i[2]) + + result = i[0:1] + expected = IntervalIndex.from_arrays((0.,), (1.,), closed=closed) + tm.assert_index_equal(result, expected) + 
+ result = i[0:2] + expected = IntervalIndex.from_arrays((0., 1), (1., 2.), closed=closed) + tm.assert_index_equal(result, expected) + + result = i[1:3] + expected = IntervalIndex.from_arrays((1., np.nan), (2., np.nan), + closed=closed) + tm.assert_index_equal(result, expected) + + # To be removed, replaced by test_interval_new.py (see #16316, #16386) + def test_get_loc_value(self): + pytest.raises(KeyError, self.index.get_loc, 0) + assert self.index.get_loc(0.5) == 0 + assert self.index.get_loc(1) == 0 + assert self.index.get_loc(1.5) == 1 + assert self.index.get_loc(2) == 1 + pytest.raises(KeyError, self.index.get_loc, -1) + pytest.raises(KeyError, self.index.get_loc, 3) + + idx = IntervalIndex.from_tuples([(0, 2), (1, 3)]) + assert idx.get_loc(0.5) == 0 + assert idx.get_loc(1) == 0 + tm.assert_numpy_array_equal(idx.get_loc(1.5), + np.array([0, 1], dtype='int64')) + tm.assert_numpy_array_equal(np.sort(idx.get_loc(2)), + np.array([0, 1], dtype='int64')) + assert idx.get_loc(3) == 1 + pytest.raises(KeyError, idx.get_loc, 3.5) + + idx = IntervalIndex.from_arrays([0, 2], [1, 3]) + pytest.raises(KeyError, idx.get_loc, 1.5) + + # To be removed, replaced by test_interval_new.py (see #16316, #16386) + def slice_locs_cases(self, breaks): + # TODO: same tests for more index types + index = IntervalIndex.from_breaks([0, 1, 2], closed='right') + assert index.slice_locs() == (0, 2) + assert index.slice_locs(0, 1) == (0, 1) + assert index.slice_locs(1, 1) == (0, 1) + assert index.slice_locs(0, 2) == (0, 2) + assert index.slice_locs(0.5, 1.5) == (0, 2) + assert index.slice_locs(0, 0.5) == (0, 1) + assert index.slice_locs(start=1) == (0, 2) + assert index.slice_locs(start=1.2) == (1, 2) + assert index.slice_locs(end=1) == (0, 1) + assert index.slice_locs(end=1.1) == (0, 2) + assert index.slice_locs(end=1.0) == (0, 1) + assert index.slice_locs(-1, -1) == (0, 0) + + index = IntervalIndex.from_breaks([0, 1, 2], closed='neither') + assert index.slice_locs(0, 1) == (0, 1) + assert 
index.slice_locs(0, 2) == (0, 2) + assert index.slice_locs(0.5, 1.5) == (0, 2) + assert index.slice_locs(1, 1) == (1, 1) + assert index.slice_locs(1, 2) == (1, 2) + + index = IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)], + closed='both') + assert index.slice_locs(1, 1) == (0, 1) + assert index.slice_locs(1, 2) == (0, 2) + + # To be removed, replaced by test_interval_new.py (see #16316, #16386) + def test_slice_locs_int64(self): + self.slice_locs_cases([0, 1, 2]) + + # To be removed, replaced by test_interval_new.py (see #16316, #16386) + def test_slice_locs_float64(self): + self.slice_locs_cases([0.0, 1.0, 2.0]) + + # To be removed, replaced by test_interval_new.py (see #16316, #16386) + # NOTE(review): no caller of this helper is visible in this section; + # the two "decreasing" tests below call slice_locs_cases instead. + def slice_locs_decreasing_cases(self, tuples): + index = IntervalIndex.from_tuples(tuples) + assert index.slice_locs(1.5, 0.5) == (1, 3) + assert index.slice_locs(2, 0) == (1, 3) + assert index.slice_locs(2, 1) == (1, 3) + assert index.slice_locs(3, 1.1) == (0, 3) + assert index.slice_locs(3, 3) == (0, 2) + assert index.slice_locs(3.5, 3.3) == (0, 1) + assert index.slice_locs(1, -3) == (2, 3) + + slice_locs = index.slice_locs(-1, -1) + assert slice_locs[0] == slice_locs[1] + + # To be removed, replaced by test_interval_new.py (see #16316, #16386) + # NOTE(review): calls slice_locs_cases, not slice_locs_decreasing_cases, + # so the decreasing assertions above are not exercised -- confirm + # they actually hold before redirecting this call. + def test_slice_locs_decreasing_int64(self): + self.slice_locs_cases([(2, 4), (1, 3), (0, 2)]) + + # To be removed, replaced by test_interval_new.py (see #16316, #16386) + # NOTE(review): same as above -- calls slice_locs_cases, not + # slice_locs_decreasing_cases; confirm intent. + def test_slice_locs_decreasing_float64(self): + self.slice_locs_cases([(2., 4.), (1., 3.), (0., 2.)]) + + # To be removed, replaced by test_interval_new.py (see #16316, #16386) + def test_slice_locs_fails(self): + index = IntervalIndex.from_tuples([(1, 2), (0, 1), (2, 3)]) + with pytest.raises(KeyError): + index.slice_locs(1, 2) + + # To be removed, replaced by test_interval_new.py (see #16316, #16386) + def test_get_loc_interval(self): + assert self.index.get_loc(Interval(0, 1)) == 0 + assert self.index.get_loc(Interval(0, 0.5)) == 0 + assert
self.index.get_loc(Interval(0, 1, 'left')) == 0 + pytest.raises(KeyError, self.index.get_loc, Interval(2, 3)) + pytest.raises(KeyError, self.index.get_loc, + Interval(-1, 0, 'left')) + + # To be removed, replaced by test_interval_new.py (see #16316, #16386) + def test_get_indexer(self): + actual = self.index.get_indexer([-1, 0, 0.5, 1, 1.5, 2, 3]) + expected = np.array([-1, -1, 0, 0, 1, 1, -1], dtype='intp') + tm.assert_numpy_array_equal(actual, expected) + + actual = self.index.get_indexer(self.index) + expected = np.array([0, 1], dtype='intp') + tm.assert_numpy_array_equal(actual, expected) + + index = IntervalIndex.from_breaks([0, 1, 2], closed='left') + actual = index.get_indexer([-1, 0, 0.5, 1, 1.5, 2, 3]) + expected = np.array([-1, 0, 0, 1, 1, -1, -1], dtype='intp') + tm.assert_numpy_array_equal(actual, expected) + + actual = self.index.get_indexer(index[:1]) + expected = np.array([0], dtype='intp') + tm.assert_numpy_array_equal(actual, expected) + + actual = self.index.get_indexer(index) + expected = np.array([-1, 1], dtype='intp') + tm.assert_numpy_array_equal(actual, expected) + + # To be removed, replaced by test_interval_new.py (see #16316, #16386) + def test_get_indexer_subintervals(self): + + # TODO: is this right? 
+ # return indexers for wholly contained subintervals + target = IntervalIndex.from_breaks(np.linspace(0, 2, 5)) + actual = self.index.get_indexer(target) + expected = np.array([0, 0, 1, 1], dtype='p') + tm.assert_numpy_array_equal(actual, expected) + + target = IntervalIndex.from_breaks([0, 0.67, 1.33, 2]) + actual = self.index.get_indexer(target) + expected = np.array([0, 0, 1, 1], dtype='intp') + tm.assert_numpy_array_equal(actual, expected) + + actual = self.index.get_indexer(target[[0, -1]]) + expected = np.array([0, 1], dtype='intp') + tm.assert_numpy_array_equal(actual, expected) + + target = IntervalIndex.from_breaks([0, 0.33, 0.67, 1], closed='left') + actual = self.index.get_indexer(target) + expected = np.array([0, 0, 0], dtype='intp') + tm.assert_numpy_array_equal(actual, expected) + + # To be removed, replaced by test_interval_new.py (see #16316, #16386) + def test_contains(self): + # Only endpoints are valid. + i = IntervalIndex.from_arrays([0, 1], [1, 2]) + + # Invalid + assert 0 not in i + assert 1 not in i + assert 2 not in i + + # Valid + assert Interval(0, 1) in i + assert Interval(0, 2) in i + assert Interval(0, 0.5) in i + assert Interval(3, 5) not in i + assert Interval(-1, 0, closed='left') not in i + + # To be removed, replaced by test_interval_new.py (see #16316, #16386) + def testcontains(self): + # can select values that are IN the range of a value + i = IntervalIndex.from_arrays([0, 1], [1, 2]) + + assert i.contains(0.1) + assert i.contains(0.5) + assert i.contains(1) + assert i.contains(Interval(0, 1)) + assert i.contains(Interval(0, 2)) + + # these overlaps completely + assert i.contains(Interval(0, 3)) + assert i.contains(Interval(1, 3)) + + assert not i.contains(20) + assert not i.contains(-20) + + def test_dropna(self, closed): + + expected = IntervalIndex.from_tuples( + [(0.0, 1.0), (1.0, 2.0)], closed=closed) + + ii = IntervalIndex.from_tuples([(0, 1), (1, 2), np.nan], closed=closed) + result = ii.dropna() + 
tm.assert_index_equal(result, expected) + + ii = IntervalIndex.from_arrays( + [0, 1, np.nan], [1, 2, np.nan], closed=closed) + result = ii.dropna() + tm.assert_index_equal(result, expected) + + # TODO: check this behavior is consistent with test_interval_new.py + def test_non_contiguous(self, closed): + index = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed) + target = [0.5, 1.5, 2.5] + actual = index.get_indexer(target) + expected = np.array([0, -1, 1], dtype='intp') + tm.assert_numpy_array_equal(actual, expected) + + assert 1.5 not in index + + def test_union(self, closed): + index = self.create_index(closed=closed) + other = IntervalIndex.from_breaks(range(5, 13), closed=closed) + + expected = IntervalIndex.from_breaks(range(13), closed=closed) + result = index.union(other) + tm.assert_index_equal(result, expected) + + result = other.union(index) + tm.assert_index_equal(result, expected) + + tm.assert_index_equal(index.union(index), index) + tm.assert_index_equal(index.union(index[:1]), index) + + # GH 19101: empty result, same dtype + index = IntervalIndex(np.array([], dtype='int64'), closed=closed) + result = index.union(index) + tm.assert_index_equal(result, index) + + # GH 19101: empty result, different dtypes + other = IntervalIndex(np.array([], dtype='float64'), closed=closed) + result = index.union(other) + tm.assert_index_equal(result, index) + + def test_intersection(self, closed): + index = self.create_index(closed=closed) + other = IntervalIndex.from_breaks(range(5, 13), closed=closed) + + expected = IntervalIndex.from_breaks(range(5, 11), closed=closed) + result = index.intersection(other) + tm.assert_index_equal(result, expected) + + result = other.intersection(index) + tm.assert_index_equal(result, expected) + + tm.assert_index_equal(index.intersection(index), index) + + # GH 19101: empty result, same dtype + other = IntervalIndex.from_breaks(range(300, 314), closed=closed) + expected = IntervalIndex(np.array([], dtype='int64'), 
closed=closed) + result = index.intersection(other) + tm.assert_index_equal(result, expected) + + # GH 19101: empty result, different dtypes + breaks = np.arange(300, 314, dtype='float64') + other = IntervalIndex.from_breaks(breaks, closed=closed) + result = index.intersection(other) + tm.assert_index_equal(result, expected) + + def test_difference(self, closed): + index = self.create_index(closed=closed) + tm.assert_index_equal(index.difference(index[:1]), index[1:]) + + # GH 19101: empty result, same dtype + result = index.difference(index) + expected = IntervalIndex(np.array([], dtype='int64'), closed=closed) + tm.assert_index_equal(result, expected) + + # GH 19101: empty result, different dtypes + other = IntervalIndex.from_arrays(index.left.astype('float64'), + index.right, closed=closed) + result = index.difference(other) + tm.assert_index_equal(result, expected) + + def test_symmetric_difference(self, closed): + index = self.create_index(closed=closed) + result = index[1:].symmetric_difference(index[:-1]) + expected = IntervalIndex([index[0], index[-1]]) + tm.assert_index_equal(result, expected) + + # GH 19101: empty result, same dtype + result = index.symmetric_difference(index) + expected = IntervalIndex(np.array([], dtype='int64'), closed=closed) + tm.assert_index_equal(result, expected) + + # GH 19101: empty result, different dtypes + other = IntervalIndex.from_arrays(index.left.astype('float64'), + index.right, closed=closed) + result = index.symmetric_difference(other) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize('op_name', [ + 'union', 'intersection', 'difference', 'symmetric_difference']) + def test_set_operation_errors(self, closed, op_name): + index = self.create_index(closed=closed) + set_op = getattr(index, op_name) + + # non-IntervalIndex + msg = ('the other index needs to be an IntervalIndex too, but ' + 'was type Int64Index') + with tm.assert_raises_regex(TypeError, msg): + set_op(Index([1, 2, 3])) + + # mixed closed 
+ msg = ('can only do set operations between two IntervalIndex objects ' + 'that are closed on the same side') + for other_closed in {'right', 'left', 'both', 'neither'} - {closed}: + other = self.create_index(closed=other_closed) + with tm.assert_raises_regex(ValueError, msg): + set_op(other) + + # GH 19016: incompatible dtypes + other = interval_range(Timestamp('20180101'), periods=9, closed=closed) + msg = ('can only do {op} between two IntervalIndex objects that have ' + 'compatible dtypes').format(op=op_name) + with tm.assert_raises_regex(TypeError, msg): + set_op(other) + + def test_isin(self, closed): + index = self.create_index(closed=closed) + + expected = np.array([True] + [False] * (len(index) - 1)) + result = index.isin(index[:1]) + tm.assert_numpy_array_equal(result, expected) + + result = index.isin([index[0]]) + tm.assert_numpy_array_equal(result, expected) + + other = IntervalIndex.from_breaks(np.arange(-2, 10), closed=closed) + expected = np.array([True] * (len(index) - 1) + [False]) + result = index.isin(other) + tm.assert_numpy_array_equal(result, expected) + + result = index.isin(other.tolist()) + tm.assert_numpy_array_equal(result, expected) + + for other_closed in {'right', 'left', 'both', 'neither'}: + other = self.create_index(closed=other_closed) + expected = np.repeat(closed == other_closed, len(index)) + result = index.isin(other) + tm.assert_numpy_array_equal(result, expected) + + result = index.isin(other.tolist()) + tm.assert_numpy_array_equal(result, expected) + + def test_comparison(self): + actual = Interval(0, 1) < self.index + expected = np.array([False, True]) + tm.assert_numpy_array_equal(actual, expected) + + actual = Interval(0.5, 1.5) < self.index + expected = np.array([False, True]) + tm.assert_numpy_array_equal(actual, expected) + actual = self.index > Interval(0.5, 1.5) + tm.assert_numpy_array_equal(actual, expected) + + actual = self.index == self.index + expected = np.array([True, True]) + 
tm.assert_numpy_array_equal(actual, expected) + actual = self.index <= self.index + tm.assert_numpy_array_equal(actual, expected) + actual = self.index >= self.index + tm.assert_numpy_array_equal(actual, expected) + + actual = self.index < self.index + expected = np.array([False, False]) + tm.assert_numpy_array_equal(actual, expected) + actual = self.index > self.index + tm.assert_numpy_array_equal(actual, expected) + + actual = self.index == IntervalIndex.from_breaks([0, 1, 2], 'left') + tm.assert_numpy_array_equal(actual, expected) + + actual = self.index == self.index.values + tm.assert_numpy_array_equal(actual, np.array([True, True])) + actual = self.index.values == self.index + tm.assert_numpy_array_equal(actual, np.array([True, True])) + actual = self.index <= self.index.values + tm.assert_numpy_array_equal(actual, np.array([True, True])) + actual = self.index != self.index.values + tm.assert_numpy_array_equal(actual, np.array([False, False])) + actual = self.index > self.index.values + tm.assert_numpy_array_equal(actual, np.array([False, False])) + actual = self.index.values > self.index + tm.assert_numpy_array_equal(actual, np.array([False, False])) + + # invalid comparisons + actual = self.index == 0 + tm.assert_numpy_array_equal(actual, np.array([False, False])) + actual = self.index == self.index.left + tm.assert_numpy_array_equal(actual, np.array([False, False])) + + with tm.assert_raises_regex(TypeError, 'unorderable types'): + self.index > 0 + with tm.assert_raises_regex(TypeError, 'unorderable types'): + self.index <= 0 + with pytest.raises(TypeError): + self.index > np.arange(2) + with pytest.raises(ValueError): + self.index > np.arange(3) + + def test_missing_values(self, closed): + idx = Index([np.nan, Interval(0, 1, closed=closed), + Interval(1, 2, closed=closed)]) + idx2 = IntervalIndex.from_arrays( + [np.nan, 0, 1], [np.nan, 1, 2], closed=closed) + assert idx.equals(idx2) + + with pytest.raises(ValueError): + IntervalIndex.from_arrays( + 
[np.nan, 0, 1], np.array([0, 1, 2]), closed=closed) + + tm.assert_numpy_array_equal(isna(idx), + np.array([True, False, False])) + + def test_sort_values(self, closed): + index = self.create_index(closed=closed) + + result = index.sort_values() + tm.assert_index_equal(result, index) + + result = index.sort_values(ascending=False) + tm.assert_index_equal(result, index[::-1]) + + # with nan + index = IntervalIndex([Interval(1, 2), np.nan, Interval(0, 1)]) + + result = index.sort_values() + expected = IntervalIndex([Interval(0, 1), Interval(1, 2), np.nan]) + tm.assert_index_equal(result, expected) + + result = index.sort_values(ascending=False) + expected = IntervalIndex([np.nan, Interval(1, 2), Interval(0, 1)]) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize('tz', [None, 'US/Eastern']) + def test_datetime(self, tz): + start = Timestamp('2000-01-01', tz=tz) + dates = date_range(start=start, periods=10) + index = IntervalIndex.from_breaks(dates) + + # test mid + start = Timestamp('2000-01-01T12:00', tz=tz) + expected = date_range(start=start, periods=9) + tm.assert_index_equal(index.mid, expected) + + # __contains__ doesn't check individual points + assert Timestamp('2000-01-01', tz=tz) not in index + assert Timestamp('2000-01-01T12', tz=tz) not in index + assert Timestamp('2000-01-02', tz=tz) not in index + iv_true = Interval(Timestamp('2000-01-01T08', tz=tz), + Timestamp('2000-01-01T18', tz=tz)) + iv_false = Interval(Timestamp('1999-12-31', tz=tz), + Timestamp('2000-01-01', tz=tz)) + assert iv_true in index + assert iv_false not in index + + # .contains does check individual points + assert not index.contains(Timestamp('2000-01-01', tz=tz)) + assert index.contains(Timestamp('2000-01-01T12', tz=tz)) + assert index.contains(Timestamp('2000-01-02', tz=tz)) + assert index.contains(iv_true) + assert not index.contains(iv_false) + + # test get_indexer + start = Timestamp('1999-12-31T12:00', tz=tz) + target = date_range(start=start, periods=7, 
freq='12H') + actual = index.get_indexer(target) + expected = np.array([-1, -1, 0, 0, 1, 1, 2], dtype='intp') + tm.assert_numpy_array_equal(actual, expected) + + start = Timestamp('2000-01-08T18:00', tz=tz) + target = date_range(start=start, periods=7, freq='6H') + actual = index.get_indexer(target) + expected = np.array([7, 7, 8, 8, 8, 8, -1], dtype='intp') + tm.assert_numpy_array_equal(actual, expected) + + def test_append(self, closed): + + index1 = IntervalIndex.from_arrays([0, 1], [1, 2], closed=closed) + index2 = IntervalIndex.from_arrays([1, 2], [2, 3], closed=closed) + + result = index1.append(index2) + expected = IntervalIndex.from_arrays( + [0, 1, 1, 2], [1, 2, 2, 3], closed=closed) + tm.assert_index_equal(result, expected) + + result = index1.append([index1, index2]) + expected = IntervalIndex.from_arrays( + [0, 1, 0, 1, 1, 2], [1, 2, 1, 2, 2, 3], closed=closed) + tm.assert_index_equal(result, expected) + + msg = ('can only append two IntervalIndex objects that are closed ' + 'on the same side') + for other_closed in {'left', 'right', 'both', 'neither'} - {closed}: + index_other_closed = IntervalIndex.from_arrays( + [0, 1], [1, 2], closed=other_closed) + with tm.assert_raises_regex(ValueError, msg): + index1.append(index_other_closed) + + def test_is_non_overlapping_monotonic(self, closed): + # Should be True in all cases + tpls = [(0, 1), (2, 3), (4, 5), (6, 7)] + idx = IntervalIndex.from_tuples(tpls, closed=closed) + assert idx.is_non_overlapping_monotonic is True + + idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed) + assert idx.is_non_overlapping_monotonic is True + + # Should be False in all cases (overlapping) + tpls = [(0, 2), (1, 3), (4, 5), (6, 7)] + idx = IntervalIndex.from_tuples(tpls, closed=closed) + assert idx.is_non_overlapping_monotonic is False + + idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed) + assert idx.is_non_overlapping_monotonic is False + + # Should be False in all cases (non-monotonic) + tpls = [(0, 1), (2, 
3), (6, 7), (4, 5)] + idx = IntervalIndex.from_tuples(tpls, closed=closed) + assert idx.is_non_overlapping_monotonic is False + + idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed) + assert idx.is_non_overlapping_monotonic is False + + # Should be False for closed='both', otherwise True (GH16560) + if closed == 'both': + idx = IntervalIndex.from_breaks(range(4), closed=closed) + assert idx.is_non_overlapping_monotonic is False + else: + idx = IntervalIndex.from_breaks(range(4), closed=closed) + assert idx.is_non_overlapping_monotonic is True + + @pytest.mark.parametrize('tuples', [ + lzip(range(10), range(1, 11)), + lzip(date_range('20170101', periods=10), + date_range('20170101', periods=10)), + lzip(timedelta_range('0 days', periods=10), + timedelta_range('1 day', periods=10))]) + def test_to_tuples(self, tuples): + # GH 18756 + idx = IntervalIndex.from_tuples(tuples) + result = idx.to_tuples() + expected = Index(com._asarray_tuplesafe(tuples)) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize('tuples', [ + lzip(range(10), range(1, 11)) + [np.nan], + lzip(date_range('20170101', periods=10), + date_range('20170101', periods=10)) + [np.nan], + lzip(timedelta_range('0 days', periods=10), + timedelta_range('1 day', periods=10)) + [np.nan]]) + @pytest.mark.parametrize('na_tuple', [True, False]) + def test_to_tuples_na(self, tuples, na_tuple): + # GH 18756 + idx = IntervalIndex.from_tuples(tuples) + result = idx.to_tuples(na_tuple=na_tuple) + + # check the non-NA portion + expected_notna = Index(com._asarray_tuplesafe(tuples[:-1])) + result_notna = result[:-1] + tm.assert_index_equal(result_notna, expected_notna) + + # check the NA portion + result_na = result[-1] + if na_tuple: + assert isinstance(result_na, tuple) + assert len(result_na) == 2 + assert all(isna(x) for x in result_na) + else: + assert isna(result_na) diff --git a/pandas/tests/indexes/interval/test_interval_new.py b/pandas/tests/indexes/interval/test_interval_new.py new file 
mode 100644 index 0000000000000..a0d11db46d316 --- /dev/null +++ b/pandas/tests/indexes/interval/test_interval_new.py @@ -0,0 +1,315 @@ +from __future__ import division + +import pytest +import numpy as np + +from pandas import Interval, IntervalIndex, Int64Index +import pandas.util.testing as tm + + +pytestmark = pytest.mark.skip(reason="new indexing tests for issue 16316") + + +class TestIntervalIndex(object): + + def _compare_tuple_of_numpy_array(self, result, expected): + lidx, ridx = result + lidx_expected, ridx_expected = expected + + tm.assert_numpy_array_equal(lidx, lidx_expected) + tm.assert_numpy_array_equal(ridx, ridx_expected) + + @pytest.mark.parametrize("idx_side", ['right', 'left', 'both', 'neither']) + @pytest.mark.parametrize("side", ['right', 'left', 'both', 'neither']) + def test_get_loc_interval(self, idx_side, side): + + idx = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=idx_side) + + for bound in [[0, 1], [1, 2], [2, 3], [3, 4], + [0, 2], [2.5, 3], [-1, 4]]: + # if get_loc is supplied an interval, it should only search + # for exact matches, not overlaps or covers, else KeyError. + if idx_side == side: + if bound == [0, 1]: + assert idx.get_loc(Interval(0, 1, closed=side)) == 0 + elif bound == [2, 3]: + assert idx.get_loc(Interval(2, 3, closed=side)) == 1 + else: + with pytest.raises(KeyError): + idx.get_loc(Interval(*bound, closed=side)) + else: + with pytest.raises(KeyError): + idx.get_loc(Interval(*bound, closed=side)) + + @pytest.mark.parametrize("idx_side", ['right', 'left', 'both', 'neither']) + @pytest.mark.parametrize("scalar", [-0.5, 0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5]) + def test_get_loc_scalar(self, idx_side, scalar): + + # correct = {side: {query: answer}}. 
+ # If query is not in the dict, that query should raise a KeyError + correct = {'right': {0.5: 0, 1: 0, 2.5: 1, 3: 1}, + 'left': {0: 0, 0.5: 0, 2: 1, 2.5: 1}, + 'both': {0: 0, 0.5: 0, 1: 0, 2: 1, 2.5: 1, 3: 1}, + 'neither': {0.5: 0, 2.5: 1}} + + idx = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=idx_side) + + # if get_loc is supplied a scalar, it should return the index of + # the interval which contains the scalar, or KeyError. + if scalar in correct[idx_side].keys(): + assert idx.get_loc(scalar) == correct[idx_side][scalar] + else: + pytest.raises(KeyError, idx.get_loc, scalar) + + def test_slice_locs_with_interval(self): + + # increasing monotonically + index = IntervalIndex.from_tuples([(0, 2), (1, 3), (2, 4)]) + + assert index.slice_locs( + start=Interval(0, 2), end=Interval(2, 4)) == (0, 3) + assert index.slice_locs(start=Interval(0, 2)) == (0, 3) + assert index.slice_locs(end=Interval(2, 4)) == (0, 3) + assert index.slice_locs(end=Interval(0, 2)) == (0, 1) + assert index.slice_locs( + start=Interval(2, 4), end=Interval(0, 2)) == (2, 1) + + # decreasing monotonically + index = IntervalIndex.from_tuples([(2, 4), (1, 3), (0, 2)]) + + assert index.slice_locs( + start=Interval(0, 2), end=Interval(2, 4)) == (2, 1) + assert index.slice_locs(start=Interval(0, 2)) == (2, 3) + assert index.slice_locs(end=Interval(2, 4)) == (0, 1) + assert index.slice_locs(end=Interval(0, 2)) == (0, 3) + assert index.slice_locs( + start=Interval(2, 4), end=Interval(0, 2)) == (0, 3) + + # sorted duplicates + index = IntervalIndex.from_tuples([(0, 2), (0, 2), (2, 4)]) + + assert index.slice_locs( + start=Interval(0, 2), end=Interval(2, 4)) == (0, 3) + assert index.slice_locs(start=Interval(0, 2)) == (0, 3) + assert index.slice_locs(end=Interval(2, 4)) == (0, 3) + assert index.slice_locs(end=Interval(0, 2)) == (0, 2) + assert index.slice_locs( + start=Interval(2, 4), end=Interval(0, 2)) == (2, 2) + + # unsorted duplicates + index = IntervalIndex.from_tuples([(0, 2), (2, 4), (0, 
2)]) + + pytest.raises(KeyError, index.slice_locs( + start=Interval(0, 2), end=Interval(2, 4))) + pytest.raises(KeyError, index.slice_locs(start=Interval(0, 2))) + assert index.slice_locs(end=Interval(2, 4)) == (0, 2) + pytest.raises(KeyError, index.slice_locs(end=Interval(0, 2))) + pytest.raises(KeyError, index.slice_locs( + start=Interval(2, 4), end=Interval(0, 2))) + + # another unsorted duplicates + index = IntervalIndex.from_tuples([(0, 2), (0, 2), (2, 4), (1, 3)]) + + assert index.slice_locs( + start=Interval(0, 2), end=Interval(2, 4)) == (0, 3) + assert index.slice_locs(start=Interval(0, 2)) == (0, 4) + assert index.slice_locs(end=Interval(2, 4)) == (0, 3) + assert index.slice_locs(end=Interval(0, 2)) == (0, 2) + assert index.slice_locs( + start=Interval(2, 4), end=Interval(0, 2)) == (2, 2) + + def test_slice_locs_with_ints_and_floats_succeeds(self): + + # increasing non-overlapping + index = IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)]) + + assert index.slice_locs(0, 1) == (0, 1) + assert index.slice_locs(0, 2) == (0, 2) + assert index.slice_locs(0, 3) == (0, 2) + assert index.slice_locs(3, 1) == (2, 1) + assert index.slice_locs(3, 4) == (2, 3) + assert index.slice_locs(0, 4) == (0, 3) + + # decreasing non-overlapping + index = IntervalIndex.from_tuples([(3, 4), (1, 2), (0, 1)]) + assert index.slice_locs(0, 1) == (3, 2) + assert index.slice_locs(0, 2) == (3, 1) + assert index.slice_locs(0, 3) == (3, 1) + assert index.slice_locs(3, 1) == (1, 2) + assert index.slice_locs(3, 4) == (1, 0) + assert index.slice_locs(0, 4) == (3, 0) + + @pytest.mark.parametrize("query", [[0, 1], [0, 2], [0, 3], + [3, 1], [3, 4], [0, 4]]) + def test_slice_locs_with_ints_and_floats_fails(self, query): + + # increasing overlapping + index = IntervalIndex.from_tuples([(0, 2), (1, 3), (2, 4)]) + pytest.raises(KeyError, index.slice_locs, query) + + # decreasing overlapping + index = IntervalIndex.from_tuples([(2, 4), (1, 3), (0, 2)]) + pytest.raises(KeyError, index.slice_locs, 
query) + + # sorted duplicates + index = IntervalIndex.from_tuples([(0, 2), (0, 2), (2, 4)]) + pytest.raises(KeyError, index.slice_locs, query) + + # unsorted duplicates + index = IntervalIndex.from_tuples([(0, 2), (2, 4), (0, 2)]) + pytest.raises(KeyError, index.slice_locs, query) + + # another unsorted duplicates + index = IntervalIndex.from_tuples([(0, 2), (0, 2), (2, 4), (1, 3)]) + pytest.raises(KeyError, index.slice_locs, query) + + @pytest.mark.parametrize("query", [ + Interval(1, 3, closed='right'), + Interval(1, 3, closed='left'), + Interval(1, 3, closed='both'), + Interval(1, 3, closed='neither'), + Interval(1, 4, closed='right'), + Interval(0, 4, closed='right'), + Interval(1, 2, closed='right')]) + @pytest.mark.parametrize("expected_result", [1, -1, -1, -1, -1, -1, -1]) + def test_get_indexer_with_interval_single_queries( + self, query, expected_result): + + index = IntervalIndex.from_tuples( + [(0, 2.5), (1, 3), (2, 4)], closed='right') + + result = index.get_indexer([query]) + expect = np.array([expected_result], dtype='intp') + tm.assert_numpy_array_equal(result, expect) + + @pytest.mark.parametrize("query", [ + [Interval(2, 4, closed='right'), Interval(1, 3, closed='right')], + [Interval(1, 3, closed='right'), Interval(0, 2, closed='right')], + [Interval(1, 3, closed='right'), Interval(1, 3, closed='left')]]) + @pytest.mark.parametrize("expected_result", [[2, 1], [1, -1], [1, -1]]) + def test_get_indexer_with_interval_multiple_queries( + self, query, expected_result): + + index = IntervalIndex.from_tuples( + [(0, 2.5), (1, 3), (2, 4)], closed='right') + + result = index.get_indexer(query) + expect = np.array(expected_result, dtype='intp') + tm.assert_numpy_array_equal(result, expect) + + @pytest.mark.parametrize( + "query", + [-0.5, 0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5]) + @pytest.mark.parametrize( + "expected_result", + [-1, -1, 0, 0, 1, 1, -1, -1, 2, 2, -1]) + def test_get_indexer_with_ints_and_floats_single_queries( + self, query, 
expected_result): + + index = IntervalIndex.from_tuples( + [(0, 1), (1, 2), (3, 4)], closed='right') + + result = index.get_indexer([query]) + expect = np.array([expected_result], dtype='intp') + tm.assert_numpy_array_equal(result, expect) + + @pytest.mark.parametrize( + "query", + [[1, 2], [1, 2, 3], [1, 2, 3, 4], [1, 2, 3, 4, 2]]) + @pytest.mark.parametrize( + "expected_result", + [[0, 1], [0, 1, -1], [0, 1, -1, 2], [0, 1, -1, 2, 1]]) + def test_get_indexer_with_ints_and_floats_multiple_queries( + self, query, expected_result): + + index = IntervalIndex.from_tuples( + [(0, 1), (1, 2), (3, 4)], closed='right') + + result = index.get_indexer(query) + expect = np.array(expected_result, dtype='intp') + tm.assert_numpy_array_equal(result, expect) + + index = IntervalIndex.from_tuples([(0, 2), (1, 3), (2, 4)]) + # TODO: @shoyer believes this should raise, master branch doesn't + + @pytest.mark.parametrize( + "query", + [-0.5, 0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5]) + @pytest.mark.parametrize("expected_result", [ + (Int64Index([], dtype='int64'), np.array([0])), + (Int64Index([0], dtype='int64'), np.array([])), + (Int64Index([0], dtype='int64'), np.array([])), + (Int64Index([0, 1], dtype='int64'), np.array([])), + (Int64Index([0, 1], dtype='int64'), np.array([])), + (Int64Index([0, 1, 2], dtype='int64'), np.array([])), + (Int64Index([1, 2], dtype='int64'), np.array([])), + (Int64Index([2], dtype='int64'), np.array([])), + (Int64Index([2], dtype='int64'), np.array([])), + (Int64Index([], dtype='int64'), np.array([0])), + (Int64Index([], dtype='int64'), np.array([0]))]) + def test_get_indexer_non_unique_with_ints_and_floats_single_queries( + self, query, expected_result): + + index = IntervalIndex.from_tuples( + [(0, 2.5), (1, 3), (2, 4)], closed='left') + + result = index.get_indexer_non_unique([query]) + tm.assert_numpy_array_equal(result, expected_result) + + @pytest.mark.parametrize( + "query", + [[1, 2], [1, 2, 3], [1, 2, 3, 4], [1, 2, 3, 4, 2]]) + 
@pytest.mark.parametrize("expected_result", [ + (Int64Index([0, 1, 0, 1, 2], dtype='int64'), np.array([])), + (Int64Index([0, 1, 0, 1, 2, 2], dtype='int64'), np.array([])), + (Int64Index([0, 1, 0, 1, 2, 2, -1], dtype='int64'), np.array([3])), + (Int64Index([0, 1, 0, 1, 2, 2, -1, 0, 1, 2], dtype='int64'), + np.array([3]))]) + def test_get_indexer_non_unique_with_ints_and_floats_multiple_queries( + self, query, expected_result): + + index = IntervalIndex.from_tuples( + [(0, 2.5), (1, 3), (2, 4)], closed='left') + + result = index.get_indexer_non_unique(query) + tm.assert_numpy_array_equal(result, expected_result) + + # TODO we may also want to test get_indexer for the case when + # the intervals are duplicated, decreasing, non-monotonic, etc.. + + def test_contains(self): + + index = IntervalIndex.from_arrays([0, 1], [1, 2], closed='right') + + # __contains__ requires perfect matches to intervals. + assert 0 not in index + assert 1 not in index + assert 2 not in index + + assert Interval(0, 1, closed='right') in index + assert Interval(0, 2, closed='right') not in index + assert Interval(0, 0.5, closed='right') not in index + assert Interval(3, 5, closed='right') not in index + assert Interval(-1, 0, closed='left') not in index + assert Interval(0, 1, closed='left') not in index + assert Interval(0, 1, closed='both') not in index + + def test_contains_method(self): + + index = IntervalIndex.from_arrays([0, 1], [1, 2], closed='right') + + assert not index.contains(0) + assert index.contains(0.1) + assert index.contains(0.5) + assert index.contains(1) + + assert index.contains(Interval(0, 1), closed='right') + assert not index.contains(Interval(0, 1), closed='left') + assert not index.contains(Interval(0, 1), closed='both') + assert not index.contains(Interval(0, 2), closed='right') + + assert not index.contains(Interval(0, 3), closed='right') + assert not index.contains(Interval(1, 3), closed='right') + + assert not index.contains(20) + assert not index.contains(-20) 
diff --git a/pandas/tests/indexes/interval/test_interval_range.py b/pandas/tests/indexes/interval/test_interval_range.py new file mode 100644 index 0000000000000..203e8e3128edc --- /dev/null +++ b/pandas/tests/indexes/interval/test_interval_range.py @@ -0,0 +1,301 @@ +from __future__ import division + +import pytest +import numpy as np +from datetime import timedelta +from pandas import ( + Interval, IntervalIndex, Timestamp, Timedelta, DateOffset, + interval_range, date_range, timedelta_range) +from pandas.tseries.offsets import Day +import pandas.util.testing as tm +import pandas as pd + + +@pytest.fixture(scope='class', params=['left', 'right', 'both', 'neither']) +def closed(request): + return request.param + + +@pytest.fixture(scope='class', params=[None, 'foo']) +def name(request): + return request.param + + +class TestIntervalRange(object): + + def test_construction_from_numeric(self, closed, name): + # combinations of start/end/periods without freq + expected = IntervalIndex.from_breaks( + np.arange(0, 6), name=name, closed=closed) + + result = interval_range(start=0, end=5, name=name, closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(start=0, periods=5, name=name, closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(end=5, periods=5, name=name, closed=closed) + tm.assert_index_equal(result, expected) + + # combinations of start/end/periods with freq + expected = IntervalIndex.from_tuples([(0, 2), (2, 4), (4, 6)], + name=name, closed=closed) + + result = interval_range(start=0, end=6, freq=2, name=name, + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(start=0, periods=3, freq=2, name=name, + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(end=6, periods=3, freq=2, name=name, + closed=closed) + tm.assert_index_equal(result, expected) + + # output truncates early if freq causes end to be skipped. 
+ expected = IntervalIndex.from_tuples([(0.0, 1.5), (1.5, 3.0)], + name=name, closed=closed) + result = interval_range(start=0, end=4, freq=1.5, name=name, + closed=closed) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize('tz', [None, 'US/Eastern']) + def test_construction_from_timestamp(self, closed, name, tz): + # combinations of start/end/periods without freq + start = Timestamp('2017-01-01', tz=tz) + end = Timestamp('2017-01-06', tz=tz) + breaks = date_range(start=start, end=end) + expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed) + + result = interval_range(start=start, end=end, name=name, + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(start=start, periods=5, name=name, + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(end=end, periods=5, name=name, + closed=closed) + tm.assert_index_equal(result, expected) + + # combinations of start/end/periods with fixed freq + freq = '2D' + start = Timestamp('2017-01-01', tz=tz) + end = Timestamp('2017-01-07', tz=tz) + breaks = date_range(start=start, end=end, freq=freq) + expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed) + + result = interval_range(start=start, end=end, freq=freq, name=name, + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(start=start, periods=3, freq=freq, name=name, + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(end=end, periods=3, freq=freq, name=name, + closed=closed) + tm.assert_index_equal(result, expected) + + # output truncates early if freq causes end to be skipped. 
+ end = Timestamp('2017-01-08', tz=tz) + result = interval_range(start=start, end=end, freq=freq, name=name, + closed=closed) + tm.assert_index_equal(result, expected) + + # combinations of start/end/periods with non-fixed freq + freq = 'M' + start = Timestamp('2017-01-01', tz=tz) + end = Timestamp('2017-12-31', tz=tz) + breaks = date_range(start=start, end=end, freq=freq) + expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed) + + result = interval_range(start=start, end=end, freq=freq, name=name, + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(start=start, periods=11, freq=freq, name=name, + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(end=end, periods=11, freq=freq, name=name, + closed=closed) + tm.assert_index_equal(result, expected) + + # output truncates early if freq causes end to be skipped. + end = Timestamp('2018-01-15', tz=tz) + result = interval_range(start=start, end=end, freq=freq, name=name, + closed=closed) + tm.assert_index_equal(result, expected) + + def test_construction_from_timedelta(self, closed, name): + # combinations of start/end/periods without freq + start, end = Timedelta('1 day'), Timedelta('6 days') + breaks = timedelta_range(start=start, end=end) + expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed) + + result = interval_range(start=start, end=end, name=name, + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(start=start, periods=5, name=name, + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(end=end, periods=5, name=name, + closed=closed) + tm.assert_index_equal(result, expected) + + # combinations of start/end/periods with fixed freq + freq = '2D' + start, end = Timedelta('1 day'), Timedelta('7 days') + breaks = timedelta_range(start=start, end=end, freq=freq) + expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed) + + result = 
interval_range(start=start, end=end, freq=freq, name=name, + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(start=start, periods=3, freq=freq, name=name, + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(end=end, periods=3, freq=freq, name=name, + closed=closed) + tm.assert_index_equal(result, expected) + + # output truncates early if freq causes end to be skipped. + end = Timedelta('7 days 1 hour') + result = interval_range(start=start, end=end, freq=freq, name=name, + closed=closed) + tm.assert_index_equal(result, expected) + + def test_constructor_coverage(self): + # float value for periods + expected = pd.interval_range(start=0, periods=10) + result = pd.interval_range(start=0, periods=10.5) + tm.assert_index_equal(result, expected) + + # equivalent timestamp-like start/end + start, end = Timestamp('2017-01-01'), Timestamp('2017-01-15') + expected = pd.interval_range(start=start, end=end) + + result = pd.interval_range(start=start.to_pydatetime(), + end=end.to_pydatetime()) + tm.assert_index_equal(result, expected) + + result = pd.interval_range(start=start.asm8, end=end.asm8) + tm.assert_index_equal(result, expected) + + # equivalent freq with timestamp + equiv_freq = ['D', Day(), Timedelta(days=1), timedelta(days=1), + DateOffset(days=1)] + for freq in equiv_freq: + result = pd.interval_range(start=start, end=end, freq=freq) + tm.assert_index_equal(result, expected) + + # equivalent timedelta-like start/end + start, end = Timedelta(days=1), Timedelta(days=10) + expected = pd.interval_range(start=start, end=end) + + result = pd.interval_range(start=start.to_pytimedelta(), + end=end.to_pytimedelta()) + tm.assert_index_equal(result, expected) + + result = pd.interval_range(start=start.asm8, end=end.asm8) + tm.assert_index_equal(result, expected) + + # equivalent freq with timedelta + equiv_freq = ['D', Day(), Timedelta(days=1), timedelta(days=1)] + for freq in equiv_freq: + result = 
pd.interval_range(start=start, end=end, freq=freq) + tm.assert_index_equal(result, expected) + + def test_errors(self): + # not enough params + msg = ('Of the three parameters: start, end, and periods, ' + 'exactly two must be specified') + + with tm.assert_raises_regex(ValueError, msg): + interval_range(start=0) + + with tm.assert_raises_regex(ValueError, msg): + interval_range(end=5) + + with tm.assert_raises_regex(ValueError, msg): + interval_range(periods=2) + + with tm.assert_raises_regex(ValueError, msg): + interval_range() + + # too many params + with tm.assert_raises_regex(ValueError, msg): + interval_range(start=0, end=5, periods=6) + + # mixed units + msg = 'start, end, freq need to be type compatible' + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=0, end=Timestamp('20130101'), freq=2) + + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=0, end=Timedelta('1 day'), freq=2) + + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=0, end=10, freq='D') + + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=Timestamp('20130101'), end=10, freq='D') + + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=Timestamp('20130101'), + end=Timedelta('1 day'), freq='D') + + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=Timestamp('20130101'), + end=Timestamp('20130110'), freq=2) + + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=Timedelta('1 day'), end=10, freq='D') + + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=Timedelta('1 day'), + end=Timestamp('20130110'), freq='D') + + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=Timedelta('1 day'), + end=Timedelta('10 days'), freq=2) + + # invalid periods + msg = 'periods must be a number, got foo' + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=0, periods='foo') + + # invalid start + msg = 'start must be numeric or datetime-like, 
got foo' + with tm.assert_raises_regex(ValueError, msg): + interval_range(start='foo', periods=10) + + # invalid end + msg = r'end must be numeric or datetime-like, got \(0, 1\]' + with tm.assert_raises_regex(ValueError, msg): + interval_range(end=Interval(0, 1), periods=10) + + # invalid freq for datetime-like + msg = 'freq must be numeric or convertible to DateOffset, got foo' + with tm.assert_raises_regex(ValueError, msg): + interval_range(start=0, end=10, freq='foo') + + with tm.assert_raises_regex(ValueError, msg): + interval_range(start=Timestamp('20130101'), periods=10, freq='foo') + + with tm.assert_raises_regex(ValueError, msg): + interval_range(end=Timedelta('1 day'), periods=10, freq='foo') + + # mixed tz + start = Timestamp('2017-01-01', tz='US/Eastern') + end = Timestamp('2017-01-07', tz='US/Pacific') + msg = 'Start and end cannot both be tz-aware with different timezones' + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=start, end=end) diff --git a/pandas/tests/indexes/interval/test_interval_tree.py b/pandas/tests/indexes/interval/test_interval_tree.py new file mode 100644 index 0000000000000..343131125f640 --- /dev/null +++ b/pandas/tests/indexes/interval/test_interval_tree.py @@ -0,0 +1,93 @@ +from __future__ import division + +import pytest +import numpy as np +from pandas import compat +from pandas._libs.interval import IntervalTree +import pandas.util.testing as tm + + +@pytest.fixture(scope='class', params=['left', 'right', 'both', 'neither']) +def closed(request): + return request.param + + +class TestIntervalTree(object): + def setup_method(self, method): + def gentree(dtype): + left = np.arange(5, dtype=dtype) + right = left + 2 + return IntervalTree(left, right) + + self.tree = gentree('int64') + self.trees = {dtype: gentree(dtype) + for dtype in ['int32', 'int64', 'float32', 'float64']} + + def test_get_loc(self): + for dtype, tree in self.trees.items(): + tm.assert_numpy_array_equal(tree.get_loc(1), + np.array([0], 
dtype='int64')) + tm.assert_numpy_array_equal(np.sort(tree.get_loc(2)), + np.array([0, 1], dtype='int64')) + with pytest.raises(KeyError): + tree.get_loc(-1) + + def test_get_indexer(self): + for dtype, tree in self.trees.items(): + tm.assert_numpy_array_equal( + tree.get_indexer(np.array([1.0, 5.5, 6.5])), + np.array([0, 4, -1], dtype='int64')) + with pytest.raises(KeyError): + tree.get_indexer(np.array([3.0])) + + def test_get_indexer_non_unique(self): + indexer, missing = self.tree.get_indexer_non_unique( + np.array([1.0, 2.0, 6.5])) + tm.assert_numpy_array_equal(indexer[:1], + np.array([0], dtype='int64')) + tm.assert_numpy_array_equal(np.sort(indexer[1:3]), + np.array([0, 1], dtype='int64')) + tm.assert_numpy_array_equal(np.sort(indexer[3:]), + np.array([-1], dtype='int64')) + tm.assert_numpy_array_equal(missing, np.array([2], dtype='int64')) + + def test_duplicates(self): + tree = IntervalTree([0, 0, 0], [1, 1, 1]) + tm.assert_numpy_array_equal(np.sort(tree.get_loc(0.5)), + np.array([0, 1, 2], dtype='int64')) + + with pytest.raises(KeyError): + tree.get_indexer(np.array([0.5])) + + indexer, missing = tree.get_indexer_non_unique(np.array([0.5])) + tm.assert_numpy_array_equal(np.sort(indexer), + np.array([0, 1, 2], dtype='int64')) + tm.assert_numpy_array_equal(missing, np.array([], dtype='int64')) + + def test_get_loc_closed(self, closed): + tree = IntervalTree([0], [1], closed=closed) + for p, errors in [(0, tree.open_left), + (1, tree.open_right)]: + if errors: + with pytest.raises(KeyError): + tree.get_loc(p) + else: + tm.assert_numpy_array_equal(tree.get_loc(p), + np.array([0], dtype='int64')) + + @pytest.mark.skipif(compat.is_platform_32bit(), + reason="int type mismatch on 32bit") + @pytest.mark.parametrize('leaf_size', [1, 10, 100, 10000]) + def test_get_indexer_closed(self, closed, leaf_size): + x = np.arange(1000, dtype='float64') + found = x.astype('intp') + not_found = (-1 * np.ones(1000)).astype('intp') + + tree = IntervalTree(x, x + 0.5, 
closed=closed, leaf_size=leaf_size) + tm.assert_numpy_array_equal(found, tree.get_indexer(x + 0.25)) + + expected = found if tree.closed_left else not_found + tm.assert_numpy_array_equal(expected, tree.get_indexer(x + 0.0)) + + expected = found if tree.closed_right else not_found + tm.assert_numpy_array_equal(expected, tree.get_indexer(x + 0.5)) diff --git a/pandas/tests/indexes/period/test_arithmetic.py b/pandas/tests/indexes/period/test_arithmetic.py new file mode 100644 index 0000000000000..c75fdd35a974c --- /dev/null +++ b/pandas/tests/indexes/period/test_arithmetic.py @@ -0,0 +1,885 @@ +# -*- coding: utf-8 -*- +from datetime import timedelta +import operator + +import pytest +import numpy as np + +import pandas as pd +import pandas.util.testing as tm +from pandas import (Timedelta, + period_range, Period, PeriodIndex, + _np_version_under1p10) +import pandas.core.indexes.period as period +from pandas.core import ops +from pandas.errors import PerformanceWarning + + +_common_mismatch = [pd.offsets.YearBegin(2), + pd.offsets.MonthBegin(1), + pd.offsets.Minute()] + + +@pytest.fixture(params=[timedelta(minutes=30), + np.timedelta64(30, 's'), + Timedelta(seconds=30)] + _common_mismatch) +def not_hourly(request): + """ + Several timedelta-like and DateOffset instances that are _not_ + compatible with Hourly frequencies. + """ + return request.param + + +@pytest.fixture(params=[np.timedelta64(4, 'h'), + timedelta(hours=23), + Timedelta('23:00:00')] + _common_mismatch) +def not_daily(request): + """ + Several timedelta-like and DateOffset instances that are _not_ + compatible with Daily frequencies. + """ + return request.param + + +@pytest.fixture(params=[np.timedelta64(365, 'D'), + timedelta(365), + Timedelta(days=365)] + _common_mismatch) +def mismatched(request): + """ + Several timedelta-like and DateOffset instances that are _not_ + compatible with Monthly or Annual frequencies. 
+ """ + return request.param + + +@pytest.fixture(params=[pd.offsets.Day(3), + timedelta(days=3), + np.timedelta64(3, 'D'), + pd.offsets.Hour(72), + timedelta(minutes=60 * 24 * 3), + np.timedelta64(72, 'h'), + Timedelta('72:00:00')]) +def three_days(request): + """ + Several timedelta-like and DateOffset objects that each represent + a 3-day timedelta + """ + return request.param + + +@pytest.fixture(params=[pd.offsets.Hour(2), + timedelta(hours=2), + np.timedelta64(2, 'h'), + pd.offsets.Minute(120), + timedelta(minutes=120), + np.timedelta64(120, 'm')]) +def two_hours(request): + """ + Several timedelta-like and DateOffset objects that each represent + a 2-hour timedelta + """ + return request.param + + +class TestPeriodIndexComparisons(object): + def test_pi_cmp_period(self): + idx = period_range('2007-01', periods=20, freq='M') + + result = idx < idx[10] + exp = idx.values < idx.values[10] + tm.assert_numpy_array_equal(result, exp) + + @pytest.mark.parametrize('freq', ['M', '2M', '3M']) + def test_pi_cmp_pi(self, freq): + base = PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04'], + freq=freq) + per = Period('2011-02', freq=freq) + + exp = np.array([False, True, False, False]) + tm.assert_numpy_array_equal(base == per, exp) + tm.assert_numpy_array_equal(per == base, exp) + + exp = np.array([True, False, True, True]) + tm.assert_numpy_array_equal(base != per, exp) + tm.assert_numpy_array_equal(per != base, exp) + + exp = np.array([False, False, True, True]) + tm.assert_numpy_array_equal(base > per, exp) + tm.assert_numpy_array_equal(per < base, exp) + + exp = np.array([True, False, False, False]) + tm.assert_numpy_array_equal(base < per, exp) + tm.assert_numpy_array_equal(per > base, exp) + + exp = np.array([False, True, True, True]) + tm.assert_numpy_array_equal(base >= per, exp) + tm.assert_numpy_array_equal(per <= base, exp) + + exp = np.array([True, True, False, False]) + tm.assert_numpy_array_equal(base <= per, exp) + tm.assert_numpy_array_equal(per >= 
base, exp) + + idx = PeriodIndex(['2011-02', '2011-01', '2011-03', '2011-05'], + freq=freq) + + exp = np.array([False, False, True, False]) + tm.assert_numpy_array_equal(base == idx, exp) + + exp = np.array([True, True, False, True]) + tm.assert_numpy_array_equal(base != idx, exp) + + exp = np.array([False, True, False, False]) + tm.assert_numpy_array_equal(base > idx, exp) + + exp = np.array([True, False, False, True]) + tm.assert_numpy_array_equal(base < idx, exp) + + exp = np.array([False, True, True, False]) + tm.assert_numpy_array_equal(base >= idx, exp) + + exp = np.array([True, False, True, True]) + tm.assert_numpy_array_equal(base <= idx, exp) + + @pytest.mark.parametrize('freq', ['M', '2M', '3M']) + def test_pi_cmp_pi_mismatched_freq_raises(self, freq): + # different base freq + base = PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04'], + freq=freq) + + msg = "Input has different freq=A-DEC from PeriodIndex" + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + base <= Period('2011', freq='A') + + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + Period('2011', freq='A') >= base + + idx = PeriodIndex(['2011', '2012', '2013', '2014'], freq='A') + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + base <= idx + + # Different frequency + msg = "Input has different freq=4M from PeriodIndex" + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + base <= Period('2011', freq='4M') + + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + Period('2011', freq='4M') >= base + + idx = PeriodIndex(['2011', '2012', '2013', '2014'], freq='4M') + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + base <= idx + + @pytest.mark.parametrize('freq', ['M', '2M', '3M']) + def test_pi_cmp_nat(self, freq): + idx1 = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-05'], freq=freq) + + result = idx1 > Period('2011-02', freq=freq) + exp = np.array([False, False, False, True]) + 
tm.assert_numpy_array_equal(result, exp) + result = Period('2011-02', freq=freq) < idx1 + tm.assert_numpy_array_equal(result, exp) + + result = idx1 == Period('NaT', freq=freq) + exp = np.array([False, False, False, False]) + tm.assert_numpy_array_equal(result, exp) + result = Period('NaT', freq=freq) == idx1 + tm.assert_numpy_array_equal(result, exp) + + result = idx1 != Period('NaT', freq=freq) + exp = np.array([True, True, True, True]) + tm.assert_numpy_array_equal(result, exp) + result = Period('NaT', freq=freq) != idx1 + tm.assert_numpy_array_equal(result, exp) + + idx2 = PeriodIndex(['2011-02', '2011-01', '2011-04', 'NaT'], freq=freq) + result = idx1 < idx2 + exp = np.array([True, False, False, False]) + tm.assert_numpy_array_equal(result, exp) + + result = idx1 == idx2 + exp = np.array([False, False, False, False]) + tm.assert_numpy_array_equal(result, exp) + + result = idx1 != idx2 + exp = np.array([True, True, True, True]) + tm.assert_numpy_array_equal(result, exp) + + result = idx1 == idx1 + exp = np.array([True, True, False, True]) + tm.assert_numpy_array_equal(result, exp) + + result = idx1 != idx1 + exp = np.array([False, False, True, False]) + tm.assert_numpy_array_equal(result, exp) + + @pytest.mark.parametrize('freq', ['M', '2M', '3M']) + def test_pi_cmp_nat_mismatched_freq_raises(self, freq): + idx1 = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-05'], freq=freq) + + diff = PeriodIndex(['2011-02', '2011-01', '2011-04', 'NaT'], freq='4M') + msg = "Input has different freq=4M from PeriodIndex" + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + idx1 > diff + + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + idx1 == diff + + # TODO: De-duplicate with test_pi_cmp_nat + @pytest.mark.parametrize('dtype', [object, None]) + def test_comp_nat(self, dtype): + left = pd.PeriodIndex([pd.Period('2011-01-01'), pd.NaT, + pd.Period('2011-01-03')]) + right = pd.PeriodIndex([pd.NaT, pd.NaT, pd.Period('2011-01-03')]) + + if dtype 
is not None: + left = left.astype(dtype) + right = right.astype(dtype) + + result = left == right + expected = np.array([False, False, True]) + tm.assert_numpy_array_equal(result, expected) + + result = left != right + expected = np.array([True, True, False]) + tm.assert_numpy_array_equal(result, expected) + + expected = np.array([False, False, False]) + tm.assert_numpy_array_equal(left == pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT == right, expected) + + expected = np.array([True, True, True]) + tm.assert_numpy_array_equal(left != pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT != left, expected) + + expected = np.array([False, False, False]) + tm.assert_numpy_array_equal(left < pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT > left, expected) + + +class TestPeriodIndexArithmetic(object): + + # ------------------------------------------------------------- + # Invalid Operations + + @pytest.mark.parametrize('other', [3.14, np.array([2.0, 3.0])]) + @pytest.mark.parametrize('op', [operator.add, ops.radd, + operator.sub, ops.rsub]) + def test_pi_add_sub_float(self, op, other): + dti = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D') + pi = dti.to_period('D') + with pytest.raises(TypeError): + op(pi, other) + + # ----------------------------------------------------------------- + # __add__/__sub__ with ndarray[datetime64] and ndarray[timedelta64] + + def test_pi_add_sub_dt64_array_raises(self): + rng = pd.period_range('1/1/2000', freq='D', periods=3) + dti = pd.date_range('2016-01-01', periods=3) + dtarr = dti.values + + with pytest.raises(TypeError): + rng + dtarr + with pytest.raises(TypeError): + dtarr + rng + + with pytest.raises(TypeError): + rng - dtarr + with pytest.raises(TypeError): + dtarr - rng + + def test_pi_add_sub_td64_array_non_tick_raises(self): + rng = pd.period_range('1/1/2000', freq='Q', periods=3) + dti = pd.date_range('2016-01-01', periods=3) + tdi = dti - dti.shift(1) + tdarr = tdi.values + + with 
pytest.raises(period.IncompatibleFrequency): + rng + tdarr + with pytest.raises(period.IncompatibleFrequency): + tdarr + rng + + with pytest.raises(period.IncompatibleFrequency): + rng - tdarr + with pytest.raises(period.IncompatibleFrequency): + tdarr - rng + + @pytest.mark.xfail(reason='op with TimedeltaIndex raises, with ndarray OK') + def test_pi_add_sub_td64_array_tick(self): + rng = pd.period_range('1/1/2000', freq='Q', periods=3) + dti = pd.date_range('2016-01-01', periods=3) + tdi = dti - dti.shift(1) + tdarr = tdi.values + + expected = rng + tdi + result = rng + tdarr + tm.assert_index_equal(result, expected) + result = tdarr + rng + tm.assert_index_equal(result, expected) + + expected = rng - tdi + result = rng - tdarr + tm.assert_index_equal(result, expected) + + with pytest.raises(TypeError): + tdarr - rng + + # ----------------------------------------------------------------- + # operations with array/Index of DateOffset objects + + @pytest.mark.parametrize('box', [np.array, pd.Index]) + def test_pi_add_offset_array(self, box): + # GH#18849 + pi = pd.PeriodIndex([pd.Period('2015Q1'), pd.Period('2016Q2')]) + offs = box([pd.offsets.QuarterEnd(n=1, startingMonth=12), + pd.offsets.QuarterEnd(n=-2, startingMonth=12)]) + expected = pd.PeriodIndex([pd.Period('2015Q2'), pd.Period('2015Q4')]) + + with tm.assert_produces_warning(PerformanceWarning): + res = pi + offs + tm.assert_index_equal(res, expected) + + with tm.assert_produces_warning(PerformanceWarning): + res2 = offs + pi + tm.assert_index_equal(res2, expected) + + unanchored = np.array([pd.offsets.Hour(n=1), + pd.offsets.Minute(n=-2)]) + # addition/subtraction ops with incompatible offsets should issue + # a PerformanceWarning and _then_ raise a TypeError. 
+ with pytest.raises(period.IncompatibleFrequency): + with tm.assert_produces_warning(PerformanceWarning): + pi + unanchored + with pytest.raises(period.IncompatibleFrequency): + with tm.assert_produces_warning(PerformanceWarning): + unanchored + pi + + @pytest.mark.parametrize('box', [np.array, pd.Index]) + def test_pi_sub_offset_array(self, box): + # GH#18824 + pi = pd.PeriodIndex([pd.Period('2015Q1'), pd.Period('2016Q2')]) + other = box([pd.offsets.QuarterEnd(n=1, startingMonth=12), + pd.offsets.QuarterEnd(n=-2, startingMonth=12)]) + + expected = PeriodIndex([pi[n] - other[n] for n in range(len(pi))]) + + with tm.assert_produces_warning(PerformanceWarning): + res = pi - other + tm.assert_index_equal(res, expected) + + anchored = box([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)]) + + # addition/subtraction ops with anchored offsets should issue + # a PerformanceWarning and _then_ raise a TypeError. + with pytest.raises(period.IncompatibleFrequency): + with tm.assert_produces_warning(PerformanceWarning): + pi - anchored + with pytest.raises(period.IncompatibleFrequency): + with tm.assert_produces_warning(PerformanceWarning): + anchored - pi + + def test_pi_add_iadd_pi_raises(self): + rng = pd.period_range('1/1/2000', freq='D', periods=5) + other = pd.period_range('1/6/2000', freq='D', periods=5) + + # previously performed setop union, now raises TypeError (GH14164) + with pytest.raises(TypeError): + rng + other + + with pytest.raises(TypeError): + rng += other + + def test_pi_add_iadd_int(self, one): + # Variants of `one` for #19012 + rng = pd.period_range('2000-01-01 09:00', freq='H', periods=10) + result = rng + one + expected = pd.period_range('2000-01-01 10:00', freq='H', periods=10) + tm.assert_index_equal(result, expected) + rng += one + tm.assert_index_equal(rng, expected) + + def test_pi_sub_isub_int(self, one): + """ + PeriodIndex.__sub__ and __isub__ with several representations of + the integer 1, e.g. int, long, np.int64, np.uint8, ... 
+ """ + rng = pd.period_range('2000-01-01 09:00', freq='H', periods=10) + result = rng - one + expected = pd.period_range('2000-01-01 08:00', freq='H', periods=10) + tm.assert_index_equal(result, expected) + rng -= one + tm.assert_index_equal(rng, expected) + + @pytest.mark.parametrize('five', [5, np.array(5, dtype=np.int64)]) + def test_pi_sub_intlike(self, five): + rng = period_range('2007-01', periods=50) + + result = rng - five + exp = rng + (-five) + tm.assert_index_equal(result, exp) + + def test_pi_sub_isub_pi_raises(self): + # previously performed setop, now raises TypeError (GH14164) + # TODO needs to wait on #13077 for decision on result type + rng = pd.period_range('1/1/2000', freq='D', periods=5) + other = pd.period_range('1/6/2000', freq='D', periods=5) + + with pytest.raises(TypeError): + rng - other + + with pytest.raises(TypeError): + rng -= other + + def test_pi_sub_isub_offset(self): + # offset + # DateOffset + rng = pd.period_range('2014', '2024', freq='A') + result = rng - pd.offsets.YearEnd(5) + expected = pd.period_range('2009', '2019', freq='A') + tm.assert_index_equal(result, expected) + rng -= pd.offsets.YearEnd(5) + tm.assert_index_equal(rng, expected) + + rng = pd.period_range('2014-01', '2016-12', freq='M') + result = rng - pd.offsets.MonthEnd(5) + expected = pd.period_range('2013-08', '2016-07', freq='M') + tm.assert_index_equal(result, expected) + + rng -= pd.offsets.MonthEnd(5) + tm.assert_index_equal(rng, expected) + + # --------------------------------------------------------------- + # Timedelta-like (timedelta, timedelta64, Timedelta, Tick) + # TODO: Some of these are misnomers because of non-Tick DateOffsets + + def test_pi_add_iadd_timedeltalike_daily(self, three_days): + # Tick + other = three_days + rng = pd.period_range('2014-05-01', '2014-05-15', freq='D') + expected = pd.period_range('2014-05-04', '2014-05-18', freq='D') + + result = rng + other + tm.assert_index_equal(result, expected) + + rng += other + 
tm.assert_index_equal(rng, expected) + + def test_pi_sub_isub_timedeltalike_daily(self, three_days): + # Tick-like 3 Days + other = three_days + rng = pd.period_range('2014-05-01', '2014-05-15', freq='D') + expected = pd.period_range('2014-04-28', '2014-05-12', freq='D') + + result = rng - other + tm.assert_index_equal(result, expected) + + rng -= other + tm.assert_index_equal(rng, expected) + + def test_pi_add_iadd_timedeltalike_freq_mismatch_daily(self, not_daily): + other = not_daily + rng = pd.period_range('2014-05-01', '2014-05-15', freq='D') + msg = 'Input has different freq(=.+)? from PeriodIndex\\(freq=D\\)' + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + rng + other + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + rng += other + + def test_pi_sub_timedeltalike_freq_mismatch_daily(self, not_daily): + other = not_daily + rng = pd.period_range('2014-05-01', '2014-05-15', freq='D') + msg = 'Input has different freq(=.+)? from PeriodIndex\\(freq=D\\)' + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + rng - other + + def test_pi_add_iadd_timedeltalike_hourly(self, two_hours): + other = two_hours + rng = pd.period_range('2014-01-01 10:00', '2014-01-05 10:00', freq='H') + expected = pd.period_range('2014-01-01 12:00', '2014-01-05 12:00', + freq='H') + + result = rng + other + tm.assert_index_equal(result, expected) + + rng += other + tm.assert_index_equal(rng, expected) + + def test_pi_add_timedeltalike_mismatched_freq_hourly(self, not_hourly): + other = not_hourly + rng = pd.period_range('2014-01-01 10:00', '2014-01-05 10:00', freq='H') + msg = 'Input has different freq(=.+)? 
from PeriodIndex\\(freq=H\\)' + + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + rng + other + + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + rng += other + + def test_pi_sub_isub_timedeltalike_hourly(self, two_hours): + other = two_hours + rng = pd.period_range('2014-01-01 10:00', '2014-01-05 10:00', freq='H') + expected = pd.period_range('2014-01-01 08:00', '2014-01-05 08:00', + freq='H') + + result = rng - other + tm.assert_index_equal(result, expected) + + rng -= other + tm.assert_index_equal(rng, expected) + + def test_add_iadd_timedeltalike_annual(self): + # offset + # DateOffset + rng = pd.period_range('2014', '2024', freq='A') + result = rng + pd.offsets.YearEnd(5) + expected = pd.period_range('2019', '2029', freq='A') + tm.assert_index_equal(result, expected) + rng += pd.offsets.YearEnd(5) + tm.assert_index_equal(rng, expected) + + def test_pi_add_iadd_timedeltalike_freq_mismatch_annual(self, mismatched): + other = mismatched + rng = pd.period_range('2014', '2024', freq='A') + msg = ('Input has different freq(=.+)? ' + 'from PeriodIndex\\(freq=A-DEC\\)') + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + rng + other + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + rng += other + + def test_pi_sub_isub_timedeltalike_freq_mismatch_annual(self, mismatched): + other = mismatched + rng = pd.period_range('2014', '2024', freq='A') + msg = ('Input has different freq(=.+)? 
' + 'from PeriodIndex\\(freq=A-DEC\\)') + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + rng - other + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + rng -= other + + def test_pi_add_iadd_timedeltalike_M(self): + rng = pd.period_range('2014-01', '2016-12', freq='M') + expected = pd.period_range('2014-06', '2017-05', freq='M') + + result = rng + pd.offsets.MonthEnd(5) + tm.assert_index_equal(result, expected) + + rng += pd.offsets.MonthEnd(5) + tm.assert_index_equal(rng, expected) + + def test_pi_add_iadd_timedeltalike_freq_mismatch_monthly(self, mismatched): + other = mismatched + rng = pd.period_range('2014-01', '2016-12', freq='M') + msg = 'Input has different freq(=.+)? from PeriodIndex\\(freq=M\\)' + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + rng + other + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + rng += other + + def test_pi_sub_isub_timedeltalike_freq_mismatch_monthly(self, mismatched): + other = mismatched + rng = pd.period_range('2014-01', '2016-12', freq='M') + msg = 'Input has different freq(=.+)? 
from PeriodIndex\\(freq=M\\)' + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + rng - other + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + rng -= other + + # --------------------------------------------------------------- + # PeriodIndex.shift is used by __add__ and __sub__ + + def test_pi_shift_ndarray(self): + idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-04'], + freq='M', name='idx') + result = idx.shift(np.array([1, 2, 3, 4])) + expected = PeriodIndex(['2011-02', '2011-04', 'NaT', '2011-08'], + freq='M', name='idx') + tm.assert_index_equal(result, expected) + + result = idx.shift(np.array([1, -2, 3, -4])) + expected = PeriodIndex(['2011-02', '2010-12', 'NaT', '2010-12'], + freq='M', name='idx') + tm.assert_index_equal(result, expected) + + def test_shift(self): + pi1 = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') + pi2 = PeriodIndex(freq='A', start='1/1/2002', end='12/1/2010') + + tm.assert_index_equal(pi1.shift(0), pi1) + + assert len(pi1) == len(pi2) + tm.assert_index_equal(pi1.shift(1), pi2) + + pi1 = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') + pi2 = PeriodIndex(freq='A', start='1/1/2000', end='12/1/2008') + assert len(pi1) == len(pi2) + tm.assert_index_equal(pi1.shift(-1), pi2) + + pi1 = PeriodIndex(freq='M', start='1/1/2001', end='12/1/2009') + pi2 = PeriodIndex(freq='M', start='2/1/2001', end='1/1/2010') + assert len(pi1) == len(pi2) + tm.assert_index_equal(pi1.shift(1), pi2) + + pi1 = PeriodIndex(freq='M', start='1/1/2001', end='12/1/2009') + pi2 = PeriodIndex(freq='M', start='12/1/2000', end='11/1/2009') + assert len(pi1) == len(pi2) + tm.assert_index_equal(pi1.shift(-1), pi2) + + pi1 = PeriodIndex(freq='D', start='1/1/2001', end='12/1/2009') + pi2 = PeriodIndex(freq='D', start='1/2/2001', end='12/2/2009') + assert len(pi1) == len(pi2) + tm.assert_index_equal(pi1.shift(1), pi2) + + pi1 = PeriodIndex(freq='D', start='1/1/2001', end='12/1/2009') + pi2 = PeriodIndex(freq='D', 
start='12/31/2000', end='11/30/2009') + assert len(pi1) == len(pi2) + tm.assert_index_equal(pi1.shift(-1), pi2) + + def test_shift_corner_cases(self): + # GH#9903 + idx = pd.PeriodIndex([], name='xxx', freq='H') + + with pytest.raises(TypeError): + # period shift doesn't accept freq + idx.shift(1, freq='H') + + tm.assert_index_equal(idx.shift(0), idx) + tm.assert_index_equal(idx.shift(3), idx) + + idx = pd.PeriodIndex(['2011-01-01 10:00', '2011-01-01 11:00', + '2011-01-01 12:00'], name='xxx', freq='H') + tm.assert_index_equal(idx.shift(0), idx) + exp = pd.PeriodIndex(['2011-01-01 13:00', '2011-01-01 14:00', + '2011-01-01 15:00'], name='xxx', freq='H') + tm.assert_index_equal(idx.shift(3), exp) + exp = pd.PeriodIndex(['2011-01-01 07:00', '2011-01-01 08:00', + '2011-01-01 09:00'], name='xxx', freq='H') + tm.assert_index_equal(idx.shift(-3), exp) + + def test_shift_nat(self): + idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-04'], + freq='M', name='idx') + result = idx.shift(1) + expected = PeriodIndex(['2011-02', '2011-03', 'NaT', '2011-05'], + freq='M', name='idx') + tm.assert_index_equal(result, expected) + assert result.name == expected.name + + def test_shift_gh8083(self): + # test shift for PeriodIndex + # GH#8083 + drange = pd.period_range('20130101', periods=5, freq='D') + result = drange.shift(1) + expected = PeriodIndex(['2013-01-02', '2013-01-03', '2013-01-04', + '2013-01-05', '2013-01-06'], freq='D') + tm.assert_index_equal(result, expected) + + +class TestPeriodIndexSeriesMethods(object): + """ Test PeriodIndex and Period Series Ops consistency """ + + def _check(self, values, func, expected): + idx = pd.PeriodIndex(values) + result = func(idx) + if isinstance(expected, pd.Index): + tm.assert_index_equal(result, expected) + else: + # comp op results in bool + tm.assert_numpy_array_equal(result, expected) + + ser = pd.Series(values) + result = func(ser) + + exp = pd.Series(expected, name=values.name) + tm.assert_series_equal(result, exp) + + def 
test_pi_ops(self): + idx = PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04'], + freq='M', name='idx') + + expected = PeriodIndex(['2011-03', '2011-04', '2011-05', '2011-06'], + freq='M', name='idx') + self._check(idx, lambda x: x + 2, expected) + self._check(idx, lambda x: 2 + x, expected) + + self._check(idx + 2, lambda x: x - 2, idx) + result = idx - Period('2011-01', freq='M') + exp = pd.Index([0, 1, 2, 3], name='idx') + tm.assert_index_equal(result, exp) + + result = Period('2011-01', freq='M') - idx + exp = pd.Index([0, -1, -2, -3], name='idx') + tm.assert_index_equal(result, exp) + + @pytest.mark.parametrize('ng', ["str", 1.5]) + def test_pi_ops_errors(self, ng): + idx = PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04'], + freq='M', name='idx') + ser = pd.Series(idx) + + msg = r"unsupported operand type\(s\)" + + for obj in [idx, ser]: + with tm.assert_raises_regex(TypeError, msg): + obj + ng + + with pytest.raises(TypeError): + # error message differs between PY2 and 3 + ng + obj + + with tm.assert_raises_regex(TypeError, msg): + obj - ng + + with pytest.raises(TypeError): + np.add(obj, ng) + + if _np_version_under1p10: + assert np.add(ng, obj) is NotImplemented + else: + with pytest.raises(TypeError): + np.add(ng, obj) + + with pytest.raises(TypeError): + np.subtract(obj, ng) + + if _np_version_under1p10: + assert np.subtract(ng, obj) is NotImplemented + else: + with pytest.raises(TypeError): + np.subtract(ng, obj) + + def test_pi_ops_nat(self): + idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-04'], + freq='M', name='idx') + expected = PeriodIndex(['2011-03', '2011-04', 'NaT', '2011-06'], + freq='M', name='idx') + self._check(idx, lambda x: x + 2, expected) + self._check(idx, lambda x: 2 + x, expected) + self._check(idx, lambda x: np.add(x, 2), expected) + + self._check(idx + 2, lambda x: x - 2, idx) + self._check(idx + 2, lambda x: np.subtract(x, 2), idx) + + # freq with mult + idx = PeriodIndex(['2011-01', '2011-02', 'NaT', 
'2011-04'], + freq='2M', name='idx') + expected = PeriodIndex(['2011-07', '2011-08', 'NaT', '2011-10'], + freq='2M', name='idx') + self._check(idx, lambda x: x + 3, expected) + self._check(idx, lambda x: 3 + x, expected) + self._check(idx, lambda x: np.add(x, 3), expected) + + self._check(idx + 3, lambda x: x - 3, idx) + self._check(idx + 3, lambda x: np.subtract(x, 3), idx) + + def test_pi_ops_array_int(self): + idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-04'], + freq='M', name='idx') + f = lambda x: x + np.array([1, 2, 3, 4]) + exp = PeriodIndex(['2011-02', '2011-04', 'NaT', '2011-08'], + freq='M', name='idx') + self._check(idx, f, exp) + + f = lambda x: np.add(x, np.array([4, -1, 1, 2])) + exp = PeriodIndex(['2011-05', '2011-01', 'NaT', '2011-06'], + freq='M', name='idx') + self._check(idx, f, exp) + + f = lambda x: x - np.array([1, 2, 3, 4]) + exp = PeriodIndex(['2010-12', '2010-12', 'NaT', '2010-12'], + freq='M', name='idx') + self._check(idx, f, exp) + + f = lambda x: np.subtract(x, np.array([3, 2, 3, -2])) + exp = PeriodIndex(['2010-10', '2010-12', 'NaT', '2011-06'], + freq='M', name='idx') + self._check(idx, f, exp) + + def test_pi_ops_offset(self): + idx = PeriodIndex(['2011-01-01', '2011-02-01', '2011-03-01', + '2011-04-01'], freq='D', name='idx') + f = lambda x: x + pd.offsets.Day() + exp = PeriodIndex(['2011-01-02', '2011-02-02', '2011-03-02', + '2011-04-02'], freq='D', name='idx') + self._check(idx, f, exp) + + f = lambda x: x + pd.offsets.Day(2) + exp = PeriodIndex(['2011-01-03', '2011-02-03', '2011-03-03', + '2011-04-03'], freq='D', name='idx') + self._check(idx, f, exp) + + f = lambda x: x - pd.offsets.Day(2) + exp = PeriodIndex(['2010-12-30', '2011-01-30', '2011-02-27', + '2011-03-30'], freq='D', name='idx') + self._check(idx, f, exp) + + def test_pi_offset_errors(self): + idx = PeriodIndex(['2011-01-01', '2011-02-01', '2011-03-01', + '2011-04-01'], freq='D', name='idx') + ser = pd.Series(idx) + + # Series op is applied per Period 
instance, thus error is raised + # from Period + msg_idx = r"Input has different freq from PeriodIndex\(freq=D\)" + msg_s = r"Input cannot be converted to Period\(freq=D\)" + for obj, msg in [(idx, msg_idx), (ser, msg_s)]: + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + obj + pd.offsets.Hour(2) + + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + pd.offsets.Hour(2) + obj + + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + obj - pd.offsets.Hour(2) + + def test_pi_sub_period(self): + # GH 13071 + idx = PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04'], + freq='M', name='idx') + + result = idx - pd.Period('2012-01', freq='M') + exp = pd.Index([-12, -11, -10, -9], name='idx') + tm.assert_index_equal(result, exp) + + result = np.subtract(idx, pd.Period('2012-01', freq='M')) + tm.assert_index_equal(result, exp) + + result = pd.Period('2012-01', freq='M') - idx + exp = pd.Index([12, 11, 10, 9], name='idx') + tm.assert_index_equal(result, exp) + + result = np.subtract(pd.Period('2012-01', freq='M'), idx) + if _np_version_under1p10: + assert result is NotImplemented + else: + tm.assert_index_equal(result, exp) + + exp = pd.TimedeltaIndex([np.nan, np.nan, np.nan, np.nan], name='idx') + tm.assert_index_equal(idx - pd.Period('NaT', freq='M'), exp) + tm.assert_index_equal(pd.Period('NaT', freq='M') - idx, exp) + + def test_pi_sub_pdnat(self): + # GH 13071 + idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-04'], + freq='M', name='idx') + exp = pd.TimedeltaIndex([pd.NaT] * 4, name='idx') + tm.assert_index_equal(pd.NaT - idx, exp) + tm.assert_index_equal(idx - pd.NaT, exp) + + def test_pi_sub_period_nat(self): + # GH 13071 + idx = PeriodIndex(['2011-01', 'NaT', '2011-03', '2011-04'], + freq='M', name='idx') + + result = idx - pd.Period('2012-01', freq='M') + exp = pd.Index([-12, np.nan, -10, -9], name='idx') + tm.assert_index_equal(result, exp) + + result = pd.Period('2012-01', freq='M') - idx + exp = pd.Index([12, 
np.nan, 10, 9], name='idx') + tm.assert_index_equal(result, exp) + + exp = pd.TimedeltaIndex([np.nan, np.nan, np.nan, np.nan], name='idx') + tm.assert_index_equal(idx - pd.Period('NaT', freq='M'), exp) + tm.assert_index_equal(pd.Period('NaT', freq='M') - idx, exp) diff --git a/pandas/tests/indexes/period/test_asfreq.py b/pandas/tests/indexes/period/test_asfreq.py index 96e3d0bbd8abc..ea59a57069faa 100644 --- a/pandas/tests/indexes/period/test_asfreq.py +++ b/pandas/tests/indexes/period/test_asfreq.py @@ -1,14 +1,12 @@ -import numpy as np +import pytest +import numpy as np import pandas as pd from pandas.util import testing as tm from pandas import PeriodIndex, Series, DataFrame -class TestPeriodIndex(tm.TestCase): - - def setUp(self): - pass +class TestPeriodIndex(object): def test_asfreq(self): pi1 = PeriodIndex(freq='A', start='1/1/2001', end='1/1/2001') @@ -19,64 +17,64 @@ def test_asfreq(self): pi6 = PeriodIndex(freq='Min', start='1/1/2001', end='1/1/2001 00:00') pi7 = PeriodIndex(freq='S', start='1/1/2001', end='1/1/2001 00:00:00') - self.assertEqual(pi1.asfreq('Q', 'S'), pi2) - self.assertEqual(pi1.asfreq('Q', 's'), pi2) - self.assertEqual(pi1.asfreq('M', 'start'), pi3) - self.assertEqual(pi1.asfreq('D', 'StarT'), pi4) - self.assertEqual(pi1.asfreq('H', 'beGIN'), pi5) - self.assertEqual(pi1.asfreq('Min', 'S'), pi6) - self.assertEqual(pi1.asfreq('S', 'S'), pi7) - - self.assertEqual(pi2.asfreq('A', 'S'), pi1) - self.assertEqual(pi2.asfreq('M', 'S'), pi3) - self.assertEqual(pi2.asfreq('D', 'S'), pi4) - self.assertEqual(pi2.asfreq('H', 'S'), pi5) - self.assertEqual(pi2.asfreq('Min', 'S'), pi6) - self.assertEqual(pi2.asfreq('S', 'S'), pi7) - - self.assertEqual(pi3.asfreq('A', 'S'), pi1) - self.assertEqual(pi3.asfreq('Q', 'S'), pi2) - self.assertEqual(pi3.asfreq('D', 'S'), pi4) - self.assertEqual(pi3.asfreq('H', 'S'), pi5) - self.assertEqual(pi3.asfreq('Min', 'S'), pi6) - self.assertEqual(pi3.asfreq('S', 'S'), pi7) - - self.assertEqual(pi4.asfreq('A', 'S'), pi1) - 
self.assertEqual(pi4.asfreq('Q', 'S'), pi2) - self.assertEqual(pi4.asfreq('M', 'S'), pi3) - self.assertEqual(pi4.asfreq('H', 'S'), pi5) - self.assertEqual(pi4.asfreq('Min', 'S'), pi6) - self.assertEqual(pi4.asfreq('S', 'S'), pi7) - - self.assertEqual(pi5.asfreq('A', 'S'), pi1) - self.assertEqual(pi5.asfreq('Q', 'S'), pi2) - self.assertEqual(pi5.asfreq('M', 'S'), pi3) - self.assertEqual(pi5.asfreq('D', 'S'), pi4) - self.assertEqual(pi5.asfreq('Min', 'S'), pi6) - self.assertEqual(pi5.asfreq('S', 'S'), pi7) - - self.assertEqual(pi6.asfreq('A', 'S'), pi1) - self.assertEqual(pi6.asfreq('Q', 'S'), pi2) - self.assertEqual(pi6.asfreq('M', 'S'), pi3) - self.assertEqual(pi6.asfreq('D', 'S'), pi4) - self.assertEqual(pi6.asfreq('H', 'S'), pi5) - self.assertEqual(pi6.asfreq('S', 'S'), pi7) - - self.assertEqual(pi7.asfreq('A', 'S'), pi1) - self.assertEqual(pi7.asfreq('Q', 'S'), pi2) - self.assertEqual(pi7.asfreq('M', 'S'), pi3) - self.assertEqual(pi7.asfreq('D', 'S'), pi4) - self.assertEqual(pi7.asfreq('H', 'S'), pi5) - self.assertEqual(pi7.asfreq('Min', 'S'), pi6) - - self.assertRaises(ValueError, pi7.asfreq, 'T', 'foo') + assert pi1.asfreq('Q', 'S') == pi2 + assert pi1.asfreq('Q', 's') == pi2 + assert pi1.asfreq('M', 'start') == pi3 + assert pi1.asfreq('D', 'StarT') == pi4 + assert pi1.asfreq('H', 'beGIN') == pi5 + assert pi1.asfreq('Min', 'S') == pi6 + assert pi1.asfreq('S', 'S') == pi7 + + assert pi2.asfreq('A', 'S') == pi1 + assert pi2.asfreq('M', 'S') == pi3 + assert pi2.asfreq('D', 'S') == pi4 + assert pi2.asfreq('H', 'S') == pi5 + assert pi2.asfreq('Min', 'S') == pi6 + assert pi2.asfreq('S', 'S') == pi7 + + assert pi3.asfreq('A', 'S') == pi1 + assert pi3.asfreq('Q', 'S') == pi2 + assert pi3.asfreq('D', 'S') == pi4 + assert pi3.asfreq('H', 'S') == pi5 + assert pi3.asfreq('Min', 'S') == pi6 + assert pi3.asfreq('S', 'S') == pi7 + + assert pi4.asfreq('A', 'S') == pi1 + assert pi4.asfreq('Q', 'S') == pi2 + assert pi4.asfreq('M', 'S') == pi3 + assert pi4.asfreq('H', 'S') == 
pi5 + assert pi4.asfreq('Min', 'S') == pi6 + assert pi4.asfreq('S', 'S') == pi7 + + assert pi5.asfreq('A', 'S') == pi1 + assert pi5.asfreq('Q', 'S') == pi2 + assert pi5.asfreq('M', 'S') == pi3 + assert pi5.asfreq('D', 'S') == pi4 + assert pi5.asfreq('Min', 'S') == pi6 + assert pi5.asfreq('S', 'S') == pi7 + + assert pi6.asfreq('A', 'S') == pi1 + assert pi6.asfreq('Q', 'S') == pi2 + assert pi6.asfreq('M', 'S') == pi3 + assert pi6.asfreq('D', 'S') == pi4 + assert pi6.asfreq('H', 'S') == pi5 + assert pi6.asfreq('S', 'S') == pi7 + + assert pi7.asfreq('A', 'S') == pi1 + assert pi7.asfreq('Q', 'S') == pi2 + assert pi7.asfreq('M', 'S') == pi3 + assert pi7.asfreq('D', 'S') == pi4 + assert pi7.asfreq('H', 'S') == pi5 + assert pi7.asfreq('Min', 'S') == pi6 + + pytest.raises(ValueError, pi7.asfreq, 'T', 'foo') result1 = pi1.asfreq('3M') result2 = pi1.asfreq('M') expected = PeriodIndex(freq='M', start='2001-12', end='2001-12') - self.assert_numpy_array_equal(result1.asi8, expected.asi8) - self.assertEqual(result1.freqstr, '3M') - self.assert_numpy_array_equal(result2.asi8, expected.asi8) - self.assertEqual(result2.freqstr, 'M') + tm.assert_numpy_array_equal(result1.asi8, expected.asi8) + assert result1.freqstr == '3M' + tm.assert_numpy_array_equal(result2.asi8, expected.asi8) + assert result2.freqstr == 'M' def test_asfreq_nat(self): idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-04'], freq='M') @@ -84,21 +82,21 @@ def test_asfreq_nat(self): expected = PeriodIndex(['2011Q1', '2011Q1', 'NaT', '2011Q2'], freq='Q') tm.assert_index_equal(result, expected) - def test_asfreq_mult_pi(self): + @pytest.mark.parametrize('freq', ['D', '3D']) + def test_asfreq_mult_pi(self, freq): pi = PeriodIndex(['2001-01', '2001-02', 'NaT', '2001-03'], freq='2M') - for freq in ['D', '3D']: - result = pi.asfreq(freq) - exp = PeriodIndex(['2001-02-28', '2001-03-31', 'NaT', - '2001-04-30'], freq=freq) - self.assert_index_equal(result, exp) - self.assertEqual(result.freq, exp.freq) + result = 
pi.asfreq(freq) + exp = PeriodIndex(['2001-02-28', '2001-03-31', 'NaT', + '2001-04-30'], freq=freq) + tm.assert_index_equal(result, exp) + assert result.freq == exp.freq - result = pi.asfreq(freq, how='S') - exp = PeriodIndex(['2001-01-01', '2001-02-01', 'NaT', - '2001-03-01'], freq=freq) - self.assert_index_equal(result, exp) - self.assertEqual(result.freq, exp.freq) + result = pi.asfreq(freq, how='S') + exp = PeriodIndex(['2001-01-01', '2001-02-01', 'NaT', + '2001-03-01'], freq=freq) + tm.assert_index_equal(result, exp) + assert result.freq == exp.freq def test_asfreq_combined_pi(self): pi = pd.PeriodIndex(['2001-01-01 00:00', '2001-01-02 02:00', 'NaT'], @@ -107,8 +105,8 @@ def test_asfreq_combined_pi(self): freq='25H') for freq, how in zip(['1D1H', '1H1D'], ['S', 'E']): result = pi.asfreq(freq, how=how) - self.assert_index_equal(result, exp) - self.assertEqual(result.freq, exp.freq) + tm.assert_index_equal(result, exp) + assert result.freq == exp.freq for freq in ['1D1H', '1H1D']: pi = pd.PeriodIndex(['2001-01-01 00:00', '2001-01-02 02:00', @@ -116,16 +114,16 @@ def test_asfreq_combined_pi(self): result = pi.asfreq('H') exp = PeriodIndex(['2001-01-02 00:00', '2001-01-03 02:00', 'NaT'], freq='H') - self.assert_index_equal(result, exp) - self.assertEqual(result.freq, exp.freq) + tm.assert_index_equal(result, exp) + assert result.freq == exp.freq pi = pd.PeriodIndex(['2001-01-01 00:00', '2001-01-02 02:00', 'NaT'], freq=freq) result = pi.asfreq('H', how='S') exp = PeriodIndex(['2001-01-01 00:00', '2001-01-02 02:00', 'NaT'], freq='H') - self.assert_index_equal(result, exp) - self.assertEqual(result.freq, exp.freq) + tm.assert_index_equal(result, exp) + assert result.freq == exp.freq def test_asfreq_ts(self): index = PeriodIndex(freq='A', start='1/1/2001', end='12/31/2010') @@ -135,12 +133,12 @@ def test_asfreq_ts(self): result = ts.asfreq('D', how='end') df_result = df.asfreq('D', how='end') exp_index = index.asfreq('D', how='end') - self.assertEqual(len(result), 
len(ts)) + assert len(result) == len(ts) tm.assert_index_equal(result.index, exp_index) tm.assert_index_equal(df_result.index, exp_index) result = ts.asfreq('D', how='start') - self.assertEqual(len(result), len(ts)) + assert len(result) == len(ts) tm.assert_index_equal(result.index, index.asfreq('D', how='start')) def test_astype_asfreq(self): diff --git a/pandas/tests/indexes/period/test_astype.py b/pandas/tests/indexes/period/test_astype.py new file mode 100644 index 0000000000000..f2126487496c4 --- /dev/null +++ b/pandas/tests/indexes/period/test_astype.py @@ -0,0 +1,99 @@ +# -*- coding: utf-8 -*- + +import numpy as np +import pytest + +import pandas as pd +import pandas.util.testing as tm +from pandas import NaT, Period, PeriodIndex, Int64Index, Index, period_range + + +class TestPeriodIndexAsType(object): + @pytest.mark.parametrize('dtype', [ + float, 'timedelta64', 'timedelta64[ns]']) + def test_astype_raises(self, dtype): + # GH#13149, GH#13209 + idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq='D') + msg = 'Cannot cast PeriodIndex to dtype' + with tm.assert_raises_regex(TypeError, msg): + idx.astype(dtype) + + def test_astype_conversion(self): + # GH#13149, GH#13209 + idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq='D') + + result = idx.astype(object) + expected = Index([Period('2016-05-16', freq='D')] + + [Period(NaT, freq='D')] * 3, dtype='object') + tm.assert_index_equal(result, expected) + + result = idx.astype(int) + expected = Int64Index([16937] + [-9223372036854775808] * 3, + dtype=np.int64) + tm.assert_index_equal(result, expected) + + result = idx.astype(str) + expected = Index(str(x) for x in idx) + tm.assert_index_equal(result, expected) + + idx = period_range('1990', '2009', freq='A') + result = idx.astype('i8') + tm.assert_index_equal(result, Index(idx.asi8)) + tm.assert_numpy_array_equal(result.values, idx.asi8) + + def test_astype_object(self): + idx = pd.PeriodIndex([], freq='M') + + exp = np.array([], dtype=object) + 
tm.assert_numpy_array_equal(idx.astype(object).values, exp) + tm.assert_numpy_array_equal(idx._mpl_repr(), exp) + + idx = pd.PeriodIndex(['2011-01', pd.NaT], freq='M') + + exp = np.array([pd.Period('2011-01', freq='M'), pd.NaT], dtype=object) + tm.assert_numpy_array_equal(idx.astype(object).values, exp) + tm.assert_numpy_array_equal(idx._mpl_repr(), exp) + + exp = np.array([pd.Period('2011-01-01', freq='D'), pd.NaT], + dtype=object) + idx = pd.PeriodIndex(['2011-01-01', pd.NaT], freq='D') + tm.assert_numpy_array_equal(idx.astype(object).values, exp) + tm.assert_numpy_array_equal(idx._mpl_repr(), exp) + + # TODO: de-duplicate this version (from test_ops) with the one above + # (from test_period) + def test_astype_object2(self): + idx = pd.period_range(start='2013-01-01', periods=4, freq='M', + name='idx') + expected_list = [pd.Period('2013-01-31', freq='M'), + pd.Period('2013-02-28', freq='M'), + pd.Period('2013-03-31', freq='M'), + pd.Period('2013-04-30', freq='M')] + expected = pd.Index(expected_list, dtype=object, name='idx') + result = idx.astype(object) + assert isinstance(result, Index) + assert result.dtype == object + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert idx.tolist() == expected_list + + idx = PeriodIndex(['2013-01-01', '2013-01-02', 'NaT', + '2013-01-04'], freq='D', name='idx') + expected_list = [pd.Period('2013-01-01', freq='D'), + pd.Period('2013-01-02', freq='D'), + pd.Period('NaT', freq='D'), + pd.Period('2013-01-04', freq='D')] + expected = pd.Index(expected_list, dtype=object, name='idx') + result = idx.astype(object) + assert isinstance(result, Index) + assert result.dtype == object + tm.assert_index_equal(result, expected) + for i in [0, 1, 3]: + assert result[i] == expected[i] + assert result[2] is pd.NaT + assert result.name == expected.name + + result_list = idx.tolist() + for i in [0, 1, 3]: + assert result_list[i] == expected_list[i] + assert result_list[2] is pd.NaT diff --git 
a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index 228615829b5b8..be741592ec7a2 100644 --- a/pandas/tests/indexes/period/test_construction.py +++ b/pandas/tests/indexes/period/test_construction.py @@ -1,16 +1,17 @@ -import numpy as np +import pytest +import numpy as np import pandas as pd import pandas.util.testing as tm -import pandas.tseries.period as period +import pandas.core.indexes.period as period from pandas.compat import lrange, PY3, text_type, lmap from pandas import (Period, PeriodIndex, period_range, offsets, date_range, Series, Index) -class TestPeriodIndex(tm.TestCase): +class TestPeriodIndex(object): - def setUp(self): + def setup_method(self, method): pass def test_construction_base_constructor(self): @@ -58,12 +59,12 @@ def test_constructor_field_arrays(self): years = [2007, 2007, 2007] months = [1, 2] - self.assertRaises(ValueError, PeriodIndex, year=years, month=months, - freq='M') - self.assertRaises(ValueError, PeriodIndex, year=years, month=months, - freq='2M') - self.assertRaises(ValueError, PeriodIndex, year=years, month=months, - freq='M', start=Period('2007-01', freq='M')) + pytest.raises(ValueError, PeriodIndex, year=years, month=months, + freq='M') + pytest.raises(ValueError, PeriodIndex, year=years, month=months, + freq='2M') + pytest.raises(ValueError, PeriodIndex, year=years, month=months, + freq='M', start=Period('2007-01', freq='M')) years = [2007, 2007, 2007] months = [1, 2, 3] @@ -73,8 +74,8 @@ def test_constructor_field_arrays(self): def test_constructor_U(self): # U was used as undefined period - self.assertRaises(ValueError, period_range, '2007-1-1', periods=500, - freq='X') + pytest.raises(ValueError, period_range, '2007-1-1', periods=500, + freq='X') def test_constructor_nano(self): idx = period_range(start=Period(ordinal=1, freq='N'), @@ -91,21 +92,21 @@ def test_constructor_arrays_negative_year(self): pindex = PeriodIndex(year=years, quarter=quarters) - 
self.assert_numpy_array_equal(pindex.year, years) - self.assert_numpy_array_equal(pindex.quarter, quarters) + tm.assert_index_equal(pindex.year, pd.Index(years)) + tm.assert_index_equal(pindex.quarter, pd.Index(quarters)) def test_constructor_invalid_quarters(self): - self.assertRaises(ValueError, PeriodIndex, year=lrange(2000, 2004), - quarter=lrange(4), freq='Q-DEC') + pytest.raises(ValueError, PeriodIndex, year=lrange(2000, 2004), + quarter=lrange(4), freq='Q-DEC') def test_constructor_corner(self): - self.assertRaises(ValueError, PeriodIndex, periods=10, freq='A') + pytest.raises(ValueError, PeriodIndex, periods=10, freq='A') start = Period('2007', freq='A-JUN') end = Period('2010', freq='A-DEC') - self.assertRaises(ValueError, PeriodIndex, start=start, end=end) - self.assertRaises(ValueError, PeriodIndex, start=start) - self.assertRaises(ValueError, PeriodIndex, end=end) + pytest.raises(ValueError, PeriodIndex, start=start, end=end) + pytest.raises(ValueError, PeriodIndex, start=start) + pytest.raises(ValueError, PeriodIndex, end=end) result = period_range('2007-01', periods=10.5, freq='M') exp = period_range('2007-01', periods=10, freq='M') @@ -118,10 +119,10 @@ def test_constructor_fromarraylike(self): tm.assert_index_equal(PeriodIndex(idx.values), idx) tm.assert_index_equal(PeriodIndex(list(idx.values)), idx) - self.assertRaises(ValueError, PeriodIndex, idx._values) - self.assertRaises(ValueError, PeriodIndex, list(idx._values)) - self.assertRaises(ValueError, PeriodIndex, - data=Period('2007', freq='A')) + pytest.raises(ValueError, PeriodIndex, idx._ndarray_values) + pytest.raises(ValueError, PeriodIndex, list(idx._ndarray_values)) + pytest.raises(TypeError, PeriodIndex, + data=Period('2007', freq='A')) result = PeriodIndex(iter(idx)) tm.assert_index_equal(result, idx) @@ -134,15 +135,15 @@ def test_constructor_fromarraylike(self): result = PeriodIndex(idx, freq=offsets.MonthEnd()) tm.assert_index_equal(result, idx) - self.assertTrue(result.freq, 'M') + 
assert result.freq == 'M' result = PeriodIndex(idx, freq='2M') tm.assert_index_equal(result, idx.asfreq('2M')) - self.assertTrue(result.freq, '2M') + assert result.freq == '2M' result = PeriodIndex(idx, freq=offsets.MonthEnd(2)) tm.assert_index_equal(result, idx.asfreq('2M')) - self.assertTrue(result.freq, '2M') + assert result.freq == '2M' result = PeriodIndex(idx, freq='D') exp = idx.asfreq('D', 'e') @@ -152,19 +153,19 @@ def test_constructor_datetime64arr(self): vals = np.arange(100000, 100000 + 10000, 100, dtype=np.int64) vals = vals.view(np.dtype('M8[us]')) - self.assertRaises(ValueError, PeriodIndex, vals, freq='D') + pytest.raises(ValueError, PeriodIndex, vals, freq='D') def test_constructor_dtype(self): # passing a dtype with a tz should localize idx = PeriodIndex(['2013-01', '2013-03'], dtype='period[M]') exp = PeriodIndex(['2013-01', '2013-03'], freq='M') tm.assert_index_equal(idx, exp) - self.assertEqual(idx.dtype, 'period[M]') + assert idx.dtype == 'period[M]' idx = PeriodIndex(['2013-01-05', '2013-03-05'], dtype='period[3D]') exp = PeriodIndex(['2013-01-05', '2013-03-05'], freq='3D') tm.assert_index_equal(idx, exp) - self.assertEqual(idx.dtype, 'period[3D]') + assert idx.dtype == 'period[3D]' # if we already have a freq and its not the same, then asfreq # (not changed) @@ -173,23 +174,23 @@ def test_constructor_dtype(self): res = PeriodIndex(idx, dtype='period[M]') exp = PeriodIndex(['2013-01', '2013-01'], freq='M') tm.assert_index_equal(res, exp) - self.assertEqual(res.dtype, 'period[M]') + assert res.dtype == 'period[M]' res = PeriodIndex(idx, freq='M') tm.assert_index_equal(res, exp) - self.assertEqual(res.dtype, 'period[M]') + assert res.dtype == 'period[M]' msg = 'specified freq and dtype are different' - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): PeriodIndex(['2011-01'], freq='M', dtype='period[D]') def test_constructor_empty(self): idx = pd.PeriodIndex([], 
freq='M') - tm.assertIsInstance(idx, PeriodIndex) - self.assertEqual(len(idx), 0) - self.assertEqual(idx.freq, 'M') + assert isinstance(idx, PeriodIndex) + assert len(idx) == 0 + assert idx.freq == 'M' - with tm.assertRaisesRegexp(ValueError, 'freq not specified'): + with tm.assert_raises_regex(ValueError, 'freq not specified'): pd.PeriodIndex([]) def test_constructor_pi_nat(self): @@ -215,35 +216,35 @@ def test_constructor_pi_nat(self): idx = PeriodIndex([pd.NaT, pd.NaT, '2011-01', '2011-01'], freq='M') tm.assert_index_equal(idx, exp) - with tm.assertRaisesRegexp(ValueError, 'freq not specified'): + with tm.assert_raises_regex(ValueError, 'freq not specified'): PeriodIndex([pd.NaT, pd.NaT]) - with tm.assertRaisesRegexp(ValueError, 'freq not specified'): + with tm.assert_raises_regex(ValueError, 'freq not specified'): PeriodIndex(np.array([pd.NaT, pd.NaT])) - with tm.assertRaisesRegexp(ValueError, 'freq not specified'): + with tm.assert_raises_regex(ValueError, 'freq not specified'): PeriodIndex(['NaT', 'NaT']) - with tm.assertRaisesRegexp(ValueError, 'freq not specified'): + with tm.assert_raises_regex(ValueError, 'freq not specified'): PeriodIndex(np.array(['NaT', 'NaT'])) def test_constructor_incompat_freq(self): msg = "Input has different freq=D from PeriodIndex\\(freq=M\\)" - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): PeriodIndex([Period('2011-01', freq='M'), pd.NaT, Period('2011-01', freq='D')]) - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): PeriodIndex(np.array([Period('2011-01', freq='M'), pd.NaT, Period('2011-01', freq='D')])) # first element is pd.NaT - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): PeriodIndex([pd.NaT, Period('2011-01', freq='M'), Period('2011-01', freq='D')]) - with 
tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): PeriodIndex(np.array([pd.NaT, Period('2011-01', freq='M'), Period('2011-01', freq='D')])) @@ -272,12 +273,12 @@ def test_constructor_simple_new(self): result = idx._simple_new([pd.Period('2007-01', freq='M'), pd.Period('2007-02', freq='M')], 'p', freq=idx.freq) - self.assert_index_equal(result, idx) + tm.assert_index_equal(result, idx) result = idx._simple_new(np.array([pd.Period('2007-01', freq='M'), pd.Period('2007-02', freq='M')]), 'p', freq=idx.freq) - self.assert_index_equal(result, idx) + tm.assert_index_equal(result, idx) def test_constructor_simple_new_empty(self): # GH13079 @@ -285,17 +286,20 @@ def test_constructor_simple_new_empty(self): result = idx._simple_new(idx, name='p', freq='M') tm.assert_index_equal(result, idx) - def test_constructor_simple_new_floats(self): - # GH13079 - for floats in [[1.1], np.array([1.1])]: - with self.assertRaises(TypeError): - pd.PeriodIndex._simple_new(floats, freq='M') + @pytest.mark.parametrize('floats', [[1.1, 2.1], np.array([1.1, 2.1])]) + def test_constructor_floats(self, floats): + # GH#13079 + with pytest.raises(TypeError): + pd.PeriodIndex._simple_new(floats, freq='M') + + with pytest.raises(TypeError): + pd.PeriodIndex(floats, freq='M') def test_constructor_nat(self): - self.assertRaises(ValueError, period_range, start='NaT', - end='2011-01-01', freq='M') - self.assertRaises(ValueError, period_range, start='2011-01-01', - end='NaT', freq='M') + pytest.raises(ValueError, period_range, start='NaT', + end='2011-01-01', freq='M') + pytest.raises(ValueError, period_range, start='2011-01-01', + end='NaT', freq='M') def test_constructor_year_and_quarter(self): year = pd.Series([2001, 2002, 2003]) @@ -328,27 +332,25 @@ def test_constructor_freq_mult(self): msg = ('Frequency must be positive, because it' ' represents span: -1M') - with tm.assertRaisesRegexp(ValueError, msg): + with 
tm.assert_raises_regex(ValueError, msg): PeriodIndex(['2011-01'], freq='-1M') msg = ('Frequency must be positive, because it' ' represents span: 0M') - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): PeriodIndex(['2011-01'], freq='0M') msg = ('Frequency must be positive, because it' ' represents span: 0M') - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): period_range('2011-01', periods=3, freq='0M') - def test_constructor_freq_mult_dti_compat(self): - import itertools - mults = [1, 2, 3, 4, 5] - freqs = ['A', 'M', 'D', 'T', 'S'] - for mult, freq in itertools.product(mults, freqs): - freqstr = str(mult) + freq - pidx = PeriodIndex(start='2014-04-01', freq=freqstr, periods=10) - expected = date_range(start='2014-04-01', freq=freqstr, - periods=10).to_period(freqstr) - tm.assert_index_equal(pidx, expected) + @pytest.mark.parametrize('freq', ['A', 'M', 'D', 'T', 'S']) + @pytest.mark.parametrize('mult', [1, 2, 3, 4, 5]) + def test_constructor_freq_mult_dti_compat(self, mult, freq): + freqstr = str(mult) + freq + pidx = PeriodIndex(start='2014-04-01', freq=freqstr, periods=10) + expected = date_range(start='2014-04-01', freq=freqstr, + periods=10).to_period(freqstr) + tm.assert_index_equal(pidx, expected) def test_constructor_freq_combined(self): for freq in ['1D1H', '1H1D']: @@ -363,88 +365,90 @@ def test_constructor_freq_combined(self): def test_constructor(self): pi = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') - self.assertEqual(len(pi), 9) + assert len(pi) == 9 pi = PeriodIndex(freq='Q', start='1/1/2001', end='12/1/2009') - self.assertEqual(len(pi), 4 * 9) + assert len(pi) == 4 * 9 pi = PeriodIndex(freq='M', start='1/1/2001', end='12/1/2009') - self.assertEqual(len(pi), 12 * 9) + assert len(pi) == 12 * 9 pi = PeriodIndex(freq='D', start='1/1/2001', end='12/31/2009') - self.assertEqual(len(pi), 365 * 9 + 2) + assert len(pi) == 365 * 9 + 2 pi = PeriodIndex(freq='B', 
start='1/1/2001', end='12/31/2009') - self.assertEqual(len(pi), 261 * 9) + assert len(pi) == 261 * 9 pi = PeriodIndex(freq='H', start='1/1/2001', end='12/31/2001 23:00') - self.assertEqual(len(pi), 365 * 24) + assert len(pi) == 365 * 24 pi = PeriodIndex(freq='Min', start='1/1/2001', end='1/1/2001 23:59') - self.assertEqual(len(pi), 24 * 60) + assert len(pi) == 24 * 60 pi = PeriodIndex(freq='S', start='1/1/2001', end='1/1/2001 23:59:59') - self.assertEqual(len(pi), 24 * 60 * 60) + assert len(pi) == 24 * 60 * 60 start = Period('02-Apr-2005', 'B') i1 = PeriodIndex(start=start, periods=20) - self.assertEqual(len(i1), 20) - self.assertEqual(i1.freq, start.freq) - self.assertEqual(i1[0], start) + assert len(i1) == 20 + assert i1.freq == start.freq + assert i1[0] == start end_intv = Period('2006-12-31', 'W') i1 = PeriodIndex(end=end_intv, periods=10) - self.assertEqual(len(i1), 10) - self.assertEqual(i1.freq, end_intv.freq) - self.assertEqual(i1[-1], end_intv) + assert len(i1) == 10 + assert i1.freq == end_intv.freq + assert i1[-1] == end_intv end_intv = Period('2006-12-31', '1w') i2 = PeriodIndex(end=end_intv, periods=10) - self.assertEqual(len(i1), len(i2)) - self.assertTrue((i1 == i2).all()) - self.assertEqual(i1.freq, i2.freq) + assert len(i1) == len(i2) + assert (i1 == i2).all() + assert i1.freq == i2.freq end_intv = Period('2006-12-31', ('w', 1)) i2 = PeriodIndex(end=end_intv, periods=10) - self.assertEqual(len(i1), len(i2)) - self.assertTrue((i1 == i2).all()) - self.assertEqual(i1.freq, i2.freq) + assert len(i1) == len(i2) + assert (i1 == i2).all() + assert i1.freq == i2.freq end_intv = Period('2005-05-01', 'B') i1 = PeriodIndex(start=start, end=end_intv) # infer freq from first element i2 = PeriodIndex([end_intv, Period('2005-05-05', 'B')]) - self.assertEqual(len(i2), 2) - self.assertEqual(i2[0], end_intv) + assert len(i2) == 2 + assert i2[0] == end_intv i2 = PeriodIndex(np.array([end_intv, Period('2005-05-05', 'B')])) - self.assertEqual(len(i2), 2) - 
self.assertEqual(i2[0], end_intv) + assert len(i2) == 2 + assert i2[0] == end_intv # Mixed freq should fail vals = [end_intv, Period('2006-12-31', 'w')] - self.assertRaises(ValueError, PeriodIndex, vals) + pytest.raises(ValueError, PeriodIndex, vals) vals = np.array(vals) - self.assertRaises(ValueError, PeriodIndex, vals) + pytest.raises(ValueError, PeriodIndex, vals) def test_constructor_error(self): start = Period('02-Apr-2005', 'B') end_intv = Period('2006-12-31', ('w', 1)) - msg = 'Start and end must have same freq' - with tm.assertRaisesRegexp(ValueError, msg): + msg = 'start and end must have same freq' + with tm.assert_raises_regex(ValueError, msg): PeriodIndex(start=start, end=end_intv) - msg = 'Must specify 2 of start, end, periods' - with tm.assertRaisesRegexp(ValueError, msg): + msg = ('Of the three parameters: start, end, and periods, ' + 'exactly two must be specified') + with tm.assert_raises_regex(ValueError, msg): PeriodIndex(start=start) - def test_recreate_from_data(self): - for o in ['M', 'Q', 'A', 'D', 'B', 'T', 'S', 'L', 'U', 'N', 'H']: - org = PeriodIndex(start='2001/04/01', freq=o, periods=1) - idx = PeriodIndex(org.values, freq=o) - tm.assert_index_equal(idx, org) + @pytest.mark.parametrize('freq', ['M', 'Q', 'A', 'D', 'B', + 'T', 'S', 'L', 'U', 'N', 'H']) + def test_recreate_from_data(self, freq): + org = PeriodIndex(start='2001/04/01', freq=freq, periods=1) + idx = PeriodIndex(org.values, freq=freq) + tm.assert_index_equal(idx, org) def test_map_with_string_constructor(self): raw = [2005, 2007, 2009] @@ -460,22 +464,22 @@ def test_map_with_string_constructor(self): res = index.map(t) # should return an Index - tm.assertIsInstance(res, Index) + assert isinstance(res, Index) # preserve element types - self.assertTrue(all(isinstance(resi, t) for resi in res)) + assert all(isinstance(resi, t) for resi in res) # lastly, values should compare equal tm.assert_index_equal(res, expected) -class TestSeriesPeriod(tm.TestCase): +class 
TestSeriesPeriod(object): - def setUp(self): + def setup_method(self, method): self.series = Series(period_range('2000-01-01', periods=10, freq='D')) def test_constructor_cant_cast_period(self): - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): Series(period_range('2000-01-01', periods=10, freq='D'), dtype=float) diff --git a/pandas/tests/indexes/period/test_formats.py b/pandas/tests/indexes/period/test_formats.py new file mode 100644 index 0000000000000..b1a1060bf86c4 --- /dev/null +++ b/pandas/tests/indexes/period/test_formats.py @@ -0,0 +1,209 @@ +from pandas import PeriodIndex + +import numpy as np +import pytest + +import pandas.util.testing as tm +import pandas as pd + + +def test_to_native_types(): + index = PeriodIndex(['2017-01-01', '2017-01-02', + '2017-01-03'], freq='D') + + # First, with no arguments. + expected = np.array(['2017-01-01', '2017-01-02', + '2017-01-03'], dtype='= -1') - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -5]), fill_value=True) - with tm.assertRaises(IndexError): + with pytest.raises(IndexError): idx.take(np.array([1, -5])) + + +class TestIndexing(object): + + def test_get_loc_msg(self): + idx = period_range('2000-1-1', freq='A', periods=10) + bad_period = Period('2012', 'A') + pytest.raises(KeyError, idx.get_loc, bad_period) + + try: + idx.get_loc(bad_period) + except KeyError as inst: + assert inst.args[0] == bad_period + + def test_get_loc_nat(self): + didx = DatetimeIndex(['2011-01-01', 'NaT', '2011-01-03']) + pidx = PeriodIndex(['2011-01-01', 'NaT', '2011-01-03'], freq='M') + + # check DatetimeIndex compat + for idx in [didx, pidx]: + assert idx.get_loc(pd.NaT) == 1 + assert idx.get_loc(None) == 1 + assert idx.get_loc(float('nan')) == 1 + assert idx.get_loc(np.nan) == 1 + + def 
test_get_loc(self): + # GH 17717 + p0 = pd.Period('2017-09-01') + p1 = pd.Period('2017-09-02') + p2 = pd.Period('2017-09-03') + + # get the location of p1/p2 from + # monotonic increasing PeriodIndex with non-duplicate + idx0 = pd.PeriodIndex([p0, p1, p2]) + expected_idx1_p1 = 1 + expected_idx1_p2 = 2 + + assert idx0.get_loc(p1) == expected_idx1_p1 + assert idx0.get_loc(str(p1)) == expected_idx1_p1 + assert idx0.get_loc(p2) == expected_idx1_p2 + assert idx0.get_loc(str(p2)) == expected_idx1_p2 + + pytest.raises(tslibs.parsing.DateParseError, idx0.get_loc, 'foo') + pytest.raises(KeyError, idx0.get_loc, 1.1) + pytest.raises(TypeError, idx0.get_loc, idx0) + + # get the location of p1/p2 from + # monotonic increasing PeriodIndex with duplicate + idx1 = pd.PeriodIndex([p1, p1, p2]) + expected_idx1_p1 = slice(0, 2) + expected_idx1_p2 = 2 + + assert idx1.get_loc(p1) == expected_idx1_p1 + assert idx1.get_loc(str(p1)) == expected_idx1_p1 + assert idx1.get_loc(p2) == expected_idx1_p2 + assert idx1.get_loc(str(p2)) == expected_idx1_p2 + + pytest.raises(tslibs.parsing.DateParseError, idx1.get_loc, 'foo') + pytest.raises(KeyError, idx1.get_loc, 1.1) + pytest.raises(TypeError, idx1.get_loc, idx1) + + # get the location of p1/p2 from + # non-monotonic increasing/decreasing PeriodIndex with duplicate + idx2 = pd.PeriodIndex([p2, p1, p2]) + expected_idx2_p1 = 1 + expected_idx2_p2 = np.array([True, False, True]) + + assert idx2.get_loc(p1) == expected_idx2_p1 + assert idx2.get_loc(str(p1)) == expected_idx2_p1 + tm.assert_numpy_array_equal(idx2.get_loc(p2), expected_idx2_p2) + tm.assert_numpy_array_equal(idx2.get_loc(str(p2)), expected_idx2_p2) + + def test_is_monotonic_increasing(self): + # GH 17717 + p0 = pd.Period('2017-09-01') + p1 = pd.Period('2017-09-02') + p2 = pd.Period('2017-09-03') + + idx_inc0 = pd.PeriodIndex([p0, p1, p2]) + idx_inc1 = pd.PeriodIndex([p0, p1, p1]) + idx_dec0 = pd.PeriodIndex([p2, p1, p0]) + idx_dec1 = pd.PeriodIndex([p2, p1, p1]) + idx = 
pd.PeriodIndex([p1, p2, p0]) + + assert idx_inc0.is_monotonic_increasing + assert idx_inc1.is_monotonic_increasing + assert not idx_dec0.is_monotonic_increasing + assert not idx_dec1.is_monotonic_increasing + assert not idx.is_monotonic_increasing + + def test_is_monotonic_decreasing(self): + # GH 17717 + p0 = pd.Period('2017-09-01') + p1 = pd.Period('2017-09-02') + p2 = pd.Period('2017-09-03') + + idx_inc0 = pd.PeriodIndex([p0, p1, p2]) + idx_inc1 = pd.PeriodIndex([p0, p1, p1]) + idx_dec0 = pd.PeriodIndex([p2, p1, p0]) + idx_dec1 = pd.PeriodIndex([p2, p1, p1]) + idx = pd.PeriodIndex([p1, p2, p0]) + + assert not idx_inc0.is_monotonic_decreasing + assert not idx_inc1.is_monotonic_decreasing + assert idx_dec0.is_monotonic_decreasing + assert idx_dec1.is_monotonic_decreasing + assert not idx.is_monotonic_decreasing + + def test_is_unique(self): + # GH 17717 + p0 = pd.Period('2017-09-01') + p1 = pd.Period('2017-09-02') + p2 = pd.Period('2017-09-03') + + idx0 = pd.PeriodIndex([p0, p1, p2]) + assert idx0.is_unique + + idx1 = pd.PeriodIndex([p1, p1, p2]) + assert not idx1.is_unique + + def test_contains(self): + # GH 17717 + p0 = pd.Period('2017-09-01') + p1 = pd.Period('2017-09-02') + p2 = pd.Period('2017-09-03') + p3 = pd.Period('2017-09-04') + + ps0 = [p0, p1, p2] + idx0 = pd.PeriodIndex(ps0) + + for p in ps0: + assert idx0.contains(p) + assert p in idx0 + + assert idx0.contains(str(p)) + assert str(p) in idx0 + + assert idx0.contains('2017-09-01 00:00:01') + assert '2017-09-01 00:00:01' in idx0 + + assert idx0.contains('2017-09') + assert '2017-09' in idx0 + + assert not idx0.contains(p3) + assert p3 not in idx0 + + def test_get_value(self): + # GH 17717 + p0 = pd.Period('2017-09-01') + p1 = pd.Period('2017-09-02') + p2 = pd.Period('2017-09-03') + + idx0 = pd.PeriodIndex([p0, p1, p2]) + input0 = np.array([1, 2, 3]) + expected0 = 2 + + result0 = idx0.get_value(input0, p1) + assert result0 == expected0 + + idx1 = pd.PeriodIndex([p1, p1, p2]) + input1 = np.array([1, 2, 
3]) + expected1 = np.array([1, 2]) + + result1 = idx1.get_value(input1, p1) + tm.assert_numpy_array_equal(result1, expected1) + + idx2 = pd.PeriodIndex([p1, p2, p1]) + input2 = np.array([1, 2, 3]) + expected2 = np.array([1, 3]) + + result2 = idx2.get_value(input2, p1) + tm.assert_numpy_array_equal(result2, expected2) + + def test_get_indexer(self): + # GH 17717 + p1 = pd.Period('2017-09-01') + p2 = pd.Period('2017-09-04') + p3 = pd.Period('2017-09-07') + + tp0 = pd.Period('2017-08-31') + tp1 = pd.Period('2017-09-02') + tp2 = pd.Period('2017-09-05') + tp3 = pd.Period('2017-09-09') + + idx = pd.PeriodIndex([p1, p2, p3]) + + tm.assert_numpy_array_equal(idx.get_indexer(idx), + np.array([0, 1, 2], dtype=np.intp)) + + target = pd.PeriodIndex([tp0, tp1, tp2, tp3]) + tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), + np.array([-1, 0, 1, 2], dtype=np.intp)) + tm.assert_numpy_array_equal(idx.get_indexer(target, 'backfill'), + np.array([0, 1, 2, -1], dtype=np.intp)) + tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest'), + np.array([0, 0, 1, 2], dtype=np.intp)) + + res = idx.get_indexer(target, 'nearest', + tolerance=pd.Timedelta('1 day')) + tm.assert_numpy_array_equal(res, + np.array([0, 0, 1, -1], dtype=np.intp)) + + def test_get_indexer_non_unique(self): + # GH 17717 + p1 = pd.Period('2017-09-02') + p2 = pd.Period('2017-09-03') + p3 = pd.Period('2017-09-04') + p4 = pd.Period('2017-09-05') + + idx1 = pd.PeriodIndex([p1, p2, p1]) + idx2 = pd.PeriodIndex([p2, p1, p3, p4]) + + result = idx1.get_indexer_non_unique(idx2) + expected_indexer = np.array([1, 0, 2, -1, -1], dtype=np.intp) + expected_missing = np.array([2, 3], dtype=np.int64) + + tm.assert_numpy_array_equal(result[0], expected_indexer) + tm.assert_numpy_array_equal(result[1], expected_missing) + + # TODO: This method came from test_period; de-dup with version above + def test_get_loc2(self): + idx = pd.period_range('2000-01-01', periods=3) + + for method in [None, 'pad', 'backfill', 'nearest']: + 
assert idx.get_loc(idx[1], method) == 1 + assert idx.get_loc(idx[1].asfreq('H', how='start'), method) == 1 + assert idx.get_loc(idx[1].to_timestamp(), method) == 1 + assert idx.get_loc(idx[1].to_timestamp() + .to_pydatetime(), method) == 1 + assert idx.get_loc(str(idx[1]), method) == 1 + + idx = pd.period_range('2000-01-01', periods=5)[::2] + assert idx.get_loc('2000-01-02T12', method='nearest', + tolerance='1 day') == 1 + assert idx.get_loc('2000-01-02T12', method='nearest', + tolerance=pd.Timedelta('1D')) == 1 + assert idx.get_loc('2000-01-02T12', method='nearest', + tolerance=np.timedelta64(1, 'D')) == 1 + assert idx.get_loc('2000-01-02T12', method='nearest', + tolerance=timedelta(1)) == 1 + with tm.assert_raises_regex(ValueError, + 'unit abbreviation w/o a number'): + idx.get_loc('2000-01-10', method='nearest', tolerance='foo') + + msg = 'Input has different freq from PeriodIndex\\(freq=D\\)' + with tm.assert_raises_regex(ValueError, msg): + idx.get_loc('2000-01-10', method='nearest', tolerance='1 hour') + with pytest.raises(KeyError): + idx.get_loc('2000-01-10', method='nearest', tolerance='1 day') + with pytest.raises( + ValueError, + match='list-like tolerance size must match target index size'): + idx.get_loc('2000-01-10', method='nearest', + tolerance=[pd.Timedelta('1 day').to_timedelta64(), + pd.Timedelta('1 day').to_timedelta64()]) + + # TODO: This method came from test_period; de-dup with version above + def test_get_indexer2(self): + idx = pd.period_range('2000-01-01', periods=3).asfreq('H', how='start') + tm.assert_numpy_array_equal(idx.get_indexer(idx), + np.array([0, 1, 2], dtype=np.intp)) + + target = pd.PeriodIndex(['1999-12-31T23', '2000-01-01T12', + '2000-01-02T01'], freq='H') + tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), + np.array([-1, 0, 1], dtype=np.intp)) + tm.assert_numpy_array_equal(idx.get_indexer(target, 'backfill'), + np.array([0, 1, 2], dtype=np.intp)) + tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest'), + 
np.array([0, 1, 1], dtype=np.intp)) + tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest', + tolerance='1 hour'), + np.array([0, -1, 1], dtype=np.intp)) + + msg = 'Input has different freq from PeriodIndex\\(freq=H\\)' + with tm.assert_raises_regex(ValueError, msg): + idx.get_indexer(target, 'nearest', tolerance='1 minute') + + tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest', + tolerance='1 day'), + np.array([0, 1, 1], dtype=np.intp)) + tol_raw = [pd.Timedelta('1 hour'), + pd.Timedelta('1 hour'), + np.timedelta64(1, 'D'), ] + tm.assert_numpy_array_equal( + idx.get_indexer(target, 'nearest', + tolerance=[np.timedelta64(x) for x in tol_raw]), + np.array([0, -1, 1], dtype=np.intp)) + tol_bad = [pd.Timedelta('2 hour').to_timedelta64(), + pd.Timedelta('1 hour').to_timedelta64(), + np.timedelta64(1, 'M'), ] + with pytest.raises( + libperiod.IncompatibleFrequency, + match='Input has different freq from'): + idx.get_indexer(target, 'nearest', tolerance=tol_bad) + + def test_indexing(self): + # GH 4390, iat incorrectly indexing + index = period_range('1/1/2001', periods=10) + s = Series(np.random.randn(10), index=index) + expected = s[index[0]] + result = s.iat[0] + assert expected == result + + def test_period_index_indexer(self): + # GH4125 + idx = pd.period_range('2002-01', '2003-12', freq='M') + df = pd.DataFrame(pd.np.random.randn(24, 10), index=idx) + tm.assert_frame_equal(df, df.loc[idx]) + tm.assert_frame_equal(df, df.loc[list(idx)]) + tm.assert_frame_equal(df, df.loc[list(idx)]) + tm.assert_frame_equal(df.iloc[0:5], df.loc[idx[0:5]]) + tm.assert_frame_equal(df, df.loc[list(idx)]) diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py index 82a881d7c65bc..7d117b0b626cf 100644 --- a/pandas/tests/indexes/period/test_ops.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -1,277 +1,84 @@ + import numpy as np -from datetime import timedelta +import pytest import pandas as pd -import pandas.tslib as tslib 
+import pandas._libs.tslib as tslib import pandas.util.testing as tm -import pandas.tseries.period as period -from pandas import (DatetimeIndex, PeriodIndex, period_range, Series, Period, - _np_version_under1p10, Index, Timedelta, offsets) +from pandas import (DatetimeIndex, PeriodIndex, Series, Period, + _np_version_under1p10, Index) from pandas.tests.test_base import Ops class TestPeriodIndexOps(Ops): - def setUp(self): - super(TestPeriodIndexOps, self).setUp() + def setup_method(self, method): + super(TestPeriodIndexOps, self).setup_method(method) mask = lambda x: (isinstance(x, DatetimeIndex) or isinstance(x, PeriodIndex)) self.is_valid_objs = [o for o in self.objs if mask(o)] self.not_valid_objs = [o for o in self.objs if not mask(o)] def test_ops_properties(self): - self.check_ops_properties( - ['year', 'month', 'day', 'hour', 'minute', 'second', 'weekofyear', - 'week', 'dayofweek', 'dayofyear', 'quarter']) - self.check_ops_properties(['qyear'], - lambda x: isinstance(x, PeriodIndex)) - - def test_asobject_tolist(self): - idx = pd.period_range(start='2013-01-01', periods=4, freq='M', - name='idx') - expected_list = [pd.Period('2013-01-31', freq='M'), - pd.Period('2013-02-28', freq='M'), - pd.Period('2013-03-31', freq='M'), - pd.Period('2013-04-30', freq='M')] - expected = pd.Index(expected_list, dtype=object, name='idx') - result = idx.asobject - self.assertTrue(isinstance(result, Index)) - self.assertEqual(result.dtype, object) - self.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(idx.tolist(), expected_list) - - idx = PeriodIndex(['2013-01-01', '2013-01-02', 'NaT', - '2013-01-04'], freq='D', name='idx') - expected_list = [pd.Period('2013-01-01', freq='D'), - pd.Period('2013-01-02', freq='D'), - pd.Period('NaT', freq='D'), - pd.Period('2013-01-04', freq='D')] - expected = pd.Index(expected_list, dtype=object, name='idx') - result = idx.asobject - self.assertTrue(isinstance(result, Index)) - 
self.assertEqual(result.dtype, object) - tm.assert_index_equal(result, expected) - for i in [0, 1, 3]: - self.assertEqual(result[i], expected[i]) - self.assertIs(result[2], pd.NaT) - self.assertEqual(result.name, expected.name) - - result_list = idx.tolist() - for i in [0, 1, 3]: - self.assertEqual(result_list[i], expected_list[i]) - self.assertIs(result_list[2], pd.NaT) + f = lambda x: isinstance(x, PeriodIndex) + self.check_ops_properties(PeriodIndex._field_ops, f) + self.check_ops_properties(PeriodIndex._object_ops, f) + self.check_ops_properties(PeriodIndex._bool_ops, f) def test_minmax(self): # monotonic idx1 = pd.PeriodIndex([pd.NaT, '2011-01-01', '2011-01-02', '2011-01-03'], freq='D') - self.assertTrue(idx1.is_monotonic) + assert idx1.is_monotonic # non-monotonic idx2 = pd.PeriodIndex(['2011-01-01', pd.NaT, '2011-01-03', '2011-01-02', pd.NaT], freq='D') - self.assertFalse(idx2.is_monotonic) + assert not idx2.is_monotonic for idx in [idx1, idx2]: - self.assertEqual(idx.min(), pd.Period('2011-01-01', freq='D')) - self.assertEqual(idx.max(), pd.Period('2011-01-03', freq='D')) - self.assertEqual(idx1.argmin(), 1) - self.assertEqual(idx2.argmin(), 0) - self.assertEqual(idx1.argmax(), 3) - self.assertEqual(idx2.argmax(), 2) + assert idx.min() == pd.Period('2011-01-01', freq='D') + assert idx.max() == pd.Period('2011-01-03', freq='D') + assert idx1.argmin() == 1 + assert idx2.argmin() == 0 + assert idx1.argmax() == 3 + assert idx2.argmax() == 2 for op in ['min', 'max']: # Return NaT obj = PeriodIndex([], freq='M') result = getattr(obj, op)() - self.assertIs(result, tslib.NaT) + assert result is tslib.NaT obj = PeriodIndex([pd.NaT], freq='M') result = getattr(obj, op)() - self.assertIs(result, tslib.NaT) + assert result is tslib.NaT obj = PeriodIndex([pd.NaT, pd.NaT, pd.NaT], freq='M') result = getattr(obj, op)() - self.assertIs(result, tslib.NaT) + assert result is tslib.NaT def test_numpy_minmax(self): pr = pd.period_range(start='2016-01-15', end='2016-01-20') - 
self.assertEqual(np.min(pr), Period('2016-01-15', freq='D')) - self.assertEqual(np.max(pr), Period('2016-01-20', freq='D')) + assert np.min(pr) == Period('2016-01-15', freq='D') + assert np.max(pr) == Period('2016-01-20', freq='D') errmsg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, errmsg, np.min, pr, out=0) - tm.assertRaisesRegexp(ValueError, errmsg, np.max, pr, out=0) + tm.assert_raises_regex(ValueError, errmsg, np.min, pr, out=0) + tm.assert_raises_regex(ValueError, errmsg, np.max, pr, out=0) - self.assertEqual(np.argmin(pr), 0) - self.assertEqual(np.argmax(pr), 5) + assert np.argmin(pr) == 0 + assert np.argmax(pr) == 5 if not _np_version_under1p10: errmsg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, errmsg, np.argmin, pr, out=0) - tm.assertRaisesRegexp(ValueError, errmsg, np.argmax, pr, out=0) - - def test_representation(self): - # GH 7601 - idx1 = PeriodIndex([], freq='D') - idx2 = PeriodIndex(['2011-01-01'], freq='D') - idx3 = PeriodIndex(['2011-01-01', '2011-01-02'], freq='D') - idx4 = PeriodIndex(['2011-01-01', '2011-01-02', '2011-01-03'], - freq='D') - idx5 = PeriodIndex(['2011', '2012', '2013'], freq='A') - idx6 = PeriodIndex(['2011-01-01 09:00', '2012-02-01 10:00', - 'NaT'], freq='H') - idx7 = pd.period_range('2013Q1', periods=1, freq="Q") - idx8 = pd.period_range('2013Q1', periods=2, freq="Q") - idx9 = pd.period_range('2013Q1', periods=3, freq="Q") - idx10 = PeriodIndex(['2011-01-01', '2011-02-01'], freq='3D') - - exp1 = """PeriodIndex([], dtype='period[D]', freq='D')""" - - exp2 = """PeriodIndex(['2011-01-01'], dtype='period[D]', freq='D')""" - - exp3 = ("PeriodIndex(['2011-01-01', '2011-01-02'], dtype='period[D]', " - "freq='D')") - - exp4 = ("PeriodIndex(['2011-01-01', '2011-01-02', '2011-01-03'], " - "dtype='period[D]', freq='D')") - - exp5 = ("PeriodIndex(['2011', '2012', '2013'], dtype='period[A-DEC]', " - "freq='A-DEC')") - - exp6 = ("PeriodIndex(['2011-01-01 09:00', '2012-02-01 
10:00', 'NaT'], " - "dtype='period[H]', freq='H')") - - exp7 = ("PeriodIndex(['2013Q1'], dtype='period[Q-DEC]', " - "freq='Q-DEC')") - - exp8 = ("PeriodIndex(['2013Q1', '2013Q2'], dtype='period[Q-DEC]', " - "freq='Q-DEC')") - - exp9 = ("PeriodIndex(['2013Q1', '2013Q2', '2013Q3'], " - "dtype='period[Q-DEC]', freq='Q-DEC')") - - exp10 = ("PeriodIndex(['2011-01-01', '2011-02-01'], " - "dtype='period[3D]', freq='3D')") - - for idx, expected in zip([idx1, idx2, idx3, idx4, idx5, - idx6, idx7, idx8, idx9, idx10], - [exp1, exp2, exp3, exp4, exp5, - exp6, exp7, exp8, exp9, exp10]): - for func in ['__repr__', '__unicode__', '__str__']: - result = getattr(idx, func)() - self.assertEqual(result, expected) - - def test_representation_to_series(self): - # GH 10971 - idx1 = PeriodIndex([], freq='D') - idx2 = PeriodIndex(['2011-01-01'], freq='D') - idx3 = PeriodIndex(['2011-01-01', '2011-01-02'], freq='D') - idx4 = PeriodIndex(['2011-01-01', '2011-01-02', - '2011-01-03'], freq='D') - idx5 = PeriodIndex(['2011', '2012', '2013'], freq='A') - idx6 = PeriodIndex(['2011-01-01 09:00', '2012-02-01 10:00', - 'NaT'], freq='H') - - idx7 = pd.period_range('2013Q1', periods=1, freq="Q") - idx8 = pd.period_range('2013Q1', periods=2, freq="Q") - idx9 = pd.period_range('2013Q1', periods=3, freq="Q") - - exp1 = """Series([], dtype: object)""" - - exp2 = """0 2011-01-01 -dtype: object""" - - exp3 = """0 2011-01-01 -1 2011-01-02 -dtype: object""" - - exp4 = """0 2011-01-01 -1 2011-01-02 -2 2011-01-03 -dtype: object""" - - exp5 = """0 2011 -1 2012 -2 2013 -dtype: object""" - - exp6 = """0 2011-01-01 09:00 -1 2012-02-01 10:00 -2 NaT -dtype: object""" - - exp7 = """0 2013Q1 -dtype: object""" - - exp8 = """0 2013Q1 -1 2013Q2 -dtype: object""" - - exp9 = """0 2013Q1 -1 2013Q2 -2 2013Q3 -dtype: object""" - - for idx, expected in zip([idx1, idx2, idx3, idx4, idx5, - idx6, idx7, idx8, idx9], - [exp1, exp2, exp3, exp4, exp5, - exp6, exp7, exp8, exp9]): - result = repr(pd.Series(idx)) - 
self.assertEqual(result, expected) - - def test_summary(self): - # GH9116 - idx1 = PeriodIndex([], freq='D') - idx2 = PeriodIndex(['2011-01-01'], freq='D') - idx3 = PeriodIndex(['2011-01-01', '2011-01-02'], freq='D') - idx4 = PeriodIndex( - ['2011-01-01', '2011-01-02', '2011-01-03'], freq='D') - idx5 = PeriodIndex(['2011', '2012', '2013'], freq='A') - idx6 = PeriodIndex( - ['2011-01-01 09:00', '2012-02-01 10:00', 'NaT'], freq='H') - - idx7 = pd.period_range('2013Q1', periods=1, freq="Q") - idx8 = pd.period_range('2013Q1', periods=2, freq="Q") - idx9 = pd.period_range('2013Q1', periods=3, freq="Q") - - exp1 = """PeriodIndex: 0 entries -Freq: D""" - - exp2 = """PeriodIndex: 1 entries, 2011-01-01 to 2011-01-01 -Freq: D""" - - exp3 = """PeriodIndex: 2 entries, 2011-01-01 to 2011-01-02 -Freq: D""" - - exp4 = """PeriodIndex: 3 entries, 2011-01-01 to 2011-01-03 -Freq: D""" - - exp5 = """PeriodIndex: 3 entries, 2011 to 2013 -Freq: A-DEC""" - - exp6 = """PeriodIndex: 3 entries, 2011-01-01 09:00 to NaT -Freq: H""" - - exp7 = """PeriodIndex: 1 entries, 2013Q1 to 2013Q1 -Freq: Q-DEC""" - - exp8 = """PeriodIndex: 2 entries, 2013Q1 to 2013Q2 -Freq: Q-DEC""" - - exp9 = """PeriodIndex: 3 entries, 2013Q1 to 2013Q3 -Freq: Q-DEC""" - - for idx, expected in zip([idx1, idx2, idx3, idx4, idx5, - idx6, idx7, idx8, idx9], - [exp1, exp2, exp3, exp4, exp5, - exp6, exp7, exp8, exp9]): - result = idx.summary() - self.assertEqual(result, expected) + tm.assert_raises_regex( + ValueError, errmsg, np.argmin, pr, out=0) + tm.assert_raises_regex( + ValueError, errmsg, np.argmax, pr, out=0) def test_resolution(self): for freq, expected in zip(['A', 'Q', 'M', 'D', 'H', @@ -281,233 +88,7 @@ def test_resolution(self): 'millisecond', 'microsecond']): idx = pd.period_range(start='2013-04-01', periods=30, freq=freq) - self.assertEqual(idx.resolution, expected) - - def test_add_iadd(self): - rng = pd.period_range('1/1/2000', freq='D', periods=5) - other = pd.period_range('1/6/2000', freq='D', periods=5) - 
- # previously performed setop union, now raises TypeError (GH14164) - with tm.assertRaises(TypeError): - rng + other - - with tm.assertRaises(TypeError): - rng += other - - # offset - # DateOffset - rng = pd.period_range('2014', '2024', freq='A') - result = rng + pd.offsets.YearEnd(5) - expected = pd.period_range('2019', '2029', freq='A') - tm.assert_index_equal(result, expected) - rng += pd.offsets.YearEnd(5) - tm.assert_index_equal(rng, expected) - - for o in [pd.offsets.YearBegin(2), pd.offsets.MonthBegin(1), - pd.offsets.Minute(), np.timedelta64(365, 'D'), - timedelta(365), Timedelta(days=365)]: - msg = ('Input has different freq(=.+)? ' - 'from PeriodIndex\\(freq=A-DEC\\)') - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - rng + o - - rng = pd.period_range('2014-01', '2016-12', freq='M') - result = rng + pd.offsets.MonthEnd(5) - expected = pd.period_range('2014-06', '2017-05', freq='M') - tm.assert_index_equal(result, expected) - rng += pd.offsets.MonthEnd(5) - tm.assert_index_equal(rng, expected) - - for o in [pd.offsets.YearBegin(2), pd.offsets.MonthBegin(1), - pd.offsets.Minute(), np.timedelta64(365, 'D'), - timedelta(365), Timedelta(days=365)]: - rng = pd.period_range('2014-01', '2016-12', freq='M') - msg = 'Input has different freq(=.+)? 
from PeriodIndex\\(freq=M\\)' - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - rng + o - - # Tick - offsets = [pd.offsets.Day(3), timedelta(days=3), - np.timedelta64(3, 'D'), pd.offsets.Hour(72), - timedelta(minutes=60 * 24 * 3), np.timedelta64(72, 'h'), - Timedelta('72:00:00')] - for delta in offsets: - rng = pd.period_range('2014-05-01', '2014-05-15', freq='D') - result = rng + delta - expected = pd.period_range('2014-05-04', '2014-05-18', freq='D') - tm.assert_index_equal(result, expected) - rng += delta - tm.assert_index_equal(rng, expected) - - for o in [pd.offsets.YearBegin(2), pd.offsets.MonthBegin(1), - pd.offsets.Minute(), np.timedelta64(4, 'h'), - timedelta(hours=23), Timedelta('23:00:00')]: - rng = pd.period_range('2014-05-01', '2014-05-15', freq='D') - msg = 'Input has different freq(=.+)? from PeriodIndex\\(freq=D\\)' - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - rng + o - - offsets = [pd.offsets.Hour(2), timedelta(hours=2), - np.timedelta64(2, 'h'), pd.offsets.Minute(120), - timedelta(minutes=120), np.timedelta64(120, 'm'), - Timedelta(minutes=120)] - for delta in offsets: - rng = pd.period_range('2014-01-01 10:00', '2014-01-05 10:00', - freq='H') - result = rng + delta - expected = pd.period_range('2014-01-01 12:00', '2014-01-05 12:00', - freq='H') - tm.assert_index_equal(result, expected) - rng += delta - tm.assert_index_equal(rng, expected) - - for delta in [pd.offsets.YearBegin(2), timedelta(minutes=30), - np.timedelta64(30, 's'), Timedelta(seconds=30)]: - rng = pd.period_range('2014-01-01 10:00', '2014-01-05 10:00', - freq='H') - msg = 'Input has different freq(=.+)? 
from PeriodIndex\\(freq=H\\)' - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - result = rng + delta - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - rng += delta - - # int - rng = pd.period_range('2000-01-01 09:00', freq='H', periods=10) - result = rng + 1 - expected = pd.period_range('2000-01-01 10:00', freq='H', periods=10) - tm.assert_index_equal(result, expected) - rng += 1 - tm.assert_index_equal(rng, expected) - - def test_sub(self): - rng = period_range('2007-01', periods=50) - - result = rng - 5 - exp = rng + (-5) - tm.assert_index_equal(result, exp) - - def test_sub_isub(self): - - # previously performed setop, now raises TypeError (GH14164) - # TODO needs to wait on #13077 for decision on result type - rng = pd.period_range('1/1/2000', freq='D', periods=5) - other = pd.period_range('1/6/2000', freq='D', periods=5) - - with tm.assertRaises(TypeError): - rng - other - - with tm.assertRaises(TypeError): - rng -= other - - # offset - # DateOffset - rng = pd.period_range('2014', '2024', freq='A') - result = rng - pd.offsets.YearEnd(5) - expected = pd.period_range('2009', '2019', freq='A') - tm.assert_index_equal(result, expected) - rng -= pd.offsets.YearEnd(5) - tm.assert_index_equal(rng, expected) - - for o in [pd.offsets.YearBegin(2), pd.offsets.MonthBegin(1), - pd.offsets.Minute(), np.timedelta64(365, 'D'), - timedelta(365)]: - rng = pd.period_range('2014', '2024', freq='A') - msg = ('Input has different freq(=.+)? 
' - 'from PeriodIndex\\(freq=A-DEC\\)') - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - rng - o - - rng = pd.period_range('2014-01', '2016-12', freq='M') - result = rng - pd.offsets.MonthEnd(5) - expected = pd.period_range('2013-08', '2016-07', freq='M') - tm.assert_index_equal(result, expected) - rng -= pd.offsets.MonthEnd(5) - tm.assert_index_equal(rng, expected) - - for o in [pd.offsets.YearBegin(2), pd.offsets.MonthBegin(1), - pd.offsets.Minute(), np.timedelta64(365, 'D'), - timedelta(365)]: - rng = pd.period_range('2014-01', '2016-12', freq='M') - msg = 'Input has different freq(=.+)? from PeriodIndex\\(freq=M\\)' - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - rng - o - - # Tick - offsets = [pd.offsets.Day(3), timedelta(days=3), - np.timedelta64(3, 'D'), pd.offsets.Hour(72), - timedelta(minutes=60 * 24 * 3), np.timedelta64(72, 'h')] - for delta in offsets: - rng = pd.period_range('2014-05-01', '2014-05-15', freq='D') - result = rng - delta - expected = pd.period_range('2014-04-28', '2014-05-12', freq='D') - tm.assert_index_equal(result, expected) - rng -= delta - tm.assert_index_equal(rng, expected) - - for o in [pd.offsets.YearBegin(2), pd.offsets.MonthBegin(1), - pd.offsets.Minute(), np.timedelta64(4, 'h'), - timedelta(hours=23)]: - rng = pd.period_range('2014-05-01', '2014-05-15', freq='D') - msg = 'Input has different freq(=.+)? 
from PeriodIndex\\(freq=D\\)' - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - rng - o - - offsets = [pd.offsets.Hour(2), timedelta(hours=2), - np.timedelta64(2, 'h'), pd.offsets.Minute(120), - timedelta(minutes=120), np.timedelta64(120, 'm')] - for delta in offsets: - rng = pd.period_range('2014-01-01 10:00', '2014-01-05 10:00', - freq='H') - result = rng - delta - expected = pd.period_range('2014-01-01 08:00', '2014-01-05 08:00', - freq='H') - tm.assert_index_equal(result, expected) - rng -= delta - tm.assert_index_equal(rng, expected) - - for delta in [pd.offsets.YearBegin(2), timedelta(minutes=30), - np.timedelta64(30, 's')]: - rng = pd.period_range('2014-01-01 10:00', '2014-01-05 10:00', - freq='H') - msg = 'Input has different freq(=.+)? from PeriodIndex\\(freq=H\\)' - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - result = rng + delta - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - rng += delta - - # int - rng = pd.period_range('2000-01-01 09:00', freq='H', periods=10) - result = rng - 1 - expected = pd.period_range('2000-01-01 08:00', freq='H', periods=10) - tm.assert_index_equal(result, expected) - rng -= 1 - tm.assert_index_equal(rng, expected) - - def test_comp_nat(self): - left = pd.PeriodIndex([pd.Period('2011-01-01'), pd.NaT, - pd.Period('2011-01-03')]) - right = pd.PeriodIndex([pd.NaT, pd.NaT, pd.Period('2011-01-03')]) - - for l, r in [(left, right), (left.asobject, right.asobject)]: - result = l == r - expected = np.array([False, False, True]) - tm.assert_numpy_array_equal(result, expected) - - result = l != r - expected = np.array([True, True, False]) - tm.assert_numpy_array_equal(result, expected) - - expected = np.array([False, False, False]) - tm.assert_numpy_array_equal(l == pd.NaT, expected) - tm.assert_numpy_array_equal(pd.NaT == r, expected) - - expected = np.array([True, True, True]) - tm.assert_numpy_array_equal(l != pd.NaT, expected) - tm.assert_numpy_array_equal(pd.NaT != l, 
expected) - - expected = np.array([False, False, False]) - tm.assert_numpy_array_equal(l < pd.NaT, expected) - tm.assert_numpy_array_equal(pd.NaT > l, expected) + assert idx.resolution == expected def test_value_counts_unique(self): # GH 7735 @@ -555,13 +136,13 @@ def test_drop_duplicates_metadata(self): # GH 10115 idx = pd.period_range('2011-01-01', '2011-01-31', freq='D', name='idx') result = idx.drop_duplicates() - self.assert_index_equal(idx, result) - self.assertEqual(idx.freq, result.freq) + tm.assert_index_equal(idx, result) + assert idx.freq == result.freq idx_dup = idx.append(idx) # freq will not be reset result = idx_dup.drop_duplicates() - self.assert_index_equal(idx, result) - self.assertEqual(idx.freq, result.freq) + tm.assert_index_equal(idx, result) + assert idx.freq == result.freq def test_drop_duplicates(self): # to check Index/Series compat @@ -588,33 +169,31 @@ def test_drop_duplicates(self): def test_order_compat(self): def _check_freq(index, expected_index): if isinstance(index, PeriodIndex): - self.assertEqual(index.freq, expected_index.freq) + assert index.freq == expected_index.freq pidx = PeriodIndex(['2011', '2012', '2013'], name='pidx', freq='A') # for compatibility check iidx = Index([2011, 2012, 2013], name='idx') for idx in [pidx, iidx]: ordered = idx.sort_values() - self.assert_index_equal(ordered, idx) + tm.assert_index_equal(ordered, idx) _check_freq(ordered, idx) ordered = idx.sort_values(ascending=False) - self.assert_index_equal(ordered, idx[::-1]) + tm.assert_index_equal(ordered, idx[::-1]) _check_freq(ordered, idx[::-1]) ordered, indexer = idx.sort_values(return_indexer=True) - self.assert_index_equal(ordered, idx) - self.assert_numpy_array_equal(indexer, - np.array([0, 1, 2]), - check_dtype=False) + tm.assert_index_equal(ordered, idx) + tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), + check_dtype=False) _check_freq(ordered, idx) ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) - 
self.assert_index_equal(ordered, idx[::-1]) - self.assert_numpy_array_equal(indexer, - np.array([2, 1, 0]), - check_dtype=False) + tm.assert_index_equal(ordered, idx[::-1]) + tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0]), + check_dtype=False) _check_freq(ordered, idx[::-1]) pidx = PeriodIndex(['2011', '2013', '2015', '2012', @@ -626,27 +205,26 @@ def _check_freq(index, expected_index): iexpected = Index([2011, 2011, 2012, 2013, 2015], name='idx') for idx, expected in [(pidx, pexpected), (iidx, iexpected)]: ordered = idx.sort_values() - self.assert_index_equal(ordered, expected) + tm.assert_index_equal(ordered, expected) _check_freq(ordered, idx) ordered = idx.sort_values(ascending=False) - self.assert_index_equal(ordered, expected[::-1]) + tm.assert_index_equal(ordered, expected[::-1]) _check_freq(ordered, idx) ordered, indexer = idx.sort_values(return_indexer=True) - self.assert_index_equal(ordered, expected) + tm.assert_index_equal(ordered, expected) exp = np.array([0, 4, 3, 1, 2]) - self.assert_numpy_array_equal(indexer, exp, check_dtype=False) + tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) _check_freq(ordered, idx) ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) - self.assert_index_equal(ordered, expected[::-1]) + tm.assert_index_equal(ordered, expected[::-1]) exp = np.array([2, 1, 3, 4, 0]) - self.assert_numpy_array_equal(indexer, exp, - check_dtype=False) + tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) _check_freq(ordered, idx) pidx = PeriodIndex(['2011', '2013', 'NaT', '2011'], name='pidx', @@ -655,14 +233,14 @@ def _check_freq(index, expected_index): result = pidx.sort_values() expected = PeriodIndex(['NaT', '2011', '2011', '2013'], name='pidx', freq='D') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, 'D') + tm.assert_index_equal(result, expected) + assert result.freq == 'D' result = pidx.sort_values(ascending=False) expected = PeriodIndex( ['2013', '2011', 
'2011', 'NaT'], name='pidx', freq='D') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, 'D') + tm.assert_index_equal(result, expected) + assert result.freq == 'D' def test_order(self): for freq in ['D', '2D', '4D']: @@ -670,32 +248,30 @@ def test_order(self): freq=freq, name='idx') ordered = idx.sort_values() - self.assert_index_equal(ordered, idx) - self.assertEqual(ordered.freq, idx.freq) + tm.assert_index_equal(ordered, idx) + assert ordered.freq == idx.freq ordered = idx.sort_values(ascending=False) expected = idx[::-1] - self.assert_index_equal(ordered, expected) - self.assertEqual(ordered.freq, expected.freq) - self.assertEqual(ordered.freq, freq) + tm.assert_index_equal(ordered, expected) + assert ordered.freq == expected.freq + assert ordered.freq == freq ordered, indexer = idx.sort_values(return_indexer=True) - self.assert_index_equal(ordered, idx) - self.assert_numpy_array_equal(indexer, - np.array([0, 1, 2]), - check_dtype=False) - self.assertEqual(ordered.freq, idx.freq) - self.assertEqual(ordered.freq, freq) + tm.assert_index_equal(ordered, idx) + tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), + check_dtype=False) + assert ordered.freq == idx.freq + assert ordered.freq == freq ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) expected = idx[::-1] - self.assert_index_equal(ordered, expected) - self.assert_numpy_array_equal(indexer, - np.array([2, 1, 0]), - check_dtype=False) - self.assertEqual(ordered.freq, expected.freq) - self.assertEqual(ordered.freq, freq) + tm.assert_index_equal(ordered, expected) + tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0]), + check_dtype=False) + assert ordered.freq == expected.freq + assert ordered.freq == freq idx1 = PeriodIndex(['2011-01-01', '2011-01-03', '2011-01-05', '2011-01-02', '2011-01-01'], freq='D', name='idx1') @@ -716,27 +292,27 @@ def test_order(self): for idx, expected in [(idx1, exp1), (idx2, exp2), (idx3, exp3)]: ordered = 
idx.sort_values() - self.assert_index_equal(ordered, expected) - self.assertEqual(ordered.freq, 'D') + tm.assert_index_equal(ordered, expected) + assert ordered.freq == 'D' ordered = idx.sort_values(ascending=False) - self.assert_index_equal(ordered, expected[::-1]) - self.assertEqual(ordered.freq, 'D') + tm.assert_index_equal(ordered, expected[::-1]) + assert ordered.freq == 'D' ordered, indexer = idx.sort_values(return_indexer=True) - self.assert_index_equal(ordered, expected) + tm.assert_index_equal(ordered, expected) exp = np.array([0, 4, 3, 1, 2]) - self.assert_numpy_array_equal(indexer, exp, check_dtype=False) - self.assertEqual(ordered.freq, 'D') + tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) + assert ordered.freq == 'D' ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) - self.assert_index_equal(ordered, expected[::-1]) + tm.assert_index_equal(ordered, expected[::-1]) exp = np.array([2, 1, 3, 4, 0]) - self.assert_numpy_array_equal(indexer, exp, check_dtype=False) - self.assertEqual(ordered.freq, 'D') + tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) + assert ordered.freq == 'D' def test_nat_new(self): @@ -750,25 +326,8 @@ def test_nat_new(self): tm.assert_numpy_array_equal(result, exp) def test_shift(self): - # GH 9903 - idx = pd.PeriodIndex([], name='xxx', freq='H') - - with tm.assertRaises(TypeError): - # period shift doesn't accept freq - idx.shift(1, freq='H') - - tm.assert_index_equal(idx.shift(0), idx) - tm.assert_index_equal(idx.shift(3), idx) - - idx = pd.PeriodIndex(['2011-01-01 10:00', '2011-01-01 11:00' - '2011-01-01 12:00'], name='xxx', freq='H') - tm.assert_index_equal(idx.shift(0), idx) - exp = pd.PeriodIndex(['2011-01-01 13:00', '2011-01-01 14:00' - '2011-01-01 15:00'], name='xxx', freq='H') - tm.assert_index_equal(idx.shift(3), exp) - exp = pd.PeriodIndex(['2011-01-01 07:00', '2011-01-01 08:00' - '2011-01-01 09:00'], name='xxx', freq='H') - tm.assert_index_equal(idx.shift(-3), exp) + # This 
is tested in test_arithmetic + pass def test_repeat(self): index = pd.period_range('2001-01-01', periods=2, freq='D') @@ -791,59 +350,59 @@ def test_repeat(self): tm.assert_index_equal(res, exp) def test_nat(self): - self.assertIs(pd.PeriodIndex._na_value, pd.NaT) - self.assertIs(pd.PeriodIndex([], freq='M')._na_value, pd.NaT) + assert pd.PeriodIndex._na_value is pd.NaT + assert pd.PeriodIndex([], freq='M')._na_value is pd.NaT idx = pd.PeriodIndex(['2011-01-01', '2011-01-02'], freq='D') - self.assertTrue(idx._can_hold_na) + assert idx._can_hold_na tm.assert_numpy_array_equal(idx._isnan, np.array([False, False])) - self.assertFalse(idx.hasnans) + assert not idx.hasnans tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp)) idx = pd.PeriodIndex(['2011-01-01', 'NaT'], freq='D') - self.assertTrue(idx._can_hold_na) + assert idx._can_hold_na tm.assert_numpy_array_equal(idx._isnan, np.array([False, True])) - self.assertTrue(idx.hasnans) + assert idx.hasnans tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1], dtype=np.intp)) - def test_equals(self): - # GH 13107 - for freq in ['D', 'M']: - idx = pd.PeriodIndex(['2011-01-01', '2011-01-02', 'NaT'], - freq=freq) - self.assertTrue(idx.equals(idx)) - self.assertTrue(idx.equals(idx.copy())) - self.assertTrue(idx.equals(idx.asobject)) - self.assertTrue(idx.asobject.equals(idx)) - self.assertTrue(idx.asobject.equals(idx.asobject)) - self.assertFalse(idx.equals(list(idx))) - self.assertFalse(idx.equals(pd.Series(idx))) - - idx2 = pd.PeriodIndex(['2011-01-01', '2011-01-02', 'NaT'], - freq='H') - self.assertFalse(idx.equals(idx2)) - self.assertFalse(idx.equals(idx2.copy())) - self.assertFalse(idx.equals(idx2.asobject)) - self.assertFalse(idx.asobject.equals(idx2)) - self.assertFalse(idx.equals(list(idx2))) - self.assertFalse(idx.equals(pd.Series(idx2))) - - # same internal, different tz - idx3 = pd.PeriodIndex._simple_new(idx.asi8, freq='H') - tm.assert_numpy_array_equal(idx.asi8, idx3.asi8) - 
self.assertFalse(idx.equals(idx3)) - self.assertFalse(idx.equals(idx3.copy())) - self.assertFalse(idx.equals(idx3.asobject)) - self.assertFalse(idx.asobject.equals(idx3)) - self.assertFalse(idx.equals(list(idx3))) - self.assertFalse(idx.equals(pd.Series(idx3))) - - -class TestPeriodIndexSeriesMethods(tm.TestCase): + @pytest.mark.parametrize('freq', ['D', 'M']) + def test_equals(self, freq): + # GH#13107 + idx = pd.PeriodIndex(['2011-01-01', '2011-01-02', 'NaT'], + freq=freq) + assert idx.equals(idx) + assert idx.equals(idx.copy()) + assert idx.equals(idx.astype(object)) + assert idx.astype(object).equals(idx) + assert idx.astype(object).equals(idx.astype(object)) + assert not idx.equals(list(idx)) + assert not idx.equals(pd.Series(idx)) + + idx2 = pd.PeriodIndex(['2011-01-01', '2011-01-02', 'NaT'], + freq='H') + assert not idx.equals(idx2) + assert not idx.equals(idx2.copy()) + assert not idx.equals(idx2.astype(object)) + assert not idx.astype(object).equals(idx2) + assert not idx.equals(list(idx2)) + assert not idx.equals(pd.Series(idx2)) + + # same internal, different tz + idx3 = pd.PeriodIndex._simple_new(idx.asi8, freq='H') + tm.assert_numpy_array_equal(idx.asi8, idx3.asi8) + assert not idx.equals(idx3) + assert not idx.equals(idx3.copy()) + assert not idx.equals(idx3.astype(object)) + assert not idx.astype(object).equals(idx3) + assert not idx.equals(list(idx3)) + assert not idx.equals(pd.Series(idx3)) + + +class TestPeriodIndexSeriesMethods(object): """ Test PeriodIndex and Period Series Ops consistency """ def _check(self, values, func, expected): @@ -861,196 +420,6 @@ def _check(self, values, func, expected): exp = pd.Series(expected, name=values.name) tm.assert_series_equal(result, exp) - def test_pi_ops(self): - idx = PeriodIndex(['2011-01', '2011-02', '2011-03', - '2011-04'], freq='M', name='idx') - - expected = PeriodIndex(['2011-03', '2011-04', - '2011-05', '2011-06'], freq='M', name='idx') - self._check(idx, lambda x: x + 2, expected) - 
self._check(idx, lambda x: 2 + x, expected) - - self._check(idx + 2, lambda x: x - 2, idx) - result = idx - Period('2011-01', freq='M') - exp = pd.Index([0, 1, 2, 3], name='idx') - tm.assert_index_equal(result, exp) - - result = Period('2011-01', freq='M') - idx - exp = pd.Index([0, -1, -2, -3], name='idx') - tm.assert_index_equal(result, exp) - - def test_pi_ops_errors(self): - idx = PeriodIndex(['2011-01', '2011-02', '2011-03', - '2011-04'], freq='M', name='idx') - s = pd.Series(idx) - - msg = r"unsupported operand type\(s\)" - - for obj in [idx, s]: - for ng in ["str", 1.5]: - with tm.assertRaisesRegexp(TypeError, msg): - obj + ng - - with tm.assertRaises(TypeError): - # error message differs between PY2 and 3 - ng + obj - - with tm.assertRaisesRegexp(TypeError, msg): - obj - ng - - with tm.assertRaises(TypeError): - np.add(obj, ng) - - if _np_version_under1p10: - self.assertIs(np.add(ng, obj), NotImplemented) - else: - with tm.assertRaises(TypeError): - np.add(ng, obj) - - with tm.assertRaises(TypeError): - np.subtract(obj, ng) - - if _np_version_under1p10: - self.assertIs(np.subtract(ng, obj), NotImplemented) - else: - with tm.assertRaises(TypeError): - np.subtract(ng, obj) - - def test_pi_ops_nat(self): - idx = PeriodIndex(['2011-01', '2011-02', 'NaT', - '2011-04'], freq='M', name='idx') - expected = PeriodIndex(['2011-03', '2011-04', - 'NaT', '2011-06'], freq='M', name='idx') - self._check(idx, lambda x: x + 2, expected) - self._check(idx, lambda x: 2 + x, expected) - self._check(idx, lambda x: np.add(x, 2), expected) - - self._check(idx + 2, lambda x: x - 2, idx) - self._check(idx + 2, lambda x: np.subtract(x, 2), idx) - - # freq with mult - idx = PeriodIndex(['2011-01', '2011-02', 'NaT', - '2011-04'], freq='2M', name='idx') - expected = PeriodIndex(['2011-07', '2011-08', - 'NaT', '2011-10'], freq='2M', name='idx') - self._check(idx, lambda x: x + 3, expected) - self._check(idx, lambda x: 3 + x, expected) - self._check(idx, lambda x: np.add(x, 3), expected) 
- - self._check(idx + 3, lambda x: x - 3, idx) - self._check(idx + 3, lambda x: np.subtract(x, 3), idx) - - def test_pi_ops_array_int(self): - idx = PeriodIndex(['2011-01', '2011-02', 'NaT', - '2011-04'], freq='M', name='idx') - f = lambda x: x + np.array([1, 2, 3, 4]) - exp = PeriodIndex(['2011-02', '2011-04', 'NaT', - '2011-08'], freq='M', name='idx') - self._check(idx, f, exp) - - f = lambda x: np.add(x, np.array([4, -1, 1, 2])) - exp = PeriodIndex(['2011-05', '2011-01', 'NaT', - '2011-06'], freq='M', name='idx') - self._check(idx, f, exp) - - f = lambda x: x - np.array([1, 2, 3, 4]) - exp = PeriodIndex(['2010-12', '2010-12', 'NaT', - '2010-12'], freq='M', name='idx') - self._check(idx, f, exp) - - f = lambda x: np.subtract(x, np.array([3, 2, 3, -2])) - exp = PeriodIndex(['2010-10', '2010-12', 'NaT', - '2011-06'], freq='M', name='idx') - self._check(idx, f, exp) - - def test_pi_ops_offset(self): - idx = PeriodIndex(['2011-01-01', '2011-02-01', '2011-03-01', - '2011-04-01'], freq='D', name='idx') - f = lambda x: x + offsets.Day() - exp = PeriodIndex(['2011-01-02', '2011-02-02', '2011-03-02', - '2011-04-02'], freq='D', name='idx') - self._check(idx, f, exp) - - f = lambda x: x + offsets.Day(2) - exp = PeriodIndex(['2011-01-03', '2011-02-03', '2011-03-03', - '2011-04-03'], freq='D', name='idx') - self._check(idx, f, exp) - - f = lambda x: x - offsets.Day(2) - exp = PeriodIndex(['2010-12-30', '2011-01-30', '2011-02-27', - '2011-03-30'], freq='D', name='idx') - self._check(idx, f, exp) - - def test_pi_offset_errors(self): - idx = PeriodIndex(['2011-01-01', '2011-02-01', '2011-03-01', - '2011-04-01'], freq='D', name='idx') - s = pd.Series(idx) - - # Series op is applied per Period instance, thus error is raised - # from Period - msg_idx = r"Input has different freq from PeriodIndex\(freq=D\)" - msg_s = r"Input cannot be converted to Period\(freq=D\)" - for obj, msg in [(idx, msg_idx), (s, msg_s)]: - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - obj 
+ offsets.Hour(2) - - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - offsets.Hour(2) + obj - - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - obj - offsets.Hour(2) - - def test_pi_sub_period(self): - # GH 13071 - idx = PeriodIndex(['2011-01', '2011-02', '2011-03', - '2011-04'], freq='M', name='idx') - - result = idx - pd.Period('2012-01', freq='M') - exp = pd.Index([-12, -11, -10, -9], name='idx') - tm.assert_index_equal(result, exp) - - result = np.subtract(idx, pd.Period('2012-01', freq='M')) - tm.assert_index_equal(result, exp) - - result = pd.Period('2012-01', freq='M') - idx - exp = pd.Index([12, 11, 10, 9], name='idx') - tm.assert_index_equal(result, exp) - - result = np.subtract(pd.Period('2012-01', freq='M'), idx) - if _np_version_under1p10: - self.assertIs(result, NotImplemented) - else: - tm.assert_index_equal(result, exp) - - exp = pd.TimedeltaIndex([np.nan, np.nan, np.nan, np.nan], name='idx') - tm.assert_index_equal(idx - pd.Period('NaT', freq='M'), exp) - tm.assert_index_equal(pd.Period('NaT', freq='M') - idx, exp) - - def test_pi_sub_pdnat(self): - # GH 13071 - idx = PeriodIndex(['2011-01', '2011-02', 'NaT', - '2011-04'], freq='M', name='idx') - exp = pd.TimedeltaIndex([pd.NaT] * 4, name='idx') - tm.assert_index_equal(pd.NaT - idx, exp) - tm.assert_index_equal(idx - pd.NaT, exp) - - def test_pi_sub_period_nat(self): - # GH 13071 - idx = PeriodIndex(['2011-01', 'NaT', '2011-03', - '2011-04'], freq='M', name='idx') - - result = idx - pd.Period('2012-01', freq='M') - exp = pd.Index([-12, np.nan, -10, -9], name='idx') - tm.assert_index_equal(result, exp) - - result = pd.Period('2012-01', freq='M') - idx - exp = pd.Index([12, np.nan, 10, 9], name='idx') - tm.assert_index_equal(result, exp) - - exp = pd.TimedeltaIndex([np.nan, np.nan, np.nan, np.nan], name='idx') - tm.assert_index_equal(idx - pd.Period('NaT', freq='M'), exp) - tm.assert_index_equal(pd.Period('NaT', freq='M') - idx, exp) - def test_pi_comp_period(self): 
idx = PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04'], freq='M', name='idx') @@ -1122,207 +491,3 @@ def test_pi_comp_period_nat(self): f = lambda x: tslib.NaT >= x exp = np.array([False, False, False, False], dtype=np.bool) self._check(idx, f, exp) - - -class TestSeriesPeriod(tm.TestCase): - - def setUp(self): - self.series = Series(period_range('2000-01-01', periods=10, freq='D')) - - def test_ops_series_timedelta(self): - # GH 13043 - s = pd.Series([pd.Period('2015-01-01', freq='D'), - pd.Period('2015-01-02', freq='D')], name='xxx') - self.assertEqual(s.dtype, object) - - exp = pd.Series([pd.Period('2015-01-02', freq='D'), - pd.Period('2015-01-03', freq='D')], name='xxx') - tm.assert_series_equal(s + pd.Timedelta('1 days'), exp) - tm.assert_series_equal(pd.Timedelta('1 days') + s, exp) - - tm.assert_series_equal(s + pd.tseries.offsets.Day(), exp) - tm.assert_series_equal(pd.tseries.offsets.Day() + s, exp) - - def test_ops_series_period(self): - # GH 13043 - s = pd.Series([pd.Period('2015-01-01', freq='D'), - pd.Period('2015-01-02', freq='D')], name='xxx') - self.assertEqual(s.dtype, object) - - p = pd.Period('2015-01-10', freq='D') - # dtype will be object because of original dtype - exp = pd.Series([9, 8], name='xxx', dtype=object) - tm.assert_series_equal(p - s, exp) - tm.assert_series_equal(s - p, -exp) - - s2 = pd.Series([pd.Period('2015-01-05', freq='D'), - pd.Period('2015-01-04', freq='D')], name='xxx') - self.assertEqual(s2.dtype, object) - - exp = pd.Series([4, 2], name='xxx', dtype=object) - tm.assert_series_equal(s2 - s, exp) - tm.assert_series_equal(s - s2, -exp) - - -class TestFramePeriod(tm.TestCase): - - def test_ops_frame_period(self): - # GH 13043 - df = pd.DataFrame({'A': [pd.Period('2015-01', freq='M'), - pd.Period('2015-02', freq='M')], - 'B': [pd.Period('2014-01', freq='M'), - pd.Period('2014-02', freq='M')]}) - self.assertEqual(df['A'].dtype, object) - self.assertEqual(df['B'].dtype, object) - - p = pd.Period('2015-03', freq='M') - # 
dtype will be object because of original dtype - exp = pd.DataFrame({'A': np.array([2, 1], dtype=object), - 'B': np.array([14, 13], dtype=object)}) - tm.assert_frame_equal(p - df, exp) - tm.assert_frame_equal(df - p, -exp) - - df2 = pd.DataFrame({'A': [pd.Period('2015-05', freq='M'), - pd.Period('2015-06', freq='M')], - 'B': [pd.Period('2015-05', freq='M'), - pd.Period('2015-06', freq='M')]}) - self.assertEqual(df2['A'].dtype, object) - self.assertEqual(df2['B'].dtype, object) - - exp = pd.DataFrame({'A': np.array([4, 4], dtype=object), - 'B': np.array([16, 16], dtype=object)}) - tm.assert_frame_equal(df2 - df, exp) - tm.assert_frame_equal(df - df2, -exp) - - -class TestPeriodIndexComparisons(tm.TestCase): - - def test_pi_pi_comp(self): - - for freq in ['M', '2M', '3M']: - base = PeriodIndex(['2011-01', '2011-02', - '2011-03', '2011-04'], freq=freq) - p = Period('2011-02', freq=freq) - - exp = np.array([False, True, False, False]) - self.assert_numpy_array_equal(base == p, exp) - self.assert_numpy_array_equal(p == base, exp) - - exp = np.array([True, False, True, True]) - self.assert_numpy_array_equal(base != p, exp) - self.assert_numpy_array_equal(p != base, exp) - - exp = np.array([False, False, True, True]) - self.assert_numpy_array_equal(base > p, exp) - self.assert_numpy_array_equal(p < base, exp) - - exp = np.array([True, False, False, False]) - self.assert_numpy_array_equal(base < p, exp) - self.assert_numpy_array_equal(p > base, exp) - - exp = np.array([False, True, True, True]) - self.assert_numpy_array_equal(base >= p, exp) - self.assert_numpy_array_equal(p <= base, exp) - - exp = np.array([True, True, False, False]) - self.assert_numpy_array_equal(base <= p, exp) - self.assert_numpy_array_equal(p >= base, exp) - - idx = PeriodIndex(['2011-02', '2011-01', '2011-03', - '2011-05'], freq=freq) - - exp = np.array([False, False, True, False]) - self.assert_numpy_array_equal(base == idx, exp) - - exp = np.array([True, True, False, True]) - 
self.assert_numpy_array_equal(base != idx, exp) - - exp = np.array([False, True, False, False]) - self.assert_numpy_array_equal(base > idx, exp) - - exp = np.array([True, False, False, True]) - self.assert_numpy_array_equal(base < idx, exp) - - exp = np.array([False, True, True, False]) - self.assert_numpy_array_equal(base >= idx, exp) - - exp = np.array([True, False, True, True]) - self.assert_numpy_array_equal(base <= idx, exp) - - # different base freq - msg = "Input has different freq=A-DEC from PeriodIndex" - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - base <= Period('2011', freq='A') - - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - Period('2011', freq='A') >= base - - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - idx = PeriodIndex(['2011', '2012', '2013', '2014'], freq='A') - base <= idx - - # different mult - msg = "Input has different freq=4M from PeriodIndex" - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - base <= Period('2011', freq='4M') - - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - Period('2011', freq='4M') >= base - - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - idx = PeriodIndex(['2011', '2012', '2013', '2014'], freq='4M') - base <= idx - - def test_pi_nat_comp(self): - for freq in ['M', '2M', '3M']: - idx1 = PeriodIndex( - ['2011-01', '2011-02', 'NaT', '2011-05'], freq=freq) - - result = idx1 > Period('2011-02', freq=freq) - exp = np.array([False, False, False, True]) - self.assert_numpy_array_equal(result, exp) - result = Period('2011-02', freq=freq) < idx1 - self.assert_numpy_array_equal(result, exp) - - result = idx1 == Period('NaT', freq=freq) - exp = np.array([False, False, False, False]) - self.assert_numpy_array_equal(result, exp) - result = Period('NaT', freq=freq) == idx1 - self.assert_numpy_array_equal(result, exp) - - result = idx1 != Period('NaT', freq=freq) - exp = np.array([True, True, True, True]) - 
self.assert_numpy_array_equal(result, exp) - result = Period('NaT', freq=freq) != idx1 - self.assert_numpy_array_equal(result, exp) - - idx2 = PeriodIndex(['2011-02', '2011-01', '2011-04', - 'NaT'], freq=freq) - result = idx1 < idx2 - exp = np.array([True, False, False, False]) - self.assert_numpy_array_equal(result, exp) - - result = idx1 == idx2 - exp = np.array([False, False, False, False]) - self.assert_numpy_array_equal(result, exp) - - result = idx1 != idx2 - exp = np.array([True, True, True, True]) - self.assert_numpy_array_equal(result, exp) - - result = idx1 == idx1 - exp = np.array([True, True, False, True]) - self.assert_numpy_array_equal(result, exp) - - result = idx1 != idx1 - exp = np.array([False, False, True, False]) - self.assert_numpy_array_equal(result, exp) - - diff = PeriodIndex(['2011-02', '2011-01', '2011-04', - 'NaT'], freq='4M') - msg = "Input has different freq=4M from PeriodIndex" - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - idx1 > diff - - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - idx1 == diff diff --git a/pandas/tests/indexes/period/test_partial_slicing.py b/pandas/tests/indexes/period/test_partial_slicing.py index b051c4a0dcab1..6d142722c315a 100644 --- a/pandas/tests/indexes/period/test_partial_slicing.py +++ b/pandas/tests/indexes/period/test_partial_slicing.py @@ -1,3 +1,5 @@ +import pytest + import numpy as np import pandas as pd @@ -6,9 +8,9 @@ DataFrame, _np_version_under1p12, Period) -class TestPeriodIndex(tm.TestCase): +class TestPeriodIndex(object): - def setUp(self): + def setup_method(self, method): pass def test_slice_with_negative_step(self): @@ -40,16 +42,16 @@ def assert_slices_equivalent(l_slc, i_slc): def test_slice_with_zero_step_raises(self): ts = Series(np.arange(20), period_range('2014-01', periods=20, freq='M')) - self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: ts[::0]) - self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - 
lambda: ts.loc[::0]) - self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: ts.loc[::0]) + tm.assert_raises_regex(ValueError, 'slice step cannot be zero', + lambda: ts[::0]) + tm.assert_raises_regex(ValueError, 'slice step cannot be zero', + lambda: ts.loc[::0]) + tm.assert_raises_regex(ValueError, 'slice step cannot be zero', + lambda: ts.loc[::0]) def test_slice_keep_name(self): idx = period_range('20010101', periods=10, freq='D', name='bob') - self.assertEqual(idx.name, idx[1:].name) + assert idx.name == idx[1:].name def test_pindex_slice_index(self): pi = PeriodIndex(start='1/1/10', end='12/31/12', freq='M') @@ -75,7 +77,7 @@ def test_range_slice_day(self): values = ['2014', '2013/02', '2013/01/02', '2013/02/01 9H', '2013/02/01 09:00'] for v in values: - with tm.assertRaises(exc): + with pytest.raises(exc): idx[v:] s = Series(np.random.rand(len(idx)), index=idx) @@ -87,7 +89,7 @@ def test_range_slice_day(self): invalid = ['2013/02/01 9H', '2013/02/01 09:00'] for v in invalid: - with tm.assertRaises(exc): + with pytest.raises(exc): idx[v:] def test_range_slice_seconds(self): @@ -105,7 +107,7 @@ def test_range_slice_seconds(self): values = ['2014', '2013/02', '2013/01/02', '2013/02/01 9H', '2013/02/01 09:00'] for v in values: - with tm.assertRaises(exc): + with pytest.raises(exc): idx[v:] s = Series(np.random.rand(len(idx)), index=idx) diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 6a8128bb8985f..4548d7fa1a468 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -1,163 +1,49 @@ +import pytest + import numpy as np -from numpy.random import randn -from datetime import timedelta import pandas as pd +import pandas.util._test_decorators as td from pandas.util import testing as tm -from pandas import (PeriodIndex, period_range, notnull, DatetimeIndex, NaT, - Index, Period, Int64Index, Series, DataFrame, date_range, +from pandas import 
(PeriodIndex, period_range, DatetimeIndex, NaT, + Index, Period, Series, DataFrame, date_range, offsets) from ..datetimelike import DatetimeLike -class TestPeriodIndex(DatetimeLike, tm.TestCase): +class TestPeriodIndex(DatetimeLike): _holder = PeriodIndex - _multiprocess_can_split_ = True - def setUp(self): - self.indices = dict(index=tm.makePeriodIndex(10)) + def setup_method(self, method): + self.indices = dict(index=tm.makePeriodIndex(10), + index_dec=period_range('20130101', periods=10, + freq='D')[::-1]) self.setup_indices() def create_index(self): return period_range('20130101', periods=5, freq='D') - def test_astype(self): - # GH 13149, GH 13209 - idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq='D') - - result = idx.astype(object) - expected = Index([Period('2016-05-16', freq='D')] + - [Period(NaT, freq='D')] * 3, dtype='object') - tm.assert_index_equal(result, expected) - - result = idx.astype(int) - expected = Int64Index([16937] + [-9223372036854775808] * 3, - dtype=np.int64) - tm.assert_index_equal(result, expected) - - idx = period_range('1990', '2009', freq='A') - result = idx.astype('i8') - self.assert_index_equal(result, Index(idx.asi8)) - self.assert_numpy_array_equal(result.values, idx.asi8) - - def test_astype_raises(self): - # GH 13149, GH 13209 - idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq='D') - - self.assertRaises(ValueError, idx.astype, str) - self.assertRaises(ValueError, idx.astype, float) - self.assertRaises(ValueError, idx.astype, 'timedelta64') - self.assertRaises(ValueError, idx.astype, 'timedelta64[ns]') - def test_pickle_compat_construction(self): pass - def test_get_loc(self): - idx = pd.period_range('2000-01-01', periods=3) - - for method in [None, 'pad', 'backfill', 'nearest']: - self.assertEqual(idx.get_loc(idx[1], method), 1) - self.assertEqual( - idx.get_loc(idx[1].asfreq('H', how='start'), method), 1) - self.assertEqual(idx.get_loc(idx[1].to_timestamp(), method), 1) - self.assertEqual( - 
idx.get_loc(idx[1].to_timestamp().to_pydatetime(), method), 1) - self.assertEqual(idx.get_loc(str(idx[1]), method), 1) - - idx = pd.period_range('2000-01-01', periods=5)[::2] - self.assertEqual(idx.get_loc('2000-01-02T12', method='nearest', - tolerance='1 day'), 1) - self.assertEqual(idx.get_loc('2000-01-02T12', method='nearest', - tolerance=pd.Timedelta('1D')), 1) - self.assertEqual(idx.get_loc('2000-01-02T12', method='nearest', - tolerance=np.timedelta64(1, 'D')), 1) - self.assertEqual(idx.get_loc('2000-01-02T12', method='nearest', - tolerance=timedelta(1)), 1) - with tm.assertRaisesRegexp(ValueError, 'must be convertible'): - idx.get_loc('2000-01-10', method='nearest', tolerance='foo') - - msg = 'Input has different freq from PeriodIndex\\(freq=D\\)' - with tm.assertRaisesRegexp(ValueError, msg): - idx.get_loc('2000-01-10', method='nearest', tolerance='1 hour') - with tm.assertRaises(KeyError): - idx.get_loc('2000-01-10', method='nearest', tolerance='1 day') + @pytest.mark.parametrize('freq', ['D', 'M', 'A']) + def test_pickle_round_trip(self, freq): + idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq=freq) + result = tm.round_trip_pickle(idx) + tm.assert_index_equal(result, idx) def test_where(self): - i = self.create_index() - result = i.where(notnull(i)) - expected = i - tm.assert_index_equal(result, expected) - - i2 = i.copy() - i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(), - freq='D') - result = i.where(notnull(i2)) - expected = i2 - tm.assert_index_equal(result, expected) - - def test_where_other(self): - - i = self.create_index() - for arr in [np.nan, pd.NaT]: - result = i.where(notnull(i), other=np.nan) - expected = i - tm.assert_index_equal(result, expected) - - i2 = i.copy() - i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(), - freq='D') - result = i.where(notnull(i2), i2) - tm.assert_index_equal(result, i2) - - i2 = i.copy() - i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(), - freq='D') - result = i.where(notnull(i2), 
i2.values) - tm.assert_index_equal(result, i2) - - def test_get_indexer(self): - idx = pd.period_range('2000-01-01', periods=3).asfreq('H', how='start') - tm.assert_numpy_array_equal(idx.get_indexer(idx), - np.array([0, 1, 2], dtype=np.intp)) - - target = pd.PeriodIndex(['1999-12-31T23', '2000-01-01T12', - '2000-01-02T01'], freq='H') - tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), - np.array([-1, 0, 1], dtype=np.intp)) - tm.assert_numpy_array_equal(idx.get_indexer(target, 'backfill'), - np.array([0, 1, 2], dtype=np.intp)) - tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest'), - np.array([0, 1, 1], dtype=np.intp)) - tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest', - tolerance='1 hour'), - np.array([0, -1, 1], dtype=np.intp)) - - msg = 'Input has different freq from PeriodIndex\\(freq=H\\)' - with self.assertRaisesRegexp(ValueError, msg): - idx.get_indexer(target, 'nearest', tolerance='1 minute') - - tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest', - tolerance='1 day'), - np.array([0, 1, 1], dtype=np.intp)) + # This is handled in test_indexing + pass def test_repeat(self): # GH10183 idx = pd.period_range('2000-01-01', periods=3, freq='D') res = idx.repeat(3) exp = PeriodIndex(idx.values.repeat(3), freq='D') - self.assert_index_equal(res, exp) - self.assertEqual(res.freqstr, 'D') - - def test_period_index_indexer(self): - # GH4125 - idx = pd.period_range('2002-01', '2003-12', freq='M') - df = pd.DataFrame(pd.np.random.randn(24, 10), index=idx) - self.assert_frame_equal(df, df.loc[idx]) - self.assert_frame_equal(df, df.loc[list(idx)]) - self.assert_frame_equal(df, df.loc[list(idx)]) - self.assert_frame_equal(df.iloc[0:5], df.loc[idx[0:5]]) - self.assert_frame_equal(df, df.loc[list(idx)]) + tm.assert_index_equal(res, exp) + assert res.freqstr == 'D' def test_fillna_period(self): # GH 11343 @@ -166,24 +52,24 @@ def test_fillna_period(self): exp = pd.PeriodIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 
11:00'], freq='H') - self.assert_index_equal( + tm.assert_index_equal( idx.fillna(pd.Period('2011-01-01 10:00', freq='H')), exp) exp = pd.Index([pd.Period('2011-01-01 09:00', freq='H'), 'x', pd.Period('2011-01-01 11:00', freq='H')], dtype=object) - self.assert_index_equal(idx.fillna('x'), exp) + tm.assert_index_equal(idx.fillna('x'), exp) exp = pd.Index([pd.Period('2011-01-01 09:00', freq='H'), pd.Period('2011-01-01', freq='D'), pd.Period('2011-01-01 11:00', freq='H')], dtype=object) - self.assert_index_equal(idx.fillna(pd.Period('2011-01-01', freq='D')), - exp) + tm.assert_index_equal(idx.fillna( + pd.Period('2011-01-01', freq='D')), exp) def test_no_millisecond_field(self): - with self.assertRaises(AttributeError): + with pytest.raises(AttributeError): DatetimeIndex.millisecond - with self.assertRaises(AttributeError): + with pytest.raises(AttributeError): DatetimeIndex([]).millisecond def test_difference_freq(self): @@ -206,14 +92,14 @@ def test_difference_freq(self): def test_hash_error(self): index = period_range('20010101', periods=10) - with tm.assertRaisesRegexp(TypeError, "unhashable type: %r" % - type(index).__name__): + with tm.assert_raises_regex(TypeError, "unhashable type: %r" % + type(index).__name__): hash(index) def test_make_time_series(self): index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') series = Series(1, index=index) - tm.assertIsInstance(series, Series) + assert isinstance(series, Series) def test_shallow_copy_empty(self): @@ -226,12 +112,12 @@ def test_shallow_copy_empty(self): def test_dtype_str(self): pi = pd.PeriodIndex([], freq='M') - self.assertEqual(pi.dtype_str, 'period[M]') - self.assertEqual(pi.dtype_str, str(pi.dtype)) + assert pi.dtype_str == 'period[M]' + assert pi.dtype_str == str(pi.dtype) pi = pd.PeriodIndex([], freq='3M') - self.assertEqual(pi.dtype_str, 'period[3M]') - self.assertEqual(pi.dtype_str, str(pi.dtype)) + assert pi.dtype_str == 'period[3M]' + assert pi.dtype_str == str(pi.dtype) def 
test_view_asi8(self): idx = pd.PeriodIndex([], freq='M') @@ -258,7 +144,7 @@ def test_values(self): tm.assert_numpy_array_equal(idx.values, exp) tm.assert_numpy_array_equal(idx.get_values(), exp) exp = np.array([], dtype=np.int64) - tm.assert_numpy_array_equal(idx._values, exp) + tm.assert_numpy_array_equal(idx._ndarray_values, exp) idx = pd.PeriodIndex(['2011-01', pd.NaT], freq='M') @@ -266,7 +152,7 @@ def test_values(self): tm.assert_numpy_array_equal(idx.values, exp) tm.assert_numpy_array_equal(idx.get_values(), exp) exp = np.array([492, -9223372036854775808], dtype=np.int64) - tm.assert_numpy_array_equal(idx._values, exp) + tm.assert_numpy_array_equal(idx._ndarray_values, exp) idx = pd.PeriodIndex(['2011-01-01', pd.NaT], freq='D') @@ -275,41 +161,41 @@ def test_values(self): tm.assert_numpy_array_equal(idx.values, exp) tm.assert_numpy_array_equal(idx.get_values(), exp) exp = np.array([14975, -9223372036854775808], dtype=np.int64) - tm.assert_numpy_array_equal(idx._values, exp) + tm.assert_numpy_array_equal(idx._ndarray_values, exp) def test_period_index_length(self): pi = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') - self.assertEqual(len(pi), 9) + assert len(pi) == 9 pi = PeriodIndex(freq='Q', start='1/1/2001', end='12/1/2009') - self.assertEqual(len(pi), 4 * 9) + assert len(pi) == 4 * 9 pi = PeriodIndex(freq='M', start='1/1/2001', end='12/1/2009') - self.assertEqual(len(pi), 12 * 9) + assert len(pi) == 12 * 9 start = Period('02-Apr-2005', 'B') i1 = PeriodIndex(start=start, periods=20) - self.assertEqual(len(i1), 20) - self.assertEqual(i1.freq, start.freq) - self.assertEqual(i1[0], start) + assert len(i1) == 20 + assert i1.freq == start.freq + assert i1[0] == start end_intv = Period('2006-12-31', 'W') i1 = PeriodIndex(end=end_intv, periods=10) - self.assertEqual(len(i1), 10) - self.assertEqual(i1.freq, end_intv.freq) - self.assertEqual(i1[-1], end_intv) + assert len(i1) == 10 + assert i1.freq == end_intv.freq + assert i1[-1] == end_intv end_intv = 
Period('2006-12-31', '1w') i2 = PeriodIndex(end=end_intv, periods=10) - self.assertEqual(len(i1), len(i2)) - self.assertTrue((i1 == i2).all()) - self.assertEqual(i1.freq, i2.freq) + assert len(i1) == len(i2) + assert (i1 == i2).all() + assert i1.freq == i2.freq end_intv = Period('2006-12-31', ('w', 1)) i2 = PeriodIndex(end=end_intv, periods=10) - self.assertEqual(len(i1), len(i2)) - self.assertTrue((i1 == i2).all()) - self.assertEqual(i1.freq, i2.freq) + assert len(i1) == len(i2) + assert (i1 == i2).all() + assert i1.freq == i2.freq try: PeriodIndex(start=start, end=end_intv) @@ -329,18 +215,18 @@ def test_period_index_length(self): # infer freq from first element i2 = PeriodIndex([end_intv, Period('2005-05-05', 'B')]) - self.assertEqual(len(i2), 2) - self.assertEqual(i2[0], end_intv) + assert len(i2) == 2 + assert i2[0] == end_intv i2 = PeriodIndex(np.array([end_intv, Period('2005-05-05', 'B')])) - self.assertEqual(len(i2), 2) - self.assertEqual(i2[0], end_intv) + assert len(i2) == 2 + assert i2[0] == end_intv # Mixed freq should fail vals = [end_intv, Period('2006-12-31', 'w')] - self.assertRaises(ValueError, PeriodIndex, vals) + pytest.raises(ValueError, PeriodIndex, vals) vals = np.array(vals) - self.assertRaises(ValueError, PeriodIndex, vals) + pytest.raises(ValueError, PeriodIndex, vals) def test_fields(self): # year, month, day, hour, minute @@ -377,34 +263,25 @@ def test_fields(self): def _check_all_fields(self, periodindex): fields = ['year', 'month', 'day', 'hour', 'minute', 'second', - 'weekofyear', 'week', 'dayofweek', 'weekday', 'dayofyear', - 'quarter', 'qyear', 'days_in_month', 'is_leap_year'] + 'weekofyear', 'week', 'dayofweek', 'dayofyear', + 'quarter', 'qyear', 'days_in_month'] periods = list(periodindex) s = pd.Series(periodindex) for field in fields: field_idx = getattr(periodindex, field) - self.assertEqual(len(periodindex), len(field_idx)) + assert len(periodindex) == len(field_idx) for x, val in zip(periods, field_idx): - 
self.assertEqual(getattr(x, field), val) + assert getattr(x, field) == val if len(s) == 0: continue field_s = getattr(s.dt, field) - self.assertEqual(len(periodindex), len(field_s)) + assert len(periodindex) == len(field_s) for x, val in zip(periods, field_s): - self.assertEqual(getattr(x, field), val) - - def test_indexing(self): - - # GH 4390, iat incorrectly indexing - index = period_range('1/1/2001', periods=10) - s = Series(randn(10), index=index) - expected = s[index[0]] - result = s.iat[0] - self.assertEqual(expected, result) + assert getattr(x, field) == val def test_period_set_index_reindex(self): # GH 6631 @@ -425,11 +302,11 @@ def test_factorize(self): exp_idx = PeriodIndex(['2014-01', '2014-02', '2014-03'], freq='M') arr, idx = idx1.factorize() - self.assert_numpy_array_equal(arr, exp_arr) + tm.assert_numpy_array_equal(arr, exp_arr) tm.assert_index_equal(idx, exp_idx) arr, idx = idx1.factorize(sort=True) - self.assert_numpy_array_equal(arr, exp_arr) + tm.assert_numpy_array_equal(arr, exp_arr) tm.assert_index_equal(idx, exp_idx) idx2 = pd.PeriodIndex(['2014-03', '2014-03', '2014-02', '2014-01', @@ -437,95 +314,58 @@ def test_factorize(self): exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.intp) arr, idx = idx2.factorize(sort=True) - self.assert_numpy_array_equal(arr, exp_arr) + tm.assert_numpy_array_equal(arr, exp_arr) tm.assert_index_equal(idx, exp_idx) exp_arr = np.array([0, 0, 1, 2, 0, 2], dtype=np.intp) exp_idx = PeriodIndex(['2014-03', '2014-02', '2014-01'], freq='M') arr, idx = idx2.factorize() - self.assert_numpy_array_equal(arr, exp_arr) + tm.assert_numpy_array_equal(arr, exp_arr) tm.assert_index_equal(idx, exp_idx) - def test_asobject_like(self): - idx = pd.PeriodIndex([], freq='M') - - exp = np.array([], dtype=object) - tm.assert_numpy_array_equal(idx.asobject.values, exp) - tm.assert_numpy_array_equal(idx._mpl_repr(), exp) - - idx = pd.PeriodIndex(['2011-01', pd.NaT], freq='M') - - exp = np.array([pd.Period('2011-01', freq='M'), pd.NaT], 
dtype=object) - tm.assert_numpy_array_equal(idx.asobject.values, exp) - tm.assert_numpy_array_equal(idx._mpl_repr(), exp) - - exp = np.array([pd.Period('2011-01-01', freq='D'), pd.NaT], - dtype=object) - idx = pd.PeriodIndex(['2011-01-01', pd.NaT], freq='D') - tm.assert_numpy_array_equal(idx.asobject.values, exp) - tm.assert_numpy_array_equal(idx._mpl_repr(), exp) - def test_is_(self): create_index = lambda: PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') index = create_index() - self.assertEqual(index.is_(index), True) - self.assertEqual(index.is_(create_index()), False) - self.assertEqual(index.is_(index.view()), True) - self.assertEqual( - index.is_(index.view().view().view().view().view()), True) - self.assertEqual(index.view().is_(index), True) + assert index.is_(index) + assert not index.is_(create_index()) + assert index.is_(index.view()) + assert index.is_(index.view().view().view().view().view()) + assert index.view().is_(index) ind2 = index.view() index.name = "Apple" - self.assertEqual(ind2.is_(index), True) - self.assertEqual(index.is_(index[:]), False) - self.assertEqual(index.is_(index.asfreq('M')), False) - self.assertEqual(index.is_(index.asfreq('A')), False) - self.assertEqual(index.is_(index - 2), False) - self.assertEqual(index.is_(index - 0), False) - - def test_comp_period(self): - idx = period_range('2007-01', periods=20, freq='M') - - result = idx < idx[10] - exp = idx.values < idx.values[10] - self.assert_numpy_array_equal(result, exp) + assert ind2.is_(index) + assert not index.is_(index[:]) + assert not index.is_(index.asfreq('M')) + assert not index.is_(index.asfreq('A')) + assert not index.is_(index - 2) + assert not index.is_(index - 0) def test_contains(self): rng = period_range('2007-01', freq='M', periods=10) - self.assertTrue(Period('2007-01', freq='M') in rng) - self.assertFalse(Period('2007-01', freq='D') in rng) - self.assertFalse(Period('2007-01', freq='2M') in rng) + assert Period('2007-01', freq='M') in rng + assert 
not Period('2007-01', freq='D') in rng + assert not Period('2007-01', freq='2M') in rng def test_contains_nat(self): - # GH13582 + # see gh-13582 idx = period_range('2007-01', freq='M', periods=10) - self.assertFalse(pd.NaT in idx) - self.assertFalse(None in idx) - self.assertFalse(float('nan') in idx) - self.assertFalse(np.nan in idx) + assert pd.NaT not in idx + assert None not in idx + assert float('nan') not in idx + assert np.nan not in idx idx = pd.PeriodIndex(['2011-01', 'NaT', '2011-02'], freq='M') - self.assertTrue(pd.NaT in idx) - self.assertTrue(None in idx) - self.assertTrue(float('nan') in idx) - self.assertTrue(np.nan in idx) + assert pd.NaT in idx + assert None in idx + assert float('nan') in idx + assert np.nan in idx def test_periods_number_check(self): - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): period_range('2011-1-1', '2012-1-1', 'B') - def test_start_time(self): - index = PeriodIndex(freq='M', start='2016-01-01', end='2016-05-31') - expected_index = date_range('2016-01-01', end='2016-05-31', freq='MS') - tm.assert_index_equal(index.start_time, expected_index) - - def test_end_time(self): - index = PeriodIndex(freq='M', start='2016-01-01', end='2016-05-31') - expected_index = date_range('2016-01-01', end='2016-05-31', freq='M') - tm.assert_index_equal(index.end_time, expected_index) - def test_index_duplicate_periods(self): # monotonic idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq='A-JUN') @@ -535,7 +375,7 @@ def test_index_duplicate_periods(self): expected = ts[1:3] tm.assert_series_equal(result, expected) result[:] = 1 - self.assertTrue((ts[1:3] == 1).all()) + assert (ts[1:3] == 1).all() # not monotonic idx = PeriodIndex([2000, 2007, 2007, 2009, 2007], freq='A-JUN') @@ -548,83 +388,23 @@ def test_index_duplicate_periods(self): def test_index_unique(self): idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq='A-JUN') expected = PeriodIndex([2000, 2007, 2009], freq='A-JUN') - 
self.assert_index_equal(idx.unique(), expected) - self.assertEqual(idx.nunique(), 3) + tm.assert_index_equal(idx.unique(), expected) + assert idx.nunique() == 3 idx = PeriodIndex([2000, 2007, 2007, 2009, 2007], freq='A-JUN', tz='US/Eastern') expected = PeriodIndex([2000, 2007, 2009], freq='A-JUN', tz='US/Eastern') - self.assert_index_equal(idx.unique(), expected) - self.assertEqual(idx.nunique(), 3) - - def test_shift_gh8083(self): - - # test shift for PeriodIndex - # GH8083 - drange = self.create_index() - result = drange.shift(1) - expected = PeriodIndex(['2013-01-02', '2013-01-03', '2013-01-04', - '2013-01-05', '2013-01-06'], freq='D') - self.assert_index_equal(result, expected) + tm.assert_index_equal(idx.unique(), expected) + assert idx.nunique() == 3 def test_shift(self): - pi1 = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') - pi2 = PeriodIndex(freq='A', start='1/1/2002', end='12/1/2010') - - tm.assert_index_equal(pi1.shift(0), pi1) - - self.assertEqual(len(pi1), len(pi2)) - self.assert_index_equal(pi1.shift(1), pi2) - - pi1 = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') - pi2 = PeriodIndex(freq='A', start='1/1/2000', end='12/1/2008') - self.assertEqual(len(pi1), len(pi2)) - self.assert_index_equal(pi1.shift(-1), pi2) - - pi1 = PeriodIndex(freq='M', start='1/1/2001', end='12/1/2009') - pi2 = PeriodIndex(freq='M', start='2/1/2001', end='1/1/2010') - self.assertEqual(len(pi1), len(pi2)) - self.assert_index_equal(pi1.shift(1), pi2) - - pi1 = PeriodIndex(freq='M', start='1/1/2001', end='12/1/2009') - pi2 = PeriodIndex(freq='M', start='12/1/2000', end='11/1/2009') - self.assertEqual(len(pi1), len(pi2)) - self.assert_index_equal(pi1.shift(-1), pi2) - - pi1 = PeriodIndex(freq='D', start='1/1/2001', end='12/1/2009') - pi2 = PeriodIndex(freq='D', start='1/2/2001', end='12/2/2009') - self.assertEqual(len(pi1), len(pi2)) - self.assert_index_equal(pi1.shift(1), pi2) - - pi1 = PeriodIndex(freq='D', start='1/1/2001', end='12/1/2009') - pi2 = 
PeriodIndex(freq='D', start='12/31/2000', end='11/30/2009') - self.assertEqual(len(pi1), len(pi2)) - self.assert_index_equal(pi1.shift(-1), pi2) - - def test_shift_nat(self): - idx = PeriodIndex(['2011-01', '2011-02', 'NaT', - '2011-04'], freq='M', name='idx') - result = idx.shift(1) - expected = PeriodIndex(['2011-02', '2011-03', 'NaT', - '2011-05'], freq='M', name='idx') - tm.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - - def test_shift_ndarray(self): - idx = PeriodIndex(['2011-01', '2011-02', 'NaT', - '2011-04'], freq='M', name='idx') - result = idx.shift(np.array([1, 2, 3, 4])) - expected = PeriodIndex(['2011-02', '2011-04', 'NaT', - '2011-08'], freq='M', name='idx') - tm.assert_index_equal(result, expected) + # This is tested in test_arithmetic + pass - idx = PeriodIndex(['2011-01', '2011-02', 'NaT', - '2011-04'], freq='M', name='idx') - result = idx.shift(np.array([1, -2, 3, -4])) - expected = PeriodIndex(['2011-02', '2010-12', 'NaT', - '2010-12'], freq='M', name='idx') - tm.assert_index_equal(result, expected) + @td.skip_if_32bit + def test_ndarray_compat_properties(self): + super(TestPeriodIndex, self).test_ndarray_compat_properties() def test_negative_ordinals(self): Period(ordinal=-1000, freq='A') @@ -636,18 +416,18 @@ def test_negative_ordinals(self): def test_pindex_fieldaccessor_nat(self): idx = PeriodIndex(['2011-01', '2011-02', 'NaT', - '2012-03', '2012-04'], freq='D') + '2012-03', '2012-04'], freq='D', name='name') - exp = np.array([2011, 2011, -1, 2012, 2012], dtype=np.int64) - self.assert_numpy_array_equal(idx.year, exp) - exp = np.array([1, 2, -1, 3, 4], dtype=np.int64) - self.assert_numpy_array_equal(idx.month, exp) + exp = Index([2011, 2011, -1, 2012, 2012], dtype=np.int64, name='name') + tm.assert_index_equal(idx.year, exp) + exp = Index([1, 2, -1, 3, 4], dtype=np.int64, name='name') + tm.assert_index_equal(idx.month, exp) def test_pindex_qaccess(self): pi = PeriodIndex(['2Q05', '3Q05', '4Q05', '1Q06', 
'2Q06'], freq='Q') s = Series(np.random.rand(len(pi)), index=pi).cumsum() # Todo: fix these accessors! - self.assertEqual(s['05Q4'], s[2]) + assert s['05Q4'] == s[2] def test_numpy_repeat(self): index = period_range('20010101', periods=2) @@ -657,50 +437,51 @@ def test_numpy_repeat(self): tm.assert_index_equal(np.repeat(index, 2), expected) msg = "the 'axis' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.repeat, index, 2, axis=1) + tm.assert_raises_regex( + ValueError, msg, np.repeat, index, 2, axis=1) def test_pindex_multiples(self): pi = PeriodIndex(start='1/1/11', end='12/31/11', freq='2M') expected = PeriodIndex(['2011-01', '2011-03', '2011-05', '2011-07', '2011-09', '2011-11'], freq='2M') tm.assert_index_equal(pi, expected) - self.assertEqual(pi.freq, offsets.MonthEnd(2)) - self.assertEqual(pi.freqstr, '2M') + assert pi.freq == offsets.MonthEnd(2) + assert pi.freqstr == '2M' pi = period_range(start='1/1/11', end='12/31/11', freq='2M') tm.assert_index_equal(pi, expected) - self.assertEqual(pi.freq, offsets.MonthEnd(2)) - self.assertEqual(pi.freqstr, '2M') + assert pi.freq == offsets.MonthEnd(2) + assert pi.freqstr == '2M' pi = period_range(start='1/1/11', periods=6, freq='2M') tm.assert_index_equal(pi, expected) - self.assertEqual(pi.freq, offsets.MonthEnd(2)) - self.assertEqual(pi.freqstr, '2M') + assert pi.freq == offsets.MonthEnd(2) + assert pi.freqstr == '2M' def test_iteration(self): index = PeriodIndex(start='1/1/10', periods=4, freq='B') result = list(index) - tm.assertIsInstance(result[0], Period) - self.assertEqual(result[0].freq, index.freq) + assert isinstance(result[0], Period) + assert result[0].freq == index.freq def test_is_full(self): index = PeriodIndex([2005, 2007, 2009], freq='A') - self.assertFalse(index.is_full) + assert not index.is_full index = PeriodIndex([2005, 2006, 2007], freq='A') - self.assertTrue(index.is_full) + assert index.is_full index = PeriodIndex([2005, 2005, 2007], freq='A') - 
self.assertFalse(index.is_full) + assert not index.is_full index = PeriodIndex([2005, 2005, 2006], freq='A') - self.assertTrue(index.is_full) + assert index.is_full index = PeriodIndex([2006, 2005, 2005], freq='A') - self.assertRaises(ValueError, getattr, index, 'is_full') + pytest.raises(ValueError, getattr, index, 'is_full') - self.assertTrue(index[:0].is_full) + assert index[:0].is_full def test_with_multi_index(self): # #1705 @@ -709,16 +490,16 @@ def test_with_multi_index(self): s = Series([0, 1, 2, 3], index_as_arrays) - tm.assertIsInstance(s.index.levels[0], PeriodIndex) + assert isinstance(s.index.levels[0], PeriodIndex) - tm.assertIsInstance(s.index.values[0][0], Period) + assert isinstance(s.index.values[0][0], Period) def test_convert_array_of_periods(self): rng = period_range('1/1/2000', periods=20, freq='D') periods = list(rng) result = pd.Index(periods) - tm.assertIsInstance(result, PeriodIndex) + assert isinstance(result, PeriodIndex) def test_append_concat(self): # #1815 @@ -733,22 +514,34 @@ def test_append_concat(self): # drops index result = pd.concat([s1, s2]) - tm.assertIsInstance(result.index, PeriodIndex) - self.assertEqual(result.index[0], s1.index[0]) + assert isinstance(result.index, PeriodIndex) + assert result.index[0] == s1.index[0] def test_pickle_freq(self): # GH2891 prng = period_range('1/1/2011', '1/1/2012', freq='M') - new_prng = self.round_trip_pickle(prng) - self.assertEqual(new_prng.freq, offsets.MonthEnd()) - self.assertEqual(new_prng.freqstr, 'M') + new_prng = tm.round_trip_pickle(prng) + assert new_prng.freq == offsets.MonthEnd() + assert new_prng.freqstr == 'M' def test_map(self): - index = PeriodIndex([2005, 2007, 2009], freq='A') - result = index.map(lambda x: x + 1) - expected = index + 1 - tm.assert_index_equal(result, expected) + # test_map_dictlike generally tests + index = PeriodIndex([2005, 2007, 2009], freq='A') result = index.map(lambda x: x.ordinal) exp = Index([x.ordinal for x in index]) 
tm.assert_index_equal(result, exp) + + @pytest.mark.parametrize('how', ['outer', 'inner', 'left', 'right']) + def test_join_self(self, how): + index = period_range('1/1/2000', periods=10) + joined = index.join(index, how=how) + assert index is joined + + def test_insert(self): + # GH 18295 (test missing) + expected = PeriodIndex( + ['2017Q1', pd.NaT, '2017Q2', '2017Q3', '2017Q4'], freq='Q') + for na in (np.nan, pd.NaT, None): + result = period_range('2017Q1', periods=4, freq='Q').insert(1, na) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/period/test_period_range.py b/pandas/tests/indexes/period/test_period_range.py new file mode 100644 index 0000000000000..640f24f67f72f --- /dev/null +++ b/pandas/tests/indexes/period/test_period_range.py @@ -0,0 +1,94 @@ +import pytest +import pandas.util.testing as tm +from pandas import date_range, NaT, period_range, Period, PeriodIndex + + +class TestPeriodRange(object): + + @pytest.mark.parametrize('freq', ['D', 'W', 'M', 'Q', 'A']) + def test_construction_from_string(self, freq): + # non-empty + expected = date_range(start='2017-01-01', periods=5, + freq=freq, name='foo').to_period() + start, end = str(expected[0]), str(expected[-1]) + + result = period_range(start=start, end=end, freq=freq, name='foo') + tm.assert_index_equal(result, expected) + + result = period_range(start=start, periods=5, freq=freq, name='foo') + tm.assert_index_equal(result, expected) + + result = period_range(end=end, periods=5, freq=freq, name='foo') + tm.assert_index_equal(result, expected) + + # empty + expected = PeriodIndex([], freq=freq, name='foo') + + result = period_range(start=start, periods=0, freq=freq, name='foo') + tm.assert_index_equal(result, expected) + + result = period_range(end=end, periods=0, freq=freq, name='foo') + tm.assert_index_equal(result, expected) + + result = period_range(start=end, end=start, freq=freq, name='foo') + tm.assert_index_equal(result, expected) + + def 
test_construction_from_period(self): + # upsampling + start, end = Period('2017Q1', freq='Q'), Period('2018Q1', freq='Q') + expected = date_range(start='2017-03-31', end='2018-03-31', freq='M', + name='foo').to_period() + result = period_range(start=start, end=end, freq='M', name='foo') + tm.assert_index_equal(result, expected) + + # downsampling + start, end = Period('2017-1', freq='M'), Period('2019-12', freq='M') + expected = date_range(start='2017-01-31', end='2019-12-31', freq='Q', + name='foo').to_period() + result = period_range(start=start, end=end, freq='Q', name='foo') + tm.assert_index_equal(result, expected) + + # empty + expected = PeriodIndex([], freq='W', name='foo') + + result = period_range(start=start, periods=0, freq='W', name='foo') + tm.assert_index_equal(result, expected) + + result = period_range(end=end, periods=0, freq='W', name='foo') + tm.assert_index_equal(result, expected) + + result = period_range(start=end, end=start, freq='W', name='foo') + tm.assert_index_equal(result, expected) + + def test_errors(self): + # not enough params + msg = ('Of the three parameters: start, end, and periods, ' + 'exactly two must be specified') + with tm.assert_raises_regex(ValueError, msg): + period_range(start='2017Q1') + + with tm.assert_raises_regex(ValueError, msg): + period_range(end='2017Q1') + + with tm.assert_raises_regex(ValueError, msg): + period_range(periods=5) + + with tm.assert_raises_regex(ValueError, msg): + period_range() + + # too many params + with tm.assert_raises_regex(ValueError, msg): + period_range(start='2017Q1', end='2018Q1', periods=8, freq='Q') + + # start/end NaT + msg = 'start and end must not be NaT' + with tm.assert_raises_regex(ValueError, msg): + period_range(start=NaT, end='2018Q1') + + with tm.assert_raises_regex(ValueError, msg): + period_range(start='2017Q1', end=NaT) + + # invalid periods param + msg = 'periods must be a number, got foo' + with tm.assert_raises_regex(TypeError, msg): + period_range(start='2017Q1', 
periods='foo') diff --git a/pandas/tests/indexes/period/test_scalar_compat.py b/pandas/tests/indexes/period/test_scalar_compat.py new file mode 100644 index 0000000000000..56bd2adf58719 --- /dev/null +++ b/pandas/tests/indexes/period/test_scalar_compat.py @@ -0,0 +1,17 @@ +# -*- coding: utf-8 -*- +"""Tests for PeriodIndex behaving like a vectorized Period scalar""" + +from pandas import PeriodIndex, date_range +import pandas.util.testing as tm + + +class TestPeriodIndexOps(object): + def test_start_time(self): + index = PeriodIndex(freq='M', start='2016-01-01', end='2016-05-31') + expected_index = date_range('2016-01-01', end='2016-05-31', freq='MS') + tm.assert_index_equal(index.start_time, expected_index) + + def test_end_time(self): + index = PeriodIndex(freq='M', start='2016-01-01', end='2016-05-31') + expected_index = date_range('2016-01-01', end='2016-05-31', freq='M') + tm.assert_index_equal(index.end_time, expected_index) diff --git a/pandas/tests/indexes/period/test_setops.py b/pandas/tests/indexes/period/test_setops.py index d4f06bae8bc32..ec0836dfa174b 100644 --- a/pandas/tests/indexes/period/test_setops.py +++ b/pandas/tests/indexes/period/test_setops.py @@ -1,8 +1,10 @@ +import pytest + import numpy as np import pandas as pd import pandas.util.testing as tm -import pandas.tseries.period as period +import pandas.core.indexes.period as period from pandas import period_range, PeriodIndex, Index, date_range @@ -10,26 +12,23 @@ def _permute(obj): return obj.take(np.random.permutation(len(obj))) -class TestPeriodIndex(tm.TestCase): - - def setUp(self): - pass +class TestPeriodIndex(object): - def test_joins(self): + @pytest.mark.parametrize('kind', ['inner', 'outer', 'left', 'right']) + def test_joins(self, kind): index = period_range('1/1/2000', '1/20/2000', freq='D') - for kind in ['inner', 'outer', 'left', 'right']: - joined = index.join(index[:-5], how=kind) + joined = index.join(index[:-5], how=kind) - tm.assertIsInstance(joined, PeriodIndex) - 
self.assertEqual(joined.freq, index.freq) + assert isinstance(joined, PeriodIndex) + assert joined.freq == index.freq - def test_join_self(self): + @pytest.mark.parametrize('kind', ['inner', 'outer', 'left', 'right']) + def test_join_self(self, kind): index = period_range('1/1/2000', '1/20/2000', freq='D') - for kind in ['inner', 'outer', 'left', 'right']: - res = index.join(index, how=kind) - self.assertIs(index, res) + res = index.join(index, how=kind) + assert index is res def test_join_does_not_recur(self): df = tm.makeCustomDataframe( @@ -106,15 +105,15 @@ def test_union_misc(self): # raise if different frequencies index = period_range('1/1/2000', '1/20/2000', freq='D') index2 = period_range('1/1/2000', '1/20/2000', freq='W-WED') - with tm.assertRaises(period.IncompatibleFrequency): + with pytest.raises(period.IncompatibleFrequency): index.union(index2) msg = 'can only call with other PeriodIndex-ed objects' - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): index.join(index.to_timestamp()) index3 = period_range('1/1/2000', '1/20/2000', freq='2D') - with tm.assertRaises(period.IncompatibleFrequency): + with pytest.raises(period.IncompatibleFrequency): index.join(index3) def test_union_dataframe_index(self): @@ -126,7 +125,7 @@ def test_union_dataframe_index(self): df = pd.DataFrame({'s1': s1, 's2': s2}) exp = pd.period_range('1/1/1980', '1/1/2012', freq='M') - self.assert_index_equal(df.index, exp) + tm.assert_index_equal(df.index, exp) def test_intersection(self): index = period_range('1/1/2000', '1/20/2000', freq='D') @@ -143,11 +142,11 @@ def test_intersection(self): # raise if different frequencies index = period_range('1/1/2000', '1/20/2000', freq='D') index2 = period_range('1/1/2000', '1/20/2000', freq='W-WED') - with tm.assertRaises(period.IncompatibleFrequency): + with pytest.raises(period.IncompatibleFrequency): index.intersection(index2) index3 = period_range('1/1/2000', '1/20/2000', freq='2D') - with 
tm.assertRaises(period.IncompatibleFrequency): + with pytest.raises(period.IncompatibleFrequency): index.intersection(index3) def test_intersection_cases(self): @@ -170,8 +169,8 @@ def test_intersection_cases(self): (rng4, expected4)]: result = base.intersection(rng) tm.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(result.freq, expected.freq) + assert result.name == expected.name + assert result.freq == expected.freq # non-monotonic base = PeriodIndex(['2011-01-05', '2011-01-04', '2011-01-02', @@ -196,16 +195,16 @@ def test_intersection_cases(self): (rng4, expected4)]: result = base.intersection(rng) tm.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(result.freq, 'D') + assert result.name == expected.name + assert result.freq == 'D' # empty same freq rng = date_range('6/1/2000', '6/15/2000', freq='T') result = rng[0:0].intersection(rng) - self.assertEqual(len(result), 0) + assert len(result) == 0 result = rng.intersection(rng[0:0]) - self.assertEqual(len(result), 0) + assert len(result) == 0 def test_difference(self): # diff diff --git a/pandas/tests/indexes/period/test_tools.py b/pandas/tests/indexes/period/test_tools.py index e09d405afd375..38c6f257b2206 100644 --- a/pandas/tests/indexes/period/test_tools.py +++ b/pandas/tests/indexes/period/test_tools.py @@ -1,17 +1,19 @@ import numpy as np from datetime import datetime, timedelta +import pytest import pandas as pd import pandas.util.testing as tm -import pandas.tseries.period as period +import pandas.core.indexes.period as period from pandas.compat import lrange -from pandas.tseries.frequencies import get_freq, MONTHS -from pandas._period import period_ordinal, period_asfreq + +from pandas._libs.tslibs.ccalendar import MONTHS + from pandas import (PeriodIndex, Period, DatetimeIndex, Timestamp, Series, date_range, to_datetime, period_range) -class TestPeriodRepresentation(tm.TestCase): +class 
TestPeriodRepresentation(object): """ Wish to match NumPy units """ @@ -19,8 +21,8 @@ class TestPeriodRepresentation(tm.TestCase): def _check_freq(self, freq, base_date): rng = PeriodIndex(start=base_date, periods=10, freq=freq) exp = np.arange(10, dtype=np.int64) - self.assert_numpy_array_equal(rng._values, exp) - self.assert_numpy_array_equal(rng.asi8, exp) + + tm.assert_numpy_array_equal(rng.asi8, exp) def test_annual(self): self._check_freq('A', 1970) @@ -28,32 +30,10 @@ def test_annual(self): def test_monthly(self): self._check_freq('M', '1970-01') - def test_weekly(self): - self._check_freq('W-THU', '1970-01-01') - - def test_daily(self): - self._check_freq('D', '1970-01-01') - - def test_business_daily(self): - self._check_freq('B', '1970-01-01') - - def test_hourly(self): - self._check_freq('H', '1970-01-01') - - def test_minutely(self): - self._check_freq('T', '1970-01-01') - - def test_secondly(self): - self._check_freq('S', '1970-01-01') - - def test_millisecondly(self): - self._check_freq('L', '1970-01-01') - - def test_microsecondly(self): - self._check_freq('U', '1970-01-01') - - def test_nanosecondly(self): - self._check_freq('N', '1970-01-01') + @pytest.mark.parametrize('freq', ['W-THU', 'D', 'B', 'H', 'T', + 'S', 'L', 'U', 'N']) + def test_freq(self, freq): + self._check_freq(freq, '1970-01-01') def test_negone_ordinals(self): freqs = ['A', 'M', 'Q', 'D', 'H', 'T', 'S'] @@ -65,7 +45,7 @@ def test_negone_ordinals(self): for freq in freqs: period = Period(ordinal=-1, freq=freq) repr(period) - self.assertEqual(period.year, 1969) + assert period.year == 1969 period = Period(ordinal=-1, freq='B') repr(period) @@ -73,114 +53,7 @@ def test_negone_ordinals(self): repr(period) -class TestTslib(tm.TestCase): - def test_intraday_conversion_factors(self): - self.assertEqual(period_asfreq( - 1, get_freq('D'), get_freq('H'), False), 24) - self.assertEqual(period_asfreq( - 1, get_freq('D'), get_freq('T'), False), 1440) - self.assertEqual(period_asfreq( - 1, 
get_freq('D'), get_freq('S'), False), 86400) - self.assertEqual(period_asfreq(1, get_freq( - 'D'), get_freq('L'), False), 86400000) - self.assertEqual(period_asfreq(1, get_freq( - 'D'), get_freq('U'), False), 86400000000) - self.assertEqual(period_asfreq(1, get_freq( - 'D'), get_freq('N'), False), 86400000000000) - - self.assertEqual(period_asfreq( - 1, get_freq('H'), get_freq('T'), False), 60) - self.assertEqual(period_asfreq( - 1, get_freq('H'), get_freq('S'), False), 3600) - self.assertEqual(period_asfreq(1, get_freq('H'), - get_freq('L'), False), 3600000) - self.assertEqual(period_asfreq(1, get_freq( - 'H'), get_freq('U'), False), 3600000000) - self.assertEqual(period_asfreq(1, get_freq( - 'H'), get_freq('N'), False), 3600000000000) - - self.assertEqual(period_asfreq( - 1, get_freq('T'), get_freq('S'), False), 60) - self.assertEqual(period_asfreq( - 1, get_freq('T'), get_freq('L'), False), 60000) - self.assertEqual(period_asfreq(1, get_freq( - 'T'), get_freq('U'), False), 60000000) - self.assertEqual(period_asfreq(1, get_freq( - 'T'), get_freq('N'), False), 60000000000) - - self.assertEqual(period_asfreq( - 1, get_freq('S'), get_freq('L'), False), 1000) - self.assertEqual(period_asfreq(1, get_freq('S'), - get_freq('U'), False), 1000000) - self.assertEqual(period_asfreq(1, get_freq( - 'S'), get_freq('N'), False), 1000000000) - - self.assertEqual(period_asfreq( - 1, get_freq('L'), get_freq('U'), False), 1000) - self.assertEqual(period_asfreq(1, get_freq('L'), - get_freq('N'), False), 1000000) - - self.assertEqual(period_asfreq( - 1, get_freq('U'), get_freq('N'), False), 1000) - - def test_period_ordinal_start_values(self): - # information for 1.1.1970 - self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, - get_freq('A'))) - self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, - get_freq('M'))) - self.assertEqual(1, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, - get_freq('W'))) - self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, - 
get_freq('D'))) - self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, - get_freq('B'))) - - def test_period_ordinal_week(self): - self.assertEqual(1, period_ordinal(1970, 1, 4, 0, 0, 0, 0, 0, - get_freq('W'))) - self.assertEqual(2, period_ordinal(1970, 1, 5, 0, 0, 0, 0, 0, - get_freq('W'))) - - self.assertEqual(2284, period_ordinal(2013, 10, 6, 0, 0, 0, 0, 0, - get_freq('W'))) - self.assertEqual(2285, period_ordinal(2013, 10, 7, 0, 0, 0, 0, 0, - get_freq('W'))) - - def test_period_ordinal_business_day(self): - # Thursday - self.assertEqual(11415, period_ordinal(2013, 10, 3, 0, 0, 0, 0, 0, - get_freq('B'))) - # Friday - self.assertEqual(11416, period_ordinal(2013, 10, 4, 0, 0, 0, 0, 0, - get_freq('B'))) - # Saturday - self.assertEqual(11417, period_ordinal(2013, 10, 5, 0, 0, 0, 0, 0, - get_freq('B'))) - # Sunday - self.assertEqual(11417, period_ordinal(2013, 10, 6, 0, 0, 0, 0, 0, - get_freq('B'))) - # Monday - self.assertEqual(11417, period_ordinal(2013, 10, 7, 0, 0, 0, 0, 0, - get_freq('B'))) - # Tuesday - self.assertEqual(11418, period_ordinal(2013, 10, 8, 0, 0, 0, 0, 0, - get_freq('B'))) - - -class TestPeriodIndex(tm.TestCase): - - def setUp(self): - pass - - def test_tolist(self): - index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') - rs = index.tolist() - [tm.assertIsInstance(x, Period) for x in rs] - - recon = PeriodIndex(rs) - tm.assert_index_equal(index, recon) - +class TestPeriodIndex(object): def test_to_timestamp(self): index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') series = Series(1, index=index, name='foo') @@ -188,7 +61,7 @@ def test_to_timestamp(self): exp_index = date_range('1/1/2001', end='12/31/2009', freq='A-DEC') result = series.to_timestamp(how='end') tm.assert_index_equal(result.index, exp_index) - self.assertEqual(result.name, 'foo') + assert result.name == 'foo' exp_index = date_range('1/1/2001', end='1/1/2009', freq='AS-JAN') result = series.to_timestamp(how='start') @@ -220,25 +93,7 @@ def 
_get_with_delta(delta, freq='A-DEC'): freq='H') result = series.to_timestamp(how='end') tm.assert_index_equal(result.index, exp_index) - self.assertEqual(result.name, 'foo') - - def test_to_timestamp_quarterly_bug(self): - years = np.arange(1960, 2000).repeat(4) - quarters = np.tile(lrange(1, 5), 40) - - pindex = PeriodIndex(year=years, quarter=quarters) - - stamps = pindex.to_timestamp('D', 'end') - expected = DatetimeIndex([x.to_timestamp('D', 'end') for x in pindex]) - tm.assert_index_equal(stamps, expected) - - def test_to_timestamp_preserve_name(self): - index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009', - name='foo') - self.assertEqual(index.name, 'foo') - - conv = index.to_timestamp('D') - self.assertEqual(conv.name, 'foo') + assert result.name == 'foo' def test_to_timestamp_repr_is_code(self): zs = [Timestamp('99-04-17 00:00:00', tz='UTC'), @@ -246,58 +101,7 @@ def test_to_timestamp_repr_is_code(self): Timestamp('2001-04-17 00:00:00', tz='America/Los_Angeles'), Timestamp('2001-04-17 00:00:00', tz=None)] for z in zs: - self.assertEqual(eval(repr(z)), z) - - def test_to_timestamp_pi_nat(self): - # GH 7228 - index = PeriodIndex(['NaT', '2011-01', '2011-02'], freq='M', - name='idx') - - result = index.to_timestamp('D') - expected = DatetimeIndex([pd.NaT, datetime(2011, 1, 1), - datetime(2011, 2, 1)], name='idx') - tm.assert_index_equal(result, expected) - self.assertEqual(result.name, 'idx') - - result2 = result.to_period(freq='M') - tm.assert_index_equal(result2, index) - self.assertEqual(result2.name, 'idx') - - result3 = result.to_period(freq='3M') - exp = PeriodIndex(['NaT', '2011-01', '2011-02'], freq='3M', name='idx') - self.assert_index_equal(result3, exp) - self.assertEqual(result3.freqstr, '3M') - - msg = ('Frequency must be positive, because it' - ' represents span: -2A') - with tm.assertRaisesRegexp(ValueError, msg): - result.to_period(freq='-2A') - - def test_to_timestamp_pi_mult(self): - idx = PeriodIndex(['2011-01', 'NaT', 
'2011-02'], freq='2M', name='idx') - result = idx.to_timestamp() - expected = DatetimeIndex( - ['2011-01-01', 'NaT', '2011-02-01'], name='idx') - self.assert_index_equal(result, expected) - result = idx.to_timestamp(how='E') - expected = DatetimeIndex( - ['2011-02-28', 'NaT', '2011-03-31'], name='idx') - self.assert_index_equal(result, expected) - - def test_to_timestamp_pi_combined(self): - idx = PeriodIndex(start='2011', periods=2, freq='1D1H', name='idx') - result = idx.to_timestamp() - expected = DatetimeIndex( - ['2011-01-01 00:00', '2011-01-02 01:00'], name='idx') - self.assert_index_equal(result, expected) - result = idx.to_timestamp(how='E') - expected = DatetimeIndex( - ['2011-01-02 00:59:59', '2011-01-03 01:59:59'], name='idx') - self.assert_index_equal(result, expected) - result = idx.to_timestamp(how='E', freq='H') - expected = DatetimeIndex( - ['2011-01-02 00:00', '2011-01-03 01:00'], name='idx') - self.assert_index_equal(result, expected) + assert eval(repr(z)) == z def test_to_timestamp_to_period_astype(self): idx = DatetimeIndex([pd.NaT, '2011-01-01', '2011-02-01'], name='idx') @@ -308,7 +112,7 @@ def test_to_timestamp_to_period_astype(self): res = idx.astype('period[3M]') exp = PeriodIndex(['NaT', '2011-01', '2011-02'], freq='3M', name='idx') - self.assert_index_equal(res, exp) + tm.assert_index_equal(res, exp) def test_dti_to_period(self): dti = DatetimeIndex(start='1/1/2005', end='12/1/2005', freq='M') @@ -316,13 +120,13 @@ def test_dti_to_period(self): pi2 = dti.to_period(freq='D') pi3 = dti.to_period(freq='3D') - self.assertEqual(pi1[0], Period('Jan 2005', freq='M')) - self.assertEqual(pi2[0], Period('1/31/2005', freq='D')) - self.assertEqual(pi3[0], Period('1/31/2005', freq='3D')) + assert pi1[0] == Period('Jan 2005', freq='M') + assert pi2[0] == Period('1/31/2005', freq='D') + assert pi3[0] == Period('1/31/2005', freq='3D') - self.assertEqual(pi1[-1], Period('Nov 2005', freq='M')) - self.assertEqual(pi2[-1], Period('11/30/2005', freq='D')) - 
self.assertEqual(pi3[-1], Period('11/30/2005', freq='3D')) + assert pi1[-1] == Period('Nov 2005', freq='M') + assert pi2[-1] == Period('11/30/2005', freq='D') + assert pi3[-1] == Period('11/30/2005', freq='3D') tm.assert_index_equal(pi1, period_range('1/1/2005', '11/1/2005', freq='M')) @@ -331,61 +135,40 @@ def test_dti_to_period(self): tm.assert_index_equal(pi3, period_range('1/1/2005', '11/1/2005', freq='M').asfreq('3D')) - def test_period_astype_to_timestamp(self): - pi = pd.PeriodIndex(['2011-01', '2011-02', '2011-03'], freq='M') - - exp = pd.DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01']) - tm.assert_index_equal(pi.astype('datetime64[ns]'), exp) - - exp = pd.DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31']) - tm.assert_index_equal(pi.astype('datetime64[ns]', how='end'), exp) - - exp = pd.DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01'], - tz='US/Eastern') - res = pi.astype('datetime64[ns, US/Eastern]') - tm.assert_index_equal(pi.astype('datetime64[ns, US/Eastern]'), exp) - - exp = pd.DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31'], - tz='US/Eastern') - res = pi.astype('datetime64[ns, US/Eastern]', how='end') - tm.assert_index_equal(res, exp) - - def test_to_period_quarterly(self): + @pytest.mark.parametrize('month', MONTHS) + def test_to_period_quarterly(self, month): # make sure we can make the round trip - for month in MONTHS: - freq = 'Q-%s' % month - rng = period_range('1989Q3', '1991Q3', freq=freq) - stamps = rng.to_timestamp() - result = stamps.to_period(freq) - tm.assert_index_equal(rng, result) - - def test_to_period_quarterlyish(self): - offsets = ['BQ', 'QS', 'BQS'] - for off in offsets: - rng = date_range('01-Jan-2012', periods=8, freq=off) - prng = rng.to_period() - self.assertEqual(prng.freq, 'Q-DEC') + freq = 'Q-%s' % month + rng = period_range('1989Q3', '1991Q3', freq=freq) + stamps = rng.to_timestamp() + result = stamps.to_period(freq) + tm.assert_index_equal(rng, result) + + @pytest.mark.parametrize('off', 
['BQ', 'QS', 'BQS']) + def test_to_period_quarterlyish(self, off): + rng = date_range('01-Jan-2012', periods=8, freq=off) + prng = rng.to_period() + assert prng.freq == 'Q-DEC' - def test_to_period_annualish(self): - offsets = ['BA', 'AS', 'BAS'] - for off in offsets: - rng = date_range('01-Jan-2012', periods=8, freq=off) - prng = rng.to_period() - self.assertEqual(prng.freq, 'A-DEC') + @pytest.mark.parametrize('off', ['BA', 'AS', 'BAS']) + def test_to_period_annualish(self, off): + rng = date_range('01-Jan-2012', periods=8, freq=off) + prng = rng.to_period() + assert prng.freq == 'A-DEC' def test_to_period_monthish(self): offsets = ['MS', 'BM'] for off in offsets: rng = date_range('01-Jan-2012', periods=8, freq=off) prng = rng.to_period() - self.assertEqual(prng.freq, 'M') + assert prng.freq == 'M' rng = date_range('01-Jan-2012', periods=8, freq='M') prng = rng.to_period() - self.assertEqual(prng.freq, 'M') + assert prng.freq == 'M' - msg = pd.tseries.frequencies._INVALID_FREQ_ERROR - with self.assertRaisesRegexp(ValueError, msg): + msg = pd._libs.tslibs.frequencies._INVALID_FREQ_ERROR + with tm.assert_raises_regex(ValueError, msg): date_range('01-Jan-2012', periods=8, freq='EOM') def test_period_dt64_round_trip(self): @@ -397,20 +180,6 @@ def test_period_dt64_round_trip(self): pi = dti.to_period(freq='H') tm.assert_index_equal(pi.to_timestamp(), dti) - def test_to_timestamp_1703(self): - index = period_range('1/1/2012', periods=4, freq='D') - - result = index.to_timestamp() - self.assertEqual(result[0], Timestamp('1/1/2012')) - - def test_to_datetime_depr(self): - index = period_range('1/1/2012', periods=4, freq='D') - - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - result = index.to_datetime() - self.assertEqual(result[0], Timestamp('1/1/2012')) - def test_combine_first(self): # GH 3367 didx = pd.DatetimeIndex(start='1950-01-31', end='1950-07-31', freq='M') @@ -426,24 +195,137 @@ def test_combine_first(self): dtype=np.float64) 
tm.assert_series_equal(result, expected) - def test_searchsorted(self): - for freq in ['D', '2D']: - pidx = pd.PeriodIndex(['2014-01-01', '2014-01-02', '2014-01-03', - '2014-01-04', '2014-01-05'], freq=freq) + @pytest.mark.parametrize('freq', ['D', '2D']) + def test_searchsorted(self, freq): + pidx = pd.PeriodIndex(['2014-01-01', '2014-01-02', '2014-01-03', + '2014-01-04', '2014-01-05'], freq=freq) + + p1 = pd.Period('2014-01-01', freq=freq) + assert pidx.searchsorted(p1) == 0 + + p2 = pd.Period('2014-01-04', freq=freq) + assert pidx.searchsorted(p2) == 3 + + msg = "Input has different freq=H from PeriodIndex" + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + pidx.searchsorted(pd.Period('2014-01-01', freq='H')) + + msg = "Input has different freq=5D from PeriodIndex" + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + pidx.searchsorted(pd.Period('2014-01-01', freq='5D')) + + with tm.assert_produces_warning(FutureWarning): + pidx.searchsorted(key=p2) - p1 = pd.Period('2014-01-01', freq=freq) - self.assertEqual(pidx.searchsorted(p1), 0) - p2 = pd.Period('2014-01-04', freq=freq) - self.assertEqual(pidx.searchsorted(p2), 3) +class TestPeriodIndexConversion(object): + def test_tolist(self): + index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') + rs = index.tolist() + for x in rs: + assert isinstance(x, Period) - msg = "Input has different freq=H from PeriodIndex" - with self.assertRaisesRegexp(period.IncompatibleFrequency, msg): - pidx.searchsorted(pd.Period('2014-01-01', freq='H')) + recon = PeriodIndex(rs) + tm.assert_index_equal(index, recon) - msg = "Input has different freq=5D from PeriodIndex" - with self.assertRaisesRegexp(period.IncompatibleFrequency, msg): - pidx.searchsorted(pd.Period('2014-01-01', freq='5D')) + def test_to_timestamp_pi_nat(self): + # GH#7228 + index = PeriodIndex(['NaT', '2011-01', '2011-02'], freq='M', + name='idx') - with tm.assert_produces_warning(FutureWarning): - pidx.searchsorted(key=p2) + 
result = index.to_timestamp('D') + expected = DatetimeIndex([pd.NaT, datetime(2011, 1, 1), + datetime(2011, 2, 1)], name='idx') + tm.assert_index_equal(result, expected) + assert result.name == 'idx' + + result2 = result.to_period(freq='M') + tm.assert_index_equal(result2, index) + assert result2.name == 'idx' + + result3 = result.to_period(freq='3M') + exp = PeriodIndex(['NaT', '2011-01', '2011-02'], + freq='3M', name='idx') + tm.assert_index_equal(result3, exp) + assert result3.freqstr == '3M' + + msg = ('Frequency must be positive, because it' + ' represents span: -2A') + with tm.assert_raises_regex(ValueError, msg): + result.to_period(freq='-2A') + + def test_to_timestamp_preserve_name(self): + index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009', + name='foo') + assert index.name == 'foo' + + conv = index.to_timestamp('D') + assert conv.name == 'foo' + + def test_to_timestamp_quarterly_bug(self): + years = np.arange(1960, 2000).repeat(4) + quarters = np.tile(lrange(1, 5), 40) + + pindex = PeriodIndex(year=years, quarter=quarters) + + stamps = pindex.to_timestamp('D', 'end') + expected = DatetimeIndex([x.to_timestamp('D', 'end') for x in pindex]) + tm.assert_index_equal(stamps, expected) + + def test_to_timestamp_pi_mult(self): + idx = PeriodIndex(['2011-01', 'NaT', '2011-02'], + freq='2M', name='idx') + + result = idx.to_timestamp() + expected = DatetimeIndex(['2011-01-01', 'NaT', '2011-02-01'], + name='idx') + tm.assert_index_equal(result, expected) + + result = idx.to_timestamp(how='E') + expected = DatetimeIndex(['2011-02-28', 'NaT', '2011-03-31'], + name='idx') + tm.assert_index_equal(result, expected) + + def test_to_timestamp_pi_combined(self): + idx = PeriodIndex(start='2011', periods=2, freq='1D1H', name='idx') + + result = idx.to_timestamp() + expected = DatetimeIndex(['2011-01-01 00:00', '2011-01-02 01:00'], + name='idx') + tm.assert_index_equal(result, expected) + + result = idx.to_timestamp(how='E') + expected = 
DatetimeIndex(['2011-01-02 00:59:59', + '2011-01-03 01:59:59'], + name='idx') + tm.assert_index_equal(result, expected) + + result = idx.to_timestamp(how='E', freq='H') + expected = DatetimeIndex(['2011-01-02 00:00', '2011-01-03 01:00'], + name='idx') + tm.assert_index_equal(result, expected) + + def test_period_astype_to_timestamp(self): + pi = pd.PeriodIndex(['2011-01', '2011-02', '2011-03'], freq='M') + + exp = pd.DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01']) + tm.assert_index_equal(pi.astype('datetime64[ns]'), exp) + + exp = pd.DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31']) + tm.assert_index_equal(pi.astype('datetime64[ns]', how='end'), exp) + + exp = pd.DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01'], + tz='US/Eastern') + res = pi.astype('datetime64[ns, US/Eastern]') + tm.assert_index_equal(pi.astype('datetime64[ns, US/Eastern]'), exp) + + exp = pd.DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31'], + tz='US/Eastern') + res = pi.astype('datetime64[ns, US/Eastern]', how='end') + tm.assert_index_equal(res, exp) + + def test_to_timestamp_1703(self): + index = period_range('1/1/2012', periods=4, freq='D') + + result = index.to_timestamp() + assert result[0] == Timestamp('1/1/2012') diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 2f5b98d145e57..eb429f46a3355 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1,38 +1,42 @@ # -*- coding: utf-8 -*- +import pytest + from datetime import datetime, timedelta +from collections import defaultdict + import pandas.util.testing as tm -from pandas.indexes.api import Index, MultiIndex +from pandas.core.dtypes.generic import ABCIndex +from pandas.core.dtypes.common import is_unsigned_integer_dtype +from pandas.core.indexes.api import Index, MultiIndex from pandas.tests.indexes.common import Base from pandas.compat import (range, lrange, lzip, u, - text_type, zip, PY3, PY36) + text_type, zip, PY3, PY36, PYPY) 
import operator -import os - import numpy as np from pandas import (period_range, date_range, Series, - DataFrame, Float64Index, Int64Index, + DataFrame, Float64Index, Int64Index, UInt64Index, CategoricalIndex, DatetimeIndex, TimedeltaIndex, - PeriodIndex) -from pandas.core.index import _get_combined_index + PeriodIndex, isna) +from pandas.core.index import _get_combined_index, _ensure_index_from_sequences from pandas.util.testing import assert_almost_equal from pandas.compat.numpy import np_datetime64_compat import pandas.core.config as cf -from pandas.tseries.index import _to_m8 +from pandas.core.indexes.datetimes import _to_m8 import pandas as pd -from pandas.lib import Timestamp +from pandas._libs.tslib import Timestamp -class TestIndex(Base, tm.TestCase): +class TestIndex(Base): _holder = Index - def setUp(self): + def setup_method(self, method): self.indices = dict(unicodeIndex=tm.makeUnicodeIndex(100), strIndex=tm.makeStringIndex(100), dateIndex=tm.makeDateIndex(100), @@ -46,7 +50,8 @@ def setUp(self): catIndex=tm.makeCategoricalIndex(100), empty=Index([]), tuples=MultiIndex.from_tuples(lzip( - ['foo', 'bar', 'baz'], [1, 2, 3]))) + ['foo', 'bar', 'baz'], [1, 2, 3])), + repeats=Index([0, 0, 1, 1, 2, 2])) self.setup_indices() def create_index(self): @@ -54,14 +59,14 @@ def create_index(self): def test_new_axis(self): new_index = self.dateIndex[None, :] - self.assertEqual(new_index.ndim, 2) - tm.assertIsInstance(new_index, np.ndarray) + assert new_index.ndim == 2 + assert isinstance(new_index, np.ndarray) - def test_copy_and_deepcopy(self): - super(TestIndex, self).test_copy_and_deepcopy() + def test_copy_and_deepcopy(self, indices): + super(TestIndex, self).test_copy_and_deepcopy(indices) new_copy2 = self.intIndex.copy(dtype=int) - self.assertEqual(new_copy2.dtype.kind, 'i') + assert new_copy2.dtype.kind == 'i' def test_constructor(self): # regular instance creation @@ -77,48 +82,57 @@ def test_constructor(self): # copy arr = np.array(self.strIndex) index = 
Index(arr, copy=True, name='name') - tm.assertIsInstance(index, Index) - self.assertEqual(index.name, 'name') + assert isinstance(index, Index) + assert index.name == 'name' tm.assert_numpy_array_equal(arr, index.values) arr[0] = "SOMEBIGLONGSTRING" - self.assertNotEqual(index[0], "SOMEBIGLONGSTRING") + assert index[0] != "SOMEBIGLONGSTRING" # what to do here? # arr = np.array(5.) - # self.assertRaises(Exception, arr.view, Index) + # pytest.raises(Exception, arr.view, Index) def test_constructor_corner(self): # corner case - self.assertRaises(TypeError, Index, 0) + pytest.raises(TypeError, Index, 0) def test_construction_list_mixed_tuples(self): - # 10697 - # if we are constructing from a mixed list of tuples, make sure that we - # are independent of the sorting order + # see gh-10697: if we are constructing from a mixed list of tuples, + # make sure that we are independent of the sorting order. idx1 = Index([('A', 1), 'B']) - self.assertIsInstance(idx1, Index) and self.assertNotInstance( - idx1, MultiIndex) + assert isinstance(idx1, Index) + assert not isinstance(idx1, MultiIndex) + idx2 = Index(['B', ('A', 1)]) - self.assertIsInstance(idx2, Index) and self.assertNotInstance( - idx2, MultiIndex) + assert isinstance(idx2, Index) + assert not isinstance(idx2, MultiIndex) + + @pytest.mark.parametrize('na_value', [None, np.nan]) + @pytest.mark.parametrize('vtype', [list, tuple, iter]) + def test_construction_list_tuples_nan(self, na_value, vtype): + # GH 18505 : valid tuples containing NaN + values = [(1, 'two'), (3., na_value)] + result = Index(vtype(values)) + expected = MultiIndex.from_tuples(values) + tm.assert_index_equal(result, expected) def test_constructor_from_index_datetimetz(self): idx = pd.date_range('2015-01-01 10:00', freq='D', periods=3, tz='US/Eastern') result = pd.Index(idx) tm.assert_index_equal(result, idx) - self.assertEqual(result.tz, idx.tz) + assert result.tz == idx.tz - result = pd.Index(idx.asobject) + result = pd.Index(idx.astype(object)) 
tm.assert_index_equal(result, idx) - self.assertEqual(result.tz, idx.tz) + assert result.tz == idx.tz def test_constructor_from_index_timedelta(self): idx = pd.timedelta_range('1 days', freq='D', periods=3) result = pd.Index(idx) tm.assert_index_equal(result, idx) - result = pd.Index(idx.asobject) + result = pd.Index(idx.astype(object)) tm.assert_index_equal(result, idx) def test_constructor_from_index_period(self): @@ -126,7 +140,7 @@ def test_constructor_from_index_period(self): result = pd.Index(idx) tm.assert_index_equal(result, idx) - result = pd.Index(idx.asobject) + result = pd.Index(idx.astype(object)) tm.assert_index_equal(result, idx) def test_constructor_from_series_datetimetz(self): @@ -134,7 +148,7 @@ def test_constructor_from_series_datetimetz(self): tz='US/Eastern') result = pd.Index(pd.Series(idx)) tm.assert_index_equal(result, idx) - self.assertEqual(result.tz, idx.tz) + assert result.tz == idx.tz def test_constructor_from_series_timedelta(self): idx = pd.timedelta_range('1 days', freq='D', periods=3) @@ -153,9 +167,9 @@ def test_constructor_from_series(self): s = Series([Timestamp('20110101'), Timestamp('20120101'), Timestamp('20130101')]) result = Index(s) - self.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) result = DatetimeIndex(s) - self.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) # GH 6273 # create from a series, passing a freq @@ -164,31 +178,30 @@ def test_constructor_from_series(self): result = DatetimeIndex(s, freq='MS') expected = DatetimeIndex(['1-1-1990', '2-1-1990', '3-1-1990', '4-1-1990', '5-1-1990'], freq='MS') - self.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) df = pd.DataFrame(np.random.rand(5, 3)) df['date'] = ['1-1-1990', '2-1-1990', '3-1-1990', '4-1-1990', '5-1-1990'] result = DatetimeIndex(df['date'], freq='MS') expected.name = 'date' - self.assert_index_equal(result, expected) - self.assertEqual(df['date'].dtype, object) + 
tm.assert_index_equal(result, expected) + assert df['date'].dtype == object exp = pd.Series(['1-1-1990', '2-1-1990', '3-1-1990', '4-1-1990', '5-1-1990'], name='date') - self.assert_series_equal(df['date'], exp) + tm.assert_series_equal(df['date'], exp) # GH 6274 # infer freq of same result = pd.infer_freq(df['date']) - self.assertEqual(result, 'MS') + assert result == 'MS' def test_constructor_ndarray_like(self): # GH 5460#issuecomment-44474502 # it should be possible to convert any object that satisfies the numpy # ndarray interface directly into an Index class ArrayLike(object): - def __init__(self, array): self.array = array @@ -199,22 +212,53 @@ def __array__(self, dtype=None): date_range('2000-01-01', periods=3).values]: expected = pd.Index(array) result = pd.Index(ArrayLike(array)) - self.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize('dtype', [ + int, 'int64', 'int32', 'int16', 'int8', 'uint64', 'uint32', + 'uint16', 'uint8']) + def test_constructor_int_dtype_float(self, dtype): + # GH 18400 + if is_unsigned_integer_dtype(dtype): + index_type = UInt64Index + else: + index_type = Int64Index + + expected = index_type([0, 1, 2, 3]) + result = Index([0., 1., 2., 3.], dtype=dtype) + tm.assert_index_equal(result, expected) + + def test_constructor_int_dtype_nan(self): + # see gh-15187 + data = [np.nan] + msg = "cannot convert" + + with tm.assert_raises_regex(ValueError, msg): + Index(data, dtype='int64') + + with tm.assert_raises_regex(ValueError, msg): + Index(data, dtype='uint64') + + # This, however, should not break + # because NaN is float. 
+ expected = Float64Index(data) + result = Index(data, dtype='float') + tm.assert_index_equal(result, expected) def test_index_ctor_infer_nan_nat(self): # GH 13467 exp = pd.Float64Index([np.nan, np.nan]) - self.assertEqual(exp.dtype, np.float64) + assert exp.dtype == np.float64 tm.assert_index_equal(Index([np.nan, np.nan]), exp) tm.assert_index_equal(Index(np.array([np.nan, np.nan])), exp) exp = pd.DatetimeIndex([pd.NaT, pd.NaT]) - self.assertEqual(exp.dtype, 'datetime64[ns]') + assert exp.dtype == 'datetime64[ns]' tm.assert_index_equal(Index([pd.NaT, pd.NaT]), exp) tm.assert_index_equal(Index(np.array([pd.NaT, pd.NaT])), exp) exp = pd.DatetimeIndex([pd.NaT, pd.NaT]) - self.assertEqual(exp.dtype, 'datetime64[ns]') + assert exp.dtype == 'datetime64[ns]' for data in [[pd.NaT, np.nan], [np.nan, pd.NaT], [np.nan, np.datetime64('nat')], @@ -223,13 +267,12 @@ def test_index_ctor_infer_nan_nat(self): tm.assert_index_equal(Index(np.array(data, dtype=object)), exp) exp = pd.TimedeltaIndex([pd.NaT, pd.NaT]) - self.assertEqual(exp.dtype, 'timedelta64[ns]') + assert exp.dtype == 'timedelta64[ns]' for data in [[np.nan, np.timedelta64('nat')], [np.timedelta64('nat'), np.nan], [pd.NaT, np.timedelta64('nat')], [np.timedelta64('nat'), pd.NaT]]: - tm.assert_index_equal(Index(data), exp) tm.assert_index_equal(Index(np.array(data, dtype=object)), exp) @@ -248,47 +291,47 @@ def test_index_ctor_infer_periodindex(self): xp = period_range('2012-1-1', freq='M', periods=3) rs = Index(xp) tm.assert_index_equal(rs, xp) - tm.assertIsInstance(rs, PeriodIndex) + assert isinstance(rs, PeriodIndex) def test_constructor_simple_new(self): idx = Index([1, 2, 3, 4, 5], name='int') result = idx._simple_new(idx, 'int') - self.assert_index_equal(result, idx) + tm.assert_index_equal(result, idx) idx = Index([1.1, np.nan, 2.2, 3.0], name='float') result = idx._simple_new(idx, 'float') - self.assert_index_equal(result, idx) + tm.assert_index_equal(result, idx) idx = Index(['A', 'B', 'C', np.nan], 
name='obj') result = idx._simple_new(idx, 'obj') - self.assert_index_equal(result, idx) + tm.assert_index_equal(result, idx) def test_constructor_dtypes(self): for idx in [Index(np.array([1, 2, 3], dtype=int)), Index(np.array([1, 2, 3], dtype=int), dtype=int), Index([1, 2, 3], dtype=int)]: - self.assertIsInstance(idx, Int64Index) + assert isinstance(idx, Int64Index) - # these should coerce + # These should coerce for idx in [Index(np.array([1., 2., 3.], dtype=float), dtype=int), Index([1., 2., 3.], dtype=int)]: - self.assertIsInstance(idx, Int64Index) + assert isinstance(idx, Int64Index) for idx in [Index(np.array([1., 2., 3.], dtype=float)), Index(np.array([1, 2, 3], dtype=int), dtype=float), Index(np.array([1., 2., 3.], dtype=float), dtype=float), Index([1, 2, 3], dtype=float), Index([1., 2., 3.], dtype=float)]: - self.assertIsInstance(idx, Float64Index) + assert isinstance(idx, Float64Index) for idx in [Index(np.array([True, False, True], dtype=bool)), Index([True, False, True]), Index(np.array([True, False, True], dtype=bool), dtype=bool), Index([True, False, True], dtype=bool)]: - self.assertIsInstance(idx, Index) - self.assertEqual(idx.dtype, object) + assert isinstance(idx, Index) + assert idx.dtype == object for idx in [Index(np.array([1, 2, 3], dtype=int), dtype='category'), Index([1, 2, 3], dtype='category'), @@ -297,32 +340,32 @@ def test_constructor_dtypes(self): dtype='category'), Index([datetime(2011, 1, 1), datetime(2011, 1, 2)], dtype='category')]: - self.assertIsInstance(idx, CategoricalIndex) + assert isinstance(idx, CategoricalIndex) for idx in [Index(np.array([np_datetime64_compat('2011-01-01'), np_datetime64_compat('2011-01-02')])), Index([datetime(2011, 1, 1), datetime(2011, 1, 2)])]: - self.assertIsInstance(idx, DatetimeIndex) + assert isinstance(idx, DatetimeIndex) for idx in [Index(np.array([np_datetime64_compat('2011-01-01'), np_datetime64_compat('2011-01-02')]), dtype=object), Index([datetime(2011, 1, 1), datetime(2011, 1, 2)], 
dtype=object)]: - self.assertNotIsInstance(idx, DatetimeIndex) - self.assertIsInstance(idx, Index) - self.assertEqual(idx.dtype, object) + assert not isinstance(idx, DatetimeIndex) + assert isinstance(idx, Index) + assert idx.dtype == object for idx in [Index(np.array([np.timedelta64(1, 'D'), np.timedelta64( 1, 'D')])), Index([timedelta(1), timedelta(1)])]: - self.assertIsInstance(idx, TimedeltaIndex) + assert isinstance(idx, TimedeltaIndex) for idx in [Index(np.array([np.timedelta64(1, 'D'), np.timedelta64(1, 'D')]), dtype=object), Index([timedelta(1), timedelta(1)], dtype=object)]: - self.assertNotIsInstance(idx, TimedeltaIndex) - self.assertIsInstance(idx, Index) - self.assertEqual(idx.dtype, object) + assert not isinstance(idx, TimedeltaIndex) + assert isinstance(idx, Index) + assert idx.dtype == object def test_constructor_dtypes_datetime(self): @@ -372,7 +415,7 @@ def test_view_with_args(self): ind = self.indices[i] # with arguments - self.assertRaises(TypeError, lambda: ind.view('i8')) + pytest.raises(TypeError, lambda: ind.view('i8')) # these are ok for i in list(set(self.indices.keys()) - set(restricted)): @@ -381,15 +424,6 @@ def test_view_with_args(self): # with arguments ind.view('i8') - def test_legacy_pickle_identity(self): - - # GH 8431 - pth = tm.get_data_path() - s1 = pd.read_pickle(os.path.join(pth, 's1-0.12.0.pickle')) - s2 = pd.read_pickle(os.path.join(pth, 's2-0.12.0.pickle')) - self.assertFalse(s1.index.identical(s2.index)) - self.assertFalse(s1.index.equals(s2.index)) - def test_astype(self): casted = self.intIndex.astype('i8') @@ -399,20 +433,20 @@ def test_astype(self): # pass on name self.intIndex.name = 'foobar' casted = self.intIndex.astype('i8') - self.assertEqual(casted.name, 'foobar') + assert casted.name == 'foobar' def test_equals_object(self): # same - self.assertTrue(Index(['a', 'b', 'c']).equals(Index(['a', 'b', 'c']))) + assert Index(['a', 'b', 'c']).equals(Index(['a', 'b', 'c'])) # different length - 
self.assertFalse(Index(['a', 'b', 'c']).equals(Index(['a', 'b']))) + assert not Index(['a', 'b', 'c']).equals(Index(['a', 'b'])) # same length, different values - self.assertFalse(Index(['a', 'b', 'c']).equals(Index(['a', 'b', 'd']))) + assert not Index(['a', 'b', 'c']).equals(Index(['a', 'b', 'd'])) # Must also be an Index - self.assertFalse(Index(['a', 'b', 'c']).equals(['a', 'b', 'c'])) + assert not Index(['a', 'b', 'c']).equals(['a', 'b', 'c']) def test_insert(self): @@ -421,34 +455,40 @@ def test_insert(self): result = Index(['b', 'c', 'd']) # test 0th element - self.assert_index_equal(Index(['a', 'b', 'c', 'd']), - result.insert(0, 'a')) + tm.assert_index_equal(Index(['a', 'b', 'c', 'd']), + result.insert(0, 'a')) # test Nth element that follows Python list behavior - self.assert_index_equal(Index(['b', 'c', 'e', 'd']), - result.insert(-1, 'e')) + tm.assert_index_equal(Index(['b', 'c', 'e', 'd']), + result.insert(-1, 'e')) # test loc +/- neq (0, -1) - self.assert_index_equal(result.insert(1, 'z'), result.insert(-2, 'z')) + tm.assert_index_equal(result.insert(1, 'z'), result.insert(-2, 'z')) # test empty null_index = Index([]) - self.assert_index_equal(Index(['a']), null_index.insert(0, 'a')) + tm.assert_index_equal(Index(['a']), null_index.insert(0, 'a')) + + # GH 18295 (test missing) + expected = Index(['a', np.nan, 'b', 'c']) + for na in (np.nan, pd.NaT, None): + result = Index(list('abc')).insert(1, na) + tm.assert_index_equal(result, expected) def test_delete(self): idx = Index(['a', 'b', 'c', 'd'], name='idx') expected = Index(['b', 'c', 'd'], name='idx') result = idx.delete(0) - self.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) + tm.assert_index_equal(result, expected) + assert result.name == expected.name expected = Index(['a', 'b', 'c'], name='idx') result = idx.delete(-1) - self.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) + tm.assert_index_equal(result, expected) + assert 
result.name == expected.name - with tm.assertRaises((IndexError, ValueError)): + with pytest.raises((IndexError, ValueError)): # either depending on numpy version result = idx.delete(5) @@ -458,60 +498,59 @@ def test_identical(self): i1 = Index(['a', 'b', 'c']) i2 = Index(['a', 'b', 'c']) - self.assertTrue(i1.identical(i2)) + assert i1.identical(i2) i1 = i1.rename('foo') - self.assertTrue(i1.equals(i2)) - self.assertFalse(i1.identical(i2)) + assert i1.equals(i2) + assert not i1.identical(i2) i2 = i2.rename('foo') - self.assertTrue(i1.identical(i2)) + assert i1.identical(i2) i3 = Index([('a', 'a'), ('a', 'b'), ('b', 'a')]) i4 = Index([('a', 'a'), ('a', 'b'), ('b', 'a')], tupleize_cols=False) - self.assertFalse(i3.identical(i4)) + assert not i3.identical(i4) def test_is_(self): ind = Index(range(10)) - self.assertTrue(ind.is_(ind)) - self.assertTrue(ind.is_(ind.view().view().view().view())) - self.assertFalse(ind.is_(Index(range(10)))) - self.assertFalse(ind.is_(ind.copy())) - self.assertFalse(ind.is_(ind.copy(deep=False))) - self.assertFalse(ind.is_(ind[:])) - self.assertFalse(ind.is_(ind.view(np.ndarray).view(Index))) - self.assertFalse(ind.is_(np.array(range(10)))) + assert ind.is_(ind) + assert ind.is_(ind.view().view().view().view()) + assert not ind.is_(Index(range(10))) + assert not ind.is_(ind.copy()) + assert not ind.is_(ind.copy(deep=False)) + assert not ind.is_(ind[:]) + assert not ind.is_(np.array(range(10))) # quasi-implementation dependent - self.assertTrue(ind.is_(ind.view())) + assert ind.is_(ind.view()) ind2 = ind.view() ind2.name = 'bob' - self.assertTrue(ind.is_(ind2)) - self.assertTrue(ind2.is_(ind)) + assert ind.is_(ind2) + assert ind2.is_(ind) # doesn't matter if Indices are *actually* views of underlying data, - self.assertFalse(ind.is_(Index(ind.values))) + assert not ind.is_(Index(ind.values)) arr = np.array(range(1, 11)) ind1 = Index(arr, copy=False) ind2 = Index(arr, copy=False) - self.assertFalse(ind1.is_(ind2)) + assert not ind1.is_(ind2) 
def test_asof(self): d = self.dateIndex[0] - self.assertEqual(self.dateIndex.asof(d), d) - self.assertTrue(np.isnan(self.dateIndex.asof(d - timedelta(1)))) + assert self.dateIndex.asof(d) == d + assert isna(self.dateIndex.asof(d - timedelta(1))) d = self.dateIndex[-1] - self.assertEqual(self.dateIndex.asof(d + timedelta(1)), d) + assert self.dateIndex.asof(d + timedelta(1)) == d d = self.dateIndex[0].to_pydatetime() - tm.assertIsInstance(self.dateIndex.asof(d), Timestamp) + assert isinstance(self.dateIndex.asof(d), Timestamp) def test_asof_datetime_partial(self): idx = pd.date_range('2010-01-01', periods=2, freq='m') expected = Timestamp('2010-02-28') result = idx.asof('2010-02') - self.assertEqual(result, expected) - self.assertFalse(isinstance(result, Index)) + assert result == expected + assert not isinstance(result, Index) def test_nanosecond_index_access(self): s = Series([Timestamp('20130101')]).values.view('i8')[0] @@ -521,12 +560,11 @@ def test_nanosecond_index_access(self): first_value = x.asof(x.index[0]) # this does not yet work, as parsing strings is done via dateutil - # self.assertEqual(first_value, - # x['2013-01-01 00:00:00.000000050+0000']) + # assert first_value == x['2013-01-01 00:00:00.000000050+0000'] exp_ts = np_datetime64_compat('2013-01-01 00:00:00.000000050+0000', 'ns') - self.assertEqual(first_value, x[Timestamp(exp_ts)]) + assert first_value == x[Timestamp(exp_ts)] def test_comparators(self): index = self.dateIndex @@ -539,7 +577,7 @@ def _check(op): arr_result = op(arr, element) index_result = op(index, element) - self.assertIsInstance(index_result, np.ndarray) + assert isinstance(index_result, np.ndarray) tm.assert_numpy_array_equal(arr_result, index_result) _check(operator.eq) @@ -556,16 +594,16 @@ def test_booleanindex(self): subIndex = self.strIndex[boolIdx] for i, val in enumerate(subIndex): - self.assertEqual(subIndex.get_loc(val), i) + assert subIndex.get_loc(val) == i subIndex = self.strIndex[list(boolIdx)] for i, val in 
enumerate(subIndex): - self.assertEqual(subIndex.get_loc(val), i) + assert subIndex.get_loc(val) == i def test_fancy(self): sl = self.strIndex[[1, 2, 3]] for i in sl: - self.assertEqual(i, sl[sl.get_loc(i)]) + assert i == sl[sl.get_loc(i)] def test_empty_fancy(self): empty_farr = np.array([], dtype=np.float_) @@ -577,64 +615,70 @@ def test_empty_fancy(self): for idx in [self.strIndex, self.intIndex, self.floatIndex]: empty_idx = idx.__class__([]) - self.assertTrue(idx[[]].identical(empty_idx)) - self.assertTrue(idx[empty_iarr].identical(empty_idx)) - self.assertTrue(idx[empty_barr].identical(empty_idx)) + assert idx[[]].identical(empty_idx) + assert idx[empty_iarr].identical(empty_idx) + assert idx[empty_barr].identical(empty_idx) # np.ndarray only accepts ndarray of int & bool dtypes, so should # Index. - self.assertRaises(IndexError, idx.__getitem__, empty_farr) + pytest.raises(IndexError, idx.__getitem__, empty_farr) - def test_getitem(self): - arr = np.array(self.dateIndex) - exp = self.dateIndex[5] - exp = _to_m8(exp) + def test_getitem_error(self, indices): - self.assertEqual(exp, arr[5]) + with pytest.raises(IndexError): + indices[101] + + with pytest.raises(IndexError): + indices['no_int'] def test_intersection(self): first = self.strIndex[:20] second = self.strIndex[:10] intersect = first.intersection(second) - self.assertTrue(tm.equalContents(intersect, second)) + assert tm.equalContents(intersect, second) # Corner cases inter = first.intersection(first) - self.assertIs(inter, first) + assert inter is first idx1 = Index([1, 2, 3, 4, 5], name='idx') # if target has the same name, it is preserved idx2 = Index([3, 4, 5, 6, 7], name='idx') expected2 = Index([3, 4, 5], name='idx') result2 = idx1.intersection(idx2) - self.assert_index_equal(result2, expected2) - self.assertEqual(result2.name, expected2.name) + tm.assert_index_equal(result2, expected2) + assert result2.name == expected2.name # if target name is different, it will be reset idx3 = Index([3, 4, 5, 
6, 7], name='other') expected3 = Index([3, 4, 5], name=None) result3 = idx1.intersection(idx3) - self.assert_index_equal(result3, expected3) - self.assertEqual(result3.name, expected3.name) + tm.assert_index_equal(result3, expected3) + assert result3.name == expected3.name # non monotonic idx1 = Index([5, 3, 2, 4, 1], name='idx') idx2 = Index([4, 7, 6, 5, 3], name='idx') - result2 = idx1.intersection(idx2) - self.assertTrue(tm.equalContents(result2, expected2)) - self.assertEqual(result2.name, expected2.name) + expected = Index([5, 3, 4], name='idx') + result = idx1.intersection(idx2) + tm.assert_index_equal(result, expected) - idx3 = Index([4, 7, 6, 5, 3], name='other') - result3 = idx1.intersection(idx3) - self.assertTrue(tm.equalContents(result3, expected3)) - self.assertEqual(result3.name, expected3.name) + idx2 = Index([4, 7, 6, 5, 3], name='other') + expected = Index([5, 3, 4], name=None) + result = idx1.intersection(idx2) + tm.assert_index_equal(result, expected) # non-monotonic non-unique idx1 = Index(['A', 'B', 'A', 'C']) idx2 = Index(['B', 'D']) expected = Index(['B'], dtype='object') result = idx1.intersection(idx2) - self.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) + + idx2 = Index(['B', 'D', 'A']) + expected = Index(['A', 'B', 'A'], dtype='object') + result = idx1.intersection(idx2) + tm.assert_index_equal(result, expected) # preserve names first = self.strIndex[5:20] @@ -642,39 +686,48 @@ def test_intersection(self): first.name = 'A' second.name = 'A' intersect = first.intersection(second) - self.assertEqual(intersect.name, 'A') + assert intersect.name == 'A' second.name = 'B' intersect = first.intersection(second) - self.assertIsNone(intersect.name) + assert intersect.name is None first.name = None second.name = 'B' intersect = first.intersection(second) - self.assertIsNone(intersect.name) + assert intersect.name is None + + def test_intersect_str_dates(self): + dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 
22)] + + i1 = Index(dt_dates, dtype=object) + i2 = Index(['aa'], dtype=object) + res = i2.intersection(i1) + + assert len(res) == 0 def test_union(self): first = self.strIndex[5:20] second = self.strIndex[:10] everything = self.strIndex[:20] union = first.union(second) - self.assertTrue(tm.equalContents(union, everything)) + assert tm.equalContents(union, everything) # GH 10149 cases = [klass(second.values) for klass in [np.array, Series, list]] for case in cases: result = first.union(case) - self.assertTrue(tm.equalContents(result, everything)) + assert tm.equalContents(result, everything) # Corner cases union = first.union(first) - self.assertIs(union, first) + assert union is first union = first.union([]) - self.assertIs(union, first) + assert union is first union = Index([]).union(first) - self.assertIs(union, first) + assert union is first # preserve names first = Index(list('ab'), name='A') @@ -740,8 +793,8 @@ def test_union(self): else: appended = np.append(self.strIndex, self.dateIndex.astype('O')) - self.assertTrue(tm.equalContents(firstCat, appended)) - self.assertTrue(tm.equalContents(secondCat, self.strIndex)) + assert tm.equalContents(firstCat, appended) + assert tm.equalContents(secondCat, self.strIndex) tm.assert_contains_all(self.strIndex, firstCat) tm.assert_contains_all(self.strIndex, secondCat) tm.assert_contains_all(self.dateIndex, firstCat) @@ -749,23 +802,23 @@ def test_union(self): def test_add(self): idx = self.strIndex expected = Index(self.strIndex.values * 2) - self.assert_index_equal(idx + idx, expected) - self.assert_index_equal(idx + idx.tolist(), expected) - self.assert_index_equal(idx.tolist() + idx, expected) + tm.assert_index_equal(idx + idx, expected) + tm.assert_index_equal(idx + idx.tolist(), expected) + tm.assert_index_equal(idx.tolist() + idx, expected) # test add and radd idx = Index(list('abc')) expected = Index(['a1', 'b1', 'c1']) - self.assert_index_equal(idx + '1', expected) + tm.assert_index_equal(idx + '1', expected) 
expected = Index(['1a', '1b', '1c']) - self.assert_index_equal('1' + idx, expected) + tm.assert_index_equal('1' + idx, expected) def test_sub(self): idx = self.strIndex - self.assertRaises(TypeError, lambda: idx - 'a') - self.assertRaises(TypeError, lambda: idx - idx) - self.assertRaises(TypeError, lambda: idx - idx.tolist()) - self.assertRaises(TypeError, lambda: idx.tolist() - idx) + pytest.raises(TypeError, lambda: idx - 'a') + pytest.raises(TypeError, lambda: idx - idx) + pytest.raises(TypeError, lambda: idx - idx.tolist()) + pytest.raises(TypeError, lambda: idx.tolist() - idx) def test_map_identity_mapping(self): # GH 12766 @@ -777,15 +830,16 @@ def test_map_with_tuples(self): # Test that returning a single tuple from an Index # returns an Index. - boolean_index = tm.makeIntIndex(3).map(lambda x: (x,)) - expected = Index([(0,), (1,), (2,)]) - tm.assert_index_equal(boolean_index, expected) + idx = tm.makeIntIndex(3) + result = tm.makeIntIndex(3).map(lambda x: (x,)) + expected = Index([(i,) for i in idx]) + tm.assert_index_equal(result, expected) # Test that returning a tuple from a map of a single index # returns a MultiIndex object. - boolean_index = tm.makeIntIndex(3).map(lambda x: (x, x == 1)) - expected = MultiIndex.from_tuples([(0, False), (1, True), (2, False)]) - tm.assert_index_equal(boolean_index, expected) + result = idx.map(lambda x: (x, x == 1)) + expected = MultiIndex.from_tuples([(i, i == 1) for i in idx]) + tm.assert_index_equal(result, expected) # Test that returning a single object from a MultiIndex # returns an Index. 
@@ -809,45 +863,101 @@ def test_map_tseries_indices_return_index(self): exp = Index(range(24), name='hourly') tm.assert_index_equal(exp, date_index.map(lambda x: x.hour)) + @pytest.mark.parametrize( + "mapper", + [ + lambda values, index: {i: e for e, i in zip(values, index)}, + lambda values, index: pd.Series(values, index)]) + def test_map_dictlike(self, mapper): + # GH 12756 + expected = Index(['foo', 'bar', 'baz']) + idx = tm.makeIntIndex(3) + result = idx.map(mapper(expected.values, idx)) + tm.assert_index_equal(result, expected) + + for name in self.indices.keys(): + if name == 'catIndex': + # Tested in test_categorical + continue + elif name == 'repeats': + # Cannot map duplicated index + continue + + index = self.indices[name] + expected = Index(np.arange(len(index), 0, -1)) + + # to match proper result coercion for uints + if name == 'empty': + expected = Index([]) + + result = index.map(mapper(expected, index)) + tm.assert_index_equal(result, expected) + + def test_map_with_non_function_missing_values(self): + # GH 12756 + expected = Index([2., np.nan, 'foo']) + input = Index([2, 1, 0]) + + mapper = Series(['foo', 2., 'baz'], index=[0, 2, -1]) + tm.assert_index_equal(expected, input.map(mapper)) + + mapper = {0: 'foo', 2: 2.0, -1: 'baz'} + tm.assert_index_equal(expected, input.map(mapper)) + + def test_map_na_exclusion(self): + idx = Index([1.5, np.nan, 3, np.nan, 5]) + + result = idx.map(lambda x: x * 2, na_action='ignore') + exp = idx * 2 + tm.assert_index_equal(result, exp) + + def test_map_defaultdict(self): + idx = Index([1, 2, 3]) + default_dict = defaultdict(lambda: 'blank') + default_dict[1] = 'stuff' + result = idx.map(default_dict) + expected = Index(['stuff', 'blank', 'blank']) + tm.assert_index_equal(result, expected) + def test_append_multiple(self): index = Index(['a', 'b', 'c', 'd', 'e', 'f']) foos = [index[:2], index[2:4], index[4:]] result = foos[0].append(foos[1:]) - self.assert_index_equal(result, index) + tm.assert_index_equal(result, 
index) # empty result = index.append([]) - self.assert_index_equal(result, index) + tm.assert_index_equal(result, index) def test_append_empty_preserve_name(self): left = Index([], name='foo') right = Index([1, 2, 3], name='foo') result = left.append(right) - self.assertEqual(result.name, 'foo') + assert result.name == 'foo' left = Index([], name='foo') right = Index([1, 2, 3], name='bar') result = left.append(right) - self.assertIsNone(result.name) + assert result.name is None def test_add_string(self): # from bug report index = Index(['a', 'b', 'c']) index2 = index + 'foo' - self.assertNotIn('a', index2) - self.assertIn('afoo', index2) + assert 'a' not in index2 + assert 'afoo' in index2 def test_iadd_string(self): index = pd.Index(['a', 'b', 'c']) # doesn't fail test unless there is a check before `+=` - self.assertIn('a', index) + assert 'a' in index index += '_x' - self.assertIn('a_x', index) + assert 'a_x' in index def test_difference(self): @@ -858,23 +968,23 @@ def test_difference(self): # different names result = first.difference(second) - self.assertTrue(tm.equalContents(result, answer)) - self.assertEqual(result.name, None) + assert tm.equalContents(result, answer) + assert result.name is None # same names second.name = 'name' result = first.difference(second) - self.assertEqual(result.name, 'name') + assert result.name == 'name' # with empty result = first.difference([]) - self.assertTrue(tm.equalContents(result, first)) - self.assertEqual(result.name, first.name) + assert tm.equalContents(result, first) + assert result.name == first.name - # with everythin + # with everything result = first.difference(first) - self.assertEqual(len(result), 0) - self.assertEqual(result.name, first.name) + assert len(result) == 0 + assert result.name == first.name def test_symmetric_difference(self): # smoke @@ -882,20 +992,20 @@ def test_symmetric_difference(self): idx2 = Index([2, 3, 4, 5]) result = idx1.symmetric_difference(idx2) expected = Index([1, 5]) - 
self.assertTrue(tm.equalContents(result, expected)) - self.assertIsNone(result.name) + assert tm.equalContents(result, expected) + assert result.name is None # __xor__ syntax expected = idx1 ^ idx2 - self.assertTrue(tm.equalContents(result, expected)) - self.assertIsNone(result.name) + assert tm.equalContents(result, expected) + assert result.name is None # multiIndex idx1 = MultiIndex.from_tuples(self.tuples) idx2 = MultiIndex.from_tuples([('foo', 1), ('bar', 3)]) result = idx1.symmetric_difference(idx2) expected = MultiIndex.from_tuples([('bar', 2), ('baz', 3), ('bar', 3)]) - self.assertTrue(tm.equalContents(result, expected)) + assert tm.equalContents(result, expected) # nans: # GH 13514 change: {nan} - {nan} == {} @@ -917,32 +1027,32 @@ def test_symmetric_difference(self): idx2 = np.array([2, 3, 4, 5]) expected = Index([1, 5]) result = idx1.symmetric_difference(idx2) - self.assertTrue(tm.equalContents(result, expected)) - self.assertEqual(result.name, 'idx1') + assert tm.equalContents(result, expected) + assert result.name == 'idx1' result = idx1.symmetric_difference(idx2, result_name='new_name') - self.assertTrue(tm.equalContents(result, expected)) - self.assertEqual(result.name, 'new_name') + assert tm.equalContents(result, expected) + assert result.name == 'new_name' def test_is_numeric(self): - self.assertFalse(self.dateIndex.is_numeric()) - self.assertFalse(self.strIndex.is_numeric()) - self.assertTrue(self.intIndex.is_numeric()) - self.assertTrue(self.floatIndex.is_numeric()) - self.assertFalse(self.catIndex.is_numeric()) + assert not self.dateIndex.is_numeric() + assert not self.strIndex.is_numeric() + assert self.intIndex.is_numeric() + assert self.floatIndex.is_numeric() + assert not self.catIndex.is_numeric() def test_is_object(self): - self.assertTrue(self.strIndex.is_object()) - self.assertTrue(self.boolIndex.is_object()) - self.assertFalse(self.catIndex.is_object()) - self.assertFalse(self.intIndex.is_object()) - 
self.assertFalse(self.dateIndex.is_object()) - self.assertFalse(self.floatIndex.is_object()) + assert self.strIndex.is_object() + assert self.boolIndex.is_object() + assert not self.catIndex.is_object() + assert not self.intIndex.is_object() + assert not self.dateIndex.is_object() + assert not self.floatIndex.is_object() def test_is_all_dates(self): - self.assertTrue(self.dateIndex.is_all_dates) - self.assertFalse(self.strIndex.is_all_dates) - self.assertFalse(self.intIndex.is_all_dates) + assert self.dateIndex.is_all_dates + assert not self.strIndex.is_all_dates + assert not self.intIndex.is_all_dates def test_summary(self): self._check_method_works(Index.summary) @@ -950,8 +1060,8 @@ def test_summary(self): ind = Index(['{other}%s', "~:{range}:0"], name='A') result = ind.summary() # shouldn't be formatted accidentally. - self.assertIn('~:{range}:0', result) - self.assertIn('{other}%s', result) + assert '~:{range}:0' in result + assert '{other}%s' in result def test_format(self): self._check_method_works(Index.format) @@ -959,25 +1069,25 @@ def test_format(self): # GH 14626 # windows has different precision on datetime.datetime.now (it doesn't # include us since the default for Timestamp shows these but Index - # formating does not we are skipping) + # formatting does not we are skipping) now = datetime.now() if not str(now).endswith("000"): index = Index([now]) formatted = index.format() expected = [str(index[0])] - self.assertEqual(formatted, expected) + assert formatted == expected # 2845 index = Index([1, 2.0 + 3.0j, np.nan]) formatted = index.format() expected = [str(index[0]), str(index[1]), u('NaN')] - self.assertEqual(formatted, expected) + assert formatted == expected # is this really allowed? 
index = Index([1, 2.0 + 3.0j, None]) formatted = index.format() expected = [str(index[0]), str(index[1]), u('NaN')] - self.assertEqual(formatted, expected) + assert formatted == expected self.strIndex[:0].format() @@ -987,27 +1097,27 @@ def test_format_with_name_time_info(self): dates = Index([dt + inc for dt in self.dateIndex], name='something') formatted = dates.format(name=True) - self.assertEqual(formatted[0], 'something') + assert formatted[0] == 'something' def test_format_datetime_with_time(self): t = Index([datetime(2012, 2, 7), datetime(2012, 2, 7, 23)]) result = t.format() expected = ['2012-02-07 00:00:00', '2012-02-07 23:00:00'] - self.assertEqual(len(result), 2) - self.assertEqual(result, expected) + assert len(result) == 2 + assert result == expected def test_format_none(self): values = ['a', 'b', 'c', None] idx = Index(values) idx.format() - self.assertIsNone(idx[3]) + assert idx[3] is None def test_logical_compat(self): idx = self.create_index() - self.assertEqual(idx.all(), idx.values.all()) - self.assertEqual(idx.any(), idx.values.any()) + assert idx.all() == idx.values.all() + assert idx.any() == idx.values.any() def _check_method_works(self, method): method(self.empty) @@ -1049,46 +1159,64 @@ def test_get_indexer_invalid(self): # GH10411 idx = Index(np.arange(10)) - with tm.assertRaisesRegexp(ValueError, 'tolerance argument'): + with tm.assert_raises_regex(ValueError, 'tolerance argument'): idx.get_indexer([1, 0], tolerance=1) - with tm.assertRaisesRegexp(ValueError, 'limit argument'): + with tm.assert_raises_regex(ValueError, 'limit argument'): idx.get_indexer([1, 0], limit=1) - def test_get_indexer_nearest(self): + @pytest.mark.parametrize( + 'method, tolerance, indexer, expected', + [ + ('pad', None, [0, 5, 9], [0, 5, 9]), + ('backfill', None, [0, 5, 9], [0, 5, 9]), + ('nearest', None, [0, 5, 9], [0, 5, 9]), + ('pad', 0, [0, 5, 9], [0, 5, 9]), + ('backfill', 0, [0, 5, 9], [0, 5, 9]), + ('nearest', 0, [0, 5, 9], [0, 5, 9]), + + ('pad', None, 
[0.2, 1.8, 8.5], [0, 1, 8]), + ('backfill', None, [0.2, 1.8, 8.5], [1, 2, 9]), + ('nearest', None, [0.2, 1.8, 8.5], [0, 2, 9]), + ('pad', 1, [0.2, 1.8, 8.5], [0, 1, 8]), + ('backfill', 1, [0.2, 1.8, 8.5], [1, 2, 9]), + ('nearest', 1, [0.2, 1.8, 8.5], [0, 2, 9]), + + ('pad', 0.2, [0.2, 1.8, 8.5], [0, -1, -1]), + ('backfill', 0.2, [0.2, 1.8, 8.5], [-1, 2, -1]), + ('nearest', 0.2, [0.2, 1.8, 8.5], [0, 2, -1])]) + def test_get_indexer_nearest(self, method, tolerance, indexer, expected): idx = Index(np.arange(10)) - all_methods = ['pad', 'backfill', 'nearest'] - for method in all_methods: - actual = idx.get_indexer([0, 5, 9], method=method) - tm.assert_numpy_array_equal(actual, np.array([0, 5, 9], - dtype=np.intp)) - - actual = idx.get_indexer([0, 5, 9], method=method, tolerance=0) - tm.assert_numpy_array_equal(actual, np.array([0, 5, 9], - dtype=np.intp)) - - for method, expected in zip(all_methods, [[0, 1, 8], [1, 2, 9], - [0, 2, 9]]): - actual = idx.get_indexer([0.2, 1.8, 8.5], method=method) - tm.assert_numpy_array_equal(actual, np.array(expected, - dtype=np.intp)) - - actual = idx.get_indexer([0.2, 1.8, 8.5], method=method, - tolerance=1) - tm.assert_numpy_array_equal(actual, np.array(expected, - dtype=np.intp)) + actual = idx.get_indexer(indexer, method=method, tolerance=tolerance) + tm.assert_numpy_array_equal(actual, np.array(expected, + dtype=np.intp)) + + @pytest.mark.parametrize('listtype', [list, tuple, Series, np.array]) + @pytest.mark.parametrize( + 'tolerance, expected', + list(zip([[0.3, 0.3, 0.1], [0.2, 0.1, 0.1], + [0.1, 0.5, 0.5]], + [[0, 2, -1], [0, -1, -1], + [-1, 2, 9]]))) + def test_get_indexer_nearest_listlike_tolerance(self, tolerance, + expected, listtype): + idx = Index(np.arange(10)) - for method, expected in zip(all_methods, [[0, -1, -1], [-1, 2, -1], - [0, 2, -1]]): - actual = idx.get_indexer([0.2, 1.8, 8.5], method=method, - tolerance=0.2) - tm.assert_numpy_array_equal(actual, np.array(expected, - dtype=np.intp)) + actual = 
idx.get_indexer([0.2, 1.8, 8.5], method='nearest', + tolerance=listtype(tolerance)) + tm.assert_numpy_array_equal(actual, np.array(expected, + dtype=np.intp)) - with tm.assertRaisesRegexp(ValueError, 'limit argument'): + def test_get_indexer_nearest_error(self): + idx = Index(np.arange(10)) + with tm.assert_raises_regex(ValueError, 'limit argument'): idx.get_indexer([1, 0], method='nearest', limit=1) + with pytest.raises(ValueError, match='tolerance size must match'): + idx.get_indexer([1, 0], method='nearest', + tolerance=[1, 2, 3]) + def test_get_indexer_nearest_decreasing(self): idx = Index(np.arange(10))[::-1] @@ -1115,41 +1243,54 @@ def test_get_indexer_strings(self): expected = np.array([0, 0, 1, -1], dtype=np.intp) tm.assert_numpy_array_equal(actual, expected) - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): idx.get_indexer(['a', 'b', 'c', 'd'], method='nearest') - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): idx.get_indexer(['a', 'b', 'c', 'd'], method='pad', tolerance=2) + with pytest.raises(TypeError): + idx.get_indexer(['a', 'b', 'c', 'd'], method='pad', + tolerance=[2, 2, 2, 2]) + + def test_get_indexer_numeric_index_boolean_target(self): + # GH 16877 + numeric_idx = pd.Index(range(4)) + result = numeric_idx.get_indexer([True, False, True]) + expected = np.array([-1, -1, -1], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + def test_get_loc(self): idx = pd.Index([0, 1, 2]) all_methods = [None, 'pad', 'backfill', 'nearest'] for method in all_methods: - self.assertEqual(idx.get_loc(1, method=method), 1) + assert idx.get_loc(1, method=method) == 1 if method is not None: - self.assertEqual(idx.get_loc(1, method=method, tolerance=0), 1) - with tm.assertRaises(TypeError): + assert idx.get_loc(1, method=method, tolerance=0) == 1 + with pytest.raises(TypeError): idx.get_loc([1, 2], method=method) for method, loc in [('pad', 1), ('backfill', 2), ('nearest', 1)]: - self.assertEqual(idx.get_loc(1.1, method), 
loc) + assert idx.get_loc(1.1, method) == loc for method, loc in [('pad', 1), ('backfill', 2), ('nearest', 1)]: - self.assertEqual(idx.get_loc(1.1, method, tolerance=1), loc) + assert idx.get_loc(1.1, method, tolerance=1) == loc for method in ['pad', 'backfill', 'nearest']: - with tm.assertRaises(KeyError): + with pytest.raises(KeyError): idx.get_loc(1.1, method, tolerance=0.05) - with tm.assertRaisesRegexp(ValueError, 'must be numeric'): + with tm.assert_raises_regex(ValueError, 'must be numeric'): idx.get_loc(1.1, 'nearest', tolerance='invalid') - with tm.assertRaisesRegexp(ValueError, 'tolerance .* valid if'): + with tm.assert_raises_regex(ValueError, 'tolerance .* valid if'): idx.get_loc(1.1, tolerance=1) + with pytest.raises(ValueError, match='tolerance size must match'): + idx.get_loc(1.1, 'nearest', tolerance=[1, 1]) idx = pd.Index(['a', 'c']) - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): idx.get_loc('a', method='nearest') - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): idx.get_loc('a', method='pad', tolerance='invalid') def test_slice_locs(self): @@ -1157,71 +1298,71 @@ def test_slice_locs(self): idx = Index(np.array([0, 1, 2, 5, 6, 7, 9, 10], dtype=dtype)) n = len(idx) - self.assertEqual(idx.slice_locs(start=2), (2, n)) - self.assertEqual(idx.slice_locs(start=3), (3, n)) - self.assertEqual(idx.slice_locs(3, 8), (3, 6)) - self.assertEqual(idx.slice_locs(5, 10), (3, n)) - self.assertEqual(idx.slice_locs(end=8), (0, 6)) - self.assertEqual(idx.slice_locs(end=9), (0, 7)) + assert idx.slice_locs(start=2) == (2, n) + assert idx.slice_locs(start=3) == (3, n) + assert idx.slice_locs(3, 8) == (3, 6) + assert idx.slice_locs(5, 10) == (3, n) + assert idx.slice_locs(end=8) == (0, 6) + assert idx.slice_locs(end=9) == (0, 7) # reversed idx2 = idx[::-1] - self.assertEqual(idx2.slice_locs(8, 2), (2, 6)) - self.assertEqual(idx2.slice_locs(7, 3), (2, 5)) + assert idx2.slice_locs(8, 2) == (2, 6) + assert idx2.slice_locs(7, 3) == (2, 
5) # float slicing idx = Index(np.array([0, 1, 2, 5, 6, 7, 9, 10], dtype=float)) n = len(idx) - self.assertEqual(idx.slice_locs(5.0, 10.0), (3, n)) - self.assertEqual(idx.slice_locs(4.5, 10.5), (3, 8)) + assert idx.slice_locs(5.0, 10.0) == (3, n) + assert idx.slice_locs(4.5, 10.5) == (3, 8) idx2 = idx[::-1] - self.assertEqual(idx2.slice_locs(8.5, 1.5), (2, 6)) - self.assertEqual(idx2.slice_locs(10.5, -1), (0, n)) + assert idx2.slice_locs(8.5, 1.5) == (2, 6) + assert idx2.slice_locs(10.5, -1) == (0, n) # int slicing with floats # GH 4892, these are all TypeErrors idx = Index(np.array([0, 1, 2, 5, 6, 7, 9, 10], dtype=int)) - self.assertRaises(TypeError, - lambda: idx.slice_locs(5.0, 10.0), (3, n)) - self.assertRaises(TypeError, - lambda: idx.slice_locs(4.5, 10.5), (3, 8)) + pytest.raises(TypeError, + lambda: idx.slice_locs(5.0, 10.0), (3, n)) + pytest.raises(TypeError, + lambda: idx.slice_locs(4.5, 10.5), (3, 8)) idx2 = idx[::-1] - self.assertRaises(TypeError, - lambda: idx2.slice_locs(8.5, 1.5), (2, 6)) - self.assertRaises(TypeError, - lambda: idx2.slice_locs(10.5, -1), (0, n)) + pytest.raises(TypeError, + lambda: idx2.slice_locs(8.5, 1.5), (2, 6)) + pytest.raises(TypeError, + lambda: idx2.slice_locs(10.5, -1), (0, n)) def test_slice_locs_dup(self): idx = Index(['a', 'a', 'b', 'c', 'd', 'd']) - self.assertEqual(idx.slice_locs('a', 'd'), (0, 6)) - self.assertEqual(idx.slice_locs(end='d'), (0, 6)) - self.assertEqual(idx.slice_locs('a', 'c'), (0, 4)) - self.assertEqual(idx.slice_locs('b', 'd'), (2, 6)) + assert idx.slice_locs('a', 'd') == (0, 6) + assert idx.slice_locs(end='d') == (0, 6) + assert idx.slice_locs('a', 'c') == (0, 4) + assert idx.slice_locs('b', 'd') == (2, 6) idx2 = idx[::-1] - self.assertEqual(idx2.slice_locs('d', 'a'), (0, 6)) - self.assertEqual(idx2.slice_locs(end='a'), (0, 6)) - self.assertEqual(idx2.slice_locs('d', 'b'), (0, 4)) - self.assertEqual(idx2.slice_locs('c', 'a'), (2, 6)) + assert idx2.slice_locs('d', 'a') == (0, 6) + assert 
idx2.slice_locs(end='a') == (0, 6) + assert idx2.slice_locs('d', 'b') == (0, 4) + assert idx2.slice_locs('c', 'a') == (2, 6) for dtype in [int, float]: idx = Index(np.array([10, 12, 12, 14], dtype=dtype)) - self.assertEqual(idx.slice_locs(12, 12), (1, 3)) - self.assertEqual(idx.slice_locs(11, 13), (1, 3)) + assert idx.slice_locs(12, 12) == (1, 3) + assert idx.slice_locs(11, 13) == (1, 3) idx2 = idx[::-1] - self.assertEqual(idx2.slice_locs(12, 12), (1, 3)) - self.assertEqual(idx2.slice_locs(13, 11), (1, 3)) + assert idx2.slice_locs(12, 12) == (1, 3) + assert idx2.slice_locs(13, 11) == (1, 3) def test_slice_locs_na(self): idx = Index([np.nan, 1, 2]) - self.assertRaises(KeyError, idx.slice_locs, start=1.5) - self.assertRaises(KeyError, idx.slice_locs, end=1.5) - self.assertEqual(idx.slice_locs(1), (1, 3)) - self.assertEqual(idx.slice_locs(np.nan), (0, 3)) + pytest.raises(KeyError, idx.slice_locs, start=1.5) + pytest.raises(KeyError, idx.slice_locs, end=1.5) + assert idx.slice_locs(1) == (1, 3) + assert idx.slice_locs(np.nan) == (0, 3) idx = Index([0, np.nan, np.nan, 1, 2]) - self.assertEqual(idx.slice_locs(np.nan), (1, 5)) + assert idx.slice_locs(np.nan) == (1, 5) def test_slice_locs_negative_step(self): idx = Index(list('bcdxy')) @@ -1233,7 +1374,7 @@ def check_slice(in_slice, expected): in_slice.step) result = idx[s_start:s_stop:in_slice.step] expected = pd.Index(list(expected)) - self.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) for in_slice, expected in [ (SLC[::-1], 'yxdcb'), (SLC['b':'y':-1], ''), @@ -1255,40 +1396,61 @@ def test_drop(self): drop = self.strIndex[lrange(5, 10)] dropped = self.strIndex.drop(drop) expected = self.strIndex[lrange(5) + lrange(10, n)] - self.assert_index_equal(dropped, expected) + tm.assert_index_equal(dropped, expected) - self.assertRaises(ValueError, self.strIndex.drop, ['foo', 'bar']) - self.assertRaises(ValueError, self.strIndex.drop, ['1', 'bar']) + pytest.raises(KeyError, self.strIndex.drop, 
['foo', 'bar']) + pytest.raises(KeyError, self.strIndex.drop, ['1', 'bar']) # errors='ignore' mixed = drop.tolist() + ['foo'] dropped = self.strIndex.drop(mixed, errors='ignore') expected = self.strIndex[lrange(5) + lrange(10, n)] - self.assert_index_equal(dropped, expected) + tm.assert_index_equal(dropped, expected) dropped = self.strIndex.drop(['foo', 'bar'], errors='ignore') expected = self.strIndex[lrange(n)] - self.assert_index_equal(dropped, expected) + tm.assert_index_equal(dropped, expected) dropped = self.strIndex.drop(self.strIndex[0]) expected = self.strIndex[1:] - self.assert_index_equal(dropped, expected) + tm.assert_index_equal(dropped, expected) ser = Index([1, 2, 3]) dropped = ser.drop(1) expected = Index([2, 3]) - self.assert_index_equal(dropped, expected) + tm.assert_index_equal(dropped, expected) # errors='ignore' - self.assertRaises(ValueError, ser.drop, [3, 4]) + pytest.raises(KeyError, ser.drop, [3, 4]) dropped = ser.drop(4, errors='ignore') expected = Index([1, 2, 3]) - self.assert_index_equal(dropped, expected) + tm.assert_index_equal(dropped, expected) dropped = ser.drop([3, 4, 5], errors='ignore') expected = Index([1, 2]) - self.assert_index_equal(dropped, expected) + tm.assert_index_equal(dropped, expected) + + @pytest.mark.parametrize("values", [['a', 'b', ('c', 'd')], + ['a', ('c', 'd'), 'b'], + [('c', 'd'), 'a', 'b']]) + @pytest.mark.parametrize("to_drop", [[('c', 'd'), 'a'], ['a', ('c', 'd')]]) + def test_drop_tuple(self, values, to_drop): + # GH 18304 + index = pd.Index(values) + expected = pd.Index(['b']) + + result = index.drop(to_drop) + tm.assert_index_equal(result, expected) + + removed = index.drop(to_drop[0]) + for drop_me in to_drop[1], [to_drop[1]]: + result = removed.drop(drop_me) + tm.assert_index_equal(result, expected) + + removed = index.drop(to_drop[1]) + for drop_me in to_drop[1], [to_drop[1]]: + pytest.raises(KeyError, removed.drop, drop_me) def test_tuple_union_bug(self): import pandas @@ -1307,19 +1469,21 @@ def 
test_tuple_union_bug(self): int_idx = idx1.intersection(idx2) # needs to be 1d like idx1 and idx2 expected = idx1[:4] # pandas.Index(sorted(set(idx1) & set(idx2))) - self.assertEqual(int_idx.ndim, 1) - self.assert_index_equal(int_idx, expected) + assert int_idx.ndim == 1 + tm.assert_index_equal(int_idx, expected) # union broken union_idx = idx1.union(idx2) expected = idx2 - self.assertEqual(union_idx.ndim, 1) - self.assert_index_equal(union_idx, expected) + assert union_idx.ndim == 1 + tm.assert_index_equal(union_idx, expected) def test_is_monotonic_incomparable(self): index = Index([5, datetime.now(), 7]) - self.assertFalse(index.is_monotonic) - self.assertFalse(index.is_monotonic_decreasing) + assert not index.is_monotonic_increasing + assert not index.is_monotonic_decreasing + assert not index._is_strictly_monotonic_increasing + assert not index._is_strictly_monotonic_decreasing def test_get_set_value(self): values = np.random.randn(100) @@ -1328,7 +1492,7 @@ def test_get_set_value(self): assert_almost_equal(self.dateIndex.get_value(values, date), values[67]) self.dateIndex.set_value(values, date, 10) - self.assertEqual(values[67], 10) + assert values[67] == 10 def test_isin(self): values = ['foo', 'bar', 'quux'] @@ -1345,26 +1509,37 @@ def test_isin(self): # empty, return dtype bool idx = Index([]) result = idx.isin(values) - self.assertEqual(len(result), 0) - self.assertEqual(result.dtype, np.bool_) + assert len(result) == 0 + assert result.dtype == np.bool_ - def test_isin_nan(self): + @pytest.mark.skipif(PYPY, reason="np.nan is float('nan') on PyPy") + def test_isin_nan_not_pypy(self): + tm.assert_numpy_array_equal(Index(['a', np.nan]).isin([float('nan')]), + np.array([False, False])) + + @pytest.mark.skipif(not PYPY, reason="np.nan is float('nan') on PyPy") + def test_isin_nan_pypy(self): + tm.assert_numpy_array_equal(Index(['a', np.nan]).isin([float('nan')]), + np.array([False, True])) + + def test_isin_nan_common(self): 
tm.assert_numpy_array_equal(Index(['a', np.nan]).isin([np.nan]), np.array([False, True])) tm.assert_numpy_array_equal(Index(['a', pd.NaT]).isin([pd.NaT]), np.array([False, True])) - tm.assert_numpy_array_equal(Index(['a', np.nan]).isin([float('nan')]), - np.array([False, False])) tm.assert_numpy_array_equal(Index(['a', np.nan]).isin([pd.NaT]), np.array([False, False])) + # Float64Index overrides isin, so must be checked separately tm.assert_numpy_array_equal(Float64Index([1.0, np.nan]).isin([np.nan]), np.array([False, True])) tm.assert_numpy_array_equal( Float64Index([1.0, np.nan]).isin([float('nan')]), np.array([False, True])) + + # we cannot compare NaT with NaN tm.assert_numpy_array_equal(Float64Index([1.0, np.nan]).isin([pd.NaT]), - np.array([False, True])) + np.array([False, False])) def test_isin_level_kwarg(self): def check_idx(idx): @@ -1374,24 +1549,33 @@ def check_idx(idx): tm.assert_numpy_array_equal(expected, idx.isin(values, level=0)) tm.assert_numpy_array_equal(expected, idx.isin(values, level=-1)) - self.assertRaises(IndexError, idx.isin, values, level=1) - self.assertRaises(IndexError, idx.isin, values, level=10) - self.assertRaises(IndexError, idx.isin, values, level=-2) + pytest.raises(IndexError, idx.isin, values, level=1) + pytest.raises(IndexError, idx.isin, values, level=10) + pytest.raises(IndexError, idx.isin, values, level=-2) - self.assertRaises(KeyError, idx.isin, values, level=1.0) - self.assertRaises(KeyError, idx.isin, values, level='foobar') + pytest.raises(KeyError, idx.isin, values, level=1.0) + pytest.raises(KeyError, idx.isin, values, level='foobar') idx.name = 'foobar' tm.assert_numpy_array_equal(expected, idx.isin(values, level='foobar')) - self.assertRaises(KeyError, idx.isin, values, level='xyzzy') - self.assertRaises(KeyError, idx.isin, values, level=np.nan) + pytest.raises(KeyError, idx.isin, values, level='xyzzy') + pytest.raises(KeyError, idx.isin, values, level=np.nan) check_idx(Index(['qux', 'baz', 'foo', 'bar'])) # 
Float64Index overrides isin, so must be checked separately check_idx(Float64Index([1.0, 2.0, 3.0, 4.0])) + @pytest.mark.parametrize("empty", [[], Series(), np.array([])]) + def test_isin_empty(self, empty): + # see gh-16991 + idx = Index(["a", "b"]) + expected = np.array([False, False]) + + result = idx.isin(empty) + tm.assert_numpy_array_equal(expected, result) + def test_boolean_cmp(self): values = [1, 2, 3, 4] @@ -1403,11 +1587,17 @@ def test_boolean_cmp(self): def test_get_level_values(self): result = self.strIndex.get_level_values(0) - self.assert_index_equal(result, self.strIndex) + tm.assert_index_equal(result, self.strIndex) + + # test for name (GH 17414) + index_with_name = self.strIndex.copy() + index_with_name.name = 'a' + result = index_with_name.get_level_values('a') + tm.assert_index_equal(result, index_with_name) def test_slice_keep_name(self): idx = Index(['a', 'b'], name='asdf') - self.assertEqual(idx.name, idx[1:].name) + assert idx.name == idx[1:].name def test_join_self(self): # instance attributes of the form self.Index @@ -1418,7 +1608,7 @@ def test_join_self(self): for kind in kinds: joined = res.join(res, how=kind) - self.assertIs(res, joined) + assert res is joined def test_str_attribute(self): # GH9068 @@ -1434,8 +1624,8 @@ def test_str_attribute(self): MultiIndex.from_tuples([('foo', '1'), ('bar', '3')]), PeriodIndex(start='2000', end='2010', freq='A')] for idx in indices: - with self.assertRaisesRegexp(AttributeError, - 'only use .str accessor'): + with tm.assert_raises_regex(AttributeError, + 'only use .str accessor'): idx.str.repeat(2) idx = Index(['a b c', 'd e', 'f']) @@ -1451,7 +1641,7 @@ def test_str_attribute(self): idx = Index(['a1', 'a2', 'b1', 'b2']) expected = np.array([True, True, False, False]) tm.assert_numpy_array_equal(idx.str.startswith('a'), expected) - self.assertIsInstance(idx.str.startswith('a'), np.ndarray) + assert isinstance(idx.str.startswith('a'), np.ndarray) s = Series(range(4), index=idx) expected = 
Series(range(2), index=['a1', 'a2']) tm.assert_series_equal(s[s.index.str.startswith('a')], expected) @@ -1459,17 +1649,16 @@ def test_str_attribute(self): def test_tab_completion(self): # GH 9910 idx = Index(list('abcd')) - self.assertTrue('str' in dir(idx)) + assert 'str' in dir(idx) idx = Index(range(4)) - self.assertTrue('str' not in dir(idx)) + assert 'str' not in dir(idx) def test_indexing_doesnt_change_class(self): idx = Index([1, 2, 3, 'a', 'b', 'c']) - self.assertTrue(idx[1:3].identical(pd.Index([2, 3], dtype=np.object_))) - self.assertTrue(idx[[0, 1]].identical(pd.Index( - [1, 2], dtype=np.object_))) + assert idx[1:3].identical(pd.Index([2, 3], dtype=np.object_)) + assert idx[[0, 1]].identical(pd.Index([1, 2], dtype=np.object_)) def test_outer_join_sort(self): left_idx = Index(np.random.permutation(15)) @@ -1510,20 +1699,14 @@ def test_take_fill_value(self): msg = ('When allow_fill=True and fill_value is not None, ' 'all indices must be >= -1') - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -5]), fill_value=True) - with tm.assertRaises(IndexError): + with pytest.raises(IndexError): idx.take(np.array([1, -5])) - def test_reshape_raise(self): - msg = "reshaping is not supported" - idx = pd.Index([0, 1, 2]) - tm.assertRaisesRegexp(NotImplementedError, msg, - idx.reshape, idx.shape) - def test_reindex_preserves_name_if_target_is_list_or_ndarray(self): # GH6552 idx = pd.Index([0, 1, 2]) @@ -1531,28 +1714,28 @@ def test_reindex_preserves_name_if_target_is_list_or_ndarray(self): dt_idx = pd.date_range('20130101', periods=3) idx.name = None - self.assertEqual(idx.reindex([])[0].name, None) - self.assertEqual(idx.reindex(np.array([]))[0].name, None) - self.assertEqual(idx.reindex(idx.tolist())[0].name, None) - 
self.assertEqual(idx.reindex(idx.tolist()[:-1])[0].name, None) - self.assertEqual(idx.reindex(idx.values)[0].name, None) - self.assertEqual(idx.reindex(idx.values[:-1])[0].name, None) + assert idx.reindex([])[0].name is None + assert idx.reindex(np.array([]))[0].name is None + assert idx.reindex(idx.tolist())[0].name is None + assert idx.reindex(idx.tolist()[:-1])[0].name is None + assert idx.reindex(idx.values)[0].name is None + assert idx.reindex(idx.values[:-1])[0].name is None # Must preserve name even if dtype changes. - self.assertEqual(idx.reindex(dt_idx.values)[0].name, None) - self.assertEqual(idx.reindex(dt_idx.tolist())[0].name, None) + assert idx.reindex(dt_idx.values)[0].name is None + assert idx.reindex(dt_idx.tolist())[0].name is None idx.name = 'foobar' - self.assertEqual(idx.reindex([])[0].name, 'foobar') - self.assertEqual(idx.reindex(np.array([]))[0].name, 'foobar') - self.assertEqual(idx.reindex(idx.tolist())[0].name, 'foobar') - self.assertEqual(idx.reindex(idx.tolist()[:-1])[0].name, 'foobar') - self.assertEqual(idx.reindex(idx.values)[0].name, 'foobar') - self.assertEqual(idx.reindex(idx.values[:-1])[0].name, 'foobar') + assert idx.reindex([])[0].name == 'foobar' + assert idx.reindex(np.array([]))[0].name == 'foobar' + assert idx.reindex(idx.tolist())[0].name == 'foobar' + assert idx.reindex(idx.tolist()[:-1])[0].name == 'foobar' + assert idx.reindex(idx.values)[0].name == 'foobar' + assert idx.reindex(idx.values[:-1])[0].name == 'foobar' # Must preserve name even if dtype changes. 
- self.assertEqual(idx.reindex(dt_idx.values)[0].name, 'foobar') - self.assertEqual(idx.reindex(dt_idx.tolist())[0].name, 'foobar') + assert idx.reindex(dt_idx.values)[0].name == 'foobar' + assert idx.reindex(dt_idx.tolist())[0].name == 'foobar' def test_reindex_preserves_type_if_target_is_empty_list_or_array(self): # GH7774 @@ -1561,10 +1744,9 @@ def test_reindex_preserves_type_if_target_is_empty_list_or_array(self): def get_reindex_type(target): return idx.reindex(target)[0].dtype.type - self.assertEqual(get_reindex_type([]), np.object_) - self.assertEqual(get_reindex_type(np.array([])), np.object_) - self.assertEqual(get_reindex_type(np.array([], dtype=np.int64)), - np.object_) + assert get_reindex_type([]) == np.object_ + assert get_reindex_type(np.array([])) == np.object_ + assert get_reindex_type(np.array([], dtype=np.int64)) == np.object_ def test_reindex_doesnt_preserve_type_if_target_is_empty_index(self): # GH7774 @@ -1573,14 +1755,14 @@ def test_reindex_doesnt_preserve_type_if_target_is_empty_index(self): def get_reindex_type(target): return idx.reindex(target)[0].dtype.type - self.assertEqual(get_reindex_type(pd.Int64Index([])), np.int64) - self.assertEqual(get_reindex_type(pd.Float64Index([])), np.float64) - self.assertEqual(get_reindex_type(pd.DatetimeIndex([])), np.datetime64) + assert get_reindex_type(pd.Int64Index([])) == np.int64 + assert get_reindex_type(pd.Float64Index([])) == np.float64 + assert get_reindex_type(pd.DatetimeIndex([])) == np.datetime64 reindexed = idx.reindex(pd.MultiIndex( [pd.Int64Index([]), pd.Float64Index([])], [[], []]))[0] - self.assertEqual(reindexed.levels[0].dtype.type, np.int64) - self.assertEqual(reindexed.levels[1].dtype.type, np.float64) + assert reindexed.levels[0].dtype.type == np.int64 + assert reindexed.levels[1].dtype.type == np.float64 def test_groupby(self): idx = Index(range(5)) @@ -1601,11 +1783,11 @@ def test_equals_op_multiindex(self): mi2 = MultiIndex.from_tuples([(1, 2), (4, 6)]) 
tm.assert_numpy_array_equal(df.index == mi2, np.array([True, False])) mi3 = MultiIndex.from_tuples([(1, 2), (4, 5), (8, 9)]) - with tm.assertRaisesRegexp(ValueError, "Lengths must match"): + with tm.assert_raises_regex(ValueError, "Lengths must match"): df.index == mi3 index_a = Index(['foo', 'bar', 'baz']) - with tm.assertRaisesRegexp(ValueError, "Lengths must match"): + with tm.assert_raises_regex(ValueError, "Lengths must match"): df.index == index_a tm.assert_numpy_array_equal(index_a == mi3, np.array([False, False, False])) @@ -1613,8 +1795,8 @@ def test_equals_op_multiindex(self): def test_conversion_preserves_name(self): # GH 10875 i = pd.Index(['01:02:03', '01:02:04'], name='label') - self.assertEqual(i.name, pd.to_datetime(i).name) - self.assertEqual(i.name, pd.to_timedelta(i).name) + assert i.name == pd.to_datetime(i).name + assert i.name == pd.to_timedelta(i).name def test_string_index_repr(self): # py3/py2 repr can differ because of "u" prefix @@ -1629,10 +1811,10 @@ def test_string_index_repr(self): idx = pd.Index(['a', 'bb', 'ccc']) if PY3: expected = u"""Index(['a', 'bb', 'ccc'], dtype='object')""" - self.assertEqual(repr(idx), expected) + assert repr(idx) == expected else: expected = u"""Index([u'a', u'bb', u'ccc'], dtype='object')""" - self.assertEqual(coerce(idx), expected) + assert coerce(idx) == expected # multiple lines idx = pd.Index(['a', 'bb', 'ccc'] * 10) @@ -1643,7 +1825,7 @@ def test_string_index_repr(self): 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'], dtype='object')""" - self.assertEqual(repr(idx), expected) + assert repr(idx) == expected else: expected = u"""\ Index([u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', @@ -1651,7 +1833,7 @@ def test_string_index_repr(self): u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc'], dtype='object')""" - self.assertEqual(coerce(idx), expected) + assert coerce(idx) == expected # truncated idx = pd.Index(['a', 'bb', 'ccc'] * 100) @@ -1662,7 +1844,7 @@ def 
test_string_index_repr(self): 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'], dtype='object', length=300)""" - self.assertEqual(repr(idx), expected) + assert repr(idx) == expected else: expected = u"""\ Index([u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', @@ -1670,16 +1852,16 @@ def test_string_index_repr(self): u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc'], dtype='object', length=300)""" - self.assertEqual(coerce(idx), expected) + assert coerce(idx) == expected # short idx = pd.Index([u'あ', u'いい', u'ううう']) if PY3: expected = u"""Index(['あ', 'いい', 'ううう'], dtype='object')""" - self.assertEqual(repr(idx), expected) + assert repr(idx) == expected else: expected = u"""Index([u'あ', u'いい', u'ううう'], dtype='object')""" - self.assertEqual(coerce(idx), expected) + assert coerce(idx) == expected # multiple lines idx = pd.Index([u'あ', u'いい', u'ううう'] * 10) @@ -1691,7 +1873,7 @@ def test_string_index_repr(self): u" 'あ', 'いい', 'ううう', 'あ', 'いい', " u"'ううう'],\n" u" dtype='object')") - self.assertEqual(repr(idx), expected) + assert repr(idx) == expected else: expected = (u"Index([u'あ', u'いい', u'ううう', u'あ', u'いい', " u"u'ううう', u'あ', u'いい', u'ううう', u'あ',\n" @@ -1700,7 +1882,7 @@ def test_string_index_repr(self): u" u'ううう', u'あ', u'いい', u'ううう', u'あ', " u"u'いい', u'ううう', u'あ', u'いい', u'ううう'],\n" u" dtype='object')") - self.assertEqual(coerce(idx), expected) + assert coerce(idx) == expected # truncated idx = pd.Index([u'あ', u'いい', u'ううう'] * 100) @@ -1711,7 +1893,7 @@ def test_string_index_repr(self): u" 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', " u"'ううう', 'あ', 'いい', 'ううう'],\n" u" dtype='object', length=300)") - self.assertEqual(repr(idx), expected) + assert repr(idx) == expected else: expected = (u"Index([u'あ', u'いい', u'ううう', u'あ', u'いい', " u"u'ううう', u'あ', u'いい', u'ううう', u'あ',\n" @@ -1720,7 +1902,7 @@ def test_string_index_repr(self): u"u'いい', u'ううう', u'あ', u'いい', u'ううう'],\n" u" dtype='object', length=300)") - 
self.assertEqual(coerce(idx), expected) + assert coerce(idx) == expected # Emable Unicode option ----------------------------------------- with cf.option_context('display.unicode.east_asian_width', True): @@ -1730,11 +1912,11 @@ def test_string_index_repr(self): if PY3: expected = (u"Index(['あ', 'いい', 'ううう'], " u"dtype='object')") - self.assertEqual(repr(idx), expected) + assert repr(idx) == expected else: expected = (u"Index([u'あ', u'いい', u'ううう'], " u"dtype='object')") - self.assertEqual(coerce(idx), expected) + assert coerce(idx) == expected # multiple lines idx = pd.Index([u'あ', u'いい', u'ううう'] * 10) @@ -1748,7 +1930,7 @@ def test_string_index_repr(self): u" 'あ', 'いい', 'ううう'],\n" u" dtype='object')""") - self.assertEqual(repr(idx), expected) + assert repr(idx) == expected else: expected = (u"Index([u'あ', u'いい', u'ううう', u'あ', u'いい', " u"u'ううう', u'あ', u'いい',\n" @@ -1760,7 +1942,7 @@ def test_string_index_repr(self): u"u'あ', u'いい', u'ううう'],\n" u" dtype='object')") - self.assertEqual(coerce(idx), expected) + assert coerce(idx) == expected # truncated idx = pd.Index([u'あ', u'いい', u'ううう'] * 100) @@ -1774,7 +1956,7 @@ def test_string_index_repr(self): u" 'ううう'],\n" u" dtype='object', length=300)") - self.assertEqual(repr(idx), expected) + assert repr(idx) == expected else: expected = (u"Index([u'あ', u'いい', u'ううう', u'あ', u'いい', " u"u'ううう', u'あ', u'いい',\n" @@ -1785,45 +1967,60 @@ def test_string_index_repr(self): u" u'いい', u'ううう'],\n" u" dtype='object', length=300)") - self.assertEqual(coerce(idx), expected) + assert coerce(idx) == expected + @pytest.mark.parametrize('dtype', [np.int64, np.float64]) + @pytest.mark.parametrize('delta', [1, 0, -1]) + def test_addsub_arithmetic(self, dtype, delta): + # GH 8142 + delta = dtype(delta) + idx = pd.Index([10, 11, 12], dtype=dtype) + result = idx + delta + expected = pd.Index(idx.values + delta, dtype=dtype) + tm.assert_index_equal(result, expected) -class TestMixedIntIndex(Base, tm.TestCase): + # this subtraction used to fail + 
result = idx - delta + expected = pd.Index(idx.values - delta, dtype=dtype) + tm.assert_index_equal(result, expected) + + tm.assert_index_equal(idx + idx, 2 * idx) + tm.assert_index_equal(idx - idx, 0 * idx) + assert not (idx - idx).empty + + def test_iadd_preserves_name(self): + # GH#17067, GH#19723 __iadd__ and __isub__ should preserve index name + ser = pd.Series([1, 2, 3]) + ser.index.name = 'foo' + + ser.index += 1 + assert ser.index.name == "foo" + + ser.index -= 1 + assert ser.index.name == "foo" + + +class TestMixedIntIndex(Base): # Mostly the tests from common.py for which the results differ # in py2 and py3 because ints and strings are uncomparable in py3 # (GH 13514) _holder = Index - def setUp(self): + def setup_method(self, method): self.indices = dict(mixedIndex=Index([0, 'a', 1, 'b', 2, 'c'])) self.setup_indices() def create_index(self): return self.mixedIndex - def test_order(self): - idx = self.create_index() - # 9816 deprecated - if PY36: - with tm.assertRaisesRegexp(TypeError, "'>' not supported"): - with tm.assert_produces_warning(FutureWarning): - idx.order() - elif PY3: - with tm.assertRaisesRegexp(TypeError, "unorderable types"): - with tm.assert_produces_warning(FutureWarning): - idx.order() - else: - with tm.assert_produces_warning(FutureWarning): - idx.order() - def test_argsort(self): idx = self.create_index() if PY36: - with tm.assertRaisesRegexp(TypeError, "'>' not supported"): + with tm.assert_raises_regex(TypeError, "'>|<' not supported"): result = idx.argsort() elif PY3: - with tm.assertRaisesRegexp(TypeError, "unorderable types"): + with tm.assert_raises_regex(TypeError, "unorderable types"): result = idx.argsort() else: result = idx.argsort() @@ -1833,10 +2030,10 @@ def test_argsort(self): def test_numpy_argsort(self): idx = self.create_index() if PY36: - with tm.assertRaisesRegexp(TypeError, "'>' not supported"): + with tm.assert_raises_regex(TypeError, "'>|<' not supported"): result = np.argsort(idx) elif PY3: - with 
tm.assertRaisesRegexp(TypeError, "unorderable types"): + with tm.assert_raises_regex(TypeError, "unorderable types"): result = np.argsort(idx) else: result = np.argsort(idx) @@ -1852,22 +2049,22 @@ def test_copy_name(self): second = first.__class__(first, copy=False) # Even though "copy=False", we want a new object. - self.assertIsNot(first, second) + assert first is not second # Not using tm.assert_index_equal() since names differ: - self.assertTrue(idx.equals(first)) + assert idx.equals(first) - self.assertEqual(first.name, 'mario') - self.assertEqual(second.name, 'mario') + assert first.name == 'mario' + assert second.name == 'mario' s1 = Series(2, index=first) s2 = Series(3, index=second[:-1]) - if PY3: - with tm.assert_produces_warning(RuntimeWarning): - # unorderable types - s3 = s1 * s2 - else: + + warning_type = RuntimeWarning if PY3 else None + with tm.assert_produces_warning(warning_type): + # Python 3: Unorderable types s3 = s1 * s2 - self.assertEqual(s3.index.name, 'mario') + + assert s3.index.name == 'mario' def test_copy_name2(self): # Check that adding a "name" parameter to the copy is honored @@ -1875,23 +2072,23 @@ def test_copy_name2(self): idx = pd.Index([1, 2], name='MyName') idx1 = idx.copy() - self.assertTrue(idx.equals(idx1)) - self.assertEqual(idx.name, 'MyName') - self.assertEqual(idx1.name, 'MyName') + assert idx.equals(idx1) + assert idx.name == 'MyName' + assert idx1.name == 'MyName' idx2 = idx.copy(name='NewName') - self.assertTrue(idx.equals(idx2)) - self.assertEqual(idx.name, 'MyName') - self.assertEqual(idx2.name, 'NewName') + assert idx.equals(idx2) + assert idx.name == 'MyName' + assert idx2.name == 'NewName' idx3 = idx.copy(names=['NewName']) - self.assertTrue(idx.equals(idx3)) - self.assertEqual(idx.name, 'MyName') - self.assertEqual(idx.names, ['MyName']) - self.assertEqual(idx3.name, 'NewName') - self.assertEqual(idx3.names, ['NewName']) + assert idx.equals(idx3) + assert idx.name == 'MyName' + assert idx.names == ['MyName'] + 
assert idx3.name == 'NewName' + assert idx3.names == ['NewName'] def test_union_base(self): idx = self.create_index() @@ -1903,11 +2100,11 @@ def test_union_base(self): # unorderable types result = first.union(second) expected = Index(['b', 2, 'c', 0, 'a', 1]) - self.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) else: result = first.union(second) expected = Index(['b', 2, 'c', 0, 'a', 1]) - self.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) # GH 10149 cases = [klass(second.values) @@ -1917,10 +2114,10 @@ def test_union_base(self): with tm.assert_produces_warning(RuntimeWarning): # unorderable types result = first.union(case) - self.assertTrue(tm.equalContents(result, idx)) + assert tm.equalContents(result, idx) else: result = first.union(case) - self.assertTrue(tm.equalContents(result, idx)) + assert tm.equalContents(result, idx) def test_intersection_base(self): # (same results for py2 and py3 but sortedness not tested elsewhere) @@ -1929,14 +2126,14 @@ def test_intersection_base(self): second = idx[:3] result = first.intersection(second) expected = Index([0, 'a', 1]) - self.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) # GH 10149 cases = [klass(second.values) for klass in [np.array, Series, list]] for case in cases: result = first.intersection(case) - self.assertTrue(tm.equalContents(result, second)) + assert tm.equalContents(result, second) def test_difference_base(self): # (same results for py2 and py3 but sortedness not tested elsewhere) @@ -1946,7 +2143,7 @@ def test_difference_base(self): result = first.difference(second) expected = Index([0, 1, 'a']) - self.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) def test_symmetric_difference(self): # (same results for py2 and py3 but sortedness not tested elsewhere) @@ -1956,12 +2153,12 @@ def test_symmetric_difference(self): result = first.symmetric_difference(second) expected = 
Index([0, 1, 2, 'a', 'c']) - self.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) def test_logical_compat(self): idx = self.create_index() - self.assertEqual(idx.all(), idx.values.all()) - self.assertEqual(idx.any(), idx.values.any()) + assert idx.all() == idx.values.all() + assert idx.any() == idx.values.any() def test_dropna(self): # GH 6194 @@ -2001,7 +2198,7 @@ def test_dropna(self): tm.assert_index_equal(nanidx.dropna(), idx) msg = "invalid how option: xxx" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): pd.Index([1, 2, 3]).dropna(how='xxx') def test_get_combined_index(self): @@ -2030,14 +2227,16 @@ def test_is_monotonic_na(self): pd.to_datetime(['2000-01-01', 'NaT', '2000-01-02']), pd.to_timedelta(['1 day', 'NaT']), ] for index in examples: - self.assertFalse(index.is_monotonic_increasing) - self.assertFalse(index.is_monotonic_decreasing) + assert not index.is_monotonic_increasing + assert not index.is_monotonic_decreasing + assert not index._is_strictly_monotonic_increasing + assert not index._is_strictly_monotonic_decreasing def test_repr_summary(self): with cf.option_context('display.max_seq_items', 10): r = repr(pd.Index(np.arange(1000))) - self.assertTrue(len(r) < 200) - self.assertTrue("..." in r) + assert len(r) < 200 + assert "..." 
in r def test_int_name_format(self): index = Index(['a', 'b', 'c'], name=0) @@ -2074,4 +2273,63 @@ def test_intersect_str_dates(self): i2 = Index(['aa'], dtype=object) res = i2.intersection(i1) - self.assertEqual(len(res), 0) + assert len(res) == 0 + + @pytest.mark.parametrize('op', [operator.eq, operator.ne, + operator.gt, operator.ge, + operator.lt, operator.le]) + def test_comparison_tzawareness_compat(self, op): + # GH#18162 + dr = pd.date_range('2016-01-01', periods=6) + dz = dr.tz_localize('US/Pacific') + + # Check that there isn't a problem aware-aware and naive-naive do not + # raise + naive_series = Series(dr) + aware_series = Series(dz) + with pytest.raises(TypeError): + op(dz, naive_series) + with pytest.raises(TypeError): + op(dr, aware_series) + + # TODO: implement _assert_tzawareness_compat for the reverse + # comparison with the Series on the left-hand side + + +class TestIndexUtils(object): + + @pytest.mark.parametrize('data, names, expected', [ + ([[1, 2, 3]], None, Index([1, 2, 3])), + ([[1, 2, 3]], ['name'], Index([1, 2, 3], name='name')), + ([['a', 'a'], ['c', 'd']], None, + MultiIndex([['a'], ['c', 'd']], [[0, 0], [0, 1]])), + ([['a', 'a'], ['c', 'd']], ['L1', 'L2'], + MultiIndex([['a'], ['c', 'd']], [[0, 0], [0, 1]], + names=['L1', 'L2'])), + ]) + def test_ensure_index_from_sequences(self, data, names, expected): + result = _ensure_index_from_sequences(data, names) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize('opname', ['eq', 'ne', 'le', 'lt', 'ge', 'gt', + 'add', 'radd', 'sub', 'rsub', + 'mul', 'rmul', 'truediv', 'rtruediv', + 'floordiv', 'rfloordiv', + 'pow', 'rpow', 'mod', 'divmod']) +def test_generated_op_names(opname, indices): + index = indices + if isinstance(index, ABCIndex) and opname == 'rsub': + # pd.Index.__rsub__ does not exist; though the method does exist + # for subclasses. 
see GH#19723 + return + opname = '__{name}__'.format(name=opname) + method = getattr(index, opname) + assert method.__name__ == opname + + +@pytest.mark.parametrize('idx_maker', tm.index_subclass_makers_generator()) +def test_index_subclass_constructor_wrong_kwargs(idx_maker): + # GH #19348 + with tm.assert_raises_regex(TypeError, 'unexpected keyword argument'): + idx_maker(foo='bar') diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 6b6885c082533..e9fddfde90348 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -1,17 +1,17 @@ # -*- coding: utf-8 -*- -# TODO(wesm): fix long line flake8 issues -# flake8: noqa +import pytest import pandas.util.testing as tm -from pandas.indexes.api import Index, CategoricalIndex +from pandas.core.indexes.api import Index, CategoricalIndex +from pandas.core.dtypes.dtypes import CategoricalDtype from .common import Base from pandas.compat import range, PY3 import numpy as np -from pandas import Categorical, compat, notnull +from pandas import Categorical, IntervalIndex, compat from pandas.util.testing import assert_almost_equal import pandas.core.config as cf import pandas as pd @@ -20,10 +20,10 @@ unicode = lambda x: x -class TestCategoricalIndex(Base, tm.TestCase): +class TestCategoricalIndex(Base): _holder = CategoricalIndex - def setUp(self): + def setup_method(self, method): self.indices = dict(catIndex=tm.makeCategoricalIndex(100)) self.setup_indices() @@ -40,62 +40,71 @@ def test_construction(self): result = Index(ci) tm.assert_index_equal(result, ci, exact=True) - self.assertFalse(result.ordered) + assert not result.ordered result = Index(ci.values) tm.assert_index_equal(result, ci, exact=True) - self.assertFalse(result.ordered) + assert not result.ordered # empty result = CategoricalIndex(categories=categories) - self.assert_index_equal(result.categories, Index(categories)) + tm.assert_index_equal(result.categories, Index(categories)) 
tm.assert_numpy_array_equal(result.codes, np.array([], dtype='int8')) - self.assertFalse(result.ordered) + assert not result.ordered # passing categories result = CategoricalIndex(list('aabbca'), categories=categories) - self.assert_index_equal(result.categories, Index(categories)) + tm.assert_index_equal(result.categories, Index(categories)) tm.assert_numpy_array_equal(result.codes, - np.array([0, 0, 1, 1, 2, 0], dtype='int8')) + np.array([0, 0, 1, + 1, 2, 0], dtype='int8')) c = pd.Categorical(list('aabbca')) result = CategoricalIndex(c) - self.assert_index_equal(result.categories, Index(list('abc'))) + tm.assert_index_equal(result.categories, Index(list('abc'))) tm.assert_numpy_array_equal(result.codes, - np.array([0, 0, 1, 1, 2, 0], dtype='int8')) - self.assertFalse(result.ordered) + np.array([0, 0, 1, + 1, 2, 0], dtype='int8')) + assert not result.ordered result = CategoricalIndex(c, categories=categories) - self.assert_index_equal(result.categories, Index(categories)) + tm.assert_index_equal(result.categories, Index(categories)) tm.assert_numpy_array_equal(result.codes, - np.array([0, 0, 1, 1, 2, 0], dtype='int8')) - self.assertFalse(result.ordered) + np.array([0, 0, 1, + 1, 2, 0], dtype='int8')) + assert not result.ordered ci = CategoricalIndex(c, categories=list('abcd')) result = CategoricalIndex(ci) - self.assert_index_equal(result.categories, Index(categories)) + tm.assert_index_equal(result.categories, Index(categories)) tm.assert_numpy_array_equal(result.codes, - np.array([0, 0, 1, 1, 2, 0], dtype='int8')) - self.assertFalse(result.ordered) + np.array([0, 0, 1, + 1, 2, 0], dtype='int8')) + assert not result.ordered result = CategoricalIndex(ci, categories=list('ab')) - self.assert_index_equal(result.categories, Index(list('ab'))) + tm.assert_index_equal(result.categories, Index(list('ab'))) tm.assert_numpy_array_equal(result.codes, - np.array([0, 0, 1, 1, -1, 0], - dtype='int8')) - self.assertFalse(result.ordered) + np.array([0, 0, 1, + 1, -1, 0], 
dtype='int8')) + assert not result.ordered result = CategoricalIndex(ci, categories=list('ab'), ordered=True) - self.assert_index_equal(result.categories, Index(list('ab'))) + tm.assert_index_equal(result.categories, Index(list('ab'))) tm.assert_numpy_array_equal(result.codes, - np.array([0, 0, 1, 1, -1, 0], - dtype='int8')) - self.assertTrue(result.ordered) + np.array([0, 0, 1, + 1, -1, 0], dtype='int8')) + assert result.ordered + + result = pd.CategoricalIndex(ci, categories=list('ab'), ordered=True) + expected = pd.CategoricalIndex(ci, categories=list('ab'), ordered=True, + dtype='category') + tm.assert_index_equal(result, expected, exact=True) # turn me to an Index result = Index(np.array(ci)) - self.assertIsInstance(result, Index) - self.assertNotIsInstance(result, CategoricalIndex) + assert isinstance(result, Index) + assert not isinstance(result, CategoricalIndex) def test_construction_with_dtype(self): @@ -122,18 +131,56 @@ def test_construction_with_dtype(self): result = CategoricalIndex(idx, categories=idx, ordered=True) tm.assert_index_equal(result, expected, exact=True) + def test_construction_with_categorical_dtype(self): + # construction with CategoricalDtype + # GH18109 + data, cats, ordered = 'a a b b'.split(), 'c b a'.split(), True + dtype = CategoricalDtype(categories=cats, ordered=ordered) + + result = CategoricalIndex(data, dtype=dtype) + expected = CategoricalIndex(data, categories=cats, ordered=ordered) + tm.assert_index_equal(result, expected, exact=True) + + # GH 19032 + result = Index(data, dtype=dtype) + tm.assert_index_equal(result, expected, exact=True) + + # error when combining categories/ordered and dtype kwargs + msg = 'Cannot specify both `dtype` and `categories` or `ordered`.' 
+ with pytest.raises(ValueError, match=msg): + CategoricalIndex(data, categories=cats, dtype=dtype) + + with pytest.raises(ValueError, match=msg): + Index(data, categories=cats, dtype=dtype) + + with pytest.raises(ValueError, match=msg): + CategoricalIndex(data, ordered=ordered, dtype=dtype) + + with pytest.raises(ValueError, match=msg): + Index(data, ordered=ordered, dtype=dtype) + + def test_create_categorical(self): + # https://github.com/pandas-dev/pandas/pull/17513 + # The public CI constructor doesn't hit this code path with + # instances of CategoricalIndex, but we still want to test the code + ci = CategoricalIndex(['a', 'b', 'c']) + # First ci is self, second ci is data. + result = CategoricalIndex._create_categorical(ci, ci) + expected = Categorical(['a', 'b', 'c']) + tm.assert_categorical_equal(result, expected) + def test_disallow_set_ops(self): # GH 10039 # set ops (+/-) raise TypeError idx = pd.Index(pd.Categorical(['a', 'b'])) - self.assertRaises(TypeError, lambda: idx - idx) - self.assertRaises(TypeError, lambda: idx + idx) - self.assertRaises(TypeError, lambda: idx - ['a', 'b']) - self.assertRaises(TypeError, lambda: idx + ['a', 'b']) - self.assertRaises(TypeError, lambda: ['a', 'b'] - idx) - self.assertRaises(TypeError, lambda: ['a', 'b'] + idx) + pytest.raises(TypeError, lambda: idx - idx) + pytest.raises(TypeError, lambda: idx + idx) + pytest.raises(TypeError, lambda: idx - ['a', 'b']) + pytest.raises(TypeError, lambda: idx + ['a', 'b']) + pytest.raises(TypeError, lambda: ['a', 'b'] - idx) + pytest.raises(TypeError, lambda: ['a', 'b'] + idx) def test_method_delegation(self): @@ -147,6 +194,11 @@ def test_method_delegation(self): tm.assert_index_equal(result, CategoricalIndex( list('ffggef'), categories=list('efg'))) + # GH18862 (let rename_categories take callables) + result = ci.rename_categories(lambda x: x.upper()) + tm.assert_index_equal(result, CategoricalIndex( + list('AABBCA'), categories=list('CAB'))) + ci = 
CategoricalIndex(list('aabbca'), categories=list('cab')) result = ci.add_categories(['d']) tm.assert_index_equal(result, CategoricalIndex( @@ -167,41 +219,36 @@ def test_method_delegation(self): list('aabbca'), categories=list('cabdef'), ordered=True)) # invalid - self.assertRaises(ValueError, lambda: ci.set_categories( + pytest.raises(ValueError, lambda: ci.set_categories( list('cab'), inplace=True)) def test_contains(self): ci = self.create_index(categories=list('cabdef')) - self.assertTrue('a' in ci) - self.assertTrue('z' not in ci) - self.assertTrue('e' not in ci) - self.assertTrue(np.nan not in ci) + assert 'a' in ci + assert 'z' not in ci + assert 'e' not in ci + assert np.nan not in ci # assert codes NOT in index - self.assertFalse(0 in ci) - self.assertFalse(1 in ci) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - ci = CategoricalIndex( - list('aabbca'), categories=list('cabdef') + [np.nan]) - self.assertFalse(np.nan in ci) + assert 0 not in ci + assert 1 not in ci ci = CategoricalIndex( list('aabbca') + [np.nan], categories=list('cabdef')) - self.assertTrue(np.nan in ci) + assert np.nan in ci def test_min_max(self): ci = self.create_index(ordered=False) - self.assertRaises(TypeError, lambda: ci.min()) - self.assertRaises(TypeError, lambda: ci.max()) + pytest.raises(TypeError, lambda: ci.min()) + pytest.raises(TypeError, lambda: ci.max()) ci = self.create_index(ordered=True) - self.assertEqual(ci.min(), 'c') - self.assertEqual(ci.max(), 'b') + assert ci.min() == 'c' + assert ci.max() == 'b' def test_map(self): ci = pd.CategoricalIndex(list('ABABC'), categories=list('CBA'), @@ -220,7 +267,8 @@ def test_map(self): # GH 12766: Return an index not an array tm.assert_index_equal(ci.map(lambda x: 1), - Index(np.array([1] * 5, dtype=np.int64), name='XXX')) + Index(np.array([1] * 5, dtype=np.int64), + name='XXX')) # change categories dtype ci = pd.CategoricalIndex(list('ABABC'), categories=list('BAC'), @@ -230,21 +278,41 @@ def f(x): 
return {'A': 10, 'B': 20, 'C': 30}.get(x) result = ci.map(f) - exp = pd.CategoricalIndex([10, 20, 10, 20, 30], categories=[20, 10, 30], + exp = pd.CategoricalIndex([10, 20, 10, 20, 30], + categories=[20, 10, 30], ordered=False) tm.assert_index_equal(result, exp) - def test_where(self): + result = ci.map(pd.Series([10, 20, 30], index=['A', 'B', 'C'])) + tm.assert_index_equal(result, exp) + + result = ci.map({'A': 10, 'B': 20, 'C': 30}) + tm.assert_index_equal(result, exp) + + def test_map_with_categorical_series(self): + # GH 12756 + a = pd.Index([1, 2, 3, 4]) + b = pd.Series(["even", "odd", "even", "odd"], + dtype="category") + c = pd.Series(["even", "odd", "even", "odd"]) + + exp = CategoricalIndex(["odd", "even", "odd", np.nan]) + tm.assert_index_equal(a.map(b), exp) + exp = pd.Index(["odd", "even", "odd", np.nan]) + tm.assert_index_equal(a.map(c), exp) + + @pytest.mark.parametrize('klass', [list, tuple, np.array, pd.Series]) + def test_where(self, klass): i = self.create_index() - result = i.where(notnull(i)) + cond = [True] * len(i) expected = i + result = i.where(klass(cond)) tm.assert_index_equal(result, expected) - i2 = i.copy() - i2 = pd.CategoricalIndex([np.nan, np.nan] + i[2:].tolist(), - categories=i.categories) - result = i.where(notnull(i2)) - expected = i2 + cond = [False] + [True] * (len(i) - 1) + expected = CategoricalIndex([np.nan] + i[1:].tolist(), + categories=i.categories) + result = i.where(klass(cond)) tm.assert_index_equal(result, expected) def test_append(self): @@ -265,10 +333,10 @@ def test_append(self): tm.assert_index_equal(result, ci, exact=True) # appending with different categories or reoreded is not ok - self.assertRaises( + pytest.raises( TypeError, lambda: ci.append(ci.values.set_categories(list('abcd')))) - self.assertRaises( + pytest.raises( TypeError, lambda: ci.append(ci.values.reorder_categories(list('abc')))) @@ -278,13 +346,21 @@ def test_append(self): tm.assert_index_equal(result, expected, exact=True) # invalid objects - 
self.assertRaises(TypeError, lambda: ci.append(Index(['a', 'd']))) + pytest.raises(TypeError, lambda: ci.append(Index(['a', 'd']))) # GH14298 - if base object is not categorical -> coerce to object result = Index(['c', 'a']).append(ci) expected = Index(list('caaabbca')) tm.assert_index_equal(result, expected, exact=True) + def test_append_to_another(self): + # hits _concat_index_asobject + fst = Index(['a', 'b']) + snd = CategoricalIndex(['d', 'e']) + result = fst.append(snd) + expected = Index(['a', 'b', 'd', 'e']) + tm.assert_index_equal(result, expected) + def test_insert(self): ci = self.create_index() @@ -306,7 +382,13 @@ def test_insert(self): tm.assert_index_equal(result, expected, exact=True) # invalid - self.assertRaises(TypeError, lambda: ci.insert(0, 'd')) + pytest.raises(TypeError, lambda: ci.insert(0, 'd')) + + # GH 18295 (test missing) + expected = CategoricalIndex(['a', np.nan, 'a', 'b', 'c', 'b']) + for na in (np.nan, pd.NaT, None): + result = CategoricalIndex(list('aabcb')).insert(1, na) + tm.assert_index_equal(result, expected) def test_delete(self): @@ -321,37 +403,79 @@ def test_delete(self): expected = CategoricalIndex(list('aabbc'), categories=categories) tm.assert_index_equal(result, expected, exact=True) - with tm.assertRaises((IndexError, ValueError)): - # either depeidnig on numpy version - result = ci.delete(10) + with pytest.raises((IndexError, ValueError)): + # Either depending on NumPy version + ci.delete(10) def test_astype(self): ci = self.create_index() - result = ci.astype('category') - tm.assert_index_equal(result, ci, exact=True) - result = ci.astype(object) - self.assert_index_equal(result, Index(np.array(ci))) + tm.assert_index_equal(result, Index(np.array(ci))) # this IS equal, but not the same class - self.assertTrue(result.equals(ci)) - self.assertIsInstance(result, Index) - self.assertNotIsInstance(result, CategoricalIndex) + assert result.equals(ci) + assert isinstance(result, Index) + assert not isinstance(result, 
CategoricalIndex) - def test_reindex_base(self): + # interval + ii = IntervalIndex.from_arrays(left=[-0.001, 2.0], + right=[2, 4], + closed='right') + + ci = CategoricalIndex(Categorical.from_codes( + [0, 1, -1], categories=ii, ordered=True)) - # determined by cat ordering - idx = self.create_index() - expected = np.array([4, 0, 1, 5, 2, 3], dtype=np.intp) + result = ci.astype('interval') + expected = ii.take([0, 1, -1]) + tm.assert_index_equal(result, expected) + + result = IntervalIndex(result.values) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize('name', [None, 'foo']) + @pytest.mark.parametrize('dtype_ordered', [True, False]) + @pytest.mark.parametrize('index_ordered', [True, False]) + def test_astype_category(self, name, dtype_ordered, index_ordered): + # GH 18630 + index = self.create_index(ordered=index_ordered) + if name: + index = index.rename(name) + + # standard categories + dtype = CategoricalDtype(ordered=dtype_ordered) + result = index.astype(dtype) + expected = CategoricalIndex(index.tolist(), + name=name, + categories=index.categories, + ordered=dtype_ordered) + tm.assert_index_equal(result, expected) + + # non-standard categories + dtype = CategoricalDtype(index.unique().tolist()[:-1], dtype_ordered) + result = index.astype(dtype) + expected = CategoricalIndex(index.tolist(), name=name, dtype=dtype) + tm.assert_index_equal(result, expected) + + if dtype_ordered is False: + # dtype='category' can't specify ordered, so only test once + result = index.astype('category') + expected = index + tm.assert_index_equal(result, expected) + + def test_reindex_base(self): + # Determined by cat ordering. 
+ idx = CategoricalIndex(list("cab"), categories=list("cab")) + expected = np.arange(len(idx), dtype=np.intp) actual = idx.get_indexer(idx) tm.assert_numpy_array_equal(expected, actual) - with tm.assertRaisesRegexp(ValueError, 'Invalid fill method'): - idx.get_indexer(idx, method='invalid') + with tm.assert_raises_regex(ValueError, "Invalid fill method"): + idx.get_indexer(idx, method="invalid") def test_reindexing(self): + np.random.seed(123456789) ci = self.create_index() oidx = Index(np.array(ci)) @@ -361,15 +485,26 @@ def test_reindexing(self): expected = oidx.get_indexer_non_unique(finder)[0] actual = ci.get_indexer(finder) - tm.assert_numpy_array_equal( - expected.values, actual, check_dtype=False) + tm.assert_numpy_array_equal(expected, actual) + + # see gh-17323 + # + # Even when indexer is equal to the + # members in the index, we should + # respect duplicates instead of taking + # the fast-track path. + for finder in [list("aabbca"), list("aababca")]: + expected = oidx.get_indexer_non_unique(finder)[0] + + actual = ci.get_indexer(finder) + tm.assert_numpy_array_equal(expected, actual) def test_reindex_dtype(self): c = CategoricalIndex(['a', 'b', 'c', 'a']) res, indexer = c.reindex(['a', 'c']) tm.assert_index_equal(res, Index(['a', 'a', 'c']), exact=True) tm.assert_numpy_array_equal(indexer, - np.array([0, 3, 2], dtype=np.int64)) + np.array([0, 3, 2], dtype=np.intp)) c = CategoricalIndex(['a', 'b', 'c', 'a']) res, indexer = c.reindex(Categorical(['a', 'c'])) @@ -377,7 +512,7 @@ def test_reindex_dtype(self): exp = CategoricalIndex(['a', 'a', 'c'], categories=['a', 'c']) tm.assert_index_equal(res, exp, exact=True) tm.assert_numpy_array_equal(indexer, - np.array([0, 3, 2], dtype=np.int64)) + np.array([0, 3, 2], dtype=np.intp)) c = CategoricalIndex(['a', 'b', 'c', 'a'], categories=['a', 'b', 'c', 'd']) @@ -385,7 +520,7 @@ def test_reindex_dtype(self): exp = Index(['a', 'a', 'c'], dtype='object') tm.assert_index_equal(res, exp, exact=True) 
tm.assert_numpy_array_equal(indexer, - np.array([0, 3, 2], dtype=np.int64)) + np.array([0, 3, 2], dtype=np.intp)) c = CategoricalIndex(['a', 'b', 'c', 'a'], categories=['a', 'b', 'c', 'd']) @@ -393,17 +528,57 @@ def test_reindex_dtype(self): exp = CategoricalIndex(['a', 'a', 'c'], categories=['a', 'c']) tm.assert_index_equal(res, exp, exact=True) tm.assert_numpy_array_equal(indexer, - np.array([0, 3, 2], dtype=np.int64)) + np.array([0, 3, 2], dtype=np.intp)) + + def test_reindex_empty_index(self): + # See GH16770 + c = CategoricalIndex([]) + res, indexer = c.reindex(['a', 'b']) + tm.assert_index_equal(res, Index(['a', 'b']), exact=True) + tm.assert_numpy_array_equal(indexer, + np.array([-1, -1], dtype=np.intp)) + + def test_is_monotonic(self): + c = CategoricalIndex([1, 2, 3]) + assert c.is_monotonic_increasing + assert not c.is_monotonic_decreasing + + c = CategoricalIndex([1, 2, 3], ordered=True) + assert c.is_monotonic_increasing + assert not c.is_monotonic_decreasing + + c = CategoricalIndex([1, 2, 3], categories=[3, 2, 1]) + assert not c.is_monotonic_increasing + assert c.is_monotonic_decreasing + + c = CategoricalIndex([1, 3, 2], categories=[3, 2, 1]) + assert not c.is_monotonic_increasing + assert not c.is_monotonic_decreasing + + c = CategoricalIndex([1, 2, 3], categories=[3, 2, 1], ordered=True) + assert not c.is_monotonic_increasing + assert c.is_monotonic_decreasing + + # non lexsorted categories + categories = [9, 0, 1, 2, 3] + + c = CategoricalIndex([9, 0], categories=categories) + assert c.is_monotonic_increasing + assert not c.is_monotonic_decreasing + + c = CategoricalIndex([0, 1], categories=categories) + assert c.is_monotonic_increasing + assert not c.is_monotonic_decreasing def test_duplicates(self): idx = CategoricalIndex([0, 0, 0], name='foo') - self.assertFalse(idx.is_unique) - self.assertTrue(idx.has_duplicates) + assert not idx.is_unique + assert idx.has_duplicates expected = CategoricalIndex([0], name='foo') - 
self.assert_index_equal(idx.drop_duplicates(), expected) - self.assert_index_equal(idx.unique(), expected) + tm.assert_index_equal(idx.drop_duplicates(), expected) + tm.assert_index_equal(idx.unique(), expected) def test_get_indexer(self): @@ -414,22 +589,22 @@ def test_get_indexer(self): r1 = idx1.get_indexer(idx2) assert_almost_equal(r1, np.array([0, 1, 2, -1], dtype=np.intp)) - self.assertRaises(NotImplementedError, - lambda: idx2.get_indexer(idx1, method='pad')) - self.assertRaises(NotImplementedError, - lambda: idx2.get_indexer(idx1, method='backfill')) - self.assertRaises(NotImplementedError, - lambda: idx2.get_indexer(idx1, method='nearest')) + pytest.raises(NotImplementedError, + lambda: idx2.get_indexer(idx1, method='pad')) + pytest.raises(NotImplementedError, + lambda: idx2.get_indexer(idx1, method='backfill')) + pytest.raises(NotImplementedError, + lambda: idx2.get_indexer(idx1, method='nearest')) def test_get_loc(self): # GH 12531 cidx1 = CategoricalIndex(list('abcde'), categories=list('edabc')) idx1 = Index(list('abcde')) - self.assertEqual(cidx1.get_loc('a'), idx1.get_loc('a')) - self.assertEqual(cidx1.get_loc('e'), idx1.get_loc('e')) + assert cidx1.get_loc('a') == idx1.get_loc('a') + assert cidx1.get_loc('e') == idx1.get_loc('e') for i in [cidx1, idx1]: - with tm.assertRaises(KeyError): + with pytest.raises(KeyError): i.get_loc('NOT-EXIST') # non-unique @@ -438,16 +613,16 @@ def test_get_loc(self): # results in bool array res = cidx2.get_loc('d') - self.assert_numpy_array_equal(res, idx2.get_loc('d')) - self.assert_numpy_array_equal(res, np.array([False, False, False, - True, False, True])) + tm.assert_numpy_array_equal(res, idx2.get_loc('d')) + tm.assert_numpy_array_equal(res, np.array([False, False, False, + True, False, True])) # unique element results in scalar res = cidx2.get_loc('e') - self.assertEqual(res, idx2.get_loc('e')) - self.assertEqual(res, 4) + assert res == idx2.get_loc('e') + assert res == 4 for i in [cidx2, idx2]: - with 
tm.assertRaises(KeyError): + with pytest.raises(KeyError): i.get_loc('NOT-EXIST') # non-unique, slicable @@ -456,15 +631,15 @@ def test_get_loc(self): # results in slice res = cidx3.get_loc('a') - self.assertEqual(res, idx3.get_loc('a')) - self.assertEqual(res, slice(0, 2, None)) + assert res == idx3.get_loc('a') + assert res == slice(0, 2, None) res = cidx3.get_loc('b') - self.assertEqual(res, idx3.get_loc('b')) - self.assertEqual(res, slice(2, 5, None)) + assert res == idx3.get_loc('b') + assert res == slice(2, 5, None) for i in [cidx3, idx3]: - with tm.assertRaises(KeyError): + with pytest.raises(KeyError): i.get_loc('c') def test_repr_roundtrip(self): @@ -500,104 +675,113 @@ def test_isin(self): ci.isin(['c', 'a', 'b', np.nan]), np.array([True] * 6)) # mismatched categorical -> coerced to ndarray so doesn't matter - tm.assert_numpy_array_equal( - ci.isin(ci.set_categories(list('abcdefghi'))), np.array([True] * - 6)) - tm.assert_numpy_array_equal( - ci.isin(ci.set_categories(list('defghi'))), - np.array([False] * 5 + [True])) + result = ci.isin(ci.set_categories(list('abcdefghi'))) + expected = np.array([True] * 6) + tm.assert_numpy_array_equal(result, expected) + + result = ci.isin(ci.set_categories(list('defghi'))) + expected = np.array([False] * 5 + [True]) + tm.assert_numpy_array_equal(result, expected) def test_identical(self): ci1 = CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=True) ci2 = CategoricalIndex(['a', 'b'], categories=['a', 'b', 'c'], ordered=True) - self.assertTrue(ci1.identical(ci1)) - self.assertTrue(ci1.identical(ci1.copy())) - self.assertFalse(ci1.identical(ci2)) + assert ci1.identical(ci1) + assert ci1.identical(ci1.copy()) + assert not ci1.identical(ci2) def test_ensure_copied_data(self): - # Check the "copy" argument of each Index.__new__ is honoured - # GH12309 + # gh-12309: Check the "copy" argument of each + # Index.__new__ is honored. 
+ # # Must be tested separately from other indexes because - # self.value is not an ndarray + # self.value is not an ndarray. _base = lambda ar: ar if ar.base is None else ar.base + for index in self.indices.values(): result = CategoricalIndex(index.values, copy=True) tm.assert_index_equal(index, result) - self.assertIsNot(_base(index.values), _base(result.values)) + assert _base(index.values) is not _base(result.values) result = CategoricalIndex(index.values, copy=False) - self.assertIs(_base(index.values), _base(result.values)) + assert _base(index.values) is _base(result.values) def test_equals_categorical(self): - ci1 = CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=True) ci2 = CategoricalIndex(['a', 'b'], categories=['a', 'b', 'c'], ordered=True) - self.assertTrue(ci1.equals(ci1)) - self.assertFalse(ci1.equals(ci2)) - self.assertTrue(ci1.equals(ci1.astype(object))) - self.assertTrue(ci1.astype(object).equals(ci1)) + assert ci1.equals(ci1) + assert not ci1.equals(ci2) + assert ci1.equals(ci1.astype(object)) + assert ci1.astype(object).equals(ci1) - self.assertTrue((ci1 == ci1).all()) - self.assertFalse((ci1 != ci1).all()) - self.assertFalse((ci1 > ci1).all()) - self.assertFalse((ci1 < ci1).all()) - self.assertTrue((ci1 <= ci1).all()) - self.assertTrue((ci1 >= ci1).all()) + assert (ci1 == ci1).all() + assert not (ci1 != ci1).all() + assert not (ci1 > ci1).all() + assert not (ci1 < ci1).all() + assert (ci1 <= ci1).all() + assert (ci1 >= ci1).all() - self.assertFalse((ci1 == 1).all()) - self.assertTrue((ci1 == Index(['a', 'b'])).all()) - self.assertTrue((ci1 == ci1.values).all()) + assert not (ci1 == 1).all() + assert (ci1 == Index(['a', 'b'])).all() + assert (ci1 == ci1.values).all() # invalid comparisons - with tm.assertRaisesRegexp(ValueError, "Lengths must match"): + with tm.assert_raises_regex(ValueError, "Lengths must match"): ci1 == Index(['a', 'b', 'c']) - self.assertRaises(TypeError, lambda: ci1 == ci2) - self.assertRaises( + 
pytest.raises(TypeError, lambda: ci1 == ci2) + pytest.raises( TypeError, lambda: ci1 == Categorical(ci1.values, ordered=False)) - self.assertRaises( + pytest.raises( TypeError, lambda: ci1 == Categorical(ci1.values, categories=list('abc'))) # tests # make sure that we are testing for category inclusion properly ci = CategoricalIndex(list('aabca'), categories=['c', 'a', 'b']) - self.assertFalse(ci.equals(list('aabca'))) - self.assertFalse(ci.equals(CategoricalIndex(list('aabca')))) - self.assertTrue(ci.equals(ci.copy())) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - ci = CategoricalIndex(list('aabca'), - categories=['c', 'a', 'b', np.nan]) - self.assertFalse(ci.equals(list('aabca'))) - self.assertFalse(ci.equals(CategoricalIndex(list('aabca')))) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self.assertTrue(ci.equals(ci.copy())) + assert not ci.equals(list('aabca')) + # Same categories, but different order + # Unordered + assert ci.equals(CategoricalIndex(list('aabca'))) + # Ordered + assert not ci.equals(CategoricalIndex(list('aabca'), ordered=True)) + assert ci.equals(ci.copy()) ci = CategoricalIndex(list('aabca') + [np.nan], categories=['c', 'a', 'b']) - self.assertFalse(ci.equals(list('aabca'))) - self.assertFalse(ci.equals(CategoricalIndex(list('aabca')))) - self.assertTrue(ci.equals(ci.copy())) + assert not ci.equals(list('aabca')) + assert not ci.equals(CategoricalIndex(list('aabca'))) + assert ci.equals(ci.copy()) ci = CategoricalIndex(list('aabca') + [np.nan], categories=['c', 'a', 'b']) - self.assertFalse(ci.equals(list('aabca') + [np.nan])) - self.assertFalse(ci.equals(CategoricalIndex(list('aabca') + [np.nan]))) - self.assertTrue(ci.equals(ci.copy())) + assert not ci.equals(list('aabca') + [np.nan]) + assert ci.equals(CategoricalIndex(list('aabca') + [np.nan])) + assert not ci.equals(CategoricalIndex(list('aabca') + [np.nan], + ordered=True)) + assert ci.equals(ci.copy()) + + def 
test_equals_categoridcal_unordered(self): + # https://github.com/pandas-dev/pandas/issues/16603 + a = pd.CategoricalIndex(['A'], categories=['A', 'B']) + b = pd.CategoricalIndex(['A'], categories=['B', 'A']) + c = pd.CategoricalIndex(['C'], categories=['B', 'A']) + assert a.equals(b) + assert not a.equals(c) + assert not b.equals(c) def test_string_categorical_index_repr(self): # short idx = pd.CategoricalIndex(['a', 'bb', 'ccc']) if PY3: - expected = u"""CategoricalIndex(['a', 'bb', 'ccc'], categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')""" - self.assertEqual(repr(idx), expected) + expected = u"""CategoricalIndex(['a', 'bb', 'ccc'], categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')""" # noqa + assert repr(idx) == expected else: - expected = u"""CategoricalIndex([u'a', u'bb', u'ccc'], categories=[u'a', u'bb', u'ccc'], ordered=False, dtype='category')""" - self.assertEqual(unicode(idx), expected) + expected = u"""CategoricalIndex([u'a', u'bb', u'ccc'], categories=[u'a', u'bb', u'ccc'], ordered=False, dtype='category')""" # noqa + assert unicode(idx) == expected # multiple lines idx = pd.CategoricalIndex(['a', 'bb', 'ccc'] * 10) @@ -605,17 +789,17 @@ def test_string_categorical_index_repr(self): expected = u"""CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'], - categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')""" + categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')""" # noqa - self.assertEqual(repr(idx), expected) + assert repr(idx) == expected else: expected = u"""CategoricalIndex([u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc'], - categories=[u'a', u'bb', u'ccc'], ordered=False, dtype='category')""" + 
categories=[u'a', u'bb', u'ccc'], ordered=False, dtype='category')""" # noqa - self.assertEqual(unicode(idx), expected) + assert unicode(idx) == expected # truncated idx = pd.CategoricalIndex(['a', 'bb', 'ccc'] * 100) @@ -623,42 +807,42 @@ def test_string_categorical_index_repr(self): expected = u"""CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', ... 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'], - categories=['a', 'bb', 'ccc'], ordered=False, dtype='category', length=300)""" + categories=['a', 'bb', 'ccc'], ordered=False, dtype='category', length=300)""" # noqa - self.assertEqual(repr(idx), expected) + assert repr(idx) == expected else: expected = u"""CategoricalIndex([u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', ... u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc'], - categories=[u'a', u'bb', u'ccc'], ordered=False, dtype='category', length=300)""" + categories=[u'a', u'bb', u'ccc'], ordered=False, dtype='category', length=300)""" # noqa - self.assertEqual(unicode(idx), expected) + assert unicode(idx) == expected # larger categories idx = pd.CategoricalIndex(list('abcdefghijklmmo')) if PY3: expected = u"""CategoricalIndex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'm', 'o'], - categories=['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', ...], ordered=False, dtype='category')""" + categories=['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', ...], ordered=False, dtype='category')""" # noqa - self.assertEqual(repr(idx), expected) + assert repr(idx) == expected else: expected = u"""CategoricalIndex([u'a', u'b', u'c', u'd', u'e', u'f', u'g', u'h', u'i', u'j', u'k', u'l', u'm', u'm', u'o'], - categories=[u'a', u'b', u'c', u'd', u'e', u'f', u'g', u'h', ...], ordered=False, dtype='category')""" + categories=[u'a', u'b', u'c', u'd', u'e', u'f', u'g', u'h', ...], ordered=False, dtype='category')""" # noqa - self.assertEqual(unicode(idx), expected) + assert unicode(idx) == expected 
# short idx = pd.CategoricalIndex([u'あ', u'いい', u'ううう']) if PY3: - expected = u"""CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" - self.assertEqual(repr(idx), expected) + expected = u"""CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa + assert repr(idx) == expected else: - expected = u"""CategoricalIndex([u'あ', u'いい', u'ううう'], categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category')""" - self.assertEqual(unicode(idx), expected) + expected = u"""CategoricalIndex([u'あ', u'いい', u'ううう'], categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category')""" # noqa + assert unicode(idx) == expected # multiple lines idx = pd.CategoricalIndex([u'あ', u'いい', u'ううう'] * 10) @@ -666,17 +850,17 @@ def test_string_categorical_index_repr(self): expected = u"""CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'], - categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" + categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa - self.assertEqual(repr(idx), expected) + assert repr(idx) == expected else: expected = u"""CategoricalIndex([u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう'], - categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category')""" + categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category')""" # noqa - self.assertEqual(unicode(idx), expected) + assert unicode(idx) == expected # truncated idx = pd.CategoricalIndex([u'あ', u'いい', u'ううう'] * 100) @@ -684,33 +868,33 @@ def test_string_categorical_index_repr(self): expected = u"""CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 
'あ', ... 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'], - categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)""" + categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)""" # noqa - self.assertEqual(repr(idx), expected) + assert repr(idx) == expected else: expected = u"""CategoricalIndex([u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', ... u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう'], - categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category', length=300)""" + categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category', length=300)""" # noqa - self.assertEqual(unicode(idx), expected) + assert unicode(idx) == expected # larger categories idx = pd.CategoricalIndex(list(u'あいうえおかきくけこさしすせそ')) if PY3: expected = u"""CategoricalIndex(['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ', 'さ', 'し', 'す', 'せ', 'そ'], - categories=['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', ...], ordered=False, dtype='category')""" + categories=['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', ...], ordered=False, dtype='category')""" # noqa - self.assertEqual(repr(idx), expected) + assert repr(idx) == expected else: expected = u"""CategoricalIndex([u'あ', u'い', u'う', u'え', u'お', u'か', u'き', u'く', u'け', u'こ', u'さ', u'し', u'す', u'せ', u'そ'], - categories=[u'あ', u'い', u'う', u'え', u'お', u'か', u'き', u'く', ...], ordered=False, dtype='category')""" + categories=[u'あ', u'い', u'う', u'え', u'お', u'か', u'き', u'く', ...], ordered=False, dtype='category')""" # noqa - self.assertEqual(unicode(idx), expected) + assert unicode(idx) == expected # Emable Unicode option ----------------------------------------- with cf.option_context('display.unicode.east_asian_width', True): @@ -718,11 +902,11 @@ def test_string_categorical_index_repr(self): # short idx = pd.CategoricalIndex([u'あ', u'いい', u'ううう']) if PY3: - expected = u"""CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, 
dtype='category')""" - self.assertEqual(repr(idx), expected) + expected = u"""CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa + assert repr(idx) == expected else: - expected = u"""CategoricalIndex([u'あ', u'いい', u'ううう'], categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category')""" - self.assertEqual(unicode(idx), expected) + expected = u"""CategoricalIndex([u'あ', u'いい', u'ううう'], categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category')""" # noqa + assert unicode(idx) == expected # multiple lines idx = pd.CategoricalIndex([u'あ', u'いい', u'ううう'] * 10) @@ -731,18 +915,18 @@ def test_string_categorical_index_repr(self): 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'], - categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" + categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa - self.assertEqual(repr(idx), expected) + assert repr(idx) == expected else: expected = u"""CategoricalIndex([u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう'], - categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category')""" + categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category')""" # noqa - self.assertEqual(unicode(idx), expected) + assert unicode(idx) == expected # truncated idx = pd.CategoricalIndex([u'あ', u'いい', u'ううう'] * 100) @@ -752,44 +936,44 @@ def test_string_categorical_index_repr(self): ... 
'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'], - categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)""" + categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)""" # noqa - self.assertEqual(repr(idx), expected) + assert repr(idx) == expected else: expected = u"""CategoricalIndex([u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', ... u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう'], - categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category', length=300)""" + categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category', length=300)""" # noqa - self.assertEqual(unicode(idx), expected) + assert unicode(idx) == expected # larger categories idx = pd.CategoricalIndex(list(u'あいうえおかきくけこさしすせそ')) if PY3: expected = u"""CategoricalIndex(['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ', 'さ', 'し', 'す', 'せ', 'そ'], - categories=['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', ...], ordered=False, dtype='category')""" + categories=['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', ...], ordered=False, dtype='category')""" # noqa - self.assertEqual(repr(idx), expected) + assert repr(idx) == expected else: expected = u"""CategoricalIndex([u'あ', u'い', u'う', u'え', u'お', u'か', u'き', u'く', u'け', u'こ', u'さ', u'し', u'す', u'せ', u'そ'], - categories=[u'あ', u'い', u'う', u'え', u'お', u'か', u'き', u'く', ...], ordered=False, dtype='category')""" + categories=[u'あ', u'い', u'う', u'え', u'お', u'か', u'き', u'く', ...], ordered=False, dtype='category')""" # noqa - self.assertEqual(unicode(idx), expected) + assert unicode(idx) == expected def test_fillna_categorical(self): # GH 11343 idx = CategoricalIndex([1.0, np.nan, 3.0, 1.0], name='x') # fill by value in categories exp = CategoricalIndex([1.0, 1.0, 3.0, 1.0], name='x') - self.assert_index_equal(idx.fillna(1.0), exp) + tm.assert_index_equal(idx.fillna(1.0), exp) # fill by value not in categories raises ValueError - with tm.assertRaisesRegexp(ValueError, - 
'fill value must be in categories'): + with tm.assert_raises_regex(ValueError, + 'fill value must be in categories'): idx.fillna(2.0) def test_take_fill_value(self): @@ -843,12 +1027,12 @@ def test_take_fill_value(self): msg = ('When allow_fill=True and fill_value is not None, ' 'all indices must be >= -1') - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -5]), fill_value=True) - with tm.assertRaises(IndexError): + with pytest.raises(IndexError): idx.take(np.array([1, -5])) def test_take_fill_value_datetime(self): @@ -881,12 +1065,12 @@ def test_take_fill_value_datetime(self): msg = ('When allow_fill=True and fill_value is not None, ' 'all indices must be >= -1') - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -5]), fill_value=True) - with tm.assertRaises(IndexError): + with pytest.raises(IndexError): idx.take(np.array([1, -5])) def test_take_invalid_kwargs(self): @@ -894,13 +1078,13 @@ def test_take_invalid_kwargs(self): indices = [1, 0, -1] msg = r"take\(\) got an unexpected keyword argument 'foo'" - tm.assertRaisesRegexp(TypeError, msg, idx.take, - indices, foo=2) + tm.assert_raises_regex(TypeError, msg, idx.take, + indices, foo=2) msg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, idx.take, - indices, out=indices) + tm.assert_raises_regex(ValueError, msg, idx.take, + indices, out=indices) msg = "the 'mode' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, idx.take, - indices, mode='clip') + tm.assert_raises_regex(ValueError, msg, idx.take, + indices, mode='clip') diff --git 
a/pandas/tests/indexes/test_frozen.py b/pandas/tests/indexes/test_frozen.py new file mode 100644 index 0000000000000..ca9841112b1d5 --- /dev/null +++ b/pandas/tests/indexes/test_frozen.py @@ -0,0 +1,71 @@ +import numpy as np +from pandas.util import testing as tm +from pandas.tests.test_base import CheckImmutable, CheckStringMixin +from pandas.core.indexes.frozen import FrozenList, FrozenNDArray +from pandas.compat import u + + +class TestFrozenList(CheckImmutable, CheckStringMixin): + mutable_methods = ('extend', 'pop', 'remove', 'insert') + unicode_container = FrozenList([u("\u05d0"), u("\u05d1"), "c"]) + + def setup_method(self, method): + self.lst = [1, 2, 3, 4, 5] + self.container = FrozenList(self.lst) + self.klass = FrozenList + + def test_add(self): + result = self.container + (1, 2, 3) + expected = FrozenList(self.lst + [1, 2, 3]) + self.check_result(result, expected) + + result = (1, 2, 3) + self.container + expected = FrozenList([1, 2, 3] + self.lst) + self.check_result(result, expected) + + def test_inplace(self): + q = r = self.container + q += [5] + self.check_result(q, self.lst + [5]) + # other shouldn't be mutated + self.check_result(r, self.lst) + + +class TestFrozenNDArray(CheckImmutable, CheckStringMixin): + mutable_methods = ('put', 'itemset', 'fill') + unicode_container = FrozenNDArray([u("\u05d0"), u("\u05d1"), "c"]) + + def setup_method(self, method): + self.lst = [3, 5, 7, -2] + self.container = FrozenNDArray(self.lst) + self.klass = FrozenNDArray + + def test_shallow_copying(self): + original = self.container.copy() + assert isinstance(self.container.view(), FrozenNDArray) + assert not isinstance(self.container.view(np.ndarray), FrozenNDArray) + assert self.container.view() is not self.container + tm.assert_numpy_array_equal(self.container, original) + + # Shallow copy should be the same too + assert isinstance(self.container._shallow_copy(), FrozenNDArray) + + # setting should not be allowed + def testit(container): + container[0] = 16 + + 
self.check_mutable_error(testit, self.container) + + def test_values(self): + original = self.container.view(np.ndarray).copy() + n = original[0] + 15 + + vals = self.container.values() + tm.assert_numpy_array_equal(original, vals) + + assert original is not vals + vals[0] = n + + assert isinstance(self.container, FrozenNDArray) + tm.assert_numpy_array_equal(self.container.values(), original) + assert vals[0] == n diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 702c4758da245..cd6a5c761d0c2 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -14,25 +14,25 @@ from pandas import (CategoricalIndex, DataFrame, Index, MultiIndex, compat, date_range, period_range) -from pandas.compat import PY3, long, lrange, lzip, range, u -from pandas.core.common import PerformanceWarning, UnsortedIndexError -from pandas.indexes.base import InvalidIndexError -from pandas.lib import Timestamp +from pandas.compat import PY3, long, lrange, lzip, range, u, PYPY +from pandas.errors import PerformanceWarning, UnsortedIndexError +from pandas.core.dtypes.dtypes import CategoricalDtype +from pandas.core.indexes.base import InvalidIndexError +from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike +from pandas._libs.tslib import Timestamp import pandas.util.testing as tm -from pandas.util.testing import (assertRaises, assertRaisesRegexp, - assert_almost_equal, assert_copy) - +from pandas.util.testing import assert_almost_equal, assert_copy from .common import Base -class TestMultiIndex(Base, tm.TestCase): +class TestMultiIndex(Base): _holder = MultiIndex _compat_props = ['shape', 'ndim', 'size', 'itemsize'] - def setUp(self): + def setup_method(self, method): major_axis = Index(['foo', 'bar', 'baz', 'qux']) minor_axis = Index(['one', 'two']) @@ -60,25 +60,25 @@ def f(): if common: pass - tm.assertRaisesRegexp(ValueError, 'The truth value of a', f) + tm.assert_raises_regex(ValueError, 'The truth 
value of a', f) def test_labels_dtypes(self): # GH 8456 i = MultiIndex.from_tuples([('A', 1), ('A', 2)]) - self.assertTrue(i.labels[0].dtype == 'int8') - self.assertTrue(i.labels[1].dtype == 'int8') + assert i.labels[0].dtype == 'int8' + assert i.labels[1].dtype == 'int8' i = MultiIndex.from_product([['a'], range(40)]) - self.assertTrue(i.labels[1].dtype == 'int8') + assert i.labels[1].dtype == 'int8' i = MultiIndex.from_product([['a'], range(400)]) - self.assertTrue(i.labels[1].dtype == 'int16') + assert i.labels[1].dtype == 'int16' i = MultiIndex.from_product([['a'], range(40000)]) - self.assertTrue(i.labels[1].dtype == 'int32') + assert i.labels[1].dtype == 'int32' i = pd.MultiIndex.from_product([['a'], range(1000)]) - self.assertTrue((i.labels[0] >= 0).all()) - self.assertTrue((i.labels[1] >= 0).all()) + assert (i.labels[0] >= 0).all() + assert (i.labels[1] >= 0).all() def test_where(self): i = MultiIndex.from_tuples([('A', 1), ('A', 2)]) @@ -86,7 +86,16 @@ def test_where(self): def f(): i.where(True) - self.assertRaises(NotImplementedError, f) + pytest.raises(NotImplementedError, f) + + def test_where_array_like(self): + i = MultiIndex.from_tuples([('A', 1), ('A', 2)]) + klasses = [list, tuple, np.array, pd.Series] + cond = [False, True] + + for klass in klasses: + f = lambda: i.where(klass(cond)) + pytest.raises(NotImplementedError, f) def test_repeat(self): reps = 2 @@ -115,39 +124,58 @@ def test_numpy_repeat(self): tm.assert_index_equal(np.repeat(m, reps), expected) msg = "the 'axis' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.repeat, m, reps, axis=1) + tm.assert_raises_regex( + ValueError, msg, np.repeat, m, reps, axis=1) def test_set_name_methods(self): # so long as these are synonyms, we don't need to test set_names - self.assertEqual(self.index.rename, self.index.set_names) + assert self.index.rename == self.index.set_names new_names = [name + "SUFFIX" for name in self.index_names] ind = self.index.set_names(new_names) - 
self.assertEqual(self.index.names, self.index_names) - self.assertEqual(ind.names, new_names) - with assertRaisesRegexp(ValueError, "^Length"): + assert self.index.names == self.index_names + assert ind.names == new_names + with tm.assert_raises_regex(ValueError, "^Length"): ind.set_names(new_names + new_names) new_names2 = [name + "SUFFIX2" for name in new_names] res = ind.set_names(new_names2, inplace=True) - self.assertIsNone(res) - self.assertEqual(ind.names, new_names2) + assert res is None + assert ind.names == new_names2 # set names for specific level (# GH7792) ind = self.index.set_names(new_names[0], level=0) - self.assertEqual(self.index.names, self.index_names) - self.assertEqual(ind.names, [new_names[0], self.index_names[1]]) + assert self.index.names == self.index_names + assert ind.names == [new_names[0], self.index_names[1]] res = ind.set_names(new_names2[0], level=0, inplace=True) - self.assertIsNone(res) - self.assertEqual(ind.names, [new_names2[0], self.index_names[1]]) + assert res is None + assert ind.names == [new_names2[0], self.index_names[1]] # set names for multiple levels ind = self.index.set_names(new_names, level=[0, 1]) - self.assertEqual(self.index.names, self.index_names) - self.assertEqual(ind.names, new_names) + assert self.index.names == self.index_names + assert ind.names == new_names res = ind.set_names(new_names2, level=[0, 1], inplace=True) - self.assertIsNone(res) - self.assertEqual(ind.names, new_names2) + assert res is None + assert ind.names == new_names2 + + def test_set_levels_labels_directly(self): + # setting levels/labels directly raises AttributeError + + levels = self.index.levels + new_levels = [[lev + 'a' for lev in level] for level in levels] + + labels = self.index.labels + major_labels, minor_labels = labels + major_labels = [(x + 1) % 3 for x in major_labels] + minor_labels = [(x + 1) % 1 for x in minor_labels] + new_labels = [major_labels, minor_labels] + + with pytest.raises(AttributeError): + 
self.index.levels = new_levels + + with pytest.raises(AttributeError): + self.index.labels = new_labels def test_set_levels(self): # side note - you probably wouldn't want to use levels and labels @@ -158,7 +186,7 @@ def test_set_levels(self): def assert_matching(actual, expected, check_dtype=False): # avoid specifying internal representation # as much as possible - self.assertEqual(len(actual), len(expected)) + assert len(actual) == len(expected) for act, exp in zip(actual, expected): act = np.asarray(act) exp = np.asarray(exp) @@ -172,7 +200,7 @@ def assert_matching(actual, expected, check_dtype=False): # level changing [w/ mutation] ind2 = self.index.copy() inplace_return = ind2.set_levels(new_levels, inplace=True) - self.assertIsNone(inplace_return) + assert inplace_return is None assert_matching(ind2.levels, new_levels) # level changing specific level [w/o mutation] @@ -192,13 +220,13 @@ def assert_matching(actual, expected, check_dtype=False): # level changing specific level [w/ mutation] ind2 = self.index.copy() inplace_return = ind2.set_levels(new_levels[0], level=0, inplace=True) - self.assertIsNone(inplace_return) + assert inplace_return is None assert_matching(ind2.levels, [new_levels[0], levels[1]]) assert_matching(self.index.levels, levels) ind2 = self.index.copy() inplace_return = ind2.set_levels(new_levels[1], level=1, inplace=True) - self.assertIsNone(inplace_return) + assert inplace_return is None assert_matching(ind2.levels, [levels[0], new_levels[1]]) assert_matching(self.index.levels, levels) @@ -206,7 +234,7 @@ def assert_matching(actual, expected, check_dtype=False): ind2 = self.index.copy() inplace_return = ind2.set_levels(new_levels, level=[0, 1], inplace=True) - self.assertIsNone(inplace_return) + assert inplace_return is None assert_matching(ind2.levels, new_levels) assert_matching(self.index.levels, levels) @@ -214,23 +242,23 @@ def assert_matching(actual, expected, check_dtype=False): # GH 13754 original_index = self.index.copy() for 
inplace in [True, False]: - with assertRaisesRegexp(ValueError, "^On"): + with tm.assert_raises_regex(ValueError, "^On"): self.index.set_levels(['c'], level=0, inplace=inplace) assert_matching(self.index.levels, original_index.levels, check_dtype=True) - with assertRaisesRegexp(ValueError, "^On"): + with tm.assert_raises_regex(ValueError, "^On"): self.index.set_labels([0, 1, 2, 3, 4, 5], level=0, inplace=inplace) assert_matching(self.index.labels, original_index.labels, check_dtype=True) - with assertRaisesRegexp(TypeError, "^Levels"): + with tm.assert_raises_regex(TypeError, "^Levels"): self.index.set_levels('c', level=0, inplace=inplace) assert_matching(self.index.levels, original_index.levels, check_dtype=True) - with assertRaisesRegexp(TypeError, "^Labels"): + with tm.assert_raises_regex(TypeError, "^Labels"): self.index.set_labels(1, level=0, inplace=inplace) assert_matching(self.index.labels, original_index.labels, check_dtype=True) @@ -247,7 +275,7 @@ def test_set_labels(self): def assert_matching(actual, expected): # avoid specifying internal representation # as much as possible - self.assertEqual(len(actual), len(expected)) + assert len(actual) == len(expected) for act, exp in zip(actual, expected): act = np.asarray(act) exp = np.asarray(exp, dtype=np.int8) @@ -261,7 +289,7 @@ def assert_matching(actual, expected): # label changing [w/ mutation] ind2 = self.index.copy() inplace_return = ind2.set_labels(new_labels, inplace=True) - self.assertIsNone(inplace_return) + assert inplace_return is None assert_matching(ind2.labels, new_labels) # label changing specific level [w/o mutation] @@ -281,13 +309,13 @@ def assert_matching(actual, expected): # label changing specific level [w/ mutation] ind2 = self.index.copy() inplace_return = ind2.set_labels(new_labels[0], level=0, inplace=True) - self.assertIsNone(inplace_return) + assert inplace_return is None assert_matching(ind2.labels, [new_labels[0], labels[1]]) assert_matching(self.index.labels, labels) ind2 = 
self.index.copy() inplace_return = ind2.set_labels(new_labels[1], level=1, inplace=True) - self.assertIsNone(inplace_return) + assert inplace_return is None assert_matching(ind2.labels, [labels[0], new_labels[1]]) assert_matching(self.index.labels, labels) @@ -295,54 +323,69 @@ def assert_matching(actual, expected): ind2 = self.index.copy() inplace_return = ind2.set_labels(new_labels, level=[0, 1], inplace=True) - self.assertIsNone(inplace_return) + assert inplace_return is None assert_matching(ind2.labels, new_labels) assert_matching(self.index.labels, labels) + # label changing for levels of different magnitude of categories + ind = pd.MultiIndex.from_tuples([(0, i) for i in range(130)]) + new_labels = range(129, -1, -1) + expected = pd.MultiIndex.from_tuples( + [(0, i) for i in new_labels]) + + # [w/o mutation] + result = ind.set_labels(labels=new_labels, level=1) + assert result.equals(expected) + + # [w/ mutation] + result = ind.copy() + result.set_labels(labels=new_labels, level=1, inplace=True) + assert result.equals(expected) + def test_set_levels_labels_names_bad_input(self): levels, labels = self.index.levels, self.index.labels names = self.index.names - with tm.assertRaisesRegexp(ValueError, 'Length of levels'): + with tm.assert_raises_regex(ValueError, 'Length of levels'): self.index.set_levels([levels[0]]) - with tm.assertRaisesRegexp(ValueError, 'Length of labels'): + with tm.assert_raises_regex(ValueError, 'Length of labels'): self.index.set_labels([labels[0]]) - with tm.assertRaisesRegexp(ValueError, 'Length of names'): + with tm.assert_raises_regex(ValueError, 'Length of names'): self.index.set_names([names[0]]) # shouldn't scalar data error, instead should demand list-like - with tm.assertRaisesRegexp(TypeError, 'list of lists-like'): + with tm.assert_raises_regex(TypeError, 'list of lists-like'): self.index.set_levels(levels[0]) # shouldn't scalar data error, instead should demand list-like - with tm.assertRaisesRegexp(TypeError, 'list of 
lists-like'): + with tm.assert_raises_regex(TypeError, 'list of lists-like'): self.index.set_labels(labels[0]) # shouldn't scalar data error, instead should demand list-like - with tm.assertRaisesRegexp(TypeError, 'list-like'): + with tm.assert_raises_regex(TypeError, 'list-like'): self.index.set_names(names[0]) # should have equal lengths - with tm.assertRaisesRegexp(TypeError, 'list of lists-like'): + with tm.assert_raises_regex(TypeError, 'list of lists-like'): self.index.set_levels(levels[0], level=[0, 1]) - with tm.assertRaisesRegexp(TypeError, 'list-like'): + with tm.assert_raises_regex(TypeError, 'list-like'): self.index.set_levels(levels, level=0) # should have equal lengths - with tm.assertRaisesRegexp(TypeError, 'list of lists-like'): + with tm.assert_raises_regex(TypeError, 'list of lists-like'): self.index.set_labels(labels[0], level=[0, 1]) - with tm.assertRaisesRegexp(TypeError, 'list-like'): + with tm.assert_raises_regex(TypeError, 'list-like'): self.index.set_labels(labels, level=0) # should have equal lengths - with tm.assertRaisesRegexp(ValueError, 'Length of names'): + with tm.assert_raises_regex(ValueError, 'Length of names'): self.index.set_names(names[0], level=[0, 1]) - with tm.assertRaisesRegexp(TypeError, 'string'): + with tm.assert_raises_regex(TypeError, 'string'): self.index.set_names(names, level=0) def test_set_levels_categorical(self): @@ -365,57 +408,64 @@ def test_metadata_immutable(self): levels, labels = self.index.levels, self.index.labels # shouldn't be able to set at either the top level or base level mutable_regex = re.compile('does not support mutable operations') - with assertRaisesRegexp(TypeError, mutable_regex): + with tm.assert_raises_regex(TypeError, mutable_regex): levels[0] = levels[0] - with assertRaisesRegexp(TypeError, mutable_regex): + with tm.assert_raises_regex(TypeError, mutable_regex): levels[0][0] = levels[0][0] # ditto for labels - with assertRaisesRegexp(TypeError, mutable_regex): + with 
tm.assert_raises_regex(TypeError, mutable_regex): labels[0] = labels[0] - with assertRaisesRegexp(TypeError, mutable_regex): + with tm.assert_raises_regex(TypeError, mutable_regex): labels[0][0] = labels[0][0] # and for names names = self.index.names - with assertRaisesRegexp(TypeError, mutable_regex): + with tm.assert_raises_regex(TypeError, mutable_regex): names[0] = names[0] def test_inplace_mutation_resets_values(self): levels = [['a', 'b', 'c'], [4]] levels2 = [[1, 2, 3], ['a']] labels = [[0, 1, 0, 2, 2, 0], [0, 0, 0, 0, 0, 0]] + mi1 = MultiIndex(levels=levels, labels=labels) mi2 = MultiIndex(levels=levels2, labels=labels) vals = mi1.values.copy() vals2 = mi2.values.copy() - self.assertIsNotNone(mi1._tuples) - # make sure level setting works + assert mi1._tuples is not None + + # Make sure level setting works new_vals = mi1.set_levels(levels2).values - assert_almost_equal(vals2, new_vals) - # non-inplace doesn't kill _tuples [implementation detail] - assert_almost_equal(mi1._tuples, vals) - # and values is still same too - assert_almost_equal(mi1.values, vals) + tm.assert_almost_equal(vals2, new_vals) - # inplace should kill _tuples + # Non-inplace doesn't kill _tuples [implementation detail] + tm.assert_almost_equal(mi1._tuples, vals) + + # ...and values is still same too + tm.assert_almost_equal(mi1.values, vals) + + # Inplace should kill _tuples mi1.set_levels(levels2, inplace=True) - assert_almost_equal(mi1.values, vals2) + tm.assert_almost_equal(mi1.values, vals2) - # make sure label setting works too + # Make sure label setting works too labels2 = [[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]] exp_values = np.empty((6, ), dtype=object) exp_values[:] = [(long(1), 'a')] * 6 - # must be 1d array of tuples - self.assertEqual(exp_values.shape, (6, )) + + # Must be 1d array of tuples + assert exp_values.shape == (6, ) new_values = mi2.set_labels(labels2).values - # not inplace shouldn't change - assert_almost_equal(mi2._tuples, vals2) - # should have correct values 
- assert_almost_equal(exp_values, new_values) - # and again setting inplace should kill _tuples, etc + # Not inplace shouldn't change + tm.assert_almost_equal(mi2._tuples, vals2) + + # Should have correct values + tm.assert_almost_equal(exp_values, new_values) + + # ...and again setting inplace should kill _tuples, etc mi2.set_labels(labels2, inplace=True) - assert_almost_equal(mi2.values, new_values) + tm.assert_almost_equal(mi2.values, new_values) def test_copy_in_constructor(self): levels = np.array(["a", "b", "c"]) @@ -423,12 +473,12 @@ def test_copy_in_constructor(self): val = labels[0] mi = MultiIndex(levels=[levels, levels], labels=[labels, labels], copy=True) - self.assertEqual(mi.labels[0][0], val) + assert mi.labels[0][0] == val labels[0] = 15 - self.assertEqual(mi.labels[0][0], val) + assert mi.labels[0][0] == val val = levels[0] levels[0] = "PANDA" - self.assertEqual(mi.levels[0][0], val) + assert mi.levels[0][0] == val def test_set_value_keeps_names(self): # motivating example from #3742 @@ -440,11 +490,11 @@ def test_set_value_keeps_names(self): columns=['one', 'two', 'three', 'four'], index=idx) df = df.sort_index() - self.assertIsNone(df.is_copy) - self.assertEqual(df.index.names, ('Name', 'Number')) - df = df.set_value(('grethe', '4'), 'one', 99.34) - self.assertIsNone(df.is_copy) - self.assertEqual(df.index.names, ('Name', 'Number')) + assert df._is_copy is None + assert df.index.names == ('Name', 'Number') + df.at[('grethe', '4'), 'one'] = 99.34 + assert df._is_copy is None + assert df.index.names == ('Name', 'Number') def test_copy_names(self): # Check that adding a "names" parameter to the copy is honored @@ -452,62 +502,54 @@ def test_copy_names(self): multi_idx = pd.Index([(1, 2), (3, 4)], names=['MyName1', 'MyName2']) multi_idx1 = multi_idx.copy() - self.assertTrue(multi_idx.equals(multi_idx1)) - self.assertEqual(multi_idx.names, ['MyName1', 'MyName2']) - self.assertEqual(multi_idx1.names, ['MyName1', 'MyName2']) + assert 
multi_idx.equals(multi_idx1) + assert multi_idx.names == ['MyName1', 'MyName2'] + assert multi_idx1.names == ['MyName1', 'MyName2'] multi_idx2 = multi_idx.copy(names=['NewName1', 'NewName2']) - self.assertTrue(multi_idx.equals(multi_idx2)) - self.assertEqual(multi_idx.names, ['MyName1', 'MyName2']) - self.assertEqual(multi_idx2.names, ['NewName1', 'NewName2']) + assert multi_idx.equals(multi_idx2) + assert multi_idx.names == ['MyName1', 'MyName2'] + assert multi_idx2.names == ['NewName1', 'NewName2'] multi_idx3 = multi_idx.copy(name=['NewName1', 'NewName2']) - self.assertTrue(multi_idx.equals(multi_idx3)) - self.assertEqual(multi_idx.names, ['MyName1', 'MyName2']) - self.assertEqual(multi_idx3.names, ['NewName1', 'NewName2']) + assert multi_idx.equals(multi_idx3) + assert multi_idx.names == ['MyName1', 'MyName2'] + assert multi_idx3.names == ['NewName1', 'NewName2'] def test_names(self): - # names are assigned in __init__ + # names are assigned in setup names = self.index_names level_names = [level.name for level in self.index.levels] - self.assertEqual(names, level_names) + assert names == level_names # setting bad names on existing index = self.index - assertRaisesRegexp(ValueError, "^Length of names", setattr, index, - "names", list(index.names) + ["third"]) - assertRaisesRegexp(ValueError, "^Length of names", setattr, index, - "names", []) + tm.assert_raises_regex(ValueError, "^Length of names", + setattr, index, "names", + list(index.names) + ["third"]) + tm.assert_raises_regex(ValueError, "^Length of names", + setattr, index, "names", []) # initializing with bad names (should always be equivalent) major_axis, minor_axis = self.index.levels major_labels, minor_labels = self.index.labels - assertRaisesRegexp(ValueError, "^Length of names", MultiIndex, - levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels], - names=['first']) - assertRaisesRegexp(ValueError, "^Length of names", MultiIndex, - levels=[major_axis, minor_axis], - 
labels=[major_labels, minor_labels], - names=['first', 'second', 'third']) + tm.assert_raises_regex(ValueError, "^Length of names", MultiIndex, + levels=[major_axis, minor_axis], + labels=[major_labels, minor_labels], + names=['first']) + tm.assert_raises_regex(ValueError, "^Length of names", MultiIndex, + levels=[major_axis, minor_axis], + labels=[major_labels, minor_labels], + names=['first', 'second', 'third']) # names are assigned index.names = ["a", "b"] ind_names = list(index.names) level_names = [level.name for level in index.levels] - self.assertEqual(ind_names, level_names) - - def test_reference_duplicate_name(self): - idx = MultiIndex.from_tuples( - [('a', 'b'), ('c', 'd')], names=['x', 'x']) - self.assertTrue(idx._reference_duplicate_name('x')) - - idx = MultiIndex.from_tuples( - [('a', 'b'), ('c', 'd')], names=['x', 'y']) - self.assertFalse(idx._reference_duplicate_name('x')) + assert ind_names == level_names def test_astype(self): expected = self.index.copy() @@ -516,79 +558,95 @@ def test_astype(self): assert_copy(actual.labels, expected.labels) self.check_level_names(actual, expected.names) - with assertRaisesRegexp(TypeError, "^Setting.*dtype.*object"): + with tm.assert_raises_regex(TypeError, "^Setting.*dtype.*object"): self.index.astype(np.dtype(int)) - def test_constructor_single_level(self): - single_level = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']], - labels=[[0, 1, 2, 3]], names=['first']) - tm.assertIsInstance(single_level, Index) - self.assertNotIsInstance(single_level, MultiIndex) - self.assertEqual(single_level.name, 'first') + @pytest.mark.parametrize('ordered', [True, False]) + def test_astype_category(self, ordered): + # GH 18630 + msg = '> 1 ndim Categorical are not supported at this time' + with tm.assert_raises_regex(NotImplementedError, msg): + self.index.astype(CategoricalDtype(ordered=ordered)) - single_level = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']], - labels=[[0, 1, 2, 3]]) - self.assertIsNone(single_level.name) 
+ if ordered is False: + # dtype='category' defaults to ordered=False, so only test once + with tm.assert_raises_regex(NotImplementedError, msg): + self.index.astype('category') + + def test_constructor_single_level(self): + result = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']], + labels=[[0, 1, 2, 3]], names=['first']) + assert isinstance(result, MultiIndex) + expected = Index(['foo', 'bar', 'baz', 'qux'], name='first') + tm.assert_index_equal(result.levels[0], expected) + assert result.names == ['first'] def test_constructor_no_levels(self): - assertRaisesRegexp(ValueError, "non-zero number of levels/labels", - MultiIndex, levels=[], labels=[]) + tm.assert_raises_regex(ValueError, "non-zero number " + "of levels/labels", + MultiIndex, levels=[], labels=[]) both_re = re.compile('Must pass both levels and labels') - with tm.assertRaisesRegexp(TypeError, both_re): + with tm.assert_raises_regex(TypeError, both_re): MultiIndex(levels=[]) - with tm.assertRaisesRegexp(TypeError, both_re): + with tm.assert_raises_regex(TypeError, both_re): MultiIndex(labels=[]) def test_constructor_mismatched_label_levels(self): labels = [np.array([1]), np.array([2]), np.array([3])] levels = ["a"] - assertRaisesRegexp(ValueError, "Length of levels and labels must be" - " the same", MultiIndex, levels=levels, - labels=labels) + tm.assert_raises_regex(ValueError, "Length of levels and labels " + "must be the same", MultiIndex, + levels=levels, labels=labels) length_error = re.compile('>= length of level') label_error = re.compile(r'Unequal label lengths: \[4, 2\]') # important to check that it's looking at the right thing. 
- with tm.assertRaisesRegexp(ValueError, length_error): + with tm.assert_raises_regex(ValueError, length_error): MultiIndex(levels=[['a'], ['b']], labels=[[0, 1, 2, 3], [0, 3, 4, 1]]) - with tm.assertRaisesRegexp(ValueError, label_error): + with tm.assert_raises_regex(ValueError, label_error): MultiIndex(levels=[['a'], ['b']], labels=[[0, 0, 0, 0], [0, 0]]) # external API - with tm.assertRaisesRegexp(ValueError, length_error): + with tm.assert_raises_regex(ValueError, length_error): self.index.copy().set_levels([['a'], ['b']]) - with tm.assertRaisesRegexp(ValueError, label_error): + with tm.assert_raises_regex(ValueError, label_error): self.index.copy().set_labels([[0, 0, 0, 0], [0, 0]]) - # deprecated properties - with warnings.catch_warnings(): - warnings.simplefilter('ignore') + @pytest.mark.parametrize('names', [['a', 'b', 'a'], [1, 1, 2], + [1, 'a', 1]]) + def test_duplicate_level_names(self, names): + # GH18872 + pytest.raises(ValueError, pd.MultiIndex.from_product, + [[0, 1]] * 3, names=names) - with tm.assertRaisesRegexp(ValueError, length_error): - self.index.copy().levels = [['a'], ['b']] + # With .rename() + mi = pd.MultiIndex.from_product([[0, 1]] * 3) + tm.assert_raises_regex(ValueError, "Duplicated level name:", + mi.rename, names) - with tm.assertRaisesRegexp(ValueError, label_error): - self.index.copy().labels = [[0, 0, 0, 0], [0, 0]] + # With .rename(., level=) + mi.rename(names[0], level=1, inplace=True) + tm.assert_raises_regex(ValueError, "Duplicated level name:", + mi.rename, names[:2], level=[0, 2]) def assert_multiindex_copied(self, copy, original): - # levels should be (at least, shallow copied) - assert_copy(copy.levels, original.levels) + # Levels should be (at least, shallow copied) + tm.assert_copy(copy.levels, original.levels) + tm.assert_almost_equal(copy.labels, original.labels) - assert_almost_equal(copy.labels, original.labels) + # Labels doesn't matter which way copied + tm.assert_almost_equal(copy.labels, original.labels) + assert 
copy.labels is not original.labels - # labels doesn't matter which way copied - assert_almost_equal(copy.labels, original.labels) - self.assertIsNot(copy.labels, original.labels) + # Names doesn't matter which way copied + assert copy.names == original.names + assert copy.names is not original.names - # names doesn't matter which way copied - self.assertEqual(copy.names, original.names) - self.assertIsNot(copy.names, original.names) - - # sort order should be copied - self.assertEqual(copy.sortorder, original.sortorder) + # Sort order should be copied + assert copy.sortorder == original.sortorder def test_copy(self): i_copy = self.index.copy() @@ -606,7 +664,7 @@ def test_view(self): self.assert_multiindex_copied(i_view, self.index) def check_level_names(self, index, names): - self.assertEqual([level.name for level in index.levels], list(names)) + assert [level.name for level in index.levels] == list(names) def test_changing_names(self): @@ -632,33 +690,43 @@ def test_changing_names(self): shallow_copy.names = [name + "c" for name in shallow_copy.names] self.check_level_names(self.index, new_names) - def test_duplicate_names(self): - self.index.names = ['foo', 'foo'] - assertRaisesRegexp(KeyError, 'Level foo not found', - self.index._get_level_number, 'foo') - def test_get_level_number_integer(self): self.index.names = [1, 0] - self.assertEqual(self.index._get_level_number(1), 0) - self.assertEqual(self.index._get_level_number(0), 1) - self.assertRaises(IndexError, self.index._get_level_number, 2) - assertRaisesRegexp(KeyError, 'Level fourth not found', - self.index._get_level_number, 'fourth') + assert self.index._get_level_number(1) == 0 + assert self.index._get_level_number(0) == 1 + pytest.raises(IndexError, self.index._get_level_number, 2) + tm.assert_raises_regex(KeyError, 'Level fourth not found', + self.index._get_level_number, 'fourth') def test_from_arrays(self): arrays = [] for lev, lab in zip(self.index.levels, self.index.labels): 
arrays.append(np.asarray(lev).take(lab)) - result = MultiIndex.from_arrays(arrays) - self.assertEqual(list(result), list(self.index)) + # list of arrays as input + result = MultiIndex.from_arrays(arrays, names=self.index.names) + tm.assert_index_equal(result, self.index) # infer correctly result = MultiIndex.from_arrays([[pd.NaT, Timestamp('20130101')], ['a', 'b']]) - self.assertTrue(result.levels[0].equals(Index([Timestamp('20130101') - ]))) - self.assertTrue(result.levels[1].equals(Index(['a', 'b']))) + assert result.levels[0].equals(Index([Timestamp('20130101')])) + assert result.levels[1].equals(Index(['a', 'b'])) + + def test_from_arrays_iterator(self): + # GH 18434 + arrays = [] + for lev, lab in zip(self.index.levels, self.index.labels): + arrays.append(np.asarray(lev).take(lab)) + + # iterator as input + result = MultiIndex.from_arrays(iter(arrays), names=self.index.names) + tm.assert_index_equal(result, self.index) + + # invalid iterator input + with tm.assert_raises_regex( + TypeError, "Input must be a list / sequence of array-likes."): + MultiIndex.from_arrays(0) def test_from_arrays_index_series_datetimetz(self): idx1 = pd.date_range('2015-01-01 10:00', freq='D', periods=3, @@ -746,21 +814,22 @@ def test_from_arrays_index_series_categorical(self): def test_from_arrays_empty(self): # 0 levels - with tm.assertRaisesRegexp( + with tm.assert_raises_regex( ValueError, "Must pass non-zero number of levels/labels"): MultiIndex.from_arrays(arrays=[]) # 1 level result = MultiIndex.from_arrays(arrays=[[]], names=['A']) + assert isinstance(result, MultiIndex) expected = Index([], name='A') - tm.assert_index_equal(result, expected) + tm.assert_index_equal(result.levels[0], expected) # N levels for N in [2, 3]: arrays = [[]] * N names = list('ABC')[:N] result = MultiIndex.from_arrays(arrays=arrays, names=names) - expected = MultiIndex(levels=[np.array([])] * N, labels=[[]] * N, + expected = MultiIndex(levels=[[]] * N, labels=[[]] * N, names=names) 
tm.assert_index_equal(result, expected) @@ -768,24 +837,27 @@ def test_from_arrays_invalid_input(self): invalid_inputs = [1, [1], [1, 2], [[1], 2], 'a', ['a'], ['a', 'b'], [['a'], 'b']] for i in invalid_inputs: - tm.assertRaises(TypeError, MultiIndex.from_arrays, arrays=i) + pytest.raises(TypeError, MultiIndex.from_arrays, arrays=i) def test_from_arrays_different_lengths(self): - # GH13599 + # see gh-13599 idx1 = [1, 2, 3] idx2 = ['a', 'b'] - assertRaisesRegexp(ValueError, '^all arrays must be same length$', - MultiIndex.from_arrays, [idx1, idx2]) + tm.assert_raises_regex(ValueError, '^all arrays must ' + 'be same length$', + MultiIndex.from_arrays, [idx1, idx2]) idx1 = [] idx2 = ['a', 'b'] - assertRaisesRegexp(ValueError, '^all arrays must be same length$', - MultiIndex.from_arrays, [idx1, idx2]) + tm.assert_raises_regex(ValueError, '^all arrays must ' + 'be same length$', + MultiIndex.from_arrays, [idx1, idx2]) idx1 = [1, 2, 3] idx2 = [] - assertRaisesRegexp(ValueError, '^all arrays must be same length$', - MultiIndex.from_arrays, [idx1, idx2]) + tm.assert_raises_regex(ValueError, '^all arrays must ' + 'be same length$', + MultiIndex.from_arrays, [idx1, idx2]) def test_from_product(self): @@ -800,18 +872,36 @@ def test_from_product(self): expected = MultiIndex.from_tuples(tuples, names=names) tm.assert_index_equal(result, expected) - self.assertEqual(result.names, names) + + def test_from_product_iterator(self): + # GH 18434 + first = ['foo', 'bar', 'buz'] + second = ['a', 'b', 'c'] + names = ['first', 'second'] + tuples = [('foo', 'a'), ('foo', 'b'), ('foo', 'c'), ('bar', 'a'), + ('bar', 'b'), ('bar', 'c'), ('buz', 'a'), ('buz', 'b'), + ('buz', 'c')] + expected = MultiIndex.from_tuples(tuples, names=names) + + # iterator as input + result = MultiIndex.from_product(iter([first, second]), names=names) + tm.assert_index_equal(result, expected) + + # Invalid non-iterable input + with tm.assert_raises_regex( + TypeError, "Input must be a list / sequence of 
iterables."): + MultiIndex.from_product(0) def test_from_product_empty(self): # 0 levels - with tm.assertRaisesRegexp( + with tm.assert_raises_regex( ValueError, "Must pass non-zero number of levels/labels"): MultiIndex.from_product([]) # 1 level result = MultiIndex.from_product([[]], names=['A']) - expected = pd.Float64Index([], name='A') - tm.assert_index_equal(result, expected) + expected = pd.Index([], name='A') + tm.assert_index_equal(result.levels[0], expected) # 2 levels l1 = [[], ['foo', 'bar', 'baz'], []] @@ -819,7 +909,7 @@ def test_from_product_empty(self): names = ['A', 'B'] for first, second in zip(l1, l2): result = MultiIndex.from_product([first, second], names=names) - expected = MultiIndex(levels=[np.array(first), np.array(second)], + expected = MultiIndex(levels=[first, second], labels=[[], []], names=names) tm.assert_index_equal(result, expected) @@ -828,8 +918,7 @@ def test_from_product_empty(self): for N in range(4): lvl2 = lrange(N) result = MultiIndex.from_product([[], lvl2, []], names=names) - expected = MultiIndex(levels=[np.array(A) - for A in [[], lvl2, []]], + expected = MultiIndex(levels=[[], lvl2, []], labels=[[], [], []], names=names) tm.assert_index_equal(result, expected) @@ -837,12 +926,12 @@ def test_from_product_invalid_input(self): invalid_inputs = [1, [1], [1, 2], [[1], 2], 'a', ['a'], ['a', 'b'], [['a'], 'b']] for i in invalid_inputs: - tm.assertRaises(TypeError, MultiIndex.from_product, iterables=i) + pytest.raises(TypeError, MultiIndex.from_product, iterables=i) def test_from_product_datetimeindex(self): dt_index = date_range('2000-01-01', periods=2) mi = pd.MultiIndex.from_product([[1, 2], dt_index]) - etalon = pd.lib.list_to_object_array([(1, pd.Timestamp( + etalon = construct_1d_object_array_from_listlike([(1, pd.Timestamp( '2000-01-01')), (1, pd.Timestamp('2000-01-02')), (2, pd.Timestamp( '2000-01-01')), (2, pd.Timestamp('2000-01-02'))]) tm.assert_numpy_array_equal(mi.values, etalon) @@ -867,23 +956,70 @@ def 
test_values_boxed(self): (1, pd.Timestamp('2000-01-04')), (2, pd.Timestamp('2000-01-02')), (3, pd.Timestamp('2000-01-03'))] - mi = pd.MultiIndex.from_tuples(tuples) - tm.assert_numpy_array_equal(mi.values, - pd.lib.list_to_object_array(tuples)) + result = pd.MultiIndex.from_tuples(tuples) + expected = construct_1d_object_array_from_listlike(tuples) + tm.assert_numpy_array_equal(result.values, expected) # Check that code branches for boxed values produce identical results - tm.assert_numpy_array_equal(mi.values[:4], mi[:4].values) + tm.assert_numpy_array_equal(result.values[:4], result[:4].values) + + def test_values_multiindex_datetimeindex(self): + # Test to ensure we hit the boxing / nobox part of MI.values + ints = np.arange(10**18, 10**18 + 5) + naive = pd.DatetimeIndex(ints) + aware = pd.DatetimeIndex(ints, tz='US/Central') + + idx = pd.MultiIndex.from_arrays([naive, aware]) + result = idx.values + + outer = pd.DatetimeIndex([x[0] for x in result]) + tm.assert_index_equal(outer, naive) + + inner = pd.DatetimeIndex([x[1] for x in result]) + tm.assert_index_equal(inner, aware) + + # n_lev > n_lab + result = idx[:2].values + + outer = pd.DatetimeIndex([x[0] for x in result]) + tm.assert_index_equal(outer, naive[:2]) + + inner = pd.DatetimeIndex([x[1] for x in result]) + tm.assert_index_equal(inner, aware[:2]) + + def test_values_multiindex_periodindex(self): + # Test to ensure we hit the boxing / nobox part of MI.values + ints = np.arange(2007, 2012) + pidx = pd.PeriodIndex(ints, freq='D') + + idx = pd.MultiIndex.from_arrays([ints, pidx]) + result = idx.values + + outer = pd.Int64Index([x[0] for x in result]) + tm.assert_index_equal(outer, pd.Int64Index(ints)) + + inner = pd.PeriodIndex([x[1] for x in result]) + tm.assert_index_equal(inner, pidx) + + # n_lev > n_lab + result = idx[:2].values + + outer = pd.Int64Index([x[0] for x in result]) + tm.assert_index_equal(outer, pd.Int64Index(ints[:2])) + + inner = pd.PeriodIndex([x[1] for x in result]) + 
tm.assert_index_equal(inner, pidx[:2]) def test_append(self): result = self.index[:3].append(self.index[3:]) - self.assertTrue(result.equals(self.index)) + assert result.equals(self.index) foos = [self.index[:1], self.index[1:3], self.index[3:]] result = foos[0].append(foos[1:]) - self.assertTrue(result.equals(self.index)) + assert result.equals(self.index) # empty result = self.index.append([]) - self.assertTrue(result.equals(self.index)) + assert result.equals(self.index) def test_append_mixed_dtypes(self): # GH 13660 @@ -895,7 +1031,7 @@ def test_append_mixed_dtypes(self): [1.1, np.nan, 3.3], ['a', 'b', 'c'], dti, dti_tz, pi]) - self.assertEqual(mi.nlevels, 6) + assert mi.nlevels == 6 res = mi.append(mi) exp = MultiIndex.from_arrays([[1, 2, 3, 1, 2, 3], @@ -924,7 +1060,7 @@ def test_get_level_values(self): expected = Index(['foo', 'foo', 'bar', 'baz', 'qux', 'qux'], name='first') tm.assert_index_equal(result, expected) - self.assertEqual(result.name, 'first') + assert result.name == 'first' result = self.index.get_level_values('first') expected = self.index.get_level_values(0) @@ -935,56 +1071,72 @@ def test_get_level_values(self): ['A', 'B']), CategoricalIndex([1, 2, 3])], labels=[np.array( [0, 0, 0, 1, 1, 1]), np.array([0, 1, 2, 0, 1, 2])]) exp = CategoricalIndex(['A', 'A', 'A', 'B', 'B', 'B']) - self.assert_index_equal(index.get_level_values(0), exp) + tm.assert_index_equal(index.get_level_values(0), exp) exp = CategoricalIndex([1, 2, 3, 1, 2, 3]) - self.assert_index_equal(index.get_level_values(1), exp) + tm.assert_index_equal(index.get_level_values(1), exp) - def test_get_level_values_na(self): + def test_get_level_values_int_with_na(self): + # GH 17924 arrays = [['a', 'b', 'b'], [1, np.nan, 2]] index = pd.MultiIndex.from_arrays(arrays) - values = index.get_level_values(1) - expected = np.array([1, np.nan, 2]) - tm.assert_numpy_array_equal(values.values.astype(float), expected) + result = index.get_level_values(1) + expected = Index([1, np.nan, 2]) + 
tm.assert_index_equal(result, expected) arrays = [['a', 'b', 'b'], [np.nan, np.nan, 2]] index = pd.MultiIndex.from_arrays(arrays) - values = index.get_level_values(1) - expected = np.array([np.nan, np.nan, 2]) - tm.assert_numpy_array_equal(values.values.astype(float), expected) + result = index.get_level_values(1) + expected = Index([np.nan, np.nan, 2]) + tm.assert_index_equal(result, expected) + def test_get_level_values_na(self): arrays = [[np.nan, np.nan, np.nan], ['a', np.nan, 1]] index = pd.MultiIndex.from_arrays(arrays) - values = index.get_level_values(0) - expected = np.array([np.nan, np.nan, np.nan]) - tm.assert_numpy_array_equal(values.values.astype(float), expected) - values = index.get_level_values(1) - expected = np.array(['a', np.nan, 1], dtype=object) - tm.assert_numpy_array_equal(values.values, expected) + result = index.get_level_values(0) + expected = pd.Index([np.nan, np.nan, np.nan]) + tm.assert_index_equal(result, expected) + + result = index.get_level_values(1) + expected = pd.Index(['a', np.nan, 1]) + tm.assert_index_equal(result, expected) arrays = [['a', 'b', 'b'], pd.DatetimeIndex([0, 1, pd.NaT])] index = pd.MultiIndex.from_arrays(arrays) - values = index.get_level_values(1) + result = index.get_level_values(1) expected = pd.DatetimeIndex([0, 1, pd.NaT]) - tm.assert_numpy_array_equal(values.values, expected.values) + tm.assert_index_equal(result, expected) arrays = [[], []] index = pd.MultiIndex.from_arrays(arrays) - values = index.get_level_values(0) - self.assertEqual(values.shape, (0, )) + result = index.get_level_values(0) + expected = pd.Index([], dtype=object) + tm.assert_index_equal(result, expected) + + def test_get_level_values_all_na(self): + # GH 17924 when level entirely consists of nan + arrays = [[np.nan, np.nan, np.nan], ['a', np.nan, 1]] + index = pd.MultiIndex.from_arrays(arrays) + result = index.get_level_values(0) + expected = pd.Index([np.nan, np.nan, np.nan], dtype=np.float64) + tm.assert_index_equal(result, expected) 
+ + result = index.get_level_values(1) + expected = pd.Index(['a', np.nan, 1], dtype=object) + tm.assert_index_equal(result, expected) def test_reorder_levels(self): # this blows up - assertRaisesRegexp(IndexError, '^Too many levels', - self.index.reorder_levels, [2, 1, 0]) + tm.assert_raises_regex(IndexError, '^Too many levels', + self.index.reorder_levels, [2, 1, 0]) def test_nlevels(self): - self.assertEqual(self.index.nlevels, 2) + assert self.index.nlevels == 2 def test_iter(self): result = list(self.index) expected = [('foo', 'one'), ('foo', 'two'), ('bar', 'one'), ('baz', 'two'), ('qux', 'one'), ('qux', 'two')] - self.assertEqual(result, expected) + assert result == expected def test_legacy_pickle(self): if PY3: @@ -995,7 +1147,7 @@ def test_legacy_pickle(self): obj = pd.read_pickle(path) obj2 = MultiIndex.from_tuples(obj.values) - self.assertTrue(obj.equals(obj2)) + assert obj.equals(obj2) res = obj.get_indexer(obj) exp = np.arange(len(obj), dtype=np.intp) @@ -1014,7 +1166,7 @@ def test_legacy_v2_unpickle(self): obj = pd.read_pickle(path) obj2 = MultiIndex.from_tuples(obj.values) - self.assertTrue(obj.equals(obj2)) + assert obj.equals(obj2) res = obj.get_indexer(obj) exp = np.arange(len(obj), dtype=np.intp) @@ -1034,73 +1186,99 @@ def test_roundtrip_pickle_with_tz(self): [[1, 2], ['a', 'b'], date_range('20130101', periods=3, tz='US/Eastern') ], names=['one', 'two', 'three']) - unpickled = self.round_trip_pickle(index) - self.assertTrue(index.equal_levels(unpickled)) + unpickled = tm.round_trip_pickle(index) + assert index.equal_levels(unpickled) def test_from_tuples_index_values(self): result = MultiIndex.from_tuples(self.index) - self.assertTrue((result.values == self.index.values).all()) + assert (result.values == self.index.values).all() def test_contains(self): - self.assertIn(('foo', 'two'), self.index) - self.assertNotIn(('bar', 'two'), self.index) - self.assertNotIn(None, self.index) + assert ('foo', 'two') in self.index + assert ('bar', 'two') not 
in self.index + assert None not in self.index + + def test_contains_top_level(self): + midx = MultiIndex.from_product([['A', 'B'], [1, 2]]) + assert 'A' in midx + assert 'A' not in midx._engine + + def test_contains_with_nat(self): + # MI with a NaT + mi = MultiIndex(levels=[['C'], + pd.date_range('2012-01-01', periods=5)], + labels=[[0, 0, 0, 0, 0, 0], [-1, 0, 1, 2, 3, 4]], + names=[None, 'B']) + assert ('C', pd.Timestamp('2012-01-01')) in mi + for val in mi.values: + assert val in mi def test_is_all_dates(self): - self.assertFalse(self.index.is_all_dates) + assert not self.index.is_all_dates def test_is_numeric(self): # MultiIndex is never numeric - self.assertFalse(self.index.is_numeric()) + assert not self.index.is_numeric() def test_getitem(self): # scalar - self.assertEqual(self.index[2], ('bar', 'one')) + assert self.index[2] == ('bar', 'one') # slice result = self.index[2:5] expected = self.index[[2, 3, 4]] - self.assertTrue(result.equals(expected)) + assert result.equals(expected) # boolean result = self.index[[True, False, True, False, True, True]] result2 = self.index[np.array([True, False, True, False, True, True])] expected = self.index[[0, 2, 4, 5]] - self.assertTrue(result.equals(expected)) - self.assertTrue(result2.equals(expected)) + assert result.equals(expected) + assert result2.equals(expected) def test_getitem_group_select(self): sorted_idx, _ = self.index.sortlevel(0) - self.assertEqual(sorted_idx.get_loc('baz'), slice(3, 4)) - self.assertEqual(sorted_idx.get_loc('foo'), slice(0, 2)) + assert sorted_idx.get_loc('baz') == slice(3, 4) + assert sorted_idx.get_loc('foo') == slice(0, 2) def test_get_loc(self): - self.assertEqual(self.index.get_loc(('foo', 'two')), 1) - self.assertEqual(self.index.get_loc(('baz', 'two')), 3) - self.assertRaises(KeyError, self.index.get_loc, ('bar', 'two')) - self.assertRaises(KeyError, self.index.get_loc, 'quux') + assert self.index.get_loc(('foo', 'two')) == 1 + assert self.index.get_loc(('baz', 'two')) == 3 + 
pytest.raises(KeyError, self.index.get_loc, ('bar', 'two')) + pytest.raises(KeyError, self.index.get_loc, 'quux') - self.assertRaises(NotImplementedError, self.index.get_loc, 'foo', - method='nearest') + pytest.raises(NotImplementedError, self.index.get_loc, 'foo', + method='nearest') # 3 levels index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) - self.assertRaises(KeyError, index.get_loc, (1, 1)) - self.assertEqual(index.get_loc((2, 0)), slice(3, 5)) + pytest.raises(KeyError, index.get_loc, (1, 1)) + assert index.get_loc((2, 0)) == slice(3, 5) def test_get_loc_duplicates(self): index = Index([2, 2, 2, 2]) result = index.get_loc(2) expected = slice(0, 4) - self.assertEqual(result, expected) - # self.assertRaises(Exception, index.get_loc, 2) + assert result == expected + # pytest.raises(Exception, index.get_loc, 2) index = Index(['c', 'a', 'a', 'b', 'b']) rs = index.get_loc('c') xp = 0 - assert (rs == xp) + assert rs == xp + + def test_get_value_duplicates(self): + index = MultiIndex(levels=[['D', 'B', 'C'], + [0, 26, 27, 37, 57, 67, 75, 82]], + labels=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], + [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]], + names=['tag', 'day']) + + assert index.get_loc('D') == slice(0, 3) + with pytest.raises(KeyError): + index._engine.get_value(np.array([]), 'D') def test_get_loc_level(self): index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( @@ -1110,22 +1288,73 @@ def test_get_loc_level(self): loc, new_index = index.get_loc_level((0, 1)) expected = slice(1, 2) exp_index = index[expected].droplevel(0).droplevel(0) - self.assertEqual(loc, expected) - self.assertTrue(new_index.equals(exp_index)) + assert loc == expected + assert new_index.equals(exp_index) loc, new_index = index.get_loc_level((0, 1, 0)) expected = 1 - self.assertEqual(loc, expected) - self.assertIsNone(new_index) + assert loc == 
expected + assert new_index is None - self.assertRaises(KeyError, index.get_loc_level, (2, 2)) + pytest.raises(KeyError, index.get_loc_level, (2, 2)) index = MultiIndex(levels=[[2000], lrange(4)], labels=[np.array( [0, 0, 0, 0]), np.array([0, 1, 2, 3])]) result, new_index = index.get_loc_level((2000, slice(None, None))) expected = slice(None, None) - self.assertEqual(result, expected) - self.assertTrue(new_index.equals(index.droplevel(0))) + assert result == expected + assert new_index.equals(index.droplevel(0)) + + @pytest.mark.parametrize('level', [0, 1]) + @pytest.mark.parametrize('null_val', [np.nan, pd.NaT, None]) + def test_get_loc_nan(self, level, null_val): + # GH 18485 : NaN in MultiIndex + levels = [['a', 'b'], ['c', 'd']] + key = ['b', 'd'] + levels[level] = np.array([0, null_val], dtype=type(null_val)) + key[level] = null_val + idx = MultiIndex.from_product(levels) + assert idx.get_loc(tuple(key)) == 3 + + def test_get_loc_missing_nan(self): + # GH 8569 + idx = MultiIndex.from_arrays([[1.0, 2.0], [3.0, 4.0]]) + assert isinstance(idx.get_loc(1), slice) + pytest.raises(KeyError, idx.get_loc, 3) + pytest.raises(KeyError, idx.get_loc, np.nan) + pytest.raises(KeyError, idx.get_loc, [np.nan]) + + @pytest.mark.parametrize('dtype1', [int, float, bool, str]) + @pytest.mark.parametrize('dtype2', [int, float, bool, str]) + def test_get_loc_multiple_dtypes(self, dtype1, dtype2): + # GH 18520 + levels = [np.array([0, 1]).astype(dtype1), + np.array([0, 1]).astype(dtype2)] + idx = pd.MultiIndex.from_product(levels) + assert idx.get_loc(idx[2]) == 2 + + @pytest.mark.parametrize('level', [0, 1]) + @pytest.mark.parametrize('dtypes', [[int, float], [float, int]]) + def test_get_loc_implicit_cast(self, level, dtypes): + # GH 18818, GH 15994 : as flat index, cast int to float and vice-versa + levels = [['a', 'b'], ['c', 'd']] + key = ['b', 'd'] + lev_dtype, key_dtype = dtypes + levels[level] = np.array([0, 1], dtype=lev_dtype) + key[level] = key_dtype(1) + idx = 
MultiIndex.from_product(levels) + assert idx.get_loc(tuple(key)) == 3 + + def test_get_loc_cast_bool(self): + # GH 19086 : int is casted to bool, but not vice-versa + levels = [[False, True], np.arange(2, dtype='int64')] + idx = MultiIndex.from_product(levels) + + assert idx.get_loc((0, 1)) == 1 + assert idx.get_loc((1, 0)) == 2 + + pytest.raises(KeyError, idx.get_loc, (False, True)) + pytest.raises(KeyError, idx.get_loc, (True, False)) def test_slice_locs(self): df = tm.makeTimeDataFrame() @@ -1147,17 +1376,19 @@ def test_slice_locs_with_type_mismatch(self): df = tm.makeTimeDataFrame() stacked = df.stack() idx = stacked.index - assertRaisesRegexp(TypeError, '^Level type mismatch', idx.slice_locs, - (1, 3)) - assertRaisesRegexp(TypeError, '^Level type mismatch', idx.slice_locs, - df.index[5] + timedelta(seconds=30), (5, 2)) + tm.assert_raises_regex(TypeError, '^Level type mismatch', + idx.slice_locs, (1, 3)) + tm.assert_raises_regex(TypeError, '^Level type mismatch', + idx.slice_locs, + df.index[5] + timedelta( + seconds=30), (5, 2)) df = tm.makeCustomDataframe(5, 5) stacked = df.stack() idx = stacked.index - with assertRaisesRegexp(TypeError, '^Level type mismatch'): + with tm.assert_raises_regex(TypeError, '^Level type mismatch'): idx.slice_locs(timedelta(seconds=30)) # TODO: Try creating a UnicodeDecodeError in exception message - with assertRaisesRegexp(TypeError, '^Level type mismatch'): + with tm.assert_raises_regex(TypeError, '^Level type mismatch'): idx.slice_locs(df.index[1], (16, "a")) def test_slice_locs_not_sorted(self): @@ -1165,9 +1396,9 @@ def test_slice_locs_not_sorted(self): lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) - assertRaisesRegexp(KeyError, "[Kk]ey length.*greater than MultiIndex" - " lexsort depth", index.slice_locs, (1, 0, 1), - (2, 1, 0)) + tm.assert_raises_regex(KeyError, "[Kk]ey length.*greater than " + "MultiIndex lexsort depth", + index.slice_locs, 
(1, 0, 1), (2, 1, 0)) # works sorted_index, _ = index.sortlevel(0) @@ -1178,16 +1409,16 @@ def test_slice_locs_partial(self): sorted_idx, _ = self.index.sortlevel(0) result = sorted_idx.slice_locs(('foo', 'two'), ('qux', 'one')) - self.assertEqual(result, (1, 5)) + assert result == (1, 5) result = sorted_idx.slice_locs(None, ('qux', 'one')) - self.assertEqual(result, (0, 5)) + assert result == (0, 5) result = sorted_idx.slice_locs(('foo', 'two'), None) - self.assertEqual(result, (1, len(sorted_idx))) + assert result == (1, len(sorted_idx)) result = sorted_idx.slice_locs('bar', 'baz') - self.assertEqual(result, (2, 4)) + assert result == (2, 4) def test_slice_locs_not_contained(self): # some searchsorted action @@ -1197,22 +1428,22 @@ def test_slice_locs_not_contained(self): [0, 1, 2, 1, 2, 2, 0, 1, 2]], sortorder=0) result = index.slice_locs((1, 0), (5, 2)) - self.assertEqual(result, (3, 6)) + assert result == (3, 6) result = index.slice_locs(1, 5) - self.assertEqual(result, (3, 6)) + assert result == (3, 6) result = index.slice_locs((2, 2), (5, 2)) - self.assertEqual(result, (3, 6)) + assert result == (3, 6) result = index.slice_locs(2, 5) - self.assertEqual(result, (3, 6)) + assert result == (3, 6) result = index.slice_locs((1, 0), (6, 3)) - self.assertEqual(result, (3, 8)) + assert result == (3, 8) result = index.slice_locs(-1, 10) - self.assertEqual(result, (0, len(index))) + assert result == (0, len(index)) def test_consistency(self): # need to construct an overflow @@ -1232,7 +1463,7 @@ def test_consistency(self): index = MultiIndex(levels=[major_axis, minor_axis], labels=[major_labels, minor_labels]) - self.assertFalse(index.is_unique) + assert not index.is_unique def test_truncate(self): major_axis = Index(lrange(4)) @@ -1245,18 +1476,18 @@ def test_truncate(self): labels=[major_labels, minor_labels]) result = index.truncate(before=1) - self.assertNotIn('foo', result.levels[0]) - self.assertIn(1, result.levels[0]) + assert 'foo' not in result.levels[0] + 
assert 1 in result.levels[0] result = index.truncate(after=1) - self.assertNotIn(2, result.levels[0]) - self.assertIn(1, result.levels[0]) + assert 2 not in result.levels[0] + assert 1 in result.levels[0] result = index.truncate(before=1, after=2) - self.assertEqual(len(result.levels[0]), 2) + assert len(result.levels[0]) == 2 # after < before - self.assertRaises(ValueError, index.truncate, 3, 1) + pytest.raises(ValueError, index.truncate, 3, 1) def test_get_indexer(self): major_axis = Index(lrange(4)) @@ -1294,28 +1525,41 @@ def test_get_indexer(self): assert_almost_equal(r1, rbfill1) # pass non-MultiIndex - r1 = idx1.get_indexer(idx2._tuple_index) + r1 = idx1.get_indexer(idx2.values) rexp1 = idx1.get_indexer(idx2) assert_almost_equal(r1, rexp1) r1 = idx1.get_indexer([1, 2, 3]) - self.assertTrue((r1 == [-1, -1, -1]).all()) + assert (r1 == [-1, -1, -1]).all() # create index with duplicates idx1 = Index(lrange(10) + lrange(10)) idx2 = Index(lrange(20)) msg = "Reindexing only valid with uniquely valued Index objects" - with assertRaisesRegexp(InvalidIndexError, msg): + with tm.assert_raises_regex(InvalidIndexError, msg): idx1.get_indexer(idx2) def test_get_indexer_nearest(self): midx = MultiIndex.from_tuples([('a', 1), ('b', 2)]) - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): midx.get_indexer(['a'], method='nearest') - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): midx.get_indexer(['a'], method='pad', tolerance=2) + def test_hash_collisions(self): + # non-smoke test that we don't get hash collisions + + index = MultiIndex.from_product([np.arange(1000), np.arange(1000)], + names=['one', 'two']) + result = index.get_indexer(index.values) + tm.assert_numpy_array_equal(result, np.arange( + len(index), dtype='intp')) + + for i in [0, 1, len(index) - 2, len(index) - 1]: + result = index.get_loc(index[i]) + assert result == i + def test_format(self): self.index.format() self.index[:0].format() 
@@ -1331,7 +1575,7 @@ def test_format_sparse_display(self): [0, 1, 0, 0, 1, 0], [0, 0, 0, 0, 0, 0]]) result = index.format() - self.assertEqual(result[3], '1 0 0 0') + assert result[3] == '1 0 0 0' def test_format_sparse_config(self): warn_filters = warnings.filters @@ -1341,9 +1585,9 @@ def test_format_sparse_config(self): pd.set_option('display.multi_sparse', False) result = self.index.format() - self.assertEqual(result[1], 'foo two') + assert result[1] == 'foo two' - self.reset_display_options() + tm.reset_display_options() warnings.filters = warn_filters @@ -1392,7 +1636,7 @@ def test_to_hierarchical(self): labels=[[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1], [0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1]]) tm.assert_index_equal(result, expected) - self.assertEqual(result.names, index.names) + assert result.names == index.names # K > 1 result = index.to_hierarchical(3, 2) @@ -1400,7 +1644,7 @@ def test_to_hierarchical(self): labels=[[0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1], [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1]]) tm.assert_index_equal(result, expected) - self.assertEqual(result.names, index.names) + assert result.names == index.names # non-sorted index = MultiIndex.from_tuples([(2, 'c'), (1, 'b'), @@ -1414,18 +1658,19 @@ def test_to_hierarchical(self): (2, 'b'), (2, 'b')], names=['N1', 'N2']) tm.assert_index_equal(result, expected) - self.assertEqual(result.names, index.names) + assert result.names == index.names def test_bounds(self): self.index._bounds def test_equals_multi(self): - self.assertTrue(self.index.equals(self.index)) - self.assertTrue(self.index.equal_levels(self.index)) - - self.assertFalse(self.index.equals(self.index[:-1])) + assert self.index.equals(self.index) + assert not self.index.equals(self.index.values) + assert self.index.equals(Index(self.index.values)) - self.assertTrue(self.index.equals(self.index._tuple_index)) + assert self.index.equal_levels(self.index) + assert not self.index.equals(self.index[:-1]) + assert not self.index.equals(self.index[-1]) # 
different number of levels index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( @@ -1433,8 +1678,8 @@ def test_equals_multi(self): [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) index2 = MultiIndex(levels=index.levels[:-1], labels=index.labels[:-1]) - self.assertFalse(index.equals(index2)) - self.assertFalse(index.equal_levels(index2)) + assert not index.equals(index2) + assert not index.equal_levels(index2) # levels are different major_axis = Index(lrange(4)) @@ -1445,8 +1690,8 @@ def test_equals_multi(self): index = MultiIndex(levels=[major_axis, minor_axis], labels=[major_labels, minor_labels]) - self.assertFalse(self.index.equals(index)) - self.assertFalse(self.index.equal_levels(index)) + assert not self.index.equals(index) + assert not self.index.equal_levels(index) # some of the labels are different major_axis = Index(['foo', 'bar', 'baz', 'qux']) @@ -1457,52 +1702,63 @@ def test_equals_multi(self): index = MultiIndex(levels=[major_axis, minor_axis], labels=[major_labels, minor_labels]) - self.assertFalse(self.index.equals(index)) + assert not self.index.equals(index) + + def test_equals_missing_values(self): + # make sure take is not using -1 + i = pd.MultiIndex.from_tuples([(0, pd.NaT), + (0, pd.Timestamp('20130101'))]) + result = i[0:1].equals(i[0]) + assert not result + result = i[1:2].equals(i[1]) + assert not result def test_identical(self): mi = self.index.copy() mi2 = self.index.copy() - self.assertTrue(mi.identical(mi2)) + assert mi.identical(mi2) mi = mi.set_names(['new1', 'new2']) - self.assertTrue(mi.equals(mi2)) - self.assertFalse(mi.identical(mi2)) + assert mi.equals(mi2) + assert not mi.identical(mi2) mi2 = mi2.set_names(['new1', 'new2']) - self.assertTrue(mi.identical(mi2)) + assert mi.identical(mi2) mi3 = Index(mi.tolist(), names=mi.names) mi4 = Index(mi.tolist(), names=mi.names, tupleize_cols=False) - self.assertTrue(mi.identical(mi3)) - self.assertFalse(mi.identical(mi4)) - self.assertTrue(mi.equals(mi4)) 
+ assert mi.identical(mi3) + assert not mi.identical(mi4) + assert mi.equals(mi4) def test_is_(self): mi = MultiIndex.from_tuples(lzip(range(10), range(10))) - self.assertTrue(mi.is_(mi)) - self.assertTrue(mi.is_(mi.view())) - self.assertTrue(mi.is_(mi.view().view().view().view())) + assert mi.is_(mi) + assert mi.is_(mi.view()) + assert mi.is_(mi.view().view().view().view()) mi2 = mi.view() # names are metadata, they don't change id mi2.names = ["A", "B"] - self.assertTrue(mi2.is_(mi)) - self.assertTrue(mi.is_(mi2)) + assert mi2.is_(mi) + assert mi.is_(mi2) - self.assertTrue(mi.is_(mi.set_names(["C", "D"]))) + assert mi.is_(mi.set_names(["C", "D"])) mi2 = mi.view() mi2.set_names(["E", "F"], inplace=True) - self.assertTrue(mi.is_(mi2)) + assert mi.is_(mi2) # levels are inherent properties, they change identity mi3 = mi2.set_levels([lrange(10), lrange(10)]) - self.assertFalse(mi3.is_(mi2)) + assert not mi3.is_(mi2) # shouldn't change - self.assertTrue(mi2.is_(mi)) + assert mi2.is_(mi) mi4 = mi3.view() - mi4.set_levels([[1 for _ in range(10)], lrange(10)], inplace=True) - self.assertFalse(mi4.is_(mi3)) + + # GH 17464 - Remove duplicate MultiIndex levels + mi4.set_levels([lrange(10), lrange(10)], inplace=True) + assert not mi4.is_(mi3) mi5 = mi.view() mi5.set_levels(mi5.levels, inplace=True) - self.assertFalse(mi5.is_(mi)) + assert not mi5.is_(mi) def test_union(self): piece1 = self.index[:5][::-1] @@ -1510,69 +1766,69 @@ def test_union(self): the_union = piece1 | piece2 - tups = sorted(self.index._tuple_index) + tups = sorted(self.index.values) expected = MultiIndex.from_tuples(tups) - self.assertTrue(the_union.equals(expected)) + assert the_union.equals(expected) # corner case, pass self or empty thing: the_union = self.index.union(self.index) - self.assertIs(the_union, self.index) + assert the_union is self.index the_union = self.index.union(self.index[:0]) - self.assertIs(the_union, self.index) + assert the_union is self.index # won't work in python 3 - # tuples = 
self.index._tuple_index + # tuples = self.index.values # result = self.index[:4] | tuples[4:] - # self.assertTrue(result.equals(tuples)) + # assert result.equals(tuples) # not valid for python 3 # def test_union_with_regular_index(self): # other = Index(['A', 'B', 'C']) # result = other.union(self.index) - # self.assertIn(('foo', 'one'), result) - # self.assertIn('B', result) + # assert ('foo', 'one') in result + # assert 'B' in result # result2 = self.index.union(other) - # self.assertTrue(result.equals(result2)) + # assert result.equals(result2) def test_intersection(self): piece1 = self.index[:5][::-1] piece2 = self.index[3:] the_int = piece1 & piece2 - tups = sorted(self.index[3:5]._tuple_index) + tups = sorted(self.index[3:5].values) expected = MultiIndex.from_tuples(tups) - self.assertTrue(the_int.equals(expected)) + assert the_int.equals(expected) # corner case, pass self the_int = self.index.intersection(self.index) - self.assertIs(the_int, self.index) + assert the_int is self.index # empty intersection: disjoint empty = self.index[:2] & self.index[2:] expected = self.index[:0] - self.assertTrue(empty.equals(expected)) + assert empty.equals(expected) # can't do in python 3 - # tuples = self.index._tuple_index + # tuples = self.index.values # result = self.index & tuples - # self.assertTrue(result.equals(tuples)) + # assert result.equals(tuples) def test_sub(self): first = self.index # - now raises (previously was set op difference) - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): first - self.index[-3:] - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): self.index[-3:] - first - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): self.index[-3:] - first.tolist() - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): first.tolist() - self.index[-3:] def test_difference(self): @@ -1583,66 +1839,95 @@ def test_difference(self): sortorder=0, names=self.index.names) - tm.assertIsInstance(result, 
MultiIndex) - self.assertTrue(result.equals(expected)) - self.assertEqual(result.names, self.index.names) + assert isinstance(result, MultiIndex) + assert result.equals(expected) + assert result.names == self.index.names # empty difference: reflexive result = self.index.difference(self.index) expected = self.index[:0] - self.assertTrue(result.equals(expected)) - self.assertEqual(result.names, self.index.names) + assert result.equals(expected) + assert result.names == self.index.names # empty difference: superset result = self.index[-3:].difference(self.index) expected = self.index[:0] - self.assertTrue(result.equals(expected)) - self.assertEqual(result.names, self.index.names) + assert result.equals(expected) + assert result.names == self.index.names # empty difference: degenerate result = self.index[:0].difference(self.index) expected = self.index[:0] - self.assertTrue(result.equals(expected)) - self.assertEqual(result.names, self.index.names) + assert result.equals(expected) + assert result.names == self.index.names # names not the same chunklet = self.index[-3:] chunklet.names = ['foo', 'baz'] result = first.difference(chunklet) - self.assertEqual(result.names, (None, None)) + assert result.names == (None, None) # empty, but non-equal result = self.index.difference(self.index.sortlevel(1)[0]) - self.assertEqual(len(result), 0) + assert len(result) == 0 # raise Exception called with non-MultiIndex - result = first.difference(first._tuple_index) - self.assertTrue(result.equals(first[:0])) + result = first.difference(first.values) + assert result.equals(first[:0]) # name from empty array result = first.difference([]) - self.assertTrue(first.equals(result)) - self.assertEqual(first.names, result.names) + assert first.equals(result) + assert first.names == result.names # name from non-empty array result = first.difference([('foo', 'one')]) expected = pd.MultiIndex.from_tuples([('bar', 'one'), ('baz', 'two'), ( 'foo', 'two'), ('qux', 'one'), ('qux', 'two')]) 
expected.names = first.names - self.assertEqual(first.names, result.names) - assertRaisesRegexp(TypeError, "other must be a MultiIndex or a list" - " of tuples", first.difference, [1, 2, 3, 4, 5]) + assert first.names == result.names + tm.assert_raises_regex(TypeError, "other must be a MultiIndex " + "or a list of tuples", + first.difference, [1, 2, 3, 4, 5]) def test_from_tuples(self): - assertRaisesRegexp(TypeError, 'Cannot infer number of levels from' - ' empty list', MultiIndex.from_tuples, []) + tm.assert_raises_regex(TypeError, 'Cannot infer number of levels ' + 'from empty list', + MultiIndex.from_tuples, []) + + expected = MultiIndex(levels=[[1, 3], [2, 4]], + labels=[[0, 1], [0, 1]], + names=['a', 'b']) + + # input tuples + result = MultiIndex.from_tuples(((1, 2), (3, 4)), names=['a', 'b']) + tm.assert_index_equal(result, expected) + + def test_from_tuples_iterator(self): + # GH 18434 + # input iterator for tuples + expected = MultiIndex(levels=[[1, 3], [2, 4]], + labels=[[0, 1], [0, 1]], + names=['a', 'b']) + + result = MultiIndex.from_tuples(zip([1, 3], [2, 4]), names=['a', 'b']) + tm.assert_index_equal(result, expected) - idx = MultiIndex.from_tuples(((1, 2), (3, 4)), names=['a', 'b']) - self.assertEqual(len(idx), 2) + # input non-iterables + with tm.assert_raises_regex( + TypeError, 'Input must be a list / sequence of tuple-likes.'): + MultiIndex.from_tuples(0) + + def test_from_tuples_empty(self): + # GH 16777 + result = MultiIndex.from_tuples([], names=['a', 'b']) + expected = MultiIndex.from_arrays(arrays=[[], []], + names=['a', 'b']) + tm.assert_index_equal(result, expected) def test_argsort(self): result = self.index.argsort() - expected = self.index._tuple_index.argsort() + expected = self.index.values.argsort() tm.assert_numpy_array_equal(result, expected) def test_sortlevel(self): @@ -1655,23 +1940,23 @@ def test_sortlevel(self): sorted_idx, _ = index.sortlevel(0) expected = MultiIndex.from_tuples(sorted(tuples)) - 
self.assertTrue(sorted_idx.equals(expected)) + assert sorted_idx.equals(expected) sorted_idx, _ = index.sortlevel(0, ascending=False) - self.assertTrue(sorted_idx.equals(expected[::-1])) + assert sorted_idx.equals(expected[::-1]) sorted_idx, _ = index.sortlevel(1) by1 = sorted(tuples, key=lambda x: (x[1], x[0])) expected = MultiIndex.from_tuples(by1) - self.assertTrue(sorted_idx.equals(expected)) + assert sorted_idx.equals(expected) sorted_idx, _ = index.sortlevel(1, ascending=False) - self.assertTrue(sorted_idx.equals(expected[::-1])) + assert sorted_idx.equals(expected[::-1]) def test_sortlevel_not_sort_remaining(self): mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC')) sorted_idx, _ = mi.sortlevel('A', sort_remaining=False) - self.assertTrue(sorted_idx.equals(mi)) + assert sorted_idx.equals(mi) def test_sortlevel_deterministic(self): tuples = [('bar', 'one'), ('foo', 'two'), ('qux', 'two'), @@ -1681,18 +1966,18 @@ def test_sortlevel_deterministic(self): sorted_idx, _ = index.sortlevel(0) expected = MultiIndex.from_tuples(sorted(tuples)) - self.assertTrue(sorted_idx.equals(expected)) + assert sorted_idx.equals(expected) sorted_idx, _ = index.sortlevel(0, ascending=False) - self.assertTrue(sorted_idx.equals(expected[::-1])) + assert sorted_idx.equals(expected[::-1]) sorted_idx, _ = index.sortlevel(1) by1 = sorted(tuples, key=lambda x: (x[1], x[0])) expected = MultiIndex.from_tuples(by1) - self.assertTrue(sorted_idx.equals(expected)) + assert sorted_idx.equals(expected) sorted_idx, _ = index.sortlevel(1, ascending=False) - self.assertTrue(sorted_idx.equals(expected[::-1])) + assert sorted_idx.equals(expected[::-1]) def test_dims(self): pass @@ -1704,66 +1989,66 @@ def test_drop(self): dropped2 = self.index.drop(index) expected = self.index[[0, 2, 3, 5]] - self.assert_index_equal(dropped, expected) - self.assert_index_equal(dropped2, expected) + tm.assert_index_equal(dropped, expected) + tm.assert_index_equal(dropped2, expected) dropped = 
self.index.drop(['bar']) expected = self.index[[0, 1, 3, 4, 5]] - self.assert_index_equal(dropped, expected) + tm.assert_index_equal(dropped, expected) dropped = self.index.drop('foo') expected = self.index[[2, 3, 4, 5]] - self.assert_index_equal(dropped, expected) + tm.assert_index_equal(dropped, expected) index = MultiIndex.from_tuples([('bar', 'two')]) - self.assertRaises(KeyError, self.index.drop, [('bar', 'two')]) - self.assertRaises(KeyError, self.index.drop, index) - self.assertRaises(KeyError, self.index.drop, ['foo', 'two']) + pytest.raises(KeyError, self.index.drop, [('bar', 'two')]) + pytest.raises(KeyError, self.index.drop, index) + pytest.raises(KeyError, self.index.drop, ['foo', 'two']) # partially correct argument mixed_index = MultiIndex.from_tuples([('qux', 'one'), ('bar', 'two')]) - self.assertRaises(KeyError, self.index.drop, mixed_index) + pytest.raises(KeyError, self.index.drop, mixed_index) # error='ignore' dropped = self.index.drop(index, errors='ignore') expected = self.index[[0, 1, 2, 3, 4, 5]] - self.assert_index_equal(dropped, expected) + tm.assert_index_equal(dropped, expected) dropped = self.index.drop(mixed_index, errors='ignore') expected = self.index[[0, 1, 2, 3, 5]] - self.assert_index_equal(dropped, expected) + tm.assert_index_equal(dropped, expected) dropped = self.index.drop(['foo', 'two'], errors='ignore') expected = self.index[[2, 3, 4, 5]] - self.assert_index_equal(dropped, expected) + tm.assert_index_equal(dropped, expected) # mixed partial / full drop dropped = self.index.drop(['foo', ('qux', 'one')]) expected = self.index[[2, 3, 5]] - self.assert_index_equal(dropped, expected) + tm.assert_index_equal(dropped, expected) # mixed partial / full drop / error='ignore' mixed_index = ['foo', ('qux', 'one'), 'two'] - self.assertRaises(KeyError, self.index.drop, mixed_index) + pytest.raises(KeyError, self.index.drop, mixed_index) dropped = self.index.drop(mixed_index, errors='ignore') expected = self.index[[2, 3, 5]] - 
self.assert_index_equal(dropped, expected) + tm.assert_index_equal(dropped, expected) def test_droplevel_with_names(self): index = self.index[self.index.get_loc('foo')] dropped = index.droplevel(0) - self.assertEqual(dropped.name, 'second') + assert dropped.name == 'second' index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])], names=['one', 'two', 'three']) dropped = index.droplevel(0) - self.assertEqual(dropped.names, ('two', 'three')) + assert dropped.names == ('two', 'three') dropped = index.droplevel('two') expected = index.droplevel(1) - self.assertTrue(dropped.equals(expected)) + assert dropped.equals(expected) def test_droplevel_multiple(self): index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( @@ -1773,7 +2058,7 @@ def test_droplevel_multiple(self): dropped = index[:2].droplevel(['three', 'one']) expected = index[:2].droplevel(2).droplevel(0) - self.assertTrue(dropped.equals(expected)) + assert dropped.equals(expected) def test_drop_not_lexsorted(self): # GH 12078 @@ -1781,7 +2066,7 @@ def test_drop_not_lexsorted(self): # define the lexsorted version of the multi-index tuples = [('a', ''), ('b1', 'c1'), ('b2', 'c2')] lexsorted_mi = MultiIndex.from_tuples(tuples, names=['b', 'c']) - self.assertTrue(lexsorted_mi.is_lexsorted()) + assert lexsorted_mi.is_lexsorted() # and the not-lexsorted version df = pd.DataFrame(columns=['a', 'b', 'c', 'd'], @@ -1789,19 +2074,19 @@ def test_drop_not_lexsorted(self): df = df.pivot_table(index='a', columns=['b', 'c'], values='d') df = df.reset_index() not_lexsorted_mi = df.columns - self.assertFalse(not_lexsorted_mi.is_lexsorted()) + assert not not_lexsorted_mi.is_lexsorted() # compare the results - self.assert_index_equal(lexsorted_mi, not_lexsorted_mi) - with self.assert_produces_warning(PerformanceWarning): - self.assert_index_equal(lexsorted_mi.drop('a'), - 
not_lexsorted_mi.drop('a')) + tm.assert_index_equal(lexsorted_mi, not_lexsorted_mi) + with tm.assert_produces_warning(PerformanceWarning): + tm.assert_index_equal(lexsorted_mi.drop('a'), + not_lexsorted_mi.drop('a')) def test_insert(self): # key contained in all levels new_index = self.index.insert(0, ('bar', 'two')) - self.assertTrue(new_index.equal_levels(self.index)) - self.assertEqual(new_index[0], ('bar', 'two')) + assert new_index.equal_levels(self.index) + assert new_index[0] == ('bar', 'two') # key not contained in all levels new_index = self.index.insert(0, ('abc', 'three')) @@ -1811,11 +2096,11 @@ def test_insert(self): exp1 = Index(list(self.index.levels[1]) + ['three'], name='second') tm.assert_index_equal(new_index.levels[1], exp1) - self.assertEqual(new_index[0], ('abc', 'three')) + assert new_index[0] == ('abc', 'three') # key wrong length msg = "Item must have length equal to number of levels" - with assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): self.index.insert(0, ('foo2', )) left = pd.DataFrame([['a', 'b', 0], ['b', 'd', 1]], @@ -1865,7 +2150,7 @@ def test_insert(self): def test_take_preserve_name(self): taken = self.index.take([3, 0, 1]) - self.assertEqual(taken.names, self.index.names) + assert taken.names == self.index.names def test_take_fill_value(self): # GH 12631 @@ -1899,12 +2184,12 @@ def test_take_fill_value(self): msg = ('When allow_fill=True and fill_value is not None, ' 'all indices must be >= -1') - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -5]), fill_value=True) - with tm.assertRaises(IndexError): + with pytest.raises(IndexError): idx.take(np.array([1, -5])) def take_invalid_kwargs(self): @@ -1914,16 +2199,16 @@ def take_invalid_kwargs(self): indices = [1, 2] msg = r"take\(\) 
got an unexpected keyword argument 'foo'" - tm.assertRaisesRegexp(TypeError, msg, idx.take, - indices, foo=2) + tm.assert_raises_regex(TypeError, msg, idx.take, + indices, foo=2) msg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, idx.take, - indices, out=indices) + tm.assert_raises_regex(ValueError, msg, idx.take, + indices, out=indices) msg = "the 'mode' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, idx.take, - indices, mode='clip') + tm.assert_raises_regex(ValueError, msg, idx.take, + indices, mode='clip') def test_join_level(self): def _check_how(other, how): @@ -1932,8 +2217,8 @@ def _check_how(other, how): return_indexers=True) exp_level = other.join(self.index.levels[1], how=how) - self.assertTrue(join_index.levels[0].equals(self.index.levels[0])) - self.assertTrue(join_index.levels[1].equals(exp_level)) + assert join_index.levels[0].equals(self.index.levels[0]) + assert join_index.levels[1].equals(exp_level) # pare down levels mask = np.array( @@ -1946,7 +2231,7 @@ def _check_how(other, how): self.index.join(other, how=how, level='second', return_indexers=True) - self.assertTrue(join_index.equals(join_index2)) + assert join_index.equals(join_index2) tm.assert_numpy_array_equal(lidx, lidx2) tm.assert_numpy_array_equal(ridx, ridx2) tm.assert_numpy_array_equal(join_index2.values, exp_values) @@ -1964,17 +2249,17 @@ def _check_all(other): # some corner cases idx = Index(['three', 'one', 'two']) result = idx.join(self.index, level='second') - tm.assertIsInstance(result, MultiIndex) + assert isinstance(result, MultiIndex) - assertRaisesRegexp(TypeError, "Join.*MultiIndex.*ambiguous", - self.index.join, self.index, level=1) + tm.assert_raises_regex(TypeError, "Join.*MultiIndex.*ambiguous", + self.index.join, self.index, level=1) def test_join_self(self): kinds = 'outer', 'inner', 'left', 'right' for kind in kinds: res = self.index joined = res.join(res, how=kind) - self.assertIs(res, joined) + assert res is 
joined def test_join_multi(self): # GH 10665 @@ -1988,36 +2273,36 @@ def test_join_multi(self): [np.arange(4), [1, 2]], names=['a', 'b']) exp_lidx = np.array([1, 2, 5, 6, 9, 10, 13, 14], dtype=np.intp) exp_ridx = np.array([0, 1, 0, 1, 0, 1, 0, 1], dtype=np.intp) - self.assert_index_equal(jidx, exp_idx) - self.assert_numpy_array_equal(lidx, exp_lidx) - self.assert_numpy_array_equal(ridx, exp_ridx) + tm.assert_index_equal(jidx, exp_idx) + tm.assert_numpy_array_equal(lidx, exp_lidx) + tm.assert_numpy_array_equal(ridx, exp_ridx) # flip jidx, ridx, lidx = idx.join(midx, how='inner', return_indexers=True) - self.assert_index_equal(jidx, exp_idx) - self.assert_numpy_array_equal(lidx, exp_lidx) - self.assert_numpy_array_equal(ridx, exp_ridx) + tm.assert_index_equal(jidx, exp_idx) + tm.assert_numpy_array_equal(lidx, exp_lidx) + tm.assert_numpy_array_equal(ridx, exp_ridx) # keep MultiIndex jidx, lidx, ridx = midx.join(idx, how='left', return_indexers=True) exp_ridx = np.array([-1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1], dtype=np.intp) - self.assert_index_equal(jidx, midx) - self.assertIsNone(lidx) - self.assert_numpy_array_equal(ridx, exp_ridx) + tm.assert_index_equal(jidx, midx) + assert lidx is None + tm.assert_numpy_array_equal(ridx, exp_ridx) # flip jidx, ridx, lidx = idx.join(midx, how='right', return_indexers=True) - self.assert_index_equal(jidx, midx) - self.assertIsNone(lidx) - self.assert_numpy_array_equal(ridx, exp_ridx) + tm.assert_index_equal(jidx, midx) + assert lidx is None + tm.assert_numpy_array_equal(ridx, exp_ridx) def test_reindex(self): result, indexer = self.index.reindex(list(self.index[:4])) - tm.assertIsInstance(result, MultiIndex) + assert isinstance(result, MultiIndex) self.check_level_names(result, self.index[:4].names) result, indexer = self.index.reindex(list(self.index)) - tm.assertIsInstance(result, MultiIndex) - self.assertIsNone(indexer) + assert isinstance(result, MultiIndex) + assert indexer is None 
self.check_level_names(result, self.index.names) def test_reindex_level(self): @@ -2029,28 +2314,29 @@ def test_reindex_level(self): exp_index = self.index.join(idx, level='second', how='right') exp_index2 = self.index.join(idx, level='second', how='left') - self.assertTrue(target.equals(exp_index)) + assert target.equals(exp_index) exp_indexer = np.array([0, 2, 4]) tm.assert_numpy_array_equal(indexer, exp_indexer, check_dtype=False) - self.assertTrue(target2.equals(exp_index2)) + assert target2.equals(exp_index2) exp_indexer2 = np.array([0, -1, 0, -1, 0, -1]) tm.assert_numpy_array_equal(indexer2, exp_indexer2, check_dtype=False) - assertRaisesRegexp(TypeError, "Fill method not supported", - self.index.reindex, self.index, method='pad', - level='second') + tm.assert_raises_regex(TypeError, "Fill method not supported", + self.index.reindex, self.index, + method='pad', level='second') - assertRaisesRegexp(TypeError, "Fill method not supported", idx.reindex, - idx, method='bfill', level='first') + tm.assert_raises_regex(TypeError, "Fill method not supported", + idx.reindex, idx, method='bfill', + level='first') def test_duplicates(self): - self.assertFalse(self.index.has_duplicates) - self.assertTrue(self.index.append(self.index).has_duplicates) + assert not self.index.has_duplicates + assert self.index.append(self.index).has_duplicates index = MultiIndex(levels=[[0, 1], [0, 1, 2]], labels=[ [0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]]) - self.assertTrue(index.has_duplicates) + assert index.has_duplicates # GH 9075 t = [(u('x'), u('out'), u('z'), 5, u('y'), u('in'), u('z'), 169), @@ -2073,7 +2359,7 @@ def test_duplicates(self): (u('x'), u('out'), u('z'), 12, u('y'), u('in'), u('z'), 144)] index = pd.MultiIndex.from_tuples(t) - self.assertFalse(index.has_duplicates) + assert not index.has_duplicates # handle int64 overflow if possible def check(nlevels, with_nulls): @@ -2082,7 +2368,7 @@ def check(nlevels, with_nulls): if with_nulls: # inject some null values 
labels[500] = -1 # common nan value - labels = list(labels.copy() for i in range(nlevels)) + labels = [labels.copy() for i in range(nlevels)] for i in range(nlevels): labels[i][500 + i - nlevels // 2] = -1 @@ -2094,7 +2380,7 @@ def check(nlevels, with_nulls): # no dups index = MultiIndex(levels=levels, labels=labels) - self.assertFalse(index.has_duplicates) + assert not index.has_duplicates # with a dup if with_nulls: @@ -2105,7 +2391,7 @@ def check(nlevels, with_nulls): values = index.values.tolist() index = MultiIndex.from_tuples(values + [values[0]]) - self.assertTrue(index.has_duplicates) + assert index.has_duplicates # no overflow check(4, False) @@ -2123,14 +2409,14 @@ def check(nlevels, with_nulls): for keep in ['first', 'last', False]: left = mi.duplicated(keep=keep) - right = pd.hashtable.duplicated_object(mi.values, keep=keep) + right = pd._libs.hashtable.duplicated_object(mi.values, keep=keep) tm.assert_numpy_array_equal(left, right) # GH5873 for a in [101, 102]: mi = MultiIndex.from_arrays([[101, a], [3.5, np.nan]]) - self.assertFalse(mi.has_duplicates) - self.assertEqual(mi.get_duplicates(), []) + assert not mi.has_duplicates + assert mi.get_duplicates() == [] tm.assert_numpy_array_equal(mi.duplicated(), np.zeros( 2, dtype='bool')) @@ -2140,9 +2426,9 @@ def check(nlevels, with_nulls): lab = product(range(-1, n), range(-1, m)) mi = MultiIndex(levels=[list('abcde')[:n], list('WXYZ')[:m]], labels=np.random.permutation(list(lab)).T) - self.assertEqual(len(mi), (n + 1) * (m + 1)) - self.assertFalse(mi.has_duplicates) - self.assertEqual(mi.get_duplicates(), []) + assert len(mi) == (n + 1) * (m + 1) + assert not mi.has_duplicates + assert mi.get_duplicates() == [] tm.assert_numpy_array_equal(mi.duplicated(), np.zeros( len(mi), dtype='bool')) @@ -2154,8 +2440,8 @@ def test_duplicate_meta_data(self): index.set_names([None, None]), index.set_names([None, 'Num']), index.set_names(['Upper', 'Num']), ]: - self.assertTrue(idx.has_duplicates) - 
self.assertEqual(idx.drop_duplicates().names, idx.names) + assert idx.has_duplicates + assert idx.drop_duplicates().names == idx.names def test_get_unique_index(self): idx = self.index[[0, 1, 0, 1, 1, 0, 0]] @@ -2163,8 +2449,8 @@ def test_get_unique_index(self): for dropna in [False, True]: result = idx._get_unique_index(dropna=dropna) - self.assertTrue(result.unique) - self.assert_index_equal(result, expected) + assert result.unique + tm.assert_index_equal(result, expected) def test_unique(self): mi = pd.MultiIndex.from_arrays([[1, 2, 1, 2], [1, 1, 1, 2]]) @@ -2183,6 +2469,20 @@ def test_unique(self): exp = pd.MultiIndex.from_arrays([['a'], ['a']]) tm.assert_index_equal(res, exp) + @pytest.mark.parametrize('level', [0, 'first', 1, 'second']) + def test_unique_level(self, level): + # GH #17896 - with level= argument + result = self.index.unique(level=level) + expected = self.index.get_level_values(level).unique() + tm.assert_index_equal(result, expected) + + # With already unique level + mi = pd.MultiIndex.from_arrays([[1, 3, 2, 4], [1, 3, 2, 5]], + names=['first', 'second']) + result = mi.unique(level=level) + expected = mi.get_level_values(level) + tm.assert_index_equal(result, expected) + def test_unique_datetimelike(self): idx1 = pd.DatetimeIndex(['2015-01-01', '2015-01-01', '2015-01-01', '2015-01-01', 'NaT', 'NaT']) @@ -2201,14 +2501,13 @@ def test_unique_datetimelike(self): def test_tolist(self): result = self.index.tolist() exp = list(self.index.values) - self.assertEqual(result, exp) + assert result == exp def test_repr_with_unicode_data(self): with pd.core.config.option_context("display.encoding", 'UTF-8'): d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} index = pd.DataFrame(d).set_index(["a", "b"]).index - self.assertFalse("\\u" in repr(index) - ) # we don't want unicode-escaped + assert "\\u" not in repr(index) # we don't want unicode-escaped def test_repr_roundtrip(self): @@ -2222,10 +2521,8 @@ def test_repr_roundtrip(self): result = 
eval(repr(mi)) # string coerces to unicode tm.assert_index_equal(result, mi, exact=False) - self.assertEqual( - mi.get_level_values('first').inferred_type, 'string') - self.assertEqual( - result.get_level_values('first').inferred_type, 'unicode') + assert mi.get_level_values('first').inferred_type == 'string' + assert result.get_level_values('first').inferred_type == 'unicode' mi_u = MultiIndex.from_product( [list(u'ab'), range(3)], names=['first', 'second']) @@ -2241,7 +2538,6 @@ def test_repr_roundtrip(self): # long format mi = MultiIndex.from_product([list('abcdefg'), range(10)], names=['first', 'second']) - result = str(mi) if PY3: tm.assert_index_equal(eval(repr(mi)), mi, exact=True) @@ -2249,13 +2545,9 @@ def test_repr_roundtrip(self): result = eval(repr(mi)) # string coerces to unicode tm.assert_index_equal(result, mi, exact=False) - self.assertEqual( - mi.get_level_values('first').inferred_type, 'string') - self.assertEqual( - result.get_level_values('first').inferred_type, 'unicode') + assert mi.get_level_values('first').inferred_type == 'string' + assert result.get_level_values('first').inferred_type == 'unicode' - mi = MultiIndex.from_product( - [list(u'abcdefg'), range(10)], names=['first', 'second']) result = eval(repr(mi_u)) tm.assert_index_equal(result, mi_u, exact=True) @@ -2284,25 +2576,288 @@ def test_bytestring_with_unicode(self): def test_slice_keep_name(self): x = MultiIndex.from_tuples([('a', 'b'), (1, 2), ('c', 'd')], names=['x', 'y']) - self.assertEqual(x[1:].names, x.names) + assert x[1:].names == x.names - def test_isnull_behavior(self): + def test_isna_behavior(self): # should not segfault GH5123 # NOTE: if MI representation changes, may make sense to allow - # isnull(MI) - with tm.assertRaises(NotImplementedError): - pd.isnull(self.index) + # isna(MI) + with pytest.raises(NotImplementedError): + pd.isna(self.index) def test_level_setting_resets_attributes(self): - ind = MultiIndex.from_arrays([ + ind = pd.MultiIndex.from_arrays([ ['A', 
'A', 'B', 'B', 'B'], [1, 2, 1, 2, 3] ]) assert ind.is_monotonic - ind.set_levels([['A', 'B', 'A', 'A', 'B'], [2, 1, 3, -2, 5]], - inplace=True) + ind.set_levels([['A', 'B'], [1, 3, 2]], inplace=True) # if this fails, probably didn't reset the cache correctly. assert not ind.is_monotonic + def test_is_monotonic_increasing(self): + i = MultiIndex.from_product([np.arange(10), + np.arange(10)], names=['one', 'two']) + assert i.is_monotonic + assert i._is_strictly_monotonic_increasing + assert Index(i.values).is_monotonic + assert i._is_strictly_monotonic_increasing + + i = MultiIndex.from_product([np.arange(10, 0, -1), + np.arange(10)], names=['one', 'two']) + assert not i.is_monotonic + assert not i._is_strictly_monotonic_increasing + assert not Index(i.values).is_monotonic + assert not Index(i.values)._is_strictly_monotonic_increasing + + i = MultiIndex.from_product([np.arange(10), + np.arange(10, 0, -1)], + names=['one', 'two']) + assert not i.is_monotonic + assert not i._is_strictly_monotonic_increasing + assert not Index(i.values).is_monotonic + assert not Index(i.values)._is_strictly_monotonic_increasing + + i = MultiIndex.from_product([[1.0, np.nan, 2.0], ['a', 'b', 'c']]) + assert not i.is_monotonic + assert not i._is_strictly_monotonic_increasing + assert not Index(i.values).is_monotonic + assert not Index(i.values)._is_strictly_monotonic_increasing + + # string ordering + i = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], + ['one', 'two', 'three']], + labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=['first', 'second']) + assert not i.is_monotonic + assert not Index(i.values).is_monotonic + assert not i._is_strictly_monotonic_increasing + assert not Index(i.values)._is_strictly_monotonic_increasing + + i = MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], + ['mom', 'next', 'zenith']], + labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=['first', 'second']) + assert i.is_monotonic + assert 
Index(i.values).is_monotonic + assert i._is_strictly_monotonic_increasing + assert Index(i.values)._is_strictly_monotonic_increasing + + # mixed levels, hits the TypeError + i = MultiIndex( + levels=[[1, 2, 3, 4], ['gb00b03mlx29', 'lu0197800237', + 'nl0000289783', + 'nl0000289965', 'nl0000301109']], + labels=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]], + names=['household_id', 'asset_id']) + + assert not i.is_monotonic + assert not i._is_strictly_monotonic_increasing + + # empty + i = MultiIndex.from_arrays([[], []]) + assert i.is_monotonic + assert Index(i.values).is_monotonic + assert i._is_strictly_monotonic_increasing + assert Index(i.values)._is_strictly_monotonic_increasing + + def test_is_monotonic_decreasing(self): + i = MultiIndex.from_product([np.arange(9, -1, -1), + np.arange(9, -1, -1)], + names=['one', 'two']) + assert i.is_monotonic_decreasing + assert i._is_strictly_monotonic_decreasing + assert Index(i.values).is_monotonic_decreasing + assert i._is_strictly_monotonic_decreasing + + i = MultiIndex.from_product([np.arange(10), + np.arange(10, 0, -1)], + names=['one', 'two']) + assert not i.is_monotonic_decreasing + assert not i._is_strictly_monotonic_decreasing + assert not Index(i.values).is_monotonic_decreasing + assert not Index(i.values)._is_strictly_monotonic_decreasing + + i = MultiIndex.from_product([np.arange(10, 0, -1), + np.arange(10)], names=['one', 'two']) + assert not i.is_monotonic_decreasing + assert not i._is_strictly_monotonic_decreasing + assert not Index(i.values).is_monotonic_decreasing + assert not Index(i.values)._is_strictly_monotonic_decreasing + + i = MultiIndex.from_product([[2.0, np.nan, 1.0], ['c', 'b', 'a']]) + assert not i.is_monotonic_decreasing + assert not i._is_strictly_monotonic_decreasing + assert not Index(i.values).is_monotonic_decreasing + assert not Index(i.values)._is_strictly_monotonic_decreasing + + # string ordering + i = MultiIndex(levels=[['qux', 'foo', 'baz', 'bar'], + ['three', 'two', 'one']], + 
labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=['first', 'second']) + assert not i.is_monotonic_decreasing + assert not Index(i.values).is_monotonic_decreasing + assert not i._is_strictly_monotonic_decreasing + assert not Index(i.values)._is_strictly_monotonic_decreasing + + i = MultiIndex(levels=[['qux', 'foo', 'baz', 'bar'], + ['zenith', 'next', 'mom']], + labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=['first', 'second']) + assert i.is_monotonic_decreasing + assert Index(i.values).is_monotonic_decreasing + assert i._is_strictly_monotonic_decreasing + assert Index(i.values)._is_strictly_monotonic_decreasing + + # mixed levels, hits the TypeError + i = MultiIndex( + levels=[[4, 3, 2, 1], ['nl0000301109', 'nl0000289965', + 'nl0000289783', 'lu0197800237', + 'gb00b03mlx29']], + labels=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]], + names=['household_id', 'asset_id']) + + assert not i.is_monotonic_decreasing + assert not i._is_strictly_monotonic_decreasing + + # empty + i = MultiIndex.from_arrays([[], []]) + assert i.is_monotonic_decreasing + assert Index(i.values).is_monotonic_decreasing + assert i._is_strictly_monotonic_decreasing + assert Index(i.values)._is_strictly_monotonic_decreasing + + def test_is_strictly_monotonic_increasing(self): + idx = pd.MultiIndex(levels=[['bar', 'baz'], ['mom', 'next']], + labels=[[0, 0, 1, 1], [0, 0, 0, 1]]) + assert idx.is_monotonic_increasing + assert not idx._is_strictly_monotonic_increasing + + def test_is_strictly_monotonic_decreasing(self): + idx = pd.MultiIndex(levels=[['baz', 'bar'], ['next', 'mom']], + labels=[[0, 0, 1, 1], [0, 0, 0, 1]]) + assert idx.is_monotonic_decreasing + assert not idx._is_strictly_monotonic_decreasing + + def test_reconstruct_sort(self): + + # starts off lexsorted & monotonic + mi = MultiIndex.from_arrays([ + ['A', 'A', 'B', 'B', 'B'], [1, 2, 1, 2, 3] + ]) + assert mi.is_lexsorted() + assert mi.is_monotonic + + recons = 
mi._sort_levels_monotonic() + assert recons.is_lexsorted() + assert recons.is_monotonic + assert mi is recons + + assert mi.equals(recons) + assert Index(mi.values).equals(Index(recons.values)) + + # cannot convert to lexsorted + mi = pd.MultiIndex.from_tuples([('z', 'a'), ('x', 'a'), ('y', 'b'), + ('x', 'b'), ('y', 'a'), ('z', 'b')], + names=['one', 'two']) + assert not mi.is_lexsorted() + assert not mi.is_monotonic + + recons = mi._sort_levels_monotonic() + assert not recons.is_lexsorted() + assert not recons.is_monotonic + + assert mi.equals(recons) + assert Index(mi.values).equals(Index(recons.values)) + + # cannot convert to lexsorted + mi = MultiIndex(levels=[['b', 'd', 'a'], [1, 2, 3]], + labels=[[0, 1, 0, 2], [2, 0, 0, 1]], + names=['col1', 'col2']) + assert not mi.is_lexsorted() + assert not mi.is_monotonic + + recons = mi._sort_levels_monotonic() + assert not recons.is_lexsorted() + assert not recons.is_monotonic + + assert mi.equals(recons) + assert Index(mi.values).equals(Index(recons.values)) + + def test_reconstruct_remove_unused(self): + # xref to GH 2770 + df = DataFrame([['deleteMe', 1, 9], + ['keepMe', 2, 9], + ['keepMeToo', 3, 9]], + columns=['first', 'second', 'third']) + df2 = df.set_index(['first', 'second'], drop=False) + df2 = df2[df2['first'] != 'deleteMe'] + + # removed levels are there + expected = MultiIndex(levels=[['deleteMe', 'keepMe', 'keepMeToo'], + [1, 2, 3]], + labels=[[1, 2], [1, 2]], + names=['first', 'second']) + result = df2.index + tm.assert_index_equal(result, expected) + + expected = MultiIndex(levels=[['keepMe', 'keepMeToo'], + [2, 3]], + labels=[[0, 1], [0, 1]], + names=['first', 'second']) + result = df2.index.remove_unused_levels() + tm.assert_index_equal(result, expected) + + # idempotent + result2 = result.remove_unused_levels() + tm.assert_index_equal(result2, expected) + assert result2.is_(result) + + @pytest.mark.parametrize('level0', [['a', 'd', 'b'], + ['a', 'd', 'b', 'unused']]) + 
@pytest.mark.parametrize('level1', [['w', 'x', 'y', 'z'], + ['w', 'x', 'y', 'z', 'unused']]) + def test_remove_unused_nan(self, level0, level1): + # GH 18417 + mi = pd.MultiIndex(levels=[level0, level1], + labels=[[0, 2, -1, 1, -1], [0, 1, 2, 3, 2]]) + + result = mi.remove_unused_levels() + tm.assert_index_equal(result, mi) + for level in 0, 1: + assert('unused' not in result.levels[level]) + + @pytest.mark.parametrize('first_type,second_type', [ + ('int64', 'int64'), + ('datetime64[D]', 'str')]) + def test_remove_unused_levels_large(self, first_type, second_type): + # GH16556 + + # because tests should be deterministic (and this test in particular + # checks that levels are removed, which is not the case for every + # random input): + rng = np.random.RandomState(4) # seed is arbitrary value that works + + size = 1 << 16 + df = DataFrame(dict( + first=rng.randint(0, 1 << 13, size).astype(first_type), + second=rng.randint(0, 1 << 10, size).astype(second_type), + third=rng.rand(size))) + df = df.groupby(['first', 'second']).sum() + df = df[df.third < 0.1] + + result = df.index.remove_unused_levels() + assert len(result.levels[0]) < len(df.index.levels[0]) + assert len(result.levels[1]) < len(df.index.levels[1]) + assert result.equals(df.index) + + expected = df.reset_index().set_index(['first', 'second']).index + tm.assert_index_equal(result, expected) + def test_isin(self): values = [('foo', 2), ('bar', 3), ('quux', 4)] @@ -2315,16 +2870,25 @@ def test_isin(self): # empty, return dtype bool idx = MultiIndex.from_arrays([[], []]) result = idx.isin(values) - self.assertEqual(len(result), 0) - self.assertEqual(result.dtype, np.bool_) + assert len(result) == 0 + assert result.dtype == np.bool_ - def test_isin_nan(self): + @pytest.mark.skipif(PYPY, reason="tuples cmp recursively on PyPy") + def test_isin_nan_not_pypy(self): idx = MultiIndex.from_arrays([['foo', 'bar'], [1.0, np.nan]]) tm.assert_numpy_array_equal(idx.isin([('bar', np.nan)]), np.array([False, False])) 
tm.assert_numpy_array_equal(idx.isin([('bar', float('nan'))]), np.array([False, False])) + @pytest.mark.skipif(not PYPY, reason="tuples cmp recursively on PyPy") + def test_isin_nan_pypy(self): + idx = MultiIndex.from_arrays([['foo', 'bar'], [1.0, np.nan]]) + tm.assert_numpy_array_equal(idx.isin([('bar', np.nan)]), + np.array([False, True])) + tm.assert_numpy_array_equal(idx.isin([('bar', float('nan'))]), + np.array([False, True])) + def test_isin_level_kwarg(self): idx = MultiIndex.from_arrays([['qux', 'baz', 'foo', 'bar'], np.arange( 4)]) @@ -2339,18 +2903,18 @@ def test_isin_level_kwarg(self): tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level=1)) tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level=-1)) - self.assertRaises(IndexError, idx.isin, vals_0, level=5) - self.assertRaises(IndexError, idx.isin, vals_0, level=-5) + pytest.raises(IndexError, idx.isin, vals_0, level=5) + pytest.raises(IndexError, idx.isin, vals_0, level=-5) - self.assertRaises(KeyError, idx.isin, vals_0, level=1.0) - self.assertRaises(KeyError, idx.isin, vals_1, level=-1.0) - self.assertRaises(KeyError, idx.isin, vals_1, level='A') + pytest.raises(KeyError, idx.isin, vals_0, level=1.0) + pytest.raises(KeyError, idx.isin, vals_1, level=-1.0) + pytest.raises(KeyError, idx.isin, vals_1, level='A') idx.names = ['A', 'B'] tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level='A')) tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level='B')) - self.assertRaises(KeyError, idx.isin, vals_1, level='C') + pytest.raises(KeyError, idx.isin, vals_1, level='C') def test_reindex_preserves_names_when_target_is_list_or_ndarray(self): # GH6552 @@ -2361,39 +2925,33 @@ def test_reindex_preserves_names_when_target_is_list_or_ndarray(self): other_dtype = pd.MultiIndex.from_product([[1, 2], [3, 4]]) # list & ndarray cases - self.assertEqual(idx.reindex([])[0].names, [None, None]) - self.assertEqual(idx.reindex(np.array([]))[0].names, [None, None]) - 
self.assertEqual(idx.reindex(target.tolist())[0].names, [None, None]) - self.assertEqual(idx.reindex(target.values)[0].names, [None, None]) - self.assertEqual( - idx.reindex(other_dtype.tolist())[0].names, [None, None]) - self.assertEqual( - idx.reindex(other_dtype.values)[0].names, [None, None]) + assert idx.reindex([])[0].names == [None, None] + assert idx.reindex(np.array([]))[0].names == [None, None] + assert idx.reindex(target.tolist())[0].names == [None, None] + assert idx.reindex(target.values)[0].names == [None, None] + assert idx.reindex(other_dtype.tolist())[0].names == [None, None] + assert idx.reindex(other_dtype.values)[0].names == [None, None] idx.names = ['foo', 'bar'] - self.assertEqual(idx.reindex([])[0].names, ['foo', 'bar']) - self.assertEqual(idx.reindex(np.array([]))[0].names, ['foo', 'bar']) - self.assertEqual(idx.reindex(target.tolist())[0].names, ['foo', 'bar']) - self.assertEqual(idx.reindex(target.values)[0].names, ['foo', 'bar']) - self.assertEqual( - idx.reindex(other_dtype.tolist())[0].names, ['foo', 'bar']) - self.assertEqual( - idx.reindex(other_dtype.values)[0].names, ['foo', 'bar']) + assert idx.reindex([])[0].names == ['foo', 'bar'] + assert idx.reindex(np.array([]))[0].names == ['foo', 'bar'] + assert idx.reindex(target.tolist())[0].names == ['foo', 'bar'] + assert idx.reindex(target.values)[0].names == ['foo', 'bar'] + assert idx.reindex(other_dtype.tolist())[0].names == ['foo', 'bar'] + assert idx.reindex(other_dtype.values)[0].names == ['foo', 'bar'] def test_reindex_lvl_preserves_names_when_target_is_list_or_array(self): # GH7774 idx = pd.MultiIndex.from_product([[0, 1], ['a', 'b']], names=['foo', 'bar']) - self.assertEqual(idx.reindex([], level=0)[0].names, ['foo', 'bar']) - self.assertEqual(idx.reindex([], level=1)[0].names, ['foo', 'bar']) + assert idx.reindex([], level=0)[0].names == ['foo', 'bar'] + assert idx.reindex([], level=1)[0].names == ['foo', 'bar'] def 
test_reindex_lvl_preserves_type_if_target_is_empty_list_or_array(self): # GH7774 idx = pd.MultiIndex.from_product([[0, 1], ['a', 'b']]) - self.assertEqual(idx.reindex([], level=0)[0].levels[0].dtype.type, - np.int64) - self.assertEqual(idx.reindex([], level=1)[0].levels[1].dtype.type, - np.object_) + assert idx.reindex([], level=0)[0].levels[0].dtype.type == np.int64 + assert idx.reindex([], level=1)[0].levels[1].dtype.type == np.object_ def test_groupby(self): groups = self.index.groupby(np.array([1, 1, 1, 2, 2, 2])) @@ -2403,7 +2961,7 @@ def test_groupby(self): # GH5620 groups = self.index.groupby(self.index) - exp = dict((key, [key]) for key in self.index) + exp = {key: [key] for key in self.index} tm.assert_dict_equal(groups, exp) def test_index_name_retained(self): @@ -2421,23 +2979,23 @@ def test_index_name_retained(self): def test_equals_operator(self): # GH9785 - self.assertTrue((self.index == self.index).all()) + assert (self.index == self.index).all() def test_large_multiindex_error(self): # GH12527 df_below_1000000 = pd.DataFrame( 1, index=pd.MultiIndex.from_product([[1, 2], range(499999)]), columns=['dest']) - with assertRaises(KeyError): + with pytest.raises(KeyError): df_below_1000000.loc[(-1, 0), 'dest'] - with assertRaises(KeyError): + with pytest.raises(KeyError): df_below_1000000.loc[(3, 0), 'dest'] df_above_1000000 = pd.DataFrame( 1, index=pd.MultiIndex.from_product([[1, 2], range(500001)]), columns=['dest']) - with assertRaises(KeyError): + with pytest.raises(KeyError): df_above_1000000.loc[(-1, 0), 'dest'] - with assertRaises(KeyError): + with pytest.raises(KeyError): df_above_1000000.loc[(3, 0), 'dest'] def test_partial_string_timestamp_multiindex(self): @@ -2490,7 +3048,7 @@ def test_partial_string_timestamp_multiindex(self): # ambiguous and we don't want to extend this behavior forward to work # in multi-indexes. This would amount to selecting a scalar from a # column. 
- with assertRaises(KeyError): + with pytest.raises(KeyError): df['2016-01-01'] # partial string match on year only @@ -2519,7 +3077,7 @@ def test_partial_string_timestamp_multiindex(self): tm.assert_frame_equal(result, expected) # Slicing date on first level should break (of course) - with assertRaises(KeyError): + with pytest.raises(KeyError): df_swap.loc['2016-01-01'] # GH12685 (partial string with daily resolution or below) @@ -2572,7 +3130,7 @@ def test_dropna(self): tm.assert_index_equal(idx.dropna(how='all'), exp) msg = "invalid how option: xxx" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): idx.dropna(how='xxx') def test_unsortedindex(self): @@ -2583,19 +3141,92 @@ def test_unsortedindex(self): df = pd.DataFrame([[i, 10 * i] for i in lrange(6)], index=mi, columns=['one', 'two']) - with assertRaises(UnsortedIndexError): - df.loc(axis=0)['z', :] + # GH 16734: not sorted, but no real slicing + result = df.loc(axis=0)['z', 'a'] + expected = df.iloc[0] + tm.assert_series_equal(result, expected) + + with pytest.raises(UnsortedIndexError): + df.loc(axis=0)['z', slice('a')] df.sort_index(inplace=True) - self.assertEqual(len(df.loc(axis=0)['z', :]), 2) + assert len(df.loc(axis=0)['z', :]) == 2 - with assertRaises(KeyError): + with pytest.raises(KeyError): df.loc(axis=0)['q', :] + def test_unsortedindex_doc_examples(self): + # http://pandas.pydata.org/pandas-docs/stable/advanced.html#sorting-a-multiindex # noqa + dfm = DataFrame({'jim': [0, 0, 1, 1], + 'joe': ['x', 'x', 'z', 'y'], + 'jolie': np.random.rand(4)}) + + dfm = dfm.set_index(['jim', 'joe']) + with tm.assert_produces_warning(PerformanceWarning): + dfm.loc[(1, 'z')] + + with pytest.raises(UnsortedIndexError): + dfm.loc[(0, 'y'):(1, 'z')] + + assert not dfm.index.is_lexsorted() + assert dfm.index.lexsort_depth == 1 + + # sort it + dfm = dfm.sort_index() + dfm.loc[(1, 'z')] + dfm.loc[(0, 'y'):(1, 'z')] + + assert dfm.index.is_lexsorted() + assert 
dfm.index.lexsort_depth == 2 + def test_tuples_with_name_string(self): # GH 15110 and GH 14848 li = [(0, 0, 1), (0, 1, 0), (1, 0, 0)] - with assertRaises(ValueError): + with pytest.raises(ValueError): pd.Index(li, name='abc') - with assertRaises(ValueError): + with pytest.raises(ValueError): pd.Index(li, name='a') + + def test_nan_stays_float(self): + + # GH 7031 + idx0 = pd.MultiIndex(levels=[["A", "B"], []], + labels=[[1, 0], [-1, -1]], + names=[0, 1]) + idx1 = pd.MultiIndex(levels=[["C"], ["D"]], + labels=[[0], [0]], + names=[0, 1]) + idxm = idx0.join(idx1, how='outer') + assert pd.isna(idx0.get_level_values(1)).all() + # the following failed in 0.14.1 + assert pd.isna(idxm.get_level_values(1)[:-1]).all() + + df0 = pd.DataFrame([[1, 2]], index=idx0) + df1 = pd.DataFrame([[3, 4]], index=idx1) + dfm = df0 - df1 + assert pd.isna(df0.index.get_level_values(1)).all() + # the following failed in 0.14.1 + assert pd.isna(dfm.index.get_level_values(1)[:-1]).all() + + def test_million_record_attribute_error(self): + # GH 18165 + r = list(range(1000000)) + df = pd.DataFrame({'a': r, 'b': r}, + index=pd.MultiIndex.from_tuples([(x, x) for x in r])) + + with tm.assert_raises_regex(AttributeError, + "'Series' object has no attribute 'foo'"): + df['a'].foo() + + def test_duplicate_multiindex_labels(self): + # GH 17464 + # Make sure that a MultiIndex with duplicate levels throws a ValueError + with pytest.raises(ValueError): + ind = pd.MultiIndex([['A'] * 10, range(10)], [[0] * 10, range(10)]) + + # And that using set_levels with duplicate levels fails + ind = MultiIndex.from_arrays([['A', 'A', 'B', 'B', 'B'], + [1, 2, 1, 2, 3]]) + with pytest.raises(ValueError): + ind.set_levels([['A', 'B', 'A', 'A', 'B'], [2, 1, 3, -2, 5]], + inplace=True) diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 1bf9a10628542..bafb6ae2e45f4 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -1,5 +1,7 @@ # -*- 
coding: utf-8 -*- +import pytest + from datetime import datetime from pandas.compat import range, PY3 @@ -11,7 +13,7 @@ import pandas.util.testing as tm import pandas as pd -from pandas.lib import Timestamp +from pandas._libs.tslib import Timestamp, Timedelta from pandas.tests.indexes.common import Base @@ -24,23 +26,60 @@ def full_like(array, value): return ret +class TestIndexArithmeticWithTimedeltaScalar(object): + + @pytest.mark.parametrize('index', [ + Int64Index(range(1, 11)), + UInt64Index(range(1, 11)), + Float64Index(range(1, 11)), + RangeIndex(1, 11)]) + @pytest.mark.parametrize('scalar_td', [Timedelta(days=1), + Timedelta(days=1).to_timedelta64(), + Timedelta(days=1).to_pytimedelta()]) + def test_index_mul_timedelta(self, scalar_td, index): + # GH#19333 + expected = pd.timedelta_range('1 days', '10 days') + + result = index * scalar_td + tm.assert_index_equal(result, expected) + commute = scalar_td * index + tm.assert_index_equal(commute, expected) + + @pytest.mark.parametrize('index', [Int64Index(range(1, 3)), + UInt64Index(range(1, 3)), + Float64Index(range(1, 3)), + RangeIndex(1, 3)]) + @pytest.mark.parametrize('scalar_td', [Timedelta(days=1), + Timedelta(days=1).to_timedelta64(), + Timedelta(days=1).to_pytimedelta()]) + def test_index_rdiv_timedelta(self, scalar_td, index): + expected = pd.TimedeltaIndex(['1 Day', '12 Hours']) + + result = scalar_td / index + tm.assert_index_equal(result, expected) + + with pytest.raises(TypeError): + index / scalar_td + + class Numeric(Base): def test_numeric_compat(self): + pass # override Base method + def test_mul_int(self): idx = self.create_index() - didx = idx * idx - result = idx * 1 tm.assert_index_equal(result, idx) + def test_rmul_int(self): + idx = self.create_index() + result = 1 * idx tm.assert_index_equal(result, idx) - # in general not true for RangeIndex - if not isinstance(idx, RangeIndex): - result = idx * idx - tm.assert_index_equal(result, idx ** 2) + def test_div_int(self): + idx = 
self.create_index() # truediv under PY3 result = idx / 1 @@ -55,9 +94,16 @@ def test_numeric_compat(self): expected = Index(idx.values / 2) tm.assert_index_equal(result, expected) + def test_floordiv_int(self): + idx = self.create_index() + result = idx // 1 tm.assert_index_equal(result, idx) + def test_mul_int_array(self): + idx = self.create_index() + didx = idx * idx + result = idx * np.array(5, dtype='int64') tm.assert_index_equal(result, idx * 5) @@ -65,19 +111,45 @@ def test_numeric_compat(self): result = idx * np.arange(5, dtype=arr_dtype) tm.assert_index_equal(result, didx) + def test_mul_int_series(self): + idx = self.create_index() + didx = idx * idx + + arr_dtype = 'uint64' if isinstance(idx, UInt64Index) else 'int64' result = idx * Series(np.arange(5, dtype=arr_dtype)) - tm.assert_index_equal(result, didx) + tm.assert_series_equal(result, Series(didx)) - result = idx * Series(np.arange(5, dtype='float64') + 0.1) - expected = Float64Index(np.arange(5, dtype='float64') * - (np.arange(5, dtype='float64') + 0.1)) - tm.assert_index_equal(result, expected) + def test_mul_float_series(self): + idx = self.create_index() + rng5 = np.arange(5, dtype='float64') - # invalid - self.assertRaises(TypeError, - lambda: idx * date_range('20130101', periods=5)) - self.assertRaises(ValueError, lambda: idx * idx[0:3]) - self.assertRaises(ValueError, lambda: idx * np.array([1, 2])) + result = idx * Series(rng5 + 0.1) + expected = Series(rng5 * (rng5 + 0.1)) + tm.assert_series_equal(result, expected) + + def test_mul_index(self): + idx = self.create_index() + + # in general not true for RangeIndex + if not isinstance(idx, RangeIndex): + result = idx * idx + tm.assert_index_equal(result, idx ** 2) + + def test_mul_datelike_raises(self): + idx = self.create_index() + with pytest.raises(TypeError): + idx * date_range('20130101', periods=5) + + def test_mul_size_mismatch_raises(self): + idx = self.create_index() + + with pytest.raises(ValueError): + idx * idx[0:3] + with 
pytest.raises(ValueError): + idx * np.array([1, 2]) + + def test_divmod(self): + idx = self.create_index() result = divmod(idx, 2) with np.errstate(all='ignore'): @@ -93,29 +165,80 @@ def test_numeric_compat(self): for r, e in zip(result, expected): tm.assert_index_equal(r, e) - result = divmod(idx, Series(full_like(idx.values, 2))) - with np.errstate(all='ignore'): - div, mod = divmod( - idx.values, - full_like(idx.values, 2), - ) - expected = Index(div), Index(mod) - for r, e in zip(result, expected): - tm.assert_index_equal(r, e) + def test_pow_float(self): + # test power calculations both ways, GH 14973 + idx = self.create_index() + + expected = pd.Float64Index(idx.values**2.0) + result = idx**2.0 + tm.assert_index_equal(result, expected) + def test_rpow_float(self): # test power calculations both ways, GH 14973 + idx = self.create_index() + expected = pd.Float64Index(2.0**idx.values) result = 2.0**idx tm.assert_index_equal(result, expected) - expected = pd.Float64Index(idx.values**2.0) - result = idx**2.0 + @pytest.mark.xfail(reason='GH#19252 Series has no __rdivmod__') + def test_divmod_series(self): + idx = self.create_index() + + result = divmod(idx, Series(full_like(idx.values, 2))) + with np.errstate(all='ignore'): + div, mod = divmod(idx.values, full_like(idx.values, 2)) + expected = Series(div), Series(mod) + + for r, e in zip(result, expected): + tm.assert_series_equal(r, e) + + def test_div_zero(self, zero): + idx = self.create_index() + + expected = Index([np.nan, np.inf, np.inf, np.inf, np.inf], + dtype=np.float64) + result = idx / zero + tm.assert_index_equal(result, expected) + ser_compat = Series(idx).astype('i8') / np.array(zero).astype('i8') + tm.assert_series_equal(ser_compat, Series(result)) + + def test_floordiv_zero(self, zero): + idx = self.create_index() + expected = Index([np.nan, np.inf, np.inf, np.inf, np.inf], + dtype=np.float64) + + result = idx // zero + tm.assert_index_equal(result, expected) + ser_compat = Series(idx).astype('i8') 
// np.array(zero).astype('i8') + tm.assert_series_equal(ser_compat, Series(result)) + + def test_mod_zero(self, zero): + idx = self.create_index() + + expected = Index([np.nan, np.nan, np.nan, np.nan, np.nan], + dtype=np.float64) + result = idx % zero tm.assert_index_equal(result, expected) + ser_compat = Series(idx).astype('i8') % np.array(zero).astype('i8') + tm.assert_series_equal(ser_compat, Series(result)) + + def test_divmod_zero(self, zero): + idx = self.create_index() + + exleft = Index([np.nan, np.inf, np.inf, np.inf, np.inf], + dtype=np.float64) + exright = Index([np.nan, np.nan, np.nan, np.nan, np.nan], + dtype=np.float64) + + result = divmod(idx, zero) + tm.assert_index_equal(result[0], exleft) + tm.assert_index_equal(result[1], exright) def test_explicit_conversions(self): # GH 8608 - # add/sub are overriden explicity for Float/Int Index + # add/sub are overridden explicitly for Float/Int Index idx = self._holder(np.arange(5, dtype='int64')) # float conversions @@ -171,15 +294,36 @@ def test_modulo(self): # GH 9244 index = self.create_index() expected = Index(index.values % 2) - self.assert_index_equal(index % 2, expected) + tm.assert_index_equal(index % 2, expected) + + @pytest.mark.parametrize('klass', [list, tuple, np.array, Series]) + def test_where(self, klass): + i = self.create_index() + cond = [True] * len(i) + expected = i + result = i.where(klass(cond)) + + cond = [False] + [True] * (len(i) - 1) + expected = Float64Index([i._na_value] + i[1:].tolist()) + result = i.where(klass(cond)) + tm.assert_index_equal(result, expected) + + def test_insert(self): + # GH 18295 (test missing) + expected = Float64Index([0, np.nan, 1, 2, 3, 4]) + for na in (np.nan, pd.NaT, None): + result = self.create_index().insert(1, na) + tm.assert_index_equal(result, expected) -class TestFloat64Index(Numeric, tm.TestCase): +class TestFloat64Index(Numeric): _holder = Float64Index - def setUp(self): + def setup_method(self, method): self.indices = 
dict(mixed=Float64Index([1.5, 2, 3, 4, 5]), - float=Float64Index(np.arange(5) * 2.5)) + float=Float64Index(np.arange(5) * 2.5), + mixed_dec=Float64Index([5, 4, 3, 2, 1.5]), + float_dec=Float64Index(np.arange(4, -1, -1) * 2.5)) self.setup_indices() def create_index(self): @@ -190,14 +334,14 @@ def test_repr_roundtrip(self): tm.assert_index_equal(eval(repr(ind)), ind) def check_is_index(self, i): - self.assertIsInstance(i, Index) - self.assertNotIsInstance(i, Float64Index) + assert isinstance(i, Index) + assert not isinstance(i, Float64Index) def check_coerce(self, a, b, is_float_index=True): - self.assertTrue(a.equals(b)) - self.assert_index_equal(a, b, exact=False) + assert a.equals(b) + tm.assert_index_equal(a, b, exact=False) if is_float_index: - self.assertIsInstance(b, Float64Index) + assert isinstance(b, Float64Index) else: self.check_is_index(b) @@ -205,39 +349,39 @@ def test_constructor(self): # explicit construction index = Float64Index([1, 2, 3, 4, 5]) - self.assertIsInstance(index, Float64Index) + assert isinstance(index, Float64Index) expected = np.array([1, 2, 3, 4, 5], dtype='float64') - self.assert_numpy_array_equal(index.values, expected) + tm.assert_numpy_array_equal(index.values, expected) index = Float64Index(np.array([1, 2, 3, 4, 5])) - self.assertIsInstance(index, Float64Index) + assert isinstance(index, Float64Index) index = Float64Index([1., 2, 3, 4, 5]) - self.assertIsInstance(index, Float64Index) + assert isinstance(index, Float64Index) index = Float64Index(np.array([1., 2, 3, 4, 5])) - self.assertIsInstance(index, Float64Index) - self.assertEqual(index.dtype, float) + assert isinstance(index, Float64Index) + assert index.dtype == float index = Float64Index(np.array([1., 2, 3, 4, 5]), dtype=np.float32) - self.assertIsInstance(index, Float64Index) - self.assertEqual(index.dtype, np.float64) + assert isinstance(index, Float64Index) + assert index.dtype == np.float64 index = Float64Index(np.array([1, 2, 3, 4, 5]), dtype=np.float32) - 
self.assertIsInstance(index, Float64Index) - self.assertEqual(index.dtype, np.float64) + assert isinstance(index, Float64Index) + assert index.dtype == np.float64 # nan handling result = Float64Index([np.nan, np.nan]) - self.assertTrue(pd.isnull(result.values).all()) + assert pd.isna(result.values).all() result = Float64Index(np.array([np.nan])) - self.assertTrue(pd.isnull(result.values).all()) + assert pd.isna(result.values).all() result = Index(np.array([np.nan])) - self.assertTrue(pd.isnull(result.values).all()) + assert pd.isna(result.values).all() def test_constructor_invalid(self): # invalid - self.assertRaises(TypeError, Float64Index, 0.) - self.assertRaises(TypeError, Float64Index, ['a', 'b', 0.]) - self.assertRaises(TypeError, Float64Index, [Timestamp('20130101')]) + pytest.raises(TypeError, Float64Index, 0.) + pytest.raises(TypeError, Float64Index, ['a', 'b', 0.]) + pytest.raises(TypeError, Float64Index, [Timestamp('20130101')]) def test_constructor_coerce(self): @@ -258,15 +402,15 @@ def test_constructor_explicit(self): def test_astype(self): result = self.float.astype(object) - self.assertTrue(result.equals(self.float)) - self.assertTrue(self.float.equals(result)) + assert result.equals(self.float) + assert self.float.equals(result) self.check_is_index(result) i = self.mixed.copy() i.name = 'foo' result = i.astype(object) - self.assertTrue(result.equals(i)) - self.assertTrue(i.equals(result)) + assert result.equals(i) + assert i.equals(result) self.check_is_index(result) # GH 12881 @@ -295,28 +439,28 @@ def test_astype(self): # invalid for dtype in ['M8[ns]', 'm8[ns]']: - self.assertRaises(TypeError, lambda: i.astype(dtype)) + pytest.raises(TypeError, lambda: i.astype(dtype)) # GH 13149 for dtype in ['int16', 'int32', 'int64']: i = Float64Index([0, 1.1, np.NAN]) - self.assertRaises(ValueError, lambda: i.astype(dtype)) + pytest.raises(ValueError, lambda: i.astype(dtype)) def test_equals_numeric(self): i = Float64Index([1.0, 2.0]) - 
self.assertTrue(i.equals(i)) - self.assertTrue(i.identical(i)) + assert i.equals(i) + assert i.identical(i) i2 = Float64Index([1.0, 2.0]) - self.assertTrue(i.equals(i2)) + assert i.equals(i2) i = Float64Index([1.0, np.nan]) - self.assertTrue(i.equals(i)) - self.assertTrue(i.identical(i)) + assert i.equals(i) + assert i.identical(i) i2 = Float64Index([1.0, np.nan]) - self.assertTrue(i.equals(i2)) + assert i.equals(i2) def test_get_indexer(self): idx = Float64Index([0.0, 1.0, 2.0]) @@ -334,54 +478,70 @@ def test_get_indexer(self): def test_get_loc(self): idx = Float64Index([0.0, 1.0, 2.0]) for method in [None, 'pad', 'backfill', 'nearest']: - self.assertEqual(idx.get_loc(1, method), 1) + assert idx.get_loc(1, method) == 1 if method is not None: - self.assertEqual(idx.get_loc(1, method, tolerance=0), 1) + assert idx.get_loc(1, method, tolerance=0) == 1 for method, loc in [('pad', 1), ('backfill', 2), ('nearest', 1)]: - self.assertEqual(idx.get_loc(1.1, method), loc) - self.assertEqual(idx.get_loc(1.1, method, tolerance=0.9), loc) + assert idx.get_loc(1.1, method) == loc + assert idx.get_loc(1.1, method, tolerance=0.9) == loc - self.assertRaises(KeyError, idx.get_loc, 'foo') - self.assertRaises(KeyError, idx.get_loc, 1.5) - self.assertRaises(KeyError, idx.get_loc, 1.5, method='pad', - tolerance=0.1) + pytest.raises(KeyError, idx.get_loc, 'foo') + pytest.raises(KeyError, idx.get_loc, 1.5) + pytest.raises(KeyError, idx.get_loc, 1.5, method='pad', + tolerance=0.1) - with tm.assertRaisesRegexp(ValueError, 'must be numeric'): + with tm.assert_raises_regex(ValueError, 'must be numeric'): idx.get_loc(1.4, method='nearest', tolerance='foo') + with pytest.raises(ValueError, match='must contain numeric elements'): + idx.get_loc(1.4, method='nearest', tolerance=np.array(['foo'])) + + with pytest.raises( + ValueError, + match='tolerance size must match target index size'): + idx.get_loc(1.4, method='nearest', tolerance=np.array([1, 2])) + def test_get_loc_na(self): idx = 
Float64Index([np.nan, 1, 2]) - self.assertEqual(idx.get_loc(1), 1) - self.assertEqual(idx.get_loc(np.nan), 0) + assert idx.get_loc(1) == 1 + assert idx.get_loc(np.nan) == 0 idx = Float64Index([np.nan, 1, np.nan]) - self.assertEqual(idx.get_loc(1), 1) + assert idx.get_loc(1) == 1 # representable by slice [0:2:2] - # self.assertRaises(KeyError, idx.slice_locs, np.nan) + # pytest.raises(KeyError, idx.slice_locs, np.nan) sliced = idx.slice_locs(np.nan) - self.assertTrue(isinstance(sliced, tuple)) - self.assertEqual(sliced, (0, 3)) + assert isinstance(sliced, tuple) + assert sliced == (0, 3) # not representable by slice idx = Float64Index([np.nan, 1, np.nan, np.nan]) - self.assertEqual(idx.get_loc(1), 1) - self.assertRaises(KeyError, idx.slice_locs, np.nan) + assert idx.get_loc(1) == 1 + pytest.raises(KeyError, idx.slice_locs, np.nan) + + def test_get_loc_missing_nan(self): + # GH 8569 + idx = Float64Index([1, 2]) + assert idx.get_loc(1) == 0 + pytest.raises(KeyError, idx.get_loc, 3) + pytest.raises(KeyError, idx.get_loc, np.nan) + pytest.raises(KeyError, idx.get_loc, [np.nan]) def test_contains_nans(self): i = Float64Index([1.0, 2.0, np.nan]) - self.assertTrue(np.nan in i) + assert np.nan in i def test_contains_not_nans(self): i = Float64Index([1.0, 2.0, np.nan]) - self.assertTrue(1.0 in i) + assert 1.0 in i def test_doesnt_contain_all_the_things(self): i = Float64Index([np.nan]) - self.assertFalse(i.isin([0]).item()) - self.assertFalse(i.isin([1]).item()) - self.assertTrue(i.isin([np.nan]).item()) + assert not i.isin([0]).item() + assert not i.isin([1]).item() + assert i.isin([np.nan]).item() def test_nan_multiple_containment(self): i = Float64Index([1.0, np.nan]) @@ -398,7 +558,7 @@ def test_astype_from_object(self): index = Index([1.0, np.nan, 0.2], dtype='object') result = index.astype(float) expected = Float64Index([1.0, np.nan, 0.2]) - self.assertEqual(result.dtype, expected.dtype) + assert result.dtype == expected.dtype tm.assert_index_equal(result, expected) 
def test_fillna_float64(self): @@ -406,15 +566,15 @@ def test_fillna_float64(self): idx = Index([1.0, np.nan, 3.0], dtype=float, name='x') # can't downcast exp = Index([1.0, 0.1, 3.0], name='x') - self.assert_index_equal(idx.fillna(0.1), exp) + tm.assert_index_equal(idx.fillna(0.1), exp) # downcast exp = Float64Index([1.0, 2.0, 3.0], name='x') - self.assert_index_equal(idx.fillna(2), exp) + tm.assert_index_equal(idx.fillna(2), exp) # object exp = Index([1.0, 'obj', 3.0], name='x') - self.assert_index_equal(idx.fillna('obj'), exp) + tm.assert_index_equal(idx.fillna('obj'), exp) def test_take_fill_value(self): # GH 12631 @@ -436,23 +596,23 @@ def test_take_fill_value(self): msg = ('When allow_fill=True and fill_value is not None, ' 'all indices must be >= -1') - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -5]), fill_value=True) - with tm.assertRaises(IndexError): + with pytest.raises(IndexError): idx.take(np.array([1, -5])) class NumericInt(Numeric): - def test_view(self): - super(NumericInt, self).test_view() + def test_view(self, indices): + super(NumericInt, self).test_view(indices) i = self._holder([], name='Foo') i_view = i.view() - self.assertEqual(i_view.name, 'Foo') + assert i_view.name == 'Foo' i_view = i.view(self._dtype) tm.assert_index_equal(i, self._holder(i_view, name='Foo')) @@ -461,42 +621,61 @@ def test_view(self): tm.assert_index_equal(i, self._holder(i_view, name='Foo')) def test_is_monotonic(self): - self.assertTrue(self.index.is_monotonic) - self.assertTrue(self.index.is_monotonic_increasing) - self.assertFalse(self.index.is_monotonic_decreasing) + assert self.index.is_monotonic + assert self.index.is_monotonic_increasing + assert self.index._is_strictly_monotonic_increasing + assert not self.index.is_monotonic_decreasing + assert 
not self.index._is_strictly_monotonic_decreasing index = self._holder([4, 3, 2, 1]) - self.assertFalse(index.is_monotonic) - self.assertTrue(index.is_monotonic_decreasing) + assert not index.is_monotonic + assert not index._is_strictly_monotonic_increasing + assert index._is_strictly_monotonic_decreasing index = self._holder([1]) - self.assertTrue(index.is_monotonic) - self.assertTrue(index.is_monotonic_increasing) - self.assertTrue(index.is_monotonic_decreasing) + assert index.is_monotonic + assert index.is_monotonic_increasing + assert index.is_monotonic_decreasing + assert index._is_strictly_monotonic_increasing + assert index._is_strictly_monotonic_decreasing + + def test_is_strictly_monotonic(self): + index = self._holder([1, 1, 2, 3]) + assert index.is_monotonic_increasing + assert not index._is_strictly_monotonic_increasing + + index = self._holder([3, 2, 1, 1]) + assert index.is_monotonic_decreasing + assert not index._is_strictly_monotonic_decreasing + + index = self._holder([1, 1]) + assert index.is_monotonic_increasing + assert index.is_monotonic_decreasing + assert not index._is_strictly_monotonic_increasing + assert not index._is_strictly_monotonic_decreasing def test_logical_compat(self): idx = self.create_index() - self.assertEqual(idx.all(), idx.values.all()) - self.assertEqual(idx.any(), idx.values.any()) + assert idx.all() == idx.values.all() + assert idx.any() == idx.values.any() def test_identical(self): i = Index(self.index.copy()) - self.assertTrue(i.identical(self.index)) + assert i.identical(self.index) same_values_different_type = Index(i, dtype=object) - self.assertFalse(i.identical(same_values_different_type)) + assert not i.identical(same_values_different_type) i = self.index.copy(dtype=object) i = i.rename('foo') same_values = Index(i, dtype=object) - self.assertTrue(same_values.identical(i)) + assert same_values.identical(i) - self.assertFalse(i.identical(self.index)) - self.assertTrue(Index(same_values, name='foo', 
dtype=object).identical( - i)) + assert not i.identical(self.index) + assert Index(same_values, name='foo', dtype=object).identical(i) - self.assertFalse(self.index.copy(dtype=object) - .identical(self.index.copy(dtype=self._dtype))) + assert not self.index.copy(dtype=object).identical( + self.index.copy(dtype=self._dtype)) def test_join_non_unique(self): left = Index([4, 4, 3, 3]) @@ -504,7 +683,7 @@ def test_join_non_unique(self): joined, lidx, ridx = left.join(left, return_indexers=True) exp_joined = Index([3, 3, 3, 3, 4, 4, 4, 4]) - self.assert_index_equal(joined, exp_joined) + tm.assert_index_equal(joined, exp_joined) exp_lidx = np.array([2, 2, 3, 3, 0, 0, 1, 1], dtype=np.intp) tm.assert_numpy_array_equal(lidx, exp_lidx) @@ -516,7 +695,7 @@ def test_join_self(self): kinds = 'outer', 'inner', 'left', 'right' for kind in kinds: joined = self.index.join(self.index, how=kind) - self.assertIs(self.index, joined) + assert self.index is joined def test_union_noncomparable(self): from datetime import datetime, timedelta @@ -534,23 +713,23 @@ def test_union_noncomparable(self): def test_cant_or_shouldnt_cast(self): # can't data = ['foo', 'bar', 'baz'] - self.assertRaises(TypeError, self._holder, data) + pytest.raises(TypeError, self._holder, data) # shouldn't data = ['0', '1', '2'] - self.assertRaises(TypeError, self._holder, data) + pytest.raises(TypeError, self._holder, data) def test_view_index(self): self.index.view(Index) def test_prevent_casting(self): result = self.index.astype('O') - self.assertEqual(result.dtype, np.object_) + assert result.dtype == np.object_ def test_take_preserve_name(self): index = self._holder([1, 2, 3, 4], name='foo') taken = index.take([3, 0, 1]) - self.assertEqual(index.name, taken.name) + assert index.name == taken.name def test_take_fill_value(self): # see gh-12631 @@ -564,7 +743,7 @@ def test_take_fill_value(self): "{name} cannot contain NA").format(name=name) # fill_value=True - with tm.assertRaisesRegexp(ValueError, msg): + with 
tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -1]), fill_value=True) # allow_fill=False @@ -573,59 +752,60 @@ def test_take_fill_value(self): expected = self._holder([2, 1, 3], name='xxx') tm.assert_index_equal(result, expected) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -5]), fill_value=True) - with tm.assertRaises(IndexError): + with pytest.raises(IndexError): idx.take(np.array([1, -5])) def test_slice_keep_name(self): idx = self._holder([1, 2], name='asdf') - self.assertEqual(idx.name, idx[1:].name) + assert idx.name == idx[1:].name def test_ufunc_coercions(self): idx = self._holder([1, 2, 3, 4, 5], name='x') result = np.sqrt(idx) - tm.assertIsInstance(result, Float64Index) + assert isinstance(result, Float64Index) exp = Float64Index(np.sqrt(np.array([1, 2, 3, 4, 5])), name='x') tm.assert_index_equal(result, exp) result = np.divide(idx, 2.) - tm.assertIsInstance(result, Float64Index) + assert isinstance(result, Float64Index) exp = Float64Index([0.5, 1., 1.5, 2., 2.5], name='x') tm.assert_index_equal(result, exp) # _evaluate_numeric_binop result = idx + 2. - tm.assertIsInstance(result, Float64Index) + assert isinstance(result, Float64Index) exp = Float64Index([3., 4., 5., 6., 7.], name='x') tm.assert_index_equal(result, exp) result = idx - 2. - tm.assertIsInstance(result, Float64Index) + assert isinstance(result, Float64Index) exp = Float64Index([-1., 0., 1., 2., 3.], name='x') tm.assert_index_equal(result, exp) result = idx * 1. - tm.assertIsInstance(result, Float64Index) + assert isinstance(result, Float64Index) exp = Float64Index([1., 2., 3., 4., 5.], name='x') tm.assert_index_equal(result, exp) result = idx / 2. 
- tm.assertIsInstance(result, Float64Index) + assert isinstance(result, Float64Index) exp = Float64Index([0.5, 1., 1.5, 2., 2.5], name='x') tm.assert_index_equal(result, exp) -class TestInt64Index(NumericInt, tm.TestCase): +class TestInt64Index(NumericInt): _dtype = 'int64' _holder = Int64Index - def setUp(self): - self.indices = dict(index=Int64Index(np.arange(0, 20, 2))) + def setup_method(self, method): + self.indices = dict(index=Int64Index(np.arange(0, 20, 2)), + index_dec=Int64Index(np.arange(19, -1, -1))) self.setup_indices() def create_index(self): @@ -642,7 +822,7 @@ def test_constructor(self): tm.assert_index_equal(index, expected) # scalar raise Exception - self.assertRaises(TypeError, Int64Index, 5) + pytest.raises(TypeError, Int64Index, 5) # copy arr = self.index.values @@ -652,7 +832,7 @@ def test_constructor(self): # this should not change index arr[0] = val - self.assertNotEqual(new_index[0], val) + assert new_index[0] != val # interpret list-like expected = Int64Index([5, 0]) @@ -665,26 +845,26 @@ def test_constructor(self): def test_constructor_corner(self): arr = np.array([1, 2, 3, 4], dtype=object) index = Int64Index(arr) - self.assertEqual(index.values.dtype, np.int64) - self.assert_index_equal(index, Index(arr)) + assert index.values.dtype == np.int64 + tm.assert_index_equal(index, Index(arr)) # preventing casting arr = np.array([1, '2', 3, '4'], dtype=object) - with tm.assertRaisesRegexp(TypeError, 'casting'): + with tm.assert_raises_regex(TypeError, 'casting'): Int64Index(arr) arr_with_floats = [0, 2, 3, 4, 5, 1.25, 3, -1] - with tm.assertRaisesRegexp(TypeError, 'casting'): + with tm.assert_raises_regex(TypeError, 'casting'): Int64Index(arr_with_floats) def test_coerce_list(self): # coerce things arr = Index([1, 2, 3, 4]) - tm.assertIsInstance(arr, Int64Index) + assert isinstance(arr, Int64Index) # but not if explicit dtype passed arr = Index([1, 2, 3, 4], dtype=object) - tm.assertIsInstance(arr, Index) + assert isinstance(arr, Index) def 
test_get_indexer(self): target = Int64Index(np.arange(10)) @@ -732,8 +912,8 @@ def test_join_inner(self): elidx = np.array([1, 6], dtype=np.intp) eridx = np.array([4, 1], dtype=np.intp) - tm.assertIsInstance(res, Int64Index) - self.assert_index_equal(res, eres) + assert isinstance(res, Int64Index) + tm.assert_index_equal(res, eres) tm.assert_numpy_array_equal(lidx, elidx) tm.assert_numpy_array_equal(ridx, eridx) @@ -742,12 +922,12 @@ def test_join_inner(self): return_indexers=True) res2 = self.index.intersection(other_mono) - self.assert_index_equal(res, res2) + tm.assert_index_equal(res, res2) elidx = np.array([1, 6], dtype=np.intp) eridx = np.array([1, 4], dtype=np.intp) - tm.assertIsInstance(res, Int64Index) - self.assert_index_equal(res, eres) + assert isinstance(res, Int64Index) + tm.assert_index_equal(res, eres) tm.assert_numpy_array_equal(lidx, elidx) tm.assert_numpy_array_equal(ridx, eridx) @@ -762,9 +942,9 @@ def test_join_left(self): eridx = np.array([-1, 4, -1, -1, -1, -1, 1, -1, -1, -1], dtype=np.intp) - tm.assertIsInstance(res, Int64Index) - self.assert_index_equal(res, eres) - self.assertIsNone(lidx) + assert isinstance(res, Int64Index) + tm.assert_index_equal(res, eres) + assert lidx is None tm.assert_numpy_array_equal(ridx, eridx) # monotonic @@ -772,9 +952,9 @@ def test_join_left(self): return_indexers=True) eridx = np.array([-1, 1, -1, -1, -1, -1, 4, -1, -1, -1], dtype=np.intp) - tm.assertIsInstance(res, Int64Index) - self.assert_index_equal(res, eres) - self.assertIsNone(lidx) + assert isinstance(res, Int64Index) + tm.assert_index_equal(res, eres) + assert lidx is None tm.assert_numpy_array_equal(ridx, eridx) # non-unique @@ -784,7 +964,7 @@ def test_join_left(self): eres = Index([1, 1, 2, 5, 7, 9]) # 1 is in idx2, so it should be x2 eridx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp) elidx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp) - self.assert_index_equal(res, eres) + tm.assert_index_equal(res, eres) tm.assert_numpy_array_equal(lidx, 
elidx) tm.assert_numpy_array_equal(ridx, eridx) @@ -798,20 +978,20 @@ def test_join_right(self): eres = other elidx = np.array([-1, 6, -1, -1, 1, -1], dtype=np.intp) - tm.assertIsInstance(other, Int64Index) - self.assert_index_equal(res, eres) + assert isinstance(other, Int64Index) + tm.assert_index_equal(res, eres) tm.assert_numpy_array_equal(lidx, elidx) - self.assertIsNone(ridx) + assert ridx is None # monotonic res, lidx, ridx = self.index.join(other_mono, how='right', return_indexers=True) eres = other_mono elidx = np.array([-1, 1, -1, -1, 6, -1], dtype=np.intp) - tm.assertIsInstance(other, Int64Index) - self.assert_index_equal(res, eres) + assert isinstance(other, Int64Index) + tm.assert_index_equal(res, eres) tm.assert_numpy_array_equal(lidx, elidx) - self.assertIsNone(ridx) + assert ridx is None # non-unique idx = Index([1, 1, 2, 5]) @@ -820,7 +1000,7 @@ def test_join_right(self): eres = Index([1, 1, 2, 5, 7, 9]) # 1 is in idx2, so it should be x2 elidx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp) eridx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp) - self.assert_index_equal(res, eres) + tm.assert_index_equal(res, eres) tm.assert_numpy_array_equal(lidx, elidx) tm.assert_numpy_array_equal(ridx, eridx) @@ -830,26 +1010,26 @@ def test_join_non_int_index(self): outer = self.index.join(other, how='outer') outer2 = other.join(self.index, how='outer') expected = Index([0, 2, 3, 4, 6, 7, 8, 10, 12, 14, 16, 18]) - self.assert_index_equal(outer, outer2) - self.assert_index_equal(outer, expected) + tm.assert_index_equal(outer, outer2) + tm.assert_index_equal(outer, expected) inner = self.index.join(other, how='inner') inner2 = other.join(self.index, how='inner') expected = Index([6, 8, 10]) - self.assert_index_equal(inner, inner2) - self.assert_index_equal(inner, expected) + tm.assert_index_equal(inner, inner2) + tm.assert_index_equal(inner, expected) left = self.index.join(other, how='left') - self.assert_index_equal(left, self.index.astype(object)) + 
tm.assert_index_equal(left, self.index.astype(object)) left2 = other.join(self.index, how='left') - self.assert_index_equal(left2, other) + tm.assert_index_equal(left2, other) right = self.index.join(other, how='right') - self.assert_index_equal(right, other) + tm.assert_index_equal(right, other) right2 = other.join(self.index, how='right') - self.assert_index_equal(right2, self.index.astype(object)) + tm.assert_index_equal(right2, self.index.astype(object)) def test_join_outer(self): other = Int64Index([7, 12, 25, 1, 2, 5]) @@ -860,7 +1040,7 @@ def test_join_outer(self): res, lidx, ridx = self.index.join(other, how='outer', return_indexers=True) noidx_res = self.index.join(other, how='outer') - self.assert_index_equal(res, noidx_res) + tm.assert_index_equal(res, noidx_res) eres = Int64Index([0, 1, 2, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 25]) elidx = np.array([0, -1, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, 9, -1], @@ -868,8 +1048,8 @@ def test_join_outer(self): eridx = np.array([-1, 3, 4, -1, 5, -1, 0, -1, -1, 1, -1, -1, -1, 2], dtype=np.intp) - tm.assertIsInstance(res, Int64Index) - self.assert_index_equal(res, eres) + assert isinstance(res, Int64Index) + tm.assert_index_equal(res, eres) tm.assert_numpy_array_equal(lidx, elidx) tm.assert_numpy_array_equal(ridx, eridx) @@ -877,26 +1057,27 @@ def test_join_outer(self): res, lidx, ridx = self.index.join(other_mono, how='outer', return_indexers=True) noidx_res = self.index.join(other_mono, how='outer') - self.assert_index_equal(res, noidx_res) + tm.assert_index_equal(res, noidx_res) elidx = np.array([0, -1, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, 9, -1], dtype=np.intp) eridx = np.array([-1, 0, 1, -1, 2, -1, 3, -1, -1, 4, -1, -1, -1, 5], dtype=np.intp) - tm.assertIsInstance(res, Int64Index) - self.assert_index_equal(res, eres) + assert isinstance(res, Int64Index) + tm.assert_index_equal(res, eres) tm.assert_numpy_array_equal(lidx, elidx) tm.assert_numpy_array_equal(ridx, eridx) -class TestUInt64Index(NumericInt, tm.TestCase): +class 
TestUInt64Index(NumericInt): _dtype = 'uint64' _holder = UInt64Index - def setUp(self): - self.indices = dict(index=UInt64Index([2**63, 2**63 + 10, 2**63 + 15, - 2**63 + 20, 2**63 + 25])) + def setup_method(self, method): + vals = [2**63, 2**63 + 10, 2**63 + 15, 2**63 + 20, 2**63 + 25] + self.indices = dict(index=UInt64Index(vals), + index_dec=UInt64Index(reversed(vals))) self.setup_indices() def create_index(self): @@ -970,8 +1151,8 @@ def test_join_inner(self): elidx = np.array([1, 4], dtype=np.intp) eridx = np.array([5, 2], dtype=np.intp) - tm.assertIsInstance(res, UInt64Index) - self.assert_index_equal(res, eres) + assert isinstance(res, UInt64Index) + tm.assert_index_equal(res, eres) tm.assert_numpy_array_equal(lidx, elidx) tm.assert_numpy_array_equal(ridx, eridx) @@ -980,13 +1161,13 @@ def test_join_inner(self): return_indexers=True) res2 = self.index.intersection(other_mono) - self.assert_index_equal(res, res2) + tm.assert_index_equal(res, res2) elidx = np.array([1, 4], dtype=np.intp) eridx = np.array([3, 5], dtype=np.intp) - tm.assertIsInstance(res, UInt64Index) - self.assert_index_equal(res, eres) + assert isinstance(res, UInt64Index) + tm.assert_index_equal(res, eres) tm.assert_numpy_array_equal(lidx, elidx) tm.assert_numpy_array_equal(ridx, eridx) @@ -1002,9 +1183,9 @@ def test_join_left(self): eres = self.index eridx = np.array([-1, 5, -1, -1, 2], dtype=np.intp) - tm.assertIsInstance(res, UInt64Index) - self.assert_index_equal(res, eres) - self.assertIsNone(lidx) + assert isinstance(res, UInt64Index) + tm.assert_index_equal(res, eres) + assert lidx is None tm.assert_numpy_array_equal(ridx, eridx) # monotonic @@ -1012,9 +1193,9 @@ def test_join_left(self): return_indexers=True) eridx = np.array([-1, 3, -1, -1, 5], dtype=np.intp) - tm.assertIsInstance(res, UInt64Index) - self.assert_index_equal(res, eres) - self.assertIsNone(lidx) + assert isinstance(res, UInt64Index) + tm.assert_index_equal(res, eres) + assert lidx is None 
tm.assert_numpy_array_equal(ridx, eridx) # non-unique @@ -1028,7 +1209,7 @@ def test_join_left(self): eridx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp) elidx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp) - self.assert_index_equal(res, eres) + tm.assert_index_equal(res, eres) tm.assert_numpy_array_equal(lidx, elidx) tm.assert_numpy_array_equal(ridx, eridx) @@ -1045,9 +1226,9 @@ def test_join_right(self): elidx = np.array([-1, -1, 4, -1, -1, 1], dtype=np.intp) tm.assert_numpy_array_equal(lidx, elidx) - tm.assertIsInstance(other, UInt64Index) - self.assert_index_equal(res, eres) - self.assertIsNone(ridx) + assert isinstance(other, UInt64Index) + tm.assert_index_equal(res, eres) + assert ridx is None # monotonic res, lidx, ridx = self.index.join(other_mono, how='right', @@ -1055,10 +1236,10 @@ def test_join_right(self): eres = other_mono elidx = np.array([-1, -1, -1, 1, -1, 4], dtype=np.intp) - tm.assertIsInstance(other, UInt64Index) + assert isinstance(other, UInt64Index) tm.assert_numpy_array_equal(lidx, elidx) - self.assert_index_equal(res, eres) - self.assertIsNone(ridx) + tm.assert_index_equal(res, eres) + assert ridx is None # non-unique idx = UInt64Index(2**63 + np.array([1, 1, 2, 5], dtype='uint64')) @@ -1071,7 +1252,7 @@ def test_join_right(self): elidx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp) eridx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp) - self.assert_index_equal(res, eres) + tm.assert_index_equal(res, eres) tm.assert_numpy_array_equal(lidx, elidx) tm.assert_numpy_array_equal(ridx, eridx) @@ -1083,26 +1264,26 @@ def test_join_non_int_index(self): outer2 = other.join(self.index, how='outer') expected = Index(2**63 + np.array( [0, 1, 5, 7, 10, 15, 20, 25], dtype='uint64')) - self.assert_index_equal(outer, outer2) - self.assert_index_equal(outer, expected) + tm.assert_index_equal(outer, outer2) + tm.assert_index_equal(outer, expected) inner = self.index.join(other, how='inner') inner2 = other.join(self.index, how='inner') expected = Index(2**63 
+ np.array([10, 20], dtype='uint64')) - self.assert_index_equal(inner, inner2) - self.assert_index_equal(inner, expected) + tm.assert_index_equal(inner, inner2) + tm.assert_index_equal(inner, expected) left = self.index.join(other, how='left') - self.assert_index_equal(left, self.index.astype(object)) + tm.assert_index_equal(left, self.index.astype(object)) left2 = other.join(self.index, how='left') - self.assert_index_equal(left2, other) + tm.assert_index_equal(left2, other) right = self.index.join(other, how='right') - self.assert_index_equal(right, other) + tm.assert_index_equal(right, other) right2 = other.join(self.index, how='right') - self.assert_index_equal(right2, self.index.astype(object)) + tm.assert_index_equal(right2, self.index.astype(object)) def test_join_outer(self): other = UInt64Index(2**63 + np.array( @@ -1115,15 +1296,15 @@ def test_join_outer(self): res, lidx, ridx = self.index.join(other, how='outer', return_indexers=True) noidx_res = self.index.join(other, how='outer') - self.assert_index_equal(res, noidx_res) + tm.assert_index_equal(res, noidx_res) eres = UInt64Index(2**63 + np.array( [0, 1, 2, 7, 10, 12, 15, 20, 25], dtype='uint64')) elidx = np.array([0, -1, -1, -1, 1, -1, 2, 3, 4], dtype=np.intp) eridx = np.array([-1, 3, 4, 0, 5, 1, -1, -1, 2], dtype=np.intp) - tm.assertIsInstance(res, UInt64Index) - self.assert_index_equal(res, eres) + assert isinstance(res, UInt64Index) + tm.assert_index_equal(res, eres) tm.assert_numpy_array_equal(lidx, elidx) tm.assert_numpy_array_equal(ridx, eridx) @@ -1131,12 +1312,12 @@ def test_join_outer(self): res, lidx, ridx = self.index.join(other_mono, how='outer', return_indexers=True) noidx_res = self.index.join(other_mono, how='outer') - self.assert_index_equal(res, noidx_res) + tm.assert_index_equal(res, noidx_res) elidx = np.array([0, -1, -1, -1, 1, -1, 2, 3, 4], dtype=np.intp) eridx = np.array([-1, 0, 1, 2, 3, 4, -1, -1, 5], dtype=np.intp) - tm.assertIsInstance(res, UInt64Index) - 
self.assert_index_equal(res, eres) + assert isinstance(res, UInt64Index) + tm.assert_index_equal(res, eres) tm.assert_numpy_array_equal(lidx, elidx) tm.assert_numpy_array_equal(ridx, eridx) diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 38e715fce2720..1ebeef072fdc5 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -1,5 +1,7 @@ # -*- coding: utf-8 -*- +import pytest + from datetime import datetime from itertools import combinations import operator @@ -8,8 +10,8 @@ import numpy as np -from pandas import (Series, Index, Float64Index, Int64Index, RangeIndex) -from pandas.util.testing import assertRaisesRegexp +from pandas import (isna, Series, Index, Float64Index, + Int64Index, RangeIndex) import pandas.util.testing as tm @@ -18,12 +20,13 @@ from .test_numeric import Numeric -class TestRangeIndex(Numeric, tm.TestCase): +class TestRangeIndex(Numeric): _holder = RangeIndex _compat_props = ['shape', 'ndim', 'size', 'itemsize'] - def setUp(self): - self.indices = dict(index=RangeIndex(0, 20, 2, name='foo')) + def setup_method(self, method): + self.indices = dict(index=RangeIndex(0, 20, 2, name='foo'), + index_dec=RangeIndex(18, -1, -2, name='bar')) self.setup_indices() def create_index(self): @@ -62,105 +65,105 @@ def test_too_many_names(self): def testit(): self.index.names = ["roger", "harold"] - assertRaisesRegexp(ValueError, "^Length", testit) + tm.assert_raises_regex(ValueError, "^Length", testit) def test_constructor(self): index = RangeIndex(5) expected = np.arange(5, dtype=np.int64) - self.assertIsInstance(index, RangeIndex) - self.assertEqual(index._start, 0) - self.assertEqual(index._stop, 5) - self.assertEqual(index._step, 1) - self.assertEqual(index.name, None) + assert isinstance(index, RangeIndex) + assert index._start == 0 + assert index._stop == 5 + assert index._step == 1 + assert index.name is None tm.assert_index_equal(Index(expected), index) index = RangeIndex(1, 5) 
expected = np.arange(1, 5, dtype=np.int64) - self.assertIsInstance(index, RangeIndex) - self.assertEqual(index._start, 1) + assert isinstance(index, RangeIndex) + assert index._start == 1 tm.assert_index_equal(Index(expected), index) index = RangeIndex(1, 5, 2) expected = np.arange(1, 5, 2, dtype=np.int64) - self.assertIsInstance(index, RangeIndex) - self.assertEqual(index._step, 2) + assert isinstance(index, RangeIndex) + assert index._step == 2 tm.assert_index_equal(Index(expected), index) msg = "RangeIndex\\(\\.\\.\\.\\) must be called with integers" - with tm.assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): RangeIndex() for index in [RangeIndex(0), RangeIndex(start=0), RangeIndex(stop=0), RangeIndex(0, 0)]: expected = np.empty(0, dtype=np.int64) - self.assertIsInstance(index, RangeIndex) - self.assertEqual(index._start, 0) - self.assertEqual(index._stop, 0) - self.assertEqual(index._step, 1) + assert isinstance(index, RangeIndex) + assert index._start == 0 + assert index._stop == 0 + assert index._step == 1 tm.assert_index_equal(Index(expected), index) - with tm.assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): RangeIndex(name='Foo') for index in [RangeIndex(0, name='Foo'), RangeIndex(start=0, name='Foo'), RangeIndex(stop=0, name='Foo'), RangeIndex(0, 0, name='Foo')]: - self.assertIsInstance(index, RangeIndex) - self.assertEqual(index.name, 'Foo') + assert isinstance(index, RangeIndex) + assert index.name == 'Foo' # we don't allow on a bare Index - self.assertRaises(TypeError, lambda: Index(0, 1000)) + pytest.raises(TypeError, lambda: Index(0, 1000)) # invalid args for i in [Index(['a', 'b']), Series(['a', 'b']), np.array(['a', 'b']), [], 'foo', datetime(2000, 1, 1, 0, 0), np.arange(0, 10), np.array([1]), [1]]: - self.assertRaises(TypeError, lambda: RangeIndex(i)) + pytest.raises(TypeError, lambda: RangeIndex(i)) def test_constructor_same(self): # pass thru w and w/o copy index = RangeIndex(1, 
5, 2) result = RangeIndex(index, copy=False) - self.assertTrue(result.identical(index)) + assert result.identical(index) result = RangeIndex(index, copy=True) - self.assert_index_equal(result, index, exact=True) + tm.assert_index_equal(result, index, exact=True) result = RangeIndex(index) - self.assert_index_equal(result, index, exact=True) + tm.assert_index_equal(result, index, exact=True) - self.assertRaises(TypeError, - lambda: RangeIndex(index, dtype='float64')) + pytest.raises(TypeError, + lambda: RangeIndex(index, dtype='float64')) def test_constructor_range(self): - self.assertRaises(TypeError, lambda: RangeIndex(range(1, 5, 2))) + pytest.raises(TypeError, lambda: RangeIndex(range(1, 5, 2))) result = RangeIndex.from_range(range(1, 5, 2)) expected = RangeIndex(1, 5, 2) - self.assert_index_equal(result, expected, exact=True) + tm.assert_index_equal(result, expected, exact=True) result = RangeIndex.from_range(range(5, 6)) expected = RangeIndex(5, 6, 1) - self.assert_index_equal(result, expected, exact=True) + tm.assert_index_equal(result, expected, exact=True) # an invalid range result = RangeIndex.from_range(range(5, 1)) expected = RangeIndex(0, 0, 1) - self.assert_index_equal(result, expected, exact=True) + tm.assert_index_equal(result, expected, exact=True) result = RangeIndex.from_range(range(5)) expected = RangeIndex(0, 5, 1) - self.assert_index_equal(result, expected, exact=True) + tm.assert_index_equal(result, expected, exact=True) result = Index(range(1, 5, 2)) expected = RangeIndex(1, 5, 2) - self.assert_index_equal(result, expected, exact=True) + tm.assert_index_equal(result, expected, exact=True) - self.assertRaises(TypeError, - lambda: Index(range(1, 5, 2), dtype='float64')) + pytest.raises(TypeError, + lambda: Index(range(1, 5, 2), dtype='float64')) def test_constructor_name(self): # GH12288 @@ -170,16 +173,16 @@ def test_constructor_name(self): copy = RangeIndex(orig) copy.name = 'copy' - self.assertTrue(orig.name, 'original') - 
self.assertTrue(copy.name, 'copy') + assert orig.name == 'original' + assert copy.name == 'copy' new = Index(copy) - self.assertTrue(new.name, 'copy') + assert new.name == 'copy' new.name = 'new' - self.assertTrue(orig.name, 'original') - self.assertTrue(new.name, 'copy') - self.assertTrue(new.name, 'new') + assert orig.name == 'original' + assert copy.name == 'copy' + assert new.name == 'new' def test_numeric_compat2(self): # validate that we are handling the RangeIndex overrides to numeric ops @@ -189,15 +192,15 @@ def test_numeric_compat2(self): result = idx * 2 expected = RangeIndex(0, 20, 4) - self.assert_index_equal(result, expected, exact=True) + tm.assert_index_equal(result, expected, exact=True) result = idx + 2 expected = RangeIndex(2, 12, 2) - self.assert_index_equal(result, expected, exact=True) + tm.assert_index_equal(result, expected, exact=True) result = idx - 2 expected = RangeIndex(-2, 8, 2) - self.assert_index_equal(result, expected, exact=True) + tm.assert_index_equal(result, expected, exact=True) # truediv under PY3 result = idx / 2 @@ -206,11 +209,11 @@ def test_numeric_compat2(self): expected = RangeIndex(0, 5, 1).astype('float64') else: expected = RangeIndex(0, 5, 1) - self.assert_index_equal(result, expected, exact=True) + tm.assert_index_equal(result, expected, exact=True) result = idx / 4 expected = RangeIndex(0, 10, 2) / 4 - self.assert_index_equal(result, expected, exact=True) + tm.assert_index_equal(result, expected, exact=True) result = idx // 1 expected = idx @@ -244,25 +247,25 @@ def test_numeric_compat2(self): def test_constructor_corner(self): arr = np.array([1, 2, 3, 4], dtype=object) index = RangeIndex(1, 5) - self.assertEqual(index.values.dtype, np.int64) - self.assert_index_equal(index, Index(arr)) + assert index.values.dtype == np.int64 + tm.assert_index_equal(index, Index(arr)) # non-int raise Exception - self.assertRaises(TypeError, RangeIndex, '1', '10', '1') - self.assertRaises(TypeError, RangeIndex, 1.1, 10.2, 1.3) + 
pytest.raises(TypeError, RangeIndex, '1', '10', '1') + pytest.raises(TypeError, RangeIndex, 1.1, 10.2, 1.3) # invalid passed type - self.assertRaises(TypeError, lambda: RangeIndex(1, 5, dtype='float64')) + pytest.raises(TypeError, lambda: RangeIndex(1, 5, dtype='float64')) def test_copy(self): i = RangeIndex(5, name='Foo') i_copy = i.copy() - self.assertTrue(i_copy is not i) - self.assertTrue(i_copy.identical(i)) - self.assertEqual(i_copy._start, 0) - self.assertEqual(i_copy._stop, 5) - self.assertEqual(i_copy._step, 1) - self.assertEqual(i_copy.name, 'Foo') + assert i_copy is not i + assert i_copy.identical(i) + assert i_copy._start == 0 + assert i_copy._stop == 5 + assert i_copy._step == 1 + assert i_copy.name == 'Foo' def test_repr(self): i = RangeIndex(5, name='Foo') @@ -271,18 +274,18 @@ def test_repr(self): expected = "RangeIndex(start=0, stop=5, step=1, name='Foo')" else: expected = "RangeIndex(start=0, stop=5, step=1, name=u'Foo')" - self.assertTrue(result, expected) + assert result == expected result = eval(result) - self.assert_index_equal(result, i, exact=True) + tm.assert_index_equal(result, i, exact=True) i = RangeIndex(5, 0, -1) result = repr(i) expected = "RangeIndex(start=5, stop=0, step=-1)" - self.assertEqual(result, expected) + assert result == expected result = eval(result) - self.assert_index_equal(result, i, exact=True) + tm.assert_index_equal(result, i, exact=True) def test_insert(self): @@ -290,31 +293,37 @@ def test_insert(self): result = idx[1:4] # test 0th element - self.assert_index_equal(idx[0:4], result.insert(0, idx[0])) + tm.assert_index_equal(idx[0:4], result.insert(0, idx[0])) + + # GH 18295 (test missing) + expected = Float64Index([0, np.nan, 1, 2, 3, 4]) + for na in (np.nan, pd.NaT, None): + result = RangeIndex(5).insert(1, na) + tm.assert_index_equal(result, expected) def test_delete(self): idx = RangeIndex(5, name='Foo') expected = idx[1:].astype(int) result = idx.delete(0) - self.assert_index_equal(result, expected) - 
self.assertEqual(result.name, expected.name) + tm.assert_index_equal(result, expected) + assert result.name == expected.name expected = idx[:-1].astype(int) result = idx.delete(-1) - self.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) + tm.assert_index_equal(result, expected) + assert result.name == expected.name - with tm.assertRaises((IndexError, ValueError)): + with pytest.raises((IndexError, ValueError)): # either depending on numpy version result = idx.delete(len(idx)) - def test_view(self): - super(TestRangeIndex, self).test_view() + def test_view(self, indices): + super(TestRangeIndex, self).test_view(indices) i = RangeIndex(0, name='Foo') i_view = i.view() - self.assertEqual(i_view.name, 'Foo') + assert i_view.name == 'Foo' i_view = i.view('i8') tm.assert_numpy_array_equal(i.values, i_view) @@ -323,31 +332,41 @@ def test_view(self): tm.assert_index_equal(i, i_view) def test_dtype(self): - self.assertEqual(self.index.dtype, np.int64) + assert self.index.dtype == np.int64 def test_is_monotonic(self): - self.assertTrue(self.index.is_monotonic) - self.assertTrue(self.index.is_monotonic_increasing) - self.assertFalse(self.index.is_monotonic_decreasing) + assert self.index.is_monotonic + assert self.index.is_monotonic_increasing + assert not self.index.is_monotonic_decreasing + assert self.index._is_strictly_monotonic_increasing + assert not self.index._is_strictly_monotonic_decreasing index = RangeIndex(4, 0, -1) - self.assertFalse(index.is_monotonic) - self.assertTrue(index.is_monotonic_decreasing) + assert not index.is_monotonic + assert not index._is_strictly_monotonic_increasing + assert index.is_monotonic_decreasing + assert index._is_strictly_monotonic_decreasing index = RangeIndex(1, 2) - self.assertTrue(index.is_monotonic) - self.assertTrue(index.is_monotonic_increasing) - self.assertTrue(index.is_monotonic_decreasing) + assert index.is_monotonic + assert index.is_monotonic_increasing + assert 
index.is_monotonic_decreasing + assert index._is_strictly_monotonic_increasing + assert index._is_strictly_monotonic_decreasing index = RangeIndex(2, 1) - self.assertTrue(index.is_monotonic) - self.assertTrue(index.is_monotonic_increasing) - self.assertTrue(index.is_monotonic_decreasing) + assert index.is_monotonic + assert index.is_monotonic_increasing + assert index.is_monotonic_decreasing + assert index._is_strictly_monotonic_increasing + assert index._is_strictly_monotonic_decreasing index = RangeIndex(1, 1) - self.assertTrue(index.is_monotonic) - self.assertTrue(index.is_monotonic_increasing) - self.assertTrue(index.is_monotonic_decreasing) + assert index.is_monotonic + assert index.is_monotonic_increasing + assert index.is_monotonic_decreasing + assert index._is_strictly_monotonic_increasing + assert index._is_strictly_monotonic_decreasing def test_equals_range(self): equiv_pairs = [(RangeIndex(0, 9, 2), RangeIndex(0, 10, 2)), @@ -355,54 +374,53 @@ def test_equals_range(self): (RangeIndex(1, 2, 3), RangeIndex(1, 3, 4)), (RangeIndex(0, -9, -2), RangeIndex(0, -10, -2))] for left, right in equiv_pairs: - self.assertTrue(left.equals(right)) - self.assertTrue(right.equals(left)) + assert left.equals(right) + assert right.equals(left) def test_logical_compat(self): idx = self.create_index() - self.assertEqual(idx.all(), idx.values.all()) - self.assertEqual(idx.any(), idx.values.any()) + assert idx.all() == idx.values.all() + assert idx.any() == idx.values.any() def test_identical(self): i = Index(self.index.copy()) - self.assertTrue(i.identical(self.index)) + assert i.identical(self.index) # we don't allow object dtype for RangeIndex if isinstance(self.index, RangeIndex): return same_values_different_type = Index(i, dtype=object) - self.assertFalse(i.identical(same_values_different_type)) + assert not i.identical(same_values_different_type) i = self.index.copy(dtype=object) i = i.rename('foo') same_values = Index(i, dtype=object) - 
self.assertTrue(same_values.identical(self.index.copy(dtype=object))) + assert same_values.identical(self.index.copy(dtype=object)) - self.assertFalse(i.identical(self.index)) - self.assertTrue(Index(same_values, name='foo', dtype=object).identical( - i)) + assert not i.identical(self.index) + assert Index(same_values, name='foo', dtype=object).identical(i) - self.assertFalse(self.index.copy(dtype=object) - .identical(self.index.copy(dtype='int64'))) + assert not self.index.copy(dtype=object).identical( + self.index.copy(dtype='int64')) def test_get_indexer(self): target = RangeIndex(10) indexer = self.index.get_indexer(target) expected = np.array([0, -1, 1, -1, 2, -1, 3, -1, 4, -1], dtype=np.intp) - self.assert_numpy_array_equal(indexer, expected) + tm.assert_numpy_array_equal(indexer, expected) def test_get_indexer_pad(self): target = RangeIndex(10) indexer = self.index.get_indexer(target, method='pad') expected = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4], dtype=np.intp) - self.assert_numpy_array_equal(indexer, expected) + tm.assert_numpy_array_equal(indexer, expected) def test_get_indexer_backfill(self): target = RangeIndex(10) indexer = self.index.get_indexer(target, method='backfill') expected = np.array([0, 1, 1, 2, 2, 3, 3, 4, 4, 5], dtype=np.intp) - self.assert_numpy_array_equal(indexer, expected) + tm.assert_numpy_array_equal(indexer, expected) def test_join_outer(self): # join with Int64Index @@ -411,7 +429,7 @@ def test_join_outer(self): res, lidx, ridx = self.index.join(other, how='outer', return_indexers=True) noidx_res = self.index.join(other, how='outer') - self.assert_index_equal(res, noidx_res) + tm.assert_index_equal(res, noidx_res) eres = Int64Index([0, 2, 4, 6, 8, 10, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]) @@ -420,11 +438,11 @@ def test_join_outer(self): eridx = np.array([-1, -1, -1, -1, -1, -1, -1, -1, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0], dtype=np.intp) - self.assertIsInstance(res, Int64Index) - self.assertFalse(isinstance(res, 
RangeIndex)) - self.assert_index_equal(res, eres) - self.assert_numpy_array_equal(lidx, elidx) - self.assert_numpy_array_equal(ridx, eridx) + assert isinstance(res, Int64Index) + assert not isinstance(res, RangeIndex) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) # join with RangeIndex other = RangeIndex(25, 14, -1) @@ -432,13 +450,13 @@ def test_join_outer(self): res, lidx, ridx = self.index.join(other, how='outer', return_indexers=True) noidx_res = self.index.join(other, how='outer') - self.assert_index_equal(res, noidx_res) + tm.assert_index_equal(res, noidx_res) - self.assertIsInstance(res, Int64Index) - self.assertFalse(isinstance(res, RangeIndex)) - self.assert_index_equal(res, eres) - self.assert_numpy_array_equal(lidx, elidx) - self.assert_numpy_array_equal(ridx, eridx) + assert isinstance(res, Int64Index) + assert not isinstance(res, RangeIndex) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) def test_join_inner(self): # Join with non-RangeIndex @@ -457,10 +475,10 @@ def test_join_inner(self): elidx = np.array([8, 9], dtype=np.intp) eridx = np.array([9, 7], dtype=np.intp) - self.assertIsInstance(res, Int64Index) - self.assert_index_equal(res, eres) - self.assert_numpy_array_equal(lidx, elidx) - self.assert_numpy_array_equal(ridx, eridx) + assert isinstance(res, Int64Index) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) # Join two RangeIndex other = RangeIndex(25, 14, -1) @@ -468,10 +486,10 @@ def test_join_inner(self): res, lidx, ridx = self.index.join(other, how='inner', return_indexers=True) - self.assertIsInstance(res, RangeIndex) - self.assert_index_equal(res, eres) - self.assert_numpy_array_equal(lidx, elidx) - self.assert_numpy_array_equal(ridx, eridx) + assert isinstance(res, RangeIndex) + tm.assert_index_equal(res, eres) + 
tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) def test_join_left(self): # Join with Int64Index @@ -482,10 +500,10 @@ def test_join_left(self): eres = self.index eridx = np.array([-1, -1, -1, -1, -1, -1, -1, -1, 9, 7], dtype=np.intp) - self.assertIsInstance(res, RangeIndex) - self.assert_index_equal(res, eres) - self.assertIsNone(lidx) - self.assert_numpy_array_equal(ridx, eridx) + assert isinstance(res, RangeIndex) + tm.assert_index_equal(res, eres) + assert lidx is None + tm.assert_numpy_array_equal(ridx, eridx) # Join withRangeIndex other = Int64Index(np.arange(25, 14, -1)) @@ -493,10 +511,10 @@ def test_join_left(self): res, lidx, ridx = self.index.join(other, how='left', return_indexers=True) - self.assertIsInstance(res, RangeIndex) - self.assert_index_equal(res, eres) - self.assertIsNone(lidx) - self.assert_numpy_array_equal(ridx, eridx) + assert isinstance(res, RangeIndex) + tm.assert_index_equal(res, eres) + assert lidx is None + tm.assert_numpy_array_equal(ridx, eridx) def test_join_right(self): # Join with Int64Index @@ -508,10 +526,10 @@ def test_join_right(self): elidx = np.array([-1, -1, -1, -1, -1, -1, -1, 9, -1, 8, -1], dtype=np.intp) - self.assertIsInstance(other, Int64Index) - self.assert_index_equal(res, eres) - self.assert_numpy_array_equal(lidx, elidx) - self.assertIsNone(ridx) + assert isinstance(other, Int64Index) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + assert ridx is None # Join withRangeIndex other = RangeIndex(25, 14, -1) @@ -520,10 +538,10 @@ def test_join_right(self): return_indexers=True) eres = other - self.assertIsInstance(other, RangeIndex) - self.assert_index_equal(res, eres) - self.assert_numpy_array_equal(lidx, elidx) - self.assertIsNone(ridx) + assert isinstance(other, RangeIndex) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + assert ridx is None def test_join_non_int_index(self): other = Index([3, 6, 7, 8, 10], 
dtype=object) @@ -531,26 +549,26 @@ def test_join_non_int_index(self): outer = self.index.join(other, how='outer') outer2 = other.join(self.index, how='outer') expected = Index([0, 2, 3, 4, 6, 7, 8, 10, 12, 14, 16, 18]) - self.assert_index_equal(outer, outer2) - self.assert_index_equal(outer, expected) + tm.assert_index_equal(outer, outer2) + tm.assert_index_equal(outer, expected) inner = self.index.join(other, how='inner') inner2 = other.join(self.index, how='inner') expected = Index([6, 8, 10]) - self.assert_index_equal(inner, inner2) - self.assert_index_equal(inner, expected) + tm.assert_index_equal(inner, inner2) + tm.assert_index_equal(inner, expected) left = self.index.join(other, how='left') - self.assert_index_equal(left, self.index.astype(object)) + tm.assert_index_equal(left, self.index.astype(object)) left2 = other.join(self.index, how='left') - self.assert_index_equal(left2, other) + tm.assert_index_equal(left2, other) right = self.index.join(other, how='right') - self.assert_index_equal(right, other) + tm.assert_index_equal(right, other) right2 = other.join(self.index, how='right') - self.assert_index_equal(right2, self.index.astype(object)) + tm.assert_index_equal(right2, self.index.astype(object)) def test_join_non_unique(self): other = Index([4, 4, 3, 3]) @@ -562,15 +580,15 @@ def test_join_non_unique(self): eridx = np.array([-1, -1, 0, 1, -1, -1, -1, -1, -1, -1, -1], dtype=np.intp) - self.assert_index_equal(res, eres) - self.assert_numpy_array_equal(lidx, elidx) - self.assert_numpy_array_equal(ridx, eridx) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) def test_join_self(self): kinds = 'outer', 'inner', 'left', 'right' for kind in kinds: joined = self.index.join(self.index, how=kind) - self.assertIs(self.index, joined) + assert self.index is joined def test_intersection(self): # intersect with Int64Index @@ -578,26 +596,41 @@ def test_intersection(self): result = 
self.index.intersection(other) expected = Index(np.sort(np.intersect1d(self.index.values, other.values))) - self.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) result = other.intersection(self.index) expected = Index(np.sort(np.asarray(np.intersect1d(self.index.values, other.values)))) - self.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) # intersect with increasing RangeIndex other = RangeIndex(1, 6) result = self.index.intersection(other) expected = Index(np.sort(np.intersect1d(self.index.values, other.values))) - self.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) # intersect with decreasing RangeIndex other = RangeIndex(5, 0, -1) result = self.index.intersection(other) expected = Index(np.sort(np.intersect1d(self.index.values, other.values))) - self.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) + + # reversed (GH 17296) + result = other.intersection(self.index) + tm.assert_index_equal(result, expected) + + # GH 17296: intersect two decreasing RangeIndexes + first = RangeIndex(10, -2, -2) + other = RangeIndex(5, -4, -1) + expected = first.astype(int).intersection(other.astype(int)) + result = first.intersection(other).astype(int) + tm.assert_index_equal(result, expected) + + # reversed + result = other.intersection(first).astype(int) + tm.assert_index_equal(result, expected) index = RangeIndex(5) @@ -605,37 +638,28 @@ def test_intersection(self): other = RangeIndex(5, 10, 1) result = index.intersection(other) expected = RangeIndex(0, 0, 1) - self.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) other = RangeIndex(-1, -5, -1) result = index.intersection(other) expected = RangeIndex(0, 0, 1) - self.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) # intersection of empty indices other = RangeIndex(0, 0, 1) result = index.intersection(other) expected = RangeIndex(0, 0, 1) - 
self.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) result = other.intersection(index) - self.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) # intersection of non-overlapping values based on start value and gcd index = RangeIndex(1, 10, 2) other = RangeIndex(0, 10, 4) result = index.intersection(other) expected = RangeIndex(0, 0, 1) - self.assert_index_equal(result, expected) - - def test_intersect_str_dates(self): - dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)] - - i1 = Index(dt_dates, dtype=object) - i2 = Index(['aa'], dtype=object) - res = i2.intersection(i1) - - self.assertEqual(len(res), 0) + tm.assert_index_equal(result, expected) def test_union_noncomparable(self): from datetime import datetime, timedelta @@ -644,11 +668,11 @@ def test_union_noncomparable(self): other = Index([now + timedelta(i) for i in range(4)], dtype=object) result = self.index.union(other) expected = Index(np.concatenate((self.index, other))) - self.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) result = other.union(self.index) expected = Index(np.concatenate((other, self.index))) - self.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) def test_union(self): RI = RangeIndex @@ -687,30 +711,30 @@ def test_nbytes(self): # memory savings vs int index i = RangeIndex(0, 1000) - self.assertTrue(i.nbytes < i.astype(int).nbytes / 10) + assert i.nbytes < i._int64index.nbytes / 10 # constant memory usage i2 = RangeIndex(0, 10) - self.assertEqual(i.nbytes, i2.nbytes) + assert i.nbytes == i2.nbytes def test_cant_or_shouldnt_cast(self): # can't - self.assertRaises(TypeError, RangeIndex, 'foo', 'bar', 'baz') + pytest.raises(TypeError, RangeIndex, 'foo', 'bar', 'baz') # shouldn't - self.assertRaises(TypeError, RangeIndex, '0', '1', '2') + pytest.raises(TypeError, RangeIndex, '0', '1', '2') def test_view_Index(self): self.index.view(Index) def 
test_prevent_casting(self): result = self.index.astype('O') - self.assertEqual(result.dtype, np.object_) + assert result.dtype == np.object_ def test_take_preserve_name(self): index = RangeIndex(1, 5, name='foo') taken = index.take([3, 0, 1]) - self.assertEqual(index.name, taken.name) + assert index.name == taken.name def test_take_fill_value(self): # GH 12631 @@ -721,7 +745,7 @@ def test_take_fill_value(self): # fill_value msg = "Unable to fill values because RangeIndex cannot contain NA" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -1]), fill_value=True) # allow_fill=False @@ -731,12 +755,12 @@ def test_take_fill_value(self): tm.assert_index_equal(result, expected) msg = "Unable to fill values because RangeIndex cannot contain NA" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -5]), fill_value=True) - with tm.assertRaises(IndexError): + with pytest.raises(IndexError): idx.take(np.array([1, -5])) def test_print_unicode_columns(self): @@ -750,12 +774,12 @@ def test_repr_roundtrip(self): def test_slice_keep_name(self): idx = RangeIndex(1, 2, name='asdf') - self.assertEqual(idx.name, idx[1:].name) + assert idx.name == idx[1:].name def test_explicit_conversions(self): # GH 8608 - # add/sub are overriden explicity for Float/Int Index + # add/sub are overridden explicitly for Float/Int Index idx = RangeIndex(5) # float conversions @@ -782,8 +806,8 @@ def test_duplicates(self): if not len(ind): continue idx = self.indices[ind] - self.assertTrue(idx.is_unique) - self.assertFalse(idx.has_duplicates) + assert idx.is_unique + assert not idx.has_duplicates def test_ufunc_compat(self): idx = RangeIndex(5) @@ -793,48 +817,48 @@ def test_ufunc_compat(self): def test_extended_gcd(self): result 
= self.index._extended_gcd(6, 10) - self.assertEqual(result[0], result[1] * 6 + result[2] * 10) - self.assertEqual(2, result[0]) + assert result[0] == result[1] * 6 + result[2] * 10 + assert 2 == result[0] result = self.index._extended_gcd(10, 6) - self.assertEqual(2, result[1] * 10 + result[2] * 6) - self.assertEqual(2, result[0]) + assert 2 == result[1] * 10 + result[2] * 6 + assert 2 == result[0] def test_min_fitting_element(self): result = RangeIndex(0, 20, 2)._min_fitting_element(1) - self.assertEqual(2, result) + assert 2 == result result = RangeIndex(1, 6)._min_fitting_element(1) - self.assertEqual(1, result) + assert 1 == result result = RangeIndex(18, -2, -2)._min_fitting_element(1) - self.assertEqual(2, result) + assert 2 == result result = RangeIndex(5, 0, -1)._min_fitting_element(1) - self.assertEqual(1, result) + assert 1 == result big_num = 500000000000000000000000 result = RangeIndex(5, big_num * 2, 1)._min_fitting_element(big_num) - self.assertEqual(big_num, result) + assert big_num == result def test_max_fitting_element(self): result = RangeIndex(0, 20, 2)._max_fitting_element(17) - self.assertEqual(16, result) + assert 16 == result result = RangeIndex(1, 6)._max_fitting_element(4) - self.assertEqual(4, result) + assert 4 == result result = RangeIndex(18, -2, -2)._max_fitting_element(17) - self.assertEqual(16, result) + assert 16 == result result = RangeIndex(5, 0, -1)._max_fitting_element(4) - self.assertEqual(4, result) + assert 4 == result big_num = 500000000000000000000000 result = RangeIndex(5, big_num * 2, 1)._max_fitting_element(big_num) - self.assertEqual(big_num, result) + assert big_num == result def test_pickle_compat_construction(self): # RangeIndex() is a valid constructor @@ -845,53 +869,53 @@ def test_slice_specialised(self): # scalar indexing res = self.index[1] expected = 2 - self.assertEqual(res, expected) + assert res == expected res = self.index[-1] expected = 18 - self.assertEqual(res, expected) + assert res == expected # 
slicing # slice value completion index = self.index[:] expected = self.index - self.assert_index_equal(index, expected) + tm.assert_index_equal(index, expected) # positive slice values index = self.index[7:10:2] expected = Index(np.array([14, 18]), name='foo') - self.assert_index_equal(index, expected) + tm.assert_index_equal(index, expected) # negative slice values index = self.index[-1:-5:-2] expected = Index(np.array([18, 14]), name='foo') - self.assert_index_equal(index, expected) + tm.assert_index_equal(index, expected) # stop overshoot index = self.index[2:100:4] expected = Index(np.array([4, 12]), name='foo') - self.assert_index_equal(index, expected) + tm.assert_index_equal(index, expected) # reverse index = self.index[::-1] expected = Index(self.index.values[::-1], name='foo') - self.assert_index_equal(index, expected) + tm.assert_index_equal(index, expected) index = self.index[-8::-1] expected = Index(np.array([4, 2, 0]), name='foo') - self.assert_index_equal(index, expected) + tm.assert_index_equal(index, expected) index = self.index[-40::-1] expected = Index(np.array([], dtype=np.int64), name='foo') - self.assert_index_equal(index, expected) + tm.assert_index_equal(index, expected) index = self.index[40::-1] expected = Index(self.index.values[40::-1], name='foo') - self.assert_index_equal(index, expected) + tm.assert_index_equal(index, expected) index = self.index[10::-1] expected = Index(self.index.values[::-1], name='foo') - self.assert_index_equal(index, expected) + tm.assert_index_equal(index, expected) def test_len_specialised(self): @@ -902,16 +926,71 @@ def test_len_specialised(self): arr = np.arange(0, 5, step) i = RangeIndex(0, 5, step) - self.assertEqual(len(i), len(arr)) + assert len(i) == len(arr) i = RangeIndex(5, 0, step) - self.assertEqual(len(i), 0) + assert len(i) == 0 for step in np.arange(-6, -1, 1): arr = np.arange(5, 0, step) i = RangeIndex(5, 0, step) - self.assertEqual(len(i), len(arr)) + assert len(i) == len(arr) i = 
RangeIndex(0, 5, step) - self.assertEqual(len(i), 0) + assert len(i) == 0 + + def test_append(self): + # GH16212 + RI = RangeIndex + I64 = Int64Index + F64 = Float64Index + OI = Index + cases = [([RI(1, 12, 5)], RI(1, 12, 5)), + ([RI(0, 6, 4)], RI(0, 6, 4)), + ([RI(1, 3), RI(3, 7)], RI(1, 7)), + ([RI(1, 5, 2), RI(5, 6)], RI(1, 6, 2)), + ([RI(1, 3, 2), RI(4, 7, 3)], RI(1, 7, 3)), + ([RI(-4, 3, 2), RI(4, 7, 2)], RI(-4, 7, 2)), + ([RI(-4, -8), RI(-8, -12)], RI(0, 0)), + ([RI(-4, -8), RI(3, -4)], RI(0, 0)), + ([RI(-4, -8), RI(3, 5)], RI(3, 5)), + ([RI(-4, -2), RI(3, 5)], I64([-4, -3, 3, 4])), + ([RI(-2,), RI(3, 5)], RI(3, 5)), + ([RI(2,), RI(2)], I64([0, 1, 0, 1])), + ([RI(2,), RI(2, 5), RI(5, 8, 4)], RI(0, 6)), + ([RI(2,), RI(3, 5), RI(5, 8, 4)], I64([0, 1, 3, 4, 5])), + ([RI(-2, 2), RI(2, 5), RI(5, 8, 4)], RI(-2, 6)), + ([RI(3,), I64([-1, 3, 15])], I64([0, 1, 2, -1, 3, 15])), + ([RI(3,), F64([-1, 3.1, 15.])], F64([0, 1, 2, -1, 3.1, 15.])), + ([RI(3,), OI(['a', None, 14])], OI([0, 1, 2, 'a', None, 14])), + ([RI(3, 1), OI(['a', None, 14])], OI(['a', None, 14])) + ] + + for indices, expected in cases: + result = indices[0].append(indices[1:]) + tm.assert_index_equal(result, expected, exact=True) + + if len(indices) == 2: + # Append single item rather than list + result2 = indices[0].append(indices[1]) + tm.assert_index_equal(result2, expected, exact=True) + + @pytest.mark.parametrize('start,stop,step', + [(0, 400, 3), (500, 0, -6), (-10**6, 10**6, 4), + (10**6, -10**6, -4), (0, 10, 20)]) + def test_max_min(self, start, stop, step): + # GH17607 + idx = RangeIndex(start, stop, step) + expected = idx._int64index.max() + result = idx.max() + assert result == expected + + expected = idx._int64index.min() + result = idx.min() + assert result == expected + + # empty + idx = RangeIndex(start, stop, -step) + assert isna(idx.max()) + assert isna(idx.min()) diff --git a/pandas/tests/indexes/timedeltas/test_arithmetic.py b/pandas/tests/indexes/timedeltas/test_arithmetic.py new file 
mode 100644 index 0000000000000..9035434046ccb --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_arithmetic.py @@ -0,0 +1,1123 @@ +# -*- coding: utf-8 -*- +import operator + +import pytest +import numpy as np +from datetime import timedelta +from distutils.version import LooseVersion + +import pandas as pd +import pandas.util.testing as tm +from pandas import (DatetimeIndex, TimedeltaIndex, Float64Index, Int64Index, + to_timedelta, timedelta_range, date_range, + Series, + Timestamp, Timedelta) +from pandas.errors import PerformanceWarning, NullFrequencyError +from pandas.core import ops + + +@pytest.fixture(params=[pd.offsets.Hour(2), timedelta(hours=2), + np.timedelta64(2, 'h'), Timedelta(hours=2)], + ids=str) +def delta(request): + # Several ways of representing two hours + return request.param + + +@pytest.fixture(params=['B', 'D']) +def freq(request): + return request.param + + +class TestTimedeltaIndexComparisons(object): + def test_tdi_cmp_str_invalid(self): + # GH 13624 + tdi = TimedeltaIndex(['1 day', '2 days']) + + for left, right in [(tdi, 'a'), ('a', tdi)]: + with pytest.raises(TypeError): + left > right + + with pytest.raises(TypeError): + left == right + + with pytest.raises(TypeError): + left != right + + def test_comparisons_coverage(self): + rng = timedelta_range('1 days', periods=10) + + result = rng < rng[3] + exp = np.array([True, True, True] + [False] * 7) + tm.assert_numpy_array_equal(result, exp) + + # raise TypeError for now + pytest.raises(TypeError, rng.__lt__, rng[3].value) + + result = rng == list(rng) + exp = rng == rng + tm.assert_numpy_array_equal(result, exp) + + def test_comp_nat(self): + left = pd.TimedeltaIndex([pd.Timedelta('1 days'), pd.NaT, + pd.Timedelta('3 days')]) + right = pd.TimedeltaIndex([pd.NaT, pd.NaT, pd.Timedelta('3 days')]) + + for lhs, rhs in [(left, right), + (left.astype(object), right.astype(object))]: + result = rhs == lhs + expected = np.array([False, False, True]) + tm.assert_numpy_array_equal(result, 
expected) + + result = rhs != lhs + expected = np.array([True, True, False]) + tm.assert_numpy_array_equal(result, expected) + + expected = np.array([False, False, False]) + tm.assert_numpy_array_equal(lhs == pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT == rhs, expected) + + expected = np.array([True, True, True]) + tm.assert_numpy_array_equal(lhs != pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT != lhs, expected) + + expected = np.array([False, False, False]) + tm.assert_numpy_array_equal(lhs < pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT > lhs, expected) + + def test_comparisons_nat(self): + tdidx1 = pd.TimedeltaIndex(['1 day', pd.NaT, '1 day 00:00:01', pd.NaT, + '1 day 00:00:01', '5 day 00:00:03']) + tdidx2 = pd.TimedeltaIndex(['2 day', '2 day', pd.NaT, pd.NaT, + '1 day 00:00:02', '5 days 00:00:03']) + tdarr = np.array([np.timedelta64(2, 'D'), + np.timedelta64(2, 'D'), np.timedelta64('nat'), + np.timedelta64('nat'), + np.timedelta64(1, 'D') + np.timedelta64(2, 's'), + np.timedelta64(5, 'D') + np.timedelta64(3, 's')]) + + cases = [(tdidx1, tdidx2), (tdidx1, tdarr)] + + # Check pd.NaT is handles as the same as np.nan + for idx1, idx2 in cases: + + result = idx1 < idx2 + expected = np.array([True, False, False, False, True, False]) + tm.assert_numpy_array_equal(result, expected) + + result = idx2 > idx1 + expected = np.array([True, False, False, False, True, False]) + tm.assert_numpy_array_equal(result, expected) + + result = idx1 <= idx2 + expected = np.array([True, False, False, False, True, True]) + tm.assert_numpy_array_equal(result, expected) + + result = idx2 >= idx1 + expected = np.array([True, False, False, False, True, True]) + tm.assert_numpy_array_equal(result, expected) + + result = idx1 == idx2 + expected = np.array([False, False, False, False, False, True]) + tm.assert_numpy_array_equal(result, expected) + + result = idx1 != idx2 + expected = np.array([True, True, True, True, True, False]) + 
tm.assert_numpy_array_equal(result, expected) + + +class TestTimedeltaIndexMultiplicationDivision(object): + # __mul__, __rmul__, + # __div__, __rdiv__, __floordiv__, __rfloordiv__, + # __mod__, __rmod__, __divmod__, __rdivmod__ + + # ------------------------------------------------------------- + # Multiplication + # organized with scalar others first, then array-like + + def test_tdi_mul_int(self): + idx = TimedeltaIndex(np.arange(5, dtype='int64')) + result = idx * 1 + tm.assert_index_equal(result, idx) + + def test_tdi_rmul_int(self): + idx = TimedeltaIndex(np.arange(5, dtype='int64')) + result = 1 * idx + tm.assert_index_equal(result, idx) + + def test_tdi_mul_tdlike_scalar_raises(self, delta): + rng = timedelta_range('1 days', '10 days', name='foo') + with pytest.raises(TypeError): + rng * delta + + def test_tdi_mul_int_array_zerodim(self): + rng5 = np.arange(5, dtype='int64') + idx = TimedeltaIndex(rng5) + expected = TimedeltaIndex(rng5 * 5) + result = idx * np.array(5, dtype='int64') + tm.assert_index_equal(result, expected) + + def test_tdi_mul_int_array(self): + rng5 = np.arange(5, dtype='int64') + idx = TimedeltaIndex(rng5) + didx = TimedeltaIndex(rng5 ** 2) + + result = idx * rng5 + tm.assert_index_equal(result, didx) + + def test_tdi_mul_dti_raises(self): + idx = TimedeltaIndex(np.arange(5, dtype='int64')) + with pytest.raises(TypeError): + idx * idx + + def test_tdi_mul_too_short_raises(self): + idx = TimedeltaIndex(np.arange(5, dtype='int64')) + with pytest.raises(TypeError): + idx * TimedeltaIndex(np.arange(3)) + with pytest.raises(ValueError): + idx * np.array([1, 2]) + + def test_tdi_mul_int_series(self): + idx = TimedeltaIndex(np.arange(5, dtype='int64')) + didx = TimedeltaIndex(np.arange(5, dtype='int64') ** 2) + + result = idx * Series(np.arange(5, dtype='int64')) + + tm.assert_series_equal(result, Series(didx)) + + def test_tdi_mul_float_series(self): + idx = TimedeltaIndex(np.arange(5, dtype='int64')) + + rng5f = np.arange(5, dtype='float64') 
+ result = idx * Series(rng5f + 0.1) + expected = Series(TimedeltaIndex(rng5f * (rng5f + 0.1))) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('other', [np.arange(1, 11), + pd.Int64Index(range(1, 11)), + pd.UInt64Index(range(1, 11)), + pd.Float64Index(range(1, 11)), + pd.RangeIndex(1, 11)]) + def test_tdi_rmul_arraylike(self, other): + tdi = TimedeltaIndex(['1 Day'] * 10) + expected = timedelta_range('1 days', '10 days') + + result = other * tdi + tm.assert_index_equal(result, expected) + commute = tdi * other + tm.assert_index_equal(commute, expected) + + # ------------------------------------------------------------- + # TimedeltaIndex.__div__ + + def test_tdi_div_int(self): + idx = TimedeltaIndex(np.arange(5, dtype='int64')) + result = idx / 1 + tm.assert_index_equal(result, idx) + + def test_tdi_div_tdlike_scalar(self, delta): + rng = timedelta_range('1 days', '10 days', name='foo') + expected = Int64Index((np.arange(10) + 1) * 12, name='foo') + + result = rng / delta + tm.assert_index_equal(result, expected, exact=False) + + def test_tdi_div_tdlike_scalar_with_nat(self, delta): + rng = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo') + expected = Float64Index([12, np.nan, 24], name='foo') + result = rng / delta + tm.assert_index_equal(result, expected) + + def test_tdi_div_nat_raises(self): + # don't allow division by NaT (make could in the future) + rng = timedelta_range('1 days', '10 days', name='foo') + with pytest.raises(TypeError): + rng / pd.NaT + + # ------------------------------------------------------------- + # TimedeltaIndex.__floordiv__ + + def test_tdi_floordiv_int(self): + idx = TimedeltaIndex(np.arange(5, dtype='int64')) + result = idx // 1 + tm.assert_index_equal(result, idx) + + def test_tdi_floordiv_tdlike_scalar(self, delta): + tdi = timedelta_range('1 days', '10 days', name='foo') + expected = Int64Index((np.arange(10) + 1) * 12, name='foo') + + result = tdi // delta + tm.assert_index_equal(result, 
expected, exact=False) + + @pytest.mark.parametrize('scalar_td', [ + timedelta(minutes=10, seconds=7), + Timedelta('10m7s'), + Timedelta('10m7s').to_timedelta64()]) + def test_tdi_floordiv_timedelta_scalar(self, scalar_td): + # GH#19125 + tdi = TimedeltaIndex(['00:05:03', '00:05:03', pd.NaT], freq=None) + expected = pd.Index([2.0, 2.0, np.nan]) + + res = tdi.__rfloordiv__(scalar_td) + tm.assert_index_equal(res, expected) + + expected = pd.Index([0.0, 0.0, np.nan]) + + res = tdi // (scalar_td) + tm.assert_index_equal(res, expected) + + +class TestTimedeltaIndexArithmetic(object): + # Addition and Subtraction Operations + + # ------------------------------------------------------------- + # Invalid Operations + + @pytest.mark.parametrize('other', [3.14, np.array([2.0, 3.0])]) + @pytest.mark.parametrize('op', [operator.add, ops.radd, + operator.sub, ops.rsub]) + def test_tdi_add_sub_float(self, op, other): + dti = DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D') + tdi = dti - dti.shift(1) + with pytest.raises(TypeError): + op(tdi, other) + + def test_tdi_add_str_invalid(self): + # GH 13624 + tdi = TimedeltaIndex(['1 day', '2 days']) + + with pytest.raises(TypeError): + tdi + 'a' + with pytest.raises(TypeError): + 'a' + tdi + + @pytest.mark.parametrize('freq', [None, 'H']) + def test_tdi_sub_period(self, freq): + # GH#13078 + # not supported, check TypeError + p = pd.Period('2011-01-01', freq='D') + + idx = pd.TimedeltaIndex(['1 hours', '2 hours'], freq=freq) + + with pytest.raises(TypeError): + idx - p + + with pytest.raises(TypeError): + p - idx + + # ------------------------------------------------------------- + # TimedeltaIndex.shift is used by __add__/__sub__ + + def test_tdi_shift_empty(self): + # GH#9903 + idx = pd.TimedeltaIndex([], name='xxx') + tm.assert_index_equal(idx.shift(0, freq='H'), idx) + tm.assert_index_equal(idx.shift(3, freq='H'), idx) + + def test_tdi_shift_hours(self): + # GH#9903 + idx = pd.TimedeltaIndex(['5 hours', '6 hours', '9 hours'], 
name='xxx') + tm.assert_index_equal(idx.shift(0, freq='H'), idx) + exp = pd.TimedeltaIndex(['8 hours', '9 hours', '12 hours'], name='xxx') + tm.assert_index_equal(idx.shift(3, freq='H'), exp) + exp = pd.TimedeltaIndex(['2 hours', '3 hours', '6 hours'], name='xxx') + tm.assert_index_equal(idx.shift(-3, freq='H'), exp) + + def test_tdi_shift_minutes(self): + # GH#9903 + idx = pd.TimedeltaIndex(['5 hours', '6 hours', '9 hours'], name='xxx') + tm.assert_index_equal(idx.shift(0, freq='T'), idx) + exp = pd.TimedeltaIndex(['05:03:00', '06:03:00', '9:03:00'], + name='xxx') + tm.assert_index_equal(idx.shift(3, freq='T'), exp) + exp = pd.TimedeltaIndex(['04:57:00', '05:57:00', '8:57:00'], + name='xxx') + tm.assert_index_equal(idx.shift(-3, freq='T'), exp) + + def test_tdi_shift_int(self): + # GH#8083 + trange = pd.to_timedelta(range(5), unit='d') + pd.offsets.Hour(1) + result = trange.shift(1) + expected = TimedeltaIndex(['1 days 01:00:00', '2 days 01:00:00', + '3 days 01:00:00', + '4 days 01:00:00', '5 days 01:00:00'], + freq='D') + tm.assert_index_equal(result, expected) + + def test_tdi_shift_nonstandard_freq(self): + # GH#8083 + trange = pd.to_timedelta(range(5), unit='d') + pd.offsets.Hour(1) + result = trange.shift(3, freq='2D 1s') + expected = TimedeltaIndex(['6 days 01:00:03', '7 days 01:00:03', + '8 days 01:00:03', '9 days 01:00:03', + '10 days 01:00:03'], freq='D') + tm.assert_index_equal(result, expected) + + def test_shift_no_freq(self): + # GH#19147 + tdi = TimedeltaIndex(['1 days 01:00:00', '2 days 01:00:00'], freq=None) + with pytest.raises(NullFrequencyError): + tdi.shift(2) + + # ------------------------------------------------------------- + + @pytest.mark.parametrize('names', [(None, None, None), + ('foo', 'bar', None), + ('foo', 'foo', 'foo')]) + def test_tdi_add_offset_index(self, names): + # GH#18849, GH#19744 + tdi = TimedeltaIndex(['1 days 00:00:00', '3 days 04:00:00'], + name=names[0]) + other = pd.Index([pd.offsets.Hour(n=1), 
pd.offsets.Minute(n=-2)], + name=names[1]) + + expected = TimedeltaIndex([tdi[n] + other[n] for n in range(len(tdi))], + freq='infer', name=names[2]) + + with tm.assert_produces_warning(PerformanceWarning): + res = tdi + other + tm.assert_index_equal(res, expected) + + with tm.assert_produces_warning(PerformanceWarning): + res2 = other + tdi + tm.assert_index_equal(res2, expected) + + def test_tdi_add_offset_array(self): + # GH#18849 + tdi = TimedeltaIndex(['1 days 00:00:00', '3 days 04:00:00']) + other = np.array([pd.offsets.Hour(n=1), pd.offsets.Minute(n=-2)]) + + expected = TimedeltaIndex([tdi[n] + other[n] for n in range(len(tdi))], + freq='infer') + + with tm.assert_produces_warning(PerformanceWarning): + res = tdi + other + tm.assert_index_equal(res, expected) + + with tm.assert_produces_warning(PerformanceWarning): + res2 = other + tdi + tm.assert_index_equal(res2, expected) + + @pytest.mark.parametrize('names', [(None, None, None), + ('foo', 'bar', None), + ('foo', 'foo', 'foo')]) + def test_tdi_sub_offset_index(self, names): + # GH#18824, GH#19744 + tdi = TimedeltaIndex(['1 days 00:00:00', '3 days 04:00:00'], + name=names[0]) + other = pd.Index([pd.offsets.Hour(n=1), pd.offsets.Minute(n=-2)], + name=names[1]) + + expected = TimedeltaIndex([tdi[n] - other[n] for n in range(len(tdi))], + freq='infer', name=names[2]) + + with tm.assert_produces_warning(PerformanceWarning): + res = tdi - other + tm.assert_index_equal(res, expected) + + def test_tdi_sub_offset_array(self): + # GH#18824 + tdi = TimedeltaIndex(['1 days 00:00:00', '3 days 04:00:00']) + other = np.array([pd.offsets.Hour(n=1), pd.offsets.Minute(n=-2)]) + + expected = TimedeltaIndex([tdi[n] - other[n] for n in range(len(tdi))], + freq='infer') + + with tm.assert_produces_warning(PerformanceWarning): + res = tdi - other + tm.assert_index_equal(res, expected) + + @pytest.mark.parametrize('names', [(None, None, None), + ('foo', 'bar', None), + ('foo', 'foo', 'foo')]) + def 
test_tdi_with_offset_series(self, names): + # GH#18849 + tdi = TimedeltaIndex(['1 days 00:00:00', '3 days 04:00:00'], + name=names[0]) + other = Series([pd.offsets.Hour(n=1), pd.offsets.Minute(n=-2)], + name=names[1]) + + expected_add = Series([tdi[n] + other[n] for n in range(len(tdi))], + name=names[2]) + + with tm.assert_produces_warning(PerformanceWarning): + res = tdi + other + tm.assert_series_equal(res, expected_add) + + with tm.assert_produces_warning(PerformanceWarning): + res2 = other + tdi + tm.assert_series_equal(res2, expected_add) + + expected_sub = Series([tdi[n] - other[n] for n in range(len(tdi))], + name=names[2]) + + with tm.assert_produces_warning(PerformanceWarning): + res3 = tdi - other + tm.assert_series_equal(res3, expected_sub) + + @pytest.mark.parametrize('box', [np.array, pd.Index, pd.Series]) + def test_tdi_add_sub_anchored_offset_arraylike(self, box): + # GH#18824 + tdi = TimedeltaIndex(['1 days 00:00:00', '3 days 04:00:00']) + + anchored = box([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)]) + + # addition/subtraction ops with anchored offsets should issue + # a PerformanceWarning and _then_ raise a TypeError. 
+ with pytest.raises(TypeError): + with tm.assert_produces_warning(PerformanceWarning): + tdi + anchored + with pytest.raises(TypeError): + with tm.assert_produces_warning(PerformanceWarning): + anchored + tdi + with pytest.raises(TypeError): + with tm.assert_produces_warning(PerformanceWarning): + tdi - anchored + with pytest.raises(TypeError): + with tm.assert_produces_warning(PerformanceWarning): + anchored - tdi + + def test_ufunc_coercions(self): + # normal ops are also tested in tseries/test_timedeltas.py + idx = TimedeltaIndex(['2H', '4H', '6H', '8H', '10H'], + freq='2H', name='x') + + for result in [idx * 2, np.multiply(idx, 2)]: + assert isinstance(result, TimedeltaIndex) + exp = TimedeltaIndex(['4H', '8H', '12H', '16H', '20H'], + freq='4H', name='x') + tm.assert_index_equal(result, exp) + assert result.freq == '4H' + + for result in [idx / 2, np.divide(idx, 2)]: + assert isinstance(result, TimedeltaIndex) + exp = TimedeltaIndex(['1H', '2H', '3H', '4H', '5H'], + freq='H', name='x') + tm.assert_index_equal(result, exp) + assert result.freq == 'H' + + idx = TimedeltaIndex(['2H', '4H', '6H', '8H', '10H'], + freq='2H', name='x') + for result in [-idx, np.negative(idx)]: + assert isinstance(result, TimedeltaIndex) + exp = TimedeltaIndex(['-2H', '-4H', '-6H', '-8H', '-10H'], + freq='-2H', name='x') + tm.assert_index_equal(result, exp) + assert result.freq == '-2H' + + idx = TimedeltaIndex(['-2H', '-1H', '0H', '1H', '2H'], + freq='H', name='x') + for result in [abs(idx), np.absolute(idx)]: + assert isinstance(result, TimedeltaIndex) + exp = TimedeltaIndex(['2H', '1H', '0H', '1H', '2H'], + freq=None, name='x') + tm.assert_index_equal(result, exp) + assert result.freq is None + + # ------------------------------------------------------------- + # Binary operations TimedeltaIndex and integer + + def test_tdi_add_int(self, one): + # Variants of `one` for #19012 + rng = timedelta_range('1 days 09:00:00', freq='H', periods=10) + result = rng + one + expected = 
timedelta_range('1 days 10:00:00', freq='H', periods=10) + tm.assert_index_equal(result, expected) + + def test_tdi_iadd_int(self, one): + rng = timedelta_range('1 days 09:00:00', freq='H', periods=10) + expected = timedelta_range('1 days 10:00:00', freq='H', periods=10) + rng += one + tm.assert_index_equal(rng, expected) + + def test_tdi_sub_int(self, one): + rng = timedelta_range('1 days 09:00:00', freq='H', periods=10) + result = rng - one + expected = timedelta_range('1 days 08:00:00', freq='H', periods=10) + tm.assert_index_equal(result, expected) + + def test_tdi_isub_int(self, one): + rng = timedelta_range('1 days 09:00:00', freq='H', periods=10) + expected = timedelta_range('1 days 08:00:00', freq='H', periods=10) + rng -= one + tm.assert_index_equal(rng, expected) + + # ------------------------------------------------------------- + # Binary operations TimedeltaIndex and timedelta-like + + def test_tdi_add_timedeltalike(self, delta): + # only test adding/sub offsets as + is now numeric + rng = timedelta_range('1 days', '10 days') + result = rng + delta + expected = timedelta_range('1 days 02:00:00', '10 days 02:00:00', + freq='D') + tm.assert_index_equal(result, expected) + + def test_tdi_iadd_timedeltalike(self, delta): + # only test adding/sub offsets as + is now numeric + rng = timedelta_range('1 days', '10 days') + expected = timedelta_range('1 days 02:00:00', '10 days 02:00:00', + freq='D') + rng += delta + tm.assert_index_equal(rng, expected) + + def test_tdi_sub_timedeltalike(self, delta): + # only test adding/sub offsets as - is now numeric + rng = timedelta_range('1 days', '10 days') + result = rng - delta + expected = timedelta_range('0 days 22:00:00', '9 days 22:00:00') + tm.assert_index_equal(result, expected) + + def test_tdi_isub_timedeltalike(self, delta): + # only test adding/sub offsets as - is now numeric + rng = timedelta_range('1 days', '10 days') + expected = timedelta_range('0 days 22:00:00', '9 days 22:00:00') + rng -= delta + 
tm.assert_index_equal(rng, expected) + + # ------------------------------------------------------------- + # Binary operations TimedeltaIndex and datetime-like + + def test_tdi_sub_timestamp_raises(self): + idx = TimedeltaIndex(['1 day', '2 day']) + msg = "cannot subtract a datelike from a TimedeltaIndex" + with tm.assert_raises_regex(TypeError, msg): + idx - Timestamp('2011-01-01') + + def test_tdi_add_timestamp(self): + idx = TimedeltaIndex(['1 day', '2 day']) + + result = idx + Timestamp('2011-01-01') + expected = DatetimeIndex(['2011-01-02', '2011-01-03']) + tm.assert_index_equal(result, expected) + + def test_tdi_radd_timestamp(self): + idx = TimedeltaIndex(['1 day', '2 day']) + + result = Timestamp('2011-01-01') + idx + expected = DatetimeIndex(['2011-01-02', '2011-01-03']) + tm.assert_index_equal(result, expected) + + # ------------------------------------------------------------- + # __add__/__sub__ with ndarray[datetime64] and ndarray[timedelta64] + + def test_tdi_sub_dt64_array(self): + dti = pd.date_range('2016-01-01', periods=3) + tdi = dti - dti.shift(1) + dtarr = dti.values + + with pytest.raises(TypeError): + tdi - dtarr + + # TimedeltaIndex.__rsub__ + expected = pd.DatetimeIndex(dtarr) - tdi + result = dtarr - tdi + tm.assert_index_equal(result, expected) + + def test_tdi_add_dt64_array(self): + dti = pd.date_range('2016-01-01', periods=3) + tdi = dti - dti.shift(1) + dtarr = dti.values + + expected = pd.DatetimeIndex(dtarr) + tdi + result = tdi + dtarr + tm.assert_index_equal(result, expected) + result = dtarr + tdi + tm.assert_index_equal(result, expected) + + def test_tdi_add_td64_array(self): + dti = pd.date_range('2016-01-01', periods=3) + tdi = dti - dti.shift(1) + tdarr = tdi.values + + expected = 2 * tdi + result = tdi + tdarr + tm.assert_index_equal(result, expected) + result = tdarr + tdi + tm.assert_index_equal(result, expected) + + def test_tdi_sub_td64_array(self): + dti = pd.date_range('2016-01-01', periods=3) + tdi = dti - 
dti.shift(1) + tdarr = tdi.values + + expected = 0 * tdi + result = tdi - tdarr + tm.assert_index_equal(result, expected) + result = tdarr - tdi + tm.assert_index_equal(result, expected) + + # ------------------------------------------------------------- + + def test_subtraction_ops(self): + # with datetimes/timedelta and tdi/dti + tdi = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo') + dti = date_range('20130101', periods=3, name='bar') + td = Timedelta('1 days') + dt = Timestamp('20130101') + + pytest.raises(TypeError, lambda: tdi - dt) + pytest.raises(TypeError, lambda: tdi - dti) + pytest.raises(TypeError, lambda: td - dt) + pytest.raises(TypeError, lambda: td - dti) + + result = dt - dti + expected = TimedeltaIndex(['0 days', '-1 days', '-2 days'], name='bar') + tm.assert_index_equal(result, expected) + + result = dti - dt + expected = TimedeltaIndex(['0 days', '1 days', '2 days'], name='bar') + tm.assert_index_equal(result, expected) + + result = tdi - td + expected = TimedeltaIndex(['0 days', pd.NaT, '1 days'], name='foo') + tm.assert_index_equal(result, expected, check_names=False) + + result = td - tdi + expected = TimedeltaIndex(['0 days', pd.NaT, '-1 days'], name='foo') + tm.assert_index_equal(result, expected, check_names=False) + + result = dti - td + expected = DatetimeIndex( + ['20121231', '20130101', '20130102'], name='bar') + tm.assert_index_equal(result, expected, check_names=False) + + result = dt - tdi + expected = DatetimeIndex(['20121231', pd.NaT, '20121230'], name='foo') + tm.assert_index_equal(result, expected) + + def test_subtraction_ops_with_tz(self): + + # check that dt/dti subtraction ops with tz are validated + dti = date_range('20130101', periods=3) + ts = Timestamp('20130101') + dt = ts.to_pydatetime() + dti_tz = date_range('20130101', periods=3).tz_localize('US/Eastern') + ts_tz = Timestamp('20130101').tz_localize('US/Eastern') + ts_tz2 = Timestamp('20130101').tz_localize('CET') + dt_tz = ts_tz.to_pydatetime() + td = 
Timedelta('1 days') + + def _check(result, expected): + assert result == expected + assert isinstance(result, Timedelta) + + # scalars + result = ts - ts + expected = Timedelta('0 days') + _check(result, expected) + + result = dt_tz - ts_tz + expected = Timedelta('0 days') + _check(result, expected) + + result = ts_tz - dt_tz + expected = Timedelta('0 days') + _check(result, expected) + + # tz mismatches + pytest.raises(TypeError, lambda: dt_tz - ts) + pytest.raises(TypeError, lambda: dt_tz - dt) + pytest.raises(TypeError, lambda: dt_tz - ts_tz2) + pytest.raises(TypeError, lambda: dt - dt_tz) + pytest.raises(TypeError, lambda: ts - dt_tz) + pytest.raises(TypeError, lambda: ts_tz2 - ts) + pytest.raises(TypeError, lambda: ts_tz2 - dt) + pytest.raises(TypeError, lambda: ts_tz - ts_tz2) + + # with dti + pytest.raises(TypeError, lambda: dti - ts_tz) + pytest.raises(TypeError, lambda: dti_tz - ts) + pytest.raises(TypeError, lambda: dti_tz - ts_tz2) + + result = dti_tz - dt_tz + expected = TimedeltaIndex(['0 days', '1 days', '2 days']) + tm.assert_index_equal(result, expected) + + result = dt_tz - dti_tz + expected = TimedeltaIndex(['0 days', '-1 days', '-2 days']) + tm.assert_index_equal(result, expected) + + result = dti_tz - ts_tz + expected = TimedeltaIndex(['0 days', '1 days', '2 days']) + tm.assert_index_equal(result, expected) + + result = ts_tz - dti_tz + expected = TimedeltaIndex(['0 days', '-1 days', '-2 days']) + tm.assert_index_equal(result, expected) + + result = td - td + expected = Timedelta('0 days') + _check(result, expected) + + result = dti_tz - td + expected = DatetimeIndex( + ['20121231', '20130101', '20130102'], tz='US/Eastern') + tm.assert_index_equal(result, expected) + + def test_dti_tdi_numeric_ops(self): + # These are normally union/diff set-like ops + tdi = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo') + dti = date_range('20130101', periods=3, name='bar') + + # TODO(wesm): unused? 
+ # td = Timedelta('1 days') + # dt = Timestamp('20130101') + + result = tdi - tdi + expected = TimedeltaIndex(['0 days', pd.NaT, '0 days'], name='foo') + tm.assert_index_equal(result, expected) + + result = tdi + tdi + expected = TimedeltaIndex(['2 days', pd.NaT, '4 days'], name='foo') + tm.assert_index_equal(result, expected) + + result = dti - tdi # name will be reset + expected = DatetimeIndex(['20121231', pd.NaT, '20130101']) + tm.assert_index_equal(result, expected) + + def test_addition_ops(self): + # with datetimes/timedelta and tdi/dti + tdi = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo') + dti = date_range('20130101', periods=3, name='bar') + td = Timedelta('1 days') + dt = Timestamp('20130101') + + result = tdi + dt + expected = DatetimeIndex(['20130102', pd.NaT, '20130103'], name='foo') + tm.assert_index_equal(result, expected) + + result = dt + tdi + expected = DatetimeIndex(['20130102', pd.NaT, '20130103'], name='foo') + tm.assert_index_equal(result, expected) + + result = td + tdi + expected = TimedeltaIndex(['2 days', pd.NaT, '3 days'], name='foo') + tm.assert_index_equal(result, expected) + + result = tdi + td + expected = TimedeltaIndex(['2 days', pd.NaT, '3 days'], name='foo') + tm.assert_index_equal(result, expected) + + # unequal length + pytest.raises(ValueError, lambda: tdi + dti[0:1]) + pytest.raises(ValueError, lambda: tdi[0:1] + dti) + + # random indexes + pytest.raises(NullFrequencyError, lambda: tdi + Int64Index([1, 2, 3])) + + # this is a union! 
+ # pytest.raises(TypeError, lambda : Int64Index([1,2,3]) + tdi) + + result = tdi + dti # name will be reset + expected = DatetimeIndex(['20130102', pd.NaT, '20130105']) + tm.assert_index_equal(result, expected) + + result = dti + tdi # name will be reset + expected = DatetimeIndex(['20130102', pd.NaT, '20130105']) + tm.assert_index_equal(result, expected) + + result = dt + td + expected = Timestamp('20130102') + assert result == expected + + result = td + dt + expected = Timestamp('20130102') + assert result == expected + + def test_ops_ndarray(self): + td = Timedelta('1 day') + + # timedelta, timedelta + other = pd.to_timedelta(['1 day']).values + expected = pd.to_timedelta(['2 days']).values + tm.assert_numpy_array_equal(td + other, expected) + if LooseVersion(np.__version__) >= LooseVersion('1.8'): + tm.assert_numpy_array_equal(other + td, expected) + pytest.raises(TypeError, lambda: td + np.array([1])) + pytest.raises(TypeError, lambda: np.array([1]) + td) + + expected = pd.to_timedelta(['0 days']).values + tm.assert_numpy_array_equal(td - other, expected) + if LooseVersion(np.__version__) >= LooseVersion('1.8'): + tm.assert_numpy_array_equal(-other + td, expected) + pytest.raises(TypeError, lambda: td - np.array([1])) + pytest.raises(TypeError, lambda: np.array([1]) - td) + + expected = pd.to_timedelta(['2 days']).values + tm.assert_numpy_array_equal(td * np.array([2]), expected) + tm.assert_numpy_array_equal(np.array([2]) * td, expected) + pytest.raises(TypeError, lambda: td * other) + pytest.raises(TypeError, lambda: other * td) + + tm.assert_numpy_array_equal(td / other, + np.array([1], dtype=np.float64)) + if LooseVersion(np.__version__) >= LooseVersion('1.8'): + tm.assert_numpy_array_equal(other / td, + np.array([1], dtype=np.float64)) + + # timedelta, datetime + other = pd.to_datetime(['2000-01-01']).values + expected = pd.to_datetime(['2000-01-02']).values + tm.assert_numpy_array_equal(td + other, expected) + if LooseVersion(np.__version__) >= 
LooseVersion('1.8'): + tm.assert_numpy_array_equal(other + td, expected) + + expected = pd.to_datetime(['1999-12-31']).values + tm.assert_numpy_array_equal(-td + other, expected) + if LooseVersion(np.__version__) >= LooseVersion('1.8'): + tm.assert_numpy_array_equal(other - td, expected) + + def test_ops_series(self): + # regression test for GH8813 + td = Timedelta('1 day') + other = pd.Series([1, 2]) + expected = pd.Series(pd.to_timedelta(['1 day', '2 days'])) + tm.assert_series_equal(expected, td * other) + tm.assert_series_equal(expected, other * td) + + def test_ops_series_object(self): + # GH 13043 + s = pd.Series([pd.Timestamp('2015-01-01', tz='US/Eastern'), + pd.Timestamp('2015-01-01', tz='Asia/Tokyo')], + name='xxx') + assert s.dtype == object + + exp = pd.Series([pd.Timestamp('2015-01-02', tz='US/Eastern'), + pd.Timestamp('2015-01-02', tz='Asia/Tokyo')], + name='xxx') + tm.assert_series_equal(s + pd.Timedelta('1 days'), exp) + tm.assert_series_equal(pd.Timedelta('1 days') + s, exp) + + # object series & object series + s2 = pd.Series([pd.Timestamp('2015-01-03', tz='US/Eastern'), + pd.Timestamp('2015-01-05', tz='Asia/Tokyo')], + name='xxx') + assert s2.dtype == object + exp = pd.Series([pd.Timedelta('2 days'), pd.Timedelta('4 days')], + name='xxx') + tm.assert_series_equal(s2 - s, exp) + tm.assert_series_equal(s - s2, -exp) + + s = pd.Series([pd.Timedelta('01:00:00'), pd.Timedelta('02:00:00')], + name='xxx', dtype=object) + assert s.dtype == object + + exp = pd.Series([pd.Timedelta('01:30:00'), pd.Timedelta('02:30:00')], + name='xxx') + tm.assert_series_equal(s + pd.Timedelta('00:30:00'), exp) + tm.assert_series_equal(pd.Timedelta('00:30:00') + s, exp) + + def test_timedelta_ops_with_missing_values(self): + # setup + s1 = pd.to_timedelta(Series(['00:00:01'])) + s2 = pd.to_timedelta(Series(['00:00:02'])) + sn = pd.to_timedelta(Series([pd.NaT])) + df1 = pd.DataFrame(['00:00:01']).apply(pd.to_timedelta) + df2 = pd.DataFrame(['00:00:02']).apply(pd.to_timedelta) 
+ dfn = pd.DataFrame([pd.NaT]).apply(pd.to_timedelta) + scalar1 = pd.to_timedelta('00:00:01') + scalar2 = pd.to_timedelta('00:00:02') + timedelta_NaT = pd.to_timedelta('NaT') + NA = np.nan + + actual = scalar1 + scalar1 + assert actual == scalar2 + actual = scalar2 - scalar1 + assert actual == scalar1 + + actual = s1 + s1 + tm.assert_series_equal(actual, s2) + actual = s2 - s1 + tm.assert_series_equal(actual, s1) + + actual = s1 + scalar1 + tm.assert_series_equal(actual, s2) + actual = scalar1 + s1 + tm.assert_series_equal(actual, s2) + actual = s2 - scalar1 + tm.assert_series_equal(actual, s1) + actual = -scalar1 + s2 + tm.assert_series_equal(actual, s1) + + actual = s1 + timedelta_NaT + tm.assert_series_equal(actual, sn) + actual = timedelta_NaT + s1 + tm.assert_series_equal(actual, sn) + actual = s1 - timedelta_NaT + tm.assert_series_equal(actual, sn) + actual = -timedelta_NaT + s1 + tm.assert_series_equal(actual, sn) + + with pytest.raises(TypeError): + s1 + np.nan + with pytest.raises(TypeError): + np.nan + s1 + with pytest.raises(TypeError): + s1 - np.nan + with pytest.raises(TypeError): + -np.nan + s1 + + actual = s1 + pd.NaT + tm.assert_series_equal(actual, sn) + actual = s2 - pd.NaT + tm.assert_series_equal(actual, sn) + + actual = s1 + df1 + tm.assert_frame_equal(actual, df2) + actual = s2 - df1 + tm.assert_frame_equal(actual, df1) + actual = df1 + s1 + tm.assert_frame_equal(actual, df2) + actual = df2 - s1 + tm.assert_frame_equal(actual, df1) + + actual = df1 + df1 + tm.assert_frame_equal(actual, df2) + actual = df2 - df1 + tm.assert_frame_equal(actual, df1) + + actual = df1 + scalar1 + tm.assert_frame_equal(actual, df2) + actual = df2 - scalar1 + tm.assert_frame_equal(actual, df1) + + actual = df1 + timedelta_NaT + tm.assert_frame_equal(actual, dfn) + actual = df1 - timedelta_NaT + tm.assert_frame_equal(actual, dfn) + + actual = df1 + NA + tm.assert_frame_equal(actual, dfn) + actual = df1 - NA + tm.assert_frame_equal(actual, dfn) + + actual = df1 + 
pd.NaT # NaT is datetime, not timedelta + tm.assert_frame_equal(actual, dfn) + actual = df1 - pd.NaT + tm.assert_frame_equal(actual, dfn) + + def test_add_overflow(self): + # see gh-14068 + msg = "too (big|large) to convert" + with tm.assert_raises_regex(OverflowError, msg): + to_timedelta(106580, 'D') + Timestamp('2000') + with tm.assert_raises_regex(OverflowError, msg): + Timestamp('2000') + to_timedelta(106580, 'D') + + _NaT = int(pd.NaT) + 1 + msg = "Overflow in int64 addition" + with tm.assert_raises_regex(OverflowError, msg): + to_timedelta([106580], 'D') + Timestamp('2000') + with tm.assert_raises_regex(OverflowError, msg): + Timestamp('2000') + to_timedelta([106580], 'D') + with tm.assert_raises_regex(OverflowError, msg): + to_timedelta([_NaT]) - Timedelta('1 days') + with tm.assert_raises_regex(OverflowError, msg): + to_timedelta(['5 days', _NaT]) - Timedelta('1 days') + with tm.assert_raises_regex(OverflowError, msg): + (to_timedelta([_NaT, '5 days', '1 hours']) - + to_timedelta(['7 seconds', _NaT, '4 hours'])) + + # These should not overflow! 
+ exp = TimedeltaIndex([pd.NaT]) + result = to_timedelta([pd.NaT]) - Timedelta('1 days') + tm.assert_index_equal(result, exp) + + exp = TimedeltaIndex(['4 days', pd.NaT]) + result = to_timedelta(['5 days', pd.NaT]) - Timedelta('1 days') + tm.assert_index_equal(result, exp) + + exp = TimedeltaIndex([pd.NaT, pd.NaT, '5 hours']) + result = (to_timedelta([pd.NaT, '5 days', '1 hours']) + + to_timedelta(['7 seconds', pd.NaT, '4 hours'])) + tm.assert_index_equal(result, exp) + + def test_timedeltaindex_add_timestamp_nat_masking(self): + # GH17991 checking for overflow-masking with NaT + tdinat = pd.to_timedelta(['24658 days 11:15:00', 'NaT']) + + tsneg = Timestamp('1950-01-01') + ts_neg_variants = [tsneg, + tsneg.to_pydatetime(), + tsneg.to_datetime64().astype('datetime64[ns]'), + tsneg.to_datetime64().astype('datetime64[D]')] + + tspos = Timestamp('1980-01-01') + ts_pos_variants = [tspos, + tspos.to_pydatetime(), + tspos.to_datetime64().astype('datetime64[ns]'), + tspos.to_datetime64().astype('datetime64[D]')] + + for variant in ts_neg_variants + ts_pos_variants: + res = tdinat + variant + assert res[1] is pd.NaT + + def test_tdi_ops_attributes(self): + rng = timedelta_range('2 days', periods=5, freq='2D', name='x') + + result = rng + 1 + exp = timedelta_range('4 days', periods=5, freq='2D', name='x') + tm.assert_index_equal(result, exp) + assert result.freq == '2D' + + result = rng - 2 + exp = timedelta_range('-2 days', periods=5, freq='2D', name='x') + tm.assert_index_equal(result, exp) + assert result.freq == '2D' + + result = rng * 2 + exp = timedelta_range('4 days', periods=5, freq='4D', name='x') + tm.assert_index_equal(result, exp) + assert result.freq == '4D' + + result = rng / 2 + exp = timedelta_range('1 days', periods=5, freq='D', name='x') + tm.assert_index_equal(result, exp) + assert result.freq == 'D' + + result = -rng + exp = timedelta_range('-2 days', periods=5, freq='-2D', name='x') + tm.assert_index_equal(result, exp) + assert result.freq == '-2D' + + 
rng = pd.timedelta_range('-2 days', periods=5, freq='D', name='x') + + result = abs(rng) + exp = TimedeltaIndex(['2 days', '1 days', '0 days', '1 days', + '2 days'], name='x') + tm.assert_index_equal(result, exp) + assert result.freq is None + + # TODO: Needs more informative name, probably split up into + # more targeted tests + def test_timedelta(self, freq): + index = date_range('1/1/2000', periods=50, freq=freq) + + shifted = index + timedelta(1) + back = shifted + timedelta(-1) + tm.assert_index_equal(index, back) + + if freq == 'D': + expected = pd.tseries.offsets.Day(1) + assert index.freq == expected + assert shifted.freq == expected + assert back.freq == expected + else: # freq == 'B' + assert index.freq == pd.tseries.offsets.BusinessDay(1) + assert shifted.freq is None + assert back.freq == pd.tseries.offsets.BusinessDay(1) + + result = index - timedelta(1) + expected = index + timedelta(-1) + tm.assert_index_equal(result, expected) + + # GH4134, buggy with timedeltas + rng = date_range('2013', '2014') + s = Series(rng) + result1 = rng - pd.offsets.Hour(1) + result2 = DatetimeIndex(s - np.timedelta64(100000000)) + result3 = rng - np.timedelta64(100000000) + result4 = DatetimeIndex(s - pd.offsets.Hour(1)) + tm.assert_index_equal(result1, result4) + tm.assert_index_equal(result2, result3) diff --git a/pandas/tests/indexes/timedeltas/test_astype.py b/pandas/tests/indexes/timedeltas/test_astype.py index 88e7b1387feff..329f0c2467e8b 100644 --- a/pandas/tests/indexes/timedeltas/test_astype.py +++ b/pandas/tests/indexes/timedeltas/test_astype.py @@ -1,30 +1,40 @@ +from datetime import timedelta + +import pytest + import numpy as np -import pandas as pd import pandas.util.testing as tm from pandas import (TimedeltaIndex, timedelta_range, Int64Index, Float64Index, - Index, Timedelta, Series) - -from ..datetimelike import DatetimeLike + Index, Timedelta, NaT) -class TestTimedeltaIndex(DatetimeLike, tm.TestCase): - _holder = TimedeltaIndex - _multiprocess_can_split_ 
= True - - def setUp(self): - self.indices = dict(index=tm.makeTimedeltaIndex(10)) - self.setup_indices() +class TestTimedeltaIndex(object): + def test_astype_object(self): + idx = timedelta_range(start='1 days', periods=4, freq='D', name='idx') + expected_list = [Timedelta('1 days'), Timedelta('2 days'), + Timedelta('3 days'), Timedelta('4 days')] + result = idx.astype(object) + expected = Index(expected_list, dtype=object, name='idx') + tm.assert_index_equal(result, expected) + assert idx.tolist() == expected_list - def create_index(self): - return pd.to_timedelta(range(5), unit='d') + pd.offsets.Hour(1) + def test_astype_object_with_nat(self): + idx = TimedeltaIndex([timedelta(days=1), timedelta(days=2), NaT, + timedelta(days=4)], name='idx') + expected_list = [Timedelta('1 days'), Timedelta('2 days'), NaT, + Timedelta('4 days')] + result = idx.astype(object) + expected = Index(expected_list, dtype=object, name='idx') + tm.assert_index_equal(result, expected) + assert idx.tolist() == expected_list def test_astype(self): # GH 13149, GH 13209 - idx = TimedeltaIndex([1e14, 'NaT', pd.NaT, np.NaN]) + idx = TimedeltaIndex([1e14, 'NaT', NaT, np.NaN]) result = idx.astype(object) - expected = Index([Timedelta('1 days 03:46:40')] + [pd.NaT] * 3, + expected = Index([Timedelta('1 days 03:46:40')] + [NaT] * 3, dtype=object) tm.assert_index_equal(result, expected) @@ -33,15 +43,18 @@ def test_astype(self): dtype=np.int64) tm.assert_index_equal(result, expected) - rng = timedelta_range('1 days', periods=10) + result = idx.astype(str) + expected = Index(str(x) for x in idx) + tm.assert_index_equal(result, expected) + rng = timedelta_range('1 days', periods=10) result = rng.astype('i8') - self.assert_index_equal(result, Index(rng.asi8)) - self.assert_numpy_array_equal(rng.asi8, result.values) + tm.assert_index_equal(result, Index(rng.asi8)) + tm.assert_numpy_array_equal(rng.asi8, result.values) def test_astype_timedelta64(self): # GH 13149, GH 13209 - idx = TimedeltaIndex([1e14, 
'NaT', pd.NaT, np.NaN]) + idx = TimedeltaIndex([1e14, 'NaT', NaT, np.NaN]) result = idx.astype('timedelta64') expected = Float64Index([1e+14] + [np.NaN] * 3, dtype='float64') @@ -49,73 +62,17 @@ def test_astype_timedelta64(self): result = idx.astype('timedelta64[ns]') tm.assert_index_equal(result, idx) - self.assertFalse(result is idx) + assert result is not idx result = idx.astype('timedelta64[ns]', copy=False) tm.assert_index_equal(result, idx) - self.assertTrue(result is idx) + assert result is idx - def test_astype_raises(self): + @pytest.mark.parametrize('dtype', [ + float, 'datetime64', 'datetime64[ns]']) + def test_astype_raises(self, dtype): # GH 13149, GH 13209 - idx = TimedeltaIndex([1e14, 'NaT', pd.NaT, np.NaN]) - - self.assertRaises(ValueError, idx.astype, float) - self.assertRaises(ValueError, idx.astype, str) - self.assertRaises(ValueError, idx.astype, 'datetime64') - self.assertRaises(ValueError, idx.astype, 'datetime64[ns]') - - def test_pickle_compat_construction(self): - pass - - def test_shift(self): - # test shift for TimedeltaIndex - # err8083 - - drange = self.create_index() - result = drange.shift(1) - expected = TimedeltaIndex(['1 days 01:00:00', '2 days 01:00:00', - '3 days 01:00:00', - '4 days 01:00:00', '5 days 01:00:00'], - freq='D') - self.assert_index_equal(result, expected) - - result = drange.shift(3, freq='2D 1s') - expected = TimedeltaIndex(['6 days 01:00:03', '7 days 01:00:03', - '8 days 01:00:03', '9 days 01:00:03', - '10 days 01:00:03'], freq='D') - self.assert_index_equal(result, expected) - - def test_numeric_compat(self): - - idx = self._holder(np.arange(5, dtype='int64')) - didx = self._holder(np.arange(5, dtype='int64') ** 2) - result = idx * 1 - tm.assert_index_equal(result, idx) - - result = 1 * idx - tm.assert_index_equal(result, idx) - - result = idx / 1 - tm.assert_index_equal(result, idx) - - result = idx // 1 - tm.assert_index_equal(result, idx) - - result = idx * np.array(5, dtype='int64') - 
tm.assert_index_equal(result, - self._holder(np.arange(5, dtype='int64') * 5)) - - result = idx * np.arange(5, dtype='int64') - tm.assert_index_equal(result, didx) - - result = idx * Series(np.arange(5, dtype='int64')) - tm.assert_index_equal(result, didx) - - result = idx * Series(np.arange(5, dtype='float64') + 0.1) - tm.assert_index_equal(result, self._holder(np.arange( - 5, dtype='float64') * (np.arange(5, dtype='float64') + 0.1))) - - # invalid - self.assertRaises(TypeError, lambda: idx * idx) - self.assertRaises(ValueError, lambda: idx * self._holder(np.arange(3))) - self.assertRaises(ValueError, lambda: idx * np.array([1, 2])) + idx = TimedeltaIndex([1e14, 'NaT', NaT, np.NaN]) + msg = 'Cannot cast TimedeltaIndex to dtype' + with tm.assert_raises_regex(TypeError, msg): + idx.astype(dtype) diff --git a/pandas/tests/indexes/timedeltas/test_construction.py b/pandas/tests/indexes/timedeltas/test_construction.py index 0810b13eb0f53..68dc0003e2312 100644 --- a/pandas/tests/indexes/timedeltas/test_construction.py +++ b/pandas/tests/indexes/timedeltas/test_construction.py @@ -1,15 +1,14 @@ +import pytest + import numpy as np from datetime import timedelta import pandas as pd import pandas.util.testing as tm -from pandas import TimedeltaIndex, timedelta_range, tslib, to_timedelta - -iNaT = tslib.iNaT +from pandas import TimedeltaIndex, timedelta_range, to_timedelta -class TestTimedeltaIndex(tm.TestCase): - _multiprocess_can_split_ = True +class TestTimedeltaIndex(object): def test_construction_base_constructor(self): arr = [pd.Timedelta('1 days'), pd.NaT, pd.Timedelta('3 days')] @@ -48,41 +47,42 @@ def test_constructor(self): def test_constructor_coverage(self): rng = timedelta_range('1 days', periods=10.5) exp = timedelta_range('1 days', periods=10) - self.assert_index_equal(rng, exp) + tm.assert_index_equal(rng, exp) - self.assertRaises(ValueError, TimedeltaIndex, start='1 days', - periods='foo', freq='D') + msg = 'periods must be a number, got foo' + with 
tm.assert_raises_regex(TypeError, msg): + TimedeltaIndex(start='1 days', periods='foo', freq='D') - self.assertRaises(ValueError, TimedeltaIndex, start='1 days', - end='10 days') + pytest.raises(ValueError, TimedeltaIndex, start='1 days', + end='10 days') - self.assertRaises(ValueError, TimedeltaIndex, '1 days') + pytest.raises(ValueError, TimedeltaIndex, '1 days') # generator expression gen = (timedelta(i) for i in range(10)) result = TimedeltaIndex(gen) expected = TimedeltaIndex([timedelta(i) for i in range(10)]) - self.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) # NumPy string array strings = np.array(['1 days', '2 days', '3 days']) result = TimedeltaIndex(strings) expected = to_timedelta([1, 2, 3], unit='d') - self.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) from_ints = TimedeltaIndex(expected.asi8) - self.assert_index_equal(from_ints, expected) + tm.assert_index_equal(from_ints, expected) # non-conforming freq - self.assertRaises(ValueError, TimedeltaIndex, - ['1 days', '2 days', '4 days'], freq='D') + pytest.raises(ValueError, TimedeltaIndex, + ['1 days', '2 days', '4 days'], freq='D') - self.assertRaises(ValueError, TimedeltaIndex, periods=10, freq='D') + pytest.raises(ValueError, TimedeltaIndex, periods=10, freq='D') def test_constructor_name(self): idx = TimedeltaIndex(start='1 days', periods=1, freq='D', name='TEST') - self.assertEqual(idx.name, 'TEST') + assert idx.name == 'TEST' # GH10025 idx2 = TimedeltaIndex(idx, name='something else') - self.assertEqual(idx2.name, 'something else') + assert idx2.name == 'something else' diff --git a/pandas/tests/indexes/timedeltas/test_formats.py b/pandas/tests/indexes/timedeltas/test_formats.py new file mode 100644 index 0000000000000..a8375459d74e4 --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_formats.py @@ -0,0 +1,96 @@ +# -*- coding: utf-8 -*- + +import pytest + +import pandas as pd +from pandas import TimedeltaIndex + + +class 
TestTimedeltaIndexRendering(object): + @pytest.mark.parametrize('method', ['__repr__', '__unicode__', '__str__']) + def test_representation(self, method): + idx1 = TimedeltaIndex([], freq='D') + idx2 = TimedeltaIndex(['1 days'], freq='D') + idx3 = TimedeltaIndex(['1 days', '2 days'], freq='D') + idx4 = TimedeltaIndex(['1 days', '2 days', '3 days'], freq='D') + idx5 = TimedeltaIndex(['1 days 00:00:01', '2 days', '3 days']) + + exp1 = """TimedeltaIndex([], dtype='timedelta64[ns]', freq='D')""" + + exp2 = ("TimedeltaIndex(['1 days'], dtype='timedelta64[ns]', " + "freq='D')") + + exp3 = ("TimedeltaIndex(['1 days', '2 days'], " + "dtype='timedelta64[ns]', freq='D')") + + exp4 = ("TimedeltaIndex(['1 days', '2 days', '3 days'], " + "dtype='timedelta64[ns]', freq='D')") + + exp5 = ("TimedeltaIndex(['1 days 00:00:01', '2 days 00:00:00', " + "'3 days 00:00:00'], dtype='timedelta64[ns]', freq=None)") + + with pd.option_context('display.width', 300): + for idx, expected in zip([idx1, idx2, idx3, idx4, idx5], + [exp1, exp2, exp3, exp4, exp5]): + result = getattr(idx, method)() + assert result == expected + + def test_representation_to_series(self): + idx1 = TimedeltaIndex([], freq='D') + idx2 = TimedeltaIndex(['1 days'], freq='D') + idx3 = TimedeltaIndex(['1 days', '2 days'], freq='D') + idx4 = TimedeltaIndex(['1 days', '2 days', '3 days'], freq='D') + idx5 = TimedeltaIndex(['1 days 00:00:01', '2 days', '3 days']) + + exp1 = """Series([], dtype: timedelta64[ns])""" + + exp2 = ("0 1 days\n" + "dtype: timedelta64[ns]") + + exp3 = ("0 1 days\n" + "1 2 days\n" + "dtype: timedelta64[ns]") + + exp4 = ("0 1 days\n" + "1 2 days\n" + "2 3 days\n" + "dtype: timedelta64[ns]") + + exp5 = ("0 1 days 00:00:01\n" + "1 2 days 00:00:00\n" + "2 3 days 00:00:00\n" + "dtype: timedelta64[ns]") + + with pd.option_context('display.width', 300): + for idx, expected in zip([idx1, idx2, idx3, idx4, idx5], + [exp1, exp2, exp3, exp4, exp5]): + result = repr(pd.Series(idx)) + assert result == expected + + 
def test_summary(self): + # GH#9116 + idx1 = TimedeltaIndex([], freq='D') + idx2 = TimedeltaIndex(['1 days'], freq='D') + idx3 = TimedeltaIndex(['1 days', '2 days'], freq='D') + idx4 = TimedeltaIndex(['1 days', '2 days', '3 days'], freq='D') + idx5 = TimedeltaIndex(['1 days 00:00:01', '2 days', '3 days']) + + exp1 = ("TimedeltaIndex: 0 entries\n" + "Freq: D") + + exp2 = ("TimedeltaIndex: 1 entries, 1 days to 1 days\n" + "Freq: D") + + exp3 = ("TimedeltaIndex: 2 entries, 1 days to 2 days\n" + "Freq: D") + + exp4 = ("TimedeltaIndex: 3 entries, 1 days to 3 days\n" + "Freq: D") + + exp5 = ("TimedeltaIndex: 3 entries, 1 days 00:00:01 to 3 days " + "00:00:00") + + for idx, expected in zip([idx1, idx2, idx3, idx4, idx5], + [exp1, exp2, exp3, exp4, exp5]): + result = idx.summary() + assert result == expected diff --git a/pandas/tests/indexes/timedeltas/test_indexing.py b/pandas/tests/indexes/timedeltas/test_indexing.py index b4a8bc79921bf..08992188265bd 100644 --- a/pandas/tests/indexes/timedeltas/test_indexing.py +++ b/pandas/tests/indexes/timedeltas/test_indexing.py @@ -1,11 +1,157 @@ from datetime import timedelta +import pytest +import numpy as np + +import pandas as pd import pandas.util.testing as tm from pandas import TimedeltaIndex, timedelta_range, compat, Index, Timedelta -class TestTimedeltaIndex(tm.TestCase): - _multiprocess_can_split_ = True +class TestGetItem(object): + def test_getitem(self): + idx1 = timedelta_range('1 day', '31 day', freq='D', name='idx') + + for idx in [idx1]: + result = idx[0] + assert result == Timedelta('1 day') + + result = idx[0:5] + expected = timedelta_range('1 day', '5 day', freq='D', + name='idx') + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + result = idx[0:10:2] + expected = timedelta_range('1 day', '9 day', freq='2D', + name='idx') + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + result = idx[-20:-5:3] + expected = timedelta_range('12 day', '24 day', 
freq='3D', + name='idx') + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + result = idx[4::-1] + expected = TimedeltaIndex(['5 day', '4 day', '3 day', + '2 day', '1 day'], + freq='-1D', name='idx') + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + +class TestWhere(object): + # placeholder for symmetry with DatetimeIndex and PeriodIndex tests + pass + + +class TestTake(object): + def test_take(self): + # GH 10295 + idx1 = timedelta_range('1 day', '31 day', freq='D', name='idx') + + for idx in [idx1]: + result = idx.take([0]) + assert result == Timedelta('1 day') + + result = idx.take([-1]) + assert result == Timedelta('31 day') + + result = idx.take([0, 1, 2]) + expected = timedelta_range('1 day', '3 day', freq='D', + name='idx') + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + result = idx.take([0, 2, 4]) + expected = timedelta_range('1 day', '5 day', freq='2D', + name='idx') + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + result = idx.take([7, 4, 1]) + expected = timedelta_range('8 day', '2 day', freq='-3D', + name='idx') + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + result = idx.take([3, 2, 5]) + expected = TimedeltaIndex(['4 day', '3 day', '6 day'], name='idx') + tm.assert_index_equal(result, expected) + assert result.freq is None + + result = idx.take([-3, 2, 5]) + expected = TimedeltaIndex(['29 day', '3 day', '6 day'], name='idx') + tm.assert_index_equal(result, expected) + assert result.freq is None + + def test_take_invalid_kwargs(self): + idx = timedelta_range('1 day', '31 day', freq='D', name='idx') + indices = [1, 6, 5, 9, 10, 13, 15, 3] + + msg = r"take\(\) got an unexpected keyword argument 'foo'" + tm.assert_raises_regex(TypeError, msg, idx.take, + indices, foo=2) + + msg = "the 'out' parameter is not supported" + tm.assert_raises_regex(ValueError, msg, idx.take, + indices, 
out=indices) + + msg = "the 'mode' parameter is not supported" + tm.assert_raises_regex(ValueError, msg, idx.take, + indices, mode='clip') + + # TODO: This method came from test_timedelta; de-dup with version above + def test_take2(self): + tds = ['1day 02:00:00', '1 day 04:00:00', '1 day 10:00:00'] + idx = TimedeltaIndex(start='1d', end='2d', freq='H', name='idx') + expected = TimedeltaIndex(tds, freq=None, name='idx') + + taken1 = idx.take([2, 4, 10]) + taken2 = idx[[2, 4, 10]] + + for taken in [taken1, taken2]: + tm.assert_index_equal(taken, expected) + assert isinstance(taken, TimedeltaIndex) + assert taken.freq is None + assert taken.name == expected.name + + def test_take_fill_value(self): + # GH 12631 + idx = TimedeltaIndex(['1 days', '2 days', '3 days'], + name='xxx') + result = idx.take(np.array([1, 0, -1])) + expected = TimedeltaIndex(['2 days', '1 days', '3 days'], + name='xxx') + tm.assert_index_equal(result, expected) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + expected = TimedeltaIndex(['2 days', '1 days', 'NaT'], + name='xxx') + tm.assert_index_equal(result, expected) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, + fill_value=True) + expected = TimedeltaIndex(['2 days', '1 days', '3 days'], + name='xxx') + tm.assert_index_equal(result, expected) + + msg = ('When allow_fill=True and fill_value is not None, ' + 'all indices must be >= -1') + with tm.assert_raises_regex(ValueError, msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with tm.assert_raises_regex(ValueError, msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + with pytest.raises(IndexError): + idx.take(np.array([1, -5])) + + +class TestTimedeltaIndex(object): def test_insert(self): @@ -13,15 +159,15 @@ def test_insert(self): result = idx.insert(2, timedelta(days=5)) exp = TimedeltaIndex(['4day', '1day', '5day', '2day'], name='idx') - self.assert_index_equal(result, exp) + tm.assert_index_equal(result, exp) 
# insertion of non-datetime should coerce to object index result = idx.insert(1, 'inserted') expected = Index([Timedelta('4day'), 'inserted', Timedelta('1day'), Timedelta('2day')], name='idx') - self.assertNotIsInstance(result, TimedeltaIndex) + assert not isinstance(result, TimedeltaIndex) tm.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) + assert result.name == expected.name idx = timedelta_range('1day 00:00:01', periods=3, freq='s', name='idx') @@ -49,9 +195,15 @@ def test_insert(self): for n, d, expected in cases: result = idx.insert(n, d) - self.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(result.freq, expected.freq) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + + # GH 18295 (test missing) + expected = TimedeltaIndex(['1day', pd.NaT, '2day', '3day']) + for na in (np.nan, pd.NaT, None): + result = timedelta_range('1day', '3day').insert(1, na) + tm.assert_index_equal(result, expected) def test_delete(self): idx = timedelta_range(start='1 Days', periods=5, freq='D', name='idx') @@ -73,11 +225,11 @@ def test_delete(self): 1: expected_1} for n, expected in compat.iteritems(cases): result = idx.delete(n) - self.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(result.freq, expected.freq) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq - with tm.assertRaises((IndexError, ValueError)): + with pytest.raises((IndexError, ValueError)): # either depeidnig on numpy version result = idx.delete(5) @@ -100,11 +252,71 @@ def test_delete_slice(self): (3, 4, 5): expected_3_5} for n, expected in compat.iteritems(cases): result = idx.delete(n) - self.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(result.freq, expected.freq) + 
tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq result = idx.delete(slice(n[0], n[-1] + 1)) - self.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(result.freq, expected.freq) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + + def test_get_loc(self): + idx = pd.to_timedelta(['0 days', '1 days', '2 days']) + + for method in [None, 'pad', 'backfill', 'nearest']: + assert idx.get_loc(idx[1], method) == 1 + assert idx.get_loc(idx[1].to_pytimedelta(), method) == 1 + assert idx.get_loc(str(idx[1]), method) == 1 + + assert idx.get_loc(idx[1], 'pad', + tolerance=Timedelta(0)) == 1 + assert idx.get_loc(idx[1], 'pad', + tolerance=np.timedelta64(0, 's')) == 1 + assert idx.get_loc(idx[1], 'pad', + tolerance=timedelta(0)) == 1 + + with tm.assert_raises_regex(ValueError, + 'unit abbreviation w/o a number'): + idx.get_loc(idx[1], method='nearest', tolerance='foo') + + with pytest.raises( + ValueError, + match='tolerance size must match'): + idx.get_loc(idx[1], method='nearest', + tolerance=[Timedelta(0).to_timedelta64(), + Timedelta(0).to_timedelta64()]) + + for method, loc in [('pad', 1), ('backfill', 2), ('nearest', 1)]: + assert idx.get_loc('1 day 1 hour', method) == loc + + # GH 16909 + assert idx.get_loc(idx[1].to_timedelta64()) == 1 + + # GH 16896 + assert idx.get_loc('0 days') == 0 + + def test_get_loc_nat(self): + tidx = TimedeltaIndex(['1 days 01:00:00', 'NaT', '2 days 01:00:00']) + + assert tidx.get_loc(pd.NaT) == 1 + assert tidx.get_loc(None) == 1 + assert tidx.get_loc(float('nan')) == 1 + assert tidx.get_loc(np.nan) == 1 + + def test_get_indexer(self): + idx = pd.to_timedelta(['0 days', '1 days', '2 days']) + tm.assert_numpy_array_equal(idx.get_indexer(idx), + np.array([0, 1, 2], dtype=np.intp)) + + target = pd.to_timedelta(['-1 hour', '12 hours', '1 day 1 hour']) + 
tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), + np.array([-1, 0, 1], dtype=np.intp)) + tm.assert_numpy_array_equal(idx.get_indexer(target, 'backfill'), + np.array([0, 1, 2], dtype=np.intp)) + tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest'), + np.array([0, 1, 1], dtype=np.intp)) + + res = idx.get_indexer(target, 'nearest', + tolerance=Timedelta('1 hour')) + tm.assert_numpy_array_equal(res, np.array([0, -1, 1], dtype=np.intp)) diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index 406a5bdbf3bcd..49737e5359c2f 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -1,517 +1,77 @@ +import pytest + import numpy as np from datetime import timedelta -from distutils.version import LooseVersion import pandas as pd import pandas.util.testing as tm from pandas import to_timedelta -from pandas.util.testing import assert_series_equal, assert_frame_equal -from pandas import (Series, Timedelta, DataFrame, Timestamp, TimedeltaIndex, - timedelta_range, date_range, DatetimeIndex, Int64Index, - _np_version_under1p10, Float64Index, Index, tslib) - +from pandas import (Series, Timedelta, Timestamp, TimedeltaIndex, + timedelta_range, + _np_version_under1p10) +from pandas._libs.tslib import iNaT from pandas.tests.test_base import Ops class TestTimedeltaIndexOps(Ops): - def setUp(self): - super(TestTimedeltaIndexOps, self).setUp() + def setup_method(self, method): + super(TestTimedeltaIndexOps, self).setup_method(method) mask = lambda x: isinstance(x, TimedeltaIndex) self.is_valid_objs = [o for o in self.objs if mask(o)] self.not_valid_objs = [] def test_ops_properties(self): - self.check_ops_properties(['days', 'hours', 'minutes', 'seconds', - 'milliseconds']) - self.check_ops_properties(['microseconds', 'nanoseconds']) - - def test_asobject_tolist(self): - idx = timedelta_range(start='1 days', periods=4, freq='D', name='idx') - expected_list = 
[Timedelta('1 days'), Timedelta('2 days'), - Timedelta('3 days'), Timedelta('4 days')] - expected = pd.Index(expected_list, dtype=object, name='idx') - result = idx.asobject - self.assertTrue(isinstance(result, Index)) - - self.assertEqual(result.dtype, object) - self.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(idx.tolist(), expected_list) - - idx = TimedeltaIndex([timedelta(days=1), timedelta(days=2), pd.NaT, - timedelta(days=4)], name='idx') - expected_list = [Timedelta('1 days'), Timedelta('2 days'), pd.NaT, - Timedelta('4 days')] - expected = pd.Index(expected_list, dtype=object, name='idx') - result = idx.asobject - self.assertTrue(isinstance(result, Index)) - self.assertEqual(result.dtype, object) - self.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(idx.tolist(), expected_list) + f = lambda x: isinstance(x, TimedeltaIndex) + self.check_ops_properties(TimedeltaIndex._field_ops, f) + self.check_ops_properties(TimedeltaIndex._object_ops, f) def test_minmax(self): # monotonic idx1 = TimedeltaIndex(['1 days', '2 days', '3 days']) - self.assertTrue(idx1.is_monotonic) + assert idx1.is_monotonic # non-monotonic idx2 = TimedeltaIndex(['1 days', np.nan, '3 days', 'NaT']) - self.assertFalse(idx2.is_monotonic) + assert not idx2.is_monotonic for idx in [idx1, idx2]: - self.assertEqual(idx.min(), Timedelta('1 days')), - self.assertEqual(idx.max(), Timedelta('3 days')), - self.assertEqual(idx.argmin(), 0) - self.assertEqual(idx.argmax(), 2) + assert idx.min() == Timedelta('1 days') + assert idx.max() == Timedelta('3 days') + assert idx.argmin() == 0 + assert idx.argmax() == 2 for op in ['min', 'max']: # Return NaT obj = TimedeltaIndex([]) - self.assertTrue(pd.isnull(getattr(obj, op)())) + assert pd.isna(getattr(obj, op)()) obj = TimedeltaIndex([pd.NaT]) - self.assertTrue(pd.isnull(getattr(obj, op)())) + assert pd.isna(getattr(obj, op)()) obj = 
TimedeltaIndex([pd.NaT, pd.NaT, pd.NaT]) - self.assertTrue(pd.isnull(getattr(obj, op)())) + assert pd.isna(getattr(obj, op)()) def test_numpy_minmax(self): dr = pd.date_range(start='2016-01-15', end='2016-01-20') td = TimedeltaIndex(np.asarray(dr)) - self.assertEqual(np.min(td), Timedelta('16815 days')) - self.assertEqual(np.max(td), Timedelta('16820 days')) + assert np.min(td) == Timedelta('16815 days') + assert np.max(td) == Timedelta('16820 days') errmsg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, errmsg, np.min, td, out=0) - tm.assertRaisesRegexp(ValueError, errmsg, np.max, td, out=0) + tm.assert_raises_regex(ValueError, errmsg, np.min, td, out=0) + tm.assert_raises_regex(ValueError, errmsg, np.max, td, out=0) - self.assertEqual(np.argmin(td), 0) - self.assertEqual(np.argmax(td), 5) + assert np.argmin(td) == 0 + assert np.argmax(td) == 5 if not _np_version_under1p10: errmsg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, errmsg, np.argmin, td, out=0) - tm.assertRaisesRegexp(ValueError, errmsg, np.argmax, td, out=0) - - def test_round(self): - td = pd.timedelta_range(start='16801 days', periods=5, freq='30Min') - elt = td[1] - - expected_rng = TimedeltaIndex([ - Timedelta('16801 days 00:00:00'), - Timedelta('16801 days 00:00:00'), - Timedelta('16801 days 01:00:00'), - Timedelta('16801 days 02:00:00'), - Timedelta('16801 days 02:00:00'), - ]) - expected_elt = expected_rng[1] - - tm.assert_index_equal(td.round(freq='H'), expected_rng) - self.assertEqual(elt.round(freq='H'), expected_elt) - - msg = pd.tseries.frequencies._INVALID_FREQ_ERROR - with self.assertRaisesRegexp(ValueError, msg): - td.round(freq='foo') - with tm.assertRaisesRegexp(ValueError, msg): - elt.round(freq='foo') - - msg = " is a non-fixed frequency" - tm.assertRaisesRegexp(ValueError, msg, td.round, freq='M') - tm.assertRaisesRegexp(ValueError, msg, elt.round, freq='M') - - def test_representation(self): - idx1 = TimedeltaIndex([], 
freq='D') - idx2 = TimedeltaIndex(['1 days'], freq='D') - idx3 = TimedeltaIndex(['1 days', '2 days'], freq='D') - idx4 = TimedeltaIndex(['1 days', '2 days', '3 days'], freq='D') - idx5 = TimedeltaIndex(['1 days 00:00:01', '2 days', '3 days']) - - exp1 = """TimedeltaIndex([], dtype='timedelta64[ns]', freq='D')""" - - exp2 = ("TimedeltaIndex(['1 days'], dtype='timedelta64[ns]', " - "freq='D')") - - exp3 = ("TimedeltaIndex(['1 days', '2 days'], " - "dtype='timedelta64[ns]', freq='D')") - - exp4 = ("TimedeltaIndex(['1 days', '2 days', '3 days'], " - "dtype='timedelta64[ns]', freq='D')") - - exp5 = ("TimedeltaIndex(['1 days 00:00:01', '2 days 00:00:00', " - "'3 days 00:00:00'], dtype='timedelta64[ns]', freq=None)") - - with pd.option_context('display.width', 300): - for idx, expected in zip([idx1, idx2, idx3, idx4, idx5], - [exp1, exp2, exp3, exp4, exp5]): - for func in ['__repr__', '__unicode__', '__str__']: - result = getattr(idx, func)() - self.assertEqual(result, expected) - - def test_representation_to_series(self): - idx1 = TimedeltaIndex([], freq='D') - idx2 = TimedeltaIndex(['1 days'], freq='D') - idx3 = TimedeltaIndex(['1 days', '2 days'], freq='D') - idx4 = TimedeltaIndex(['1 days', '2 days', '3 days'], freq='D') - idx5 = TimedeltaIndex(['1 days 00:00:01', '2 days', '3 days']) - - exp1 = """Series([], dtype: timedelta64[ns])""" - - exp2 = """0 1 days -dtype: timedelta64[ns]""" - - exp3 = """0 1 days -1 2 days -dtype: timedelta64[ns]""" - - exp4 = """0 1 days -1 2 days -2 3 days -dtype: timedelta64[ns]""" - - exp5 = """0 1 days 00:00:01 -1 2 days 00:00:00 -2 3 days 00:00:00 -dtype: timedelta64[ns]""" - - with pd.option_context('display.width', 300): - for idx, expected in zip([idx1, idx2, idx3, idx4, idx5], - [exp1, exp2, exp3, exp4, exp5]): - result = repr(pd.Series(idx)) - self.assertEqual(result, expected) - - def test_summary(self): - # GH9116 - idx1 = TimedeltaIndex([], freq='D') - idx2 = TimedeltaIndex(['1 days'], freq='D') - idx3 = TimedeltaIndex(['1 
days', '2 days'], freq='D') - idx4 = TimedeltaIndex(['1 days', '2 days', '3 days'], freq='D') - idx5 = TimedeltaIndex(['1 days 00:00:01', '2 days', '3 days']) - - exp1 = """TimedeltaIndex: 0 entries -Freq: D""" - - exp2 = """TimedeltaIndex: 1 entries, 1 days to 1 days -Freq: D""" - - exp3 = """TimedeltaIndex: 2 entries, 1 days to 2 days -Freq: D""" - - exp4 = """TimedeltaIndex: 3 entries, 1 days to 3 days -Freq: D""" - - exp5 = ("TimedeltaIndex: 3 entries, 1 days 00:00:01 to 3 days " - "00:00:00") - - for idx, expected in zip([idx1, idx2, idx3, idx4, idx5], - [exp1, exp2, exp3, exp4, exp5]): - result = idx.summary() - self.assertEqual(result, expected) - - def test_add_iadd(self): - - # only test adding/sub offsets as + is now numeric - - # offset - offsets = [pd.offsets.Hour(2), timedelta(hours=2), - np.timedelta64(2, 'h'), Timedelta(hours=2)] - - for delta in offsets: - rng = timedelta_range('1 days', '10 days') - result = rng + delta - expected = timedelta_range('1 days 02:00:00', '10 days 02:00:00', - freq='D') - tm.assert_index_equal(result, expected) - rng += delta - tm.assert_index_equal(rng, expected) - - # int - rng = timedelta_range('1 days 09:00:00', freq='H', periods=10) - result = rng + 1 - expected = timedelta_range('1 days 10:00:00', freq='H', periods=10) - tm.assert_index_equal(result, expected) - rng += 1 - tm.assert_index_equal(rng, expected) - - def test_sub_isub(self): - # only test adding/sub offsets as - is now numeric - - # offset - offsets = [pd.offsets.Hour(2), timedelta(hours=2), - np.timedelta64(2, 'h'), Timedelta(hours=2)] - - for delta in offsets: - rng = timedelta_range('1 days', '10 days') - result = rng - delta - expected = timedelta_range('0 days 22:00:00', '9 days 22:00:00') - tm.assert_index_equal(result, expected) - rng -= delta - tm.assert_index_equal(rng, expected) - - # int - rng = timedelta_range('1 days 09:00:00', freq='H', periods=10) - result = rng - 1 - expected = timedelta_range('1 days 08:00:00', freq='H', periods=10) - 
tm.assert_index_equal(result, expected) - rng -= 1 - tm.assert_index_equal(rng, expected) - - idx = TimedeltaIndex(['1 day', '2 day']) - msg = "cannot subtract a datelike from a TimedeltaIndex" - with tm.assertRaisesRegexp(TypeError, msg): - idx - Timestamp('2011-01-01') - - result = Timestamp('2011-01-01') + idx - expected = DatetimeIndex(['2011-01-02', '2011-01-03']) - tm.assert_index_equal(result, expected) - - def test_ops_compat(self): - - offsets = [pd.offsets.Hour(2), timedelta(hours=2), - np.timedelta64(2, 'h'), Timedelta(hours=2)] - - rng = timedelta_range('1 days', '10 days', name='foo') - - # multiply - for offset in offsets: - self.assertRaises(TypeError, lambda: rng * offset) - - # divide - expected = Int64Index((np.arange(10) + 1) * 12, name='foo') - for offset in offsets: - result = rng / offset - tm.assert_index_equal(result, expected, exact=False) - - # divide with nats - rng = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo') - expected = Float64Index([12, np.nan, 24], name='foo') - for offset in offsets: - result = rng / offset - tm.assert_index_equal(result, expected) - - # don't allow division by NaT (make could in the future) - self.assertRaises(TypeError, lambda: rng / pd.NaT) - - def test_subtraction_ops(self): - - # with datetimes/timedelta and tdi/dti - tdi = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo') - dti = date_range('20130101', periods=3, name='bar') - td = Timedelta('1 days') - dt = Timestamp('20130101') - - self.assertRaises(TypeError, lambda: tdi - dt) - self.assertRaises(TypeError, lambda: tdi - dti) - self.assertRaises(TypeError, lambda: td - dt) - self.assertRaises(TypeError, lambda: td - dti) - - result = dt - dti - expected = TimedeltaIndex(['0 days', '-1 days', '-2 days'], name='bar') - tm.assert_index_equal(result, expected) - - result = dti - dt - expected = TimedeltaIndex(['0 days', '1 days', '2 days'], name='bar') - tm.assert_index_equal(result, expected) - - result = tdi - td - expected = 
TimedeltaIndex(['0 days', pd.NaT, '1 days'], name='foo') - tm.assert_index_equal(result, expected, check_names=False) - - result = td - tdi - expected = TimedeltaIndex(['0 days', pd.NaT, '-1 days'], name='foo') - tm.assert_index_equal(result, expected, check_names=False) - - result = dti - td - expected = DatetimeIndex( - ['20121231', '20130101', '20130102'], name='bar') - tm.assert_index_equal(result, expected, check_names=False) - - result = dt - tdi - expected = DatetimeIndex(['20121231', pd.NaT, '20121230'], name='foo') - tm.assert_index_equal(result, expected) - - def test_subtraction_ops_with_tz(self): - - # check that dt/dti subtraction ops with tz are validated - dti = date_range('20130101', periods=3) - ts = Timestamp('20130101') - dt = ts.to_pydatetime() - dti_tz = date_range('20130101', periods=3).tz_localize('US/Eastern') - ts_tz = Timestamp('20130101').tz_localize('US/Eastern') - ts_tz2 = Timestamp('20130101').tz_localize('CET') - dt_tz = ts_tz.to_pydatetime() - td = Timedelta('1 days') - - def _check(result, expected): - self.assertEqual(result, expected) - self.assertIsInstance(result, Timedelta) - - # scalars - result = ts - ts - expected = Timedelta('0 days') - _check(result, expected) - - result = dt_tz - ts_tz - expected = Timedelta('0 days') - _check(result, expected) - - result = ts_tz - dt_tz - expected = Timedelta('0 days') - _check(result, expected) - - # tz mismatches - self.assertRaises(TypeError, lambda: dt_tz - ts) - self.assertRaises(TypeError, lambda: dt_tz - dt) - self.assertRaises(TypeError, lambda: dt_tz - ts_tz2) - self.assertRaises(TypeError, lambda: dt - dt_tz) - self.assertRaises(TypeError, lambda: ts - dt_tz) - self.assertRaises(TypeError, lambda: ts_tz2 - ts) - self.assertRaises(TypeError, lambda: ts_tz2 - dt) - self.assertRaises(TypeError, lambda: ts_tz - ts_tz2) - - # with dti - self.assertRaises(TypeError, lambda: dti - ts_tz) - self.assertRaises(TypeError, lambda: dti_tz - ts) - self.assertRaises(TypeError, lambda: dti_tz 
- ts_tz2) - - result = dti_tz - dt_tz - expected = TimedeltaIndex(['0 days', '1 days', '2 days']) - tm.assert_index_equal(result, expected) - - result = dt_tz - dti_tz - expected = TimedeltaIndex(['0 days', '-1 days', '-2 days']) - tm.assert_index_equal(result, expected) - - result = dti_tz - ts_tz - expected = TimedeltaIndex(['0 days', '1 days', '2 days']) - tm.assert_index_equal(result, expected) - - result = ts_tz - dti_tz - expected = TimedeltaIndex(['0 days', '-1 days', '-2 days']) - tm.assert_index_equal(result, expected) - - result = td - td - expected = Timedelta('0 days') - _check(result, expected) - - result = dti_tz - td - expected = DatetimeIndex( - ['20121231', '20130101', '20130102'], tz='US/Eastern') - tm.assert_index_equal(result, expected) - - def test_dti_tdi_numeric_ops(self): - - # These are normally union/diff set-like ops - tdi = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo') - dti = date_range('20130101', periods=3, name='bar') - - # TODO(wesm): unused? - # td = Timedelta('1 days') - # dt = Timestamp('20130101') - - result = tdi - tdi - expected = TimedeltaIndex(['0 days', pd.NaT, '0 days'], name='foo') - tm.assert_index_equal(result, expected) - - result = tdi + tdi - expected = TimedeltaIndex(['2 days', pd.NaT, '4 days'], name='foo') - tm.assert_index_equal(result, expected) - - result = dti - tdi # name will be reset - expected = DatetimeIndex(['20121231', pd.NaT, '20130101']) - tm.assert_index_equal(result, expected) - - def test_sub_period(self): - # GH 13078 - # not supported, check TypeError - p = pd.Period('2011-01-01', freq='D') - - for freq in [None, 'H']: - idx = pd.TimedeltaIndex(['1 hours', '2 hours'], freq=freq) - - with tm.assertRaises(TypeError): - idx - p - - with tm.assertRaises(TypeError): - p - idx - - def test_addition_ops(self): - - # with datetimes/timedelta and tdi/dti - tdi = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo') - dti = date_range('20130101', periods=3, name='bar') - td = Timedelta('1 
days') - dt = Timestamp('20130101') - - result = tdi + dt - expected = DatetimeIndex(['20130102', pd.NaT, '20130103'], name='foo') - tm.assert_index_equal(result, expected) - - result = dt + tdi - expected = DatetimeIndex(['20130102', pd.NaT, '20130103'], name='foo') - tm.assert_index_equal(result, expected) - - result = td + tdi - expected = TimedeltaIndex(['2 days', pd.NaT, '3 days'], name='foo') - tm.assert_index_equal(result, expected) - - result = tdi + td - expected = TimedeltaIndex(['2 days', pd.NaT, '3 days'], name='foo') - tm.assert_index_equal(result, expected) - - # unequal length - self.assertRaises(ValueError, lambda: tdi + dti[0:1]) - self.assertRaises(ValueError, lambda: tdi[0:1] + dti) - - # random indexes - self.assertRaises(TypeError, lambda: tdi + Int64Index([1, 2, 3])) - - # this is a union! - # self.assertRaises(TypeError, lambda : Int64Index([1,2,3]) + tdi) - - result = tdi + dti # name will be reset - expected = DatetimeIndex(['20130102', pd.NaT, '20130105']) - tm.assert_index_equal(result, expected) - - result = dti + tdi # name will be reset - expected = DatetimeIndex(['20130102', pd.NaT, '20130105']) - tm.assert_index_equal(result, expected) - - result = dt + td - expected = Timestamp('20130102') - self.assertEqual(result, expected) - - result = td + dt - expected = Timestamp('20130102') - self.assertEqual(result, expected) - - def test_comp_nat(self): - left = pd.TimedeltaIndex([pd.Timedelta('1 days'), pd.NaT, - pd.Timedelta('3 days')]) - right = pd.TimedeltaIndex([pd.NaT, pd.NaT, pd.Timedelta('3 days')]) - - for l, r in [(left, right), (left.asobject, right.asobject)]: - result = l == r - expected = np.array([False, False, True]) - tm.assert_numpy_array_equal(result, expected) - - result = l != r - expected = np.array([True, True, False]) - tm.assert_numpy_array_equal(result, expected) - - expected = np.array([False, False, False]) - tm.assert_numpy_array_equal(l == pd.NaT, expected) - tm.assert_numpy_array_equal(pd.NaT == r, expected) - 
- expected = np.array([True, True, True]) - tm.assert_numpy_array_equal(l != pd.NaT, expected) - tm.assert_numpy_array_equal(pd.NaT != l, expected) - - expected = np.array([False, False, False]) - tm.assert_numpy_array_equal(l < pd.NaT, expected) - tm.assert_numpy_array_equal(pd.NaT > l, expected) + tm.assert_raises_regex( + ValueError, errmsg, np.argmin, td, out=0) + tm.assert_raises_regex( + ValueError, errmsg, np.argmax, td, out=0) def test_value_counts_unique(self): # GH 7735 @@ -553,14 +113,14 @@ def test_nonunique_contains(self): for idx in map(TimedeltaIndex, ([0, 1, 0], [0, 0, -1], [0, -1, -1], ['00:01:00', '00:01:00', '00:02:00'], ['00:01:00', '00:01:00', '00:00:01'])): - tm.assertIn(idx[0], idx) + assert idx[0] in idx def test_unknown_attribute(self): - # GH 9680 + # see gh-9680 tdi = pd.timedelta_range(start=0, periods=10, freq='1s') ts = pd.Series(np.random.normal(size=10), index=tdi) - self.assertNotIn('foo', ts.__dict__.keys()) - self.assertRaises(AttributeError, lambda: ts.foo) + assert 'foo' not in ts.__dict__.keys() + pytest.raises(AttributeError, lambda: ts.foo) def test_order(self): # GH 10295 @@ -571,27 +131,26 @@ def test_order(self): for idx in [idx1, idx2]: ordered = idx.sort_values() - self.assert_index_equal(ordered, idx) - self.assertEqual(ordered.freq, idx.freq) + tm.assert_index_equal(ordered, idx) + assert ordered.freq == idx.freq ordered = idx.sort_values(ascending=False) expected = idx[::-1] - self.assert_index_equal(ordered, expected) - self.assertEqual(ordered.freq, expected.freq) - self.assertEqual(ordered.freq.n, -1) + tm.assert_index_equal(ordered, expected) + assert ordered.freq == expected.freq + assert ordered.freq.n == -1 ordered, indexer = idx.sort_values(return_indexer=True) - self.assert_index_equal(ordered, idx) - self.assert_numpy_array_equal(indexer, - np.array([0, 1, 2]), - check_dtype=False) - self.assertEqual(ordered.freq, idx.freq) + tm.assert_index_equal(ordered, idx) + tm.assert_numpy_array_equal(indexer, 
np.array([0, 1, 2]), + check_dtype=False) + assert ordered.freq == idx.freq ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) - self.assert_index_equal(ordered, idx[::-1]) - self.assertEqual(ordered.freq, expected.freq) - self.assertEqual(ordered.freq.n, -1) + tm.assert_index_equal(ordered, idx[::-1]) + assert ordered.freq == expected.freq + assert ordered.freq.n == -1 idx1 = TimedeltaIndex(['1 hour', '3 hour', '5 hour', '2 hour ', '1 hour'], name='idx1') @@ -612,72 +171,40 @@ def test_order(self): for idx, expected in [(idx1, exp1), (idx1, exp1), (idx1, exp1)]: ordered = idx.sort_values() - self.assert_index_equal(ordered, expected) - self.assertIsNone(ordered.freq) + tm.assert_index_equal(ordered, expected) + assert ordered.freq is None ordered = idx.sort_values(ascending=False) - self.assert_index_equal(ordered, expected[::-1]) - self.assertIsNone(ordered.freq) + tm.assert_index_equal(ordered, expected[::-1]) + assert ordered.freq is None ordered, indexer = idx.sort_values(return_indexer=True) - self.assert_index_equal(ordered, expected) + tm.assert_index_equal(ordered, expected) exp = np.array([0, 4, 3, 1, 2]) - self.assert_numpy_array_equal(indexer, exp, check_dtype=False) - self.assertIsNone(ordered.freq) + tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) + assert ordered.freq is None ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) - self.assert_index_equal(ordered, expected[::-1]) + tm.assert_index_equal(ordered, expected[::-1]) exp = np.array([2, 1, 3, 4, 0]) - self.assert_numpy_array_equal(indexer, exp, check_dtype=False) - self.assertIsNone(ordered.freq) - - def test_getitem(self): - idx1 = pd.timedelta_range('1 day', '31 day', freq='D', name='idx') - - for idx in [idx1]: - result = idx[0] - self.assertEqual(result, pd.Timedelta('1 day')) - - result = idx[0:5] - expected = pd.timedelta_range('1 day', '5 day', freq='D', - name='idx') - self.assert_index_equal(result, expected) - 
self.assertEqual(result.freq, expected.freq) - - result = idx[0:10:2] - expected = pd.timedelta_range('1 day', '9 day', freq='2D', - name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - - result = idx[-20:-5:3] - expected = pd.timedelta_range('12 day', '24 day', freq='3D', - name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - - result = idx[4::-1] - expected = TimedeltaIndex(['5 day', '4 day', '3 day', - '2 day', '1 day'], - freq='-1D', name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) + tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) + assert ordered.freq is None def test_drop_duplicates_metadata(self): # GH 10115 idx = pd.timedelta_range('1 day', '31 day', freq='D', name='idx') result = idx.drop_duplicates() - self.assert_index_equal(idx, result) - self.assertEqual(idx.freq, result.freq) + tm.assert_index_equal(idx, result) + assert idx.freq == result.freq idx_dup = idx.append(idx) - self.assertIsNone(idx_dup.freq) # freq is reset + assert idx_dup.freq is None # freq is reset result = idx_dup.drop_duplicates() - self.assert_index_equal(idx, result) - self.assertIsNone(result.freq) + tm.assert_index_equal(idx, result) + assert result.freq is None def test_drop_duplicates(self): # to check Index/Series compat @@ -700,69 +227,15 @@ def test_drop_duplicates(self): res = Series(idx).drop_duplicates(keep=False) tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31))) - def test_take(self): - # GH 10295 - idx1 = pd.timedelta_range('1 day', '31 day', freq='D', name='idx') - - for idx in [idx1]: - result = idx.take([0]) - self.assertEqual(result, pd.Timedelta('1 day')) - - result = idx.take([-1]) - self.assertEqual(result, pd.Timedelta('31 day')) - - result = idx.take([0, 1, 2]) - expected = pd.timedelta_range('1 day', '3 day', freq='D', - name='idx') - self.assert_index_equal(result, 
expected) - self.assertEqual(result.freq, expected.freq) - - result = idx.take([0, 2, 4]) - expected = pd.timedelta_range('1 day', '5 day', freq='2D', - name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - - result = idx.take([7, 4, 1]) - expected = pd.timedelta_range('8 day', '2 day', freq='-3D', - name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - - result = idx.take([3, 2, 5]) - expected = TimedeltaIndex(['4 day', '3 day', '6 day'], name='idx') - self.assert_index_equal(result, expected) - self.assertIsNone(result.freq) - - result = idx.take([-3, 2, 5]) - expected = TimedeltaIndex(['29 day', '3 day', '6 day'], name='idx') - self.assert_index_equal(result, expected) - self.assertIsNone(result.freq) - - def test_take_invalid_kwargs(self): - idx = pd.timedelta_range('1 day', '31 day', freq='D', name='idx') - indices = [1, 6, 5, 9, 10, 13, 15, 3] - - msg = r"take\(\) got an unexpected keyword argument 'foo'" - tm.assertRaisesRegexp(TypeError, msg, idx.take, - indices, foo=2) - - msg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, idx.take, - indices, out=indices) - - msg = "the 'mode' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, idx.take, - indices, mode='clip') - - def test_infer_freq(self): - # GH 11018 - for freq in ['D', '3D', '-3D', 'H', '2H', '-2H', 'T', '2T', 'S', '-3S' - ]: - idx = pd.timedelta_range('1', freq=freq, periods=10) - result = pd.TimedeltaIndex(idx.asi8, freq='infer') - tm.assert_index_equal(idx, result) - self.assertEqual(result.freq, freq) + @pytest.mark.parametrize('freq', ['D', '3D', '-3D', + 'H', '2H', '-2H', + 'T', '2T', 'S', '-3S']) + def test_infer_freq(self, freq): + # GH#11018 + idx = pd.timedelta_range('1', freq=freq, periods=10) + result = pd.TimedeltaIndex(idx.asi8, freq='infer') + tm.assert_index_equal(idx, result) + assert result.freq == freq def test_nat_new(self): @@ 
-772,36 +245,18 @@ def test_nat_new(self): tm.assert_index_equal(result, exp) result = idx._nat_new(box=False) - exp = np.array([tslib.iNaT] * 5, dtype=np.int64) + exp = np.array([iNaT] * 5, dtype=np.int64) tm.assert_numpy_array_equal(result, exp) def test_shift(self): - # GH 9903 - idx = pd.TimedeltaIndex([], name='xxx') - tm.assert_index_equal(idx.shift(0, freq='H'), idx) - tm.assert_index_equal(idx.shift(3, freq='H'), idx) - - idx = pd.TimedeltaIndex(['5 hours', '6 hours', '9 hours'], name='xxx') - tm.assert_index_equal(idx.shift(0, freq='H'), idx) - exp = pd.TimedeltaIndex(['8 hours', '9 hours', '12 hours'], name='xxx') - tm.assert_index_equal(idx.shift(3, freq='H'), exp) - exp = pd.TimedeltaIndex(['2 hours', '3 hours', '6 hours'], name='xxx') - tm.assert_index_equal(idx.shift(-3, freq='H'), exp) - - tm.assert_index_equal(idx.shift(0, freq='T'), idx) - exp = pd.TimedeltaIndex(['05:03:00', '06:03:00', '9:03:00'], - name='xxx') - tm.assert_index_equal(idx.shift(3, freq='T'), exp) - exp = pd.TimedeltaIndex(['04:57:00', '05:57:00', '8:57:00'], - name='xxx') - tm.assert_index_equal(idx.shift(-3, freq='T'), exp) + pass # handled in test_arithmetic.py def test_repeat(self): index = pd.timedelta_range('1 days', periods=2, freq='D') exp = pd.TimedeltaIndex(['1 days', '1 days', '2 days', '2 days']) for res in [index.repeat(2), np.repeat(index, 2)]: tm.assert_index_equal(res, exp) - self.assertIsNone(res.freq) + assert res.freq is None index = TimedeltaIndex(['1 days', 'NaT', '3 days']) exp = TimedeltaIndex(['1 days', '1 days', '1 days', @@ -809,206 +264,50 @@ def test_repeat(self): '3 days', '3 days', '3 days']) for res in [index.repeat(3), np.repeat(index, 3)]: tm.assert_index_equal(res, exp) - self.assertIsNone(res.freq) + assert res.freq is None def test_nat(self): - self.assertIs(pd.TimedeltaIndex._na_value, pd.NaT) - self.assertIs(pd.TimedeltaIndex([])._na_value, pd.NaT) + assert pd.TimedeltaIndex._na_value is pd.NaT + assert pd.TimedeltaIndex([])._na_value is 
pd.NaT idx = pd.TimedeltaIndex(['1 days', '2 days']) - self.assertTrue(idx._can_hold_na) + assert idx._can_hold_na tm.assert_numpy_array_equal(idx._isnan, np.array([False, False])) - self.assertFalse(idx.hasnans) + assert not idx.hasnans tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp)) idx = pd.TimedeltaIndex(['1 days', 'NaT']) - self.assertTrue(idx._can_hold_na) + assert idx._can_hold_na tm.assert_numpy_array_equal(idx._isnan, np.array([False, True])) - self.assertTrue(idx.hasnans) + assert idx.hasnans tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1], dtype=np.intp)) def test_equals(self): # GH 13107 idx = pd.TimedeltaIndex(['1 days', '2 days', 'NaT']) - self.assertTrue(idx.equals(idx)) - self.assertTrue(idx.equals(idx.copy())) - self.assertTrue(idx.equals(idx.asobject)) - self.assertTrue(idx.asobject.equals(idx)) - self.assertTrue(idx.asobject.equals(idx.asobject)) - self.assertFalse(idx.equals(list(idx))) - self.assertFalse(idx.equals(pd.Series(idx))) + assert idx.equals(idx) + assert idx.equals(idx.copy()) + assert idx.equals(idx.astype(object)) + assert idx.astype(object).equals(idx) + assert idx.astype(object).equals(idx.astype(object)) + assert not idx.equals(list(idx)) + assert not idx.equals(pd.Series(idx)) idx2 = pd.TimedeltaIndex(['2 days', '1 days', 'NaT']) - self.assertFalse(idx.equals(idx2)) - self.assertFalse(idx.equals(idx2.copy())) - self.assertFalse(idx.equals(idx2.asobject)) - self.assertFalse(idx.asobject.equals(idx2)) - self.assertFalse(idx.asobject.equals(idx2.asobject)) - self.assertFalse(idx.equals(list(idx2))) - self.assertFalse(idx.equals(pd.Series(idx2))) - - -class TestTimedeltas(tm.TestCase): - _multiprocess_can_split_ = True - - def test_ops(self): - - td = Timedelta(10, unit='d') - self.assertEqual(-td, Timedelta(-10, unit='d')) - self.assertEqual(+td, Timedelta(10, unit='d')) - self.assertEqual(td - td, Timedelta(0, unit='ns')) - self.assertTrue((td - pd.NaT) is pd.NaT) - self.assertEqual(td + td, 
Timedelta(20, unit='d')) - self.assertTrue((td + pd.NaT) is pd.NaT) - self.assertEqual(td * 2, Timedelta(20, unit='d')) - self.assertTrue((td * pd.NaT) is pd.NaT) - self.assertEqual(td / 2, Timedelta(5, unit='d')) - self.assertEqual(abs(td), td) - self.assertEqual(abs(-td), td) - self.assertEqual(td / td, 1) - self.assertTrue((td / pd.NaT) is np.nan) - - # invert - self.assertEqual(-td, Timedelta('-10d')) - self.assertEqual(td * -1, Timedelta('-10d')) - self.assertEqual(-1 * td, Timedelta('-10d')) - self.assertEqual(abs(-td), Timedelta('10d')) - - # invalid - self.assertRaises(TypeError, lambda: Timedelta(11, unit='d') // 2) - - # invalid multiply with another timedelta - self.assertRaises(TypeError, lambda: td * td) - - # can't operate with integers - self.assertRaises(TypeError, lambda: td + 2) - self.assertRaises(TypeError, lambda: td - 2) - - def test_ops_offsets(self): - td = Timedelta(10, unit='d') - self.assertEqual(Timedelta(241, unit='h'), td + pd.offsets.Hour(1)) - self.assertEqual(Timedelta(241, unit='h'), pd.offsets.Hour(1) + td) - self.assertEqual(240, td / pd.offsets.Hour(1)) - self.assertEqual(1 / 240.0, pd.offsets.Hour(1) / td) - self.assertEqual(Timedelta(239, unit='h'), td - pd.offsets.Hour(1)) - self.assertEqual(Timedelta(-239, unit='h'), pd.offsets.Hour(1) - td) - - def test_ops_ndarray(self): - td = Timedelta('1 day') - - # timedelta, timedelta - other = pd.to_timedelta(['1 day']).values - expected = pd.to_timedelta(['2 days']).values - self.assert_numpy_array_equal(td + other, expected) - if LooseVersion(np.__version__) >= '1.8': - self.assert_numpy_array_equal(other + td, expected) - self.assertRaises(TypeError, lambda: td + np.array([1])) - self.assertRaises(TypeError, lambda: np.array([1]) + td) - - expected = pd.to_timedelta(['0 days']).values - self.assert_numpy_array_equal(td - other, expected) - if LooseVersion(np.__version__) >= '1.8': - self.assert_numpy_array_equal(-other + td, expected) - self.assertRaises(TypeError, lambda: td - 
np.array([1])) - self.assertRaises(TypeError, lambda: np.array([1]) - td) - - expected = pd.to_timedelta(['2 days']).values - self.assert_numpy_array_equal(td * np.array([2]), expected) - self.assert_numpy_array_equal(np.array([2]) * td, expected) - self.assertRaises(TypeError, lambda: td * other) - self.assertRaises(TypeError, lambda: other * td) - - self.assert_numpy_array_equal(td / other, - np.array([1], dtype=np.float64)) - if LooseVersion(np.__version__) >= '1.8': - self.assert_numpy_array_equal(other / td, - np.array([1], dtype=np.float64)) - - # timedelta, datetime - other = pd.to_datetime(['2000-01-01']).values - expected = pd.to_datetime(['2000-01-02']).values - self.assert_numpy_array_equal(td + other, expected) - if LooseVersion(np.__version__) >= '1.8': - self.assert_numpy_array_equal(other + td, expected) - - expected = pd.to_datetime(['1999-12-31']).values - self.assert_numpy_array_equal(-td + other, expected) - if LooseVersion(np.__version__) >= '1.8': - self.assert_numpy_array_equal(other - td, expected) - - def test_ops_series(self): - # regression test for GH8813 - td = Timedelta('1 day') - other = pd.Series([1, 2]) - expected = pd.Series(pd.to_timedelta(['1 day', '2 days'])) - tm.assert_series_equal(expected, td * other) - tm.assert_series_equal(expected, other * td) - - def test_ops_series_object(self): - # GH 13043 - s = pd.Series([pd.Timestamp('2015-01-01', tz='US/Eastern'), - pd.Timestamp('2015-01-01', tz='Asia/Tokyo')], - name='xxx') - self.assertEqual(s.dtype, object) - - exp = pd.Series([pd.Timestamp('2015-01-02', tz='US/Eastern'), - pd.Timestamp('2015-01-02', tz='Asia/Tokyo')], - name='xxx') - tm.assert_series_equal(s + pd.Timedelta('1 days'), exp) - tm.assert_series_equal(pd.Timedelta('1 days') + s, exp) - - # object series & object series - s2 = pd.Series([pd.Timestamp('2015-01-03', tz='US/Eastern'), - pd.Timestamp('2015-01-05', tz='Asia/Tokyo')], - name='xxx') - self.assertEqual(s2.dtype, object) - exp = pd.Series([pd.Timedelta('2 
days'), pd.Timedelta('4 days')], - name='xxx') - tm.assert_series_equal(s2 - s, exp) - tm.assert_series_equal(s - s2, -exp) - - s = pd.Series([pd.Timedelta('01:00:00'), pd.Timedelta('02:00:00')], - name='xxx', dtype=object) - self.assertEqual(s.dtype, object) - - exp = pd.Series([pd.Timedelta('01:30:00'), pd.Timedelta('02:30:00')], - name='xxx') - tm.assert_series_equal(s + pd.Timedelta('00:30:00'), exp) - tm.assert_series_equal(pd.Timedelta('00:30:00') + s, exp) - - def test_ops_notimplemented(self): - class Other: - pass - - other = Other() - - td = Timedelta('1 day') - self.assertTrue(td.__add__(other) is NotImplemented) - self.assertTrue(td.__sub__(other) is NotImplemented) - self.assertTrue(td.__truediv__(other) is NotImplemented) - self.assertTrue(td.__mul__(other) is NotImplemented) - self.assertTrue(td.__floordiv__(td) is NotImplemented) - - def test_ops_error_str(self): - # GH 13624 - tdi = TimedeltaIndex(['1 day', '2 days']) - - for l, r in [(tdi, 'a'), ('a', tdi)]: - with tm.assertRaises(TypeError): - l + r - - with tm.assertRaises(TypeError): - l > r - - with tm.assertRaises(TypeError): - l == r - - with tm.assertRaises(TypeError): - l != r + assert not idx.equals(idx2) + assert not idx.equals(idx2.copy()) + assert not idx.equals(idx2.astype(object)) + assert not idx.astype(object).equals(idx2) + assert not idx.astype(object).equals(idx2.astype(object)) + assert not idx.equals(list(idx2)) + assert not idx.equals(pd.Series(idx2)) + + +class TestTimedeltas(object): def test_timedelta_ops(self): # GH4984 @@ -1019,258 +318,48 @@ def test_timedelta_ops(self): result = td.mean() expected = to_timedelta(timedelta(seconds=9)) - self.assertEqual(result, expected) + assert result == expected result = td.to_frame().mean() - self.assertEqual(result[0], expected) + assert result[0] == expected result = td.quantile(.1) expected = Timedelta(np.timedelta64(2600, 'ms')) - self.assertEqual(result, expected) + assert result == expected result = td.median() expected = 
to_timedelta('00:00:09') - self.assertEqual(result, expected) + assert result == expected result = td.to_frame().median() - self.assertEqual(result[0], expected) + assert result[0] == expected # GH 6462 # consistency in returned values for sum result = td.sum() expected = to_timedelta('00:01:21') - self.assertEqual(result, expected) + assert result == expected result = td.to_frame().sum() - self.assertEqual(result[0], expected) + assert result[0] == expected # std result = td.std() expected = to_timedelta(Series(td.dropna().values).std()) - self.assertEqual(result, expected) + assert result == expected result = td.to_frame().std() - self.assertEqual(result[0], expected) + assert result[0] == expected # invalid ops for op in ['skew', 'kurt', 'sem', 'prod']: - self.assertRaises(TypeError, getattr(td, op)) + pytest.raises(TypeError, getattr(td, op)) # GH 10040 # make sure NaT is properly handled by median() s = Series([Timestamp('2015-02-03'), Timestamp('2015-02-07')]) - self.assertEqual(s.diff().median(), timedelta(days=4)) + assert s.diff().median() == timedelta(days=4) s = Series([Timestamp('2015-02-03'), Timestamp('2015-02-07'), Timestamp('2015-02-15')]) - self.assertEqual(s.diff().median(), timedelta(days=6)) - - def test_timedelta_ops_scalar(self): - # GH 6808 - base = pd.to_datetime('20130101 09:01:12.123456') - expected_add = pd.to_datetime('20130101 09:01:22.123456') - expected_sub = pd.to_datetime('20130101 09:01:02.123456') - - for offset in [pd.to_timedelta(10, unit='s'), timedelta(seconds=10), - np.timedelta64(10, 's'), - np.timedelta64(10000000000, 'ns'), - pd.offsets.Second(10)]: - result = base + offset - self.assertEqual(result, expected_add) - - result = base - offset - self.assertEqual(result, expected_sub) - - base = pd.to_datetime('20130102 09:01:12.123456') - expected_add = pd.to_datetime('20130103 09:01:22.123456') - expected_sub = pd.to_datetime('20130101 09:01:02.123456') - - for offset in [pd.to_timedelta('1 day, 00:00:10'), - 
pd.to_timedelta('1 days, 00:00:10'), - timedelta(days=1, seconds=10), - np.timedelta64(1, 'D') + np.timedelta64(10, 's'), - pd.offsets.Day() + pd.offsets.Second(10)]: - result = base + offset - self.assertEqual(result, expected_add) - - result = base - offset - self.assertEqual(result, expected_sub) - - def test_timedelta_ops_with_missing_values(self): - # setup - s1 = pd.to_timedelta(Series(['00:00:01'])) - s2 = pd.to_timedelta(Series(['00:00:02'])) - sn = pd.to_timedelta(Series([pd.NaT])) - df1 = DataFrame(['00:00:01']).apply(pd.to_timedelta) - df2 = DataFrame(['00:00:02']).apply(pd.to_timedelta) - dfn = DataFrame([pd.NaT]).apply(pd.to_timedelta) - scalar1 = pd.to_timedelta('00:00:01') - scalar2 = pd.to_timedelta('00:00:02') - timedelta_NaT = pd.to_timedelta('NaT') - NA = np.nan - - actual = scalar1 + scalar1 - self.assertEqual(actual, scalar2) - actual = scalar2 - scalar1 - self.assertEqual(actual, scalar1) - - actual = s1 + s1 - assert_series_equal(actual, s2) - actual = s2 - s1 - assert_series_equal(actual, s1) - - actual = s1 + scalar1 - assert_series_equal(actual, s2) - actual = scalar1 + s1 - assert_series_equal(actual, s2) - actual = s2 - scalar1 - assert_series_equal(actual, s1) - actual = -scalar1 + s2 - assert_series_equal(actual, s1) - - actual = s1 + timedelta_NaT - assert_series_equal(actual, sn) - actual = timedelta_NaT + s1 - assert_series_equal(actual, sn) - actual = s1 - timedelta_NaT - assert_series_equal(actual, sn) - actual = -timedelta_NaT + s1 - assert_series_equal(actual, sn) - - actual = s1 + NA - assert_series_equal(actual, sn) - actual = NA + s1 - assert_series_equal(actual, sn) - actual = s1 - NA - assert_series_equal(actual, sn) - actual = -NA + s1 - assert_series_equal(actual, sn) - - actual = s1 + pd.NaT - assert_series_equal(actual, sn) - actual = s2 - pd.NaT - assert_series_equal(actual, sn) - - actual = s1 + df1 - assert_frame_equal(actual, df2) - actual = s2 - df1 - assert_frame_equal(actual, df1) - actual = df1 + s1 - 
assert_frame_equal(actual, df2) - actual = df2 - s1 - assert_frame_equal(actual, df1) - - actual = df1 + df1 - assert_frame_equal(actual, df2) - actual = df2 - df1 - assert_frame_equal(actual, df1) - - actual = df1 + scalar1 - assert_frame_equal(actual, df2) - actual = df2 - scalar1 - assert_frame_equal(actual, df1) - - actual = df1 + timedelta_NaT - assert_frame_equal(actual, dfn) - actual = df1 - timedelta_NaT - assert_frame_equal(actual, dfn) - - actual = df1 + NA - assert_frame_equal(actual, dfn) - actual = df1 - NA - assert_frame_equal(actual, dfn) - - actual = df1 + pd.NaT # NaT is datetime, not timedelta - assert_frame_equal(actual, dfn) - actual = df1 - pd.NaT - assert_frame_equal(actual, dfn) - - def test_compare_timedelta_series(self): - # regresssion test for GH5963 - s = pd.Series([timedelta(days=1), timedelta(days=2)]) - actual = s > timedelta(days=1) - expected = pd.Series([False, True]) - tm.assert_series_equal(actual, expected) - - def test_compare_timedelta_ndarray(self): - # GH11835 - periods = [Timedelta('0 days 01:00:00'), Timedelta('0 days 01:00:00')] - arr = np.array(periods) - result = arr[0] > arr - expected = np.array([False, False]) - self.assert_numpy_array_equal(result, expected) - - -class TestSlicing(tm.TestCase): - - def test_tdi_ops_attributes(self): - rng = timedelta_range('2 days', periods=5, freq='2D', name='x') - - result = rng + 1 - exp = timedelta_range('4 days', periods=5, freq='2D', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, '2D') - - result = rng - 2 - exp = timedelta_range('-2 days', periods=5, freq='2D', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, '2D') - - result = rng * 2 - exp = timedelta_range('4 days', periods=5, freq='4D', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, '4D') - - result = rng / 2 - exp = timedelta_range('1 days', periods=5, freq='D', name='x') - tm.assert_index_equal(result, exp) - 
self.assertEqual(result.freq, 'D') - - result = -rng - exp = timedelta_range('-2 days', periods=5, freq='-2D', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, '-2D') - - rng = pd.timedelta_range('-2 days', periods=5, freq='D', name='x') - - result = abs(rng) - exp = TimedeltaIndex(['2 days', '1 days', '0 days', '1 days', - '2 days'], name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, None) - - def test_add_overflow(self): - # see gh-14068 - msg = "too (big|large) to convert" - with tm.assertRaisesRegexp(OverflowError, msg): - to_timedelta(106580, 'D') + Timestamp('2000') - with tm.assertRaisesRegexp(OverflowError, msg): - Timestamp('2000') + to_timedelta(106580, 'D') - - _NaT = int(pd.NaT) + 1 - msg = "Overflow in int64 addition" - with tm.assertRaisesRegexp(OverflowError, msg): - to_timedelta([106580], 'D') + Timestamp('2000') - with tm.assertRaisesRegexp(OverflowError, msg): - Timestamp('2000') + to_timedelta([106580], 'D') - with tm.assertRaisesRegexp(OverflowError, msg): - to_timedelta([_NaT]) - Timedelta('1 days') - with tm.assertRaisesRegexp(OverflowError, msg): - to_timedelta(['5 days', _NaT]) - Timedelta('1 days') - with tm.assertRaisesRegexp(OverflowError, msg): - (to_timedelta([_NaT, '5 days', '1 hours']) - - to_timedelta(['7 seconds', _NaT, '4 hours'])) - - # These should not overflow! 
- exp = TimedeltaIndex([pd.NaT]) - result = to_timedelta([pd.NaT]) - Timedelta('1 days') - tm.assert_index_equal(result, exp) - - exp = TimedeltaIndex(['4 days', pd.NaT]) - result = to_timedelta(['5 days', pd.NaT]) - Timedelta('1 days') - tm.assert_index_equal(result, exp) - - exp = TimedeltaIndex([pd.NaT, pd.NaT, '5 hours']) - result = (to_timedelta([pd.NaT, '5 days', '1 hours']) + - to_timedelta(['7 seconds', pd.NaT, '4 hours'])) - tm.assert_index_equal(result, exp) + assert s.diff().median() == timedelta(days=6) diff --git a/pandas/tests/indexes/timedeltas/test_partial_slicing.py b/pandas/tests/indexes/timedeltas/test_partial_slicing.py index 0d46ee4172211..7c5f82193da6d 100644 --- a/pandas/tests/indexes/timedeltas/test_partial_slicing.py +++ b/pandas/tests/indexes/timedeltas/test_partial_slicing.py @@ -1,3 +1,5 @@ +import pytest + import numpy as np import pandas.util.testing as tm @@ -6,7 +8,11 @@ from pandas.util.testing import assert_series_equal -class TestSlicing(tm.TestCase): +class TestSlicing(object): + def test_slice_keeps_name(self): + # GH4226 + dr = pd.timedelta_range('1d', '5d', freq='H', name='timebucket') + assert dr[1:].name == dr.name def test_partial_slice(self): rng = timedelta_range('1 day 10:11:12', freq='h', periods=500) @@ -25,9 +31,9 @@ def test_partial_slice(self): assert_series_equal(result, expected) result = s['6 days, 23:11:12'] - self.assertEqual(result, s.iloc[133]) + assert result == s.iloc[133] - self.assertRaises(KeyError, s.__getitem__, '50 days') + pytest.raises(KeyError, s.__getitem__, '50 days') def test_partial_slice_high_reso(self): @@ -44,7 +50,7 @@ def test_partial_slice_high_reso(self): assert_series_equal(result, expected) result = s['1 days, 10:11:12.001001'] - self.assertEqual(result, s.iloc[1001]) + assert result == s.iloc[1001] def test_slice_with_negative_step(self): ts = Series(np.arange(20), timedelta_range('0', periods=20, freq='H')) @@ -73,9 +79,9 @@ def assert_slices_equivalent(l_slc, i_slc): def 
test_slice_with_zero_step_raises(self): ts = Series(np.arange(20), timedelta_range('0', periods=20, freq='H')) - self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: ts[::0]) - self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: ts.loc[::0]) - self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: ts.loc[::0]) + tm.assert_raises_regex(ValueError, 'slice step cannot be zero', + lambda: ts[::0]) + tm.assert_raises_regex(ValueError, 'slice step cannot be zero', + lambda: ts.loc[::0]) + tm.assert_raises_regex(ValueError, 'slice step cannot be zero', + lambda: ts.loc[::0]) diff --git a/pandas/tests/indexes/timedeltas/test_scalar_compat.py b/pandas/tests/indexes/timedeltas/test_scalar_compat.py new file mode 100644 index 0000000000000..7d97e1fadea30 --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_scalar_compat.py @@ -0,0 +1,63 @@ +# -*- coding: utf-8 -*- +""" +Tests for TimedeltaIndex methods behaving like their Timedelta counterparts +""" + +import numpy as np + +import pandas as pd +import pandas.util.testing as tm +from pandas import timedelta_range, Timedelta, TimedeltaIndex, Index, Series + + +class TestVectorizedTimedelta(object): + def test_tdi_total_seconds(self): + # GH#10939 + # test index + rng = timedelta_range('1 days, 10:11:12.100123456', periods=2, + freq='s') + expt = [1 * 86400 + 10 * 3600 + 11 * 60 + 12 + 100123456. / 1e9, + 1 * 86400 + 10 * 3600 + 11 * 60 + 13 + 100123456. / 1e9] + tm.assert_almost_equal(rng.total_seconds(), Index(expt)) + + # test Series + ser = Series(rng) + s_expt = Series(expt, index=[0, 1]) + tm.assert_series_equal(ser.dt.total_seconds(), s_expt) + + # with nat + ser[1] = np.nan + s_expt = Series([1 * 86400 + 10 * 3600 + 11 * 60 + + 12 + 100123456. 
/ 1e9, np.nan], index=[0, 1]) + tm.assert_series_equal(ser.dt.total_seconds(), s_expt) + + # with both nat + ser = Series([np.nan, np.nan], dtype='timedelta64[ns]') + tm.assert_series_equal(ser.dt.total_seconds(), + Series([np.nan, np.nan], index=[0, 1])) + + def test_tdi_round(self): + td = pd.timedelta_range(start='16801 days', periods=5, freq='30Min') + elt = td[1] + + expected_rng = TimedeltaIndex([Timedelta('16801 days 00:00:00'), + Timedelta('16801 days 00:00:00'), + Timedelta('16801 days 01:00:00'), + Timedelta('16801 days 02:00:00'), + Timedelta('16801 days 02:00:00')]) + expected_elt = expected_rng[1] + + tm.assert_index_equal(td.round(freq='H'), expected_rng) + assert elt.round(freq='H') == expected_elt + + msg = pd._libs.tslibs.frequencies._INVALID_FREQ_ERROR + with tm.assert_raises_regex(ValueError, msg): + td.round(freq='foo') + with tm.assert_raises_regex(ValueError, msg): + elt.round(freq='foo') + + msg = " is a non-fixed frequency" + with tm.assert_raises_regex(ValueError, msg): + td.round(freq='M') + with tm.assert_raises_regex(ValueError, msg): + elt.round(freq='M') diff --git a/pandas/tests/indexes/timedeltas/test_setops.py b/pandas/tests/indexes/timedeltas/test_setops.py index 9000fb3beb279..020e9079b3436 100644 --- a/pandas/tests/indexes/timedeltas/test_setops.py +++ b/pandas/tests/indexes/timedeltas/test_setops.py @@ -5,8 +5,7 @@ from pandas import TimedeltaIndex, timedelta_range, Int64Index -class TestTimedeltaIndex(tm.TestCase): - _multiprocess_can_split_ = True +class TestTimedeltaIndex(object): def test_union(self): @@ -14,7 +13,7 @@ def test_union(self): i2 = timedelta_range('3day', periods=5) result = i1.union(i2) expected = timedelta_range('1day', periods=7) - self.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) i1 = Int64Index(np.arange(0, 20, 2)) i2 = TimedeltaIndex(start='1 day', periods=10, freq='D') @@ -26,11 +25,11 @@ def test_union_coverage(self): idx = TimedeltaIndex(['3d', '1d', '2d']) ordered = 
TimedeltaIndex(idx.sort_values(), freq='infer') result = ordered.union(idx) - self.assert_index_equal(result, ordered) + tm.assert_index_equal(result, ordered) result = ordered[:0].union(ordered) - self.assert_index_equal(result, ordered) - self.assertEqual(result.freq, ordered.freq) + tm.assert_index_equal(result, ordered) + assert result.freq == ordered.freq def test_union_bug_1730(self): @@ -39,7 +38,7 @@ def test_union_bug_1730(self): result = rng_a.union(rng_b) exp = TimedeltaIndex(sorted(set(list(rng_a)) | set(list(rng_b)))) - self.assert_index_equal(result, exp) + tm.assert_index_equal(result, exp) def test_union_bug_1745(self): @@ -50,7 +49,7 @@ def test_union_bug_1745(self): result = left.union(right) exp = TimedeltaIndex(sorted(set(list(left)) | set(list(right)))) - self.assert_index_equal(result, exp) + tm.assert_index_equal(result, exp) def test_union_bug_4564(self): @@ -59,14 +58,14 @@ def test_union_bug_4564(self): result = left.union(right) exp = TimedeltaIndex(sorted(set(list(left)) | set(list(right)))) - self.assert_index_equal(result, exp) + tm.assert_index_equal(result, exp) def test_intersection_bug_1708(self): index_1 = timedelta_range('1 day', periods=4, freq='h') index_2 = index_1 + pd.offsets.Hour(5) result = index_1 & index_2 - self.assertEqual(len(result), 0) + assert len(result) == 0 index_1 = timedelta_range('1 day', periods=4, freq='h') index_2 = index_1 + pd.offsets.Hour(1) diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index 4c8571e4f08f9..37db9d704aa1f 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -1,11 +1,13 @@ +import pytest + import numpy as np from datetime import timedelta import pandas as pd import pandas.util.testing as tm from pandas import (timedelta_range, date_range, Series, Timedelta, - DatetimeIndex, TimedeltaIndex, Index, DataFrame, - Int64Index, _np_version_under1p8) + 
TimedeltaIndex, Index, DataFrame, + Int64Index) from pandas.util.testing import (assert_almost_equal, assert_series_equal, assert_index_equal) @@ -14,168 +16,40 @@ randn = np.random.randn -class TestTimedeltaIndex(DatetimeLike, tm.TestCase): +class TestTimedeltaIndex(DatetimeLike): _holder = TimedeltaIndex - _multiprocess_can_split_ = True - def setUp(self): + def setup_method(self, method): self.indices = dict(index=tm.makeTimedeltaIndex(10)) self.setup_indices() def create_index(self): return pd.to_timedelta(range(5), unit='d') + pd.offsets.Hour(1) - def test_shift(self): - # test shift for TimedeltaIndex - # err8083 - - drange = self.create_index() - result = drange.shift(1) - expected = TimedeltaIndex(['1 days 01:00:00', '2 days 01:00:00', - '3 days 01:00:00', - '4 days 01:00:00', '5 days 01:00:00'], - freq='D') - self.assert_index_equal(result, expected) - - result = drange.shift(3, freq='2D 1s') - expected = TimedeltaIndex(['6 days 01:00:03', '7 days 01:00:03', - '8 days 01:00:03', '9 days 01:00:03', - '10 days 01:00:03'], freq='D') - self.assert_index_equal(result, expected) - - def test_get_loc(self): - idx = pd.to_timedelta(['0 days', '1 days', '2 days']) - - for method in [None, 'pad', 'backfill', 'nearest']: - self.assertEqual(idx.get_loc(idx[1], method), 1) - self.assertEqual(idx.get_loc(idx[1].to_pytimedelta(), method), 1) - self.assertEqual(idx.get_loc(str(idx[1]), method), 1) - - self.assertEqual( - idx.get_loc(idx[1], 'pad', tolerance=pd.Timedelta(0)), 1) - self.assertEqual( - idx.get_loc(idx[1], 'pad', tolerance=np.timedelta64(0, 's')), 1) - self.assertEqual(idx.get_loc(idx[1], 'pad', tolerance=timedelta(0)), 1) - - with tm.assertRaisesRegexp(ValueError, 'must be convertible'): - idx.get_loc(idx[1], method='nearest', tolerance='foo') - - for method, loc in [('pad', 1), ('backfill', 2), ('nearest', 1)]: - self.assertEqual(idx.get_loc('1 day 1 hour', method), loc) - - def test_get_loc_nat(self): - tidx = TimedeltaIndex(['1 days 01:00:00', 'NaT', '2 
days 01:00:00']) - - self.assertEqual(tidx.get_loc(pd.NaT), 1) - self.assertEqual(tidx.get_loc(None), 1) - self.assertEqual(tidx.get_loc(float('nan')), 1) - self.assertEqual(tidx.get_loc(np.nan), 1) - - def test_get_indexer(self): - idx = pd.to_timedelta(['0 days', '1 days', '2 days']) - tm.assert_numpy_array_equal(idx.get_indexer(idx), - np.array([0, 1, 2], dtype=np.intp)) - - target = pd.to_timedelta(['-1 hour', '12 hours', '1 day 1 hour']) - tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), - np.array([-1, 0, 1], dtype=np.intp)) - tm.assert_numpy_array_equal(idx.get_indexer(target, 'backfill'), - np.array([0, 1, 2], dtype=np.intp)) - tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest'), - np.array([0, 1, 1], dtype=np.intp)) - - res = idx.get_indexer(target, 'nearest', - tolerance=pd.Timedelta('1 hour')) - tm.assert_numpy_array_equal(res, np.array([0, -1, 1], dtype=np.intp)) - def test_numeric_compat(self): + # Dummy method to override super's version; this test is now done + # in test_arithmetic.py + pass - idx = self._holder(np.arange(5, dtype='int64')) - didx = self._holder(np.arange(5, dtype='int64') ** 2) - result = idx * 1 - tm.assert_index_equal(result, idx) - - result = 1 * idx - tm.assert_index_equal(result, idx) - - result = idx / 1 - tm.assert_index_equal(result, idx) - - result = idx // 1 - tm.assert_index_equal(result, idx) - - result = idx * np.array(5, dtype='int64') - tm.assert_index_equal(result, - self._holder(np.arange(5, dtype='int64') * 5)) - - result = idx * np.arange(5, dtype='int64') - tm.assert_index_equal(result, didx) - - result = idx * Series(np.arange(5, dtype='int64')) - tm.assert_index_equal(result, didx) - - result = idx * Series(np.arange(5, dtype='float64') + 0.1) - tm.assert_index_equal(result, self._holder(np.arange( - 5, dtype='float64') * (np.arange(5, dtype='float64') + 0.1))) - - # invalid - self.assertRaises(TypeError, lambda: idx * idx) - self.assertRaises(ValueError, lambda: idx * 
self._holder(np.arange(3))) - self.assertRaises(ValueError, lambda: idx * np.array([1, 2])) + def test_shift(self): + pass # this is handled in test_arithmetic.py def test_pickle_compat_construction(self): pass - def test_ufunc_coercions(self): - # normal ops are also tested in tseries/test_timedeltas.py - idx = TimedeltaIndex(['2H', '4H', '6H', '8H', '10H'], - freq='2H', name='x') - - for result in [idx * 2, np.multiply(idx, 2)]: - tm.assertIsInstance(result, TimedeltaIndex) - exp = TimedeltaIndex(['4H', '8H', '12H', '16H', '20H'], - freq='4H', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, '4H') - - for result in [idx / 2, np.divide(idx, 2)]: - tm.assertIsInstance(result, TimedeltaIndex) - exp = TimedeltaIndex(['1H', '2H', '3H', '4H', '5H'], - freq='H', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, 'H') - - idx = TimedeltaIndex(['2H', '4H', '6H', '8H', '10H'], - freq='2H', name='x') - for result in [-idx, np.negative(idx)]: - tm.assertIsInstance(result, TimedeltaIndex) - exp = TimedeltaIndex(['-2H', '-4H', '-6H', '-8H', '-10H'], - freq='-2H', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, '-2H') - - idx = TimedeltaIndex(['-2H', '-1H', '0H', '1H', '2H'], - freq='H', name='x') - for result in [abs(idx), np.absolute(idx)]: - tm.assertIsInstance(result, TimedeltaIndex) - exp = TimedeltaIndex(['2H', '1H', '0H', '1H', '2H'], - freq=None, name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, None) - def test_fillna_timedelta(self): # GH 11343 idx = pd.TimedeltaIndex(['1 day', pd.NaT, '3 day']) exp = pd.TimedeltaIndex(['1 day', '2 day', '3 day']) - self.assert_index_equal(idx.fillna(pd.Timedelta('2 day')), exp) + tm.assert_index_equal(idx.fillna(pd.Timedelta('2 day')), exp) exp = pd.TimedeltaIndex(['1 day', '3 hour', '3 day']) idx.fillna(pd.Timedelta('3 hour')) exp = pd.Index( [pd.Timedelta('1 day'), 'x', pd.Timedelta('3 day')], dtype=object) - 
self.assert_index_equal(idx.fillna('x'), exp) + tm.assert_index_equal(idx.fillna('x'), exp) def test_difference_freq(self): # GH14323: Difference of TimedeltaIndex should not preserve frequency @@ -194,61 +68,14 @@ def test_difference_freq(self): tm.assert_index_equal(idx_diff, expected) tm.assert_attr_equal('freq', idx_diff, expected) - def test_take(self): - - tds = ['1day 02:00:00', '1 day 04:00:00', '1 day 10:00:00'] - idx = TimedeltaIndex(start='1d', end='2d', freq='H', name='idx') - expected = TimedeltaIndex(tds, freq=None, name='idx') - - taken1 = idx.take([2, 4, 10]) - taken2 = idx[[2, 4, 10]] - - for taken in [taken1, taken2]: - self.assert_index_equal(taken, expected) - tm.assertIsInstance(taken, TimedeltaIndex) - self.assertIsNone(taken.freq) - self.assertEqual(taken.name, expected.name) - - def test_take_fill_value(self): - # GH 12631 - idx = pd.TimedeltaIndex(['1 days', '2 days', '3 days'], - name='xxx') - result = idx.take(np.array([1, 0, -1])) - expected = pd.TimedeltaIndex(['2 days', '1 days', '3 days'], - name='xxx') - tm.assert_index_equal(result, expected) - - # fill_value - result = idx.take(np.array([1, 0, -1]), fill_value=True) - expected = pd.TimedeltaIndex(['2 days', '1 days', 'NaT'], - name='xxx') - tm.assert_index_equal(result, expected) - - # allow_fill=False - result = idx.take(np.array([1, 0, -1]), allow_fill=False, - fill_value=True) - expected = pd.TimedeltaIndex(['2 days', '1 days', '3 days'], - name='xxx') - tm.assert_index_equal(result, expected) - - msg = ('When allow_fill=True and fill_value is not None, ' - 'all indices must be >= -1') - with tm.assertRaisesRegexp(ValueError, msg): - idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assertRaisesRegexp(ValueError, msg): - idx.take(np.array([1, 0, -5]), fill_value=True) - - with tm.assertRaises(IndexError): - idx.take(np.array([1, -5])) - def test_isin(self): index = tm.makeTimedeltaIndex(4) result = index.isin(index) - self.assertTrue(result.all()) + assert result.all() 
result = index.isin(list(index)) - self.assertTrue(result.all()) + assert result.all() assert_almost_equal(index.isin([index[2], 5]), np.array([False, False, True, False])) @@ -261,33 +88,25 @@ def test_factorize(self): exp_idx = TimedeltaIndex(['1 day', '2 day', '3 day']) arr, idx = idx1.factorize() - self.assert_numpy_array_equal(arr, exp_arr) - self.assert_index_equal(idx, exp_idx) + tm.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, exp_idx) arr, idx = idx1.factorize(sort=True) - self.assert_numpy_array_equal(arr, exp_arr) - self.assert_index_equal(idx, exp_idx) + tm.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, exp_idx) # freq must be preserved idx3 = timedelta_range('1 day', periods=4, freq='s') exp_arr = np.array([0, 1, 2, 3], dtype=np.intp) arr, idx = idx3.factorize() - self.assert_numpy_array_equal(arr, exp_arr) - self.assert_index_equal(idx, idx3) - - def test_join_self(self): + tm.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, idx3) + @pytest.mark.parametrize('kind', ['outer', 'inner', 'left', 'right']) + def test_join_self(self, kind): index = timedelta_range('1 day', periods=10) - kinds = 'outer', 'inner', 'left', 'right' - for kind in kinds: - joined = index.join(index, how=kind) - tm.assert_index_equal(index, joined) - - def test_slice_keeps_name(self): - - # GH4226 - dr = pd.timedelta_range('1d', '5d', freq='H', name='timebucket') - self.assertEqual(dr[1:].name, dr.name) + joined = index.join(index, how=kind) + tm.assert_index_equal(index, joined) def test_does_not_convert_mixed_integer(self): df = tm.makeCustomDataframe(10, 10, @@ -297,8 +116,8 @@ def test_does_not_convert_mixed_integer(self): cols = df.columns.join(df.index, how='outer') joined = cols.join(df.columns) - self.assertEqual(cols.dtype, np.dtype('O')) - self.assertEqual(cols.dtype, joined.dtype) + assert cols.dtype == np.dtype('O') + assert cols.dtype == joined.dtype tm.assert_index_equal(cols, joined) def 
test_sort_values(self): @@ -306,22 +125,22 @@ def test_sort_values(self): idx = TimedeltaIndex(['4d', '1d', '2d']) ordered = idx.sort_values() - self.assertTrue(ordered.is_monotonic) + assert ordered.is_monotonic ordered = idx.sort_values(ascending=False) - self.assertTrue(ordered[::-1].is_monotonic) + assert ordered[::-1].is_monotonic ordered, dexer = idx.sort_values(return_indexer=True) - self.assertTrue(ordered.is_monotonic) - self.assert_numpy_array_equal(dexer, - np.array([1, 2, 0]), - check_dtype=False) + assert ordered.is_monotonic + + tm.assert_numpy_array_equal(dexer, np.array([1, 2, 0]), + check_dtype=False) ordered, dexer = idx.sort_values(return_indexer=True, ascending=False) - self.assertTrue(ordered[::-1].is_monotonic) - self.assert_numpy_array_equal(dexer, - np.array([0, 2, 1]), - check_dtype=False) + assert ordered[::-1].is_monotonic + + tm.assert_numpy_array_equal(dexer, np.array([0, 2, 1]), + check_dtype=False) def test_get_duplicates(self): idx = TimedeltaIndex(['1 day', '2 day', '2 day', '3 day', '3day', @@ -329,27 +148,28 @@ def test_get_duplicates(self): result = idx.get_duplicates() ex = TimedeltaIndex(['2 day', '3day']) - self.assert_index_equal(result, ex) + tm.assert_index_equal(result, ex) def test_argmin_argmax(self): idx = TimedeltaIndex(['1 day 00:00:05', '1 day 00:00:01', '1 day 00:00:02']) - self.assertEqual(idx.argmin(), 1) - self.assertEqual(idx.argmax(), 0) + assert idx.argmin() == 1 + assert idx.argmax() == 0 def test_misc_coverage(self): rng = timedelta_range('1 day', periods=5) result = rng.groupby(rng.days) - tm.assertIsInstance(list(result.values())[0][0], Timedelta) + assert isinstance(list(result.values())[0][0], Timedelta) idx = TimedeltaIndex(['3d', '1d', '2d']) - self.assertFalse(idx.equals(list(idx))) + assert not idx.equals(list(idx)) non_td = Index(list('abc')) - self.assertFalse(idx.equals(list(non_td))) + assert not idx.equals(list(non_td)) def test_map(self): + # test_map_dictlike generally tests rng = 
timedelta_range('1 day', periods=10) @@ -358,90 +178,6 @@ def test_map(self): exp = Int64Index([f(x) for x in rng]) tm.assert_index_equal(result, exp) - def test_comparisons_nat(self): - - tdidx1 = pd.TimedeltaIndex(['1 day', pd.NaT, '1 day 00:00:01', pd.NaT, - '1 day 00:00:01', '5 day 00:00:03']) - tdidx2 = pd.TimedeltaIndex(['2 day', '2 day', pd.NaT, pd.NaT, - '1 day 00:00:02', '5 days 00:00:03']) - tdarr = np.array([np.timedelta64(2, 'D'), - np.timedelta64(2, 'D'), np.timedelta64('nat'), - np.timedelta64('nat'), - np.timedelta64(1, 'D') + np.timedelta64(2, 's'), - np.timedelta64(5, 'D') + np.timedelta64(3, 's')]) - - if _np_version_under1p8: - # cannot test array because np.datetime('nat') returns today's date - cases = [(tdidx1, tdidx2)] - else: - cases = [(tdidx1, tdidx2), (tdidx1, tdarr)] - - # Check pd.NaT is handles as the same as np.nan - for idx1, idx2 in cases: - - result = idx1 < idx2 - expected = np.array([True, False, False, False, True, False]) - self.assert_numpy_array_equal(result, expected) - - result = idx2 > idx1 - expected = np.array([True, False, False, False, True, False]) - self.assert_numpy_array_equal(result, expected) - - result = idx1 <= idx2 - expected = np.array([True, False, False, False, True, True]) - self.assert_numpy_array_equal(result, expected) - - result = idx2 >= idx1 - expected = np.array([True, False, False, False, True, True]) - self.assert_numpy_array_equal(result, expected) - - result = idx1 == idx2 - expected = np.array([False, False, False, False, False, True]) - self.assert_numpy_array_equal(result, expected) - - result = idx1 != idx2 - expected = np.array([True, True, True, True, True, False]) - self.assert_numpy_array_equal(result, expected) - - def test_comparisons_coverage(self): - rng = timedelta_range('1 days', periods=10) - - result = rng < rng[3] - exp = np.array([True, True, True] + [False] * 7) - self.assert_numpy_array_equal(result, exp) - - # raise TypeError for now - self.assertRaises(TypeError, 
rng.__lt__, rng[3].value) - - result = rng == list(rng) - exp = rng == rng - self.assert_numpy_array_equal(result, exp) - - def test_total_seconds(self): - # GH 10939 - # test index - rng = timedelta_range('1 days, 10:11:12.100123456', periods=2, - freq='s') - expt = [1 * 86400 + 10 * 3600 + 11 * 60 + 12 + 100123456. / 1e9, - 1 * 86400 + 10 * 3600 + 11 * 60 + 13 + 100123456. / 1e9] - tm.assert_almost_equal(rng.total_seconds(), np.array(expt)) - - # test Series - s = Series(rng) - s_expt = Series(expt, index=[0, 1]) - tm.assert_series_equal(s.dt.total_seconds(), s_expt) - - # with nat - s[1] = np.nan - s_expt = Series([1 * 86400 + 10 * 3600 + 11 * 60 + - 12 + 100123456. / 1e9, np.nan], index=[0, 1]) - tm.assert_series_equal(s.dt.total_seconds(), s_expt) - - # with both nat - s = Series([np.nan, np.nan], dtype='timedelta64[ns]') - tm.assert_series_equal(s.dt.total_seconds(), - Series([np.nan, np.nan], index=[0, 1])) - def test_pass_TimedeltaIndex_to_index(self): rng = timedelta_range('1 days', '10 days') @@ -449,18 +185,18 @@ def test_pass_TimedeltaIndex_to_index(self): expected = Index(rng.to_pytimedelta(), dtype=object) - self.assert_numpy_array_equal(idx.values, expected.values) + tm.assert_numpy_array_equal(idx.values, expected.values) def test_pickle(self): rng = timedelta_range('1 days', periods=10) - rng_p = self.round_trip_pickle(rng) + rng_p = tm.round_trip_pickle(rng) tm.assert_index_equal(rng, rng_p) def test_hash_error(self): index = timedelta_range('1 days', periods=10) - with tm.assertRaisesRegexp(TypeError, "unhashable type: %r" % - type(index).__name__): + with tm.assert_raises_regex(TypeError, "unhashable type: %r" % + type(index).__name__): hash(index) def test_append_join_nondatetimeindex(self): @@ -468,7 +204,7 @@ def test_append_join_nondatetimeindex(self): idx = Index(['a', 'b', 'c', 'd']) result = rng.append(idx) - tm.assertIsInstance(result[0], Timedelta) + assert isinstance(result[0], Timedelta) # it works rng.join(idx, how='outer') @@ 
-481,25 +217,25 @@ def test_append_numpy_bug_1681(self): str(c) result = a.append(c) - self.assertTrue((result['B'] == td).all()) + assert (result['B'] == td).all() def test_fields(self): rng = timedelta_range('1 days, 10:11:12.100123456', periods=2, freq='s') - self.assert_numpy_array_equal(rng.days, np.array( - [1, 1], dtype='int64')) - self.assert_numpy_array_equal( + tm.assert_index_equal(rng.days, Index([1, 1], dtype='int64')) + tm.assert_index_equal( rng.seconds, - np.array([10 * 3600 + 11 * 60 + 12, 10 * 3600 + 11 * 60 + 13], - dtype='int64')) - self.assert_numpy_array_equal(rng.microseconds, np.array( - [100 * 1000 + 123, 100 * 1000 + 123], dtype='int64')) - self.assert_numpy_array_equal(rng.nanoseconds, np.array( - [456, 456], dtype='int64')) - - self.assertRaises(AttributeError, lambda: rng.hours) - self.assertRaises(AttributeError, lambda: rng.minutes) - self.assertRaises(AttributeError, lambda: rng.milliseconds) + Index([10 * 3600 + 11 * 60 + 12, 10 * 3600 + 11 * 60 + 13], + dtype='int64')) + tm.assert_index_equal( + rng.microseconds, + Index([100 * 1000 + 123, 100 * 1000 + 123], dtype='int64')) + tm.assert_index_equal(rng.nanoseconds, + Index([456, 456], dtype='int64')) + + pytest.raises(AttributeError, lambda: rng.hours) + pytest.raises(AttributeError, lambda: rng.minutes) + pytest.raises(AttributeError, lambda: rng.milliseconds) # with nat s = Series(rng) @@ -509,6 +245,10 @@ def test_fields(self): tm.assert_series_equal(s.dt.seconds, Series( [10 * 3600 + 11 * 60 + 12, np.nan], index=[0, 1])) + # preserve name (GH15589) + rng.name = 'name' + assert rng.days.name == 'name' + def test_freq_conversion(self): # doc example @@ -556,37 +296,10 @@ def test_freq_conversion(self): assert_index_equal(result, expected) -class TestSlicing(tm.TestCase): - - def test_timedelta(self): - # this is valid too - index = date_range('1/1/2000', periods=50, freq='B') - shifted = index + timedelta(1) - back = shifted + timedelta(-1) - 
self.assertTrue(tm.equalContents(index, back)) - self.assertEqual(shifted.freq, index.freq) - self.assertEqual(shifted.freq, back.freq) - - result = index - timedelta(1) - expected = index + timedelta(-1) - tm.assert_index_equal(result, expected) - - # GH4134, buggy with timedeltas - rng = date_range('2013', '2014') - s = Series(rng) - result1 = rng - pd.offsets.Hour(1) - result2 = DatetimeIndex(s - np.timedelta64(100000000)) - result3 = rng - np.timedelta64(100000000) - result4 = DatetimeIndex(s - pd.offsets.Hour(1)) - tm.assert_index_equal(result1, result4) - tm.assert_index_equal(result2, result3) - - -class TestTimeSeries(tm.TestCase): - _multiprocess_can_split_ = True +class TestTimeSeries(object): def test_series_box_timedelta(self): rng = timedelta_range('1 day 1 s', periods=5, freq='h') s = Series(rng) - tm.assertIsInstance(s[1], Timedelta) - tm.assertIsInstance(s.iat[2], Timedelta) + assert isinstance(s[1], Timedelta) + assert isinstance(s.iat[2], Timedelta) diff --git a/pandas/tests/indexes/timedeltas/test_timedelta_range.py b/pandas/tests/indexes/timedeltas/test_timedelta_range.py index 8bd56b5885bba..784ef845fea10 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta_range.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta_range.py @@ -1,5 +1,4 @@ import numpy as np - import pandas as pd import pandas.util.testing as tm from pandas.tseries.offsets import Day, Second @@ -7,8 +6,7 @@ from pandas.util.testing import assert_frame_equal -class TestTimedeltas(tm.TestCase): - _multiprocess_can_split_ = True +class TestTimedeltas(object): def test_timedelta_range(self): @@ -37,10 +35,10 @@ def test_timedelta_range(self): arr = np.arange(10).reshape(2, 5) df = pd.DataFrame(np.arange(10).reshape(2, 5)) for arg in (arr, df): - with tm.assertRaisesRegexp(TypeError, "1-d array"): + with tm.assert_raises_regex(TypeError, "1-d array"): to_timedelta(arg) for errors in ['ignore', 'raise', 'coerce']: - with tm.assertRaisesRegexp(TypeError, "1-d array"): + with 
tm.assert_raises_regex(TypeError, "1-d array"): to_timedelta(arg, errors=errors) # issue10583 @@ -49,3 +47,23 @@ def test_timedelta_range(self): expected = df.loc[pd.Timedelta('0s'):, :] result = df.loc['0s':, :] assert_frame_equal(expected, result) + + def test_errors(self): + # not enough params + msg = ('Of the three parameters: start, end, and periods, ' + 'exactly two must be specified') + with tm.assert_raises_regex(ValueError, msg): + timedelta_range(start='0 days') + + with tm.assert_raises_regex(ValueError, msg): + timedelta_range(end='5 days') + + with tm.assert_raises_regex(ValueError, msg): + timedelta_range(periods=2) + + with tm.assert_raises_regex(ValueError, msg): + timedelta_range() + + # too many params + with tm.assert_raises_regex(ValueError, msg): + timedelta_range(start='0 days', end='5 days', periods=10) diff --git a/pandas/tests/indexes/timedeltas/test_tools.py b/pandas/tests/indexes/timedeltas/test_tools.py index 2442051547312..daa9739132d9e 100644 --- a/pandas/tests/indexes/timedeltas/test_tools.py +++ b/pandas/tests/indexes/timedeltas/test_tools.py @@ -1,15 +1,16 @@ +import pytest + from datetime import time, timedelta import numpy as np import pandas as pd import pandas.util.testing as tm from pandas.util.testing import assert_series_equal -from pandas import (Series, Timedelta, to_timedelta, tslib, isnull, - TimedeltaIndex) +from pandas import Series, to_timedelta, isna, TimedeltaIndex +from pandas._libs.tslib import iNaT -class TestTimedeltas(tm.TestCase): - _multiprocess_can_split_ = True +class TestTimedeltas(object): def test_to_timedelta(self): def conv(v): @@ -17,19 +18,18 @@ def conv(v): d1 = np.timedelta64(1, 'D') - self.assertEqual(to_timedelta('1 days 06:05:01.00003', box=False), - conv(d1 + np.timedelta64(6 * 3600 + - 5 * 60 + 1, 's') + - np.timedelta64(30, 'us'))) - self.assertEqual(to_timedelta('15.5us', box=False), - conv(np.timedelta64(15500, 'ns'))) + assert (to_timedelta('1 days 06:05:01.00003', box=False) == + conv(d1 
+ np.timedelta64(6 * 3600 + 5 * 60 + 1, 's') + + np.timedelta64(30, 'us'))) + assert (to_timedelta('15.5us', box=False) == + conv(np.timedelta64(15500, 'ns'))) # empty string result = to_timedelta('', box=False) - self.assertEqual(result.astype('int64'), tslib.iNaT) + assert result.astype('int64') == iNaT result = to_timedelta(['', '']) - self.assertTrue(isnull(result).all()) + assert isna(result).all() # pass thru result = to_timedelta(np.array([np.timedelta64(1, 's')])) @@ -39,7 +39,7 @@ def conv(v): # ints result = np.timedelta64(0, 'ns') expected = to_timedelta(0, box=False) - self.assertEqual(result, expected) + assert result == expected # Series expected = Series([timedelta(days=1), timedelta(days=1, seconds=1)]) @@ -56,12 +56,12 @@ def conv(v): v = timedelta(seconds=1) result = to_timedelta(v, box=False) expected = np.timedelta64(timedelta(seconds=1)) - self.assertEqual(result, expected) + assert result == expected v = np.timedelta64(timedelta(seconds=1)) result = to_timedelta(v, box=False) expected = np.timedelta64(timedelta(seconds=1)) - self.assertEqual(result, expected) + assert result == expected # arrays of various dtypes arr = np.array([1] * 5, dtype='int64') @@ -110,19 +110,18 @@ def test_to_timedelta_invalid(self): # bad value for errors parameter msg = "errors must be one of" - tm.assertRaisesRegexp(ValueError, msg, to_timedelta, - ['foo'], errors='never') + tm.assert_raises_regex(ValueError, msg, to_timedelta, + ['foo'], errors='never') # these will error - self.assertRaises(ValueError, lambda: to_timedelta([1, 2], unit='foo')) - self.assertRaises(ValueError, lambda: to_timedelta(1, unit='foo')) + pytest.raises(ValueError, lambda: to_timedelta([1, 2], unit='foo')) + pytest.raises(ValueError, lambda: to_timedelta(1, unit='foo')) # time not supported ATM - self.assertRaises(ValueError, lambda: to_timedelta(time(second=1))) - self.assertTrue(to_timedelta( - time(second=1), errors='coerce') is pd.NaT) + pytest.raises(ValueError, lambda: 
to_timedelta(time(second=1))) + assert to_timedelta(time(second=1), errors='coerce') is pd.NaT - self.assertRaises(ValueError, lambda: to_timedelta(['foo', 'bar'])) + pytest.raises(ValueError, lambda: to_timedelta(['foo', 'bar'])) tm.assert_index_equal(TimedeltaIndex([pd.NaT, pd.NaT]), to_timedelta(['foo', 'bar'], errors='coerce')) @@ -132,8 +131,7 @@ def test_to_timedelta_invalid(self): # gh-13613: these should not error because errors='ignore' invalid_data = 'apple' - self.assertEqual(invalid_data, to_timedelta( - invalid_data, errors='ignore')) + assert invalid_data == to_timedelta(invalid_data, errors='ignore') invalid_data = ['apple', '1 days'] tm.assert_numpy_array_equal( @@ -170,32 +168,7 @@ def test_to_timedelta_on_missing_values(self): assert_series_equal(actual, expected) actual = pd.to_timedelta(np.nan) - self.assertEqual(actual.value, timedelta_NaT.astype('int64')) + assert actual.value == timedelta_NaT.astype('int64') actual = pd.to_timedelta(pd.NaT) - self.assertEqual(actual.value, timedelta_NaT.astype('int64')) - - def test_to_timedelta_on_nanoseconds(self): - # GH 9273 - result = Timedelta(nanoseconds=100) - expected = Timedelta('100ns') - self.assertEqual(result, expected) - - result = Timedelta(days=1, hours=1, minutes=1, weeks=1, seconds=1, - milliseconds=1, microseconds=1, nanoseconds=1) - expected = Timedelta(694861001001001) - self.assertEqual(result, expected) - - result = Timedelta(microseconds=1) + Timedelta(nanoseconds=1) - expected = Timedelta('1us1ns') - self.assertEqual(result, expected) - - result = Timedelta(microseconds=1) - Timedelta(nanoseconds=1) - expected = Timedelta('999ns') - self.assertEqual(result, expected) - - result = Timedelta(microseconds=1) + 5 * Timedelta(nanoseconds=-2) - expected = Timedelta('990ns') - self.assertEqual(result, expected) - - self.assertRaises(TypeError, lambda: Timedelta(nanoseconds='abc')) + assert actual.value == timedelta_NaT.astype('int64') diff --git a/pandas/tests/indexing/common.py 
b/pandas/tests/indexing/common.py index 73167393cf35d..ded16224aedf2 100644 --- a/pandas/tests/indexing/common.py +++ b/pandas/tests/indexing/common.py @@ -1,5 +1,279 @@ """ common utilities """ +import itertools +from warnings import catch_warnings +import numpy as np + +from pandas.compat import lrange +from pandas.core.dtypes.common import is_scalar +from pandas import Series, DataFrame, Panel, date_range, UInt64Index +from pandas.util import testing as tm +from pandas.io.formats.printing import pprint_thing + +_verbose = False + def _mklbl(prefix, n): return ["%s%s" % (prefix, i) for i in range(n)] + + +def _axify(obj, key, axis): + # create a tuple accessor + axes = [slice(None)] * obj.ndim + axes[axis] = key + return tuple(axes) + + +class Base(object): + """ indexing comprehensive base class """ + + _objs = set(['series', 'frame', 'panel']) + _typs = set(['ints', 'uints', 'labels', 'mixed', + 'ts', 'floats', 'empty', 'ts_rev']) + + def setup_method(self, method): + + self.series_ints = Series(np.random.rand(4), index=lrange(0, 8, 2)) + self.frame_ints = DataFrame(np.random.randn(4, 4), + index=lrange(0, 8, 2), + columns=lrange(0, 12, 3)) + with catch_warnings(record=True): + self.panel_ints = Panel(np.random.rand(4, 4, 4), + items=lrange(0, 8, 2), + major_axis=lrange(0, 12, 3), + minor_axis=lrange(0, 16, 4)) + + self.series_uints = Series(np.random.rand(4), + index=UInt64Index(lrange(0, 8, 2))) + self.frame_uints = DataFrame(np.random.randn(4, 4), + index=UInt64Index(lrange(0, 8, 2)), + columns=UInt64Index(lrange(0, 12, 3))) + with catch_warnings(record=True): + self.panel_uints = Panel(np.random.rand(4, 4, 4), + items=UInt64Index(lrange(0, 8, 2)), + major_axis=UInt64Index(lrange(0, 12, 3)), + minor_axis=UInt64Index(lrange(0, 16, 4))) + + self.series_labels = Series(np.random.randn(4), index=list('abcd')) + self.frame_labels = DataFrame(np.random.randn(4, 4), + index=list('abcd'), columns=list('ABCD')) + with catch_warnings(record=True): + self.panel_labels 
= Panel(np.random.randn(4, 4, 4), + items=list('abcd'), + major_axis=list('ABCD'), + minor_axis=list('ZYXW')) + + self.series_mixed = Series(np.random.randn(4), index=[2, 4, 'null', 8]) + self.frame_mixed = DataFrame(np.random.randn(4, 4), + index=[2, 4, 'null', 8]) + with catch_warnings(record=True): + self.panel_mixed = Panel(np.random.randn(4, 4, 4), + items=[2, 4, 'null', 8]) + + self.series_ts = Series(np.random.randn(4), + index=date_range('20130101', periods=4)) + self.frame_ts = DataFrame(np.random.randn(4, 4), + index=date_range('20130101', periods=4)) + with catch_warnings(record=True): + self.panel_ts = Panel(np.random.randn(4, 4, 4), + items=date_range('20130101', periods=4)) + + dates_rev = (date_range('20130101', periods=4) + .sort_values(ascending=False)) + self.series_ts_rev = Series(np.random.randn(4), + index=dates_rev) + self.frame_ts_rev = DataFrame(np.random.randn(4, 4), + index=dates_rev) + with catch_warnings(record=True): + self.panel_ts_rev = Panel(np.random.randn(4, 4, 4), + items=dates_rev) + + self.frame_empty = DataFrame({}) + self.series_empty = Series({}) + with catch_warnings(record=True): + self.panel_empty = Panel({}) + + # form agglomerates + for o in self._objs: + + d = dict() + for t in self._typs: + d[t] = getattr(self, '%s_%s' % (o, t), None) + + setattr(self, o, d) + + def generate_indices(self, f, values=False): + """ generate the indicies + if values is True , use the axis values + is False, use the range + """ + + axes = f.axes + if values: + axes = [lrange(len(a)) for a in axes] + + return itertools.product(*axes) + + def get_result(self, obj, method, key, axis): + """ return the result for this obj with this key and this axis """ + + if isinstance(key, dict): + key = key[axis] + + # use an artificial conversion to map the key as integers to the labels + # so ix can work for comparisons + if method == 'indexer': + method = 'ix' + key = obj._get_axis(axis)[key] + + # in case we actually want 0 index slicing + with 
catch_warnings(record=True): + try: + xp = getattr(obj, method).__getitem__(_axify(obj, key, axis)) + except: + xp = getattr(obj, method).__getitem__(key) + + return xp + + def get_value(self, f, i, values=False): + """ return the value for the location i """ + + # check against values + if values: + return f.values[i] + + # this is equiv of f[col][row]..... + # v = f + # for a in reversed(i): + # v = v.__getitem__(a) + # return v + with catch_warnings(record=True): + return f.ix[i] + + def check_values(self, f, func, values=False): + + if f is None: + return + axes = f.axes + indicies = itertools.product(*axes) + + for i in indicies: + result = getattr(f, func)[i] + + # check against values + if values: + expected = f.values[i] + else: + expected = f + for a in reversed(i): + expected = expected.__getitem__(a) + + tm.assert_almost_equal(result, expected) + + def check_result(self, name, method1, key1, method2, key2, typs=None, + objs=None, axes=None, fails=None): + def _eq(t, o, a, obj, k1, k2): + """ compare equal for these 2 keys """ + + if a is not None and a > obj.ndim - 1: + return + + def _print(result, error=None): + if error is not None: + error = str(error) + v = ("%-16.16s [%-16.16s]: [typ->%-8.8s,obj->%-8.8s," + "key1->(%-4.4s),key2->(%-4.4s),axis->%s] %s" % + (name, result, t, o, method1, method2, a, error or '')) + if _verbose: + pprint_thing(v) + + try: + rs = getattr(obj, method1).__getitem__(_axify(obj, k1, a)) + + try: + xp = self.get_result(obj, method2, k2, a) + except: + result = 'no comp' + _print(result) + return + + detail = None + + try: + if is_scalar(rs) and is_scalar(xp): + assert rs == xp + elif xp.ndim == 1: + tm.assert_series_equal(rs, xp) + elif xp.ndim == 2: + tm.assert_frame_equal(rs, xp) + elif xp.ndim == 3: + tm.assert_panel_equal(rs, xp) + result = 'ok' + except AssertionError as e: + detail = str(e) + result = 'fail' + + # reverse the checks + if fails is True: + if result == 'fail': + result = 'ok (fail)' + + _print(result) + 
if not result.startswith('ok'): + raise AssertionError(detail) + + except AssertionError: + raise + except Exception as detail: + + # if we are in fails, the ok, otherwise raise it + if fails is not None: + if isinstance(detail, fails): + result = 'ok (%s)' % type(detail).__name__ + _print(result) + return + + result = type(detail).__name__ + raise AssertionError(_print(result, error=detail)) + + if typs is None: + typs = self._typs + + if objs is None: + objs = self._objs + + if axes is not None: + if not isinstance(axes, (tuple, list)): + axes = [axes] + else: + axes = list(axes) + else: + axes = [0, 1, 2] + + # check + for o in objs: + if o not in self._objs: + continue + + d = getattr(self, o) + for a in axes: + for t in typs: + if t not in self._typs: + continue + + obj = d[t] + if obj is None: + continue + + def _call(obj=obj): + obj = obj.copy() + + k2 = key2 + _eq(t, o, a, obj, key1, k2) + + # Panel deprecations + if isinstance(obj, Panel): + with catch_warnings(record=True): + _call() + else: + _call() diff --git a/pandas/tests/indexing/interval/__init__.py b/pandas/tests/indexing/interval/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/indexing/interval/test_interval.py b/pandas/tests/indexing/interval/test_interval.py new file mode 100644 index 0000000000000..233fbd2c8d7be --- /dev/null +++ b/pandas/tests/indexing/interval/test_interval.py @@ -0,0 +1,270 @@ +import pytest +import numpy as np +import pandas as pd + +from pandas import Series, DataFrame, IntervalIndex, Interval +from pandas.compat import product +import pandas.util.testing as tm + + +class TestIntervalIndex(object): + + def setup_method(self, method): + self.s = Series(np.arange(5), IntervalIndex.from_breaks(np.arange(6))) + + # To be removed, replaced by test_interval_new.py (see #16316, #16386) + def test_loc_with_scalar(self): + + s = self.s + + expected = s.iloc[:3] + tm.assert_series_equal(expected, s.loc[:3]) + 
tm.assert_series_equal(expected, s.loc[:2.5]) + tm.assert_series_equal(expected, s.loc[0.1:2.5]) + tm.assert_series_equal(expected, s.loc[-1:3]) + + expected = s.iloc[1:4] + tm.assert_series_equal(expected, s.loc[[1.5, 2.5, 3.5]]) + tm.assert_series_equal(expected, s.loc[[2, 3, 4]]) + tm.assert_series_equal(expected, s.loc[[1.5, 3, 4]]) + + expected = s.iloc[2:5] + tm.assert_series_equal(expected, s.loc[s >= 2]) + + # TODO: check this behavior is consistent with test_interval_new.py + def test_getitem_with_scalar(self): + + s = self.s + + expected = s.iloc[:3] + tm.assert_series_equal(expected, s[:3]) + tm.assert_series_equal(expected, s[:2.5]) + tm.assert_series_equal(expected, s[0.1:2.5]) + tm.assert_series_equal(expected, s[-1:3]) + + expected = s.iloc[1:4] + tm.assert_series_equal(expected, s[[1.5, 2.5, 3.5]]) + tm.assert_series_equal(expected, s[[2, 3, 4]]) + tm.assert_series_equal(expected, s[[1.5, 3, 4]]) + + expected = s.iloc[2:5] + tm.assert_series_equal(expected, s[s >= 2]) + + # TODO: check this behavior is consistent with test_interval_new.py + @pytest.mark.parametrize('direction, closed', + product(('increasing', 'decreasing'), + ('left', 'right', 'neither', 'both'))) + def test_nonoverlapping_monotonic(self, direction, closed): + tpls = [(0, 1), (2, 3), (4, 5)] + if direction == 'decreasing': + tpls = tpls[::-1] + + idx = IntervalIndex.from_tuples(tpls, closed=closed) + s = Series(list('abc'), idx) + + for key, expected in zip(idx.left, s): + if idx.closed_left: + assert s[key] == expected + assert s.loc[key] == expected + else: + with pytest.raises(KeyError): + s[key] + with pytest.raises(KeyError): + s.loc[key] + + for key, expected in zip(idx.right, s): + if idx.closed_right: + assert s[key] == expected + assert s.loc[key] == expected + else: + with pytest.raises(KeyError): + s[key] + with pytest.raises(KeyError): + s.loc[key] + + for key, expected in zip(idx.mid, s): + assert s[key] == expected + assert s.loc[key] == expected + + # To be removed, 
replaced by test_interval_new.py (see #16316, #16386) + def test_with_interval(self): + + s = self.s + expected = 0 + + result = s.loc[Interval(0, 1)] + assert result == expected + + result = s[Interval(0, 1)] + assert result == expected + + expected = s.iloc[3:5] + result = s.loc[Interval(3, 6)] + tm.assert_series_equal(expected, result) + + expected = s.iloc[3:5] + result = s.loc[[Interval(3, 6)]] + tm.assert_series_equal(expected, result) + + expected = s.iloc[3:5] + result = s.loc[[Interval(3, 5)]] + tm.assert_series_equal(expected, result) + + # missing + with pytest.raises(KeyError): + s.loc[Interval(-2, 0)] + + with pytest.raises(KeyError): + s[Interval(-2, 0)] + + with pytest.raises(KeyError): + s.loc[Interval(5, 6)] + + with pytest.raises(KeyError): + s[Interval(5, 6)] + + # To be removed, replaced by test_interval_new.py (see #16316, #16386) + def test_with_slices(self): + + s = self.s + + # slice of interval + with pytest.raises(NotImplementedError): + s.loc[Interval(3, 6):] + + with pytest.raises(NotImplementedError): + s[Interval(3, 6):] + + expected = s.iloc[3:5] + result = s[[Interval(3, 6)]] + tm.assert_series_equal(expected, result) + + # slice of scalar with step != 1 + with pytest.raises(ValueError): + s[0:4:2] + + # To be removed, replaced by test_interval_new.py (see #16316, #16386) + def test_with_overlaps(self): + + s = self.s + expected = s.iloc[[3, 4, 3, 4]] + result = s.loc[[Interval(3, 6), Interval(3, 6)]] + tm.assert_series_equal(expected, result) + + idx = IntervalIndex.from_tuples([(1, 5), (3, 7)]) + s = Series(range(len(idx)), index=idx) + + result = s[4] + expected = s + tm.assert_series_equal(expected, result) + + result = s[[4]] + expected = s + tm.assert_series_equal(expected, result) + + result = s.loc[[4]] + expected = s + tm.assert_series_equal(expected, result) + + result = s[Interval(3, 5)] + expected = s + tm.assert_series_equal(expected, result) + + result = s.loc[Interval(3, 5)] + expected = s + 
tm.assert_series_equal(expected, result) + + # doesn't intersect unique set of intervals + with pytest.raises(KeyError): + s[[Interval(3, 5)]] + + with pytest.raises(KeyError): + s.loc[[Interval(3, 5)]] + + # To be removed, replaced by test_interval_new.py (see #16316, #16386) + def test_non_unique(self): + + idx = IntervalIndex.from_tuples([(1, 3), (3, 7)]) + + s = Series(range(len(idx)), index=idx) + + result = s.loc[Interval(1, 3)] + assert result == 0 + + result = s.loc[[Interval(1, 3)]] + expected = s.iloc[0:1] + tm.assert_series_equal(expected, result) + + # To be removed, replaced by test_interval_new.py (see #16316, #16386) + def test_non_unique_moar(self): + + idx = IntervalIndex.from_tuples([(1, 3), (1, 3), (3, 7)]) + s = Series(range(len(idx)), index=idx) + + result = s.loc[Interval(1, 3)] + expected = s.iloc[[0, 1]] + tm.assert_series_equal(expected, result) + + # non-unique index and slices not allowed + with pytest.raises(ValueError): + s.loc[Interval(1, 3):] + + with pytest.raises(ValueError): + s[Interval(1, 3):] + + # non-unique + with pytest.raises(ValueError): + s[[Interval(1, 3)]] + + # TODO: check this behavior is consistent with test_interval_new.py + def test_non_matching(self): + s = self.s + + # this is a departure from our current + # indexin scheme, but simpler + with pytest.raises(KeyError): + s.loc[[-1, 3, 4, 5]] + + with pytest.raises(KeyError): + s.loc[[-1, 3]] + + def test_large_series(self): + s = Series(np.arange(1000000), + index=IntervalIndex.from_breaks(np.arange(1000001))) + + result1 = s.loc[:80000] + result2 = s.loc[0:80000] + result3 = s.loc[0:80000:1] + tm.assert_series_equal(result1, result2) + tm.assert_series_equal(result1, result3) + + def test_loc_getitem_frame(self): + + df = DataFrame({'A': range(10)}) + s = pd.cut(df.A, 5) + df['B'] = s + df = df.set_index('B') + + result = df.loc[4] + expected = df.iloc[4:6] + tm.assert_frame_equal(result, expected) + + with pytest.raises(KeyError): + df.loc[10] + + # single 
list-like + result = df.loc[[4]] + expected = df.iloc[4:6] + tm.assert_frame_equal(result, expected) + + # non-unique + result = df.loc[[4, 5]] + expected = df.take([4, 5, 4, 5]) + tm.assert_frame_equal(result, expected) + + with pytest.raises(KeyError): + df.loc[[10]] + + # partial missing + with pytest.raises(KeyError): + df.loc[[10, 4]] diff --git a/pandas/tests/indexing/interval/test_interval_new.py b/pandas/tests/indexing/interval/test_interval_new.py new file mode 100644 index 0000000000000..3eb5f38ba0c80 --- /dev/null +++ b/pandas/tests/indexing/interval/test_interval_new.py @@ -0,0 +1,247 @@ +import pytest +import numpy as np + +from pandas import Series, IntervalIndex, Interval +import pandas.util.testing as tm + + +pytestmark = pytest.mark.skip(reason="new indexing tests for issue 16316") + + +class TestIntervalIndex(object): + + def setup_method(self, method): + self.s = Series(np.arange(5), IntervalIndex.from_breaks(np.arange(6))) + + def test_loc_with_interval(self): + + # loc with single label / list of labels: + # - Intervals: only exact matches + # - scalars: those that contain it + + s = self.s + + expected = 0 + result = s.loc[Interval(0, 1)] + assert result == expected + result = s[Interval(0, 1)] + assert result == expected + + expected = s.iloc[3:5] + result = s.loc[[Interval(3, 4), Interval(4, 5)]] + tm.assert_series_equal(expected, result) + result = s[[Interval(3, 4), Interval(4, 5)]] + tm.assert_series_equal(expected, result) + + # missing or not exact + with pytest.raises(KeyError): + s.loc[Interval(3, 5, closed='left')] + + with pytest.raises(KeyError): + s[Interval(3, 5, closed='left')] + + with pytest.raises(KeyError): + s[Interval(3, 5)] + + with pytest.raises(KeyError): + s.loc[Interval(3, 5)] + + with pytest.raises(KeyError): + s[Interval(3, 5)] + + with pytest.raises(KeyError): + s.loc[Interval(-2, 0)] + + with pytest.raises(KeyError): + s[Interval(-2, 0)] + + with pytest.raises(KeyError): + s.loc[Interval(5, 6)] + + with 
pytest.raises(KeyError): + s[Interval(5, 6)] + + def test_loc_with_scalar(self): + + # loc with single label / list of labels: + # - Intervals: only exact matches + # - scalars: those that contain it + + s = self.s + + assert s.loc[1] == 0 + assert s.loc[1.5] == 1 + assert s.loc[2] == 1 + + # TODO with __getitem__ same rules as loc, or positional ? + # assert s[1] == 0 + # assert s[1.5] == 1 + # assert s[2] == 1 + + expected = s.iloc[1:4] + tm.assert_series_equal(expected, s.loc[[1.5, 2.5, 3.5]]) + tm.assert_series_equal(expected, s.loc[[2, 3, 4]]) + tm.assert_series_equal(expected, s.loc[[1.5, 3, 4]]) + + expected = s.iloc[[1, 1, 2, 1]] + tm.assert_series_equal(expected, s.loc[[1.5, 2, 2.5, 1.5]]) + + expected = s.iloc[2:5] + tm.assert_series_equal(expected, s.loc[s >= 2]) + + def test_loc_with_slices(self): + + # loc with slices: + # - Interval objects: only works with exact matches + # - scalars: only works for non-overlapping, monotonic intervals, + # and start/stop select location based on the interval that + # contains them: + # (slice_loc(start, stop) == (idx.get_loc(start), idx.get_loc(stop)) + + s = self.s + + # slice of interval + + expected = s.iloc[:3] + result = s.loc[Interval(0, 1):Interval(2, 3)] + tm.assert_series_equal(expected, result) + result = s[Interval(0, 1):Interval(2, 3)] + tm.assert_series_equal(expected, result) + + expected = s.iloc[4:] + result = s.loc[Interval(3, 4):] + tm.assert_series_equal(expected, result) + result = s[Interval(3, 4):] + tm.assert_series_equal(expected, result) + + with pytest.raises(KeyError): + s.loc[Interval(3, 6):] + + with pytest.raises(KeyError): + s[Interval(3, 6):] + + with pytest.raises(KeyError): + s.loc[Interval(3, 4, closed='left'):] + + with pytest.raises(KeyError): + s[Interval(3, 4, closed='left'):] + + # TODO with non-existing intervals ? 
+ # s.loc[Interval(-1, 0):Interval(2, 3)] + + # slice of scalar + + expected = s.iloc[:3] + tm.assert_series_equal(expected, s.loc[:3]) + tm.assert_series_equal(expected, s.loc[:2.5]) + tm.assert_series_equal(expected, s.loc[0.1:2.5]) + + # TODO should this work? (-1 is not contained in any of the Intervals) + # tm.assert_series_equal(expected, s.loc[-1:3]) + + # TODO with __getitem__ same rules as loc, or positional ? + # tm.assert_series_equal(expected, s[:3]) + # tm.assert_series_equal(expected, s[:2.5]) + # tm.assert_series_equal(expected, s[0.1:2.5]) + + # slice of scalar with step != 1 + with pytest.raises(NotImplementedError): + s[0:4:2] + + def test_loc_with_overlap(self): + + idx = IntervalIndex.from_tuples([(1, 5), (3, 7)]) + s = Series(range(len(idx)), index=idx) + + # scalar + expected = s + result = s.loc[4] + tm.assert_series_equal(expected, result) + + result = s[4] + tm.assert_series_equal(expected, result) + + result = s.loc[[4]] + tm.assert_series_equal(expected, result) + + result = s[[4]] + tm.assert_series_equal(expected, result) + + # interval + expected = 0 + result = s.loc[Interval(1, 5)] + tm.assert_series_equal(expected, result) + + result = s[Interval(1, 5)] + tm.assert_series_equal(expected, result) + + expected = s + result = s.loc[[Interval(1, 5), Interval(3, 7)]] + tm.assert_series_equal(expected, result) + + result = s[[Interval(1, 5), Interval(3, 7)]] + tm.assert_series_equal(expected, result) + + with pytest.raises(KeyError): + s.loc[Interval(3, 5)] + + with pytest.raises(KeyError): + s.loc[[Interval(3, 5)]] + + with pytest.raises(KeyError): + s[Interval(3, 5)] + + with pytest.raises(KeyError): + s[[Interval(3, 5)]] + + # slices with interval (only exact matches) + expected = s + result = s.loc[Interval(1, 5):Interval(3, 7)] + tm.assert_series_equal(expected, result) + + result = s[Interval(1, 5):Interval(3, 7)] + tm.assert_series_equal(expected, result) + + with pytest.raises(KeyError): + s.loc[Interval(1, 6):Interval(3, 8)] + + 
with pytest.raises(KeyError): + s[Interval(1, 6):Interval(3, 8)] + + # slices with scalar raise for overlapping intervals + # TODO KeyError is the appropriate error? + with pytest.raises(KeyError): + s.loc[1:4] + + def test_non_unique(self): + + idx = IntervalIndex.from_tuples([(1, 3), (3, 7)]) + s = Series(range(len(idx)), index=idx) + + result = s.loc[Interval(1, 3)] + assert result == 0 + + result = s.loc[[Interval(1, 3)]] + expected = s.iloc[0:1] + tm.assert_series_equal(expected, result) + + def test_non_unique_moar(self): + + idx = IntervalIndex.from_tuples([(1, 3), (1, 3), (3, 7)]) + s = Series(range(len(idx)), index=idx) + + expected = s.iloc[[0, 1]] + result = s.loc[Interval(1, 3)] + tm.assert_series_equal(expected, result) + + expected = s + result = s.loc[Interval(1, 3):] + tm.assert_series_equal(expected, result) + + expected = s + result = s[Interval(1, 3):] + tm.assert_series_equal(expected, result) + + expected = s.iloc[[0, 1]] + result = s[[Interval(1, 3)]] + tm.assert_series_equal(expected, result) diff --git a/pandas/tests/indexing/test_callable.py b/pandas/tests/indexing/test_callable.py index 1d70205076b86..95b406517be62 100644 --- a/pandas/tests/indexing/test_callable.py +++ b/pandas/tests/indexing/test_callable.py @@ -6,7 +6,7 @@ import pandas.util.testing as tm -class TestIndexingCallable(tm.TestCase): +class TestIndexingCallable(object): def test_frame_loc_ix_callable(self): # GH 11485 @@ -59,10 +59,10 @@ def test_frame_loc_ix_callable(self): # scalar res = df.loc[lambda x: 1, lambda x: 'A'] - self.assertEqual(res, df.loc[1, 'A']) + assert res == df.loc[1, 'A'] res = df.loc[lambda x: 1, lambda x: 'A'] - self.assertEqual(res, df.loc[1, 'A']) + assert res == df.loc[1, 'A'] def test_frame_loc_ix_callable_mixture(self): # GH 11485 diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py index b8a24cb2dcb03..634ad0d8160ed 100644 --- a/pandas/tests/indexing/test_categorical.py +++ 
b/pandas/tests/indexing/test_categorical.py @@ -1,39 +1,43 @@ # -*- coding: utf-8 -*- +import pytest + import pandas as pd +import pandas.compat as compat import numpy as np -from pandas import Series, DataFrame +from pandas import (Series, DataFrame, Timestamp, Categorical, + CategoricalIndex, Interval, Index) from pandas.util.testing import assert_series_equal, assert_frame_equal from pandas.util import testing as tm +from pandas.core.dtypes.common import is_categorical_dtype +from pandas.api.types import CategoricalDtype as CDT +from pandas.core.dtypes.dtypes import CategoricalDtype -class TestCategoricalIndex(tm.TestCase): +class TestCategoricalIndex(object): - def setUp(self): + def setup_method(self, method): self.df = DataFrame({'A': np.arange(6, dtype='int64'), 'B': Series(list('aabbca')).astype( - 'category', categories=list( - 'cab'))}).set_index('B') + CDT(list('cab')))}).set_index('B') self.df2 = DataFrame({'A': np.arange(6, dtype='int64'), 'B': Series(list('aabbca')).astype( - 'category', categories=list( - 'cabe'))}).set_index('B') + CDT(list('cabe')))}).set_index('B') self.df3 = DataFrame({'A': np.arange(6, dtype='int64'), 'B': (Series([1, 1, 2, 1, 3, 2]) - .astype('category', categories=[3, 2, 1], - ordered=True))}).set_index('B') + .astype(CDT([3, 2, 1], ordered=True))) + }).set_index('B') self.df4 = DataFrame({'A': np.arange(6, dtype='int64'), 'B': (Series([1, 1, 2, 1, 3, 2]) - .astype('category', categories=[3, 2, 1], - ordered=False))}).set_index('B') + .astype(CDT([3, 2, 1], ordered=False))) + }).set_index('B') def test_loc_scalar(self): result = self.df.loc['a'] expected = (DataFrame({'A': [0, 1, 5], 'B': (Series(list('aaa')) - .astype('category', - categories=list('cab')))}) + .astype(CDT(list('cab'))))}) .set_index('B')) assert_frame_equal(result, expected) @@ -41,28 +45,261 @@ def test_loc_scalar(self): df.loc['a'] = 20 expected = (DataFrame({'A': [20, 20, 2, 3, 4, 20], 'B': (Series(list('aabbca')) - .astype('category', - 
categories=list('cab')))}) + .astype(CDT(list('cab'))))}) .set_index('B')) assert_frame_equal(df, expected) # value not in the categories - self.assertRaises(KeyError, lambda: df.loc['d']) + pytest.raises(KeyError, lambda: df.loc['d']) def f(): df.loc['d'] = 10 - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) def f(): df.loc['d', 'A'] = 10 - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) def f(): df.loc['d', 'C'] = 10 - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) + + def test_getitem_scalar(self): + + cats = Categorical([Timestamp('12-31-1999'), + Timestamp('12-31-2000')]) + + s = Series([1, 2], index=cats) + + expected = s.iloc[0] + result = s[cats[0]] + assert result == expected + + def test_slicing_directly(self): + cat = Categorical(["a", "b", "c", "d", "a", "b", "c"]) + sliced = cat[3] + assert sliced == "d" + sliced = cat[3:5] + expected = Categorical(["d", "a"], categories=['a', 'b', 'c', 'd']) + tm.assert_numpy_array_equal(sliced._codes, expected._codes) + tm.assert_index_equal(sliced.categories, expected.categories) + + def test_slicing(self): + cat = Series(Categorical([1, 2, 3, 4])) + reversed = cat[::-1] + exp = np.array([4, 3, 2, 1], dtype=np.int64) + tm.assert_numpy_array_equal(reversed.__array__(), exp) + + df = DataFrame({'value': (np.arange(100) + 1).astype('int64')}) + df['D'] = pd.cut(df.value, bins=[0, 25, 50, 75, 100]) + + expected = Series([11, Interval(0, 25)], index=['value', 'D'], name=10) + result = df.iloc[10] + tm.assert_series_equal(result, expected) + + expected = DataFrame({'value': np.arange(11, 21).astype('int64')}, + index=np.arange(10, 20).astype('int64')) + expected['D'] = pd.cut(expected.value, bins=[0, 25, 50, 75, 100]) + result = df.iloc[10:20] + tm.assert_frame_equal(result, expected) + + expected = Series([9, Interval(0, 25)], index=['value', 'D'], name=8) + result = df.loc[8] + tm.assert_series_equal(result, expected) + + def test_slicing_and_getting_ops(self): + + # 
systematically test the slicing operations: + # for all slicing ops: + # - returning a dataframe + # - returning a column + # - returning a row + # - returning a single value + + cats = Categorical( + ["a", "c", "b", "c", "c", "c", "c"], categories=["a", "b", "c"]) + idx = Index(["h", "i", "j", "k", "l", "m", "n"]) + values = [1, 2, 3, 4, 5, 6, 7] + df = DataFrame({"cats": cats, "values": values}, index=idx) + + # the expected values + cats2 = Categorical(["b", "c"], categories=["a", "b", "c"]) + idx2 = Index(["j", "k"]) + values2 = [3, 4] + + # 2:4,: | "j":"k",: + exp_df = DataFrame({"cats": cats2, "values": values2}, index=idx2) + + # :,"cats" | :,0 + exp_col = Series(cats, index=idx, name='cats') + + # "j",: | 2,: + exp_row = Series(["b", 3], index=["cats", "values"], dtype="object", + name="j") + + # "j","cats | 2,0 + exp_val = "b" + + # iloc + # frame + res_df = df.iloc[2:4, :] + tm.assert_frame_equal(res_df, exp_df) + assert is_categorical_dtype(res_df["cats"]) + + # row + res_row = df.iloc[2, :] + tm.assert_series_equal(res_row, exp_row) + assert isinstance(res_row["cats"], compat.string_types) + + # col + res_col = df.iloc[:, 0] + tm.assert_series_equal(res_col, exp_col) + assert is_categorical_dtype(res_col) + + # single value + res_val = df.iloc[2, 0] + assert res_val == exp_val + + # loc + # frame + res_df = df.loc["j":"k", :] + tm.assert_frame_equal(res_df, exp_df) + assert is_categorical_dtype(res_df["cats"]) + + # row + res_row = df.loc["j", :] + tm.assert_series_equal(res_row, exp_row) + assert isinstance(res_row["cats"], compat.string_types) + + # col + res_col = df.loc[:, "cats"] + tm.assert_series_equal(res_col, exp_col) + assert is_categorical_dtype(res_col) + + # single value + res_val = df.loc["j", "cats"] + assert res_val == exp_val + + # ix + # frame + # res_df = df.loc["j":"k",[0,1]] # doesn't work? 
+ res_df = df.loc["j":"k", :] + tm.assert_frame_equal(res_df, exp_df) + assert is_categorical_dtype(res_df["cats"]) + + # row + res_row = df.loc["j", :] + tm.assert_series_equal(res_row, exp_row) + assert isinstance(res_row["cats"], compat.string_types) + + # col + res_col = df.loc[:, "cats"] + tm.assert_series_equal(res_col, exp_col) + assert is_categorical_dtype(res_col) + + # single value + res_val = df.loc["j", df.columns[0]] + assert res_val == exp_val + + # iat + res_val = df.iat[2, 0] + assert res_val == exp_val + + # at + res_val = df.at["j", "cats"] + assert res_val == exp_val + + # fancy indexing + exp_fancy = df.iloc[[2]] + + res_fancy = df[df["cats"] == "b"] + tm.assert_frame_equal(res_fancy, exp_fancy) + res_fancy = df[df["values"] == 3] + tm.assert_frame_equal(res_fancy, exp_fancy) + + # get_value + res_val = df.at["j", "cats"] + assert res_val == exp_val + + # i : int, slice, or sequence of integers + res_row = df.iloc[2] + tm.assert_series_equal(res_row, exp_row) + assert isinstance(res_row["cats"], compat.string_types) + + res_df = df.iloc[slice(2, 4)] + tm.assert_frame_equal(res_df, exp_df) + assert is_categorical_dtype(res_df["cats"]) + + res_df = df.iloc[[2, 3]] + tm.assert_frame_equal(res_df, exp_df) + assert is_categorical_dtype(res_df["cats"]) + + res_col = df.iloc[:, 0] + tm.assert_series_equal(res_col, exp_col) + assert is_categorical_dtype(res_col) + + res_df = df.iloc[:, slice(0, 2)] + tm.assert_frame_equal(res_df, df) + assert is_categorical_dtype(res_df["cats"]) + + res_df = df.iloc[:, [0, 1]] + tm.assert_frame_equal(res_df, df) + assert is_categorical_dtype(res_df["cats"]) + + def test_slicing_doc_examples(self): + + # GH 7918 + cats = Categorical(["a", "b", "b", "b", "c", "c", "c"], + categories=["a", "b", "c"]) + idx = Index(["h", "i", "j", "k", "l", "m", "n", ]) + values = [1, 2, 2, 2, 3, 4, 5] + df = DataFrame({"cats": cats, "values": values}, index=idx) + + result = df.iloc[2:4, :] + expected = DataFrame( + {"cats": 
Categorical(['b', 'b'], categories=['a', 'b', 'c']), + "values": [2, 2]}, index=['j', 'k']) + tm.assert_frame_equal(result, expected) + + result = df.iloc[2:4, :].dtypes + expected = Series(['category', 'int64'], ['cats', 'values']) + tm.assert_series_equal(result, expected) + + result = df.loc["h":"j", "cats"] + expected = Series(Categorical(['a', 'b', 'b'], + categories=['a', 'b', 'c']), + index=['h', 'i', 'j'], name='cats') + tm.assert_series_equal(result, expected) + + result = df.loc["h":"j", df.columns[0:1]] + expected = DataFrame({'cats': Categorical(['a', 'b', 'b'], + categories=['a', 'b', 'c'])}, + index=['h', 'i', 'j']) + tm.assert_frame_equal(result, expected) + + def test_getitem_category_type(self): + # GH 14580 + # test iloc() on Series with Categorical data + + s = Series([1, 2, 3]).astype('category') + + # get slice + result = s.iloc[0:2] + expected = Series([1, 2]).astype(CategoricalDtype([1, 2, 3])) + tm.assert_series_equal(result, expected) + + # get list of indexes + result = s.iloc[[0, 1]] + expected = Series([1, 2]).astype(CategoricalDtype([1, 2, 3])) + tm.assert_series_equal(result, expected) + + # get boolean array + result = s.iloc[[True, False, False]] + expected = Series([1]).astype(CategoricalDtype([1, 2, 3])) + tm.assert_series_equal(result, expected) def test_loc_listlike(self): @@ -72,70 +309,71 @@ def test_loc_listlike(self): assert_frame_equal(result, expected, check_index_type=True) result = self.df2.loc[['a', 'b', 'e']] - exp_index = pd.CategoricalIndex( + exp_index = CategoricalIndex( list('aaabbe'), categories=list('cabe'), name='B') expected = DataFrame({'A': [0, 1, 5, 2, 3, np.nan]}, index=exp_index) assert_frame_equal(result, expected, check_index_type=True) # element in the categories but not in the values - self.assertRaises(KeyError, lambda: self.df2.loc['e']) + pytest.raises(KeyError, lambda: self.df2.loc['e']) # assign is ok df = self.df2.copy() df.loc['e'] = 20 result = df.loc[['a', 'b', 'e']] - exp_index = 
pd.CategoricalIndex( + exp_index = CategoricalIndex( list('aaabbe'), categories=list('cabe'), name='B') expected = DataFrame({'A': [0, 1, 5, 2, 3, 20]}, index=exp_index) assert_frame_equal(result, expected) df = self.df2.copy() result = df.loc[['a', 'b', 'e']] - exp_index = pd.CategoricalIndex( + exp_index = CategoricalIndex( list('aaabbe'), categories=list('cabe'), name='B') expected = DataFrame({'A': [0, 1, 5, 2, 3, np.nan]}, index=exp_index) assert_frame_equal(result, expected, check_index_type=True) # not all labels in the categories - self.assertRaises(KeyError, lambda: self.df2.loc[['a', 'd']]) + with pytest.raises(KeyError): + self.df2.loc[['a', 'd']] def test_loc_listlike_dtypes(self): # GH 11586 # unique categories and codes - index = pd.CategoricalIndex(['a', 'b', 'c']) + index = CategoricalIndex(['a', 'b', 'c']) df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=index) # unique slice res = df.loc[['a', 'b']] - exp_index = pd.CategoricalIndex(['a', 'b'], - categories=index.categories) + exp_index = CategoricalIndex(['a', 'b'], + categories=index.categories) exp = DataFrame({'A': [1, 2], 'B': [4, 5]}, index=exp_index) tm.assert_frame_equal(res, exp, check_index_type=True) # duplicated slice res = df.loc[['a', 'a', 'b']] - exp_index = pd.CategoricalIndex(['a', 'a', 'b'], - categories=index.categories) + exp_index = CategoricalIndex(['a', 'a', 'b'], + categories=index.categories) exp = DataFrame({'A': [1, 1, 2], 'B': [4, 4, 5]}, index=exp_index) tm.assert_frame_equal(res, exp, check_index_type=True) - with tm.assertRaisesRegexp( + with tm.assert_raises_regex( KeyError, 'a list-indexer must only include values that are ' 'in the categories'): df.loc[['a', 'x']] # duplicated categories and codes - index = pd.CategoricalIndex(['a', 'b', 'a']) + index = CategoricalIndex(['a', 'b', 'a']) df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=index) # unique slice res = df.loc[['a', 'b']] exp = DataFrame({'A': [1, 3, 2], 'B': [4, 6, 5]}, - 
index=pd.CategoricalIndex(['a', 'a', 'b'])) + index=CategoricalIndex(['a', 'a', 'b'])) tm.assert_frame_equal(res, exp, check_index_type=True) # duplicated slice @@ -143,93 +381,148 @@ def test_loc_listlike_dtypes(self): exp = DataFrame( {'A': [1, 3, 1, 3, 2], 'B': [4, 6, 4, 6, 5 - ]}, index=pd.CategoricalIndex(['a', 'a', 'a', 'a', 'b'])) + ]}, index=CategoricalIndex(['a', 'a', 'a', 'a', 'b'])) tm.assert_frame_equal(res, exp, check_index_type=True) - with tm.assertRaisesRegexp( + with tm.assert_raises_regex( KeyError, 'a list-indexer must only include values ' 'that are in the categories'): df.loc[['a', 'x']] # contains unused category - index = pd.CategoricalIndex( + index = CategoricalIndex( ['a', 'b', 'a', 'c'], categories=list('abcde')) df = DataFrame({'A': [1, 2, 3, 4], 'B': [5, 6, 7, 8]}, index=index) res = df.loc[['a', 'b']] - exp = DataFrame({'A': [1, 3, 2], - 'B': [5, 7, 6]}, index=pd.CategoricalIndex( - ['a', 'a', 'b'], categories=list('abcde'))) + exp = DataFrame({'A': [1, 3, 2], 'B': [5, 7, 6]}, + index=CategoricalIndex(['a', 'a', 'b'], + categories=list('abcde'))) tm.assert_frame_equal(res, exp, check_index_type=True) res = df.loc[['a', 'e']] exp = DataFrame({'A': [1, 3, np.nan], 'B': [5, 7, np.nan]}, - index=pd.CategoricalIndex(['a', 'a', 'e'], - categories=list('abcde'))) + index=CategoricalIndex(['a', 'a', 'e'], + categories=list('abcde'))) tm.assert_frame_equal(res, exp, check_index_type=True) # duplicated slice res = df.loc[['a', 'a', 'b']] exp = DataFrame({'A': [1, 3, 1, 3, 2], 'B': [5, 7, 5, 7, 6]}, - index=pd.CategoricalIndex(['a', 'a', 'a', 'a', 'b'], - categories=list('abcde'))) + index=CategoricalIndex(['a', 'a', 'a', 'a', 'b'], + categories=list('abcde'))) tm.assert_frame_equal(res, exp, check_index_type=True) - with tm.assertRaisesRegexp( + with tm.assert_raises_regex( KeyError, 'a list-indexer must only include values ' 'that are in the categories'): df.loc[['a', 'x']] + def test_get_indexer_array(self): + arr = 
np.array([Timestamp('1999-12-31 00:00:00'), + Timestamp('2000-12-31 00:00:00')], dtype=object) + cats = [Timestamp('1999-12-31 00:00:00'), + Timestamp('2000-12-31 00:00:00')] + ci = CategoricalIndex(cats, + categories=cats, + ordered=False, dtype='category') + result = ci.get_indexer(arr) + expected = np.array([0, 1], dtype='intp') + tm.assert_numpy_array_equal(result, expected) + + def test_get_indexer_same_categories_same_order(self): + ci = CategoricalIndex(['a', 'b'], categories=['a', 'b']) + + result = ci.get_indexer(CategoricalIndex(['b', 'b'], + categories=['a', 'b'])) + expected = np.array([1, 1], dtype='intp') + tm.assert_numpy_array_equal(result, expected) + + def test_get_indexer_same_categories_different_order(self): + # https://github.com/pandas-dev/pandas/issues/19551 + ci = CategoricalIndex(['a', 'b'], categories=['a', 'b']) + + result = ci.get_indexer(CategoricalIndex(['b', 'b'], + categories=['b', 'a'])) + expected = np.array([1, 1], dtype='intp') + tm.assert_numpy_array_equal(result, expected) + + def test_getitem_with_listlike(self): + # GH 16115 + cats = Categorical([Timestamp('12-31-1999'), + Timestamp('12-31-2000')]) + + expected = DataFrame([[1, 0], [0, 1]], dtype='uint8', + index=[0, 1], columns=cats) + dummies = pd.get_dummies(cats) + result = dummies[[c for c in dummies.columns]] + assert_frame_equal(result, expected) + + def test_setitem_listlike(self): + + # GH 9469 + # properly coerce the input indexers + np.random.seed(1) + c = Categorical(np.random.randint(0, 5, size=150000).astype( + np.int8)).add_categories([-1000]) + indexer = np.array([100000]).astype(np.int64) + c[indexer] = -1000 + + # we are asserting the code result here + # which maps to the -1000 category + result = c.codes[np.array([100000]).astype(np.int64)] + tm.assert_numpy_array_equal(result, np.array([5], dtype='int8')) + def test_ix_categorical_index(self): # GH 12531 - df = pd.DataFrame(np.random.randn(3, 3), - index=list('ABC'), columns=list('XYZ')) + df = 
DataFrame(np.random.randn(3, 3), + index=list('ABC'), columns=list('XYZ')) cdf = df.copy() - cdf.index = pd.CategoricalIndex(df.index) - cdf.columns = pd.CategoricalIndex(df.columns) + cdf.index = CategoricalIndex(df.index) + cdf.columns = CategoricalIndex(df.columns) - expect = pd.Series(df.loc['A', :], index=cdf.columns, name='A') + expect = Series(df.loc['A', :], index=cdf.columns, name='A') assert_series_equal(cdf.loc['A', :], expect) - expect = pd.Series(df.loc[:, 'X'], index=cdf.index, name='X') + expect = Series(df.loc[:, 'X'], index=cdf.index, name='X') assert_series_equal(cdf.loc[:, 'X'], expect) - exp_index = pd.CategoricalIndex(list('AB'), categories=['A', 'B', 'C']) - expect = pd.DataFrame(df.loc[['A', 'B'], :], columns=cdf.columns, - index=exp_index) + exp_index = CategoricalIndex(list('AB'), categories=['A', 'B', 'C']) + expect = DataFrame(df.loc[['A', 'B'], :], columns=cdf.columns, + index=exp_index) assert_frame_equal(cdf.loc[['A', 'B'], :], expect) - exp_columns = pd.CategoricalIndex(list('XY'), - categories=['X', 'Y', 'Z']) - expect = pd.DataFrame(df.loc[:, ['X', 'Y']], index=cdf.index, - columns=exp_columns) + exp_columns = CategoricalIndex(list('XY'), + categories=['X', 'Y', 'Z']) + expect = DataFrame(df.loc[:, ['X', 'Y']], index=cdf.index, + columns=exp_columns) assert_frame_equal(cdf.loc[:, ['X', 'Y']], expect) # non-unique - df = pd.DataFrame(np.random.randn(3, 3), - index=list('ABA'), columns=list('XYX')) + df = DataFrame(np.random.randn(3, 3), + index=list('ABA'), columns=list('XYX')) cdf = df.copy() - cdf.index = pd.CategoricalIndex(df.index) - cdf.columns = pd.CategoricalIndex(df.columns) + cdf.index = CategoricalIndex(df.index) + cdf.columns = CategoricalIndex(df.columns) - exp_index = pd.CategoricalIndex(list('AA'), categories=['A', 'B']) - expect = pd.DataFrame(df.loc['A', :], columns=cdf.columns, - index=exp_index) + exp_index = CategoricalIndex(list('AA'), categories=['A', 'B']) + expect = DataFrame(df.loc['A', :], 
columns=cdf.columns, + index=exp_index) assert_frame_equal(cdf.loc['A', :], expect) - exp_columns = pd.CategoricalIndex(list('XX'), categories=['X', 'Y']) - expect = pd.DataFrame(df.loc[:, 'X'], index=cdf.index, - columns=exp_columns) + exp_columns = CategoricalIndex(list('XX'), categories=['X', 'Y']) + expect = DataFrame(df.loc[:, 'X'], index=cdf.index, + columns=exp_columns) assert_frame_equal(cdf.loc[:, 'X'], expect) - expect = pd.DataFrame(df.loc[['A', 'B'], :], columns=cdf.columns, - index=pd.CategoricalIndex(list('AAB'))) + expect = DataFrame(df.loc[['A', 'B'], :], columns=cdf.columns, + index=CategoricalIndex(list('AAB'))) assert_frame_equal(cdf.loc[['A', 'B'], :], expect) - expect = pd.DataFrame(df.loc[:, ['X', 'Y']], index=cdf.index, - columns=pd.CategoricalIndex(list('XXY'))) + expect = DataFrame(df.loc[:, ['X', 'Y']], index=cdf.index, + columns=CategoricalIndex(list('XXY'))) assert_frame_equal(cdf.loc[:, ['X', 'Y']], expect) def test_read_only_source(self): @@ -279,16 +572,16 @@ def test_reindexing(self): # then return a Categorical cats = list('cabe') - result = self.df2.reindex(pd.Categorical(['a', 'd'], categories=cats)) + result = self.df2.reindex(Categorical(['a', 'd'], categories=cats)) expected = DataFrame({'A': [0, 1, 5, np.nan], 'B': Series(list('aaad')).astype( - 'category', categories=cats)}).set_index('B') + CDT(cats))}).set_index('B') assert_frame_equal(result, expected, check_index_type=True) - result = self.df2.reindex(pd.Categorical(['a'], categories=cats)) + result = self.df2.reindex(Categorical(['a'], categories=cats)) expected = DataFrame({'A': [0, 1, 5], 'B': Series(list('aaa')).astype( - 'category', categories=cats)}).set_index('B') + CDT(cats))}).set_index('B') assert_frame_equal(result, expected, check_index_type=True) result = self.df2.reindex(['a', 'b', 'e']) @@ -307,39 +600,38 @@ def test_reindexing(self): assert_frame_equal(result, expected, check_index_type=True) # give back the type of categorical that we received - result = 
self.df2.reindex(pd.Categorical( + result = self.df2.reindex(Categorical( ['a', 'd'], categories=cats, ordered=True)) expected = DataFrame( {'A': [0, 1, 5, np.nan], - 'B': Series(list('aaad')).astype('category', categories=cats, - ordered=True)}).set_index('B') + 'B': Series(list('aaad')).astype( + CDT(cats, ordered=True))}).set_index('B') assert_frame_equal(result, expected, check_index_type=True) - result = self.df2.reindex(pd.Categorical( + result = self.df2.reindex(Categorical( ['a', 'd'], categories=['a', 'd'])) expected = DataFrame({'A': [0, 1, 5, np.nan], 'B': Series(list('aaad')).astype( - 'category', categories=['a', 'd' - ])}).set_index('B') + CDT(['a', 'd']))}).set_index('B') assert_frame_equal(result, expected, check_index_type=True) # passed duplicate indexers are not allowed - self.assertRaises(ValueError, lambda: self.df2.reindex(['a', 'a'])) + pytest.raises(ValueError, lambda: self.df2.reindex(['a', 'a'])) # args NotImplemented ATM - self.assertRaises(NotImplementedError, - lambda: self.df2.reindex(['a'], method='ffill')) - self.assertRaises(NotImplementedError, - lambda: self.df2.reindex(['a'], level=1)) - self.assertRaises(NotImplementedError, - lambda: self.df2.reindex(['a'], limit=2)) + pytest.raises(NotImplementedError, + lambda: self.df2.reindex(['a'], method='ffill')) + pytest.raises(NotImplementedError, + lambda: self.df2.reindex(['a'], level=1)) + pytest.raises(NotImplementedError, + lambda: self.df2.reindex(['a'], limit=2)) def test_loc_slice(self): # slicing # not implemented ATM # GH9748 - self.assertRaises(TypeError, lambda: self.df.loc[1:5]) + pytest.raises(TypeError, lambda: self.df.loc[1:5]) # result = df.loc[1:5] # expected = df.iloc[[1,2,3,4]] @@ -387,8 +679,8 @@ def test_boolean_selection(self): # categories=[3, 2, 1], # ordered=False, # name=u'B') - self.assertRaises(TypeError, lambda: df4[df4.index < 2]) - self.assertRaises(TypeError, lambda: df4[df4.index > 1]) + pytest.raises(TypeError, lambda: df4[df4.index < 2]) + 
pytest.raises(TypeError, lambda: df4[df4.index > 1]) def test_indexing_with_category(self): @@ -405,3 +697,21 @@ def test_indexing_with_category(self): res = (cat[['A']] == 'foo') tm.assert_frame_equal(res, exp) + + def test_map_with_dict_or_series(self): + orig_values = ['a', 'B', 1, 'a'] + new_values = ['one', 2, 3.0, 'one'] + cur_index = pd.CategoricalIndex(orig_values, name='XXX') + expected = pd.CategoricalIndex(new_values, + name='XXX', categories=[3.0, 2, 'one']) + + mapper = pd.Series(new_values[:-1], index=orig_values[:-1]) + output = cur_index.map(mapper) + # Order of categories in output can be different + tm.assert_index_equal(expected, output) + + mapper = {o: n for o, n in + zip(orig_values[:-1], new_values[:-1])} + output = cur_index.map(mapper) + # Order of categories in output can be different + tm.assert_index_equal(expected, output) diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index 0e921aaf826f9..0e396a3248e3f 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -1,3 +1,7 @@ +from warnings import catch_warnings + +import pytest + import numpy as np import pandas as pd from pandas.core import common as com @@ -6,7 +10,7 @@ from pandas.util import testing as tm -class TestCaching(tm.TestCase): +class TestCaching(object): def test_slice_consolidate_invalidate_item_cache(self): @@ -28,7 +32,7 @@ def test_slice_consolidate_invalidate_item_cache(self): # Assignment to wrong series df['bb'].iloc[0] = 0.17 df._clear_item_cache() - self.assertAlmostEqual(df['bb'][0], 0.17) + tm.assert_almost_equal(df['bb'][0], 0.17) def test_setitem_cache_updating(self): # GH 5424 @@ -41,13 +45,13 @@ def test_setitem_cache_updating(self): # ref the cache if do_ref: - df.ix[0, "c"] + df.loc[0, "c"] # set it - df.ix[7, 'c'] = 1 + df.loc[7, 'c'] = 1 - self.assertEqual(df.ix[0, 'c'], 0.0) - self.assertEqual(df.ix[7, 'c'], 1.0) + 
assert df.loc[0, 'c'] == 0.0 + assert df.loc[7, 'c'] == 1.0 # GH 7084 # not updating cache on series setting with slices @@ -86,7 +90,7 @@ def test_setitem_cache_updating(self): tm.assert_series_equal(out['A'], expected['A']) -class TestChaining(tm.TestCase): +class TestChaining(object): def test_setitem_chained_setfault(self): @@ -132,7 +136,8 @@ def test_detect_chained_assignment(self): expected = DataFrame([[-5, 1], [-6, 3]], columns=list('AB')) df = DataFrame(np.arange(4).reshape(2, 2), columns=list('AB'), dtype='int64') - self.assertIsNone(df.is_copy) + assert df._is_copy is None + df['A'][0] = -5 df['A'][1] = -6 tm.assert_frame_equal(df, expected) @@ -140,71 +145,56 @@ def test_detect_chained_assignment(self): # test with the chaining df = DataFrame({'A': Series(range(2), dtype='int64'), 'B': np.array(np.arange(2, 4), dtype=np.float64)}) - self.assertIsNone(df.is_copy) + assert df._is_copy is None - def f(): + with pytest.raises(com.SettingWithCopyError): df['A'][0] = -5 - self.assertRaises(com.SettingWithCopyError, f) - - def f(): + with pytest.raises(com.SettingWithCopyError): df['A'][1] = np.nan - self.assertRaises(com.SettingWithCopyError, f) - self.assertIsNone(df['A'].is_copy) + assert df['A']._is_copy is None - # using a copy (the chain), fails + # Using a copy (the chain), fails df = DataFrame({'A': Series(range(2), dtype='int64'), 'B': np.array(np.arange(2, 4), dtype=np.float64)}) - def f(): + with pytest.raises(com.SettingWithCopyError): df.loc[0]['A'] = -5 - self.assertRaises(com.SettingWithCopyError, f) - - # doc example + # Doc example df = DataFrame({'a': ['one', 'one', 'two', 'three', 'two', 'one', 'six'], 'c': Series(range(7), dtype='int64')}) - self.assertIsNone(df.is_copy) - expected = DataFrame({'a': ['one', 'one', 'two', 'three', - 'two', 'one', 'six'], - 'c': [42, 42, 2, 3, 4, 42, 6]}) + assert df._is_copy is None - def f(): + with pytest.raises(com.SettingWithCopyError): indexer = df.a.str.startswith('o') df[indexer]['c'] = 42 - 
self.assertRaises(com.SettingWithCopyError, f) - expected = DataFrame({'A': [111, 'bbb', 'ccc'], 'B': [1, 2, 3]}) df = DataFrame({'A': ['aaa', 'bbb', 'ccc'], 'B': [1, 2, 3]}) - def f(): + with pytest.raises(com.SettingWithCopyError): df['A'][0] = 111 - self.assertRaises(com.SettingWithCopyError, f) - - def f(): + with pytest.raises(com.SettingWithCopyError): df.loc[0]['A'] = 111 - self.assertRaises(com.SettingWithCopyError, f) - df.loc[0, 'A'] = 111 tm.assert_frame_equal(df, expected) - # make sure that is_copy is picked up reconstruction - # GH5475 + # gh-5475: Make sure that is_copy is picked up reconstruction df = DataFrame({"A": [1, 2]}) - self.assertIsNone(df.is_copy) + assert df._is_copy is None + with tm.ensure_clean('__tmp__pickle') as path: df.to_pickle(path) df2 = pd.read_pickle(path) df2["B"] = df2["A"] df2["B"] = df2["A"] - # a suprious raise as we are setting the entire column here - # GH5597 + # gh-5597: a spurious raise as we are setting the entire column here from string import ascii_letters as letters def random_text(nobs=100): @@ -212,54 +202,60 @@ def random_text(nobs=100): for i in range(nobs): idx = np.random.randint(len(letters), size=2) idx.sort() + df.append([letters[idx[0]:idx[1]]]) return DataFrame(df, columns=['letters']) df = random_text(100000) - # always a copy + # Always a copy x = df.iloc[[0, 1, 2]] - self.assertIsNotNone(x.is_copy) + assert x._is_copy is not None + x = df.iloc[[0, 1, 2, 4]] - self.assertIsNotNone(x.is_copy) + assert x._is_copy is not None - # explicity copy + # Explicitly copy indexer = df.letters.apply(lambda x: len(x) > 10) - df = df.ix[indexer].copy() - self.assertIsNone(df.is_copy) + df = df.loc[indexer].copy() + + assert df._is_copy is None df['letters'] = df['letters'].apply(str.lower) - # implicity take + # Implicitly take df = random_text(100000) indexer = df.letters.apply(lambda x: len(x) > 10) - df = df.ix[indexer] - self.assertIsNotNone(df.is_copy) + df = df.loc[indexer] + + assert df._is_copy is not None 
df['letters'] = df['letters'].apply(str.lower) - # implicity take 2 + # Implicitly take 2 df = random_text(100000) indexer = df.letters.apply(lambda x: len(x) > 10) - df = df.ix[indexer] - self.assertIsNotNone(df.is_copy) + + df = df.loc[indexer] + assert df._is_copy is not None df.loc[:, 'letters'] = df['letters'].apply(str.lower) - # should be ok even though it's a copy! - self.assertIsNone(df.is_copy) + # Should be ok even though it's a copy! + assert df._is_copy is None + df['letters'] = df['letters'].apply(str.lower) - self.assertIsNone(df.is_copy) + assert df._is_copy is None df = random_text(100000) indexer = df.letters.apply(lambda x: len(x) > 10) - df.ix[indexer, 'letters'] = df.ix[indexer, 'letters'].apply(str.lower) + df.loc[indexer, 'letters'] = ( + df.loc[indexer, 'letters'].apply(str.lower)) # an identical take, so no copy df = DataFrame({'a': [1]}).dropna() - self.assertIsNone(df.is_copy) + assert df._is_copy is None df['a'] += 1 - # inplace ops - # original from: + # Inplace ops, originally from: # http://stackoverflow.com/questions/20508968/series-fillna-in-a-multiindex-dataframe-does-not-fill-is-this-a-bug a = [12, 23] b = [123, None] @@ -274,23 +270,25 @@ def random_text(nobs=100): multiind = MultiIndex.from_tuples(tuples, names=['part', 'side']) zed = DataFrame(events, index=['a', 'b'], columns=multiind) - def f(): + with pytest.raises(com.SettingWithCopyError): zed['eyes']['right'].fillna(value=555, inplace=True) - self.assertRaises(com.SettingWithCopyError, f) - df = DataFrame(np.random.randn(10, 4)) s = df.iloc[:, 0].sort_values() + tm.assert_series_equal(s, df.iloc[:, 0].sort_values()) tm.assert_series_equal(s, df[0].sort_values()) - # false positives GH6025 + # see gh-6025: false positives df = DataFrame({'column1': ['a', 'a', 'a'], 'column2': [4, 8, 9]}) str(df) + df['column1'] = df['column1'] + 'b' str(df) + df = df[df['column2'] != 8] str(df) + df['column1'] = df['column1'] + 'c' str(df) @@ -299,45 +297,36 @@ def f(): df = 
DataFrame(np.arange(0, 9), columns=['count']) df['group'] = 'b' - def f(): + with pytest.raises(com.SettingWithCopyError): df.iloc[0:5]['group'] = 'a' - self.assertRaises(com.SettingWithCopyError, f) - - # mixed type setting - # same dtype & changing dtype + # Mixed type setting but same dtype & changing dtype df = DataFrame(dict(A=date_range('20130101', periods=5), B=np.random.randn(5), C=np.arange(5, dtype='int64'), D=list('abcde'))) - def f(): - df.ix[2]['D'] = 'foo' - - self.assertRaises(com.SettingWithCopyError, f) - - def f(): - df.ix[2]['C'] = 'foo' + with pytest.raises(com.SettingWithCopyError): + df.loc[2]['D'] = 'foo' - self.assertRaises(com.SettingWithCopyError, f) + with pytest.raises(com.SettingWithCopyError): + df.loc[2]['C'] = 'foo' - def f(): + with pytest.raises(com.SettingWithCopyError): df['C'][2] = 'foo' - self.assertRaises(com.SettingWithCopyError, f) - def test_setting_with_copy_bug(self): # operating on a copy - df = pd.DataFrame({'a': list(range(4)), - 'b': list('ab..'), - 'c': ['a', 'b', np.nan, 'd']}) - mask = pd.isnull(df.c) + df = DataFrame({'a': list(range(4)), + 'b': list('ab..'), + 'c': ['a', 'b', np.nan, 'd']}) + mask = pd.isna(df.c) def f(): df[['c']][mask] = df[['b']][mask] - self.assertRaises(com.SettingWithCopyError, f) + pytest.raises(com.SettingWithCopyError, f) # invalid warning as we are returning a new object # GH 8730 @@ -356,3 +345,87 @@ def test_detect_chained_assignment_warnings(self): with tm.assert_produces_warning( expected_warning=com.SettingWithCopyWarning): df.loc[0]['A'] = 111 + + def test_chained_getitem_with_lists(self): + + # GH6394 + # Regression in chained getitem indexing with embedded list-like from + # 0.12 + def check(result, expected): + tm.assert_numpy_array_equal(result, expected) + assert isinstance(result, np.ndarray) + + df = DataFrame({'A': 5 * [np.zeros(3)], 'B': 5 * [np.ones(3)]}) + expected = df['A'].iloc[2] + result = df.loc[2, 'A'] + check(result, expected) + result2 = df.iloc[2]['A'] + 
check(result2, expected) + result3 = df['A'].loc[2] + check(result3, expected) + result4 = df['A'].iloc[2] + check(result4, expected) + + def test_cache_updating(self): + # GH 4939, make sure to update the cache on setitem + + df = tm.makeDataFrame() + df['A'] # cache series + with catch_warnings(record=True): + df.ix["Hello Friend"] = df.ix[0] + assert "Hello Friend" in df['A'].index + assert "Hello Friend" in df['B'].index + + with catch_warnings(record=True): + panel = tm.makePanel() + panel.ix[0] # get first item into cache + panel.ix[:, :, 'A+1'] = panel.ix[:, :, 'A'] + 1 + assert "A+1" in panel.ix[0].columns + assert "A+1" in panel.ix[1].columns + + # 5216 + # make sure that we don't try to set a dead cache + a = np.random.rand(10, 3) + df = DataFrame(a, columns=['x', 'y', 'z']) + tuples = [(i, j) for i in range(5) for j in range(2)] + index = MultiIndex.from_tuples(tuples) + df.index = index + + # setting via chained assignment + # but actually works, since everything is a view + df.loc[0]['z'].iloc[0] = 1. + result = df.loc[(0, 0), 'z'] + assert result == 1 + + # correct setting + df.loc[(0, 0), 'z'] = 2 + result = df.loc[(0, 0), 'z'] + assert result == 2 + + # 10264 + df = DataFrame(np.zeros((5, 5), dtype='int64'), columns=[ + 'a', 'b', 'c', 'd', 'e'], index=range(5)) + df['f'] = 0 + df.f.values[3] = 1 + + # TODO(wesm): unused? 
+ # y = df.iloc[np.arange(2, len(df))] + + df.f.values[3] = 2 + expected = DataFrame(np.zeros((5, 6), dtype='int64'), columns=[ + 'a', 'b', 'c', 'd', 'e', 'f'], index=range(5)) + expected.at[3, 'f'] = 2 + tm.assert_frame_equal(df, expected) + expected = Series([0, 0, 0, 2, 0], name='f') + tm.assert_series_equal(df.f, expected) + + def test_deprecate_is_copy(self): + # GH18801 + df = DataFrame({"A": [1, 2, 3]}) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + # getter + df.is_copy + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + # setter + df.is_copy = "test deprecated is_copy" diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index 38f8bb5355a69..de756375db8cb 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- +import itertools import pytest import numpy as np @@ -13,6 +14,27 @@ ############################################################### +@pytest.fixture(autouse=True, scope='class') +def check_comprehensiveness(request): + # Iterate over combination of dtype, method and klass + # and ensure that each are contained within a collected test + cls = request.cls + combos = itertools.product(cls.klasses, cls.dtypes, [cls.method]) + + def has_test(combo): + klass, dtype, method = combo + cls_funcs = request.node.session.items + return any(klass in x.name and dtype in x.name and + method in x.name for x in cls_funcs) + + for combo in combos: + if not has_test(combo): + msg = 'test method is not defined: {0}, {1}' + raise AssertionError(msg.format(type(cls), combo)) + + yield + + class CoercionBase(object): klasses = ['index', 'series'] @@ -31,20 +53,11 @@ def _assert(self, left, right, dtype): tm.assert_index_equal(left, right) else: raise NotImplementedError - self.assertEqual(left.dtype, dtype) - self.assertEqual(right.dtype, dtype) - - def test_has_comprehensive_tests(self): - for 
klass in self.klasses: - for dtype in self.dtypes: - method_name = 'test_{0}_{1}_{2}'.format(self.method, - klass, dtype) - if not hasattr(self, method_name): - msg = 'test method is not defined: {0}, {1}' - raise AssertionError(msg.format(type(self), method_name)) + assert left.dtype == dtype + assert right.dtype == dtype -class TestSetitemCoercion(CoercionBase, tm.TestCase): +class TestSetitemCoercion(CoercionBase): method = 'setitem' @@ -55,191 +68,160 @@ def _assert_setitem_series_conversion(self, original_series, loc_value, temp[1] = loc_value tm.assert_series_equal(temp, expected_series) # check dtype explicitly for sure - self.assertEqual(temp.dtype, expected_dtype) + assert temp.dtype == expected_dtype # .loc works different rule, temporary disable # temp = original_series.copy() # temp.loc[1] = loc_value # tm.assert_series_equal(temp, expected_series) - def test_setitem_series_object(self): + @pytest.mark.parametrize("val,exp_dtype", [ + (1, np.object), + (1.1, np.object), + (1 + 1j, np.object), + (True, np.object)]) + def test_setitem_series_object(self, val, exp_dtype): obj = pd.Series(list('abcd')) - self.assertEqual(obj.dtype, np.object) - - # object + int -> object - exp = pd.Series(['a', 1, 'c', 'd']) - self._assert_setitem_series_conversion(obj, 1, exp, np.object) - - # object + float -> object - exp = pd.Series(['a', 1.1, 'c', 'd']) - self._assert_setitem_series_conversion(obj, 1.1, exp, np.object) + assert obj.dtype == np.object - # object + complex -> object - exp = pd.Series(['a', 1 + 1j, 'c', 'd']) - self._assert_setitem_series_conversion(obj, 1 + 1j, exp, np.object) + exp = pd.Series(['a', val, 'c', 'd']) + self._assert_setitem_series_conversion(obj, val, exp, exp_dtype) - # object + bool -> object - exp = pd.Series(['a', True, 'c', 'd']) - self._assert_setitem_series_conversion(obj, True, exp, np.object) - - def test_setitem_series_int64(self): + @pytest.mark.parametrize("val,exp_dtype", [ + (1, np.int64), + (1.1, np.float64), + (1 + 1j, 
np.complex128), + (True, np.object)]) + def test_setitem_series_int64(self, val, exp_dtype): obj = pd.Series([1, 2, 3, 4]) - self.assertEqual(obj.dtype, np.int64) - - # int + int -> int - exp = pd.Series([1, 1, 3, 4]) - self._assert_setitem_series_conversion(obj, 1, exp, np.int64) - - # int + float -> float - # TODO_GH12747 The result must be float - # tm.assert_series_equal(temp, pd.Series([1, 1.1, 3, 4])) - # self.assertEqual(temp.dtype, np.float64) - exp = pd.Series([1, 1, 3, 4]) - self._assert_setitem_series_conversion(obj, 1.1, exp, np.int64) - - # int + complex -> complex - exp = pd.Series([1, 1 + 1j, 3, 4]) - self._assert_setitem_series_conversion(obj, 1 + 1j, exp, np.complex128) - - # int + bool -> int - exp = pd.Series([1, 1, 3, 4]) - self._assert_setitem_series_conversion(obj, True, exp, np.int64) - - def test_setitem_series_float64(self): + assert obj.dtype == np.int64 + + if exp_dtype is np.float64: + exp = pd.Series([1, 1, 3, 4]) + self._assert_setitem_series_conversion(obj, 1.1, exp, np.int64) + pytest.xfail("GH12747 The result must be float") + + exp = pd.Series([1, val, 3, 4]) + self._assert_setitem_series_conversion(obj, val, exp, exp_dtype) + + @pytest.mark.parametrize("val,exp_dtype", [ + (np.int32(1), np.int8), + (np.int16(2**9), np.int16)]) + def test_setitem_series_int8(self, val, exp_dtype): + obj = pd.Series([1, 2, 3, 4], dtype=np.int8) + assert obj.dtype == np.int8 + + if exp_dtype is np.int16: + exp = pd.Series([1, 0, 3, 4], dtype=np.int8) + self._assert_setitem_series_conversion(obj, val, exp, np.int8) + pytest.xfail("BUG: it must be Series([1, 1, 3, 4], dtype=np.int16") + + exp = pd.Series([1, val, 3, 4], dtype=np.int8) + self._assert_setitem_series_conversion(obj, val, exp, exp_dtype) + + @pytest.mark.parametrize("val,exp_dtype", [ + (1, np.float64), + (1.1, np.float64), + (1 + 1j, np.complex128), + (True, np.object)]) + def test_setitem_series_float64(self, val, exp_dtype): obj = pd.Series([1.1, 2.2, 3.3, 4.4]) - 
self.assertEqual(obj.dtype, np.float64) - - # float + int -> float - exp = pd.Series([1.1, 1.0, 3.3, 4.4]) - self._assert_setitem_series_conversion(obj, 1, exp, np.float64) - - # float + float -> float - exp = pd.Series([1.1, 1.1, 3.3, 4.4]) - self._assert_setitem_series_conversion(obj, 1.1, exp, np.float64) - - # float + complex -> complex - exp = pd.Series([1.1, 1 + 1j, 3.3, 4.4]) - self._assert_setitem_series_conversion(obj, 1 + 1j, exp, - np.complex128) + assert obj.dtype == np.float64 - # float + bool -> float - exp = pd.Series([1.1, 1.0, 3.3, 4.4]) - self._assert_setitem_series_conversion(obj, True, exp, np.float64) + exp = pd.Series([1.1, val, 3.3, 4.4]) + self._assert_setitem_series_conversion(obj, val, exp, exp_dtype) - def test_setitem_series_complex128(self): + @pytest.mark.parametrize("val,exp_dtype", [ + (1, np.complex128), + (1.1, np.complex128), + (1 + 1j, np.complex128), + (True, np.object)]) + def test_setitem_series_complex128(self, val, exp_dtype): obj = pd.Series([1 + 1j, 2 + 2j, 3 + 3j, 4 + 4j]) - self.assertEqual(obj.dtype, np.complex128) - - # complex + int -> complex - exp = pd.Series([1 + 1j, 1, 3 + 3j, 4 + 4j]) - self._assert_setitem_series_conversion(obj, True, exp, np.complex128) - - # complex + float -> complex - exp = pd.Series([1 + 1j, 1.1, 3 + 3j, 4 + 4j]) - self._assert_setitem_series_conversion(obj, 1.1, exp, np.complex128) - - # complex + complex -> complex - exp = pd.Series([1 + 1j, 1 + 1j, 3 + 3j, 4 + 4j]) - self._assert_setitem_series_conversion(obj, 1 + 1j, exp, np.complex128) - - # complex + bool -> complex - exp = pd.Series([1 + 1j, 1, 3 + 3j, 4 + 4j]) - self._assert_setitem_series_conversion(obj, True, exp, np.complex128) - - def test_setitem_series_bool(self): + assert obj.dtype == np.complex128 + + exp = pd.Series([1 + 1j, val, 3 + 3j, 4 + 4j]) + self._assert_setitem_series_conversion(obj, val, exp, exp_dtype) + + @pytest.mark.parametrize("val,exp_dtype", [ + (1, np.int64), + (3, np.int64), + (1.1, np.float64), + (1 + 1j, 
np.complex128), + (True, np.bool)]) + def test_setitem_series_bool(self, val, exp_dtype): obj = pd.Series([True, False, True, False]) - self.assertEqual(obj.dtype, np.bool) - - # bool + int -> int - # TODO_GH12747 The result must be int - # tm.assert_series_equal(temp, pd.Series([1, 1, 1, 0])) - # self.assertEqual(temp.dtype, np.int64) - exp = pd.Series([True, True, True, False]) - self._assert_setitem_series_conversion(obj, 1, exp, np.bool) - - # TODO_GH12747 The result must be int - # assigning int greater than bool - # tm.assert_series_equal(temp, pd.Series([1, 3, 1, 0])) - # self.assertEqual(temp.dtype, np.int64) - exp = pd.Series([True, True, True, False]) - self._assert_setitem_series_conversion(obj, 3, exp, np.bool) - - # bool + float -> float - # TODO_GH12747 The result must be float - # tm.assert_series_equal(temp, pd.Series([1., 1.1, 1., 0.])) - # self.assertEqual(temp.dtype, np.float64) - exp = pd.Series([True, True, True, False]) - self._assert_setitem_series_conversion(obj, 1.1, exp, np.bool) - - # bool + complex -> complex (buggy, results in bool) - # TODO_GH12747 The result must be complex - # tm.assert_series_equal(temp, pd.Series([1, 1 + 1j, 1, 0])) - # self.assertEqual(temp.dtype, np.complex128) - exp = pd.Series([True, True, True, False]) - self._assert_setitem_series_conversion(obj, 1 + 1j, exp, np.bool) - - # bool + bool -> bool - exp = pd.Series([True, True, True, False]) - self._assert_setitem_series_conversion(obj, True, exp, np.bool) - - def test_setitem_series_datetime64(self): + assert obj.dtype == np.bool + + if exp_dtype is np.int64: + exp = pd.Series([True, True, True, False]) + self._assert_setitem_series_conversion(obj, val, exp, np.bool) + pytest.xfail("TODO_GH12747 The result must be int") + elif exp_dtype is np.float64: + exp = pd.Series([True, True, True, False]) + self._assert_setitem_series_conversion(obj, val, exp, np.bool) + pytest.xfail("TODO_GH12747 The result must be float") + elif exp_dtype is np.complex128: + exp = 
pd.Series([True, True, True, False]) + self._assert_setitem_series_conversion(obj, val, exp, np.bool) + pytest.xfail("TODO_GH12747 The result must be complex") + + exp = pd.Series([True, val, True, False]) + self._assert_setitem_series_conversion(obj, val, exp, exp_dtype) + + @pytest.mark.parametrize("val,exp_dtype", [ + (pd.Timestamp('2012-01-01'), 'datetime64[ns]'), + (1, np.object), + ('x', np.object)]) + def test_setitem_series_datetime64(self, val, exp_dtype): obj = pd.Series([pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02'), pd.Timestamp('2011-01-03'), pd.Timestamp('2011-01-04')]) - self.assertEqual(obj.dtype, 'datetime64[ns]') - - # datetime64 + datetime64 -> datetime64 - exp = pd.Series([pd.Timestamp('2011-01-01'), - pd.Timestamp('2012-01-01'), - pd.Timestamp('2011-01-03'), - pd.Timestamp('2011-01-04')]) - self._assert_setitem_series_conversion(obj, pd.Timestamp('2012-01-01'), - exp, 'datetime64[ns]') + assert obj.dtype == 'datetime64[ns]' - # datetime64 + int -> object - # ToDo: The result must be object exp = pd.Series([pd.Timestamp('2011-01-01'), - pd.Timestamp(1), + val, pd.Timestamp('2011-01-03'), pd.Timestamp('2011-01-04')]) - self._assert_setitem_series_conversion(obj, 1, exp, 'datetime64[ns]') - - # ToDo: add more tests once the above issue has been fixed - - def test_setitem_series_datetime64tz(self): + self._assert_setitem_series_conversion(obj, val, exp, exp_dtype) + + @pytest.mark.parametrize("val,exp_dtype", [ + (pd.Timestamp('2012-01-01', tz='US/Eastern'), + 'datetime64[ns, US/Eastern]'), + (pd.Timestamp('2012-01-01', tz='US/Pacific'), np.object), + (pd.Timestamp('2012-01-01'), np.object), + (1, np.object)]) + def test_setitem_series_datetime64tz(self, val, exp_dtype): tz = 'US/Eastern' obj = pd.Series([pd.Timestamp('2011-01-01', tz=tz), pd.Timestamp('2011-01-02', tz=tz), pd.Timestamp('2011-01-03', tz=tz), pd.Timestamp('2011-01-04', tz=tz)]) - self.assertEqual(obj.dtype, 'datetime64[ns, US/Eastern]') + assert obj.dtype == 'datetime64[ns, 
US/Eastern]' - # datetime64tz + datetime64tz -> datetime64tz exp = pd.Series([pd.Timestamp('2011-01-01', tz=tz), - pd.Timestamp('2012-01-01', tz=tz), + val, pd.Timestamp('2011-01-03', tz=tz), pd.Timestamp('2011-01-04', tz=tz)]) - value = pd.Timestamp('2012-01-01', tz=tz) - self._assert_setitem_series_conversion(obj, value, exp, - 'datetime64[ns, US/Eastern]') - - # datetime64 + int -> object - # ToDo: The result must be object - exp = pd.Series([pd.Timestamp('2011-01-01', tz=tz), - pd.Timestamp(1, tz=tz), - pd.Timestamp('2011-01-03', tz=tz), - pd.Timestamp('2011-01-04', tz=tz)]) - self._assert_setitem_series_conversion(obj, 1, exp, - 'datetime64[ns, US/Eastern]') - - # ToDo: add more tests once the above issue has been fixed - - def test_setitem_series_timedelta64(self): - pass - - def test_setitem_series_period(self): - pass + self._assert_setitem_series_conversion(obj, val, exp, exp_dtype) + + @pytest.mark.parametrize("val,exp_dtype", [ + (pd.Timedelta('12 day'), 'timedelta64[ns]'), + (1, np.object), + ('x', np.object)]) + def test_setitem_series_timedelta64(self, val, exp_dtype): + obj = pd.Series([pd.Timedelta('1 day'), + pd.Timedelta('2 day'), + pd.Timedelta('3 day'), + pd.Timedelta('4 day')]) + assert obj.dtype == 'timedelta64[ns]' + + exp = pd.Series([pd.Timedelta('1 day'), + val, + pd.Timedelta('3 day'), + pd.Timedelta('4 day')]) + self._assert_setitem_series_conversion(obj, val, exp, exp_dtype) def _assert_setitem_index_conversion(self, original_series, loc_key, expected_index, expected_dtype): @@ -249,67 +231,63 @@ def _assert_setitem_index_conversion(self, original_series, loc_key, exp = pd.Series([1, 2, 3, 4, 5], index=expected_index) tm.assert_series_equal(temp, exp) # check dtype explicitly for sure - self.assertEqual(temp.index.dtype, expected_dtype) + assert temp.index.dtype == expected_dtype temp = original_series.copy() temp.loc[loc_key] = 5 exp = pd.Series([1, 2, 3, 4, 5], index=expected_index) tm.assert_series_equal(temp, exp) # check dtype 
explicitly for sure - self.assertEqual(temp.index.dtype, expected_dtype) + assert temp.index.dtype == expected_dtype - def test_setitem_index_object(self): + @pytest.mark.parametrize("val,exp_dtype", [ + ('x', np.object), + (5, IndexError), + (1.1, np.object)]) + def test_setitem_index_object(self, val, exp_dtype): obj = pd.Series([1, 2, 3, 4], index=list('abcd')) - self.assertEqual(obj.index.dtype, np.object) - - # object + object -> object - exp_index = pd.Index(list('abcdx')) - self._assert_setitem_index_conversion(obj, 'x', exp_index, np.object) - - # object + int -> IndexError, regarded as location - temp = obj.copy() - with tm.assertRaises(IndexError): - temp[5] = 5 + assert obj.index.dtype == np.object - # object + float -> object - exp_index = pd.Index(['a', 'b', 'c', 'd', 1.1]) - self._assert_setitem_index_conversion(obj, 1.1, exp_index, np.object) - - def test_setitem_index_int64(self): - # tests setitem with non-existing numeric key + if exp_dtype is IndexError: + temp = obj.copy() + with pytest.raises(exp_dtype): + temp[5] = 5 + else: + exp_index = pd.Index(list('abcd') + [val]) + self._assert_setitem_index_conversion(obj, val, exp_index, + exp_dtype) + + @pytest.mark.parametrize("val,exp_dtype", [ + (5, np.int64), + (1.1, np.float64), + ('x', np.object)]) + def test_setitem_index_int64(self, val, exp_dtype): obj = pd.Series([1, 2, 3, 4]) - self.assertEqual(obj.index.dtype, np.int64) - - # int + int -> int - exp_index = pd.Index([0, 1, 2, 3, 5]) - self._assert_setitem_index_conversion(obj, 5, exp_index, np.int64) - - # int + float -> float - exp_index = pd.Index([0, 1, 2, 3, 1.1]) - self._assert_setitem_index_conversion(obj, 1.1, exp_index, np.float64) + assert obj.index.dtype == np.int64 - # int + object -> object - exp_index = pd.Index([0, 1, 2, 3, 'x']) - self._assert_setitem_index_conversion(obj, 'x', exp_index, np.object) + exp_index = pd.Index([0, 1, 2, 3, val]) + self._assert_setitem_index_conversion(obj, val, exp_index, exp_dtype) - def 
test_setitem_index_float64(self): - # tests setitem with non-existing numeric key + @pytest.mark.parametrize("val,exp_dtype", [ + (5, IndexError), + (5.1, np.float64), + ('x', np.object)]) + def test_setitem_index_float64(self, val, exp_dtype): obj = pd.Series([1, 2, 3, 4], index=[1.1, 2.1, 3.1, 4.1]) - self.assertEqual(obj.index.dtype, np.float64) + assert obj.index.dtype == np.float64 - # float + int -> int - temp = obj.copy() - # TODO_GH12747 The result must be float - with tm.assertRaises(IndexError): - temp[5] = 5 + if exp_dtype is IndexError: + # float + int -> int + temp = obj.copy() + with pytest.raises(exp_dtype): + temp[5] = 5 + pytest.xfail("TODO_GH12747 The result must be float") - # float + float -> float - exp_index = pd.Index([1.1, 2.1, 3.1, 4.1, 5.1]) - self._assert_setitem_index_conversion(obj, 5.1, exp_index, np.float64) + exp_index = pd.Index([1.1, 2.1, 3.1, 4.1, val]) + self._assert_setitem_index_conversion(obj, val, exp_index, exp_dtype) - # float + object -> object - exp_index = pd.Index([1.1, 2.1, 3.1, 4.1, 'x']) - self._assert_setitem_index_conversion(obj, 'x', exp_index, np.object) + def test_setitem_series_period(self): + pass def test_setitem_index_complex128(self): pass @@ -330,7 +308,7 @@ def test_setitem_index_period(self): pass -class TestInsertIndexCoercion(CoercionBase, tm.TestCase): +class TestInsertIndexCoercion(CoercionBase): klasses = ['index'] method = 'insert' @@ -341,126 +319,75 @@ def _assert_insert_conversion(self, original, value, target = original.copy() res = target.insert(1, value) tm.assert_index_equal(res, expected) - self.assertEqual(res.dtype, expected_dtype) - - def test_insert_index_object(self): + assert res.dtype == expected_dtype + + @pytest.mark.parametrize("insert, coerced_val, coerced_dtype", [ + (1, 1, np.object), + (1.1, 1.1, np.object), + (False, False, np.object), + ('x', 'x', np.object)]) + def test_insert_index_object(self, insert, coerced_val, coerced_dtype): obj = pd.Index(list('abcd')) - 
self.assertEqual(obj.dtype, np.object) - - # object + int -> object - exp = pd.Index(['a', 1, 'b', 'c', 'd']) - self._assert_insert_conversion(obj, 1, exp, np.object) - - # object + float -> object - exp = pd.Index(['a', 1.1, 'b', 'c', 'd']) - self._assert_insert_conversion(obj, 1.1, exp, np.object) + assert obj.dtype == np.object - # object + bool -> object - res = obj.insert(1, False) - tm.assert_index_equal(res, pd.Index(['a', False, 'b', 'c', 'd'])) - self.assertEqual(res.dtype, np.object) + exp = pd.Index(['a', coerced_val, 'b', 'c', 'd']) + self._assert_insert_conversion(obj, insert, exp, coerced_dtype) - # object + object -> object - exp = pd.Index(['a', 'x', 'b', 'c', 'd']) - self._assert_insert_conversion(obj, 'x', exp, np.object) - - def test_insert_index_int64(self): + @pytest.mark.parametrize("insert, coerced_val, coerced_dtype", [ + (1, 1, np.int64), + (1.1, 1.1, np.float64), + (False, 0, np.int64), + ('x', 'x', np.object)]) + def test_insert_index_int64(self, insert, coerced_val, coerced_dtype): obj = pd.Int64Index([1, 2, 3, 4]) - self.assertEqual(obj.dtype, np.int64) - - # int + int -> int - exp = pd.Index([1, 1, 2, 3, 4]) - self._assert_insert_conversion(obj, 1, exp, np.int64) - - # int + float -> float - exp = pd.Index([1, 1.1, 2, 3, 4]) - self._assert_insert_conversion(obj, 1.1, exp, np.float64) - - # int + bool -> int - exp = pd.Index([1, 0, 2, 3, 4]) - self._assert_insert_conversion(obj, False, exp, np.int64) + assert obj.dtype == np.int64 - # int + object -> object - exp = pd.Index([1, 'x', 2, 3, 4]) - self._assert_insert_conversion(obj, 'x', exp, np.object) + exp = pd.Index([1, coerced_val, 2, 3, 4]) + self._assert_insert_conversion(obj, insert, exp, coerced_dtype) - def test_insert_index_float64(self): + @pytest.mark.parametrize("insert, coerced_val, coerced_dtype", [ + (1, 1., np.float64), + (1.1, 1.1, np.float64), + (False, 0., np.float64), + ('x', 'x', np.object)]) + def test_insert_index_float64(self, insert, coerced_val, coerced_dtype): 
obj = pd.Float64Index([1., 2., 3., 4.]) - self.assertEqual(obj.dtype, np.float64) + assert obj.dtype == np.float64 - # float + int -> int - exp = pd.Index([1., 1., 2., 3., 4.]) - self._assert_insert_conversion(obj, 1, exp, np.float64) - - # float + float -> float - exp = pd.Index([1., 1.1, 2., 3., 4.]) - self._assert_insert_conversion(obj, 1.1, exp, np.float64) - - # float + bool -> float - exp = pd.Index([1., 0., 2., 3., 4.]) - self._assert_insert_conversion(obj, False, exp, np.float64) - - # float + object -> object - exp = pd.Index([1., 'x', 2., 3., 4.]) - self._assert_insert_conversion(obj, 'x', exp, np.object) - - def test_insert_index_complex128(self): - pass - - def test_insert_index_bool(self): - pass - - def test_insert_index_datetime64(self): - obj = pd.DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03', - '2011-01-04']) - self.assertEqual(obj.dtype, 'datetime64[ns]') - - # datetime64 + datetime64 => datetime64 - exp = pd.DatetimeIndex(['2011-01-01', '2012-01-01', '2011-01-02', - '2011-01-03', '2011-01-04']) - self._assert_insert_conversion(obj, pd.Timestamp('2012-01-01'), - exp, 'datetime64[ns]') - - # ToDo: must coerce to object - msg = "Passed item and index have different timezone" - with tm.assertRaisesRegexp(ValueError, msg): - obj.insert(1, pd.Timestamp('2012-01-01', tz='US/Eastern')) - - # ToDo: must coerce to object - msg = "cannot insert DatetimeIndex with incompatible label" - with tm.assertRaisesRegexp(TypeError, msg): - obj.insert(1, 1) + exp = pd.Index([1., coerced_val, 2., 3., 4.]) + self._assert_insert_conversion(obj, insert, exp, coerced_dtype) - def test_insert_index_datetime64tz(self): + @pytest.mark.parametrize('fill_val,exp_dtype', [ + (pd.Timestamp('2012-01-01'), 'datetime64[ns]'), + (pd.Timestamp('2012-01-01', tz='US/Eastern'), + 'datetime64[ns, US/Eastern]')], + ids=['datetime64', 'datetime64tz']) + def test_insert_index_datetimes(self, fill_val, exp_dtype): obj = pd.DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03', - 
'2011-01-04'], tz='US/Eastern') - self.assertEqual(obj.dtype, 'datetime64[ns, US/Eastern]') + '2011-01-04'], tz=fill_val.tz) + assert obj.dtype == exp_dtype - # datetime64tz + datetime64tz => datetime64 - exp = pd.DatetimeIndex(['2011-01-01', '2012-01-01', '2011-01-02', - '2011-01-03', '2011-01-04'], tz='US/Eastern') - val = pd.Timestamp('2012-01-01', tz='US/Eastern') - self._assert_insert_conversion(obj, val, exp, - 'datetime64[ns, US/Eastern]') + exp = pd.DatetimeIndex(['2011-01-01', fill_val.date(), '2011-01-02', + '2011-01-03', '2011-01-04'], tz=fill_val.tz) + self._assert_insert_conversion(obj, fill_val, exp, exp_dtype) - # ToDo: must coerce to object msg = "Passed item and index have different timezone" - with tm.assertRaisesRegexp(ValueError, msg): - obj.insert(1, pd.Timestamp('2012-01-01')) + if fill_val.tz: + with tm.assert_raises_regex(ValueError, msg): + obj.insert(1, pd.Timestamp('2012-01-01')) - # ToDo: must coerce to object - msg = "Passed item and index have different timezone" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): obj.insert(1, pd.Timestamp('2012-01-01', tz='Asia/Tokyo')) - # ToDo: must coerce to object msg = "cannot insert DatetimeIndex with incompatible label" - with tm.assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): obj.insert(1, 1) + pytest.xfail("ToDo: must coerce to object") + def test_insert_index_timedelta64(self): obj = pd.TimedeltaIndex(['1 day', '2 day', '3 day', '4 day']) - self.assertEqual(obj.dtype, 'timedelta64[ns]') + assert obj.dtype == 'timedelta64[ns]' # timedelta64 + timedelta64 => timedelta64 exp = pd.TimedeltaIndex(['1 day', '10 day', '2 day', '3 day', '4 day']) @@ -469,52 +396,44 @@ def test_insert_index_timedelta64(self): # ToDo: must coerce to object msg = "cannot insert TimedeltaIndex with incompatible label" - with tm.assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): obj.insert(1, 
pd.Timestamp('2012-01-01')) # ToDo: must coerce to object msg = "cannot insert TimedeltaIndex with incompatible label" - with tm.assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): obj.insert(1, 1) - def test_insert_index_period(self): + @pytest.mark.parametrize("insert, coerced_val, coerced_dtype", [ + (pd.Period('2012-01', freq='M'), '2012-01', 'period[M]'), + (pd.Timestamp('2012-01-01'), pd.Timestamp('2012-01-01'), np.object), + (1, 1, np.object), + ('x', 'x', np.object)]) + def test_insert_index_period(self, insert, coerced_val, coerced_dtype): obj = pd.PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04'], freq='M') - self.assertEqual(obj.dtype, 'period[M]') - - # period + period => period - exp = pd.PeriodIndex(['2011-01', '2012-01', '2011-02', - '2011-03', '2011-04'], freq='M') - self._assert_insert_conversion(obj, pd.Period('2012-01', freq='M'), - exp, 'period[M]') - - # period + datetime64 => object - exp = pd.Index([pd.Period('2011-01', freq='M'), - pd.Timestamp('2012-01-01'), - pd.Period('2011-02', freq='M'), - pd.Period('2011-03', freq='M'), - pd.Period('2011-04', freq='M')], freq='M') - self._assert_insert_conversion(obj, pd.Timestamp('2012-01-01'), - exp, np.object) - - # period + int => object - exp = pd.Index([pd.Period('2011-01', freq='M'), - 1, - pd.Period('2011-02', freq='M'), - pd.Period('2011-03', freq='M'), - pd.Period('2011-04', freq='M')], freq='M') - self._assert_insert_conversion(obj, 1, exp, np.object) - - # period + object => object - exp = pd.Index([pd.Period('2011-01', freq='M'), - 'x', - pd.Period('2011-02', freq='M'), - pd.Period('2011-03', freq='M'), - pd.Period('2011-04', freq='M')], freq='M') - self._assert_insert_conversion(obj, 'x', exp, np.object) - - -class TestWhereCoercion(CoercionBase, tm.TestCase): + assert obj.dtype == 'period[M]' + + if isinstance(insert, pd.Period): + index_type = pd.PeriodIndex + else: + index_type = pd.Index + + exp = index_type([pd.Period('2011-01', freq='M'), + 
coerced_val, + pd.Period('2011-02', freq='M'), + pd.Period('2011-03', freq='M'), + pd.Period('2011-04', freq='M')], freq='M') + self._assert_insert_conversion(obj, insert, exp, coerced_dtype) + + def test_insert_index_complex128(self): + pass + + def test_insert_index_bool(self): + pass + + +class TestWhereCoercion(CoercionBase): method = 'where' @@ -525,313 +444,189 @@ def _assert_where_conversion(self, original, cond, values, res = target.where(cond, values) self._assert(res, expected, expected_dtype) - def _where_object_common(self, klass): + @pytest.mark.parametrize("klass", [pd.Series, pd.Index], + ids=['series', 'index']) + @pytest.mark.parametrize("fill_val,exp_dtype", [ + (1, np.object), + (1.1, np.object), + (1 + 1j, np.object), + (True, np.object)]) + def test_where_object(self, klass, fill_val, exp_dtype): obj = klass(list('abcd')) - self.assertEqual(obj.dtype, np.object) + assert obj.dtype == np.object cond = klass([True, False, True, False]) - # object + int -> object - exp = klass(['a', 1, 'c', 1]) - self._assert_where_conversion(obj, cond, 1, exp, np.object) - - values = klass([5, 6, 7, 8]) - exp = klass(['a', 6, 'c', 8]) - self._assert_where_conversion(obj, cond, values, exp, np.object) - - # object + float -> object - exp = klass(['a', 1.1, 'c', 1.1]) - self._assert_where_conversion(obj, cond, 1.1, exp, np.object) - - values = klass([5.5, 6.6, 7.7, 8.8]) - exp = klass(['a', 6.6, 'c', 8.8]) - self._assert_where_conversion(obj, cond, values, exp, np.object) - - # object + complex -> object - exp = klass(['a', 1 + 1j, 'c', 1 + 1j]) - self._assert_where_conversion(obj, cond, 1 + 1j, exp, np.object) - - values = klass([5 + 5j, 6 + 6j, 7 + 7j, 8 + 8j]) - exp = klass(['a', 6 + 6j, 'c', 8 + 8j]) - self._assert_where_conversion(obj, cond, values, exp, np.object) - - if klass is pd.Series: - exp = klass(['a', 1, 'c', 1]) - self._assert_where_conversion(obj, cond, True, exp, np.object) + if fill_val is True and klass is pd.Series: + ret_val = 1 + else: + 
ret_val = fill_val - values = klass([True, False, True, True]) - exp = klass(['a', 0, 'c', 1]) - self._assert_where_conversion(obj, cond, values, exp, np.object) - elif klass is pd.Index: - # object + bool -> object - exp = klass(['a', True, 'c', True]) - self._assert_where_conversion(obj, cond, True, exp, np.object) + exp = klass(['a', ret_val, 'c', ret_val]) + self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype) + if fill_val is True: values = klass([True, False, True, True]) - exp = klass(['a', False, 'c', True]) - self._assert_where_conversion(obj, cond, values, exp, np.object) else: - NotImplementedError - - def test_where_series_object(self): - self._where_object_common(pd.Series) - - def test_where_index_object(self): - self._where_object_common(pd.Index) - - def _where_int64_common(self, klass): + values = klass(fill_val * x for x in [5, 6, 7, 8]) + + exp = klass(['a', values[1], 'c', values[3]]) + self._assert_where_conversion(obj, cond, values, exp, exp_dtype) + + @pytest.mark.parametrize("klass", [pd.Series, pd.Index], + ids=['series', 'index']) + @pytest.mark.parametrize("fill_val,exp_dtype", [ + (1, np.int64), + (1.1, np.float64), + (1 + 1j, np.complex128), + (True, np.object)]) + def test_where_int64(self, klass, fill_val, exp_dtype): + if klass is pd.Index and exp_dtype is np.complex128: + pytest.skip("Complex Index not supported") obj = klass([1, 2, 3, 4]) - self.assertEqual(obj.dtype, np.int64) + assert obj.dtype == np.int64 cond = klass([True, False, True, False]) - # int + int -> int - exp = klass([1, 1, 3, 1]) - self._assert_where_conversion(obj, cond, 1, exp, np.int64) - - values = klass([5, 6, 7, 8]) - exp = klass([1, 6, 3, 8]) - self._assert_where_conversion(obj, cond, values, exp, np.int64) - - # int + float -> float - exp = klass([1, 1.1, 3, 1.1]) - self._assert_where_conversion(obj, cond, 1.1, exp, np.float64) - - values = klass([5.5, 6.6, 7.7, 8.8]) - exp = klass([1, 6.6, 3, 8.8]) - self._assert_where_conversion(obj, cond, 
values, exp, np.float64) - - # int + complex -> complex - if klass is pd.Series: - exp = klass([1, 1 + 1j, 3, 1 + 1j]) - self._assert_where_conversion(obj, cond, 1 + 1j, exp, - np.complex128) - - values = klass([5 + 5j, 6 + 6j, 7 + 7j, 8 + 8j]) - exp = klass([1, 6 + 6j, 3, 8 + 8j]) - self._assert_where_conversion(obj, cond, values, exp, - np.complex128) - - # int + bool -> int - exp = klass([1, 1, 3, 1]) - self._assert_where_conversion(obj, cond, True, exp, np.int64) + exp = klass([1, fill_val, 3, fill_val]) + self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype) - values = klass([True, False, True, True]) - exp = klass([1, 0, 3, 1]) - self._assert_where_conversion(obj, cond, values, exp, np.int64) - - def test_where_series_int64(self): - self._where_int64_common(pd.Series) - - def test_where_index_int64(self): - self._where_int64_common(pd.Index) - - def _where_float64_common(self, klass): + if fill_val is True: + values = klass([True, False, True, True]) + else: + values = klass(x * fill_val for x in [5, 6, 7, 8]) + exp = klass([1, values[1], 3, values[3]]) + self._assert_where_conversion(obj, cond, values, exp, exp_dtype) + + @pytest.mark.parametrize("klass", [pd.Series, pd.Index], + ids=['series', 'index']) + @pytest.mark.parametrize("fill_val, exp_dtype", [ + (1, np.float64), + (1.1, np.float64), + (1 + 1j, np.complex128), + (True, np.object)]) + def test_where_float64(self, klass, fill_val, exp_dtype): + if klass is pd.Index and exp_dtype is np.complex128: + pytest.skip("Complex Index not supported") obj = klass([1.1, 2.2, 3.3, 4.4]) - self.assertEqual(obj.dtype, np.float64) + assert obj.dtype == np.float64 cond = klass([True, False, True, False]) - # float + int -> float - exp = klass([1.1, 1.0, 3.3, 1.0]) - self._assert_where_conversion(obj, cond, 1, exp, np.float64) - - values = klass([5, 6, 7, 8]) - exp = klass([1.1, 6.0, 3.3, 8.0]) - self._assert_where_conversion(obj, cond, values, exp, np.float64) - - # float + float -> float - exp = 
klass([1.1, 1.1, 3.3, 1.1]) - self._assert_where_conversion(obj, cond, 1.1, exp, np.float64) - - values = klass([5.5, 6.6, 7.7, 8.8]) - exp = klass([1.1, 6.6, 3.3, 8.8]) - self._assert_where_conversion(obj, cond, values, exp, np.float64) - - # float + complex -> complex - if klass is pd.Series: - exp = klass([1.1, 1 + 1j, 3.3, 1 + 1j]) - self._assert_where_conversion(obj, cond, 1 + 1j, exp, - np.complex128) + exp = klass([1.1, fill_val, 3.3, fill_val]) + self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype) - values = klass([5 + 5j, 6 + 6j, 7 + 7j, 8 + 8j]) - exp = klass([1.1, 6 + 6j, 3.3, 8 + 8j]) - self._assert_where_conversion(obj, cond, values, exp, - np.complex128) - - # float + bool -> float - exp = klass([1.1, 1.0, 3.3, 1.0]) - self._assert_where_conversion(obj, cond, True, exp, np.float64) - - values = klass([True, False, True, True]) - exp = klass([1.1, 0.0, 3.3, 1.0]) - self._assert_where_conversion(obj, cond, values, exp, np.float64) - - def test_where_series_float64(self): - self._where_float64_common(pd.Series) - - def test_where_index_float64(self): - self._where_float64_common(pd.Index) - - def test_where_series_complex128(self): + if fill_val is True: + values = klass([True, False, True, True]) + else: + values = klass(x * fill_val for x in [5, 6, 7, 8]) + exp = klass([1.1, values[1], 3.3, values[3]]) + self._assert_where_conversion(obj, cond, values, exp, exp_dtype) + + @pytest.mark.parametrize("fill_val,exp_dtype", [ + (1, np.complex128), + (1.1, np.complex128), + (1 + 1j, np.complex128), + (True, np.object)]) + def test_where_series_complex128(self, fill_val, exp_dtype): obj = pd.Series([1 + 1j, 2 + 2j, 3 + 3j, 4 + 4j]) - self.assertEqual(obj.dtype, np.complex128) + assert obj.dtype == np.complex128 cond = pd.Series([True, False, True, False]) - # complex + int -> complex - exp = pd.Series([1 + 1j, 1, 3 + 3j, 1]) - self._assert_where_conversion(obj, cond, 1, exp, np.complex128) - - values = pd.Series([5, 6, 7, 8]) - exp = 
pd.Series([1 + 1j, 6.0, 3 + 3j, 8.0]) - self._assert_where_conversion(obj, cond, values, exp, np.complex128) + exp = pd.Series([1 + 1j, fill_val, 3 + 3j, fill_val]) + self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype) - # complex + float -> complex - exp = pd.Series([1 + 1j, 1.1, 3 + 3j, 1.1]) - self._assert_where_conversion(obj, cond, 1.1, exp, np.complex128) - - values = pd.Series([5.5, 6.6, 7.7, 8.8]) - exp = pd.Series([1 + 1j, 6.6, 3 + 3j, 8.8]) - self._assert_where_conversion(obj, cond, values, exp, np.complex128) - - # complex + complex -> complex - exp = pd.Series([1 + 1j, 1 + 1j, 3 + 3j, 1 + 1j]) - self._assert_where_conversion(obj, cond, 1 + 1j, exp, np.complex128) - - values = pd.Series([5 + 5j, 6 + 6j, 7 + 7j, 8 + 8j]) - exp = pd.Series([1 + 1j, 6 + 6j, 3 + 3j, 8 + 8j]) - self._assert_where_conversion(obj, cond, values, exp, np.complex128) - - # complex + bool -> complex - exp = pd.Series([1 + 1j, 1, 3 + 3j, 1]) - self._assert_where_conversion(obj, cond, True, exp, np.complex128) - - values = pd.Series([True, False, True, True]) - exp = pd.Series([1 + 1j, 0, 3 + 3j, 1]) - self._assert_where_conversion(obj, cond, values, exp, np.complex128) + if fill_val is True: + values = pd.Series([True, False, True, True]) + else: + values = pd.Series(x * fill_val for x in [5, 6, 7, 8]) + exp = pd.Series([1 + 1j, values[1], 3 + 3j, values[3]]) + self._assert_where_conversion(obj, cond, values, exp, exp_dtype) - def test_where_index_complex128(self): - pass + @pytest.mark.parametrize("fill_val,exp_dtype", [ + (1, np.object), + (1.1, np.object), + (1 + 1j, np.object), + (True, np.bool)]) + def test_where_series_bool(self, fill_val, exp_dtype): - def test_where_series_bool(self): obj = pd.Series([True, False, True, False]) - self.assertEqual(obj.dtype, np.bool) + assert obj.dtype == np.bool cond = pd.Series([True, False, True, False]) - # bool + int -> int - exp = pd.Series([1, 1, 1, 1]) - self._assert_where_conversion(obj, cond, 1, exp, np.int64) - - 
values = pd.Series([5, 6, 7, 8]) - exp = pd.Series([1, 6, 1, 8]) - self._assert_where_conversion(obj, cond, values, exp, np.int64) - - # bool + float -> float - exp = pd.Series([1.0, 1.1, 1.0, 1.1]) - self._assert_where_conversion(obj, cond, 1.1, exp, np.float64) - - values = pd.Series([5.5, 6.6, 7.7, 8.8]) - exp = pd.Series([1.0, 6.6, 1.0, 8.8]) - self._assert_where_conversion(obj, cond, values, exp, np.float64) - - # bool + complex -> complex - exp = pd.Series([1, 1 + 1j, 1, 1 + 1j]) - self._assert_where_conversion(obj, cond, 1 + 1j, exp, np.complex128) - - values = pd.Series([5 + 5j, 6 + 6j, 7 + 7j, 8 + 8j]) - exp = pd.Series([1, 6 + 6j, 1, 8 + 8j]) - self._assert_where_conversion(obj, cond, values, exp, np.complex128) - - # bool + bool -> bool - exp = pd.Series([True, True, True, True]) - self._assert_where_conversion(obj, cond, True, exp, np.bool) - - values = pd.Series([True, False, True, True]) - exp = pd.Series([True, False, True, True]) - self._assert_where_conversion(obj, cond, values, exp, np.bool) - - def test_where_index_bool(self): - pass + exp = pd.Series([True, fill_val, True, fill_val]) + self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype) - def test_where_series_datetime64(self): + if fill_val is True: + values = pd.Series([True, False, True, True]) + else: + values = pd.Series(x * fill_val for x in [5, 6, 7, 8]) + exp = pd.Series([True, values[1], True, values[3]]) + self._assert_where_conversion(obj, cond, values, exp, exp_dtype) + + @pytest.mark.parametrize("fill_val,exp_dtype", [ + (pd.Timestamp('2012-01-01'), 'datetime64[ns]'), + (pd.Timestamp('2012-01-01', tz='US/Eastern'), np.object)], + ids=['datetime64', 'datetime64tz']) + def test_where_series_datetime64(self, fill_val, exp_dtype): obj = pd.Series([pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02'), pd.Timestamp('2011-01-03'), pd.Timestamp('2011-01-04')]) - self.assertEqual(obj.dtype, 'datetime64[ns]') + assert obj.dtype == 'datetime64[ns]' cond = pd.Series([True, 
False, True, False]) - # datetime64 + datetime64 -> datetime64 - exp = pd.Series([pd.Timestamp('2011-01-01'), - pd.Timestamp('2012-01-01'), - pd.Timestamp('2011-01-03'), - pd.Timestamp('2012-01-01')]) - self._assert_where_conversion(obj, cond, pd.Timestamp('2012-01-01'), - exp, 'datetime64[ns]') - - values = pd.Series([pd.Timestamp('2012-01-01'), - pd.Timestamp('2012-01-02'), - pd.Timestamp('2012-01-03'), - pd.Timestamp('2012-01-04')]) - exp = pd.Series([pd.Timestamp('2011-01-01'), - pd.Timestamp('2012-01-02'), - pd.Timestamp('2011-01-03'), - pd.Timestamp('2012-01-04')]) - self._assert_where_conversion(obj, cond, values, exp, 'datetime64[ns]') - - # ToDo: coerce to object - msg = "cannot coerce a Timestamp with a tz on a naive Block" - with tm.assertRaisesRegexp(TypeError, msg): - obj.where(cond, pd.Timestamp('2012-01-01', tz='US/Eastern')) - - # ToDo: do not coerce to UTC, must be object - values = pd.Series([pd.Timestamp('2012-01-01', tz='US/Eastern'), - pd.Timestamp('2012-01-02', tz='US/Eastern'), - pd.Timestamp('2012-01-03', tz='US/Eastern'), - pd.Timestamp('2012-01-04', tz='US/Eastern')]) - exp = pd.Series([pd.Timestamp('2011-01-01'), - pd.Timestamp('2012-01-02 05:00'), - pd.Timestamp('2011-01-03'), - pd.Timestamp('2012-01-04 05:00')]) - self._assert_where_conversion(obj, cond, values, exp, 'datetime64[ns]') + exp = pd.Series([pd.Timestamp('2011-01-01'), fill_val, + pd.Timestamp('2011-01-03'), fill_val]) + self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype) - def test_where_index_datetime64(self): + values = pd.Series(pd.date_range(fill_val, periods=4)) + if fill_val.tz: + exp = pd.Series([pd.Timestamp('2011-01-01'), + pd.Timestamp('2012-01-02 05:00'), + pd.Timestamp('2011-01-03'), + pd.Timestamp('2012-01-04 05:00')]) + self._assert_where_conversion(obj, cond, values, exp, + 'datetime64[ns]') + pytest.xfail("ToDo: do not coerce to UTC, must be object") + + exp = pd.Series([pd.Timestamp('2011-01-01'), values[1], + pd.Timestamp('2011-01-03'), 
values[3]]) + self._assert_where_conversion(obj, cond, values, exp, exp_dtype) + + @pytest.mark.parametrize("fill_val,exp_dtype", [ + (pd.Timestamp('2012-01-01'), 'datetime64[ns]'), + (pd.Timestamp('2012-01-01', tz='US/Eastern'), np.object)], + ids=['datetime64', 'datetime64tz']) + def test_where_index_datetime(self, fill_val, exp_dtype): obj = pd.Index([pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02'), pd.Timestamp('2011-01-03'), pd.Timestamp('2011-01-04')]) - self.assertEqual(obj.dtype, 'datetime64[ns]') + assert obj.dtype == 'datetime64[ns]' cond = pd.Index([True, False, True, False]) - # datetime64 + datetime64 -> datetime64 - # must support scalar - msg = "cannot coerce a Timestamp with a tz on a naive Block" - with tm.assertRaises(TypeError): - obj.where(cond, pd.Timestamp('2012-01-01')) - - values = pd.Index([pd.Timestamp('2012-01-01'), - pd.Timestamp('2012-01-02'), - pd.Timestamp('2012-01-03'), - pd.Timestamp('2012-01-04')]) - exp = pd.Index([pd.Timestamp('2011-01-01'), - pd.Timestamp('2012-01-02'), - pd.Timestamp('2011-01-03'), - pd.Timestamp('2012-01-04')]) - self._assert_where_conversion(obj, cond, values, exp, 'datetime64[ns]') - - # ToDo: coerce to object msg = ("Index\\(\\.\\.\\.\\) must be called with a collection " "of some kind") - with tm.assertRaisesRegexp(TypeError, msg): - obj.where(cond, pd.Timestamp('2012-01-01', tz='US/Eastern')) - - # ToDo: do not ignore timezone, must be object - values = pd.Index([pd.Timestamp('2012-01-01', tz='US/Eastern'), - pd.Timestamp('2012-01-02', tz='US/Eastern'), - pd.Timestamp('2012-01-03', tz='US/Eastern'), - pd.Timestamp('2012-01-04', tz='US/Eastern')]) + with tm.assert_raises_regex(TypeError, msg): + obj.where(cond, fill_val) + + values = pd.Index(pd.date_range(fill_val, periods=4)) exp = pd.Index([pd.Timestamp('2011-01-01'), pd.Timestamp('2012-01-02'), pd.Timestamp('2011-01-03'), pd.Timestamp('2012-01-04')]) - self._assert_where_conversion(obj, cond, values, exp, 'datetime64[ns]') + + if fill_val.tz: + 
self._assert_where_conversion(obj, cond, values, exp, + 'datetime64[ns]') + pytest.xfail("ToDo: do not ignore timezone, must be object") + self._assert_where_conversion(obj, cond, values, exp, exp_dtype) + pytest.xfail("datetime64 + datetime64 -> datetime64 must support" + " scalar") + + def test_where_index_complex128(self): + pass + + def test_where_index_bool(self): + pass def test_where_series_datetime64tz(self): pass @@ -852,12 +647,15 @@ def test_where_index_period(self): pass -class TestFillnaSeriesCoercion(CoercionBase, tm.TestCase): +class TestFillnaSeriesCoercion(CoercionBase): # not indexing, but place here for consisntency method = 'fillna' + def test_has_comprehensive_tests(self): + pass + def _assert_fillna_conversion(self, original, value, expected, expected_dtype): """ test coercion triggered by fillna """ @@ -865,273 +663,112 @@ def _assert_fillna_conversion(self, original, value, res = target.fillna(value) self._assert(res, expected, expected_dtype) - def _fillna_object_common(self, klass): + @pytest.mark.parametrize("klass", [pd.Series, pd.Index], + ids=['series', 'index']) + @pytest.mark.parametrize("fill_val, fill_dtype", [ + (1, np.object), + (1.1, np.object), + (1 + 1j, np.object), + (True, np.object)]) + def test_fillna_object(self, klass, fill_val, fill_dtype): obj = klass(['a', np.nan, 'c', 'd']) - self.assertEqual(obj.dtype, np.object) - - # object + int -> object - exp = klass(['a', 1, 'c', 'd']) - self._assert_fillna_conversion(obj, 1, exp, np.object) - - # object + float -> object - exp = klass(['a', 1.1, 'c', 'd']) - self._assert_fillna_conversion(obj, 1.1, exp, np.object) - - # object + complex -> object - exp = klass(['a', 1 + 1j, 'c', 'd']) - self._assert_fillna_conversion(obj, 1 + 1j, exp, np.object) - - # object + bool -> object - exp = klass(['a', True, 'c', 'd']) - self._assert_fillna_conversion(obj, True, exp, np.object) + assert obj.dtype == np.object + + exp = klass(['a', fill_val, 'c', 'd']) + 
self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype) + + @pytest.mark.parametrize("klass", [pd.Series, pd.Index], + ids=['series', 'index']) + @pytest.mark.parametrize("fill_val,fill_dtype", [ + (1, np.float64), + (1.1, np.float64), + (1 + 1j, np.complex128), + (True, np.object)]) + def test_fillna_float64(self, klass, fill_val, fill_dtype): + obj = klass([1.1, np.nan, 3.3, 4.4]) + assert obj.dtype == np.float64 + + exp = klass([1.1, fill_val, 3.3, 4.4]) + # float + complex -> we don't support a complex Index + # complex for Series, + # object for Index + if fill_dtype == np.complex128 and klass == pd.Index: + fill_dtype = np.object + self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype) + + @pytest.mark.parametrize("fill_val,fill_dtype", [ + (1, np.complex128), + (1.1, np.complex128), + (1 + 1j, np.complex128), + (True, np.object)]) + def test_fillna_series_complex128(self, fill_val, fill_dtype): + obj = pd.Series([1 + 1j, np.nan, 3 + 3j, 4 + 4j]) + assert obj.dtype == np.complex128 + + exp = pd.Series([1 + 1j, fill_val, 3 + 3j, 4 + 4j]) + self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype) + + @pytest.mark.parametrize("klass", [pd.Series, pd.Index], + ids=['series', 'index']) + @pytest.mark.parametrize("fill_val,fill_dtype", [ + (pd.Timestamp('2012-01-01'), 'datetime64[ns]'), + (pd.Timestamp('2012-01-01', tz='US/Eastern'), np.object), + (1, np.object), ('x', np.object)], + ids=['datetime64', 'datetime64tz', 'object', 'object']) + def test_fillna_datetime(self, klass, fill_val, fill_dtype): + obj = klass([pd.Timestamp('2011-01-01'), + pd.NaT, + pd.Timestamp('2011-01-03'), + pd.Timestamp('2011-01-04')]) + assert obj.dtype == 'datetime64[ns]' + + exp = klass([pd.Timestamp('2011-01-01'), + fill_val, + pd.Timestamp('2011-01-03'), + pd.Timestamp('2011-01-04')]) + self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype) + + @pytest.mark.parametrize("klass", [pd.Series, pd.Index]) + @pytest.mark.parametrize("fill_val,fill_dtype", [ + 
(pd.Timestamp('2012-01-01', tz='US/Eastern'), + 'datetime64[ns, US/Eastern]'), + (pd.Timestamp('2012-01-01'), np.object), + (pd.Timestamp('2012-01-01', tz='Asia/Tokyo'), np.object), + (1, np.object), + ('x', np.object)]) + def test_fillna_datetime64tz(self, klass, fill_val, fill_dtype): + tz = 'US/Eastern' - def test_fillna_series_object(self): - self._fillna_object_common(pd.Series) + obj = klass([pd.Timestamp('2011-01-01', tz=tz), + pd.NaT, + pd.Timestamp('2011-01-03', tz=tz), + pd.Timestamp('2011-01-04', tz=tz)]) + assert obj.dtype == 'datetime64[ns, US/Eastern]' - def test_fillna_index_object(self): - self._fillna_object_common(pd.Index) + exp = klass([pd.Timestamp('2011-01-01', tz=tz), + fill_val, + pd.Timestamp('2011-01-03', tz=tz), + pd.Timestamp('2011-01-04', tz=tz)]) + self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype) def test_fillna_series_int64(self): - # int can't hold NaN pass def test_fillna_index_int64(self): pass - def _fillna_float64_common(self, klass): - obj = klass([1.1, np.nan, 3.3, 4.4]) - self.assertEqual(obj.dtype, np.float64) - - # float + int -> float - exp = klass([1.1, 1.0, 3.3, 4.4]) - self._assert_fillna_conversion(obj, 1, exp, np.float64) - - # float + float -> float - exp = klass([1.1, 1.1, 3.3, 4.4]) - self._assert_fillna_conversion(obj, 1.1, exp, np.float64) - - if klass is pd.Series: - # float + complex -> complex - exp = klass([1.1, 1 + 1j, 3.3, 4.4]) - self._assert_fillna_conversion(obj, 1 + 1j, exp, np.complex128) - elif klass is pd.Index: - # float + complex -> object - exp = klass([1.1, 1 + 1j, 3.3, 4.4]) - self._assert_fillna_conversion(obj, 1 + 1j, exp, np.object) - else: - NotImplementedError - - # float + bool -> float - exp = klass([1.1, 1.0, 3.3, 4.4]) - self._assert_fillna_conversion(obj, True, exp, np.float64) - - def test_fillna_series_float64(self): - self._fillna_float64_common(pd.Series) - - def test_fillna_index_float64(self): - self._fillna_float64_common(pd.Index) - - def 
test_fillna_series_complex128(self): - obj = pd.Series([1 + 1j, np.nan, 3 + 3j, 4 + 4j]) - self.assertEqual(obj.dtype, np.complex128) - - # complex + int -> complex - exp = pd.Series([1 + 1j, 1, 3 + 3j, 4 + 4j]) - self._assert_fillna_conversion(obj, 1, exp, np.complex128) - - # complex + float -> complex - exp = pd.Series([1 + 1j, 1.1, 3 + 3j, 4 + 4j]) - self._assert_fillna_conversion(obj, 1.1, exp, np.complex128) - - # complex + complex -> complex - exp = pd.Series([1 + 1j, 1 + 1j, 3 + 3j, 4 + 4j]) - self._assert_fillna_conversion(obj, 1 + 1j, exp, np.complex128) - - # complex + bool -> complex - exp = pd.Series([1 + 1j, 1, 3 + 3j, 4 + 4j]) - self._assert_fillna_conversion(obj, True, exp, np.complex128) - - def test_fillna_index_complex128(self): - self._fillna_float64_common(pd.Index) - def test_fillna_series_bool(self): - # bool can't hold NaN pass def test_fillna_index_bool(self): pass - def test_fillna_series_datetime64(self): - obj = pd.Series([pd.Timestamp('2011-01-01'), - pd.NaT, - pd.Timestamp('2011-01-03'), - pd.Timestamp('2011-01-04')]) - self.assertEqual(obj.dtype, 'datetime64[ns]') - - # datetime64 + datetime64 => datetime64 - exp = pd.Series([pd.Timestamp('2011-01-01'), - pd.Timestamp('2012-01-01'), - pd.Timestamp('2011-01-03'), - pd.Timestamp('2011-01-04')]) - self._assert_fillna_conversion(obj, pd.Timestamp('2012-01-01'), - exp, 'datetime64[ns]') - - # datetime64 + datetime64tz => object - exp = pd.Series([pd.Timestamp('2011-01-01'), - pd.Timestamp('2012-01-01', tz='US/Eastern'), - pd.Timestamp('2011-01-03'), - pd.Timestamp('2011-01-04')]) - value = pd.Timestamp('2012-01-01', tz='US/Eastern') - self._assert_fillna_conversion(obj, value, exp, np.object) - - # datetime64 + int => object - # ToDo: must be coerced to object - exp = pd.Series([pd.Timestamp('2011-01-01'), - pd.Timestamp(1), - pd.Timestamp('2011-01-03'), - pd.Timestamp('2011-01-04')]) - self._assert_fillna_conversion(obj, 1, exp, 'datetime64[ns]') - - # datetime64 + object => object - exp 
= pd.Series([pd.Timestamp('2011-01-01'), - 'x', - pd.Timestamp('2011-01-03'), - pd.Timestamp('2011-01-04')]) - self._assert_fillna_conversion(obj, 'x', exp, np.object) - - def test_fillna_series_datetime64tz(self): - tz = 'US/Eastern' - - obj = pd.Series([pd.Timestamp('2011-01-01', tz=tz), - pd.NaT, - pd.Timestamp('2011-01-03', tz=tz), - pd.Timestamp('2011-01-04', tz=tz)]) - self.assertEqual(obj.dtype, 'datetime64[ns, US/Eastern]') - - # datetime64tz + datetime64tz => datetime64tz - exp = pd.Series([pd.Timestamp('2011-01-01', tz=tz), - pd.Timestamp('2012-01-01', tz=tz), - pd.Timestamp('2011-01-03', tz=tz), - pd.Timestamp('2011-01-04', tz=tz)]) - value = pd.Timestamp('2012-01-01', tz=tz) - self._assert_fillna_conversion(obj, value, exp, - 'datetime64[ns, US/Eastern]') - - # datetime64tz + datetime64 => object - exp = pd.Series([pd.Timestamp('2011-01-01', tz=tz), - pd.Timestamp('2012-01-01'), - pd.Timestamp('2011-01-03', tz=tz), - pd.Timestamp('2011-01-04', tz=tz)]) - value = pd.Timestamp('2012-01-01') - self._assert_fillna_conversion(obj, value, exp, np.object) - - # datetime64tz + datetime64tz(different tz) => object - exp = pd.Series([pd.Timestamp('2011-01-01', tz=tz), - pd.Timestamp('2012-01-01', tz='Asia/Tokyo'), - pd.Timestamp('2011-01-03', tz=tz), - pd.Timestamp('2011-01-04', tz=tz)]) - value = pd.Timestamp('2012-01-01', tz='Asia/Tokyo') - self._assert_fillna_conversion(obj, value, exp, np.object) - - # datetime64tz + int => datetime64tz - # ToDo: must be object - exp = pd.Series([pd.Timestamp('2011-01-01', tz=tz), - pd.Timestamp(1, tz=tz), - pd.Timestamp('2011-01-03', tz=tz), - pd.Timestamp('2011-01-04', tz=tz)]) - self._assert_fillna_conversion(obj, 1, exp, - 'datetime64[ns, US/Eastern]') - - # datetime64tz + object => object - exp = pd.Series([pd.Timestamp('2011-01-01', tz=tz), - 'x', - pd.Timestamp('2011-01-03', tz=tz), - pd.Timestamp('2011-01-04', tz=tz)]) - self._assert_fillna_conversion(obj, 'x', exp, np.object) - def 
test_fillna_series_timedelta64(self): pass def test_fillna_series_period(self): pass - def test_fillna_index_datetime64(self): - obj = pd.DatetimeIndex(['2011-01-01', 'NaT', '2011-01-03', - '2011-01-04']) - self.assertEqual(obj.dtype, 'datetime64[ns]') - - # datetime64 + datetime64 => datetime64 - exp = pd.DatetimeIndex(['2011-01-01', '2012-01-01', - '2011-01-03', '2011-01-04']) - self._assert_fillna_conversion(obj, pd.Timestamp('2012-01-01'), - exp, 'datetime64[ns]') - - # datetime64 + datetime64tz => object - exp = pd.Index([pd.Timestamp('2011-01-01'), - pd.Timestamp('2012-01-01', tz='US/Eastern'), - pd.Timestamp('2011-01-03'), - pd.Timestamp('2011-01-04')]) - value = pd.Timestamp('2012-01-01', tz='US/Eastern') - self._assert_fillna_conversion(obj, value, exp, np.object) - - # datetime64 + int => object - exp = pd.Index([pd.Timestamp('2011-01-01'), - 1, - pd.Timestamp('2011-01-03'), - pd.Timestamp('2011-01-04')]) - self._assert_fillna_conversion(obj, 1, exp, np.object) - - # datetime64 + object => object - exp = pd.Index([pd.Timestamp('2011-01-01'), - 'x', - pd.Timestamp('2011-01-03'), - pd.Timestamp('2011-01-04')]) - self._assert_fillna_conversion(obj, 'x', exp, np.object) - - def test_fillna_index_datetime64tz(self): - tz = 'US/Eastern' - - obj = pd.DatetimeIndex(['2011-01-01', 'NaT', '2011-01-03', - '2011-01-04'], tz=tz) - self.assertEqual(obj.dtype, 'datetime64[ns, US/Eastern]') - - # datetime64tz + datetime64tz => datetime64tz - exp = pd.DatetimeIndex(['2011-01-01', '2012-01-01', - '2011-01-03', '2011-01-04'], tz=tz) - value = pd.Timestamp('2012-01-01', tz=tz) - self._assert_fillna_conversion(obj, value, exp, - 'datetime64[ns, US/Eastern]') - - # datetime64tz + datetime64 => object - exp = pd.Index([pd.Timestamp('2011-01-01', tz=tz), - pd.Timestamp('2012-01-01'), - pd.Timestamp('2011-01-03', tz=tz), - pd.Timestamp('2011-01-04', tz=tz)]) - value = pd.Timestamp('2012-01-01') - self._assert_fillna_conversion(obj, value, exp, np.object) - - # datetime64tz + 
datetime64tz(different tz) => object - exp = pd.Index([pd.Timestamp('2011-01-01', tz=tz), - pd.Timestamp('2012-01-01', tz='Asia/Tokyo'), - pd.Timestamp('2011-01-03', tz=tz), - pd.Timestamp('2011-01-04', tz=tz)]) - value = pd.Timestamp('2012-01-01', tz='Asia/Tokyo') - self._assert_fillna_conversion(obj, value, exp, np.object) - - # datetime64tz + int => object - exp = pd.Index([pd.Timestamp('2011-01-01', tz=tz), - 1, - pd.Timestamp('2011-01-03', tz=tz), - pd.Timestamp('2011-01-04', tz=tz)]) - self._assert_fillna_conversion(obj, 1, exp, np.object) - - # datetime64tz + object => object - exp = pd.Index([pd.Timestamp('2011-01-01', tz=tz), - 'x', - pd.Timestamp('2011-01-03', tz=tz), - pd.Timestamp('2011-01-04', tz=tz)]) - self._assert_fillna_conversion(obj, 'x', exp, np.object) - def test_fillna_index_timedelta64(self): pass @@ -1139,25 +776,54 @@ def test_fillna_index_period(self): pass -class TestReplaceSeriesCoercion(CoercionBase, tm.TestCase): - - # not indexing, but place here for consisntency +class TestReplaceSeriesCoercion(CoercionBase): klasses = ['series'] method = 'replace' - def setUp(self): - self.rep = {} - self.rep['object'] = ['a', 'b'] - self.rep['int64'] = [4, 5] - self.rep['float64'] = [1.1, 2.2] - self.rep['complex128'] = [1 + 1j, 2 + 2j] - self.rep['bool'] = [True, False] + rep = {} + rep['object'] = ['a', 'b'] + rep['int64'] = [4, 5] + rep['float64'] = [1.1, 2.2] + rep['complex128'] = [1 + 1j, 2 + 2j] + rep['bool'] = [True, False] + rep['datetime64[ns]'] = [pd.Timestamp('2011-01-01'), + pd.Timestamp('2011-01-03')] + + for tz in ['UTC', 'US/Eastern']: + # to test tz => different tz replacement + key = 'datetime64[ns, {0}]'.format(tz) + rep[key] = [pd.Timestamp('2011-01-01', tz=tz), + pd.Timestamp('2011-01-03', tz=tz)] + + rep['timedelta64[ns]'] = [pd.Timedelta('1 day'), + pd.Timedelta('2 day')] + + @pytest.mark.parametrize('how', ['dict', 'series']) + @pytest.mark.parametrize('to_key', [ + 'object', 'int64', 'float64', 'complex128', 'bool', 
'datetime64[ns]', + 'datetime64[ns, UTC]', 'datetime64[ns, US/Eastern]', 'timedelta64[ns]' + ], ids=['object', 'int64', 'float64', 'complex128', 'bool', + 'datetime64', 'datetime64tz', 'datetime64tz', 'timedelta64']) + @pytest.mark.parametrize('from_key', [ + 'object', 'int64', 'float64', 'complex128', 'bool', 'datetime64[ns]', + 'datetime64[ns, UTC]', 'datetime64[ns, US/Eastern]', 'timedelta64[ns]'] + ) + def test_replace_series(self, how, to_key, from_key): + if from_key == 'bool' and how == 'series' and compat.PY3: + # doesn't work in PY3, though ...dict_from_bool works fine + pytest.skip("doesn't work as in PY3") - def _assert_replace_conversion(self, from_key, to_key, how): index = pd.Index([3, 4], name='xxx') obj = pd.Series(self.rep[from_key], index=index, name='yyy') - self.assertEqual(obj.dtype, from_key) + assert obj.dtype == from_key + + if (from_key.startswith('datetime') and to_key.startswith('datetime')): + # tested below + return + elif from_key in ['datetime64[ns, US/Eastern]', 'datetime64[ns, UTC]']: + # tested below + return if how == 'dict': replacer = dict(zip(self.rep[from_key], self.rep[to_key])) @@ -1168,27 +834,11 @@ def _assert_replace_conversion(self, from_key, to_key, how): result = obj.replace(replacer) - # buggy on windows for bool/int64 - if (from_key == 'bool' and - to_key == 'int64' and - tm.is_platform_windows()): - pytest.skip("windows platform buggy: {0} -> {1}".format - (from_key, to_key)) - - if ((from_key == 'float64' and - to_key in ('bool', 'int64')) or - + if ((from_key == 'float64' and to_key in ('int64')) or (from_key == 'complex128' and - to_key in ('bool', 'int64', 'float64')) or - - (from_key == 'int64' and - to_key in ('bool')) or + to_key in ('int64', 'float64'))): - # TODO_GH12747 The result must be int? 
- (from_key == 'bool' and to_key == 'int64')): - - # buggy on 32-bit - if tm.is_platform_32bit(): + if compat.is_platform_32bit() or compat.is_platform_windows(): pytest.skip("32-bit platform buggy: {0} -> {1}".format (from_key, to_key)) @@ -1198,63 +848,73 @@ def _assert_replace_conversion(self, from_key, to_key, how): else: exp = pd.Series(self.rep[to_key], index=index, name='yyy') - self.assertEqual(exp.dtype, to_key) + assert exp.dtype == to_key tm.assert_series_equal(result, exp) - def test_replace_series_object(self): - from_key = 'object' - for to_key in self.rep: - self._assert_replace_conversion(from_key, to_key, how='dict') - - for to_key in self.rep: - self._assert_replace_conversion(from_key, to_key, how='series') + # TODO(jbrockmendel) commented out to only have a single xfail printed + @pytest.mark.xfail(reason='GH #18376, tzawareness-compat bug ' + 'in BlockManager.replace_list') + # @pytest.mark.parametrize('how', ['dict', 'series']) + # @pytest.mark.parametrize('to_key', ['timedelta64[ns]', 'bool', 'object', + # 'complex128', 'float64', 'int64']) + # @pytest.mark.parametrize('from_key', ['datetime64[ns, UTC]', + # 'datetime64[ns, US/Eastern]']) + # def test_replace_series_datetime_tz(self, how, to_key, from_key): + def test_replace_series_datetime_tz(self): + how = 'series' + from_key = 'datetime64[ns, US/Eastern]' + to_key = 'timedelta64[ns]' - def test_replace_series_int64(self): - from_key = 'int64' - for to_key in self.rep: - self._assert_replace_conversion(from_key, to_key, how='dict') - - for to_key in self.rep: - self._assert_replace_conversion(from_key, to_key, how='series') - - def test_replace_series_float64(self): - from_key = 'float64' - for to_key in self.rep: - self._assert_replace_conversion(from_key, to_key, how='dict') - - for to_key in self.rep: - self._assert_replace_conversion(from_key, to_key, how='series') - - def test_replace_series_complex128(self): - from_key = 'complex128' - for to_key in self.rep: - 
self._assert_replace_conversion(from_key, to_key, how='dict') + index = pd.Index([3, 4], name='xxx') + obj = pd.Series(self.rep[from_key], index=index, name='yyy') + assert obj.dtype == from_key - for to_key in self.rep: - self._assert_replace_conversion(from_key, to_key, how='series') + if how == 'dict': + replacer = dict(zip(self.rep[from_key], self.rep[to_key])) + elif how == 'series': + replacer = pd.Series(self.rep[to_key], index=self.rep[from_key]) + else: + raise ValueError - def test_replace_series_bool(self): - from_key = 'bool' - for to_key in self.rep: - self._assert_replace_conversion(from_key, to_key, how='dict') + result = obj.replace(replacer) + exp = pd.Series(self.rep[to_key], index=index, name='yyy') + assert exp.dtype == to_key - for to_key in self.rep: + tm.assert_series_equal(result, exp) - if compat.PY3: - # doesn't work in PY3, though ...dict_from_bool works fine - pytest.skip("doesn't work as in PY3") + # TODO(jreback) commented out to only have a single xfail printed + @pytest.mark.xfail(reason="different tz, " + "currently mask_missing raises SystemError") + # @pytest.mark.parametrize('how', ['dict', 'series']) + # @pytest.mark.parametrize('to_key', [ + # 'datetime64[ns]', 'datetime64[ns, UTC]', + # 'datetime64[ns, US/Eastern]']) + # @pytest.mark.parametrize('from_key', [ + # 'datetime64[ns]', 'datetime64[ns, UTC]', + # 'datetime64[ns, US/Eastern]']) + # def test_replace_series_datetime_datetime(self, how, to_key, from_key): + def test_replace_series_datetime_datetime(self): + how = 'dict' + to_key = 'datetime64[ns]' + from_key = 'datetime64[ns]' - self._assert_replace_conversion(from_key, to_key, how='series') + index = pd.Index([3, 4], name='xxx') + obj = pd.Series(self.rep[from_key], index=index, name='yyy') + assert obj.dtype == from_key - def test_replace_series_datetime64(self): - pass + if how == 'dict': + replacer = dict(zip(self.rep[from_key], self.rep[to_key])) + elif how == 'series': + replacer = pd.Series(self.rep[to_key], 
index=self.rep[from_key]) + else: + raise ValueError - def test_replace_series_datetime64tz(self): - pass + result = obj.replace(replacer) + exp = pd.Series(self.rep[to_key], index=index, name='yyy') + assert exp.dtype == to_key - def test_replace_series_timedelta64(self): - pass + tm.assert_series_equal(result, exp) def test_replace_series_period(self): pass diff --git a/pandas/tests/indexing/test_datetime.py b/pandas/tests/indexing/test_datetime.py index 1c4e5772d316f..a5c12e4152c90 100644 --- a/pandas/tests/indexing/test_datetime.py +++ b/pandas/tests/indexing/test_datetime.py @@ -4,7 +4,34 @@ from pandas.util import testing as tm -class TestDatetimeIndex(tm.TestCase): +class TestDatetimeIndex(object): + + def test_setitem_with_datetime_tz(self): + # 16889 + # support .loc with alignment and tz-aware DatetimeIndex + mask = np.array([True, False, True, False]) + + idx = date_range('20010101', periods=4, tz='UTC') + df = DataFrame({'a': np.arange(4)}, index=idx).astype('float64') + + result = df.copy() + result.loc[mask, :] = df.loc[mask, :] + tm.assert_frame_equal(result, df) + + result = df.copy() + result.loc[mask] = df.loc[mask] + tm.assert_frame_equal(result, df) + + idx = date_range('20010101', periods=4) + df = DataFrame({'a': np.arange(4)}, index=idx).astype('float64') + + result = df.copy() + result.loc[mask, :] = df.loc[mask, :] + tm.assert_frame_equal(result, df) + + result = df.copy() + result.loc[mask] = df.loc[mask] + tm.assert_frame_equal(result, df) def test_indexing_with_datetime_tz(self): @@ -35,10 +62,10 @@ def test_indexing_with_datetime_tz(self): df = DataFrame({'a': date_range('2014-01-01', periods=10, tz='UTC')}) result = df.iloc[5] expected = Timestamp('2014-01-06 00:00:00+0000', tz='UTC', freq='D') - self.assertEqual(result, expected) + assert result == expected result = df.loc[5] - self.assertEqual(result, expected) + assert result == expected # indexing - boolean result = df[df.a > df.a[3]] @@ -54,23 +81,55 @@ def 
test_indexing_with_datetime_tz(self): 'US/Pacific') # trying to set a single element on a part of a different timezone - def f(): - df.loc[df.new_col == 'new', 'time'] = v + # this converts to object + df2 = df.copy() + df2.loc[df2.new_col == 'new', 'time'] = v - self.assertRaises(ValueError, f) + expected = Series([v[0], df.loc[1, 'time']], name='time') + tm.assert_series_equal(df2.time, expected) v = df.loc[df.new_col == 'new', 'time'] + pd.Timedelta('1s') df.loc[df.new_col == 'new', 'time'] = v tm.assert_series_equal(df.loc[df.new_col == 'new', 'time'], v) + def test_consistency_with_tz_aware_scalar(self): + # xef gh-12938 + # various ways of indexing the same tz-aware scalar + df = Series([Timestamp('2016-03-30 14:35:25', + tz='Europe/Brussels')]).to_frame() + + df = pd.concat([df, df]).reset_index(drop=True) + expected = Timestamp('2016-03-30 14:35:25+0200', + tz='Europe/Brussels') + + result = df[0][0] + assert result == expected + + result = df.iloc[0, 0] + assert result == expected + + result = df.loc[0, 0] + assert result == expected + + result = df.iat[0, 0] + assert result == expected + + result = df.at[0, 0] + assert result == expected + + result = df[0].loc[0] + assert result == expected + + result = df[0].at[0] + assert result == expected + def test_indexing_with_datetimeindex_tz(self): # GH 12050 # indexing on a series with a datetimeindex with tz - index = pd.date_range('2015-01-01', periods=2, tz='utc') + index = date_range('2015-01-01', periods=2, tz='utc') - ser = pd.Series(range(2), index=index, - dtype='int64') + ser = Series(range(2), index=index, dtype='int64') # list-like indexing @@ -81,7 +140,7 @@ def test_indexing_with_datetimeindex_tz(self): # setitem result = ser.copy() result[sel] = 1 - expected = pd.Series(1, index=index) + expected = Series(1, index=index) tm.assert_series_equal(result, expected) # .loc getitem @@ -90,36 +149,35 @@ def test_indexing_with_datetimeindex_tz(self): # .loc setitem result = ser.copy() result.loc[sel] = 1 - 
expected = pd.Series(1, index=index) + expected = Series(1, index=index) tm.assert_series_equal(result, expected) # single element indexing # getitem - self.assertEqual(ser[index[1]], 1) + assert ser[index[1]] == 1 # setitem result = ser.copy() result[index[1]] = 5 - expected = pd.Series([0, 5], index=index) + expected = Series([0, 5], index=index) tm.assert_series_equal(result, expected) # .loc getitem - self.assertEqual(ser.loc[index[1]], 1) + assert ser.loc[index[1]] == 1 # .loc setitem result = ser.copy() result.loc[index[1]] = 5 - expected = pd.Series([0, 5], index=index) + expected = Series([0, 5], index=index) tm.assert_series_equal(result, expected) def test_partial_setting_with_datetimelike_dtype(self): # GH9478 # a datetimeindex alignment issue with partial setting - df = pd.DataFrame(np.arange(6.).reshape(3, 2), columns=list('AB'), - index=pd.date_range('1/1/2000', periods=3, - freq='1H')) + df = DataFrame(np.arange(6.).reshape(3, 2), columns=list('AB'), + index=date_range('1/1/2000', periods=3, freq='1H')) expected = df.copy() expected['C'] = [expected.index[0]] + [pd.NaT, pd.NaT] @@ -136,7 +194,7 @@ def test_loc_setitem_datetime(self): for conv in [lambda x: x, lambda x: x.to_datetime64(), lambda x: x.to_pydatetime(), lambda x: np.datetime64(x)]: - df = pd.DataFrame() + df = DataFrame() df.loc[conv(dt1), 'one'] = 100 df.loc[conv(dt2), 'one'] = 200 @@ -163,7 +221,9 @@ def test_series_partial_set_datetime(self): Timestamp('2011-01-03')] exp = Series([np.nan, 0.2, np.nan], index=pd.DatetimeIndex(keys, name='idx'), name='s') - tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True) def test_series_partial_set_period(self): # GH 11497 @@ -188,5 +248,7 @@ def test_series_partial_set_period(self): pd.Period('2011-01-03', freq='D')] exp = Series([np.nan, 0.2, np.nan], index=pd.PeriodIndex(keys, name='idx'), 
name='s') - result = ser.loc[keys] + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = ser.loc[keys] tm.assert_series_equal(result, exp) diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index 99e7460b2a3de..e3f93924aca0d 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -1,13 +1,16 @@ # -*- coding: utf-8 -*- +import pytest + from warnings import catch_warnings import numpy as np -from pandas import Series, DataFrame, Index, Float64Index +from pandas import (Series, DataFrame, Index, Float64Index, Int64Index, + RangeIndex) from pandas.util.testing import assert_series_equal, assert_almost_equal import pandas.util.testing as tm -class TestFloatIndexers(tm.TestCase): +class TestFloatIndexers(object): def check(self, result, original, indexer, getitem): """ @@ -46,13 +49,13 @@ def test_scalar_error(self): def f(): s.iloc[3.0] - self.assertRaisesRegexp(TypeError, - 'cannot do positional indexing', - f) + tm.assert_raises_regex(TypeError, + 'cannot do positional indexing', + f) def f(): s.iloc[3.0] = 0 - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) def test_scalar_non_numeric(self): @@ -87,7 +90,7 @@ def f(): error = KeyError else: error = TypeError - self.assertRaises(error, f) + pytest.raises(error, f) # label based can be a TypeError or KeyError def f(): @@ -97,15 +100,15 @@ def f(): error = KeyError else: error = TypeError - self.assertRaises(error, f) + pytest.raises(error, f) # contains - self.assertFalse(3.0 in s) + assert 3.0 not in s # setting with a float fails with iloc def f(): s.iloc[3.0] = 0 - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) # setting with an indexer if s.index.inferred_type in ['categorical']: @@ -121,26 +124,26 @@ def f(): # s2 = s.copy() # def f(): # idxr(s2)[3.0] = 0 - # self.assertRaises(TypeError, f) + # pytest.raises(TypeError, f) pass else: s2 = s.copy() s2.loc[3.0] = 10 - 
self.assertTrue(s2.index.is_object()) + assert s2.index.is_object() for idxr in [lambda x: x.ix, lambda x: x]: s2 = s.copy() with catch_warnings(record=True): idxr(s2)[3.0] = 0 - self.assertTrue(s2.index.is_object()) + assert s2.index.is_object() # fallsback to position selection, series only s = Series(np.arange(len(i)), index=i) s[3] - self.assertRaises(TypeError, lambda: s[3.0]) + pytest.raises(TypeError, lambda: s[3.0]) def test_scalar_with_mixed(self): @@ -157,44 +160,55 @@ def f(): with catch_warnings(record=True): idxr(s2)[1.0] - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) - self.assertRaises(KeyError, lambda: s2.loc[1.0]) + pytest.raises(KeyError, lambda: s2.loc[1.0]) result = s2.loc['b'] expected = 2 - self.assertEqual(result, expected) + assert result == expected # mixed index so we have label # indexing - for idxr in [lambda x: x.ix, - lambda x: x]: + for idxr in [lambda x: x]: def f(): - with catch_warnings(record=True): - idxr(s3)[1.0] + idxr(s3)[1.0] - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) result = idxr(s3)[1] expected = 2 - self.assertEqual(result, expected) + assert result == expected - self.assertRaises(TypeError, lambda: s3.iloc[1.0]) - self.assertRaises(KeyError, lambda: s3.loc[1.0]) + # mixed index so we have label + # indexing + for idxr in [lambda x: x.ix]: + with catch_warnings(record=True): + + def f(): + idxr(s3)[1.0] + + pytest.raises(TypeError, f) + + result = idxr(s3)[1] + expected = 2 + assert result == expected + + pytest.raises(TypeError, lambda: s3.iloc[1.0]) + pytest.raises(KeyError, lambda: s3.loc[1.0]) result = s3.loc[1.5] expected = 3 - self.assertEqual(result, expected) + assert result == expected def test_scalar_integer(self): # test how scalar float indexers work on int indexes # integer index - for index in [tm.makeIntIndex, tm.makeRangeIndex]: + for i in [Int64Index(range(5)), RangeIndex(5)]: - i = index(5) for s in [Series(np.arange(len(i))), DataFrame(np.random.randn(len(i), 
len(i)), index=i, columns=i)]: @@ -214,7 +228,8 @@ def test_scalar_integer(self): (lambda x: x, True)]: if isinstance(s, Series): - compare = self.assertEqual + def compare(x, y): + assert x == y expected = 100 else: compare = tm.assert_series_equal @@ -237,7 +252,7 @@ def test_scalar_integer(self): # contains # coerce to equal int - self.assertTrue(3.0 in s) + assert 3.0 in s def test_scalar_float(self): @@ -270,10 +285,10 @@ def f(): # random integer is a KeyError with catch_warnings(record=True): - self.assertRaises(KeyError, lambda: idxr(s)[3.5]) + pytest.raises(KeyError, lambda: idxr(s)[3.5]) # contains - self.assertTrue(3.0 in s) + assert 3.0 in s # iloc succeeds with an integer expected = s.iloc[3] @@ -284,11 +299,11 @@ def f(): self.check(result, s, 3, False) # iloc raises with a float - self.assertRaises(TypeError, lambda: s.iloc[3.0]) + pytest.raises(TypeError, lambda: s.iloc[3.0]) def g(): s2.iloc[3.0] = 0 - self.assertRaises(TypeError, g) + pytest.raises(TypeError, g) def test_slice_non_numeric(self): @@ -311,7 +326,7 @@ def test_slice_non_numeric(self): def f(): s.iloc[l] - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) for idxr in [lambda x: x.ix, lambda x: x.loc, @@ -321,7 +336,7 @@ def f(): def f(): with catch_warnings(record=True): idxr(s)[l] - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) # setitem for l in [slice(3.0, 4), @@ -330,7 +345,7 @@ def f(): def f(): s.iloc[l] = 0 - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) for idxr in [lambda x: x.ix, lambda x: x.loc, @@ -339,17 +354,17 @@ def f(): def f(): with catch_warnings(record=True): idxr(s)[l] = 0 - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) def test_slice_integer(self): # same as above, but for Integer based indexes # these coerce to a like integer - # oob indiciates if we are out of bounds + # oob indicates if we are out of bounds # of positional indexing - for index, oob in [(tm.makeIntIndex(5), False), - 
(tm.makeRangeIndex(5), False), - (tm.makeIntIndex(5) + 10, True)]: + for index, oob in [(Int64Index(range(5)), False), + (RangeIndex(5), False), + (Int64Index(range(5)) + 10, True)]: # s is an in-range index s = Series(range(5), index=index) @@ -378,7 +393,7 @@ def test_slice_integer(self): def f(): s[l] - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) # getitem out-of-bounds for l in [slice(-6, 6), @@ -402,7 +417,7 @@ def f(): def f(): s[slice(-6.0, 6.0)] - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) # getitem odd floats for l, res1 in [(slice(2.5, 4), slice(3, 5)), @@ -425,7 +440,7 @@ def f(): def f(): s[l] - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) # setitem for l in [slice(3.0, 4), @@ -438,13 +453,13 @@ def f(): with catch_warnings(record=True): idxr(sc)[l] = 0 result = idxr(sc)[l].values.ravel() - self.assertTrue((result == 0).all()) + assert (result == 0).all() # positional indexing def f(): s[l] = 0 - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) def test_integer_positional_indexing(self): """ make sure that we are raising on positional indexing @@ -466,26 +481,23 @@ def test_integer_positional_indexing(self): def f(): idxr(s)[l] - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) def test_slice_integer_frame_getitem(self): # similar to above, but on the getitem dim (of a DataFrame) - for index in [tm.makeIntIndex, tm.makeRangeIndex]: + for index in [Int64Index(range(5)), RangeIndex(5)]: - index = index(5) s = DataFrame(np.random.randn(5, 2), index=index) - for idxr in [lambda x: x.loc, - lambda x: x.ix]: + def f(idxr): # getitem for l in [slice(0.0, 1), slice(0, 1.0), slice(0.0, 1.0)]: - with catch_warnings(record=True): - result = idxr(s)[l] + result = idxr(s)[l] indexer = slice(0, 2) self.check(result, s, indexer, False) @@ -493,7 +505,7 @@ def test_slice_integer_frame_getitem(self): def f(): s[l] - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) # getitem 
out-of-bounds for l in [slice(-10, 10), @@ -506,22 +518,21 @@ def f(): def f(): s[slice(-10.0, 10.0)] - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) # getitem odd floats for l, res in [(slice(0.5, 1), slice(1, 2)), (slice(0, 0.5), slice(0, 1)), (slice(0.5, 1.5), slice(1, 2))]: - with catch_warnings(record=True): - result = idxr(s)[l] + result = idxr(s)[l] self.check(result, s, res, False) # positional indexing def f(): s[l] - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) # setitem for l in [slice(3.0, 4), @@ -529,16 +540,19 @@ def f(): slice(3.0, 4.0)]: sc = s.copy() - with catch_warnings(record=True): - idxr(sc)[l] = 0 - result = idxr(sc)[l].values.ravel() - self.assertTrue((result == 0).all()) + idxr(sc)[l] = 0 + result = idxr(sc)[l].values.ravel() + assert (result == 0).all() # positional indexing def f(): s[l] = 0 - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) + + f(lambda x: x.loc) + with catch_warnings(record=True): + f(lambda x: x.ix) def test_slice_float(self): @@ -560,24 +574,24 @@ def test_slice_float(self): with catch_warnings(record=True): result = idxr(s)[l] if isinstance(s, Series): - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) else: - self.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # setitem s2 = s.copy() with catch_warnings(record=True): idxr(s2)[l] = 0 result = idxr(s2)[l].values.ravel() - self.assertTrue((result == 0).all()) + assert (result == 0).all() def test_floating_index_doc_example(self): index = Index([1.5, 2, 3, 4.5, 5]) s = Series(range(5), index=index) - self.assertEqual(s[3], 2) - self.assertEqual(s.loc[3], 2) - self.assertEqual(s.loc[3], 2) - self.assertEqual(s.iloc[3], 3) + assert s[3] == 2 + assert s.loc[3] == 2 + assert s.loc[3] == 2 + assert s.iloc[3] == 3 def test_floating_misc(self): @@ -596,23 +610,23 @@ def test_floating_misc(self): result1 = s[5.0] result2 = s.loc[5.0] result3 = s.loc[5.0] - 
self.assertEqual(result1, result2) - self.assertEqual(result1, result3) + assert result1 == result2 + assert result1 == result3 result1 = s[5] result2 = s.loc[5] result3 = s.loc[5] - self.assertEqual(result1, result2) - self.assertEqual(result1, result3) + assert result1 == result2 + assert result1 == result3 - self.assertEqual(s[5.0], s[5]) + assert s[5.0] == s[5] # value not found (and no fallbacking at all) # scalar integers - self.assertRaises(KeyError, lambda: s.loc[4]) - self.assertRaises(KeyError, lambda: s.loc[4]) - self.assertRaises(KeyError, lambda: s[4]) + pytest.raises(KeyError, lambda: s.loc[4]) + pytest.raises(KeyError, lambda: s.loc[4]) + pytest.raises(KeyError, lambda: s[4]) # fancy floats/integers create the correct entry (as nan) # fancy tests @@ -700,15 +714,17 @@ def test_floating_misc(self): assert_series_equal(result1, Series([1], index=[2.5])) def test_floating_tuples(self): - # GH13509 + # see gh-13509 s = Series([(1, 1), (2, 2), (3, 3)], index=[0.0, 0.1, 0.2], name='foo') + result = s[0.0] - self.assertEqual(result, (1, 1)) + assert result == (1, 1) + expected = Series([(1, 1), (2, 2)], index=[0.0, 0.0], name='foo') s = Series([(1, 1), (2, 2), (3, 3)], index=[0.0, 0.0, 0.2], name='foo') + result = s[0.0] - expected = Series([(1, 1), (2, 2)], index=[0.0, 0.0], name='foo') - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_float64index_slicing_bug(self): # GH 5557, related to slicing a float index diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py new file mode 100644 index 0000000000000..a5506abe8f355 --- /dev/null +++ b/pandas/tests/indexing/test_iloc.py @@ -0,0 +1,651 @@ +""" test positional based indexing with iloc """ + +import pytest + +from warnings import catch_warnings +import numpy as np + +import pandas as pd +from pandas.compat import lrange, lmap +from pandas import Series, DataFrame, date_range, concat, isna +from pandas.util import testing as tm +from 
pandas.tests.indexing.common import Base + + +class TestiLoc(Base): + + def test_iloc_exceeds_bounds(self): + + # GH6296 + # iloc should allow indexers that exceed the bounds + df = DataFrame(np.random.random_sample((20, 5)), columns=list('ABCDE')) + expected = df + + # lists of positions should raise IndexErrror! + with tm.assert_raises_regex(IndexError, + 'positional indexers ' + 'are out-of-bounds'): + df.iloc[:, [0, 1, 2, 3, 4, 5]] + pytest.raises(IndexError, lambda: df.iloc[[1, 30]]) + pytest.raises(IndexError, lambda: df.iloc[[1, -30]]) + pytest.raises(IndexError, lambda: df.iloc[[100]]) + + s = df['A'] + pytest.raises(IndexError, lambda: s.iloc[[100]]) + pytest.raises(IndexError, lambda: s.iloc[[-100]]) + + # still raise on a single indexer + msg = 'single positional indexer is out-of-bounds' + with tm.assert_raises_regex(IndexError, msg): + df.iloc[30] + pytest.raises(IndexError, lambda: df.iloc[-30]) + + # GH10779 + # single positive/negative indexer exceeding Series bounds should raise + # an IndexError + with tm.assert_raises_regex(IndexError, msg): + s.iloc[30] + pytest.raises(IndexError, lambda: s.iloc[-30]) + + # slices are ok + result = df.iloc[:, 4:10] # 0 < start < len < stop + expected = df.iloc[:, 4:] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:, -4:-10] # stop < 0 < start < len + expected = df.iloc[:, :0] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:, 10:4:-1] # 0 < stop < len < start (down) + expected = df.iloc[:, :4:-1] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:, 4:-10:-1] # stop < 0 < start < len (down) + expected = df.iloc[:, 4::-1] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:, -10:4] # start < 0 < stop < len + expected = df.iloc[:, :4] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:, 10:4] # 0 < stop < len < start + expected = df.iloc[:, :0] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:, -10:-11:-1] # stop < start < 0 < len 
(down) + expected = df.iloc[:, :0] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:, 10:11] # 0 < len < start < stop + expected = df.iloc[:, :0] + tm.assert_frame_equal(result, expected) + + # slice bounds exceeding is ok + result = s.iloc[18:30] + expected = s.iloc[18:] + tm.assert_series_equal(result, expected) + + result = s.iloc[30:] + expected = s.iloc[:0] + tm.assert_series_equal(result, expected) + + result = s.iloc[30::-1] + expected = s.iloc[::-1] + tm.assert_series_equal(result, expected) + + # doc example + def check(result, expected): + str(result) + result.dtypes + tm.assert_frame_equal(result, expected) + + dfl = DataFrame(np.random.randn(5, 2), columns=list('AB')) + check(dfl.iloc[:, 2:3], DataFrame(index=dfl.index)) + check(dfl.iloc[:, 1:3], dfl.iloc[:, [1]]) + check(dfl.iloc[4:6], dfl.iloc[[4]]) + + pytest.raises(IndexError, lambda: dfl.iloc[[4, 5, 6]]) + pytest.raises(IndexError, lambda: dfl.iloc[:, 4]) + + def test_iloc_getitem_int(self): + + # integer + self.check_result('integer', 'iloc', 2, 'ix', + {0: 4, 1: 6, 2: 8}, typs=['ints', 'uints']) + self.check_result('integer', 'iloc', 2, 'indexer', 2, + typs=['labels', 'mixed', 'ts', 'floats', 'empty'], + fails=IndexError) + + def test_iloc_getitem_neg_int(self): + + # neg integer + self.check_result('neg int', 'iloc', -1, 'ix', + {0: 6, 1: 9, 2: 12}, typs=['ints', 'uints']) + self.check_result('neg int', 'iloc', -1, 'indexer', -1, + typs=['labels', 'mixed', 'ts', 'floats', 'empty'], + fails=IndexError) + + def test_iloc_getitem_list_int(self): + + # list of ints + self.check_result('list int', 'iloc', [0, 1, 2], 'ix', + {0: [0, 2, 4], 1: [0, 3, 6], 2: [0, 4, 8]}, + typs=['ints', 'uints']) + self.check_result('list int', 'iloc', [2], 'ix', + {0: [4], 1: [6], 2: [8]}, typs=['ints', 'uints']) + self.check_result('list int', 'iloc', [0, 1, 2], 'indexer', [0, 1, 2], + typs=['labels', 'mixed', 'ts', 'floats', 'empty'], + fails=IndexError) + + # array of ints (GH5006), make sure that a 
single indexer is returning + # the correct type + self.check_result('array int', 'iloc', np.array([0, 1, 2]), 'ix', + {0: [0, 2, 4], + 1: [0, 3, 6], + 2: [0, 4, 8]}, typs=['ints', 'uints']) + self.check_result('array int', 'iloc', np.array([2]), 'ix', + {0: [4], 1: [6], 2: [8]}, typs=['ints', 'uints']) + self.check_result('array int', 'iloc', np.array([0, 1, 2]), 'indexer', + [0, 1, 2], + typs=['labels', 'mixed', 'ts', 'floats', 'empty'], + fails=IndexError) + + def test_iloc_getitem_neg_int_can_reach_first_index(self): + # GH10547 and GH10779 + # negative integers should be able to reach index 0 + df = DataFrame({'A': [2, 3, 5], 'B': [7, 11, 13]}) + s = df['A'] + + expected = df.iloc[0] + result = df.iloc[-3] + tm.assert_series_equal(result, expected) + + expected = df.iloc[[0]] + result = df.iloc[[-3]] + tm.assert_frame_equal(result, expected) + + expected = s.iloc[0] + result = s.iloc[-3] + assert result == expected + + expected = s.iloc[[0]] + result = s.iloc[[-3]] + tm.assert_series_equal(result, expected) + + # check the length 1 Series case highlighted in GH10547 + expected = Series(['a'], index=['A']) + result = expected.iloc[[-1]] + tm.assert_series_equal(result, expected) + + def test_iloc_getitem_dups(self): + + # no dups in panel (bug?) 
+ self.check_result('list int (dups)', 'iloc', [0, 1, 1, 3], 'ix', + {0: [0, 2, 2, 6], 1: [0, 3, 3, 9]}, + objs=['series', 'frame'], typs=['ints', 'uints']) + + # GH 6766 + df1 = DataFrame([{'A': None, 'B': 1}, {'A': 2, 'B': 2}]) + df2 = DataFrame([{'A': 3, 'B': 3}, {'A': 4, 'B': 4}]) + df = concat([df1, df2], axis=1) + + # cross-sectional indexing + result = df.iloc[0, 0] + assert isna(result) + + result = df.iloc[0, :] + expected = Series([np.nan, 1, 3, 3], index=['A', 'B', 'A', 'B'], + name=0) + tm.assert_series_equal(result, expected) + + def test_iloc_getitem_array(self): + + # array like + s = Series(index=lrange(1, 4)) + self.check_result('array like', 'iloc', s.index, 'ix', + {0: [2, 4, 6], 1: [3, 6, 9], 2: [4, 8, 12]}, + typs=['ints', 'uints']) + + def test_iloc_getitem_bool(self): + + # boolean indexers + b = [True, False, True, False, ] + self.check_result('bool', 'iloc', b, 'ix', b, typs=['ints', 'uints']) + self.check_result('bool', 'iloc', b, 'ix', b, + typs=['labels', 'mixed', 'ts', 'floats', 'empty'], + fails=IndexError) + + def test_iloc_getitem_slice(self): + + # slices + self.check_result('slice', 'iloc', slice(1, 3), 'ix', + {0: [2, 4], 1: [3, 6], 2: [4, 8]}, + typs=['ints', 'uints']) + self.check_result('slice', 'iloc', slice(1, 3), 'indexer', + slice(1, 3), + typs=['labels', 'mixed', 'ts', 'floats', 'empty'], + fails=IndexError) + + def test_iloc_getitem_slice_dups(self): + + df1 = DataFrame(np.random.randn(10, 4), columns=['A', 'A', 'B', 'B']) + df2 = DataFrame(np.random.randint(0, 10, size=20).reshape(10, 2), + columns=['A', 'C']) + + # axis=1 + df = concat([df1, df2], axis=1) + tm.assert_frame_equal(df.iloc[:, :4], df1) + tm.assert_frame_equal(df.iloc[:, 4:], df2) + + df = concat([df2, df1], axis=1) + tm.assert_frame_equal(df.iloc[:, :2], df2) + tm.assert_frame_equal(df.iloc[:, 2:], df1) + + exp = concat([df2, df1.iloc[:, [0]]], axis=1) + tm.assert_frame_equal(df.iloc[:, 0:3], exp) + + # axis=0 + df = concat([df, df], axis=0) + 
tm.assert_frame_equal(df.iloc[0:10, :2], df2) + tm.assert_frame_equal(df.iloc[0:10, 2:], df1) + tm.assert_frame_equal(df.iloc[10:, :2], df2) + tm.assert_frame_equal(df.iloc[10:, 2:], df1) + + def test_iloc_setitem(self): + df = self.frame_ints + + df.iloc[1, 1] = 1 + result = df.iloc[1, 1] + assert result == 1 + + df.iloc[:, 2:3] = 0 + expected = df.iloc[:, 2:3] + result = df.iloc[:, 2:3] + tm.assert_frame_equal(result, expected) + + # GH5771 + s = Series(0, index=[4, 5, 6]) + s.iloc[1:2] += 1 + expected = Series([0, 1, 0], index=[4, 5, 6]) + tm.assert_series_equal(s, expected) + + @pytest.mark.parametrize( + 'data, indexes, values, expected_k', [ + # test without indexer value in first level of MultiIndex + ([[2, 22, 5], [2, 33, 6]], [0, -1, 1], [2, 3, 1], [7, 10]), + # test like code sample 1 in the issue + ([[1, 22, 555], [1, 33, 666]], [0, -1, 1], [200, 300, 100], + [755, 1066]), + # test like code sample 2 in the issue + ([[1, 3, 7], [2, 4, 8]], [0, -1, 1], [10, 10, 1000], [17, 1018]), + # test like code sample 3 in the issue + ([[1, 11, 4], [2, 22, 5], [3, 33, 6]], [0, -1, 1], [4, 7, 10], + [8, 15, 13]) + ]) + def test_iloc_setitem_int_multiindex_series( + self, data, indexes, values, expected_k): + # GH17148 + df = DataFrame(data=data, columns=['i', 'j', 'k']) + df = df.set_index(['i', 'j']) + + series = df.k.copy() + for i, v in zip(indexes, values): + series.iloc[i] += v + + df['k'] = expected_k + expected = df.k + tm.assert_series_equal(series, expected) + + def test_iloc_setitem_list(self): + + # setitem with an iloc list + df = DataFrame(np.arange(9).reshape((3, 3)), index=["A", "B", "C"], + columns=["A", "B", "C"]) + df.iloc[[0, 1], [1, 2]] + df.iloc[[0, 1], [1, 2]] += 100 + + expected = DataFrame( + np.array([0, 101, 102, 3, 104, 105, 6, 7, 8]).reshape((3, 3)), + index=["A", "B", "C"], columns=["A", "B", "C"]) + tm.assert_frame_equal(df, expected) + + def test_iloc_setitem_pandas_object(self): + # GH 17193, affecting old numpy (1.7 and 1.8) + s_orig = 
Series([0, 1, 2, 3]) + expected = Series([0, -1, -2, 3]) + + s = s_orig.copy() + s.iloc[Series([1, 2])] = [-1, -2] + tm.assert_series_equal(s, expected) + + s = s_orig.copy() + s.iloc[pd.Index([1, 2])] = [-1, -2] + tm.assert_series_equal(s, expected) + + def test_iloc_setitem_dups(self): + + # GH 6766 + # iloc with a mask aligning from another iloc + df1 = DataFrame([{'A': None, 'B': 1}, {'A': 2, 'B': 2}]) + df2 = DataFrame([{'A': 3, 'B': 3}, {'A': 4, 'B': 4}]) + df = concat([df1, df2], axis=1) + + expected = df.fillna(3) + expected['A'] = expected['A'].astype('float64') + inds = np.isnan(df.iloc[:, 0]) + mask = inds[inds].index + df.iloc[mask, 0] = df.iloc[mask, 2] + tm.assert_frame_equal(df, expected) + + # del a dup column across blocks + expected = DataFrame({0: [1, 2], 1: [3, 4]}) + expected.columns = ['B', 'B'] + del df['A'] + tm.assert_frame_equal(df, expected) + + # assign back to self + df.iloc[[0, 1], [0, 1]] = df.iloc[[0, 1], [0, 1]] + tm.assert_frame_equal(df, expected) + + # reversed x 2 + df.iloc[[1, 0], [0, 1]] = df.iloc[[1, 0], [0, 1]].reset_index( + drop=True) + df.iloc[[1, 0], [0, 1]] = df.iloc[[1, 0], [0, 1]].reset_index( + drop=True) + tm.assert_frame_equal(df, expected) + + def test_iloc_getitem_frame(self): + df = DataFrame(np.random.randn(10, 4), index=lrange(0, 20, 2), + columns=lrange(0, 8, 2)) + + result = df.iloc[2] + with catch_warnings(record=True): + exp = df.ix[4] + tm.assert_series_equal(result, exp) + + result = df.iloc[2, 2] + with catch_warnings(record=True): + exp = df.ix[4, 4] + assert result == exp + + # slice + result = df.iloc[4:8] + with catch_warnings(record=True): + expected = df.ix[8:14] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:, 2:3] + with catch_warnings(record=True): + expected = df.ix[:, 4:5] + tm.assert_frame_equal(result, expected) + + # list of integers + result = df.iloc[[0, 1, 3]] + with catch_warnings(record=True): + expected = df.ix[[0, 2, 6]] + tm.assert_frame_equal(result, expected) + + 
result = df.iloc[[0, 1, 3], [0, 1]] + with catch_warnings(record=True): + expected = df.ix[[0, 2, 6], [0, 2]] + tm.assert_frame_equal(result, expected) + + # neg indicies + result = df.iloc[[-1, 1, 3], [-1, 1]] + with catch_warnings(record=True): + expected = df.ix[[18, 2, 6], [6, 2]] + tm.assert_frame_equal(result, expected) + + # dups indicies + result = df.iloc[[-1, -1, 1, 3], [-1, 1]] + with catch_warnings(record=True): + expected = df.ix[[18, 18, 2, 6], [6, 2]] + tm.assert_frame_equal(result, expected) + + # with index-like + s = Series(index=lrange(1, 5)) + result = df.iloc[s.index] + with catch_warnings(record=True): + expected = df.ix[[2, 4, 6, 8]] + tm.assert_frame_equal(result, expected) + + def test_iloc_getitem_labelled_frame(self): + # try with labelled frame + df = DataFrame(np.random.randn(10, 4), + index=list('abcdefghij'), columns=list('ABCD')) + + result = df.iloc[1, 1] + exp = df.loc['b', 'B'] + assert result == exp + + result = df.iloc[:, 2:3] + expected = df.loc[:, ['C']] + tm.assert_frame_equal(result, expected) + + # negative indexing + result = df.iloc[-1, -1] + exp = df.loc['j', 'D'] + assert result == exp + + # out-of-bounds exception + pytest.raises(IndexError, df.iloc.__getitem__, tuple([10, 5])) + + # trying to use a label + pytest.raises(ValueError, df.iloc.__getitem__, tuple(['j', 'D'])) + + def test_iloc_getitem_doc_issue(self): + + # multi axis slicing issue with single block + # surfaced in GH 6059 + + arr = np.random.randn(6, 4) + index = date_range('20130101', periods=6) + columns = list('ABCD') + df = DataFrame(arr, index=index, columns=columns) + + # defines ref_locs + df.describe() + + result = df.iloc[3:5, 0:2] + str(result) + result.dtypes + + expected = DataFrame(arr[3:5, 0:2], index=index[3:5], + columns=columns[0:2]) + tm.assert_frame_equal(result, expected) + + # for dups + df.columns = list('aaaa') + result = df.iloc[3:5, 0:2] + str(result) + result.dtypes + + expected = DataFrame(arr[3:5, 0:2], index=index[3:5], + 
columns=list('aa')) + tm.assert_frame_equal(result, expected) + + # related + arr = np.random.randn(6, 4) + index = list(range(0, 12, 2)) + columns = list(range(0, 8, 2)) + df = DataFrame(arr, index=index, columns=columns) + + df._data.blocks[0].mgr_locs + result = df.iloc[1:5, 2:4] + str(result) + result.dtypes + expected = DataFrame(arr[1:5, 2:4], index=index[1:5], + columns=columns[2:4]) + tm.assert_frame_equal(result, expected) + + def test_iloc_setitem_series(self): + df = DataFrame(np.random.randn(10, 4), index=list('abcdefghij'), + columns=list('ABCD')) + + df.iloc[1, 1] = 1 + result = df.iloc[1, 1] + assert result == 1 + + df.iloc[:, 2:3] = 0 + expected = df.iloc[:, 2:3] + result = df.iloc[:, 2:3] + tm.assert_frame_equal(result, expected) + + s = Series(np.random.randn(10), index=lrange(0, 20, 2)) + + s.iloc[1] = 1 + result = s.iloc[1] + assert result == 1 + + s.iloc[:4] = 0 + expected = s.iloc[:4] + result = s.iloc[:4] + tm.assert_series_equal(result, expected) + + s = Series([-1] * 6) + s.iloc[0::2] = [0, 2, 4] + s.iloc[1::2] = [1, 3, 5] + result = s + expected = Series([0, 1, 2, 3, 4, 5]) + tm.assert_series_equal(result, expected) + + def test_iloc_setitem_list_of_lists(self): + + # GH 7551 + # list-of-list is set incorrectly in mixed vs. 
single dtyped frames + df = DataFrame(dict(A=np.arange(5, dtype='int64'), + B=np.arange(5, 10, dtype='int64'))) + df.iloc[2:4] = [[10, 11], [12, 13]] + expected = DataFrame(dict(A=[0, 1, 10, 12, 4], B=[5, 6, 11, 13, 9])) + tm.assert_frame_equal(df, expected) + + df = DataFrame( + dict(A=list('abcde'), B=np.arange(5, 10, dtype='int64'))) + df.iloc[2:4] = [['x', 11], ['y', 13]] + expected = DataFrame(dict(A=['a', 'b', 'x', 'y', 'e'], + B=[5, 6, 11, 13, 9])) + tm.assert_frame_equal(df, expected) + + def test_iloc_mask(self): + + # GH 3631, iloc with a mask (of a series) should raise + df = DataFrame(lrange(5), list('ABCDE'), columns=['a']) + mask = (df.a % 2 == 0) + pytest.raises(ValueError, df.iloc.__getitem__, tuple([mask])) + mask.index = lrange(len(mask)) + pytest.raises(NotImplementedError, df.iloc.__getitem__, + tuple([mask])) + + # ndarray ok + result = df.iloc[np.array([True] * len(mask), dtype=bool)] + tm.assert_frame_equal(result, df) + + # the possibilities + locs = np.arange(4) + nums = 2 ** locs + reps = lmap(bin, nums) + df = DataFrame({'locs': locs, 'nums': nums}, reps) + + expected = { + (None, ''): '0b1100', + (None, '.loc'): '0b1100', + (None, '.iloc'): '0b1100', + ('index', ''): '0b11', + ('index', '.loc'): '0b11', + ('index', '.iloc'): ('iLocation based boolean indexing ' + 'cannot use an indexable as a mask'), + ('locs', ''): 'Unalignable boolean Series provided as indexer ' + '(index of the boolean Series and of the indexed ' + 'object do not match', + ('locs', '.loc'): 'Unalignable boolean Series provided as indexer ' + '(index of the boolean Series and of the ' + 'indexed object do not match', + ('locs', '.iloc'): ('iLocation based boolean indexing on an ' + 'integer type is not available'), + } + + # UserWarnings from reindex of a boolean mask + with catch_warnings(record=True): + result = dict() + for idx in [None, 'index', 'locs']: + mask = (df.nums > 2).values + if idx: + mask = Series(mask, list(reversed(getattr(df, idx)))) + for method in 
['', '.loc', '.iloc']: + try: + if method: + accessor = getattr(df, method[1:]) + else: + accessor = df + ans = str(bin(accessor[mask]['nums'].sum())) + except Exception as e: + ans = str(e) + + key = tuple([idx, method]) + r = expected.get(key) + if r != ans: + raise AssertionError( + "[%s] does not match [%s], received [%s]" + % (key, ans, r)) + + def test_iloc_non_unique_indexing(self): + + # GH 4017, non-unique indexing (on the axis) + df = DataFrame({'A': [0.1] * 3000, 'B': [1] * 3000}) + idx = np.array(lrange(30)) * 99 + expected = df.iloc[idx] + + df3 = concat([df, 2 * df, 3 * df]) + result = df3.iloc[idx] + + tm.assert_frame_equal(result, expected) + + df2 = DataFrame({'A': [0.1] * 1000, 'B': [1] * 1000}) + df2 = concat([df2, 2 * df2, 3 * df2]) + + sidx = df2.index.to_series() + expected = df2.iloc[idx[idx <= sidx.max()]] + + new_list = [] + for r, s in expected.iterrows(): + new_list.append(s) + new_list.append(s * 2) + new_list.append(s * 3) + + expected = DataFrame(new_list) + expected = concat([expected, DataFrame(index=idx[idx > sidx.max()])]) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = df2.loc[idx] + tm.assert_frame_equal(result, expected, check_index_type=False) + + def test_iloc_empty_list_indexer_is_ok(self): + from pandas.util.testing import makeCustomDataframe as mkdf + df = mkdf(5, 2) + # vertical empty + tm.assert_frame_equal(df.iloc[:, []], df.iloc[:, :0], + check_index_type=True, check_column_type=True) + # horizontal empty + tm.assert_frame_equal(df.iloc[[], :], df.iloc[:0, :], + check_index_type=True, check_column_type=True) + # horizontal empty + tm.assert_frame_equal(df.iloc[[]], df.iloc[:0, :], + check_index_type=True, + check_column_type=True) + + def test_identity_slice_returns_new_object(self): + # GH13873 + original_df = DataFrame({'a': [1, 2, 3]}) + sliced_df = original_df.iloc[:] + assert sliced_df is not original_df + + # should be a shallow copy + original_df['a'] = [4, 4, 4] + assert 
(sliced_df['a'] == 4).all() + + original_series = Series([1, 2, 3, 4, 5, 6]) + sliced_series = original_series.iloc[:] + assert sliced_series is not original_series + + # should also be a shallow copy + original_series[:3] = [7, 8, 9] + assert all(sliced_series[:3] == [7, 8, 9]) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index f7a4af711bbb8..c66310d10ebdc 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -1,1482 +1,34 @@ # -*- coding: utf-8 -*- # pylint: disable-msg=W0612,E1101 -import itertools -import warnings + +""" test fancy indexing & misc """ + +import pytest + +import weakref from warnings import catch_warnings from datetime import datetime -from pandas.types.common import (is_integer_dtype, - is_float_dtype, - is_scalar) -from pandas.compat import range, lrange, lzip, StringIO, lmap -from pandas.tslib import NaT -from numpy import nan -from numpy.random import randn +from pandas.core.dtypes.common import ( + is_integer_dtype, + is_float_dtype) +from pandas.compat import range, lrange, lzip, StringIO import numpy as np import pandas as pd -from pandas import option_context from pandas.core.indexing import _non_reducing_slice, _maybe_numeric_slice -from pandas.core.api import (DataFrame, Index, Series, Panel, isnull, - MultiIndex, Timestamp, Timedelta, UInt64Index) -from pandas.formats.printing import pprint_thing -from pandas import concat -from pandas.core.common import PerformanceWarning -from pandas.tests.indexing.common import _mklbl +from pandas import NaT, DataFrame, Index, Series, MultiIndex import pandas.util.testing as tm -from pandas import date_range +from pandas.tests.indexing.common import Base, _mklbl -_verbose = False # ------------------------------------------------------------------------ # Indexing test cases -def _generate_indices(f, values=False): - """ generate the indicies - if values is True , use the axis values - is False, use the 
range - """ - - axes = f.axes - if values: - axes = [lrange(len(a)) for a in axes] - - return itertools.product(*axes) - - -def _get_value(f, i, values=False): - """ return the value for the location i """ - - # check agains values - if values: - return f.values[i] - - # this is equiv of f[col][row]..... - # v = f - # for a in reversed(i): - # v = v.__getitem__(a) - # return v - with catch_warnings(record=True): - return f.ix[i] - - -def _get_result(obj, method, key, axis): - """ return the result for this obj with this key and this axis """ - - if isinstance(key, dict): - key = key[axis] - - # use an artifical conversion to map the key as integers to the labels - # so ix can work for comparisions - if method == 'indexer': - method = 'ix' - key = obj._get_axis(axis)[key] - - # in case we actually want 0 index slicing - try: - xp = getattr(obj, method).__getitem__(_axify(obj, key, axis)) - except: - xp = getattr(obj, method).__getitem__(key) - - return xp - - -def _axify(obj, key, axis): - # create a tuple accessor - axes = [slice(None)] * obj.ndim - axes[axis] = key - return tuple(axes) - - -class TestIndexing(tm.TestCase): - - _objs = set(['series', 'frame', 'panel']) - _typs = set(['ints', 'uints', 'labels', 'mixed', - 'ts', 'floats', 'empty', 'ts_rev']) - - def setUp(self): - - self.series_ints = Series(np.random.rand(4), index=lrange(0, 8, 2)) - self.frame_ints = DataFrame(np.random.randn(4, 4), - index=lrange(0, 8, 2), - columns=lrange(0, 12, 3)) - self.panel_ints = Panel(np.random.rand(4, 4, 4), - items=lrange(0, 8, 2), - major_axis=lrange(0, 12, 3), - minor_axis=lrange(0, 16, 4)) - - self.series_uints = Series(np.random.rand(4), - index=UInt64Index(lrange(0, 8, 2))) - self.frame_uints = DataFrame(np.random.randn(4, 4), - index=UInt64Index(lrange(0, 8, 2)), - columns=UInt64Index(lrange(0, 12, 3))) - self.panel_uints = Panel(np.random.rand(4, 4, 4), - items=UInt64Index(lrange(0, 8, 2)), - major_axis=UInt64Index(lrange(0, 12, 3)), - 
minor_axis=UInt64Index(lrange(0, 16, 4))) - - self.series_labels = Series(np.random.randn(4), index=list('abcd')) - self.frame_labels = DataFrame(np.random.randn(4, 4), - index=list('abcd'), columns=list('ABCD')) - self.panel_labels = Panel(np.random.randn(4, 4, 4), - items=list('abcd'), - major_axis=list('ABCD'), - minor_axis=list('ZYXW')) - - self.series_mixed = Series(np.random.randn(4), index=[2, 4, 'null', 8]) - self.frame_mixed = DataFrame(np.random.randn(4, 4), - index=[2, 4, 'null', 8]) - self.panel_mixed = Panel(np.random.randn(4, 4, 4), - items=[2, 4, 'null', 8]) - - self.series_ts = Series(np.random.randn(4), - index=date_range('20130101', periods=4)) - self.frame_ts = DataFrame(np.random.randn(4, 4), - index=date_range('20130101', periods=4)) - self.panel_ts = Panel(np.random.randn(4, 4, 4), - items=date_range('20130101', periods=4)) - - dates_rev = (date_range('20130101', periods=4) - .sort_values(ascending=False)) - self.series_ts_rev = Series(np.random.randn(4), - index=dates_rev) - self.frame_ts_rev = DataFrame(np.random.randn(4, 4), - index=dates_rev) - self.panel_ts_rev = Panel(np.random.randn(4, 4, 4), - items=dates_rev) - - self.frame_empty = DataFrame({}) - self.series_empty = Series({}) - self.panel_empty = Panel({}) - - # form agglomerates - for o in self._objs: - - d = dict() - for t in self._typs: - d[t] = getattr(self, '%s_%s' % (o, t), None) - - setattr(self, o, d) - - def check_values(self, f, func, values=False): - - if f is None: - return - axes = f.axes - indicies = itertools.product(*axes) - - for i in indicies: - result = getattr(f, func)[i] - - # check agains values - if values: - expected = f.values[i] - else: - expected = f - for a in reversed(i): - expected = expected.__getitem__(a) - - tm.assert_almost_equal(result, expected) - - def check_result(self, name, method1, key1, method2, key2, typs=None, - objs=None, axes=None, fails=None): - def _eq(t, o, a, obj, k1, k2): - """ compare equal for these 2 keys """ - - if a is not None 
and a > obj.ndim - 1: - return - - def _print(result, error=None): - if error is not None: - error = str(error) - v = ("%-16.16s [%-16.16s]: [typ->%-8.8s,obj->%-8.8s," - "key1->(%-4.4s),key2->(%-4.4s),axis->%s] %s" % - (name, result, t, o, method1, method2, a, error or '')) - if _verbose: - pprint_thing(v) - - try: - rs = getattr(obj, method1).__getitem__(_axify(obj, k1, a)) - - try: - xp = _get_result(obj, method2, k2, a) - except: - result = 'no comp' - _print(result) - return - - detail = None - - try: - if is_scalar(rs) and is_scalar(xp): - self.assertEqual(rs, xp) - elif xp.ndim == 1: - tm.assert_series_equal(rs, xp) - elif xp.ndim == 2: - tm.assert_frame_equal(rs, xp) - elif xp.ndim == 3: - tm.assert_panel_equal(rs, xp) - result = 'ok' - except AssertionError as e: - detail = str(e) - result = 'fail' - - # reverse the checks - if fails is True: - if result == 'fail': - result = 'ok (fail)' - - _print(result) - if not result.startswith('ok'): - raise AssertionError(detail) - - except AssertionError: - raise - except Exception as detail: - - # if we are in fails, the ok, otherwise raise it - if fails is not None: - if isinstance(detail, fails): - result = 'ok (%s)' % type(detail).__name__ - _print(result) - return - - result = type(detail).__name__ - raise AssertionError(_print(result, error=detail)) - - if typs is None: - typs = self._typs - - if objs is None: - objs = self._objs - - if axes is not None: - if not isinstance(axes, (tuple, list)): - axes = [axes] - else: - axes = list(axes) - else: - axes = [0, 1, 2] - - # check - for o in objs: - if o not in self._objs: - continue - - d = getattr(self, o) - for a in axes: - for t in typs: - if t not in self._typs: - continue - - obj = d[t] - if obj is not None: - obj = obj.copy() - - k2 = key2 - _eq(t, o, a, obj, key1, k2) - - def test_ix_deprecation(self): - # GH 15114 - - df = DataFrame({'A': [1, 2, 3]}) - with tm.assert_produces_warning(DeprecationWarning, - check_stacklevel=False): - df.ix[1, 'A'] - - def 
test_indexer_caching(self): - # GH5727 - # make sure that indexers are in the _internal_names_set - n = 1000001 - arrays = [lrange(n), lrange(n)] - index = MultiIndex.from_tuples(lzip(*arrays)) - s = Series(np.zeros(n), index=index) - str(s) - - # setitem - expected = Series(np.ones(n), index=index) - s = Series(np.zeros(n), index=index) - s[s == 0] = 1 - tm.assert_series_equal(s, expected) - - def test_at_and_iat_get(self): - def _check(f, func, values=False): - - if f is not None: - indicies = _generate_indices(f, values) - for i in indicies: - result = getattr(f, func)[i] - expected = _get_value(f, i, values) - tm.assert_almost_equal(result, expected) - - for o in self._objs: - - d = getattr(self, o) - - # iat - for f in [d['ints'], d['uints']]: - _check(f, 'iat', values=True) - - for f in [d['labels'], d['ts'], d['floats']]: - if f is not None: - self.assertRaises(ValueError, self.check_values, f, 'iat') - - # at - for f in [d['ints'], d['uints'], d['labels'], - d['ts'], d['floats']]: - _check(f, 'at') - - def test_at_and_iat_set(self): - def _check(f, func, values=False): - - if f is not None: - indicies = _generate_indices(f, values) - for i in indicies: - getattr(f, func)[i] = 1 - expected = _get_value(f, i, values) - tm.assert_almost_equal(expected, 1) - - for t in self._objs: - - d = getattr(self, t) - - # iat - for f in [d['ints'], d['uints']]: - _check(f, 'iat', values=True) - - for f in [d['labels'], d['ts'], d['floats']]: - if f is not None: - self.assertRaises(ValueError, _check, f, 'iat') - - # at - for f in [d['ints'], d['uints'], d['labels'], - d['ts'], d['floats']]: - _check(f, 'at') - - def test_at_iat_coercion(self): - - # as timestamp is not a tuple! 
- dates = date_range('1/1/2000', periods=8) - df = DataFrame(randn(8, 4), index=dates, columns=['A', 'B', 'C', 'D']) - s = df['A'] - - result = s.at[dates[5]] - xp = s.values[5] - self.assertEqual(result, xp) - - # GH 7729 - # make sure we are boxing the returns - s = Series(['2014-01-01', '2014-02-02'], dtype='datetime64[ns]') - expected = Timestamp('2014-02-02') - - for r in [lambda: s.iat[1], lambda: s.iloc[1]]: - result = r() - self.assertEqual(result, expected) - - s = Series(['1 days', '2 days'], dtype='timedelta64[ns]') - expected = Timedelta('2 days') - - for r in [lambda: s.iat[1], lambda: s.iloc[1]]: - result = r() - self.assertEqual(result, expected) - - def test_iat_invalid_args(self): - pass - - def test_imethods_with_dups(self): - - # GH6493 - # iat/iloc with dups - - s = Series(range(5), index=[1, 1, 2, 2, 3], dtype='int64') - result = s.iloc[2] - self.assertEqual(result, 2) - result = s.iat[2] - self.assertEqual(result, 2) - - self.assertRaises(IndexError, lambda: s.iat[10]) - self.assertRaises(IndexError, lambda: s.iat[-10]) - - result = s.iloc[[2, 3]] - expected = Series([2, 3], [2, 2], dtype='int64') - tm.assert_series_equal(result, expected) - - df = s.to_frame() - result = df.iloc[2] - expected = Series(2, index=[0], name=2) - tm.assert_series_equal(result, expected) - - result = df.iat[2, 0] - expected = 2 - self.assertEqual(result, 2) - - def test_repeated_getitem_dups(self): - # GH 5678 - # repeated gettitems on a dup index returing a ndarray - df = DataFrame( - np.random.random_sample((20, 5)), - index=['ABCDE' [x % 5] for x in range(20)]) - expected = df.loc['A', 0] - result = df.loc[:, 0].loc['A'] - tm.assert_series_equal(result, expected) - - def test_iloc_exceeds_bounds(self): - - # GH6296 - # iloc should allow indexers that exceed the bounds - df = DataFrame(np.random.random_sample((20, 5)), columns=list('ABCDE')) - expected = df - - # lists of positions should raise IndexErrror! 
- with tm.assertRaisesRegexp(IndexError, - 'positional indexers are out-of-bounds'): - df.iloc[:, [0, 1, 2, 3, 4, 5]] - self.assertRaises(IndexError, lambda: df.iloc[[1, 30]]) - self.assertRaises(IndexError, lambda: df.iloc[[1, -30]]) - self.assertRaises(IndexError, lambda: df.iloc[[100]]) - - s = df['A'] - self.assertRaises(IndexError, lambda: s.iloc[[100]]) - self.assertRaises(IndexError, lambda: s.iloc[[-100]]) - - # still raise on a single indexer - msg = 'single positional indexer is out-of-bounds' - with tm.assertRaisesRegexp(IndexError, msg): - df.iloc[30] - self.assertRaises(IndexError, lambda: df.iloc[-30]) - - # GH10779 - # single positive/negative indexer exceeding Series bounds should raise - # an IndexError - with tm.assertRaisesRegexp(IndexError, msg): - s.iloc[30] - self.assertRaises(IndexError, lambda: s.iloc[-30]) - - # slices are ok - result = df.iloc[:, 4:10] # 0 < start < len < stop - expected = df.iloc[:, 4:] - tm.assert_frame_equal(result, expected) - - result = df.iloc[:, -4:-10] # stop < 0 < start < len - expected = df.iloc[:, :0] - tm.assert_frame_equal(result, expected) - - result = df.iloc[:, 10:4:-1] # 0 < stop < len < start (down) - expected = df.iloc[:, :4:-1] - tm.assert_frame_equal(result, expected) - - result = df.iloc[:, 4:-10:-1] # stop < 0 < start < len (down) - expected = df.iloc[:, 4::-1] - tm.assert_frame_equal(result, expected) - - result = df.iloc[:, -10:4] # start < 0 < stop < len - expected = df.iloc[:, :4] - tm.assert_frame_equal(result, expected) - - result = df.iloc[:, 10:4] # 0 < stop < len < start - expected = df.iloc[:, :0] - tm.assert_frame_equal(result, expected) - - result = df.iloc[:, -10:-11:-1] # stop < start < 0 < len (down) - expected = df.iloc[:, :0] - tm.assert_frame_equal(result, expected) - - result = df.iloc[:, 10:11] # 0 < len < start < stop - expected = df.iloc[:, :0] - tm.assert_frame_equal(result, expected) - - # slice bounds exceeding is ok - result = s.iloc[18:30] - expected = s.iloc[18:] - 
tm.assert_series_equal(result, expected) - - result = s.iloc[30:] - expected = s.iloc[:0] - tm.assert_series_equal(result, expected) - - result = s.iloc[30::-1] - expected = s.iloc[::-1] - tm.assert_series_equal(result, expected) - - # doc example - def check(result, expected): - str(result) - result.dtypes - tm.assert_frame_equal(result, expected) - - dfl = DataFrame(np.random.randn(5, 2), columns=list('AB')) - check(dfl.iloc[:, 2:3], DataFrame(index=dfl.index)) - check(dfl.iloc[:, 1:3], dfl.iloc[:, [1]]) - check(dfl.iloc[4:6], dfl.iloc[[4]]) - - self.assertRaises(IndexError, lambda: dfl.iloc[[4, 5, 6]]) - self.assertRaises(IndexError, lambda: dfl.iloc[:, 4]) - - def test_iloc_getitem_int(self): - - # integer - self.check_result('integer', 'iloc', 2, 'ix', - {0: 4, 1: 6, 2: 8}, typs=['ints', 'uints']) - self.check_result('integer', 'iloc', 2, 'indexer', 2, - typs=['labels', 'mixed', 'ts', 'floats', 'empty'], - fails=IndexError) - - def test_iloc_getitem_neg_int(self): - - # neg integer - self.check_result('neg int', 'iloc', -1, 'ix', - {0: 6, 1: 9, 2: 12}, typs=['ints', 'uints']) - self.check_result('neg int', 'iloc', -1, 'indexer', -1, - typs=['labels', 'mixed', 'ts', 'floats', 'empty'], - fails=IndexError) - - def test_iloc_getitem_list_int(self): - - # list of ints - self.check_result('list int', 'iloc', [0, 1, 2], 'ix', - {0: [0, 2, 4], 1: [0, 3, 6], 2: [0, 4, 8]}, - typs=['ints', 'uints']) - self.check_result('list int', 'iloc', [2], 'ix', - {0: [4], 1: [6], 2: [8]}, typs=['ints', 'uints']) - self.check_result('list int', 'iloc', [0, 1, 2], 'indexer', [0, 1, 2], - typs=['labels', 'mixed', 'ts', 'floats', 'empty'], - fails=IndexError) - - # array of ints (GH5006), make sure that a single indexer is returning - # the correct type - self.check_result('array int', 'iloc', np.array([0, 1, 2]), 'ix', - {0: [0, 2, 4], - 1: [0, 3, 6], - 2: [0, 4, 8]}, typs=['ints', 'uints']) - self.check_result('array int', 'iloc', np.array([2]), 'ix', - {0: [4], 1: [6], 2: [8]}, 
typs=['ints', 'uints']) - self.check_result('array int', 'iloc', np.array([0, 1, 2]), 'indexer', - [0, 1, 2], - typs=['labels', 'mixed', 'ts', 'floats', 'empty'], - fails=IndexError) - - def test_iloc_getitem_neg_int_can_reach_first_index(self): - # GH10547 and GH10779 - # negative integers should be able to reach index 0 - df = DataFrame({'A': [2, 3, 5], 'B': [7, 11, 13]}) - s = df['A'] - - expected = df.iloc[0] - result = df.iloc[-3] - tm.assert_series_equal(result, expected) - - expected = df.iloc[[0]] - result = df.iloc[[-3]] - tm.assert_frame_equal(result, expected) - - expected = s.iloc[0] - result = s.iloc[-3] - self.assertEqual(result, expected) - - expected = s.iloc[[0]] - result = s.iloc[[-3]] - tm.assert_series_equal(result, expected) - - # check the length 1 Series case highlighted in GH10547 - expected = pd.Series(['a'], index=['A']) - result = expected.iloc[[-1]] - tm.assert_series_equal(result, expected) - - def test_iloc_getitem_dups(self): - - # no dups in panel (bug?) - self.check_result('list int (dups)', 'iloc', [0, 1, 1, 3], 'ix', - {0: [0, 2, 2, 6], 1: [0, 3, 3, 9]}, - objs=['series', 'frame'], typs=['ints', 'uints']) - - # GH 6766 - df1 = DataFrame([{'A': None, 'B': 1}, {'A': 2, 'B': 2}]) - df2 = DataFrame([{'A': 3, 'B': 3}, {'A': 4, 'B': 4}]) - df = concat([df1, df2], axis=1) - - # cross-sectional indexing - result = df.iloc[0, 0] - self.assertTrue(isnull(result)) - - result = df.iloc[0, :] - expected = Series([np.nan, 1, 3, 3], index=['A', 'B', 'A', 'B'], - name=0) - tm.assert_series_equal(result, expected) - - def test_iloc_getitem_array(self): - - # array like - s = Series(index=lrange(1, 4)) - self.check_result('array like', 'iloc', s.index, 'ix', - {0: [2, 4, 6], 1: [3, 6, 9], 2: [4, 8, 12]}, - typs=['ints', 'uints']) - - def test_iloc_getitem_bool(self): - - # boolean indexers - b = [True, False, True, False, ] - self.check_result('bool', 'iloc', b, 'ix', b, typs=['ints', 'uints']) - self.check_result('bool', 'iloc', b, 'ix', b, - 
typs=['labels', 'mixed', 'ts', 'floats', 'empty'], - fails=IndexError) - - def test_iloc_getitem_slice(self): - - # slices - self.check_result('slice', 'iloc', slice(1, 3), 'ix', - {0: [2, 4], 1: [3, 6], 2: [4, 8]}, - typs=['ints', 'uints']) - self.check_result('slice', 'iloc', slice(1, 3), 'indexer', - slice(1, 3), - typs=['labels', 'mixed', 'ts', 'floats', 'empty'], - fails=IndexError) - - def test_iloc_getitem_slice_dups(self): - - df1 = DataFrame(np.random.randn(10, 4), columns=['A', 'A', 'B', 'B']) - df2 = DataFrame(np.random.randint(0, 10, size=20).reshape(10, 2), - columns=['A', 'C']) - - # axis=1 - df = concat([df1, df2], axis=1) - tm.assert_frame_equal(df.iloc[:, :4], df1) - tm.assert_frame_equal(df.iloc[:, 4:], df2) - - df = concat([df2, df1], axis=1) - tm.assert_frame_equal(df.iloc[:, :2], df2) - tm.assert_frame_equal(df.iloc[:, 2:], df1) - - exp = concat([df2, df1.iloc[:, [0]]], axis=1) - tm.assert_frame_equal(df.iloc[:, 0:3], exp) - - # axis=0 - df = concat([df, df], axis=0) - tm.assert_frame_equal(df.iloc[0:10, :2], df2) - tm.assert_frame_equal(df.iloc[0:10, 2:], df1) - tm.assert_frame_equal(df.iloc[10:, :2], df2) - tm.assert_frame_equal(df.iloc[10:, 2:], df1) - - def test_iloc_setitem(self): - df = self.frame_ints - - df.iloc[1, 1] = 1 - result = df.iloc[1, 1] - self.assertEqual(result, 1) - - df.iloc[:, 2:3] = 0 - expected = df.iloc[:, 2:3] - result = df.iloc[:, 2:3] - tm.assert_frame_equal(result, expected) - - # GH5771 - s = Series(0, index=[4, 5, 6]) - s.iloc[1:2] += 1 - expected = Series([0, 1, 0], index=[4, 5, 6]) - tm.assert_series_equal(s, expected) - - def test_loc_setitem_slice(self): - # GH10503 - - # assigning the same type should not change the type - df1 = DataFrame({'a': [0, 1, 1], - 'b': Series([100, 200, 300], dtype='uint32')}) - ix = df1['a'] == 1 - newb1 = df1.loc[ix, 'b'] + 1 - df1.loc[ix, 'b'] = newb1 - expected = DataFrame({'a': [0, 1, 1], - 'b': Series([100, 201, 301], dtype='uint32')}) - tm.assert_frame_equal(df1, expected) - 
- # assigning a new type should get the inferred type - df2 = DataFrame({'a': [0, 1, 1], 'b': [100, 200, 300]}, - dtype='uint64') - ix = df1['a'] == 1 - newb2 = df2.loc[ix, 'b'] - df1.loc[ix, 'b'] = newb2 - expected = DataFrame({'a': [0, 1, 1], 'b': [100, 200, 300]}, - dtype='uint64') - tm.assert_frame_equal(df2, expected) - - def test_ix_loc_setitem_consistency(self): - - # GH 5771 - # loc with slice and series - s = Series(0, index=[4, 5, 6]) - s.loc[4:5] += 1 - expected = Series([1, 1, 0], index=[4, 5, 6]) - tm.assert_series_equal(s, expected) - - # GH 5928 - # chained indexing assignment - df = DataFrame({'a': [0, 1, 2]}) - expected = df.copy() - with catch_warnings(record=True): - expected.ix[[0, 1, 2], 'a'] = -expected.ix[[0, 1, 2], 'a'] - - with catch_warnings(record=True): - df['a'].ix[[0, 1, 2]] = -df['a'].ix[[0, 1, 2]] - tm.assert_frame_equal(df, expected) - - df = DataFrame({'a': [0, 1, 2], 'b': [0, 1, 2]}) - with catch_warnings(record=True): - df['a'].ix[[0, 1, 2]] = -df['a'].ix[[0, 1, 2]].astype( - 'float64') + 0.5 - expected = DataFrame({'a': [0.5, -0.5, -1.5], 'b': [0, 1, 2]}) - tm.assert_frame_equal(df, expected) - - # GH 8607 - # ix setitem consistency - df = DataFrame({'timestamp': [1413840976, 1413842580, 1413760580], - 'delta': [1174, 904, 161], - 'elapsed': [7673, 9277, 1470]}) - expected = DataFrame({'timestamp': pd.to_datetime( - [1413840976, 1413842580, 1413760580], unit='s'), - 'delta': [1174, 904, 161], - 'elapsed': [7673, 9277, 1470]}) - - df2 = df.copy() - df2['timestamp'] = pd.to_datetime(df['timestamp'], unit='s') - tm.assert_frame_equal(df2, expected) - - df2 = df.copy() - df2.loc[:, 'timestamp'] = pd.to_datetime(df['timestamp'], unit='s') - tm.assert_frame_equal(df2, expected) - - df2 = df.copy() - with catch_warnings(record=True): - df2.ix[:, 2] = pd.to_datetime(df['timestamp'], unit='s') - tm.assert_frame_equal(df2, expected) - - def test_ix_loc_consistency(self): - - # GH 8613 - # some edge cases where ix/loc should return the 
same - # this is not an exhaustive case - - def compare(result, expected): - if is_scalar(expected): - self.assertEqual(result, expected) - else: - self.assertTrue(expected.equals(result)) - - # failure cases for .loc, but these work for .ix - df = pd.DataFrame(np.random.randn(5, 4), columns=list('ABCD')) - for key in [slice(1, 3), tuple([slice(0, 2), slice(0, 2)]), - tuple([slice(0, 2), df.columns[0:2]])]: - - for index in [tm.makeStringIndex, tm.makeUnicodeIndex, - tm.makeDateIndex, tm.makePeriodIndex, - tm.makeTimedeltaIndex]: - df.index = index(len(df.index)) - with catch_warnings(record=True): - df.ix[key] - - self.assertRaises(TypeError, lambda: df.loc[key]) - - df = pd.DataFrame(np.random.randn(5, 4), columns=list('ABCD'), - index=pd.date_range('2012-01-01', periods=5)) - - for key in ['2012-01-03', - '2012-01-31', - slice('2012-01-03', '2012-01-03'), - slice('2012-01-03', '2012-01-04'), - slice('2012-01-03', '2012-01-06', 2), - slice('2012-01-03', '2012-01-31'), - tuple([[True, True, True, False, True]]), ]: - - # getitem - - # if the expected raises, then compare the exceptions - try: - with catch_warnings(record=True): - expected = df.ix[key] - except KeyError: - self.assertRaises(KeyError, lambda: df.loc[key]) - continue - - result = df.loc[key] - compare(result, expected) - - # setitem - df1 = df.copy() - df2 = df.copy() - - with catch_warnings(record=True): - df1.ix[key] = 10 - df2.loc[key] = 10 - compare(df2, df1) - - # edge cases - s = Series([1, 2, 3, 4], index=list('abde')) - - result1 = s['a':'c'] - with catch_warnings(record=True): - result2 = s.ix['a':'c'] - result3 = s.loc['a':'c'] - tm.assert_series_equal(result1, result2) - tm.assert_series_equal(result1, result3) - - # now work rather than raising KeyError - s = Series(range(5), [-2, -1, 1, 2, 3]) - - with catch_warnings(record=True): - result1 = s.ix[-10:3] - result2 = s.loc[-10:3] - tm.assert_series_equal(result1, result2) - - with catch_warnings(record=True): - result1 = s.ix[0:3] - 
result2 = s.loc[0:3] - tm.assert_series_equal(result1, result2) - - def test_loc_setitem_dups(self): - - # GH 6541 - df_orig = DataFrame( - {'me': list('rttti'), - 'foo': list('aaade'), - 'bar': np.arange(5, dtype='float64') * 1.34 + 2, - 'bar2': np.arange(5, dtype='float64') * -.34 + 2}).set_index('me') - - indexer = tuple(['r', ['bar', 'bar2']]) - df = df_orig.copy() - df.loc[indexer] *= 2.0 - tm.assert_series_equal(df.loc[indexer], 2.0 * df_orig.loc[indexer]) - - indexer = tuple(['r', 'bar']) - df = df_orig.copy() - df.loc[indexer] *= 2.0 - self.assertEqual(df.loc[indexer], 2.0 * df_orig.loc[indexer]) - - indexer = tuple(['t', ['bar', 'bar2']]) - df = df_orig.copy() - df.loc[indexer] *= 2.0 - tm.assert_frame_equal(df.loc[indexer], 2.0 * df_orig.loc[indexer]) - - def test_iloc_setitem_dups(self): - - # GH 6766 - # iloc with a mask aligning from another iloc - df1 = DataFrame([{'A': None, 'B': 1}, {'A': 2, 'B': 2}]) - df2 = DataFrame([{'A': 3, 'B': 3}, {'A': 4, 'B': 4}]) - df = concat([df1, df2], axis=1) - - expected = df.fillna(3) - expected['A'] = expected['A'].astype('float64') - inds = np.isnan(df.iloc[:, 0]) - mask = inds[inds].index - df.iloc[mask, 0] = df.iloc[mask, 2] - tm.assert_frame_equal(df, expected) - - # del a dup column across blocks - expected = DataFrame({0: [1, 2], 1: [3, 4]}) - expected.columns = ['B', 'B'] - del df['A'] - tm.assert_frame_equal(df, expected) - - # assign back to self - df.iloc[[0, 1], [0, 1]] = df.iloc[[0, 1], [0, 1]] - tm.assert_frame_equal(df, expected) - - # reversed x 2 - df.iloc[[1, 0], [0, 1]] = df.iloc[[1, 0], [0, 1]].reset_index( - drop=True) - df.iloc[[1, 0], [0, 1]] = df.iloc[[1, 0], [0, 1]].reset_index( - drop=True) - tm.assert_frame_equal(df, expected) - - def test_chained_getitem_with_lists(self): - - # GH6394 - # Regression in chained getitem indexing with embedded list-like from - # 0.12 - def check(result, expected): - tm.assert_numpy_array_equal(result, expected) - tm.assertIsInstance(result, np.ndarray) - - df 
= DataFrame({'A': 5 * [np.zeros(3)], 'B': 5 * [np.ones(3)]}) - expected = df['A'].iloc[2] - result = df.loc[2, 'A'] - check(result, expected) - result2 = df.iloc[2]['A'] - check(result2, expected) - result3 = df['A'].loc[2] - check(result3, expected) - result4 = df['A'].iloc[2] - check(result4, expected) - - def test_loc_getitem_int(self): - - # int label - self.check_result('int label', 'loc', 2, 'ix', 2, - typs=['ints', 'uints'], axes=0) - self.check_result('int label', 'loc', 3, 'ix', 3, - typs=['ints', 'uints'], axes=1) - self.check_result('int label', 'loc', 4, 'ix', 4, - typs=['ints', 'uints'], axes=2) - self.check_result('int label', 'loc', 2, 'ix', 2, - typs=['label'], fails=KeyError) - - def test_loc_getitem_label(self): - - # label - self.check_result('label', 'loc', 'c', 'ix', 'c', typs=['labels'], - axes=0) - self.check_result('label', 'loc', 'null', 'ix', 'null', typs=['mixed'], - axes=0) - self.check_result('label', 'loc', 8, 'ix', 8, typs=['mixed'], axes=0) - self.check_result('label', 'loc', Timestamp('20130102'), 'ix', 1, - typs=['ts'], axes=0) - self.check_result('label', 'loc', 'c', 'ix', 'c', typs=['empty'], - fails=KeyError) - - def test_loc_getitem_label_out_of_range(self): - - # out of range label - self.check_result('label range', 'loc', 'f', 'ix', 'f', - typs=['ints', 'uints', 'labels', 'mixed', 'ts'], - fails=KeyError) - self.check_result('label range', 'loc', 'f', 'ix', 'f', - typs=['floats'], fails=TypeError) - self.check_result('label range', 'loc', 20, 'ix', 20, - typs=['ints', 'uints', 'mixed'], fails=KeyError) - self.check_result('label range', 'loc', 20, 'ix', 20, - typs=['labels'], fails=TypeError) - self.check_result('label range', 'loc', 20, 'ix', 20, typs=['ts'], - axes=0, fails=TypeError) - self.check_result('label range', 'loc', 20, 'ix', 20, typs=['floats'], - axes=0, fails=TypeError) - - def test_loc_getitem_label_list(self): - - # list of labels - self.check_result('list lbl', 'loc', [0, 2, 4], 'ix', [0, 2, 4], - 
typs=['ints', 'uints'], axes=0) - self.check_result('list lbl', 'loc', [3, 6, 9], 'ix', [3, 6, 9], - typs=['ints', 'uints'], axes=1) - self.check_result('list lbl', 'loc', [4, 8, 12], 'ix', [4, 8, 12], - typs=['ints', 'uints'], axes=2) - self.check_result('list lbl', 'loc', ['a', 'b', 'd'], 'ix', - ['a', 'b', 'd'], typs=['labels'], axes=0) - self.check_result('list lbl', 'loc', ['A', 'B', 'C'], 'ix', - ['A', 'B', 'C'], typs=['labels'], axes=1) - self.check_result('list lbl', 'loc', ['Z', 'Y', 'W'], 'ix', - ['Z', 'Y', 'W'], typs=['labels'], axes=2) - self.check_result('list lbl', 'loc', [2, 8, 'null'], 'ix', - [2, 8, 'null'], typs=['mixed'], axes=0) - self.check_result('list lbl', 'loc', - [Timestamp('20130102'), Timestamp('20130103')], 'ix', - [Timestamp('20130102'), Timestamp('20130103')], - typs=['ts'], axes=0) - - self.check_result('list lbl', 'loc', [0, 1, 2], 'indexer', [0, 1, 2], - typs=['empty'], fails=KeyError) - self.check_result('list lbl', 'loc', [0, 2, 3], 'ix', [0, 2, 3], - typs=['ints', 'uints'], axes=0, fails=KeyError) - self.check_result('list lbl', 'loc', [3, 6, 7], 'ix', [3, 6, 7], - typs=['ints', 'uints'], axes=1, fails=KeyError) - self.check_result('list lbl', 'loc', [4, 8, 10], 'ix', [4, 8, 10], - typs=['ints', 'uints'], axes=2, fails=KeyError) - - def test_loc_getitem_label_list_fails(self): - # fails - self.check_result('list lbl', 'loc', [20, 30, 40], 'ix', [20, 30, 40], - typs=['ints', 'uints'], axes=1, fails=KeyError) - self.check_result('list lbl', 'loc', [20, 30, 40], 'ix', [20, 30, 40], - typs=['ints', 'uints'], axes=2, fails=KeyError) - - def test_loc_getitem_label_array_like(self): - # array like - self.check_result('array like', 'loc', Series(index=[0, 2, 4]).index, - 'ix', [0, 2, 4], typs=['ints', 'uints'], axes=0) - self.check_result('array like', 'loc', Series(index=[3, 6, 9]).index, - 'ix', [3, 6, 9], typs=['ints', 'uints'], axes=1) - self.check_result('array like', 'loc', Series(index=[4, 8, 12]).index, - 'ix', [4, 8, 12], 
typs=['ints', 'uints'], axes=2) - - def test_loc_getitem_bool(self): - # boolean indexers - b = [True, False, True, False] - self.check_result('bool', 'loc', b, 'ix', b, - typs=['ints', 'uints', 'labels', - 'mixed', 'ts', 'floats']) - self.check_result('bool', 'loc', b, 'ix', b, typs=['empty'], - fails=KeyError) - - def test_loc_getitem_int_slice(self): - - # ok - self.check_result('int slice2', 'loc', slice(2, 4), 'ix', [2, 4], - typs=['ints', 'uints'], axes=0) - self.check_result('int slice2', 'loc', slice(3, 6), 'ix', [3, 6], - typs=['ints', 'uints'], axes=1) - self.check_result('int slice2', 'loc', slice(4, 8), 'ix', [4, 8], - typs=['ints', 'uints'], axes=2) - - # GH 3053 - # loc should treat integer slices like label slices - from itertools import product - - index = MultiIndex.from_tuples([t for t in product( - [6, 7, 8], ['a', 'b'])]) - df = DataFrame(np.random.randn(6, 6), index, index) - result = df.loc[6:8, :] - with catch_warnings(record=True): - expected = df.ix[6:8, :] - tm.assert_frame_equal(result, expected) - - index = MultiIndex.from_tuples([t - for t in product( - [10, 20, 30], ['a', 'b'])]) - df = DataFrame(np.random.randn(6, 6), index, index) - result = df.loc[20:30, :] - with catch_warnings(record=True): - expected = df.ix[20:30, :] - tm.assert_frame_equal(result, expected) - - # doc examples - result = df.loc[10, :] - with catch_warnings(record=True): - expected = df.ix[10, :] - tm.assert_frame_equal(result, expected) - - result = df.loc[:, 10] - # expected = df.ix[:,10] (this fails) - expected = df[10] - tm.assert_frame_equal(result, expected) - - def test_loc_to_fail(self): - - # GH3449 - df = DataFrame(np.random.random((3, 3)), - index=['a', 'b', 'c'], - columns=['e', 'f', 'g']) - - # raise a KeyError? 
- self.assertRaises(KeyError, df.loc.__getitem__, - tuple([[1, 2], [1, 2]])) - - # GH 7496 - # loc should not fallback - - s = Series() - s.loc[1] = 1 - s.loc['a'] = 2 - - self.assertRaises(KeyError, lambda: s.loc[-1]) - self.assertRaises(KeyError, lambda: s.loc[[-1, -2]]) - - self.assertRaises(KeyError, lambda: s.loc[['4']]) - - s.loc[-1] = 3 - result = s.loc[[-1, -2]] - expected = Series([3, np.nan], index=[-1, -2]) - tm.assert_series_equal(result, expected) - - s['a'] = 2 - self.assertRaises(KeyError, lambda: s.loc[[-2]]) - - del s['a'] - - def f(): - s.loc[[-2]] = 0 - - self.assertRaises(KeyError, f) - - # inconsistency between .loc[values] and .loc[values,:] - # GH 7999 - df = DataFrame([['a'], ['b']], index=[1, 2], columns=['value']) - - def f(): - df.loc[[3], :] - - self.assertRaises(KeyError, f) - - def f(): - df.loc[[3]] - - self.assertRaises(KeyError, f) - - def test_at_to_fail(self): - # at should not fallback - # GH 7814 - s = Series([1, 2, 3], index=list('abc')) - result = s.at['a'] - self.assertEqual(result, 1) - self.assertRaises(ValueError, lambda: s.at[0]) - - df = DataFrame({'A': [1, 2, 3]}, index=list('abc')) - result = df.at['a', 'A'] - self.assertEqual(result, 1) - self.assertRaises(ValueError, lambda: df.at['a', 0]) - - s = Series([1, 2, 3], index=[3, 2, 1]) - result = s.at[1] - self.assertEqual(result, 3) - self.assertRaises(ValueError, lambda: s.at['a']) - - df = DataFrame({0: [1, 2, 3]}, index=[3, 2, 1]) - result = df.at[1, 0] - self.assertEqual(result, 3) - self.assertRaises(ValueError, lambda: df.at['a', 0]) - - # GH 13822, incorrect error string with non-unique columns when missing - # column is accessed - df = DataFrame({'x': [1.], 'y': [2.], 'z': [3.]}) - df.columns = ['x', 'x', 'z'] - - # Check that we get the correct value in the KeyError - self.assertRaisesRegexp(KeyError, r"\['y'\] not in index", - lambda: df[['x', 'y', 'z']]) - - def test_loc_getitem_label_slice(self): - - # label slices (with ints) - self.check_result('lab 
slice', 'loc', slice(1, 3), - 'ix', slice(1, 3), - typs=['labels', 'mixed', 'empty', 'ts', 'floats'], - fails=TypeError) - - # real label slices - self.check_result('lab slice', 'loc', slice('a', 'c'), - 'ix', slice('a', 'c'), typs=['labels'], axes=0) - self.check_result('lab slice', 'loc', slice('A', 'C'), - 'ix', slice('A', 'C'), typs=['labels'], axes=1) - self.check_result('lab slice', 'loc', slice('W', 'Z'), - 'ix', slice('W', 'Z'), typs=['labels'], axes=2) - - self.check_result('ts slice', 'loc', slice('20130102', '20130104'), - 'ix', slice('20130102', '20130104'), - typs=['ts'], axes=0) - self.check_result('ts slice', 'loc', slice('20130102', '20130104'), - 'ix', slice('20130102', '20130104'), - typs=['ts'], axes=1, fails=TypeError) - self.check_result('ts slice', 'loc', slice('20130102', '20130104'), - 'ix', slice('20130102', '20130104'), - typs=['ts'], axes=2, fails=TypeError) - - # GH 14316 - self.check_result('ts slice rev', 'loc', slice('20130104', '20130102'), - 'indexer', [0, 1, 2], typs=['ts_rev'], axes=0) - - self.check_result('mixed slice', 'loc', slice(2, 8), 'ix', slice(2, 8), - typs=['mixed'], axes=0, fails=TypeError) - self.check_result('mixed slice', 'loc', slice(2, 8), 'ix', slice(2, 8), - typs=['mixed'], axes=1, fails=KeyError) - self.check_result('mixed slice', 'loc', slice(2, 8), 'ix', slice(2, 8), - typs=['mixed'], axes=2, fails=KeyError) - - self.check_result('mixed slice', 'loc', slice(2, 4, 2), 'ix', slice( - 2, 4, 2), typs=['mixed'], axes=0, fails=TypeError) - - def test_loc_general(self): - - df = DataFrame( - np.random.rand(4, 4), columns=['A', 'B', 'C', 'D'], - index=['A', 'B', 'C', 'D']) - - # want this to work - result = df.loc[:, "A":"B"].iloc[0:2, :] - self.assertTrue((result.columns == ['A', 'B']).all()) - self.assertTrue((result.index == ['A', 'B']).all()) - - # mixed type - result = DataFrame({'a': [Timestamp('20130101')], 'b': [1]}).iloc[0] - expected = Series([Timestamp('20130101'), 1], index=['a', 'b'], name=0) - 
tm.assert_series_equal(result, expected) - self.assertEqual(result.dtype, object) - - def test_loc_setitem_consistency(self): - # GH 6149 - # coerce similary for setitem and loc when rows have a null-slice - expected = DataFrame({'date': Series(0, index=range(5), - dtype=np.int64), - 'val': Series(range(5), dtype=np.int64)}) - - df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'), - 'val': Series( - range(5), dtype=np.int64)}) - df.loc[:, 'date'] = 0 - tm.assert_frame_equal(df, expected) - - df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'), - 'val': Series(range(5), dtype=np.int64)}) - df.loc[:, 'date'] = np.array(0, dtype=np.int64) - tm.assert_frame_equal(df, expected) - - df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'), - 'val': Series(range(5), dtype=np.int64)}) - df.loc[:, 'date'] = np.array([0, 0, 0, 0, 0], dtype=np.int64) - tm.assert_frame_equal(df, expected) - - expected = DataFrame({'date': Series('foo', index=range(5)), - 'val': Series(range(5), dtype=np.int64)}) - df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'), - 'val': Series(range(5), dtype=np.int64)}) - df.loc[:, 'date'] = 'foo' - tm.assert_frame_equal(df, expected) - - expected = DataFrame({'date': Series(1.0, index=range(5)), - 'val': Series(range(5), dtype=np.int64)}) - df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'), - 'val': Series(range(5), dtype=np.int64)}) - df.loc[:, 'date'] = 1.0 - tm.assert_frame_equal(df, expected) - - def test_loc_setitem_consistency_empty(self): - # empty (essentially noops) - expected = DataFrame(columns=['x', 'y']) - expected['x'] = expected['x'].astype(np.int64) - df = DataFrame(columns=['x', 'y']) - df.loc[:, 'x'] = 1 - tm.assert_frame_equal(df, expected) - - df = DataFrame(columns=['x', 'y']) - df['x'] = 1 - tm.assert_frame_equal(df, expected) - - def test_loc_setitem_consistency_slice_column_len(self): - # .loc[:,column] setting with slice == len of the column - # GH10408 - data = 
"""Level_0,,,Respondent,Respondent,Respondent,OtherCat,OtherCat -Level_1,,,Something,StartDate,EndDate,Yes/No,SomethingElse -Region,Site,RespondentID,,,,, -Region_1,Site_1,3987227376,A,5/25/2015 10:59,5/25/2015 11:22,Yes, -Region_1,Site_1,3980680971,A,5/21/2015 9:40,5/21/2015 9:52,Yes,Yes -Region_1,Site_2,3977723249,A,5/20/2015 8:27,5/20/2015 8:41,Yes, -Region_1,Site_2,3977723089,A,5/20/2015 8:33,5/20/2015 9:09,Yes,No""" - - df = pd.read_csv(StringIO(data), header=[0, 1], index_col=[0, 1, 2]) - df.loc[:, ('Respondent', 'StartDate')] = pd.to_datetime(df.loc[:, ( - 'Respondent', 'StartDate')]) - df.loc[:, ('Respondent', 'EndDate')] = pd.to_datetime(df.loc[:, ( - 'Respondent', 'EndDate')]) - df.loc[:, ('Respondent', 'Duration')] = df.loc[:, ( - 'Respondent', 'EndDate')] - df.loc[:, ('Respondent', 'StartDate')] - - df.loc[:, ('Respondent', 'Duration')] = df.loc[:, ( - 'Respondent', 'Duration')].astype('timedelta64[s]') - expected = Series([1380, 720, 840, 2160.], index=df.index, - name=('Respondent', 'Duration')) - tm.assert_series_equal(df[('Respondent', 'Duration')], expected) - - def test_loc_setitem_frame(self): - df = self.frame_labels - - result = df.iloc[0, 0] - - df.loc['a', 'A'] = 1 - result = df.loc['a', 'A'] - self.assertEqual(result, 1) - - result = df.iloc[0, 0] - self.assertEqual(result, 1) - - df.loc[:, 'B':'D'] = 0 - expected = df.loc[:, 'B':'D'] - with catch_warnings(record=True): - result = df.ix[:, 1:] - tm.assert_frame_equal(result, expected) - - # GH 6254 - # setting issue - df = DataFrame(index=[3, 5, 4], columns=['A']) - df.loc[[4, 3, 5], 'A'] = np.array([1, 2, 3], dtype='int64') - expected = DataFrame(dict(A=Series( - [1, 2, 3], index=[4, 3, 5]))).reindex(index=[3, 5, 4]) - tm.assert_frame_equal(df, expected) - - # GH 6252 - # setting with an empty frame - keys1 = ['@' + str(i) for i in range(5)] - val1 = np.arange(5, dtype='int64') - - keys2 = ['@' + str(i) for i in range(4)] - val2 = np.arange(4, dtype='int64') - - index = 
list(set(keys1).union(keys2)) - df = DataFrame(index=index) - df['A'] = nan - df.loc[keys1, 'A'] = val1 - - df['B'] = nan - df.loc[keys2, 'B'] = val2 - - expected = DataFrame(dict(A=Series(val1, index=keys1), B=Series( - val2, index=keys2))).reindex(index=index) - tm.assert_frame_equal(df, expected) - - # GH 8669 - # invalid coercion of nan -> int - df = DataFrame({'A': [1, 2, 3], 'B': np.nan}) - df.loc[df.B > df.A, 'B'] = df.A - expected = DataFrame({'A': [1, 2, 3], 'B': np.nan}) - tm.assert_frame_equal(df, expected) - - # GH 6546 - # setting with mixed labels - df = DataFrame({1: [1, 2], 2: [3, 4], 'a': ['a', 'b']}) - - result = df.loc[0, [1, 2]] - expected = Series([1, 3], index=[1, 2], dtype=object, name=0) - tm.assert_series_equal(result, expected) - - expected = DataFrame({1: [5, 2], 2: [6, 4], 'a': ['a', 'b']}) - df.loc[0, [1, 2]] = [5, 6] - tm.assert_frame_equal(df, expected) - - def test_loc_setitem_frame_multiples(self): - # multiple setting - df = DataFrame({'A': ['foo', 'bar', 'baz'], - 'B': Series( - range(3), dtype=np.int64)}) - rhs = df.loc[1:2] - rhs.index = df.index[0:2] - df.loc[0:1] = rhs - expected = DataFrame({'A': ['bar', 'baz', 'baz'], - 'B': Series( - [1, 2, 2], dtype=np.int64)}) - tm.assert_frame_equal(df, expected) - - # multiple setting with frame on rhs (with M8) - df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'), - 'val': Series( - range(5), dtype=np.int64)}) - expected = DataFrame({'date': [Timestamp('20000101'), Timestamp( - '20000102'), Timestamp('20000101'), Timestamp('20000102'), - Timestamp('20000103')], - 'val': Series( - [0, 1, 0, 1, 2], dtype=np.int64)}) - rhs = df.loc[0:2] - rhs.index = df.index[2:5] - df.loc[2:4] = rhs - tm.assert_frame_equal(df, expected) - - def test_iloc_getitem_frame(self): - df = DataFrame(np.random.randn(10, 4), index=lrange(0, 20, 2), - columns=lrange(0, 8, 2)) - - result = df.iloc[2] - with catch_warnings(record=True): - exp = df.ix[4] - tm.assert_series_equal(result, exp) - - result = 
df.iloc[2, 2] - with catch_warnings(record=True): - exp = df.ix[4, 4] - self.assertEqual(result, exp) - - # slice - result = df.iloc[4:8] - with catch_warnings(record=True): - expected = df.ix[8:14] - tm.assert_frame_equal(result, expected) - - result = df.iloc[:, 2:3] - with catch_warnings(record=True): - expected = df.ix[:, 4:5] - tm.assert_frame_equal(result, expected) - - # list of integers - result = df.iloc[[0, 1, 3]] - with catch_warnings(record=True): - expected = df.ix[[0, 2, 6]] - tm.assert_frame_equal(result, expected) - - result = df.iloc[[0, 1, 3], [0, 1]] - with catch_warnings(record=True): - expected = df.ix[[0, 2, 6], [0, 2]] - tm.assert_frame_equal(result, expected) - - # neg indicies - result = df.iloc[[-1, 1, 3], [-1, 1]] - with catch_warnings(record=True): - expected = df.ix[[18, 2, 6], [6, 2]] - tm.assert_frame_equal(result, expected) - - # dups indicies - result = df.iloc[[-1, -1, 1, 3], [-1, 1]] - with catch_warnings(record=True): - expected = df.ix[[18, 18, 2, 6], [6, 2]] - tm.assert_frame_equal(result, expected) - - # with index-like - s = Series(index=lrange(1, 5)) - result = df.iloc[s.index] - with catch_warnings(record=True): - expected = df.ix[[2, 4, 6, 8]] - tm.assert_frame_equal(result, expected) - - def test_iloc_getitem_labelled_frame(self): - # try with labelled frame - df = DataFrame(np.random.randn(10, 4), - index=list('abcdefghij'), columns=list('ABCD')) - - result = df.iloc[1, 1] - exp = df.loc['b', 'B'] - self.assertEqual(result, exp) - - result = df.iloc[:, 2:3] - expected = df.loc[:, ['C']] - tm.assert_frame_equal(result, expected) - - # negative indexing - result = df.iloc[-1, -1] - exp = df.loc['j', 'D'] - self.assertEqual(result, exp) - - # out-of-bounds exception - self.assertRaises(IndexError, df.iloc.__getitem__, tuple([10, 5])) - - # trying to use a label - self.assertRaises(ValueError, df.iloc.__getitem__, tuple(['j', 'D'])) - - def test_iloc_getitem_doc_issue(self): - - # multi axis slicing issue with single block - 
# surfaced in GH 6059 - - arr = np.random.randn(6, 4) - index = date_range('20130101', periods=6) - columns = list('ABCD') - df = DataFrame(arr, index=index, columns=columns) - - # defines ref_locs - df.describe() - - result = df.iloc[3:5, 0:2] - str(result) - result.dtypes - - expected = DataFrame(arr[3:5, 0:2], index=index[3:5], - columns=columns[0:2]) - tm.assert_frame_equal(result, expected) - - # for dups - df.columns = list('aaaa') - result = df.iloc[3:5, 0:2] - str(result) - result.dtypes - - expected = DataFrame(arr[3:5, 0:2], index=index[3:5], - columns=list('aa')) - tm.assert_frame_equal(result, expected) - - # related - arr = np.random.randn(6, 4) - index = list(range(0, 12, 2)) - columns = list(range(0, 8, 2)) - df = DataFrame(arr, index=index, columns=columns) - - df._data.blocks[0].mgr_locs - result = df.iloc[1:5, 2:4] - str(result) - result.dtypes - expected = DataFrame(arr[1:5, 2:4], index=index[1:5], - columns=columns[2:4]) - tm.assert_frame_equal(result, expected) +class TestFancy(Base): + """ pure get/set item & fancy indexing """ def test_setitem_ndarray_1d(self): # GH5508 @@ -1487,17 +39,11 @@ def test_setitem_ndarray_1d(self): df['bar'] = np.zeros(10, dtype=np.complex) # invalid - def f(): - with catch_warnings(record=True): - df.ix[2:5, 'bar'] = np.array([2.33j, 1.23 + 0.1j, 2.2]) - - self.assertRaises(ValueError, f) - def f(): df.loc[df.index[2:5], 'bar'] = np.array([2.33j, 1.23 + 0.1j, 2.2, 1.0]) - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) # valid df.loc[df.index[2:6], 'bar'] = np.array([2.33j, 1.23 + 0.1j, @@ -1516,140 +62,42 @@ def f(): def f(): df[2:5] = np.arange(1, 4) * 1j - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) - def test_iloc_setitem_series(self): - df = DataFrame(np.random.randn(10, 4), index=list('abcdefghij'), - columns=list('ABCD')) + def test_inf_upcast(self): + # GH 16957 + # We should be able to use np.inf as a key + # np.inf should cause an index to convert to float - df.iloc[1, 
1] = 1 - result = df.iloc[1, 1] - self.assertEqual(result, 1) + # Test with np.inf in rows + df = DataFrame(columns=[0]) + df.loc[1] = 1 + df.loc[2] = 2 + df.loc[np.inf] = 3 - df.iloc[:, 2:3] = 0 - expected = df.iloc[:, 2:3] - result = df.iloc[:, 2:3] - tm.assert_frame_equal(result, expected) - - s = Series(np.random.randn(10), index=lrange(0, 20, 2)) - - s.iloc[1] = 1 - result = s.iloc[1] - self.assertEqual(result, 1) - - s.iloc[:4] = 0 - expected = s.iloc[:4] - result = s.iloc[:4] - tm.assert_series_equal(result, expected) - - s = Series([-1] * 6) - s.iloc[0::2] = [0, 2, 4] - s.iloc[1::2] = [1, 3, 5] - result = s - expected = Series([0, 1, 2, 3, 4, 5]) - tm.assert_series_equal(result, expected) - - def test_iloc_setitem_list_of_lists(self): - - # GH 7551 - # list-of-list is set incorrectly in mixed vs. single dtyped frames - df = DataFrame(dict(A=np.arange(5, dtype='int64'), - B=np.arange(5, 10, dtype='int64'))) - df.iloc[2:4] = [[10, 11], [12, 13]] - expected = DataFrame(dict(A=[0, 1, 10, 12, 4], B=[5, 6, 11, 13, 9])) - tm.assert_frame_equal(df, expected) - - df = DataFrame( - dict(A=list('abcde'), B=np.arange(5, 10, dtype='int64'))) - df.iloc[2:4] = [['x', 11], ['y', 13]] - expected = DataFrame(dict(A=['a', 'b', 'x', 'y', 'e'], - B=[5, 6, 11, 13, 9])) - tm.assert_frame_equal(df, expected) - - def test_ix_general(self): - - # ix general issues - - # GH 2817 - data = {'amount': {0: 700, 1: 600, 2: 222, 3: 333, 4: 444}, - 'col': {0: 3.5, 1: 3.5, 2: 4.0, 3: 4.0, 4: 4.0}, - 'year': {0: 2012, 1: 2011, 2: 2012, 3: 2012, 4: 2012}} - df = DataFrame(data).set_index(keys=['col', 'year']) - key = 4.0, 2012 - - # emits a PerformanceWarning, ok - with self.assert_produces_warning(PerformanceWarning): - tm.assert_frame_equal(df.loc[key], df.iloc[2:]) - - # this is ok - df.sort_index(inplace=True) - res = df.loc[key] - - # col has float dtype, result should be Float64Index - index = MultiIndex.from_arrays([[4.] 
* 3, [2012] * 3], - names=['col', 'year']) - expected = DataFrame({'amount': [222, 333, 444]}, index=index) - tm.assert_frame_equal(res, expected) - - def test_ix_weird_slicing(self): - # http://stackoverflow.com/q/17056560/1240268 - df = DataFrame({'one': [1, 2, 3, np.nan, np.nan], - 'two': [1, 2, 3, 4, 5]}) - df.loc[df['one'] > 1, 'two'] = -df['two'] - - expected = DataFrame({'one': {0: 1.0, - 1: 2.0, - 2: 3.0, - 3: nan, - 4: nan}, - 'two': {0: 1, - 1: -2, - 2: -3, - 3: 4, - 4: 5}}) - tm.assert_frame_equal(df, expected) - - def test_loc_coerceion(self): + # make sure we can look up the value + assert df.loc[np.inf, 0] == 3 - # 12411 - df = DataFrame({'date': [pd.Timestamp('20130101').tz_localize('UTC'), - pd.NaT]}) - expected = df.dtypes + result = df.index + expected = pd.Float64Index([1, 2, np.inf]) + tm.assert_index_equal(result, expected) - result = df.iloc[[0]] - tm.assert_series_equal(result.dtypes, expected) - - result = df.iloc[[1]] - tm.assert_series_equal(result.dtypes, expected) - - # 12045 - import datetime - df = DataFrame({'date': [datetime.datetime(2012, 1, 1), - datetime.datetime(1012, 1, 2)]}) - expected = df.dtypes - - result = df.iloc[[0]] - tm.assert_series_equal(result.dtypes, expected) - - result = df.iloc[[1]] - tm.assert_series_equal(result.dtypes, expected) - - # 11594 - df = DataFrame({'text': ['some words'] + [None] * 9}) - expected = df.dtypes - - result = df.iloc[0:2] - tm.assert_series_equal(result.dtypes, expected) + # Test with np.inf in columns + df = DataFrame() + df.loc[0, 0] = 1 + df.loc[1, 1] = 2 + df.loc[0, np.inf] = 3 - result = df.iloc[3:] - tm.assert_series_equal(result.dtypes, expected) + result = df.columns + expected = pd.Float64Index([0, 1, np.inf]) + tm.assert_index_equal(result, expected) def test_setitem_dtype_upcast(self): # GH3216 df = DataFrame([{"a": 1}, {"a": 3, "b": 2}]) df['c'] = np.nan - self.assertEqual(df['c'].dtype, np.float64) + assert df['c'].dtype == np.float64 df.loc[0, 'c'] = 'foo' expected = 
DataFrame([{"a": 1, "c": 'foo'}, @@ -1668,8 +116,8 @@ def test_setitem_dtype_upcast(self): columns=['foo', 'bar', 'baz']) tm.assert_frame_equal(left, right) - self.assertTrue(is_integer_dtype(left['foo'])) - self.assertTrue(is_integer_dtype(left['baz'])) + assert is_integer_dtype(left['foo']) + assert is_integer_dtype(left['baz']) left = DataFrame(np.arange(6, dtype='int64').reshape(2, 3) / 10.0, index=list('ab'), @@ -1680,21 +128,8 @@ def test_setitem_dtype_upcast(self): columns=['foo', 'bar', 'baz']) tm.assert_frame_equal(left, right) - self.assertTrue(is_float_dtype(left['foo'])) - self.assertTrue(is_float_dtype(left['baz'])) - - def test_setitem_iloc(self): - - # setitem with an iloc list - df = DataFrame(np.arange(9).reshape((3, 3)), index=["A", "B", "C"], - columns=["A", "B", "C"]) - df.iloc[[0, 1], [1, 2]] - df.iloc[[0, 1], [1, 2]] += 100 - - expected = DataFrame( - np.array([0, 101, 102, 3, 104, 105, 6, 7, 8]).reshape((3, 3)), - index=["A", "B", "C"], columns=["A", "B", "C"]) - tm.assert_frame_equal(df, expected) + assert is_float_dtype(left['foo']) + assert is_float_dtype(left['baz']) def test_dups_fancy_indexing(self): @@ -1704,7 +139,7 @@ def test_dups_fancy_indexing(self): df.columns = ['a', 'a', 'b'] result = df[['b', 'a']].columns expected = Index(['b', 'a', 'a']) - self.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) # across dtypes df = DataFrame([[1, 2, 1., 2., 3., 'foo', 'bar']], @@ -1742,7 +177,8 @@ def test_dups_fancy_indexing(self): 'test1': [7., 6, np.nan], 'other': ['d', 'c', np.nan]}, index=rows) - result = df.loc[rows] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = df.loc[rows] tm.assert_frame_equal(result, expected) # see GH5553, make sure we use the right indexer @@ -1752,28 +188,32 @@ def test_dups_fancy_indexing(self): 'other': [np.nan, np.nan, np.nan, 'd', 'c', np.nan]}, index=rows) - result = df.loc[rows] + with tm.assert_produces_warning(FutureWarning, 
check_stacklevel=False): + result = df.loc[rows] tm.assert_frame_equal(result, expected) # inconsistent returns for unique/duplicate indices when values are # missing - df = DataFrame(randn(4, 3), index=list('ABCD')) - expected = df.ix[['E']] + df = DataFrame(np.random.randn(4, 3), index=list('ABCD')) + expected = df.reindex(['E']) - dfnu = DataFrame(randn(5, 3), index=list('AABCD')) - result = dfnu.ix[['E']] + dfnu = DataFrame(np.random.randn(5, 3), index=list('AABCD')) + with catch_warnings(record=True): + result = dfnu.ix[['E']] tm.assert_frame_equal(result, expected) # ToDo: check_index_type can be True after GH 11497 # GH 4619; duplicate indexer with missing label df = DataFrame({"A": [0, 1, 2]}) - result = df.ix[[0, 8, 0]] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = df.loc[[0, 8, 0]] expected = DataFrame({"A": [0, np.nan, 0]}, index=[0, 8, 0]) tm.assert_frame_equal(result, expected, check_index_type=False) df = DataFrame({"A": list('abc')}) - result = df.ix[[0, 8, 0]] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = df.loc[[0, 8, 0]] expected = DataFrame({"A": ['a', np.nan, 'a']}, index=[0, 8, 0]) tm.assert_frame_equal(result, expected, check_index_type=False) @@ -1781,7 +221,8 @@ def test_dups_fancy_indexing(self): df = DataFrame({'test': [5, 7, 9, 11]}, index=['A', 'A', 'B', 'C']) expected = DataFrame( {'test': [5, 7, 5, 7, np.nan]}, index=['A', 'A', 'A', 'A', 'E']) - result = df.ix[['A', 'A', 'E']] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = df.loc[['A', 'A', 'E']] tm.assert_frame_equal(result, expected) # GH 5835 @@ -1790,9 +231,10 @@ def test_dups_fancy_indexing(self): np.random.randn(5, 5), columns=['A', 'B', 'B', 'B', 'A']) expected = pd.concat( - [df.ix[:, ['A', 'B']], DataFrame(np.nan, columns=['C'], - index=df.index)], axis=1) - result = df.ix[:, ['A', 'B', 'C']] + [df.loc[:, ['A', 'B']], DataFrame(np.nan, columns=['C'], + 
index=df.index)], axis=1) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = df.loc[:, ['A', 'B', 'C']] tm.assert_frame_equal(result, expected) # GH 6504, multi-axis indexing @@ -1822,9 +264,9 @@ def test_indexing_mixed_frame_bug(self): # this does not work, ie column test is not changed idx = df['test'] == '_' - temp = df.ix[idx, 'a'].apply(lambda x: '-----' if x == 'aaa' else x) - df.ix[idx, 'test'] = temp - self.assertEqual(df.iloc[0, 2], '-----') + temp = df.loc[idx, 'a'].apply(lambda x: '-----' if x == 'aaa' else x) + df.loc[idx, 'test'] = temp + assert df.iloc[0, 2] == '-----' # if I look at df, then element [0,2] equals '_'. If instead I type # df.ix[idx,'test'], I get '-----', finally by typing df.iloc[0,2] I @@ -1832,12 +274,12 @@ def test_indexing_mixed_frame_bug(self): def test_multitype_list_index_access(self): # GH 10610 - df = pd.DataFrame(np.random.random((10, 5)), - columns=["a"] + [20, 21, 22, 23]) + df = DataFrame(np.random.random((10, 5)), + columns=["a"] + [20, 21, 22, 23]) - with self.assertRaises(KeyError): + with pytest.raises(KeyError): df[[22, 26, -8]] - self.assertEqual(df[21].shape[0], df.shape[0]) + assert df[21].shape[0] == df.shape[0] def test_set_index_nan(self): @@ -1859,17 +301,17 @@ def test_set_index_nan(self): 'QC': {17: 0.0, 18: 0.0, 19: 0.0, - 20: nan, - 21: nan, - 22: nan, - 23: nan, + 20: np.nan, + 21: np.nan, + 22: np.nan, + 23: np.nan, 24: 1.0, - 25: nan, - 26: nan, - 27: nan, - 28: nan, - 29: nan, - 30: nan}, + 25: np.nan, + 26: np.nan, + 27: np.nan, + 28: np.nan, + 29: np.nan, + 30: np.nan}, 'data': {17: 7.9544899999999998, 18: 8.0142609999999994, 19: 7.8591520000000008, @@ -1920,19 +362,19 @@ def test_multi_nan_indexing(self): def test_multi_assign(self): - # GH 3626, an assignement of a sub-df to a df + # GH 3626, an assignment of a sub-df to a df df = DataFrame({'FC': ['a', 'b', 'a', 'b', 'a', 'b'], 'PF': [0, 0, 0, 0, 1, 1], 'col1': lrange(6), 'col2': lrange(6, 12)}) - df.ix[1, 0] = 
np.nan + df.iloc[1, 0] = np.nan df2 = df.copy() - mask = ~df2.FC.isnull() + mask = ~df2.FC.isna() cols = ['col1', 'col2'] dft = df2 * 2 - dft.ix[3, 3] = np.nan + dft.iloc[3, 3] = np.nan expected = DataFrame({'FC': ['a', np.nan, 'a', 'b', 'a', 'b'], 'PF': [0, 0, 0, 0, 1, 1], @@ -1940,17 +382,23 @@ def test_multi_assign(self): 'col2': [12, 7, 16, np.nan, 20, 22]}) # frame on rhs - df2.ix[mask, cols] = dft.ix[mask, cols] + df2.loc[mask, cols] = dft.loc[mask, cols] tm.assert_frame_equal(df2, expected) - df2.ix[mask, cols] = dft.ix[mask, cols] + df2.loc[mask, cols] = dft.loc[mask, cols] tm.assert_frame_equal(df2, expected) # with an ndarray on rhs + # coerces to float64 because values has float64 dtype + # GH 14001 + expected = DataFrame({'FC': ['a', np.nan, 'a', 'b', 'a', 'b'], + 'PF': [0, 0, 0, 0, 1, 1], + 'col1': [0., 1., 4., 6., 8., 10.], + 'col2': [12, 7, 16, np.nan, 20, 22]}) df2 = df.copy() - df2.ix[mask, cols] = dft.ix[mask, cols].values + df2.loc[mask, cols] = dft.loc[mask, cols].values tm.assert_frame_equal(df2, expected) - df2.ix[mask, cols] = dft.ix[mask, cols].values + df2.loc[mask, cols] = dft.loc[mask, cols].values tm.assert_frame_equal(df2, expected) # broadcasting on the rhs is required @@ -1965,79 +413,18 @@ def test_multi_assign(self): df.loc[df['A'] == 0, ['A', 'B']] = df['D'] tm.assert_frame_equal(df, expected) - def test_ix_assign_column_mixed(self): - # GH #1142 - df = DataFrame(tm.getSeriesData()) - df['foo'] = 'bar' - - orig = df.ix[:, 'B'].copy() - df.ix[:, 'B'] = df.ix[:, 'B'] + 1 - tm.assert_series_equal(df.B, orig + 1) - - # GH 3668, mixed frame with series value - df = DataFrame({'x': lrange(10), 'y': lrange(10, 20), 'z': 'bar'}) - expected = df.copy() - - for i in range(5): - indexer = i * 2 - v = 1000 + i * 200 - expected.ix[indexer, 'y'] = v - self.assertEqual(expected.ix[indexer, 'y'], v) - - df.ix[df.x % 2 == 0, 'y'] = df.ix[df.x % 2 == 0, 'y'] * 100 - tm.assert_frame_equal(df, expected) - - # GH 4508, making sure consistency of 
assignments - df = DataFrame({'a': [1, 2, 3], 'b': [0, 1, 2]}) - df.ix[[0, 2, ], 'b'] = [100, -100] - expected = DataFrame({'a': [1, 2, 3], 'b': [100, 1, -100]}) - tm.assert_frame_equal(df, expected) - - df = pd.DataFrame({'a': lrange(4)}) - df['b'] = np.nan - df.ix[[1, 3], 'b'] = [100, -100] - expected = DataFrame({'a': [0, 1, 2, 3], - 'b': [np.nan, 100, np.nan, -100]}) - tm.assert_frame_equal(df, expected) - - # ok, but chained assignments are dangerous - # if we turn off chained assignement it will work - with option_context('chained_assignment', None): - df = pd.DataFrame({'a': lrange(4)}) - df['b'] = np.nan - df['b'].ix[[1, 3]] = [100, -100] - tm.assert_frame_equal(df, expected) - - def test_ix_get_set_consistency(self): - - # GH 4544 - # ix/loc get/set not consistent when - # a mixed int/string index - df = DataFrame(np.arange(16).reshape((4, 4)), - columns=['a', 'b', 8, 'c'], - index=['e', 7, 'f', 'g']) - - self.assertEqual(df.ix['e', 8], 2) - self.assertEqual(df.loc['e', 8], 2) - - df.ix['e', 8] = 42 - self.assertEqual(df.ix['e', 8], 42) - self.assertEqual(df.loc['e', 8], 42) - - df.loc['e', 8] = 45 - self.assertEqual(df.ix['e', 8], 45) - self.assertEqual(df.loc['e', 8], 45) - def test_setitem_list(self): # GH 6043 # ix with a list df = DataFrame(index=[0, 1], columns=[0]) - df.ix[1, 0] = [1, 2, 3] - df.ix[1, 0] = [1, 2] + with catch_warnings(record=True): + df.ix[1, 0] = [1, 2, 3] + df.ix[1, 0] = [1, 2] result = DataFrame(index=[0, 1], columns=[0]) - result.ix[1, 0] = [1, 2] + with catch_warnings(record=True): + result.ix[1, 0] = [1, 2] tm.assert_frame_equal(result, df) @@ -2059,206 +446,43 @@ def view(self): return self df = DataFrame(index=[0, 1], columns=[0]) - df.ix[1, 0] = TO(1) - df.ix[1, 0] = TO(2) + with catch_warnings(record=True): + df.ix[1, 0] = TO(1) + df.ix[1, 0] = TO(2) result = DataFrame(index=[0, 1], columns=[0]) - result.ix[1, 0] = TO(2) + with catch_warnings(record=True): + result.ix[1, 0] = TO(2) tm.assert_frame_equal(result, df) # 
remains object dtype even after setting it back df = DataFrame(index=[0, 1], columns=[0]) - df.ix[1, 0] = TO(1) - df.ix[1, 0] = np.nan + with catch_warnings(record=True): + df.ix[1, 0] = TO(1) + df.ix[1, 0] = np.nan result = DataFrame(index=[0, 1], columns=[0]) tm.assert_frame_equal(result, df) - def test_iloc_mask(self): - - # GH 3631, iloc with a mask (of a series) should raise - df = DataFrame(lrange(5), list('ABCDE'), columns=['a']) - mask = (df.a % 2 == 0) - self.assertRaises(ValueError, df.iloc.__getitem__, tuple([mask])) - mask.index = lrange(len(mask)) - self.assertRaises(NotImplementedError, df.iloc.__getitem__, - tuple([mask])) - - # ndarray ok - result = df.iloc[np.array([True] * len(mask), dtype=bool)] - tm.assert_frame_equal(result, df) - - # the possibilities - locs = np.arange(4) - nums = 2 ** locs - reps = lmap(bin, nums) - df = DataFrame({'locs': locs, 'nums': nums}, reps) - - expected = { - (None, ''): '0b1100', - (None, '.loc'): '0b1100', - (None, '.iloc'): '0b1100', - ('index', ''): '0b11', - ('index', '.loc'): '0b11', - ('index', '.iloc'): ('iLocation based boolean indexing ' - 'cannot use an indexable as a mask'), - ('locs', ''): 'Unalignable boolean Series provided as indexer ' - '(index of the boolean Series and of the indexed ' - 'object do not match', - ('locs', '.loc'): 'Unalignable boolean Series provided as indexer ' - '(index of the boolean Series and of the ' - 'indexed object do not match', - ('locs', '.iloc'): ('iLocation based boolean indexing on an ' - 'integer type is not available'), - } - - # UserWarnings from reindex of a boolean mask - with warnings.catch_warnings(record=True): - result = dict() - for idx in [None, 'index', 'locs']: - mask = (df.nums > 2).values - if idx: - mask = Series(mask, list(reversed(getattr(df, idx)))) - for method in ['', '.loc', '.iloc']: - try: - if method: - accessor = getattr(df, method[1:]) - else: - accessor = df - ans = str(bin(accessor[mask]['nums'].sum())) - except Exception as e: - ans = 
str(e) - - key = tuple([idx, method]) - r = expected.get(key) - if r != ans: - raise AssertionError( - "[%s] does not match [%s], received [%s]" - % (key, ans, r)) - - def test_ix_slicing_strings(self): - # GH3836 - data = {'Classification': - ['SA EQUITY CFD', 'bbb', 'SA EQUITY', 'SA SSF', 'aaa'], - 'Random': [1, 2, 3, 4, 5], - 'X': ['correct', 'wrong', 'correct', 'correct', 'wrong']} - df = DataFrame(data) - x = df[~df.Classification.isin(['SA EQUITY CFD', 'SA EQUITY', 'SA SSF' - ])] - df.ix[x.index, 'X'] = df['Classification'] - - expected = DataFrame({'Classification': {0: 'SA EQUITY CFD', - 1: 'bbb', - 2: 'SA EQUITY', - 3: 'SA SSF', - 4: 'aaa'}, - 'Random': {0: 1, - 1: 2, - 2: 3, - 3: 4, - 4: 5}, - 'X': {0: 'correct', - 1: 'bbb', - 2: 'correct', - 3: 'correct', - 4: 'aaa'}}) # bug was 4: 'bbb' - - tm.assert_frame_equal(df, expected) - - def test_non_unique_loc(self): - # GH3659 - # non-unique indexer with loc slice - # https://groups.google.com/forum/?fromgroups#!topic/pydata/zTm2No0crYs - - # these are going to raise becuase the we are non monotonic - df = DataFrame({'A': [1, 2, 3, 4, 5, 6], - 'B': [3, 4, 5, 6, 7, 8]}, index=[0, 1, 0, 1, 2, 3]) - self.assertRaises(KeyError, df.loc.__getitem__, - tuple([slice(1, None)])) - self.assertRaises(KeyError, df.loc.__getitem__, - tuple([slice(0, None)])) - self.assertRaises(KeyError, df.loc.__getitem__, tuple([slice(1, 2)])) - - # monotonic are ok - df = DataFrame({'A': [1, 2, 3, 4, 5, 6], - 'B': [3, 4, 5, 6, 7, 8]}, - index=[0, 1, 0, 1, 2, 3]).sort_index(axis=0) - result = df.loc[1:] - expected = DataFrame({'A': [2, 4, 5, 6], 'B': [4, 6, 7, 8]}, - index=[1, 1, 2, 3]) - tm.assert_frame_equal(result, expected) - - result = df.loc[0:] - tm.assert_frame_equal(result, df) - - result = df.loc[1:2] - expected = DataFrame({'A': [2, 4, 5], 'B': [4, 6, 7]}, - index=[1, 1, 2]) - tm.assert_frame_equal(result, expected) - - def test_loc_name(self): - # GH 3880 - df = DataFrame([[1, 1], [1, 1]]) - df.index.name = 'index_name' - 
result = df.iloc[[0, 1]].index.name - self.assertEqual(result, 'index_name') - - result = df.ix[[0, 1]].index.name - self.assertEqual(result, 'index_name') - - result = df.loc[[0, 1]].index.name - self.assertEqual(result, 'index_name') - - def test_iloc_non_unique_indexing(self): - - # GH 4017, non-unique indexing (on the axis) - df = DataFrame({'A': [0.1] * 3000, 'B': [1] * 3000}) - idx = np.array(lrange(30)) * 99 - expected = df.iloc[idx] - - df3 = pd.concat([df, 2 * df, 3 * df]) - result = df3.iloc[idx] - - tm.assert_frame_equal(result, expected) - - df2 = DataFrame({'A': [0.1] * 1000, 'B': [1] * 1000}) - df2 = pd.concat([df2, 2 * df2, 3 * df2]) - - sidx = df2.index.to_series() - expected = df2.iloc[idx[idx <= sidx.max()]] - - new_list = [] - for r, s in expected.iterrows(): - new_list.append(s) - new_list.append(s * 2) - new_list.append(s * 3) - - expected = DataFrame(new_list) - expected = pd.concat([expected, DataFrame(index=idx[idx > sidx.max()]) - ]) - result = df2.loc[idx] - tm.assert_frame_equal(result, expected, check_index_type=False) - def test_string_slice(self): # GH 14424 # string indexing against datetimelike with object # dtype should properly raises KeyError - df = pd.DataFrame([1], pd.Index([pd.Timestamp('2011-01-01')], - dtype=object)) - self.assertTrue(df.index.is_all_dates) - with tm.assertRaises(KeyError): + df = DataFrame([1], Index([pd.Timestamp('2011-01-01')], dtype=object)) + assert df.index.is_all_dates + with pytest.raises(KeyError): df['2011'] - with tm.assertRaises(KeyError): + with pytest.raises(KeyError): df.loc['2011', 0] - df = pd.DataFrame() - self.assertFalse(df.index.is_all_dates) - with tm.assertRaises(KeyError): + df = DataFrame() + assert not df.index.is_all_dates + with pytest.raises(KeyError): df['2011'] - with tm.assertRaises(KeyError): + with pytest.raises(KeyError): df.loc['2011', 0] def test_mi_access(self): @@ -2300,43 +524,6 @@ def test_mi_access(self): result = df2['A']['B2'] tm.assert_frame_equal(result, expected) 
- def test_non_unique_loc_memory_error(self): - - # GH 4280 - # non_unique index with a large selection triggers a memory error - - columns = list('ABCDEFG') - - def gen_test(l, l2): - return pd.concat([DataFrame(randn(l, len(columns)), - index=lrange(l), columns=columns), - DataFrame(np.ones((l2, len(columns))), - index=[0] * l2, columns=columns)]) - - def gen_expected(df, mask): - l = len(mask) - return pd.concat([df.take([0], convert=False), - DataFrame(np.ones((l, len(columns))), - index=[0] * l, - columns=columns), - df.take(mask[1:], convert=False)]) - - df = gen_test(900, 100) - self.assertFalse(df.index.is_unique) - - mask = np.arange(100) - result = df.loc[mask] - expected = gen_expected(df, mask) - tm.assert_frame_equal(result, expected) - - df = gen_test(900000, 100000) - self.assertFalse(df.index.is_unique) - - mask = np.arange(100000) - result = df.loc[mask] - expected = gen_expected(df, mask) - tm.assert_frame_equal(result, expected) - def test_astype_assignment(self): # GH4312 (iloc) @@ -2383,757 +570,119 @@ def test_astype_assignment_with_dups(self): # GH 4686 # assignment with dups that has a dtype change - cols = pd.MultiIndex.from_tuples([('A', '1'), ('B', '1'), ('A', '2')]) + cols = MultiIndex.from_tuples([('A', '1'), ('B', '1'), ('A', '2')]) df = DataFrame(np.arange(3).reshape((1, 3)), columns=cols, dtype=object) index = df.index.copy() df['A'] = df['A'].astype(np.float64) - self.assert_index_equal(df.index, index) + tm.assert_index_equal(df.index, index) # TODO(wesm): unused variables # result = df.get_dtype_counts().sort_index() # expected = Series({'float64': 2, 'object': 1}).sort_index() - def test_dups_loc(self): - - # GH4726 - # dup indexing with iloc/loc - df = DataFrame([[1, 2, 'foo', 'bar', Timestamp('20130101')]], - columns=['a', 'a', 'a', 'a', 'a'], index=[1]) - expected = Series([1, 2, 'foo', 'bar', Timestamp('20130101')], - index=['a', 'a', 'a', 'a', 'a'], name=1) - - result = df.iloc[0] - tm.assert_series_equal(result, expected) - 
- result = df.loc[1] - tm.assert_series_equal(result, expected) - - def test_partial_setting(self): - - # GH2578, allow ix and friends to partially set - - # series - s_orig = Series([1, 2, 3]) - - s = s_orig.copy() - s[5] = 5 - expected = Series([1, 2, 3, 5], index=[0, 1, 2, 5]) - tm.assert_series_equal(s, expected) - - s = s_orig.copy() - s.loc[5] = 5 - expected = Series([1, 2, 3, 5], index=[0, 1, 2, 5]) - tm.assert_series_equal(s, expected) - - s = s_orig.copy() - s[5] = 5. - expected = Series([1, 2, 3, 5.], index=[0, 1, 2, 5]) - tm.assert_series_equal(s, expected) - - s = s_orig.copy() - s.loc[5] = 5. - expected = Series([1, 2, 3, 5.], index=[0, 1, 2, 5]) - tm.assert_series_equal(s, expected) - - # iloc/iat raise - s = s_orig.copy() - - def f(): - s.iloc[3] = 5. - - self.assertRaises(IndexError, f) - - def f(): - s.iat[3] = 5. - - self.assertRaises(IndexError, f) - - # ## frame ## - - df_orig = DataFrame( - np.arange(6).reshape(3, 2), columns=['A', 'B'], dtype='int64') - - # iloc/iat raise - df = df_orig.copy() - - def f(): - df.iloc[4, 2] = 5. - - self.assertRaises(IndexError, f) - - def f(): - df.iat[4, 2] = 5. 
- - self.assertRaises(IndexError, f) - - # row setting where it exists - expected = DataFrame(dict({'A': [0, 4, 4], 'B': [1, 5, 5]})) - df = df_orig.copy() - df.iloc[1] = df.iloc[2] - tm.assert_frame_equal(df, expected) - - expected = DataFrame(dict({'A': [0, 4, 4], 'B': [1, 5, 5]})) - df = df_orig.copy() - df.loc[1] = df.loc[2] - tm.assert_frame_equal(df, expected) - - # like 2578, partial setting with dtype preservation - expected = DataFrame(dict({'A': [0, 2, 4, 4], 'B': [1, 3, 5, 5]})) - df = df_orig.copy() - df.loc[3] = df.loc[2] - tm.assert_frame_equal(df, expected) - - # single dtype frame, overwrite - expected = DataFrame(dict({'A': [0, 2, 4], 'B': [0, 2, 4]})) - df = df_orig.copy() - df.ix[:, 'B'] = df.ix[:, 'A'] - tm.assert_frame_equal(df, expected) - - # mixed dtype frame, overwrite - expected = DataFrame(dict({'A': [0, 2, 4], 'B': Series([0, 2, 4])})) - df = df_orig.copy() - df['B'] = df['B'].astype(np.float64) - df.ix[:, 'B'] = df.ix[:, 'A'] - tm.assert_frame_equal(df, expected) - - # single dtype frame, partial setting - expected = df_orig.copy() - expected['C'] = df['A'] - df = df_orig.copy() - df.ix[:, 'C'] = df.ix[:, 'A'] - tm.assert_frame_equal(df, expected) - - # mixed frame, partial setting - expected = df_orig.copy() - expected['C'] = df['A'] - df = df_orig.copy() - df.ix[:, 'C'] = df.ix[:, 'A'] - tm.assert_frame_equal(df, expected) - - # ## panel ## - p_orig = Panel(np.arange(16).reshape(2, 4, 2), - items=['Item1', 'Item2'], - major_axis=pd.date_range('2001/1/12', periods=4), - minor_axis=['A', 'B'], dtype='float64') - - # panel setting via item - p_orig = Panel(np.arange(16).reshape(2, 4, 2), - items=['Item1', 'Item2'], - major_axis=pd.date_range('2001/1/12', periods=4), - minor_axis=['A', 'B'], dtype='float64') - expected = p_orig.copy() - expected['Item3'] = expected['Item1'] - p = p_orig.copy() - p.loc['Item3'] = p['Item1'] - tm.assert_panel_equal(p, expected) - - # panel with aligned series - expected = p_orig.copy() - expected = 
expected.transpose(2, 1, 0) - expected['C'] = DataFrame({'Item1': [30, 30, 30, 30], - 'Item2': [32, 32, 32, 32]}, - index=p_orig.major_axis) - expected = expected.transpose(2, 1, 0) - p = p_orig.copy() - p.loc[:, :, 'C'] = Series([30, 32], index=p_orig.items) - tm.assert_panel_equal(p, expected) - - # GH 8473 - dates = date_range('1/1/2000', periods=8) - df_orig = DataFrame(np.random.randn(8, 4), index=dates, - columns=['A', 'B', 'C', 'D']) - - expected = pd.concat([df_orig, DataFrame( - {'A': 7}, index=[dates[-1] + 1])]) - df = df_orig.copy() - df.loc[dates[-1] + 1, 'A'] = 7 - tm.assert_frame_equal(df, expected) - df = df_orig.copy() - df.at[dates[-1] + 1, 'A'] = 7 - tm.assert_frame_equal(df, expected) - - exp_other = DataFrame({0: 7}, index=[dates[-1] + 1]) - expected = pd.concat([df_orig, exp_other], axis=1) - - df = df_orig.copy() - df.loc[dates[-1] + 1, 0] = 7 - tm.assert_frame_equal(df, expected) - df = df_orig.copy() - df.at[dates[-1] + 1, 0] = 7 - tm.assert_frame_equal(df, expected) - - def test_partial_setting_mixed_dtype(self): - - # in a mixed dtype environment, try to preserve dtypes - # by appending - df = DataFrame([[True, 1], [False, 2]], columns=["female", "fitness"]) - - s = df.loc[1].copy() - s.name = 2 - expected = df.append(s) - - df.loc[2] = df.loc[1] - tm.assert_frame_equal(df, expected) - - # columns will align - df = DataFrame(columns=['A', 'B']) - df.loc[0] = Series(1, index=range(4)) - tm.assert_frame_equal(df, DataFrame(columns=['A', 'B'], index=[0])) - - # columns will align - df = DataFrame(columns=['A', 'B']) - df.loc[0] = Series(1, index=['B']) - - exp = DataFrame([[np.nan, 1]], columns=['A', 'B'], - index=[0], dtype='float64') - tm.assert_frame_equal(df, exp) - - # list-like must conform - df = DataFrame(columns=['A', 'B']) - - def f(): - df.loc[0] = [1, 2, 3] - - self.assertRaises(ValueError, f) - - # these are coerced to float unavoidably (as its a list-like to begin) - df = DataFrame(columns=['A', 'B']) - df.loc[3] = [6, 7] - - 
exp = DataFrame([[6, 7]], index=[3], columns=['A', 'B'], - dtype='float64') - tm.assert_frame_equal(df, exp) - - def test_series_partial_set(self): - # partial set with new index - # Regression from GH4825 - ser = Series([0.1, 0.2], index=[1, 2]) - - # loc - expected = Series([np.nan, 0.2, np.nan], index=[3, 2, 3]) - result = ser.loc[[3, 2, 3]] - tm.assert_series_equal(result, expected, check_index_type=True) - - expected = Series([np.nan, 0.2, np.nan, np.nan], index=[3, 2, 3, 'x']) - result = ser.loc[[3, 2, 3, 'x']] - tm.assert_series_equal(result, expected, check_index_type=True) - - expected = Series([0.2, 0.2, 0.1], index=[2, 2, 1]) - result = ser.loc[[2, 2, 1]] - tm.assert_series_equal(result, expected, check_index_type=True) - - expected = Series([0.2, 0.2, np.nan, 0.1], index=[2, 2, 'x', 1]) - result = ser.loc[[2, 2, 'x', 1]] - tm.assert_series_equal(result, expected, check_index_type=True) - - # raises as nothing in in the index - self.assertRaises(KeyError, lambda: ser.loc[[3, 3, 3]]) - - expected = Series([0.2, 0.2, np.nan], index=[2, 2, 3]) - result = ser.loc[[2, 2, 3]] - tm.assert_series_equal(result, expected, check_index_type=True) - - expected = Series([0.3, np.nan, np.nan], index=[3, 4, 4]) - result = Series([0.1, 0.2, 0.3], index=[1, 2, 3]).loc[[3, 4, 4]] - tm.assert_series_equal(result, expected, check_index_type=True) - - expected = Series([np.nan, 0.3, 0.3], index=[5, 3, 3]) - result = Series([0.1, 0.2, 0.3, 0.4], - index=[1, 2, 3, 4]).loc[[5, 3, 3]] - tm.assert_series_equal(result, expected, check_index_type=True) - - expected = Series([np.nan, 0.4, 0.4], index=[5, 4, 4]) - result = Series([0.1, 0.2, 0.3, 0.4], - index=[1, 2, 3, 4]).loc[[5, 4, 4]] - tm.assert_series_equal(result, expected, check_index_type=True) - - expected = Series([0.4, np.nan, np.nan], index=[7, 2, 2]) - result = Series([0.1, 0.2, 0.3, 0.4], - index=[4, 5, 6, 7]).loc[[7, 2, 2]] - tm.assert_series_equal(result, expected, check_index_type=True) - - expected = Series([0.4, 
np.nan, np.nan], index=[4, 5, 5]) - result = Series([0.1, 0.2, 0.3, 0.4], - index=[1, 2, 3, 4]).loc[[4, 5, 5]] - tm.assert_series_equal(result, expected, check_index_type=True) - - # iloc - expected = Series([0.2, 0.2, 0.1, 0.1], index=[2, 2, 1, 1]) - result = ser.iloc[[1, 1, 0, 0]] - tm.assert_series_equal(result, expected, check_index_type=True) - - def test_series_partial_set_with_name(self): - # GH 11497 - - idx = Index([1, 2], dtype='int64', name='idx') - ser = Series([0.1, 0.2], index=idx, name='s') - - # loc - exp_idx = Index([3, 2, 3], dtype='int64', name='idx') - expected = Series([np.nan, 0.2, np.nan], index=exp_idx, name='s') - result = ser.loc[[3, 2, 3]] - tm.assert_series_equal(result, expected, check_index_type=True) - - exp_idx = Index([3, 2, 3, 'x'], dtype='object', name='idx') - expected = Series([np.nan, 0.2, np.nan, np.nan], index=exp_idx, - name='s') - result = ser.loc[[3, 2, 3, 'x']] - tm.assert_series_equal(result, expected, check_index_type=True) - - exp_idx = Index([2, 2, 1], dtype='int64', name='idx') - expected = Series([0.2, 0.2, 0.1], index=exp_idx, name='s') - result = ser.loc[[2, 2, 1]] - tm.assert_series_equal(result, expected, check_index_type=True) - - exp_idx = Index([2, 2, 'x', 1], dtype='object', name='idx') - expected = Series([0.2, 0.2, np.nan, 0.1], index=exp_idx, name='s') - result = ser.loc[[2, 2, 'x', 1]] - tm.assert_series_equal(result, expected, check_index_type=True) - - # raises as nothing in in the index - self.assertRaises(KeyError, lambda: ser.loc[[3, 3, 3]]) - - exp_idx = Index([2, 2, 3], dtype='int64', name='idx') - expected = Series([0.2, 0.2, np.nan], index=exp_idx, name='s') - result = ser.loc[[2, 2, 3]] - tm.assert_series_equal(result, expected, check_index_type=True) - - exp_idx = Index([3, 4, 4], dtype='int64', name='idx') - expected = Series([0.3, np.nan, np.nan], index=exp_idx, name='s') - idx = Index([1, 2, 3], dtype='int64', name='idx') - result = Series([0.1, 0.2, 0.3], index=idx, name='s').loc[[3, 4, 
4]] - tm.assert_series_equal(result, expected, check_index_type=True) - - exp_idx = Index([5, 3, 3], dtype='int64', name='idx') - expected = Series([np.nan, 0.3, 0.3], index=exp_idx, name='s') - idx = Index([1, 2, 3, 4], dtype='int64', name='idx') - result = Series([0.1, 0.2, 0.3, 0.4], index=idx, - name='s').loc[[5, 3, 3]] - tm.assert_series_equal(result, expected, check_index_type=True) - - exp_idx = Index([5, 4, 4], dtype='int64', name='idx') - expected = Series([np.nan, 0.4, 0.4], index=exp_idx, name='s') - idx = Index([1, 2, 3, 4], dtype='int64', name='idx') - result = Series([0.1, 0.2, 0.3, 0.4], index=idx, - name='s').loc[[5, 4, 4]] - tm.assert_series_equal(result, expected, check_index_type=True) - - exp_idx = Index([7, 2, 2], dtype='int64', name='idx') - expected = Series([0.4, np.nan, np.nan], index=exp_idx, name='s') - idx = Index([4, 5, 6, 7], dtype='int64', name='idx') - result = Series([0.1, 0.2, 0.3, 0.4], index=idx, - name='s').loc[[7, 2, 2]] - tm.assert_series_equal(result, expected, check_index_type=True) - - exp_idx = Index([4, 5, 5], dtype='int64', name='idx') - expected = Series([0.4, np.nan, np.nan], index=exp_idx, name='s') - idx = Index([1, 2, 3, 4], dtype='int64', name='idx') - result = Series([0.1, 0.2, 0.3, 0.4], index=idx, - name='s').loc[[4, 5, 5]] - tm.assert_series_equal(result, expected, check_index_type=True) - - # iloc - exp_idx = Index([2, 2, 1, 1], dtype='int64', name='idx') - expected = Series([0.2, 0.2, 0.1, 0.1], index=exp_idx, name='s') - result = ser.iloc[[1, 1, 0, 0]] - tm.assert_series_equal(result, expected, check_index_type=True) - - def test_partial_set_invalid(self): - - # GH 4940 - # allow only setting of 'valid' values - - orig = tm.makeTimeDataFrame() - df = orig.copy() - - # don't allow not string inserts - def f(): - df.loc[100.0, :] = df.ix[0] - - self.assertRaises(TypeError, f) - - def f(): - df.loc[100, :] = df.ix[0] - - self.assertRaises(TypeError, f) - - def f(): - df.ix[100.0, :] = df.ix[0] - - 
self.assertRaises(TypeError, f) - - def f(): - df.ix[100, :] = df.ix[0] - - self.assertRaises(ValueError, f) - - # allow object conversion here - df = orig.copy() - df.loc['a', :] = df.ix[0] - exp = orig.append(pd.Series(df.ix[0], name='a')) - tm.assert_frame_equal(df, exp) - tm.assert_index_equal(df.index, - pd.Index(orig.index.tolist() + ['a'])) - self.assertEqual(df.index.dtype, 'object') - - def test_partial_set_empty_series(self): - - # GH5226 - - # partially set with an empty object series - s = Series() - s.loc[1] = 1 - tm.assert_series_equal(s, Series([1], index=[1])) - s.loc[3] = 3 - tm.assert_series_equal(s, Series([1, 3], index=[1, 3])) - - s = Series() - s.loc[1] = 1. - tm.assert_series_equal(s, Series([1.], index=[1])) - s.loc[3] = 3. - tm.assert_series_equal(s, Series([1., 3.], index=[1, 3])) - - s = Series() - s.loc['foo'] = 1 - tm.assert_series_equal(s, Series([1], index=['foo'])) - s.loc['bar'] = 3 - tm.assert_series_equal(s, Series([1, 3], index=['foo', 'bar'])) - s.loc[3] = 4 - tm.assert_series_equal(s, Series([1, 3, 4], index=['foo', 'bar', 3])) - - def test_partial_set_empty_frame(self): - - # partially set with an empty object - # frame - df = DataFrame() - - def f(): - df.loc[1] = 1 - - self.assertRaises(ValueError, f) - - def f(): - df.loc[1] = Series([1], index=['foo']) - - self.assertRaises(ValueError, f) - - def f(): - df.loc[:, 1] = 1 - - self.assertRaises(ValueError, f) - - # these work as they don't really change - # anything but the index - # GH5632 - expected = DataFrame(columns=['foo'], index=pd.Index( - [], dtype='int64')) - - def f(): - df = DataFrame() - df['foo'] = Series([], dtype='object') - return df - - tm.assert_frame_equal(f(), expected) - - def f(): - df = DataFrame() - df['foo'] = Series(df.index) - return df - - tm.assert_frame_equal(f(), expected) - - def f(): - df = DataFrame() - df['foo'] = df.index - return df - - tm.assert_frame_equal(f(), expected) - - expected = DataFrame(columns=['foo'], - index=pd.Index([], 
dtype='int64')) - expected['foo'] = expected['foo'].astype('float64') - - def f(): - df = DataFrame() - df['foo'] = [] - return df - - tm.assert_frame_equal(f(), expected) - - def f(): - df = DataFrame() - df['foo'] = Series(range(len(df))) - return df - - tm.assert_frame_equal(f(), expected) - - def f(): - df = DataFrame() - tm.assert_index_equal(df.index, pd.Index([], dtype='object')) - df['foo'] = range(len(df)) - return df - - expected = DataFrame(columns=['foo'], - index=pd.Index([], dtype='int64')) - expected['foo'] = expected['foo'].astype('float64') - tm.assert_frame_equal(f(), expected) - - df = DataFrame() - tm.assert_index_equal(df.columns, pd.Index([], dtype=object)) - df2 = DataFrame() - df2[1] = Series([1], index=['foo']) - df.loc[:, 1] = Series([1], index=['foo']) - tm.assert_frame_equal(df, DataFrame([[1]], index=['foo'], columns=[1])) - tm.assert_frame_equal(df, df2) - - # no index to start - expected = DataFrame({0: Series(1, index=range(4))}, - columns=['A', 'B', 0]) - - df = DataFrame(columns=['A', 'B']) - df[0] = Series(1, index=range(4)) - df.dtypes - str(df) - tm.assert_frame_equal(df, expected) - - df = DataFrame(columns=['A', 'B']) - df.loc[:, 0] = Series(1, index=range(4)) - df.dtypes - str(df) - tm.assert_frame_equal(df, expected) - - def test_partial_set_empty_frame_row(self): - # GH5720, GH5744 - # don't create rows when empty - expected = DataFrame(columns=['A', 'B', 'New'], - index=pd.Index([], dtype='int64')) - expected['A'] = expected['A'].astype('int64') - expected['B'] = expected['B'].astype('float64') - expected['New'] = expected['New'].astype('float64') - - df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]}) - y = df[df.A > 5] - y['New'] = np.nan - tm.assert_frame_equal(y, expected) - # tm.assert_frame_equal(y,expected) - - expected = DataFrame(columns=['a', 'b', 'c c', 'd']) - expected['d'] = expected['d'].astype('int64') - df = DataFrame(columns=['a', 'b', 'c c']) - df['d'] = 3 - tm.assert_frame_equal(df, expected) - 
tm.assert_series_equal(df['c c'], Series(name='c c', dtype=object)) - - # reindex columns is ok - df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]}) - y = df[df.A > 5] - result = y.reindex(columns=['A', 'B', 'C']) - expected = DataFrame(columns=['A', 'B', 'C'], - index=pd.Index([], dtype='int64')) - expected['A'] = expected['A'].astype('int64') - expected['B'] = expected['B'].astype('float64') - expected['C'] = expected['C'].astype('float64') - tm.assert_frame_equal(result, expected) - - def test_partial_set_empty_frame_set_series(self): - # GH 5756 - # setting with empty Series - df = DataFrame(Series()) - tm.assert_frame_equal(df, DataFrame({0: Series()})) - - df = DataFrame(Series(name='foo')) - tm.assert_frame_equal(df, DataFrame({'foo': Series()})) - - def test_partial_set_empty_frame_empty_copy_assignment(self): - # GH 5932 - # copy on empty with assignment fails - df = DataFrame(index=[0]) - df = df.copy() - df['a'] = 0 - expected = DataFrame(0, index=[0], columns=['a']) - tm.assert_frame_equal(df, expected) + @pytest.mark.parametrize("index,val", [ + (Index([0, 1, 2]), 2), + (Index([0, 1, '2']), '2'), + (Index([0, 1, 2, np.inf, 4]), 4), + (Index([0, 1, 2, np.nan, 4]), 4), + (Index([0, 1, 2, np.inf]), np.inf), + (Index([0, 1, 2, np.nan]), np.nan), + ]) + def test_index_contains(self, index, val): + assert val in index + + @pytest.mark.parametrize("index,val", [ + (Index([0, 1, 2]), '2'), + (Index([0, 1, '2']), 2), + (Index([0, 1, 2, np.inf]), 4), + (Index([0, 1, 2, np.nan]), 4), + (Index([0, 1, 2, np.inf]), np.nan), + (Index([0, 1, 2, np.nan]), np.inf), + # Checking if np.inf in Int64Index should not cause an OverflowError + # Related to GH 16957 + (pd.Int64Index([0, 1, 2]), np.inf), + (pd.Int64Index([0, 1, 2]), np.nan), + (pd.UInt64Index([0, 1, 2]), np.inf), + (pd.UInt64Index([0, 1, 2]), np.nan), + ]) + def test_index_not_contains(self, index, val): + assert val not in index - def test_partial_set_empty_frame_empty_consistencies(self): - # GH 6171 - # 
consistency on empty frames - df = DataFrame(columns=['x', 'y']) - df['x'] = [1, 2] - expected = DataFrame(dict(x=[1, 2], y=[np.nan, np.nan])) - tm.assert_frame_equal(df, expected, check_dtype=False) + def test_index_type_coercion(self): - df = DataFrame(columns=['x', 'y']) - df['x'] = ['1', '2'] - expected = DataFrame( - dict(x=['1', '2'], y=[np.nan, np.nan]), dtype=object) - tm.assert_frame_equal(df, expected) + with catch_warnings(record=True): - df = DataFrame(columns=['x', 'y']) - df.loc[0, 'x'] = 1 - expected = DataFrame(dict(x=[1], y=[np.nan])) - tm.assert_frame_equal(df, expected, check_dtype=False) - - def test_cache_updating(self): - # GH 4939, make sure to update the cache on setitem - - df = tm.makeDataFrame() - df['A'] # cache series - df.ix["Hello Friend"] = df.ix[0] - self.assertIn("Hello Friend", df['A'].index) - self.assertIn("Hello Friend", df['B'].index) - - panel = tm.makePanel() - panel.ix[0] # get first item into cache - panel.ix[:, :, 'A+1'] = panel.ix[:, :, 'A'] + 1 - self.assertIn("A+1", panel.ix[0].columns) - self.assertIn("A+1", panel.ix[1].columns) - - # 5216 - # make sure that we don't try to set a dead cache - a = np.random.rand(10, 3) - df = DataFrame(a, columns=['x', 'y', 'z']) - tuples = [(i, j) for i in range(5) for j in range(2)] - index = MultiIndex.from_tuples(tuples) - df.index = index - - # setting via chained assignment - # but actually works, since everything is a view - df.loc[0]['z'].iloc[0] = 1. - result = df.loc[(0, 0), 'z'] - self.assertEqual(result, 1) - - # correct setting - df.loc[(0, 0), 'z'] = 2 - result = df.loc[(0, 0), 'z'] - self.assertEqual(result, 2) - - # 10264 - df = DataFrame(np.zeros((5, 5), dtype='int64'), columns=[ - 'a', 'b', 'c', 'd', 'e'], index=range(5)) - df['f'] = 0 - df.f.values[3] = 1 + # GH 11836 + # if we have an index type and set it with something that looks + # to numpy like the same, but is actually, not + # (e.g. 
setting with a float or string '0') + # then we need to coerce to object - # TODO(wesm): unused? - # y = df.iloc[np.arange(2, len(df))] + # integer indexes + for s in [Series(range(5)), + Series(range(5), index=range(1, 6))]: - df.f.values[3] = 2 - expected = DataFrame(np.zeros((5, 6), dtype='int64'), columns=[ - 'a', 'b', 'c', 'd', 'e', 'f'], index=range(5)) - expected.at[3, 'f'] = 2 - tm.assert_frame_equal(df, expected) - expected = Series([0, 0, 0, 2, 0], name='f') - tm.assert_series_equal(df.f, expected) - - def test_set_ix_out_of_bounds_axis_0(self): - df = pd.DataFrame( - randn(2, 5), index=["row%s" % i for i in range(2)], - columns=["col%s" % i for i in range(5)]) - self.assertRaises(ValueError, df.ix.__setitem__, (2, 0), 100) - - def test_set_ix_out_of_bounds_axis_1(self): - df = pd.DataFrame( - randn(5, 2), index=["row%s" % i for i in range(5)], - columns=["col%s" % i for i in range(2)]) - self.assertRaises(ValueError, df.ix.__setitem__, (0, 2), 100) - - def test_iloc_empty_list_indexer_is_ok(self): - from pandas.util.testing import makeCustomDataframe as mkdf - df = mkdf(5, 2) - # vertical empty - tm.assert_frame_equal(df.iloc[:, []], df.iloc[:, :0], - check_index_type=True, check_column_type=True) - # horizontal empty - tm.assert_frame_equal(df.iloc[[], :], df.iloc[:0, :], - check_index_type=True, check_column_type=True) - # horizontal empty - tm.assert_frame_equal(df.iloc[[]], df.iloc[:0, :], - check_index_type=True, - check_column_type=True) - - def test_loc_empty_list_indexer_is_ok(self): - from pandas.util.testing import makeCustomDataframe as mkdf - df = mkdf(5, 2) - # vertical empty - tm.assert_frame_equal(df.loc[:, []], df.iloc[:, :0], - check_index_type=True, check_column_type=True) - # horizontal empty - tm.assert_frame_equal(df.loc[[], :], df.iloc[:0, :], - check_index_type=True, check_column_type=True) - # horizontal empty - tm.assert_frame_equal(df.loc[[]], df.iloc[:0, :], - check_index_type=True, - check_column_type=True) - - def 
test_ix_empty_list_indexer_is_ok(self): - from pandas.util.testing import makeCustomDataframe as mkdf - df = mkdf(5, 2) - # vertical empty - tm.assert_frame_equal(df.ix[:, []], df.iloc[:, :0], - check_index_type=True, - check_column_type=True) - # horizontal empty - tm.assert_frame_equal(df.ix[[], :], df.iloc[:0, :], - check_index_type=True, - check_column_type=True) - # horizontal empty - tm.assert_frame_equal(df.ix[[]], df.iloc[:0, :], - check_index_type=True, - check_column_type=True) + assert s.index.is_integer() - def test_index_type_coercion(self): + for indexer in [lambda x: x.ix, + lambda x: x.loc, + lambda x: x]: + s2 = s.copy() + indexer(s2)[0.1] = 0 + assert s2.index.is_floating() + assert indexer(s2)[0.1] == 0 - # GH 11836 - # if we have an index type and set it with something that looks - # to numpy like the same, but is actually, not - # (e.g. setting with a float or string '0') - # then we need to coerce to object + s2 = s.copy() + indexer(s2)[0.0] = 0 + exp = s.index + if 0 not in s: + exp = Index(s.index.tolist() + [0]) + tm.assert_index_equal(s2.index, exp) - # integer indexes - for s in [Series(range(5)), - Series(range(5), index=range(1, 6))]: + s2 = s.copy() + indexer(s2)['0'] = 0 + assert s2.index.is_object() - self.assertTrue(s.index.is_integer()) + for s in [Series(range(5), index=np.arange(5.))]: - for indexer in [lambda x: x.ix, - lambda x: x.loc, - lambda x: x]: - s2 = s.copy() - indexer(s2)[0.1] = 0 - self.assertTrue(s2.index.is_floating()) - self.assertTrue(indexer(s2)[0.1] == 0) + assert s.index.is_floating() - s2 = s.copy() - indexer(s2)[0.0] = 0 - exp = s.index - if 0 not in s: - exp = Index(s.index.tolist() + [0]) - tm.assert_index_equal(s2.index, exp) + for idxr in [lambda x: x.ix, + lambda x: x.loc, + lambda x: x]: - s2 = s.copy() - indexer(s2)['0'] = 0 - self.assertTrue(s2.index.is_object()) + s2 = s.copy() + idxr(s2)[0.1] = 0 + assert s2.index.is_floating() + assert idxr(s2)[0.1] == 0 - for s in [Series(range(5), 
index=np.arange(5.))]: + s2 = s.copy() + idxr(s2)[0.0] = 0 + tm.assert_index_equal(s2.index, s.index) - self.assertTrue(s.index.is_floating()) + s2 = s.copy() + idxr(s2)['0'] = 0 + assert s2.index.is_object() - for idxr in [lambda x: x.ix, - lambda x: x.loc, - lambda x: x]: - s2 = s.copy() - idxr(s2)[0.1] = 0 - self.assertTrue(s2.index.is_floating()) - self.assertTrue(idxr(s2)[0.1] == 0) +class TestMisc(Base): - s2 = s.copy() - idxr(s2)[0.0] = 0 - tm.assert_index_equal(s2.index, s.index) + def test_indexer_caching(self): + # GH5727 + # make sure that indexers are in the _internal_names_set + n = 1000001 + arrays = [lrange(n), lrange(n)] + index = MultiIndex.from_tuples(lzip(*arrays)) + s = Series(np.zeros(n), index=index) + str(s) - s2 = s.copy() - idxr(s2)['0'] = 0 - self.assertTrue(s2.index.is_object()) + # setitem + expected = Series(np.ones(n), index=index) + s = Series(np.zeros(n), index=index) + s[s == 0] = 1 + tm.assert_series_equal(s, expected) def test_float_index_to_mixed(self): df = DataFrame({0.0: np.random.rand(10), 1.0: np.random.rand(10)}) @@ -3143,13 +692,6 @@ def test_float_index_to_mixed(self): 'a': [10] * 10}), df) - def test_duplicate_ix_returns_series(self): - df = DataFrame(np.random.randn(3, 3), index=[0.1, 0.2, 0.2], - columns=list('abc')) - r = df.ix[0.2, 'a'] - e = df.loc[0.2, 'a'] - tm.assert_series_equal(r, e) - def test_float_index_non_scalar_assignment(self): df = DataFrame({'a': [1, 2, 3], 'b': [3, 4, 5]}, index=[1., 2., 3.]) df.loc[df.index[:2]] = 1 @@ -3162,11 +704,11 @@ def test_float_index_non_scalar_assignment(self): tm.assert_frame_equal(df, df2) def test_float_index_at_iat(self): - s = pd.Series([1, 2, 3], index=[0.1, 0.2, 0.3]) + s = Series([1, 2, 3], index=[0.1, 0.2, 0.3]) for el, item in s.iteritems(): - self.assertEqual(s.at[el], item) + assert s.at[el] == item for i in range(len(s)): - self.assertEqual(s.iat[i], i + 1) + assert s.iat[i] == i + 1 def test_rhs_alignment(self): # GH8258, tests that both rows & columns are 
aligned to what is @@ -3185,20 +727,23 @@ def run_tests(df, rhs, right): tm.assert_frame_equal(left, right) left = df.copy() - left.ix[s, l] = rhs + with catch_warnings(record=True): + left.ix[s, l] = rhs tm.assert_frame_equal(left, right) left = df.copy() - left.ix[i, j] = rhs + with catch_warnings(record=True): + left.ix[i, j] = rhs tm.assert_frame_equal(left, right) left = df.copy() - left.ix[r, c] = rhs + with catch_warnings(record=True): + left.ix[r, c] = rhs tm.assert_frame_equal(left, right) xs = np.arange(20).reshape(5, 4) cols = ['jim', 'joe', 'jolie', 'joline'] - df = pd.DataFrame(xs, columns=cols, index=list('abcde')) + df = DataFrame(xs, columns=cols, index=list('abcde')) # right hand side; permute the indices and multiplpy by -2 rhs = -2 * df.iloc[3:0:-1, 2:0:-1] @@ -3226,7 +771,7 @@ def assert_slices_equivalent(l_slc, i_slc): if not idx.is_integer: # For integer indices, ix and plain getitem are position-based. tm.assert_series_equal(s[l_slc], s.iloc[i_slc]) - tm.assert_series_equal(s.ix[l_slc], s.iloc[i_slc]) + tm.assert_series_equal(s.loc[l_slc], s.iloc[i_slc]) for idx in [_mklbl('A', 20), np.arange(20) + 100, np.linspace(100, 150, 20)]: @@ -3239,17 +784,19 @@ def assert_slices_equivalent(l_slc, i_slc): def test_slice_with_zero_step_raises(self): s = Series(np.arange(20), index=_mklbl('A', 20)) - self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: s[::0]) - self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: s.loc[::0]) - self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: s.ix[::0]) + tm.assert_raises_regex(ValueError, 'slice step cannot be zero', + lambda: s[::0]) + tm.assert_raises_regex(ValueError, 'slice step cannot be zero', + lambda: s.loc[::0]) + with catch_warnings(record=True): + tm.assert_raises_regex(ValueError, + 'slice step cannot be zero', + lambda: s.ix[::0]) def test_indexing_assignment_dict_already_exists(self): - df = pd.DataFrame({'x': [1, 2, 6], - 'y': [2, 2, 
8], - 'z': [-5, 0, 5]}).set_index('z') + df = DataFrame({'x': [1, 2, 6], + 'y': [2, 2, 8], + 'z': [-5, 0, 5]}).set_index('z') expected = df.copy() rhs = dict(x=9, y=99) df.loc[5] = rhs @@ -3259,17 +806,19 @@ def test_indexing_assignment_dict_already_exists(self): def test_indexing_dtypes_on_empty(self): # Check that .iloc and .ix return correct dtypes GH9983 df = DataFrame({'a': [1, 2, 3], 'b': ['b', 'b2', 'b3']}) - df2 = df.ix[[], :] + with catch_warnings(record=True): + df2 = df.ix[[], :] - self.assertEqual(df2.loc[:, 'a'].dtype, np.int64) + assert df2.loc[:, 'a'].dtype == np.int64 tm.assert_series_equal(df2.loc[:, 'a'], df2.iloc[:, 0]) - tm.assert_series_equal(df2.loc[:, 'a'], df2.ix[:, 0]) + with catch_warnings(record=True): + tm.assert_series_equal(df2.loc[:, 'a'], df2.ix[:, 0]) def test_range_in_series_indexing(self): # range can cause an indexing error # GH 11652 for x in [5, 999999, 1000000]: - s = pd.Series(index=range(x)) + s = Series(index=range(x)) s.loc[range(1)] = 42 tm.assert_series_equal(s.loc[range(1)], Series(42.0, index=[0])) @@ -3277,7 +826,7 @@ def test_range_in_series_indexing(self): tm.assert_series_equal(s.loc[range(2)], Series(43.0, index=[0, 1])) def test_non_reducing_slice(self): - df = pd.DataFrame([[0, 1], [2, 3]]) + df = DataFrame([[0, 1], [2, 3]]) slices = [ # pd.IndexSlice[:, :], @@ -3291,35 +840,58 @@ def test_non_reducing_slice(self): slice(None, None, None), [0, 1], np.array([0, 1]), - pd.Series([0, 1]) + Series([0, 1]) ] for slice_ in slices: tslice_ = _non_reducing_slice(slice_) - self.assertTrue(isinstance(df.loc[tslice_], DataFrame)) + assert isinstance(df.loc[tslice_], DataFrame) def test_list_slice(self): # like dataframe getitem - slices = [['A'], pd.Series(['A']), np.array(['A'])] - df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]}, index=['A', 'B']) + slices = [['A'], Series(['A']), np.array(['A'])] + df = DataFrame({'A': [1, 2], 'B': [3, 4]}, index=['A', 'B']) expected = pd.IndexSlice[:, ['A']] for subset in slices: result = 
_non_reducing_slice(subset) tm.assert_frame_equal(df.loc[result], df.loc[expected]) def test_maybe_numeric_slice(self): - df = pd.DataFrame({'A': [1, 2], 'B': ['c', 'd'], 'C': [True, False]}) + df = DataFrame({'A': [1, 2], 'B': ['c', 'd'], 'C': [True, False]}) result = _maybe_numeric_slice(df, slice_=None) expected = pd.IndexSlice[:, ['A']] - self.assertEqual(result, expected) + assert result == expected result = _maybe_numeric_slice(df, None, include_bool=True) expected = pd.IndexSlice[:, ['A', 'C']] result = _maybe_numeric_slice(df, [1]) expected = [1] - self.assertEqual(result, expected) + assert result == expected + + def test_partial_boolean_frame_indexing(self): + # GH 17170 + df = DataFrame(np.arange(9.).reshape(3, 3), + index=list('abc'), columns=list('ABC')) + index_df = DataFrame(1, index=list('ab'), columns=list('AB')) + result = df[index_df.notnull()] + expected = DataFrame(np.array([[0., 1., np.nan], + [3., 4., np.nan], + [np.nan] * 3]), + index=list('abc'), + columns=list('ABC')) + tm.assert_frame_equal(result, expected) + + def test_no_reference_cycle(self): + df = DataFrame({'a': [0, 1], 'b': [2, 3]}) + for name in ('loc', 'iloc', 'at', 'iat'): + getattr(df, name) + with catch_warnings(record=True): + getattr(df, 'ix') + wr = weakref.ref(df) + del df + assert wr() is None -class TestSeriesNoneCoercion(tm.TestCase): +class TestSeriesNoneCoercion(object): EXPECTED_RESULTS = [ # For numeric series, we should coerce to NaN. ([1, 2, 3], [np.nan, 2, 3]), @@ -3366,7 +938,7 @@ def test_coercion_with_loc_and_series(self): tm.assert_series_equal(start_series, expected_series) -class TestDataframeNoneCoercion(tm.TestCase): +class TestDataframeNoneCoercion(object): EXPECTED_SINGLE_ROW_RESULTS = [ # For numeric series, we should coerce to NaN. 
([1, 2, 3], [np.nan, 2, 3]), diff --git a/pandas/tests/indexing/test_indexing_slow.py b/pandas/tests/indexing/test_indexing_slow.py index 42b50e37f0492..f4d581f450363 100644 --- a/pandas/tests/indexing/test_indexing_slow.py +++ b/pandas/tests/indexing/test_indexing_slow.py @@ -6,11 +6,12 @@ import pandas as pd from pandas.core.api import Series, DataFrame, MultiIndex import pandas.util.testing as tm +import pytest -class TestIndexingSlow(tm.TestCase): +class TestIndexingSlow(object): - @tm.slow + @pytest.mark.slow def test_multiindex_get_loc(self): # GH7724, GH2646 with warnings.catch_warnings(record=True): @@ -27,10 +28,10 @@ def validate(mi, df, key): mask &= df.iloc[:, i] == k if not mask.any(): - self.assertNotIn(key[:i + 1], mi.index) + assert key[:i + 1] not in mi.index continue - self.assertIn(key[:i + 1], mi.index) + assert key[:i + 1] in mi.index right = df[mask].copy() if i + 1 != len(key): # partial key @@ -69,7 +70,7 @@ def loop(mi, df, keys): keys += list(map(lambda t: t[:-1], vals[::n // m])) # covers both unique index and non-unique index - df = pd.DataFrame(vals, columns=cols) + df = DataFrame(vals, columns=cols) a, b = pd.concat([df, df]), df.drop_duplicates(subset=cols[:-1]) for frame in a, b: @@ -80,7 +81,7 @@ def loop(mi, df, keys): assert not mi.index.lexsort_depth < i loop(mi, df, keys) - @tm.slow + @pytest.mark.slow def test_large_dataframe_indexing(self): # GH10692 result = DataFrame({'x': range(10 ** 6)}, dtype='int64') @@ -88,7 +89,7 @@ def test_large_dataframe_indexing(self): expected = DataFrame({'x': range(10 ** 6 + 1)}, dtype='int64') tm.assert_frame_equal(result, expected) - @tm.slow + @pytest.mark.slow def test_large_mi_dataframe_indexing(self): # GH10645 result = MultiIndex.from_arrays([range(10 ** 6), range(10 ** 6)]) diff --git a/pandas/tests/indexing/test_ix.py b/pandas/tests/indexing/test_ix.py new file mode 100644 index 0000000000000..c84576c984525 --- /dev/null +++ b/pandas/tests/indexing/test_ix.py @@ -0,0 +1,337 @@ +""" test 
indexing with ix """ + +import pytest + +from warnings import catch_warnings + +import numpy as np +import pandas as pd + +from pandas.core.dtypes.common import is_scalar +from pandas.compat import lrange +from pandas import Series, DataFrame, option_context, MultiIndex +from pandas.util import testing as tm +from pandas.errors import PerformanceWarning + + +class TestIX(object): + + def test_ix_deprecation(self): + # GH 15114 + + df = DataFrame({'A': [1, 2, 3]}) + with tm.assert_produces_warning(DeprecationWarning, + check_stacklevel=False): + df.ix[1, 'A'] + + def test_ix_loc_setitem_consistency(self): + + # GH 5771 + # loc with slice and series + s = Series(0, index=[4, 5, 6]) + s.loc[4:5] += 1 + expected = Series([1, 1, 0], index=[4, 5, 6]) + tm.assert_series_equal(s, expected) + + # GH 5928 + # chained indexing assignment + df = DataFrame({'a': [0, 1, 2]}) + expected = df.copy() + with catch_warnings(record=True): + expected.ix[[0, 1, 2], 'a'] = -expected.ix[[0, 1, 2], 'a'] + + with catch_warnings(record=True): + df['a'].ix[[0, 1, 2]] = -df['a'].ix[[0, 1, 2]] + tm.assert_frame_equal(df, expected) + + df = DataFrame({'a': [0, 1, 2], 'b': [0, 1, 2]}) + with catch_warnings(record=True): + df['a'].ix[[0, 1, 2]] = -df['a'].ix[[0, 1, 2]].astype( + 'float64') + 0.5 + expected = DataFrame({'a': [0.5, -0.5, -1.5], 'b': [0, 1, 2]}) + tm.assert_frame_equal(df, expected) + + # GH 8607 + # ix setitem consistency + df = DataFrame({'delta': [1174, 904, 161], + 'elapsed': [7673, 9277, 1470], + 'timestamp': [1413840976, 1413842580, 1413760580]}) + expected = DataFrame({'delta': [1174, 904, 161], + 'elapsed': [7673, 9277, 1470], + 'timestamp': pd.to_datetime( + [1413840976, 1413842580, 1413760580], + unit='s') + }) + + df2 = df.copy() + df2['timestamp'] = pd.to_datetime(df['timestamp'], unit='s') + tm.assert_frame_equal(df2, expected) + + df2 = df.copy() + df2.loc[:, 'timestamp'] = pd.to_datetime(df['timestamp'], unit='s') + tm.assert_frame_equal(df2, expected) + + df2 = 
df.copy() + with catch_warnings(record=True): + df2.ix[:, 2] = pd.to_datetime(df['timestamp'], unit='s') + tm.assert_frame_equal(df2, expected) + + def test_ix_loc_consistency(self): + + # GH 8613 + # some edge cases where ix/loc should return the same + # this is not an exhaustive case + + def compare(result, expected): + if is_scalar(expected): + assert result == expected + else: + assert expected.equals(result) + + # failure cases for .loc, but these work for .ix + df = DataFrame(np.random.randn(5, 4), columns=list('ABCD')) + for key in [slice(1, 3), tuple([slice(0, 2), slice(0, 2)]), + tuple([slice(0, 2), df.columns[0:2]])]: + + for index in [tm.makeStringIndex, tm.makeUnicodeIndex, + tm.makeDateIndex, tm.makePeriodIndex, + tm.makeTimedeltaIndex]: + df.index = index(len(df.index)) + with catch_warnings(record=True): + df.ix[key] + + pytest.raises(TypeError, lambda: df.loc[key]) + + df = DataFrame(np.random.randn(5, 4), columns=list('ABCD'), + index=pd.date_range('2012-01-01', periods=5)) + + for key in ['2012-01-03', + '2012-01-31', + slice('2012-01-03', '2012-01-03'), + slice('2012-01-03', '2012-01-04'), + slice('2012-01-03', '2012-01-06', 2), + slice('2012-01-03', '2012-01-31'), + tuple([[True, True, True, False, True]]), ]: + + # getitem + + # if the expected raises, then compare the exceptions + try: + with catch_warnings(record=True): + expected = df.ix[key] + except KeyError: + pytest.raises(KeyError, lambda: df.loc[key]) + continue + + result = df.loc[key] + compare(result, expected) + + # setitem + df1 = df.copy() + df2 = df.copy() + + with catch_warnings(record=True): + df1.ix[key] = 10 + df2.loc[key] = 10 + compare(df2, df1) + + # edge cases + s = Series([1, 2, 3, 4], index=list('abde')) + + result1 = s['a':'c'] + with catch_warnings(record=True): + result2 = s.ix['a':'c'] + result3 = s.loc['a':'c'] + tm.assert_series_equal(result1, result2) + tm.assert_series_equal(result1, result3) + + # now work rather than raising KeyError + s = Series(range(5), 
[-2, -1, 1, 2, 3]) + + with catch_warnings(record=True): + result1 = s.ix[-10:3] + result2 = s.loc[-10:3] + tm.assert_series_equal(result1, result2) + + with catch_warnings(record=True): + result1 = s.ix[0:3] + result2 = s.loc[0:3] + tm.assert_series_equal(result1, result2) + + def test_ix_weird_slicing(self): + # http://stackoverflow.com/q/17056560/1240268 + df = DataFrame({'one': [1, 2, 3, np.nan, np.nan], + 'two': [1, 2, 3, 4, 5]}) + df.loc[df['one'] > 1, 'two'] = -df['two'] + + expected = DataFrame({'one': {0: 1.0, + 1: 2.0, + 2: 3.0, + 3: np.nan, + 4: np.nan}, + 'two': {0: 1, + 1: -2, + 2: -3, + 3: 4, + 4: 5}}) + tm.assert_frame_equal(df, expected) + + def test_ix_general(self): + + # ix general issues + + # GH 2817 + data = {'amount': {0: 700, 1: 600, 2: 222, 3: 333, 4: 444}, + 'col': {0: 3.5, 1: 3.5, 2: 4.0, 3: 4.0, 4: 4.0}, + 'year': {0: 2012, 1: 2011, 2: 2012, 3: 2012, 4: 2012}} + df = DataFrame(data).set_index(keys=['col', 'year']) + key = 4.0, 2012 + + # emits a PerformanceWarning, ok + with tm.assert_produces_warning(PerformanceWarning): + tm.assert_frame_equal(df.loc[key], df.iloc[2:]) + + # this is ok + df.sort_index(inplace=True) + res = df.loc[key] + + # col has float dtype, result should be Float64Index + index = MultiIndex.from_arrays([[4.] 
* 3, [2012] * 3], + names=['col', 'year']) + expected = DataFrame({'amount': [222, 333, 444]}, index=index) + tm.assert_frame_equal(res, expected) + + def test_ix_assign_column_mixed(self): + # GH #1142 + df = DataFrame(tm.getSeriesData()) + df['foo'] = 'bar' + + orig = df.loc[:, 'B'].copy() + df.loc[:, 'B'] = df.loc[:, 'B'] + 1 + tm.assert_series_equal(df.B, orig + 1) + + # GH 3668, mixed frame with series value + df = DataFrame({'x': lrange(10), 'y': lrange(10, 20), 'z': 'bar'}) + expected = df.copy() + + for i in range(5): + indexer = i * 2 + v = 1000 + i * 200 + expected.loc[indexer, 'y'] = v + assert expected.loc[indexer, 'y'] == v + + df.loc[df.x % 2 == 0, 'y'] = df.loc[df.x % 2 == 0, 'y'] * 100 + tm.assert_frame_equal(df, expected) + + # GH 4508, making sure consistency of assignments + df = DataFrame({'a': [1, 2, 3], 'b': [0, 1, 2]}) + df.loc[[0, 2, ], 'b'] = [100, -100] + expected = DataFrame({'a': [1, 2, 3], 'b': [100, 1, -100]}) + tm.assert_frame_equal(df, expected) + + df = DataFrame({'a': lrange(4)}) + df['b'] = np.nan + df.loc[[1, 3], 'b'] = [100, -100] + expected = DataFrame({'a': [0, 1, 2, 3], + 'b': [np.nan, 100, np.nan, -100]}) + tm.assert_frame_equal(df, expected) + + # ok, but chained assignments are dangerous + # if we turn off chained assignment it will work + with option_context('chained_assignment', None): + df = DataFrame({'a': lrange(4)}) + df['b'] = np.nan + df['b'].loc[[1, 3]] = [100, -100] + tm.assert_frame_equal(df, expected) + + def test_ix_get_set_consistency(self): + + # GH 4544 + # ix/loc get/set not consistent when + # a mixed int/string index + df = DataFrame(np.arange(16).reshape((4, 4)), + columns=['a', 'b', 8, 'c'], + index=['e', 7, 'f', 'g']) + + with catch_warnings(record=True): + assert df.ix['e', 8] == 2 + assert df.loc['e', 8] == 2 + + with catch_warnings(record=True): + df.ix['e', 8] = 42 + assert df.ix['e', 8] == 42 + assert df.loc['e', 8] == 42 + + df.loc['e', 8] = 45 + with catch_warnings(record=True): + assert 
df.ix['e', 8] == 45 + assert df.loc['e', 8] == 45 + + def test_ix_slicing_strings(self): + # see gh-3836 + data = {'Classification': + ['SA EQUITY CFD', 'bbb', 'SA EQUITY', 'SA SSF', 'aaa'], + 'Random': [1, 2, 3, 4, 5], + 'X': ['correct', 'wrong', 'correct', 'correct', 'wrong']} + df = DataFrame(data) + x = df[~df.Classification.isin(['SA EQUITY CFD', 'SA EQUITY', 'SA SSF' + ])] + with catch_warnings(record=True): + df.ix[x.index, 'X'] = df['Classification'] + + expected = DataFrame({'Classification': {0: 'SA EQUITY CFD', + 1: 'bbb', + 2: 'SA EQUITY', + 3: 'SA SSF', + 4: 'aaa'}, + 'Random': {0: 1, + 1: 2, + 2: 3, + 3: 4, + 4: 5}, + 'X': {0: 'correct', + 1: 'bbb', + 2: 'correct', + 3: 'correct', + 4: 'aaa'}}) # bug was 4: 'bbb' + + tm.assert_frame_equal(df, expected) + + def test_ix_setitem_out_of_bounds_axis_0(self): + df = DataFrame( + np.random.randn(2, 5), index=["row%s" % i for i in range(2)], + columns=["col%s" % i for i in range(5)]) + with catch_warnings(record=True): + pytest.raises(ValueError, df.ix.__setitem__, (2, 0), 100) + + def test_ix_setitem_out_of_bounds_axis_1(self): + df = DataFrame( + np.random.randn(5, 2), index=["row%s" % i for i in range(5)], + columns=["col%s" % i for i in range(2)]) + with catch_warnings(record=True): + pytest.raises(ValueError, df.ix.__setitem__, (0, 2), 100) + + def test_ix_empty_list_indexer_is_ok(self): + with catch_warnings(record=True): + from pandas.util.testing import makeCustomDataframe as mkdf + df = mkdf(5, 2) + # vertical empty + tm.assert_frame_equal(df.ix[:, []], df.iloc[:, :0], + check_index_type=True, + check_column_type=True) + # horizontal empty + tm.assert_frame_equal(df.ix[[], :], df.iloc[:0, :], + check_index_type=True, + check_column_type=True) + # horizontal empty + tm.assert_frame_equal(df.ix[[]], df.iloc[:0, :], + check_index_type=True, + check_column_type=True) + + def test_ix_duplicate_returns_series(self): + df = DataFrame(np.random.randn(3, 3), index=[0.1, 0.2, 0.2], + columns=list('abc')) + 
with catch_warnings(record=True): + r = df.ix[0.2, 'a'] + e = df.loc[0.2, 'a'] + tm.assert_series_equal(r, e) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py new file mode 100644 index 0000000000000..86a5a82441ee8 --- /dev/null +++ b/pandas/tests/indexing/test_loc.py @@ -0,0 +1,754 @@ +""" test label based indexing with loc """ + +import itertools +import pytest + +from warnings import catch_warnings +import numpy as np + +import pandas as pd +from pandas.compat import lrange, StringIO +from pandas import Series, DataFrame, Timestamp, date_range, MultiIndex, Index +from pandas.util import testing as tm +from pandas.tests.indexing.common import Base + + +class TestLoc(Base): + + def test_loc_getitem_dups(self): + # GH 5678 + # repeated gettitems on a dup index returning a ndarray + df = DataFrame( + np.random.random_sample((20, 5)), + index=['ABCDE' [x % 5] for x in range(20)]) + expected = df.loc['A', 0] + result = df.loc[:, 0].loc['A'] + tm.assert_series_equal(result, expected) + + def test_loc_getitem_dups2(self): + + # GH4726 + # dup indexing with iloc/loc + df = DataFrame([[1, 2, 'foo', 'bar', Timestamp('20130101')]], + columns=['a', 'a', 'a', 'a', 'a'], index=[1]) + expected = Series([1, 2, 'foo', 'bar', Timestamp('20130101')], + index=['a', 'a', 'a', 'a', 'a'], name=1) + + result = df.iloc[0] + tm.assert_series_equal(result, expected) + + result = df.loc[1] + tm.assert_series_equal(result, expected) + + def test_loc_setitem_dups(self): + + # GH 6541 + df_orig = DataFrame( + {'me': list('rttti'), + 'foo': list('aaade'), + 'bar': np.arange(5, dtype='float64') * 1.34 + 2, + 'bar2': np.arange(5, dtype='float64') * -.34 + 2}).set_index('me') + + indexer = tuple(['r', ['bar', 'bar2']]) + df = df_orig.copy() + df.loc[indexer] *= 2.0 + tm.assert_series_equal(df.loc[indexer], 2.0 * df_orig.loc[indexer]) + + indexer = tuple(['r', 'bar']) + df = df_orig.copy() + df.loc[indexer] *= 2.0 + assert df.loc[indexer] == 2.0 * 
df_orig.loc[indexer] + + indexer = tuple(['t', ['bar', 'bar2']]) + df = df_orig.copy() + df.loc[indexer] *= 2.0 + tm.assert_frame_equal(df.loc[indexer], 2.0 * df_orig.loc[indexer]) + + def test_loc_setitem_slice(self): + # GH10503 + + # assigning the same type should not change the type + df1 = DataFrame({'a': [0, 1, 1], + 'b': Series([100, 200, 300], dtype='uint32')}) + ix = df1['a'] == 1 + newb1 = df1.loc[ix, 'b'] + 1 + df1.loc[ix, 'b'] = newb1 + expected = DataFrame({'a': [0, 1, 1], + 'b': Series([100, 201, 301], dtype='uint32')}) + tm.assert_frame_equal(df1, expected) + + # assigning a new type should get the inferred type + df2 = DataFrame({'a': [0, 1, 1], 'b': [100, 200, 300]}, + dtype='uint64') + ix = df1['a'] == 1 + newb2 = df2.loc[ix, 'b'] + df1.loc[ix, 'b'] = newb2 + expected = DataFrame({'a': [0, 1, 1], 'b': [100, 200, 300]}, + dtype='uint64') + tm.assert_frame_equal(df2, expected) + + def test_loc_getitem_int(self): + + # int label + self.check_result('int label', 'loc', 2, 'ix', 2, + typs=['ints', 'uints'], axes=0) + self.check_result('int label', 'loc', 3, 'ix', 3, + typs=['ints', 'uints'], axes=1) + self.check_result('int label', 'loc', 4, 'ix', 4, + typs=['ints', 'uints'], axes=2) + self.check_result('int label', 'loc', 2, 'ix', 2, + typs=['label'], fails=KeyError) + + def test_loc_getitem_label(self): + + # label + self.check_result('label', 'loc', 'c', 'ix', 'c', typs=['labels'], + axes=0) + self.check_result('label', 'loc', 'null', 'ix', 'null', typs=['mixed'], + axes=0) + self.check_result('label', 'loc', 8, 'ix', 8, typs=['mixed'], axes=0) + self.check_result('label', 'loc', Timestamp('20130102'), 'ix', 1, + typs=['ts'], axes=0) + self.check_result('label', 'loc', 'c', 'ix', 'c', typs=['empty'], + fails=KeyError) + + def test_loc_getitem_label_out_of_range(self): + + # out of range label + self.check_result('label range', 'loc', 'f', 'ix', 'f', + typs=['ints', 'uints', 'labels', 'mixed', 'ts'], + fails=KeyError) + self.check_result('label 
range', 'loc', 'f', 'ix', 'f', + typs=['floats'], fails=TypeError) + self.check_result('label range', 'loc', 20, 'ix', 20, + typs=['ints', 'uints', 'mixed'], fails=KeyError) + self.check_result('label range', 'loc', 20, 'ix', 20, + typs=['labels'], fails=TypeError) + self.check_result('label range', 'loc', 20, 'ix', 20, typs=['ts'], + axes=0, fails=TypeError) + self.check_result('label range', 'loc', 20, 'ix', 20, typs=['floats'], + axes=0, fails=TypeError) + + def test_loc_getitem_label_list(self): + + # list of labels + self.check_result('list lbl', 'loc', [0, 2, 4], 'ix', [0, 2, 4], + typs=['ints', 'uints'], axes=0) + self.check_result('list lbl', 'loc', [3, 6, 9], 'ix', [3, 6, 9], + typs=['ints', 'uints'], axes=1) + self.check_result('list lbl', 'loc', [4, 8, 12], 'ix', [4, 8, 12], + typs=['ints', 'uints'], axes=2) + self.check_result('list lbl', 'loc', ['a', 'b', 'd'], 'ix', + ['a', 'b', 'd'], typs=['labels'], axes=0) + self.check_result('list lbl', 'loc', ['A', 'B', 'C'], 'ix', + ['A', 'B', 'C'], typs=['labels'], axes=1) + self.check_result('list lbl', 'loc', ['Z', 'Y', 'W'], 'ix', + ['Z', 'Y', 'W'], typs=['labels'], axes=2) + self.check_result('list lbl', 'loc', [2, 8, 'null'], 'ix', + [2, 8, 'null'], typs=['mixed'], axes=0) + self.check_result('list lbl', 'loc', + [Timestamp('20130102'), Timestamp('20130103')], 'ix', + [Timestamp('20130102'), Timestamp('20130103')], + typs=['ts'], axes=0) + + def test_loc_getitem_label_list_with_missing(self): + self.check_result('list lbl', 'loc', [0, 1, 2], 'indexer', [0, 1, 2], + typs=['empty'], fails=KeyError) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + self.check_result('list lbl', 'loc', [0, 2, 3], 'ix', [0, 2, 3], + typs=['ints', 'uints'], axes=0, fails=KeyError) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + self.check_result('list lbl', 'loc', [3, 6, 7], 'ix', [3, 6, 7], + typs=['ints', 'uints'], axes=1, fails=KeyError) + self.check_result('list lbl', 
'loc', [4, 8, 10], 'ix', [4, 8, 10], + typs=['ints', 'uints'], axes=2, fails=KeyError) + + def test_getitem_label_list_with_missing(self): + s = Series(range(3), index=['a', 'b', 'c']) + + # consistency + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + s[['a', 'd']] + + s = Series(range(3)) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + s[[0, 3]] + + def test_loc_getitem_label_list_fails(self): + # fails + self.check_result('list lbl', 'loc', [20, 30, 40], 'ix', [20, 30, 40], + typs=['ints', 'uints'], axes=1, fails=KeyError) + self.check_result('list lbl', 'loc', [20, 30, 40], 'ix', [20, 30, 40], + typs=['ints', 'uints'], axes=2, fails=KeyError) + + def test_loc_getitem_label_array_like(self): + # array like + self.check_result('array like', 'loc', Series(index=[0, 2, 4]).index, + 'ix', [0, 2, 4], typs=['ints', 'uints'], axes=0) + self.check_result('array like', 'loc', Series(index=[3, 6, 9]).index, + 'ix', [3, 6, 9], typs=['ints', 'uints'], axes=1) + self.check_result('array like', 'loc', Series(index=[4, 8, 12]).index, + 'ix', [4, 8, 12], typs=['ints', 'uints'], axes=2) + + def test_loc_getitem_bool(self): + # boolean indexers + b = [True, False, True, False] + self.check_result('bool', 'loc', b, 'ix', b, + typs=['ints', 'uints', 'labels', + 'mixed', 'ts', 'floats']) + self.check_result('bool', 'loc', b, 'ix', b, typs=['empty'], + fails=KeyError) + + def test_loc_getitem_int_slice(self): + + # ok + self.check_result('int slice2', 'loc', slice(2, 4), 'ix', [2, 4], + typs=['ints', 'uints'], axes=0) + self.check_result('int slice2', 'loc', slice(3, 6), 'ix', [3, 6], + typs=['ints', 'uints'], axes=1) + self.check_result('int slice2', 'loc', slice(4, 8), 'ix', [4, 8], + typs=['ints', 'uints'], axes=2) + + # GH 3053 + # loc should treat integer slices like label slices + + index = MultiIndex.from_tuples([t for t in itertools.product( + [6, 7, 8], ['a', 'b'])]) + df = DataFrame(np.random.randn(6, 6), index, index) + 
result = df.loc[6:8, :] + expected = df + tm.assert_frame_equal(result, expected) + + index = MultiIndex.from_tuples([t + for t in itertools.product( + [10, 20, 30], ['a', 'b'])]) + df = DataFrame(np.random.randn(6, 6), index, index) + result = df.loc[20:30, :] + expected = df.iloc[2:] + tm.assert_frame_equal(result, expected) + + # doc examples + result = df.loc[10, :] + expected = df.iloc[0:2] + expected.index = ['a', 'b'] + tm.assert_frame_equal(result, expected) + + result = df.loc[:, 10] + # expected = df.ix[:,10] (this fails) + expected = df[10] + tm.assert_frame_equal(result, expected) + + def test_loc_to_fail(self): + + # GH3449 + df = DataFrame(np.random.random((3, 3)), + index=['a', 'b', 'c'], + columns=['e', 'f', 'g']) + + # raise a KeyError? + pytest.raises(KeyError, df.loc.__getitem__, + tuple([[1, 2], [1, 2]])) + + # GH 7496 + # loc should not fallback + + s = Series() + s.loc[1] = 1 + s.loc['a'] = 2 + + pytest.raises(KeyError, lambda: s.loc[-1]) + pytest.raises(KeyError, lambda: s.loc[[-1, -2]]) + + pytest.raises(KeyError, lambda: s.loc[['4']]) + + s.loc[-1] = 3 + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = s.loc[[-1, -2]] + expected = Series([3, np.nan], index=[-1, -2]) + tm.assert_series_equal(result, expected) + + s['a'] = 2 + pytest.raises(KeyError, lambda: s.loc[[-2]]) + + del s['a'] + + def f(): + s.loc[[-2]] = 0 + + pytest.raises(KeyError, f) + + # inconsistency between .loc[values] and .loc[values,:] + # GH 7999 + df = DataFrame([['a'], ['b']], index=[1, 2], columns=['value']) + + def f(): + df.loc[[3], :] + + pytest.raises(KeyError, f) + + def f(): + df.loc[[3]] + + pytest.raises(KeyError, f) + + def test_loc_getitem_list_with_fail(self): + # 15747 + # should KeyError if *any* missing labels + + s = Series([1, 2, 3]) + + s.loc[[2]] + + with pytest.raises(KeyError): + s.loc[[3]] + + # a non-match and a match + with tm.assert_produces_warning(FutureWarning): + expected = s.loc[[2, 3]] + result = 
s.reindex([2, 3]) + tm.assert_series_equal(result, expected) + + def test_loc_getitem_label_slice(self): + + # label slices (with ints) + self.check_result('lab slice', 'loc', slice(1, 3), + 'ix', slice(1, 3), + typs=['labels', 'mixed', 'empty', 'ts', 'floats'], + fails=TypeError) + + # real label slices + self.check_result('lab slice', 'loc', slice('a', 'c'), + 'ix', slice('a', 'c'), typs=['labels'], axes=0) + self.check_result('lab slice', 'loc', slice('A', 'C'), + 'ix', slice('A', 'C'), typs=['labels'], axes=1) + self.check_result('lab slice', 'loc', slice('W', 'Z'), + 'ix', slice('W', 'Z'), typs=['labels'], axes=2) + + self.check_result('ts slice', 'loc', slice('20130102', '20130104'), + 'ix', slice('20130102', '20130104'), + typs=['ts'], axes=0) + self.check_result('ts slice', 'loc', slice('20130102', '20130104'), + 'ix', slice('20130102', '20130104'), + typs=['ts'], axes=1, fails=TypeError) + self.check_result('ts slice', 'loc', slice('20130102', '20130104'), + 'ix', slice('20130102', '20130104'), + typs=['ts'], axes=2, fails=TypeError) + + # GH 14316 + self.check_result('ts slice rev', 'loc', slice('20130104', '20130102'), + 'indexer', [0, 1, 2], typs=['ts_rev'], axes=0) + + self.check_result('mixed slice', 'loc', slice(2, 8), 'ix', slice(2, 8), + typs=['mixed'], axes=0, fails=TypeError) + self.check_result('mixed slice', 'loc', slice(2, 8), 'ix', slice(2, 8), + typs=['mixed'], axes=1, fails=KeyError) + self.check_result('mixed slice', 'loc', slice(2, 8), 'ix', slice(2, 8), + typs=['mixed'], axes=2, fails=KeyError) + + self.check_result('mixed slice', 'loc', slice(2, 4, 2), 'ix', slice( + 2, 4, 2), typs=['mixed'], axes=0, fails=TypeError) + + def test_loc_index(self): + # gh-17131 + # a boolean index should index like a boolean numpy array + + df = DataFrame( + np.random.random(size=(5, 10)), + index=["alpha_0", "alpha_1", "alpha_2", "beta_0", "beta_1"]) + + mask = df.index.map(lambda x: "alpha" in x) + expected = df.loc[np.array(mask)] + + result = 
df.loc[mask] + tm.assert_frame_equal(result, expected) + + result = df.loc[mask.values] + tm.assert_frame_equal(result, expected) + + def test_loc_general(self): + + df = DataFrame( + np.random.rand(4, 4), columns=['A', 'B', 'C', 'D'], + index=['A', 'B', 'C', 'D']) + + # want this to work + result = df.loc[:, "A":"B"].iloc[0:2, :] + assert (result.columns == ['A', 'B']).all() + assert (result.index == ['A', 'B']).all() + + # mixed type + result = DataFrame({'a': [Timestamp('20130101')], 'b': [1]}).iloc[0] + expected = Series([Timestamp('20130101'), 1], index=['a', 'b'], name=0) + tm.assert_series_equal(result, expected) + assert result.dtype == object + + def test_loc_setitem_consistency(self): + # GH 6149 + # coerce similarly for setitem and loc when rows have a null-slice + expected = DataFrame({'date': Series(0, index=range(5), + dtype=np.int64), + 'val': Series(range(5), dtype=np.int64)}) + + df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'), + 'val': Series( + range(5), dtype=np.int64)}) + df.loc[:, 'date'] = 0 + tm.assert_frame_equal(df, expected) + + df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'), + 'val': Series(range(5), dtype=np.int64)}) + df.loc[:, 'date'] = np.array(0, dtype=np.int64) + tm.assert_frame_equal(df, expected) + + df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'), + 'val': Series(range(5), dtype=np.int64)}) + df.loc[:, 'date'] = np.array([0, 0, 0, 0, 0], dtype=np.int64) + tm.assert_frame_equal(df, expected) + + expected = DataFrame({'date': Series('foo', index=range(5)), + 'val': Series(range(5), dtype=np.int64)}) + df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'), + 'val': Series(range(5), dtype=np.int64)}) + df.loc[:, 'date'] = 'foo' + tm.assert_frame_equal(df, expected) + + expected = DataFrame({'date': Series(1.0, index=range(5)), + 'val': Series(range(5), dtype=np.int64)}) + df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'), + 'val': Series(range(5), dtype=np.int64)}) + 
df.loc[:, 'date'] = 1.0 + tm.assert_frame_equal(df, expected) + + # GH 15494 + # setting on frame with single row + df = DataFrame({'date': Series([Timestamp('20180101')])}) + df.loc[:, 'date'] = 'string' + expected = DataFrame({'date': Series(['string'])}) + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_consistency_empty(self): + # empty (essentially noops) + expected = DataFrame(columns=['x', 'y']) + expected['x'] = expected['x'].astype(np.int64) + df = DataFrame(columns=['x', 'y']) + df.loc[:, 'x'] = 1 + tm.assert_frame_equal(df, expected) + + df = DataFrame(columns=['x', 'y']) + df['x'] = 1 + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_consistency_slice_column_len(self): + # .loc[:,column] setting with slice == len of the column + # GH10408 + data = """Level_0,,,Respondent,Respondent,Respondent,OtherCat,OtherCat +Level_1,,,Something,StartDate,EndDate,Yes/No,SomethingElse +Region,Site,RespondentID,,,,, +Region_1,Site_1,3987227376,A,5/25/2015 10:59,5/25/2015 11:22,Yes, +Region_1,Site_1,3980680971,A,5/21/2015 9:40,5/21/2015 9:52,Yes,Yes +Region_1,Site_2,3977723249,A,5/20/2015 8:27,5/20/2015 8:41,Yes, +Region_1,Site_2,3977723089,A,5/20/2015 8:33,5/20/2015 9:09,Yes,No""" + + df = pd.read_csv(StringIO(data), header=[0, 1], index_col=[0, 1, 2]) + df.loc[:, ('Respondent', 'StartDate')] = pd.to_datetime(df.loc[:, ( + 'Respondent', 'StartDate')]) + df.loc[:, ('Respondent', 'EndDate')] = pd.to_datetime(df.loc[:, ( + 'Respondent', 'EndDate')]) + df.loc[:, ('Respondent', 'Duration')] = df.loc[:, ( + 'Respondent', 'EndDate')] - df.loc[:, ('Respondent', 'StartDate')] + + df.loc[:, ('Respondent', 'Duration')] = df.loc[:, ( + 'Respondent', 'Duration')].astype('timedelta64[s]') + expected = Series([1380, 720, 840, 2160.], index=df.index, + name=('Respondent', 'Duration')) + tm.assert_series_equal(df[('Respondent', 'Duration')], expected) + + def test_loc_setitem_frame(self): + df = self.frame_labels + + result = df.iloc[0, 0] + + df.loc['a', 'A'] 
= 1 + result = df.loc['a', 'A'] + assert result == 1 + + result = df.iloc[0, 0] + assert result == 1 + + df.loc[:, 'B':'D'] = 0 + expected = df.loc[:, 'B':'D'] + result = df.iloc[:, 1:] + tm.assert_frame_equal(result, expected) + + # GH 6254 + # setting issue + df = DataFrame(index=[3, 5, 4], columns=['A']) + df.loc[[4, 3, 5], 'A'] = np.array([1, 2, 3], dtype='int64') + expected = DataFrame(dict(A=Series( + [1, 2, 3], index=[4, 3, 5]))).reindex(index=[3, 5, 4]) + tm.assert_frame_equal(df, expected) + + # GH 6252 + # setting with an empty frame + keys1 = ['@' + str(i) for i in range(5)] + val1 = np.arange(5, dtype='int64') + + keys2 = ['@' + str(i) for i in range(4)] + val2 = np.arange(4, dtype='int64') + + index = list(set(keys1).union(keys2)) + df = DataFrame(index=index) + df['A'] = np.nan + df.loc[keys1, 'A'] = val1 + + df['B'] = np.nan + df.loc[keys2, 'B'] = val2 + + expected = DataFrame(dict(A=Series(val1, index=keys1), B=Series( + val2, index=keys2))).reindex(index=index) + tm.assert_frame_equal(df, expected) + + # GH 8669 + # invalid coercion of nan -> int + df = DataFrame({'A': [1, 2, 3], 'B': np.nan}) + df.loc[df.B > df.A, 'B'] = df.A + expected = DataFrame({'A': [1, 2, 3], 'B': np.nan}) + tm.assert_frame_equal(df, expected) + + # GH 6546 + # setting with mixed labels + df = DataFrame({1: [1, 2], 2: [3, 4], 'a': ['a', 'b']}) + + result = df.loc[0, [1, 2]] + expected = Series([1, 3], index=[1, 2], dtype=object, name=0) + tm.assert_series_equal(result, expected) + + expected = DataFrame({1: [5, 2], 2: [6, 4], 'a': ['a', 'b']}) + df.loc[0, [1, 2]] = [5, 6] + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_frame_multiples(self): + # multiple setting + df = DataFrame({'A': ['foo', 'bar', 'baz'], + 'B': Series( + range(3), dtype=np.int64)}) + rhs = df.loc[1:2] + rhs.index = df.index[0:2] + df.loc[0:1] = rhs + expected = DataFrame({'A': ['bar', 'baz', 'baz'], + 'B': Series( + [1, 2, 2], dtype=np.int64)}) + tm.assert_frame_equal(df, expected) + + # 
multiple setting with frame on rhs (with M8) + df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'), + 'val': Series( + range(5), dtype=np.int64)}) + expected = DataFrame({'date': [Timestamp('20000101'), Timestamp( + '20000102'), Timestamp('20000101'), Timestamp('20000102'), + Timestamp('20000103')], + 'val': Series( + [0, 1, 0, 1, 2], dtype=np.int64)}) + rhs = df.loc[0:2] + rhs.index = df.index[2:5] + df.loc[2:4] = rhs + tm.assert_frame_equal(df, expected) + + def test_loc_coerceion(self): + + # 12411 + df = DataFrame({'date': [Timestamp('20130101').tz_localize('UTC'), + pd.NaT]}) + expected = df.dtypes + + result = df.iloc[[0]] + tm.assert_series_equal(result.dtypes, expected) + + result = df.iloc[[1]] + tm.assert_series_equal(result.dtypes, expected) + + # 12045 + import datetime + df = DataFrame({'date': [datetime.datetime(2012, 1, 1), + datetime.datetime(1012, 1, 2)]}) + expected = df.dtypes + + result = df.iloc[[0]] + tm.assert_series_equal(result.dtypes, expected) + + result = df.iloc[[1]] + tm.assert_series_equal(result.dtypes, expected) + + # 11594 + df = DataFrame({'text': ['some words'] + [None] * 9}) + expected = df.dtypes + + result = df.iloc[0:2] + tm.assert_series_equal(result.dtypes, expected) + + result = df.iloc[3:] + tm.assert_series_equal(result.dtypes, expected) + + def test_loc_non_unique(self): + # GH3659 + # non-unique indexer with loc slice + # https://groups.google.com/forum/?fromgroups#!topic/pydata/zTm2No0crYs + + # these are going to raise because the we are non monotonic + df = DataFrame({'A': [1, 2, 3, 4, 5, 6], + 'B': [3, 4, 5, 6, 7, 8]}, index=[0, 1, 0, 1, 2, 3]) + pytest.raises(KeyError, df.loc.__getitem__, + tuple([slice(1, None)])) + pytest.raises(KeyError, df.loc.__getitem__, + tuple([slice(0, None)])) + pytest.raises(KeyError, df.loc.__getitem__, tuple([slice(1, 2)])) + + # monotonic are ok + df = DataFrame({'A': [1, 2, 3, 4, 5, 6], + 'B': [3, 4, 5, 6, 7, 8]}, + index=[0, 1, 0, 1, 2, 3]).sort_index(axis=0) + result = 
df.loc[1:] + expected = DataFrame({'A': [2, 4, 5, 6], 'B': [4, 6, 7, 8]}, + index=[1, 1, 2, 3]) + tm.assert_frame_equal(result, expected) + + result = df.loc[0:] + tm.assert_frame_equal(result, df) + + result = df.loc[1:2] + expected = DataFrame({'A': [2, 4, 5], 'B': [4, 6, 7]}, + index=[1, 1, 2]) + tm.assert_frame_equal(result, expected) + + def test_loc_non_unique_memory_error(self): + + # GH 4280 + # non_unique index with a large selection triggers a memory error + + columns = list('ABCDEFG') + + def gen_test(l, l2): + return pd.concat([ + DataFrame(np.random.randn(l, len(columns)), + index=lrange(l), columns=columns), + DataFrame(np.ones((l2, len(columns))), + index=[0] * l2, columns=columns)]) + + def gen_expected(df, mask): + l = len(mask) + return pd.concat([df.take([0]), + DataFrame(np.ones((l, len(columns))), + index=[0] * l, + columns=columns), + df.take(mask[1:])]) + + df = gen_test(900, 100) + assert not df.index.is_unique + + mask = np.arange(100) + result = df.loc[mask] + expected = gen_expected(df, mask) + tm.assert_frame_equal(result, expected) + + df = gen_test(900000, 100000) + assert not df.index.is_unique + + mask = np.arange(100000) + result = df.loc[mask] + expected = gen_expected(df, mask) + tm.assert_frame_equal(result, expected) + + def test_loc_name(self): + # GH 3880 + df = DataFrame([[1, 1], [1, 1]]) + df.index.name = 'index_name' + result = df.iloc[[0, 1]].index.name + assert result == 'index_name' + + with catch_warnings(record=True): + result = df.ix[[0, 1]].index.name + assert result == 'index_name' + + result = df.loc[[0, 1]].index.name + assert result == 'index_name' + + def test_loc_empty_list_indexer_is_ok(self): + from pandas.util.testing import makeCustomDataframe as mkdf + df = mkdf(5, 2) + # vertical empty + tm.assert_frame_equal(df.loc[:, []], df.iloc[:, :0], + check_index_type=True, check_column_type=True) + # horizontal empty + tm.assert_frame_equal(df.loc[[], :], df.iloc[:0, :], + check_index_type=True, 
check_column_type=True) + # horizontal empty + tm.assert_frame_equal(df.loc[[]], df.iloc[:0, :], + check_index_type=True, + check_column_type=True) + + def test_identity_slice_returns_new_object(self): + # GH13873 + original_df = DataFrame({'a': [1, 2, 3]}) + sliced_df = original_df.loc[:] + assert sliced_df is not original_df + assert original_df[:] is not original_df + + # should be a shallow copy + original_df['a'] = [4, 4, 4] + assert (sliced_df['a'] == 4).all() + + # These should not return copies + assert original_df is original_df.loc[:, :] + df = DataFrame(np.random.randn(10, 4)) + assert df[0] is df.loc[:, 0] + + # Same tests for Series + original_series = Series([1, 2, 3, 4, 5, 6]) + sliced_series = original_series.loc[:] + assert sliced_series is not original_series + assert original_series[:] is not original_series + + original_series[:3] = [7, 8, 9] + assert all(sliced_series[:3] == [7, 8, 9]) + + @pytest.mark.parametrize( + 'indexer_type_1', + (list, tuple, set, slice, np.ndarray, Series, Index)) + @pytest.mark.parametrize( + 'indexer_type_2', + (list, tuple, set, slice, np.ndarray, Series, Index)) + def test_loc_getitem_nested_indexer(self, indexer_type_1, indexer_type_2): + # GH #19686 + # .loc should work with nested indexers which can be + # any list-like objects (see `pandas.api.types.is_list_like`) or slices + + def convert_nested_indexer(indexer_type, keys): + if indexer_type == np.ndarray: + return np.array(keys) + if indexer_type == slice: + return slice(*keys) + return indexer_type(keys) + + a = [10, 20, 30] + b = [1, 2, 3] + index = pd.MultiIndex.from_product([a, b]) + df = pd.DataFrame( + np.arange(len(index), dtype='int64'), + index=index, columns=['Data']) + + keys = ([10, 20], [2, 3]) + types = (indexer_type_1, indexer_type_2) + + # check indexers with all the combinations of nested objects + # of all the valid types + indexer = tuple( + convert_nested_indexer(indexer_type, k) + for indexer_type, k in zip(types, keys)) + + result = 
df.loc[indexer, 'Data'] + expected = pd.Series( + [1, 2, 4, 5], name='Data', + index=pd.MultiIndex.from_product(keys)) + + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/indexing/test_multiindex.py b/pandas/tests/indexing/test_multiindex.py index 1e6ecbbcdc756..43656a392e582 100644 --- a/pandas/tests/indexing/test_multiindex.py +++ b/pandas/tests/indexing/test_multiindex.py @@ -5,11 +5,11 @@ from pandas import (Panel, Series, MultiIndex, DataFrame, Timestamp, Index, date_range) from pandas.util import testing as tm -from pandas.core.common import PerformanceWarning, UnsortedIndexError +from pandas.errors import PerformanceWarning, UnsortedIndexError from pandas.tests.indexing.common import _mklbl -class TestMultiIndexBasic(tm.TestCase): +class TestMultiIndexBasic(object): def test_iloc_getitem_multiindex2(self): # TODO(wesm): fix this @@ -30,7 +30,7 @@ def test_iloc_getitem_multiindex2(self): rs = df.iloc[2, 2] xp = df.values[2, 2] - self.assertEqual(rs, xp) + assert rs == xp # for multiple items # GH 5528 @@ -46,101 +46,106 @@ def test_iloc_getitem_multiindex2(self): tm.assert_frame_equal(rs, xp) def test_setitem_multiindex(self): - for index_fn in ('ix', 'loc'): - - def check(target, indexers, value, compare_fn, expected=None): - fn = getattr(target, index_fn) - fn.__setitem__(indexers, value) - result = fn.__getitem__(indexers) - if expected is None: - expected = value - compare_fn(result, expected) - # GH7190 - index = pd.MultiIndex.from_product([np.arange(0, 100), - np.arange(0, 80)], - names=['time', 'firm']) - t, n = 0, 2 - df = DataFrame(np.nan, columns=['A', 'w', 'l', 'a', 'x', - 'X', 'd', 'profit'], - index=index) - check(target=df, indexers=((t, n), 'X'), value=0, - compare_fn=self.assertEqual) - - df = DataFrame(-999, columns=['A', 'w', 'l', 'a', 'x', - 'X', 'd', 'profit'], - index=index) - check(target=df, indexers=((t, n), 'X'), value=1, - compare_fn=self.assertEqual) - - df = DataFrame(columns=['A', 'w', 'l', 'a', 'x', - 'X', 'd', 
'profit'], - index=index) - check(target=df, indexers=((t, n), 'X'), value=2, - compare_fn=self.assertEqual) - - # GH 7218, assinging with 0-dim arrays - df = DataFrame(-999, columns=['A', 'w', 'l', 'a', 'x', - 'X', 'd', 'profit'], - index=index) - check(target=df, - indexers=((t, n), 'X'), - value=np.array(3), - compare_fn=self.assertEqual, - expected=3, ) - - # GH5206 - df = pd.DataFrame(np.arange(25).reshape(5, 5), - columns='A,B,C,D,E'.split(','), dtype=float) - df['F'] = 99 - row_selection = df['A'] % 2 == 0 - col_selection = ['B', 'C'] - with catch_warnings(record=True): - df.ix[row_selection, col_selection] = df['F'] - output = pd.DataFrame(99., index=[0, 2, 4], columns=['B', 'C']) - with catch_warnings(record=True): - tm.assert_frame_equal(df.ix[row_selection, col_selection], - output) - check(target=df, - indexers=(row_selection, col_selection), - value=df['F'], - compare_fn=tm.assert_frame_equal, - expected=output, ) - - # GH11372 - idx = pd.MultiIndex.from_product([ - ['A', 'B', 'C'], - pd.date_range('2015-01-01', '2015-04-01', freq='MS')]) - cols = pd.MultiIndex.from_product([ - ['foo', 'bar'], - pd.date_range('2016-01-01', '2016-02-01', freq='MS')]) - - df = pd.DataFrame(np.random.random((12, 4)), - index=idx, columns=cols) - - subidx = pd.MultiIndex.from_tuples( - [('A', pd.Timestamp('2015-01-01')), - ('A', pd.Timestamp('2015-02-01'))]) - subcols = pd.MultiIndex.from_tuples( - [('foo', pd.Timestamp('2016-01-01')), - ('foo', pd.Timestamp('2016-02-01'))]) - - vals = pd.DataFrame(np.random.random((2, 2)), - index=subidx, columns=subcols) - check(target=df, - indexers=(subidx, subcols), - value=vals, - compare_fn=tm.assert_frame_equal, ) - # set all columns - vals = pd.DataFrame( - np.random.random((2, 4)), index=subidx, columns=cols) - check(target=df, - indexers=(subidx, slice(None, None, None)), - value=vals, - compare_fn=tm.assert_frame_equal, ) - # identity - copy = df.copy() - check(target=df, indexers=(df.index, df.columns), value=df, - 
compare_fn=tm.assert_frame_equal, expected=copy) + with catch_warnings(record=True): + + for index_fn in ('ix', 'loc'): + + def assert_equal(a, b): + assert a == b + + def check(target, indexers, value, compare_fn, expected=None): + fn = getattr(target, index_fn) + fn.__setitem__(indexers, value) + result = fn.__getitem__(indexers) + if expected is None: + expected = value + compare_fn(result, expected) + # GH7190 + index = MultiIndex.from_product([np.arange(0, 100), + np.arange(0, 80)], + names=['time', 'firm']) + t, n = 0, 2 + df = DataFrame(np.nan, columns=['A', 'w', 'l', 'a', 'x', + 'X', 'd', 'profit'], + index=index) + check(target=df, indexers=((t, n), 'X'), value=0, + compare_fn=assert_equal) + + df = DataFrame(-999, columns=['A', 'w', 'l', 'a', 'x', + 'X', 'd', 'profit'], + index=index) + check(target=df, indexers=((t, n), 'X'), value=1, + compare_fn=assert_equal) + + df = DataFrame(columns=['A', 'w', 'l', 'a', 'x', + 'X', 'd', 'profit'], + index=index) + check(target=df, indexers=((t, n), 'X'), value=2, + compare_fn=assert_equal) + + # gh-7218: assigning with 0-dim arrays + df = DataFrame(-999, columns=['A', 'w', 'l', 'a', 'x', + 'X', 'd', 'profit'], + index=index) + check(target=df, + indexers=((t, n), 'X'), + value=np.array(3), + compare_fn=assert_equal, + expected=3, ) + + # GH5206 + df = DataFrame(np.arange(25).reshape(5, 5), + columns='A,B,C,D,E'.split(','), dtype=float) + df['F'] = 99 + row_selection = df['A'] % 2 == 0 + col_selection = ['B', 'C'] + with catch_warnings(record=True): + df.ix[row_selection, col_selection] = df['F'] + output = DataFrame(99., index=[0, 2, 4], columns=['B', 'C']) + with catch_warnings(record=True): + tm.assert_frame_equal(df.ix[row_selection, col_selection], + output) + check(target=df, + indexers=(row_selection, col_selection), + value=df['F'], + compare_fn=tm.assert_frame_equal, + expected=output, ) + + # GH11372 + idx = MultiIndex.from_product([ + ['A', 'B', 'C'], + date_range('2015-01-01', '2015-04-01', freq='MS')]) + 
cols = MultiIndex.from_product([ + ['foo', 'bar'], + date_range('2016-01-01', '2016-02-01', freq='MS')]) + + df = DataFrame(np.random.random((12, 4)), + index=idx, columns=cols) + + subidx = MultiIndex.from_tuples( + [('A', Timestamp('2015-01-01')), + ('A', Timestamp('2015-02-01'))]) + subcols = MultiIndex.from_tuples( + [('foo', Timestamp('2016-01-01')), + ('foo', Timestamp('2016-02-01'))]) + + vals = DataFrame(np.random.random((2, 2)), + index=subidx, columns=subcols) + check(target=df, + indexers=(subidx, subcols), + value=vals, + compare_fn=tm.assert_frame_equal, ) + # set all columns + vals = DataFrame( + np.random.random((2, 4)), index=subidx, columns=cols) + check(target=df, + indexers=(subidx, slice(None, None, None)), + value=vals, + compare_fn=tm.assert_frame_equal, ) + # identity + copy = df.copy() + check(target=df, indexers=(df.index, df.columns), value=df, + compare_fn=tm.assert_frame_equal, expected=copy) def test_loc_getitem_series(self): # GH14730 @@ -158,12 +163,46 @@ def test_loc_getitem_series(self): result = x.loc[[1, 3]] tm.assert_series_equal(result, expected) + # GH15424 + y1 = Series([1, 3], index=[1, 2]) + result = x.loc[y1] + tm.assert_series_equal(result, expected) + empty = Series(data=[], dtype=np.float64) expected = Series([], index=MultiIndex( levels=index.levels, labels=[[], []], dtype=np.float64)) result = x.loc[empty] tm.assert_series_equal(result, expected) + def test_loc_getitem_array(self): + # GH15434 + # passing an array as a key with a MultiIndex + index = MultiIndex.from_product([[1, 2, 3], ['A', 'B', 'C']]) + x = Series(index=index, data=range(9), dtype=np.float64) + y = np.array([1, 3]) + expected = Series( + data=[0, 1, 2, 6, 7, 8], + index=MultiIndex.from_product([[1, 3], ['A', 'B', 'C']]), + dtype=np.float64) + result = x.loc[y] + tm.assert_series_equal(result, expected) + + # empty array: + empty = np.array([]) + expected = Series([], index=MultiIndex( + levels=index.levels, labels=[[], []], dtype=np.float64)) + 
result = x.loc[empty] + tm.assert_series_equal(result, expected) + + # 0-dim array (scalar): + scalar = np.int64(1) + expected = Series( + data=[0, 1, 2], + index=['A', 'B', 'C'], + dtype=np.float64) + result = x.loc[scalar] + tm.assert_series_equal(result, expected) + def test_iloc_getitem_multiindex(self): mi_labels = DataFrame(np.random.randn(4, 3), columns=[['i', 'i', 'j'], ['A', 'A', 'B']], @@ -179,8 +218,8 @@ def test_iloc_getitem_multiindex(self): with catch_warnings(record=True): xp = mi_int.ix[4].ix[8] tm.assert_series_equal(rs, xp, check_names=False) - self.assertEqual(rs.name, (4, 8)) - self.assertEqual(xp.name, 8) + assert rs.name == (4, 8) + assert xp.name == 8 # 2nd (last) columns rs = mi_int.iloc[:, 2] @@ -192,13 +231,13 @@ def test_iloc_getitem_multiindex(self): rs = mi_int.iloc[2, 2] with catch_warnings(record=True): xp = mi_int.ix[:, 2].ix[2] - self.assertEqual(rs, xp) + assert rs == xp # this is basically regular indexing rs = mi_labels.iloc[2, 2] with catch_warnings(record=True): xp = mi_labels.ix['j'].ix[:, 'j'].ix[0, 0] - self.assertEqual(rs, xp) + assert rs == xp def test_loc_multiindex(self): @@ -239,6 +278,30 @@ def test_loc_multiindex(self): xp = mi_int.ix[4] tm.assert_frame_equal(rs, xp) + def test_getitem_partial_int(self): + # GH 12416 + # with single item + l1 = [10, 20] + l2 = ['a', 'b'] + df = DataFrame(index=range(2), + columns=MultiIndex.from_product([l1, l2])) + expected = DataFrame(index=range(2), + columns=l2) + result = df[20] + tm.assert_frame_equal(result, expected) + + # with list + expected = DataFrame(index=range(2), + columns=MultiIndex.from_product([l1[1:], l2])) + result = df[[20]] + tm.assert_frame_equal(result, expected) + + # missing item: + with tm.assert_raises_regex(KeyError, '1'): + df[1] + with tm.assert_raises_regex(KeyError, r"'\[1\] not in index'"): + df[[1]] + def test_loc_multiindex_indexer_none(self): # GH6788 @@ -255,8 +318,8 @@ def test_loc_multiindex_indexer_none(self): # GH 7349 # loc with a 
multi-index seems to be doing fallback df = DataFrame(np.arange(12).reshape(-1, 1), - index=pd.MultiIndex.from_product([[1, 2, 3, 4], - [1, 2, 3]])) + index=MultiIndex.from_product([[1, 2, 3, 4], + [1, 2, 3]])) expected = df.loc[([1, 2], ), :] result = df.loc[[1, 2]] @@ -266,8 +329,8 @@ def test_loc_multiindex_incomplete(self): # GH 7399 # incomplete indexers - s = pd.Series(np.arange(15, dtype='int64'), - MultiIndex.from_product([range(5), ['a', 'b', 'c']])) + s = Series(np.arange(15, dtype='int64'), + MultiIndex.from_product([range(5), ['a', 'b', 'c']])) expected = s.loc[:, 'a':'c'] result = s.loc[0:4, 'a':'c'] @@ -284,8 +347,8 @@ def test_loc_multiindex_incomplete(self): # GH 7400 # multiindexer gettitem with list of indexers skips wrong element - s = pd.Series(np.arange(15, dtype='int64'), - MultiIndex.from_product([range(5), ['a', 'b', 'c']])) + s = Series(np.arange(15, dtype='int64'), + MultiIndex.from_product([range(5), ['a', 'b', 'c']])) expected = s.iloc[[6, 7, 8, 12, 13, 14]] result = s.loc[2:4:2, 'a':'c'] tm.assert_series_equal(result, expected) @@ -373,9 +436,8 @@ def test_multiindex_setitem(self): np.array(['one', 'two', 'one', 'one', 'two', 'one']), np.arange(0, 6, 1)] - df_orig = pd.DataFrame(np.random.randn(6, 3), - index=arrays, - columns=['A', 'B', 'C']).sort_index() + df_orig = DataFrame(np.random.randn(6, 3), index=arrays, + columns=['A', 'B', 'C']).sort_index() expected = df_orig.loc[['bar']] * 2 df = df_orig.copy() @@ -386,7 +448,7 @@ def test_multiindex_setitem(self): def f(): df.loc['bar'] *= 2 - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) # from SO # http://stackoverflow.com/questions/24572040/pandas-access-the-level-of-multiindex-for-inplace-operation @@ -413,9 +475,10 @@ def f(): df.loc[idx[:, :, 'Stock'], 'price'] *= 2 tm.assert_frame_equal(df, expected) - def test_getitem_multiindex(self): + def test_getitem_duplicates_multiindex(self): # GH 5725 the 'A' happens to be a valid Timestamp so the doesn't raise # the 
appropriate error, only in PY3 of course! + index = MultiIndex(levels=[['D', 'B', 'C'], [0, 26, 27, 37, 57, 67, 75, 82]], labels=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], @@ -431,12 +494,12 @@ def test_getitem_multiindex(self): def f(): df.val['A'] - self.assertRaises(KeyError, f) + pytest.raises(KeyError, f) def f(): df.val['X'] - self.assertRaises(KeyError, f) + pytest.raises(KeyError, f) # A is treated as a special Timestamp index = MultiIndex(levels=[['A', 'B', 'C'], @@ -453,19 +516,19 @@ def f(): def f(): df.val['X'] - self.assertRaises(KeyError, f) + pytest.raises(KeyError, f) # GH 7866 # multi-index slicing with missing indexers - idx = pd.MultiIndex.from_product([['A', 'B', 'C'], - ['foo', 'bar', 'baz']], - names=['one', 'two']) - s = pd.Series(np.arange(9, dtype='int64'), index=idx).sort_index() + idx = MultiIndex.from_product([['A', 'B', 'C'], + ['foo', 'bar', 'baz']], + names=['one', 'two']) + s = Series(np.arange(9, dtype='int64'), index=idx).sort_index() - exp_idx = pd.MultiIndex.from_product([['A'], ['foo', 'bar', 'baz']], - names=['one', 'two']) - expected = pd.Series(np.arange(3, dtype='int64'), - index=exp_idx).sort_index() + exp_idx = MultiIndex.from_product([['A'], ['foo', 'bar', 'baz']], + names=['one', 'two']) + expected = Series(np.arange(3, dtype='int64'), + index=exp_idx).sort_index() result = s.loc[['A']] tm.assert_series_equal(result, expected) @@ -473,7 +536,7 @@ def f(): tm.assert_series_equal(result, expected) # not any values found - self.assertRaises(KeyError, lambda: s.loc[['D']]) + pytest.raises(KeyError, lambda: s.loc[['D']]) # empty ok result = s.loc[[]] @@ -481,7 +544,7 @@ def f(): tm.assert_series_equal(result, expected) idx = pd.IndexSlice - expected = pd.Series([0, 3, 6], index=pd.MultiIndex.from_product( + expected = Series([0, 3, 6], index=MultiIndex.from_product( [['A', 'B', 'C'], ['foo']], names=['one', 'two'])).sort_index() result = s.loc[idx[:, ['foo']]] @@ -491,8 +554,8 @@ def f(): # GH 8737 # empty indexer - multi_index = 
pd.MultiIndex.from_product((['foo', 'bar', 'baz'], - ['alpha', 'beta'])) + multi_index = MultiIndex.from_product((['foo', 'bar', 'baz'], + ['alpha', 'beta'])) df = DataFrame( np.random.randn(5, 6), index=range(5), columns=multi_index) df = df.sort_index(level=0, axis=1) @@ -511,7 +574,7 @@ def f(): ('functs', 'median')]), index=['function', 'name']) result = df.loc['function', ('functs', 'mean')] - self.assertEqual(result, np.mean) + assert result == np.mean def test_multiindex_assignment(self): @@ -524,34 +587,39 @@ def test_multiindex_assignment(self): df['d'] = np.nan arr = np.array([0., 1.]) - df.ix[4, 'd'] = arr - tm.assert_series_equal(df.ix[4, 'd'], - Series(arr, index=[8, 10], name='d')) + with catch_warnings(record=True): + df.ix[4, 'd'] = arr + tm.assert_series_equal(df.ix[4, 'd'], + Series(arr, index=[8, 10], name='d')) # single dtype df = DataFrame(np.random.randint(5, 10, size=9).reshape(3, 3), columns=list('abc'), index=[[4, 4, 8], [8, 10, 12]]) - df.ix[4, 'c'] = arr - exp = Series(arr, index=[8, 10], name='c', dtype='float64') - tm.assert_series_equal(df.ix[4, 'c'], exp) + with catch_warnings(record=True): + df.ix[4, 'c'] = arr + exp = Series(arr, index=[8, 10], name='c', dtype='float64') + tm.assert_series_equal(df.ix[4, 'c'], exp) # scalar ok - df.ix[4, 'c'] = 10 - exp = Series(10, index=[8, 10], name='c', dtype='float64') - tm.assert_series_equal(df.ix[4, 'c'], exp) + with catch_warnings(record=True): + df.ix[4, 'c'] = 10 + exp = Series(10, index=[8, 10], name='c', dtype='float64') + tm.assert_series_equal(df.ix[4, 'c'], exp) # invalid assignments def f(): - df.ix[4, 'c'] = [0, 1, 2, 3] + with catch_warnings(record=True): + df.ix[4, 'c'] = [0, 1, 2, 3] - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) def f(): - df.ix[4, 'c'] = [0] + with catch_warnings(record=True): + df.ix[4, 'c'] = [0] - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) # groupby example NUM_ROWS = 100 @@ -579,7 +647,8 @@ def f(name, df2): # but in 
this case, that's ok for name, df2 in grp: new_vals = np.arange(df2.shape[0]) - df.ix[name, 'new_col'] = new_vals + with catch_warnings(record=True): + df.ix[name, 'new_col'] = new_vals def test_multiindex_label_slicing_with_negative_step(self): s = Series(np.arange(20), @@ -589,7 +658,8 @@ def test_multiindex_label_slicing_with_negative_step(self): def assert_slices_equivalent(l_slc, i_slc): tm.assert_series_equal(s.loc[l_slc], s.iloc[i_slc]) tm.assert_series_equal(s[l_slc], s.iloc[i_slc]) - tm.assert_series_equal(s.ix[l_slc], s.iloc[i_slc]) + with catch_warnings(record=True): + tm.assert_series_equal(s.ix[l_slc], s.iloc[i_slc]) assert_slices_equivalent(SLC[::-1], SLC[::-1]) @@ -612,22 +682,51 @@ def assert_slices_equivalent(l_slc, i_slc): def test_multiindex_slice_first_level(self): # GH 12697 freq = ['a', 'b', 'c', 'd'] - idx = pd.MultiIndex.from_product([freq, np.arange(500)]) - df = pd.DataFrame(list(range(2000)), index=idx, columns=['Test']) + idx = MultiIndex.from_product([freq, np.arange(500)]) + df = DataFrame(list(range(2000)), index=idx, columns=['Test']) df_slice = df.loc[pd.IndexSlice[:, 30:70], :] result = df_slice.loc['a'] - expected = pd.DataFrame(list(range(30, 71)), - columns=['Test'], - index=range(30, 71)) + expected = DataFrame(list(range(30, 71)), + columns=['Test'], index=range(30, 71)) tm.assert_frame_equal(result, expected) result = df_slice.loc['d'] - expected = pd.DataFrame(list(range(1530, 1571)), - columns=['Test'], - index=range(30, 71)) + expected = DataFrame(list(range(1530, 1571)), + columns=['Test'], index=range(30, 71)) tm.assert_frame_equal(result, expected) + def test_multiindex_symmetric_difference(self): + # GH 13490 + idx = MultiIndex.from_product([['a', 'b'], ['A', 'B']], + names=['a', 'b']) + result = idx ^ idx + assert result.names == idx.names + + idx2 = idx.copy().rename(['A', 'B']) + result = idx ^ idx2 + assert result.names == [None, None] + + def test_multiindex_contains_dropped(self): + # GH 19027 + # test that 
dropped MultiIndex levels are not in the MultiIndex + # despite continuing to be in the MultiIndex's levels + idx = MultiIndex.from_product([[1, 2], [3, 4]]) + assert 2 in idx + idx = idx.drop(2) -class TestMultiIndexSlicers(tm.TestCase): + # drop implementation keeps 2 in the levels + assert 2 in idx.levels[0] + # but it should no longer be in the index itself + assert 2 not in idx + + # also applies to strings + idx = MultiIndex.from_product([['a', 'b'], ['c', 'd']]) + assert 'a' in idx + idx = idx.drop('a') + assert 'a' in idx.levels[0] + assert 'a' not in idx + + +class TestMultiIndexSlicers(object): def test_per_axis_per_level_getitem(self): @@ -718,26 +817,30 @@ def test_per_axis_per_level_getitem(self): def f(): df.loc[(slice(None), np.array([True, False])), :] - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) # ambiguous cases # these can be multiply interpreted (e.g. in this case # as df.loc[slice(None),[1]] as well - self.assertRaises(KeyError, lambda: df.loc[slice(None), [1]]) + pytest.raises(KeyError, lambda: df.loc[slice(None), [1]]) result = df.loc[(slice(None), [1]), :] expected = df.iloc[[0, 3]] tm.assert_frame_equal(result, expected) # not lexsorted - self.assertEqual(df.index.lexsort_depth, 2) + assert df.index.lexsort_depth == 2 df = df.sort_index(level=1, axis=0) - self.assertEqual(df.index.lexsort_depth, 0) - with tm.assertRaisesRegexp( + assert df.index.lexsort_depth == 0 + with tm.assert_raises_regex( UnsortedIndexError, - 'MultiIndex Slicing requires the index to be fully ' - r'lexsorted tuple len \(2\), lexsort depth \(0\)'): - df.loc[(slice(None), df.loc[:, ('a', 'bar')] > 5), :] + 'MultiIndex slicing requires the index to be ' + r'lexsorted: slicing on levels \[1\], lexsort depth 0'): + df.loc[(slice(None), slice('bar')), :] + + # GH 16734: not sorted, but no real slicing + result = df.loc[(slice(None), df.loc[:, ('a', 'bar')] > 5), :] + tm.assert_frame_equal(result, df.iloc[[1, 3], :]) def 
test_multiindex_slicers_non_unique(self): @@ -748,7 +851,7 @@ def test_multiindex_slicers_non_unique(self): C=[1, 2, 1, 3], D=[1, 2, 3, 4])) .set_index(['A', 'B', 'C']).sort_index()) - self.assertFalse(df.index.is_unique) + assert not df.index.is_unique expected = (DataFrame(dict(A=['foo', 'foo'], B=['a', 'a'], C=[1, 1], D=[1, 3])) .set_index(['A', 'B', 'C']).sort_index()) @@ -764,12 +867,12 @@ def test_multiindex_slicers_non_unique(self): C=[1, 2, 1, 2], D=[1, 2, 3, 4])) .set_index(['A', 'B', 'C']).sort_index()) - self.assertFalse(df.index.is_unique) + assert not df.index.is_unique expected = (DataFrame(dict(A=['foo', 'foo'], B=['a', 'a'], C=[1, 1], D=[1, 3])) .set_index(['A', 'B', 'C']).sort_index()) result = df.loc[(slice(None), slice(None), 1), :] - self.assertFalse(result.index.is_unique) + assert not result.index.is_unique tm.assert_frame_equal(result, expected) # GH12896 @@ -919,9 +1022,14 @@ def test_per_axis_per_level_doc_examples(self): # not sorted def f(): - df.loc['A1', (slice(None), 'foo')] + df.loc['A1', ('a', slice('foo'))] + + pytest.raises(UnsortedIndexError, f) + + # GH 16734: not sorted, but no real slicing + tm.assert_frame_equal(df.loc['A1', (slice(None), 'foo')], + df.loc['A1'].iloc[:, [0, 2]]) - self.assertRaises(UnsortedIndexError, f) df = df.sort_index(axis=1) # slicing @@ -970,17 +1078,17 @@ def test_loc_axis_arguments(self): def f(): df.loc(axis=-1)[:, :, ['C1', 'C3']] - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) def f(): df.loc(axis=2)[:, :, ['C1', 'C3']] - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) def f(): df.loc(axis='foo')[:, :, ['C1', 'C3']] - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) def test_per_axis_per_level_setitem(self): @@ -1085,13 +1193,13 @@ def f(): df.loc[(slice(None), 1), (slice(None), ['foo'])] = np.array( [[100], [100, 100]], dtype='int64') - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) def f(): df.loc[(slice(None), 1), (slice(None), 
['foo'])] = np.array( [100, 100, 100, 100], dtype='int64') - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) # with an alignable rhs df = df_orig.copy() @@ -1117,90 +1225,101 @@ def f(): tm.assert_frame_equal(df, expected) -class TestMultiIndexPanel(tm.TestCase): +class TestMultiIndexPanel(object): def test_iloc_getitem_panel_multiindex(self): - # GH 7199 - # Panel with multi-index - multi_index = pd.MultiIndex.from_tuples([('ONE', 'one'), - ('TWO', 'two'), - ('THREE', 'three')], - names=['UPPER', 'lower']) - - simple_index = [x[0] for x in multi_index] - wd1 = Panel(items=['First', 'Second'], major_axis=['a', 'b', 'c', 'd'], - minor_axis=multi_index) - - wd2 = Panel(items=['First', 'Second'], major_axis=['a', 'b', 'c', 'd'], - minor_axis=simple_index) - - expected1 = wd1['First'].iloc[[True, True, True, False], [0, 2]] - result1 = wd1.iloc[0, [True, True, True, False], [0, 2]] # WRONG - tm.assert_frame_equal(result1, expected1) - - expected2 = wd2['First'].iloc[[True, True, True, False], [0, 2]] - result2 = wd2.iloc[0, [True, True, True, False], [0, 2]] - tm.assert_frame_equal(result2, expected2) - - expected1 = DataFrame(index=['a'], columns=multi_index, - dtype='float64') - result1 = wd1.iloc[0, [0], [0, 1, 2]] - tm.assert_frame_equal(result1, expected1) - - expected2 = DataFrame(index=['a'], columns=simple_index, - dtype='float64') - result2 = wd2.iloc[0, [0], [0, 1, 2]] - tm.assert_frame_equal(result2, expected2) - - # GH 7516 - mi = MultiIndex.from_tuples([(0, 'x'), (1, 'y'), (2, 'z')]) - p = Panel(np.arange(3 * 3 * 3, dtype='int64').reshape(3, 3, 3), - items=['a', 'b', 'c'], major_axis=mi, - minor_axis=['u', 'v', 'w']) - result = p.iloc[:, 1, 0] - expected = Series([3, 12, 21], index=['a', 'b', 'c'], name='u') - tm.assert_series_equal(result, expected) - result = p.loc[:, (1, 'y'), 'u'] - tm.assert_series_equal(result, expected) + with catch_warnings(record=True): + + # GH 7199 + # Panel with multi-index + multi_index = 
MultiIndex.from_tuples([('ONE', 'one'), + ('TWO', 'two'), + ('THREE', 'three')], + names=['UPPER', 'lower']) + + simple_index = [x[0] for x in multi_index] + wd1 = Panel(items=['First', 'Second'], + major_axis=['a', 'b', 'c', 'd'], + minor_axis=multi_index) + + wd2 = Panel(items=['First', 'Second'], + major_axis=['a', 'b', 'c', 'd'], + minor_axis=simple_index) + + expected1 = wd1['First'].iloc[[True, True, True, False], [0, 2]] + result1 = wd1.iloc[0, [True, True, True, False], [0, 2]] # WRONG + tm.assert_frame_equal(result1, expected1) + + expected2 = wd2['First'].iloc[[True, True, True, False], [0, 2]] + result2 = wd2.iloc[0, [True, True, True, False], [0, 2]] + tm.assert_frame_equal(result2, expected2) + + expected1 = DataFrame(index=['a'], columns=multi_index, + dtype='float64') + result1 = wd1.iloc[0, [0], [0, 1, 2]] + tm.assert_frame_equal(result1, expected1) + + expected2 = DataFrame(index=['a'], columns=simple_index, + dtype='float64') + result2 = wd2.iloc[0, [0], [0, 1, 2]] + tm.assert_frame_equal(result2, expected2) + + # GH 7516 + mi = MultiIndex.from_tuples([(0, 'x'), (1, 'y'), (2, 'z')]) + p = Panel(np.arange(3 * 3 * 3, dtype='int64').reshape(3, 3, 3), + items=['a', 'b', 'c'], major_axis=mi, + minor_axis=['u', 'v', 'w']) + result = p.iloc[:, 1, 0] + expected = Series([3, 12, 21], index=['a', 'b', 'c'], name='u') + tm.assert_series_equal(result, expected) + + result = p.loc[:, (1, 'y'), 'u'] + tm.assert_series_equal(result, expected) def test_panel_setitem_with_multiindex(self): - # 10360 - # failing with a multi-index - arr = np.array([[[1, 2, 3], [0, 0, 0]], [[0, 0, 0], [0, 0, 0]]], - dtype=np.float64) - - # reg index - axes = dict(items=['A', 'B'], major_axis=[0, 1], - minor_axis=['X', 'Y', 'Z']) - p1 = Panel(0., **axes) - p1.iloc[0, 0, :] = [1, 2, 3] - expected = Panel(arr, **axes) - tm.assert_panel_equal(p1, expected) - - # multi-indexes - axes['items'] = pd.MultiIndex.from_tuples([('A', 'a'), ('B', 'b')]) - p2 = Panel(0., **axes) - p2.iloc[0, 0, 
:] = [1, 2, 3] - expected = Panel(arr, **axes) - tm.assert_panel_equal(p2, expected) - - axes['major_axis'] = pd.MultiIndex.from_tuples([('A', 1), ('A', 2)]) - p3 = Panel(0., **axes) - p3.iloc[0, 0, :] = [1, 2, 3] - expected = Panel(arr, **axes) - tm.assert_panel_equal(p3, expected) - - axes['minor_axis'] = pd.MultiIndex.from_product([['X'], range(3)]) - p4 = Panel(0., **axes) - p4.iloc[0, 0, :] = [1, 2, 3] - expected = Panel(arr, **axes) - tm.assert_panel_equal(p4, expected) - - arr = np.array( - [[[1, 0, 0], [2, 0, 0]], [[0, 0, 0], [0, 0, 0]]], dtype=np.float64) - p5 = Panel(0., **axes) - p5.iloc[0, :, 0] = [1, 2] - expected = Panel(arr, **axes) - tm.assert_panel_equal(p5, expected) + with catch_warnings(record=True): + # 10360 + # failing with a multi-index + arr = np.array([[[1, 2, 3], [0, 0, 0]], + [[0, 0, 0], [0, 0, 0]]], + dtype=np.float64) + + # reg index + axes = dict(items=['A', 'B'], major_axis=[0, 1], + minor_axis=['X', 'Y', 'Z']) + p1 = Panel(0., **axes) + p1.iloc[0, 0, :] = [1, 2, 3] + expected = Panel(arr, **axes) + tm.assert_panel_equal(p1, expected) + + # multi-indexes + axes['items'] = MultiIndex.from_tuples( + [('A', 'a'), ('B', 'b')]) + p2 = Panel(0., **axes) + p2.iloc[0, 0, :] = [1, 2, 3] + expected = Panel(arr, **axes) + tm.assert_panel_equal(p2, expected) + + axes['major_axis'] = MultiIndex.from_tuples( + [('A', 1), ('A', 2)]) + p3 = Panel(0., **axes) + p3.iloc[0, 0, :] = [1, 2, 3] + expected = Panel(arr, **axes) + tm.assert_panel_equal(p3, expected) + + axes['minor_axis'] = MultiIndex.from_product( + [['X'], range(3)]) + p4 = Panel(0., **axes) + p4.iloc[0, 0, :] = [1, 2, 3] + expected = Panel(arr, **axes) + tm.assert_panel_equal(p4, expected) + + arr = np.array( + [[[1, 0, 0], [2, 0, 0]], [[0, 0, 0], [0, 0, 0]]], + dtype=np.float64) + p5 = Panel(0., **axes) + p5.iloc[0, :, 0] = [1, 2] + expected = Panel(arr, **axes) + tm.assert_panel_equal(p5, expected) diff --git a/pandas/tests/indexing/test_panel.py b/pandas/tests/indexing/test_panel.py 
index 5ec3076af599a..c4f7bd28e4d90 100644 --- a/pandas/tests/indexing/test_panel.py +++ b/pandas/tests/indexing/test_panel.py @@ -1,209 +1,219 @@ +import pytest +from warnings import catch_warnings + import numpy as np from pandas.util import testing as tm from pandas import Panel, date_range, DataFrame -class TestPanel(tm.TestCase): +class TestPanel(object): def test_iloc_getitem_panel(self): - # GH 7189 - p = Panel(np.arange(4 * 3 * 2).reshape(4, 3, 2), - items=['A', 'B', 'C', 'D'], - major_axis=['a', 'b', 'c'], - minor_axis=['one', 'two']) + with catch_warnings(record=True): + # GH 7189 + p = Panel(np.arange(4 * 3 * 2).reshape(4, 3, 2), + items=['A', 'B', 'C', 'D'], + major_axis=['a', 'b', 'c'], + minor_axis=['one', 'two']) - result = p.iloc[1] - expected = p.loc['B'] - tm.assert_frame_equal(result, expected) + result = p.iloc[1] + expected = p.loc['B'] + tm.assert_frame_equal(result, expected) - result = p.iloc[1, 1] - expected = p.loc['B', 'b'] - tm.assert_series_equal(result, expected) + result = p.iloc[1, 1] + expected = p.loc['B', 'b'] + tm.assert_series_equal(result, expected) - result = p.iloc[1, 1, 1] - expected = p.loc['B', 'b', 'two'] - self.assertEqual(result, expected) + result = p.iloc[1, 1, 1] + expected = p.loc['B', 'b', 'two'] + assert result == expected - # slice - result = p.iloc[1:3] - expected = p.loc[['B', 'C']] - tm.assert_panel_equal(result, expected) + # slice + result = p.iloc[1:3] + expected = p.loc[['B', 'C']] + tm.assert_panel_equal(result, expected) - result = p.iloc[:, 0:2] - expected = p.loc[:, ['a', 'b']] - tm.assert_panel_equal(result, expected) + result = p.iloc[:, 0:2] + expected = p.loc[:, ['a', 'b']] + tm.assert_panel_equal(result, expected) - # list of integers - result = p.iloc[[0, 2]] - expected = p.loc[['A', 'C']] - tm.assert_panel_equal(result, expected) + # list of integers + result = p.iloc[[0, 2]] + expected = p.loc[['A', 'C']] + tm.assert_panel_equal(result, expected) - # neg indicies - result = p.iloc[[-1, 1], [-1, 
1]] - expected = p.loc[['D', 'B'], ['c', 'b']] - tm.assert_panel_equal(result, expected) + # neg indicies + result = p.iloc[[-1, 1], [-1, 1]] + expected = p.loc[['D', 'B'], ['c', 'b']] + tm.assert_panel_equal(result, expected) - # dups indicies - result = p.iloc[[-1, -1, 1], [-1, 1]] - expected = p.loc[['D', 'D', 'B'], ['c', 'b']] - tm.assert_panel_equal(result, expected) + # dups indicies + result = p.iloc[[-1, -1, 1], [-1, 1]] + expected = p.loc[['D', 'D', 'B'], ['c', 'b']] + tm.assert_panel_equal(result, expected) - # combined - result = p.iloc[0, [True, True], [0, 1]] - expected = p.loc['A', ['a', 'b'], ['one', 'two']] - tm.assert_frame_equal(result, expected) + # combined + result = p.iloc[0, [True, True], [0, 1]] + expected = p.loc['A', ['a', 'b'], ['one', 'two']] + tm.assert_frame_equal(result, expected) - # out-of-bounds exception - self.assertRaises(IndexError, p.iloc.__getitem__, tuple([10, 5])) + # out-of-bounds exception + with pytest.raises(IndexError): + p.iloc[tuple([10, 5])] - def f(): - p.iloc[0, [True, True], [0, 1, 2]] + def f(): + p.iloc[0, [True, True], [0, 1, 2]] - self.assertRaises(IndexError, f) + pytest.raises(IndexError, f) - # trying to use a label - self.assertRaises(ValueError, p.iloc.__getitem__, tuple(['j', 'D'])) + # trying to use a label + with pytest.raises(ValueError): + p.iloc[tuple(['j', 'D'])] - # GH - p = Panel( - np.random.rand(4, 3, 2), items=['A', 'B', 'C', 'D'], - major_axis=['U', 'V', 'W'], minor_axis=['X', 'Y']) - expected = p['A'] + # GH + p = Panel( + np.random.rand(4, 3, 2), items=['A', 'B', 'C', 'D'], + major_axis=['U', 'V', 'W'], minor_axis=['X', 'Y']) + expected = p['A'] - result = p.iloc[0, :, :] - tm.assert_frame_equal(result, expected) + result = p.iloc[0, :, :] + tm.assert_frame_equal(result, expected) - result = p.iloc[0, [True, True, True], :] - tm.assert_frame_equal(result, expected) + result = p.iloc[0, [True, True, True], :] + tm.assert_frame_equal(result, expected) - result = p.iloc[0, [True, True, True], 
[0, 1]] - tm.assert_frame_equal(result, expected) + result = p.iloc[0, [True, True, True], [0, 1]] + tm.assert_frame_equal(result, expected) - def f(): - p.iloc[0, [True, True, True], [0, 1, 2]] + def f(): + p.iloc[0, [True, True, True], [0, 1, 2]] - self.assertRaises(IndexError, f) + pytest.raises(IndexError, f) - def f(): - p.iloc[0, [True, True, True], [2]] + def f(): + p.iloc[0, [True, True, True], [2]] - self.assertRaises(IndexError, f) + pytest.raises(IndexError, f) def test_iloc_panel_issue(self): - # GH 3617 - p = Panel(np.random.randn(4, 4, 4)) + with catch_warnings(record=True): + # see gh-3617 + p = Panel(np.random.randn(4, 4, 4)) - self.assertEqual(p.iloc[:3, :3, :3].shape, (3, 3, 3)) - self.assertEqual(p.iloc[1, :3, :3].shape, (3, 3)) - self.assertEqual(p.iloc[:3, 1, :3].shape, (3, 3)) - self.assertEqual(p.iloc[:3, :3, 1].shape, (3, 3)) - self.assertEqual(p.iloc[1, 1, :3].shape, (3, )) - self.assertEqual(p.iloc[1, :3, 1].shape, (3, )) - self.assertEqual(p.iloc[:3, 1, 1].shape, (3, )) + assert p.iloc[:3, :3, :3].shape == (3, 3, 3) + assert p.iloc[1, :3, :3].shape == (3, 3) + assert p.iloc[:3, 1, :3].shape == (3, 3) + assert p.iloc[:3, :3, 1].shape == (3, 3) + assert p.iloc[1, 1, :3].shape == (3, ) + assert p.iloc[1, :3, 1].shape == (3, ) + assert p.iloc[:3, 1, 1].shape == (3, ) def test_panel_getitem(self): - # GH4016, date selection returns a frame when a partial string - # selection - ind = date_range(start="2000", freq="D", periods=1000) - df = DataFrame( - np.random.randn( - len(ind), 5), index=ind, columns=list('ABCDE')) - panel = Panel(dict([('frame_' + c, df) for c in list('ABC')])) - test2 = panel.ix[:, "2002":"2002-12-31"] - test1 = panel.ix[:, "2002"] - tm.assert_panel_equal(test1, test2) + with catch_warnings(record=True): + # GH4016, date selection returns a frame when a partial string + # selection + ind = date_range(start="2000", freq="D", periods=1000) + df = DataFrame( + np.random.randn( + len(ind), 5), index=ind, columns=list('ABCDE')) 
+ panel = Panel({'frame_' + c: df for c in list('ABC')}) - # GH8710 - # multi-element getting with a list - panel = tm.makePanel() + test2 = panel.loc[:, "2002":"2002-12-31"] + test1 = panel.loc[:, "2002"] + tm.assert_panel_equal(test1, test2) - expected = panel.iloc[[0, 1]] + # GH8710 + # multi-element getting with a list + panel = tm.makePanel() - result = panel.loc[['ItemA', 'ItemB']] - tm.assert_panel_equal(result, expected) + expected = panel.iloc[[0, 1]] - result = panel.loc[['ItemA', 'ItemB'], :, :] - tm.assert_panel_equal(result, expected) + result = panel.loc[['ItemA', 'ItemB']] + tm.assert_panel_equal(result, expected) - result = panel[['ItemA', 'ItemB']] - tm.assert_panel_equal(result, expected) + result = panel.loc[['ItemA', 'ItemB'], :, :] + tm.assert_panel_equal(result, expected) - result = panel.loc['ItemA':'ItemB'] - tm.assert_panel_equal(result, expected) + result = panel[['ItemA', 'ItemB']] + tm.assert_panel_equal(result, expected) - result = panel.ix['ItemA':'ItemB'] - tm.assert_panel_equal(result, expected) + result = panel.loc['ItemA':'ItemB'] + tm.assert_panel_equal(result, expected) - result = panel.ix[['ItemA', 'ItemB']] - tm.assert_panel_equal(result, expected) + with catch_warnings(record=True): + result = panel.ix[['ItemA', 'ItemB']] + tm.assert_panel_equal(result, expected) - # with an object-like - # GH 9140 - class TestObject: + # with an object-like + # GH 9140 + class TestObject: - def __str__(self): - return "TestObject" + def __str__(self): + return "TestObject" - obj = TestObject() + obj = TestObject() - p = Panel(np.random.randn(1, 5, 4), items=[obj], - major_axis=date_range('1/1/2000', periods=5), - minor_axis=['A', 'B', 'C', 'D']) + p = Panel(np.random.randn(1, 5, 4), items=[obj], + major_axis=date_range('1/1/2000', periods=5), + minor_axis=['A', 'B', 'C', 'D']) - expected = p.iloc[0] - result = p[obj] - tm.assert_frame_equal(result, expected) + expected = p.iloc[0] + result = p[obj] + tm.assert_frame_equal(result, expected) 
def test_panel_setitem(self): - # GH 7763 - # loc and setitem have setting differences - np.random.seed(0) - index = range(3) - columns = list('abc') + with catch_warnings(record=True): + # GH 7763 + # loc and setitem have setting differences + np.random.seed(0) + index = range(3) + columns = list('abc') + + panel = Panel({'A': DataFrame(np.random.randn(3, 3), + index=index, columns=columns), + 'B': DataFrame(np.random.randn(3, 3), + index=index, columns=columns), + 'C': DataFrame(np.random.randn(3, 3), + index=index, columns=columns)}) + + replace = DataFrame(np.eye(3, 3), index=range(3), columns=columns) + expected = Panel({'A': replace, 'B': replace, 'C': replace}) + + p = panel.copy() + for idx in list('ABC'): + p[idx] = replace + tm.assert_panel_equal(p, expected) + + p = panel.copy() + for idx in list('ABC'): + p.loc[idx, :, :] = replace + tm.assert_panel_equal(p, expected) + + def test_panel_assignment(self): - panel = Panel({'A': DataFrame(np.random.randn(3, 3), - index=index, columns=columns), - 'B': DataFrame(np.random.randn(3, 3), - index=index, columns=columns), - 'C': DataFrame(np.random.randn(3, 3), - index=index, columns=columns)}) + with catch_warnings(record=True): + # GH3777 + wp = Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'], + major_axis=date_range('1/1/2000', periods=5), + minor_axis=['A', 'B', 'C', 'D']) + wp2 = Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'], + major_axis=date_range('1/1/2000', periods=5), + minor_axis=['A', 'B', 'C', 'D']) - replace = DataFrame(np.eye(3, 3), index=range(3), columns=columns) - expected = Panel({'A': replace, 'B': replace, 'C': replace}) + # TODO: unused? 
+ # expected = wp.loc[['Item1', 'Item2'], :, ['A', 'B']] - p = panel.copy() - for idx in list('ABC'): - p[idx] = replace - tm.assert_panel_equal(p, expected) + def f(): + wp.loc[['Item1', 'Item2'], :, ['A', 'B']] = wp2.loc[ + ['Item1', 'Item2'], :, ['A', 'B']] - p = panel.copy() - for idx in list('ABC'): - p.loc[idx, :, :] = replace - tm.assert_panel_equal(p, expected) + pytest.raises(NotImplementedError, f) - def test_panel_assignment(self): - # GH3777 - wp = Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'], - major_axis=date_range('1/1/2000', periods=5), - minor_axis=['A', 'B', 'C', 'D']) - wp2 = Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'], - major_axis=date_range('1/1/2000', periods=5), - minor_axis=['A', 'B', 'C', 'D']) - - # TODO: unused? - # expected = wp.loc[['Item1', 'Item2'], :, ['A', 'B']] - - def f(): - wp.loc[['Item1', 'Item2'], :, ['A', 'B']] = wp2.loc[ - ['Item1', 'Item2'], :, ['A', 'B']] - - self.assertRaises(NotImplementedError, f) - - # to_assign = wp2.loc[['Item1', 'Item2'], :, ['A', 'B']] - # wp.loc[['Item1', 'Item2'], :, ['A', 'B']] = to_assign - # result = wp.loc[['Item1', 'Item2'], :, ['A', 'B']] - # tm.assert_panel_equal(result,expected) + # to_assign = wp2.loc[['Item1', 'Item2'], :, ['A', 'B']] + # wp.loc[['Item1', 'Item2'], :, ['A', 'B']] = to_assign + # result = wp.loc[['Item1', 'Item2'], :, ['A', 'B']] + # tm.assert_panel_equal(result,expected) diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py new file mode 100644 index 0000000000000..f95f493c66043 --- /dev/null +++ b/pandas/tests/indexing/test_partial.py @@ -0,0 +1,639 @@ +""" +test setting *parts* of objects both positionally and label based + +TOD: these should be split among the indexer tests +""" + +import pytest + +from warnings import catch_warnings +import numpy as np + +import pandas as pd +from pandas import Series, DataFrame, Panel, Index, date_range +from pandas.util import testing as tm + + +class 
TestPartialSetting(object): + + def test_partial_setting(self): + + # GH2578, allow ix and friends to partially set + + # series + s_orig = Series([1, 2, 3]) + + s = s_orig.copy() + s[5] = 5 + expected = Series([1, 2, 3, 5], index=[0, 1, 2, 5]) + tm.assert_series_equal(s, expected) + + s = s_orig.copy() + s.loc[5] = 5 + expected = Series([1, 2, 3, 5], index=[0, 1, 2, 5]) + tm.assert_series_equal(s, expected) + + s = s_orig.copy() + s[5] = 5. + expected = Series([1, 2, 3, 5.], index=[0, 1, 2, 5]) + tm.assert_series_equal(s, expected) + + s = s_orig.copy() + s.loc[5] = 5. + expected = Series([1, 2, 3, 5.], index=[0, 1, 2, 5]) + tm.assert_series_equal(s, expected) + + # iloc/iat raise + s = s_orig.copy() + + def f(): + s.iloc[3] = 5. + + pytest.raises(IndexError, f) + + def f(): + s.iat[3] = 5. + + pytest.raises(IndexError, f) + + # ## frame ## + + df_orig = DataFrame( + np.arange(6).reshape(3, 2), columns=['A', 'B'], dtype='int64') + + # iloc/iat raise + df = df_orig.copy() + + def f(): + df.iloc[4, 2] = 5. + + pytest.raises(IndexError, f) + + def f(): + df.iat[4, 2] = 5. 
+ + pytest.raises(IndexError, f) + + # row setting where it exists + expected = DataFrame(dict({'A': [0, 4, 4], 'B': [1, 5, 5]})) + df = df_orig.copy() + df.iloc[1] = df.iloc[2] + tm.assert_frame_equal(df, expected) + + expected = DataFrame(dict({'A': [0, 4, 4], 'B': [1, 5, 5]})) + df = df_orig.copy() + df.loc[1] = df.loc[2] + tm.assert_frame_equal(df, expected) + + # like 2578, partial setting with dtype preservation + expected = DataFrame(dict({'A': [0, 2, 4, 4], 'B': [1, 3, 5, 5]})) + df = df_orig.copy() + df.loc[3] = df.loc[2] + tm.assert_frame_equal(df, expected) + + # single dtype frame, overwrite + expected = DataFrame(dict({'A': [0, 2, 4], 'B': [0, 2, 4]})) + df = df_orig.copy() + with catch_warnings(record=True): + df.ix[:, 'B'] = df.ix[:, 'A'] + tm.assert_frame_equal(df, expected) + + # mixed dtype frame, overwrite + expected = DataFrame(dict({'A': [0, 2, 4], 'B': Series([0, 2, 4])})) + df = df_orig.copy() + df['B'] = df['B'].astype(np.float64) + with catch_warnings(record=True): + df.ix[:, 'B'] = df.ix[:, 'A'] + tm.assert_frame_equal(df, expected) + + # single dtype frame, partial setting + expected = df_orig.copy() + expected['C'] = df['A'] + df = df_orig.copy() + with catch_warnings(record=True): + df.ix[:, 'C'] = df.ix[:, 'A'] + tm.assert_frame_equal(df, expected) + + # mixed frame, partial setting + expected = df_orig.copy() + expected['C'] = df['A'] + df = df_orig.copy() + with catch_warnings(record=True): + df.ix[:, 'C'] = df.ix[:, 'A'] + tm.assert_frame_equal(df, expected) + + with catch_warnings(record=True): + # ## panel ## + p_orig = Panel(np.arange(16).reshape(2, 4, 2), + items=['Item1', 'Item2'], + major_axis=pd.date_range('2001/1/12', periods=4), + minor_axis=['A', 'B'], dtype='float64') + + # panel setting via item + p_orig = Panel(np.arange(16).reshape(2, 4, 2), + items=['Item1', 'Item2'], + major_axis=pd.date_range('2001/1/12', periods=4), + minor_axis=['A', 'B'], dtype='float64') + expected = p_orig.copy() + expected['Item3'] = 
expected['Item1'] + p = p_orig.copy() + p.loc['Item3'] = p['Item1'] + tm.assert_panel_equal(p, expected) + + # panel with aligned series + expected = p_orig.copy() + expected = expected.transpose(2, 1, 0) + expected['C'] = DataFrame({'Item1': [30, 30, 30, 30], + 'Item2': [32, 32, 32, 32]}, + index=p_orig.major_axis) + expected = expected.transpose(2, 1, 0) + p = p_orig.copy() + p.loc[:, :, 'C'] = Series([30, 32], index=p_orig.items) + tm.assert_panel_equal(p, expected) + + # GH 8473 + dates = date_range('1/1/2000', periods=8) + df_orig = DataFrame(np.random.randn(8, 4), index=dates, + columns=['A', 'B', 'C', 'D']) + + expected = pd.concat([df_orig, DataFrame( + {'A': 7}, index=[dates[-1] + 1])]) + df = df_orig.copy() + df.loc[dates[-1] + 1, 'A'] = 7 + tm.assert_frame_equal(df, expected) + df = df_orig.copy() + df.at[dates[-1] + 1, 'A'] = 7 + tm.assert_frame_equal(df, expected) + + exp_other = DataFrame({0: 7}, index=[dates[-1] + 1]) + expected = pd.concat([df_orig, exp_other], axis=1) + + df = df_orig.copy() + df.loc[dates[-1] + 1, 0] = 7 + tm.assert_frame_equal(df, expected) + df = df_orig.copy() + df.at[dates[-1] + 1, 0] = 7 + tm.assert_frame_equal(df, expected) + + def test_partial_setting_mixed_dtype(self): + + # in a mixed dtype environment, try to preserve dtypes + # by appending + df = DataFrame([[True, 1], [False, 2]], columns=["female", "fitness"]) + + s = df.loc[1].copy() + s.name = 2 + expected = df.append(s) + + df.loc[2] = df.loc[1] + tm.assert_frame_equal(df, expected) + + # columns will align + df = DataFrame(columns=['A', 'B']) + df.loc[0] = Series(1, index=range(4)) + tm.assert_frame_equal(df, DataFrame(columns=['A', 'B'], index=[0])) + + # columns will align + df = DataFrame(columns=['A', 'B']) + df.loc[0] = Series(1, index=['B']) + + exp = DataFrame([[np.nan, 1]], columns=['A', 'B'], + index=[0], dtype='float64') + tm.assert_frame_equal(df, exp) + + # list-like must conform + df = DataFrame(columns=['A', 'B']) + + def f(): + df.loc[0] = [1, 2, 3] 
+ + pytest.raises(ValueError, f) + + # TODO: #15657, these are left as object and not coerced + df = DataFrame(columns=['A', 'B']) + df.loc[3] = [6, 7] + + exp = DataFrame([[6, 7]], index=[3], columns=['A', 'B'], + dtype='object') + tm.assert_frame_equal(df, exp) + + def test_series_partial_set(self): + # partial set with new index + # Regression from GH4825 + ser = Series([0.1, 0.2], index=[1, 2]) + + # loc equiv to .reindex + expected = Series([np.nan, 0.2, np.nan], index=[3, 2, 3]) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = ser.loc[[3, 2, 3]] + tm.assert_series_equal(result, expected, check_index_type=True) + + result = ser.reindex([3, 2, 3]) + tm.assert_series_equal(result, expected, check_index_type=True) + + expected = Series([np.nan, 0.2, np.nan, np.nan], index=[3, 2, 3, 'x']) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = ser.loc[[3, 2, 3, 'x']] + tm.assert_series_equal(result, expected, check_index_type=True) + + result = ser.reindex([3, 2, 3, 'x']) + tm.assert_series_equal(result, expected, check_index_type=True) + + expected = Series([0.2, 0.2, 0.1], index=[2, 2, 1]) + result = ser.loc[[2, 2, 1]] + tm.assert_series_equal(result, expected, check_index_type=True) + + expected = Series([0.2, 0.2, np.nan, 0.1], index=[2, 2, 'x', 1]) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = ser.loc[[2, 2, 'x', 1]] + tm.assert_series_equal(result, expected, check_index_type=True) + + result = ser.reindex([2, 2, 'x', 1]) + tm.assert_series_equal(result, expected, check_index_type=True) + + # raises as nothing in in the index + pytest.raises(KeyError, lambda: ser.loc[[3, 3, 3]]) + + expected = Series([0.2, 0.2, np.nan], index=[2, 2, 3]) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = ser.loc[[2, 2, 3]] + tm.assert_series_equal(result, expected, check_index_type=True) + + result = ser.reindex([2, 2, 3]) + 
tm.assert_series_equal(result, expected, check_index_type=True) + + s = Series([0.1, 0.2, 0.3], index=[1, 2, 3]) + expected = Series([0.3, np.nan, np.nan], index=[3, 4, 4]) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = s.loc[[3, 4, 4]] + tm.assert_series_equal(result, expected, check_index_type=True) + + result = s.reindex([3, 4, 4]) + tm.assert_series_equal(result, expected, check_index_type=True) + + s = Series([0.1, 0.2, 0.3, 0.4], + index=[1, 2, 3, 4]) + expected = Series([np.nan, 0.3, 0.3], index=[5, 3, 3]) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = s.loc[[5, 3, 3]] + tm.assert_series_equal(result, expected, check_index_type=True) + + result = s.reindex([5, 3, 3]) + tm.assert_series_equal(result, expected, check_index_type=True) + + s = Series([0.1, 0.2, 0.3, 0.4], + index=[1, 2, 3, 4]) + expected = Series([np.nan, 0.4, 0.4], index=[5, 4, 4]) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = s.loc[[5, 4, 4]] + tm.assert_series_equal(result, expected, check_index_type=True) + + result = s.reindex([5, 4, 4]) + tm.assert_series_equal(result, expected, check_index_type=True) + + s = Series([0.1, 0.2, 0.3, 0.4], + index=[4, 5, 6, 7]) + expected = Series([0.4, np.nan, np.nan], index=[7, 2, 2]) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = s.loc[[7, 2, 2]] + tm.assert_series_equal(result, expected, check_index_type=True) + + result = s.reindex([7, 2, 2]) + tm.assert_series_equal(result, expected, check_index_type=True) + + s = Series([0.1, 0.2, 0.3, 0.4], + index=[1, 2, 3, 4]) + expected = Series([0.4, np.nan, np.nan], index=[4, 5, 5]) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = s.loc[[4, 5, 5]] + tm.assert_series_equal(result, expected, check_index_type=True) + + result = s.reindex([4, 5, 5]) + tm.assert_series_equal(result, expected, check_index_type=True) + + # iloc + 
expected = Series([0.2, 0.2, 0.1, 0.1], index=[2, 2, 1, 1]) + result = ser.iloc[[1, 1, 0, 0]] + tm.assert_series_equal(result, expected, check_index_type=True) + + def test_series_partial_set_with_name(self): + # GH 11497 + + idx = Index([1, 2], dtype='int64', name='idx') + ser = Series([0.1, 0.2], index=idx, name='s') + + # loc + exp_idx = Index([3, 2, 3], dtype='int64', name='idx') + expected = Series([np.nan, 0.2, np.nan], index=exp_idx, name='s') + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = ser.loc[[3, 2, 3]] + tm.assert_series_equal(result, expected, check_index_type=True) + + exp_idx = Index([3, 2, 3, 'x'], dtype='object', name='idx') + expected = Series([np.nan, 0.2, np.nan, np.nan], index=exp_idx, + name='s') + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = ser.loc[[3, 2, 3, 'x']] + tm.assert_series_equal(result, expected, check_index_type=True) + + exp_idx = Index([2, 2, 1], dtype='int64', name='idx') + expected = Series([0.2, 0.2, 0.1], index=exp_idx, name='s') + result = ser.loc[[2, 2, 1]] + tm.assert_series_equal(result, expected, check_index_type=True) + + exp_idx = Index([2, 2, 'x', 1], dtype='object', name='idx') + expected = Series([0.2, 0.2, np.nan, 0.1], index=exp_idx, name='s') + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = ser.loc[[2, 2, 'x', 1]] + tm.assert_series_equal(result, expected, check_index_type=True) + + # raises as nothing in in the index + pytest.raises(KeyError, lambda: ser.loc[[3, 3, 3]]) + + exp_idx = Index([2, 2, 3], dtype='int64', name='idx') + expected = Series([0.2, 0.2, np.nan], index=exp_idx, name='s') + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = ser.loc[[2, 2, 3]] + tm.assert_series_equal(result, expected, check_index_type=True) + + exp_idx = Index([3, 4, 4], dtype='int64', name='idx') + expected = Series([0.3, np.nan, np.nan], index=exp_idx, name='s') + idx = Index([1, 
2, 3], dtype='int64', name='idx') + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = Series([0.1, 0.2, 0.3], + index=idx, + name='s').loc[[3, 4, 4]] + tm.assert_series_equal(result, expected, check_index_type=True) + + exp_idx = Index([5, 3, 3], dtype='int64', name='idx') + expected = Series([np.nan, 0.3, 0.3], index=exp_idx, name='s') + idx = Index([1, 2, 3, 4], dtype='int64', name='idx') + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = Series([0.1, 0.2, 0.3, 0.4], index=idx, + name='s').loc[[5, 3, 3]] + tm.assert_series_equal(result, expected, check_index_type=True) + + exp_idx = Index([5, 4, 4], dtype='int64', name='idx') + expected = Series([np.nan, 0.4, 0.4], index=exp_idx, name='s') + idx = Index([1, 2, 3, 4], dtype='int64', name='idx') + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = Series([0.1, 0.2, 0.3, 0.4], index=idx, + name='s').loc[[5, 4, 4]] + tm.assert_series_equal(result, expected, check_index_type=True) + + exp_idx = Index([7, 2, 2], dtype='int64', name='idx') + expected = Series([0.4, np.nan, np.nan], index=exp_idx, name='s') + idx = Index([4, 5, 6, 7], dtype='int64', name='idx') + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = Series([0.1, 0.2, 0.3, 0.4], index=idx, + name='s').loc[[7, 2, 2]] + tm.assert_series_equal(result, expected, check_index_type=True) + + exp_idx = Index([4, 5, 5], dtype='int64', name='idx') + expected = Series([0.4, np.nan, np.nan], index=exp_idx, name='s') + idx = Index([1, 2, 3, 4], dtype='int64', name='idx') + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = Series([0.1, 0.2, 0.3, 0.4], index=idx, + name='s').loc[[4, 5, 5]] + tm.assert_series_equal(result, expected, check_index_type=True) + + # iloc + exp_idx = Index([2, 2, 1, 1], dtype='int64', name='idx') + expected = Series([0.2, 0.2, 0.1, 0.1], index=exp_idx, name='s') + result = ser.iloc[[1, 
1, 0, 0]] + tm.assert_series_equal(result, expected, check_index_type=True) + + def test_partial_set_invalid(self): + + # GH 4940 + # allow only setting of 'valid' values + + orig = tm.makeTimeDataFrame() + df = orig.copy() + + # don't allow not string inserts + def f(): + with catch_warnings(record=True): + df.loc[100.0, :] = df.ix[0] + + pytest.raises(TypeError, f) + + def f(): + with catch_warnings(record=True): + df.loc[100, :] = df.ix[0] + + pytest.raises(TypeError, f) + + def f(): + with catch_warnings(record=True): + df.ix[100.0, :] = df.ix[0] + + pytest.raises(TypeError, f) + + def f(): + with catch_warnings(record=True): + df.ix[100, :] = df.ix[0] + + pytest.raises(ValueError, f) + + # allow object conversion here + df = orig.copy() + with catch_warnings(record=True): + df.loc['a', :] = df.ix[0] + exp = orig.append(Series(df.ix[0], name='a')) + tm.assert_frame_equal(df, exp) + tm.assert_index_equal(df.index, Index(orig.index.tolist() + ['a'])) + assert df.index.dtype == 'object' + + def test_partial_set_empty_series(self): + + # GH5226 + + # partially set with an empty object series + s = Series() + s.loc[1] = 1 + tm.assert_series_equal(s, Series([1], index=[1])) + s.loc[3] = 3 + tm.assert_series_equal(s, Series([1, 3], index=[1, 3])) + + s = Series() + s.loc[1] = 1. + tm.assert_series_equal(s, Series([1.], index=[1])) + s.loc[3] = 3. 
+ tm.assert_series_equal(s, Series([1., 3.], index=[1, 3])) + + s = Series() + s.loc['foo'] = 1 + tm.assert_series_equal(s, Series([1], index=['foo'])) + s.loc['bar'] = 3 + tm.assert_series_equal(s, Series([1, 3], index=['foo', 'bar'])) + s.loc[3] = 4 + tm.assert_series_equal(s, Series([1, 3, 4], index=['foo', 'bar', 3])) + + def test_partial_set_empty_frame(self): + + # partially set with an empty object + # frame + df = DataFrame() + + def f(): + df.loc[1] = 1 + + pytest.raises(ValueError, f) + + def f(): + df.loc[1] = Series([1], index=['foo']) + + pytest.raises(ValueError, f) + + def f(): + df.loc[:, 1] = 1 + + pytest.raises(ValueError, f) + + # these work as they don't really change + # anything but the index + # GH5632 + expected = DataFrame(columns=['foo'], index=Index([], dtype='int64')) + + def f(): + df = DataFrame() + df['foo'] = Series([], dtype='object') + return df + + tm.assert_frame_equal(f(), expected) + + def f(): + df = DataFrame() + df['foo'] = Series(df.index) + return df + + tm.assert_frame_equal(f(), expected) + + def f(): + df = DataFrame() + df['foo'] = df.index + return df + + tm.assert_frame_equal(f(), expected) + + expected = DataFrame(columns=['foo'], index=Index([], dtype='int64')) + expected['foo'] = expected['foo'].astype('float64') + + def f(): + df = DataFrame() + df['foo'] = [] + return df + + tm.assert_frame_equal(f(), expected) + + def f(): + df = DataFrame() + df['foo'] = Series(np.arange(len(df)), dtype='float64') + return df + + tm.assert_frame_equal(f(), expected) + + def f(): + df = DataFrame() + tm.assert_index_equal(df.index, Index([], dtype='object')) + df['foo'] = range(len(df)) + return df + + expected = DataFrame(columns=['foo'], index=Index([], dtype='int64')) + expected['foo'] = expected['foo'].astype('float64') + tm.assert_frame_equal(f(), expected) + + df = DataFrame() + tm.assert_index_equal(df.columns, Index([], dtype=object)) + df2 = DataFrame() + df2[1] = Series([1], index=['foo']) + df.loc[:, 1] = Series([1], 
index=['foo']) + tm.assert_frame_equal(df, DataFrame([[1]], index=['foo'], columns=[1])) + tm.assert_frame_equal(df, df2) + + # no index to start + expected = DataFrame({0: Series(1, index=range(4))}, + columns=['A', 'B', 0]) + + df = DataFrame(columns=['A', 'B']) + df[0] = Series(1, index=range(4)) + df.dtypes + str(df) + tm.assert_frame_equal(df, expected) + + df = DataFrame(columns=['A', 'B']) + df.loc[:, 0] = Series(1, index=range(4)) + df.dtypes + str(df) + tm.assert_frame_equal(df, expected) + + def test_partial_set_empty_frame_row(self): + # GH5720, GH5744 + # don't create rows when empty + expected = DataFrame(columns=['A', 'B', 'New'], + index=Index([], dtype='int64')) + expected['A'] = expected['A'].astype('int64') + expected['B'] = expected['B'].astype('float64') + expected['New'] = expected['New'].astype('float64') + + df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]}) + y = df[df.A > 5] + y['New'] = np.nan + tm.assert_frame_equal(y, expected) + # tm.assert_frame_equal(y,expected) + + expected = DataFrame(columns=['a', 'b', 'c c', 'd']) + expected['d'] = expected['d'].astype('int64') + df = DataFrame(columns=['a', 'b', 'c c']) + df['d'] = 3 + tm.assert_frame_equal(df, expected) + tm.assert_series_equal(df['c c'], Series(name='c c', dtype=object)) + + # reindex columns is ok + df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]}) + y = df[df.A > 5] + result = y.reindex(columns=['A', 'B', 'C']) + expected = DataFrame(columns=['A', 'B', 'C'], + index=Index([], dtype='int64')) + expected['A'] = expected['A'].astype('int64') + expected['B'] = expected['B'].astype('float64') + expected['C'] = expected['C'].astype('float64') + tm.assert_frame_equal(result, expected) + + def test_partial_set_empty_frame_set_series(self): + # GH 5756 + # setting with empty Series + df = DataFrame(Series()) + tm.assert_frame_equal(df, DataFrame({0: Series()})) + + df = DataFrame(Series(name='foo')) + tm.assert_frame_equal(df, DataFrame({'foo': Series()})) + + def 
test_partial_set_empty_frame_empty_copy_assignment(self): + # GH 5932 + # copy on empty with assignment fails + df = DataFrame(index=[0]) + df = df.copy() + df['a'] = 0 + expected = DataFrame(0, index=[0], columns=['a']) + tm.assert_frame_equal(df, expected) + + def test_partial_set_empty_frame_empty_consistencies(self): + # GH 6171 + # consistency on empty frames + df = DataFrame(columns=['x', 'y']) + df['x'] = [1, 2] + expected = DataFrame(dict(x=[1, 2], y=[np.nan, np.nan])) + tm.assert_frame_equal(df, expected, check_dtype=False) + + df = DataFrame(columns=['x', 'y']) + df['x'] = ['1', '2'] + expected = DataFrame( + dict(x=['1', '2'], y=[np.nan, np.nan]), dtype=object) + tm.assert_frame_equal(df, expected) + + df = DataFrame(columns=['x', 'y']) + df.loc[0, 'x'] = 1 + expected = DataFrame(dict(x=[1], y=[np.nan])) + tm.assert_frame_equal(df, expected, check_dtype=False) diff --git a/pandas/tests/indexing/test_scalar.py b/pandas/tests/indexing/test_scalar.py new file mode 100644 index 0000000000000..7314ff6619049 --- /dev/null +++ b/pandas/tests/indexing/test_scalar.py @@ -0,0 +1,172 @@ +""" test scalar indexing, including at and iat """ + +import pytest + +import numpy as np + +from pandas import (Series, DataFrame, Timestamp, + Timedelta, date_range) +from pandas.util import testing as tm +from pandas.tests.indexing.common import Base + + +class TestScalar(Base): + + def test_at_and_iat_get(self): + def _check(f, func, values=False): + + if f is not None: + indicies = self.generate_indices(f, values) + for i in indicies: + result = getattr(f, func)[i] + expected = self.get_value(f, i, values) + tm.assert_almost_equal(result, expected) + + for o in self._objs: + + d = getattr(self, o) + + # iat + for f in [d['ints'], d['uints']]: + _check(f, 'iat', values=True) + + for f in [d['labels'], d['ts'], d['floats']]: + if f is not None: + pytest.raises(ValueError, self.check_values, f, 'iat') + + # at + for f in [d['ints'], d['uints'], d['labels'], + d['ts'], 
d['floats']]: + _check(f, 'at') + + def test_at_and_iat_set(self): + def _check(f, func, values=False): + + if f is not None: + indicies = self.generate_indices(f, values) + for i in indicies: + getattr(f, func)[i] = 1 + expected = self.get_value(f, i, values) + tm.assert_almost_equal(expected, 1) + + for t in self._objs: + + d = getattr(self, t) + + # iat + for f in [d['ints'], d['uints']]: + _check(f, 'iat', values=True) + + for f in [d['labels'], d['ts'], d['floats']]: + if f is not None: + pytest.raises(ValueError, _check, f, 'iat') + + # at + for f in [d['ints'], d['uints'], d['labels'], + d['ts'], d['floats']]: + _check(f, 'at') + + def test_at_iat_coercion(self): + + # as timestamp is not a tuple! + dates = date_range('1/1/2000', periods=8) + df = DataFrame(np.random.randn(8, 4), + index=dates, + columns=['A', 'B', 'C', 'D']) + s = df['A'] + + result = s.at[dates[5]] + xp = s.values[5] + assert result == xp + + # GH 7729 + # make sure we are boxing the returns + s = Series(['2014-01-01', '2014-02-02'], dtype='datetime64[ns]') + expected = Timestamp('2014-02-02') + + for r in [lambda: s.iat[1], lambda: s.iloc[1]]: + result = r() + assert result == expected + + s = Series(['1 days', '2 days'], dtype='timedelta64[ns]') + expected = Timedelta('2 days') + + for r in [lambda: s.iat[1], lambda: s.iloc[1]]: + result = r() + assert result == expected + + def test_iat_invalid_args(self): + pass + + def test_imethods_with_dups(self): + + # GH6493 + # iat/iloc with dups + + s = Series(range(5), index=[1, 1, 2, 2, 3], dtype='int64') + result = s.iloc[2] + assert result == 2 + result = s.iat[2] + assert result == 2 + + pytest.raises(IndexError, lambda: s.iat[10]) + pytest.raises(IndexError, lambda: s.iat[-10]) + + result = s.iloc[[2, 3]] + expected = Series([2, 3], [2, 2], dtype='int64') + tm.assert_series_equal(result, expected) + + df = s.to_frame() + result = df.iloc[2] + expected = Series(2, index=[0], name=2) + tm.assert_series_equal(result, expected) + + result = 
df.iat[2, 0] + assert result == 2 + + def test_at_to_fail(self): + # at should not fallback + # GH 7814 + s = Series([1, 2, 3], index=list('abc')) + result = s.at['a'] + assert result == 1 + pytest.raises(ValueError, lambda: s.at[0]) + + df = DataFrame({'A': [1, 2, 3]}, index=list('abc')) + result = df.at['a', 'A'] + assert result == 1 + pytest.raises(ValueError, lambda: df.at['a', 0]) + + s = Series([1, 2, 3], index=[3, 2, 1]) + result = s.at[1] + assert result == 3 + pytest.raises(ValueError, lambda: s.at['a']) + + df = DataFrame({0: [1, 2, 3]}, index=[3, 2, 1]) + result = df.at[1, 0] + assert result == 3 + pytest.raises(ValueError, lambda: df.at['a', 0]) + + # GH 13822, incorrect error string with non-unique columns when missing + # column is accessed + df = DataFrame({'x': [1.], 'y': [2.], 'z': [3.]}) + df.columns = ['x', 'x', 'z'] + + # Check that we get the correct value in the KeyError + tm.assert_raises_regex(KeyError, r"\['y'\] not in index", + lambda: df[['x', 'y', 'z']]) + + def test_at_with_tz(self): + # gh-15822 + df = DataFrame({'name': ['John', 'Anderson'], + 'date': [Timestamp(2017, 3, 13, 13, 32, 56), + Timestamp(2017, 2, 16, 12, 10, 3)]}) + df['date'] = df['date'].dt.tz_localize('Asia/Shanghai') + + expected = Timestamp('2017-03-13 13:32:56+0800', tz='Asia/Shanghai') + + result = df.loc[0, 'date'] + assert result == expected + + result = df.at[0, 'date'] + assert result == expected diff --git a/pandas/tests/indexing/test_timedelta.py b/pandas/tests/indexing/test_timedelta.py index e5ccd72cac20a..3ad3b771b2ab2 100644 --- a/pandas/tests/indexing/test_timedelta.py +++ b/pandas/tests/indexing/test_timedelta.py @@ -1,9 +1,11 @@ +import pytest + import pandas as pd from pandas.util import testing as tm +import numpy as np -class TestTimedeltaIndexing(tm.TestCase): - +class TestTimedeltaIndexing(object): def test_boolean_indexing(self): # GH 14946 df = pd.DataFrame({'x': range(10)}) @@ -13,9 +15,56 @@ def test_boolean_indexing(self): [0, 1, 2, 10, 4, 5, 
6, 7, 8, 9], [10, 10, 10, 3, 4, 5, 6, 7, 8, 9]] for cond, data in zip(conditions, expected_data): - result = df.copy() - result.loc[cond, 'x'] = 10 + result = df.assign(x=df.mask(cond, 10).astype('int64')) expected = pd.DataFrame(data, index=pd.to_timedelta(range(10), unit='s'), - columns=['x']) + columns=['x'], + dtype='int64') tm.assert_frame_equal(expected, result) + + @pytest.mark.parametrize( + "indexer, expected", + [(0, [20, 1, 2, 3, 4, 5, 6, 7, 8, 9]), + (slice(4, 8), [0, 1, 2, 3, 20, 20, 20, 20, 8, 9]), + ([3, 5], [0, 1, 2, 20, 4, 20, 6, 7, 8, 9])]) + def test_list_like_indexing(self, indexer, expected): + # GH 16637 + df = pd.DataFrame({'x': range(10)}, dtype="int64") + df.index = pd.to_timedelta(range(10), unit='s') + + df.loc[df.index[indexer], 'x'] = 20 + + expected = pd.DataFrame(expected, + index=pd.to_timedelta(range(10), unit='s'), + columns=['x'], + dtype="int64") + + tm.assert_frame_equal(expected, df) + + def test_string_indexing(self): + # GH 16896 + df = pd.DataFrame({'x': range(3)}, + index=pd.to_timedelta(range(3), unit='days')) + expected = df.iloc[0] + sliced = df.loc['0 days'] + tm.assert_series_equal(sliced, expected) + + @pytest.mark.parametrize( + "value", + [None, pd.NaT, np.nan]) + def test_masked_setitem(self, value): + # issue (#18586) + series = pd.Series([0, 1, 2], dtype='timedelta64[ns]') + series[series == series[0]] = value + expected = pd.Series([pd.NaT, 1, 2], dtype='timedelta64[ns]') + tm.assert_series_equal(series, expected) + + @pytest.mark.parametrize( + "value", + [None, pd.NaT, np.nan]) + def test_listlike_setitem(self, value): + # issue (#18586) + series = pd.Series([0, 1, 2], dtype='timedelta64[ns]') + series.iloc[0] = value + expected = pd.Series([pd.NaT, 1, 2], dtype='timedelta64[ns]') + tm.assert_series_equal(series, expected) diff --git a/pandas/tests/internals/__init__.py b/pandas/tests/internals/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/test_internals.py 
b/pandas/tests/internals/test_internals.py similarity index 62% rename from pandas/tests/test_internals.py rename to pandas/tests/internals/test_internals.py index f086935df6dc8..9338aba90d7cb 100644 --- a/pandas/tests/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -2,32 +2,43 @@ # pylint: disable=W0102 from datetime import datetime, date - +import operator +import sys import pytest import numpy as np import re +from distutils.version import LooseVersion import itertools from pandas import (Index, MultiIndex, DataFrame, DatetimeIndex, - Series, Categorical) + Series, Categorical, TimedeltaIndex, SparseArray) from pandas.compat import OrderedDict, lrange -from pandas.sparse.array import SparseArray from pandas.core.internals import (BlockPlacement, SingleBlockManager, make_block, BlockManager) import pandas.core.algorithms as algos import pandas.util.testing as tm import pandas as pd -from pandas import lib from pandas.util.testing import (assert_almost_equal, assert_frame_equal, randn, assert_series_equal) from pandas.compat import zip, u +# in 3.6.1 a c-api slicing function changed, see src/compat_helper.h +PY361 = LooseVersion(sys.version) >= LooseVersion('3.6.1') + + +@pytest.fixture +def mgr(): + return create_mgr( + 'a: f8; b: object; c: f8; d: object; e: f8;' + 'f: bool; g: i8; h: complex; i: datetime-1; j: datetime-2;' + 'k: M8[ns, US/Eastern]; l: M8[ns, CET];') + def assert_block_equal(left, right): tm.assert_numpy_array_equal(left.values, right.values) - assert (left.dtype == right.dtype) - tm.assertIsInstance(left.mgr_locs, lib.BlockPlacement) - tm.assertIsInstance(right.mgr_locs, lib.BlockPlacement) + assert left.dtype == right.dtype + assert isinstance(left.mgr_locs, BlockPlacement) + assert isinstance(right.mgr_locs, BlockPlacement) tm.assert_numpy_array_equal(left.mgr_locs.as_array, right.mgr_locs.as_array) @@ -180,9 +191,9 @@ def create_mgr(descr, item_shape=None): [mgr_items] + [np.arange(n) for n in item_shape]) -class 
TestBlock(tm.TestCase): +class TestBlock(object): - def setUp(self): + def setup_method(self, method): # self.fblock = get_float_ex() # a,c,e # self.cblock = get_complex_ex() # # self.oblock = get_obj_ex() @@ -197,11 +208,11 @@ def setUp(self): def test_constructor(self): int32block = create_block('i4', [0]) - self.assertEqual(int32block.dtype, np.int32) + assert int32block.dtype == np.int32 def test_pickle(self): def _check(blk): - assert_block_equal(self.round_trip_pickle(blk), blk) + assert_block_equal(tm.round_trip_pickle(blk), blk) _check(self.fblock) _check(self.cblock) @@ -209,14 +220,14 @@ def _check(blk): _check(self.bool_block) def test_mgr_locs(self): - tm.assertIsInstance(self.fblock.mgr_locs, lib.BlockPlacement) + assert isinstance(self.fblock.mgr_locs, BlockPlacement) tm.assert_numpy_array_equal(self.fblock.mgr_locs.as_array, np.array([0, 2, 4], dtype=np.int64)) def test_attrs(self): - self.assertEqual(self.fblock.shape, self.fblock.values.shape) - self.assertEqual(self.fblock.dtype, self.fblock.values.dtype) - self.assertEqual(len(self.fblock), len(self.fblock.values)) + assert self.fblock.shape == self.fblock.values.shape + assert self.fblock.dtype == self.fblock.values.dtype + assert len(self.fblock) == len(self.fblock.values) def test_merge(self): avals = randn(2, 10) @@ -236,7 +247,7 @@ def test_merge(self): def test_copy(self): cop = self.fblock.copy() - self.assertIsNot(cop, self.fblock) + assert cop is not self.fblock assert_block_equal(self.fblock, cop) def test_reindex_index(self): @@ -251,102 +262,82 @@ def test_insert(self): def test_delete(self): newb = self.fblock.copy() newb.delete(0) - tm.assertIsInstance(newb.mgr_locs, lib.BlockPlacement) + assert isinstance(newb.mgr_locs, BlockPlacement) tm.assert_numpy_array_equal(newb.mgr_locs.as_array, np.array([2, 4], dtype=np.int64)) - self.assertTrue((newb.values[0] == 1).all()) + assert (newb.values[0] == 1).all() newb = self.fblock.copy() newb.delete(1) - tm.assertIsInstance(newb.mgr_locs, 
lib.BlockPlacement) + assert isinstance(newb.mgr_locs, BlockPlacement) tm.assert_numpy_array_equal(newb.mgr_locs.as_array, np.array([0, 4], dtype=np.int64)) - self.assertTrue((newb.values[1] == 2).all()) + assert (newb.values[1] == 2).all() newb = self.fblock.copy() newb.delete(2) tm.assert_numpy_array_equal(newb.mgr_locs.as_array, np.array([0, 2], dtype=np.int64)) - self.assertTrue((newb.values[1] == 1).all()) + assert (newb.values[1] == 1).all() newb = self.fblock.copy() - self.assertRaises(Exception, newb.delete, 3) - - def test_split_block_at(self): + with pytest.raises(Exception): + newb.delete(3) - # with dup column support this method was taken out - # GH3679 - pytest.skip("skipping for now") + def test_make_block_same_class(self): + # issue 19431 + block = create_block('M8[ns, US/Eastern]', [3]) + with tm.assert_produces_warning(DeprecationWarning, + check_stacklevel=False): + block.make_block_same_class(block.values.values, + dtype=block.values.dtype) - bs = list(self.fblock.split_block_at('a')) - self.assertEqual(len(bs), 1) - self.assertTrue(np.array_equal(bs[0].items, ['c', 'e'])) - bs = list(self.fblock.split_block_at('c')) - self.assertEqual(len(bs), 2) - self.assertTrue(np.array_equal(bs[0].items, ['a'])) - self.assertTrue(np.array_equal(bs[1].items, ['e'])) - - bs = list(self.fblock.split_block_at('e')) - self.assertEqual(len(bs), 1) - self.assertTrue(np.array_equal(bs[0].items, ['a', 'c'])) - - # bblock = get_bool_ex(['f']) - # bs = list(bblock.split_block_at('f')) - # self.assertEqual(len(bs), 0) - - -class TestDatetimeBlock(tm.TestCase): +class TestDatetimeBlock(object): def test_try_coerce_arg(self): block = create_block('datetime', [0]) # coerce None none_coerced = block._try_coerce_args(block.values, None)[2] - self.assertTrue(pd.Timestamp(none_coerced) is pd.NaT) + assert pd.Timestamp(none_coerced) is pd.NaT # coerce different types of date bojects vals = (np.datetime64('2010-10-10'), datetime(2010, 10, 10), date(2010, 10, 10)) for val in 
vals: coerced = block._try_coerce_args(block.values, val)[2] - self.assertEqual(np.int64, type(coerced)) - self.assertEqual(pd.Timestamp('2010-10-10'), pd.Timestamp(coerced)) - + assert np.int64 == type(coerced) + assert pd.Timestamp('2010-10-10') == pd.Timestamp(coerced) -class TestBlockManager(tm.TestCase): - def setUp(self): - self.mgr = create_mgr( - 'a: f8; b: object; c: f8; d: object; e: f8;' - 'f: bool; g: i8; h: complex; i: datetime-1; j: datetime-2;' - 'k: M8[ns, US/Eastern]; l: M8[ns, CET];') +class TestBlockManager(object): def test_constructor_corner(self): pass def test_attrs(self): mgr = create_mgr('a,b,c: f8-1; d,e,f: f8-2') - self.assertEqual(mgr.nblocks, 2) - self.assertEqual(len(mgr), 6) + assert mgr.nblocks == 2 + assert len(mgr) == 6 def test_is_mixed_dtype(self): - self.assertFalse(create_mgr('a,b:f8').is_mixed_type) - self.assertFalse(create_mgr('a:f8-1; b:f8-2').is_mixed_type) + assert not create_mgr('a,b:f8').is_mixed_type + assert not create_mgr('a:f8-1; b:f8-2').is_mixed_type - self.assertTrue(create_mgr('a,b:f8; c,d: f4').is_mixed_type) - self.assertTrue(create_mgr('a,b:f8; c,d: object').is_mixed_type) + assert create_mgr('a,b:f8; c,d: f4').is_mixed_type + assert create_mgr('a,b:f8; c,d: object').is_mixed_type def test_is_indexed_like(self): mgr1 = create_mgr('a,b: f8') mgr2 = create_mgr('a:i8; b:bool') mgr3 = create_mgr('a,b,c: f8') - self.assertTrue(mgr1._is_indexed_like(mgr1)) - self.assertTrue(mgr1._is_indexed_like(mgr2)) - self.assertTrue(mgr1._is_indexed_like(mgr3)) + assert mgr1._is_indexed_like(mgr1) + assert mgr1._is_indexed_like(mgr2) + assert mgr1._is_indexed_like(mgr3) - self.assertFalse(mgr1._is_indexed_like(mgr1.get_slice( - slice(-1), axis=1))) + assert not mgr1._is_indexed_like(mgr1.get_slice( + slice(-1), axis=1)) def test_duplicate_ref_loc_failure(self): tmp_mgr = create_mgr('a:bool; a: f8') @@ -355,61 +346,63 @@ def test_duplicate_ref_loc_failure(self): blocks[0].mgr_locs = np.array([0]) blocks[1].mgr_locs = 
np.array([0]) + # test trying to create block manager with overlapping ref locs - self.assertRaises(AssertionError, BlockManager, blocks, axes) + with pytest.raises(AssertionError): + BlockManager(blocks, axes) blocks[0].mgr_locs = np.array([0]) blocks[1].mgr_locs = np.array([1]) mgr = BlockManager(blocks, axes) mgr.iget(1) - def test_contains(self): - self.assertIn('a', self.mgr) - self.assertNotIn('baz', self.mgr) + def test_contains(self, mgr): + assert 'a' in mgr + assert 'baz' not in mgr - def test_pickle(self): + def test_pickle(self, mgr): - mgr2 = self.round_trip_pickle(self.mgr) - assert_frame_equal(DataFrame(self.mgr), DataFrame(mgr2)) + mgr2 = tm.round_trip_pickle(mgr) + assert_frame_equal(DataFrame(mgr), DataFrame(mgr2)) # share ref_items - # self.assertIs(mgr2.blocks[0].ref_items, mgr2.blocks[1].ref_items) + # assert mgr2.blocks[0].ref_items is mgr2.blocks[1].ref_items # GH2431 - self.assertTrue(hasattr(mgr2, "_is_consolidated")) - self.assertTrue(hasattr(mgr2, "_known_consolidated")) + assert hasattr(mgr2, "_is_consolidated") + assert hasattr(mgr2, "_known_consolidated") # reset to False on load - self.assertFalse(mgr2._is_consolidated) - self.assertFalse(mgr2._known_consolidated) + assert not mgr2._is_consolidated + assert not mgr2._known_consolidated def test_non_unique_pickle(self): mgr = create_mgr('a,a,a:f8') - mgr2 = self.round_trip_pickle(mgr) + mgr2 = tm.round_trip_pickle(mgr) assert_frame_equal(DataFrame(mgr), DataFrame(mgr2)) mgr = create_mgr('a: f8; a: i8') - mgr2 = self.round_trip_pickle(mgr) + mgr2 = tm.round_trip_pickle(mgr) assert_frame_equal(DataFrame(mgr), DataFrame(mgr2)) def test_categorical_block_pickle(self): mgr = create_mgr('a: category') - mgr2 = self.round_trip_pickle(mgr) + mgr2 = tm.round_trip_pickle(mgr) assert_frame_equal(DataFrame(mgr), DataFrame(mgr2)) smgr = create_single_mgr('category') - smgr2 = self.round_trip_pickle(smgr) + smgr2 = tm.round_trip_pickle(smgr) assert_series_equal(Series(smgr), Series(smgr2)) - def 
test_get_scalar(self): - for item in self.mgr.items: - for i, index in enumerate(self.mgr.axes[1]): - res = self.mgr.get_scalar((item, index)) - exp = self.mgr.get(item, fastpath=False)[i] - self.assertEqual(res, exp) - exp = self.mgr.get(item).internal_values()[i] - self.assertEqual(res, exp) + def test_get_scalar(self, mgr): + for item in mgr.items: + for i, index in enumerate(mgr.axes[1]): + res = mgr.get_scalar((item, index)) + exp = mgr.get(item, fastpath=False)[i] + assert res == exp + exp = mgr.get(item).internal_values()[i] + assert res == exp def test_get(self): cols = Index(list('abc')) @@ -438,30 +431,21 @@ def test_set(self): tm.assert_numpy_array_equal(mgr.get('d').internal_values(), np.array(['foo'] * 3, dtype=np.object_)) - def test_insert(self): - self.mgr.insert(0, 'inserted', np.arange(N)) - - self.assertEqual(self.mgr.items[0], 'inserted') - assert_almost_equal(self.mgr.get('inserted'), np.arange(N)) + def test_set_change_dtype(self, mgr): + mgr.set('baz', np.zeros(N, dtype=bool)) - for blk in self.mgr.blocks: - yield self.assertIs, self.mgr.items, blk.ref_items + mgr.set('baz', np.repeat('foo', N)) + assert mgr.get('baz').dtype == np.object_ - def test_set_change_dtype(self): - self.mgr.set('baz', np.zeros(N, dtype=bool)) - - self.mgr.set('baz', np.repeat('foo', N)) - self.assertEqual(self.mgr.get('baz').dtype, np.object_) - - mgr2 = self.mgr.consolidate() + mgr2 = mgr.consolidate() mgr2.set('baz', np.repeat('foo', N)) - self.assertEqual(mgr2.get('baz').dtype, np.object_) + assert mgr2.get('baz').dtype == np.object_ mgr2.set('quux', randn(N).astype(int)) - self.assertEqual(mgr2.get('quux').dtype, np.int_) + assert mgr2.get('quux').dtype == np.int_ mgr2.set('quux', randn(N)) - self.assertEqual(mgr2.get('quux').dtype, np.float_) + assert mgr2.get('quux').dtype == np.float_ def test_set_change_dtype_slice(self): # GH8850 cols = MultiIndex.from_tuples([('1st', 'a'), ('2nd', 'b'), ('3rd', 'c') @@ -469,70 +453,70 @@ def 
test_set_change_dtype_slice(self): # GH8850 df = DataFrame([[1.0, 2, 3], [4.0, 5, 6]], columns=cols) df['2nd'] = df['2nd'] * 2.0 - self.assertEqual(sorted(df.blocks.keys()), ['float64', 'int64']) - assert_frame_equal(df.blocks['float64'], DataFrame( + blocks = df._to_dict_of_blocks() + assert sorted(blocks.keys()) == ['float64', 'int64'] + assert_frame_equal(blocks['float64'], DataFrame( [[1.0, 4.0], [4.0, 10.0]], columns=cols[:2])) - assert_frame_equal(df.blocks['int64'], DataFrame( + assert_frame_equal(blocks['int64'], DataFrame( [[3], [6]], columns=cols[2:])) - def test_copy(self): - cp = self.mgr.copy(deep=False) - for blk, cp_blk in zip(self.mgr.blocks, cp.blocks): + def test_copy(self, mgr): + cp = mgr.copy(deep=False) + for blk, cp_blk in zip(mgr.blocks, cp.blocks): # view assertion - self.assertTrue(cp_blk.equals(blk)) - self.assertTrue(cp_blk.values.base is blk.values.base) + assert cp_blk.equals(blk) + assert cp_blk.values.base is blk.values.base - cp = self.mgr.copy(deep=True) - for blk, cp_blk in zip(self.mgr.blocks, cp.blocks): + cp = mgr.copy(deep=True) + for blk, cp_blk in zip(mgr.blocks, cp.blocks): # copy assertion we either have a None for a base or in case of # some blocks it is an array (e.g. datetimetz), but was copied - self.assertTrue(cp_blk.equals(blk)) + assert cp_blk.equals(blk) if cp_blk.values.base is not None and blk.values.base is not None: - self.assertFalse(cp_blk.values.base is blk.values.base) + assert cp_blk.values.base is not blk.values.base else: - self.assertTrue(cp_blk.values.base is None and blk.values.base - is None) + assert cp_blk.values.base is None and blk.values.base is None def test_sparse(self): mgr = create_mgr('a: sparse-1; b: sparse-2') # what to test here? 
- self.assertEqual(mgr.as_matrix().dtype, np.float64) + assert mgr.as_array().dtype == np.float64 def test_sparse_mixed(self): mgr = create_mgr('a: sparse-1; b: sparse-2; c: f8') - self.assertEqual(len(mgr.blocks), 3) - self.assertIsInstance(mgr, BlockManager) + assert len(mgr.blocks) == 3 + assert isinstance(mgr, BlockManager) # what to test here? - def test_as_matrix_float(self): + def test_as_array_float(self): mgr = create_mgr('c: f4; d: f2; e: f8') - self.assertEqual(mgr.as_matrix().dtype, np.float64) + assert mgr.as_array().dtype == np.float64 mgr = create_mgr('c: f4; d: f2') - self.assertEqual(mgr.as_matrix().dtype, np.float32) + assert mgr.as_array().dtype == np.float32 - def test_as_matrix_int_bool(self): + def test_as_array_int_bool(self): mgr = create_mgr('a: bool-1; b: bool-2') - self.assertEqual(mgr.as_matrix().dtype, np.bool_) + assert mgr.as_array().dtype == np.bool_ mgr = create_mgr('a: i8-1; b: i8-2; c: i4; d: i2; e: u1') - self.assertEqual(mgr.as_matrix().dtype, np.int64) + assert mgr.as_array().dtype == np.int64 mgr = create_mgr('c: i4; d: i2; e: u1') - self.assertEqual(mgr.as_matrix().dtype, np.int32) + assert mgr.as_array().dtype == np.int32 - def test_as_matrix_datetime(self): + def test_as_array_datetime(self): mgr = create_mgr('h: datetime-1; g: datetime-2') - self.assertEqual(mgr.as_matrix().dtype, 'M8[ns]') + assert mgr.as_array().dtype == 'M8[ns]' - def test_as_matrix_datetime_tz(self): + def test_as_array_datetime_tz(self): mgr = create_mgr('h: M8[ns, US/Eastern]; g: M8[ns, CET]') - self.assertEqual(mgr.get('h').dtype, 'datetime64[ns, US/Eastern]') - self.assertEqual(mgr.get('g').dtype, 'datetime64[ns, CET]') - self.assertEqual(mgr.as_matrix().dtype, 'object') + assert mgr.get('h').dtype == 'datetime64[ns, US/Eastern]' + assert mgr.get('g').dtype == 'datetime64[ns, CET]' + assert mgr.as_array().dtype == 'object' def test_astype(self): # coerce all @@ -540,9 +524,9 @@ def test_astype(self): for t in ['float16', 'float32', 'float64', 
'int32', 'int64']: t = np.dtype(t) tmgr = mgr.astype(t) - self.assertEqual(tmgr.get('c').dtype.type, t) - self.assertEqual(tmgr.get('d').dtype.type, t) - self.assertEqual(tmgr.get('e').dtype.type, t) + assert tmgr.get('c').dtype.type == t + assert tmgr.get('d').dtype.type == t + assert tmgr.get('e').dtype.type == t # mixed mgr = create_mgr('a,b: object; c: bool; d: datetime;' @@ -550,24 +534,24 @@ def test_astype(self): for t in ['float16', 'float32', 'float64', 'int32', 'int64']: t = np.dtype(t) tmgr = mgr.astype(t, errors='ignore') - self.assertEqual(tmgr.get('c').dtype.type, t) - self.assertEqual(tmgr.get('e').dtype.type, t) - self.assertEqual(tmgr.get('f').dtype.type, t) - self.assertEqual(tmgr.get('g').dtype.type, t) + assert tmgr.get('c').dtype.type == t + assert tmgr.get('e').dtype.type == t + assert tmgr.get('f').dtype.type == t + assert tmgr.get('g').dtype.type == t - self.assertEqual(tmgr.get('a').dtype.type, np.object_) - self.assertEqual(tmgr.get('b').dtype.type, np.object_) + assert tmgr.get('a').dtype.type == np.object_ + assert tmgr.get('b').dtype.type == np.object_ if t != np.int64: - self.assertEqual(tmgr.get('d').dtype.type, np.datetime64) + assert tmgr.get('d').dtype.type == np.datetime64 else: - self.assertEqual(tmgr.get('d').dtype.type, t) + assert tmgr.get('d').dtype.type == t def test_convert(self): def _compare(old_mgr, new_mgr): """ compare the blocks, numeric compare ==, object don't """ old_blocks = set(old_mgr.blocks) new_blocks = set(new_mgr.blocks) - self.assertEqual(len(old_blocks), len(new_blocks)) + assert len(old_blocks) == len(new_blocks) # compare non-numeric for b in old_blocks: @@ -576,7 +560,7 @@ def _compare(old_mgr, new_mgr): if (b.values == nb.values).all(): found = True break - self.assertTrue(found) + assert found for b in new_blocks: found = False @@ -584,7 +568,7 @@ def _compare(old_mgr, new_mgr): if (b.values == ob.values).all(): found = True break - self.assertTrue(found) + assert found # noops mgr = create_mgr('f: 
i8; g: f8') @@ -601,11 +585,11 @@ def _compare(old_mgr, new_mgr): mgr.set('b', np.array(['2.'] * N, dtype=np.object_)) mgr.set('foo', np.array(['foo.'] * N, dtype=np.object_)) new_mgr = mgr.convert(numeric=True) - self.assertEqual(new_mgr.get('a').dtype, np.int64) - self.assertEqual(new_mgr.get('b').dtype, np.float64) - self.assertEqual(new_mgr.get('foo').dtype, np.object_) - self.assertEqual(new_mgr.get('f').dtype, np.int64) - self.assertEqual(new_mgr.get('g').dtype, np.float64) + assert new_mgr.get('a').dtype == np.int64 + assert new_mgr.get('b').dtype == np.float64 + assert new_mgr.get('foo').dtype == np.object_ + assert new_mgr.get('f').dtype == np.int64 + assert new_mgr.get('g').dtype == np.float64 mgr = create_mgr('a,b,foo: object; f: i4; bool: bool; dt: datetime;' 'i: i8; g: f8; h: f2') @@ -613,15 +597,15 @@ def _compare(old_mgr, new_mgr): mgr.set('b', np.array(['2.'] * N, dtype=np.object_)) mgr.set('foo', np.array(['foo.'] * N, dtype=np.object_)) new_mgr = mgr.convert(numeric=True) - self.assertEqual(new_mgr.get('a').dtype, np.int64) - self.assertEqual(new_mgr.get('b').dtype, np.float64) - self.assertEqual(new_mgr.get('foo').dtype, np.object_) - self.assertEqual(new_mgr.get('f').dtype, np.int32) - self.assertEqual(new_mgr.get('bool').dtype, np.bool_) - self.assertEqual(new_mgr.get('dt').dtype.type, np.datetime64) - self.assertEqual(new_mgr.get('i').dtype, np.int64) - self.assertEqual(new_mgr.get('g').dtype, np.float64) - self.assertEqual(new_mgr.get('h').dtype, np.float16) + assert new_mgr.get('a').dtype == np.int64 + assert new_mgr.get('b').dtype == np.float64 + assert new_mgr.get('foo').dtype == np.object_ + assert new_mgr.get('f').dtype == np.int32 + assert new_mgr.get('bool').dtype == np.bool_ + assert new_mgr.get('dt').dtype.type, np.datetime64 + assert new_mgr.get('i').dtype == np.int64 + assert new_mgr.get('g').dtype == np.float64 + assert new_mgr.get('h').dtype == np.float16 def test_interleave(self): @@ -629,49 +613,49 @@ def test_interleave(self): 
for dtype in ['f8', 'i8', 'object', 'bool', 'complex', 'M8[ns]', 'm8[ns]']: mgr = create_mgr('a: {0}'.format(dtype)) - self.assertEqual(mgr.as_matrix().dtype, dtype) + assert mgr.as_array().dtype == dtype mgr = create_mgr('a: {0}; b: {0}'.format(dtype)) - self.assertEqual(mgr.as_matrix().dtype, dtype) + assert mgr.as_array().dtype == dtype # will be converted according the actual dtype of the underlying mgr = create_mgr('a: category') - self.assertEqual(mgr.as_matrix().dtype, 'i8') + assert mgr.as_array().dtype == 'i8' mgr = create_mgr('a: category; b: category') - self.assertEqual(mgr.as_matrix().dtype, 'i8'), + assert mgr.as_array().dtype == 'i8' mgr = create_mgr('a: category; b: category2') - self.assertEqual(mgr.as_matrix().dtype, 'object') + assert mgr.as_array().dtype == 'object' mgr = create_mgr('a: category2') - self.assertEqual(mgr.as_matrix().dtype, 'object') + assert mgr.as_array().dtype == 'object' mgr = create_mgr('a: category2; b: category2') - self.assertEqual(mgr.as_matrix().dtype, 'object') + assert mgr.as_array().dtype == 'object' # combinations mgr = create_mgr('a: f8') - self.assertEqual(mgr.as_matrix().dtype, 'f8') + assert mgr.as_array().dtype == 'f8' mgr = create_mgr('a: f8; b: i8') - self.assertEqual(mgr.as_matrix().dtype, 'f8') + assert mgr.as_array().dtype == 'f8' mgr = create_mgr('a: f4; b: i8') - self.assertEqual(mgr.as_matrix().dtype, 'f4') + assert mgr.as_array().dtype == 'f8' mgr = create_mgr('a: f4; b: i8; d: object') - self.assertEqual(mgr.as_matrix().dtype, 'object') + assert mgr.as_array().dtype == 'object' mgr = create_mgr('a: bool; b: i8') - self.assertEqual(mgr.as_matrix().dtype, 'object') + assert mgr.as_array().dtype == 'object' mgr = create_mgr('a: complex') - self.assertEqual(mgr.as_matrix().dtype, 'complex') + assert mgr.as_array().dtype == 'complex' mgr = create_mgr('a: f8; b: category') - self.assertEqual(mgr.as_matrix().dtype, 'object') + assert mgr.as_array().dtype == 'object' mgr = create_mgr('a: M8[ns]; b: category') 
- self.assertEqual(mgr.as_matrix().dtype, 'object') + assert mgr.as_array().dtype == 'object' mgr = create_mgr('a: M8[ns]; b: bool') - self.assertEqual(mgr.as_matrix().dtype, 'object') + assert mgr.as_array().dtype == 'object' mgr = create_mgr('a: M8[ns]; b: i8') - self.assertEqual(mgr.as_matrix().dtype, 'object') + assert mgr.as_array().dtype == 'object' mgr = create_mgr('a: m8[ns]; b: bool') - self.assertEqual(mgr.as_matrix().dtype, 'object') + assert mgr.as_array().dtype == 'object' mgr = create_mgr('a: m8[ns]; b: i8') - self.assertEqual(mgr.as_matrix().dtype, 'object') + assert mgr.as_array().dtype == 'object' mgr = create_mgr('a: M8[ns]; b: m8[ns]') - self.assertEqual(mgr.as_matrix().dtype, 'object') + assert mgr.as_array().dtype == 'object' def test_interleave_non_unique_cols(self): df = DataFrame([ @@ -682,26 +666,26 @@ def test_interleave_non_unique_cols(self): df_unique = df.copy() df_unique.columns = ['x', 'y'] - self.assertEqual(df_unique.values.shape, df.values.shape) + assert df_unique.values.shape == df.values.shape tm.assert_numpy_array_equal(df_unique.values[0], df.values[0]) tm.assert_numpy_array_equal(df_unique.values[1], df.values[1]) def test_consolidate(self): pass - def test_consolidate_ordering_issues(self): - self.mgr.set('f', randn(N)) - self.mgr.set('d', randn(N)) - self.mgr.set('b', randn(N)) - self.mgr.set('g', randn(N)) - self.mgr.set('h', randn(N)) - - # we have datetime/tz blocks in self.mgr - cons = self.mgr.consolidate() - self.assertEqual(cons.nblocks, 4) - cons = self.mgr.consolidate().get_numeric_data() - self.assertEqual(cons.nblocks, 1) - tm.assertIsInstance(cons.blocks[0].mgr_locs, lib.BlockPlacement) + def test_consolidate_ordering_issues(self, mgr): + mgr.set('f', randn(N)) + mgr.set('d', randn(N)) + mgr.set('b', randn(N)) + mgr.set('g', randn(N)) + mgr.set('h', randn(N)) + + # we have datetime/tz blocks in mgr + cons = mgr.consolidate() + assert cons.nblocks == 4 + cons = mgr.consolidate().get_numeric_data() + assert 
cons.nblocks == 1 + assert isinstance(cons.blocks[0].mgr_locs, BlockPlacement) tm.assert_numpy_array_equal(cons.blocks[0].mgr_locs.as_array, np.arange(len(cons.items), dtype=np.int64)) @@ -714,7 +698,7 @@ def test_reindex_items(self): 'f: bool; g: f8-2') reindexed = mgr.reindex_axis(['g', 'c', 'a', 'd'], axis=0) - self.assertEqual(reindexed.nblocks, 2) + assert reindexed.nblocks == 2 tm.assert_index_equal(reindexed.items, pd.Index(['g', 'c', 'a', 'd'])) assert_almost_equal( mgr.get('g', fastpath=False), reindexed.get('g', fastpath=False)) @@ -748,9 +732,9 @@ def test_multiindex_xs(self): mgr.set_axis(1, index) result = mgr.xs('bar', axis=1) - self.assertEqual(result.shape, (6, 2)) - self.assertEqual(result.axes[1][0], ('bar', 'one')) - self.assertEqual(result.axes[1][1], ('bar', 'two')) + assert result.shape == (6, 2) + assert result.axes[1][0] == ('bar', 'one') + assert result.axes[1][1] == ('bar', 'two') def test_get_numeric_data(self): mgr = create_mgr('int: int; float: float; complex: complex;' @@ -826,11 +810,11 @@ def test_equals(self): # unique items bm1 = create_mgr('a,b,c: i8-1; d,e,f: i8-2') bm2 = BlockManager(bm1.blocks[::-1], bm1.axes) - self.assertTrue(bm1.equals(bm2)) + assert bm1.equals(bm2) bm1 = create_mgr('a,a,a: i8-1; b,b,b: i8-2') bm2 = BlockManager(bm1.blocks[::-1], bm1.axes) - self.assertTrue(bm1.equals(bm2)) + assert bm1.equals(bm2) def test_equals_block_order_different_dtypes(self): # GH 9330 @@ -848,19 +832,19 @@ def test_equals_block_order_different_dtypes(self): block_perms = itertools.permutations(bm.blocks) for bm_perm in block_perms: bm_this = BlockManager(bm_perm, bm.axes) - self.assertTrue(bm.equals(bm_this)) - self.assertTrue(bm_this.equals(bm)) + assert bm.equals(bm_this) + assert bm_this.equals(bm) def test_single_mgr_ctor(self): mgr = create_single_mgr('f8', num_rows=5) - self.assertEqual(mgr.as_matrix().tolist(), [0., 1., 2., 3., 4.]) + assert mgr.as_array().tolist() == [0., 1., 2., 3., 4.] 
def test_validate_bool_args(self): invalid_values = [1, "True", [1, 2, 3], 5.0] bm1 = create_mgr('a,b,c: i8-1; d,e,f: i8-2') for value in invalid_values: - with self.assertRaises(ValueError): + with pytest.raises(ValueError): bm1.replace_list([1], [2], inplace=value) @@ -900,7 +884,7 @@ class TestIndexing(object): def test_get_slice(self): def assert_slice_ok(mgr, axis, slobj): # import pudb; pudb.set_trace() - mat = mgr.as_matrix() + mat = mgr.as_array() # we maybe using an ndarray to test slicing and # might not be the full length of the axis @@ -911,140 +895,161 @@ def assert_slice_ok(mgr, axis, slobj): len(ax) - len(slobj), dtype=bool)]) sliced = mgr.get_slice(slobj, axis=axis) mat_slobj = (slice(None), ) * axis + (slobj, ) - tm.assert_numpy_array_equal(mat[mat_slobj], sliced.as_matrix(), + tm.assert_numpy_array_equal(mat[mat_slobj], sliced.as_array(), check_dtype=False) tm.assert_index_equal(mgr.axes[axis][slobj], sliced.axes[axis]) for mgr in self.MANAGERS: for ax in range(mgr.ndim): # slice - yield assert_slice_ok, mgr, ax, slice(None) - yield assert_slice_ok, mgr, ax, slice(3) - yield assert_slice_ok, mgr, ax, slice(100) - yield assert_slice_ok, mgr, ax, slice(1, 4) - yield assert_slice_ok, mgr, ax, slice(3, 0, -2) + assert_slice_ok(mgr, ax, slice(None)) + assert_slice_ok(mgr, ax, slice(3)) + assert_slice_ok(mgr, ax, slice(100)) + assert_slice_ok(mgr, ax, slice(1, 4)) + assert_slice_ok(mgr, ax, slice(3, 0, -2)) # boolean mask - yield assert_slice_ok, mgr, ax, np.array([], dtype=np.bool_) - yield (assert_slice_ok, mgr, ax, - np.ones(mgr.shape[ax], dtype=np.bool_)) - yield (assert_slice_ok, mgr, ax, - np.zeros(mgr.shape[ax], dtype=np.bool_)) + assert_slice_ok( + mgr, ax, np.array([], dtype=np.bool_)) + assert_slice_ok( + mgr, ax, + np.ones(mgr.shape[ax], dtype=np.bool_)) + assert_slice_ok( + mgr, ax, + np.zeros(mgr.shape[ax], dtype=np.bool_)) if mgr.shape[ax] >= 3: - yield (assert_slice_ok, mgr, ax, - np.arange(mgr.shape[ax]) % 3 == 0) - yield 
(assert_slice_ok, mgr, ax, np.array( - [True, True, False], dtype=np.bool_)) + assert_slice_ok( + mgr, ax, + np.arange(mgr.shape[ax]) % 3 == 0) + assert_slice_ok( + mgr, ax, np.array( + [True, True, False], dtype=np.bool_)) # fancy indexer - yield assert_slice_ok, mgr, ax, [] - yield assert_slice_ok, mgr, ax, lrange(mgr.shape[ax]) + assert_slice_ok(mgr, ax, []) + assert_slice_ok(mgr, ax, lrange(mgr.shape[ax])) if mgr.shape[ax] >= 3: - yield assert_slice_ok, mgr, ax, [0, 1, 2] - yield assert_slice_ok, mgr, ax, [-1, -2, -3] + assert_slice_ok(mgr, ax, [0, 1, 2]) + assert_slice_ok(mgr, ax, [-1, -2, -3]) def test_take(self): def assert_take_ok(mgr, axis, indexer): - mat = mgr.as_matrix() + mat = mgr.as_array() taken = mgr.take(indexer, axis) tm.assert_numpy_array_equal(np.take(mat, indexer, axis), - taken.as_matrix(), check_dtype=False) + taken.as_array(), check_dtype=False) tm.assert_index_equal(mgr.axes[axis].take(indexer), taken.axes[axis]) for mgr in self.MANAGERS: for ax in range(mgr.ndim): # take/fancy indexer - yield assert_take_ok, mgr, ax, [] - yield assert_take_ok, mgr, ax, [0, 0, 0] - yield assert_take_ok, mgr, ax, lrange(mgr.shape[ax]) + assert_take_ok(mgr, ax, []) + assert_take_ok(mgr, ax, [0, 0, 0]) + assert_take_ok(mgr, ax, lrange(mgr.shape[ax])) if mgr.shape[ax] >= 3: - yield assert_take_ok, mgr, ax, [0, 1, 2] - yield assert_take_ok, mgr, ax, [-1, -2, -3] + assert_take_ok(mgr, ax, [0, 1, 2]) + assert_take_ok(mgr, ax, [-1, -2, -3]) def test_reindex_axis(self): def assert_reindex_axis_is_ok(mgr, axis, new_labels, fill_value): - mat = mgr.as_matrix() + mat = mgr.as_array() indexer = mgr.axes[axis].get_indexer_for(new_labels) reindexed = mgr.reindex_axis(new_labels, axis, fill_value=fill_value) tm.assert_numpy_array_equal(algos.take_nd(mat, indexer, axis, fill_value=fill_value), - reindexed.as_matrix(), + reindexed.as_array(), check_dtype=False) tm.assert_index_equal(reindexed.axes[axis], new_labels) for mgr in self.MANAGERS: for ax in range(mgr.ndim): for 
fill_value in (None, np.nan, 100.): - yield (assert_reindex_axis_is_ok, mgr, ax, - pd.Index([]), fill_value) - yield (assert_reindex_axis_is_ok, mgr, ax, mgr.axes[ax], - fill_value) - yield (assert_reindex_axis_is_ok, mgr, ax, - mgr.axes[ax][[0, 0, 0]], fill_value) - yield (assert_reindex_axis_is_ok, mgr, ax, - pd.Index(['foo', 'bar', 'baz']), fill_value) - yield (assert_reindex_axis_is_ok, mgr, ax, - pd.Index(['foo', mgr.axes[ax][0], 'baz']), - fill_value) + assert_reindex_axis_is_ok( + mgr, ax, + pd.Index([]), fill_value) + assert_reindex_axis_is_ok( + mgr, ax, mgr.axes[ax], + fill_value) + assert_reindex_axis_is_ok( + mgr, ax, + mgr.axes[ax][[0, 0, 0]], fill_value) + assert_reindex_axis_is_ok( + mgr, ax, + pd.Index(['foo', 'bar', 'baz']), fill_value) + assert_reindex_axis_is_ok( + mgr, ax, + pd.Index(['foo', mgr.axes[ax][0], 'baz']), + fill_value) if mgr.shape[ax] >= 3: - yield (assert_reindex_axis_is_ok, mgr, ax, - mgr.axes[ax][:-3], fill_value) - yield (assert_reindex_axis_is_ok, mgr, ax, - mgr.axes[ax][-3::-1], fill_value) - yield (assert_reindex_axis_is_ok, mgr, ax, - mgr.axes[ax][[0, 1, 2, 0, 1, 2]], fill_value) + assert_reindex_axis_is_ok( + mgr, ax, + mgr.axes[ax][:-3], fill_value) + assert_reindex_axis_is_ok( + mgr, ax, + mgr.axes[ax][-3::-1], fill_value) + assert_reindex_axis_is_ok( + mgr, ax, + mgr.axes[ax][[0, 1, 2, 0, 1, 2]], fill_value) def test_reindex_indexer(self): def assert_reindex_indexer_is_ok(mgr, axis, new_labels, indexer, fill_value): - mat = mgr.as_matrix() + mat = mgr.as_array() reindexed_mat = algos.take_nd(mat, indexer, axis, fill_value=fill_value) reindexed = mgr.reindex_indexer(new_labels, indexer, axis, fill_value=fill_value) tm.assert_numpy_array_equal(reindexed_mat, - reindexed.as_matrix(), + reindexed.as_array(), check_dtype=False) tm.assert_index_equal(reindexed.axes[axis], new_labels) for mgr in self.MANAGERS: for ax in range(mgr.ndim): for fill_value in (None, np.nan, 100.): - yield (assert_reindex_indexer_is_ok, mgr, ax, - 
pd.Index([]), [], fill_value) - yield (assert_reindex_indexer_is_ok, mgr, ax, - mgr.axes[ax], np.arange(mgr.shape[ax]), fill_value) - yield (assert_reindex_indexer_is_ok, mgr, ax, - pd.Index(['foo'] * mgr.shape[ax]), - np.arange(mgr.shape[ax]), fill_value) - - yield (assert_reindex_indexer_is_ok, mgr, ax, - mgr.axes[ax][::-1], np.arange(mgr.shape[ax]), - fill_value) - yield (assert_reindex_indexer_is_ok, mgr, ax, mgr.axes[ax], - np.arange(mgr.shape[ax])[::-1], fill_value) - yield (assert_reindex_indexer_is_ok, mgr, ax, - pd.Index(['foo', 'bar', 'baz']), - [0, 0, 0], fill_value) - yield (assert_reindex_indexer_is_ok, mgr, ax, - pd.Index(['foo', 'bar', 'baz']), - [-1, 0, -1], fill_value) - yield (assert_reindex_indexer_is_ok, mgr, ax, - pd.Index(['foo', mgr.axes[ax][0], 'baz']), - [-1, -1, -1], fill_value) + assert_reindex_indexer_is_ok( + mgr, ax, + pd.Index([]), [], fill_value) + assert_reindex_indexer_is_ok( + mgr, ax, + mgr.axes[ax], np.arange(mgr.shape[ax]), fill_value) + assert_reindex_indexer_is_ok( + mgr, ax, + pd.Index(['foo'] * mgr.shape[ax]), + np.arange(mgr.shape[ax]), fill_value) + assert_reindex_indexer_is_ok( + mgr, ax, + mgr.axes[ax][::-1], np.arange(mgr.shape[ax]), + fill_value) + assert_reindex_indexer_is_ok( + mgr, ax, mgr.axes[ax], + np.arange(mgr.shape[ax])[::-1], fill_value) + assert_reindex_indexer_is_ok( + mgr, ax, + pd.Index(['foo', 'bar', 'baz']), + [0, 0, 0], fill_value) + assert_reindex_indexer_is_ok( + mgr, ax, + pd.Index(['foo', 'bar', 'baz']), + [-1, 0, -1], fill_value) + assert_reindex_indexer_is_ok( + mgr, ax, + pd.Index(['foo', mgr.axes[ax][0], 'baz']), + [-1, -1, -1], fill_value) if mgr.shape[ax] >= 3: - yield (assert_reindex_indexer_is_ok, mgr, ax, - pd.Index(['foo', 'bar', 'baz']), - [0, 1, 2], fill_value) + assert_reindex_indexer_is_ok( + mgr, ax, + pd.Index(['foo', 'bar', 'baz']), + [0, 1, 2], fill_value) # test_get_slice(slice_like, axis) # take(indexer, axis) @@ -1052,24 +1057,26 @@ def assert_reindex_indexer_is_ok(mgr, axis, 
new_labels, indexer, # reindex_indexer(new_labels, indexer, axis) -class TestBlockPlacement(tm.TestCase): +class TestBlockPlacement(object): def test_slice_len(self): - self.assertEqual(len(BlockPlacement(slice(0, 4))), 4) - self.assertEqual(len(BlockPlacement(slice(0, 4, 2))), 2) - self.assertEqual(len(BlockPlacement(slice(0, 3, 2))), 2) + assert len(BlockPlacement(slice(0, 4))) == 4 + assert len(BlockPlacement(slice(0, 4, 2))) == 2 + assert len(BlockPlacement(slice(0, 3, 2))) == 2 - self.assertEqual(len(BlockPlacement(slice(0, 1, 2))), 1) - self.assertEqual(len(BlockPlacement(slice(1, 0, -1))), 1) + assert len(BlockPlacement(slice(0, 1, 2))) == 1 + assert len(BlockPlacement(slice(1, 0, -1))) == 1 def test_zero_step_raises(self): - self.assertRaises(ValueError, BlockPlacement, slice(1, 1, 0)) - self.assertRaises(ValueError, BlockPlacement, slice(1, 2, 0)) + with pytest.raises(ValueError): + BlockPlacement(slice(1, 1, 0)) + with pytest.raises(ValueError): + BlockPlacement(slice(1, 2, 0)) def test_unbounded_slice_raises(self): def assert_unbounded_slice_error(slc): - self.assertRaisesRegexp(ValueError, "unbounded slice", - lambda: BlockPlacement(slc)) + tm.assert_raises_regex(ValueError, "unbounded slice", + lambda: BlockPlacement(slc)) assert_unbounded_slice_error(slice(None, None)) assert_unbounded_slice_error(slice(10, None)) @@ -1087,7 +1094,7 @@ def assert_unbounded_slice_error(slc): def test_not_slice_like_slices(self): def assert_not_slice_like(slc): - self.assertTrue(not BlockPlacement(slc).is_slice_like) + assert not BlockPlacement(slc).is_slice_like assert_not_slice_like(slice(0, 0)) assert_not_slice_like(slice(100, 0)) @@ -1095,12 +1102,12 @@ def assert_not_slice_like(slc): assert_not_slice_like(slice(100, 100, -1)) assert_not_slice_like(slice(0, 100, -1)) - self.assertTrue(not BlockPlacement(slice(0, 0)).is_slice_like) - self.assertTrue(not BlockPlacement(slice(100, 100)).is_slice_like) + assert not BlockPlacement(slice(0, 0)).is_slice_like + assert not 
BlockPlacement(slice(100, 100)).is_slice_like def test_array_to_slice_conversion(self): def assert_as_slice_equals(arr, slc): - self.assertEqual(BlockPlacement(arr).as_slice, slc) + assert BlockPlacement(arr).as_slice == slc assert_as_slice_equals([0], slice(0, 1, 1)) assert_as_slice_equals([100], slice(100, 101, 1)) @@ -1110,12 +1117,14 @@ def assert_as_slice_equals(arr, slc): assert_as_slice_equals([0, 100], slice(0, 200, 100)) assert_as_slice_equals([2, 1], slice(2, 0, -1)) - assert_as_slice_equals([2, 1, 0], slice(2, None, -1)) - assert_as_slice_equals([100, 0], slice(100, None, -100)) + + if not PY361: + assert_as_slice_equals([2, 1, 0], slice(2, None, -1)) + assert_as_slice_equals([100, 0], slice(100, None, -100)) def test_not_slice_like_arrays(self): def assert_not_slice_like(arr): - self.assertTrue(not BlockPlacement(arr).is_slice_like) + assert not BlockPlacement(arr).is_slice_like assert_not_slice_like([]) assert_not_slice_like([-1]) @@ -1128,13 +1137,13 @@ def assert_not_slice_like(arr): assert_not_slice_like([1, 1, 1]) def test_slice_iter(self): - self.assertEqual(list(BlockPlacement(slice(0, 3))), [0, 1, 2]) - self.assertEqual(list(BlockPlacement(slice(0, 0))), []) - self.assertEqual(list(BlockPlacement(slice(3, 0))), []) + assert list(BlockPlacement(slice(0, 3))) == [0, 1, 2] + assert list(BlockPlacement(slice(0, 0))) == [] + assert list(BlockPlacement(slice(3, 0))) == [] - self.assertEqual(list(BlockPlacement(slice(3, 0, -1))), [3, 2, 1]) - self.assertEqual(list(BlockPlacement(slice(3, None, -1))), - [3, 2, 1, 0]) + if not PY361: + assert list(BlockPlacement(slice(3, 0, -1))) == [3, 2, 1] + assert list(BlockPlacement(slice(3, None, -1))) == [3, 2, 1, 0] def test_slice_to_array_conversion(self): def assert_as_array_equals(slc, asarray): @@ -1147,39 +1156,136 @@ def assert_as_array_equals(slc, asarray): assert_as_array_equals(slice(3, 0), []) assert_as_array_equals(slice(3, 0, -1), [3, 2, 1]) - assert_as_array_equals(slice(3, None, -1), [3, 2, 1, 0]) - 
assert_as_array_equals(slice(31, None, -10), [31, 21, 11, 1]) + + if not PY361: + assert_as_array_equals(slice(3, None, -1), [3, 2, 1, 0]) + assert_as_array_equals(slice(31, None, -10), [31, 21, 11, 1]) def test_blockplacement_add(self): bpl = BlockPlacement(slice(0, 5)) - self.assertEqual(bpl.add(1).as_slice, slice(1, 6, 1)) - self.assertEqual(bpl.add(np.arange(5)).as_slice, slice(0, 10, 2)) - self.assertEqual(list(bpl.add(np.arange(5, 0, -1))), [5, 5, 5, 5, 5]) + assert bpl.add(1).as_slice == slice(1, 6, 1) + assert bpl.add(np.arange(5)).as_slice == slice(0, 10, 2) + assert list(bpl.add(np.arange(5, 0, -1))) == [5, 5, 5, 5, 5] def test_blockplacement_add_int(self): def assert_add_equals(val, inc, result): - self.assertEqual(list(BlockPlacement(val).add(inc)), result) + assert list(BlockPlacement(val).add(inc)) == result assert_add_equals(slice(0, 0), 0, []) assert_add_equals(slice(1, 4), 0, [1, 2, 3]) assert_add_equals(slice(3, 0, -1), 0, [3, 2, 1]) - assert_add_equals(slice(2, None, -1), 0, [2, 1, 0]) assert_add_equals([1, 2, 4], 0, [1, 2, 4]) assert_add_equals(slice(0, 0), 10, []) assert_add_equals(slice(1, 4), 10, [11, 12, 13]) assert_add_equals(slice(3, 0, -1), 10, [13, 12, 11]) - assert_add_equals(slice(2, None, -1), 10, [12, 11, 10]) assert_add_equals([1, 2, 4], 10, [11, 12, 14]) assert_add_equals(slice(0, 0), -1, []) assert_add_equals(slice(1, 4), -1, [0, 1, 2]) - assert_add_equals(slice(3, 0, -1), -1, [2, 1, 0]) assert_add_equals([1, 2, 4], -1, [0, 1, 3]) - self.assertRaises(ValueError, - lambda: BlockPlacement(slice(1, 4)).add(-10)) - self.assertRaises(ValueError, - lambda: BlockPlacement([1, 2, 4]).add(-10)) - self.assertRaises(ValueError, - lambda: BlockPlacement(slice(2, None, -1)).add(-1)) + with pytest.raises(ValueError): + BlockPlacement(slice(1, 4)).add(-10) + with pytest.raises(ValueError): + BlockPlacement([1, 2, 4]).add(-10) + + if not PY361: + assert_add_equals(slice(3, 0, -1), -1, [2, 1, 0]) + assert_add_equals(slice(2, None, -1), 0, [2, 1, 
0]) + assert_add_equals(slice(2, None, -1), 10, [12, 11, 10]) + + with pytest.raises(ValueError): + BlockPlacement(slice(2, None, -1)).add(-1) + + +class DummyElement(object): + def __init__(self, value, dtype): + self.value = value + self.dtype = np.dtype(dtype) + + def __array__(self): + return np.array(self.value, dtype=self.dtype) + + def __str__(self): + return "DummyElement({}, {})".format(self.value, self.dtype) + + def __repr__(self): + return str(self) + + def astype(self, dtype, copy=False): + self.dtype = dtype + return self + + def view(self, dtype): + return type(self)(self.value.view(dtype), dtype) + + def any(self, axis=None): + return bool(self.value) + + +class TestCanHoldElement(object): + @pytest.mark.parametrize('value, dtype', [ + (1, 'i8'), + (1.0, 'f8'), + (2**63, 'f8'), + (1j, 'complex128'), + (2**63, 'complex128'), + (True, 'bool'), + (np.timedelta64(20, 'ns'), ' - + @@ -4849,7 +4849,7 @@

Failed Bank List

'''.format(div_style) + if compat.PY2: + expected = expected.decode('utf-8') + assert result == expected + + def test_to_html_truncate_multi_index(self): + pytest.skip("unreliable on travis") + arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], + ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] + df = DataFrame(index=arrays, columns=arrays) + fmt.set_option('display.max_rows', 7) + fmt.set_option('display.max_columns', 7) + result = df._repr_html_() + expected = '''\ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
barbaz...fooqux
onetwoone...twoonetwo
baroneNaNNaNNaN...NaNNaNNaN
twoNaNNaNNaN...NaNNaNNaN
bazoneNaNNaNNaN...NaNNaNNaN
...........................
footwoNaNNaNNaN...NaNNaNNaN
quxoneNaNNaNNaN...NaNNaNNaN
twoNaNNaNNaN...NaNNaNNaN
+

8 rows × 8 columns

+'''.format(div_style) + if compat.PY2: + expected = expected.decode('utf-8') + assert result == expected + + def test_to_html_truncate_multi_index_sparse_off(self): + pytest.skip("unreliable on travis") + arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], + ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] + df = DataFrame(index=arrays, columns=arrays) + fmt.set_option('display.max_rows', 7) + fmt.set_option('display.max_columns', 7) + fmt.set_option('display.multi_sparse', False) + result = df._repr_html_() + expected = '''\ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
barbarbaz...fooquxqux
onetwoone...twoonetwo
baroneNaNNaNNaN...NaNNaNNaN
bartwoNaNNaNNaN...NaNNaNNaN
bazoneNaNNaNNaN...NaNNaNNaN
footwoNaNNaNNaN...NaNNaNNaN
quxoneNaNNaNNaN...NaNNaNNaN
quxtwoNaNNaNNaN...NaNNaNNaN
+

8 rows × 8 columns

+'''.format(div_style) + if compat.PY2: + expected = expected.decode('utf-8') + assert result == expected + + def test_to_html_border(self): + df = DataFrame({'A': [1, 2]}) + result = df.to_html() + assert 'border="1"' in result + + def test_to_html_border_option(self): + df = DataFrame({'A': [1, 2]}) + with pd.option_context('display.html.border', 0): + result = df.to_html() + assert 'border="0"' in result + assert 'border="0"' in df._repr_html_() + + def test_to_html_border_zero(self): + df = DataFrame({'A': [1, 2]}) + result = df.to_html(border=0) + assert 'border="0"' in result + + @tm.capture_stdout + def test_display_option_warning(self): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + pd.options.html.border + + def test_to_html(self): + # big mixed + biggie = DataFrame({'A': np.random.randn(200), + 'B': tm.makeStringIndex(200)}, + index=lrange(200)) + + biggie.loc[:20, 'A'] = np.nan + biggie.loc[:20, 'B'] = np.nan + s = biggie.to_html() + + buf = StringIO() + retval = biggie.to_html(buf=buf) + assert retval is None + assert buf.getvalue() == s + + assert isinstance(s, compat.string_types) + + biggie.to_html(columns=['B', 'A'], col_space=17) + biggie.to_html(columns=['B', 'A'], + formatters={'A': lambda x: '{x:.1f}'.format(x=x)}) + + biggie.to_html(columns=['B', 'A'], float_format=str) + biggie.to_html(columns=['B', 'A'], col_space=12, float_format=str) + + frame = DataFrame(index=np.arange(200)) + frame.to_html() + + def test_to_html_filename(self): + biggie = DataFrame({'A': np.random.randn(200), + 'B': tm.makeStringIndex(200)}, + index=lrange(200)) + + biggie.loc[:20, 'A'] = np.nan + biggie.loc[:20, 'B'] = np.nan + with tm.ensure_clean('test.html') as path: + biggie.to_html(path) + with open(path, 'r') as f: + s = biggie.to_html() + s2 = f.read() + assert s == s2 + + frame = DataFrame(index=np.arange(200)) + with tm.ensure_clean('test.html') as path: + frame.to_html(path) + with open(path, 'r') as f: + assert frame.to_html() 
== f.read() + + def test_to_html_with_no_bold(self): + x = DataFrame({'x': np.random.randn(5)}) + ashtml = x.to_html(bold_rows=False) + assert '")] + + def test_to_html_columns_arg(self): + frame = DataFrame(tm.getSeriesData()) + result = frame.to_html(columns=['A']) + assert 'B' not in result + + def test_to_html_multiindex(self): + columns = MultiIndex.from_tuples(list(zip(np.arange(2).repeat(2), + np.mod(lrange(4), 2))), + names=['CL0', 'CL1']) + df = DataFrame([list('abcd'), list('efgh')], columns=columns) + result = df.to_html(justify='left') + expected = ('\n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + '
CL001
CL10101
0abcd
1efgh
') + + assert result == expected + + columns = MultiIndex.from_tuples(list(zip( + range(4), np.mod( + lrange(4), 2)))) + df = DataFrame([list('abcd'), list('efgh')], columns=columns) + + result = df.to_html(justify='right') + expected = ('\n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + '
0123
0101
0abcd
1efgh
') + + assert result == expected + + @pytest.mark.parametrize("justify", fmt._VALID_JUSTIFY_PARAMETERS) + def test_to_html_justify(self, justify): + df = DataFrame({'A': [6, 30000, 2], + 'B': [1, 2, 70000], + 'C': [223442, 0, 1]}, + columns=['A', 'B', 'C']) + result = df.to_html(justify=justify) + expected = ('\n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + '
ABC
061223442
13000020
22700001
'.format(justify=justify)) + assert result == expected + + @pytest.mark.parametrize("justify", ["super-right", "small-left", + "noinherit", "tiny", "pandas"]) + def test_to_html_invalid_justify(self, justify): + # see gh-17527 + df = DataFrame() + msg = "Invalid value for justify parameter" + + with tm.assert_raises_regex(ValueError, msg): + df.to_html(justify=justify) + + def test_to_html_index(self): + index = ['foo', 'bar', 'baz'] + df = DataFrame({'A': [1, 2, 3], + 'B': [1.2, 3.4, 5.6], + 'C': ['one', 'two', np.nan]}, + columns=['A', 'B', 'C'], + index=index) + expected_with_index = ('\n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + '
ABC
foo11.2one
bar23.4two
baz35.6NaN
') + assert df.to_html() == expected_with_index + + expected_without_index = ('\n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + '
ABC
11.2one
23.4two
35.6NaN
') + result = df.to_html(index=False) + for i in index: + assert i not in result + assert result == expected_without_index + df.index = Index(['foo', 'bar', 'baz'], name='idx') + expected_with_index = ('\n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + '
ABC
idx
foo11.2one
bar23.4two
baz35.6NaN
') + assert df.to_html() == expected_with_index + assert df.to_html(index=False) == expected_without_index + + tuples = [('foo', 'car'), ('foo', 'bike'), ('bar', 'car')] + df.index = MultiIndex.from_tuples(tuples) + + expected_with_index = ('\n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + '
ABC
foocar11.2one
bike23.4two
barcar35.6NaN
') + assert df.to_html() == expected_with_index + + result = df.to_html(index=False) + for i in ['foo', 'bar', 'car', 'bike']: + assert i not in result + # must be the same result as normal index + assert result == expected_without_index + + df.index = MultiIndex.from_tuples(tuples, names=['idx1', 'idx2']) + expected_with_index = ('\n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + '
ABC
idx1idx2
foocar11.2one
bike23.4two
barcar35.6NaN
') + assert df.to_html() == expected_with_index + assert df.to_html(index=False) == expected_without_index + + def test_to_html_with_classes(self): + df = DataFrame() + result = df.to_html(classes="sortable draggable") + expected = dedent(""" + + + + + + + + + +
+ + """).strip() + assert result == expected + + result = df.to_html(classes=["sortable", "draggable"]) + assert result == expected + + def test_to_html_no_index_max_rows(self): + # GH https://github.com/pandas-dev/pandas/issues/14998 + df = DataFrame({"A": [1, 2, 3, 4]}) + result = df.to_html(index=False, max_rows=1) + expected = dedent("""\ + + + + + + + + + + + +
A
1
""") + assert result == expected + + def test_to_html_notebook_has_style(self): + df = pd.DataFrame({"A": [1, 2, 3]}) + result = df.to_html(notebook=True) + assert "tbody tr th:only-of-type" in result + assert "vertical-align: middle;" in result + assert "thead th" in result + + def test_to_html_notebook_has_no_style(self): + df = pd.DataFrame({"A": [1, 2, 3]}) + result = df.to_html() + assert "tbody tr th:only-of-type" not in result + assert "vertical-align: middle;" not in result + assert "thead th" not in result + + def test_to_html_with_index_names_false(self): + # gh-16493 + df = pd.DataFrame({"A": [1, 2]}, index=pd.Index(['a', 'b'], + name='myindexname')) + result = df.to_html(index_names=False) + assert 'myindexname' not in result + + def test_to_html_with_id(self): + # gh-8496 + df = pd.DataFrame({"A": [1, 2]}, index=pd.Index(['a', 'b'], + name='myindexname')) + result = df.to_html(index_names=False, table_id="TEST_ID") + assert ' id="TEST_ID"' in result diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py new file mode 100644 index 0000000000000..5ebf196be094e --- /dev/null +++ b/pandas/tests/io/formats/test_to_latex.py @@ -0,0 +1,623 @@ +from datetime import datetime + +import pytest + +import pandas as pd +from pandas import DataFrame, compat, Series +from pandas.util import testing as tm +from pandas.compat import u +import codecs + + +@pytest.fixture +def frame(): + return DataFrame(tm.getSeriesData()) + + +class TestToLatex(object): + + def test_to_latex_filename(self, frame): + with tm.ensure_clean('test.tex') as path: + frame.to_latex(path) + + with open(path, 'r') as f: + assert frame.to_latex() == f.read() + + # test with utf-8 and encoding option (GH 7061) + df = DataFrame([[u'au\xdfgangen']]) + with tm.ensure_clean('test.tex') as path: + df.to_latex(path, encoding='utf-8') + with codecs.open(path, 'r', encoding='utf-8') as f: + assert df.to_latex() == f.read() + + # test with utf-8 without encoding 
option + if compat.PY3: # python3: pandas default encoding is utf-8 + with tm.ensure_clean('test.tex') as path: + df.to_latex(path) + with codecs.open(path, 'r', encoding='utf-8') as f: + assert df.to_latex() == f.read() + else: + # python2 default encoding is ascii, so an error should be raised + with tm.ensure_clean('test.tex') as path: + with pytest.raises(UnicodeEncodeError): + df.to_latex(path) + + def test_to_latex(self, frame): + # it works! + frame.to_latex() + + df = DataFrame({'a': [1, 2], 'b': ['b1', 'b2']}) + withindex_result = df.to_latex() + withindex_expected = r"""\begin{tabular}{lrl} +\toprule +{} & a & b \\ +\midrule +0 & 1 & b1 \\ +1 & 2 & b2 \\ +\bottomrule +\end{tabular} +""" + + assert withindex_result == withindex_expected + + withoutindex_result = df.to_latex(index=False) + withoutindex_expected = r"""\begin{tabular}{rl} +\toprule + a & b \\ +\midrule + 1 & b1 \\ + 2 & b2 \\ +\bottomrule +\end{tabular} +""" + + assert withoutindex_result == withoutindex_expected + + def test_to_latex_format(self, frame): + # GH Bug #9402 + frame.to_latex(column_format='ccc') + + df = DataFrame({'a': [1, 2], 'b': ['b1', 'b2']}) + withindex_result = df.to_latex(column_format='ccc') + withindex_expected = r"""\begin{tabular}{ccc} +\toprule +{} & a & b \\ +\midrule +0 & 1 & b1 \\ +1 & 2 & b2 \\ +\bottomrule +\end{tabular} +""" + + assert withindex_result == withindex_expected + + def test_to_latex_empty(self): + df = DataFrame() + result = df.to_latex() + expected = r"""\begin{tabular}{l} +\toprule +Empty DataFrame +Columns: Index([], dtype='object') +Index: Index([], dtype='object') \\ +\bottomrule +\end{tabular} +""" + assert result == expected + + result = df.to_latex(longtable=True) + expected = r"""\begin{longtable}{l} +\toprule +Empty DataFrame +Columns: Index([], dtype='object') +Index: Index([], dtype='object') \\ +\end{longtable} +""" + assert result == expected + + def test_to_latex_with_formatters(self): + df = DataFrame({'datetime64': [datetime(2016, 
1, 1), + datetime(2016, 2, 5), + datetime(2016, 3, 3)], + 'float': [1.0, 2.0, 3.0], + 'int': [1, 2, 3], + 'object': [(1, 2), True, False], + }) + + formatters = {'datetime64': lambda x: x.strftime('%Y-%m'), + 'float': lambda x: '[{x: 4.1f}]'.format(x=x), + 'int': lambda x: '0x{x:x}'.format(x=x), + 'object': lambda x: '-{x!s}-'.format(x=x), + '__index__': lambda x: 'index: {x}'.format(x=x)} + result = df.to_latex(formatters=dict(formatters)) + + expected = r"""\begin{tabular}{llrrl} +\toprule +{} & datetime64 & float & int & object \\ +\midrule +index: 0 & 2016-01 & [ 1.0] & 0x1 & -(1, 2)- \\ +index: 1 & 2016-02 & [ 2.0] & 0x2 & -True- \\ +index: 2 & 2016-03 & [ 3.0] & 0x3 & -False- \\ +\bottomrule +\end{tabular} +""" + assert result == expected + + def test_to_latex_multiindex(self): + df = DataFrame({('x', 'y'): ['a']}) + result = df.to_latex() + expected = r"""\begin{tabular}{ll} +\toprule +{} & x \\ +{} & y \\ +\midrule +0 & a \\ +\bottomrule +\end{tabular} +""" + + assert result == expected + + result = df.T.to_latex() + expected = r"""\begin{tabular}{lll} +\toprule + & & 0 \\ +\midrule +x & y & a \\ +\bottomrule +\end{tabular} +""" + + assert result == expected + + df = DataFrame.from_dict({ + ('c1', 0): pd.Series({x: x for x in range(4)}), + ('c1', 1): pd.Series({x: x + 4 for x in range(4)}), + ('c2', 0): pd.Series({x: x for x in range(4)}), + ('c2', 1): pd.Series({x: x + 4 for x in range(4)}), + ('c3', 0): pd.Series({x: x for x in range(4)}), + }).T + result = df.to_latex() + expected = r"""\begin{tabular}{llrrrr} +\toprule + & & 0 & 1 & 2 & 3 \\ +\midrule +c1 & 0 & 0 & 1 & 2 & 3 \\ + & 1 & 4 & 5 & 6 & 7 \\ +c2 & 0 & 0 & 1 & 2 & 3 \\ + & 1 & 4 & 5 & 6 & 7 \\ +c3 & 0 & 0 & 1 & 2 & 3 \\ +\bottomrule +\end{tabular} +""" + + assert result == expected + + # GH 14184 + df = df.T + df.columns.names = ['a', 'b'] + result = df.to_latex() + expected = r"""\begin{tabular}{lrrrrr} +\toprule +a & \multicolumn{2}{l}{c1} & \multicolumn{2}{l}{c2} & c3 \\ +b & 0 & 1 & 0 & 1 
& 0 \\ +\midrule +0 & 0 & 4 & 0 & 4 & 0 \\ +1 & 1 & 5 & 1 & 5 & 1 \\ +2 & 2 & 6 & 2 & 6 & 2 \\ +3 & 3 & 7 & 3 & 7 & 3 \\ +\bottomrule +\end{tabular} +""" + assert result == expected + + # GH 10660 + df = pd.DataFrame({'a': [0, 0, 1, 1], + 'b': list('abab'), + 'c': [1, 2, 3, 4]}) + result = df.set_index(['a', 'b']).to_latex() + expected = r"""\begin{tabular}{llr} +\toprule + & & c \\ +a & b & \\ +\midrule +0 & a & 1 \\ + & b & 2 \\ +1 & a & 3 \\ + & b & 4 \\ +\bottomrule +\end{tabular} +""" + + assert result == expected + + result = df.groupby('a').describe().to_latex() + expected = r"""\begin{tabular}{lrrrrrrrr} +\toprule +{} & \multicolumn{8}{l}{c} \\ +{} & count & mean & std & min & 25\% & 50\% & 75\% & max \\ +a & & & & & & & & \\ +\midrule +0 & 2.0 & 1.5 & 0.707107 & 1.0 & 1.25 & 1.5 & 1.75 & 2.0 \\ +1 & 2.0 & 3.5 & 0.707107 & 3.0 & 3.25 & 3.5 & 3.75 & 4.0 \\ +\bottomrule +\end{tabular} +""" + + assert result == expected + + def test_to_latex_multiindex_dupe_level(self): + # see gh-14484 + # + # If an index is repeated in subsequent rows, it should be + # replaced with a blank in the created table. This should + # ONLY happen if all higher order indices (to the left) are + # equal too. In this test, 'c' has to be printed both times + # because the higher order index 'A' != 'B'. 
+ df = pd.DataFrame(index=pd.MultiIndex.from_tuples( + [('A', 'c'), ('B', 'c')]), columns=['col']) + result = df.to_latex() + expected = r"""\begin{tabular}{lll} +\toprule + & & col \\ +\midrule +A & c & NaN \\ +B & c & NaN \\ +\bottomrule +\end{tabular} +""" + assert result == expected + + def test_to_latex_multicolumnrow(self): + df = pd.DataFrame({ + ('c1', 0): {x: x for x in range(5)}, + ('c1', 1): {x: x + 5 for x in range(5)}, + ('c2', 0): {x: x for x in range(5)}, + ('c2', 1): {x: x + 5 for x in range(5)}, + ('c3', 0): {x: x for x in range(5)} + }) + result = df.to_latex() + expected = r"""\begin{tabular}{lrrrrr} +\toprule +{} & \multicolumn{2}{l}{c1} & \multicolumn{2}{l}{c2} & c3 \\ +{} & 0 & 1 & 0 & 1 & 0 \\ +\midrule +0 & 0 & 5 & 0 & 5 & 0 \\ +1 & 1 & 6 & 1 & 6 & 1 \\ +2 & 2 & 7 & 2 & 7 & 2 \\ +3 & 3 & 8 & 3 & 8 & 3 \\ +4 & 4 & 9 & 4 & 9 & 4 \\ +\bottomrule +\end{tabular} +""" + assert result == expected + + result = df.to_latex(multicolumn=False) + expected = r"""\begin{tabular}{lrrrrr} +\toprule +{} & c1 & & c2 & & c3 \\ +{} & 0 & 1 & 0 & 1 & 0 \\ +\midrule +0 & 0 & 5 & 0 & 5 & 0 \\ +1 & 1 & 6 & 1 & 6 & 1 \\ +2 & 2 & 7 & 2 & 7 & 2 \\ +3 & 3 & 8 & 3 & 8 & 3 \\ +4 & 4 & 9 & 4 & 9 & 4 \\ +\bottomrule +\end{tabular} +""" + assert result == expected + + result = df.T.to_latex(multirow=True) + expected = r"""\begin{tabular}{llrrrrr} +\toprule + & & 0 & 1 & 2 & 3 & 4 \\ +\midrule +\multirow{2}{*}{c1} & 0 & 0 & 1 & 2 & 3 & 4 \\ + & 1 & 5 & 6 & 7 & 8 & 9 \\ +\cline{1-7} +\multirow{2}{*}{c2} & 0 & 0 & 1 & 2 & 3 & 4 \\ + & 1 & 5 & 6 & 7 & 8 & 9 \\ +\cline{1-7} +c3 & 0 & 0 & 1 & 2 & 3 & 4 \\ +\bottomrule +\end{tabular} +""" + assert result == expected + + df.index = df.T.index + result = df.T.to_latex(multirow=True, multicolumn=True, + multicolumn_format='c') + expected = r"""\begin{tabular}{llrrrrr} +\toprule + & & \multicolumn{2}{c}{c1} & \multicolumn{2}{c}{c2} & c3 \\ + & & 0 & 1 & 0 & 1 & 0 \\ +\midrule +\multirow{2}{*}{c1} & 0 & 0 & 1 & 2 & 3 & 4 \\ + & 1 & 5 & 
6 & 7 & 8 & 9 \\ +\cline{1-7} +\multirow{2}{*}{c2} & 0 & 0 & 1 & 2 & 3 & 4 \\ + & 1 & 5 & 6 & 7 & 8 & 9 \\ +\cline{1-7} +c3 & 0 & 0 & 1 & 2 & 3 & 4 \\ +\bottomrule +\end{tabular} +""" + assert result == expected + + def test_to_latex_escape(self): + a = 'a' + b = 'b' + + test_dict = {u('co$e^x$'): {a: "a", + b: "b"}, + u('co^l1'): {a: "a", + b: "b"}} + + unescaped_result = DataFrame(test_dict).to_latex(escape=False) + escaped_result = DataFrame(test_dict).to_latex( + ) # default: escape=True + + unescaped_expected = r'''\begin{tabular}{lll} +\toprule +{} & co$e^x$ & co^l1 \\ +\midrule +a & a & a \\ +b & b & b \\ +\bottomrule +\end{tabular} +''' + + escaped_expected = r'''\begin{tabular}{lll} +\toprule +{} & co\$e\textasciicircumx\$ & co\textasciicircuml1 \\ +\midrule +a & a & a \\ +b & b & b \\ +\bottomrule +\end{tabular} +''' + + assert unescaped_result == unescaped_expected + assert escaped_result == escaped_expected + + def test_to_latex_longtable(self, frame): + frame.to_latex(longtable=True) + + df = DataFrame({'a': [1, 2], 'b': ['b1', 'b2']}) + withindex_result = df.to_latex(longtable=True) + withindex_expected = r"""\begin{longtable}{lrl} +\toprule +{} & a & b \\ +\midrule +\endhead +\midrule +\multicolumn{3}{r}{{Continued on next page}} \\ +\midrule +\endfoot + +\bottomrule +\endlastfoot +0 & 1 & b1 \\ +1 & 2 & b2 \\ +\end{longtable} +""" + assert withindex_result == withindex_expected + + withoutindex_result = df.to_latex(index=False, longtable=True) + withoutindex_expected = r"""\begin{longtable}{rl} +\toprule + a & b \\ +\midrule +\endhead +\midrule +\multicolumn{2}{r}{{Continued on next page}} \\ +\midrule +\endfoot + +\bottomrule +\endlastfoot + 1 & b1 \\ + 2 & b2 \\ +\end{longtable} +""" + + assert withoutindex_result == withoutindex_expected + + df = DataFrame({'a': [1, 2]}) + with1column_result = df.to_latex(index=False, longtable=True) + assert r"\multicolumn{1}" in with1column_result + + df = DataFrame({'a': [1, 2], 'b': [3, 4], 'c': [5, 6]}) + 
with3columns_result = df.to_latex(index=False, longtable=True) + assert r"\multicolumn{3}" in with3columns_result + + def test_to_latex_escape_special_chars(self): + special_characters = ['&', '%', '$', '#', '_', '{', '}', '~', '^', + '\\'] + df = DataFrame(data=special_characters) + observed = df.to_latex() + expected = r"""\begin{tabular}{ll} +\toprule +{} & 0 \\ +\midrule +0 & \& \\ +1 & \% \\ +2 & \$ \\ +3 & \# \\ +4 & \_ \\ +5 & \{ \\ +6 & \} \\ +7 & \textasciitilde \\ +8 & \textasciicircum \\ +9 & \textbackslash \\ +\bottomrule +\end{tabular} +""" + + assert observed == expected + + def test_to_latex_no_header(self): + # GH 7124 + df = DataFrame({'a': [1, 2], 'b': ['b1', 'b2']}) + withindex_result = df.to_latex(header=False) + withindex_expected = r"""\begin{tabular}{lrl} +\toprule +0 & 1 & b1 \\ +1 & 2 & b2 \\ +\bottomrule +\end{tabular} +""" + + assert withindex_result == withindex_expected + + withoutindex_result = df.to_latex(index=False, header=False) + withoutindex_expected = r"""\begin{tabular}{rl} +\toprule + 1 & b1 \\ + 2 & b2 \\ +\bottomrule +\end{tabular} +""" + + assert withoutindex_result == withoutindex_expected + + def test_to_latex_specified_header(self): + # GH 7124 + df = DataFrame({'a': [1, 2], 'b': ['b1', 'b2']}) + withindex_result = df.to_latex(header=['AA', 'BB']) + withindex_expected = r"""\begin{tabular}{lrl} +\toprule +{} & AA & BB \\ +\midrule +0 & 1 & b1 \\ +1 & 2 & b2 \\ +\bottomrule +\end{tabular} +""" + + assert withindex_result == withindex_expected + + withoutindex_result = df.to_latex(header=['AA', 'BB'], index=False) + withoutindex_expected = r"""\begin{tabular}{rl} +\toprule +AA & BB \\ +\midrule + 1 & b1 \\ + 2 & b2 \\ +\bottomrule +\end{tabular} +""" + + assert withoutindex_result == withoutindex_expected + + withoutescape_result = df.to_latex(header=['$A$', '$B$'], escape=False) + withoutescape_expected = r"""\begin{tabular}{lrl} +\toprule +{} & $A$ & $B$ \\ +\midrule +0 & 1 & b1 \\ +1 & 2 & b2 \\ +\bottomrule 
+\end{tabular} +""" + + assert withoutescape_result == withoutescape_expected + + with pytest.raises(ValueError): + df.to_latex(header=['A']) + + def test_to_latex_decimal(self, frame): + # GH 12031 + frame.to_latex() + + df = DataFrame({'a': [1.0, 2.1], 'b': ['b1', 'b2']}) + withindex_result = df.to_latex(decimal=',') + + withindex_expected = r"""\begin{tabular}{lrl} +\toprule +{} & a & b \\ +\midrule +0 & 1,0 & b1 \\ +1 & 2,1 & b2 \\ +\bottomrule +\end{tabular} +""" + + assert withindex_result == withindex_expected + + def test_to_latex_series(self): + s = Series(['a', 'b', 'c']) + withindex_result = s.to_latex() + withindex_expected = r"""\begin{tabular}{ll} +\toprule +{} & 0 \\ +\midrule +0 & a \\ +1 & b \\ +2 & c \\ +\bottomrule +\end{tabular} +""" + assert withindex_result == withindex_expected + + def test_to_latex_bold_rows(self): + # GH 16707 + df = pd.DataFrame({'a': [1, 2], 'b': ['b1', 'b2']}) + observed = df.to_latex(bold_rows=True) + expected = r"""\begin{tabular}{lrl} +\toprule +{} & a & b \\ +\midrule +\textbf{0} & 1 & b1 \\ +\textbf{1} & 2 & b2 \\ +\bottomrule +\end{tabular} +""" + assert observed == expected + + def test_to_latex_no_bold_rows(self): + # GH 16707 + df = pd.DataFrame({'a': [1, 2], 'b': ['b1', 'b2']}) + observed = df.to_latex(bold_rows=False) + expected = r"""\begin{tabular}{lrl} +\toprule +{} & a & b \\ +\midrule +0 & 1 & b1 \\ +1 & 2 & b2 \\ +\bottomrule +\end{tabular} +""" + assert observed == expected + + @pytest.mark.parametrize('name0', [None, 'named0']) + @pytest.mark.parametrize('name1', [None, 'named1']) + @pytest.mark.parametrize('axes', [[0], [1], [0, 1]]) + def test_to_latex_multiindex_names(self, name0, name1, axes): + # GH 18667 + names = [name0, name1] + mi = pd.MultiIndex.from_product([[1, 2], [3, 4]]) + df = pd.DataFrame(-1, index=mi.copy(), columns=mi.copy()) + for idx in axes: + df.axes[idx].names = names + + idx_names = tuple(n or '{}' for n in names) + idx_names_row = ('%s & %s & & & & \\\\\n' % idx_names + if (0 
in axes and any(names)) else '') + placeholder = '{}' if any(names) and 1 in axes else ' ' + col_names = [n if (bool(n) and 1 in axes) else placeholder + for n in names] + observed = df.to_latex() + expected = r"""\begin{tabular}{llrrrr} +\toprule + & %s & \multicolumn{2}{l}{1} & \multicolumn{2}{l}{2} \\ + & %s & 3 & 4 & 3 & 4 \\ +%s\midrule +1 & 3 & -1 & -1 & -1 & -1 \\ + & 4 & -1 & -1 & -1 & -1 \\ +2 & 3 & -1 & -1 & -1 & -1 \\ + & 4 & -1 & -1 & -1 & -1 \\ +\bottomrule +\end{tabular} +""" % tuple(list(col_names) + [idx_names_row]) + assert observed == expected diff --git a/pandas/io/tests/generate_legacy_storage_files.py b/pandas/tests/io/generate_legacy_storage_files.py old mode 100644 new mode 100755 similarity index 64% rename from pandas/io/tests/generate_legacy_storage_files.py rename to pandas/tests/io/generate_legacy_storage_files.py index d0365cb2c30b3..9f1ac8b1e677b --- a/pandas/io/tests/generate_legacy_storage_files.py +++ b/pandas/tests/io/generate_legacy_storage_files.py @@ -1,18 +1,62 @@ -""" self-contained to write legacy storage (pickle/msgpack) files """ +#!/usr/env/bin python + +""" +self-contained to write legacy storage (pickle/msgpack) files + +To use this script. Create an environment where you want +generate pickles, say its for 0.18.1, with your pandas clone +in ~/pandas + +. activate pandas_0.18.1 +cd ~/ + +$ python pandas/pandas/tests/io/generate_legacy_storage_files.py \ + pandas/pandas/tests/io/data/legacy_pickle/0.18.1/ pickle + +This script generates a storage file for the current arch, system, +and python version + pandas version: 0.18.1 + output dir : pandas/pandas/tests/io/data/legacy_pickle/0.18.1/ + storage format: pickle +created pickle file: 0.18.1_x86_64_darwin_3.5.2.pickle + +The idea here is you are using the *current* version of the +generate_legacy_storage_files with an *older* version of pandas to +generate a pickle file. We will then check this file into a current +branch, and test using test_pickle.py. 
This will load the *older* +pickles and test versus the current data that is generated +(with master). These are then compared. + +If we have cases where we changed the signature (e.g. we renamed +offset -> freq in Timestamp). Then we have to conditionally execute +in the generate_legacy_storage_files.py to make it +run under the older AND the newer version. + +""" + from __future__ import print_function +from warnings import catch_warnings from distutils.version import LooseVersion from pandas import (Series, DataFrame, Panel, SparseSeries, SparseDataFrame, Index, MultiIndex, bdate_range, to_msgpack, - date_range, period_range, + date_range, period_range, timedelta_range, Timestamp, NaT, Categorical, Period) +from pandas.tseries.offsets import ( + DateOffset, Hour, Minute, Day, + MonthBegin, MonthEnd, YearBegin, + YearEnd, Week, WeekOfMonth, LastWeekOfMonth, + BusinessDay, BusinessHour, CustomBusinessDay, FY5253, + Easter, + SemiMonthEnd, SemiMonthBegin, + QuarterBegin, QuarterEnd) from pandas.compat import u import os import sys import numpy as np import pandas import platform as pl - +from datetime import timedelta _loose_version = LooseVersion(pandas.__version__) @@ -72,7 +116,18 @@ def create_data(): index = dict(int=Index(np.arange(10)), date=date_range('20130101', periods=10), - period=period_range('2013-01-01', freq='M', periods=10)) + period=period_range('2013-01-01', freq='M', periods=10), + float=Index(np.arange(10, dtype=np.float64)), + uint=Index(np.arange(10, dtype=np.uint64)), + timedelta=timedelta_range('00:00:00', freq='30T', periods=10)) + + if _loose_version >= LooseVersion('0.18'): + from pandas import RangeIndex + index['range'] = RangeIndex(10) + + if _loose_version >= LooseVersion('0.21'): + from pandas import interval_range + index['interval'] = interval_range(0, periods=10) mi = dict(reg2=MultiIndex.from_tuples( tuple(zip(*[[u'bar', u'bar', u'baz', u'baz', u'foo', @@ -124,17 +179,23 @@ def create_data(): mixed_dup=mixed_dup_df, 
dt_mixed_tzs=DataFrame({ u'A': Timestamp('20130102', tz='US/Eastern'), - u'B': Timestamp('20130603', tz='CET')}, index=range(5)) + u'B': Timestamp('20130603', tz='CET')}, index=range(5)), + dt_mixed2_tzs=DataFrame({ + u'A': Timestamp('20130102', tz='US/Eastern'), + u'B': Timestamp('20130603', tz='CET'), + u'C': Timestamp('20130603', tz='UTC')}, index=range(5)) ) - mixed_dup_panel = Panel({u'ItemA': frame[u'float'], - u'ItemB': frame[u'int']}) - mixed_dup_panel.items = [u'ItemA', u'ItemA'] - panel = dict(float=Panel({u'ItemA': frame[u'float'], - u'ItemB': frame[u'float'] + 1}), - dup=Panel(np.arange(30).reshape(3, 5, 2).astype(np.float64), - items=[u'A', u'B', u'A']), - mixed_dup=mixed_dup_panel) + with catch_warnings(record=True): + mixed_dup_panel = Panel({u'ItemA': frame[u'float'], + u'ItemB': frame[u'int']}) + mixed_dup_panel.items = [u'ItemA', u'ItemA'] + panel = dict(float=Panel({u'ItemA': frame[u'float'], + u'ItemB': frame[u'float'] + 1}), + dup=Panel( + np.arange(30).reshape(3, 5, 2).astype(np.float64), + items=[u'A', u'B', u'A']), + mixed_dup=mixed_dup_panel) cat = dict(int8=Categorical(list('abcdefg')), int16=Categorical(np.arange(1000)), @@ -142,10 +203,39 @@ def create_data(): timestamp = dict(normal=Timestamp('2011-01-01'), nat=NaT, - tz=Timestamp('2011-01-01', tz='US/Eastern'), - freq=Timestamp('2011-01-01', freq='D'), - both=Timestamp('2011-01-01', tz='Asia/Tokyo', - freq='M')) + tz=Timestamp('2011-01-01', tz='US/Eastern')) + + if _loose_version < LooseVersion('0.19.2'): + timestamp['freq'] = Timestamp('2011-01-01', offset='D') + timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo', + offset='M') + else: + timestamp['freq'] = Timestamp('2011-01-01', freq='D') + timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo', + freq='M') + + off = {'DateOffset': DateOffset(years=1), + 'DateOffset_h_ns': DateOffset(hour=6, nanoseconds=5824), + 'BusinessDay': BusinessDay(offset=timedelta(seconds=9)), + 'BusinessHour': BusinessHour(normalize=True, n=6, 
end='15:14'), + 'CustomBusinessDay': CustomBusinessDay(weekmask='Mon Fri'), + 'SemiMonthBegin': SemiMonthBegin(day_of_month=9), + 'SemiMonthEnd': SemiMonthEnd(day_of_month=24), + 'MonthBegin': MonthBegin(1), + 'MonthEnd': MonthEnd(1), + 'QuarterBegin': QuarterBegin(1), + 'QuarterEnd': QuarterEnd(1), + 'Day': Day(1), + 'YearBegin': YearBegin(1), + 'YearEnd': YearEnd(1), + 'Week': Week(1), + 'Week_Tues': Week(2, normalize=False, weekday=1), + 'WeekOfMonth': WeekOfMonth(week=3, weekday=4), + 'LastWeekOfMonth': LastWeekOfMonth(n=1, weekday=3), + 'FY5253': FY5253(n=2, weekday=6, startingMonth=7, variation="last"), + 'Easter': Easter(), + 'Hour': Hour(1), + 'Minute': Minute(1)} return dict(series=series, frame=frame, @@ -157,7 +247,8 @@ def create_data(): ts=_create_sp_tsseries()), sp_frame=dict(float=_create_sp_frame()), cat=cat, - timestamp=timestamp) + timestamp=timestamp, + offsets=off) def create_pickle_data(): @@ -165,10 +256,10 @@ def create_pickle_data(): # Pre-0.14.1 versions generated non-unpicklable mixed-type frames and # panels if their columns/items were non-unique. 
- if _loose_version < '0.14.1': + if _loose_version < LooseVersion('0.14.1'): del data['frame']['mixed_dup'] del data['panel']['mixed_dup'] - if _loose_version < '0.17.0': + if _loose_version < LooseVersion('0.17.0'): del data['series']['period'] del data['scalars']['period'] return data @@ -180,12 +271,12 @@ def _u(x): def create_msgpack_data(): data = create_data() - if _loose_version < '0.17.0': + if _loose_version < LooseVersion('0.17.0'): del data['frame']['mixed_dup'] del data['panel']['mixed_dup'] del data['frame']['dup'] del data['panel']['dup'] - if _loose_version < '0.18.0': + if _loose_version < LooseVersion('0.18.0'): del data['series']['dt_tz'] del data['frame']['dt_mixed_tzs'] # Not supported @@ -196,6 +287,9 @@ def create_msgpack_data(): del data['frame']['cat_onecol'] del data['frame']['cat_and_float'] del data['scalars']['period'] + if _loose_version < LooseVersion('0.23.0'): + del data['index']['interval'] + del data['offsets'] return _u(data) diff --git a/pandas/tests/io/json/__init__.py b/pandas/tests/io/json/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/io/tests/json/data/tsframe_iso_v012.json b/pandas/tests/io/json/data/tsframe_iso_v012.json similarity index 100% rename from pandas/io/tests/json/data/tsframe_iso_v012.json rename to pandas/tests/io/json/data/tsframe_iso_v012.json diff --git a/pandas/io/tests/json/data/tsframe_v012.json b/pandas/tests/io/json/data/tsframe_v012.json similarity index 100% rename from pandas/io/tests/json/data/tsframe_v012.json rename to pandas/tests/io/json/data/tsframe_v012.json diff --git a/pandas/tests/io/json/data/tsframe_v012.json.zip b/pandas/tests/io/json/data/tsframe_v012.json.zip new file mode 100644 index 0000000000000000000000000000000000000000..100ba0c87b2ba55c169081bb0ed60c5db7391bbb GIT binary patch literal 436 zcmWIWW@Zs#-~d8>PgidSBp}Ejz)(`0R+N~V8ee8$Xrz}_oSzpO!Nb60eJyg=i>r~} z7)2P4PTcFqY$(uj|LLnEw<6!?Th+y}ylfKDbYKphQr@pG)b!*{7t{95#=p{PX2~tP 
zo9VSN!2DO`Wj2tkn(477rQ0RX7Wsm1^R literal 0 HcmV?d00001 diff --git a/pandas/tests/io/json/test_compression.py b/pandas/tests/io/json/test_compression.py new file mode 100644 index 0000000000000..08335293f9292 --- /dev/null +++ b/pandas/tests/io/json/test_compression.py @@ -0,0 +1,100 @@ +import pytest + +import pandas as pd +import pandas.util.testing as tm +from pandas.util.testing import assert_frame_equal, assert_raises_regex + + +def test_compression_roundtrip(compression_no_zip): + df = pd.DataFrame([[0.123456, 0.234567, 0.567567], + [12.32112, 123123.2, 321321.2]], + index=['A', 'B'], columns=['X', 'Y', 'Z']) + + with tm.ensure_clean() as path: + df.to_json(path, compression=compression_no_zip) + assert_frame_equal(df, pd.read_json(path, + compression=compression_no_zip)) + + # explicitly ensure file was compressed. + with tm.decompress_file(path, compression_no_zip) as fh: + result = fh.read().decode('utf8') + assert_frame_equal(df, pd.read_json(result)) + + +def test_compress_zip_value_error(): + df = pd.DataFrame([[0.123456, 0.234567, 0.567567], + [12.32112, 123123.2, 321321.2]], + index=['A', 'B'], columns=['X', 'Y', 'Z']) + + with tm.ensure_clean() as path: + import zipfile + pytest.raises(zipfile.BadZipfile, df.to_json, path, compression="zip") + + +def test_read_zipped_json(): + uncompressed_path = tm.get_data_path("tsframe_v012.json") + uncompressed_df = pd.read_json(uncompressed_path) + + compressed_path = tm.get_data_path("tsframe_v012.json.zip") + compressed_df = pd.read_json(compressed_path, compression='zip') + + assert_frame_equal(uncompressed_df, compressed_df) + + +def test_with_s3_url(compression_no_zip): + boto3 = pytest.importorskip('boto3') + pytest.importorskip('s3fs') + moto = pytest.importorskip('moto') + + df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}') + with moto.mock_s3(): + conn = boto3.resource("s3", region_name="us-east-1") + bucket = conn.create_bucket(Bucket="pandas-test") + + with tm.ensure_clean() as path: + 
df.to_json(path, compression=compression_no_zip) + with open(path, 'rb') as f: + bucket.put_object(Key='test-1', Body=f) + + roundtripped_df = pd.read_json('s3://pandas-test/test-1', + compression=compression_no_zip) + assert_frame_equal(df, roundtripped_df) + + +def test_lines_with_compression(compression_no_zip): + + with tm.ensure_clean() as path: + df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}') + df.to_json(path, orient='records', lines=True, + compression=compression_no_zip) + roundtripped_df = pd.read_json(path, lines=True, + compression=compression_no_zip) + assert_frame_equal(df, roundtripped_df) + + +def test_chunksize_with_compression(compression_no_zip): + + with tm.ensure_clean() as path: + df = pd.read_json('{"a": ["foo", "bar", "baz"], "b": [4, 5, 6]}') + df.to_json(path, orient='records', lines=True, + compression=compression_no_zip) + + res = pd.read_json(path, lines=True, chunksize=1, + compression=compression_no_zip) + roundtripped_df = pd.concat(res) + assert_frame_equal(df, roundtripped_df) + + +def test_write_unsupported_compression_type(): + df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}') + with tm.ensure_clean() as path: + msg = "Unrecognized compression type: unsupported" + assert_raises_regex(ValueError, msg, df.to_json, + path, compression="unsupported") + + +def test_read_unsupported_compression_type(): + with tm.ensure_clean() as path: + msg = "Unrecognized compression type: unsupported" + assert_raises_regex(ValueError, msg, pd.read_json, + path, compression="unsupported") diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py new file mode 100644 index 0000000000000..49b39c17238ae --- /dev/null +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -0,0 +1,562 @@ +"""Tests for Table Schema integration.""" +import json +from collections import OrderedDict + +import numpy as np +import pandas as pd +import pytest + +from pandas import DataFrame +from 
pandas.core.dtypes.dtypes import ( + PeriodDtype, CategoricalDtype, DatetimeTZDtype) +from pandas.io.json.table_schema import ( + as_json_table_type, + build_table_schema, + convert_pandas_type_to_json_field, + convert_json_field_to_pandas_type, + set_default_names) +import pandas.util.testing as tm + + +class TestBuildSchema(object): + + def setup_method(self, method): + self.df = DataFrame( + {'A': [1, 2, 3, 4], + 'B': ['a', 'b', 'c', 'c'], + 'C': pd.date_range('2016-01-01', freq='d', periods=4), + 'D': pd.timedelta_range('1H', periods=4, freq='T'), + }, + index=pd.Index(range(4), name='idx')) + + def test_build_table_schema(self): + result = build_table_schema(self.df, version=False) + expected = { + 'fields': [{'name': 'idx', 'type': 'integer'}, + {'name': 'A', 'type': 'integer'}, + {'name': 'B', 'type': 'string'}, + {'name': 'C', 'type': 'datetime'}, + {'name': 'D', 'type': 'duration'}, + ], + 'primaryKey': ['idx'] + } + assert result == expected + result = build_table_schema(self.df) + assert "pandas_version" in result + + def test_series(self): + s = pd.Series([1, 2, 3], name='foo') + result = build_table_schema(s, version=False) + expected = {'fields': [{'name': 'index', 'type': 'integer'}, + {'name': 'foo', 'type': 'integer'}], + 'primaryKey': ['index']} + assert result == expected + result = build_table_schema(s) + assert 'pandas_version' in result + + def test_series_unnamed(self): + result = build_table_schema(pd.Series([1, 2, 3]), version=False) + expected = {'fields': [{'name': 'index', 'type': 'integer'}, + {'name': 'values', 'type': 'integer'}], + 'primaryKey': ['index']} + assert result == expected + + def test_multiindex(self): + df = self.df.copy() + idx = pd.MultiIndex.from_product([('a', 'b'), (1, 2)]) + df.index = idx + + result = build_table_schema(df, version=False) + expected = { + 'fields': [{'name': 'level_0', 'type': 'string'}, + {'name': 'level_1', 'type': 'integer'}, + {'name': 'A', 'type': 'integer'}, + {'name': 'B', 'type': 
'string'}, + {'name': 'C', 'type': 'datetime'}, + {'name': 'D', 'type': 'duration'}, + ], + 'primaryKey': ['level_0', 'level_1'] + } + assert result == expected + + df.index.names = ['idx0', None] + expected['fields'][0]['name'] = 'idx0' + expected['primaryKey'] = ['idx0', 'level_1'] + result = build_table_schema(df, version=False) + assert result == expected + + +class TestTableSchemaType(object): + + @pytest.mark.parametrize('int_type', [ + np.int, np.int16, np.int32, np.int64]) + def test_as_json_table_type_int_data(self, int_type): + int_data = [1, 2, 3] + assert as_json_table_type(np.array( + int_data, dtype=int_type)) == 'integer' + + @pytest.mark.parametrize('float_type', [ + np.float, np.float16, np.float32, np.float64]) + def test_as_json_table_type_float_data(self, float_type): + float_data = [1., 2., 3.] + assert as_json_table_type(np.array( + float_data, dtype=float_type)) == 'number' + + @pytest.mark.parametrize('bool_type', [bool, np.bool]) + def test_as_json_table_type_bool_data(self, bool_type): + bool_data = [True, False] + assert as_json_table_type(np.array( + bool_data, dtype=bool_type)) == 'boolean' + + @pytest.mark.parametrize('date_data', [ + pd.to_datetime(['2016']), + pd.to_datetime(['2016'], utc=True), + pd.Series(pd.to_datetime(['2016'])), + pd.Series(pd.to_datetime(['2016'], utc=True)), + pd.period_range('2016', freq='A', periods=3) + ]) + def test_as_json_table_type_date_data(self, date_data): + assert as_json_table_type(date_data) == 'datetime' + + @pytest.mark.parametrize('str_data', [ + pd.Series(['a', 'b']), pd.Index(['a', 'b'])]) + def test_as_json_table_type_string_data(self, str_data): + assert as_json_table_type(str_data) == 'string' + + @pytest.mark.parametrize('cat_data', [ + pd.Categorical(['a']), + pd.Categorical([1]), + pd.Series(pd.Categorical([1])), + pd.CategoricalIndex([1]), + pd.Categorical([1])]) + def test_as_json_table_type_categorical_data(self, cat_data): + assert as_json_table_type(cat_data) == 'any' + + # ------ 
+ # dtypes + # ------ + @pytest.mark.parametrize('int_dtype', [ + np.int, np.int16, np.int32, np.int64]) + def test_as_json_table_type_int_dtypes(self, int_dtype): + assert as_json_table_type(int_dtype) == 'integer' + + @pytest.mark.parametrize('float_dtype', [ + np.float, np.float16, np.float32, np.float64]) + def test_as_json_table_type_float_dtypes(self, float_dtype): + assert as_json_table_type(float_dtype) == 'number' + + @pytest.mark.parametrize('bool_dtype', [bool, np.bool]) + def test_as_json_table_type_bool_dtypes(self, bool_dtype): + assert as_json_table_type(bool_dtype) == 'boolean' + + @pytest.mark.parametrize('date_dtype', [ + np.datetime64, np.dtype("=1" + + with tm.assert_raises_regex(ValueError, msg): + pd.read_json(StringIO(lines_json_df), lines=True, + chunksize=chunksize) + + +@pytest.mark.parametrize("chunksize", [None, 1, 2]) +def test_readjson_chunks_multiple_empty_lines(chunksize): + j = """ + + {"A":1,"B":4} + + + + {"A":2,"B":5} + + + + + + + + {"A":3,"B":6} + """ + orig = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}) + test = pd.read_json(j, lines=True, chunksize=chunksize) + if chunksize is not None: + test = pd.concat(test) + tm.assert_frame_equal( + orig, test, obj="chunksize: {chunksize}".format(chunksize=chunksize)) diff --git a/pandas/io/tests/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py similarity index 72% rename from pandas/io/tests/json/test_ujson.py rename to pandas/tests/io/json/test_ujson.py index 6a986710ae444..e949772981eb7 100644 --- a/pandas/io/tests/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -1,23 +1,21 @@ # -*- coding: utf-8 -*- -from unittest import TestCase - try: import json except ImportError: import simplejson as json import math +import pytz import pytest -import platform -import sys import time import datetime import calendar import re import decimal +import dateutil from functools import partial from pandas.compat import range, zip, StringIO, u -import pandas.json as ujson +import 
pandas._libs.json as ujson import pandas.compat as compat import numpy as np @@ -25,23 +23,61 @@ import pandas.util.testing as tm -def _skip_if_python_ver(skip_major, skip_minor=None): - major, minor = sys.version_info[:2] - if major == skip_major and (skip_minor is None or minor == skip_minor): - pytest.skip("skipping Python version %d.%d" % (major, minor)) - - json_unicode = (json.dumps if compat.PY3 else partial(json.dumps, encoding="utf-8")) -class UltraJSONTests(TestCase): +class TestUltraJSONTests(object): + @pytest.mark.skipif(compat.is_platform_32bit(), + reason="not compliant on 32-bit, xref #15865") def test_encodeDecimal(self): sut = decimal.Decimal("1337.1337") encoded = ujson.encode(sut, double_precision=15) decoded = ujson.decode(encoded) - self.assertEqual(decoded, 1337.1337) + assert decoded == 1337.1337 + + sut = decimal.Decimal("0.95") + encoded = ujson.encode(sut, double_precision=1) + assert encoded == "1.0" + decoded = ujson.decode(encoded) + assert decoded == 1.0 + + sut = decimal.Decimal("0.94") + encoded = ujson.encode(sut, double_precision=1) + assert encoded == "0.9" + decoded = ujson.decode(encoded) + assert decoded == 0.9 + + sut = decimal.Decimal("1.95") + encoded = ujson.encode(sut, double_precision=1) + assert encoded == "2.0" + decoded = ujson.decode(encoded) + assert decoded == 2.0 + + sut = decimal.Decimal("-1.95") + encoded = ujson.encode(sut, double_precision=1) + assert encoded == "-2.0" + decoded = ujson.decode(encoded) + assert decoded == -2.0 + + sut = decimal.Decimal("0.995") + encoded = ujson.encode(sut, double_precision=2) + assert encoded == "1.0" + decoded = ujson.decode(encoded) + assert decoded == 1.0 + + sut = decimal.Decimal("0.9995") + encoded = ujson.encode(sut, double_precision=3) + assert encoded == "1.0" + decoded = ujson.decode(encoded) + assert decoded == 1.0 + + sut = decimal.Decimal("0.99999999999999944") + encoded = ujson.encode(sut, double_precision=15) + assert encoded == "1.0" + decoded = 
ujson.decode(encoded) + assert decoded == 1.0 def test_encodeStringConversion(self): input = "A string \\ / \b \f \n \r \t &" @@ -52,9 +88,9 @@ def test_encodeStringConversion(self): def helper(expected_output, **encode_kwargs): output = ujson.encode(input, **encode_kwargs) - self.assertEqual(input, json.loads(output)) - self.assertEqual(output, expected_output) - self.assertEqual(input, ujson.decode(output)) + assert input == json.loads(output) + assert output == expected_output + assert input == ujson.decode(output) # Default behavior assumes encode_html_chars=False. helper(not_html_encoded, ensure_ascii=True) @@ -72,19 +108,19 @@ def test_doubleLongIssue(self): sut = {u('a'): -4342969734183514} encoded = json.dumps(sut) decoded = json.loads(encoded) - self.assertEqual(sut, decoded) + assert sut == decoded encoded = ujson.encode(sut, double_precision=15) decoded = ujson.decode(encoded) - self.assertEqual(sut, decoded) + assert sut == decoded def test_doubleLongDecimalIssue(self): sut = {u('a'): -12345678901234.56789012} encoded = json.dumps(sut) decoded = json.loads(encoded) - self.assertEqual(sut, decoded) + assert sut == decoded encoded = ujson.encode(sut, double_precision=15) decoded = ujson.decode(encoded) - self.assertEqual(sut, decoded) + assert sut == decoded def test_encodeNonCLocale(self): import locale @@ -96,8 +132,8 @@ def test_encodeNonCLocale(self): locale.setlocale(locale.LC_NUMERIC, 'Italian_Italy') except: pytest.skip('Could not set locale for testing') - self.assertEqual(ujson.loads(ujson.dumps(4.78e60)), 4.78e60) - self.assertEqual(ujson.loads('4.78', precise_float=True), 4.78) + assert ujson.loads(ujson.dumps(4.78e60)) == 4.78e60 + assert ujson.loads('4.78', precise_float=True) == 4.78 locale.setlocale(locale.LC_NUMERIC, savedlocale) def test_encodeDecodeLongDecimal(self): @@ -109,20 +145,19 @@ def test_decimalDecodeTestPrecise(self): sut = {u('a'): 4.56} encoded = ujson.encode(sut) decoded = ujson.decode(encoded, precise_float=True) - 
self.assertEqual(sut, decoded) + assert sut == decoded + @pytest.mark.skipif(compat.is_platform_windows() and not compat.PY3, + reason="buggy on win-64 for py2") def test_encodeDoubleTinyExponential(self): - if compat.is_platform_windows() and not compat.PY3: - pytest.skip("buggy on win-64 for py2") - num = 1e-40 - self.assertEqual(num, ujson.decode(ujson.encode(num))) + assert num == ujson.decode(ujson.encode(num)) num = 1e-100 - self.assertEqual(num, ujson.decode(ujson.encode(num))) + assert num == ujson.decode(ujson.encode(num)) num = -1e-45 - self.assertEqual(num, ujson.decode(ujson.encode(num))) + assert num == ujson.decode(ujson.encode(num)) num = -1e-145 - self.assertTrue(np.allclose(num, ujson.decode(ujson.encode(num)))) + assert np.allclose(num, ujson.decode(ujson.encode(num))) def test_encodeDictWithUnicodeKeys(self): input = {u("key1"): u("value1"), u("key1"): @@ -140,27 +175,27 @@ def test_encodeDictWithUnicodeKeys(self): def test_encodeDoubleConversion(self): input = math.pi output = ujson.encode(input) - self.assertEqual(round(input, 5), round(json.loads(output), 5)) - self.assertEqual(round(input, 5), round(ujson.decode(output), 5)) + assert round(input, 5) == round(json.loads(output), 5) + assert round(input, 5) == round(ujson.decode(output), 5) def test_encodeWithDecimal(self): input = 1.0 output = ujson.encode(input) - self.assertEqual(output, "1.0") + assert output == "1.0" def test_encodeDoubleNegConversion(self): input = -math.pi output = ujson.encode(input) - self.assertEqual(round(input, 5), round(json.loads(output), 5)) - self.assertEqual(round(input, 5), round(ujson.decode(output), 5)) + assert round(input, 5) == round(json.loads(output), 5) + assert round(input, 5) == round(ujson.decode(output), 5) def test_encodeArrayOfNestedArrays(self): input = [[[[]]]] * 20 output = ujson.encode(input) - self.assertEqual(input, json.loads(output)) - # self.assertEqual(output, json.dumps(input)) - self.assertEqual(input, ujson.decode(output)) + assert 
input == json.loads(output) + # assert output == json.dumps(input) + assert input == ujson.decode(output) input = np.array(input) tm.assert_numpy_array_equal(input, ujson.decode( output, numpy=True, dtype=input.dtype)) @@ -168,44 +203,44 @@ def test_encodeArrayOfNestedArrays(self): def test_encodeArrayOfDoubles(self): input = [31337.31337, 31337.31337, 31337.31337, 31337.31337] * 10 output = ujson.encode(input) - self.assertEqual(input, json.loads(output)) - # self.assertEqual(output, json.dumps(input)) - self.assertEqual(input, ujson.decode(output)) + assert input == json.loads(output) + # assert output == json.dumps(input) + assert input == ujson.decode(output) tm.assert_numpy_array_equal( np.array(input), ujson.decode(output, numpy=True)) def test_doublePrecisionTest(self): input = 30.012345678901234 output = ujson.encode(input, double_precision=15) - self.assertEqual(input, json.loads(output)) - self.assertEqual(input, ujson.decode(output)) + assert input == json.loads(output) + assert input == ujson.decode(output) output = ujson.encode(input, double_precision=9) - self.assertEqual(round(input, 9), json.loads(output)) - self.assertEqual(round(input, 9), ujson.decode(output)) + assert round(input, 9) == json.loads(output) + assert round(input, 9) == ujson.decode(output) output = ujson.encode(input, double_precision=3) - self.assertEqual(round(input, 3), json.loads(output)) - self.assertEqual(round(input, 3), ujson.decode(output)) + assert round(input, 3) == json.loads(output) + assert round(input, 3) == ujson.decode(output) def test_invalidDoublePrecision(self): input = 30.12345678901234567890 - self.assertRaises(ValueError, ujson.encode, input, double_precision=20) - self.assertRaises(ValueError, ujson.encode, input, double_precision=-1) + pytest.raises(ValueError, ujson.encode, input, double_precision=20) + pytest.raises(ValueError, ujson.encode, input, double_precision=-1) # will throw typeError - self.assertRaises(TypeError, ujson.encode, input, 
double_precision='9') + pytest.raises(TypeError, ujson.encode, input, double_precision='9') # will throw typeError - self.assertRaises(TypeError, ujson.encode, - input, double_precision=None) + pytest.raises(TypeError, ujson.encode, + input, double_precision=None) def test_encodeStringConversion2(self): input = "A string \\ / \b \f \n \r \t" output = ujson.encode(input) - self.assertEqual(input, json.loads(output)) - self.assertEqual(output, '"A string \\\\ \\/ \\b \\f \\n \\r \\t"') - self.assertEqual(input, ujson.decode(output)) + assert input == json.loads(output) + assert output == '"A string \\\\ \\/ \\b \\f \\n \\r \\t"' + assert input == ujson.decode(output) pass def test_decodeUnicodeConversion(self): @@ -215,61 +250,55 @@ def test_encodeUnicodeConversion1(self): input = "Räksmörgås اسامة بن محمد بن عوض بن لادن" enc = ujson.encode(input) dec = ujson.decode(enc) - self.assertEqual(enc, json_unicode(input)) - self.assertEqual(dec, json.loads(enc)) + assert enc == json_unicode(input) + assert dec == json.loads(enc) def test_encodeControlEscaping(self): input = "\x19" enc = ujson.encode(input) dec = ujson.decode(enc) - self.assertEqual(input, dec) - self.assertEqual(enc, json_unicode(input)) + assert input == dec + assert enc == json_unicode(input) def test_encodeUnicodeConversion2(self): input = "\xe6\x97\xa5\xd1\x88" enc = ujson.encode(input) dec = ujson.decode(enc) - self.assertEqual(enc, json_unicode(input)) - self.assertEqual(dec, json.loads(enc)) + assert enc == json_unicode(input) + assert dec == json.loads(enc) def test_encodeUnicodeSurrogatePair(self): - _skip_if_python_ver(2, 5) - _skip_if_python_ver(2, 6) input = "\xf0\x90\x8d\x86" enc = ujson.encode(input) dec = ujson.decode(enc) - self.assertEqual(enc, json_unicode(input)) - self.assertEqual(dec, json.loads(enc)) + assert enc == json_unicode(input) + assert dec == json.loads(enc) def test_encodeUnicode4BytesUTF8(self): - _skip_if_python_ver(2, 5) - _skip_if_python_ver(2, 6) input = 
"\xf0\x91\x80\xb0TRAILINGNORMAL" enc = ujson.encode(input) dec = ujson.decode(enc) - self.assertEqual(enc, json_unicode(input)) - self.assertEqual(dec, json.loads(enc)) + assert enc == json_unicode(input) + assert dec == json.loads(enc) def test_encodeUnicode4BytesUTF8Highest(self): - _skip_if_python_ver(2, 5) - _skip_if_python_ver(2, 6) input = "\xf3\xbf\xbf\xbfTRAILINGNORMAL" enc = ujson.encode(input) dec = ujson.decode(enc) - self.assertEqual(enc, json_unicode(input)) - self.assertEqual(dec, json.loads(enc)) + assert enc == json_unicode(input) + assert dec == json.loads(enc) def test_encodeArrayInArray(self): input = [[[[]]]] output = ujson.encode(input) - self.assertEqual(input, json.loads(output)) - self.assertEqual(output, json.dumps(input)) - self.assertEqual(input, ujson.decode(output)) + assert input == json.loads(output) + assert output == json.dumps(input) + assert input == ujson.decode(output) tm.assert_numpy_array_equal( np.array(input), ujson.decode(output, numpy=True)) pass @@ -277,32 +306,32 @@ def test_encodeArrayInArray(self): def test_encodeIntConversion(self): input = 31337 output = ujson.encode(input) - self.assertEqual(input, json.loads(output)) - self.assertEqual(output, json.dumps(input)) - self.assertEqual(input, ujson.decode(output)) + assert input == json.loads(output) + assert output == json.dumps(input) + assert input == ujson.decode(output) pass def test_encodeIntNegConversion(self): input = -31337 output = ujson.encode(input) - self.assertEqual(input, json.loads(output)) - self.assertEqual(output, json.dumps(input)) - self.assertEqual(input, ujson.decode(output)) + assert input == json.loads(output) + assert output == json.dumps(input) + assert input == ujson.decode(output) pass def test_encodeLongNegConversion(self): input = -9223372036854775808 output = ujson.encode(input) - self.assertEqual(input, json.loads(output)) - self.assertEqual(output, json.dumps(input)) - self.assertEqual(input, ujson.decode(output)) + assert input == 
json.loads(output) + assert output == json.dumps(input) + assert input == ujson.decode(output) def test_encodeListConversion(self): input = [1, 2, 3, 4] output = ujson.encode(input) - self.assertEqual(input, json.loads(output)) - self.assertEqual(input, ujson.decode(output)) + assert input == json.loads(output) + assert input == ujson.decode(output) tm.assert_numpy_array_equal( np.array(input), ujson.decode(output, numpy=True)) pass @@ -310,41 +339,41 @@ def test_encodeListConversion(self): def test_encodeDictConversion(self): input = {"k1": 1, "k2": 2, "k3": 3, "k4": 4} output = ujson.encode(input) # noqa - self.assertEqual(input, json.loads(output)) - self.assertEqual(input, ujson.decode(output)) - self.assertEqual(input, ujson.decode(output)) + assert input == json.loads(output) + assert input == ujson.decode(output) + assert input == ujson.decode(output) pass def test_encodeNoneConversion(self): input = None output = ujson.encode(input) - self.assertEqual(input, json.loads(output)) - self.assertEqual(output, json.dumps(input)) - self.assertEqual(input, ujson.decode(output)) + assert input == json.loads(output) + assert output == json.dumps(input) + assert input == ujson.decode(output) pass def test_encodeTrueConversion(self): input = True output = ujson.encode(input) - self.assertEqual(input, json.loads(output)) - self.assertEqual(output, json.dumps(input)) - self.assertEqual(input, ujson.decode(output)) + assert input == json.loads(output) + assert output == json.dumps(input) + assert input == ujson.decode(output) pass def test_encodeFalseConversion(self): input = False output = ujson.encode(input) - self.assertEqual(input, json.loads(output)) - self.assertEqual(output, json.dumps(input)) - self.assertEqual(input, ujson.decode(output)) + assert input == json.loads(output) + assert output == json.dumps(input) + assert input == ujson.decode(output) def test_encodeDatetimeConversion(self): ts = time.time() input = datetime.datetime.fromtimestamp(ts) output = 
ujson.encode(input, date_unit='s') expected = calendar.timegm(input.utctimetuple()) - self.assertEqual(int(expected), json.loads(output)) - self.assertEqual(int(expected), ujson.decode(output)) + assert int(expected) == json.loads(output) + assert int(expected) == ujson.decode(output) def test_encodeDateConversion(self): ts = time.time() @@ -354,8 +383,8 @@ def test_encodeDateConversion(self): tup = (input.year, input.month, input.day, 0, 0, 0) expected = calendar.timegm(tup) - self.assertEqual(int(expected), json.loads(output)) - self.assertEqual(int(expected), ujson.decode(output)) + assert int(expected) == json.loads(output) + assert int(expected) == ujson.decode(output) def test_encodeTimeConversion(self): tests = [ @@ -365,26 +394,22 @@ def test_encodeTimeConversion(self): ] for test in tests: output = ujson.encode(test) - expected = '"%s"' % test.isoformat() - self.assertEqual(expected, output) + expected = '"{iso}"'.format(iso=test.isoformat()) + assert expected == output def test_encodeTimeConversion_pytz(self): - # GH11473 to_json segfaults with timezone-aware datetimes - tm._skip_if_no_pytz() - import pytz + # see gh-11473: to_json segfaults with timezone-aware datetimes test = datetime.time(10, 12, 15, 343243, pytz.utc) output = ujson.encode(test) - expected = '"%s"' % test.isoformat() - self.assertEqual(expected, output) + expected = '"{iso}"'.format(iso=test.isoformat()) + assert expected == output def test_encodeTimeConversion_dateutil(self): - # GH11473 to_json segfaults with timezone-aware datetimes - tm._skip_if_no_dateutil() - import dateutil + # see gh-11473: to_json segfaults with timezone-aware datetimes test = datetime.time(10, 12, 15, 343243, dateutil.tz.tzutc()) output = ujson.encode(test) - expected = '"%s"' % test.isoformat() - self.assertEqual(expected, output) + expected = '"{iso}"'.format(iso=test.isoformat()) + assert expected == output def test_nat(self): input = NaT @@ -392,7 +417,7 @@ def test_nat(self): def test_npy_nat(self): from 
distutils.version import LooseVersion - if LooseVersion(np.__version__) < '1.7.0': + if LooseVersion(np.__version__) < LooseVersion('1.7.0'): pytest.skip("numpy version < 1.7.0, is " "{0}".format(np.__version__)) @@ -400,38 +425,37 @@ def test_npy_nat(self): assert ujson.encode(input) == 'null', "Expected null" def test_datetime_units(self): - from pandas.lib import Timestamp + from pandas._libs.tslib import Timestamp val = datetime.datetime(2013, 8, 17, 21, 17, 12, 215504) stamp = Timestamp(val) roundtrip = ujson.decode(ujson.encode(val, date_unit='s')) - self.assertEqual(roundtrip, stamp.value // 10**9) + assert roundtrip == stamp.value // 10**9 roundtrip = ujson.decode(ujson.encode(val, date_unit='ms')) - self.assertEqual(roundtrip, stamp.value // 10**6) + assert roundtrip == stamp.value // 10**6 roundtrip = ujson.decode(ujson.encode(val, date_unit='us')) - self.assertEqual(roundtrip, stamp.value // 10**3) + assert roundtrip == stamp.value // 10**3 roundtrip = ujson.decode(ujson.encode(val, date_unit='ns')) - self.assertEqual(roundtrip, stamp.value) + assert roundtrip == stamp.value - self.assertRaises(ValueError, ujson.encode, val, date_unit='foo') + pytest.raises(ValueError, ujson.encode, val, date_unit='foo') def test_encodeToUTF8(self): - _skip_if_python_ver(2, 5) input = "\xe6\x97\xa5\xd1\x88" enc = ujson.encode(input, ensure_ascii=False) dec = ujson.decode(enc) - self.assertEqual(enc, json_unicode(input, ensure_ascii=False)) - self.assertEqual(dec, json.loads(enc)) + assert enc == json_unicode(input, ensure_ascii=False) + assert dec == json.loads(enc) def test_decodeFromUnicode(self): input = u("{\"obj\": 31337}") dec1 = ujson.decode(input) dec2 = ujson.decode(str(input)) - self.assertEqual(dec1, dec2) + assert dec1 == dec2 def test_encodeRecursionMax(self): # 8 is the max recursion depth @@ -648,14 +672,14 @@ def test_decodeDictWithNoValue(self): def test_decodeNumericIntPos(self): input = "31337" - self.assertEqual(31337, ujson.decode(input)) + assert 
31337 == ujson.decode(input) def test_decodeNumericIntNeg(self): input = "-31337" - self.assertEqual(-31337, ujson.decode(input)) + assert -31337 == ujson.decode(input) + @pytest.mark.skipif(compat.PY3, reason="only PY2") def test_encodeUnicode4BytesUTF8Fail(self): - _skip_if_python_ver(3) input = "\xfd\xbf\xbf\xbf\xbf\xbf" try: enc = ujson.encode(input) # noqa @@ -666,29 +690,29 @@ def test_encodeUnicode4BytesUTF8Fail(self): def test_encodeNullCharacter(self): input = "31337 \x00 1337" output = ujson.encode(input) - self.assertEqual(input, json.loads(output)) - self.assertEqual(output, json.dumps(input)) - self.assertEqual(input, ujson.decode(output)) + assert input == json.loads(output) + assert output == json.dumps(input) + assert input == ujson.decode(output) input = "\x00" output = ujson.encode(input) - self.assertEqual(input, json.loads(output)) - self.assertEqual(output, json.dumps(input)) - self.assertEqual(input, ujson.decode(output)) + assert input == json.loads(output) + assert output == json.dumps(input) + assert input == ujson.decode(output) - self.assertEqual('" \\u0000\\r\\n "', ujson.dumps(u(" \u0000\r\n "))) + assert '" \\u0000\\r\\n "' == ujson.dumps(u(" \u0000\r\n ")) pass def test_decodeNullCharacter(self): input = "\"31337 \\u0000 31337\"" - self.assertEqual(ujson.decode(input), json.loads(input)) + assert ujson.decode(input) == json.loads(input) def test_encodeListLongConversion(self): input = [9223372036854775807, 9223372036854775807, 9223372036854775807, 9223372036854775807, 9223372036854775807, 9223372036854775807] output = ujson.encode(input) - self.assertEqual(input, json.loads(output)) - self.assertEqual(input, ujson.decode(output)) + assert input == json.loads(output) + assert input == ujson.decode(output) tm.assert_numpy_array_equal(np.array(input), ujson.decode(output, numpy=True, dtype=np.int64)) @@ -697,55 +721,55 @@ def test_encodeListLongConversion(self): def test_encodeLongConversion(self): input = 9223372036854775807 output = 
ujson.encode(input) - self.assertEqual(input, json.loads(output)) - self.assertEqual(output, json.dumps(input)) - self.assertEqual(input, ujson.decode(output)) + assert input == json.loads(output) + assert output == json.dumps(input) + assert input == ujson.decode(output) pass def test_numericIntExp(self): input = "1337E40" output = ujson.decode(input) - self.assertEqual(output, json.loads(input)) + assert output == json.loads(input) def test_numericIntFrcExp(self): input = "1.337E40" output = ujson.decode(input) - self.assertAlmostEqual(output, json.loads(input)) + tm.assert_almost_equal(output, json.loads(input)) def test_decodeNumericIntExpEPLUS(self): input = "1337E+9" output = ujson.decode(input) - self.assertAlmostEqual(output, json.loads(input)) + tm.assert_almost_equal(output, json.loads(input)) def test_decodeNumericIntExpePLUS(self): input = "1.337e+40" output = ujson.decode(input) - self.assertAlmostEqual(output, json.loads(input)) + tm.assert_almost_equal(output, json.loads(input)) def test_decodeNumericIntExpE(self): input = "1337E40" output = ujson.decode(input) - self.assertAlmostEqual(output, json.loads(input)) + tm.assert_almost_equal(output, json.loads(input)) def test_decodeNumericIntExpe(self): input = "1337e40" output = ujson.decode(input) - self.assertAlmostEqual(output, json.loads(input)) + tm.assert_almost_equal(output, json.loads(input)) def test_decodeNumericIntExpEMinus(self): input = "1.337E-4" output = ujson.decode(input) - self.assertAlmostEqual(output, json.loads(input)) + tm.assert_almost_equal(output, json.loads(input)) def test_decodeNumericIntExpeMinus(self): input = "1.337e-4" output = ujson.decode(input) - self.assertAlmostEqual(output, json.loads(input)) + tm.assert_almost_equal(output, json.loads(input)) def test_dumpToFile(self): f = StringIO() ujson.dump([1, 2, 3], f) - self.assertEqual("[1,2,3]", f.getvalue()) + assert "[1,2,3]" == f.getvalue() def test_dumpToFileLikeObject(self): class filelike: @@ -757,7 +781,7 @@ def 
write(self, bytes): self.bytes += bytes f = filelike() ujson.dump([1, 2, 3], f) - self.assertEqual("[1,2,3]", f.bytes) + assert "[1,2,3]" == f.bytes def test_dumpFileArgsError(self): try: @@ -769,7 +793,8 @@ def test_dumpFileArgsError(self): def test_loadFile(self): f = StringIO("[1,2,3,4]") - self.assertEqual([1, 2, 3, 4], ujson.load(f)) + assert [1, 2, 3, 4] == ujson.load(f) + f = StringIO("[1,2,3,4]") tm.assert_numpy_array_equal( np.array([1, 2, 3, 4]), ujson.load(f, numpy=True)) @@ -784,7 +809,8 @@ def read(self): self.end = True return "[1,2,3,4]" f = filelike() - self.assertEqual([1, 2, 3, 4], ujson.load(f)) + assert [1, 2, 3, 4] == ujson.load(f) + f = filelike() tm.assert_numpy_array_equal( np.array([1, 2, 3, 4]), ujson.load(f, numpy=True)) @@ -830,13 +856,13 @@ def test_decodeNumberWith32bitSignBit(self): boundary2 = 2**32 # noqa docs = ( '{"id": 3590016419}', - '{"id": %s}' % 2**31, - '{"id": %s}' % 2**32, - '{"id": %s}' % ((2**32) - 1), + '{{"id": {low}}}'.format(low=2**31), + '{{"id": {high}}}'.format(high=2**32), + '{{"id": {one_less}}}'.format(one_less=(2**32) - 1), ) results = (3590016419, 2**31, 2**32, 2**32 - 1) for doc, result in zip(docs, results): - self.assertEqual(ujson.decode(doc)['id'], result) + assert ujson.decode(doc)['id'] == result def test_encodeBigEscape(self): for x in range(10): @@ -868,7 +894,7 @@ def toDict(self): o = DictTest() output = ujson.encode(o) dec = ujson.decode(output) - self.assertEqual(dec, d) + assert dec == d def test_defaultHandler(self): @@ -884,79 +910,81 @@ def recursive_attr(self): def __str__(self): return str(self.val) - self.assertRaises(OverflowError, ujson.encode, _TestObject("foo")) - self.assertEqual('"foo"', ujson.encode(_TestObject("foo"), - default_handler=str)) + pytest.raises(OverflowError, ujson.encode, _TestObject("foo")) + assert '"foo"' == ujson.encode(_TestObject("foo"), + default_handler=str) def my_handler(obj): return "foobar" - self.assertEqual('"foobar"', ujson.encode(_TestObject("foo"), - 
default_handler=my_handler)) + + assert '"foobar"' == ujson.encode(_TestObject("foo"), + default_handler=my_handler) def my_handler_raises(obj): raise TypeError("I raise for anything") - with tm.assertRaisesRegexp(TypeError, "I raise for anything"): + + with tm.assert_raises_regex(TypeError, "I raise for anything"): ujson.encode(_TestObject("foo"), default_handler=my_handler_raises) def my_int_handler(obj): return 42 - self.assertEqual( - 42, ujson.decode(ujson.encode(_TestObject("foo"), - default_handler=my_int_handler))) + + assert ujson.decode(ujson.encode( + _TestObject("foo"), default_handler=my_int_handler)) == 42 def my_obj_handler(obj): return datetime.datetime(2013, 2, 3) - self.assertEqual( - ujson.decode(ujson.encode(datetime.datetime(2013, 2, 3))), - ujson.decode(ujson.encode(_TestObject("foo"), - default_handler=my_obj_handler))) + + assert (ujson.decode(ujson.encode(datetime.datetime(2013, 2, 3))) == + ujson.decode(ujson.encode(_TestObject("foo"), + default_handler=my_obj_handler))) l = [_TestObject("foo"), _TestObject("bar")] - self.assertEqual(json.loads(json.dumps(l, default=str)), - ujson.decode(ujson.encode(l, default_handler=str))) + assert (json.loads(json.dumps(l, default=str)) == + ujson.decode(ujson.encode(l, default_handler=str))) -class NumpyJSONTests(TestCase): +class TestNumpyJSONTests(object): - def testBool(self): + def test_Bool(self): b = np.bool(True) - self.assertEqual(ujson.decode(ujson.encode(b)), b) + assert ujson.decode(ujson.encode(b)) == b - def testBoolArray(self): + def test_BoolArray(self): inpt = np.array([True, False, True, True, False, True, False, False], dtype=np.bool) outp = np.array(ujson.decode(ujson.encode(inpt)), dtype=np.bool) tm.assert_numpy_array_equal(inpt, outp) - def testInt(self): + def test_Int(self): num = np.int(2562010) - self.assertEqual(np.int(ujson.decode(ujson.encode(num))), num) + assert np.int(ujson.decode(ujson.encode(num))) == num num = np.int8(127) - 
self.assertEqual(np.int8(ujson.decode(ujson.encode(num))), num) + assert np.int8(ujson.decode(ujson.encode(num))) == num num = np.int16(2562010) - self.assertEqual(np.int16(ujson.decode(ujson.encode(num))), num) + assert np.int16(ujson.decode(ujson.encode(num))) == num num = np.int32(2562010) - self.assertEqual(np.int32(ujson.decode(ujson.encode(num))), num) + assert np.int32(ujson.decode(ujson.encode(num))) == num num = np.int64(2562010) - self.assertEqual(np.int64(ujson.decode(ujson.encode(num))), num) + assert np.int64(ujson.decode(ujson.encode(num))) == num num = np.uint8(255) - self.assertEqual(np.uint8(ujson.decode(ujson.encode(num))), num) + assert np.uint8(ujson.decode(ujson.encode(num))) == num num = np.uint16(2562010) - self.assertEqual(np.uint16(ujson.decode(ujson.encode(num))), num) + assert np.uint16(ujson.decode(ujson.encode(num))) == num num = np.uint32(2562010) - self.assertEqual(np.uint32(ujson.decode(ujson.encode(num))), num) + assert np.uint32(ujson.decode(ujson.encode(num))) == num num = np.uint64(2562010) - self.assertEqual(np.uint64(ujson.decode(ujson.encode(num))), num) + assert np.uint64(ujson.decode(ujson.encode(num))) == num - def testIntArray(self): + def test_IntArray(self): arr = np.arange(100, dtype=np.int) dtypes = (np.int, np.int8, np.int16, np.int32, np.int64, np.uint, np.uint8, np.uint16, np.uint32, np.uint64) @@ -965,47 +993,47 @@ def testIntArray(self): outp = np.array(ujson.decode(ujson.encode(inpt)), dtype=dtype) tm.assert_numpy_array_equal(inpt, outp) - def testIntMax(self): + def test_IntMax(self): num = np.int(np.iinfo(np.int).max) - self.assertEqual(np.int(ujson.decode(ujson.encode(num))), num) + assert np.int(ujson.decode(ujson.encode(num))) == num num = np.int8(np.iinfo(np.int8).max) - self.assertEqual(np.int8(ujson.decode(ujson.encode(num))), num) + assert np.int8(ujson.decode(ujson.encode(num))) == num num = np.int16(np.iinfo(np.int16).max) - self.assertEqual(np.int16(ujson.decode(ujson.encode(num))), num) + assert 
np.int16(ujson.decode(ujson.encode(num))) == num num = np.int32(np.iinfo(np.int32).max) - self.assertEqual(np.int32(ujson.decode(ujson.encode(num))), num) + assert np.int32(ujson.decode(ujson.encode(num))) == num num = np.uint8(np.iinfo(np.uint8).max) - self.assertEqual(np.uint8(ujson.decode(ujson.encode(num))), num) + assert np.uint8(ujson.decode(ujson.encode(num))) == num num = np.uint16(np.iinfo(np.uint16).max) - self.assertEqual(np.uint16(ujson.decode(ujson.encode(num))), num) + assert np.uint16(ujson.decode(ujson.encode(num))) == num num = np.uint32(np.iinfo(np.uint32).max) - self.assertEqual(np.uint32(ujson.decode(ujson.encode(num))), num) + assert np.uint32(ujson.decode(ujson.encode(num))) == num - if platform.architecture()[0] != '32bit': + if not compat.is_platform_32bit(): num = np.int64(np.iinfo(np.int64).max) - self.assertEqual(np.int64(ujson.decode(ujson.encode(num))), num) + assert np.int64(ujson.decode(ujson.encode(num))) == num # uint64 max will always overflow as it's encoded to signed num = np.uint64(np.iinfo(np.int64).max) - self.assertEqual(np.uint64(ujson.decode(ujson.encode(num))), num) + assert np.uint64(ujson.decode(ujson.encode(num))) == num - def testFloat(self): + def test_Float(self): num = np.float(256.2013) - self.assertEqual(np.float(ujson.decode(ujson.encode(num))), num) + assert np.float(ujson.decode(ujson.encode(num))) == num num = np.float32(256.2013) - self.assertEqual(np.float32(ujson.decode(ujson.encode(num))), num) + assert np.float32(ujson.decode(ujson.encode(num))) == num num = np.float64(256.2013) - self.assertEqual(np.float64(ujson.decode(ujson.encode(num))), num) + assert np.float64(ujson.decode(ujson.encode(num))) == num - def testFloatArray(self): + def test_FloatArray(self): arr = np.arange(12.5, 185.72, 1.7322, dtype=np.float) dtypes = (np.float, np.float32, np.float64) @@ -1015,7 +1043,7 @@ def testFloatArray(self): inpt, double_precision=15)), dtype=dtype) tm.assert_almost_equal(inpt, outp) - def testFloatMax(self): 
+ def test_FloatMax(self): num = np.float(np.finfo(np.float).max / 10) tm.assert_almost_equal(np.float(ujson.decode( ujson.encode(num, double_precision=15))), num, 15) @@ -1028,7 +1056,7 @@ def testFloatMax(self): tm.assert_almost_equal(np.float64(ujson.decode( ujson.encode(num, double_precision=15))), num, 15) - def testArrays(self): + def test_Arrays(self): arr = np.arange(100) arr = arr.reshape((10, 10)) @@ -1069,13 +1097,13 @@ def testArrays(self): outp = ujson.decode(ujson.encode(arr), numpy=True, dtype=np.float32) tm.assert_almost_equal(arr, outp) - def testOdArray(self): + def test_OdArray(self): def will_raise(): ujson.encode(np.array(1)) - self.assertRaises(TypeError, will_raise) + pytest.raises(TypeError, will_raise) - def testArrayNumpyExcept(self): + def test_ArrayNumpyExcept(self): input = ujson.dumps([42, {}, 'a']) try: @@ -1158,18 +1186,18 @@ def testArrayNumpyExcept(self): except: assert False, "Wrong exception" - def testArrayNumpyLabelled(self): + def test_ArrayNumpyLabelled(self): input = {'a': []} output = ujson.loads(ujson.dumps(input), numpy=True, labelled=True) - self.assertTrue((np.empty((1, 0)) == output[0]).all()) - self.assertTrue((np.array(['a']) == output[1]).all()) - self.assertTrue(output[2] is None) + assert (np.empty((1, 0)) == output[0]).all() + assert (np.array(['a']) == output[1]).all() + assert output[2] is None input = [{'a': 42}] output = ujson.loads(ujson.dumps(input), numpy=True, labelled=True) - self.assertTrue((np.array([42]) == output[0]).all()) - self.assertTrue(output[1] is None) - self.assertTrue((np.array([u('a')]) == output[2]).all()) + assert (np.array([42]) == output[0]).all() + assert output[1] is None + assert (np.array([u('a')]) == output[2]).all() # Write out the dump explicitly so there is no dependency on iteration # order GH10837 @@ -1178,76 +1206,76 @@ def testArrayNumpyLabelled(self): output = ujson.loads(input_dumps, numpy=True, labelled=True) expectedvals = np.array( [42, 31, 24, 99, 2.4, 78], 
dtype=int).reshape((3, 2)) - self.assertTrue((expectedvals == output[0]).all()) - self.assertTrue(output[1] is None) - self.assertTrue((np.array([u('a'), 'b']) == output[2]).all()) + assert (expectedvals == output[0]).all() + assert output[1] is None + assert (np.array([u('a'), 'b']) == output[2]).all() input_dumps = ('{"1": {"a": 42, "b":31}, "2": {"a": 24, "c": 99}, ' '"3": {"a": 2.4, "b": 78}}') output = ujson.loads(input_dumps, numpy=True, labelled=True) expectedvals = np.array( [42, 31, 24, 99, 2.4, 78], dtype=int).reshape((3, 2)) - self.assertTrue((expectedvals == output[0]).all()) - self.assertTrue((np.array(['1', '2', '3']) == output[1]).all()) - self.assertTrue((np.array(['a', 'b']) == output[2]).all()) + assert (expectedvals == output[0]).all() + assert (np.array(['1', '2', '3']) == output[1]).all() + assert (np.array(['a', 'b']) == output[2]).all() -class PandasJSONTests(TestCase): +class TestPandasJSONTests(object): - def testDataFrame(self): + def test_DataFrame(self): df = DataFrame([[1, 2, 3], [4, 5, 6]], index=[ 'a', 'b'], columns=['x', 'y', 'z']) # column indexed outp = DataFrame(ujson.decode(ujson.encode(df))) - self.assertTrue((df == outp).values.all()) + assert (df == outp).values.all() tm.assert_index_equal(df.columns, outp.columns) tm.assert_index_equal(df.index, outp.index) dec = _clean_dict(ujson.decode(ujson.encode(df, orient="split"))) outp = DataFrame(**dec) - self.assertTrue((df == outp).values.all()) + assert (df == outp).values.all() tm.assert_index_equal(df.columns, outp.columns) tm.assert_index_equal(df.index, outp.index) outp = DataFrame(ujson.decode(ujson.encode(df, orient="records"))) outp.index = df.index - self.assertTrue((df == outp).values.all()) + assert (df == outp).values.all() tm.assert_index_equal(df.columns, outp.columns) outp = DataFrame(ujson.decode(ujson.encode(df, orient="values"))) outp.index = df.index - self.assertTrue((df.values == outp.values).all()) + assert (df.values == outp.values).all() outp = 
DataFrame(ujson.decode(ujson.encode(df, orient="index"))) - self.assertTrue((df.transpose() == outp).values.all()) + assert (df.transpose() == outp).values.all() tm.assert_index_equal(df.transpose().columns, outp.columns) tm.assert_index_equal(df.transpose().index, outp.index) - def testDataFrameNumpy(self): + def test_DataFrameNumpy(self): df = DataFrame([[1, 2, 3], [4, 5, 6]], index=[ 'a', 'b'], columns=['x', 'y', 'z']) # column indexed outp = DataFrame(ujson.decode(ujson.encode(df), numpy=True)) - self.assertTrue((df == outp).values.all()) + assert (df == outp).values.all() tm.assert_index_equal(df.columns, outp.columns) tm.assert_index_equal(df.index, outp.index) dec = _clean_dict(ujson.decode(ujson.encode(df, orient="split"), numpy=True)) outp = DataFrame(**dec) - self.assertTrue((df == outp).values.all()) + assert (df == outp).values.all() tm.assert_index_equal(df.columns, outp.columns) tm.assert_index_equal(df.index, outp.index) outp = DataFrame(ujson.decode(ujson.encode(df, orient="index"), numpy=True)) - self.assertTrue((df.transpose() == outp).values.all()) + assert (df.transpose() == outp).values.all() tm.assert_index_equal(df.transpose().columns, outp.columns) tm.assert_index_equal(df.transpose().index, outp.index) - def testDataFrameNested(self): + def test_DataFrameNested(self): df = DataFrame([[1, 2, 3], [4, 5, 6]], index=[ 'a', 'b'], columns=['x', 'y', 'z']) @@ -1255,52 +1283,48 @@ def testDataFrameNested(self): exp = {'df1': ujson.decode(ujson.encode(df)), 'df2': ujson.decode(ujson.encode(df))} - self.assertTrue(ujson.decode(ujson.encode(nested)) == exp) + assert ujson.decode(ujson.encode(nested)) == exp exp = {'df1': ujson.decode(ujson.encode(df, orient="index")), 'df2': ujson.decode(ujson.encode(df, orient="index"))} - self.assertTrue(ujson.decode( - ujson.encode(nested, orient="index")) == exp) + assert ujson.decode(ujson.encode(nested, orient="index")) == exp exp = {'df1': ujson.decode(ujson.encode(df, orient="records")), 'df2': 
ujson.decode(ujson.encode(df, orient="records"))} - self.assertTrue(ujson.decode( - ujson.encode(nested, orient="records")) == exp) + assert ujson.decode(ujson.encode(nested, orient="records")) == exp exp = {'df1': ujson.decode(ujson.encode(df, orient="values")), 'df2': ujson.decode(ujson.encode(df, orient="values"))} - self.assertTrue(ujson.decode( - ujson.encode(nested, orient="values")) == exp) + assert ujson.decode(ujson.encode(nested, orient="values")) == exp exp = {'df1': ujson.decode(ujson.encode(df, orient="split")), 'df2': ujson.decode(ujson.encode(df, orient="split"))} - self.assertTrue(ujson.decode( - ujson.encode(nested, orient="split")) == exp) + assert ujson.decode(ujson.encode(nested, orient="split")) == exp - def testDataFrameNumpyLabelled(self): + def test_DataFrameNumpyLabelled(self): df = DataFrame([[1, 2, 3], [4, 5, 6]], index=[ 'a', 'b'], columns=['x', 'y', 'z']) # column indexed outp = DataFrame(*ujson.decode(ujson.encode(df), numpy=True, labelled=True)) - self.assertTrue((df.T == outp).values.all()) + assert (df.T == outp).values.all() tm.assert_index_equal(df.T.columns, outp.columns) tm.assert_index_equal(df.T.index, outp.index) outp = DataFrame(*ujson.decode(ujson.encode(df, orient="records"), numpy=True, labelled=True)) outp.index = df.index - self.assertTrue((df == outp).values.all()) + assert (df == outp).values.all() tm.assert_index_equal(df.columns, outp.columns) outp = DataFrame(*ujson.decode(ujson.encode(df, orient="index"), numpy=True, labelled=True)) - self.assertTrue((df == outp).values.all()) + assert (df == outp).values.all() tm.assert_index_equal(df.columns, outp.columns) tm.assert_index_equal(df.index, outp.index) - def testSeries(self): + def test_Series(self): s = Series([10, 20, 30, 40, 50, 60], name="series", index=[6, 7, 8, 9, 10, 15]).sort_values() @@ -1348,7 +1372,7 @@ def testSeries(self): s, orient="index"), numpy=True)).sort_values() tm.assert_series_equal(outp, exp) - def testSeriesNested(self): + def 
test_SeriesNested(self): s = Series([10, 20, 30, 40, 50, 60], name="series", index=[6, 7, 8, 9, 10, 15]).sort_values() @@ -1356,29 +1380,25 @@ def testSeriesNested(self): exp = {'s1': ujson.decode(ujson.encode(s)), 's2': ujson.decode(ujson.encode(s))} - self.assertTrue(ujson.decode(ujson.encode(nested)) == exp) + assert ujson.decode(ujson.encode(nested)) == exp exp = {'s1': ujson.decode(ujson.encode(s, orient="split")), 's2': ujson.decode(ujson.encode(s, orient="split"))} - self.assertTrue(ujson.decode( - ujson.encode(nested, orient="split")) == exp) + assert ujson.decode(ujson.encode(nested, orient="split")) == exp exp = {'s1': ujson.decode(ujson.encode(s, orient="records")), 's2': ujson.decode(ujson.encode(s, orient="records"))} - self.assertTrue(ujson.decode( - ujson.encode(nested, orient="records")) == exp) + assert ujson.decode(ujson.encode(nested, orient="records")) == exp exp = {'s1': ujson.decode(ujson.encode(s, orient="values")), 's2': ujson.decode(ujson.encode(s, orient="values"))} - self.assertTrue(ujson.decode( - ujson.encode(nested, orient="values")) == exp) + assert ujson.decode(ujson.encode(nested, orient="values")) == exp exp = {'s1': ujson.decode(ujson.encode(s, orient="index")), 's2': ujson.decode(ujson.encode(s, orient="index"))} - self.assertTrue(ujson.decode( - ujson.encode(nested, orient="index")) == exp) + assert ujson.decode(ujson.encode(nested, orient="index")) == exp - def testIndex(self): + def test_Index(self): i = Index([23, 45, 18, 98, 43, 11], name="index") # column indexed @@ -1391,13 +1411,13 @@ def testIndex(self): dec = _clean_dict(ujson.decode(ujson.encode(i, orient="split"))) outp = Index(**dec) tm.assert_index_equal(i, outp) - self.assertTrue(i.name == outp.name) + assert i.name == outp.name dec = _clean_dict(ujson.decode(ujson.encode(i, orient="split"), numpy=True)) outp = Index(**dec) tm.assert_index_equal(i, outp) - self.assertTrue(i.name == outp.name) + assert i.name == outp.name outp = Index(ujson.decode(ujson.encode(i, 
orient="values")), name='index') @@ -1424,7 +1444,7 @@ def testIndex(self): tm.assert_index_equal(i, outp) def test_datetimeindex(self): - from pandas.tseries.index import date_range + from pandas.core.indexes.datetimes import date_range rng = date_range('1/1/2000', periods=20) @@ -1559,36 +1579,49 @@ def test_decodeArrayFaultyUnicode(self): def test_decodeFloatingPointAdditionalTests(self): places = 15 - self.assertAlmostEqual(-1.1234567893, - ujson.loads("-1.1234567893"), places=places) - self.assertAlmostEqual(-1.234567893, - ujson.loads("-1.234567893"), places=places) - self.assertAlmostEqual(-1.34567893, - ujson.loads("-1.34567893"), places=places) - self.assertAlmostEqual(-1.4567893, - ujson.loads("-1.4567893"), places=places) - self.assertAlmostEqual(-1.567893, - ujson.loads("-1.567893"), places=places) - self.assertAlmostEqual(-1.67893, - ujson.loads("-1.67893"), places=places) - self.assertAlmostEqual(-1.7893, ujson.loads("-1.7893"), places=places) - self.assertAlmostEqual(-1.893, ujson.loads("-1.893"), places=places) - self.assertAlmostEqual(-1.3, ujson.loads("-1.3"), places=places) - - self.assertAlmostEqual(1.1234567893, ujson.loads( - "1.1234567893"), places=places) - self.assertAlmostEqual(1.234567893, ujson.loads( - "1.234567893"), places=places) - self.assertAlmostEqual( - 1.34567893, ujson.loads("1.34567893"), places=places) - self.assertAlmostEqual( - 1.4567893, ujson.loads("1.4567893"), places=places) - self.assertAlmostEqual( - 1.567893, ujson.loads("1.567893"), places=places) - self.assertAlmostEqual(1.67893, ujson.loads("1.67893"), places=places) - self.assertAlmostEqual(1.7893, ujson.loads("1.7893"), places=places) - self.assertAlmostEqual(1.893, ujson.loads("1.893"), places=places) - self.assertAlmostEqual(1.3, ujson.loads("1.3"), places=places) + tm.assert_almost_equal(-1.1234567893, + ujson.loads("-1.1234567893"), + check_less_precise=places) + tm.assert_almost_equal(-1.234567893, + ujson.loads("-1.234567893"), + check_less_precise=places) 
+ tm.assert_almost_equal(-1.34567893, + ujson.loads("-1.34567893"), + check_less_precise=places) + tm.assert_almost_equal(-1.4567893, + ujson.loads("-1.4567893"), + check_less_precise=places) + tm.assert_almost_equal(-1.567893, + ujson.loads("-1.567893"), + check_less_precise=places) + tm.assert_almost_equal(-1.67893, + ujson.loads("-1.67893"), + check_less_precise=places) + tm.assert_almost_equal(-1.7893, ujson.loads("-1.7893"), + check_less_precise=places) + tm.assert_almost_equal(-1.893, ujson.loads("-1.893"), + check_less_precise=places) + tm.assert_almost_equal(-1.3, ujson.loads("-1.3"), + check_less_precise=places) + + tm.assert_almost_equal(1.1234567893, ujson.loads( + "1.1234567893"), check_less_precise=places) + tm.assert_almost_equal(1.234567893, ujson.loads( + "1.234567893"), check_less_precise=places) + tm.assert_almost_equal( + 1.34567893, ujson.loads("1.34567893"), check_less_precise=places) + tm.assert_almost_equal( + 1.4567893, ujson.loads("1.4567893"), check_less_precise=places) + tm.assert_almost_equal( + 1.567893, ujson.loads("1.567893"), check_less_precise=places) + tm.assert_almost_equal(1.67893, ujson.loads("1.67893"), + check_less_precise=places) + tm.assert_almost_equal(1.7893, ujson.loads("1.7893"), + check_less_precise=places) + tm.assert_almost_equal(1.893, ujson.loads("1.893"), + check_less_precise=places) + tm.assert_almost_equal(1.3, ujson.loads("1.3"), + check_less_precise=places) def test_encodeBigSet(self): s = set() @@ -1598,7 +1631,7 @@ def test_encodeBigSet(self): def test_encodeEmptySet(self): s = set() - self.assertEqual("[]", ujson.encode(s)) + assert "[]" == ujson.encode(s) def test_encodeSet(self): s = set([1, 2, 3, 4, 5, 6, 7, 8, 9]) @@ -1606,8 +1639,8 @@ def test_encodeSet(self): dec = ujson.decode(enc) for v in dec: - self.assertTrue(v in s) + assert v in s def _clean_dict(d): - return dict((str(k), v) for k, v in compat.iteritems(d)) + return {str(k): v for k, v in compat.iteritems(d)} diff --git 
a/pandas/tests/io/msgpack/__init__.py b/pandas/tests/io/msgpack/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/io/msgpack/common.py b/pandas/tests/io/msgpack/common.py new file mode 100644 index 0000000000000..b770d12cffbfa --- /dev/null +++ b/pandas/tests/io/msgpack/common.py @@ -0,0 +1,10 @@ +from pandas.compat import PY3 + + +# array compat +if PY3: + frombytes = lambda obj, data: obj.frombytes(data) + tobytes = lambda obj: obj.tobytes() +else: + frombytes = lambda obj, data: obj.fromstring(data) + tobytes = lambda obj: obj.tostring() diff --git a/pandas/tests/io/msgpack/data/frame.mp b/pandas/tests/io/msgpack/data/frame.mp new file mode 100644 index 0000000000000000000000000000000000000000..21e20d262b26c1a4835bdb4c00109a371e7e46f1 GIT binary patch literal 309 zcmYk2O%8%E5Jo9yGVV3T#H|+~Bc1pIl%`V+>`BSsB9Di(QO7A<_Z!tkRhFlHfXnkW7Y@kqOFZ^iNWKsK=wu;N|_+U!*!fyhnO_?A!CdXU{q@e~9VdSucd1w*T|Td;pDWe-{7% literal 0 HcmV?d00001 diff --git a/pandas/tests/test_msgpack/test_buffer.py b/pandas/tests/io/msgpack/test_buffer.py similarity index 76% rename from pandas/tests/test_msgpack/test_buffer.py rename to pandas/tests/io/msgpack/test_buffer.py index caaa22bfd08fc..8ebec734f1d3d 100644 --- a/pandas/tests/test_msgpack/test_buffer.py +++ b/pandas/tests/io/msgpack/test_buffer.py @@ -1,12 +1,13 @@ # coding: utf-8 -from pandas.msgpack import packb, unpackb +from pandas.io.msgpack import packb, unpackb +from .common import frombytes def test_unpack_buffer(): from array import array buf = array('b') - buf.fromstring(packb((b'foo', b'bar'))) + frombytes(buf, packb((b'foo', b'bar'))) obj = unpackb(buf, use_list=1) assert [b'foo', b'bar'] == obj diff --git a/pandas/tests/test_msgpack/test_case.py b/pandas/tests/io/msgpack/test_case.py similarity index 95% rename from pandas/tests/test_msgpack/test_case.py rename to pandas/tests/io/msgpack/test_case.py index a8a45b5b37eb0..c0e76b37ee46d 100644 --- a/pandas/tests/test_msgpack/test_case.py +++ 
b/pandas/tests/io/msgpack/test_case.py @@ -1,6 +1,6 @@ # coding: utf-8 -from pandas.msgpack import packb, unpackb +from pandas.io.msgpack import packb, unpackb def check(length, obj): @@ -98,10 +98,10 @@ def test_match(): (tuple(range(16)), (b"\xdc\x00\x10\x00\x01\x02\x03\x04\x05\x06\x07" b"\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f")), ({}, b'\x80'), - (dict([(x, x) for x in range(15)]), + ({x: x for x in range(15)}, (b'\x8f\x00\x00\x01\x01\x02\x02\x03\x03\x04\x04\x05\x05\x06\x06\x07' b'\x07\x08\x08\t\t\n\n\x0b\x0b\x0c\x0c\r\r\x0e\x0e')), - (dict([(x, x) for x in range(16)]), + ({x: x for x in range(16)}, (b'\xde\x00\x10\x00\x00\x01\x01\x02\x02\x03\x03\x04\x04\x05\x05\x06' b'\x06\x07\x07\x08\x08\t\t\n\n\x0b\x0b\x0c\x0c\r\r\x0e\x0e' b'\x0f\x0f')), diff --git a/pandas/tests/io/msgpack/test_except.py b/pandas/tests/io/msgpack/test_except.py new file mode 100644 index 0000000000000..5a803c5eba34b --- /dev/null +++ b/pandas/tests/io/msgpack/test_except.py @@ -0,0 +1,39 @@ +# coding: utf-8 + +from datetime import datetime +from pandas.io.msgpack import packb, unpackb + +import pytest +import pandas.util.testing as tm + + +class DummyException(Exception): + pass + + +class TestExceptions(object): + + def test_raise_on_find_unsupported_value(self): + msg = "can\'t serialize datetime" + with tm.assert_raises_regex(TypeError, msg): + packb(datetime.now()) + + def test_raise_from_object_hook(self): + def hook(_): + raise DummyException() + + pytest.raises(DummyException, unpackb, packb({}), object_hook=hook) + pytest.raises(DummyException, unpackb, packb({'fizz': 'buzz'}), + object_hook=hook) + pytest.raises(DummyException, unpackb, packb({'fizz': 'buzz'}), + object_pairs_hook=hook) + pytest.raises(DummyException, unpackb, + packb({'fizz': {'buzz': 'spam'}}), object_hook=hook) + pytest.raises(DummyException, unpackb, + packb({'fizz': {'buzz': 'spam'}}), + object_pairs_hook=hook) + + def test_invalid_value(self): + msg = "Unpack failed: error" + with tm.assert_raises_regex(ValueError, 
msg): + unpackb(b"\xd9\x97#DL_") diff --git a/pandas/tests/test_msgpack/test_extension.py b/pandas/tests/io/msgpack/test_extension.py similarity index 89% rename from pandas/tests/test_msgpack/test_extension.py rename to pandas/tests/io/msgpack/test_extension.py index 97f0962a753d9..2ee72c8a55cb4 100644 --- a/pandas/tests/test_msgpack/test_extension.py +++ b/pandas/tests/io/msgpack/test_extension.py @@ -1,7 +1,9 @@ from __future__ import print_function import array -import pandas.msgpack as msgpack -from pandas.msgpack import ExtType + +import pandas.io.msgpack as msgpack +from pandas.io.msgpack import ExtType +from .common import frombytes, tobytes def test_pack_ext_type(): @@ -42,15 +44,15 @@ def default(obj): print('default called', obj) if isinstance(obj, array.array): typecode = 123 # application specific typecode - data = obj.tostring() + data = tobytes(obj) return ExtType(typecode, data) - raise TypeError("Unknwon type object %r" % (obj, )) + raise TypeError("Unknown type object %r" % (obj, )) def ext_hook(code, data): print('ext_hook called', code, data) assert code == 123 obj = array.array('d') - obj.fromstring(data) + frombytes(obj, data) return obj obj = [42, b'hello', array.array('d', [1.1, 2.2, 3.3])] diff --git a/pandas/tests/test_msgpack/test_format.py b/pandas/tests/io/msgpack/test_format.py similarity index 98% rename from pandas/tests/test_msgpack/test_format.py rename to pandas/tests/io/msgpack/test_format.py index a4b309ebb657d..3659602e1381f 100644 --- a/pandas/tests/test_msgpack/test_format.py +++ b/pandas/tests/io/msgpack/test_format.py @@ -1,6 +1,6 @@ # coding: utf-8 -from pandas.msgpack import unpackb +from pandas.io.msgpack import unpackb def check(src, should, use_list=0): diff --git a/pandas/tests/test_msgpack/test_limits.py b/pandas/tests/io/msgpack/test_limits.py similarity index 64% rename from pandas/tests/test_msgpack/test_limits.py rename to pandas/tests/io/msgpack/test_limits.py index 9c08f328b90dd..e4abd4ddb8d13 100644 --- 
a/pandas/tests/test_msgpack/test_limits.py +++ b/pandas/tests/io/msgpack/test_limits.py @@ -1,32 +1,33 @@ # coding: utf-8 from __future__ import (absolute_import, division, print_function, unicode_literals) -import pandas.util.testing as tm +from pandas.io.msgpack import packb, unpackb, Packer, Unpacker, ExtType -from pandas.msgpack import packb, unpackb, Packer, Unpacker, ExtType +import pytest +import pandas.util.testing as tm -class TestLimits(tm.TestCase): +class TestLimits(object): def test_integer(self): x = -(2 ** 63) assert unpackb(packb(x)) == x - self.assertRaises((OverflowError, ValueError), packb, x - 1) + pytest.raises((OverflowError, ValueError), packb, x - 1) x = 2 ** 64 - 1 assert unpackb(packb(x)) == x - self.assertRaises((OverflowError, ValueError), packb, x + 1) + pytest.raises((OverflowError, ValueError), packb, x + 1) def test_array_header(self): packer = Packer() packer.pack_array_header(2 ** 32 - 1) - self.assertRaises((OverflowError, ValueError), - packer.pack_array_header, 2 ** 32) + pytest.raises((OverflowError, ValueError), + packer.pack_array_header, 2 ** 32) def test_map_header(self): packer = Packer() packer.pack_map_header(2 ** 32 - 1) - self.assertRaises((OverflowError, ValueError), - packer.pack_array_header, 2 ** 32) + pytest.raises((OverflowError, ValueError), + packer.pack_array_header, 2 ** 32) def test_max_str_len(self): d = 'x' * 3 @@ -38,7 +39,10 @@ def test_max_str_len(self): unpacker = Unpacker(max_str_len=2, encoding='utf-8') unpacker.feed(packed) - self.assertRaises(ValueError, unpacker.unpack) + + msg = "3 exceeds max_str_len" + with tm.assert_raises_regex(ValueError, msg): + unpacker.unpack() def test_max_bin_len(self): d = b'x' * 3 @@ -50,7 +54,10 @@ def test_max_bin_len(self): unpacker = Unpacker(max_bin_len=2) unpacker.feed(packed) - self.assertRaises(ValueError, unpacker.unpack) + + msg = "3 exceeds max_bin_len" + with tm.assert_raises_regex(ValueError, msg): + unpacker.unpack() def test_max_array_len(self): d = [1, 
2, 3] @@ -62,7 +69,10 @@ def test_max_array_len(self): unpacker = Unpacker(max_array_len=2) unpacker.feed(packed) - self.assertRaises(ValueError, unpacker.unpack) + + msg = "3 exceeds max_array_len" + with tm.assert_raises_regex(ValueError, msg): + unpacker.unpack() def test_max_map_len(self): d = {1: 2, 3: 4, 5: 6} @@ -74,7 +84,10 @@ def test_max_map_len(self): unpacker = Unpacker(max_map_len=2) unpacker.feed(packed) - self.assertRaises(ValueError, unpacker.unpack) + + msg = "3 exceeds max_map_len" + with tm.assert_raises_regex(ValueError, msg): + unpacker.unpack() def test_max_ext_len(self): d = ExtType(42, b"abc") @@ -86,4 +99,7 @@ def test_max_ext_len(self): unpacker = Unpacker(max_ext_len=2) unpacker.feed(packed) - self.assertRaises(ValueError, unpacker.unpack) + + msg = "4 exceeds max_ext_len" + with tm.assert_raises_regex(ValueError, msg): + unpacker.unpack() diff --git a/pandas/tests/test_msgpack/test_newspec.py b/pandas/tests/io/msgpack/test_newspec.py similarity index 97% rename from pandas/tests/test_msgpack/test_newspec.py rename to pandas/tests/io/msgpack/test_newspec.py index 4eb9a0425c57b..783bfc1b364f8 100644 --- a/pandas/tests/test_msgpack/test_newspec.py +++ b/pandas/tests/io/msgpack/test_newspec.py @@ -1,6 +1,6 @@ # coding: utf-8 -from pandas.msgpack import packb, unpackb, ExtType +from pandas.io.msgpack import packb, unpackb, ExtType def test_str8(): diff --git a/pandas/tests/test_msgpack/test_obj.py b/pandas/tests/io/msgpack/test_obj.py similarity index 85% rename from pandas/tests/test_msgpack/test_obj.py rename to pandas/tests/io/msgpack/test_obj.py index bcc76929fe8f8..4a6b89907954e 100644 --- a/pandas/tests/test_msgpack/test_obj.py +++ b/pandas/tests/io/msgpack/test_obj.py @@ -1,14 +1,15 @@ # coding: utf-8 -import unittest -from pandas.msgpack import packb, unpackb +import pytest + +from pandas.io.msgpack import packb, unpackb class DecodeError(Exception): pass -class TestObj(unittest.TestCase): +class TestObj(object): def _arr_to_str(self, 
arr): return ''.join(str(c) for c in arr) @@ -46,15 +47,15 @@ def test_decode_pairs_hook(self): assert unpacked[1] == prod_sum def test_only_one_obj_hook(self): - self.assertRaises(TypeError, unpackb, b'', object_hook=lambda x: x, - object_pairs_hook=lambda x: x) + pytest.raises(TypeError, unpackb, b'', object_hook=lambda x: x, + object_pairs_hook=lambda x: x) def test_bad_hook(self): def f(): packed = packb([3, 1 + 2j], default=lambda o: o) unpacked = unpackb(packed, use_list=1) # noqa - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) def test_array_hook(self): packed = packb([1, 2, 3]) @@ -66,11 +67,11 @@ def f(): packed = packb({1: {'__complex__': True, 'real': 1, 'imag': 2}}) unpackb(packed, object_hook=self.bad_complex_decoder) - self.assertRaises(DecodeError, f) + pytest.raises(DecodeError, f) def test_an_exception_in_objecthook2(self): def f(): packed = packb({1: [{'__complex__': True, 'real': 1, 'imag': 2}]}) unpackb(packed, list_hook=self.bad_complex_decoder, use_list=1) - self.assertRaises(DecodeError, f) + pytest.raises(DecodeError, f) diff --git a/pandas/tests/test_msgpack/test_pack.py b/pandas/tests/io/msgpack/test_pack.py similarity index 89% rename from pandas/tests/test_msgpack/test_pack.py rename to pandas/tests/io/msgpack/test_pack.py index 005352691d908..3afd1fc086b33 100644 --- a/pandas/tests/test_msgpack/test_pack.py +++ b/pandas/tests/io/msgpack/test_pack.py @@ -1,14 +1,15 @@ # coding: utf-8 -import unittest +import pytest import struct + from pandas import compat from pandas.compat import u, OrderedDict -from pandas.msgpack import packb, unpackb, Unpacker, Packer +from pandas.io.msgpack import packb, unpackb, Unpacker, Packer -class TestPack(unittest.TestCase): +class TestPack(object): def check(self, data, use_list=False): re = unpackb(packb(data), use_list=use_list) @@ -64,12 +65,12 @@ def testIgnoreUnicodeErrors(self): assert re == "abcdef" def testStrictUnicodeUnpack(self): - self.assertRaises(UnicodeDecodeError, unpackb, 
packb(b'abc\xeddef'), - encoding='utf-8', use_list=1) + pytest.raises(UnicodeDecodeError, unpackb, packb(b'abc\xeddef'), + encoding='utf-8', use_list=1) def testStrictUnicodePack(self): - self.assertRaises(UnicodeEncodeError, packb, compat.u("abc\xeddef"), - encoding='ascii', unicode_errors='strict') + pytest.raises(UnicodeEncodeError, packb, compat.u("abc\xeddef"), + encoding='ascii', unicode_errors='strict') def testIgnoreErrorsPack(self): re = unpackb( @@ -79,7 +80,7 @@ def testIgnoreErrorsPack(self): assert re == compat.u("abcdef") def testNoEncoding(self): - self.assertRaises(TypeError, packb, compat.u("abc"), encoding=None) + pytest.raises(TypeError, packb, compat.u("abc"), encoding=None) def testDecodeBinary(self): re = unpackb(packb("abc"), encoding=None, use_list=1) @@ -131,7 +132,7 @@ def testMapSize(self, sizes=[0, 5, 50, 1000]): bio.seek(0) unpacker = Unpacker(bio) for size in sizes: - assert unpacker.unpack() == dict((i, i * 2) for i in range(size)) + assert unpacker.unpack() == {i: i * 2 for i in range(size)} def test_odict(self): seq = [(b'one', 1), (b'two', 2), (b'three', 3), (b'four', 4)] diff --git a/pandas/tests/test_msgpack/test_read_size.py b/pandas/tests/io/msgpack/test_read_size.py similarity index 96% rename from pandas/tests/test_msgpack/test_read_size.py rename to pandas/tests/io/msgpack/test_read_size.py index 965e97a7007de..ef521fa345637 100644 --- a/pandas/tests/test_msgpack/test_read_size.py +++ b/pandas/tests/io/msgpack/test_read_size.py @@ -1,5 +1,5 @@ """Test Unpacker's read_array_header and read_map_header methods""" -from pandas.msgpack import packb, Unpacker, OutOfData +from pandas.io.msgpack import packb, Unpacker, OutOfData UnexpectedTypeException = ValueError diff --git a/pandas/tests/test_msgpack/test_seq.py b/pandas/tests/io/msgpack/test_seq.py similarity index 90% rename from pandas/tests/test_msgpack/test_seq.py rename to pandas/tests/io/msgpack/test_seq.py index 927c2622419a6..06e9872a22777 100644 --- 
a/pandas/tests/test_msgpack/test_seq.py +++ b/pandas/tests/io/msgpack/test_seq.py @@ -1,7 +1,7 @@ # coding: utf-8 import io -import pandas.msgpack as msgpack +import pandas.io.msgpack as msgpack binarydata = bytes(bytearray(range(256))) @@ -25,7 +25,7 @@ def test_exceeding_unpacker_read_size(): # double free or corruption (!prev) # 40 ok for read_size=1024, while 50 introduces errors - # 7000 ok for read_size=1024*1024, while 8000 leads to glibc detected *** + # 7000 ok for read_size=1024*1024, while 8000 leads to glibc detected *** # python: double free or corruption (!prev): for idx in range(NUMBER_OF_STRINGS): diff --git a/pandas/tests/test_msgpack/test_sequnpack.py b/pandas/tests/io/msgpack/test_sequnpack.py similarity index 71% rename from pandas/tests/test_msgpack/test_sequnpack.py rename to pandas/tests/io/msgpack/test_sequnpack.py index fe089ccda1c7f..dc6fc5ef916b4 100644 --- a/pandas/tests/test_msgpack/test_sequnpack.py +++ b/pandas/tests/io/msgpack/test_sequnpack.py @@ -1,28 +1,26 @@ # coding: utf-8 -import unittest - from pandas import compat -from pandas.msgpack import Unpacker, BufferFull -from pandas.msgpack import OutOfData +from pandas.io.msgpack import Unpacker, BufferFull +from pandas.io.msgpack import OutOfData + +import pytest +import pandas.util.testing as tm -class TestPack(unittest.TestCase): +class TestPack(object): - def test_partialdata(self): + def test_partial_data(self): unpacker = Unpacker() - unpacker.feed(b'\xa5') - self.assertRaises(StopIteration, next, iter(unpacker)) - unpacker.feed(b'h') - self.assertRaises(StopIteration, next, iter(unpacker)) - unpacker.feed(b'a') - self.assertRaises(StopIteration, next, iter(unpacker)) - unpacker.feed(b'l') - self.assertRaises(StopIteration, next, iter(unpacker)) - unpacker.feed(b'l') - self.assertRaises(StopIteration, next, iter(unpacker)) - unpacker.feed(b'o') - assert next(iter(unpacker)) == b'hallo' + msg = "No more data to unpack" + + for data in [b"\xa5", b"h", b"a", b"l", b"l"]: + 
unpacker.feed(data) + with tm.assert_raises_regex(StopIteration, msg): + next(iter(unpacker)) + + unpacker.feed(b"o") + assert next(iter(unpacker)) == b"hallo" def test_foobar(self): unpacker = Unpacker(read_size=3, use_list=1) @@ -33,7 +31,7 @@ def test_foobar(self): assert unpacker.unpack() == ord(b'b') assert unpacker.unpack() == ord(b'a') assert unpacker.unpack() == ord(b'r') - self.assertRaises(OutOfData, unpacker.unpack) + pytest.raises(OutOfData, unpacker.unpack) unpacker.feed(b'foo') unpacker.feed(b'bar') @@ -53,13 +51,13 @@ def test_foobar_skip(self): unpacker.skip() assert unpacker.unpack() == ord(b'a') unpacker.skip() - self.assertRaises(OutOfData, unpacker.unpack) + pytest.raises(OutOfData, unpacker.unpack) def test_maxbuffersize(self): - self.assertRaises(ValueError, Unpacker, read_size=5, max_buffer_size=3) + pytest.raises(ValueError, Unpacker, read_size=5, max_buffer_size=3) unpacker = Unpacker(read_size=3, max_buffer_size=3, use_list=1) unpacker.feed(b'fo') - self.assertRaises(BufferFull, unpacker.feed, b'ob') + pytest.raises(BufferFull, unpacker.feed, b'ob') unpacker.feed(b'o') assert ord('f') == next(unpacker) unpacker.feed(b'b') diff --git a/pandas/tests/test_msgpack/test_subtype.py b/pandas/tests/io/msgpack/test_subtype.py similarity index 90% rename from pandas/tests/test_msgpack/test_subtype.py rename to pandas/tests/io/msgpack/test_subtype.py index d6dd72c4d9850..e27ec66c63e1f 100644 --- a/pandas/tests/test_msgpack/test_subtype.py +++ b/pandas/tests/io/msgpack/test_subtype.py @@ -1,6 +1,6 @@ # coding: utf-8 -from pandas.msgpack import packb +from pandas.io.msgpack import packb from collections import namedtuple diff --git a/pandas/tests/test_msgpack/test_unpack.py b/pandas/tests/io/msgpack/test_unpack.py similarity index 90% rename from pandas/tests/test_msgpack/test_unpack.py rename to pandas/tests/io/msgpack/test_unpack.py index ae8227ab276fb..c056f8d800e11 100644 --- a/pandas/tests/test_msgpack/test_unpack.py +++ 
b/pandas/tests/io/msgpack/test_unpack.py @@ -1,11 +1,10 @@ from io import BytesIO import sys -from pandas.msgpack import Unpacker, packb, OutOfData, ExtType -import pandas.util.testing as tm +from pandas.io.msgpack import Unpacker, packb, OutOfData, ExtType import pytest -class TestUnpack(tm.TestCase): +class TestUnpack(object): def test_unpack_array_header_from_file(self): f = BytesIO(packb([1, 2, 3, 4])) @@ -15,7 +14,7 @@ def test_unpack_array_header_from_file(self): assert unpacker.unpack() == 2 assert unpacker.unpack() == 3 assert unpacker.unpack() == 4 - self.assertRaises(OutOfData, unpacker.unpack) + pytest.raises(OutOfData, unpacker.unpack) def test_unpacker_hook_refcnt(self): if not hasattr(sys, 'getrefcount'): diff --git a/pandas/tests/test_msgpack/test_unpack_raw.py b/pandas/tests/io/msgpack/test_unpack_raw.py similarity index 94% rename from pandas/tests/test_msgpack/test_unpack_raw.py rename to pandas/tests/io/msgpack/test_unpack_raw.py index c6bf747c8d992..a261bf4cbbcd7 100644 --- a/pandas/tests/test_msgpack/test_unpack_raw.py +++ b/pandas/tests/io/msgpack/test_unpack_raw.py @@ -1,7 +1,7 @@ """Tests for cases where the user seeks to obtain packed msgpack objects""" import io -from pandas.msgpack import Unpacker, packb +from pandas.io.msgpack import Unpacker, packb def test_write_bytes(): diff --git a/pandas/tests/io/parser/__init__.py b/pandas/tests/io/parser/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/io/tests/parser/c_parser_only.py b/pandas/tests/io/parser/c_parser_only.py similarity index 76% rename from pandas/io/tests/parser/c_parser_only.py rename to pandas/tests/io/parser/c_parser_only.py index ffbd904843bfc..e0422249289b7 100644 --- a/pandas/io/tests/parser/c_parser_only.py +++ b/pandas/tests/io/parser/c_parser_only.py @@ -7,13 +7,17 @@ further arguments when parsing. 
""" +import os +import sys +import tarfile + import pytest import numpy as np import pandas as pd import pandas.util.testing as tm +import pandas.util._test_decorators as td from pandas import DataFrame -from pandas import compat from pandas.compat import StringIO, range, lrange @@ -33,7 +37,7 @@ def test_buffer_overflow(self): try: self.read_table(StringIO(malf)) except Exception as err: - self.assertIn(cperr, str(err)) + assert cperr in str(err) def test_buffer_rd_bytes(self): # see gh-12098: src->buffer in the C parser can be freed twice leading @@ -96,7 +100,7 @@ def test_dtype_and_names_error(self): 3.0 3 """ # fallback casting, but not castable - with tm.assertRaisesRegexp(ValueError, 'cannot safely convert'): + with tm.assert_raises_regex(ValueError, 'cannot safely convert'): self.read_csv(StringIO(data), sep=r'\s+', header=None, names=['a', 'b'], dtype={'a': np.int32}) @@ -108,26 +112,25 @@ def test_unsupported_dtype(self): df.to_csv(path) # valid but we don't support it (date) - self.assertRaises(TypeError, self.read_csv, path, - dtype={'A': 'datetime64', 'B': 'float64'}, - index_col=0) - self.assertRaises(TypeError, self.read_csv, path, - dtype={'A': 'datetime64', 'B': 'float64'}, - index_col=0, parse_dates=['B']) + pytest.raises(TypeError, self.read_csv, path, + dtype={'A': 'datetime64', 'B': 'float64'}, + index_col=0) + pytest.raises(TypeError, self.read_csv, path, + dtype={'A': 'datetime64', 'B': 'float64'}, + index_col=0, parse_dates=['B']) # valid but we don't support it - self.assertRaises(TypeError, self.read_csv, path, - dtype={'A': 'timedelta64', 'B': 'float64'}, - index_col=0) + pytest.raises(TypeError, self.read_csv, path, + dtype={'A': 'timedelta64', 'B': 'float64'}, + index_col=0) # valid but unsupported - fixed width unicode string - self.assertRaises(TypeError, self.read_csv, path, - dtype={'A': 'U8'}, - index_col=0) + pytest.raises(TypeError, self.read_csv, path, + dtype={'A': 'U8'}, + index_col=0) + @td.skip_if_32bit def 
test_precise_conversion(self): - # see gh-8002 - tm._skip_if_32bit() from decimal import Decimal normal_errors = [] @@ -152,29 +155,10 @@ def error(val): precise_errors.append(error(precise_val)) # round-trip should match float() - self.assertEqual(roundtrip_val, float(text[2:])) - - self.assertTrue(sum(precise_errors) <= sum(normal_errors)) - self.assertTrue(max(precise_errors) <= max(normal_errors)) - - def test_pass_dtype_as_recarray(self): - if compat.is_platform_windows() and self.low_memory: - pytest.skip( - "segfaults on win-64, only when all tests are run") + assert roundtrip_val == float(text[2:]) - data = """\ -one,two -1,2.5 -2,3.5 -3,4.5 -4,5.5""" - - with tm.assert_produces_warning( - FutureWarning, check_stacklevel=False): - result = self.read_csv(StringIO(data), dtype={ - 'one': 'u1', 1: 'S1'}, as_recarray=True) - self.assertEqual(result['one'].dtype, 'u1') - self.assertEqual(result['two'].dtype, 'S1') + assert sum(precise_errors) <= sum(normal_errors) + assert max(precise_errors) <= max(normal_errors) def test_usecols_dtypes(self): data = """\ @@ -195,8 +179,8 @@ def test_usecols_dtypes(self): converters={'a': str}, dtype={'b': int, 'c': float}, ) - self.assertTrue((result.dtypes == [object, np.int, np.float]).all()) - self.assertTrue((result2.dtypes == [object, np.float]).all()) + assert (result.dtypes == [object, np.int, np.float]).all() + assert (result2.dtypes == [object, np.float]).all() def test_disable_bool_parsing(self): # #2090 @@ -208,10 +192,10 @@ def test_disable_bool_parsing(self): No,No,No""" result = self.read_csv(StringIO(data), dtype=object) - self.assertTrue((result.dtypes == object).all()) + assert (result.dtypes == object).all() result = self.read_csv(StringIO(data), dtype=object, na_filter=False) - self.assertEqual(result['B'][2], '') + assert result['B'][2] == '' def test_custom_lineterminator(self): data = 'a,b,c~1,2,3~4,5,6' @@ -286,11 +270,11 @@ def test_empty_header_read(count): test_empty_header_read(count) def 
test_parse_trim_buffers(self): - # This test is part of a bugfix for issue #13703. It attmepts to + # This test is part of a bugfix for issue #13703. It attempts to # to stress the system memory allocator, to cause it to move the # stream buffer and either let the OS reclaim the region, or let # other memory requests of parser otherwise modify the contents - # of memory space, where it was formely located. + # of memory space, where it was formally located. # This test is designed to cause a `segfault` with unpatched # `tokenizer.c`. Sometimes the test fails on `segfault`, other # times it fails due to memory corruption, which causes the @@ -342,7 +326,7 @@ def test_parse_trim_buffers(self): # Generate the expected output: manually create the dataframe # by splitting by comma and repeating the `n_lines` times. - row = tuple(val_ if val_ else float("nan") + row = tuple(val_ if val_ else np.nan for val_ in record_.split(",")) expected = pd.DataFrame([row for _ in range(n_lines)], dtype=object, columns=None, index=None) @@ -355,6 +339,15 @@ def test_parse_trim_buffers(self): # Check for data corruption if there was no segfault tm.assert_frame_equal(result, expected) + # This extra test was added to replicate the fault in gh-5291. + # Force 'utf-8' encoding, so that `_string_convert` would take + # a different execution branch. 
+ chunks_ = self.read_csv(StringIO(csv_data), header=None, + dtype=object, chunksize=chunksize, + encoding='utf_8') + result = pd.concat(chunks_, axis=0, ignore_index=True) + tm.assert_frame_equal(result, expected) + def test_internal_null_byte(self): # see gh-14012 # @@ -388,7 +381,7 @@ def test_read_nrows_large(self): df = self.read_csv(StringIO(test_input), sep='\t', nrows=1010) - self.assertTrue(df.size == 1010 * 10) + assert df.size == 1010 * 10 def test_float_precision_round_trip_with_text(self): # gh-15140 - This should not segfault on Python 2.7+ @@ -408,3 +401,87 @@ def test_large_difference_in_columns(self): expected = DataFrame([row.split(',')[0] for row in rows]) tm.assert_frame_equal(result, expected) + + def test_data_after_quote(self): + # see gh-15910 + + data = 'a\n1\n"b"a' + result = self.read_csv(StringIO(data)) + expected = DataFrame({'a': ['1', 'ba']}) + + tm.assert_frame_equal(result, expected) + + @tm.capture_stderr + def test_comment_whitespace_delimited(self): + test_input = """\ +1 2 +2 2 3 +3 2 3 # 3 fields +4 2 3# 3 fields +5 2 # 2 fields +6 2# 2 fields +7 # 1 field, NaN +8# 1 field, NaN +9 2 3 # skipped line +# comment""" + df = self.read_csv(StringIO(test_input), comment='#', header=None, + delimiter='\\s+', skiprows=0, + error_bad_lines=False) + error = sys.stderr.getvalue() + # skipped lines 2, 3, 4, 9 + for line_num in (2, 3, 4, 9): + assert 'Skipping line {}'.format(line_num) in error, error + expected = DataFrame([[1, 2], + [5, 2], + [6, 2], + [7, np.nan], + [8, np.nan]]) + tm.assert_frame_equal(df, expected) + + def test_file_like_no_next(self): + # gh-16530: the file-like need not have a "next" or "__next__" + # attribute despite having an "__iter__" attribute. + # + # NOTE: This is only true for the C engine, not Python engine. 
+ class NoNextBuffer(StringIO): + def __next__(self): + raise AttributeError("No next method") + + next = __next__ + + data = "a\n1" + + expected = pd.DataFrame({"a": [1]}) + result = self.read_csv(NoNextBuffer(data)) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("tar_suffix", [".tar", ".tar.gz"]) + def test_read_tarfile(self, tar_suffix): + # see gh-16530 + # + # Unfortunately, Python's CSV library can't handle + # tarfile objects (expects string, not bytes when + # iterating through a file-like). + tar_path = os.path.join(self.dirpath, "tar_csv" + tar_suffix) + + with tarfile.open(tar_path, "r") as tar: + data_file = tar.extractfile("tar_data.csv") + + out = self.read_csv(data_file) + expected = pd.DataFrame({"a": [1]}) + tm.assert_frame_equal(out, expected) + + @pytest.mark.high_memory + def test_bytes_exceed_2gb(self): + """Read from a "CSV" that has a column larger than 2GB. + + GH 16798 + """ + if self.low_memory: + pytest.skip("not a high_memory test") + + csv = StringIO('strings\n' + '\n'.join( + ['x' * (1 << 20) for _ in range(2100)])) + df = self.read_csv(csv, low_memory=False) + assert not df.empty diff --git a/pandas/io/tests/parser/comment.py b/pandas/tests/io/parser/comment.py similarity index 100% rename from pandas/io/tests/parser/comment.py rename to pandas/tests/io/parser/comment.py diff --git a/pandas/io/tests/parser/common.py b/pandas/tests/io/parser/common.py similarity index 75% rename from pandas/io/tests/parser/common.py rename to pandas/tests/io/parser/common.py index 0671901fc170a..cf7ec9e2f2652 100644 --- a/pandas/io/tests/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -8,10 +8,11 @@ import re import sys from datetime import datetime +from collections import OrderedDict import pytest import numpy as np -from pandas.lib import Timestamp +from pandas._libs.tslib import Timestamp import pandas as pd import pandas.util.testing as tm @@ -19,7 +20,8 @@ from pandas import compat from pandas.compat import 
(StringIO, BytesIO, PY3, range, lrange, u) -from pandas.io.common import DtypeWarning, EmptyDataError, URLError +from pandas.errors import DtypeWarning, EmptyDataError, ParserError +from pandas.io.common import URLError from pandas.io.parsers import TextFileReader, TextParser @@ -43,7 +45,7 @@ def test_empty_decimal_marker(self): """ # Parsers support only length-1 decimals msg = 'Only length-1 decimal markers supported' - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): self.read_csv(StringIO(data), decimal='') def test_bad_stream_exception(self): @@ -63,7 +65,7 @@ def test_bad_stream_exception(self): msg = "'utf-8' codec can't decode byte" else: msg = "'utf8' codec can't decode byte" - with tm.assertRaisesRegexp(UnicodeDecodeError, msg): + with tm.assert_raises_regex(UnicodeDecodeError, msg): self.read_csv(stream) stream.close() @@ -104,7 +106,7 @@ def test_squeeze(self): expected = Series([1, 2, 3], name=1, index=idx) result = self.read_table(StringIO(data), sep=',', index_col=0, header=None, squeeze=True) - tm.assertIsInstance(result, Series) + assert isinstance(result, Series) tm.assert_series_equal(result, expected) def test_squeeze_no_view(self): @@ -112,7 +114,7 @@ def test_squeeze_no_view(self): # Series should not be a view data = """time,data\n0,10\n1,11\n2,12\n4,14\n5,15\n3,13""" result = self.read_csv(StringIO(data), index_col='time', squeeze=True) - self.assertFalse(result._is_view) + assert not result._is_view def test_malformed(self): # see gh-6607 @@ -125,7 +127,7 @@ def test_malformed(self): 2,3,4 """ msg = 'Expected 3 fields in line 4, saw 5' - with tm.assertRaisesRegexp(Exception, msg): + with tm.assert_raises_regex(Exception, msg): self.read_table(StringIO(data), sep=',', header=1, comment='#') @@ -139,7 +141,7 @@ def test_malformed(self): 2,3,4 """ msg = 'Expected 3 fields in line 6, saw 5' - with tm.assertRaisesRegexp(Exception, msg): + with tm.assert_raises_regex(Exception, msg): it = 
self.read_table(StringIO(data), sep=',', header=1, comment='#', iterator=True, chunksize=1, @@ -156,7 +158,7 @@ def test_malformed(self): 2,3,4 """ msg = 'Expected 3 fields in line 6, saw 5' - with tm.assertRaisesRegexp(Exception, msg): + with tm.assert_raises_regex(Exception, msg): it = self.read_table(StringIO(data), sep=',', header=1, comment='#', iterator=True, chunksize=1, skiprows=[2]) @@ -172,7 +174,7 @@ def test_malformed(self): 2,3,4 """ msg = 'Expected 3 fields in line 6, saw 5' - with tm.assertRaisesRegexp(Exception, msg): + with tm.assert_raises_regex(Exception, msg): it = self.read_table(StringIO(data), sep=',', header=1, comment='#', iterator=True, chunksize=1, skiprows=[2]) @@ -189,7 +191,7 @@ def test_malformed(self): footer """ msg = 'Expected 3 fields in line 4, saw 5' - with tm.assertRaisesRegexp(Exception, msg): + with tm.assert_raises_regex(Exception, msg): self.read_table(StringIO(data), sep=',', header=1, comment='#', skipfooter=1) @@ -201,12 +203,12 @@ def test_quoting(self): Klosterdruckerei\tKlosterdruckerei (1609-1805)\t"Furststiftische Hofdruckerei, (1609-1805)\tGaller, Alois Klosterdruckerei\tKlosterdruckerei (1609-1805)\tHochfurstliche Buchhandlung """ # noqa - self.assertRaises(Exception, self.read_table, StringIO(bad_line_small), - sep='\t') + pytest.raises(Exception, self.read_table, StringIO(bad_line_small), + sep='\t') good_line_small = bad_line_small + '"' df = self.read_table(StringIO(good_line_small), sep='\t') - self.assertEqual(len(df), 3) + assert len(df) == 3 def test_unnamed_columns(self): data = """A,B,C,, @@ -219,30 +221,9 @@ def test_unnamed_columns(self): [11, 12, 13, 14, 15]], dtype=np.int64) df = self.read_table(StringIO(data), sep=',') tm.assert_almost_equal(df.values, expected) - self.assert_index_equal(df.columns, - Index(['A', 'B', 'C', 'Unnamed: 3', - 'Unnamed: 4'])) - - def test_duplicate_columns(self): - # TODO: add test for condition 'mangle_dupe_cols=False' - # once it is actually supported (gh-12935) - data 
= """A,A,B,B,B -1,2,3,4,5 -6,7,8,9,10 -11,12,13,14,15 -""" - - for method in ('read_csv', 'read_table'): - - # check default behavior - df = getattr(self, method)(StringIO(data), sep=',') - self.assertEqual(list(df.columns), - ['A', 'A.1', 'B', 'B.1', 'B.2']) - - df = getattr(self, method)(StringIO(data), sep=',', - mangle_dupe_cols=True) - self.assertEqual(list(df.columns), - ['A', 'A.1', 'B', 'B.1', 'B.2']) + tm.assert_index_equal(df.columns, + Index(['A', 'B', 'C', 'Unnamed: 3', + 'Unnamed: 4'])) def test_csv_mixed_type(self): data = """A,B,C @@ -260,29 +241,27 @@ def test_read_csv_dataframe(self): df = self.read_csv(self.csv1, index_col=0, parse_dates=True) df2 = self.read_table(self.csv1, sep=',', index_col=0, parse_dates=True) - self.assert_index_equal(df.columns, pd.Index(['A', 'B', 'C', 'D'])) - self.assertEqual(df.index.name, 'index') - self.assertIsInstance( + tm.assert_index_equal(df.columns, pd.Index(['A', 'B', 'C', 'D'])) + assert df.index.name == 'index' + assert isinstance( df.index[0], (datetime, np.datetime64, Timestamp)) - self.assertEqual(df.values.dtype, np.float64) + assert df.values.dtype == np.float64 tm.assert_frame_equal(df, df2) def test_read_csv_no_index_name(self): df = self.read_csv(self.csv2, index_col=0, parse_dates=True) df2 = self.read_table(self.csv2, sep=',', index_col=0, parse_dates=True) - self.assert_index_equal(df.columns, - pd.Index(['A', 'B', 'C', 'D', 'E'])) - self.assertIsInstance(df.index[0], - (datetime, np.datetime64, Timestamp)) - self.assertEqual(df.loc[:, ['A', 'B', 'C', 'D']].values.dtype, - np.float64) + tm.assert_index_equal(df.columns, + pd.Index(['A', 'B', 'C', 'D', 'E'])) + assert isinstance(df.index[0], (datetime, np.datetime64, Timestamp)) + assert df.loc[:, ['A', 'B', 'C', 'D']].values.dtype == np.float64 tm.assert_frame_equal(df, df2) def test_read_table_unicode(self): fin = BytesIO(u('\u0141aski, Jan;1').encode('utf-8')) df1 = self.read_table(fin, sep=";", encoding="utf-8", header=None) - 
tm.assertIsInstance(df1[0].values[0], compat.text_type) + assert isinstance(df1[0].values[0], compat.text_type) def test_read_table_wrong_num_columns(self): # too few! @@ -291,7 +270,7 @@ def test_read_table_wrong_num_columns(self): 6,7,8,9,10,11,12 11,12,13,14,15,16 """ - self.assertRaises(ValueError, self.read_csv, StringIO(data)) + pytest.raises(ValueError, self.read_csv, StringIO(data)) def test_read_duplicate_index_explicit(self): data = """index,A,B,C,D @@ -334,7 +313,7 @@ def test_parse_bools(self): True,3 """ data = self.read_csv(StringIO(data)) - self.assertEqual(data['A'].dtype, np.bool_) + assert data['A'].dtype == np.bool_ data = """A,B YES,1 @@ -346,7 +325,7 @@ def test_parse_bools(self): data = self.read_csv(StringIO(data), true_values=['yes', 'Yes', 'YES'], false_values=['no', 'NO', 'No']) - self.assertEqual(data['A'].dtype, np.bool_) + assert data['A'].dtype == np.bool_ data = """A,B TRUE,1 @@ -354,7 +333,7 @@ def test_parse_bools(self): TRUE,3 """ data = self.read_csv(StringIO(data)) - self.assertEqual(data['A'].dtype, np.bool_) + assert data['A'].dtype == np.bool_ data = """A,B foo,bar @@ -371,8 +350,8 @@ def test_int_conversion(self): 3.0,3 """ data = self.read_csv(StringIO(data)) - self.assertEqual(data['A'].dtype, np.float64) - self.assertEqual(data['B'].dtype, np.int64) + assert data['A'].dtype == np.float64 + assert data['B'].dtype == np.int64 def test_read_nrows(self): expected = self.read_csv(StringIO(self.data1))[:3] @@ -384,14 +363,17 @@ def test_read_nrows(self): df = self.read_csv(StringIO(self.data1), nrows=3.0) tm.assert_frame_equal(df, expected) - msg = "must be an integer" + msg = r"'nrows' must be an integer >=0" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): self.read_csv(StringIO(self.data1), nrows=1.2) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): self.read_csv(StringIO(self.data1), nrows='foo') + with 
tm.assert_raises_regex(ValueError, msg): + self.read_csv(StringIO(self.data1), nrows=-1) + def test_read_chunksize(self): reader = self.read_csv(StringIO(self.data1), index_col=0, chunksize=2) df = self.read_csv(StringIO(self.data1), index_col=0) @@ -402,6 +384,45 @@ def test_read_chunksize(self): tm.assert_frame_equal(chunks[1], df[2:4]) tm.assert_frame_equal(chunks[2], df[4:]) + # with invalid chunksize value: + msg = r"'chunksize' must be an integer >=1" + + with tm.assert_raises_regex(ValueError, msg): + self.read_csv(StringIO(self.data1), chunksize=1.3) + + with tm.assert_raises_regex(ValueError, msg): + self.read_csv(StringIO(self.data1), chunksize='foo') + + with tm.assert_raises_regex(ValueError, msg): + self.read_csv(StringIO(self.data1), chunksize=0) + + def test_read_chunksize_and_nrows(self): + + # gh-15755 + # With nrows + reader = self.read_csv(StringIO(self.data1), index_col=0, + chunksize=2, nrows=5) + df = self.read_csv(StringIO(self.data1), index_col=0, nrows=5) + + tm.assert_frame_equal(pd.concat(reader), df) + + # chunksize > nrows + reader = self.read_csv(StringIO(self.data1), index_col=0, + chunksize=8, nrows=5) + df = self.read_csv(StringIO(self.data1), index_col=0, nrows=5) + + tm.assert_frame_equal(pd.concat(reader), df) + + # with changing "size": + reader = self.read_csv(StringIO(self.data1), index_col=0, + chunksize=8, nrows=5) + df = self.read_csv(StringIO(self.data1), index_col=0, nrows=5) + + tm.assert_frame_equal(reader.get_chunk(size=2), df.iloc[:2]) + tm.assert_frame_equal(reader.get_chunk(size=4), df.iloc[2:5]) + with pytest.raises(StopIteration): + reader.get_chunk(size=3) + def test_read_chunksize_named(self): reader = self.read_csv( StringIO(self.data1), index_col='index', chunksize=2) @@ -422,7 +443,7 @@ def test_get_chunk_passed_chunksize(self): result = self.read_csv(StringIO(data), chunksize=2) piece = result.get_chunk() - self.assertEqual(len(piece), 2) + assert len(piece) == 2 def 
test_read_chunksize_generated_index(self): # GH 12185 @@ -477,7 +498,7 @@ def test_iterator(self): treader = self.read_table(StringIO(self.data1), sep=',', index_col=0, iterator=True) - tm.assertIsInstance(treader, TextFileReader) + assert isinstance(treader, TextFileReader) # gh-3967: stopping iteration when chunksize is specified data = """A,B,C @@ -496,15 +517,15 @@ def test_iterator(self): result = list(reader) expected = DataFrame(dict(A=[1, 4, 7], B=[2, 5, 8], C=[ 3, 6, 9]), index=['foo', 'bar', 'baz']) - self.assertEqual(len(result), 3) + assert len(result) == 3 tm.assert_frame_equal(pd.concat(result), expected) # skipfooter is not supported with the C parser yet if self.engine == 'python': # test bad parameter (skipfooter) reader = self.read_csv(StringIO(self.data1), index_col=0, - iterator=True, skipfooter=True) - self.assertRaises(ValueError, reader.read, 3) + iterator=True, skipfooter=1) + pytest.raises(ValueError, reader.read, 3) def test_pass_names_with_index(self): lines = self.data1.split('\n') @@ -600,7 +621,7 @@ def test_no_unnamed_index(self): 2 2 2 e f """ df = self.read_table(StringIO(data), sep=' ') - self.assertIsNone(df.index.name) + assert df.index.name is None def test_read_csv_parse_simple_list(self): text = """foo @@ -617,7 +638,7 @@ def test_read_csv_parse_simple_list(self): def test_url(self): # HTTP(S) url = ('https://raw.github.com/pandas-dev/pandas/master/' - 'pandas/io/tests/parser/data/salaries.csv') + 'pandas/tests/io/parser/data/salaries.csv') url_table = self.read_table(url) dirpath = tm.get_data_path() localtable = os.path.join(dirpath, 'salaries.csv') @@ -625,7 +646,7 @@ def test_url(self): tm.assert_frame_equal(url_table, local_table) # TODO: ftp testing - @tm.slow + @pytest.mark.slow def test_file(self): dirpath = tm.get_data_path() localtable = os.path.join(dirpath, 'salaries.csv') @@ -640,11 +661,24 @@ def test_file(self): tm.assert_frame_equal(url_table, local_table) + def test_path_pathlib(self): + df = 
tm.makeDataFrame() + result = tm.round_trip_pathlib(df.to_csv, + lambda p: self.read_csv(p, index_col=0)) + tm.assert_frame_equal(df, result) + + def test_path_localpath(self): + df = tm.makeDataFrame() + result = tm.round_trip_localpath( + df.to_csv, + lambda p: self.read_csv(p, index_col=0)) + tm.assert_frame_equal(df, result) + def test_nonexistent_path(self): # gh-2428: pls no segfault # gh-14086: raise more helpful FileNotFoundError path = '%s.csv' % tm.rands(10) - self.assertRaises(compat.FileNotFoundError, self.read_csv, path) + pytest.raises(compat.FileNotFoundError, self.read_csv, path) def test_missing_trailing_delimiters(self): data = """A,B,C,D @@ -652,7 +686,7 @@ def test_missing_trailing_delimiters(self): 1,3,3, 1,4,5""" result = self.read_csv(StringIO(data)) - self.assertTrue(result['D'].isnull()[1:].all()) + assert result['D'].isna()[1:].all() def test_skipinitialspace(self): s = ('"09-Apr-2012", "01:10:18.300", 2456026.548822908, 12849, ' @@ -666,7 +700,7 @@ def test_skipinitialspace(self): # it's 33 columns result = self.read_csv(sfile, names=lrange(33), na_values=['-9999.0'], header=None, skipinitialspace=True) - self.assertTrue(pd.isnull(result.iloc[0, 29])) + assert pd.isna(result.iloc[0, 29]) def test_utf16_bom_skiprows(self): # #2298 @@ -710,12 +744,12 @@ def test_utf16_example(self): # it works! and is the right length result = self.read_table(path, encoding='utf-16') - self.assertEqual(len(result), 50) + assert len(result) == 50 if not compat.PY3: buf = BytesIO(open(path, 'rb').read()) result = self.read_table(buf, encoding='utf-16') - self.assertEqual(len(result), 50) + assert len(result) == 50 def test_unicode_encoding(self): pth = tm.get_data_path('unicode_series.csv') @@ -726,7 +760,7 @@ def test_unicode_encoding(self): got = result[1][1632] expected = u('\xc1 k\xf6ldum klaka (Cold Fever) (1994)') - self.assertEqual(got, expected) + assert got == expected def test_trailing_delimiters(self): # #2442. 
grumble grumble @@ -751,10 +785,10 @@ def test_escapechar(self): result = self.read_csv(StringIO(data), escapechar='\\', quotechar='"', encoding='utf-8') - self.assertEqual(result['SEARCH_TERM'][2], - 'SLAGBORD, "Bergslagen", IKEA:s 1700-tals serie') - self.assertTrue(np.array_equal(result.columns, - ['SEARCH_TERM', 'ACTUAL_URL'])) + assert result['SEARCH_TERM'][2] == ('SLAGBORD, "Bergslagen", ' + 'IKEA:s 1700-tals serie') + tm.assert_index_equal(result.columns, + Index(['SEARCH_TERM', 'ACTUAL_URL'])) def test_int64_min_issues(self): # #2599 @@ -790,7 +824,7 @@ def test_parse_integers_above_fp_precision(self): 17007000002000192, 17007000002000194]}) - self.assertTrue(np.array_equal(result['Numbers'], expected['Numbers'])) + tm.assert_series_equal(result['Numbers'], expected['Numbers']) def test_chunks_have_consistent_numerical_type(self): integers = [str(i) for i in range(499999)] @@ -799,8 +833,8 @@ def test_chunks_have_consistent_numerical_type(self): with tm.assert_produces_warning(False): df = self.read_csv(StringIO(data)) # Assert that types were coerced. 
- self.assertTrue(type(df.a[0]) is np.float64) - self.assertEqual(df.a.dtype, np.float) + assert type(df.a[0]) is np.float64 + assert df.a.dtype == np.float def test_warn_if_chunks_have_mismatched_type(self): warning_type = False @@ -814,17 +848,17 @@ def test_warn_if_chunks_have_mismatched_type(self): with tm.assert_produces_warning(warning_type): df = self.read_csv(StringIO(data)) - self.assertEqual(df.a.dtype, np.object) + assert df.a.dtype == np.object def test_integer_overflow_bug(self): # see gh-2601 data = "65248E10 11\n55555E55 22\n" result = self.read_csv(StringIO(data), header=None, sep=' ') - self.assertTrue(result[0].dtype == np.float64) + assert result[0].dtype == np.float64 result = self.read_csv(StringIO(data), header=None, sep=r'\s+') - self.assertTrue(result[0].dtype == np.float64) + assert result[0].dtype == np.float64 def test_catch_too_many_names(self): # see gh-5156 @@ -833,8 +867,8 @@ def test_catch_too_many_names(self): 4,,6 7,8,9 10,11,12\n""" - tm.assertRaises(ValueError, self.read_csv, StringIO(data), - header=0, names=['a', 'b', 'c', 'd']) + pytest.raises(ValueError, self.read_csv, StringIO(data), + header=0, names=['a', 'b', 'c', 'd']) def test_ignore_leading_whitespace(self): # see gh-3374, gh-6607 @@ -847,7 +881,7 @@ def test_chunk_begins_with_newline_whitespace(self): # see gh-10022 data = '\n hello\nworld\n' result = self.read_csv(StringIO(data), header=None) - self.assertEqual(len(result), 2) + assert len(result) == 2 # see gh-9735: this issue is C parser-specific (bug when # parsing whitespace and characters at chunk boundary) @@ -891,8 +925,9 @@ def test_float_parser(self): def test_scientific_no_exponent(self): # see gh-12215 - df = DataFrame.from_items([('w', ['2e']), ('x', ['3E']), - ('y', ['42e']), ('z', ['632E'])]) + df = DataFrame.from_dict(OrderedDict([('w', ['2e']), ('x', ['3E']), + ('y', ['42e']), + ('z', ['632E'])])) data = df.to_csv(index=False) for prec in self.float_precision_choices: df_roundtrip = self.read_csv( @@ 
-912,14 +947,14 @@ def test_int64_overflow(self): # 13007854817840016671868 > UINT64_MAX, so this # will overflow and return object as the dtype. result = self.read_csv(StringIO(data)) - self.assertTrue(result['ID'].dtype == object) + assert result['ID'].dtype == object # 13007854817840016671868 > UINT64_MAX, so attempts # to cast to either int64 or uint64 will result in # an OverflowError being raised. for conv in (np.int64, np.uint64): - self.assertRaises(OverflowError, self.read_csv, - StringIO(data), converters={'ID': conv}) + pytest.raises(OverflowError, self.read_csv, + StringIO(data), converters={'ID': conv}) # These numbers fall right inside the int64-uint64 range, # so they should be parsed as string. @@ -964,23 +999,6 @@ def test_empty_with_nrows_chunksize(self): StringIO('foo,bar\n'), chunksize=10))) tm.assert_frame_equal(result, expected) - with tm.assert_produces_warning( - FutureWarning, check_stacklevel=False): - result = self.read_csv(StringIO('foo,bar\n'), - nrows=10, as_recarray=True) - result = DataFrame(result[2], columns=result[1], - index=result[0]) - tm.assert_frame_equal(DataFrame.from_records( - result), expected, check_index_type=False) - - with tm.assert_produces_warning( - FutureWarning, check_stacklevel=False): - result = next(iter(self.read_csv(StringIO('foo,bar\n'), - chunksize=10, as_recarray=True))) - result = DataFrame(result[2], columns=result[1], index=result[0]) - tm.assert_frame_equal(DataFrame.from_records(result), expected, - check_index_type=False) - def test_eof_states(self): # see gh-10728, gh-10548 @@ -1039,18 +1057,18 @@ def test_eof_states(self): # ESCAPED_CHAR data = "a,b,c\n4,5,6\n\\" - self.assertRaises(Exception, self.read_csv, - StringIO(data), escapechar='\\') + pytest.raises(Exception, self.read_csv, + StringIO(data), escapechar='\\') # ESCAPE_IN_QUOTED_FIELD data = 'a,b,c\n4,5,6\n"\\' - self.assertRaises(Exception, self.read_csv, - StringIO(data), escapechar='\\') + pytest.raises(Exception, self.read_csv, + 
StringIO(data), escapechar='\\') # IN_QUOTED_FIELD data = 'a,b,c\n4,5,6\n"' - self.assertRaises(Exception, self.read_csv, - StringIO(data), escapechar='\\') + pytest.raises(Exception, self.read_csv, + StringIO(data), escapechar='\\') def test_uneven_lines_with_usecols(self): # See gh-12203 @@ -1063,7 +1081,7 @@ def test_uneven_lines_with_usecols(self): # make sure that an error is still thrown # when the 'usecols' parameter is not provided msg = r"Expected \d+ fields in line \d+, saw \d+" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): df = self.read_csv(StringIO(csv)) expected = DataFrame({ @@ -1089,10 +1107,10 @@ def test_read_empty_with_usecols(self): # throws the correct error, with or without usecols errmsg = "No columns to parse from file" - with tm.assertRaisesRegexp(EmptyDataError, errmsg): + with tm.assert_raises_regex(EmptyDataError, errmsg): self.read_csv(StringIO('')) - with tm.assertRaisesRegexp(EmptyDataError, errmsg): + with tm.assert_raises_regex(EmptyDataError, errmsg): self.read_csv(StringIO(''), usecols=usecols) expected = DataFrame(columns=usecols, index=[0], dtype=np.float64) @@ -1131,7 +1149,8 @@ def test_trailing_spaces(self): def test_raise_on_sep_with_delim_whitespace(self): # see gh-6607 data = 'a b c\n1 2 3' - with tm.assertRaisesRegexp(ValueError, 'you can only specify one'): + with tm.assert_raises_regex(ValueError, + 'you can only specify one'): self.read_table(StringIO(data), sep=r'\s', delim_whitespace=True) def test_single_char_leading_whitespace(self): @@ -1202,7 +1221,7 @@ def test_regex_separator(self): df = self.read_table(StringIO(data), sep=r'\s+') expected = self.read_csv(StringIO(re.sub('[ ]+', ',', data)), index_col=0) - self.assertIsNone(expected.index.name) + assert expected.index.name is None tm.assert_frame_equal(df, expected) data = ' a b c\n1 2 3 \n4 5 6\n 7 8 9' @@ -1211,6 +1230,7 @@ def test_regex_separator(self): columns=['a', 'b', 'c']) tm.assert_frame_equal(result, 
expected) + @tm.capture_stdout def test_verbose_import(self): text = """a,b,c,d one,1,2,3 @@ -1222,22 +1242,18 @@ def test_verbose_import(self): one,1,2,3 two,1,2,3""" - buf = StringIO() - sys.stdout = buf + # Engines are verbose in different ways. + self.read_csv(StringIO(text), verbose=True) + output = sys.stdout.getvalue() - try: # engines are verbose in different ways - self.read_csv(StringIO(text), verbose=True) - if self.engine == 'c': - self.assertIn('Tokenization took:', buf.getvalue()) - self.assertIn('Parser memory cleanup took:', buf.getvalue()) - else: # Python engine - self.assertEqual(buf.getvalue(), - 'Filled 3 NA values in column a\n') - finally: - sys.stdout = sys.__stdout__ + if self.engine == 'c': + assert 'Tokenization took:' in output + assert 'Parser memory cleanup took:' in output + else: # Python engine + assert output == 'Filled 3 NA values in column a\n' - buf = StringIO() - sys.stdout = buf + # Reset the stdout buffer. + sys.stdout = StringIO() text = """a,b,c,d one,1,2,3 @@ -1249,16 +1265,15 @@ def test_verbose_import(self): seven,1,2,3 eight,1,2,3""" - try: # engines are verbose in different ways - self.read_csv(StringIO(text), verbose=True, index_col=0) - if self.engine == 'c': - self.assertIn('Tokenization took:', buf.getvalue()) - self.assertIn('Parser memory cleanup took:', buf.getvalue()) - else: # Python engine - self.assertEqual(buf.getvalue(), - 'Filled 1 NA values in column a\n') - finally: - sys.stdout = sys.__stdout__ + self.read_csv(StringIO(text), verbose=True, index_col=0) + output = sys.stdout.getvalue() + + # Engines are verbose in different ways. 
+ if self.engine == 'c': + assert 'Tokenization took:' in output + assert 'Parser memory cleanup took:' in output + else: # Python engine + assert output == 'Filled 1 NA values in column a\n' def test_iteration_open_handle(self): if PY3: @@ -1275,8 +1290,8 @@ def test_iteration_open_handle(self): break if self.engine == 'c': - tm.assertRaises(Exception, self.read_table, - f, squeeze=True, header=None) + pytest.raises(Exception, self.read_table, + f, squeeze=True, header=None) else: result = self.read_table(f, squeeze=True, header=None) expected = Series(['DDD', 'EEE', 'FFF', 'GGG'], name=0) @@ -1293,9 +1308,9 @@ def test_1000_sep_with_decimal(self): 'C': [5, 10.] }) - tm.assert_equal(expected.A.dtype, 'int64') - tm.assert_equal(expected.B.dtype, 'float') - tm.assert_equal(expected.C.dtype, 'float') + assert expected.A.dtype == 'int64' + assert expected.B.dtype == 'float' + assert expected.C.dtype == 'float' df = self.read_csv(StringIO(data), sep='|', thousands=',', decimal='.') tm.assert_frame_equal(df, expected) @@ -1323,23 +1338,9 @@ def test_euro_decimal_format(self): 3;878,158;108013,434;GHI;rez;2,735694704""" df2 = self.read_csv(StringIO(data), sep=';', decimal=',') - self.assertEqual(df2['Number1'].dtype, float) - self.assertEqual(df2['Number2'].dtype, float) - self.assertEqual(df2['Number3'].dtype, float) - - def test_read_duplicate_names(self): - # See gh-7160 - data = "a,b,a\n0,1,2\n3,4,5" - df = self.read_csv(StringIO(data)) - expected = DataFrame([[0, 1, 2], [3, 4, 5]], - columns=['a', 'b', 'a.1']) - tm.assert_frame_equal(df, expected) - - data = "0,1,2\n3,4,5" - df = self.read_csv(StringIO(data), names=["a", "b", "a"]) - expected = DataFrame([[0, 1, 2], [3, 4, 5]], - columns=['a', 'b', 'a.1']) - tm.assert_frame_equal(df, expected) + assert df2['Number1'].dtype == float + assert df2['Number2'].dtype == float + assert df2['Number3'].dtype == float def test_inf_parsing(self): data = """\ @@ -1366,141 +1367,11 @@ def test_inf_parsing(self): def 
test_raise_on_no_columns(self): # single newline data = "\n" - self.assertRaises(EmptyDataError, self.read_csv, StringIO(data)) + pytest.raises(EmptyDataError, self.read_csv, StringIO(data)) # test with more than a single newline data = "\n\n\n" - self.assertRaises(EmptyDataError, self.read_csv, StringIO(data)) - - def test_compact_ints_use_unsigned(self): - # see gh-13323 - data = 'a,b,c\n1,9,258' - - # sanity check - expected = DataFrame({ - 'a': np.array([1], dtype=np.int64), - 'b': np.array([9], dtype=np.int64), - 'c': np.array([258], dtype=np.int64), - }) - out = self.read_csv(StringIO(data)) - tm.assert_frame_equal(out, expected) - - expected = DataFrame({ - 'a': np.array([1], dtype=np.int8), - 'b': np.array([9], dtype=np.int8), - 'c': np.array([258], dtype=np.int16), - }) - - # default behaviour for 'use_unsigned' - with tm.assert_produces_warning( - FutureWarning, check_stacklevel=False): - out = self.read_csv(StringIO(data), compact_ints=True) - tm.assert_frame_equal(out, expected) - - with tm.assert_produces_warning( - FutureWarning, check_stacklevel=False): - out = self.read_csv(StringIO(data), compact_ints=True, - use_unsigned=False) - tm.assert_frame_equal(out, expected) - - expected = DataFrame({ - 'a': np.array([1], dtype=np.uint8), - 'b': np.array([9], dtype=np.uint8), - 'c': np.array([258], dtype=np.uint16), - }) - - with tm.assert_produces_warning( - FutureWarning, check_stacklevel=False): - out = self.read_csv(StringIO(data), compact_ints=True, - use_unsigned=True) - tm.assert_frame_equal(out, expected) - - def test_compact_ints_as_recarray(self): - data = ('0,1,0,0\n' - '1,1,0,0\n' - '0,1,0,1') - - with tm.assert_produces_warning( - FutureWarning, check_stacklevel=False): - result = self.read_csv(StringIO(data), delimiter=',', header=None, - compact_ints=True, as_recarray=True) - ex_dtype = np.dtype([(str(i), 'i1') for i in range(4)]) - self.assertEqual(result.dtype, ex_dtype) - - with tm.assert_produces_warning( - FutureWarning, 
check_stacklevel=False): - result = self.read_csv(StringIO(data), delimiter=',', header=None, - as_recarray=True, compact_ints=True, - use_unsigned=True) - ex_dtype = np.dtype([(str(i), 'u1') for i in range(4)]) - self.assertEqual(result.dtype, ex_dtype) - - def test_as_recarray(self): - # basic test - with tm.assert_produces_warning( - FutureWarning, check_stacklevel=False): - data = 'a,b\n1,a\n2,b' - expected = np.array([(1, 'a'), (2, 'b')], - dtype=[('a', '=i8'), ('b', 'O')]) - out = self.read_csv(StringIO(data), as_recarray=True) - tm.assert_numpy_array_equal(out, expected) - - # index_col ignored - with tm.assert_produces_warning( - FutureWarning, check_stacklevel=False): - data = 'a,b\n1,a\n2,b' - expected = np.array([(1, 'a'), (2, 'b')], - dtype=[('a', '=i8'), ('b', 'O')]) - out = self.read_csv(StringIO(data), as_recarray=True, index_col=0) - tm.assert_numpy_array_equal(out, expected) - - # respects names - with tm.assert_produces_warning( - FutureWarning, check_stacklevel=False): - data = '1,a\n2,b' - expected = np.array([(1, 'a'), (2, 'b')], - dtype=[('a', '=i8'), ('b', 'O')]) - out = self.read_csv(StringIO(data), names=['a', 'b'], - header=None, as_recarray=True) - tm.assert_numpy_array_equal(out, expected) - - # header order is respected even though it conflicts - # with the natural ordering of the column names - with tm.assert_produces_warning( - FutureWarning, check_stacklevel=False): - data = 'b,a\n1,a\n2,b' - expected = np.array([(1, 'a'), (2, 'b')], - dtype=[('b', '=i8'), ('a', 'O')]) - out = self.read_csv(StringIO(data), as_recarray=True) - tm.assert_numpy_array_equal(out, expected) - - # overrides the squeeze parameter - with tm.assert_produces_warning( - FutureWarning, check_stacklevel=False): - data = 'a\n1' - expected = np.array([(1,)], dtype=[('a', '=i8')]) - out = self.read_csv(StringIO(data), as_recarray=True, squeeze=True) - tm.assert_numpy_array_equal(out, expected) - - # does data conversions before doing recarray conversion - with 
tm.assert_produces_warning( - FutureWarning, check_stacklevel=False): - data = 'a,b\n1,a\n2,b' - conv = lambda x: int(x) + 1 - expected = np.array([(2, 'a'), (3, 'b')], - dtype=[('a', '=i8'), ('b', 'O')]) - out = self.read_csv(StringIO(data), as_recarray=True, - converters={'a': conv}) - tm.assert_numpy_array_equal(out, expected) - - # filters by usecols before doing recarray conversion - with tm.assert_produces_warning( - FutureWarning, check_stacklevel=False): - data = 'a,b\n1,a\n2,b' - expected = np.array([(1,), (2,)], dtype=[('a', '=i8')]) - out = self.read_csv(StringIO(data), as_recarray=True, - usecols=['a']) - tm.assert_numpy_array_equal(out, expected) + pytest.raises(EmptyDataError, self.read_csv, StringIO(data)) def test_memory_map(self): mmap_file = os.path.join(self.dirpath, 'test_mmap.csv') @@ -1526,7 +1397,7 @@ def test_null_byte_char(self): tm.assert_frame_equal(out, expected) else: msg = "NULL byte detected" - with tm.assertRaisesRegexp(csv.Error, msg): + with tm.assert_raises_regex(ParserError, msg): self.read_csv(StringIO(data), names=cols) def test_utf8_bom(self): @@ -1613,16 +1484,41 @@ def test_internal_eof_byte(self): result = self.read_csv(StringIO(data)) tm.assert_frame_equal(result, expected) + def test_internal_eof_byte_to_file(self): + # see gh-16559 + data = b'c1,c2\r\n"test \x1a test", test\r\n' + expected = pd.DataFrame([["test \x1a test", " test"]], + columns=["c1", "c2"]) + + path = '__%s__.csv' % tm.rands(10) + + with tm.ensure_clean(path) as path: + with open(path, "wb") as f: + f.write(data) + + result = self.read_csv(path) + tm.assert_frame_equal(result, expected) + + def test_sub_character(self): + # see gh-16893 + dirpath = tm.get_data_path() + filename = os.path.join(dirpath, "sub_char.csv") + + expected = DataFrame([[1, 2, 3]], columns=["a", "\x1ab", "c"]) + result = self.read_csv(filename) + + tm.assert_frame_equal(result, expected) + def test_file_handles(self): # GH 14418 - don't close user provided file handles fh = 
StringIO('a,b\n1,2') self.read_csv(fh) - self.assertFalse(fh.closed) + assert not fh.closed with open(self.csv1, 'r') as f: self.read_csv(f) - self.assertFalse(f.closed) + assert not f.closed # mmap not working with python engine if self.engine != 'python': @@ -1633,5 +1529,75 @@ def test_file_handles(self): self.read_csv(m) # closed attribute new in python 3.2 if PY3: - self.assertFalse(m.closed) + assert not m.closed m.close() + + def test_invalid_file_buffer(self): + # see gh-15337 + + class InvalidBuffer(object): + pass + + msg = "Invalid file path or buffer object type" + + with tm.assert_raises_regex(ValueError, msg): + self.read_csv(InvalidBuffer()) + + # gh-16135: we want to ensure that "tell" and "seek" + # aren't actually being used when we call `read_csv` + # + # Thus, while the object may look "invalid" (these + # methods are attributes of the `StringIO` class), + # it is still a valid file-object for our purposes. + class NoSeekTellBuffer(StringIO): + def tell(self): + raise AttributeError("No tell method") + + def seek(self, pos, whence=0): + raise AttributeError("No seek method") + + data = "a\n1" + + expected = pd.DataFrame({"a": [1]}) + result = self.read_csv(NoSeekTellBuffer(data)) + + tm.assert_frame_equal(result, expected) + + if PY3: + from unittest import mock + + with tm.assert_raises_regex(ValueError, msg): + self.read_csv(mock.Mock()) + + @tm.capture_stderr + def test_skip_bad_lines(self): + # see gh-15925 + data = 'a\n1\n1,2,3\n4\n5,6,7' + + with pytest.raises(ParserError): + self.read_csv(StringIO(data)) + + with pytest.raises(ParserError): + self.read_csv(StringIO(data), error_bad_lines=True) + + expected = DataFrame({'a': [1, 4]}) + + out = self.read_csv(StringIO(data), + error_bad_lines=False, + warn_bad_lines=False) + tm.assert_frame_equal(out, expected) + + val = sys.stderr.getvalue() + assert val == '' + + # Reset the stderr buffer. 
+ sys.stderr = StringIO() + + out = self.read_csv(StringIO(data), + error_bad_lines=False, + warn_bad_lines=True) + tm.assert_frame_equal(out, expected) + + val = sys.stderr.getvalue() + assert 'Skipping line 3' in val + assert 'Skipping line 5' in val diff --git a/pandas/io/tests/parser/compression.py b/pandas/tests/io/parser/compression.py similarity index 76% rename from pandas/io/tests/parser/compression.py rename to pandas/tests/io/parser/compression.py index bdcd10fc64aa5..4291d59123e8b 100644 --- a/pandas/io/tests/parser/compression.py +++ b/pandas/tests/io/parser/compression.py @@ -7,16 +7,16 @@ import pytest +import pandas as pd +import pandas.compat as compat import pandas.util.testing as tm +import pandas.util._test_decorators as td class CompressionTests(object): def test_zip(self): - try: - import zipfile - except ImportError: - pytest.skip('need zipfile to run') + import zipfile with open(self.csv1, 'rb') as data_file: data = data_file.read() @@ -45,29 +45,27 @@ def test_zip(self): tmp.writestr(file_name, data) tmp.close() - self.assertRaisesRegexp(ValueError, 'Multiple files', - self.read_csv, path, compression='zip') + tm.assert_raises_regex(ValueError, 'Multiple files', + self.read_csv, path, compression='zip') - self.assertRaisesRegexp(ValueError, 'Multiple files', - self.read_csv, path, compression='infer') + tm.assert_raises_regex(ValueError, 'Multiple files', + self.read_csv, path, + compression='infer') with tm.ensure_clean() as path: tmp = zipfile.ZipFile(path, mode='w') tmp.close() - self.assertRaisesRegexp(ValueError, 'Zero files', - self.read_csv, path, compression='zip') + tm.assert_raises_regex(ValueError, 'Zero files', + self.read_csv, path, compression='zip') with tm.ensure_clean() as path: with open(path, 'wb') as f: - self.assertRaises(zipfile.BadZipfile, self.read_csv, - f, compression='zip') + pytest.raises(zipfile.BadZipfile, self.read_csv, + f, compression='zip') def test_gzip(self): - try: - import gzip - except ImportError: - 
pytest.skip('need gzip to run') + import gzip with open(self.csv1, 'rb') as data_file: data = data_file.read() @@ -93,10 +91,7 @@ def test_gzip(self): tm.assert_frame_equal(result, expected) def test_bz2(self): - try: - import bz2 - except ImportError: - pytest.skip('need bz2 to run') + import bz2 with open(self.csv1, 'rb') as data_file: data = data_file.read() @@ -110,8 +105,8 @@ def test_bz2(self): result = self.read_csv(path, compression='bz2') tm.assert_frame_equal(result, expected) - self.assertRaises(ValueError, self.read_csv, - path, compression='bz3') + pytest.raises(ValueError, self.read_csv, + path, compression='bz3') with open(path, 'rb') as fin: result = self.read_csv(fin, compression='bz2') @@ -124,8 +119,9 @@ def test_bz2(self): result = self.read_csv(path, compression='infer') tm.assert_frame_equal(result, expected) + @td.skip_if_no_lzma def test_xz(self): - lzma = tm._skip_if_no_lzma() + lzma = compat.import_lzma() with open(self.csv1, 'rb') as data_file: data = data_file.read() @@ -165,7 +161,20 @@ def test_read_csv_infer_compression(self): inputs[3].close() + def test_read_csv_compressed_utf16_example(self): + # GH18071 + path = tm.get_data_path('utf16_ex_small.zip') + + result = self.read_csv(path, encoding='utf-16', + compression='zip', sep='\t') + expected = pd.DataFrame({ + u'Country': [u'Venezuela', u'Venezuela'], + u'Twitter': [u'Hugo Chávez Frías', u'Henrique Capriles R.'] + }) + + tm.assert_frame_equal(result, expected) + def test_invalid_compression(self): msg = 'Unrecognized compression type: sfark' - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): self.read_csv('test_file.zip', compression='sfark') diff --git a/pandas/io/tests/parser/converters.py b/pandas/tests/io/parser/converters.py similarity index 89% rename from pandas/io/tests/parser/converters.py rename to pandas/tests/io/parser/converters.py index 859d2e19bd56a..ae35d45591dc5 100644 --- a/pandas/io/tests/parser/converters.py +++ 
b/pandas/tests/io/parser/converters.py @@ -13,7 +13,7 @@ import pandas as pd import pandas.util.testing as tm -from pandas.lib import Timestamp +from pandas._libs.tslib import Timestamp from pandas import DataFrame, Index from pandas.compat import parse_date, StringIO, lmap @@ -24,7 +24,7 @@ def test_converters_type_must_be_dict(self): data = """index,A,B,C,D foo,2,3,4,5 """ - with tm.assertRaisesRegexp(TypeError, 'Type converters.+'): + with tm.assert_raises_regex(TypeError, 'Type converters.+'): self.read_csv(StringIO(data), converters=0) def test_converters(self): @@ -39,7 +39,7 @@ def test_converters(self): expected = self.read_csv(StringIO(data)) expected['D'] = expected['D'].map(parse_date) - tm.assertIsInstance(result['D'][0], (datetime, Timestamp)) + assert isinstance(result['D'][0], (datetime, Timestamp)) tm.assert_frame_equal(result, expected) tm.assert_frame_equal(result2, expected) @@ -56,7 +56,7 @@ def test_converters_no_implicit_conv(self): f = lambda x: x.strip() converter = {0: f} df = self.read_csv(StringIO(data), header=None, converters=converter) - self.assertEqual(df[0].dtype, object) + assert df[0].dtype == object def test_converters_euro_decimal_format(self): data = """Id;Number1;Number2;Text1;Text2;Number3 @@ -66,9 +66,9 @@ def test_converters_euro_decimal_format(self): f = lambda x: float(x.replace(",", ".")) converter = {'Number1': f, 'Number2': f, 'Number3': f} df2 = self.read_csv(StringIO(data), sep=';', converters=converter) - self.assertEqual(df2['Number1'].dtype, float) - self.assertEqual(df2['Number2'].dtype, float) - self.assertEqual(df2['Number3'].dtype, float) + assert df2['Number1'].dtype == float + assert df2['Number2'].dtype == float + assert df2['Number3'].dtype == float def test_converter_return_string_bug(self): # see gh-583 @@ -79,7 +79,7 @@ def test_converter_return_string_bug(self): f = lambda x: float(x.replace(",", ".")) converter = {'Number1': f, 'Number2': f, 'Number3': f} df2 = self.read_csv(StringIO(data), sep=';', 
converters=converter) - self.assertEqual(df2['Number1'].dtype, float) + assert df2['Number1'].dtype == float def test_converters_corner_with_nas(self): # skip aberration observed on Win64 Python 3.2.2 @@ -133,7 +133,7 @@ def convert_score(x): result = self.read_csv(fh, converters={'score': convert_score, 'days': convert_days}, na_values=['', None]) - self.assertTrue(pd.isnull(result['days'][1])) + assert pd.isna(result['days'][1]) fh = StringIO(data) result2 = self.read_csv(fh, converters={'score': convert_score, @@ -150,4 +150,4 @@ def test_converter_index_col_bug(self): xp = DataFrame({'B': [2, 4]}, index=Index([1, 3], name='A')) tm.assert_frame_equal(rs, xp) - self.assertEqual(rs.index.name, xp.index.name) + assert rs.index.name == xp.index.name diff --git a/pandas/io/tests/parser/data/iris.csv b/pandas/tests/io/parser/data/iris.csv similarity index 100% rename from pandas/io/tests/parser/data/iris.csv rename to pandas/tests/io/parser/data/iris.csv diff --git a/pandas/tests/io/parser/data/items.jsonl b/pandas/tests/io/parser/data/items.jsonl new file mode 100644 index 0000000000000..f784d37befa82 --- /dev/null +++ b/pandas/tests/io/parser/data/items.jsonl @@ -0,0 +1,2 @@ +{"a": 1, "b": 2} +{"b":2, "a" :1} diff --git a/pandas/io/tests/parser/data/salaries.csv b/pandas/tests/io/parser/data/salaries.csv similarity index 100% rename from pandas/io/tests/parser/data/salaries.csv rename to pandas/tests/io/parser/data/salaries.csv diff --git a/pandas/io/tests/parser/data/salaries.csv.bz2 b/pandas/tests/io/parser/data/salaries.csv.bz2 similarity index 100% rename from pandas/io/tests/parser/data/salaries.csv.bz2 rename to pandas/tests/io/parser/data/salaries.csv.bz2 diff --git a/pandas/io/tests/parser/data/salaries.csv.gz b/pandas/tests/io/parser/data/salaries.csv.gz similarity index 100% rename from pandas/io/tests/parser/data/salaries.csv.gz rename to pandas/tests/io/parser/data/salaries.csv.gz diff --git a/pandas/io/tests/parser/data/salaries.csv.xz 
b/pandas/tests/io/parser/data/salaries.csv.xz similarity index 100% rename from pandas/io/tests/parser/data/salaries.csv.xz rename to pandas/tests/io/parser/data/salaries.csv.xz diff --git a/pandas/io/tests/parser/data/salaries.csv.zip b/pandas/tests/io/parser/data/salaries.csv.zip similarity index 100% rename from pandas/io/tests/parser/data/salaries.csv.zip rename to pandas/tests/io/parser/data/salaries.csv.zip diff --git a/pandas/io/tests/parser/data/sauron.SHIFT_JIS.csv b/pandas/tests/io/parser/data/sauron.SHIFT_JIS.csv similarity index 100% rename from pandas/io/tests/parser/data/sauron.SHIFT_JIS.csv rename to pandas/tests/io/parser/data/sauron.SHIFT_JIS.csv diff --git a/pandas/tests/io/parser/data/sub_char.csv b/pandas/tests/io/parser/data/sub_char.csv new file mode 100644 index 0000000000000..ff1fa777832c7 --- /dev/null +++ b/pandas/tests/io/parser/data/sub_char.csv @@ -0,0 +1,2 @@ +a,"b",c +1,2,3 \ No newline at end of file diff --git a/pandas/tests/io/parser/data/tar_csv.tar b/pandas/tests/io/parser/data/tar_csv.tar new file mode 100644 index 0000000000000000000000000000000000000000..d1819550e0a0064b4d9ad829f120e49760c3ffe2 GIT binary patch literal 10240 zcmeIuK?;O03_#JW1@F)k3{BNsM}nR}J9B-TY7p4K!(9_GO$s`)68z@>0YS zW+wk!;+jjzL^~}framQ!s=W;o;!FQIwf(Nymk>_1JC}X6!*X|eRCwcUqis`RFe4E_ x009ILKmY**5I_I{1Q0*~0R#|0009ILKmY**5I_I{1Q0*~0R#|0009IZ32f(E6h{C6 literal 0 HcmV?d00001 diff --git a/pandas/tests/io/parser/data/tar_csv.tar.gz b/pandas/tests/io/parser/data/tar_csv.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b5a0f3e1b580535a3fbdc2ff943b79d8c585df9f GIT binary patch literal 10240 zcmeIu%?W@o41m#`1$XEK(ok*3k)RW3b$+WS^{9xKFPG3j^YgMz{b<>mVP55<@Fil5 zvgZ=_TuM;(Z{IR;yy82--BN0FV w0R#|0009ILKmY**5I_I{1Q0*~0R#|0009ILKmY**5I_I{1Q0*~0R&zNY&Ss^RsaA1 literal 0 HcmV?d00001 diff --git a/pandas/io/tests/parser/data/test1.csv b/pandas/tests/io/parser/data/test1.csv similarity index 100% rename from pandas/io/tests/parser/data/test1.csv rename to 
pandas/tests/io/parser/data/test1.csv diff --git a/pandas/io/tests/parser/data/test1.csv.bz2 b/pandas/tests/io/parser/data/test1.csv.bz2 similarity index 100% rename from pandas/io/tests/parser/data/test1.csv.bz2 rename to pandas/tests/io/parser/data/test1.csv.bz2 diff --git a/pandas/io/tests/parser/data/test1.csv.gz b/pandas/tests/io/parser/data/test1.csv.gz similarity index 100% rename from pandas/io/tests/parser/data/test1.csv.gz rename to pandas/tests/io/parser/data/test1.csv.gz diff --git a/pandas/io/tests/parser/data/test2.csv b/pandas/tests/io/parser/data/test2.csv similarity index 100% rename from pandas/io/tests/parser/data/test2.csv rename to pandas/tests/io/parser/data/test2.csv diff --git a/pandas/io/tests/parser/data/test_mmap.csv b/pandas/tests/io/parser/data/test_mmap.csv similarity index 100% rename from pandas/io/tests/parser/data/test_mmap.csv rename to pandas/tests/io/parser/data/test_mmap.csv diff --git a/pandas/io/tests/parser/data/tips.csv b/pandas/tests/io/parser/data/tips.csv similarity index 100% rename from pandas/io/tests/parser/data/tips.csv rename to pandas/tests/io/parser/data/tips.csv diff --git a/pandas/tests/io/parser/data/tips.csv.bz2 b/pandas/tests/io/parser/data/tips.csv.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..1452896b05e9d41f58ffd816a0459d86796718a6 GIT binary patch literal 1316 zcmV+<1>5>UT4*^jL0KkKS@WgHUjPpp-+%xR00n>G1qTcuzHi=eU zr8L5NfM@_34^ia=nn@4@ngc)pXm>4ki*@VR?6|SRoF#LZ+TkL$)Z)}c<#mBig_KMX zruJeOi&bv;V=*04xP@hDQp(ibF*2pqxW%nuMr@F6Gix?+fsH|aKayy7UwGa_-`dVs zYfM$)R7$k8wpC6gfmM#M!-v|)iP#1h4cPkh|rkJNTD3*02| zUew#%bX<$c*~vCvMH>_%oV^S&6a+#ukskADG3ECrBRBE^v4aChy? 
zvDazQUv(jtyOFJd%+RitVq;Fo?$ru4tx8y4RWLAw3OQ&r5YZ6QA(|s=%EqEnNvFyDucBxbJ63X0f6|L)lrAb?vZoDHd%^>qwTK z8M-E+R_N`PibFFSF!cCl2Z7}>xeJ`*<3&DX2?dNalnbN*vYZ7QTLis}+CyTbyv{>s zl!hm_!_I4KZE}>uSzBr=*www83fCT-SPZ&+p@dCkFG(R6{D)ETHdAf-8>fnW#-GXdM4pE5VK!{hIp z4{*7H7hK39V*E6-z)7yKmA;#^4 z#PVN7@@@mJL*EhAX#`mH2SAk2lkhNXJBL>BHS&`^r&JS)>z58UjoYiOCqY*zmz*K6 z1SFlk-!Cn`6liVaz=_bPhSWpu1LJ>%Cxlk3T;w2WIQ0LRX3%vrxUPW z8d$X$uIXc_sI{9kN=EXFie6i&h29y!AZcb)r??rFOLu%3R3P<2gpt$oRe1O6gk~8T zu3j+kM{M-PhPbG60sxBGP*RgE)NL!@Yr%+f=+n7l@JL0;84IYj5yo31-0M)BHp<)Q zzkK_6UA}%i|M3mU6cFV&C+q8L8zqA-)xv!>^z@7=Fgi9q_iLEzwg+!G2w0Ts9jf*M z64F>g8RrtB4m-(FnM=?v>|@tRdI1$7H2kMsssN5^GU(*!z`p{ft@Qr;@_OlzdPSq# z=N&m=z8R{dV?dV-Iwe>fL1(0h{JJ}+<6sZ(@ePlLCs;FVmX?rYPxs1DA(^whpU+gQLdb{bOK!0;_ zkQW*TzXUDj{aqJ}zCZT`AFw?MCRq$YLmUun3sPt|TJ|F1y1->qh6EwxZc5srUOK?6 zfIOA24Gq;xs91xZWkXI-kgFkpK@VM+dImzp9WY2eRlGn`2@#FO*RJOK&vl0mX5&x| zsC*~R>SEi53Wfn0JC1s5&DImTC?CmS%t%KJn8SnJ{vz7Tu;z{(oX1Uj?2r-D=FHLg z#Nx)*tqL1*0`$uskSzVPPI~Zw87JK{kHS;|mjvLPazsSBBGTEE(XeUKcA)Oa1!1&{ ziGd~d!Xgpq$A_L=)+{U2btCFAD_NiGHe#QuSj!mhzmK3jN5V2e#ai_;@D^ZS3^-kH z6guhK*S?INWvhtT8n-^y8%I8HZbrKc2koF=btc|VG&cU-G4a~h=kf7qrTv=Ut%I~S zEXzKRMTs`<+xJ_K%nb(}Ie8d~S$W#@BiccQnPiO(+O^Yd9ou<9tf*;o$=WeUAZqAG zyzyj!F_p;rzPQ?Y92;+@To35Y<=xOSTm>@DJ;}6?*Lzr=TgaG9BIbr{y}$`b72TY! zqYYtgpVJv*bV|eFpvy$Pm>HFtbh_Na_)b19LfLd-0+3QVd;u1iG1e^0tsmq27&c@f zqhD+!jOz~T@n@5$<6yJqL9iFfH0&B9mSe(Zd*O_H&`()&cv#qX>*83gV@pnS)Uxa6 zh&!W4Kw{zbuyG*bJ30s^kL%1hKc#3Y!TLa1|HGI+q2~|%8;0j+sEAdd#O2^p#_J5{ zqk&o!uGkw*Xq2S)W72nPTLSJR3mF;xQOdr}*By;^C3XK=k7;*$ zylq6O8Vck|96AOM^M;z(GGMh%)?T{?8o*P+jIR3%VPB~S`#)bVj@Hps@zV;k&aoL? 
zJT_x>_m~9QgT~p5h literal 0 HcmV?d00001 diff --git a/pandas/tests/formats/data/unicode_series.csv b/pandas/tests/io/parser/data/unicode_series.csv similarity index 100% rename from pandas/tests/formats/data/unicode_series.csv rename to pandas/tests/io/parser/data/unicode_series.csv diff --git a/pandas/io/tests/parser/data/utf16_ex.txt b/pandas/tests/io/parser/data/utf16_ex.txt similarity index 100% rename from pandas/io/tests/parser/data/utf16_ex.txt rename to pandas/tests/io/parser/data/utf16_ex.txt diff --git a/pandas/tests/io/parser/data/utf16_ex_small.zip b/pandas/tests/io/parser/data/utf16_ex_small.zip new file mode 100644 index 0000000000000000000000000000000000000000..b0560c1b1f6c41307b575f2a86509021b49649f4 GIT binary patch literal 285 zcmWIWW@Zs#U|`^2c&?n{%@J5Cmki|10Ae8q8HUo5G()ra)Qb4x+{Bz5y^@NO&=5`r zX2v6bB0;#cf}4SnIF&U?+Ut|9%K z*xYyP>aL!jYp@|<#>O`+A~_|u(y}H_2)et1Z)sMQR;tn2-aQ>H(-D~jm`P25a**gJ;0ll4Wxq+ M2qS>>dJu;J0ImR5j{pDw literal 0 HcmV?d00001 diff --git a/pandas/io/tests/parser/dialect.py b/pandas/tests/io/parser/dialect.py similarity index 95% rename from pandas/io/tests/parser/dialect.py rename to pandas/tests/io/parser/dialect.py index ee50cf812f72e..f756fe71bf684 100644 --- a/pandas/io/tests/parser/dialect.py +++ b/pandas/tests/io/parser/dialect.py @@ -9,7 +9,7 @@ from pandas import DataFrame from pandas.compat import StringIO -from pandas.io.common import ParserWarning +from pandas.errors import ParserWarning import pandas.util.testing as tm @@ -61,7 +61,7 @@ class InvalidDialect(object): data = 'a\n1' msg = 'Invalid dialect' - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): self.read_csv(StringIO(data), dialect=InvalidDialect) def test_dialect_conflict(self): diff --git a/pandas/io/tests/parser/dtypes.py b/pandas/tests/io/parser/dtypes.py similarity index 67% rename from pandas/io/tests/parser/dtypes.py rename to pandas/tests/io/parser/dtypes.py index fa95c18c4d7a9..b91ce04673e29 100644 --- 
a/pandas/io/tests/parser/dtypes.py +++ b/pandas/tests/io/parser/dtypes.py @@ -5,14 +5,16 @@ for all of the parsers defined in parsers.py """ +import pytest + import numpy as np import pandas as pd import pandas.util.testing as tm from pandas import DataFrame, Series, Index, MultiIndex, Categorical from pandas.compat import StringIO -from pandas.types.dtypes import CategoricalDtype -from pandas.io.common import ParserWarning +from pandas.core.dtypes.dtypes import CategoricalDtype +from pandas.errors import ParserWarning class DtypeTests(object): @@ -40,9 +42,9 @@ def test_passing_dtype(self): tm.assert_frame_equal(result, df) # invalid dtype - self.assertRaises(TypeError, self.read_csv, path, - dtype={'A': 'foo', 'B': 'float64'}, - index_col=0) + pytest.raises(TypeError, self.read_csv, path, + dtype={'A': 'foo', 'B': 'float64'}, + index_col=0) # see gh-12048: empty frame actual = self.read_csv(StringIO('A,B'), dtype=str) @@ -58,8 +60,8 @@ def test_pass_dtype(self): 4,5.5""" result = self.read_csv(StringIO(data), dtype={'one': 'u1', 1: 'S1'}) - self.assertEqual(result['one'].dtype, 'u1') - self.assertEqual(result['two'].dtype, 'object') + assert result['one'].dtype == 'u1' + assert result['two'].dtype == 'object' def test_categorical_dtype(self): # GH 10153 @@ -112,6 +114,17 @@ def test_categorical_dtype(self): actual = self.read_csv(StringIO(data), dtype='category') tm.assert_frame_equal(actual, expected) + @pytest.mark.slow + def test_categorical_dtype_high_cardinality_numeric(self): + # GH 18186 + data = np.sort([str(i) for i in range(524289)]) + expected = DataFrame({'a': Categorical(data, ordered=True)}) + actual = self.read_csv(StringIO('a\n' + '\n'.join(data)), + dtype='category') + actual["a"] = actual["a"].cat.reorder_categories( + np.sort(actual.a.cat.categories), ordered=True) + tm.assert_frame_equal(actual, expected) + def test_categorical_dtype_encoding(self): # GH 10153 pth = tm.get_data_path('unicode_series.csv') @@ -147,6 +160,105 @@ def 
test_categorical_dtype_chunksize(self): for actual, expected in zip(actuals, expecteds): tm.assert_frame_equal(actual, expected) + @pytest.mark.parametrize('ordered', [False, True]) + @pytest.mark.parametrize('categories', [ + ['a', 'b', 'c'], + ['a', 'c', 'b'], + ['a', 'b', 'c', 'd'], + ['c', 'b', 'a'], + ]) + def test_categorical_categoricaldtype(self, categories, ordered): + data = """a,b +1,a +1,b +1,b +2,c""" + expected = pd.DataFrame({ + "a": [1, 1, 1, 2], + "b": Categorical(['a', 'b', 'b', 'c'], + categories=categories, + ordered=ordered) + }) + dtype = {"b": CategoricalDtype(categories=categories, + ordered=ordered)} + result = self.read_csv(StringIO(data), dtype=dtype) + tm.assert_frame_equal(result, expected) + + def test_categorical_categoricaldtype_unsorted(self): + data = """a,b +1,a +1,b +1,b +2,c""" + dtype = CategoricalDtype(['c', 'b', 'a']) + expected = pd.DataFrame({ + 'a': [1, 1, 1, 2], + 'b': Categorical(['a', 'b', 'b', 'c'], categories=['c', 'b', 'a']) + }) + result = self.read_csv(StringIO(data), dtype={'b': dtype}) + tm.assert_frame_equal(result, expected) + + def test_categoricaldtype_coerces_numeric(self): + dtype = {'b': CategoricalDtype([1, 2, 3])} + data = "b\n1\n1\n2\n3" + expected = pd.DataFrame({'b': Categorical([1, 1, 2, 3])}) + result = self.read_csv(StringIO(data), dtype=dtype) + tm.assert_frame_equal(result, expected) + + def test_categoricaldtype_coerces_datetime(self): + dtype = { + 'b': CategoricalDtype(pd.date_range('2017', '2019', freq='AS')) + } + data = "b\n2017-01-01\n2018-01-01\n2019-01-01" + expected = pd.DataFrame({'b': Categorical(dtype['b'].categories)}) + result = self.read_csv(StringIO(data), dtype=dtype) + tm.assert_frame_equal(result, expected) + + dtype = { + 'b': CategoricalDtype([pd.Timestamp("2014")]) + } + data = "b\n2014-01-01\n2014-01-01T00:00:00" + expected = pd.DataFrame({'b': Categorical([pd.Timestamp('2014')] * 2)}) + result = self.read_csv(StringIO(data), dtype=dtype) + tm.assert_frame_equal(result, 
expected) + + def test_categoricaldtype_coerces_timedelta(self): + dtype = {'b': CategoricalDtype(pd.to_timedelta(['1H', '2H', '3H']))} + data = "b\n1H\n2H\n3H" + expected = pd.DataFrame({'b': Categorical(dtype['b'].categories)}) + result = self.read_csv(StringIO(data), dtype=dtype) + tm.assert_frame_equal(result, expected) + + def test_categoricaldtype_unexpected_categories(self): + dtype = {'b': CategoricalDtype(['a', 'b', 'd', 'e'])} + data = "b\nd\na\nc\nd" # Unexpected c + expected = pd.DataFrame({"b": Categorical(list('dacd'), + dtype=dtype['b'])}) + result = self.read_csv(StringIO(data), dtype=dtype) + tm.assert_frame_equal(result, expected) + + def test_categorical_categoricaldtype_chunksize(self): + # GH 10153 + data = """a,b +1,a +1,b +1,b +2,c""" + cats = ['a', 'b', 'c'] + expecteds = [pd.DataFrame({'a': [1, 1], + 'b': Categorical(['a', 'b'], + categories=cats)}), + pd.DataFrame({'a': [1, 2], + 'b': Categorical(['b', 'c'], + categories=cats)}, + index=[2, 3])] + dtype = CategoricalDtype(cats) + actuals = self.read_csv(StringIO(data), dtype={'b': dtype}, + chunksize=2) + + for actual, expected in zip(actuals, expecteds): + tm.assert_frame_equal(actual, expected) + def test_empty_pass_dtype(self): data = 'one,two' result = self.read_csv(StringIO(data), dtype={'one': 'u1'}) @@ -202,10 +314,11 @@ def test_empty_with_dup_column_pass_dtype_by_indexes(self): result = self.read_csv(StringIO(data), dtype={0: 'u1', 1: 'f'}) tm.assert_frame_equal(result, expected, check_index_type=False) - data = '' - result = self.read_csv(StringIO(data), names=['one', 'one'], - dtype={0: 'u1', 1: 'f'}) - tm.assert_frame_equal(result, expected, check_index_type=False) + with tm.assert_produces_warning(UserWarning, check_stacklevel=False): + data = '' + result = self.read_csv(StringIO(data), names=['one', 'one'], + dtype={0: 'u1', 1: 'f'}) + tm.assert_frame_equal(result, expected, check_index_type=False) def test_raise_on_passed_int_dtype_with_nas(self): # see gh-2631 @@ -213,9 
+326,9 @@ def test_raise_on_passed_int_dtype_with_nas(self): 2001,106380451,10 2001,,11 2001,106380451,67""" - self.assertRaises(ValueError, self.read_csv, StringIO(data), - sep=",", skipinitialspace=True, - dtype={'DOY': np.int64}) + pytest.raises(ValueError, self.read_csv, StringIO(data), + sep=",", skipinitialspace=True, + dtype={'DOY': np.int64}) def test_dtype_with_converter(self): data = """a,b diff --git a/pandas/io/tests/parser/header.py b/pandas/tests/io/parser/header.py similarity index 68% rename from pandas/io/tests/parser/header.py rename to pandas/tests/io/parser/header.py index dc6d2ad1daa47..3fb0650348763 100644 --- a/pandas/io/tests/parser/header.py +++ b/pandas/tests/io/parser/header.py @@ -5,6 +5,8 @@ during parsing for all of the parsers defined in parsers.py """ +import pytest + import numpy as np import pandas.util.testing as tm @@ -17,7 +19,7 @@ class HeaderTests(object): def test_read_with_bad_header(self): errmsg = r"but only \d+ lines in file" - with tm.assertRaisesRegexp(ValueError, errmsg): + with tm.assert_raises_regex(ValueError, errmsg): s = StringIO(',,') self.read_csv(s, header=[10]) @@ -30,9 +32,9 @@ def test_bool_header_arg(self): a b""" for arg in [True, False]: - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): self.read_csv(StringIO(data), header=arg) - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): self.read_table(StringIO(data), header=arg) def test_no_header_prefix(self): @@ -48,9 +50,9 @@ def test_no_header_prefix(self): [11, 12, 13, 14, 15]], dtype=np.int64) tm.assert_almost_equal(df_pref.values, expected) - self.assert_index_equal(df_pref.columns, - Index(['Field0', 'Field1', 'Field2', - 'Field3', 'Field4'])) + tm.assert_index_equal(df_pref.columns, + Index(['Field0', 'Field1', 'Field2', + 'Field3', 'Field4'])) def test_header_with_index_col(self): data = """foo,1,2,3 @@ -60,7 +62,7 @@ def test_header_with_index_col(self): names = ['A', 'B', 'C'] df = self.read_csv(StringIO(data), 
names=names) - self.assertEqual(names, ['A', 'B', 'C']) + assert list(df.columns) == ['A', 'B', 'C'] values = [[1, 2, 3], [4, 5, 6], [7, 8, 9]] expected = DataFrame(values, index=['foo', 'bar', 'baz'], @@ -103,41 +105,31 @@ def test_header_multi_index(self): R_l0_g4,R_l1_g4,R4C0,R4C1,R4C2 """ - df = self.read_csv(StringIO(data), header=[0, 1, 2, 3], index_col=[ - 0, 1], tupleize_cols=False) + df = self.read_csv(StringIO(data), header=[0, 1, 2, 3], + index_col=[0, 1]) tm.assert_frame_equal(df, expected) # skipping lines in the header - df = self.read_csv(StringIO(data), header=[0, 1, 2, 3], index_col=[ - 0, 1], tupleize_cols=False) + df = self.read_csv(StringIO(data), header=[0, 1, 2, 3], + index_col=[0, 1]) tm.assert_frame_equal(df, expected) # INVALID OPTIONS - # no as_recarray - with tm.assert_produces_warning( - FutureWarning, check_stacklevel=False): - self.assertRaises(ValueError, self.read_csv, - StringIO(data), header=[0, 1, 2, 3], - index_col=[0, 1], as_recarray=True, - tupleize_cols=False) - # names - self.assertRaises(ValueError, self.read_csv, - StringIO(data), header=[0, 1, 2, 3], - index_col=[0, 1], names=['foo', 'bar'], - tupleize_cols=False) + pytest.raises(ValueError, self.read_csv, + StringIO(data), header=[0, 1, 2, 3], + index_col=[0, 1], names=['foo', 'bar']) # usecols - self.assertRaises(ValueError, self.read_csv, - StringIO(data), header=[0, 1, 2, 3], - index_col=[0, 1], usecols=['foo', 'bar'], - tupleize_cols=False) + pytest.raises(ValueError, self.read_csv, + StringIO(data), header=[0, 1, 2, 3], + index_col=[0, 1], usecols=['foo', 'bar']) # non-numeric index_col - self.assertRaises(ValueError, self.read_csv, - StringIO(data), header=[0, 1, 2, 3], - index_col=['foo', 'bar'], tupleize_cols=False) + pytest.raises(ValueError, self.read_csv, + StringIO(data), header=[0, 1, 2, 3], + index_col=['foo', 'bar']) def test_header_multiindex_common_format(self): @@ -270,8 +262,51 @@ def test_no_header(self): tm.assert_almost_equal(df.values, expected) 
tm.assert_almost_equal(df.values, df2.values) - self.assert_index_equal(df_pref.columns, - Index(['X0', 'X1', 'X2', 'X3', 'X4'])) - self.assert_index_equal(df.columns, Index(lrange(5))) + tm.assert_index_equal(df_pref.columns, + Index(['X0', 'X1', 'X2', 'X3', 'X4'])) + tm.assert_index_equal(df.columns, Index(lrange(5))) + + tm.assert_index_equal(df2.columns, Index(names)) + + def test_non_int_header(self): + # GH 16338 + msg = 'header must be integer or list of integers' + data = """1,2\n3,4""" + with tm.assert_raises_regex(ValueError, msg): + self.read_csv(StringIO(data), sep=',', header=['a', 'b']) + with tm.assert_raises_regex(ValueError, msg): + self.read_csv(StringIO(data), sep=',', header='string_header') + + def test_singleton_header(self): + # See GH #7757 + data = """a,b,c\n0,1,2\n1,2,3""" + df = self.read_csv(StringIO(data), header=[0]) + expected = DataFrame({"a": [0, 1], "b": [1, 2], "c": [2, 3]}) + tm.assert_frame_equal(df, expected) + + def test_mangles_multi_index(self): + # See GH 18062 + data = """A,A,A,B\none,one,one,two\n0,40,34,0.1""" + df = self.read_csv(StringIO(data), header=[0, 1]) + expected = DataFrame([[0, 40, 34, 0.1]], + columns=MultiIndex.from_tuples( + [('A', 'one'), ('A', 'one.1'), + ('A', 'one.2'), ('B', 'two')])) + tm.assert_frame_equal(df, expected) + + data = """A,A,A,B\none,one,one.1,two\n0,40,34,0.1""" + df = self.read_csv(StringIO(data), header=[0, 1]) + expected = DataFrame([[0, 40, 34, 0.1]], + columns=MultiIndex.from_tuples( + [('A', 'one'), ('A', 'one.1'), + ('A', 'one.1.1'), ('B', 'two')])) + tm.assert_frame_equal(df, expected) - self.assert_index_equal(df2.columns, Index(names)) + data = """A,A,A,B,B\none,one,one.1,two,two\n0,40,34,0.1,0.1""" + df = self.read_csv(StringIO(data), header=[0, 1]) + expected = DataFrame([[0, 40, 34, 0.1, 0.1]], + columns=MultiIndex.from_tuples( + [('A', 'one'), ('A', 'one.1'), + ('A', 'one.1.1'), ('B', 'two'), + ('B', 'two.1')])) + tm.assert_frame_equal(df, expected) diff --git 
a/pandas/io/tests/parser/index_col.py b/pandas/tests/io/parser/index_col.py similarity index 92% rename from pandas/io/tests/parser/index_col.py rename to pandas/tests/io/parser/index_col.py index 6eb15eb3e043c..ee9b210443636 100644 --- a/pandas/io/tests/parser/index_col.py +++ b/pandas/tests/io/parser/index_col.py @@ -6,6 +6,8 @@ the parsers defined in parsers.py """ +import pytest + import pandas.util.testing as tm from pandas import DataFrame, Index, MultiIndex @@ -29,8 +31,8 @@ def test_index_col_named(self): xp = self.read_csv(StringIO(data), header=0).set_index('ID') tm.assert_frame_equal(rs, xp) - self.assertRaises(ValueError, self.read_csv, StringIO(no_header), - index_col='ID') + pytest.raises(ValueError, self.read_csv, StringIO(no_header), + index_col='ID') data = """\ 1,2,3,4,hello @@ -43,16 +45,16 @@ def test_index_col_named(self): index=Index(['hello', 'world', 'foo'], name='message')) rs = self.read_csv(StringIO(data), names=names, index_col=['message']) tm.assert_frame_equal(xp, rs) - self.assertEqual(xp.index.name, rs.index.name) + assert xp.index.name == rs.index.name rs = self.read_csv(StringIO(data), names=names, index_col='message') tm.assert_frame_equal(xp, rs) - self.assertEqual(xp.index.name, rs.index.name) + assert xp.index.name == rs.index.name def test_index_col_is_true(self): # see gh-9798 - self.assertRaises(ValueError, self.read_csv, - StringIO(self.ts_data), index_col=True) + pytest.raises(ValueError, self.read_csv, + StringIO(self.ts_data), index_col=True) def test_infer_index_col(self): data = """A,B,C @@ -61,7 +63,7 @@ def test_infer_index_col(self): baz,7,8,9 """ data = self.read_csv(StringIO(data)) - self.assertTrue(data.index.equals(Index(['foo', 'bar', 'baz']))) + assert data.index.equals(Index(['foo', 'bar', 'baz'])) def test_empty_index_col_scenarios(self): data = 'x,y,z' diff --git a/pandas/tests/io/parser/mangle_dupes.py b/pandas/tests/io/parser/mangle_dupes.py new file mode 100644 index 0000000000000..6df69eb475bf7 --- 
/dev/null +++ b/pandas/tests/io/parser/mangle_dupes.py @@ -0,0 +1,88 @@ +# -*- coding: utf-8 -*- + +""" +Tests that duplicate columns are handled appropriately when parsed by the +CSV engine. In general, the expected result is that they are either thoroughly +de-duplicated (if mangling requested) or ignored otherwise. +""" + +from pandas.compat import StringIO +from pandas import DataFrame + +import pandas.util.testing as tm + + +class DupeColumnTests(object): + def test_basic(self): + # TODO: add test for condition "mangle_dupe_cols=False" + # once it is actually supported (gh-12935) + data = "a,a,b,b,b\n1,2,3,4,5" + + for method in ("read_csv", "read_table"): + # Check default behavior. + expected = ["a", "a.1", "b", "b.1", "b.2"] + df = getattr(self, method)(StringIO(data), sep=",") + assert list(df.columns) == expected + + df = getattr(self, method)(StringIO(data), sep=",", + mangle_dupe_cols=True) + assert list(df.columns) == expected + + def test_basic_names(self): + # See gh-7160 + data = "a,b,a\n0,1,2\n3,4,5" + expected = DataFrame([[0, 1, 2], [3, 4, 5]], + columns=["a", "b", "a.1"]) + + df = self.read_csv(StringIO(data)) + tm.assert_frame_equal(df, expected) + + with tm.assert_produces_warning(UserWarning, check_stacklevel=False): + data = "0,1,2\n3,4,5" + df = self.read_csv(StringIO(data), + names=["a", "b", "a"]) + tm.assert_frame_equal(df, expected) + + def test_thorough_mangle_columns(self): + # see gh-17060 + data = "a,a,a.1\n1,2,3" + df = self.read_csv(StringIO(data), sep=",", mangle_dupe_cols=True) + assert list(df.columns) == ["a", "a.1", "a.1.1"] + + data = "a,a,a.1,a.1.1,a.1.1.1,a.1.1.1.1\n1,2,3,4,5,6" + df = self.read_csv(StringIO(data), sep=",", mangle_dupe_cols=True) + assert list(df.columns) == ["a", "a.1", "a.1.1", "a.1.1.1", + "a.1.1.1.1", "a.1.1.1.1.1"] + + data = "a,a,a.3,a.1,a.2,a,a\n1,2,3,4,5,6,7" + df = self.read_csv(StringIO(data), sep=",", mangle_dupe_cols=True) + assert list(df.columns) == ["a", "a.1", "a.3", "a.1.1", + "a.2", 
"a.2.1", "a.3.1"] + + def test_thorough_mangle_names(self): + # see gh-17095 + data = "a,b,b\n1,2,3" + names = ["a.1", "a.1", "a.1.1"] + + with tm.assert_produces_warning(UserWarning, check_stacklevel=False): + df = self.read_csv(StringIO(data), sep=",", names=names, + mangle_dupe_cols=True) + assert list(df.columns) == ["a.1", "a.1.1", "a.1.1.1"] + + data = "a,b,c,d,e,f\n1,2,3,4,5,6" + names = ["a", "a", "a.1", "a.1.1", "a.1.1.1", "a.1.1.1.1"] + + with tm.assert_produces_warning(UserWarning, check_stacklevel=False): + df = self.read_csv(StringIO(data), sep=",", names=names, + mangle_dupe_cols=True) + assert list(df.columns) == ["a", "a.1", "a.1.1", "a.1.1.1", + "a.1.1.1.1", "a.1.1.1.1.1"] + + data = "a,b,c,d,e,f,g\n1,2,3,4,5,6,7" + names = ["a", "a", "a.3", "a.1", "a.2", "a", "a"] + + with tm.assert_produces_warning(UserWarning, check_stacklevel=False): + df = self.read_csv(StringIO(data), sep=",", names=names, + mangle_dupe_cols=True) + assert list(df.columns) == ["a", "a.1", "a.3", "a.1.1", + "a.2", "a.2.1", "a.3.1"] diff --git a/pandas/io/tests/parser/multithread.py b/pandas/tests/io/parser/multithread.py similarity index 100% rename from pandas/io/tests/parser/multithread.py rename to pandas/tests/io/parser/multithread.py diff --git a/pandas/io/tests/parser/na_values.py b/pandas/tests/io/parser/na_values.py similarity index 76% rename from pandas/io/tests/parser/na_values.py rename to pandas/tests/io/parser/na_values.py index 2cbd7cdedf2ab..d2c3f82e95c4d 100644 --- a/pandas/io/tests/parser/na_values.py +++ b/pandas/tests/io/parser/na_values.py @@ -8,10 +8,10 @@ import numpy as np from numpy import nan -import pandas.io.parsers as parsers +import pandas.io.common as com import pandas.util.testing as tm -from pandas import DataFrame, MultiIndex +from pandas import DataFrame, Index, MultiIndex from pandas.compat import StringIO, range @@ -70,9 +70,9 @@ def test_non_string_na_values(self): def test_default_na_values(self): _NA_VALUES = set(['-1.#IND', '1.#QNAN', 
'1.#IND', '-1.#QNAN', - '#N/A', 'N/A', 'NA', '#NA', 'NULL', 'NaN', - 'nan', '-NaN', '-nan', '#N/A N/A', '']) - self.assertEqual(_NA_VALUES, parsers._NA_VALUES) + '#N/A', 'N/A', 'n/a', 'NA', '#NA', 'NULL', 'null', + 'NaN', 'nan', '-NaN', '-nan', '#N/A N/A', '']) + assert _NA_VALUES == com._NA_VALUES nv = len(_NA_VALUES) def f(i, v): @@ -88,7 +88,7 @@ def f(i, v): return buf - data = StringIO('\n'.join([f(i, v) for i, v in enumerate(_NA_VALUES)])) + data = StringIO('\n'.join(f(i, v) for i, v in enumerate(_NA_VALUES))) expected = DataFrame(np.nan, columns=range(nv), index=range(nv)) df = self.read_csv(data, header=None) tm.assert_frame_equal(df, expected) @@ -224,6 +224,45 @@ def test_na_values_keep_default(self): 'seven']}) tm.assert_frame_equal(xp.reindex(columns=df.columns), df) + def test_no_keep_default_na_dict_na_values(self): + # see gh-19227 + data = "a,b\n,2" + + df = self.read_csv(StringIO(data), na_values={"b": ["2"]}, + keep_default_na=False) + expected = DataFrame({"a": [""], "b": [np.nan]}) + tm.assert_frame_equal(df, expected) + + # Scalar values shouldn't cause the parsing to crash or fail. 
+ data = "a,b\n1,2" + + df = self.read_csv(StringIO(data), na_values={"b": 2}, + keep_default_na=False) + expected = DataFrame({"a": [1], "b": [np.nan]}) + tm.assert_frame_equal(df, expected) + + data = """\ +113125,"blah","/blaha",kjsdkj,412.166,225.874,214.008 +729639,"qwer","",asdfkj,466.681,,252.373 +""" + expected = DataFrame({0: [np.nan, 729639.0], + 1: [np.nan, "qwer"], + 2: ["/blaha", np.nan], + 3: ["kjsdkj", "asdfkj"], + 4: [412.166, 466.681], + 5: ["225.874", ""], + 6: [np.nan, 252.373]}) + + df = self.read_csv(StringIO(data), header=None, keep_default_na=False, + na_values={2: "", 6: "214.008", + 1: "blah", 0: 113125}) + tm.assert_frame_equal(df, expected) + + df = self.read_csv(StringIO(data), header=None, keep_default_na=False, + na_values={2: "", 6: "214.008", + 1: "blah", 0: "113125"}) + tm.assert_frame_equal(df, expected) + def test_na_values_na_filter_override(self): data = """\ A,B @@ -248,8 +287,8 @@ def test_na_trailing_columns(self): 2012-05-12,USD,SBUX,SELL,500""" result = self.read_csv(StringIO(data)) - self.assertEqual(result['Date'][1], '2012-05-12') - self.assertTrue(result['UnitPrice'].isnull().all()) + assert result['Date'][1] == '2012-05-12' + assert result['UnitPrice'].isna().all() def test_na_values_scalar(self): # see gh-12224 @@ -303,3 +342,30 @@ def test_na_values_uint64(self): expected = DataFrame([[str(2**63), 1], ['', 2]]) out = self.read_csv(StringIO(data), header=None) tm.assert_frame_equal(out, expected) + + def test_empty_na_values_no_default_with_index(self): + # see gh-15835 + data = "a,1\nb,2" + + expected = DataFrame({'1': [2]}, index=Index(["b"], name="a")) + out = self.read_csv(StringIO(data), keep_default_na=False, index_col=0) + + tm.assert_frame_equal(out, expected) + + def test_no_na_filter_on_index(self): + # see gh-5239 + data = "a,b,c\n1,,3\n4,5,6" + + # Don't parse NA-values in index when na_filter=False. 
+ out = self.read_csv(StringIO(data), index_col=[1], na_filter=False) + + expected = DataFrame({"a": [1, 4], "c": [3, 6]}, + index=Index(["", "5"], name="b")) + tm.assert_frame_equal(out, expected) + + # Parse NA-values in index when na_filter=True. + out = self.read_csv(StringIO(data), index_col=[1], na_filter=True) + + expected = DataFrame({"a": [1, 4], "c": [3, 6]}, + index=Index([np.nan, 5.0], name="b")) + tm.assert_frame_equal(out, expected) diff --git a/pandas/io/tests/parser/parse_dates.py b/pandas/tests/io/parser/parse_dates.py similarity index 61% rename from pandas/io/tests/parser/parse_dates.py rename to pandas/tests/io/parser/parse_dates.py index 6197d07d4eafa..919b357f14236 100644 --- a/pandas/io/tests/parser/parse_dates.py +++ b/pandas/tests/io/parser/parse_dates.py @@ -6,22 +6,24 @@ """ from distutils.version import LooseVersion -from datetime import datetime +from datetime import datetime, date import pytest import numpy as np -import pandas.lib as lib -from pandas.lib import Timestamp +from pandas._libs.tslibs import parsing +from pandas._libs.tslib import Timestamp import pandas as pd import pandas.io.parsers as parsers -import pandas.tseries.tools as tools +import pandas.core.tools.datetimes as tools import pandas.util.testing as tm -from pandas import DataFrame, Series, Index, DatetimeIndex +import pandas.io.date_converters as conv +from pandas import DataFrame, Series, Index, DatetimeIndex, MultiIndex from pandas import compat from pandas.compat import parse_date, StringIO, lrange -from pandas.tseries.index import date_range +from pandas.compat.numpy import np_array_datetime64_compat +from pandas.core.indexes.datetimes import date_range class ParseDatesTests(object): @@ -51,33 +53,34 @@ def test_multiple_date_col(self): """ def func(*date_cols): - return lib.try_parse_dates(parsers._concat_date_cols(date_cols)) + res = parsing.try_parse_dates(parsers._concat_date_cols(date_cols)) + return res df = self.read_csv(StringIO(data), header=None, 
date_parser=func, prefix='X', parse_dates={'nominal': [1, 2], 'actual': [1, 3]}) - self.assertIn('nominal', df) - self.assertIn('actual', df) - self.assertNotIn('X1', df) - self.assertNotIn('X2', df) - self.assertNotIn('X3', df) + assert 'nominal' in df + assert 'actual' in df + assert 'X1' not in df + assert 'X2' not in df + assert 'X3' not in df d = datetime(1999, 1, 27, 19, 0) - self.assertEqual(df.loc[0, 'nominal'], d) + assert df.loc[0, 'nominal'] == d df = self.read_csv(StringIO(data), header=None, date_parser=func, parse_dates={'nominal': [1, 2], 'actual': [1, 3]}, keep_date_col=True) - self.assertIn('nominal', df) - self.assertIn('actual', df) + assert 'nominal' in df + assert 'actual' in df - self.assertIn(1, df) - self.assertIn(2, df) - self.assertIn(3, df) + assert 1 in df + assert 2 in df + assert 3 in df data = """\ KORD,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000 @@ -90,23 +93,23 @@ def func(*date_cols): df = self.read_csv(StringIO(data), header=None, prefix='X', parse_dates=[[1, 2], [1, 3]]) - self.assertIn('X1_X2', df) - self.assertIn('X1_X3', df) - self.assertNotIn('X1', df) - self.assertNotIn('X2', df) - self.assertNotIn('X3', df) + assert 'X1_X2' in df + assert 'X1_X3' in df + assert 'X1' not in df + assert 'X2' not in df + assert 'X3' not in df d = datetime(1999, 1, 27, 19, 0) - self.assertEqual(df.loc[0, 'X1_X2'], d) + assert df.loc[0, 'X1_X2'] == d df = self.read_csv(StringIO(data), header=None, parse_dates=[[1, 2], [1, 3]], keep_date_col=True) - self.assertIn('1_2', df) - self.assertIn('1_3', df) - self.assertIn(1, df) - self.assertIn(2, df) - self.assertIn(3, df) + assert '1_2' in df + assert '1_3' in df + assert 1 in df + assert 2 in df + assert 3 in df data = '''\ KORD,19990127 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000 @@ -118,7 +121,7 @@ def func(*date_cols): df = self.read_csv(StringIO(data), sep=',', header=None, parse_dates=[1], index_col=1) d = datetime(1999, 1, 27, 19, 0) - 
self.assertEqual(df.index[0], d) + assert df.index[0] == d def test_multiple_date_cols_int_cast(self): data = ("KORD,19990127, 19:00:00, 18:56:00, 0.8100\n" @@ -133,7 +136,7 @@ def test_multiple_date_cols_int_cast(self): # it works! df = self.read_csv(StringIO(data), header=None, parse_dates=date_spec, date_parser=conv.parse_date_time) - self.assertIn('nominal', df) + assert 'nominal' in df def test_multiple_date_col_timestamp_parse(self): data = """05/31/2012,15:30:00.029,1306.25,1,E,0,,1306.25 @@ -142,7 +145,7 @@ def test_multiple_date_col_timestamp_parse(self): parse_dates=[[0, 1]], date_parser=Timestamp) ex_val = Timestamp('05/31/2012 15:30:00.029') - self.assertEqual(result['0_1'][0], ex_val) + assert result['0_1'][0] == ex_val def test_multiple_date_cols_with_header(self): data = """\ @@ -155,7 +158,7 @@ def test_multiple_date_cols_with_header(self): KORD,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000""" df = self.read_csv(StringIO(data), parse_dates={'nominal': [1, 2]}) - self.assertNotIsInstance(df.nominal[0], compat.string_types) + assert not isinstance(df.nominal[0], compat.string_types) ts_data = """\ ID,date,nominalTime,actualTime,A,B,C,D,E @@ -168,8 +171,8 @@ def test_multiple_date_cols_with_header(self): """ def test_multiple_date_col_name_collision(self): - self.assertRaises(ValueError, self.read_csv, StringIO(self.ts_data), - parse_dates={'ID': [1, 2]}) + with pytest.raises(ValueError): + self.read_csv(StringIO(self.ts_data), parse_dates={'ID': [1, 2]}) data = """\ date_NominalTime,date,NominalTime,ActualTime,TDew,TAir,Windspeed,Precip,WindDir @@ -180,8 +183,8 @@ def test_multiple_date_col_name_collision(self): KORD5,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000 KORD6,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000""" # noqa - self.assertRaises(ValueError, self.read_csv, StringIO(data), - parse_dates=[[1, 2]]) + with pytest.raises(ValueError): + self.read_csv(StringIO(data), 
parse_dates=[[1, 2]]) def test_date_parser_int_bug(self): # See gh-3071 @@ -214,8 +217,8 @@ def test_nat_parse(self): tm.assert_series_equal(expected, result.dtypes) # test with NaT for the nan_rep - # we don't have a method to specif the Datetime na_rep (it defaults - # to '') + # we don't have a method to specify the Datetime na_rep + # (it defaults to '') df.to_csv(path) result = self.read_csv(path, index_col=0, parse_dates=['B']) tm.assert_frame_equal(result, df) @@ -239,7 +242,7 @@ def test_parse_dates_implicit_first_col(self): """ df = self.read_csv(StringIO(data), parse_dates=True) expected = self.read_csv(StringIO(data), index_col=0, parse_dates=True) - self.assertIsInstance( + assert isinstance( df.index[0], (datetime, np.datetime64, Timestamp)) tm.assert_frame_equal(df, expected) @@ -267,7 +270,7 @@ def test_yy_format_with_yearfirst(self): # See gh-217 import dateutil - if dateutil.__version__ >= LooseVersion('2.5.0'): + if LooseVersion(dateutil.__version__) >= LooseVersion('2.5.0'): pytest.skip("testing yearfirst=True not-support" "on datetutil < 2.5.0 this works but" "is wrong") @@ -318,13 +321,13 @@ def test_multi_index_parse_dates(self): 20090103,three,c,4,5 """ df = self.read_csv(StringIO(data), index_col=[0, 1], parse_dates=True) - self.assertIsInstance(df.index.levels[0][0], - (datetime, np.datetime64, Timestamp)) + assert isinstance(df.index.levels[0][0], + (datetime, np.datetime64, Timestamp)) # specify columns out of order! 
df2 = self.read_csv(StringIO(data), index_col=[1, 0], parse_dates=True) - self.assertIsInstance(df2.index.levels[1][0], - (datetime, np.datetime64, Timestamp)) + assert isinstance(df2.index.levels[1][0], + (datetime, np.datetime64, Timestamp)) def test_parse_dates_custom_euroformat(self): text = """foo,bar,baz @@ -345,11 +348,11 @@ def test_parse_dates_custom_euroformat(self): tm.assert_frame_equal(df, expected) parser = lambda d: parse_date(d, day_first=True) - self.assertRaises(TypeError, self.read_csv, - StringIO(text), skiprows=[0], - names=['time', 'Q', 'NTU'], index_col=0, - parse_dates=True, date_parser=parser, - na_values=['NA']) + pytest.raises(TypeError, self.read_csv, + StringIO(text), skiprows=[0], + names=['time', 'Q', 'NTU'], index_col=0, + parse_dates=True, date_parser=parser, + na_values=['NA']) def test_parse_tz_aware(self): # See gh-1693 @@ -359,15 +362,15 @@ def test_parse_tz_aware(self): # it works result = self.read_csv(data, index_col=0, parse_dates=True) stamp = result.index[0] - self.assertEqual(stamp.minute, 39) + assert stamp.minute == 39 try: - self.assertIs(result.index.tz, pytz.utc) + assert result.index.tz is pytz.utc except AssertionError: # hello Yaroslav arr = result.index.to_pydatetime() result = tools.to_datetime(arr, utc=True)[0] - self.assertEqual(stamp.minute, result.minute) - self.assertEqual(stamp.hour, result.hour) - self.assertEqual(stamp.day, result.day) + assert stamp.minute == result.minute + assert stamp.hour == result.hour + assert stamp.day == result.day def test_multiple_date_cols_index(self): data = """ @@ -400,7 +403,7 @@ def test_multiple_date_cols_chunked(self): chunks = list(reader) - self.assertNotIn('nominalTime', df) + assert 'nominalTime' not in df tm.assert_frame_equal(chunks[0], df[:2]) tm.assert_frame_equal(chunks[1], df[2:4]) @@ -433,11 +436,11 @@ def test_read_with_parse_dates_scalar_non_bool(self): data = """A,B,C 1,2,2003-11-1""" - tm.assertRaisesRegexp(TypeError, errmsg, self.read_csv, - 
StringIO(data), parse_dates="C") - tm.assertRaisesRegexp(TypeError, errmsg, self.read_csv, - StringIO(data), parse_dates="C", - index_col="C") + tm.assert_raises_regex(TypeError, errmsg, self.read_csv, + StringIO(data), parse_dates="C") + tm.assert_raises_regex(TypeError, errmsg, self.read_csv, + StringIO(data), parse_dates="C", + index_col="C") def test_read_with_parse_dates_invalid_type(self): errmsg = ("Only booleans, lists, and " @@ -446,19 +449,20 @@ def test_read_with_parse_dates_invalid_type(self): data = """A,B,C 1,2,2003-11-1""" - tm.assertRaisesRegexp(TypeError, errmsg, self.read_csv, - StringIO(data), parse_dates=(1,)) - tm.assertRaisesRegexp(TypeError, errmsg, self.read_csv, - StringIO(data), parse_dates=np.array([4, 5])) - tm.assertRaisesRegexp(TypeError, errmsg, self.read_csv, - StringIO(data), parse_dates=set([1, 3, 3])) + tm.assert_raises_regex(TypeError, errmsg, self.read_csv, + StringIO(data), parse_dates=(1,)) + tm.assert_raises_regex(TypeError, errmsg, + self.read_csv, StringIO(data), + parse_dates=np.array([4, 5])) + tm.assert_raises_regex(TypeError, errmsg, self.read_csv, + StringIO(data), parse_dates=set([1, 3, 3])) def test_parse_dates_empty_string(self): # see gh-2263 data = "Date, test\n2012-01-01, 1\n,2" result = self.read_csv(StringIO(data), parse_dates=["Date"], na_filter=False) - self.assertTrue(result['Date'].isnull()[1]) + assert result['Date'].isna()[1] def test_parse_dates_noconvert_thousands(self): # see gh-14066 @@ -491,3 +495,182 @@ def test_parse_dates_noconvert_thousands(self): result = self.read_csv(StringIO(data), index_col=[0, 1], parse_dates=True, thousands='.') tm.assert_frame_equal(result, expected) + + def test_parse_date_time_multi_level_column_name(self): + data = """\ +D,T,A,B +date, time,a,b +2001-01-05, 09:00:00, 0.0, 10. +2001-01-06, 00:00:00, 1.0, 11. 
+""" + datecols = {'date_time': [0, 1]} + result = self.read_csv(StringIO(data), sep=',', header=[0, 1], + parse_dates=datecols, + date_parser=conv.parse_date_time) + + expected_data = [[datetime(2001, 1, 5, 9, 0, 0), 0., 10.], + [datetime(2001, 1, 6, 0, 0, 0), 1., 11.]] + expected = DataFrame(expected_data, + columns=['date_time', ('A', 'a'), ('B', 'b')]) + tm.assert_frame_equal(result, expected) + + def test_parse_date_time(self): + dates = np.array(['2007/1/3', '2008/2/4'], dtype=object) + times = np.array(['05:07:09', '06:08:00'], dtype=object) + expected = np.array([datetime(2007, 1, 3, 5, 7, 9), + datetime(2008, 2, 4, 6, 8, 0)]) + + result = conv.parse_date_time(dates, times) + assert (result == expected).all() + + data = """\ +date, time, a, b +2001-01-05, 10:00:00, 0.0, 10. +2001-01-05, 00:00:00, 1., 11. +""" + datecols = {'date_time': [0, 1]} + df = self.read_csv(StringIO(data), sep=',', header=0, + parse_dates=datecols, + date_parser=conv.parse_date_time) + assert 'date_time' in df + assert df.date_time.loc[0] == datetime(2001, 1, 5, 10, 0, 0) + + data = ("KORD,19990127, 19:00:00, 18:56:00, 0.8100\n" + "KORD,19990127, 20:00:00, 19:56:00, 0.0100\n" + "KORD,19990127, 21:00:00, 20:56:00, -0.5900\n" + "KORD,19990127, 21:00:00, 21:18:00, -0.9900\n" + "KORD,19990127, 22:00:00, 21:56:00, -0.5900\n" + "KORD,19990127, 23:00:00, 22:56:00, -0.5900") + + date_spec = {'nominal': [1, 2], 'actual': [1, 3]} + df = self.read_csv(StringIO(data), header=None, parse_dates=date_spec, + date_parser=conv.parse_date_time) + + def test_parse_date_fields(self): + years = np.array([2007, 2008]) + months = np.array([1, 2]) + days = np.array([3, 4]) + result = conv.parse_date_fields(years, months, days) + expected = np.array([datetime(2007, 1, 3), datetime(2008, 2, 4)]) + assert (result == expected).all() + + data = ("year, month, day, a\n 2001 , 01 , 10 , 10.\n" + "2001 , 02 , 1 , 11.") + datecols = {'ymd': [0, 1, 2]} + df = self.read_csv(StringIO(data), sep=',', header=0, + 
parse_dates=datecols, + date_parser=conv.parse_date_fields) + assert 'ymd' in df + assert df.ymd.loc[0] == datetime(2001, 1, 10) + + def test_datetime_six_col(self): + years = np.array([2007, 2008]) + months = np.array([1, 2]) + days = np.array([3, 4]) + hours = np.array([5, 6]) + minutes = np.array([7, 8]) + seconds = np.array([9, 0]) + expected = np.array([datetime(2007, 1, 3, 5, 7, 9), + datetime(2008, 2, 4, 6, 8, 0)]) + + result = conv.parse_all_fields(years, months, days, + hours, minutes, seconds) + + assert (result == expected).all() + + data = """\ +year, month, day, hour, minute, second, a, b +2001, 01, 05, 10, 00, 0, 0.0, 10. +2001, 01, 5, 10, 0, 00, 1., 11. +""" + datecols = {'ymdHMS': [0, 1, 2, 3, 4, 5]} + df = self.read_csv(StringIO(data), sep=',', header=0, + parse_dates=datecols, + date_parser=conv.parse_all_fields) + assert 'ymdHMS' in df + assert df.ymdHMS.loc[0] == datetime(2001, 1, 5, 10, 0, 0) + + def test_datetime_fractional_seconds(self): + data = """\ +year, month, day, hour, minute, second, a, b +2001, 01, 05, 10, 00, 0.123456, 0.0, 10. +2001, 01, 5, 10, 0, 0.500000, 1., 11. +""" + datecols = {'ymdHMS': [0, 1, 2, 3, 4, 5]} + df = self.read_csv(StringIO(data), sep=',', header=0, + parse_dates=datecols, + date_parser=conv.parse_all_fields) + assert 'ymdHMS' in df + assert df.ymdHMS.loc[0] == datetime(2001, 1, 5, 10, 0, 0, + microsecond=123456) + assert df.ymdHMS.loc[1] == datetime(2001, 1, 5, 10, 0, 0, + microsecond=500000) + + def test_generic(self): + data = "year, month, day, a\n 2001, 01, 10, 10.\n 2001, 02, 1, 11." 
+ datecols = {'ym': [0, 1]} + dateconverter = lambda y, m: date(year=int(y), month=int(m), day=1) + df = self.read_csv(StringIO(data), sep=',', header=0, + parse_dates=datecols, + date_parser=dateconverter) + assert 'ym' in df + assert df.ym.loc[0] == date(2001, 1, 1) + + def test_dateparser_resolution_if_not_ns(self): + # GH 10245 + data = """\ +date,time,prn,rxstatus +2013-11-03,19:00:00,126,00E80000 +2013-11-03,19:00:00,23,00E80000 +2013-11-03,19:00:00,13,00E80000 +""" + + def date_parser(date, time): + datetime = np_array_datetime64_compat( + date + 'T' + time + 'Z', dtype='datetime64[s]') + return datetime + + df = self.read_csv(StringIO(data), date_parser=date_parser, + parse_dates={'datetime': ['date', 'time']}, + index_col=['datetime', 'prn']) + + datetimes = np_array_datetime64_compat(['2013-11-03T19:00:00Z'] * 3, + dtype='datetime64[s]') + df_correct = DataFrame(data={'rxstatus': ['00E80000'] * 3}, + index=MultiIndex.from_tuples( + [(datetimes[0], 126), + (datetimes[1], 23), + (datetimes[2], 13)], + names=['datetime', 'prn'])) + tm.assert_frame_equal(df, df_correct) + + def test_parse_date_column_with_empty_string(self): + # GH 6428 + data = """case,opdate + 7,10/18/2006 + 7,10/18/2008 + 621, """ + result = self.read_csv(StringIO(data), parse_dates=['opdate']) + expected_data = [[7, '10/18/2006'], + [7, '10/18/2008'], + [621, ' ']] + expected = DataFrame(expected_data, columns=['case', 'opdate']) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("data,expected", [ + ("a\n135217135789158401\n1352171357E+5", + DataFrame({"a": [135217135789158401, + 135217135700000]}, dtype="float64")), + ("a\n99999999999\n123456789012345\n1234E+0", + DataFrame({"a": [99999999999, + 123456789012345, + 1234]}, dtype="float64")) + ]) + @pytest.mark.parametrize("parse_dates", [True, False]) + def test_parse_date_float(self, data, expected, parse_dates): + # see gh-2697 + # + # Date parsing should fail, so we leave the data untouched + # (i.e. 
float precision should remain unchanged). + result = self.read_csv(StringIO(data), parse_dates=parse_dates) + tm.assert_frame_equal(result, expected) diff --git a/pandas/io/tests/parser/python_parser_only.py b/pandas/tests/io/parser/python_parser_only.py similarity index 69% rename from pandas/io/tests/parser/python_parser_only.py rename to pandas/tests/io/parser/python_parser_only.py index bd76070933c47..c0616ebbab4a5 100644 --- a/pandas/io/tests/parser/python_parser_only.py +++ b/pandas/tests/io/parser/python_parser_only.py @@ -8,31 +8,43 @@ """ import csv -import sys import pytest import pandas.util.testing as tm from pandas import DataFrame, Index from pandas import compat +from pandas.errors import ParserError from pandas.compat import StringIO, BytesIO, u class PythonParserTests(object): - def test_negative_skipfooter_raises(self): - text = """#foo,a,b,c -#foo,a,b,c -#foo,a,b,c -#foo,a,b,c -#foo,a,b,c -#foo,a,b,c -1/1/2000,1.,2.,3. -1/2/2000,4,5,6 -1/3/2000,7,8,9 -""" + def test_default_separator(self): + # GH17333 + # csv.Sniffer in Python treats 'o' as separator. 
+ text = 'aob\n1o2\n3o4' + expected = DataFrame({'a': [1, 3], 'b': [2, 4]}) + + result = self.read_csv(StringIO(text), sep=None) + + tm.assert_frame_equal(result, expected) + + def test_invalid_skipfooter(self): + text = "a\n1\n2" + + # see gh-15925 (comment) + msg = "skipfooter must be an integer" + with tm.assert_raises_regex(ValueError, msg): + self.read_csv(StringIO(text), skipfooter="foo") + + with tm.assert_raises_regex(ValueError, msg): + self.read_csv(StringIO(text), skipfooter=1.5) + + with tm.assert_raises_regex(ValueError, msg): + self.read_csv(StringIO(text), skipfooter=True) - with tm.assertRaisesRegexp( - ValueError, 'skip footer cannot be negative'): + msg = "skipfooter cannot be negative" + with tm.assert_raises_regex(ValueError, msg): self.read_csv(StringIO(text), skipfooter=-1) def test_sniff_delimiter(self): @@ -42,8 +54,8 @@ def test_sniff_delimiter(self): baz|7|8|9 """ data = self.read_csv(StringIO(text), index_col=0, sep=None) - self.assert_index_equal(data.index, - Index(['foo', 'bar', 'baz'], name='index')) + tm.assert_index_equal(data.index, + Index(['foo', 'bar', 'baz'], name='index')) data2 = self.read_csv(StringIO(text), index_col=0, delimiter='|') tm.assert_frame_equal(data, data2) @@ -89,16 +101,9 @@ def test_BytesIO_input(self): def test_single_line(self): # see gh-6607: sniff separator - - buf = StringIO() - sys.stdout = buf - - try: - df = self.read_csv(StringIO('1,2'), names=['a', 'b'], - header=None, sep=None) - tm.assert_frame_equal(DataFrame({'a': [1], 'b': [2]}), df) - finally: - sys.stdout = sys.__stdout__ + df = self.read_csv(StringIO('1,2'), names=['a', 'b'], + header=None, sep=None) + tm.assert_frame_equal(DataFrame({'a': [1], 'b': [2]}), df) def test_skipfooter(self): # see gh-6607 @@ -153,8 +158,8 @@ def test_decompression_regex_sep(self): result = self.read_csv(path, sep='::', compression='bz2') tm.assert_frame_equal(result, expected) - self.assertRaises(ValueError, self.read_csv, - path, compression='bz3') + 
pytest.raises(ValueError, self.read_csv, + path, compression='bz3') def test_read_table_buglet_4x_multiindex(self): # see gh-6607 @@ -165,7 +170,7 @@ def test_read_table_buglet_4x_multiindex(self): x q 30 3 -0.6662 -0.5243 -0.3580 0.89145 2.5838""" df = self.read_table(StringIO(text), sep=r'\s+') - self.assertEqual(df.index.names, ('one', 'two', 'three', 'four')) + assert df.index.names == ('one', 'two', 'three', 'four') # see gh-6893 data = ' A B C\na b c\n1 3 7 0 3 6\n3 1 4 1 5 9' @@ -213,27 +218,46 @@ def test_multi_char_sep_quotes(self): data = 'a,,b\n1,,a\n2,,"2,,b"' msg = 'ignored when a multi-char delimiter is used' - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ParserError, msg): self.read_csv(StringIO(data), sep=',,') # We expect no match, so there should be an assertion # error out of the inner context manager. - with tm.assertRaises(AssertionError): - with tm.assertRaisesRegexp(ValueError, msg): + with pytest.raises(AssertionError): + with tm.assert_raises_regex(ParserError, msg): self.read_csv(StringIO(data), sep=',,', quoting=csv.QUOTE_NONE) + def test_none_delimiter(self): + # see gh-13374 and gh-17465 + + data = "a,b,c\n0,1,2\n3,4,5,6\n7,8,9" + expected = DataFrame({'a': [0, 7], + 'b': [1, 8], + 'c': [2, 9]}) + + # We expect the third line in the data to be + # skipped because it is malformed, + # but we do not expect any errors to occur. 
+ result = self.read_csv(StringIO(data), header=0, + sep=None, + error_bad_lines=False, + warn_bad_lines=True) + tm.assert_frame_equal(result, expected) + def test_skipfooter_bad_row(self): # see gh-13879 + # see gh-15910 - data = 'a,b,c\ncat,foo,bar\ndog,foo,"baz' msg = 'parsing errors in the skipped footer rows' - with tm.assertRaisesRegexp(csv.Error, msg): - self.read_csv(StringIO(data), skipfooter=1) + for data in ('a\n1\n"b"a', + 'a,b,c\ncat,foo,bar\ndog,foo,"baz'): + with tm.assert_raises_regex(ParserError, msg): + self.read_csv(StringIO(data), skipfooter=1) - # We expect no match, so there should be an assertion - # error out of the inner context manager. - with tm.assertRaises(AssertionError): - with tm.assertRaisesRegexp(csv.Error, msg): - self.read_csv(StringIO(data)) + # We expect no match, so there should be an assertion + # error out of the inner context manager. + with pytest.raises(AssertionError): + with tm.assert_raises_regex(ParserError, msg): + self.read_csv(StringIO(data)) diff --git a/pandas/io/tests/parser/quoting.py b/pandas/tests/io/parser/quoting.py similarity index 82% rename from pandas/io/tests/parser/quoting.py rename to pandas/tests/io/parser/quoting.py index a692e03e868c7..15427aaf9825c 100644 --- a/pandas/io/tests/parser/quoting.py +++ b/pandas/tests/io/parser/quoting.py @@ -20,29 +20,29 @@ def test_bad_quote_char(self): # Python 2.x: "...must be an 1-character..." # Python 3.x: "...must be a 1-character..." msg = '"quotechar" must be a(n)? 
1-character string' - tm.assertRaisesRegexp(TypeError, msg, self.read_csv, - StringIO(data), quotechar='foo') + tm.assert_raises_regex(TypeError, msg, self.read_csv, + StringIO(data), quotechar='foo') msg = 'quotechar must be set if quoting enabled' - tm.assertRaisesRegexp(TypeError, msg, self.read_csv, - StringIO(data), quotechar=None, - quoting=csv.QUOTE_MINIMAL) + tm.assert_raises_regex(TypeError, msg, self.read_csv, + StringIO(data), quotechar=None, + quoting=csv.QUOTE_MINIMAL) msg = '"quotechar" must be string, not int' - tm.assertRaisesRegexp(TypeError, msg, self.read_csv, - StringIO(data), quotechar=2) + tm.assert_raises_regex(TypeError, msg, self.read_csv, + StringIO(data), quotechar=2) def test_bad_quoting(self): data = '1,2,3' msg = '"quoting" must be an integer' - tm.assertRaisesRegexp(TypeError, msg, self.read_csv, - StringIO(data), quoting='foo') + tm.assert_raises_regex(TypeError, msg, self.read_csv, + StringIO(data), quoting='foo') # quoting must in the range [0, 3] msg = 'bad "quoting" value' - tm.assertRaisesRegexp(TypeError, msg, self.read_csv, - StringIO(data), quoting=5) + tm.assert_raises_regex(TypeError, msg, self.read_csv, + StringIO(data), quoting=5) def test_quote_char_basic(self): data = 'a,b,c\n1,2,"cat"' @@ -68,13 +68,13 @@ def test_null_quote_char(self): # sanity checks msg = 'quotechar must be set if quoting enabled' - tm.assertRaisesRegexp(TypeError, msg, self.read_csv, - StringIO(data), quotechar=None, - quoting=csv.QUOTE_MINIMAL) + tm.assert_raises_regex(TypeError, msg, self.read_csv, + StringIO(data), quotechar=None, + quoting=csv.QUOTE_MINIMAL) - tm.assertRaisesRegexp(TypeError, msg, self.read_csv, - StringIO(data), quotechar='', - quoting=csv.QUOTE_MINIMAL) + tm.assert_raises_regex(TypeError, msg, self.read_csv, + StringIO(data), quotechar='', + quoting=csv.QUOTE_MINIMAL) # no errors should be raised if quoting is None expected = DataFrame([[1, 2, 3]], diff --git a/pandas/io/tests/parser/skiprows.py 
b/pandas/tests/io/parser/skiprows.py similarity index 98% rename from pandas/io/tests/parser/skiprows.py rename to pandas/tests/io/parser/skiprows.py index c53e6a1579267..fb08ec0447267 100644 --- a/pandas/io/tests/parser/skiprows.py +++ b/pandas/tests/io/parser/skiprows.py @@ -12,7 +12,7 @@ import pandas.util.testing as tm from pandas import DataFrame -from pandas.io.common import EmptyDataError +from pandas.errors import EmptyDataError from pandas.compat import StringIO, range, lrange @@ -215,11 +215,11 @@ def test_skiprows_callable(self): skiprows = lambda x: True msg = "No columns to parse from file" - with tm.assertRaisesRegexp(EmptyDataError, msg): + with tm.assert_raises_regex(EmptyDataError, msg): self.read_csv(StringIO(data), skiprows=skiprows) # This is a bad callable and should raise. msg = "by zero" skiprows = lambda x: 1 / 0 - with tm.assertRaisesRegexp(ZeroDivisionError, msg): + with tm.assert_raises_regex(ZeroDivisionError, msg): self.read_csv(StringIO(data), skiprows=skiprows) diff --git a/pandas/io/tests/parser/test_network.py b/pandas/tests/io/parser/test_network.py similarity index 61% rename from pandas/io/tests/parser/test_network.py rename to pandas/tests/io/parser/test_network.py index 4d75b59b09560..f16338fda6245 100644 --- a/pandas/io/tests/parser/test_network.py +++ b/pandas/tests/io/parser/test_network.py @@ -4,85 +4,75 @@ Tests parsers ability to read and parse non-local files and hence require a network connection to be read. 
""" - -import os import pytest -import functools -from itertools import product import pandas.util.testing as tm +import pandas.util._test_decorators as td from pandas import DataFrame from pandas.io.parsers import read_csv, read_table +from pandas.compat import BytesIO -class TestCompressedUrl(object): +@pytest.mark.network +@pytest.mark.parametrize( + "compress_type, extension", [ + ('gzip', '.gz'), ('bz2', '.bz2'), ('zip', '.zip'), + pytest.param('xz', '.xz', marks=td.skip_if_no_lzma) + ] +) +@pytest.mark.parametrize('mode', ['explicit', 'infer']) +@pytest.mark.parametrize('engine', ['python', 'c']) +def test_compressed_urls(salaries_table, compress_type, extension, mode, + engine): + check_compressed_urls(salaries_table, compress_type, extension, mode, + engine) - compression_to_extension = { - 'gzip': '.gz', - 'bz2': '.bz2', - 'zip': '.zip', - 'xz': '.xz', - } - def setup(self): - path = os.path.join(tm.get_data_path(), 'salaries.csv') - self.local_table = read_table(path) - self.base_url = ('https://github.com/pandas-dev/pandas/raw/master/' - 'pandas/io/tests/parser/data/salaries.csv') +@tm.network +def check_compressed_urls(salaries_table, compression, extension, mode, + engine): + # test reading compressed urls with various engines and + # extension inference + base_url = ('https://github.com/pandas-dev/pandas/raw/master/' + 'pandas/tests/io/parser/data/salaries.csv') - @tm.network - def test_compressed_urls(self): - # Test reading compressed tables from URL. - msg = ('Test reading {}-compressed tables from URL: ' - 'compression="{}", engine="{}"') - - for compression, extension in self.compression_to_extension.items(): - url = self.base_url + extension - # args is a (compression, engine) tuple - for args in product([compression, 'infer'], ['python', 'c']): - # test_fxn is a workaround for more descriptive nose reporting. - # See http://stackoverflow.com/a/37393684/4651668. 
- test_fxn = functools.partial(self.check_table) - test_fxn.description = msg.format(compression, *args) - yield (test_fxn, url) + args - - def check_table(self, url, compression, engine): - if url.endswith('.xz'): - tm._skip_if_no_lzma() - url_table = read_table(url, compression=compression, engine=engine) - tm.assert_frame_equal(url_table, self.local_table) - - -class TestS3(tm.TestCase): - - def setUp(self): - try: - import s3fs # noqa - except ImportError: - pytest.skip("s3fs not installed") + url = base_url + extension + + if mode != 'explicit': + compression = mode + + url_table = read_table(url, compression=compression, engine=engine) + tm.assert_frame_equal(url_table, salaries_table) + + +class TestS3(object): @tm.network def test_parse_public_s3_bucket(self): + pytest.importorskip('s3fs') + # more of an integration test due to the not-public contents portion + # can probably mock this though. for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]: df = read_csv('s3://pandas-test/tips.csv' + ext, compression=comp) - self.assertTrue(isinstance(df, DataFrame)) - self.assertFalse(df.empty) + assert isinstance(df, DataFrame) + assert not df.empty tm.assert_frame_equal(read_csv( tm.get_data_path('tips.csv')), df) # Read public file from bucket with not-public contents df = read_csv('s3://cant_get_it/tips.csv') - self.assertTrue(isinstance(df, DataFrame)) - self.assertFalse(df.empty) + assert isinstance(df, DataFrame) + assert not df.empty tm.assert_frame_equal(read_csv(tm.get_data_path('tips.csv')), df) @tm.network def test_parse_public_s3n_bucket(self): + # Read from AWS s3 as "s3n" URL df = read_csv('s3n://pandas-test/tips.csv', nrows=10) - self.assertTrue(isinstance(df, DataFrame)) - self.assertFalse(df.empty) + assert isinstance(df, DataFrame) + assert not df.empty tm.assert_frame_equal(read_csv( tm.get_data_path('tips.csv')).iloc[:10], df) @@ -90,8 +80,8 @@ def test_parse_public_s3n_bucket(self): def test_parse_public_s3a_bucket(self): # Read from 
AWS s3 as "s3a" URL df = read_csv('s3a://pandas-test/tips.csv', nrows=10) - self.assertTrue(isinstance(df, DataFrame)) - self.assertFalse(df.empty) + assert isinstance(df, DataFrame) + assert not df.empty tm.assert_frame_equal(read_csv( tm.get_data_path('tips.csv')).iloc[:10], df) @@ -100,8 +90,8 @@ def test_parse_public_s3_bucket_nrows(self): for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]: df = read_csv('s3://pandas-test/tips.csv' + ext, nrows=10, compression=comp) - self.assertTrue(isinstance(df, DataFrame)) - self.assertFalse(df.empty) + assert isinstance(df, DataFrame) + assert not df.empty tm.assert_frame_equal(read_csv( tm.get_data_path('tips.csv')).iloc[:10], df) @@ -113,13 +103,13 @@ def test_parse_public_s3_bucket_chunked(self): for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]: df_reader = read_csv('s3://pandas-test/tips.csv' + ext, chunksize=chunksize, compression=comp) - self.assertEqual(df_reader.chunksize, chunksize) + assert df_reader.chunksize == chunksize for i_chunk in [0, 1, 2]: # Read a couple of chunks and make sure we see them # properly. df = df_reader.get_chunk() - self.assertTrue(isinstance(df, DataFrame)) - self.assertFalse(df.empty) + assert isinstance(df, DataFrame) + assert not df.empty true_df = local_tips.iloc[ chunksize * i_chunk: chunksize * (i_chunk + 1)] tm.assert_frame_equal(true_df, df) @@ -133,12 +123,12 @@ def test_parse_public_s3_bucket_chunked_python(self): df_reader = read_csv('s3://pandas-test/tips.csv' + ext, chunksize=chunksize, compression=comp, engine='python') - self.assertEqual(df_reader.chunksize, chunksize) + assert df_reader.chunksize == chunksize for i_chunk in [0, 1, 2]: # Read a couple of chunks and make sure we see them properly. 
df = df_reader.get_chunk() - self.assertTrue(isinstance(df, DataFrame)) - self.assertFalse(df.empty) + assert isinstance(df, DataFrame) + assert not df.empty true_df = local_tips.iloc[ chunksize * i_chunk: chunksize * (i_chunk + 1)] tm.assert_frame_equal(true_df, df) @@ -148,8 +138,8 @@ def test_parse_public_s3_bucket_python(self): for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]: df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python', compression=comp) - self.assertTrue(isinstance(df, DataFrame)) - self.assertFalse(df.empty) + assert isinstance(df, DataFrame) + assert not df.empty tm.assert_frame_equal(read_csv( tm.get_data_path('tips.csv')), df) @@ -158,8 +148,8 @@ def test_infer_s3_compression(self): for ext in ['', '.gz', '.bz2']: df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python', compression='infer') - self.assertTrue(isinstance(df, DataFrame)) - self.assertFalse(df.empty) + assert isinstance(df, DataFrame) + assert not df.empty tm.assert_frame_equal(read_csv( tm.get_data_path('tips.csv')), df) @@ -168,17 +158,33 @@ def test_parse_public_s3_bucket_nrows_python(self): for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]: df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python', nrows=10, compression=comp) - self.assertTrue(isinstance(df, DataFrame)) - self.assertFalse(df.empty) + assert isinstance(df, DataFrame) + assert not df.empty tm.assert_frame_equal(read_csv( tm.get_data_path('tips.csv')).iloc[:10], df) @tm.network def test_s3_fails(self): - with tm.assertRaises(IOError): + with pytest.raises(IOError): read_csv('s3://nyqpug/asdf.csv') # Receive a permission error when trying to read a private bucket. # It's irrelevant here that this isn't actually a table. 
- with tm.assertRaises(IOError): + with pytest.raises(IOError): read_csv('s3://cant_get_it/') + + def test_read_csv_handles_boto_s3_object(self, + s3_resource, + tips_file): + # see gh-16135 + + s3_object = s3_resource.meta.client.get_object( + Bucket='pandas-test', + Key='tips.csv') + + result = read_csv(BytesIO(s3_object["Body"].read()), encoding='utf8') + assert isinstance(result, DataFrame) + assert not result.empty + + expected = read_csv(tips_file) + tm.assert_frame_equal(result, expected) diff --git a/pandas/io/tests/parser/test_parsers.py b/pandas/tests/io/parser/test_parsers.py similarity index 53% rename from pandas/io/tests/parser/test_parsers.py rename to pandas/tests/io/parser/test_parsers.py index 2ae557a7d57db..7717102b64fc5 100644 --- a/pandas/io/tests/parser/test_parsers.py +++ b/pandas/tests/io/parser/test_parsers.py @@ -1,11 +1,12 @@ # -*- coding: utf-8 -*- import os - import pandas.util.testing as tm -from pandas import read_csv, read_table -from pandas.core.common import AbstractMethodError +from pandas import read_csv, read_table, DataFrame +import pandas.core.common as com +from pandas._libs.tslib import Timestamp +from pandas.compat import StringIO from .common import ParserTests from .header import HeaderTests @@ -20,6 +21,7 @@ from .c_parser_only import CParserTests from .parse_dates import ParseDatesTests from .compression import CompressionTests +from .mangle_dupes import DupeColumnTests from .multithread import MultithreadTests from .python_parser_only import PythonParserTests from .dtypes import DtypeTests @@ -27,11 +29,12 @@ class BaseParser(CommentTests, CompressionTests, ConverterTests, DialectTests, + DtypeTests, DupeColumnTests, HeaderTests, IndexColTests, MultithreadTests, NAvaluesTests, ParseDatesTests, ParserTests, SkipRowsTests, UsecolsTests, - QuotingTests, DtypeTests): + QuotingTests): def read_csv(self, *args, **kwargs): raise NotImplementedError @@ -40,9 +43,9 @@ def read_table(self, *args, **kwargs): raise 
NotImplementedError def float_precision_choices(self): - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) - def setUp(self): + def setup_method(self, method): self.dirpath = tm.get_data_path() self.csv1 = os.path.join(self.dirpath, 'test1.csv') self.csv2 = os.path.join(self.dirpath, 'test2.csv') @@ -50,7 +53,7 @@ def setUp(self): self.csv_shiftjs = os.path.join(self.dirpath, 'sauron.SHIFT_JIS.csv') -class TestCParserHighMemory(BaseParser, CParserTests, tm.TestCase): +class TestCParserHighMemory(BaseParser, CParserTests): engine = 'c' low_memory = False float_precision_choices = [None, 'high', 'round_trip'] @@ -68,7 +71,7 @@ def read_table(self, *args, **kwds): return read_table(*args, **kwds) -class TestCParserLowMemory(BaseParser, CParserTests, tm.TestCase): +class TestCParserLowMemory(BaseParser, CParserTests): engine = 'c' low_memory = True float_precision_choices = [None, 'high', 'round_trip'] @@ -86,7 +89,7 @@ def read_table(self, *args, **kwds): return read_table(*args, **kwds) -class TestPythonParser(BaseParser, PythonParserTests, tm.TestCase): +class TestPythonParser(BaseParser, PythonParserTests): engine = 'python' float_precision_choices = [None] @@ -99,3 +102,51 @@ def read_table(self, *args, **kwds): kwds = kwds.copy() kwds['engine'] = self.engine return read_table(*args, **kwds) + + +class TestUnsortedUsecols(object): + def test_override__set_noconvert_columns(self): + # GH 17351 - usecols needs to be sorted in _setnoconvert_columns + # based on the test_usecols_with_parse_dates test from usecols.py + from pandas.io.parsers import CParserWrapper, TextFileReader + + s = """a,b,c,d,e + 0,1,20140101,0900,4 + 0,1,20140102,1000,4""" + + parse_dates = [[1, 2]] + cols = { + 'a': [0, 0], + 'c_d': [ + Timestamp('2014-01-01 09:00:00'), + Timestamp('2014-01-02 10:00:00') + ] + } + expected = DataFrame(cols, columns=['c_d', 'a']) + + class MyTextFileReader(TextFileReader): + def __init__(self): + self._currow = 0 + self.squeeze = False + + 
class MyCParserWrapper(CParserWrapper): + def _set_noconvert_columns(self): + if self.usecols_dtype == 'integer': + # self.usecols is a set, which is documented as unordered + # but in practice, a CPython set of integers is sorted. + # In other implementations this assumption does not hold. + # The following code simulates a different order, which + # before GH 17351 would cause the wrong columns to be + # converted via the parse_dates parameter + self.usecols = list(self.usecols) + self.usecols.reverse() + return CParserWrapper._set_noconvert_columns(self) + + parser = MyTextFileReader() + parser.options = {'usecols': [0, 2, 3], + 'parse_dates': parse_dates, + 'delimiter': ','} + parser._engine = MyCParserWrapper(StringIO(s), **parser.options) + df = parser.read() + + tm.assert_frame_equal(df, expected) diff --git a/pandas/io/tests/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py similarity index 90% rename from pandas/io/tests/parser/test_read_fwf.py rename to pandas/tests/io/parser/test_read_fwf.py index dccae06afe4d1..a60f2b5a4c946 100644 --- a/pandas/io/tests/parser/test_read_fwf.py +++ b/pandas/tests/io/parser/test_read_fwf.py @@ -19,7 +19,7 @@ from pandas.io.parsers import read_csv, read_fwf, EmptyDataError -class TestFwfParsing(tm.TestCase): +class TestFwfParsing(object): def test_fwf(self): data_expected = """\ @@ -67,10 +67,11 @@ def test_fwf(self): StringIO(data3), colspecs=colspecs, delimiter='~', header=None) tm.assert_frame_equal(df, expected) - with tm.assertRaisesRegexp(ValueError, "must specify only one of"): + with tm.assert_raises_regex(ValueError, + "must specify only one of"): read_fwf(StringIO(data3), colspecs=colspecs, widths=[6, 10, 10, 7]) - with tm.assertRaisesRegexp(ValueError, "Must specify either"): + with tm.assert_raises_regex(ValueError, "Must specify either"): read_fwf(StringIO(data3), colspecs=None, widths=None) def test_BytesIO_input(self): @@ -93,9 +94,9 @@ def test_fwf_colspecs_is_list_or_tuple(self): 
bar2,12,13,14,15 """ - with tm.assertRaisesRegexp(TypeError, - 'column specifications must be a list or ' - 'tuple.+'): + with tm.assert_raises_regex(TypeError, + 'column specifications must ' + 'be a list or tuple.+'): pd.io.parsers.FixedWidthReader(StringIO(data), {'a': 1}, ',', '#') @@ -109,8 +110,9 @@ def test_fwf_colspecs_is_list_or_tuple_of_two_element_tuples(self): bar2,12,13,14,15 """ - with tm.assertRaisesRegexp(TypeError, - 'Each column specification must be.+'): + with tm.assert_raises_regex(TypeError, + 'Each column specification ' + 'must be.+'): read_fwf(StringIO(data), [('a', 1)]) def test_fwf_colspecs_None(self): @@ -164,7 +166,7 @@ def test_fwf_regression(self): for c in df.columns: res = df.loc[:, c] - self.assertTrue(len(res)) + assert len(res) def test_fwf_for_uint8(self): data = """1421302965.213420 PRI=3 PGN=0xef00 DST=0x17 SRC=0x28 04 154 00 00 00 00 00 127 @@ -243,7 +245,7 @@ def test_bool_header_arg(self): a b""" for arg in [True, False]: - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): read_fwf(StringIO(data), header=arg) def test_full_file(self): @@ -289,7 +291,7 @@ def test_full_file_with_spaces(self): tm.assert_frame_equal(expected, read_fwf(StringIO(test))) def test_full_file_with_spaces_and_missing(self): - # File with spaces and missing values in columsn + # File with spaces and missing values in columns test = """ Account Name Balance CreditLimit AccountCreated 101 10000.00 1/17/1998 @@ -401,5 +403,34 @@ def test_skiprows_inference_empty(self): 78 901 2 """.strip() - with tm.assertRaises(EmptyDataError): + with pytest.raises(EmptyDataError): read_fwf(StringIO(test), skiprows=3) + + def test_whitespace_preservation(self): + # Addresses Issue #16772 + data_expected = """ + a ,bbb + cc,dd """ + expected = read_csv(StringIO(data_expected), header=None) + + test_data = """ + a bbb + ccdd """ + result = read_fwf(StringIO(test_data), widths=[3, 3], + header=None, skiprows=[0], delimiter="\n\t") + + 
tm.assert_frame_equal(result, expected) + + def test_default_delimiter(self): + data_expected = """ +a,bbb +cc,dd""" + expected = read_csv(StringIO(data_expected), header=None) + + test_data = """ +a \tbbb +cc\tdd """ + result = read_fwf(StringIO(test_data), widths=[3, 3], + header=None, skiprows=[0]) + + tm.assert_frame_equal(result, expected) diff --git a/pandas/io/tests/parser/test_textreader.py b/pandas/tests/io/parser/test_textreader.py similarity index 67% rename from pandas/io/tests/parser/test_textreader.py rename to pandas/tests/io/parser/test_textreader.py index 0e91ca806e8fe..ab4c14034cd20 100644 --- a/pandas/io/tests/parser/test_textreader.py +++ b/pandas/tests/io/parser/test_textreader.py @@ -5,6 +5,8 @@ is integral to the C engine in parsers.py """ +import pytest + from pandas.compat import StringIO, BytesIO, map from pandas import compat @@ -20,13 +22,13 @@ import pandas.util.testing as tm -from pandas.parser import TextReader -import pandas.parser as parser +from pandas._libs.parsers import TextReader +import pandas._libs.parsers as parser -class TestTextReader(tm.TestCase): +class TestTextReader(object): - def setUp(self): + def setup_method(self, method): self.dirpath = tm.get_data_path() self.csv1 = os.path.join(self.dirpath, 'test1.csv') self.csv2 = os.path.join(self.dirpath, 'test2.csv') @@ -64,7 +66,7 @@ def test_string_factorize(self): data = 'a\nb\na\nb\na' reader = TextReader(StringIO(data), header=None) result = reader.read() - self.assertEqual(len(set(map(id, result[0]))), 2) + assert len(set(map(id, result[0]))) == 2 def test_skipinitialspace(self): data = ('a, b\n' @@ -76,12 +78,10 @@ def test_skipinitialspace(self): header=None) result = reader.read() - self.assert_numpy_array_equal(result[0], - np.array(['a', 'a', 'a', 'a'], - dtype=np.object_)) - self.assert_numpy_array_equal(result[1], - np.array(['b', 'b', 'b', 'b'], - dtype=np.object_)) + tm.assert_numpy_array_equal(result[0], np.array(['a', 'a', 'a', 'a'], + dtype=np.object_)) + 
tm.assert_numpy_array_equal(result[1], np.array(['b', 'b', 'b', 'b'], + dtype=np.object_)) def test_parse_booleans(self): data = 'True\nFalse\nTrue\nTrue' @@ -89,7 +89,7 @@ def test_parse_booleans(self): reader = TextReader(StringIO(data), header=None) result = reader.read() - self.assertEqual(result[0].dtype, np.bool_) + assert result[0].dtype == np.bool_ def test_delimit_whitespace(self): data = 'a b\na\t\t "b"\n"a"\t \t b' @@ -98,10 +98,10 @@ def test_delimit_whitespace(self): header=None) result = reader.read() - self.assert_numpy_array_equal(result[0], np.array(['a', 'a', 'a'], - dtype=np.object_)) - self.assert_numpy_array_equal(result[1], np.array(['b', 'b', 'b'], - dtype=np.object_)) + tm.assert_numpy_array_equal(result[0], np.array(['a', 'a', 'a'], + dtype=np.object_)) + tm.assert_numpy_array_equal(result[1], np.array(['b', 'b', 'b'], + dtype=np.object_)) def test_embedded_newline(self): data = 'a\n"hello\nthere"\nthis' @@ -110,7 +110,7 @@ def test_embedded_newline(self): result = reader.read() expected = np.array(['a', 'hello\nthere', 'this'], dtype=np.object_) - self.assert_numpy_array_equal(result[0], expected) + tm.assert_numpy_array_equal(result[0], expected) def test_euro_decimal(self): data = '12345,67\n345,678' @@ -142,6 +142,7 @@ def test_integer_thousands_alt(self): expected = DataFrame([123456, 12500]) tm.assert_frame_equal(result, expected) + @tm.capture_stderr def test_skip_bad_lines(self): # too many lines, see #2430 for why data = ('a:b:c\n' @@ -153,31 +154,27 @@ def test_skip_bad_lines(self): reader = TextReader(StringIO(data), delimiter=':', header=None) - self.assertRaises(parser.ParserError, reader.read) + pytest.raises(parser.ParserError, reader.read) reader = TextReader(StringIO(data), delimiter=':', header=None, error_bad_lines=False, warn_bad_lines=False) result = reader.read() - expected = {0: ['a', 'd', 'g', 'l'], - 1: ['b', 'e', 'h', 'm'], - 2: ['c', 'f', 'i', 'n']} + expected = {0: np.array(['a', 'd', 'g', 'l'], dtype=object), + 
1: np.array(['b', 'e', 'h', 'm'], dtype=object), + 2: np.array(['c', 'f', 'i', 'n'], dtype=object)} assert_array_dicts_equal(result, expected) - stderr = sys.stderr - sys.stderr = StringIO() - try: - reader = TextReader(StringIO(data), delimiter=':', - header=None, - error_bad_lines=False, - warn_bad_lines=True) - reader.read() - val = sys.stderr.getvalue() - self.assertTrue('Skipping line 4' in val) - self.assertTrue('Skipping line 6' in val) - finally: - sys.stderr = stderr + reader = TextReader(StringIO(data), delimiter=':', + header=None, + error_bad_lines=False, + warn_bad_lines=True) + reader.read() + val = sys.stderr.getvalue() + + assert 'Skipping line 4' in val + assert 'Skipping line 6' in val def test_header_not_enough_lines(self): data = ('skip this\n' @@ -189,36 +186,13 @@ def test_header_not_enough_lines(self): reader = TextReader(StringIO(data), delimiter=',', header=2) header = reader.header expected = [['a', 'b', 'c']] - self.assertEqual(header, expected) - - recs = reader.read() - expected = {0: [1, 4], 1: [2, 5], 2: [3, 6]} - assert_array_dicts_equal(expected, recs) - - # not enough rows - self.assertRaises(parser.ParserError, TextReader, StringIO(data), - delimiter=',', header=5, as_recarray=True) - - def test_header_not_enough_lines_as_recarray(self): - data = ('skip this\n' - 'skip this\n' - 'a,b,c\n' - '1,2,3\n' - '4,5,6') - - reader = TextReader(StringIO(data), delimiter=',', header=2, - as_recarray=True) - header = reader.header - expected = [['a', 'b', 'c']] - self.assertEqual(header, expected) + assert header == expected recs = reader.read() - expected = {'a': [1, 4], 'b': [2, 5], 'c': [3, 6]} - assert_array_dicts_equal(expected, recs) - - # not enough rows - self.assertRaises(parser.ParserError, TextReader, StringIO(data), - delimiter=',', header=5, as_recarray=True) + expected = {0: np.array([1, 4], dtype=np.int64), + 1: np.array([2, 5], dtype=np.int64), + 2: np.array([3, 6], dtype=np.int64)} + assert_array_dicts_equal(recs, expected) 
def test_escapechar(self): data = ('\\"hello world\"\n' @@ -228,7 +202,7 @@ def test_escapechar(self): reader = TextReader(StringIO(data), delimiter=',', header=None, escapechar='\\') result = reader.read() - expected = {0: ['"hello world"'] * 3} + expected = {0: np.array(['"hello world"'] * 3, dtype=object)} assert_array_dicts_equal(result, expected) def test_eof_has_eol(self): @@ -253,37 +227,18 @@ def _make_reader(**kwds): reader = _make_reader(dtype='S5,i4') result = reader.read() - self.assertEqual(result[0].dtype, 'S5') + assert result[0].dtype == 'S5' ex_values = np.array(['a', 'aa', 'aaa', 'aaaa', 'aaaaa'], dtype='S5') - self.assertTrue((result[0] == ex_values).all()) - self.assertEqual(result[1].dtype, 'i4') + assert (result[0] == ex_values).all() + assert result[1].dtype == 'i4' reader = _make_reader(dtype='S4') result = reader.read() - self.assertEqual(result[0].dtype, 'S4') - ex_values = np.array(['a', 'aa', 'aaa', 'aaaa', 'aaaa'], dtype='S4') - self.assertTrue((result[0] == ex_values).all()) - self.assertEqual(result[1].dtype, 'S4') - - def test_numpy_string_dtype_as_recarray(self): - data = """\ -a,1 -aa,2 -aaa,3 -aaaa,4 -aaaaa,5""" - - def _make_reader(**kwds): - return TextReader(StringIO(data), delimiter=',', header=None, - **kwds) - - reader = _make_reader(dtype='S4', as_recarray=True) - result = reader.read() - self.assertEqual(result['0'].dtype, 'S4') + assert result[0].dtype == 'S4' ex_values = np.array(['a', 'aa', 'aaa', 'aaaa', 'aaaa'], dtype='S4') - self.assertTrue((result['0'] == ex_values).all()) - self.assertEqual(result['1'].dtype, 'S4') + assert (result[0] == ex_values).all() + assert result[1].dtype == 'S4' def test_pass_dtype(self): data = """\ @@ -298,19 +253,19 @@ def _make_reader(**kwds): reader = _make_reader(dtype={'one': 'u1', 1: 'S1'}) result = reader.read() - self.assertEqual(result[0].dtype, 'u1') - self.assertEqual(result[1].dtype, 'S1') + assert result[0].dtype == 'u1' + assert result[1].dtype == 'S1' reader = 
_make_reader(dtype={'one': np.uint8, 1: object}) result = reader.read() - self.assertEqual(result[0].dtype, 'u1') - self.assertEqual(result[1].dtype, 'O') + assert result[0].dtype == 'u1' + assert result[1].dtype == 'O' reader = _make_reader(dtype={'one': np.dtype('u1'), 1: np.dtype('O')}) result = reader.read() - self.assertEqual(result[0].dtype, 'u1') - self.assertEqual(result[1].dtype, 'O') + assert result[0].dtype == 'u1' + assert result[1].dtype == 'O' def test_usecols(self): data = """\ @@ -327,9 +282,9 @@ def _make_reader(**kwds): result = reader.read() exp = _make_reader().read() - self.assertEqual(len(result), 2) - self.assertTrue((result[1] == exp[1]).all()) - self.assertTrue((result[2] == exp[2]).all()) + assert len(result) == 2 + assert (result[1] == exp[1]).all() + assert (result[2] == exp[2]).all() def test_cr_delimited(self): def _test(text, **kwargs): @@ -363,7 +318,7 @@ def test_empty_field_eof(self): result = TextReader(StringIO(data), delimiter=',').read() - expected = {0: np.array([1, 4]), + expected = {0: np.array([1, 4], dtype=np.int64), 1: np.array(['2', ''], dtype=object), 2: np.array(['3', ''], dtype=object)} assert_array_dicts_equal(result, expected) @@ -395,9 +350,10 @@ def test_empty_csv_input(self): # GH14867 df = read_csv(StringIO(), chunksize=20, header=None, names=['a', 'b', 'c']) - self.assertTrue(isinstance(df, TextFileReader)) + assert isinstance(df, TextFileReader) def assert_array_dicts_equal(left, right): for k, v in compat.iteritems(left): - assert(np.array_equal(v, right[k])) + assert tm.assert_numpy_array_equal(np.asarray(v), + np.asarray(right[k])) diff --git a/pandas/io/tests/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py similarity index 53% rename from pandas/io/tests/parser/test_unsupported.py rename to pandas/tests/io/parser/test_unsupported.py index 999db47cf2eaf..3117f6fae55da 100644 --- a/pandas/io/tests/parser/test_unsupported.py +++ b/pandas/tests/io/parser/test_unsupported.py @@ -13,11 
+13,18 @@ import pandas.util.testing as tm from pandas.compat import StringIO -from pandas.io.common import ParserError +from pandas.errors import ParserError from pandas.io.parsers import read_csv, read_table +import pytest -class TestUnsupportedFeatures(tm.TestCase): + +@pytest.fixture(params=["python", "python-fwf"], ids=lambda val: val) +def python_engine(request): + return request.param + + +class TestUnsupportedFeatures(object): def test_mangle_dupe_cols_false(self): # see gh-12935 @@ -25,33 +32,24 @@ def test_mangle_dupe_cols_false(self): msg = 'is not supported' for engine in ('c', 'python'): - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): read_csv(StringIO(data), engine=engine, mangle_dupe_cols=False) - def test_nrows_and_chunksize(self): - data = 'a b c' - msg = "cannot be used together yet" - - for engine in ('c', 'python'): - with tm.assertRaisesRegexp(NotImplementedError, msg): - read_csv(StringIO(data), engine=engine, - nrows=10, chunksize=5) - def test_c_engine(self): # see gh-6607 data = 'a b c\n1 2 3' msg = 'does not support' # specify C engine with unsupported options (raise) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): read_table(StringIO(data), engine='c', sep=None, delim_whitespace=False) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): read_table(StringIO(data), engine='c', sep=r'\s') - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): read_table(StringIO(data), engine='c', quotechar=chr(128)) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): read_table(StringIO(data), engine='c', skipfooter=1) # specify C-unsupported options without python-unsupported options @@ -71,9 +69,9 @@ def test_c_engine(self): x q 30 3 -0.6662 -0.5243 -0.3580 0.89145 2.5838""" msg = 'Error tokenizing data' - with 
tm.assertRaisesRegexp(ParserError, msg): + with tm.assert_raises_regex(ParserError, msg): read_table(StringIO(text), sep='\\s+') - with tm.assertRaisesRegexp(ParserError, msg): + with tm.assert_raises_regex(ParserError, msg): read_table(StringIO(text), engine='c', sep='\\s+') msg = "Only length-1 thousands markers supported" @@ -81,17 +79,17 @@ def test_c_engine(self): 1|2,334|5 10|13|10. """ - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): read_csv(StringIO(data), thousands=',,') - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): read_csv(StringIO(data), thousands='') msg = "Only length-1 line terminators supported" data = 'a,b,c~~1,2,3~~4,5,6' - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): read_csv(StringIO(data), lineterminator='~~') - def test_python_engine(self): + def test_python_engine(self, python_engine): from pandas.io.parsers import _python_unsupported as py_unsupported data = """1,2,3,, @@ -99,46 +97,43 @@ def test_python_engine(self): 1,2,3,4,5 1,2,,, 1,2,3,4,""" - engines = 'python', 'python-fwf' - for engine in engines: - for default in py_unsupported: - msg = ('The %r option is not supported ' - 'with the %r engine' % (default, engine)) + for default in py_unsupported: + msg = ('The %r option is not supported ' + 'with the %r engine' % (default, python_engine)) + + kwargs = {default: object()} + with tm.assert_raises_regex(ValueError, msg): + read_csv(StringIO(data), engine=python_engine, **kwargs) - kwargs = {default: object()} - with tm.assertRaisesRegexp(ValueError, msg): - read_csv(StringIO(data), engine=engine, **kwargs) + def test_python_engine_file_no_next(self, python_engine): + # see gh-16530 + class NoNextBuffer(object): + def __init__(self, csv_data): + self.data = csv_data + def __iter__(self): + return self -class TestDeprecatedFeatures(tm.TestCase): + def read(self): + return self.data - def 
test_deprecated_args(self): - data = '1,2,3' + data = "a\n1" + msg = "The 'python' engine cannot iterate" - # deprecated arguments with non-default values - deprecated = { - 'as_recarray': True, - 'buffer_lines': True, - 'compact_ints': True, - 'skip_footer': True, - 'use_unsigned': True, - } + with tm.assert_raises_regex(ValueError, msg): + read_csv(NoNextBuffer(data), engine=python_engine) - engines = 'c', 'python' - for engine in engines: - for arg, non_default_val in deprecated.items(): - if engine == 'c' and arg == 'skip_footer': - # unsupported --> exception is raised - continue +class TestDeprecatedFeatures(object): - if engine == 'python' and arg == 'buffer_lines': - # unsupported --> exception is raised - continue + @pytest.mark.parametrize("engine", ["c", "python"]) + @pytest.mark.parametrize("kwargs", [{"tupleize_cols": True}, + {"tupleize_cols": False}]) + def test_deprecated_args(self, engine, kwargs): + data = "1,2,3" + arg, _ = list(kwargs.items())[0] - with tm.assert_produces_warning( - FutureWarning, check_stacklevel=False): - kwargs = {arg: non_default_val} - read_csv(StringIO(data), engine=engine, - **kwargs) + with tm.assert_produces_warning( + FutureWarning, check_stacklevel=False): + read_csv(StringIO(data), engine=engine, **kwargs) diff --git a/pandas/io/tests/parser/usecols.py b/pandas/tests/io/parser/usecols.py similarity index 84% rename from pandas/io/tests/parser/usecols.py rename to pandas/tests/io/parser/usecols.py index 95df077dae997..195fb4cba2aed 100644 --- a/pandas/io/tests/parser/usecols.py +++ b/pandas/tests/io/parser/usecols.py @@ -11,7 +11,7 @@ import pandas.util.testing as tm from pandas import DataFrame, Index -from pandas.lib import Timestamp +from pandas._libs.tslib import Timestamp from pandas.compat import StringIO @@ -28,7 +28,7 @@ def test_raise_on_mixed_dtype_usecols(self): "all integers or a callable") usecols = [0, 'b', 2] - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): 
self.read_csv(StringIO(data), usecols=usecols) def test_usecols(self): @@ -43,9 +43,9 @@ def test_usecols(self): result2 = self.read_csv(StringIO(data), usecols=('b', 'c')) exp = self.read_csv(StringIO(data)) - self.assertEqual(len(result.columns), 2) - self.assertTrue((result['b'] == exp['b']).all()) - self.assertTrue((result['c'] == exp['c']).all()) + assert len(result.columns) == 2 + assert (result['b'] == exp['b']).all() + assert (result['c'] == exp['c']).all() tm.assert_frame_equal(result, result2) @@ -82,8 +82,8 @@ def test_usecols(self): tm.assert_frame_equal(result, expected) # length conflict, passed names and usecols disagree - self.assertRaises(ValueError, self.read_csv, StringIO(data), - names=['a', 'b'], usecols=[1], header=None) + pytest.raises(ValueError, self.read_csv, StringIO(data), + names=['a', 'b'], usecols=[1], header=None) def test_usecols_index_col_False(self): # see gh-9082 @@ -351,10 +351,10 @@ def test_usecols_with_mixed_encoding_strings(self): msg = ("'usecols' must either be all strings, all unicode, " "all integers or a callable") - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): self.read_csv(StringIO(s), usecols=[u'AAA', b'BBB']) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): self.read_csv(StringIO(s), usecols=[b'AAA', u'BBB']) def test_usecols_with_multibyte_characters(self): @@ -475,3 +475,63 @@ def test_uneven_length_cols(self): 'C': [3, 5, 4, 3, 3, 7]}) df = self.read_csv(StringIO(data), usecols=usecols) tm.assert_frame_equal(df, expected) + + def test_raise_on_usecols_names_mismatch(self): + # GH 14671 + data = 'a,b,c,d\n1,2,3,4\n5,6,7,8' + + msg = ( + "Usecols do not match columns, " + "columns expected but not found: {missing}" + ) + + usecols = ['a', 'b', 'c', 'd'] + df = self.read_csv(StringIO(data), usecols=usecols) + expected = DataFrame({'a': [1, 5], 'b': [2, 6], 'c': [3, 7], + 'd': [4, 8]}) + tm.assert_frame_equal(df, expected) + 
+ usecols = ['a', 'b', 'c', 'f'] + with tm.assert_raises_regex( + ValueError, msg.format(missing=r"\['f'\]")): + self.read_csv(StringIO(data), usecols=usecols) + + usecols = ['a', 'b', 'f'] + with tm.assert_raises_regex( + ValueError, msg.format(missing=r"\['f'\]")): + self.read_csv(StringIO(data), usecols=usecols) + + usecols = ['a', 'b', 'f', 'g'] + with tm.assert_raises_regex( + ValueError, msg.format(missing=r"\[('f', 'g'|'g', 'f')\]")): + self.read_csv(StringIO(data), usecols=usecols) + + names = ['A', 'B', 'C', 'D'] + + df = self.read_csv(StringIO(data), header=0, names=names) + expected = DataFrame({'A': [1, 5], 'B': [2, 6], 'C': [3, 7], + 'D': [4, 8]}) + tm.assert_frame_equal(df, expected) + + # TODO: https://github.com/pandas-dev/pandas/issues/16469 + # usecols = ['A','C'] + # df = self.read_csv(StringIO(data), header=0, names=names, + # usecols=usecols) + # expected = DataFrame({'A': [1,5], 'C': [3,7]}) + # tm.assert_frame_equal(df, expected) + # + # usecols = [0,2] + # df = self.read_csv(StringIO(data), header=0, names=names, + # usecols=usecols) + # expected = DataFrame({'A': [1,5], 'C': [3,7]}) + # tm.assert_frame_equal(df, expected) + + usecols = ['A', 'B', 'C', 'f'] + with tm.assert_raises_regex( + ValueError, msg.format(missing=r"\['f'\]")): + self.read_csv(StringIO(data), header=0, names=names, + usecols=usecols) + usecols = ['A', 'B', 'f'] + with tm.assert_raises_regex( + ValueError, msg.format(missing=r"\['f'\]")): + self.read_csv(StringIO(data), names=names, usecols=usecols) diff --git a/pandas/tests/io/sas/__init__.py b/pandas/tests/io/sas/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/io/tests/sas/data/DEMO_G.csv b/pandas/tests/io/sas/data/DEMO_G.csv similarity index 100% rename from pandas/io/tests/sas/data/DEMO_G.csv rename to pandas/tests/io/sas/data/DEMO_G.csv diff --git a/pandas/io/tests/sas/data/DEMO_G.xpt b/pandas/tests/io/sas/data/DEMO_G.xpt similarity index 100% rename from 
pandas/io/tests/sas/data/DEMO_G.xpt rename to pandas/tests/io/sas/data/DEMO_G.xpt diff --git a/pandas/io/tests/sas/data/DRXFCD_G.csv b/pandas/tests/io/sas/data/DRXFCD_G.csv similarity index 100% rename from pandas/io/tests/sas/data/DRXFCD_G.csv rename to pandas/tests/io/sas/data/DRXFCD_G.csv diff --git a/pandas/io/tests/sas/data/DRXFCD_G.xpt b/pandas/tests/io/sas/data/DRXFCD_G.xpt similarity index 100% rename from pandas/io/tests/sas/data/DRXFCD_G.xpt rename to pandas/tests/io/sas/data/DRXFCD_G.xpt diff --git a/pandas/io/tests/sas/data/SSHSV1_A.csv b/pandas/tests/io/sas/data/SSHSV1_A.csv similarity index 100% rename from pandas/io/tests/sas/data/SSHSV1_A.csv rename to pandas/tests/io/sas/data/SSHSV1_A.csv diff --git a/pandas/io/tests/sas/data/SSHSV1_A.xpt b/pandas/tests/io/sas/data/SSHSV1_A.xpt similarity index 100% rename from pandas/io/tests/sas/data/SSHSV1_A.xpt rename to pandas/tests/io/sas/data/SSHSV1_A.xpt diff --git a/pandas/io/tests/sas/data/airline.csv b/pandas/tests/io/sas/data/airline.csv similarity index 100% rename from pandas/io/tests/sas/data/airline.csv rename to pandas/tests/io/sas/data/airline.csv diff --git a/pandas/io/tests/sas/data/airline.sas7bdat b/pandas/tests/io/sas/data/airline.sas7bdat similarity index 100% rename from pandas/io/tests/sas/data/airline.sas7bdat rename to pandas/tests/io/sas/data/airline.sas7bdat diff --git a/pandas/tests/io/sas/data/datetime.csv b/pandas/tests/io/sas/data/datetime.csv new file mode 100644 index 0000000000000..6126f6d04eaf0 --- /dev/null +++ b/pandas/tests/io/sas/data/datetime.csv @@ -0,0 +1,5 @@ +Date1,Date2,DateTime,DateTimeHi,Taiw +1677-09-22,1677-09-22,1677-09-21 00:12:44,1677-09-21 00:12:43.145226,1912-01-01 +1960-01-01,1960-01-01,1960-01-01 00:00:00,1960-01-01 00:00:00.000000,1960-01-01 +2016-02-29,2016-02-29,2016-02-29 23:59:59,2016-02-29 23:59:59.123456,2016-02-29 +2262-04-11,2262-04-11,2262-04-11 23:47:16,2262-04-11 23:47:16.854774,2262-04-11 diff --git a/pandas/tests/io/sas/data/datetime.sas7bdat 
b/pandas/tests/io/sas/data/datetime.sas7bdat new file mode 100644 index 0000000000000000000000000000000000000000..6469dbf29f8eeeafb4703f01657eae4d0872ef10 GIT binary patch literal 131072 zcmeIy!E0Pa7y$6uG_|QjNibjzl`cUf(556Ypaq4jB(VY0M!VHk1REh>Al?e~Ac){W zXbvg`59Z)O`~zAqf?_Du9z_sAk>Ei*cn)gQHtWpnd)dueN>l`e--MU%n|a^Ny!pMC zgwSeNetF~U<$?aMK6|$(?7ud)GI+4Be`qL-4^_IxySK~sRCbJyS7OQIA@uF3lqDx? zC#y5FPfbtN>eKbvx#{uh-)nJxEzNx3`He%LeED7N<#$ef^l6NFpTRFCTQO~2i9fA& z^vLMM=-AxYa}STd_{jV-Ct7#@T9)>Hdf@%z<<&X+^6JQX8Y`c%^S(X-0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1pY4qmsUsC%V%BwJ!W0~;hq?(TOs^dY2F!#d2dnl)6Eb@i}CrG zUrgh%KM?cBiu~%05ax^g?U+v#`3q$p`@?Pd#%56y$Jg5Om5#hvCWYKSICOn2CCykp zyz*YSa`D@Ta?Aw>zs~n_zKdUzJ>N{GvSwWSd}8VQ+Q&a%%ds^c?{922>vF7qc;Ra0 ziMVf$xvsdMa*Y510t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oU&Z0$Vp4jdF#%W71Q0a(pvXVtg&!wPUn> z&VjH$+#j!2JD2AycdXPt=e}5+u2wsptLN`9XJ0Jd`D{~ppUKeOS|(=g`~3Ml6M4?z z&hs=Hjp6Xa>uI5Cy!M8Dv2Jhb#&)Zbc1XQS*xI(b8Rtx;+eORI)?LZZSmu54UYQS; zc~2$pQI41M<8qDBvJ)OGc7L>p$BMI`4e3lFyt?Dmx8mHlw&%t(bhKxe7SGogURs`7 zIJY&~2+o12OcKNIsbHlAXwK3(?fORp@ZetmXsdOOCtG=6NUS)1;q`uf7s`MKHS zPo6r_TrUnaTLenal3e~GQSne?gV*8ZM~q4IU*?jLC&^^5=j literal 0 HcmV?d00001 diff --git a/pandas/io/tests/sas/data/paxraw_d_short.csv b/pandas/tests/io/sas/data/paxraw_d_short.csv similarity index 100% rename from pandas/io/tests/sas/data/paxraw_d_short.csv rename to 
pandas/tests/io/sas/data/paxraw_d_short.csv diff --git a/pandas/io/tests/sas/data/paxraw_d_short.xpt b/pandas/tests/io/sas/data/paxraw_d_short.xpt similarity index 100% rename from pandas/io/tests/sas/data/paxraw_d_short.xpt rename to pandas/tests/io/sas/data/paxraw_d_short.xpt diff --git a/pandas/tests/io/sas/data/productsales.csv b/pandas/tests/io/sas/data/productsales.csv new file mode 100644 index 0000000000000..1f6a4424e1a97 --- /dev/null +++ b/pandas/tests/io/sas/data/productsales.csv @@ -0,0 +1,1441 @@ +ACTUAL,PREDICT,COUNTRY,REGION,DIVISION,PRODTYPE,PRODUCT,QUARTER,YEAR,MONTH +925,850,CANADA,EAST,EDUCATION,FURNITURE,SOFA,1,1993,1993-01-01 +999,297,CANADA,EAST,EDUCATION,FURNITURE,SOFA,1,1993,1993-02-01 +608,846,CANADA,EAST,EDUCATION,FURNITURE,SOFA,1,1993,1993-03-01 +642,533,CANADA,EAST,EDUCATION,FURNITURE,SOFA,2,1993,1993-04-01 +656,646,CANADA,EAST,EDUCATION,FURNITURE,SOFA,2,1993,1993-05-01 +948,486,CANADA,EAST,EDUCATION,FURNITURE,SOFA,2,1993,1993-06-01 +612,717,CANADA,EAST,EDUCATION,FURNITURE,SOFA,3,1993,1993-07-01 +114,564,CANADA,EAST,EDUCATION,FURNITURE,SOFA,3,1993,1993-08-01 +685,230,CANADA,EAST,EDUCATION,FURNITURE,SOFA,3,1993,1993-09-01 +657,494,CANADA,EAST,EDUCATION,FURNITURE,SOFA,4,1993,1993-10-01 +608,903,CANADA,EAST,EDUCATION,FURNITURE,SOFA,4,1993,1993-11-01 +353,266,CANADA,EAST,EDUCATION,FURNITURE,SOFA,4,1993,1993-12-01 +107,190,CANADA,EAST,EDUCATION,FURNITURE,SOFA,1,1994,1994-01-01 +354,139,CANADA,EAST,EDUCATION,FURNITURE,SOFA,1,1994,1994-02-01 +101,217,CANADA,EAST,EDUCATION,FURNITURE,SOFA,1,1994,1994-03-01 +553,560,CANADA,EAST,EDUCATION,FURNITURE,SOFA,2,1994,1994-04-01 +877,148,CANADA,EAST,EDUCATION,FURNITURE,SOFA,2,1994,1994-05-01 +431,762,CANADA,EAST,EDUCATION,FURNITURE,SOFA,2,1994,1994-06-01 +511,457,CANADA,EAST,EDUCATION,FURNITURE,SOFA,3,1994,1994-07-01 +157,532,CANADA,EAST,EDUCATION,FURNITURE,SOFA,3,1994,1994-08-01 +520,629,CANADA,EAST,EDUCATION,FURNITURE,SOFA,3,1994,1994-09-01 +114,491,CANADA,EAST,EDUCATION,FURNITURE,SOFA,4,1994,1994-10-01 
+277,0,CANADA,EAST,EDUCATION,FURNITURE,SOFA,4,1994,1994-11-01 +561,979,CANADA,EAST,EDUCATION,FURNITURE,SOFA,4,1994,1994-12-01 +220,585,CANADA,EAST,EDUCATION,FURNITURE,BED,1,1993,1993-01-01 +444,267,CANADA,EAST,EDUCATION,FURNITURE,BED,1,1993,1993-02-01 +178,487,CANADA,EAST,EDUCATION,FURNITURE,BED,1,1993,1993-03-01 +756,764,CANADA,EAST,EDUCATION,FURNITURE,BED,2,1993,1993-04-01 +329,312,CANADA,EAST,EDUCATION,FURNITURE,BED,2,1993,1993-05-01 +910,531,CANADA,EAST,EDUCATION,FURNITURE,BED,2,1993,1993-06-01 +530,536,CANADA,EAST,EDUCATION,FURNITURE,BED,3,1993,1993-07-01 +101,773,CANADA,EAST,EDUCATION,FURNITURE,BED,3,1993,1993-08-01 +515,143,CANADA,EAST,EDUCATION,FURNITURE,BED,3,1993,1993-09-01 +730,126,CANADA,EAST,EDUCATION,FURNITURE,BED,4,1993,1993-10-01 +993,862,CANADA,EAST,EDUCATION,FURNITURE,BED,4,1993,1993-11-01 +954,754,CANADA,EAST,EDUCATION,FURNITURE,BED,4,1993,1993-12-01 +267,410,CANADA,EAST,EDUCATION,FURNITURE,BED,1,1994,1994-01-01 +347,701,CANADA,EAST,EDUCATION,FURNITURE,BED,1,1994,1994-02-01 +991,204,CANADA,EAST,EDUCATION,FURNITURE,BED,1,1994,1994-03-01 +923,509,CANADA,EAST,EDUCATION,FURNITURE,BED,2,1994,1994-04-01 +437,378,CANADA,EAST,EDUCATION,FURNITURE,BED,2,1994,1994-05-01 +737,507,CANADA,EAST,EDUCATION,FURNITURE,BED,2,1994,1994-06-01 +104,49,CANADA,EAST,EDUCATION,FURNITURE,BED,3,1994,1994-07-01 +840,876,CANADA,EAST,EDUCATION,FURNITURE,BED,3,1994,1994-08-01 +704,66,CANADA,EAST,EDUCATION,FURNITURE,BED,3,1994,1994-09-01 +889,819,CANADA,EAST,EDUCATION,FURNITURE,BED,4,1994,1994-10-01 +107,351,CANADA,EAST,EDUCATION,FURNITURE,BED,4,1994,1994-11-01 +571,201,CANADA,EAST,EDUCATION,FURNITURE,BED,4,1994,1994-12-01 +688,209,CANADA,EAST,EDUCATION,OFFICE,TABLE,1,1993,1993-01-01 +544,51,CANADA,EAST,EDUCATION,OFFICE,TABLE,1,1993,1993-02-01 +954,135,CANADA,EAST,EDUCATION,OFFICE,TABLE,1,1993,1993-03-01 +445,47,CANADA,EAST,EDUCATION,OFFICE,TABLE,2,1993,1993-04-01 +829,379,CANADA,EAST,EDUCATION,OFFICE,TABLE,2,1993,1993-05-01 
+464,758,CANADA,EAST,EDUCATION,OFFICE,TABLE,2,1993,1993-06-01 +968,475,CANADA,EAST,EDUCATION,OFFICE,TABLE,3,1993,1993-07-01 +842,343,CANADA,EAST,EDUCATION,OFFICE,TABLE,3,1993,1993-08-01 +721,507,CANADA,EAST,EDUCATION,OFFICE,TABLE,3,1993,1993-09-01 +966,269,CANADA,EAST,EDUCATION,OFFICE,TABLE,4,1993,1993-10-01 +332,699,CANADA,EAST,EDUCATION,OFFICE,TABLE,4,1993,1993-11-01 +328,824,CANADA,EAST,EDUCATION,OFFICE,TABLE,4,1993,1993-12-01 +355,497,CANADA,EAST,EDUCATION,OFFICE,TABLE,1,1994,1994-01-01 +506,44,CANADA,EAST,EDUCATION,OFFICE,TABLE,1,1994,1994-02-01 +585,522,CANADA,EAST,EDUCATION,OFFICE,TABLE,1,1994,1994-03-01 +634,378,CANADA,EAST,EDUCATION,OFFICE,TABLE,2,1994,1994-04-01 +662,689,CANADA,EAST,EDUCATION,OFFICE,TABLE,2,1994,1994-05-01 +783,90,CANADA,EAST,EDUCATION,OFFICE,TABLE,2,1994,1994-06-01 +786,720,CANADA,EAST,EDUCATION,OFFICE,TABLE,3,1994,1994-07-01 +710,343,CANADA,EAST,EDUCATION,OFFICE,TABLE,3,1994,1994-08-01 +950,457,CANADA,EAST,EDUCATION,OFFICE,TABLE,3,1994,1994-09-01 +274,947,CANADA,EAST,EDUCATION,OFFICE,TABLE,4,1994,1994-10-01 +406,834,CANADA,EAST,EDUCATION,OFFICE,TABLE,4,1994,1994-11-01 +515,71,CANADA,EAST,EDUCATION,OFFICE,TABLE,4,1994,1994-12-01 +35,282,CANADA,EAST,EDUCATION,OFFICE,CHAIR,1,1993,1993-01-01 +995,538,CANADA,EAST,EDUCATION,OFFICE,CHAIR,1,1993,1993-02-01 +670,679,CANADA,EAST,EDUCATION,OFFICE,CHAIR,1,1993,1993-03-01 +406,601,CANADA,EAST,EDUCATION,OFFICE,CHAIR,2,1993,1993-04-01 +825,577,CANADA,EAST,EDUCATION,OFFICE,CHAIR,2,1993,1993-05-01 +467,908,CANADA,EAST,EDUCATION,OFFICE,CHAIR,2,1993,1993-06-01 +709,819,CANADA,EAST,EDUCATION,OFFICE,CHAIR,3,1993,1993-07-01 +522,687,CANADA,EAST,EDUCATION,OFFICE,CHAIR,3,1993,1993-08-01 +688,157,CANADA,EAST,EDUCATION,OFFICE,CHAIR,3,1993,1993-09-01 +956,111,CANADA,EAST,EDUCATION,OFFICE,CHAIR,4,1993,1993-10-01 +129,31,CANADA,EAST,EDUCATION,OFFICE,CHAIR,4,1993,1993-11-01 +687,790,CANADA,EAST,EDUCATION,OFFICE,CHAIR,4,1993,1993-12-01 +877,795,CANADA,EAST,EDUCATION,OFFICE,CHAIR,1,1994,1994-01-01 
+845,379,CANADA,EAST,EDUCATION,OFFICE,CHAIR,1,1994,1994-02-01 +425,114,CANADA,EAST,EDUCATION,OFFICE,CHAIR,1,1994,1994-03-01 +899,475,CANADA,EAST,EDUCATION,OFFICE,CHAIR,2,1994,1994-04-01 +987,747,CANADA,EAST,EDUCATION,OFFICE,CHAIR,2,1994,1994-05-01 +641,372,CANADA,EAST,EDUCATION,OFFICE,CHAIR,2,1994,1994-06-01 +448,415,CANADA,EAST,EDUCATION,OFFICE,CHAIR,3,1994,1994-07-01 +341,955,CANADA,EAST,EDUCATION,OFFICE,CHAIR,3,1994,1994-08-01 +137,356,CANADA,EAST,EDUCATION,OFFICE,CHAIR,3,1994,1994-09-01 +235,316,CANADA,EAST,EDUCATION,OFFICE,CHAIR,4,1994,1994-10-01 +482,351,CANADA,EAST,EDUCATION,OFFICE,CHAIR,4,1994,1994-11-01 +678,164,CANADA,EAST,EDUCATION,OFFICE,CHAIR,4,1994,1994-12-01 +240,386,CANADA,EAST,EDUCATION,OFFICE,DESK,1,1993,1993-01-01 +605,113,CANADA,EAST,EDUCATION,OFFICE,DESK,1,1993,1993-02-01 +274,68,CANADA,EAST,EDUCATION,OFFICE,DESK,1,1993,1993-03-01 +422,885,CANADA,EAST,EDUCATION,OFFICE,DESK,2,1993,1993-04-01 +763,575,CANADA,EAST,EDUCATION,OFFICE,DESK,2,1993,1993-05-01 +561,743,CANADA,EAST,EDUCATION,OFFICE,DESK,2,1993,1993-06-01 +339,816,CANADA,EAST,EDUCATION,OFFICE,DESK,3,1993,1993-07-01 +877,203,CANADA,EAST,EDUCATION,OFFICE,DESK,3,1993,1993-08-01 +192,581,CANADA,EAST,EDUCATION,OFFICE,DESK,3,1993,1993-09-01 +604,815,CANADA,EAST,EDUCATION,OFFICE,DESK,4,1993,1993-10-01 +55,333,CANADA,EAST,EDUCATION,OFFICE,DESK,4,1993,1993-11-01 +87,40,CANADA,EAST,EDUCATION,OFFICE,DESK,4,1993,1993-12-01 +942,672,CANADA,EAST,EDUCATION,OFFICE,DESK,1,1994,1994-01-01 +912,23,CANADA,EAST,EDUCATION,OFFICE,DESK,1,1994,1994-02-01 +768,948,CANADA,EAST,EDUCATION,OFFICE,DESK,1,1994,1994-03-01 +951,291,CANADA,EAST,EDUCATION,OFFICE,DESK,2,1994,1994-04-01 +768,839,CANADA,EAST,EDUCATION,OFFICE,DESK,2,1994,1994-05-01 +978,864,CANADA,EAST,EDUCATION,OFFICE,DESK,2,1994,1994-06-01 +20,337,CANADA,EAST,EDUCATION,OFFICE,DESK,3,1994,1994-07-01 +298,95,CANADA,EAST,EDUCATION,OFFICE,DESK,3,1994,1994-08-01 +193,535,CANADA,EAST,EDUCATION,OFFICE,DESK,3,1994,1994-09-01 
+336,191,CANADA,EAST,EDUCATION,OFFICE,DESK,4,1994,1994-10-01 +617,412,CANADA,EAST,EDUCATION,OFFICE,DESK,4,1994,1994-11-01 +709,711,CANADA,EAST,EDUCATION,OFFICE,DESK,4,1994,1994-12-01 +5,425,CANADA,EAST,CONSUMER,FURNITURE,SOFA,1,1993,1993-01-01 +164,215,CANADA,EAST,CONSUMER,FURNITURE,SOFA,1,1993,1993-02-01 +422,948,CANADA,EAST,CONSUMER,FURNITURE,SOFA,1,1993,1993-03-01 +424,544,CANADA,EAST,CONSUMER,FURNITURE,SOFA,2,1993,1993-04-01 +854,764,CANADA,EAST,CONSUMER,FURNITURE,SOFA,2,1993,1993-05-01 +168,446,CANADA,EAST,CONSUMER,FURNITURE,SOFA,2,1993,1993-06-01 +8,957,CANADA,EAST,CONSUMER,FURNITURE,SOFA,3,1993,1993-07-01 +748,967,CANADA,EAST,CONSUMER,FURNITURE,SOFA,3,1993,1993-08-01 +682,11,CANADA,EAST,CONSUMER,FURNITURE,SOFA,3,1993,1993-09-01 +300,110,CANADA,EAST,CONSUMER,FURNITURE,SOFA,4,1993,1993-10-01 +672,263,CANADA,EAST,CONSUMER,FURNITURE,SOFA,4,1993,1993-11-01 +894,215,CANADA,EAST,CONSUMER,FURNITURE,SOFA,4,1993,1993-12-01 +944,965,CANADA,EAST,CONSUMER,FURNITURE,SOFA,1,1994,1994-01-01 +403,423,CANADA,EAST,CONSUMER,FURNITURE,SOFA,1,1994,1994-02-01 +596,753,CANADA,EAST,CONSUMER,FURNITURE,SOFA,1,1994,1994-03-01 +481,770,CANADA,EAST,CONSUMER,FURNITURE,SOFA,2,1994,1994-04-01 +503,263,CANADA,EAST,CONSUMER,FURNITURE,SOFA,2,1994,1994-05-01 +126,79,CANADA,EAST,CONSUMER,FURNITURE,SOFA,2,1994,1994-06-01 +721,441,CANADA,EAST,CONSUMER,FURNITURE,SOFA,3,1994,1994-07-01 +271,858,CANADA,EAST,CONSUMER,FURNITURE,SOFA,3,1994,1994-08-01 +721,667,CANADA,EAST,CONSUMER,FURNITURE,SOFA,3,1994,1994-09-01 +157,193,CANADA,EAST,CONSUMER,FURNITURE,SOFA,4,1994,1994-10-01 +991,394,CANADA,EAST,CONSUMER,FURNITURE,SOFA,4,1994,1994-11-01 +499,680,CANADA,EAST,CONSUMER,FURNITURE,SOFA,4,1994,1994-12-01 +284,414,CANADA,EAST,CONSUMER,FURNITURE,BED,1,1993,1993-01-01 +705,770,CANADA,EAST,CONSUMER,FURNITURE,BED,1,1993,1993-02-01 +737,679,CANADA,EAST,CONSUMER,FURNITURE,BED,1,1993,1993-03-01 +745,7,CANADA,EAST,CONSUMER,FURNITURE,BED,2,1993,1993-04-01 +633,713,CANADA,EAST,CONSUMER,FURNITURE,BED,2,1993,1993-05-01 
+983,851,CANADA,EAST,CONSUMER,FURNITURE,BED,2,1993,1993-06-01 +591,944,CANADA,EAST,CONSUMER,FURNITURE,BED,3,1993,1993-07-01 +42,130,CANADA,EAST,CONSUMER,FURNITURE,BED,3,1993,1993-08-01 +771,485,CANADA,EAST,CONSUMER,FURNITURE,BED,3,1993,1993-09-01 +465,23,CANADA,EAST,CONSUMER,FURNITURE,BED,4,1993,1993-10-01 +296,193,CANADA,EAST,CONSUMER,FURNITURE,BED,4,1993,1993-11-01 +890,7,CANADA,EAST,CONSUMER,FURNITURE,BED,4,1993,1993-12-01 +312,919,CANADA,EAST,CONSUMER,FURNITURE,BED,1,1994,1994-01-01 +777,768,CANADA,EAST,CONSUMER,FURNITURE,BED,1,1994,1994-02-01 +364,854,CANADA,EAST,CONSUMER,FURNITURE,BED,1,1994,1994-03-01 +601,411,CANADA,EAST,CONSUMER,FURNITURE,BED,2,1994,1994-04-01 +823,736,CANADA,EAST,CONSUMER,FURNITURE,BED,2,1994,1994-05-01 +847,10,CANADA,EAST,CONSUMER,FURNITURE,BED,2,1994,1994-06-01 +490,311,CANADA,EAST,CONSUMER,FURNITURE,BED,3,1994,1994-07-01 +387,348,CANADA,EAST,CONSUMER,FURNITURE,BED,3,1994,1994-08-01 +688,458,CANADA,EAST,CONSUMER,FURNITURE,BED,3,1994,1994-09-01 +650,195,CANADA,EAST,CONSUMER,FURNITURE,BED,4,1994,1994-10-01 +447,658,CANADA,EAST,CONSUMER,FURNITURE,BED,4,1994,1994-11-01 +91,704,CANADA,EAST,CONSUMER,FURNITURE,BED,4,1994,1994-12-01 +197,807,CANADA,EAST,CONSUMER,OFFICE,TABLE,1,1993,1993-01-01 +51,861,CANADA,EAST,CONSUMER,OFFICE,TABLE,1,1993,1993-02-01 +570,873,CANADA,EAST,CONSUMER,OFFICE,TABLE,1,1993,1993-03-01 +423,933,CANADA,EAST,CONSUMER,OFFICE,TABLE,2,1993,1993-04-01 +524,355,CANADA,EAST,CONSUMER,OFFICE,TABLE,2,1993,1993-05-01 +416,794,CANADA,EAST,CONSUMER,OFFICE,TABLE,2,1993,1993-06-01 +789,645,CANADA,EAST,CONSUMER,OFFICE,TABLE,3,1993,1993-07-01 +551,700,CANADA,EAST,CONSUMER,OFFICE,TABLE,3,1993,1993-08-01 +400,831,CANADA,EAST,CONSUMER,OFFICE,TABLE,3,1993,1993-09-01 +361,800,CANADA,EAST,CONSUMER,OFFICE,TABLE,4,1993,1993-10-01 +189,830,CANADA,EAST,CONSUMER,OFFICE,TABLE,4,1993,1993-11-01 +554,828,CANADA,EAST,CONSUMER,OFFICE,TABLE,4,1993,1993-12-01 +585,12,CANADA,EAST,CONSUMER,OFFICE,TABLE,1,1994,1994-01-01 
+281,501,CANADA,EAST,CONSUMER,OFFICE,TABLE,1,1994,1994-02-01 +629,914,CANADA,EAST,CONSUMER,OFFICE,TABLE,1,1994,1994-03-01 +43,685,CANADA,EAST,CONSUMER,OFFICE,TABLE,2,1994,1994-04-01 +533,755,CANADA,EAST,CONSUMER,OFFICE,TABLE,2,1994,1994-05-01 +882,708,CANADA,EAST,CONSUMER,OFFICE,TABLE,2,1994,1994-06-01 +790,595,CANADA,EAST,CONSUMER,OFFICE,TABLE,3,1994,1994-07-01 +600,32,CANADA,EAST,CONSUMER,OFFICE,TABLE,3,1994,1994-08-01 +148,49,CANADA,EAST,CONSUMER,OFFICE,TABLE,3,1994,1994-09-01 +237,727,CANADA,EAST,CONSUMER,OFFICE,TABLE,4,1994,1994-10-01 +488,239,CANADA,EAST,CONSUMER,OFFICE,TABLE,4,1994,1994-11-01 +457,273,CANADA,EAST,CONSUMER,OFFICE,TABLE,4,1994,1994-12-01 +401,986,CANADA,EAST,CONSUMER,OFFICE,CHAIR,1,1993,1993-01-01 +181,544,CANADA,EAST,CONSUMER,OFFICE,CHAIR,1,1993,1993-02-01 +995,182,CANADA,EAST,CONSUMER,OFFICE,CHAIR,1,1993,1993-03-01 +120,197,CANADA,EAST,CONSUMER,OFFICE,CHAIR,2,1993,1993-04-01 +119,435,CANADA,EAST,CONSUMER,OFFICE,CHAIR,2,1993,1993-05-01 +319,974,CANADA,EAST,CONSUMER,OFFICE,CHAIR,2,1993,1993-06-01 +333,524,CANADA,EAST,CONSUMER,OFFICE,CHAIR,3,1993,1993-07-01 +923,688,CANADA,EAST,CONSUMER,OFFICE,CHAIR,3,1993,1993-08-01 +634,750,CANADA,EAST,CONSUMER,OFFICE,CHAIR,3,1993,1993-09-01 +493,155,CANADA,EAST,CONSUMER,OFFICE,CHAIR,4,1993,1993-10-01 +461,860,CANADA,EAST,CONSUMER,OFFICE,CHAIR,4,1993,1993-11-01 +304,102,CANADA,EAST,CONSUMER,OFFICE,CHAIR,4,1993,1993-12-01 +641,425,CANADA,EAST,CONSUMER,OFFICE,CHAIR,1,1994,1994-01-01 +992,224,CANADA,EAST,CONSUMER,OFFICE,CHAIR,1,1994,1994-02-01 +202,408,CANADA,EAST,CONSUMER,OFFICE,CHAIR,1,1994,1994-03-01 +770,524,CANADA,EAST,CONSUMER,OFFICE,CHAIR,2,1994,1994-04-01 +202,816,CANADA,EAST,CONSUMER,OFFICE,CHAIR,2,1994,1994-05-01 +14,515,CANADA,EAST,CONSUMER,OFFICE,CHAIR,2,1994,1994-06-01 +134,793,CANADA,EAST,CONSUMER,OFFICE,CHAIR,3,1994,1994-07-01 +977,460,CANADA,EAST,CONSUMER,OFFICE,CHAIR,3,1994,1994-08-01 +174,732,CANADA,EAST,CONSUMER,OFFICE,CHAIR,3,1994,1994-09-01 
+429,435,CANADA,EAST,CONSUMER,OFFICE,CHAIR,4,1994,1994-10-01 +514,38,CANADA,EAST,CONSUMER,OFFICE,CHAIR,4,1994,1994-11-01 +784,616,CANADA,EAST,CONSUMER,OFFICE,CHAIR,4,1994,1994-12-01 +973,225,CANADA,EAST,CONSUMER,OFFICE,DESK,1,1993,1993-01-01 +511,402,CANADA,EAST,CONSUMER,OFFICE,DESK,1,1993,1993-02-01 +30,697,CANADA,EAST,CONSUMER,OFFICE,DESK,1,1993,1993-03-01 +895,567,CANADA,EAST,CONSUMER,OFFICE,DESK,2,1993,1993-04-01 +557,231,CANADA,EAST,CONSUMER,OFFICE,DESK,2,1993,1993-05-01 +282,372,CANADA,EAST,CONSUMER,OFFICE,DESK,2,1993,1993-06-01 +909,15,CANADA,EAST,CONSUMER,OFFICE,DESK,3,1993,1993-07-01 +276,866,CANADA,EAST,CONSUMER,OFFICE,DESK,3,1993,1993-08-01 +234,452,CANADA,EAST,CONSUMER,OFFICE,DESK,3,1993,1993-09-01 +479,663,CANADA,EAST,CONSUMER,OFFICE,DESK,4,1993,1993-10-01 +782,982,CANADA,EAST,CONSUMER,OFFICE,DESK,4,1993,1993-11-01 +755,813,CANADA,EAST,CONSUMER,OFFICE,DESK,4,1993,1993-12-01 +689,523,CANADA,EAST,CONSUMER,OFFICE,DESK,1,1994,1994-01-01 +496,871,CANADA,EAST,CONSUMER,OFFICE,DESK,1,1994,1994-02-01 +24,511,CANADA,EAST,CONSUMER,OFFICE,DESK,1,1994,1994-03-01 +379,819,CANADA,EAST,CONSUMER,OFFICE,DESK,2,1994,1994-04-01 +441,525,CANADA,EAST,CONSUMER,OFFICE,DESK,2,1994,1994-05-01 +49,13,CANADA,EAST,CONSUMER,OFFICE,DESK,2,1994,1994-06-01 +243,694,CANADA,EAST,CONSUMER,OFFICE,DESK,3,1994,1994-07-01 +295,782,CANADA,EAST,CONSUMER,OFFICE,DESK,3,1994,1994-08-01 +395,839,CANADA,EAST,CONSUMER,OFFICE,DESK,3,1994,1994-09-01 +929,461,CANADA,EAST,CONSUMER,OFFICE,DESK,4,1994,1994-10-01 +997,303,CANADA,EAST,CONSUMER,OFFICE,DESK,4,1994,1994-11-01 +889,421,CANADA,EAST,CONSUMER,OFFICE,DESK,4,1994,1994-12-01 +72,421,CANADA,WEST,EDUCATION,FURNITURE,SOFA,1,1993,1993-01-01 +926,433,CANADA,WEST,EDUCATION,FURNITURE,SOFA,1,1993,1993-02-01 +850,394,CANADA,WEST,EDUCATION,FURNITURE,SOFA,1,1993,1993-03-01 +826,338,CANADA,WEST,EDUCATION,FURNITURE,SOFA,2,1993,1993-04-01 +651,764,CANADA,WEST,EDUCATION,FURNITURE,SOFA,2,1993,1993-05-01 +854,216,CANADA,WEST,EDUCATION,FURNITURE,SOFA,2,1993,1993-06-01 
+899,96,CANADA,WEST,EDUCATION,FURNITURE,SOFA,3,1993,1993-07-01 +309,550,CANADA,WEST,EDUCATION,FURNITURE,SOFA,3,1993,1993-08-01 +943,636,CANADA,WEST,EDUCATION,FURNITURE,SOFA,3,1993,1993-09-01 +138,427,CANADA,WEST,EDUCATION,FURNITURE,SOFA,4,1993,1993-10-01 +99,652,CANADA,WEST,EDUCATION,FURNITURE,SOFA,4,1993,1993-11-01 +270,478,CANADA,WEST,EDUCATION,FURNITURE,SOFA,4,1993,1993-12-01 +862,18,CANADA,WEST,EDUCATION,FURNITURE,SOFA,1,1994,1994-01-01 +574,40,CANADA,WEST,EDUCATION,FURNITURE,SOFA,1,1994,1994-02-01 +359,453,CANADA,WEST,EDUCATION,FURNITURE,SOFA,1,1994,1994-03-01 +958,987,CANADA,WEST,EDUCATION,FURNITURE,SOFA,2,1994,1994-04-01 +791,26,CANADA,WEST,EDUCATION,FURNITURE,SOFA,2,1994,1994-05-01 +284,101,CANADA,WEST,EDUCATION,FURNITURE,SOFA,2,1994,1994-06-01 +190,969,CANADA,WEST,EDUCATION,FURNITURE,SOFA,3,1994,1994-07-01 +527,492,CANADA,WEST,EDUCATION,FURNITURE,SOFA,3,1994,1994-08-01 +112,263,CANADA,WEST,EDUCATION,FURNITURE,SOFA,3,1994,1994-09-01 +271,593,CANADA,WEST,EDUCATION,FURNITURE,SOFA,4,1994,1994-10-01 +643,923,CANADA,WEST,EDUCATION,FURNITURE,SOFA,4,1994,1994-11-01 +554,146,CANADA,WEST,EDUCATION,FURNITURE,SOFA,4,1994,1994-12-01 +211,305,CANADA,WEST,EDUCATION,FURNITURE,BED,1,1993,1993-01-01 +368,318,CANADA,WEST,EDUCATION,FURNITURE,BED,1,1993,1993-02-01 +778,417,CANADA,WEST,EDUCATION,FURNITURE,BED,1,1993,1993-03-01 +808,623,CANADA,WEST,EDUCATION,FURNITURE,BED,2,1993,1993-04-01 +46,761,CANADA,WEST,EDUCATION,FURNITURE,BED,2,1993,1993-05-01 +466,272,CANADA,WEST,EDUCATION,FURNITURE,BED,2,1993,1993-06-01 +18,988,CANADA,WEST,EDUCATION,FURNITURE,BED,3,1993,1993-07-01 +87,821,CANADA,WEST,EDUCATION,FURNITURE,BED,3,1993,1993-08-01 +765,962,CANADA,WEST,EDUCATION,FURNITURE,BED,3,1993,1993-09-01 +62,615,CANADA,WEST,EDUCATION,FURNITURE,BED,4,1993,1993-10-01 +13,523,CANADA,WEST,EDUCATION,FURNITURE,BED,4,1993,1993-11-01 +775,806,CANADA,WEST,EDUCATION,FURNITURE,BED,4,1993,1993-12-01 +636,586,CANADA,WEST,EDUCATION,FURNITURE,BED,1,1994,1994-01-01 
+458,520,CANADA,WEST,EDUCATION,FURNITURE,BED,1,1994,1994-02-01 +206,908,CANADA,WEST,EDUCATION,FURNITURE,BED,1,1994,1994-03-01 +310,30,CANADA,WEST,EDUCATION,FURNITURE,BED,2,1994,1994-04-01 +813,247,CANADA,WEST,EDUCATION,FURNITURE,BED,2,1994,1994-05-01 +22,647,CANADA,WEST,EDUCATION,FURNITURE,BED,2,1994,1994-06-01 +742,55,CANADA,WEST,EDUCATION,FURNITURE,BED,3,1994,1994-07-01 +394,154,CANADA,WEST,EDUCATION,FURNITURE,BED,3,1994,1994-08-01 +957,344,CANADA,WEST,EDUCATION,FURNITURE,BED,3,1994,1994-09-01 +205,95,CANADA,WEST,EDUCATION,FURNITURE,BED,4,1994,1994-10-01 +198,665,CANADA,WEST,EDUCATION,FURNITURE,BED,4,1994,1994-11-01 +638,145,CANADA,WEST,EDUCATION,FURNITURE,BED,4,1994,1994-12-01 +155,925,CANADA,WEST,EDUCATION,OFFICE,TABLE,1,1993,1993-01-01 +688,395,CANADA,WEST,EDUCATION,OFFICE,TABLE,1,1993,1993-02-01 +730,749,CANADA,WEST,EDUCATION,OFFICE,TABLE,1,1993,1993-03-01 +208,279,CANADA,WEST,EDUCATION,OFFICE,TABLE,2,1993,1993-04-01 +525,288,CANADA,WEST,EDUCATION,OFFICE,TABLE,2,1993,1993-05-01 +483,509,CANADA,WEST,EDUCATION,OFFICE,TABLE,2,1993,1993-06-01 +748,255,CANADA,WEST,EDUCATION,OFFICE,TABLE,3,1993,1993-07-01 +6,214,CANADA,WEST,EDUCATION,OFFICE,TABLE,3,1993,1993-08-01 +168,473,CANADA,WEST,EDUCATION,OFFICE,TABLE,3,1993,1993-09-01 +301,702,CANADA,WEST,EDUCATION,OFFICE,TABLE,4,1993,1993-10-01 +9,814,CANADA,WEST,EDUCATION,OFFICE,TABLE,4,1993,1993-11-01 +778,231,CANADA,WEST,EDUCATION,OFFICE,TABLE,4,1993,1993-12-01 +799,422,CANADA,WEST,EDUCATION,OFFICE,TABLE,1,1994,1994-01-01 +309,572,CANADA,WEST,EDUCATION,OFFICE,TABLE,1,1994,1994-02-01 +433,363,CANADA,WEST,EDUCATION,OFFICE,TABLE,1,1994,1994-03-01 +969,919,CANADA,WEST,EDUCATION,OFFICE,TABLE,2,1994,1994-04-01 +181,355,CANADA,WEST,EDUCATION,OFFICE,TABLE,2,1994,1994-05-01 +787,992,CANADA,WEST,EDUCATION,OFFICE,TABLE,2,1994,1994-06-01 +971,147,CANADA,WEST,EDUCATION,OFFICE,TABLE,3,1994,1994-07-01 +440,183,CANADA,WEST,EDUCATION,OFFICE,TABLE,3,1994,1994-08-01 +209,375,CANADA,WEST,EDUCATION,OFFICE,TABLE,3,1994,1994-09-01 
+537,77,CANADA,WEST,EDUCATION,OFFICE,TABLE,4,1994,1994-10-01 +364,308,CANADA,WEST,EDUCATION,OFFICE,TABLE,4,1994,1994-11-01 +377,660,CANADA,WEST,EDUCATION,OFFICE,TABLE,4,1994,1994-12-01 +251,555,CANADA,WEST,EDUCATION,OFFICE,CHAIR,1,1993,1993-01-01 +607,455,CANADA,WEST,EDUCATION,OFFICE,CHAIR,1,1993,1993-02-01 +127,888,CANADA,WEST,EDUCATION,OFFICE,CHAIR,1,1993,1993-03-01 +513,652,CANADA,WEST,EDUCATION,OFFICE,CHAIR,2,1993,1993-04-01 +146,799,CANADA,WEST,EDUCATION,OFFICE,CHAIR,2,1993,1993-05-01 +917,249,CANADA,WEST,EDUCATION,OFFICE,CHAIR,2,1993,1993-06-01 +776,539,CANADA,WEST,EDUCATION,OFFICE,CHAIR,3,1993,1993-07-01 +330,198,CANADA,WEST,EDUCATION,OFFICE,CHAIR,3,1993,1993-08-01 +981,340,CANADA,WEST,EDUCATION,OFFICE,CHAIR,3,1993,1993-09-01 +862,152,CANADA,WEST,EDUCATION,OFFICE,CHAIR,4,1993,1993-10-01 +612,347,CANADA,WEST,EDUCATION,OFFICE,CHAIR,4,1993,1993-11-01 +607,565,CANADA,WEST,EDUCATION,OFFICE,CHAIR,4,1993,1993-12-01 +786,855,CANADA,WEST,EDUCATION,OFFICE,CHAIR,1,1994,1994-01-01 +160,87,CANADA,WEST,EDUCATION,OFFICE,CHAIR,1,1994,1994-02-01 +199,69,CANADA,WEST,EDUCATION,OFFICE,CHAIR,1,1994,1994-03-01 +972,807,CANADA,WEST,EDUCATION,OFFICE,CHAIR,2,1994,1994-04-01 +870,565,CANADA,WEST,EDUCATION,OFFICE,CHAIR,2,1994,1994-05-01 +494,798,CANADA,WEST,EDUCATION,OFFICE,CHAIR,2,1994,1994-06-01 +975,714,CANADA,WEST,EDUCATION,OFFICE,CHAIR,3,1994,1994-07-01 +760,17,CANADA,WEST,EDUCATION,OFFICE,CHAIR,3,1994,1994-08-01 +180,797,CANADA,WEST,EDUCATION,OFFICE,CHAIR,3,1994,1994-09-01 +256,422,CANADA,WEST,EDUCATION,OFFICE,CHAIR,4,1994,1994-10-01 +422,621,CANADA,WEST,EDUCATION,OFFICE,CHAIR,4,1994,1994-11-01 +859,661,CANADA,WEST,EDUCATION,OFFICE,CHAIR,4,1994,1994-12-01 +586,363,CANADA,WEST,EDUCATION,OFFICE,DESK,1,1993,1993-01-01 +441,910,CANADA,WEST,EDUCATION,OFFICE,DESK,1,1993,1993-02-01 +597,998,CANADA,WEST,EDUCATION,OFFICE,DESK,1,1993,1993-03-01 +717,95,CANADA,WEST,EDUCATION,OFFICE,DESK,2,1993,1993-04-01 +713,731,CANADA,WEST,EDUCATION,OFFICE,DESK,2,1993,1993-05-01 
+591,718,CANADA,WEST,EDUCATION,OFFICE,DESK,2,1993,1993-06-01 +492,467,CANADA,WEST,EDUCATION,OFFICE,DESK,3,1993,1993-07-01 +170,126,CANADA,WEST,EDUCATION,OFFICE,DESK,3,1993,1993-08-01 +684,127,CANADA,WEST,EDUCATION,OFFICE,DESK,3,1993,1993-09-01 +981,746,CANADA,WEST,EDUCATION,OFFICE,DESK,4,1993,1993-10-01 +966,878,CANADA,WEST,EDUCATION,OFFICE,DESK,4,1993,1993-11-01 +439,27,CANADA,WEST,EDUCATION,OFFICE,DESK,4,1993,1993-12-01 +151,569,CANADA,WEST,EDUCATION,OFFICE,DESK,1,1994,1994-01-01 +602,812,CANADA,WEST,EDUCATION,OFFICE,DESK,1,1994,1994-02-01 +187,603,CANADA,WEST,EDUCATION,OFFICE,DESK,1,1994,1994-03-01 +415,506,CANADA,WEST,EDUCATION,OFFICE,DESK,2,1994,1994-04-01 +61,185,CANADA,WEST,EDUCATION,OFFICE,DESK,2,1994,1994-05-01 +839,692,CANADA,WEST,EDUCATION,OFFICE,DESK,2,1994,1994-06-01 +596,565,CANADA,WEST,EDUCATION,OFFICE,DESK,3,1994,1994-07-01 +751,512,CANADA,WEST,EDUCATION,OFFICE,DESK,3,1994,1994-08-01 +460,86,CANADA,WEST,EDUCATION,OFFICE,DESK,3,1994,1994-09-01 +922,399,CANADA,WEST,EDUCATION,OFFICE,DESK,4,1994,1994-10-01 +153,672,CANADA,WEST,EDUCATION,OFFICE,DESK,4,1994,1994-11-01 +928,801,CANADA,WEST,EDUCATION,OFFICE,DESK,4,1994,1994-12-01 +951,730,CANADA,WEST,CONSUMER,FURNITURE,SOFA,1,1993,1993-01-01 +394,408,CANADA,WEST,CONSUMER,FURNITURE,SOFA,1,1993,1993-02-01 +615,982,CANADA,WEST,CONSUMER,FURNITURE,SOFA,1,1993,1993-03-01 +653,499,CANADA,WEST,CONSUMER,FURNITURE,SOFA,2,1993,1993-04-01 +180,307,CANADA,WEST,CONSUMER,FURNITURE,SOFA,2,1993,1993-05-01 +649,741,CANADA,WEST,CONSUMER,FURNITURE,SOFA,2,1993,1993-06-01 +921,640,CANADA,WEST,CONSUMER,FURNITURE,SOFA,3,1993,1993-07-01 +11,300,CANADA,WEST,CONSUMER,FURNITURE,SOFA,3,1993,1993-08-01 +696,929,CANADA,WEST,CONSUMER,FURNITURE,SOFA,3,1993,1993-09-01 +795,309,CANADA,WEST,CONSUMER,FURNITURE,SOFA,4,1993,1993-10-01 +550,340,CANADA,WEST,CONSUMER,FURNITURE,SOFA,4,1993,1993-11-01 +320,228,CANADA,WEST,CONSUMER,FURNITURE,SOFA,4,1993,1993-12-01 +845,1000,CANADA,WEST,CONSUMER,FURNITURE,SOFA,1,1994,1994-01-01 
+245,21,CANADA,WEST,CONSUMER,FURNITURE,SOFA,1,1994,1994-02-01 +142,583,CANADA,WEST,CONSUMER,FURNITURE,SOFA,1,1994,1994-03-01 +717,506,CANADA,WEST,CONSUMER,FURNITURE,SOFA,2,1994,1994-04-01 +3,405,CANADA,WEST,CONSUMER,FURNITURE,SOFA,2,1994,1994-05-01 +790,556,CANADA,WEST,CONSUMER,FURNITURE,SOFA,2,1994,1994-06-01 +646,72,CANADA,WEST,CONSUMER,FURNITURE,SOFA,3,1994,1994-07-01 +230,103,CANADA,WEST,CONSUMER,FURNITURE,SOFA,3,1994,1994-08-01 +938,262,CANADA,WEST,CONSUMER,FURNITURE,SOFA,3,1994,1994-09-01 +629,102,CANADA,WEST,CONSUMER,FURNITURE,SOFA,4,1994,1994-10-01 +317,841,CANADA,WEST,CONSUMER,FURNITURE,SOFA,4,1994,1994-11-01 +812,159,CANADA,WEST,CONSUMER,FURNITURE,SOFA,4,1994,1994-12-01 +141,570,CANADA,WEST,CONSUMER,FURNITURE,BED,1,1993,1993-01-01 +64,375,CANADA,WEST,CONSUMER,FURNITURE,BED,1,1993,1993-02-01 +207,298,CANADA,WEST,CONSUMER,FURNITURE,BED,1,1993,1993-03-01 +435,32,CANADA,WEST,CONSUMER,FURNITURE,BED,2,1993,1993-04-01 +96,760,CANADA,WEST,CONSUMER,FURNITURE,BED,2,1993,1993-05-01 +252,338,CANADA,WEST,CONSUMER,FURNITURE,BED,2,1993,1993-06-01 +956,149,CANADA,WEST,CONSUMER,FURNITURE,BED,3,1993,1993-07-01 +633,343,CANADA,WEST,CONSUMER,FURNITURE,BED,3,1993,1993-08-01 +190,151,CANADA,WEST,CONSUMER,FURNITURE,BED,3,1993,1993-09-01 +227,44,CANADA,WEST,CONSUMER,FURNITURE,BED,4,1993,1993-10-01 +24,583,CANADA,WEST,CONSUMER,FURNITURE,BED,4,1993,1993-11-01 +420,230,CANADA,WEST,CONSUMER,FURNITURE,BED,4,1993,1993-12-01 +910,907,CANADA,WEST,CONSUMER,FURNITURE,BED,1,1994,1994-01-01 +709,783,CANADA,WEST,CONSUMER,FURNITURE,BED,1,1994,1994-02-01 +810,117,CANADA,WEST,CONSUMER,FURNITURE,BED,1,1994,1994-03-01 +723,416,CANADA,WEST,CONSUMER,FURNITURE,BED,2,1994,1994-04-01 +911,318,CANADA,WEST,CONSUMER,FURNITURE,BED,2,1994,1994-05-01 +230,888,CANADA,WEST,CONSUMER,FURNITURE,BED,2,1994,1994-06-01 +448,60,CANADA,WEST,CONSUMER,FURNITURE,BED,3,1994,1994-07-01 +945,596,CANADA,WEST,CONSUMER,FURNITURE,BED,3,1994,1994-08-01 +508,576,CANADA,WEST,CONSUMER,FURNITURE,BED,3,1994,1994-09-01 
+262,576,CANADA,WEST,CONSUMER,FURNITURE,BED,4,1994,1994-10-01 +441,280,CANADA,WEST,CONSUMER,FURNITURE,BED,4,1994,1994-11-01 +15,219,CANADA,WEST,CONSUMER,FURNITURE,BED,4,1994,1994-12-01 +795,133,CANADA,WEST,CONSUMER,OFFICE,TABLE,1,1993,1993-01-01 +301,273,CANADA,WEST,CONSUMER,OFFICE,TABLE,1,1993,1993-02-01 +304,86,CANADA,WEST,CONSUMER,OFFICE,TABLE,1,1993,1993-03-01 +49,400,CANADA,WEST,CONSUMER,OFFICE,TABLE,2,1993,1993-04-01 +576,364,CANADA,WEST,CONSUMER,OFFICE,TABLE,2,1993,1993-05-01 +669,63,CANADA,WEST,CONSUMER,OFFICE,TABLE,2,1993,1993-06-01 +325,929,CANADA,WEST,CONSUMER,OFFICE,TABLE,3,1993,1993-07-01 +272,344,CANADA,WEST,CONSUMER,OFFICE,TABLE,3,1993,1993-08-01 +80,768,CANADA,WEST,CONSUMER,OFFICE,TABLE,3,1993,1993-09-01 +46,668,CANADA,WEST,CONSUMER,OFFICE,TABLE,4,1993,1993-10-01 +223,407,CANADA,WEST,CONSUMER,OFFICE,TABLE,4,1993,1993-11-01 +774,536,CANADA,WEST,CONSUMER,OFFICE,TABLE,4,1993,1993-12-01 +784,657,CANADA,WEST,CONSUMER,OFFICE,TABLE,1,1994,1994-01-01 +92,215,CANADA,WEST,CONSUMER,OFFICE,TABLE,1,1994,1994-02-01 +67,966,CANADA,WEST,CONSUMER,OFFICE,TABLE,1,1994,1994-03-01 +747,674,CANADA,WEST,CONSUMER,OFFICE,TABLE,2,1994,1994-04-01 +686,574,CANADA,WEST,CONSUMER,OFFICE,TABLE,2,1994,1994-05-01 +93,266,CANADA,WEST,CONSUMER,OFFICE,TABLE,2,1994,1994-06-01 +192,680,CANADA,WEST,CONSUMER,OFFICE,TABLE,3,1994,1994-07-01 +51,362,CANADA,WEST,CONSUMER,OFFICE,TABLE,3,1994,1994-08-01 +498,412,CANADA,WEST,CONSUMER,OFFICE,TABLE,3,1994,1994-09-01 +546,431,CANADA,WEST,CONSUMER,OFFICE,TABLE,4,1994,1994-10-01 +485,94,CANADA,WEST,CONSUMER,OFFICE,TABLE,4,1994,1994-11-01 +925,345,CANADA,WEST,CONSUMER,OFFICE,TABLE,4,1994,1994-12-01 +292,445,CANADA,WEST,CONSUMER,OFFICE,CHAIR,1,1993,1993-01-01 +540,632,CANADA,WEST,CONSUMER,OFFICE,CHAIR,1,1993,1993-02-01 +21,855,CANADA,WEST,CONSUMER,OFFICE,CHAIR,1,1993,1993-03-01 +100,36,CANADA,WEST,CONSUMER,OFFICE,CHAIR,2,1993,1993-04-01 +49,250,CANADA,WEST,CONSUMER,OFFICE,CHAIR,2,1993,1993-05-01 
+353,427,CANADA,WEST,CONSUMER,OFFICE,CHAIR,2,1993,1993-06-01 +911,367,CANADA,WEST,CONSUMER,OFFICE,CHAIR,3,1993,1993-07-01 +823,245,CANADA,WEST,CONSUMER,OFFICE,CHAIR,3,1993,1993-08-01 +278,893,CANADA,WEST,CONSUMER,OFFICE,CHAIR,3,1993,1993-09-01 +576,490,CANADA,WEST,CONSUMER,OFFICE,CHAIR,4,1993,1993-10-01 +655,88,CANADA,WEST,CONSUMER,OFFICE,CHAIR,4,1993,1993-11-01 +763,964,CANADA,WEST,CONSUMER,OFFICE,CHAIR,4,1993,1993-12-01 +88,62,CANADA,WEST,CONSUMER,OFFICE,CHAIR,1,1994,1994-01-01 +746,506,CANADA,WEST,CONSUMER,OFFICE,CHAIR,1,1994,1994-02-01 +927,680,CANADA,WEST,CONSUMER,OFFICE,CHAIR,1,1994,1994-03-01 +297,153,CANADA,WEST,CONSUMER,OFFICE,CHAIR,2,1994,1994-04-01 +291,403,CANADA,WEST,CONSUMER,OFFICE,CHAIR,2,1994,1994-05-01 +838,98,CANADA,WEST,CONSUMER,OFFICE,CHAIR,2,1994,1994-06-01 +112,376,CANADA,WEST,CONSUMER,OFFICE,CHAIR,3,1994,1994-07-01 +509,477,CANADA,WEST,CONSUMER,OFFICE,CHAIR,3,1994,1994-08-01 +472,50,CANADA,WEST,CONSUMER,OFFICE,CHAIR,3,1994,1994-09-01 +495,592,CANADA,WEST,CONSUMER,OFFICE,CHAIR,4,1994,1994-10-01 +1000,813,CANADA,WEST,CONSUMER,OFFICE,CHAIR,4,1994,1994-11-01 +241,740,CANADA,WEST,CONSUMER,OFFICE,CHAIR,4,1994,1994-12-01 +693,873,CANADA,WEST,CONSUMER,OFFICE,DESK,1,1993,1993-01-01 +903,459,CANADA,WEST,CONSUMER,OFFICE,DESK,1,1993,1993-02-01 +791,224,CANADA,WEST,CONSUMER,OFFICE,DESK,1,1993,1993-03-01 +108,562,CANADA,WEST,CONSUMER,OFFICE,DESK,2,1993,1993-04-01 +845,199,CANADA,WEST,CONSUMER,OFFICE,DESK,2,1993,1993-05-01 +452,275,CANADA,WEST,CONSUMER,OFFICE,DESK,2,1993,1993-06-01 +479,355,CANADA,WEST,CONSUMER,OFFICE,DESK,3,1993,1993-07-01 +410,947,CANADA,WEST,CONSUMER,OFFICE,DESK,3,1993,1993-08-01 +379,454,CANADA,WEST,CONSUMER,OFFICE,DESK,3,1993,1993-09-01 +740,450,CANADA,WEST,CONSUMER,OFFICE,DESK,4,1993,1993-10-01 +471,575,CANADA,WEST,CONSUMER,OFFICE,DESK,4,1993,1993-11-01 +325,6,CANADA,WEST,CONSUMER,OFFICE,DESK,4,1993,1993-12-01 +455,847,CANADA,WEST,CONSUMER,OFFICE,DESK,1,1994,1994-01-01 +563,338,CANADA,WEST,CONSUMER,OFFICE,DESK,1,1994,1994-02-01 
+879,517,CANADA,WEST,CONSUMER,OFFICE,DESK,1,1994,1994-03-01 +312,630,CANADA,WEST,CONSUMER,OFFICE,DESK,2,1994,1994-04-01 +587,381,CANADA,WEST,CONSUMER,OFFICE,DESK,2,1994,1994-05-01 +628,864,CANADA,WEST,CONSUMER,OFFICE,DESK,2,1994,1994-06-01 +486,416,CANADA,WEST,CONSUMER,OFFICE,DESK,3,1994,1994-07-01 +811,852,CANADA,WEST,CONSUMER,OFFICE,DESK,3,1994,1994-08-01 +990,815,CANADA,WEST,CONSUMER,OFFICE,DESK,3,1994,1994-09-01 +35,23,CANADA,WEST,CONSUMER,OFFICE,DESK,4,1994,1994-10-01 +764,527,CANADA,WEST,CONSUMER,OFFICE,DESK,4,1994,1994-11-01 +619,693,CANADA,WEST,CONSUMER,OFFICE,DESK,4,1994,1994-12-01 +996,977,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,1,1993,1993-01-01 +554,549,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,1,1993,1993-02-01 +540,951,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,1,1993,1993-03-01 +140,390,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,2,1993,1993-04-01 +554,204,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,2,1993,1993-05-01 +724,78,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,2,1993,1993-06-01 +693,613,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,3,1993,1993-07-01 +866,745,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,3,1993,1993-08-01 +833,56,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,3,1993,1993-09-01 +164,887,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,4,1993,1993-10-01 +753,651,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,4,1993,1993-11-01 +60,691,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,4,1993,1993-12-01 +688,767,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,1,1994,1994-01-01 +883,709,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,1,1994,1994-02-01 +109,417,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,1,1994,1994-03-01 +950,326,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,2,1994,1994-04-01 +438,599,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,2,1994,1994-05-01 +286,818,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,2,1994,1994-06-01 +342,13,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,3,1994,1994-07-01 +383,185,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,3,1994,1994-08-01 +80,140,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,3,1994,1994-09-01 
+322,717,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,4,1994,1994-10-01 +749,852,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,4,1994,1994-11-01 +606,125,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,4,1994,1994-12-01 +641,325,GERMANY,EAST,EDUCATION,FURNITURE,BED,1,1993,1993-01-01 +494,648,GERMANY,EAST,EDUCATION,FURNITURE,BED,1,1993,1993-02-01 +428,365,GERMANY,EAST,EDUCATION,FURNITURE,BED,1,1993,1993-03-01 +936,120,GERMANY,EAST,EDUCATION,FURNITURE,BED,2,1993,1993-04-01 +597,347,GERMANY,EAST,EDUCATION,FURNITURE,BED,2,1993,1993-05-01 +728,638,GERMANY,EAST,EDUCATION,FURNITURE,BED,2,1993,1993-06-01 +933,732,GERMANY,EAST,EDUCATION,FURNITURE,BED,3,1993,1993-07-01 +663,465,GERMANY,EAST,EDUCATION,FURNITURE,BED,3,1993,1993-08-01 +394,262,GERMANY,EAST,EDUCATION,FURNITURE,BED,3,1993,1993-09-01 +334,947,GERMANY,EAST,EDUCATION,FURNITURE,BED,4,1993,1993-10-01 +114,694,GERMANY,EAST,EDUCATION,FURNITURE,BED,4,1993,1993-11-01 +89,482,GERMANY,EAST,EDUCATION,FURNITURE,BED,4,1993,1993-12-01 +874,600,GERMANY,EAST,EDUCATION,FURNITURE,BED,1,1994,1994-01-01 +674,94,GERMANY,EAST,EDUCATION,FURNITURE,BED,1,1994,1994-02-01 +347,323,GERMANY,EAST,EDUCATION,FURNITURE,BED,1,1994,1994-03-01 +105,49,GERMANY,EAST,EDUCATION,FURNITURE,BED,2,1994,1994-04-01 +286,70,GERMANY,EAST,EDUCATION,FURNITURE,BED,2,1994,1994-05-01 +669,844,GERMANY,EAST,EDUCATION,FURNITURE,BED,2,1994,1994-06-01 +786,773,GERMANY,EAST,EDUCATION,FURNITURE,BED,3,1994,1994-07-01 +104,68,GERMANY,EAST,EDUCATION,FURNITURE,BED,3,1994,1994-08-01 +770,110,GERMANY,EAST,EDUCATION,FURNITURE,BED,3,1994,1994-09-01 +263,42,GERMANY,EAST,EDUCATION,FURNITURE,BED,4,1994,1994-10-01 +900,171,GERMANY,EAST,EDUCATION,FURNITURE,BED,4,1994,1994-11-01 +630,644,GERMANY,EAST,EDUCATION,FURNITURE,BED,4,1994,1994-12-01 +597,408,GERMANY,EAST,EDUCATION,OFFICE,TABLE,1,1993,1993-01-01 +185,45,GERMANY,EAST,EDUCATION,OFFICE,TABLE,1,1993,1993-02-01 +175,522,GERMANY,EAST,EDUCATION,OFFICE,TABLE,1,1993,1993-03-01 +576,166,GERMANY,EAST,EDUCATION,OFFICE,TABLE,2,1993,1993-04-01 
+957,885,GERMANY,EAST,EDUCATION,OFFICE,TABLE,2,1993,1993-05-01 +993,713,GERMANY,EAST,EDUCATION,OFFICE,TABLE,2,1993,1993-06-01 +500,838,GERMANY,EAST,EDUCATION,OFFICE,TABLE,3,1993,1993-07-01 +410,267,GERMANY,EAST,EDUCATION,OFFICE,TABLE,3,1993,1993-08-01 +592,967,GERMANY,EAST,EDUCATION,OFFICE,TABLE,3,1993,1993-09-01 +64,529,GERMANY,EAST,EDUCATION,OFFICE,TABLE,4,1993,1993-10-01 +208,656,GERMANY,EAST,EDUCATION,OFFICE,TABLE,4,1993,1993-11-01 +273,665,GERMANY,EAST,EDUCATION,OFFICE,TABLE,4,1993,1993-12-01 +906,419,GERMANY,EAST,EDUCATION,OFFICE,TABLE,1,1994,1994-01-01 +429,776,GERMANY,EAST,EDUCATION,OFFICE,TABLE,1,1994,1994-02-01 +961,971,GERMANY,EAST,EDUCATION,OFFICE,TABLE,1,1994,1994-03-01 +338,248,GERMANY,EAST,EDUCATION,OFFICE,TABLE,2,1994,1994-04-01 +472,486,GERMANY,EAST,EDUCATION,OFFICE,TABLE,2,1994,1994-05-01 +903,674,GERMANY,EAST,EDUCATION,OFFICE,TABLE,2,1994,1994-06-01 +299,603,GERMANY,EAST,EDUCATION,OFFICE,TABLE,3,1994,1994-07-01 +948,492,GERMANY,EAST,EDUCATION,OFFICE,TABLE,3,1994,1994-08-01 +931,512,GERMANY,EAST,EDUCATION,OFFICE,TABLE,3,1994,1994-09-01 +570,391,GERMANY,EAST,EDUCATION,OFFICE,TABLE,4,1994,1994-10-01 +97,313,GERMANY,EAST,EDUCATION,OFFICE,TABLE,4,1994,1994-11-01 +674,758,GERMANY,EAST,EDUCATION,OFFICE,TABLE,4,1994,1994-12-01 +468,304,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,1,1993,1993-01-01 +430,846,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,1,1993,1993-02-01 +893,912,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,1,1993,1993-03-01 +519,810,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,2,1993,1993-04-01 +267,122,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,2,1993,1993-05-01 +908,102,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,2,1993,1993-06-01 +176,161,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,3,1993,1993-07-01 +673,450,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,3,1993,1993-08-01 +798,215,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,3,1993,1993-09-01 +291,765,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,4,1993,1993-10-01 +583,557,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,4,1993,1993-11-01 
+442,739,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,4,1993,1993-12-01 +951,811,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,1,1994,1994-01-01 +430,780,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,1,1994,1994-02-01 +559,645,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,1,1994,1994-03-01 +726,365,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,2,1994,1994-04-01 +944,597,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,2,1994,1994-05-01 +497,126,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,2,1994,1994-06-01 +388,655,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,3,1994,1994-07-01 +81,604,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,3,1994,1994-08-01 +111,280,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,3,1994,1994-09-01 +288,115,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,4,1994,1994-10-01 +845,205,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,4,1994,1994-11-01 +745,672,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,4,1994,1994-12-01 +352,339,GERMANY,EAST,EDUCATION,OFFICE,DESK,1,1993,1993-01-01 +234,70,GERMANY,EAST,EDUCATION,OFFICE,DESK,1,1993,1993-02-01 +167,528,GERMANY,EAST,EDUCATION,OFFICE,DESK,1,1993,1993-03-01 +606,220,GERMANY,EAST,EDUCATION,OFFICE,DESK,2,1993,1993-04-01 +670,691,GERMANY,EAST,EDUCATION,OFFICE,DESK,2,1993,1993-05-01 +764,197,GERMANY,EAST,EDUCATION,OFFICE,DESK,2,1993,1993-06-01 +659,239,GERMANY,EAST,EDUCATION,OFFICE,DESK,3,1993,1993-07-01 +996,50,GERMANY,EAST,EDUCATION,OFFICE,DESK,3,1993,1993-08-01 +424,135,GERMANY,EAST,EDUCATION,OFFICE,DESK,3,1993,1993-09-01 +899,972,GERMANY,EAST,EDUCATION,OFFICE,DESK,4,1993,1993-10-01 +392,475,GERMANY,EAST,EDUCATION,OFFICE,DESK,4,1993,1993-11-01 +555,868,GERMANY,EAST,EDUCATION,OFFICE,DESK,4,1993,1993-12-01 +860,451,GERMANY,EAST,EDUCATION,OFFICE,DESK,1,1994,1994-01-01 +114,565,GERMANY,EAST,EDUCATION,OFFICE,DESK,1,1994,1994-02-01 +943,116,GERMANY,EAST,EDUCATION,OFFICE,DESK,1,1994,1994-03-01 +365,385,GERMANY,EAST,EDUCATION,OFFICE,DESK,2,1994,1994-04-01 +249,375,GERMANY,EAST,EDUCATION,OFFICE,DESK,2,1994,1994-05-01 +192,357,GERMANY,EAST,EDUCATION,OFFICE,DESK,2,1994,1994-06-01 +328,230,GERMANY,EAST,EDUCATION,OFFICE,DESK,3,1994,1994-07-01 
+311,829,GERMANY,EAST,EDUCATION,OFFICE,DESK,3,1994,1994-08-01 +576,971,GERMANY,EAST,EDUCATION,OFFICE,DESK,3,1994,1994-09-01 +915,280,GERMANY,EAST,EDUCATION,OFFICE,DESK,4,1994,1994-10-01 +522,853,GERMANY,EAST,EDUCATION,OFFICE,DESK,4,1994,1994-11-01 +625,953,GERMANY,EAST,EDUCATION,OFFICE,DESK,4,1994,1994-12-01 +873,874,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,1,1993,1993-01-01 +498,578,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,1,1993,1993-02-01 +808,768,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,1,1993,1993-03-01 +742,178,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,2,1993,1993-04-01 +744,916,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,2,1993,1993-05-01 +30,917,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,2,1993,1993-06-01 +747,633,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,3,1993,1993-07-01 +672,107,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,3,1993,1993-08-01 +564,523,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,3,1993,1993-09-01 +785,924,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,4,1993,1993-10-01 +825,481,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,4,1993,1993-11-01 +243,240,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,4,1993,1993-12-01 +959,819,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,1,1994,1994-01-01 +123,602,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,1,1994,1994-02-01 +714,538,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,1,1994,1994-03-01 +252,632,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,2,1994,1994-04-01 +715,952,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,2,1994,1994-05-01 +670,480,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,2,1994,1994-06-01 +81,700,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,3,1994,1994-07-01 +653,726,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,3,1994,1994-08-01 +795,526,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,3,1994,1994-09-01 +182,410,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,4,1994,1994-10-01 +725,307,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,4,1994,1994-11-01 +101,73,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,4,1994,1994-12-01 +143,232,GERMANY,EAST,CONSUMER,FURNITURE,BED,1,1993,1993-01-01 +15,993,GERMANY,EAST,CONSUMER,FURNITURE,BED,1,1993,1993-02-01 
+742,652,GERMANY,EAST,CONSUMER,FURNITURE,BED,1,1993,1993-03-01 +339,761,GERMANY,EAST,CONSUMER,FURNITURE,BED,2,1993,1993-04-01 +39,428,GERMANY,EAST,CONSUMER,FURNITURE,BED,2,1993,1993-05-01 +465,4,GERMANY,EAST,CONSUMER,FURNITURE,BED,2,1993,1993-06-01 +889,101,GERMANY,EAST,CONSUMER,FURNITURE,BED,3,1993,1993-07-01 +856,869,GERMANY,EAST,CONSUMER,FURNITURE,BED,3,1993,1993-08-01 +358,271,GERMANY,EAST,CONSUMER,FURNITURE,BED,3,1993,1993-09-01 +452,633,GERMANY,EAST,CONSUMER,FURNITURE,BED,4,1993,1993-10-01 +387,481,GERMANY,EAST,CONSUMER,FURNITURE,BED,4,1993,1993-11-01 +824,302,GERMANY,EAST,CONSUMER,FURNITURE,BED,4,1993,1993-12-01 +185,245,GERMANY,EAST,CONSUMER,FURNITURE,BED,1,1994,1994-01-01 +151,941,GERMANY,EAST,CONSUMER,FURNITURE,BED,1,1994,1994-02-01 +419,721,GERMANY,EAST,CONSUMER,FURNITURE,BED,1,1994,1994-03-01 +643,893,GERMANY,EAST,CONSUMER,FURNITURE,BED,2,1994,1994-04-01 +63,898,GERMANY,EAST,CONSUMER,FURNITURE,BED,2,1994,1994-05-01 +202,94,GERMANY,EAST,CONSUMER,FURNITURE,BED,2,1994,1994-06-01 +332,962,GERMANY,EAST,CONSUMER,FURNITURE,BED,3,1994,1994-07-01 +723,71,GERMANY,EAST,CONSUMER,FURNITURE,BED,3,1994,1994-08-01 +148,108,GERMANY,EAST,CONSUMER,FURNITURE,BED,3,1994,1994-09-01 +840,71,GERMANY,EAST,CONSUMER,FURNITURE,BED,4,1994,1994-10-01 +601,767,GERMANY,EAST,CONSUMER,FURNITURE,BED,4,1994,1994-11-01 +962,323,GERMANY,EAST,CONSUMER,FURNITURE,BED,4,1994,1994-12-01 +166,982,GERMANY,EAST,CONSUMER,OFFICE,TABLE,1,1993,1993-01-01 +531,614,GERMANY,EAST,CONSUMER,OFFICE,TABLE,1,1993,1993-02-01 +963,839,GERMANY,EAST,CONSUMER,OFFICE,TABLE,1,1993,1993-03-01 +994,388,GERMANY,EAST,CONSUMER,OFFICE,TABLE,2,1993,1993-04-01 +978,296,GERMANY,EAST,CONSUMER,OFFICE,TABLE,2,1993,1993-05-01 +72,429,GERMANY,EAST,CONSUMER,OFFICE,TABLE,2,1993,1993-06-01 +33,901,GERMANY,EAST,CONSUMER,OFFICE,TABLE,3,1993,1993-07-01 +428,350,GERMANY,EAST,CONSUMER,OFFICE,TABLE,3,1993,1993-08-01 +413,581,GERMANY,EAST,CONSUMER,OFFICE,TABLE,3,1993,1993-09-01 +737,583,GERMANY,EAST,CONSUMER,OFFICE,TABLE,4,1993,1993-10-01 
+85,92,GERMANY,EAST,CONSUMER,OFFICE,TABLE,4,1993,1993-11-01 +916,647,GERMANY,EAST,CONSUMER,OFFICE,TABLE,4,1993,1993-12-01 +785,771,GERMANY,EAST,CONSUMER,OFFICE,TABLE,1,1994,1994-01-01 +302,26,GERMANY,EAST,CONSUMER,OFFICE,TABLE,1,1994,1994-02-01 +1000,598,GERMANY,EAST,CONSUMER,OFFICE,TABLE,1,1994,1994-03-01 +458,715,GERMANY,EAST,CONSUMER,OFFICE,TABLE,2,1994,1994-04-01 +896,74,GERMANY,EAST,CONSUMER,OFFICE,TABLE,2,1994,1994-05-01 +615,580,GERMANY,EAST,CONSUMER,OFFICE,TABLE,2,1994,1994-06-01 +174,848,GERMANY,EAST,CONSUMER,OFFICE,TABLE,3,1994,1994-07-01 +651,118,GERMANY,EAST,CONSUMER,OFFICE,TABLE,3,1994,1994-08-01 +784,54,GERMANY,EAST,CONSUMER,OFFICE,TABLE,3,1994,1994-09-01 +121,929,GERMANY,EAST,CONSUMER,OFFICE,TABLE,4,1994,1994-10-01 +341,393,GERMANY,EAST,CONSUMER,OFFICE,TABLE,4,1994,1994-11-01 +615,820,GERMANY,EAST,CONSUMER,OFFICE,TABLE,4,1994,1994-12-01 +697,336,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,1,1993,1993-01-01 +215,299,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,1,1993,1993-02-01 +197,747,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,1,1993,1993-03-01 +205,154,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,2,1993,1993-04-01 +256,486,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,2,1993,1993-05-01 +377,251,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,2,1993,1993-06-01 +577,225,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,3,1993,1993-07-01 +686,77,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,3,1993,1993-08-01 +332,74,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,3,1993,1993-09-01 +534,596,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,4,1993,1993-10-01 +485,493,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,4,1993,1993-11-01 +594,782,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,4,1993,1993-12-01 +413,487,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,1,1994,1994-01-01 +13,127,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,1,1994,1994-02-01 +483,538,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,1,1994,1994-03-01 +820,94,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,2,1994,1994-04-01 +745,252,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,2,1994,1994-05-01 +79,722,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,2,1994,1994-06-01 
+36,536,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,3,1994,1994-07-01 +950,958,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,3,1994,1994-08-01 +74,466,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,3,1994,1994-09-01 +458,309,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,4,1994,1994-10-01 +609,680,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,4,1994,1994-11-01 +429,539,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,4,1994,1994-12-01 +956,511,GERMANY,EAST,CONSUMER,OFFICE,DESK,1,1993,1993-01-01 +205,505,GERMANY,EAST,CONSUMER,OFFICE,DESK,1,1993,1993-02-01 +629,720,GERMANY,EAST,CONSUMER,OFFICE,DESK,1,1993,1993-03-01 +277,823,GERMANY,EAST,CONSUMER,OFFICE,DESK,2,1993,1993-04-01 +266,21,GERMANY,EAST,CONSUMER,OFFICE,DESK,2,1993,1993-05-01 +872,142,GERMANY,EAST,CONSUMER,OFFICE,DESK,2,1993,1993-06-01 +435,95,GERMANY,EAST,CONSUMER,OFFICE,DESK,3,1993,1993-07-01 +988,398,GERMANY,EAST,CONSUMER,OFFICE,DESK,3,1993,1993-08-01 +953,328,GERMANY,EAST,CONSUMER,OFFICE,DESK,3,1993,1993-09-01 +556,151,GERMANY,EAST,CONSUMER,OFFICE,DESK,4,1993,1993-10-01 +211,978,GERMANY,EAST,CONSUMER,OFFICE,DESK,4,1993,1993-11-01 +389,918,GERMANY,EAST,CONSUMER,OFFICE,DESK,4,1993,1993-12-01 +351,542,GERMANY,EAST,CONSUMER,OFFICE,DESK,1,1994,1994-01-01 +14,96,GERMANY,EAST,CONSUMER,OFFICE,DESK,1,1994,1994-02-01 +181,496,GERMANY,EAST,CONSUMER,OFFICE,DESK,1,1994,1994-03-01 +452,77,GERMANY,EAST,CONSUMER,OFFICE,DESK,2,1994,1994-04-01 +511,236,GERMANY,EAST,CONSUMER,OFFICE,DESK,2,1994,1994-05-01 +193,913,GERMANY,EAST,CONSUMER,OFFICE,DESK,2,1994,1994-06-01 +797,49,GERMANY,EAST,CONSUMER,OFFICE,DESK,3,1994,1994-07-01 +988,967,GERMANY,EAST,CONSUMER,OFFICE,DESK,3,1994,1994-08-01 +487,502,GERMANY,EAST,CONSUMER,OFFICE,DESK,3,1994,1994-09-01 +941,790,GERMANY,EAST,CONSUMER,OFFICE,DESK,4,1994,1994-10-01 +577,121,GERMANY,EAST,CONSUMER,OFFICE,DESK,4,1994,1994-11-01 +456,55,GERMANY,EAST,CONSUMER,OFFICE,DESK,4,1994,1994-12-01 +982,739,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,1,1993,1993-01-01 +593,683,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,1,1993,1993-02-01 
+702,610,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,1,1993,1993-03-01 +528,248,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,2,1993,1993-04-01 +873,530,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,2,1993,1993-05-01 +301,889,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,2,1993,1993-06-01 +769,245,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,3,1993,1993-07-01 +724,473,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,3,1993,1993-08-01 +466,938,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,3,1993,1993-09-01 +774,150,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,4,1993,1993-10-01 +111,772,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,4,1993,1993-11-01 +954,201,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,4,1993,1993-12-01 +780,945,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,1,1994,1994-01-01 +210,177,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,1,1994,1994-02-01 +93,378,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,1,1994,1994-03-01 +332,83,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,2,1994,1994-04-01 +186,803,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,2,1994,1994-05-01 +782,398,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,2,1994,1994-06-01 +41,215,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,3,1994,1994-07-01 +222,194,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,3,1994,1994-08-01 +992,287,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,3,1994,1994-09-01 +477,410,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,4,1994,1994-10-01 +948,50,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,4,1994,1994-11-01 +817,204,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,4,1994,1994-12-01 +597,239,GERMANY,WEST,EDUCATION,FURNITURE,BED,1,1993,1993-01-01 +649,637,GERMANY,WEST,EDUCATION,FURNITURE,BED,1,1993,1993-02-01 +3,938,GERMANY,WEST,EDUCATION,FURNITURE,BED,1,1993,1993-03-01 +731,788,GERMANY,WEST,EDUCATION,FURNITURE,BED,2,1993,1993-04-01 +181,399,GERMANY,WEST,EDUCATION,FURNITURE,BED,2,1993,1993-05-01 +468,576,GERMANY,WEST,EDUCATION,FURNITURE,BED,2,1993,1993-06-01 +891,187,GERMANY,WEST,EDUCATION,FURNITURE,BED,3,1993,1993-07-01 +226,703,GERMANY,WEST,EDUCATION,FURNITURE,BED,3,1993,1993-08-01 +28,455,GERMANY,WEST,EDUCATION,FURNITURE,BED,3,1993,1993-09-01 
+609,244,GERMANY,WEST,EDUCATION,FURNITURE,BED,4,1993,1993-10-01 +224,868,GERMANY,WEST,EDUCATION,FURNITURE,BED,4,1993,1993-11-01 +230,353,GERMANY,WEST,EDUCATION,FURNITURE,BED,4,1993,1993-12-01 +216,101,GERMANY,WEST,EDUCATION,FURNITURE,BED,1,1994,1994-01-01 +282,924,GERMANY,WEST,EDUCATION,FURNITURE,BED,1,1994,1994-02-01 +501,144,GERMANY,WEST,EDUCATION,FURNITURE,BED,1,1994,1994-03-01 +320,0,GERMANY,WEST,EDUCATION,FURNITURE,BED,2,1994,1994-04-01 +720,910,GERMANY,WEST,EDUCATION,FURNITURE,BED,2,1994,1994-05-01 +464,259,GERMANY,WEST,EDUCATION,FURNITURE,BED,2,1994,1994-06-01 +363,107,GERMANY,WEST,EDUCATION,FURNITURE,BED,3,1994,1994-07-01 +49,63,GERMANY,WEST,EDUCATION,FURNITURE,BED,3,1994,1994-08-01 +223,270,GERMANY,WEST,EDUCATION,FURNITURE,BED,3,1994,1994-09-01 +452,554,GERMANY,WEST,EDUCATION,FURNITURE,BED,4,1994,1994-10-01 +210,154,GERMANY,WEST,EDUCATION,FURNITURE,BED,4,1994,1994-11-01 +444,205,GERMANY,WEST,EDUCATION,FURNITURE,BED,4,1994,1994-12-01 +222,441,GERMANY,WEST,EDUCATION,OFFICE,TABLE,1,1993,1993-01-01 +678,183,GERMANY,WEST,EDUCATION,OFFICE,TABLE,1,1993,1993-02-01 +25,459,GERMANY,WEST,EDUCATION,OFFICE,TABLE,1,1993,1993-03-01 +57,810,GERMANY,WEST,EDUCATION,OFFICE,TABLE,2,1993,1993-04-01 +981,268,GERMANY,WEST,EDUCATION,OFFICE,TABLE,2,1993,1993-05-01 +740,916,GERMANY,WEST,EDUCATION,OFFICE,TABLE,2,1993,1993-06-01 +408,742,GERMANY,WEST,EDUCATION,OFFICE,TABLE,3,1993,1993-07-01 +966,522,GERMANY,WEST,EDUCATION,OFFICE,TABLE,3,1993,1993-08-01 +107,299,GERMANY,WEST,EDUCATION,OFFICE,TABLE,3,1993,1993-09-01 +488,677,GERMANY,WEST,EDUCATION,OFFICE,TABLE,4,1993,1993-10-01 +759,709,GERMANY,WEST,EDUCATION,OFFICE,TABLE,4,1993,1993-11-01 +504,310,GERMANY,WEST,EDUCATION,OFFICE,TABLE,4,1993,1993-12-01 +99,160,GERMANY,WEST,EDUCATION,OFFICE,TABLE,1,1994,1994-01-01 +503,698,GERMANY,WEST,EDUCATION,OFFICE,TABLE,1,1994,1994-02-01 +724,540,GERMANY,WEST,EDUCATION,OFFICE,TABLE,1,1994,1994-03-01 +309,901,GERMANY,WEST,EDUCATION,OFFICE,TABLE,2,1994,1994-04-01 
+625,34,GERMANY,WEST,EDUCATION,OFFICE,TABLE,2,1994,1994-05-01 +294,536,GERMANY,WEST,EDUCATION,OFFICE,TABLE,2,1994,1994-06-01 +890,780,GERMANY,WEST,EDUCATION,OFFICE,TABLE,3,1994,1994-07-01 +501,716,GERMANY,WEST,EDUCATION,OFFICE,TABLE,3,1994,1994-08-01 +34,532,GERMANY,WEST,EDUCATION,OFFICE,TABLE,3,1994,1994-09-01 +203,871,GERMANY,WEST,EDUCATION,OFFICE,TABLE,4,1994,1994-10-01 +140,199,GERMANY,WEST,EDUCATION,OFFICE,TABLE,4,1994,1994-11-01 +845,845,GERMANY,WEST,EDUCATION,OFFICE,TABLE,4,1994,1994-12-01 +774,591,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,1,1993,1993-01-01 +645,378,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,1,1993,1993-02-01 +986,942,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,1,1993,1993-03-01 +296,686,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,2,1993,1993-04-01 +936,720,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,2,1993,1993-05-01 +341,546,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,2,1993,1993-06-01 +32,845,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,3,1993,1993-07-01 +277,667,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,3,1993,1993-08-01 +548,627,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,3,1993,1993-09-01 +727,142,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,4,1993,1993-10-01 +812,655,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,4,1993,1993-11-01 +168,556,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,4,1993,1993-12-01 +150,459,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,1,1994,1994-01-01 +136,89,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,1,1994,1994-02-01 +695,726,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,1,1994,1994-03-01 +363,38,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,2,1994,1994-04-01 +853,60,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,2,1994,1994-05-01 +621,369,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,2,1994,1994-06-01 +764,381,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,3,1994,1994-07-01 +669,465,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,3,1994,1994-08-01 +772,981,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,3,1994,1994-09-01 +228,758,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,4,1994,1994-10-01 +261,31,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,4,1994,1994-11-01 
+821,237,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,4,1994,1994-12-01 +100,285,GERMANY,WEST,EDUCATION,OFFICE,DESK,1,1993,1993-01-01 +465,94,GERMANY,WEST,EDUCATION,OFFICE,DESK,1,1993,1993-02-01 +350,561,GERMANY,WEST,EDUCATION,OFFICE,DESK,1,1993,1993-03-01 +991,143,GERMANY,WEST,EDUCATION,OFFICE,DESK,2,1993,1993-04-01 +910,95,GERMANY,WEST,EDUCATION,OFFICE,DESK,2,1993,1993-05-01 +206,341,GERMANY,WEST,EDUCATION,OFFICE,DESK,2,1993,1993-06-01 +263,388,GERMANY,WEST,EDUCATION,OFFICE,DESK,3,1993,1993-07-01 +374,272,GERMANY,WEST,EDUCATION,OFFICE,DESK,3,1993,1993-08-01 +875,890,GERMANY,WEST,EDUCATION,OFFICE,DESK,3,1993,1993-09-01 +810,734,GERMANY,WEST,EDUCATION,OFFICE,DESK,4,1993,1993-10-01 +398,364,GERMANY,WEST,EDUCATION,OFFICE,DESK,4,1993,1993-11-01 +565,619,GERMANY,WEST,EDUCATION,OFFICE,DESK,4,1993,1993-12-01 +417,517,GERMANY,WEST,EDUCATION,OFFICE,DESK,1,1994,1994-01-01 +291,781,GERMANY,WEST,EDUCATION,OFFICE,DESK,1,1994,1994-02-01 +251,327,GERMANY,WEST,EDUCATION,OFFICE,DESK,1,1994,1994-03-01 +449,48,GERMANY,WEST,EDUCATION,OFFICE,DESK,2,1994,1994-04-01 +774,809,GERMANY,WEST,EDUCATION,OFFICE,DESK,2,1994,1994-05-01 +386,73,GERMANY,WEST,EDUCATION,OFFICE,DESK,2,1994,1994-06-01 +22,936,GERMANY,WEST,EDUCATION,OFFICE,DESK,3,1994,1994-07-01 +940,400,GERMANY,WEST,EDUCATION,OFFICE,DESK,3,1994,1994-08-01 +132,736,GERMANY,WEST,EDUCATION,OFFICE,DESK,3,1994,1994-09-01 +103,211,GERMANY,WEST,EDUCATION,OFFICE,DESK,4,1994,1994-10-01 +152,271,GERMANY,WEST,EDUCATION,OFFICE,DESK,4,1994,1994-11-01 +952,855,GERMANY,WEST,EDUCATION,OFFICE,DESK,4,1994,1994-12-01 +872,923,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,1,1993,1993-01-01 +748,854,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,1,1993,1993-02-01 +749,769,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,1,1993,1993-03-01 +876,271,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,2,1993,1993-04-01 +860,383,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,2,1993,1993-05-01 +900,29,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,2,1993,1993-06-01 +705,185,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,3,1993,1993-07-01 
+913,351,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,3,1993,1993-08-01 +315,560,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,3,1993,1993-09-01 +466,840,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,4,1993,1993-10-01 +233,517,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,4,1993,1993-11-01 +906,949,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,4,1993,1993-12-01 +148,633,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,1,1994,1994-01-01 +661,636,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,1,1994,1994-02-01 +847,138,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,1,1994,1994-03-01 +768,481,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,2,1994,1994-04-01 +866,408,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,2,1994,1994-05-01 +475,130,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,2,1994,1994-06-01 +112,813,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,3,1994,1994-07-01 +136,661,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,3,1994,1994-08-01 +763,311,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,3,1994,1994-09-01 +388,872,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,4,1994,1994-10-01 +996,643,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,4,1994,1994-11-01 +486,174,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,4,1994,1994-12-01 +494,528,GERMANY,WEST,CONSUMER,FURNITURE,BED,1,1993,1993-01-01 +771,124,GERMANY,WEST,CONSUMER,FURNITURE,BED,1,1993,1993-02-01 +49,126,GERMANY,WEST,CONSUMER,FURNITURE,BED,1,1993,1993-03-01 +322,440,GERMANY,WEST,CONSUMER,FURNITURE,BED,2,1993,1993-04-01 +878,881,GERMANY,WEST,CONSUMER,FURNITURE,BED,2,1993,1993-05-01 +827,292,GERMANY,WEST,CONSUMER,FURNITURE,BED,2,1993,1993-06-01 +852,873,GERMANY,WEST,CONSUMER,FURNITURE,BED,3,1993,1993-07-01 +716,357,GERMANY,WEST,CONSUMER,FURNITURE,BED,3,1993,1993-08-01 +81,247,GERMANY,WEST,CONSUMER,FURNITURE,BED,3,1993,1993-09-01 +916,18,GERMANY,WEST,CONSUMER,FURNITURE,BED,4,1993,1993-10-01 +673,395,GERMANY,WEST,CONSUMER,FURNITURE,BED,4,1993,1993-11-01 +242,620,GERMANY,WEST,CONSUMER,FURNITURE,BED,4,1993,1993-12-01 +914,946,GERMANY,WEST,CONSUMER,FURNITURE,BED,1,1994,1994-01-01 +902,72,GERMANY,WEST,CONSUMER,FURNITURE,BED,1,1994,1994-02-01 
+707,691,GERMANY,WEST,CONSUMER,FURNITURE,BED,1,1994,1994-03-01 +223,95,GERMANY,WEST,CONSUMER,FURNITURE,BED,2,1994,1994-04-01 +619,878,GERMANY,WEST,CONSUMER,FURNITURE,BED,2,1994,1994-05-01 +254,757,GERMANY,WEST,CONSUMER,FURNITURE,BED,2,1994,1994-06-01 +688,898,GERMANY,WEST,CONSUMER,FURNITURE,BED,3,1994,1994-07-01 +477,172,GERMANY,WEST,CONSUMER,FURNITURE,BED,3,1994,1994-08-01 +280,419,GERMANY,WEST,CONSUMER,FURNITURE,BED,3,1994,1994-09-01 +546,849,GERMANY,WEST,CONSUMER,FURNITURE,BED,4,1994,1994-10-01 +630,807,GERMANY,WEST,CONSUMER,FURNITURE,BED,4,1994,1994-11-01 +455,599,GERMANY,WEST,CONSUMER,FURNITURE,BED,4,1994,1994-12-01 +505,59,GERMANY,WEST,CONSUMER,OFFICE,TABLE,1,1993,1993-01-01 +823,790,GERMANY,WEST,CONSUMER,OFFICE,TABLE,1,1993,1993-02-01 +891,574,GERMANY,WEST,CONSUMER,OFFICE,TABLE,1,1993,1993-03-01 +840,96,GERMANY,WEST,CONSUMER,OFFICE,TABLE,2,1993,1993-04-01 +436,376,GERMANY,WEST,CONSUMER,OFFICE,TABLE,2,1993,1993-05-01 +168,352,GERMANY,WEST,CONSUMER,OFFICE,TABLE,2,1993,1993-06-01 +177,741,GERMANY,WEST,CONSUMER,OFFICE,TABLE,3,1993,1993-07-01 +727,12,GERMANY,WEST,CONSUMER,OFFICE,TABLE,3,1993,1993-08-01 +278,157,GERMANY,WEST,CONSUMER,OFFICE,TABLE,3,1993,1993-09-01 +443,10,GERMANY,WEST,CONSUMER,OFFICE,TABLE,4,1993,1993-10-01 +905,544,GERMANY,WEST,CONSUMER,OFFICE,TABLE,4,1993,1993-11-01 +881,817,GERMANY,WEST,CONSUMER,OFFICE,TABLE,4,1993,1993-12-01 +507,754,GERMANY,WEST,CONSUMER,OFFICE,TABLE,1,1994,1994-01-01 +363,425,GERMANY,WEST,CONSUMER,OFFICE,TABLE,1,1994,1994-02-01 +603,492,GERMANY,WEST,CONSUMER,OFFICE,TABLE,1,1994,1994-03-01 +473,485,GERMANY,WEST,CONSUMER,OFFICE,TABLE,2,1994,1994-04-01 +128,369,GERMANY,WEST,CONSUMER,OFFICE,TABLE,2,1994,1994-05-01 +105,560,GERMANY,WEST,CONSUMER,OFFICE,TABLE,2,1994,1994-06-01 +325,651,GERMANY,WEST,CONSUMER,OFFICE,TABLE,3,1994,1994-07-01 +711,326,GERMANY,WEST,CONSUMER,OFFICE,TABLE,3,1994,1994-08-01 +983,180,GERMANY,WEST,CONSUMER,OFFICE,TABLE,3,1994,1994-09-01 +241,935,GERMANY,WEST,CONSUMER,OFFICE,TABLE,4,1994,1994-10-01 
+71,403,GERMANY,WEST,CONSUMER,OFFICE,TABLE,4,1994,1994-11-01 +395,345,GERMANY,WEST,CONSUMER,OFFICE,TABLE,4,1994,1994-12-01 +168,278,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,1,1993,1993-01-01 +512,376,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,1,1993,1993-02-01 +291,104,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,1,1993,1993-03-01 +776,543,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,2,1993,1993-04-01 +271,798,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,2,1993,1993-05-01 +946,333,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,2,1993,1993-06-01 +195,833,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,3,1993,1993-07-01 +165,132,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,3,1993,1993-08-01 +238,629,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,3,1993,1993-09-01 +409,337,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,4,1993,1993-10-01 +720,300,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,4,1993,1993-11-01 +309,470,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,4,1993,1993-12-01 +812,875,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,1,1994,1994-01-01 +441,237,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,1,1994,1994-02-01 +500,272,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,1,1994,1994-03-01 +517,860,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,2,1994,1994-04-01 +924,415,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,2,1994,1994-05-01 +572,140,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,2,1994,1994-06-01 +768,367,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,3,1994,1994-07-01 +692,195,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,3,1994,1994-08-01 +28,245,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,3,1994,1994-09-01 +202,285,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,4,1994,1994-10-01 +76,98,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,4,1994,1994-11-01 +421,932,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,4,1994,1994-12-01 +636,898,GERMANY,WEST,CONSUMER,OFFICE,DESK,1,1993,1993-01-01 +52,330,GERMANY,WEST,CONSUMER,OFFICE,DESK,1,1993,1993-02-01 +184,603,GERMANY,WEST,CONSUMER,OFFICE,DESK,1,1993,1993-03-01 +739,280,GERMANY,WEST,CONSUMER,OFFICE,DESK,2,1993,1993-04-01 +841,507,GERMANY,WEST,CONSUMER,OFFICE,DESK,2,1993,1993-05-01 +65,202,GERMANY,WEST,CONSUMER,OFFICE,DESK,2,1993,1993-06-01 
+623,513,GERMANY,WEST,CONSUMER,OFFICE,DESK,3,1993,1993-07-01 +517,132,GERMANY,WEST,CONSUMER,OFFICE,DESK,3,1993,1993-08-01 +636,21,GERMANY,WEST,CONSUMER,OFFICE,DESK,3,1993,1993-09-01 +845,657,GERMANY,WEST,CONSUMER,OFFICE,DESK,4,1993,1993-10-01 +232,195,GERMANY,WEST,CONSUMER,OFFICE,DESK,4,1993,1993-11-01 +26,323,GERMANY,WEST,CONSUMER,OFFICE,DESK,4,1993,1993-12-01 +680,299,GERMANY,WEST,CONSUMER,OFFICE,DESK,1,1994,1994-01-01 +364,811,GERMANY,WEST,CONSUMER,OFFICE,DESK,1,1994,1994-02-01 +572,739,GERMANY,WEST,CONSUMER,OFFICE,DESK,1,1994,1994-03-01 +145,889,GERMANY,WEST,CONSUMER,OFFICE,DESK,2,1994,1994-04-01 +644,189,GERMANY,WEST,CONSUMER,OFFICE,DESK,2,1994,1994-05-01 +87,698,GERMANY,WEST,CONSUMER,OFFICE,DESK,2,1994,1994-06-01 +620,646,GERMANY,WEST,CONSUMER,OFFICE,DESK,3,1994,1994-07-01 +535,562,GERMANY,WEST,CONSUMER,OFFICE,DESK,3,1994,1994-08-01 +661,753,GERMANY,WEST,CONSUMER,OFFICE,DESK,3,1994,1994-09-01 +884,425,GERMANY,WEST,CONSUMER,OFFICE,DESK,4,1994,1994-10-01 +689,693,GERMANY,WEST,CONSUMER,OFFICE,DESK,4,1994,1994-11-01 +646,941,GERMANY,WEST,CONSUMER,OFFICE,DESK,4,1994,1994-12-01 +4,975,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,1,1993,1993-01-01 +813,455,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,1,1993,1993-02-01 +773,260,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,1,1993,1993-03-01 +205,69,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,2,1993,1993-04-01 +657,147,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,2,1993,1993-05-01 +154,533,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,2,1993,1993-06-01 +747,881,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,3,1993,1993-07-01 +787,457,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,3,1993,1993-08-01 +867,441,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,3,1993,1993-09-01 +307,859,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,4,1993,1993-10-01 +571,177,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,4,1993,1993-11-01 +92,633,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,4,1993,1993-12-01 +269,382,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,1,1994,1994-01-01 +764,707,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,1,1994,1994-02-01 
+662,566,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,1,1994,1994-03-01 +818,349,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,2,1994,1994-04-01 +617,128,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,2,1994,1994-05-01 +649,231,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,2,1994,1994-06-01 +895,258,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,3,1994,1994-07-01 +750,812,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,3,1994,1994-08-01 +738,362,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,3,1994,1994-09-01 +107,133,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,4,1994,1994-10-01 +278,60,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,4,1994,1994-11-01 +32,88,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,4,1994,1994-12-01 +129,378,U.S.A.,EAST,EDUCATION,FURNITURE,BED,1,1993,1993-01-01 +187,569,U.S.A.,EAST,EDUCATION,FURNITURE,BED,1,1993,1993-02-01 +670,186,U.S.A.,EAST,EDUCATION,FURNITURE,BED,1,1993,1993-03-01 +678,875,U.S.A.,EAST,EDUCATION,FURNITURE,BED,2,1993,1993-04-01 +423,636,U.S.A.,EAST,EDUCATION,FURNITURE,BED,2,1993,1993-05-01 +389,360,U.S.A.,EAST,EDUCATION,FURNITURE,BED,2,1993,1993-06-01 +257,677,U.S.A.,EAST,EDUCATION,FURNITURE,BED,3,1993,1993-07-01 +780,708,U.S.A.,EAST,EDUCATION,FURNITURE,BED,3,1993,1993-08-01 +159,158,U.S.A.,EAST,EDUCATION,FURNITURE,BED,3,1993,1993-09-01 +97,384,U.S.A.,EAST,EDUCATION,FURNITURE,BED,4,1993,1993-10-01 +479,927,U.S.A.,EAST,EDUCATION,FURNITURE,BED,4,1993,1993-11-01 +9,134,U.S.A.,EAST,EDUCATION,FURNITURE,BED,4,1993,1993-12-01 +614,273,U.S.A.,EAST,EDUCATION,FURNITURE,BED,1,1994,1994-01-01 +261,27,U.S.A.,EAST,EDUCATION,FURNITURE,BED,1,1994,1994-02-01 +115,209,U.S.A.,EAST,EDUCATION,FURNITURE,BED,1,1994,1994-03-01 +358,470,U.S.A.,EAST,EDUCATION,FURNITURE,BED,2,1994,1994-04-01 +133,219,U.S.A.,EAST,EDUCATION,FURNITURE,BED,2,1994,1994-05-01 +891,907,U.S.A.,EAST,EDUCATION,FURNITURE,BED,2,1994,1994-06-01 +702,778,U.S.A.,EAST,EDUCATION,FURNITURE,BED,3,1994,1994-07-01 +58,998,U.S.A.,EAST,EDUCATION,FURNITURE,BED,3,1994,1994-08-01 +606,194,U.S.A.,EAST,EDUCATION,FURNITURE,BED,3,1994,1994-09-01 
+668,933,U.S.A.,EAST,EDUCATION,FURNITURE,BED,4,1994,1994-10-01 +813,708,U.S.A.,EAST,EDUCATION,FURNITURE,BED,4,1994,1994-11-01 +450,949,U.S.A.,EAST,EDUCATION,FURNITURE,BED,4,1994,1994-12-01 +956,579,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,1,1993,1993-01-01 +276,131,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,1,1993,1993-02-01 +889,689,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,1,1993,1993-03-01 +708,908,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,2,1993,1993-04-01 +14,524,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,2,1993,1993-05-01 +904,336,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,2,1993,1993-06-01 +272,916,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,3,1993,1993-07-01 +257,236,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,3,1993,1993-08-01 +343,965,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,3,1993,1993-09-01 +80,350,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,4,1993,1993-10-01 +530,599,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,4,1993,1993-11-01 +340,901,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,4,1993,1993-12-01 +595,935,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,1,1994,1994-01-01 +47,667,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,1,1994,1994-02-01 +279,104,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,1,1994,1994-03-01 +293,803,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,2,1994,1994-04-01 +162,64,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,2,1994,1994-05-01 +935,825,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,2,1994,1994-06-01 +689,839,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,3,1994,1994-07-01 +484,184,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,3,1994,1994-08-01 +230,348,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,3,1994,1994-09-01 +164,904,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,4,1994,1994-10-01 +401,219,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,4,1994,1994-11-01 +607,381,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,4,1994,1994-12-01 +229,524,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,1,1993,1993-01-01 +786,902,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,1,1993,1993-02-01 +92,212,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,1,1993,1993-03-01 +455,762,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,2,1993,1993-04-01 +409,182,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,2,1993,1993-05-01 
+166,442,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,2,1993,1993-06-01 +277,919,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,3,1993,1993-07-01 +92,67,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,3,1993,1993-08-01 +631,741,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,3,1993,1993-09-01 +390,617,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,4,1993,1993-10-01 +403,214,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,4,1993,1993-11-01 +964,202,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,4,1993,1993-12-01 +223,788,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,1,1994,1994-01-01 +684,639,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,1,1994,1994-02-01 +645,336,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,1,1994,1994-03-01 +470,937,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,2,1994,1994-04-01 +424,399,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,2,1994,1994-05-01 +862,21,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,2,1994,1994-06-01 +736,125,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,3,1994,1994-07-01 +554,635,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,3,1994,1994-08-01 +790,229,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,3,1994,1994-09-01 +115,770,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,4,1994,1994-10-01 +853,622,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,4,1994,1994-11-01 +643,109,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,4,1994,1994-12-01 +794,975,U.S.A.,EAST,EDUCATION,OFFICE,DESK,1,1993,1993-01-01 +892,820,U.S.A.,EAST,EDUCATION,OFFICE,DESK,1,1993,1993-02-01 +728,123,U.S.A.,EAST,EDUCATION,OFFICE,DESK,1,1993,1993-03-01 +744,135,U.S.A.,EAST,EDUCATION,OFFICE,DESK,2,1993,1993-04-01 +678,535,U.S.A.,EAST,EDUCATION,OFFICE,DESK,2,1993,1993-05-01 +768,971,U.S.A.,EAST,EDUCATION,OFFICE,DESK,2,1993,1993-06-01 +234,166,U.S.A.,EAST,EDUCATION,OFFICE,DESK,3,1993,1993-07-01 +333,814,U.S.A.,EAST,EDUCATION,OFFICE,DESK,3,1993,1993-08-01 +968,557,U.S.A.,EAST,EDUCATION,OFFICE,DESK,3,1993,1993-09-01 +119,820,U.S.A.,EAST,EDUCATION,OFFICE,DESK,4,1993,1993-10-01 +469,486,U.S.A.,EAST,EDUCATION,OFFICE,DESK,4,1993,1993-11-01 +261,429,U.S.A.,EAST,EDUCATION,OFFICE,DESK,4,1993,1993-12-01 +984,65,U.S.A.,EAST,EDUCATION,OFFICE,DESK,1,1994,1994-01-01 
+845,977,U.S.A.,EAST,EDUCATION,OFFICE,DESK,1,1994,1994-02-01 +374,410,U.S.A.,EAST,EDUCATION,OFFICE,DESK,1,1994,1994-03-01 +687,150,U.S.A.,EAST,EDUCATION,OFFICE,DESK,2,1994,1994-04-01 +157,630,U.S.A.,EAST,EDUCATION,OFFICE,DESK,2,1994,1994-05-01 +49,488,U.S.A.,EAST,EDUCATION,OFFICE,DESK,2,1994,1994-06-01 +817,112,U.S.A.,EAST,EDUCATION,OFFICE,DESK,3,1994,1994-07-01 +223,598,U.S.A.,EAST,EDUCATION,OFFICE,DESK,3,1994,1994-08-01 +433,705,U.S.A.,EAST,EDUCATION,OFFICE,DESK,3,1994,1994-09-01 +41,226,U.S.A.,EAST,EDUCATION,OFFICE,DESK,4,1994,1994-10-01 +396,979,U.S.A.,EAST,EDUCATION,OFFICE,DESK,4,1994,1994-11-01 +131,19,U.S.A.,EAST,EDUCATION,OFFICE,DESK,4,1994,1994-12-01 +521,204,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,1,1993,1993-01-01 +751,805,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,1,1993,1993-02-01 +45,549,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,1,1993,1993-03-01 +144,912,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,2,1993,1993-04-01 +119,427,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,2,1993,1993-05-01 +728,1,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,2,1993,1993-06-01 +120,540,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,3,1993,1993-07-01 +657,940,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,3,1993,1993-08-01 +409,644,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,3,1993,1993-09-01 +881,821,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,4,1993,1993-10-01 +113,560,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,4,1993,1993-11-01 +831,309,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,4,1993,1993-12-01 +129,1000,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,1,1994,1994-01-01 +76,945,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,1,1994,1994-02-01 +260,931,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,1,1994,1994-03-01 +882,504,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,2,1994,1994-04-01 +157,950,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,2,1994,1994-05-01 +443,278,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,2,1994,1994-06-01 +111,225,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,3,1994,1994-07-01 +497,6,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,3,1994,1994-08-01 +321,124,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,3,1994,1994-09-01 
+194,206,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,4,1994,1994-10-01 +684,320,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,4,1994,1994-11-01 +634,270,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,4,1994,1994-12-01 +622,278,U.S.A.,EAST,CONSUMER,FURNITURE,BED,1,1993,1993-01-01 +689,447,U.S.A.,EAST,CONSUMER,FURNITURE,BED,1,1993,1993-02-01 +120,170,U.S.A.,EAST,CONSUMER,FURNITURE,BED,1,1993,1993-03-01 +374,87,U.S.A.,EAST,CONSUMER,FURNITURE,BED,2,1993,1993-04-01 +926,384,U.S.A.,EAST,CONSUMER,FURNITURE,BED,2,1993,1993-05-01 +687,574,U.S.A.,EAST,CONSUMER,FURNITURE,BED,2,1993,1993-06-01 +600,585,U.S.A.,EAST,CONSUMER,FURNITURE,BED,3,1993,1993-07-01 +779,947,U.S.A.,EAST,CONSUMER,FURNITURE,BED,3,1993,1993-08-01 +223,984,U.S.A.,EAST,CONSUMER,FURNITURE,BED,3,1993,1993-09-01 +628,189,U.S.A.,EAST,CONSUMER,FURNITURE,BED,4,1993,1993-10-01 +326,364,U.S.A.,EAST,CONSUMER,FURNITURE,BED,4,1993,1993-11-01 +836,49,U.S.A.,EAST,CONSUMER,FURNITURE,BED,4,1993,1993-12-01 +361,851,U.S.A.,EAST,CONSUMER,FURNITURE,BED,1,1994,1994-01-01 +444,643,U.S.A.,EAST,CONSUMER,FURNITURE,BED,1,1994,1994-02-01 +501,143,U.S.A.,EAST,CONSUMER,FURNITURE,BED,1,1994,1994-03-01 +743,763,U.S.A.,EAST,CONSUMER,FURNITURE,BED,2,1994,1994-04-01 +861,987,U.S.A.,EAST,CONSUMER,FURNITURE,BED,2,1994,1994-05-01 +203,264,U.S.A.,EAST,CONSUMER,FURNITURE,BED,2,1994,1994-06-01 +762,439,U.S.A.,EAST,CONSUMER,FURNITURE,BED,3,1994,1994-07-01 +705,750,U.S.A.,EAST,CONSUMER,FURNITURE,BED,3,1994,1994-08-01 +153,37,U.S.A.,EAST,CONSUMER,FURNITURE,BED,3,1994,1994-09-01 +436,95,U.S.A.,EAST,CONSUMER,FURNITURE,BED,4,1994,1994-10-01 +428,79,U.S.A.,EAST,CONSUMER,FURNITURE,BED,4,1994,1994-11-01 +804,832,U.S.A.,EAST,CONSUMER,FURNITURE,BED,4,1994,1994-12-01 +805,649,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,1,1993,1993-01-01 +860,838,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,1,1993,1993-02-01 +104,439,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,1,1993,1993-03-01 +434,207,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,2,1993,1993-04-01 +912,804,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,2,1993,1993-05-01 
+571,875,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,2,1993,1993-06-01 +267,473,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,3,1993,1993-07-01 +415,845,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,3,1993,1993-08-01 +261,91,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,3,1993,1993-09-01 +746,630,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,4,1993,1993-10-01 +30,185,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,4,1993,1993-11-01 +662,317,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,4,1993,1993-12-01 +916,88,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,1,1994,1994-01-01 +415,607,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,1,1994,1994-02-01 +514,35,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,1,1994,1994-03-01 +756,680,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,2,1994,1994-04-01 +461,78,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,2,1994,1994-05-01 +460,117,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,2,1994,1994-06-01 +305,440,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,3,1994,1994-07-01 +198,652,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,3,1994,1994-08-01 +234,249,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,3,1994,1994-09-01 +638,658,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,4,1994,1994-10-01 +88,563,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,4,1994,1994-11-01 +751,737,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,4,1994,1994-12-01 +816,789,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,1,1993,1993-01-01 +437,988,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,1,1993,1993-02-01 +715,220,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,1,1993,1993-03-01 +780,946,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,2,1993,1993-04-01 +245,986,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,2,1993,1993-05-01 +201,129,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,2,1993,1993-06-01 +815,433,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,3,1993,1993-07-01 +865,492,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,3,1993,1993-08-01 +634,306,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,3,1993,1993-09-01 +901,154,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,4,1993,1993-10-01 +789,206,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,4,1993,1993-11-01 +882,81,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,4,1993,1993-12-01 +953,882,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,1,1994,1994-01-01 
+862,848,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,1,1994,1994-02-01 +628,664,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,1,1994,1994-03-01 +765,389,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,2,1994,1994-04-01 +741,182,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,2,1994,1994-05-01 +61,505,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,2,1994,1994-06-01 +470,861,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,3,1994,1994-07-01 +869,263,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,3,1994,1994-08-01 +650,400,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,3,1994,1994-09-01 +750,556,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,4,1994,1994-10-01 +602,497,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,4,1994,1994-11-01 +54,181,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,4,1994,1994-12-01 +384,619,U.S.A.,EAST,CONSUMER,OFFICE,DESK,1,1993,1993-01-01 +161,332,U.S.A.,EAST,CONSUMER,OFFICE,DESK,1,1993,1993-02-01 +977,669,U.S.A.,EAST,CONSUMER,OFFICE,DESK,1,1993,1993-03-01 +615,487,U.S.A.,EAST,CONSUMER,OFFICE,DESK,2,1993,1993-04-01 +783,994,U.S.A.,EAST,CONSUMER,OFFICE,DESK,2,1993,1993-05-01 +977,331,U.S.A.,EAST,CONSUMER,OFFICE,DESK,2,1993,1993-06-01 +375,739,U.S.A.,EAST,CONSUMER,OFFICE,DESK,3,1993,1993-07-01 +298,665,U.S.A.,EAST,CONSUMER,OFFICE,DESK,3,1993,1993-08-01 +104,921,U.S.A.,EAST,CONSUMER,OFFICE,DESK,3,1993,1993-09-01 +713,862,U.S.A.,EAST,CONSUMER,OFFICE,DESK,4,1993,1993-10-01 +556,662,U.S.A.,EAST,CONSUMER,OFFICE,DESK,4,1993,1993-11-01 +323,517,U.S.A.,EAST,CONSUMER,OFFICE,DESK,4,1993,1993-12-01 +391,352,U.S.A.,EAST,CONSUMER,OFFICE,DESK,1,1994,1994-01-01 +593,166,U.S.A.,EAST,CONSUMER,OFFICE,DESK,1,1994,1994-02-01 +906,859,U.S.A.,EAST,CONSUMER,OFFICE,DESK,1,1994,1994-03-01 +130,571,U.S.A.,EAST,CONSUMER,OFFICE,DESK,2,1994,1994-04-01 +613,976,U.S.A.,EAST,CONSUMER,OFFICE,DESK,2,1994,1994-05-01 +58,466,U.S.A.,EAST,CONSUMER,OFFICE,DESK,2,1994,1994-06-01 +314,79,U.S.A.,EAST,CONSUMER,OFFICE,DESK,3,1994,1994-07-01 +67,864,U.S.A.,EAST,CONSUMER,OFFICE,DESK,3,1994,1994-08-01 +654,623,U.S.A.,EAST,CONSUMER,OFFICE,DESK,3,1994,1994-09-01 +312,170,U.S.A.,EAST,CONSUMER,OFFICE,DESK,4,1994,1994-10-01 
+349,662,U.S.A.,EAST,CONSUMER,OFFICE,DESK,4,1994,1994-11-01 +415,763,U.S.A.,EAST,CONSUMER,OFFICE,DESK,4,1994,1994-12-01 +404,896,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,1,1993,1993-01-01 +22,973,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,1,1993,1993-02-01 +744,161,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,1,1993,1993-03-01 +804,934,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,2,1993,1993-04-01 +101,697,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,2,1993,1993-05-01 +293,116,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,2,1993,1993-06-01 +266,84,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,3,1993,1993-07-01 +372,604,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,3,1993,1993-08-01 +38,371,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,3,1993,1993-09-01 +385,783,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,4,1993,1993-10-01 +262,335,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,4,1993,1993-11-01 +961,321,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,4,1993,1993-12-01 +831,177,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,1,1994,1994-01-01 +579,371,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,1,1994,1994-02-01 +301,583,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,1,1994,1994-03-01 +693,364,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,2,1994,1994-04-01 +895,343,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,2,1994,1994-05-01 +320,854,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,2,1994,1994-06-01 +284,691,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,3,1994,1994-07-01 +362,387,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,3,1994,1994-08-01 +132,298,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,3,1994,1994-09-01 +42,635,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,4,1994,1994-10-01 +118,81,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,4,1994,1994-11-01 +42,375,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,4,1994,1994-12-01 +18,846,U.S.A.,WEST,EDUCATION,FURNITURE,BED,1,1993,1993-01-01 +512,933,U.S.A.,WEST,EDUCATION,FURNITURE,BED,1,1993,1993-02-01 +337,237,U.S.A.,WEST,EDUCATION,FURNITURE,BED,1,1993,1993-03-01 +167,964,U.S.A.,WEST,EDUCATION,FURNITURE,BED,2,1993,1993-04-01 +749,382,U.S.A.,WEST,EDUCATION,FURNITURE,BED,2,1993,1993-05-01 
+890,610,U.S.A.,WEST,EDUCATION,FURNITURE,BED,2,1993,1993-06-01 +910,148,U.S.A.,WEST,EDUCATION,FURNITURE,BED,3,1993,1993-07-01 +403,837,U.S.A.,WEST,EDUCATION,FURNITURE,BED,3,1993,1993-08-01 +403,85,U.S.A.,WEST,EDUCATION,FURNITURE,BED,3,1993,1993-09-01 +661,425,U.S.A.,WEST,EDUCATION,FURNITURE,BED,4,1993,1993-10-01 +485,633,U.S.A.,WEST,EDUCATION,FURNITURE,BED,4,1993,1993-11-01 +789,515,U.S.A.,WEST,EDUCATION,FURNITURE,BED,4,1993,1993-12-01 +415,512,U.S.A.,WEST,EDUCATION,FURNITURE,BED,1,1994,1994-01-01 +418,156,U.S.A.,WEST,EDUCATION,FURNITURE,BED,1,1994,1994-02-01 +163,464,U.S.A.,WEST,EDUCATION,FURNITURE,BED,1,1994,1994-03-01 +298,813,U.S.A.,WEST,EDUCATION,FURNITURE,BED,2,1994,1994-04-01 +584,455,U.S.A.,WEST,EDUCATION,FURNITURE,BED,2,1994,1994-05-01 +797,366,U.S.A.,WEST,EDUCATION,FURNITURE,BED,2,1994,1994-06-01 +767,734,U.S.A.,WEST,EDUCATION,FURNITURE,BED,3,1994,1994-07-01 +984,451,U.S.A.,WEST,EDUCATION,FURNITURE,BED,3,1994,1994-08-01 +388,134,U.S.A.,WEST,EDUCATION,FURNITURE,BED,3,1994,1994-09-01 +924,547,U.S.A.,WEST,EDUCATION,FURNITURE,BED,4,1994,1994-10-01 +566,802,U.S.A.,WEST,EDUCATION,FURNITURE,BED,4,1994,1994-11-01 +390,61,U.S.A.,WEST,EDUCATION,FURNITURE,BED,4,1994,1994-12-01 +608,556,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,1,1993,1993-01-01 +840,202,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,1,1993,1993-02-01 +112,964,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,1,1993,1993-03-01 +288,112,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,2,1993,1993-04-01 +408,445,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,2,1993,1993-05-01 +876,884,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,2,1993,1993-06-01 +224,348,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,3,1993,1993-07-01 +133,564,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,3,1993,1993-08-01 +662,568,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,3,1993,1993-09-01 +68,882,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,4,1993,1993-10-01 +626,542,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,4,1993,1993-11-01 +678,119,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,4,1993,1993-12-01 +361,248,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,1,1994,1994-01-01 
+464,868,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,1,1994,1994-02-01 +681,841,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,1,1994,1994-03-01 +377,484,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,2,1994,1994-04-01 +222,986,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,2,1994,1994-05-01 +972,39,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,2,1994,1994-06-01 +56,930,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,3,1994,1994-07-01 +695,252,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,3,1994,1994-08-01 +908,794,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,3,1994,1994-09-01 +328,658,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,4,1994,1994-10-01 +891,139,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,4,1994,1994-11-01 +265,331,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,4,1994,1994-12-01 +251,261,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,1,1993,1993-01-01 +783,122,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,1,1993,1993-02-01 +425,296,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,1,1993,1993-03-01 +859,391,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,2,1993,1993-04-01 +314,75,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,2,1993,1993-05-01 +153,731,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,2,1993,1993-06-01 +955,883,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,3,1993,1993-07-01 +654,707,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,3,1993,1993-08-01 +693,97,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,3,1993,1993-09-01 +757,390,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,4,1993,1993-10-01 +221,237,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,4,1993,1993-11-01 +942,496,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,4,1993,1993-12-01 +31,814,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,1,1994,1994-01-01 +540,765,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,1,1994,1994-02-01 +352,308,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,1,1994,1994-03-01 +904,327,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,2,1994,1994-04-01 +436,266,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,2,1994,1994-05-01 +281,699,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,2,1994,1994-06-01 +801,599,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,3,1994,1994-07-01 +273,950,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,3,1994,1994-08-01 +716,117,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,3,1994,1994-09-01 
+902,632,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,4,1994,1994-10-01 +341,35,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,4,1994,1994-11-01 +155,562,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,4,1994,1994-12-01 +796,144,U.S.A.,WEST,EDUCATION,OFFICE,DESK,1,1993,1993-01-01 +257,142,U.S.A.,WEST,EDUCATION,OFFICE,DESK,1,1993,1993-02-01 +611,273,U.S.A.,WEST,EDUCATION,OFFICE,DESK,1,1993,1993-03-01 +6,915,U.S.A.,WEST,EDUCATION,OFFICE,DESK,2,1993,1993-04-01 +125,920,U.S.A.,WEST,EDUCATION,OFFICE,DESK,2,1993,1993-05-01 +745,294,U.S.A.,WEST,EDUCATION,OFFICE,DESK,2,1993,1993-06-01 +437,681,U.S.A.,WEST,EDUCATION,OFFICE,DESK,3,1993,1993-07-01 +906,86,U.S.A.,WEST,EDUCATION,OFFICE,DESK,3,1993,1993-08-01 +844,764,U.S.A.,WEST,EDUCATION,OFFICE,DESK,3,1993,1993-09-01 +413,269,U.S.A.,WEST,EDUCATION,OFFICE,DESK,4,1993,1993-10-01 +869,138,U.S.A.,WEST,EDUCATION,OFFICE,DESK,4,1993,1993-11-01 +403,834,U.S.A.,WEST,EDUCATION,OFFICE,DESK,4,1993,1993-12-01 +137,112,U.S.A.,WEST,EDUCATION,OFFICE,DESK,1,1994,1994-01-01 +922,921,U.S.A.,WEST,EDUCATION,OFFICE,DESK,1,1994,1994-02-01 +202,859,U.S.A.,WEST,EDUCATION,OFFICE,DESK,1,1994,1994-03-01 +955,442,U.S.A.,WEST,EDUCATION,OFFICE,DESK,2,1994,1994-04-01 +781,593,U.S.A.,WEST,EDUCATION,OFFICE,DESK,2,1994,1994-05-01 +12,346,U.S.A.,WEST,EDUCATION,OFFICE,DESK,2,1994,1994-06-01 +931,312,U.S.A.,WEST,EDUCATION,OFFICE,DESK,3,1994,1994-07-01 +95,690,U.S.A.,WEST,EDUCATION,OFFICE,DESK,3,1994,1994-08-01 +795,344,U.S.A.,WEST,EDUCATION,OFFICE,DESK,3,1994,1994-09-01 +542,784,U.S.A.,WEST,EDUCATION,OFFICE,DESK,4,1994,1994-10-01 +935,639,U.S.A.,WEST,EDUCATION,OFFICE,DESK,4,1994,1994-11-01 +269,726,U.S.A.,WEST,EDUCATION,OFFICE,DESK,4,1994,1994-12-01 +197,596,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,1,1993,1993-01-01 +828,263,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,1,1993,1993-02-01 +461,194,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,1,1993,1993-03-01 +35,895,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,2,1993,1993-04-01 +88,502,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,2,1993,1993-05-01 
+832,342,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,2,1993,1993-06-01 +900,421,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,3,1993,1993-07-01 +368,901,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,3,1993,1993-08-01 +201,474,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,3,1993,1993-09-01 +758,571,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,4,1993,1993-10-01 +504,511,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,4,1993,1993-11-01 +864,379,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,4,1993,1993-12-01 +574,68,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,1,1994,1994-01-01 +61,210,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,1,1994,1994-02-01 +565,478,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,1,1994,1994-03-01 +475,296,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,2,1994,1994-04-01 +44,664,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,2,1994,1994-05-01 +145,880,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,2,1994,1994-06-01 +813,607,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,3,1994,1994-07-01 +703,97,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,3,1994,1994-08-01 +757,908,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,3,1994,1994-09-01 +96,152,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,4,1994,1994-10-01 +860,622,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,4,1994,1994-11-01 +750,309,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,4,1994,1994-12-01 +585,912,U.S.A.,WEST,CONSUMER,FURNITURE,BED,1,1993,1993-01-01 +127,429,U.S.A.,WEST,CONSUMER,FURNITURE,BED,1,1993,1993-02-01 +669,580,U.S.A.,WEST,CONSUMER,FURNITURE,BED,1,1993,1993-03-01 +708,179,U.S.A.,WEST,CONSUMER,FURNITURE,BED,2,1993,1993-04-01 +830,119,U.S.A.,WEST,CONSUMER,FURNITURE,BED,2,1993,1993-05-01 +550,369,U.S.A.,WEST,CONSUMER,FURNITURE,BED,2,1993,1993-06-01 +762,882,U.S.A.,WEST,CONSUMER,FURNITURE,BED,3,1993,1993-07-01 +468,727,U.S.A.,WEST,CONSUMER,FURNITURE,BED,3,1993,1993-08-01 +151,823,U.S.A.,WEST,CONSUMER,FURNITURE,BED,3,1993,1993-09-01 +103,783,U.S.A.,WEST,CONSUMER,FURNITURE,BED,4,1993,1993-10-01 +876,884,U.S.A.,WEST,CONSUMER,FURNITURE,BED,4,1993,1993-11-01 +881,891,U.S.A.,WEST,CONSUMER,FURNITURE,BED,4,1993,1993-12-01 +116,909,U.S.A.,WEST,CONSUMER,FURNITURE,BED,1,1994,1994-01-01 
+677,765,U.S.A.,WEST,CONSUMER,FURNITURE,BED,1,1994,1994-02-01 +477,180,U.S.A.,WEST,CONSUMER,FURNITURE,BED,1,1994,1994-03-01 +154,712,U.S.A.,WEST,CONSUMER,FURNITURE,BED,2,1994,1994-04-01 +331,175,U.S.A.,WEST,CONSUMER,FURNITURE,BED,2,1994,1994-05-01 +784,869,U.S.A.,WEST,CONSUMER,FURNITURE,BED,2,1994,1994-06-01 +563,820,U.S.A.,WEST,CONSUMER,FURNITURE,BED,3,1994,1994-07-01 +229,554,U.S.A.,WEST,CONSUMER,FURNITURE,BED,3,1994,1994-08-01 +451,126,U.S.A.,WEST,CONSUMER,FURNITURE,BED,3,1994,1994-09-01 +974,760,U.S.A.,WEST,CONSUMER,FURNITURE,BED,4,1994,1994-10-01 +484,446,U.S.A.,WEST,CONSUMER,FURNITURE,BED,4,1994,1994-11-01 +69,254,U.S.A.,WEST,CONSUMER,FURNITURE,BED,4,1994,1994-12-01 +755,516,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,1,1993,1993-01-01 +331,779,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,1,1993,1993-02-01 +482,987,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,1,1993,1993-03-01 +632,318,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,2,1993,1993-04-01 +750,427,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,2,1993,1993-05-01 +618,86,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,2,1993,1993-06-01 +935,553,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,3,1993,1993-07-01 +716,315,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,3,1993,1993-08-01 +205,328,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,3,1993,1993-09-01 +215,521,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,4,1993,1993-10-01 +871,156,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,4,1993,1993-11-01 +552,841,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,4,1993,1993-12-01 +619,623,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,1,1994,1994-01-01 +701,849,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,1,1994,1994-02-01 +104,438,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,1,1994,1994-03-01 +114,719,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,2,1994,1994-04-01 +854,906,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,2,1994,1994-05-01 +563,267,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,2,1994,1994-06-01 +73,542,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,3,1994,1994-07-01 +427,552,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,3,1994,1994-08-01 +348,428,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,3,1994,1994-09-01 
+148,158,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,4,1994,1994-10-01 +895,379,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,4,1994,1994-11-01 +394,142,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,4,1994,1994-12-01 +792,588,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,1,1993,1993-01-01 +175,506,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,1,1993,1993-02-01 +208,382,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,1,1993,1993-03-01 +354,132,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,2,1993,1993-04-01 +163,652,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,2,1993,1993-05-01 +336,723,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,2,1993,1993-06-01 +804,682,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,3,1993,1993-07-01 +863,382,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,3,1993,1993-08-01 +326,125,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,3,1993,1993-09-01 +568,321,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,4,1993,1993-10-01 +691,922,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,4,1993,1993-11-01 +152,884,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,4,1993,1993-12-01 +565,38,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,1,1994,1994-01-01 +38,194,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,1,1994,1994-02-01 +185,996,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,1,1994,1994-03-01 +318,532,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,2,1994,1994-04-01 +960,391,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,2,1994,1994-05-01 +122,104,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,2,1994,1994-06-01 +400,22,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,3,1994,1994-07-01 +301,650,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,3,1994,1994-08-01 +909,143,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,3,1994,1994-09-01 +433,999,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,4,1994,1994-10-01 +508,415,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,4,1994,1994-11-01 +648,350,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,4,1994,1994-12-01 +793,342,U.S.A.,WEST,CONSUMER,OFFICE,DESK,1,1993,1993-01-01 +129,215,U.S.A.,WEST,CONSUMER,OFFICE,DESK,1,1993,1993-02-01 +481,52,U.S.A.,WEST,CONSUMER,OFFICE,DESK,1,1993,1993-03-01 +406,292,U.S.A.,WEST,CONSUMER,OFFICE,DESK,2,1993,1993-04-01 +512,862,U.S.A.,WEST,CONSUMER,OFFICE,DESK,2,1993,1993-05-01 
+668,309,U.S.A.,WEST,CONSUMER,OFFICE,DESK,2,1993,1993-06-01 +551,886,U.S.A.,WEST,CONSUMER,OFFICE,DESK,3,1993,1993-07-01 +124,172,U.S.A.,WEST,CONSUMER,OFFICE,DESK,3,1993,1993-08-01 +655,912,U.S.A.,WEST,CONSUMER,OFFICE,DESK,3,1993,1993-09-01 +523,666,U.S.A.,WEST,CONSUMER,OFFICE,DESK,4,1993,1993-10-01 +739,656,U.S.A.,WEST,CONSUMER,OFFICE,DESK,4,1993,1993-11-01 +87,145,U.S.A.,WEST,CONSUMER,OFFICE,DESK,4,1993,1993-12-01 +890,664,U.S.A.,WEST,CONSUMER,OFFICE,DESK,1,1994,1994-01-01 +665,639,U.S.A.,WEST,CONSUMER,OFFICE,DESK,1,1994,1994-02-01 +329,707,U.S.A.,WEST,CONSUMER,OFFICE,DESK,1,1994,1994-03-01 +417,891,U.S.A.,WEST,CONSUMER,OFFICE,DESK,2,1994,1994-04-01 +828,466,U.S.A.,WEST,CONSUMER,OFFICE,DESK,2,1994,1994-05-01 +298,451,U.S.A.,WEST,CONSUMER,OFFICE,DESK,2,1994,1994-06-01 +356,451,U.S.A.,WEST,CONSUMER,OFFICE,DESK,3,1994,1994-07-01 +909,874,U.S.A.,WEST,CONSUMER,OFFICE,DESK,3,1994,1994-08-01 +251,805,U.S.A.,WEST,CONSUMER,OFFICE,DESK,3,1994,1994-09-01 +526,426,U.S.A.,WEST,CONSUMER,OFFICE,DESK,4,1994,1994-10-01 +652,932,U.S.A.,WEST,CONSUMER,OFFICE,DESK,4,1994,1994-11-01 +573,581,U.S.A.,WEST,CONSUMER,OFFICE,DESK,4,1994,1994-12-01 diff --git a/pandas/io/tests/sas/data/productsales.sas7bdat b/pandas/tests/io/sas/data/productsales.sas7bdat similarity index 100% rename from pandas/io/tests/sas/data/productsales.sas7bdat rename to pandas/tests/io/sas/data/productsales.sas7bdat diff --git a/pandas/io/tests/sas/data/test1.sas7bdat b/pandas/tests/io/sas/data/test1.sas7bdat similarity index 100% rename from pandas/io/tests/sas/data/test1.sas7bdat rename to pandas/tests/io/sas/data/test1.sas7bdat diff --git a/pandas/io/tests/sas/data/test10.sas7bdat b/pandas/tests/io/sas/data/test10.sas7bdat similarity index 100% rename from pandas/io/tests/sas/data/test10.sas7bdat rename to pandas/tests/io/sas/data/test10.sas7bdat diff --git a/pandas/io/tests/sas/data/test11.sas7bdat b/pandas/tests/io/sas/data/test11.sas7bdat similarity index 100% rename from pandas/io/tests/sas/data/test11.sas7bdat 
rename to pandas/tests/io/sas/data/test11.sas7bdat diff --git a/pandas/io/tests/sas/data/test12.sas7bdat b/pandas/tests/io/sas/data/test12.sas7bdat similarity index 100% rename from pandas/io/tests/sas/data/test12.sas7bdat rename to pandas/tests/io/sas/data/test12.sas7bdat diff --git a/pandas/io/tests/sas/data/test13.sas7bdat b/pandas/tests/io/sas/data/test13.sas7bdat similarity index 100% rename from pandas/io/tests/sas/data/test13.sas7bdat rename to pandas/tests/io/sas/data/test13.sas7bdat diff --git a/pandas/io/tests/sas/data/test14.sas7bdat b/pandas/tests/io/sas/data/test14.sas7bdat similarity index 100% rename from pandas/io/tests/sas/data/test14.sas7bdat rename to pandas/tests/io/sas/data/test14.sas7bdat diff --git a/pandas/io/tests/sas/data/test15.sas7bdat b/pandas/tests/io/sas/data/test15.sas7bdat similarity index 100% rename from pandas/io/tests/sas/data/test15.sas7bdat rename to pandas/tests/io/sas/data/test15.sas7bdat diff --git a/pandas/io/tests/sas/data/test16.sas7bdat b/pandas/tests/io/sas/data/test16.sas7bdat similarity index 100% rename from pandas/io/tests/sas/data/test16.sas7bdat rename to pandas/tests/io/sas/data/test16.sas7bdat diff --git a/pandas/io/tests/sas/data/test2.sas7bdat b/pandas/tests/io/sas/data/test2.sas7bdat similarity index 100% rename from pandas/io/tests/sas/data/test2.sas7bdat rename to pandas/tests/io/sas/data/test2.sas7bdat diff --git a/pandas/io/tests/sas/data/test3.sas7bdat b/pandas/tests/io/sas/data/test3.sas7bdat similarity index 100% rename from pandas/io/tests/sas/data/test3.sas7bdat rename to pandas/tests/io/sas/data/test3.sas7bdat diff --git a/pandas/io/tests/sas/data/test4.sas7bdat b/pandas/tests/io/sas/data/test4.sas7bdat similarity index 100% rename from pandas/io/tests/sas/data/test4.sas7bdat rename to pandas/tests/io/sas/data/test4.sas7bdat diff --git a/pandas/io/tests/sas/data/test5.sas7bdat b/pandas/tests/io/sas/data/test5.sas7bdat similarity index 100% rename from pandas/io/tests/sas/data/test5.sas7bdat rename 
to pandas/tests/io/sas/data/test5.sas7bdat diff --git a/pandas/io/tests/sas/data/test6.sas7bdat b/pandas/tests/io/sas/data/test6.sas7bdat similarity index 100% rename from pandas/io/tests/sas/data/test6.sas7bdat rename to pandas/tests/io/sas/data/test6.sas7bdat diff --git a/pandas/io/tests/sas/data/test7.sas7bdat b/pandas/tests/io/sas/data/test7.sas7bdat similarity index 100% rename from pandas/io/tests/sas/data/test7.sas7bdat rename to pandas/tests/io/sas/data/test7.sas7bdat diff --git a/pandas/io/tests/sas/data/test8.sas7bdat b/pandas/tests/io/sas/data/test8.sas7bdat similarity index 100% rename from pandas/io/tests/sas/data/test8.sas7bdat rename to pandas/tests/io/sas/data/test8.sas7bdat diff --git a/pandas/io/tests/sas/data/test9.sas7bdat b/pandas/tests/io/sas/data/test9.sas7bdat similarity index 100% rename from pandas/io/tests/sas/data/test9.sas7bdat rename to pandas/tests/io/sas/data/test9.sas7bdat diff --git a/pandas/io/tests/sas/data/test_12659.csv b/pandas/tests/io/sas/data/test_12659.csv similarity index 100% rename from pandas/io/tests/sas/data/test_12659.csv rename to pandas/tests/io/sas/data/test_12659.csv diff --git a/pandas/io/tests/sas/data/test_12659.sas7bdat b/pandas/tests/io/sas/data/test_12659.sas7bdat similarity index 100% rename from pandas/io/tests/sas/data/test_12659.sas7bdat rename to pandas/tests/io/sas/data/test_12659.sas7bdat diff --git a/pandas/io/tests/sas/data/test_sas7bdat_1.csv b/pandas/tests/io/sas/data/test_sas7bdat_1.csv similarity index 100% rename from pandas/io/tests/sas/data/test_sas7bdat_1.csv rename to pandas/tests/io/sas/data/test_sas7bdat_1.csv diff --git a/pandas/io/tests/sas/data/test_sas7bdat_2.csv b/pandas/tests/io/sas/data/test_sas7bdat_2.csv similarity index 100% rename from pandas/io/tests/sas/data/test_sas7bdat_2.csv rename to pandas/tests/io/sas/data/test_sas7bdat_2.csv diff --git a/pandas/io/tests/data/legacy_hdf/legacy_0.10.h5 b/pandas/tests/io/sas/data/zero_variables.sas7bdat similarity index 61% rename from 
pandas/io/tests/data/legacy_hdf/legacy_0.10.h5 rename to pandas/tests/io/sas/data/zero_variables.sas7bdat index b1439ef16361abbc0756fbf7d344fd65d8a1a473..85fec09447ec5055139f23847430227fd4226193 100644 GIT binary patch literal 149504 zcmeI&&x=)6902h1#xciGP!e*X`LH2`Y?$#4gF*|R%%}rSn)4EEqXq*FSSv#pMFcGZ zT}9BQMT`CeTeOJ=30(GrGO55_i&ib7&>COo+Yj1o8R8d*__*`JQ|4`P?_F zIU#i8`p<8jyS8oX7oWa65;oqLxVnAUmaUE5q0y)fP1HJN>uP=ZM(xQ)*cn27U8iKe zIbT0Kdu-;#nUlxQo@$<)Z5}!{Ghct;m~OV3X;HfM8@s-{IoJ51ncL!VKDf2iX-DJj zOYdC*~T*7GHhsa+OQXzOkvX#=d8!4os!a*h0@#(Y?HX>g?2HH=FJ| z@%$&(@~1mf_730P+(^B?=Nl|CCP07y0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&=1c4*BH#b(g?AOaxcIc6?K6Y=_ zYeizuy8h~=D!;uy9bSH-tUo=}Ke4yUKbzBKS;+Z;beOCXSMQdcs=OF{`1GBn)G6Eg z-mCA2>+juss%VRXU0)URMSl%n2S>g+n99m_>9fhj@0%b0bfajy`8faU-LkG|>mObD zymlzww`hyHYCiP^0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly@XrM9{n2jcD?A!IBY9A?Z--iJUk{J; zWqa4`2phxW@zafq#hMEPSL$8!SS(JTZk#TvPuyqChFIMHY}3Vk4u;|GWn!=QK7ZfO zWU*%N;C|Ze_MY(Lo9ROJ_*ow|#Jcrq7-PAe?vO^caIa(aXIwL#rmHBPZMarEWA1N> zbGg4g_eW~QJ#s#;kC$tY=Rp{)?!Lcjk5+p>7t)?WxZJnv<+%2p&f2(#fjG6eaH)0v zr3=&NFD^{Px-)0y=BDGrACCQ$+n3sAYbK9di?1xCacg#Nrqjl{ls~#y)~2~s-#Wi| zX>RtpBQG2;uNPBgM4B zQeA4}&%E>Q;;@LjE9CC1kjwQCv;8eb{>A+si3IdW}1Ceo+e zX$S`*SlmJpMcNku0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly@Sh8$GY=`! 
z|NpDlIdHtA9?xhr=UX!;>bWAf=@3QF+H@8o#eo^Vsq-_et~-%||7_4(Pmb>CLM!tTDhbN%Ii0n4paMgRZ+ literal 238321 zcmeEP1zZ(P*WW9Mh@u#X2_kkN4d&3&T_)1fDF_yJcOWK;ofw$lV1pvkCDIMjDWKo% z-Q9bENA}+5jqmsV-q%&`Y|MXW&Y3f3&YUy5yGKi1Q&3>60ETb;{1`7L#N4HRevw=K zB_8G}>X^E|n%rTK>25N;-jCQ}V0;*UZ8Ujah`j$q;&xa$dEU%KLjyxOsryqYc^Ql4 zBlf8KQ0M=o{~H|8(oiwNDUBe>WgejY%+16JO0ISX6UXa_1RIP^XYS#0$dCS+X`AUG zLBmF%KU^*;xn*WTKiYl{?kf0?D*mH}4I|~Xw_N|-`}WSSrl(?Rips<5(I5R$^;lwJ zXpYmy&X3ocAG6RfG1WFSVCKiW4wYY8QVJd6`IVR{1NAgKztK`N6;(YAQ)^8_6MYpk z>%QZevD$y#LUMe1*g z6V4v?4(rf?t(&KvtCY3BjoVrWU-q#x>U{ViX0y)OR~n^M2K*S8cuM|-@JwhkH84~s z`Z#gS&r76z{ZhEq6Z^_RoTudYup@mwiq6p+>GMq|FXxP;&o`R9ypTga`n&x7tCYTz zBKc=Twi{a?ai>QIV}xG9^$Miidf2!-;9SS^ch}PflK5F%PgC^q zqkWX0Sy5jM*R$w9+LW2U6kn*6jq6X~?48~5jD6uqb=|Ezyfl8y%f`dr#@E^}(2H43 zW8Ej2_yqj zd!qjYnK_%1w6nEp5oPbWOrnLGuh0v`NAQ&GC~*V$Egdi}8U9XxFnf8T+pnh%%h zOhc)x(`be1DBaq2dpn*c?NLd=(~0BK#b=<@^R|l@o~~FueZ@?a-l~tfiKjod8n@%A z_Q=vDvru~KO3+C>-LX_@>ui*!R>mjdsl)~wu{kL95o~eC(^u#BM&apQ6Rqo#DD5iB zX~9#ywcZ+YQF?}t?>L@ThMX>(D|qOb%@(b_89*zvzoFW&c(_Fto=%wF2vY}K)qPd> z5{x#MD2lgjfb+g@D_$i&L^%unH}$4 z0n(&$ecJCA0zRXbyOM1U@bpu^x8+zp*r&>$n}7NPc%{!X(rRfbs2On6r8A%w)@c+L z#qX{M(=Ke-F>hfB=!>sw^t|SoSgHmo>KLr?@X=J)mDMuRRB~6a@K$kJE90VPr6Rk` z&);&bgR8cgbCBjb8!aCeoHRnm`+GsZJPf21jSPIAz?(=zW?L^9+{byx@;89<8 zn2L#s%2HHkHa*K9>5+>@#w(Vlh8law9r=;PpxpTOHQ0sT>Rva|E_3baPE6#?{P=jq|= z=d;$%&(lZ1OIl#M9bsKd`Vm&Nq^F~!uY;ejfa+RbXAcKoUv-;60WTQ=J8Ns(wa#vS z&K}m*0-m-m4t9P5Ua|rPekNXW0@LliOnxC{G+YR3c zIVJWnthdf&_hW8#~{Ef>1$=8fynWd>eT=0{`{w({XQ6(>p% zEy8VFF>xx9pBT-k*8G$HuW+FE98!xUJa$q~4*aX*ic~yo_e7$c>tuQC@s?XVTZ(yu zKk+bvkN?T>uGq1^ThYCgl={E@wqjF6+=zrEvOyXAZ5=y^=^N(`GW)AM}q zqlx5+$-{eJ>3K{t4;wwB=lR}8{N#z4DZQ`sJZ6n|r2nMn)qmd+TfhBvUfO;Bf&5?p zmHti2kI;9&;xs=X`~;L=hw2}c51ZZJrq3i=F@|zEFb{k8H+Fk)=g+13`SxMN-NT6F zZ}(3UhVhpPwtbpW16C*bCo7nB!4+01#Y;EmgUaof`JZh02(?#4YDJhQLiahvS`UW( zgsE>#RD-}>IKEz1cF3nw9lX_WEj<>Auxq zTe-LGpn3JcYLb}zFR^qecxim|5Saofqat!6{Co^Js4Zb!|Dh5VI%^I2wxbRj?(@x^ 
z99IHd7mK-l-CPbV*6@{wDZK)h>;~4`&CCJ3?-Fm^^vMA>Z?8V?e4Gzb4xL(Gd9@9` zo*2SAcuE7<`ouduKDHj-UdhODvB`mLzB4|2tFMQ`mpiAO`_u|o6c@iR=P3fpm(o<` zWmiK5$*;CI9ExDfhr@e=Q{thT$R`(%qg^m|#t6-E8J(bB>K%AKq7EK7I`u`r+-6{M zE?Ib3OdO~iza%d%D-|5Oco(W{`3enkopgrRCBy3ZEoq6r8la`u#jYD6)sR2j#?rYW z3JU(}7r1j$88Ck59QAZS3fTTZ-)>*wFKF6mX*FOU7V}8B|F4-zMR42d#wU?&Mev3C z!hvqiHDJ)*(vF0PG@vud@_FT@LU7UWQSgbRF4$PR+(EP+$;Fw_l2q3itfQ; zVo%)^(d*0wr$2n%niW|Jz0V}i{;bso-_4!lxBpc%thng8eD%F1aO(bY5Ao-v@WL{Xz;}(QhNe%TlZma1fsVPeo2#XX)EXlPcU>PJ zO+OjOKqYe@KWR%D4HZ3U&v|}Uy6U!m8gAakn%?g6{*G45*1G%I2dXXechPrK^j7o? zG}6&m(DN`^=dGmVp9Oi$iz`)nY*o!jj$r8=I{ zOSISMYI&&WnYh}j>#ft+Jz`GniTzhVC*RKrYgTzdal0-VfOJSr*3z=|$%I?-KtQB}&ZFRnJqzX(8J>ER2umQL}*6UE?&jIQK z(?yea*8*J&y<&%N%^+CvRX=rrV(1(9Khh0qhazL&ZTD8G1IqmK8=XsIz}kjziPO2w zaOHacy1ApSfJfKml_eh)fUAyr23-eU!v{&0Pp1#gfWBKgVr`>4pxhi8&xp^7;N!wK zUq%clfp}*Z=e{ zZq`n5dR|6(G0`rRKcGh^`E&4J&jHH6yjqbU)t>CNz4bn4edxu!0lwx5mrHUTkEa{4 zjEA|7X$i_fIrXNy;R}|~G9c4w%x5H~3HYIvNjB59WzZRS8y8O>Yy6Q7{Io27S{6U; zj8-MFPRj|{pan#OvFl{HgkRLd)5Dr5+h!g9909(5D)_JEqSZ)ls65kYZqjIR5-I`h z$)G)SQ?w_G_T=$Bv^WaCu0ZbNITZ1ITTf3WP>DLgugt>_&^iemftEWF2lxfF>dD5> zOB#_UiRw#@8+|_yf%{1*K6#+$EUc-8Y3Tev?gysICEAC1OZHP7NQSWd+B^h{crYnu=zS_$+<>c4)&pZ` zALj+`!)J>zIzStKuKT(lwmdU0WB39q9E%s(gKXp^#uxwKjyimq7JeO1@eS{;L>2ja zV$eftxGxyT;GRG{59Q$d-I{_PsGec|*eA&LG#ep{ErV64(Fy{yh$` z)&Y_WO4blU#d;q8pSRnP5TlKOq@1}1T3bU;-CD}pKt*2zKaa}@B{%pn)t}hvkT;Ec9vwQinJ@DQ2W{*CBNd7%ujypW+!@tMNbB9O$ z`1g1P?(nEjv&Wn6^h;T5YU?q3BHnH|?DiTCbPYzS-9?c1ke9h{O{yN zhK2uo<;t>f-Sy?~%T>ltqxuLU-AdJ4j)ni7?j-UvVXU9u(=X4u&p$@L0t??a{S@E$ zP#f@L1}27Q2!8>KHMP3=uk!=;x*y6u7h=vU z*mG_IjN3`w{VCM*W9;X_D0_*-v+RF1zWcm{T`q+q{7$6ZFJ)~{yxIaU7dK%jyB7Bt z;mtJ|Gr|*eo_H?_3rCWQ%`VY;6AZsE>5_`gDH| z!_m#t#t$)}kNQYctjFtvFA<~YApJT_?-#Q&R82^?8R3=dK5ytzADQm;!TllBeUaZx z7GDB4n0)H87k^~B*GH@yj;s&qc4O8E?X$Vl2VZ2n*Mq;?{fPFJk?W%#vK;C`It-ch zK>OY6LHVmVJ>RN;YJ*{%{PtEB%xp;J|7`z@9O%6sU5I3Ym2ppQP_ARDpK0~H{I`#& zc)@5x0;)uo*L(iRIbN`uc>}t50lsvWsyDNr6Y&DJitiaTU}65AL1L-)r~Lfg>))e}vgmC@B!;r3C=5;$?#id)0%4Zu0B!lCf4^H? 
zAb2@(X(O_$$TvK&aS?XJ#eDL{z2gG0X2kVBzkiPdz2gGwNI2!s`{&~Vl>Oe*zuY}8 z@b~@uH1Pq}C_xa?-A~JoOZ1En{C)pEm+^r<{rg!IBwtxR0^V75a{QsPvp6&1d$LMGK`TM4y z;v08`wzK|Se)_aN{Pa`9d$uI5jPZ@7kcq=TzkfLJhXa2&@P`9`IPix9|3w^N z+lznv?Rc{?&}*CUc>_7$<-*@?#gv9#{_P(AcHD7ASy`lin{q=j9m|v(in(l+azl}h zceXv=qh5df?#x!Ad)Fx8ckk<+jYiJQX2aS@{{kofe1toZ|Brv3;KTpgZ(siTwlzID z@$d4_KXoL^$t26`J#XafpVwmEfX+WZ2Ic6Xr|`Fb;!EJsr!`2|e7E>y1bM%{{qQ^f zdO!4x0I^PAWg5P4968_7FY>yhUv=g$8E|8&LNegS^v+WOQ>J&G3Yaq2@#6>hI&$Vh zell*zT*ptw4bd`wv{)al-zVb-l%Jw+|9f};6!U#8-5l_}^)dPV11`Q}K$Y9~dVR|O ziZiyG|D9Qd|2_Y^7*iIlNP+yT|GkcReJTn%t08_-?`gt$hx=&oH3B_+*2q1BpTm9R zxGNgJE$Nb`U2~7G;m0u((I1W>-6PoV{82s1L%uxxfm9qzwJYT#@9u-@fkFPeMI?rk zU%kx(Wkj15Nklilx{(iYDH&Z-BX-$-_23}l%&9*7>RQZI4HW86$}I+Q+!A+hcI?*90m{`9}^59iXK-lspDOMiNw{%|h+>8L-k^>ugp+5Ygp z{kfFB7DV@;){V3M;QyFEo9z$(o&F%&m)Xp$pYH9)t_RyM-d&gc9{)SJVf(}Trk}!b z=g+3(y|-QR@A7B2^x@C$xI}4Lf`zye#5a~ABo6=l{^7tM4*cQ39}fKCz#k6$7jb}X z?{$A~0XOT;DSM}fpOuUC-@o<$N_AgX{2Tu-<<4OGe<^nc)9FFEGwA)lT==Cae@~Bf z={@|?zgc(vd$$ZCfX&w){Q2XD=GqVa`|p;=lIAMWTAbI3$7~}yEB}8{pZ!A$_Z;ZU z4;{R@=RIuEw{P){bf5jx-@c;mZbR=LttoPbQM8 z*J|=U|LTNV*RD){KY2CDA8zNT|M+`G1erV^O%g<{gXPDa;b`+B`#Yz2)9&3we(FBN z(VyS{6%O?7=gK7EYe@Z1@uB;7squ5J!h=RNV~aBwIaN7%vK$N#32{P}mi--`S1HlUCh>-mgC{9!44BL`$w9Em}_kqM6u zu(#gN2lRe#B%8lI>E$fvRDSN>W6Irk8~T%9x5KcQ5H=0J+ashNy>BY?cURfx2q{P3 z9Y4!H_q#oQ^5nW*zq`slN9o_hz3g-UNqcO4^`HKJPWpcJkA9byT0cbf4^|wyUq5D# z;ru<$iOl9?kR5}HA5-xXs=u)NUH(3z{gv@$2&}Dlzhk!xcXU&B=tQy~gpqgdeIHK! 
z)Y|Q$;k>sEplwgUT~*tB*w#<>{cF!QP_#Yc+UG+h@WqOxVaFZXK#a_aZSEt!!ew0x zEVk)IgAv7BR_CWxLGv7K-ltg|@Y2logZ^K(!e_%}3*-~qp-|V{;M}5Iz_X=3TIX#F z3~!QE7%FrZ%yZuDBxh0yZ+mQVu2(65ftU7)SA}K(&FSe616)(UM|shqk&?0Ct55aZeCv_H>|W#_UYH37$WI&g)YNyAx92lg$oUk4g&rl1loL*P4Bd8FRmll9^=h|UrmgJY&mWkl5VtcVg zNg{mR3WDBSSAc@>#i3>!ia>5hyhOOjdlFn(Hg11F3QM)sr{3z+W5MZ=N0f9oQF%BxpaW1Dit^>5Ax-z_oSh`}}iqf#J?3 z**91#d^jX%hv|b_IH=^!7v-PLz%j_!@1SWr$mUJDZB+9WR0kfNqw*^SI7qlC-`W!k zIs(^rm941)l2^(_^e&e}=@q92j#^#|w)~37s+R8p*K1}4je1cHhu$rWD!W++vu(F| zoa6Zh&sDt1A26^2P99&n?|VxwoG3W;OYNg7$Y_+`W%aV3^14g&g&nW;Q@$Vbi&2o* z0bkYLif}>iy7zB8`2OZ4!I&Y@%P;1iONXBVTrARdJc8vrhhC5?ZinJImxn0$R6()H zyyA6!*AE8>H^EQ3YM=Qg zRKpjGjY89&G{CzBvx{aby@rjm%6-Ehr9jc(56Y#VKZ7fiv#Z+D%m>=OHC?D$soac^O21ci@~Eo&6n)06JTglm1xM_HaMvM=*@%X2Z7{rbzSL# zuV8_@UeWz|EpSK8Q-{2?0vJ5pW^=)g0?4=i#X(2(oe;s(sUf@Nf51b7Ewl8p>)`t6PK;pKJ-~DfLroDIhYl}BXgA(oGxhtuiMWCobV`vhQ8kt6#Tw}mr{yneqJpF zDvZhMqw|X4p^d@gwbiO&amCIZ!@noM8Ot*zN)zirqiWqo-w6dEqHgEpct#z3>0MMS zZS(^nA>e#Rs`+8c^R+1oduRB4cwnC(*h+2mwgpo zej8*rMBaVia2cA{kJZ_u*9_)1deztEmBOLIN%hj(KEdR*iWh4oGof~>FW>ob(ZHbQ z(F60J4CTUdL!Hb1d9YRHd<=g;G8|A*ENVWW98TyD1%}9d2HTH`WB1=Tz|;w)7o_4! 
z;Y2Id^ViS42MJG1<)sZO!F}sX;=GZ$(C?ex26f>&C_76^xW1+Wu8#g5^*X8!td1J~ zJosG$7zxvWzg{66rlMG$|EwH@9Ltd#i~3pO(X&QJPGtiRhec|$+c0GV>DAg_f(oGA z%YrXE=3+7ZX6br~eD8$ok9DaBoT~$E=B-PIZ%u`Z&2y^#HF%X@-Fx>^leZqagdQ?` zG5aSR^JVaciGERV;+L;RAUhd+E$@G5#@jOJyLqh0I-L%%>D#3mO|fRsAO{mcT@>1zh^K}?Pnw0`!uX@zI+o{p~!nleM=}fbvb>-8SQo;|3T=yh*bkz zUvbj)MO_n|{oM7sz|>M0`XKtQ)SE)k*mm#C*}Ph4Fis$O?lbyhThVG1EIQo{bg+y2=w@Z_T%on{?S@RVMQ!-GRGWRq@F=h-3v_pR<|5Uqz- zqZ&L{94my)Q7R=Tw&uX7ev?&iNR^=Waa!xwm$Il0T;4nV#-oHX9hXN*1^jcU%gb@kO8uw z7EeG>4a|1j;UksZ3U4{>u>aDY4~Ov?Shp@H0@GAXSCnka25Ze9S@C`-1YV&rzYZ_^ z3?A&8TrNB&8uE_1qBcvv5!CHoKUpfS5a#;l2Grlcl#6YghE}@N0o55>7XR8(4SjvQ zOz-Wf2M1-R$`|LQ!`y*qW>37B2*S(;Xv{j&0{m+}jE*h44Q7?YeU-kYVE%wB7YxNP z<%-3#%QwbWfJq_+b*(q+Kw++GwM%Rf6xhE|TwCTje5#L?#cof6y!DCujh7Tc>9r># zcOR*M^FJ;6;lsERFFtg=n;O3DjaP!J*7GeTraL6j&*(VM# zV)jZm2#@ot0FgSfu_G*!fV%f~Q!x2EXi#`^I{91;G?MRk`l0rBSiLw-Ml7xg?mYbI zV%zaHC^L88vu`3{;FS0IEC?1jqq;u@o=npa)_GhH!P3TxORx<-x-k0OkS&Gd2@ zFl?_?-jo8E_&hgUG&c`OcdU@|h)9AH-uIu^bw3Ua96E2PeNi)1w~ARNR@??3Ze3lQ ztH4mMdA&!dwK5%E^_8`an|l#Hm6jU+zOxSOS=#S`fNwp_%u(p#o81P)M6HAm&if2j zm%m&1aprgEnfSn|(5n-e`-*Jy2j78tko>I1&@MRA^SLHw+6Xfr34HjK)&y}xrss;&|^6a@XeDP0EMz1;X|8~Xm!_^wm7 z0&*>&LEZGi54A>Exj6pznANRt&4^0rA1}*5lF-zV^Q|FRZ=-B5aAy{X9xf|>PwqZg zudG`+Y%8XG;oJt%_hALdesztnF8cx2pE%>%b-D%)IrQtvORzaQij5I?l=W9Mb2bb|@_!ccKLx zsgUAdaJmJu*K-YGt}FBNvGMTr^7N$~=zXm#Bd6yG^Y`Y-?EIYGQ-l`Fq3!Q{mj(AD z^fmuy&u4hasWrVHfn85xKEr7q!p{GnJ}>3YpTVYwyZIbDKez2l_=ot(3FlwuvD|&< zo|?C^+jj)nzI9Cbp?)~cS2?#YaX$un%Q{YB_wU>%Fy)RY$jBRSM42qB|RM-eI5LK1ytAiI(s+I&|>|t#!fF3$vwp} z<1EkN_78v>f2pS$(E1s)w5JDJmBEPC(VQTh-vBB!-Y;Ds)&_Qp-<>u0ODp68t%hM$ z9y2zbF|3DD>4x{JPB*}~!9F6dpVf1NR%b9KMDF~x<7zsvXlm1U5h(yO3nf;~Zu~%x zmf}00G+9uE!H9d(Ja5mFHn9K7?A)p@OgY&h{O#Bqzv$^E(5e_FJ-ooV>6PuDO(+5jQKSAWu0^( zoOSfY++f=b;AK(8SEwFMU-sq@@m);1E93SB%pQ{iZjNr*K6XI`P&YUby=hAcec1$B zm4QuOtMkKg;yZBZqo8)PR3)@i7v4~Xy!^E7O`s9o@6EEV6qja!_LY0x;tw;F6PBF3 zy)C$ao0=oK7sL;p`T0!-eE2wM$7<1EzHQz1w>B~--{x}WMjbT5IIm1@uA1jXc 
z0TtIXp?t7G;;K`H^koxhM7KY`#n4Wt5)kEa7pqyB4gJHjOJv6X;s%ZAHu1Xqd0Rw2 z7_a~=YkZA-S@xD=bw}rMgGO?0^y$>dZ>I{tg5*hW9o!SZ<6{fnFY5Y9kDl;cXYv%B z?)#l_k2Vkd3FXbdxgR;70Ie@9kKQ{hh8r}Z`$drRTnV#xVAMQu?Q>TXVa3wHms2H5 zxk0O8jNBya)Lfl`?Y5g8S--y?3~txmoNdwu z72eq0($s_WXaNs?p{a;&2LGP`DJeC1Qi#3=X zdb$ZT(rY7pOMhK9S`U9Nyd;p|(gt<%*H1p4z*X654D2~?{57#tO+fnWiRfn{17bSb zjh38H=fHEKY(%$E!v52`=Uc$Fn-Ao4UlziJiD`L1ORDMVzJ6Hx4zs<@Grko)I@$RSPS8~nx)xwvmp z0bCPX9|<&D=+T-Fm+4HyI`_uv- z*p$mki+#Rv>eQj<%eEp!`jxXkUAToPm{JwQRxG5XSxq7qI=69=V!9~f{ zNr}6k!{p)mZ@q02>FFlQM(sT&$~;UgAsYr+`dmEowidd9a|S=AbFjF!w%y*&q-zM^pY zniFUw=Zi$%WQ4jj!0JOuy2{6|z%WMA=Z!l#aE(ACx|2lS+(AKcXgO-R`&IFL=xgE- z8Sy-Yp65w>R8nxd8Sy0}ALndr0M&jUzs+nc02zyRJ&3Wn!VMbHEwg}cmrPX(7_78e z=I#3e_&6&0w$JinZqSHs-5-I%(u1ntE?0h;nxXkHCTCzlJBo$T_7bAz$ZmMA?XL->R_H;hRdp z;MphI!`D>8QMU&D7;>M3J}1yfuMNHE+4#Vy76zK#+8J=E4qTsS_dWi3EPacuSUr8k zOkA(Y_?7gZGmlR>`P;ITrM_9PD@bZ)o7F>Z(1`BTj!m*28{R|z5cAi2-133Y^^Z|& zPV>ajN-9w{ve!;5nYWfztRUdT|(T!pA8zRSBf2@U~v8PtvBajuMud} z-cfrJY+qEjz?Gv8Td#Rv2@C>5wFppjlH5PI=etECXQeu}*6fo9T8!o`opW+&03wMUjNnT6V$@lDTt z#r&ULaQ((Y`3YT(;LxXjrH1=BXhi~z=)V1O9egCrP;NauK*u({9{hT!oV_=WD>Rbx zyqXz1%-f5=bHn|i%U{)lRR>R9HnVS}=NeHqYHuY*^frEx6sRc#kI6-rfSjr->8_(3 zG}EaoK_?O246HV5iXddvgX@Ct48?;!!Q{2sJu07@veCRmd;S1;*rEc6 zt*{FXSyKoPjnA4I+t@~5_Ku}WTW2%rK496XwDlu6D&q-+HYdZCd(9_Go{gX{n?R%S zdwJ@^>m>$C5M(Q{bVDhZYRW zE|~FIGQ>S5lN&VB=LP9kO|)M%z{B+`NLyd6jysW21k@>*MkfHcFpF?{~Y_JiVgWGEn z*S?JXvZx7MSUBc}ooOMME-hUx#&|_fH-ScUUv_z=Zd{1Pq<*|!si|56vM;_!Y^=M( z4I1_DueBGVO0zzI#)w=KUbI%PTsz_9^!O9ppiz5&7c~*jSIULL4KbHC-f4vA((2Z{ zY2?sDeFR(FnRM%{db>k;X9?Ic>{$Mc_&1<*QTjg1$N}^tFo8yLUd2;%dWCfo7|b}F zV>jjpEGT%gGD@x$(2m~(8nyS!FCR4w?xw;`SyMJ$(5r%jPQ6|lsD^P9uW7^^f1@Ed)GaPqDb>*Ko>^u*i@R9IX4hCR*1eaXD9$vyzR9_zn(q zJeo5`stTl4jW6TT7|80qxQ~EpSJJIGvp}=-m+!U)k9$akR|okAAF%l zcNOKdFzKGIcC96HOfksnT)y$_h;-03h+oOPlR?V$u3_3xjJ z^FHmd?f{bDqP>gP2k6{ayO+MLhvGqg6IsF;*o`W-de8&;p4D6}e0dhCTji@xzxTeR2gL}o?ppjk^U9fQeNT(Q3zI=>y4ORyY9$AYBw_l?{ zqY4-JwN*&q_PKw)c%tj(dbs$G#K(6(zJlrR`vpz7-^>jf-NyFF5aEv&`S5fEcGz}P 
zCTyKM;nCD{bef4(Hlq9P`Q~fU!#{wahW4(Ln$5tNarn4^$3AY#M(rIq*RB>DQVcGX ztY?IKyaHP#Mfq**=y;CGX2nB?he*^_l*WO<1zo}8_tk^*5l>3}vl%pFC<~4Bn!XVb zk2{tEJBosz-P+O$EW&gKKW(7%fV0p@&JT*2<~w$lfHL*m(lfH<@MFOTk*lV3^D!10 z(QOzOaJIoR6O8-VdDWsd4`f?Uo@7{0HxeN<84tx6VvWYxP4nyFtwp@q(hZeBRN0_Y zK!^j~tazyXy;*bb`!<0OD}?lvPS(McV(*4lQ@W@Ft8CQXDj`p!r@2*uoR7_ei{l!> zz8TZK=Ga8hw>MEXlJgS*5h^JU-hqB_^ZdattD(w}lF{;RbUsZ~HWd$LoOiU$+L&Aj z!^8(*>GHwN5*dG=L`W{PIXw=?Ljj}>ZAJqXp@qS(lN8bZ7I&;em zLpkuAjE6E9eE!0G>p#_jyKOfD8bSn=g-S(!Ovs_r$f#@rjr6&l#<6`$-Zh~AbNL{( zxE82AVgBC3Tj}gI78;FX=Z3dVG%#YsJOlAQ{^3`khgJ3=Uq?Ef%0i>|j(TOeDKzsn zsIOhf(>S0E1|*z0o@d9QY$_g#mF?LrqIx6>I%imJ5kAlfM~LR`b`7VSL8G!+@lcm= ztF2#PE0}-VM1s$_9*&i7&A+{kF51FEqxMeSZ;|AxTLm3Pj1tSamj_}igRB|f=zLr( zG@@H(#^KbcsuD0sP+`^$DptOO){0HL3re4;O#6%XCGDC}nbm`JE}BPGwtt_&VueJ*3k5;{K_zhWhwy{^wb3c5cVLFg$`=V~bfZ5i zn-veO$=@y))%5~Of8SP!4Q>WQkK37+2ei<)7=cE9rn8|V-+Zqx0hU9c-Kp6Pp#93? zs}oF{=xa{KLm7-u2U0fa&TIg81I$+@C>4T+*>!h;c@>R>vfx;bpLNVrzz2`Z$Vrm(9C(s%1LwT zn&HDXnVMs3D#7gS)1^hj=w>x6G#bC9_iAm4(D(t&G?&1W-*TYD&tFF>rqZp3VxbY; zHK+T%@PnhESeJx#d`unW4SMYSup@(>ZZaN# zbAv{e*gI_F(Nz#Vo#(93G+d|=o)NgPZ0sUBPZ0tUWg|HcJKR5A$Ug^`y-nC+7r+}c z@YJ`eexh-ZwttiHPzL52{~^7#G!@)dFj}-|QZbxvl^t_L{4;&c$#^KnC_QQS#dAeI ztO=R1;E`@7Fqpsl++7zAy@VAH9pg87-JD2-KI_efXHjP1#7V1*=}T zy1uvwgzO3*`n0PCxJQgjv>hMM4I0fu!$;Jfce+~xC)^!=#ARdy7%um|3PVyu%QY$< ziVZP+$*3Ch5KK*ceS^oh4u(6Vy{J0Op_dS7G=7I%%{scxuNpWWiX6LO@qm~ydLIwX z*iEg_V#A!_!8h#?s@Ivi+E7KM?Pt^ULif*R6G=GtM_?0sHy=>I5}_4 zI=v3KJTE7B$P*`eG%FtZ*dv2)Wa(SzH;n&s(7|_bg}XxBTFo!?TqEP57{h33ou9qJ zYj{vsaJ(xYrd-$fC5lIv!`y%s53P7KP-t{{0jQbscxK4>N~mevIBJp$hgl7QM&q~j zo%p@Gz88ZlMpNaMq9=d!$dlJQUmV?<(1+S!M-KqqX$fP<6DL1g|tUtJ>( zGYbNZ^x9r)HDAW4Pw=bQ=e2>uGC||hhcxRbn!7HEo3~Df!&f9|ICQA1&@cF`JRlPBp3NGd%dR&o%bD~$#^KnI8$8HIjK4w zrk(NAJ-@60@E^RlZ`wsVp9c$#B&9*LgD(uOXy{diicwU zYo6~^{8l{`=xQP z*Qj_XgO@S!s8?YVYy*pL?XBqqhjoi9Z>Foz%Q=BYbf?K*cd(t<4!jF)Khv961%3Co zb!?31FfU=nLnoiuexoAiD;&Ii_w`$^OJLQq&Tu)uTyDxndTrCyM)^ms-{3;GS<_Va zdY>B4)dlXt&CvI|LB{tW4z>eo73e2?+SKrlgN04T zLoo(#G{5nu-K8)#N#l-E 
zCvW61hLZ763=^9$&dTpg32Znf2WlJ9`w9{w9p<}nu&~K^D8?9ct?b4Q$4dCr?aiqV z7m`3%x#^UETsr?b;u?WQ4FVt*PhG^#m) zMms}iYJHfKnE>y{hOC&KR|mHl4E7qYNcXG*3ytJlpLg<{XXBp0*Xx$=-*WaUFs%I9 zx$6{%`8O*b`uOw>rP{Md8$NKqsdzIJtSDZg7-3sRFXs|OJQRKFNYd+Ja_t*9C&#<^ zd~poi>}Xusu!w_|OQ6ww%=X+(N0G@bz-ejc_LA>tL`uJX=<#w6BQSwR^O8lGX}=a( z6oTWsr}C+fX@cRW^44?~a*!4Rjr6%$xc$M!C>9%L$?tBL*$fo>PaW}a4~O|T6%WPC zqHc-HPG|&5l}ATd6ug3?bj{{We1_4tHyIDbFbH%c6Zu==`O%dlijW^U_xZV$mvtPp z0~rrxU@3yG>lxQ8(VTzs8OfQI@Zf=%pk!gXr!-v>2(Vk4`pEMmu`JdnI{PFGj0e8+QjN&ZJwXiCT<|hoUtHHyVv5JV^yz*PKqfwif`C zfW|ZaOE}C0S@FRyBcTI>#0qm#qhqWvA_BTR3<`S@BS*^n?X7zLtP>!_M!O z-B|?`CrN_lDb@5`Bhbh`zrJNx{7K0hP-l}wpvU-1AZePnS9s2M`t~N!sJ-iE4%wP$ zk_alFCnxLADuI*Eg<0rZaD_&4zM>?|PItj)Fud^m9p%;suxS5~XwOxV^fhP2LqE3i zR}Bv>gwLj|21Vne;O6BCbyh+t^ff2rp%@mv=hbe{rL8a_Z1?1@v<|51BN{r{or6v# z(1`B6yiVJWjcfwNiv$9&qu*ip5_75CIJ&qXk`@Au=xz^Hd|n+_4jlG4ZP`1l1X``w zS5{`iA;Lw)LmB6C7G2=oSP3lVD>CM*wSaA>;m%E*v?41WD)spDvSlxGz~pgyYfUBy z!~{kh`xr0rou21pJQTw^RXZd#zrKgtee?zo8}ec4nz6pM^W z?-zPK9wY|cSDWEp3gh-TYJU!+vs+NvgS2m`8*Js6%QTJ5+i)4?lHX4b^Q5OvvkneKF#XQn4k1qqvD~AFw9d#RWw=Us?5fiG3zy1#FUgWCpbdt{VkI<}m=(q>D9`WLbX z%&me=Efdui%4NYP`hm@j5h?UMC(wv)vw^Qp9n>uYS=U_-Wh$kDNX^+(-X+mJU5bjN z;-QSiI&v?6J~|7H9L5GJrtWpw^C#^u$mePDTNpUVa zH{^qvN&0J6Jg2jI5SomKqGwx_B5Gx{vq3}jx={4;OE_oP)JaDVa*$L4jpk!l8%vM8 z&QAid4+o`&<`saS7w24;4|z||H3E(Lcg&+lsmEO(0{pmq2y#oHt4MnP81 z(R?gLp!Hng=u+4)|4R9qhDH#wRW!_?Kiz6agr?%5C`Ogwx${;g4D%JcI%0qyG(LAI zMEL{NKyI zT=;ynv||+Q>KInd(fB=Wn7c^ysdjKA$zJF7t@DgB<&OA8ebWyt79XRdQ?g1aTQtyJ#J16PF%mlog6 zhO}cS84qP(Q*t^UTGU(0ux^Wx#dxq-F|Rp?@|sKp30ve&kH z6#kIOsK2JhYhqhe_P87I<^U{4VdbR%o~=>GWP34w6d7 zLovp+8NBkPt7G6(TVun=Vuj$=%ZHbeuG9GyQO(JCC!E`e!R?Sg+YYl7}8}Oh5-d!@`plEOk9CbFSAtRYXi?QON7xFKZ9QyDE z4B0A^aCGN87`OSwCW(*N=p~gE4=p#Db}>n=9qjKao+`I05B6_tcV96jlN&UW^E#>5 zvUA)Uq0K10HD*&1fqua1O}<8SaUw)F6%S<$2@y0%I`IJrS!)hPb~+d{-~^A$nJ4r- zXT?K5?X}y!KCu|SXc^ii)E{|>ekCb+iMDWqMs|zDzF6%8AF7~n=!wT&1ubBXRkYv1 z5p*+2RC885bZ?+u>CAKK0Dd>xo357uO^2@vI`ZQtH)W&ods9MQf?r}IxT^WWTWnJy 
zcrP_idEKHmdNeB@nlrLC=ego95G~oif#S@F(&8Cc7y8RfPkBfyya<0*( zzPRH{BaA#3kgM7l3kOW9nVYuo5Ixlf*U;>TgT=1axw!=%CfcKq_;mLtNz;+RnCjSK-xJJf9 zF|6P4F|$0zHp3|s`R!s9^I^i%{kh|oR?*j-KqI;X)>NL*UiA(>6K=eERQVztZ7^)@ zg&QyEYtD*?3Tf{@ZolU}m@P9fd*VSAo#zPrc;pe#GBb8;qy7Inw z=dd5}dqH}*RSXB)fsBWuH}1%XA5QRm38rcvD+yc17h_h=`)cBLy7dmIY$_hgfblCI z#ecsBi{1oZVECqhSL*WemUHUqxkkl9u@UVpdFf*5Fy`3!le09k;F+V2mN5(He44DX zk$t{skn5V#V-@h0N0|(ErxNgeTX(JXDTnc!iia}ZHJvUiUt0{#@~anb_?!%mCSb2a ze{mSUS@F=H!@o4kpf_qAF+!1tT^~WD;kom=j z;42N0JdITa^wL7cLoqDYT(M9ssR6bPGyb@-b{AZ5obRBm);D^dlkrdncH~p$Sd$S= zV9X3b$N44Ya9e43)~yK~xJJc8vD_hrJpL*HT?6T?UX2nBaE8kdG5tRf5ON%xo{w#n;=SS~+rCLId zrsAR4z4=mFk2kf!AcMU7x`)2Q83Jd23StlFxkkl98AJI3olY)BtJhP#c=Ri>LE-gf z{Y~c4tuaCEO`wt8GV48m?w-N*&|fWk)25jZfX3~sLj)&quzJXND1+fKbj%YJyjT8a zZuErzav@NC7Fkh%z6e086^W>#3Wh1%oe=kYcQ%Z+&kR%lUJJ+QB?eh}aOj}~8rdxe z#BYWcZE1m{wKVLW%zq0E=WoCFf9>6AT#Z}UKk&UvnUk5!awtPIDZ{avG^bHWNku9t zB~6ksLuMlLkSVi_8M4Z(NOMZ_sChRh(zBfHoLA?z&#UM2|NNidE3d9~t?PTOweI`2 zlEJ~<`x^FF2jj-5TL*Jj??0Pus=U4nW*=}$CQ#5;yg=(8Gex4acm)8a-9hbO8 z_h%*ezV{LQzTf}chnCb%EkE8rhe-8yjFG)`pMJlp@Uc_-cXyJkWB=TTesKRD6}|O4 zjotdHM#Vpy8mZi@67v!9=JD%3l*hX~v{7a%d-{Lc?z}!@Z5ovu+G~U9```VH$v?T- z_xFQ_j!&CU@HH+gq;0(3+(1{<)J&Vv6!-gn|8pOzxYH%5uLDnG%Pc=>@xu)?cTPYA zpZy&AZ&%B&`%oV5aAJU&Oa3XkPBv`+*;D~Ft-Q5&iQa$8&HA0C7k|;esgABU?o#cp z*hGSQw%ZyV7xC8f>pqmnugg4jCLy|zhDLl;Jj;InT<_6Syxv|f{Qb=Tz7OR;?mvHV z?Y<)Fn#z6@J0XLV-H!5}RQ|i4Ut{LK?n8Mz_wdX$mX{mpyjdx0?h^rBkrY=hyY>2i z$<69ss36b#F}{{gmv2>>EF&N`#og|HnBGEuYyRs#l*c>MwR*$Dr~)b(t2B7v#dsQ@ z6zYD=Uc@^8`#zLs6{4r!*zFUk)HZ8+eDo6aTdkq5Rd(a|GynTOl>N(f@8#4}Kvi!_ z9+ypMqN_)*i;E2v@m2HdK9u!)NQTmCN%ns#`E)t=Mo>!{txw*3(YyM;ENOB}3e^#8SlR@asO5&u_bzZJTc$PizOKO0K zhxSxZ_tB5fq}^U7>h+sgOm~=F36tt8Vu${^4`shetSC<<{7xm+PMi99mu?f?v!J(( zjMDG^T@71nf8U4lznzU48K>Dq%8LJp?ntO1ZQEbgE$S`e&Ewa7D37OLcafiUF z{F3_oJ8cg?&`Rlo+jDyq5w$%xCau_&^k1UcxfZ=n&f)2>5)xY;U@$xV4Rt1OpU5>Q z{(iOmb06wHL~3rWTnzcVLp-J7csZ4hR+$&0nDYDFzwSeMyd6C}GsCh>S8nZF}U;fL?*>}koC&?oQZY?BG 
zL4VhM_LB;N#@8#xKIc;VfA2T=Vnz^v00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHaf zKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_ z009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz z00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SG_< z0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz00bZaf&Y5~g+6T+vb{C= z{G%hC@`q(5k$rY2Hg6mwAm;5UL4LW7g#XJ#?AM>po}P{l-aMW#P>au3+i_&<6aRCh z-eZr1*`|7uGeXL_>3$jgeV{gSWsp3lPz z6{k0+&~tNoJX~oVOXrVmmDKDS$eD4P;$9l}6X;z&g)3Jh(Ikq?~e_iqrzY0k_+bc zC9}Tg(8w+iY-~=R=TsGDWPOy*?lmiOODk>D{V>FRS~VH{aJr^T-$D_AI()vivC+`$ zE|p~Jpoxc;ITsS=>;qk#@(MVoI^wb6xIccb)wX%5Wa{&T78qdwM;iGE4lfY8Q6WZ$?dq5r(Cl8GCqilOE63D!)+7aL|+W z{MgosVa5+{@HAFvY)G4JRU;yh_0h3&Xjn?#dve25>B{O;Su|^TcmjQ$AR>^h)kdEu z8#8)+p&|1&d>pm)8*To)DJ$BwNJJppk2~Ax(^#=)`t0naRCA3S!V7C}nxs-nxVF`z z&XY@i*2tUc)24WQ)GK;-{>Otim0hWmQsLmL&!PfZAI-I1N;fTQqz&iBsg86lChoK6 ze*JN&oO7zejI72~KPs6Lxtl7u%rX|F=hDl|+k$%*)rpuYTdRDlhr3?8wv!7wc9&nv z7ZKN-_ZDBaa&N21{famKsxd)5`h$MbO4?t4*L6&!&zIk(~D{1zP%0ZHWHkNl`xR4Rpl`?XLL`ekOlLeDleMzm1qsQ;uc%{ofAhG z$ojY^V)O%3-F&)yzFhyD@2TX-;A9!?@Jvpe)-kWYu^RdONTT~YhSYX#IB9Qg5*EtQ7^sIV1U+>^ixu(y=D{RSujpw;fIblZD$12|RciEbiw6E>V z@11>8sAkAKsiGBWoHYsqb@;p~CdcI5FNtZio4twN7#>ZDgG5p{d+v7V-nB8Dz-r`G zYHf2moR>&uK5XzfdZCsI?DG92!;&~N3IkamJqPz18(WZ1YGWkl41bhO9j`a<^h)K% z5eBkzHRPa}MdpZVI=|jnb>`&;^6*7VrPHN+PE}zbJ9FAQ4;o(F-9(4GuX7shdx9>w zcQW_3lYn!o$}5IUm-~6<@Q29v{d09KH9bDUGvjS3k<5HlyWu?dDJKkM&qu$N^)p7_ zDXc99qk**7QmJg3n|W2{W@Z^@#?5Wlx3U`fd^zodh z`SYIJ?mgQ(iQT$|Hhcbv;>;)vWczW>*YADyc5kMA{}@#0E6dXutb9mYDy3dTAbVfk z=O(Ec$a_U>#&2s(l=whr1xf5!Ggu&Es%))3daat^;vY_z551EvA*iOa#!Swcs#DAv zIC@yd82O)T)lBw@l=Z$6dSsqnl!j9&>Hnx;lX3rY5rJAfo(j**LDj#LZho^j=~zky z705~_ZVcvrA`3IJK8m{xtm*vrhI%bKljpoLg(fTV!uGAo5|NSBxFMaixtHhBOPd<^ z>)K_};6A>e-Oi?Q26|q1Tg__Z^Th(UNp5-EM#!HkP{m*AgH9Y#W`zAEY6co)380Dk39$c35s$BVj97M?y=SY~v235p{`25gD7rG`Jq+ z!i?%C?pKc#}!h zG$Fpv9OeG?oKqDBvi%rztXRqXRxGg#zol!KUP5~qZd%#>k~rt{Q5eW-EOS07e&I?z zIe4vIEnKyPtV$5Cdv~2LB9OfUoZ6^9_4tBjk`SNLxWvwv6u8FFZpY&|r+Tga=~Y%E 
zkGG=v?xTWY@z^1AR@^_eyn&v#iGM8dxQ=qE3Io}GOxNuBZsN>B8g7=?_m=aAs`ZZ2tAH8U=@TTfUBZOFtJ6a@v!eOcy$jP_KW*{k|#;WNWo=U$w{2 z`_+?8)^!VlFK3YPp~;iZUak_6k)1ho8t-%L$ZF!f@K&8pk2-pxRqczTv8buC{ivE{ z)_dLgR60OB?9iV5C3LsgT>I~rpK)e17*M7+>1Q9~x1Q7QWmG{=Y+JN8Ix>N#O!5i# zmN`kd`X~%!XO8jQh`FC+TWPpnp0e!RFc;Z^e z??l7n@Hwpm+%*a_vS&x11rlwB&l^e7YV{jpFG}c;e$wJvK~0=fJ$f9aw^~FXtI;T4 z(O$4TkB$p0eNvxONxltf^&~&ui3ntUe5V|oY~j~L>rX}R`<_%qH@)U5hsdW=u6Beh zQr|rJ=d{5iiN3F8uqS%(%aFgn?|Wda2%;tbeGHJTn`;MyVr* z$cI^X>vy1vbE?8XwjX20yna`=w4AQ#J$do6DXG+T)~)v;L*hjQvi&GHXk%l4tb~l{ zR`G4{cL9;PpxAfpe2xMz@dv1r;-GdDzf2q{-i-kMrg0Z_KE=fP;43rIW7&YbR^HEA8QOZWShP>52 zpO&naNj#I<^Ur&8XA}mq8pmr7%(-HCk?iyQR2OIXie7neCT>z4_h+CmkoEDSUyRhY zfL+vV;K0#Q$Fk^y<#LPa+PN2iFp#ZP@2fE<7az^18)n9w*r=63-CF&(_bSSuTx-?4 zcgrePBaiPL_hs^kh#I=n&Chsw=tp9j!Jm`q%KeQj3}nv^DXV7nS0?2&;a#@b^RNce zf98Ztm6nSlGP36I;G)T)iEraX1hN_xw&?cRKDC-I?|1Hw z)$=cOT-nvRIUBjZJK|1UIm;r3Io}hqj&U6ukFE^MD;9{`!h3{q*S3xUM363=EmB?gER|C+u(}z0k^piD`6n(W1dxB>afLCbgW_bci&`h(4kYB z&$=IY$%!KjWNX#+m~7?BUEip*_Hh}L2m#p^CoMLkphQF<>tofBC)4}SswH_f-#6}U z%%j6gpFHR@G?z2bc&*pmsXzDQXW1vd?zSEDnR9brmFj%5-${D(9)5Suw?|!bd* zhSq`K+llo+_w&6huLELZ1 z!a&x?%eAp)#+TQUH68vu$q2(l(mv z@AbIn_%Bo{!_r`vCil@H%*f81zRT^EV}jx+Q9M>=vG)$;C6A20cRiCcV`cd9qMv=_ zS?#cYGe<9j5y)z+$nV#1Fu#f%?RBH9&$D`BoO0{W=Ml-2 z>m(Ou{Q2w%ReMqKDV@#;te9yYf0ZWJNM@%!$`g^1eOqa(6lXYZ7SP1uW+UzQRg$h< ziSmJH?oZwSdpC8c7y=N000bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_ z009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz z00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SG_< z0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb z2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$## zAOHafKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;| zfB*y_009U<00RFj0xoKcmRV^n_gU@F&M)@_|P=sW_zt( zqxLTKe;_lx|9%0-5Nn*wHI*C`6<4{kSrjS6Oi5YYO#809`E8_i2lch7H0b&`!7^qb12MPDoA4nfnf|+ljf#%3 zm!*mMGDkLxsB6`_i{I?06S{96#vhYI7Jk2EqT6zVMp~!;IQ*c6V-}7x=KGqs`7JQ9 
zVneK#eD}U$S|dGJXI2q$r-q!|E#Y^4{};-CkR@&HU(7Ltg;9X+>KQIdrdq5c51Oat ztqgia)^?k;Yn(w5@$t2CIBF42SKbg$aeLmtQOLAqPMXV1&AjziGmB+qo(HZktfKRe z$Y0rYE|vPlcr8{Nl0q9gqF#R6|B7Q4CSLBA>bh#~?iOqoS7skt7k9da>{;dFqBb#_ zm`~W=`><>Q4VbgC>U4WK#}H-#TGLktdIg#ovmti6kNK!oQBFq9TIq4XtCmcDZnU!f zP!m0$D!=J;)mM%}bOZh8t1A1f)b(UTbc=bS|17PO*qcVlPCd|7!$M$E^Y(55k=i!O zX^lb?#}Lz|&+ze3@iAYc#)b$882Of5Y#@geW;-^iHIf2_`) zGY{}GF`KTbqsN*$|9(G{%Q6kLVCCbldKqP8LC}u`gTZf!gM3+zO=bbdEF4xlu5wvu zK37|r&EnMC$h1!nUlNt0_j7ma6p{`_Tm4(J1@w6POQl(6r5r>mWM;1~K{+7%^*#eFs9JHKuSL+z5Xgl!P7`7Abr0!G@ zfyKMw6SsEI){v`9o}8?s{sp5`jz8ta{`(#=)iXCSSmvN)Y0gIY8d4wB&##JZu`|on z+F3(-j~%dA-zJm}IN`P6?CwJH??VKv)^bvwZth~O$Y$}et$6b3UF{@z!2o}Y_&lmL zB6COD>q7ccO@B9o^hT_Ui50Lc04})6J!&%^Vhv zy59Qc&RWXGR&0p2IeT?|A{*$6TaV7PH>6RO^=`H~ON+_lOKx`sAFDWq(4Xd}s%+%% z@8Q7;8RiijX<%PQO+9+gEE(2G0vEL0nL0L=o~-N8&K#1@F@&D`0v9cp!0FxwY>1aF zHgmU5`a(m7gie@Z`i^vVkN)wtwL*T+=xh!PH7y4(O+DXLW;$$vf40u(Rg!&= z$W$4ag^VvHzN;GEdW}e=A0m!gdWJM}3}Lj;#AemBWe$$ZnMK%Bx%DrGrP27sy+3;k zKGC}hi}Eab))OPgfZn>T>E&xef= LooseVersion("0.9.3"): + # Xlrd >= 0.9.3 can handle Excel milliseconds. + expected = DataFrame.from_dict({"Time": [time(1, 2, 3), + time(2, 45, 56, 100000), + time(4, 29, 49, 200000), + time(6, 13, 42, 300000), + time(7, 57, 35, 400000), + time(9, 41, 28, 500000), + time(11, 25, 21, 600000), + time(13, 9, 14, 700000), + time(14, 53, 7, 800000), + time(16, 37, 0, 900000), + time(18, 20, 54)]}) + else: + # Xlrd < 0.9.3 rounds Excel milliseconds. 
+ expected = DataFrame.from_dict({"Time": [time(1, 2, 3), + time(2, 45, 56), + time(4, 29, 49), + time(6, 13, 42), + time(7, 57, 35), + time(9, 41, 29), + time(11, 25, 22), + time(13, 9, 15), + time(14, 53, 8), + time(16, 37, 1), + time(18, 20, 54)]}) + + actual = self.get_exceldf('times_1900', ext, 'Sheet1') + tm.assert_frame_equal(actual, expected) + + actual = self.get_exceldf('times_1904', ext, 'Sheet1') + tm.assert_frame_equal(actual, expected) + + def test_read_excel_multiindex(self, ext): + # GH 4679 + mi = MultiIndex.from_product([['foo', 'bar'], ['a', 'b']]) + mi_file = os.path.join(self.dirpath, 'testmultiindex' + ext) + + expected = DataFrame([[1, 2.5, pd.Timestamp('2015-01-01'), True], + [2, 3.5, pd.Timestamp('2015-01-02'), False], + [3, 4.5, pd.Timestamp('2015-01-03'), False], + [4, 5.5, pd.Timestamp('2015-01-04'), True]], + columns=mi) + + actual = read_excel(mi_file, 'mi_column', header=[0, 1]) + tm.assert_frame_equal(actual, expected) + actual = read_excel(mi_file, 'mi_column', header=[0, 1], index_col=0) + tm.assert_frame_equal(actual, expected) + + expected.columns = ['a', 'b', 'c', 'd'] + expected.index = mi + actual = read_excel(mi_file, 'mi_index', index_col=[0, 1]) + tm.assert_frame_equal(actual, expected, check_names=False) + + expected.columns = mi + actual = read_excel(mi_file, 'both', index_col=[0, 1], header=[0, 1]) + tm.assert_frame_equal(actual, expected, check_names=False) + + expected.index = mi.set_names(['ilvl1', 'ilvl2']) + expected.columns = ['a', 'b', 'c', 'd'] + actual = read_excel(mi_file, 'mi_index_name', index_col=[0, 1]) + tm.assert_frame_equal(actual, expected) + + expected.index = list(range(4)) + expected.columns = mi.set_names(['c1', 'c2']) + actual = read_excel(mi_file, 'mi_column_name', + header=[0, 1], index_col=0) + tm.assert_frame_equal(actual, expected) + + # Issue #11317 + expected.columns = mi.set_levels( + [1, 2], level=1).set_names(['c1', 'c2']) + actual = read_excel(mi_file, 'name_with_int', + index_col=0, 
header=[0, 1]) + tm.assert_frame_equal(actual, expected) + + expected.columns = mi.set_names(['c1', 'c2']) + expected.index = mi.set_names(['ilvl1', 'ilvl2']) + actual = read_excel(mi_file, 'both_name', + index_col=[0, 1], header=[0, 1]) + tm.assert_frame_equal(actual, expected) + + actual = read_excel(mi_file, 'both_name', + index_col=[0, 1], header=[0, 1]) + tm.assert_frame_equal(actual, expected) + + actual = read_excel(mi_file, 'both_name_skiprows', index_col=[0, 1], + header=[0, 1], skiprows=2) + tm.assert_frame_equal(actual, expected) + + @td.skip_if_no('xlsxwriter') + def test_read_excel_multiindex_empty_level(self, ext): + # GH 12453 + with ensure_clean('.xlsx') as path: + df = DataFrame({ + ('One', 'x'): {0: 1}, + ('Two', 'X'): {0: 3}, + ('Two', 'Y'): {0: 7}, + ('Zero', ''): {0: 0} + }) + + expected = DataFrame({ + ('One', u'x'): {0: 1}, + ('Two', u'X'): {0: 3}, + ('Two', u'Y'): {0: 7}, + ('Zero', 'Unnamed: 3_level_1'): {0: 0} + }) + + df.to_excel(path) + actual = pd.read_excel(path, header=[0, 1]) + tm.assert_frame_equal(actual, expected) + + df = pd.DataFrame({ + ('Beg', ''): {0: 0}, + ('Middle', 'x'): {0: 1}, + ('Tail', 'X'): {0: 3}, + ('Tail', 'Y'): {0: 7} + }) + + expected = pd.DataFrame({ + ('Beg', 'Unnamed: 0_level_1'): {0: 0}, + ('Middle', u'x'): {0: 1}, + ('Tail', u'X'): {0: 3}, + ('Tail', u'Y'): {0: 7} + }) + + df.to_excel(path) + actual = pd.read_excel(path, header=[0, 1]) + tm.assert_frame_equal(actual, expected) + + @td.skip_if_no('xlsxwriter') + def test_excel_multindex_roundtrip(self, ext): + # GH 4679 + with ensure_clean('.xlsx') as pth: + for c_idx_names in [True, False]: + for r_idx_names in [True, False]: + for c_idx_levels in [1, 3]: + for r_idx_levels in [1, 3]: + # column index name can't be serialized unless + # MultiIndex + if (c_idx_levels == 1 and c_idx_names): + continue + + # empty name case current read in as unnamed + # levels, not Nones + check_names = True + if not r_idx_names and r_idx_levels > 1: + check_names = False + + 
df = mkdf(5, 5, c_idx_names, + r_idx_names, c_idx_levels, + r_idx_levels) + df.to_excel(pth) + act = pd.read_excel( + pth, index_col=list(range(r_idx_levels)), + header=list(range(c_idx_levels))) + tm.assert_frame_equal( + df, act, check_names=check_names) + + df.iloc[0, :] = np.nan + df.to_excel(pth) + act = pd.read_excel( + pth, index_col=list(range(r_idx_levels)), + header=list(range(c_idx_levels))) + tm.assert_frame_equal( + df, act, check_names=check_names) + + df.iloc[-1, :] = np.nan + df.to_excel(pth) + act = pd.read_excel( + pth, index_col=list(range(r_idx_levels)), + header=list(range(c_idx_levels))) + tm.assert_frame_equal( + df, act, check_names=check_names) + + def test_excel_old_index_format(self, ext): + # see gh-4679 + filename = 'test_index_name_pre17' + ext + in_file = os.path.join(self.dirpath, filename) + + # We detect headers to determine if index names exist, so + # that "index" name in the "names" version of the data will + # now be interpreted as rows that include null data. 
+ data = np.array([[None, None, None, None, None], + ['R0C0', 'R0C1', 'R0C2', 'R0C3', 'R0C4'], + ['R1C0', 'R1C1', 'R1C2', 'R1C3', 'R1C4'], + ['R2C0', 'R2C1', 'R2C2', 'R2C3', 'R2C4'], + ['R3C0', 'R3C1', 'R3C2', 'R3C3', 'R3C4'], + ['R4C0', 'R4C1', 'R4C2', 'R4C3', 'R4C4']]) + columns = ['C_l0_g0', 'C_l0_g1', 'C_l0_g2', 'C_l0_g3', 'C_l0_g4'] + mi = MultiIndex(levels=[['R0', 'R_l0_g0', 'R_l0_g1', + 'R_l0_g2', 'R_l0_g3', 'R_l0_g4'], + ['R1', 'R_l1_g0', 'R_l1_g1', + 'R_l1_g2', 'R_l1_g3', 'R_l1_g4']], + labels=[[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]], + names=[None, None]) + si = Index(['R0', 'R_l0_g0', 'R_l0_g1', 'R_l0_g2', + 'R_l0_g3', 'R_l0_g4'], name=None) + + expected = pd.DataFrame(data, index=si, columns=columns) + + actual = pd.read_excel(in_file, 'single_names') + tm.assert_frame_equal(actual, expected) + + expected.index = mi + + actual = pd.read_excel(in_file, 'multi_names') + tm.assert_frame_equal(actual, expected) + + # The analogous versions of the "names" version data + # where there are explicitly no names for the indices. 
+ data = np.array([['R0C0', 'R0C1', 'R0C2', 'R0C3', 'R0C4'], + ['R1C0', 'R1C1', 'R1C2', 'R1C3', 'R1C4'], + ['R2C0', 'R2C1', 'R2C2', 'R2C3', 'R2C4'], + ['R3C0', 'R3C1', 'R3C2', 'R3C3', 'R3C4'], + ['R4C0', 'R4C1', 'R4C2', 'R4C3', 'R4C4']]) + columns = ['C_l0_g0', 'C_l0_g1', 'C_l0_g2', 'C_l0_g3', 'C_l0_g4'] + mi = MultiIndex(levels=[['R_l0_g0', 'R_l0_g1', 'R_l0_g2', + 'R_l0_g3', 'R_l0_g4'], + ['R_l1_g0', 'R_l1_g1', 'R_l1_g2', + 'R_l1_g3', 'R_l1_g4']], + labels=[[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]], + names=[None, None]) + si = Index(['R_l0_g0', 'R_l0_g1', 'R_l0_g2', + 'R_l0_g3', 'R_l0_g4'], name=None) + + expected = pd.DataFrame(data, index=si, columns=columns) + + actual = pd.read_excel(in_file, 'single_no_names') + tm.assert_frame_equal(actual, expected) + + expected.index = mi + + actual = pd.read_excel(in_file, 'multi_no_names', index_col=[0, 1]) + tm.assert_frame_equal(actual, expected, check_names=False) + + def test_read_excel_bool_header_arg(self, ext): + # GH 6114 + for arg in [True, False]: + with pytest.raises(TypeError): + pd.read_excel(os.path.join(self.dirpath, 'test1' + ext), + header=arg) + + def test_read_excel_chunksize(self, ext): + # GH 8011 + with pytest.raises(NotImplementedError): + pd.read_excel(os.path.join(self.dirpath, 'test1' + ext), + chunksize=100) + + @td.skip_if_no('openpyxl') + @td.skip_if_no('xlwt') + def test_read_excel_parse_dates(self, ext): + # GH 11544, 12051 + df = DataFrame( + {'col': [1, 2, 3], + 'date_strings': pd.date_range('2012-01-01', periods=3)}) + df2 = df.copy() + df2['date_strings'] = df2['date_strings'].dt.strftime('%m/%d/%Y') + + with ensure_clean(ext) as pth: + df2.to_excel(pth) + + res = read_excel(pth) + tm.assert_frame_equal(df2, res) + + # no index_col specified when parse_dates is True + with tm.assert_produces_warning(): + res = read_excel(pth, parse_dates=True) + tm.assert_frame_equal(df2, res) + + res = read_excel(pth, parse_dates=['date_strings'], index_col=0) + tm.assert_frame_equal(df, res) + + dateparser = 
lambda x: pd.datetime.strptime(x, '%m/%d/%Y') + res = read_excel(pth, parse_dates=['date_strings'], + date_parser=dateparser, index_col=0) + tm.assert_frame_equal(df, res) + + def test_read_excel_skiprows_list(self, ext): + # GH 4903 + actual = pd.read_excel(os.path.join(self.dirpath, + 'testskiprows' + ext), + 'skiprows_list', skiprows=[0, 2]) + expected = DataFrame([[1, 2.5, pd.Timestamp('2015-01-01'), True], + [2, 3.5, pd.Timestamp('2015-01-02'), False], + [3, 4.5, pd.Timestamp('2015-01-03'), False], + [4, 5.5, pd.Timestamp('2015-01-04'), True]], + columns=['a', 'b', 'c', 'd']) + tm.assert_frame_equal(actual, expected) + + actual = pd.read_excel(os.path.join(self.dirpath, + 'testskiprows' + ext), + 'skiprows_list', skiprows=np.array([0, 2])) + tm.assert_frame_equal(actual, expected) + + def test_read_excel_nrows(self, ext): + # GH 16645 + num_rows_to_pull = 5 + actual = pd.read_excel(os.path.join(self.dirpath, 'test1' + ext), + nrows=num_rows_to_pull) + expected = pd.read_excel(os.path.join(self.dirpath, + 'test1' + ext)) + expected = expected[:num_rows_to_pull] + tm.assert_frame_equal(actual, expected) + + def test_read_excel_nrows_greater_than_nrows_in_file(self, ext): + # GH 16645 + expected = pd.read_excel(os.path.join(self.dirpath, + 'test1' + ext)) + num_records_in_file = len(expected) + num_rows_to_pull = num_records_in_file + 10 + actual = pd.read_excel(os.path.join(self.dirpath, 'test1' + ext), + nrows=num_rows_to_pull) + tm.assert_frame_equal(actual, expected) + + def test_read_excel_nrows_non_integer_parameter(self, ext): + # GH 16645 + msg = "'nrows' must be an integer >=0" + with tm.assert_raises_regex(ValueError, msg): + pd.read_excel(os.path.join(self.dirpath, 'test1' + ext), + nrows='5') + + def test_read_excel_squeeze(self, ext): + # GH 12157 + f = os.path.join(self.dirpath, 'test_squeeze' + ext) + + actual = pd.read_excel(f, 'two_columns', index_col=0, squeeze=True) + expected = pd.Series([2, 3, 4], [4, 5, 6], name='b') + expected.index.name = 
'a' + tm.assert_series_equal(actual, expected) + + actual = pd.read_excel(f, 'two_columns', squeeze=True) + expected = pd.DataFrame({'a': [4, 5, 6], + 'b': [2, 3, 4]}) + tm.assert_frame_equal(actual, expected) + + actual = pd.read_excel(f, 'one_column', squeeze=True) + expected = pd.Series([1, 2, 3], name='a') + tm.assert_series_equal(actual, expected) + + +class _WriterBase(SharedItems): + + @pytest.fixture(autouse=True) + def set_engine_and_path(self, request, merge_cells, engine, ext): + """Fixture to set engine and open file for use in each test case + + Rather than requiring `engine=...` to be provided explicitly as an + argument in each test, this fixture sets a global option to dictate + which engine should be used to write Excel files. After executing + the test it rolls back said change to the global option. + + It also uses a context manager to open a temporary excel file for + the function to write to, accessible via `self.path` + + Notes + ----- + This fixture will run as part of each test method defined in the + class and any subclasses, on account of the `autouse=True` + argument + """ + option_name = 'io.excel.{ext}.writer'.format(ext=ext.strip('.')) + prev_engine = get_option(option_name) + set_option(option_name, engine) + with ensure_clean(ext) as path: + self.path = path + yield + set_option(option_name, prev_engine) # Roll back option change + + +@pytest.mark.parametrize("merge_cells", [True, False]) +@pytest.mark.parametrize("engine,ext", [ + pytest.param('openpyxl', '.xlsx', marks=pytest.mark.skipif( + not td.safe_import('openpyxl'), reason='No openpyxl')), + pytest.param('openpyxl', '.xlsm', marks=pytest.mark.skipif( + not td.safe_import('openpyxl'), reason='No openpyxl')), + pytest.param('xlwt', '.xls', marks=pytest.mark.skipif( + not td.safe_import('xlwt'), reason='No xlwt')), + pytest.param('xlsxwriter', '.xlsx', marks=pytest.mark.skipif( + not td.safe_import('xlsxwriter'), reason='No xlsxwriter')) +]) +class TestExcelWriter(_WriterBase): 
+ # Base class for test cases to run with different Excel writers. + + def test_excel_sheet_by_name_raise(self, merge_cells, engine, ext): + import xlrd + + gt = DataFrame(np.random.randn(10, 2)) + gt.to_excel(self.path) + xl = ExcelFile(self.path) + df = read_excel(xl, 0) + tm.assert_frame_equal(gt, df) + + with pytest.raises(xlrd.XLRDError): + read_excel(xl, '0') + + def test_excelwriter_contextmanager(self, merge_cells, engine, ext): + with ExcelWriter(self.path) as writer: + self.frame.to_excel(writer, 'Data1') + self.frame2.to_excel(writer, 'Data2') + + with ExcelFile(self.path) as reader: + found_df = read_excel(reader, 'Data1') + found_df2 = read_excel(reader, 'Data2') + tm.assert_frame_equal(found_df, self.frame) + tm.assert_frame_equal(found_df2, self.frame2) + + def test_roundtrip(self, merge_cells, engine, ext): + self.frame['A'][:5] = nan + + self.frame.to_excel(self.path, 'test1') + self.frame.to_excel(self.path, 'test1', columns=['A', 'B']) + self.frame.to_excel(self.path, 'test1', header=False) + self.frame.to_excel(self.path, 'test1', index=False) + + # test roundtrip + self.frame.to_excel(self.path, 'test1') + recons = read_excel(self.path, 'test1', index_col=0) + tm.assert_frame_equal(self.frame, recons) + + self.frame.to_excel(self.path, 'test1', index=False) + recons = read_excel(self.path, 'test1', index_col=None) + recons.index = self.frame.index + tm.assert_frame_equal(self.frame, recons) + + self.frame.to_excel(self.path, 'test1', na_rep='NA') + recons = read_excel(self.path, 'test1', index_col=0, na_values=['NA']) + tm.assert_frame_equal(self.frame, recons) + + # GH 3611 + self.frame.to_excel(self.path, 'test1', na_rep='88') + recons = read_excel(self.path, 'test1', index_col=0, na_values=['88']) + tm.assert_frame_equal(self.frame, recons) + + self.frame.to_excel(self.path, 'test1', na_rep='88') + recons = read_excel(self.path, 'test1', index_col=0, + na_values=[88, 88.0]) + tm.assert_frame_equal(self.frame, recons) + + # GH 6573 + 
self.frame.to_excel(self.path, 'Sheet1') + recons = read_excel(self.path, index_col=0) + tm.assert_frame_equal(self.frame, recons) + + self.frame.to_excel(self.path, '0') + recons = read_excel(self.path, index_col=0) + tm.assert_frame_equal(self.frame, recons) + + # GH 8825 Pandas Series should provide to_excel method + s = self.frame["A"] + s.to_excel(self.path) + recons = read_excel(self.path, index_col=0) + tm.assert_frame_equal(s.to_frame(), recons) + + def test_mixed(self, merge_cells, engine, ext): + self.mixed_frame.to_excel(self.path, 'test1') + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1', index_col=0) + tm.assert_frame_equal(self.mixed_frame, recons) + + def test_tsframe(self, merge_cells, engine, ext): + df = tm.makeTimeDataFrame()[:5] + + df.to_excel(self.path, 'test1') + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1') + tm.assert_frame_equal(df, recons) + + def test_basics_with_nan(self, merge_cells, engine, ext): + self.frame['A'][:5] = nan + self.frame.to_excel(self.path, 'test1') + self.frame.to_excel(self.path, 'test1', columns=['A', 'B']) + self.frame.to_excel(self.path, 'test1', header=False) + self.frame.to_excel(self.path, 'test1', index=False) + + @pytest.mark.parametrize("np_type", [ + np.int8, np.int16, np.int32, np.int64]) + def test_int_types(self, merge_cells, engine, ext, np_type): + # Test np.int values read come back as int (rather than float + # which is Excel's format). 
+ frame = DataFrame(np.random.randint(-10, 10, size=(10, 2)), + dtype=np_type) + frame.to_excel(self.path, 'test1') + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1') + int_frame = frame.astype(np.int64) + tm.assert_frame_equal(int_frame, recons) + recons2 = read_excel(self.path, 'test1') + tm.assert_frame_equal(int_frame, recons2) + + # test with convert_float=False comes back as float + float_frame = frame.astype(float) + recons = read_excel(self.path, 'test1', convert_float=False) + tm.assert_frame_equal(recons, float_frame, + check_index_type=False, + check_column_type=False) + + @pytest.mark.parametrize("np_type", [ + np.float16, np.float32, np.float64]) + def test_float_types(self, merge_cells, engine, ext, np_type): + # Test np.float values read come back as float. + frame = DataFrame(np.random.random_sample(10), dtype=np_type) + frame.to_excel(self.path, 'test1') + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1').astype(np_type) + tm.assert_frame_equal(frame, recons, check_dtype=False) + + @pytest.mark.parametrize("np_type", [np.bool8, np.bool_]) + def test_bool_types(self, merge_cells, engine, ext, np_type): + # Test np.bool values read come back as float. 
+ frame = (DataFrame([1, 0, True, False], dtype=np_type)) + frame.to_excel(self.path, 'test1') + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1').astype(np_type) + tm.assert_frame_equal(frame, recons) + + def test_inf_roundtrip(self, merge_cells, engine, ext): + frame = DataFrame([(1, np.inf), (2, 3), (5, -np.inf)]) + frame.to_excel(self.path, 'test1') + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1') + tm.assert_frame_equal(frame, recons) + + def test_sheets(self, merge_cells, engine, ext): + self.frame['A'][:5] = nan + + self.frame.to_excel(self.path, 'test1') + self.frame.to_excel(self.path, 'test1', columns=['A', 'B']) + self.frame.to_excel(self.path, 'test1', header=False) + self.frame.to_excel(self.path, 'test1', index=False) + + # Test writing to separate sheets + writer = ExcelWriter(self.path) + self.frame.to_excel(writer, 'test1') + self.tsframe.to_excel(writer, 'test2') + writer.save() + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1', index_col=0) + tm.assert_frame_equal(self.frame, recons) + recons = read_excel(reader, 'test2', index_col=0) + tm.assert_frame_equal(self.tsframe, recons) + assert 2 == len(reader.sheet_names) + assert 'test1' == reader.sheet_names[0] + assert 'test2' == reader.sheet_names[1] + + def test_colaliases(self, merge_cells, engine, ext): + self.frame['A'][:5] = nan + + self.frame.to_excel(self.path, 'test1') + self.frame.to_excel(self.path, 'test1', columns=['A', 'B']) + self.frame.to_excel(self.path, 'test1', header=False) + self.frame.to_excel(self.path, 'test1', index=False) + + # column aliases + col_aliases = Index(['AA', 'X', 'Y', 'Z']) + self.frame2.to_excel(self.path, 'test1', header=col_aliases) + reader = ExcelFile(self.path) + rs = read_excel(reader, 'test1', index_col=0) + xp = self.frame2.copy() + xp.columns = col_aliases + tm.assert_frame_equal(xp, rs) + + def test_roundtrip_indexlabels(self, merge_cells, engine, ext): + self.frame['A'][:5] = nan + + 
self.frame.to_excel(self.path, 'test1') + self.frame.to_excel(self.path, 'test1', columns=['A', 'B']) + self.frame.to_excel(self.path, 'test1', header=False) + self.frame.to_excel(self.path, 'test1', index=False) + + # test index_label + frame = (DataFrame(np.random.randn(10, 2)) >= 0) + frame.to_excel(self.path, 'test1', + index_label=['test'], + merge_cells=merge_cells) + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1', + index_col=0, + ).astype(np.int64) + frame.index.names = ['test'] + assert frame.index.names == recons.index.names + + frame = (DataFrame(np.random.randn(10, 2)) >= 0) + frame.to_excel(self.path, + 'test1', + index_label=['test', 'dummy', 'dummy2'], + merge_cells=merge_cells) + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1', + index_col=0, + ).astype(np.int64) + frame.index.names = ['test'] + assert frame.index.names == recons.index.names + + frame = (DataFrame(np.random.randn(10, 2)) >= 0) + frame.to_excel(self.path, + 'test1', + index_label='test', + merge_cells=merge_cells) + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1', + index_col=0, + ).astype(np.int64) + frame.index.names = ['test'] + tm.assert_frame_equal(frame, recons.astype(bool)) + + self.frame.to_excel(self.path, + 'test1', + columns=['A', 'B', 'C', 'D'], + index=False, merge_cells=merge_cells) + # take 'A' and 'B' as indexes (same row as cols 'C', 'D') + df = self.frame.copy() + df = df.set_index(['A', 'B']) + + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1', index_col=[0, 1]) + tm.assert_frame_equal(df, recons, check_less_precise=True) + + def test_excel_roundtrip_indexname(self, merge_cells, engine, ext): + df = DataFrame(np.random.randn(10, 4)) + df.index.name = 'foo' + + df.to_excel(self.path, merge_cells=merge_cells) + + xf = ExcelFile(self.path) + result = read_excel(xf, xf.sheet_names[0], + index_col=0) + + tm.assert_frame_equal(result, df) + assert result.index.name == 'foo' + + def 
test_excel_roundtrip_datetime(self, merge_cells, engine, ext): + # datetime.date, not sure what to test here exactly + tsf = self.tsframe.copy() + + tsf.index = [x.date() for x in self.tsframe.index] + tsf.to_excel(self.path, 'test1', merge_cells=merge_cells) + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1') + tm.assert_frame_equal(self.tsframe, recons) + + # GH4133 - excel output format strings + def test_excel_date_datetime_format(self, merge_cells, engine, ext): + df = DataFrame([[date(2014, 1, 31), + date(1999, 9, 24)], + [datetime(1998, 5, 26, 23, 33, 4), + datetime(2014, 2, 28, 13, 5, 13)]], + index=['DATE', 'DATETIME'], columns=['X', 'Y']) + df_expected = DataFrame([[datetime(2014, 1, 31), + datetime(1999, 9, 24)], + [datetime(1998, 5, 26, 23, 33, 4), + datetime(2014, 2, 28, 13, 5, 13)]], + index=['DATE', 'DATETIME'], columns=['X', 'Y']) + + with ensure_clean(ext) as filename2: + writer1 = ExcelWriter(self.path) + writer2 = ExcelWriter(filename2, + date_format='DD.MM.YYYY', + datetime_format='DD.MM.YYYY HH-MM-SS') + + df.to_excel(writer1, 'test1') + df.to_excel(writer2, 'test1') + + writer1.close() + writer2.close() + + reader1 = ExcelFile(self.path) + reader2 = ExcelFile(filename2) + + rs1 = read_excel(reader1, 'test1', index_col=None) + rs2 = read_excel(reader2, 'test1', index_col=None) + + tm.assert_frame_equal(rs1, rs2) + + # since the reader returns a datetime object for dates, we need + # to use df_expected to check the result + tm.assert_frame_equal(rs2, df_expected) + + def test_to_excel_interval_no_labels(self, merge_cells, engine, ext): + # GH19242 - test writing Interval without labels + frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)), + dtype=np.int64) + expected = frame.copy() + frame['new'] = pd.cut(frame[0], 10) + expected['new'] = pd.cut(expected[0], 10).astype(str) + frame.to_excel(self.path, 'test1') + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1') + tm.assert_frame_equal(expected, 
recons) + + def test_to_excel_interval_labels(self, merge_cells, engine, ext): + # GH19242 - test writing Interval with labels + frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)), + dtype=np.int64) + expected = frame.copy() + intervals = pd.cut(frame[0], 10, labels=['A', 'B', 'C', 'D', 'E', + 'F', 'G', 'H', 'I', 'J']) + frame['new'] = intervals + expected['new'] = pd.Series(list(intervals)) + frame.to_excel(self.path, 'test1') + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1') + tm.assert_frame_equal(expected, recons) + + def test_to_excel_timedelta(self, merge_cells, engine, ext): + # GH 19242, GH9155 - test writing timedelta to xls + frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)), + columns=['A'], + dtype=np.int64 + ) + expected = frame.copy() + frame['new'] = frame['A'].apply(lambda x: timedelta(seconds=x)) + expected['new'] = expected['A'].apply( + lambda x: timedelta(seconds=x).total_seconds() / float(86400)) + frame.to_excel(self.path, 'test1') + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1') + tm.assert_frame_equal(expected, recons) + + def test_to_excel_periodindex(self, merge_cells, engine, ext): + frame = self.tsframe + xp = frame.resample('M', kind='period').mean() + + xp.to_excel(self.path, 'sht1') + + reader = ExcelFile(self.path) + rs = read_excel(reader, 'sht1', index_col=0) + tm.assert_frame_equal(xp, rs.to_period('M')) + + def test_to_excel_multiindex(self, merge_cells, engine, ext): + frame = self.frame + arrays = np.arange(len(frame.index) * 2).reshape(2, -1) + new_index = MultiIndex.from_arrays(arrays, + names=['first', 'second']) + frame.index = new_index + + frame.to_excel(self.path, 'test1', header=False) + frame.to_excel(self.path, 'test1', columns=['A', 'B']) + + # round trip + frame.to_excel(self.path, 'test1', merge_cells=merge_cells) + reader = ExcelFile(self.path) + df = read_excel(reader, 'test1', index_col=[0, 1]) + tm.assert_frame_equal(frame, df) + + # GH13511 + def 
test_to_excel_multiindex_nan_label(self, merge_cells, engine, ext): + frame = pd.DataFrame({'A': [None, 2, 3], + 'B': [10, 20, 30], + 'C': np.random.sample(3)}) + frame = frame.set_index(['A', 'B']) + + frame.to_excel(self.path, merge_cells=merge_cells) + df = read_excel(self.path, index_col=[0, 1]) + tm.assert_frame_equal(frame, df) + + # Test for Issue 11328. If column indices are integers, make + # sure they are handled correctly for either setting of + # merge_cells + def test_to_excel_multiindex_cols(self, merge_cells, engine, ext): + frame = self.frame + arrays = np.arange(len(frame.index) * 2).reshape(2, -1) + new_index = MultiIndex.from_arrays(arrays, + names=['first', 'second']) + frame.index = new_index + + new_cols_index = MultiIndex.from_tuples([(40, 1), (40, 2), + (50, 1), (50, 2)]) + frame.columns = new_cols_index + header = [0, 1] + if not merge_cells: + header = 0 + + # round trip + frame.to_excel(self.path, 'test1', merge_cells=merge_cells) + reader = ExcelFile(self.path) + df = read_excel(reader, 'test1', header=header, + index_col=[0, 1]) + if not merge_cells: + fm = frame.columns.format(sparsify=False, + adjoin=False, names=False) + frame.columns = [".".join(map(str, q)) for q in zip(*fm)] + tm.assert_frame_equal(frame, df) + + def test_to_excel_multiindex_dates(self, merge_cells, engine, ext): + # try multiindex with dates + tsframe = self.tsframe.copy() + new_index = [tsframe.index, np.arange(len(tsframe.index))] + tsframe.index = MultiIndex.from_arrays(new_index) + + tsframe.index.names = ['time', 'foo'] + tsframe.to_excel(self.path, 'test1', merge_cells=merge_cells) + reader = ExcelFile(self.path) + recons = read_excel(reader, 'test1', + index_col=[0, 1]) + + tm.assert_frame_equal(tsframe, recons) + assert recons.index.names == ('time', 'foo') + + def test_to_excel_multiindex_no_write_index(self, merge_cells, engine, + ext): + # Test writing and re-reading a MI witout the index. GH 5616. + + # Initial non-MI frame. 
+ frame1 = DataFrame({'a': [10, 20], 'b': [30, 40], 'c': [50, 60]}) + + # Add a MI. + frame2 = frame1.copy() + multi_index = MultiIndex.from_tuples([(70, 80), (90, 100)]) + frame2.index = multi_index + + # Write out to Excel without the index. + frame2.to_excel(self.path, 'test1', index=False) + + # Read it back in. + reader = ExcelFile(self.path) + frame3 = read_excel(reader, 'test1') + + # Test that it is the same as the initial frame. + tm.assert_frame_equal(frame1, frame3) + + def test_to_excel_float_format(self, merge_cells, engine, ext): + df = DataFrame([[0.123456, 0.234567, 0.567567], + [12.32112, 123123.2, 321321.2]], + index=['A', 'B'], columns=['X', 'Y', 'Z']) + + df.to_excel(self.path, 'test1', float_format='%.2f') + + reader = ExcelFile(self.path) + rs = read_excel(reader, 'test1', index_col=None) + xp = DataFrame([[0.12, 0.23, 0.57], + [12.32, 123123.20, 321321.20]], + index=['A', 'B'], columns=['X', 'Y', 'Z']) + tm.assert_frame_equal(rs, xp) + + def test_to_excel_output_encoding(self, merge_cells, engine, ext): + # avoid mixed inferred_type + df = DataFrame([[u'\u0192', u'\u0193', u'\u0194'], + [u'\u0195', u'\u0196', u'\u0197']], + index=[u'A\u0192', u'B'], + columns=[u'X\u0193', u'Y', u'Z']) + + with ensure_clean('__tmp_to_excel_float_format__.' 
+ ext) as filename: + df.to_excel(filename, sheet_name='TestSheet', encoding='utf8') + result = read_excel(filename, 'TestSheet', encoding='utf8') + tm.assert_frame_equal(result, df) + + def test_to_excel_unicode_filename(self, merge_cells, engine, ext): + with ensure_clean(u('\u0192u.') + ext) as filename: + try: + f = open(filename, 'wb') + except UnicodeEncodeError: + pytest.skip('no unicode file names on this system') + else: + f.close() + + df = DataFrame([[0.123456, 0.234567, 0.567567], + [12.32112, 123123.2, 321321.2]], + index=['A', 'B'], columns=['X', 'Y', 'Z']) + + df.to_excel(filename, 'test1', float_format='%.2f') + + reader = ExcelFile(filename) + rs = read_excel(reader, 'test1', index_col=None) + xp = DataFrame([[0.12, 0.23, 0.57], + [12.32, 123123.20, 321321.20]], + index=['A', 'B'], columns=['X', 'Y', 'Z']) + tm.assert_frame_equal(rs, xp) + + # def test_to_excel_header_styling_xls(self, merge_cells, engine, ext): + + # import StringIO + # s = StringIO( + # """Date,ticker,type,value + # 2001-01-01,x,close,12.2 + # 2001-01-01,x,open ,12.1 + # 2001-01-01,y,close,12.2 + # 2001-01-01,y,open ,12.1 + # 2001-02-01,x,close,12.2 + # 2001-02-01,x,open ,12.1 + # 2001-02-01,y,close,12.2 + # 2001-02-01,y,open ,12.1 + # 2001-03-01,x,close,12.2 + # 2001-03-01,x,open ,12.1 + # 2001-03-01,y,close,12.2 + # 2001-03-01,y,open ,12.1""") + # df = read_csv(s, parse_dates=["Date"]) + # pdf = df.pivot_table(values="value", rows=["ticker"], + # cols=["Date", "type"]) + + # try: + # import xlwt + # import xlrd + # except ImportError: + # pytest.skip + + # filename = '__tmp_to_excel_header_styling_xls__.xls' + # pdf.to_excel(filename, 'test1') + + # wbk = xlrd.open_workbook(filename, + # formatting_info=True) + # assert ["test1"] == wbk.sheet_names() + # ws = wbk.sheet_by_name('test1') + # assert [(0, 1, 5, 7), (0, 1, 3, 5), (0, 1, 1, 3)] == ws.merged_cells + # for i in range(0, 2): + # for j in range(0, 7): + # xfx = ws.cell_xf_index(0, 0) + # cell_xf = wbk.xf_list[xfx] + # 
font = wbk.font_list + # assert 1 == font[cell_xf.font_index].bold + # assert 1 == cell_xf.border.top_line_style + # assert 1 == cell_xf.border.right_line_style + # assert 1 == cell_xf.border.bottom_line_style + # assert 1 == cell_xf.border.left_line_style + # assert 2 == cell_xf.alignment.hor_align + # os.remove(filename) + # def test_to_excel_header_styling_xlsx(self, merge_cells, engine, ext): + # import StringIO + # s = StringIO( + # """Date,ticker,type,value + # 2001-01-01,x,close,12.2 + # 2001-01-01,x,open ,12.1 + # 2001-01-01,y,close,12.2 + # 2001-01-01,y,open ,12.1 + # 2001-02-01,x,close,12.2 + # 2001-02-01,x,open ,12.1 + # 2001-02-01,y,close,12.2 + # 2001-02-01,y,open ,12.1 + # 2001-03-01,x,close,12.2 + # 2001-03-01,x,open ,12.1 + # 2001-03-01,y,close,12.2 + # 2001-03-01,y,open ,12.1""") + # df = read_csv(s, parse_dates=["Date"]) + # pdf = df.pivot_table(values="value", rows=["ticker"], + # cols=["Date", "type"]) + # try: + # import openpyxl + # from openpyxl.cell import get_column_letter + # except ImportError: + # pytest.skip + # if openpyxl.__version__ < '1.6.1': + # pytest.skip + # # test xlsx_styling + # filename = '__tmp_to_excel_header_styling_xlsx__.xlsx' + # pdf.to_excel(filename, 'test1') + # wbk = openpyxl.load_workbook(filename) + # assert ["test1"] == wbk.get_sheet_names() + # ws = wbk.get_sheet_by_name('test1') + # xlsaddrs = ["%s2" % chr(i) for i in range(ord('A'), ord('H'))] + # xlsaddrs += ["A%s" % i for i in range(1, 6)] + # xlsaddrs += ["B1", "D1", "F1"] + # for xlsaddr in xlsaddrs: + # cell = ws.cell(xlsaddr) + # assert cell.style.font.bold + # assert (openpyxl.style.Border.BORDER_THIN == + # cell.style.borders.top.border_style) + # assert (openpyxl.style.Border.BORDER_THIN == + # cell.style.borders.right.border_style) + # assert (openpyxl.style.Border.BORDER_THIN == + # cell.style.borders.bottom.border_style) + # assert (openpyxl.style.Border.BORDER_THIN == + # cell.style.borders.left.border_style) + # assert 
(openpyxl.style.Alignment.HORIZONTAL_CENTER == + # cell.style.alignment.horizontal) + # mergedcells_addrs = ["C1", "E1", "G1"] + # for maddr in mergedcells_addrs: + # assert ws.cell(maddr).merged + # os.remove(filename) + + def test_excel_010_hemstring(self, merge_cells, engine, ext): + if merge_cells: + pytest.skip('Skip tests for merged MI format.') + + from pandas.util.testing import makeCustomDataframe as mkdf + # ensure limited functionality in 0.10 + # override of #2370 until sorted out in 0.11 + + def roundtrip(df, header=True, parser_hdr=0, index=True): + + df.to_excel(self.path, header=header, + merge_cells=merge_cells, index=index) + xf = ExcelFile(self.path) + res = read_excel(xf, xf.sheet_names[0], header=parser_hdr) + return res + + nrows = 5 + ncols = 3 + for use_headers in (True, False): + for i in range(1, 4): # row multindex up to nlevel=3 + for j in range(1, 4): # col "" + df = mkdf(nrows, ncols, r_idx_nlevels=i, c_idx_nlevels=j) + + # this if will be removed once multi column excel writing + # is implemented for now fixing #9794 + if j > 1: + with pytest.raises(NotImplementedError): + res = roundtrip(df, use_headers, index=False) + else: + res = roundtrip(df, use_headers) + + if use_headers: + assert res.shape == (nrows, ncols + i) + else: + # first row taken as columns + assert res.shape == (nrows - 1, ncols + i) + + # no nans + for r in range(len(res.index)): + for c in range(len(res.columns)): + assert res.iloc[r, c] is not np.nan + + res = roundtrip(DataFrame([0])) + assert res.shape == (1, 1) + assert res.iloc[0, 0] is not np.nan + + res = roundtrip(DataFrame([0]), False, None) + assert res.shape == (1, 2) + assert res.iloc[0, 0] is not np.nan + + def test_excel_010_hemstring_raises_NotImplementedError(self, merge_cells, + engine, ext): + # This test was failing only for j>1 and header=False, + # So I reproduced a simple test. 
+ if merge_cells: + pytest.skip('Skip tests for merged MI format.') + + from pandas.util.testing import makeCustomDataframe as mkdf + # ensure limited functionality in 0.10 + # override of #2370 until sorted out in 0.11 + + def roundtrip2(df, header=True, parser_hdr=0, index=True): + + df.to_excel(self.path, header=header, + merge_cells=merge_cells, index=index) + xf = ExcelFile(self.path) + res = read_excel(xf, xf.sheet_names[0], header=parser_hdr) + return res + + nrows = 5 + ncols = 3 + j = 2 + i = 1 + df = mkdf(nrows, ncols, r_idx_nlevels=i, c_idx_nlevels=j) + with pytest.raises(NotImplementedError): + roundtrip2(df, header=False, index=False) + + def test_duplicated_columns(self, merge_cells, engine, ext): + # Test for issue #5235 + write_frame = DataFrame([[1, 2, 3], [1, 2, 3], [1, 2, 3]]) + colnames = ['A', 'B', 'B'] + + write_frame.columns = colnames + write_frame.to_excel(self.path, 'test1') + + read_frame = read_excel(self.path, 'test1') + read_frame.columns = colnames + tm.assert_frame_equal(write_frame, read_frame) + + # 11007 / #10970 + write_frame = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], + columns=['A', 'B', 'A', 'B']) + write_frame.to_excel(self.path, 'test1') + read_frame = read_excel(self.path, 'test1') + read_frame.columns = ['A', 'B', 'A', 'B'] + tm.assert_frame_equal(write_frame, read_frame) + + # 10982 + write_frame.to_excel(self.path, 'test1', index=False, header=False) + read_frame = read_excel(self.path, 'test1', header=None) + write_frame.columns = [0, 1, 2, 3] + tm.assert_frame_equal(write_frame, read_frame) + + def test_swapped_columns(self, merge_cells, engine, ext): + # Test for issue #5427. 
+ write_frame = DataFrame({'A': [1, 1, 1], + 'B': [2, 2, 2]}) + write_frame.to_excel(self.path, 'test1', columns=['B', 'A']) + + read_frame = read_excel(self.path, 'test1', header=0) + + tm.assert_series_equal(write_frame['A'], read_frame['A']) + tm.assert_series_equal(write_frame['B'], read_frame['B']) + + def test_invalid_columns(self, merge_cells, engine, ext): + # 10982 + write_frame = DataFrame({'A': [1, 1, 1], + 'B': [2, 2, 2]}) + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + write_frame.to_excel(self.path, 'test1', columns=['B', 'C']) + expected = write_frame.reindex(columns=['B', 'C']) + read_frame = read_excel(self.path, 'test1') + tm.assert_frame_equal(expected, read_frame) + + with pytest.raises(KeyError): + write_frame.to_excel(self.path, 'test1', columns=['C', 'D']) + + def test_comment_arg(self, merge_cells, engine, ext): + # Re issue #18735 + # Test the comment argument functionality to read_excel + + # Create file to read in + df = DataFrame({'A': ['one', '#one', 'one'], + 'B': ['two', 'two', '#two']}) + df.to_excel(self.path, 'test_c') + + # Read file without comment arg + result1 = read_excel(self.path, 'test_c') + result1.iloc[1, 0] = None + result1.iloc[1, 1] = None + result1.iloc[2, 1] = None + result2 = read_excel(self.path, 'test_c', comment='#') + tm.assert_frame_equal(result1, result2) + + def test_comment_default(self, merge_cells, engine, ext): + # Re issue #18735 + # Test the comment argument default to read_excel + + # Create file to read in + df = DataFrame({'A': ['one', '#one', 'one'], + 'B': ['two', 'two', '#two']}) + df.to_excel(self.path, 'test_c') + + # Read file with default and explicit comment=None + result1 = read_excel(self.path, 'test_c') + result2 = read_excel(self.path, 'test_c', comment=None) + tm.assert_frame_equal(result1, result2) + + def test_comment_used(self, merge_cells, engine, ext): + # Re issue #18735 + # Test the comment argument is working as expected when used + + # Create file 
to read in + df = DataFrame({'A': ['one', '#one', 'one'], + 'B': ['two', 'two', '#two']}) + df.to_excel(self.path, 'test_c') + + # Test read_frame_comment against manually produced expected output + expected = DataFrame({'A': ['one', None, 'one'], + 'B': ['two', None, None]}) + result = read_excel(self.path, 'test_c', comment='#') + tm.assert_frame_equal(result, expected) + + def test_comment_emptyline(self, merge_cells, engine, ext): + # Re issue #18735 + # Test that read_excel ignores commented lines at the end of file + + df = DataFrame({'a': ['1', '#2'], 'b': ['2', '3']}) + df.to_excel(self.path, index=False) + + # Test that all-comment lines at EoF are ignored + expected = DataFrame({'a': [1], 'b': [2]}) + result = read_excel(self.path, comment='#') + tm.assert_frame_equal(result, expected) + + def test_datetimes(self, merge_cells, engine, ext): + + # Test writing and reading datetimes. For issue #9139. (xref #9185) + datetimes = [datetime(2013, 1, 13, 1, 2, 3), + datetime(2013, 1, 13, 2, 45, 56), + datetime(2013, 1, 13, 4, 29, 49), + datetime(2013, 1, 13, 6, 13, 42), + datetime(2013, 1, 13, 7, 57, 35), + datetime(2013, 1, 13, 9, 41, 28), + datetime(2013, 1, 13, 11, 25, 21), + datetime(2013, 1, 13, 13, 9, 14), + datetime(2013, 1, 13, 14, 53, 7), + datetime(2013, 1, 13, 16, 37, 0), + datetime(2013, 1, 13, 18, 20, 52)] + + write_frame = DataFrame({'A': datetimes}) + write_frame.to_excel(self.path, 'Sheet1') + read_frame = read_excel(self.path, 'Sheet1', header=0) + + tm.assert_series_equal(write_frame['A'], read_frame['A']) + + # GH7074 + def test_bytes_io(self, merge_cells, engine, ext): + bio = BytesIO() + df = DataFrame(np.random.randn(10, 2)) + # pass engine explicitly as there is no file path to infer from + writer = ExcelWriter(bio, engine=engine) + df.to_excel(writer) + writer.save() + bio.seek(0) + reread_df = read_excel(bio) + tm.assert_frame_equal(df, reread_df) + + # GH8188 + def test_write_lists_dict(self, merge_cells, engine, ext): + df = 
DataFrame({'mixed': ['a', ['b', 'c'], {'d': 'e', 'f': 2}], + 'numeric': [1, 2, 3.0], + 'str': ['apple', 'banana', 'cherry']}) + expected = df.copy() + expected.mixed = expected.mixed.apply(str) + expected.numeric = expected.numeric.astype('int64') + + df.to_excel(self.path, 'Sheet1') + read = read_excel(self.path, 'Sheet1', header=0) + tm.assert_frame_equal(read, expected) + + # GH13347 + def test_true_and_false_value_options(self, merge_cells, engine, ext): + df = pd.DataFrame([['foo', 'bar']], columns=['col1', 'col2']) + expected = df.replace({'foo': True, + 'bar': False}) + + df.to_excel(self.path) + read_frame = read_excel(self.path, true_values=['foo'], + false_values=['bar']) + tm.assert_frame_equal(read_frame, expected) + + def test_freeze_panes(self, merge_cells, engine, ext): + # GH15160 + expected = DataFrame([[1, 2], [3, 4]], columns=['col1', 'col2']) + expected.to_excel(self.path, "Sheet1", freeze_panes=(1, 1)) + result = read_excel(self.path) + tm.assert_frame_equal(expected, result) + + def test_path_pathlib(self, merge_cells, engine, ext): + df = tm.makeDataFrame() + writer = partial(df.to_excel, engine=engine) + reader = partial(pd.read_excel) + result = tm.round_trip_pathlib(writer, reader, + path="foo.{}".format(ext)) + tm.assert_frame_equal(df, result) + + def test_path_localpath(self, merge_cells, engine, ext): + df = tm.makeDataFrame() + writer = partial(df.to_excel, engine=engine) + reader = partial(pd.read_excel) + result = tm.round_trip_pathlib(writer, reader, + path="foo.{}".format(ext)) + tm.assert_frame_equal(df, result) + + +@td.skip_if_no('openpyxl') +@pytest.mark.parametrize("merge_cells,ext,engine", [ + (None, '.xlsx', 'openpyxl')]) +class TestOpenpyxlTests(_WriterBase): + + def test_to_excel_styleconverter(self, merge_cells, ext, engine): + from openpyxl import styles + + hstyle = { + "font": { + "color": '00FF0000', + "bold": True, + }, + "borders": { + "top": "thin", + "right": "thin", + "bottom": "thin", + "left": "thin", + }, + 
"alignment": { + "horizontal": "center", + "vertical": "top", + }, + "fill": { + "patternType": 'solid', + 'fgColor': { + 'rgb': '006666FF', + 'tint': 0.3, + }, + }, + "number_format": { + "format_code": "0.00" + }, + "protection": { + "locked": True, + "hidden": False, + }, + } + + font_color = styles.Color('00FF0000') + font = styles.Font(bold=True, color=font_color) + side = styles.Side(style=styles.borders.BORDER_THIN) + border = styles.Border(top=side, right=side, bottom=side, left=side) + alignment = styles.Alignment(horizontal='center', vertical='top') + fill_color = styles.Color(rgb='006666FF', tint=0.3) + fill = styles.PatternFill(patternType='solid', fgColor=fill_color) + + number_format = '0.00' + + protection = styles.Protection(locked=True, hidden=False) + + kw = _OpenpyxlWriter._convert_to_style_kwargs(hstyle) + assert kw['font'] == font + assert kw['border'] == border + assert kw['alignment'] == alignment + assert kw['fill'] == fill + assert kw['number_format'] == number_format + assert kw['protection'] == protection + + def test_write_cells_merge_styled(self, merge_cells, ext, engine): + from pandas.io.formats.excel import ExcelCell + + sheet_name = 'merge_styled' + + sty_b1 = {'font': {'color': '00FF0000'}} + sty_a2 = {'font': {'color': '0000FF00'}} + + initial_cells = [ + ExcelCell(col=1, row=0, val=42, style=sty_b1), + ExcelCell(col=0, row=1, val=99, style=sty_a2), + ] + + sty_merged = {'font': {'color': '000000FF', 'bold': True}} + sty_kwargs = _OpenpyxlWriter._convert_to_style_kwargs(sty_merged) + openpyxl_sty_merged = sty_kwargs['font'] + merge_cells = [ + ExcelCell(col=0, row=0, val='pandas', + mergestart=1, mergeend=1, style=sty_merged), + ] + + with ensure_clean(ext) as path: + writer = _OpenpyxlWriter(path) + writer.write_cells(initial_cells, sheet_name=sheet_name) + writer.write_cells(merge_cells, sheet_name=sheet_name) + + wks = writer.sheets[sheet_name] + xcell_b1 = wks['B1'] + xcell_a2 = wks['A2'] + assert xcell_b1.font == 
openpyxl_sty_merged + assert xcell_a2.font == openpyxl_sty_merged + + +@td.skip_if_no('xlwt') +@pytest.mark.parametrize("merge_cells,ext,engine", [ + (None, '.xls', 'xlwt')]) +class TestXlwtTests(_WriterBase): + + def test_excel_raise_error_on_multiindex_columns_and_no_index( + self, merge_cells, ext, engine): + # MultiIndex as columns is not yet implemented 9794 + cols = MultiIndex.from_tuples([('site', ''), + ('2014', 'height'), + ('2014', 'weight')]) + df = DataFrame(np.random.randn(10, 3), columns=cols) + with pytest.raises(NotImplementedError): + with ensure_clean(ext) as path: + df.to_excel(path, index=False) + + def test_excel_multiindex_columns_and_index_true(self, merge_cells, ext, + engine): + cols = MultiIndex.from_tuples([('site', ''), + ('2014', 'height'), + ('2014', 'weight')]) + df = pd.DataFrame(np.random.randn(10, 3), columns=cols) + with ensure_clean(ext) as path: + df.to_excel(path, index=True) + + def test_excel_multiindex_index(self, merge_cells, ext, engine): + # MultiIndex as index works so assert no error #9794 + cols = MultiIndex.from_tuples([('site', ''), + ('2014', 'height'), + ('2014', 'weight')]) + df = DataFrame(np.random.randn(3, 10), index=cols) + with ensure_clean(ext) as path: + df.to_excel(path, index=False) + + def test_to_excel_styleconverter(self, merge_cells, ext, engine): + import xlwt + + hstyle = {"font": {"bold": True}, + "borders": {"top": "thin", + "right": "thin", + "bottom": "thin", + "left": "thin"}, + "alignment": {"horizontal": "center", "vertical": "top"}} + + xls_style = _XlwtWriter._convert_to_style(hstyle) + assert xls_style.font.bold + assert xlwt.Borders.THIN == xls_style.borders.top + assert xlwt.Borders.THIN == xls_style.borders.right + assert xlwt.Borders.THIN == xls_style.borders.bottom + assert xlwt.Borders.THIN == xls_style.borders.left + assert xlwt.Alignment.HORZ_CENTER == xls_style.alignment.horz + assert xlwt.Alignment.VERT_TOP == xls_style.alignment.vert + + +@td.skip_if_no('xlsxwriter') 
+@pytest.mark.parametrize("merge_cells,ext,engine", [ + (None, '.xlsx', 'xlsxwriter')]) +class TestXlsxWriterTests(_WriterBase): + + @td.skip_if_no('openpyxl') + def test_column_format(self, merge_cells, ext, engine): + # Test that column formats are applied to cells. Test for issue #9167. + # Applicable to xlsxwriter only. + with warnings.catch_warnings(): + # Ignore the openpyxl lxml warning. + warnings.simplefilter("ignore") + import openpyxl + + with ensure_clean(ext) as path: + frame = DataFrame({'A': [123456, 123456], + 'B': [123456, 123456]}) + + writer = ExcelWriter(path) + frame.to_excel(writer) + + # Add a number format to col B and ensure it is applied to cells. + num_format = '#,##0' + write_workbook = writer.book + write_worksheet = write_workbook.worksheets()[0] + col_format = write_workbook.add_format({'num_format': num_format}) + write_worksheet.set_column('B:B', None, col_format) + writer.save() + + read_workbook = openpyxl.load_workbook(path) + try: + read_worksheet = read_workbook['Sheet1'] + except TypeError: + # compat + read_worksheet = read_workbook.get_sheet_by_name(name='Sheet1') + + # Get the number format from the cell. 
+ try: + cell = read_worksheet['B2'] + except TypeError: + # compat + cell = read_worksheet.cell('B2') + + try: + read_num_format = cell.number_format + except Exception: + read_num_format = cell.style.number_format._format_code + + assert read_num_format == num_format + + +class TestExcelWriterEngineTests(object): + + @pytest.mark.parametrize('klass,ext', [ + pytest.param(_XlsxWriter, '.xlsx', marks=pytest.mark.skipif( + not td.safe_import('xlsxwriter'), reason='No xlsxwriter')), + pytest.param(_OpenpyxlWriter, '.xlsx', marks=pytest.mark.skipif( + not td.safe_import('openpyxl'), reason='No openpyxl')), + pytest.param(_XlwtWriter, '.xls', marks=pytest.mark.skipif( + not td.safe_import('xlwt'), reason='No xlwt')) + ]) + def test_ExcelWriter_dispatch(self, klass, ext): + with ensure_clean(ext) as path: + writer = ExcelWriter(path) + if ext == '.xlsx' and td.safe_import('xlsxwriter'): + # xlsxwriter has preference over openpyxl if both installed + assert isinstance(writer, _XlsxWriter) + else: + assert isinstance(writer, klass) + + def test_ExcelWriter_dispatch_raises(self): + with tm.assert_raises_regex(ValueError, 'No engine'): + ExcelWriter('nothing') + + def test_register_writer(self): + # some awkward mocking to test out dispatch and such actually works + called_save = [] + called_write_cells = [] + + class DummyClass(ExcelWriter): + called_save = False + called_write_cells = False + supported_extensions = ['test', 'xlsx', 'xls'] + engine = 'dummy' + + def save(self): + called_save.append(True) + + def write_cells(self, *args, **kwargs): + called_write_cells.append(True) + + def check_called(func): + func() + assert len(called_save) >= 1 + assert len(called_write_cells) >= 1 + del called_save[:] + del called_write_cells[:] + + with pd.option_context('io.excel.xlsx.writer', 'dummy'): + register_writer(DummyClass) + writer = ExcelWriter('something.test') + assert isinstance(writer, DummyClass) + df = tm.makeCustomDataframe(1, 1) + + with 
catch_warnings(record=True): + panel = tm.makePanel() + func = lambda: df.to_excel('something.test') + check_called(func) + check_called(lambda: panel.to_excel('something.test')) + check_called(lambda: df.to_excel('something.xlsx')) + check_called( + lambda: df.to_excel( + 'something.xls', engine='dummy')) + + +@pytest.mark.parametrize('engine', [ + pytest.param('xlwt', + marks=pytest.mark.xfail(reason='xlwt does not support ' + 'openpyxl-compatible ' + 'style dicts')), + 'xlsxwriter', + 'openpyxl', +]) +def test_styler_to_excel(engine): + def style(df): + # XXX: RGB colors not supported in xlwt + return DataFrame([['font-weight: bold', '', ''], + ['', 'color: blue', ''], + ['', '', 'text-decoration: underline'], + ['border-style: solid', '', ''], + ['', 'font-style: italic', ''], + ['', '', 'text-align: right'], + ['background-color: red', '', ''], + ['', '', ''], + ['', '', ''], + ['', '', '']], + index=df.index, columns=df.columns) + + def assert_equal_style(cell1, cell2): + # XXX: should find a better way to check equality + assert cell1.alignment.__dict__ == cell2.alignment.__dict__ + assert cell1.border.__dict__ == cell2.border.__dict__ + assert cell1.fill.__dict__ == cell2.fill.__dict__ + assert cell1.font.__dict__ == cell2.font.__dict__ + assert cell1.number_format == cell2.number_format + assert cell1.protection.__dict__ == cell2.protection.__dict__ + + def custom_converter(css): + # use bold iff there is custom style attached to the cell + if css.strip(' \n;'): + return {'font': {'bold': True}} + return {} + + pytest.importorskip('jinja2') + pytest.importorskip(engine) + + # Prepare spreadsheets + + df = DataFrame(np.random.randn(10, 3)) + with ensure_clean('.xlsx' if engine != 'xlwt' else '.xls') as path: + writer = ExcelWriter(path, engine=engine) + df.to_excel(writer, sheet_name='frame') + df.style.to_excel(writer, sheet_name='unstyled') + styled = df.style.apply(style, axis=None) + styled.to_excel(writer, sheet_name='styled') + ExcelFormatter(styled, 
style_converter=custom_converter).write( + writer, sheet_name='custom') + writer.save() + + if engine not in ('openpyxl', 'xlsxwriter'): + # For other engines, we only smoke test + return + openpyxl = pytest.importorskip('openpyxl') + wb = openpyxl.load_workbook(path) + + # (1) compare DataFrame.to_excel and Styler.to_excel when unstyled + n_cells = 0 + for col1, col2 in zip(wb['frame'].columns, + wb['unstyled'].columns): + assert len(col1) == len(col2) + for cell1, cell2 in zip(col1, col2): + assert cell1.value == cell2.value + assert_equal_style(cell1, cell2) + n_cells += 1 + + # ensure iteration actually happened: + assert n_cells == (10 + 1) * (3 + 1) + + # (2) check styling with default converter + + # XXX: openpyxl (as at 2.4) prefixes colors with 00, xlsxwriter with FF + alpha = '00' if engine == 'openpyxl' else 'FF' + + n_cells = 0 + for col1, col2 in zip(wb['frame'].columns, + wb['styled'].columns): + assert len(col1) == len(col2) + for cell1, cell2 in zip(col1, col2): + ref = '%s%d' % (cell2.column, cell2.row) + # XXX: this isn't as strong a test as ideal; we should + # confirm that differences are exclusive + if ref == 'B2': + assert not cell1.font.bold + assert cell2.font.bold + elif ref == 'C3': + assert cell1.font.color.rgb != cell2.font.color.rgb + assert cell2.font.color.rgb == alpha + '0000FF' + elif ref == 'D4': + # This fails with engine=xlsxwriter due to + # https://bitbucket.org/openpyxl/openpyxl/issues/800 + if engine == 'xlsxwriter' \ + and (LooseVersion(openpyxl.__version__) < + LooseVersion('2.4.6')): + pass + else: + assert cell1.font.underline != cell2.font.underline + assert cell2.font.underline == 'single' + elif ref == 'B5': + assert not cell1.border.left.style + assert (cell2.border.top.style == + cell2.border.right.style == + cell2.border.bottom.style == + cell2.border.left.style == + 'medium') + elif ref == 'C6': + assert not cell1.font.italic + assert cell2.font.italic + elif ref == 'D7': + assert (cell1.alignment.horizontal != + 
cell2.alignment.horizontal) + assert cell2.alignment.horizontal == 'right' + elif ref == 'B8': + assert cell1.fill.fgColor.rgb != cell2.fill.fgColor.rgb + assert cell1.fill.patternType != cell2.fill.patternType + assert cell2.fill.fgColor.rgb == alpha + 'FF0000' + assert cell2.fill.patternType == 'solid' + else: + assert_equal_style(cell1, cell2) + + assert cell1.value == cell2.value + n_cells += 1 + + assert n_cells == (10 + 1) * (3 + 1) + + # (3) check styling with custom converter + n_cells = 0 + for col1, col2 in zip(wb['frame'].columns, + wb['custom'].columns): + assert len(col1) == len(col2) + for cell1, cell2 in zip(col1, col2): + ref = '%s%d' % (cell2.column, cell2.row) + if ref in ('B2', 'C3', 'D4', 'B5', 'C6', 'D7', 'B8'): + assert not cell1.font.bold + assert cell2.font.bold + else: + assert_equal_style(cell1, cell2) + + assert cell1.value == cell2.value + n_cells += 1 + + assert n_cells == (10 + 1) * (3 + 1) + + +@td.skip_if_no('openpyxl') +class TestFSPath(object): + + @pytest.mark.skipif(sys.version_info < (3, 6), reason='requires fspath') + def test_excelfile_fspath(self): + with tm.ensure_clean('foo.xlsx') as path: + df = DataFrame({"A": [1, 2]}) + df.to_excel(path) + xl = ExcelFile(path) + result = os.fspath(xl) + assert result == path + + @pytest.mark.skipif(sys.version_info < (3, 6), reason='requires fspath') + # @pytest.mark.xfail + def test_excelwriter_fspath(self): + with tm.ensure_clean('foo.xlsx') as path: + writer = ExcelWriter(path) + assert os.fspath(writer) == str(path) diff --git a/pandas/io/tests/test_feather.py b/pandas/tests/io/test_feather.py similarity index 52% rename from pandas/io/tests/test_feather.py rename to pandas/tests/io/test_feather.py index 6e2c28a0f68de..9d04111d64125 100644 --- a/pandas/io/tests/test_feather.py +++ b/pandas/tests/io/test_feather.py @@ -1,36 +1,40 @@ """ test feather-format compat """ - -import pytest -feather = pytest.importorskip('feather') +from distutils.version import LooseVersion +from warnings 
import catch_warnings import numpy as np -import pandas as pd -from pandas.io.feather_format import to_feather, read_feather -from feather import FeatherError +import pandas as pd import pandas.util.testing as tm from pandas.util.testing import assert_frame_equal, ensure_clean +import pytest +feather = pytest.importorskip('feather') +from feather import FeatherError # noqa:E402 + +from pandas.io.feather_format import to_feather, read_feather # noqa:E402 + +fv = LooseVersion(feather.__version__) -class TestFeather(tm.TestCase): - def setUp(self): - pass +@pytest.mark.single +class TestFeather(object): def check_error_on_write(self, df, exc): # check that we are raising the exception # on writing - def f(): + with pytest.raises(exc): with ensure_clean() as path: to_feather(df, path) - self.assertRaises(exc, f) - def check_round_trip(self, df): + def check_round_trip(self, df, **kwargs): with ensure_clean() as path: to_feather(df, path) - result = read_feather(path) + + with catch_warnings(record=True): + result = read_feather(path, **kwargs) assert_frame_equal(result, df) def test_error(self): @@ -41,20 +45,26 @@ def test_error(self): def test_basic(self): - df = pd.DataFrame({'a': list('abc'), - 'b': list(range(1, 4)), - 'c': np.arange(3, 6).astype('u1'), - 'd': np.arange(4.0, 7.0, dtype='float64'), - 'e': [True, False, True], - 'f': pd.Categorical(list('abc')), - 'g': pd.date_range('20130101', periods=3), - 'h': pd.date_range('20130101', periods=3, - tz='US/Eastern'), - 'i': pd.date_range('20130101', periods=3, - freq='ns')}) - + df = pd.DataFrame({'string': list('abc'), + 'int': list(range(1, 4)), + 'uint': np.arange(3, 6).astype('u1'), + 'float': np.arange(4.0, 7.0, dtype='float64'), + 'float_with_null': [1., np.nan, 3], + 'bool': [True, False, True], + 'bool_with_null': [True, np.nan, False], + 'cat': pd.Categorical(list('abc')), + 'dt': pd.date_range('20130101', periods=3), + 'dttz': pd.date_range('20130101', periods=3, + tz='US/Eastern'), + 'dt_with_null': 
[pd.Timestamp('20130101'), pd.NaT, + pd.Timestamp('20130103')], + 'dtns': pd.date_range('20130101', periods=3, + freq='ns')}) + + assert df.dttz.dtype.tz.zone == 'US/Eastern' self.check_round_trip(df) + @pytest.mark.skipif(fv >= LooseVersion('0.4.0'), reason='fixed in 0.4.0') def test_strided_data_issues(self): # strided data issuehttps://github.com/wesm/feather/issues/97 @@ -74,16 +84,29 @@ def test_stringify_columns(self): df = pd.DataFrame(np.arange(12).reshape(4, 3)).copy() self.check_error_on_write(df, ValueError) + @pytest.mark.skipif(fv >= LooseVersion('0.4.0'), reason='fixed in 0.4.0') def test_unsupported(self): - # period - df = pd.DataFrame({'a': pd.period_range('2013', freq='M', periods=3)}) - self.check_error_on_write(df, ValueError) + # timedelta + df = pd.DataFrame({'a': pd.timedelta_range('1 day', periods=3)}) + self.check_error_on_write(df, FeatherError) # non-strings df = pd.DataFrame({'a': ['a', 1, 2.0]}) self.check_error_on_write(df, ValueError) + def test_unsupported_other(self): + + # period + df = pd.DataFrame({'a': pd.period_range('2013', freq='M', periods=3)}) + self.check_error_on_write(df, ValueError) + + @pytest.mark.skipif(fv < LooseVersion('0.4.0'), reason='new in 0.4.0') + def test_rw_nthreads(self): + + df = pd.DataFrame({'A': np.arange(100000)}) + self.check_round_trip(df, nthreads=2) + def test_write_with_index(self): df = pd.DataFrame({'A': [1, 2, 3]}) @@ -110,3 +133,13 @@ def test_write_with_index(self): df.index = [0, 1, 2] df.columns = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1)]), self.check_error_on_write(df, ValueError) + + def test_path_pathlib(self): + df = tm.makeDataFrame().reset_index() + result = tm.round_trip_pathlib(df.to_feather, pd.read_feather) + tm.assert_frame_equal(df, result) + + def test_path_localpath(self): + df = tm.makeDataFrame().reset_index() + result = tm.round_trip_localpath(df.to_feather, pd.read_feather) + tm.assert_frame_equal(df, result) diff --git a/pandas/tests/io/test_gbq.py 
b/pandas/tests/io/test_gbq.py new file mode 100644 index 0000000000000..58a84ad4d47f8 --- /dev/null +++ b/pandas/tests/io/test_gbq.py @@ -0,0 +1,135 @@ +import pytest +from datetime import datetime +import pytz +import platform +from time import sleep +import os + +import numpy as np +import pandas as pd +from pandas import compat, DataFrame + +from pandas.compat import range + +pandas_gbq = pytest.importorskip('pandas_gbq') + +PROJECT_ID = None +PRIVATE_KEY_JSON_PATH = None +PRIVATE_KEY_JSON_CONTENTS = None + +if compat.PY3: + DATASET_ID = 'pydata_pandas_bq_testing_py3' +else: + DATASET_ID = 'pydata_pandas_bq_testing_py2' + +TABLE_ID = 'new_test' +DESTINATION_TABLE = "{0}.{1}".format(DATASET_ID + "1", TABLE_ID) + +VERSION = platform.python_version() + + +def _skip_if_no_project_id(): + if not _get_project_id(): + pytest.skip( + "Cannot run integration tests without a project id") + + +def _skip_if_no_private_key_path(): + if not _get_private_key_path(): + pytest.skip("Cannot run integration tests without a " + "private key json file path") + + +def _in_travis_environment(): + return 'TRAVIS_BUILD_DIR' in os.environ and \ + 'GBQ_PROJECT_ID' in os.environ + + +def _get_project_id(): + if _in_travis_environment(): + return os.environ.get('GBQ_PROJECT_ID') + else: + return PROJECT_ID + + +def _get_private_key_path(): + if _in_travis_environment(): + return os.path.join(*[os.environ.get('TRAVIS_BUILD_DIR'), 'ci', + 'travis_gbq.json']) + else: + return PRIVATE_KEY_JSON_PATH + + +def clean_gbq_environment(private_key=None): + dataset = pandas_gbq.gbq._Dataset(_get_project_id(), + private_key=private_key) + + for i in range(1, 10): + if DATASET_ID + str(i) in dataset.datasets(): + dataset_id = DATASET_ID + str(i) + table = pandas_gbq.gbq._Table(_get_project_id(), dataset_id, + private_key=private_key) + for j in range(1, 20): + if TABLE_ID + str(j) in dataset.tables(dataset_id): + table.delete(TABLE_ID + str(j)) + + dataset.delete(dataset_id) + + +def 
make_mixed_dataframe_v2(test_size): + # create df to test for all BQ datatypes except RECORD + bools = np.random.randint(2, size=(1, test_size)).astype(bool) + flts = np.random.randn(1, test_size) + ints = np.random.randint(1, 10, size=(1, test_size)) + strs = np.random.randint(1, 10, size=(1, test_size)).astype(str) + times = [datetime.now(pytz.timezone('US/Arizona')) + for t in range(test_size)] + return DataFrame({'bools': bools[0], + 'flts': flts[0], + 'ints': ints[0], + 'strs': strs[0], + 'times': times[0]}, + index=range(test_size)) + + +@pytest.mark.single +class TestToGBQIntegrationWithServiceAccountKeyPath(object): + + @classmethod + def setup_class(cls): + # - GLOBAL CLASS FIXTURES - + # put here any instruction you want to execute only *ONCE* *BEFORE* + # executing *ALL* tests described below. + + _skip_if_no_project_id() + _skip_if_no_private_key_path() + + clean_gbq_environment(_get_private_key_path()) + pandas_gbq.gbq._Dataset(_get_project_id(), + private_key=_get_private_key_path() + ).create(DATASET_ID + "1") + + @classmethod + def teardown_class(cls): + # - GLOBAL CLASS FIXTURES - + # put here any instruction you want to execute only *ONCE* *AFTER* + # executing all tests. + + clean_gbq_environment(_get_private_key_path()) + + def test_roundtrip(self): + destination_table = DESTINATION_TABLE + "1" + + test_size = 20001 + df = make_mixed_dataframe_v2(test_size) + + df.to_gbq(destination_table, _get_project_id(), chunksize=10000, + private_key=_get_private_key_path()) + + sleep(30) # <- Curses Google!!! 
+ + result = pd.read_gbq("SELECT COUNT(*) AS num_rows FROM {0}" + .format(destination_table), + project_id=_get_project_id(), + private_key=_get_private_key_path()) + assert result['num_rows'][0] == test_size diff --git a/pandas/io/tests/test_html.py b/pandas/tests/io/test_html.py similarity index 76% rename from pandas/io/tests/test_html.py rename to pandas/tests/io/test_html.py index 232e68a87f16e..b18104e951504 100644 --- a/pandas/io/tests/test_html.py +++ b/pandas/tests/io/test_html.py @@ -3,13 +3,17 @@ import glob import os import re +import threading import warnings + +# imports needed for Python 3.x but will fail under Python 2.x try: - from importlib import import_module + from importlib import import_module, reload except ImportError: import_module = __import__ + from distutils.version import LooseVersion import pytest @@ -20,10 +24,11 @@ from pandas import (DataFrame, MultiIndex, read_csv, Timestamp, Index, date_range, Series) from pandas.compat import (map, zip, StringIO, string_types, BytesIO, - is_platform_windows) + is_platform_windows, PY3) from pandas.io.common import URLError, urlopen, file_path_to_url +import pandas.io.html from pandas.io.html import read_html -from pandas.parser import ParserError +from pandas._libs.parsers import ParserError import pandas.util.testing as tm from pandas.util.testing import makeCustomDataframe as mkdf, network @@ -47,7 +52,7 @@ def _skip_if_none_of(module_names): _skip_if_no(module_names) if module_names == 'bs4': import bs4 - if bs4.__version__ == LooseVersion('4.2.0'): + if LooseVersion(bs4.__version__) == LooseVersion('4.2.0'): pytest.skip("Bad version of bs4: 4.2.0") else: not_found = [module_name for module_name in module_names if not @@ -56,7 +61,7 @@ def _skip_if_none_of(module_names): pytest.skip("{0!r} not found".format(not_found)) if 'bs4' in module_names: import bs4 - if bs4.__version__ == LooseVersion('4.2.0'): + if LooseVersion(bs4.__version__) == LooseVersion('4.2.0'): pytest.skip("Bad version of 
bs4: 4.2.0") @@ -80,7 +85,7 @@ def assert_framelist_equal(list1, list2, *args, **kwargs): def test_bs4_version_fails(): _skip_if_none_of(('bs4', 'html5lib')) import bs4 - if bs4.__version__ == LooseVersion('4.2.0'): + if LooseVersion(bs4.__version__) == LooseVersion('4.2.0'): tm.assert_raises(AssertionError, read_html, os.path.join(DATA_PATH, "spam.html"), flavor='bs4') @@ -93,14 +98,16 @@ def read_html(self, *args, **kwargs): return read_html(*args, **kwargs) -class TestReadHtml(tm.TestCase, ReadHtmlMixin): +class TestReadHtml(ReadHtmlMixin): flavor = 'bs4' spam_data = os.path.join(DATA_PATH, 'spam.html') + spam_data_kwargs = {} + if PY3: + spam_data_kwargs['encoding'] = 'UTF-8' banklist_data = os.path.join(DATA_PATH, 'banklist.html') @classmethod - def setUpClass(cls): - super(TestReadHtml, cls).setUpClass() + def setup_class(cls): _skip_if_none_of(('bs4', 'html5lib')) def test_to_html_compat(self): @@ -128,7 +135,7 @@ def test_spam_url(self): assert_framelist_equal(df1, df2) - @tm.slow + @pytest.mark.slow def test_banklist(self): df1 = self.read_html(self.banklist_data, '.*Florida.*', attrs={'id': 'table'}) @@ -144,31 +151,31 @@ def test_spam_no_types(self): df2 = self.read_html(self.spam_data, 'Unit') assert_framelist_equal(df1, df2) - self.assertEqual(df1[0].iloc[0, 0], 'Proximates') - self.assertEqual(df1[0].columns[0], 'Nutrient') + assert df1[0].iloc[0, 0] == 'Proximates' + assert df1[0].columns[0] == 'Nutrient' def test_spam_with_types(self): df1 = self.read_html(self.spam_data, '.*Water.*') df2 = self.read_html(self.spam_data, 'Unit') assert_framelist_equal(df1, df2) - self.assertEqual(df1[0].iloc[0, 0], 'Proximates') - self.assertEqual(df1[0].columns[0], 'Nutrient') + assert df1[0].iloc[0, 0] == 'Proximates' + assert df1[0].columns[0] == 'Nutrient' def test_spam_no_match(self): dfs = self.read_html(self.spam_data) for df in dfs: - tm.assertIsInstance(df, DataFrame) + assert isinstance(df, DataFrame) def test_banklist_no_match(self): dfs = 
self.read_html(self.banklist_data, attrs={'id': 'table'}) for df in dfs: - tm.assertIsInstance(df, DataFrame) + assert isinstance(df, DataFrame) def test_spam_header(self): df = self.read_html(self.spam_data, '.*Water.*', header=1)[0] - self.assertEqual(df.columns[0], 'Proximates') - self.assertFalse(df.empty) + assert df.columns[0] == 'Proximates' + assert not df.empty def test_skiprows_int(self): df1 = self.read_html(self.spam_data, '.*Water.*', skiprows=1) @@ -219,8 +226,8 @@ def test_skiprows_ndarray(self): assert_framelist_equal(df1, df2) def test_skiprows_invalid(self): - with tm.assertRaisesRegexp(TypeError, - 'is not a valid type for skipping rows'): + with tm.assert_raises_regex(TypeError, 'is not a valid type ' + 'for skipping rows'): self.read_html(self.spam_data, '.*Water.*', skiprows='asdf') def test_index(self): @@ -248,10 +255,10 @@ def test_infer_types(self): assert_framelist_equal(df1, df2) def test_string_io(self): - with open(self.spam_data) as f: + with open(self.spam_data, **self.spam_data_kwargs) as f: data1 = StringIO(f.read()) - with open(self.spam_data) as f: + with open(self.spam_data, **self.spam_data_kwargs) as f: data2 = StringIO(f.read()) df1 = self.read_html(data1, '.*Water.*') @@ -259,7 +266,7 @@ def test_string_io(self): assert_framelist_equal(df1, df2) def test_string(self): - with open(self.spam_data) as f: + with open(self.spam_data, **self.spam_data_kwargs) as f: data = f.read() df1 = self.read_html(data, '.*Water.*') @@ -268,41 +275,41 @@ def test_string(self): assert_framelist_equal(df1, df2) def test_file_like(self): - with open(self.spam_data) as f: + with open(self.spam_data, **self.spam_data_kwargs) as f: df1 = self.read_html(f, '.*Water.*') - with open(self.spam_data) as f: + with open(self.spam_data, **self.spam_data_kwargs) as f: df2 = self.read_html(f, 'Unit') assert_framelist_equal(df1, df2) @network def test_bad_url_protocol(self): - with tm.assertRaises(URLError): + with pytest.raises(URLError): 
self.read_html('git://github.com', match='.*Water.*') @network def test_invalid_url(self): try: - with tm.assertRaises(URLError): + with pytest.raises(URLError): self.read_html('http://www.a23950sdfa908sd.com', match='.*Water.*') except ValueError as e: - self.assertEqual(str(e), 'No tables found') + assert str(e) == 'No tables found' - @tm.slow + @pytest.mark.slow def test_file_url(self): url = self.banklist_data dfs = self.read_html(file_path_to_url(url), 'First', attrs={'id': 'table'}) - tm.assertIsInstance(dfs, list) + assert isinstance(dfs, list) for df in dfs: - tm.assertIsInstance(df, DataFrame) + assert isinstance(df, DataFrame) - @tm.slow + @pytest.mark.slow def test_invalid_table_attrs(self): url = self.banklist_data - with tm.assertRaisesRegexp(ValueError, 'No tables found'): + with tm.assert_raises_regex(ValueError, 'No tables found'): self.read_html(url, 'First Federal Bank of Florida', attrs={'id': 'tasdfable'}) @@ -310,67 +317,69 @@ def _bank_data(self, *args, **kwargs): return self.read_html(self.banklist_data, 'Metcalf', attrs={'id': 'table'}, *args, **kwargs) - @tm.slow + @pytest.mark.slow def test_multiindex_header(self): df = self._bank_data(header=[0, 1])[0] - tm.assertIsInstance(df.columns, MultiIndex) + assert isinstance(df.columns, MultiIndex) - @tm.slow + @pytest.mark.slow def test_multiindex_index(self): df = self._bank_data(index_col=[0, 1])[0] - tm.assertIsInstance(df.index, MultiIndex) + assert isinstance(df.index, MultiIndex) - @tm.slow + @pytest.mark.slow def test_multiindex_header_index(self): df = self._bank_data(header=[0, 1], index_col=[0, 1])[0] - tm.assertIsInstance(df.columns, MultiIndex) - tm.assertIsInstance(df.index, MultiIndex) + assert isinstance(df.columns, MultiIndex) + assert isinstance(df.index, MultiIndex) - @tm.slow + @pytest.mark.slow def test_multiindex_header_skiprows_tuples(self): - df = self._bank_data(header=[0, 1], skiprows=1, tupleize_cols=True)[0] - tm.assertIsInstance(df.columns, Index) + with 
tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + df = self._bank_data(header=[0, 1], skiprows=1, + tupleize_cols=True)[0] + assert isinstance(df.columns, Index) - @tm.slow + @pytest.mark.slow def test_multiindex_header_skiprows(self): df = self._bank_data(header=[0, 1], skiprows=1)[0] - tm.assertIsInstance(df.columns, MultiIndex) + assert isinstance(df.columns, MultiIndex) - @tm.slow + @pytest.mark.slow def test_multiindex_header_index_skiprows(self): df = self._bank_data(header=[0, 1], index_col=[0, 1], skiprows=1)[0] - tm.assertIsInstance(df.index, MultiIndex) - tm.assertIsInstance(df.columns, MultiIndex) + assert isinstance(df.index, MultiIndex) + assert isinstance(df.columns, MultiIndex) - @tm.slow + @pytest.mark.slow def test_regex_idempotency(self): url = self.banklist_data dfs = self.read_html(file_path_to_url(url), match=re.compile(re.compile('Florida')), attrs={'id': 'table'}) - tm.assertIsInstance(dfs, list) + assert isinstance(dfs, list) for df in dfs: - tm.assertIsInstance(df, DataFrame) + assert isinstance(df, DataFrame) def test_negative_skiprows(self): - with tm.assertRaisesRegexp(ValueError, - r'\(you passed a negative value\)'): + with tm.assert_raises_regex(ValueError, + r'\(you passed a negative value\)'): self.read_html(self.spam_data, 'Water', skiprows=-1) @network def test_multiple_matches(self): url = 'https://docs.python.org/2/' dfs = self.read_html(url, match='Python') - self.assertTrue(len(dfs) > 1) + assert len(dfs) > 1 @network def test_python_docs_table(self): url = 'https://docs.python.org/2/' dfs = self.read_html(url, match='Python') zz = [df.iloc[0, 0][0:4] for df in dfs] - self.assertEqual(sorted(zz), sorted(['Repo', 'What'])) + assert sorted(zz) == sorted(['Repo', 'What']) - @tm.slow + @pytest.mark.slow def test_thousands_macau_stats(self): all_non_nan_table_index = -2 macau_data = os.path.join(DATA_PATH, 'macau.html') @@ -378,16 +387,16 @@ def test_thousands_macau_stats(self): attrs={'class': 'style1'}) df = 
dfs[all_non_nan_table_index] - self.assertFalse(any(s.isnull().any() for _, s in df.iteritems())) + assert not any(s.isna().any() for _, s in df.iteritems()) - @tm.slow + @pytest.mark.slow def test_thousands_macau_index_col(self): all_non_nan_table_index = -2 macau_data = os.path.join(DATA_PATH, 'macau.html') dfs = self.read_html(macau_data, index_col=0, header=0) df = dfs[all_non_nan_table_index] - self.assertFalse(any(s.isnull().any() for _, s in df.iteritems())) + assert not any(s.isna().any() for _, s in df.iteritems()) def test_empty_tables(self): """ @@ -518,10 +527,10 @@ def test_nyse_wsj_commas_table(self): columns = Index(['Issue(Roll over for charts and headlines)', 'Volume', 'Price', 'Chg', '% Chg']) nrows = 100 - self.assertEqual(df.shape[0], nrows) - self.assert_index_equal(df.columns, columns) + assert df.shape[0] == nrows + tm.assert_index_equal(df.columns, columns) - @tm.slow + @pytest.mark.slow def test_banklist_header(self): from pandas.io.html import _remove_whitespace @@ -536,7 +545,7 @@ def try_remove_ws(x): ground_truth = read_csv(os.path.join(DATA_PATH, 'banklist.csv'), converters={'Updated Date': Timestamp, 'Closing Date': Timestamp}) - self.assertEqual(df.shape, ground_truth.shape) + assert df.shape == ground_truth.shape old = ['First Vietnamese American BankIn Vietnamese', 'Westernbank Puerto RicoEn Espanol', 'R-G Premier Bank of Puerto RicoEn Espanol', @@ -560,16 +569,16 @@ def try_remove_ws(x): coerce=True) tm.assert_frame_equal(converted, gtnew) - @tm.slow + @pytest.mark.slow def test_gold_canyon(self): gc = 'Gold Canyon' with open(self.banklist_data, 'r') as f: raw_text = f.read() - self.assertIn(gc, raw_text) + assert gc in raw_text df = self.read_html(self.banklist_data, 'Gold Canyon', attrs={'id': 'table'})[0] - self.assertIn(gc, df.to_string()) + assert gc in df.to_string() def test_different_number_of_rows(self): expected = """ @@ -652,9 +661,10 @@ def test_parse_dates_combine(self): def test_computer_sales_page(self): data = 
os.path.join(DATA_PATH, 'computer_sales_page.html') - with tm.assertRaisesRegexp(ParserError, r"Passed header=\[0,1\] are " - "too many rows for this multi_index " - "of columns"): + with tm.assert_raises_regex(ParserError, + r"Passed header=\[0,1\] are " + r"too many rows for this " + r"multi_index of columns"): self.read_html(data, header=[0, 1]) def test_wikipedia_states_table(self): @@ -662,7 +672,40 @@ def test_wikipedia_states_table(self): assert os.path.isfile(data), '%r is not a file' % data assert os.path.getsize(data), '%r is an empty file' % data result = self.read_html(data, 'Arizona', header=1)[0] - self.assertEqual(result['sq mi'].dtype, np.dtype('float64')) + assert result['sq mi'].dtype == np.dtype('float64') + + @pytest.mark.parametrize("displayed_only,exp0,exp1", [ + (True, DataFrame(["foo"]), None), + (False, DataFrame(["foo bar baz qux"]), DataFrame(["foo"]))]) + def test_displayed_only(self, displayed_only, exp0, exp1): + # GH 20027 + data = StringIO(""" + +
+ + + +
+ foo + bar + baz + qux +
+ + + + +
foo
+ + """) + + dfs = self.read_html(data, displayed_only=displayed_only) + tm.assert_frame_equal(dfs[0], exp0) + + if exp1 is not None: + tm.assert_frame_equal(dfs[1], exp1) + else: + assert len(dfs) == 1 # Should not parse hidden table def test_decimal_rows(self): @@ -691,7 +734,7 @@ def test_decimal_rows(self): def test_bool_header_arg(self): # GH 6114 for arg in [True, False]: - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): read_html(self.spam_data, header=arg) def test_converters(self): @@ -760,18 +803,29 @@ def test_keep_default_na(self): html_df = read_html(html_data, keep_default_na=True)[0] tm.assert_frame_equal(expected_df, html_df) + def test_multiple_header_rows(self): + # Issue #13434 + expected_df = DataFrame(data=[("Hillary", 68, "D"), + ("Bernie", 74, "D"), + ("Donald", 69, "R")]) + expected_df.columns = [["Unnamed: 0_level_0", "Age", "Party"], + ["Name", "Unnamed: 1_level_1", + "Unnamed: 2_level_1"]] + html = expected_df.to_html(index=False) + html_df = read_html(html, )[0] + tm.assert_frame_equal(expected_df, html_df) + def _lang_enc(filename): return os.path.splitext(os.path.basename(filename))[0].split('_') -class TestReadHtmlEncoding(tm.TestCase): +class TestReadHtmlEncoding(object): files = glob.glob(os.path.join(DATA_PATH, 'html_encoding', '*.html')) flavor = 'bs4' @classmethod - def setUpClass(cls): - super(TestReadHtmlEncoding, cls).setUpClass() + def setup_class(cls): _skip_if_none_of((cls.flavor, 'html5lib')) def read_html(self, *args, **kwargs): @@ -812,17 +866,16 @@ class TestReadHtmlEncodingLxml(TestReadHtmlEncoding): flavor = 'lxml' @classmethod - def setUpClass(cls): - super(TestReadHtmlEncodingLxml, cls).setUpClass() + def setup_class(cls): + super(TestReadHtmlEncodingLxml, cls).setup_class() _skip_if_no(cls.flavor) -class TestReadHtmlLxml(tm.TestCase, ReadHtmlMixin): +class TestReadHtmlLxml(ReadHtmlMixin): flavor = 'lxml' @classmethod - def setUpClass(cls): - super(TestReadHtmlLxml, cls).setUpClass() + def 
setup_class(cls): _skip_if_no('lxml') def test_data_fail(self): @@ -830,24 +883,31 @@ def test_data_fail(self): spam_data = os.path.join(DATA_PATH, 'spam.html') banklist_data = os.path.join(DATA_PATH, 'banklist.html') - with tm.assertRaises(XMLSyntaxError): + with pytest.raises(XMLSyntaxError): self.read_html(spam_data) - with tm.assertRaises(XMLSyntaxError): + with pytest.raises(XMLSyntaxError): self.read_html(banklist_data) def test_works_on_valid_markup(self): filename = os.path.join(DATA_PATH, 'valid_markup.html') dfs = self.read_html(filename, index_col=0) - tm.assertIsInstance(dfs, list) - tm.assertIsInstance(dfs[0], DataFrame) + assert isinstance(dfs, list) + assert isinstance(dfs[0], DataFrame) - @tm.slow + @pytest.mark.slow def test_fallback_success(self): _skip_if_none_of(('bs4', 'html5lib')) banklist_data = os.path.join(DATA_PATH, 'banklist.html') self.read_html(banklist_data, '.*Water.*', flavor=['lxml', 'html5lib']) + def test_to_html_timestamp(self): + rng = date_range('2000-01-01', periods=10) + df = DataFrame(np.random.randn(10, 4), index=rng) + + result = df.to_html() + assert '2000-01-01' in result + def test_parse_dates_list(self): df = DataFrame({'date': date_range('1/1/2001', periods=10)}) expected = df.to_html() @@ -869,10 +929,43 @@ def test_computer_sales_page(self): data = os.path.join(DATA_PATH, 'computer_sales_page.html') self.read_html(data, header=[0, 1]) + @pytest.mark.parametrize("displayed_only,exp0,exp1", [ + (True, DataFrame(["foo"]), None), + (False, DataFrame(["foo bar baz qux"]), DataFrame(["foo"]))]) + def test_displayed_only(self, displayed_only, exp0, exp1): + # GH 20027 + data = StringIO(""" + + + + + +
+ foo + bar + baz + qux +
+ + + + +
foo
+ + """) + + dfs = self.read_html(data, displayed_only=displayed_only) + tm.assert_frame_equal(dfs[0], exp0) + + if exp1 is not None: + tm.assert_frame_equal(dfs[1], exp1) + else: + assert len(dfs) == 1 # Should not parse hidden table + def test_invalid_flavor(): url = 'google.com' - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): read_html(url, 'google', flavor='not a* valid**++ flaver') @@ -885,7 +978,7 @@ def get_elements_from_file(url, element='table'): return soup.find_all(element) -@tm.slow +@pytest.mark.slow def test_bs4_finds_tables(): filepath = os.path.join(DATA_PATH, "spam.html") with warnings.catch_warnings(): @@ -900,13 +993,13 @@ def get_lxml_elements(url, element): return doc.xpath('.//{0}'.format(element)) -@tm.slow +@pytest.mark.slow def test_lxml_finds_tables(): filepath = os.path.join(DATA_PATH, "spam.html") assert get_lxml_elements(filepath, 'table') -@tm.slow +@pytest.mark.slow def test_lxml_finds_tbody(): filepath = os.path.join(DATA_PATH, "spam.html") assert get_lxml_elements(filepath, 'tbody') @@ -918,3 +1011,85 @@ def test_same_ordering(): dfs_lxml = read_html(filename, index_col=0, flavor=['lxml']) dfs_bs4 = read_html(filename, index_col=0, flavor=['bs4']) assert_framelist_equal(dfs_lxml, dfs_bs4) + + +class ErrorThread(threading.Thread): + def run(self): + try: + super(ErrorThread, self).run() + except Exception as e: + self.err = e + else: + self.err = None + + +@pytest.mark.slow +def test_importcheck_thread_safety(): + # see gh-16928 + + # force import check by reinitalising global vars in html.py + pytest.importorskip('lxml') + reload(pandas.io.html) + + filename = os.path.join(DATA_PATH, 'valid_markup.html') + helper_thread1 = ErrorThread(target=read_html, args=(filename,)) + helper_thread2 = ErrorThread(target=read_html, args=(filename,)) + + helper_thread1.start() + helper_thread2.start() + + while helper_thread1.is_alive() or helper_thread2.is_alive(): + pass + assert None is helper_thread1.err is 
helper_thread2.err + + +def test_parse_failure_unseekable(): + # Issue #17975 + _skip_if_no('lxml') + _skip_if_no('bs4') + + class UnseekableStringIO(StringIO): + def seekable(self): + return False + + good = UnseekableStringIO(''' +
spam
eggs
''') + bad = UnseekableStringIO(''' +
spameggs
''') + + assert read_html(good) + assert read_html(bad, flavor='bs4') + + bad.seek(0) + + with pytest.raises(ValueError, + match='passed a non-rewindable file object'): + read_html(bad) + + +def test_parse_failure_rewinds(): + # Issue #17975 + _skip_if_no('lxml') + _skip_if_no('bs4') + + class MockFile(object): + def __init__(self, data): + self.data = data + self.at_end = False + + def read(self, size=None): + data = '' if self.at_end else self.data + self.at_end = True + return data + + def seek(self, offset): + self.at_end = False + + def seekable(self): + return True + + good = MockFile('
spam
eggs
') + bad = MockFile('
spameggs
') + + assert read_html(good) + assert read_html(bad) diff --git a/pandas/io/tests/test_packers.py b/pandas/tests/io/test_packers.py similarity index 78% rename from pandas/io/tests/test_packers.py rename to pandas/tests/io/test_packers.py index 4bb6f4a69bab3..919b34dc09f6f 100644 --- a/pandas/io/tests/test_packers.py +++ b/pandas/tests/io/test_packers.py @@ -1,5 +1,6 @@ import pytest +from warnings import catch_warnings import os import datetime import numpy as np @@ -9,8 +10,9 @@ from pandas import compat from pandas.compat import u, PY3 from pandas import (Series, DataFrame, Panel, MultiIndex, bdate_range, - date_range, period_range, Index, Categorical) -from pandas.core.common import PerformanceWarning + date_range, period_range, Index, Categorical, + Period, Interval) +from pandas.errors import PerformanceWarning from pandas.io.packers import to_msgpack, read_msgpack import pandas.util.testing as tm from pandas.util.testing import (ensure_clean, @@ -22,7 +24,8 @@ from pandas.tests.test_panel import assert_panel_equal import pandas -from pandas import Timestamp, NaT, tslib +from pandas import Timestamp, NaT +from pandas._libs.tslib import iNaT nan = np.nan @@ -41,6 +44,22 @@ _ZLIB_INSTALLED = True +@pytest.fixture(scope='module') +def current_packers_data(): + # our current version packers data + from pandas.tests.io.generate_legacy_storage_files import ( + create_msgpack_data) + return create_msgpack_data() + + +@pytest.fixture(scope='module') +def all_packers_data(): + # our all of our current version packers data + from pandas.tests.io.generate_legacy_storage_files import ( + create_data) + return create_data() + + def check_arbitrary(a, b): if isinstance(a, (list, tuple)) and isinstance(b, (list, tuple)): @@ -72,12 +91,12 @@ def check_arbitrary(a, b): assert(a == b) -class TestPackers(tm.TestCase): +class TestPackers(object): - def setUp(self): + def setup_method(self, method): self.path = '__%s__.msg' % tm.rands(10) - def tearDown(self): + def 
teardown_method(self, method): pass def encode_decode(self, x, compress=None, **kwargs): @@ -116,6 +135,16 @@ def test_string_io(self): result = read_msgpack(p) tm.assert_frame_equal(result, df) + def test_path_pathlib(self): + df = tm.makeDataFrame() + result = tm.round_trip_pathlib(df.to_msgpack, read_msgpack) + tm.assert_frame_equal(df, result) + + def test_path_localpath(self): + df = tm.makeDataFrame() + result = tm.round_trip_localpath(df.to_msgpack, read_msgpack) + tm.assert_frame_equal(df, result) + def test_iterator_with_string_io(self): dfs = [DataFrame(np.random.randn(10, 2)) for i in range(5)] @@ -130,9 +159,9 @@ class A(object): def __init__(self): self.read = 0 - tm.assertRaises(ValueError, read_msgpack, path_or_buf=None) - tm.assertRaises(ValueError, read_msgpack, path_or_buf={}) - tm.assertRaises(ValueError, read_msgpack, path_or_buf=A()) + pytest.raises(ValueError, read_msgpack, path_or_buf=None) + pytest.raises(ValueError, read_msgpack, path_or_buf={}) + pytest.raises(ValueError, read_msgpack, path_or_buf=A()) class TestNumpy(TestPackers): @@ -145,17 +174,26 @@ def test_numpy_scalar_float(self): def test_numpy_scalar_complex(self): x = np.complex64(np.random.rand() + 1j * np.random.rand()) x_rec = self.encode_decode(x) - self.assertTrue(np.allclose(x, x_rec)) + assert np.allclose(x, x_rec) def test_scalar_float(self): x = np.random.rand() x_rec = self.encode_decode(x) tm.assert_almost_equal(x, x_rec) + def test_scalar_bool(self): + x = np.bool_(1) + x_rec = self.encode_decode(x) + tm.assert_almost_equal(x, x_rec) + + x = np.bool_(0) + x_rec = self.encode_decode(x) + tm.assert_almost_equal(x, x_rec) + def test_scalar_complex(self): x = np.random.rand() + 1j * np.random.rand() x_rec = self.encode_decode(x) - self.assertTrue(np.allclose(x, x_rec)) + assert np.allclose(x, x_rec) def test_list_numpy_float(self): x = [np.float32(np.random.rand()) for i in range(5)] @@ -168,13 +206,13 @@ def test_list_numpy_float(self): def 
test_list_numpy_float_complex(self): if not hasattr(np, 'complex128'): - pytest.skip('numpy cant handle complex128') + pytest.skip('numpy can not handle complex128') x = [np.float32(np.random.rand()) for i in range(5)] + \ [np.complex128(np.random.rand() + 1j * np.random.rand()) for i in range(5)] x_rec = self.encode_decode(x) - self.assertTrue(np.allclose(x, x_rec)) + assert np.allclose(x, x_rec) def test_list_float(self): x = [np.random.rand() for i in range(5)] @@ -189,7 +227,7 @@ def test_list_float_complex(self): x = [np.random.rand() for i in range(5)] + \ [(np.random.rand() + 1j * np.random.rand()) for i in range(5)] x_rec = self.encode_decode(x) - self.assertTrue(np.allclose(x, x_rec)) + assert np.allclose(x, x_rec) def test_dict_float(self): x = {'foo': 1.0, 'bar': 2.0} @@ -199,9 +237,10 @@ def test_dict_float(self): def test_dict_complex(self): x = {'foo': 1.0 + 1.0j, 'bar': 2.0 + 2.0j} x_rec = self.encode_decode(x) - self.assertEqual(x, x_rec) + tm.assert_dict_equal(x, x_rec) + for key in x: - self.assertEqual(type(x[key]), type(x_rec[key])) + tm.assert_class_equal(x[key], x_rec[key], obj="complex value") def test_dict_numpy_float(self): x = {'foo': np.float32(1.0), 'bar': np.float32(2.0)} @@ -212,9 +251,10 @@ def test_dict_numpy_complex(self): x = {'foo': np.complex128(1.0 + 1.0j), 'bar': np.complex128(2.0 + 2.0j)} x_rec = self.encode_decode(x) - self.assertEqual(x, x_rec) + tm.assert_dict_equal(x, x_rec) + for key in x: - self.assertEqual(type(x[key]), type(x_rec[key])) + tm.assert_class_equal(x[key], x_rec[key], obj="numpy complex128") def test_numpy_array_float(self): @@ -229,11 +269,11 @@ def test_numpy_array_float(self): def test_numpy_array_complex(self): x = (np.random.rand(5) + 1j * np.random.rand(5)).astype(np.complex128) x_rec = self.encode_decode(x) - self.assertTrue(all(map(lambda x, y: x == y, x, x_rec)) and - x.dtype == x_rec.dtype) + assert (all(map(lambda x, y: x == y, x, x_rec)) and + x.dtype == x_rec.dtype) def test_list_mixed(self): - 
x = [1.0, np.float32(3.5), np.complex128(4.25), u('foo')] + x = [1.0, np.float32(3.5), np.complex128(4.25), u('foo'), np.bool_(1)] x_rec = self.encode_decode(x) # current msgpack cannot distinguish list/tuple tm.assert_almost_equal(tuple(x), x_rec) @@ -250,17 +290,17 @@ def test_timestamp(self): '20130101'), Timestamp('20130101', tz='US/Eastern'), Timestamp('201301010501')]: i_rec = self.encode_decode(i) - self.assertEqual(i, i_rec) + assert i == i_rec def test_nat(self): nat_rec = self.encode_decode(NaT) - self.assertIs(NaT, nat_rec) + assert NaT is nat_rec def test_datetimes(self): # fails under 2.6/win32 (np.datetime64 seems broken) - if LooseVersion(sys.version) < '2.7': + if LooseVersion(sys.version) < LooseVersion('2.7'): pytest.skip('2.6 with np.datetime64 is broken') for i in [datetime.datetime(2013, 1, 1), @@ -268,7 +308,7 @@ def test_datetimes(self): datetime.date(2013, 1, 1), np.datetime64(datetime.datetime(2013, 1, 5, 2, 15))]: i_rec = self.encode_decode(i) - self.assertEqual(i, i_rec) + assert i == i_rec def test_timedeltas(self): @@ -276,13 +316,26 @@ def test_timedeltas(self): datetime.timedelta(days=1, seconds=10), np.timedelta64(1000000)]: i_rec = self.encode_decode(i) - self.assertEqual(i, i_rec) + assert i == i_rec + + def test_periods(self): + # 13463 + for i in [Period('2010-09', 'M'), Period('2014-Q1', 'Q')]: + i_rec = self.encode_decode(i) + assert i == i_rec + + def test_intervals(self): + # 19967 + for i in [Interval(0, 1), Interval(0, 1, 'left'), + Interval(10, 25., 'right')]: + i_rec = self.encode_decode(i) + assert i == i_rec class TestIndex(TestPackers): - def setUp(self): - super(TestIndex, self).setUp() + def setup_method(self, method): + super(TestIndex, self).setup_method(method) self.d = { 'string': tm.makeStringIndex(100), @@ -295,6 +348,9 @@ def setUp(self): 'period': Index(period_range('2012-1-1', freq='M', periods=3)), 'date2': Index(date_range('2013-01-1', periods=10)), 'bdate': Index(bdate_range('2013-01-02', periods=10)), + 
'cat': tm.makeCategoricalIndex(100), + 'interval': tm.makeIntervalIndex(100), + 'timedelta': tm.makeTimedeltaIndex(100, 'H') } self.mi = { @@ -308,36 +364,43 @@ def test_basic_index(self): for s, i in self.d.items(): i_rec = self.encode_decode(i) - self.assert_index_equal(i, i_rec) + tm.assert_index_equal(i, i_rec) # datetime with no freq (GH5506) i = Index([Timestamp('20130101'), Timestamp('20130103')]) i_rec = self.encode_decode(i) - self.assert_index_equal(i, i_rec) + tm.assert_index_equal(i, i_rec) # datetime with timezone i = Index([Timestamp('20130101 9:00:00'), Timestamp( '20130103 11:00:00')]).tz_localize('US/Eastern') i_rec = self.encode_decode(i) - self.assert_index_equal(i, i_rec) + tm.assert_index_equal(i, i_rec) def test_multi_index(self): for s, i in self.mi.items(): i_rec = self.encode_decode(i) - self.assert_index_equal(i, i_rec) + tm.assert_index_equal(i, i_rec) def test_unicode(self): i = tm.makeUnicodeIndex(100) i_rec = self.encode_decode(i) - self.assert_index_equal(i, i_rec) + tm.assert_index_equal(i, i_rec) + + def categorical_index(self): + # GH15487 + df = DataFrame(np.random.randn(10, 2)) + df = df.astype({0: 'category'}).set_index(0) + result = self.encode_decode(df) + tm.assert_frame_equal(result, df) class TestSeries(TestPackers): - def setUp(self): - super(TestSeries, self).setUp() + def setup_method(self, method): + super(TestSeries, self).setup_method(method) self.d = {} @@ -349,7 +412,7 @@ def setUp(self): s.name = 'object' self.d['object'] = s - s = Series(tslib.iNaT, dtype='M8[ns]', index=range(5)) + s = Series(iNaT, dtype='M8[ns]', index=range(5)) self.d['date'] = s data = { @@ -363,6 +426,7 @@ def setUp(self): 'G': [Timestamp('20130102', tz='US/Eastern')] * 5, 'H': Categorical([1, 2, 3, 4, 5]), 'I': Categorical([1, 2, 3, 4, 5], ordered=True), + 'J': (np.bool_(1), 2, 3, 4, 5), } self.d['float'] = Series(data['A']) @@ -372,6 +436,7 @@ def setUp(self): self.d['dt_tz'] = Series(data['G']) self.d['cat_ordered'] = Series(data['H']) 
self.d['cat_unordered'] = Series(data['I']) + self.d['numpy_bool_mixed'] = Series(data['J']) def test_basic(self): @@ -384,8 +449,8 @@ def test_basic(self): class TestCategorical(TestPackers): - def setUp(self): - super(TestCategorical, self).setUp() + def setup_method(self, method): + super(TestCategorical, self).setup_method(method) self.d = {} @@ -407,8 +472,8 @@ def test_basic(self): class TestNDFrame(TestPackers): - def setUp(self): - super(TestNDFrame, self).setUp() + def setup_method(self, method): + super(TestNDFrame, self).setup_method(method) data = { 'A': [0., 1., 2., 3., np.nan], @@ -427,9 +492,10 @@ def setUp(self): 'int': DataFrame(dict(A=data['B'], B=Series(data['B']) + 1)), 'mixed': DataFrame(data)} - self.panel = { - 'float': Panel(dict(ItemA=self.frame['float'], - ItemB=self.frame['float'] + 1))} + with catch_warnings(record=True): + self.panel = { + 'float': Panel(dict(ItemA=self.frame['float'], + ItemB=self.frame['float'] + 1))} def test_basic_frame(self): @@ -439,9 +505,10 @@ def test_basic_frame(self): def test_basic_panel(self): - for s, i in self.panel.items(): - i_rec = self.encode_decode(i) - assert_panel_equal(i, i_rec) + with catch_warnings(record=True): + for s, i in self.panel.items(): + i_rec = self.encode_decode(i) + assert_panel_equal(i, i_rec) def test_multi(self): @@ -458,7 +525,7 @@ def test_multi(self): l = [self.frame['float'], self.frame['float'] .A, self.frame['float'].B, None] l_rec = self.encode_decode(l) - self.assertIsInstance(l_rec, tuple) + assert isinstance(l_rec, tuple) check_arbitrary(l, l_rec) def test_iterator(self): @@ -508,7 +575,7 @@ def _check_roundtrip(self, obj, comparator, **kwargs): # currently these are not implemetned # i_rec = self.encode_decode(obj) # comparator(obj, i_rec, **kwargs) - self.assertRaises(NotImplementedError, self.encode_decode, obj) + pytest.raises(NotImplementedError, self.encode_decode, obj) def test_sparse_series(self): @@ -549,7 +616,7 @@ class TestCompression(TestPackers): """See 
https://github.com/pandas-dev/pandas/pull/9783 """ - def setUp(self): + def setup_method(self, method): try: from sqlalchemy import create_engine self._create_sql_engine = create_engine @@ -558,7 +625,7 @@ def setUp(self): else: self._SQLALCHEMY_INSTALLED = True - super(TestCompression, self).setUp() + super(TestCompression, self).setup_method(method) data = { 'A': np.arange(1000, dtype=np.float64), 'B': np.arange(1000, dtype=np.int32), @@ -567,8 +634,8 @@ def setUp(self): 'E': [datetime.timedelta(days=x) for x in range(1000)], } self.frame = { - 'float': DataFrame(dict((k, data[k]) for k in ['A', 'A'])), - 'int': DataFrame(dict((k, data[k]) for k in ['B', 'B'])), + 'float': DataFrame({k: data[k] for k in ['A', 'A']}), + 'int': DataFrame({k: data[k] for k in ['B', 'B']}), 'mixed': DataFrame(data), } @@ -585,7 +652,7 @@ def _test_compression(self, compress): assert_frame_equal(value, expected) # make sure that we can write to the new frames for block in value._data.blocks: - self.assertTrue(block.values.flags.writeable) + assert block.values.flags.writeable def test_compression_zlib(self): if not _ZLIB_INSTALLED: @@ -634,22 +701,20 @@ def decompress(ob): # make sure that we can write to the new frames even though # we needed to copy the data for block in value._data.blocks: - self.assertTrue(block.values.flags.writeable) + assert block.values.flags.writeable # mutate the data in some way block.values[0] += rhs[block.dtype] for w in ws: # check the messages from our warnings - self.assertEqual( - str(w.message), - 'copying data after decompressing; this may mean that' - ' decompress is caching its result', - ) + assert str(w.message) == ('copying data after decompressing; ' + 'this may mean that decompress is ' + 'caching its result') for buf, control_buf in zip(not_garbage, control): # make sure none of our mutations above affected the # original buffers - self.assertEqual(buf, control_buf) + assert buf == control_buf def 
test_compression_warns_when_decompress_caches_zlib(self): if not _ZLIB_INSTALLED: @@ -667,14 +732,14 @@ def _test_small_strings_no_warn(self, compress): empty_unpacked = self.encode_decode(empty, compress=compress) tm.assert_numpy_array_equal(empty_unpacked, empty) - self.assertTrue(empty_unpacked.flags.writeable) + assert empty_unpacked.flags.writeable char = np.array([ord(b'a')], dtype='uint8') with tm.assert_produces_warning(None): char_unpacked = self.encode_decode(char, compress=compress) tm.assert_numpy_array_equal(char_unpacked, char) - self.assertTrue(char_unpacked.flags.writeable) + assert char_unpacked.flags.writeable # if this test fails I am sorry because the interpreter is now in a # bad state where b'a' points to 98 == ord(b'b'). char_unpacked[0] = ord(b'b') @@ -682,7 +747,7 @@ def _test_small_strings_no_warn(self, compress): # we compare the ord of bytes b'a' with unicode u'a' because the should # always be the same (unless we were able to mutate the shared # character singleton in which case ord(b'a') == ord(b'b'). - self.assertEqual(ord(b'a'), ord(u'a')) + assert ord(b'a') == ord(u'a') tm.assert_numpy_array_equal( char_unpacked, np.array([ord(b'b')], dtype='uint8'), @@ -704,15 +769,15 @@ def test_readonly_axis_blosc(self): pytest.skip('no blosc') df1 = DataFrame({'A': list('abcd')}) df2 = DataFrame(df1, index=[1., 2., 3., 4.]) - self.assertTrue(1 in self.encode_decode(df1['A'], compress='blosc')) - self.assertTrue(1. in self.encode_decode(df2['A'], compress='blosc')) + assert 1 in self.encode_decode(df1['A'], compress='blosc') + assert 1. in self.encode_decode(df2['A'], compress='blosc') def test_readonly_axis_zlib(self): # GH11880 df1 = DataFrame({'A': list('abcd')}) df2 = DataFrame(df1, index=[1., 2., 3., 4.]) - self.assertTrue(1 in self.encode_decode(df1['A'], compress='zlib')) - self.assertTrue(1. in self.encode_decode(df2['A'], compress='zlib')) + assert 1 in self.encode_decode(df1['A'], compress='zlib') + assert 1. 
in self.encode_decode(df2['A'], compress='zlib') def test_readonly_axis_blosc_to_sql(self): # GH11880 @@ -745,8 +810,8 @@ def test_readonly_axis_zlib_to_sql(self): class TestEncoding(TestPackers): - def setUp(self): - super(TestEncoding, self).setUp() + def setup_method(self, method): + super(TestEncoding, self).setup_method(method) data = { 'A': [compat.u('\u2019')] * 1000, 'B': np.arange(1000, dtype=np.int32), @@ -756,8 +821,8 @@ def setUp(self): 'G': [400] * 1000 } self.frame = { - 'float': DataFrame(dict((k, data[k]) for k in ['A', 'A'])), - 'int': DataFrame(dict((k, data[k]) for k in ['B', 'B'])), + 'float': DataFrame({k: data[k] for k in ['A', 'A']}), + 'int': DataFrame({k: data[k] for k in ['B', 'B']}), 'mixed': DataFrame(data), } self.utf_encodings = ['utf8', 'utf16', 'utf32'] @@ -773,12 +838,21 @@ def test_default_encoding(self): for frame in compat.itervalues(self.frame): result = frame.to_msgpack() expected = frame.to_msgpack(encoding='utf8') - self.assertEqual(result, expected) + assert result == expected result = self.encode_decode(frame) assert_frame_equal(result, frame) -class TestMsgpack(): +def legacy_packers_versions(): + # yield the packers versions + path = tm.get_data_path('legacy_msgpack') + for v in os.listdir(path): + p = os.path.join(path, v) + if os.path.isdir(p): + yield v + + +class TestMsgpack(object): """ How to add msgpack tests: @@ -788,48 +862,38 @@ class TestMsgpack(): $ python generate_legacy_storage_files.py msgpack 3. Move the created pickle to "data/legacy_msgpack/" directory. - - NOTE: TestMsgpack can't be a subclass of tm.Testcase to use test generator. 
- http://stackoverflow.com/questions/6689537/nose-test-generators-inside-class """ - @classmethod - def setup_class(cls): - from pandas.io.tests.generate_legacy_storage_files import ( - create_msgpack_data, create_data) - cls.data = create_msgpack_data() - cls.all_data = create_data() - cls.path = u('__%s__.msgpack' % tm.rands(10)) - cls.minimum_structure = {'series': ['float', 'int', 'mixed', - 'ts', 'mi', 'dup'], - 'frame': ['float', 'int', 'mixed', 'mi'], - 'panel': ['float'], - 'index': ['int', 'date', 'period'], - 'mi': ['reg2']} - - def check_min_structure(self, data): + minimum_structure = {'series': ['float', 'int', 'mixed', + 'ts', 'mi', 'dup'], + 'frame': ['float', 'int', 'mixed', 'mi'], + 'panel': ['float'], + 'index': ['int', 'date', 'period'], + 'mi': ['reg2']} + + def check_min_structure(self, data, version): for typ, v in self.minimum_structure.items(): assert typ in data, '"{0}" not found in unpacked data'.format(typ) for kind in v: msg = '"{0}" not found in data["{1}"]'.format(kind, typ) assert kind in data[typ], msg - def compare(self, vf, version): + def compare(self, current_data, all_data, vf, version): # GH12277 encoding default used to be latin-1, now utf-8 - if LooseVersion(version) < '0.18.0': + if LooseVersion(version) < LooseVersion('0.18.0'): data = read_msgpack(vf, encoding='latin-1') else: data = read_msgpack(vf) - self.check_min_structure(data) + self.check_min_structure(data, version) for typ, dv in data.items(): - assert typ in self.all_data, ('unpacked data contains ' - 'extra key "{0}"' - .format(typ)) + assert typ in all_data, ('unpacked data contains ' + 'extra key "{0}"' + .format(typ)) for dt, result in dv.items(): - assert dt in self.all_data[typ], ('data["{0}"] contains extra ' - 'key "{1}"'.format(typ, dt)) + assert dt in current_data[typ], ('data["{0}"] contains extra ' + 'key "{1}"'.format(typ, dt)) try: - expected = self.data[typ][dt] + expected = current_data[typ][dt] except KeyError: continue @@ -847,7 +911,7 @@ def 
compare(self, vf, version): def compare_series_dt_tz(self, result, expected, typ, version): # 8260 # dtype is object < 0.17.0 - if LooseVersion(version) < '0.17.0': + if LooseVersion(version) < LooseVersion('0.17.0'): expected = expected.astype(object) tm.assert_series_equal(result, expected) else: @@ -856,15 +920,17 @@ def compare_series_dt_tz(self, result, expected, typ, version): def compare_frame_dt_mixed_tzs(self, result, expected, typ, version): # 8260 # dtype is object < 0.17.0 - if LooseVersion(version) < '0.17.0': + if LooseVersion(version) < LooseVersion('0.17.0'): expected = expected.astype(object) tm.assert_frame_equal(result, expected) else: tm.assert_frame_equal(result, expected) - def read_msgpacks(self, version): + @pytest.mark.parametrize('version', legacy_packers_versions()) + def test_msgpacks_legacy(self, current_packers_data, all_packers_data, + version): - pth = tm.get_data_path('legacy_msgpack/{0}'.format(str(version))) + pth = tm.get_data_path('legacy_msgpack/{0}'.format(version)) n = 0 for f in os.listdir(pth): # GH12142 0.17 files packed in P2 can't be read in P3 @@ -873,19 +939,11 @@ def read_msgpacks(self, version): continue vf = os.path.join(pth, f) try: - self.compare(vf, version) + with catch_warnings(record=True): + self.compare(current_packers_data, all_packers_data, + vf, version) except ImportError: # blosc not installed continue n += 1 assert n > 0, 'Msgpack files are not tested' - - def test_msgpack(self): - msgpack_path = tm.get_data_path('legacy_msgpack') - n = 0 - for v in os.listdir(msgpack_path): - pth = os.path.join(msgpack_path, v) - if os.path.isdir(pth): - yield self.read_msgpacks, v - n += 1 - assert n > 0, 'Msgpack files are not tested' diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py new file mode 100644 index 0000000000000..11cbea8ce6331 --- /dev/null +++ b/pandas/tests/io/test_parquet.py @@ -0,0 +1,504 @@ +""" test parquet compat """ + +import pytest +import datetime +from 
distutils.version import LooseVersion +from warnings import catch_warnings + +import numpy as np +import pandas as pd +from pandas.compat import PY3, is_platform_windows, is_platform_mac +from pandas.io.parquet import (to_parquet, read_parquet, get_engine, + PyArrowImpl, FastParquetImpl) +from pandas.util import testing as tm + +try: + import pyarrow # noqa + _HAVE_PYARROW = True +except ImportError: + _HAVE_PYARROW = False + +try: + import fastparquet # noqa + _HAVE_FASTPARQUET = True +except ImportError: + _HAVE_FASTPARQUET = False + + +# setup engines & skips +@pytest.fixture(params=[ + pytest.param('fastparquet', + marks=pytest.mark.skipif(not _HAVE_FASTPARQUET, + reason='fastparquet is ' + 'not installed')), + pytest.param('pyarrow', + marks=pytest.mark.skipif(not _HAVE_PYARROW, + reason='pyarrow is ' + 'not installed'))]) +def engine(request): + return request.param + + +@pytest.fixture +def pa(): + if not _HAVE_PYARROW: + pytest.skip("pyarrow is not installed") + return 'pyarrow' + + +@pytest.fixture +def pa_lt_070(): + if not _HAVE_PYARROW: + pytest.skip("pyarrow is not installed") + if LooseVersion(pyarrow.__version__) >= LooseVersion('0.7.0'): + pytest.skip("pyarrow is >= 0.7.0") + return 'pyarrow' + + +@pytest.fixture +def pa_ge_070(): + if not _HAVE_PYARROW: + pytest.skip("pyarrow is not installed") + if LooseVersion(pyarrow.__version__) < LooseVersion('0.7.0'): + pytest.skip("pyarrow is < 0.7.0") + return 'pyarrow' + + +@pytest.fixture +def fp(): + if not _HAVE_FASTPARQUET: + pytest.skip("fastparquet is not installed") + return 'fastparquet' + + +@pytest.fixture +def fp_lt_014(): + if not _HAVE_FASTPARQUET: + pytest.skip("fastparquet is not installed") + if LooseVersion(fastparquet.__version__) >= LooseVersion('0.1.4'): + pytest.skip("fastparquet is >= 0.1.4") + return 'fastparquet' + + +@pytest.fixture +def df_compat(): + return pd.DataFrame({'A': [1, 2, 3], 'B': 'foo'}) + + +@pytest.fixture +def df_cross_compat(): + df = pd.DataFrame({'a': 
list('abc'), + 'b': list(range(1, 4)), + # 'c': np.arange(3, 6).astype('u1'), + 'd': np.arange(4.0, 7.0, dtype='float64'), + 'e': [True, False, True], + 'f': pd.date_range('20130101', periods=3), + # 'g': pd.date_range('20130101', periods=3, + # tz='US/Eastern'), + # 'h': pd.date_range('20130101', periods=3, freq='ns') + }) + return df + + +@pytest.fixture +def df_full(): + return pd.DataFrame( + {'string': list('abc'), + 'string_with_nan': ['a', np.nan, 'c'], + 'string_with_none': ['a', None, 'c'], + 'bytes': [b'foo', b'bar', b'baz'], + 'unicode': [u'foo', u'bar', u'baz'], + 'int': list(range(1, 4)), + 'uint': np.arange(3, 6).astype('u1'), + 'float': np.arange(4.0, 7.0, dtype='float64'), + 'float_with_nan': [2., np.nan, 3.], + 'bool': [True, False, True], + 'datetime': pd.date_range('20130101', periods=3), + 'datetime_with_nat': [pd.Timestamp('20130101'), + pd.NaT, + pd.Timestamp('20130103')]}) + + +def check_round_trip(df, engine=None, path=None, + write_kwargs=None, read_kwargs=None, + expected=None, check_names=True, + repeat=2): + """Verify parquet serializer and deserializer produce the same results. + + Performs a pandas to disk and disk to pandas round trip, + then compares the 2 resulting DataFrames to verify equality. 
+ + Parameters + ---------- + df: Dataframe + engine: str, optional + 'pyarrow' or 'fastparquet' + path: str, optional + write_kwargs: dict of str:str, optional + read_kwargs: dict of str:str, optional + expected: DataFrame, optional + Expected deserialization result, otherwise will be equal to `df` + check_names: list of str, optional + Closed set of column names to be compared + repeat: int, optional + How many times to repeat the test + """ + + write_kwargs = write_kwargs or {'compression': None} + read_kwargs = read_kwargs or {} + + if expected is None: + expected = df + + if engine: + write_kwargs['engine'] = engine + read_kwargs['engine'] = engine + + def compare(repeat): + for _ in range(repeat): + df.to_parquet(path, **write_kwargs) + with catch_warnings(record=True): + actual = read_parquet(path, **read_kwargs) + tm.assert_frame_equal(expected, actual, + check_names=check_names) + + if path is None: + with tm.ensure_clean() as path: + compare(repeat) + else: + compare(repeat) + + +def test_invalid_engine(df_compat): + with pytest.raises(ValueError): + check_round_trip(df_compat, 'foo', 'bar') + + +def test_options_py(df_compat, pa): + # use the set option + + with pd.option_context('io.parquet.engine', 'pyarrow'): + check_round_trip(df_compat) + + +def test_options_fp(df_compat, fp): + # use the set option + + with pd.option_context('io.parquet.engine', 'fastparquet'): + check_round_trip(df_compat) + + +def test_options_auto(df_compat, fp, pa): + # use the set option + + with pd.option_context('io.parquet.engine', 'auto'): + check_round_trip(df_compat) + + +def test_options_get_engine(fp, pa): + assert isinstance(get_engine('pyarrow'), PyArrowImpl) + assert isinstance(get_engine('fastparquet'), FastParquetImpl) + + with pd.option_context('io.parquet.engine', 'pyarrow'): + assert isinstance(get_engine('auto'), PyArrowImpl) + assert isinstance(get_engine('pyarrow'), PyArrowImpl) + assert isinstance(get_engine('fastparquet'), FastParquetImpl) + + with 
pd.option_context('io.parquet.engine', 'fastparquet'): + assert isinstance(get_engine('auto'), FastParquetImpl) + assert isinstance(get_engine('pyarrow'), PyArrowImpl) + assert isinstance(get_engine('fastparquet'), FastParquetImpl) + + with pd.option_context('io.parquet.engine', 'auto'): + assert isinstance(get_engine('auto'), PyArrowImpl) + assert isinstance(get_engine('pyarrow'), PyArrowImpl) + assert isinstance(get_engine('fastparquet'), FastParquetImpl) + + +@pytest.mark.xfail(is_platform_windows() or is_platform_mac(), + reason="reading pa metadata failing on Windows/mac") +def test_cross_engine_pa_fp(df_cross_compat, pa, fp): + # cross-compat with differing reading/writing engines + + df = df_cross_compat + with tm.ensure_clean() as path: + df.to_parquet(path, engine=pa, compression=None) + + result = read_parquet(path, engine=fp) + tm.assert_frame_equal(result, df) + + result = read_parquet(path, engine=fp, columns=['a', 'd']) + tm.assert_frame_equal(result, df[['a', 'd']]) + + +def test_cross_engine_fp_pa(df_cross_compat, pa, fp): + # cross-compat with differing reading/writing engines + + df = df_cross_compat + with tm.ensure_clean() as path: + df.to_parquet(path, engine=fp, compression=None) + + with catch_warnings(record=True): + result = read_parquet(path, engine=pa) + tm.assert_frame_equal(result, df) + + result = read_parquet(path, engine=pa, columns=['a', 'd']) + tm.assert_frame_equal(result, df[['a', 'd']]) + + +class Base(object): + + def check_error_on_write(self, df, engine, exc): + # check that we are raising the exception on writing + with tm.ensure_clean() as path: + with pytest.raises(exc): + to_parquet(df, path, engine, compression=None) + + +class TestBasic(Base): + + def test_error(self, engine): + for obj in [pd.Series([1, 2, 3]), 1, 'foo', pd.Timestamp('20130101'), + np.array([1, 2, 3])]: + self.check_error_on_write(obj, engine, ValueError) + + def test_columns_dtypes(self, engine): + df = pd.DataFrame({'string': list('abc'), + 'int': 
list(range(1, 4))}) + + # unicode + df.columns = [u'foo', u'bar'] + check_round_trip(df, engine) + + def test_columns_dtypes_invalid(self, engine): + df = pd.DataFrame({'string': list('abc'), + 'int': list(range(1, 4))}) + + # numeric + df.columns = [0, 1] + self.check_error_on_write(df, engine, ValueError) + + if PY3: + # bytes on PY3, on PY2 these are str + df.columns = [b'foo', b'bar'] + self.check_error_on_write(df, engine, ValueError) + + # python object + df.columns = [datetime.datetime(2011, 1, 1, 0, 0), + datetime.datetime(2011, 1, 1, 1, 1)] + self.check_error_on_write(df, engine, ValueError) + + @pytest.mark.parametrize('compression', [None, 'gzip', 'snappy', 'brotli']) + def test_compression(self, engine, compression): + + if compression == 'snappy': + pytest.importorskip('snappy') + + elif compression == 'brotli': + pytest.importorskip('brotli') + + df = pd.DataFrame({'A': [1, 2, 3]}) + check_round_trip(df, engine, write_kwargs={'compression': compression}) + + def test_read_columns(self, engine): + # GH18154 + df = pd.DataFrame({'string': list('abc'), + 'int': list(range(1, 4))}) + + expected = pd.DataFrame({'string': list('abc')}) + check_round_trip(df, engine, expected=expected, + read_kwargs={'columns': ['string']}) + + def test_write_index(self, engine): + check_names = engine != 'fastparquet' + + if engine == 'pyarrow': + import pyarrow + if LooseVersion(pyarrow.__version__) < LooseVersion('0.7.0'): + pytest.skip("pyarrow is < 0.7.0") + + df = pd.DataFrame({'A': [1, 2, 3]}) + check_round_trip(df, engine) + + indexes = [ + [2, 3, 4], + pd.date_range('20130101', periods=3), + list('abc'), + [1, 3, 4], + ] + # non-default index + for index in indexes: + df.index = index + check_round_trip(df, engine, check_names=check_names) + + # index with meta-data + df.index = [0, 1, 2] + df.index.name = 'foo' + check_round_trip(df, engine) + + def test_write_multiindex(self, pa_ge_070): + # Not suppoprted in fastparquet as of 0.1.3 or older pyarrow version + 
engine = pa_ge_070 + + df = pd.DataFrame({'A': [1, 2, 3]}) + index = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1)]) + df.index = index + check_round_trip(df, engine) + + def test_write_column_multiindex(self, engine): + # column multi-index + mi_columns = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1)]) + df = pd.DataFrame(np.random.randn(4, 3), columns=mi_columns) + self.check_error_on_write(df, engine, ValueError) + + def test_multiindex_with_columns(self, pa_ge_070): + engine = pa_ge_070 + dates = pd.date_range('01-Jan-2018', '01-Dec-2018', freq='MS') + df = pd.DataFrame(np.random.randn(2 * len(dates), 3), + columns=list('ABC')) + index1 = pd.MultiIndex.from_product( + [['Level1', 'Level2'], dates], + names=['level', 'date']) + index2 = index1.copy(names=None) + for index in [index1, index2]: + df.index = index + + check_round_trip(df, engine) + check_round_trip(df, engine, read_kwargs={'columns': ['A', 'B']}, + expected=df[['A', 'B']]) + + +class TestParquetPyArrow(Base): + + def test_basic(self, pa, df_full): + + df = df_full + + # additional supported types for pyarrow + import pyarrow + if LooseVersion(pyarrow.__version__) >= LooseVersion('0.7.0'): + df['datetime_tz'] = pd.date_range('20130101', periods=3, + tz='Europe/Brussels') + df['bool_with_none'] = [True, None, True] + + check_round_trip(df, pa) + + @pytest.mark.xfail(reason="pyarrow fails on this (ARROW-1883)") + def test_basic_subset_columns(self, pa, df_full): + # GH18628 + + df = df_full + # additional supported types for pyarrow + df['datetime_tz'] = pd.date_range('20130101', periods=3, + tz='Europe/Brussels') + + check_round_trip(df, pa, expected=df[['string', 'int']], + read_kwargs={'columns': ['string', 'int']}) + + def test_duplicate_columns(self, pa): + # not currently able to handle duplicate columns + df = pd.DataFrame(np.arange(12).reshape(4, 3), + columns=list('aaa')).copy() + self.check_error_on_write(df, pa, ValueError) + + def test_unsupported(self, pa): + # period 
+ df = pd.DataFrame({'a': pd.period_range('2013', freq='M', periods=3)}) + self.check_error_on_write(df, pa, ValueError) + + # timedelta + df = pd.DataFrame({'a': pd.timedelta_range('1 day', + periods=3)}) + self.check_error_on_write(df, pa, NotImplementedError) + + # mixed python objects + df = pd.DataFrame({'a': ['a', 1, 2.0]}) + self.check_error_on_write(df, pa, ValueError) + + def test_categorical(self, pa_ge_070): + pa = pa_ge_070 + + # supported in >= 0.7.0 + df = pd.DataFrame({'a': pd.Categorical(list('abc'))}) + + # de-serialized as object + expected = df.assign(a=df.a.astype(object)) + check_round_trip(df, pa, expected=expected) + + def test_categorical_unsupported(self, pa_lt_070): + pa = pa_lt_070 + + # supported in >= 0.7.0 + df = pd.DataFrame({'a': pd.Categorical(list('abc'))}) + self.check_error_on_write(df, pa, NotImplementedError) + + def test_s3_roundtrip(self, df_compat, s3_resource, pa): + # GH #19134 + check_round_trip(df_compat, pa, + path='s3://pandas-test/pyarrow.parquet') + + +class TestParquetFastParquet(Base): + + def test_basic(self, fp, df_full): + df = df_full + + # additional supported types for fastparquet + if LooseVersion(fastparquet.__version__) >= LooseVersion('0.1.4'): + df['datetime_tz'] = pd.date_range('20130101', periods=3, + tz='US/Eastern') + df['timedelta'] = pd.timedelta_range('1 day', periods=3) + check_round_trip(df, fp) + + @pytest.mark.skip(reason="not supported") + def test_duplicate_columns(self, fp): + + # not currently able to handle duplicate columns + df = pd.DataFrame(np.arange(12).reshape(4, 3), + columns=list('aaa')).copy() + self.check_error_on_write(df, fp, ValueError) + + def test_bool_with_none(self, fp): + df = pd.DataFrame({'a': [True, None, False]}) + expected = pd.DataFrame({'a': [1.0, np.nan, 0.0]}, dtype='float16') + check_round_trip(df, fp, expected=expected) + + def test_unsupported(self, fp): + + # period + df = pd.DataFrame({'a': pd.period_range('2013', freq='M', periods=3)}) + 
self.check_error_on_write(df, fp, ValueError) + + # mixed + df = pd.DataFrame({'a': ['a', 1, 2.0]}) + self.check_error_on_write(df, fp, ValueError) + + def test_categorical(self, fp): + if LooseVersion(fastparquet.__version__) < LooseVersion("0.1.3"): + pytest.skip("CategoricalDtype not supported for older fp") + df = pd.DataFrame({'a': pd.Categorical(list('abc'))}) + check_round_trip(df, fp) + + def test_datetime_tz(self, fp_lt_014): + + # fastparquet<0.1.4 doesn't preserve tz + df = pd.DataFrame({'a': pd.date_range('20130101', periods=3, + tz='US/Eastern')}) + # warns on the coercion + with catch_warnings(record=True): + check_round_trip(df, fp_lt_014, + expected=df.astype('datetime64[ns]')) + + def test_filter_row_groups(self, fp): + d = {'a': list(range(0, 3))} + df = pd.DataFrame(d) + with tm.ensure_clean() as path: + df.to_parquet(path, fp, compression=None, + row_group_offsets=1) + result = read_parquet(path, fp, filters=[('a', '==', 0)]) + assert len(result) == 1 + + def test_s3_roundtrip(self, df_compat, s3_resource, fp): + # GH #19134 + check_round_trip(df_compat, fp, + path='s3://pandas-test/fastparquet.parquet') diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py new file mode 100644 index 0000000000000..2ba3e174404c7 --- /dev/null +++ b/pandas/tests/io/test_pickle.py @@ -0,0 +1,494 @@ +# pylint: disable=E1101,E1103,W0232 + +""" +manage legacy pickle tests + +How to add pickle tests: + +1. Install pandas version intended to output the pickle. + +2. Execute "generate_legacy_storage_files.py" to create the pickle. +$ python generate_legacy_storage_files.py pickle + +3. Move the created pickle to "data/legacy_pickle/" directory. 
+""" + +import pytest +from warnings import catch_warnings + +import os +from distutils.version import LooseVersion +import pandas as pd +from pandas import Index +from pandas.compat import is_platform_little_endian +import pandas +import pandas.util.testing as tm +import pandas.util._test_decorators as td +from pandas.tseries.offsets import Day, MonthEnd +import shutil +import sys + + +@pytest.fixture(scope='module') +def current_pickle_data(): + # our current version pickle data + from pandas.tests.io.generate_legacy_storage_files import ( + create_pickle_data) + return create_pickle_data() + + +# --------------------- +# comparison functions +# --------------------- +def compare_element(result, expected, typ, version=None): + if isinstance(expected, Index): + tm.assert_index_equal(expected, result) + return + + if typ.startswith('sp_'): + comparator = getattr(tm, "assert_%s_equal" % typ) + comparator(result, expected, exact_indices=False) + elif typ == 'timestamp': + if expected is pd.NaT: + assert result is pd.NaT + else: + assert result == expected + assert result.freq == expected.freq + else: + comparator = getattr(tm, "assert_%s_equal" % + typ, tm.assert_almost_equal) + comparator(result, expected) + + +def compare(data, vf, version): + + # py3 compat when reading py2 pickle + try: + data = pandas.read_pickle(vf) + except (ValueError) as e: + if 'unsupported pickle protocol:' in str(e): + # trying to read a py3 pickle in py2 + return + else: + raise + + m = globals() + for typ, dv in data.items(): + for dt, result in dv.items(): + try: + expected = data[typ][dt] + except (KeyError): + if version in ('0.10.1', '0.11.0') and dt == 'reg': + break + else: + raise + + # use a specific comparator + # if available + comparator = "compare_{typ}_{dt}".format(typ=typ, dt=dt) + + comparator = m.get(comparator, m['compare_element']) + comparator(result, expected, typ, version) + return data + + +def compare_sp_series_ts(res, exp, typ, version): + # SparseTimeSeries 
integrated into SparseSeries in 0.12.0 + # and deprecated in 0.17.0 + if version and LooseVersion(version) <= LooseVersion("0.12.0"): + tm.assert_sp_series_equal(res, exp, check_series_type=False) + else: + tm.assert_sp_series_equal(res, exp) + + +def compare_series_ts(result, expected, typ, version): + # GH 7748 + tm.assert_series_equal(result, expected) + assert result.index.freq == expected.index.freq + assert not result.index.freq.normalize + tm.assert_series_equal(result > 0, expected > 0) + + # GH 9291 + freq = result.index.freq + assert freq + Day(1) == Day(2) + + res = freq + pandas.Timedelta(hours=1) + assert isinstance(res, pandas.Timedelta) + assert res == pandas.Timedelta(days=1, hours=1) + + res = freq + pandas.Timedelta(nanoseconds=1) + assert isinstance(res, pandas.Timedelta) + assert res == pandas.Timedelta(days=1, nanoseconds=1) + + +def compare_series_dt_tz(result, expected, typ, version): + # 8260 + # dtype is object < 0.17.0 + if LooseVersion(version) < LooseVersion('0.17.0'): + expected = expected.astype(object) + tm.assert_series_equal(result, expected) + else: + tm.assert_series_equal(result, expected) + + +def compare_series_cat(result, expected, typ, version): + # Categorical dtype is added in 0.15.0 + # ordered is changed in 0.16.0 + if LooseVersion(version) < LooseVersion('0.15.0'): + tm.assert_series_equal(result, expected, check_dtype=False, + check_categorical=False) + elif LooseVersion(version) < LooseVersion('0.16.0'): + tm.assert_series_equal(result, expected, check_categorical=False) + else: + tm.assert_series_equal(result, expected) + + +def compare_frame_dt_mixed_tzs(result, expected, typ, version): + # 8260 + # dtype is object < 0.17.0 + if LooseVersion(version) < LooseVersion('0.17.0'): + expected = expected.astype(object) + tm.assert_frame_equal(result, expected) + else: + tm.assert_frame_equal(result, expected) + + +def compare_frame_cat_onecol(result, expected, typ, version): + # Categorical dtype is added in 0.15.0 + # 
ordered is changed in 0.16.0 + if LooseVersion(version) < LooseVersion('0.15.0'): + tm.assert_frame_equal(result, expected, check_dtype=False, + check_categorical=False) + elif LooseVersion(version) < LooseVersion('0.16.0'): + tm.assert_frame_equal(result, expected, check_categorical=False) + else: + tm.assert_frame_equal(result, expected) + + +def compare_frame_cat_and_float(result, expected, typ, version): + compare_frame_cat_onecol(result, expected, typ, version) + + +def compare_index_period(result, expected, typ, version): + tm.assert_index_equal(result, expected) + assert isinstance(result.freq, MonthEnd) + assert result.freq == MonthEnd() + assert result.freqstr == 'M' + tm.assert_index_equal(result.shift(2), expected.shift(2)) + + +def compare_sp_frame_float(result, expected, typ, version): + if LooseVersion(version) <= LooseVersion('0.18.1'): + tm.assert_sp_frame_equal(result, expected, exact_indices=False, + check_dtype=False) + else: + tm.assert_sp_frame_equal(result, expected) + + +# --------------------- +# tests +# --------------------- +def legacy_pickle_versions(): + # yield the pickle versions + path = tm.get_data_path('legacy_pickle') + for v in os.listdir(path): + p = os.path.join(path, v) + if os.path.isdir(p): + for f in os.listdir(p): + yield (v, f) + + +@pytest.mark.parametrize('version, f', legacy_pickle_versions()) +def test_pickles(current_pickle_data, version, f): + if not is_platform_little_endian(): + pytest.skip("known failure on non-little endian") + + vf = tm.get_data_path('legacy_pickle/{}/{}'.format(version, f)) + with catch_warnings(record=True): + compare(current_pickle_data, vf, version) + + +def test_round_trip_current(current_pickle_data): + + try: + import cPickle as c_pickle + + def c_pickler(obj, path): + with open(path, 'wb') as fh: + c_pickle.dump(obj, fh, protocol=-1) + + def c_unpickler(path): + with open(path, 'rb') as fh: + fh.seek(0) + return c_pickle.load(fh) + except: + c_pickler = None + c_unpickler = None + + 
import pickle as python_pickle + + def python_pickler(obj, path): + with open(path, 'wb') as fh: + python_pickle.dump(obj, fh, protocol=-1) + + def python_unpickler(path): + with open(path, 'rb') as fh: + fh.seek(0) + return python_pickle.load(fh) + + data = current_pickle_data + for typ, dv in data.items(): + for dt, expected in dv.items(): + + for writer in [pd.to_pickle, c_pickler, python_pickler]: + if writer is None: + continue + + with tm.ensure_clean() as path: + + # test writing with each pickler + writer(expected, path) + + # test reading with each unpickler + result = pd.read_pickle(path) + compare_element(result, expected, typ) + + if c_unpickler is not None: + result = c_unpickler(path) + compare_element(result, expected, typ) + + result = python_unpickler(path) + compare_element(result, expected, typ) + + +def test_pickle_v0_14_1(): + + cat = pd.Categorical(values=['a', 'b', 'c'], ordered=False, + categories=['a', 'b', 'c', 'd']) + pickle_path = os.path.join(tm.get_data_path(), + 'categorical_0_14_1.pickle') + # This code was executed once on v0.14.1 to generate the pickle: + # + # cat = Categorical(labels=np.arange(3), levels=['a', 'b', 'c', 'd'], + # name='foobar') + # with open(pickle_path, 'wb') as f: pickle.dump(cat, f) + # + tm.assert_categorical_equal(cat, pd.read_pickle(pickle_path)) + + +def test_pickle_v0_15_2(): + # ordered -> _ordered + # GH 9347 + + cat = pd.Categorical(values=['a', 'b', 'c'], ordered=False, + categories=['a', 'b', 'c', 'd']) + pickle_path = os.path.join(tm.get_data_path(), + 'categorical_0_15_2.pickle') + # This code was executed once on v0.15.2 to generate the pickle: + # + # cat = Categorical(labels=np.arange(3), levels=['a', 'b', 'c', 'd'], + # name='foobar') + # with open(pickle_path, 'wb') as f: pickle.dump(cat, f) + # + tm.assert_categorical_equal(cat, pd.read_pickle(pickle_path)) + + +def test_pickle_path_pathlib(): + df = tm.makeDataFrame() + result = tm.round_trip_pathlib(df.to_pickle, pd.read_pickle) + 
tm.assert_frame_equal(df, result) + + +def test_pickle_path_localpath(): + df = tm.makeDataFrame() + result = tm.round_trip_localpath(df.to_pickle, pd.read_pickle) + tm.assert_frame_equal(df, result) + + +# --------------------- +# test pickle compression +# --------------------- + +@pytest.fixture +def get_random_path(): + return u'__%s__.pickle' % tm.rands(10) + + +class TestCompression(object): + + _compression_to_extension = { + None: ".none", + 'gzip': '.gz', + 'bz2': '.bz2', + 'zip': '.zip', + 'xz': '.xz', + } + + def compress_file(self, src_path, dest_path, compression): + if compression is None: + shutil.copyfile(src_path, dest_path) + return + + if compression == 'gzip': + import gzip + f = gzip.open(dest_path, "w") + elif compression == 'bz2': + import bz2 + f = bz2.BZ2File(dest_path, "w") + elif compression == 'zip': + import zipfile + zip_file = zipfile.ZipFile(dest_path, "w", + compression=zipfile.ZIP_DEFLATED) + zip_file.write(src_path, os.path.basename(src_path)) + elif compression == 'xz': + lzma = pandas.compat.import_lzma() + f = lzma.LZMAFile(dest_path, "w") + else: + msg = 'Unrecognized compression type: {}'.format(compression) + raise ValueError(msg) + + if compression != "zip": + with open(src_path, "rb") as fh: + f.write(fh.read()) + f.close() + + def test_write_explicit(self, compression_no_zip, get_random_path): + base = get_random_path + path1 = base + ".compressed" + path2 = base + ".raw" + + with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2: + df = tm.makeDataFrame() + + # write to compressed file + df.to_pickle(p1, compression=compression_no_zip) + + # decompress + with tm.decompress_file(p1, compression=compression_no_zip) as f: + with open(p2, "wb") as fh: + fh.write(f.read()) + + # read decompressed file + df2 = pd.read_pickle(p2, compression=None) + + tm.assert_frame_equal(df, df2) + + @pytest.mark.parametrize('compression', ['', 'None', 'bad', '7z']) + def test_write_explicit_bad(self, compression, get_random_path): + 
with tm.assert_raises_regex(ValueError, + "Unrecognized compression type"): + with tm.ensure_clean(get_random_path) as path: + df = tm.makeDataFrame() + df.to_pickle(path, compression=compression) + + @pytest.mark.parametrize('ext', [ + '', '.gz', '.bz2', '.no_compress', + pytest.param('.xz', marks=td.skip_if_no_lzma) + ]) + def test_write_infer(self, ext, get_random_path): + base = get_random_path + path1 = base + ext + path2 = base + ".raw" + compression = None + for c in self._compression_to_extension: + if self._compression_to_extension[c] == ext: + compression = c + break + + with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2: + df = tm.makeDataFrame() + + # write to compressed file by inferred compression method + df.to_pickle(p1) + + # decompress + with tm.decompress_file(p1, compression=compression) as f: + with open(p2, "wb") as fh: + fh.write(f.read()) + + # read decompressed file + df2 = pd.read_pickle(p2, compression=None) + + tm.assert_frame_equal(df, df2) + + def test_read_explicit(self, compression, get_random_path): + base = get_random_path + path1 = base + ".raw" + path2 = base + ".compressed" + + with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2: + df = tm.makeDataFrame() + + # write to uncompressed file + df.to_pickle(p1, compression=None) + + # compress + self.compress_file(p1, p2, compression=compression) + + # read compressed file + df2 = pd.read_pickle(p2, compression=compression) + + tm.assert_frame_equal(df, df2) + + @pytest.mark.parametrize('ext', [ + '', '.gz', '.bz2', '.zip', '.no_compress', + pytest.param('.xz', marks=td.skip_if_no_lzma) + ]) + def test_read_infer(self, ext, get_random_path): + base = get_random_path + path1 = base + ".raw" + path2 = base + ext + compression = None + for c in self._compression_to_extension: + if self._compression_to_extension[c] == ext: + compression = c + break + + with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2: + df = tm.makeDataFrame() + + # write to 
uncompressed file + df.to_pickle(p1, compression=None) + + # compress + self.compress_file(p1, p2, compression=compression) + + # read compressed file by inferred compression method + df2 = pd.read_pickle(p2) + + tm.assert_frame_equal(df, df2) + + +# --------------------- +# test pickle compression +# --------------------- + +class TestProtocol(object): + + @pytest.mark.parametrize('protocol', [-1, 0, 1, 2]) + def test_read(self, protocol, get_random_path): + with tm.ensure_clean(get_random_path) as path: + df = tm.makeDataFrame() + df.to_pickle(path, protocol=protocol) + df2 = pd.read_pickle(path) + tm.assert_frame_equal(df, df2) + + @pytest.mark.parametrize('protocol', [3, 4]) + @pytest.mark.skipif(sys.version_info[:2] >= (3, 4), + reason="Testing invalid parameters for " + "Python 2.x and 3.y (y < 4).") + def test_read_bad_versions(self, protocol, get_random_path): + # For Python 2.x (respectively 3.y with y < 4), [expected] + # HIGHEST_PROTOCOL should be 2 (respectively 3). Hence, the protocol + # parameter should not exceed 2 (respectively 3). 
+ if sys.version_info[:2] < (3, 0): + expect_hp = 2 + else: + expect_hp = 3 + with tm.assert_raises_regex(ValueError, + "pickle protocol %d asked for; the highest" + " available protocol is %d" % (protocol, + expect_hp)): + with tm.ensure_clean(get_random_path) as path: + df = tm.makeDataFrame() + df.to_pickle(path, protocol=protocol) diff --git a/pandas/io/tests/test_pytables.py b/pandas/tests/io/test_pytables.py similarity index 67% rename from pandas/io/tests/test_pytables.py rename to pandas/tests/io/test_pytables.py index 3fa0eb2ef52dc..e690b1e302d8b 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -1,56 +1,44 @@ import pytest -import sys import os -import warnings import tempfile from contextlib import contextmanager +from warnings import catch_warnings +from distutils.version import LooseVersion import datetime +from datetime import timedelta + import numpy as np -import pandas import pandas as pd from pandas import (Series, DataFrame, Panel, MultiIndex, Int64Index, RangeIndex, Categorical, bdate_range, date_range, timedelta_range, Index, DatetimeIndex, - isnull) - -from pandas.compat import is_platform_windows, PY3, PY35 -from pandas.formats.printing import pprint_thing - -tables = pytest.importorskip('tables') -from pandas.io.pytables import TableIterator -from pandas.io.pytables import (HDFStore, get_store, Term, read_hdf, - IncompatibilityWarning, PerformanceWarning, - AttributeConflictWarning, DuplicateWarning, - PossibleDataLossError, ClosedFileError) + isna, compat, concat, Timestamp) -from pandas.io import pytables as pytables import pandas.util.testing as tm -from pandas.util.testing import (assert_panel4d_equal, - assert_panel_equal, +import pandas.util._test_decorators as td +from pandas.util.testing import (assert_panel_equal, assert_frame_equal, assert_series_equal, - assert_produces_warning, set_timezone) -from pandas import concat, Timestamp -from pandas import compat -from pandas.compat import range, 
lrange, u -try: - import tables -except ImportError: - pytest.skip('no pytables') +from pandas.compat import (is_platform_windows, is_platform_little_endian, + PY35, PY36, BytesIO, text_type, + range, lrange, u) +from pandas.io.formats.printing import pprint_thing +from pandas.core.dtypes.common import is_categorical_dtype -from distutils.version import LooseVersion +tables = pytest.importorskip('tables') +from pandas.io import pytables as pytables # noqa:E402 +from pandas.io.pytables import (TableIterator, # noqa:E402 + HDFStore, get_store, Term, read_hdf, + PossibleDataLossError, ClosedFileError) -_default_compressor = ('blosc' if LooseVersion(tables.__version__) >= '2.2' - else 'zlib') +_default_compressor = ('blosc' if LooseVersion(tables.__version__) >= + LooseVersion('2.2') else 'zlib') -# testing on windows/py3 seems to fault -# for using compression -skip_compression = PY3 and is_platform_windows() # contextmanager to ensure the file cleanup @@ -129,61 +117,50 @@ def _maybe_remove(store, key): pass -@contextmanager -def compat_assert_produces_warning(w): - """ don't produce a warning under PY3 """ - if compat.PY3: - yield - else: - with tm.assert_produces_warning(expected_warning=w, - check_stacklevel=False): - yield - - -class Base(tm.TestCase): +class Base(object): @classmethod - def setUpClass(cls): - super(Base, cls).setUpClass() + def setup_class(cls): # Pytables 3.0.0 deprecates lots of things tm.reset_testing_mode() @classmethod - def tearDownClass(cls): - super(Base, cls).tearDownClass() + def teardown_class(cls): # Pytables 3.0.0 deprecates lots of things tm.set_testing_mode() - def setUp(self): - warnings.filterwarnings(action='ignore', category=FutureWarning) - + def setup_method(self, method): self.path = 'tmp.__%s__.h5' % tm.rands(10) - def tearDown(self): + def teardown_method(self, method): pass -class TestHDFStore(Base, tm.TestCase): +@pytest.mark.single +class TestHDFStore(Base): def test_factory_fun(self): path = create_tempfile(self.path) 
try: - with get_store(path) as tbl: - raise ValueError('blah') + with catch_warnings(record=True): + with get_store(path) as tbl: + raise ValueError('blah') except ValueError: pass finally: safe_remove(path) try: - with get_store(path) as tbl: - tbl['a'] = tm.makeDataFrame() - - with get_store(path) as tbl: - self.assertEqual(len(tbl), 1) - self.assertEqual(type(tbl['a']), DataFrame) + with catch_warnings(record=True): + with get_store(path) as tbl: + tbl['a'] = tm.makeDataFrame() + + with catch_warnings(record=True): + with get_store(path) as tbl: + assert len(tbl) == 1 + assert type(tbl['a']) == DataFrame finally: safe_remove(self.path) @@ -202,8 +179,8 @@ def test_context(self): tbl['a'] = tm.makeDataFrame() with HDFStore(path) as tbl: - self.assertEqual(len(tbl), 1) - self.assertEqual(type(tbl['a']), DataFrame) + assert len(tbl) == 1 + assert type(tbl['a']) == DataFrame finally: safe_remove(path) @@ -223,8 +200,10 @@ def roundtrip(key, obj, **kwargs): o = tm.makeDataFrame() assert_frame_equal(o, roundtrip('frame', o)) - o = tm.makePanel() - assert_panel_equal(o, roundtrip('panel', o)) + with catch_warnings(record=True): + + o = tm.makePanel() + assert_panel_equal(o, roundtrip('panel', o)) # table df = DataFrame(dict(A=lrange(5), B=lrange(5))) @@ -323,20 +302,20 @@ def test_api(self): # invalid df = tm.makeDataFrame() - self.assertRaises(ValueError, df.to_hdf, path, - 'df', append=True, format='f') - self.assertRaises(ValueError, df.to_hdf, path, - 'df', append=True, format='fixed') + pytest.raises(ValueError, df.to_hdf, path, + 'df', append=True, format='f') + pytest.raises(ValueError, df.to_hdf, path, + 'df', append=True, format='fixed') - self.assertRaises(TypeError, df.to_hdf, path, - 'df', append=True, format='foo') - self.assertRaises(TypeError, df.to_hdf, path, - 'df', append=False, format='bar') + pytest.raises(TypeError, df.to_hdf, path, + 'df', append=True, format='foo') + pytest.raises(TypeError, df.to_hdf, path, + 'df', append=False, format='bar') # 
File path doesn't exist path = "" - self.assertRaises(compat.FileNotFoundError, - read_hdf, path, 'df') + pytest.raises(compat.FileNotFoundError, + read_hdf, path, 'df') def test_api_default_format(self): @@ -344,41 +323,41 @@ def test_api_default_format(self): with ensure_clean_store(self.path) as store: df = tm.makeDataFrame() - pandas.set_option('io.hdf.default_format', 'fixed') + pd.set_option('io.hdf.default_format', 'fixed') _maybe_remove(store, 'df') store.put('df', df) - self.assertFalse(store.get_storer('df').is_table) - self.assertRaises(ValueError, store.append, 'df2', df) + assert not store.get_storer('df').is_table + pytest.raises(ValueError, store.append, 'df2', df) - pandas.set_option('io.hdf.default_format', 'table') + pd.set_option('io.hdf.default_format', 'table') _maybe_remove(store, 'df') store.put('df', df) - self.assertTrue(store.get_storer('df').is_table) + assert store.get_storer('df').is_table _maybe_remove(store, 'df2') store.append('df2', df) - self.assertTrue(store.get_storer('df').is_table) + assert store.get_storer('df').is_table - pandas.set_option('io.hdf.default_format', None) + pd.set_option('io.hdf.default_format', None) with ensure_clean_path(self.path) as path: df = tm.makeDataFrame() - pandas.set_option('io.hdf.default_format', 'fixed') + pd.set_option('io.hdf.default_format', 'fixed') df.to_hdf(path, 'df') - with get_store(path) as store: - self.assertFalse(store.get_storer('df').is_table) - self.assertRaises(ValueError, df.to_hdf, path, 'df2', append=True) + with HDFStore(path) as store: + assert not store.get_storer('df').is_table + pytest.raises(ValueError, df.to_hdf, path, 'df2', append=True) - pandas.set_option('io.hdf.default_format', 'table') + pd.set_option('io.hdf.default_format', 'table') df.to_hdf(path, 'df3') with HDFStore(path) as store: - self.assertTrue(store.get_storer('df3').is_table) + assert store.get_storer('df3').is_table df.to_hdf(path, 'df4', append=True) with HDFStore(path) as store: - 
self.assertTrue(store.get_storer('df4').is_table) + assert store.get_storer('df4').is_table - pandas.set_option('io.hdf.default_format', None) + pd.set_option('io.hdf.default_format', None) def test_keys(self): @@ -386,29 +365,33 @@ def test_keys(self): store['a'] = tm.makeTimeSeries() store['b'] = tm.makeStringSeries() store['c'] = tm.makeDataFrame() - store['d'] = tm.makePanel() - store['foo/bar'] = tm.makePanel() - self.assertEqual(len(store), 5) + with catch_warnings(record=True): + store['d'] = tm.makePanel() + store['foo/bar'] = tm.makePanel() + assert len(store) == 5 expected = set(['/a', '/b', '/c', '/d', '/foo/bar']) - self.assertTrue(set(store.keys()) == expected) - self.assertTrue(set(store) == expected) + assert set(store.keys()) == expected + assert set(store) == expected def test_iter_empty(self): with ensure_clean_store(self.path) as store: # GH 12221 - self.assertTrue(list(store) == []) + assert list(store) == [] def test_repr(self): with ensure_clean_store(self.path) as store: repr(store) + store.info() store['a'] = tm.makeTimeSeries() store['b'] = tm.makeStringSeries() store['c'] = tm.makeDataFrame() - store['d'] = tm.makePanel() - store['foo/bar'] = tm.makePanel() - store.append('e', tm.makePanel()) + + with catch_warnings(record=True): + store['d'] = tm.makePanel() + store['foo/bar'] = tm.makePanel() + store.append('e', tm.makePanel()) df = tm.makeDataFrame() df['obj1'] = 'foo' @@ -423,17 +406,18 @@ def test_repr(self): df['datetime1'] = datetime.datetime(2001, 1, 2, 0, 0) df['datetime2'] = datetime.datetime(2001, 1, 3, 0, 0) df.loc[3:6, ['obj1']] = np.nan - df = df.consolidate()._convert(datetime=True) + df = df._consolidate()._convert(datetime=True) - warnings.filterwarnings('ignore', category=PerformanceWarning) - store['df'] = df - warnings.filterwarnings('always', category=PerformanceWarning) + # PerformanceWarning + with catch_warnings(record=True): + store['df'] = df # make a random group in hdf space 
store._handle.create_group(store._handle.root, 'bah') - repr(store) - str(store) + assert store.filename in repr(store) + assert store.filename in str(store) + store.info() # storers with ensure_clean_store(self.path) as store: @@ -451,19 +435,18 @@ def test_contains(self): store['a'] = tm.makeTimeSeries() store['b'] = tm.makeDataFrame() store['foo/bar'] = tm.makeDataFrame() - self.assertIn('a', store) - self.assertIn('b', store) - self.assertNotIn('c', store) - self.assertIn('foo/bar', store) - self.assertIn('/foo/bar', store) - self.assertNotIn('/foo/b', store) - self.assertNotIn('bar', store) - - # GH 2694 - warnings.filterwarnings( - 'ignore', category=tables.NaturalNameWarning) - store['node())'] = tm.makeDataFrame() - self.assertIn('node())', store) + assert 'a' in store + assert 'b' in store + assert 'c' not in store + assert 'foo/bar' in store + assert '/foo/bar' in store + assert '/foo/b' not in store + assert 'bar' not in store + + # gh-2694: tables.NaturalNameWarning + with catch_warnings(record=True): + store['node())'] = tm.makeDataFrame() + assert 'node())' in store def test_versioning(self): @@ -474,9 +457,9 @@ def test_versioning(self): _maybe_remove(store, 'df1') store.append('df1', df[:10]) store.append('df1', df[10:]) - self.assertEqual(store.root.a._v_attrs.pandas_version, '0.15.2') - self.assertEqual(store.root.b._v_attrs.pandas_version, '0.15.2') - self.assertEqual(store.root.df1._v_attrs.pandas_version, '0.15.2') + assert store.root.a._v_attrs.pandas_version == '0.15.2' + assert store.root.b._v_attrs.pandas_version == '0.15.2' + assert store.root.df1._v_attrs.pandas_version == '0.15.2' # write a file and wipe its versioning _maybe_remove(store, 'df2') @@ -485,7 +468,7 @@ def test_versioning(self): # this is an error because its table_type is appendable, but no # version info store.get_node('df2')._v_attrs.pandas_version = None - self.assertRaises(Exception, store.select, 'df2') + pytest.raises(Exception, store.select, 'df2') def 
test_mode(self): @@ -497,11 +480,11 @@ def check(mode): # constructor if mode in ['r', 'r+']: - self.assertRaises(IOError, HDFStore, path, mode=mode) + pytest.raises(IOError, HDFStore, path, mode=mode) else: store = HDFStore(path, mode=mode) - self.assertEqual(store._handle.mode, mode) + assert store._handle.mode == mode store.close() with ensure_clean_path(self.path) as path: @@ -511,25 +494,25 @@ def check(mode): def f(): with HDFStore(path, mode=mode) as store: # noqa pass - self.assertRaises(IOError, f) + pytest.raises(IOError, f) else: with HDFStore(path, mode=mode) as store: - self.assertEqual(store._handle.mode, mode) + assert store._handle.mode == mode with ensure_clean_path(self.path) as path: # conv write if mode in ['r', 'r+']: - self.assertRaises(IOError, df.to_hdf, - path, 'df', mode=mode) + pytest.raises(IOError, df.to_hdf, + path, 'df', mode=mode) df.to_hdf(path, 'df', mode='w') else: df.to_hdf(path, 'df', mode=mode) # conv read if mode in ['w']: - self.assertRaises(ValueError, read_hdf, - path, 'df', mode=mode) + pytest.raises(ValueError, read_hdf, + path, 'df', mode=mode) else: result = read_hdf(path, 'df', mode=mode) assert_frame_equal(result, df) @@ -556,43 +539,43 @@ def test_reopen_handle(self): store['a'] = tm.makeTimeSeries() # invalid mode change - self.assertRaises(PossibleDataLossError, store.open, 'w') + pytest.raises(PossibleDataLossError, store.open, 'w') store.close() - self.assertFalse(store.is_open) + assert not store.is_open # truncation ok here store.open('w') - self.assertTrue(store.is_open) - self.assertEqual(len(store), 0) + assert store.is_open + assert len(store) == 0 store.close() - self.assertFalse(store.is_open) + assert not store.is_open store = HDFStore(path, mode='a') store['a'] = tm.makeTimeSeries() # reopen as read store.open('r') - self.assertTrue(store.is_open) - self.assertEqual(len(store), 1) - self.assertEqual(store._mode, 'r') + assert store.is_open + assert len(store) == 1 + assert store._mode == 'r' 
store.close() - self.assertFalse(store.is_open) + assert not store.is_open # reopen as append store.open('a') - self.assertTrue(store.is_open) - self.assertEqual(len(store), 1) - self.assertEqual(store._mode, 'a') + assert store.is_open + assert len(store) == 1 + assert store._mode == 'a' store.close() - self.assertFalse(store.is_open) + assert not store.is_open # reopen as append (again) store.open('a') - self.assertTrue(store.is_open) - self.assertEqual(len(store), 1) - self.assertEqual(store._mode, 'a') + assert store.is_open + assert len(store) == 1 + assert store._mode == 'a' store.close() - self.assertFalse(store.is_open) + assert not store.is_open def test_open_args(self): @@ -612,7 +595,7 @@ def test_open_args(self): store.close() # the file should not have actually been written - self.assertFalse(os.path.exists(path)) + assert not os.path.exists(path) def test_flush(self): @@ -633,7 +616,7 @@ def test_get(self): right = store['/a'] tm.assert_series_equal(left, right) - self.assertRaises(KeyError, store.get, 'b') + pytest.raises(KeyError, store.get, 'b') def test_getattr(self): @@ -654,10 +637,10 @@ def test_getattr(self): tm.assert_frame_equal(result, df) # errors - self.assertRaises(AttributeError, getattr, store, 'd') + pytest.raises(AttributeError, getattr, store, 'd') for x in ['mode', 'path', 'handle', 'complib']: - self.assertRaises(AttributeError, getattr, store, x) + pytest.raises(AttributeError, getattr, store, x) # not stores for x in ['mode', 'path', 'handle', 'complib']: @@ -677,17 +660,17 @@ def test_put(self): store.put('c', df[:10], format='table') # not OK, not a table - self.assertRaises( + pytest.raises( ValueError, store.put, 'b', df[10:], append=True) # node does not currently exist, test _is_table_type returns False # in this case # _maybe_remove(store, 'f') - # self.assertRaises(ValueError, store.put, 'f', df[10:], + # pytest.raises(ValueError, store.put, 'f', df[10:], # append=True) # can't put to a table (use append instead) - 
self.assertRaises(ValueError, store.put, 'c', df[10:], append=True) + pytest.raises(ValueError, store.put, 'c', df[10:], append=True) # overwrite table store.put('c', df[:10], format='table', append=False) @@ -729,25 +712,112 @@ def test_put_compression(self): tm.assert_frame_equal(store['c'], df) # can't compress if format='fixed' - self.assertRaises(ValueError, store.put, 'b', df, - format='fixed', complib='zlib') + pytest.raises(ValueError, store.put, 'b', df, + format='fixed', complib='zlib') + @td.skip_if_windows_python_3 def test_put_compression_blosc(self): - tm.skip_if_no_package('tables', '2.2', app='blosc support') - if skip_compression: - pytest.skip("skipping on windows/PY3") - df = tm.makeTimeDataFrame() with ensure_clean_store(self.path) as store: # can't compress if format='fixed' - self.assertRaises(ValueError, store.put, 'b', df, - format='fixed', complib='blosc') + pytest.raises(ValueError, store.put, 'b', df, + format='fixed', complib='blosc') store.put('c', df, format='table', complib='blosc') tm.assert_frame_equal(store['c'], df) + def test_complibs_default_settings(self): + # GH15943 + df = tm.makeDataFrame() + + # Set complevel and check if complib is automatically set to + # default value + with ensure_clean_path(self.path) as tmpfile: + df.to_hdf(tmpfile, 'df', complevel=9) + result = pd.read_hdf(tmpfile, 'df') + tm.assert_frame_equal(result, df) + + with tables.open_file(tmpfile, mode='r') as h5file: + for node in h5file.walk_nodes(where='/df', classname='Leaf'): + assert node.filters.complevel == 9 + assert node.filters.complib == 'zlib' + + # Set complib and check to see if compression is disabled + with ensure_clean_path(self.path) as tmpfile: + df.to_hdf(tmpfile, 'df', complib='zlib') + result = pd.read_hdf(tmpfile, 'df') + tm.assert_frame_equal(result, df) + + with tables.open_file(tmpfile, mode='r') as h5file: + for node in h5file.walk_nodes(where='/df', classname='Leaf'): + assert node.filters.complevel == 0 + assert 
node.filters.complib is None + + # Check if not setting complib or complevel results in no compression + with ensure_clean_path(self.path) as tmpfile: + df.to_hdf(tmpfile, 'df') + result = pd.read_hdf(tmpfile, 'df') + tm.assert_frame_equal(result, df) + + with tables.open_file(tmpfile, mode='r') as h5file: + for node in h5file.walk_nodes(where='/df', classname='Leaf'): + assert node.filters.complevel == 0 + assert node.filters.complib is None + + # Check if file-defaults can be overridden on a per table basis + with ensure_clean_path(self.path) as tmpfile: + store = pd.HDFStore(tmpfile) + store.append('dfc', df, complevel=9, complib='blosc') + store.append('df', df) + store.close() + + with tables.open_file(tmpfile, mode='r') as h5file: + for node in h5file.walk_nodes(where='/df', classname='Leaf'): + assert node.filters.complevel == 0 + assert node.filters.complib is None + for node in h5file.walk_nodes(where='/dfc', classname='Leaf'): + assert node.filters.complevel == 9 + assert node.filters.complib == 'blosc' + + def test_complibs(self): + # GH14478 + df = tm.makeDataFrame() + + # Building list of all complibs and complevels tuples + all_complibs = tables.filters.all_complibs + # Remove lzo if its not available on this platform + if not tables.which_lib_version('lzo'): + all_complibs.remove('lzo') + # Remove bzip2 if its not available on this platform + if not tables.which_lib_version("bzip2"): + all_complibs.remove("bzip2") + + all_levels = range(0, 10) + all_tests = [(lib, lvl) for lib in all_complibs for lvl in all_levels] + + for (lib, lvl) in all_tests: + with ensure_clean_path(self.path) as tmpfile: + gname = 'foo' + + # Write and read file to see if data is consistent + df.to_hdf(tmpfile, gname, complib=lib, complevel=lvl) + result = pd.read_hdf(tmpfile, gname) + tm.assert_frame_equal(result, df) + + # Open file and check metadata + # for correct amount of compression + h5table = tables.open_file(tmpfile, mode='r') + for node in 
h5table.walk_nodes(where='/' + gname, + classname='Leaf'): + assert node.filters.complevel == lvl + if lvl == 0: + assert node.filters.complib is None + else: + assert node.filters.complib == lib + h5table.close() + def test_put_integer(self): # non-date, non-string index df = DataFrame(np.random.randn(50, 100)) @@ -767,16 +837,14 @@ def test_put_mixed_type(self): df['datetime1'] = datetime.datetime(2001, 1, 2, 0, 0) df['datetime2'] = datetime.datetime(2001, 1, 3, 0, 0) df.loc[3:6, ['obj1']] = np.nan - df = df.consolidate()._convert(datetime=True) + df = df._consolidate()._convert(datetime=True) with ensure_clean_store(self.path) as store: _maybe_remove(store, 'df') - # cannot use assert_produces_warning here for some reason - # a PendingDeprecationWarning is also raised? - warnings.filterwarnings('ignore', category=PerformanceWarning) - store.put('df', df) - warnings.filterwarnings('always', category=PerformanceWarning) + # PerformanceWarning + with catch_warnings(record=True): + store.put('df', df) expected = store.get('df') tm.assert_frame_equal(expected, df) @@ -784,99 +852,77 @@ def test_put_mixed_type(self): def test_append(self): with ensure_clean_store(self.path) as store: - df = tm.makeTimeDataFrame() - _maybe_remove(store, 'df1') - store.append('df1', df[:10]) - store.append('df1', df[10:]) - tm.assert_frame_equal(store['df1'], df) - - _maybe_remove(store, 'df2') - store.put('df2', df[:10], format='table') - store.append('df2', df[10:]) - tm.assert_frame_equal(store['df2'], df) - - _maybe_remove(store, 'df3') - store.append('/df3', df[:10]) - store.append('/df3', df[10:]) - tm.assert_frame_equal(store['df3'], df) # this is allowed by almost always don't want to do it - with tm.assert_produces_warning( - expected_warning=tables.NaturalNameWarning): + # tables.NaturalNameWarning): + with catch_warnings(record=True): + + df = tm.makeTimeDataFrame() + _maybe_remove(store, 'df1') + store.append('df1', df[:10]) + store.append('df1', df[10:]) + 
tm.assert_frame_equal(store['df1'], df) + + _maybe_remove(store, 'df2') + store.put('df2', df[:10], format='table') + store.append('df2', df[10:]) + tm.assert_frame_equal(store['df2'], df) + + _maybe_remove(store, 'df3') + store.append('/df3', df[:10]) + store.append('/df3', df[10:]) + tm.assert_frame_equal(store['df3'], df) + + # this is allowed by almost always don't want to do it + # tables.NaturalNameWarning _maybe_remove(store, '/df3 foo') store.append('/df3 foo', df[:10]) store.append('/df3 foo', df[10:]) tm.assert_frame_equal(store['df3 foo'], df) - # panel - wp = tm.makePanel() - _maybe_remove(store, 'wp1') - store.append('wp1', wp.iloc[:, :10, :]) - store.append('wp1', wp.iloc[:, 10:, :]) - assert_panel_equal(store['wp1'], wp) - - # ndim - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - p4d = tm.makePanel4D() - _maybe_remove(store, 'p4d') - store.append('p4d', p4d.iloc[:, :, :10, :]) - store.append('p4d', p4d.iloc[:, :, 10:, :]) - assert_panel4d_equal(store['p4d'], p4d) - - # test using axis labels - _maybe_remove(store, 'p4d') - store.append('p4d', p4d.iloc[:, :, :10, :], axes=[ - 'items', 'major_axis', 'minor_axis']) - store.append('p4d', p4d.iloc[:, :, 10:, :], axes=[ - 'items', 'major_axis', 'minor_axis']) - assert_panel4d_equal(store['p4d'], p4d) - - # test using differnt number of items on each axis - p4d2 = p4d.copy() - p4d2['l4'] = p4d['l1'] - p4d2['l5'] = p4d['l1'] - _maybe_remove(store, 'p4d2') - store.append( - 'p4d2', p4d2, axes=['items', 'major_axis', 'minor_axis']) - assert_panel4d_equal(store['p4d2'], p4d2) - - # test using differt order of items on the non-index axes - _maybe_remove(store, 'wp1') - wp_append1 = wp.iloc[:, :10, :] - store.append('wp1', wp_append1) - wp_append2 = wp.iloc[:, 10:, :].reindex(items=wp.items[::-1]) - store.append('wp1', wp_append2) - assert_panel_equal(store['wp1'], wp) - - # dtype issues - mizxed type in a single object column - df = DataFrame(data=[[1, 2], [0, 1], [1, 2], [0, 0]]) - 
df['mixed_column'] = 'testing' - df.loc[2, 'mixed_column'] = np.nan - _maybe_remove(store, 'df') - store.append('df', df) - tm.assert_frame_equal(store['df'], df) - - # uints - test storage of uints - uint_data = DataFrame({ - 'u08': Series(np.random.randint(0, high=255, size=5), - dtype=np.uint8), - 'u16': Series(np.random.randint(0, high=65535, size=5), - dtype=np.uint16), - 'u32': Series(np.random.randint(0, high=2**30, size=5), - dtype=np.uint32), - 'u64': Series([2**58, 2**59, 2**60, 2**61, 2**62], - dtype=np.uint64)}, index=np.arange(5)) - _maybe_remove(store, 'uints') - store.append('uints', uint_data) - tm.assert_frame_equal(store['uints'], uint_data) - - # uints - test storage of uints in indexable columns - _maybe_remove(store, 'uints') - # 64-bit indices not yet supported - store.append('uints', uint_data, data_columns=[ - 'u08', 'u16', 'u32']) - tm.assert_frame_equal(store['uints'], uint_data) + # panel + wp = tm.makePanel() + _maybe_remove(store, 'wp1') + store.append('wp1', wp.iloc[:, :10, :]) + store.append('wp1', wp.iloc[:, 10:, :]) + assert_panel_equal(store['wp1'], wp) + + # test using differt order of items on the non-index axes + _maybe_remove(store, 'wp1') + wp_append1 = wp.iloc[:, :10, :] + store.append('wp1', wp_append1) + wp_append2 = wp.iloc[:, 10:, :].reindex(items=wp.items[::-1]) + store.append('wp1', wp_append2) + assert_panel_equal(store['wp1'], wp) + + # dtype issues - mizxed type in a single object column + df = DataFrame(data=[[1, 2], [0, 1], [1, 2], [0, 0]]) + df['mixed_column'] = 'testing' + df.loc[2, 'mixed_column'] = np.nan + _maybe_remove(store, 'df') + store.append('df', df) + tm.assert_frame_equal(store['df'], df) + + # uints - test storage of uints + uint_data = DataFrame({ + 'u08': Series(np.random.randint(0, high=255, size=5), + dtype=np.uint8), + 'u16': Series(np.random.randint(0, high=65535, size=5), + dtype=np.uint16), + 'u32': Series(np.random.randint(0, high=2**30, size=5), + dtype=np.uint32), + 'u64': Series([2**58, 
2**59, 2**60, 2**61, 2**62], + dtype=np.uint64)}, index=np.arange(5)) + _maybe_remove(store, 'uints') + store.append('uints', uint_data) + tm.assert_frame_equal(store['uints'], uint_data) + + # uints - test storage of uints in indexable columns + _maybe_remove(store, 'uints') + # 64-bit indices not yet supported + store.append('uints', uint_data, data_columns=[ + 'u08', 'u16', 'u32']) + tm.assert_frame_equal(store['uints'], uint_data) def test_append_series(self): @@ -890,27 +936,27 @@ def test_append_series(self): store.append('ss', ss) result = store['ss'] tm.assert_series_equal(result, ss) - self.assertIsNone(result.name) + assert result.name is None store.append('ts', ts) result = store['ts'] tm.assert_series_equal(result, ts) - self.assertIsNone(result.name) + assert result.name is None ns.name = 'foo' store.append('ns', ns) result = store['ns'] tm.assert_series_equal(result, ns) - self.assertEqual(result.name, ns.name) + assert result.name == ns.name # select on the values expected = ns[ns > 60] - result = store.select('ns', Term('foo>60')) + result = store.select('ns', 'foo>60') tm.assert_series_equal(result, expected) # select on the index and values expected = ns[(ns > 70) & (ns.index < 90)] - result = store.select('ns', [Term('foo>70'), Term('index<90')]) + result = store.select('ns', 'foo>70 and index<90') tm.assert_series_equal(result, expected) # multi-index @@ -957,16 +1003,16 @@ def check(format, index): else: # only support for fixed types (and they have a perf warning) - self.assertRaises(TypeError, check, 'table', index) - with tm.assert_produces_warning( - expected_warning=PerformanceWarning): + pytest.raises(TypeError, check, 'table', index) + + # PerformanceWarning + with catch_warnings(record=True): check('fixed', index) + @pytest.mark.skipif(not is_platform_little_endian(), + reason="reason platform is not little endian") def test_encoding(self): - if sys.byteorder != 'little': - pytest.skip('system byteorder is not little') - with 
ensure_clean_store(self.path) as store: df = DataFrame(dict(A='foo', B='bar'), index=range(5)) df.loc[2, 'A'] = np.nan @@ -982,7 +1028,7 @@ def test_encoding(self): def test_latin_encoding(self): if compat.PY2: - self.assertRaisesRegexp( + tm.assert_raises_regex( TypeError, r'\[unicode\] is not implemented as a table column') return @@ -1007,7 +1053,7 @@ def _try_decode(x, encoding='latin-1'): examples = [] for dtype in ['category', object]: for val in values: - examples.append(pandas.Series(val, dtype=dtype)) + examples.append(pd.Series(val, dtype=dtype)) def roundtrip(s, key='data', encoding='latin-1', nan_rep=''): with ensure_clean_path(self.path) as store: @@ -1015,7 +1061,12 @@ def roundtrip(s, key='data', encoding='latin-1', nan_rep=''): nan_rep=nan_rep) retr = read_hdf(store, key) s_nan = s.replace(nan_rep, np.nan) - assert_series_equal(s_nan, retr, check_categorical=False) + if is_categorical_dtype(s_nan): + assert is_categorical_dtype(retr) + assert_series_equal(s_nan, retr, check_dtype=False, + check_categorical=False) + else: + assert_series_equal(s_nan, retr) for s in examples: roundtrip(s) @@ -1087,13 +1138,13 @@ def test_append_all_nans(self): tm.assert_frame_equal(store['df2'], df) # tests the option io.hdf.dropna_table - pandas.set_option('io.hdf.dropna_table', False) + pd.set_option('io.hdf.dropna_table', False) _maybe_remove(store, 'df3') store.append('df3', df[:10]) store.append('df3', df[10:]) tm.assert_frame_equal(store['df3'], df) - pandas.set_option('io.hdf.dropna_table', True) + pd.set_option('io.hdf.dropna_table', True) _maybe_remove(store, 'df4') store.append('df4', df[:10]) store.append('df4', df[10:]) @@ -1152,15 +1203,17 @@ def test_append_all_nans(self): [[np.nan, np.nan, np.nan], [np.nan, 5, 6]], [[np.nan, np.nan, np.nan], [np.nan, 3, np.nan]]] - panel_with_missing = Panel(matrix, items=['Item1', 'Item2', 'Item3'], - major_axis=[1, 2], - minor_axis=['A', 'B', 'C']) + with catch_warnings(record=True): + panel_with_missing = 
Panel(matrix, + items=['Item1', 'Item2', 'Item3'], + major_axis=[1, 2], + minor_axis=['A', 'B', 'C']) - with ensure_clean_path(self.path) as path: - panel_with_missing.to_hdf( - path, 'panel_with_missing', format='table') - reloaded_panel = read_hdf(path, 'panel_with_missing') - tm.assert_panel_equal(panel_with_missing, reloaded_panel) + with ensure_clean_path(self.path) as path: + panel_with_missing.to_hdf( + path, 'panel_with_missing', format='table') + reloaded_panel = read_hdf(path, 'panel_with_missing') + tm.assert_panel_equal(panel_with_missing, reloaded_panel) def test_append_frame_column_oriented(self): @@ -1179,13 +1232,14 @@ def test_append_frame_column_oriented(self): # selection on the non-indexable result = store.select( - 'df1', ('columns=A', Term('index=df.index[0:4]'))) + 'df1', ('columns=A', 'index=df.index[0:4]')) expected = df.reindex(columns=['A'], index=df.index[0:4]) tm.assert_frame_equal(expected, result) # this isn't supported - self.assertRaises(TypeError, store.select, 'df1', ( - 'columns=A', Term('index>df.index[4]'))) + with pytest.raises(TypeError): + store.select('df1', + 'columns=A and index>df.index[4]') def test_append_with_different_block_ordering(self): @@ -1221,187 +1275,120 @@ def test_append_with_different_block_ordering(self): df['int16'] = Series([1] * len(df), dtype='int16') store.append('df', df) - # store additonal fields in different blocks + # store additional fields in different blocks df['int16_2'] = Series([1] * len(df), dtype='int16') - self.assertRaises(ValueError, store.append, 'df', df) + pytest.raises(ValueError, store.append, 'df', df) - # store multile additonal fields in different blocks + # store multile additional fields in different blocks df['float_3'] = Series([1.] 
* len(df), dtype='float64') - self.assertRaises(ValueError, store.append, 'df', df) - - def test_ndim_indexables(self): - # test using ndim tables in new ways - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - with ensure_clean_store(self.path) as store: - - p4d = tm.makePanel4D() - - def check_indexers(key, indexers): - for i, idx in enumerate(indexers): - descr = getattr(store.root, key).table.description - self.assertTrue(getattr(descr, idx)._v_pos == i) - - # append then change (will take existing schema) - indexers = ['items', 'major_axis', 'minor_axis'] - - _maybe_remove(store, 'p4d') - store.append('p4d', p4d.iloc[:, :, :10, :], axes=indexers) - store.append('p4d', p4d.iloc[:, :, 10:, :]) - assert_panel4d_equal(store.select('p4d'), p4d) - check_indexers('p4d', indexers) - - # same as above, but try to append with differnt axes - _maybe_remove(store, 'p4d') - store.append('p4d', p4d.iloc[:, :, :10, :], axes=indexers) - store.append('p4d', p4d.iloc[:, :, 10:, :], axes=[ - 'labels', 'items', 'major_axis']) - assert_panel4d_equal(store.select('p4d'), p4d) - check_indexers('p4d', indexers) - - # pass incorrect number of axes - _maybe_remove(store, 'p4d') - self.assertRaises(ValueError, store.append, 'p4d', p4d.iloc[ - :, :, :10, :], axes=['major_axis', 'minor_axis']) - - # different than default indexables #1 - indexers = ['labels', 'major_axis', 'minor_axis'] - _maybe_remove(store, 'p4d') - store.append('p4d', p4d.iloc[:, :, :10, :], axes=indexers) - store.append('p4d', p4d.iloc[:, :, 10:, :]) - assert_panel4d_equal(store['p4d'], p4d) - check_indexers('p4d', indexers) - - # different than default indexables #2 - indexers = ['major_axis', 'labels', 'minor_axis'] - _maybe_remove(store, 'p4d') - store.append('p4d', p4d.iloc[:, :, :10, :], axes=indexers) - store.append('p4d', p4d.iloc[:, :, 10:, :]) - assert_panel4d_equal(store['p4d'], p4d) - check_indexers('p4d', indexers) - - # partial selection - result = store.select('p4d', 
['labels=l1']) - expected = p4d.reindex(labels=['l1']) - assert_panel4d_equal(result, expected) - - # partial selection2 - result = store.select('p4d', [Term( - 'labels=l1'), Term('items=ItemA'), Term('minor_axis=B')]) - expected = p4d.reindex( - labels=['l1'], items=['ItemA'], minor_axis=['B']) - assert_panel4d_equal(result, expected) - - # non-existant partial selection - result = store.select('p4d', [Term( - 'labels=l1'), Term('items=Item1'), Term('minor_axis=B')]) - expected = p4d.reindex(labels=['l1'], items=[], - minor_axis=['B']) - assert_panel4d_equal(result, expected) + pytest.raises(ValueError, store.append, 'df', df) def test_append_with_strings(self): with ensure_clean_store(self.path) as store: - wp = tm.makePanel() - wp2 = wp.rename_axis( - dict([(x, "%s_extra" % x) for x in wp.minor_axis]), axis=2) - - def check_col(key, name, size): - self.assertEqual(getattr(store.get_storer( - key).table.description, name).itemsize, size) - - store.append('s1', wp, min_itemsize=20) - store.append('s1', wp2) - expected = concat([wp, wp2], axis=2) - expected = expected.reindex(minor_axis=sorted(expected.minor_axis)) - assert_panel_equal(store['s1'], expected) - check_col('s1', 'minor_axis', 20) - - # test dict format - store.append('s2', wp, min_itemsize={'minor_axis': 20}) - store.append('s2', wp2) - expected = concat([wp, wp2], axis=2) - expected = expected.reindex(minor_axis=sorted(expected.minor_axis)) - assert_panel_equal(store['s2'], expected) - check_col('s2', 'minor_axis', 20) - - # apply the wrong field (similar to #1) - store.append('s3', wp, min_itemsize={'major_axis': 20}) - self.assertRaises(ValueError, store.append, 's3', wp2) - - # test truncation of bigger strings - store.append('s4', wp) - self.assertRaises(ValueError, store.append, 's4', wp2) - - # avoid truncation on elements - df = DataFrame([[123, 'asdqwerty'], [345, 'dggnhebbsdfbdfb']]) - store.append('df_big', df) - tm.assert_frame_equal(store.select('df_big'), df) - check_col('df_big', 
'values_block_1', 15) - - # appending smaller string ok - df2 = DataFrame([[124, 'asdqy'], [346, 'dggnhefbdfb']]) - store.append('df_big', df2) - expected = concat([df, df2]) - tm.assert_frame_equal(store.select('df_big'), expected) - check_col('df_big', 'values_block_1', 15) - - # avoid truncation on elements - df = DataFrame([[123, 'asdqwerty'], [345, 'dggnhebbsdfbdfb']]) - store.append('df_big2', df, min_itemsize={'values': 50}) - tm.assert_frame_equal(store.select('df_big2'), df) - check_col('df_big2', 'values_block_1', 50) - - # bigger string on next append - store.append('df_new', df) - df_new = DataFrame( - [[124, 'abcdefqhij'], [346, 'abcdefghijklmnopqrtsuvwxyz']]) - self.assertRaises(ValueError, store.append, 'df_new', df_new) - - # min_itemsize on Series index (GH 11412) - df = tm.makeMixedDataFrame().set_index('C') - store.append('ss', df['B'], min_itemsize={'index': 4}) - tm.assert_series_equal(store.select('ss'), df['B']) - - # same as above, with data_columns=True - store.append('ss2', df['B'], data_columns=True, - min_itemsize={'index': 4}) - tm.assert_series_equal(store.select('ss2'), df['B']) - - # min_itemsize in index without appending (GH 10381) - store.put('ss3', df, format='table', - min_itemsize={'index': 6}) - # just make sure there is a longer string: - df2 = df.copy().reset_index().assign(C='longer').set_index('C') - store.append('ss3', df2) - tm.assert_frame_equal(store.select('ss3'), - pd.concat([df, df2])) - - # same as above, with a Series - store.put('ss4', df['B'], format='table', - min_itemsize={'index': 6}) - store.append('ss4', df2['B']) - tm.assert_series_equal(store.select('ss4'), - pd.concat([df['B'], df2['B']])) - - # with nans - _maybe_remove(store, 'df') - df = tm.makeTimeDataFrame() - df['string'] = 'foo' - df.loc[1:4, 'string'] = np.nan - df['string2'] = 'bar' - df.loc[4:8, 'string2'] = np.nan - df['string3'] = 'bah' - df.loc[1:, 'string3'] = np.nan - store.append('df', df) - result = store.select('df') - 
tm.assert_frame_equal(result, df) + with catch_warnings(record=True): + wp = tm.makePanel() + wp2 = wp.rename_axis( + {x: "%s_extra" % x for x in wp.minor_axis}, axis=2) + + def check_col(key, name, size): + assert getattr(store.get_storer(key) + .table.description, name).itemsize == size + + store.append('s1', wp, min_itemsize=20) + store.append('s1', wp2) + expected = concat([wp, wp2], axis=2) + expected = expected.reindex( + minor_axis=sorted(expected.minor_axis)) + assert_panel_equal(store['s1'], expected) + check_col('s1', 'minor_axis', 20) + + # test dict format + store.append('s2', wp, min_itemsize={'minor_axis': 20}) + store.append('s2', wp2) + expected = concat([wp, wp2], axis=2) + expected = expected.reindex( + minor_axis=sorted(expected.minor_axis)) + assert_panel_equal(store['s2'], expected) + check_col('s2', 'minor_axis', 20) + + # apply the wrong field (similar to #1) + store.append('s3', wp, min_itemsize={'major_axis': 20}) + pytest.raises(ValueError, store.append, 's3', wp2) + + # test truncation of bigger strings + store.append('s4', wp) + pytest.raises(ValueError, store.append, 's4', wp2) + + # avoid truncation on elements + df = DataFrame([[123, 'asdqwerty'], [345, 'dggnhebbsdfbdfb']]) + store.append('df_big', df) + tm.assert_frame_equal(store.select('df_big'), df) + check_col('df_big', 'values_block_1', 15) + + # appending smaller string ok + df2 = DataFrame([[124, 'asdqy'], [346, 'dggnhefbdfb']]) + store.append('df_big', df2) + expected = concat([df, df2]) + tm.assert_frame_equal(store.select('df_big'), expected) + check_col('df_big', 'values_block_1', 15) + + # avoid truncation on elements + df = DataFrame([[123, 'asdqwerty'], [345, 'dggnhebbsdfbdfb']]) + store.append('df_big2', df, min_itemsize={'values': 50}) + tm.assert_frame_equal(store.select('df_big2'), df) + check_col('df_big2', 'values_block_1', 50) + + # bigger string on next append + store.append('df_new', df) + df_new = DataFrame( + [[124, 'abcdefqhij'], [346, 
'abcdefghijklmnopqrtsuvwxyz']]) + pytest.raises(ValueError, store.append, 'df_new', df_new) + + # min_itemsize on Series index (GH 11412) + df = tm.makeMixedDataFrame().set_index('C') + store.append('ss', df['B'], min_itemsize={'index': 4}) + tm.assert_series_equal(store.select('ss'), df['B']) + + # same as above, with data_columns=True + store.append('ss2', df['B'], data_columns=True, + min_itemsize={'index': 4}) + tm.assert_series_equal(store.select('ss2'), df['B']) + + # min_itemsize in index without appending (GH 10381) + store.put('ss3', df, format='table', + min_itemsize={'index': 6}) + # just make sure there is a longer string: + df2 = df.copy().reset_index().assign(C='longer').set_index('C') + store.append('ss3', df2) + tm.assert_frame_equal(store.select('ss3'), + pd.concat([df, df2])) + + # same as above, with a Series + store.put('ss4', df['B'], format='table', + min_itemsize={'index': 6}) + store.append('ss4', df2['B']) + tm.assert_series_equal(store.select('ss4'), + pd.concat([df['B'], df2['B']])) + + # with nans + _maybe_remove(store, 'df') + df = tm.makeTimeDataFrame() + df['string'] = 'foo' + df.loc[1:4, 'string'] = np.nan + df['string2'] = 'bar' + df.loc[4:8, 'string2'] = np.nan + df['string3'] = 'bah' + df.loc[1:, 'string3'] = np.nan + store.append('df', df) + result = store.select('df') + tm.assert_frame_equal(result, df) with ensure_clean_store(self.path) as store: def check_col(key, name, size): - self.assertEqual(getattr(store.get_storer( - key).table.description, name).itemsize, size) + assert getattr(store.get_storer(key) + .table.description, name).itemsize, size df = DataFrame(dict(A='foo', B='bar'), index=range(10)) @@ -1409,13 +1396,13 @@ def check_col(key, name, size): _maybe_remove(store, 'df') store.append('df', df, min_itemsize={'A': 200}) check_col('df', 'A', 200) - self.assertEqual(store.get_storer('df').data_columns, ['A']) + assert store.get_storer('df').data_columns == ['A'] # a min_itemsize that creates a data_column2 
_maybe_remove(store, 'df') store.append('df', df, data_columns=['B'], min_itemsize={'A': 200}) check_col('df', 'A', 200) - self.assertEqual(store.get_storer('df').data_columns, ['B', 'A']) + assert store.get_storer('df').data_columns == ['B', 'A'] # a min_itemsize that creates a data_column2 _maybe_remove(store, 'df') @@ -1423,7 +1410,7 @@ def check_col(key, name, size): 'B'], min_itemsize={'values': 200}) check_col('df', 'B', 200) check_col('df', 'values_block_0', 200) - self.assertEqual(store.get_storer('df').data_columns, ['B']) + assert store.get_storer('df').data_columns == ['B'] # infer the .typ on subsequent appends _maybe_remove(store, 'df') @@ -1435,8 +1422,8 @@ def check_col(key, name, size): df = DataFrame(['foo', 'foo', 'foo', 'barh', 'barh', 'barh'], columns=['A']) _maybe_remove(store, 'df') - self.assertRaises(ValueError, store.append, 'df', - df, min_itemsize={'foo': 20, 'foobar': 20}) + pytest.raises(ValueError, store.append, 'df', + df, min_itemsize={'foo': 20, 'foobar': 20}) def test_to_hdf_with_min_itemsize(self): @@ -1473,13 +1460,13 @@ def test_append_with_data_columns(self): assert(store._handle.root.df.table.cols.B.is_indexed is True) # data column searching - result = store.select('df', [Term('B>0')]) + result = store.select('df', 'B>0') expected = df[df.B > 0] tm.assert_frame_equal(result, expected) # data column searching (with an indexable and a data_columns) result = store.select( - 'df', [Term('B>0'), Term('index>df.index[3]')]) + 'df', 'B>0 and index>df.index[3]') df_new = df.reindex(index=df.index[4:]) expected = df_new[df_new.B > 0] tm.assert_frame_equal(result, expected) @@ -1491,14 +1478,14 @@ def test_append_with_data_columns(self): df_new.loc[5:6, 'string'] = 'bar' _maybe_remove(store, 'df') store.append('df', df_new, data_columns=['string']) - result = store.select('df', [Term('string=foo')]) + result = store.select('df', "string='foo'") expected = df_new[df_new.string == 'foo'] tm.assert_frame_equal(result, expected) # using 
min_itemsize and a data column def check_col(key, name, size): - self.assertEqual(getattr(store.get_storer( - key).table.description, name).itemsize, size) + assert getattr(store.get_storer(key) + .table.description, name).itemsize == size with ensure_clean_store(self.path) as store: _maybe_remove(store, 'df') @@ -1544,15 +1531,15 @@ def check_col(key, name, size): _maybe_remove(store, 'df') store.append( 'df', df_new, data_columns=['A', 'B', 'string', 'string2']) - result = store.select('df', [Term('string=foo'), Term( - 'string2=foo'), Term('A>0'), Term('B<0')]) + result = store.select('df', + "string='foo' and string2='foo'" + " and A>0 and B<0") expected = df_new[(df_new.string == 'foo') & ( df_new.string2 == 'foo') & (df_new.A > 0) & (df_new.B < 0)] tm.assert_frame_equal(result, expected, check_index_type=False) # yield an empty frame - result = store.select('df', [Term('string=foo'), Term( - 'string2=cool')]) + result = store.select('df', "string='foo' and string2='cool'") expected = df_new[(df_new.string == 'foo') & ( df_new.string2 == 'cool')] tm.assert_frame_equal(result, expected, check_index_type=False) @@ -1572,7 +1559,7 @@ def check_col(key, name, size): store.append('df_dc', df_dc, data_columns=['B', 'C', 'string', 'string2', 'datetime']) - result = store.select('df_dc', [Term('B>0')]) + result = store.select('df_dc', 'B>0') expected = df_dc[df_dc.B > 0] tm.assert_frame_equal(result, expected, check_index_type=False) @@ -1599,7 +1586,7 @@ def check_col(key, name, size): store.append('df_dc', df_dc, data_columns=[ 'B', 'C', 'string', 'string2']) - result = store.select('df_dc', [Term('B>0')]) + result = store.select('df_dc', 'B>0') expected = df_dc[df_dc.B > 0] tm.assert_frame_equal(result, expected) @@ -1610,98 +1597,103 @@ def check_col(key, name, size): tm.assert_frame_equal(result, expected) with ensure_clean_store(self.path) as store: - # panel - # GH5717 not handling data_columns - np.random.seed(1234) - p = tm.makePanel() + with 
catch_warnings(record=True): + # panel + # GH5717 not handling data_columns + np.random.seed(1234) + p = tm.makePanel() - store.append('p1', p) - tm.assert_panel_equal(store.select('p1'), p) + store.append('p1', p) + tm.assert_panel_equal(store.select('p1'), p) - store.append('p2', p, data_columns=True) - tm.assert_panel_equal(store.select('p2'), p) + store.append('p2', p, data_columns=True) + tm.assert_panel_equal(store.select('p2'), p) - result = store.select('p2', where='ItemA>0') - expected = p.to_frame() - expected = expected[expected['ItemA'] > 0] - tm.assert_frame_equal(result.to_frame(), expected) + result = store.select('p2', where='ItemA>0') + expected = p.to_frame() + expected = expected[expected['ItemA'] > 0] + tm.assert_frame_equal(result.to_frame(), expected) - result = store.select('p2', where='ItemA>0 & minor_axis=["A","B"]') - expected = p.to_frame() - expected = expected[expected['ItemA'] > 0] - expected = expected[expected.reset_index( - level=['major']).index.isin(['A', 'B'])] - tm.assert_frame_equal(result.to_frame(), expected) + result = store.select( + 'p2', where='ItemA>0 & minor_axis=["A","B"]') + expected = p.to_frame() + expected = expected[expected['ItemA'] > 0] + expected = expected[expected.reset_index( + level=['major']).index.isin(['A', 'B'])] + tm.assert_frame_equal(result.to_frame(), expected) def test_create_table_index(self): with ensure_clean_store(self.path) as store: - def col(t, column): - return getattr(store.get_storer(t).table.cols, column) + with catch_warnings(record=True): + def col(t, column): + return getattr(store.get_storer(t).table.cols, column) - # index=False - wp = tm.makePanel() - store.append('p5', wp, index=False) - store.create_table_index('p5', columns=['major_axis']) - assert(col('p5', 'major_axis').is_indexed is True) - assert(col('p5', 'minor_axis').is_indexed is False) - - # index=True - store.append('p5i', wp, index=True) - assert(col('p5i', 'major_axis').is_indexed is True) - assert(col('p5i', 
'minor_axis').is_indexed is True) - - # default optlevels - store.get_storer('p5').create_index() - assert(col('p5', 'major_axis').index.optlevel == 6) - assert(col('p5', 'minor_axis').index.kind == 'medium') - - # let's change the indexing scheme - store.create_table_index('p5') - assert(col('p5', 'major_axis').index.optlevel == 6) - assert(col('p5', 'minor_axis').index.kind == 'medium') - store.create_table_index('p5', optlevel=9) - assert(col('p5', 'major_axis').index.optlevel == 9) - assert(col('p5', 'minor_axis').index.kind == 'medium') - store.create_table_index('p5', kind='full') - assert(col('p5', 'major_axis').index.optlevel == 9) - assert(col('p5', 'minor_axis').index.kind == 'full') - store.create_table_index('p5', optlevel=1, kind='light') - assert(col('p5', 'major_axis').index.optlevel == 1) - assert(col('p5', 'minor_axis').index.kind == 'light') - - # data columns - df = tm.makeTimeDataFrame() - df['string'] = 'foo' - df['string2'] = 'bar' - store.append('f', df, data_columns=['string', 'string2']) - assert(col('f', 'index').is_indexed is True) - assert(col('f', 'string').is_indexed is True) - assert(col('f', 'string2').is_indexed is True) - - # specify index=columns - store.append( - 'f2', df, index=['string'], data_columns=['string', 'string2']) - assert(col('f2', 'index').is_indexed is False) - assert(col('f2', 'string').is_indexed is True) - assert(col('f2', 'string2').is_indexed is False) + # index=False + wp = tm.makePanel() + store.append('p5', wp, index=False) + store.create_table_index('p5', columns=['major_axis']) + assert(col('p5', 'major_axis').is_indexed is True) + assert(col('p5', 'minor_axis').is_indexed is False) + + # index=True + store.append('p5i', wp, index=True) + assert(col('p5i', 'major_axis').is_indexed is True) + assert(col('p5i', 'minor_axis').is_indexed is True) + + # default optlevels + store.get_storer('p5').create_index() + assert(col('p5', 'major_axis').index.optlevel == 6) + assert(col('p5', 'minor_axis').index.kind == 
'medium') + + # let's change the indexing scheme + store.create_table_index('p5') + assert(col('p5', 'major_axis').index.optlevel == 6) + assert(col('p5', 'minor_axis').index.kind == 'medium') + store.create_table_index('p5', optlevel=9) + assert(col('p5', 'major_axis').index.optlevel == 9) + assert(col('p5', 'minor_axis').index.kind == 'medium') + store.create_table_index('p5', kind='full') + assert(col('p5', 'major_axis').index.optlevel == 9) + assert(col('p5', 'minor_axis').index.kind == 'full') + store.create_table_index('p5', optlevel=1, kind='light') + assert(col('p5', 'major_axis').index.optlevel == 1) + assert(col('p5', 'minor_axis').index.kind == 'light') + + # data columns + df = tm.makeTimeDataFrame() + df['string'] = 'foo' + df['string2'] = 'bar' + store.append('f', df, data_columns=['string', 'string2']) + assert(col('f', 'index').is_indexed is True) + assert(col('f', 'string').is_indexed is True) + assert(col('f', 'string2').is_indexed is True) - # try to index a non-table - _maybe_remove(store, 'f2') - store.put('f2', df) - self.assertRaises(TypeError, store.create_table_index, 'f2') + # specify index=columns + store.append( + 'f2', df, index=['string'], + data_columns=['string', 'string2']) + assert(col('f2', 'index').is_indexed is False) + assert(col('f2', 'string').is_indexed is True) + assert(col('f2', 'string2').is_indexed is False) + + # try to index a non-table + _maybe_remove(store, 'f2') + store.put('f2', df) + pytest.raises(TypeError, store.create_table_index, 'f2') def test_append_diff_item_order(self): - wp = tm.makePanel() - wp1 = wp.iloc[:, :10, :] - wp2 = wp.iloc[wp.items.get_indexer(['ItemC', 'ItemB', 'ItemA']), - 10:, :] + with catch_warnings(record=True): + wp = tm.makePanel() + wp1 = wp.iloc[:, :10, :] + wp2 = wp.iloc[wp.items.get_indexer(['ItemC', 'ItemB', 'ItemA']), + 10:, :] - with ensure_clean_store(self.path) as store: - store.put('panel', wp1, format='table') - self.assertRaises(ValueError, store.put, 'panel', wp2, + with 
ensure_clean_store(self.path) as store: + store.put('panel', wp1, format='table') + pytest.raises(ValueError, store.put, 'panel', wp2, append=True) def test_append_hierarchical(self): @@ -1753,10 +1745,10 @@ def test_column_multiindex(self): check_index_type=True, check_column_type=True) - self.assertRaises(ValueError, store.put, 'df2', df, - format='table', data_columns=['A']) - self.assertRaises(ValueError, store.put, 'df3', df, - format='table', data_columns=True) + pytest.raises(ValueError, store.put, 'df2', df, + format='table', data_columns=['A']) + pytest.raises(ValueError, store.put, 'df3', df, + format='table', data_columns=True) # appending multi-column on existing table (see GH 6167) with ensure_clean_store(self.path) as store: @@ -1819,13 +1811,7 @@ def make_index(names=None): _maybe_remove(store, 'df') df = DataFrame(np.zeros((12, 2)), columns=[ 'a', 'b'], index=make_index(['date', 'a', 't'])) - self.assertRaises(ValueError, store.append, 'df', df) - - # dup within level - _maybe_remove(store, 'df') - df = DataFrame(np.zeros((12, 2)), columns=['a', 'b'], - index=make_index(['date', 'date', 'date'])) - self.assertRaises(ValueError, store.append, 'df', df) + pytest.raises(ValueError, store.append, 'df', df) # fully names _maybe_remove(store, 'df') @@ -1884,35 +1870,21 @@ def test_pass_spec_to_storer(self): with ensure_clean_store(self.path) as store: store.put('df', df) - self.assertRaises(TypeError, store.select, 'df', columns=['A']) - self.assertRaises(TypeError, store.select, - 'df', where=[('columns=A')]) + pytest.raises(TypeError, store.select, 'df', columns=['A']) + pytest.raises(TypeError, store.select, + 'df', where=[('columns=A')]) def test_append_misc(self): with ensure_clean_store(self.path) as store: + df = tm.makeDataFrame() + store.append('df', df, chunksize=1) + result = store.select('df') + tm.assert_frame_equal(result, df) - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - - # unsuported data types for 
non-tables - p4d = tm.makePanel4D() - self.assertRaises(TypeError, store.put, 'p4d', p4d) - - # unsuported data types - self.assertRaises(TypeError, store.put, 'abc', None) - self.assertRaises(TypeError, store.put, 'abc', '123') - self.assertRaises(TypeError, store.put, 'abc', 123) - self.assertRaises(TypeError, store.put, 'abc', np.arange(5)) - - df = tm.makeDataFrame() - store.append('df', df, chunksize=1) - result = store.select('df') - tm.assert_frame_equal(result, df) - - store.append('df1', df, expectedrows=10) - result = store.select('df1') - tm.assert_frame_equal(result, df) + store.append('df1', df, expectedrows=10) + result = store.select('df1') + tm.assert_frame_equal(result, df) # more chunksize in append tests def check(obj, comparator): @@ -1931,12 +1903,9 @@ def check(obj, comparator): df['time2'] = Timestamp('20130102') check(df, tm.assert_frame_equal) - p = tm.makePanel() - check(p, assert_panel_equal) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - p4d = tm.makePanel4D() - check(p4d, assert_panel4d_equal) + with catch_warnings(record=True): + p = tm.makePanel() + check(p, assert_panel_equal) # empty frame, GH4273 with ensure_clean_store(self.path) as store: @@ -1944,7 +1913,7 @@ def check(obj, comparator): # 0 len df_empty = DataFrame(columns=list('ABC')) store.append('df', df_empty) - self.assertRaises(KeyError, store.select, 'df') + pytest.raises(KeyError, store.select, 'df') # repeated append of 0/non-zero frames df = DataFrame(np.random.rand(10, 3), columns=list('ABC')) @@ -1958,21 +1927,23 @@ def check(obj, comparator): store.put('df2', df) assert_frame_equal(store.select('df2'), df) - # 0 len - p_empty = Panel(items=list('ABC')) - store.append('p', p_empty) - self.assertRaises(KeyError, store.select, 'p') + with catch_warnings(record=True): - # repeated append of 0/non-zero frames - p = Panel(np.random.randn(3, 4, 5), items=list('ABC')) - store.append('p', p) - assert_panel_equal(store.select('p'), p) - 
store.append('p', p_empty) - assert_panel_equal(store.select('p'), p) + # 0 len + p_empty = Panel(items=list('ABC')) + store.append('p', p_empty) + pytest.raises(KeyError, store.select, 'p') - # store - store.put('p2', p_empty) - assert_panel_equal(store.select('p2'), p_empty) + # repeated append of 0/non-zero frames + p = Panel(np.random.randn(3, 4, 5), items=list('ABC')) + store.append('p', p) + assert_panel_equal(store.select('p'), p) + store.append('p', p_empty) + assert_panel_equal(store.select('p'), p) + + # store + store.put('p2', p_empty) + assert_panel_equal(store.select('p2'), p_empty) def test_append_raise(self): @@ -1983,13 +1954,13 @@ def test_append_raise(self): # list in column df = tm.makeDataFrame() df['invalid'] = [['a']] * len(df) - self.assertEqual(df.dtypes['invalid'], np.object_) - self.assertRaises(TypeError, store.append, 'df', df) + assert df.dtypes['invalid'] == np.object_ + pytest.raises(TypeError, store.append, 'df', df) # multiple invalid columns df['invalid2'] = [['a']] * len(df) df['invalid3'] = [['a']] * len(df) - self.assertRaises(TypeError, store.append, 'df', df) + pytest.raises(TypeError, store.append, 'df', df) # datetime with embedded nans as object df = tm.makeDataFrame() @@ -1997,22 +1968,22 @@ def test_append_raise(self): s = s.astype(object) s[0:5] = np.nan df['invalid'] = s - self.assertEqual(df.dtypes['invalid'], np.object_) - self.assertRaises(TypeError, store.append, 'df', df) + assert df.dtypes['invalid'] == np.object_ + pytest.raises(TypeError, store.append, 'df', df) - # directy ndarray - self.assertRaises(TypeError, store.append, 'df', np.arange(10)) + # directly ndarray + pytest.raises(TypeError, store.append, 'df', np.arange(10)) # series directly - self.assertRaises(TypeError, store.append, - 'df', Series(np.arange(10))) + pytest.raises(TypeError, store.append, + 'df', Series(np.arange(10))) # appending an incompatible table df = tm.makeDataFrame() store.append('df', df) df['foo'] = 'foo' - 
self.assertRaises(ValueError, store.append, 'df', df) + pytest.raises(ValueError, store.append, 'df', df) def test_table_index_incompatible_dtypes(self): df1 = DataFrame({'a': [1, 2, 3]}) @@ -2021,8 +1992,8 @@ def test_table_index_incompatible_dtypes(self): with ensure_clean_store(self.path) as store: store.put('frame', df1, format='table') - self.assertRaises(TypeError, store.put, 'frame', df2, - format='table', append=True) + pytest.raises(TypeError, store.put, 'frame', df2, + format='table', append=True) def test_table_values_dtypes_roundtrip(self): @@ -2036,7 +2007,7 @@ def test_table_values_dtypes_roundtrip(self): assert_series_equal(df2.dtypes, store['df_i8'].dtypes) # incompatible dtype - self.assertRaises(ValueError, store.append, 'df_i8', df1) + pytest.raises(ValueError, store.append, 'df_i8', df1) # check creation/storage/retrieval of float32 (a bit hacky to # actually create them thought) @@ -2047,9 +2018,9 @@ def test_table_values_dtypes_roundtrip(self): assert df1.dtypes[0] == 'float32' # check with mixed dtypes - df1 = DataFrame(dict([(c, Series(np.random.randn(5), dtype=c)) - for c in ['float32', 'float64', 'int32', - 'int64', 'int16', 'int8']])) + df1 = DataFrame(dict((c, Series(np.random.randn(5), dtype=c)) + for c in ['float32', 'float64', 'int32', + 'int64', 'int16', 'int8'])) df1['string'] = 'foo' df1['float322'] = 1. 
df1['float322'] = df1['float322'].astype('float32') @@ -2062,8 +2033,8 @@ def test_table_values_dtypes_roundtrip(self): expected = Series({'float32': 2, 'float64': 1, 'int32': 1, 'bool': 1, 'int16': 1, 'int8': 1, 'int64': 1, 'object': 1, 'datetime64[ns]': 2}) - result.sort() - expected.sort() + result = result.sort_index() + expected = expected.sort_index() tm.assert_series_equal(result, expected) def test_table_mixed_dtypes(self): @@ -2082,40 +2053,29 @@ def test_table_mixed_dtypes(self): df['datetime1'] = datetime.datetime(2001, 1, 2, 0, 0) df['datetime2'] = datetime.datetime(2001, 1, 3, 0, 0) df.loc[3:6, ['obj1']] = np.nan - df = df.consolidate()._convert(datetime=True) + df = df._consolidate()._convert(datetime=True) with ensure_clean_store(self.path) as store: store.append('df1_mixed', df) tm.assert_frame_equal(store.select('df1_mixed'), df) - # panel - wp = tm.makePanel() - wp['obj1'] = 'foo' - wp['obj2'] = 'bar' - wp['bool1'] = wp['ItemA'] > 0 - wp['bool2'] = wp['ItemB'] > 0 - wp['int1'] = 1 - wp['int2'] = 2 - wp = wp.consolidate() + with catch_warnings(record=True): - with ensure_clean_store(self.path) as store: - store.append('p1_mixed', wp) - assert_panel_equal(store.select('p1_mixed'), wp) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - # ndim - wp = tm.makePanel4D() + # panel + wp = tm.makePanel() wp['obj1'] = 'foo' wp['obj2'] = 'bar' - wp['bool1'] = wp['l1'] > 0 - wp['bool2'] = wp['l2'] > 0 + wp['bool1'] = wp['ItemA'] > 0 + wp['bool2'] = wp['ItemB'] > 0 wp['int1'] = 1 wp['int2'] = 2 - wp = wp.consolidate() + wp = wp._consolidate() + + with catch_warnings(record=True): with ensure_clean_store(self.path) as store: - store.append('p4d_mixed', wp) - assert_panel4d_equal(store.select('p4d_mixed'), wp) + store.append('p1_mixed', wp) + assert_panel_equal(store.select('p1_mixed'), wp) def test_unimplemented_dtypes_table_columns(self): @@ -2131,7 +2091,7 @@ def test_unimplemented_dtypes_table_columns(self): for n, f in l: df = 
tm.makeDataFrame() df[n] = f - self.assertRaises( + pytest.raises( TypeError, store.append, 'df1_%s' % n, df) # frame @@ -2139,11 +2099,11 @@ def test_unimplemented_dtypes_table_columns(self): df['obj1'] = 'foo' df['obj2'] = 'bar' df['datetime1'] = datetime.date(2001, 1, 2) - df = df.consolidate()._convert(datetime=True) + df = df._consolidate()._convert(datetime=True) with ensure_clean_store(self.path) as store: # this fails because we have a date in the object block...... - self.assertRaises(TypeError, store.append, 'df_unimplemented', df) + pytest.raises(TypeError, store.append, 'df_unimplemented', df) def test_calendar_roundtrip_issue(self): @@ -2152,7 +2112,7 @@ def test_calendar_roundtrip_issue(self): weekmask_egypt = 'Sun Mon Tue Wed Thu' holidays = ['2012-05-01', datetime.datetime(2013, 5, 1), np.datetime64('2014-05-01')] - bday_egypt = pandas.offsets.CustomBusinessDay( + bday_egypt = pd.offsets.CustomBusinessDay( holidays=holidays, weekmask=weekmask_egypt) dt = datetime.datetime(2013, 4, 30) dts = date_range(dt, periods=5, freq=bday_egypt) @@ -2170,11 +2130,21 @@ def test_calendar_roundtrip_issue(self): result = store.select('table') assert_series_equal(result, s) + def test_roundtrip_tz_aware_index(self): + # GH 17618 + time = pd.Timestamp('2000-01-01 01:00:00', tz='US/Eastern') + df = pd.DataFrame(data=[0], index=[time]) + + with ensure_clean_store(self.path) as store: + store.put('frame', df, format='fixed') + recons = store['frame'] + tm.assert_frame_equal(recons, df) + assert recons.index[0].value == 946706400000000000 + def test_append_with_timedelta(self): # GH 3577 # append timedelta - from datetime import timedelta df = DataFrame(dict(A=Timestamp('20130101'), B=[Timestamp( '20130101') + timedelta(days=i, seconds=10) for i in range(10)])) df['C'] = df['A'] - df['B'] @@ -2188,10 +2158,10 @@ def test_append_with_timedelta(self): result = store.select('df') assert_frame_equal(result, df) - result = store.select('df', Term("C<100000")) + result = 
store.select('df', where="C<100000") assert_frame_equal(result, df) - result = store.select('df', Term("C", "<", -3 * 86400)) + result = store.select('df', where="Cfoo') - self.assertRaises(KeyError, store.remove, 'a', [crit1]) + with catch_warnings(record=True): - # try to remove non-table (with crit) - # non-table ok (where = None) - wp = tm.makePanel(30) - store.put('wp', wp, format='table') - store.remove('wp', ["minor_axis=['A', 'D']"]) - rs = store.select('wp') - expected = wp.reindex(minor_axis=['B', 'C']) - assert_panel_equal(rs, expected) + # non-existance + crit1 = 'index>foo' + pytest.raises(KeyError, store.remove, 'a', [crit1]) - # empty where - _maybe_remove(store, 'wp') - store.put('wp', wp, format='table') + # try to remove non-table (with crit) + # non-table ok (where = None) + wp = tm.makePanel(30) + store.put('wp', wp, format='table') + store.remove('wp', ["minor_axis=['A', 'D']"]) + rs = store.select('wp') + expected = wp.reindex(minor_axis=['B', 'C']) + assert_panel_equal(rs, expected) - # deleted number (entire table) - n = store.remove('wp', []) - self.assertTrue(n == 120) + # empty where + _maybe_remove(store, 'wp') + store.put('wp', wp, format='table') - # non - empty where - _maybe_remove(store, 'wp') - store.put('wp', wp, format='table') - self.assertRaises(ValueError, store.remove, - 'wp', ['foo']) + # deleted number (entire table) + n = store.remove('wp', []) + assert n == 120 - # selectin non-table with a where - # store.put('wp2', wp, format='f') - # self.assertRaises(ValueError, store.remove, - # 'wp2', [('column', ['A', 'D'])]) + # non - empty where + _maybe_remove(store, 'wp') + store.put('wp', wp, format='table') + pytest.raises(ValueError, store.remove, + 'wp', ['foo']) def test_remove_startstop(self): # GH #4835 and #6177 with ensure_clean_store(self.path) as store: - wp = tm.makePanel(30) - - # start - _maybe_remove(store, 'wp1') - store.put('wp1', wp, format='t') - n = store.remove('wp1', start=32) - self.assertTrue(n == 120 - 
32) - result = store.select('wp1') - expected = wp.reindex(major_axis=wp.major_axis[:32 // 4]) - assert_panel_equal(result, expected) - - _maybe_remove(store, 'wp2') - store.put('wp2', wp, format='t') - n = store.remove('wp2', start=-32) - self.assertTrue(n == 32) - result = store.select('wp2') - expected = wp.reindex(major_axis=wp.major_axis[:-32 // 4]) - assert_panel_equal(result, expected) - - # stop - _maybe_remove(store, 'wp3') - store.put('wp3', wp, format='t') - n = store.remove('wp3', stop=32) - self.assertTrue(n == 32) - result = store.select('wp3') - expected = wp.reindex(major_axis=wp.major_axis[32 // 4:]) - assert_panel_equal(result, expected) - - _maybe_remove(store, 'wp4') - store.put('wp4', wp, format='t') - n = store.remove('wp4', stop=-32) - self.assertTrue(n == 120 - 32) - result = store.select('wp4') - expected = wp.reindex(major_axis=wp.major_axis[-32 // 4:]) - assert_panel_equal(result, expected) - - # start n stop - _maybe_remove(store, 'wp5') - store.put('wp5', wp, format='t') - n = store.remove('wp5', start=16, stop=-16) - self.assertTrue(n == 120 - 32) - result = store.select('wp5') - expected = wp.reindex(major_axis=wp.major_axis[ - :16 // 4].union(wp.major_axis[-16 // 4:])) - assert_panel_equal(result, expected) - - _maybe_remove(store, 'wp6') - store.put('wp6', wp, format='t') - n = store.remove('wp6', start=16, stop=16) - self.assertTrue(n == 0) - result = store.select('wp6') - expected = wp.reindex(major_axis=wp.major_axis) - assert_panel_equal(result, expected) - - # with where - _maybe_remove(store, 'wp7') - - # TODO: unused? 
- date = wp.major_axis.take(np.arange(0, 30, 3)) # noqa - - crit = Term('major_axis=date') - store.put('wp7', wp, format='t') - n = store.remove('wp7', where=[crit], stop=80) - self.assertTrue(n == 28) - result = store.select('wp7') - expected = wp.reindex(major_axis=wp.major_axis.difference( - wp.major_axis[np.arange(0, 20, 3)])) - assert_panel_equal(result, expected) + with catch_warnings(record=True): + wp = tm.makePanel(30) + + # start + _maybe_remove(store, 'wp1') + store.put('wp1', wp, format='t') + n = store.remove('wp1', start=32) + assert n == 120 - 32 + result = store.select('wp1') + expected = wp.reindex(major_axis=wp.major_axis[:32 // 4]) + assert_panel_equal(result, expected) + + _maybe_remove(store, 'wp2') + store.put('wp2', wp, format='t') + n = store.remove('wp2', start=-32) + assert n == 32 + result = store.select('wp2') + expected = wp.reindex(major_axis=wp.major_axis[:-32 // 4]) + assert_panel_equal(result, expected) + + # stop + _maybe_remove(store, 'wp3') + store.put('wp3', wp, format='t') + n = store.remove('wp3', stop=32) + assert n == 32 + result = store.select('wp3') + expected = wp.reindex(major_axis=wp.major_axis[32 // 4:]) + assert_panel_equal(result, expected) + + _maybe_remove(store, 'wp4') + store.put('wp4', wp, format='t') + n = store.remove('wp4', stop=-32) + assert n == 120 - 32 + result = store.select('wp4') + expected = wp.reindex(major_axis=wp.major_axis[-32 // 4:]) + assert_panel_equal(result, expected) + + # start n stop + _maybe_remove(store, 'wp5') + store.put('wp5', wp, format='t') + n = store.remove('wp5', start=16, stop=-16) + assert n == 120 - 32 + result = store.select('wp5') + expected = wp.reindex( + major_axis=(wp.major_axis[:16 // 4] + .union(wp.major_axis[-16 // 4:]))) + assert_panel_equal(result, expected) + + _maybe_remove(store, 'wp6') + store.put('wp6', wp, format='t') + n = store.remove('wp6', start=16, stop=16) + assert n == 0 + result = store.select('wp6') + expected = wp.reindex(major_axis=wp.major_axis) + 
assert_panel_equal(result, expected) + + # with where + _maybe_remove(store, 'wp7') + + # TODO: unused? + date = wp.major_axis.take(np.arange(0, 30, 3)) # noqa + + crit = 'major_axis=date' + store.put('wp7', wp, format='t') + n = store.remove('wp7', where=[crit], stop=80) + assert n == 28 + result = store.select('wp7') + expected = wp.reindex(major_axis=wp.major_axis.difference( + wp.major_axis[np.arange(0, 20, 3)])) + assert_panel_equal(result, expected) def test_remove_crit(self): with ensure_clean_store(self.path) as store: - wp = tm.makePanel(30) - - # group row removal - _maybe_remove(store, 'wp3') - date4 = wp.major_axis.take([0, 1, 2, 4, 5, 6, 8, 9, 10]) - crit4 = Term('major_axis=date4') - store.put('wp3', wp, format='t') - n = store.remove('wp3', where=[crit4]) - self.assertTrue(n == 36) - - result = store.select('wp3') - expected = wp.reindex(major_axis=wp.major_axis.difference(date4)) - assert_panel_equal(result, expected) - - # upper half - _maybe_remove(store, 'wp') - store.put('wp', wp, format='table') - date = wp.major_axis[len(wp.major_axis) // 2] - - crit1 = Term('major_axis>date') - crit2 = Term("minor_axis=['A', 'D']") - n = store.remove('wp', where=[crit1]) - self.assertTrue(n == 56) - - n = store.remove('wp', where=[crit2]) - self.assertTrue(n == 32) - - result = store['wp'] - expected = wp.truncate(after=date).reindex(minor=['B', 'C']) - assert_panel_equal(result, expected) - - # individual row elements - _maybe_remove(store, 'wp2') - store.put('wp2', wp, format='table') - - date1 = wp.major_axis[1:3] - crit1 = Term('major_axis=date1') - store.remove('wp2', where=[crit1]) - result = store.select('wp2') - expected = wp.reindex(major_axis=wp.major_axis.difference(date1)) - assert_panel_equal(result, expected) - - date2 = wp.major_axis[5] - crit2 = Term('major_axis=date2') - store.remove('wp2', where=[crit2]) - result = store['wp2'] - expected = wp.reindex(major_axis=wp.major_axis.difference(date1) - .difference(Index([date2]))) - 
assert_panel_equal(result, expected) - - date3 = [wp.major_axis[7], wp.major_axis[9]] - crit3 = Term('major_axis=date3') - store.remove('wp2', where=[crit3]) - result = store['wp2'] - expected = wp.reindex(major_axis=wp.major_axis - .difference(date1) - .difference(Index([date2])) - .difference(Index(date3))) - assert_panel_equal(result, expected) - - # corners - _maybe_remove(store, 'wp4') - store.put('wp4', wp, format='table') - n = store.remove( - 'wp4', where=[Term('major_axis>wp.major_axis[-1]')]) - result = store.select('wp4') - assert_panel_equal(result, wp) + with catch_warnings(record=True): + wp = tm.makePanel(30) + + # group row removal + _maybe_remove(store, 'wp3') + date4 = wp.major_axis.take([0, 1, 2, 4, 5, 6, 8, 9, 10]) + crit4 = 'major_axis=date4' + store.put('wp3', wp, format='t') + n = store.remove('wp3', where=[crit4]) + assert n == 36 + + result = store.select('wp3') + expected = wp.reindex( + major_axis=wp.major_axis.difference(date4)) + assert_panel_equal(result, expected) + + # upper half + _maybe_remove(store, 'wp') + store.put('wp', wp, format='table') + date = wp.major_axis[len(wp.major_axis) // 2] + + crit1 = 'major_axis>date' + crit2 = "minor_axis=['A', 'D']" + n = store.remove('wp', where=[crit1]) + assert n == 56 + + n = store.remove('wp', where=[crit2]) + assert n == 32 + + result = store['wp'] + expected = wp.truncate(after=date).reindex(minor=['B', 'C']) + assert_panel_equal(result, expected) + + # individual row elements + _maybe_remove(store, 'wp2') + store.put('wp2', wp, format='table') + + date1 = wp.major_axis[1:3] + crit1 = 'major_axis=date1' + store.remove('wp2', where=[crit1]) + result = store.select('wp2') + expected = wp.reindex( + major_axis=wp.major_axis.difference(date1)) + assert_panel_equal(result, expected) + + date2 = wp.major_axis[5] + crit2 = 'major_axis=date2' + store.remove('wp2', where=[crit2]) + result = store['wp2'] + expected = wp.reindex( + major_axis=(wp.major_axis + .difference(date1) + 
.difference(Index([date2])) + )) + assert_panel_equal(result, expected) + + date3 = [wp.major_axis[7], wp.major_axis[9]] + crit3 = 'major_axis=date3' + store.remove('wp2', where=[crit3]) + result = store['wp2'] + expected = wp.reindex(major_axis=wp.major_axis + .difference(date1) + .difference(Index([date2])) + .difference(Index(date3))) + assert_panel_equal(result, expected) + + # corners + _maybe_remove(store, 'wp4') + store.put('wp4', wp, format='table') + n = store.remove( + 'wp4', where="major_axis>wp.major_axis[-1]") + result = store.select('wp4') + assert_panel_equal(result, wp) def test_invalid_terms(self): with ensure_clean_store(self.path) as store: - with compat_assert_produces_warning(FutureWarning): + with catch_warnings(record=True): df = tm.makeTimeDataFrame() df['string'] = 'foo' df.loc[0:4, 'string'] = 'bar' wp = tm.makePanel() - p4d = tm.makePanel4D() store.put('df', df, format='table') store.put('wp', wp, format='table') - store.put('p4d', p4d, format='table') # some invalid terms - self.assertRaises(ValueError, store.select, - 'wp', "minor=['A', 'B']") - self.assertRaises(ValueError, store.select, - 'wp', ["index=['20121114']"]) - self.assertRaises(ValueError, store.select, 'wp', [ + pytest.raises(ValueError, store.select, + 'wp', "minor=['A', 'B']") + pytest.raises(ValueError, store.select, + 'wp', ["index=['20121114']"]) + pytest.raises(ValueError, store.select, 'wp', [ "index=['20121114', '20121114']"]) - self.assertRaises(TypeError, Term) + pytest.raises(TypeError, Term) # more invalid - self.assertRaises( + pytest.raises( ValueError, store.select, 'df', 'df.index[3]') - self.assertRaises(SyntaxError, store.select, 'df', 'index>') - self.assertRaises( + pytest.raises(SyntaxError, store.select, 'df', 'index>') + pytest.raises( ValueError, store.select, 'wp', "major_axis<'20000108' & minor_axis['A', 'B']") @@ -2482,60 +2455,40 @@ def test_invalid_terms(self): 'ABCD'), index=date_range('20130101', periods=10)) dfq.to_hdf(path, 'dfq', 
format='table') - self.assertRaises(ValueError, read_hdf, path, - 'dfq', where="A>0 or C>0") + pytest.raises(ValueError, read_hdf, path, + 'dfq', where="A>0 or C>0") def test_terms(self): with ensure_clean_store(self.path) as store: - wp = tm.makePanel() - wpneg = Panel.fromDict({-1: tm.makeDataFrame(), - 0: tm.makeDataFrame(), - 1: tm.makeDataFrame()}) - - with compat_assert_produces_warning(FutureWarning): - - p4d = tm.makePanel4D() - store.put('p4d', p4d, format='table') + with catch_warnings(record=True): - store.put('wp', wp, format='table') - store.put('wpneg', wpneg, format='table') + wp = tm.makePanel() + wpneg = Panel.fromDict({-1: tm.makeDataFrame(), + 0: tm.makeDataFrame(), + 1: tm.makeDataFrame()}) - # panel - result = store.select('wp', [Term( - 'major_axis<"20000108"'), Term("minor_axis=['A', 'B']")]) - expected = wp.truncate(after='20000108').reindex(minor=['A', 'B']) - assert_panel_equal(result, expected) - - # with deprecation - result = store.select('wp', [Term( - 'major_axis', '<', "20000108"), Term("minor_axis=['A', 'B']")]) - expected = wp.truncate(after='20000108').reindex(minor=['A', 'B']) - tm.assert_panel_equal(result, expected) - - # p4d - with compat_assert_produces_warning(FutureWarning): - - result = store.select('p4d', - [Term('major_axis<"20000108"'), - Term("minor_axis=['A', 'B']"), - Term("items=['ItemA', 'ItemB']")]) - expected = p4d.truncate(after='20000108').reindex( - minor=['A', 'B'], items=['ItemA', 'ItemB']) - assert_panel4d_equal(result, expected) - - # back compat invalid terms - terms = [dict(field='major_axis', op='>', value='20121114'), - [dict(field='major_axis', op='>', value='20121114')], - ["minor_axis=['A','B']", - dict(field='major_axis', op='>', value='20121114')]] - for t in terms: - with tm.assert_produces_warning(expected_warning=FutureWarning, - check_stacklevel=False): - Term(t) - - with compat_assert_produces_warning(FutureWarning): + store.put('wp', wp, format='table') + store.put('wpneg', wpneg, 
format='table') + + # panel + result = store.select( + 'wp', + "major_axis<'20000108' and minor_axis=['A', 'B']") + expected = wp.truncate( + after='20000108').reindex(minor=['A', 'B']) + assert_panel_equal(result, expected) + + # with deprecation + result = store.select( + 'wp', where=("major_axis<'20000108' " + "and minor_axis=['A', 'B']")) + expected = wp.truncate( + after='20000108').reindex(minor=['A', 'B']) + tm.assert_panel_equal(result, expected) + + with catch_warnings(record=True): # valid terms terms = [('major_axis=20121114'), @@ -2554,136 +2507,77 @@ def test_terms(self): for t in terms: store.select('wp', t) - store.select('p4d', t) - # valid for p4d only - terms = [(("labels=['l1', 'l2']"),), - Term("labels=['l1', 'l2']"), - ] - - for t in terms: - store.select('p4d', t) + with tm.assert_raises_regex( + TypeError, 'Only named functions are supported'): + store.select( + 'wp', + 'major_axis == (lambda x: x)("20130101")') - with tm.assertRaisesRegexp(TypeError, - 'Only named functions are supported'): - store.select('wp', Term( - 'major_axis == (lambda x: x)("20130101")')) + with catch_warnings(record=True): + # check USub node parsing + res = store.select('wpneg', 'items == -1') + expected = Panel({-1: wpneg[-1]}) + tm.assert_panel_equal(res, expected) - # check USub node parsing - res = store.select('wpneg', Term('items == -1')) - expected = Panel({-1: wpneg[-1]}) - tm.assert_panel_equal(res, expected) - - with tm.assertRaisesRegexp(NotImplementedError, - 'Unary addition not supported'): - store.select('wpneg', Term('items == +1')) + with tm.assert_raises_regex(NotImplementedError, + 'Unary addition ' + 'not supported'): + store.select('wpneg', 'items == +1') def test_term_compat(self): with ensure_clean_store(self.path) as store: - wp = Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'], - major_axis=date_range('1/1/2000', periods=5), - minor_axis=['A', 'B', 'C', 'D']) - store.append('wp', wp) - - result = store.select('wp', 
[Term('major_axis>20000102'), - Term('minor_axis', '=', ['A', 'B'])]) - expected = wp.loc[:, wp.major_axis > - Timestamp('20000102'), ['A', 'B']] - assert_panel_equal(result, expected) - - store.remove('wp', Term('major_axis>20000103')) - result = store.select('wp') - expected = wp.loc[:, wp.major_axis <= Timestamp('20000103'), :] - assert_panel_equal(result, expected) - - with ensure_clean_store(self.path) as store: - - wp = Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'], - major_axis=date_range('1/1/2000', periods=5), - minor_axis=['A', 'B', 'C', 'D']) - store.append('wp', wp) - - # stringified datetimes - result = store.select( - 'wp', [Term('major_axis', '>', datetime.datetime(2000, 1, 2))]) - expected = wp.loc[:, wp.major_axis > Timestamp('20000102')] - assert_panel_equal(result, expected) - - result = store.select( - 'wp', [Term('major_axis', '>', - datetime.datetime(2000, 1, 2, 0, 0))]) - expected = wp.loc[:, wp.major_axis > Timestamp('20000102')] - assert_panel_equal(result, expected) - - result = store.select( - 'wp', [Term('major_axis', '=', - [datetime.datetime(2000, 1, 2, 0, 0), - datetime.datetime(2000, 1, 3, 0, 0)])]) - expected = wp.loc[:, [Timestamp('20000102'), - Timestamp('20000103')]] - assert_panel_equal(result, expected) - - result = store.select('wp', [Term('minor_axis', '=', ['A', 'B'])]) - expected = wp.loc[:, :, ['A', 'B']] - assert_panel_equal(result, expected) - - def test_backwards_compat_without_term_object(self): - with ensure_clean_store(self.path) as store: - - wp = Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'], - major_axis=date_range('1/1/2000', periods=5), - minor_axis=['A', 'B', 'C', 'D']) - store.append('wp', wp) - with assert_produces_warning(expected_warning=FutureWarning, - check_stacklevel=False): - result = store.select('wp', [('major_axis>20000102'), - ('minor_axis', '=', ['A', 'B'])]) - expected = wp.loc[:, - wp.major_axis > Timestamp('20000102'), - ['A', 'B']] - assert_panel_equal(result, expected) - 
- store.remove('wp', ('major_axis>20000103')) - result = store.select('wp') - expected = wp.loc[:, wp.major_axis <= Timestamp('20000103'), :] - assert_panel_equal(result, expected) - - with ensure_clean_store(self.path) as store: - - wp = Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'], - major_axis=date_range('1/1/2000', periods=5), - minor_axis=['A', 'B', 'C', 'D']) - store.append('wp', wp) - - # stringified datetimes - with assert_produces_warning(expected_warning=FutureWarning, - check_stacklevel=False): - result = store.select('wp', - [('major_axis', - '>', - datetime.datetime(2000, 1, 2))]) - expected = wp.loc[:, wp.major_axis > Timestamp('20000102')] - assert_panel_equal(result, expected) - with assert_produces_warning(expected_warning=FutureWarning, - check_stacklevel=False): - result = store.select('wp', - [('major_axis', - '>', - datetime.datetime(2000, 1, 2, 0, 0))]) - expected = wp.loc[:, wp.major_axis > Timestamp('20000102')] - assert_panel_equal(result, expected) - with assert_produces_warning(expected_warning=FutureWarning, - check_stacklevel=False): - result = store.select('wp', - [('major_axis', - '=', - [datetime.datetime(2000, 1, 2, 0, 0), - datetime.datetime(2000, 1, 3, 0, 0)])] - ) - expected = wp.loc[:, [Timestamp('20000102'), - Timestamp('20000103')]] - assert_panel_equal(result, expected) + with catch_warnings(record=True): + wp = Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'], + major_axis=date_range('1/1/2000', periods=5), + minor_axis=['A', 'B', 'C', 'D']) + store.append('wp', wp) + + result = store.select( + 'wp', where=("major_axis>20000102 " + "and minor_axis=['A', 'B']")) + expected = wp.loc[:, wp.major_axis > + Timestamp('20000102'), ['A', 'B']] + assert_panel_equal(result, expected) + + store.remove('wp', 'major_axis>20000103') + result = store.select('wp') + expected = wp.loc[:, wp.major_axis <= Timestamp('20000103'), :] + assert_panel_equal(result, expected) + + with ensure_clean_store(self.path) as store: + + 
with catch_warnings(record=True): + wp = Panel(np.random.randn(2, 5, 4), + items=['Item1', 'Item2'], + major_axis=date_range('1/1/2000', periods=5), + minor_axis=['A', 'B', 'C', 'D']) + store.append('wp', wp) + + # stringified datetimes + result = store.select( + 'wp', 'major_axis>datetime.datetime(2000, 1, 2)') + expected = wp.loc[:, wp.major_axis > Timestamp('20000102')] + assert_panel_equal(result, expected) + + result = store.select( + 'wp', 'major_axis>datetime.datetime(2000, 1, 2)') + expected = wp.loc[:, wp.major_axis > Timestamp('20000102')] + assert_panel_equal(result, expected) + + result = store.select( + 'wp', + "major_axis=[datetime.datetime(2000, 1, 2, 0, 0), " + "datetime.datetime(2000, 1, 3, 0, 0)]") + expected = wp.loc[:, [Timestamp('20000102'), + Timestamp('20000103')]] + assert_panel_equal(result, expected) + + result = store.select( + 'wp', "minor_axis=['A', 'B']") + expected = wp.loc[:, :, ['A', 'B']] + assert_panel_equal(result, expected) def test_same_name_scoping(self): @@ -2773,62 +2667,64 @@ def test_tuple_index(self): data = np.random.randn(30).reshape((3, 10)) DF = DataFrame(data, index=idx, columns=col) - expected_warning = Warning if PY35 else PerformanceWarning - with tm.assert_produces_warning(expected_warning=expected_warning, - check_stacklevel=False): + with catch_warnings(record=True): self._check_roundtrip(DF, tm.assert_frame_equal) def test_index_types(self): - values = np.random.randn(2) + with catch_warnings(record=True): + values = np.random.randn(2) - func = lambda l, r: tm.assert_series_equal(l, r, - check_dtype=True, - check_index_type=True, - check_series_type=True) + func = lambda l, r: tm.assert_series_equal(l, r, + check_dtype=True, + check_index_type=True, + check_series_type=True) + + with catch_warnings(record=True): + ser = Series(values, [0, 'y']) + self._check_roundtrip(ser, func) + + with catch_warnings(record=True): + ser = Series(values, [datetime.datetime.today(), 0]) + self._check_roundtrip(ser, func) + + 
with catch_warnings(record=True): + ser = Series(values, ['y', 0]) + self._check_roundtrip(ser, func) + + with catch_warnings(record=True): + ser = Series(values, [datetime.date.today(), 'a']) + self._check_roundtrip(ser, func) + + with catch_warnings(record=True): - # nose has a deprecation warning in 3.5 - expected_warning = Warning if PY35 else PerformanceWarning - with tm.assert_produces_warning(expected_warning=expected_warning, - check_stacklevel=False): ser = Series(values, [0, 'y']) self._check_roundtrip(ser, func) - with tm.assert_produces_warning(expected_warning=expected_warning, - check_stacklevel=False): ser = Series(values, [datetime.datetime.today(), 0]) self._check_roundtrip(ser, func) - with tm.assert_produces_warning(expected_warning=expected_warning, - check_stacklevel=False): ser = Series(values, ['y', 0]) self._check_roundtrip(ser, func) - with tm.assert_produces_warning(expected_warning=expected_warning, - check_stacklevel=False): ser = Series(values, [datetime.date.today(), 'a']) self._check_roundtrip(ser, func) - with tm.assert_produces_warning(expected_warning=expected_warning, - check_stacklevel=False): ser = Series(values, [1.23, 'b']) self._check_roundtrip(ser, func) - ser = Series(values, [1, 1.53]) - self._check_roundtrip(ser, func) + ser = Series(values, [1, 1.53]) + self._check_roundtrip(ser, func) - ser = Series(values, [1, 5]) - self._check_roundtrip(ser, func) + ser = Series(values, [1, 5]) + self._check_roundtrip(ser, func) - ser = Series(values, [datetime.datetime( - 2012, 1, 1), datetime.datetime(2012, 1, 2)]) - self._check_roundtrip(ser, func) + ser = Series(values, [datetime.datetime( + 2012, 1, 1), datetime.datetime(2012, 1, 2)]) + self._check_roundtrip(ser, func) def test_timeseries_preepoch(self): - if sys.version_info[0] == 2 and sys.version_info[1] < 7: - pytest.skip("won't work on Python < 2.7") - dr = bdate_range('1/1/1940', '1/1/1960') ts = Series(np.random.randn(len(dr)), index=dr) try: @@ -2836,7 +2732,10 @@ def 
test_timeseries_preepoch(self): except OverflowError: pytest.skip('known failer on some windows platforms') - def test_frame(self): + @pytest.mark.parametrize("compression", [ + False, pytest.param(True, marks=td.skip_if_windows_python_3) + ]) + def test_frame(self, compression): df = tm.makeDataFrame() @@ -2844,28 +2743,21 @@ def test_frame(self): df.values[0, 0] = np.nan df.values[5, 3] = np.nan - self._check_roundtrip_table(df, tm.assert_frame_equal) - self._check_roundtrip(df, tm.assert_frame_equal) - - if not skip_compression: - self._check_roundtrip_table(df, tm.assert_frame_equal, - compression=True) - self._check_roundtrip(df, tm.assert_frame_equal, - compression=True) + self._check_roundtrip_table(df, tm.assert_frame_equal, + compression=compression) + self._check_roundtrip(df, tm.assert_frame_equal, + compression=compression) tdf = tm.makeTimeDataFrame() - self._check_roundtrip(tdf, tm.assert_frame_equal) - - if not skip_compression: - self._check_roundtrip(tdf, tm.assert_frame_equal, - compression=True) + self._check_roundtrip(tdf, tm.assert_frame_equal, + compression=compression) with ensure_clean_store(self.path) as store: # not consolidated df['foo'] = np.random.randn(len(df)) store['df'] = df recons = store['df'] - self.assertTrue(recons._data.is_consolidated()) + assert recons._data.is_consolidated() # empty self._check_roundtrip(df[:0], tm.assert_frame_equal) @@ -2935,6 +2827,27 @@ def test_store_index_name_with_tz(self): recons = store['frame'] tm.assert_frame_equal(recons, df) + @pytest.mark.parametrize('table_format', ['table', 'fixed']) + def test_store_index_name_numpy_str(self, table_format): + # GH #13492 + idx = pd.Index(pd.to_datetime([datetime.date(2000, 1, 1), + datetime.date(2000, 1, 2)]), + name=u('cols\u05d2')) + idx1 = pd.Index(pd.to_datetime([datetime.date(2010, 1, 1), + datetime.date(2010, 1, 2)]), + name=u('rows\u05d0')) + df = pd.DataFrame(np.arange(4).reshape(2, 2), columns=idx, index=idx1) + + # This used to fail, returning 
numpy strings instead of python strings. + with ensure_clean_path(self.path) as path: + df.to_hdf(path, 'df', format=table_format) + df2 = read_hdf(path, 'df') + + assert_frame_equal(df, df2, check_names=True) + + assert type(df2.index.name) == text_type + assert type(df2.columns.name) == text_type + def test_store_series_name(self): df = tm.makeDataFrame() series = df['A'] @@ -2944,7 +2857,10 @@ def test_store_series_name(self): recons = store['series'] tm.assert_series_equal(recons, series) - def test_store_mixed(self): + @pytest.mark.parametrize("compression", [ + False, pytest.param(True, marks=td.skip_if_windows_python_3) + ]) + def test_store_mixed(self, compression): def _make_one(): df = tm.makeDataFrame() @@ -2954,7 +2870,7 @@ def _make_one(): df['bool2'] = df['B'] > 0 df['int1'] = 1 df['int2'] = 2 - return df.consolidate() + return df._consolidate() df1 = _make_one() df2 = _make_one() @@ -2969,29 +2885,18 @@ def _make_one(): tm.assert_frame_equal(store['obj'], df2) # check that can store Series of all of these types - self._check_roundtrip(df1['obj1'], tm.assert_series_equal) - self._check_roundtrip(df1['bool1'], tm.assert_series_equal) - self._check_roundtrip(df1['int1'], tm.assert_series_equal) - - if not skip_compression: - self._check_roundtrip(df1['obj1'], tm.assert_series_equal, - compression=True) - self._check_roundtrip(df1['bool1'], tm.assert_series_equal, - compression=True) - self._check_roundtrip(df1['int1'], tm.assert_series_equal, - compression=True) - self._check_roundtrip(df1, tm.assert_frame_equal, - compression=True) + self._check_roundtrip(df1['obj1'], tm.assert_series_equal, + compression=compression) + self._check_roundtrip(df1['bool1'], tm.assert_series_equal, + compression=compression) + self._check_roundtrip(df1['int1'], tm.assert_series_equal, + compression=compression) def test_wide(self): - wp = tm.makePanel() - self._check_roundtrip(wp, assert_panel_equal) - - def test_wide_table(self): - - wp = tm.makePanel() - 
self._check_roundtrip_table(wp, assert_panel_equal) + with catch_warnings(record=True): + wp = tm.makePanel() + self._check_roundtrip(wp, assert_panel_equal) def test_select_with_dups(self): @@ -3014,7 +2919,7 @@ def test_select_with_dups(self): expected = df.loc[:, ['A']] assert_frame_equal(result, expected) - # dups accross dtypes + # dups across dtypes df = concat([DataFrame(np.random.randn(10, 4), columns=['A', 'A', 'B', 'B']), DataFrame(np.random.randint(0, 10, size=20) @@ -3053,25 +2958,24 @@ def test_select_with_dups(self): assert_frame_equal(result, expected, by_blocks=True) def test_wide_table_dups(self): - wp = tm.makePanel() with ensure_clean_store(self.path) as store: - store.put('panel', wp, format='table') - store.put('panel', wp, format='table', append=True) + with catch_warnings(record=True): + + wp = tm.makePanel() + store.put('panel', wp, format='table') + store.put('panel', wp, format='table', append=True) - with tm.assert_produces_warning(expected_warning=DuplicateWarning): recons = store['panel'] - assert_panel_equal(recons, wp) + assert_panel_equal(recons, wp) def test_long(self): def _check(left, right): assert_panel_equal(left.to_panel(), right.to_panel()) - wp = tm.makePanel() - self._check_roundtrip(wp.to_frame(), _check) - - # empty - # self._check_roundtrip(wp.to_frame()[:0], _check) + with catch_warnings(record=True): + wp = tm.makePanel() + self._check_roundtrip(wp.to_frame(), _check) def test_longpanel(self): pass @@ -3118,70 +3022,72 @@ def test_sparse_with_compression(self): check_frame_type=True) def test_select(self): - wp = tm.makePanel() with ensure_clean_store(self.path) as store: - # put/select ok - _maybe_remove(store, 'wp') - store.put('wp', wp, format='table') - store.select('wp') - - # non-table ok (where = None) - _maybe_remove(store, 'wp') - store.put('wp2', wp) - store.select('wp2') - - # selection on the non-indexable with a large number of columns - wp = Panel(np.random.randn(100, 100, 100), - items=['Item%03d' % i 
for i in range(100)], - major_axis=date_range('1/1/2000', periods=100), - minor_axis=['E%03d' % i for i in range(100)]) - - _maybe_remove(store, 'wp') - store.append('wp', wp) - items = ['Item%03d' % i for i in range(80)] - result = store.select('wp', Term('items=items')) - expected = wp.reindex(items=items) - assert_panel_equal(expected, result) - - # selectin non-table with a where - # self.assertRaises(ValueError, store.select, - # 'wp2', ('column', ['A', 'D'])) + with catch_warnings(record=True): + wp = tm.makePanel() - # select with columns= - df = tm.makeTimeDataFrame() - _maybe_remove(store, 'df') - store.append('df', df) - result = store.select('df', columns=['A', 'B']) - expected = df.reindex(columns=['A', 'B']) - tm.assert_frame_equal(expected, result) + # put/select ok + _maybe_remove(store, 'wp') + store.put('wp', wp, format='table') + store.select('wp') + + # non-table ok (where = None) + _maybe_remove(store, 'wp') + store.put('wp2', wp) + store.select('wp2') + + # selection on the non-indexable with a large number of columns + wp = Panel(np.random.randn(100, 100, 100), + items=['Item%03d' % i for i in range(100)], + major_axis=date_range('1/1/2000', periods=100), + minor_axis=['E%03d' % i for i in range(100)]) + + _maybe_remove(store, 'wp') + store.append('wp', wp) + items = ['Item%03d' % i for i in range(80)] + result = store.select('wp', 'items=items') + expected = wp.reindex(items=items) + assert_panel_equal(expected, result) + + # selectin non-table with a where + # pytest.raises(ValueError, store.select, + # 'wp2', ('column', ['A', 'D'])) + + # select with columns= + df = tm.makeTimeDataFrame() + _maybe_remove(store, 'df') + store.append('df', df) + result = store.select('df', columns=['A', 'B']) + expected = df.reindex(columns=['A', 'B']) + tm.assert_frame_equal(expected, result) - # equivalentsly - result = store.select('df', [("columns=['A', 'B']")]) - expected = df.reindex(columns=['A', 'B']) - tm.assert_frame_equal(expected, result) + # 
equivalentsly + result = store.select('df', [("columns=['A', 'B']")]) + expected = df.reindex(columns=['A', 'B']) + tm.assert_frame_equal(expected, result) - # with a data column - _maybe_remove(store, 'df') - store.append('df', df, data_columns=['A']) - result = store.select('df', ['A > 0'], columns=['A', 'B']) - expected = df[df.A > 0].reindex(columns=['A', 'B']) - tm.assert_frame_equal(expected, result) + # with a data column + _maybe_remove(store, 'df') + store.append('df', df, data_columns=['A']) + result = store.select('df', ['A > 0'], columns=['A', 'B']) + expected = df[df.A > 0].reindex(columns=['A', 'B']) + tm.assert_frame_equal(expected, result) - # all a data columns - _maybe_remove(store, 'df') - store.append('df', df, data_columns=True) - result = store.select('df', ['A > 0'], columns=['A', 'B']) - expected = df[df.A > 0].reindex(columns=['A', 'B']) - tm.assert_frame_equal(expected, result) + # all a data columns + _maybe_remove(store, 'df') + store.append('df', df, data_columns=True) + result = store.select('df', ['A > 0'], columns=['A', 'B']) + expected = df[df.A > 0].reindex(columns=['A', 'B']) + tm.assert_frame_equal(expected, result) - # with a data column, but different columns - _maybe_remove(store, 'df') - store.append('df', df, data_columns=['A']) - result = store.select('df', ['A > 0'], columns=['C', 'D']) - expected = df[df.A > 0].reindex(columns=['C', 'D']) - tm.assert_frame_equal(expected, result) + # with a data column, but different columns + _maybe_remove(store, 'df') + store.append('df', df, data_columns=['A']) + result = store.select('df', ['A > 0'], columns=['C', 'D']) + expected = df[df.A > 0].reindex(columns=['C', 'D']) + tm.assert_frame_equal(expected, result) def test_select_dtypes(self): @@ -3193,7 +3099,7 @@ def test_select_dtypes(self): _maybe_remove(store, 'df') store.append('df', df, data_columns=['ts', 'A']) - result = store.select('df', [Term("ts>=Timestamp('2012-02-01')")]) + result = store.select('df', 
"ts>=Timestamp('2012-02-01')") expected = df[df.ts >= Timestamp('2012-02-01')] tm.assert_frame_equal(expected, result) @@ -3208,15 +3114,15 @@ def test_select_dtypes(self): expected = (df[df.boolv == True] # noqa .reindex(columns=['A', 'boolv'])) for v in [True, 'true', 1]: - result = store.select('df', Term( - 'boolv == %s' % str(v)), columns=['A', 'boolv']) + result = store.select('df', 'boolv == %s' % str(v), + columns=['A', 'boolv']) tm.assert_frame_equal(expected, result) expected = (df[df.boolv == False] # noqa .reindex(columns=['A', 'boolv'])) for v in [False, 'false', 0]: - result = store.select('df', Term( - 'boolv == %s' % str(v)), columns=['A', 'boolv']) + result = store.select( + 'df', 'boolv == %s' % str(v), columns=['A', 'boolv']) tm.assert_frame_equal(expected, result) # integer index @@ -3224,7 +3130,7 @@ def test_select_dtypes(self): _maybe_remove(store, 'df_int') store.append('df_int', df) result = store.select( - 'df_int', [Term("index<10"), Term("columns=['A']")]) + 'df_int', "index<10 and columns=['A']") expected = df.reindex(index=list(df.index)[0:10], columns=['A']) tm.assert_frame_equal(expected, result) @@ -3234,7 +3140,7 @@ def test_select_dtypes(self): _maybe_remove(store, 'df_float') store.append('df_float', df) result = store.select( - 'df_float', [Term("index<10.0"), Term("columns=['A']")]) + 'df_float', "index<10.0 and columns=['A']") expected = df.reindex(index=list(df.index)[0:10], columns=['A']) tm.assert_frame_equal(expected, result) @@ -3305,14 +3211,14 @@ def test_select_with_many_inputs(self): store.append('df', df, data_columns=['ts', 'A', 'B', 'users']) # regular select - result = store.select('df', [Term("ts>=Timestamp('2012-02-01')")]) + result = store.select('df', "ts>=Timestamp('2012-02-01')") expected = df[df.ts >= Timestamp('2012-02-01')] tm.assert_frame_equal(expected, result) # small selector result = store.select( - 'df', [Term("ts>=Timestamp('2012-02-01') & " - "users=['a','b','c']")]) + 'df', + 
"ts>=Timestamp('2012-02-01') & users=['a','b','c']") expected = df[(df.ts >= Timestamp('2012-02-01')) & df.users.isin(['a', 'b', 'c'])] tm.assert_frame_equal(expected, result) @@ -3320,24 +3226,24 @@ def test_select_with_many_inputs(self): # big selector along the columns selector = ['a', 'b', 'c'] + ['a%03d' % i for i in range(60)] result = store.select( - 'df', [Term("ts>=Timestamp('2012-02-01')"), - Term('users=selector')]) + 'df', + "ts>=Timestamp('2012-02-01') and users=selector") expected = df[(df.ts >= Timestamp('2012-02-01')) & df.users.isin(selector)] tm.assert_frame_equal(expected, result) selector = range(100, 200) - result = store.select('df', [Term('B=selector')]) + result = store.select('df', 'B=selector') expected = df[df.B.isin(selector)] tm.assert_frame_equal(expected, result) - self.assertEqual(len(result), 100) + assert len(result) == 100 # big selector along the index selector = Index(df.ts[0:100].values) - result = store.select('df', [Term('ts=selector')]) + result = store.select('df', 'ts=selector') expected = df[df.ts.isin(selector.values)] tm.assert_frame_equal(expected, result) - self.assertEqual(len(result), 100) + assert len(result) == 100 def test_select_iterator(self): @@ -3355,7 +3261,7 @@ def test_select_iterator(self): tm.assert_frame_equal(expected, result) results = [s for s in store.select('df', chunksize=100)] - self.assertEqual(len(results), 5) + assert len(results) == 5 result = concat(results) tm.assert_frame_equal(expected, result) @@ -3367,10 +3273,10 @@ def test_select_iterator(self): df = tm.makeTimeDataFrame(500) df.to_hdf(path, 'df_non_table') - self.assertRaises(TypeError, read_hdf, path, - 'df_non_table', chunksize=100) - self.assertRaises(TypeError, read_hdf, path, - 'df_non_table', iterator=True) + pytest.raises(TypeError, read_hdf, path, + 'df_non_table', chunksize=100) + pytest.raises(TypeError, read_hdf, path, + 'df_non_table', iterator=True) with ensure_clean_path(self.path) as path: @@ -3380,7 +3286,7 @@ def 
test_select_iterator(self): results = [s for s in read_hdf(path, 'df', chunksize=100)] result = concat(results) - self.assertEqual(len(results), 5) + assert len(results) == 5 tm.assert_frame_equal(result, df) tm.assert_frame_equal(result, read_hdf(path, 'df')) @@ -3405,17 +3311,6 @@ def test_select_iterator(self): result = concat(results) tm.assert_frame_equal(expected, result) - # where selection - # expected = store.select_as_multiple( - # ['df1', 'df2'], where= Term('A>0'), selector='df1') - # results = [] - # for s in store.select_as_multiple( - # ['df1', 'df2'], where= Term('A>0'), selector='df1', - # chunksize=25): - # results.append(s) - # result = concat(results) - # tm.assert_frame_equal(expected, result) - def test_select_iterator_complete_8014(self): # GH 8014 @@ -3544,7 +3439,7 @@ def test_select_iterator_non_complete_8014(self): where = "index > '%s'" % end_dt results = [s for s in store.select( 'df', where=where, chunksize=chunksize)] - self.assertEqual(0, len(results)) + assert 0 == len(results) def test_select_iterator_many_empty_frames(self): @@ -3576,7 +3471,7 @@ def test_select_iterator_many_empty_frames(self): results = [s for s in store.select( 'df', where=where, chunksize=chunksize)] - tm.assert_equal(1, len(results)) + assert len(results) == 1 result = concat(results) rexpected = expected[expected.index <= end_dt] tm.assert_frame_equal(rexpected, result) @@ -3587,7 +3482,7 @@ def test_select_iterator_many_empty_frames(self): 'df', where=where, chunksize=chunksize)] # should be 1, is 10 - tm.assert_equal(1, len(results)) + assert len(results) == 1 result = concat(results) rexpected = expected[(expected.index >= beg_dt) & (expected.index <= end_dt)] @@ -3605,7 +3500,7 @@ def test_select_iterator_many_empty_frames(self): 'df', where=where, chunksize=chunksize)] # should be [] - tm.assert_equal(0, len(results)) + assert len(results) == 0 def test_retain_index_attributes(self): @@ -3623,19 +3518,18 @@ def test_retain_index_attributes(self): for 
attr in ['freq', 'tz', 'name']: for idx in ['index', 'columns']: - self.assertEqual(getattr(getattr(df, idx), attr, None), - getattr(getattr(result, idx), attr, None)) + assert (getattr(getattr(df, idx), attr, None) == + getattr(getattr(result, idx), attr, None)) # try to append a table with a different frequency - with tm.assert_produces_warning( - expected_warning=AttributeConflictWarning): + with catch_warnings(record=True): df2 = DataFrame(dict( A=Series(lrange(3), index=date_range('2002-1-1', periods=3, freq='D')))) store.append('data', df2) - self.assertIsNone(store.get_storer('data').info['index']['freq']) + assert store.get_storer('data').info['index']['freq'] is None # this is ok _maybe_remove(store, 'df2') @@ -3652,9 +3546,8 @@ def test_retain_index_attributes(self): def test_retain_index_attributes2(self): with ensure_clean_path(self.path) as path: - expected_warning = Warning if PY35 else AttributeConflictWarning - with tm.assert_produces_warning(expected_warning=expected_warning, - check_stacklevel=False): + + with catch_warnings(record=True): df = DataFrame(dict( A=Series(lrange(3), @@ -3672,37 +3565,41 @@ def test_retain_index_attributes2(self): df = DataFrame(dict(A=Series(lrange(3), index=idx))) df.to_hdf(path, 'data', mode='w', append=True) - self.assertEqual(read_hdf(path, 'data').index.name, 'foo') + assert read_hdf(path, 'data').index.name == 'foo' - with tm.assert_produces_warning(expected_warning=expected_warning, - check_stacklevel=False): + with catch_warnings(record=True): idx2 = date_range('2001-1-1', periods=3, freq='H') idx2.name = 'bar' df2 = DataFrame(dict(A=Series(lrange(3), index=idx2))) df2.to_hdf(path, 'data', append=True) - self.assertIsNone(read_hdf(path, 'data').index.name) + assert read_hdf(path, 'data').index.name is None def test_panel_select(self): - wp = tm.makePanel() - with ensure_clean_store(self.path) as store: - store.put('wp', wp, format='table') - date = wp.major_axis[len(wp.major_axis) // 2] - crit1 = 
('major_axis>=date') - crit2 = ("minor_axis=['A', 'D']") + with catch_warnings(record=True): - result = store.select('wp', [crit1, crit2]) - expected = wp.truncate(before=date).reindex(minor=['A', 'D']) - assert_panel_equal(result, expected) + wp = tm.makePanel() - result = store.select( - 'wp', ['major_axis>="20000124"', ("minor_axis=['A', 'B']")]) - expected = wp.truncate(before='20000124').reindex(minor=['A', 'B']) - assert_panel_equal(result, expected) + store.put('wp', wp, format='table') + date = wp.major_axis[len(wp.major_axis) // 2] + + crit1 = ('major_axis>=date') + crit2 = ("minor_axis=['A', 'D']") + + result = store.select('wp', [crit1, crit2]) + expected = wp.truncate(before=date).reindex(minor=['A', 'D']) + assert_panel_equal(result, expected) + + result = store.select( + 'wp', ['major_axis>="20000124"', + ("minor_axis=['A', 'B']")]) + expected = wp.truncate( + before='20000124').reindex(minor=['A', 'B']) + assert_panel_equal(result, expected) def test_frame_select(self): @@ -3713,7 +3610,7 @@ def test_frame_select(self): date = df.index[len(df) // 2] crit1 = Term('index>=date') - self.assertEqual(crit1.env.scope['date'], date) + assert crit1.env.scope['date'] == date crit2 = ("columns=['A', 'D']") crit3 = ('columns=A') @@ -3729,12 +3626,12 @@ def test_frame_select(self): # invalid terms df = tm.makeTimeDataFrame() store.append('df_time', df) - self.assertRaises( - ValueError, store.select, 'df_time', [Term("index>0")]) + pytest.raises( + ValueError, store.select, 'df_time', "index>0") # can't select if not written as table # store['frame'] = df - # self.assertRaises(ValueError, store.select, + # pytest.raises(ValueError, store.select, # 'frame', [crit1, crit2]) def test_frame_select_complex(self): @@ -3773,8 +3670,8 @@ def test_frame_select_complex(self): tm.assert_frame_equal(result, expected) # invert not implemented in numexpr :( - self.assertRaises(NotImplementedError, - store.select, 'df', '~(string="bar")') + pytest.raises(NotImplementedError, + 
store.select, 'df', '~(string="bar")') # invert ok for filters result = store.select('df', "~(columns=['A','B'])") @@ -3809,15 +3706,10 @@ def test_frame_select_complex2(self): hist.to_hdf(hh, 'df', mode='w', format='table') - expected = read_hdf(hh, 'df', where=Term('l1', '=', [2, 3, 4])) - - # list like - result = read_hdf(hh, 'df', where=Term( - 'l1', '=', selection.index.tolist())) - assert_frame_equal(result, expected) - l = selection.index.tolist() # noqa + expected = read_hdf(hh, 'df', where='l1=[2, 3, 4]') # sccope with list like + l = selection.index.tolist() # noqa store = HDFStore(hh) result = store.select('df', where='l1=l') assert_frame_equal(result, expected) @@ -3867,12 +3759,12 @@ def test_invalid_filtering(self): store.put('df', df, format='table') # not implemented - self.assertRaises(NotImplementedError, store.select, - 'df', "columns=['A'] | columns=['B']") + pytest.raises(NotImplementedError, store.select, + 'df', "columns=['A'] | columns=['B']") # in theory we could deal with this - self.assertRaises(NotImplementedError, store.select, - 'df', "columns=['A','B'] & columns=['C']") + pytest.raises(NotImplementedError, store.select, + 'df', "columns=['A','B'] & columns=['C']") def test_string_select(self): # GH 2973 @@ -3886,12 +3778,12 @@ def test_string_select(self): store.append('df', df, data_columns=['x']) - result = store.select('df', Term('x=none')) + result = store.select('df', 'x=none') expected = df[df.x == 'none'] assert_frame_equal(result, expected) try: - result = store.select('df', Term('x!=none')) + result = store.select('df', 'x!=none') expected = df[df.x != 'none'] assert_frame_equal(result, expected) except Exception as detail: @@ -3903,8 +3795,8 @@ def test_string_select(self): df2.loc[df2.x == '', 'x'] = np.nan store.append('df2', df2, data_columns=['x']) - result = store.select('df2', Term('x!=none')) - expected = df2[isnull(df2.x)] + result = store.select('df2', 'x!=none') + expected = df2[isna(df2.x)] 
assert_frame_equal(result, expected) # int ==/!= @@ -3913,11 +3805,11 @@ def test_string_select(self): store.append('df3', df, data_columns=['int']) - result = store.select('df3', Term('int=2')) + result = store.select('df3', 'int=2') expected = df[df.int == 2] assert_frame_equal(result, expected) - result = store.select('df3', Term('int!=2')) + result = store.select('df3', 'int!=2') expected = df[df.int != 2] assert_frame_equal(result, expected) @@ -3930,19 +3822,19 @@ def test_read_column(self): store.append('df', df) # error - self.assertRaises(KeyError, store.select_column, 'df', 'foo') + pytest.raises(KeyError, store.select_column, 'df', 'foo') def f(): store.select_column('df', 'index', where=['index>5']) - self.assertRaises(Exception, f) + pytest.raises(Exception, f) # valid result = store.select_column('df', 'index') tm.assert_almost_equal(result.values, Series(df.index).values) - self.assertIsInstance(result, Series) + assert isinstance(result, Series) # not a data indexable column - self.assertRaises( + pytest.raises( ValueError, store.select_column, 'df', 'values_block_0') # a data column @@ -4014,7 +3906,7 @@ def test_coordinates(self): result = store.select('df', where=c) expected = df.loc[3:4, :] tm.assert_frame_equal(result, expected) - self.assertIsInstance(c, Index) + assert isinstance(c, Index) # multiple tables _maybe_remove(store, 'df1') @@ -4052,14 +3944,14 @@ def test_coordinates(self): tm.assert_frame_equal(result, expected) # invalid - self.assertRaises(ValueError, store.select, 'df', - where=np.arange(len(df), dtype='float64')) - self.assertRaises(ValueError, store.select, 'df', - where=np.arange(len(df) + 1)) - self.assertRaises(ValueError, store.select, 'df', - where=np.arange(len(df)), start=5) - self.assertRaises(ValueError, store.select, 'df', - where=np.arange(len(df)), start=5, stop=10) + pytest.raises(ValueError, store.select, 'df', + where=np.arange(len(df), dtype='float64')) + pytest.raises(ValueError, store.select, 'df', + 
where=np.arange(len(df) + 1)) + pytest.raises(ValueError, store.select, 'df', + where=np.arange(len(df)), start=5) + pytest.raises(ValueError, store.select, 'df', + where=np.arange(len(df)), start=5, stop=10) # selection with filter selection = date_range('20000101', periods=500) @@ -4095,12 +3987,12 @@ def test_append_to_multiple(self): with ensure_clean_store(self.path) as store: # exceptions - self.assertRaises(ValueError, store.append_to_multiple, - {'df1': ['A', 'B'], 'df2': None}, df, - selector='df3') - self.assertRaises(ValueError, store.append_to_multiple, - {'df1': None, 'df2': None}, df, selector='df3') - self.assertRaises( + pytest.raises(ValueError, store.append_to_multiple, + {'df1': ['A', 'B'], 'df2': None}, df, + selector='df3') + pytest.raises(ValueError, store.append_to_multiple, + {'df1': None, 'df2': None}, df, selector='df3') + pytest.raises( ValueError, store.append_to_multiple, 'df1', df, 'df1') # regular operation @@ -4118,6 +4010,7 @@ def test_append_to_multiple_dropna(self): df = concat([df1, df2], axis=1) with ensure_clean_store(self.path) as store: + # dropna=True should guarantee rows are synchronized store.append_to_multiple( {'df1': ['A', 'B'], 'df2': None}, df, selector='df1', @@ -4128,14 +4021,27 @@ def test_append_to_multiple_dropna(self): tm.assert_index_equal(store.select('df1').index, store.select('df2').index) + @pytest.mark.xfail(run=False, + reason="append_to_multiple_dropna_false " + "is not raising as failed") + def test_append_to_multiple_dropna_false(self): + df1 = tm.makeTimeDataFrame() + df2 = tm.makeTimeDataFrame().rename(columns=lambda x: "%s_2" % x) + df1.iloc[1, df1.columns.get_indexer(['A', 'B'])] = np.nan + df = concat([df1, df2], axis=1) + + with ensure_clean_store(self.path) as store: + # dropna=False shouldn't synchronize row indexes store.append_to_multiple( - {'df1': ['A', 'B'], 'df2': None}, df, selector='df1', + {'df1a': ['A', 'B'], 'df2a': None}, df, selector='df1a', dropna=False) - self.assertRaises( - 
ValueError, store.select_as_multiple, ['df1', 'df2']) - assert not store.select('df1').index.equals( - store.select('df2').index) + + with pytest.raises(ValueError): + store.select_as_multiple(['df1a', 'df2a']) + + assert not store.select('df1a').index.equals( + store.select('df2a').index) def test_select_as_multiple(self): @@ -4146,25 +4052,25 @@ def test_select_as_multiple(self): with ensure_clean_store(self.path) as store: # no tables stored - self.assertRaises(Exception, store.select_as_multiple, - None, where=['A>0', 'B>0'], selector='df1') + pytest.raises(Exception, store.select_as_multiple, + None, where=['A>0', 'B>0'], selector='df1') store.append('df1', df1, data_columns=['A', 'B']) store.append('df2', df2) # exceptions - self.assertRaises(Exception, store.select_as_multiple, - None, where=['A>0', 'B>0'], selector='df1') - self.assertRaises(Exception, store.select_as_multiple, - [None], where=['A>0', 'B>0'], selector='df1') - self.assertRaises(KeyError, store.select_as_multiple, - ['df1', 'df3'], where=['A>0', 'B>0'], - selector='df1') - self.assertRaises(KeyError, store.select_as_multiple, - ['df3'], where=['A>0', 'B>0'], selector='df1') - self.assertRaises(KeyError, store.select_as_multiple, - ['df1', 'df2'], where=['A>0', 'B>0'], - selector='df4') + pytest.raises(Exception, store.select_as_multiple, + None, where=['A>0', 'B>0'], selector='df1') + pytest.raises(Exception, store.select_as_multiple, + [None], where=['A>0', 'B>0'], selector='df1') + pytest.raises(KeyError, store.select_as_multiple, + ['df1', 'df3'], where=['A>0', 'B>0'], + selector='df1') + pytest.raises(KeyError, store.select_as_multiple, + ['df3'], where=['A>0', 'B>0'], selector='df1') + pytest.raises(KeyError, store.select_as_multiple, + ['df1', 'df2'], where=['A>0', 'B>0'], + selector='df4') # default select result = store.select('df1', ['A>0', 'B>0']) @@ -4183,25 +4089,24 @@ def test_select_as_multiple(self): tm.assert_frame_equal(result, expected) # multiple (diff selector) - result = 
store.select_as_multiple(['df1', 'df2'], where=[Term( - 'index>df2.index[4]')], selector='df2') + result = store.select_as_multiple( + ['df1', 'df2'], where='index>df2.index[4]', selector='df2') expected = concat([df1, df2], axis=1) expected = expected[5:] tm.assert_frame_equal(result, expected) # test excpection for diff rows store.append('df3', tm.makeTimeDataFrame(nper=50)) - self.assertRaises(ValueError, store.select_as_multiple, - ['df1', 'df3'], where=['A>0', 'B>0'], - selector='df1') - + pytest.raises(ValueError, store.select_as_multiple, + ['df1', 'df3'], where=['A>0', 'B>0'], + selector='df1') + + @pytest.mark.skipif( + LooseVersion(tables.__version__) < LooseVersion('3.1.0'), + reason=("tables version does not support fix for nan selection " + "bug: GH 4858")) def test_nan_selection_bug_4858(self): - # GH 4858; nan selection bug, only works for pytables >= 3.1 - if LooseVersion(tables.__version__) < '3.1.0': - pytest.skip('tables version does not support fix for nan ' - 'selection bug: GH 4858') - with ensure_clean_store(self.path) as store: df = DataFrame(dict(cols=range(6), values=range(6)), @@ -4226,17 +4131,32 @@ def test_start_stop_table(self): store.append('df', df) result = store.select( - 'df', [Term("columns=['A']")], start=0, stop=5) + 'df', "columns=['A']", start=0, stop=5) expected = df.loc[0:4, ['A']] tm.assert_frame_equal(result, expected) # out of range result = store.select( - 'df', [Term("columns=['A']")], start=30, stop=40) - self.assertTrue(len(result) == 0) + 'df', "columns=['A']", start=30, stop=40) + assert len(result) == 0 expected = df.loc[30:40, ['A']] tm.assert_frame_equal(result, expected) + def test_start_stop_multiple(self): + + # GH 16209 + with ensure_clean_store(self.path) as store: + + df = DataFrame({"foo": [1, 2], "bar": [1, 2]}) + + store.append_to_multiple({'selector': ['foo'], 'data': None}, df, + selector='selector') + result = store.select_as_multiple(['selector', 'data'], + selector='selector', start=0, + stop=1) + 
expected = df.loc[[0], ['foo', 'bar']] + tm.assert_frame_equal(result, expected) + def test_start_stop_fixed(self): with ensure_clean_store(self.path) as store: @@ -4280,7 +4200,7 @@ def test_start_stop_fixed(self): df.iloc[8:10, -2] = np.nan dfs = df.to_sparse() store.put('dfs', dfs) - with self.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): store.select('dfs', start=0, stop=5) def test_select_filter_corner(self): @@ -4292,14 +4212,70 @@ def test_select_filter_corner(self): with ensure_clean_store(self.path) as store: store.put('frame', df, format='table') - crit = Term('columns=df.columns[:75]') + crit = 'columns=df.columns[:75]' result = store.select('frame', [crit]) tm.assert_frame_equal(result, df.loc[:, df.columns[:75]]) - crit = Term('columns=df.columns[:75:2]') + crit = 'columns=df.columns[:75:2]' result = store.select('frame', [crit]) tm.assert_frame_equal(result, df.loc[:, df.columns[:75:2]]) + def test_path_pathlib(self): + df = tm.makeDataFrame() + + result = tm.round_trip_pathlib( + lambda p: df.to_hdf(p, 'df'), + lambda p: pd.read_hdf(p, 'df')) + tm.assert_frame_equal(df, result) + + @pytest.mark.parametrize('start, stop', [(0, 2), (1, 2), (None, None)]) + def test_contiguous_mixed_data_table(self, start, stop): + # GH 17021 + # ValueError when reading a contiguous mixed-data table ft. 
VLArray + df = DataFrame({'a': Series([20111010, 20111011, 20111012]), + 'b': Series(['ab', 'cd', 'ab'])}) + + with ensure_clean_store(self.path) as store: + store.append('test_dataset', df) + + result = store.select('test_dataset', start=start, stop=stop) + assert_frame_equal(df[start:stop], result) + + def test_path_pathlib_hdfstore(self): + df = tm.makeDataFrame() + + def writer(path): + with pd.HDFStore(path) as store: + df.to_hdf(store, 'df') + + def reader(path): + with pd.HDFStore(path) as store: + return pd.read_hdf(store, 'df') + + result = tm.round_trip_pathlib(writer, reader) + tm.assert_frame_equal(df, result) + + def test_pickle_path_localpath(self): + df = tm.makeDataFrame() + result = tm.round_trip_pathlib( + lambda p: df.to_hdf(p, 'df'), + lambda p: pd.read_hdf(p, 'df')) + tm.assert_frame_equal(df, result) + + def test_path_localpath_hdfstore(self): + df = tm.makeDataFrame() + + def writer(path): + with pd.HDFStore(path) as store: + df.to_hdf(store, 'df') + + def reader(path): + with pd.HDFStore(path) as store: + return pd.read_hdf(store, 'df') + + result = tm.round_trip_localpath(writer, reader) + tm.assert_frame_equal(df, result) + def _check_roundtrip(self, obj, comparator, compression=False, **kwargs): options = {} @@ -4333,11 +4309,11 @@ def _check_roundtrip_table(self, obj, comparator, compression=False): with ensure_clean_store(self.path, 'w', **options) as store: store.put('obj', obj, format='table') retrieved = store['obj'] - # sorted_obj = _test_sort(obj) + comparator(retrieved, obj) def test_multiple_open_close(self): - # GH 4409, open & close multiple times + # gh-4409: open & close multiple times with ensure_clean_path(self.path) as path: @@ -4346,11 +4322,12 @@ def test_multiple_open_close(self): # single store = HDFStore(path) - self.assertNotIn('CLOSED', str(store)) - self.assertTrue(store.is_open) + assert 'CLOSED' not in store.info() + assert store.is_open + store.close() - self.assertIn('CLOSED', str(store)) - 
self.assertFalse(store.is_open) + assert 'CLOSED' in store.info() + assert not store.is_open with ensure_clean_path(self.path) as path: @@ -4361,7 +4338,7 @@ def test_multiple_open_close(self): def f(): HDFStore(path) - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) store1.close() else: @@ -4370,22 +4347,22 @@ def f(): store1 = HDFStore(path) store2 = HDFStore(path) - self.assertNotIn('CLOSED', str(store1)) - self.assertNotIn('CLOSED', str(store2)) - self.assertTrue(store1.is_open) - self.assertTrue(store2.is_open) + assert 'CLOSED' not in store1.info() + assert 'CLOSED' not in store2.info() + assert store1.is_open + assert store2.is_open store1.close() - self.assertIn('CLOSED', str(store1)) - self.assertFalse(store1.is_open) - self.assertNotIn('CLOSED', str(store2)) - self.assertTrue(store2.is_open) + assert 'CLOSED' in store1.info() + assert not store1.is_open + assert 'CLOSED' not in store2.info() + assert store2.is_open store2.close() - self.assertIn('CLOSED', str(store1)) - self.assertIn('CLOSED', str(store2)) - self.assertFalse(store1.is_open) - self.assertFalse(store2.is_open) + assert 'CLOSED' in store1.info() + assert 'CLOSED' in store2.info() + assert not store1.is_open + assert not store2.is_open # nested close store = HDFStore(path, mode='w') @@ -4394,12 +4371,12 @@ def f(): store2 = HDFStore(path) store2.append('df2', df) store2.close() - self.assertIn('CLOSED', str(store2)) - self.assertFalse(store2.is_open) + assert 'CLOSED' in store2.info() + assert not store2.is_open store.close() - self.assertIn('CLOSED', str(store)) - self.assertFalse(store.is_open) + assert 'CLOSED' in store.info() + assert not store.is_open # double closing store = HDFStore(path, mode='w') @@ -4407,12 +4384,12 @@ def f(): store2 = HDFStore(path) store.close() - self.assertIn('CLOSED', str(store)) - self.assertFalse(store.is_open) + assert 'CLOSED' in store.info() + assert not store.is_open store2.close() - self.assertIn('CLOSED', str(store2)) - 
self.assertFalse(store2.is_open) + assert 'CLOSED' in store2.info() + assert not store2.is_open # ops on a closed store with ensure_clean_path(self.path) as path: @@ -4423,21 +4400,21 @@ def f(): store = HDFStore(path) store.close() - self.assertRaises(ClosedFileError, store.keys) - self.assertRaises(ClosedFileError, lambda: 'df' in store) - self.assertRaises(ClosedFileError, lambda: len(store)) - self.assertRaises(ClosedFileError, lambda: store['df']) - self.assertRaises(ClosedFileError, lambda: store.df) - self.assertRaises(ClosedFileError, store.select, 'df') - self.assertRaises(ClosedFileError, store.get, 'df') - self.assertRaises(ClosedFileError, store.append, 'df2', df) - self.assertRaises(ClosedFileError, store.put, 'df3', df) - self.assertRaises(ClosedFileError, store.get_storer, 'df2') - self.assertRaises(ClosedFileError, store.remove, 'df2') + pytest.raises(ClosedFileError, store.keys) + pytest.raises(ClosedFileError, lambda: 'df' in store) + pytest.raises(ClosedFileError, lambda: len(store)) + pytest.raises(ClosedFileError, lambda: store['df']) + pytest.raises(AttributeError, lambda: store.df) + pytest.raises(ClosedFileError, store.select, 'df') + pytest.raises(ClosedFileError, store.get, 'df') + pytest.raises(ClosedFileError, store.append, 'df2', df) + pytest.raises(ClosedFileError, store.put, 'df3', df) + pytest.raises(ClosedFileError, store.get_storer, 'df2') + pytest.raises(ClosedFileError, store.remove, 'df2') def f(): store.select('df') - tm.assertRaisesRegexp(ClosedFileError, 'file is not open', f) + tm.assert_raises_regex(ClosedFileError, 'file is not open', f) def test_pytables_native_read(self): @@ -4445,87 +4422,48 @@ def test_pytables_native_read(self): tm.get_data_path('legacy_hdf/pytables_native.h5'), mode='r') as store: d2 = store['detector/readout'] - self.assertIsInstance(d2, DataFrame) + assert isinstance(d2, DataFrame) + @pytest.mark.skipif(PY35 and is_platform_windows(), + reason="native2 read fails oddly on windows / 3.5") def 
test_pytables_native2_read(self): - # fails on win/3.5 oddly - if PY35 and is_platform_windows(): - pytest.skip("native2 read fails oddly on windows / 3.5") - with ensure_clean_store( tm.get_data_path('legacy_hdf/pytables_native2.h5'), mode='r') as store: str(store) d1 = store['detector'] - self.assertIsInstance(d1, DataFrame) - - def test_legacy_read(self): - with ensure_clean_store( - tm.get_data_path('legacy_hdf/legacy.h5'), - mode='r') as store: - store['a'] - store['b'] - store['c'] - store['d'] + assert isinstance(d1, DataFrame) def test_legacy_table_read(self): # legacy table types with ensure_clean_store( tm.get_data_path('legacy_hdf/legacy_table.h5'), mode='r') as store: - store.select('df1') - store.select('df2') - store.select('wp1') - # force the frame - store.select('df2', typ='legacy_frame') + with catch_warnings(record=True): + store.select('df1') + store.select('df2') + store.select('wp1') + + # force the frame + store.select('df2', typ='legacy_frame') - # old version warning - with tm.assert_produces_warning( - expected_warning=IncompatibilityWarning): - self.assertRaises( - Exception, store.select, 'wp1', Term('minor_axis=B')) + # old version warning + pytest.raises( + Exception, store.select, 'wp1', 'minor_axis=B') df2 = store.select('df2') - result = store.select('df2', Term('index>df2.index[2]')) + result = store.select('df2', 'index>df2.index[2]') expected = df2[df2.index > df2.index[2]] assert_frame_equal(expected, result) - def test_legacy_0_10_read(self): - # legacy from 0.10 - with compat_assert_produces_warning(FutureWarning): - path = tm.get_data_path('legacy_hdf/legacy_0.10.h5') - with ensure_clean_store(path, mode='r') as store: - str(store) - for k in store.keys(): - store.select(k) - - def test_legacy_0_11_read(self): - # legacy from 0.11 - path = os.path.join('legacy_hdf', 'legacy_table_0.11.h5') - with ensure_clean_store(tm.get_data_path(path), mode='r') as store: - str(store) - assert 'df' in store - assert 'df1' in store - assert 
'mi' in store - df = store.select('df') - df1 = store.select('df1') - mi = store.select('mi') - assert isinstance(df, DataFrame) - assert isinstance(df1, DataFrame) - assert isinstance(mi, DataFrame) - def test_copy(self): - with compat_assert_produces_warning(FutureWarning): + with catch_warnings(record=True): - def do_copy(f=None, new_f=None, keys=None, + def do_copy(f, new_f=None, keys=None, propindexes=True, **kwargs): try: - if f is None: - f = tm.get_data_path(os.path.join('legacy_hdf', - 'legacy_0.10.h5')) - store = HDFStore(f, 'r') if new_f is None: @@ -4538,7 +4476,7 @@ def do_copy(f=None, new_f=None, keys=None, # check keys if keys is None: keys = store.keys() - self.assertEqual(set(keys), set(tstore.keys())) + assert set(keys) == set(tstore.keys()) # check indicies & nrows for k in tstore.keys(): @@ -4546,14 +4484,13 @@ def do_copy(f=None, new_f=None, keys=None, new_t = tstore.get_storer(k) orig_t = store.get_storer(k) - self.assertEqual(orig_t.nrows, new_t.nrows) + assert orig_t.nrows == new_t.nrows # check propindixes if propindexes: for a in orig_t.axes: if a.is_indexed: - self.assertTrue( - new_t[a.name].is_indexed) + assert new_t[a.name].is_indexed finally: safe_close(store) @@ -4564,10 +4501,6 @@ def do_copy(f=None, new_f=None, keys=None, pass safe_remove(new_f) - do_copy() - do_copy(keys=['/a', '/b', '/df1_mixed']) - do_copy(propindexes=False) - # new table df = tm.makeDataFrame() @@ -4581,37 +4514,13 @@ def do_copy(f=None, new_f=None, keys=None, finally: safe_remove(path) - def test_legacy_table_write(self): - pytest.skip("cannot write legacy tables") - - store = HDFStore(tm.get_data_path( - 'legacy_hdf/legacy_table_%s.h5' % pandas.__version__), 'a') - - df = tm.makeDataFrame() - wp = tm.makePanel() - - index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], - ['one', 'two', 'three']], - labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], - [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], - names=['foo', 'bar']) - df = DataFrame(np.random.randn(10, 3), index=index, - 
columns=['A', 'B', 'C']) - store.append('mi', df) - - df = DataFrame(dict(A='foo', B='bar'), index=lrange(10)) - store.append('df', df, data_columns=['B'], min_itemsize={'A': 200}) - store.append('wp', wp) - - store.close() - def test_store_datetime_fractional_secs(self): with ensure_clean_store(self.path) as store: dt = datetime.datetime(2012, 1, 2, 3, 4, 5, 123456) series = Series([0], [dt]) store['a'] = series - self.assertEqual(store['a'].index[0], dt) + assert store['a'].index[0] == dt def test_tseries_indices_series(self): @@ -4621,18 +4530,18 @@ def test_tseries_indices_series(self): store['a'] = ser result = store['a'] - assert_series_equal(result, ser) - self.assertEqual(type(result.index), type(ser.index)) - self.assertEqual(result.index.freq, ser.index.freq) + tm.assert_series_equal(result, ser) + assert result.index.freq == ser.index.freq + tm.assert_class_equal(result.index, ser.index, obj="series index") idx = tm.makePeriodIndex(10) ser = Series(np.random.randn(len(idx)), idx) store['a'] = ser result = store['a'] - assert_series_equal(result, ser) - self.assertEqual(type(result.index), type(ser.index)) - self.assertEqual(result.index.freq, ser.index.freq) + tm.assert_series_equal(result, ser) + assert result.index.freq == ser.index.freq + tm.assert_class_equal(result.index, ser.index, obj="series index") def test_tseries_indices_frame(self): @@ -4643,8 +4552,9 @@ def test_tseries_indices_frame(self): result = store['a'] assert_frame_equal(result, df) - self.assertEqual(type(result.index), type(df.index)) - self.assertEqual(result.index.freq, df.index.freq) + assert result.index.freq == df.index.freq + tm.assert_class_equal(result.index, df.index, + obj="dataframe index") idx = tm.makePeriodIndex(10) df = DataFrame(np.random.randn(len(idx), 3), idx) @@ -4652,14 +4562,16 @@ def test_tseries_indices_frame(self): result = store['a'] assert_frame_equal(result, df) - self.assertEqual(type(result.index), type(df.index)) - self.assertEqual(result.index.freq, 
df.index.freq) + assert result.index.freq == df.index.freq + tm.assert_class_equal(result.index, df.index, + obj="dataframe index") def test_unicode_index(self): unicode_values = [u('\u03c3'), u('\u03c3\u03c3')] - with compat_assert_produces_warning(PerformanceWarning): + # PerformanceWarning + with catch_warnings(record=True): s = Series(np.random.randn(len(unicode_values)), unicode_values) self._check_roundtrip(s, tm.assert_series_equal) @@ -4692,7 +4604,7 @@ def test_store_datetime_mixed(self): # index=[np.arange(5).repeat(2), # np.tile(np.arange(2), 5)]) - # self.assertRaises(Exception, store.put, 'foo', df, format='table') + # pytest.raises(Exception, store.put, 'foo', df, format='table') def test_append_with_diff_col_name_types_raises_value_error(self): df = DataFrame(np.random.randn(10, 1)) @@ -4706,7 +4618,7 @@ def test_append_with_diff_col_name_types_raises_value_error(self): store.append(name, df) for d in (df2, df3, df4, df5): - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): store.append(name, d) def test_query_with_nested_special_character(self): @@ -4723,7 +4635,7 @@ def test_categorical(self): with ensure_clean_store(self.path) as store: - # basic + # Basic _maybe_remove(store, 's') s = Series(Categorical(['a', 'b', 'b', 'a', 'a', 'c'], categories=[ 'a', 'b', 'c', 'd'], ordered=False)) @@ -4739,12 +4651,13 @@ def test_categorical(self): tm.assert_series_equal(s, result) _maybe_remove(store, 'df') + df = DataFrame({"s": s, "vals": [1, 2, 3, 4, 5, 6]}) store.append('df', df, format='table') result = store.select('df') tm.assert_frame_equal(result, df) - # dtypes + # Dtypes s = Series([1, 1, 2, 2, 3, 4, 5]).astype('category') store.append('si', s) result = store.select('si') @@ -4755,17 +4668,18 @@ def test_categorical(self): result = store.select('si2') tm.assert_series_equal(result, s) - # multiple + # Multiple df2 = df.copy() df2['s2'] = Series(list('abcdefg')).astype('category') store.append('df2', df2) result = 
store.select('df2') tm.assert_frame_equal(result, df2) - # make sure the metadata is ok - self.assertTrue('/df2 ' in str(store)) - self.assertTrue('/df2/meta/values_block_0/meta' in str(store)) - self.assertTrue('/df2/meta/values_block_1/meta' in str(store)) + # Make sure the metadata is OK + info = store.info() + assert '/df2 ' in info + # assert '/df2/meta/values_block_0/meta' in info + assert '/df2/meta/values_block_1/meta' in info # unordered s = Series(Categorical(['a', 'b', 'b', 'a', 'a', 'c'], categories=[ @@ -4774,7 +4688,7 @@ def test_categorical(self): result = store.select('s2') tm.assert_series_equal(result, s) - # query + # Query store.append('df3', df, data_columns=['s']) expected = df[df.s.isin(['b', 'c'])] result = store.select('df3', where=['s in ["b","c"]']) @@ -4792,7 +4706,7 @@ def test_categorical(self): result = store.select('df3', where=['s in ["f"]']) tm.assert_frame_equal(result, expected) - # appending with same categories is ok + # Appending with same categories is ok store.append('df3', df) df = concat([df, df]) @@ -4800,20 +4714,21 @@ def test_categorical(self): result = store.select('df3', where=['s in ["b","c"]']) tm.assert_frame_equal(result, expected) - # appending must have the same categories + # Appending must have the same categories df3 = df.copy() df3['s'].cat.remove_unused_categories(inplace=True) - self.assertRaises(ValueError, lambda: store.append('df3', df3)) + with pytest.raises(ValueError): + store.append('df3', df3) - # remove - # make sure meta data is removed (its a recursive removal so should - # be) + # Remove, and make sure meta data is removed (its a recursive + # removal so should be). 
result = store.select('df3/meta/s/meta') - self.assertIsNotNone(result) + assert result is not None store.remove('df3') - self.assertRaises( - KeyError, lambda: store.select('df3/meta/s/meta')) + + with pytest.raises(KeyError): + store.select('df3/meta/s/meta') def test_categorical_conversion(self): @@ -4845,19 +4760,38 @@ def test_categorical_conversion(self): result = read_hdf(path, 'df', where='obsids=B') tm.assert_frame_equal(result, expected) + def test_categorical_nan_only_columns(self): + # GH18413 + # Check that read_hdf with categorical columns with NaN-only values can + # be read back. + df = pd.DataFrame({ + 'a': ['a', 'b', 'c', np.nan], + 'b': [np.nan, np.nan, np.nan, np.nan], + 'c': [1, 2, 3, 4], + 'd': pd.Series([None] * 4, dtype=object) + }) + df['a'] = df.a.astype('category') + df['b'] = df.b.astype('category') + df['d'] = df.b.astype('category') + expected = df + with ensure_clean_path(self.path) as path: + df.to_hdf(path, 'df', format='table', data_columns=True) + result = read_hdf(path, 'df') + tm.assert_frame_equal(result, expected) + def test_duplicate_column_name(self): df = DataFrame(columns=["a", "a"], data=[[0, 0]]) with ensure_clean_path(self.path) as path: - self.assertRaises(ValueError, df.to_hdf, - path, 'df', format='fixed') + pytest.raises(ValueError, df.to_hdf, + path, 'df', format='fixed') df.to_hdf(path, 'df', format='table') other = read_hdf(path, 'df') tm.assert_frame_equal(df, other) - self.assertTrue(df.equals(other)) - self.assertTrue(other.equals(df)) + assert df.equals(other) + assert other.equals(df) def test_round_trip_equals(self): # GH 9330 @@ -4867,8 +4801,8 @@ def test_round_trip_equals(self): df.to_hdf(path, 'df', format='table') other = read_hdf(path, 'df') tm.assert_frame_equal(df, other) - self.assertTrue(df.equals(other)) - self.assertTrue(other.equals(df)) + assert df.equals(other) + assert other.equals(df) def test_preserve_timedeltaindex_type(self): # GH9635 @@ -4883,7 +4817,7 @@ def 
test_preserve_timedeltaindex_type(self): store['df'] = df assert_frame_equal(store['df'], df) - def test_colums_multiindex_modified(self): + def test_columns_multiindex_modified(self): # BUG: 7212 # read_hdf store.select modified the passed columns parameters # when multi-indexed. @@ -4904,7 +4838,7 @@ def test_colums_multiindex_modified(self): cols2load = list('BCD') cols2load_original = list(cols2load) df_loaded = read_hdf(path, 'df', columns=cols2load) # noqa - self.assertTrue(cols2load_original == cols2load) + assert cols2load_original == cols2load def test_to_hdf_with_object_column_names(self): # GH9057 @@ -4924,18 +4858,21 @@ def test_to_hdf_with_object_column_names(self): for index in types_should_fail: df = DataFrame(np.random.randn(10, 2), columns=index(2)) with ensure_clean_path(self.path) as path: - with self.assertRaises( + with catch_warnings(record=True): + with pytest.raises( ValueError, msg=("cannot have non-object label " "DataIndexableCol")): - df.to_hdf(path, 'df', format='table', data_columns=True) + df.to_hdf(path, 'df', format='table', + data_columns=True) for index in types_should_run: df = DataFrame(np.random.randn(10, 2), columns=index(2)) with ensure_clean_path(self.path) as path: - df.to_hdf(path, 'df', format='table', data_columns=True) - result = pd.read_hdf( - path, 'df', where="index = [{0}]".format(df.index[0])) - assert(len(result)) + with catch_warnings(record=True): + df.to_hdf(path, 'df', format='table', data_columns=True) + result = pd.read_hdf( + path, 'df', where="index = [{0}]".format(df.index[0])) + assert(len(result)) def test_read_hdf_open_store(self): # GH10330 @@ -4952,7 +4889,7 @@ def test_read_hdf_open_store(self): store = HDFStore(path, mode='r') indirect = read_hdf(store, 'df') tm.assert_frame_equal(direct, indirect) - self.assertTrue(store.is_open) + assert store.is_open store.close() def test_read_hdf_iterator(self): @@ -4966,7 +4903,7 @@ def test_read_hdf_iterator(self): df.to_hdf(path, 'df', mode='w', format='t') 
direct = read_hdf(path, 'df') iterator = read_hdf(path, 'df', iterator=True) - self.assertTrue(isinstance(iterator, TableIterator)) + assert isinstance(iterator, TableIterator) indirect = next(iterator.__iter__()) tm.assert_frame_equal(direct, indirect) iterator.store.close() @@ -4977,21 +4914,22 @@ def test_read_hdf_errors(self): columns=list('ABCDE')) with ensure_clean_path(self.path) as path: - self.assertRaises(IOError, read_hdf, path, 'key') + pytest.raises(IOError, read_hdf, path, 'key') df.to_hdf(path, 'df') store = HDFStore(path, mode='r') store.close() - self.assertRaises(IOError, read_hdf, store, 'df') - with open(path, mode='r') as store: - self.assertRaises(NotImplementedError, read_hdf, store, 'df') + pytest.raises(IOError, read_hdf, store, 'df') + + def test_read_hdf_generic_buffer_errors(self): + pytest.raises(NotImplementedError, read_hdf, BytesIO(b''), 'df') def test_invalid_complib(self): df = DataFrame(np.random.rand(4, 5), index=list('abcd'), columns=list('ABCDE')) with ensure_clean_path(self.path) as path: - self.assertRaises(ValueError, df.to_hdf, path, - 'df', complib='blosc:zlib') + with pytest.raises(ValueError): + df.to_hdf(path, 'df', complib='foolib') # GH10443 def test_read_nokey(self): @@ -5006,7 +4944,7 @@ def test_read_nokey(self): reread = read_hdf(path) assert_frame_equal(df, reread) df.to_hdf(path, 'df2', mode='a') - self.assertRaises(ValueError, read_hdf, path) + pytest.raises(ValueError, read_hdf, path) def test_read_nokey_table(self): # GH13231 @@ -5018,19 +4956,18 @@ def test_read_nokey_table(self): reread = read_hdf(path) assert_frame_equal(df, reread) df.to_hdf(path, 'df2', mode='a', format='table') - self.assertRaises(ValueError, read_hdf, path) + pytest.raises(ValueError, read_hdf, path) def test_read_nokey_empty(self): with ensure_clean_path(self.path) as path: store = HDFStore(path) store.close() - self.assertRaises(ValueError, read_hdf, path) + pytest.raises(ValueError, read_hdf, path) + @td.skip_if_no('pathlib') def 
test_read_from_pathlib_path(self): # GH11773 - tm._skip_if_no_pathlib() - from pathlib import Path expected = DataFrame(np.random.rand(4, 5), @@ -5044,11 +4981,10 @@ def test_read_from_pathlib_path(self): tm.assert_frame_equal(expected, actual) + @td.skip_if_no('py.path') def test_read_from_py_localpath(self): # GH11773 - tm._skip_if_no_localpath() - from py.path import local as LocalPath expected = DataFrame(np.random.rand(4, 5), @@ -5073,7 +5009,7 @@ def test_query_long_float_literal(self): cutoff = 1000000000.0006 result = store.select('test', "A < %.4f" % cutoff) - self.assertTrue(result.empty) + assert result.empty cutoff = 1000000000.0010 result = store.select('test', "A > %.4f" % cutoff) @@ -5085,6 +5021,88 @@ def test_query_long_float_literal(self): expected = df.loc[[1], :] tm.assert_frame_equal(expected, result) + def test_query_compare_column_type(self): + # GH 15492 + df = pd.DataFrame({'date': ['2014-01-01', '2014-01-02'], + 'real_date': date_range('2014-01-01', periods=2), + 'float': [1.1, 1.2], + 'int': [1, 2]}, + columns=['date', 'real_date', 'float', 'int']) + + with ensure_clean_store(self.path) as store: + store.append('test', df, format='table', data_columns=True) + + ts = pd.Timestamp('2014-01-01') # noqa + result = store.select('test', where='real_date > ts') + expected = df.loc[[1], :] + tm.assert_frame_equal(expected, result) + + for op in ['<', '>', '==']: + # non strings to string column always fail + for v in [2.1, True, pd.Timestamp('2014-01-01'), + pd.Timedelta(1, 's')]: + query = 'date {op} v'.format(op=op) + with pytest.raises(TypeError): + result = store.select('test', where=query) + + # strings to other columns must be convertible to type + v = 'a' + for col in ['int', 'float', 'real_date']: + query = '{col} {op} v'.format(op=op, col=col) + with pytest.raises(ValueError): + result = store.select('test', where=query) + + for v, col in zip(['1', '1.1', '2014-01-01'], + ['int', 'float', 'real_date']): + query = '{col} {op} 
v'.format(op=op, col=col) + result = store.select('test', where=query) + + if op == '==': + expected = df.loc[[0], :] + elif op == '>': + expected = df.loc[[1], :] + else: + expected = df.loc[[], :] + tm.assert_frame_equal(expected, result) + + @pytest.mark.parametrize('format', ['fixed', 'table']) + def test_read_hdf_series_mode_r(self, format): + # GH 16583 + # Tests that reading a Series saved to an HDF file + # still works if a mode='r' argument is supplied + series = tm.makeFloatSeries() + with ensure_clean_path(self.path) as path: + series.to_hdf(path, key='data', format=format) + result = pd.read_hdf(path, key='data', mode='r') + tm.assert_series_equal(result, series) + + @pytest.mark.skipif(not PY36, reason="Need python 3.6") + def test_fspath(self): + with tm.ensure_clean('foo.h5') as path: + with pd.HDFStore(path) as store: + assert os.fspath(store) == str(path) + + def test_read_py2_hdf_file_in_py3(self): + # GH 16781 + + # tests reading a PeriodIndex DataFrame written in Python2 in Python3 + + # the file was generated in Python 2.7 like so: + # + # df = pd.DataFrame([1.,2,3], index=pd.PeriodIndex( + # ['2015-01-01', '2015-01-02', '2015-01-05'], freq='B')) + # df.to_hdf('periodindex_0.20.1_x86_64_darwin_2.7.13.h5', 'p') + + expected = pd.DataFrame([1., 2, 3], index=pd.PeriodIndex( + ['2015-01-01', '2015-01-02', '2015-01-05'], freq='B')) + + with ensure_clean_store( + tm.get_data_path( + 'legacy_hdf/periodindex_0.20.1_x86_64_darwin_2.7.13.h5'), + mode='r') as store: + result = store['p'] + assert_frame_equal(result, expected) + class TestHDFComplexValues(Base): # GH10447 @@ -5156,7 +5174,7 @@ def test_complex_mixed_table(self): with ensure_clean_store(self.path) as store: store.append('df', df, data_columns=['A', 'B']) - result = store.select('df', where=Term('A>2')) + result = store.select('df', where='A>2') assert_frame_equal(df.loc[df.A > 2], result) with ensure_clean_path(self.path) as path: @@ -5165,32 +5183,32 @@ def test_complex_mixed_table(self): 
assert_frame_equal(df, reread) def test_complex_across_dimensions_fixed(self): - complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j]) - s = Series(complex128, index=list('abcd')) - df = DataFrame({'A': s, 'B': s}) - p = Panel({'One': df, 'Two': df}) + with catch_warnings(record=True): + complex128 = np.array( + [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j]) + s = Series(complex128, index=list('abcd')) + df = DataFrame({'A': s, 'B': s}) + p = Panel({'One': df, 'Two': df}) - objs = [s, df, p] - comps = [tm.assert_series_equal, tm.assert_frame_equal, - tm.assert_panel_equal] - for obj, comp in zip(objs, comps): - with ensure_clean_path(self.path) as path: - obj.to_hdf(path, 'obj', format='fixed') - reread = read_hdf(path, 'obj') - comp(obj, reread) + objs = [s, df, p] + comps = [tm.assert_series_equal, tm.assert_frame_equal, + tm.assert_panel_equal] + for obj, comp in zip(objs, comps): + with ensure_clean_path(self.path) as path: + obj.to_hdf(path, 'obj', format='fixed') + reread = read_hdf(path, 'obj') + comp(obj, reread) def test_complex_across_dimensions(self): complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j]) s = Series(complex128, index=list('abcd')) df = DataFrame({'A': s, 'B': s}) - p = Panel({'One': df, 'Two': df}) - with compat_assert_produces_warning(FutureWarning): - p4d = pd.Panel4D({'i': p, 'ii': p}) + with catch_warnings(record=True): + p = Panel({'One': df, 'Two': df}) - objs = [df, p, p4d] - comps = [tm.assert_frame_equal, tm.assert_panel_equal, - tm.assert_panel4d_equal] + objs = [df, p] + comps = [tm.assert_frame_equal, tm.assert_panel_equal] for obj, comp in zip(objs, comps): with ensure_clean_path(self.path) as path: obj.to_hdf(path, 'obj', format='table') @@ -5205,15 +5223,15 @@ def test_complex_indexing_error(self): 'C': complex128}, index=list('abcd')) with ensure_clean_store(self.path) as store: - self.assertRaises(TypeError, store.append, - 'df', df, data_columns=['C']) + pytest.raises(TypeError, 
store.append, + 'df', df, data_columns=['C']) def test_complex_series_error(self): complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j]) s = Series(complex128, index=list('abcd')) with ensure_clean_path(self.path) as path: - self.assertRaises(TypeError, s.to_hdf, path, 'obj', format='t') + pytest.raises(TypeError, s.to_hdf, path, 'obj', format='t') with ensure_clean_path(self.path) as path: s.to_hdf(path, 'obj', format='t', index=False) @@ -5231,7 +5249,7 @@ def test_complex_append(self): assert_frame_equal(pd.concat([df, df], 0), result) -class TestTimezones(Base, tm.TestCase): +class TestTimezones(Base): def _compare_with_tz(self, a, b): tm.assert_frame_equal(a, b) @@ -5243,16 +5261,15 @@ def _compare_with_tz(self, a, b): b_e = b.loc[i, c] if not (a_e == b_e and a_e.tz == b_e.tz): raise AssertionError( - "invalid tz comparsion [%s] [%s]" % (a_e, b_e)) + "invalid tz comparison [%s] [%s]" % (a_e, b_e)) def test_append_with_timezones_dateutil(self): from datetime import timedelta - tm._skip_if_no_dateutil() # use maybe_get_tz instead of dateutil.tz.gettz to handle the windows # filename issues. - from pandas.tslib import maybe_get_tz + from pandas._libs.tslibs.timezones import maybe_get_tz gettz = lambda x: maybe_get_tz('dateutil/' + x) # as columns @@ -5269,7 +5286,7 @@ def test_append_with_timezones_dateutil(self): # select with tz aware expected = df[df.A >= df.A[3]] - result = store.select('df_tz', where=Term('A>=df.A[3]')) + result = store.select('df_tz', where='A>=df.A[3]') self._compare_with_tz(result, expected) # ensure we include dates in DST and STD time here. 
@@ -5288,7 +5305,7 @@ def test_append_with_timezones_dateutil(self): tz=gettz('US/Eastern')), B=Timestamp('20130102', tz=gettz('EET'))), index=range(5)) - self.assertRaises(ValueError, store.append, 'df_tz', df) + pytest.raises(ValueError, store.append, 'df_tz', df) # this is ok _maybe_remove(store, 'df_tz') @@ -5302,7 +5319,7 @@ def test_append_with_timezones_dateutil(self): tz=gettz('US/Eastern')), B=Timestamp('20130102', tz=gettz('CET'))), index=range(5)) - self.assertRaises(ValueError, store.append, 'df_tz', df) + pytest.raises(ValueError, store.append, 'df_tz', df) # as index with ensure_clean_store(self.path) as store: @@ -5340,7 +5357,7 @@ def test_append_with_timezones_pytz(self): # select with tz aware self._compare_with_tz(store.select( - 'df_tz', where=Term('A>=df.A[3]')), df[df.A >= df.A[3]]) + 'df_tz', where='A>=df.A[3]'), df[df.A >= df.A[3]]) _maybe_remove(store, 'df_tz') # ensure we include dates in DST and STD time here. @@ -5355,7 +5372,7 @@ def test_append_with_timezones_pytz(self): df = DataFrame(dict(A=Timestamp('20130102', tz='US/Eastern'), B=Timestamp('20130102', tz='EET')), index=range(5)) - self.assertRaises(ValueError, store.append, 'df_tz', df) + pytest.raises(ValueError, store.append, 'df_tz', df) # this is ok _maybe_remove(store, 'df_tz') @@ -5368,7 +5385,7 @@ def test_append_with_timezones_pytz(self): df = DataFrame(dict(A=Timestamp('20130102', tz='US/Eastern'), B=Timestamp('20130102', tz='CET')), index=range(5)) - self.assertRaises(ValueError, store.append, 'df_tz', df) + pytest.raises(ValueError, store.append, 'df_tz', df) # as index with ensure_clean_store(self.path) as store: @@ -5399,7 +5416,7 @@ def test_tseries_select_index_column(self): with ensure_clean_store(self.path) as store: store.append('frame', frame) result = store.select_column('frame', 'index') - self.assertEqual(rng.tz, DatetimeIndex(result.values).tz) + assert rng.tz == DatetimeIndex(result.values).tz # check utc rng = date_range('1/1/2000', '1/30/2000', tz='UTC') 
@@ -5408,7 +5425,7 @@ def test_tseries_select_index_column(self): with ensure_clean_store(self.path) as store: store.append('frame', frame) result = store.select_column('frame', 'index') - self.assertEqual(rng.tz, result.dt.tz) + assert rng.tz == result.dt.tz # double check non-utc rng = date_range('1/1/2000', '1/30/2000', tz='US/Eastern') @@ -5417,7 +5434,7 @@ def test_tseries_select_index_column(self): with ensure_clean_store(self.path) as store: store.append('frame', frame) result = store.select_column('frame', 'index') - self.assertEqual(rng.tz, result.dt.tz) + assert rng.tz == result.dt.tz def test_timezones_fixed(self): with ensure_clean_store(self.path) as store: @@ -5447,9 +5464,10 @@ def test_fixed_offset_tz(self): with ensure_clean_store(self.path) as store: store['frame'] = frame recons = store['frame'] - self.assert_index_equal(recons.index, rng) - self.assertEqual(rng.tz, recons.index.tz) + tm.assert_index_equal(recons.index, rng) + assert rng.tz == recons.index.tz + @td.skip_if_windows def test_store_timezone(self): # GH2852 # issue storing datetime.date with a timezone as it resets when read @@ -5503,12 +5521,3 @@ def test_dst_transitions(self): store.append('df', df) result = store.select('df') assert_frame_equal(result, df) - - -def _test_sort(obj): - if isinstance(obj, DataFrame): - return obj.reindex(sorted(obj.index)) - elif isinstance(obj, Panel): - return obj.reindex(major=sorted(obj.major_axis)) - else: - raise ValueError('type not supported here') diff --git a/pandas/tests/io/test_s3.py b/pandas/tests/io/test_s3.py new file mode 100644 index 0000000000000..7a3062f470ce8 --- /dev/null +++ b/pandas/tests/io/test_s3.py @@ -0,0 +1,8 @@ +from pandas.io.common import is_s3_url + + +class TestS3URL(object): + + def test_is_s3_url(self): + assert is_s3_url("s3://pandas/somethingelse.com") + assert not is_s3_url("s4://pandas/somethingelse.com") diff --git a/pandas/io/tests/test_sql.py b/pandas/tests/io/test_sql.py similarity index 80% rename from 
pandas/io/tests/test_sql.py rename to pandas/tests/io/test_sql.py index a6f4d96001021..4530cc9d2fba9 100644 --- a/pandas/io/tests/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -18,26 +18,26 @@ """ from __future__ import print_function -import unittest +from warnings import catch_warnings +import pytest import sqlite3 import csv import os -import sys -import pytest import warnings import numpy as np import pandas as pd from datetime import datetime, date, time -from pandas.types.common import (is_object_dtype, is_datetime64_dtype, - is_datetime64tz_dtype) -from pandas import DataFrame, Series, Index, MultiIndex, isnull, concat +from pandas.core.dtypes.common import ( + is_object_dtype, is_datetime64_dtype, + is_datetime64tz_dtype) +from pandas import DataFrame, Series, Index, MultiIndex, isna, concat from pandas import date_range, to_datetime, to_timedelta, Timestamp import pandas.compat as compat -from pandas.compat import StringIO, range, lrange, string_types, PY36 -from pandas.tseries.tools import format as date_format +from pandas.compat import range, lrange, string_types, PY36 +from pandas.core.tools.datetimes import format as date_format import pandas.io.sql as sql from pandas.io.sql import read_sql_table, read_sql_query @@ -88,6 +88,7 @@ "TextCol" TEXT, "DateCol" TEXT, "IntDateCol" INTEGER, + "IntDateOnlyCol" INTEGER, "FloatCol" REAL, "IntCol" INTEGER, "BoolCol" INTEGER, @@ -98,6 +99,7 @@ `TextCol` TEXT, `DateCol` DATETIME, `IntDateCol` INTEGER, + `IntDateOnlyCol` INTEGER, `FloatCol` DOUBLE, `IntCol` INTEGER, `BoolCol` BOOLEAN, @@ -109,6 +111,7 @@ "DateCol" TIMESTAMP, "DateColWithTz" TIMESTAMP WITH TIME ZONE, "IntDateCol" INTEGER, + "IntDateOnlyCol" INTEGER, "FloatCol" DOUBLE PRECISION, "IntCol" INTEGER, "BoolCol" BOOLEAN, @@ -120,31 +123,33 @@ 'sqlite': { 'query': """ INSERT INTO types_test_data - VALUES(?, ?, ?, ?, ?, ?, ?, ?) + VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?) 
""", 'fields': ( - 'TextCol', 'DateCol', 'IntDateCol', 'FloatCol', - 'IntCol', 'BoolCol', 'IntColWithNull', 'BoolColWithNull' + 'TextCol', 'DateCol', 'IntDateCol', 'IntDateOnlyCol', + 'FloatCol', 'IntCol', 'BoolCol', 'IntColWithNull', + 'BoolColWithNull' ) }, 'mysql': { 'query': """ INSERT INTO types_test_data - VALUES("%s", %s, %s, %s, %s, %s, %s, %s) + VALUES("%s", %s, %s, %s, %s, %s, %s, %s, %s) """, 'fields': ( - 'TextCol', 'DateCol', 'IntDateCol', 'FloatCol', - 'IntCol', 'BoolCol', 'IntColWithNull', 'BoolColWithNull' + 'TextCol', 'DateCol', 'IntDateCol', 'IntDateOnlyCol', + 'FloatCol', 'IntCol', 'BoolCol', 'IntColWithNull', + 'BoolColWithNull' ) }, 'postgresql': { 'query': """ INSERT INTO types_test_data - VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s) + VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s) """, 'fields': ( 'TextCol', 'DateCol', 'DateColWithTz', - 'IntDateCol', 'FloatCol', + 'IntDateCol', 'IntDateOnlyCol', 'FloatCol', 'IntCol', 'BoolCol', 'IntColWithNull', 'BoolColWithNull' ) }, @@ -178,7 +183,7 @@ class MixInBase(object): - def tearDown(self): + def teardown_method(self, method): for tbl in self._get_all_tables(): self.drop_table(tbl) self._close_conn() @@ -271,8 +276,7 @@ def _check_iris_loaded_frame(self, iris_frame): pytype = iris_frame.dtypes[0].type row = iris_frame.iloc[0] - self.assertTrue( - issubclass(pytype, np.floating), 'Loaded frame has incorrect type') + assert issubclass(pytype, np.floating) tm.equalContents(row.values, [5.1, 3.5, 1.4, 0.2, 'Iris-setosa']) def _load_test1_data(self): @@ -314,13 +318,13 @@ def _load_raw_sql(self): self.drop_table('types_test_data') self._get_exec().execute(SQL_STRINGS['create_test_types'][self.flavor]) ins = SQL_STRINGS['insert_test_types'][self.flavor] - data = [ { 'TextCol': 'first', 'DateCol': '2000-01-03 00:00:00', 'DateColWithTz': '2000-01-01 00:00:00-08:00', 'IntDateCol': 535852800, + 'IntDateOnlyCol': 20101010, 'FloatCol': 10.10, 'IntCol': 1, 'BoolCol': False, @@ -332,6 +336,7 @@ def 
_load_raw_sql(self): 'DateCol': '2000-01-04 00:00:00', 'DateColWithTz': '2000-06-01 00:00:00-07:00', 'IntDateCol': 1356998400, + 'IntDateOnlyCol': 20101212, 'FloatCol': 10.10, 'IntCol': 1, 'BoolCol': False, @@ -371,8 +376,7 @@ def _to_sql(self): self.drop_table('test_frame1') self.pandasSQL.to_sql(self.test_frame1, 'test_frame1') - self.assertTrue(self.pandasSQL.has_table( - 'test_frame1'), 'Table not written to DB') + assert self.pandasSQL.has_table('test_frame1') # Nuke table self.drop_table('test_frame1') @@ -386,11 +390,10 @@ def _to_sql_fail(self): self.pandasSQL.to_sql( self.test_frame1, 'test_frame1', if_exists='fail') - self.assertTrue(self.pandasSQL.has_table( - 'test_frame1'), 'Table not written to DB') + assert self.pandasSQL.has_table('test_frame1') - self.assertRaises(ValueError, self.pandasSQL.to_sql, - self.test_frame1, 'test_frame1', if_exists='fail') + pytest.raises(ValueError, self.pandasSQL.to_sql, + self.test_frame1, 'test_frame1', if_exists='fail') self.drop_table('test_frame1') @@ -402,15 +405,12 @@ def _to_sql_replace(self): # Add to table again self.pandasSQL.to_sql( self.test_frame1, 'test_frame1', if_exists='replace') - self.assertTrue(self.pandasSQL.has_table( - 'test_frame1'), 'Table not written to DB') + assert self.pandasSQL.has_table('test_frame1') num_entries = len(self.test_frame1) num_rows = self._count_rows('test_frame1') - self.assertEqual( - num_rows, num_entries, "not the same number of rows as entries") - + assert num_rows == num_entries self.drop_table('test_frame1') def _to_sql_append(self): @@ -423,15 +423,12 @@ def _to_sql_append(self): # Add to table again self.pandasSQL.to_sql( self.test_frame1, 'test_frame1', if_exists='append') - self.assertTrue(self.pandasSQL.has_table( - 'test_frame1'), 'Table not written to DB') + assert self.pandasSQL.has_table('test_frame1') num_entries = 2 * len(self.test_frame1) num_rows = self._count_rows('test_frame1') - self.assertEqual( - num_rows, num_entries, "not the same number of rows 
as entries") - + assert num_rows == num_entries self.drop_table('test_frame1') def _roundtrip(self): @@ -458,7 +455,7 @@ def _to_sql_save_index(self): columns=['A', 'B', 'C'], index=['A']) self.pandasSQL.to_sql(df, 'test_to_sql_saves_index') ix_cols = self._get_index_columns('test_to_sql_saves_index') - self.assertEqual(ix_cols, [['A', ], ]) + assert ix_cols == [['A', ], ] def _transaction_test(self): self.pandasSQL.execute("CREATE TABLE test_trans (A INT, B TEXT)") @@ -474,13 +471,13 @@ def _transaction_test(self): # ignore raised exception pass res = self.pandasSQL.read_query('SELECT * FROM test_trans') - self.assertEqual(len(res), 0) + assert len(res) == 0 # Make sure when transaction is committed, rows do get inserted with self.pandasSQL.run_transaction() as trans: trans.execute(ins_sql) res2 = self.pandasSQL.read_query('SELECT * FROM test_trans') - self.assertEqual(len(res2), 1) + assert len(res2) == 1 # ----------------------------------------------------------------------------- @@ -506,7 +503,7 @@ class _TestSQLApi(PandasSQLTest): flavor = 'sqlite' mode = None - def setUp(self): + def setup_method(self, method): self.conn = self.connect() self._load_iris_data() self._load_iris_view() @@ -527,19 +524,15 @@ def test_read_sql_view(self): def test_to_sql(self): sql.to_sql(self.test_frame1, 'test_frame1', self.conn) - self.assertTrue( - sql.has_table('test_frame1', self.conn), - 'Table not written to DB') + assert sql.has_table('test_frame1', self.conn) def test_to_sql_fail(self): sql.to_sql(self.test_frame1, 'test_frame2', self.conn, if_exists='fail') - self.assertTrue( - sql.has_table('test_frame2', self.conn), - 'Table not written to DB') + assert sql.has_table('test_frame2', self.conn) - self.assertRaises(ValueError, sql.to_sql, self.test_frame1, - 'test_frame2', self.conn, if_exists='fail') + pytest.raises(ValueError, sql.to_sql, self.test_frame1, + 'test_frame2', self.conn, if_exists='fail') def test_to_sql_replace(self): sql.to_sql(self.test_frame1, 
'test_frame3', @@ -547,15 +540,12 @@ def test_to_sql_replace(self): # Add to table again sql.to_sql(self.test_frame1, 'test_frame3', self.conn, if_exists='replace') - self.assertTrue( - sql.has_table('test_frame3', self.conn), - 'Table not written to DB') + assert sql.has_table('test_frame3', self.conn) num_entries = len(self.test_frame1) num_rows = self._count_rows('test_frame3') - self.assertEqual( - num_rows, num_entries, "not the same number of rows as entries") + assert num_rows == num_entries def test_to_sql_append(self): sql.to_sql(self.test_frame1, 'test_frame4', @@ -564,15 +554,12 @@ def test_to_sql_append(self): # Add to table again sql.to_sql(self.test_frame1, 'test_frame4', self.conn, if_exists='append') - self.assertTrue( - sql.has_table('test_frame4', self.conn), - 'Table not written to DB') + assert sql.has_table('test_frame4', self.conn) num_entries = 2 * len(self.test_frame1) num_rows = self._count_rows('test_frame4') - self.assertEqual( - num_rows, num_entries, "not the same number of rows as entries") + assert num_rows == num_entries def test_to_sql_type_mapping(self): sql.to_sql(self.test_frame3, 'test_frame5', self.conn, index=False) @@ -587,8 +574,9 @@ def test_to_sql_series(self): tm.assert_frame_equal(s.to_frame(), s2) def test_to_sql_panel(self): - panel = tm.makePanel() - self.assertRaises(NotImplementedError, sql.to_sql, panel, + with catch_warnings(record=True): + panel = tm.makePanel() + pytest.raises(NotImplementedError, sql.to_sql, panel, 'test_panel', self.conn) def test_roundtrip(self): @@ -620,36 +608,50 @@ def test_execute_sql(self): tm.equalContents(row, [5.1, 3.5, 1.4, 0.2, 'Iris-setosa']) def test_date_parsing(self): - # Test date parsing in read_sq + # Test date parsing in read_sql # No Parsing df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn) - self.assertFalse( - issubclass(df.DateCol.dtype.type, np.datetime64), - "DateCol loaded with incorrect type") + assert not issubclass(df.DateCol.dtype.type, 
np.datetime64) df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn, parse_dates=['DateCol']) - self.assertTrue( - issubclass(df.DateCol.dtype.type, np.datetime64), - "DateCol loaded with incorrect type") + assert issubclass(df.DateCol.dtype.type, np.datetime64) + assert df.DateCol.tolist() == [ + pd.Timestamp(2000, 1, 3, 0, 0, 0), + pd.Timestamp(2000, 1, 4, 0, 0, 0) + ] df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn, parse_dates={'DateCol': '%Y-%m-%d %H:%M:%S'}) - self.assertTrue( - issubclass(df.DateCol.dtype.type, np.datetime64), - "DateCol loaded with incorrect type") + assert issubclass(df.DateCol.dtype.type, np.datetime64) + assert df.DateCol.tolist() == [ + pd.Timestamp(2000, 1, 3, 0, 0, 0), + pd.Timestamp(2000, 1, 4, 0, 0, 0) + ] df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn, parse_dates=['IntDateCol']) - - self.assertTrue(issubclass(df.IntDateCol.dtype.type, np.datetime64), - "IntDateCol loaded with incorrect type") + assert issubclass(df.IntDateCol.dtype.type, np.datetime64) + assert df.IntDateCol.tolist() == [ + pd.Timestamp(1986, 12, 25, 0, 0, 0), + pd.Timestamp(2013, 1, 1, 0, 0, 0) + ] df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn, parse_dates={'IntDateCol': 's'}) + assert issubclass(df.IntDateCol.dtype.type, np.datetime64) + assert df.IntDateCol.tolist() == [ + pd.Timestamp(1986, 12, 25, 0, 0, 0), + pd.Timestamp(2013, 1, 1, 0, 0, 0) + ] - self.assertTrue(issubclass(df.IntDateCol.dtype.type, np.datetime64), - "IntDateCol loaded with incorrect type") + df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn, + parse_dates={'IntDateOnlyCol': '%Y%m%d'}) + assert issubclass(df.IntDateOnlyCol.dtype.type, np.datetime64) + assert df.IntDateOnlyCol.tolist() == [ + pd.Timestamp('2010-10-10'), + pd.Timestamp('2010-12-12') + ] def test_date_and_index(self): # Test case where same column appears in parse_date and index_col @@ -658,11 +660,8 @@ def test_date_and_index(self): 
index_col='DateCol', parse_dates=['DateCol', 'IntDateCol']) - self.assertTrue(issubclass(df.index.dtype.type, np.datetime64), - "DateCol loaded with incorrect type") - - self.assertTrue(issubclass(df.IntDateCol.dtype.type, np.datetime64), - "IntDateCol loaded with incorrect type") + assert issubclass(df.index.dtype.type, np.datetime64) + assert issubclass(df.IntDateCol.dtype.type, np.datetime64) def test_timedelta(self): @@ -677,7 +676,7 @@ def test_timedelta(self): def test_complex(self): df = DataFrame({'a': [1 + 1j, 2j]}) # Complex data type should raise error - self.assertRaises(ValueError, df.to_sql, 'test_complex', self.conn) + pytest.raises(ValueError, df.to_sql, 'test_complex', self.conn) def test_to_sql_index_label(self): temp_frame = DataFrame({'col1': range(4)}) @@ -685,29 +684,39 @@ def test_to_sql_index_label(self): # no index name, defaults to 'index' sql.to_sql(temp_frame, 'test_index_label', self.conn) frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn) - self.assertEqual(frame.columns[0], 'index') + assert frame.columns[0] == 'index' # specifying index_label sql.to_sql(temp_frame, 'test_index_label', self.conn, if_exists='replace', index_label='other_label') frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn) - self.assertEqual(frame.columns[0], 'other_label', - "Specified index_label not written to database") + assert frame.columns[0] == "other_label" # using the index name temp_frame.index.name = 'index_name' sql.to_sql(temp_frame, 'test_index_label', self.conn, if_exists='replace') frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn) - self.assertEqual(frame.columns[0], 'index_name', - "Index name not written to database") + assert frame.columns[0] == "index_name" # has index name, but specifying index_label sql.to_sql(temp_frame, 'test_index_label', self.conn, if_exists='replace', index_label='other_label') frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn) - 
self.assertEqual(frame.columns[0], 'other_label', - "Specified index_label not written to database") + assert frame.columns[0] == "other_label" + + # index name is integer + temp_frame.index.name = 0 + sql.to_sql(temp_frame, 'test_index_label', self.conn, + if_exists='replace') + frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn) + assert frame.columns[0] == "0" + + temp_frame.index.name = None + sql.to_sql(temp_frame, 'test_index_label', self.conn, + if_exists='replace', index_label=0) + frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn) + assert frame.columns[0] == "0" def test_to_sql_index_label_multiindex(self): temp_frame = DataFrame({'col1': range(4)}, @@ -717,35 +726,32 @@ def test_to_sql_index_label_multiindex(self): # no index name, defaults to 'level_0' and 'level_1' sql.to_sql(temp_frame, 'test_index_label', self.conn) frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn) - self.assertEqual(frame.columns[0], 'level_0') - self.assertEqual(frame.columns[1], 'level_1') + assert frame.columns[0] == 'level_0' + assert frame.columns[1] == 'level_1' # specifying index_label sql.to_sql(temp_frame, 'test_index_label', self.conn, if_exists='replace', index_label=['A', 'B']) frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn) - self.assertEqual(frame.columns[:2].tolist(), ['A', 'B'], - "Specified index_labels not written to database") + assert frame.columns[:2].tolist() == ['A', 'B'] # using the index name temp_frame.index.names = ['A', 'B'] sql.to_sql(temp_frame, 'test_index_label', self.conn, if_exists='replace') frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn) - self.assertEqual(frame.columns[:2].tolist(), ['A', 'B'], - "Index names not written to database") + assert frame.columns[:2].tolist() == ['A', 'B'] # has index name, but specifying index_label sql.to_sql(temp_frame, 'test_index_label', self.conn, if_exists='replace', index_label=['C', 'D']) frame = 
sql.read_sql_query('SELECT * FROM test_index_label', self.conn) - self.assertEqual(frame.columns[:2].tolist(), ['C', 'D'], - "Specified index_labels not written to database") + assert frame.columns[:2].tolist() == ['C', 'D'] # wrong length of index_label - self.assertRaises(ValueError, sql.to_sql, temp_frame, - 'test_index_label', self.conn, if_exists='replace', - index_label='C') + pytest.raises(ValueError, sql.to_sql, temp_frame, + 'test_index_label', self.conn, if_exists='replace', + index_label='C') def test_multiindex_roundtrip(self): df = DataFrame.from_records([(1, 2.1, 'line1'), (2, 1.5, 'line2')], @@ -763,27 +769,27 @@ def test_integer_col_names(self): def test_get_schema(self): create_sql = sql.get_schema(self.test_frame1, 'test', con=self.conn) - self.assertTrue('CREATE' in create_sql) + assert 'CREATE' in create_sql def test_get_schema_dtypes(self): float_frame = DataFrame({'a': [1.1, 1.2], 'b': [2.1, 2.2]}) dtype = sqlalchemy.Integer if self.mode == 'sqlalchemy' else 'INTEGER' create_sql = sql.get_schema(float_frame, 'test', con=self.conn, dtype={'b': dtype}) - self.assertTrue('CREATE' in create_sql) - self.assertTrue('INTEGER' in create_sql) + assert 'CREATE' in create_sql + assert 'INTEGER' in create_sql def test_get_schema_keys(self): frame = DataFrame({'Col1': [1.1, 1.2], 'Col2': [2.1, 2.2]}) create_sql = sql.get_schema(frame, 'test', con=self.conn, keys='Col1') constraint_sentence = 'CONSTRAINT test_pk PRIMARY KEY ("Col1")' - self.assertTrue(constraint_sentence in create_sql) + assert constraint_sentence in create_sql # multiple columns as key (GH10385) create_sql = sql.get_schema(self.test_frame1, 'test', con=self.conn, keys=['A', 'B']) constraint_sentence = 'CONSTRAINT test_pk PRIMARY KEY ("A", "B")' - self.assertTrue(constraint_sentence in create_sql) + assert constraint_sentence in create_sql def test_chunksize_read(self): df = DataFrame(np.random.randn(22, 5), columns=list('abcde')) @@ -800,7 +806,7 @@ def test_chunksize_read(self): for chunk 
in sql.read_sql_query("select * from test_chunksize", self.conn, chunksize=5): res2 = concat([res2, chunk], ignore_index=True) - self.assertEqual(len(chunk), sizes[i]) + assert len(chunk) == sizes[i] i += 1 tm.assert_frame_equal(res1, res2) @@ -814,7 +820,7 @@ def test_chunksize_read(self): for chunk in sql.read_sql_table("test_chunksize", self.conn, chunksize=5): res3 = concat([res3, chunk], ignore_index=True) - self.assertEqual(len(chunk), sizes[i]) + assert len(chunk) == sizes[i] i += 1 tm.assert_frame_equal(res1, res3) @@ -838,8 +844,19 @@ def test_unicode_column_name(self): df = DataFrame([[1, 2], [3, 4]], columns=[u'\xe9', u'b']) df.to_sql('test_unicode', self.conn, index=False) + def test_escaped_table_name(self): + # GH 13206 + df = DataFrame({'A': [0, 1, 2], 'B': [0.2, np.nan, 5.6]}) + df.to_sql('d1187b08-4943-4c8d-a7f6', self.conn, index=False) + + res = sql.read_sql_query('SELECT * FROM `d1187b08-4943-4c8d-a7f6`', + self.conn) -class TestSQLApi(SQLAlchemyMixIn, _TestSQLApi, unittest.TestCase): + tm.assert_frame_equal(res, df) + + +@pytest.mark.single +class TestSQLApi(SQLAlchemyMixIn, _TestSQLApi): """ Test the public API as it would be used directly @@ -862,29 +879,24 @@ def test_read_table_columns(self): cols = ['A', 'B'] result = sql.read_sql_table('test_frame', self.conn, columns=cols) - self.assertEqual(result.columns.tolist(), cols, - "Columns not correctly selected") + assert result.columns.tolist() == cols def test_read_table_index_col(self): # test columns argument in read_table sql.to_sql(self.test_frame1, 'test_frame', self.conn) result = sql.read_sql_table('test_frame', self.conn, index_col="index") - self.assertEqual(result.index.names, ["index"], - "index_col not correctly set") + assert result.index.names == ["index"] result = sql.read_sql_table( 'test_frame', self.conn, index_col=["A", "B"]) - self.assertEqual(result.index.names, ["A", "B"], - "index_col not correctly set") + assert result.index.names == ["A", "B"] result = 
sql.read_sql_table('test_frame', self.conn, index_col=["A", "B"], columns=["C", "D"]) - self.assertEqual(result.index.names, ["A", "B"], - "index_col not correctly set") - self.assertEqual(result.columns.tolist(), ["C", "D"], - "columns not set correctly whith index_col") + assert result.index.names == ["A", "B"] + assert result.columns.tolist() == ["C", "D"] def test_read_sql_delegate(self): iris_frame1 = sql.read_sql_query( @@ -911,10 +923,11 @@ def test_not_reflect_all_tables(self): sql.read_sql_table('other_table', self.conn) sql.read_sql_query('SELECT * FROM other_table', self.conn) # Verify some things - self.assertEqual(len(w), 0, "Warning triggered for other table") + assert len(w) == 0 def test_warning_case_insensitive_table_name(self): - # see GH7815. + # see gh-7815 + # # We can't test that this warning is triggered, a the database # configuration would have to be altered. But here we test that # the warning is certainly NOT triggered in a normal case. @@ -924,8 +937,7 @@ def test_warning_case_insensitive_table_name(self): # This should not trigger a Warning self.test_frame1.to_sql('CaseSensitive', self.conn) # Verify some things - self.assertEqual( - len(w), 0, "Warning triggered for writing a table") + assert len(w) == 0 def _get_index_columns(self, tbl_name): from sqlalchemy.engine import reflection @@ -941,8 +953,7 @@ def test_sqlalchemy_type_mapping(self): utc=True)}) db = sql.SQLDatabase(self.conn) table = sql.SQLTable("test_type", db, frame=df) - self.assertTrue(isinstance( - table.table.c['time'].type, sqltypes.DateTime)) + assert isinstance(table.table.c['time'].type, sqltypes.DateTime) def test_database_uri_string(self): @@ -965,8 +976,15 @@ def test_database_uri_string(self): # using driver that will not be installed on Travis to trigger error # in sqlalchemy.create_engine -> test passing of this error to user + try: + # the rest of this test depends on pg8000's being absent + import pg8000 # noqa + pytest.skip("pg8000 is installed") + except 
ImportError: + pass + db_uri = "postgresql+pg8000://user:pass@host/dbname" - with tm.assertRaisesRegexp(ImportError, "pg8000"): + with tm.assert_raises_regex(ImportError, "pg8000"): sql.read_sql("select * from table", db_uri) def _make_iris_table_metadata(self): @@ -988,7 +1006,7 @@ def test_query_by_text_obj(self): iris_df = sql.read_sql(name_text, self.conn, params={ 'name': 'Iris-versicolor'}) all_names = set(iris_df['Name']) - self.assertEqual(all_names, set(['Iris-versicolor'])) + assert all_names == set(['Iris-versicolor']) def test_query_by_select_obj(self): # WIP : GH10846 @@ -999,7 +1017,7 @@ def test_query_by_select_obj(self): iris_df = sql.read_sql(name_select, self.conn, params={'name': 'Iris-setosa'}) all_names = set(iris_df['Name']) - self.assertEqual(all_names, set(['Iris-setosa'])) + assert all_names == set(['Iris-setosa']) class _EngineToConnMixin(object): @@ -1007,8 +1025,8 @@ class _EngineToConnMixin(object): A mixin that causes setup_connect to create a conn rather than an engine. 
""" - def setUp(self): - super(_EngineToConnMixin, self).setUp() + def setup_method(self, method): + super(_EngineToConnMixin, self).setup_method(method) engine = self.conn conn = engine.connect() self.__tx = conn.begin() @@ -1016,19 +1034,21 @@ def setUp(self): self.__engine = engine self.conn = conn - def tearDown(self): + def teardown_method(self, method): self.__tx.rollback() self.conn.close() self.conn = self.__engine self.pandasSQL = sql.SQLDatabase(self.__engine) - super(_EngineToConnMixin, self).tearDown() + super(_EngineToConnMixin, self).teardown_method(method) -class TestSQLApiConn(_EngineToConnMixin, TestSQLApi, unittest.TestCase): +@pytest.mark.single +class TestSQLApiConn(_EngineToConnMixin, TestSQLApi): pass -class TestSQLiteFallbackApi(SQLiteMixIn, _TestSQLApi, unittest.TestCase): +@pytest.mark.single +class TestSQLiteFallbackApi(SQLiteMixIn, _TestSQLApi): """ Test the public sqlite connection fallback API @@ -1060,8 +1080,8 @@ def test_sql_open_close(self): def test_con_string_import_error(self): if not SQLALCHEMY_INSTALLED: conn = 'mysql://root@localhost/pandas_nosetest' - self.assertRaises(ImportError, sql.read_sql, "SELECT * FROM iris", - conn) + pytest.raises(ImportError, sql.read_sql, "SELECT * FROM iris", + conn) else: pytest.skip('SQLAlchemy is installed') @@ -1070,7 +1090,7 @@ def test_read_sql_delegate(self): iris_frame2 = sql.read_sql("SELECT * FROM iris", self.conn) tm.assert_frame_equal(iris_frame1, iris_frame2) - self.assertRaises(sql.DatabaseError, sql.read_sql, 'iris', self.conn) + pytest.raises(sql.DatabaseError, sql.read_sql, 'iris', self.conn) def test_safe_names_warning(self): # GH 6798 @@ -1082,7 +1102,7 @@ def test_safe_names_warning(self): def test_get_schema2(self): # without providing a connection object (available for backwards comp) create_sql = sql.get_schema(self.test_frame1, 'test') - self.assertTrue('CREATE' in create_sql) + assert 'CREATE' in create_sql def _get_sqlite_column_type(self, schema, column): @@ -1099,8 
+1119,7 @@ def test_sqlite_type_mapping(self): db = sql.SQLiteDatabase(self.conn) table = sql.SQLiteTable("test_type", db, frame=df) schema = table.sql_schema() - self.assertEqual(self._get_sqlite_column_type(schema, 'time'), - "TIMESTAMP") + assert self._get_sqlite_column_type(schema, 'time') == "TIMESTAMP" # ----------------------------------------------------------------------------- @@ -1118,7 +1137,7 @@ class _TestSQLAlchemy(SQLAlchemyMixIn, PandasSQLTest): flavor = None @classmethod - def setUpClass(cls): + def setup_class(cls): cls.setup_import() cls.setup_driver() @@ -1130,7 +1149,7 @@ def setUpClass(cls): msg = "{0} - can't connect to {1} server".format(cls, cls.flavor) pytest.skip(msg) - def setUp(self): + def setup_method(self, method): self.setup_connect() self._load_iris_data() @@ -1193,8 +1212,7 @@ def test_create_table(self): pandasSQL = sql.SQLDatabase(temp_conn) pandasSQL.to_sql(temp_frame, 'temp_frame') - self.assertTrue( - temp_conn.has_table('temp_frame'), 'Table not written to DB') + assert temp_conn.has_table('temp_frame') def test_drop_table(self): temp_conn = self.connect() @@ -1205,13 +1223,11 @@ def test_drop_table(self): pandasSQL = sql.SQLDatabase(temp_conn) pandasSQL.to_sql(temp_frame, 'temp_frame') - self.assertTrue( - temp_conn.has_table('temp_frame'), 'Table not written to DB') + assert temp_conn.has_table('temp_frame') pandasSQL.drop_table('temp_frame') - self.assertFalse( - temp_conn.has_table('temp_frame'), 'Table not deleted from DB') + assert not temp_conn.has_table('temp_frame') def test_roundtrip(self): self._roundtrip() @@ -1230,25 +1246,20 @@ def test_read_table_columns(self): iris_frame.columns.values, ['SepalLength', 'SepalLength']) def test_read_table_absent(self): - self.assertRaises( + pytest.raises( ValueError, sql.read_sql_table, "this_doesnt_exist", con=self.conn) def test_default_type_conversion(self): df = sql.read_sql_table("types_test_data", self.conn) - self.assertTrue(issubclass(df.FloatCol.dtype.type, 
np.floating), - "FloatCol loaded with incorrect type") - self.assertTrue(issubclass(df.IntCol.dtype.type, np.integer), - "IntCol loaded with incorrect type") - self.assertTrue(issubclass(df.BoolCol.dtype.type, np.bool_), - "BoolCol loaded with incorrect type") + assert issubclass(df.FloatCol.dtype.type, np.floating) + assert issubclass(df.IntCol.dtype.type, np.integer) + assert issubclass(df.BoolCol.dtype.type, np.bool_) # Int column with NA values stays as float - self.assertTrue(issubclass(df.IntColWithNull.dtype.type, np.floating), - "IntColWithNull loaded with incorrect type") + assert issubclass(df.IntColWithNull.dtype.type, np.floating) # Bool column with NA values becomes object - self.assertTrue(issubclass(df.BoolColWithNull.dtype.type, np.object), - "BoolColWithNull loaded with incorrect type") + assert issubclass(df.BoolColWithNull.dtype.type, np.object) def test_bigint(self): # int64 should be converted to BigInteger, GH7433 @@ -1263,8 +1274,7 @@ def test_default_date_load(self): # IMPORTANT - sqlite has no native date type, so shouldn't parse, but # MySQL SHOULD be converted. 
- self.assertTrue(issubclass(df.DateCol.dtype.type, np.datetime64), - "DateCol loaded with incorrect type") + assert issubclass(df.DateCol.dtype.type, np.datetime64) def test_datetime_with_timezone(self): # edge case that converts postgresql datetime with time zone types @@ -1278,24 +1288,24 @@ def check(col): # "2000-01-01 00:00:00-08:00" should convert to # "2000-01-01 08:00:00" - self.assertEqual(col[0], Timestamp('2000-01-01 08:00:00')) + assert col[0] == Timestamp('2000-01-01 08:00:00') # "2000-06-01 00:00:00-07:00" should convert to # "2000-06-01 07:00:00" - self.assertEqual(col[1], Timestamp('2000-06-01 07:00:00')) + assert col[1] == Timestamp('2000-06-01 07:00:00') elif is_datetime64tz_dtype(col.dtype): - self.assertTrue(str(col.dt.tz) == 'UTC') + assert str(col.dt.tz) == 'UTC' # "2000-01-01 00:00:00-08:00" should convert to # "2000-01-01 08:00:00" - self.assertEqual(col[0], Timestamp( - '2000-01-01 08:00:00', tz='UTC')) - # "2000-06-01 00:00:00-07:00" should convert to # "2000-06-01 07:00:00" - self.assertEqual(col[1], Timestamp( - '2000-06-01 07:00:00', tz='UTC')) + # GH 6415 + expected_data = [Timestamp('2000-01-01 08:00:00', tz='UTC'), + Timestamp('2000-06-01 07:00:00', tz='UTC')] + expected = Series(expected_data, name=col.name) + tm.assert_series_equal(col, expected) else: raise AssertionError("DateCol loaded with incorrect type " @@ -1310,30 +1320,29 @@ def check(col): # even with the same versions of psycopg2 & sqlalchemy, possibly a # Postgrsql server version difference col = df.DateColWithTz - self.assertTrue(is_object_dtype(col.dtype) or - is_datetime64_dtype(col.dtype) or - is_datetime64tz_dtype(col.dtype), - "DateCol loaded with incorrect type -> {0}" - .format(col.dtype)) + assert (is_object_dtype(col.dtype) or + is_datetime64_dtype(col.dtype) or + is_datetime64tz_dtype(col.dtype)) df = pd.read_sql_query("select * from types_test_data", self.conn, parse_dates=['DateColWithTz']) if not hasattr(df, 'DateColWithTz'): pytest.skip("no column with 
datetime with time zone") + col = df.DateColWithTz + assert is_datetime64tz_dtype(col.dtype) + assert str(col.dt.tz) == 'UTC' check(df.DateColWithTz) df = pd.concat(list(pd.read_sql_query("select * from types_test_data", self.conn, chunksize=1)), ignore_index=True) col = df.DateColWithTz - self.assertTrue(is_datetime64tz_dtype(col.dtype), - "DateCol loaded with incorrect type -> {0}" - .format(col.dtype)) - self.assertTrue(str(col.dt.tz) == 'UTC') + assert is_datetime64tz_dtype(col.dtype) + assert str(col.dt.tz) == 'UTC' expected = sql.read_sql_table("types_test_data", self.conn) - tm.assert_series_equal(df.DateColWithTz, - expected.DateColWithTz - .astype('datetime64[ns, UTC]')) + col = expected.DateColWithTz + assert is_datetime64tz_dtype(col.dtype) + tm.assert_series_equal(df.DateColWithTz, expected.DateColWithTz) # xref #7139 # this might or might not be converted depending on the postgres driver @@ -1346,33 +1355,27 @@ def test_date_parsing(self): df = sql.read_sql_table("types_test_data", self.conn, parse_dates=['DateCol']) - self.assertTrue(issubclass(df.DateCol.dtype.type, np.datetime64), - "DateCol loaded with incorrect type") + assert issubclass(df.DateCol.dtype.type, np.datetime64) df = sql.read_sql_table("types_test_data", self.conn, parse_dates={'DateCol': '%Y-%m-%d %H:%M:%S'}) - self.assertTrue(issubclass(df.DateCol.dtype.type, np.datetime64), - "DateCol loaded with incorrect type") + assert issubclass(df.DateCol.dtype.type, np.datetime64) df = sql.read_sql_table("types_test_data", self.conn, parse_dates={ 'DateCol': {'format': '%Y-%m-%d %H:%M:%S'}}) - self.assertTrue(issubclass(df.DateCol.dtype.type, np.datetime64), - "IntDateCol loaded with incorrect type") + assert issubclass(df.DateCol.dtype.type, np.datetime64) df = sql.read_sql_table( "types_test_data", self.conn, parse_dates=['IntDateCol']) - self.assertTrue(issubclass(df.IntDateCol.dtype.type, np.datetime64), - "IntDateCol loaded with incorrect type") + assert 
issubclass(df.IntDateCol.dtype.type, np.datetime64) df = sql.read_sql_table( "types_test_data", self.conn, parse_dates={'IntDateCol': 's'}) - self.assertTrue(issubclass(df.IntDateCol.dtype.type, np.datetime64), - "IntDateCol loaded with incorrect type") + assert issubclass(df.IntDateCol.dtype.type, np.datetime64) df = sql.read_sql_table("types_test_data", self.conn, parse_dates={'IntDateCol': {'unit': 's'}}) - self.assertTrue(issubclass(df.IntDateCol.dtype.type, np.datetime64), - "IntDateCol loaded with incorrect type") + assert issubclass(df.IntDateCol.dtype.type, np.datetime64) def test_datetime(self): df = DataFrame({'A': date_range('2013-01-01 09:00:00', periods=3), @@ -1388,7 +1391,7 @@ def test_datetime(self): result = sql.read_sql_query('SELECT * FROM test_datetime', self.conn) result = result.drop('index', axis=1) if self.flavor == 'sqlite': - self.assertTrue(isinstance(result.loc[0, 'A'], string_types)) + assert isinstance(result.loc[0, 'A'], string_types) result['A'] = to_datetime(result['A']) tm.assert_frame_equal(result, df) else: @@ -1407,7 +1410,7 @@ def test_datetime_NaT(self): # with read_sql -> no type information -> sqlite has no native result = sql.read_sql_query('SELECT * FROM test_datetime', self.conn) if self.flavor == 'sqlite': - self.assertTrue(isinstance(result.loc[0, 'A'], string_types)) + assert isinstance(result.loc[0, 'A'], string_types) result['A'] = to_datetime(result['A'], errors='coerce') tm.assert_frame_equal(result, df) else: @@ -1418,8 +1421,10 @@ def test_datetime_date(self): df = DataFrame([date(2014, 1, 1), date(2014, 1, 2)], columns=["a"]) df.to_sql('test_date', self.conn, index=False) res = read_sql_table('test_date', self.conn) + result = res['a'] + expected = to_datetime(df['a']) # comes back as datetime64 - tm.assert_series_equal(res['a'], to_datetime(df['a'])) + tm.assert_series_equal(result, expected) def test_datetime_time(self): # test support for datetime.time @@ -1540,16 +1545,16 @@ def test_dtype(self): meta = 
sqlalchemy.schema.MetaData(bind=self.conn) meta.reflect() sqltype = meta.tables['dtype_test2'].columns['B'].type - self.assertTrue(isinstance(sqltype, sqlalchemy.TEXT)) - self.assertRaises(ValueError, df.to_sql, - 'error', self.conn, dtype={'B': str}) + assert isinstance(sqltype, sqlalchemy.TEXT) + pytest.raises(ValueError, df.to_sql, + 'error', self.conn, dtype={'B': str}) # GH9083 df.to_sql('dtype_test3', self.conn, dtype={'B': sqlalchemy.String(10)}) meta.reflect() sqltype = meta.tables['dtype_test3'].columns['B'].type - self.assertTrue(isinstance(sqltype, sqlalchemy.String)) - self.assertEqual(sqltype.length, 10) + assert isinstance(sqltype, sqlalchemy.String) + assert sqltype.length == 10 # single dtype df.to_sql('single_dtype_test', self.conn, dtype=sqlalchemy.TEXT) @@ -1557,10 +1562,10 @@ def test_dtype(self): meta.reflect() sqltypea = meta.tables['single_dtype_test'].columns['A'].type sqltypeb = meta.tables['single_dtype_test'].columns['B'].type - self.assertTrue(isinstance(sqltypea, sqlalchemy.TEXT)) - self.assertTrue(isinstance(sqltypeb, sqlalchemy.TEXT)) + assert isinstance(sqltypea, sqlalchemy.TEXT) + assert isinstance(sqltypeb, sqlalchemy.TEXT) - def test_notnull_dtype(self): + def test_notna_dtype(self): cols = {'Bool': Series([True, None]), 'Date': Series([datetime(2012, 5, 1), None]), 'Int': Series([1, None], dtype='object'), @@ -1568,7 +1573,7 @@ def test_notnull_dtype(self): } df = DataFrame(cols) - tbl = 'notnull_dtype_test' + tbl = 'notna_dtype_test' df.to_sql(tbl, self.conn) returned_df = sql.read_sql_table(tbl, self.conn) # noqa meta = sqlalchemy.schema.MetaData(bind=self.conn) @@ -1580,10 +1585,10 @@ def test_notnull_dtype(self): col_dict = meta.tables[tbl].columns - self.assertTrue(isinstance(col_dict['Bool'].type, my_type)) - self.assertTrue(isinstance(col_dict['Date'].type, sqltypes.DateTime)) - self.assertTrue(isinstance(col_dict['Int'].type, sqltypes.Integer)) - self.assertTrue(isinstance(col_dict['Float'].type, sqltypes.Float)) + assert 
isinstance(col_dict['Bool'].type, my_type) + assert isinstance(col_dict['Date'].type, sqltypes.DateTime) + assert isinstance(col_dict['Int'].type, sqltypes.Integer) + assert isinstance(col_dict['Float'].type, sqltypes.Float) def test_double_precision(self): V = 1.23456789101112131415 @@ -1600,19 +1605,18 @@ def test_double_precision(self): res = sql.read_sql_table('test_dtypes', self.conn) # check precision of float64 - self.assertEqual(np.round(df['f64'].iloc[0], 14), - np.round(res['f64'].iloc[0], 14)) + assert (np.round(df['f64'].iloc[0], 14) == + np.round(res['f64'].iloc[0], 14)) # check sql types meta = sqlalchemy.schema.MetaData(bind=self.conn) meta.reflect() col_dict = meta.tables['test_dtypes'].columns - self.assertEqual(str(col_dict['f32'].type), - str(col_dict['f64_as_f32'].type)) - self.assertTrue(isinstance(col_dict['f32'].type, sqltypes.Float)) - self.assertTrue(isinstance(col_dict['f64'].type, sqltypes.Float)) - self.assertTrue(isinstance(col_dict['i32'].type, sqltypes.Integer)) - self.assertTrue(isinstance(col_dict['i64'].type, sqltypes.BigInteger)) + assert str(col_dict['f32'].type) == str(col_dict['f64_as_f32'].type) + assert isinstance(col_dict['f32'].type, sqltypes.Float) + assert isinstance(col_dict['f64'].type, sqltypes.Float) + assert isinstance(col_dict['i32'].type, sqltypes.Integer) + assert isinstance(col_dict['i64'].type, sqltypes.BigInteger) def test_connectable_issue_example(self): # This tests the example raised in issue @@ -1661,6 +1665,29 @@ class Temporary(Base): tm.assert_frame_equal(df, expected) + def test_insert_multivalues(self): + # issues addressed + # https://github.com/pandas-dev/pandas/issues/14315 + # https://github.com/pandas-dev/pandas/issues/8953 + + db = sql.SQLDatabase(self.conn) + df = DataFrame({'A': [1, 0, 0], 'B': [1.1, 0.2, 4.3]}) + table = sql.SQLTable("test_table", db, frame=df) + data = [ + {'A': 1, 'B': 0.46}, + {'A': 0, 'B': -2.06} + ] + statement = table.insert_statement(data, conn=self.conn)[0] + + if 
self.supports_multivalues_insert: + assert statement.parameters == data, ( + 'insert statement should be multivalues' + ) + else: + assert statement.parameters is None, ( + 'insert statement should not be multivalues' + ) + class _TestSQLAlchemyConn(_EngineToConnMixin, _TestSQLAlchemy): @@ -1675,6 +1702,7 @@ class _TestSQLiteAlchemy(object): """ flavor = 'sqlite' + supports_multivalues_insert = True @classmethod def connect(cls): @@ -1688,27 +1716,23 @@ def setup_driver(cls): def test_default_type_conversion(self): df = sql.read_sql_table("types_test_data", self.conn) - self.assertTrue(issubclass(df.FloatCol.dtype.type, np.floating), - "FloatCol loaded with incorrect type") - self.assertTrue(issubclass(df.IntCol.dtype.type, np.integer), - "IntCol loaded with incorrect type") + assert issubclass(df.FloatCol.dtype.type, np.floating) + assert issubclass(df.IntCol.dtype.type, np.integer) + # sqlite has no boolean type, so integer type is returned - self.assertTrue(issubclass(df.BoolCol.dtype.type, np.integer), - "BoolCol loaded with incorrect type") + assert issubclass(df.BoolCol.dtype.type, np.integer) # Int column with NA values stays as float - self.assertTrue(issubclass(df.IntColWithNull.dtype.type, np.floating), - "IntColWithNull loaded with incorrect type") + assert issubclass(df.IntColWithNull.dtype.type, np.floating) + # Non-native Bool column with NA values stays as float - self.assertTrue(issubclass(df.BoolColWithNull.dtype.type, np.floating), - "BoolColWithNull loaded with incorrect type") + assert issubclass(df.BoolColWithNull.dtype.type, np.floating) def test_default_date_load(self): df = sql.read_sql_table("types_test_data", self.conn) # IMPORTANT - sqlite has no native date type, so shouldn't parse, but - self.assertFalse(issubclass(df.DateCol.dtype.type, np.datetime64), - "DateCol loaded with incorrect type") + assert not issubclass(df.DateCol.dtype.type, np.datetime64) def test_bigint_warning(self): # test no warning for BIGINT (to support int64) is 
raised (GH7433) @@ -1718,7 +1742,7 @@ def test_bigint_warning(self): with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") sql.read_sql_table('test_bigintwarning', self.conn) - self.assertEqual(len(w), 0, "Warning triggered for other table") + assert len(w) == 0 class _TestMySQLAlchemy(object): @@ -1727,37 +1751,38 @@ class _TestMySQLAlchemy(object): """ flavor = 'mysql' + supports_multivalues_insert = True @classmethod def connect(cls): url = 'mysql+{driver}://root@localhost/pandas_nosetest' - return sqlalchemy.create_engine(url.format(driver=cls.driver)) + return sqlalchemy.create_engine(url.format(driver=cls.driver), + connect_args=cls.connect_args) @classmethod def setup_driver(cls): try: import pymysql # noqa cls.driver = 'pymysql' + from pymysql.constants import CLIENT + cls.connect_args = {'client_flag': CLIENT.MULTI_STATEMENTS} except ImportError: pytest.skip('pymysql not installed') def test_default_type_conversion(self): df = sql.read_sql_table("types_test_data", self.conn) - self.assertTrue(issubclass(df.FloatCol.dtype.type, np.floating), - "FloatCol loaded with incorrect type") - self.assertTrue(issubclass(df.IntCol.dtype.type, np.integer), - "IntCol loaded with incorrect type") + assert issubclass(df.FloatCol.dtype.type, np.floating) + assert issubclass(df.IntCol.dtype.type, np.integer) + # MySQL has no real BOOL type (it's an alias for TINYINT) - self.assertTrue(issubclass(df.BoolCol.dtype.type, np.integer), - "BoolCol loaded with incorrect type") + assert issubclass(df.BoolCol.dtype.type, np.integer) # Int column with NA values stays as float - self.assertTrue(issubclass(df.IntColWithNull.dtype.type, np.floating), - "IntColWithNull loaded with incorrect type") + assert issubclass(df.IntColWithNull.dtype.type, np.floating) + # Bool column with NA = int column with NA values => becomes float - self.assertTrue(issubclass(df.BoolColWithNull.dtype.type, np.floating), - "BoolColWithNull loaded with incorrect type") + assert 
issubclass(df.BoolColWithNull.dtype.type, np.floating) def test_read_procedure(self): # see GH7324. Although it is more an api test, it is added to the @@ -1796,6 +1821,7 @@ class _TestPostgreSQLAlchemy(object): """ flavor = 'postgresql' + supports_multivalues_insert = True @classmethod def connect(cls): @@ -1837,8 +1863,8 @@ def test_schema_support(self): res4 = sql.read_sql_table('test_schema_other', self.conn, schema='other') tm.assert_frame_equal(df, res4) - self.assertRaises(ValueError, sql.read_sql_table, 'test_schema_other', - self.conn, schema='public') + pytest.raises(ValueError, sql.read_sql_table, 'test_schema_other', + self.conn, schema='public') # different if_exists options @@ -1875,39 +1901,41 @@ def test_schema_support(self): tm.assert_frame_equal(res1, res2) -class TestMySQLAlchemy(_TestMySQLAlchemy, _TestSQLAlchemy, unittest.TestCase): +@pytest.mark.single +class TestMySQLAlchemy(_TestMySQLAlchemy, _TestSQLAlchemy): pass -class TestMySQLAlchemyConn(_TestMySQLAlchemy, _TestSQLAlchemyConn, - unittest.TestCase): +@pytest.mark.single +class TestMySQLAlchemyConn(_TestMySQLAlchemy, _TestSQLAlchemyConn): pass -class TestPostgreSQLAlchemy(_TestPostgreSQLAlchemy, _TestSQLAlchemy, - unittest.TestCase): +@pytest.mark.single +class TestPostgreSQLAlchemy(_TestPostgreSQLAlchemy, _TestSQLAlchemy): pass -class TestPostgreSQLAlchemyConn(_TestPostgreSQLAlchemy, _TestSQLAlchemyConn, - unittest.TestCase): +@pytest.mark.single +class TestPostgreSQLAlchemyConn(_TestPostgreSQLAlchemy, _TestSQLAlchemyConn): pass -class TestSQLiteAlchemy(_TestSQLiteAlchemy, _TestSQLAlchemy, - unittest.TestCase): +@pytest.mark.single +class TestSQLiteAlchemy(_TestSQLiteAlchemy, _TestSQLAlchemy): pass -class TestSQLiteAlchemyConn(_TestSQLiteAlchemy, _TestSQLAlchemyConn, - unittest.TestCase): +@pytest.mark.single +class TestSQLiteAlchemyConn(_TestSQLiteAlchemy, _TestSQLAlchemyConn): pass # ----------------------------------------------------------------------------- # -- Test Sqlite / MySQL 
fallback -class TestSQLiteFallback(SQLiteMixIn, PandasSQLTest, unittest.TestCase): +@pytest.mark.single +class TestSQLiteFallback(SQLiteMixIn, PandasSQLTest): """ Test the fallback mode against an in-memory sqlite database. @@ -1918,7 +1946,7 @@ class TestSQLiteFallback(SQLiteMixIn, PandasSQLTest, unittest.TestCase): def connect(cls): return sqlite3.connect(':memory:') - def setUp(self): + def setup_method(self, method): self.conn = self.connect() self.pandasSQL = sql.SQLiteDatabase(self.conn) @@ -1956,13 +1984,11 @@ def test_create_and_drop_table(self): self.pandasSQL.to_sql(temp_frame, 'drop_test_frame') - self.assertTrue(self.pandasSQL.has_table('drop_test_frame'), - 'Table not written to DB') + assert self.pandasSQL.has_table('drop_test_frame') self.pandasSQL.drop_table('drop_test_frame') - self.assertFalse(self.pandasSQL.has_table('drop_test_frame'), - 'Table not deleted from DB') + assert not self.pandasSQL.has_table('drop_test_frame') def test_roundtrip(self): self._roundtrip() @@ -2028,22 +2054,22 @@ def test_dtype(self): df.to_sql('dtype_test2', self.conn, dtype={'B': 'STRING'}) # sqlite stores Boolean values as INTEGER - self.assertEqual(self._get_sqlite_column_type( - 'dtype_test', 'B'), 'INTEGER') + assert self._get_sqlite_column_type( + 'dtype_test', 'B') == 'INTEGER' - self.assertEqual(self._get_sqlite_column_type( - 'dtype_test2', 'B'), 'STRING') - self.assertRaises(ValueError, df.to_sql, - 'error', self.conn, dtype={'B': bool}) + assert self._get_sqlite_column_type( + 'dtype_test2', 'B') == 'STRING' + pytest.raises(ValueError, df.to_sql, + 'error', self.conn, dtype={'B': bool}) # single dtype df.to_sql('single_dtype_test', self.conn, dtype='STRING') - self.assertEqual( - self._get_sqlite_column_type('single_dtype_test', 'A'), 'STRING') - self.assertEqual( - self._get_sqlite_column_type('single_dtype_test', 'B'), 'STRING') + assert self._get_sqlite_column_type( + 'single_dtype_test', 'A') == 'STRING' + assert self._get_sqlite_column_type( + 
'single_dtype_test', 'B') == 'STRING' - def test_notnull_dtype(self): + def test_notna_dtype(self): if self.flavor == 'mysql': pytest.skip('Not applicable to MySQL legacy') @@ -2054,21 +2080,20 @@ def test_notnull_dtype(self): } df = DataFrame(cols) - tbl = 'notnull_dtype_test' + tbl = 'notna_dtype_test' df.to_sql(tbl, self.conn) - self.assertEqual(self._get_sqlite_column_type(tbl, 'Bool'), 'INTEGER') - self.assertEqual(self._get_sqlite_column_type( - tbl, 'Date'), 'TIMESTAMP') - self.assertEqual(self._get_sqlite_column_type(tbl, 'Int'), 'INTEGER') - self.assertEqual(self._get_sqlite_column_type(tbl, 'Float'), 'REAL') + assert self._get_sqlite_column_type(tbl, 'Bool') == 'INTEGER' + assert self._get_sqlite_column_type(tbl, 'Date') == 'TIMESTAMP' + assert self._get_sqlite_column_type(tbl, 'Int') == 'INTEGER' + assert self._get_sqlite_column_type(tbl, 'Float') == 'REAL' def test_illegal_names(self): # For sqlite, these should work fine df = DataFrame([[1, 2], [3, 4]], columns=['a', 'b']) # Raise error on blank - self.assertRaises(ValueError, df.to_sql, "", self.conn) + pytest.raises(ValueError, df.to_sql, "", self.conn) for ndx, weird_name in enumerate( ['test_weird_name]', 'test_weird_name[', @@ -2108,7 +2133,7 @@ def format_query(sql, *args): """ processed_args = [] for arg in args: - if isinstance(arg, float) and isnull(arg): + if isinstance(arg, float) and isna(arg): arg = None formatter = _formatters[type(arg)] @@ -2133,9 +2158,11 @@ def _skip_if_no_pymysql(): pytest.skip('pymysql not installed, skipping') -class TestXSQLite(SQLiteMixIn, tm.TestCase): +@pytest.mark.single +class TestXSQLite(SQLiteMixIn): - def setUp(self): + def setup_method(self, method): + self.method = method self.conn = sqlite3.connect(':memory:') def test_basic(self): @@ -2161,7 +2188,7 @@ def test_write_row_by_row(self): result = sql.read_sql("select * from test", con=self.conn) result.index = frame.index - tm.assert_frame_equal(result, frame) + tm.assert_frame_equal(result, frame, 
check_less_precise=True) def test_execute(self): frame = tm.makeTimeDataFrame() @@ -2185,15 +2212,16 @@ def test_schema(self): for l in lines: tokens = l.split(' ') if len(tokens) == 2 and tokens[0] == 'A': - self.assertTrue(tokens[1] == 'DATETIME') + assert tokens[1] == 'DATETIME' frame = tm.makeTimeDataFrame() create_sql = sql.get_schema(frame, 'test', keys=['A', 'B']) lines = create_sql.splitlines() - self.assertTrue('PRIMARY KEY ("A", "B")' in create_sql) + assert 'PRIMARY KEY ("A", "B")' in create_sql cur = self.conn.cursor() cur.execute(create_sql) + @tm.capture_stdout def test_execute_fail(self): create_sql = """ CREATE TABLE test @@ -2210,14 +2238,10 @@ def test_execute_fail(self): sql.execute('INSERT INTO test VALUES("foo", "bar", 1.234)', self.conn) sql.execute('INSERT INTO test VALUES("foo", "baz", 2.567)', self.conn) - try: - sys.stdout = StringIO() - self.assertRaises(Exception, sql.execute, - 'INSERT INTO test VALUES("foo", "bar", 7)', - self.conn) - finally: - sys.stdout = sys.__stdout__ + with pytest.raises(Exception): + sql.execute('INSERT INTO test VALUES("foo", "bar", 7)', self.conn) + @tm.capture_stdout def test_execute_closed_connection(self): create_sql = """ CREATE TABLE test @@ -2233,15 +2257,12 @@ def test_execute_closed_connection(self): sql.execute('INSERT INTO test VALUES("foo", "bar", 1.234)', self.conn) self.conn.close() - try: - sys.stdout = StringIO() - self.assertRaises(Exception, tquery, "select * from test", - con=self.conn) - finally: - sys.stdout = sys.__stdout__ + + with pytest.raises(Exception): + tquery("select * from test", con=self.conn) # Initialize connection again (needed for tearDown) - self.setUp() + self.setup_method(self.method) def test_na_roundtrip(self): pass @@ -2279,10 +2300,10 @@ def test_onecolumn_of_integer(self): sql.to_sql(mono_df, con=self.conn, name='mono_df', index=False) # computing the sum via sql con_x = self.conn - the_sum = sum([my_c0[0] - for my_c0 in con_x.execute("select * from mono_df")]) + 
the_sum = sum(my_c0[0] + for my_c0 in con_x.execute("select * from mono_df")) # it should not fail, and gives 3 ( Issue #3628 ) - self.assertEqual(the_sum, 3) + assert the_sum == 3 result = sql.read_sql("select * from mono_df", con_x) tm.assert_frame_equal(result, mono_df) @@ -2302,77 +2323,52 @@ def clean_up(test_table_to_drop): self.drop_table(test_table_to_drop) # test if invalid value for if_exists raises appropriate error - self.assertRaises(ValueError, - sql.to_sql, - frame=df_if_exists_1, - con=self.conn, - name=table_name, - if_exists='notvalidvalue') + pytest.raises(ValueError, + sql.to_sql, + frame=df_if_exists_1, + con=self.conn, + name=table_name, + if_exists='notvalidvalue') clean_up(table_name) # test if_exists='fail' sql.to_sql(frame=df_if_exists_1, con=self.conn, name=table_name, if_exists='fail') - self.assertRaises(ValueError, - sql.to_sql, - frame=df_if_exists_1, - con=self.conn, - name=table_name, - if_exists='fail') + pytest.raises(ValueError, + sql.to_sql, + frame=df_if_exists_1, + con=self.conn, + name=table_name, + if_exists='fail') # test if_exists='replace' sql.to_sql(frame=df_if_exists_1, con=self.conn, name=table_name, if_exists='replace', index=False) - self.assertEqual(tquery(sql_select, con=self.conn), - [(1, 'A'), (2, 'B')]) + assert tquery(sql_select, con=self.conn) == [(1, 'A'), (2, 'B')] sql.to_sql(frame=df_if_exists_2, con=self.conn, name=table_name, if_exists='replace', index=False) - self.assertEqual(tquery(sql_select, con=self.conn), - [(3, 'C'), (4, 'D'), (5, 'E')]) + assert (tquery(sql_select, con=self.conn) == + [(3, 'C'), (4, 'D'), (5, 'E')]) clean_up(table_name) # test if_exists='append' sql.to_sql(frame=df_if_exists_1, con=self.conn, name=table_name, if_exists='fail', index=False) - self.assertEqual(tquery(sql_select, con=self.conn), - [(1, 'A'), (2, 'B')]) + assert tquery(sql_select, con=self.conn) == [(1, 'A'), (2, 'B')] sql.to_sql(frame=df_if_exists_2, con=self.conn, name=table_name, if_exists='append', index=False) - 
self.assertEqual(tquery(sql_select, con=self.conn), - [(1, 'A'), (2, 'B'), (3, 'C'), (4, 'D'), (5, 'E')]) + assert (tquery(sql_select, con=self.conn) == + [(1, 'A'), (2, 'B'), (3, 'C'), (4, 'D'), (5, 'E')]) clean_up(table_name) -class TestSQLFlavorDeprecation(tm.TestCase): - """ - gh-13611: test that the 'flavor' parameter - is appropriately deprecated by checking the - functions that directly raise the warning - """ - - con = 1234 # don't need real connection for this - funcs = ['SQLiteDatabase', 'pandasSQL_builder'] - - def test_unsupported_flavor(self): - msg = 'is not supported' - - for func in self.funcs: - tm.assertRaisesRegexp(ValueError, msg, getattr(sql, func), - self.con, flavor='mysql') - - def test_deprecated_flavor(self): - for func in self.funcs: - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - getattr(sql, func)(self.con, flavor='sqlite') - - -@unittest.skip("gh-13611: there is no support for MySQL " - "if SQLAlchemy is not installed") -class TestXMySQL(MySQLMixIn, tm.TestCase): +@pytest.mark.single +@pytest.mark.skip(reason="gh-13611: there is no support for MySQL " + "if SQLAlchemy is not installed") +class TestXMySQL(MySQLMixIn): @classmethod - def setUpClass(cls): + def setup_class(cls): _skip_if_no_pymysql() # test connection @@ -2400,7 +2396,7 @@ def setUpClass(cls): "[pandas] in your system's mysql default file, " "typically located at ~/.my.cnf or /etc/.my.cnf. ") - def setUp(self): + def setup_method(self, method): _skip_if_no_pymysql() import pymysql try: @@ -2426,6 +2422,8 @@ def setUp(self): "[pandas] in your system's mysql default file, " "typically located at ~/.my.cnf or /etc/.my.cnf. 
") + self.method = method + def test_basic(self): _skip_if_no_pymysql() frame = tm.makeTimeDataFrame() @@ -2450,7 +2448,7 @@ def test_write_row_by_row(self): result = sql.read_sql("select * from test", con=self.conn) result.index = frame.index - tm.assert_frame_equal(result, frame) + tm.assert_frame_equal(result, frame, check_less_precise=True) def test_chunksize_read_type(self): _skip_if_no_pymysql() @@ -2495,17 +2493,18 @@ def test_schema(self): for l in lines: tokens = l.split(' ') if len(tokens) == 2 and tokens[0] == 'A': - self.assertTrue(tokens[1] == 'DATETIME') + assert tokens[1] == 'DATETIME' frame = tm.makeTimeDataFrame() drop_sql = "DROP TABLE IF EXISTS test" create_sql = sql.get_schema(frame, 'test', keys=['A', 'B']) lines = create_sql.splitlines() - self.assertTrue('PRIMARY KEY (`A`, `B`)' in create_sql) + assert 'PRIMARY KEY (`A`, `B`)' in create_sql cur = self.conn.cursor() cur.execute(drop_sql) cur.execute(create_sql) + @tm.capture_stdout def test_execute_fail(self): _skip_if_no_pymysql() drop_sql = "DROP TABLE IF EXISTS test" @@ -2525,14 +2524,10 @@ def test_execute_fail(self): sql.execute('INSERT INTO test VALUES("foo", "bar", 1.234)', self.conn) sql.execute('INSERT INTO test VALUES("foo", "baz", 2.567)', self.conn) - try: - sys.stdout = StringIO() - self.assertRaises(Exception, sql.execute, - 'INSERT INTO test VALUES("foo", "bar", 7)', - self.conn) - finally: - sys.stdout = sys.__stdout__ + with pytest.raises(Exception): + sql.execute('INSERT INTO test VALUES("foo", "bar", 7)', self.conn) + @tm.capture_stdout def test_execute_closed_connection(self): _skip_if_no_pymysql() drop_sql = "DROP TABLE IF EXISTS test" @@ -2551,15 +2546,12 @@ def test_execute_closed_connection(self): sql.execute('INSERT INTO test VALUES("foo", "bar", 1.234)', self.conn) self.conn.close() - try: - sys.stdout = StringIO() - self.assertRaises(Exception, tquery, "select * from test", - con=self.conn) - finally: - sys.stdout = sys.__stdout__ + + with pytest.raises(Exception): + 
tquery("select * from test", con=self.conn) # Initialize connection again (needed for tearDown) - self.setUp() + self.setup_method(self.method) def test_na_roundtrip(self): _skip_if_no_pymysql() @@ -2624,42 +2616,40 @@ def clean_up(test_table_to_drop): self.drop_table(test_table_to_drop) # test if invalid value for if_exists raises appropriate error - self.assertRaises(ValueError, - sql.to_sql, - frame=df_if_exists_1, - con=self.conn, - name=table_name, - if_exists='notvalidvalue') + pytest.raises(ValueError, + sql.to_sql, + frame=df_if_exists_1, + con=self.conn, + name=table_name, + if_exists='notvalidvalue') clean_up(table_name) # test if_exists='fail' sql.to_sql(frame=df_if_exists_1, con=self.conn, name=table_name, if_exists='fail', index=False) - self.assertRaises(ValueError, - sql.to_sql, - frame=df_if_exists_1, - con=self.conn, - name=table_name, - if_exists='fail') + pytest.raises(ValueError, + sql.to_sql, + frame=df_if_exists_1, + con=self.conn, + name=table_name, + if_exists='fail') # test if_exists='replace' sql.to_sql(frame=df_if_exists_1, con=self.conn, name=table_name, if_exists='replace', index=False) - self.assertEqual(tquery(sql_select, con=self.conn), - [(1, 'A'), (2, 'B')]) + assert tquery(sql_select, con=self.conn) == [(1, 'A'), (2, 'B')] sql.to_sql(frame=df_if_exists_2, con=self.conn, name=table_name, if_exists='replace', index=False) - self.assertEqual(tquery(sql_select, con=self.conn), - [(3, 'C'), (4, 'D'), (5, 'E')]) + assert (tquery(sql_select, con=self.conn) == + [(3, 'C'), (4, 'D'), (5, 'E')]) clean_up(table_name) # test if_exists='append' sql.to_sql(frame=df_if_exists_1, con=self.conn, name=table_name, if_exists='fail', index=False) - self.assertEqual(tquery(sql_select, con=self.conn), - [(1, 'A'), (2, 'B')]) + assert tquery(sql_select, con=self.conn) == [(1, 'A'), (2, 'B')] sql.to_sql(frame=df_if_exists_2, con=self.conn, name=table_name, if_exists='append', index=False) - self.assertEqual(tquery(sql_select, con=self.conn), - [(1, 'A'), 
(2, 'B'), (3, 'C'), (4, 'D'), (5, 'E')]) + assert (tquery(sql_select, con=self.conn) == + [(1, 'A'), (2, 'B'), (3, 'C'), (4, 'D'), (5, 'E')]) clean_up(table_name) diff --git a/pandas/io/tests/test_stata.py b/pandas/tests/io/test_stata.py similarity index 79% rename from pandas/io/tests/test_stata.py rename to pandas/tests/io/test_stata.py index ae09e671dbca3..49ad07b79d111 100644 --- a/pandas/io/tests/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -8,24 +8,38 @@ import warnings from datetime import datetime from distutils.version import LooseVersion +from collections import OrderedDict -import pytest import numpy as np import pandas as pd import pandas.util.testing as tm +import pytest from pandas import compat +from pandas._libs.tslib import NaT from pandas.compat import iterkeys +from pandas.core.dtypes.common import is_categorical_dtype from pandas.core.frame import DataFrame, Series from pandas.io.parsers import read_csv from pandas.io.stata import (read_stata, StataReader, InvalidColumnName, PossiblePrecisionLoss, StataMissingValue) -from pandas.tslib import NaT -from pandas.types.common import is_categorical_dtype -class TestStata(tm.TestCase): +@pytest.fixture +def dirpath(): + return tm.get_data_path() + + +@pytest.fixture +def parsed_114(dirpath): + dta14_114 = os.path.join(dirpath, 'stata5_114.dta') + parsed_114 = read_stata(dta14_114, convert_dates=True) + parsed_114.index.name = 'index' + return parsed_114 + - def setUp(self): +class TestStata(object): + + def setup_method(self, method): self.dirpath = tm.get_data_path() self.dta1_114 = os.path.join(self.dirpath, 'stata1_114.dta') self.dta1_117 = os.path.join(self.dirpath, 'stata1_117.dta') @@ -83,6 +97,8 @@ def setUp(self): self.dta24_111 = os.path.join(self.dirpath, 'stata7_111.dta') + self.stata_dates = os.path.join(self.dirpath, 'stata13_dates.dta') + def read_dta(self, file): # Legacy default reader configuration return read_stata(file, convert_dates=True) @@ -108,10 +124,12 @@ def 
test_data_method(self): parsed_114_read = rdr.read() tm.assert_frame_equal(parsed_114_data, parsed_114_read) - def test_read_dta1(self): + @pytest.mark.parametrize( + 'file', ['dta1_114', 'dta1_117']) + def test_read_dta1(self, file): - parsed_114 = self.read_dta(self.dta1_114) - parsed_117 = self.read_dta(self.dta1_117) + file = getattr(self, file) + parsed = self.read_dta(file) # Pandas uses np.nan as missing value. # Thus, all columns will be of type float, regardless of their name. @@ -123,11 +141,10 @@ def test_read_dta1(self): # the casting doesn't fail so need to match stata here expected['float_miss'] = expected['float_miss'].astype(np.float32) - tm.assert_frame_equal(parsed_114, expected) - tm.assert_frame_equal(parsed_117, expected) + tm.assert_frame_equal(parsed, expected) def test_read_dta2(self): - if LooseVersion(sys.version) < '2.7': + if LooseVersion(sys.version) < LooseVersion('2.7'): pytest.skip('datetime interp under 2.6 is faulty') expected = DataFrame.from_records( @@ -181,7 +198,7 @@ def test_read_dta2(self): w = [x for x in w if x.category is UserWarning] # should get warning for each call to read_dta - self.assertEqual(len(w), 3) + assert len(w) == 3 # buggy test because of the NaT comparison on certain platforms # Format 113 test fails since it does not support tc and tC formats @@ -193,11 +210,12 @@ def test_read_dta2(self): tm.assert_frame_equal(parsed_117, expected, check_datetimelike_compat=True) - def test_read_dta3(self): - parsed_113 = self.read_dta(self.dta3_113) - parsed_114 = self.read_dta(self.dta3_114) - parsed_115 = self.read_dta(self.dta3_115) - parsed_117 = self.read_dta(self.dta3_117) + @pytest.mark.parametrize( + 'file', ['dta3_113', 'dta3_114', 'dta3_115', 'dta3_117']) + def test_read_dta3(self, file): + + file = getattr(self, file) + parsed = self.read_dta(file) # match stata here expected = self.read_csv(self.csv3) @@ -205,16 +223,14 @@ def test_read_dta3(self): expected['year'] = expected['year'].astype(np.int16) 
expected['quarter'] = expected['quarter'].astype(np.int8) - tm.assert_frame_equal(parsed_113, expected) - tm.assert_frame_equal(parsed_114, expected) - tm.assert_frame_equal(parsed_115, expected) - tm.assert_frame_equal(parsed_117, expected) + tm.assert_frame_equal(parsed, expected) - def test_read_dta4(self): - parsed_113 = self.read_dta(self.dta4_113) - parsed_114 = self.read_dta(self.dta4_114) - parsed_115 = self.read_dta(self.dta4_115) - parsed_117 = self.read_dta(self.dta4_117) + @pytest.mark.parametrize( + 'file', ['dta4_113', 'dta4_114', 'dta4_115', 'dta4_117']) + def test_read_dta4(self, file): + + file = getattr(self, file) + parsed = self.read_dta(file) expected = DataFrame.from_records( [ @@ -237,10 +253,7 @@ def test_read_dta4(self): for col in expected], axis=1) # stata doesn't save .category metadata - tm.assert_frame_equal(parsed_113, expected, check_categorical=False) - tm.assert_frame_equal(parsed_114, expected, check_categorical=False) - tm.assert_frame_equal(parsed_115, expected, check_categorical=False) - tm.assert_frame_equal(parsed_117, expected, check_categorical=False) + tm.assert_frame_equal(parsed, expected, check_categorical=False) # File containing strls def test_read_dta12(self): @@ -283,7 +296,7 @@ def test_read_dta18(self): u'Floats': u'float data'} tm.assert_dict_equal(vl, vl_expected) - self.assertEqual(rdr.data_label, u'This is a Ünicode data label') + assert rdr.data_label == u'This is a Ünicode data label' def test_read_write_dta5(self): original = DataFrame([(np.nan, np.nan, np.nan, np.nan, np.nan)], @@ -323,7 +336,7 @@ def test_read_write_dta10(self): with tm.ensure_clean() as path: original.to_stata(path, {'datetime': 'tc'}) written_and_read_again = self.read_dta(path) - # original.index is np.int32, readed index is np.int64 + # original.index is np.int32, read index is np.int64 tm.assert_frame_equal(written_and_read_again.set_index('index'), original, check_index_type=False) @@ -351,12 +364,12 @@ def test_encoding(self): if 
compat.PY3: expected = raw.kreis1849[0] - self.assertEqual(result, expected) - self.assertIsInstance(result, compat.string_types) + assert result == expected + assert isinstance(result, compat.string_types) else: expected = raw.kreis1849.str.decode("latin-1")[0] - self.assertEqual(result, expected) - self.assertIsInstance(result, unicode) # noqa + assert result == expected + assert isinstance(result, unicode) # noqa with tm.ensure_clean() as path: encoded.to_stata(path, encoding='latin-1', write_index=False) @@ -377,7 +390,7 @@ def test_read_write_dta11(self): with warnings.catch_warnings(record=True) as w: original.to_stata(path, None) # should get a warning for that format. - self.assertEqual(len(w), 1) + assert len(w) == 1 written_and_read_again = self.read_dta(path) tm.assert_frame_equal( @@ -405,7 +418,7 @@ def test_read_write_dta12(self): with warnings.catch_warnings(record=True) as w: original.to_stata(path, None) # should get a warning for that format. - self.assertEqual(len(w), 1) + assert len(w) == 1 written_and_read_again = self.read_dta(path) tm.assert_frame_equal( @@ -427,7 +440,13 @@ def test_read_write_dta13(self): tm.assert_frame_equal(written_and_read_again.set_index('index'), formatted) - def test_read_write_reread_dta14(self): + @pytest.mark.parametrize( + 'file', ['dta14_113', 'dta14_114', 'dta14_115', 'dta14_117']) + def test_read_write_reread_dta14(self, file, parsed_114): + file = getattr(self, file) + parsed = self.read_dta(file) + parsed.index.name = 'index' + expected = self.read_csv(self.csv14) cols = ['byte_', 'int_', 'long_', 'float_', 'double_'] for col in cols: @@ -436,18 +455,7 @@ def test_read_write_reread_dta14(self): expected['date_td'] = pd.to_datetime( expected['date_td'], errors='coerce') - parsed_113 = self.read_dta(self.dta14_113) - parsed_113.index.name = 'index' - parsed_114 = self.read_dta(self.dta14_114) - parsed_114.index.name = 'index' - parsed_115 = self.read_dta(self.dta14_115) - parsed_115.index.name = 'index' - 
parsed_117 = self.read_dta(self.dta14_117) - parsed_117.index.name = 'index' - - tm.assert_frame_equal(parsed_114, parsed_113) - tm.assert_frame_equal(parsed_114, parsed_115) - tm.assert_frame_equal(parsed_114, parsed_117) + tm.assert_frame_equal(parsed_114, parsed) with tm.ensure_clean() as path: parsed_114.to_stata(path, {'date_td': 'td'}) @@ -455,7 +463,10 @@ def test_read_write_reread_dta14(self): tm.assert_frame_equal( written_and_read_again.set_index('index'), parsed_114) - def test_read_write_reread_dta15(self): + @pytest.mark.parametrize( + 'file', ['dta15_113', 'dta15_114', 'dta15_115', 'dta15_117']) + def test_read_write_reread_dta15(self, file): + expected = self.read_csv(self.csv15) expected['byte_'] = expected['byte_'].astype(np.int8) expected['int_'] = expected['int_'].astype(np.int16) @@ -465,18 +476,13 @@ def test_read_write_reread_dta15(self): expected['date_td'] = expected['date_td'].apply( datetime.strptime, args=('%Y-%m-%d',)) - parsed_113 = self.read_dta(self.dta15_113) - parsed_114 = self.read_dta(self.dta15_114) - parsed_115 = self.read_dta(self.dta15_115) - parsed_117 = self.read_dta(self.dta15_117) + file = getattr(self, file) + parsed = self.read_dta(file) - tm.assert_frame_equal(expected, parsed_114) - tm.assert_frame_equal(parsed_113, parsed_114) - tm.assert_frame_equal(parsed_114, parsed_115) - tm.assert_frame_equal(parsed_114, parsed_117) + tm.assert_frame_equal(expected, parsed) def test_timestamp_and_label(self): - original = DataFrame([(1,)], columns=['var']) + original = DataFrame([(1,)], columns=['variable']) time_stamp = datetime(2000, 2, 29, 14, 21) data_label = 'This is a data file.' 
with tm.ensure_clean() as path: @@ -523,7 +529,7 @@ def test_no_index(self): with tm.ensure_clean() as path: original.to_stata(path, write_index=False) written_and_read_again = self.read_dta(path) - tm.assertRaises( + pytest.raises( KeyError, lambda: written_and_read_again['index_not_written']) def test_string_no_dates(self): @@ -583,9 +589,19 @@ def test_105(self): df0['psch_dis'] = df0["psch_dis"].astype(np.float32) tm.assert_frame_equal(df.head(3), df0) + def test_value_labels_old_format(self): + # GH 19417 + # + # Test that value_labels() returns an empty dict if the file format + # predates supporting value labels. + dpath = os.path.join(self.dirpath, 'S4_EDUC1.dta') + reader = StataReader(dpath) + assert reader.value_labels() == {} + reader.close() + def test_date_export_formats(self): columns = ['tc', 'td', 'tw', 'tm', 'tq', 'th', 'ty'] - conversions = dict(((c, c) for c in columns)) + conversions = {c: c for c in columns} data = [datetime(2006, 11, 20, 23, 13, 20)] * len(columns) original = DataFrame([data], columns=columns) original.index.name = 'index' @@ -647,10 +663,10 @@ def test_variable_labels(self): keys = ('var1', 'var2', 'var3') labels = ('label1', 'label2', 'label3') for k, v in compat.iteritems(sr_115): - self.assertTrue(k in sr_117) - self.assertTrue(v == sr_117[k]) - self.assertTrue(k in keys) - self.assertTrue(v in labels) + assert k in sr_117 + assert v == sr_117[k] + assert k in keys + assert v in labels def test_minimal_size_col(self): str_lens = (1, 100, 244) @@ -667,8 +683,8 @@ def test_minimal_size_col(self): variables = sr.varlist formats = sr.fmtlist for variable, fmt, typ in zip(variables, formats, typlist): - self.assertTrue(int(variable[1:]) == int(fmt[1:-1])) - self.assertTrue(int(variable[1:]) == typ) + assert int(variable[1:]) == int(fmt[1:-1]) + assert int(variable[1:]) == typ def test_excessively_long_string(self): str_lens = (1, 244, 500) @@ -677,7 +693,7 @@ def test_excessively_long_string(self): s['s' + str(str_len)] = 
Series(['a' * str_len, 'b' * str_len, 'c' * str_len]) original = DataFrame(s) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): with tm.ensure_clean() as path: original.to_stata(path) @@ -694,23 +710,25 @@ def test_missing_value_generator(self): offset = valid_range[t][1] for i in range(0, 27): val = StataMissingValue(offset + 1 + i) - self.assertTrue(val.string == expected_values[i]) + assert val.string == expected_values[i] # Test extremes for floats val = StataMissingValue(struct.unpack(' 0) + assert len(ax.get_children()) > 0 if layout is not None: - result = self._get_axes_layout(plotting._flatten(axes)) - self.assertEqual(result, layout) + result = self._get_axes_layout(_flatten(axes)) + assert result == layout - self.assert_numpy_array_equal( + tm.assert_numpy_array_equal( visible_axes[0].figure.get_size_inches(), np.array(figsize, dtype=np.float64)) @@ -378,7 +382,7 @@ def _flatten_visible(self, axes): axes : matplotlib Axes object, or its list-like """ - axes = plotting._flatten(axes) + axes = _flatten(axes) axes = [ax for ax in axes if ax.get_visible()] return axes @@ -406,8 +410,8 @@ def _check_has_errorbars(self, axes, xerr=0, yerr=0): xerr_count += 1 if has_yerr: yerr_count += 1 - self.assertEqual(xerr, xerr_count) - self.assertEqual(yerr, yerr_count) + assert xerr == xerr_count + assert yerr == yerr_count def _check_box_return_type(self, returned, return_type, expected_keys=None, check_ax_title=True): @@ -434,36 +438,36 @@ def _check_box_return_type(self, returned, return_type, expected_keys=None, if return_type is None: return_type = 'dict' - self.assertTrue(isinstance(returned, types[return_type])) + assert isinstance(returned, types[return_type]) if return_type == 'both': - self.assertIsInstance(returned.ax, Axes) - self.assertIsInstance(returned.lines, dict) + assert isinstance(returned.ax, Axes) + assert isinstance(returned.lines, dict) else: # should be fixed when the returning default is changed if return_type is None: for r 
in self._flatten_visible(returned): - self.assertIsInstance(r, Axes) + assert isinstance(r, Axes) return - self.assertTrue(isinstance(returned, Series)) + assert isinstance(returned, Series) - self.assertEqual(sorted(returned.keys()), sorted(expected_keys)) + assert sorted(returned.keys()) == sorted(expected_keys) for key, value in iteritems(returned): - self.assertTrue(isinstance(value, types[return_type])) + assert isinstance(value, types[return_type]) # check returned dict has correct mapping if return_type == 'axes': if check_ax_title: - self.assertEqual(value.get_title(), key) + assert value.get_title() == key elif return_type == 'both': if check_ax_title: - self.assertEqual(value.ax.get_title(), key) - self.assertIsInstance(value.ax, Axes) - self.assertIsInstance(value.lines, dict) + assert value.ax.get_title() == key + assert isinstance(value.ax, Axes) + assert isinstance(value.lines, dict) elif return_type == 'dict': line = value['medians'][0] axes = line.axes if self.mpl_ge_1_5_0 else line.get_axes() if check_ax_title: - self.assertEqual(axes.get_title(), key) + assert axes.get_title() == key else: raise AssertionError @@ -488,26 +492,26 @@ def is_grid_on(): spndx += 1 mpl.rc('axes', grid=False) obj.plot(kind=kind, **kws) - self.assertFalse(is_grid_on()) + assert not is_grid_on() self.plt.subplot(1, 4 * len(kinds), spndx) spndx += 1 mpl.rc('axes', grid=True) obj.plot(kind=kind, grid=False, **kws) - self.assertFalse(is_grid_on()) + assert not is_grid_on() if kind != 'pie': self.plt.subplot(1, 4 * len(kinds), spndx) spndx += 1 mpl.rc('axes', grid=True) obj.plot(kind=kind, **kws) - self.assertTrue(is_grid_on()) + assert is_grid_on() self.plt.subplot(1, 4 * len(kinds), spndx) spndx += 1 mpl.rc('axes', grid=False) obj.plot(kind=kind, grid=True, **kws) - self.assertTrue(is_grid_on()) + assert is_grid_on() def _maybe_unpack_cycler(self, rcParams, field='color'): """ diff --git a/pandas/tests/plotting/test_boxplot_method.py 
b/pandas/tests/plotting/test_boxplot_method.py index 31c150bc1e64f..7661b46a79061 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -8,13 +8,12 @@ from pandas import Series, DataFrame, MultiIndex from pandas.compat import range, lzip import pandas.util.testing as tm -from pandas.util.testing import slow +import pandas.util._test_decorators as td import numpy as np from numpy import random -from numpy.random import randn -import pandas.tools.plotting as plotting +import pandas.plotting as plotting from pandas.tests.plotting.common import (TestPlotBase, _check_plot_works) @@ -27,16 +26,16 @@ def _skip_if_mpl_14_or_dev_boxplot(): # Boxplot failures on 1.4 and 1.4.1 # Don't need try / except since that's done at class level import matplotlib - if str(matplotlib.__version__) >= LooseVersion('1.4'): + if LooseVersion(matplotlib.__version__) >= LooseVersion('1.4'): pytest.skip("Matplotlib Regression in 1.4 and current dev.") -@tm.mplskip +@td.skip_if_no_mpl class TestDataFramePlots(TestPlotBase): - @slow - def test_boxplot_legacy(self): - df = DataFrame(randn(6, 4), + @pytest.mark.slow + def test_boxplot_legacy1(self): + df = DataFrame(np.random.randn(6, 4), index=list(string.ascii_letters[:6]), columns=['one', 'two', 'three', 'four']) df['indic'] = ['foo', 'bar'] * 3 @@ -54,11 +53,14 @@ def test_boxplot_legacy(self): _check_plot_works(df.boxplot, by='indic') with tm.assert_produces_warning(UserWarning): _check_plot_works(df.boxplot, by=['indic', 'indic2']) - _check_plot_works(plotting.boxplot, data=df['one'], return_type='dict') + _check_plot_works(plotting._core.boxplot, data=df['one'], + return_type='dict') _check_plot_works(df.boxplot, notch=1, return_type='dict') with tm.assert_produces_warning(UserWarning): _check_plot_works(df.boxplot, by='indic', notch=1) + @pytest.mark.slow + def test_boxplot_legacy2(self): df = DataFrame(np.random.rand(10, 2), columns=['Col1', 'Col2']) df['X'] = Series(['A', 'A', 'A', 
'A', 'A', 'B', 'B', 'B', 'B', 'B']) df['Y'] = Series(['A'] * 10) @@ -70,42 +72,42 @@ def test_boxplot_legacy(self): fig, ax = self.plt.subplots() axes = df.boxplot('Col1', by='X', ax=ax) ax_axes = ax.axes if self.mpl_ge_1_5_0 else ax.get_axes() - self.assertIs(ax_axes, axes) + assert ax_axes is axes fig, ax = self.plt.subplots() axes = df.groupby('Y').boxplot(ax=ax, return_type='axes') ax_axes = ax.axes if self.mpl_ge_1_5_0 else ax.get_axes() - self.assertIs(ax_axes, axes['A']) + assert ax_axes is axes['A'] # Multiple columns with an ax argument should use same figure fig, ax = self.plt.subplots() with tm.assert_produces_warning(UserWarning): axes = df.boxplot(column=['Col1', 'Col2'], by='X', ax=ax, return_type='axes') - self.assertIs(axes['Col1'].get_figure(), fig) + assert axes['Col1'].get_figure() is fig # When by is None, check that all relevant lines are present in the # dict fig, ax = self.plt.subplots() d = df.boxplot(ax=ax, return_type='dict') lines = list(itertools.chain.from_iterable(d.values())) - self.assertEqual(len(ax.get_lines()), len(lines)) + assert len(ax.get_lines()) == len(lines) - @slow + @pytest.mark.slow def test_boxplot_return_type_none(self): # GH 12216; return_type=None & by=None -> axes result = self.hist_df.boxplot() - self.assertTrue(isinstance(result, self.plt.Axes)) + assert isinstance(result, self.plt.Axes) - @slow + @pytest.mark.slow def test_boxplot_return_type_legacy(self): # API change in https://github.com/pandas-dev/pandas/pull/7096 import matplotlib as mpl # noqa - df = DataFrame(randn(6, 4), + df = DataFrame(np.random.randn(6, 4), index=list(string.ascii_letters[:6]), columns=['one', 'two', 'three', 'four']) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df.boxplot(return_type='NOTATYPE') result = df.boxplot() @@ -123,13 +125,13 @@ def test_boxplot_return_type_legacy(self): result = df.boxplot(return_type='both') self._check_box_return_type(result, 'both') - @slow + @pytest.mark.slow def 
test_boxplot_axis_limits(self): def _check_ax_limits(col, ax): y_min, y_max = ax.get_ylim() - self.assertTrue(y_min <= col.min()) - self.assertTrue(y_max >= col.max()) + assert y_min <= col.min() + assert y_max >= col.max() df = self.hist_df.copy() df['age'] = np.random.randint(1, 20, df.shape[0]) @@ -137,7 +139,7 @@ def _check_ax_limits(col, ax): height_ax, weight_ax = df.boxplot(['height', 'weight'], by='category') _check_ax_limits(df['height'], height_ax) _check_ax_limits(df['weight'], weight_ax) - self.assertEqual(weight_ax._sharey, height_ax) + assert weight_ax._sharey == height_ax # Two rows, one partial p = df.boxplot(['height', 'weight', 'age'], by='category') @@ -147,28 +149,36 @@ def _check_ax_limits(col, ax): _check_ax_limits(df['height'], height_ax) _check_ax_limits(df['weight'], weight_ax) _check_ax_limits(df['age'], age_ax) - self.assertEqual(weight_ax._sharey, height_ax) - self.assertEqual(age_ax._sharey, height_ax) - self.assertIsNone(dummy_ax._sharey) + assert weight_ax._sharey == height_ax + assert age_ax._sharey == height_ax + assert dummy_ax._sharey is None - @slow + @pytest.mark.slow def test_boxplot_empty_column(self): _skip_if_mpl_14_or_dev_boxplot() df = DataFrame(np.random.randn(20, 4)) df.loc[:, 0] = np.nan _check_plot_works(df.boxplot, return_type='axes') + @pytest.mark.slow + def test_figsize(self): + df = DataFrame(np.random.rand(10, 5), + columns=['A', 'B', 'C', 'D', 'E']) + result = df.boxplot(return_type='axes', figsize=(12, 8)) + assert result.figure.bbox_inches.width == 12 + assert result.figure.bbox_inches.height == 8 + def test_fontsize(self): df = DataFrame({"a": [1, 2, 3, 4, 5, 6]}) self._check_ticks_props(df.boxplot("a", fontsize=16), xlabelsize=16, ylabelsize=16) -@tm.mplskip +@td.skip_if_no_mpl class TestDataFrameGroupByPlots(TestPlotBase): - @slow - def test_boxplot_legacy(self): + @pytest.mark.slow + def test_boxplot_legacy1(self): grouped = self.hist_df.groupby(by='gender') with tm.assert_produces_warning(UserWarning): 
axes = _check_plot_works(grouped.boxplot, return_type='axes') @@ -176,10 +186,12 @@ def test_boxplot_legacy(self): axes = _check_plot_works(grouped.boxplot, subplots=False, return_type='axes') self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) + + @pytest.mark.slow + def test_boxplot_legacy2(self): tuples = lzip(string.ascii_letters[:10], range(10)) df = DataFrame(np.random.rand(10, 3), index=MultiIndex.from_tuples(tuples)) - grouped = df.groupby(level=1) with tm.assert_produces_warning(UserWarning): axes = _check_plot_works(grouped.boxplot, return_type='axes') @@ -189,6 +201,11 @@ def test_boxplot_legacy(self): return_type='axes') self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) + @pytest.mark.slow + def test_boxplot_legacy3(self): + tuples = lzip(string.ascii_letters[:10], range(10)) + df = DataFrame(np.random.rand(10, 3), + index=MultiIndex.from_tuples(tuples)) grouped = df.unstack(level=1).groupby(level=0, axis=1) with tm.assert_produces_warning(UserWarning): axes = _check_plot_works(grouped.boxplot, return_type='axes') @@ -197,7 +214,7 @@ def test_boxplot_legacy(self): return_type='axes') self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) - @slow + @pytest.mark.slow def test_grouped_plot_fignums(self): n = 10 weight = Series(np.random.normal(166, 20, size=n)) @@ -208,26 +225,26 @@ def test_grouped_plot_fignums(self): gb = df.groupby('gender') res = gb.plot() - self.assertEqual(len(self.plt.get_fignums()), 2) - self.assertEqual(len(res), 2) + assert len(self.plt.get_fignums()) == 2 + assert len(res) == 2 tm.close() res = gb.boxplot(return_type='axes') - self.assertEqual(len(self.plt.get_fignums()), 1) - self.assertEqual(len(res), 2) + assert len(self.plt.get_fignums()) == 1 + assert len(res) == 2 tm.close() # now works with GH 5610 as gender is excluded res = df.groupby('gender').hist() tm.close() - @slow + @pytest.mark.slow def test_grouped_box_return_type(self): df = self.hist_df # old style: return_type=None result = df.boxplot(by='gender') 
- self.assertIsInstance(result, np.ndarray) + assert isinstance(result, np.ndarray) self._check_box_return_type( result, None, expected_keys=['height', 'weight', 'category']) @@ -258,17 +275,17 @@ def test_grouped_box_return_type(self): returned = df2.boxplot(by='category', return_type=t) self._check_box_return_type(returned, t, expected_keys=columns2) - @slow + @pytest.mark.slow def test_grouped_box_layout(self): df = self.hist_df - self.assertRaises(ValueError, df.boxplot, column=['weight', 'height'], - by=df.gender, layout=(1, 1)) - self.assertRaises(ValueError, df.boxplot, - column=['height', 'weight', 'category'], - layout=(2, 1), return_type='dict') - self.assertRaises(ValueError, df.boxplot, column=['weight', 'height'], - by=df.gender, layout=(-1, -1)) + pytest.raises(ValueError, df.boxplot, column=['weight', 'height'], + by=df.gender, layout=(1, 1)) + pytest.raises(ValueError, df.boxplot, + column=['height', 'weight', 'category'], + layout=(2, 1), return_type='dict') + pytest.raises(ValueError, df.boxplot, column=['weight', 'height'], + by=df.gender, layout=(-1, -1)) # _check_plot_works adds an ax so catch warning. 
see GH #13188 with tm.assert_produces_warning(UserWarning): @@ -332,7 +349,7 @@ def test_grouped_box_layout(self): return_type='dict') self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(1, 3)) - @slow + @pytest.mark.slow def test_grouped_box_multiple_axes(self): # GH 6970, GH 7069 df = self.hist_df @@ -355,8 +372,8 @@ def test_grouped_box_multiple_axes(self): by='gender', return_type='axes', ax=axes[0]) returned = np.array(list(returned.values)) self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) - self.assert_numpy_array_equal(returned, axes[0]) - self.assertIs(returned[0].figure, fig) + tm.assert_numpy_array_equal(returned, axes[0]) + assert returned[0].figure is fig # draw on second row with tm.assert_produces_warning(UserWarning): @@ -365,10 +382,10 @@ def test_grouped_box_multiple_axes(self): return_type='axes', ax=axes[1]) returned = np.array(list(returned.values)) self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) - self.assert_numpy_array_equal(returned, axes[1]) - self.assertIs(returned[0].figure, fig) + tm.assert_numpy_array_equal(returned, axes[1]) + assert returned[0].figure is fig - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): fig, axes = self.plt.subplots(2, 3) # pass different number of axes from required with tm.assert_produces_warning(UserWarning): diff --git a/pandas/tests/plotting/test_converter.py b/pandas/tests/plotting/test_converter.py new file mode 100644 index 0000000000000..47cded19f5300 --- /dev/null +++ b/pandas/tests/plotting/test_converter.py @@ -0,0 +1,354 @@ +import subprocess +import pytest +from datetime import datetime, date + +import numpy as np +from pandas import Timestamp, Period, Index, date_range, Series +from pandas.compat import u +import pandas.core.config as cf +import pandas.util.testing as tm +from pandas.tseries.offsets import Second, Milli, Micro, Day +from pandas.compat.numpy import np_datetime64_compat + +converter = 
pytest.importorskip('pandas.plotting._converter') +from pandas.plotting import (register_matplotlib_converters, + deregister_matplotlib_converters) + + +def test_timtetonum_accepts_unicode(): + assert (converter.time2num("00:01") == converter.time2num(u("00:01"))) + + +class TestRegistration(object): + + def test_register_by_default(self): + # Run in subprocess to ensure a clean state + code = ("'import matplotlib.units; " + "import pandas as pd; " + "units = dict(matplotlib.units.registry); " + "assert pd.Timestamp in units)'") + call = ['python', '-c', code] + assert subprocess.check_call(call) == 0 + + def test_warns(self): + plt = pytest.importorskip("matplotlib.pyplot") + s = Series(range(12), index=date_range('2017', periods=12)) + _, ax = plt.subplots() + + # Set to the "warning" state, in case this isn't the first test run + converter._WARN = True + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False) as w: + ax.plot(s.index, s.values) + plt.close() + + assert len(w) == 1 + assert "Using an implicitly registered datetime converter" in str(w[0]) + + def test_registering_no_warning(self): + plt = pytest.importorskip("matplotlib.pyplot") + s = Series(range(12), index=date_range('2017', periods=12)) + _, ax = plt.subplots() + + # Set to the "warn" state, in case this isn't the first test run + converter._WARN = True + register_matplotlib_converters() + with tm.assert_produces_warning(None) as w: + ax.plot(s.index, s.values) + + assert len(w) == 0 + + def test_pandas_plots_register(self): + pytest.importorskip("matplotlib.pyplot") + s = Series(range(12), index=date_range('2017', periods=12)) + # Set to the "warn" state, in case this isn't the first test run + converter._WARN = True + with tm.assert_produces_warning(None) as w: + s.plot() + + assert len(w) == 0 + + def test_matplotlib_formatters(self): + units = pytest.importorskip("matplotlib.units") + assert Timestamp in units.registry + + ctx = 
cf.option_context("plotting.matplotlib.register_converters", + False) + with ctx: + assert Timestamp not in units.registry + + assert Timestamp in units.registry + + def test_option_no_warning(self): + pytest.importorskip("matplotlib.pyplot") + ctx = cf.option_context("plotting.matplotlib.register_converters", + False) + plt = pytest.importorskip("matplotlib.pyplot") + s = Series(range(12), index=date_range('2017', periods=12)) + _, ax = plt.subplots() + + converter._WARN = True + # Test without registering first, no warning + with ctx: + with tm.assert_produces_warning(None) as w: + ax.plot(s.index, s.values) + + assert len(w) == 0 + + # Now test with registering + converter._WARN = True + register_matplotlib_converters() + with ctx: + with tm.assert_produces_warning(None) as w: + ax.plot(s.index, s.values) + + assert len(w) == 0 + + def test_registry_resets(self): + units = pytest.importorskip("matplotlib.units") + dates = pytest.importorskip("matplotlib.dates") + + # make a copy, to reset to + original = dict(units.registry) + + try: + # get to a known state + units.registry.clear() + date_converter = dates.DateConverter() + units.registry[datetime] = date_converter + units.registry[date] = date_converter + + register_matplotlib_converters() + assert units.registry[date] is not date_converter + deregister_matplotlib_converters() + assert units.registry[date] is date_converter + + finally: + # restore original stater + units.registry.clear() + for k, v in original.items(): + units.registry[k] = v + + def test_old_import_warns(self): + with tm.assert_produces_warning(FutureWarning) as w: + from pandas.tseries import converter + converter.register() + + assert len(w) + assert ('pandas.plotting.register_matplotlib_converters' in + str(w[0].message)) + + +class TestDateTimeConverter(object): + + def setup_method(self, method): + self.dtc = converter.DatetimeConverter() + self.tc = converter.TimeFormatter(None) + + def test_convert_accepts_unicode(self): + r1 = 
self.dtc.convert("12:22", None, None) + r2 = self.dtc.convert(u("12:22"), None, None) + assert (r1 == r2), "DatetimeConverter.convert should accept unicode" + + def test_conversion(self): + rs = self.dtc.convert(['2012-1-1'], None, None)[0] + xp = datetime(2012, 1, 1).toordinal() + assert rs == xp + + rs = self.dtc.convert('2012-1-1', None, None) + assert rs == xp + + rs = self.dtc.convert(date(2012, 1, 1), None, None) + assert rs == xp + + rs = self.dtc.convert(datetime(2012, 1, 1).toordinal(), None, None) + assert rs == xp + + rs = self.dtc.convert('2012-1-1', None, None) + assert rs == xp + + rs = self.dtc.convert(Timestamp('2012-1-1'), None, None) + assert rs == xp + + # also testing datetime64 dtype (GH8614) + rs = self.dtc.convert(np_datetime64_compat('2012-01-01'), None, None) + assert rs == xp + + rs = self.dtc.convert(np_datetime64_compat( + '2012-01-01 00:00:00+0000'), None, None) + assert rs == xp + + rs = self.dtc.convert(np.array([ + np_datetime64_compat('2012-01-01 00:00:00+0000'), + np_datetime64_compat('2012-01-02 00:00:00+0000')]), None, None) + assert rs[0] == xp + + # we have a tz-aware date (constructed to that when we turn to utc it + # is the same as our sample) + ts = (Timestamp('2012-01-01') + .tz_localize('UTC') + .tz_convert('US/Eastern') + ) + rs = self.dtc.convert(ts, None, None) + assert rs == xp + + rs = self.dtc.convert(ts.to_pydatetime(), None, None) + assert rs == xp + + rs = self.dtc.convert(Index([ts - Day(1), ts]), None, None) + assert rs[1] == xp + + rs = self.dtc.convert(Index([ts - Day(1), ts]).to_pydatetime(), + None, None) + assert rs[1] == xp + + def test_conversion_float(self): + decimals = 9 + + rs = self.dtc.convert( + Timestamp('2012-1-1 01:02:03', tz='UTC'), None, None) + xp = converter.dates.date2num(Timestamp('2012-1-1 01:02:03', tz='UTC')) + tm.assert_almost_equal(rs, xp, decimals) + + rs = self.dtc.convert( + Timestamp('2012-1-1 09:02:03', tz='Asia/Hong_Kong'), None, None) + tm.assert_almost_equal(rs, xp, decimals) 
+ + rs = self.dtc.convert(datetime(2012, 1, 1, 1, 2, 3), None, None) + tm.assert_almost_equal(rs, xp, decimals) + + def test_conversion_outofbounds_datetime(self): + # 2579 + values = [date(1677, 1, 1), date(1677, 1, 2)] + rs = self.dtc.convert(values, None, None) + xp = converter.dates.date2num(values) + tm.assert_numpy_array_equal(rs, xp) + rs = self.dtc.convert(values[0], None, None) + xp = converter.dates.date2num(values[0]) + assert rs == xp + + values = [datetime(1677, 1, 1, 12), datetime(1677, 1, 2, 12)] + rs = self.dtc.convert(values, None, None) + xp = converter.dates.date2num(values) + tm.assert_numpy_array_equal(rs, xp) + rs = self.dtc.convert(values[0], None, None) + xp = converter.dates.date2num(values[0]) + assert rs == xp + + def test_time_formatter(self): + # issue 18478 + + # time2num(datetime.time.min) + rs = self.tc(0) + xp = '00:00' + assert rs == xp + + # time2num(datetime.time.max) + rs = self.tc(86399.999999) + xp = '23:59:59.999999' + assert rs == xp + + # some other times + rs = self.tc(90000) + xp = '01:00' + assert rs == xp + rs = self.tc(3723) + xp = '01:02:03' + assert rs == xp + rs = self.tc(39723.2) + xp = '11:02:03.200' + assert rs == xp + + def test_dateindex_conversion(self): + decimals = 9 + + for freq in ('B', 'L', 'S'): + dateindex = tm.makeDateIndex(k=10, freq=freq) + rs = self.dtc.convert(dateindex, None, None) + xp = converter.dates.date2num(dateindex._mpl_repr()) + tm.assert_almost_equal(rs, xp, decimals) + + def test_resolution(self): + def _assert_less(ts1, ts2): + val1 = self.dtc.convert(ts1, None, None) + val2 = self.dtc.convert(ts2, None, None) + if not val1 < val2: + raise AssertionError('{0} is not less than {1}.'.format(val1, + val2)) + + # Matplotlib's time representation using floats cannot distinguish + # intervals smaller than ~10 microsecond in the common range of years. 
+ ts = Timestamp('2012-1-1') + _assert_less(ts, ts + Second()) + _assert_less(ts, ts + Milli()) + _assert_less(ts, ts + Micro(50)) + + def test_convert_nested(self): + inner = [Timestamp('2017-01-01', Timestamp('2017-01-02'))] + data = [inner, inner] + result = self.dtc.convert(data, None, None) + expected = [self.dtc.convert(x, None, None) for x in data] + assert result == expected + + +class TestPeriodConverter(object): + + def setup_method(self, method): + self.pc = converter.PeriodConverter() + + class Axis(object): + pass + + self.axis = Axis() + self.axis.freq = 'D' + + def test_convert_accepts_unicode(self): + r1 = self.pc.convert("2012-1-1", None, self.axis) + r2 = self.pc.convert(u("2012-1-1"), None, self.axis) + assert r1 == r2 + + def test_conversion(self): + rs = self.pc.convert(['2012-1-1'], None, self.axis)[0] + xp = Period('2012-1-1').ordinal + assert rs == xp + + rs = self.pc.convert('2012-1-1', None, self.axis) + assert rs == xp + + rs = self.pc.convert([date(2012, 1, 1)], None, self.axis)[0] + assert rs == xp + + rs = self.pc.convert(date(2012, 1, 1), None, self.axis) + assert rs == xp + + rs = self.pc.convert([Timestamp('2012-1-1')], None, self.axis)[0] + assert rs == xp + + rs = self.pc.convert(Timestamp('2012-1-1'), None, self.axis) + assert rs == xp + + rs = self.pc.convert( + np_datetime64_compat('2012-01-01'), None, self.axis) + assert rs == xp + + rs = self.pc.convert( + np_datetime64_compat('2012-01-01 00:00:00+0000'), None, self.axis) + assert rs == xp + + rs = self.pc.convert(np.array([ + np_datetime64_compat('2012-01-01 00:00:00+0000'), + np_datetime64_compat('2012-01-02 00:00:00+0000')]), + None, self.axis) + assert rs[0] == xp + + def test_integer_passthrough(self): + # GH9012 + rs = self.pc.convert([0, 1], None, self.axis) + xp = [0, 1] + assert rs == xp + + def test_convert_nested(self): + data = ['2012-1-1', '2012-1-2'] + r1 = self.pc.convert([data, data], None, self.axis) + r2 = [self.pc.convert(data, None, self.axis) for _ in 
range(2)] + assert r1 == r2 diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index 25568f7eb61dc..2f2931c9c86ac 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -1,30 +1,33 @@ """ Test cases for time series specific (freq conversion, etc) """ from datetime import datetime, timedelta, date, time +import pickle import pytest from pandas.compat import lrange, zip import numpy as np -from pandas import Index, Series, DataFrame - -from pandas.tseries.index import date_range, bdate_range +from pandas import Index, Series, DataFrame, NaT +from pandas.compat import is_platform_mac, PY3 +from pandas.core.indexes.datetimes import date_range, bdate_range +from pandas.core.indexes.timedeltas import timedelta_range from pandas.tseries.offsets import DateOffset -from pandas.tseries.period import period_range, Period, PeriodIndex -from pandas.tseries.resample import DatetimeIndex +from pandas.core.indexes.period import period_range, Period, PeriodIndex +from pandas.core.resample import DatetimeIndex -from pandas.util.testing import assert_series_equal, ensure_clean, slow +from pandas.util.testing import assert_series_equal, ensure_clean import pandas.util.testing as tm +import pandas.util._test_decorators as td from pandas.tests.plotting.common import (TestPlotBase, _skip_if_no_scipy_gaussian_kde) -@tm.mplskip +@td.skip_if_no_mpl class TestTSPlot(TestPlotBase): - def setUp(self): - TestPlotBase.setUp(self) + def setup_method(self, method): + TestPlotBase.setup_method(self, method) freq = ['S', 'T', 'H', 'D', 'W', 'M', 'Q', 'A'] idx = [period_range('12/31/1999', freq=x, periods=100) for x in freq] @@ -40,10 +43,10 @@ def setUp(self): columns=['A', 'B', 'C']) for x in idx] - def tearDown(self): + def teardown_method(self, method): tm.close() - @slow + @pytest.mark.slow def test_ts_plot_with_tz(self): # GH2877 index = date_range('1/1/2011', periods=2, freq='H', @@ -53,16 
+56,15 @@ def test_ts_plot_with_tz(self): def test_fontsize_set_correctly(self): # For issue #8765 - import matplotlib.pyplot as plt # noqa df = DataFrame(np.random.randn(10, 9), index=range(10)) - ax = df.plot(fontsize=2) + fig, ax = self.plt.subplots() + df.plot(fontsize=2, ax=ax) for label in (ax.get_xticklabels() + ax.get_yticklabels()): - self.assertEqual(label.get_fontsize(), 2) + assert label.get_fontsize() == 2 - @slow + @pytest.mark.slow def test_frame_inferred(self): # inferred freq - import matplotlib.pyplot as plt # noqa idx = date_range('1/1/1987', freq='MS', periods=100) idx = DatetimeIndex(idx.values, freq=None) @@ -88,26 +90,36 @@ def test_is_error_nozeroindex(self): _check_plot_works(a.plot, yerr=a) def test_nonnumeric_exclude(self): - import matplotlib.pyplot as plt - idx = date_range('1/1/1987', freq='A', periods=3) df = DataFrame({'A': ["x", "y", "z"], 'B': [1, 2, 3]}, idx) - ax = df.plot() # it works - self.assertEqual(len(ax.get_lines()), 1) # B was plotted - plt.close(plt.gcf()) + fig, ax = self.plt.subplots() + df.plot(ax=ax) # it works + assert len(ax.get_lines()) == 1 # B was plotted + self.plt.close(fig) + + pytest.raises(TypeError, df['A'].plot) + + def test_tsplot_deprecated(self): + from pandas.tseries.plotting import tsplot + + _, ax = self.plt.subplots() + ts = tm.makeTimeSeries() - self.assertRaises(TypeError, df['A'].plot) + with tm.assert_produces_warning(FutureWarning): + tsplot(ts, self.plt.Axes.plot, ax=ax) - @slow + @pytest.mark.slow def test_tsplot(self): + from pandas.tseries.plotting import tsplot - import matplotlib.pyplot as plt - ax = plt.gca() + _, ax = self.plt.subplots() ts = tm.makeTimeSeries() - f = lambda *args, **kwds: tsplot(s, plt.Axes.plot, *args, **kwds) + def f(*args, **kwds): + with tm.assert_produces_warning(FutureWarning): + return tsplot(s, self.plt.Axes.plot, *args, **kwds) for s in self.period_ser: _check_plot_works(f, s.index.freq, ax=ax, series=s) @@ -121,90 +133,95 @@ def test_tsplot(self): for s in 
self.datetime_ser: _check_plot_works(s.plot, ax=ax) - ax = ts.plot(style='k') + _, ax = self.plt.subplots() + ts.plot(style='k', ax=ax) color = (0., 0., 0., 1) if self.mpl_ge_2_0_0 else (0., 0., 0.) - self.assertEqual(color, ax.get_lines()[0].get_color()) + assert color == ax.get_lines()[0].get_color() def test_both_style_and_color(self): - import matplotlib.pyplot as plt # noqa ts = tm.makeTimeSeries() - self.assertRaises(ValueError, ts.plot, style='b-', color='#000099') + pytest.raises(ValueError, ts.plot, style='b-', color='#000099') s = ts.reset_index(drop=True) - self.assertRaises(ValueError, s.plot, style='b-', color='#000099') + pytest.raises(ValueError, s.plot, style='b-', color='#000099') - @slow + @pytest.mark.slow def test_high_freq(self): freaks = ['ms', 'us'] for freq in freaks: + _, ax = self.plt.subplots() rng = date_range('1/1/2012', periods=100000, freq=freq) ser = Series(np.random.randn(len(rng)), rng) - _check_plot_works(ser.plot) + _check_plot_works(ser.plot, ax=ax) def test_get_datevalue(self): - from pandas.tseries.converter import get_datevalue - self.assertIsNone(get_datevalue(None, 'D')) - self.assertEqual(get_datevalue(1987, 'A'), 1987) - self.assertEqual(get_datevalue(Period(1987, 'A'), 'M'), - Period('1987-12', 'M').ordinal) - self.assertEqual(get_datevalue('1/1/1987', 'D'), - Period('1987-1-1', 'D').ordinal) - - @slow + from pandas.plotting._converter import get_datevalue + assert get_datevalue(None, 'D') is None + assert get_datevalue(1987, 'A') == 1987 + assert (get_datevalue(Period(1987, 'A'), 'M') == + Period('1987-12', 'M').ordinal) + assert (get_datevalue('1/1/1987', 'D') == + Period('1987-1-1', 'D').ordinal) + + @pytest.mark.slow def test_ts_plot_format_coord(self): def check_format_of_first_point(ax, expected_string): first_line = ax.get_lines()[0] first_x = first_line.get_xdata()[0].ordinal first_y = first_line.get_ydata()[0] try: - self.assertEqual(expected_string, - ax.format_coord(first_x, first_y)) + assert expected_string 
== ax.format_coord(first_x, first_y) except (ValueError): pytest.skip("skipping test because issue forming " "test comparison GH7664") annual = Series(1, index=date_range('2014-01-01', periods=3, freq='A-DEC')) - check_format_of_first_point(annual.plot(), 't = 2014 y = 1.000000') + _, ax = self.plt.subplots() + annual.plot(ax=ax) + check_format_of_first_point(ax, 't = 2014 y = 1.000000') # note this is added to the annual plot already in existence, and # changes its freq field daily = Series(1, index=date_range('2014-01-01', periods=3, freq='D')) - check_format_of_first_point(daily.plot(), + daily.plot(ax=ax) + check_format_of_first_point(ax, 't = 2014-01-01 y = 1.000000') tm.close() # tsplot - import matplotlib.pyplot as plt from pandas.tseries.plotting import tsplot - tsplot(annual, plt.Axes.plot) - check_format_of_first_point(plt.gca(), 't = 2014 y = 1.000000') - tsplot(daily, plt.Axes.plot) - check_format_of_first_point(plt.gca(), 't = 2014-01-01 y = 1.000000') - - @slow + _, ax = self.plt.subplots() + with tm.assert_produces_warning(FutureWarning): + tsplot(annual, self.plt.Axes.plot, ax=ax) + check_format_of_first_point(ax, 't = 2014 y = 1.000000') + with tm.assert_produces_warning(FutureWarning): + tsplot(daily, self.plt.Axes.plot, ax=ax) + check_format_of_first_point(ax, 't = 2014-01-01 y = 1.000000') + + @pytest.mark.slow def test_line_plot_period_series(self): for s in self.period_ser: _check_plot_works(s.plot, s.index.freq) - @slow + @pytest.mark.slow def test_line_plot_datetime_series(self): for s in self.datetime_ser: _check_plot_works(s.plot, s.index.freq.rule_code) - @slow + @pytest.mark.slow def test_line_plot_period_frame(self): for df in self.period_df: _check_plot_works(df.plot, df.index.freq) - @slow + @pytest.mark.slow def test_line_plot_datetime_frame(self): for df in self.datetime_df: freq = df.index.to_period(df.index.freq.rule_code).freq _check_plot_works(df.plot, freq) - @slow + @pytest.mark.slow def test_line_plot_inferred_freq(self): for 
ser in self.datetime_ser: ser = Series(ser.values, Index(np.asarray(ser.index))) @@ -214,17 +231,14 @@ def test_line_plot_inferred_freq(self): _check_plot_works(ser.plot) def test_fake_inferred_business(self): - import matplotlib.pyplot as plt - fig = plt.gcf() - plt.clf() - fig.add_subplot(111) + _, ax = self.plt.subplots() rng = date_range('2001-1-1', '2001-1-10') ts = Series(lrange(len(rng)), rng) ts = ts[:3].append(ts[5:]) - ax = ts.plot() - self.assertFalse(hasattr(ax, 'freq')) + ts.plot(ax=ax) + assert not hasattr(ax, 'freq') - @slow + @pytest.mark.slow def test_plot_offset_freq(self): ser = tm.makeTimeSeries() _check_plot_works(ser.plot) @@ -233,25 +247,21 @@ def test_plot_offset_freq(self): ser = Series(np.random.randn(len(dr)), dr) _check_plot_works(ser.plot) - @slow + @pytest.mark.slow def test_plot_multiple_inferred_freq(self): dr = Index([datetime(2000, 1, 1), datetime(2000, 1, 6), datetime( 2000, 1, 11)]) ser = Series(np.random.randn(len(dr)), dr) _check_plot_works(ser.plot) - @slow + @pytest.mark.slow def test_uhf(self): - import pandas.tseries.converter as conv - import matplotlib.pyplot as plt - fig = plt.gcf() - plt.clf() - fig.add_subplot(111) - + import pandas.plotting._converter as conv idx = date_range('2012-6-22 21:59:51.960928', freq='L', periods=500) df = DataFrame(np.random.randn(len(idx), 2), idx) - ax = df.plot() + _, ax = self.plt.subplots() + df.plot(ax=ax) axis = ax.get_xaxis() tlocs = axis.get_ticklocs() @@ -260,96 +270,88 @@ def test_uhf(self): xp = conv._from_ordinal(loc).strftime('%H:%M:%S.%f') rs = str(label.get_text()) if len(rs): - self.assertEqual(xp, rs) + assert xp == rs - @slow + @pytest.mark.slow def test_irreg_hf(self): - import matplotlib.pyplot as plt - fig = plt.gcf() - plt.clf() - fig.add_subplot(111) - idx = date_range('2012-6-22 21:59:51', freq='S', periods=100) df = DataFrame(np.random.randn(len(idx), 2), idx) irreg = df.iloc[[0, 1, 3, 4]] - ax = irreg.plot() + _, ax = self.plt.subplots() + irreg.plot(ax=ax) diffs = 
Series(ax.get_lines()[0].get_xydata()[:, 0]).diff() sec = 1. / 24 / 60 / 60 - self.assertTrue((np.fabs(diffs[1:] - [sec, sec * 2, sec]) < 1e-8).all( - )) + assert (np.fabs(diffs[1:] - [sec, sec * 2, sec]) < 1e-8).all() - plt.clf() - fig.add_subplot(111) + _, ax = self.plt.subplots() df2 = df.copy() - df2.index = df.index.asobject - ax = df2.plot() + df2.index = df.index.astype(object) + df2.plot(ax=ax) diffs = Series(ax.get_lines()[0].get_xydata()[:, 0]).diff() - self.assertTrue((np.fabs(diffs[1:] - sec) < 1e-8).all()) + assert (np.fabs(diffs[1:] - sec) < 1e-8).all() def test_irregular_datetime64_repr_bug(self): - import matplotlib.pyplot as plt ser = tm.makeTimeSeries() ser = ser[[0, 1, 2, 7]] - fig = plt.gcf() - plt.clf() - ax = fig.add_subplot(211) - ret = ser.plot() - self.assertIsNotNone(ret) + _, ax = self.plt.subplots() + + ret = ser.plot(ax=ax) + assert ret is not None for rs, xp in zip(ax.get_lines()[0].get_xdata(), ser.index): - self.assertEqual(rs, xp) + assert rs == xp def test_business_freq(self): - import matplotlib.pyplot as plt # noqa bts = tm.makePeriodSeries() - ax = bts.plot() - self.assertEqual(ax.get_lines()[0].get_xydata()[0, 0], - bts.index[0].ordinal) + _, ax = self.plt.subplots() + bts.plot(ax=ax) + assert ax.get_lines()[0].get_xydata()[0, 0] == bts.index[0].ordinal idx = ax.get_lines()[0].get_xdata() - self.assertEqual(PeriodIndex(data=idx).freqstr, 'B') + assert PeriodIndex(data=idx).freqstr == 'B' - @slow + @pytest.mark.slow def test_business_freq_convert(self): n = tm.N tm.N = 300 bts = tm.makeTimeSeries().asfreq('BM') tm.N = n ts = bts.to_period('M') - ax = bts.plot() - self.assertEqual(ax.get_lines()[0].get_xydata()[0, 0], - ts.index[0].ordinal) + _, ax = self.plt.subplots() + bts.plot(ax=ax) + assert ax.get_lines()[0].get_xydata()[0, 0] == ts.index[0].ordinal idx = ax.get_lines()[0].get_xdata() - self.assertEqual(PeriodIndex(data=idx).freqstr, 'M') + assert PeriodIndex(data=idx).freqstr == 'M' def test_nonzero_base(self): # GH2571 
idx = (date_range('2012-12-20', periods=24, freq='H') + timedelta( minutes=30)) df = DataFrame(np.arange(24), index=idx) - ax = df.plot() + _, ax = self.plt.subplots() + df.plot(ax=ax) rs = ax.get_lines()[0].get_xdata() - self.assertFalse(Index(rs).is_normalized) + assert not Index(rs).is_normalized def test_dataframe(self): bts = DataFrame({'a': tm.makeTimeSeries()}) - ax = bts.plot() + _, ax = self.plt.subplots() + bts.plot(ax=ax) idx = ax.get_lines()[0].get_xdata() tm.assert_index_equal(bts.index.to_period(), PeriodIndex(idx)) - @slow + @pytest.mark.slow def test_axis_limits(self): - import matplotlib.pyplot as plt def _test(ax): xlim = ax.get_xlim() ax.set_xlim(xlim[0] - 5, xlim[1] + 10) ax.get_figure().canvas.draw() result = ax.get_xlim() - self.assertEqual(result[0], xlim[0] - 5) - self.assertEqual(result[1], xlim[1] + 10) + assert result[0] == xlim[0] - 5 + assert result[1] == xlim[1] + 10 # string expected = (Period('1/1/2000', ax.freq), @@ -357,26 +359,28 @@ def _test(ax): ax.set_xlim('1/1/2000', '4/1/2000') ax.get_figure().canvas.draw() result = ax.get_xlim() - self.assertEqual(int(result[0]), expected[0].ordinal) - self.assertEqual(int(result[1]), expected[1].ordinal) + assert int(result[0]) == expected[0].ordinal + assert int(result[1]) == expected[1].ordinal - # datetim + # datetime expected = (Period('1/1/2000', ax.freq), Period('4/1/2000', ax.freq)) ax.set_xlim(datetime(2000, 1, 1), datetime(2000, 4, 1)) ax.get_figure().canvas.draw() result = ax.get_xlim() - self.assertEqual(int(result[0]), expected[0].ordinal) - self.assertEqual(int(result[1]), expected[1].ordinal) + assert int(result[0]) == expected[0].ordinal + assert int(result[1]) == expected[1].ordinal fig = ax.get_figure() - plt.close(fig) + self.plt.close(fig) ser = tm.makeTimeSeries() - ax = ser.plot() + _, ax = self.plt.subplots() + ser.plot(ax=ax) _test(ax) + _, ax = self.plt.subplots() df = DataFrame({'a': ser, 'b': ser + 1}) - ax = df.plot() + df.plot(ax=ax) _test(ax) df = 
DataFrame({'a': ser, 'b': ser + 1}) @@ -386,351 +390,393 @@ def _test(ax): _test(ax) def test_get_finder(self): - import pandas.tseries.converter as conv + import pandas.plotting._converter as conv - self.assertEqual(conv.get_finder('B'), conv._daily_finder) - self.assertEqual(conv.get_finder('D'), conv._daily_finder) - self.assertEqual(conv.get_finder('M'), conv._monthly_finder) - self.assertEqual(conv.get_finder('Q'), conv._quarterly_finder) - self.assertEqual(conv.get_finder('A'), conv._annual_finder) - self.assertEqual(conv.get_finder('W'), conv._daily_finder) + assert conv.get_finder('B') == conv._daily_finder + assert conv.get_finder('D') == conv._daily_finder + assert conv.get_finder('M') == conv._monthly_finder + assert conv.get_finder('Q') == conv._quarterly_finder + assert conv.get_finder('A') == conv._annual_finder + assert conv.get_finder('W') == conv._daily_finder - @slow + @pytest.mark.slow def test_finder_daily(self): - import matplotlib.pyplot as plt - xp = Period('1999-1-1', freq='B').ordinal day_lst = [10, 40, 252, 400, 950, 2750, 10000] - for n in day_lst: + + if self.mpl_ge_2_0_0: + xpl1 = [7565, 7564, 7553, 7546, 7518, 7428, 7066] + xpl2 = [7566, 7564, 7554, 7546, 7519, 7429, 7066] + else: + xpl1 = xpl2 = [Period('1999-1-1', freq='B').ordinal] * len(day_lst) + + for i, n in enumerate(day_lst): + xp = xpl1[i] rng = bdate_range('1999-1-1', periods=n) ser = Series(np.random.randn(len(rng)), rng) - ax = ser.plot() + _, ax = self.plt.subplots() + ser.plot(ax=ax) xaxis = ax.get_xaxis() rs = xaxis.get_majorticklocs()[0] - self.assertEqual(xp, rs) + assert xp == rs + xp = xpl2[i] vmin, vmax = ax.get_xlim() ax.set_xlim(vmin + 0.9, vmax) rs = xaxis.get_majorticklocs()[0] - self.assertEqual(xp, rs) - plt.close(ax.get_figure()) + assert xp == rs + self.plt.close(ax.get_figure()) - @slow + @pytest.mark.slow def test_finder_quarterly(self): - import matplotlib.pyplot as plt - xp = Period('1988Q1').ordinal yrs = [3.5, 11] - for n in yrs: + + if 
self.mpl_ge_2_0_0: + xpl1 = [68, 68] + xpl2 = [72, 68] + else: + xpl1 = xpl2 = [Period('1988Q1').ordinal] * len(yrs) + + for i, n in enumerate(yrs): + xp = xpl1[i] rng = period_range('1987Q2', periods=int(n * 4), freq='Q') ser = Series(np.random.randn(len(rng)), rng) - ax = ser.plot() + _, ax = self.plt.subplots() + ser.plot(ax=ax) xaxis = ax.get_xaxis() rs = xaxis.get_majorticklocs()[0] - self.assertEqual(rs, xp) + assert rs == xp + xp = xpl2[i] (vmin, vmax) = ax.get_xlim() ax.set_xlim(vmin + 0.9, vmax) rs = xaxis.get_majorticklocs()[0] - self.assertEqual(xp, rs) - plt.close(ax.get_figure()) + assert xp == rs + self.plt.close(ax.get_figure()) - @slow + @pytest.mark.slow def test_finder_monthly(self): - import matplotlib.pyplot as plt - xp = Period('Jan 1988').ordinal yrs = [1.15, 2.5, 4, 11] - for n in yrs: + + if self.mpl_ge_2_0_0: + xpl1 = [216, 216, 204, 204] + xpl2 = [216, 216, 216, 204] + else: + xpl1 = xpl2 = [Period('Jan 1988').ordinal] * len(yrs) + + for i, n in enumerate(yrs): + xp = xpl1[i] rng = period_range('1987Q2', periods=int(n * 12), freq='M') ser = Series(np.random.randn(len(rng)), rng) - ax = ser.plot() + _, ax = self.plt.subplots() + ser.plot(ax=ax) xaxis = ax.get_xaxis() rs = xaxis.get_majorticklocs()[0] - self.assertEqual(rs, xp) + assert rs == xp + xp = xpl2[i] vmin, vmax = ax.get_xlim() ax.set_xlim(vmin + 0.9, vmax) rs = xaxis.get_majorticklocs()[0] - self.assertEqual(xp, rs) - plt.close(ax.get_figure()) + assert xp == rs + self.plt.close(ax.get_figure()) def test_finder_monthly_long(self): rng = period_range('1988Q1', periods=24 * 12, freq='M') ser = Series(np.random.randn(len(rng)), rng) - ax = ser.plot() + _, ax = self.plt.subplots() + ser.plot(ax=ax) xaxis = ax.get_xaxis() rs = xaxis.get_majorticklocs()[0] xp = Period('1989Q1', 'M').ordinal - self.assertEqual(rs, xp) + assert rs == xp - @slow + @pytest.mark.slow def test_finder_annual(self): - import matplotlib.pyplot as plt - xp = [1987, 1988, 1990, 1990, 1995, 2020, 2070, 2170] + if 
self.mpl_ge_2_0_0: + xp = [1986, 1986, 1990, 1990, 1995, 2020, 1970, 1970] + else: + xp = [1987, 1988, 1990, 1990, 1995, 2020, 2070, 2170] + for i, nyears in enumerate([5, 10, 19, 49, 99, 199, 599, 1001]): rng = period_range('1987', periods=nyears, freq='A') ser = Series(np.random.randn(len(rng)), rng) - ax = ser.plot() + _, ax = self.plt.subplots() + ser.plot(ax=ax) xaxis = ax.get_xaxis() rs = xaxis.get_majorticklocs()[0] - self.assertEqual(rs, Period(xp[i], freq='A').ordinal) - plt.close(ax.get_figure()) + assert rs == Period(xp[i], freq='A').ordinal + self.plt.close(ax.get_figure()) - @slow + @pytest.mark.slow def test_finder_minutely(self): nminutes = 50 * 24 * 60 rng = date_range('1/1/1999', freq='Min', periods=nminutes) ser = Series(np.random.randn(len(rng)), rng) - ax = ser.plot() + _, ax = self.plt.subplots() + ser.plot(ax=ax) xaxis = ax.get_xaxis() rs = xaxis.get_majorticklocs()[0] - xp = Period('1/1/1999', freq='Min').ordinal - self.assertEqual(rs, xp) + if self.mpl_ge_2_0_0: + xp = Period('1998-12-29 12:00', freq='Min').ordinal + else: + xp = Period('1/1/1999', freq='Min').ordinal + assert rs == xp def test_finder_hourly(self): nhours = 23 rng = date_range('1/1/1999', freq='H', periods=nhours) ser = Series(np.random.randn(len(rng)), rng) - ax = ser.plot() + _, ax = self.plt.subplots() + ser.plot(ax=ax) xaxis = ax.get_xaxis() rs = xaxis.get_majorticklocs()[0] - xp = Period('1/1/1999', freq='H').ordinal - self.assertEqual(rs, xp) - - @slow + if self.mpl_ge_2_0_0: + xp = Period('1998-12-31 22:00', freq='H').ordinal + else: + xp = Period('1/1/1999', freq='H').ordinal + assert rs == xp + + @td.skip_if_mpl_1_5 + @pytest.mark.slow def test_gaps(self): - import matplotlib.pyplot as plt - ts = tm.makeTimeSeries() ts[5:25] = np.nan - ax = ts.plot() + _, ax = self.plt.subplots() + ts.plot(ax=ax) lines = ax.get_lines() - tm._skip_if_mpl_1_5() - self.assertEqual(len(lines), 1) + assert len(lines) == 1 l = lines[0] data = l.get_xydata() - tm.assertIsInstance(data, 
np.ma.core.MaskedArray) + assert isinstance(data, np.ma.core.MaskedArray) mask = data.mask - self.assertTrue(mask[5:25, 1].all()) - plt.close(ax.get_figure()) + assert mask[5:25, 1].all() + self.plt.close(ax.get_figure()) # irregular ts = tm.makeTimeSeries() ts = ts[[0, 1, 2, 5, 7, 9, 12, 15, 20]] ts[2:5] = np.nan - ax = ts.plot() + _, ax = self.plt.subplots() + ax = ts.plot(ax=ax) lines = ax.get_lines() - self.assertEqual(len(lines), 1) + assert len(lines) == 1 l = lines[0] data = l.get_xydata() - tm.assertIsInstance(data, np.ma.core.MaskedArray) + assert isinstance(data, np.ma.core.MaskedArray) mask = data.mask - self.assertTrue(mask[2:5, 1].all()) - plt.close(ax.get_figure()) + assert mask[2:5, 1].all() + self.plt.close(ax.get_figure()) # non-ts idx = [0, 1, 2, 5, 7, 9, 12, 15, 20] ser = Series(np.random.randn(len(idx)), idx) ser[2:5] = np.nan - ax = ser.plot() + _, ax = self.plt.subplots() + ser.plot(ax=ax) lines = ax.get_lines() - self.assertEqual(len(lines), 1) + assert len(lines) == 1 l = lines[0] data = l.get_xydata() - tm.assertIsInstance(data, np.ma.core.MaskedArray) + assert isinstance(data, np.ma.core.MaskedArray) mask = data.mask - self.assertTrue(mask[2:5, 1].all()) + assert mask[2:5, 1].all() - @slow + @td.skip_if_mpl_1_5 + @pytest.mark.slow def test_gap_upsample(self): low = tm.makeTimeSeries() low[5:25] = np.nan - ax = low.plot() + _, ax = self.plt.subplots() + low.plot(ax=ax) idxh = date_range(low.index[0], low.index[-1], freq='12h') s = Series(np.random.randn(len(idxh)), idxh) s.plot(secondary_y=True) lines = ax.get_lines() - self.assertEqual(len(lines), 1) - self.assertEqual(len(ax.right_ax.get_lines()), 1) + assert len(lines) == 1 + assert len(ax.right_ax.get_lines()) == 1 l = lines[0] data = l.get_xydata() - tm._skip_if_mpl_1_5() - - tm.assertIsInstance(data, np.ma.core.MaskedArray) + assert isinstance(data, np.ma.core.MaskedArray) mask = data.mask - self.assertTrue(mask[5:25, 1].all()) + assert mask[5:25, 1].all() - @slow + @pytest.mark.slow 
def test_secondary_y(self): - import matplotlib.pyplot as plt - ser = Series(np.random.randn(10)) ser2 = Series(np.random.randn(10)) + fig, _ = self.plt.subplots() ax = ser.plot(secondary_y=True) - self.assertTrue(hasattr(ax, 'left_ax')) - self.assertFalse(hasattr(ax, 'right_ax')) - fig = ax.get_figure() + assert hasattr(ax, 'left_ax') + assert not hasattr(ax, 'right_ax') axes = fig.get_axes() l = ax.get_lines()[0] xp = Series(l.get_ydata(), l.get_xdata()) assert_series_equal(ser, xp) - self.assertEqual(ax.get_yaxis().get_ticks_position(), 'right') - self.assertFalse(axes[0].get_yaxis().get_visible()) - plt.close(fig) + assert ax.get_yaxis().get_ticks_position() == 'right' + assert not axes[0].get_yaxis().get_visible() + self.plt.close(fig) - ax2 = ser2.plot() - self.assertEqual(ax2.get_yaxis().get_ticks_position(), - self.default_tick_position) - plt.close(ax2.get_figure()) + _, ax2 = self.plt.subplots() + ser2.plot(ax=ax2) + assert (ax2.get_yaxis().get_ticks_position() == + self.default_tick_position) + self.plt.close(ax2.get_figure()) ax = ser2.plot() ax2 = ser.plot(secondary_y=True) - self.assertTrue(ax.get_yaxis().get_visible()) - self.assertFalse(hasattr(ax, 'left_ax')) - self.assertTrue(hasattr(ax, 'right_ax')) - self.assertTrue(hasattr(ax2, 'left_ax')) - self.assertFalse(hasattr(ax2, 'right_ax')) + assert ax.get_yaxis().get_visible() + assert not hasattr(ax, 'left_ax') + assert hasattr(ax, 'right_ax') + assert hasattr(ax2, 'left_ax') + assert not hasattr(ax2, 'right_ax') - @slow + @pytest.mark.slow def test_secondary_y_ts(self): - import matplotlib.pyplot as plt idx = date_range('1/1/2000', periods=10) ser = Series(np.random.randn(10), idx) ser2 = Series(np.random.randn(10), idx) + fig, _ = self.plt.subplots() ax = ser.plot(secondary_y=True) - self.assertTrue(hasattr(ax, 'left_ax')) - self.assertFalse(hasattr(ax, 'right_ax')) - fig = ax.get_figure() + assert hasattr(ax, 'left_ax') + assert not hasattr(ax, 'right_ax') axes = fig.get_axes() l = 
ax.get_lines()[0] xp = Series(l.get_ydata(), l.get_xdata()).to_timestamp() assert_series_equal(ser, xp) - self.assertEqual(ax.get_yaxis().get_ticks_position(), 'right') - self.assertFalse(axes[0].get_yaxis().get_visible()) - plt.close(fig) + assert ax.get_yaxis().get_ticks_position() == 'right' + assert not axes[0].get_yaxis().get_visible() + self.plt.close(fig) - ax2 = ser2.plot() - self.assertEqual(ax2.get_yaxis().get_ticks_position(), - self.default_tick_position) - plt.close(ax2.get_figure()) + _, ax2 = self.plt.subplots() + ser2.plot(ax=ax2) + assert (ax2.get_yaxis().get_ticks_position() == + self.default_tick_position) + self.plt.close(ax2.get_figure()) ax = ser2.plot() ax2 = ser.plot(secondary_y=True) - self.assertTrue(ax.get_yaxis().get_visible()) + assert ax.get_yaxis().get_visible() - @slow + @pytest.mark.slow + @td.skip_if_no_scipy def test_secondary_kde(self): - tm._skip_if_no_scipy() + if not self.mpl_ge_1_5_0: + pytest.skip("mpl is not supported") _skip_if_no_scipy_gaussian_kde() - import matplotlib.pyplot as plt # noqa ser = Series(np.random.randn(10)) - ax = ser.plot(secondary_y=True, kind='density') - self.assertTrue(hasattr(ax, 'left_ax')) - self.assertFalse(hasattr(ax, 'right_ax')) - fig = ax.get_figure() + fig, ax = self.plt.subplots() + ax = ser.plot(secondary_y=True, kind='density', ax=ax) + assert hasattr(ax, 'left_ax') + assert not hasattr(ax, 'right_ax') axes = fig.get_axes() - self.assertEqual(axes[1].get_yaxis().get_ticks_position(), 'right') + assert axes[1].get_yaxis().get_ticks_position() == 'right' - @slow + @pytest.mark.slow def test_secondary_bar(self): ser = Series(np.random.randn(10)) - ax = ser.plot(secondary_y=True, kind='bar') - fig = ax.get_figure() + fig, ax = self.plt.subplots() + ser.plot(secondary_y=True, kind='bar', ax=ax) axes = fig.get_axes() - self.assertEqual(axes[1].get_yaxis().get_ticks_position(), 'right') + assert axes[1].get_yaxis().get_ticks_position() == 'right' - @slow + @pytest.mark.slow def 
test_secondary_frame(self): df = DataFrame(np.random.randn(5, 3), columns=['a', 'b', 'c']) axes = df.plot(secondary_y=['a', 'c'], subplots=True) - self.assertEqual(axes[0].get_yaxis().get_ticks_position(), 'right') - self.assertEqual(axes[1].get_yaxis().get_ticks_position(), - self.default_tick_position) - self.assertEqual(axes[2].get_yaxis().get_ticks_position(), 'right') + assert axes[0].get_yaxis().get_ticks_position() == 'right' + assert (axes[1].get_yaxis().get_ticks_position() == + self.default_tick_position) + assert axes[2].get_yaxis().get_ticks_position() == 'right' - @slow + @pytest.mark.slow def test_secondary_bar_frame(self): df = DataFrame(np.random.randn(5, 3), columns=['a', 'b', 'c']) axes = df.plot(kind='bar', secondary_y=['a', 'c'], subplots=True) - self.assertEqual(axes[0].get_yaxis().get_ticks_position(), 'right') - self.assertEqual(axes[1].get_yaxis().get_ticks_position(), - self.default_tick_position) - self.assertEqual(axes[2].get_yaxis().get_ticks_position(), 'right') + assert axes[0].get_yaxis().get_ticks_position() == 'right' + assert (axes[1].get_yaxis().get_ticks_position() == + self.default_tick_position) + assert axes[2].get_yaxis().get_ticks_position() == 'right' def test_mixed_freq_regular_first(self): - import matplotlib.pyplot as plt # noqa + # TODO s1 = tm.makeTimeSeries() s2 = s1[[0, 5, 10, 11, 12, 13, 14, 15]] # it works! 
- s1.plot() + _, ax = self.plt.subplots() + s1.plot(ax=ax) - ax2 = s2.plot(style='g') + ax2 = s2.plot(style='g', ax=ax) lines = ax2.get_lines() idx1 = PeriodIndex(lines[0].get_xdata()) idx2 = PeriodIndex(lines[1].get_xdata()) - self.assertTrue(idx1.equals(s1.index.to_period('B'))) - self.assertTrue(idx2.equals(s2.index.to_period('B'))) + + tm.assert_index_equal(idx1, s1.index.to_period('B')) + tm.assert_index_equal(idx2, s2.index.to_period('B')) + left, right = ax2.get_xlim() pidx = s1.index.to_period() - self.assertEqual(left, pidx[0].ordinal) - self.assertEqual(right, pidx[-1].ordinal) + assert left <= pidx[0].ordinal + assert right >= pidx[-1].ordinal - @slow + @pytest.mark.slow def test_mixed_freq_irregular_first(self): - import matplotlib.pyplot as plt # noqa s1 = tm.makeTimeSeries() s2 = s1[[0, 5, 10, 11, 12, 13, 14, 15]] - s2.plot(style='g') - ax = s1.plot() - self.assertFalse(hasattr(ax, 'freq')) + _, ax = self.plt.subplots() + s2.plot(style='g', ax=ax) + s1.plot(ax=ax) + assert not hasattr(ax, 'freq') lines = ax.get_lines() x1 = lines[0].get_xdata() - tm.assert_numpy_array_equal(x1, s2.index.asobject.values) + tm.assert_numpy_array_equal(x1, s2.index.astype(object).values) x2 = lines[1].get_xdata() - tm.assert_numpy_array_equal(x2, s1.index.asobject.values) + tm.assert_numpy_array_equal(x2, s1.index.astype(object).values) def test_mixed_freq_regular_first_df(self): # GH 9852 - import matplotlib.pyplot as plt # noqa s1 = tm.makeTimeSeries().to_frame() s2 = s1.iloc[[0, 5, 10, 11, 12, 13, 14, 15], :] - ax = s1.plot() + _, ax = self.plt.subplots() + s1.plot(ax=ax) ax2 = s2.plot(style='g', ax=ax) lines = ax2.get_lines() idx1 = PeriodIndex(lines[0].get_xdata()) idx2 = PeriodIndex(lines[1].get_xdata()) - self.assertTrue(idx1.equals(s1.index.to_period('B'))) - self.assertTrue(idx2.equals(s2.index.to_period('B'))) + assert idx1.equals(s1.index.to_period('B')) + assert idx2.equals(s2.index.to_period('B')) left, right = ax2.get_xlim() pidx = s1.index.to_period() - 
self.assertEqual(left, pidx[0].ordinal) - self.assertEqual(right, pidx[-1].ordinal) + assert left <= pidx[0].ordinal + assert right >= pidx[-1].ordinal - @slow + @pytest.mark.slow def test_mixed_freq_irregular_first_df(self): # GH 9852 - import matplotlib.pyplot as plt # noqa s1 = tm.makeTimeSeries().to_frame() s2 = s1.iloc[[0, 5, 10, 11, 12, 13, 14, 15], :] - ax = s2.plot(style='g') - ax = s1.plot(ax=ax) - self.assertFalse(hasattr(ax, 'freq')) + _, ax = self.plt.subplots() + s2.plot(style='g', ax=ax) + s1.plot(ax=ax) + assert not hasattr(ax, 'freq') lines = ax.get_lines() x1 = lines[0].get_xdata() - tm.assert_numpy_array_equal(x1, s2.index.asobject.values) + tm.assert_numpy_array_equal(x1, s2.index.astype(object).values) x2 = lines[1].get_xdata() - tm.assert_numpy_array_equal(x2, s1.index.asobject.values) + tm.assert_numpy_array_equal(x2, s1.index.astype(object).values) def test_mixed_freq_hf_first(self): idxh = date_range('1/1/1999', periods=365, freq='D') idxl = date_range('1/1/1999', periods=12, freq='M') high = Series(np.random.randn(len(idxh)), idxh) low = Series(np.random.randn(len(idxl)), idxl) - high.plot() - ax = low.plot() + _, ax = self.plt.subplots() + high.plot(ax=ax) + low.plot(ax=ax) for l in ax.get_lines(): - self.assertEqual(PeriodIndex(data=l.get_xdata()).freq, 'D') + assert PeriodIndex(data=l.get_xdata()).freq == 'D' - @slow + @pytest.mark.slow def test_mixed_freq_alignment(self): ts_ind = date_range('2012-01-01 13:00', '2012-01-02', freq='H') ts_data = np.random.randn(12) @@ -738,44 +784,46 @@ def test_mixed_freq_alignment(self): ts = Series(ts_data, index=ts_ind) ts2 = ts.asfreq('T').interpolate() - ax = ts.plot() - ts2.plot(style='r') + _, ax = self.plt.subplots() + ax = ts.plot(ax=ax) + ts2.plot(style='r', ax=ax) - self.assertEqual(ax.lines[0].get_xdata()[0], - ax.lines[1].get_xdata()[0]) + assert ax.lines[0].get_xdata()[0] == ax.lines[1].get_xdata()[0] - @slow + @pytest.mark.slow def test_mixed_freq_lf_first(self): - import 
matplotlib.pyplot as plt idxh = date_range('1/1/1999', periods=365, freq='D') idxl = date_range('1/1/1999', periods=12, freq='M') high = Series(np.random.randn(len(idxh)), idxh) low = Series(np.random.randn(len(idxl)), idxl) - low.plot(legend=True) - ax = high.plot(legend=True) + _, ax = self.plt.subplots() + low.plot(legend=True, ax=ax) + high.plot(legend=True, ax=ax) for l in ax.get_lines(): - self.assertEqual(PeriodIndex(data=l.get_xdata()).freq, 'D') + assert PeriodIndex(data=l.get_xdata()).freq == 'D' leg = ax.get_legend() - self.assertEqual(len(leg.texts), 2) - plt.close(ax.get_figure()) + assert len(leg.texts) == 2 + self.plt.close(ax.get_figure()) idxh = date_range('1/1/1999', periods=240, freq='T') idxl = date_range('1/1/1999', periods=4, freq='H') high = Series(np.random.randn(len(idxh)), idxh) low = Series(np.random.randn(len(idxl)), idxl) - low.plot() - ax = high.plot() + _, ax = self.plt.subplots() + low.plot(ax=ax) + high.plot(ax=ax) for l in ax.get_lines(): - self.assertEqual(PeriodIndex(data=l.get_xdata()).freq, 'T') + assert PeriodIndex(data=l.get_xdata()).freq == 'T' def test_mixed_freq_irreg_period(self): ts = tm.makeTimeSeries() irreg = ts[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 16, 17, 18, 29]] rng = period_range('1/3/2000', periods=30, freq='B') ps = Series(np.random.randn(len(rng)), rng) - irreg.plot() - ps.plot() + _, ax = self.plt.subplots() + irreg.plot(ax=ax) + ps.plot(ax=ax) def test_mixed_freq_shared_ax(self): @@ -789,10 +837,10 @@ def test_mixed_freq_shared_ax(self): s1.plot(ax=ax1) s2.plot(ax=ax2) - self.assertEqual(ax1.freq, 'M') - self.assertEqual(ax2.freq, 'M') - self.assertEqual(ax1.lines[0].get_xydata()[0, 0], - ax2.lines[0].get_xydata()[0, 0]) + assert ax1.freq == 'M' + assert ax2.freq == 'M' + assert (ax1.lines[0].get_xydata()[0, 0] == + ax2.lines[0].get_xydata()[0, 0]) # using twinx fig, ax1 = self.plt.subplots() @@ -800,8 +848,8 @@ def test_mixed_freq_shared_ax(self): s1.plot(ax=ax1) s2.plot(ax=ax2) - 
self.assertEqual(ax1.lines[0].get_xydata()[0, 0], - ax2.lines[0].get_xydata()[0, 0]) + assert (ax1.lines[0].get_xydata()[0, 0] == + ax2.lines[0].get_xydata()[0, 0]) # TODO (GH14330, GH14322) # plotting the irregular first does not yet work @@ -809,65 +857,79 @@ def test_mixed_freq_shared_ax(self): # ax2 = ax1.twinx() # s2.plot(ax=ax1) # s1.plot(ax=ax2) - # self.assertEqual(ax1.lines[0].get_xydata()[0, 0], - # ax2.lines[0].get_xydata()[0, 0]) + # assert (ax1.lines[0].get_xydata()[0, 0] == + # ax2.lines[0].get_xydata()[0, 0]) - @slow + def test_nat_handling(self): + + _, ax = self.plt.subplots() + + dti = DatetimeIndex(['2015-01-01', NaT, '2015-01-03']) + s = Series(range(len(dti)), dti) + s.plot(ax=ax) + xdata = ax.get_lines()[0].get_xdata() + # plot x data is bounded by index values + assert s.index.min() <= Series(xdata).min() + assert Series(xdata).max() <= s.index.max() + + @pytest.mark.slow def test_to_weekly_resampling(self): idxh = date_range('1/1/1999', periods=52, freq='W') idxl = date_range('1/1/1999', periods=12, freq='M') high = Series(np.random.randn(len(idxh)), idxh) low = Series(np.random.randn(len(idxl)), idxl) - high.plot() - ax = low.plot() + _, ax = self.plt.subplots() + high.plot(ax=ax) + low.plot(ax=ax) for l in ax.get_lines(): - self.assertEqual(PeriodIndex(data=l.get_xdata()).freq, idxh.freq) + assert PeriodIndex(data=l.get_xdata()).freq == idxh.freq - # tsplot + _, ax = self.plt.subplots() from pandas.tseries.plotting import tsplot - import matplotlib.pyplot as plt - - tsplot(high, plt.Axes.plot) - lines = tsplot(low, plt.Axes.plot) + with tm.assert_produces_warning(FutureWarning): + tsplot(high, self.plt.Axes.plot, ax=ax) + with tm.assert_produces_warning(FutureWarning): + lines = tsplot(low, self.plt.Axes.plot, ax=ax) for l in lines: - self.assertTrue(PeriodIndex(data=l.get_xdata()).freq, idxh.freq) + assert PeriodIndex(data=l.get_xdata()).freq == idxh.freq - @slow + @pytest.mark.slow def test_from_weekly_resampling(self): idxh = 
date_range('1/1/1999', periods=52, freq='W') idxl = date_range('1/1/1999', periods=12, freq='M') high = Series(np.random.randn(len(idxh)), idxh) low = Series(np.random.randn(len(idxl)), idxl) - low.plot() - ax = high.plot() + _, ax = self.plt.subplots() + low.plot(ax=ax) + high.plot(ax=ax) expected_h = idxh.to_period().asi8.astype(np.float64) expected_l = np.array([1514, 1519, 1523, 1527, 1531, 1536, 1540, 1544, 1549, 1553, 1558, 1562], dtype=np.float64) for l in ax.get_lines(): - self.assertTrue(PeriodIndex(data=l.get_xdata()).freq, idxh.freq) + assert PeriodIndex(data=l.get_xdata()).freq == idxh.freq xdata = l.get_xdata(orig=False) if len(xdata) == 12: # idxl lines - self.assert_numpy_array_equal(xdata, expected_l) + tm.assert_numpy_array_equal(xdata, expected_l) else: - self.assert_numpy_array_equal(xdata, expected_h) + tm.assert_numpy_array_equal(xdata, expected_h) tm.close() - # tsplot + _, ax = self.plt.subplots() from pandas.tseries.plotting import tsplot - import matplotlib.pyplot as plt - - tsplot(low, plt.Axes.plot) - lines = tsplot(high, plt.Axes.plot) + with tm.assert_produces_warning(FutureWarning): + tsplot(low, self.plt.Axes.plot, ax=ax) + with tm.assert_produces_warning(FutureWarning): + lines = tsplot(high, self.plt.Axes.plot, ax=ax) for l in lines: - self.assertTrue(PeriodIndex(data=l.get_xdata()).freq, idxh.freq) + assert PeriodIndex(data=l.get_xdata()).freq == idxh.freq xdata = l.get_xdata(orig=False) if len(xdata) == 12: # idxl lines - self.assert_numpy_array_equal(xdata, expected_l) + tm.assert_numpy_array_equal(xdata, expected_l) else: - self.assert_numpy_array_equal(xdata, expected_h) + tm.assert_numpy_array_equal(xdata, expected_h) - @slow + @pytest.mark.slow def test_from_resampling_area_line_mixed(self): idxh = date_range('1/1/1999', periods=52, freq='W') idxl = date_range('1/1/1999', periods=12, freq='M') @@ -878,8 +940,9 @@ def test_from_resampling_area_line_mixed(self): # low to high for kind1, kind2 in [('line', 'area'), ('area', 
'line')]: - ax = low.plot(kind=kind1, stacked=True) - ax = high.plot(kind=kind2, stacked=True, ax=ax) + _, ax = self.plt.subplots() + low.plot(kind=kind1, stacked=True, ax=ax) + high.plot(kind=kind2, stacked=True, ax=ax) # check low dataframe result expected_x = np.array([1514, 1519, 1523, 1527, 1531, 1536, 1540, @@ -888,44 +951,43 @@ def test_from_resampling_area_line_mixed(self): expected_y = np.zeros(len(expected_x), dtype=np.float64) for i in range(3): l = ax.lines[i] - self.assertEqual(PeriodIndex(l.get_xdata()).freq, idxh.freq) - self.assert_numpy_array_equal(l.get_xdata(orig=False), - expected_x) + assert PeriodIndex(l.get_xdata()).freq == idxh.freq + tm.assert_numpy_array_equal(l.get_xdata(orig=False), + expected_x) # check stacked values are correct expected_y += low[i].values - self.assert_numpy_array_equal( - l.get_ydata(orig=False), expected_y) + tm.assert_numpy_array_equal(l.get_ydata(orig=False), + expected_y) # check high dataframe result expected_x = idxh.to_period().asi8.astype(np.float64) expected_y = np.zeros(len(expected_x), dtype=np.float64) for i in range(3): l = ax.lines[3 + i] - self.assertEqual(PeriodIndex(data=l.get_xdata()).freq, - idxh.freq) - self.assert_numpy_array_equal(l.get_xdata(orig=False), - expected_x) + assert PeriodIndex(data=l.get_xdata()).freq == idxh.freq + tm.assert_numpy_array_equal(l.get_xdata(orig=False), + expected_x) expected_y += high[i].values - self.assert_numpy_array_equal(l.get_ydata(orig=False), - expected_y) + tm.assert_numpy_array_equal(l.get_ydata(orig=False), + expected_y) # high to low for kind1, kind2 in [('line', 'area'), ('area', 'line')]: - ax = high.plot(kind=kind1, stacked=True) - ax = low.plot(kind=kind2, stacked=True, ax=ax) + _, ax = self.plt.subplots() + high.plot(kind=kind1, stacked=True, ax=ax) + low.plot(kind=kind2, stacked=True, ax=ax) # check high dataframe result expected_x = idxh.to_period().asi8.astype(np.float64) expected_y = np.zeros(len(expected_x), dtype=np.float64) for i in range(3): 
l = ax.lines[i] - self.assertEqual(PeriodIndex(data=l.get_xdata()).freq, - idxh.freq) - self.assert_numpy_array_equal( - l.get_xdata(orig=False), expected_x) + assert PeriodIndex(data=l.get_xdata()).freq == idxh.freq + tm.assert_numpy_array_equal(l.get_xdata(orig=False), + expected_x) expected_y += high[i].values - self.assert_numpy_array_equal( - l.get_ydata(orig=False), expected_y) + tm.assert_numpy_array_equal(l.get_ydata(orig=False), + expected_y) # check low dataframe result expected_x = np.array([1514, 1519, 1523, 1527, 1531, 1536, 1540, @@ -934,15 +996,14 @@ def test_from_resampling_area_line_mixed(self): expected_y = np.zeros(len(expected_x), dtype=np.float64) for i in range(3): l = ax.lines[3 + i] - self.assertEqual(PeriodIndex(data=l.get_xdata()).freq, - idxh.freq) - self.assert_numpy_array_equal(l.get_xdata(orig=False), - expected_x) + assert PeriodIndex(data=l.get_xdata()).freq == idxh.freq + tm.assert_numpy_array_equal(l.get_xdata(orig=False), + expected_x) expected_y += low[i].values - self.assert_numpy_array_equal(l.get_ydata(orig=False), - expected_y) + tm.assert_numpy_array_equal(l.get_ydata(orig=False), + expected_y) - @slow + @pytest.mark.slow def test_mixed_freq_second_millisecond(self): # GH 7772, GH 7760 idxh = date_range('2014-07-01 09:00', freq='S', periods=50) @@ -950,21 +1011,23 @@ def test_mixed_freq_second_millisecond(self): high = Series(np.random.randn(len(idxh)), idxh) low = Series(np.random.randn(len(idxl)), idxl) # high to low - high.plot() - ax = low.plot() - self.assertEqual(len(ax.get_lines()), 2) + _, ax = self.plt.subplots() + high.plot(ax=ax) + low.plot(ax=ax) + assert len(ax.get_lines()) == 2 for l in ax.get_lines(): - self.assertEqual(PeriodIndex(data=l.get_xdata()).freq, 'L') + assert PeriodIndex(data=l.get_xdata()).freq == 'L' tm.close() # low to high - low.plot() - ax = high.plot() - self.assertEqual(len(ax.get_lines()), 2) + _, ax = self.plt.subplots() + low.plot(ax=ax) + high.plot(ax=ax) + assert len(ax.get_lines()) == 
2 for l in ax.get_lines(): - self.assertEqual(PeriodIndex(data=l.get_xdata()).freq, 'L') + assert PeriodIndex(data=l.get_xdata()).freq == 'L' - @slow + @pytest.mark.slow def test_irreg_dtypes(self): # date idx = [date(2000, 1, 1), date(2000, 1, 5), date(2000, 1, 20)] @@ -973,11 +1036,13 @@ def test_irreg_dtypes(self): # np.datetime64 idx = date_range('1/1/2000', periods=10) - idx = idx[[0, 2, 5, 9]].asobject + idx = idx[[0, 2, 5, 9]].astype(object) df = DataFrame(np.random.randn(len(idx), 3), idx) - _check_plot_works(df.plot) + _, ax = self.plt.subplots() + _check_plot_works(df.plot, ax=ax) - @slow + @pytest.mark.xfail(not PY3, reason="failing on mpl 1.4.3 on PY2") + @pytest.mark.slow def test_time(self): t = datetime(1, 1, 1, 3, 30, 0) deltas = np.random.randint(1, 20, 3).cumsum() @@ -985,34 +1050,43 @@ def test_time(self): df = DataFrame({'a': np.random.randn(len(ts)), 'b': np.random.randn(len(ts))}, index=ts) - ax = df.plot() + fig, ax = self.plt.subplots() + df.plot(ax=ax) # verify tick labels + fig.canvas.draw() ticks = ax.get_xticks() labels = ax.get_xticklabels() for t, l in zip(ticks, labels): m, s = divmod(int(t), 60) h, m = divmod(m, 60) - xp = l.get_text() - if len(xp) > 0: - rs = time(h, m, s).strftime('%H:%M:%S') - self.assertEqual(xp, rs) + rs = l.get_text() + if len(rs) > 0: + if s != 0: + xp = time(h, m, s).strftime('%H:%M:%S') + else: + xp = time(h, m, s).strftime('%H:%M') + assert xp == rs # change xlim ax.set_xlim('1:30', '5:00') # check tick labels again + fig.canvas.draw() ticks = ax.get_xticks() labels = ax.get_xticklabels() for t, l in zip(ticks, labels): m, s = divmod(int(t), 60) h, m = divmod(m, 60) - xp = l.get_text() - if len(xp) > 0: - rs = time(h, m, s).strftime('%H:%M:%S') - self.assertEqual(xp, rs) - - @slow + rs = l.get_text() + if len(rs) > 0: + if s != 0: + xp = time(h, m, s).strftime('%H:%M:%S') + else: + xp = time(h, m, s).strftime('%H:%M') + assert xp == rs + + @pytest.mark.slow def test_time_musec(self): t = datetime(1, 1, 1, 
3, 30, 0) deltas = np.random.randint(1, 20, 3).cumsum() @@ -1021,159 +1095,166 @@ def test_time_musec(self): df = DataFrame({'a': np.random.randn(len(ts)), 'b': np.random.randn(len(ts))}, index=ts) - ax = df.plot() + fig, ax = self.plt.subplots() + ax = df.plot(ax=ax) # verify tick labels + fig.canvas.draw() ticks = ax.get_xticks() labels = ax.get_xticklabels() for t, l in zip(ticks, labels): m, s = divmod(int(t), 60) - # TODO: unused? - # us = int((t - int(t)) * 1e6) + us = int(round((t - int(t)) * 1e6)) h, m = divmod(m, 60) - xp = l.get_text() - if len(xp) > 0: - rs = time(h, m, s).strftime('%H:%M:%S.%f') - self.assertEqual(xp, rs) - - @slow + rs = l.get_text() + if len(rs) > 0: + if (us % 1000) != 0: + xp = time(h, m, s, us).strftime('%H:%M:%S.%f') + elif (us // 1000) != 0: + xp = time(h, m, s, us).strftime('%H:%M:%S.%f')[:-3] + elif s != 0: + xp = time(h, m, s, us).strftime('%H:%M:%S') + else: + xp = time(h, m, s, us).strftime('%H:%M') + assert xp == rs + + @pytest.mark.slow def test_secondary_upsample(self): idxh = date_range('1/1/1999', periods=365, freq='D') idxl = date_range('1/1/1999', periods=12, freq='M') high = Series(np.random.randn(len(idxh)), idxh) low = Series(np.random.randn(len(idxl)), idxl) - low.plot() - ax = high.plot(secondary_y=True) + _, ax = self.plt.subplots() + low.plot(ax=ax) + ax = high.plot(secondary_y=True, ax=ax) for l in ax.get_lines(): - self.assertEqual(PeriodIndex(l.get_xdata()).freq, 'D') - self.assertTrue(hasattr(ax, 'left_ax')) - self.assertFalse(hasattr(ax, 'right_ax')) + assert PeriodIndex(l.get_xdata()).freq == 'D' + assert hasattr(ax, 'left_ax') + assert not hasattr(ax, 'right_ax') for l in ax.left_ax.get_lines(): - self.assertEqual(PeriodIndex(l.get_xdata()).freq, 'D') + assert PeriodIndex(l.get_xdata()).freq == 'D' - @slow + @pytest.mark.slow def test_secondary_legend(self): - import matplotlib.pyplot as plt - fig = plt.gcf() - plt.clf() + fig = self.plt.figure() ax = fig.add_subplot(211) # ts df = tm.makeTimeDataFrame() 
- ax = df.plot(secondary_y=['A', 'B']) + df.plot(secondary_y=['A', 'B'], ax=ax) leg = ax.get_legend() - self.assertEqual(len(leg.get_lines()), 4) - self.assertEqual(leg.get_texts()[0].get_text(), 'A (right)') - self.assertEqual(leg.get_texts()[1].get_text(), 'B (right)') - self.assertEqual(leg.get_texts()[2].get_text(), 'C') - self.assertEqual(leg.get_texts()[3].get_text(), 'D') - self.assertIsNone(ax.right_ax.get_legend()) + assert len(leg.get_lines()) == 4 + assert leg.get_texts()[0].get_text() == 'A (right)' + assert leg.get_texts()[1].get_text() == 'B (right)' + assert leg.get_texts()[2].get_text() == 'C' + assert leg.get_texts()[3].get_text() == 'D' + assert ax.right_ax.get_legend() is None colors = set() for line in leg.get_lines(): colors.add(line.get_color()) # TODO: color cycle problems - self.assertEqual(len(colors), 4) + assert len(colors) == 4 + self.plt.close(fig) - plt.clf() + fig = self.plt.figure() ax = fig.add_subplot(211) - ax = df.plot(secondary_y=['A', 'C'], mark_right=False) + df.plot(secondary_y=['A', 'C'], mark_right=False, ax=ax) leg = ax.get_legend() - self.assertEqual(len(leg.get_lines()), 4) - self.assertEqual(leg.get_texts()[0].get_text(), 'A') - self.assertEqual(leg.get_texts()[1].get_text(), 'B') - self.assertEqual(leg.get_texts()[2].get_text(), 'C') - self.assertEqual(leg.get_texts()[3].get_text(), 'D') - - plt.clf() - ax = df.plot(kind='bar', secondary_y=['A']) + assert len(leg.get_lines()) == 4 + assert leg.get_texts()[0].get_text() == 'A' + assert leg.get_texts()[1].get_text() == 'B' + assert leg.get_texts()[2].get_text() == 'C' + assert leg.get_texts()[3].get_text() == 'D' + self.plt.close(fig) + + fig, ax = self.plt.subplots() + df.plot(kind='bar', secondary_y=['A'], ax=ax) leg = ax.get_legend() - self.assertEqual(leg.get_texts()[0].get_text(), 'A (right)') - self.assertEqual(leg.get_texts()[1].get_text(), 'B') + assert leg.get_texts()[0].get_text() == 'A (right)' + assert leg.get_texts()[1].get_text() == 'B' + 
self.plt.close(fig) - plt.clf() - ax = df.plot(kind='bar', secondary_y=['A'], mark_right=False) + fig, ax = self.plt.subplots() + df.plot(kind='bar', secondary_y=['A'], mark_right=False, ax=ax) leg = ax.get_legend() - self.assertEqual(leg.get_texts()[0].get_text(), 'A') - self.assertEqual(leg.get_texts()[1].get_text(), 'B') + assert leg.get_texts()[0].get_text() == 'A' + assert leg.get_texts()[1].get_text() == 'B' + self.plt.close(fig) - plt.clf() + fig = self.plt.figure() ax = fig.add_subplot(211) df = tm.makeTimeDataFrame() - ax = df.plot(secondary_y=['C', 'D']) + ax = df.plot(secondary_y=['C', 'D'], ax=ax) leg = ax.get_legend() - self.assertEqual(len(leg.get_lines()), 4) - self.assertIsNone(ax.right_ax.get_legend()) + assert len(leg.get_lines()) == 4 + assert ax.right_ax.get_legend() is None colors = set() for line in leg.get_lines(): colors.add(line.get_color()) # TODO: color cycle problems - self.assertEqual(len(colors), 4) + assert len(colors) == 4 + self.plt.close(fig) # non-ts df = tm.makeDataFrame() - plt.clf() + fig = self.plt.figure() ax = fig.add_subplot(211) - ax = df.plot(secondary_y=['A', 'B']) + ax = df.plot(secondary_y=['A', 'B'], ax=ax) leg = ax.get_legend() - self.assertEqual(len(leg.get_lines()), 4) - self.assertIsNone(ax.right_ax.get_legend()) + assert len(leg.get_lines()) == 4 + assert ax.right_ax.get_legend() is None colors = set() for line in leg.get_lines(): colors.add(line.get_color()) # TODO: color cycle problems - self.assertEqual(len(colors), 4) + assert len(colors) == 4 + self.plt.close() - plt.clf() + fig = self.plt.figure() ax = fig.add_subplot(211) - ax = df.plot(secondary_y=['C', 'D']) + ax = df.plot(secondary_y=['C', 'D'], ax=ax) leg = ax.get_legend() - self.assertEqual(len(leg.get_lines()), 4) - self.assertIsNone(ax.right_ax.get_legend()) + assert len(leg.get_lines()) == 4 + assert ax.right_ax.get_legend() is None colors = set() for line in leg.get_lines(): colors.add(line.get_color()) # TODO: color cycle problems - 
self.assertEqual(len(colors), 4) + assert len(colors) == 4 def test_format_date_axis(self): rng = date_range('1/1/2012', periods=12, freq='M') df = DataFrame(np.random.randn(len(rng), 3), rng) - ax = df.plot() + _, ax = self.plt.subplots() + ax = df.plot(ax=ax) xaxis = ax.get_xaxis() for l in xaxis.get_ticklabels(): if len(l.get_text()) > 0: - self.assertEqual(l.get_rotation(), 30) + assert l.get_rotation() == 30 - @slow + @pytest.mark.slow def test_ax_plot(self): - import matplotlib.pyplot as plt - x = DatetimeIndex(start='2012-01-02', periods=10, freq='D') y = lrange(len(x)) - fig = plt.figure() - ax = fig.add_subplot(111) + _, ax = self.plt.subplots() lines = ax.plot(x, y, label='Y') tm.assert_index_equal(DatetimeIndex(lines[0].get_xdata()), x) - @slow + @pytest.mark.slow def test_mpl_nopandas(self): - import matplotlib.pyplot as plt - dates = [date(2008, 12, 31), date(2009, 1, 31)] values1 = np.arange(10.0, 11.0, 0.5) values2 = np.arange(11.0, 12.0, 0.5) kw = dict(fmt='-', lw=4) - plt.close('all') - fig = plt.figure() - ax = fig.add_subplot(111) + _, ax = self.plt.subplots() ax.plot_date([x.toordinal() for x in dates], values1, **kw) ax.plot_date([x.toordinal() for x in dates], values2, **kw) @@ -1184,22 +1265,23 @@ def test_mpl_nopandas(self): exp = np.array([x.toordinal() for x in dates], dtype=np.float64) tm.assert_numpy_array_equal(line2.get_xydata()[:, 0], exp) - @slow + @pytest.mark.slow def test_irregular_ts_shared_ax_xlim(self): # GH 2960 ts = tm.makeTimeSeries()[:20] ts_irregular = ts[[1, 4, 5, 6, 8, 9, 10, 12, 13, 14, 15, 17, 18]] # plot the left section of the irregular series, then the right section - ax = ts_irregular[:5].plot() + _, ax = self.plt.subplots() + ts_irregular[:5].plot(ax=ax) ts_irregular[5:].plot(ax=ax) # check that axis limits are correct left, right = ax.get_xlim() - self.assertEqual(left, ts_irregular.index.min().toordinal()) - self.assertEqual(right, ts_irregular.index.max().toordinal()) + assert left <= 
ts_irregular.index.min().toordinal() + assert right >= ts_irregular.index.max().toordinal() - @slow + @pytest.mark.slow def test_secondary_y_non_ts_xlim(self): # GH 3490 - non-timeseries with secondary y index_1 = [1, 2, 3, 4] @@ -1207,15 +1289,16 @@ def test_secondary_y_non_ts_xlim(self): s1 = Series(1, index=index_1) s2 = Series(2, index=index_2) - ax = s1.plot() + _, ax = self.plt.subplots() + s1.plot(ax=ax) left_before, right_before = ax.get_xlim() s2.plot(secondary_y=True, ax=ax) left_after, right_after = ax.get_xlim() - self.assertEqual(left_before, left_after) - self.assertTrue(right_before < right_after) + assert left_before >= left_after + assert right_before < right_after - @slow + @pytest.mark.slow def test_secondary_y_regular_ts_xlim(self): # GH 3490 - regular-timeseries with secondary y index_1 = date_range(start='2000-01-01', periods=4, freq='D') @@ -1223,52 +1306,183 @@ def test_secondary_y_regular_ts_xlim(self): s1 = Series(1, index=index_1) s2 = Series(2, index=index_2) - ax = s1.plot() + _, ax = self.plt.subplots() + s1.plot(ax=ax) left_before, right_before = ax.get_xlim() s2.plot(secondary_y=True, ax=ax) left_after, right_after = ax.get_xlim() - self.assertEqual(left_before, left_after) - self.assertTrue(right_before < right_after) + assert left_before >= left_after + assert right_before < right_after - @slow + @pytest.mark.slow def test_secondary_y_mixed_freq_ts_xlim(self): # GH 3490 - mixed frequency timeseries with secondary y rng = date_range('2000-01-01', periods=10000, freq='min') ts = Series(1, index=rng) - ax = ts.plot() + _, ax = self.plt.subplots() + ts.plot(ax=ax) left_before, right_before = ax.get_xlim() ts.resample('D').mean().plot(secondary_y=True, ax=ax) left_after, right_after = ax.get_xlim() # a downsample should not have changed either limit - self.assertEqual(left_before, left_after) - self.assertEqual(right_before, right_after) + assert left_before == left_after + assert right_before == right_after - @slow + @pytest.mark.slow 
def test_secondary_y_irregular_ts_xlim(self): # GH 3490 - irregular-timeseries with secondary y ts = tm.makeTimeSeries()[:20] ts_irregular = ts[[1, 4, 5, 6, 8, 9, 10, 12, 13, 14, 15, 17, 18]] - ax = ts_irregular[:5].plot() + _, ax = self.plt.subplots() + ts_irregular[:5].plot(ax=ax) # plot higher-x values on secondary axis ts_irregular[5:].plot(secondary_y=True, ax=ax) # ensure secondary limits aren't overwritten by plot on primary ts_irregular[:5].plot(ax=ax) left, right = ax.get_xlim() - self.assertEqual(left, ts_irregular.index.min().toordinal()) - self.assertEqual(right, ts_irregular.index.max().toordinal()) + assert left <= ts_irregular.index.min().toordinal() + assert right >= ts_irregular.index.max().toordinal() def test_plot_outofbounds_datetime(self): # 2579 - checking this does not raise values = [date(1677, 1, 1), date(1677, 1, 2)] - self.plt.plot(values) + _, ax = self.plt.subplots() + ax.plot(values) values = [datetime(1677, 1, 1, 12), datetime(1677, 1, 2, 12)] - self.plt.plot(values) + ax.plot(values) + + @td.xfail_if_mpl_2_2 + @pytest.mark.skip( + is_platform_mac(), + "skip on mac for precision display issue on older mpl") + def test_format_timedelta_ticks_narrow(self): + + if self.mpl_ge_2_0_0: + expected_labels = [''] + [ + '00:00:00.00000000{:d}'.format(2 * i) + for i in range(5)] + [''] + else: + expected_labels = [ + '00:00:00.00000000{:d}'.format(i) + for i in range(10)] + + rng = timedelta_range('0', periods=10, freq='ns') + df = DataFrame(np.random.randn(len(rng), 3), rng) + fig, ax = self.plt.subplots() + df.plot(fontsize=2, ax=ax) + fig.canvas.draw() + labels = ax.get_xticklabels() + assert len(labels) == len(expected_labels) + for l, l_expected in zip(labels, expected_labels): + assert l.get_text() == l_expected + + @td.xfail_if_mpl_2_2 + @pytest.mark.skip( + is_platform_mac(), + "skip on mac for precision display issue on older mpl") + def test_format_timedelta_ticks_wide(self): + + if self.mpl_ge_2_0_0: + expected_labels = [ + '', + 
'00:00:00', + '1 days 03:46:40', + '2 days 07:33:20', + '3 days 11:20:00', + '4 days 15:06:40', + '5 days 18:53:20', + '6 days 22:40:00', + '8 days 02:26:40', + '9 days 06:13:20', + '' + ] + else: + expected_labels = [ + '00:00:00', + '1 days 03:46:40', + '2 days 07:33:20', + '3 days 11:20:00', + '4 days 15:06:40', + '5 days 18:53:20', + '6 days 22:40:00', + '8 days 02:26:40', + '' + ] + + rng = timedelta_range('0', periods=10, freq='1 d') + df = DataFrame(np.random.randn(len(rng), 3), rng) + fig, ax = self.plt.subplots() + ax = df.plot(fontsize=2, ax=ax) + fig.canvas.draw() + labels = ax.get_xticklabels() + assert len(labels) == len(expected_labels) + for l, l_expected in zip(labels, expected_labels): + assert l.get_text() == l_expected + + def test_timedelta_plot(self): + # test issue #8711 + s = Series(range(5), timedelta_range('1day', periods=5)) + _, ax = self.plt.subplots() + _check_plot_works(s.plot, ax=ax) + + # test long period + index = timedelta_range('1 day 2 hr 30 min 10 s', + periods=10, freq='1 d') + s = Series(np.random.randn(len(index)), index) + _, ax = self.plt.subplots() + _check_plot_works(s.plot, ax=ax) + + # test short period + index = timedelta_range('1 day 2 hr 30 min 10 s', + periods=10, freq='1 ns') + s = Series(np.random.randn(len(index)), index) + _, ax = self.plt.subplots() + _check_plot_works(s.plot, ax=ax) + + def test_hist(self): + # https://github.com/matplotlib/matplotlib/issues/8459 + rng = date_range('1/1/2011', periods=10, freq='H') + x = rng + w1 = np.arange(0, 1, .1) + w2 = np.arange(0, 1, .1)[::-1] + _, ax = self.plt.subplots() + ax.hist([x, x], weights=[w1, w2]) + + @pytest.mark.slow + def test_overlapping_datetime(self): + # GB 6608 + s1 = Series([1, 2, 3], index=[datetime(1995, 12, 31), + datetime(2000, 12, 31), + datetime(2005, 12, 31)]) + s2 = Series([1, 2, 3], index=[datetime(1997, 12, 31), + datetime(2003, 12, 31), + datetime(2008, 12, 31)]) + + # plot first series, then add the second series to those axes, + # then 
try adding the first series again + _, ax = self.plt.subplots() + s1.plot(ax=ax) + s2.plot(ax=ax) + s1.plot(ax=ax) + + @pytest.mark.xfail(reason="GH9053 matplotlib does not use" + " ax.xaxis.converter") + def test_add_matplotlib_datetime64(self): + # GH9053 - ensure that a plot with PeriodConverter still understands + # datetime64 data. This still fails because matplotlib overrides the + # ax.xaxis.converter with a DatetimeConverter + s = Series(np.random.randn(10), + index=date_range('1970-01-02', periods=10)) + ax = s.plot() + ax.plot(s.index, s.values, color='g') + l1, l2 = ax.lines + tm.assert_numpy_array_equal(l1.get_xydata(), l2.get_xydata()) def _check_plot_works(f, freq=None, series=None, *args, **kwargs): @@ -1306,5 +1520,12 @@ def _check_plot_works(f, freq=None, series=None, *args, **kwargs): with ensure_clean(return_filelike=True) as path: plt.savefig(path) + + # GH18439 + # this is supported only in Python 3 pickle since + # pickle in Python2 doesn't support instancemethod pickling + if PY3: + with ensure_clean(return_filelike=True) as path: + pickle.dump(fig, path) finally: plt.close(fig) diff --git a/pandas/tests/plotting/test_deprecated.py b/pandas/tests/plotting/test_deprecated.py new file mode 100644 index 0000000000000..2c2d371921d2f --- /dev/null +++ b/pandas/tests/plotting/test_deprecated.py @@ -0,0 +1,58 @@ +# coding: utf-8 + +import string + +import pandas as pd +import pandas.util.testing as tm +import pandas.util._test_decorators as td +import pytest + +from numpy.random import randn + +import pandas.tools.plotting as plotting + +from pandas.tests.plotting.common import TestPlotBase + + +""" +Test cases for plot functions imported from deprecated +pandas.tools.plotting +""" + + +@td.skip_if_no_mpl +class TestDeprecatedNameSpace(TestPlotBase): + + @pytest.mark.slow + @td.skip_if_no_scipy + def test_scatter_plot_legacy(self): + df = pd.DataFrame(randn(100, 2)) + + with tm.assert_produces_warning(FutureWarning): + plotting.scatter_matrix(df) + 
+ with tm.assert_produces_warning(FutureWarning): + pd.scatter_matrix(df) + + @pytest.mark.slow + def test_boxplot_deprecated(self): + df = pd.DataFrame(randn(6, 4), + index=list(string.ascii_letters[:6]), + columns=['one', 'two', 'three', 'four']) + df['indic'] = ['foo', 'bar'] * 3 + + with tm.assert_produces_warning(FutureWarning): + plotting.boxplot(df, column=['one', 'two'], + by='indic') + + @pytest.mark.slow + def test_radviz_deprecated(self): + df = self.iris + with tm.assert_produces_warning(FutureWarning): + plotting.radviz(frame=df, class_column='Name') + + @pytest.mark.slow + def test_plot_params(self): + + with tm.assert_produces_warning(FutureWarning): + pd.plot_params['xaxis.compat'] = True diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 48af366f24ea4..b29afcb404ac6 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -11,28 +11,26 @@ import pandas as pd from pandas import (Series, DataFrame, MultiIndex, PeriodIndex, date_range, bdate_range) -from pandas.types.api import is_list_like -from pandas.compat import (range, lrange, StringIO, lmap, lzip, u, zip, PY3) -from pandas.formats.printing import pprint_thing +from pandas.core.dtypes.api import is_list_like +from pandas.compat import range, lrange, lmap, lzip, u, zip, PY3 +from pandas.io.formats.printing import pprint_thing import pandas.util.testing as tm -from pandas.util.testing import slow - -from pandas.core.config import set_option +import pandas.util._test_decorators as td import numpy as np from numpy.random import rand, randn -import pandas.tools.plotting as plotting +import pandas.plotting as plotting from pandas.tests.plotting.common import (TestPlotBase, _check_plot_works, _skip_if_no_scipy_gaussian_kde, _ok_for_gaussian_kde) -@tm.mplskip +@td.skip_if_no_mpl class TestDataFramePlots(TestPlotBase): - def setUp(self): - TestPlotBase.setUp(self) + def setup_method(self, method): + 
TestPlotBase.setup_method(self, method) import matplotlib as mpl mpl.rcdefaults() @@ -42,7 +40,7 @@ def setUp(self): "C": np.arange(20) + np.random.uniform( size=20)}) - @slow + @pytest.mark.slow def test_plot(self): df = self.tdf _check_plot_works(df.plot, grid=False) @@ -64,7 +62,7 @@ def test_plot(self): df = DataFrame({'x': [1, 2], 'y': [3, 4]}) # mpl >= 1.5.2 (or slightly below) throw AttributError - with tm.assertRaises((TypeError, AttributeError)): + with pytest.raises((TypeError, AttributeError)): df.plot.line(blarg=True) df = DataFrame(np.random.rand(10, 3), @@ -134,12 +132,39 @@ def test_plot(self): # passed ax should be used: fig, ax = self.plt.subplots() axes = df.plot.bar(subplots=True, ax=ax) - self.assertEqual(len(axes), 1) + assert len(axes) == 1 if self.mpl_ge_1_5_0: result = ax.axes else: result = ax.get_axes() # deprecated - self.assertIs(result, axes[0]) + assert result is axes[0] + + # GH 15516 + def test_mpl2_color_cycle_str(self): + # test CN mpl 2.0 color cycle + if self.mpl_ge_2_0_0: + colors = ['C' + str(x) for x in range(10)] + df = DataFrame(randn(10, 3), columns=['a', 'b', 'c']) + for c in colors: + _check_plot_works(df.plot, color=c) + else: + pytest.skip("not supported in matplotlib < 2.0.0") + + def test_color_single_series_list(self): + # GH 3486 + df = DataFrame({"A": [1, 2, 3]}) + _check_plot_works(df.plot, color=['red']) + + def test_rgb_tuple_color(self): + # GH 16695 + df = DataFrame({'x': [1, 2], 'y': [3, 4]}) + _check_plot_works(df.plot, x='x', y='y', color=(1, 0, 0)) + _check_plot_works(df.plot, x='x', y='y', color=(1, 0, 0, 0.5)) + + def test_color_empty_string(self): + df = DataFrame(randn(10, 2)) + with pytest.raises(ValueError): + df.plot(color='') def test_color_and_style_arguments(self): df = DataFrame({'x': [1, 2], 'y': [3, 4]}) @@ -148,35 +173,35 @@ def test_color_and_style_arguments(self): ax = df.plot(color=['red', 'black'], style=['-', '--']) # check that the linestyles are correctly set: linestyle = 
[line.get_linestyle() for line in ax.lines] - self.assertEqual(linestyle, ['-', '--']) + assert linestyle == ['-', '--'] # check that the colors are correctly set: color = [line.get_color() for line in ax.lines] - self.assertEqual(color, ['red', 'black']) + assert color == ['red', 'black'] # passing both 'color' and 'style' arguments should not be allowed # if there is a color symbol in the style strings: - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df.plot(color=['red', 'black'], style=['k-', 'r--']) def test_nonnumeric_exclude(self): df = DataFrame({'A': ["x", "y", "z"], 'B': [1, 2, 3]}) ax = df.plot() - self.assertEqual(len(ax.get_lines()), 1) # B was plotted + assert len(ax.get_lines()) == 1 # B was plotted - @slow + @pytest.mark.slow def test_implicit_label(self): df = DataFrame(randn(10, 3), columns=['a', 'b', 'c']) ax = df.plot(x='a', y='b') self._check_text_labels(ax.xaxis.get_label(), 'a') - @slow + @pytest.mark.slow def test_donot_overwrite_index_name(self): # GH 8494 df = DataFrame(randn(2, 2), columns=['a', 'b']) df.index.name = 'NAME' df.plot(y='b', label='LABEL') - self.assertEqual(df.index.name, 'NAME') + assert df.index.name == 'NAME' - @slow + @pytest.mark.slow def test_plot_xy(self): # columns.inferred_type == 'string' df = self.tdf @@ -202,7 +227,7 @@ def test_plot_xy(self): # columns.inferred_type == 'mixed' # TODO add MultiIndex test - @slow + @pytest.mark.slow def test_logscales(self): df = DataFrame({'a': np.arange(100)}, index=np.arange(100)) ax = df.plot(logy=True) @@ -214,40 +239,41 @@ def test_logscales(self): ax = df.plot(loglog=True) self._check_ax_scales(ax, xaxis='log', yaxis='log') - @slow + @pytest.mark.slow def test_xcompat(self): import pandas as pd df = self.tdf ax = df.plot(x_compat=True) lines = ax.get_lines() - self.assertNotIsInstance(lines[0].get_xdata(), PeriodIndex) + assert not isinstance(lines[0].get_xdata(), PeriodIndex) tm.close() - pd.plot_params['xaxis.compat'] = True + 
pd.plotting.plot_params['xaxis.compat'] = True ax = df.plot() lines = ax.get_lines() - self.assertNotIsInstance(lines[0].get_xdata(), PeriodIndex) + assert not isinstance(lines[0].get_xdata(), PeriodIndex) tm.close() - pd.plot_params['x_compat'] = False + pd.plotting.plot_params['x_compat'] = False + ax = df.plot() lines = ax.get_lines() - self.assertNotIsInstance(lines[0].get_xdata(), PeriodIndex) - self.assertIsInstance(PeriodIndex(lines[0].get_xdata()), PeriodIndex) + assert not isinstance(lines[0].get_xdata(), PeriodIndex) + assert isinstance(PeriodIndex(lines[0].get_xdata()), PeriodIndex) tm.close() # useful if you're plotting a bunch together - with pd.plot_params.use('x_compat', True): + with pd.plotting.plot_params.use('x_compat', True): ax = df.plot() lines = ax.get_lines() - self.assertNotIsInstance(lines[0].get_xdata(), PeriodIndex) + assert not isinstance(lines[0].get_xdata(), PeriodIndex) tm.close() ax = df.plot() lines = ax.get_lines() - self.assertNotIsInstance(lines[0].get_xdata(), PeriodIndex) - self.assertIsInstance(PeriodIndex(lines[0].get_xdata()), PeriodIndex) + assert not isinstance(lines[0].get_xdata(), PeriodIndex) + assert isinstance(PeriodIndex(lines[0].get_xdata()), PeriodIndex) def test_period_compat(self): # GH 9012 @@ -278,7 +304,30 @@ def test_unsorted_index(self): rs = Series(rs[:, 1], rs[:, 0], dtype=np.int64, name='y') tm.assert_series_equal(rs, df.y) - @slow + def test_unsorted_index_lims(self): + df = DataFrame({'y': [0., 1., 2., 3.]}, index=[1., 0., 3., 2.]) + ax = df.plot() + xmin, xmax = ax.get_xlim() + lines = ax.get_lines() + assert xmin <= np.nanmin(lines[0].get_data()[0]) + assert xmax >= np.nanmax(lines[0].get_data()[0]) + + df = DataFrame({'y': [0., 1., np.nan, 3., 4., 5., 6.]}, + index=[1., 0., 3., 2., np.nan, 3., 2.]) + ax = df.plot() + xmin, xmax = ax.get_xlim() + lines = ax.get_lines() + assert xmin <= np.nanmin(lines[0].get_data()[0]) + assert xmax >= np.nanmax(lines[0].get_data()[0]) + + df = DataFrame({'y': [0., 
1., 2., 3.], 'z': [91., 90., 93., 92.]}) + ax = df.plot(x='z', y='y') + xmin, xmax = ax.get_xlim() + lines = ax.get_lines() + assert xmin <= np.nanmin(lines[0].get_data()[0]) + assert xmax >= np.nanmax(lines[0].get_data()[0]) + + @pytest.mark.slow def test_subplots(self): df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) @@ -286,7 +335,7 @@ def test_subplots(self): for kind in ['bar', 'barh', 'line', 'area']: axes = df.plot(kind=kind, subplots=True, sharex=True, legend=True) self._check_axes_shape(axes, axes_num=3, layout=(3, 1)) - self.assertEqual(axes.shape, (3, )) + assert axes.shape == (3, ) for ax, column in zip(axes, df.columns): self._check_legend_labels(ax, @@ -316,9 +365,9 @@ def test_subplots(self): axes = df.plot(kind=kind, subplots=True, legend=False) for ax in axes: - self.assertTrue(ax.get_legend() is None) + assert ax.get_legend() is None - @slow + @pytest.mark.slow def test_subplots_timeseries(self): idx = date_range(start='2014-07-01', freq='M', periods=10) df = DataFrame(np.random.rand(10, 3), index=idx) @@ -354,7 +403,83 @@ def test_subplots_timeseries(self): self._check_ticks_props(ax, xlabelsize=7, xrot=45, ylabelsize=7) - @slow + def test_subplots_timeseries_y_axis(self): + # GH16953 + data = {"numeric": np.array([1, 2, 5]), + "timedelta": [pd.Timedelta(-10, unit="s"), + pd.Timedelta(10, unit="m"), + pd.Timedelta(10, unit="h")], + "datetime_no_tz": [pd.to_datetime("2017-08-01 00:00:00"), + pd.to_datetime("2017-08-01 02:00:00"), + pd.to_datetime("2017-08-02 00:00:00")], + "datetime_all_tz": [pd.to_datetime("2017-08-01 00:00:00", + utc=True), + pd.to_datetime("2017-08-01 02:00:00", + utc=True), + pd.to_datetime("2017-08-02 00:00:00", + utc=True)], + "text": ["This", "should", "fail"]} + testdata = DataFrame(data) + + ax_numeric = testdata.plot(y="numeric") + assert (ax_numeric.get_lines()[0].get_data()[1] == + testdata["numeric"].values).all() + ax_timedelta = testdata.plot(y="timedelta") + assert 
(ax_timedelta.get_lines()[0].get_data()[1] == + testdata["timedelta"].values).all() + ax_datetime_no_tz = testdata.plot(y="datetime_no_tz") + assert (ax_datetime_no_tz.get_lines()[0].get_data()[1] == + testdata["datetime_no_tz"].values).all() + ax_datetime_all_tz = testdata.plot(y="datetime_all_tz") + assert (ax_datetime_all_tz.get_lines()[0].get_data()[1] == + testdata["datetime_all_tz"].values).all() + with pytest.raises(TypeError): + testdata.plot(y="text") + + @pytest.mark.xfail(reason='not support for period, categorical, ' + 'datetime_mixed_tz') + def test_subplots_timeseries_y_axis_not_supported(self): + """ + This test will fail for: + period: + since period isn't yet implemented in ``select_dtypes`` + and because it will need a custom value converter + + tick formater (as was done for x-axis plots) + + categorical: + because it will need a custom value converter + + tick formater (also doesn't work for x-axis, as of now) + + datetime_mixed_tz: + because of the way how pandas handels ``Series`` of + ``datetime`` objects with different timezone, + generally converting ``datetime`` objects in a tz-aware + form could help with this problem + """ + data = {"numeric": np.array([1, 2, 5]), + "period": [pd.Period('2017-08-01 00:00:00', freq='H'), + pd.Period('2017-08-01 02:00', freq='H'), + pd.Period('2017-08-02 00:00:00', freq='H')], + "categorical": pd.Categorical(["c", "b", "a"], + categories=["a", "b", "c"], + ordered=False), + "datetime_mixed_tz": [pd.to_datetime("2017-08-01 00:00:00", + utc=True), + pd.to_datetime("2017-08-01 02:00:00"), + pd.to_datetime("2017-08-02 00:00:00")]} + testdata = pd.DataFrame(data) + ax_period = testdata.plot(x="numeric", y="period") + assert (ax_period.get_lines()[0].get_data()[1] == + testdata["period"].values).all() + ax_categorical = testdata.plot(x="numeric", y="categorical") + assert (ax_categorical.get_lines()[0].get_data()[1] == + testdata["categorical"].values).all() + ax_datetime_mixed_tz = testdata.plot(x="numeric", + 
y="datetime_mixed_tz") + assert (ax_datetime_mixed_tz.get_lines()[0].get_data()[1] == + testdata["datetime_mixed_tz"].values).all() + + @pytest.mark.slow def test_subplots_layout(self): # GH 6667 df = DataFrame(np.random.rand(10, 3), @@ -362,45 +487,45 @@ def test_subplots_layout(self): axes = df.plot(subplots=True, layout=(2, 2)) self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) - self.assertEqual(axes.shape, (2, 2)) + assert axes.shape == (2, 2) axes = df.plot(subplots=True, layout=(-1, 2)) self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) - self.assertEqual(axes.shape, (2, 2)) + assert axes.shape == (2, 2) axes = df.plot(subplots=True, layout=(2, -1)) self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) - self.assertEqual(axes.shape, (2, 2)) + assert axes.shape == (2, 2) axes = df.plot(subplots=True, layout=(1, 4)) self._check_axes_shape(axes, axes_num=3, layout=(1, 4)) - self.assertEqual(axes.shape, (1, 4)) + assert axes.shape == (1, 4) axes = df.plot(subplots=True, layout=(-1, 4)) self._check_axes_shape(axes, axes_num=3, layout=(1, 4)) - self.assertEqual(axes.shape, (1, 4)) + assert axes.shape == (1, 4) axes = df.plot(subplots=True, layout=(4, -1)) self._check_axes_shape(axes, axes_num=3, layout=(4, 1)) - self.assertEqual(axes.shape, (4, 1)) + assert axes.shape == (4, 1) - with tm.assertRaises(ValueError): - axes = df.plot(subplots=True, layout=(1, 1)) - with tm.assertRaises(ValueError): - axes = df.plot(subplots=True, layout=(-1, -1)) + with pytest.raises(ValueError): + df.plot(subplots=True, layout=(1, 1)) + with pytest.raises(ValueError): + df.plot(subplots=True, layout=(-1, -1)) # single column df = DataFrame(np.random.rand(10, 1), index=list(string.ascii_letters[:10])) axes = df.plot(subplots=True) self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) - self.assertEqual(axes.shape, (1, )) + assert axes.shape == (1, ) axes = df.plot(subplots=True, layout=(3, 3)) self._check_axes_shape(axes, axes_num=1, layout=(3, 3)) - 
self.assertEqual(axes.shape, (3, 3)) + assert axes.shape == (3, 3) - @slow + @pytest.mark.slow def test_subplots_warnings(self): # GH 9464 warnings.simplefilter('error') @@ -415,7 +540,7 @@ def test_subplots_warnings(self): self.fail(w) warnings.simplefilter('default') - @slow + @pytest.mark.slow def test_subplots_multiple_axes(self): # GH 5353, 6970, GH 7069 fig, axes = self.plt.subplots(2, 3) @@ -425,18 +550,18 @@ def test_subplots_multiple_axes(self): returned = df.plot(subplots=True, ax=axes[0], sharex=False, sharey=False) self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) - self.assertEqual(returned.shape, (3, )) - self.assertIs(returned[0].figure, fig) + assert returned.shape == (3, ) + assert returned[0].figure is fig # draw on second row returned = df.plot(subplots=True, ax=axes[1], sharex=False, sharey=False) self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) - self.assertEqual(returned.shape, (3, )) - self.assertIs(returned[0].figure, fig) + assert returned.shape == (3, ) + assert returned[0].figure is fig self._check_axes_shape(axes, axes_num=6, layout=(2, 3)) tm.close() - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): fig, axes = self.plt.subplots(2, 3) # pass different number of axes from required df.plot(subplots=True, ax=axes) @@ -447,24 +572,23 @@ def test_subplots_multiple_axes(self): # TestDataFrameGroupByPlots.test_grouped_box_multiple_axes fig, axes = self.plt.subplots(2, 2) with warnings.catch_warnings(): - warnings.simplefilter('ignore') df = DataFrame(np.random.rand(10, 4), index=list(string.ascii_letters[:10])) returned = df.plot(subplots=True, ax=axes, layout=(2, 1), sharex=False, sharey=False) self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) - self.assertEqual(returned.shape, (4, )) + assert returned.shape == (4, ) returned = df.plot(subplots=True, ax=axes, layout=(2, -1), sharex=False, sharey=False) self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) - 
self.assertEqual(returned.shape, (4, )) + assert returned.shape == (4, ) returned = df.plot(subplots=True, ax=axes, layout=(-1, 2), sharex=False, sharey=False) self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) - self.assertEqual(returned.shape, (4, )) + assert returned.shape == (4, ) # single column fig, axes = self.plt.subplots(1, 1) @@ -473,7 +597,7 @@ def test_subplots_multiple_axes(self): axes = df.plot(subplots=True, ax=[axes], sharex=False, sharey=False) self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) - self.assertEqual(axes.shape, (1, )) + assert axes.shape == (1, ) def test_subplots_ts_share_axes(self): # GH 3964 @@ -516,44 +640,44 @@ def test_subplots_sharex_axes_existing_axes(self): for ax in axes.ravel(): self._check_visible(ax.get_yticklabels(), visible=True) - @slow + @pytest.mark.slow def test_subplots_dup_columns(self): # GH 10962 df = DataFrame(np.random.rand(5, 5), columns=list('aaaaa')) axes = df.plot(subplots=True) for ax in axes: self._check_legend_labels(ax, labels=['a']) - self.assertEqual(len(ax.lines), 1) + assert len(ax.lines) == 1 tm.close() axes = df.plot(subplots=True, secondary_y='a') for ax in axes: # (right) is only attached when subplots=False self._check_legend_labels(ax, labels=['a']) - self.assertEqual(len(ax.lines), 1) + assert len(ax.lines) == 1 tm.close() ax = df.plot(secondary_y='a') self._check_legend_labels(ax, labels=['a (right)'] * 5) - self.assertEqual(len(ax.lines), 0) - self.assertEqual(len(ax.right_ax.lines), 5) + assert len(ax.lines) == 0 + assert len(ax.right_ax.lines) == 5 def test_negative_log(self): df = - DataFrame(rand(6, 4), index=list(string.ascii_letters[:6]), columns=['x', 'y', 'z', 'four']) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df.plot.area(logy=True) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df.plot.area(loglog=True) def _compare_stacked_y_cood(self, normal_lines, stacked_lines): base = 
np.zeros(len(normal_lines[0].get_data()[1])) for nl, sl in zip(normal_lines, stacked_lines): - base += nl.get_data()[1] # get y coodinates + base += nl.get_data()[1] # get y coordinates sy = sl.get_data()[1] - self.assert_numpy_array_equal(base, sy) + tm.assert_numpy_array_equal(base, sy) def test_line_area_stacked(self): with tm.RNGContext(42): @@ -584,7 +708,7 @@ def test_line_area_stacked(self): self._compare_stacked_y_cood(ax1.lines[2:], ax2.lines[2:]) _check_plot_works(mixed_df.plot, stacked=False) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): mixed_df.plot(stacked=True) _check_plot_works(df.plot, kind=kind, logx=True, stacked=True) @@ -603,55 +727,55 @@ def test_line_area_nan_df(self): # remove nan for comparison purpose exp = np.array([1, 2, 3], dtype=np.float64) - self.assert_numpy_array_equal(np.delete(masked1.data, 2), exp) + tm.assert_numpy_array_equal(np.delete(masked1.data, 2), exp) exp = np.array([3, 2, 1], dtype=np.float64) - self.assert_numpy_array_equal(np.delete(masked2.data, 1), exp) - self.assert_numpy_array_equal( + tm.assert_numpy_array_equal(np.delete(masked2.data, 1), exp) + tm.assert_numpy_array_equal( masked1.mask, np.array([False, False, True, False])) - self.assert_numpy_array_equal( + tm.assert_numpy_array_equal( masked2.mask, np.array([False, True, False, False])) expected1 = np.array([1, 2, 0, 3], dtype=np.float64) expected2 = np.array([3, 0, 2, 1], dtype=np.float64) ax = _check_plot_works(d.plot, stacked=True) - self.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected1) - self.assert_numpy_array_equal(ax.lines[1].get_ydata(), - expected1 + expected2) + tm.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected1) + tm.assert_numpy_array_equal(ax.lines[1].get_ydata(), + expected1 + expected2) ax = _check_plot_works(d.plot.area) - self.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected1) - self.assert_numpy_array_equal(ax.lines[1].get_ydata(), - expected1 + expected2) + 
tm.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected1) + tm.assert_numpy_array_equal(ax.lines[1].get_ydata(), + expected1 + expected2) ax = _check_plot_works(d.plot.area, stacked=False) - self.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected1) - self.assert_numpy_array_equal(ax.lines[1].get_ydata(), expected2) + tm.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected1) + tm.assert_numpy_array_equal(ax.lines[1].get_ydata(), expected2) def test_line_lim(self): df = DataFrame(rand(6, 3), columns=['x', 'y', 'z']) ax = df.plot() xmin, xmax = ax.get_xlim() lines = ax.get_lines() - self.assertEqual(xmin, lines[0].get_data()[0][0]) - self.assertEqual(xmax, lines[0].get_data()[0][-1]) + assert xmin <= lines[0].get_data()[0][0] + assert xmax >= lines[0].get_data()[0][-1] ax = df.plot(secondary_y=True) xmin, xmax = ax.get_xlim() lines = ax.get_lines() - self.assertEqual(xmin, lines[0].get_data()[0][0]) - self.assertEqual(xmax, lines[0].get_data()[0][-1]) + assert xmin <= lines[0].get_data()[0][0] + assert xmax >= lines[0].get_data()[0][-1] axes = df.plot(secondary_y=True, subplots=True) self._check_axes_shape(axes, axes_num=3, layout=(3, 1)) for ax in axes: - self.assertTrue(hasattr(ax, 'left_ax')) - self.assertFalse(hasattr(ax, 'right_ax')) + assert hasattr(ax, 'left_ax') + assert not hasattr(ax, 'right_ax') xmin, xmax = ax.get_xlim() lines = ax.get_lines() - self.assertEqual(xmin, lines[0].get_data()[0][0]) - self.assertEqual(xmax, lines[0].get_data()[0][-1]) + assert xmin <= lines[0].get_data()[0][0] + assert xmax >= lines[0].get_data()[0][-1] def test_area_lim(self): df = DataFrame(rand(6, 4), columns=['x', 'y', 'z', 'four']) @@ -662,15 +786,15 @@ def test_area_lim(self): xmin, xmax = ax.get_xlim() ymin, ymax = ax.get_ylim() lines = ax.get_lines() - self.assertEqual(xmin, lines[0].get_data()[0][0]) - self.assertEqual(xmax, lines[0].get_data()[0][-1]) - self.assertEqual(ymin, 0) + assert xmin <= lines[0].get_data()[0][0] + assert xmax >= 
lines[0].get_data()[0][-1] + assert ymin == 0 ax = _check_plot_works(neg_df.plot.area, stacked=stacked) ymin, ymax = ax.get_ylim() - self.assertEqual(ymax, 0) + assert ymax == 0 - @slow + @pytest.mark.slow def test_bar_colors(self): import matplotlib.pyplot as plt default_colors = self._maybe_unpack_cycler(plt.rcParams) @@ -706,28 +830,42 @@ def test_bar_colors(self): self._check_colors(ax.patches[::5], facecolors=['green'] * 5) tm.close() - @slow + def test_bar_user_colors(self): + df = pd.DataFrame({"A": range(4), + "B": range(1, 5), + "color": ['red', 'blue', 'blue', 'red']}) + # This should *only* work when `y` is specified, else + # we use one color per column + ax = df.plot.bar(y='A', color=df['color']) + result = [p.get_facecolor() for p in ax.patches] + expected = [(1., 0., 0., 1.), + (0., 0., 1., 1.), + (0., 0., 1., 1.), + (1., 0., 0., 1.)] + assert result == expected + + @pytest.mark.slow def test_bar_linewidth(self): df = DataFrame(randn(5, 5)) # regular ax = df.plot.bar(linewidth=2) for r in ax.patches: - self.assertEqual(r.get_linewidth(), 2) + assert r.get_linewidth() == 2 # stacked ax = df.plot.bar(stacked=True, linewidth=2) for r in ax.patches: - self.assertEqual(r.get_linewidth(), 2) + assert r.get_linewidth() == 2 # subplots axes = df.plot.bar(linewidth=2, subplots=True) self._check_axes_shape(axes, axes_num=5, layout=(5, 1)) for ax in axes: for r in ax.patches: - self.assertEqual(r.get_linewidth(), 2) + assert r.get_linewidth() == 2 - @slow + @pytest.mark.slow def test_bar_barwidth(self): df = DataFrame(randn(5, 5)) @@ -736,36 +874,36 @@ def test_bar_barwidth(self): # regular ax = df.plot.bar(width=width) for r in ax.patches: - self.assertEqual(r.get_width(), width / len(df.columns)) + assert r.get_width() == width / len(df.columns) # stacked ax = df.plot.bar(stacked=True, width=width) for r in ax.patches: - self.assertEqual(r.get_width(), width) + assert r.get_width() == width # horizontal regular ax = df.plot.barh(width=width) for r in 
ax.patches: - self.assertEqual(r.get_height(), width / len(df.columns)) + assert r.get_height() == width / len(df.columns) # horizontal stacked ax = df.plot.barh(stacked=True, width=width) for r in ax.patches: - self.assertEqual(r.get_height(), width) + assert r.get_height() == width # subplots axes = df.plot.bar(width=width, subplots=True) for ax in axes: for r in ax.patches: - self.assertEqual(r.get_width(), width) + assert r.get_width() == width # horizontal subplots axes = df.plot.barh(width=width, subplots=True) for ax in axes: for r in ax.patches: - self.assertEqual(r.get_height(), width) + assert r.get_height() == width - @slow + @pytest.mark.slow def test_bar_barwidth_position(self): df = DataFrame(randn(5, 5)) self._check_bar_alignment(df, kind='bar', stacked=False, width=0.9, @@ -781,7 +919,7 @@ def test_bar_barwidth_position(self): self._check_bar_alignment(df, kind='barh', subplots=True, width=0.9, position=0.2) - @slow + @pytest.mark.slow def test_bar_barwidth_position_int(self): # GH 12979 df = DataFrame(randn(5, 5)) @@ -790,10 +928,10 @@ def test_bar_barwidth_position_int(self): ax = df.plot.bar(stacked=True, width=w) ticks = ax.xaxis.get_ticklocs() tm.assert_numpy_array_equal(ticks, np.array([0, 1, 2, 3, 4])) - self.assertEqual(ax.get_xlim(), (-0.75, 4.75)) + assert ax.get_xlim() == (-0.75, 4.75) # check left-edge of bars - self.assertEqual(ax.patches[0].get_x(), -0.5) - self.assertEqual(ax.patches[-1].get_x(), 3.5) + assert ax.patches[0].get_x() == -0.5 + assert ax.patches[-1].get_x() == 3.5 self._check_bar_alignment(df, kind='bar', stacked=True, width=1) self._check_bar_alignment(df, kind='barh', stacked=False, width=1) @@ -801,36 +939,36 @@ def test_bar_barwidth_position_int(self): self._check_bar_alignment(df, kind='bar', subplots=True, width=1) self._check_bar_alignment(df, kind='barh', subplots=True, width=1) - @slow + @pytest.mark.slow def test_bar_bottom_left(self): df = DataFrame(rand(5, 5)) ax = df.plot.bar(stacked=False, bottom=1) result 
= [p.get_y() for p in ax.patches] - self.assertEqual(result, [1] * 25) + assert result == [1] * 25 ax = df.plot.bar(stacked=True, bottom=[-1, -2, -3, -4, -5]) result = [p.get_y() for p in ax.patches[:5]] - self.assertEqual(result, [-1, -2, -3, -4, -5]) + assert result == [-1, -2, -3, -4, -5] ax = df.plot.barh(stacked=False, left=np.array([1, 1, 1, 1, 1])) result = [p.get_x() for p in ax.patches] - self.assertEqual(result, [1] * 25) + assert result == [1] * 25 ax = df.plot.barh(stacked=True, left=[1, 2, 3, 4, 5]) result = [p.get_x() for p in ax.patches[:5]] - self.assertEqual(result, [1, 2, 3, 4, 5]) + assert result == [1, 2, 3, 4, 5] axes = df.plot.bar(subplots=True, bottom=-1) for ax in axes: result = [p.get_y() for p in ax.patches] - self.assertEqual(result, [-1] * 5) + assert result == [-1] * 5 axes = df.plot.barh(subplots=True, left=np.array([1, 1, 1, 1, 1])) for ax in axes: result = [p.get_x() for p in ax.patches] - self.assertEqual(result, [1] * 5) + assert result == [1] * 5 - @slow + @pytest.mark.slow def test_bar_nan(self): df = DataFrame({'A': [10, np.nan, 20], 'B': [5, 10, 20], @@ -838,17 +976,17 @@ def test_bar_nan(self): ax = df.plot.bar() expected = [10, 0, 20, 5, 10, 20, 1, 2, 3] result = [p.get_height() for p in ax.patches] - self.assertEqual(result, expected) + assert result == expected ax = df.plot.bar(stacked=True) result = [p.get_height() for p in ax.patches] - self.assertEqual(result, expected) + assert result == expected result = [p.get_y() for p in ax.patches] expected = [0.0, 0.0, 0.0, 10.0, 0.0, 20.0, 15.0, 10.0, 40.0] - self.assertEqual(result, expected) + assert result == expected - @slow + @pytest.mark.slow def test_bar_categorical(self): # GH 13019 df1 = pd.DataFrame(np.random.randn(6, 5), @@ -863,18 +1001,18 @@ def test_bar_categorical(self): ax = df.plot.bar() ticks = ax.xaxis.get_ticklocs() tm.assert_numpy_array_equal(ticks, np.array([0, 1, 2, 3, 4, 5])) - self.assertEqual(ax.get_xlim(), (-0.5, 5.5)) + assert ax.get_xlim() == (-0.5, 
5.5) # check left-edge of bars - self.assertEqual(ax.patches[0].get_x(), -0.25) - self.assertEqual(ax.patches[-1].get_x(), 5.15) + assert ax.patches[0].get_x() == -0.25 + assert ax.patches[-1].get_x() == 5.15 ax = df.plot.bar(stacked=True) tm.assert_numpy_array_equal(ticks, np.array([0, 1, 2, 3, 4, 5])) - self.assertEqual(ax.get_xlim(), (-0.5, 5.5)) - self.assertEqual(ax.patches[0].get_x(), -0.25) - self.assertEqual(ax.patches[-1].get_x(), 4.75) + assert ax.get_xlim() == (-0.5, 5.5) + assert ax.patches[0].get_x() == -0.25 + assert ax.patches[-1].get_x() == 4.75 - @slow + @pytest.mark.slow def test_plot_scatter(self): df = DataFrame(randn(6, 4), index=list(string.ascii_letters[:6]), @@ -883,16 +1021,34 @@ def test_plot_scatter(self): _check_plot_works(df.plot.scatter, x='x', y='y') _check_plot_works(df.plot.scatter, x=1, y=2) - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): df.plot.scatter(x='x') - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): df.plot.scatter(y='y') # GH 6951 axes = df.plot(x='x', y='y', kind='scatter', subplots=True) self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) - @slow + @pytest.mark.slow + def test_plot_scatter_with_categorical_data(self): + # GH 16199 + df = pd.DataFrame({'x': [1, 2, 3, 4], + 'y': pd.Categorical(['a', 'b', 'a', 'c'])}) + + with pytest.raises(ValueError) as ve: + df.plot(x='x', y='y', kind='scatter') + ve.match('requires y column to be numeric') + + with pytest.raises(ValueError) as ve: + df.plot(x='y', y='x', kind='scatter') + ve.match('requires x column to be numeric') + + with pytest.raises(ValueError) as ve: + df.plot(x='y', y='y', kind='scatter') + ve.match('requires x column to be numeric') + + @pytest.mark.slow def test_plot_scatter_with_c(self): df = DataFrame(randn(6, 4), index=list(string.ascii_letters[:6]), @@ -902,25 +1058,25 @@ def test_plot_scatter_with_c(self): df.plot.scatter(x=0, y=1, c=2)] for ax in axes: # default to Greys - 
self.assertEqual(ax.collections[0].cmap.name, 'Greys') + assert ax.collections[0].cmap.name == 'Greys' if self.mpl_ge_1_3_1: # n.b. there appears to be no public method to get the colorbar # label - self.assertEqual(ax.collections[0].colorbar._label, 'z') + assert ax.collections[0].colorbar._label == 'z' cm = 'cubehelix' ax = df.plot.scatter(x='x', y='y', c='z', colormap=cm) - self.assertEqual(ax.collections[0].cmap.name, cm) + assert ax.collections[0].cmap.name == cm # verify turning off colorbar works ax = df.plot.scatter(x='x', y='y', c='z', colorbar=False) - self.assertIs(ax.collections[0].colorbar, None) + assert ax.collections[0].colorbar is None # verify that we can still plot a solid color ax = df.plot.scatter(x=0, y=1, c='red') - self.assertIs(ax.collections[0].colorbar, None) + assert ax.collections[0].colorbar is None self._check_colors(ax.collections, facecolors=['r']) # Ensure that we can pass an np.array straight through to matplotlib, @@ -938,8 +1094,8 @@ def test_plot_scatter_with_c(self): # identical to the values we supplied, normally we'd be on shaky ground # comparing floats for equality but here we expect them to be # identical. - self.assertTrue(np.array_equal(ax.collections[0].get_facecolor(), - rgba_array)) + tm.assert_numpy_array_equal(ax.collections[0] + .get_facecolor(), rgba_array) # we don't test the colors of the faces in this next plot because they # are dependent on the spring colormap, which may change its colors # later. 
@@ -948,7 +1104,7 @@ def test_plot_scatter_with_c(self): def test_scatter_colors(self): df = DataFrame({'a': [1, 2, 3], 'b': [1, 2, 3], 'c': [1, 2, 3]}) - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): df.plot.scatter(x='a', y='b', c='c', color='green') default_colors = self._maybe_unpack_cycler(self.plt.rcParams) @@ -962,7 +1118,7 @@ def test_scatter_colors(self): tm.assert_numpy_array_equal(ax.collections[0].get_facecolor()[0], np.array([1, 1, 1, 1], dtype=np.float64)) - @slow + @pytest.mark.slow def test_plot_bar(self): df = DataFrame(randn(6, 4), index=list(string.ascii_letters[:6]), @@ -1006,21 +1162,20 @@ def _check_bar_alignment(self, df, kind='bar', stacked=False, if kind == 'bar': axis = ax.xaxis ax_min, ax_max = ax.get_xlim() - min_edge = min([p.get_x() for p in ax.patches]) - max_edge = max([p.get_x() + p.get_width() for p in ax.patches]) + min_edge = min(p.get_x() for p in ax.patches) + max_edge = max(p.get_x() + p.get_width() for p in ax.patches) elif kind == 'barh': axis = ax.yaxis ax_min, ax_max = ax.get_ylim() - min_edge = min([p.get_y() for p in ax.patches]) - max_edge = max([p.get_y() + p.get_height() for p in ax.patches - ]) + min_edge = min(p.get_y() for p in ax.patches) + max_edge = max(p.get_y() + p.get_height() for p in ax.patches) else: raise ValueError # GH 7498 # compare margins between lim and bar edges - self.assertAlmostEqual(ax_min, min_edge - 0.25) - self.assertAlmostEqual(ax_max, max_edge + 0.25) + tm.assert_almost_equal(ax_min, min_edge - 0.25) + tm.assert_almost_equal(ax_max, max_edge + 0.25) p = ax.patches[0] if kind == 'bar' and (stacked is True or subplots is True): @@ -1040,20 +1195,20 @@ def _check_bar_alignment(self, df, kind='bar', stacked=False, raise ValueError # Check the ticks locates on integer - self.assertTrue((axis.get_ticklocs() == np.arange(len(df))).all()) + assert (axis.get_ticklocs() == np.arange(len(df))).all() if align == 'center': # Check whether the bar locates on center - 
self.assertAlmostEqual(axis.get_ticklocs()[0], center) + tm.assert_almost_equal(axis.get_ticklocs()[0], center) elif align == 'edge': # Check whether the bar's edge starts from the tick - self.assertAlmostEqual(axis.get_ticklocs()[0], edge) + tm.assert_almost_equal(axis.get_ticklocs()[0], edge) else: raise ValueError return axes - @slow + @pytest.mark.slow def test_bar_stacked_center(self): # GH2157 df = DataFrame({'A': [3] * 5, 'B': lrange(5)}, index=lrange(5)) @@ -1062,7 +1217,7 @@ def test_bar_stacked_center(self): self._check_bar_alignment(df, kind='barh', stacked=True) self._check_bar_alignment(df, kind='barh', stacked=True, width=0.9) - @slow + @pytest.mark.slow def test_bar_center(self): df = DataFrame({'A': [3] * 5, 'B': lrange(5)}, index=lrange(5)) self._check_bar_alignment(df, kind='bar', stacked=False) @@ -1070,7 +1225,7 @@ def test_bar_center(self): self._check_bar_alignment(df, kind='barh', stacked=False) self._check_bar_alignment(df, kind='barh', stacked=False, width=0.9) - @slow + @pytest.mark.slow def test_bar_subplots_center(self): df = DataFrame({'A': [3] * 5, 'B': lrange(5)}, index=lrange(5)) self._check_bar_alignment(df, kind='bar', subplots=True) @@ -1078,7 +1233,7 @@ def test_bar_subplots_center(self): self._check_bar_alignment(df, kind='barh', subplots=True) self._check_bar_alignment(df, kind='barh', subplots=True, width=0.9) - @slow + @pytest.mark.slow def test_bar_align_single_column(self): df = DataFrame(randn(5)) self._check_bar_alignment(df, kind='bar', stacked=False) @@ -1088,7 +1243,7 @@ def test_bar_align_single_column(self): self._check_bar_alignment(df, kind='bar', subplots=True) self._check_bar_alignment(df, kind='barh', subplots=True) - @slow + @pytest.mark.slow def test_bar_edge(self): df = DataFrame({'A': [3] * 5, 'B': lrange(5)}, index=lrange(5)) @@ -1113,7 +1268,7 @@ def test_bar_edge(self): self._check_bar_alignment(df, kind='barh', subplots=True, width=0.9, align='edge') - @slow + @pytest.mark.slow def 
test_bar_log_no_subplots(self): # GH3254, GH3298 matplotlib/matplotlib#1882, #1892 # regressions in 1.2.1 @@ -1127,7 +1282,7 @@ def test_bar_log_no_subplots(self): ax = df.plot.bar(grid=True, log=True) tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), expected) - @slow + @pytest.mark.slow def test_bar_log_subplots(self): expected = np.array([1., 10., 100., 1000.]) if not self.mpl_le_1_2_1: @@ -1139,7 +1294,7 @@ def test_bar_log_subplots(self): tm.assert_numpy_array_equal(ax[0].yaxis.get_ticklocs(), expected) tm.assert_numpy_array_equal(ax[1].yaxis.get_ticklocs(), expected) - @slow + @pytest.mark.slow def test_boxplot(self): df = self.hist_df series = df['height'] @@ -1150,7 +1305,7 @@ def test_boxplot(self): self._check_text_labels(ax.get_xticklabels(), labels) tm.assert_numpy_array_equal(ax.xaxis.get_ticklocs(), np.arange(1, len(numeric_cols) + 1)) - self.assertEqual(len(ax.lines), self.bp_n_objects * len(numeric_cols)) + assert len(ax.lines) == self.bp_n_objects * len(numeric_cols) # different warning on py3 if not PY3: @@ -1161,7 +1316,7 @@ def test_boxplot(self): self._check_ax_scales(axes, yaxis='log') for ax, label in zip(axes, labels): self._check_text_labels(ax.get_xticklabels(), [label]) - self.assertEqual(len(ax.lines), self.bp_n_objects) + assert len(ax.lines) == self.bp_n_objects axes = series.plot.box(rot=40) self._check_ticks_props(axes, xrot=40, yrot=0) @@ -1175,9 +1330,9 @@ def test_boxplot(self): labels = [pprint_thing(c) for c in numeric_cols] self._check_text_labels(ax.get_xticklabels(), labels) tm.assert_numpy_array_equal(ax.xaxis.get_ticklocs(), positions) - self.assertEqual(len(ax.lines), self.bp_n_objects * len(numeric_cols)) + assert len(ax.lines) == self.bp_n_objects * len(numeric_cols) - @slow + @pytest.mark.slow def test_boxplot_vertical(self): df = self.hist_df numeric_cols = df._get_numeric_data().columns @@ -1187,7 +1342,7 @@ def test_boxplot_vertical(self): ax = df.plot.box(rot=50, fontsize=8, vert=False) 
self._check_ticks_props(ax, xrot=0, yrot=50, ylabelsize=8) self._check_text_labels(ax.get_yticklabels(), labels) - self.assertEqual(len(ax.lines), self.bp_n_objects * len(numeric_cols)) + assert len(ax.lines) == self.bp_n_objects * len(numeric_cols) # _check_plot_works adds an ax so catch warning. see GH #13188 with tm.assert_produces_warning(UserWarning): @@ -1197,20 +1352,20 @@ def test_boxplot_vertical(self): self._check_ax_scales(axes, xaxis='log') for ax, label in zip(axes, labels): self._check_text_labels(ax.get_yticklabels(), [label]) - self.assertEqual(len(ax.lines), self.bp_n_objects) + assert len(ax.lines) == self.bp_n_objects positions = np.array([3, 2, 8]) ax = df.plot.box(positions=positions, vert=False) self._check_text_labels(ax.get_yticklabels(), labels) tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), positions) - self.assertEqual(len(ax.lines), self.bp_n_objects * len(numeric_cols)) + assert len(ax.lines) == self.bp_n_objects * len(numeric_cols) - @slow + @pytest.mark.slow def test_boxplot_return_type(self): df = DataFrame(randn(6, 4), index=list(string.ascii_letters[:6]), columns=['one', 'two', 'three', 'four']) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df.plot.box(return_type='NOTATYPE') result = df.plot.box(return_type='dict') @@ -1225,13 +1380,13 @@ def test_boxplot_return_type(self): result = df.plot.box(return_type='both') self._check_box_return_type(result, 'both') - @slow + @pytest.mark.slow def test_boxplot_subplots_return_type(self): df = self.hist_df # normal style: return_type=None result = df.plot.box(subplots=True) - self.assertIsInstance(result, Series) + assert isinstance(result, Series) self._check_box_return_type(result, None, expected_keys=[ 'height', 'weight', 'category']) @@ -1242,10 +1397,13 @@ def test_boxplot_subplots_return_type(self): expected_keys=['height', 'weight', 'category'], check_ax_title=False) - @slow + @pytest.mark.slow + @td.skip_if_no_scipy def test_kde_df(self): - 
tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() + if not self.mpl_ge_1_5_0: + pytest.skip("mpl is not supported") + df = DataFrame(randn(100, 4)) ax = _check_plot_works(df.plot, kind='kde') expected = [pprint_thing(c) for c in df.columns] @@ -1263,15 +1421,18 @@ def test_kde_df(self): axes = df.plot(kind='kde', logy=True, subplots=True) self._check_ax_scales(axes, yaxis='log') - @slow + @pytest.mark.slow + @td.skip_if_no_scipy def test_kde_missing_vals(self): - tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() + if not self.mpl_ge_1_5_0: + pytest.skip("mpl is not supported") + df = DataFrame(np.random.uniform(size=(100, 4))) df.loc[0, 0] = np.nan _check_plot_works(df.plot, kind='kde') - @slow + @pytest.mark.slow def test_hist_df(self): from matplotlib.patches import Rectangle if self.mpl_le_1_2_1: @@ -1297,13 +1458,13 @@ def test_hist_df(self): ax = series.plot.hist(normed=True, cumulative=True, bins=4) # height of last bin (index 5) must be 1.0 rects = [x for x in ax.get_children() if isinstance(x, Rectangle)] - self.assertAlmostEqual(rects[-1].get_height(), 1.0) + tm.assert_almost_equal(rects[-1].get_height(), 1.0) tm.close() ax = series.plot.hist(cumulative=True, bins=4) rects = [x for x in ax.get_children() if isinstance(x, Rectangle)] - self.assertAlmostEqual(rects[-2].get_height(), 100.0) + tm.assert_almost_equal(rects[-2].get_height(), 100.0) tm.close() # if horizontal, yticklabels are rotated @@ -1319,19 +1480,19 @@ def _check_box_coord(self, patches, expected_y=None, expected_h=None, # dtype is depending on above values, no need to check if expected_y is not None: - self.assert_numpy_array_equal(result_y, expected_y, - check_dtype=False) + tm.assert_numpy_array_equal(result_y, expected_y, + check_dtype=False) if expected_h is not None: - self.assert_numpy_array_equal(result_height, expected_h, - check_dtype=False) + tm.assert_numpy_array_equal(result_height, expected_h, + check_dtype=False) if expected_x is not None: - 
self.assert_numpy_array_equal(result_x, expected_x, - check_dtype=False) + tm.assert_numpy_array_equal(result_x, expected_x, + check_dtype=False) if expected_w is not None: - self.assert_numpy_array_equal(result_width, expected_w, - check_dtype=False) + tm.assert_numpy_array_equal(result_width, expected_w, + check_dtype=False) - @slow + @pytest.mark.slow def test_hist_df_coord(self): normal_df = DataFrame({'A': np.repeat(np.array([1, 2, 3, 4, 5]), np.array([10, 9, 8, 7, 6])), @@ -1422,12 +1583,12 @@ def test_hist_df_coord(self): expected_x=np.array([0, 0, 0, 0, 0]), expected_w=np.array([6, 7, 8, 9, 10])) - @slow + @pytest.mark.slow def test_plot_int_columns(self): df = DataFrame(randn(100, 4)).cumsum() _check_plot_works(df.plot, legend=True) - @slow + @pytest.mark.slow def test_df_legend_labels(self): kinds = ['line', 'bar', 'barh', 'kde', 'area', 'hist'] df = DataFrame(rand(3, 3), columns=['a', 'b', 'c']) @@ -1494,7 +1655,7 @@ def test_df_legend_labels(self): self._check_text_labels(ax.xaxis.get_label(), 'a') ax = df5.plot(y='c', label='LABEL_c', ax=ax) self._check_legend_labels(ax, labels=['LABEL_b', 'LABEL_c']) - self.assertTrue(df5.columns.tolist() == ['b', 'c']) + assert df5.columns.tolist() == ['b', 'c'] def test_legend_name(self): multi = DataFrame(randn(4, 4), @@ -1520,7 +1681,7 @@ def test_legend_name(self): leg_title = ax.legend_.get_title() self._check_text_labels(leg_title, 'new') - @slow + @pytest.mark.slow def test_no_legend(self): kinds = ['line', 'bar', 'barh', 'kde', 'area', 'hist'] df = DataFrame(rand(3, 3), columns=['a', 'b', 'c']) @@ -1532,7 +1693,7 @@ def test_no_legend(self): ax = df.plot(kind=kind, legend=False) self._check_legend_labels(ax, visible=False) - @slow + @pytest.mark.slow def test_style_by_column(self): import matplotlib.pyplot as plt fig = plt.gcf() @@ -1546,20 +1707,20 @@ def test_style_by_column(self): fig.add_subplot(111) ax = df.plot(style=markers) for i, l in enumerate(ax.get_lines()[:len(markers)]): - 
self.assertEqual(l.get_marker(), markers[i]) + assert l.get_marker() == markers[i] - @slow + @pytest.mark.slow def test_line_label_none(self): s = Series([1, 2]) ax = s.plot() - self.assertEqual(ax.get_legend(), None) + assert ax.get_legend() is None ax = s.plot(legend=True) - self.assertEqual(ax.get_legend().get_texts()[0].get_text(), 'None') + assert ax.get_legend().get_texts()[0].get_text() == 'None' - @slow + @pytest.mark.slow + @tm.capture_stdout def test_line_colors(self): - import sys from matplotlib import cm custom_colors = 'rgcby' @@ -1568,16 +1729,13 @@ def test_line_colors(self): ax = df.plot(color=custom_colors) self._check_colors(ax.get_lines(), linecolors=custom_colors) - tmp = sys.stderr - sys.stderr = StringIO() - try: - tm.close() - ax2 = df.plot(colors=custom_colors) - lines2 = ax2.get_lines() - for l1, l2 in zip(ax.get_lines(), lines2): - self.assertEqual(l1.get_color(), l2.get_color()) - finally: - sys.stderr = tmp + tm.close() + + ax2 = df.plot(colors=custom_colors) + lines2 = ax2.get_lines() + + for l1, l2 in zip(ax.get_lines(), lines2): + assert l1.get_color() == l2.get_color() tm.close() @@ -1606,19 +1764,19 @@ def test_line_colors(self): self._check_colors(ax.get_lines(), linecolors=custom_colors) tm.close() - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): # Color contains shorthand hex value results in ValueError custom_colors = ['#F00', '#00F', '#FF0', '#000', '#FFF'] # Forced show plot _check_plot_works(df.plot, color=custom_colors) - @slow + @pytest.mark.slow def test_dont_modify_colors(self): colors = ['r', 'g', 'b'] pd.DataFrame(np.random.rand(10, 2)).plot(color=colors) - self.assertEqual(len(colors), 3) + assert len(colors) == 3 - @slow + @pytest.mark.slow def test_line_colors_and_styles_subplots(self): # GH 9894 from matplotlib import cm @@ -1663,7 +1821,7 @@ def test_line_colors_and_styles_subplots(self): self._check_colors(ax.get_lines(), linecolors=[c]) tm.close() - with tm.assertRaises(ValueError): + with 
pytest.raises(ValueError): # Color contains shorthand hex value results in ValueError custom_colors = ['#F00', '#00F', '#FF0', '#000', '#FFF'] # Forced show plot @@ -1696,7 +1854,7 @@ def test_line_colors_and_styles_subplots(self): self._check_colors(ax.get_lines(), linecolors=[c]) tm.close() - @slow + @pytest.mark.slow def test_area_colors(self): from matplotlib import cm from matplotlib.collections import PolyCollection @@ -1719,7 +1877,7 @@ def test_area_colors(self): self._check_colors(linehandles, linecolors=custom_colors) for h in handles: - self.assertTrue(h.get_alpha() is None) + assert h.get_alpha() is None tm.close() ax = df.plot.area(colormap='jet') @@ -1736,7 +1894,7 @@ def test_area_colors(self): if not isinstance(x, PolyCollection)] self._check_colors(linehandles, linecolors=jet_colors) for h in handles: - self.assertTrue(h.get_alpha() is None) + assert h.get_alpha() is None tm.close() # When stacked=False, alpha is set to 0.5 @@ -1754,9 +1912,9 @@ def test_area_colors(self): linecolors = jet_colors self._check_colors(handles[:len(jet_colors)], linecolors=linecolors) for h in handles: - self.assertEqual(h.get_alpha(), 0.5) + assert h.get_alpha() == 0.5 - @slow + @pytest.mark.slow def test_hist_colors(self): default_colors = self._maybe_unpack_cycler(self.plt.rcParams) @@ -1790,10 +1948,12 @@ def test_hist_colors(self): self._check_colors(ax.patches[::10], facecolors=['green'] * 5) tm.close() - @slow + @pytest.mark.slow + @td.skip_if_no_scipy def test_kde_colors(self): - tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() + if not self.mpl_ge_1_5_0: + pytest.skip("mpl is not supported") from matplotlib import cm @@ -1813,10 +1973,12 @@ def test_kde_colors(self): rgba_colors = lmap(cm.jet, np.linspace(0, 1, len(df))) self._check_colors(ax.get_lines(), linecolors=rgba_colors) - @slow + @pytest.mark.slow + @td.skip_if_no_scipy def test_kde_colors_and_styles_subplots(self): - tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() + if not 
self.mpl_ge_1_5_0: + pytest.skip("mpl is not supported") from matplotlib import cm default_colors = self._maybe_unpack_cycler(self.plt.rcParams) @@ -1872,7 +2034,7 @@ def test_kde_colors_and_styles_subplots(self): self._check_colors(ax.get_lines(), linecolors=[c]) tm.close() - @slow + @pytest.mark.slow def test_boxplot_colors(self): def _check_colors(bp, box_c, whiskers_c, medians_c, caps_c='k', fliers_c=None): @@ -1933,7 +2095,7 @@ def _check_colors(bp, box_c, whiskers_c, medians_c, caps_c='k', _check_colors(bp, (0, 1, 0), (0, 1, 0), (0, 1, 0), (0, 1, 0), '#123456') - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): # Color contains invalid key results in ValueError df.plot.box(color=dict(boxes='red', xxxx='blue')) @@ -1960,13 +2122,13 @@ def test_unordered_ts(self): columns=['test']) ax = df.plot() xticks = ax.lines[0].get_xdata() - self.assertTrue(xticks[0] < xticks[1]) + assert xticks[0] < xticks[1] ydata = ax.lines[0].get_ydata() tm.assert_numpy_array_equal(ydata, np.array([1.0, 2.0, 3.0])) def test_kind_both_ways(self): df = DataFrame({'x': [1, 2, 3]}) - for kind in plotting._common_kinds: + for kind in plotting._core._common_kinds: if not _ok_for_gaussian_kde(kind): continue df.plot(kind=kind) @@ -1977,21 +2139,21 @@ def test_kind_both_ways(self): def test_all_invalid_plot_data(self): df = DataFrame(list('abcd')) - for kind in plotting._common_kinds: + for kind in plotting._core._common_kinds: if not _ok_for_gaussian_kde(kind): continue - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): df.plot(kind=kind) - @slow + @pytest.mark.slow def test_partially_invalid_plot_data(self): with tm.RNGContext(42): df = DataFrame(randn(10, 2), dtype=object) df[np.random.rand(df.shape[0]) > 0.5] = 'a' - for kind in plotting._common_kinds: + for kind in plotting._core._common_kinds: if not _ok_for_gaussian_kde(kind): continue - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): df.plot(kind=kind) with tm.RNGContext(42): @@ 
-2000,74 +2162,94 @@ def test_partially_invalid_plot_data(self): df = DataFrame(rand(10, 2), dtype=object) df[np.random.rand(df.shape[0]) > 0.5] = 'a' for kind in kinds: - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): df.plot(kind=kind) def test_invalid_kind(self): df = DataFrame(randn(10, 2)) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df.plot(kind='aasdf') - @slow + @pytest.mark.parametrize("x,y", [ + (['B', 'C'], 'A'), + ('A', ['B', 'C']) + ]) + def test_invalid_xy_args(self, x, y): + # GH 18671 + df = DataFrame({"A": [1, 2], 'B': [3, 4], 'C': [5, 6]}) + with pytest.raises(ValueError): + df.plot(x=x, y=y) + + @pytest.mark.parametrize("x,y", [ + ('A', 'B'), + ('B', 'A') + ]) + def test_invalid_xy_args_dup_cols(self, x, y): + # GH 18671 + df = DataFrame([[1, 3, 5], [2, 4, 6]], columns=list('AAB')) + with pytest.raises(ValueError): + df.plot(x=x, y=y) + + @pytest.mark.slow def test_hexbin_basic(self): df = self.hexbin_df ax = df.plot.hexbin(x='A', y='B', gridsize=10) # TODO: need better way to test. This just does existence. 
- self.assertEqual(len(ax.collections), 1) + assert len(ax.collections) == 1 # GH 6951 axes = df.plot.hexbin(x='A', y='B', subplots=True) # hexbin should have 2 axes in the figure, 1 for plotting and another # is colorbar - self.assertEqual(len(axes[0].figure.axes), 2) + assert len(axes[0].figure.axes) == 2 # return value is single axes self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) - @slow + @pytest.mark.slow def test_hexbin_with_c(self): df = self.hexbin_df ax = df.plot.hexbin(x='A', y='B', C='C') - self.assertEqual(len(ax.collections), 1) + assert len(ax.collections) == 1 ax = df.plot.hexbin(x='A', y='B', C='C', reduce_C_function=np.std) - self.assertEqual(len(ax.collections), 1) + assert len(ax.collections) == 1 - @slow + @pytest.mark.slow def test_hexbin_cmap(self): df = self.hexbin_df # Default to BuGn ax = df.plot.hexbin(x='A', y='B') - self.assertEqual(ax.collections[0].cmap.name, 'BuGn') + assert ax.collections[0].cmap.name == 'BuGn' cm = 'cubehelix' ax = df.plot.hexbin(x='A', y='B', colormap=cm) - self.assertEqual(ax.collections[0].cmap.name, cm) + assert ax.collections[0].cmap.name == cm - @slow + @pytest.mark.slow def test_no_color_bar(self): df = self.hexbin_df ax = df.plot.hexbin(x='A', y='B', colorbar=None) - self.assertIs(ax.collections[0].colorbar, None) + assert ax.collections[0].colorbar is None - @slow + @pytest.mark.slow def test_allow_cmap(self): df = self.hexbin_df ax = df.plot.hexbin(x='A', y='B', cmap='YlGn') - self.assertEqual(ax.collections[0].cmap.name, 'YlGn') + assert ax.collections[0].cmap.name == 'YlGn' - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): df.plot.hexbin(x='A', y='B', cmap='YlGn', colormap='BuGn') - @slow + @pytest.mark.slow def test_pie_df(self): df = DataFrame(np.random.rand(5, 3), columns=['X', 'Y', 'Z'], index=['a', 'b', 'c', 'd', 'e']) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df.plot.pie() ax = _check_plot_works(df.plot.pie, y='Y') @@ -2080,11 +2262,11 @@ def 
test_pie_df(self): with tm.assert_produces_warning(UserWarning): axes = _check_plot_works(df.plot.pie, subplots=True) - self.assertEqual(len(axes), len(df.columns)) + assert len(axes) == len(df.columns) for ax in axes: self._check_text_labels(ax.texts, df.index) for ax, ylabel in zip(axes, df.columns): - self.assertEqual(ax.get_ylabel(), ylabel) + assert ax.get_ylabel() == ylabel labels = ['A', 'B', 'C', 'D', 'E'] color_args = ['r', 'g', 'b', 'c', 'm'] @@ -2092,7 +2274,7 @@ def test_pie_df(self): axes = _check_plot_works(df.plot.pie, subplots=True, labels=labels, colors=color_args) - self.assertEqual(len(axes), len(df.columns)) + assert len(axes) == len(df.columns) for ax in axes: self._check_text_labels(ax.texts, labels) @@ -2110,83 +2292,85 @@ def test_pie_df_nan(self): expected = list(base_expected) # force copy expected[i] = '' result = [x.get_text() for x in ax.texts] - self.assertEqual(result, expected) + assert result == expected # legend labels # NaN's not included in legend with subplots # see https://github.com/pandas-dev/pandas/issues/8390 - self.assertEqual([x.get_text() for x in - ax.get_legend().get_texts()], - base_expected[:i] + base_expected[i + 1:]) + assert ([x.get_text() for x in ax.get_legend().get_texts()] == + base_expected[:i] + base_expected[i + 1:]) - @slow + @pytest.mark.slow def test_errorbar_plot(self): - d = {'x': np.arange(12), 'y': np.arange(12, 0, -1)} - df = DataFrame(d) - d_err = {'x': np.ones(12) * 0.2, 'y': np.ones(12) * 0.4} - df_err = DataFrame(d_err) - - # check line plots - ax = _check_plot_works(df.plot, yerr=df_err, logy=True) - self._check_has_errorbars(ax, xerr=0, yerr=2) - ax = _check_plot_works(df.plot, yerr=df_err, logx=True, logy=True) - self._check_has_errorbars(ax, xerr=0, yerr=2) - ax = _check_plot_works(df.plot, yerr=df_err, loglog=True) - self._check_has_errorbars(ax, xerr=0, yerr=2) + with warnings.catch_warnings(): + d = {'x': np.arange(12), 'y': np.arange(12, 0, -1)} + df = DataFrame(d) + d_err = {'x': 
np.ones(12) * 0.2, 'y': np.ones(12) * 0.4} + df_err = DataFrame(d_err) - kinds = ['line', 'bar', 'barh'] - for kind in kinds: - ax = _check_plot_works(df.plot, yerr=df_err['x'], kind=kind) + # check line plots + ax = _check_plot_works(df.plot, yerr=df_err, logy=True) self._check_has_errorbars(ax, xerr=0, yerr=2) - ax = _check_plot_works(df.plot, yerr=d_err, kind=kind) + ax = _check_plot_works(df.plot, yerr=df_err, logx=True, logy=True) self._check_has_errorbars(ax, xerr=0, yerr=2) - ax = _check_plot_works(df.plot, yerr=df_err, xerr=df_err, - kind=kind) - self._check_has_errorbars(ax, xerr=2, yerr=2) - ax = _check_plot_works(df.plot, yerr=df_err['x'], xerr=df_err['x'], - kind=kind) - self._check_has_errorbars(ax, xerr=2, yerr=2) - ax = _check_plot_works(df.plot, xerr=0.2, yerr=0.2, kind=kind) - self._check_has_errorbars(ax, xerr=2, yerr=2) - # _check_plot_works adds an ax so catch warning. see GH #13188 - with tm.assert_produces_warning(UserWarning): + ax = _check_plot_works(df.plot, yerr=df_err, loglog=True) + self._check_has_errorbars(ax, xerr=0, yerr=2) + + kinds = ['line', 'bar', 'barh'] + for kind in kinds: + ax = _check_plot_works(df.plot, yerr=df_err['x'], kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=2) + ax = _check_plot_works(df.plot, yerr=d_err, kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=2) + ax = _check_plot_works(df.plot, yerr=df_err, xerr=df_err, + kind=kind) + self._check_has_errorbars(ax, xerr=2, yerr=2) + ax = _check_plot_works(df.plot, yerr=df_err['x'], + xerr=df_err['x'], + kind=kind) + self._check_has_errorbars(ax, xerr=2, yerr=2) + ax = _check_plot_works(df.plot, xerr=0.2, yerr=0.2, kind=kind) + self._check_has_errorbars(ax, xerr=2, yerr=2) + + # _check_plot_works adds an ax so catch warning. 
see GH #13188 axes = _check_plot_works(df.plot, yerr=df_err, xerr=df_err, subplots=True, kind=kind) - self._check_has_errorbars(axes, xerr=1, yerr=1) + self._check_has_errorbars(axes, xerr=1, yerr=1) - ax = _check_plot_works((df + 1).plot, yerr=df_err, - xerr=df_err, kind='bar', log=True) - self._check_has_errorbars(ax, xerr=2, yerr=2) - - # yerr is raw error values - ax = _check_plot_works(df['y'].plot, yerr=np.ones(12) * 0.4) - self._check_has_errorbars(ax, xerr=0, yerr=1) - ax = _check_plot_works(df.plot, yerr=np.ones((2, 12)) * 0.4) - self._check_has_errorbars(ax, xerr=0, yerr=2) + ax = _check_plot_works((df + 1).plot, yerr=df_err, + xerr=df_err, kind='bar', log=True) + self._check_has_errorbars(ax, xerr=2, yerr=2) - # yerr is iterator - import itertools - ax = _check_plot_works(df.plot, yerr=itertools.repeat(0.1, len(df))) - self._check_has_errorbars(ax, xerr=0, yerr=2) + # yerr is raw error values + ax = _check_plot_works(df['y'].plot, yerr=np.ones(12) * 0.4) + self._check_has_errorbars(ax, xerr=0, yerr=1) + ax = _check_plot_works(df.plot, yerr=np.ones((2, 12)) * 0.4) + self._check_has_errorbars(ax, xerr=0, yerr=2) - # yerr is column name - for yerr in ['yerr', u('誤差')]: - s_df = df.copy() - s_df[yerr] = np.ones(12) * 0.2 - ax = _check_plot_works(s_df.plot, yerr=yerr) + # yerr is iterator + import itertools + ax = _check_plot_works(df.plot, + yerr=itertools.repeat(0.1, len(df))) self._check_has_errorbars(ax, xerr=0, yerr=2) - ax = _check_plot_works(s_df.plot, y='y', x='x', yerr=yerr) - self._check_has_errorbars(ax, xerr=0, yerr=1) - with tm.assertRaises(ValueError): - df.plot(yerr=np.random.randn(11)) + # yerr is column name + for yerr in ['yerr', u('誤差')]: + s_df = df.copy() + s_df[yerr] = np.ones(12) * 0.2 + ax = _check_plot_works(s_df.plot, yerr=yerr) + self._check_has_errorbars(ax, xerr=0, yerr=2) + ax = _check_plot_works(s_df.plot, y='y', x='x', yerr=yerr) + self._check_has_errorbars(ax, xerr=0, yerr=1) + + with pytest.raises(ValueError): + 
df.plot(yerr=np.random.randn(11)) - df_err = DataFrame({'x': ['zzz'] * 12, 'y': ['zzz'] * 12}) - with tm.assertRaises((ValueError, TypeError)): - df.plot(yerr=df_err) + df_err = DataFrame({'x': ['zzz'] * 12, 'y': ['zzz'] * 12}) + with pytest.raises((ValueError, TypeError)): + df.plot(yerr=df_err) - @slow + @pytest.mark.slow def test_errorbar_with_integer_column_names(self): # test with integer column names df = DataFrame(np.random.randn(10, 2)) @@ -2196,7 +2380,7 @@ def test_errorbar_with_integer_column_names(self): ax = _check_plot_works(df.plot, y=0, yerr=1) self._check_has_errorbars(ax, xerr=0, yerr=1) - @slow + @pytest.mark.slow def test_errorbar_with_partial_columns(self): df = DataFrame(np.random.randn(10, 3)) df_err = DataFrame(np.random.randn(10, 2), columns=[0, 2]) @@ -2219,36 +2403,37 @@ def test_errorbar_with_partial_columns(self): ax = _check_plot_works(df.plot, yerr=err) self._check_has_errorbars(ax, xerr=0, yerr=1) - @slow + @pytest.mark.slow def test_errorbar_timeseries(self): - d = {'x': np.arange(12), 'y': np.arange(12, 0, -1)} - d_err = {'x': np.ones(12) * 0.2, 'y': np.ones(12) * 0.4} + with warnings.catch_warnings(): + d = {'x': np.arange(12), 'y': np.arange(12, 0, -1)} + d_err = {'x': np.ones(12) * 0.2, 'y': np.ones(12) * 0.4} - # check time-series plots - ix = date_range('1/1/2000', '1/1/2001', freq='M') - tdf = DataFrame(d, index=ix) - tdf_err = DataFrame(d_err, index=ix) + # check time-series plots + ix = date_range('1/1/2000', '1/1/2001', freq='M') + tdf = DataFrame(d, index=ix) + tdf_err = DataFrame(d_err, index=ix) - kinds = ['line', 'bar', 'barh'] - for kind in kinds: - ax = _check_plot_works(tdf.plot, yerr=tdf_err, kind=kind) - self._check_has_errorbars(ax, xerr=0, yerr=2) - ax = _check_plot_works(tdf.plot, yerr=d_err, kind=kind) - self._check_has_errorbars(ax, xerr=0, yerr=2) - ax = _check_plot_works(tdf.plot, y='y', yerr=tdf_err['x'], - kind=kind) - self._check_has_errorbars(ax, xerr=0, yerr=1) - ax = _check_plot_works(tdf.plot, y='y', 
yerr='x', kind=kind) - self._check_has_errorbars(ax, xerr=0, yerr=1) - ax = _check_plot_works(tdf.plot, yerr=tdf_err, kind=kind) - self._check_has_errorbars(ax, xerr=0, yerr=2) - # _check_plot_works adds an ax so catch warning. see GH #13188 - with tm.assert_produces_warning(UserWarning): + kinds = ['line', 'bar', 'barh'] + for kind in kinds: + ax = _check_plot_works(tdf.plot, yerr=tdf_err, kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=2) + ax = _check_plot_works(tdf.plot, yerr=d_err, kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=2) + ax = _check_plot_works(tdf.plot, y='y', yerr=tdf_err['x'], + kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=1) + ax = _check_plot_works(tdf.plot, y='y', yerr='x', kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=1) + ax = _check_plot_works(tdf.plot, yerr=tdf_err, kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=2) + + # _check_plot_works adds an ax so catch warning. see GH #13188 axes = _check_plot_works(tdf.plot, kind=kind, yerr=tdf_err, subplots=True) - self._check_has_errorbars(axes, xerr=0, yerr=1) + self._check_has_errorbars(axes, xerr=0, yerr=1) def test_errorbar_asymmetrical(self): @@ -2266,19 +2451,17 @@ def test_errorbar_asymmetrical(self): expected_0_0 = err[0, :, 0] * np.array([-1, 1]) tm.assert_almost_equal(yerr_0_0, expected_0_0) else: - self.assertEqual(ax.lines[7].get_ydata()[0], - data[0, 1] - err[1, 0, 0]) - self.assertEqual(ax.lines[8].get_ydata()[0], - data[0, 1] + err[1, 1, 0]) - - self.assertEqual(ax.lines[5].get_xdata()[0], -err[1, 0, 0] / 2) - self.assertEqual(ax.lines[6].get_xdata()[0], err[1, 1, 0] / 2) + assert ax.lines[7].get_ydata()[0] == data[0, 1] - err[1, 0, 0] + assert ax.lines[8].get_ydata()[0] == data[0, 1] + err[1, 1, 0] + assert ax.lines[5].get_xdata()[0] == -err[1, 0, 0] / 2 + assert ax.lines[6].get_xdata()[0] == err[1, 1, 0] / 2 - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df.plot(yerr=err.T) tm.close() + @td.xfail_if_mpl_2_2 def 
test_table(self): df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) @@ -2286,9 +2469,9 @@ def test_table(self): _check_plot_works(df.plot, table=df) ax = df.plot() - self.assertTrue(len(ax.tables) == 0) + assert len(ax.tables) == 0 plotting.table(ax, df.T) - self.assertTrue(len(ax.tables) == 1) + assert len(ax.tables) == 1 def test_errorbar_scatter(self): df = DataFrame( @@ -2332,7 +2515,7 @@ def _check_errorbar_color(containers, expected, has_err='has_xerr'): self._check_has_errorbars(ax, xerr=0, yerr=1) _check_errorbar_color(ax.containers, 'green', has_err='has_yerr') - @slow + @pytest.mark.slow def test_sharex_and_ax(self): # https://github.com/pandas-dev/pandas/issues/9737 using gridspec, # the axis in fig.get_axis() are sorted differently than pandas @@ -2348,7 +2531,7 @@ def test_sharex_and_ax(self): def _check(axes): for ax in axes: - self.assertEqual(len(ax.lines), 1) + assert len(ax.lines) == 1 self._check_visible(ax.get_yticklabels(), visible=True) for ax in [axes[0], axes[2]]: self._check_visible(ax.get_xticklabels(), visible=False) @@ -2378,13 +2561,13 @@ def _check(axes): gs.tight_layout(plt.gcf()) for ax in axes: - self.assertEqual(len(ax.lines), 1) + assert len(ax.lines) == 1 self._check_visible(ax.get_yticklabels(), visible=True) self._check_visible(ax.get_xticklabels(), visible=True) self._check_visible(ax.get_xticklabels(minor=True), visible=True) tm.close() - @slow + @pytest.mark.slow def test_sharey_and_ax(self): # https://github.com/pandas-dev/pandas/issues/9737 using gridspec, # the axis in fig.get_axis() are sorted differently than pandas @@ -2400,7 +2583,7 @@ def test_sharey_and_ax(self): def _check(axes): for ax in axes: - self.assertEqual(len(ax.lines), 1) + assert len(ax.lines) == 1 self._check_visible(ax.get_xticklabels(), visible=True) self._check_visible( ax.get_xticklabels(minor=True), visible=True) @@ -2430,7 +2613,7 @@ def _check(axes): gs.tight_layout(plt.gcf()) for ax in axes: - 
self.assertEqual(len(ax.lines), 1) + assert len(ax.lines) == 1 self._check_visible(ax.get_yticklabels(), visible=True) self._check_visible(ax.get_xticklabels(), visible=True) self._check_visible(ax.get_xticklabels(minor=True), visible=True) @@ -2441,7 +2624,7 @@ def test_memory_leak(self): import gc results = {} - for kind in plotting._plot_klass.keys(): + for kind in plotting._core._plot_klass.keys(): if not _ok_for_gaussian_kde(kind): continue args = {} @@ -2463,11 +2646,11 @@ def test_memory_leak(self): gc.collect() for key in results: # check that every plot was collected - with tm.assertRaises(ReferenceError): + with pytest.raises(ReferenceError): # need to actually access something to get an error results[key].lines - @slow + @pytest.mark.slow def test_df_subplots_patterns_minorticks(self): # GH 10657 import matplotlib.pyplot as plt @@ -2480,7 +2663,7 @@ def test_df_subplots_patterns_minorticks(self): fig, axes = plt.subplots(2, 1, sharex=True) axes = df.plot(subplots=True, ax=axes) for ax in axes: - self.assertEqual(len(ax.lines), 1) + assert len(ax.lines) == 1 self._check_visible(ax.get_yticklabels(), visible=True) # xaxis of 1st ax must be hidden self._check_visible(axes[0].get_xticklabels(), visible=False) @@ -2493,7 +2676,7 @@ def test_df_subplots_patterns_minorticks(self): with tm.assert_produces_warning(UserWarning): axes = df.plot(subplots=True, ax=axes, sharex=True) for ax in axes: - self.assertEqual(len(ax.lines), 1) + assert len(ax.lines) == 1 self._check_visible(ax.get_yticklabels(), visible=True) # xaxis of 1st ax must be hidden self._check_visible(axes[0].get_xticklabels(), visible=False) @@ -2506,13 +2689,13 @@ def test_df_subplots_patterns_minorticks(self): fig, axes = plt.subplots(2, 1) axes = df.plot(subplots=True, ax=axes) for ax in axes: - self.assertEqual(len(ax.lines), 1) + assert len(ax.lines) == 1 self._check_visible(ax.get_yticklabels(), visible=True) self._check_visible(ax.get_xticklabels(), visible=True) 
self._check_visible(ax.get_xticklabels(minor=True), visible=True) tm.close() - @slow + @pytest.mark.slow def test_df_gridspec_patterns(self): # GH 10819 import matplotlib.pyplot as plt @@ -2540,9 +2723,9 @@ def _get_horizontal_grid(): for ax1, ax2 in [_get_vertical_grid(), _get_horizontal_grid()]: ax1 = ts.plot(ax=ax1) - self.assertEqual(len(ax1.lines), 1) + assert len(ax1.lines) == 1 ax2 = df.plot(ax=ax2) - self.assertEqual(len(ax2.lines), 2) + assert len(ax2.lines) == 2 for ax in [ax1, ax2]: self._check_visible(ax.get_yticklabels(), visible=True) self._check_visible(ax.get_xticklabels(), visible=True) @@ -2553,8 +2736,8 @@ def _get_horizontal_grid(): # subplots=True for ax1, ax2 in [_get_vertical_grid(), _get_horizontal_grid()]: axes = df.plot(subplots=True, ax=[ax1, ax2]) - self.assertEqual(len(ax1.lines), 1) - self.assertEqual(len(ax2.lines), 1) + assert len(ax1.lines) == 1 + assert len(ax2.lines) == 1 for ax in axes: self._check_visible(ax.get_yticklabels(), visible=True) self._check_visible(ax.get_xticklabels(), visible=True) @@ -2567,8 +2750,8 @@ def _get_horizontal_grid(): with tm.assert_produces_warning(UserWarning): axes = df.plot(subplots=True, ax=[ax1, ax2], sharex=True, sharey=True) - self.assertEqual(len(axes[0].lines), 1) - self.assertEqual(len(axes[1].lines), 1) + assert len(axes[0].lines) == 1 + assert len(axes[1].lines) == 1 for ax in [ax1, ax2]: # yaxis are visible because there is only one column self._check_visible(ax.get_yticklabels(), visible=True) @@ -2584,8 +2767,8 @@ def _get_horizontal_grid(): with tm.assert_produces_warning(UserWarning): axes = df.plot(subplots=True, ax=[ax1, ax2], sharex=True, sharey=True) - self.assertEqual(len(axes[0].lines), 1) - self.assertEqual(len(axes[1].lines), 1) + assert len(axes[0].lines) == 1 + assert len(axes[1].lines) == 1 self._check_visible(axes[0].get_yticklabels(), visible=True) # yaxis of axes1 (right) are hidden self._check_visible(axes[1].get_yticklabels(), visible=False) @@ -2610,7 +2793,7 @@ def 
_get_boxed_grid(): index=ts.index, columns=list('ABCD')) axes = df.plot(subplots=True, ax=axes) for ax in axes: - self.assertEqual(len(ax.lines), 1) + assert len(ax.lines) == 1 # axis are visible because these are not shared self._check_visible(ax.get_yticklabels(), visible=True) self._check_visible(ax.get_xticklabels(), visible=True) @@ -2622,7 +2805,7 @@ def _get_boxed_grid(): with tm.assert_produces_warning(UserWarning): axes = df.plot(subplots=True, ax=axes, sharex=True, sharey=True) for ax in axes: - self.assertEqual(len(ax.lines), 1) + assert len(ax.lines) == 1 for ax in [axes[0], axes[2]]: # left column self._check_visible(ax.get_yticklabels(), visible=True) for ax in [axes[1], axes[3]]: # right column @@ -2635,31 +2818,17 @@ def _get_boxed_grid(): self._check_visible(ax.get_xticklabels(minor=True), visible=True) tm.close() - @slow + @pytest.mark.slow def test_df_grid_settings(self): # Make sure plot defaults to rcParams['axes.grid'] setting, GH 9792 self._check_grid_settings( DataFrame({'a': [1, 2, 3], 'b': [2, 3, 4]}), - plotting._dataframe_kinds, kws={'x': 'a', 'y': 'b'}) - - def test_option_mpl_style(self): - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - set_option('display.mpl_style', 'default') - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - set_option('display.mpl_style', None) - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - set_option('display.mpl_style', False) - - with tm.assertRaises(ValueError): - set_option('display.mpl_style', 'default2') + plotting._core._dataframe_kinds, kws={'x': 'a', 'y': 'b'}) def test_invalid_colormap(self): df = DataFrame(randn(3, 2), columns=['A', 'B']) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df.plot(colormap='invalid_colormap') def test_plain_axes(self): @@ -2686,7 +2855,7 @@ def test_plain_axes(self): Series(rand(10)).plot(ax=cax) fig, ax = self.plt.subplots() - from 
mpl_toolkits.axes_grid.inset_locator import inset_axes + from mpl_toolkits.axes_grid1.inset_locator import inset_axes iax = inset_axes(ax, width="30%", height=1., loc=3) Series(rand(10)).plot(ax=ax) Series(rand(10)).plot(ax=iax) @@ -2696,8 +2865,7 @@ def test_passed_bar_colors(self): color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)] colormap = mpl.colors.ListedColormap(color_tuples) barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar", cmap=colormap) - self.assertEqual(color_tuples, [c.get_facecolor() - for c in barplot.patches]) + assert color_tuples == [c.get_facecolor() for c in barplot.patches] def test_rcParams_bar_colors(self): import matplotlib as mpl @@ -2709,8 +2877,24 @@ def test_rcParams_bar_colors(self): except (AttributeError, KeyError): # mpl 1.4 with mpl.rc_context(rc={'axes.color_cycle': color_tuples}): barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar") - self.assertEqual(color_tuples, [c.get_facecolor() - for c in barplot.patches]) + assert color_tuples == [c.get_facecolor() for c in barplot.patches] + + @pytest.mark.parametrize('method', ['line', 'barh', 'bar']) + def test_secondary_axis_font_size(self, method): + # GH: 12565 + df = (pd.DataFrame(np.random.randn(15, 2), + columns=list('AB')) + .assign(C=lambda df: df.B.cumsum()) + .assign(D=lambda df: df.C * 1.1)) + + fontsize = 20 + sy = ['C', 'D'] + + kwargs = dict(secondary_y=sy, fontsize=fontsize, + mark_right=True) + ax = getattr(df.plot, method)(**kwargs) + self._check_ticks_props(axes=ax.right_ax, + ylabelsize=fontsize) def _generate_4_axes_via_gridspec(): diff --git a/pandas/tests/plotting/test_groupby.py b/pandas/tests/plotting/test_groupby.py index 93efb3f994c38..a7c99a06c34e9 100644 --- a/pandas/tests/plotting/test_groupby.py +++ b/pandas/tests/plotting/test_groupby.py @@ -5,13 +5,14 @@ from pandas import Series, DataFrame import pandas.util.testing as tm +import pandas.util._test_decorators as td import numpy as np from pandas.tests.plotting.common import TestPlotBase 
-@tm.mplskip +@td.skip_if_no_mpl class TestDataFrameGroupByPlots(TestPlotBase): def test_series_groupby_plotting_nominally_works(self): @@ -68,7 +69,7 @@ def test_plot_kwargs(self): res = df.groupby('z').plot(kind='scatter', x='x', y='y') # check that a scatter plot is effectively plotted: the axes should # contain a PathCollection from the scatter plot (GH11805) - self.assertEqual(len(res['a'].collections), 1) + assert len(res['a'].collections) == 1 res = df.groupby('z').plot.scatter(x='x', y='y') - self.assertEqual(len(res['a'].collections), 1) + assert len(res['a'].collections) == 1 diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py index 4f64f66bd3c4d..864d39eba29c5 100644 --- a/pandas/tests/plotting/test_hist_method.py +++ b/pandas/tests/plotting/test_hist_method.py @@ -2,29 +2,31 @@ """ Test cases for .hist method """ +import pytest + from pandas import Series, DataFrame import pandas.util.testing as tm -from pandas.util.testing import slow +import pandas.util._test_decorators as td import numpy as np from numpy.random import randn -import pandas.tools.plotting as plotting +from pandas.plotting._core import grouped_hist from pandas.tests.plotting.common import (TestPlotBase, _check_plot_works) -@tm.mplskip +@td.skip_if_no_mpl class TestSeriesPlots(TestPlotBase): - def setUp(self): - TestPlotBase.setUp(self) + def setup_method(self, method): + TestPlotBase.setup_method(self, method) import matplotlib as mpl mpl.rcdefaults() self.ts = tm.makeTimeSeries() self.ts.name = 'ts' - @slow + @pytest.mark.slow def test_hist_legacy(self): _check_plot_works(self.ts.hist) _check_plot_works(self.ts.hist, grid=False) @@ -45,25 +47,25 @@ def test_hist_legacy(self): _check_plot_works(self.ts.hist, figure=fig, ax=ax1) _check_plot_works(self.ts.hist, figure=fig, ax=ax2) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): self.ts.hist(by=self.ts.index, figure=fig) - @slow + @pytest.mark.slow def 
test_hist_bins_legacy(self): df = DataFrame(np.random.randn(10, 2)) ax = df.hist(bins=2)[0][0] - self.assertEqual(len(ax.patches), 2) + assert len(ax.patches) == 2 - @slow + @pytest.mark.slow def test_hist_layout(self): df = self.hist_df - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df.height.hist(layout=(1, 1)) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df.height.hist(layout=[1, 1]) - @slow + @pytest.mark.slow def test_hist_layout_with_by(self): df = self.hist_df @@ -109,7 +111,7 @@ def test_hist_layout_with_by(self): self._check_axes_shape( axes, axes_num=4, layout=(4, 2), figsize=(12, 7)) - @slow + @pytest.mark.slow def test_hist_no_overlap(self): from matplotlib.pyplot import subplot, gcf x = Series(randn(2)) @@ -120,28 +122,28 @@ def test_hist_no_overlap(self): y.hist() fig = gcf() axes = fig.axes if self.mpl_ge_1_5_0 else fig.get_axes() - self.assertEqual(len(axes), 2) + assert len(axes) == 2 - @slow + @pytest.mark.slow def test_hist_by_no_extra_plots(self): df = self.hist_df axes = df.height.hist(by=df.gender) # noqa - self.assertEqual(len(self.plt.get_fignums()), 1) + assert len(self.plt.get_fignums()) == 1 - @slow + @pytest.mark.slow def test_plot_fails_when_ax_differs_from_figure(self): from pylab import figure fig1 = figure() fig2 = figure() ax1 = fig1.add_subplot(111) - with tm.assertRaises(AssertionError): + with pytest.raises(AssertionError): self.ts.hist(ax=ax1, figure=fig2) -@tm.mplskip +@td.skip_if_no_mpl class TestDataFramePlots(TestPlotBase): - @slow + @pytest.mark.slow def test_hist_df_legacy(self): from matplotlib.patches import Rectangle with tm.assert_produces_warning(UserWarning): @@ -152,7 +154,7 @@ def test_hist_df_legacy(self): with tm.assert_produces_warning(UserWarning): axes = _check_plot_works(df.hist, grid=False) self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) - self.assertFalse(axes[1, 1].get_visible()) + assert not axes[1, 1].get_visible() df = DataFrame(randn(100, 1)) 
_check_plot_works(df.hist) @@ -194,7 +196,7 @@ def test_hist_df_legacy(self): ax = ser.hist(normed=True, cumulative=True, bins=4) # height of last bin (index 5) must be 1.0 rects = [x for x in ax.get_children() if isinstance(x, Rectangle)] - self.assertAlmostEqual(rects[-1].get_height(), 1.0) + tm.assert_almost_equal(rects[-1].get_height(), 1.0) tm.close() ax = ser.hist(log=True) @@ -204,10 +206,10 @@ def test_hist_df_legacy(self): tm.close() # propagate attr exception from matplotlib.Axes.hist - with tm.assertRaises(AttributeError): + with pytest.raises(AttributeError): ser.hist(foo='bar') - @slow + @pytest.mark.slow def test_hist_layout(self): df = DataFrame(randn(100, 3)) @@ -229,20 +231,30 @@ def test_hist_layout(self): self._check_axes_shape(axes, axes_num=3, layout=expected) # layout too small for all 4 plots - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df.hist(layout=(1, 1)) # invalid format for layout - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df.hist(layout=(1,)) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df.hist(layout=(-1, -1)) + @pytest.mark.slow + # GH 9351 + def test_tight_layout(self): + if self.mpl_ge_2_0_1: + df = DataFrame(randn(100, 3)) + _check_plot_works(df.hist) + self.plt.tight_layout() + + tm.close() + -@tm.mplskip +@td.skip_if_no_mpl class TestDataFrameGroupByPlots(TestPlotBase): - @slow + @pytest.mark.slow def test_grouped_hist_legacy(self): from matplotlib.patches import Rectangle @@ -250,7 +262,7 @@ def test_grouped_hist_legacy(self): df['C'] = np.random.randint(0, 4, 500) df['D'] = ['X'] * 500 - axes = plotting.grouped_hist(df.A, by=df.C) + axes = grouped_hist(df.A, by=df.C) self._check_axes_shape(axes, axes_num=4, layout=(2, 2)) tm.close() @@ -267,32 +279,31 @@ def test_grouped_hist_legacy(self): # make sure kwargs to hist are handled xf, yf = 20, 18 xrot, yrot = 30, 40 - axes = plotting.grouped_hist(df.A, by=df.C, normed=True, - cumulative=True, bins=4, - 
xlabelsize=xf, xrot=xrot, - ylabelsize=yf, yrot=yrot) + axes = grouped_hist(df.A, by=df.C, normed=True, cumulative=True, + bins=4, xlabelsize=xf, xrot=xrot, + ylabelsize=yf, yrot=yrot) # height of last bin (index 5) must be 1.0 for ax in axes.ravel(): rects = [x for x in ax.get_children() if isinstance(x, Rectangle)] height = rects[-1].get_height() - self.assertAlmostEqual(height, 1.0) + tm.assert_almost_equal(height, 1.0) self._check_ticks_props(axes, xlabelsize=xf, xrot=xrot, ylabelsize=yf, yrot=yrot) tm.close() - axes = plotting.grouped_hist(df.A, by=df.C, log=True) + axes = grouped_hist(df.A, by=df.C, log=True) # scale of y must be 'log' self._check_ax_scales(axes, yaxis='log') tm.close() # propagate attr exception from matplotlib.Axes.hist - with tm.assertRaises(AttributeError): - plotting.grouped_hist(df.A, by=df.C, foo='bar') + with pytest.raises(AttributeError): + grouped_hist(df.A, by=df.C, foo='bar') with tm.assert_produces_warning(FutureWarning): df.hist(by='C', figsize='default') - @slow + @pytest.mark.slow def test_grouped_hist_legacy2(self): n = 10 weight = Series(np.random.normal(166, 20, size=n)) @@ -303,19 +314,19 @@ def test_grouped_hist_legacy2(self): 'gender': gender_int}) gb = df_int.groupby('gender') axes = gb.hist() - self.assertEqual(len(axes), 2) - self.assertEqual(len(self.plt.get_fignums()), 2) + assert len(axes) == 2 + assert len(self.plt.get_fignums()) == 2 tm.close() - @slow + @pytest.mark.slow def test_grouped_hist_layout(self): df = self.hist_df - self.assertRaises(ValueError, df.hist, column='weight', by=df.gender, - layout=(1, 1)) - self.assertRaises(ValueError, df.hist, column='height', by=df.category, - layout=(1, 3)) - self.assertRaises(ValueError, df.hist, column='height', by=df.category, - layout=(-1, -1)) + pytest.raises(ValueError, df.hist, column='weight', by=df.gender, + layout=(1, 1)) + pytest.raises(ValueError, df.hist, column='height', by=df.category, + layout=(1, 3)) + pytest.raises(ValueError, df.hist, 
column='height', by=df.category, + layout=(-1, -1)) with tm.assert_produces_warning(UserWarning): axes = _check_plot_works(df.hist, column='height', by=df.gender, @@ -356,7 +367,7 @@ def test_grouped_hist_layout(self): axes = df.hist(column=['height', 'weight', 'category']) self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) - @slow + @pytest.mark.slow def test_grouped_hist_multiple_axes(self): # GH 6970, GH 7069 df = self.hist_df @@ -364,54 +375,54 @@ def test_grouped_hist_multiple_axes(self): fig, axes = self.plt.subplots(2, 3) returned = df.hist(column=['height', 'weight', 'category'], ax=axes[0]) self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) - self.assert_numpy_array_equal(returned, axes[0]) - self.assertIs(returned[0].figure, fig) + tm.assert_numpy_array_equal(returned, axes[0]) + assert returned[0].figure is fig returned = df.hist(by='classroom', ax=axes[1]) self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) - self.assert_numpy_array_equal(returned, axes[1]) - self.assertIs(returned[0].figure, fig) + tm.assert_numpy_array_equal(returned, axes[1]) + assert returned[0].figure is fig - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): fig, axes = self.plt.subplots(2, 3) # pass different number of axes from required axes = df.hist(column='height', ax=axes) - @slow + @pytest.mark.slow def test_axis_share_x(self): df = self.hist_df # GH4089 ax1, ax2 = df.hist(column='height', by=df.gender, sharex=True) # share x - self.assertTrue(ax1._shared_x_axes.joined(ax1, ax2)) - self.assertTrue(ax2._shared_x_axes.joined(ax1, ax2)) + assert ax1._shared_x_axes.joined(ax1, ax2) + assert ax2._shared_x_axes.joined(ax1, ax2) # don't share y - self.assertFalse(ax1._shared_y_axes.joined(ax1, ax2)) - self.assertFalse(ax2._shared_y_axes.joined(ax1, ax2)) + assert not ax1._shared_y_axes.joined(ax1, ax2) + assert not ax2._shared_y_axes.joined(ax1, ax2) - @slow + @pytest.mark.slow def test_axis_share_y(self): df = self.hist_df ax1, ax2 = 
df.hist(column='height', by=df.gender, sharey=True) # share y - self.assertTrue(ax1._shared_y_axes.joined(ax1, ax2)) - self.assertTrue(ax2._shared_y_axes.joined(ax1, ax2)) + assert ax1._shared_y_axes.joined(ax1, ax2) + assert ax2._shared_y_axes.joined(ax1, ax2) # don't share x - self.assertFalse(ax1._shared_x_axes.joined(ax1, ax2)) - self.assertFalse(ax2._shared_x_axes.joined(ax1, ax2)) + assert not ax1._shared_x_axes.joined(ax1, ax2) + assert not ax2._shared_x_axes.joined(ax1, ax2) - @slow + @pytest.mark.slow def test_axis_share_xy(self): df = self.hist_df ax1, ax2 = df.hist(column='height', by=df.gender, sharex=True, sharey=True) # share both x and y - self.assertTrue(ax1._shared_x_axes.joined(ax1, ax2)) - self.assertTrue(ax2._shared_x_axes.joined(ax1, ax2)) + assert ax1._shared_x_axes.joined(ax1, ax2) + assert ax2._shared_x_axes.joined(ax1, ax2) - self.assertTrue(ax1._shared_y_axes.joined(ax1, ax2)) - self.assertTrue(ax2._shared_y_axes.joined(ax1, ax2)) + assert ax1._shared_y_axes.joined(ax1, ax2) + assert ax2._shared_y_axes.joined(ax1, ax2) diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index 11f00386ec592..c5ce8aba9d80e 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -2,91 +2,59 @@ """ Test cases for misc plot functions """ -from pandas import Series, DataFrame +import pytest + +from pandas import DataFrame from pandas.compat import lmap import pandas.util.testing as tm -from pandas.util.testing import slow +import pandas.util._test_decorators as td import numpy as np from numpy import random from numpy.random import randn -import pandas.tools.plotting as plotting -from pandas.tests.plotting.common import (TestPlotBase, _check_plot_works, - _ok_for_gaussian_kde) +import pandas.plotting as plotting +from pandas.tests.plotting.common import TestPlotBase, _check_plot_works -@tm.mplskip +@td.skip_if_no_mpl class TestSeriesPlots(TestPlotBase): - def setUp(self): - 
TestPlotBase.setUp(self) + def setup_method(self, method): + TestPlotBase.setup_method(self, method) import matplotlib as mpl mpl.rcdefaults() self.ts = tm.makeTimeSeries() self.ts.name = 'ts' - @slow + @pytest.mark.slow def test_autocorrelation_plot(self): - from pandas.tools.plotting import autocorrelation_plot + from pandas.plotting import autocorrelation_plot _check_plot_works(autocorrelation_plot, series=self.ts) _check_plot_works(autocorrelation_plot, series=self.ts.values) ax = autocorrelation_plot(self.ts, label='Test') self._check_legend_labels(ax, labels=['Test']) - @slow + @pytest.mark.slow def test_lag_plot(self): - from pandas.tools.plotting import lag_plot + from pandas.plotting import lag_plot _check_plot_works(lag_plot, series=self.ts) _check_plot_works(lag_plot, series=self.ts, lag=5) - @slow + @pytest.mark.slow def test_bootstrap_plot(self): - from pandas.tools.plotting import bootstrap_plot + from pandas.plotting import bootstrap_plot _check_plot_works(bootstrap_plot, series=self.ts, size=10) -@tm.mplskip +@td.skip_if_no_mpl class TestDataFramePlots(TestPlotBase): - @slow - def test_scatter_plot_legacy(self): - tm._skip_if_no_scipy() - - df = DataFrame(randn(100, 2)) - - def scat(**kwds): - return plotting.scatter_matrix(df, **kwds) - - with tm.assert_produces_warning(UserWarning): - _check_plot_works(scat) - with tm.assert_produces_warning(UserWarning): - _check_plot_works(scat, marker='+') - with tm.assert_produces_warning(UserWarning): - _check_plot_works(scat, vmin=0) - if _ok_for_gaussian_kde('kde'): - with tm.assert_produces_warning(UserWarning): - _check_plot_works(scat, diagonal='kde') - if _ok_for_gaussian_kde('density'): - with tm.assert_produces_warning(UserWarning): - _check_plot_works(scat, diagonal='density') - with tm.assert_produces_warning(UserWarning): - _check_plot_works(scat, diagonal='hist') - with tm.assert_produces_warning(UserWarning): - _check_plot_works(scat, range_padding=.1) - - def scat2(x, y, by=None, ax=None, 
figsize=None): - return plotting.scatter_plot(df, x, y, by, ax, figsize=None) - - _check_plot_works(scat2, x=0, y=1) - grouper = Series(np.repeat([1, 2, 3, 4, 5], 20), df.index) - with tm.assert_produces_warning(UserWarning): - _check_plot_works(scat2, x=0, y=1, by=grouper) - + @td.xfail_if_mpl_2_2 + @td.skip_if_no_scipy def test_scatter_matrix_axis(self): - tm._skip_if_no_scipy() scatter_matrix = plotting.scatter_matrix with tm.RNGContext(42): @@ -122,9 +90,9 @@ def test_scatter_matrix_axis(self): self._check_ticks_props( axes, xlabelsize=8, xrot=90, ylabelsize=8, yrot=0) - @slow + @pytest.mark.slow def test_andrews_curves(self): - from pandas.tools.plotting import andrews_curves + from pandas.plotting import andrews_curves from matplotlib import cm df = self.iris @@ -187,9 +155,9 @@ def test_andrews_curves(self): with tm.assert_produces_warning(FutureWarning): andrews_curves(data=df, class_column='Name') - @slow + @pytest.mark.slow def test_parallel_coordinates(self): - from pandas.tools.plotting import parallel_coordinates + from pandas.plotting import parallel_coordinates from matplotlib import cm df = self.iris @@ -235,9 +203,30 @@ def test_parallel_coordinates(self): with tm.assert_produces_warning(FutureWarning): parallel_coordinates(df, 'Name', colors=colors) - @slow + @pytest.mark.xfail(reason="unreliable test") + def test_parallel_coordinates_with_sorted_labels(self): + """ For #15908 """ + from pandas.plotting import parallel_coordinates + + df = DataFrame({"feat": [i for i in range(30)], + "class": [2 for _ in range(10)] + + [3 for _ in range(10)] + + [1 for _ in range(10)]}) + ax = parallel_coordinates(df, 'class', sort_labels=True) + polylines, labels = ax.get_legend_handles_labels() + color_label_tuples = \ + zip([polyline.get_color() for polyline in polylines], labels) + ordered_color_label_tuples = sorted(color_label_tuples, + key=lambda x: x[1]) + prev_next_tupels = zip([i for i in ordered_color_label_tuples[0:-1]], + [i for i in 
ordered_color_label_tuples[1:]]) + for prev, nxt in prev_next_tupels: + # labels and colors are ordered strictly increasing + assert prev[1] < nxt[1] and prev[0] < nxt[0] + + @pytest.mark.slow def test_radviz(self): - from pandas.tools.plotting import radviz + from pandas.plotting import radviz from matplotlib import cm df = self.iris @@ -273,7 +262,7 @@ def test_radviz(self): handles, labels = ax.get_legend_handles_labels() self._check_colors(handles, facecolors=colors) - @slow + @pytest.mark.slow def test_subplot_titles(self): df = self.iris.drop('Name', axis=1).head() # Use the column names as the subplot titles @@ -281,20 +270,37 @@ def test_subplot_titles(self): # Case len(title) == len(df) plot = df.plot(subplots=True, title=title) - self.assertEqual([p.get_title() for p in plot], title) + assert [p.get_title() for p in plot] == title # Case len(title) > len(df) - self.assertRaises(ValueError, df.plot, subplots=True, - title=title + ["kittens > puppies"]) + pytest.raises(ValueError, df.plot, subplots=True, + title=title + ["kittens > puppies"]) # Case len(title) < len(df) - self.assertRaises(ValueError, df.plot, subplots=True, title=title[:2]) + pytest.raises(ValueError, df.plot, subplots=True, title=title[:2]) # Case subplots=False and title is of type list - self.assertRaises(ValueError, df.plot, subplots=False, title=title) + pytest.raises(ValueError, df.plot, subplots=False, title=title) # Case df with 3 numeric columns but layout of (2,2) plot = df.drop('SepalWidth', axis=1).plot(subplots=True, layout=(2, 2), title=title[:-1]) title_list = [ax.get_title() for sublist in plot for ax in sublist] - self.assertEqual(title_list, title[:3] + ['']) + assert title_list == title[:3] + [''] + + def test_get_standard_colors_random_seed(self): + # GH17525 + df = DataFrame(np.zeros((10, 10))) + + # Make sure that the random seed isn't reset by _get_standard_colors + plotting.parallel_coordinates(df, 0) + rand1 = random.random() + plotting.parallel_coordinates(df, 0) 
+ rand2 = random.random() + assert rand1 != rand2 + + # Make sure it produces the same colors every time it's called + from pandas.plotting._style import _get_standard_colors + color1 = _get_standard_colors(1, color_type='random') + color2 = _get_standard_colors(1, color_type='random') + assert color1 == color2 diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index 8c00d606059a4..5dc7d52e05778 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -3,7 +3,8 @@ """ Test cases for Series.plot """ -import itertools +from itertools import chain +import pytest from datetime import datetime @@ -11,22 +12,22 @@ from pandas import Series, DataFrame, date_range from pandas.compat import range, lrange import pandas.util.testing as tm -from pandas.util.testing import slow +import pandas.util._test_decorators as td import numpy as np from numpy.random import randn -import pandas.tools.plotting as plotting +import pandas.plotting as plotting from pandas.tests.plotting.common import (TestPlotBase, _check_plot_works, _skip_if_no_scipy_gaussian_kde, _ok_for_gaussian_kde) -@tm.mplskip +@td.skip_if_no_mpl class TestSeriesPlots(TestPlotBase): - def setUp(self): - TestPlotBase.setUp(self) + def setup_method(self, method): + TestPlotBase.setup_method(self, method) import matplotlib as mpl mpl.rcdefaults() @@ -39,7 +40,7 @@ def setUp(self): self.iseries = tm.makePeriodSeries() self.iseries.name = 'iseries' - @slow + @pytest.mark.slow def test_plot(self): _check_plot_works(self.ts.plot, label='foo') _check_plot_works(self.ts.plot, use_index=False) @@ -77,10 +78,11 @@ def test_plot(self): ax = _check_plot_works(self.ts.plot, subplots=True, layout=(1, -1)) self._check_axes_shape(ax, axes_num=1, layout=(1, 1)) - @slow + @pytest.mark.slow def test_plot_figsize_and_title(self): # figsize and title - ax = self.series.plot(title='Test', figsize=(16, 8)) + _, ax = self.plt.subplots() + ax = 
self.series.plot(title='Test', figsize=(16, 8), ax=ax) self._check_text_labels(ax.title, 'Test') self._check_axes_shape(ax, axes_num=1, layout=(1, 1), figsize=(16, 8)) @@ -91,74 +93,85 @@ def test_dont_modify_rcParams(self): else: key = 'axes.color_cycle' colors = self.plt.rcParams[key] - Series([1, 2, 3]).plot() - self.assertEqual(colors, self.plt.rcParams[key]) + _, ax = self.plt.subplots() + Series([1, 2, 3]).plot(ax=ax) + assert colors == self.plt.rcParams[key] def test_ts_line_lim(self): - ax = self.ts.plot() + fig, ax = self.plt.subplots() + ax = self.ts.plot(ax=ax) xmin, xmax = ax.get_xlim() lines = ax.get_lines() - self.assertEqual(xmin, lines[0].get_data(orig=False)[0][0]) - self.assertEqual(xmax, lines[0].get_data(orig=False)[0][-1]) + assert xmin <= lines[0].get_data(orig=False)[0][0] + assert xmax >= lines[0].get_data(orig=False)[0][-1] tm.close() - ax = self.ts.plot(secondary_y=True) + ax = self.ts.plot(secondary_y=True, ax=ax) xmin, xmax = ax.get_xlim() lines = ax.get_lines() - self.assertEqual(xmin, lines[0].get_data(orig=False)[0][0]) - self.assertEqual(xmax, lines[0].get_data(orig=False)[0][-1]) + assert xmin <= lines[0].get_data(orig=False)[0][0] + assert xmax >= lines[0].get_data(orig=False)[0][-1] def test_ts_area_lim(self): - ax = self.ts.plot.area(stacked=False) + _, ax = self.plt.subplots() + ax = self.ts.plot.area(stacked=False, ax=ax) xmin, xmax = ax.get_xlim() line = ax.get_lines()[0].get_data(orig=False)[0] - self.assertEqual(xmin, line[0]) - self.assertEqual(xmax, line[-1]) + assert xmin <= line[0] + assert xmax >= line[-1] tm.close() # GH 7471 - ax = self.ts.plot.area(stacked=False, x_compat=True) + _, ax = self.plt.subplots() + ax = self.ts.plot.area(stacked=False, x_compat=True, ax=ax) xmin, xmax = ax.get_xlim() line = ax.get_lines()[0].get_data(orig=False)[0] - self.assertEqual(xmin, line[0]) - self.assertEqual(xmax, line[-1]) + assert xmin <= line[0] + assert xmax >= line[-1] tm.close() tz_ts = self.ts.copy() tz_ts.index = 
tz_ts.tz_localize('GMT').tz_convert('CET') - ax = tz_ts.plot.area(stacked=False, x_compat=True) + _, ax = self.plt.subplots() + ax = tz_ts.plot.area(stacked=False, x_compat=True, ax=ax) xmin, xmax = ax.get_xlim() line = ax.get_lines()[0].get_data(orig=False)[0] - self.assertEqual(xmin, line[0]) - self.assertEqual(xmax, line[-1]) + assert xmin <= line[0] + assert xmax >= line[-1] tm.close() - ax = tz_ts.plot.area(stacked=False, secondary_y=True) + _, ax = self.plt.subplots() + ax = tz_ts.plot.area(stacked=False, secondary_y=True, ax=ax) xmin, xmax = ax.get_xlim() line = ax.get_lines()[0].get_data(orig=False)[0] - self.assertEqual(xmin, line[0]) - self.assertEqual(xmax, line[-1]) + assert xmin <= line[0] + assert xmax >= line[-1] def test_label(self): s = Series([1, 2]) - ax = s.plot(label='LABEL', legend=True) + _, ax = self.plt.subplots() + ax = s.plot(label='LABEL', legend=True, ax=ax) self._check_legend_labels(ax, labels=['LABEL']) self.plt.close() - ax = s.plot(legend=True) + _, ax = self.plt.subplots() + ax = s.plot(legend=True, ax=ax) self._check_legend_labels(ax, labels=['None']) self.plt.close() # get name from index s.name = 'NAME' - ax = s.plot(legend=True) + _, ax = self.plt.subplots() + ax = s.plot(legend=True, ax=ax) self._check_legend_labels(ax, labels=['NAME']) self.plt.close() # override the default - ax = s.plot(legend=True, label='LABEL') + _, ax = self.plt.subplots() + ax = s.plot(legend=True, label='LABEL', ax=ax) self._check_legend_labels(ax, labels=['LABEL']) self.plt.close() # Add lebel info, but don't draw - ax = s.plot(legend=False, label='LABEL') - self.assertEqual(ax.get_legend(), None) # Hasn't been drawn + _, ax = self.plt.subplots() + ax = s.plot(legend=False, label='LABEL', ax=ax) + assert ax.get_legend() is None # Hasn't been drawn ax.legend() # draw it self._check_legend_labels(ax, labels=['LABEL']) @@ -172,40 +185,44 @@ def test_line_area_nan_series(self): masked = ax.lines[0].get_ydata() # remove nan for comparison purpose exp = 
np.array([1, 2, 3], dtype=np.float64) - self.assert_numpy_array_equal(np.delete(masked.data, 2), exp) - self.assert_numpy_array_equal( + tm.assert_numpy_array_equal(np.delete(masked.data, 2), exp) + tm.assert_numpy_array_equal( masked.mask, np.array([False, False, True, False])) expected = np.array([1, 2, 0, 3], dtype=np.float64) ax = _check_plot_works(d.plot, stacked=True) - self.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected) + tm.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected) ax = _check_plot_works(d.plot.area) - self.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected) + tm.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected) ax = _check_plot_works(d.plot.area, stacked=False) - self.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected) + tm.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected) def test_line_use_index_false(self): s = Series([1, 2, 3], index=['a', 'b', 'c']) s.index.name = 'The Index' - ax = s.plot(use_index=False) + _, ax = self.plt.subplots() + ax = s.plot(use_index=False, ax=ax) label = ax.get_xlabel() - self.assertEqual(label, '') - ax2 = s.plot.bar(use_index=False) + assert label == '' + _, ax = self.plt.subplots() + ax2 = s.plot.bar(use_index=False, ax=ax) label2 = ax2.get_xlabel() - self.assertEqual(label2, '') + assert label2 == '' - @slow + @pytest.mark.slow def test_bar_log(self): expected = np.array([1., 10., 100., 1000.]) if not self.mpl_le_1_2_1: expected = np.hstack((.1, expected, 1e4)) - ax = Series([200, 500]).plot.bar(log=True) + _, ax = self.plt.subplots() + ax = Series([200, 500]).plot.bar(log=True, ax=ax) tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), expected) tm.close() - ax = Series([200, 500]).plot.barh(log=True) + _, ax = self.plt.subplots() + ax = Series([200, 500]).plot.barh(log=True, ax=ax) tm.assert_numpy_array_equal(ax.xaxis.get_ticklocs(), expected) tm.close() @@ -217,46 +234,72 @@ def test_bar_log(self): if self.mpl_ge_2_0_0: expected = np.hstack((1.0e-05, 
expected)) - ax = Series([0.1, 0.01, 0.001]).plot(log=True, kind='bar') + _, ax = self.plt.subplots() + ax = Series([0.1, 0.01, 0.001]).plot(log=True, kind='bar', ax=ax) ymin = 0.0007943282347242822 if self.mpl_ge_2_0_0 else 0.001 ymax = 0.12589254117941673 if self.mpl_ge_2_0_0 else .10000000000000001 res = ax.get_ylim() - self.assertAlmostEqual(res[0], ymin) - self.assertAlmostEqual(res[1], ymax) + tm.assert_almost_equal(res[0], ymin) + tm.assert_almost_equal(res[1], ymax) tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), expected) tm.close() - ax = Series([0.1, 0.01, 0.001]).plot(log=True, kind='barh') + _, ax = self.plt.subplots() + ax = Series([0.1, 0.01, 0.001]).plot(log=True, kind='barh', ax=ax) res = ax.get_xlim() - self.assertAlmostEqual(res[0], ymin) - self.assertAlmostEqual(res[1], ymax) + tm.assert_almost_equal(res[0], ymin) + tm.assert_almost_equal(res[1], ymax) tm.assert_numpy_array_equal(ax.xaxis.get_ticklocs(), expected) - @slow + @pytest.mark.slow def test_bar_ignore_index(self): df = Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd']) - ax = df.plot.bar(use_index=False) + _, ax = self.plt.subplots() + ax = df.plot.bar(use_index=False, ax=ax) self._check_text_labels(ax.get_xticklabels(), ['0', '1', '2', '3']) + def test_bar_user_colors(self): + s = Series([1, 2, 3, 4]) + ax = s.plot.bar(color=['red', 'blue', 'blue', 'red']) + result = [p.get_facecolor() for p in ax.patches] + expected = [(1., 0., 0., 1.), + (0., 0., 1., 1.), + (0., 0., 1., 1.), + (1., 0., 0., 1.)] + assert result == expected + def test_rotation(self): df = DataFrame(randn(5, 5)) # Default rot 0 - axes = df.plot() + _, ax = self.plt.subplots() + axes = df.plot(ax=ax) self._check_ticks_props(axes, xrot=0) - axes = df.plot(rot=30) + _, ax = self.plt.subplots() + axes = df.plot(rot=30, ax=ax) self._check_ticks_props(axes, xrot=30) def test_irregular_datetime(self): rng = date_range('1/1/2000', '3/1/2000') rng = rng[[0, 1, 2, 3, 5, 9, 10, 11, 12]] ser = Series(randn(len(rng)), rng) - ax 
= ser.plot() + _, ax = self.plt.subplots() + ax = ser.plot(ax=ax) xp = datetime(1999, 1, 1).toordinal() ax.set_xlim('1/1/1999', '1/1/2001') - self.assertEqual(xp, ax.get_xlim()[0]) + assert xp == ax.get_xlim()[0] - @slow + def test_unsorted_index_xlim(self): + ser = Series([0., 1., np.nan, 3., 4., 5., 6.], + index=[1., 0., 3., 2., np.nan, 3., 2.]) + _, ax = self.plt.subplots() + ax = ser.plot(ax=ax) + xmin, xmax = ax.get_xlim() + lines = ax.get_lines() + assert xmin <= np.nanmin(lines[0].get_data(orig=False)[0]) + assert xmax >= np.nanmax(lines[0].get_data(orig=False)[0]) + + @pytest.mark.slow def test_pie_series(self): # if sum of values is less than 1.0, pie handle them as rate and draw # semicircle. @@ -264,7 +307,7 @@ def test_pie_series(self): index=['a', 'b', 'c', 'd', 'e'], name='YLABEL') ax = _check_plot_works(series.plot.pie) self._check_text_labels(ax.texts, series.index) - self.assertEqual(ax.get_ylabel(), 'YLABEL') + assert ax.get_ylabel() == 'YLABEL' # without wedge labels ax = _check_plot_works(series.plot.pie, labels=None) @@ -290,14 +333,13 @@ def test_pie_series(self): autopct='%.2f', fontsize=7) pcts = ['{0:.2f}'.format(s * 100) for s in series.values / float(series.sum())] - iters = [iter(series.index), iter(pcts)] - expected_texts = list(next(it) for it in itertools.cycle(iters)) + expected_texts = list(chain.from_iterable(zip(series.index, pcts))) self._check_text_labels(ax.texts, expected_texts) for t in ax.texts: - self.assertEqual(t.get_fontsize(), 7) + assert t.get_fontsize() == 7 # includes negative value - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): series = Series([1, 2, 0, 4, -1], index=['a', 'b', 'c', 'd', 'e']) series.plot.pie() @@ -309,31 +351,35 @@ def test_pie_series(self): def test_pie_nan(self): s = Series([1, np.nan, 1, 1]) - ax = s.plot.pie(legend=True) + _, ax = self.plt.subplots() + ax = s.plot.pie(legend=True, ax=ax) expected = ['0', '', '2', '3'] result = [x.get_text() for x in ax.texts] - 
self.assertEqual(result, expected) + assert result == expected - @slow + @pytest.mark.slow def test_hist_df_kwargs(self): df = DataFrame(np.random.randn(10, 2)) - ax = df.plot.hist(bins=5) - self.assertEqual(len(ax.patches), 10) + _, ax = self.plt.subplots() + ax = df.plot.hist(bins=5, ax=ax) + assert len(ax.patches) == 10 - @slow + @pytest.mark.slow def test_hist_df_with_nonnumerics(self): # GH 9853 with tm.RNGContext(1): df = DataFrame( np.random.randn(10, 4), columns=['A', 'B', 'C', 'D']) df['E'] = ['x', 'y'] * 5 - ax = df.plot.hist(bins=5) - self.assertEqual(len(ax.patches), 20) + _, ax = self.plt.subplots() + ax = df.plot.hist(bins=5, ax=ax) + assert len(ax.patches) == 20 - ax = df.plot.hist() # bins=10 - self.assertEqual(len(ax.patches), 40) + _, ax = self.plt.subplots() + ax = df.plot.hist(ax=ax) # bins=10 + assert len(ax.patches) == 40 - @slow + @pytest.mark.slow def test_hist_legacy(self): _check_plot_works(self.ts.hist) _check_plot_works(self.ts.hist, grid=False) @@ -356,25 +402,25 @@ def test_hist_legacy(self): _check_plot_works(self.ts.hist, figure=fig, ax=ax1) _check_plot_works(self.ts.hist, figure=fig, ax=ax2) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): self.ts.hist(by=self.ts.index, figure=fig) - @slow + @pytest.mark.slow def test_hist_bins_legacy(self): df = DataFrame(np.random.randn(10, 2)) ax = df.hist(bins=2)[0][0] - self.assertEqual(len(ax.patches), 2) + assert len(ax.patches) == 2 - @slow + @pytest.mark.slow def test_hist_layout(self): df = self.hist_df - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df.height.hist(layout=(1, 1)) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df.height.hist(layout=[1, 1]) - @slow + @pytest.mark.slow def test_hist_layout_with_by(self): df = self.hist_df @@ -418,7 +464,7 @@ def test_hist_layout_with_by(self): self._check_axes_shape(axes, axes_num=4, layout=(4, 2), figsize=(12, 7)) - @slow + @pytest.mark.slow def test_hist_no_overlap(self): 
from matplotlib.pyplot import subplot, gcf x = Series(randn(2)) @@ -429,113 +475,127 @@ def test_hist_no_overlap(self): y.hist() fig = gcf() axes = fig.axes if self.mpl_ge_1_5_0 else fig.get_axes() - self.assertEqual(len(axes), 2) + assert len(axes) == 2 - @slow + @pytest.mark.slow def test_hist_secondary_legend(self): # GH 9610 df = DataFrame(np.random.randn(30, 4), columns=list('abcd')) # primary -> secondary - ax = df['a'].plot.hist(legend=True) + _, ax = self.plt.subplots() + ax = df['a'].plot.hist(legend=True, ax=ax) df['b'].plot.hist(ax=ax, legend=True, secondary_y=True) # both legends are dran on left ax # left and right axis must be visible self._check_legend_labels(ax, labels=['a', 'b (right)']) - self.assertTrue(ax.get_yaxis().get_visible()) - self.assertTrue(ax.right_ax.get_yaxis().get_visible()) + assert ax.get_yaxis().get_visible() + assert ax.right_ax.get_yaxis().get_visible() tm.close() # secondary -> secondary - ax = df['a'].plot.hist(legend=True, secondary_y=True) + _, ax = self.plt.subplots() + ax = df['a'].plot.hist(legend=True, secondary_y=True, ax=ax) df['b'].plot.hist(ax=ax, legend=True, secondary_y=True) # both legends are draw on left ax # left axis must be invisible, right axis must be visible self._check_legend_labels(ax.left_ax, labels=['a (right)', 'b (right)']) - self.assertFalse(ax.left_ax.get_yaxis().get_visible()) - self.assertTrue(ax.get_yaxis().get_visible()) + assert not ax.left_ax.get_yaxis().get_visible() + assert ax.get_yaxis().get_visible() tm.close() # secondary -> primary - ax = df['a'].plot.hist(legend=True, secondary_y=True) + _, ax = self.plt.subplots() + ax = df['a'].plot.hist(legend=True, secondary_y=True, ax=ax) # right axes is returned df['b'].plot.hist(ax=ax, legend=True) # both legends are draw on left ax # left and right axis must be visible self._check_legend_labels(ax.left_ax, labels=['a (right)', 'b']) - self.assertTrue(ax.left_ax.get_yaxis().get_visible()) - self.assertTrue(ax.get_yaxis().get_visible()) + 
assert ax.left_ax.get_yaxis().get_visible() + assert ax.get_yaxis().get_visible() tm.close() - @slow + @pytest.mark.slow def test_df_series_secondary_legend(self): # GH 9779 df = DataFrame(np.random.randn(30, 3), columns=list('abc')) s = Series(np.random.randn(30), name='x') # primary -> secondary (without passing ax) - ax = df.plot() - s.plot(legend=True, secondary_y=True) + _, ax = self.plt.subplots() + ax = df.plot(ax=ax) + s.plot(legend=True, secondary_y=True, ax=ax) # both legends are dran on left ax # left and right axis must be visible self._check_legend_labels(ax, labels=['a', 'b', 'c', 'x (right)']) - self.assertTrue(ax.get_yaxis().get_visible()) - self.assertTrue(ax.right_ax.get_yaxis().get_visible()) + assert ax.get_yaxis().get_visible() + assert ax.right_ax.get_yaxis().get_visible() tm.close() # primary -> secondary (with passing ax) - ax = df.plot() + _, ax = self.plt.subplots() + ax = df.plot(ax=ax) s.plot(ax=ax, legend=True, secondary_y=True) # both legends are dran on left ax # left and right axis must be visible self._check_legend_labels(ax, labels=['a', 'b', 'c', 'x (right)']) - self.assertTrue(ax.get_yaxis().get_visible()) - self.assertTrue(ax.right_ax.get_yaxis().get_visible()) + assert ax.get_yaxis().get_visible() + assert ax.right_ax.get_yaxis().get_visible() tm.close() # seconcary -> secondary (without passing ax) - ax = df.plot(secondary_y=True) - s.plot(legend=True, secondary_y=True) + _, ax = self.plt.subplots() + ax = df.plot(secondary_y=True, ax=ax) + s.plot(legend=True, secondary_y=True, ax=ax) # both legends are dran on left ax # left axis must be invisible and right axis must be visible expected = ['a (right)', 'b (right)', 'c (right)', 'x (right)'] self._check_legend_labels(ax.left_ax, labels=expected) - self.assertFalse(ax.left_ax.get_yaxis().get_visible()) - self.assertTrue(ax.get_yaxis().get_visible()) + assert not ax.left_ax.get_yaxis().get_visible() + assert ax.get_yaxis().get_visible() tm.close() # secondary -> secondary (with 
passing ax) - ax = df.plot(secondary_y=True) + _, ax = self.plt.subplots() + ax = df.plot(secondary_y=True, ax=ax) s.plot(ax=ax, legend=True, secondary_y=True) # both legends are dran on left ax # left axis must be invisible and right axis must be visible expected = ['a (right)', 'b (right)', 'c (right)', 'x (right)'] self._check_legend_labels(ax.left_ax, expected) - self.assertFalse(ax.left_ax.get_yaxis().get_visible()) - self.assertTrue(ax.get_yaxis().get_visible()) + assert not ax.left_ax.get_yaxis().get_visible() + assert ax.get_yaxis().get_visible() tm.close() # secondary -> secondary (with passing ax) - ax = df.plot(secondary_y=True, mark_right=False) + _, ax = self.plt.subplots() + ax = df.plot(secondary_y=True, mark_right=False, ax=ax) s.plot(ax=ax, legend=True, secondary_y=True) # both legends are dran on left ax # left axis must be invisible and right axis must be visible expected = ['a', 'b', 'c', 'x (right)'] self._check_legend_labels(ax.left_ax, expected) - self.assertFalse(ax.left_ax.get_yaxis().get_visible()) - self.assertTrue(ax.get_yaxis().get_visible()) + assert not ax.left_ax.get_yaxis().get_visible() + assert ax.get_yaxis().get_visible() tm.close() - @slow + @pytest.mark.slow def test_plot_fails_with_dupe_color_and_style(self): x = Series(randn(2)) - with tm.assertRaises(ValueError): - x.plot(style='k--', color='k') + with pytest.raises(ValueError): + _, ax = self.plt.subplots() + x.plot(style='k--', color='k', ax=ax) - @slow + @pytest.mark.slow + @td.skip_if_no_scipy def test_hist_kde(self): - ax = self.ts.plot.hist(logy=True) + if not self.mpl_ge_1_5_0: + pytest.skip("mpl is not supported") + + _, ax = self.plt.subplots() + ax = self.ts.plot.hist(logy=True, ax=ax) self._check_ax_scales(ax, yaxis='log') xlabels = ax.get_xticklabels() # ticks are values, thus ticklabels are blank @@ -543,122 +603,144 @@ def test_hist_kde(self): ylabels = ax.get_yticklabels() self._check_text_labels(ylabels, [''] * len(ylabels)) - tm._skip_if_no_scipy() 
_skip_if_no_scipy_gaussian_kde() _check_plot_works(self.ts.plot.kde) _check_plot_works(self.ts.plot.density) - ax = self.ts.plot.kde(logy=True) + _, ax = self.plt.subplots() + ax = self.ts.plot.kde(logy=True, ax=ax) self._check_ax_scales(ax, yaxis='log') xlabels = ax.get_xticklabels() self._check_text_labels(xlabels, [''] * len(xlabels)) ylabels = ax.get_yticklabels() self._check_text_labels(ylabels, [''] * len(ylabels)) - @slow + @pytest.mark.slow + @td.skip_if_no_scipy def test_kde_kwargs(self): - tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() - from numpy import linspace - _check_plot_works(self.ts.plot.kde, bw_method=.5, - ind=linspace(-100, 100, 20)) + if not self.mpl_ge_1_5_0: + pytest.skip("mpl is not supported") + + sample_points = np.linspace(-100, 100, 20) + _check_plot_works(self.ts.plot.kde, bw_method='scott', ind=20) + _check_plot_works(self.ts.plot.kde, bw_method=None, ind=20) + _check_plot_works(self.ts.plot.kde, bw_method=None, ind=np.int(20)) + _check_plot_works(self.ts.plot.kde, bw_method=.5, ind=sample_points) _check_plot_works(self.ts.plot.density, bw_method=.5, - ind=linspace(-100, 100, 20)) - ax = self.ts.plot.kde(logy=True, bw_method=.5, - ind=linspace(-100, 100, 20)) + ind=sample_points) + _, ax = self.plt.subplots() + ax = self.ts.plot.kde(logy=True, bw_method=.5, ind=sample_points, + ax=ax) self._check_ax_scales(ax, yaxis='log') self._check_text_labels(ax.yaxis.get_label(), 'Density') - @slow + @pytest.mark.slow + @td.skip_if_no_scipy def test_kde_missing_vals(self): - tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() + if not self.mpl_ge_1_5_0: + pytest.skip("mpl is not supported") + s = Series(np.random.uniform(size=50)) s[0] = np.nan axes = _check_plot_works(s.plot.kde) - # check if the values have any missing values - # GH14821 - self.assertTrue(any(~np.isnan(axes.lines[0].get_xdata())), - msg='Missing Values not dropped') - @slow + # gh-14821: check if the values have any missing values + assert 
any(~np.isnan(axes.lines[0].get_xdata())) + + @pytest.mark.slow def test_hist_kwargs(self): - ax = self.ts.plot.hist(bins=5) - self.assertEqual(len(ax.patches), 5) + _, ax = self.plt.subplots() + ax = self.ts.plot.hist(bins=5, ax=ax) + assert len(ax.patches) == 5 self._check_text_labels(ax.yaxis.get_label(), 'Frequency') tm.close() if self.mpl_ge_1_3_1: - ax = self.ts.plot.hist(orientation='horizontal') + _, ax = self.plt.subplots() + ax = self.ts.plot.hist(orientation='horizontal', ax=ax) self._check_text_labels(ax.xaxis.get_label(), 'Frequency') tm.close() - ax = self.ts.plot.hist(align='left', stacked=True) + _, ax = self.plt.subplots() + ax = self.ts.plot.hist(align='left', stacked=True, ax=ax) tm.close() - @slow + @pytest.mark.slow + @td.skip_if_no_scipy def test_hist_kde_color(self): - ax = self.ts.plot.hist(logy=True, bins=10, color='b') + if not self.mpl_ge_1_5_0: + pytest.skip("mpl is not supported") + + _, ax = self.plt.subplots() + ax = self.ts.plot.hist(logy=True, bins=10, color='b', ax=ax) self._check_ax_scales(ax, yaxis='log') - self.assertEqual(len(ax.patches), 10) + assert len(ax.patches) == 10 self._check_colors(ax.patches, facecolors=['b'] * 10) - tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() - ax = self.ts.plot.kde(logy=True, color='r') + _, ax = self.plt.subplots() + ax = self.ts.plot.kde(logy=True, color='r', ax=ax) self._check_ax_scales(ax, yaxis='log') lines = ax.get_lines() - self.assertEqual(len(lines), 1) + assert len(lines) == 1 self._check_colors(lines, ['r']) - @slow + @pytest.mark.slow def test_boxplot_series(self): - ax = self.ts.plot.box(logy=True) + _, ax = self.plt.subplots() + ax = self.ts.plot.box(logy=True, ax=ax) self._check_ax_scales(ax, yaxis='log') xlabels = ax.get_xticklabels() self._check_text_labels(xlabels, [self.ts.name]) ylabels = ax.get_yticklabels() self._check_text_labels(ylabels, [''] * len(ylabels)) - @slow + @pytest.mark.slow def test_kind_both_ways(self): s = Series(range(3)) - for kind in 
plotting._common_kinds + plotting._series_kinds: + kinds = (plotting._core._common_kinds + + plotting._core._series_kinds) + _, ax = self.plt.subplots() + for kind in kinds: if not _ok_for_gaussian_kde(kind): continue - s.plot(kind=kind) + s.plot(kind=kind, ax=ax) getattr(s.plot, kind)() - @slow + @pytest.mark.slow def test_invalid_plot_data(self): s = Series(list('abcd')) - for kind in plotting._common_kinds: + _, ax = self.plt.subplots() + for kind in plotting._core._common_kinds: if not _ok_for_gaussian_kde(kind): continue - with tm.assertRaises(TypeError): - s.plot(kind=kind) + with pytest.raises(TypeError): + s.plot(kind=kind, ax=ax) - @slow + @pytest.mark.slow def test_valid_object_plot(self): s = Series(lrange(10), dtype=object) - for kind in plotting._common_kinds: + for kind in plotting._core._common_kinds: if not _ok_for_gaussian_kde(kind): continue _check_plot_works(s.plot, kind=kind) def test_partially_invalid_plot_data(self): s = Series(['a', 'b', 1.0, 2]) - for kind in plotting._common_kinds: + _, ax = self.plt.subplots() + for kind in plotting._core._common_kinds: if not _ok_for_gaussian_kde(kind): continue - with tm.assertRaises(TypeError): - s.plot(kind=kind) + with pytest.raises(TypeError): + s.plot(kind=kind, ax=ax) def test_invalid_kind(self): s = Series([1, 2]) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): s.plot(kind='aasdf') - @slow + @pytest.mark.slow def test_dup_datetime_index_plot(self): dr1 = date_range('1/1/2009', periods=4) dr2 = date_range('1/2/2009', periods=4) @@ -667,7 +749,7 @@ def test_dup_datetime_index_plot(self): s = Series(values, index=index) _check_plot_works(s.plot) - @slow + @pytest.mark.slow def test_errorbar_plot(self): s = Series(np.arange(10), name='x') @@ -702,81 +784,87 @@ def test_errorbar_plot(self): self._check_has_errorbars(ax, xerr=0, yerr=1) # check incorrect lengths and types - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): s.plot(yerr=np.arange(11)) s_err = 
['zzz'] * 10 # in mpl 1.5+ this is a TypeError - with tm.assertRaises((ValueError, TypeError)): + with pytest.raises((ValueError, TypeError)): s.plot(yerr=s_err) + @td.xfail_if_mpl_2_2 def test_table(self): _check_plot_works(self.series.plot, table=True) _check_plot_works(self.series.plot, table=self.series) - @slow + @pytest.mark.slow def test_series_grid_settings(self): # Make sure plot defaults to rcParams['axes.grid'] setting, GH 9792 self._check_grid_settings(Series([1, 2, 3]), - plotting._series_kinds + - plotting._common_kinds) + plotting._core._series_kinds + + plotting._core._common_kinds) - @slow + @pytest.mark.slow def test_standard_colors(self): + from pandas.plotting._style import _get_standard_colors + for c in ['r', 'red', 'green', '#FF0000']: - result = plotting._get_standard_colors(1, color=c) - self.assertEqual(result, [c]) + result = _get_standard_colors(1, color=c) + assert result == [c] - result = plotting._get_standard_colors(1, color=[c]) - self.assertEqual(result, [c]) + result = _get_standard_colors(1, color=[c]) + assert result == [c] - result = plotting._get_standard_colors(3, color=c) - self.assertEqual(result, [c] * 3) + result = _get_standard_colors(3, color=c) + assert result == [c] * 3 - result = plotting._get_standard_colors(3, color=[c]) - self.assertEqual(result, [c] * 3) + result = _get_standard_colors(3, color=[c]) + assert result == [c] * 3 - @slow + @pytest.mark.slow def test_standard_colors_all(self): import matplotlib.colors as colors + from pandas.plotting._style import _get_standard_colors # multiple colors like mediumaquamarine for c in colors.cnames: - result = plotting._get_standard_colors(num_colors=1, color=c) - self.assertEqual(result, [c]) + result = _get_standard_colors(num_colors=1, color=c) + assert result == [c] - result = plotting._get_standard_colors(num_colors=1, color=[c]) - self.assertEqual(result, [c]) + result = _get_standard_colors(num_colors=1, color=[c]) + assert result == [c] - result = 
plotting._get_standard_colors(num_colors=3, color=c) - self.assertEqual(result, [c] * 3) + result = _get_standard_colors(num_colors=3, color=c) + assert result == [c] * 3 - result = plotting._get_standard_colors(num_colors=3, color=[c]) - self.assertEqual(result, [c] * 3) + result = _get_standard_colors(num_colors=3, color=[c]) + assert result == [c] * 3 # single letter colors like k for c in colors.ColorConverter.colors: - result = plotting._get_standard_colors(num_colors=1, color=c) - self.assertEqual(result, [c]) + result = _get_standard_colors(num_colors=1, color=c) + assert result == [c] - result = plotting._get_standard_colors(num_colors=1, color=[c]) - self.assertEqual(result, [c]) + result = _get_standard_colors(num_colors=1, color=[c]) + assert result == [c] - result = plotting._get_standard_colors(num_colors=3, color=c) - self.assertEqual(result, [c] * 3) + result = _get_standard_colors(num_colors=3, color=c) + assert result == [c] * 3 - result = plotting._get_standard_colors(num_colors=3, color=[c]) - self.assertEqual(result, [c] * 3) + result = _get_standard_colors(num_colors=3, color=[c]) + assert result == [c] * 3 def test_series_plot_color_kwargs(self): # GH1890 - ax = Series(np.arange(12) + 1).plot(color='green') + _, ax = self.plt.subplots() + ax = Series(np.arange(12) + 1).plot(color='green', ax=ax) self._check_colors(ax.get_lines(), linecolors=['green']) def test_time_series_plot_color_kwargs(self): # #1890 + _, ax = self.plt.subplots() ax = Series(np.arange(12) + 1, index=date_range( - '1/1/2000', periods=12)).plot(color='green') + '1/1/2000', periods=12)).plot(color='green', ax=ax) self._check_colors(ax.get_lines(), linecolors=['green']) def test_time_series_plot_color_with_empty_kwargs(self): @@ -791,14 +879,16 @@ def test_time_series_plot_color_with_empty_kwargs(self): ncolors = 3 + _, ax = self.plt.subplots() for i in range(ncolors): - ax = s.plot() + ax = s.plot(ax=ax) self._check_colors(ax.get_lines(), linecolors=def_colors[:ncolors]) def 
test_xticklabels(self): # GH11529 s = Series(np.arange(10), index=['P%02d' % i for i in range(10)]) - ax = s.plot(xticks=[0, 3, 5, 9]) + _, ax = self.plt.subplots() + ax = s.plot(xticks=[0, 3, 5, 9], ax=ax) exp = ['P%02d' % i for i in [0, 3, 5, 9]] self._check_text_labels(ax.get_xticklabels(), exp) diff --git a/pandas/tests/reshape/__init__.py b/pandas/tests/reshape/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tools/tests/data/cut_data.csv b/pandas/tests/reshape/data/cut_data.csv similarity index 100% rename from pandas/tools/tests/data/cut_data.csv rename to pandas/tests/reshape/data/cut_data.csv diff --git a/pandas/tests/reshape/merge/__init__.py b/pandas/tests/reshape/merge/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tools/tests/data/allow_exact_matches.csv b/pandas/tests/reshape/merge/data/allow_exact_matches.csv similarity index 100% rename from pandas/tools/tests/data/allow_exact_matches.csv rename to pandas/tests/reshape/merge/data/allow_exact_matches.csv diff --git a/pandas/tools/tests/data/allow_exact_matches_and_tolerance.csv b/pandas/tests/reshape/merge/data/allow_exact_matches_and_tolerance.csv similarity index 100% rename from pandas/tools/tests/data/allow_exact_matches_and_tolerance.csv rename to pandas/tests/reshape/merge/data/allow_exact_matches_and_tolerance.csv diff --git a/pandas/tools/tests/data/asof.csv b/pandas/tests/reshape/merge/data/asof.csv similarity index 100% rename from pandas/tools/tests/data/asof.csv rename to pandas/tests/reshape/merge/data/asof.csv diff --git a/pandas/tools/tests/data/asof2.csv b/pandas/tests/reshape/merge/data/asof2.csv similarity index 100% rename from pandas/tools/tests/data/asof2.csv rename to pandas/tests/reshape/merge/data/asof2.csv diff --git a/pandas/tools/tests/data/quotes.csv b/pandas/tests/reshape/merge/data/quotes.csv similarity index 100% rename from pandas/tools/tests/data/quotes.csv rename to 
pandas/tests/reshape/merge/data/quotes.csv diff --git a/pandas/tools/tests/data/quotes2.csv b/pandas/tests/reshape/merge/data/quotes2.csv similarity index 100% rename from pandas/tools/tests/data/quotes2.csv rename to pandas/tests/reshape/merge/data/quotes2.csv diff --git a/pandas/tools/tests/data/tolerance.csv b/pandas/tests/reshape/merge/data/tolerance.csv similarity index 100% rename from pandas/tools/tests/data/tolerance.csv rename to pandas/tests/reshape/merge/data/tolerance.csv diff --git a/pandas/tools/tests/data/trades.csv b/pandas/tests/reshape/merge/data/trades.csv similarity index 100% rename from pandas/tools/tests/data/trades.csv rename to pandas/tests/reshape/merge/data/trades.csv diff --git a/pandas/tools/tests/data/trades2.csv b/pandas/tests/reshape/merge/data/trades2.csv similarity index 100% rename from pandas/tools/tests/data/trades2.csv rename to pandas/tests/reshape/merge/data/trades2.csv diff --git a/pandas/tools/tests/test_join.py b/pandas/tests/reshape/merge/test_join.py similarity index 78% rename from pandas/tools/tests/test_join.py rename to pandas/tests/reshape/merge/test_join.py index fe5821a637205..a64069fa700b8 100644 --- a/pandas/tools/tests/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -1,25 +1,27 @@ # pylint: disable=E1103 +from warnings import catch_warnings from numpy.random import randn import numpy as np +import pytest import pandas as pd from pandas.compat import lrange import pandas.compat as compat from pandas.util.testing import assert_frame_equal -from pandas import DataFrame, MultiIndex, Series, merge, concat +from pandas import DataFrame, MultiIndex, Series, Index, merge, concat -import pandas._join as _join +from pandas._libs import join as libjoin import pandas.util.testing as tm -from pandas.tools.tests.test_merge import get_test_data, N, NGROUPS +from pandas.tests.reshape.merge.test_merge import get_test_data, N, NGROUPS a_ = np.array -class TestJoin(tm.TestCase): +class TestJoin(object): - def 
setUp(self): + def setup_method(self, method): # aggregate multiple columns self.df = DataFrame({'key1': get_test_data(), 'key2': get_test_data(), @@ -46,7 +48,7 @@ def test_cython_left_outer_join(self): right = a_([1, 1, 0, 4, 2, 2, 1], dtype=np.int64) max_group = 5 - ls, rs = _join.left_outer_join(left, right, max_group) + ls, rs = libjoin.left_outer_join(left, right, max_group) exp_ls = left.argsort(kind='mergesort') exp_rs = right.argsort(kind='mergesort') @@ -62,15 +64,15 @@ def test_cython_left_outer_join(self): exp_rs = exp_rs.take(exp_ri) exp_rs[exp_ri == -1] = -1 - self.assert_numpy_array_equal(ls, exp_ls, check_dtype=False) - self.assert_numpy_array_equal(rs, exp_rs, check_dtype=False) + tm.assert_numpy_array_equal(ls, exp_ls, check_dtype=False) + tm.assert_numpy_array_equal(rs, exp_rs, check_dtype=False) def test_cython_right_outer_join(self): left = a_([0, 1, 2, 1, 2, 0, 0, 1, 2, 3, 3], dtype=np.int64) right = a_([1, 1, 0, 4, 2, 2, 1], dtype=np.int64) max_group = 5 - rs, ls = _join.left_outer_join(right, left, max_group) + rs, ls = libjoin.left_outer_join(right, left, max_group) exp_ls = left.argsort(kind='mergesort') exp_rs = right.argsort(kind='mergesort') @@ -88,15 +90,15 @@ def test_cython_right_outer_join(self): exp_rs = exp_rs.take(exp_ri) exp_rs[exp_ri == -1] = -1 - self.assert_numpy_array_equal(ls, exp_ls, check_dtype=False) - self.assert_numpy_array_equal(rs, exp_rs, check_dtype=False) + tm.assert_numpy_array_equal(ls, exp_ls, check_dtype=False) + tm.assert_numpy_array_equal(rs, exp_rs, check_dtype=False) def test_cython_inner_join(self): left = a_([0, 1, 2, 1, 2, 0, 0, 1, 2, 3, 3], dtype=np.int64) right = a_([1, 1, 0, 4, 2, 2, 1, 4], dtype=np.int64) max_group = 5 - ls, rs = _join.inner_join(left, right, max_group) + ls, rs = libjoin.inner_join(left, right, max_group) exp_ls = left.argsort(kind='mergesort') exp_rs = right.argsort(kind='mergesort') @@ -112,8 +114,8 @@ def test_cython_inner_join(self): exp_rs = exp_rs.take(exp_ri) exp_rs[exp_ri 
== -1] = -1 - self.assert_numpy_array_equal(ls, exp_ls, check_dtype=False) - self.assert_numpy_array_equal(rs, exp_rs, check_dtype=False) + tm.assert_numpy_array_equal(ls, exp_ls, check_dtype=False) + tm.assert_numpy_array_equal(rs, exp_rs, check_dtype=False) def test_left_outer_join(self): joined_key2 = merge(self.df, self.df2, on='key2') @@ -151,25 +153,25 @@ def test_handle_overlap(self): joined = merge(self.df, self.df2, on='key2', suffixes=['.foo', '.bar']) - self.assertIn('key1.foo', joined) - self.assertIn('key1.bar', joined) + assert 'key1.foo' in joined + assert 'key1.bar' in joined def test_handle_overlap_arbitrary_key(self): joined = merge(self.df, self.df2, left_on='key2', right_on='key1', suffixes=['.foo', '.bar']) - self.assertIn('key1.foo', joined) - self.assertIn('key2.bar', joined) + assert 'key1.foo' in joined + assert 'key2.bar' in joined def test_join_on(self): target = self.target source = self.source merged = target.join(source, on='C') - self.assert_series_equal(merged['MergedA'], target['A'], - check_names=False) - self.assert_series_equal(merged['MergedD'], target['D'], - check_names=False) + tm.assert_series_equal(merged['MergedA'], target['A'], + check_names=False) + tm.assert_series_equal(merged['MergedD'], target['D'], + check_names=False) # join with duplicates (fix regression from DataFrame/Matrix merge) df = DataFrame({'key': ['a', 'a', 'b', 'b', 'c']}) @@ -188,19 +190,19 @@ def test_join_on(self): columns=['three']) joined = df_a.join(df_b, on='one') joined = joined.join(df_c, on='one') - self.assertTrue(np.isnan(joined['two']['c'])) - self.assertTrue(np.isnan(joined['three']['c'])) + assert np.isnan(joined['two']['c']) + assert np.isnan(joined['three']['c']) # merge column not p resent - self.assertRaises(KeyError, target.join, source, on='E') + pytest.raises(KeyError, target.join, source, on='E') # overlap source_copy = source.copy() source_copy['A'] = 0 - self.assertRaises(ValueError, target.join, source_copy, on='A') + 
pytest.raises(ValueError, target.join, source_copy, on='A') def test_join_on_fails_with_different_right_index(self): - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df = DataFrame({'a': np.random.choice(['m', 'f'], size=3), 'b': np.random.randn(3)}) df2 = DataFrame({'a': np.random.choice(['m', 'f'], size=10), @@ -209,7 +211,7 @@ def test_join_on_fails_with_different_right_index(self): merge(df, df2, left_on='a', right_index=True) def test_join_on_fails_with_different_left_index(self): - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df = DataFrame({'a': np.random.choice(['m', 'f'], size=3), 'b': np.random.randn(3)}, index=tm.makeCustomIndex(10, 2)) @@ -218,7 +220,7 @@ def test_join_on_fails_with_different_left_index(self): merge(df, df2, right_on='b', left_index=True) def test_join_on_fails_with_different_column_counts(self): - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df = DataFrame({'a': np.random.choice(['m', 'f'], size=3), 'b': np.random.randn(3)}) df2 = DataFrame({'a': np.random.choice(['m', 'f'], size=10), @@ -232,9 +234,9 @@ def test_join_on_fails_with_wrong_object_type(self): df = DataFrame({'a': [1, 1]}) for obj in wrongly_typed: - with tm.assertRaisesRegexp(ValueError, str(type(obj))): + with tm.assert_raises_regex(ValueError, str(type(obj))): merge(obj, df, left_on='a', right_on='a') - with tm.assertRaisesRegexp(ValueError, str(type(obj))): + with tm.assert_raises_regex(ValueError, str(type(obj))): merge(df, obj, left_on='a', right_on='a') def test_join_on_pass_vector(self): @@ -249,13 +251,13 @@ def test_join_with_len0(self): # nothing to merge merged = self.target.join(self.source.reindex([]), on='C') for col in self.source: - self.assertIn(col, merged) - self.assertTrue(merged[col].isnull().all()) + assert col in merged + assert merged[col].isna().all() merged2 = self.target.join(self.source.reindex([]), on='C', how='inner') - self.assert_index_equal(merged2.columns, merged.columns) 
- self.assertEqual(len(merged2), 0) + tm.assert_index_equal(merged2.columns, merged.columns) + assert len(merged2) == 0 def test_join_on_inner(self): df = DataFrame({'key': ['a', 'a', 'd', 'b', 'b', 'c']}) @@ -264,12 +266,12 @@ def test_join_on_inner(self): joined = df.join(df2, on='key', how='inner') expected = df.join(df2, on='key') - expected = expected[expected['value'].notnull()] - self.assert_series_equal(joined['key'], expected['key'], - check_dtype=False) - self.assert_series_equal(joined['value'], expected['value'], - check_dtype=False) - self.assert_index_equal(joined.index, expected.index) + expected = expected[expected['value'].notna()] + tm.assert_series_equal(joined['key'], expected['key'], + check_dtype=False) + tm.assert_series_equal(joined['value'], expected['value'], + check_dtype=False) + tm.assert_index_equal(joined.index, expected.index) def test_join_on_singlekey_list(self): df = DataFrame({'key': ['a', 'a', 'b', 'b', 'c']}) @@ -299,8 +301,8 @@ def test_join_index_mixed(self): df1 = DataFrame({'A': 1., 'B': 2, 'C': 'foo', 'D': True}, index=np.arange(10), columns=['A', 'B', 'C', 'D']) - self.assertEqual(df1['B'].dtype, np.int64) - self.assertEqual(df1['D'].dtype, np.bool_) + assert df1['B'].dtype == np.int64 + assert df1['D'].dtype == np.bool_ df2 = DataFrame({'A': 1., 'B': 2, 'C': 'foo', 'D': True}, index=np.arange(0, 10, 2), @@ -368,22 +370,22 @@ def test_join_multiindex(self): df2 = df2.sort_index(level=0) joined = df1.join(df2, how='outer') - ex_index = index1._tuple_index.union(index2._tuple_index) + ex_index = Index(index1.values).union(Index(index2.values)) expected = df1.reindex(ex_index).join(df2.reindex(ex_index)) expected.index.names = index1.names assert_frame_equal(joined, expected) - self.assertEqual(joined.index.names, index1.names) + assert joined.index.names == index1.names df1 = df1.sort_index(level=1) df2 = df2.sort_index(level=1) joined = df1.join(df2, how='outer').sort_index(level=0) - ex_index = 
index1._tuple_index.union(index2._tuple_index) + ex_index = Index(index1.values).union(Index(index2.values)) expected = df1.reindex(ex_index).join(df2.reindex(ex_index)) expected.index.names = index1.names assert_frame_equal(joined, expected) - self.assertEqual(joined.index.names, index1.names) + assert joined.index.names == index1.names def test_join_inner_multiindex(self): key1 = ['bar', 'bar', 'bar', 'foo', 'foo', 'baz', 'baz', 'qux', @@ -420,7 +422,7 @@ def test_join_inner_multiindex(self): expected = expected.drop(['first', 'second'], axis=1) expected.index = joined.index - self.assertTrue(joined.index.is_monotonic) + assert joined.index.is_monotonic assert_frame_equal(joined, expected) # _assert_same_contents(expected, expected2.loc[:, expected.columns]) @@ -435,17 +437,17 @@ def test_join_hierarchical_mixed(self): # GH 9455, 12219 with tm.assert_produces_warning(UserWarning): result = merge(new_df, other_df, left_index=True, right_index=True) - self.assertTrue(('b', 'mean') in result) - self.assertTrue('b' in result) + assert ('b', 'mean') in result + assert 'b' in result def test_join_float64_float32(self): a = DataFrame(randn(10, 2), columns=['a', 'b'], dtype=np.float64) b = DataFrame(randn(10, 1), columns=['c'], dtype=np.float32) joined = a.join(b) - self.assertEqual(joined.dtypes['a'], 'float64') - self.assertEqual(joined.dtypes['b'], 'float64') - self.assertEqual(joined.dtypes['c'], 'float32') + assert joined.dtypes['a'] == 'float64' + assert joined.dtypes['b'] == 'float64' + assert joined.dtypes['c'] == 'float32' a = np.random.randint(0, 5, 100).astype('int64') b = np.random.random(100).astype('float64') @@ -454,10 +456,10 @@ def test_join_float64_float32(self): xpdf = DataFrame({'a': a, 'b': b, 'c': c}) s = DataFrame(np.random.random(5).astype('float32'), columns=['md']) rs = df.merge(s, left_on='a', right_index=True) - self.assertEqual(rs.dtypes['a'], 'int64') - self.assertEqual(rs.dtypes['b'], 'float64') - self.assertEqual(rs.dtypes['c'], 'float32') 
- self.assertEqual(rs.dtypes['md'], 'float32') + assert rs.dtypes['a'] == 'int64' + assert rs.dtypes['b'] == 'float64' + assert rs.dtypes['c'] == 'float32' + assert rs.dtypes['md'] == 'float32' xp = xpdf.merge(s, left_on='a', right_index=True) assert_frame_equal(rs, xp) @@ -529,7 +531,7 @@ def test_join_sort(self): # smoke test joined = left.join(right, on='key', sort=False) - self.assert_index_equal(joined.index, pd.Index(lrange(4))) + tm.assert_index_equal(joined.index, pd.Index(lrange(4))) def test_join_mixed_non_unique_index(self): # GH 12814, unorderable types in py3 with a non-unique index @@ -548,6 +550,18 @@ def test_join_mixed_non_unique_index(self): index=[1, 2, 2, 'a']) tm.assert_frame_equal(result, expected) + def test_join_non_unique_period_index(self): + # GH #16871 + index = pd.period_range('2016-01-01', periods=16, freq='M') + df = DataFrame([i for i in range(len(index))], + index=index, columns=['pnum']) + df2 = concat([df, df]) + result = df.join(df2, how='inner', rsuffix='_df2') + expected = DataFrame( + np.tile(np.arange(16, dtype=np.int64).repeat(2).reshape(-1, 1), 2), + columns=['pnum', 'pnum_df2'], index=df2.sort_index().index) + tm.assert_frame_equal(result, expected) + def test_mixed_type_join_with_suffix(self): # GH #916 df = DataFrame(np.random.randn(20, 6), @@ -587,7 +601,7 @@ def _check_diff_index(df_list, result, exp_index): joined = df_list[0].join(df_list[1:], how='inner') _check_diff_index(df_list, joined, df.index[2:8]) - self.assertRaises(ValueError, df_list[0].join, df_list[1:], on='a') + pytest.raises(ValueError, df_list[0].join, df_list[1:], on='a') def test_join_many_mixed(self): df = DataFrame(np.random.randn(8, 4), columns=['A', 'B', 'C', 'D']) @@ -629,86 +643,89 @@ def test_join_dups(self): assert_frame_equal(dta, expected) def test_panel_join(self): - panel = tm.makePanel() - tm.add_nans(panel) - - p1 = panel.iloc[:2, :10, :3] - p2 = panel.iloc[2:, 5:, 2:] - - # left join - result = p1.join(p2) - expected = p1.copy() - 
expected['ItemC'] = p2['ItemC'] - tm.assert_panel_equal(result, expected) - - # right join - result = p1.join(p2, how='right') - expected = p2.copy() - expected['ItemA'] = p1['ItemA'] - expected['ItemB'] = p1['ItemB'] - expected = expected.reindex(items=['ItemA', 'ItemB', 'ItemC']) - tm.assert_panel_equal(result, expected) - - # inner join - result = p1.join(p2, how='inner') - expected = panel.iloc[:, 5:10, 2:3] - tm.assert_panel_equal(result, expected) - - # outer join - result = p1.join(p2, how='outer') - expected = p1.reindex(major=panel.major_axis, - minor=panel.minor_axis) - expected = expected.join(p2.reindex(major=panel.major_axis, - minor=panel.minor_axis)) - tm.assert_panel_equal(result, expected) + with catch_warnings(record=True): + panel = tm.makePanel() + tm.add_nans(panel) + + p1 = panel.iloc[:2, :10, :3] + p2 = panel.iloc[2:, 5:, 2:] + + # left join + result = p1.join(p2) + expected = p1.copy() + expected['ItemC'] = p2['ItemC'] + tm.assert_panel_equal(result, expected) + + # right join + result = p1.join(p2, how='right') + expected = p2.copy() + expected['ItemA'] = p1['ItemA'] + expected['ItemB'] = p1['ItemB'] + expected = expected.reindex(items=['ItemA', 'ItemB', 'ItemC']) + tm.assert_panel_equal(result, expected) + + # inner join + result = p1.join(p2, how='inner') + expected = panel.iloc[:, 5:10, 2:3] + tm.assert_panel_equal(result, expected) + + # outer join + result = p1.join(p2, how='outer') + expected = p1.reindex(major=panel.major_axis, + minor=panel.minor_axis) + expected = expected.join(p2.reindex(major=panel.major_axis, + minor=panel.minor_axis)) + tm.assert_panel_equal(result, expected) def test_panel_join_overlap(self): - panel = tm.makePanel() - tm.add_nans(panel) - - p1 = panel.loc[['ItemA', 'ItemB', 'ItemC']] - p2 = panel.loc[['ItemB', 'ItemC']] - - # Expected index is - # - # ItemA, ItemB_p1, ItemC_p1, ItemB_p2, ItemC_p2 - joined = p1.join(p2, lsuffix='_p1', rsuffix='_p2') - p1_suf = p1.loc[['ItemB', 'ItemC']].add_suffix('_p1') - 
p2_suf = p2.loc[['ItemB', 'ItemC']].add_suffix('_p2') - no_overlap = panel.loc[['ItemA']] - expected = no_overlap.join(p1_suf.join(p2_suf)) - tm.assert_panel_equal(joined, expected) + with catch_warnings(record=True): + panel = tm.makePanel() + tm.add_nans(panel) + + p1 = panel.loc[['ItemA', 'ItemB', 'ItemC']] + p2 = panel.loc[['ItemB', 'ItemC']] + + # Expected index is + # + # ItemA, ItemB_p1, ItemC_p1, ItemB_p2, ItemC_p2 + joined = p1.join(p2, lsuffix='_p1', rsuffix='_p2') + p1_suf = p1.loc[['ItemB', 'ItemC']].add_suffix('_p1') + p2_suf = p2.loc[['ItemB', 'ItemC']].add_suffix('_p2') + no_overlap = panel.loc[['ItemA']] + expected = no_overlap.join(p1_suf.join(p2_suf)) + tm.assert_panel_equal(joined, expected) def test_panel_join_many(self): - tm.K = 10 - panel = tm.makePanel() - tm.K = 4 + with catch_warnings(record=True): + tm.K = 10 + panel = tm.makePanel() + tm.K = 4 - panels = [panel.iloc[:2], panel.iloc[2:6], panel.iloc[6:]] + panels = [panel.iloc[:2], panel.iloc[2:6], panel.iloc[6:]] - joined = panels[0].join(panels[1:]) - tm.assert_panel_equal(joined, panel) + joined = panels[0].join(panels[1:]) + tm.assert_panel_equal(joined, panel) - panels = [panel.iloc[:2, :-5], - panel.iloc[2:6, 2:], - panel.iloc[6:, 5:-7]] + panels = [panel.iloc[:2, :-5], + panel.iloc[2:6, 2:], + panel.iloc[6:, 5:-7]] - data_dict = {} - for p in panels: - data_dict.update(p.iteritems()) + data_dict = {} + for p in panels: + data_dict.update(p.iteritems()) - joined = panels[0].join(panels[1:], how='inner') - expected = pd.Panel.from_dict(data_dict, intersect=True) - tm.assert_panel_equal(joined, expected) + joined = panels[0].join(panels[1:], how='inner') + expected = pd.Panel.from_dict(data_dict, intersect=True) + tm.assert_panel_equal(joined, expected) - joined = panels[0].join(panels[1:], how='outer') - expected = pd.Panel.from_dict(data_dict, intersect=False) - tm.assert_panel_equal(joined, expected) + joined = panels[0].join(panels[1:], how='outer') + expected = 
pd.Panel.from_dict(data_dict, intersect=False) + tm.assert_panel_equal(joined, expected) - # edge cases - self.assertRaises(ValueError, panels[0].join, panels[1:], + # edge cases + pytest.raises(ValueError, panels[0].join, panels[1:], how='outer', lsuffix='foo', rsuffix='bar') - self.assertRaises(ValueError, panels[0].join, panels[1:], + pytest.raises(ValueError, panels[0].join, panels[1:], how='right') @@ -717,7 +734,7 @@ def _check_join(left, right, result, join_col, how='left', # some smoke tests for c in join_col: - assert(result[c].notnull().all()) + assert(result[c].notna().all()) left_grouped = left.groupby(join_col) right_grouped = right.groupby(join_col) @@ -771,7 +788,7 @@ def _assert_same_contents(join_chunk, source): jvalues = join_chunk.fillna(NA_SENTINEL).drop_duplicates().values svalues = source.fillna(NA_SENTINEL).drop_duplicates().values - rows = set(tuple(row) for row in jvalues) + rows = {tuple(row) for row in jvalues} assert(len(rows) == len(source)) assert(all(tuple(row) in rows for row in svalues)) @@ -780,7 +797,7 @@ def _assert_all_na(join_chunk, source_columns, join_col): for c in source_columns: if c in join_col: continue - assert(join_chunk[c].isnull().all()) + assert(join_chunk[c].isna().all()) def _join_by_hand(a, b, how='left'): diff --git a/pandas/tools/tests/test_merge.py b/pandas/tests/reshape/merge/test_merge.py similarity index 56% rename from pandas/tools/tests/test_merge.py rename to pandas/tests/reshape/merge/test_merge.py index d66cd793ec0be..f6bccc9be6dd0 100644 --- a/pandas/tools/tests/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -1,20 +1,26 @@ # pylint: disable=E1103 -from datetime import datetime +import pytest +from datetime import datetime, date from numpy.random import randn from numpy import nan import numpy as np import random +import re import pandas as pd from pandas.compat import lrange, lzip -from pandas.tools.concat import concat -from pandas.tools.merge import merge, MergeError -from 
pandas.util.testing import (assert_frame_equal, - assert_series_equal, - slow) +from pandas.core.reshape.concat import concat +from pandas.core.reshape.merge import merge, MergeError +from pandas.util.testing import assert_frame_equal, assert_series_equal +from pandas.core.dtypes.dtypes import CategoricalDtype +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_object_dtype, +) from pandas import DataFrame, Index, MultiIndex, Series, Categorical import pandas.util.testing as tm +from pandas.api.types import CategoricalDtype as CDT N = 50 @@ -32,9 +38,9 @@ def get_test_data(ngroups=NGROUPS, n=N): return arr -class TestMerge(tm.TestCase): +class TestMerge(object): - def setUp(self): + def setup_method(self, method): # aggregate multiple columns self.df = DataFrame({'key1': get_test_data(), 'key2': get_test_data(), @@ -54,11 +60,28 @@ def setUp(self): self.right = DataFrame({'v2': np.random.randn(4)}, index=['d', 'b', 'c', 'a']) + def test_merge_inner_join_empty(self): + # GH 15328 + df_empty = pd.DataFrame() + df_a = pd.DataFrame({'a': [1, 2]}, index=[0, 1], dtype='int64') + result = pd.merge(df_empty, df_a, left_index=True, right_index=True) + expected = pd.DataFrame({'a': []}, index=[], dtype='int64') + assert_frame_equal(result, expected) + def test_merge_common(self): joined = merge(self.df, self.df2) exp = merge(self.df, self.df2, on=['key1', 'key2']) tm.assert_frame_equal(joined, exp) + def test_merge_index_as_on_arg(self): + # GH14355 + + left = self.df.set_index('key1') + right = self.df2.set_index('key1') + result = merge(left, right, on='key1') + expected = merge(self.df, self.df2, on='key1').set_index('key1') + assert_frame_equal(result, expected) + def test_merge_index_singlekey_right_vs_left(self): left = DataFrame({'key': ['a', 'b', 'c', 'd', 'e', 'e', 'a'], 'v1': np.random.randn(7)}) @@ -95,32 +118,32 @@ def test_merge_index_singlekey_inner(self): assert_frame_equal(result, expected.loc[:, result.columns]) def 
test_merge_misspecified(self): - self.assertRaises(ValueError, merge, self.left, self.right, - left_index=True) - self.assertRaises(ValueError, merge, self.left, self.right, - right_index=True) + pytest.raises(ValueError, merge, self.left, self.right, + left_index=True) + pytest.raises(ValueError, merge, self.left, self.right, + right_index=True) - self.assertRaises(ValueError, merge, self.left, self.left, - left_on='key', on='key') + pytest.raises(ValueError, merge, self.left, self.left, + left_on='key', on='key') - self.assertRaises(ValueError, merge, self.df, self.df2, - left_on=['key1'], right_on=['key1', 'key2']) + pytest.raises(ValueError, merge, self.df, self.df2, + left_on=['key1'], right_on=['key1', 'key2']) def test_index_and_on_parameters_confusion(self): - self.assertRaises(ValueError, merge, self.df, self.df2, how='left', - left_index=False, right_index=['key1', 'key2']) - self.assertRaises(ValueError, merge, self.df, self.df2, how='left', - left_index=['key1', 'key2'], right_index=False) - self.assertRaises(ValueError, merge, self.df, self.df2, how='left', - left_index=['key1', 'key2'], - right_index=['key1', 'key2']) + pytest.raises(ValueError, merge, self.df, self.df2, how='left', + left_index=False, right_index=['key1', 'key2']) + pytest.raises(ValueError, merge, self.df, self.df2, how='left', + left_index=['key1', 'key2'], right_index=False) + pytest.raises(ValueError, merge, self.df, self.df2, how='left', + left_index=['key1', 'key2'], + right_index=['key1', 'key2']) def test_merge_overlap(self): merged = merge(self.left, self.left, on='key') exp_len = (self.left['key'].value_counts() ** 2).sum() - self.assertEqual(len(merged), exp_len) - self.assertIn('v1_x', merged) - self.assertIn('v1_y', merged) + assert len(merged) == exp_len + assert 'v1_x' in merged + assert 'v1_y' in merged def test_merge_different_column_key_names(self): left = DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], @@ -153,10 +176,10 @@ def test_merge_copy(self): 
right_index=True, copy=True) merged['a'] = 6 - self.assertTrue((left['a'] == 0).all()) + assert (left['a'] == 0).all() merged['d'] = 'peekaboo' - self.assertTrue((right['d'] == 'bar').all()) + assert (right['d'] == 'bar').all() def test_merge_nocopy(self): left = DataFrame({'a': 0, 'b': 1}, index=lrange(10)) @@ -166,10 +189,10 @@ def test_merge_nocopy(self): right_index=True, copy=False) merged['a'] = 6 - self.assertTrue((left['a'] == 6).all()) + assert (left['a'] == 6).all() merged['d'] = 'peekaboo' - self.assertTrue((right['d'] == 'peekaboo').all()) + assert (right['d'] == 'peekaboo').all() def test_intelligently_handle_join_key(self): # #733, be a bit more 1337 about not returning unconsolidated DataFrame @@ -193,7 +216,7 @@ def test_merge_join_key_dtype_cast(self): df1 = DataFrame({'key': [1], 'v1': [10]}) df2 = DataFrame({'key': [2], 'v1': [20]}) df = merge(df1, df2, how='outer') - self.assertEqual(df['key'].dtype, 'int64') + assert df['key'].dtype == 'int64' df1 = DataFrame({'key': [True], 'v1': [1]}) df2 = DataFrame({'key': [False], 'v1': [0]}) @@ -201,14 +224,14 @@ def test_merge_join_key_dtype_cast(self): # GH13169 # this really should be bool - self.assertEqual(df['key'].dtype, 'object') + assert df['key'].dtype == 'object' df1 = DataFrame({'val': [1]}) df2 = DataFrame({'val': [2]}) lkey = np.array([1]) rkey = np.array([2]) df = merge(df1, df2, left_on=lkey, right_on=rkey, how='outer') - self.assertEqual(df['key_0'].dtype, 'int64') + assert df['key_0'].dtype == 'int64' def test_handle_join_key_pass_array(self): left = DataFrame({'key': [1, 1, 2, 2, 3], @@ -220,8 +243,8 @@ def test_handle_join_key_pass_array(self): merged2 = merge(right, left, left_on=key, right_on='key', how='outer') assert_series_equal(merged['key'], merged2['key']) - self.assertTrue(merged['key'].notnull().all()) - self.assertTrue(merged2['key'].notnull().all()) + assert merged['key'].notna().all() + assert merged2['key'].notna().all() left = DataFrame({'value': lrange(5)}, 
columns=['value']) right = DataFrame({'rvalue': lrange(6)}) @@ -229,23 +252,31 @@ def test_handle_join_key_pass_array(self): rkey = np.array([1, 1, 2, 3, 4, 5]) merged = merge(left, right, left_on=lkey, right_on=rkey, how='outer') - self.assert_series_equal(merged['key_0'], - Series([1, 1, 1, 1, 2, 2, 3, 4, 5], - name='key_0')) + tm.assert_series_equal(merged['key_0'], Series([1, 1, 1, 1, 2, + 2, 3, 4, 5], + name='key_0')) left = DataFrame({'value': lrange(3)}) right = DataFrame({'rvalue': lrange(6)}) key = np.array([0, 1, 1, 2, 2, 3], dtype=np.int64) merged = merge(left, right, left_index=True, right_on=key, how='outer') - self.assert_series_equal(merged['key_0'], Series(key, name='key_0')) + tm.assert_series_equal(merged['key_0'], Series(key, name='key_0')) def test_no_overlap_more_informative_error(self): dt = datetime.now() df1 = DataFrame({'x': ['a']}, index=[dt]) df2 = DataFrame({'y': ['b', 'c']}, index=[dt, dt]) - self.assertRaises(MergeError, merge, df1, df2) + pytest.raises(MergeError, merge, df1, df2) + + msg = ('No common columns to perform merge on. 
' + 'Merge options: left_on={lon}, right_on={ron}, ' + 'left_index={lidx}, right_index={ridx}' + .format(lon=None, ron=None, lidx=False, ridx=False)) + + with tm.assert_raises_regex(MergeError, msg): + merge(df1, df2) def test_merge_non_unique_indexes(self): @@ -416,7 +447,7 @@ def test_merge_nosort(self): exp = merge(df, new, on='var3', sort=False) assert_frame_equal(result, exp) - self.assertTrue((df.var3.unique() == result.var3.unique()).all()) + assert (df.var3.unique() == result.var3.unique()).all() def test_merge_nan_right(self): df1 = DataFrame({"i1": [0, 1], "i2": [0, 1]}) @@ -450,7 +481,7 @@ def _constructor(self): nad = NotADataFrame(self.df) result = nad.merge(self.df2, on='key1') - tm.assertIsInstance(result, NotADataFrame) + assert isinstance(result, NotADataFrame) def test_join_append_timedeltas(self): @@ -490,7 +521,7 @@ def test_other_datetime_unit(self): df2 = s.astype(dtype).to_frame('days') # coerces to datetime64[ns], thus sholuld not be affected - self.assertEqual(df2['days'].dtype, 'datetime64[ns]') + assert df2['days'].dtype == 'datetime64[ns]' result = df1.merge(df2, left_on='entity_id', right_index=True) @@ -500,25 +531,23 @@ def test_other_datetime_unit(self): columns=['entity_id', 'days']) tm.assert_frame_equal(result, exp) - def test_other_timedelta_unit(self): + @pytest.mark.parametrize("unit", ['D', 'h', 'm', 's', 'ms', 'us', 'ns']) + def test_other_timedelta_unit(self, unit): # GH 13389 df1 = pd.DataFrame({'entity_id': [101, 102]}) s = pd.Series([None, None], index=[101, 102], name='days') - for dtype in ['timedelta64[D]', 'timedelta64[h]', 'timedelta64[m]', - 'timedelta64[s]', 'timedelta64[ms]', 'timedelta64[us]', - 'timedelta64[ns]']: - - df2 = s.astype(dtype).to_frame('days') - self.assertEqual(df2['days'].dtype, dtype) + dtype = "m8[{}]".format(unit) + df2 = s.astype(dtype).to_frame('days') + assert df2['days'].dtype == 'm8[ns]' - result = df1.merge(df2, left_on='entity_id', right_index=True) + result = df1.merge(df2, 
left_on='entity_id', right_index=True) - exp = pd.DataFrame({'entity_id': [101, 102], - 'days': np.array(['nat', 'nat'], - dtype=dtype)}, - columns=['entity_id', 'days']) - tm.assert_frame_equal(result, exp) + exp = pd.DataFrame({'entity_id': [101, 102], + 'days': np.array(['nat', 'nat'], + dtype=dtype)}, + columns=['entity_id', 'days']) + tm.assert_frame_equal(result, exp) def test_overlapping_columns_error_message(self): df = DataFrame({'key': [1, 2, 3], @@ -540,7 +569,7 @@ def test_overlapping_columns_error_message(self): # #2649, #10639 df2.columns = ['key1', 'foo', 'foo'] - self.assertRaises(ValueError, merge, df, df2) + pytest.raises(ValueError, merge, df, df2) def test_merge_on_datetime64tz(self): @@ -559,22 +588,34 @@ def test_merge_on_datetime64tz(self): result = pd.merge(left, right, on='key', how='outer') assert_frame_equal(result, expected) - left = pd.DataFrame({'value': pd.date_range('20151010', periods=2, - tz='US/Eastern'), - 'key': [1, 2]}) - right = pd.DataFrame({'value': pd.date_range('20151011', periods=2, - tz='US/Eastern'), - 'key': [2, 3]}) + left = pd.DataFrame({'key': [1, 2], + 'value': pd.date_range('20151010', periods=2, + tz='US/Eastern')}) + right = pd.DataFrame({'key': [2, 3], + 'value': pd.date_range('20151011', periods=2, + tz='US/Eastern')}) expected = DataFrame({ + 'key': [1, 2, 3], 'value_x': list(pd.date_range('20151010', periods=2, tz='US/Eastern')) + [pd.NaT], 'value_y': [pd.NaT] + list(pd.date_range('20151011', periods=2, - tz='US/Eastern')), - 'key': [1, 2, 3]}) + tz='US/Eastern'))}) result = pd.merge(left, right, on='key', how='outer') assert_frame_equal(result, expected) - self.assertEqual(result['value_x'].dtype, 'datetime64[ns, US/Eastern]') - self.assertEqual(result['value_y'].dtype, 'datetime64[ns, US/Eastern]') + assert result['value_x'].dtype == 'datetime64[ns, US/Eastern]' + assert result['value_y'].dtype == 'datetime64[ns, US/Eastern]' + + def test_merge_non_unique_period_index(self): + # GH #16871 + index = 
pd.period_range('2016-01-01', periods=16, freq='M') + df = DataFrame([i for i in range(len(index))], + index=index, columns=['pnum']) + df2 = concat([df, df]) + result = df.merge(df2, left_index=True, right_index=True, how='inner') + expected = DataFrame( + np.tile(np.arange(16, dtype=np.int64).repeat(2).reshape(-1, 1), 2), + columns=['pnum_x', 'pnum_y'], index=df2.sort_index().index) + tm.assert_frame_equal(result, expected) def test_merge_on_periods(self): left = pd.DataFrame({'key': pd.period_range('20151010', periods=2, @@ -591,31 +632,32 @@ def test_merge_on_periods(self): result = pd.merge(left, right, on='key', how='outer') assert_frame_equal(result, expected) - left = pd.DataFrame({'value': pd.period_range('20151010', periods=2, - freq='D'), - 'key': [1, 2]}) - right = pd.DataFrame({'value': pd.period_range('20151011', periods=2, - freq='D'), - 'key': [2, 3]}) + left = pd.DataFrame({'key': [1, 2], + 'value': pd.period_range('20151010', periods=2, + freq='D')}) + right = pd.DataFrame({'key': [2, 3], + 'value': pd.period_range('20151011', periods=2, + freq='D')}) exp_x = pd.period_range('20151010', periods=2, freq='D') exp_y = pd.period_range('20151011', periods=2, freq='D') - expected = DataFrame({'value_x': list(exp_x) + [pd.NaT], - 'value_y': [pd.NaT] + list(exp_y), - 'key': [1, 2, 3]}) + expected = DataFrame({'key': [1, 2, 3], + 'value_x': list(exp_x) + [pd.NaT], + 'value_y': [pd.NaT] + list(exp_y)}) result = pd.merge(left, right, on='key', how='outer') assert_frame_equal(result, expected) - self.assertEqual(result['value_x'].dtype, 'object') - self.assertEqual(result['value_y'].dtype, 'object') + assert result['value_x'].dtype == 'object' + assert result['value_y'].dtype == 'object' def test_indicator(self): # PR #10054. xref #7412 and closes #8790. 
- df1 = DataFrame({'col1': [0, 1], 'col_left': [ - 'a', 'b'], 'col_conflict': [1, 2]}) + df1 = DataFrame({'col1': [0, 1], 'col_conflict': [1, 2], + 'col_left': ['a', 'b']}) df1_copy = df1.copy() - df2 = DataFrame({'col1': [1, 2, 3, 4, 5], 'col_right': [2, 2, 2, 2, 2], - 'col_conflict': [1, 2, 3, 4, 5]}) + df2 = DataFrame({'col1': [1, 2, 3, 4, 5], + 'col_conflict': [1, 2, 3, 4, 5], + 'col_right': [2, 2, 2, 2, 2]}) df2_copy = df2.copy() df_result = DataFrame({ @@ -654,46 +696,46 @@ def test_indicator(self): assert_frame_equal(test_custom_name, df_result_custom_name) # Check only accepts strings and booleans - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): merge(df1, df2, on='col1', how='outer', indicator=5) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df1.merge(df2, on='col1', how='outer', indicator=5) # Check result integrity test2 = merge(df1, df2, on='col1', how='left', indicator=True) - self.assertTrue((test2._merge != 'right_only').all()) + assert (test2._merge != 'right_only').all() test2 = df1.merge(df2, on='col1', how='left', indicator=True) - self.assertTrue((test2._merge != 'right_only').all()) + assert (test2._merge != 'right_only').all() test3 = merge(df1, df2, on='col1', how='right', indicator=True) - self.assertTrue((test3._merge != 'left_only').all()) + assert (test3._merge != 'left_only').all() test3 = df1.merge(df2, on='col1', how='right', indicator=True) - self.assertTrue((test3._merge != 'left_only').all()) + assert (test3._merge != 'left_only').all() test4 = merge(df1, df2, on='col1', how='inner', indicator=True) - self.assertTrue((test4._merge == 'both').all()) + assert (test4._merge == 'both').all() test4 = df1.merge(df2, on='col1', how='inner', indicator=True) - self.assertTrue((test4._merge == 'both').all()) + assert (test4._merge == 'both').all() # Check if working name in df for i in ['_right_indicator', '_left_indicator', '_merge']: df_badcolumn = DataFrame({'col1': [1, 2], i: [2, 2]}) - with 
tm.assertRaises(ValueError): + with pytest.raises(ValueError): merge(df1, df_badcolumn, on='col1', how='outer', indicator=True) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df1.merge(df_badcolumn, on='col1', how='outer', indicator=True) # Check for name conflict with custom name df_badcolumn = DataFrame( {'col1': [1, 2], 'custom_column_name': [2, 2]}) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): merge(df1, df_badcolumn, on='col1', how='outer', indicator='custom_column_name') - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df1.merge(df_badcolumn, on='col1', how='outer', indicator='custom_column_name') @@ -715,6 +757,136 @@ def test_indicator(self): how='outer', indicator=True) assert_frame_equal(test5, hand_coded_result) + def test_validation(self): + left = DataFrame({'a': ['a', 'b', 'c', 'd'], + 'b': ['cat', 'dog', 'weasel', 'horse']}, + index=range(4)) + + right = DataFrame({'a': ['a', 'b', 'c', 'd', 'e'], + 'c': ['meow', 'bark', 'um... weasel noise?', + 'nay', 'chirp']}, + index=range(5)) + + # Make sure no side effects. + left_copy = left.copy() + right_copy = right.copy() + + result = merge(left, right, left_index=True, right_index=True, + validate='1:1') + assert_frame_equal(left, left_copy) + assert_frame_equal(right, right_copy) + + # make sure merge still correct + expected = DataFrame({'a_x': ['a', 'b', 'c', 'd'], + 'b': ['cat', 'dog', 'weasel', 'horse'], + 'a_y': ['a', 'b', 'c', 'd'], + 'c': ['meow', 'bark', 'um... weasel noise?', + 'nay']}, + index=range(4), + columns=['a_x', 'b', 'a_y', 'c']) + + result = merge(left, right, left_index=True, right_index=True, + validate='one_to_one') + assert_frame_equal(result, expected) + + expected_2 = DataFrame({'a': ['a', 'b', 'c', 'd'], + 'b': ['cat', 'dog', 'weasel', 'horse'], + 'c': ['meow', 'bark', 'um... 
weasel noise?', + 'nay']}, + index=range(4)) + + result = merge(left, right, on='a', validate='1:1') + assert_frame_equal(left, left_copy) + assert_frame_equal(right, right_copy) + assert_frame_equal(result, expected_2) + + result = merge(left, right, on='a', validate='one_to_one') + assert_frame_equal(result, expected_2) + + # One index, one column + expected_3 = DataFrame({'b': ['cat', 'dog', 'weasel', 'horse'], + 'a': ['a', 'b', 'c', 'd'], + 'c': ['meow', 'bark', 'um... weasel noise?', + 'nay']}, + columns=['b', 'a', 'c'], + index=range(4)) + + left_index_reset = left.set_index('a') + result = merge(left_index_reset, right, left_index=True, + right_on='a', validate='one_to_one') + assert_frame_equal(result, expected_3) + + # Dups on right + right_w_dups = right.append(pd.DataFrame({'a': ['e'], 'c': ['moo']}, + index=[4])) + merge(left, right_w_dups, left_index=True, right_index=True, + validate='one_to_many') + + with pytest.raises(MergeError): + merge(left, right_w_dups, left_index=True, right_index=True, + validate='one_to_one') + + with pytest.raises(MergeError): + merge(left, right_w_dups, on='a', validate='one_to_one') + + # Dups on left + left_w_dups = left.append(pd.DataFrame({'a': ['a'], 'c': ['cow']}, + index=[3])) + merge(left_w_dups, right, left_index=True, right_index=True, + validate='many_to_one') + + with pytest.raises(MergeError): + merge(left_w_dups, right, left_index=True, right_index=True, + validate='one_to_one') + + with pytest.raises(MergeError): + merge(left_w_dups, right, on='a', validate='one_to_one') + + # Dups on both + merge(left_w_dups, right_w_dups, on='a', validate='many_to_many') + + with pytest.raises(MergeError): + merge(left_w_dups, right_w_dups, left_index=True, + right_index=True, validate='many_to_one') + + with pytest.raises(MergeError): + merge(left_w_dups, right_w_dups, on='a', + validate='one_to_many') + + # Check invalid arguments + with pytest.raises(ValueError): + merge(left, right, on='a', validate='jibberish') + + # 
Two column merge, dups in both, but jointly no dups. + left = DataFrame({'a': ['a', 'a', 'b', 'b'], + 'b': [0, 1, 0, 1], + 'c': ['cat', 'dog', 'weasel', 'horse']}, + index=range(4)) + + right = DataFrame({'a': ['a', 'a', 'b'], + 'b': [0, 1, 0], + 'd': ['meow', 'bark', 'um... weasel noise?']}, + index=range(3)) + + expected_multi = DataFrame({'a': ['a', 'a', 'b'], + 'b': [0, 1, 0], + 'c': ['cat', 'dog', 'weasel'], + 'd': ['meow', 'bark', + 'um... weasel noise?']}, + index=range(3)) + + with pytest.raises(MergeError): + merge(left, right, on='a', validate='1:1') + + result = merge(left, right, on=['a', 'b'], validate='1:1') + assert_frame_equal(result, expected_multi) + + def test_merge_two_empty_df_no_division_error(self): + # GH17776, PR #17846 + a = pd.DataFrame({'a': [], 'b': [], 'c': []}) + with np.errstate(divide='raise'): + merge(a, a, on=('a', 'b')) + def _check_merge(x, y): for how in ['inner', 'left', 'outer']: @@ -728,9 +900,9 @@ def _check_merge(x, y): assert_frame_equal(result, expected, check_names=False) -class TestMergeMulti(tm.TestCase): +class TestMergeMulti(object): - def setUp(self): + def setup_method(self, method): self.index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', 'three']], labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], @@ -780,15 +952,15 @@ def run_asserts(left, right): for sort in [False, True]: res = left.join(right, on=icols, how='left', sort=sort) - self.assertTrue(len(left) < len(res) + 1) - self.assertFalse(res['4th'].isnull().any()) - self.assertFalse(res['5th'].isnull().any()) + assert len(left) < len(res) + 1 + assert not res['4th'].isna().any() + assert not res['5th'].isna().any() tm.assert_series_equal( res['4th'], - res['5th'], check_names=False) result = bind_cols(res.iloc[:, :-2]) tm.assert_series_equal(res['4th'], result, check_names=False) - self.assertTrue(result.name is None) + assert result.name is None if sort: tm.assert_frame_equal( @@ -1018,38 +1190,6 @@ def test_left_join_index_multi_match(self): 
expected.index = np.arange(len(expected)) tm.assert_frame_equal(result, expected) - def test_join_multi_dtypes(self): - - # test with multi dtypes in the join index - def _test(dtype1, dtype2): - left = DataFrame({'k1': np.array([0, 1, 2] * 8, dtype=dtype1), - 'k2': ['foo', 'bar'] * 12, - 'v': np.array(np.arange(24), dtype=np.int64)}) - - index = MultiIndex.from_tuples([(2, 'bar'), (1, 'foo')]) - right = DataFrame( - {'v2': np.array([5, 7], dtype=dtype2)}, index=index) - - result = left.join(right, on=['k1', 'k2']) - - expected = left.copy() - - if dtype2.kind == 'i': - dtype2 = np.dtype('float64') - expected['v2'] = np.array(np.nan, dtype=dtype2) - expected.loc[(expected.k1 == 2) & (expected.k2 == 'bar'), 'v2'] = 5 - expected.loc[(expected.k1 == 1) & (expected.k2 == 'foo'), 'v2'] = 7 - - tm.assert_frame_equal(result, expected) - - result = left.join(right, on=['k1', 'k2'], sort=True) - expected.sort_values(['k1', 'k2'], kind='mergesort', inplace=True) - tm.assert_frame_equal(result, expected) - - for d1 in [np.int64, np.int32, np.int16, np.int8, np.uint8]: - for d2 in [np.int64, np.float64, np.float32, np.float16]: - _test(np.dtype(d1), np.dtype(d2)) - def test_left_merge_na_buglet(self): left = DataFrame({'id': list('abcde'), 'v1': randn(5), 'v2': randn(5), 'dummy': list('abcde'), @@ -1092,137 +1232,6 @@ def test_merge_na_keys(self): tm.assert_frame_equal(result, expected) - @slow - def test_int64_overflow_issues(self): - from itertools import product - from collections import defaultdict - from pandas.core.groupby import _int64_overflow_possible - - # #2690, combinatorial explosion - df1 = DataFrame(np.random.randn(1000, 7), - columns=list('ABCDEF') + ['G1']) - df2 = DataFrame(np.random.randn(1000, 7), - columns=list('ABCDEF') + ['G2']) - - # it works! 
- result = merge(df1, df2, how='outer') - self.assertTrue(len(result) == 2000) - - low, high, n = -1 << 10, 1 << 10, 1 << 20 - left = DataFrame(np.random.randint(low, high, (n, 7)), - columns=list('ABCDEFG')) - left['left'] = left.sum(axis=1) - - # one-2-one match - i = np.random.permutation(len(left)) - right = left.iloc[i].copy() - right.columns = right.columns[:-1].tolist() + ['right'] - right.index = np.arange(len(right)) - right['right'] *= -1 - - out = merge(left, right, how='outer') - self.assertEqual(len(out), len(left)) - assert_series_equal(out['left'], - out['right'], check_names=False) - result = out.iloc[:, :-2].sum(axis=1) - assert_series_equal(out['left'], result, check_names=False) - self.assertTrue(result.name is None) - - out.sort_values(out.columns.tolist(), inplace=True) - out.index = np.arange(len(out)) - for how in ['left', 'right', 'outer', 'inner']: - assert_frame_equal(out, merge(left, right, how=how, sort=True)) - - # check that left merge w/ sort=False maintains left frame order - out = merge(left, right, how='left', sort=False) - assert_frame_equal(left, out[left.columns.tolist()]) - - out = merge(right, left, how='left', sort=False) - assert_frame_equal(right, out[right.columns.tolist()]) - - # one-2-many/none match - n = 1 << 11 - left = DataFrame(np.random.randint(low, high, (n, 7)).astype('int64'), - columns=list('ABCDEFG')) - - # confirm that this is checking what it is supposed to check - shape = left.apply(Series.nunique).values - self.assertTrue(_int64_overflow_possible(shape)) - - # add duplicates to left frame - left = concat([left, left], ignore_index=True) - - right = DataFrame(np.random.randint(low, high, (n // 2, 7)) - .astype('int64'), - columns=list('ABCDEFG')) - - # add duplicates & overlap with left to the right frame - i = np.random.choice(len(left), n) - right = concat([right, right, left.iloc[i]], ignore_index=True) - - left['left'] = np.random.randn(len(left)) - right['right'] = np.random.randn(len(right)) - - # 
shuffle left & right frames - i = np.random.permutation(len(left)) - left = left.iloc[i].copy() - left.index = np.arange(len(left)) - - i = np.random.permutation(len(right)) - right = right.iloc[i].copy() - right.index = np.arange(len(right)) - - # manually compute outer merge - ldict, rdict = defaultdict(list), defaultdict(list) - - for idx, row in left.set_index(list('ABCDEFG')).iterrows(): - ldict[idx].append(row['left']) - - for idx, row in right.set_index(list('ABCDEFG')).iterrows(): - rdict[idx].append(row['right']) - - vals = [] - for k, lval in ldict.items(): - rval = rdict.get(k, [np.nan]) - for lv, rv in product(lval, rval): - vals.append(k + tuple([lv, rv])) - - for k, rval in rdict.items(): - if k not in ldict: - for rv in rval: - vals.append(k + tuple([np.nan, rv])) - - def align(df): - df = df.sort_values(df.columns.tolist()) - df.index = np.arange(len(df)) - return df - - def verify_order(df): - kcols = list('ABCDEFG') - assert_frame_equal(df[kcols].copy(), - df[kcols].sort_values(kcols, kind='mergesort')) - - out = DataFrame(vals, columns=list('ABCDEFG') + ['left', 'right']) - out = align(out) - - jmask = {'left': out['left'].notnull(), - 'right': out['right'].notnull(), - 'inner': out['left'].notnull() & out['right'].notnull(), - 'outer': np.ones(len(out), dtype='bool')} - - for how in 'left', 'right', 'outer', 'inner': - mask = jmask[how] - frame = align(out[mask].copy()) - self.assertTrue(mask.all() ^ mask.any() or how == 'outer') - - for sort in [False, True]: - res = merge(left, right, how=how, sort=sort) - if sort: - verify_order(res) - - # as in GH9092 dtypes break with outer/right join - assert_frame_equal(frame, align(res), - check_dtype=how not in ('right', 'outer')) - def test_join_multi_levels(self): # GH 3662 @@ -1290,14 +1299,14 @@ def test_join_multi_levels(self): def f(): household.join(portfolio, how='inner') - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) portfolio2 = portfolio.copy() 
portfolio2.index.set_names(['household_id', 'foo']) def f(): portfolio2.join(portfolio, how='inner') - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) def test_join_multi_levels2(self): @@ -1336,10 +1345,6 @@ def test_join_multi_levels2(self): .set_index(["household_id", "asset_id", "t"]) .reindex(columns=['share', 'log_return'])) - def f(): - household.join(log_return, how='inner') - self.assertRaises(NotImplementedError, f) - # this is the equivalency result = (merge(household.reset_index(), log_return.reset_index(), on=['asset_id'], how='inner') @@ -1349,7 +1354,7 @@ def f(): expected = ( DataFrame(dict( household_id=[1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4], - asset_id=["nl0000301109", "nl0000289783", "gb00b03mlx29", + asset_id=["nl0000301109", "nl0000301109", "gb00b03mlx29", "gb00b03mlx29", "gb00b03mlx29", "gb00b03mlx29", "gb00b03mlx29", "gb00b03mlx29", "lu0197800237", "lu0197800237", @@ -1362,8 +1367,569 @@ def f(): .09604978, -.06524096, .03532373, .03025441, .036997, None, None] )) - .set_index(["household_id", "asset_id", "t"])) + .set_index(["household_id", "asset_id", "t"]) + .reindex(columns=['share', 'log_return'])) + + result = (merge(household.reset_index(), log_return.reset_index(), + on=['asset_id'], how='outer') + .set_index(['household_id', 'asset_id', 't'])) + + assert_frame_equal(result, expected) + + +@pytest.fixture +def left_multi(): + return ( + DataFrame( + dict(Origin=['A', 'A', 'B', 'B', 'C'], + Destination=['A', 'B', 'A', 'C', 'A'], + Period=['AM', 'PM', 'IP', 'AM', 'OP'], + TripPurp=['hbw', 'nhb', 'hbo', 'nhb', 'hbw'], + Trips=[1987, 3647, 2470, 4296, 4444]), + columns=['Origin', 'Destination', 'Period', + 'TripPurp', 'Trips']) + .set_index(['Origin', 'Destination', 'Period', 'TripPurp'])) + + +@pytest.fixture +def right_multi(): + return ( + DataFrame( + dict(Origin=['A', 'A', 'B', 'B', 'C', 'C', 'E'], + Destination=['A', 'B', 'A', 'B', 'A', 'B', 'F'], + Period=['AM', 'PM', 'IP', 'AM', 'OP', 'IP', 'AM'], + LinkType=['a', 
'a', 'c', 'b', 'a', 'b', 'a'], + Distance=[100, 80, 90, 80, 75, 35, 55]), + columns=['Origin', 'Destination', 'Period', + 'LinkType', 'Distance']) + .set_index(['Origin', 'Destination', 'Period', 'LinkType'])) + + +@pytest.fixture +def on_cols(): + return ['Origin', 'Destination', 'Period'] + + +@pytest.fixture +def idx_cols(): + return ['Origin', 'Destination', 'Period', 'TripPurp', 'LinkType'] + + +class TestJoinMultiMulti(object): + + @pytest.mark.parametrize('how', ['left', 'right', 'inner', 'outer']) + def test_join_multi_multi(self, left_multi, right_multi, how, + on_cols, idx_cols): + # Multi-index join tests + expected = (pd.merge(left_multi.reset_index(), + right_multi.reset_index(), + how=how, on=on_cols).set_index(idx_cols) + .sort_index()) + + result = left_multi.join(right_multi, how=how).sort_index() + tm.assert_frame_equal(result, expected) + + """ + @pytest.mark.parametrize('how', ['left', 'right', 'inner', 'outer']) + def test_join_multi_multi_emptylevel(self, left_multi, right_multi, how, + on_cols, idx_cols): + # Join with empty level + num_lvls = len(right_multi.index.get_level_values('Period')) + # Set one level to None + right_multi.index.set_levels([np.nan] * num_lvls, level='Period', + inplace=True) + + expected = (pd.merge(left_multi.reset_index(), + right_multi.reset_index(), + how=how, on=on_cols).set_index(idx_cols) + .sort_index()) + + result = left_multi.join(right_multi, how=how).sort_index() + tm.assert_frame_equal(result, expected) + """ + + @pytest.mark.parametrize('how', ['left', 'right', 'inner', 'outer']) + def test_join_multi_empty_frames(self, left_multi, right_multi, how, + on_cols, idx_cols): + + left_multi = left_multi.drop(columns=left_multi.columns) + right_multi = right_multi.drop(columns=right_multi.columns) + + expected = (pd.merge(left_multi.reset_index(), + right_multi.reset_index(), + how=how, on=on_cols).set_index(idx_cols) + .sort_index()) + + result = left_multi.join(right_multi, how=how).sort_index() + 
tm.assert_frame_equal(result, expected) + + def test_join_multi_multi_nonunique(self, left_multi): + # Non-unique resulting index + right_multi = ( + DataFrame( + dict(Origin=[1, 1, 2], + Destination=[1, 1, 1], + Period=['AM', 'AM', 'PM'], + LinkType=['a', 'b', 'a'], + Distance=[100, 110, 120]), + columns=['Origin', 'Destination', 'Period', + 'LinkType', 'Distance']) + .set_index(['Origin', 'Destination', 'Period', 'LinkType'])) def f(): - household.join(log_return, how='outer') - self.assertRaises(NotImplementedError, f) + left_multi.join(right_multi, how='left') + pytest.raises(ValueError, f) + + @pytest.mark.parametrize("klass", [None, np.asarray, Series, Index]) + def test_merge_datetime_index(self, klass): + # see gh-19038 + df = DataFrame([1, 2, 3], + ["2016-01-01", "2017-01-01", "2018-01-01"], + columns=["a"]) + df.index = pd.to_datetime(df.index) + on_vector = df.index.year + + if klass is not None: + on_vector = klass(on_vector) + + expected = DataFrame({"a": [1, 2, 3]}) + + if klass == np.asarray: + # The join key is added for ndarray. 
+ expected["key_1"] = [2016, 2017, 2018] + + result = df.merge(df, on=["a", on_vector], how="inner") + tm.assert_frame_equal(result, expected) + + expected = DataFrame({"a_x": [1, 2, 3], + "a_y": [1, 2, 3]}) + result = df.merge(df, on=[df.index.year], how="inner") + tm.assert_frame_equal(result, expected) + + +class TestMergeDtypes(object): + + @pytest.mark.parametrize('right_vals', [ + ['foo', 'bar'], + Series(['foo', 'bar']).astype('category'), + [1, 2], + [1.0, 2.0], + Series([1, 2], dtype='uint64'), + Series([1, 2], dtype='int32') + ] + ) + def test_different(self, right_vals): + + left = DataFrame({'A': ['foo', 'bar'], + 'B': Series(['foo', 'bar']).astype('category'), + 'C': [1, 2], + 'D': [1.0, 2.0], + 'E': Series([1, 2], dtype='uint64'), + 'F': Series([1, 2], dtype='int32')}) + right = DataFrame({'A': right_vals}) + + # GH 9780 + # We allow merging on object and categorical cols and cast + # categorical cols to object + if (is_categorical_dtype(right['A'].dtype) or + is_object_dtype(right['A'].dtype)): + result = pd.merge(left, right, on='A') + assert is_object_dtype(result.A.dtype) + + # GH 9780 + # We raise for merging on object col and int/float col and + # merging on categorical col and int/float col + else: + msg = ("You are trying to merge on " + "{lk_dtype} and {rk_dtype} columns. 
" + "If you wish to proceed you should use " + "pd.concat".format(lk_dtype=left['A'].dtype, + rk_dtype=right['A'].dtype)) + with tm.assert_raises_regex(ValueError, msg): + pd.merge(left, right, on='A') + + @pytest.mark.parametrize('d1', [np.int64, np.int32, + np.int16, np.int8, np.uint8]) + @pytest.mark.parametrize('d2', [np.int64, np.float64, + np.float32, np.float16]) + def test_join_multi_dtypes(self, d1, d2): + + dtype1 = np.dtype(d1) + dtype2 = np.dtype(d2) + + left = DataFrame({'k1': np.array([0, 1, 2] * 8, dtype=dtype1), + 'k2': ['foo', 'bar'] * 12, + 'v': np.array(np.arange(24), dtype=np.int64)}) + + index = MultiIndex.from_tuples([(2, 'bar'), (1, 'foo')]) + right = DataFrame({'v2': np.array([5, 7], dtype=dtype2)}, index=index) + + result = left.join(right, on=['k1', 'k2']) + + expected = left.copy() + + if dtype2.kind == 'i': + dtype2 = np.dtype('float64') + expected['v2'] = np.array(np.nan, dtype=dtype2) + expected.loc[(expected.k1 == 2) & (expected.k2 == 'bar'), 'v2'] = 5 + expected.loc[(expected.k1 == 1) & (expected.k2 == 'foo'), 'v2'] = 7 + + tm.assert_frame_equal(result, expected) + + result = left.join(right, on=['k1', 'k2'], sort=True) + expected.sort_values(['k1', 'k2'], kind='mergesort', inplace=True) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize('int_vals, float_vals, exp_vals', [ + ([1, 2, 3], [1.0, 2.0, 3.0], {'X': [1, 2, 3], 'Y': [1.0, 2.0, 3.0]}), + ([1, 2, 3], [1.0, 3.0], {'X': [1, 3], 'Y': [1.0, 3.0]}), + ([1, 2], [1.0, 2.0, 3.0], {'X': [1, 2], 'Y': [1.0, 2.0]}), + ]) + def test_merge_on_ints_floats(self, int_vals, float_vals, exp_vals): + # GH 16572 + # Check that float column is not cast to object if + # merging on float and int columns + A = DataFrame({'X': int_vals}) + B = DataFrame({'Y': float_vals}) + expected = DataFrame(exp_vals) + + result = A.merge(B, left_on='X', right_on='Y') + assert_frame_equal(result, expected) + + result = B.merge(A, left_on='Y', right_on='X') + assert_frame_equal(result, 
expected[['Y', 'X']]) + + def test_merge_on_ints_floats_warning(self): + # GH 16572 + # merge will produce a warning when merging on int and + # float columns where the float values are not exactly + # equal to their int representation + A = DataFrame({'X': [1, 2, 3]}) + B = DataFrame({'Y': [1.1, 2.5, 3.0]}) + expected = DataFrame({'X': [3], 'Y': [3.0]}) + + with tm.assert_produces_warning(UserWarning): + result = A.merge(B, left_on='X', right_on='Y') + assert_frame_equal(result, expected) + + with tm.assert_produces_warning(UserWarning): + result = B.merge(A, left_on='Y', right_on='X') + assert_frame_equal(result, expected[['Y', 'X']]) + + @pytest.mark.parametrize('df1_vals, df2_vals', [ + ([0, 1, 2], ["0", "1", "2"]), + ([0.0, 1.0, 2.0], ["0", "1", "2"]), + ([0, 1, 2], [u"0", u"1", u"2"]), + (pd.date_range('1/1/2011', periods=2, freq='D'), ['2011-01-01', + '2011-01-02']), + (pd.date_range('1/1/2011', periods=2, freq='D'), [0, 1]), + (pd.date_range('1/1/2011', periods=2, freq='D'), [0.0, 1.0]), + (pd.date_range('20130101', periods=3), + pd.date_range('20130101', periods=3, tz='US/Eastern')), + ([0, 1, 2], Series(['a', 'b', 'a']).astype('category')), + ([0.0, 1.0, 2.0], Series(['a', 'b', 'a']).astype('category')), + ]) + def test_merge_incompat_dtypes(self, df1_vals, df2_vals): + # GH 9780, GH 15800 + # Raise a ValueError when a user tries to merge on + # dtypes that are incompatible (e.g., obj and int/float) + + df1 = DataFrame({'A': df1_vals}) + df2 = DataFrame({'A': df2_vals}) + + msg = ("You are trying to merge on {lk_dtype} and " + "{rk_dtype} columns. If you wish to proceed " + "you should use pd.concat".format(lk_dtype=df1['A'].dtype, + rk_dtype=df2['A'].dtype)) + msg = re.escape(msg) + with tm.assert_raises_regex(ValueError, msg): + pd.merge(df1, df2, on=['A']) + + # Check that error still raised when swapping order of dataframes + msg = ("You are trying to merge on {lk_dtype} and " + "{rk_dtype} columns. 
If you wish to proceed " + "you should use pd.concat".format(lk_dtype=df2['A'].dtype, + rk_dtype=df1['A'].dtype)) + msg = re.escape(msg) + with tm.assert_raises_regex(ValueError, msg): + pd.merge(df2, df1, on=['A']) + + +@pytest.fixture +def left(): + np.random.seed(1234) + return DataFrame( + {'X': Series(np.random.choice( + ['foo', 'bar'], + size=(10,))).astype(CDT(['foo', 'bar'])), + 'Y': np.random.choice(['one', 'two', 'three'], size=(10,))}) + + +@pytest.fixture +def right(): + np.random.seed(1234) + return DataFrame( + {'X': Series(['foo', 'bar']).astype(CDT(['foo', 'bar'])), + 'Z': [1, 2]}) + + +class TestMergeCategorical(object): + + def test_identical(self, left): + # merging on the same, should preserve dtypes + merged = pd.merge(left, left, on='X') + result = merged.dtypes.sort_index() + expected = Series([CategoricalDtype(), + np.dtype('O'), + np.dtype('O')], + index=['X', 'Y_x', 'Y_y']) + assert_series_equal(result, expected) + + def test_basic(self, left, right): + # we have matching Categorical dtypes in X + # so should preserve the merged column + merged = pd.merge(left, right, on='X') + result = merged.dtypes.sort_index() + expected = Series([CategoricalDtype(), + np.dtype('O'), + np.dtype('int64')], + index=['X', 'Y', 'Z']) + assert_series_equal(result, expected) + + def test_merge_categorical(self): + # GH 9426 + + right = DataFrame({'c': {0: 'a', + 1: 'b', + 2: 'c', + 3: 'd', + 4: 'e'}, + 'd': {0: 'null', + 1: 'null', + 2: 'null', + 3: 'null', + 4: 'null'}}) + left = DataFrame({'a': {0: 'f', + 1: 'f', + 2: 'f', + 3: 'f', + 4: 'f'}, + 'b': {0: 'g', + 1: 'g', + 2: 'g', + 3: 'g', + 4: 'g'}}) + df = pd.merge(left, right, how='left', left_on='b', right_on='c') + + # object-object + expected = df.copy() + + # object-cat + # note that we propagate the category + # because we don't have any matching rows + cright = right.copy() + cright['d'] = cright['d'].astype('category') + result = pd.merge(left, cright, how='left', left_on='b', right_on='c') + 
expected['d'] = expected['d'].astype(CategoricalDtype(['null'])) + tm.assert_frame_equal(result, expected) + + # cat-object + cleft = left.copy() + cleft['b'] = cleft['b'].astype('category') + result = pd.merge(cleft, cright, how='left', left_on='b', right_on='c') + tm.assert_frame_equal(result, expected) + + # cat-cat + cright = right.copy() + cright['d'] = cright['d'].astype('category') + cleft = left.copy() + cleft['b'] = cleft['b'].astype('category') + result = pd.merge(cleft, cright, how='left', left_on='b', right_on='c') + tm.assert_frame_equal(result, expected) + + def tests_merge_categorical_unordered_equal(self): + # GH-19551 + df1 = DataFrame({ + 'Foo': Categorical(['A', 'B', 'C'], categories=['A', 'B', 'C']), + 'Left': ['A0', 'B0', 'C0'], + }) + + df2 = DataFrame({ + 'Foo': Categorical(['C', 'B', 'A'], categories=['C', 'B', 'A']), + 'Right': ['C1', 'B1', 'A1'], + }) + result = pd.merge(df1, df2, on=['Foo']) + expected = DataFrame({ + 'Foo': pd.Categorical(['A', 'B', 'C']), + 'Left': ['A0', 'B0', 'C0'], + 'Right': ['A1', 'B1', 'C1'], + }) + assert_frame_equal(result, expected) + + def test_other_columns(self, left, right): + # non-merge columns should preserve if possible + right = right.assign(Z=right.Z.astype('category')) + + merged = pd.merge(left, right, on='X') + result = merged.dtypes.sort_index() + expected = Series([CategoricalDtype(), + np.dtype('O'), + CategoricalDtype()], + index=['X', 'Y', 'Z']) + assert_series_equal(result, expected) + + # categories are preserved + assert left.X.values.is_dtype_equal(merged.X.values) + assert right.Z.values.is_dtype_equal(merged.Z.values) + + @pytest.mark.parametrize( + 'change', [lambda x: x, + lambda x: x.astype(CDT(['foo', 'bar', 'bah'])), + lambda x: x.astype(CDT(ordered=True))]) + @pytest.mark.parametrize('how', ['inner', 'outer', 'left', 'right']) + def test_dtype_on_merged_different(self, change, how, left, right): + # our merging columns, X now has 2 different dtypes + # so we must be object as a 
result + + X = change(right.X.astype('object')) + right = right.assign(X=X) + assert is_categorical_dtype(left.X.values) + # assert not left.X.values.is_dtype_equal(right.X.values) + + merged = pd.merge(left, right, on='X', how=how) + + result = merged.dtypes.sort_index() + expected = Series([np.dtype('O'), + np.dtype('O'), + np.dtype('int64')], + index=['X', 'Y', 'Z']) + assert_series_equal(result, expected) + + def test_self_join_multiple_categories(self): + # GH 16767 + # non-duplicates should work with multiple categories + m = 5 + df = pd.DataFrame({ + 'a': ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'] * m, + 'b': ['t', 'w', 'x', 'y', 'z'] * 2 * m, + 'c': [letter + for each in ['m', 'n', 'u', 'p', 'o'] + for letter in [each] * 2 * m], + 'd': [letter + for each in ['aa', 'bb', 'cc', 'dd', 'ee', + 'ff', 'gg', 'hh', 'ii', 'jj'] + for letter in [each] * m]}) + + # change them all to categorical variables + df = df.apply(lambda x: x.astype('category')) + + # self-join should equal ourselves + result = pd.merge(df, df, on=list(df.columns)) + + assert_frame_equal(result, df) + + def test_dtype_on_categorical_dates(self): + # GH 16900 + # dates should not be coerced to ints + + df = pd.DataFrame( + [[date(2001, 1, 1), 1.1], + [date(2001, 1, 2), 1.3]], + columns=['date', 'num2'] + ) + df['date'] = df['date'].astype('category') + + df2 = pd.DataFrame( + [[date(2001, 1, 1), 1.3], + [date(2001, 1, 3), 1.4]], + columns=['date', 'num4'] + ) + df2['date'] = df2['date'].astype('category') + + expected_outer = pd.DataFrame([ + [pd.Timestamp('2001-01-01'), 1.1, 1.3], + [pd.Timestamp('2001-01-02'), 1.3, np.nan], + [pd.Timestamp('2001-01-03'), np.nan, 1.4]], + columns=['date', 'num2', 'num4'] + ) + result_outer = pd.merge(df, df2, how='outer', on=['date']) + assert_frame_equal(result_outer, expected_outer) + + expected_inner = pd.DataFrame( + [[pd.Timestamp('2001-01-01'), 1.1, 1.3]], + columns=['date', 'num2', 'num4'] + ) + result_inner = pd.merge(df, df2, how='inner', 
on=['date']) + assert_frame_equal(result_inner, expected_inner) + + @pytest.mark.parametrize('ordered', [True, False]) + @pytest.mark.parametrize('category_column,categories,expected_categories', + [([False, True, True, False], [True, False], + [True, False]), + ([2, 1, 1, 2], [1, 2], [1, 2]), + (['False', 'True', 'True', 'False'], + ['True', 'False'], ['True', 'False'])]) + def test_merging_with_bool_or_int_cateorical_column(self, category_column, + categories, + expected_categories, + ordered): + # GH 17187 + # merging with a boolean/int categorical column + df1 = pd.DataFrame({'id': [1, 2, 3, 4], + 'cat': category_column}) + df1['cat'] = df1['cat'].astype(CDT(categories, ordered=ordered)) + df2 = pd.DataFrame({'id': [2, 4], 'num': [1, 9]}) + result = df1.merge(df2) + expected = pd.DataFrame({'id': [2, 4], 'cat': expected_categories, + 'num': [1, 9]}) + expected['cat'] = expected['cat'].astype( + CDT(categories, ordered=ordered)) + assert_frame_equal(expected, result) + + +@pytest.fixture +def left_df(): + return DataFrame({'a': [20, 10, 0]}, index=[2, 1, 0]) + + +@pytest.fixture +def right_df(): + return DataFrame({'b': [300, 100, 200]}, index=[3, 1, 2]) + + +class TestMergeOnIndexes(object): + + @pytest.mark.parametrize( + "how, sort, expected", + [('inner', False, DataFrame({'a': [20, 10], + 'b': [200, 100]}, + index=[2, 1])), + ('inner', True, DataFrame({'a': [10, 20], + 'b': [100, 200]}, + index=[1, 2])), + ('left', False, DataFrame({'a': [20, 10, 0], + 'b': [200, 100, np.nan]}, + index=[2, 1, 0])), + ('left', True, DataFrame({'a': [0, 10, 20], + 'b': [np.nan, 100, 200]}, + index=[0, 1, 2])), + ('right', False, DataFrame({'a': [np.nan, 10, 20], + 'b': [300, 100, 200]}, + index=[3, 1, 2])), + ('right', True, DataFrame({'a': [10, 20, np.nan], + 'b': [100, 200, 300]}, + index=[1, 2, 3])), + ('outer', False, DataFrame({'a': [0, 10, 20, np.nan], + 'b': [np.nan, 100, 200, 300]}, + index=[0, 1, 2, 3])), + ('outer', True, DataFrame({'a': [0, 10, 20, np.nan], + 'b': 
[np.nan, 100, 200, 300]}, + index=[0, 1, 2, 3]))]) + def test_merge_on_indexes(self, left_df, right_df, how, sort, expected): + + result = pd.merge(left_df, right_df, + left_index=True, + right_index=True, + how=how, + sort=sort) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tools/tests/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py similarity index 89% rename from pandas/tools/tests/test_merge_asof.py rename to pandas/tests/reshape/merge/test_merge_asof.py index 76798b3c895ea..cebbcc41c3e17 100644 --- a/pandas/tools/tests/test_merge_asof.py +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -1,16 +1,17 @@ import os +import pytest import pytz import numpy as np import pandas as pd from pandas import (merge_asof, read_csv, to_datetime, Timedelta) -from pandas.tools.merge import MergeError +from pandas.core.reshape.merge import MergeError from pandas.util import testing as tm from pandas.util.testing import assert_frame_equal -class TestAsOfMerge(tm.TestCase): +class TestAsOfMerge(object): def read_data(self, name, dedupe=False): path = os.path.join(tm.get_data_path(), name) @@ -22,7 +23,7 @@ def read_data(self, name, dedupe=False): x.time = to_datetime(x.time) return x - def setUp(self): + def setup_method(self, method): self.trades = self.read_data('trades.csv') self.quotes = self.read_data('quotes.csv', dedupe=True) @@ -91,11 +92,30 @@ def test_examples2(self): by='ticker', tolerance=pd.Timedelta('2ms')) - pd.merge_asof(trades, quotes, - on='time', - by='ticker', - tolerance=pd.Timedelta('10ms'), - allow_exact_matches=False) + expected = pd.DataFrame({ + 'time': pd.to_datetime(['20160525 13:30:00.023', + '20160525 13:30:00.038', + '20160525 13:30:00.048', + '20160525 13:30:00.048', + '20160525 13:30:00.048']), + 'ticker': ['MSFT', 'MSFT', 'GOOG', 'GOOG', 'AAPL'], + 'price': [51.95, 51.95, + 720.77, 720.92, 98.00], + 'quantity': [75, 155, + 100, 100, 100], + 'bid': [np.nan, 51.97, np.nan, + np.nan, np.nan], + 'ask': [np.nan, 
51.98, np.nan, + np.nan, np.nan]}, + columns=['time', 'ticker', 'price', 'quantity', + 'bid', 'ask']) + + result = pd.merge_asof(trades, quotes, + on='time', + by='ticker', + tolerance=pd.Timedelta('10ms'), + allow_exact_matches=False) + assert_frame_equal(result, expected) def test_examples3(self): """ doc-string examples """ @@ -147,6 +167,7 @@ def test_basic_categorical(self): trades.ticker = trades.ticker.astype('category') quotes = self.quotes.copy() quotes.ticker = quotes.ticker.astype('category') + expected.ticker = expected.ticker.astype('category') result = merge_asof(trades, quotes, on='time', @@ -199,14 +220,14 @@ def test_multi_index(self): # MultiIndex is prohibited trades = self.trades.set_index(['time', 'price']) quotes = self.quotes.set_index('time') - with self.assertRaises(MergeError): + with pytest.raises(MergeError): merge_asof(trades, quotes, left_index=True, right_index=True) trades = self.trades.set_index('time') quotes = self.quotes.set_index(['time', 'bid']) - with self.assertRaises(MergeError): + with pytest.raises(MergeError): merge_asof(trades, quotes, left_index=True, right_index=True) @@ -216,7 +237,7 @@ def test_on_and_index(self): # 'on' parameter and index together is prohibited trades = self.trades.set_index('time') quotes = self.quotes.set_index('time') - with self.assertRaises(MergeError): + with pytest.raises(MergeError): merge_asof(trades, quotes, left_on='price', left_index=True, @@ -224,7 +245,7 @@ def test_on_and_index(self): trades = self.trades.set_index('time') quotes = self.quotes.set_index('time') - with self.assertRaises(MergeError): + with pytest.raises(MergeError): merge_asof(trades, quotes, right_on='bid', left_index=True, @@ -367,6 +388,41 @@ def test_multiby_heterogeneous_types(self): by=['ticker', 'exch']) assert_frame_equal(result, expected) + def test_multiby_indexed(self): + # GH15676 + left = pd.DataFrame([ + [pd.to_datetime('20160602'), 1, 'a'], + [pd.to_datetime('20160602'), 2, 'a'], + 
[pd.to_datetime('20160603'), 1, 'b'], + [pd.to_datetime('20160603'), 2, 'b']], + columns=['time', 'k1', 'k2']).set_index('time') + + right = pd.DataFrame([ + [pd.to_datetime('20160502'), 1, 'a', 1.0], + [pd.to_datetime('20160502'), 2, 'a', 2.0], + [pd.to_datetime('20160503'), 1, 'b', 3.0], + [pd.to_datetime('20160503'), 2, 'b', 4.0]], + columns=['time', 'k1', 'k2', 'value']).set_index('time') + + expected = pd.DataFrame([ + [pd.to_datetime('20160602'), 1, 'a', 1.0], + [pd.to_datetime('20160602'), 2, 'a', 2.0], + [pd.to_datetime('20160603'), 1, 'b', 3.0], + [pd.to_datetime('20160603'), 2, 'b', 4.0]], + columns=['time', 'k1', 'k2', 'value']).set_index('time') + + result = pd.merge_asof(left, + right, + left_index=True, + right_index=True, + by=['k1', 'k2']) + + assert_frame_equal(expected, result) + + with pytest.raises(MergeError): + pd.merge_asof(left, right, left_index=True, right_index=True, + left_by=['k1', 'k2'], right_by=['k1']) + def test_basic2(self): expected = self.read_data('asof2.csv') @@ -396,18 +452,18 @@ def test_valid_join_keys(self): trades = self.trades quotes = self.quotes - with self.assertRaises(MergeError): + with pytest.raises(MergeError): merge_asof(trades, quotes, left_on='time', right_on='bid', by='ticker') - with self.assertRaises(MergeError): + with pytest.raises(MergeError): merge_asof(trades, quotes, on=['time', 'ticker'], by='ticker') - with self.assertRaises(MergeError): + with pytest.raises(MergeError): merge_asof(trades, quotes, by='ticker') @@ -438,7 +494,7 @@ def test_valid_allow_exact_matches(self): trades = self.trades quotes = self.quotes - with self.assertRaises(MergeError): + with pytest.raises(MergeError): merge_asof(trades, quotes, on='time', by='ticker', @@ -462,27 +518,27 @@ def test_valid_tolerance(self): tolerance=1) # incompat - with self.assertRaises(MergeError): + with pytest.raises(MergeError): merge_asof(trades, quotes, on='time', by='ticker', tolerance=1) # invalid - with self.assertRaises(MergeError): + with 
pytest.raises(MergeError): merge_asof(trades.reset_index(), quotes.reset_index(), on='index', by='ticker', tolerance=1.0) # invalid negative - with self.assertRaises(MergeError): + with pytest.raises(MergeError): merge_asof(trades, quotes, on='time', by='ticker', tolerance=-Timedelta('1s')) - with self.assertRaises(MergeError): + with pytest.raises(MergeError): merge_asof(trades.reset_index(), quotes.reset_index(), on='index', by='ticker', @@ -494,24 +550,24 @@ def test_non_sorted(self): quotes = self.quotes.sort_values('time', ascending=False) # we require that we are already sorted on time & quotes - self.assertFalse(trades.time.is_monotonic) - self.assertFalse(quotes.time.is_monotonic) - with self.assertRaises(ValueError): + assert not trades.time.is_monotonic + assert not quotes.time.is_monotonic + with pytest.raises(ValueError): merge_asof(trades, quotes, on='time', by='ticker') trades = self.trades.sort_values('time') - self.assertTrue(trades.time.is_monotonic) - self.assertFalse(quotes.time.is_monotonic) - with self.assertRaises(ValueError): + assert trades.time.is_monotonic + assert not quotes.time.is_monotonic + with pytest.raises(ValueError): merge_asof(trades, quotes, on='time', by='ticker') quotes = self.quotes.sort_values('time') - self.assertTrue(trades.time.is_monotonic) - self.assertTrue(quotes.time.is_monotonic) + assert trades.time.is_monotonic + assert quotes.time.is_monotonic # ok, though has dupes merge_asof(trades, self.quotes, @@ -855,7 +911,7 @@ def test_on_specialized_type(self): df1 = df1.sort_values('value').reset_index(drop=True) if dtype == np.float16: - with self.assertRaises(MergeError): + with pytest.raises(MergeError): pd.merge_asof(df1, df2, on='value') continue @@ -892,7 +948,7 @@ def test_on_specialized_type_by_int(self): df1 = df1.sort_values('value').reset_index(drop=True) if dtype == np.float16: - with self.assertRaises(MergeError): + with pytest.raises(MergeError): pd.merge_asof(df1, df2, on='value', by='key') else: result = 
pd.merge_asof(df1, df2, on='value', by='key') @@ -936,3 +992,15 @@ def test_on_float_by_int(self): columns=['symbol', 'exch', 'price', 'mpv']) assert_frame_equal(result, expected) + + def test_merge_datatype_error(self): + """ Tests merge datatype mismatch error """ + msg = r'merge keys \[0\] object and int64, must be the same type' + + left = pd.DataFrame({'left_val': [1, 5, 10], + 'a': ['a', 'b', 'c']}) + right = pd.DataFrame({'right_val': [1, 2, 3, 6, 7], + 'a': [1, 2, 3, 6, 7]}) + + with tm.assert_raises_regex(MergeError, msg): + merge_asof(left, right, on='a') diff --git a/pandas/tests/reshape/merge/test_merge_index_as_string.py b/pandas/tests/reshape/merge/test_merge_index_as_string.py new file mode 100644 index 0000000000000..09109e2692a24 --- /dev/null +++ b/pandas/tests/reshape/merge/test_merge_index_as_string.py @@ -0,0 +1,215 @@ +import numpy as np +import pytest + +from pandas import DataFrame +from pandas.util import testing as tm +from pandas.util.testing import assert_frame_equal + + +@pytest.fixture +def df1(): + return DataFrame(dict( + outer=[1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 4], + inner=[1, 2, 3, 1, 2, 3, 4, 1, 2, 1, 2], + v1=np.linspace(0, 1, 11))) + + +@pytest.fixture +def df2(): + return DataFrame(dict( + outer=[1, 1, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3], + inner=[1, 2, 2, 3, 3, 4, 2, 3, 1, 1, 2, 3], + v2=np.linspace(10, 11, 12))) + + +@pytest.fixture(params=[[], ['outer'], ['outer', 'inner']]) +def left_df(request, df1): + """ Construct left test DataFrame with specified levels + (any of 'outer', 'inner', and 'v1')""" + levels = request.param + if levels: + df1 = df1.set_index(levels) + + return df1 + + +@pytest.fixture(params=[[], ['outer'], ['outer', 'inner']]) +def right_df(request, df2): + """ Construct right test DataFrame with specified levels + (any of 'outer', 'inner', and 'v2')""" + levels = request.param + + if levels: + df2 = df2.set_index(levels) + + return df2 + + +def compute_expected(df_left, df_right, + on=None, left_on=None, 
right_on=None, how=None): + """ + Compute the expected merge result for the test case. + + This method computes the expected result of merging two DataFrames on + a combination of their columns and index levels. It does so by + explicitly dropping/resetting their named index levels, performing a + merge on their columns, and then finally restoring the appropriate + index in the result. + + Parameters + ---------- + df_left : DataFrame + The left DataFrame (may have zero or more named index levels) + df_right : DataFrame + The right DataFrame (may have zero or more named index levels) + on : list of str + The on parameter to the merge operation + left_on : list of str + The left_on parameter to the merge operation + right_on : list of str + The right_on parameter to the merge operation + how : str + The how parameter to the merge operation + + Returns + ------- + DataFrame + The expected merge result + """ + + # Handle on param if specified + if on is not None: + left_on, right_on = on, on + + # Compute input named index levels + left_levels = [n for n in df_left.index.names if n is not None] + right_levels = [n for n in df_right.index.names if n is not None] + + # Compute output named index levels + output_levels = [i for i in left_on + if i in right_levels and i in left_levels] + + # Drop index levels that aren't involved in the merge + drop_left = [n for n in left_levels if n not in left_on] + if drop_left: + df_left = df_left.reset_index(drop_left, drop=True) + + drop_right = [n for n in right_levels if n not in right_on] + if drop_right: + df_right = df_right.reset_index(drop_right, drop=True) + + # Convert remaining index levels to columns + reset_left = [n for n in left_levels if n in left_on] + if reset_left: + df_left = df_left.reset_index(level=reset_left) + + reset_right = [n for n in right_levels if n in right_on] + if reset_right: + df_right = df_right.reset_index(level=reset_right) + + # Perform merge + expected = df_left.merge(df_right, + 
left_on=left_on, + right_on=right_on, + how=how) + + # Restore index levels + if output_levels: + expected = expected.set_index(output_levels) + + return expected + + +@pytest.mark.parametrize('on,how', + [(['outer'], 'inner'), + (['inner'], 'left'), + (['outer', 'inner'], 'right'), + (['inner', 'outer'], 'outer')]) +def test_merge_indexes_and_columns_on(left_df, right_df, on, how): + + # Construct expected result + expected = compute_expected(left_df, right_df, on=on, how=how) + + # Perform merge + result = left_df.merge(right_df, on=on, how=how) + assert_frame_equal(result, expected, check_like=True) + + +@pytest.mark.parametrize('left_on,right_on,how', + [(['outer'], ['outer'], 'inner'), + (['inner'], ['inner'], 'right'), + (['outer', 'inner'], ['outer', 'inner'], 'left'), + (['inner', 'outer'], ['inner', 'outer'], 'outer')]) +def test_merge_indexes_and_columns_lefton_righton( + left_df, right_df, left_on, right_on, how): + + # Construct expected result + expected = compute_expected(left_df, right_df, + left_on=left_on, + right_on=right_on, + how=how) + + # Perform merge + result = left_df.merge(right_df, + left_on=left_on, right_on=right_on, how=how) + assert_frame_equal(result, expected, check_like=True) + + +@pytest.mark.parametrize('left_index', + ['inner', ['inner', 'outer']]) +@pytest.mark.parametrize('how', + ['inner', 'left', 'right', 'outer']) +def test_join_indexes_and_columns_on(df1, df2, left_index, how): + + # Construct left_df + left_df = df1.set_index(left_index) + + # Construct right_df + right_df = df2.set_index(['outer', 'inner']) + + # Result + expected = (left_df.reset_index() + .join(right_df, on=['outer', 'inner'], how=how, + lsuffix='_x', rsuffix='_y') + .set_index(left_index)) + + # Perform join + result = left_df.join(right_df, on=['outer', 'inner'], how=how, + lsuffix='_x', rsuffix='_y') + + assert_frame_equal(result, expected, check_like=True) + + +def test_merge_index_column_precedence(df1, df2): + + # Construct left_df with both an 
index and a column named 'outer'. + # We make this 'outer' column equal to the 'inner' column so that we + # can verify that the correct values are used by the merge operation + left_df = df1.set_index('outer') + left_df['outer'] = left_df['inner'] + + # Construct right_df with an index level named 'outer' + right_df = df2.set_index('outer') + + # Construct expected result. + # The 'outer' column from left_df is chosen and the resulting + # frame has no index levels + expected = (left_df.reset_index(level='outer', drop=True) + .merge(right_df.reset_index(), on=['outer', 'inner'])) + + # Merge left_df and right_df on 'outer' and 'inner' + # 'outer' for left_df should refer to the 'outer' column, not the + # 'outer' index level and a FutureWarning should be raised + with tm.assert_produces_warning(FutureWarning): + result = left_df.merge(right_df, on=['outer', 'inner']) + + # Check results + assert_frame_equal(result, expected) + + # Perform the same using the left_on and right_on parameters + with tm.assert_produces_warning(FutureWarning): + result = left_df.merge(right_df, + left_on=['outer', 'inner'], + right_on=['outer', 'inner']) + + assert_frame_equal(result, expected) diff --git a/pandas/tools/tests/test_merge_ordered.py b/pandas/tests/reshape/merge/test_merge_ordered.py similarity index 73% rename from pandas/tools/tests/test_merge_ordered.py rename to pandas/tests/reshape/merge/test_merge_ordered.py index e4a41ea9a28eb..42d8eb7273ee1 100644 --- a/pandas/tools/tests/test_merge_ordered.py +++ b/pandas/tests/reshape/merge/test_merge_ordered.py @@ -6,22 +6,15 @@ from numpy import nan -class TestOrderedMerge(tm.TestCase): +class TestMergeOrdered(object): - def setUp(self): + def setup_method(self, method): self.left = DataFrame({'key': ['a', 'c', 'e'], 'lvalue': [1, 2., 3]}) self.right = DataFrame({'key': ['b', 'c', 'd', 'f'], 'rvalue': [1, 2, 3., 4]}) - def test_deprecation(self): - - with tm.assert_produces_warning(FutureWarning): - pd.ordered_merge(self.left, 
self.right, on='key') - - # GH #813 - def test_basic(self): result = merge_ordered(self.left, self.right, on='key') expected = DataFrame({'key': ['a', 'b', 'c', 'd', 'e', 'f'], @@ -57,7 +50,7 @@ def test_multigroup(self): assert_frame_equal(result, result2.loc[:, result.columns]) result = merge_ordered(left, self.right, on='key', left_by='group') - self.assertTrue(result['group'].notnull().all()) + assert result['group'].notna().all() def test_merge_type(self): class NotADataFrame(DataFrame): @@ -69,7 +62,7 @@ def _constructor(self): nad = NotADataFrame(self.left) result = nad.merge(self.right, on='key') - tm.assertIsInstance(result, NotADataFrame) + assert isinstance(result, NotADataFrame) def test_empty_sequence_concat(self): # GH 9157 @@ -83,8 +76,27 @@ def test_empty_sequence_concat(self): ([None, None], none_pat) ] for df_seq, pattern in test_cases: - tm.assertRaisesRegexp(ValueError, pattern, pd.concat, df_seq) + tm.assert_raises_regex(ValueError, pattern, pd.concat, df_seq) pd.concat([pd.DataFrame()]) pd.concat([None, pd.DataFrame()]) pd.concat([pd.DataFrame(), None]) + + def test_doc_example(self): + left = DataFrame({'group': list('aaabbb'), + 'key': ['a', 'c', 'e', 'a', 'c', 'e'], + 'lvalue': [1, 2, 3] * 2, + }) + + right = DataFrame({'key': ['b', 'c', 'd'], + 'rvalue': [1, 2, 3]}) + + result = merge_ordered(left, right, fill_method='ffill', + left_by='group') + + expected = DataFrame({'group': list('aaaaabbbbb'), + 'key': ['a', 'b', 'c', 'd', 'e'] * 2, + 'lvalue': [1, 1, 2, 2, 3] * 2, + 'rvalue': [nan, 1, 2, 3, 3] * 2}) + + assert_frame_equal(result, expected) diff --git a/pandas/tools/tests/test_concat.py b/pandas/tests/reshape/test_concat.py similarity index 75% rename from pandas/tools/tests/test_concat.py rename to pandas/tests/reshape/test_concat.py index 87a0dda34a525..437b4179c580a 100644 --- a/pandas/tools/tests/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -1,23 +1,27 @@ +from warnings import catch_warnings + +import dateutil 
import numpy as np from numpy.random import randn from datetime import datetime -from pandas.compat import StringIO, iteritems +from pandas.compat import StringIO, iteritems, PY2 import pandas as pd from pandas import (DataFrame, concat, - read_csv, isnull, Series, date_range, + read_csv, isna, Series, date_range, Index, Panel, MultiIndex, Timestamp, - DatetimeIndex, Categorical, CategoricalIndex) -from pandas.types.concat import union_categoricals + DatetimeIndex, Categorical) +from pandas.core.dtypes.dtypes import CategoricalDtype from pandas.util import testing as tm from pandas.util.testing import (assert_frame_equal, - makeCustomDataframe as mkdf, - assert_almost_equal) + makeCustomDataframe as mkdf) + +import pytest -class ConcatenateBase(tm.TestCase): +class ConcatenateBase(object): - def setUp(self): + def setup_method(self, method): self.frame = DataFrame(tm.getSeriesData()) self.mixed_frame = self.frame.copy() self.mixed_frame['foo'] = 'bar' @@ -29,7 +33,7 @@ class TestConcatAppendCommon(ConcatenateBase): Test common dtype coercion rules between concat and append. 
""" - def setUp(self): + def setup_method(self, method): dt_data = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02'), @@ -63,14 +67,14 @@ def _check_expected_dtype(self, obj, label): """ if isinstance(obj, pd.Index): if label == 'bool': - self.assertEqual(obj.dtype, 'object') + assert obj.dtype == 'object' else: - self.assertEqual(obj.dtype, label) + assert obj.dtype == label elif isinstance(obj, pd.Series): if label.startswith('period'): - self.assertEqual(obj.dtype, 'object') + assert obj.dtype == 'object' else: - self.assertEqual(obj.dtype, label) + assert obj.dtype == label else: raise ValueError @@ -122,10 +126,12 @@ def test_concatlike_same_dtypes(self): tm.assert_index_equal(res, exp) # cannot append non-index - with tm.assertRaisesRegexp(TypeError, 'all inputs must be Index'): + with tm.assert_raises_regex(TypeError, + 'all inputs must be Index'): pd.Index(vals1).append(vals2) - with tm.assertRaisesRegexp(TypeError, 'all inputs must be Index'): + with tm.assert_raises_regex(TypeError, + 'all inputs must be Index'): pd.Index(vals1).append([pd.Index(vals2), vals3]) # ----- Series ----- # @@ -172,17 +178,19 @@ def test_concatlike_same_dtypes(self): tm.assert_series_equal(res, exp, check_index_type=True) # cannot append non-index - msg = "cannot concatenate a non-NDFrame object" - with tm.assertRaisesRegexp(TypeError, msg): + msg = (r'cannot concatenate object of type \"(.+?)\";' + ' only pd.Series, pd.DataFrame, and pd.Panel' + r' \(deprecated\) objs are valid') + with tm.assert_raises_regex(TypeError, msg): pd.Series(vals1).append(vals2) - with tm.assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): pd.Series(vals1).append([pd.Series(vals2), vals3]) - with tm.assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): pd.concat([pd.Series(vals1), vals2]) - with tm.assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): pd.concat([pd.Series(vals1), pd.Series(vals2), vals3]) 
def test_concatlike_dtypes_coercion(self): @@ -270,20 +278,20 @@ def test_concatlike_common_coerce_to_pandas_object(self): res = dti.append(tdi) tm.assert_index_equal(res, exp) - tm.assertIsInstance(res[0], pd.Timestamp) - tm.assertIsInstance(res[-1], pd.Timedelta) + assert isinstance(res[0], pd.Timestamp) + assert isinstance(res[-1], pd.Timedelta) dts = pd.Series(dti) tds = pd.Series(tdi) res = dts.append(tds) tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1])) - tm.assertIsInstance(res.iloc[0], pd.Timestamp) - tm.assertIsInstance(res.iloc[-1], pd.Timedelta) + assert isinstance(res.iloc[0], pd.Timestamp) + assert isinstance(res.iloc[-1], pd.Timedelta) res = pd.concat([dts, tds]) tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1])) - tm.assertIsInstance(res.iloc[0], pd.Timestamp) - tm.assertIsInstance(res.iloc[-1], pd.Timedelta) + assert isinstance(res.iloc[0], pd.Timestamp) + assert isinstance(res.iloc[-1], pd.Timedelta) def test_concatlike_datetimetz(self): # GH 7795 @@ -465,7 +473,7 @@ def test_concat_categorical(self): tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) tm.assert_series_equal(s1.append(s2, ignore_index=True), exp) - # completelly different categories (same dtype) => not-category + # completely different categories (same dtype) => not-category s1 = pd.Series([10, 11, np.nan], dtype='category') s2 = pd.Series([np.nan, 1, 3, 2], dtype='category') @@ -473,6 +481,15 @@ def test_concat_categorical(self): tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) tm.assert_series_equal(s1.append(s2, ignore_index=True), exp) + def test_union_categorical_same_categories_different_order(self): + # https://github.com/pandas-dev/pandas/issues/19096 + a = pd.Series(Categorical(['a', 'b', 'c'], categories=['a', 'b', 'c'])) + b = pd.Series(Categorical(['a', 'b', 'c'], categories=['b', 'a', 'c'])) + result = pd.concat([a, b], ignore_index=True) + expected = pd.Series(Categorical(['a', 'b', 'c', 'a', 'b', 'c'], + 
categories=['a', 'b', 'c'])) + tm.assert_series_equal(result, expected) + def test_concat_categorical_coercion(self): # GH 13524 @@ -501,7 +518,7 @@ def test_concat_categorical_coercion(self): tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp) tm.assert_series_equal(s2.append(s1, ignore_index=True), exp) - # completelly different categories => not-category + # completely different categories => not-category s1 = pd.Series([10, 11, np.nan], dtype='category') s2 = pd.Series([1, 3, 2]) @@ -641,7 +658,7 @@ def test_concat_categorical_coercion_nan(self): s1 = pd.Series([np.nan, np.nan], dtype='category') s2 = pd.Series([np.nan, np.nan]) - exp = pd.Series([np.nan, np.nan, np.nan, np.nan], dtype=object) + exp = pd.Series([np.nan, np.nan, np.nan, np.nan]) tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) tm.assert_series_equal(s1.append(s2, ignore_index=True), exp) tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp) @@ -675,7 +692,7 @@ def test_concat_categorical_empty(self): tm.assert_series_equal(s1.append(s2, ignore_index=True), s2) s1 = pd.Series([], dtype='category') - s2 = pd.Series([]) + s2 = pd.Series([], dtype='object') # different dtype => not-category tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2) @@ -705,25 +722,25 @@ def test_append(self): end_frame = self.frame.reindex(end_index) appended = begin_frame.append(end_frame) - assert_almost_equal(appended['A'], self.frame['A']) + tm.assert_almost_equal(appended['A'], self.frame['A']) del end_frame['A'] partial_appended = begin_frame.append(end_frame) - self.assertIn('A', partial_appended) + assert 'A' in partial_appended partial_appended = end_frame.append(begin_frame) - self.assertIn('A', partial_appended) + assert 'A' in partial_appended # mixed type handling appended = self.mixed_frame[:5].append(self.mixed_frame[5:]) - assert_frame_equal(appended, self.mixed_frame) + tm.assert_frame_equal(appended, self.mixed_frame) # what to test here 
mixed_appended = self.mixed_frame[:5].append(self.frame[5:]) mixed_appended2 = self.frame[:5].append(self.mixed_frame[5:]) # all equal except 'foo' column - assert_frame_equal( + tm.assert_frame_equal( mixed_appended.reindex(columns=['A', 'B', 'C', 'D']), mixed_appended2.reindex(columns=['A', 'B', 'C', 'D'])) @@ -731,25 +748,24 @@ def test_append(self): empty = DataFrame({}) appended = self.frame.append(empty) - assert_frame_equal(self.frame, appended) - self.assertIsNot(appended, self.frame) + tm.assert_frame_equal(self.frame, appended) + assert appended is not self.frame appended = empty.append(self.frame) - assert_frame_equal(self.frame, appended) - self.assertIsNot(appended, self.frame) + tm.assert_frame_equal(self.frame, appended) + assert appended is not self.frame - # overlap - self.assertRaises(ValueError, self.frame.append, self.frame, - verify_integrity=True) + # Overlap + with pytest.raises(ValueError): + self.frame.append(self.frame, verify_integrity=True) - # new columns - # GH 6129 + # see gh-6129: new columns df = DataFrame({'a': {'x': 1, 'y': 2}, 'b': {'x': 3, 'y': 4}}) row = Series([5, 6, 7], index=['a', 'b', 'c'], name='z') expected = DataFrame({'a': {'x': 1, 'y': 2, 'z': 5}, 'b': { 'x': 3, 'y': 4, 'z': 6}, 'c': {'z': 7}}) result = df.append(row) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_append_length0_frame(self): df = DataFrame(columns=['A', 'B', 'C']) @@ -785,8 +801,8 @@ def test_append_different_columns(self): b = df[5:].loc[:, ['strings', 'ints', 'floats']] appended = a.append(b) - self.assertTrue(isnull(appended['strings'][0:4]).all()) - self.assertTrue(isnull(appended['bools'][5:]).all()) + assert isna(appended['strings'][0:4]).all() + assert isna(appended['bools'][5:]).all() def test_append_many(self): chunks = [self.frame[:5], self.frame[5:10], @@ -799,8 +815,8 @@ def test_append_many(self): chunks[-1]['foo'] = 'bar' result = chunks[0].append(chunks[1:]) tm.assert_frame_equal(result.loc[:, 
self.frame.columns], self.frame) - self.assertTrue((result['foo'][15:] == 'bar').all()) - self.assertTrue(result['foo'][:15].isnull().all()) + assert (result['foo'][15:] == 'bar').all() + assert result['foo'][:15].isna().all() def test_append_preserve_index_name(self): # #980 @@ -811,7 +827,7 @@ def test_append_preserve_index_name(self): df2 = df2.set_index(['A']) result = df1.append(df2) - self.assertEqual(result.index.name, 'A') + assert result.index.name == 'A' def test_append_dtype_coerce(self): @@ -846,46 +862,44 @@ def test_append_missing_column_proper_upcast(self): dtype=bool)}) appended = df1.append(df2, ignore_index=True) - self.assertEqual(appended['A'].dtype, 'f8') - self.assertEqual(appended['B'].dtype, 'O') + assert appended['A'].dtype == 'f8' + assert appended['B'].dtype == 'O' class TestConcatenate(ConcatenateBase): def test_concat_copy(self): - df = DataFrame(np.random.randn(4, 3)) df2 = DataFrame(np.random.randint(0, 10, size=4).reshape(4, 1)) df3 = DataFrame({5: 'foo'}, index=range(4)) - # these are actual copies + # These are actual copies. result = concat([df, df2, df3], axis=1, copy=True) + for b in result._data.blocks: - self.assertIsNone(b.values.base) + assert b.values.base is None - # these are the same + # These are the same. result = concat([df, df2, df3], axis=1, copy=False) + for b in result._data.blocks: if b.is_float: - self.assertTrue( - b.values.base is df._data.blocks[0].values.base) + assert b.values.base is df._data.blocks[0].values.base elif b.is_integer: - self.assertTrue( - b.values.base is df2._data.blocks[0].values.base) + assert b.values.base is df2._data.blocks[0].values.base elif b.is_object: - self.assertIsNotNone(b.values.base) + assert b.values.base is not None - # float block was consolidated + # Float block was consolidated. 
df4 = DataFrame(np.random.randn(4, 1)) result = concat([df, df2, df3, df4], axis=1, copy=False) for b in result._data.blocks: if b.is_float: - self.assertIsNone(b.values.base) + assert b.values.base is None elif b.is_integer: - self.assertTrue( - b.values.base is df2._data.blocks[0].values.base) + assert b.values.base is df2._data.blocks[0].values.base elif b.is_object: - self.assertIsNotNone(b.values.base) + assert b.values.base is not None def test_concat_with_group_keys(self): df = DataFrame(np.random.randn(4, 3)) @@ -931,9 +945,9 @@ def test_concat_keys_specific_levels(self): levels=[level], names=['group_key']) - self.assert_index_equal(result.columns.levels[0], - Index(level, name='group_key')) - self.assertEqual(result.columns.names[0], 'group_key') + tm.assert_index_equal(result.columns.levels[0], + Index(level, name='group_key')) + assert result.columns.names[0] == 'group_key' def test_concat_dataframe_keys_bug(self): t1 = DataFrame({ @@ -944,8 +958,7 @@ def test_concat_dataframe_keys_bug(self): # it works result = concat([t1, t2], axis=1, keys=['t1', 't2']) - self.assertEqual(list(result.columns), [('t1', 'value'), - ('t2', 'value')]) + assert list(result.columns) == [('t1', 'value'), ('t2', 'value')] def test_concat_series_partial_columns_names(self): # GH10698 @@ -1019,10 +1032,10 @@ def test_concat_multiindex_with_keys(self): columns=Index(['A', 'B', 'C'], name='exp')) result = concat([frame, frame], keys=[0, 1], names=['iteration']) - self.assertEqual(result.index.names, ('iteration',) + index.names) + assert result.index.names == ('iteration',) + index.names tm.assert_frame_equal(result.loc[0], frame) tm.assert_frame_equal(result.loc[1], frame) - self.assertEqual(result.index.nlevels, 3) + assert result.index.nlevels == 3 def test_concat_multiindex_with_tz(self): # GH 6606 @@ -1045,6 +1058,30 @@ def test_concat_multiindex_with_tz(self): result = concat([df, df]) tm.assert_frame_equal(result, expected) + def 
test_concat_multiindex_with_none_in_index_names(self): + # GH 15787 + index = pd.MultiIndex.from_product([[1], range(5)], + names=['level1', None]) + df = pd.DataFrame({'col': range(5)}, index=index, dtype=np.int32) + + result = concat([df, df], keys=[1, 2], names=['level2']) + index = pd.MultiIndex.from_product([[1, 2], [1], range(5)], + names=['level2', 'level1', None]) + expected = pd.DataFrame({'col': list(range(5)) * 2}, + index=index, dtype=np.int32) + assert_frame_equal(result, expected) + + result = concat([df, df[:2]], keys=[1, 2], names=['level2']) + level2 = [1] * 5 + [2] * 2 + level1 = [1] * 7 + no_name = list(range(5)) + list(range(2)) + tuples = list(zip(level2, level1, no_name)) + index = pd.MultiIndex.from_tuples(tuples, + names=['level2', 'level1', None]) + expected = pd.DataFrame({'col': no_name}, index=index, + dtype=np.int32) + assert_frame_equal(result, expected) + def test_concat_keys_and_levels(self): df = DataFrame(np.random.randn(1, 3)) df2 = DataFrame(np.random.randn(1, 4)) @@ -1063,35 +1100,34 @@ def test_concat_keys_and_levels(self): names=names + [None]) expected.index = exp_index - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # no names - result = concat([df, df2, df, df2], keys=[('foo', 'one'), ('foo', 'two'), ('baz', 'one'), ('baz', 'two')], levels=levels) - self.assertEqual(result.index.names, (None,) * 3) + assert result.index.names == (None,) * 3 # no levels result = concat([df, df2, df, df2], keys=[('foo', 'one'), ('foo', 'two'), ('baz', 'one'), ('baz', 'two')], names=['first', 'second']) - self.assertEqual(result.index.names, ('first', 'second') + (None,)) - self.assert_index_equal(result.index.levels[0], - Index(['baz', 'foo'], name='first')) + assert result.index.names == ('first', 'second') + (None,) + tm.assert_index_equal(result.index.levels[0], + Index(['baz', 'foo'], name='first')) def test_concat_keys_levels_no_overlap(self): # GH #1406 df = DataFrame(np.random.randn(1, 3), index=['a']) 
df2 = DataFrame(np.random.randn(1, 4), index=['b']) - self.assertRaises(ValueError, concat, [df, df], - keys=['one', 'two'], levels=[['foo', 'bar', 'baz']]) + pytest.raises(ValueError, concat, [df, df], + keys=['one', 'two'], levels=[['foo', 'bar', 'baz']]) - self.assertRaises(ValueError, concat, [df, df2], - keys=['one', 'two'], levels=[['foo', 'bar', 'baz']]) + pytest.raises(ValueError, concat, [df, df2], + keys=['one', 'two'], levels=[['foo', 'bar', 'baz']]) def test_concat_rename_index(self): a = DataFrame(np.random.rand(3, 3), @@ -1110,7 +1146,7 @@ def test_concat_rename_index(self): exp.index.set_names(names, inplace=True) tm.assert_frame_equal(result, exp) - self.assertEqual(result.index.names, exp.index.names) + assert result.index.names == exp.index.names def test_crossed_dtypes_weird_corner(self): columns = ['A', 'B', 'C', 'D'] @@ -1135,7 +1171,7 @@ def test_crossed_dtypes_weird_corner(self): df2 = DataFrame(np.random.randn(1, 4), index=['b']) result = concat( [df, df2], keys=['one', 'two'], names=['first', 'second']) - self.assertEqual(result.index.names, ('first', 'second')) + assert result.index.names == ('first', 'second') def test_dups_index(self): # GH 4771 @@ -1198,7 +1234,7 @@ def test_handle_empty_objects(self): frames = [baz, empty, empty, df[5:]] concatted = concat(frames, axis=0) - expected = df.loc[:, ['a', 'b', 'c', 'd', 'foo']] + expected = df.reindex(columns=['a', 'b', 'c', 'd', 'foo']) expected['foo'] = expected['foo'].astype('O') expected.loc[0:4, 'foo'] = 'bar' @@ -1281,8 +1317,9 @@ def test_concat_mixed_objs(self): assert_frame_equal(result, expected) # invalid concatente of mixed dims - panel = tm.makePanel() - self.assertRaises(ValueError, lambda: concat([panel, s1], axis=1)) + with catch_warnings(record=True): + panel = tm.makePanel() + pytest.raises(ValueError, lambda: concat([panel, s1], axis=1)) def test_empty_dtype_coerce(self): @@ -1320,89 +1357,59 @@ def test_dtype_coerceion(self): tm.assert_series_equal(result.dtypes, 
df.dtypes) def test_panel_concat_other_axes(self): - panel = tm.makePanel() - - p1 = panel.iloc[:, :5, :] - p2 = panel.iloc[:, 5:, :] - - result = concat([p1, p2], axis=1) - tm.assert_panel_equal(result, panel) - - p1 = panel.iloc[:, :, :2] - p2 = panel.iloc[:, :, 2:] - - result = concat([p1, p2], axis=2) - tm.assert_panel_equal(result, panel) - - # if things are a bit misbehaved - p1 = panel.iloc[:2, :, :2] - p2 = panel.iloc[:, :, 2:] - p1['ItemC'] = 'baz' + with catch_warnings(record=True): + panel = tm.makePanel() - result = concat([p1, p2], axis=2) + p1 = panel.iloc[:, :5, :] + p2 = panel.iloc[:, 5:, :] - expected = panel.copy() - expected['ItemC'] = expected['ItemC'].astype('O') - expected.loc['ItemC', :, :2] = 'baz' - tm.assert_panel_equal(result, expected) + result = concat([p1, p2], axis=1) + tm.assert_panel_equal(result, panel) - def test_panel_concat_buglet(self): - # #2257 - def make_panel(): - index = 5 - cols = 3 - - def df(): - return DataFrame(np.random.randn(index, cols), - index=["I%s" % i for i in range(index)], - columns=["C%s" % i for i in range(cols)]) - return Panel(dict([("Item%s" % x, df()) for x in ['A', 'B', 'C']])) - - panel1 = make_panel() - panel2 = make_panel() - - panel2 = panel2.rename_axis(dict([(x, "%s_1" % x) - for x in panel2.major_axis]), - axis=1) - - panel3 = panel2.rename_axis(lambda x: '%s_1' % x, axis=1) - panel3 = panel3.rename_axis(lambda x: '%s_1' % x, axis=2) - - # it works! 
- concat([panel1, panel3], axis=1, verify_integrity=True) + p1 = panel.iloc[:, :, :2] + p2 = panel.iloc[:, :, 2:] - def test_panel4d_concat(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - p4d = tm.makePanel4D() + result = concat([p1, p2], axis=2) + tm.assert_panel_equal(result, panel) - p1 = p4d.iloc[:, :, :5, :] - p2 = p4d.iloc[:, :, 5:, :] + # if things are a bit misbehaved + p1 = panel.iloc[:2, :, :2] + p2 = panel.iloc[:, :, 2:] + p1['ItemC'] = 'baz' result = concat([p1, p2], axis=2) - tm.assert_panel4d_equal(result, p4d) - p1 = p4d.iloc[:, :, :, :2] - p2 = p4d.iloc[:, :, :, 2:] + expected = panel.copy() + expected['ItemC'] = expected['ItemC'].astype('O') + expected.loc['ItemC', :, :2] = 'baz' + tm.assert_panel_equal(result, expected) - result = concat([p1, p2], axis=3) - tm.assert_panel4d_equal(result, p4d) + def test_panel_concat_buglet(self): + with catch_warnings(record=True): + # #2257 + def make_panel(): + index = 5 + cols = 3 - def test_panel4d_concat_mixed_type(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - p4d = tm.makePanel4D() + def df(): + return DataFrame(np.random.randn(index, cols), + index=["I%s" % i for i in range(index)], + columns=["C%s" % i for i in range(cols)]) + return Panel(dict(("Item%s" % x, df()) + for x in ['A', 'B', 'C'])) - # if things are a bit misbehaved - p1 = p4d.iloc[:, :2, :, :2] - p2 = p4d.iloc[:, :, :, 2:] - p1['L5'] = 'baz' + panel1 = make_panel() + panel2 = make_panel() - result = concat([p1, p2], axis=3) + panel2 = panel2.rename_axis(dict((x, "%s_1" % x) + for x in panel2.major_axis), + axis=1) - p2['L5'] = np.nan - expected = concat([p1, p2], axis=3) - expected = expected.loc[result.labels] + panel3 = panel2.rename_axis(lambda x: '%s_1' % x, axis=1) + panel3 = panel3.rename_axis(lambda x: '%s_1' % x, axis=2) - tm.assert_panel4d_equal(result, expected) + # it works! 
+ concat([panel1, panel3], axis=1, verify_integrity=True) def test_concat_series(self): @@ -1413,7 +1420,7 @@ def test_concat_series(self): result = concat(pieces) tm.assert_series_equal(result, ts) - self.assertEqual(result.name, ts.name) + assert result.name == ts.name result = concat(pieces, keys=[0, 1, 2]) expected = ts.copy() @@ -1450,8 +1457,8 @@ def test_concat_series_axis1(self): s2.name = None result = concat([s, s2], axis=1) - self.assertTrue(np.array_equal( - result.columns, Index(['A', 0], dtype='object'))) + tm.assert_index_equal(result.columns, + Index(['A', 0], dtype='object')) # must reindex, #2603 s = Series(randn(3), index=['c', 'a', 'b'], name='A') @@ -1473,18 +1480,18 @@ def test_concat_exclude_none(self): pieces = [df[:5], None, None, df[5:]] result = concat(pieces) tm.assert_frame_equal(result, df) - self.assertRaises(ValueError, concat, [None, None]) + pytest.raises(ValueError, concat, [None, None]) def test_concat_datetime64_block(self): - from pandas.tseries.index import date_range + from pandas.core.indexes.datetimes import date_range rng = date_range('1/1/2000', periods=10) df = DataFrame({'time': rng}) result = concat([df, df]) - self.assertTrue((result.iloc[:10]['time'] == rng).all()) - self.assertTrue((result.iloc[10:]['time'] == rng).all()) + assert (result.iloc[:10]['time'] == rng).all() + assert (result.iloc[10:]['time'] == rng).all() def test_concat_timedelta64_block(self): from pandas import to_timedelta @@ -1494,8 +1501,8 @@ def test_concat_timedelta64_block(self): df = DataFrame({'time': rng}) result = concat([df, df]) - self.assertTrue((result.iloc[:10]['time'] == rng).all()) - self.assertTrue((result.iloc[10:]['time'] == rng).all()) + assert (result.iloc[:10]['time'] == rng).all() + assert (result.iloc[10:]['time'] == rng).all() def test_concat_keys_with_none(self): # #1649 @@ -1511,283 +1518,6 @@ def test_concat_keys_with_none(self): keys=['b', 'c', 'd', 'e']) tm.assert_frame_equal(result, expected) - def 
test_union_categorical(self): - # GH 13361 - data = [ - (list('abc'), list('abd'), list('abcabd')), - ([0, 1, 2], [2, 3, 4], [0, 1, 2, 2, 3, 4]), - ([0, 1.2, 2], [2, 3.4, 4], [0, 1.2, 2, 2, 3.4, 4]), - - (['b', 'b', np.nan, 'a'], ['a', np.nan, 'c'], - ['b', 'b', np.nan, 'a', 'a', np.nan, 'c']), - - (pd.date_range('2014-01-01', '2014-01-05'), - pd.date_range('2014-01-06', '2014-01-07'), - pd.date_range('2014-01-01', '2014-01-07')), - - (pd.date_range('2014-01-01', '2014-01-05', tz='US/Central'), - pd.date_range('2014-01-06', '2014-01-07', tz='US/Central'), - pd.date_range('2014-01-01', '2014-01-07', tz='US/Central')), - - (pd.period_range('2014-01-01', '2014-01-05'), - pd.period_range('2014-01-06', '2014-01-07'), - pd.period_range('2014-01-01', '2014-01-07')), - ] - - for a, b, combined in data: - for box in [Categorical, CategoricalIndex, Series]: - result = union_categoricals([box(Categorical(a)), - box(Categorical(b))]) - expected = Categorical(combined) - tm.assert_categorical_equal(result, expected, - check_category_order=True) - - # new categories ordered by appearance - s = Categorical(['x', 'y', 'z']) - s2 = Categorical(['a', 'b', 'c']) - result = union_categoricals([s, s2]) - expected = Categorical(['x', 'y', 'z', 'a', 'b', 'c'], - categories=['x', 'y', 'z', 'a', 'b', 'c']) - tm.assert_categorical_equal(result, expected) - - s = Categorical([0, 1.2, 2], ordered=True) - s2 = Categorical([0, 1.2, 2], ordered=True) - result = union_categoricals([s, s2]) - expected = Categorical([0, 1.2, 2, 0, 1.2, 2], ordered=True) - tm.assert_categorical_equal(result, expected) - - # must exactly match types - s = Categorical([0, 1.2, 2]) - s2 = Categorical([2, 3, 4]) - msg = 'dtype of categories must be the same' - with tm.assertRaisesRegexp(TypeError, msg): - union_categoricals([s, s2]) - - msg = 'No Categoricals to union' - with tm.assertRaisesRegexp(ValueError, msg): - union_categoricals([]) - - def test_union_categoricals_nan(self): - # GH 13759 - res = 
union_categoricals([pd.Categorical([1, 2, np.nan]), - pd.Categorical([3, 2, np.nan])]) - exp = Categorical([1, 2, np.nan, 3, 2, np.nan]) - tm.assert_categorical_equal(res, exp) - - res = union_categoricals([pd.Categorical(['A', 'B']), - pd.Categorical(['B', 'B', np.nan])]) - exp = Categorical(['A', 'B', 'B', 'B', np.nan]) - tm.assert_categorical_equal(res, exp) - - val1 = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-03-01'), - pd.NaT] - val2 = [pd.NaT, pd.Timestamp('2011-01-01'), - pd.Timestamp('2011-02-01')] - - res = union_categoricals([pd.Categorical(val1), pd.Categorical(val2)]) - exp = Categorical(val1 + val2, - categories=[pd.Timestamp('2011-01-01'), - pd.Timestamp('2011-03-01'), - pd.Timestamp('2011-02-01')]) - tm.assert_categorical_equal(res, exp) - - # all NaN - res = union_categoricals([pd.Categorical([np.nan, np.nan]), - pd.Categorical(['X'])]) - exp = Categorical([np.nan, np.nan, 'X']) - tm.assert_categorical_equal(res, exp) - - res = union_categoricals([pd.Categorical([np.nan, np.nan]), - pd.Categorical([np.nan, np.nan])]) - exp = Categorical([np.nan, np.nan, np.nan, np.nan]) - tm.assert_categorical_equal(res, exp) - - def test_union_categoricals_empty(self): - # GH 13759 - res = union_categoricals([pd.Categorical([]), - pd.Categorical([])]) - exp = Categorical([]) - tm.assert_categorical_equal(res, exp) - - res = union_categoricals([pd.Categorical([]), - pd.Categorical([1.0])]) - exp = Categorical([1.0]) - tm.assert_categorical_equal(res, exp) - - # to make dtype equal - nanc = pd.Categorical(np.array([np.nan], dtype=np.float64)) - res = union_categoricals([nanc, - pd.Categorical([])]) - tm.assert_categorical_equal(res, nanc) - - def test_union_categorical_same_category(self): - # check fastpath - c1 = Categorical([1, 2, 3, 4], categories=[1, 2, 3, 4]) - c2 = Categorical([3, 2, 1, np.nan], categories=[1, 2, 3, 4]) - res = union_categoricals([c1, c2]) - exp = Categorical([1, 2, 3, 4, 3, 2, 1, np.nan], - categories=[1, 2, 3, 4]) - 
tm.assert_categorical_equal(res, exp) - - c1 = Categorical(['z', 'z', 'z'], categories=['x', 'y', 'z']) - c2 = Categorical(['x', 'x', 'x'], categories=['x', 'y', 'z']) - res = union_categoricals([c1, c2]) - exp = Categorical(['z', 'z', 'z', 'x', 'x', 'x'], - categories=['x', 'y', 'z']) - tm.assert_categorical_equal(res, exp) - - def test_union_categoricals_ordered(self): - c1 = Categorical([1, 2, 3], ordered=True) - c2 = Categorical([1, 2, 3], ordered=False) - - msg = 'Categorical.ordered must be the same' - with tm.assertRaisesRegexp(TypeError, msg): - union_categoricals([c1, c2]) - - res = union_categoricals([c1, c1]) - exp = Categorical([1, 2, 3, 1, 2, 3], ordered=True) - tm.assert_categorical_equal(res, exp) - - c1 = Categorical([1, 2, 3, np.nan], ordered=True) - c2 = Categorical([3, 2], categories=[1, 2, 3], ordered=True) - - res = union_categoricals([c1, c2]) - exp = Categorical([1, 2, 3, np.nan, 3, 2], ordered=True) - tm.assert_categorical_equal(res, exp) - - c1 = Categorical([1, 2, 3], ordered=True) - c2 = Categorical([1, 2, 3], categories=[3, 2, 1], ordered=True) - - msg = "to union ordered Categoricals, all categories must be the same" - with tm.assertRaisesRegexp(TypeError, msg): - union_categoricals([c1, c2]) - - def test_union_categoricals_sort(self): - # GH 13846 - c1 = Categorical(['x', 'y', 'z']) - c2 = Categorical(['a', 'b', 'c']) - result = union_categoricals([c1, c2], sort_categories=True) - expected = Categorical(['x', 'y', 'z', 'a', 'b', 'c'], - categories=['a', 'b', 'c', 'x', 'y', 'z']) - tm.assert_categorical_equal(result, expected) - - # fastpath - c1 = Categorical(['a', 'b'], categories=['b', 'a', 'c']) - c2 = Categorical(['b', 'c'], categories=['b', 'a', 'c']) - result = union_categoricals([c1, c2], sort_categories=True) - expected = Categorical(['a', 'b', 'b', 'c'], - categories=['a', 'b', 'c']) - tm.assert_categorical_equal(result, expected) - - c1 = Categorical(['a', 'b'], categories=['c', 'a', 'b']) - c2 = Categorical(['b', 'c'], 
categories=['c', 'a', 'b']) - result = union_categoricals([c1, c2], sort_categories=True) - expected = Categorical(['a', 'b', 'b', 'c'], - categories=['a', 'b', 'c']) - tm.assert_categorical_equal(result, expected) - - # fastpath - skip resort - c1 = Categorical(['a', 'b'], categories=['a', 'b', 'c']) - c2 = Categorical(['b', 'c'], categories=['a', 'b', 'c']) - result = union_categoricals([c1, c2], sort_categories=True) - expected = Categorical(['a', 'b', 'b', 'c'], - categories=['a', 'b', 'c']) - tm.assert_categorical_equal(result, expected) - - c1 = Categorical(['x', np.nan]) - c2 = Categorical([np.nan, 'b']) - result = union_categoricals([c1, c2], sort_categories=True) - expected = Categorical(['x', np.nan, np.nan, 'b'], - categories=['b', 'x']) - tm.assert_categorical_equal(result, expected) - - c1 = Categorical([np.nan]) - c2 = Categorical([np.nan]) - result = union_categoricals([c1, c2], sort_categories=True) - expected = Categorical([np.nan, np.nan], categories=[]) - tm.assert_categorical_equal(result, expected) - - c1 = Categorical([]) - c2 = Categorical([]) - result = union_categoricals([c1, c2], sort_categories=True) - expected = Categorical([]) - tm.assert_categorical_equal(result, expected) - - c1 = Categorical(['b', 'a'], categories=['b', 'a', 'c'], ordered=True) - c2 = Categorical(['a', 'c'], categories=['b', 'a', 'c'], ordered=True) - with tm.assertRaises(TypeError): - union_categoricals([c1, c2], sort_categories=True) - - def test_union_categoricals_sort_false(self): - # GH 13846 - c1 = Categorical(['x', 'y', 'z']) - c2 = Categorical(['a', 'b', 'c']) - result = union_categoricals([c1, c2], sort_categories=False) - expected = Categorical(['x', 'y', 'z', 'a', 'b', 'c'], - categories=['x', 'y', 'z', 'a', 'b', 'c']) - tm.assert_categorical_equal(result, expected) - - # fastpath - c1 = Categorical(['a', 'b'], categories=['b', 'a', 'c']) - c2 = Categorical(['b', 'c'], categories=['b', 'a', 'c']) - result = union_categoricals([c1, c2], 
sort_categories=False) - expected = Categorical(['a', 'b', 'b', 'c'], - categories=['b', 'a', 'c']) - tm.assert_categorical_equal(result, expected) - - # fastpath - skip resort - c1 = Categorical(['a', 'b'], categories=['a', 'b', 'c']) - c2 = Categorical(['b', 'c'], categories=['a', 'b', 'c']) - result = union_categoricals([c1, c2], sort_categories=False) - expected = Categorical(['a', 'b', 'b', 'c'], - categories=['a', 'b', 'c']) - tm.assert_categorical_equal(result, expected) - - c1 = Categorical(['x', np.nan]) - c2 = Categorical([np.nan, 'b']) - result = union_categoricals([c1, c2], sort_categories=False) - expected = Categorical(['x', np.nan, np.nan, 'b'], - categories=['x', 'b']) - tm.assert_categorical_equal(result, expected) - - c1 = Categorical([np.nan]) - c2 = Categorical([np.nan]) - result = union_categoricals([c1, c2], sort_categories=False) - expected = Categorical([np.nan, np.nan], categories=[]) - tm.assert_categorical_equal(result, expected) - - c1 = Categorical([]) - c2 = Categorical([]) - result = union_categoricals([c1, c2], sort_categories=False) - expected = Categorical([]) - tm.assert_categorical_equal(result, expected) - - c1 = Categorical(['b', 'a'], categories=['b', 'a', 'c'], ordered=True) - c2 = Categorical(['a', 'c'], categories=['b', 'a', 'c'], ordered=True) - result = union_categoricals([c1, c2], sort_categories=False) - expected = Categorical(['b', 'a', 'a', 'c'], - categories=['b', 'a', 'c'], ordered=True) - tm.assert_categorical_equal(result, expected) - - def test_union_categorical_unwrap(self): - # GH 14173 - c1 = Categorical(['a', 'b']) - c2 = pd.Series(['b', 'c'], dtype='category') - result = union_categoricals([c1, c2]) - expected = Categorical(['a', 'b', 'b', 'c']) - tm.assert_categorical_equal(result, expected) - - c2 = CategoricalIndex(c2) - result = union_categoricals([c1, c2]) - tm.assert_categorical_equal(result, expected) - - c1 = Series(c1) - result = union_categoricals([c1, c2]) - tm.assert_categorical_equal(result, 
expected) - - with tm.assertRaises(TypeError): - union_categoricals([c1, ['a', 'b', 'c']]) - def test_concat_bug_1719(self): ts1 = tm.makeTimeSeries() ts2 = tm.makeTimeSeries()[::2] @@ -1797,7 +1527,7 @@ def test_concat_bug_1719(self): left = concat([ts1, ts2], join='outer', axis=1) right = concat([ts2, ts1], join='outer', axis=1) - self.assertEqual(len(left), len(right)) + assert len(left) == len(right) def test_concat_bug_2972(self): ts0 = Series(np.zeros(5)) @@ -1812,10 +1542,10 @@ def test_concat_bug_2972(self): def test_concat_bug_3602(self): # GH 3602, duplicate columns - df1 = DataFrame({'firmNo': [0, 0, 0, 0], 'stringvar': [ - 'rrr', 'rrr', 'rrr', 'rrr'], 'prc': [6, 6, 6, 6]}) - df2 = DataFrame({'misc': [1, 2, 3, 4], 'prc': [ - 6, 6, 6, 6], 'C': [9, 10, 11, 12]}) + df1 = DataFrame({'firmNo': [0, 0, 0, 0], 'prc': [6, 6, 6, 6], + 'stringvar': ['rrr', 'rrr', 'rrr', 'rrr']}) + df2 = DataFrame({'C': [9, 10, 11, 12], 'misc': [1, 2, 3, 4], + 'prc': [6, 6, 6, 6]}) expected = DataFrame([[0, 6, 'rrr', 9, 1, 6], [0, 6, 'rrr', 10, 2, 6], [0, 6, 'rrr', 11, 3, 6], @@ -1825,13 +1555,25 @@ def test_concat_bug_3602(self): result = concat([df1, df2], axis=1) assert_frame_equal(result, expected) + def test_concat_inner_join_empty(self): + # GH 15328 + df_empty = pd.DataFrame() + df_a = pd.DataFrame({'a': [1, 2]}, index=[0, 1], dtype='int64') + df_expected = pd.DataFrame({'a': []}, index=[], dtype='int64') + + for how, expected in [('inner', df_expected), ('outer', df_a)]: + result = pd.concat([df_a, df_empty], axis=1, join=how) + assert_frame_equal(result, expected) + def test_concat_series_axis1_same_names_ignore_index(self): dates = date_range('01-Jan-2013', '01-Jan-2014', freq='MS')[0:-1] s1 = Series(randn(len(dates)), index=dates, name='value') s2 = Series(randn(len(dates)), index=dates, name='value') result = concat([s1, s2], axis=1, ignore_index=True) - self.assertTrue(np.array_equal(result.columns, [0, 1])) + expected = Index([0, 1]) + + 
tm.assert_index_equal(result.columns, expected) def test_concat_iterables(self): from collections import deque, Iterable @@ -1874,12 +1616,12 @@ def test_concat_invalid(self): # trying to concat a ndframe with a non-ndframe df1 = mkdf(10, 2) for obj in [1, dict(), [1, 2], (1, 2)]: - self.assertRaises(TypeError, lambda x: concat([df1, obj])) + pytest.raises(TypeError, lambda x: concat([df1, obj])) def test_concat_invalid_first_argument(self): df1 = mkdf(10, 2) df2 = mkdf(10, 2) - self.assertRaises(TypeError, concat, df1, df2) + pytest.raises(TypeError, concat, df1, df2) # generator ok though concat(DataFrame(np.random.rand(5, 5)) for _ in range(3)) @@ -1944,8 +1686,7 @@ def test_concat_tz_frame(self): assert_frame_equal(df2, df3) def test_concat_tz_series(self): - # GH 11755 - # tz and no tz + # gh-11755: tz and no tz x = Series(date_range('20151124 08:00', '20151124 09:00', freq='1h', tz='UTC')) @@ -1955,8 +1696,7 @@ def test_concat_tz_series(self): result = concat([x, y], ignore_index=True) tm.assert_series_equal(result, expected) - # GH 11887 - # concat tz and object + # gh-11887: concat tz and object x = Series(date_range('20151124 08:00', '20151124 09:00', freq='1h', tz='UTC')) @@ -1966,10 +1706,8 @@ def test_concat_tz_series(self): result = concat([x, y], ignore_index=True) tm.assert_series_equal(result, expected) - # 12217 - # 12306 fixed I think - - # Concat'ing two UTC times + # see gh-12217 and gh-12306 + # Concatenating two UTC times first = pd.DataFrame([[datetime(2016, 1, 1)]]) first[0] = first[0].dt.tz_localize('UTC') @@ -1977,9 +1715,9 @@ def test_concat_tz_series(self): second[0] = second[0].dt.tz_localize('UTC') result = pd.concat([first, second]) - self.assertEqual(result[0].dtype, 'datetime64[ns, UTC]') + assert result[0].dtype == 'datetime64[ns, UTC]' - # Concat'ing two London times + # Concatenating two London times first = pd.DataFrame([[datetime(2016, 1, 1)]]) first[0] = first[0].dt.tz_localize('Europe/London') @@ -1987,9 +1725,9 @@ def 
test_concat_tz_series(self): second[0] = second[0].dt.tz_localize('Europe/London') result = pd.concat([first, second]) - self.assertEqual(result[0].dtype, 'datetime64[ns, Europe/London]') + assert result[0].dtype == 'datetime64[ns, Europe/London]' - # Concat'ing 2+1 London times + # Concatenating 2+1 London times first = pd.DataFrame([[datetime(2016, 1, 1)], [datetime(2016, 1, 2)]]) first[0] = first[0].dt.tz_localize('Europe/London') @@ -1997,7 +1735,7 @@ def test_concat_tz_series(self): second[0] = second[0].dt.tz_localize('Europe/London') result = pd.concat([first, second]) - self.assertEqual(result[0].dtype, 'datetime64[ns, Europe/London]') + assert result[0].dtype == 'datetime64[ns, Europe/London]' # Concat'ing 1+2 London times first = pd.DataFrame([[datetime(2016, 1, 1)]]) @@ -2007,11 +1745,10 @@ def test_concat_tz_series(self): second[0] = second[0].dt.tz_localize('Europe/London') result = pd.concat([first, second]) - self.assertEqual(result[0].dtype, 'datetime64[ns, Europe/London]') + assert result[0].dtype == 'datetime64[ns, Europe/London]' def test_concat_tz_series_with_datetimelike(self): - # GH 12620 - # tz and timedelta + # see gh-12620: tz and timedelta x = [pd.Timestamp('2011-01-01', tz='US/Eastern'), pd.Timestamp('2011-02-01', tz='US/Eastern')] y = [pd.Timedelta('1 day'), pd.Timedelta('2 day')] @@ -2024,16 +1761,15 @@ def test_concat_tz_series_with_datetimelike(self): tm.assert_series_equal(result, pd.Series(x + y, dtype='object')) def test_concat_tz_series_tzlocal(self): - # GH 13583 - tm._skip_if_no_dateutil() - import dateutil + # see gh-13583 x = [pd.Timestamp('2011-01-01', tz=dateutil.tz.tzlocal()), pd.Timestamp('2011-02-01', tz=dateutil.tz.tzlocal())] y = [pd.Timestamp('2012-01-01', tz=dateutil.tz.tzlocal()), pd.Timestamp('2012-02-01', tz=dateutil.tz.tzlocal())] + result = concat([pd.Series(x), pd.Series(y)], ignore_index=True) tm.assert_series_equal(result, pd.Series(x + y)) - self.assertEqual(result.dtype, 'datetime64[ns, tzlocal()]') + 
assert result.dtype == 'datetime64[ns, tzlocal()]' def test_concat_period_series(self): x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D')) @@ -2041,7 +1777,7 @@ def test_concat_period_series(self): expected = Series([x[0], x[1], y[0], y[1]], dtype='object') result = concat([x, y], ignore_index=True) tm.assert_series_equal(result, expected) - self.assertEqual(result.dtype, 'object') + assert result.dtype == 'object' # different freq x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D')) @@ -2049,14 +1785,14 @@ def test_concat_period_series(self): expected = Series([x[0], x[1], y[0], y[1]], dtype='object') result = concat([x, y], ignore_index=True) tm.assert_series_equal(result, expected) - self.assertEqual(result.dtype, 'object') + assert result.dtype == 'object' x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D')) y = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='M')) expected = Series([x[0], x[1], y[0], y[1]], dtype='object') result = concat([x, y], ignore_index=True) tm.assert_series_equal(result, expected) - self.assertEqual(result.dtype, 'object') + assert result.dtype == 'object' # non-period x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D')) @@ -2064,14 +1800,14 @@ def test_concat_period_series(self): expected = Series([x[0], x[1], y[0], y[1]], dtype='object') result = concat([x, y], ignore_index=True) tm.assert_series_equal(result, expected) - self.assertEqual(result.dtype, 'object') + assert result.dtype == 'object' x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D')) y = Series(['A', 'B']) expected = Series([x[0], x[1], y[0], y[1]], dtype='object') result = concat([x, y], ignore_index=True) tm.assert_series_equal(result, expected) - self.assertEqual(result.dtype, 'object') + assert result.dtype == 'object' def test_concat_empty_series(self): # GH 11082 @@ -2101,7 +1837,7 @@ def test_default_index(self): s1 = pd.Series([1, 2, 3], name='x') s2 = pd.Series([4, 5, 6], name='y') 
res = pd.concat([s1, s2], axis=1, ignore_index=True) - self.assertIsInstance(res.columns, pd.RangeIndex) + assert isinstance(res.columns, pd.RangeIndex) exp = pd.DataFrame([[1, 4], [2, 5], [3, 6]]) # use check_index_type=True to check the result have # RangeIndex (default index) @@ -2112,7 +1848,7 @@ def test_default_index(self): s1 = pd.Series([1, 2, 3]) s2 = pd.Series([4, 5, 6]) res = pd.concat([s1, s2], axis=1, ignore_index=False) - self.assertIsInstance(res.columns, pd.RangeIndex) + assert isinstance(res.columns, pd.RangeIndex) exp = pd.DataFrame([[1, 4], [2, 5], [3, 6]]) exp.columns = pd.RangeIndex(2) tm.assert_frame_equal(res, exp, check_index_type=True, @@ -2167,3 +1903,255 @@ def test_concat_multiindex_dfs_with_deepcopy(self): tm.assert_frame_equal(result_copy, expected) result_no_copy = pd.concat(example_dict, names=['testname']) tm.assert_frame_equal(result_no_copy, expected) + + def test_categorical_concat_append(self): + cat = Categorical(["a", "b"], categories=["a", "b"]) + vals = [1, 2] + df = DataFrame({"cats": cat, "vals": vals}) + cat2 = Categorical(["a", "b", "a", "b"], categories=["a", "b"]) + vals2 = [1, 2, 1, 2] + exp = DataFrame({"cats": cat2, "vals": vals2}, + index=Index([0, 1, 0, 1])) + + tm.assert_frame_equal(pd.concat([df, df]), exp) + tm.assert_frame_equal(df.append(df), exp) + + # GH 13524 can concat different categories + cat3 = Categorical(["a", "b"], categories=["a", "b", "c"]) + vals3 = [1, 2] + df_different_categories = DataFrame({"cats": cat3, "vals": vals3}) + + res = pd.concat([df, df_different_categories], ignore_index=True) + exp = DataFrame({"cats": list('abab'), "vals": [1, 2, 1, 2]}) + tm.assert_frame_equal(res, exp) + + res = df.append(df_different_categories, ignore_index=True) + tm.assert_frame_equal(res, exp) + + def test_categorical_concat_dtypes(self): + + # GH8143 + index = ['cat', 'obj', 'num'] + cat = Categorical(['a', 'b', 'c']) + obj = Series(['a', 'b', 'c']) + num = Series([1, 2, 3]) + df = 
pd.concat([Series(cat), obj, num], axis=1, keys=index) + + result = df.dtypes == 'object' + expected = Series([False, True, False], index=index) + tm.assert_series_equal(result, expected) + + result = df.dtypes == 'int64' + expected = Series([False, False, True], index=index) + tm.assert_series_equal(result, expected) + + result = df.dtypes == 'category' + expected = Series([True, False, False], index=index) + tm.assert_series_equal(result, expected) + + def test_categorical_concat(self): + # See GH 10177 + df1 = DataFrame(np.arange(18, dtype='int64').reshape(6, 3), + columns=["a", "b", "c"]) + + df2 = DataFrame(np.arange(14, dtype='int64').reshape(7, 2), + columns=["a", "c"]) + + cat_values = ["one", "one", "two", "one", "two", "two", "one"] + df2['h'] = Series(Categorical(cat_values)) + + res = pd.concat((df1, df2), axis=0, ignore_index=True) + exp = DataFrame({'a': [0, 3, 6, 9, 12, 15, 0, 2, 4, 6, 8, 10, 12], + 'b': [1, 4, 7, 10, 13, 16, np.nan, np.nan, np.nan, + np.nan, np.nan, np.nan, np.nan], + 'c': [2, 5, 8, 11, 14, 17, 1, 3, 5, 7, 9, 11, 13], + 'h': [None] * 6 + cat_values}) + tm.assert_frame_equal(res, exp) + + def test_categorical_concat_gh7864(self): + # GH 7864 + # make sure ordering is preserverd + df = DataFrame({"id": [1, 2, 3, 4, 5, 6], "raw_grade": list('abbaae')}) + df["grade"] = Categorical(df["raw_grade"]) + df['grade'].cat.set_categories(['e', 'a', 'b']) + + df1 = df[0:3] + df2 = df[3:] + + tm.assert_index_equal(df['grade'].cat.categories, + df1['grade'].cat.categories) + tm.assert_index_equal(df['grade'].cat.categories, + df2['grade'].cat.categories) + + dfx = pd.concat([df1, df2]) + tm.assert_index_equal(df['grade'].cat.categories, + dfx['grade'].cat.categories) + + dfa = df1.append(df2) + tm.assert_index_equal(df['grade'].cat.categories, + dfa['grade'].cat.categories) + + def test_categorical_concat_preserve(self): + + # GH 8641 series concat not preserving category dtype + # GH 13524 can concat different categories + s = Series(list('abc'), 
dtype='category') + s2 = Series(list('abd'), dtype='category') + + exp = Series(list('abcabd')) + res = pd.concat([s, s2], ignore_index=True) + tm.assert_series_equal(res, exp) + + exp = Series(list('abcabc'), dtype='category') + res = pd.concat([s, s], ignore_index=True) + tm.assert_series_equal(res, exp) + + exp = Series(list('abcabc'), index=[0, 1, 2, 0, 1, 2], + dtype='category') + res = pd.concat([s, s]) + tm.assert_series_equal(res, exp) + + a = Series(np.arange(6, dtype='int64')) + b = Series(list('aabbca')) + + df2 = DataFrame({'A': a, + 'B': b.astype(CategoricalDtype(list('cab')))}) + res = pd.concat([df2, df2]) + exp = DataFrame( + {'A': pd.concat([a, a]), + 'B': pd.concat([b, b]).astype(CategoricalDtype(list('cab')))}) + tm.assert_frame_equal(res, exp) + + def test_categorical_index_preserver(self): + + a = Series(np.arange(6, dtype='int64')) + b = Series(list('aabbca')) + + df2 = DataFrame({'A': a, + 'B': b.astype(CategoricalDtype(list('cab'))) + }).set_index('B') + result = pd.concat([df2, df2]) + expected = DataFrame( + {'A': pd.concat([a, a]), + 'B': pd.concat([b, b]).astype(CategoricalDtype(list('cab'))) + }).set_index('B') + tm.assert_frame_equal(result, expected) + + # wrong catgories + df3 = DataFrame({'A': a, 'B': Categorical(b, categories=list('abe')) + }).set_index('B') + pytest.raises(TypeError, lambda: pd.concat([df2, df3])) + + def test_concat_categoricalindex(self): + # GH 16111, categories that aren't lexsorted + categories = [9, 0, 1, 2, 3] + + a = pd.Series(1, index=pd.CategoricalIndex([9, 0], + categories=categories)) + b = pd.Series(2, index=pd.CategoricalIndex([0, 1], + categories=categories)) + c = pd.Series(3, index=pd.CategoricalIndex([1, 2], + categories=categories)) + + result = pd.concat([a, b, c], axis=1) + + exp_idx = pd.CategoricalIndex([0, 1, 2, 9]) + exp = pd.DataFrame({0: [1, np.nan, np.nan, 1], + 1: [2, 2, np.nan, np.nan], + 2: [np.nan, 3, 3, np.nan]}, + columns=[0, 1, 2], + index=exp_idx) + tm.assert_frame_equal(result, 
exp) + + def test_concat_order(self): + # GH 17344 + dfs = [pd.DataFrame(index=range(3), columns=['a', 1, None])] + dfs += [pd.DataFrame(index=range(3), columns=[None, 1, 'a']) + for i in range(100)] + result = pd.concat(dfs).columns + expected = dfs[0].columns + if PY2: + expected = expected.sort_values() + tm.assert_index_equal(result, expected) + + def test_concat_datetime_timezone(self): + # GH 18523 + idx1 = pd.date_range('2011-01-01', periods=3, freq='H', + tz='Europe/Paris') + idx2 = pd.date_range(start=idx1[0], end=idx1[-1], freq='H') + df1 = pd.DataFrame({'a': [1, 2, 3]}, index=idx1) + df2 = pd.DataFrame({'b': [1, 2, 3]}, index=idx2) + result = pd.concat([df1, df2], axis=1) + + exp_idx = DatetimeIndex(['2011-01-01 00:00:00+01:00', + '2011-01-01 01:00:00+01:00', + '2011-01-01 02:00:00+01:00'], + freq='H' + ).tz_localize('UTC').tz_convert('Europe/Paris') + + expected = pd.DataFrame([[1, 1], [2, 2], [3, 3]], + index=exp_idx, columns=['a', 'b']) + + tm.assert_frame_equal(result, expected) + + idx3 = pd.date_range('2011-01-01', periods=3, + freq='H', tz='Asia/Tokyo') + df3 = pd.DataFrame({'b': [1, 2, 3]}, index=idx3) + result = pd.concat([df1, df3], axis=1) + + exp_idx = DatetimeIndex(['2010-12-31 15:00:00+00:00', + '2010-12-31 16:00:00+00:00', + '2010-12-31 17:00:00+00:00', + '2010-12-31 23:00:00+00:00', + '2011-01-01 00:00:00+00:00', + '2011-01-01 01:00:00+00:00'] + ).tz_localize('UTC') + + expected = pd.DataFrame([[np.nan, 1], [np.nan, 2], [np.nan, 3], + [1, np.nan], [2, np.nan], [3, np.nan]], + index=exp_idx, columns=['a', 'b']) + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize('pdt', [pd.Series, pd.DataFrame, pd.Panel]) +@pytest.mark.parametrize('dt', np.sctypes['float']) +def test_concat_no_unnecessary_upcast(dt, pdt): + with catch_warnings(record=True): + # GH 13247 + dims = pdt().ndim + dfs = [pdt(np.array([1], dtype=dt, ndmin=dims)), + pdt(np.array([np.nan], dtype=dt, ndmin=dims)), + pdt(np.array([5], dtype=dt, ndmin=dims))] + x 
= pd.concat(dfs) + assert x.values.dtype == dt + + +@pytest.mark.parametrize('pdt', [pd.Series, pd.DataFrame, pd.Panel]) +@pytest.mark.parametrize('dt', np.sctypes['int']) +def test_concat_will_upcast(dt, pdt): + with catch_warnings(record=True): + dims = pdt().ndim + dfs = [pdt(np.array([1], dtype=dt, ndmin=dims)), + pdt(np.array([np.nan], ndmin=dims)), + pdt(np.array([5], dtype=dt, ndmin=dims))] + x = pd.concat(dfs) + assert x.values.dtype == 'float64' + + +def test_concat_empty_and_non_empty_frame_regression(): + # GH 18178 regression test + df1 = pd.DataFrame({'foo': [1]}) + df2 = pd.DataFrame({'foo': []}) + expected = pd.DataFrame({'foo': [1.0]}) + result = pd.concat([df1, df2]) + assert_frame_equal(result, expected) + + +def test_concat_empty_and_non_empty_series_regression(): + # GH 18187 regression test + s1 = pd.Series([1]) + s2 = pd.Series([]) + expected = s1 + result = pd.concat([s1, s2]) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py new file mode 100644 index 0000000000000..000b22d4fdd36 --- /dev/null +++ b/pandas/tests/reshape/test_melt.py @@ -0,0 +1,621 @@ +# -*- coding: utf-8 -*- +# pylint: disable-msg=W0612,E1101 + +import pytest + +from pandas import DataFrame +import pandas as pd + +from numpy import nan +import numpy as np + +from pandas import melt, lreshape, wide_to_long +import pandas.util.testing as tm +from pandas.compat import range + + +class TestMelt(object): + + def setup_method(self, method): + self.df = tm.makeTimeDataFrame()[:10] + self.df['id1'] = (self.df['A'] > 0).astype(np.int64) + self.df['id2'] = (self.df['B'] > 0).astype(np.int64) + + self.var_name = 'var' + self.value_name = 'val' + + self.df1 = pd.DataFrame([[1.067683, -1.110463, 0.20867 + ], [-1.321405, 0.368915, -1.055342], + [-0.807333, 0.08298, -0.873361]]) + self.df1.columns = [list('ABC'), list('abc')] + self.df1.columns.names = ['CAP', 'low'] + + def test_top_level_method(self): + result = 
melt(self.df) + assert result.columns.tolist() == ['variable', 'value'] + + def test_method_signatures(self): + tm.assert_frame_equal(self.df.melt(), + melt(self.df)) + + tm.assert_frame_equal(self.df.melt(id_vars=['id1', 'id2'], + value_vars=['A', 'B']), + melt(self.df, + id_vars=['id1', 'id2'], + value_vars=['A', 'B'])) + + tm.assert_frame_equal(self.df.melt(var_name=self.var_name, + value_name=self.value_name), + melt(self.df, + var_name=self.var_name, + value_name=self.value_name)) + + tm.assert_frame_equal(self.df1.melt(col_level=0), + melt(self.df1, col_level=0)) + + def test_default_col_names(self): + result = self.df.melt() + assert result.columns.tolist() == ['variable', 'value'] + + result1 = self.df.melt(id_vars=['id1']) + assert result1.columns.tolist() == ['id1', 'variable', 'value'] + + result2 = self.df.melt(id_vars=['id1', 'id2']) + assert result2.columns.tolist() == ['id1', 'id2', 'variable', 'value'] + + def test_value_vars(self): + result3 = self.df.melt(id_vars=['id1', 'id2'], value_vars='A') + assert len(result3) == 10 + + result4 = self.df.melt(id_vars=['id1', 'id2'], value_vars=['A', 'B']) + expected4 = DataFrame({'id1': self.df['id1'].tolist() * 2, + 'id2': self.df['id2'].tolist() * 2, + 'variable': ['A'] * 10 + ['B'] * 10, + 'value': (self.df['A'].tolist() + + self.df['B'].tolist())}, + columns=['id1', 'id2', 'variable', 'value']) + tm.assert_frame_equal(result4, expected4) + + def test_value_vars_types(self): + # GH 15348 + expected = DataFrame({'id1': self.df['id1'].tolist() * 2, + 'id2': self.df['id2'].tolist() * 2, + 'variable': ['A'] * 10 + ['B'] * 10, + 'value': (self.df['A'].tolist() + + self.df['B'].tolist())}, + columns=['id1', 'id2', 'variable', 'value']) + + for type_ in (tuple, list, np.array): + result = self.df.melt(id_vars=['id1', 'id2'], + value_vars=type_(('A', 'B'))) + tm.assert_frame_equal(result, expected) + + def test_vars_work_with_multiindex(self): + expected = DataFrame({ + ('A', 'a'): self.df1[('A', 'a')], + 'CAP': 
['B'] * len(self.df1), + 'low': ['b'] * len(self.df1), + 'value': self.df1[('B', 'b')], + }, columns=[('A', 'a'), 'CAP', 'low', 'value']) + + result = self.df1.melt(id_vars=[('A', 'a')], value_vars=[('B', 'b')]) + tm.assert_frame_equal(result, expected) + + def test_tuple_vars_fail_with_multiindex(self): + # melt should fail with an informative error message if + # the columns have a MultiIndex and a tuple is passed + # for id_vars or value_vars. + tuple_a = ('A', 'a') + list_a = [tuple_a] + tuple_b = ('B', 'b') + list_b = [tuple_b] + + for id_vars, value_vars in ((tuple_a, list_b), (list_a, tuple_b), + (tuple_a, tuple_b)): + with tm.assert_raises_regex(ValueError, r'MultiIndex'): + self.df1.melt(id_vars=id_vars, value_vars=value_vars) + + def test_custom_var_name(self): + result5 = self.df.melt(var_name=self.var_name) + assert result5.columns.tolist() == ['var', 'value'] + + result6 = self.df.melt(id_vars=['id1'], var_name=self.var_name) + assert result6.columns.tolist() == ['id1', 'var', 'value'] + + result7 = self.df.melt(id_vars=['id1', 'id2'], var_name=self.var_name) + assert result7.columns.tolist() == ['id1', 'id2', 'var', 'value'] + + result8 = self.df.melt(id_vars=['id1', 'id2'], value_vars='A', + var_name=self.var_name) + assert result8.columns.tolist() == ['id1', 'id2', 'var', 'value'] + + result9 = self.df.melt(id_vars=['id1', 'id2'], value_vars=['A', 'B'], + var_name=self.var_name) + expected9 = DataFrame({'id1': self.df['id1'].tolist() * 2, + 'id2': self.df['id2'].tolist() * 2, + self.var_name: ['A'] * 10 + ['B'] * 10, + 'value': (self.df['A'].tolist() + + self.df['B'].tolist())}, + columns=['id1', 'id2', self.var_name, 'value']) + tm.assert_frame_equal(result9, expected9) + + def test_custom_value_name(self): + result10 = self.df.melt(value_name=self.value_name) + assert result10.columns.tolist() == ['variable', 'val'] + + result11 = self.df.melt(id_vars=['id1'], value_name=self.value_name) + assert result11.columns.tolist() == ['id1', 'variable', 
'val'] + + result12 = self.df.melt(id_vars=['id1', 'id2'], + value_name=self.value_name) + assert result12.columns.tolist() == ['id1', 'id2', 'variable', 'val'] + + result13 = self.df.melt(id_vars=['id1', 'id2'], value_vars='A', + value_name=self.value_name) + assert result13.columns.tolist() == ['id1', 'id2', 'variable', 'val'] + + result14 = self.df.melt(id_vars=['id1', 'id2'], value_vars=['A', 'B'], + value_name=self.value_name) + expected14 = DataFrame({'id1': self.df['id1'].tolist() * 2, + 'id2': self.df['id2'].tolist() * 2, + 'variable': ['A'] * 10 + ['B'] * 10, + self.value_name: (self.df['A'].tolist() + + self.df['B'].tolist())}, + columns=['id1', 'id2', 'variable', + self.value_name]) + tm.assert_frame_equal(result14, expected14) + + def test_custom_var_and_value_name(self): + + result15 = self.df.melt(var_name=self.var_name, + value_name=self.value_name) + assert result15.columns.tolist() == ['var', 'val'] + + result16 = self.df.melt(id_vars=['id1'], var_name=self.var_name, + value_name=self.value_name) + assert result16.columns.tolist() == ['id1', 'var', 'val'] + + result17 = self.df.melt(id_vars=['id1', 'id2'], + var_name=self.var_name, + value_name=self.value_name) + assert result17.columns.tolist() == ['id1', 'id2', 'var', 'val'] + + result18 = self.df.melt(id_vars=['id1', 'id2'], value_vars='A', + var_name=self.var_name, + value_name=self.value_name) + assert result18.columns.tolist() == ['id1', 'id2', 'var', 'val'] + + result19 = self.df.melt(id_vars=['id1', 'id2'], value_vars=['A', 'B'], + var_name=self.var_name, + value_name=self.value_name) + expected19 = DataFrame({'id1': self.df['id1'].tolist() * 2, + 'id2': self.df['id2'].tolist() * 2, + self.var_name: ['A'] * 10 + ['B'] * 10, + self.value_name: (self.df['A'].tolist() + + self.df['B'].tolist())}, + columns=['id1', 'id2', self.var_name, + self.value_name]) + tm.assert_frame_equal(result19, expected19) + + df20 = self.df.copy() + df20.columns.name = 'foo' + result20 = df20.melt() + assert 
result20.columns.tolist() == ['foo', 'value'] + + def test_col_level(self): + res1 = self.df1.melt(col_level=0) + res2 = self.df1.melt(col_level='CAP') + assert res1.columns.tolist() == ['CAP', 'value'] + assert res2.columns.tolist() == ['CAP', 'value'] + + def test_multiindex(self): + res = self.df1.melt() + assert res.columns.tolist() == ['CAP', 'low', 'value'] + + +class TestLreshape(object): + + def test_pairs(self): + data = {'birthdt': ['08jan2009', '20dec2008', '30dec2008', '21dec2008', + '11jan2009'], + 'birthwt': [1766, 3301, 1454, 3139, 4133], + 'id': [101, 102, 103, 104, 105], + 'sex': ['Male', 'Female', 'Female', 'Female', 'Female'], + 'visitdt1': ['11jan2009', '22dec2008', '04jan2009', + '29dec2008', '20jan2009'], + 'visitdt2': + ['21jan2009', nan, '22jan2009', '31dec2008', '03feb2009'], + 'visitdt3': ['05feb2009', nan, nan, '02jan2009', '15feb2009'], + 'wt1': [1823, 3338, 1549, 3298, 4306], + 'wt2': [2011.0, nan, 1892.0, 3338.0, 4575.0], + 'wt3': [2293.0, nan, nan, 3377.0, 4805.0]} + + df = DataFrame(data) + + spec = {'visitdt': ['visitdt%d' % i for i in range(1, 4)], + 'wt': ['wt%d' % i for i in range(1, 4)]} + result = lreshape(df, spec) + + exp_data = {'birthdt': + ['08jan2009', '20dec2008', '30dec2008', '21dec2008', + '11jan2009', '08jan2009', '30dec2008', '21dec2008', + '11jan2009', '08jan2009', '21dec2008', '11jan2009'], + 'birthwt': [1766, 3301, 1454, 3139, 4133, 1766, 1454, 3139, + 4133, 1766, 3139, 4133], + 'id': [101, 102, 103, 104, 105, 101, 103, 104, 105, 101, + 104, 105], + 'sex': ['Male', 'Female', 'Female', 'Female', 'Female', + 'Male', 'Female', 'Female', 'Female', 'Male', + 'Female', 'Female'], + 'visitdt': ['11jan2009', '22dec2008', '04jan2009', + '29dec2008', '20jan2009', '21jan2009', + '22jan2009', '31dec2008', '03feb2009', + '05feb2009', '02jan2009', '15feb2009'], + 'wt': [1823.0, 3338.0, 1549.0, 3298.0, 4306.0, 2011.0, + 1892.0, 3338.0, 4575.0, 2293.0, 3377.0, 4805.0]} + exp = DataFrame(exp_data, columns=result.columns) + 
tm.assert_frame_equal(result, exp) + + result = lreshape(df, spec, dropna=False) + exp_data = {'birthdt': + ['08jan2009', '20dec2008', '30dec2008', '21dec2008', + '11jan2009', '08jan2009', '20dec2008', '30dec2008', + '21dec2008', '11jan2009', '08jan2009', '20dec2008', + '30dec2008', '21dec2008', '11jan2009'], + 'birthwt': [1766, 3301, 1454, 3139, 4133, 1766, 3301, 1454, + 3139, 4133, 1766, 3301, 1454, 3139, 4133], + 'id': [101, 102, 103, 104, 105, 101, 102, 103, 104, 105, + 101, 102, 103, 104, 105], + 'sex': ['Male', 'Female', 'Female', 'Female', 'Female', + 'Male', 'Female', 'Female', 'Female', 'Female', + 'Male', 'Female', 'Female', 'Female', 'Female'], + 'visitdt': ['11jan2009', '22dec2008', '04jan2009', + '29dec2008', '20jan2009', '21jan2009', nan, + '22jan2009', '31dec2008', '03feb2009', + '05feb2009', nan, nan, '02jan2009', + '15feb2009'], + 'wt': [1823.0, 3338.0, 1549.0, 3298.0, 4306.0, 2011.0, nan, + 1892.0, 3338.0, 4575.0, 2293.0, nan, nan, 3377.0, + 4805.0]} + exp = DataFrame(exp_data, columns=result.columns) + tm.assert_frame_equal(result, exp) + + spec = {'visitdt': ['visitdt%d' % i for i in range(1, 3)], + 'wt': ['wt%d' % i for i in range(1, 4)]} + pytest.raises(ValueError, lreshape, df, spec) + + +class TestWideToLong(object): + + def test_simple(self): + np.random.seed(123) + x = np.random.randn(3) + df = pd.DataFrame({"A1970": {0: "a", + 1: "b", + 2: "c"}, + "A1980": {0: "d", + 1: "e", + 2: "f"}, + "B1970": {0: 2.5, + 1: 1.2, + 2: .7}, + "B1980": {0: 3.2, + 1: 1.3, + 2: .1}, + "X": dict(zip( + range(3), x))}) + df["id"] = df.index + exp_data = {"X": x.tolist() + x.tolist(), + "A": ['a', 'b', 'c', 'd', 'e', 'f'], + "B": [2.5, 1.2, 0.7, 3.2, 1.3, 0.1], + "year": [1970, 1970, 1970, 1980, 1980, 1980], + "id": [0, 1, 2, 0, 1, 2]} + expected = DataFrame(exp_data) + expected = expected.set_index(['id', 'year'])[["X", "A", "B"]] + result = wide_to_long(df, ["A", "B"], i="id", j="year") + tm.assert_frame_equal(result, expected) + + def test_stubs(self): + # 
GH9204 + df = pd.DataFrame([[0, 1, 2, 3, 8], [4, 5, 6, 7, 9]]) + df.columns = ['id', 'inc1', 'inc2', 'edu1', 'edu2'] + stubs = ['inc', 'edu'] + + # TODO: unused? + df_long = pd.wide_to_long(df, stubs, i='id', j='age') # noqa + + assert stubs == ['inc', 'edu'] + + def test_separating_character(self): + # GH14779 + np.random.seed(123) + x = np.random.randn(3) + df = pd.DataFrame({"A.1970": {0: "a", + 1: "b", + 2: "c"}, + "A.1980": {0: "d", + 1: "e", + 2: "f"}, + "B.1970": {0: 2.5, + 1: 1.2, + 2: .7}, + "B.1980": {0: 3.2, + 1: 1.3, + 2: .1}, + "X": dict(zip( + range(3), x))}) + df["id"] = df.index + exp_data = {"X": x.tolist() + x.tolist(), + "A": ['a', 'b', 'c', 'd', 'e', 'f'], + "B": [2.5, 1.2, 0.7, 3.2, 1.3, 0.1], + "year": [1970, 1970, 1970, 1980, 1980, 1980], + "id": [0, 1, 2, 0, 1, 2]} + expected = DataFrame(exp_data) + expected = expected.set_index(['id', 'year'])[["X", "A", "B"]] + result = wide_to_long(df, ["A", "B"], i="id", j="year", sep=".") + tm.assert_frame_equal(result, expected) + + def test_escapable_characters(self): + np.random.seed(123) + x = np.random.randn(3) + df = pd.DataFrame({"A(quarterly)1970": {0: "a", + 1: "b", + 2: "c"}, + "A(quarterly)1980": {0: "d", + 1: "e", + 2: "f"}, + "B(quarterly)1970": {0: 2.5, + 1: 1.2, + 2: .7}, + "B(quarterly)1980": {0: 3.2, + 1: 1.3, + 2: .1}, + "X": dict(zip( + range(3), x))}) + df["id"] = df.index + exp_data = {"X": x.tolist() + x.tolist(), + "A(quarterly)": ['a', 'b', 'c', 'd', 'e', 'f'], + "B(quarterly)": [2.5, 1.2, 0.7, 3.2, 1.3, 0.1], + "year": [1970, 1970, 1970, 1980, 1980, 1980], + "id": [0, 1, 2, 0, 1, 2]} + expected = DataFrame(exp_data) + expected = expected.set_index( + ['id', 'year'])[["X", "A(quarterly)", "B(quarterly)"]] + result = wide_to_long(df, ["A(quarterly)", "B(quarterly)"], + i="id", j="year") + tm.assert_frame_equal(result, expected) + + def test_unbalanced(self): + # test that we can have a varying amount of time variables + df = pd.DataFrame({'A2010': [1.0, 2.0], + 'A2011': [3.0, 
4.0], + 'B2010': [5.0, 6.0], + 'X': ['X1', 'X2']}) + df['id'] = df.index + exp_data = {'X': ['X1', 'X1', 'X2', 'X2'], + 'A': [1.0, 3.0, 2.0, 4.0], + 'B': [5.0, np.nan, 6.0, np.nan], + 'id': [0, 0, 1, 1], + 'year': [2010, 2011, 2010, 2011]} + expected = pd.DataFrame(exp_data) + expected = expected.set_index(['id', 'year'])[["X", "A", "B"]] + result = wide_to_long(df, ['A', 'B'], i='id', j='year') + tm.assert_frame_equal(result, expected) + + def test_character_overlap(self): + # Test we handle overlapping characters in both id_vars and value_vars + df = pd.DataFrame({ + 'A11': ['a11', 'a22', 'a33'], + 'A12': ['a21', 'a22', 'a23'], + 'B11': ['b11', 'b12', 'b13'], + 'B12': ['b21', 'b22', 'b23'], + 'BB11': [1, 2, 3], + 'BB12': [4, 5, 6], + 'BBBX': [91, 92, 93], + 'BBBZ': [91, 92, 93] + }) + df['id'] = df.index + expected = pd.DataFrame({ + 'BBBX': [91, 92, 93, 91, 92, 93], + 'BBBZ': [91, 92, 93, 91, 92, 93], + 'A': ['a11', 'a22', 'a33', 'a21', 'a22', 'a23'], + 'B': ['b11', 'b12', 'b13', 'b21', 'b22', 'b23'], + 'BB': [1, 2, 3, 4, 5, 6], + 'id': [0, 1, 2, 0, 1, 2], + 'year': [11, 11, 11, 12, 12, 12]}) + expected = expected.set_index(['id', 'year'])[ + ['BBBX', 'BBBZ', 'A', 'B', 'BB']] + result = wide_to_long(df, ['A', 'B', 'BB'], i='id', j='year') + tm.assert_frame_equal(result.sort_index(axis=1), + expected.sort_index(axis=1)) + + def test_invalid_separator(self): + # if an invalid separator is supplied a empty data frame is returned + sep = 'nope!' 
+ df = pd.DataFrame({'A2010': [1.0, 2.0], + 'A2011': [3.0, 4.0], + 'B2010': [5.0, 6.0], + 'X': ['X1', 'X2']}) + df['id'] = df.index + exp_data = {'X': '', + 'A2010': [], + 'A2011': [], + 'B2010': [], + 'id': [], + 'year': [], + 'A': [], + 'B': []} + expected = pd.DataFrame(exp_data).astype({'year': 'int'}) + expected = expected.set_index(['id', 'year'])[[ + 'X', 'A2010', 'A2011', 'B2010', 'A', 'B']] + expected.index.set_levels([0, 1], level=0, inplace=True) + result = wide_to_long(df, ['A', 'B'], i='id', j='year', sep=sep) + tm.assert_frame_equal(result.sort_index(axis=1), + expected.sort_index(axis=1)) + + def test_num_string_disambiguation(self): + # Test that we can disambiguate number value_vars from + # string value_vars + df = pd.DataFrame({ + 'A11': ['a11', 'a22', 'a33'], + 'A12': ['a21', 'a22', 'a23'], + 'B11': ['b11', 'b12', 'b13'], + 'B12': ['b21', 'b22', 'b23'], + 'BB11': [1, 2, 3], + 'BB12': [4, 5, 6], + 'Arating': [91, 92, 93], + 'Arating_old': [91, 92, 93] + }) + df['id'] = df.index + expected = pd.DataFrame({ + 'Arating': [91, 92, 93, 91, 92, 93], + 'Arating_old': [91, 92, 93, 91, 92, 93], + 'A': ['a11', 'a22', 'a33', 'a21', 'a22', 'a23'], + 'B': ['b11', 'b12', 'b13', 'b21', 'b22', 'b23'], + 'BB': [1, 2, 3, 4, 5, 6], + 'id': [0, 1, 2, 0, 1, 2], + 'year': [11, 11, 11, 12, 12, 12]}) + expected = expected.set_index(['id', 'year'])[ + ['Arating', 'Arating_old', 'A', 'B', 'BB']] + result = wide_to_long(df, ['A', 'B', 'BB'], i='id', j='year') + tm.assert_frame_equal(result.sort_index(axis=1), + expected.sort_index(axis=1)) + + def test_invalid_suffixtype(self): + # If all stubs names end with a string, but a numeric suffix is + # assumed, an empty data frame is returned + df = pd.DataFrame({'Aone': [1.0, 2.0], + 'Atwo': [3.0, 4.0], + 'Bone': [5.0, 6.0], + 'X': ['X1', 'X2']}) + df['id'] = df.index + exp_data = {'X': '', + 'Aone': [], + 'Atwo': [], + 'Bone': [], + 'id': [], + 'year': [], + 'A': [], + 'B': []} + expected = 
pd.DataFrame(exp_data).astype({'year': 'int'}) + + expected = expected.set_index(['id', 'year']) + expected.index.set_levels([0, 1], level=0, inplace=True) + result = wide_to_long(df, ['A', 'B'], i='id', j='year') + tm.assert_frame_equal(result.sort_index(axis=1), + expected.sort_index(axis=1)) + + def test_multiple_id_columns(self): + # Taken from http://www.ats.ucla.edu/stat/stata/modules/reshapel.htm + df = pd.DataFrame({ + 'famid': [1, 1, 1, 2, 2, 2, 3, 3, 3], + 'birth': [1, 2, 3, 1, 2, 3, 1, 2, 3], + 'ht1': [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1], + 'ht2': [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9] + }) + expected = pd.DataFrame({ + 'ht': [2.8, 3.4, 2.9, 3.8, 2.2, 2.9, 2.0, 3.2, 1.8, + 2.8, 1.9, 2.4, 2.2, 3.3, 2.3, 3.4, 2.1, 2.9], + 'famid': [1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3], + 'birth': [1, 1, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3], + 'age': [1, 2, 1, 2, 1, 2, 1, 2, 1, + 2, 1, 2, 1, 2, 1, 2, 1, 2] + }) + expected = expected.set_index(['famid', 'birth', 'age'])[['ht']] + result = wide_to_long(df, 'ht', i=['famid', 'birth'], j='age') + tm.assert_frame_equal(result, expected) + + def test_non_unique_idvars(self): + # GH16382 + # Raise an error message if non unique id vars (i) are passed + df = pd.DataFrame({ + 'A_A1': [1, 2, 3, 4, 5], + 'B_B1': [1, 2, 3, 4, 5], + 'x': [1, 1, 1, 1, 1] + }) + with pytest.raises(ValueError): + wide_to_long(df, ['A_A', 'B_B'], i='x', j='colname') + + def test_cast_j_int(self): + df = pd.DataFrame({ + 'actor_1': ['CCH Pounder', 'Johnny Depp', 'Christoph Waltz'], + 'actor_2': ['Joel David Moore', 'Orlando Bloom', 'Rory Kinnear'], + 'actor_fb_likes_1': [1000.0, 40000.0, 11000.0], + 'actor_fb_likes_2': [936.0, 5000.0, 393.0], + 'title': ['Avatar', "Pirates of the Caribbean", 'Spectre']}) + + expected = pd.DataFrame({ + 'actor': ['CCH Pounder', + 'Johnny Depp', + 'Christoph Waltz', + 'Joel David Moore', + 'Orlando Bloom', + 'Rory Kinnear'], + 'actor_fb_likes': [1000.0, 40000.0, 11000.0, 936.0, 5000.0, 
393.0], + 'num': [1, 1, 1, 2, 2, 2], + 'title': ['Avatar', + 'Pirates of the Caribbean', + 'Spectre', + 'Avatar', + 'Pirates of the Caribbean', + 'Spectre']}).set_index(['title', 'num']) + result = wide_to_long(df, ['actor', 'actor_fb_likes'], + i='title', j='num', sep='_') + + tm.assert_frame_equal(result, expected) + + def test_identical_stubnames(self): + df = pd.DataFrame({'A2010': [1.0, 2.0], + 'A2011': [3.0, 4.0], + 'B2010': [5.0, 6.0], + 'A': ['X1', 'X2']}) + with pytest.raises(ValueError): + wide_to_long(df, ['A', 'B'], i='A', j='colname') + + def test_nonnumeric_suffix(self): + df = pd.DataFrame({'treatment_placebo': [1.0, 2.0], + 'treatment_test': [3.0, 4.0], + 'result_placebo': [5.0, 6.0], + 'A': ['X1', 'X2']}) + expected = pd.DataFrame({ + 'A': ['X1', 'X1', 'X2', 'X2'], + 'colname': ['placebo', 'test', 'placebo', 'test'], + 'result': [5.0, np.nan, 6.0, np.nan], + 'treatment': [1.0, 3.0, 2.0, 4.0]}) + expected = expected.set_index(['A', 'colname']) + result = wide_to_long(df, ['result', 'treatment'], + i='A', j='colname', suffix='[a-z]+', sep='_') + tm.assert_frame_equal(result, expected) + + def test_mixed_type_suffix(self): + df = pd.DataFrame({ + 'A': ['X1', 'X2'], + 'result_1': [0, 9], + 'result_foo': [5.0, 6.0], + 'treatment_1': [1.0, 2.0], + 'treatment_foo': [3.0, 4.0]}) + expected = pd.DataFrame({ + 'A': ['X1', 'X2', 'X1', 'X2'], + 'colname': ['1', '1', 'foo', 'foo'], + 'result': [0.0, 9.0, 5.0, 6.0], + 'treatment': [1.0, 2.0, 3.0, 4.0]}).set_index(['A', 'colname']) + result = wide_to_long(df, ['result', 'treatment'], + i='A', j='colname', suffix='.+', sep='_') + tm.assert_frame_equal(result, expected) + + def test_float_suffix(self): + df = pd.DataFrame({ + 'treatment_1.1': [1.0, 2.0], + 'treatment_2.1': [3.0, 4.0], + 'result_1.2': [5.0, 6.0], + 'result_1': [0, 9], + 'A': ['X1', 'X2']}) + expected = pd.DataFrame({ + 'A': ['X1', 'X1', 'X1', 'X1', 'X2', 'X2', 'X2', 'X2'], + 'colname': [1, 1.1, 1.2, 2.1, 1, 1.1, 1.2, 2.1], + 'result': [0.0, np.nan, 
5.0, np.nan, 9.0, np.nan, 6.0, np.nan], + 'treatment': [np.nan, 1.0, np.nan, 3.0, np.nan, 2.0, np.nan, 4.0]}) + expected = expected.set_index(['A', 'colname']) + result = wide_to_long(df, ['result', 'treatment'], + i='A', j='colname', suffix='[0-9.]+', sep='_') + tm.assert_frame_equal(result, expected) diff --git a/pandas/tools/tests/test_pivot.py b/pandas/tests/reshape/test_pivot.py similarity index 79% rename from pandas/tools/tests/test_pivot.py rename to pandas/tests/reshape/test_pivot.py index f5d91d0088306..786c57a4a82df 100644 --- a/pandas/tools/tests/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1,19 +1,24 @@ + from datetime import datetime, date, timedelta +import pytest + + import numpy as np +from collections import OrderedDict import pandas as pd from pandas import (DataFrame, Series, Index, MultiIndex, - Grouper, date_range, concat) -from pandas.tools.pivot import pivot_table, crosstab + Grouper, date_range, concat, Categorical) +from pandas.core.reshape.pivot import pivot_table, crosstab from pandas.compat import range, product import pandas.util.testing as tm -from pandas.tseries.util import pivot_annual, isleapyear +from pandas.api.types import CategoricalDtype as CDT -class TestPivotTable(tm.TestCase): +class TestPivotTable(object): - def setUp(self): + def setup_method(self, method): self.data = DataFrame({'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar', 'foo', 'foo', 'foo'], @@ -41,14 +46,14 @@ def test_pivot_table(self): pivot_table(self.data, values='D', index=index) if len(index) > 1: - self.assertEqual(table.index.names, tuple(index)) + assert table.index.names == tuple(index) else: - self.assertEqual(table.index.name, index[0]) + assert table.index.name == index[0] if len(columns) > 1: - self.assertEqual(table.columns.names, columns) + assert table.columns.names == columns else: - self.assertEqual(table.columns.name, columns[0]) + assert table.columns.name == columns[0] expected = self.data.groupby( index + 
[columns])['D'].agg(np.mean).unstack() @@ -86,6 +91,58 @@ def test_pivot_table_dropna(self): tm.assert_index_equal(pv_col.columns, m) tm.assert_index_equal(pv_ind.index, m) + def test_pivot_table_categorical(self): + + raw_cat1 = Categorical(["a", "a", "b", "b"], + categories=["a", "b", "z"], ordered=True) + raw_cat2 = Categorical(["c", "d", "c", "d"], + categories=["c", "d", "y"], ordered=True) + df = DataFrame({"A": raw_cat1, "B": raw_cat2, "values": [1, 2, 3, 4]}) + result = pd.pivot_table(df, values='values', index=['A', 'B']) + + exp_index = pd.MultiIndex.from_product( + [Categorical(["a", "b", "z"], ordered=True), + Categorical(["c", "d", "y"], ordered=True)], + names=['A', 'B']) + expected = DataFrame( + {'values': [1, 2, np.nan, 3, 4, np.nan, np.nan, np.nan, np.nan]}, + index=exp_index) + tm.assert_frame_equal(result, expected) + + def test_pivot_table_dropna_categoricals(self): + # GH 15193 + categories = ['a', 'b', 'c', 'd'] + + df = DataFrame({'A': ['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'c'], + 'B': [1, 2, 3, 1, 2, 3, 1, 2, 3], + 'C': range(0, 9)}) + + df['A'] = df['A'].astype(CDT(categories, ordered=False)) + result_true = df.pivot_table(index='B', columns='A', values='C', + dropna=True) + expected_columns = Series(['a', 'b', 'c'], name='A') + expected_columns = expected_columns.astype( + CDT(categories, ordered=False)) + expected_index = Series([1, 2, 3], name='B') + expected_true = DataFrame([[0.0, 3.0, 6.0], + [1.0, 4.0, 7.0], + [2.0, 5.0, 8.0]], + index=expected_index, + columns=expected_columns,) + tm.assert_frame_equal(expected_true, result_true) + + result_false = df.pivot_table(index='B', columns='A', values='C', + dropna=False) + expected_columns = ( + Series(['a', 'b', 'c', 'd'], name='A').astype('category') + ) + expected_false = DataFrame([[0.0, 3.0, 6.0, np.NaN], + [1.0, 4.0, 7.0, np.NaN], + [2.0, 5.0, 8.0, np.NaN]], + index=expected_index, + columns=expected_columns,) + tm.assert_frame_equal(expected_false, result_false) + def 
test_pass_array(self): result = self.data.pivot_table( 'D', index=self.data.A, columns=self.data.C) @@ -111,7 +168,7 @@ def test_pivot_dtypes(self): # can convert dtypes f = DataFrame({'a': ['cat', 'bat', 'cat', 'bat'], 'v': [ 1, 2, 3, 4], 'i': ['a', 'b', 'a', 'b']}) - self.assertEqual(f.dtypes['v'], 'int64') + assert f.dtypes['v'] == 'int64' z = pivot_table(f, values='v', index=['a'], columns=[ 'i'], fill_value=0, aggfunc=np.sum) @@ -122,7 +179,7 @@ def test_pivot_dtypes(self): # cannot convert dtypes f = DataFrame({'a': ['cat', 'bat', 'cat', 'bat'], 'v': [ 1.5, 2.5, 3.5, 4.5], 'i': ['a', 'b', 'a', 'b']}) - self.assertEqual(f.dtypes['v'], 'float64') + assert f.dtypes['v'] == 'float64' z = pivot_table(f, values='v', index=['a'], columns=[ 'i'], fill_value=0, aggfunc=np.mean) @@ -130,6 +187,24 @@ def test_pivot_dtypes(self): expected = Series(dict(float64=2)) tm.assert_series_equal(result, expected) + @pytest.mark.parametrize('columns,values', + [('bool1', ['float1', 'float2']), + ('bool1', ['float1', 'float2', 'bool1']), + ('bool2', ['float1', 'float2', 'bool1'])]) + def test_pivot_preserve_dtypes(self, columns, values): + # GH 7142 regression test + v = np.arange(5, dtype=np.float64) + df = DataFrame({'float1': v, 'float2': v + 2.0, + 'bool1': v <= 2, 'bool2': v <= 3}) + + df_res = df.reset_index().pivot_table( + index='index', columns=columns, values=values) + + result = dict(df_res.dtypes) + expected = {col: np.dtype('O') if col[0].startswith('b') + else np.dtype('float64') for col in df_res} + assert result == expected + def test_pivot_no_values(self): # GH 14380 idx = pd.DatetimeIndex(['2011-01-01', '2011-02-01', '2011-01-02', @@ -212,10 +287,10 @@ def test_pivot_index_with_nan(self): df.loc[1, 'b'] = df.loc[4, 'b'] = nan pv = df.pivot('a', 'b', 'c') - self.assertEqual(pv.notnull().values.sum(), len(df)) + assert pv.notna().values.sum() == len(df) for _, row in df.iterrows(): - self.assertEqual(pv.loc[row['a'], row['b']], row['c']) + assert pv.loc[row['a'], 
row['b']] == row['c'] tm.assert_frame_equal(df.pivot('b', 'a', 'c'), pv.T) @@ -304,7 +379,7 @@ def _check_output(result, values_col, index=['A', 'B'], expected_col_margins = self.data.groupby(index)[values_col].mean() tm.assert_series_equal(col_margins, expected_col_margins, check_names=False) - self.assertEqual(col_margins.name, margins_col) + assert col_margins.name == margins_col result = result.sort_index() index_margins = result.loc[(margins_col, '')].iloc[:-1] @@ -312,11 +387,11 @@ def _check_output(result, values_col, index=['A', 'B'], expected_ix_margins = self.data.groupby(columns)[values_col].mean() tm.assert_series_equal(index_margins, expected_ix_margins, check_names=False) - self.assertEqual(index_margins.name, (margins_col, '')) + assert index_margins.name == (margins_col, '') grand_total_margins = result.loc[(margins_col, ''), margins_col] expected_total_margins = self.data[values_col].mean() - self.assertEqual(grand_total_margins, expected_total_margins) + assert grand_total_margins == expected_total_margins # column specified result = self.data.pivot_table(values='D', index=['A', 'B'], @@ -345,18 +420,18 @@ def _check_output(result, values_col, index=['A', 'B'], aggfunc=np.mean) for value_col in table.columns: totals = table.loc[('All', ''), value_col] - self.assertEqual(totals, self.data[value_col].mean()) + assert totals == self.data[value_col].mean() # no rows rtable = self.data.pivot_table(columns=['AA', 'BB'], margins=True, aggfunc=np.mean) - tm.assertIsInstance(rtable, Series) + assert isinstance(rtable, Series) table = self.data.pivot_table(index=['AA', 'BB'], margins=True, aggfunc='mean') for item in ['DD', 'EE', 'FF']: totals = table.loc[('All', ''), item] - self.assertEqual(totals, self.data[item].mean()) + assert totals == self.data[item].mean() # issue number #8349: pivot_table with margins and dictionary aggfunc data = [ @@ -404,6 +479,41 @@ def _check_output(result, values_col, index=['A', 'B'], tm.assert_frame_equal(result['SALARY'], 
expected['SALARY']) + def test_margins_dtype(self): + # GH 17013 + + df = self.data.copy() + df[['D', 'E', 'F']] = np.arange(len(df) * 3).reshape(len(df), 3) + + mi_val = list(product(['bar', 'foo'], ['one', 'two'])) + [('All', '')] + mi = MultiIndex.from_tuples(mi_val, names=('A', 'B')) + expected = DataFrame({'dull': [12, 21, 3, 9, 45], + 'shiny': [33, 0, 36, 51, 120]}, + index=mi).rename_axis('C', axis=1) + expected['All'] = expected['dull'] + expected['shiny'] + + result = df.pivot_table(values='D', index=['A', 'B'], + columns='C', margins=True, + aggfunc=np.sum, fill_value=0) + + tm.assert_frame_equal(expected, result) + + @pytest.mark.xfail(reason='GH 17035 (len of floats is casted back to ' + 'floats)') + def test_margins_dtype_len(self): + mi_val = list(product(['bar', 'foo'], ['one', 'two'])) + [('All', '')] + mi = MultiIndex.from_tuples(mi_val, names=('A', 'B')) + expected = DataFrame({'dull': [1, 1, 2, 1, 5], + 'shiny': [2, 0, 2, 2, 6]}, + index=mi).rename_axis('C', axis=1) + expected['All'] = expected['dull'] + expected['shiny'] + + result = self.data.pivot_table(values='D', index=['A', 'B'], + columns='C', margins=True, + aggfunc=len, fill_value=0) + + tm.assert_frame_equal(expected, result) + def test_pivot_integer_columns(self): # caused by upstream bug in unstack @@ -477,10 +587,10 @@ def test_pivot_columns_lexsorted(self): columns=['Index', 'Symbol', 'Year'], aggfunc='mean') - self.assertTrue(pivoted.columns.is_monotonic) + assert pivoted.columns.is_monotonic def test_pivot_complex_aggfunc(self): - f = {'D': ['std'], 'E': ['sum']} + f = OrderedDict([('D', ['std']), ('E', ['sum'])]) expected = self.data.groupby(['A', 'B']).agg(f).unstack('B') result = self.data.pivot_table(index='A', columns='B', aggfunc=f) @@ -491,21 +601,21 @@ def test_margins_no_values_no_cols(self): result = self.data[['A', 'B']].pivot_table( index=['A', 'B'], aggfunc=len, margins=True) result_list = result.tolist() - self.assertEqual(sum(result_list[:-1]), result_list[-1]) + 
assert sum(result_list[:-1]) == result_list[-1] def test_margins_no_values_two_rows(self): # Regression test on pivot table: no values passed but rows are a # multi-index result = self.data[['A', 'B', 'C']].pivot_table( index=['A', 'B'], columns='C', aggfunc=len, margins=True) - self.assertEqual(result.All.tolist(), [3.0, 1.0, 4.0, 3.0, 11.0]) + assert result.All.tolist() == [3.0, 1.0, 4.0, 3.0, 11.0] def test_margins_no_values_one_row_one_col(self): # Regression test on pivot table: no values passed but row and col # defined result = self.data[['A', 'B']].pivot_table( index='A', columns='B', aggfunc=len, margins=True) - self.assertEqual(result.All.tolist(), [4.0, 7.0, 11.0]) + assert result.All.tolist() == [4.0, 7.0, 11.0] def test_margins_no_values_two_row_two_cols(self): # Regression test on pivot table: no values passed but rows and cols @@ -514,22 +624,22 @@ def test_margins_no_values_two_row_two_cols(self): 'e', 'f', 'g', 'h', 'i', 'j', 'k'] result = self.data[['A', 'B', 'C', 'D']].pivot_table( index=['A', 'B'], columns=['C', 'D'], aggfunc=len, margins=True) - self.assertEqual(result.All.tolist(), [3.0, 1.0, 4.0, 3.0, 11.0]) + assert result.All.tolist() == [3.0, 1.0, 4.0, 3.0, 11.0] def test_pivot_table_with_margins_set_margin_name(self): - # GH 3335 + # see gh-3335 for margin_name in ['foo', 'one', 666, None, ['a', 'b']]: - with self.assertRaises(ValueError): + with pytest.raises(ValueError): # multi-index index pivot_table(self.data, values='D', index=['A', 'B'], columns=['C'], margins=True, margins_name=margin_name) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): # multi-index column pivot_table(self.data, values='D', index=['C'], columns=['A', 'B'], margins=True, margins_name=margin_name) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): # non-multi-index index/column pivot_table(self.data, values='D', index=['A'], columns=['B'], margins=True, @@ -592,10 +702,10 @@ def test_pivot_timegrouper(self): 
values='Quantity', aggfunc=np.sum) tm.assert_frame_equal(result, expected.T) - self.assertRaises(KeyError, lambda: pivot_table( + pytest.raises(KeyError, lambda: pivot_table( df, index=Grouper(freq='6MS', key='foo'), columns='Buyer', values='Quantity', aggfunc=np.sum)) - self.assertRaises(KeyError, lambda: pivot_table( + pytest.raises(KeyError, lambda: pivot_table( df, index='Buyer', columns=Grouper(freq='6MS', key='foo'), values='Quantity', aggfunc=np.sum)) @@ -612,10 +722,10 @@ def test_pivot_timegrouper(self): values='Quantity', aggfunc=np.sum) tm.assert_frame_equal(result, expected.T) - self.assertRaises(ValueError, lambda: pivot_table( + pytest.raises(ValueError, lambda: pivot_table( df, index=Grouper(freq='6MS', level='foo'), columns='Buyer', values='Quantity', aggfunc=np.sum)) - self.assertRaises(ValueError, lambda: pivot_table( + pytest.raises(ValueError, lambda: pivot_table( df, index='Buyer', columns=Grouper(freq='6MS', level='foo'), values='Quantity', aggfunc=np.sum)) @@ -798,6 +908,40 @@ def test_pivot_dtaccessor(self): index=['X', 'Y'], columns=exp_col) tm.assert_frame_equal(result, expected) + def test_daily(self): + rng = date_range('1/1/2000', '12/31/2004', freq='D') + ts = Series(np.random.randn(len(rng)), index=rng) + + annual = pivot_table(DataFrame(ts), index=ts.index.year, + columns=ts.index.dayofyear) + annual.columns = annual.columns.droplevel(0) + + doy = np.asarray(ts.index.dayofyear) + + for i in range(1, 367): + subset = ts[doy == i] + subset.index = subset.index.year + + result = annual[i].dropna() + tm.assert_series_equal(result, subset, check_names=False) + assert result.name == i + + def test_monthly(self): + rng = date_range('1/1/2000', '12/31/2004', freq='M') + ts = Series(np.random.randn(len(rng)), index=rng) + + annual = pivot_table(pd.DataFrame(ts), index=ts.index.year, + columns=ts.index.month) + annual.columns = annual.columns.droplevel(0) + + month = ts.index.month + for i in range(1, 13): + subset = ts[month == i] + 
subset.index = subset.index.year + result = annual[i].dropna() + tm.assert_series_equal(result, subset, check_names=False) + assert result.name == i + def test_pivot_table_with_iterator_values(self): # GH 12017 aggs = {'D': 'sum', 'E': 'mean'} @@ -839,6 +983,8 @@ def test_pivot_table_margins_name_with_aggfunc_list(self): expected = pd.DataFrame(table.values, index=ix, columns=cols) tm.assert_frame_equal(table, expected) + @pytest.mark.xfail(reason='GH 17035 (np.mean of ints is casted back to ' + 'ints)') def test_categorical_margins(self): # GH 10989 df = pd.DataFrame({'x': np.arange(8), @@ -849,14 +995,23 @@ def test_categorical_margins(self): expected.index = Index([0, 1, 'All'], name='y') expected.columns = Index([0, 1, 'All'], name='z') - data = df.copy() - table = data.pivot_table('x', 'y', 'z', margins=True) + table = df.pivot_table('x', 'y', 'z', margins=True) tm.assert_frame_equal(table, expected) - data = df.copy() - data.y = data.y.astype('category') - data.z = data.z.astype('category') - table = data.pivot_table('x', 'y', 'z', margins=True) + @pytest.mark.xfail(reason='GH 17035 (np.mean of ints is casted back to ' + 'ints)') + def test_categorical_margins_category(self): + df = pd.DataFrame({'x': np.arange(8), + 'y': np.arange(8) // 4, + 'z': np.arange(8) % 2}) + + expected = pd.DataFrame([[1.0, 2.0, 1.5], [5, 6, 5.5], [3, 4, 3.5]]) + expected.index = Index([0, 1, 'All'], name='y') + expected.columns = Index([0, 1, 'All'], name='z') + + df.y = df.y.astype('category') + df.z = df.z.astype('category') + table = df.pivot_table('x', 'y', 'z', margins=True) tm.assert_frame_equal(table, expected) def test_categorical_aggfunc(self): @@ -906,10 +1061,103 @@ def test_categorical_pivot_index_ordering(self): columns=expected_columns) tm.assert_frame_equal(result, expected) + def test_pivot_table_not_series(self): + # GH 4386 + # pivot_table always returns a DataFrame + # when values is not list like and columns is None + # and aggfunc is not instance of list + df = 
DataFrame({'col1': [3, 4, 5], + 'col2': ['C', 'D', 'E'], + 'col3': [1, 3, 9]}) + + result = df.pivot_table('col1', index=['col3', 'col2'], aggfunc=np.sum) + m = MultiIndex.from_arrays([[1, 3, 9], + ['C', 'D', 'E']], + names=['col3', 'col2']) + expected = DataFrame([3, 4, 5], + index=m, columns=['col1']) + + tm.assert_frame_equal(result, expected) + + result = df.pivot_table( + 'col1', index='col3', columns='col2', aggfunc=np.sum + ) + expected = DataFrame([[3, np.NaN, np.NaN], + [np.NaN, 4, np.NaN], + [np.NaN, np.NaN, 5]], + index=Index([1, 3, 9], name='col3'), + columns=Index(['C', 'D', 'E'], name='col2')) + + tm.assert_frame_equal(result, expected) + + result = df.pivot_table('col1', index='col3', aggfunc=[np.sum]) + m = MultiIndex.from_arrays([['sum'], + ['col1']]) + expected = DataFrame([3, 4, 5], + index=Index([1, 3, 9], name='col3'), + columns=m) + + tm.assert_frame_equal(result, expected) + + def test_pivot_margins_name_unicode(self): + # issue #13292 + greek = u'\u0394\u03bf\u03ba\u03b9\u03bc\u03ae' + frame = pd.DataFrame({'foo': [1, 2, 3]}) + table = pd.pivot_table(frame, index=['foo'], aggfunc=len, margins=True, + margins_name=greek) + index = pd.Index([1, 2, 3, greek], dtype='object', name='foo') + expected = pd.DataFrame(index=index) + tm.assert_frame_equal(table, expected) + + def test_pivot_string_as_func(self): + # GH #18713 + # for correctness purposes + data = DataFrame({'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', + 'bar', 'bar', 'foo', 'foo', 'foo'], + 'B': ['one', 'one', 'one', 'two', 'one', 'one', + 'one', 'two', 'two', 'two', 'one'], + 'C': range(11)}) + + result = pivot_table(data, index='A', columns='B', aggfunc='sum') + mi = MultiIndex(levels=[['C'], ['one', 'two']], + labels=[[0, 0], [0, 1]], names=[None, 'B']) + expected = DataFrame({('C', 'one'): {'bar': 15, 'foo': 13}, + ('C', 'two'): {'bar': 7, 'foo': 20}}, + columns=mi).rename_axis('A') + tm.assert_frame_equal(result, expected) + + result = pivot_table(data, index='A', columns='B', 
+ aggfunc=['sum', 'mean']) + mi = MultiIndex(levels=[['sum', 'mean'], ['C'], ['one', 'two']], + labels=[[0, 0, 1, 1], [0, 0, 0, 0], [0, 1, 0, 1]], + names=[None, None, 'B']) + expected = DataFrame({('mean', 'C', 'one'): {'bar': 5.0, 'foo': 3.25}, + ('mean', 'C', 'two'): {'bar': 7.0, + 'foo': 6.666666666666667}, + ('sum', 'C', 'one'): {'bar': 15, 'foo': 13}, + ('sum', 'C', 'two'): {'bar': 7, 'foo': 20}}, + columns=mi).rename_axis('A') + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize('f, f_numpy', + [('sum', np.sum), + ('mean', np.mean), + ('std', np.std), + (['sum', 'mean'], [np.sum, np.mean]), + (['sum', 'std'], [np.sum, np.std]), + (['std', 'mean'], [np.std, np.mean])]) + def test_pivot_string_func_vs_func(self, f, f_numpy): + # GH #18713 + # for consistency purposes + result = pivot_table(self.data, index='A', columns='B', aggfunc=f) + expected = pivot_table(self.data, index='A', columns='B', + aggfunc=f_numpy) + tm.assert_frame_equal(result, expected) + -class TestCrosstab(tm.TestCase): +class TestCrosstab(object): - def setUp(self): + def setup_method(self, method): df = DataFrame({'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar', 'foo', 'foo', 'foo'], @@ -962,8 +1210,24 @@ def test_crosstab_ndarray(self): # assign arbitrary names result = crosstab(self.df['A'].values, self.df['C'].values) - self.assertEqual(result.index.name, 'row_0') - self.assertEqual(result.columns.name, 'col_0') + assert result.index.name == 'row_0' + assert result.columns.name == 'col_0' + + def test_crosstab_non_aligned(self): + # GH 17005 + a = pd.Series([0, 1, 1], index=['a', 'b', 'c']) + b = pd.Series([3, 4, 3, 4, 3], index=['a', 'b', 'c', 'd', 'f']) + c = np.array([3, 4, 3]) + + expected = pd.DataFrame([[1, 0], [1, 1]], + index=Index([0, 1], name='row_0'), + columns=Index([3, 4], name='col_0')) + + result = crosstab(a, b) + tm.assert_frame_equal(result, expected) + + result = crosstab(a, c) + tm.assert_frame_equal(result, expected) def 
test_crosstab_margins(self): a = np.random.randint(0, 7, size=100) @@ -975,8 +1239,8 @@ def test_crosstab_margins(self): result = crosstab(a, [b, c], rownames=['a'], colnames=('b', 'c'), margins=True) - self.assertEqual(result.index.names, ('a',)) - self.assertEqual(result.columns.names, ['b', 'c']) + assert result.index.names == ('a',) + assert result.columns.names == ['b', 'c'] all_cols = result['All', ''] exp_cols = df.groupby(['a']).size().astype('i8') @@ -996,6 +1260,43 @@ def test_crosstab_margins(self): exp_rows = exp_rows.fillna(0).astype(np.int64) tm.assert_series_equal(all_rows, exp_rows) + def test_crosstab_margins_set_margin_name(self): + # GH 15972 + a = np.random.randint(0, 7, size=100) + b = np.random.randint(0, 3, size=100) + c = np.random.randint(0, 5, size=100) + + df = DataFrame({'a': a, 'b': b, 'c': c}) + + result = crosstab(a, [b, c], rownames=['a'], colnames=('b', 'c'), + margins=True, margins_name='TOTAL') + + assert result.index.names == ('a',) + assert result.columns.names == ['b', 'c'] + + all_cols = result['TOTAL', ''] + exp_cols = df.groupby(['a']).size().astype('i8') + # to keep index.name + exp_margin = Series([len(df)], index=Index(['TOTAL'], name='a')) + exp_cols = exp_cols.append(exp_margin) + exp_cols.name = ('TOTAL', '') + + tm.assert_series_equal(all_cols, exp_cols) + + all_rows = result.loc['TOTAL'] + exp_rows = df.groupby(['b', 'c']).size().astype('i8') + exp_rows = exp_rows.append(Series([len(df)], index=[('TOTAL', '')])) + exp_rows.name = 'TOTAL' + + exp_rows = exp_rows.reindex(all_rows.index) + exp_rows = exp_rows.fillna(0).astype(np.int64) + tm.assert_series_equal(all_rows, exp_rows) + + for margins_name in [666, None, ['a', 'b']]: + with pytest.raises(ValueError): + crosstab(a, [b, c], rownames=['a'], colnames=('b', 'c'), + margins=True, margins_name=margins_name) + def test_crosstab_pass_values(self): a = np.random.randint(0, 7, size=100) b = np.random.randint(0, 3, size=100) @@ -1246,22 +1547,22 @@ def 
test_crosstab_errors(self): 'c': [1, 1, np.nan, 1, 1]}) error = 'values cannot be used without an aggfunc.' - with tm.assertRaisesRegexp(ValueError, error): + with tm.assert_raises_regex(ValueError, error): pd.crosstab(df.a, df.b, values=df.c) error = 'aggfunc cannot be used without values' - with tm.assertRaisesRegexp(ValueError, error): + with tm.assert_raises_regex(ValueError, error): pd.crosstab(df.a, df.b, aggfunc=np.mean) error = 'Not a valid normalize argument' - with tm.assertRaisesRegexp(ValueError, error): + with tm.assert_raises_regex(ValueError, error): pd.crosstab(df.a, df.b, normalize='42') - with tm.assertRaisesRegexp(ValueError, error): + with tm.assert_raises_regex(ValueError, error): pd.crosstab(df.a, df.b, normalize=42) error = 'Not a valid margins argument' - with tm.assertRaisesRegexp(ValueError, error): + with tm.assert_raises_regex(ValueError, error): pd.crosstab(df.a, df.b, normalize='all', margins=42) def test_crosstab_with_categorial_columns(self): @@ -1321,105 +1622,19 @@ def test_crosstab_with_numpy_size(self): columns=expected_column) tm.assert_frame_equal(result, expected) + def test_crosstab_dup_index_names(self): + # GH 13279, GH 18872 + s = pd.Series(range(3), name='foo') + pytest.raises(ValueError, pd.crosstab, s, s) -class TestPivotAnnual(tm.TestCase): - """ - New pandas of scikits.timeseries pivot_annual - """ - - def test_daily(self): - rng = date_range('1/1/2000', '12/31/2004', freq='D') - ts = Series(np.random.randn(len(rng)), index=rng) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - annual = pivot_annual(ts, 'D') - - doy = ts.index.dayofyear - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - doy[(~isleapyear(ts.index.year)) & (doy >= 60)] += 1 - - for i in range(1, 367): - subset = ts[doy == i] - subset.index = [x.year for x in subset.index] - - result = annual[i].dropna() - tm.assert_series_equal(result, subset, check_names=False) - self.assertEqual(result.name, i) - 
- # check leap days - leaps = ts[(ts.index.month == 2) & (ts.index.day == 29)] - day = leaps.index.dayofyear[0] - leaps.index = leaps.index.year - leaps.name = 60 - tm.assert_series_equal(annual[day].dropna(), leaps) - - def test_hourly(self): - rng_hourly = date_range('1/1/1994', periods=(18 * 8760 + 4 * 24), - freq='H') - data_hourly = np.random.randint(100, 350, rng_hourly.size) - ts_hourly = Series(data_hourly, index=rng_hourly) - - grouped = ts_hourly.groupby(ts_hourly.index.year) - hoy = grouped.apply(lambda x: x.reset_index(drop=True)) - hoy = hoy.index.droplevel(0).values - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - hoy[~isleapyear(ts_hourly.index.year) & (hoy >= 1416)] += 24 - hoy += 1 - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - annual = pivot_annual(ts_hourly) - - ts_hourly = ts_hourly.astype(float) - for i in [1, 1416, 1417, 1418, 1439, 1440, 1441, 8784]: - subset = ts_hourly[hoy == i] - subset.index = [x.year for x in subset.index] - - result = annual[i].dropna() - tm.assert_series_equal(result, subset, check_names=False) - self.assertEqual(result.name, i) - - leaps = ts_hourly[(ts_hourly.index.month == 2) & ( - ts_hourly.index.day == 29) & (ts_hourly.index.hour == 0)] - hour = leaps.index.dayofyear[0] * 24 - 23 - leaps.index = leaps.index.year - leaps.name = 1417 - tm.assert_series_equal(annual[hour].dropna(), leaps) - - def test_weekly(self): - pass - - def test_monthly(self): - rng = date_range('1/1/2000', '12/31/2004', freq='M') - ts = Series(np.random.randn(len(rng)), index=rng) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - annual = pivot_annual(ts, 'M') - - month = ts.index.month - for i in range(1, 13): - subset = ts[month == i] - subset.index = [x.year for x in subset.index] - result = annual[i].dropna() - tm.assert_series_equal(result, subset, check_names=False) - self.assertEqual(result.name, i) - - def test_period_monthly(self): - pass - - def 
test_period_daily(self): - pass + @pytest.mark.parametrize("names", [['a', ('b', 'c')], + [('a', 'b'), 'c']]) + def test_crosstab_tuple_name(self, names): + s1 = pd.Series(range(3), name=names[0]) + s2 = pd.Series(range(1, 4), name=names[1]) - def test_period_weekly(self): - pass - - def test_isleapyear_deprecate(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self.assertTrue(isleapyear(2000)) + mi = pd.MultiIndex.from_arrays([range(3), range(1, 4)], names=names) + expected = pd.Series(1, index=mi).unstack(1, fill_value=0) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self.assertFalse(isleapyear(2001)) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self.assertTrue(isleapyear(2004)) + result = pd.crosstab(s1, s2) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/test_reshape.py b/pandas/tests/reshape/test_reshape.py new file mode 100644 index 0000000000000..c4d925b83585b --- /dev/null +++ b/pandas/tests/reshape/test_reshape.py @@ -0,0 +1,509 @@ +# -*- coding: utf-8 -*- +# pylint: disable-msg=W0612,E1101 + +from warnings import catch_warnings +import pytest +from collections import OrderedDict + +from pandas import DataFrame, Series +import pandas as pd + +from numpy import nan +import numpy as np + +from pandas.util.testing import assert_frame_equal + +from pandas import get_dummies, Categorical, Index +import pandas.util.testing as tm +from pandas.compat import u + + +class TestGetDummies(object): + + @pytest.fixture + def df(self): + return DataFrame({'A': ['a', 'b', 'a'], + 'B': ['b', 'b', 'c'], + 'C': [1, 2, 3]}) + + @pytest.fixture(params=['uint8', 'i8', np.float64, bool, None]) + def dtype(self, request): + return np.dtype(request.param) + + @pytest.fixture(params=['dense', 'sparse']) + def sparse(self, request): + # params are strings to simplify reading test results, + # e.g. 
TestGetDummies::test_basic[uint8-sparse] instead of [uint8-True] + return request.param == 'sparse' + + def effective_dtype(self, dtype): + if dtype is None: + return np.uint8 + return dtype + + def test_raises_on_dtype_object(self, df): + with pytest.raises(ValueError): + get_dummies(df, dtype='object') + + def test_basic(self, sparse, dtype): + s_list = list('abc') + s_series = Series(s_list) + s_series_index = Series(s_list, list('ABC')) + + expected = DataFrame({'a': [1, 0, 0], + 'b': [0, 1, 0], + 'c': [0, 0, 1]}, + dtype=self.effective_dtype(dtype)) + result = get_dummies(s_list, sparse=sparse, dtype=dtype) + assert_frame_equal(result, expected) + + result = get_dummies(s_series, sparse=sparse, dtype=dtype) + assert_frame_equal(result, expected) + + expected.index = list('ABC') + result = get_dummies(s_series_index, sparse=sparse, dtype=dtype) + assert_frame_equal(result, expected) + + def test_basic_types(self, sparse, dtype): + # GH 10531 + s_list = list('abc') + s_series = Series(s_list) + s_df = DataFrame({'a': [0, 1, 0, 1, 2], + 'b': ['A', 'A', 'B', 'C', 'C'], + 'c': [2, 3, 3, 3, 2]}) + + expected = DataFrame({'a': [1, 0, 0], + 'b': [0, 1, 0], + 'c': [0, 0, 1]}, + dtype=self.effective_dtype(dtype), + columns=list('abc')) + if not sparse: + compare = tm.assert_frame_equal + else: + expected = expected.to_sparse(fill_value=0, kind='integer') + compare = tm.assert_sp_frame_equal + + result = get_dummies(s_list, sparse=sparse, dtype=dtype) + compare(result, expected) + + result = get_dummies(s_series, sparse=sparse, dtype=dtype) + compare(result, expected) + + result = get_dummies(s_df, columns=s_df.columns, + sparse=sparse, dtype=dtype) + tm.assert_series_equal(result.get_dtype_counts(), + Series({dtype.name: 8})) + + result = get_dummies(s_df, columns=['a'], sparse=sparse, dtype=dtype) + dtype_name = self.effective_dtype(dtype).name + + expected_counts = {'int64': 1, 'object': 1} + expected_counts[dtype_name] = 3 + expected_counts.get(dtype_name, 0) + + 
expected = Series(expected_counts).sort_index() + tm.assert_series_equal(result.get_dtype_counts().sort_index(), + expected) + + def test_just_na(self, sparse): + just_na_list = [np.nan] + just_na_series = Series(just_na_list) + just_na_series_index = Series(just_na_list, index=['A']) + + res_list = get_dummies(just_na_list, sparse=sparse) + res_series = get_dummies(just_na_series, sparse=sparse) + res_series_index = get_dummies(just_na_series_index, sparse=sparse) + + assert res_list.empty + assert res_series.empty + assert res_series_index.empty + + assert res_list.index.tolist() == [0] + assert res_series.index.tolist() == [0] + assert res_series_index.index.tolist() == ['A'] + + def test_include_na(self, sparse, dtype): + if sparse: + pytest.xfail(reason='nan in index is problematic (GH 16894)') + + s = ['a', 'b', np.nan] + res = get_dummies(s, sparse=sparse, dtype=dtype) + exp = DataFrame({'a': [1, 0, 0], + 'b': [0, 1, 0]}, + dtype=self.effective_dtype(dtype)) + assert_frame_equal(res, exp) + + # Sparse dataframes do not allow nan labelled columns, see #GH8822 + res_na = get_dummies(s, dummy_na=True, sparse=sparse, dtype=dtype) + exp_na = DataFrame({nan: [0, 0, 1], + 'a': [1, 0, 0], + 'b': [0, 1, 0]}, + dtype=self.effective_dtype(dtype)) + exp_na = exp_na.reindex(['a', 'b', nan], axis=1) + # hack (NaN handling in assert_index_equal) + exp_na.columns = res_na.columns + assert_frame_equal(res_na, exp_na) + + res_just_na = get_dummies([nan], dummy_na=True, + sparse=sparse, dtype=dtype) + exp_just_na = DataFrame(Series(1, index=[0]), columns=[nan], + dtype=self.effective_dtype(dtype)) + tm.assert_numpy_array_equal(res_just_na.values, exp_just_na.values) + + def test_unicode(self, sparse): + # See GH 6885 - get_dummies chokes on unicode values + import unicodedata + e = 'e' + eacute = unicodedata.lookup('LATIN SMALL LETTER E WITH ACUTE') + s = [e, eacute, eacute] + res = get_dummies(s, prefix='letter', sparse=sparse) + exp = DataFrame({'letter_e': [1, 0, 0], + 
u('letter_%s') % eacute: [0, 1, 1]}, + dtype=np.uint8) + assert_frame_equal(res, exp) + + def test_dataframe_dummies_all_obj(self, df, sparse): + df = df[['A', 'B']] + result = get_dummies(df, sparse=sparse) + expected = DataFrame({'A_a': [1, 0, 1], + 'A_b': [0, 1, 0], + 'B_b': [1, 1, 0], + 'B_c': [0, 0, 1]}, + dtype=np.uint8) + assert_frame_equal(result, expected) + + def test_dataframe_dummies_mix_default(self, df, sparse, dtype): + result = get_dummies(df, sparse=sparse, dtype=dtype) + expected = DataFrame({'C': [1, 2, 3], + 'A_a': [1, 0, 1], + 'A_b': [0, 1, 0], + 'B_b': [1, 1, 0], + 'B_c': [0, 0, 1]}) + cols = ['A_a', 'A_b', 'B_b', 'B_c'] + expected[cols] = expected[cols].astype(dtype) + expected = expected[['C', 'A_a', 'A_b', 'B_b', 'B_c']] + assert_frame_equal(result, expected) + + def test_dataframe_dummies_prefix_list(self, df, sparse): + prefixes = ['from_A', 'from_B'] + result = get_dummies(df, prefix=prefixes, sparse=sparse) + expected = DataFrame({'C': [1, 2, 3], + 'from_A_a': [1, 0, 1], + 'from_A_b': [0, 1, 0], + 'from_B_b': [1, 1, 0], + 'from_B_c': [0, 0, 1]}, + dtype=np.uint8) + expected[['C']] = df[['C']] + expected = expected[['C', 'from_A_a', 'from_A_b', + 'from_B_b', 'from_B_c']] + assert_frame_equal(result, expected) + + def test_dataframe_dummies_prefix_str(self, df, sparse): + # not that you should do this... 
+ result = get_dummies(df, prefix='bad', sparse=sparse) + bad_columns = ['bad_a', 'bad_b', 'bad_b', 'bad_c'] + expected = DataFrame([[1, 1, 0, 1, 0], + [2, 0, 1, 1, 0], + [3, 1, 0, 0, 1]], + columns=['C'] + bad_columns, + dtype=np.uint8) + expected = expected.astype({"C": np.int64}) + assert_frame_equal(result, expected) + + def test_dataframe_dummies_subset(self, df, sparse): + result = get_dummies(df, prefix=['from_A'], columns=['A'], + sparse=sparse) + expected = DataFrame({'B': ['b', 'b', 'c'], + 'C': [1, 2, 3], + 'from_A_a': [1, 0, 1], + 'from_A_b': [0, 1, 0]}, dtype=np.uint8) + expected[['C']] = df[['C']] + assert_frame_equal(result, expected) + + def test_dataframe_dummies_prefix_sep(self, df, sparse): + result = get_dummies(df, prefix_sep='..', sparse=sparse) + expected = DataFrame({'C': [1, 2, 3], + 'A..a': [1, 0, 1], + 'A..b': [0, 1, 0], + 'B..b': [1, 1, 0], + 'B..c': [0, 0, 1]}, + dtype=np.uint8) + expected[['C']] = df[['C']] + expected = expected[['C', 'A..a', 'A..b', 'B..b', 'B..c']] + assert_frame_equal(result, expected) + + result = get_dummies(df, prefix_sep=['..', '__'], sparse=sparse) + expected = expected.rename(columns={'B..b': 'B__b', 'B..c': 'B__c'}) + assert_frame_equal(result, expected) + + result = get_dummies(df, prefix_sep={'A': '..', 'B': '__'}, + sparse=sparse) + assert_frame_equal(result, expected) + + def test_dataframe_dummies_prefix_bad_length(self, df, sparse): + with pytest.raises(ValueError): + get_dummies(df, prefix=['too few'], sparse=sparse) + + def test_dataframe_dummies_prefix_sep_bad_length(self, df, sparse): + with pytest.raises(ValueError): + get_dummies(df, prefix_sep=['bad'], sparse=sparse) + + def test_dataframe_dummies_prefix_dict(self, sparse): + prefixes = {'A': 'from_A', 'B': 'from_B'} + df = DataFrame({'C': [1, 2, 3], + 'A': ['a', 'b', 'a'], + 'B': ['b', 'b', 'c']}) + result = get_dummies(df, prefix=prefixes, sparse=sparse) + + expected = DataFrame({'C': [1, 2, 3], + 'from_A_a': [1, 0, 1], + 'from_A_b': [0, 1, 0], 
+ 'from_B_b': [1, 1, 0], + 'from_B_c': [0, 0, 1]}) + + columns = ['from_A_a', 'from_A_b', 'from_B_b', 'from_B_c'] + expected[columns] = expected[columns].astype(np.uint8) + assert_frame_equal(result, expected) + + def test_dataframe_dummies_with_na(self, df, sparse, dtype): + df.loc[3, :] = [np.nan, np.nan, np.nan] + result = get_dummies(df, dummy_na=True, + sparse=sparse, dtype=dtype).sort_index(axis=1) + expected = DataFrame({'C': [1, 2, 3, np.nan], + 'A_a': [1, 0, 1, 0], + 'A_b': [0, 1, 0, 0], + 'A_nan': [0, 0, 0, 1], + 'B_b': [1, 1, 0, 0], + 'B_c': [0, 0, 1, 0], + 'B_nan': [0, 0, 0, 1]}).sort_index(axis=1) + + e_dtype = self.effective_dtype(dtype) + columns = ['A_a', 'A_b', 'A_nan', 'B_b', 'B_c', 'B_nan'] + expected[columns] = expected[columns].astype(e_dtype) + assert_frame_equal(result, expected) + + result = get_dummies(df, dummy_na=False, sparse=sparse, dtype=dtype) + expected = expected[['C', 'A_a', 'A_b', 'B_b', 'B_c']] + assert_frame_equal(result, expected) + + def test_dataframe_dummies_with_categorical(self, df, sparse, dtype): + df['cat'] = pd.Categorical(['x', 'y', 'y']) + result = get_dummies(df, sparse=sparse, dtype=dtype).sort_index(axis=1) + expected = DataFrame({'C': [1, 2, 3], + 'A_a': [1, 0, 1], + 'A_b': [0, 1, 0], + 'B_b': [1, 1, 0], + 'B_c': [0, 0, 1], + 'cat_x': [1, 0, 0], + 'cat_y': [0, 1, 1]}).sort_index(axis=1) + + columns = ['A_a', 'A_b', 'B_b', 'B_c', 'cat_x', 'cat_y'] + effective_dtype = self.effective_dtype(dtype) + expected[columns] = expected[columns].astype(effective_dtype) + expected.sort_index(axis=1) + assert_frame_equal(result, expected) + + def test_basic_drop_first(self, sparse): + # GH12402 Add a new parameter `drop_first` to avoid collinearity + # Basic case + s_list = list('abc') + s_series = Series(s_list) + s_series_index = Series(s_list, list('ABC')) + + expected = DataFrame({'b': [0, 1, 0], + 'c': [0, 0, 1]}, + dtype=np.uint8) + + result = get_dummies(s_list, drop_first=True, sparse=sparse) + 
assert_frame_equal(result, expected) + + result = get_dummies(s_series, drop_first=True, sparse=sparse) + assert_frame_equal(result, expected) + + expected.index = list('ABC') + result = get_dummies(s_series_index, drop_first=True, sparse=sparse) + assert_frame_equal(result, expected) + + def test_basic_drop_first_one_level(self, sparse): + # Test the case that categorical variable only has one level. + s_list = list('aaa') + s_series = Series(s_list) + s_series_index = Series(s_list, list('ABC')) + + expected = DataFrame(index=np.arange(3)) + + result = get_dummies(s_list, drop_first=True, sparse=sparse) + assert_frame_equal(result, expected) + + result = get_dummies(s_series, drop_first=True, sparse=sparse) + assert_frame_equal(result, expected) + + expected = DataFrame(index=list('ABC')) + result = get_dummies(s_series_index, drop_first=True, sparse=sparse) + assert_frame_equal(result, expected) + + def test_basic_drop_first_NA(self, sparse): + # Test NA handling together with drop_first + s_NA = ['a', 'b', np.nan] + res = get_dummies(s_NA, drop_first=True, sparse=sparse) + exp = DataFrame({'b': [0, 1, 0]}, dtype=np.uint8) + assert_frame_equal(res, exp) + + res_na = get_dummies(s_NA, dummy_na=True, drop_first=True, + sparse=sparse) + exp_na = DataFrame( + {'b': [0, 1, 0], + nan: [0, 0, 1]}, + dtype=np.uint8).reindex(['b', nan], axis=1) + assert_frame_equal(res_na, exp_na) + + res_just_na = get_dummies([nan], dummy_na=True, drop_first=True, + sparse=sparse) + exp_just_na = DataFrame(index=np.arange(1)) + assert_frame_equal(res_just_na, exp_just_na) + + def test_dataframe_dummies_drop_first(self, df, sparse): + df = df[['A', 'B']] + result = get_dummies(df, drop_first=True, sparse=sparse) + expected = DataFrame({'A_b': [0, 1, 0], + 'B_c': [0, 0, 1]}, + dtype=np.uint8) + assert_frame_equal(result, expected) + + def test_dataframe_dummies_drop_first_with_categorical( + self, df, sparse, dtype): + df['cat'] = pd.Categorical(['x', 'y', 'y']) + result = get_dummies(df, 
drop_first=True, sparse=sparse) + expected = DataFrame({'C': [1, 2, 3], + 'A_b': [0, 1, 0], + 'B_c': [0, 0, 1], + 'cat_y': [0, 1, 1]}) + cols = ['A_b', 'B_c', 'cat_y'] + expected[cols] = expected[cols].astype(np.uint8) + expected = expected[['C', 'A_b', 'B_c', 'cat_y']] + assert_frame_equal(result, expected) + + def test_dataframe_dummies_drop_first_with_na(self, df, sparse): + df.loc[3, :] = [np.nan, np.nan, np.nan] + result = get_dummies(df, dummy_na=True, drop_first=True, + sparse=sparse).sort_index(axis=1) + expected = DataFrame({'C': [1, 2, 3, np.nan], + 'A_b': [0, 1, 0, 0], + 'A_nan': [0, 0, 0, 1], + 'B_c': [0, 0, 1, 0], + 'B_nan': [0, 0, 0, 1]}) + cols = ['A_b', 'A_nan', 'B_c', 'B_nan'] + expected[cols] = expected[cols].astype(np.uint8) + expected = expected.sort_index(axis=1) + assert_frame_equal(result, expected) + + result = get_dummies(df, dummy_na=False, drop_first=True, + sparse=sparse) + expected = expected[['C', 'A_b', 'B_c']] + assert_frame_equal(result, expected) + + def test_int_int(self): + data = Series([1, 2, 1]) + result = pd.get_dummies(data) + expected = DataFrame([[1, 0], + [0, 1], + [1, 0]], + columns=[1, 2], + dtype=np.uint8) + tm.assert_frame_equal(result, expected) + + data = Series(pd.Categorical(['a', 'b', 'a'])) + result = pd.get_dummies(data) + expected = DataFrame([[1, 0], + [0, 1], + [1, 0]], + columns=pd.Categorical(['a', 'b']), + dtype=np.uint8) + tm.assert_frame_equal(result, expected) + + def test_int_df(self, dtype): + data = DataFrame( + {'A': [1, 2, 1], + 'B': pd.Categorical(['a', 'b', 'a']), + 'C': [1, 2, 1], + 'D': [1., 2., 1.] 
+ } + ) + columns = ['C', 'D', 'A_1', 'A_2', 'B_a', 'B_b'] + expected = DataFrame([ + [1, 1., 1, 0, 1, 0], + [2, 2., 0, 1, 0, 1], + [1, 1., 1, 0, 1, 0] + ], columns=columns) + expected[columns[2:]] = expected[columns[2:]].astype(dtype) + result = pd.get_dummies(data, columns=['A', 'B'], dtype=dtype) + tm.assert_frame_equal(result, expected) + + def test_dataframe_dummies_preserve_categorical_dtype(self, dtype): + # GH13854 + for ordered in [False, True]: + cat = pd.Categorical(list("xy"), categories=list("xyz"), + ordered=ordered) + result = get_dummies(cat, dtype=dtype) + + data = np.array([[1, 0, 0], [0, 1, 0]], + dtype=self.effective_dtype(dtype)) + cols = pd.CategoricalIndex(cat.categories, + categories=cat.categories, + ordered=ordered) + expected = DataFrame(data, columns=cols, + dtype=self.effective_dtype(dtype)) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize('sparse', [True, False]) + def test_get_dummies_dont_sparsify_all_columns(self, sparse): + # GH18914 + df = DataFrame.from_dict(OrderedDict([('GDP', [1, 2]), + ('Nation', ['AB', 'CD'])])) + df = get_dummies(df, columns=['Nation'], sparse=sparse) + df2 = df.reindex(columns=['GDP']) + + tm.assert_frame_equal(df[['GDP']], df2) + + +class TestCategoricalReshape(object): + + def test_reshaping_panel_categorical(self): + + with catch_warnings(record=True): + p = tm.makePanel() + p['str'] = 'foo' + df = p.to_frame() + + df['category'] = df['str'].astype('category') + result = df['category'].unstack() + + c = Categorical(['foo'] * len(p.major_axis)) + expected = DataFrame({'A': c.copy(), + 'B': c.copy(), + 'C': c.copy(), + 'D': c.copy()}, + columns=Index(list('ABCD'), name='minor'), + index=p.major_axis.set_names('major')) + tm.assert_frame_equal(result, expected) + + +class TestMakeAxisDummies(object): + + def test_preserve_categorical_dtype(self): + # GH13854 + for ordered in [False, True]: + cidx = pd.CategoricalIndex(list("xyz"), ordered=ordered) + midx = 
pd.MultiIndex(levels=[['a'], cidx], + labels=[[0, 0], [0, 1]]) + df = DataFrame([[10, 11]], index=midx) + + expected = DataFrame([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]], + index=midx, columns=cidx) + + from pandas.core.reshape.reshape import make_axis_dummies + result = make_axis_dummies(df) + tm.assert_frame_equal(result, expected) + + result = make_axis_dummies(df, transform=lambda x: x) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/test_tile.py b/pandas/tests/reshape/test_tile.py new file mode 100644 index 0000000000000..8d093f2784ba1 --- /dev/null +++ b/pandas/tests/reshape/test_tile.py @@ -0,0 +1,591 @@ +import os +import pytest + +import numpy as np +from pandas.compat import zip + +from pandas import (DataFrame, Series, isna, to_datetime, DatetimeIndex, Index, + Timestamp, Interval, IntervalIndex, Categorical, + cut, qcut, date_range, NaT, TimedeltaIndex) +from pandas.tseries.offsets import Nano, Day +import pandas.util.testing as tm +from pandas.api.types import CategoricalDtype as CDT + +from pandas.core.algorithms import quantile +import pandas.core.reshape.tile as tmod + + +class TestCut(object): + + def test_simple(self): + data = np.ones(5, dtype='int64') + result = cut(data, 4, labels=False) + expected = np.array([1, 1, 1, 1, 1]) + tm.assert_numpy_array_equal(result, expected, + check_dtype=False) + + def test_bins(self): + data = np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1]) + result, bins = cut(data, 3, retbins=True) + + intervals = IntervalIndex.from_breaks(bins.round(3)) + intervals = intervals.take([0, 0, 0, 1, 2, 0]) + expected = Categorical(intervals, ordered=True) + tm.assert_categorical_equal(result, expected) + tm.assert_almost_equal(bins, np.array([0.1905, 3.36666667, + 6.53333333, 9.7])) + + def test_right(self): + data = np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1, 2.575]) + result, bins = cut(data, 4, right=True, retbins=True) + intervals = IntervalIndex.from_breaks(bins.round(3)) + expected = Categorical(intervals, 
ordered=True) + expected = expected.take([0, 0, 0, 2, 3, 0, 0]) + tm.assert_categorical_equal(result, expected) + tm.assert_almost_equal(bins, np.array([0.1905, 2.575, 4.95, + 7.325, 9.7])) + + def test_noright(self): + data = np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1, 2.575]) + result, bins = cut(data, 4, right=False, retbins=True) + intervals = IntervalIndex.from_breaks(bins.round(3), closed='left') + intervals = intervals.take([0, 0, 0, 2, 3, 0, 1]) + expected = Categorical(intervals, ordered=True) + tm.assert_categorical_equal(result, expected) + tm.assert_almost_equal(bins, np.array([0.2, 2.575, 4.95, + 7.325, 9.7095])) + + def test_arraylike(self): + data = [.2, 1.4, 2.5, 6.2, 9.7, 2.1] + result, bins = cut(data, 3, retbins=True) + intervals = IntervalIndex.from_breaks(bins.round(3)) + intervals = intervals.take([0, 0, 0, 1, 2, 0]) + expected = Categorical(intervals, ordered=True) + tm.assert_categorical_equal(result, expected) + tm.assert_almost_equal(bins, np.array([0.1905, 3.36666667, + 6.53333333, 9.7])) + + def test_bins_from_intervalindex(self): + c = cut(range(5), 3) + expected = c + result = cut(range(5), bins=expected.categories) + tm.assert_categorical_equal(result, expected) + + expected = Categorical.from_codes(np.append(c.codes, -1), + categories=c.categories, + ordered=True) + result = cut(range(6), bins=expected.categories) + tm.assert_categorical_equal(result, expected) + + # doc example + # make sure we preserve the bins + ages = np.array([10, 15, 13, 12, 23, 25, 28, 59, 60]) + c = cut(ages, bins=[0, 18, 35, 70]) + expected = IntervalIndex.from_tuples([(0, 18), (18, 35), (35, 70)]) + tm.assert_index_equal(c.categories, expected) + + result = cut([25, 20, 50], bins=c.categories) + tm.assert_index_equal(result.categories, expected) + tm.assert_numpy_array_equal(result.codes, + np.array([1, 1, 2], dtype='int8')) + + def test_bins_not_monotonic(self): + data = [.2, 1.4, 2.5, 6.2, 9.7, 2.1] + pytest.raises(ValueError, cut, data, [0.1, 1.5, 1, 10]) + + 
def test_wrong_num_labels(self): + data = [.2, 1.4, 2.5, 6.2, 9.7, 2.1] + pytest.raises(ValueError, cut, data, [0, 1, 10], + labels=['foo', 'bar', 'baz']) + + def test_cut_corner(self): + # h3h + pytest.raises(ValueError, cut, [], 2) + + pytest.raises(ValueError, cut, [1, 2, 3], 0.5) + + @pytest.mark.parametrize('arg', [2, np.eye(2), DataFrame(np.eye(2))]) + @pytest.mark.parametrize('cut_func', [cut, qcut]) + def test_cut_not_1d_arg(self, arg, cut_func): + with pytest.raises(ValueError): + cut_func(arg, 2) + + def test_cut_out_of_range_more(self): + # #1511 + s = Series([0, -1, 0, 1, -3], name='x') + ind = cut(s, [0, 1], labels=False) + exp = Series([np.nan, np.nan, np.nan, 0, np.nan], name='x') + tm.assert_series_equal(ind, exp) + + def test_labels(self): + arr = np.tile(np.arange(0, 1.01, 0.1), 4) + + result, bins = cut(arr, 4, retbins=True) + ex_levels = IntervalIndex.from_breaks([-1e-3, 0.25, 0.5, 0.75, 1]) + tm.assert_index_equal(result.categories, ex_levels) + + result, bins = cut(arr, 4, retbins=True, right=False) + ex_levels = IntervalIndex.from_breaks([0, 0.25, 0.5, 0.75, 1 + 1e-3], + closed='left') + tm.assert_index_equal(result.categories, ex_levels) + + def test_cut_pass_series_name_to_factor(self): + s = Series(np.random.randn(100), name='foo') + + factor = cut(s, 4) + assert factor.name == 'foo' + + def test_label_precision(self): + arr = np.arange(0, 0.73, 0.01) + + result = cut(arr, 4, precision=2) + ex_levels = IntervalIndex.from_breaks([-0.00072, 0.18, 0.36, + 0.54, 0.72]) + tm.assert_index_equal(result.categories, ex_levels) + + def test_na_handling(self): + arr = np.arange(0, 0.75, 0.01) + arr[::3] = np.nan + + result = cut(arr, 4) + + result_arr = np.asarray(result) + + ex_arr = np.where(isna(arr), np.nan, result_arr) + + tm.assert_almost_equal(result_arr, ex_arr) + + result = cut(arr, 4, labels=False) + ex_result = np.where(isna(arr), np.nan, result) + tm.assert_almost_equal(result, ex_result) + + def test_inf_handling(self): + data = 
np.arange(6) + data_ser = Series(data, dtype='int64') + + bins = [-np.inf, 2, 4, np.inf] + result = cut(data, bins) + result_ser = cut(data_ser, bins) + + ex_uniques = IntervalIndex.from_breaks(bins) + tm.assert_index_equal(result.categories, ex_uniques) + assert result[5] == Interval(4, np.inf) + assert result[0] == Interval(-np.inf, 2) + assert result_ser[5] == Interval(4, np.inf) + assert result_ser[0] == Interval(-np.inf, 2) + + def test_qcut(self): + arr = np.random.randn(1000) + + # We store the bins as Index that have been rounded + # to comparisons are a bit tricky. + labels, bins = qcut(arr, 4, retbins=True) + ex_bins = quantile(arr, [0, .25, .5, .75, 1.]) + result = labels.categories.left.values + assert np.allclose(result, ex_bins[:-1], atol=1e-2) + result = labels.categories.right.values + assert np.allclose(result, ex_bins[1:], atol=1e-2) + + ex_levels = cut(arr, ex_bins, include_lowest=True) + tm.assert_categorical_equal(labels, ex_levels) + + def test_qcut_bounds(self): + arr = np.random.randn(1000) + + factor = qcut(arr, 10, labels=False) + assert len(np.unique(factor)) == 10 + + def test_qcut_specify_quantiles(self): + arr = np.random.randn(100) + + factor = qcut(arr, [0, .25, .5, .75, 1.]) + expected = qcut(arr, 4) + tm.assert_categorical_equal(factor, expected) + + def test_qcut_all_bins_same(self): + tm.assert_raises_regex(ValueError, "edges.*unique", qcut, + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 3) + + def test_cut_out_of_bounds(self): + arr = np.random.randn(100) + + result = cut(arr, [-1, 0, 1]) + + mask = isna(result) + ex_mask = (arr < -1) | (arr > 1) + tm.assert_numpy_array_equal(mask, ex_mask) + + def test_cut_pass_labels(self): + arr = [50, 5, 10, 15, 20, 30, 70] + bins = [0, 25, 50, 100] + labels = ['Small', 'Medium', 'Large'] + + result = cut(arr, bins, labels=labels) + exp = Categorical(['Medium'] + 4 * ['Small'] + ['Medium', 'Large'], + categories=labels, + ordered=True) + tm.assert_categorical_equal(result, exp) + + result = cut(arr, 
bins, labels=Categorical.from_codes([0, 1, 2], + labels)) + exp = Categorical.from_codes([1] + 4 * [0] + [1, 2], labels) + tm.assert_categorical_equal(result, exp) + + # issue 16459 + labels = ['Good', 'Medium', 'Bad'] + result = cut(arr, 3, labels=labels) + exp = cut(arr, 3, labels=Categorical(labels, categories=labels, + ordered=True)) + tm.assert_categorical_equal(result, exp) + + def test_qcut_include_lowest(self): + values = np.arange(10) + + ii = qcut(values, 4) + + ex_levels = IntervalIndex( + [Interval(-0.001, 2.25), + Interval(2.25, 4.5), + Interval(4.5, 6.75), + Interval(6.75, 9)]) + tm.assert_index_equal(ii.categories, ex_levels) + + def test_qcut_nas(self): + arr = np.random.randn(100) + arr[:20] = np.nan + + result = qcut(arr, 4) + assert isna(result[:20]).all() + + def test_qcut_index(self): + result = qcut([0, 2], 2) + intervals = [Interval(-0.001, 1), Interval(1, 2)] + expected = Categorical(intervals, ordered=True) + tm.assert_categorical_equal(result, expected) + + def test_round_frac(self): + # it works + result = cut(np.arange(11.), 2) + + result = cut(np.arange(11.) 
/ 1e10, 2) + + # #1979, negative numbers + + result = tmod._round_frac(-117.9998, precision=3) + assert result == -118 + result = tmod._round_frac(117.9998, precision=3) + assert result == 118 + + result = tmod._round_frac(117.9998, precision=2) + assert result == 118 + result = tmod._round_frac(0.000123456, precision=2) + assert result == 0.00012 + + def test_qcut_binning_issues(self): + # #1978, 1979 + path = os.path.join(tm.get_data_path(), 'cut_data.csv') + arr = np.loadtxt(path) + + result = qcut(arr, 20) + + starts = [] + ends = [] + for lev in np.unique(result): + s = lev.left + e = lev.right + assert s != e + + starts.append(float(s)) + ends.append(float(e)) + + for (sp, sn), (ep, en) in zip(zip(starts[:-1], starts[1:]), + zip(ends[:-1], ends[1:])): + assert sp < sn + assert ep < en + assert ep <= sn + + def test_cut_return_intervals(self): + s = Series([0, 1, 2, 3, 4, 5, 6, 7, 8]) + res = cut(s, 3) + exp_bins = np.linspace(0, 8, num=4).round(3) + exp_bins[0] -= 0.008 + exp = Series(IntervalIndex.from_breaks(exp_bins, closed='right').take( + [0, 0, 0, 1, 1, 1, 2, 2, 2])).astype(CDT(ordered=True)) + tm.assert_series_equal(res, exp) + + def test_qcut_return_intervals(self): + s = Series([0, 1, 2, 3, 4, 5, 6, 7, 8]) + res = qcut(s, [0, 0.333, 0.666, 1]) + exp_levels = np.array([Interval(-0.001, 2.664), + Interval(2.664, 5.328), Interval(5.328, 8)]) + exp = Series(exp_levels.take([0, 0, 0, 1, 1, 1, 2, 2, 2])).astype( + CDT(ordered=True)) + tm.assert_series_equal(res, exp) + + def test_series_retbins(self): + # GH 8589 + s = Series(np.arange(4)) + result, bins = cut(s, 2, retbins=True) + expected = Series(IntervalIndex.from_breaks( + [-0.003, 1.5, 3], closed='right').repeat(2)).astype( + CDT(ordered=True)) + tm.assert_series_equal(result, expected) + + result, bins = qcut(s, 2, retbins=True) + expected = Series(IntervalIndex.from_breaks( + [-0.001, 1.5, 3], closed='right').repeat(2)).astype( + CDT(ordered=True)) + tm.assert_series_equal(result, expected) + + def 
test_qcut_duplicates_bin(self): + # GH 7751 + values = [0, 0, 0, 0, 1, 2, 3] + expected = IntervalIndex([Interval(-0.001, 1), Interval(1, 3)]) + + result = qcut(values, 3, duplicates='drop') + tm.assert_index_equal(result.categories, expected) + + pytest.raises(ValueError, qcut, values, 3) + pytest.raises(ValueError, qcut, values, 3, duplicates='raise') + + # invalid + pytest.raises(ValueError, qcut, values, 3, duplicates='foo') + + def test_single_quantile(self): + # issue 15431 + expected = Series([0, 0]) + + s = Series([9., 9.]) + result = qcut(s, 1, labels=False) + tm.assert_series_equal(result, expected) + result = qcut(s, 1) + intervals = IntervalIndex([Interval(8.999, 9.0), + Interval(8.999, 9.0)], closed='right') + expected = Series(intervals).astype(CDT(ordered=True)) + tm.assert_series_equal(result, expected) + + s = Series([-9., -9.]) + expected = Series([0, 0]) + result = qcut(s, 1, labels=False) + tm.assert_series_equal(result, expected) + result = qcut(s, 1) + intervals = IntervalIndex([Interval(-9.001, -9.0), + Interval(-9.001, -9.0)], closed='right') + expected = Series(intervals).astype(CDT(ordered=True)) + tm.assert_series_equal(result, expected) + + s = Series([0., 0.]) + expected = Series([0, 0]) + result = qcut(s, 1, labels=False) + tm.assert_series_equal(result, expected) + result = qcut(s, 1) + intervals = IntervalIndex([Interval(-0.001, 0.0), + Interval(-0.001, 0.0)], closed='right') + expected = Series(intervals).astype(CDT(ordered=True)) + tm.assert_series_equal(result, expected) + + s = Series([9]) + expected = Series([0]) + result = qcut(s, 1, labels=False) + tm.assert_series_equal(result, expected) + result = qcut(s, 1) + intervals = IntervalIndex([Interval(8.999, 9.0)], closed='right') + expected = Series(intervals).astype(CDT(ordered=True)) + tm.assert_series_equal(result, expected) + + s = Series([-9]) + expected = Series([0]) + result = qcut(s, 1, labels=False) + tm.assert_series_equal(result, expected) + result = qcut(s, 1) + 
intervals = IntervalIndex([Interval(-9.001, -9.0)], closed='right') + expected = Series(intervals).astype(CDT(ordered=True)) + tm.assert_series_equal(result, expected) + + s = Series([0]) + expected = Series([0]) + result = qcut(s, 1, labels=False) + tm.assert_series_equal(result, expected) + result = qcut(s, 1) + intervals = IntervalIndex([Interval(-0.001, 0.0)], closed='right') + expected = Series(intervals).astype(CDT(ordered=True)) + tm.assert_series_equal(result, expected) + + def test_single_bin(self): + # issue 14652 + expected = Series([0, 0]) + + s = Series([9., 9.]) + result = cut(s, 1, labels=False) + tm.assert_series_equal(result, expected) + + s = Series([-9., -9.]) + result = cut(s, 1, labels=False) + tm.assert_series_equal(result, expected) + + expected = Series([0]) + + s = Series([9]) + result = cut(s, 1, labels=False) + tm.assert_series_equal(result, expected) + + s = Series([-9]) + result = cut(s, 1, labels=False) + tm.assert_series_equal(result, expected) + + # issue 15428 + expected = Series([0, 0]) + + s = Series([0., 0.]) + result = cut(s, 1, labels=False) + tm.assert_series_equal(result, expected) + + expected = Series([0]) + + s = Series([0]) + result = cut(s, 1, labels=False) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "array_1_writeable, array_2_writeable", + [(True, True), (True, False), (False, False)]) + def test_cut_read_only(self, array_1_writeable, array_2_writeable): + # issue 18773 + array_1 = np.arange(0, 100, 10) + array_1.flags.writeable = array_1_writeable + + array_2 = np.arange(0, 100, 10) + array_2.flags.writeable = array_2_writeable + + hundred_elements = np.arange(100) + + tm.assert_categorical_equal(cut(hundred_elements, array_1), + cut(hundred_elements, array_2)) + + +class TestDatelike(object): + + @pytest.mark.parametrize('s', [ + Series(DatetimeIndex(['20180101', NaT, '20180103'])), + Series(TimedeltaIndex(['0 days', NaT, '2 days']))], + ids=lambda x: str(x.dtype)) + def 
test_qcut_nat(self, s): + # GH 19768 + intervals = IntervalIndex.from_tuples( + [(s[0] - Nano(), s[2] - Day()), np.nan, (s[2] - Day(), s[2])]) + expected = Series(Categorical(intervals, ordered=True)) + result = qcut(s, 2) + tm.assert_series_equal(result, expected) + + def test_datetime_cut(self): + # GH 14714 + # testing for time data to be present as series + data = to_datetime(Series(['2013-01-01', '2013-01-02', '2013-01-03'])) + + result, bins = cut(data, 3, retbins=True) + expected = ( + Series(IntervalIndex([ + Interval(Timestamp('2012-12-31 23:57:07.200000'), + Timestamp('2013-01-01 16:00:00')), + Interval(Timestamp('2013-01-01 16:00:00'), + Timestamp('2013-01-02 08:00:00')), + Interval(Timestamp('2013-01-02 08:00:00'), + Timestamp('2013-01-03 00:00:00'))])) + .astype(CDT(ordered=True))) + + tm.assert_series_equal(result, expected) + + # testing for time data to be present as list + data = [np.datetime64('2013-01-01'), np.datetime64('2013-01-02'), + np.datetime64('2013-01-03')] + result, bins = cut(data, 3, retbins=True) + tm.assert_series_equal(Series(result), expected) + + # testing for time data to be present as ndarray + data = np.array([np.datetime64('2013-01-01'), + np.datetime64('2013-01-02'), + np.datetime64('2013-01-03')]) + result, bins = cut(data, 3, retbins=True) + tm.assert_series_equal(Series(result), expected) + + # testing for time data to be present as datetime index + data = DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03']) + result, bins = cut(data, 3, retbins=True) + tm.assert_series_equal(Series(result), expected) + + @pytest.mark.parametrize('bins', [ + 3, [Timestamp('2013-01-01 04:57:07.200000'), + Timestamp('2013-01-01 21:00:00'), + Timestamp('2013-01-02 13:00:00'), + Timestamp('2013-01-03 05:00:00')]]) + @pytest.mark.parametrize('box', [list, np.array, Index, Series]) + def test_datetimetz_cut(self, bins, box): + # GH 19872 + tz = 'US/Eastern' + s = Series(date_range('20130101', periods=3, tz=tz)) + if not isinstance(bins, 
int): + bins = box(bins) + result = cut(s, bins) + expected = ( + Series(IntervalIndex([ + Interval(Timestamp('2012-12-31 23:57:07.200000', tz=tz), + Timestamp('2013-01-01 16:00:00', tz=tz)), + Interval(Timestamp('2013-01-01 16:00:00', tz=tz), + Timestamp('2013-01-02 08:00:00', tz=tz)), + Interval(Timestamp('2013-01-02 08:00:00', tz=tz), + Timestamp('2013-01-03 00:00:00', tz=tz))])) + .astype(CDT(ordered=True))) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('bins', [3, np.linspace(0, 1, 4)]) + def test_datetimetz_qcut(self, bins): + # GH 19872 + tz = 'US/Eastern' + s = Series(date_range('20130101', periods=3, tz=tz)) + result = qcut(s, bins) + expected = ( + Series(IntervalIndex([ + Interval(Timestamp('2012-12-31 23:59:59.999999999', tz=tz), + Timestamp('2013-01-01 16:00:00', tz=tz)), + Interval(Timestamp('2013-01-01 16:00:00', tz=tz), + Timestamp('2013-01-02 08:00:00', tz=tz)), + Interval(Timestamp('2013-01-02 08:00:00', tz=tz), + Timestamp('2013-01-03 00:00:00', tz=tz))])) + .astype(CDT(ordered=True))) + tm.assert_series_equal(result, expected) + + def test_datetime_bin(self): + data = [np.datetime64('2012-12-13'), np.datetime64('2012-12-15')] + bin_data = ['2012-12-12', '2012-12-14', '2012-12-16'] + expected = ( + Series(IntervalIndex([ + Interval(Timestamp(bin_data[0]), Timestamp(bin_data[1])), + Interval(Timestamp(bin_data[1]), Timestamp(bin_data[2]))])) + .astype(CDT(ordered=True))) + + for conv in [Timestamp, Timestamp, np.datetime64]: + bins = [conv(v) for v in bin_data] + result = cut(data, bins=bins) + tm.assert_series_equal(Series(result), expected) + + bin_pydatetime = [Timestamp(v).to_pydatetime() for v in bin_data] + result = cut(data, bins=bin_pydatetime) + tm.assert_series_equal(Series(result), expected) + + bins = to_datetime(bin_data) + result = cut(data, bins=bin_pydatetime) + tm.assert_series_equal(Series(result), expected) + + def test_datetime_nan(self): + + def f(): + cut(date_range('20130101', periods=3), bins=[0, 
2, 4]) + pytest.raises(ValueError, f) + + result = cut(date_range('20130102', periods=5), + bins=date_range('20130101', periods=2)) + mask = result.categories.isna() + tm.assert_numpy_array_equal(mask, np.array([False])) + mask = result.isna() + tm.assert_numpy_array_equal( + mask, np.array([False, True, True, True, True])) diff --git a/pandas/tests/reshape/test_union_categoricals.py b/pandas/tests/reshape/test_union_categoricals.py new file mode 100644 index 0000000000000..8743d11118200 --- /dev/null +++ b/pandas/tests/reshape/test_union_categoricals.py @@ -0,0 +1,345 @@ +import pytest + +import numpy as np +import pandas as pd +from pandas import Categorical, Series, CategoricalIndex +from pandas.core.dtypes.concat import union_categoricals +from pandas.util import testing as tm + + +class TestUnionCategoricals(object): + + def test_union_categorical(self): + # GH 13361 + data = [ + (list('abc'), list('abd'), list('abcabd')), + ([0, 1, 2], [2, 3, 4], [0, 1, 2, 2, 3, 4]), + ([0, 1.2, 2], [2, 3.4, 4], [0, 1.2, 2, 2, 3.4, 4]), + + (['b', 'b', np.nan, 'a'], ['a', np.nan, 'c'], + ['b', 'b', np.nan, 'a', 'a', np.nan, 'c']), + + (pd.date_range('2014-01-01', '2014-01-05'), + pd.date_range('2014-01-06', '2014-01-07'), + pd.date_range('2014-01-01', '2014-01-07')), + + (pd.date_range('2014-01-01', '2014-01-05', tz='US/Central'), + pd.date_range('2014-01-06', '2014-01-07', tz='US/Central'), + pd.date_range('2014-01-01', '2014-01-07', tz='US/Central')), + + (pd.period_range('2014-01-01', '2014-01-05'), + pd.period_range('2014-01-06', '2014-01-07'), + pd.period_range('2014-01-01', '2014-01-07')), + ] + + for a, b, combined in data: + for box in [Categorical, CategoricalIndex, Series]: + result = union_categoricals([box(Categorical(a)), + box(Categorical(b))]) + expected = Categorical(combined) + tm.assert_categorical_equal(result, expected, + check_category_order=True) + + # new categories ordered by appearance + s = Categorical(['x', 'y', 'z']) + s2 = Categorical(['a', 'b', 
'c']) + result = union_categoricals([s, s2]) + expected = Categorical(['x', 'y', 'z', 'a', 'b', 'c'], + categories=['x', 'y', 'z', 'a', 'b', 'c']) + tm.assert_categorical_equal(result, expected) + + s = Categorical([0, 1.2, 2], ordered=True) + s2 = Categorical([0, 1.2, 2], ordered=True) + result = union_categoricals([s, s2]) + expected = Categorical([0, 1.2, 2, 0, 1.2, 2], ordered=True) + tm.assert_categorical_equal(result, expected) + + # must exactly match types + s = Categorical([0, 1.2, 2]) + s2 = Categorical([2, 3, 4]) + msg = 'dtype of categories must be the same' + with tm.assert_raises_regex(TypeError, msg): + union_categoricals([s, s2]) + + msg = 'No Categoricals to union' + with tm.assert_raises_regex(ValueError, msg): + union_categoricals([]) + + def test_union_categoricals_nan(self): + # GH 13759 + res = union_categoricals([pd.Categorical([1, 2, np.nan]), + pd.Categorical([3, 2, np.nan])]) + exp = Categorical([1, 2, np.nan, 3, 2, np.nan]) + tm.assert_categorical_equal(res, exp) + + res = union_categoricals([pd.Categorical(['A', 'B']), + pd.Categorical(['B', 'B', np.nan])]) + exp = Categorical(['A', 'B', 'B', 'B', np.nan]) + tm.assert_categorical_equal(res, exp) + + val1 = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-03-01'), + pd.NaT] + val2 = [pd.NaT, pd.Timestamp('2011-01-01'), + pd.Timestamp('2011-02-01')] + + res = union_categoricals([pd.Categorical(val1), pd.Categorical(val2)]) + exp = Categorical(val1 + val2, + categories=[pd.Timestamp('2011-01-01'), + pd.Timestamp('2011-03-01'), + pd.Timestamp('2011-02-01')]) + tm.assert_categorical_equal(res, exp) + + # all NaN + res = union_categoricals([pd.Categorical(np.array([np.nan, np.nan], + dtype=object)), + pd.Categorical(['X'])]) + exp = Categorical([np.nan, np.nan, 'X']) + tm.assert_categorical_equal(res, exp) + + res = union_categoricals([pd.Categorical([np.nan, np.nan]), + pd.Categorical([np.nan, np.nan])]) + exp = Categorical([np.nan, np.nan, np.nan, np.nan]) + tm.assert_categorical_equal(res, 
exp) + + def test_union_categoricals_empty(self): + # GH 13759 + res = union_categoricals([pd.Categorical([]), + pd.Categorical([])]) + exp = Categorical([]) + tm.assert_categorical_equal(res, exp) + + res = union_categoricals([Categorical([]), + Categorical(['1'])]) + exp = Categorical(['1']) + tm.assert_categorical_equal(res, exp) + + def test_union_categorical_same_category(self): + # check fastpath + c1 = Categorical([1, 2, 3, 4], categories=[1, 2, 3, 4]) + c2 = Categorical([3, 2, 1, np.nan], categories=[1, 2, 3, 4]) + res = union_categoricals([c1, c2]) + exp = Categorical([1, 2, 3, 4, 3, 2, 1, np.nan], + categories=[1, 2, 3, 4]) + tm.assert_categorical_equal(res, exp) + + c1 = Categorical(['z', 'z', 'z'], categories=['x', 'y', 'z']) + c2 = Categorical(['x', 'x', 'x'], categories=['x', 'y', 'z']) + res = union_categoricals([c1, c2]) + exp = Categorical(['z', 'z', 'z', 'x', 'x', 'x'], + categories=['x', 'y', 'z']) + tm.assert_categorical_equal(res, exp) + + def test_union_categorical_same_categories_different_order(self): + # https://github.com/pandas-dev/pandas/issues/19096 + c1 = Categorical(['a', 'b', 'c'], categories=['a', 'b', 'c']) + c2 = Categorical(['a', 'b', 'c'], categories=['b', 'a', 'c']) + result = union_categoricals([c1, c2]) + expected = Categorical(['a', 'b', 'c', 'a', 'b', 'c'], + categories=['a', 'b', 'c']) + tm.assert_categorical_equal(result, expected) + + def test_union_categoricals_ordered(self): + c1 = Categorical([1, 2, 3], ordered=True) + c2 = Categorical([1, 2, 3], ordered=False) + + msg = 'Categorical.ordered must be the same' + with tm.assert_raises_regex(TypeError, msg): + union_categoricals([c1, c2]) + + res = union_categoricals([c1, c1]) + exp = Categorical([1, 2, 3, 1, 2, 3], ordered=True) + tm.assert_categorical_equal(res, exp) + + c1 = Categorical([1, 2, 3, np.nan], ordered=True) + c2 = Categorical([3, 2], categories=[1, 2, 3], ordered=True) + + res = union_categoricals([c1, c2]) + exp = Categorical([1, 2, 3, np.nan, 3, 2], 
ordered=True) + tm.assert_categorical_equal(res, exp) + + c1 = Categorical([1, 2, 3], ordered=True) + c2 = Categorical([1, 2, 3], categories=[3, 2, 1], ordered=True) + + msg = "to union ordered Categoricals, all categories must be the same" + with tm.assert_raises_regex(TypeError, msg): + union_categoricals([c1, c2]) + + def test_union_categoricals_ignore_order(self): + # GH 15219 + c1 = Categorical([1, 2, 3], ordered=True) + c2 = Categorical([1, 2, 3], ordered=False) + + res = union_categoricals([c1, c2], ignore_order=True) + exp = Categorical([1, 2, 3, 1, 2, 3]) + tm.assert_categorical_equal(res, exp) + + msg = 'Categorical.ordered must be the same' + with tm.assert_raises_regex(TypeError, msg): + union_categoricals([c1, c2], ignore_order=False) + + res = union_categoricals([c1, c1], ignore_order=True) + exp = Categorical([1, 2, 3, 1, 2, 3]) + tm.assert_categorical_equal(res, exp) + + res = union_categoricals([c1, c1], ignore_order=False) + exp = Categorical([1, 2, 3, 1, 2, 3], + categories=[1, 2, 3], ordered=True) + tm.assert_categorical_equal(res, exp) + + c1 = Categorical([1, 2, 3, np.nan], ordered=True) + c2 = Categorical([3, 2], categories=[1, 2, 3], ordered=True) + + res = union_categoricals([c1, c2], ignore_order=True) + exp = Categorical([1, 2, 3, np.nan, 3, 2]) + tm.assert_categorical_equal(res, exp) + + c1 = Categorical([1, 2, 3], ordered=True) + c2 = Categorical([1, 2, 3], categories=[3, 2, 1], ordered=True) + + res = union_categoricals([c1, c2], ignore_order=True) + exp = Categorical([1, 2, 3, 1, 2, 3]) + tm.assert_categorical_equal(res, exp) + + res = union_categoricals([c2, c1], ignore_order=True, + sort_categories=True) + exp = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3]) + tm.assert_categorical_equal(res, exp) + + c1 = Categorical([1, 2, 3], ordered=True) + c2 = Categorical([4, 5, 6], ordered=True) + result = union_categoricals([c1, c2], ignore_order=True) + expected = Categorical([1, 2, 3, 4, 5, 6]) + tm.assert_categorical_equal(result, 
expected) + + msg = "to union ordered Categoricals, all categories must be the same" + with tm.assert_raises_regex(TypeError, msg): + union_categoricals([c1, c2], ignore_order=False) + + with tm.assert_raises_regex(TypeError, msg): + union_categoricals([c1, c2]) + + def test_union_categoricals_sort(self): + # GH 13846 + c1 = Categorical(['x', 'y', 'z']) + c2 = Categorical(['a', 'b', 'c']) + result = union_categoricals([c1, c2], sort_categories=True) + expected = Categorical(['x', 'y', 'z', 'a', 'b', 'c'], + categories=['a', 'b', 'c', 'x', 'y', 'z']) + tm.assert_categorical_equal(result, expected) + + # fastpath + c1 = Categorical(['a', 'b'], categories=['b', 'a', 'c']) + c2 = Categorical(['b', 'c'], categories=['b', 'a', 'c']) + result = union_categoricals([c1, c2], sort_categories=True) + expected = Categorical(['a', 'b', 'b', 'c'], + categories=['a', 'b', 'c']) + tm.assert_categorical_equal(result, expected) + + c1 = Categorical(['a', 'b'], categories=['c', 'a', 'b']) + c2 = Categorical(['b', 'c'], categories=['c', 'a', 'b']) + result = union_categoricals([c1, c2], sort_categories=True) + expected = Categorical(['a', 'b', 'b', 'c'], + categories=['a', 'b', 'c']) + tm.assert_categorical_equal(result, expected) + + # fastpath - skip resort + c1 = Categorical(['a', 'b'], categories=['a', 'b', 'c']) + c2 = Categorical(['b', 'c'], categories=['a', 'b', 'c']) + result = union_categoricals([c1, c2], sort_categories=True) + expected = Categorical(['a', 'b', 'b', 'c'], + categories=['a', 'b', 'c']) + tm.assert_categorical_equal(result, expected) + + c1 = Categorical(['x', np.nan]) + c2 = Categorical([np.nan, 'b']) + result = union_categoricals([c1, c2], sort_categories=True) + expected = Categorical(['x', np.nan, np.nan, 'b'], + categories=['b', 'x']) + tm.assert_categorical_equal(result, expected) + + c1 = Categorical([np.nan]) + c2 = Categorical([np.nan]) + result = union_categoricals([c1, c2], sort_categories=True) + expected = Categorical([np.nan, np.nan]) + 
tm.assert_categorical_equal(result, expected) + + c1 = Categorical([]) + c2 = Categorical([]) + result = union_categoricals([c1, c2], sort_categories=True) + expected = Categorical([]) + tm.assert_categorical_equal(result, expected) + + c1 = Categorical(['b', 'a'], categories=['b', 'a', 'c'], ordered=True) + c2 = Categorical(['a', 'c'], categories=['b', 'a', 'c'], ordered=True) + with pytest.raises(TypeError): + union_categoricals([c1, c2], sort_categories=True) + + def test_union_categoricals_sort_false(self): + # GH 13846 + c1 = Categorical(['x', 'y', 'z']) + c2 = Categorical(['a', 'b', 'c']) + result = union_categoricals([c1, c2], sort_categories=False) + expected = Categorical(['x', 'y', 'z', 'a', 'b', 'c'], + categories=['x', 'y', 'z', 'a', 'b', 'c']) + tm.assert_categorical_equal(result, expected) + + # fastpath + c1 = Categorical(['a', 'b'], categories=['b', 'a', 'c']) + c2 = Categorical(['b', 'c'], categories=['b', 'a', 'c']) + result = union_categoricals([c1, c2], sort_categories=False) + expected = Categorical(['a', 'b', 'b', 'c'], + categories=['b', 'a', 'c']) + tm.assert_categorical_equal(result, expected) + + # fastpath - skip resort + c1 = Categorical(['a', 'b'], categories=['a', 'b', 'c']) + c2 = Categorical(['b', 'c'], categories=['a', 'b', 'c']) + result = union_categoricals([c1, c2], sort_categories=False) + expected = Categorical(['a', 'b', 'b', 'c'], + categories=['a', 'b', 'c']) + tm.assert_categorical_equal(result, expected) + + c1 = Categorical(['x', np.nan]) + c2 = Categorical([np.nan, 'b']) + result = union_categoricals([c1, c2], sort_categories=False) + expected = Categorical(['x', np.nan, np.nan, 'b'], + categories=['x', 'b']) + tm.assert_categorical_equal(result, expected) + + c1 = Categorical([np.nan]) + c2 = Categorical([np.nan]) + result = union_categoricals([c1, c2], sort_categories=False) + expected = Categorical([np.nan, np.nan]) + tm.assert_categorical_equal(result, expected) + + c1 = Categorical([]) + c2 = Categorical([]) + 
result = union_categoricals([c1, c2], sort_categories=False) + expected = Categorical([]) + tm.assert_categorical_equal(result, expected) + + c1 = Categorical(['b', 'a'], categories=['b', 'a', 'c'], ordered=True) + c2 = Categorical(['a', 'c'], categories=['b', 'a', 'c'], ordered=True) + result = union_categoricals([c1, c2], sort_categories=False) + expected = Categorical(['b', 'a', 'a', 'c'], + categories=['b', 'a', 'c'], ordered=True) + tm.assert_categorical_equal(result, expected) + + def test_union_categorical_unwrap(self): + # GH 14173 + c1 = Categorical(['a', 'b']) + c2 = pd.Series(['b', 'c'], dtype='category') + result = union_categoricals([c1, c2]) + expected = Categorical(['a', 'b', 'b', 'c']) + tm.assert_categorical_equal(result, expected) + + c2 = CategoricalIndex(c2) + result = union_categoricals([c1, c2]) + tm.assert_categorical_equal(result, expected) + + c1 = Series(c1) + result = union_categoricals([c1, c2]) + tm.assert_categorical_equal(result, expected) + + with pytest.raises(TypeError): + union_categoricals([c1, ['a', 'b', 'c']]) diff --git a/pandas/tests/reshape/test_util.py b/pandas/tests/reshape/test_util.py new file mode 100644 index 0000000000000..e4a9591b95c26 --- /dev/null +++ b/pandas/tests/reshape/test_util.py @@ -0,0 +1,49 @@ + +import numpy as np +from pandas import date_range, Index +import pandas.util.testing as tm +from pandas.core.reshape.util import cartesian_product + + +class TestCartesianProduct(object): + + def test_simple(self): + x, y = list('ABC'), [1, 22] + result1, result2 = cartesian_product([x, y]) + expected1 = np.array(['A', 'A', 'B', 'B', 'C', 'C']) + expected2 = np.array([1, 22, 1, 22, 1, 22]) + tm.assert_numpy_array_equal(result1, expected1) + tm.assert_numpy_array_equal(result2, expected2) + + def test_datetimeindex(self): + # regression test for GitHub issue #6439 + # make sure that the ordering on datetimeindex is consistent + x = date_range('2000-01-01', periods=2) + result1, result2 = [Index(y).day for y in 
cartesian_product([x, x])] + expected1 = Index([1, 1, 2, 2]) + expected2 = Index([1, 2, 1, 2]) + tm.assert_index_equal(result1, expected1) + tm.assert_index_equal(result2, expected2) + + def test_empty(self): + # product of empty factors + X = [[], [0, 1], []] + Y = [[], [], ['a', 'b', 'c']] + for x, y in zip(X, Y): + expected1 = np.array([], dtype=np.asarray(x).dtype) + expected2 = np.array([], dtype=np.asarray(y).dtype) + result1, result2 = cartesian_product([x, y]) + tm.assert_numpy_array_equal(result1, expected1) + tm.assert_numpy_array_equal(result2, expected2) + + # empty product (empty input): + result = cartesian_product([]) + expected = [] + assert result == expected + + def test_invalid_input(self): + invalid_inputs = [1, [1], [1, 2], [[1], 2], + 'a', ['a'], ['a', 'b'], [['a'], 'b']] + msg = "Input must be a list-like of list-likes" + for X in invalid_inputs: + tm.assert_raises_regex(TypeError, msg, cartesian_product, X=X) diff --git a/pandas/tests/scalar/interval/__init__.py b/pandas/tests/scalar/interval/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/scalar/interval/test_interval.py b/pandas/tests/scalar/interval/test_interval.py new file mode 100644 index 0000000000000..c9e6e84d226a8 --- /dev/null +++ b/pandas/tests/scalar/interval/test_interval.py @@ -0,0 +1,202 @@ +from __future__ import division + +import numpy as np +from pandas import Interval, Timestamp, Timedelta +import pandas.core.common as com + +import pytest +import pandas.util.testing as tm + + +@pytest.fixture +def interval(): + return Interval(0, 1) + + +class TestInterval(object): + + def test_properties(self, interval): + assert interval.closed == 'right' + assert interval.left == 0 + assert interval.right == 1 + assert interval.mid == 0.5 + + def test_repr(self, interval): + assert repr(interval) == "Interval(0, 1, closed='right')" + assert str(interval) == "(0, 1]" + + interval_left = Interval(0, 1, closed='left') + assert 
repr(interval_left) == "Interval(0, 1, closed='left')" + assert str(interval_left) == "[0, 1)" + + def test_contains(self, interval): + assert 0.5 in interval + assert 1 in interval + assert 0 not in interval + + msg = "__contains__ not defined for two intervals" + with tm.assert_raises_regex(TypeError, msg): + interval in interval + + interval_both = Interval(0, 1, closed='both') + assert 0 in interval_both + assert 1 in interval_both + + interval_neither = Interval(0, 1, closed='neither') + assert 0 not in interval_neither + assert 0.5 in interval_neither + assert 1 not in interval_neither + + def test_equal(self): + assert Interval(0, 1) == Interval(0, 1, closed='right') + assert Interval(0, 1) != Interval(0, 1, closed='left') + assert Interval(0, 1) != 0 + + def test_comparison(self): + with tm.assert_raises_regex(TypeError, 'unorderable types'): + Interval(0, 1) < 2 + + assert Interval(0, 1) < Interval(1, 2) + assert Interval(0, 1) < Interval(0, 2) + assert Interval(0, 1) < Interval(0.5, 1.5) + assert Interval(0, 1) <= Interval(0, 1) + assert Interval(0, 1) > Interval(-1, 2) + assert Interval(0, 1) >= Interval(0, 1) + + def test_hash(self, interval): + # should not raise + hash(interval) + + @pytest.mark.parametrize('left, right, expected', [ + (0, 5, 5), + (-2, 5.5, 7.5), + (10, 10, 0), + (10, np.inf, np.inf), + (-np.inf, -5, np.inf), + (-np.inf, np.inf, np.inf), + (Timedelta('0 days'), Timedelta('5 days'), Timedelta('5 days')), + (Timedelta('10 days'), Timedelta('10 days'), Timedelta('0 days')), + (Timedelta('1H10M'), Timedelta('5H5M'), Timedelta('3H55M')), + (Timedelta('5S'), Timedelta('1H'), Timedelta('59M55S'))]) + def test_length(self, left, right, expected): + # GH 18789 + iv = Interval(left, right) + result = iv.length + assert result == expected + + @pytest.mark.parametrize('left, right, expected', [ + ('2017-01-01', '2017-01-06', '5 days'), + ('2017-01-01', '2017-01-01 12:00:00', '12 hours'), + ('2017-01-01 12:00', '2017-01-01 12:00:00', '0 days'), + 
('2017-01-01 12:01', '2017-01-05 17:31:00', '4 days 5 hours 30 min')]) + @pytest.mark.parametrize('tz', (None, 'UTC', 'CET', 'US/Eastern')) + def test_length_timestamp(self, tz, left, right, expected): + # GH 18789 + iv = Interval(Timestamp(left, tz=tz), Timestamp(right, tz=tz)) + result = iv.length + expected = Timedelta(expected) + assert result == expected + + @pytest.mark.parametrize('left, right', [ + ('a', 'z'), + (('a', 'b'), ('c', 'd')), + (list('AB'), list('ab')), + (Interval(0, 1), Interval(1, 2))]) + def test_length_errors(self, left, right): + # GH 18789 + iv = Interval(left, right) + msg = 'cannot compute length between .* and .*' + with tm.assert_raises_regex(TypeError, msg): + iv.length + + def test_math_add(self, interval): + expected = Interval(1, 2) + actual = interval + 1 + assert expected == actual + + expected = Interval(1, 2) + actual = 1 + interval + assert expected == actual + + actual = interval + actual += 1 + assert expected == actual + + msg = r"unsupported operand type\(s\) for \+" + with tm.assert_raises_regex(TypeError, msg): + interval + Interval(1, 2) + + with tm.assert_raises_regex(TypeError, msg): + interval + 'foo' + + def test_math_sub(self, interval): + expected = Interval(-1, 0) + actual = interval - 1 + assert expected == actual + + actual = interval + actual -= 1 + assert expected == actual + + msg = r"unsupported operand type\(s\) for -" + with tm.assert_raises_regex(TypeError, msg): + interval - Interval(1, 2) + + with tm.assert_raises_regex(TypeError, msg): + interval - 'foo' + + def test_math_mult(self, interval): + expected = Interval(0, 2) + actual = interval * 2 + assert expected == actual + + expected = Interval(0, 2) + actual = 2 * interval + assert expected == actual + + actual = interval + actual *= 2 + assert expected == actual + + msg = r"unsupported operand type\(s\) for \*" + with tm.assert_raises_regex(TypeError, msg): + interval * Interval(1, 2) + + msg = r"can\'t multiply sequence by non-int" + with 
tm.assert_raises_regex(TypeError, msg): + interval * 'foo' + + def test_math_div(self, interval): + expected = Interval(0, 0.5) + actual = interval / 2.0 + assert expected == actual + + actual = interval + actual /= 2.0 + assert expected == actual + + msg = r"unsupported operand type\(s\) for /" + with tm.assert_raises_regex(TypeError, msg): + interval / Interval(1, 2) + + with tm.assert_raises_regex(TypeError, msg): + interval / 'foo' + + def test_constructor_errors(self): + msg = "invalid option for 'closed': foo" + with tm.assert_raises_regex(ValueError, msg): + Interval(0, 1, closed='foo') + + msg = 'left side of interval must be <= right side' + with tm.assert_raises_regex(ValueError, msg): + Interval(1, 0) + + @pytest.mark.parametrize('tz_left, tz_right', [ + (None, 'UTC'), ('UTC', None), ('UTC', 'US/Eastern')]) + def test_constructor_errors_tz(self, tz_left, tz_right): + # GH 18538 + left = Timestamp('2017-01-01', tz=tz_left) + right = Timestamp('2017-01-02', tz=tz_right) + error = TypeError if com._any_none(tz_left, tz_right) else ValueError + with pytest.raises(error): + Interval(left, right) diff --git a/pandas/tests/scalar/period/__init__.py b/pandas/tests/scalar/period/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/scalar/test_period_asfreq.py b/pandas/tests/scalar/period/test_asfreq.py similarity index 57% rename from pandas/tests/scalar/test_period_asfreq.py rename to pandas/tests/scalar/period/test_asfreq.py index d311fef8a826d..474d19809b03c 100644 --- a/pandas/tests/scalar/test_period_asfreq.py +++ b/pandas/tests/scalar/period/test_asfreq.py @@ -1,21 +1,53 @@ +import pytest + +from pandas.errors import OutOfBoundsDatetime + import pandas as pd from pandas import Period, offsets from pandas.util import testing as tm -from pandas.tseries.frequencies import _period_code_map - - -class TestFreqConversion(tm.TestCase): - "Test frequency conversion of date objects" +from pandas._libs.tslibs.frequencies 
import _period_code_map + + +class TestFreqConversion(object): + """Test frequency conversion of date objects""" + @pytest.mark.parametrize('freq', ['A', 'Q', 'M', 'W', 'B', 'D']) + def test_asfreq_near_zero(self, freq): + # GH#19643, GH#19650 + per = Period('0001-01-01', freq=freq) + tup1 = (per.year, per.month, per.day) + + prev = per - 1 + assert (per - 1).ordinal == per.ordinal - 1 + tup2 = (prev.year, prev.month, prev.day) + assert tup2 < tup1 + + def test_asfreq_near_zero_weekly(self): + # GH#19834 + per1 = Period('0001-01-01', 'D') + 6 + per2 = Period('0001-01-01', 'D') - 6 + week1 = per1.asfreq('W') + week2 = per2.asfreq('W') + assert week1 != week2 + assert week1.asfreq('D', 'E') >= per1 + assert week2.asfreq('D', 'S') <= per2 + + @pytest.mark.xfail(reason='GH#19643 period_helper asfreq functions fail ' + 'to check for overflows') + def test_to_timestamp_out_of_bounds(self): + # GH#19643, currently gives Timestamp('1754-08-30 22:43:41.128654848') + per = Period('0001-01-01', freq='B') + with pytest.raises(OutOfBoundsDatetime): + per.to_timestamp() def test_asfreq_corner(self): val = Period(freq='A', year=2007) result1 = val.asfreq('5t') result2 = val.asfreq('t') expected = Period('2007-12-31 23:59', freq='t') - self.assertEqual(result1.ordinal, expected.ordinal) - self.assertEqual(result1.freqstr, '5T') - self.assertEqual(result2.ordinal, expected.ordinal) - self.assertEqual(result2.freqstr, 'T') + assert result1.ordinal == expected.ordinal + assert result1.freqstr == '5T' + assert result2.ordinal == expected.ordinal + assert result2.freqstr == 'T' def test_conv_annual(self): # frequency conversion tests: from Annual Frequency @@ -55,35 +87,35 @@ def test_conv_annual(self): ival_ANOV_to_D_end = Period(freq='D', year=2007, month=11, day=30) ival_ANOV_to_D_start = Period(freq='D', year=2006, month=12, day=1) - self.assertEqual(ival_A.asfreq('Q', 'S'), ival_A_to_Q_start) - self.assertEqual(ival_A.asfreq('Q', 'e'), ival_A_to_Q_end) -
self.assertEqual(ival_A.asfreq('M', 's'), ival_A_to_M_start) - self.assertEqual(ival_A.asfreq('M', 'E'), ival_A_to_M_end) - self.assertEqual(ival_A.asfreq('W', 'S'), ival_A_to_W_start) - self.assertEqual(ival_A.asfreq('W', 'E'), ival_A_to_W_end) - self.assertEqual(ival_A.asfreq('B', 'S'), ival_A_to_B_start) - self.assertEqual(ival_A.asfreq('B', 'E'), ival_A_to_B_end) - self.assertEqual(ival_A.asfreq('D', 'S'), ival_A_to_D_start) - self.assertEqual(ival_A.asfreq('D', 'E'), ival_A_to_D_end) - self.assertEqual(ival_A.asfreq('H', 'S'), ival_A_to_H_start) - self.assertEqual(ival_A.asfreq('H', 'E'), ival_A_to_H_end) - self.assertEqual(ival_A.asfreq('min', 'S'), ival_A_to_T_start) - self.assertEqual(ival_A.asfreq('min', 'E'), ival_A_to_T_end) - self.assertEqual(ival_A.asfreq('T', 'S'), ival_A_to_T_start) - self.assertEqual(ival_A.asfreq('T', 'E'), ival_A_to_T_end) - self.assertEqual(ival_A.asfreq('S', 'S'), ival_A_to_S_start) - self.assertEqual(ival_A.asfreq('S', 'E'), ival_A_to_S_end) - - self.assertEqual(ival_AJAN.asfreq('D', 'S'), ival_AJAN_to_D_start) - self.assertEqual(ival_AJAN.asfreq('D', 'E'), ival_AJAN_to_D_end) - - self.assertEqual(ival_AJUN.asfreq('D', 'S'), ival_AJUN_to_D_start) - self.assertEqual(ival_AJUN.asfreq('D', 'E'), ival_AJUN_to_D_end) - - self.assertEqual(ival_ANOV.asfreq('D', 'S'), ival_ANOV_to_D_start) - self.assertEqual(ival_ANOV.asfreq('D', 'E'), ival_ANOV_to_D_end) - - self.assertEqual(ival_A.asfreq('A'), ival_A) + assert ival_A.asfreq('Q', 'S') == ival_A_to_Q_start + assert ival_A.asfreq('Q', 'e') == ival_A_to_Q_end + assert ival_A.asfreq('M', 's') == ival_A_to_M_start + assert ival_A.asfreq('M', 'E') == ival_A_to_M_end + assert ival_A.asfreq('W', 'S') == ival_A_to_W_start + assert ival_A.asfreq('W', 'E') == ival_A_to_W_end + assert ival_A.asfreq('B', 'S') == ival_A_to_B_start + assert ival_A.asfreq('B', 'E') == ival_A_to_B_end + assert ival_A.asfreq('D', 'S') == ival_A_to_D_start + assert ival_A.asfreq('D', 'E') == ival_A_to_D_end + assert 
ival_A.asfreq('H', 'S') == ival_A_to_H_start + assert ival_A.asfreq('H', 'E') == ival_A_to_H_end + assert ival_A.asfreq('min', 'S') == ival_A_to_T_start + assert ival_A.asfreq('min', 'E') == ival_A_to_T_end + assert ival_A.asfreq('T', 'S') == ival_A_to_T_start + assert ival_A.asfreq('T', 'E') == ival_A_to_T_end + assert ival_A.asfreq('S', 'S') == ival_A_to_S_start + assert ival_A.asfreq('S', 'E') == ival_A_to_S_end + + assert ival_AJAN.asfreq('D', 'S') == ival_AJAN_to_D_start + assert ival_AJAN.asfreq('D', 'E') == ival_AJAN_to_D_end + + assert ival_AJUN.asfreq('D', 'S') == ival_AJUN_to_D_start + assert ival_AJUN.asfreq('D', 'E') == ival_AJUN_to_D_end + + assert ival_ANOV.asfreq('D', 'S') == ival_ANOV_to_D_start + assert ival_ANOV.asfreq('D', 'E') == ival_ANOV_to_D_end + + assert ival_A.asfreq('A') == ival_A def test_conv_quarterly(self): # frequency conversion tests: from Quarterly Frequency @@ -120,30 +152,30 @@ def test_conv_quarterly(self): ival_QEJUN_to_D_start = Period(freq='D', year=2006, month=7, day=1) ival_QEJUN_to_D_end = Period(freq='D', year=2006, month=9, day=30) - self.assertEqual(ival_Q.asfreq('A'), ival_Q_to_A) - self.assertEqual(ival_Q_end_of_year.asfreq('A'), ival_Q_to_A) - - self.assertEqual(ival_Q.asfreq('M', 'S'), ival_Q_to_M_start) - self.assertEqual(ival_Q.asfreq('M', 'E'), ival_Q_to_M_end) - self.assertEqual(ival_Q.asfreq('W', 'S'), ival_Q_to_W_start) - self.assertEqual(ival_Q.asfreq('W', 'E'), ival_Q_to_W_end) - self.assertEqual(ival_Q.asfreq('B', 'S'), ival_Q_to_B_start) - self.assertEqual(ival_Q.asfreq('B', 'E'), ival_Q_to_B_end) - self.assertEqual(ival_Q.asfreq('D', 'S'), ival_Q_to_D_start) - self.assertEqual(ival_Q.asfreq('D', 'E'), ival_Q_to_D_end) - self.assertEqual(ival_Q.asfreq('H', 'S'), ival_Q_to_H_start) - self.assertEqual(ival_Q.asfreq('H', 'E'), ival_Q_to_H_end) - self.assertEqual(ival_Q.asfreq('Min', 'S'), ival_Q_to_T_start) - self.assertEqual(ival_Q.asfreq('Min', 'E'), ival_Q_to_T_end) - self.assertEqual(ival_Q.asfreq('S', 
'S'), ival_Q_to_S_start) - self.assertEqual(ival_Q.asfreq('S', 'E'), ival_Q_to_S_end) - - self.assertEqual(ival_QEJAN.asfreq('D', 'S'), ival_QEJAN_to_D_start) - self.assertEqual(ival_QEJAN.asfreq('D', 'E'), ival_QEJAN_to_D_end) - self.assertEqual(ival_QEJUN.asfreq('D', 'S'), ival_QEJUN_to_D_start) - self.assertEqual(ival_QEJUN.asfreq('D', 'E'), ival_QEJUN_to_D_end) - - self.assertEqual(ival_Q.asfreq('Q'), ival_Q) + assert ival_Q.asfreq('A') == ival_Q_to_A + assert ival_Q_end_of_year.asfreq('A') == ival_Q_to_A + + assert ival_Q.asfreq('M', 'S') == ival_Q_to_M_start + assert ival_Q.asfreq('M', 'E') == ival_Q_to_M_end + assert ival_Q.asfreq('W', 'S') == ival_Q_to_W_start + assert ival_Q.asfreq('W', 'E') == ival_Q_to_W_end + assert ival_Q.asfreq('B', 'S') == ival_Q_to_B_start + assert ival_Q.asfreq('B', 'E') == ival_Q_to_B_end + assert ival_Q.asfreq('D', 'S') == ival_Q_to_D_start + assert ival_Q.asfreq('D', 'E') == ival_Q_to_D_end + assert ival_Q.asfreq('H', 'S') == ival_Q_to_H_start + assert ival_Q.asfreq('H', 'E') == ival_Q_to_H_end + assert ival_Q.asfreq('Min', 'S') == ival_Q_to_T_start + assert ival_Q.asfreq('Min', 'E') == ival_Q_to_T_end + assert ival_Q.asfreq('S', 'S') == ival_Q_to_S_start + assert ival_Q.asfreq('S', 'E') == ival_Q_to_S_end + + assert ival_QEJAN.asfreq('D', 'S') == ival_QEJAN_to_D_start + assert ival_QEJAN.asfreq('D', 'E') == ival_QEJAN_to_D_end + assert ival_QEJUN.asfreq('D', 'S') == ival_QEJUN_to_D_start + assert ival_QEJUN.asfreq('D', 'E') == ival_QEJUN_to_D_end + + assert ival_Q.asfreq('Q') == ival_Q def test_conv_monthly(self): # frequency conversion tests: from Monthly Frequency @@ -170,25 +202,25 @@ def test_conv_monthly(self): ival_M_to_S_end = Period(freq='S', year=2007, month=1, day=31, hour=23, minute=59, second=59) - self.assertEqual(ival_M.asfreq('A'), ival_M_to_A) - self.assertEqual(ival_M_end_of_year.asfreq('A'), ival_M_to_A) - self.assertEqual(ival_M.asfreq('Q'), ival_M_to_Q) - self.assertEqual(ival_M_end_of_quarter.asfreq('Q'), 
ival_M_to_Q) - - self.assertEqual(ival_M.asfreq('W', 'S'), ival_M_to_W_start) - self.assertEqual(ival_M.asfreq('W', 'E'), ival_M_to_W_end) - self.assertEqual(ival_M.asfreq('B', 'S'), ival_M_to_B_start) - self.assertEqual(ival_M.asfreq('B', 'E'), ival_M_to_B_end) - self.assertEqual(ival_M.asfreq('D', 'S'), ival_M_to_D_start) - self.assertEqual(ival_M.asfreq('D', 'E'), ival_M_to_D_end) - self.assertEqual(ival_M.asfreq('H', 'S'), ival_M_to_H_start) - self.assertEqual(ival_M.asfreq('H', 'E'), ival_M_to_H_end) - self.assertEqual(ival_M.asfreq('Min', 'S'), ival_M_to_T_start) - self.assertEqual(ival_M.asfreq('Min', 'E'), ival_M_to_T_end) - self.assertEqual(ival_M.asfreq('S', 'S'), ival_M_to_S_start) - self.assertEqual(ival_M.asfreq('S', 'E'), ival_M_to_S_end) - - self.assertEqual(ival_M.asfreq('M'), ival_M) + assert ival_M.asfreq('A') == ival_M_to_A + assert ival_M_end_of_year.asfreq('A') == ival_M_to_A + assert ival_M.asfreq('Q') == ival_M_to_Q + assert ival_M_end_of_quarter.asfreq('Q') == ival_M_to_Q + + assert ival_M.asfreq('W', 'S') == ival_M_to_W_start + assert ival_M.asfreq('W', 'E') == ival_M_to_W_end + assert ival_M.asfreq('B', 'S') == ival_M_to_B_start + assert ival_M.asfreq('B', 'E') == ival_M_to_B_end + assert ival_M.asfreq('D', 'S') == ival_M_to_D_start + assert ival_M.asfreq('D', 'E') == ival_M_to_D_end + assert ival_M.asfreq('H', 'S') == ival_M_to_H_start + assert ival_M.asfreq('H', 'E') == ival_M_to_H_end + assert ival_M.asfreq('Min', 'S') == ival_M_to_T_start + assert ival_M.asfreq('Min', 'E') == ival_M_to_T_end + assert ival_M.asfreq('S', 'S') == ival_M_to_S_start + assert ival_M.asfreq('S', 'E') == ival_M_to_S_end + + assert ival_M.asfreq('M') == ival_M def test_conv_weekly(self): # frequency conversion tests: from Weekly Frequency @@ -254,67 +286,66 @@ def test_conv_weekly(self): ival_W_to_S_end = Period(freq='S', year=2007, month=1, day=7, hour=23, minute=59, second=59) - self.assertEqual(ival_W.asfreq('A'), ival_W_to_A) - 
self.assertEqual(ival_W_end_of_year.asfreq('A'), - ival_W_to_A_end_of_year) - self.assertEqual(ival_W.asfreq('Q'), ival_W_to_Q) - self.assertEqual(ival_W_end_of_quarter.asfreq('Q'), - ival_W_to_Q_end_of_quarter) - self.assertEqual(ival_W.asfreq('M'), ival_W_to_M) - self.assertEqual(ival_W_end_of_month.asfreq('M'), - ival_W_to_M_end_of_month) - - self.assertEqual(ival_W.asfreq('B', 'S'), ival_W_to_B_start) - self.assertEqual(ival_W.asfreq('B', 'E'), ival_W_to_B_end) - - self.assertEqual(ival_W.asfreq('D', 'S'), ival_W_to_D_start) - self.assertEqual(ival_W.asfreq('D', 'E'), ival_W_to_D_end) - - self.assertEqual(ival_WSUN.asfreq('D', 'S'), ival_WSUN_to_D_start) - self.assertEqual(ival_WSUN.asfreq('D', 'E'), ival_WSUN_to_D_end) - self.assertEqual(ival_WSAT.asfreq('D', 'S'), ival_WSAT_to_D_start) - self.assertEqual(ival_WSAT.asfreq('D', 'E'), ival_WSAT_to_D_end) - self.assertEqual(ival_WFRI.asfreq('D', 'S'), ival_WFRI_to_D_start) - self.assertEqual(ival_WFRI.asfreq('D', 'E'), ival_WFRI_to_D_end) - self.assertEqual(ival_WTHU.asfreq('D', 'S'), ival_WTHU_to_D_start) - self.assertEqual(ival_WTHU.asfreq('D', 'E'), ival_WTHU_to_D_end) - self.assertEqual(ival_WWED.asfreq('D', 'S'), ival_WWED_to_D_start) - self.assertEqual(ival_WWED.asfreq('D', 'E'), ival_WWED_to_D_end) - self.assertEqual(ival_WTUE.asfreq('D', 'S'), ival_WTUE_to_D_start) - self.assertEqual(ival_WTUE.asfreq('D', 'E'), ival_WTUE_to_D_end) - self.assertEqual(ival_WMON.asfreq('D', 'S'), ival_WMON_to_D_start) - self.assertEqual(ival_WMON.asfreq('D', 'E'), ival_WMON_to_D_end) - - self.assertEqual(ival_W.asfreq('H', 'S'), ival_W_to_H_start) - self.assertEqual(ival_W.asfreq('H', 'E'), ival_W_to_H_end) - self.assertEqual(ival_W.asfreq('Min', 'S'), ival_W_to_T_start) - self.assertEqual(ival_W.asfreq('Min', 'E'), ival_W_to_T_end) - self.assertEqual(ival_W.asfreq('S', 'S'), ival_W_to_S_start) - self.assertEqual(ival_W.asfreq('S', 'E'), ival_W_to_S_end) - - self.assertEqual(ival_W.asfreq('W'), ival_W) - - msg = 
pd.tseries.frequencies._INVALID_FREQ_ERROR - with self.assertRaisesRegexp(ValueError, msg): + assert ival_W.asfreq('A') == ival_W_to_A + assert ival_W_end_of_year.asfreq('A') == ival_W_to_A_end_of_year + + assert ival_W.asfreq('Q') == ival_W_to_Q + assert ival_W_end_of_quarter.asfreq('Q') == ival_W_to_Q_end_of_quarter + + assert ival_W.asfreq('M') == ival_W_to_M + assert ival_W_end_of_month.asfreq('M') == ival_W_to_M_end_of_month + + assert ival_W.asfreq('B', 'S') == ival_W_to_B_start + assert ival_W.asfreq('B', 'E') == ival_W_to_B_end + + assert ival_W.asfreq('D', 'S') == ival_W_to_D_start + assert ival_W.asfreq('D', 'E') == ival_W_to_D_end + + assert ival_WSUN.asfreq('D', 'S') == ival_WSUN_to_D_start + assert ival_WSUN.asfreq('D', 'E') == ival_WSUN_to_D_end + assert ival_WSAT.asfreq('D', 'S') == ival_WSAT_to_D_start + assert ival_WSAT.asfreq('D', 'E') == ival_WSAT_to_D_end + assert ival_WFRI.asfreq('D', 'S') == ival_WFRI_to_D_start + assert ival_WFRI.asfreq('D', 'E') == ival_WFRI_to_D_end + assert ival_WTHU.asfreq('D', 'S') == ival_WTHU_to_D_start + assert ival_WTHU.asfreq('D', 'E') == ival_WTHU_to_D_end + assert ival_WWED.asfreq('D', 'S') == ival_WWED_to_D_start + assert ival_WWED.asfreq('D', 'E') == ival_WWED_to_D_end + assert ival_WTUE.asfreq('D', 'S') == ival_WTUE_to_D_start + assert ival_WTUE.asfreq('D', 'E') == ival_WTUE_to_D_end + assert ival_WMON.asfreq('D', 'S') == ival_WMON_to_D_start + assert ival_WMON.asfreq('D', 'E') == ival_WMON_to_D_end + + assert ival_W.asfreq('H', 'S') == ival_W_to_H_start + assert ival_W.asfreq('H', 'E') == ival_W_to_H_end + assert ival_W.asfreq('Min', 'S') == ival_W_to_T_start + assert ival_W.asfreq('Min', 'E') == ival_W_to_T_end + assert ival_W.asfreq('S', 'S') == ival_W_to_S_start + assert ival_W.asfreq('S', 'E') == ival_W_to_S_end + + assert ival_W.asfreq('W') == ival_W + + msg = pd._libs.tslibs.frequencies._INVALID_FREQ_ERROR + with tm.assert_raises_regex(ValueError, msg): ival_W.asfreq('WK') def 
test_conv_weekly_legacy(self): # frequency conversion tests: from Weekly Frequency - msg = pd.tseries.frequencies._INVALID_FREQ_ERROR - with self.assertRaisesRegexp(ValueError, msg): + msg = pd._libs.tslibs.frequencies._INVALID_FREQ_ERROR + with tm.assert_raises_regex(ValueError, msg): Period(freq='WK', year=2007, month=1, day=1) - with self.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): Period(freq='WK-SAT', year=2007, month=1, day=6) - with self.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): Period(freq='WK-FRI', year=2007, month=1, day=5) - with self.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): Period(freq='WK-THU', year=2007, month=1, day=4) - with self.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): Period(freq='WK-WED', year=2007, month=1, day=3) - with self.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): Period(freq='WK-TUE', year=2007, month=1, day=2) - with self.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): Period(freq='WK-MON', year=2007, month=1, day=1) def test_conv_business(self): @@ -342,25 +373,25 @@ def test_conv_business(self): ival_B_to_S_end = Period(freq='S', year=2007, month=1, day=1, hour=23, minute=59, second=59) - self.assertEqual(ival_B.asfreq('A'), ival_B_to_A) - self.assertEqual(ival_B_end_of_year.asfreq('A'), ival_B_to_A) - self.assertEqual(ival_B.asfreq('Q'), ival_B_to_Q) - self.assertEqual(ival_B_end_of_quarter.asfreq('Q'), ival_B_to_Q) - self.assertEqual(ival_B.asfreq('M'), ival_B_to_M) - self.assertEqual(ival_B_end_of_month.asfreq('M'), ival_B_to_M) - self.assertEqual(ival_B.asfreq('W'), ival_B_to_W) - self.assertEqual(ival_B_end_of_week.asfreq('W'), ival_B_to_W) + assert ival_B.asfreq('A') == ival_B_to_A + assert ival_B_end_of_year.asfreq('A') == ival_B_to_A + assert ival_B.asfreq('Q') == ival_B_to_Q + 
assert ival_B_end_of_quarter.asfreq('Q') == ival_B_to_Q + assert ival_B.asfreq('M') == ival_B_to_M + assert ival_B_end_of_month.asfreq('M') == ival_B_to_M + assert ival_B.asfreq('W') == ival_B_to_W + assert ival_B_end_of_week.asfreq('W') == ival_B_to_W - self.assertEqual(ival_B.asfreq('D'), ival_B_to_D) + assert ival_B.asfreq('D') == ival_B_to_D - self.assertEqual(ival_B.asfreq('H', 'S'), ival_B_to_H_start) - self.assertEqual(ival_B.asfreq('H', 'E'), ival_B_to_H_end) - self.assertEqual(ival_B.asfreq('Min', 'S'), ival_B_to_T_start) - self.assertEqual(ival_B.asfreq('Min', 'E'), ival_B_to_T_end) - self.assertEqual(ival_B.asfreq('S', 'S'), ival_B_to_S_start) - self.assertEqual(ival_B.asfreq('S', 'E'), ival_B_to_S_end) + assert ival_B.asfreq('H', 'S') == ival_B_to_H_start + assert ival_B.asfreq('H', 'E') == ival_B_to_H_end + assert ival_B.asfreq('Min', 'S') == ival_B_to_T_start + assert ival_B.asfreq('Min', 'E') == ival_B_to_T_end + assert ival_B.asfreq('S', 'S') == ival_B_to_S_start + assert ival_B.asfreq('S', 'E') == ival_B_to_S_end - self.assertEqual(ival_B.asfreq('B'), ival_B) + assert ival_B.asfreq('B') == ival_B def test_conv_daily(self): # frequency conversion tests: from Business Frequency" @@ -405,39 +436,36 @@ def test_conv_daily(self): ival_D_to_S_end = Period(freq='S', year=2007, month=1, day=1, hour=23, minute=59, second=59) - self.assertEqual(ival_D.asfreq('A'), ival_D_to_A) - - self.assertEqual(ival_D_end_of_quarter.asfreq('A-JAN'), - ival_Deoq_to_AJAN) - self.assertEqual(ival_D_end_of_quarter.asfreq('A-JUN'), - ival_Deoq_to_AJUN) - self.assertEqual(ival_D_end_of_quarter.asfreq('A-DEC'), - ival_Deoq_to_ADEC) - - self.assertEqual(ival_D_end_of_year.asfreq('A'), ival_D_to_A) - self.assertEqual(ival_D_end_of_quarter.asfreq('Q'), ival_D_to_QEDEC) - self.assertEqual(ival_D.asfreq("Q-JAN"), ival_D_to_QEJAN) - self.assertEqual(ival_D.asfreq("Q-JUN"), ival_D_to_QEJUN) - self.assertEqual(ival_D.asfreq("Q-DEC"), ival_D_to_QEDEC) - 
self.assertEqual(ival_D.asfreq('M'), ival_D_to_M) - self.assertEqual(ival_D_end_of_month.asfreq('M'), ival_D_to_M) - self.assertEqual(ival_D.asfreq('W'), ival_D_to_W) - self.assertEqual(ival_D_end_of_week.asfreq('W'), ival_D_to_W) - - self.assertEqual(ival_D_friday.asfreq('B'), ival_B_friday) - self.assertEqual(ival_D_saturday.asfreq('B', 'S'), ival_B_friday) - self.assertEqual(ival_D_saturday.asfreq('B', 'E'), ival_B_monday) - self.assertEqual(ival_D_sunday.asfreq('B', 'S'), ival_B_friday) - self.assertEqual(ival_D_sunday.asfreq('B', 'E'), ival_B_monday) - - self.assertEqual(ival_D.asfreq('H', 'S'), ival_D_to_H_start) - self.assertEqual(ival_D.asfreq('H', 'E'), ival_D_to_H_end) - self.assertEqual(ival_D.asfreq('Min', 'S'), ival_D_to_T_start) - self.assertEqual(ival_D.asfreq('Min', 'E'), ival_D_to_T_end) - self.assertEqual(ival_D.asfreq('S', 'S'), ival_D_to_S_start) - self.assertEqual(ival_D.asfreq('S', 'E'), ival_D_to_S_end) - - self.assertEqual(ival_D.asfreq('D'), ival_D) + assert ival_D.asfreq('A') == ival_D_to_A + + assert ival_D_end_of_quarter.asfreq('A-JAN') == ival_Deoq_to_AJAN + assert ival_D_end_of_quarter.asfreq('A-JUN') == ival_Deoq_to_AJUN + assert ival_D_end_of_quarter.asfreq('A-DEC') == ival_Deoq_to_ADEC + + assert ival_D_end_of_year.asfreq('A') == ival_D_to_A + assert ival_D_end_of_quarter.asfreq('Q') == ival_D_to_QEDEC + assert ival_D.asfreq("Q-JAN") == ival_D_to_QEJAN + assert ival_D.asfreq("Q-JUN") == ival_D_to_QEJUN + assert ival_D.asfreq("Q-DEC") == ival_D_to_QEDEC + assert ival_D.asfreq('M') == ival_D_to_M + assert ival_D_end_of_month.asfreq('M') == ival_D_to_M + assert ival_D.asfreq('W') == ival_D_to_W + assert ival_D_end_of_week.asfreq('W') == ival_D_to_W + + assert ival_D_friday.asfreq('B') == ival_B_friday + assert ival_D_saturday.asfreq('B', 'S') == ival_B_friday + assert ival_D_saturday.asfreq('B', 'E') == ival_B_monday + assert ival_D_sunday.asfreq('B', 'S') == ival_B_friday + assert ival_D_sunday.asfreq('B', 'E') == ival_B_monday + + 
assert ival_D.asfreq('H', 'S') == ival_D_to_H_start + assert ival_D.asfreq('H', 'E') == ival_D_to_H_end + assert ival_D.asfreq('Min', 'S') == ival_D_to_T_start + assert ival_D.asfreq('Min', 'E') == ival_D_to_T_end + assert ival_D.asfreq('S', 'S') == ival_D_to_S_start + assert ival_D.asfreq('S', 'E') == ival_D_to_S_end + + assert ival_D.asfreq('D') == ival_D def test_conv_hourly(self): # frequency conversion tests: from Hourly Frequency" @@ -472,25 +500,25 @@ def test_conv_hourly(self): ival_H_to_S_end = Period(freq='S', year=2007, month=1, day=1, hour=0, minute=59, second=59) - self.assertEqual(ival_H.asfreq('A'), ival_H_to_A) - self.assertEqual(ival_H_end_of_year.asfreq('A'), ival_H_to_A) - self.assertEqual(ival_H.asfreq('Q'), ival_H_to_Q) - self.assertEqual(ival_H_end_of_quarter.asfreq('Q'), ival_H_to_Q) - self.assertEqual(ival_H.asfreq('M'), ival_H_to_M) - self.assertEqual(ival_H_end_of_month.asfreq('M'), ival_H_to_M) - self.assertEqual(ival_H.asfreq('W'), ival_H_to_W) - self.assertEqual(ival_H_end_of_week.asfreq('W'), ival_H_to_W) - self.assertEqual(ival_H.asfreq('D'), ival_H_to_D) - self.assertEqual(ival_H_end_of_day.asfreq('D'), ival_H_to_D) - self.assertEqual(ival_H.asfreq('B'), ival_H_to_B) - self.assertEqual(ival_H_end_of_bus.asfreq('B'), ival_H_to_B) - - self.assertEqual(ival_H.asfreq('Min', 'S'), ival_H_to_T_start) - self.assertEqual(ival_H.asfreq('Min', 'E'), ival_H_to_T_end) - self.assertEqual(ival_H.asfreq('S', 'S'), ival_H_to_S_start) - self.assertEqual(ival_H.asfreq('S', 'E'), ival_H_to_S_end) - - self.assertEqual(ival_H.asfreq('H'), ival_H) + assert ival_H.asfreq('A') == ival_H_to_A + assert ival_H_end_of_year.asfreq('A') == ival_H_to_A + assert ival_H.asfreq('Q') == ival_H_to_Q + assert ival_H_end_of_quarter.asfreq('Q') == ival_H_to_Q + assert ival_H.asfreq('M') == ival_H_to_M + assert ival_H_end_of_month.asfreq('M') == ival_H_to_M + assert ival_H.asfreq('W') == ival_H_to_W + assert ival_H_end_of_week.asfreq('W') == ival_H_to_W + assert 
ival_H.asfreq('D') == ival_H_to_D + assert ival_H_end_of_day.asfreq('D') == ival_H_to_D + assert ival_H.asfreq('B') == ival_H_to_B + assert ival_H_end_of_bus.asfreq('B') == ival_H_to_B + + assert ival_H.asfreq('Min', 'S') == ival_H_to_T_start + assert ival_H.asfreq('Min', 'E') == ival_H_to_T_end + assert ival_H.asfreq('S', 'S') == ival_H_to_S_start + assert ival_H.asfreq('S', 'E') == ival_H_to_S_end + + assert ival_H.asfreq('H') == ival_H def test_conv_minutely(self): # frequency conversion tests: from Minutely Frequency" @@ -525,25 +553,25 @@ def test_conv_minutely(self): ival_T_to_S_end = Period(freq='S', year=2007, month=1, day=1, hour=0, minute=0, second=59) - self.assertEqual(ival_T.asfreq('A'), ival_T_to_A) - self.assertEqual(ival_T_end_of_year.asfreq('A'), ival_T_to_A) - self.assertEqual(ival_T.asfreq('Q'), ival_T_to_Q) - self.assertEqual(ival_T_end_of_quarter.asfreq('Q'), ival_T_to_Q) - self.assertEqual(ival_T.asfreq('M'), ival_T_to_M) - self.assertEqual(ival_T_end_of_month.asfreq('M'), ival_T_to_M) - self.assertEqual(ival_T.asfreq('W'), ival_T_to_W) - self.assertEqual(ival_T_end_of_week.asfreq('W'), ival_T_to_W) - self.assertEqual(ival_T.asfreq('D'), ival_T_to_D) - self.assertEqual(ival_T_end_of_day.asfreq('D'), ival_T_to_D) - self.assertEqual(ival_T.asfreq('B'), ival_T_to_B) - self.assertEqual(ival_T_end_of_bus.asfreq('B'), ival_T_to_B) - self.assertEqual(ival_T.asfreq('H'), ival_T_to_H) - self.assertEqual(ival_T_end_of_hour.asfreq('H'), ival_T_to_H) - - self.assertEqual(ival_T.asfreq('S', 'S'), ival_T_to_S_start) - self.assertEqual(ival_T.asfreq('S', 'E'), ival_T_to_S_end) - - self.assertEqual(ival_T.asfreq('Min'), ival_T) + assert ival_T.asfreq('A') == ival_T_to_A + assert ival_T_end_of_year.asfreq('A') == ival_T_to_A + assert ival_T.asfreq('Q') == ival_T_to_Q + assert ival_T_end_of_quarter.asfreq('Q') == ival_T_to_Q + assert ival_T.asfreq('M') == ival_T_to_M + assert ival_T_end_of_month.asfreq('M') == ival_T_to_M + assert ival_T.asfreq('W') == 
ival_T_to_W + assert ival_T_end_of_week.asfreq('W') == ival_T_to_W + assert ival_T.asfreq('D') == ival_T_to_D + assert ival_T_end_of_day.asfreq('D') == ival_T_to_D + assert ival_T.asfreq('B') == ival_T_to_B + assert ival_T_end_of_bus.asfreq('B') == ival_T_to_B + assert ival_T.asfreq('H') == ival_T_to_H + assert ival_T_end_of_hour.asfreq('H') == ival_T_to_H + + assert ival_T.asfreq('S', 'S') == ival_T_to_S_start + assert ival_T.asfreq('S', 'E') == ival_T_to_S_end + + assert ival_T.asfreq('Min') == ival_T def test_conv_secondly(self): # frequency conversion tests: from Secondly Frequency" @@ -577,24 +605,24 @@ def test_conv_secondly(self): ival_S_to_T = Period(freq='Min', year=2007, month=1, day=1, hour=0, minute=0) - self.assertEqual(ival_S.asfreq('A'), ival_S_to_A) - self.assertEqual(ival_S_end_of_year.asfreq('A'), ival_S_to_A) - self.assertEqual(ival_S.asfreq('Q'), ival_S_to_Q) - self.assertEqual(ival_S_end_of_quarter.asfreq('Q'), ival_S_to_Q) - self.assertEqual(ival_S.asfreq('M'), ival_S_to_M) - self.assertEqual(ival_S_end_of_month.asfreq('M'), ival_S_to_M) - self.assertEqual(ival_S.asfreq('W'), ival_S_to_W) - self.assertEqual(ival_S_end_of_week.asfreq('W'), ival_S_to_W) - self.assertEqual(ival_S.asfreq('D'), ival_S_to_D) - self.assertEqual(ival_S_end_of_day.asfreq('D'), ival_S_to_D) - self.assertEqual(ival_S.asfreq('B'), ival_S_to_B) - self.assertEqual(ival_S_end_of_bus.asfreq('B'), ival_S_to_B) - self.assertEqual(ival_S.asfreq('H'), ival_S_to_H) - self.assertEqual(ival_S_end_of_hour.asfreq('H'), ival_S_to_H) - self.assertEqual(ival_S.asfreq('Min'), ival_S_to_T) - self.assertEqual(ival_S_end_of_minute.asfreq('Min'), ival_S_to_T) - - self.assertEqual(ival_S.asfreq('S'), ival_S) + assert ival_S.asfreq('A') == ival_S_to_A + assert ival_S_end_of_year.asfreq('A') == ival_S_to_A + assert ival_S.asfreq('Q') == ival_S_to_Q + assert ival_S_end_of_quarter.asfreq('Q') == ival_S_to_Q + assert ival_S.asfreq('M') == ival_S_to_M + assert ival_S_end_of_month.asfreq('M') == 
ival_S_to_M + assert ival_S.asfreq('W') == ival_S_to_W + assert ival_S_end_of_week.asfreq('W') == ival_S_to_W + assert ival_S.asfreq('D') == ival_S_to_D + assert ival_S_end_of_day.asfreq('D') == ival_S_to_D + assert ival_S.asfreq('B') == ival_S_to_B + assert ival_S_end_of_bus.asfreq('B') == ival_S_to_B + assert ival_S.asfreq('H') == ival_S_to_H + assert ival_S_end_of_hour.asfreq('H') == ival_S_to_H + assert ival_S.asfreq('Min') == ival_S_to_T + assert ival_S_end_of_minute.asfreq('Min') == ival_S_to_T + + assert ival_S.asfreq('S') == ival_S def test_asfreq_mult(self): # normal freq to mult freq @@ -604,17 +632,17 @@ def test_asfreq_mult(self): result = p.asfreq(freq) expected = Period('2007', freq='3A') - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) + assert result == expected + assert result.ordinal == expected.ordinal + assert result.freq == expected.freq # ordinal will not change for freq in ['3A', offsets.YearEnd(3)]: result = p.asfreq(freq, how='S') expected = Period('2007', freq='3A') - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) + assert result == expected + assert result.ordinal == expected.ordinal + assert result.freq == expected.freq # mult freq to normal freq p = Period(freq='3A', year=2007) @@ -623,49 +651,49 @@ def test_asfreq_mult(self): result = p.asfreq(freq) expected = Period('2009', freq='A') - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) + assert result == expected + assert result.ordinal == expected.ordinal + assert result.freq == expected.freq # ordinal will not change for freq in ['A', offsets.YearEnd()]: result = p.asfreq(freq, how='S') expected = Period('2007', freq='A') - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - 
self.assertEqual(result.freq, expected.freq) + assert result == expected + assert result.ordinal == expected.ordinal + assert result.freq == expected.freq p = Period(freq='A', year=2007) for freq in ['2M', offsets.MonthEnd(2)]: result = p.asfreq(freq) expected = Period('2007-12', freq='2M') - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) + assert result == expected + assert result.ordinal == expected.ordinal + assert result.freq == expected.freq for freq in ['2M', offsets.MonthEnd(2)]: result = p.asfreq(freq, how='S') expected = Period('2007-01', freq='2M') - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) + assert result == expected + assert result.ordinal == expected.ordinal + assert result.freq == expected.freq p = Period(freq='3A', year=2007) for freq in ['2M', offsets.MonthEnd(2)]: result = p.asfreq(freq) expected = Period('2009-12', freq='2M') - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) + assert result == expected + assert result.ordinal == expected.ordinal + assert result.freq == expected.freq for freq in ['2M', offsets.MonthEnd(2)]: result = p.asfreq(freq, how='S') expected = Period('2007-01', freq='2M') - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) + assert result == expected + assert result.ordinal == expected.ordinal + assert result.freq == expected.freq def test_asfreq_combined(self): # normal freq to combined freq @@ -675,9 +703,9 @@ def test_asfreq_combined(self): expected = Period('2007', freq='25H') for freq, how in zip(['1D1H', '1H1D'], ['E', 'S']): result = p.asfreq(freq, how=how) - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - 
self.assertEqual(result.freq, expected.freq) + assert result == expected + assert result.ordinal == expected.ordinal + assert result.freq == expected.freq # combined freq to normal freq p1 = Period(freq='1D1H', year=2007) @@ -687,35 +715,34 @@ def test_asfreq_combined(self): result1 = p1.asfreq('H') result2 = p2.asfreq('H') expected = Period('2007-01-02', freq='H') - self.assertEqual(result1, expected) - self.assertEqual(result1.ordinal, expected.ordinal) - self.assertEqual(result1.freq, expected.freq) - self.assertEqual(result2, expected) - self.assertEqual(result2.ordinal, expected.ordinal) - self.assertEqual(result2.freq, expected.freq) + assert result1 == expected + assert result1.ordinal == expected.ordinal + assert result1.freq == expected.freq + assert result2 == expected + assert result2.ordinal == expected.ordinal + assert result2.freq == expected.freq # ordinal will not change result1 = p1.asfreq('H', how='S') result2 = p2.asfreq('H', how='S') expected = Period('2007-01-01', freq='H') - self.assertEqual(result1, expected) - self.assertEqual(result1.ordinal, expected.ordinal) - self.assertEqual(result1.freq, expected.freq) - self.assertEqual(result2, expected) - self.assertEqual(result2.ordinal, expected.ordinal) - self.assertEqual(result2.freq, expected.freq) + assert result1 == expected + assert result1.ordinal == expected.ordinal + assert result1.freq == expected.freq + assert result2 == expected + assert result2.ordinal == expected.ordinal + assert result2.freq == expected.freq def test_asfreq_MS(self): initial = Period("2013") - self.assertEqual(initial.asfreq(freq="M", how="S"), - Period('2013-01', 'M')) + assert initial.asfreq(freq="M", how="S") == Period('2013-01', 'M') - msg = pd.tseries.frequencies._INVALID_FREQ_ERROR - with self.assertRaisesRegexp(ValueError, msg): + msg = pd._libs.tslibs.frequencies._INVALID_FREQ_ERROR + with tm.assert_raises_regex(ValueError, msg): initial.asfreq(freq="MS", how="S") - with tm.assertRaisesRegexp(ValueError, 
msg): + with tm.assert_raises_regex(ValueError, msg): pd.Period('2013-01', 'MS') - self.assertTrue(_period_code_map.get("MS") is None) + assert _period_code_map.get("MS") is None diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py new file mode 100644 index 0000000000000..f43ab0704f0f4 --- /dev/null +++ b/pandas/tests/scalar/period/test_period.py @@ -0,0 +1,1449 @@ +import pytest + +import pytz +import numpy as np +from datetime import datetime, date, timedelta + +import pandas as pd +import pandas.util.testing as tm +import pandas.core.indexes.period as period +from pandas.compat import text_type, iteritems +from pandas.compat.numpy import np_datetime64_compat + +from pandas._libs import tslib +from pandas._libs.tslibs import period as libperiod +from pandas._libs.tslibs.ccalendar import DAYS, MONTHS +from pandas._libs.tslibs.parsing import DateParseError +from pandas import Period, Timestamp, offsets + + +class TestPeriodProperties(object): + "Test properties such as year, month, weekday, etc...." 
+ + @pytest.mark.parametrize('freq', ['A', 'M', 'D', 'H']) + def test_is_leap_year(self, freq): + # GH 13727 + p = Period('2000-01-01 00:00:00', freq=freq) + assert p.is_leap_year + assert isinstance(p.is_leap_year, bool) + + p = Period('1999-01-01 00:00:00', freq=freq) + assert not p.is_leap_year + + p = Period('2004-01-01 00:00:00', freq=freq) + assert p.is_leap_year + + p = Period('2100-01-01 00:00:00', freq=freq) + assert not p.is_leap_year + + def test_quarterly_negative_ordinals(self): + p = Period(ordinal=-1, freq='Q-DEC') + assert p.year == 1969 + assert p.quarter == 4 + assert isinstance(p, Period) + + p = Period(ordinal=-2, freq='Q-DEC') + assert p.year == 1969 + assert p.quarter == 3 + assert isinstance(p, Period) + + p = Period(ordinal=-2, freq='M') + assert p.year == 1969 + assert p.month == 11 + assert isinstance(p, Period) + + @pytest.mark.parametrize('month', MONTHS) + def test_period_cons_quarterly(self, month): + # bugs in scikits.timeseries + freq = 'Q-%s' % month + exp = Period('1989Q3', freq=freq) + assert '1989Q3' in str(exp) + stamp = exp.to_timestamp('D', how='end') + p = Period(stamp, freq=freq) + assert p == exp + + stamp = exp.to_timestamp('3D', how='end') + p = Period(stamp, freq=freq) + assert p == exp + + @pytest.mark.parametrize('month', MONTHS) + def test_period_cons_annual(self, month): + # bugs in scikits.timeseries + freq = 'A-%s' % month + exp = Period('1989', freq=freq) + stamp = exp.to_timestamp('D', how='end') + timedelta(days=30) + p = Period(stamp, freq=freq) + assert p == exp + 1 + assert isinstance(p, Period) + + @pytest.mark.parametrize('day', DAYS) + @pytest.mark.parametrize('num', range(10, 17)) + def test_period_cons_weekly(self, num, day): + daystr = '2011-02-%d' % num + freq = 'W-%s' % day + + result = Period(daystr, freq=freq) + expected = Period(daystr, freq='D').asfreq(freq) + assert result == expected + assert isinstance(result, Period) + + def test_period_from_ordinal(self): + p = pd.Period('2011-01', freq='M') 
+ res = pd.Period._from_ordinal(p.ordinal, freq='M') + assert p == res + assert isinstance(res, Period) + + def test_period_cons_nat(self): + p = Period('NaT', freq='M') + assert p is pd.NaT + + p = Period('nat', freq='W-SUN') + assert p is pd.NaT + + p = Period(tslib.iNaT, freq='D') + assert p is pd.NaT + + p = Period(tslib.iNaT, freq='3D') + assert p is pd.NaT + + p = Period(tslib.iNaT, freq='1D1H') + assert p is pd.NaT + + p = Period('NaT') + assert p is pd.NaT + + p = Period(tslib.iNaT) + assert p is pd.NaT + + def test_period_cons_mult(self): + p1 = Period('2011-01', freq='3M') + p2 = Period('2011-01', freq='M') + assert p1.ordinal == p2.ordinal + + assert p1.freq == offsets.MonthEnd(3) + assert p1.freqstr == '3M' + + assert p2.freq == offsets.MonthEnd() + assert p2.freqstr == 'M' + + result = p1 + 1 + assert result.ordinal == (p2 + 3).ordinal + assert result.freq == p1.freq + assert result.freqstr == '3M' + + result = p1 - 1 + assert result.ordinal == (p2 - 3).ordinal + assert result.freq == p1.freq + assert result.freqstr == '3M' + + msg = ('Frequency must be positive, because it' + ' represents span: -3M') + with tm.assert_raises_regex(ValueError, msg): + Period('2011-01', freq='-3M') + + msg = ('Frequency must be positive, because it' ' represents span: 0M') + with tm.assert_raises_regex(ValueError, msg): + Period('2011-01', freq='0M') + + def test_period_cons_combined(self): + p = [(Period('2011-01', freq='1D1H'), + Period('2011-01', freq='1H1D'), + Period('2011-01', freq='H')), + (Period(ordinal=1, freq='1D1H'), + Period(ordinal=1, freq='1H1D'), + Period(ordinal=1, freq='H'))] + + for p1, p2, p3 in p: + assert p1.ordinal == p3.ordinal + assert p2.ordinal == p3.ordinal + + assert p1.freq == offsets.Hour(25) + assert p1.freqstr == '25H' + + assert p2.freq == offsets.Hour(25) + assert p2.freqstr == '25H' + + assert p3.freq == offsets.Hour() + assert p3.freqstr == 'H' + + result = p1 + 1 + assert result.ordinal == (p3 + 25).ordinal + assert result.freq == 
p1.freq + assert result.freqstr == '25H' + + result = p2 + 1 + assert result.ordinal == (p3 + 25).ordinal + assert result.freq == p2.freq + assert result.freqstr == '25H' + + result = p1 - 1 + assert result.ordinal == (p3 - 25).ordinal + assert result.freq == p1.freq + assert result.freqstr == '25H' + + result = p2 - 1 + assert result.ordinal == (p3 - 25).ordinal + assert result.freq == p2.freq + assert result.freqstr == '25H' + + msg = ('Frequency must be positive, because it' + ' represents span: -25H') + with tm.assert_raises_regex(ValueError, msg): + Period('2011-01', freq='-1D1H') + with tm.assert_raises_regex(ValueError, msg): + Period('2011-01', freq='-1H1D') + with tm.assert_raises_regex(ValueError, msg): + Period(ordinal=1, freq='-1D1H') + with tm.assert_raises_regex(ValueError, msg): + Period(ordinal=1, freq='-1H1D') + + msg = ('Frequency must be positive, because it' + ' represents span: 0D') + with tm.assert_raises_regex(ValueError, msg): + Period('2011-01', freq='0D0H') + with tm.assert_raises_regex(ValueError, msg): + Period(ordinal=1, freq='0D0H') + + # You can only combine together day and intraday offsets + msg = ('Invalid frequency: 1W1D') + with tm.assert_raises_regex(ValueError, msg): + Period('2011-01', freq='1W1D') + msg = ('Invalid frequency: 1D1W') + with tm.assert_raises_regex(ValueError, msg): + Period('2011-01', freq='1D1W') + + @pytest.mark.parametrize('tzstr', ['Europe/Brussels', + 'Asia/Tokyo', 'US/Pacific']) + def test_timestamp_tz_arg(self, tzstr): + p = Period('1/1/2005', freq='M').to_timestamp(tz=tzstr) + exp = Timestamp('1/1/2005', tz='UTC').tz_convert(tzstr) + exp_zone = pytz.timezone(tzstr).normalize(p) + + assert p == exp + assert p.tz == exp_zone.tzinfo + assert p.tz == exp.tz + + p = Period('1/1/2005', freq='3H').to_timestamp(tz=tzstr) + exp = Timestamp('1/1/2005', tz='UTC').tz_convert(tzstr) + exp_zone = pytz.timezone(tzstr).normalize(p) + + assert p == exp + assert p.tz == exp_zone.tzinfo + assert p.tz == exp.tz + + p = 
Period('1/1/2005', freq='A').to_timestamp(freq='A', tz=tzstr) + exp = Timestamp('31/12/2005', tz='UTC').tz_convert(tzstr) + exp_zone = pytz.timezone(tzstr).normalize(p) + + assert p == exp + assert p.tz == exp_zone.tzinfo + assert p.tz == exp.tz + + p = Period('1/1/2005', freq='A').to_timestamp(freq='3H', tz=tzstr) + exp = Timestamp('1/1/2005', tz='UTC').tz_convert(tzstr) + exp_zone = pytz.timezone(tzstr).normalize(p) + + assert p == exp + assert p.tz == exp_zone.tzinfo + assert p.tz == exp.tz + + @pytest.mark.parametrize('tzstr', ['dateutil/Europe/Brussels', + 'dateutil/Asia/Tokyo', + 'dateutil/US/Pacific']) + def test_timestamp_tz_arg_dateutil(self, tzstr): + from pandas._libs.tslibs.timezones import dateutil_gettz + from pandas._libs.tslibs.timezones import maybe_get_tz + tz = maybe_get_tz(tzstr) + p = Period('1/1/2005', freq='M').to_timestamp(tz=tz) + exp = Timestamp('1/1/2005', tz='UTC').tz_convert(tzstr) + assert p == exp + assert p.tz == dateutil_gettz(tzstr.split('/', 1)[1]) + assert p.tz == exp.tz + + p = Period('1/1/2005', freq='M').to_timestamp(freq='3H', tz=tz) + exp = Timestamp('1/1/2005', tz='UTC').tz_convert(tzstr) + assert p == exp + assert p.tz == dateutil_gettz(tzstr.split('/', 1)[1]) + assert p.tz == exp.tz + + def test_timestamp_tz_arg_dateutil_from_string(self): + from pandas._libs.tslibs.timezones import dateutil_gettz + p = Period('1/1/2005', + freq='M').to_timestamp(tz='dateutil/Europe/Brussels') + assert p.tz == dateutil_gettz('Europe/Brussels') + + def test_timestamp_mult(self): + p = pd.Period('2011-01', freq='M') + assert p.to_timestamp(how='S') == pd.Timestamp('2011-01-01') + assert p.to_timestamp(how='E') == pd.Timestamp('2011-01-31') + + p = pd.Period('2011-01', freq='3M') + assert p.to_timestamp(how='S') == pd.Timestamp('2011-01-01') + assert p.to_timestamp(how='E') == pd.Timestamp('2011-03-31') + + def test_construction(self): + i1 = Period('1/1/2005', freq='M') + i2 = Period('Jan 2005') + + assert i1 == i2 + + i1 = Period('2005', 
freq='A') + i2 = Period('2005') + i3 = Period('2005', freq='a') + + assert i1 == i2 + assert i1 == i3 + + i4 = Period('2005', freq='M') + i5 = Period('2005', freq='m') + + pytest.raises(ValueError, i1.__ne__, i4) + assert i4 == i5 + + i1 = Period.now('Q') + i2 = Period(datetime.now(), freq='Q') + i3 = Period.now('q') + + assert i1 == i2 + assert i1 == i3 + + i1 = Period('1982', freq='min') + i2 = Period('1982', freq='MIN') + assert i1 == i2 + i2 = Period('1982', freq=('Min', 1)) + assert i1 == i2 + + i1 = Period(year=2005, month=3, day=1, freq='D') + i2 = Period('3/1/2005', freq='D') + assert i1 == i2 + + i3 = Period(year=2005, month=3, day=1, freq='d') + assert i1 == i3 + + i1 = Period('2007-01-01 09:00:00.001') + expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1000), freq='L') + assert i1 == expected + + expected = Period(np_datetime64_compat( + '2007-01-01 09:00:00.001Z'), freq='L') + assert i1 == expected + + i1 = Period('2007-01-01 09:00:00.00101') + expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1010), freq='U') + assert i1 == expected + + expected = Period(np_datetime64_compat('2007-01-01 09:00:00.00101Z'), + freq='U') + assert i1 == expected + + pytest.raises(ValueError, Period, ordinal=200701) + + pytest.raises(ValueError, Period, '2007-1-1', freq='X') + + def test_construction_bday(self): + + # Biz day construction, roll forward if non-weekday + i1 = Period('3/10/12', freq='B') + i2 = Period('3/10/12', freq='D') + assert i1 == i2.asfreq('B') + i2 = Period('3/11/12', freq='D') + assert i1 == i2.asfreq('B') + i2 = Period('3/12/12', freq='D') + assert i1 == i2.asfreq('B') + + i3 = Period('3/10/12', freq='b') + assert i1 == i3 + + i1 = Period(year=2012, month=3, day=10, freq='B') + i2 = Period('3/12/12', freq='B') + assert i1 == i2 + + def test_construction_quarter(self): + + i1 = Period(year=2005, quarter=1, freq='Q') + i2 = Period('1/1/2005', freq='Q') + assert i1 == i2 + + i1 = Period(year=2005, quarter=3, freq='Q') + i2 = Period('9/1/2005', freq='Q') + 
assert i1 == i2 + + i1 = Period('2005Q1') + i2 = Period(year=2005, quarter=1, freq='Q') + i3 = Period('2005q1') + assert i1 == i2 + assert i1 == i3 + + i1 = Period('05Q1') + assert i1 == i2 + lower = Period('05q1') + assert i1 == lower + + i1 = Period('1Q2005') + assert i1 == i2 + lower = Period('1q2005') + assert i1 == lower + + i1 = Period('1Q05') + assert i1 == i2 + lower = Period('1q05') + assert i1 == lower + + i1 = Period('4Q1984') + assert i1.year == 1984 + lower = Period('4q1984') + assert i1 == lower + + def test_construction_month(self): + + expected = Period('2007-01', freq='M') + i1 = Period('200701', freq='M') + assert i1 == expected + + i1 = Period('200701', freq='M') + assert i1 == expected + + i1 = Period(200701, freq='M') + assert i1 == expected + + i1 = Period(ordinal=200701, freq='M') + assert i1.year == 18695 + + i1 = Period(datetime(2007, 1, 1), freq='M') + i2 = Period('200701', freq='M') + assert i1 == i2 + + i1 = Period(date(2007, 1, 1), freq='M') + i2 = Period(datetime(2007, 1, 1), freq='M') + i3 = Period(np.datetime64('2007-01-01'), freq='M') + i4 = Period(np_datetime64_compat('2007-01-01 00:00:00Z'), freq='M') + i5 = Period(np_datetime64_compat('2007-01-01 00:00:00.000Z'), freq='M') + assert i1 == i2 + assert i1 == i3 + assert i1 == i4 + assert i1 == i5 + + def test_period_constructor_offsets(self): + assert (Period('1/1/2005', freq=offsets.MonthEnd()) == + Period('1/1/2005', freq='M')) + assert (Period('2005', freq=offsets.YearEnd()) == + Period('2005', freq='A')) + assert (Period('2005', freq=offsets.MonthEnd()) == + Period('2005', freq='M')) + assert (Period('3/10/12', freq=offsets.BusinessDay()) == + Period('3/10/12', freq='B')) + assert (Period('3/10/12', freq=offsets.Day()) == + Period('3/10/12', freq='D')) + + assert (Period(year=2005, quarter=1, + freq=offsets.QuarterEnd(startingMonth=12)) == + Period(year=2005, quarter=1, freq='Q')) + assert (Period(year=2005, quarter=2, + freq=offsets.QuarterEnd(startingMonth=12)) == + 
Period(year=2005, quarter=2, freq='Q')) + + assert (Period(year=2005, month=3, day=1, freq=offsets.Day()) == + Period(year=2005, month=3, day=1, freq='D')) + assert (Period(year=2012, month=3, day=10, freq=offsets.BDay()) == + Period(year=2012, month=3, day=10, freq='B')) + + expected = Period('2005-03-01', freq='3D') + assert (Period(year=2005, month=3, day=1, + freq=offsets.Day(3)) == expected) + assert Period(year=2005, month=3, day=1, freq='3D') == expected + + assert (Period(year=2012, month=3, day=10, + freq=offsets.BDay(3)) == + Period(year=2012, month=3, day=10, freq='3B')) + + assert (Period(200701, freq=offsets.MonthEnd()) == + Period(200701, freq='M')) + + i1 = Period(ordinal=200701, freq=offsets.MonthEnd()) + i2 = Period(ordinal=200701, freq='M') + assert i1 == i2 + assert i1.year == 18695 + assert i2.year == 18695 + + i1 = Period(datetime(2007, 1, 1), freq='M') + i2 = Period('200701', freq='M') + assert i1 == i2 + + i1 = Period(date(2007, 1, 1), freq='M') + i2 = Period(datetime(2007, 1, 1), freq='M') + i3 = Period(np.datetime64('2007-01-01'), freq='M') + i4 = Period(np_datetime64_compat('2007-01-01 00:00:00Z'), freq='M') + i5 = Period(np_datetime64_compat('2007-01-01 00:00:00.000Z'), freq='M') + assert i1 == i2 + assert i1 == i3 + assert i1 == i4 + assert i1 == i5 + + i1 = Period('2007-01-01 09:00:00.001') + expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1000), freq='L') + assert i1 == expected + + expected = Period(np_datetime64_compat( + '2007-01-01 09:00:00.001Z'), freq='L') + assert i1 == expected + + i1 = Period('2007-01-01 09:00:00.00101') + expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1010), freq='U') + assert i1 == expected + + expected = Period(np_datetime64_compat('2007-01-01 09:00:00.00101Z'), + freq='U') + assert i1 == expected + + pytest.raises(ValueError, Period, ordinal=200701) + + pytest.raises(ValueError, Period, '2007-1-1', freq='X') + + def test_freq_str(self): + i1 = Period('1982', freq='Min') + assert i1.freq == 
offsets.Minute() + assert i1.freqstr == 'T' + + def test_period_deprecated_freq(self): + cases = {"M": ["MTH", "MONTH", "MONTHLY", "Mth", "month", "monthly"], + "B": ["BUS", "BUSINESS", "BUSINESSLY", "WEEKDAY", "bus"], + "D": ["DAY", "DLY", "DAILY", "Day", "Dly", "Daily"], + "H": ["HR", "HOUR", "HRLY", "HOURLY", "hr", "Hour", "HRly"], + "T": ["minute", "MINUTE", "MINUTELY", "minutely"], + "S": ["sec", "SEC", "SECOND", "SECONDLY", "second"], + "L": ["MILLISECOND", "MILLISECONDLY", "millisecond"], + "U": ["MICROSECOND", "MICROSECONDLY", "microsecond"], + "N": ["NANOSECOND", "NANOSECONDLY", "nanosecond"]} + + msg = pd._libs.tslibs.frequencies._INVALID_FREQ_ERROR + for exp, freqs in iteritems(cases): + for freq in freqs: + with tm.assert_raises_regex(ValueError, msg): + Period('2016-03-01 09:00', freq=freq) + with tm.assert_raises_regex(ValueError, msg): + Period(ordinal=1, freq=freq) + + # check supported freq-aliases still works + p1 = Period('2016-03-01 09:00', freq=exp) + p2 = Period(ordinal=1, freq=exp) + assert isinstance(p1, Period) + assert isinstance(p2, Period) + + def test_hash(self): + assert (hash(Period('2011-01', freq='M')) == + hash(Period('2011-01', freq='M'))) + + assert (hash(Period('2011-01-01', freq='D')) != + hash(Period('2011-01', freq='M'))) + + assert (hash(Period('2011-01', freq='3M')) != + hash(Period('2011-01', freq='2M'))) + + assert (hash(Period('2011-01', freq='M')) != + hash(Period('2011-02', freq='M'))) + + def test_repr(self): + p = Period('Jan-2000') + assert '2000-01' in repr(p) + + p = Period('2000-12-15') + assert '2000-12-15' in repr(p) + + def test_repr_nat(self): + p = Period('nat', freq='M') + assert repr(tslib.NaT) in repr(p) + + def test_millisecond_repr(self): + p = Period('2000-01-01 12:15:02.123') + + assert repr(p) == "Period('2000-01-01 12:15:02.123', 'L')" + + def test_microsecond_repr(self): + p = Period('2000-01-01 12:15:02.123567') + + assert repr(p) == "Period('2000-01-01 12:15:02.123567', 'U')" + + def 
test_strftime(self): + p = Period('2000-1-1 12:34:12', freq='S') + res = p.strftime('%Y-%m-%d %H:%M:%S') + assert res == '2000-01-01 12:34:12' + assert isinstance(res, text_type) # GH3363 + + def test_sub_delta(self): + left, right = Period('2011', freq='A'), Period('2007', freq='A') + result = left - right + assert result == 4 + + with pytest.raises(period.IncompatibleFrequency): + left - Period('2007-01', freq='M') + + def test_to_timestamp(self): + p = Period('1982', freq='A') + start_ts = p.to_timestamp(how='S') + aliases = ['s', 'StarT', 'BEGIn'] + for a in aliases: + assert start_ts == p.to_timestamp('D', how=a) + # freq with mult should not affect to the result + assert start_ts == p.to_timestamp('3D', how=a) + + end_ts = p.to_timestamp(how='E') + aliases = ['e', 'end', 'FINIsH'] + for a in aliases: + assert end_ts == p.to_timestamp('D', how=a) + assert end_ts == p.to_timestamp('3D', how=a) + + from_lst = ['A', 'Q', 'M', 'W', 'B', 'D', 'H', 'Min', 'S'] + + def _ex(p): + return Timestamp((p + 1).start_time.value - 1) + + for i, fcode in enumerate(from_lst): + p = Period('1982', freq=fcode) + result = p.to_timestamp().to_period(fcode) + assert result == p + + assert p.start_time == p.to_timestamp(how='S') + + assert p.end_time == _ex(p) + + # Frequency other than daily + + p = Period('1985', freq='A') + + result = p.to_timestamp('H', how='end') + expected = datetime(1985, 12, 31, 23) + assert result == expected + result = p.to_timestamp('3H', how='end') + assert result == expected + + result = p.to_timestamp('T', how='end') + expected = datetime(1985, 12, 31, 23, 59) + assert result == expected + result = p.to_timestamp('2T', how='end') + assert result == expected + + result = p.to_timestamp(how='end') + expected = datetime(1985, 12, 31) + assert result == expected + + expected = datetime(1985, 1, 1) + result = p.to_timestamp('H', how='start') + assert result == expected + result = p.to_timestamp('T', how='start') + assert result == expected + result = 
p.to_timestamp('S', how='start') + assert result == expected + result = p.to_timestamp('3H', how='start') + assert result == expected + result = p.to_timestamp('5S', how='start') + assert result == expected + + def test_start_time(self): + freq_lst = ['A', 'Q', 'M', 'D', 'H', 'T', 'S'] + xp = datetime(2012, 1, 1) + for f in freq_lst: + p = Period('2012', freq=f) + assert p.start_time == xp + assert Period('2012', freq='B').start_time == datetime(2012, 1, 2) + assert Period('2012', freq='W').start_time == datetime(2011, 12, 26) + + def test_end_time(self): + p = Period('2012', freq='A') + + def _ex(*args): + return Timestamp(Timestamp(datetime(*args)).value - 1) + + xp = _ex(2013, 1, 1) + assert xp == p.end_time + + p = Period('2012', freq='Q') + xp = _ex(2012, 4, 1) + assert xp == p.end_time + + p = Period('2012', freq='M') + xp = _ex(2012, 2, 1) + assert xp == p.end_time + + p = Period('2012', freq='D') + xp = _ex(2012, 1, 2) + assert xp == p.end_time + + p = Period('2012', freq='H') + xp = _ex(2012, 1, 1, 1) + assert xp == p.end_time + + p = Period('2012', freq='B') + xp = _ex(2012, 1, 3) + assert xp == p.end_time + + p = Period('2012', freq='W') + xp = _ex(2012, 1, 2) + assert xp == p.end_time + + # Test for GH 11738 + p = Period('2012', freq='15D') + xp = _ex(2012, 1, 16) + assert xp == p.end_time + + p = Period('2012', freq='1D1H') + xp = _ex(2012, 1, 2, 1) + assert xp == p.end_time + + p = Period('2012', freq='1H1D') + xp = _ex(2012, 1, 2, 1) + assert xp == p.end_time + + def test_anchor_week_end_time(self): + def _ex(*args): + return Timestamp(Timestamp(datetime(*args)).value - 1) + + p = Period('2013-1-1', 'W-SAT') + xp = _ex(2013, 1, 6) + assert p.end_time == xp + + def test_properties_annually(self): + # Test properties on Periods with annually frequency. + a_date = Period(freq='A', year=2007) + assert a_date.year == 2007 + + def test_properties_quarterly(self): + # Test properties on Periods with daily frequency. 
+ qedec_date = Period(freq="Q-DEC", year=2007, quarter=1) + qejan_date = Period(freq="Q-JAN", year=2007, quarter=1) + qejun_date = Period(freq="Q-JUN", year=2007, quarter=1) + # + for x in range(3): + for qd in (qedec_date, qejan_date, qejun_date): + assert (qd + x).qyear == 2007 + assert (qd + x).quarter == x + 1 + + def test_properties_monthly(self): + # Test properties on Periods with daily frequency. + m_date = Period(freq='M', year=2007, month=1) + for x in range(11): + m_ival_x = m_date + x + assert m_ival_x.year == 2007 + if 1 <= x + 1 <= 3: + assert m_ival_x.quarter == 1 + elif 4 <= x + 1 <= 6: + assert m_ival_x.quarter == 2 + elif 7 <= x + 1 <= 9: + assert m_ival_x.quarter == 3 + elif 10 <= x + 1 <= 12: + assert m_ival_x.quarter == 4 + assert m_ival_x.month == x + 1 + + def test_properties_weekly(self): + # Test properties on Periods with daily frequency. + w_date = Period(freq='W', year=2007, month=1, day=7) + # + assert w_date.year == 2007 + assert w_date.quarter == 1 + assert w_date.month == 1 + assert w_date.week == 1 + assert (w_date - 1).week == 52 + assert w_date.days_in_month == 31 + assert Period(freq='W', year=2012, + month=2, day=1).days_in_month == 29 + + def test_properties_weekly_legacy(self): + # Test properties on Periods with daily frequency. + w_date = Period(freq='W', year=2007, month=1, day=7) + assert w_date.year == 2007 + assert w_date.quarter == 1 + assert w_date.month == 1 + assert w_date.week == 1 + assert (w_date - 1).week == 52 + assert w_date.days_in_month == 31 + + exp = Period(freq='W', year=2012, month=2, day=1) + assert exp.days_in_month == 29 + + msg = pd._libs.tslibs.frequencies._INVALID_FREQ_ERROR + with tm.assert_raises_regex(ValueError, msg): + Period(freq='WK', year=2007, month=1, day=7) + + def test_properties_daily(self): + # Test properties on Periods with daily frequency. 
+ b_date = Period(freq='B', year=2007, month=1, day=1) + # + assert b_date.year == 2007 + assert b_date.quarter == 1 + assert b_date.month == 1 + assert b_date.day == 1 + assert b_date.weekday == 0 + assert b_date.dayofyear == 1 + assert b_date.days_in_month == 31 + assert Period(freq='B', year=2012, + month=2, day=1).days_in_month == 29 + + d_date = Period(freq='D', year=2007, month=1, day=1) + + assert d_date.year == 2007 + assert d_date.quarter == 1 + assert d_date.month == 1 + assert d_date.day == 1 + assert d_date.weekday == 0 + assert d_date.dayofyear == 1 + assert d_date.days_in_month == 31 + assert Period(freq='D', year=2012, month=2, + day=1).days_in_month == 29 + + def test_properties_hourly(self): + # Test properties on Periods with hourly frequency. + h_date1 = Period(freq='H', year=2007, month=1, day=1, hour=0) + h_date2 = Period(freq='2H', year=2007, month=1, day=1, hour=0) + + for h_date in [h_date1, h_date2]: + assert h_date.year == 2007 + assert h_date.quarter == 1 + assert h_date.month == 1 + assert h_date.day == 1 + assert h_date.weekday == 0 + assert h_date.dayofyear == 1 + assert h_date.hour == 0 + assert h_date.days_in_month == 31 + assert Period(freq='H', year=2012, month=2, day=1, + hour=0).days_in_month == 29 + + def test_properties_minutely(self): + # Test properties on Periods with minutely frequency. + t_date = Period(freq='Min', year=2007, month=1, day=1, hour=0, + minute=0) + # + assert t_date.quarter == 1 + assert t_date.month == 1 + assert t_date.day == 1 + assert t_date.weekday == 0 + assert t_date.dayofyear == 1 + assert t_date.hour == 0 + assert t_date.minute == 0 + assert t_date.days_in_month == 31 + assert Period(freq='D', year=2012, month=2, day=1, hour=0, + minute=0).days_in_month == 29 + + def test_properties_secondly(self): + # Test properties on Periods with secondly frequency. 
+ s_date = Period(freq='Min', year=2007, month=1, day=1, hour=0, + minute=0, second=0) + # + assert s_date.year == 2007 + assert s_date.quarter == 1 + assert s_date.month == 1 + assert s_date.day == 1 + assert s_date.weekday == 0 + assert s_date.dayofyear == 1 + assert s_date.hour == 0 + assert s_date.minute == 0 + assert s_date.second == 0 + assert s_date.days_in_month == 31 + assert Period(freq='Min', year=2012, month=2, day=1, hour=0, + minute=0, second=0).days_in_month == 29 + + def test_pnow(self): + + # deprecation, xref #13790 + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + period.pnow('D') + + def test_constructor_corner(self): + expected = Period('2007-01', freq='2M') + assert Period(year=2007, month=1, freq='2M') == expected + + pytest.raises(ValueError, Period, datetime.now()) + pytest.raises(ValueError, Period, datetime.now().date()) + pytest.raises(ValueError, Period, 1.6, freq='D') + pytest.raises(ValueError, Period, ordinal=1.6, freq='D') + pytest.raises(ValueError, Period, ordinal=2, value=1, freq='D') + assert Period(None) is pd.NaT + pytest.raises(ValueError, Period, month=1) + + p = Period('2007-01-01', freq='D') + + result = Period(p, freq='A') + exp = Period('2007', freq='A') + assert result == exp + + def test_constructor_infer_freq(self): + p = Period('2007-01-01') + assert p.freq == 'D' + + p = Period('2007-01-01 07') + assert p.freq == 'H' + + p = Period('2007-01-01 07:10') + assert p.freq == 'T' + + p = Period('2007-01-01 07:10:15') + assert p.freq == 'S' + + p = Period('2007-01-01 07:10:15.123') + assert p.freq == 'L' + + p = Period('2007-01-01 07:10:15.123000') + assert p.freq == 'L' + + p = Period('2007-01-01 07:10:15.123400') + assert p.freq == 'U' + + def test_badinput(self): + pytest.raises(ValueError, Period, '-2000', 'A') + pytest.raises(DateParseError, Period, '0', 'A') + pytest.raises(DateParseError, Period, '1/1/-2000', 'A') + + def test_multiples(self): + result1 = Period('1989', freq='2A') + 
result2 = Period('1989', freq='A') + assert result1.ordinal == result2.ordinal + assert result1.freqstr == '2A-DEC' + assert result2.freqstr == 'A-DEC' + assert result1.freq == offsets.YearEnd(2) + assert result2.freq == offsets.YearEnd() + + assert (result1 + 1).ordinal == result1.ordinal + 2 + assert (1 + result1).ordinal == result1.ordinal + 2 + assert (result1 - 1).ordinal == result2.ordinal - 2 + assert (-1 + result1).ordinal == result2.ordinal - 2 + + def test_round_trip(self): + + p = Period('2000Q1') + new_p = tm.round_trip_pickle(p) + assert new_p == p + + +class TestPeriodField(object): + + def test_get_period_field_array_raises_on_out_of_range(self): + pytest.raises(ValueError, libperiod.get_period_field_arr, -1, + np.empty(1), 0) + + +class TestComparisons(object): + + def setup_method(self, method): + self.january1 = Period('2000-01', 'M') + self.january2 = Period('2000-01', 'M') + self.february = Period('2000-02', 'M') + self.march = Period('2000-03', 'M') + self.day = Period('2012-01-01', 'D') + + def test_equal(self): + assert self.january1 == self.january2 + + def test_equal_Raises_Value(self): + with pytest.raises(period.IncompatibleFrequency): + self.january1 == self.day + + def test_notEqual(self): + assert self.january1 != 1 + assert self.january1 != self.february + + def test_greater(self): + assert self.february > self.january1 + + def test_greater_Raises_Value(self): + with pytest.raises(period.IncompatibleFrequency): + self.january1 > self.day + + def test_greater_Raises_Type(self): + with pytest.raises(TypeError): + self.january1 > 1 + + def test_greaterEqual(self): + assert self.january1 >= self.january2 + + def test_greaterEqual_Raises_Value(self): + with pytest.raises(period.IncompatibleFrequency): + self.january1 >= self.day + + with pytest.raises(TypeError): + print(self.january1 >= 1) + + def test_smallerEqual(self): + assert self.january1 <= self.january2 + + def test_smallerEqual_Raises_Value(self): + with 
pytest.raises(period.IncompatibleFrequency): + self.january1 <= self.day + + def test_smallerEqual_Raises_Type(self): + with pytest.raises(TypeError): + self.january1 <= 1 + + def test_smaller(self): + assert self.january1 < self.february + + def test_smaller_Raises_Value(self): + with pytest.raises(period.IncompatibleFrequency): + self.january1 < self.day + + def test_smaller_Raises_Type(self): + with pytest.raises(TypeError): + self.january1 < 1 + + def test_sort(self): + periods = [self.march, self.january1, self.february] + correctPeriods = [self.january1, self.february, self.march] + assert sorted(periods) == correctPeriods + + def test_period_nat_comp(self): + p_nat = Period('NaT', freq='D') + p = Period('2011-01-01', freq='D') + + nat = pd.Timestamp('NaT') + t = pd.Timestamp('2011-01-01') + # confirm Period('NaT') work identical with Timestamp('NaT') + for left, right in [(p_nat, p), (p, p_nat), (p_nat, p_nat), (nat, t), + (t, nat), (nat, nat)]: + assert not left < right + assert not left > right + assert not left == right + assert left != right + assert not left <= right + assert not left >= right + + +class TestMethods(object): + + def test_add(self): + dt1 = Period(freq='D', year=2008, month=1, day=1) + dt2 = Period(freq='D', year=2008, month=1, day=2) + assert dt1 + 1 == dt2 + assert 1 + dt1 == dt2 + + def test_add_pdnat(self): + p = pd.Period('2011-01', freq='M') + assert p + pd.NaT is pd.NaT + assert pd.NaT + p is pd.NaT + + p = pd.Period('NaT', freq='M') + assert p + pd.NaT is pd.NaT + assert pd.NaT + p is pd.NaT + + def test_add_raises(self): + # GH 4731 + dt1 = Period(freq='D', year=2008, month=1, day=1) + dt2 = Period(freq='D', year=2008, month=1, day=2) + msg = r"unsupported operand type\(s\)" + with tm.assert_raises_regex(TypeError, msg): + dt1 + "str" + + msg = r"unsupported operand type\(s\)" + with tm.assert_raises_regex(TypeError, msg): + "str" + dt1 + + with tm.assert_raises_regex(TypeError, msg): + dt1 + dt2 + + boxes = [lambda x: x, lambda 
x: pd.Series([x]), lambda x: pd.Index([x])] + + @pytest.mark.parametrize('lbox', boxes) + @pytest.mark.parametrize('rbox', boxes) + def test_add_timestamp_raises(self, rbox, lbox): + # GH # 17983 + ts = pd.Timestamp('2017') + per = pd.Period('2017', freq='M') + + # We may get a different message depending on which class raises + # the error. + msg = (r"cannot add|unsupported operand|" + r"can only operate on a|incompatible type|" + r"ufunc add cannot use operands") + with tm.assert_raises_regex(TypeError, msg): + lbox(ts) + rbox(per) + + with tm.assert_raises_regex(TypeError, msg): + lbox(per) + rbox(ts) + + with tm.assert_raises_regex(TypeError, msg): + lbox(per) + rbox(per) + + def test_sub(self): + dt1 = Period('2011-01-01', freq='D') + dt2 = Period('2011-01-15', freq='D') + + assert dt1 - dt2 == -14 + assert dt2 - dt1 == 14 + + msg = r"Input has different freq=M from Period\(freq=D\)" + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + dt1 - pd.Period('2011-02', freq='M') + + def test_add_offset(self): + # freq is DateOffset + for freq in ['A', '2A', '3A']: + p = Period('2011', freq=freq) + exp = Period('2013', freq=freq) + assert p + offsets.YearEnd(2) == exp + assert offsets.YearEnd(2) + p == exp + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(365, 'D'), + timedelta(365)]: + with pytest.raises(period.IncompatibleFrequency): + p + o + + if isinstance(o, np.timedelta64): + with pytest.raises(TypeError): + o + p + else: + with pytest.raises(period.IncompatibleFrequency): + o + p + + for freq in ['M', '2M', '3M']: + p = Period('2011-03', freq=freq) + exp = Period('2011-05', freq=freq) + assert p + offsets.MonthEnd(2) == exp + assert offsets.MonthEnd(2) + p == exp + + exp = Period('2012-03', freq=freq) + assert p + offsets.MonthEnd(12) == exp + assert offsets.MonthEnd(12) + p == exp + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(365, 'D'), + 
timedelta(365)]: + with pytest.raises(period.IncompatibleFrequency): + p + o + + if isinstance(o, np.timedelta64): + with pytest.raises(TypeError): + o + p + else: + with pytest.raises(period.IncompatibleFrequency): + o + p + + # freq is Tick + for freq in ['D', '2D', '3D']: + p = Period('2011-04-01', freq=freq) + + exp = Period('2011-04-06', freq=freq) + assert p + offsets.Day(5) == exp + assert offsets.Day(5) + p == exp + + exp = Period('2011-04-02', freq=freq) + assert p + offsets.Hour(24) == exp + assert offsets.Hour(24) + p == exp + + exp = Period('2011-04-03', freq=freq) + assert p + np.timedelta64(2, 'D') == exp + with pytest.raises(TypeError): + np.timedelta64(2, 'D') + p + + exp = Period('2011-04-02', freq=freq) + assert p + np.timedelta64(3600 * 24, 's') == exp + with pytest.raises(TypeError): + np.timedelta64(3600 * 24, 's') + p + + exp = Period('2011-03-30', freq=freq) + assert p + timedelta(-2) == exp + assert timedelta(-2) + p == exp + + exp = Period('2011-04-03', freq=freq) + assert p + timedelta(hours=48) == exp + assert timedelta(hours=48) + p == exp + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(4, 'h'), + timedelta(hours=23)]: + with pytest.raises(period.IncompatibleFrequency): + p + o + + if isinstance(o, np.timedelta64): + with pytest.raises(TypeError): + o + p + else: + with pytest.raises(period.IncompatibleFrequency): + o + p + + for freq in ['H', '2H', '3H']: + p = Period('2011-04-01 09:00', freq=freq) + + exp = Period('2011-04-03 09:00', freq=freq) + assert p + offsets.Day(2) == exp + assert offsets.Day(2) + p == exp + + exp = Period('2011-04-01 12:00', freq=freq) + assert p + offsets.Hour(3) == exp + assert offsets.Hour(3) + p == exp + + exp = Period('2011-04-01 12:00', freq=freq) + assert p + np.timedelta64(3, 'h') == exp + with pytest.raises(TypeError): + np.timedelta64(3, 'h') + p + + exp = Period('2011-04-01 10:00', freq=freq) + assert p + np.timedelta64(3600, 's') == exp + with 
pytest.raises(TypeError): + np.timedelta64(3600, 's') + p + + exp = Period('2011-04-01 11:00', freq=freq) + assert p + timedelta(minutes=120) == exp + assert timedelta(minutes=120) + p == exp + + exp = Period('2011-04-05 12:00', freq=freq) + assert p + timedelta(days=4, minutes=180) == exp + assert timedelta(days=4, minutes=180) + p == exp + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(3200, 's'), + timedelta(hours=23, minutes=30)]: + with pytest.raises(period.IncompatibleFrequency): + p + o + + if isinstance(o, np.timedelta64): + with pytest.raises(TypeError): + o + p + else: + with pytest.raises(period.IncompatibleFrequency): + o + p + + def test_add_offset_nat(self): + # freq is DateOffset + for freq in ['A', '2A', '3A']: + p = Period('NaT', freq=freq) + for o in [offsets.YearEnd(2)]: + assert p + o is tslib.NaT + assert o + p is tslib.NaT + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(365, 'D'), + timedelta(365)]: + assert p + o is tslib.NaT + + if isinstance(o, np.timedelta64): + with pytest.raises(TypeError): + o + p + else: + assert o + p is tslib.NaT + + for freq in ['M', '2M', '3M']: + p = Period('NaT', freq=freq) + for o in [offsets.MonthEnd(2), offsets.MonthEnd(12)]: + assert p + o is tslib.NaT + + if isinstance(o, np.timedelta64): + with pytest.raises(TypeError): + o + p + else: + assert o + p is tslib.NaT + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(365, 'D'), + timedelta(365)]: + assert p + o is tslib.NaT + + if isinstance(o, np.timedelta64): + with pytest.raises(TypeError): + o + p + else: + assert o + p is tslib.NaT + + # freq is Tick + for freq in ['D', '2D', '3D']: + p = Period('NaT', freq=freq) + for o in [offsets.Day(5), offsets.Hour(24), np.timedelta64(2, 'D'), + np.timedelta64(3600 * 24, 's'), timedelta(-2), + timedelta(hours=48)]: + assert p + o is tslib.NaT + + if isinstance(o, np.timedelta64): + with 
pytest.raises(TypeError): + o + p + else: + assert o + p is tslib.NaT + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(4, 'h'), + timedelta(hours=23)]: + assert p + o is tslib.NaT + + if isinstance(o, np.timedelta64): + with pytest.raises(TypeError): + o + p + else: + assert o + p is tslib.NaT + + for freq in ['H', '2H', '3H']: + p = Period('NaT', freq=freq) + for o in [offsets.Day(2), offsets.Hour(3), np.timedelta64(3, 'h'), + np.timedelta64(3600, 's'), timedelta(minutes=120), + timedelta(days=4, minutes=180)]: + assert p + o is tslib.NaT + + if not isinstance(o, np.timedelta64): + assert o + p is tslib.NaT + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(3200, 's'), + timedelta(hours=23, minutes=30)]: + assert p + o is tslib.NaT + + if isinstance(o, np.timedelta64): + with pytest.raises(TypeError): + o + p + else: + assert o + p is tslib.NaT + + def test_sub_pdnat(self): + # GH 13071 + p = pd.Period('2011-01', freq='M') + assert p - pd.NaT is pd.NaT + assert pd.NaT - p is pd.NaT + + p = pd.Period('NaT', freq='M') + assert p - pd.NaT is pd.NaT + assert pd.NaT - p is pd.NaT + + def test_sub_offset(self): + # freq is DateOffset + for freq in ['A', '2A', '3A']: + p = Period('2011', freq=freq) + assert p - offsets.YearEnd(2) == Period('2009', freq=freq) + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(365, 'D'), + timedelta(365)]: + with pytest.raises(period.IncompatibleFrequency): + p - o + + for freq in ['M', '2M', '3M']: + p = Period('2011-03', freq=freq) + assert p - offsets.MonthEnd(2) == Period('2011-01', freq=freq) + assert p - offsets.MonthEnd(12) == Period('2010-03', freq=freq) + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(365, 'D'), + timedelta(365)]: + with pytest.raises(period.IncompatibleFrequency): + p - o + + # freq is Tick + for freq in ['D', '2D', '3D']: + p = 
Period('2011-04-01', freq=freq) + assert p - offsets.Day(5) == Period('2011-03-27', freq=freq) + assert p - offsets.Hour(24) == Period('2011-03-31', freq=freq) + assert p - np.timedelta64(2, 'D') == Period( + '2011-03-30', freq=freq) + assert p - np.timedelta64(3600 * 24, 's') == Period( + '2011-03-31', freq=freq) + assert p - timedelta(-2) == Period('2011-04-03', freq=freq) + assert p - timedelta(hours=48) == Period('2011-03-30', freq=freq) + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(4, 'h'), + timedelta(hours=23)]: + with pytest.raises(period.IncompatibleFrequency): + p - o + + for freq in ['H', '2H', '3H']: + p = Period('2011-04-01 09:00', freq=freq) + assert p - offsets.Day(2) == Period('2011-03-30 09:00', freq=freq) + assert p - offsets.Hour(3) == Period('2011-04-01 06:00', freq=freq) + assert p - np.timedelta64(3, 'h') == Period( + '2011-04-01 06:00', freq=freq) + assert p - np.timedelta64(3600, 's') == Period( + '2011-04-01 08:00', freq=freq) + assert p - timedelta(minutes=120) == Period( + '2011-04-01 07:00', freq=freq) + assert p - timedelta(days=4, minutes=180) == Period( + '2011-03-28 06:00', freq=freq) + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(3200, 's'), + timedelta(hours=23, minutes=30)]: + with pytest.raises(period.IncompatibleFrequency): + p - o + + def test_sub_offset_nat(self): + # freq is DateOffset + for freq in ['A', '2A', '3A']: + p = Period('NaT', freq=freq) + for o in [offsets.YearEnd(2)]: + assert p - o is tslib.NaT + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(365, 'D'), + timedelta(365)]: + assert p - o is tslib.NaT + + for freq in ['M', '2M', '3M']: + p = Period('NaT', freq=freq) + for o in [offsets.MonthEnd(2), offsets.MonthEnd(12)]: + assert p - o is tslib.NaT + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(365, 'D'), + timedelta(365)]: + 
assert p - o is tslib.NaT + + # freq is Tick + for freq in ['D', '2D', '3D']: + p = Period('NaT', freq=freq) + for o in [offsets.Day(5), offsets.Hour(24), np.timedelta64(2, 'D'), + np.timedelta64(3600 * 24, 's'), timedelta(-2), + timedelta(hours=48)]: + assert p - o is tslib.NaT + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(4, 'h'), + timedelta(hours=23)]: + assert p - o is tslib.NaT + + for freq in ['H', '2H', '3H']: + p = Period('NaT', freq=freq) + for o in [offsets.Day(2), offsets.Hour(3), np.timedelta64(3, 'h'), + np.timedelta64(3600, 's'), timedelta(minutes=120), + timedelta(days=4, minutes=180)]: + assert p - o is tslib.NaT + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(3200, 's'), + timedelta(hours=23, minutes=30)]: + assert p - o is tslib.NaT + + @pytest.mark.parametrize('freq', ['M', '2M', '3M']) + def test_nat_ops(self, freq): + p = Period('NaT', freq=freq) + assert p + 1 is tslib.NaT + assert 1 + p is tslib.NaT + assert p - 1 is tslib.NaT + assert p - Period('2011-01', freq=freq) is tslib.NaT + assert Period('2011-01', freq=freq) - p is tslib.NaT + + def test_period_ops_offset(self): + p = Period('2011-04-01', freq='D') + result = p + offsets.Day() + exp = pd.Period('2011-04-02', freq='D') + assert result == exp + + result = p - offsets.Day(2) + exp = pd.Period('2011-03-30', freq='D') + assert result == exp + + msg = r"Input cannot be converted to Period\(freq=D\)" + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + p + offsets.Hour(2) + + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + p - offsets.Hour(2) + + +def test_period_immutable(): + # see gh-17116 + per = pd.Period('2014Q1') + with pytest.raises(AttributeError): + per.ordinal = 14 + + freq = per.freq + with pytest.raises(AttributeError): + per.freq = 2 * freq + + +@pytest.mark.xfail(reason='GH#19834 Period parsing error') +def test_small_year_parsing(): + per1 = 
Period('0001-01-07', 'D') + assert per1.year == 1 + assert per1.day == 7 diff --git a/pandas/tests/scalar/test_nat.py b/pandas/tests/scalar/test_nat.py new file mode 100644 index 0000000000000..a6b217a37bd0c --- /dev/null +++ b/pandas/tests/scalar/test_nat.py @@ -0,0 +1,332 @@ +import pytest + +from datetime import datetime, timedelta +import pytz + +import numpy as np +from pandas import (NaT, Index, Timestamp, Timedelta, Period, + DatetimeIndex, PeriodIndex, + TimedeltaIndex, Series, isna) +from pandas.util import testing as tm +from pandas._libs.tslib import iNaT + +from pandas.compat import callable + + +@pytest.mark.parametrize('nat, idx', [(Timestamp('NaT'), DatetimeIndex), + (Timedelta('NaT'), TimedeltaIndex), + (Period('NaT', freq='M'), PeriodIndex)]) +def test_nat_fields(nat, idx): + + for field in idx._field_ops: + + # weekday is a property of DTI, but a method + # on NaT/Timestamp for compat with datetime + if field == 'weekday': + continue + + result = getattr(NaT, field) + assert np.isnan(result) + + result = getattr(nat, field) + assert np.isnan(result) + + for field in idx._bool_ops: + + result = getattr(NaT, field) + assert result is False + + result = getattr(nat, field) + assert result is False + + +def test_nat_vector_field_access(): + idx = DatetimeIndex(['1/1/2000', None, None, '1/4/2000']) + + for field in DatetimeIndex._field_ops: + # weekday is a property of DTI, but a method + # on NaT/Timestamp for compat with datetime + if field == 'weekday': + continue + + result = getattr(idx, field) + expected = Index([getattr(x, field) for x in idx]) + tm.assert_index_equal(result, expected) + + s = Series(idx) + + for field in DatetimeIndex._field_ops: + + # weekday is a property of DTI, but a method + # on NaT/Timestamp for compat with datetime + if field == 'weekday': + continue + + result = getattr(s.dt, field) + expected = [getattr(x, field) for x in idx] + tm.assert_series_equal(result, Series(expected)) + + for field in DatetimeIndex._bool_ops: 
+ result = getattr(s.dt, field) + expected = [getattr(x, field) for x in idx] + tm.assert_series_equal(result, Series(expected)) + + +@pytest.mark.parametrize('klass', [Timestamp, Timedelta, Period]) +def test_identity(klass): + assert klass(None) is NaT + + result = klass(np.nan) + assert result is NaT + + result = klass(None) + assert result is NaT + + result = klass(iNaT) + assert result is NaT + + result = klass(np.nan) + assert result is NaT + + result = klass(float('nan')) + assert result is NaT + + result = klass(NaT) + assert result is NaT + + result = klass('NaT') + assert result is NaT + + assert isna(klass('nat')) + + +@pytest.mark.parametrize('klass', [Timestamp, Timedelta, Period]) +def test_equality(klass): + + # nat + if klass is not Period: + klass('').value == iNaT + klass('nat').value == iNaT + klass('NAT').value == iNaT + klass(None).value == iNaT + klass(np.nan).value == iNaT + assert isna(klass('nat')) + + +@pytest.mark.parametrize('klass', [Timestamp, Timedelta]) +def test_round_nat(klass): + # GH14940 + ts = klass('nat') + for method in ["round", "floor", "ceil"]: + round_method = getattr(ts, method) + for freq in ["s", "5s", "min", "5min", "h", "5h"]: + assert round_method(freq) is ts + + +def test_NaT_methods(): + # GH 9513 + # GH 17329 for `timestamp` + raise_methods = ['astimezone', 'combine', 'ctime', 'dst', + 'fromordinal', 'fromtimestamp', 'isocalendar', + 'strftime', 'strptime', 'time', 'timestamp', + 'timetuple', 'timetz', 'toordinal', 'tzname', + 'utcfromtimestamp', 'utcnow', 'utcoffset', + 'utctimetuple', 'timestamp'] + nat_methods = ['date', 'now', 'replace', 'to_datetime', 'today', + 'tz_convert', 'tz_localize'] + nan_methods = ['weekday', 'isoweekday'] + + for method in raise_methods: + if hasattr(NaT, method): + with pytest.raises(ValueError): + getattr(NaT, method)() + + for method in nan_methods: + if hasattr(NaT, method): + assert np.isnan(getattr(NaT, method)()) + + for method in nat_methods: + if hasattr(NaT, method): + # 
see gh-8254 + exp_warning = None + if method == 'to_datetime': + exp_warning = FutureWarning + with tm.assert_produces_warning( + exp_warning, check_stacklevel=False): + assert getattr(NaT, method)() is NaT + + # GH 12300 + assert NaT.isoformat() == 'NaT' + + +def test_NaT_docstrings(): + # GH#17327 + nat_names = dir(NaT) + + # NaT should have *most* of the Timestamp methods, with matching + # docstrings. The attributes that are not expected to be present in NaT + # are private methods plus `ts_expected` below. + ts_names = dir(Timestamp) + ts_missing = [x for x in ts_names if x not in nat_names and + not x.startswith('_')] + ts_missing.sort() + ts_expected = ['freqstr', 'normalize', + 'to_julian_date', + 'to_period', 'tz'] + assert ts_missing == ts_expected + + ts_overlap = [x for x in nat_names if x in ts_names and + not x.startswith('_') and + callable(getattr(Timestamp, x))] + for name in ts_overlap: + tsdoc = getattr(Timestamp, name).__doc__ + natdoc = getattr(NaT, name).__doc__ + assert tsdoc == natdoc + + # NaT should have *most* of the Timedelta methods, with matching + # docstrings. The attributes that are not expected to be present in NaT + # are private methods plus `td_expected` below. + # For methods that are both Timestamp and Timedelta methods, the + # Timestamp docstring takes priority. 
+ td_names = dir(Timedelta) + td_missing = [x for x in td_names if x not in nat_names and + not x.startswith('_')] + td_missing.sort() + td_expected = ['components', 'delta', 'is_populated', + 'to_pytimedelta', 'to_timedelta64', 'view'] + assert td_missing == td_expected + + td_overlap = [x for x in nat_names if x in td_names and + x not in ts_names and # Timestamp __doc__ takes priority + not x.startswith('_') and + callable(getattr(Timedelta, x))] + assert td_overlap == ['total_seconds'] + for name in td_overlap: + tddoc = getattr(Timedelta, name).__doc__ + natdoc = getattr(NaT, name).__doc__ + assert tddoc == natdoc + + +@pytest.mark.parametrize('klass', [Timestamp, Timedelta]) +def test_isoformat(klass): + + result = klass('NaT').isoformat() + expected = 'NaT' + assert result == expected + + +def test_nat_arithmetic(): + # GH 6873 + i = 2 + f = 1.5 + + for (left, right) in [(NaT, i), (NaT, f), (NaT, np.nan)]: + assert left / right is NaT + assert left * right is NaT + assert right * left is NaT + with pytest.raises(TypeError): + right / left + + # Timestamp / datetime + t = Timestamp('2014-01-01') + dt = datetime(2014, 1, 1) + for (left, right) in [(NaT, NaT), (NaT, t), (NaT, dt)]: + # NaT __add__ or __sub__ Timestamp-like (or inverse) returns NaT + assert right + left is NaT + assert left + right is NaT + assert left - right is NaT + assert right - left is NaT + + # timedelta-like + # offsets are tested in test_offsets.py + + delta = timedelta(3600) + td = Timedelta('5s') + + for (left, right) in [(NaT, delta), (NaT, td)]: + # NaT + timedelta-like returns NaT + assert right + left is NaT + assert left + right is NaT + assert right - left is NaT + assert left - right is NaT + assert np.isnan(left / right) + assert np.isnan(right / left) + + # GH 11718 + t_utc = Timestamp('2014-01-01', tz='UTC') + t_tz = Timestamp('2014-01-01', tz='US/Eastern') + dt_tz = pytz.timezone('Asia/Tokyo').localize(dt) + + for (left, right) in [(NaT, t_utc), (NaT, t_tz), + (NaT, 
dt_tz)]: + # NaT __add__ or __sub__ Timestamp-like (or inverse) returns NaT + assert right + left is NaT + assert left + right is NaT + assert left - right is NaT + assert right - left is NaT + + # int addition / subtraction + for (left, right) in [(NaT, 2), (NaT, 0), (NaT, -3)]: + assert right + left is NaT + assert left + right is NaT + assert left - right is NaT + assert right - left is NaT + + +def test_nat_rfloordiv_timedelta(): + # GH#18846 + # See also test_timedelta.TestTimedeltaArithmetic.test_floordiv + td = Timedelta(hours=3, minutes=4) + + assert td // np.nan is NaT + assert np.isnan(td // NaT) + assert np.isnan(td // np.timedelta64('NaT')) + + +def test_nat_arithmetic_index(): + # GH 11718 + + dti = DatetimeIndex(['2011-01-01', '2011-01-02'], name='x') + exp = DatetimeIndex([NaT, NaT], name='x') + tm.assert_index_equal(dti + NaT, exp) + tm.assert_index_equal(NaT + dti, exp) + + dti_tz = DatetimeIndex(['2011-01-01', '2011-01-02'], + tz='US/Eastern', name='x') + exp = DatetimeIndex([NaT, NaT], name='x', tz='US/Eastern') + tm.assert_index_equal(dti_tz + NaT, exp) + tm.assert_index_equal(NaT + dti_tz, exp) + + exp = TimedeltaIndex([NaT, NaT], name='x') + for (left, right) in [(NaT, dti), (NaT, dti_tz)]: + tm.assert_index_equal(left - right, exp) + tm.assert_index_equal(right - left, exp) + + # timedelta # GH#19124 + tdi = TimedeltaIndex(['1 day', '2 day'], name='x') + tdi_nat = TimedeltaIndex([NaT, NaT], name='x') + + tm.assert_index_equal(tdi + NaT, tdi_nat) + tm.assert_index_equal(NaT + tdi, tdi_nat) + tm.assert_index_equal(tdi - NaT, tdi_nat) + tm.assert_index_equal(NaT - tdi, tdi_nat) + + +@pytest.mark.parametrize('box, assert_func', [ + (TimedeltaIndex, tm.assert_index_equal), + (Series, tm.assert_series_equal) +]) +def test_nat_arithmetic_td64_vector(box, assert_func): + # GH#19124 + vec = box(['1 day', '2 day'], dtype='timedelta64[ns]') + box_nat = box([NaT, NaT], dtype='timedelta64[ns]') + + assert_func(vec + NaT, box_nat) + assert_func(NaT + vec, 
box_nat) + assert_func(vec - NaT, box_nat) + assert_func(NaT - vec, box_nat) + + +def test_nat_pinned_docstrings(): + # GH17327 + assert NaT.ctime.__doc__ == datetime.ctime.__doc__ diff --git a/pandas/tests/scalar/test_period.py b/pandas/tests/scalar/test_period.py deleted file mode 100644 index ffe00a4a62a0a..0000000000000 --- a/pandas/tests/scalar/test_period.py +++ /dev/null @@ -1,1448 +0,0 @@ -import numpy as np -from datetime import datetime, date, timedelta - -import pandas as pd -import pandas.util.testing as tm -import pandas.tseries.period as period -from pandas.compat import text_type, iteritems -from pandas.compat.numpy import np_datetime64_compat -from pandas import Period, Timestamp, tslib, offsets, _period -from pandas.tseries.frequencies import DAYS, MONTHS - - -class TestPeriodProperties(tm.TestCase): - "Test properties such as year, month, weekday, etc...." - - def test_is_leap_year(self): - # GH 13727 - for freq in ['A', 'M', 'D', 'H']: - p = Period('2000-01-01 00:00:00', freq=freq) - self.assertTrue(p.is_leap_year) - self.assertIsInstance(p.is_leap_year, bool) - - p = Period('1999-01-01 00:00:00', freq=freq) - self.assertFalse(p.is_leap_year) - - p = Period('2004-01-01 00:00:00', freq=freq) - self.assertTrue(p.is_leap_year) - - p = Period('2100-01-01 00:00:00', freq=freq) - self.assertFalse(p.is_leap_year) - - def test_quarterly_negative_ordinals(self): - p = Period(ordinal=-1, freq='Q-DEC') - self.assertEqual(p.year, 1969) - self.assertEqual(p.quarter, 4) - self.assertIsInstance(p, Period) - - p = Period(ordinal=-2, freq='Q-DEC') - self.assertEqual(p.year, 1969) - self.assertEqual(p.quarter, 3) - self.assertIsInstance(p, Period) - - p = Period(ordinal=-2, freq='M') - self.assertEqual(p.year, 1969) - self.assertEqual(p.month, 11) - self.assertIsInstance(p, Period) - - def test_period_cons_quarterly(self): - # bugs in scikits.timeseries - for month in MONTHS: - freq = 'Q-%s' % month - exp = Period('1989Q3', freq=freq) - self.assertIn('1989Q3', 
str(exp)) - stamp = exp.to_timestamp('D', how='end') - p = Period(stamp, freq=freq) - self.assertEqual(p, exp) - - stamp = exp.to_timestamp('3D', how='end') - p = Period(stamp, freq=freq) - self.assertEqual(p, exp) - - def test_period_cons_annual(self): - # bugs in scikits.timeseries - for month in MONTHS: - freq = 'A-%s' % month - exp = Period('1989', freq=freq) - stamp = exp.to_timestamp('D', how='end') + timedelta(days=30) - p = Period(stamp, freq=freq) - self.assertEqual(p, exp + 1) - self.assertIsInstance(p, Period) - - def test_period_cons_weekly(self): - for num in range(10, 17): - daystr = '2011-02-%d' % num - for day in DAYS: - freq = 'W-%s' % day - - result = Period(daystr, freq=freq) - expected = Period(daystr, freq='D').asfreq(freq) - self.assertEqual(result, expected) - self.assertIsInstance(result, Period) - - def test_period_from_ordinal(self): - p = pd.Period('2011-01', freq='M') - res = pd.Period._from_ordinal(p.ordinal, freq='M') - self.assertEqual(p, res) - self.assertIsInstance(res, Period) - - def test_period_cons_nat(self): - p = Period('NaT', freq='M') - self.assertIs(p, pd.NaT) - - p = Period('nat', freq='W-SUN') - self.assertIs(p, pd.NaT) - - p = Period(tslib.iNaT, freq='D') - self.assertIs(p, pd.NaT) - - p = Period(tslib.iNaT, freq='3D') - self.assertIs(p, pd.NaT) - - p = Period(tslib.iNaT, freq='1D1H') - self.assertIs(p, pd.NaT) - - p = Period('NaT') - self.assertIs(p, pd.NaT) - - p = Period(tslib.iNaT) - self.assertIs(p, pd.NaT) - - def test_cons_null_like(self): - # check Timestamp compat - self.assertIs(Timestamp('NaT'), pd.NaT) - self.assertIs(Period('NaT'), pd.NaT) - - self.assertIs(Timestamp(None), pd.NaT) - self.assertIs(Period(None), pd.NaT) - - self.assertIs(Timestamp(float('nan')), pd.NaT) - self.assertIs(Period(float('nan')), pd.NaT) - - self.assertIs(Timestamp(np.nan), pd.NaT) - self.assertIs(Period(np.nan), pd.NaT) - - def test_period_cons_mult(self): - p1 = Period('2011-01', freq='3M') - p2 = Period('2011-01', freq='M') - 
self.assertEqual(p1.ordinal, p2.ordinal) - - self.assertEqual(p1.freq, offsets.MonthEnd(3)) - self.assertEqual(p1.freqstr, '3M') - - self.assertEqual(p2.freq, offsets.MonthEnd()) - self.assertEqual(p2.freqstr, 'M') - - result = p1 + 1 - self.assertEqual(result.ordinal, (p2 + 3).ordinal) - self.assertEqual(result.freq, p1.freq) - self.assertEqual(result.freqstr, '3M') - - result = p1 - 1 - self.assertEqual(result.ordinal, (p2 - 3).ordinal) - self.assertEqual(result.freq, p1.freq) - self.assertEqual(result.freqstr, '3M') - - msg = ('Frequency must be positive, because it' - ' represents span: -3M') - with tm.assertRaisesRegexp(ValueError, msg): - Period('2011-01', freq='-3M') - - msg = ('Frequency must be positive, because it' ' represents span: 0M') - with tm.assertRaisesRegexp(ValueError, msg): - Period('2011-01', freq='0M') - - def test_period_cons_combined(self): - p = [(Period('2011-01', freq='1D1H'), - Period('2011-01', freq='1H1D'), - Period('2011-01', freq='H')), - (Period(ordinal=1, freq='1D1H'), - Period(ordinal=1, freq='1H1D'), - Period(ordinal=1, freq='H'))] - - for p1, p2, p3 in p: - self.assertEqual(p1.ordinal, p3.ordinal) - self.assertEqual(p2.ordinal, p3.ordinal) - - self.assertEqual(p1.freq, offsets.Hour(25)) - self.assertEqual(p1.freqstr, '25H') - - self.assertEqual(p2.freq, offsets.Hour(25)) - self.assertEqual(p2.freqstr, '25H') - - self.assertEqual(p3.freq, offsets.Hour()) - self.assertEqual(p3.freqstr, 'H') - - result = p1 + 1 - self.assertEqual(result.ordinal, (p3 + 25).ordinal) - self.assertEqual(result.freq, p1.freq) - self.assertEqual(result.freqstr, '25H') - - result = p2 + 1 - self.assertEqual(result.ordinal, (p3 + 25).ordinal) - self.assertEqual(result.freq, p2.freq) - self.assertEqual(result.freqstr, '25H') - - result = p1 - 1 - self.assertEqual(result.ordinal, (p3 - 25).ordinal) - self.assertEqual(result.freq, p1.freq) - self.assertEqual(result.freqstr, '25H') - - result = p2 - 1 - self.assertEqual(result.ordinal, (p3 - 25).ordinal) - 
self.assertEqual(result.freq, p2.freq) - self.assertEqual(result.freqstr, '25H') - - msg = ('Frequency must be positive, because it' - ' represents span: -25H') - with tm.assertRaisesRegexp(ValueError, msg): - Period('2011-01', freq='-1D1H') - with tm.assertRaisesRegexp(ValueError, msg): - Period('2011-01', freq='-1H1D') - with tm.assertRaisesRegexp(ValueError, msg): - Period(ordinal=1, freq='-1D1H') - with tm.assertRaisesRegexp(ValueError, msg): - Period(ordinal=1, freq='-1H1D') - - msg = ('Frequency must be positive, because it' - ' represents span: 0D') - with tm.assertRaisesRegexp(ValueError, msg): - Period('2011-01', freq='0D0H') - with tm.assertRaisesRegexp(ValueError, msg): - Period(ordinal=1, freq='0D0H') - - # You can only combine together day and intraday offsets - msg = ('Invalid frequency: 1W1D') - with tm.assertRaisesRegexp(ValueError, msg): - Period('2011-01', freq='1W1D') - msg = ('Invalid frequency: 1D1W') - with tm.assertRaisesRegexp(ValueError, msg): - Period('2011-01', freq='1D1W') - - def test_timestamp_tz_arg(self): - tm._skip_if_no_pytz() - import pytz - for case in ['Europe/Brussels', 'Asia/Tokyo', 'US/Pacific']: - p = Period('1/1/2005', freq='M').to_timestamp(tz=case) - exp = Timestamp('1/1/2005', tz='UTC').tz_convert(case) - exp_zone = pytz.timezone(case).normalize(p) - - self.assertEqual(p, exp) - self.assertEqual(p.tz, exp_zone.tzinfo) - self.assertEqual(p.tz, exp.tz) - - p = Period('1/1/2005', freq='3H').to_timestamp(tz=case) - exp = Timestamp('1/1/2005', tz='UTC').tz_convert(case) - exp_zone = pytz.timezone(case).normalize(p) - - self.assertEqual(p, exp) - self.assertEqual(p.tz, exp_zone.tzinfo) - self.assertEqual(p.tz, exp.tz) - - p = Period('1/1/2005', freq='A').to_timestamp(freq='A', tz=case) - exp = Timestamp('31/12/2005', tz='UTC').tz_convert(case) - exp_zone = pytz.timezone(case).normalize(p) - - self.assertEqual(p, exp) - self.assertEqual(p.tz, exp_zone.tzinfo) - self.assertEqual(p.tz, exp.tz) - - p = Period('1/1/2005', 
freq='A').to_timestamp(freq='3H', tz=case) - exp = Timestamp('1/1/2005', tz='UTC').tz_convert(case) - exp_zone = pytz.timezone(case).normalize(p) - - self.assertEqual(p, exp) - self.assertEqual(p.tz, exp_zone.tzinfo) - self.assertEqual(p.tz, exp.tz) - - def test_timestamp_tz_arg_dateutil(self): - from pandas.tslib import _dateutil_gettz as gettz - from pandas.tslib import maybe_get_tz - for case in ['dateutil/Europe/Brussels', 'dateutil/Asia/Tokyo', - 'dateutil/US/Pacific']: - p = Period('1/1/2005', freq='M').to_timestamp( - tz=maybe_get_tz(case)) - exp = Timestamp('1/1/2005', tz='UTC').tz_convert(case) - self.assertEqual(p, exp) - self.assertEqual(p.tz, gettz(case.split('/', 1)[1])) - self.assertEqual(p.tz, exp.tz) - - p = Period('1/1/2005', - freq='M').to_timestamp(freq='3H', tz=maybe_get_tz(case)) - exp = Timestamp('1/1/2005', tz='UTC').tz_convert(case) - self.assertEqual(p, exp) - self.assertEqual(p.tz, gettz(case.split('/', 1)[1])) - self.assertEqual(p.tz, exp.tz) - - def test_timestamp_tz_arg_dateutil_from_string(self): - from pandas.tslib import _dateutil_gettz as gettz - p = Period('1/1/2005', - freq='M').to_timestamp(tz='dateutil/Europe/Brussels') - self.assertEqual(p.tz, gettz('Europe/Brussels')) - - def test_timestamp_mult(self): - p = pd.Period('2011-01', freq='M') - self.assertEqual(p.to_timestamp(how='S'), pd.Timestamp('2011-01-01')) - self.assertEqual(p.to_timestamp(how='E'), pd.Timestamp('2011-01-31')) - - p = pd.Period('2011-01', freq='3M') - self.assertEqual(p.to_timestamp(how='S'), pd.Timestamp('2011-01-01')) - self.assertEqual(p.to_timestamp(how='E'), pd.Timestamp('2011-03-31')) - - def test_construction(self): - i1 = Period('1/1/2005', freq='M') - i2 = Period('Jan 2005') - - self.assertEqual(i1, i2) - - i1 = Period('2005', freq='A') - i2 = Period('2005') - i3 = Period('2005', freq='a') - - self.assertEqual(i1, i2) - self.assertEqual(i1, i3) - - i4 = Period('2005', freq='M') - i5 = Period('2005', freq='m') - - self.assertRaises(ValueError, 
i1.__ne__, i4) - self.assertEqual(i4, i5) - - i1 = Period.now('Q') - i2 = Period(datetime.now(), freq='Q') - i3 = Period.now('q') - - self.assertEqual(i1, i2) - self.assertEqual(i1, i3) - - i1 = Period('1982', freq='min') - i2 = Period('1982', freq='MIN') - self.assertEqual(i1, i2) - i2 = Period('1982', freq=('Min', 1)) - self.assertEqual(i1, i2) - - i1 = Period(year=2005, month=3, day=1, freq='D') - i2 = Period('3/1/2005', freq='D') - self.assertEqual(i1, i2) - - i3 = Period(year=2005, month=3, day=1, freq='d') - self.assertEqual(i1, i3) - - i1 = Period('2007-01-01 09:00:00.001') - expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1000), freq='L') - self.assertEqual(i1, expected) - - expected = Period(np_datetime64_compat( - '2007-01-01 09:00:00.001Z'), freq='L') - self.assertEqual(i1, expected) - - i1 = Period('2007-01-01 09:00:00.00101') - expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1010), freq='U') - self.assertEqual(i1, expected) - - expected = Period(np_datetime64_compat('2007-01-01 09:00:00.00101Z'), - freq='U') - self.assertEqual(i1, expected) - - self.assertRaises(ValueError, Period, ordinal=200701) - - self.assertRaises(ValueError, Period, '2007-1-1', freq='X') - - def test_construction_bday(self): - - # Biz day construction, roll forward if non-weekday - i1 = Period('3/10/12', freq='B') - i2 = Period('3/10/12', freq='D') - self.assertEqual(i1, i2.asfreq('B')) - i2 = Period('3/11/12', freq='D') - self.assertEqual(i1, i2.asfreq('B')) - i2 = Period('3/12/12', freq='D') - self.assertEqual(i1, i2.asfreq('B')) - - i3 = Period('3/10/12', freq='b') - self.assertEqual(i1, i3) - - i1 = Period(year=2012, month=3, day=10, freq='B') - i2 = Period('3/12/12', freq='B') - self.assertEqual(i1, i2) - - def test_construction_quarter(self): - - i1 = Period(year=2005, quarter=1, freq='Q') - i2 = Period('1/1/2005', freq='Q') - self.assertEqual(i1, i2) - - i1 = Period(year=2005, quarter=3, freq='Q') - i2 = Period('9/1/2005', freq='Q') - self.assertEqual(i1, i2) - - i1 = 
Period('2005Q1') - i2 = Period(year=2005, quarter=1, freq='Q') - i3 = Period('2005q1') - self.assertEqual(i1, i2) - self.assertEqual(i1, i3) - - i1 = Period('05Q1') - self.assertEqual(i1, i2) - lower = Period('05q1') - self.assertEqual(i1, lower) - - i1 = Period('1Q2005') - self.assertEqual(i1, i2) - lower = Period('1q2005') - self.assertEqual(i1, lower) - - i1 = Period('1Q05') - self.assertEqual(i1, i2) - lower = Period('1q05') - self.assertEqual(i1, lower) - - i1 = Period('4Q1984') - self.assertEqual(i1.year, 1984) - lower = Period('4q1984') - self.assertEqual(i1, lower) - - def test_construction_month(self): - - expected = Period('2007-01', freq='M') - i1 = Period('200701', freq='M') - self.assertEqual(i1, expected) - - i1 = Period('200701', freq='M') - self.assertEqual(i1, expected) - - i1 = Period(200701, freq='M') - self.assertEqual(i1, expected) - - i1 = Period(ordinal=200701, freq='M') - self.assertEqual(i1.year, 18695) - - i1 = Period(datetime(2007, 1, 1), freq='M') - i2 = Period('200701', freq='M') - self.assertEqual(i1, i2) - - i1 = Period(date(2007, 1, 1), freq='M') - i2 = Period(datetime(2007, 1, 1), freq='M') - i3 = Period(np.datetime64('2007-01-01'), freq='M') - i4 = Period(np_datetime64_compat('2007-01-01 00:00:00Z'), freq='M') - i5 = Period(np_datetime64_compat('2007-01-01 00:00:00.000Z'), freq='M') - self.assertEqual(i1, i2) - self.assertEqual(i1, i3) - self.assertEqual(i1, i4) - self.assertEqual(i1, i5) - - def test_period_constructor_offsets(self): - self.assertEqual(Period('1/1/2005', freq=offsets.MonthEnd()), - Period('1/1/2005', freq='M')) - self.assertEqual(Period('2005', freq=offsets.YearEnd()), - Period('2005', freq='A')) - self.assertEqual(Period('2005', freq=offsets.MonthEnd()), - Period('2005', freq='M')) - self.assertEqual(Period('3/10/12', freq=offsets.BusinessDay()), - Period('3/10/12', freq='B')) - self.assertEqual(Period('3/10/12', freq=offsets.Day()), - Period('3/10/12', freq='D')) - - self.assertEqual(Period(year=2005, quarter=1, 
- freq=offsets.QuarterEnd(startingMonth=12)), - Period(year=2005, quarter=1, freq='Q')) - self.assertEqual(Period(year=2005, quarter=2, - freq=offsets.QuarterEnd(startingMonth=12)), - Period(year=2005, quarter=2, freq='Q')) - - self.assertEqual(Period(year=2005, month=3, day=1, freq=offsets.Day()), - Period(year=2005, month=3, day=1, freq='D')) - self.assertEqual(Period(year=2012, month=3, day=10, - freq=offsets.BDay()), - Period(year=2012, month=3, day=10, freq='B')) - - expected = Period('2005-03-01', freq='3D') - self.assertEqual(Period(year=2005, month=3, day=1, - freq=offsets.Day(3)), expected) - self.assertEqual(Period(year=2005, month=3, day=1, freq='3D'), - expected) - - self.assertEqual(Period(year=2012, month=3, day=10, - freq=offsets.BDay(3)), - Period(year=2012, month=3, day=10, freq='3B')) - - self.assertEqual(Period(200701, freq=offsets.MonthEnd()), - Period(200701, freq='M')) - - i1 = Period(ordinal=200701, freq=offsets.MonthEnd()) - i2 = Period(ordinal=200701, freq='M') - self.assertEqual(i1, i2) - self.assertEqual(i1.year, 18695) - self.assertEqual(i2.year, 18695) - - i1 = Period(datetime(2007, 1, 1), freq='M') - i2 = Period('200701', freq='M') - self.assertEqual(i1, i2) - - i1 = Period(date(2007, 1, 1), freq='M') - i2 = Period(datetime(2007, 1, 1), freq='M') - i3 = Period(np.datetime64('2007-01-01'), freq='M') - i4 = Period(np_datetime64_compat('2007-01-01 00:00:00Z'), freq='M') - i5 = Period(np_datetime64_compat('2007-01-01 00:00:00.000Z'), freq='M') - self.assertEqual(i1, i2) - self.assertEqual(i1, i3) - self.assertEqual(i1, i4) - self.assertEqual(i1, i5) - - i1 = Period('2007-01-01 09:00:00.001') - expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1000), freq='L') - self.assertEqual(i1, expected) - - expected = Period(np_datetime64_compat( - '2007-01-01 09:00:00.001Z'), freq='L') - self.assertEqual(i1, expected) - - i1 = Period('2007-01-01 09:00:00.00101') - expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1010), freq='U') - self.assertEqual(i1, 
expected) - - expected = Period(np_datetime64_compat('2007-01-01 09:00:00.00101Z'), - freq='U') - self.assertEqual(i1, expected) - - self.assertRaises(ValueError, Period, ordinal=200701) - - self.assertRaises(ValueError, Period, '2007-1-1', freq='X') - - def test_freq_str(self): - i1 = Period('1982', freq='Min') - self.assertEqual(i1.freq, offsets.Minute()) - self.assertEqual(i1.freqstr, 'T') - - def test_period_deprecated_freq(self): - cases = {"M": ["MTH", "MONTH", "MONTHLY", "Mth", "month", "monthly"], - "B": ["BUS", "BUSINESS", "BUSINESSLY", "WEEKDAY", "bus"], - "D": ["DAY", "DLY", "DAILY", "Day", "Dly", "Daily"], - "H": ["HR", "HOUR", "HRLY", "HOURLY", "hr", "Hour", "HRly"], - "T": ["minute", "MINUTE", "MINUTELY", "minutely"], - "S": ["sec", "SEC", "SECOND", "SECONDLY", "second"], - "L": ["MILLISECOND", "MILLISECONDLY", "millisecond"], - "U": ["MICROSECOND", "MICROSECONDLY", "microsecond"], - "N": ["NANOSECOND", "NANOSECONDLY", "nanosecond"]} - - msg = pd.tseries.frequencies._INVALID_FREQ_ERROR - for exp, freqs in iteritems(cases): - for freq in freqs: - with self.assertRaisesRegexp(ValueError, msg): - Period('2016-03-01 09:00', freq=freq) - with self.assertRaisesRegexp(ValueError, msg): - Period(ordinal=1, freq=freq) - - # check supported freq-aliases still works - p1 = Period('2016-03-01 09:00', freq=exp) - p2 = Period(ordinal=1, freq=exp) - tm.assertIsInstance(p1, Period) - tm.assertIsInstance(p2, Period) - - def test_hash(self): - self.assertEqual(hash(Period('2011-01', freq='M')), - hash(Period('2011-01', freq='M'))) - - self.assertNotEqual(hash(Period('2011-01-01', freq='D')), - hash(Period('2011-01', freq='M'))) - - self.assertNotEqual(hash(Period('2011-01', freq='3M')), - hash(Period('2011-01', freq='2M'))) - - self.assertNotEqual(hash(Period('2011-01', freq='M')), - hash(Period('2011-02', freq='M'))) - - def test_repr(self): - p = Period('Jan-2000') - self.assertIn('2000-01', repr(p)) - - p = Period('2000-12-15') - self.assertIn('2000-12-15', repr(p)) 
- - def test_repr_nat(self): - p = Period('nat', freq='M') - self.assertIn(repr(tslib.NaT), repr(p)) - - def test_millisecond_repr(self): - p = Period('2000-01-01 12:15:02.123') - - self.assertEqual("Period('2000-01-01 12:15:02.123', 'L')", repr(p)) - - def test_microsecond_repr(self): - p = Period('2000-01-01 12:15:02.123567') - - self.assertEqual("Period('2000-01-01 12:15:02.123567', 'U')", repr(p)) - - def test_strftime(self): - p = Period('2000-1-1 12:34:12', freq='S') - res = p.strftime('%Y-%m-%d %H:%M:%S') - self.assertEqual(res, '2000-01-01 12:34:12') - tm.assertIsInstance(res, text_type) # GH3363 - - def test_sub_delta(self): - left, right = Period('2011', freq='A'), Period('2007', freq='A') - result = left - right - self.assertEqual(result, 4) - - with self.assertRaises(period.IncompatibleFrequency): - left - Period('2007-01', freq='M') - - def test_to_timestamp(self): - p = Period('1982', freq='A') - start_ts = p.to_timestamp(how='S') - aliases = ['s', 'StarT', 'BEGIn'] - for a in aliases: - self.assertEqual(start_ts, p.to_timestamp('D', how=a)) - # freq with mult should not affect to the result - self.assertEqual(start_ts, p.to_timestamp('3D', how=a)) - - end_ts = p.to_timestamp(how='E') - aliases = ['e', 'end', 'FINIsH'] - for a in aliases: - self.assertEqual(end_ts, p.to_timestamp('D', how=a)) - self.assertEqual(end_ts, p.to_timestamp('3D', how=a)) - - from_lst = ['A', 'Q', 'M', 'W', 'B', 'D', 'H', 'Min', 'S'] - - def _ex(p): - return Timestamp((p + 1).start_time.value - 1) - - for i, fcode in enumerate(from_lst): - p = Period('1982', freq=fcode) - result = p.to_timestamp().to_period(fcode) - self.assertEqual(result, p) - - self.assertEqual(p.start_time, p.to_timestamp(how='S')) - - self.assertEqual(p.end_time, _ex(p)) - - # Frequency other than daily - - p = Period('1985', freq='A') - - result = p.to_timestamp('H', how='end') - expected = datetime(1985, 12, 31, 23) - self.assertEqual(result, expected) - result = p.to_timestamp('3H', how='end') - 
self.assertEqual(result, expected) - - result = p.to_timestamp('T', how='end') - expected = datetime(1985, 12, 31, 23, 59) - self.assertEqual(result, expected) - result = p.to_timestamp('2T', how='end') - self.assertEqual(result, expected) - - result = p.to_timestamp(how='end') - expected = datetime(1985, 12, 31) - self.assertEqual(result, expected) - - expected = datetime(1985, 1, 1) - result = p.to_timestamp('H', how='start') - self.assertEqual(result, expected) - result = p.to_timestamp('T', how='start') - self.assertEqual(result, expected) - result = p.to_timestamp('S', how='start') - self.assertEqual(result, expected) - result = p.to_timestamp('3H', how='start') - self.assertEqual(result, expected) - result = p.to_timestamp('5S', how='start') - self.assertEqual(result, expected) - - def test_start_time(self): - freq_lst = ['A', 'Q', 'M', 'D', 'H', 'T', 'S'] - xp = datetime(2012, 1, 1) - for f in freq_lst: - p = Period('2012', freq=f) - self.assertEqual(p.start_time, xp) - self.assertEqual(Period('2012', freq='B').start_time, - datetime(2012, 1, 2)) - self.assertEqual(Period('2012', freq='W').start_time, - datetime(2011, 12, 26)) - - def test_end_time(self): - p = Period('2012', freq='A') - - def _ex(*args): - return Timestamp(Timestamp(datetime(*args)).value - 1) - - xp = _ex(2013, 1, 1) - self.assertEqual(xp, p.end_time) - - p = Period('2012', freq='Q') - xp = _ex(2012, 4, 1) - self.assertEqual(xp, p.end_time) - - p = Period('2012', freq='M') - xp = _ex(2012, 2, 1) - self.assertEqual(xp, p.end_time) - - p = Period('2012', freq='D') - xp = _ex(2012, 1, 2) - self.assertEqual(xp, p.end_time) - - p = Period('2012', freq='H') - xp = _ex(2012, 1, 1, 1) - self.assertEqual(xp, p.end_time) - - p = Period('2012', freq='B') - xp = _ex(2012, 1, 3) - self.assertEqual(xp, p.end_time) - - p = Period('2012', freq='W') - xp = _ex(2012, 1, 2) - self.assertEqual(xp, p.end_time) - - # Test for GH 11738 - p = Period('2012', freq='15D') - xp = _ex(2012, 1, 16) - 
self.assertEqual(xp, p.end_time) - - p = Period('2012', freq='1D1H') - xp = _ex(2012, 1, 2, 1) - self.assertEqual(xp, p.end_time) - - p = Period('2012', freq='1H1D') - xp = _ex(2012, 1, 2, 1) - self.assertEqual(xp, p.end_time) - - def test_anchor_week_end_time(self): - def _ex(*args): - return Timestamp(Timestamp(datetime(*args)).value - 1) - - p = Period('2013-1-1', 'W-SAT') - xp = _ex(2013, 1, 6) - self.assertEqual(p.end_time, xp) - - def test_properties_annually(self): - # Test properties on Periods with annually frequency. - a_date = Period(freq='A', year=2007) - self.assertEqual(a_date.year, 2007) - - def test_properties_quarterly(self): - # Test properties on Periods with daily frequency. - qedec_date = Period(freq="Q-DEC", year=2007, quarter=1) - qejan_date = Period(freq="Q-JAN", year=2007, quarter=1) - qejun_date = Period(freq="Q-JUN", year=2007, quarter=1) - # - for x in range(3): - for qd in (qedec_date, qejan_date, qejun_date): - self.assertEqual((qd + x).qyear, 2007) - self.assertEqual((qd + x).quarter, x + 1) - - def test_properties_monthly(self): - # Test properties on Periods with daily frequency. - m_date = Period(freq='M', year=2007, month=1) - for x in range(11): - m_ival_x = m_date + x - self.assertEqual(m_ival_x.year, 2007) - if 1 <= x + 1 <= 3: - self.assertEqual(m_ival_x.quarter, 1) - elif 4 <= x + 1 <= 6: - self.assertEqual(m_ival_x.quarter, 2) - elif 7 <= x + 1 <= 9: - self.assertEqual(m_ival_x.quarter, 3) - elif 10 <= x + 1 <= 12: - self.assertEqual(m_ival_x.quarter, 4) - self.assertEqual(m_ival_x.month, x + 1) - - def test_properties_weekly(self): - # Test properties on Periods with daily frequency. 
- w_date = Period(freq='W', year=2007, month=1, day=7) - # - self.assertEqual(w_date.year, 2007) - self.assertEqual(w_date.quarter, 1) - self.assertEqual(w_date.month, 1) - self.assertEqual(w_date.week, 1) - self.assertEqual((w_date - 1).week, 52) - self.assertEqual(w_date.days_in_month, 31) - self.assertEqual(Period(freq='W', year=2012, - month=2, day=1).days_in_month, 29) - - def test_properties_weekly_legacy(self): - # Test properties on Periods with daily frequency. - w_date = Period(freq='W', year=2007, month=1, day=7) - self.assertEqual(w_date.year, 2007) - self.assertEqual(w_date.quarter, 1) - self.assertEqual(w_date.month, 1) - self.assertEqual(w_date.week, 1) - self.assertEqual((w_date - 1).week, 52) - self.assertEqual(w_date.days_in_month, 31) - - exp = Period(freq='W', year=2012, month=2, day=1) - self.assertEqual(exp.days_in_month, 29) - - msg = pd.tseries.frequencies._INVALID_FREQ_ERROR - with self.assertRaisesRegexp(ValueError, msg): - Period(freq='WK', year=2007, month=1, day=7) - - def test_properties_daily(self): - # Test properties on Periods with daily frequency. 
- b_date = Period(freq='B', year=2007, month=1, day=1) - # - self.assertEqual(b_date.year, 2007) - self.assertEqual(b_date.quarter, 1) - self.assertEqual(b_date.month, 1) - self.assertEqual(b_date.day, 1) - self.assertEqual(b_date.weekday, 0) - self.assertEqual(b_date.dayofyear, 1) - self.assertEqual(b_date.days_in_month, 31) - self.assertEqual(Period(freq='B', year=2012, - month=2, day=1).days_in_month, 29) - # - d_date = Period(freq='D', year=2007, month=1, day=1) - # - self.assertEqual(d_date.year, 2007) - self.assertEqual(d_date.quarter, 1) - self.assertEqual(d_date.month, 1) - self.assertEqual(d_date.day, 1) - self.assertEqual(d_date.weekday, 0) - self.assertEqual(d_date.dayofyear, 1) - self.assertEqual(d_date.days_in_month, 31) - self.assertEqual(Period(freq='D', year=2012, month=2, - day=1).days_in_month, 29) - - def test_properties_hourly(self): - # Test properties on Periods with hourly frequency. - h_date1 = Period(freq='H', year=2007, month=1, day=1, hour=0) - h_date2 = Period(freq='2H', year=2007, month=1, day=1, hour=0) - - for h_date in [h_date1, h_date2]: - self.assertEqual(h_date.year, 2007) - self.assertEqual(h_date.quarter, 1) - self.assertEqual(h_date.month, 1) - self.assertEqual(h_date.day, 1) - self.assertEqual(h_date.weekday, 0) - self.assertEqual(h_date.dayofyear, 1) - self.assertEqual(h_date.hour, 0) - self.assertEqual(h_date.days_in_month, 31) - self.assertEqual(Period(freq='H', year=2012, month=2, day=1, - hour=0).days_in_month, 29) - - def test_properties_minutely(self): - # Test properties on Periods with minutely frequency. 
- t_date = Period(freq='Min', year=2007, month=1, day=1, hour=0, - minute=0) - # - self.assertEqual(t_date.quarter, 1) - self.assertEqual(t_date.month, 1) - self.assertEqual(t_date.day, 1) - self.assertEqual(t_date.weekday, 0) - self.assertEqual(t_date.dayofyear, 1) - self.assertEqual(t_date.hour, 0) - self.assertEqual(t_date.minute, 0) - self.assertEqual(t_date.days_in_month, 31) - self.assertEqual(Period(freq='D', year=2012, month=2, day=1, hour=0, - minute=0).days_in_month, 29) - - def test_properties_secondly(self): - # Test properties on Periods with secondly frequency. - s_date = Period(freq='Min', year=2007, month=1, day=1, hour=0, - minute=0, second=0) - # - self.assertEqual(s_date.year, 2007) - self.assertEqual(s_date.quarter, 1) - self.assertEqual(s_date.month, 1) - self.assertEqual(s_date.day, 1) - self.assertEqual(s_date.weekday, 0) - self.assertEqual(s_date.dayofyear, 1) - self.assertEqual(s_date.hour, 0) - self.assertEqual(s_date.minute, 0) - self.assertEqual(s_date.second, 0) - self.assertEqual(s_date.days_in_month, 31) - self.assertEqual(Period(freq='Min', year=2012, month=2, day=1, hour=0, - minute=0, second=0).days_in_month, 29) - - def test_properties_nat(self): - p_nat = Period('NaT', freq='M') - t_nat = pd.Timestamp('NaT') - self.assertIs(p_nat, t_nat) - - # confirm Period('NaT') work identical with Timestamp('NaT') - for f in ['year', 'month', 'day', 'hour', 'minute', 'second', 'week', - 'dayofyear', 'quarter', 'days_in_month']: - self.assertTrue(np.isnan(getattr(p_nat, f))) - self.assertTrue(np.isnan(getattr(t_nat, f))) - - def test_pnow(self): - dt = datetime.now() - - val = period.pnow('D') - exp = Period(dt, freq='D') - self.assertEqual(val, exp) - - val2 = period.pnow('2D') - exp2 = Period(dt, freq='2D') - self.assertEqual(val2, exp2) - self.assertEqual(val.ordinal, val2.ordinal) - self.assertEqual(val.ordinal, exp2.ordinal) - - def test_constructor_corner(self): - expected = Period('2007-01', freq='2M') - 
self.assertEqual(Period(year=2007, month=1, freq='2M'), expected) - - self.assertRaises(ValueError, Period, datetime.now()) - self.assertRaises(ValueError, Period, datetime.now().date()) - self.assertRaises(ValueError, Period, 1.6, freq='D') - self.assertRaises(ValueError, Period, ordinal=1.6, freq='D') - self.assertRaises(ValueError, Period, ordinal=2, value=1, freq='D') - self.assertIs(Period(None), pd.NaT) - self.assertRaises(ValueError, Period, month=1) - - p = Period('2007-01-01', freq='D') - - result = Period(p, freq='A') - exp = Period('2007', freq='A') - self.assertEqual(result, exp) - - def test_constructor_infer_freq(self): - p = Period('2007-01-01') - self.assertEqual(p.freq, 'D') - - p = Period('2007-01-01 07') - self.assertEqual(p.freq, 'H') - - p = Period('2007-01-01 07:10') - self.assertEqual(p.freq, 'T') - - p = Period('2007-01-01 07:10:15') - self.assertEqual(p.freq, 'S') - - p = Period('2007-01-01 07:10:15.123') - self.assertEqual(p.freq, 'L') - - p = Period('2007-01-01 07:10:15.123000') - self.assertEqual(p.freq, 'L') - - p = Period('2007-01-01 07:10:15.123400') - self.assertEqual(p.freq, 'U') - - def test_badinput(self): - self.assertRaises(ValueError, Period, '-2000', 'A') - self.assertRaises(tslib.DateParseError, Period, '0', 'A') - self.assertRaises(tslib.DateParseError, Period, '1/1/-2000', 'A') - - def test_multiples(self): - result1 = Period('1989', freq='2A') - result2 = Period('1989', freq='A') - self.assertEqual(result1.ordinal, result2.ordinal) - self.assertEqual(result1.freqstr, '2A-DEC') - self.assertEqual(result2.freqstr, 'A-DEC') - self.assertEqual(result1.freq, offsets.YearEnd(2)) - self.assertEqual(result2.freq, offsets.YearEnd()) - - self.assertEqual((result1 + 1).ordinal, result1.ordinal + 2) - self.assertEqual((1 + result1).ordinal, result1.ordinal + 2) - self.assertEqual((result1 - 1).ordinal, result2.ordinal - 2) - self.assertEqual((-1 + result1).ordinal, result2.ordinal - 2) - - def test_round_trip(self): - - p = 
Period('2000Q1') - new_p = self.round_trip_pickle(p) - self.assertEqual(new_p, p) - - -class TestPeriodField(tm.TestCase): - - def test_get_period_field_raises_on_out_of_range(self): - self.assertRaises(ValueError, _period.get_period_field, -1, 0, 0) - - def test_get_period_field_array_raises_on_out_of_range(self): - self.assertRaises(ValueError, _period.get_period_field_arr, -1, - np.empty(1), 0) - - -class TestComparisons(tm.TestCase): - - def setUp(self): - self.january1 = Period('2000-01', 'M') - self.january2 = Period('2000-01', 'M') - self.february = Period('2000-02', 'M') - self.march = Period('2000-03', 'M') - self.day = Period('2012-01-01', 'D') - - def test_equal(self): - self.assertEqual(self.january1, self.january2) - - def test_equal_Raises_Value(self): - with tm.assertRaises(period.IncompatibleFrequency): - self.january1 == self.day - - def test_notEqual(self): - self.assertNotEqual(self.january1, 1) - self.assertNotEqual(self.january1, self.february) - - def test_greater(self): - self.assertTrue(self.february > self.january1) - - def test_greater_Raises_Value(self): - with tm.assertRaises(period.IncompatibleFrequency): - self.january1 > self.day - - def test_greater_Raises_Type(self): - with tm.assertRaises(TypeError): - self.january1 > 1 - - def test_greaterEqual(self): - self.assertTrue(self.january1 >= self.january2) - - def test_greaterEqual_Raises_Value(self): - with tm.assertRaises(period.IncompatibleFrequency): - self.january1 >= self.day - - with tm.assertRaises(TypeError): - print(self.january1 >= 1) - - def test_smallerEqual(self): - self.assertTrue(self.january1 <= self.january2) - - def test_smallerEqual_Raises_Value(self): - with tm.assertRaises(period.IncompatibleFrequency): - self.january1 <= self.day - - def test_smallerEqual_Raises_Type(self): - with tm.assertRaises(TypeError): - self.january1 <= 1 - - def test_smaller(self): - self.assertTrue(self.january1 < self.february) - - def test_smaller_Raises_Value(self): - with 
tm.assertRaises(period.IncompatibleFrequency): - self.january1 < self.day - - def test_smaller_Raises_Type(self): - with tm.assertRaises(TypeError): - self.january1 < 1 - - def test_sort(self): - periods = [self.march, self.january1, self.february] - correctPeriods = [self.january1, self.february, self.march] - self.assertEqual(sorted(periods), correctPeriods) - - def test_period_nat_comp(self): - p_nat = Period('NaT', freq='D') - p = Period('2011-01-01', freq='D') - - nat = pd.Timestamp('NaT') - t = pd.Timestamp('2011-01-01') - # confirm Period('NaT') work identical with Timestamp('NaT') - for left, right in [(p_nat, p), (p, p_nat), (p_nat, p_nat), (nat, t), - (t, nat), (nat, nat)]: - self.assertEqual(left < right, False) - self.assertEqual(left > right, False) - self.assertEqual(left == right, False) - self.assertEqual(left != right, True) - self.assertEqual(left <= right, False) - self.assertEqual(left >= right, False) - - -class TestMethods(tm.TestCase): - - def test_add(self): - dt1 = Period(freq='D', year=2008, month=1, day=1) - dt2 = Period(freq='D', year=2008, month=1, day=2) - self.assertEqual(dt1 + 1, dt2) - self.assertEqual(1 + dt1, dt2) - - def test_add_pdnat(self): - p = pd.Period('2011-01', freq='M') - self.assertIs(p + pd.NaT, pd.NaT) - self.assertIs(pd.NaT + p, pd.NaT) - - p = pd.Period('NaT', freq='M') - self.assertIs(p + pd.NaT, pd.NaT) - self.assertIs(pd.NaT + p, pd.NaT) - - def test_add_raises(self): - # GH 4731 - dt1 = Period(freq='D', year=2008, month=1, day=1) - dt2 = Period(freq='D', year=2008, month=1, day=2) - msg = r"unsupported operand type\(s\)" - with tm.assertRaisesRegexp(TypeError, msg): - dt1 + "str" - - msg = r"unsupported operand type\(s\)" - with tm.assertRaisesRegexp(TypeError, msg): - "str" + dt1 - - with tm.assertRaisesRegexp(TypeError, msg): - dt1 + dt2 - - def test_sub(self): - dt1 = Period('2011-01-01', freq='D') - dt2 = Period('2011-01-15', freq='D') - - self.assertEqual(dt1 - dt2, -14) - self.assertEqual(dt2 - dt1, 14) - 
- msg = r"Input has different freq=M from Period\(freq=D\)" - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - dt1 - pd.Period('2011-02', freq='M') - - def test_add_offset(self): - # freq is DateOffset - for freq in ['A', '2A', '3A']: - p = Period('2011', freq=freq) - exp = Period('2013', freq=freq) - self.assertEqual(p + offsets.YearEnd(2), exp) - self.assertEqual(offsets.YearEnd(2) + p, exp) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(365, 'D'), - timedelta(365)]: - with tm.assertRaises(period.IncompatibleFrequency): - p + o - - if isinstance(o, np.timedelta64): - with tm.assertRaises(TypeError): - o + p - else: - with tm.assertRaises(period.IncompatibleFrequency): - o + p - - for freq in ['M', '2M', '3M']: - p = Period('2011-03', freq=freq) - exp = Period('2011-05', freq=freq) - self.assertEqual(p + offsets.MonthEnd(2), exp) - self.assertEqual(offsets.MonthEnd(2) + p, exp) - - exp = Period('2012-03', freq=freq) - self.assertEqual(p + offsets.MonthEnd(12), exp) - self.assertEqual(offsets.MonthEnd(12) + p, exp) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(365, 'D'), - timedelta(365)]: - with tm.assertRaises(period.IncompatibleFrequency): - p + o - - if isinstance(o, np.timedelta64): - with tm.assertRaises(TypeError): - o + p - else: - with tm.assertRaises(period.IncompatibleFrequency): - o + p - - # freq is Tick - for freq in ['D', '2D', '3D']: - p = Period('2011-04-01', freq=freq) - - exp = Period('2011-04-06', freq=freq) - self.assertEqual(p + offsets.Day(5), exp) - self.assertEqual(offsets.Day(5) + p, exp) - - exp = Period('2011-04-02', freq=freq) - self.assertEqual(p + offsets.Hour(24), exp) - self.assertEqual(offsets.Hour(24) + p, exp) - - exp = Period('2011-04-03', freq=freq) - self.assertEqual(p + np.timedelta64(2, 'D'), exp) - with tm.assertRaises(TypeError): - np.timedelta64(2, 'D') + p - - exp = Period('2011-04-02', freq=freq) - 
self.assertEqual(p + np.timedelta64(3600 * 24, 's'), exp) - with tm.assertRaises(TypeError): - np.timedelta64(3600 * 24, 's') + p - - exp = Period('2011-03-30', freq=freq) - self.assertEqual(p + timedelta(-2), exp) - self.assertEqual(timedelta(-2) + p, exp) - - exp = Period('2011-04-03', freq=freq) - self.assertEqual(p + timedelta(hours=48), exp) - self.assertEqual(timedelta(hours=48) + p, exp) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(4, 'h'), - timedelta(hours=23)]: - with tm.assertRaises(period.IncompatibleFrequency): - p + o - - if isinstance(o, np.timedelta64): - with tm.assertRaises(TypeError): - o + p - else: - with tm.assertRaises(period.IncompatibleFrequency): - o + p - - for freq in ['H', '2H', '3H']: - p = Period('2011-04-01 09:00', freq=freq) - - exp = Period('2011-04-03 09:00', freq=freq) - self.assertEqual(p + offsets.Day(2), exp) - self.assertEqual(offsets.Day(2) + p, exp) - - exp = Period('2011-04-01 12:00', freq=freq) - self.assertEqual(p + offsets.Hour(3), exp) - self.assertEqual(offsets.Hour(3) + p, exp) - - exp = Period('2011-04-01 12:00', freq=freq) - self.assertEqual(p + np.timedelta64(3, 'h'), exp) - with tm.assertRaises(TypeError): - np.timedelta64(3, 'h') + p - - exp = Period('2011-04-01 10:00', freq=freq) - self.assertEqual(p + np.timedelta64(3600, 's'), exp) - with tm.assertRaises(TypeError): - np.timedelta64(3600, 's') + p - - exp = Period('2011-04-01 11:00', freq=freq) - self.assertEqual(p + timedelta(minutes=120), exp) - self.assertEqual(timedelta(minutes=120) + p, exp) - - exp = Period('2011-04-05 12:00', freq=freq) - self.assertEqual(p + timedelta(days=4, minutes=180), exp) - self.assertEqual(timedelta(days=4, minutes=180) + p, exp) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(3200, 's'), - timedelta(hours=23, minutes=30)]: - with tm.assertRaises(period.IncompatibleFrequency): - p + o - - if isinstance(o, np.timedelta64): - with 
tm.assertRaises(TypeError): - o + p - else: - with tm.assertRaises(period.IncompatibleFrequency): - o + p - - def test_add_offset_nat(self): - # freq is DateOffset - for freq in ['A', '2A', '3A']: - p = Period('NaT', freq=freq) - for o in [offsets.YearEnd(2)]: - self.assertIs(p + o, tslib.NaT) - self.assertIs(o + p, tslib.NaT) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(365, 'D'), - timedelta(365)]: - self.assertIs(p + o, tslib.NaT) - - if isinstance(o, np.timedelta64): - with tm.assertRaises(TypeError): - o + p - else: - self.assertIs(o + p, tslib.NaT) - - for freq in ['M', '2M', '3M']: - p = Period('NaT', freq=freq) - for o in [offsets.MonthEnd(2), offsets.MonthEnd(12)]: - self.assertIs(p + o, tslib.NaT) - - if isinstance(o, np.timedelta64): - with tm.assertRaises(TypeError): - o + p - else: - self.assertIs(o + p, tslib.NaT) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(365, 'D'), - timedelta(365)]: - self.assertIs(p + o, tslib.NaT) - - if isinstance(o, np.timedelta64): - with tm.assertRaises(TypeError): - o + p - else: - self.assertIs(o + p, tslib.NaT) - - # freq is Tick - for freq in ['D', '2D', '3D']: - p = Period('NaT', freq=freq) - for o in [offsets.Day(5), offsets.Hour(24), np.timedelta64(2, 'D'), - np.timedelta64(3600 * 24, 's'), timedelta(-2), - timedelta(hours=48)]: - self.assertIs(p + o, tslib.NaT) - - if isinstance(o, np.timedelta64): - with tm.assertRaises(TypeError): - o + p - else: - self.assertIs(o + p, tslib.NaT) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(4, 'h'), - timedelta(hours=23)]: - self.assertIs(p + o, tslib.NaT) - - if isinstance(o, np.timedelta64): - with tm.assertRaises(TypeError): - o + p - else: - self.assertIs(o + p, tslib.NaT) - - for freq in ['H', '2H', '3H']: - p = Period('NaT', freq=freq) - for o in [offsets.Day(2), offsets.Hour(3), np.timedelta64(3, 'h'), - np.timedelta64(3600, 
's'), timedelta(minutes=120), - timedelta(days=4, minutes=180)]: - self.assertIs(p + o, tslib.NaT) - - if not isinstance(o, np.timedelta64): - self.assertIs(o + p, tslib.NaT) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(3200, 's'), - timedelta(hours=23, minutes=30)]: - self.assertIs(p + o, tslib.NaT) - - if isinstance(o, np.timedelta64): - with tm.assertRaises(TypeError): - o + p - else: - self.assertIs(o + p, tslib.NaT) - - def test_sub_pdnat(self): - # GH 13071 - p = pd.Period('2011-01', freq='M') - self.assertIs(p - pd.NaT, pd.NaT) - self.assertIs(pd.NaT - p, pd.NaT) - - p = pd.Period('NaT', freq='M') - self.assertIs(p - pd.NaT, pd.NaT) - self.assertIs(pd.NaT - p, pd.NaT) - - def test_sub_offset(self): - # freq is DateOffset - for freq in ['A', '2A', '3A']: - p = Period('2011', freq=freq) - self.assertEqual(p - offsets.YearEnd(2), Period('2009', freq=freq)) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(365, 'D'), - timedelta(365)]: - with tm.assertRaises(period.IncompatibleFrequency): - p - o - - for freq in ['M', '2M', '3M']: - p = Period('2011-03', freq=freq) - self.assertEqual(p - offsets.MonthEnd(2), - Period('2011-01', freq=freq)) - self.assertEqual(p - offsets.MonthEnd(12), - Period('2010-03', freq=freq)) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(365, 'D'), - timedelta(365)]: - with tm.assertRaises(period.IncompatibleFrequency): - p - o - - # freq is Tick - for freq in ['D', '2D', '3D']: - p = Period('2011-04-01', freq=freq) - self.assertEqual(p - offsets.Day(5), - Period('2011-03-27', freq=freq)) - self.assertEqual(p - offsets.Hour(24), - Period('2011-03-31', freq=freq)) - self.assertEqual(p - np.timedelta64(2, 'D'), - Period('2011-03-30', freq=freq)) - self.assertEqual(p - np.timedelta64(3600 * 24, 's'), - Period('2011-03-31', freq=freq)) - self.assertEqual(p - timedelta(-2), - Period('2011-04-03', 
freq=freq)) - self.assertEqual(p - timedelta(hours=48), - Period('2011-03-30', freq=freq)) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(4, 'h'), - timedelta(hours=23)]: - with tm.assertRaises(period.IncompatibleFrequency): - p - o - - for freq in ['H', '2H', '3H']: - p = Period('2011-04-01 09:00', freq=freq) - self.assertEqual(p - offsets.Day(2), - Period('2011-03-30 09:00', freq=freq)) - self.assertEqual(p - offsets.Hour(3), - Period('2011-04-01 06:00', freq=freq)) - self.assertEqual(p - np.timedelta64(3, 'h'), - Period('2011-04-01 06:00', freq=freq)) - self.assertEqual(p - np.timedelta64(3600, 's'), - Period('2011-04-01 08:00', freq=freq)) - self.assertEqual(p - timedelta(minutes=120), - Period('2011-04-01 07:00', freq=freq)) - self.assertEqual(p - timedelta(days=4, minutes=180), - Period('2011-03-28 06:00', freq=freq)) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(3200, 's'), - timedelta(hours=23, minutes=30)]: - with tm.assertRaises(period.IncompatibleFrequency): - p - o - - def test_sub_offset_nat(self): - # freq is DateOffset - for freq in ['A', '2A', '3A']: - p = Period('NaT', freq=freq) - for o in [offsets.YearEnd(2)]: - self.assertIs(p - o, tslib.NaT) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(365, 'D'), - timedelta(365)]: - self.assertIs(p - o, tslib.NaT) - - for freq in ['M', '2M', '3M']: - p = Period('NaT', freq=freq) - for o in [offsets.MonthEnd(2), offsets.MonthEnd(12)]: - self.assertIs(p - o, tslib.NaT) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(365, 'D'), - timedelta(365)]: - self.assertIs(p - o, tslib.NaT) - - # freq is Tick - for freq in ['D', '2D', '3D']: - p = Period('NaT', freq=freq) - for o in [offsets.Day(5), offsets.Hour(24), np.timedelta64(2, 'D'), - np.timedelta64(3600 * 24, 's'), timedelta(-2), - timedelta(hours=48)]: - self.assertIs(p - o, 
tslib.NaT) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(4, 'h'), - timedelta(hours=23)]: - self.assertIs(p - o, tslib.NaT) - - for freq in ['H', '2H', '3H']: - p = Period('NaT', freq=freq) - for o in [offsets.Day(2), offsets.Hour(3), np.timedelta64(3, 'h'), - np.timedelta64(3600, 's'), timedelta(minutes=120), - timedelta(days=4, minutes=180)]: - self.assertIs(p - o, tslib.NaT) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(3200, 's'), - timedelta(hours=23, minutes=30)]: - self.assertIs(p - o, tslib.NaT) - - def test_nat_ops(self): - for freq in ['M', '2M', '3M']: - p = Period('NaT', freq=freq) - self.assertIs(p + 1, tslib.NaT) - self.assertIs(1 + p, tslib.NaT) - self.assertIs(p - 1, tslib.NaT) - self.assertIs(p - Period('2011-01', freq=freq), tslib.NaT) - self.assertIs(Period('2011-01', freq=freq) - p, tslib.NaT) - - def test_period_ops_offset(self): - p = Period('2011-04-01', freq='D') - result = p + offsets.Day() - exp = pd.Period('2011-04-02', freq='D') - self.assertEqual(result, exp) - - result = p - offsets.Day(2) - exp = pd.Period('2011-03-30', freq='D') - self.assertEqual(result, exp) - - msg = r"Input cannot be converted to Period\(freq=D\)" - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - p + offsets.Hour(2) - - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - p - offsets.Hour(2) diff --git a/pandas/tests/scalar/test_timedelta.py b/pandas/tests/scalar/test_timedelta.py deleted file mode 100644 index c5a828bf2e912..0000000000000 --- a/pandas/tests/scalar/test_timedelta.py +++ /dev/null @@ -1,713 +0,0 @@ -""" test the scalar Timedelta """ -import numpy as np -from datetime import timedelta - -import pandas as pd -import pandas.util.testing as tm -from pandas.tseries.timedeltas import _coerce_scalar_to_timedelta_type as ct -from pandas import (Timedelta, TimedeltaIndex, timedelta_range, Series, - to_timedelta, tslib, compat, 
isnull) - -iNaT = tslib.iNaT - - -class TestTimedeltas(tm.TestCase): - _multiprocess_can_split_ = True - - def setUp(self): - pass - - def test_construction(self): - - expected = np.timedelta64(10, 'D').astype('m8[ns]').view('i8') - self.assertEqual(Timedelta(10, unit='d').value, expected) - self.assertEqual(Timedelta(10.0, unit='d').value, expected) - self.assertEqual(Timedelta('10 days').value, expected) - self.assertEqual(Timedelta(days=10).value, expected) - self.assertEqual(Timedelta(days=10.0).value, expected) - - expected += np.timedelta64(10, 's').astype('m8[ns]').view('i8') - self.assertEqual(Timedelta('10 days 00:00:10').value, expected) - self.assertEqual(Timedelta(days=10, seconds=10).value, expected) - self.assertEqual( - Timedelta(days=10, milliseconds=10 * 1000).value, expected) - self.assertEqual( - Timedelta(days=10, microseconds=10 * 1000 * 1000).value, expected) - - # test construction with np dtypes - # GH 8757 - timedelta_kwargs = {'days': 'D', - 'seconds': 's', - 'microseconds': 'us', - 'milliseconds': 'ms', - 'minutes': 'm', - 'hours': 'h', - 'weeks': 'W'} - npdtypes = [np.int64, np.int32, np.int16, np.float64, np.float32, - np.float16] - for npdtype in npdtypes: - for pykwarg, npkwarg in timedelta_kwargs.items(): - expected = np.timedelta64(1, - npkwarg).astype('m8[ns]').view('i8') - self.assertEqual( - Timedelta(**{pykwarg: npdtype(1)}).value, expected) - - # rounding cases - self.assertEqual(Timedelta(82739999850000).value, 82739999850000) - self.assertTrue('0 days 22:58:59.999850' in str(Timedelta( - 82739999850000))) - self.assertEqual(Timedelta(123072001000000).value, 123072001000000) - self.assertTrue('1 days 10:11:12.001' in str(Timedelta( - 123072001000000))) - - # string conversion with/without leading zero - # GH 9570 - self.assertEqual(Timedelta('0:00:00'), timedelta(hours=0)) - self.assertEqual(Timedelta('00:00:00'), timedelta(hours=0)) - self.assertEqual(Timedelta('-1:00:00'), -timedelta(hours=1)) - 
self.assertEqual(Timedelta('-01:00:00'), -timedelta(hours=1)) - - # more strings & abbrevs - # GH 8190 - self.assertEqual(Timedelta('1 h'), timedelta(hours=1)) - self.assertEqual(Timedelta('1 hour'), timedelta(hours=1)) - self.assertEqual(Timedelta('1 hr'), timedelta(hours=1)) - self.assertEqual(Timedelta('1 hours'), timedelta(hours=1)) - self.assertEqual(Timedelta('-1 hours'), -timedelta(hours=1)) - self.assertEqual(Timedelta('1 m'), timedelta(minutes=1)) - self.assertEqual(Timedelta('1.5 m'), timedelta(seconds=90)) - self.assertEqual(Timedelta('1 minute'), timedelta(minutes=1)) - self.assertEqual(Timedelta('1 minutes'), timedelta(minutes=1)) - self.assertEqual(Timedelta('1 s'), timedelta(seconds=1)) - self.assertEqual(Timedelta('1 second'), timedelta(seconds=1)) - self.assertEqual(Timedelta('1 seconds'), timedelta(seconds=1)) - self.assertEqual(Timedelta('1 ms'), timedelta(milliseconds=1)) - self.assertEqual(Timedelta('1 milli'), timedelta(milliseconds=1)) - self.assertEqual(Timedelta('1 millisecond'), timedelta(milliseconds=1)) - self.assertEqual(Timedelta('1 us'), timedelta(microseconds=1)) - self.assertEqual(Timedelta('1 micros'), timedelta(microseconds=1)) - self.assertEqual(Timedelta('1 microsecond'), timedelta(microseconds=1)) - self.assertEqual(Timedelta('1.5 microsecond'), - Timedelta('00:00:00.000001500')) - self.assertEqual(Timedelta('1 ns'), Timedelta('00:00:00.000000001')) - self.assertEqual(Timedelta('1 nano'), Timedelta('00:00:00.000000001')) - self.assertEqual(Timedelta('1 nanosecond'), - Timedelta('00:00:00.000000001')) - - # combos - self.assertEqual(Timedelta('10 days 1 hour'), - timedelta(days=10, hours=1)) - self.assertEqual(Timedelta('10 days 1 h'), timedelta(days=10, hours=1)) - self.assertEqual(Timedelta('10 days 1 h 1m 1s'), timedelta( - days=10, hours=1, minutes=1, seconds=1)) - self.assertEqual(Timedelta('-10 days 1 h 1m 1s'), - - timedelta(days=10, hours=1, minutes=1, seconds=1)) - self.assertEqual(Timedelta('-10 days 1 h 1m 1s'), - - 
timedelta(days=10, hours=1, minutes=1, seconds=1)) - self.assertEqual(Timedelta('-10 days 1 h 1m 1s 3us'), - - timedelta(days=10, hours=1, minutes=1, - seconds=1, microseconds=3)) - self.assertEqual(Timedelta('-10 days 1 h 1.5m 1s 3us'), - - timedelta(days=10, hours=1, minutes=1, - seconds=31, microseconds=3)) - - # currently invalid as it has a - on the hhmmdd part (only allowed on - # the days) - self.assertRaises(ValueError, - lambda: Timedelta('-10 days -1 h 1.5m 1s 3us')) - - # only leading neg signs are allowed - self.assertRaises(ValueError, - lambda: Timedelta('10 days -1 h 1.5m 1s 3us')) - - # no units specified - self.assertRaises(ValueError, lambda: Timedelta('3.1415')) - - # invalid construction - tm.assertRaisesRegexp(ValueError, "cannot construct a Timedelta", - lambda: Timedelta()) - tm.assertRaisesRegexp(ValueError, "unit abbreviation w/o a number", - lambda: Timedelta('foo')) - tm.assertRaisesRegexp(ValueError, - "cannot construct a Timedelta from the passed " - "arguments, allowed keywords are ", - lambda: Timedelta(day=10)) - - # roundtripping both for string and value - for v in ['1s', '-1s', '1us', '-1us', '1 day', '-1 day', - '-23:59:59.999999', '-1 days +23:59:59.999999', '-1ns', - '1ns', '-23:59:59.999999999']: - - td = Timedelta(v) - self.assertEqual(Timedelta(td.value), td) - - # str does not normally display nanos - if not td.nanoseconds: - self.assertEqual(Timedelta(str(td)), td) - self.assertEqual(Timedelta(td._repr_base(format='all')), td) - - # floats - expected = np.timedelta64( - 10, 's').astype('m8[ns]').view('i8') + np.timedelta64( - 500, 'ms').astype('m8[ns]').view('i8') - self.assertEqual(Timedelta(10.5, unit='s').value, expected) - - # nat - self.assertEqual(Timedelta('').value, iNaT) - self.assertEqual(Timedelta('nat').value, iNaT) - self.assertEqual(Timedelta('NAT').value, iNaT) - self.assertEqual(Timedelta(None).value, iNaT) - self.assertEqual(Timedelta(np.nan).value, iNaT) - self.assertTrue(isnull(Timedelta('nat'))) - - # 
offset - self.assertEqual(to_timedelta(pd.offsets.Hour(2)), - Timedelta('0 days, 02:00:00')) - self.assertEqual(Timedelta(pd.offsets.Hour(2)), - Timedelta('0 days, 02:00:00')) - self.assertEqual(Timedelta(pd.offsets.Second(2)), - Timedelta('0 days, 00:00:02')) - - # unicode - # GH 11995 - expected = Timedelta('1H') - result = pd.Timedelta(u'1H') - self.assertEqual(result, expected) - self.assertEqual(to_timedelta(pd.offsets.Hour(2)), - Timedelta(u'0 days, 02:00:00')) - - self.assertRaises(ValueError, lambda: Timedelta(u'foo bar')) - - def test_overflow_on_construction(self): - # xref https://github.com/statsmodels/statsmodels/issues/3374 - value = pd.Timedelta('1day').value * 20169940 - self.assertRaises(OverflowError, pd.Timedelta, value) - - def test_total_seconds_scalar(self): - # GH 10939 - rng = Timedelta('1 days, 10:11:12.100123456') - expt = 1 * 86400 + 10 * 3600 + 11 * 60 + 12 + 100123456. / 1e9 - tm.assert_almost_equal(rng.total_seconds(), expt) - - rng = Timedelta(np.nan) - self.assertTrue(np.isnan(rng.total_seconds())) - - def test_repr(self): - - self.assertEqual(repr(Timedelta(10, unit='d')), - "Timedelta('10 days 00:00:00')") - self.assertEqual(repr(Timedelta(10, unit='s')), - "Timedelta('0 days 00:00:10')") - self.assertEqual(repr(Timedelta(10, unit='ms')), - "Timedelta('0 days 00:00:00.010000')") - self.assertEqual(repr(Timedelta(-10, unit='ms')), - "Timedelta('-1 days +23:59:59.990000')") - - def test_conversion(self): - - for td in [Timedelta(10, unit='d'), - Timedelta('1 days, 10:11:12.012345')]: - pydt = td.to_pytimedelta() - self.assertTrue(td == Timedelta(pydt)) - self.assertEqual(td, pydt) - self.assertTrue(isinstance(pydt, timedelta) and not isinstance( - pydt, Timedelta)) - - self.assertEqual(td, np.timedelta64(td.value, 'ns')) - td64 = td.to_timedelta64() - self.assertEqual(td64, np.timedelta64(td.value, 'ns')) - self.assertEqual(td, td64) - self.assertTrue(isinstance(td64, np.timedelta64)) - - # this is NOT equal and cannot be roundtriped 
(because of the nanos) - td = Timedelta('1 days, 10:11:12.012345678') - self.assertTrue(td != td.to_pytimedelta()) - - def test_freq_conversion(self): - - td = Timedelta('1 days 2 hours 3 ns') - result = td / np.timedelta64(1, 'D') - self.assertEqual(result, td.value / float(86400 * 1e9)) - result = td / np.timedelta64(1, 's') - self.assertEqual(result, td.value / float(1e9)) - result = td / np.timedelta64(1, 'ns') - self.assertEqual(result, td.value) - - def test_fields(self): - def check(value): - # that we are int/long like - self.assertTrue(isinstance(value, (int, compat.long))) - - # compat to datetime.timedelta - rng = to_timedelta('1 days, 10:11:12') - self.assertEqual(rng.days, 1) - self.assertEqual(rng.seconds, 10 * 3600 + 11 * 60 + 12) - self.assertEqual(rng.microseconds, 0) - self.assertEqual(rng.nanoseconds, 0) - - self.assertRaises(AttributeError, lambda: rng.hours) - self.assertRaises(AttributeError, lambda: rng.minutes) - self.assertRaises(AttributeError, lambda: rng.milliseconds) - - # GH 10050 - check(rng.days) - check(rng.seconds) - check(rng.microseconds) - check(rng.nanoseconds) - - td = Timedelta('-1 days, 10:11:12') - self.assertEqual(abs(td), Timedelta('13:48:48')) - self.assertTrue(str(td) == "-1 days +10:11:12") - self.assertEqual(-td, Timedelta('0 days 13:48:48')) - self.assertEqual(-Timedelta('-1 days, 10:11:12').value, 49728000000000) - self.assertEqual(Timedelta('-1 days, 10:11:12').value, -49728000000000) - - rng = to_timedelta('-1 days, 10:11:12.100123456') - self.assertEqual(rng.days, -1) - self.assertEqual(rng.seconds, 10 * 3600 + 11 * 60 + 12) - self.assertEqual(rng.microseconds, 100 * 1000 + 123) - self.assertEqual(rng.nanoseconds, 456) - self.assertRaises(AttributeError, lambda: rng.hours) - self.assertRaises(AttributeError, lambda: rng.minutes) - self.assertRaises(AttributeError, lambda: rng.milliseconds) - - # components - tup = pd.to_timedelta(-1, 'us').components - self.assertEqual(tup.days, -1) - self.assertEqual(tup.hours, 
23) - self.assertEqual(tup.minutes, 59) - self.assertEqual(tup.seconds, 59) - self.assertEqual(tup.milliseconds, 999) - self.assertEqual(tup.microseconds, 999) - self.assertEqual(tup.nanoseconds, 0) - - # GH 10050 - check(tup.days) - check(tup.hours) - check(tup.minutes) - check(tup.seconds) - check(tup.milliseconds) - check(tup.microseconds) - check(tup.nanoseconds) - - tup = Timedelta('-1 days 1 us').components - self.assertEqual(tup.days, -2) - self.assertEqual(tup.hours, 23) - self.assertEqual(tup.minutes, 59) - self.assertEqual(tup.seconds, 59) - self.assertEqual(tup.milliseconds, 999) - self.assertEqual(tup.microseconds, 999) - self.assertEqual(tup.nanoseconds, 0) - - def test_nat_converters(self): - self.assertEqual(to_timedelta( - 'nat', box=False).astype('int64'), tslib.iNaT) - self.assertEqual(to_timedelta( - 'nan', box=False).astype('int64'), tslib.iNaT) - - def testit(unit, transform): - - # array - result = to_timedelta(np.arange(5), unit=unit) - expected = TimedeltaIndex([np.timedelta64(i, transform(unit)) - for i in np.arange(5).tolist()]) - tm.assert_index_equal(result, expected) - - # scalar - result = to_timedelta(2, unit=unit) - expected = Timedelta(np.timedelta64(2, transform(unit)).astype( - 'timedelta64[ns]')) - self.assertEqual(result, expected) - - # validate all units - # GH 6855 - for unit in ['Y', 'M', 'W', 'D', 'y', 'w', 'd']: - testit(unit, lambda x: x.upper()) - for unit in ['days', 'day', 'Day', 'Days']: - testit(unit, lambda x: 'D') - for unit in ['h', 'm', 's', 'ms', 'us', 'ns', 'H', 'S', 'MS', 'US', - 'NS']: - testit(unit, lambda x: x.lower()) - - # offsets - - # m - testit('T', lambda x: 'm') - - # ms - testit('L', lambda x: 'ms') - - def test_numeric_conversions(self): - self.assertEqual(ct(0), np.timedelta64(0, 'ns')) - self.assertEqual(ct(10), np.timedelta64(10, 'ns')) - self.assertEqual(ct(10, unit='ns'), np.timedelta64( - 10, 'ns').astype('m8[ns]')) - - self.assertEqual(ct(10, unit='us'), np.timedelta64( - 10, 
'us').astype('m8[ns]')) - self.assertEqual(ct(10, unit='ms'), np.timedelta64( - 10, 'ms').astype('m8[ns]')) - self.assertEqual(ct(10, unit='s'), np.timedelta64( - 10, 's').astype('m8[ns]')) - self.assertEqual(ct(10, unit='d'), np.timedelta64( - 10, 'D').astype('m8[ns]')) - - def test_timedelta_conversions(self): - self.assertEqual(ct(timedelta(seconds=1)), - np.timedelta64(1, 's').astype('m8[ns]')) - self.assertEqual(ct(timedelta(microseconds=1)), - np.timedelta64(1, 'us').astype('m8[ns]')) - self.assertEqual(ct(timedelta(days=1)), - np.timedelta64(1, 'D').astype('m8[ns]')) - - def test_round(self): - - t1 = Timedelta('1 days 02:34:56.789123456') - t2 = Timedelta('-1 days 02:34:56.789123456') - - for (freq, s1, s2) in [('N', t1, t2), - ('U', Timedelta('1 days 02:34:56.789123000'), - Timedelta('-1 days 02:34:56.789123000')), - ('L', Timedelta('1 days 02:34:56.789000000'), - Timedelta('-1 days 02:34:56.789000000')), - ('S', Timedelta('1 days 02:34:57'), - Timedelta('-1 days 02:34:57')), - ('2S', Timedelta('1 days 02:34:56'), - Timedelta('-1 days 02:34:56')), - ('5S', Timedelta('1 days 02:34:55'), - Timedelta('-1 days 02:34:55')), - ('T', Timedelta('1 days 02:35:00'), - Timedelta('-1 days 02:35:00')), - ('12T', Timedelta('1 days 02:36:00'), - Timedelta('-1 days 02:36:00')), - ('H', Timedelta('1 days 03:00:00'), - Timedelta('-1 days 03:00:00')), - ('d', Timedelta('1 days'), - Timedelta('-1 days'))]: - r1 = t1.round(freq) - self.assertEqual(r1, s1) - r2 = t2.round(freq) - self.assertEqual(r2, s2) - - # invalid - for freq in ['Y', 'M', 'foobar']: - self.assertRaises(ValueError, lambda: t1.round(freq)) - - t1 = timedelta_range('1 days', periods=3, freq='1 min 2 s 3 us') - t2 = -1 * t1 - t1a = timedelta_range('1 days', periods=3, freq='1 min 2 s') - t1c = pd.TimedeltaIndex([1, 1, 1], unit='D') - - # note that negative times round DOWN! 
so don't give whole numbers - for (freq, s1, s2) in [('N', t1, t2), - ('U', t1, t2), - ('L', t1a, - TimedeltaIndex(['-1 days +00:00:00', - '-2 days +23:58:58', - '-2 days +23:57:56'], - dtype='timedelta64[ns]', - freq=None) - ), - ('S', t1a, - TimedeltaIndex(['-1 days +00:00:00', - '-2 days +23:58:58', - '-2 days +23:57:56'], - dtype='timedelta64[ns]', - freq=None) - ), - ('12T', t1c, - TimedeltaIndex(['-1 days', - '-1 days', - '-1 days'], - dtype='timedelta64[ns]', - freq=None) - ), - ('H', t1c, - TimedeltaIndex(['-1 days', - '-1 days', - '-1 days'], - dtype='timedelta64[ns]', - freq=None) - ), - ('d', t1c, - pd.TimedeltaIndex([-1, -1, -1], unit='D') - )]: - - r1 = t1.round(freq) - tm.assert_index_equal(r1, s1) - r2 = t2.round(freq) - tm.assert_index_equal(r2, s2) - - # invalid - for freq in ['Y', 'M', 'foobar']: - self.assertRaises(ValueError, lambda: t1.round(freq)) - - def test_contains(self): - # Checking for any NaT-like objects - # GH 13603 - td = to_timedelta(range(5), unit='d') + pd.offsets.Hour(1) - for v in [pd.NaT, None, float('nan'), np.nan]: - self.assertFalse((v in td)) - - td = to_timedelta([pd.NaT]) - for v in [pd.NaT, None, float('nan'), np.nan]: - self.assertTrue((v in td)) - - def test_identity(self): - - td = Timedelta(10, unit='d') - self.assertTrue(isinstance(td, Timedelta)) - self.assertTrue(isinstance(td, timedelta)) - - def test_short_format_converters(self): - def conv(v): - return v.astype('m8[ns]') - - self.assertEqual(ct('10'), np.timedelta64(10, 'ns')) - self.assertEqual(ct('10ns'), np.timedelta64(10, 'ns')) - self.assertEqual(ct('100'), np.timedelta64(100, 'ns')) - self.assertEqual(ct('100ns'), np.timedelta64(100, 'ns')) - - self.assertEqual(ct('1000'), np.timedelta64(1000, 'ns')) - self.assertEqual(ct('1000ns'), np.timedelta64(1000, 'ns')) - self.assertEqual(ct('1000NS'), np.timedelta64(1000, 'ns')) - - self.assertEqual(ct('10us'), np.timedelta64(10000, 'ns')) - self.assertEqual(ct('100us'), np.timedelta64(100000, 'ns')) - 
self.assertEqual(ct('1000us'), np.timedelta64(1000000, 'ns')) - self.assertEqual(ct('1000Us'), np.timedelta64(1000000, 'ns')) - self.assertEqual(ct('1000uS'), np.timedelta64(1000000, 'ns')) - - self.assertEqual(ct('1ms'), np.timedelta64(1000000, 'ns')) - self.assertEqual(ct('10ms'), np.timedelta64(10000000, 'ns')) - self.assertEqual(ct('100ms'), np.timedelta64(100000000, 'ns')) - self.assertEqual(ct('1000ms'), np.timedelta64(1000000000, 'ns')) - - self.assertEqual(ct('-1s'), -np.timedelta64(1000000000, 'ns')) - self.assertEqual(ct('1s'), np.timedelta64(1000000000, 'ns')) - self.assertEqual(ct('10s'), np.timedelta64(10000000000, 'ns')) - self.assertEqual(ct('100s'), np.timedelta64(100000000000, 'ns')) - self.assertEqual(ct('1000s'), np.timedelta64(1000000000000, 'ns')) - - self.assertEqual(ct('1d'), conv(np.timedelta64(1, 'D'))) - self.assertEqual(ct('-1d'), -conv(np.timedelta64(1, 'D'))) - self.assertEqual(ct('1D'), conv(np.timedelta64(1, 'D'))) - self.assertEqual(ct('10D'), conv(np.timedelta64(10, 'D'))) - self.assertEqual(ct('100D'), conv(np.timedelta64(100, 'D'))) - self.assertEqual(ct('1000D'), conv(np.timedelta64(1000, 'D'))) - self.assertEqual(ct('10000D'), conv(np.timedelta64(10000, 'D'))) - - # space - self.assertEqual(ct(' 10000D '), conv(np.timedelta64(10000, 'D'))) - self.assertEqual(ct(' - 10000D '), -conv(np.timedelta64(10000, 'D'))) - - # invalid - self.assertRaises(ValueError, ct, '1foo') - self.assertRaises(ValueError, ct, 'foo') - - def test_full_format_converters(self): - def conv(v): - return v.astype('m8[ns]') - - d1 = np.timedelta64(1, 'D') - - self.assertEqual(ct('1days'), conv(d1)) - self.assertEqual(ct('1days,'), conv(d1)) - self.assertEqual(ct('- 1days,'), -conv(d1)) - - self.assertEqual(ct('00:00:01'), conv(np.timedelta64(1, 's'))) - self.assertEqual(ct('06:00:01'), conv( - np.timedelta64(6 * 3600 + 1, 's'))) - self.assertEqual(ct('06:00:01.0'), conv( - np.timedelta64(6 * 3600 + 1, 's'))) - self.assertEqual(ct('06:00:01.01'), conv( - 
np.timedelta64(1000 * (6 * 3600 + 1) + 10, 'ms'))) - - self.assertEqual(ct('- 1days, 00:00:01'), - conv(-d1 + np.timedelta64(1, 's'))) - self.assertEqual(ct('1days, 06:00:01'), conv( - d1 + np.timedelta64(6 * 3600 + 1, 's'))) - self.assertEqual(ct('1days, 06:00:01.01'), conv( - d1 + np.timedelta64(1000 * (6 * 3600 + 1) + 10, 'ms'))) - - # invalid - self.assertRaises(ValueError, ct, '- 1days, 00') - - def test_overflow(self): - # GH 9442 - s = Series(pd.date_range('20130101', periods=100000, freq='H')) - s[0] += pd.Timedelta('1s 1ms') - - # mean - result = (s - s.min()).mean() - expected = pd.Timedelta((pd.DatetimeIndex((s - s.min())).asi8 / len(s) - ).sum()) - - # the computation is converted to float so might be some loss of - # precision - self.assertTrue(np.allclose(result.value / 1000, expected.value / - 1000)) - - # sum - self.assertRaises(ValueError, lambda: (s - s.min()).sum()) - s1 = s[0:10000] - self.assertRaises(ValueError, lambda: (s1 - s1.min()).sum()) - s2 = s[0:1000] - result = (s2 - s2.min()).sum() - - def test_pickle(self): - - v = Timedelta('1 days 10:11:12.0123456') - v_p = self.round_trip_pickle(v) - self.assertEqual(v, v_p) - - def test_timedelta_hash_equality(self): - # GH 11129 - v = Timedelta(1, 'D') - td = timedelta(days=1) - self.assertEqual(hash(v), hash(td)) - - d = {td: 2} - self.assertEqual(d[v], 2) - - tds = timedelta_range('1 second', periods=20) - self.assertTrue(all(hash(td) == hash(td.to_pytimedelta()) for td in - tds)) - - # python timedeltas drop ns resolution - ns_td = Timedelta(1, 'ns') - self.assertNotEqual(hash(ns_td), hash(ns_td.to_pytimedelta())) - - def test_implementation_limits(self): - min_td = Timedelta(Timedelta.min) - max_td = Timedelta(Timedelta.max) - - # GH 12727 - # timedelta limits correspond to int64 boundaries - self.assertTrue(min_td.value == np.iinfo(np.int64).min + 1) - self.assertTrue(max_td.value == np.iinfo(np.int64).max) - - # Beyond lower limit, a NAT before the Overflow - self.assertIsInstance(min_td 
- Timedelta(1, 'ns'), - pd.tslib.NaTType) - - with tm.assertRaises(OverflowError): - min_td - Timedelta(2, 'ns') - - with tm.assertRaises(OverflowError): - max_td + Timedelta(1, 'ns') - - # Same tests using the internal nanosecond values - td = Timedelta(min_td.value - 1, 'ns') - self.assertIsInstance(td, pd.tslib.NaTType) - - with tm.assertRaises(OverflowError): - Timedelta(min_td.value - 2, 'ns') - - with tm.assertRaises(OverflowError): - Timedelta(max_td.value + 1, 'ns') - - def test_timedelta_arithmetic(self): - data = pd.Series(['nat', '32 days'], dtype='timedelta64[ns]') - deltas = [timedelta(days=1), Timedelta(1, unit='D')] - for delta in deltas: - result_method = data.add(delta) - result_operator = data + delta - expected = pd.Series(['nat', '33 days'], dtype='timedelta64[ns]') - tm.assert_series_equal(result_operator, expected) - tm.assert_series_equal(result_method, expected) - - result_method = data.sub(delta) - result_operator = data - delta - expected = pd.Series(['nat', '31 days'], dtype='timedelta64[ns]') - tm.assert_series_equal(result_operator, expected) - tm.assert_series_equal(result_method, expected) - # GH 9396 - result_method = data.div(delta) - result_operator = data / delta - expected = pd.Series([np.nan, 32.], dtype='float64') - tm.assert_series_equal(result_operator, expected) - tm.assert_series_equal(result_method, expected) - - def test_apply_to_timedelta(self): - timedelta_NaT = pd.to_timedelta('NaT') - - list_of_valid_strings = ['00:00:01', '00:00:02'] - a = pd.to_timedelta(list_of_valid_strings) - b = Series(list_of_valid_strings).apply(pd.to_timedelta) - # Can't compare until apply on a Series gives the correct dtype - # assert_series_equal(a, b) - - list_of_strings = ['00:00:01', np.nan, pd.NaT, timedelta_NaT] - - # TODO: unused? 
- a = pd.to_timedelta(list_of_strings) # noqa - b = Series(list_of_strings).apply(pd.to_timedelta) # noqa - # Can't compare until apply on a Series gives the correct dtype - # assert_series_equal(a, b) - - def test_components(self): - rng = timedelta_range('1 days, 10:11:12', periods=2, freq='s') - rng.components - - # with nat - s = Series(rng) - s[1] = np.nan - - result = s.dt.components - self.assertFalse(result.iloc[0].isnull().all()) - self.assertTrue(result.iloc[1].isnull().all()) - - def test_isoformat(self): - td = Timedelta(days=6, minutes=50, seconds=3, - milliseconds=10, microseconds=10, nanoseconds=12) - expected = 'P6DT0H50M3.010010012S' - result = td.isoformat() - self.assertEqual(result, expected) - - td = Timedelta(days=4, hours=12, minutes=30, seconds=5) - result = td.isoformat() - expected = 'P4DT12H30M5S' - self.assertEqual(result, expected) - - td = Timedelta(nanoseconds=123) - result = td.isoformat() - expected = 'P0DT0H0M0.000000123S' - self.assertEqual(result, expected) - - # trim nano - td = Timedelta(microseconds=10) - result = td.isoformat() - expected = 'P0DT0H0M0.00001S' - self.assertEqual(result, expected) - - # trim micro - td = Timedelta(milliseconds=1) - result = td.isoformat() - expected = 'P0DT0H0M0.001S' - self.assertEqual(result, expected) - - # NaT - result = Timedelta('NaT').isoformat() - expected = 'NaT' - self.assertEqual(result, expected) - - # don't strip every 0 - result = Timedelta(minutes=1).isoformat() - expected = 'P0DT0H1M0S' - self.assertEqual(result, expected) - - def test_ops_error_str(self): - # GH 13624 - td = Timedelta('1 day') - - for l, r in [(td, 'a'), ('a', td)]: - - with tm.assertRaises(TypeError): - l + r - - with tm.assertRaises(TypeError): - l > r - - self.assertFalse(l == r) - self.assertTrue(l != r) diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py deleted file mode 100644 index 2abc83ca6109c..0000000000000 --- a/pandas/tests/scalar/test_timestamp.py +++ 
/dev/null @@ -1,1684 +0,0 @@ -""" test the scalar Timestamp """ - -import sys -import operator -import calendar -import numpy as np -from datetime import datetime, timedelta -from distutils.version import LooseVersion - -import pandas as pd -import pandas.util.testing as tm -import pandas._period as period -from pandas.tseries import offsets, frequencies -from pandas.tslib import get_timezone, iNaT -from pandas.compat import lrange, long -from pandas.util.testing import assert_series_equal -from pandas.compat.numpy import np_datetime64_compat -from pandas import (Timestamp, date_range, Period, Timedelta, tslib, compat, - Series, NaT, isnull, DataFrame, DatetimeIndex) -from pandas.tseries.frequencies import (RESO_DAY, RESO_HR, RESO_MIN, RESO_US, - RESO_MS, RESO_SEC) - -randn = np.random.randn - - -class TestTimestamp(tm.TestCase): - - def test_constructor(self): - base_str = '2014-07-01 09:00' - base_dt = datetime(2014, 7, 1, 9) - base_expected = 1404205200000000000 - - # confirm base representation is correct - import calendar - self.assertEqual(calendar.timegm(base_dt.timetuple()) * 1000000000, - base_expected) - - tests = [(base_str, base_dt, base_expected), - ('2014-07-01 10:00', datetime(2014, 7, 1, 10), - base_expected + 3600 * 1000000000), - ('2014-07-01 09:00:00.000008000', - datetime(2014, 7, 1, 9, 0, 0, 8), - base_expected + 8000), - ('2014-07-01 09:00:00.000000005', - Timestamp('2014-07-01 09:00:00.000000005'), - base_expected + 5)] - - tm._skip_if_no_pytz() - tm._skip_if_no_dateutil() - import pytz - import dateutil - timezones = [(None, 0), ('UTC', 0), (pytz.utc, 0), ('Asia/Tokyo', 9), - ('US/Eastern', -4), ('dateutil/US/Pacific', -7), - (pytz.FixedOffset(-180), -3), - (dateutil.tz.tzoffset(None, 18000), 5)] - - for date_str, date, expected in tests: - for result in [Timestamp(date_str), Timestamp(date)]: - # only with timestring - self.assertEqual(result.value, expected) - self.assertEqual(tslib.pydt_to_i8(result), expected) - - # re-creation shouldn't 
affect to internal value - result = Timestamp(result) - self.assertEqual(result.value, expected) - self.assertEqual(tslib.pydt_to_i8(result), expected) - - # with timezone - for tz, offset in timezones: - for result in [Timestamp(date_str, tz=tz), Timestamp(date, - tz=tz)]: - expected_tz = expected - offset * 3600 * 1000000000 - self.assertEqual(result.value, expected_tz) - self.assertEqual(tslib.pydt_to_i8(result), expected_tz) - - # should preserve tz - result = Timestamp(result) - self.assertEqual(result.value, expected_tz) - self.assertEqual(tslib.pydt_to_i8(result), expected_tz) - - # should convert to UTC - result = Timestamp(result, tz='UTC') - expected_utc = expected - offset * 3600 * 1000000000 - self.assertEqual(result.value, expected_utc) - self.assertEqual(tslib.pydt_to_i8(result), expected_utc) - - def test_constructor_with_stringoffset(self): - # GH 7833 - base_str = '2014-07-01 11:00:00+02:00' - base_dt = datetime(2014, 7, 1, 9) - base_expected = 1404205200000000000 - - # confirm base representation is correct - import calendar - self.assertEqual(calendar.timegm(base_dt.timetuple()) * 1000000000, - base_expected) - - tests = [(base_str, base_expected), - ('2014-07-01 12:00:00+02:00', - base_expected + 3600 * 1000000000), - ('2014-07-01 11:00:00.000008000+02:00', base_expected + 8000), - ('2014-07-01 11:00:00.000000005+02:00', base_expected + 5)] - - tm._skip_if_no_pytz() - tm._skip_if_no_dateutil() - import pytz - import dateutil - timezones = [(None, 0), ('UTC', 0), (pytz.utc, 0), ('Asia/Tokyo', 9), - ('US/Eastern', -4), ('dateutil/US/Pacific', -7), - (pytz.FixedOffset(-180), -3), - (dateutil.tz.tzoffset(None, 18000), 5)] - - for date_str, expected in tests: - for result in [Timestamp(date_str)]: - # only with timestring - self.assertEqual(result.value, expected) - self.assertEqual(tslib.pydt_to_i8(result), expected) - - # re-creation shouldn't affect to internal value - result = Timestamp(result) - self.assertEqual(result.value, expected) - 
self.assertEqual(tslib.pydt_to_i8(result), expected) - - # with timezone - for tz, offset in timezones: - result = Timestamp(date_str, tz=tz) - expected_tz = expected - self.assertEqual(result.value, expected_tz) - self.assertEqual(tslib.pydt_to_i8(result), expected_tz) - - # should preserve tz - result = Timestamp(result) - self.assertEqual(result.value, expected_tz) - self.assertEqual(tslib.pydt_to_i8(result), expected_tz) - - # should convert to UTC - result = Timestamp(result, tz='UTC') - expected_utc = expected - self.assertEqual(result.value, expected_utc) - self.assertEqual(tslib.pydt_to_i8(result), expected_utc) - - # This should be 2013-11-01 05:00 in UTC - # converted to Chicago tz - result = Timestamp('2013-11-01 00:00:00-0500', tz='America/Chicago') - self.assertEqual(result.value, Timestamp('2013-11-01 05:00').value) - expected = "Timestamp('2013-11-01 00:00:00-0500', tz='America/Chicago')" # noqa - self.assertEqual(repr(result), expected) - self.assertEqual(result, eval(repr(result))) - - # This should be 2013-11-01 05:00 in UTC - # converted to Tokyo tz (+09:00) - result = Timestamp('2013-11-01 00:00:00-0500', tz='Asia/Tokyo') - self.assertEqual(result.value, Timestamp('2013-11-01 05:00').value) - expected = "Timestamp('2013-11-01 14:00:00+0900', tz='Asia/Tokyo')" - self.assertEqual(repr(result), expected) - self.assertEqual(result, eval(repr(result))) - - # GH11708 - # This should be 2015-11-18 10:00 in UTC - # converted to Asia/Katmandu - result = Timestamp("2015-11-18 15:45:00+05:45", tz="Asia/Katmandu") - self.assertEqual(result.value, Timestamp("2015-11-18 10:00").value) - expected = "Timestamp('2015-11-18 15:45:00+0545', tz='Asia/Katmandu')" - self.assertEqual(repr(result), expected) - self.assertEqual(result, eval(repr(result))) - - # This should be 2015-11-18 10:00 in UTC - # converted to Asia/Kolkata - result = Timestamp("2015-11-18 15:30:00+05:30", tz="Asia/Kolkata") - self.assertEqual(result.value, Timestamp("2015-11-18 10:00").value) - 
expected = "Timestamp('2015-11-18 15:30:00+0530', tz='Asia/Kolkata')" - self.assertEqual(repr(result), expected) - self.assertEqual(result, eval(repr(result))) - - def test_constructor_invalid(self): - with tm.assertRaisesRegexp(TypeError, 'Cannot convert input'): - Timestamp(slice(2)) - with tm.assertRaisesRegexp(ValueError, 'Cannot convert Period'): - Timestamp(Period('1000-01-01')) - - def test_constructor_positional(self): - # GH 10758 - with tm.assertRaises(TypeError): - Timestamp(2000, 1) - with tm.assertRaises(ValueError): - Timestamp(2000, 0, 1) - with tm.assertRaises(ValueError): - Timestamp(2000, 13, 1) - with tm.assertRaises(ValueError): - Timestamp(2000, 1, 0) - with tm.assertRaises(ValueError): - Timestamp(2000, 1, 32) - - # GH 11630 - self.assertEqual( - repr(Timestamp(2015, 11, 12)), - repr(Timestamp('20151112'))) - - self.assertEqual( - repr(Timestamp(2015, 11, 12, 1, 2, 3, 999999)), - repr(Timestamp('2015-11-12 01:02:03.999999'))) - - self.assertIs(Timestamp(None), pd.NaT) - - def test_constructor_keyword(self): - # GH 10758 - with tm.assertRaises(TypeError): - Timestamp(year=2000, month=1) - with tm.assertRaises(ValueError): - Timestamp(year=2000, month=0, day=1) - with tm.assertRaises(ValueError): - Timestamp(year=2000, month=13, day=1) - with tm.assertRaises(ValueError): - Timestamp(year=2000, month=1, day=0) - with tm.assertRaises(ValueError): - Timestamp(year=2000, month=1, day=32) - - self.assertEqual( - repr(Timestamp(year=2015, month=11, day=12)), - repr(Timestamp('20151112'))) - - self.assertEqual( - repr(Timestamp(year=2015, month=11, day=12, - hour=1, minute=2, second=3, microsecond=999999)), - repr(Timestamp('2015-11-12 01:02:03.999999'))) - - def test_constructor_fromordinal(self): - base = datetime(2000, 1, 1) - - ts = Timestamp.fromordinal(base.toordinal(), freq='D') - self.assertEqual(base, ts) - self.assertEqual(ts.freq, 'D') - self.assertEqual(base.toordinal(), ts.toordinal()) - - ts = Timestamp.fromordinal(base.toordinal(), 
tz='US/Eastern') - self.assertEqual(pd.Timestamp('2000-01-01', tz='US/Eastern'), ts) - self.assertEqual(base.toordinal(), ts.toordinal()) - - def test_constructor_offset_depr(self): - # GH 12160 - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - ts = Timestamp('2011-01-01', offset='D') - self.assertEqual(ts.freq, 'D') - - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - self.assertEqual(ts.offset, 'D') - - msg = "Can only specify freq or offset, not both" - with tm.assertRaisesRegexp(TypeError, msg): - Timestamp('2011-01-01', offset='D', freq='D') - - def test_constructor_offset_depr_fromordinal(self): - # GH 12160 - base = datetime(2000, 1, 1) - - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - ts = Timestamp.fromordinal(base.toordinal(), offset='D') - self.assertEqual(pd.Timestamp('2000-01-01'), ts) - self.assertEqual(ts.freq, 'D') - self.assertEqual(base.toordinal(), ts.toordinal()) - - msg = "Can only specify freq or offset, not both" - with tm.assertRaisesRegexp(TypeError, msg): - Timestamp.fromordinal(base.toordinal(), offset='D', freq='D') - - def test_conversion(self): - # GH 9255 - ts = Timestamp('2000-01-01') - - result = ts.to_pydatetime() - expected = datetime(2000, 1, 1) - self.assertEqual(result, expected) - self.assertEqual(type(result), type(expected)) - - result = ts.to_datetime64() - expected = np.datetime64(ts.value, 'ns') - self.assertEqual(result, expected) - self.assertEqual(type(result), type(expected)) - self.assertEqual(result.dtype, expected.dtype) - - def test_repr(self): - tm._skip_if_no_pytz() - tm._skip_if_no_dateutil() - - dates = ['2014-03-07', '2014-01-01 09:00', - '2014-01-01 00:00:00.000000001'] - - # dateutil zone change (only matters for repr) - import dateutil - if (dateutil.__version__ >= LooseVersion('2.3') and - (dateutil.__version__ <= LooseVersion('2.4.0') or - dateutil.__version__ >= LooseVersion('2.6.0'))): - timezones = ['UTC', 
'Asia/Tokyo', 'US/Eastern', - 'dateutil/US/Pacific'] - else: - timezones = ['UTC', 'Asia/Tokyo', 'US/Eastern', - 'dateutil/America/Los_Angeles'] - - freqs = ['D', 'M', 'S', 'N'] - - for date in dates: - for tz in timezones: - for freq in freqs: - - # avoid to match with timezone name - freq_repr = "'{0}'".format(freq) - if tz.startswith('dateutil'): - tz_repr = tz.replace('dateutil', '') - else: - tz_repr = tz - - date_only = Timestamp(date) - self.assertIn(date, repr(date_only)) - self.assertNotIn(tz_repr, repr(date_only)) - self.assertNotIn(freq_repr, repr(date_only)) - self.assertEqual(date_only, eval(repr(date_only))) - - date_tz = Timestamp(date, tz=tz) - self.assertIn(date, repr(date_tz)) - self.assertIn(tz_repr, repr(date_tz)) - self.assertNotIn(freq_repr, repr(date_tz)) - self.assertEqual(date_tz, eval(repr(date_tz))) - - date_freq = Timestamp(date, freq=freq) - self.assertIn(date, repr(date_freq)) - self.assertNotIn(tz_repr, repr(date_freq)) - self.assertIn(freq_repr, repr(date_freq)) - self.assertEqual(date_freq, eval(repr(date_freq))) - - date_tz_freq = Timestamp(date, tz=tz, freq=freq) - self.assertIn(date, repr(date_tz_freq)) - self.assertIn(tz_repr, repr(date_tz_freq)) - self.assertIn(freq_repr, repr(date_tz_freq)) - self.assertEqual(date_tz_freq, eval(repr(date_tz_freq))) - - # this can cause the tz field to be populated, but it's redundant to - # information in the datestring - tm._skip_if_no_pytz() - import pytz # noqa - date_with_utc_offset = Timestamp('2014-03-13 00:00:00-0400', tz=None) - self.assertIn('2014-03-13 00:00:00-0400', repr(date_with_utc_offset)) - self.assertNotIn('tzoffset', repr(date_with_utc_offset)) - self.assertIn('pytz.FixedOffset(-240)', repr(date_with_utc_offset)) - expr = repr(date_with_utc_offset).replace("'pytz.FixedOffset(-240)'", - 'pytz.FixedOffset(-240)') - self.assertEqual(date_with_utc_offset, eval(expr)) - - def test_bounds_with_different_units(self): - out_of_bounds_dates = ('1677-09-21', '2262-04-12', ) - - 
time_units = ('D', 'h', 'm', 's', 'ms', 'us') - - for date_string in out_of_bounds_dates: - for unit in time_units: - self.assertRaises(ValueError, Timestamp, np.datetime64( - date_string, dtype='M8[%s]' % unit)) - - in_bounds_dates = ('1677-09-23', '2262-04-11', ) - - for date_string in in_bounds_dates: - for unit in time_units: - Timestamp(np.datetime64(date_string, dtype='M8[%s]' % unit)) - - def test_tz(self): - t = '2014-02-01 09:00' - ts = Timestamp(t) - local = ts.tz_localize('Asia/Tokyo') - self.assertEqual(local.hour, 9) - self.assertEqual(local, Timestamp(t, tz='Asia/Tokyo')) - conv = local.tz_convert('US/Eastern') - self.assertEqual(conv, Timestamp('2014-01-31 19:00', tz='US/Eastern')) - self.assertEqual(conv.hour, 19) - - # preserves nanosecond - ts = Timestamp(t) + offsets.Nano(5) - local = ts.tz_localize('Asia/Tokyo') - self.assertEqual(local.hour, 9) - self.assertEqual(local.nanosecond, 5) - conv = local.tz_convert('US/Eastern') - self.assertEqual(conv.nanosecond, 5) - self.assertEqual(conv.hour, 19) - - def test_tz_localize_ambiguous(self): - - ts = Timestamp('2014-11-02 01:00') - ts_dst = ts.tz_localize('US/Eastern', ambiguous=True) - ts_no_dst = ts.tz_localize('US/Eastern', ambiguous=False) - - rng = date_range('2014-11-02', periods=3, freq='H', tz='US/Eastern') - self.assertEqual(rng[1], ts_dst) - self.assertEqual(rng[2], ts_no_dst) - self.assertRaises(ValueError, ts.tz_localize, 'US/Eastern', - ambiguous='infer') - - # GH 8025 - with tm.assertRaisesRegexp(TypeError, - 'Cannot localize tz-aware Timestamp, use ' - 'tz_convert for conversions'): - Timestamp('2011-01-01', tz='US/Eastern').tz_localize('Asia/Tokyo') - - with tm.assertRaisesRegexp(TypeError, - 'Cannot convert tz-naive Timestamp, use ' - 'tz_localize to localize'): - Timestamp('2011-01-01').tz_convert('Asia/Tokyo') - - def test_tz_localize_nonexistent(self): - # See issue 13057 - from pytz.exceptions import NonExistentTimeError - times = ['2015-03-08 02:00', '2015-03-08 02:30', - 
'2015-03-29 02:00', '2015-03-29 02:30'] - timezones = ['US/Eastern', 'US/Pacific', - 'Europe/Paris', 'Europe/Belgrade'] - for t, tz in zip(times, timezones): - ts = Timestamp(t) - self.assertRaises(NonExistentTimeError, ts.tz_localize, - tz) - self.assertRaises(NonExistentTimeError, ts.tz_localize, - tz, errors='raise') - self.assertIs(ts.tz_localize(tz, errors='coerce'), - pd.NaT) - - def test_tz_localize_errors_ambiguous(self): - # See issue 13057 - from pytz.exceptions import AmbiguousTimeError - ts = pd.Timestamp('2015-11-1 01:00') - self.assertRaises(AmbiguousTimeError, - ts.tz_localize, 'US/Pacific', errors='coerce') - - def test_tz_localize_roundtrip(self): - for tz in ['UTC', 'Asia/Tokyo', 'US/Eastern', 'dateutil/US/Pacific']: - for t in ['2014-02-01 09:00', '2014-07-08 09:00', - '2014-11-01 17:00', '2014-11-05 00:00']: - ts = Timestamp(t) - localized = ts.tz_localize(tz) - self.assertEqual(localized, Timestamp(t, tz=tz)) - - with tm.assertRaises(TypeError): - localized.tz_localize(tz) - - reset = localized.tz_localize(None) - self.assertEqual(reset, ts) - self.assertTrue(reset.tzinfo is None) - - def test_tz_convert_roundtrip(self): - for tz in ['UTC', 'Asia/Tokyo', 'US/Eastern', 'dateutil/US/Pacific']: - for t in ['2014-02-01 09:00', '2014-07-08 09:00', - '2014-11-01 17:00', '2014-11-05 00:00']: - ts = Timestamp(t, tz='UTC') - converted = ts.tz_convert(tz) - - reset = converted.tz_convert(None) - self.assertEqual(reset, Timestamp(t)) - self.assertTrue(reset.tzinfo is None) - self.assertEqual(reset, - converted.tz_convert('UTC').tz_localize(None)) - - def test_barely_oob_dts(self): - one_us = np.timedelta64(1).astype('timedelta64[us]') - - # By definition we can't go out of bounds in [ns], so we - # convert the datetime64s to [us] so we can go out of bounds - min_ts_us = np.datetime64(Timestamp.min).astype('M8[us]') - max_ts_us = np.datetime64(Timestamp.max).astype('M8[us]') - - # No error for the min/max datetimes - Timestamp(min_ts_us) - 
Timestamp(max_ts_us) - - # One us less than the minimum is an error - self.assertRaises(ValueError, Timestamp, min_ts_us - one_us) - - # One us more than the maximum is an error - self.assertRaises(ValueError, Timestamp, max_ts_us + one_us) - - def test_utc_z_designator(self): - self.assertEqual(get_timezone( - Timestamp('2014-11-02 01:00Z').tzinfo), 'UTC') - - def test_now(self): - # #9000 - ts_from_string = Timestamp('now') - ts_from_method = Timestamp.now() - ts_datetime = datetime.now() - - ts_from_string_tz = Timestamp('now', tz='US/Eastern') - ts_from_method_tz = Timestamp.now(tz='US/Eastern') - - # Check that the delta between the times is less than 1s (arbitrarily - # small) - delta = Timedelta(seconds=1) - self.assertTrue(abs(ts_from_method - ts_from_string) < delta) - self.assertTrue(abs(ts_datetime - ts_from_method) < delta) - self.assertTrue(abs(ts_from_method_tz - ts_from_string_tz) < delta) - self.assertTrue(abs(ts_from_string_tz.tz_localize(None) - - ts_from_method_tz.tz_localize(None)) < delta) - - def test_today(self): - - ts_from_string = Timestamp('today') - ts_from_method = Timestamp.today() - ts_datetime = datetime.today() - - ts_from_string_tz = Timestamp('today', tz='US/Eastern') - ts_from_method_tz = Timestamp.today(tz='US/Eastern') - - # Check that the delta between the times is less than 1s (arbitrarily - # small) - delta = Timedelta(seconds=1) - self.assertTrue(abs(ts_from_method - ts_from_string) < delta) - self.assertTrue(abs(ts_datetime - ts_from_method) < delta) - self.assertTrue(abs(ts_from_method_tz - ts_from_string_tz) < delta) - self.assertTrue(abs(ts_from_string_tz.tz_localize(None) - - ts_from_method_tz.tz_localize(None)) < delta) - - def test_asm8(self): - np.random.seed(7960929) - ns = [Timestamp.min.value, Timestamp.max.value, 1000, ] - for n in ns: - self.assertEqual(Timestamp(n).asm8.view('i8'), - np.datetime64(n, 'ns').view('i8'), n) - self.assertEqual(Timestamp('nat').asm8.view('i8'), - np.datetime64('nat', 
'ns').view('i8')) - - def test_fields(self): - def check(value, equal): - # that we are int/long like - self.assertTrue(isinstance(value, (int, compat.long))) - self.assertEqual(value, equal) - - # GH 10050 - ts = Timestamp('2015-05-10 09:06:03.000100001') - check(ts.year, 2015) - check(ts.month, 5) - check(ts.day, 10) - check(ts.hour, 9) - check(ts.minute, 6) - check(ts.second, 3) - self.assertRaises(AttributeError, lambda: ts.millisecond) - check(ts.microsecond, 100) - check(ts.nanosecond, 1) - check(ts.dayofweek, 6) - check(ts.quarter, 2) - check(ts.dayofyear, 130) - check(ts.week, 19) - check(ts.daysinmonth, 31) - check(ts.daysinmonth, 31) - - def test_nat_fields(self): - # GH 10050 - ts = Timestamp('NaT') - self.assertTrue(np.isnan(ts.year)) - self.assertTrue(np.isnan(ts.month)) - self.assertTrue(np.isnan(ts.day)) - self.assertTrue(np.isnan(ts.hour)) - self.assertTrue(np.isnan(ts.minute)) - self.assertTrue(np.isnan(ts.second)) - self.assertTrue(np.isnan(ts.microsecond)) - self.assertTrue(np.isnan(ts.nanosecond)) - self.assertTrue(np.isnan(ts.dayofweek)) - self.assertTrue(np.isnan(ts.quarter)) - self.assertTrue(np.isnan(ts.dayofyear)) - self.assertTrue(np.isnan(ts.week)) - self.assertTrue(np.isnan(ts.daysinmonth)) - self.assertTrue(np.isnan(ts.days_in_month)) - - def test_nat_vector_field_access(self): - idx = DatetimeIndex(['1/1/2000', None, None, '1/4/2000']) - - fields = ['year', 'quarter', 'month', 'day', 'hour', 'minute', - 'second', 'microsecond', 'nanosecond', 'week', 'dayofyear', - 'days_in_month', 'is_leap_year'] - - for field in fields: - result = getattr(idx, field) - expected = [getattr(x, field) for x in idx] - self.assert_numpy_array_equal(result, np.array(expected)) - - s = pd.Series(idx) - - for field in fields: - result = getattr(s.dt, field) - expected = [getattr(x, field) for x in idx] - self.assert_series_equal(result, pd.Series(expected)) - - def test_nat_scalar_field_access(self): - fields = ['year', 'quarter', 'month', 'day', 'hour', 
'minute', - 'second', 'microsecond', 'nanosecond', 'week', 'dayofyear', - 'days_in_month', 'daysinmonth', 'dayofweek', 'weekday_name'] - for field in fields: - result = getattr(NaT, field) - self.assertTrue(np.isnan(result)) - - def test_NaT_methods(self): - # GH 9513 - raise_methods = ['astimezone', 'combine', 'ctime', 'dst', - 'fromordinal', 'fromtimestamp', 'isocalendar', - 'strftime', 'strptime', 'time', 'timestamp', - 'timetuple', 'timetz', 'toordinal', 'tzname', - 'utcfromtimestamp', 'utcnow', 'utcoffset', - 'utctimetuple'] - nat_methods = ['date', 'now', 'replace', 'to_datetime', 'today'] - nan_methods = ['weekday', 'isoweekday'] - - for method in raise_methods: - if hasattr(NaT, method): - self.assertRaises(ValueError, getattr(NaT, method)) - - for method in nan_methods: - if hasattr(NaT, method): - self.assertTrue(np.isnan(getattr(NaT, method)())) - - for method in nat_methods: - if hasattr(NaT, method): - # see gh-8254 - exp_warning = None - if method == 'to_datetime': - exp_warning = FutureWarning - with tm.assert_produces_warning( - exp_warning, check_stacklevel=False): - self.assertIs(getattr(NaT, method)(), NaT) - - # GH 12300 - self.assertEqual(NaT.isoformat(), 'NaT') - - def test_pprint(self): - # GH12622 - import pprint - nested_obj = {'foo': 1, - 'bar': [{'w': {'a': Timestamp('2011-01-01')}}] * 10} - result = pprint.pformat(nested_obj, width=50) - expected = r"""{'bar': [{'w': {'a': Timestamp('2011-01-01 00:00:00')}}, - {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, - {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, - {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, - {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, - {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, - {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, - {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, - {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, - {'w': {'a': Timestamp('2011-01-01 00:00:00')}}], - 'foo': 1}""" - self.assertEqual(result, expected) - - def to_datetime_depr(self): - # 
see gh-8254 - ts = Timestamp('2011-01-01') - - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - expected = datetime(2011, 1, 1) - result = ts.to_datetime() - self.assertEqual(result, expected) - - def to_pydatetime_nonzero_nano(self): - ts = Timestamp('2011-01-01 9:00:00.123456789') - - # Warn the user of data loss (nanoseconds). - with tm.assert_produces_warning(UserWarning, - check_stacklevel=False): - expected = datetime(2011, 1, 1, 9, 0, 0, 123456) - result = ts.to_pydatetime() - self.assertEqual(result, expected) - - def test_round(self): - - # round - dt = Timestamp('20130101 09:10:11') - result = dt.round('D') - expected = Timestamp('20130101') - self.assertEqual(result, expected) - - dt = Timestamp('20130101 19:10:11') - result = dt.round('D') - expected = Timestamp('20130102') - self.assertEqual(result, expected) - - dt = Timestamp('20130201 12:00:00') - result = dt.round('D') - expected = Timestamp('20130202') - self.assertEqual(result, expected) - - dt = Timestamp('20130104 12:00:00') - result = dt.round('D') - expected = Timestamp('20130105') - self.assertEqual(result, expected) - - dt = Timestamp('20130104 12:32:00') - result = dt.round('30Min') - expected = Timestamp('20130104 12:30:00') - self.assertEqual(result, expected) - - dti = date_range('20130101 09:10:11', periods=5) - result = dti.round('D') - expected = date_range('20130101', periods=5) - tm.assert_index_equal(result, expected) - - # floor - dt = Timestamp('20130101 09:10:11') - result = dt.floor('D') - expected = Timestamp('20130101') - self.assertEqual(result, expected) - - # ceil - dt = Timestamp('20130101 09:10:11') - result = dt.ceil('D') - expected = Timestamp('20130102') - self.assertEqual(result, expected) - - # round with tz - dt = Timestamp('20130101 09:10:11', tz='US/Eastern') - result = dt.round('D') - expected = Timestamp('20130101', tz='US/Eastern') - self.assertEqual(result, expected) - - dt = Timestamp('20130101 09:10:11', tz='US/Eastern') - result 
= dt.round('s') - self.assertEqual(result, dt) - - dti = date_range('20130101 09:10:11', - periods=5).tz_localize('UTC').tz_convert('US/Eastern') - result = dti.round('D') - expected = date_range('20130101', periods=5).tz_localize('US/Eastern') - tm.assert_index_equal(result, expected) - - result = dti.round('s') - tm.assert_index_equal(result, dti) - - # invalid - for freq in ['Y', 'M', 'foobar']: - self.assertRaises(ValueError, lambda: dti.round(freq)) - - def test_class_ops_pytz(self): - tm._skip_if_no_pytz() - from pytz import timezone - - def compare(x, y): - self.assertEqual(int(Timestamp(x).value / 1e9), - int(Timestamp(y).value / 1e9)) - - compare(Timestamp.now(), datetime.now()) - compare(Timestamp.now('UTC'), datetime.now(timezone('UTC'))) - compare(Timestamp.utcnow(), datetime.utcnow()) - compare(Timestamp.today(), datetime.today()) - current_time = calendar.timegm(datetime.now().utctimetuple()) - compare(Timestamp.utcfromtimestamp(current_time), - datetime.utcfromtimestamp(current_time)) - compare(Timestamp.fromtimestamp(current_time), - datetime.fromtimestamp(current_time)) - - date_component = datetime.utcnow() - time_component = (date_component + timedelta(minutes=10)).time() - compare(Timestamp.combine(date_component, time_component), - datetime.combine(date_component, time_component)) - - def test_class_ops_dateutil(self): - tm._skip_if_no_dateutil() - from dateutil.tz import tzutc - - def compare(x, y): - self.assertEqual(int(np.round(Timestamp(x).value / 1e9)), - int(np.round(Timestamp(y).value / 1e9))) - - compare(Timestamp.now(), datetime.now()) - compare(Timestamp.now('UTC'), datetime.now(tzutc())) - compare(Timestamp.utcnow(), datetime.utcnow()) - compare(Timestamp.today(), datetime.today()) - current_time = calendar.timegm(datetime.now().utctimetuple()) - compare(Timestamp.utcfromtimestamp(current_time), - datetime.utcfromtimestamp(current_time)) - compare(Timestamp.fromtimestamp(current_time), - datetime.fromtimestamp(current_time)) - - 
date_component = datetime.utcnow() - time_component = (date_component + timedelta(minutes=10)).time() - compare(Timestamp.combine(date_component, time_component), - datetime.combine(date_component, time_component)) - - def test_basics_nanos(self): - val = np.int64(946684800000000000).view('M8[ns]') - stamp = Timestamp(val.view('i8') + 500) - self.assertEqual(stamp.year, 2000) - self.assertEqual(stamp.month, 1) - self.assertEqual(stamp.microsecond, 0) - self.assertEqual(stamp.nanosecond, 500) - - # GH 14415 - val = np.iinfo(np.int64).min + 80000000000000 - stamp = Timestamp(val) - self.assertEqual(stamp.year, 1677) - self.assertEqual(stamp.month, 9) - self.assertEqual(stamp.day, 21) - self.assertEqual(stamp.microsecond, 145224) - self.assertEqual(stamp.nanosecond, 192) - - def test_unit(self): - - def check(val, unit=None, h=1, s=1, us=0): - stamp = Timestamp(val, unit=unit) - self.assertEqual(stamp.year, 2000) - self.assertEqual(stamp.month, 1) - self.assertEqual(stamp.day, 1) - self.assertEqual(stamp.hour, h) - if unit != 'D': - self.assertEqual(stamp.minute, 1) - self.assertEqual(stamp.second, s) - self.assertEqual(stamp.microsecond, us) - else: - self.assertEqual(stamp.minute, 0) - self.assertEqual(stamp.second, 0) - self.assertEqual(stamp.microsecond, 0) - self.assertEqual(stamp.nanosecond, 0) - - ts = Timestamp('20000101 01:01:01') - val = ts.value - days = (ts - Timestamp('1970-01-01')).days - - check(val) - check(val / long(1000), unit='us') - check(val / long(1000000), unit='ms') - check(val / long(1000000000), unit='s') - check(days, unit='D', h=0) - - # using truediv, so these are like floats - if compat.PY3: - check((val + 500000) / long(1000000000), unit='s', us=500) - check((val + 500000000) / long(1000000000), unit='s', us=500000) - check((val + 500000) / long(1000000), unit='ms', us=500) - - # get chopped in py2 - else: - check((val + 500000) / long(1000000000), unit='s') - check((val + 500000000) / long(1000000000), unit='s') - check((val + 500000) 
/ long(1000000), unit='ms') - - # ok - check((val + 500000) / long(1000), unit='us', us=500) - check((val + 500000000) / long(1000000), unit='ms', us=500000) - - # floats - check(val / 1000.0 + 5, unit='us', us=5) - check(val / 1000.0 + 5000, unit='us', us=5000) - check(val / 1000000.0 + 0.5, unit='ms', us=500) - check(val / 1000000.0 + 0.005, unit='ms', us=5) - check(val / 1000000000.0 + 0.5, unit='s', us=500000) - check(days + 0.5, unit='D', h=12) - - # nan - result = Timestamp(np.nan) - self.assertIs(result, NaT) - - result = Timestamp(None) - self.assertIs(result, NaT) - - result = Timestamp(iNaT) - self.assertIs(result, NaT) - - result = Timestamp(NaT) - self.assertIs(result, NaT) - - result = Timestamp('NaT') - self.assertIs(result, NaT) - - self.assertTrue(isnull(Timestamp('nat'))) - - def test_roundtrip(self): - - # test value to string and back conversions - # further test accessors - base = Timestamp('20140101 00:00:00') - - result = Timestamp(base.value + pd.Timedelta('5ms').value) - self.assertEqual(result, Timestamp(str(base) + ".005000")) - self.assertEqual(result.microsecond, 5000) - - result = Timestamp(base.value + pd.Timedelta('5us').value) - self.assertEqual(result, Timestamp(str(base) + ".000005")) - self.assertEqual(result.microsecond, 5) - - result = Timestamp(base.value + pd.Timedelta('5ns').value) - self.assertEqual(result, Timestamp(str(base) + ".000000005")) - self.assertEqual(result.nanosecond, 5) - self.assertEqual(result.microsecond, 0) - - result = Timestamp(base.value + pd.Timedelta('6ms 5us').value) - self.assertEqual(result, Timestamp(str(base) + ".006005")) - self.assertEqual(result.microsecond, 5 + 6 * 1000) - - result = Timestamp(base.value + pd.Timedelta('200ms 5us').value) - self.assertEqual(result, Timestamp(str(base) + ".200005")) - self.assertEqual(result.microsecond, 5 + 200 * 1000) - - def test_comparison(self): - # 5-18-2012 00:00:00.000 - stamp = long(1337299200000000000) - - val = Timestamp(stamp) - - 
self.assertEqual(val, val) - self.assertFalse(val != val) - self.assertFalse(val < val) - self.assertTrue(val <= val) - self.assertFalse(val > val) - self.assertTrue(val >= val) - - other = datetime(2012, 5, 18) - self.assertEqual(val, other) - self.assertFalse(val != other) - self.assertFalse(val < other) - self.assertTrue(val <= other) - self.assertFalse(val > other) - self.assertTrue(val >= other) - - other = Timestamp(stamp + 100) - - self.assertNotEqual(val, other) - self.assertNotEqual(val, other) - self.assertTrue(val < other) - self.assertTrue(val <= other) - self.assertTrue(other > val) - self.assertTrue(other >= val) - - def test_compare_invalid(self): - - # GH 8058 - val = Timestamp('20130101 12:01:02') - self.assertFalse(val == 'foo') - self.assertFalse(val == 10.0) - self.assertFalse(val == 1) - self.assertFalse(val == long(1)) - self.assertFalse(val == []) - self.assertFalse(val == {'foo': 1}) - self.assertFalse(val == np.float64(1)) - self.assertFalse(val == np.int64(1)) - - self.assertTrue(val != 'foo') - self.assertTrue(val != 10.0) - self.assertTrue(val != 1) - self.assertTrue(val != long(1)) - self.assertTrue(val != []) - self.assertTrue(val != {'foo': 1}) - self.assertTrue(val != np.float64(1)) - self.assertTrue(val != np.int64(1)) - - # ops testing - df = DataFrame(randn(5, 2)) - a = df[0] - b = Series(randn(5)) - b.name = Timestamp('2000-01-01') - tm.assert_series_equal(a / b, 1 / (b / a)) - - def test_cant_compare_tz_naive_w_aware(self): - tm._skip_if_no_pytz() - # #1404 - a = Timestamp('3/12/2012') - b = Timestamp('3/12/2012', tz='utc') - - self.assertRaises(Exception, a.__eq__, b) - self.assertRaises(Exception, a.__ne__, b) - self.assertRaises(Exception, a.__lt__, b) - self.assertRaises(Exception, a.__gt__, b) - self.assertRaises(Exception, b.__eq__, a) - self.assertRaises(Exception, b.__ne__, a) - self.assertRaises(Exception, b.__lt__, a) - self.assertRaises(Exception, b.__gt__, a) - - if sys.version_info < (3, 3): - 
self.assertRaises(Exception, a.__eq__, b.to_pydatetime()) - self.assertRaises(Exception, a.to_pydatetime().__eq__, b) - else: - self.assertFalse(a == b.to_pydatetime()) - self.assertFalse(a.to_pydatetime() == b) - - def test_cant_compare_tz_naive_w_aware_explicit_pytz(self): - tm._skip_if_no_pytz() - from pytz import utc - # #1404 - a = Timestamp('3/12/2012') - b = Timestamp('3/12/2012', tz=utc) - - self.assertRaises(Exception, a.__eq__, b) - self.assertRaises(Exception, a.__ne__, b) - self.assertRaises(Exception, a.__lt__, b) - self.assertRaises(Exception, a.__gt__, b) - self.assertRaises(Exception, b.__eq__, a) - self.assertRaises(Exception, b.__ne__, a) - self.assertRaises(Exception, b.__lt__, a) - self.assertRaises(Exception, b.__gt__, a) - - if sys.version_info < (3, 3): - self.assertRaises(Exception, a.__eq__, b.to_pydatetime()) - self.assertRaises(Exception, a.to_pydatetime().__eq__, b) - else: - self.assertFalse(a == b.to_pydatetime()) - self.assertFalse(a.to_pydatetime() == b) - - def test_cant_compare_tz_naive_w_aware_dateutil(self): - tm._skip_if_no_dateutil() - from dateutil.tz import tzutc - utc = tzutc() - # #1404 - a = Timestamp('3/12/2012') - b = Timestamp('3/12/2012', tz=utc) - - self.assertRaises(Exception, a.__eq__, b) - self.assertRaises(Exception, a.__ne__, b) - self.assertRaises(Exception, a.__lt__, b) - self.assertRaises(Exception, a.__gt__, b) - self.assertRaises(Exception, b.__eq__, a) - self.assertRaises(Exception, b.__ne__, a) - self.assertRaises(Exception, b.__lt__, a) - self.assertRaises(Exception, b.__gt__, a) - - if sys.version_info < (3, 3): - self.assertRaises(Exception, a.__eq__, b.to_pydatetime()) - self.assertRaises(Exception, a.to_pydatetime().__eq__, b) - else: - self.assertFalse(a == b.to_pydatetime()) - self.assertFalse(a.to_pydatetime() == b) - - def test_delta_preserve_nanos(self): - val = Timestamp(long(1337299200000000123)) - result = val + timedelta(1) - self.assertEqual(result.nanosecond, val.nanosecond) - - def 
test_frequency_misc(self): - self.assertEqual(frequencies.get_freq_group('T'), - frequencies.FreqGroup.FR_MIN) - - code, stride = frequencies.get_freq_code(offsets.Hour()) - self.assertEqual(code, frequencies.FreqGroup.FR_HR) - - code, stride = frequencies.get_freq_code((5, 'T')) - self.assertEqual(code, frequencies.FreqGroup.FR_MIN) - self.assertEqual(stride, 5) - - offset = offsets.Hour() - result = frequencies.to_offset(offset) - self.assertEqual(result, offset) - - result = frequencies.to_offset((5, 'T')) - expected = offsets.Minute(5) - self.assertEqual(result, expected) - - self.assertRaises(ValueError, frequencies.get_freq_code, (5, 'baz')) - - self.assertRaises(ValueError, frequencies.to_offset, '100foo') - - self.assertRaises(ValueError, frequencies.to_offset, ('', '')) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = frequencies.get_standard_freq(offsets.Hour()) - self.assertEqual(result, 'H') - - def test_hash_equivalent(self): - d = {datetime(2011, 1, 1): 5} - stamp = Timestamp(datetime(2011, 1, 1)) - self.assertEqual(d[stamp], 5) - - def test_timestamp_compare_scalars(self): - # case where ndim == 0 - lhs = np.datetime64(datetime(2013, 12, 6)) - rhs = Timestamp('now') - nat = Timestamp('nat') - - ops = {'gt': 'lt', - 'lt': 'gt', - 'ge': 'le', - 'le': 'ge', - 'eq': 'eq', - 'ne': 'ne'} - - for left, right in ops.items(): - left_f = getattr(operator, left) - right_f = getattr(operator, right) - expected = left_f(lhs, rhs) - - result = right_f(rhs, lhs) - self.assertEqual(result, expected) - - expected = left_f(rhs, nat) - result = right_f(nat, rhs) - self.assertEqual(result, expected) - - def test_timestamp_compare_series(self): - # make sure we can compare Timestamps on the right AND left hand side - # GH4982 - s = Series(date_range('20010101', periods=10), name='dates') - s_nat = s.copy(deep=True) - - s[0] = pd.Timestamp('nat') - s[3] = pd.Timestamp('nat') - - ops = {'lt': 'gt', 'le': 'ge', 'eq': 'eq', 'ne': 'ne'} - 
- for left, right in ops.items(): - left_f = getattr(operator, left) - right_f = getattr(operator, right) - - # no nats - expected = left_f(s, Timestamp('20010109')) - result = right_f(Timestamp('20010109'), s) - tm.assert_series_equal(result, expected) - - # nats - expected = left_f(s, Timestamp('nat')) - result = right_f(Timestamp('nat'), s) - tm.assert_series_equal(result, expected) - - # compare to timestamp with series containing nats - expected = left_f(s_nat, Timestamp('20010109')) - result = right_f(Timestamp('20010109'), s_nat) - tm.assert_series_equal(result, expected) - - # compare to nat with series containing nats - expected = left_f(s_nat, Timestamp('nat')) - result = right_f(Timestamp('nat'), s_nat) - tm.assert_series_equal(result, expected) - - def test_is_leap_year(self): - # GH 13727 - for tz in [None, 'UTC', 'US/Eastern', 'Asia/Tokyo']: - dt = Timestamp('2000-01-01 00:00:00', tz=tz) - self.assertTrue(dt.is_leap_year) - self.assertIsInstance(dt.is_leap_year, bool) - - dt = Timestamp('1999-01-01 00:00:00', tz=tz) - self.assertFalse(dt.is_leap_year) - - dt = Timestamp('2004-01-01 00:00:00', tz=tz) - self.assertTrue(dt.is_leap_year) - - dt = Timestamp('2100-01-01 00:00:00', tz=tz) - self.assertFalse(dt.is_leap_year) - - self.assertFalse(pd.NaT.is_leap_year) - self.assertIsInstance(pd.NaT.is_leap_year, bool) - - def test_round_nat(self): - # GH14940 - ts = Timestamp('nat') - print(dir(ts)) - for method in ["round", "floor", "ceil"]: - round_method = getattr(ts, method) - for freq in ["s", "5s", "min", "5min", "h", "5h"]: - self.assertIs(round_method(freq), ts) - - -class TestTimestampNsOperations(tm.TestCase): - - def setUp(self): - self.timestamp = Timestamp(datetime.utcnow()) - - def assert_ns_timedelta(self, modified_timestamp, expected_value): - value = self.timestamp.value - modified_value = modified_timestamp.value - - self.assertEqual(modified_value - value, expected_value) - - def test_timedelta_ns_arithmetic(self): - 
self.assert_ns_timedelta(self.timestamp + np.timedelta64(-123, 'ns'), - -123) - - def test_timedelta_ns_based_arithmetic(self): - self.assert_ns_timedelta(self.timestamp + np.timedelta64( - 1234567898, 'ns'), 1234567898) - - def test_timedelta_us_arithmetic(self): - self.assert_ns_timedelta(self.timestamp + np.timedelta64(-123, 'us'), - -123000) - - def test_timedelta_ms_arithmetic(self): - time = self.timestamp + np.timedelta64(-123, 'ms') - self.assert_ns_timedelta(time, -123000000) - - def test_nanosecond_string_parsing(self): - ts = Timestamp('2013-05-01 07:15:45.123456789') - # GH 7878 - expected_repr = '2013-05-01 07:15:45.123456789' - expected_value = 1367392545123456789 - self.assertEqual(ts.value, expected_value) - self.assertIn(expected_repr, repr(ts)) - - ts = Timestamp('2013-05-01 07:15:45.123456789+09:00', tz='Asia/Tokyo') - self.assertEqual(ts.value, expected_value - 9 * 3600 * 1000000000) - self.assertIn(expected_repr, repr(ts)) - - ts = Timestamp('2013-05-01 07:15:45.123456789', tz='UTC') - self.assertEqual(ts.value, expected_value) - self.assertIn(expected_repr, repr(ts)) - - ts = Timestamp('2013-05-01 07:15:45.123456789', tz='US/Eastern') - self.assertEqual(ts.value, expected_value + 4 * 3600 * 1000000000) - self.assertIn(expected_repr, repr(ts)) - - # GH 10041 - ts = Timestamp('20130501T071545.123456789') - self.assertEqual(ts.value, expected_value) - self.assertIn(expected_repr, repr(ts)) - - def test_nanosecond_timestamp(self): - # GH 7610 - expected = 1293840000000000005 - t = Timestamp('2011-01-01') + offsets.Nano(5) - self.assertEqual(repr(t), "Timestamp('2011-01-01 00:00:00.000000005')") - self.assertEqual(t.value, expected) - self.assertEqual(t.nanosecond, 5) - - t = Timestamp(t) - self.assertEqual(repr(t), "Timestamp('2011-01-01 00:00:00.000000005')") - self.assertEqual(t.value, expected) - self.assertEqual(t.nanosecond, 5) - - t = Timestamp(np_datetime64_compat('2011-01-01 00:00:00.000000005Z')) - self.assertEqual(repr(t), 
"Timestamp('2011-01-01 00:00:00.000000005')") - self.assertEqual(t.value, expected) - self.assertEqual(t.nanosecond, 5) - - expected = 1293840000000000010 - t = t + offsets.Nano(5) - self.assertEqual(repr(t), "Timestamp('2011-01-01 00:00:00.000000010')") - self.assertEqual(t.value, expected) - self.assertEqual(t.nanosecond, 10) - - t = Timestamp(t) - self.assertEqual(repr(t), "Timestamp('2011-01-01 00:00:00.000000010')") - self.assertEqual(t.value, expected) - self.assertEqual(t.nanosecond, 10) - - t = Timestamp(np_datetime64_compat('2011-01-01 00:00:00.000000010Z')) - self.assertEqual(repr(t), "Timestamp('2011-01-01 00:00:00.000000010')") - self.assertEqual(t.value, expected) - self.assertEqual(t.nanosecond, 10) - - def test_nat_arithmetic(self): - # GH 6873 - i = 2 - f = 1.5 - - for (left, right) in [(pd.NaT, i), (pd.NaT, f), (pd.NaT, np.nan)]: - self.assertIs(left / right, pd.NaT) - self.assertIs(left * right, pd.NaT) - self.assertIs(right * left, pd.NaT) - with tm.assertRaises(TypeError): - right / left - - # Timestamp / datetime - t = Timestamp('2014-01-01') - dt = datetime(2014, 1, 1) - for (left, right) in [(pd.NaT, pd.NaT), (pd.NaT, t), (pd.NaT, dt)]: - # NaT __add__ or __sub__ Timestamp-like (or inverse) returns NaT - self.assertIs(right + left, pd.NaT) - self.assertIs(left + right, pd.NaT) - self.assertIs(left - right, pd.NaT) - self.assertIs(right - left, pd.NaT) - - # timedelta-like - # offsets are tested in test_offsets.py - - delta = timedelta(3600) - td = Timedelta('5s') - - for (left, right) in [(pd.NaT, delta), (pd.NaT, td)]: - # NaT + timedelta-like returns NaT - self.assertIs(right + left, pd.NaT) - self.assertIs(left + right, pd.NaT) - self.assertIs(right - left, pd.NaT) - self.assertIs(left - right, pd.NaT) - - # GH 11718 - tm._skip_if_no_pytz() - import pytz - - t_utc = Timestamp('2014-01-01', tz='UTC') - t_tz = Timestamp('2014-01-01', tz='US/Eastern') - dt_tz = pytz.timezone('Asia/Tokyo').localize(dt) - - for (left, right) in [(pd.NaT, 
t_utc), (pd.NaT, t_tz), - (pd.NaT, dt_tz)]: - # NaT __add__ or __sub__ Timestamp-like (or inverse) returns NaT - self.assertIs(right + left, pd.NaT) - self.assertIs(left + right, pd.NaT) - self.assertIs(left - right, pd.NaT) - self.assertIs(right - left, pd.NaT) - - # int addition / subtraction - for (left, right) in [(pd.NaT, 2), (pd.NaT, 0), (pd.NaT, -3)]: - self.assertIs(right + left, pd.NaT) - self.assertIs(left + right, pd.NaT) - self.assertIs(left - right, pd.NaT) - self.assertIs(right - left, pd.NaT) - - def test_nat_arithmetic_index(self): - # GH 11718 - - # datetime - tm._skip_if_no_pytz() - - dti = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], name='x') - exp = pd.DatetimeIndex([pd.NaT, pd.NaT], name='x') - self.assert_index_equal(dti + pd.NaT, exp) - self.assert_index_equal(pd.NaT + dti, exp) - - dti_tz = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], - tz='US/Eastern', name='x') - exp = pd.DatetimeIndex([pd.NaT, pd.NaT], name='x', tz='US/Eastern') - self.assert_index_equal(dti_tz + pd.NaT, exp) - self.assert_index_equal(pd.NaT + dti_tz, exp) - - exp = pd.TimedeltaIndex([pd.NaT, pd.NaT], name='x') - for (left, right) in [(pd.NaT, dti), (pd.NaT, dti_tz)]: - self.assert_index_equal(left - right, exp) - self.assert_index_equal(right - left, exp) - - # timedelta - tdi = pd.TimedeltaIndex(['1 day', '2 day'], name='x') - exp = pd.DatetimeIndex([pd.NaT, pd.NaT], name='x') - for (left, right) in [(pd.NaT, tdi)]: - self.assert_index_equal(left + right, exp) - self.assert_index_equal(right + left, exp) - self.assert_index_equal(left - right, exp) - self.assert_index_equal(right - left, exp) - - -class TestTimestampOps(tm.TestCase): - - def test_timestamp_and_datetime(self): - self.assertEqual((Timestamp(datetime( - 2013, 10, 13)) - datetime(2013, 10, 12)).days, 1) - self.assertEqual((datetime(2013, 10, 12) - - Timestamp(datetime(2013, 10, 13))).days, -1) - - def test_timestamp_and_series(self): - timestamp_series = Series(date_range('2014-03-17', periods=2, 
freq='D', - tz='US/Eastern')) - first_timestamp = timestamp_series[0] - - delta_series = Series([np.timedelta64(0, 'D'), np.timedelta64(1, 'D')]) - assert_series_equal(timestamp_series - first_timestamp, delta_series) - assert_series_equal(first_timestamp - timestamp_series, -delta_series) - - def test_addition_subtraction_types(self): - # Assert on the types resulting from Timestamp +/- various date/time - # objects - datetime_instance = datetime(2014, 3, 4) - timedelta_instance = timedelta(seconds=1) - # build a timestamp with a frequency, since then it supports - # addition/subtraction of integers - timestamp_instance = date_range(datetime_instance, periods=1, - freq='D')[0] - - self.assertEqual(type(timestamp_instance + 1), Timestamp) - self.assertEqual(type(timestamp_instance - 1), Timestamp) - - # Timestamp + datetime not supported, though subtraction is supported - # and yields timedelta more tests in tseries/base/tests/test_base.py - self.assertEqual( - type(timestamp_instance - datetime_instance), Timedelta) - self.assertEqual( - type(timestamp_instance + timedelta_instance), Timestamp) - self.assertEqual( - type(timestamp_instance - timedelta_instance), Timestamp) - - # Timestamp +/- datetime64 not supported, so not tested (could possibly - # assert error raised?) 
- timedelta64_instance = np.timedelta64(1, 'D') - self.assertEqual( - type(timestamp_instance + timedelta64_instance), Timestamp) - self.assertEqual( - type(timestamp_instance - timedelta64_instance), Timestamp) - - def test_addition_subtraction_preserve_frequency(self): - timestamp_instance = date_range('2014-03-05', periods=1, freq='D')[0] - timedelta_instance = timedelta(days=1) - original_freq = timestamp_instance.freq - self.assertEqual((timestamp_instance + 1).freq, original_freq) - self.assertEqual((timestamp_instance - 1).freq, original_freq) - self.assertEqual( - (timestamp_instance + timedelta_instance).freq, original_freq) - self.assertEqual( - (timestamp_instance - timedelta_instance).freq, original_freq) - - timedelta64_instance = np.timedelta64(1, 'D') - self.assertEqual( - (timestamp_instance + timedelta64_instance).freq, original_freq) - self.assertEqual( - (timestamp_instance - timedelta64_instance).freq, original_freq) - - def test_resolution(self): - - for freq, expected in zip(['A', 'Q', 'M', 'D', 'H', 'T', - 'S', 'L', 'U'], - [RESO_DAY, RESO_DAY, - RESO_DAY, RESO_DAY, - RESO_HR, RESO_MIN, - RESO_SEC, RESO_MS, - RESO_US]): - for tz in [None, 'Asia/Tokyo', 'US/Eastern', - 'dateutil/US/Eastern']: - idx = date_range(start='2013-04-01', periods=30, freq=freq, - tz=tz) - result = period.resolution(idx.asi8, idx.tz) - self.assertEqual(result, expected) - - -class TestTimestampToJulianDate(tm.TestCase): - - def test_compare_1700(self): - r = Timestamp('1700-06-23').to_julian_date() - self.assertEqual(r, 2342145.5) - - def test_compare_2000(self): - r = Timestamp('2000-04-12').to_julian_date() - self.assertEqual(r, 2451646.5) - - def test_compare_2100(self): - r = Timestamp('2100-08-12').to_julian_date() - self.assertEqual(r, 2488292.5) - - def test_compare_hour01(self): - r = Timestamp('2000-08-12T01:00:00').to_julian_date() - self.assertEqual(r, 2451768.5416666666666666) - - def test_compare_hour13(self): - r = 
Timestamp('2000-08-12T13:00:00').to_julian_date() - self.assertEqual(r, 2451769.0416666666666666) - - -class TestTimeSeries(tm.TestCase): - - def test_timestamp_to_datetime(self): - tm._skip_if_no_pytz() - rng = date_range('20090415', '20090519', tz='US/Eastern') - - stamp = rng[0] - dtval = stamp.to_pydatetime() - self.assertEqual(stamp, dtval) - self.assertEqual(stamp.tzinfo, dtval.tzinfo) - - def test_timestamp_to_datetime_dateutil(self): - tm._skip_if_no_pytz() - rng = date_range('20090415', '20090519', tz='dateutil/US/Eastern') - - stamp = rng[0] - dtval = stamp.to_pydatetime() - self.assertEqual(stamp, dtval) - self.assertEqual(stamp.tzinfo, dtval.tzinfo) - - def test_timestamp_to_datetime_explicit_pytz(self): - tm._skip_if_no_pytz() - import pytz - rng = date_range('20090415', '20090519', - tz=pytz.timezone('US/Eastern')) - - stamp = rng[0] - dtval = stamp.to_pydatetime() - self.assertEqual(stamp, dtval) - self.assertEqual(stamp.tzinfo, dtval.tzinfo) - - def test_timestamp_to_datetime_explicit_dateutil(self): - tm._skip_if_windows_python_3() - tm._skip_if_no_dateutil() - from pandas.tslib import _dateutil_gettz as gettz - rng = date_range('20090415', '20090519', tz=gettz('US/Eastern')) - - stamp = rng[0] - dtval = stamp.to_pydatetime() - self.assertEqual(stamp, dtval) - self.assertEqual(stamp.tzinfo, dtval.tzinfo) - - def test_timestamp_fields(self): - # extra fields from DatetimeIndex like quarter and week - idx = tm.makeDateIndex(100) - - fields = ['dayofweek', 'dayofyear', 'week', 'weekofyear', 'quarter', - 'days_in_month', 'is_month_start', 'is_month_end', - 'is_quarter_start', 'is_quarter_end', 'is_year_start', - 'is_year_end', 'weekday_name'] - for f in fields: - expected = getattr(idx, f)[-1] - result = getattr(Timestamp(idx[-1]), f) - self.assertEqual(result, expected) - - self.assertEqual(idx.freq, Timestamp(idx[-1], idx.freq).freq) - self.assertEqual(idx.freqstr, Timestamp(idx[-1], idx.freq).freqstr) - - def test_timestamp_date_out_of_range(self): 
- self.assertRaises(ValueError, Timestamp, '1676-01-01') - self.assertRaises(ValueError, Timestamp, '2263-01-01') - - # 1475 - self.assertRaises(ValueError, DatetimeIndex, ['1400-01-01']) - self.assertRaises(ValueError, DatetimeIndex, [datetime(1400, 1, 1)]) - - def test_timestamp_repr(self): - # pre-1900 - stamp = Timestamp('1850-01-01', tz='US/Eastern') - repr(stamp) - - iso8601 = '1850-01-01 01:23:45.012345' - stamp = Timestamp(iso8601, tz='US/Eastern') - result = repr(stamp) - self.assertIn(iso8601, result) - - def test_timestamp_from_ordinal(self): - - # GH 3042 - dt = datetime(2011, 4, 16, 0, 0) - ts = Timestamp.fromordinal(dt.toordinal()) - self.assertEqual(ts.to_pydatetime(), dt) - - # with a tzinfo - stamp = Timestamp('2011-4-16', tz='US/Eastern') - dt_tz = stamp.to_pydatetime() - ts = Timestamp.fromordinal(dt_tz.toordinal(), tz='US/Eastern') - self.assertEqual(ts.to_pydatetime(), dt_tz) - - def test_timestamp_compare_with_early_datetime(self): - # e.g. datetime.min - stamp = Timestamp('2012-01-01') - - self.assertFalse(stamp == datetime.min) - self.assertFalse(stamp == datetime(1600, 1, 1)) - self.assertFalse(stamp == datetime(2700, 1, 1)) - self.assertNotEqual(stamp, datetime.min) - self.assertNotEqual(stamp, datetime(1600, 1, 1)) - self.assertNotEqual(stamp, datetime(2700, 1, 1)) - self.assertTrue(stamp > datetime(1600, 1, 1)) - self.assertTrue(stamp >= datetime(1600, 1, 1)) - self.assertTrue(stamp < datetime(2700, 1, 1)) - self.assertTrue(stamp <= datetime(2700, 1, 1)) - - def test_timestamp_equality(self): - - # GH 11034 - s = Series([Timestamp('2000-01-29 01:59:00'), 'NaT']) - result = s != s - assert_series_equal(result, Series([False, True])) - result = s != s[0] - assert_series_equal(result, Series([False, True])) - result = s != s[1] - assert_series_equal(result, Series([True, True])) - - result = s == s - assert_series_equal(result, Series([True, False])) - result = s == s[0] - assert_series_equal(result, Series([True, False])) - result = s == 
s[1] - assert_series_equal(result, Series([False, False])) - - def test_series_box_timestamp(self): - rng = date_range('20090415', '20090519', freq='B') - s = Series(rng) - - tm.assertIsInstance(s[5], Timestamp) - - rng = date_range('20090415', '20090519', freq='B') - s = Series(rng, index=rng) - tm.assertIsInstance(s[5], Timestamp) - - tm.assertIsInstance(s.iat[5], Timestamp) - - def test_frame_setitem_timestamp(self): - # 2155 - columns = DatetimeIndex(start='1/1/2012', end='2/1/2012', - freq=offsets.BDay()) - index = lrange(10) - data = DataFrame(columns=columns, index=index) - t = datetime(2012, 11, 1) - ts = Timestamp(t) - data[ts] = np.nan # works - - def test_to_html_timestamp(self): - rng = date_range('2000-01-01', periods=10) - df = DataFrame(np.random.randn(10, 4), index=rng) - - result = df.to_html() - self.assertIn('2000-01-01', result) - - def test_series_map_box_timestamps(self): - # #2689, #2627 - s = Series(date_range('1/1/2000', periods=10)) - - def f(x): - return (x.hour, x.day, x.month) - - # it works! 
- s.map(f) - s.apply(f) - DataFrame(s).applymap(f) - - def test_dti_slicing(self): - dti = DatetimeIndex(start='1/1/2005', end='12/1/2005', freq='M') - dti2 = dti[[1, 3, 5]] - - v1 = dti2[0] - v2 = dti2[1] - v3 = dti2[2] - - self.assertEqual(v1, Timestamp('2/28/2005')) - self.assertEqual(v2, Timestamp('4/30/2005')) - self.assertEqual(v3, Timestamp('6/30/2005')) - - # don't carry freq through irregular slicing - self.assertIsNone(dti2.freq) - - def test_woy_boundary(self): - # make sure weeks at year boundaries are correct - d = datetime(2013, 12, 31) - result = Timestamp(d).week - expected = 1 # ISO standard - self.assertEqual(result, expected) - - d = datetime(2008, 12, 28) - result = Timestamp(d).week - expected = 52 # ISO standard - self.assertEqual(result, expected) - - d = datetime(2009, 12, 31) - result = Timestamp(d).week - expected = 53 # ISO standard - self.assertEqual(result, expected) - - d = datetime(2010, 1, 1) - result = Timestamp(d).week - expected = 53 # ISO standard - self.assertEqual(result, expected) - - d = datetime(2010, 1, 3) - result = Timestamp(d).week - expected = 53 # ISO standard - self.assertEqual(result, expected) - - result = np.array([Timestamp(datetime(*args)).week - for args in [(2000, 1, 1), (2000, 1, 2), ( - 2005, 1, 1), (2005, 1, 2)]]) - self.assertTrue((result == [52, 52, 53, 53]).all()) - - -class TestTsUtil(tm.TestCase): - - def test_min_valid(self): - # Ensure that Timestamp.min is a valid Timestamp - Timestamp(Timestamp.min) - - def test_max_valid(self): - # Ensure that Timestamp.max is a valid Timestamp - Timestamp(Timestamp.max) - - def test_to_datetime_bijective(self): - # Ensure that converting to datetime and back only loses precision - # by going from nanoseconds to microseconds. 
- exp_warning = None if Timestamp.max.nanosecond == 0 else UserWarning - with tm.assert_produces_warning(exp_warning, check_stacklevel=False): - self.assertEqual( - Timestamp(Timestamp.max.to_pydatetime()).value / 1000, - Timestamp.max.value / 1000) - - exp_warning = None if Timestamp.min.nanosecond == 0 else UserWarning - with tm.assert_produces_warning(exp_warning, check_stacklevel=False): - self.assertEqual( - Timestamp(Timestamp.min.to_pydatetime()).value / 1000, - Timestamp.min.value / 1000) - - -class TestTslib(tm.TestCase): - - def test_round(self): - stamp = Timestamp('2000-01-05 05:09:15.13') - - def _check_round(freq, expected): - result = stamp.round(freq=freq) - self.assertEqual(result, expected) - - for freq, expected in [('D', Timestamp('2000-01-05 00:00:00')), - ('H', Timestamp('2000-01-05 05:00:00')), - ('S', Timestamp('2000-01-05 05:09:15'))]: - _check_round(freq, expected) - - msg = pd.tseries.frequencies._INVALID_FREQ_ERROR - with self.assertRaisesRegexp(ValueError, msg): - stamp.round('foo') diff --git a/pandas/tests/scalar/timedelta/__init__.py b/pandas/tests/scalar/timedelta/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py new file mode 100644 index 0000000000000..179768fcc6709 --- /dev/null +++ b/pandas/tests/scalar/timedelta/test_arithmetic.py @@ -0,0 +1,616 @@ +# -*- coding: utf-8 -*- +""" +Tests for scalar Timedelta arithmetic ops +""" +from datetime import datetime, timedelta +import operator + +import numpy as np +import pytest + +import pandas as pd +import pandas.util.testing as tm +from pandas.core import ops +from pandas import Timedelta, Timestamp, NaT + + +class TestTimedeltaAdditionSubtraction(object): + """ + Tests for Timedelta methods: + + __add__, __radd__, + __sub__, __rsub__ + """ + @pytest.mark.parametrize('ten_seconds', [ + Timedelta(10, unit='s'), + timedelta(seconds=10), + np.timedelta64(10, 
's'), + np.timedelta64(10000000000, 'ns'), + pd.offsets.Second(10)]) + def test_td_add_sub_ten_seconds(self, ten_seconds): + # GH#6808 + base = Timestamp('20130101 09:01:12.123456') + expected_add = Timestamp('20130101 09:01:22.123456') + expected_sub = Timestamp('20130101 09:01:02.123456') + + result = base + ten_seconds + assert result == expected_add + + result = base - ten_seconds + assert result == expected_sub + + @pytest.mark.parametrize('one_day_ten_secs', [ + Timedelta('1 day, 00:00:10'), + Timedelta('1 days, 00:00:10'), + timedelta(days=1, seconds=10), + np.timedelta64(1, 'D') + np.timedelta64(10, 's'), + pd.offsets.Day() + pd.offsets.Second(10)]) + def test_td_add_sub_one_day_ten_seconds(self, one_day_ten_secs): + # GH#6808 + base = Timestamp('20130102 09:01:12.123456') + expected_add = Timestamp('20130103 09:01:22.123456') + expected_sub = Timestamp('20130101 09:01:02.123456') + + result = base + one_day_ten_secs + assert result == expected_add + + result = base - one_day_ten_secs + assert result == expected_sub + + @pytest.mark.parametrize('op', [operator.add, ops.radd]) + def test_td_add_datetimelike_scalar(self, op): + # GH#19738 + td = Timedelta(10, unit='d') + + result = op(td, datetime(2016, 1, 1)) + if op is operator.add: + # datetime + Timedelta does _not_ call Timedelta.__radd__, + # so we get a datetime back instead of a Timestamp + assert isinstance(result, Timestamp) + assert result == Timestamp(2016, 1, 11) + + result = op(td, Timestamp('2018-01-12 18:09')) + assert isinstance(result, Timestamp) + assert result == Timestamp('2018-01-22 18:09') + + result = op(td, np.datetime64('2018-01-12')) + assert isinstance(result, Timestamp) + assert result == Timestamp('2018-01-22') + + result = op(td, NaT) + assert result is NaT + + with pytest.raises(TypeError): + op(td, 2) + with pytest.raises(TypeError): + op(td, 2.0) + + @pytest.mark.parametrize('op', [operator.add, ops.radd]) + def test_td_add_td(self, op): + td = Timedelta(10, unit='d') + + 
result = op(td, Timedelta(days=10)) + assert isinstance(result, Timedelta) + assert result == Timedelta(days=20) + + @pytest.mark.parametrize('op', [operator.add, ops.radd]) + def test_td_add_pytimedelta(self, op): + td = Timedelta(10, unit='d') + result = op(td, timedelta(days=9)) + assert isinstance(result, Timedelta) + assert result == Timedelta(days=19) + + @pytest.mark.parametrize('op', [operator.add, ops.radd]) + def test_td_add_timedelta64(self, op): + td = Timedelta(10, unit='d') + result = op(td, np.timedelta64(-4, 'D')) + assert isinstance(result, Timedelta) + assert result == Timedelta(days=6) + + @pytest.mark.parametrize('op', [operator.add, ops.radd]) + def test_td_add_offset(self, op): + td = Timedelta(10, unit='d') + + result = op(td, pd.offsets.Hour(6)) + assert isinstance(result, Timedelta) + assert result == Timedelta(days=10, hours=6) + + def test_td_sub_td(self): + td = Timedelta(10, unit='d') + expected = Timedelta(0, unit='ns') + result = td - td + assert isinstance(result, Timedelta) + assert result == expected + + def test_td_sub_pytimedelta(self): + td = Timedelta(10, unit='d') + expected = Timedelta(0, unit='ns') + result = td - td.to_pytimedelta() + assert isinstance(result, Timedelta) + assert result == expected + + def test_td_sub_timedelta64(self): + td = Timedelta(10, unit='d') + expected = Timedelta(0, unit='ns') + result = td - td.to_timedelta64() + assert isinstance(result, Timedelta) + assert result == expected + + def test_td_sub_nat(self): + td = Timedelta(10, unit='d') + result = td - NaT + assert result is NaT + + def test_td_sub_td64_nat(self): + td = Timedelta(10, unit='d') + result = td - np.timedelta64('NaT') + assert result is NaT + + def test_td_sub_offset(self): + td = Timedelta(10, unit='d') + result = td - pd.offsets.Hour(1) + assert isinstance(result, Timedelta) + assert result == Timedelta(239, unit='h') + + def test_td_sub_numeric_raises(self): + td = td = Timedelta(10, unit='d') + with pytest.raises(TypeError): + 
td - 2 + with pytest.raises(TypeError): + td - 2.0 + + def test_td_rsub_pytimedelta(self): + td = Timedelta(10, unit='d') + expected = Timedelta(0, unit='ns') + + result = td.to_pytimedelta() - td + assert isinstance(result, Timedelta) + assert result == expected + + def test_td_rsub_timedelta64(self): + td = Timedelta(10, unit='d') + expected = Timedelta(0, unit='ns') + + result = td.to_timedelta64() - td + assert isinstance(result, Timedelta) + assert result == expected + + def test_td_rsub_nat(self): + td = Timedelta(10, unit='d') + result = NaT - td + assert result is NaT + + result = np.datetime64('NaT') - td + assert result is NaT + + def test_td_rsub_td64_nat(self): + td = Timedelta(10, unit='d') + result = np.timedelta64('NaT') - td + assert result is NaT + + def test_td_rsub_offset(self): + result = pd.offsets.Hour(1) - Timedelta(10, unit='d') + assert isinstance(result, Timedelta) + assert result == Timedelta(-239, unit='h') + + def test_td_rsub_numeric_raises(self): + td = td = Timedelta(10, unit='d') + with pytest.raises(TypeError): + 2 - td + with pytest.raises(TypeError): + 2.0 - td + + +class TestTimedeltaMultiplicationDivision(object): + """ + Tests for Timedelta methods: + + __mul__, __rmul__, + __div__, __rdiv__, + __truediv__, __rtruediv__, + __floordiv__, __rfloordiv__, + __mod__, __rmod__, + __divmod__, __rdivmod__ + """ + + # --------------------------------------------------------------- + # Timedelta.__mul__, __rmul__ + + @pytest.mark.parametrize('td_nat', [pd.NaT, + np.timedelta64('NaT', 'ns'), + np.timedelta64('NaT')]) + @pytest.mark.parametrize('op', [operator.mul, ops.rmul]) + def test_td_mul_nat(self, op, td_nat): + # GH#19819 + td = Timedelta(10, unit='d') + with pytest.raises(TypeError): + op(td, td_nat) + + @pytest.mark.parametrize('op', [operator.mul, ops.rmul]) + def test_td_mul_scalar(self, op): + # GH#19738 + td = Timedelta(minutes=3) + + result = op(td, 2) + assert result == Timedelta(minutes=6) + + result = op(td, 1.5) + assert 
result == Timedelta(minutes=4, seconds=30) + + assert op(td, np.nan) is NaT + + assert op(-1, td).value == -1 * td.value + assert op(-1.0, td).value == -1.0 * td.value + + with pytest.raises(TypeError): + # timedelta * datetime is gibberish + op(td, Timestamp(2016, 1, 2)) + + with pytest.raises(TypeError): + # invalid multiply with another timedelta + op(td, td) + + # --------------------------------------------------------------- + # Timedelta.__div__, __truediv__ + + def test_td_div_timedeltalike_scalar(self): + # GH#19738 + td = Timedelta(10, unit='d') + + result = td / pd.offsets.Hour(1) + assert result == 240 + + assert td / td == 1 + assert td / np.timedelta64(60, 'h') == 4 + + assert np.isnan(td / NaT) + + def test_td_div_numeric_scalar(self): + # GH#19738 + td = Timedelta(10, unit='d') + + result = td / 2 + assert isinstance(result, Timedelta) + assert result == Timedelta(days=5) + + result = td / 5.0 + assert isinstance(result, Timedelta) + assert result == Timedelta(days=2) + + # --------------------------------------------------------------- + # Timedelta.__rdiv__ + + def test_td_rdiv_timedeltalike_scalar(self): + # GH#19738 + td = Timedelta(10, unit='d') + result = pd.offsets.Hour(1) / td + assert result == 1 / 240.0 + + assert np.timedelta64(60, 'h') / td == 0.25 + + # --------------------------------------------------------------- + # Timedelta.__floordiv__ + + def test_td_floordiv_timedeltalike_scalar(self): + # GH#18846 + td = Timedelta(hours=3, minutes=4) + scalar = Timedelta(hours=3, minutes=3) + + assert td // scalar == 1 + assert -td // scalar.to_pytimedelta() == -2 + assert (2 * td) // scalar.to_timedelta64() == 2 + + def test_td_floordiv_null_scalar(self): + # GH#18846 + td = Timedelta(hours=3, minutes=4) + + assert td // np.nan is NaT + assert np.isnan(td // NaT) + assert np.isnan(td // np.timedelta64('NaT')) + + def test_td_floordiv_offsets(self): + # GH#19738 + td = Timedelta(hours=3, minutes=4) + assert td // pd.offsets.Hour(1) == 3 + 
assert td // pd.offsets.Minute(2) == 92 + + def test_td_floordiv_invalid_scalar(self): + # GH#18846 + td = Timedelta(hours=3, minutes=4) + + with pytest.raises(TypeError): + td // np.datetime64('2016-01-01', dtype='datetime64[us]') + + def test_td_floordiv_numeric_scalar(self): + # GH#18846 + td = Timedelta(hours=3, minutes=4) + + expected = Timedelta(hours=1, minutes=32) + assert td // 2 == expected + assert td // 2.0 == expected + assert td // np.float64(2.0) == expected + assert td // np.int32(2.0) == expected + assert td // np.uint8(2.0) == expected + + def test_td_floordiv_timedeltalike_array(self): + # GH#18846 + td = Timedelta(hours=3, minutes=4) + scalar = Timedelta(hours=3, minutes=3) + + # Array-like others + assert td // np.array(scalar.to_timedelta64()) == 1 + + res = (3 * td) // np.array([scalar.to_timedelta64()]) + expected = np.array([3], dtype=np.int64) + tm.assert_numpy_array_equal(res, expected) + + res = (10 * td) // np.array([scalar.to_timedelta64(), + np.timedelta64('NaT')]) + expected = np.array([10, np.nan]) + tm.assert_numpy_array_equal(res, expected) + + def test_td_floordiv_numeric_series(self): + # GH#18846 + td = Timedelta(hours=3, minutes=4) + ser = pd.Series([1], dtype=np.int64) + res = td // ser + assert res.dtype.kind == 'm' + + # --------------------------------------------------------------- + # Timedelta.__rfloordiv__ + + def test_td_rfloordiv_timedeltalike_scalar(self): + # GH#18846 + td = Timedelta(hours=3, minutes=3) + scalar = Timedelta(hours=3, minutes=4) + + # scalar others + # x // Timedelta is defined only for timedelta-like x. int-like, + # float-like, and date-like, in particular, should all either + # a) raise TypeError directly or + # b) return NotImplemented, following which the reversed + # operation will raise TypeError. 
+ assert td.__rfloordiv__(scalar) == 1 + assert (-td).__rfloordiv__(scalar.to_pytimedelta()) == -2 + assert (2 * td).__rfloordiv__(scalar.to_timedelta64()) == 0 + + def test_td_rfloordiv_null_scalar(self): + # GH#18846 + td = Timedelta(hours=3, minutes=3) + + assert np.isnan(td.__rfloordiv__(NaT)) + assert np.isnan(td.__rfloordiv__(np.timedelta64('NaT'))) + + def test_td_rfloordiv_offsets(self): + # GH#19738 + assert pd.offsets.Hour(1) // Timedelta(minutes=25) == 2 + + def test_td_rfloordiv_invalid_scalar(self): + # GH#18846 + td = Timedelta(hours=3, minutes=3) + + dt64 = np.datetime64('2016-01-01', dtype='datetime64[us]') + with pytest.raises(TypeError): + td.__rfloordiv__(dt64) + + def test_td_rfloordiv_numeric_scalar(self): + # GH#18846 + td = Timedelta(hours=3, minutes=3) + + assert td.__rfloordiv__(np.nan) is NotImplemented + assert td.__rfloordiv__(3.5) is NotImplemented + assert td.__rfloordiv__(2) is NotImplemented + + with pytest.raises(TypeError): + td.__rfloordiv__(np.float64(2.0)) + with pytest.raises(TypeError): + td.__rfloordiv__(np.int32(2.0)) + with pytest.raises(TypeError): + td.__rfloordiv__(np.uint8(9)) + + def test_td_rfloordiv_timedeltalike_array(self): + # GH#18846 + td = Timedelta(hours=3, minutes=3) + scalar = Timedelta(hours=3, minutes=4) + + # Array-like others + assert td.__rfloordiv__(np.array(scalar.to_timedelta64())) == 1 + + res = td.__rfloordiv__(np.array([(3 * scalar).to_timedelta64()])) + expected = np.array([3], dtype=np.int64) + tm.assert_numpy_array_equal(res, expected) + + arr = np.array([(10 * scalar).to_timedelta64(), + np.timedelta64('NaT')]) + res = td.__rfloordiv__(arr) + expected = np.array([10, np.nan]) + tm.assert_numpy_array_equal(res, expected) + + def test_td_rfloordiv_numeric_series(self): + # GH#18846 + td = Timedelta(hours=3, minutes=3) + ser = pd.Series([1], dtype=np.int64) + res = td.__rfloordiv__(ser) + assert res is NotImplemented + with pytest.raises(TypeError): + ser // td + + def 
test_mod_timedeltalike(self): + # GH#19365 + td = Timedelta(hours=37) + + # Timedelta-like others + result = td % Timedelta(hours=6) + assert isinstance(result, Timedelta) + assert result == Timedelta(hours=1) + + result = td % timedelta(minutes=60) + assert isinstance(result, Timedelta) + assert result == Timedelta(0) + + result = td % NaT + assert result is NaT + + def test_mod_timedelta64_nat(self): + # GH#19365 + td = Timedelta(hours=37) + + result = td % np.timedelta64('NaT', 'ns') + assert result is NaT + + def test_mod_timedelta64(self): + # GH#19365 + td = Timedelta(hours=37) + + result = td % np.timedelta64(2, 'h') + assert isinstance(result, Timedelta) + assert result == Timedelta(hours=1) + + def test_mod_offset(self): + # GH#19365 + td = Timedelta(hours=37) + + result = td % pd.offsets.Hour(5) + assert isinstance(result, Timedelta) + assert result == Timedelta(hours=2) + + # ---------------------------------------------------------------- + # Timedelta.__mod__, __rmod__ + + def test_mod_numeric(self): + # GH#19365 + td = Timedelta(hours=37) + + # Numeric Others + result = td % 2 + assert isinstance(result, Timedelta) + assert result == Timedelta(0) + + result = td % 1e12 + assert isinstance(result, Timedelta) + assert result == Timedelta(minutes=3, seconds=20) + + result = td % int(1e12) + assert isinstance(result, Timedelta) + assert result == Timedelta(minutes=3, seconds=20) + + def test_mod_invalid(self): + # GH#19365 + td = Timedelta(hours=37) + + with pytest.raises(TypeError): + td % pd.Timestamp('2018-01-22') + + with pytest.raises(TypeError): + td % [] + + def test_rmod_pytimedelta(self): + # GH#19365 + td = Timedelta(minutes=3) + + result = timedelta(minutes=4) % td + assert isinstance(result, Timedelta) + assert result == Timedelta(minutes=1) + + def test_rmod_timedelta64(self): + # GH#19365 + td = Timedelta(minutes=3) + result = np.timedelta64(5, 'm') % td + assert isinstance(result, Timedelta) + assert result == Timedelta(minutes=2) + + def 
test_rmod_invalid(self): + # GH#19365 + td = Timedelta(minutes=3) + + with pytest.raises(TypeError): + pd.Timestamp('2018-01-22') % td + + with pytest.raises(TypeError): + 15 % td + + with pytest.raises(TypeError): + 16.0 % td + + with pytest.raises(TypeError): + np.array([22, 24]) % td + + # ---------------------------------------------------------------- + # Timedelta.__divmod__, __rdivmod__ + + def test_divmod_numeric(self): + # GH#19365 + td = Timedelta(days=2, hours=6) + + result = divmod(td, 53 * 3600 * 1e9) + assert result[0] == Timedelta(1, unit='ns') + assert isinstance(result[1], Timedelta) + assert result[1] == Timedelta(hours=1) + + assert result + result = divmod(td, np.nan) + assert result[0] is pd.NaT + assert result[1] is pd.NaT + + def test_divmod(self): + # GH#19365 + td = Timedelta(days=2, hours=6) + + result = divmod(td, timedelta(days=1)) + assert result[0] == 2 + assert isinstance(result[1], Timedelta) + assert result[1] == Timedelta(hours=6) + + result = divmod(td, 54) + assert result[0] == Timedelta(hours=1) + assert isinstance(result[1], Timedelta) + assert result[1] == Timedelta(0) + + result = divmod(td, pd.NaT) + assert np.isnan(result[0]) + assert result[1] is pd.NaT + + def test_divmod_offset(self): + # GH#19365 + td = Timedelta(days=2, hours=6) + + result = divmod(td, pd.offsets.Hour(-4)) + assert result[0] == -14 + assert isinstance(result[1], Timedelta) + assert result[1] == Timedelta(hours=-2) + + def test_divmod_invalid(self): + # GH#19365 + td = Timedelta(days=2, hours=6) + + with pytest.raises(TypeError): + divmod(td, pd.Timestamp('2018-01-22')) + + def test_rdivmod_pytimedelta(self): + # GH#19365 + result = divmod(timedelta(days=2, hours=6), Timedelta(days=1)) + assert result[0] == 2 + assert isinstance(result[1], Timedelta) + assert result[1] == Timedelta(hours=6) + + def test_rdivmod_offset(self): + result = divmod(pd.offsets.Hour(54), Timedelta(hours=-4)) + assert result[0] == -14 + assert isinstance(result[1], Timedelta) + 
assert result[1] == Timedelta(hours=-2) + + def test_rdivmod_invalid(self): + # GH#19365 + td = Timedelta(minutes=3) + + with pytest.raises(TypeError): + divmod(pd.Timestamp('2018-01-22'), td) + + with pytest.raises(TypeError): + divmod(15, td) + + with pytest.raises(TypeError): + divmod(16.0, td) + + with pytest.raises(TypeError): + divmod(np.array([22, 24]), td) diff --git a/pandas/tests/scalar/timedelta/test_construction.py b/pandas/tests/scalar/timedelta/test_construction.py new file mode 100644 index 0000000000000..5ccad9e6b4e3c --- /dev/null +++ b/pandas/tests/scalar/timedelta/test_construction.py @@ -0,0 +1,222 @@ +# -*- coding: utf-8 -*- +from datetime import timedelta + +import pytest +import numpy as np + +import pandas as pd +import pandas.util.testing as tm +from pandas import Timedelta + + +def test_construction(): + expected = np.timedelta64(10, 'D').astype('m8[ns]').view('i8') + assert Timedelta(10, unit='d').value == expected + assert Timedelta(10.0, unit='d').value == expected + assert Timedelta('10 days').value == expected + assert Timedelta(days=10).value == expected + assert Timedelta(days=10.0).value == expected + + expected += np.timedelta64(10, 's').astype('m8[ns]').view('i8') + assert Timedelta('10 days 00:00:10').value == expected + assert Timedelta(days=10, seconds=10).value == expected + assert Timedelta(days=10, milliseconds=10 * 1000).value == expected + assert Timedelta(days=10, + microseconds=10 * 1000 * 1000).value == expected + + # rounding cases + assert Timedelta(82739999850000).value == 82739999850000 + assert ('0 days 22:58:59.999850' in str(Timedelta(82739999850000))) + assert Timedelta(123072001000000).value == 123072001000000 + assert ('1 days 10:11:12.001' in str(Timedelta(123072001000000))) + + # string conversion with/without leading zero + # GH#9570 + assert Timedelta('0:00:00') == timedelta(hours=0) + assert Timedelta('00:00:00') == timedelta(hours=0) + assert Timedelta('-1:00:00') == -timedelta(hours=1) + assert 
Timedelta('-01:00:00') == -timedelta(hours=1) + + # more strings & abbrevs + # GH#8190 + assert Timedelta('1 h') == timedelta(hours=1) + assert Timedelta('1 hour') == timedelta(hours=1) + assert Timedelta('1 hr') == timedelta(hours=1) + assert Timedelta('1 hours') == timedelta(hours=1) + assert Timedelta('-1 hours') == -timedelta(hours=1) + assert Timedelta('1 m') == timedelta(minutes=1) + assert Timedelta('1.5 m') == timedelta(seconds=90) + assert Timedelta('1 minute') == timedelta(minutes=1) + assert Timedelta('1 minutes') == timedelta(minutes=1) + assert Timedelta('1 s') == timedelta(seconds=1) + assert Timedelta('1 second') == timedelta(seconds=1) + assert Timedelta('1 seconds') == timedelta(seconds=1) + assert Timedelta('1 ms') == timedelta(milliseconds=1) + assert Timedelta('1 milli') == timedelta(milliseconds=1) + assert Timedelta('1 millisecond') == timedelta(milliseconds=1) + assert Timedelta('1 us') == timedelta(microseconds=1) + assert Timedelta('1 micros') == timedelta(microseconds=1) + assert Timedelta('1 microsecond') == timedelta(microseconds=1) + assert Timedelta('1.5 microsecond') == Timedelta('00:00:00.000001500') + assert Timedelta('1 ns') == Timedelta('00:00:00.000000001') + assert Timedelta('1 nano') == Timedelta('00:00:00.000000001') + assert Timedelta('1 nanosecond') == Timedelta('00:00:00.000000001') + + # combos + assert Timedelta('10 days 1 hour') == timedelta(days=10, hours=1) + assert Timedelta('10 days 1 h') == timedelta(days=10, hours=1) + assert Timedelta('10 days 1 h 1m 1s') == timedelta( + days=10, hours=1, minutes=1, seconds=1) + assert Timedelta('-10 days 1 h 1m 1s') == -timedelta( + days=10, hours=1, minutes=1, seconds=1) + assert Timedelta('-10 days 1 h 1m 1s') == -timedelta( + days=10, hours=1, minutes=1, seconds=1) + assert Timedelta('-10 days 1 h 1m 1s 3us') == -timedelta( + days=10, hours=1, minutes=1, seconds=1, microseconds=3) + assert Timedelta('-10 days 1 h 1.5m 1s 3us') == -timedelta( + days=10, hours=1, minutes=1, 
seconds=31, microseconds=3) + + # Currently invalid as it has a - on the hh:mm:dd part + # (only allowed on the days) + with pytest.raises(ValueError): + Timedelta('-10 days -1 h 1.5m 1s 3us') + + # only leading neg signs are allowed + with pytest.raises(ValueError): + Timedelta('10 days -1 h 1.5m 1s 3us') + + # no units specified + with pytest.raises(ValueError): + Timedelta('3.1415') + + # invalid construction + tm.assert_raises_regex(ValueError, "cannot construct a Timedelta", + lambda: Timedelta()) + tm.assert_raises_regex(ValueError, + "unit abbreviation w/o a number", + lambda: Timedelta('foo')) + tm.assert_raises_regex(ValueError, + "cannot construct a Timedelta from the " + "passed arguments, allowed keywords are ", + lambda: Timedelta(day=10)) + + # floats + expected = np.timedelta64( + 10, 's').astype('m8[ns]').view('i8') + np.timedelta64( + 500, 'ms').astype('m8[ns]').view('i8') + assert Timedelta(10.5, unit='s').value == expected + + # offset + assert pd.to_timedelta(pd.offsets.Hour(2)) == Timedelta(hours=2) + assert Timedelta(pd.offsets.Hour(2)) == Timedelta(hours=2) + assert Timedelta(pd.offsets.Second(2)) == Timedelta(seconds=2) + + # GH#11995: unicode + expected = Timedelta('1H') + result = pd.Timedelta(u'1H') + assert result == expected + assert (pd.to_timedelta(pd.offsets.Hour(2)) == + Timedelta(u'0 days, 02:00:00')) + + with pytest.raises(ValueError): + Timedelta(u'foo bar') + + +@pytest.mark.parametrize('item', list({'days': 'D', + 'seconds': 's', + 'microseconds': 'us', + 'milliseconds': 'ms', + 'minutes': 'm', + 'hours': 'h', + 'weeks': 'W'}.items())) +@pytest.mark.parametrize('npdtype', [np.int64, np.int32, np.int16, + np.float64, np.float32, np.float16]) +def test_td_construction_with_np_dtypes(npdtype, item): + # GH#8757: test construction with np dtypes + pykwarg, npkwarg = item + expected = np.timedelta64(1, npkwarg).astype('m8[ns]').view('i8') + assert Timedelta(**{pykwarg: npdtype(1)}).value == expected + + 
+@pytest.mark.parametrize('val', [ + '1s', '-1s', '1us', '-1us', '1 day', '-1 day', + '-23:59:59.999999', '-1 days +23:59:59.999999', '-1ns', + '1ns', '-23:59:59.999999999']) +def test_td_from_repr_roundtrip(val): + # round-trip both for string and value + td = Timedelta(val) + assert Timedelta(td.value) == td + + # str does not normally display nanos + if not td.nanoseconds: + assert Timedelta(str(td)) == td + assert Timedelta(td._repr_base(format='all')) == td + + +def test_overflow_on_construction(): + # xref https://github.com/statsmodels/statsmodels/issues/3374 + value = pd.Timedelta('1day').value * 20169940 + with pytest.raises(OverflowError): + pd.Timedelta(value) + + # xref GH#17637 + with pytest.raises(OverflowError): + pd.Timedelta(7 * 19999, unit='D') + + with pytest.raises(OverflowError): + pd.Timedelta(timedelta(days=13 * 19999)) + + +@pytest.mark.parametrize('fmt,exp', [ + ('P6DT0H50M3.010010012S', Timedelta(days=6, minutes=50, seconds=3, + milliseconds=10, microseconds=10, + nanoseconds=12)), + ('P-6DT0H50M3.010010012S', Timedelta(days=-6, minutes=50, seconds=3, + milliseconds=10, microseconds=10, + nanoseconds=12)), + ('P4DT12H30M5S', Timedelta(days=4, hours=12, minutes=30, seconds=5)), + ('P0DT0H0M0.000000123S', Timedelta(nanoseconds=123)), + ('P0DT0H0M0.00001S', Timedelta(microseconds=10)), + ('P0DT0H0M0.001S', Timedelta(milliseconds=1)), + ('P0DT0H1M0S', Timedelta(minutes=1)), + ('P1DT25H61M61S', Timedelta(days=1, hours=25, minutes=61, seconds=61)) +]) +def test_iso_constructor(fmt, exp): + assert Timedelta(fmt) == exp + + +@pytest.mark.parametrize('fmt', [ + 'PPPPPPPPPPPP', 'PDTHMS', 'P0DT999H999M999S', + 'P1DT0H0M0.0000000000000S', 'P1DT0H0M00000000000S', + 'P1DT0H0M0.S']) +def test_iso_constructor_raises(fmt): + with tm.assert_raises_regex(ValueError, 'Invalid ISO 8601 Duration ' + 'format - {}'.format(fmt)): + Timedelta(fmt) + + +def test_td_constructor_on_nanoseconds(): + # GH#9273 + result = Timedelta(nanoseconds=100) + expected = 
Timedelta('100ns') + assert result == expected + + result = Timedelta(days=1, hours=1, minutes=1, weeks=1, seconds=1, + milliseconds=1, microseconds=1, nanoseconds=1) + expected = Timedelta(694861001001001) + assert result == expected + + result = Timedelta(microseconds=1) + Timedelta(nanoseconds=1) + expected = Timedelta('1us1ns') + assert result == expected + + result = Timedelta(microseconds=1) - Timedelta(nanoseconds=1) + expected = Timedelta('999ns') + assert result == expected + + result = Timedelta(microseconds=1) + 5 * Timedelta(nanoseconds=-2) + expected = Timedelta('990ns') + assert result == expected + + with pytest.raises(TypeError): + Timedelta(nanoseconds='abc') diff --git a/pandas/tests/scalar/timedelta/test_formats.py b/pandas/tests/scalar/timedelta/test_formats.py new file mode 100644 index 0000000000000..8a877c7d1c0fa --- /dev/null +++ b/pandas/tests/scalar/timedelta/test_formats.py @@ -0,0 +1,48 @@ +# -*- coding: utf-8 -*- +from pandas import Timedelta + + +def test_repr(): + assert (repr(Timedelta(10, unit='d')) == + "Timedelta('10 days 00:00:00')") + assert (repr(Timedelta(10, unit='s')) == + "Timedelta('0 days 00:00:10')") + assert (repr(Timedelta(10, unit='ms')) == + "Timedelta('0 days 00:00:00.010000')") + assert (repr(Timedelta(-10, unit='ms')) == + "Timedelta('-1 days +23:59:59.990000')") + + +def test_isoformat(): + td = Timedelta(days=6, minutes=50, seconds=3, + milliseconds=10, microseconds=10, nanoseconds=12) + expected = 'P6DT0H50M3.010010012S' + result = td.isoformat() + assert result == expected + + td = Timedelta(days=4, hours=12, minutes=30, seconds=5) + result = td.isoformat() + expected = 'P4DT12H30M5S' + assert result == expected + + td = Timedelta(nanoseconds=123) + result = td.isoformat() + expected = 'P0DT0H0M0.000000123S' + assert result == expected + + # trim nano + td = Timedelta(microseconds=10) + result = td.isoformat() + expected = 'P0DT0H0M0.00001S' + assert result == expected + + # trim micro + td = 
Timedelta(milliseconds=1) + result = td.isoformat() + expected = 'P0DT0H0M0.001S' + assert result == expected + + # don't strip every 0 + result = Timedelta(minutes=1).isoformat() + expected = 'P0DT0H1M0S' + assert result == expected diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py new file mode 100644 index 0000000000000..a80c5d6611b8a --- /dev/null +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -0,0 +1,568 @@ +""" test the scalar Timedelta """ +import pytest + +import numpy as np +from datetime import timedelta + +import pandas as pd +import pandas.util.testing as tm +from pandas.core.tools.timedeltas import _coerce_scalar_to_timedelta_type as ct +from pandas import (Timedelta, TimedeltaIndex, timedelta_range, Series, + to_timedelta, compat) +from pandas._libs.tslib import iNaT, NaT + + +class TestTimedeltaArithmetic(object): + + def test_arithmetic_overflow(self): + with pytest.raises(OverflowError): + pd.Timestamp('1700-01-01') + pd.Timedelta(13 * 19999, unit='D') + + with pytest.raises(OverflowError): + pd.Timestamp('1700-01-01') + timedelta(days=13 * 19999) + + def test_ops_error_str(self): + # GH 13624 + td = Timedelta('1 day') + + for left, right in [(td, 'a'), ('a', td)]: + + with pytest.raises(TypeError): + left + right + + with pytest.raises(TypeError): + left > right + + assert not left == right + assert left != right + + def test_ops_notimplemented(self): + class Other: + pass + + other = Other() + + td = Timedelta('1 day') + assert td.__add__(other) is NotImplemented + assert td.__sub__(other) is NotImplemented + assert td.__truediv__(other) is NotImplemented + assert td.__mul__(other) is NotImplemented + assert td.__floordiv__(other) is NotImplemented + + def test_unary_ops(self): + td = Timedelta(10, unit='d') + + # __neg__, __pos__ + assert -td == Timedelta(-10, unit='d') + assert -td == Timedelta('-10d') + assert +td == Timedelta(10, unit='d') + + # __abs__, __abs__(__neg__) + 
assert abs(td) == td + assert abs(-td) == td + assert abs(-td) == Timedelta('10d') + + +class TestTimedeltaComparison(object): + def test_comparison_object_array(self): + # analogous to GH#15183 + td = Timedelta('2 days') + other = Timedelta('3 hours') + + arr = np.array([other, td], dtype=object) + res = arr == td + expected = np.array([False, True], dtype=bool) + assert (res == expected).all() + + # 2D case + arr = np.array([[other, td], + [td, other]], + dtype=object) + res = arr != td + expected = np.array([[True, False], [False, True]], dtype=bool) + assert res.shape == expected.shape + assert (res == expected).all() + + def test_compare_timedelta_ndarray(self): + # GH11835 + periods = [Timedelta('0 days 01:00:00'), Timedelta('0 days 01:00:00')] + arr = np.array(periods) + result = arr[0] > arr + expected = np.array([False, False]) + tm.assert_numpy_array_equal(result, expected) + + +class TestTimedeltas(object): + + def test_total_seconds_scalar(self): + # see gh-10939 + rng = Timedelta('1 days, 10:11:12.100123456') + expt = 1 * 86400 + 10 * 3600 + 11 * 60 + 12 + 100123456. 
/ 1e9 + tm.assert_almost_equal(rng.total_seconds(), expt) + + rng = Timedelta(np.nan) + assert np.isnan(rng.total_seconds()) + + def test_conversion(self): + + for td in [Timedelta(10, unit='d'), + Timedelta('1 days, 10:11:12.012345')]: + pydt = td.to_pytimedelta() + assert td == Timedelta(pydt) + assert td == pydt + assert (isinstance(pydt, timedelta) and not isinstance( + pydt, Timedelta)) + + assert td == np.timedelta64(td.value, 'ns') + td64 = td.to_timedelta64() + + assert td64 == np.timedelta64(td.value, 'ns') + assert td == td64 + + assert isinstance(td64, np.timedelta64) + + # this is NOT equal and cannot be roundtriped (because of the nanos) + td = Timedelta('1 days, 10:11:12.012345678') + assert td != td.to_pytimedelta() + + def test_freq_conversion(self): + + # truediv + td = Timedelta('1 days 2 hours 3 ns') + result = td / np.timedelta64(1, 'D') + assert result == td.value / float(86400 * 1e9) + result = td / np.timedelta64(1, 's') + assert result == td.value / float(1e9) + result = td / np.timedelta64(1, 'ns') + assert result == td.value + + # floordiv + td = Timedelta('1 days 2 hours 3 ns') + result = td // np.timedelta64(1, 'D') + assert result == 1 + result = td // np.timedelta64(1, 's') + assert result == 93600 + result = td // np.timedelta64(1, 'ns') + assert result == td.value + + def test_fields(self): + def check(value): + # that we are int/long like + assert isinstance(value, (int, compat.long)) + + # compat to datetime.timedelta + rng = to_timedelta('1 days, 10:11:12') + assert rng.days == 1 + assert rng.seconds == 10 * 3600 + 11 * 60 + 12 + assert rng.microseconds == 0 + assert rng.nanoseconds == 0 + + pytest.raises(AttributeError, lambda: rng.hours) + pytest.raises(AttributeError, lambda: rng.minutes) + pytest.raises(AttributeError, lambda: rng.milliseconds) + + # GH 10050 + check(rng.days) + check(rng.seconds) + check(rng.microseconds) + check(rng.nanoseconds) + + td = Timedelta('-1 days, 10:11:12') + assert abs(td) == 
Timedelta('13:48:48') + assert str(td) == "-1 days +10:11:12" + assert -td == Timedelta('0 days 13:48:48') + assert -Timedelta('-1 days, 10:11:12').value == 49728000000000 + assert Timedelta('-1 days, 10:11:12').value == -49728000000000 + + rng = to_timedelta('-1 days, 10:11:12.100123456') + assert rng.days == -1 + assert rng.seconds == 10 * 3600 + 11 * 60 + 12 + assert rng.microseconds == 100 * 1000 + 123 + assert rng.nanoseconds == 456 + pytest.raises(AttributeError, lambda: rng.hours) + pytest.raises(AttributeError, lambda: rng.minutes) + pytest.raises(AttributeError, lambda: rng.milliseconds) + + # components + tup = pd.to_timedelta(-1, 'us').components + assert tup.days == -1 + assert tup.hours == 23 + assert tup.minutes == 59 + assert tup.seconds == 59 + assert tup.milliseconds == 999 + assert tup.microseconds == 999 + assert tup.nanoseconds == 0 + + # GH 10050 + check(tup.days) + check(tup.hours) + check(tup.minutes) + check(tup.seconds) + check(tup.milliseconds) + check(tup.microseconds) + check(tup.nanoseconds) + + tup = Timedelta('-1 days 1 us').components + assert tup.days == -2 + assert tup.hours == 23 + assert tup.minutes == 59 + assert tup.seconds == 59 + assert tup.milliseconds == 999 + assert tup.microseconds == 999 + assert tup.nanoseconds == 0 + + def test_nat_converters(self): + assert to_timedelta('nat', box=False).astype('int64') == iNaT + assert to_timedelta('nan', box=False).astype('int64') == iNaT + + def testit(unit, transform): + + # array + result = to_timedelta(np.arange(5), unit=unit) + expected = TimedeltaIndex([np.timedelta64(i, transform(unit)) + for i in np.arange(5).tolist()]) + tm.assert_index_equal(result, expected) + + # scalar + result = to_timedelta(2, unit=unit) + expected = Timedelta(np.timedelta64(2, transform(unit)).astype( + 'timedelta64[ns]')) + assert result == expected + + # validate all units + # GH 6855 + for unit in ['Y', 'M', 'W', 'D', 'y', 'w', 'd']: + testit(unit, lambda x: x.upper()) + for unit in ['days', 
'day', 'Day', 'Days']: + testit(unit, lambda x: 'D') + for unit in ['h', 'm', 's', 'ms', 'us', 'ns', 'H', 'S', 'MS', 'US', + 'NS']: + testit(unit, lambda x: x.lower()) + + # offsets + + # m + testit('T', lambda x: 'm') + + # ms + testit('L', lambda x: 'ms') + + def test_numeric_conversions(self): + assert ct(0) == np.timedelta64(0, 'ns') + assert ct(10) == np.timedelta64(10, 'ns') + assert ct(10, unit='ns') == np.timedelta64(10, 'ns').astype('m8[ns]') + + assert ct(10, unit='us') == np.timedelta64(10, 'us').astype('m8[ns]') + assert ct(10, unit='ms') == np.timedelta64(10, 'ms').astype('m8[ns]') + assert ct(10, unit='s') == np.timedelta64(10, 's').astype('m8[ns]') + assert ct(10, unit='d') == np.timedelta64(10, 'D').astype('m8[ns]') + + def test_timedelta_conversions(self): + assert (ct(timedelta(seconds=1)) == + np.timedelta64(1, 's').astype('m8[ns]')) + assert (ct(timedelta(microseconds=1)) == + np.timedelta64(1, 'us').astype('m8[ns]')) + assert (ct(timedelta(days=1)) == + np.timedelta64(1, 'D').astype('m8[ns]')) + + def test_round(self): + + t1 = Timedelta('1 days 02:34:56.789123456') + t2 = Timedelta('-1 days 02:34:56.789123456') + + for (freq, s1, s2) in [('N', t1, t2), + ('U', Timedelta('1 days 02:34:56.789123000'), + Timedelta('-1 days 02:34:56.789123000')), + ('L', Timedelta('1 days 02:34:56.789000000'), + Timedelta('-1 days 02:34:56.789000000')), + ('S', Timedelta('1 days 02:34:57'), + Timedelta('-1 days 02:34:57')), + ('2S', Timedelta('1 days 02:34:56'), + Timedelta('-1 days 02:34:56')), + ('5S', Timedelta('1 days 02:34:55'), + Timedelta('-1 days 02:34:55')), + ('T', Timedelta('1 days 02:35:00'), + Timedelta('-1 days 02:35:00')), + ('12T', Timedelta('1 days 02:36:00'), + Timedelta('-1 days 02:36:00')), + ('H', Timedelta('1 days 03:00:00'), + Timedelta('-1 days 03:00:00')), + ('d', Timedelta('1 days'), + Timedelta('-1 days'))]: + r1 = t1.round(freq) + assert r1 == s1 + r2 = t2.round(freq) + assert r2 == s2 + + # invalid + for freq in ['Y', 'M', 'foobar']: + 
pytest.raises(ValueError, lambda: t1.round(freq)) + + t1 = timedelta_range('1 days', periods=3, freq='1 min 2 s 3 us') + t2 = -1 * t1 + t1a = timedelta_range('1 days', periods=3, freq='1 min 2 s') + t1c = pd.TimedeltaIndex([1, 1, 1], unit='D') + + # note that negative times round DOWN! so don't give whole numbers + for (freq, s1, s2) in [('N', t1, t2), + ('U', t1, t2), + ('L', t1a, + TimedeltaIndex(['-1 days +00:00:00', + '-2 days +23:58:58', + '-2 days +23:57:56'], + dtype='timedelta64[ns]', + freq=None) + ), + ('S', t1a, + TimedeltaIndex(['-1 days +00:00:00', + '-2 days +23:58:58', + '-2 days +23:57:56'], + dtype='timedelta64[ns]', + freq=None) + ), + ('12T', t1c, + TimedeltaIndex(['-1 days', + '-1 days', + '-1 days'], + dtype='timedelta64[ns]', + freq=None) + ), + ('H', t1c, + TimedeltaIndex(['-1 days', + '-1 days', + '-1 days'], + dtype='timedelta64[ns]', + freq=None) + ), + ('d', t1c, + pd.TimedeltaIndex([-1, -1, -1], unit='D') + )]: + + r1 = t1.round(freq) + tm.assert_index_equal(r1, s1) + r2 = t2.round(freq) + tm.assert_index_equal(r2, s2) + + # invalid + for freq in ['Y', 'M', 'foobar']: + pytest.raises(ValueError, lambda: t1.round(freq)) + + def test_contains(self): + # Checking for any NaT-like objects + # GH 13603 + td = to_timedelta(range(5), unit='d') + pd.offsets.Hour(1) + for v in [pd.NaT, None, float('nan'), np.nan]: + assert not (v in td) + + td = to_timedelta([pd.NaT]) + for v in [pd.NaT, None, float('nan'), np.nan]: + assert (v in td) + + def test_identity(self): + + td = Timedelta(10, unit='d') + assert isinstance(td, Timedelta) + assert isinstance(td, timedelta) + + def test_short_format_converters(self): + def conv(v): + return v.astype('m8[ns]') + + assert ct('10') == np.timedelta64(10, 'ns') + assert ct('10ns') == np.timedelta64(10, 'ns') + assert ct('100') == np.timedelta64(100, 'ns') + assert ct('100ns') == np.timedelta64(100, 'ns') + + assert ct('1000') == np.timedelta64(1000, 'ns') + assert ct('1000ns') == np.timedelta64(1000, 'ns') + 
assert ct('1000NS') == np.timedelta64(1000, 'ns') + + assert ct('10us') == np.timedelta64(10000, 'ns') + assert ct('100us') == np.timedelta64(100000, 'ns') + assert ct('1000us') == np.timedelta64(1000000, 'ns') + assert ct('1000Us') == np.timedelta64(1000000, 'ns') + assert ct('1000uS') == np.timedelta64(1000000, 'ns') + + assert ct('1ms') == np.timedelta64(1000000, 'ns') + assert ct('10ms') == np.timedelta64(10000000, 'ns') + assert ct('100ms') == np.timedelta64(100000000, 'ns') + assert ct('1000ms') == np.timedelta64(1000000000, 'ns') + + assert ct('-1s') == -np.timedelta64(1000000000, 'ns') + assert ct('1s') == np.timedelta64(1000000000, 'ns') + assert ct('10s') == np.timedelta64(10000000000, 'ns') + assert ct('100s') == np.timedelta64(100000000000, 'ns') + assert ct('1000s') == np.timedelta64(1000000000000, 'ns') + + assert ct('1d') == conv(np.timedelta64(1, 'D')) + assert ct('-1d') == -conv(np.timedelta64(1, 'D')) + assert ct('1D') == conv(np.timedelta64(1, 'D')) + assert ct('10D') == conv(np.timedelta64(10, 'D')) + assert ct('100D') == conv(np.timedelta64(100, 'D')) + assert ct('1000D') == conv(np.timedelta64(1000, 'D')) + assert ct('10000D') == conv(np.timedelta64(10000, 'D')) + + # space + assert ct(' 10000D ') == conv(np.timedelta64(10000, 'D')) + assert ct(' - 10000D ') == -conv(np.timedelta64(10000, 'D')) + + # invalid + pytest.raises(ValueError, ct, '1foo') + pytest.raises(ValueError, ct, 'foo') + + def test_full_format_converters(self): + def conv(v): + return v.astype('m8[ns]') + + d1 = np.timedelta64(1, 'D') + + assert ct('1days') == conv(d1) + assert ct('1days,') == conv(d1) + assert ct('- 1days,') == -conv(d1) + + assert ct('00:00:01') == conv(np.timedelta64(1, 's')) + assert ct('06:00:01') == conv(np.timedelta64(6 * 3600 + 1, 's')) + assert ct('06:00:01.0') == conv(np.timedelta64(6 * 3600 + 1, 's')) + assert ct('06:00:01.01') == conv(np.timedelta64( + 1000 * (6 * 3600 + 1) + 10, 'ms')) + + assert (ct('- 1days, 00:00:01') == + conv(-d1 + 
np.timedelta64(1, 's'))) + assert (ct('1days, 06:00:01') == + conv(d1 + np.timedelta64(6 * 3600 + 1, 's'))) + assert (ct('1days, 06:00:01.01') == + conv(d1 + np.timedelta64(1000 * (6 * 3600 + 1) + 10, 'ms'))) + + # invalid + pytest.raises(ValueError, ct, '- 1days, 00') + + def test_overflow(self): + # GH 9442 + s = Series(pd.date_range('20130101', periods=100000, freq='H')) + s[0] += pd.Timedelta('1s 1ms') + + # mean + result = (s - s.min()).mean() + expected = pd.Timedelta((pd.DatetimeIndex((s - s.min())).asi8 / len(s) + ).sum()) + + # the computation is converted to float so + # might be some loss of precision + assert np.allclose(result.value / 1000, expected.value / 1000) + + # sum + pytest.raises(ValueError, lambda: (s - s.min()).sum()) + s1 = s[0:10000] + pytest.raises(ValueError, lambda: (s1 - s1.min()).sum()) + s2 = s[0:1000] + result = (s2 - s2.min()).sum() + + def test_pickle(self): + + v = Timedelta('1 days 10:11:12.0123456') + v_p = tm.round_trip_pickle(v) + assert v == v_p + + def test_timedelta_hash_equality(self): + # GH 11129 + v = Timedelta(1, 'D') + td = timedelta(days=1) + assert hash(v) == hash(td) + + d = {td: 2} + assert d[v] == 2 + + tds = timedelta_range('1 second', periods=20) + assert all(hash(td) == hash(td.to_pytimedelta()) for td in tds) + + # python timedeltas drop ns resolution + ns_td = Timedelta(1, 'ns') + assert hash(ns_td) != hash(ns_td.to_pytimedelta()) + + def test_implementation_limits(self): + min_td = Timedelta(Timedelta.min) + max_td = Timedelta(Timedelta.max) + + # GH 12727 + # timedelta limits correspond to int64 boundaries + assert min_td.value == np.iinfo(np.int64).min + 1 + assert max_td.value == np.iinfo(np.int64).max + + # Beyond lower limit, a NAT before the Overflow + assert (min_td - Timedelta(1, 'ns')) is NaT + + with pytest.raises(OverflowError): + min_td - Timedelta(2, 'ns') + + with pytest.raises(OverflowError): + max_td + Timedelta(1, 'ns') + + # Same tests using the internal nanosecond values + td = 
Timedelta(min_td.value - 1, 'ns') + assert td is NaT + + with pytest.raises(OverflowError): + Timedelta(min_td.value - 2, 'ns') + + with pytest.raises(OverflowError): + Timedelta(max_td.value + 1, 'ns') + + def test_total_seconds_precision(self): + # GH 19458 + assert Timedelta('30S').total_seconds() == 30.0 + assert Timedelta('0').total_seconds() == 0.0 + assert Timedelta('-2S').total_seconds() == -2.0 + assert Timedelta('5.324S').total_seconds() == 5.324 + assert (Timedelta('30S').total_seconds() - 30.0) < 1e-20 + assert (30.0 - Timedelta('30S').total_seconds()) < 1e-20 + + def test_timedelta_arithmetic(self): + data = pd.Series(['nat', '32 days'], dtype='timedelta64[ns]') + deltas = [timedelta(days=1), Timedelta(1, unit='D')] + for delta in deltas: + result_method = data.add(delta) + result_operator = data + delta + expected = pd.Series(['nat', '33 days'], dtype='timedelta64[ns]') + tm.assert_series_equal(result_operator, expected) + tm.assert_series_equal(result_method, expected) + + result_method = data.sub(delta) + result_operator = data - delta + expected = pd.Series(['nat', '31 days'], dtype='timedelta64[ns]') + tm.assert_series_equal(result_operator, expected) + tm.assert_series_equal(result_method, expected) + # GH 9396 + result_method = data.div(delta) + result_operator = data / delta + expected = pd.Series([np.nan, 32.], dtype='float64') + tm.assert_series_equal(result_operator, expected) + tm.assert_series_equal(result_method, expected) + + def test_apply_to_timedelta(self): + timedelta_NaT = pd.to_timedelta('NaT') + + list_of_valid_strings = ['00:00:01', '00:00:02'] + a = pd.to_timedelta(list_of_valid_strings) + b = Series(list_of_valid_strings).apply(pd.to_timedelta) + # Can't compare until apply on a Series gives the correct dtype + # assert_series_equal(a, b) + + list_of_strings = ['00:00:01', np.nan, pd.NaT, timedelta_NaT] + + # TODO: unused? 
+ a = pd.to_timedelta(list_of_strings) # noqa + b = Series(list_of_strings).apply(pd.to_timedelta) # noqa + # Can't compare until apply on a Series gives the correct dtype + # assert_series_equal(a, b) + + def test_components(self): + rng = timedelta_range('1 days, 10:11:12', periods=2, freq='s') + rng.components + + # with nat + s = Series(rng) + s[1] = np.nan + + result = s.dt.components + assert not result.iloc[0].isna().all() + assert result.iloc[1].isna().all() diff --git a/pandas/tests/scalar/timestamp/__init__.py b/pandas/tests/scalar/timestamp/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/scalar/timestamp/test_arithmetic.py b/pandas/tests/scalar/timestamp/test_arithmetic.py new file mode 100644 index 0000000000000..8f4809c93e28b --- /dev/null +++ b/pandas/tests/scalar/timestamp/test_arithmetic.py @@ -0,0 +1,76 @@ +# -*- coding: utf-8 -*- +from datetime import datetime, timedelta + +import pytest +import numpy as np + +from pandas.compat import long +from pandas.tseries import offsets +from pandas import Timestamp, Timedelta + + +class TestTimestampArithmetic(object): + def test_overflow_offset(self): + # xref https://github.com/statsmodels/statsmodels/issues/3374 + # ends up multiplying really large numbers which overflow + + stamp = Timestamp('2017-01-13 00:00:00', freq='D') + offset = 20169940 * offsets.Day(1) + + with pytest.raises(OverflowError): + stamp + offset + + with pytest.raises(OverflowError): + offset + stamp + + with pytest.raises(OverflowError): + stamp - offset + + def test_delta_preserve_nanos(self): + val = Timestamp(long(1337299200000000123)) + result = val + timedelta(1) + assert result.nanosecond == val.nanosecond + + def test_timestamp_sub_datetime(self): + dt = datetime(2013, 10, 12) + ts = Timestamp(datetime(2013, 10, 13)) + assert (ts - dt).days == 1 + assert (dt - ts).days == -1 + + def test_addition_subtraction_types(self): + # Assert on the types resulting from Timestamp +/- various 
date/time + # objects + dt = datetime(2014, 3, 4) + td = timedelta(seconds=1) + # build a timestamp with a frequency, since then it supports + # addition/subtraction of integers + ts = Timestamp(dt, freq='D') + + assert type(ts + 1) == Timestamp + assert type(ts - 1) == Timestamp + + # Timestamp + datetime not supported, though subtraction is supported + # and yields timedelta more tests in tseries/base/tests/test_base.py + assert type(ts - dt) == Timedelta + assert type(ts + td) == Timestamp + assert type(ts - td) == Timestamp + + # Timestamp +/- datetime64 not supported, so not tested (could possibly + # assert error raised?) + td64 = np.timedelta64(1, 'D') + assert type(ts + td64) == Timestamp + assert type(ts - td64) == Timestamp + + def test_addition_subtraction_preserve_frequency(self): + ts = Timestamp('2014-03-05', freq='D') + td = timedelta(days=1) + original_freq = ts.freq + + assert (ts + 1).freq == original_freq + assert (ts - 1).freq == original_freq + assert (ts + td).freq == original_freq + assert (ts - td).freq == original_freq + + td64 = np.timedelta64(1, 'D') + assert (ts + td64).freq == original_freq + assert (ts - td64).freq == original_freq diff --git a/pandas/tests/scalar/timestamp/test_comparisons.py b/pandas/tests/scalar/timestamp/test_comparisons.py new file mode 100644 index 0000000000000..72d87be619917 --- /dev/null +++ b/pandas/tests/scalar/timestamp/test_comparisons.py @@ -0,0 +1,194 @@ +# -*- coding: utf-8 -*- +import sys +from datetime import datetime +import operator + +import pytest +import numpy as np + +from dateutil.tz import tzutc +from pytz import utc + +from pandas.compat import long +from pandas import Timestamp + + +class TestTimestampComparison(object): + def test_comparison_object_array(self): + # GH#15183 + ts = Timestamp('2011-01-03 00:00:00-0500', tz='US/Eastern') + other = Timestamp('2011-01-01 00:00:00-0500', tz='US/Eastern') + naive = Timestamp('2011-01-01 00:00:00') + + arr = np.array([other, ts], dtype=object) + 
res = arr == ts + expected = np.array([False, True], dtype=bool) + assert (res == expected).all() + + # 2D case + arr = np.array([[other, ts], + [ts, other]], + dtype=object) + res = arr != ts + expected = np.array([[True, False], [False, True]], dtype=bool) + assert res.shape == expected.shape + assert (res == expected).all() + + # tzaware mismatch + arr = np.array([naive], dtype=object) + with pytest.raises(TypeError): + arr < ts + + def test_comparison(self): + # 5-18-2012 00:00:00.000 + stamp = long(1337299200000000000) + + val = Timestamp(stamp) + + assert val == val + assert not val != val + assert not val < val + assert val <= val + assert not val > val + assert val >= val + + other = datetime(2012, 5, 18) + assert val == other + assert not val != other + assert not val < other + assert val <= other + assert not val > other + assert val >= other + + other = Timestamp(stamp + 100) + + assert val != other + assert val != other + assert val < other + assert val <= other + assert other > val + assert other >= val + + def test_compare_invalid(self): + # GH 8058 + val = Timestamp('20130101 12:01:02') + assert not val == 'foo' + assert not val == 10.0 + assert not val == 1 + assert not val == long(1) + assert not val == [] + assert not val == {'foo': 1} + assert not val == np.float64(1) + assert not val == np.int64(1) + + assert val != 'foo' + assert val != 10.0 + assert val != 1 + assert val != long(1) + assert val != [] + assert val != {'foo': 1} + assert val != np.float64(1) + assert val != np.int64(1) + + def test_cant_compare_tz_naive_w_aware(self): + # see gh-1404 + a = Timestamp('3/12/2012') + b = Timestamp('3/12/2012', tz='utc') + + pytest.raises(Exception, a.__eq__, b) + pytest.raises(Exception, a.__ne__, b) + pytest.raises(Exception, a.__lt__, b) + pytest.raises(Exception, a.__gt__, b) + pytest.raises(Exception, b.__eq__, a) + pytest.raises(Exception, b.__ne__, a) + pytest.raises(Exception, b.__lt__, a) + pytest.raises(Exception, b.__gt__, a) + + if 
sys.version_info < (3, 3): + pytest.raises(Exception, a.__eq__, b.to_pydatetime()) + pytest.raises(Exception, a.to_pydatetime().__eq__, b) + else: + assert not a == b.to_pydatetime() + assert not a.to_pydatetime() == b + + def test_cant_compare_tz_naive_w_aware_explicit_pytz(self): + # see gh-1404 + a = Timestamp('3/12/2012') + b = Timestamp('3/12/2012', tz=utc) + + pytest.raises(Exception, a.__eq__, b) + pytest.raises(Exception, a.__ne__, b) + pytest.raises(Exception, a.__lt__, b) + pytest.raises(Exception, a.__gt__, b) + pytest.raises(Exception, b.__eq__, a) + pytest.raises(Exception, b.__ne__, a) + pytest.raises(Exception, b.__lt__, a) + pytest.raises(Exception, b.__gt__, a) + + if sys.version_info < (3, 3): + pytest.raises(Exception, a.__eq__, b.to_pydatetime()) + pytest.raises(Exception, a.to_pydatetime().__eq__, b) + else: + assert not a == b.to_pydatetime() + assert not a.to_pydatetime() == b + + def test_cant_compare_tz_naive_w_aware_dateutil(self): + # see gh-1404 + a = Timestamp('3/12/2012') + b = Timestamp('3/12/2012', tz=tzutc()) + + pytest.raises(Exception, a.__eq__, b) + pytest.raises(Exception, a.__ne__, b) + pytest.raises(Exception, a.__lt__, b) + pytest.raises(Exception, a.__gt__, b) + pytest.raises(Exception, b.__eq__, a) + pytest.raises(Exception, b.__ne__, a) + pytest.raises(Exception, b.__lt__, a) + pytest.raises(Exception, b.__gt__, a) + + if sys.version_info < (3, 3): + pytest.raises(Exception, a.__eq__, b.to_pydatetime()) + pytest.raises(Exception, a.to_pydatetime().__eq__, b) + else: + assert not a == b.to_pydatetime() + assert not a.to_pydatetime() == b + + def test_timestamp_compare_scalars(self): + # case where ndim == 0 + lhs = np.datetime64(datetime(2013, 12, 6)) + rhs = Timestamp('now') + nat = Timestamp('nat') + + ops = {'gt': 'lt', + 'lt': 'gt', + 'ge': 'le', + 'le': 'ge', + 'eq': 'eq', + 'ne': 'ne'} + + for left, right in ops.items(): + left_f = getattr(operator, left) + right_f = getattr(operator, right) + expected = left_f(lhs, 
rhs) + + result = right_f(rhs, lhs) + assert result == expected + + expected = left_f(rhs, nat) + result = right_f(nat, rhs) + assert result == expected + + def test_timestamp_compare_with_early_datetime(self): + # e.g. datetime.min + stamp = Timestamp('2012-01-01') + + assert not stamp == datetime.min + assert not stamp == datetime(1600, 1, 1) + assert not stamp == datetime(2700, 1, 1) + assert stamp != datetime.min + assert stamp != datetime(1600, 1, 1) + assert stamp != datetime(2700, 1, 1) + assert stamp > datetime(1600, 1, 1) + assert stamp >= datetime(1600, 1, 1) + assert stamp < datetime(2700, 1, 1) + assert stamp <= datetime(2700, 1, 1) diff --git a/pandas/tests/scalar/timestamp/test_rendering.py b/pandas/tests/scalar/timestamp/test_rendering.py new file mode 100644 index 0000000000000..c404b60567daf --- /dev/null +++ b/pandas/tests/scalar/timestamp/test_rendering.py @@ -0,0 +1,96 @@ +# -*- coding: utf-8 -*- + +import pytest +import dateutil +import pytz # noqa # a test below uses pytz but only inside a `eval` call + +import pprint +from distutils.version import LooseVersion + +from pandas import Timestamp + + +class TestTimestampRendering(object): + + # dateutil zone change (only matters for repr) + if LooseVersion(dateutil.__version__) >= LooseVersion('2.6.0'): + timezones = ['UTC', 'Asia/Tokyo', 'US/Eastern', + 'dateutil/US/Pacific'] + else: + timezones = ['UTC', 'Asia/Tokyo', 'US/Eastern', + 'dateutil/America/Los_Angeles'] + + @pytest.mark.parametrize('tz', timezones) + @pytest.mark.parametrize('freq', ['D', 'M', 'S', 'N']) + @pytest.mark.parametrize('date', ['2014-03-07', '2014-01-01 09:00', + '2014-01-01 00:00:00.000000001']) + def test_repr(self, date, freq, tz): + # avoid to match with timezone name + freq_repr = "'{0}'".format(freq) + if tz.startswith('dateutil'): + tz_repr = tz.replace('dateutil', '') + else: + tz_repr = tz + + date_only = Timestamp(date) + assert date in repr(date_only) + assert tz_repr not in repr(date_only) + assert freq_repr 
not in repr(date_only) + assert date_only == eval(repr(date_only)) + + date_tz = Timestamp(date, tz=tz) + assert date in repr(date_tz) + assert tz_repr in repr(date_tz) + assert freq_repr not in repr(date_tz) + assert date_tz == eval(repr(date_tz)) + + date_freq = Timestamp(date, freq=freq) + assert date in repr(date_freq) + assert tz_repr not in repr(date_freq) + assert freq_repr in repr(date_freq) + assert date_freq == eval(repr(date_freq)) + + date_tz_freq = Timestamp(date, tz=tz, freq=freq) + assert date in repr(date_tz_freq) + assert tz_repr in repr(date_tz_freq) + assert freq_repr in repr(date_tz_freq) + assert date_tz_freq == eval(repr(date_tz_freq)) + + def test_repr_utcoffset(self): + # This can cause the tz field to be populated, but it's redundant to + # include this information in the date-string. + date_with_utc_offset = Timestamp('2014-03-13 00:00:00-0400', tz=None) + assert '2014-03-13 00:00:00-0400' in repr(date_with_utc_offset) + assert 'tzoffset' not in repr(date_with_utc_offset) + assert 'pytz.FixedOffset(-240)' in repr(date_with_utc_offset) + expr = repr(date_with_utc_offset).replace("'pytz.FixedOffset(-240)'", + 'pytz.FixedOffset(-240)') + assert date_with_utc_offset == eval(expr) + + def test_timestamp_repr_pre1900(self): + # pre-1900 + stamp = Timestamp('1850-01-01', tz='US/Eastern') + repr(stamp) + + iso8601 = '1850-01-01 01:23:45.012345' + stamp = Timestamp(iso8601, tz='US/Eastern') + result = repr(stamp) + assert iso8601 in result + + def test_pprint(self): + # GH#12622 + nested_obj = {'foo': 1, + 'bar': [{'w': {'a': Timestamp('2011-01-01')}}] * 10} + result = pprint.pformat(nested_obj, width=50) + expected = r"""{'bar': [{'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': 
Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}], + 'foo': 1}""" + assert result == expected diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py new file mode 100644 index 0000000000000..cde5baf47c18e --- /dev/null +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -0,0 +1,874 @@ +""" test the scalar Timestamp """ + +import pytz +import pytest +import dateutil +import calendar +import locale +import numpy as np + +from dateutil.tz import tzutc +from pytz import timezone, utc +from datetime import datetime, timedelta + +import pandas.util.testing as tm +import pandas.util._test_decorators as td + +from pandas.tseries import offsets + +from pandas._libs.tslibs import conversion +from pandas._libs.tslibs.timezones import get_timezone, dateutil_gettz as gettz + +from pandas.errors import OutOfBoundsDatetime +from pandas.compat import long, PY3 +from pandas.compat.numpy import np_datetime64_compat +from pandas import Timestamp, Period, Timedelta, NaT + + +class TestTimestampProperties(object): + + def test_properties_business(self): + ts = Timestamp('2017-10-01', freq='B') + control = Timestamp('2017-10-01') + assert ts.dayofweek == 6 + assert not ts.is_month_start # not a weekday + assert not ts.is_quarter_start # not a weekday + # Control case: non-business is month/qtr start + assert control.is_month_start + assert control.is_quarter_start + + ts = Timestamp('2017-09-30', freq='B') + control = Timestamp('2017-09-30') + assert ts.dayofweek == 5 + assert not ts.is_month_end # not a weekday + assert not ts.is_quarter_end # not a weekday + # Control case: non-business is month/qtr start + assert control.is_month_end + assert control.is_quarter_end + + def test_fields(self): + def check(value, equal): + # that we are int/long like + assert isinstance(value, (int, long)) + 
assert value == equal + + # GH 10050 + ts = Timestamp('2015-05-10 09:06:03.000100001') + check(ts.year, 2015) + check(ts.month, 5) + check(ts.day, 10) + check(ts.hour, 9) + check(ts.minute, 6) + check(ts.second, 3) + pytest.raises(AttributeError, lambda: ts.millisecond) + check(ts.microsecond, 100) + check(ts.nanosecond, 1) + check(ts.dayofweek, 6) + check(ts.quarter, 2) + check(ts.dayofyear, 130) + check(ts.week, 19) + check(ts.daysinmonth, 31) + check(ts.daysinmonth, 31) + + # GH 13303 + ts = Timestamp('2014-12-31 23:59:00-05:00', tz='US/Eastern') + check(ts.year, 2014) + check(ts.month, 12) + check(ts.day, 31) + check(ts.hour, 23) + check(ts.minute, 59) + check(ts.second, 0) + pytest.raises(AttributeError, lambda: ts.millisecond) + check(ts.microsecond, 0) + check(ts.nanosecond, 0) + check(ts.dayofweek, 2) + check(ts.quarter, 4) + check(ts.dayofyear, 365) + check(ts.week, 1) + check(ts.daysinmonth, 31) + + ts = Timestamp('2014-01-01 00:00:00+01:00') + starts = ['is_month_start', 'is_quarter_start', 'is_year_start'] + for start in starts: + assert getattr(ts, start) + ts = Timestamp('2014-12-31 23:59:59+01:00') + ends = ['is_month_end', 'is_year_end', 'is_quarter_end'] + for end in ends: + assert getattr(ts, end) + + # GH 12806 + @pytest.mark.parametrize('data', + [Timestamp('2017-08-28 23:00:00'), + Timestamp('2017-08-28 23:00:00', tz='EST')]) + @pytest.mark.parametrize('time_locale', [ + None] if tm.get_locales() is None else [None] + tm.get_locales()) + def test_names(self, data, time_locale): + # GH 17354 + # Test .weekday_name, .day_name(), .month_name + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + assert data.weekday_name == 'Monday' + if time_locale is None: + expected_day = 'Monday' + expected_month = 'August' + else: + with tm.set_locale(time_locale, locale.LC_TIME): + expected_day = calendar.day_name[0].capitalize() + expected_month = calendar.month_name[8].capitalize() + + assert data.day_name(time_locale) == expected_day 
+ assert data.month_name(time_locale) == expected_month + + # Test NaT + nan_ts = Timestamp(NaT) + assert np.isnan(nan_ts.day_name(time_locale)) + assert np.isnan(nan_ts.month_name(time_locale)) + + @pytest.mark.parametrize('tz', [None, 'UTC', 'US/Eastern', 'Asia/Tokyo']) + def test_is_leap_year(self, tz): + # GH 13727 + dt = Timestamp('2000-01-01 00:00:00', tz=tz) + assert dt.is_leap_year + assert isinstance(dt.is_leap_year, bool) + + dt = Timestamp('1999-01-01 00:00:00', tz=tz) + assert not dt.is_leap_year + + dt = Timestamp('2004-01-01 00:00:00', tz=tz) + assert dt.is_leap_year + + dt = Timestamp('2100-01-01 00:00:00', tz=tz) + assert not dt.is_leap_year + + def test_woy_boundary(self): + # make sure weeks at year boundaries are correct + d = datetime(2013, 12, 31) + result = Timestamp(d).week + expected = 1 # ISO standard + assert result == expected + + d = datetime(2008, 12, 28) + result = Timestamp(d).week + expected = 52 # ISO standard + assert result == expected + + d = datetime(2009, 12, 31) + result = Timestamp(d).week + expected = 53 # ISO standard + assert result == expected + + d = datetime(2010, 1, 1) + result = Timestamp(d).week + expected = 53 # ISO standard + assert result == expected + + d = datetime(2010, 1, 3) + result = Timestamp(d).week + expected = 53 # ISO standard + assert result == expected + + result = np.array([Timestamp(datetime(*args)).week + for args in [(2000, 1, 1), (2000, 1, 2), ( + 2005, 1, 1), (2005, 1, 2)]]) + assert (result == [52, 52, 53, 53]).all() + + +class TestTimestampConstructors(object): + + def test_constructor(self): + base_str = '2014-07-01 09:00' + base_dt = datetime(2014, 7, 1, 9) + base_expected = 1404205200000000000 + + # confirm base representation is correct + import calendar + assert (calendar.timegm(base_dt.timetuple()) * 1000000000 == + base_expected) + + tests = [(base_str, base_dt, base_expected), + ('2014-07-01 10:00', datetime(2014, 7, 1, 10), + base_expected + 3600 * 1000000000), + ('2014-07-01 
09:00:00.000008000', + datetime(2014, 7, 1, 9, 0, 0, 8), + base_expected + 8000), + ('2014-07-01 09:00:00.000000005', + Timestamp('2014-07-01 09:00:00.000000005'), + base_expected + 5)] + + timezones = [(None, 0), ('UTC', 0), (pytz.utc, 0), ('Asia/Tokyo', 9), + ('US/Eastern', -4), ('dateutil/US/Pacific', -7), + (pytz.FixedOffset(-180), -3), + (dateutil.tz.tzoffset(None, 18000), 5)] + + for date_str, date, expected in tests: + for result in [Timestamp(date_str), Timestamp(date)]: + # only with timestring + assert result.value == expected + assert conversion.pydt_to_i8(result) == expected + + # re-creation shouldn't affect to internal value + result = Timestamp(result) + assert result.value == expected + assert conversion.pydt_to_i8(result) == expected + + # with timezone + for tz, offset in timezones: + for result in [Timestamp(date_str, tz=tz), Timestamp(date, + tz=tz)]: + expected_tz = expected - offset * 3600 * 1000000000 + assert result.value == expected_tz + assert conversion.pydt_to_i8(result) == expected_tz + + # should preserve tz + result = Timestamp(result) + assert result.value == expected_tz + assert conversion.pydt_to_i8(result) == expected_tz + + # should convert to UTC + result = Timestamp(result, tz='UTC') + expected_utc = expected - offset * 3600 * 1000000000 + assert result.value == expected_utc + assert conversion.pydt_to_i8(result) == expected_utc + + def test_constructor_with_stringoffset(self): + # GH 7833 + base_str = '2014-07-01 11:00:00+02:00' + base_dt = datetime(2014, 7, 1, 9) + base_expected = 1404205200000000000 + + # confirm base representation is correct + import calendar + assert (calendar.timegm(base_dt.timetuple()) * 1000000000 == + base_expected) + + tests = [(base_str, base_expected), + ('2014-07-01 12:00:00+02:00', + base_expected + 3600 * 1000000000), + ('2014-07-01 11:00:00.000008000+02:00', base_expected + 8000), + ('2014-07-01 11:00:00.000000005+02:00', base_expected + 5)] + + timezones = [(None, 0), ('UTC', 0), (pytz.utc, 
0), ('Asia/Tokyo', 9), + ('US/Eastern', -4), ('dateutil/US/Pacific', -7), + (pytz.FixedOffset(-180), -3), + (dateutil.tz.tzoffset(None, 18000), 5)] + + for date_str, expected in tests: + for result in [Timestamp(date_str)]: + # only with timestring + assert result.value == expected + assert conversion.pydt_to_i8(result) == expected + + # re-creation shouldn't affect to internal value + result = Timestamp(result) + assert result.value == expected + assert conversion.pydt_to_i8(result) == expected + + # with timezone + for tz, offset in timezones: + result = Timestamp(date_str, tz=tz) + expected_tz = expected + assert result.value == expected_tz + assert conversion.pydt_to_i8(result) == expected_tz + + # should preserve tz + result = Timestamp(result) + assert result.value == expected_tz + assert conversion.pydt_to_i8(result) == expected_tz + + # should convert to UTC + result = Timestamp(result, tz='UTC') + expected_utc = expected + assert result.value == expected_utc + assert conversion.pydt_to_i8(result) == expected_utc + + # This should be 2013-11-01 05:00 in UTC + # converted to Chicago tz + result = Timestamp('2013-11-01 00:00:00-0500', tz='America/Chicago') + assert result.value == Timestamp('2013-11-01 05:00').value + expected = "Timestamp('2013-11-01 00:00:00-0500', tz='America/Chicago')" # noqa + assert repr(result) == expected + assert result == eval(repr(result)) + + # This should be 2013-11-01 05:00 in UTC + # converted to Tokyo tz (+09:00) + result = Timestamp('2013-11-01 00:00:00-0500', tz='Asia/Tokyo') + assert result.value == Timestamp('2013-11-01 05:00').value + expected = "Timestamp('2013-11-01 14:00:00+0900', tz='Asia/Tokyo')" + assert repr(result) == expected + assert result == eval(repr(result)) + + # GH11708 + # This should be 2015-11-18 10:00 in UTC + # converted to Asia/Katmandu + result = Timestamp("2015-11-18 15:45:00+05:45", tz="Asia/Katmandu") + assert result.value == Timestamp("2015-11-18 10:00").value + expected = "Timestamp('2015-11-18 
15:45:00+0545', tz='Asia/Katmandu')" + assert repr(result) == expected + assert result == eval(repr(result)) + + # This should be 2015-11-18 10:00 in UTC + # converted to Asia/Kolkata + result = Timestamp("2015-11-18 15:30:00+05:30", tz="Asia/Kolkata") + assert result.value == Timestamp("2015-11-18 10:00").value + expected = "Timestamp('2015-11-18 15:30:00+0530', tz='Asia/Kolkata')" + assert repr(result) == expected + assert result == eval(repr(result)) + + def test_constructor_invalid(self): + with tm.assert_raises_regex(TypeError, 'Cannot convert input'): + Timestamp(slice(2)) + with tm.assert_raises_regex(ValueError, 'Cannot convert Period'): + Timestamp(Period('1000-01-01')) + + def test_constructor_invalid_tz(self): + # GH#17690 + with tm.assert_raises_regex(TypeError, 'must be a datetime.tzinfo'): + Timestamp('2017-10-22', tzinfo='US/Eastern') + + with tm.assert_raises_regex(ValueError, 'at most one of'): + Timestamp('2017-10-22', tzinfo=utc, tz='UTC') + + with tm.assert_raises_regex(ValueError, "Invalid frequency:"): + # GH#5168 + # case where user tries to pass tz as an arg, not kwarg, gets + # interpreted as a `freq` + Timestamp('2012-01-01', 'US/Pacific') + + def test_constructor_tz_or_tzinfo(self): + # GH#17943, GH#17690, GH#5168 + stamps = [Timestamp(year=2017, month=10, day=22, tz='UTC'), + Timestamp(year=2017, month=10, day=22, tzinfo=utc), + Timestamp(year=2017, month=10, day=22, tz=utc), + Timestamp(datetime(2017, 10, 22), tzinfo=utc), + Timestamp(datetime(2017, 10, 22), tz='UTC'), + Timestamp(datetime(2017, 10, 22), tz=utc)] + assert all(ts == stamps[0] for ts in stamps) + + def test_constructor_positional(self): + # see gh-10758 + with pytest.raises(TypeError): + Timestamp(2000, 1) + with pytest.raises(ValueError): + Timestamp(2000, 0, 1) + with pytest.raises(ValueError): + Timestamp(2000, 13, 1) + with pytest.raises(ValueError): + Timestamp(2000, 1, 0) + with pytest.raises(ValueError): + Timestamp(2000, 1, 32) + + # see gh-11630 + assert 
(repr(Timestamp(2015, 11, 12)) == + repr(Timestamp('20151112'))) + assert (repr(Timestamp(2015, 11, 12, 1, 2, 3, 999999)) == + repr(Timestamp('2015-11-12 01:02:03.999999'))) + + def test_constructor_keyword(self): + # GH 10758 + with pytest.raises(TypeError): + Timestamp(year=2000, month=1) + with pytest.raises(ValueError): + Timestamp(year=2000, month=0, day=1) + with pytest.raises(ValueError): + Timestamp(year=2000, month=13, day=1) + with pytest.raises(ValueError): + Timestamp(year=2000, month=1, day=0) + with pytest.raises(ValueError): + Timestamp(year=2000, month=1, day=32) + + assert (repr(Timestamp(year=2015, month=11, day=12)) == + repr(Timestamp('20151112'))) + + assert (repr(Timestamp(year=2015, month=11, day=12, hour=1, minute=2, + second=3, microsecond=999999)) == + repr(Timestamp('2015-11-12 01:02:03.999999'))) + + def test_constructor_fromordinal(self): + base = datetime(2000, 1, 1) + + ts = Timestamp.fromordinal(base.toordinal(), freq='D') + assert base == ts + assert ts.freq == 'D' + assert base.toordinal() == ts.toordinal() + + ts = Timestamp.fromordinal(base.toordinal(), tz='US/Eastern') + assert Timestamp('2000-01-01', tz='US/Eastern') == ts + assert base.toordinal() == ts.toordinal() + + # GH#3042 + dt = datetime(2011, 4, 16, 0, 0) + ts = Timestamp.fromordinal(dt.toordinal()) + assert ts.to_pydatetime() == dt + + # with a tzinfo + stamp = Timestamp('2011-4-16', tz='US/Eastern') + dt_tz = stamp.to_pydatetime() + ts = Timestamp.fromordinal(dt_tz.toordinal(), tz='US/Eastern') + assert ts.to_pydatetime() == dt_tz + + @pytest.mark.parametrize('result', [ + Timestamp(datetime(2000, 1, 2, 3, 4, 5, 6), nanosecond=1), + Timestamp(year=2000, month=1, day=2, hour=3, minute=4, second=5, + microsecond=6, nanosecond=1), + Timestamp(year=2000, month=1, day=2, hour=3, minute=4, second=5, + microsecond=6, nanosecond=1, tz='UTC'), + Timestamp(2000, 1, 2, 3, 4, 5, 6, 1, None), + Timestamp(2000, 1, 2, 3, 4, 5, 6, 1, pytz.UTC)]) + def 
test_constructor_nanosecond(self, result): + # GH 18898 + expected = Timestamp(datetime(2000, 1, 2, 3, 4, 5, 6), tz=result.tz) + expected = expected + Timedelta(nanoseconds=1) + assert result == expected + + @pytest.mark.parametrize('arg', ['year', 'month', 'day', 'hour', 'minute', + 'second', 'microsecond', 'nanosecond']) + def test_invalid_date_kwarg_with_string_input(self, arg): + kwarg = {arg: 1} + with pytest.raises(ValueError): + Timestamp('2010-10-10 12:59:59.999999999', **kwarg) + + def test_out_of_bounds_value(self): + one_us = np.timedelta64(1).astype('timedelta64[us]') + + # By definition we can't go out of bounds in [ns], so we + # convert the datetime64s to [us] so we can go out of bounds + min_ts_us = np.datetime64(Timestamp.min).astype('M8[us]') + max_ts_us = np.datetime64(Timestamp.max).astype('M8[us]') + + # No error for the min/max datetimes + Timestamp(min_ts_us) + Timestamp(max_ts_us) + + # One us less than the minimum is an error + with pytest.raises(ValueError): + Timestamp(min_ts_us - one_us) + + # One us more than the maximum is an error + with pytest.raises(ValueError): + Timestamp(max_ts_us + one_us) + + def test_out_of_bounds_string(self): + with pytest.raises(ValueError): + Timestamp('1676-01-01') + with pytest.raises(ValueError): + Timestamp('2263-01-01') + + def test_barely_out_of_bounds(self): + # GH#19529 + # GH#19382 close enough to bounds that dropping nanos would result + # in an in-bounds datetime + with pytest.raises(OutOfBoundsDatetime): + Timestamp('2262-04-11 23:47:16.854775808') + + def test_bounds_with_different_units(self): + out_of_bounds_dates = ('1677-09-21', '2262-04-12') + + time_units = ('D', 'h', 'm', 's', 'ms', 'us') + + for date_string in out_of_bounds_dates: + for unit in time_units: + dt64 = np.datetime64(date_string, dtype='M8[%s]' % unit) + with pytest.raises(ValueError): + Timestamp(dt64) + + in_bounds_dates = ('1677-09-23', '2262-04-11') + + for date_string in in_bounds_dates: + for unit in time_units: + 
dt64 = np.datetime64(date_string, dtype='M8[%s]' % unit) + Timestamp(dt64) + + def test_min_valid(self): + # Ensure that Timestamp.min is a valid Timestamp + Timestamp(Timestamp.min) + + def test_max_valid(self): + # Ensure that Timestamp.max is a valid Timestamp + Timestamp(Timestamp.max) + + def test_now(self): + # GH#9000 + ts_from_string = Timestamp('now') + ts_from_method = Timestamp.now() + ts_datetime = datetime.now() + + ts_from_string_tz = Timestamp('now', tz='US/Eastern') + ts_from_method_tz = Timestamp.now(tz='US/Eastern') + + # Check that the delta between the times is less than 1s (arbitrarily + # small) + delta = Timedelta(seconds=1) + assert abs(ts_from_method - ts_from_string) < delta + assert abs(ts_datetime - ts_from_method) < delta + assert abs(ts_from_method_tz - ts_from_string_tz) < delta + assert (abs(ts_from_string_tz.tz_localize(None) - + ts_from_method_tz.tz_localize(None)) < delta) + + def test_today(self): + ts_from_string = Timestamp('today') + ts_from_method = Timestamp.today() + ts_datetime = datetime.today() + + ts_from_string_tz = Timestamp('today', tz='US/Eastern') + ts_from_method_tz = Timestamp.today(tz='US/Eastern') + + # Check that the delta between the times is less than 1s (arbitrarily + # small) + delta = Timedelta(seconds=1) + assert abs(ts_from_method - ts_from_string) < delta + assert abs(ts_datetime - ts_from_method) < delta + assert abs(ts_from_method_tz - ts_from_string_tz) < delta + assert (abs(ts_from_string_tz.tz_localize(None) - + ts_from_method_tz.tz_localize(None)) < delta) + + +class TestTimestamp(object): + + def test_tz(self): + tstr = '2014-02-01 09:00' + ts = Timestamp(tstr) + local = ts.tz_localize('Asia/Tokyo') + assert local.hour == 9 + assert local == Timestamp(tstr, tz='Asia/Tokyo') + conv = local.tz_convert('US/Eastern') + assert conv == Timestamp('2014-01-31 19:00', tz='US/Eastern') + assert conv.hour == 19 + + # preserves nanosecond + ts = Timestamp(tstr) + offsets.Nano(5) + local = 
ts.tz_localize('Asia/Tokyo') + assert local.hour == 9 + assert local.nanosecond == 5 + conv = local.tz_convert('US/Eastern') + assert conv.nanosecond == 5 + assert conv.hour == 19 + + def test_utc_z_designator(self): + assert get_timezone(Timestamp('2014-11-02 01:00Z').tzinfo) == 'UTC' + + def test_asm8(self): + np.random.seed(7960929) + ns = [Timestamp.min.value, Timestamp.max.value, 1000] + + for n in ns: + assert (Timestamp(n).asm8.view('i8') == + np.datetime64(n, 'ns').view('i8') == n) + + assert (Timestamp('nat').asm8.view('i8') == + np.datetime64('nat', 'ns').view('i8')) + + def test_class_ops_pytz(self): + def compare(x, y): + assert (int(Timestamp(x).value / 1e9) == + int(Timestamp(y).value / 1e9)) + + compare(Timestamp.now(), datetime.now()) + compare(Timestamp.now('UTC'), datetime.now(timezone('UTC'))) + compare(Timestamp.utcnow(), datetime.utcnow()) + compare(Timestamp.today(), datetime.today()) + current_time = calendar.timegm(datetime.now().utctimetuple()) + compare(Timestamp.utcfromtimestamp(current_time), + datetime.utcfromtimestamp(current_time)) + compare(Timestamp.fromtimestamp(current_time), + datetime.fromtimestamp(current_time)) + + date_component = datetime.utcnow() + time_component = (date_component + timedelta(minutes=10)).time() + compare(Timestamp.combine(date_component, time_component), + datetime.combine(date_component, time_component)) + + def test_class_ops_dateutil(self): + def compare(x, y): + assert (int(np.round(Timestamp(x).value / 1e9)) == + int(np.round(Timestamp(y).value / 1e9))) + + compare(Timestamp.now(), datetime.now()) + compare(Timestamp.now('UTC'), datetime.now(tzutc())) + compare(Timestamp.utcnow(), datetime.utcnow()) + compare(Timestamp.today(), datetime.today()) + current_time = calendar.timegm(datetime.now().utctimetuple()) + compare(Timestamp.utcfromtimestamp(current_time), + datetime.utcfromtimestamp(current_time)) + compare(Timestamp.fromtimestamp(current_time), + datetime.fromtimestamp(current_time)) + + 
date_component = datetime.utcnow() + time_component = (date_component + timedelta(minutes=10)).time() + compare(Timestamp.combine(date_component, time_component), + datetime.combine(date_component, time_component)) + + def test_basics_nanos(self): + val = np.int64(946684800000000000).view('M8[ns]') + stamp = Timestamp(val.view('i8') + 500) + assert stamp.year == 2000 + assert stamp.month == 1 + assert stamp.microsecond == 0 + assert stamp.nanosecond == 500 + + # GH 14415 + val = np.iinfo(np.int64).min + 80000000000000 + stamp = Timestamp(val) + assert stamp.year == 1677 + assert stamp.month == 9 + assert stamp.day == 21 + assert stamp.microsecond == 145224 + assert stamp.nanosecond == 192 + + def test_unit(self): + + def check(val, unit=None, h=1, s=1, us=0): + stamp = Timestamp(val, unit=unit) + assert stamp.year == 2000 + assert stamp.month == 1 + assert stamp.day == 1 + assert stamp.hour == h + if unit != 'D': + assert stamp.minute == 1 + assert stamp.second == s + assert stamp.microsecond == us + else: + assert stamp.minute == 0 + assert stamp.second == 0 + assert stamp.microsecond == 0 + assert stamp.nanosecond == 0 + + ts = Timestamp('20000101 01:01:01') + val = ts.value + days = (ts - Timestamp('1970-01-01')).days + + check(val) + check(val / long(1000), unit='us') + check(val / long(1000000), unit='ms') + check(val / long(1000000000), unit='s') + check(days, unit='D', h=0) + + # using truediv, so these are like floats + if PY3: + check((val + 500000) / long(1000000000), unit='s', us=500) + check((val + 500000000) / long(1000000000), unit='s', us=500000) + check((val + 500000) / long(1000000), unit='ms', us=500) + + # get chopped in py2 + else: + check((val + 500000) / long(1000000000), unit='s') + check((val + 500000000) / long(1000000000), unit='s') + check((val + 500000) / long(1000000), unit='ms') + + # ok + check((val + 500000) / long(1000), unit='us', us=500) + check((val + 500000000) / long(1000000), unit='ms', us=500000) + + # floats + check(val / 
1000.0 + 5, unit='us', us=5) + check(val / 1000.0 + 5000, unit='us', us=5000) + check(val / 1000000.0 + 0.5, unit='ms', us=500) + check(val / 1000000.0 + 0.005, unit='ms', us=5) + check(val / 1000000000.0 + 0.5, unit='s', us=500000) + check(days + 0.5, unit='D', h=12) + + def test_roundtrip(self): + + # test value to string and back conversions + # further test accessors + base = Timestamp('20140101 00:00:00') + + result = Timestamp(base.value + Timedelta('5ms').value) + assert result == Timestamp(str(base) + ".005000") + assert result.microsecond == 5000 + + result = Timestamp(base.value + Timedelta('5us').value) + assert result == Timestamp(str(base) + ".000005") + assert result.microsecond == 5 + + result = Timestamp(base.value + Timedelta('5ns').value) + assert result == Timestamp(str(base) + ".000000005") + assert result.nanosecond == 5 + assert result.microsecond == 0 + + result = Timestamp(base.value + Timedelta('6ms 5us').value) + assert result == Timestamp(str(base) + ".006005") + assert result.microsecond == 5 + 6 * 1000 + + result = Timestamp(base.value + Timedelta('200ms 5us').value) + assert result == Timestamp(str(base) + ".200005") + assert result.microsecond == 5 + 200 * 1000 + + def test_hash_equivalent(self): + d = {datetime(2011, 1, 1): 5} + stamp = Timestamp(datetime(2011, 1, 1)) + assert d[stamp] == 5 + + +class TestTimestampNsOperations(object): + + def setup_method(self, method): + self.timestamp = Timestamp(datetime.utcnow()) + + def assert_ns_timedelta(self, modified_timestamp, expected_value): + value = self.timestamp.value + modified_value = modified_timestamp.value + + assert modified_value - value == expected_value + + def test_timedelta_ns_arithmetic(self): + self.assert_ns_timedelta(self.timestamp + np.timedelta64(-123, 'ns'), + -123) + + def test_timedelta_ns_based_arithmetic(self): + self.assert_ns_timedelta(self.timestamp + np.timedelta64( + 1234567898, 'ns'), 1234567898) + + def test_timedelta_us_arithmetic(self): + 
self.assert_ns_timedelta(self.timestamp + np.timedelta64(-123, 'us'), + -123000) + + def test_timedelta_ms_arithmetic(self): + time = self.timestamp + np.timedelta64(-123, 'ms') + self.assert_ns_timedelta(time, -123000000) + + def test_nanosecond_string_parsing(self): + ts = Timestamp('2013-05-01 07:15:45.123456789') + # GH 7878 + expected_repr = '2013-05-01 07:15:45.123456789' + expected_value = 1367392545123456789 + assert ts.value == expected_value + assert expected_repr in repr(ts) + + ts = Timestamp('2013-05-01 07:15:45.123456789+09:00', tz='Asia/Tokyo') + assert ts.value == expected_value - 9 * 3600 * 1000000000 + assert expected_repr in repr(ts) + + ts = Timestamp('2013-05-01 07:15:45.123456789', tz='UTC') + assert ts.value == expected_value + assert expected_repr in repr(ts) + + ts = Timestamp('2013-05-01 07:15:45.123456789', tz='US/Eastern') + assert ts.value == expected_value + 4 * 3600 * 1000000000 + assert expected_repr in repr(ts) + + # GH 10041 + ts = Timestamp('20130501T071545.123456789') + assert ts.value == expected_value + assert expected_repr in repr(ts) + + def test_nanosecond_timestamp(self): + # GH 7610 + expected = 1293840000000000005 + t = Timestamp('2011-01-01') + offsets.Nano(5) + assert repr(t) == "Timestamp('2011-01-01 00:00:00.000000005')" + assert t.value == expected + assert t.nanosecond == 5 + + t = Timestamp(t) + assert repr(t) == "Timestamp('2011-01-01 00:00:00.000000005')" + assert t.value == expected + assert t.nanosecond == 5 + + t = Timestamp(np_datetime64_compat('2011-01-01 00:00:00.000000005Z')) + assert repr(t) == "Timestamp('2011-01-01 00:00:00.000000005')" + assert t.value == expected + assert t.nanosecond == 5 + + expected = 1293840000000000010 + t = t + offsets.Nano(5) + assert repr(t) == "Timestamp('2011-01-01 00:00:00.000000010')" + assert t.value == expected + assert t.nanosecond == 10 + + t = Timestamp(t) + assert repr(t) == "Timestamp('2011-01-01 00:00:00.000000010')" + assert t.value == expected + assert 
t.nanosecond == 10 + + t = Timestamp(np_datetime64_compat('2011-01-01 00:00:00.000000010Z')) + assert repr(t) == "Timestamp('2011-01-01 00:00:00.000000010')" + assert t.value == expected + assert t.nanosecond == 10 + + +class TestTimestampToJulianDate(object): + + def test_compare_1700(self): + r = Timestamp('1700-06-23').to_julian_date() + assert r == 2342145.5 + + def test_compare_2000(self): + r = Timestamp('2000-04-12').to_julian_date() + assert r == 2451646.5 + + def test_compare_2100(self): + r = Timestamp('2100-08-12').to_julian_date() + assert r == 2488292.5 + + def test_compare_hour01(self): + r = Timestamp('2000-08-12T01:00:00').to_julian_date() + assert r == 2451768.5416666666666666 + + def test_compare_hour13(self): + r = Timestamp('2000-08-12T13:00:00').to_julian_date() + assert r == 2451769.0416666666666666 + + +class TestTimestampConversion(object): + def test_conversion(self): + # GH#9255 + ts = Timestamp('2000-01-01') + + result = ts.to_pydatetime() + expected = datetime(2000, 1, 1) + assert result == expected + assert type(result) == type(expected) + + result = ts.to_datetime64() + expected = np.datetime64(ts.value, 'ns') + assert result == expected + assert type(result) == type(expected) + assert result.dtype == expected.dtype + + def test_to_pydatetime_nonzero_nano(self): + ts = Timestamp('2011-01-01 9:00:00.123456789') + + # Warn the user of data loss (nanoseconds). 
+ with tm.assert_produces_warning(UserWarning, + check_stacklevel=False): + expected = datetime(2011, 1, 1, 9, 0, 0, 123456) + result = ts.to_pydatetime() + assert result == expected + + def test_timestamp_to_datetime(self): + stamp = Timestamp('20090415', tz='US/Eastern', freq='D') + dtval = stamp.to_pydatetime() + assert stamp == dtval + assert stamp.tzinfo == dtval.tzinfo + + def test_timestamp_to_datetime_dateutil(self): + stamp = Timestamp('20090415', tz='dateutil/US/Eastern', freq='D') + dtval = stamp.to_pydatetime() + assert stamp == dtval + assert stamp.tzinfo == dtval.tzinfo + + def test_timestamp_to_datetime_explicit_pytz(self): + stamp = Timestamp('20090415', tz=pytz.timezone('US/Eastern'), freq='D') + dtval = stamp.to_pydatetime() + assert stamp == dtval + assert stamp.tzinfo == dtval.tzinfo + + @td.skip_if_windows_python_3 + def test_timestamp_to_datetime_explicit_dateutil(self): + stamp = Timestamp('20090415', tz=gettz('US/Eastern'), freq='D') + dtval = stamp.to_pydatetime() + assert stamp == dtval + assert stamp.tzinfo == dtval.tzinfo + + def test_to_datetime_bijective(self): + # Ensure that converting to datetime and back only loses precision + # by going from nanoseconds to microseconds. 
+ exp_warning = None if Timestamp.max.nanosecond == 0 else UserWarning + with tm.assert_produces_warning(exp_warning, check_stacklevel=False): + assert (Timestamp(Timestamp.max.to_pydatetime()).value / 1000 == + Timestamp.max.value / 1000) + + exp_warning = None if Timestamp.min.nanosecond == 0 else UserWarning + with tm.assert_produces_warning(exp_warning, check_stacklevel=False): + assert (Timestamp(Timestamp.min.to_pydatetime()).value / 1000 == + Timestamp.min.value / 1000) diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py new file mode 100644 index 0000000000000..f43651dc6f0db --- /dev/null +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -0,0 +1,293 @@ +# -*- coding: utf-8 -*- +""" +Tests for Timestamp timezone-related methods +""" +from datetime import date, timedelta + +from distutils.version import LooseVersion +import pytest +import pytz +from pytz.exceptions import AmbiguousTimeError, NonExistentTimeError +import dateutil +from dateutil.tz import gettz, tzoffset + +import pandas.util.testing as tm +import pandas.util._test_decorators as td + +from pandas import Timestamp, NaT +from pandas.errors import OutOfBoundsDatetime + + +class TestTimestampTZOperations(object): + # -------------------------------------------------------------- + # Timestamp.tz_localize + + def test_tz_localize_pushes_out_of_bounds(self): + # GH#12677 + # tz_localize that pushes away from the boundary is OK + pac = Timestamp.min.tz_localize('US/Pacific') + assert pac.value > Timestamp.min.value + pac.tz_convert('Asia/Tokyo') # tz_convert doesn't change value + with pytest.raises(OutOfBoundsDatetime): + Timestamp.min.tz_localize('Asia/Tokyo') + + # tz_localize that pushes away from the boundary is OK + tokyo = Timestamp.max.tz_localize('Asia/Tokyo') + assert tokyo.value < Timestamp.max.value + tokyo.tz_convert('US/Pacific') # tz_convert doesn't change value + with pytest.raises(OutOfBoundsDatetime): + 
Timestamp.max.tz_localize('US/Pacific') + + def test_tz_localize_ambiguous_bool(self): + # make sure that we are correctly accepting bool values as ambiguous + # GH#14402 + ts = Timestamp('2015-11-01 01:00:03') + expected0 = Timestamp('2015-11-01 01:00:03-0500', tz='US/Central') + expected1 = Timestamp('2015-11-01 01:00:03-0600', tz='US/Central') + + with pytest.raises(pytz.AmbiguousTimeError): + ts.tz_localize('US/Central') + + result = ts.tz_localize('US/Central', ambiguous=True) + assert result == expected0 + + result = ts.tz_localize('US/Central', ambiguous=False) + assert result == expected1 + + def test_tz_localize_ambiguous(self): + ts = Timestamp('2014-11-02 01:00') + ts_dst = ts.tz_localize('US/Eastern', ambiguous=True) + ts_no_dst = ts.tz_localize('US/Eastern', ambiguous=False) + + assert (ts_no_dst.value - ts_dst.value) / 1e9 == 3600 + with pytest.raises(ValueError): + ts.tz_localize('US/Eastern', ambiguous='infer') + + # GH#8025 + with tm.assert_raises_regex(TypeError, + 'Cannot localize tz-aware Timestamp, ' + 'use tz_convert for conversions'): + Timestamp('2011-01-01', tz='US/Eastern').tz_localize('Asia/Tokyo') + + with tm.assert_raises_regex(TypeError, + 'Cannot convert tz-naive Timestamp, ' + 'use tz_localize to localize'): + Timestamp('2011-01-01').tz_convert('Asia/Tokyo') + + @pytest.mark.parametrize('stamp, tz', [ + ('2015-03-08 02:00', 'US/Eastern'), + ('2015-03-08 02:30', 'US/Pacific'), + ('2015-03-29 02:00', 'Europe/Paris'), + ('2015-03-29 02:30', 'Europe/Belgrade')]) + def test_tz_localize_nonexistent(self, stamp, tz): + # GH#13057 + ts = Timestamp(stamp) + with pytest.raises(NonExistentTimeError): + ts.tz_localize(tz) + with pytest.raises(NonExistentTimeError): + ts.tz_localize(tz, errors='raise') + assert ts.tz_localize(tz, errors='coerce') is NaT + + def test_tz_localize_errors_ambiguous(self): + # GH#13057 + ts = Timestamp('2015-11-1 01:00') + with pytest.raises(AmbiguousTimeError): + ts.tz_localize('US/Pacific', errors='coerce') + + 
@pytest.mark.parametrize('tz', ['UTC', 'Asia/Tokyo', + 'US/Eastern', 'dateutil/US/Pacific']) + @pytest.mark.parametrize('stamp', ['2014-02-01 09:00', '2014-07-08 09:00', + '2014-11-01 17:00', '2014-11-05 00:00']) + def test_tz_localize_roundtrip(self, stamp, tz): + ts = Timestamp(stamp) + localized = ts.tz_localize(tz) + assert localized == Timestamp(stamp, tz=tz) + + with pytest.raises(TypeError): + localized.tz_localize(tz) + + reset = localized.tz_localize(None) + assert reset == ts + assert reset.tzinfo is None + + def test_tz_localize_ambiguous_compat(self): + # validate that pytz and dateutil are compat for dst + # when the transition happens + naive = Timestamp('2013-10-27 01:00:00') + + pytz_zone = 'Europe/London' + dateutil_zone = 'dateutil/Europe/London' + result_pytz = naive.tz_localize(pytz_zone, ambiguous=0) + result_dateutil = naive.tz_localize(dateutil_zone, ambiguous=0) + assert result_pytz.value == result_dateutil.value + assert result_pytz.value == 1382835600000000000 + + if LooseVersion(dateutil.__version__) < LooseVersion('2.6.0'): + # dateutil 2.6 buggy w.r.t. ambiguous=0 + # see gh-14621 + # see https://github.com/dateutil/dateutil/issues/321 + assert (result_pytz.to_pydatetime().tzname() == + result_dateutil.to_pydatetime().tzname()) + assert str(result_pytz) == str(result_dateutil) + elif LooseVersion(dateutil.__version__) > LooseVersion('2.6.0'): + # fixed ambiguous behavior + assert result_pytz.to_pydatetime().tzname() == 'GMT' + assert result_dateutil.to_pydatetime().tzname() == 'BST' + assert str(result_pytz) != str(result_dateutil) + + # 1 hour difference + result_pytz = naive.tz_localize(pytz_zone, ambiguous=1) + result_dateutil = naive.tz_localize(dateutil_zone, ambiguous=1) + assert result_pytz.value == result_dateutil.value + assert result_pytz.value == 1382832000000000000 + + # dateutil < 2.6 is buggy w.r.t. 
ambiguous timezones + if LooseVersion(dateutil.__version__) > LooseVersion('2.5.3'): + # see gh-14621 + assert str(result_pytz) == str(result_dateutil) + assert (result_pytz.to_pydatetime().tzname() == + result_dateutil.to_pydatetime().tzname()) + + @pytest.mark.parametrize('tz', [pytz.timezone('US/Eastern'), + gettz('US/Eastern'), + 'US/Eastern', 'dateutil/US/Eastern']) + def test_timestamp_tz_localize(self, tz): + stamp = Timestamp('3/11/2012 04:00') + + result = stamp.tz_localize(tz) + expected = Timestamp('3/11/2012 04:00', tz=tz) + assert result.hour == expected.hour + assert result == expected + + # ------------------------------------------------------------------ + # Timestamp.tz_convert + + @pytest.mark.parametrize('tz', ['UTC', 'Asia/Tokyo', + 'US/Eastern', 'dateutil/US/Pacific']) + @pytest.mark.parametrize('stamp', ['2014-02-01 09:00', '2014-07-08 09:00', + '2014-11-01 17:00', '2014-11-05 00:00']) + def test_tz_convert_roundtrip(self, stamp, tz): + ts = Timestamp(stamp, tz='UTC') + converted = ts.tz_convert(tz) + + reset = converted.tz_convert(None) + assert reset == Timestamp(stamp) + assert reset.tzinfo is None + assert reset == converted.tz_convert('UTC').tz_localize(None) + + @pytest.mark.parametrize('tzstr', ['US/Eastern', 'dateutil/US/Eastern']) + def test_astimezone(self, tzstr): + # astimezone is an alias for tz_convert, so keep it with + # the tz_convert tests + utcdate = Timestamp('3/11/2012 22:00', tz='UTC') + expected = utcdate.tz_convert(tzstr) + result = utcdate.astimezone(tzstr) + assert expected == result + assert isinstance(result, Timestamp) + + @td.skip_if_windows + def test_tz_convert_utc_with_system_utc(self): + from pandas._libs.tslibs.timezones import maybe_get_tz + + # from system utc to real utc + ts = Timestamp('2001-01-05 11:56', tz=maybe_get_tz('dateutil/UTC')) + # check that the time hasn't changed. 
+ assert ts == ts.tz_convert(dateutil.tz.tzutc()) + + # from system utc to real utc + ts = Timestamp('2001-01-05 11:56', tz=maybe_get_tz('dateutil/UTC')) + # check that the time hasn't changed. + assert ts == ts.tz_convert(dateutil.tz.tzutc()) + + # ------------------------------------------------------------------ + # Timestamp.__init__ with tz str or tzinfo + + def test_timestamp_constructor_tz_utc(self): + utc_stamp = Timestamp('3/11/2012 05:00', tz='utc') + assert utc_stamp.tzinfo is pytz.utc + assert utc_stamp.hour == 5 + + utc_stamp = Timestamp('3/11/2012 05:00').tz_localize('utc') + assert utc_stamp.hour == 5 + + def test_timestamp_to_datetime_tzoffset(self): + tzinfo = tzoffset(None, 7200) + expected = Timestamp('3/11/2012 04:00', tz=tzinfo) + result = Timestamp(expected.to_pydatetime()) + assert expected == result + + def test_timestamp_constructor_near_dst_boundary(self): + # GH#11481 & GH#15777 + # Naive string timestamps were being localized incorrectly + # with tz_convert_single instead of tz_localize_to_utc + + for tz in ['Europe/Brussels', 'Europe/Prague']: + result = Timestamp('2015-10-25 01:00', tz=tz) + expected = Timestamp('2015-10-25 01:00').tz_localize(tz) + assert result == expected + + with pytest.raises(pytz.AmbiguousTimeError): + Timestamp('2015-10-25 02:00', tz=tz) + + result = Timestamp('2017-03-26 01:00', tz='Europe/Paris') + expected = Timestamp('2017-03-26 01:00').tz_localize('Europe/Paris') + assert result == expected + + with pytest.raises(pytz.NonExistentTimeError): + Timestamp('2017-03-26 02:00', tz='Europe/Paris') + + # GH#11708 + naive = Timestamp('2015-11-18 10:00:00') + result = naive.tz_localize('UTC').tz_convert('Asia/Kolkata') + expected = Timestamp('2015-11-18 15:30:00+0530', tz='Asia/Kolkata') + assert result == expected + + # GH#15823 + result = Timestamp('2017-03-26 00:00', tz='Europe/Paris') + expected = Timestamp('2017-03-26 00:00:00+0100', tz='Europe/Paris') + assert result == expected + + result = 
Timestamp('2017-03-26 01:00', tz='Europe/Paris') + expected = Timestamp('2017-03-26 01:00:00+0100', tz='Europe/Paris') + assert result == expected + + with pytest.raises(pytz.NonExistentTimeError): + Timestamp('2017-03-26 02:00', tz='Europe/Paris') + + result = Timestamp('2017-03-26 02:00:00+0100', tz='Europe/Paris') + naive = Timestamp(result.value) + expected = naive.tz_localize('UTC').tz_convert('Europe/Paris') + assert result == expected + + result = Timestamp('2017-03-26 03:00', tz='Europe/Paris') + expected = Timestamp('2017-03-26 03:00:00+0200', tz='Europe/Paris') + assert result == expected + + @pytest.mark.parametrize('tz', [pytz.timezone('US/Eastern'), + gettz('US/Eastern'), + 'US/Eastern', 'dateutil/US/Eastern']) + def test_timestamp_constructed_by_date_and_tz(self, tz): + # GH#2993, Timestamp cannot be constructed by datetime.date + # and tz correctly + + result = Timestamp(date(2012, 3, 11), tz=tz) + + expected = Timestamp('3/11/2012', tz=tz) + assert result.hour == expected.hour + assert result == expected + + @pytest.mark.parametrize('tz', [pytz.timezone('US/Eastern'), + gettz('US/Eastern'), + 'US/Eastern', 'dateutil/US/Eastern']) + def test_timestamp_add_timedelta_push_over_dst_boundary(self, tz): + # GH#1389 + + # 4 hours before DST transition + stamp = Timestamp('3/10/2012 22:00', tz=tz) + + result = stamp + timedelta(hours=6) + + # spring forward, + "7" hours + expected = Timestamp('3/11/2012 05:00', tz=tz) + + assert result == expected diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py new file mode 100644 index 0000000000000..994ff86e6fdf9 --- /dev/null +++ b/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -0,0 +1,264 @@ +# -*- coding: utf-8 -*- +from datetime import datetime + +import pytest +import pytz +from pytz import utc +from dateutil.tz import gettz + +import pandas.util.testing as tm +import pandas.util._test_decorators as td + +from pandas.compat import PY3 +from 
pandas._libs import tslib +from pandas._libs.tslibs.frequencies import _INVALID_FREQ_ERROR +from pandas import Timestamp, NaT + + +class TestTimestampUnaryOps(object): + + # -------------------------------------------------------------- + # Timestamp.round + + def test_round_day_naive(self): + dt = Timestamp('20130101 09:10:11') + result = dt.round('D') + expected = Timestamp('20130101') + assert result == expected + + dt = Timestamp('20130101 19:10:11') + result = dt.round('D') + expected = Timestamp('20130102') + assert result == expected + + dt = Timestamp('20130201 12:00:00') + result = dt.round('D') + expected = Timestamp('20130202') + assert result == expected + + dt = Timestamp('20130104 12:00:00') + result = dt.round('D') + expected = Timestamp('20130105') + assert result == expected + + def test_round_tzaware(self): + dt = Timestamp('20130101 09:10:11', tz='US/Eastern') + result = dt.round('D') + expected = Timestamp('20130101', tz='US/Eastern') + assert result == expected + + dt = Timestamp('20130101 09:10:11', tz='US/Eastern') + result = dt.round('s') + assert result == dt + + def test_round_30min(self): + # round + dt = Timestamp('20130104 12:32:00') + result = dt.round('30Min') + expected = Timestamp('20130104 12:30:00') + assert result == expected + + def test_round_subsecond(self): + # GH#14440 & GH#15578 + result = Timestamp('2016-10-17 12:00:00.0015').round('ms') + expected = Timestamp('2016-10-17 12:00:00.002000') + assert result == expected + + result = Timestamp('2016-10-17 12:00:00.00149').round('ms') + expected = Timestamp('2016-10-17 12:00:00.001000') + assert result == expected + + ts = Timestamp('2016-10-17 12:00:00.0015') + for freq in ['us', 'ns']: + assert ts == ts.round(freq) + + result = Timestamp('2016-10-17 12:00:00.001501031').round('10ns') + expected = Timestamp('2016-10-17 12:00:00.001501030') + assert result == expected + + def test_round_nonstandard_freq(self): + with tm.assert_produces_warning(): + Timestamp('2016-10-17 
12:00:00.001501031').round('1010ns') + + def test_round_invalid_arg(self): + stamp = Timestamp('2000-01-05 05:09:15.13') + with tm.assert_raises_regex(ValueError, _INVALID_FREQ_ERROR): + stamp.round('foo') + + @pytest.mark.parametrize('freq, expected', [ + ('D', Timestamp('2000-01-05 00:00:00')), + ('H', Timestamp('2000-01-05 05:00:00')), + ('S', Timestamp('2000-01-05 05:09:15'))]) + def test_round_frequencies(self, freq, expected): + stamp = Timestamp('2000-01-05 05:09:15.13') + + result = stamp.round(freq=freq) + assert result == expected + + @pytest.mark.parametrize('test_input, rounder, freq, expected', [ + ('2117-01-01 00:00:45', 'floor', '15s', '2117-01-01 00:00:45'), + ('2117-01-01 00:00:45', 'ceil', '15s', '2117-01-01 00:00:45'), + ('2117-01-01 00:00:45.000000012', 'floor', '10ns', + '2117-01-01 00:00:45.000000010'), + ('1823-01-01 00:00:01.000000012', 'ceil', '10ns', + '1823-01-01 00:00:01.000000020'), + ('1823-01-01 00:00:01', 'floor', '1s', '1823-01-01 00:00:01'), + ('1823-01-01 00:00:01', 'ceil', '1s', '1823-01-01 00:00:01'), + ('NaT', 'floor', '1s', 'NaT'), + ('NaT', 'ceil', '1s', 'NaT') + ]) + def test_ceil_floor_edge(self, test_input, rounder, freq, expected): + dt = Timestamp(test_input) + func = getattr(dt, rounder) + result = func(freq) + + if dt is NaT: + assert result is NaT + else: + expected = Timestamp(expected) + assert result == expected + + def test_ceil(self): + dt = Timestamp('20130101 09:10:11') + result = dt.ceil('D') + expected = Timestamp('20130102') + assert result == expected + + def test_floor(self): + dt = Timestamp('20130101 09:10:11') + result = dt.floor('D') + expected = Timestamp('20130101') + assert result == expected + + # -------------------------------------------------------------- + # Timestamp.replace + timezones = ['UTC', 'Asia/Tokyo', 'US/Eastern', 'dateutil/US/Pacific'] + + def test_replace_naive(self): + # GH#14621, GH#7825 + ts = Timestamp('2016-01-01 09:00:00') + result = ts.replace(hour=0) + expected = 
Timestamp('2016-01-01 00:00:00') + assert result == expected + + @pytest.mark.parametrize('tz', timezones) + def test_replace_aware(self, tz): + # GH#14621, GH#7825 + # replacing datetime components with and w/o presence of a timezone + ts = Timestamp('2016-01-01 09:00:00', tz=tz) + result = ts.replace(hour=0) + expected = Timestamp('2016-01-01 00:00:00', tz=tz) + assert result == expected + + @pytest.mark.parametrize('tz', timezones) + def test_replace_preserves_nanos(self, tz): + # GH#14621, GH#7825 + ts = Timestamp('2016-01-01 09:00:00.000000123', tz=tz) + result = ts.replace(hour=0) + expected = Timestamp('2016-01-01 00:00:00.000000123', tz=tz) + assert result == expected + + @pytest.mark.parametrize('tz', timezones) + def test_replace_multiple(self, tz): + # GH#14621, GH#7825 + # replacing datetime components with and w/o presence of a timezone + # test all + ts = Timestamp('2016-01-01 09:00:00.000000123', tz=tz) + result = ts.replace(year=2015, month=2, day=2, hour=0, minute=5, + second=5, microsecond=5, nanosecond=5) + expected = Timestamp('2015-02-02 00:05:05.000005005', tz=tz) + assert result == expected + + @pytest.mark.parametrize('tz', timezones) + def test_replace_invalid_kwarg(self, tz): + # GH#14621, GH#7825 + ts = Timestamp('2016-01-01 09:00:00.000000123', tz=tz) + with pytest.raises(TypeError): + ts.replace(foo=5) + + @pytest.mark.parametrize('tz', timezones) + def test_replace_integer_args(self, tz): + # GH#14621, GH#7825 + ts = Timestamp('2016-01-01 09:00:00.000000123', tz=tz) + with pytest.raises(ValueError): + ts.replace(hour=0.1) + + def test_replace_tzinfo_equiv_tz_localize_none(self): + # GH#14621, GH#7825 + # assert conversion to naive is the same as replacing tzinfo with None + ts = Timestamp('2013-11-03 01:59:59.999999-0400', tz='US/Eastern') + assert ts.tz_localize(None) == ts.replace(tzinfo=None) + + @td.skip_if_windows + def test_replace_tzinfo(self): + # GH#15683 + dt = datetime(2016, 3, 27, 1) + tzinfo = 
pytz.timezone('CET').localize(dt, is_dst=False).tzinfo + + result_dt = dt.replace(tzinfo=tzinfo) + result_pd = Timestamp(dt).replace(tzinfo=tzinfo) + + if PY3: + # datetime.timestamp() converts in the local timezone + with tm.set_timezone('UTC'): + assert result_dt.timestamp() == result_pd.timestamp() + + assert result_dt == result_pd + assert result_dt == result_pd.to_pydatetime() + + result_dt = dt.replace(tzinfo=tzinfo).replace(tzinfo=None) + result_pd = Timestamp(dt).replace(tzinfo=tzinfo).replace(tzinfo=None) + + if PY3: + # datetime.timestamp() converts in the local timezone + with tm.set_timezone('UTC'): + assert result_dt.timestamp() == result_pd.timestamp() + + assert result_dt == result_pd + assert result_dt == result_pd.to_pydatetime() + + @pytest.mark.parametrize('tz, normalize', [ + (pytz.timezone('US/Eastern'), lambda x: x.tzinfo.normalize(x)), + (gettz('US/Eastern'), lambda x: x)]) + def test_replace_across_dst(self, tz, normalize): + # GH#18319 check that 1) timezone is correctly normalized and + # 2) that hour is not incorrectly changed by this normalization + ts_naive = Timestamp('2017-12-03 16:03:30') + ts_aware = tslib._localize_pydatetime(ts_naive, tz) + + # Preliminary sanity-check + assert ts_aware == normalize(ts_aware) + + # Replace across DST boundary + ts2 = ts_aware.replace(month=6) + + # Check that `replace` preserves hour literal + assert (ts2.hour, ts2.minute) == (ts_aware.hour, ts_aware.minute) + + # Check that post-replace object is appropriately normalized + ts2b = normalize(ts2) + assert ts2 == ts2b + + # -------------------------------------------------------------- + + @td.skip_if_windows + def test_timestamp(self): + # GH#17329 + # tz-naive --> treat it as if it were UTC for purposes of timestamp() + ts = Timestamp.now() + uts = ts.replace(tzinfo=utc) + assert ts.timestamp() == uts.timestamp() + + tsc = Timestamp('2014-10-11 11:00:01.12345678', tz='US/Central') + utsc = tsc.tz_convert('UTC') + + # utsc is a different 
representation of the same time + assert tsc.timestamp() == utsc.timestamp() + + if PY3: + # datetime.timestamp() converts in the local timezone + with tm.set_timezone('UTC'): + + # should agree with datetime.timestamp method + dt = ts.to_pydatetime() + assert dt.timestamp() == ts.timestamp() diff --git a/pandas/tests/series/common.py b/pandas/tests/series/common.py index 613961e1c670f..0c25dcb29c3b2 100644 --- a/pandas/tests/series/common.py +++ b/pandas/tests/series/common.py @@ -1,4 +1,4 @@ -from pandas.util.decorators import cache_readonly +from pandas.util._decorators import cache_readonly import pandas.util.testing as tm import pandas as pd diff --git a/pandas/tests/series/indexing/__init__.py b/pandas/tests/series/indexing/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/series/indexing/conftest.py b/pandas/tests/series/indexing/conftest.py new file mode 100644 index 0000000000000..0e06f6b8e4640 --- /dev/null +++ b/pandas/tests/series/indexing/conftest.py @@ -0,0 +1,8 @@ +import pytest + +from pandas.tests.series.common import TestData + + +@pytest.fixture(scope='module') +def test_data(): + return TestData() diff --git a/pandas/tests/series/indexing/test_alter_index.py b/pandas/tests/series/indexing/test_alter_index.py new file mode 100644 index 0000000000000..c1b6d0a452232 --- /dev/null +++ b/pandas/tests/series/indexing/test_alter_index.py @@ -0,0 +1,520 @@ +# coding=utf-8 +# pylint: disable-msg=E1101,W0612 + +import pytest + +from datetime import datetime + +import pandas as pd +import numpy as np + +from numpy import nan + +from pandas import compat + +from pandas import (Series, date_range, isna, Categorical) +from pandas.compat import lrange, range + +from pandas.util.testing import (assert_series_equal) +import pandas.util.testing as tm + +JOIN_TYPES = ['inner', 'outer', 'left', 'right'] + + +@pytest.mark.parametrize( + 'first_slice,second_slice', [ + [[2, None], [None, -5]], + [[None, 0], [None, -5]], + 
[[None, -5], [None, 0]], + [[None, 0], [None, 0]] + ]) +@pytest.mark.parametrize('join_type', JOIN_TYPES) +@pytest.mark.parametrize('fill', [None, -1]) +def test_align(test_data, first_slice, second_slice, join_type, fill): + a = test_data.ts[slice(*first_slice)] + b = test_data.ts[slice(*second_slice)] + + aa, ab = a.align(b, join=join_type, fill_value=fill) + + join_index = a.index.join(b.index, how=join_type) + if fill is not None: + diff_a = aa.index.difference(join_index) + diff_b = ab.index.difference(join_index) + if len(diff_a) > 0: + assert (aa.reindex(diff_a) == fill).all() + if len(diff_b) > 0: + assert (ab.reindex(diff_b) == fill).all() + + ea = a.reindex(join_index) + eb = b.reindex(join_index) + + if fill is not None: + ea = ea.fillna(fill) + eb = eb.fillna(fill) + + assert_series_equal(aa, ea) + assert_series_equal(ab, eb) + assert aa.name == 'ts' + assert ea.name == 'ts' + assert ab.name == 'ts' + assert eb.name == 'ts' + + +@pytest.mark.parametrize( + 'first_slice,second_slice', [ + [[2, None], [None, -5]], + [[None, 0], [None, -5]], + [[None, -5], [None, 0]], + [[None, 0], [None, 0]] + ]) +@pytest.mark.parametrize('join_type', JOIN_TYPES) +@pytest.mark.parametrize('method', ['pad', 'bfill']) +@pytest.mark.parametrize('limit', [None, 1]) +def test_align_fill_method(test_data, + first_slice, second_slice, + join_type, method, limit): + a = test_data.ts[slice(*first_slice)] + b = test_data.ts[slice(*second_slice)] + + aa, ab = a.align(b, join=join_type, method=method, limit=limit) + + join_index = a.index.join(b.index, how=join_type) + ea = a.reindex(join_index) + eb = b.reindex(join_index) + + ea = ea.fillna(method=method, limit=limit) + eb = eb.fillna(method=method, limit=limit) + + assert_series_equal(aa, ea) + assert_series_equal(ab, eb) + + +def test_align_nocopy(test_data): + b = test_data.ts[:5].copy() + + # do copy + a = test_data.ts.copy() + ra, _ = a.align(b, join='left') + ra[:5] = 5 + assert not (a[:5] == 5).any() + + # do not copy + a = 
test_data.ts.copy() + ra, _ = a.align(b, join='left', copy=False) + ra[:5] = 5 + assert (a[:5] == 5).all() + + # do copy + a = test_data.ts.copy() + b = test_data.ts[:5].copy() + _, rb = a.align(b, join='right') + rb[:3] = 5 + assert not (b[:3] == 5).any() + + # do not copy + a = test_data.ts.copy() + b = test_data.ts[:5].copy() + _, rb = a.align(b, join='right', copy=False) + rb[:2] = 5 + assert (b[:2] == 5).all() + + +def test_align_same_index(test_data): + a, b = test_data.ts.align(test_data.ts, copy=False) + assert a.index is test_data.ts.index + assert b.index is test_data.ts.index + + a, b = test_data.ts.align(test_data.ts, copy=True) + assert a.index is not test_data.ts.index + assert b.index is not test_data.ts.index + + +def test_align_multiindex(): + # GH 10665 + + midx = pd.MultiIndex.from_product([range(2), range(3), range(2)], + names=('a', 'b', 'c')) + idx = pd.Index(range(2), name='b') + s1 = pd.Series(np.arange(12, dtype='int64'), index=midx) + s2 = pd.Series(np.arange(2, dtype='int64'), index=idx) + + # these must be the same results (but flipped) + res1l, res1r = s1.align(s2, join='left') + res2l, res2r = s2.align(s1, join='right') + + expl = s1 + tm.assert_series_equal(expl, res1l) + tm.assert_series_equal(expl, res2r) + expr = pd.Series([0, 0, 1, 1, np.nan, np.nan] * 2, index=midx) + tm.assert_series_equal(expr, res1r) + tm.assert_series_equal(expr, res2l) + + res1l, res1r = s1.align(s2, join='right') + res2l, res2r = s2.align(s1, join='left') + + exp_idx = pd.MultiIndex.from_product([range(2), range(2), range(2)], + names=('a', 'b', 'c')) + expl = pd.Series([0, 1, 2, 3, 6, 7, 8, 9], index=exp_idx) + tm.assert_series_equal(expl, res1l) + tm.assert_series_equal(expl, res2r) + expr = pd.Series([0, 0, 1, 1] * 2, index=exp_idx) + tm.assert_series_equal(expr, res1r) + tm.assert_series_equal(expr, res2l) + + +def test_reindex(test_data): + identity = test_data.series.reindex(test_data.series.index) + + # __array_interface__ is not defined for older 
numpies + # and on some pythons + try: + assert np.may_share_memory(test_data.series.index, identity.index) + except AttributeError: + pass + + assert identity.index.is_(test_data.series.index) + assert identity.index.identical(test_data.series.index) + + subIndex = test_data.series.index[10:20] + subSeries = test_data.series.reindex(subIndex) + + for idx, val in compat.iteritems(subSeries): + assert val == test_data.series[idx] + + subIndex2 = test_data.ts.index[10:20] + subTS = test_data.ts.reindex(subIndex2) + + for idx, val in compat.iteritems(subTS): + assert val == test_data.ts[idx] + stuffSeries = test_data.ts.reindex(subIndex) + + assert np.isnan(stuffSeries).all() + + # This is extremely important for the Cython code to not screw up + nonContigIndex = test_data.ts.index[::2] + subNonContig = test_data.ts.reindex(nonContigIndex) + for idx, val in compat.iteritems(subNonContig): + assert val == test_data.ts[idx] + + # return a copy the same index here + result = test_data.ts.reindex() + assert not (result is test_data.ts) + + +def test_reindex_nan(): + ts = Series([2, 3, 5, 7], index=[1, 4, nan, 8]) + + i, j = [nan, 1, nan, 8, 4, nan], [2, 0, 2, 3, 1, 2] + assert_series_equal(ts.reindex(i), ts.iloc[j]) + + ts.index = ts.index.astype('object') + + # reindex coerces index.dtype to float, loc/iloc doesn't + assert_series_equal(ts.reindex(i), ts.iloc[j], check_index_type=False) + + +def test_reindex_series_add_nat(): + rng = date_range('1/1/2000 00:00:00', periods=10, freq='10s') + series = Series(rng) + + result = series.reindex(lrange(15)) + assert np.issubdtype(result.dtype, np.dtype('M8[ns]')) + + mask = result.isna() + assert mask[-5:].all() + assert not mask[:-5].any() + + +def test_reindex_with_datetimes(): + rng = date_range('1/1/2000', periods=20) + ts = Series(np.random.randn(20), index=rng) + + result = ts.reindex(list(ts.index[5:10])) + expected = ts[5:10] + tm.assert_series_equal(result, expected) + + result = ts[list(ts.index[5:10])] + 
tm.assert_series_equal(result, expected) + + +def test_reindex_corner(test_data): + # (don't forget to fix this) I think it's fixed + test_data.empty.reindex(test_data.ts.index, method='pad') # it works + + # corner case: pad empty series + reindexed = test_data.empty.reindex(test_data.ts.index, method='pad') + + # pass non-Index + reindexed = test_data.ts.reindex(list(test_data.ts.index)) + assert_series_equal(test_data.ts, reindexed) + + # bad fill method + ts = test_data.ts[::2] + pytest.raises(Exception, ts.reindex, test_data.ts.index, method='foo') + + +def test_reindex_pad(): + s = Series(np.arange(10), dtype='int64') + s2 = s[::2] + + reindexed = s2.reindex(s.index, method='pad') + reindexed2 = s2.reindex(s.index, method='ffill') + assert_series_equal(reindexed, reindexed2) + + expected = Series([0, 0, 2, 2, 4, 4, 6, 6, 8, 8], index=np.arange(10)) + assert_series_equal(reindexed, expected) + + # GH4604 + s = Series([1, 2, 3, 4, 5], index=['a', 'b', 'c', 'd', 'e']) + new_index = ['a', 'g', 'c', 'f'] + expected = Series([1, 1, 3, 3], index=new_index) + + # this changes dtype because the ffill happens after + result = s.reindex(new_index).ffill() + assert_series_equal(result, expected.astype('float64')) + + result = s.reindex(new_index).ffill(downcast='infer') + assert_series_equal(result, expected) + + expected = Series([1, 5, 3, 5], index=new_index) + result = s.reindex(new_index, method='ffill') + assert_series_equal(result, expected) + + # inference of new dtype + s = Series([True, False, False, True], index=list('abcd')) + new_index = 'agc' + result = s.reindex(list(new_index)).ffill() + expected = Series([True, True, False], index=list(new_index)) + assert_series_equal(result, expected) + + # GH4618 shifted series downcasting + s = Series(False, index=lrange(0, 5)) + result = s.shift(1).fillna(method='bfill') + expected = Series(False, index=lrange(0, 5)) + assert_series_equal(result, expected) + + +def test_reindex_nearest(): + s = Series(np.arange(10, 
dtype='int64')) + target = [0.1, 0.9, 1.5, 2.0] + actual = s.reindex(target, method='nearest') + expected = Series(np.around(target).astype('int64'), target) + assert_series_equal(expected, actual) + + actual = s.reindex_like(actual, method='nearest') + assert_series_equal(expected, actual) + + actual = s.reindex_like(actual, method='nearest', tolerance=1) + assert_series_equal(expected, actual) + actual = s.reindex_like(actual, method='nearest', + tolerance=[1, 2, 3, 4]) + assert_series_equal(expected, actual) + + actual = s.reindex(target, method='nearest', tolerance=0.2) + expected = Series([0, 1, np.nan, 2], target) + assert_series_equal(expected, actual) + + actual = s.reindex(target, method='nearest', + tolerance=[0.3, 0.01, 0.4, 3]) + expected = Series([0, np.nan, np.nan, 2], target) + assert_series_equal(expected, actual) + + +def test_reindex_backfill(): + pass + + +def test_reindex_int(test_data): + ts = test_data.ts[::2] + int_ts = Series(np.zeros(len(ts), dtype=int), index=ts.index) + + # this should work fine + reindexed_int = int_ts.reindex(test_data.ts.index) + + # if NaNs introduced + assert reindexed_int.dtype == np.float_ + + # NO NaNs introduced + reindexed_int = int_ts.reindex(int_ts.index[::2]) + assert reindexed_int.dtype == np.int_ + + +def test_reindex_bool(test_data): + # A series other than float, int, string, or object + ts = test_data.ts[::2] + bool_ts = Series(np.zeros(len(ts), dtype=bool), index=ts.index) + + # this should work fine + reindexed_bool = bool_ts.reindex(test_data.ts.index) + + # if NaNs introduced + assert reindexed_bool.dtype == np.object_ + + # NO NaNs introduced + reindexed_bool = bool_ts.reindex(bool_ts.index[::2]) + assert reindexed_bool.dtype == np.bool_ + + +def test_reindex_bool_pad(test_data): + # fail + ts = test_data.ts[5:] + bool_ts = Series(np.zeros(len(ts), dtype=bool), index=ts.index) + filled_bool = bool_ts.reindex(test_data.ts.index, method='pad') + assert isna(filled_bool[:5]).all() + + +def 
test_reindex_categorical(): + index = date_range('20000101', periods=3) + + # reindexing to an invalid Categorical + s = Series(['a', 'b', 'c'], dtype='category') + result = s.reindex(index) + expected = Series(Categorical(values=[np.nan, np.nan, np.nan], + categories=['a', 'b', 'c'])) + expected.index = index + tm.assert_series_equal(result, expected) + + # partial reindexing + expected = Series(Categorical(values=['b', 'c'], categories=['a', 'b', + 'c'])) + expected.index = [1, 2] + result = s.reindex([1, 2]) + tm.assert_series_equal(result, expected) + + expected = Series(Categorical( + values=['c', np.nan], categories=['a', 'b', 'c'])) + expected.index = [2, 3] + result = s.reindex([2, 3]) + tm.assert_series_equal(result, expected) + + +def test_reindex_like(test_data): + other = test_data.ts[::2] + assert_series_equal(test_data.ts.reindex(other.index), + test_data.ts.reindex_like(other)) + + # GH 7179 + day1 = datetime(2013, 3, 5) + day2 = datetime(2013, 5, 5) + day3 = datetime(2014, 3, 5) + + series1 = Series([5, None, None], [day1, day2, day3]) + series2 = Series([None, None], [day1, day3]) + + result = series1.reindex_like(series2, method='pad') + expected = Series([5, np.nan], index=[day1, day3]) + assert_series_equal(result, expected) + + +def test_reindex_fill_value(): + # ----------------------------------------------------------- + # floats + floats = Series([1., 2., 3.]) + result = floats.reindex([1, 2, 3]) + expected = Series([2., 3., np.nan], index=[1, 2, 3]) + assert_series_equal(result, expected) + + result = floats.reindex([1, 2, 3], fill_value=0) + expected = Series([2., 3., 0], index=[1, 2, 3]) + assert_series_equal(result, expected) + + # ----------------------------------------------------------- + # ints + ints = Series([1, 2, 3]) + + result = ints.reindex([1, 2, 3]) + expected = Series([2., 3., np.nan], index=[1, 2, 3]) + assert_series_equal(result, expected) + + # don't upcast + result = ints.reindex([1, 2, 3], fill_value=0) + expected = 
Series([2, 3, 0], index=[1, 2, 3]) + assert issubclass(result.dtype.type, np.integer) + assert_series_equal(result, expected) + + # ----------------------------------------------------------- + # objects + objects = Series([1, 2, 3], dtype=object) + + result = objects.reindex([1, 2, 3]) + expected = Series([2, 3, np.nan], index=[1, 2, 3], dtype=object) + assert_series_equal(result, expected) + + result = objects.reindex([1, 2, 3], fill_value='foo') + expected = Series([2, 3, 'foo'], index=[1, 2, 3], dtype=object) + assert_series_equal(result, expected) + + # ------------------------------------------------------------ + # bools + bools = Series([True, False, True]) + + result = bools.reindex([1, 2, 3]) + expected = Series([False, True, np.nan], index=[1, 2, 3], dtype=object) + assert_series_equal(result, expected) + + result = bools.reindex([1, 2, 3], fill_value=False) + expected = Series([False, True, False], index=[1, 2, 3]) + assert_series_equal(result, expected) + + +def test_rename(): + # GH 17407 + s = Series(range(1, 6), index=pd.Index(range(2, 7), name='IntIndex')) + result = s.rename(str) + expected = s.rename(lambda i: str(i)) + assert_series_equal(result, expected) + + assert result.name == expected.name + + +def test_drop(): + # unique + s = Series([1, 2], index=['one', 'two']) + expected = Series([1], index=['one']) + result = s.drop(['two']) + assert_series_equal(result, expected) + result = s.drop('two', axis='rows') + assert_series_equal(result, expected) + + # non-unique + # GH 5248 + s = Series([1, 1, 2], index=['one', 'two', 'one']) + expected = Series([1, 2], index=['one', 'one']) + result = s.drop(['two'], axis=0) + assert_series_equal(result, expected) + result = s.drop('two') + assert_series_equal(result, expected) + + expected = Series([1], index=['two']) + result = s.drop(['one']) + assert_series_equal(result, expected) + result = s.drop('one') + assert_series_equal(result, expected) + + # single string/tuple-like + s = Series(range(3), 
index=list('abc')) + pytest.raises(KeyError, s.drop, 'bc') + pytest.raises(KeyError, s.drop, ('a',)) + + # errors='ignore' + s = Series(range(3), index=list('abc')) + result = s.drop('bc', errors='ignore') + assert_series_equal(result, s) + result = s.drop(['a', 'd'], errors='ignore') + expected = s.iloc[1:] + assert_series_equal(result, expected) + + # bad axis + pytest.raises(ValueError, s.drop, 'one', axis='columns') + + # GH 8522 + s = Series([2, 3], index=[True, False]) + assert s.index.is_object() + result = s.drop(True) + expected = Series([3], index=[False]) + assert_series_equal(result, expected) + + # GH 16877 + s = Series([2, 3], index=[0, 1]) + with tm.assert_raises_regex(KeyError, 'not contained in axis'): + s.drop([False, True]) diff --git a/pandas/tests/series/indexing/test_boolean.py b/pandas/tests/series/indexing/test_boolean.py new file mode 100644 index 0000000000000..f1f4a5a05697d --- /dev/null +++ b/pandas/tests/series/indexing/test_boolean.py @@ -0,0 +1,603 @@ +# coding=utf-8 +# pylint: disable-msg=E1101,W0612 + +import pytest + +import pandas as pd +import numpy as np + +from pandas import (Series, date_range, isna, Index, Timestamp) +from pandas.compat import lrange, range +from pandas.core.dtypes.common import is_integer + +from pandas.core.indexing import IndexingError +from pandas.tseries.offsets import BDay + +from pandas.util.testing import (assert_series_equal) +import pandas.util.testing as tm + +JOIN_TYPES = ['inner', 'outer', 'left', 'right'] + + +def test_getitem_boolean(test_data): + s = test_data.series + mask = s > s.median() + + # passing list is OK + result = s[list(mask)] + expected = s[mask] + assert_series_equal(result, expected) + tm.assert_index_equal(result.index, s.index[mask]) + + +def test_getitem_boolean_empty(): + s = Series([], dtype=np.int64) + s.index.name = 'index_name' + s = s[s.isna()] + assert s.index.name == 'index_name' + assert s.dtype == np.int64 + + # GH5877 + # indexing with empty series + s = 
Series(['A', 'B']) + expected = Series(np.nan, index=['C'], dtype=object) + result = s[Series(['C'], dtype=object)] + assert_series_equal(result, expected) + + s = Series(['A', 'B']) + expected = Series(dtype=object, index=Index([], dtype='int64')) + result = s[Series([], dtype=object)] + assert_series_equal(result, expected) + + # invalid because of the boolean indexer + # that's empty or not-aligned + def f(): + s[Series([], dtype=bool)] + + pytest.raises(IndexingError, f) + + def f(): + s[Series([True], dtype=bool)] + + pytest.raises(IndexingError, f) + + +def test_getitem_boolean_object(test_data): + # using column from DataFrame + + s = test_data.series + mask = s > s.median() + omask = mask.astype(object) + + # getitem + result = s[omask] + expected = s[mask] + assert_series_equal(result, expected) + + # setitem + s2 = s.copy() + cop = s.copy() + cop[omask] = 5 + s2[mask] = 5 + assert_series_equal(cop, s2) + + # nans raise exception + omask[5:10] = np.nan + pytest.raises(Exception, s.__getitem__, omask) + pytest.raises(Exception, s.__setitem__, omask, 5) + + +def test_getitem_setitem_boolean_corner(test_data): + ts = test_data.ts + mask_shifted = ts.shift(1, freq=BDay()) > ts.median() + + # these used to raise...?? 
+ + pytest.raises(Exception, ts.__getitem__, mask_shifted) + pytest.raises(Exception, ts.__setitem__, mask_shifted, 1) + # ts[mask_shifted] + # ts[mask_shifted] = 1 + + pytest.raises(Exception, ts.loc.__getitem__, mask_shifted) + pytest.raises(Exception, ts.loc.__setitem__, mask_shifted, 1) + # ts.loc[mask_shifted] + # ts.loc[mask_shifted] = 2 + + +def test_setitem_boolean(test_data): + mask = test_data.series > test_data.series.median() + + # similar indexed series + result = test_data.series.copy() + result[mask] = test_data.series * 2 + expected = test_data.series * 2 + assert_series_equal(result[mask], expected[mask]) + + # needs alignment + result = test_data.series.copy() + result[mask] = (test_data.series * 2)[0:5] + expected = (test_data.series * 2)[0:5].reindex_like(test_data.series) + expected[-mask] = test_data.series[mask] + assert_series_equal(result[mask], expected[mask]) + + +def test_get_set_boolean_different_order(test_data): + ordered = test_data.series.sort_values() + + # setting + copy = test_data.series.copy() + copy[ordered > 0] = 0 + + expected = test_data.series.copy() + expected[expected > 0] = 0 + + assert_series_equal(copy, expected) + + # getting + sel = test_data.series[ordered > 0] + exp = test_data.series[test_data.series > 0] + assert_series_equal(sel, exp) + + +def test_where_unsafe(): + # unsafe dtype changes + for dtype in [np.int8, np.int16, np.int32, np.int64, np.float16, + np.float32, np.float64]: + s = Series(np.arange(10), dtype=dtype) + mask = s < 5 + s[mask] = lrange(2, 7) + expected = Series(lrange(2, 7) + lrange(5, 10), dtype=dtype) + assert_series_equal(s, expected) + assert s.dtype == expected.dtype + + # these are allowed operations, but are upcasted + for dtype in [np.int64, np.float64]: + s = Series(np.arange(10), dtype=dtype) + mask = s < 5 + values = [2.5, 3.5, 4.5, 5.5, 6.5] + s[mask] = values + expected = Series(values + lrange(5, 10), dtype='float64') + assert_series_equal(s, expected) + assert s.dtype == 
expected.dtype + + # GH 9731 + s = Series(np.arange(10), dtype='int64') + mask = s > 5 + values = [2.5, 3.5, 4.5, 5.5] + s[mask] = values + expected = Series(lrange(6) + values, dtype='float64') + assert_series_equal(s, expected) + + # can't do these as we are forced to change the itemsize of the input + # to something we cannot + for dtype in [np.int8, np.int16, np.int32, np.float16, np.float32]: + s = Series(np.arange(10), dtype=dtype) + mask = s < 5 + values = [2.5, 3.5, 4.5, 5.5, 6.5] + pytest.raises(Exception, s.__setitem__, tuple(mask), values) + + # GH3235 + s = Series(np.arange(10), dtype='int64') + mask = s < 5 + s[mask] = lrange(2, 7) + expected = Series(lrange(2, 7) + lrange(5, 10), dtype='int64') + assert_series_equal(s, expected) + assert s.dtype == expected.dtype + + s = Series(np.arange(10), dtype='int64') + mask = s > 5 + s[mask] = [0] * 4 + expected = Series([0, 1, 2, 3, 4, 5] + [0] * 4, dtype='int64') + assert_series_equal(s, expected) + + s = Series(np.arange(10)) + mask = s > 5 + + def f(): + s[mask] = [5, 4, 3, 2, 1] + + pytest.raises(ValueError, f) + + def f(): + s[mask] = [0] * 5 + + pytest.raises(ValueError, f) + + # dtype changes + s = Series([1, 2, 3, 4]) + result = s.where(s > 2, np.nan) + expected = Series([np.nan, np.nan, 3, 4]) + assert_series_equal(result, expected) + + # GH 4667 + # setting with None changes dtype + s = Series(range(10)).astype(float) + s[8] = None + result = s[8] + assert isna(result) + + s = Series(range(10)).astype(float) + s[s > 8] = None + result = s[isna(s)] + expected = Series(np.nan, index=[9]) + assert_series_equal(result, expected) + + +def test_where_raise_on_error_deprecation(): + # gh-14968 + # deprecation of raise_on_error + s = Series(np.random.randn(5)) + cond = s > 0 + with tm.assert_produces_warning(FutureWarning): + s.where(cond, raise_on_error=True) + with tm.assert_produces_warning(FutureWarning): + s.mask(cond, raise_on_error=True) + + +def test_where(): + s = Series(np.random.randn(5)) + cond = 
s > 0 + + rs = s.where(cond).dropna() + rs2 = s[cond] + assert_series_equal(rs, rs2) + + rs = s.where(cond, -s) + assert_series_equal(rs, s.abs()) + + rs = s.where(cond) + assert (s.shape == rs.shape) + assert (rs is not s) + + # test alignment + cond = Series([True, False, False, True, False], index=s.index) + s2 = -(s.abs()) + + expected = s2[cond].reindex(s2.index[:3]).reindex(s2.index) + rs = s2.where(cond[:3]) + assert_series_equal(rs, expected) + + expected = s2.abs() + expected.iloc[0] = s2[0] + rs = s2.where(cond[:3], -s2) + assert_series_equal(rs, expected) + + +def test_where_error(): + s = Series(np.random.randn(5)) + cond = s > 0 + + pytest.raises(ValueError, s.where, 1) + pytest.raises(ValueError, s.where, cond[:3].values, -s) + + # GH 2745 + s = Series([1, 2]) + s[[True, False]] = [0, 1] + expected = Series([0, 2]) + assert_series_equal(s, expected) + + # failures + pytest.raises(ValueError, s.__setitem__, tuple([[[True, False]]]), + [0, 2, 3]) + pytest.raises(ValueError, s.__setitem__, tuple([[[True, False]]]), + []) + + +@pytest.mark.parametrize('klass', [list, tuple, np.array, Series]) +def test_where_array_like(klass): + # see gh-15414 + s = Series([1, 2, 3]) + cond = [False, True, True] + expected = Series([np.nan, 2, 3]) + + result = s.where(klass(cond)) + assert_series_equal(result, expected) + + +@pytest.mark.parametrize('cond', [ + [1, 0, 1], + Series([2, 5, 7]), + ["True", "False", "True"], + [Timestamp("2017-01-01"), pd.NaT, Timestamp("2017-01-02")] +]) +def test_where_invalid_input(cond): + # see gh-15414: only boolean arrays accepted + s = Series([1, 2, 3]) + msg = "Boolean array expected for the condition" + + with tm.assert_raises_regex(ValueError, msg): + s.where(cond) + + msg = "Array conditional must be same shape as self" + with tm.assert_raises_regex(ValueError, msg): + s.where([True]) + + +def test_where_ndframe_align(): + msg = "Array conditional must be same shape as self" + s = Series([1, 2, 3]) + + cond = [True] + with 
tm.assert_raises_regex(ValueError, msg): + s.where(cond) + + expected = Series([1, np.nan, np.nan]) + + out = s.where(Series(cond)) + tm.assert_series_equal(out, expected) + + cond = np.array([False, True, False, True]) + with tm.assert_raises_regex(ValueError, msg): + s.where(cond) + + expected = Series([np.nan, 2, np.nan]) + + out = s.where(Series(cond)) + tm.assert_series_equal(out, expected) + + +def test_where_setitem_invalid(): + # GH 2702 + # make sure correct exceptions are raised on invalid list assignment + + # slice + s = Series(list('abc')) + + def f(): + s[0:3] = list(range(27)) + + pytest.raises(ValueError, f) + + s[0:3] = list(range(3)) + expected = Series([0, 1, 2]) + assert_series_equal(s.astype(np.int64), expected, ) + + # slice with step + s = Series(list('abcdef')) + + def f(): + s[0:4:2] = list(range(27)) + + pytest.raises(ValueError, f) + + s = Series(list('abcdef')) + s[0:4:2] = list(range(2)) + expected = Series([0, 'b', 1, 'd', 'e', 'f']) + assert_series_equal(s, expected) + + # neg slices + s = Series(list('abcdef')) + + def f(): + s[:-1] = list(range(27)) + + pytest.raises(ValueError, f) + + s[-3:-1] = list(range(2)) + expected = Series(['a', 'b', 'c', 0, 1, 'f']) + assert_series_equal(s, expected) + + # list + s = Series(list('abc')) + + def f(): + s[[0, 1, 2]] = list(range(27)) + + pytest.raises(ValueError, f) + + s = Series(list('abc')) + + def f(): + s[[0, 1, 2]] = list(range(2)) + + pytest.raises(ValueError, f) + + # scalar + s = Series(list('abc')) + s[0] = list(range(10)) + expected = Series([list(range(10)), 'b', 'c']) + assert_series_equal(s, expected) + + +@pytest.mark.parametrize('size', range(2, 6)) +@pytest.mark.parametrize('mask', [ + [True, False, False, False, False], + [True, False], + [False] +]) +@pytest.mark.parametrize('item', [ + 2.0, np.nan, np.finfo(np.float).max, np.finfo(np.float).min +]) +# Test numpy arrays, lists and tuples as the input to be +# broadcast +@pytest.mark.parametrize('box', [ + lambda x: 
np.array([x]), + lambda x: [x], + lambda x: (x,) +]) +def test_broadcast(size, mask, item, box): + selection = np.resize(mask, size) + + data = np.arange(size, dtype=float) + + # Construct the expected series by taking the source + # data or item based on the selection + expected = Series([item if use_item else data[ + i] for i, use_item in enumerate(selection)]) + + s = Series(data) + s[selection] = box(item) + assert_series_equal(s, expected) + + s = Series(data) + result = s.where(~selection, box(item)) + assert_series_equal(result, expected) + + s = Series(data) + result = s.mask(selection, box(item)) + assert_series_equal(result, expected) + + +def test_where_inplace(): + s = Series(np.random.randn(5)) + cond = s > 0 + + rs = s.copy() + + rs.where(cond, inplace=True) + assert_series_equal(rs.dropna(), s[cond]) + assert_series_equal(rs, s.where(cond)) + + rs = s.copy() + rs.where(cond, -s, inplace=True) + assert_series_equal(rs, s.where(cond, -s)) + + +def test_where_dups(): + # GH 4550 + # where crashes with dups in index + s1 = Series(list(range(3))) + s2 = Series(list(range(3))) + comb = pd.concat([s1, s2]) + result = comb.where(comb < 2) + expected = Series([0, 1, np.nan, 0, 1, np.nan], + index=[0, 1, 2, 0, 1, 2]) + assert_series_equal(result, expected) + + # GH 4548 + # inplace updating not working with dups + comb[comb < 1] = 5 + expected = Series([5, 1, 2, 5, 1, 2], index=[0, 1, 2, 0, 1, 2]) + assert_series_equal(comb, expected) + + comb[comb < 2] += 10 + expected = Series([5, 11, 2, 5, 11, 2], index=[0, 1, 2, 0, 1, 2]) + assert_series_equal(comb, expected) + + +def test_where_numeric_with_string(): + # GH 9280 + s = pd.Series([1, 2, 3]) + w = s.where(s > 1, 'X') + + assert not is_integer(w[0]) + assert is_integer(w[1]) + assert is_integer(w[2]) + assert isinstance(w[0], str) + assert w.dtype == 'object' + + w = s.where(s > 1, ['X', 'Y', 'Z']) + assert not is_integer(w[0]) + assert is_integer(w[1]) + assert is_integer(w[2]) + assert isinstance(w[0], str) 
+ assert w.dtype == 'object' + + w = s.where(s > 1, np.array(['X', 'Y', 'Z'])) + assert not is_integer(w[0]) + assert is_integer(w[1]) + assert is_integer(w[2]) + assert isinstance(w[0], str) + assert w.dtype == 'object' + + +def test_where_timedelta_coerce(): + s = Series([1, 2], dtype='timedelta64[ns]') + expected = Series([10, 10]) + mask = np.array([False, False]) + + rs = s.where(mask, [10, 10]) + assert_series_equal(rs, expected) + + rs = s.where(mask, 10) + assert_series_equal(rs, expected) + + rs = s.where(mask, 10.0) + assert_series_equal(rs, expected) + + rs = s.where(mask, [10.0, 10.0]) + assert_series_equal(rs, expected) + + rs = s.where(mask, [10.0, np.nan]) + expected = Series([10, None], dtype='object') + assert_series_equal(rs, expected) + + +def test_where_datetime_conversion(): + s = Series(date_range('20130102', periods=2)) + expected = Series([10, 10]) + mask = np.array([False, False]) + + rs = s.where(mask, [10, 10]) + assert_series_equal(rs, expected) + + rs = s.where(mask, 10) + assert_series_equal(rs, expected) + + rs = s.where(mask, 10.0) + assert_series_equal(rs, expected) + + rs = s.where(mask, [10.0, 10.0]) + assert_series_equal(rs, expected) + + rs = s.where(mask, [10.0, np.nan]) + expected = Series([10, None], dtype='object') + assert_series_equal(rs, expected) + + # GH 15701 + timestamps = ['2016-12-31 12:00:04+00:00', + '2016-12-31 12:00:04.010000+00:00'] + s = Series([pd.Timestamp(t) for t in timestamps]) + rs = s.where(Series([False, True])) + expected = Series([pd.NaT, s[1]]) + assert_series_equal(rs, expected) + + +def test_mask(): + # compare with tested results in test_where + s = Series(np.random.randn(5)) + cond = s > 0 + + rs = s.where(~cond, np.nan) + assert_series_equal(rs, s.mask(cond)) + + rs = s.where(~cond) + rs2 = s.mask(cond) + assert_series_equal(rs, rs2) + + rs = s.where(~cond, -s) + rs2 = s.mask(cond, -s) + assert_series_equal(rs, rs2) + + cond = Series([True, False, False, True, False], index=s.index) + s2 = 
-(s.abs()) + rs = s2.where(~cond[:3]) + rs2 = s2.mask(cond[:3]) + assert_series_equal(rs, rs2) + + rs = s2.where(~cond[:3], -s2) + rs2 = s2.mask(cond[:3], -s2) + assert_series_equal(rs, rs2) + + pytest.raises(ValueError, s.mask, 1) + pytest.raises(ValueError, s.mask, cond[:3].values, -s) + + # dtype changes + s = Series([1, 2, 3, 4]) + result = s.mask(s > 2, np.nan) + expected = Series([1, 2, np.nan, np.nan]) + assert_series_equal(result, expected) + + +def test_mask_inplace(): + s = Series(np.random.randn(5)) + cond = s > 0 + + rs = s.copy() + rs.mask(cond, inplace=True) + assert_series_equal(rs.dropna(), s[~cond]) + assert_series_equal(rs, s.mask(cond)) + + rs = s.copy() + rs.mask(cond, -s, inplace=True) + assert_series_equal(rs, s.mask(cond, -s)) diff --git a/pandas/tests/series/indexing/test_callable.py b/pandas/tests/series/indexing/test_callable.py new file mode 100644 index 0000000000000..b656137545903 --- /dev/null +++ b/pandas/tests/series/indexing/test_callable.py @@ -0,0 +1,33 @@ +import pandas as pd +import pandas.util.testing as tm + + +def test_getitem_callable(): + # GH 12533 + s = pd.Series(4, index=list('ABCD')) + result = s[lambda x: 'A'] + assert result == s.loc['A'] + + result = s[lambda x: ['A', 'B']] + tm.assert_series_equal(result, s.loc[['A', 'B']]) + + result = s[lambda x: [True, False, True, True]] + tm.assert_series_equal(result, s.iloc[[0, 2, 3]]) + + +def test_setitem_callable(): + # GH 12533 + s = pd.Series([1, 2, 3, 4], index=list('ABCD')) + s[lambda x: 'A'] = -1 + tm.assert_series_equal(s, pd.Series([-1, 2, 3, 4], index=list('ABCD'))) + + +def test_setitem_other_callable(): + # GH 13299 + inc = lambda x: x + 1 + + s = pd.Series([1, 2, -1, 4]) + s[s < 0] = inc + + expected = pd.Series([1, 2, inc, 4]) + tm.assert_series_equal(s, expected) diff --git a/pandas/tests/series/indexing/test_datetime.py b/pandas/tests/series/indexing/test_datetime.py new file mode 100644 index 0000000000000..f484cdea2e09f --- /dev/null +++ 
b/pandas/tests/series/indexing/test_datetime.py @@ -0,0 +1,710 @@ +# coding=utf-8 +# pylint: disable-msg=E1101,W0612 + +import pytest + +from datetime import datetime, timedelta + +import numpy as np +import pandas as pd + +from pandas import (Series, DataFrame, + date_range, Timestamp, DatetimeIndex, NaT) + +from pandas.compat import lrange, range +from pandas.util.testing import (assert_series_equal, + assert_frame_equal, assert_almost_equal) + +import pandas.util.testing as tm + +import pandas._libs.index as _index +from pandas._libs import tslib + +JOIN_TYPES = ['inner', 'outer', 'left', 'right'] + +""" +Also test support for datetime64[ns] in Series / DataFrame +""" + + +def test_fancy_getitem(): + dti = DatetimeIndex(freq='WOM-1FRI', start=datetime(2005, 1, 1), + end=datetime(2010, 1, 1)) + + s = Series(np.arange(len(dti)), index=dti) + + assert s[48] == 48 + assert s['1/2/2009'] == 48 + assert s['2009-1-2'] == 48 + assert s[datetime(2009, 1, 2)] == 48 + assert s[Timestamp(datetime(2009, 1, 2))] == 48 + pytest.raises(KeyError, s.__getitem__, '2009-1-3') + + assert_series_equal(s['3/6/2009':'2009-06-05'], + s[datetime(2009, 3, 6):datetime(2009, 6, 5)]) + + +def test_fancy_setitem(): + dti = DatetimeIndex(freq='WOM-1FRI', start=datetime(2005, 1, 1), + end=datetime(2010, 1, 1)) + + s = Series(np.arange(len(dti)), index=dti) + s[48] = -1 + assert s[48] == -1 + s['1/2/2009'] = -2 + assert s[48] == -2 + s['1/2/2009':'2009-06-05'] = -3 + assert (s[48:54] == -3).all() + + +def test_dti_snap(): + dti = DatetimeIndex(['1/1/2002', '1/2/2002', '1/3/2002', '1/4/2002', + '1/5/2002', '1/6/2002', '1/7/2002'], freq='D') + + res = dti.snap(freq='W-MON') + exp = date_range('12/31/2001', '1/7/2002', freq='w-mon') + exp = exp.repeat([3, 4]) + assert (res == exp).all() + + res = dti.snap(freq='B') + + exp = date_range('1/1/2002', '1/7/2002', freq='b') + exp = exp.repeat([1, 1, 1, 2, 2]) + assert (res == exp).all() + + +def test_dti_reset_index_round_trip(): + dti = 
DatetimeIndex(start='1/1/2001', end='6/1/2001', freq='D') + d1 = DataFrame({'v': np.random.rand(len(dti))}, index=dti) + d2 = d1.reset_index() + assert d2.dtypes[0] == np.dtype('M8[ns]') + d3 = d2.set_index('index') + assert_frame_equal(d1, d3, check_names=False) + + # #2329 + stamp = datetime(2012, 11, 22) + df = DataFrame([[stamp, 12.1]], columns=['Date', 'Value']) + df = df.set_index('Date') + + assert df.index[0] == stamp + assert df.reset_index()['Date'][0] == stamp + + +def test_series_set_value(): + # #1561 + + dates = [datetime(2001, 1, 1), datetime(2001, 1, 2)] + index = DatetimeIndex(dates) + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + s = Series().set_value(dates[0], 1.) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + s2 = s.set_value(dates[1], np.nan) + + exp = Series([1., np.nan], index=index) + + assert_series_equal(s2, exp) + + # s = Series(index[:1], index[:1]) + # s2 = s.set_value(dates[1], index[1]) + # assert s2.values.dtype == 'M8[ns]' + + +@pytest.mark.slow +def test_slice_locs_indexerror(): + times = [datetime(2000, 1, 1) + timedelta(minutes=i * 10) + for i in range(100000)] + s = Series(lrange(100000), times) + s.loc[datetime(1900, 1, 1):datetime(2100, 1, 1)] + + +def test_slicing_datetimes(): + # GH 7523 + + # unique + df = DataFrame(np.arange(4., dtype='float64'), + index=[datetime(2001, 1, i, 10, 00) + for i in [1, 2, 3, 4]]) + result = df.loc[datetime(2001, 1, 1, 10):] + assert_frame_equal(result, df) + result = df.loc[:datetime(2001, 1, 4, 10)] + assert_frame_equal(result, df) + result = df.loc[datetime(2001, 1, 1, 10):datetime(2001, 1, 4, 10)] + assert_frame_equal(result, df) + + result = df.loc[datetime(2001, 1, 1, 11):] + expected = df.iloc[1:] + assert_frame_equal(result, expected) + result = df.loc['20010101 11':] + assert_frame_equal(result, expected) + + # duplicates + df = pd.DataFrame(np.arange(5., dtype='float64'), + index=[datetime(2001, 1, i, 10, 00) + for i in 
[1, 2, 2, 3, 4]]) + + result = df.loc[datetime(2001, 1, 1, 10):] + assert_frame_equal(result, df) + result = df.loc[:datetime(2001, 1, 4, 10)] + assert_frame_equal(result, df) + result = df.loc[datetime(2001, 1, 1, 10):datetime(2001, 1, 4, 10)] + assert_frame_equal(result, df) + + result = df.loc[datetime(2001, 1, 1, 11):] + expected = df.iloc[1:] + assert_frame_equal(result, expected) + result = df.loc['20010101 11':] + assert_frame_equal(result, expected) + + +def test_frame_datetime64_duplicated(): + dates = date_range('2010-07-01', end='2010-08-05') + + tst = DataFrame({'symbol': 'AAA', 'date': dates}) + result = tst.duplicated(['date', 'symbol']) + assert (-result).all() + + tst = DataFrame({'date': dates}) + result = tst.duplicated() + assert (-result).all() + + +def test_getitem_setitem_datetime_tz_pytz(): + from pytz import timezone as tz + from pandas import date_range + + N = 50 + # testing with timezone, GH #2785 + rng = date_range('1/1/1990', periods=N, freq='H', tz='US/Eastern') + ts = Series(np.random.randn(N), index=rng) + + # also test Timestamp tz handling, GH #2789 + result = ts.copy() + result["1990-01-01 09:00:00+00:00"] = 0 + result["1990-01-01 09:00:00+00:00"] = ts[4] + assert_series_equal(result, ts) + + result = ts.copy() + result["1990-01-01 03:00:00-06:00"] = 0 + result["1990-01-01 03:00:00-06:00"] = ts[4] + assert_series_equal(result, ts) + + # repeat with datetimes + result = ts.copy() + result[datetime(1990, 1, 1, 9, tzinfo=tz('UTC'))] = 0 + result[datetime(1990, 1, 1, 9, tzinfo=tz('UTC'))] = ts[4] + assert_series_equal(result, ts) + + result = ts.copy() + + # comparison dates with datetime MUST be localized! 
+ date = tz('US/Central').localize(datetime(1990, 1, 1, 3)) + result[date] = 0 + result[date] = ts[4] + assert_series_equal(result, ts) + + +def test_getitem_setitem_datetime_tz_dateutil(): + from dateutil.tz import tzutc + from pandas._libs.tslibs.timezones import dateutil_gettz as gettz + + tz = lambda x: tzutc() if x == 'UTC' else gettz( + x) # handle special case for utc in dateutil + + from pandas import date_range + + N = 50 + + # testing with timezone, GH #2785 + rng = date_range('1/1/1990', periods=N, freq='H', + tz='America/New_York') + ts = Series(np.random.randn(N), index=rng) + + # also test Timestamp tz handling, GH #2789 + result = ts.copy() + result["1990-01-01 09:00:00+00:00"] = 0 + result["1990-01-01 09:00:00+00:00"] = ts[4] + assert_series_equal(result, ts) + + result = ts.copy() + result["1990-01-01 03:00:00-06:00"] = 0 + result["1990-01-01 03:00:00-06:00"] = ts[4] + assert_series_equal(result, ts) + + # repeat with datetimes + result = ts.copy() + result[datetime(1990, 1, 1, 9, tzinfo=tz('UTC'))] = 0 + result[datetime(1990, 1, 1, 9, tzinfo=tz('UTC'))] = ts[4] + assert_series_equal(result, ts) + + result = ts.copy() + result[datetime(1990, 1, 1, 3, tzinfo=tz('America/Chicago'))] = 0 + result[datetime(1990, 1, 1, 3, tzinfo=tz('America/Chicago'))] = ts[4] + assert_series_equal(result, ts) + + +def test_getitem_setitem_datetimeindex(): + N = 50 + # testing with timezone, GH #2785 + rng = date_range('1/1/1990', periods=N, freq='H', tz='US/Eastern') + ts = Series(np.random.randn(N), index=rng) + + result = ts["1990-01-01 04:00:00"] + expected = ts[4] + assert result == expected + + result = ts.copy() + result["1990-01-01 04:00:00"] = 0 + result["1990-01-01 04:00:00"] = ts[4] + assert_series_equal(result, ts) + + result = ts["1990-01-01 04:00:00":"1990-01-01 07:00:00"] + expected = ts[4:8] + assert_series_equal(result, expected) + + result = ts.copy() + result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = 0 + result["1990-01-01 04:00:00":"1990-01-01 
07:00:00"] = ts[4:8] + assert_series_equal(result, ts) + + lb = "1990-01-01 04:00:00" + rb = "1990-01-01 07:00:00" + # GH#18435 strings get a pass from tzawareness compat + result = ts[(ts.index >= lb) & (ts.index <= rb)] + expected = ts[4:8] + assert_series_equal(result, expected) + + lb = "1990-01-01 04:00:00-0500" + rb = "1990-01-01 07:00:00-0500" + result = ts[(ts.index >= lb) & (ts.index <= rb)] + expected = ts[4:8] + assert_series_equal(result, expected) + + # repeat all the above with naive datetimes + result = ts[datetime(1990, 1, 1, 4)] + expected = ts[4] + assert result == expected + + result = ts.copy() + result[datetime(1990, 1, 1, 4)] = 0 + result[datetime(1990, 1, 1, 4)] = ts[4] + assert_series_equal(result, ts) + + result = ts[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)] + expected = ts[4:8] + assert_series_equal(result, expected) + + result = ts.copy() + result[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)] = 0 + result[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)] = ts[4:8] + assert_series_equal(result, ts) + + lb = datetime(1990, 1, 1, 4) + rb = datetime(1990, 1, 1, 7) + with pytest.raises(TypeError): + # tznaive vs tzaware comparison is invalid + # see GH#18376, GH#18162 + ts[(ts.index >= lb) & (ts.index <= rb)] + + lb = pd.Timestamp(datetime(1990, 1, 1, 4)).tz_localize(rng.tzinfo) + rb = pd.Timestamp(datetime(1990, 1, 1, 7)).tz_localize(rng.tzinfo) + result = ts[(ts.index >= lb) & (ts.index <= rb)] + expected = ts[4:8] + assert_series_equal(result, expected) + + result = ts[ts.index[4]] + expected = ts[4] + assert result == expected + + result = ts[ts.index[4:8]] + expected = ts[4:8] + assert_series_equal(result, expected) + + result = ts.copy() + result[ts.index[4:8]] = 0 + result[4:8] = ts[4:8] + assert_series_equal(result, ts) + + # also test partial date slicing + result = ts["1990-01-02"] + expected = ts[24:48] + assert_series_equal(result, expected) + + result = ts.copy() + result["1990-01-02"] = 0 + result["1990-01-02"] = 
ts[24:48] + assert_series_equal(result, ts) + + +def test_getitem_setitem_periodindex(): + from pandas import period_range + + N = 50 + rng = period_range('1/1/1990', periods=N, freq='H') + ts = Series(np.random.randn(N), index=rng) + + result = ts["1990-01-01 04"] + expected = ts[4] + assert result == expected + + result = ts.copy() + result["1990-01-01 04"] = 0 + result["1990-01-01 04"] = ts[4] + assert_series_equal(result, ts) + + result = ts["1990-01-01 04":"1990-01-01 07"] + expected = ts[4:8] + assert_series_equal(result, expected) + + result = ts.copy() + result["1990-01-01 04":"1990-01-01 07"] = 0 + result["1990-01-01 04":"1990-01-01 07"] = ts[4:8] + assert_series_equal(result, ts) + + lb = "1990-01-01 04" + rb = "1990-01-01 07" + result = ts[(ts.index >= lb) & (ts.index <= rb)] + expected = ts[4:8] + assert_series_equal(result, expected) + + # GH 2782 + result = ts[ts.index[4]] + expected = ts[4] + assert result == expected + + result = ts[ts.index[4:8]] + expected = ts[4:8] + assert_series_equal(result, expected) + + result = ts.copy() + result[ts.index[4:8]] = 0 + result[4:8] = ts[4:8] + assert_series_equal(result, ts) + + +def test_getitem_median_slice_bug(): + index = date_range('20090415', '20090519', freq='2B') + s = Series(np.random.randn(13), index=index) + + indexer = [slice(6, 7, None)] + result = s[indexer] + expected = s[indexer[0]] + assert_series_equal(result, expected) + + +def test_datetime_indexing(): + from pandas import date_range + + index = date_range('1/1/2000', '1/7/2000') + index = index.repeat(3) + + s = Series(len(index), index=index) + stamp = Timestamp('1/8/2000') + + pytest.raises(KeyError, s.__getitem__, stamp) + s[stamp] = 0 + assert s[stamp] == 0 + + # not monotonic + s = Series(len(index), index=index) + s = s[::-1] + + pytest.raises(KeyError, s.__getitem__, stamp) + s[stamp] = 0 + assert s[stamp] == 0 + + +""" +test duplicates in time series +""" + + +@pytest.fixture(scope='module') +def dups(): + dates = [datetime(2000, 
1, 2), datetime(2000, 1, 2), + datetime(2000, 1, 2), datetime(2000, 1, 3), + datetime(2000, 1, 3), datetime(2000, 1, 3), + datetime(2000, 1, 4), datetime(2000, 1, 4), + datetime(2000, 1, 4), datetime(2000, 1, 5)] + + return Series(np.random.randn(len(dates)), index=dates) + + +def test_constructor(dups): + assert isinstance(dups, Series) + assert isinstance(dups.index, DatetimeIndex) + + +def test_is_unique_monotonic(dups): + assert not dups.index.is_unique + + +def test_index_unique(dups): + uniques = dups.index.unique() + expected = DatetimeIndex([datetime(2000, 1, 2), datetime(2000, 1, 3), + datetime(2000, 1, 4), datetime(2000, 1, 5)]) + assert uniques.dtype == 'M8[ns]' # sanity + tm.assert_index_equal(uniques, expected) + assert dups.index.nunique() == 4 + + # #2563 + assert isinstance(uniques, DatetimeIndex) + + dups_local = dups.index.tz_localize('US/Eastern') + dups_local.name = 'foo' + result = dups_local.unique() + expected = DatetimeIndex(expected, name='foo') + expected = expected.tz_localize('US/Eastern') + assert result.tz is not None + assert result.name == 'foo' + tm.assert_index_equal(result, expected) + + # NaT, note this is excluded + arr = [1370745748 + t for t in range(20)] + [tslib.iNaT] + idx = DatetimeIndex(arr * 3) + tm.assert_index_equal(idx.unique(), DatetimeIndex(arr)) + assert idx.nunique() == 20 + assert idx.nunique(dropna=False) == 21 + + arr = [Timestamp('2013-06-09 02:42:28') + timedelta(seconds=t) + for t in range(20)] + [NaT] + idx = DatetimeIndex(arr * 3) + tm.assert_index_equal(idx.unique(), DatetimeIndex(arr)) + assert idx.nunique() == 20 + assert idx.nunique(dropna=False) == 21 + + +def test_index_dupes_contains(): + d = datetime(2011, 12, 5, 20, 30) + ix = DatetimeIndex([d, d]) + assert d in ix + + +def test_duplicate_dates_indexing(dups): + ts = dups + + uniques = ts.index.unique() + for date in uniques: + result = ts[date] + + mask = ts.index == date + total = (ts.index == date).sum() + expected = ts[mask] + if total > 1: + 
assert_series_equal(result, expected) + else: + assert_almost_equal(result, expected[0]) + + cp = ts.copy() + cp[date] = 0 + expected = Series(np.where(mask, 0, ts), index=ts.index) + assert_series_equal(cp, expected) + + pytest.raises(KeyError, ts.__getitem__, datetime(2000, 1, 6)) + + # new index + ts[datetime(2000, 1, 6)] = 0 + assert ts[datetime(2000, 1, 6)] == 0 + + +def test_range_slice(): + idx = DatetimeIndex(['1/1/2000', '1/2/2000', '1/2/2000', '1/3/2000', + '1/4/2000']) + + ts = Series(np.random.randn(len(idx)), index=idx) + + result = ts['1/2/2000':] + expected = ts[1:] + assert_series_equal(result, expected) + + result = ts['1/2/2000':'1/3/2000'] + expected = ts[1:4] + assert_series_equal(result, expected) + + +def test_groupby_average_dup_values(dups): + result = dups.groupby(level=0).mean() + expected = dups.groupby(dups.index).mean() + assert_series_equal(result, expected) + + +def test_indexing_over_size_cutoff(): + import datetime + # #1821 + + old_cutoff = _index._SIZE_CUTOFF + try: + _index._SIZE_CUTOFF = 1000 + + # create large list of non periodic datetime + dates = [] + sec = datetime.timedelta(seconds=1) + half_sec = datetime.timedelta(microseconds=500000) + d = datetime.datetime(2011, 12, 5, 20, 30) + n = 1100 + for i in range(n): + dates.append(d) + dates.append(d + sec) + dates.append(d + sec + half_sec) + dates.append(d + sec + sec + half_sec) + d += 3 * sec + + # duplicate some values in the list + duplicate_positions = np.random.randint(0, len(dates) - 1, 20) + for p in duplicate_positions: + dates[p + 1] = dates[p] + + df = DataFrame(np.random.randn(len(dates), 4), + index=dates, + columns=list('ABCD')) + + pos = n * 3 + timestamp = df.index[pos] + assert timestamp in df.index + + # it works! 
+ df.loc[timestamp] + assert len(df.loc[[timestamp]]) > 0 + finally: + _index._SIZE_CUTOFF = old_cutoff + + +def test_indexing_unordered(): + # GH 2437 + rng = date_range(start='2011-01-01', end='2011-01-15') + ts = Series(np.random.rand(len(rng)), index=rng) + ts2 = pd.concat([ts[0:4], ts[-4:], ts[4:-4]]) + + for t in ts.index: + # TODO: unused? + s = str(t) # noqa + + expected = ts[t] + result = ts2[t] + assert expected == result + + # GH 3448 (ranges) + def compare(slobj): + result = ts2[slobj].copy() + result = result.sort_index() + expected = ts[slobj] + assert_series_equal(result, expected) + + compare(slice('2011-01-01', '2011-01-15')) + compare(slice('2010-12-30', '2011-01-15')) + compare(slice('2011-01-01', '2011-01-16')) + + # partial ranges + compare(slice('2011-01-01', '2011-01-6')) + compare(slice('2011-01-06', '2011-01-8')) + compare(slice('2011-01-06', '2011-01-12')) + + # single values + result = ts2['2011'].sort_index() + expected = ts['2011'] + assert_series_equal(result, expected) + + # diff freq + rng = date_range(datetime(2005, 1, 1), periods=20, freq='M') + ts = Series(np.arange(len(rng)), index=rng) + ts = ts.take(np.random.permutation(20)) + + result = ts['2005'] + for t in result.index: + assert t.year == 2005 + + +def test_indexing(): + idx = date_range("2001-1-1", periods=20, freq='M') + ts = Series(np.random.rand(len(idx)), index=idx) + + # getting + + # GH 3070, make sure semantics work on Series/Frame + expected = ts['2001'] + expected.name = 'A' + + df = DataFrame(dict(A=ts)) + result = df['2001']['A'] + assert_series_equal(expected, result) + + # setting + ts['2001'] = 1 + expected = ts['2001'] + expected.name = 'A' + + df.loc['2001', 'A'] = 1 + + result = df['2001']['A'] + assert_series_equal(expected, result) + + # GH3546 (not including times on the last day) + idx = date_range(start='2013-05-31 00:00', end='2013-05-31 23:00', + freq='H') + ts = Series(lrange(len(idx)), index=idx) + expected = ts['2013-05'] + 
assert_series_equal(expected, ts) + + idx = date_range(start='2013-05-31 00:00', end='2013-05-31 23:59', + freq='S') + ts = Series(lrange(len(idx)), index=idx) + expected = ts['2013-05'] + assert_series_equal(expected, ts) + + idx = [Timestamp('2013-05-31 00:00'), + Timestamp(datetime(2013, 5, 31, 23, 59, 59, 999999))] + ts = Series(lrange(len(idx)), index=idx) + expected = ts['2013'] + assert_series_equal(expected, ts) + + # GH14826, indexing with a seconds resolution string / datetime object + df = DataFrame(np.random.rand(5, 5), + columns=['open', 'high', 'low', 'close', 'volume'], + index=date_range('2012-01-02 18:01:00', + periods=5, tz='US/Central', freq='s')) + expected = df.loc[[df.index[2]]] + + # this is a single date, so will raise + pytest.raises(KeyError, df.__getitem__, '2012-01-02 18:01:02', ) + pytest.raises(KeyError, df.__getitem__, df.index[2], ) + + +""" +test NaT support +""" + + +def test_set_none_nan(): + series = Series(date_range('1/1/2000', periods=10)) + series[3] = None + assert series[3] is NaT + + series[3:5] = None + assert series[4] is NaT + + series[5] = np.nan + assert series[5] is NaT + + series[5:7] = np.nan + assert series[6] is NaT + + +def test_nat_operations(): + # GH 8617 + s = Series([0, pd.NaT], dtype='m8[ns]') + exp = s[0] + assert s.median() == exp + assert s.min() == exp + assert s.max() == exp + + +@pytest.mark.parametrize('method', ["round", "floor", "ceil"]) +@pytest.mark.parametrize('freq', ["s", "5s", "min", "5min", "h", "5h"]) +def test_round_nat(method, freq): + # GH14940 + s = Series([pd.NaT]) + expected = Series(pd.NaT) + round_method = getattr(s.dt, method) + assert_series_equal(round_method(freq), expected) diff --git a/pandas/tests/series/indexing/test_iloc.py b/pandas/tests/series/indexing/test_iloc.py new file mode 100644 index 0000000000000..648a37ce0262b --- /dev/null +++ b/pandas/tests/series/indexing/test_iloc.py @@ -0,0 +1,38 @@ +# coding=utf-8 +# pylint: disable-msg=E1101,W0612 + +import numpy as np + 
+from pandas import Series + +from pandas.compat import lrange, range +from pandas.util.testing import (assert_series_equal, + assert_almost_equal) + + +def test_iloc(): + s = Series(np.random.randn(10), index=lrange(0, 20, 2)) + + for i in range(len(s)): + result = s.iloc[i] + exp = s[s.index[i]] + assert_almost_equal(result, exp) + + # pass a slice + result = s.iloc[slice(1, 3)] + expected = s.loc[2:4] + assert_series_equal(result, expected) + + # test slice is a view + result[:] = 0 + assert (s[1:3] == 0).all() + + # list of integers + result = s.iloc[[0, 2, 3, 4, 5]] + expected = s.reindex(s.index[[0, 2, 3, 4, 5]]) + assert_series_equal(result, expected) + + +def test_iloc_nonunique(): + s = Series([0, 1, 2], index=[0, 1, 0]) + assert s.iloc[2] == 2 diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py new file mode 100644 index 0000000000000..5cc1a8ff1c451 --- /dev/null +++ b/pandas/tests/series/indexing/test_indexing.py @@ -0,0 +1,760 @@ +# coding=utf-8 +# pylint: disable-msg=E1101,W0612 + +""" test get/set & misc """ + +import pytest + +from datetime import timedelta + +import numpy as np +import pandas as pd + +from pandas.core.dtypes.common import is_scalar +from pandas import (Series, DataFrame, MultiIndex, + Timestamp, Timedelta, Categorical) +from pandas.tseries.offsets import BDay + +from pandas.compat import lrange, range + +from pandas.util.testing import (assert_series_equal) +import pandas.util.testing as tm + + +def test_basic_indexing(): + s = Series(np.random.randn(5), index=['a', 'b', 'a', 'a', 'b']) + + pytest.raises(IndexError, s.__getitem__, 5) + pytest.raises(IndexError, s.__setitem__, 5, 0) + + pytest.raises(KeyError, s.__getitem__, 'c') + + s = s.sort_index() + + pytest.raises(IndexError, s.__getitem__, 5) + pytest.raises(IndexError, s.__setitem__, 5, 0) + + +def test_basic_getitem_with_labels(test_data): + indices = test_data.ts.index[[5, 10, 15]] + + result = test_data.ts[indices] + 
expected = test_data.ts.reindex(indices) + assert_series_equal(result, expected) + + result = test_data.ts[indices[0]:indices[2]] + expected = test_data.ts.loc[indices[0]:indices[2]] + assert_series_equal(result, expected) + + # integer indexes, be careful + s = Series(np.random.randn(10), index=lrange(0, 20, 2)) + inds = [0, 2, 5, 7, 8] + arr_inds = np.array([0, 2, 5, 7, 8]) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = s[inds] + expected = s.reindex(inds) + assert_series_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = s[arr_inds] + expected = s.reindex(arr_inds) + assert_series_equal(result, expected) + + # GH12089 + # with tz for values + s = Series(pd.date_range("2011-01-01", periods=3, tz="US/Eastern"), + index=['a', 'b', 'c']) + expected = Timestamp('2011-01-01', tz='US/Eastern') + result = s.loc['a'] + assert result == expected + result = s.iloc[0] + assert result == expected + result = s['a'] + assert result == expected + + +def test_getitem_setitem_ellipsis(): + s = Series(np.random.randn(10)) + + np.fix(s) + + result = s[...] + assert_series_equal(result, s) + + s[...] 
= 5 + assert (result == 5).all() + + +def test_getitem_get(test_data): + test_series = test_data.series + test_obj_series = test_data.objSeries + + idx1 = test_series.index[5] + idx2 = test_obj_series.index[5] + + assert test_series[idx1] == test_series.get(idx1) + assert test_obj_series[idx2] == test_obj_series.get(idx2) + + assert test_series[idx1] == test_series[5] + assert test_obj_series[idx2] == test_obj_series[5] + + assert test_series.get(-1) == test_series.get(test_series.index[-1]) + assert test_series[5] == test_series.get(test_series.index[5]) + + # missing + d = test_data.ts.index[0] - BDay() + pytest.raises(KeyError, test_data.ts.__getitem__, d) + + # None + # GH 5652 + for s in [Series(), Series(index=list('abc'))]: + result = s.get(None) + assert result is None + + +def test_getitem_fancy(test_data): + slice1 = test_data.series[[1, 2, 3]] + slice2 = test_data.objSeries[[1, 2, 3]] + assert test_data.series.index[2] == slice1.index[1] + assert test_data.objSeries.index[2] == slice2.index[1] + assert test_data.series[2] == slice1[1] + assert test_data.objSeries[2] == slice2[1] + + +def test_getitem_generator(test_data): + gen = (x > 0 for x in test_data.series) + result = test_data.series[gen] + result2 = test_data.series[iter(test_data.series > 0)] + expected = test_data.series[test_data.series > 0] + assert_series_equal(result, expected) + assert_series_equal(result2, expected) + + +def test_type_promotion(): + # GH12599 + s = pd.Series() + s["a"] = pd.Timestamp("2016-01-01") + s["b"] = 3.0 + s["c"] = "foo" + expected = Series([pd.Timestamp("2016-01-01"), 3.0, "foo"], + index=["a", "b", "c"]) + assert_series_equal(s, expected) + + +@pytest.mark.parametrize( + 'result_1, duplicate_item, expected_1', + [ + [ + pd.Series({1: 12, 2: [1, 2, 2, 3]}), pd.Series({1: 313}), + pd.Series({1: 12, }, dtype=object), + ], + [ + pd.Series({1: [1, 2, 3], 2: [1, 2, 2, 3]}), + pd.Series({1: [1, 2, 3]}), pd.Series({1: [1, 2, 3], }), + ], + ]) +def 
test_getitem_with_duplicates_indices( + result_1, duplicate_item, expected_1): + # GH 17610 + result = result_1.append(duplicate_item) + expected = expected_1.append(duplicate_item) + assert_series_equal(result[1], expected) + assert result[2] == result_1[2] + + +def test_getitem_out_of_bounds(test_data): + # don't segfault, GH #495 + pytest.raises(IndexError, test_data.ts.__getitem__, len(test_data.ts)) + + # GH #917 + s = Series([]) + pytest.raises(IndexError, s.__getitem__, -1) + + +def test_getitem_setitem_integers(): + # caused bug without test + s = Series([1, 2, 3], ['a', 'b', 'c']) + + assert s.iloc[0] == s['a'] + s.iloc[0] = 5 + tm.assert_almost_equal(s['a'], 5) + + +def test_getitem_box_float64(test_data): + value = test_data.ts[5] + assert isinstance(value, np.float64) + + +def test_series_box_timestamp(): + rng = pd.date_range('20090415', '20090519', freq='B') + ser = Series(rng) + + assert isinstance(ser[5], pd.Timestamp) + + rng = pd.date_range('20090415', '20090519', freq='B') + ser = Series(rng, index=rng) + assert isinstance(ser[5], pd.Timestamp) + + assert isinstance(ser.iat[5], pd.Timestamp) + + +def test_getitem_ambiguous_keyerror(): + s = Series(lrange(10), index=lrange(0, 20, 2)) + pytest.raises(KeyError, s.__getitem__, 1) + pytest.raises(KeyError, s.loc.__getitem__, 1) + + +def test_getitem_unordered_dup(): + obj = Series(lrange(5), index=['c', 'a', 'a', 'b', 'b']) + assert is_scalar(obj['c']) + assert obj['c'] == 0 + + +def test_getitem_dups_with_missing(): + # breaks reindex, so need to use .loc internally + # GH 4246 + s = Series([1, 2, 3, 4], ['foo', 'bar', 'foo', 'bah']) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + expected = s.loc[['foo', 'bar', 'bah', 'bam']] + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = s[['foo', 'bar', 'bah', 'bam']] + assert_series_equal(result, expected) + + +def test_getitem_dups(): + s = Series(range(5), index=['A', 'A', 'B', 'C', 'C'], 
dtype=np.int64) + expected = Series([3, 4], index=['C', 'C'], dtype=np.int64) + result = s['C'] + assert_series_equal(result, expected) + + +def test_setitem_ambiguous_keyerror(): + s = Series(lrange(10), index=lrange(0, 20, 2)) + + # equivalent of an append + s2 = s.copy() + s2[1] = 5 + expected = s.append(Series([5], index=[1])) + assert_series_equal(s2, expected) + + s2 = s.copy() + s2.loc[1] = 5 + expected = s.append(Series([5], index=[1])) + assert_series_equal(s2, expected) + + +def test_getitem_dataframe(): + rng = list(range(10)) + s = pd.Series(10, index=rng) + df = pd.DataFrame(rng, index=rng) + pytest.raises(TypeError, s.__getitem__, df > 5) + + +def test_setitem(test_data): + test_data.ts[test_data.ts.index[5]] = np.NaN + test_data.ts[[1, 2, 17]] = np.NaN + test_data.ts[6] = np.NaN + assert np.isnan(test_data.ts[6]) + assert np.isnan(test_data.ts[2]) + test_data.ts[np.isnan(test_data.ts)] = 5 + assert not np.isnan(test_data.ts[2]) + + # caught this bug when writing tests + series = Series(tm.makeIntIndex(20).astype(float), + index=tm.makeIntIndex(20)) + + series[::2] = 0 + assert (series[::2] == 0).all() + + # set item that's not contained + s = test_data.series.copy() + s['foobar'] = 1 + + app = Series([1], index=['foobar'], name='series') + expected = test_data.series.append(app) + assert_series_equal(s, expected) + + # Test for issue #10193 + key = pd.Timestamp('2012-01-01') + series = pd.Series() + series[key] = 47 + expected = pd.Series(47, [key]) + assert_series_equal(series, expected) + + series = pd.Series([], pd.DatetimeIndex([], freq='D')) + series[key] = 47 + expected = pd.Series(47, pd.DatetimeIndex([key], freq='D')) + assert_series_equal(series, expected) + + +def test_setitem_dtypes(): + # change dtypes + # GH 4463 + expected = Series([np.nan, 2, 3]) + + s = Series([1, 2, 3]) + s.iloc[0] = np.nan + assert_series_equal(s, expected) + + s = Series([1, 2, 3]) + s.loc[0] = np.nan + assert_series_equal(s, expected) + + s = Series([1, 2, 3]) + 
s[0] = np.nan + assert_series_equal(s, expected) + + s = Series([False]) + s.loc[0] = np.nan + assert_series_equal(s, Series([np.nan])) + + s = Series([False, True]) + s.loc[0] = np.nan + assert_series_equal(s, Series([np.nan, 1.0])) + + +def test_set_value(test_data): + idx = test_data.ts.index[10] + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + res = test_data.ts.set_value(idx, 0) + assert res is test_data.ts + assert test_data.ts[idx] == 0 + + # equiv + s = test_data.series.copy() + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + res = s.set_value('foobar', 0) + assert res is s + assert res.index[-1] == 'foobar' + assert res['foobar'] == 0 + + s = test_data.series.copy() + s.loc['foobar'] = 0 + assert s.index[-1] == 'foobar' + assert s['foobar'] == 0 + + +def test_setslice(test_data): + sl = test_data.ts[5:20] + assert len(sl) == len(sl.index) + assert sl.index.is_unique + + +def test_basic_getitem_setitem_corner(test_data): + # invalid tuples, e.g. td.ts[:, None] vs. td.ts[:, 2] + with tm.assert_raises_regex(ValueError, 'tuple-index'): + test_data.ts[:, 2] + with tm.assert_raises_regex(ValueError, 'tuple-index'): + test_data.ts[:, 2] = 2 + + # weird lists. 
[slice(0, 5)] will work but not two slices + result = test_data.ts[[slice(None, 5)]] + expected = test_data.ts[:5] + assert_series_equal(result, expected) + + # OK + pytest.raises(Exception, test_data.ts.__getitem__, + [5, slice(None, None)]) + pytest.raises(Exception, test_data.ts.__setitem__, + [5, slice(None, None)], 2) + + +@pytest.mark.parametrize('tz', ['US/Eastern', 'UTC', 'Asia/Tokyo']) +def test_setitem_with_tz(tz): + orig = pd.Series(pd.date_range('2016-01-01', freq='H', periods=3, + tz=tz)) + assert orig.dtype == 'datetime64[ns, {0}]'.format(tz) + + # scalar + s = orig.copy() + s[1] = pd.Timestamp('2011-01-01', tz=tz) + exp = pd.Series([pd.Timestamp('2016-01-01 00:00', tz=tz), + pd.Timestamp('2011-01-01 00:00', tz=tz), + pd.Timestamp('2016-01-01 02:00', tz=tz)]) + tm.assert_series_equal(s, exp) + + s = orig.copy() + s.loc[1] = pd.Timestamp('2011-01-01', tz=tz) + tm.assert_series_equal(s, exp) + + s = orig.copy() + s.iloc[1] = pd.Timestamp('2011-01-01', tz=tz) + tm.assert_series_equal(s, exp) + + # vector + vals = pd.Series([pd.Timestamp('2011-01-01', tz=tz), + pd.Timestamp('2012-01-01', tz=tz)], index=[1, 2]) + assert vals.dtype == 'datetime64[ns, {0}]'.format(tz) + + s[[1, 2]] = vals + exp = pd.Series([pd.Timestamp('2016-01-01 00:00', tz=tz), + pd.Timestamp('2011-01-01 00:00', tz=tz), + pd.Timestamp('2012-01-01 00:00', tz=tz)]) + tm.assert_series_equal(s, exp) + + s = orig.copy() + s.loc[[1, 2]] = vals + tm.assert_series_equal(s, exp) + + s = orig.copy() + s.iloc[[1, 2]] = vals + tm.assert_series_equal(s, exp) + + +def test_setitem_with_tz_dst(): + # GH XXX + tz = 'US/Eastern' + orig = pd.Series(pd.date_range('2016-11-06', freq='H', periods=3, + tz=tz)) + assert orig.dtype == 'datetime64[ns, {0}]'.format(tz) + + # scalar + s = orig.copy() + s[1] = pd.Timestamp('2011-01-01', tz=tz) + exp = pd.Series([pd.Timestamp('2016-11-06 00:00-04:00', tz=tz), + pd.Timestamp('2011-01-01 00:00-05:00', tz=tz), + pd.Timestamp('2016-11-06 01:00-05:00', tz=tz)]) + 
tm.assert_series_equal(s, exp) + + s = orig.copy() + s.loc[1] = pd.Timestamp('2011-01-01', tz=tz) + tm.assert_series_equal(s, exp) + + s = orig.copy() + s.iloc[1] = pd.Timestamp('2011-01-01', tz=tz) + tm.assert_series_equal(s, exp) + + # vector + vals = pd.Series([pd.Timestamp('2011-01-01', tz=tz), + pd.Timestamp('2012-01-01', tz=tz)], index=[1, 2]) + assert vals.dtype == 'datetime64[ns, {0}]'.format(tz) + + s[[1, 2]] = vals + exp = pd.Series([pd.Timestamp('2016-11-06 00:00', tz=tz), + pd.Timestamp('2011-01-01 00:00', tz=tz), + pd.Timestamp('2012-01-01 00:00', tz=tz)]) + tm.assert_series_equal(s, exp) + + s = orig.copy() + s.loc[[1, 2]] = vals + tm.assert_series_equal(s, exp) + + s = orig.copy() + s.iloc[[1, 2]] = vals + tm.assert_series_equal(s, exp) + + +def test_categorial_assigning_ops(): + orig = Series(Categorical(["b", "b"], categories=["a", "b"])) + s = orig.copy() + s[:] = "a" + exp = Series(Categorical(["a", "a"], categories=["a", "b"])) + tm.assert_series_equal(s, exp) + + s = orig.copy() + s[1] = "a" + exp = Series(Categorical(["b", "a"], categories=["a", "b"])) + tm.assert_series_equal(s, exp) + + s = orig.copy() + s[s.index > 0] = "a" + exp = Series(Categorical(["b", "a"], categories=["a", "b"])) + tm.assert_series_equal(s, exp) + + s = orig.copy() + s[[False, True]] = "a" + exp = Series(Categorical(["b", "a"], categories=["a", "b"])) + tm.assert_series_equal(s, exp) + + s = orig.copy() + s.index = ["x", "y"] + s["y"] = "a" + exp = Series(Categorical(["b", "a"], categories=["a", "b"]), + index=["x", "y"]) + tm.assert_series_equal(s, exp) + + # ensure that one can set something to np.nan + s = Series(Categorical([1, 2, 3])) + exp = Series(Categorical([1, np.nan, 3], categories=[1, 2, 3])) + s[1] = np.nan + tm.assert_series_equal(s, exp) + + +def test_slice(test_data): + numSlice = test_data.series[10:20] + numSliceEnd = test_data.series[-10:] + objSlice = test_data.objSeries[10:20] + + assert test_data.series.index[9] not in numSlice.index + assert 
test_data.objSeries.index[9] not in objSlice.index + + assert len(numSlice) == len(numSlice.index) + assert test_data.series[numSlice.index[0]] == numSlice[numSlice.index[0]] + + assert numSlice.index[1] == test_data.series.index[11] + assert tm.equalContents(numSliceEnd, np.array(test_data.series)[-10:]) + + # Test return view. + sl = test_data.series[10:20] + sl[:] = 0 + + assert (test_data.series[10:20] == 0).all() + + +def test_slice_can_reorder_not_uniquely_indexed(): + s = Series(1, index=['a', 'a', 'b', 'b', 'c']) + s[::-1] # it works! + + +def test_ix_setitem(test_data): + inds = test_data.series.index[[3, 4, 7]] + + result = test_data.series.copy() + result.loc[inds] = 5 + + expected = test_data.series.copy() + expected[[3, 4, 7]] = 5 + assert_series_equal(result, expected) + + result.iloc[5:10] = 10 + expected[5:10] = 10 + assert_series_equal(result, expected) + + # set slice with indices + d1, d2 = test_data.series.index[[5, 15]] + result.loc[d1:d2] = 6 + expected[5:16] = 6 # because it's inclusive + assert_series_equal(result, expected) + + # set index value + test_data.series.loc[d1] = 4 + test_data.series.loc[d2] = 6 + assert test_data.series[d1] == 4 + assert test_data.series[d2] == 6 + + +def test_setitem_na(): + # these induce dtype changes + expected = Series([np.nan, 3, np.nan, 5, np.nan, 7, np.nan, 9, np.nan]) + s = Series([2, 3, 4, 5, 6, 7, 8, 9, 10]) + s[::2] = np.nan + assert_series_equal(s, expected) + + # gets coerced to float, right? 
+ expected = Series([np.nan, 1, np.nan, 0]) + s = Series([True, True, False, False]) + s[::2] = np.nan + assert_series_equal(s, expected) + + expected = Series([np.nan, np.nan, np.nan, np.nan, np.nan, 5, 6, 7, 8, + 9]) + s = Series(np.arange(10)) + s[:5] = np.nan + assert_series_equal(s, expected) + + +def test_timedelta_assignment(): + # GH 8209 + s = Series([]) + s.loc['B'] = timedelta(1) + tm.assert_series_equal(s, Series(Timedelta('1 days'), index=['B'])) + + s = s.reindex(s.index.insert(0, 'A')) + tm.assert_series_equal(s, Series( + [np.nan, Timedelta('1 days')], index=['A', 'B'])) + + result = s.fillna(timedelta(1)) + expected = Series(Timedelta('1 days'), index=['A', 'B']) + tm.assert_series_equal(result, expected) + + s.loc['A'] = timedelta(1) + tm.assert_series_equal(s, expected) + + # GH 14155 + s = Series(10 * [np.timedelta64(10, 'm')]) + s.loc[[1, 2, 3]] = np.timedelta64(20, 'm') + expected = pd.Series(10 * [np.timedelta64(10, 'm')]) + expected.loc[[1, 2, 3]] = pd.Timedelta(np.timedelta64(20, 'm')) + tm.assert_series_equal(s, expected) + + +def test_underlying_data_conversion(): + # GH 4080 + df = DataFrame({c: [1, 2, 3] for c in ['a', 'b', 'c']}) + df.set_index(['a', 'b', 'c'], inplace=True) + s = Series([1], index=[(2, 2, 2)]) + df['val'] = 0 + df + df['val'].update(s) + + expected = DataFrame( + dict(a=[1, 2, 3], b=[1, 2, 3], c=[1, 2, 3], val=[0, 1, 0])) + expected.set_index(['a', 'b', 'c'], inplace=True) + tm.assert_frame_equal(df, expected) + + # GH 3970 + # these are chained assignments as well + pd.set_option('chained_assignment', None) + df = DataFrame({"aa": range(5), "bb": [2.2] * 5}) + df["cc"] = 0.0 + + ck = [True] * len(df) + + df["bb"].iloc[0] = .13 + + # TODO: unused + df_tmp = df.iloc[ck] # noqa + + df["bb"].iloc[0] = .15 + assert df['bb'].iloc[0] == 0.15 + pd.set_option('chained_assignment', 'raise') + + # GH 3217 + df = DataFrame(dict(a=[1, 3], b=[np.nan, 2])) + df['c'] = np.nan + df['c'].update(pd.Series(['foo'], index=[0])) + + 
expected = DataFrame(dict(a=[1, 3], b=[np.nan, 2], c=['foo', np.nan])) + tm.assert_frame_equal(df, expected) + + +def test_preserve_refs(test_data): + seq = test_data.ts[[5, 10, 15]] + seq[1] = np.NaN + assert not np.isnan(test_data.ts[10]) + + +def test_cast_on_putmask(): + # GH 2746 + + # need to upcast + s = Series([1, 2], index=[1, 2], dtype='int64') + s[[True, False]] = Series([0], index=[1], dtype='int64') + expected = Series([0, 2], index=[1, 2], dtype='int64') + + assert_series_equal(s, expected) + + +def test_type_promote_putmask(): + # GH8387: test that changing types does not break alignment + ts = Series(np.random.randn(100), index=np.arange(100, 0, -1)).round(5) + left, mask = ts.copy(), ts > 0 + right = ts[mask].copy().map(str) + left[mask] = right + assert_series_equal(left, ts.map(lambda t: str(t) if t > 0 else t)) + + s = Series([0, 1, 2, 0]) + mask = s > 0 + s2 = s[mask].map(str) + s[mask] = s2 + assert_series_equal(s, Series([0, '1', '2', 0])) + + s = Series([0, 'foo', 'bar', 0]) + mask = Series([False, True, True, False]) + s2 = s[mask] + s[mask] = s2 + assert_series_equal(s, Series([0, 'foo', 'bar', 0])) + + +def test_multilevel_preserve_name(): + index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', + 'three']], + labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=['first', 'second']) + s = Series(np.random.randn(len(index)), index=index, name='sth') + + result = s['foo'] + result2 = s.loc['foo'] + assert result.name == s.name + assert result2.name == s.name + + +def test_setitem_scalar_into_readonly_backing_data(): + # GH14359: test that you cannot mutate a read only buffer + + array = np.zeros(5) + array.flags.writeable = False # make the array immutable + series = Series(array) + + for n in range(len(series)): + with pytest.raises(ValueError): + series[n] = 1 + + assert array[n] == 0 + + +def test_setitem_slice_into_readonly_backing_data(): + # GH14359: test that you cannot mutate a read 
only buffer + + array = np.zeros(5) + array.flags.writeable = False # make the array immutable + series = Series(array) + + with pytest.raises(ValueError): + series[1:3] = 1 + + assert not array.any() + + +""" +miscellaneous methods +""" + + +def test_select(test_data): + # deprecated: gh-12410 + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + n = len(test_data.ts) + result = test_data.ts.select(lambda x: x >= test_data.ts.index[n // 2]) + expected = test_data.ts.reindex(test_data.ts.index[n // 2:]) + assert_series_equal(result, expected) + + result = test_data.ts.select(lambda x: x.weekday() == 2) + expected = test_data.ts[test_data.ts.index.weekday == 2] + assert_series_equal(result, expected) + + +def test_pop(): + # GH 6600 + df = DataFrame({'A': 0, 'B': np.arange(5, dtype='int64'), 'C': 0, }) + k = df.iloc[4] + + result = k.pop('B') + assert result == 4 + + expected = Series([0, 0], index=['A', 'C'], name=4) + assert_series_equal(k, expected) + + +def test_take(): + s = Series([-1, 5, 6, 2, 4]) + + actual = s.take([1, 3, 4]) + expected = Series([5, 2, 4], index=[1, 3, 4]) + tm.assert_series_equal(actual, expected) + + actual = s.take([-1, 3, 4]) + expected = Series([4, 2, 4], index=[4, 3, 4]) + tm.assert_series_equal(actual, expected) + + pytest.raises(IndexError, s.take, [1, 10]) + pytest.raises(IndexError, s.take, [2, 5]) + + with tm.assert_produces_warning(FutureWarning): + s.take([-1, 3, 4], convert=False) + + +def test_head_tail(test_data): + assert_series_equal(test_data.series.head(), test_data.series[:5]) + assert_series_equal(test_data.series.head(0), test_data.series[0:0]) + assert_series_equal(test_data.series.tail(), test_data.series[-5:]) + assert_series_equal(test_data.series.tail(0), test_data.series[0:0]) diff --git a/pandas/tests/series/indexing/test_loc.py b/pandas/tests/series/indexing/test_loc.py new file mode 100644 index 0000000000000..088406e0a1db6 --- /dev/null +++ b/pandas/tests/series/indexing/test_loc.py 
@@ -0,0 +1,150 @@ +# coding=utf-8 +# pylint: disable-msg=E1101,W0612 + +import pytest + +import numpy as np +import pandas as pd + +from pandas import (Series, Timestamp) + +from pandas.compat import lrange +from pandas.util.testing import (assert_series_equal) + + +def test_loc_getitem(test_data): + inds = test_data.series.index[[3, 4, 7]] + assert_series_equal( + test_data.series.loc[inds], + test_data.series.reindex(inds)) + assert_series_equal(test_data.series.iloc[5::2], test_data.series[5::2]) + + # slice with indices + d1, d2 = test_data.ts.index[[5, 15]] + result = test_data.ts.loc[d1:d2] + expected = test_data.ts.truncate(d1, d2) + assert_series_equal(result, expected) + + # boolean + mask = test_data.series > test_data.series.median() + assert_series_equal(test_data.series.loc[mask], test_data.series[mask]) + + # ask for index value + assert test_data.ts.loc[d1] == test_data.ts[d1] + assert test_data.ts.loc[d2] == test_data.ts[d2] + + +def test_loc_getitem_not_monotonic(test_data): + d1, d2 = test_data.ts.index[[5, 15]] + + ts2 = test_data.ts[::2][[1, 2, 0]] + + pytest.raises(KeyError, ts2.loc.__getitem__, slice(d1, d2)) + pytest.raises(KeyError, ts2.loc.__setitem__, slice(d1, d2), 0) + + +def test_loc_getitem_setitem_integer_slice_keyerrors(): + s = Series(np.random.randn(10), index=lrange(0, 20, 2)) + + # this is OK + cp = s.copy() + cp.iloc[4:10] = 0 + assert (cp.iloc[4:10] == 0).all() + + # so is this + cp = s.copy() + cp.iloc[3:11] = 0 + assert (cp.iloc[3:11] == 0).values.all() + + result = s.iloc[2:6] + result2 = s.loc[3:11] + expected = s.reindex([4, 6, 8, 10]) + + assert_series_equal(result, expected) + assert_series_equal(result2, expected) + + # non-monotonic, raise KeyError + s2 = s.iloc[lrange(5) + lrange(5, 10)[::-1]] + pytest.raises(KeyError, s2.loc.__getitem__, slice(3, 11)) + pytest.raises(KeyError, s2.loc.__setitem__, slice(3, 11), 0) + + +def test_loc_getitem_iterator(test_data): + idx = iter(test_data.series.index[:10]) + result = 
test_data.series.loc[idx] + assert_series_equal(result, test_data.series[:10]) + + +def test_loc_setitem_boolean(test_data): + mask = test_data.series > test_data.series.median() + + result = test_data.series.copy() + result.loc[mask] = 0 + expected = test_data.series + expected[mask] = 0 + assert_series_equal(result, expected) + + +def test_loc_setitem_corner(test_data): + inds = list(test_data.series.index[[5, 8, 12]]) + test_data.series.loc[inds] = 5 + pytest.raises(Exception, test_data.series.loc.__setitem__, + inds + ['foo'], 5) + + +def test_basic_setitem_with_labels(test_data): + indices = test_data.ts.index[[5, 10, 15]] + + cp = test_data.ts.copy() + exp = test_data.ts.copy() + cp[indices] = 0 + exp.loc[indices] = 0 + assert_series_equal(cp, exp) + + cp = test_data.ts.copy() + exp = test_data.ts.copy() + cp[indices[0]:indices[2]] = 0 + exp.loc[indices[0]:indices[2]] = 0 + assert_series_equal(cp, exp) + + # integer indexes, be careful + s = Series(np.random.randn(10), index=lrange(0, 20, 2)) + inds = [0, 4, 6] + arr_inds = np.array([0, 4, 6]) + + cp = s.copy() + exp = s.copy() + s[inds] = 0 + s.loc[inds] = 0 + assert_series_equal(cp, exp) + + cp = s.copy() + exp = s.copy() + s[arr_inds] = 0 + s.loc[arr_inds] = 0 + assert_series_equal(cp, exp) + + inds_notfound = [0, 4, 5, 6] + arr_inds_notfound = np.array([0, 4, 5, 6]) + pytest.raises(Exception, s.__setitem__, inds_notfound, 0) + pytest.raises(Exception, s.__setitem__, arr_inds_notfound, 0) + + # GH12089 + # with tz for values + s = Series(pd.date_range("2011-01-01", periods=3, tz="US/Eastern"), + index=['a', 'b', 'c']) + s2 = s.copy() + expected = Timestamp('2011-01-03', tz='US/Eastern') + s2.loc['a'] = expected + result = s2.loc['a'] + assert result == expected + + s2 = s.copy() + s2.iloc[0] = expected + result = s2.iloc[0] + assert result == expected + + s2 = s.copy() + s2['a'] = expected + result = s2['a'] + assert result == expected diff --git a/pandas/tests/series/indexing/test_numeric.py 
b/pandas/tests/series/indexing/test_numeric.py new file mode 100644 index 0000000000000..b964ec3874998 --- /dev/null +++ b/pandas/tests/series/indexing/test_numeric.py @@ -0,0 +1,236 @@ +# coding=utf-8 +# pylint: disable-msg=E1101,W0612 + +import pytest + +import numpy as np +import pandas as pd + +from pandas import (Index, Series, DataFrame) + +from pandas.compat import lrange, range +from pandas.util.testing import (assert_series_equal) + +import pandas.util.testing as tm + + +def test_get(): + # GH 6383 + s = Series(np.array([43, 48, 60, 48, 50, 51, 50, 45, 57, 48, 56, 45, + 51, 39, 55, 43, 54, 52, 51, 54])) + + result = s.get(25, 0) + expected = 0 + assert result == expected + + s = Series(np.array([43, 48, 60, 48, 50, 51, 50, 45, 57, 48, 56, + 45, 51, 39, 55, 43, 54, 52, 51, 54]), + index=pd.Float64Index( + [25.0, 36.0, 49.0, 64.0, 81.0, 100.0, + 121.0, 144.0, 169.0, 196.0, 1225.0, + 1296.0, 1369.0, 1444.0, 1521.0, 1600.0, + 1681.0, 1764.0, 1849.0, 1936.0], + dtype='object')) + + result = s.get(25, 0) + expected = 43 + assert result == expected + + # GH 7407 + # with a boolean accessor + df = pd.DataFrame({'i': [0] * 3, 'b': [False] * 3}) + vc = df.i.value_counts() + result = vc.get(99, default='Missing') + assert result == 'Missing' + + vc = df.b.value_counts() + result = vc.get(False, default='Missing') + assert result == 3 + + result = vc.get(True, default='Missing') + assert result == 'Missing' + + +def test_get_nan(): + # GH 8569 + s = pd.Float64Index(range(10)).to_series() + assert s.get(np.nan) is None + assert s.get(np.nan, default='Missing') == 'Missing' + + # ensure that fixing the above hasn't broken get + # with multiple elements + idx = [20, 30] + assert_series_equal(s.get(idx), + Series([np.nan] * 2, index=idx)) + idx = [np.nan, np.nan] + assert_series_equal(s.get(idx), + Series([np.nan] * 2, index=idx)) + + +def test_delitem(): + # GH 5542 + # should delete the item inplace + s = Series(lrange(5)) + del s[0] + + expected = Series(lrange(1, 5), 
index=lrange(1, 5)) + assert_series_equal(s, expected) + + del s[1] + expected = Series(lrange(2, 5), index=lrange(2, 5)) + assert_series_equal(s, expected) + + # empty + s = Series() + + def f(): + del s[0] + + pytest.raises(KeyError, f) + + # only 1 left, del, add, del + s = Series(1) + del s[0] + assert_series_equal(s, Series(dtype='int64', index=Index( + [], dtype='int64'))) + s[0] = 1 + assert_series_equal(s, Series(1)) + del s[0] + assert_series_equal(s, Series(dtype='int64', index=Index( + [], dtype='int64'))) + + # Index(dtype=object) + s = Series(1, index=['a']) + del s['a'] + assert_series_equal(s, Series(dtype='int64', index=Index( + [], dtype='object'))) + s['a'] = 1 + assert_series_equal(s, Series(1, index=['a'])) + del s['a'] + assert_series_equal(s, Series(dtype='int64', index=Index( + [], dtype='object'))) + + +def test_slice_float64(): + values = np.arange(10., 50., 2) + index = Index(values) + + start, end = values[[5, 15]] + + s = Series(np.random.randn(20), index=index) + + result = s[start:end] + expected = s.iloc[5:16] + assert_series_equal(result, expected) + + result = s.loc[start:end] + assert_series_equal(result, expected) + + df = DataFrame(np.random.randn(20, 3), index=index) + + result = df[start:end] + expected = df.iloc[5:16] + tm.assert_frame_equal(result, expected) + + result = df.loc[start:end] + tm.assert_frame_equal(result, expected) + + +def test_getitem_negative_out_of_bounds(): + s = Series(tm.rands_array(5, 10), index=tm.rands_array(10, 10)) + + pytest.raises(IndexError, s.__getitem__, -11) + pytest.raises(IndexError, s.__setitem__, -11, 'foo') + + +def test_getitem_regression(): + s = Series(lrange(5), index=lrange(5)) + result = s[lrange(5)] + assert_series_equal(result, s) + + +def test_getitem_setitem_slice_bug(): + s = Series(lrange(10), lrange(10)) + result = s[-12:] + assert_series_equal(result, s) + + result = s[-7:] + assert_series_equal(result, s[3:]) + + result = s[:-12] + assert_series_equal(result, s[:0]) + + s = 
Series(lrange(10), lrange(10)) + s[-12:] = 0 + assert (s == 0).all() + + s[:-12] = 5 + assert (s == 0).all() + + +def test_getitem_setitem_slice_integers(): + s = Series(np.random.randn(8), index=[2, 4, 6, 8, 10, 12, 14, 16]) + + result = s[:4] + expected = s.reindex([2, 4, 6, 8]) + assert_series_equal(result, expected) + + s[:4] = 0 + assert (s[:4] == 0).all() + assert not (s[4:] == 0).any() + + +def test_setitem_float_labels(): + # note labels are floats + s = Series(['a', 'b', 'c'], index=[0, 0.5, 1]) + tmp = s.copy() + + s.loc[1] = 'zoo' + tmp.iloc[2] = 'zoo' + + assert_series_equal(s, tmp) + + +def test_slice_float_get_set(test_data): + pytest.raises(TypeError, lambda: test_data.ts[4.0:10.0]) + + def f(): + test_data.ts[4.0:10.0] = 0 + + pytest.raises(TypeError, f) + + pytest.raises(TypeError, test_data.ts.__getitem__, slice(4.5, 10.0)) + pytest.raises(TypeError, test_data.ts.__setitem__, slice(4.5, 10.0), 0) + + +def test_slice_floats2(): + s = Series(np.random.rand(10), index=np.arange(10, 20, dtype=float)) + + assert len(s.loc[12.0:]) == 8 + assert len(s.loc[12.5:]) == 7 + + i = np.arange(10, 20, dtype=float) + i[2] = 12.2 + s.index = i + assert len(s.loc[12.0:]) == 8 + assert len(s.loc[12.5:]) == 7 + + +def test_int_indexing(): + s = Series(np.random.randn(6), index=[0, 0, 1, 1, 2, 2]) + + pytest.raises(KeyError, s.__getitem__, 5) + + pytest.raises(KeyError, s.__getitem__, 'c') + + # not monotonic + s = Series(np.random.randn(6), index=[2, 2, 0, 0, 1, 1]) + + pytest.raises(KeyError, s.__getitem__, 5) + + pytest.raises(KeyError, s.__getitem__, 'c') + + +def test_getitem_int64(test_data): + idx = np.int64(5) + assert test_data.ts[idx] == test_data.ts[5] diff --git a/pandas/tests/series/test_alter_axes.py b/pandas/tests/series/test_alter_axes.py index 6473dbeeaa1bc..dce4e82cbdcf1 100644 --- a/pandas/tests/series/test_alter_axes.py +++ b/pandas/tests/series/test_alter_axes.py @@ -1,6 +1,8 @@ # coding=utf-8 # pylint: disable-msg=E1101,W0612 +import pytest + 
from datetime import datetime import numpy as np @@ -16,27 +18,27 @@ from .common import TestData -class TestSeriesAlterAxes(TestData, tm.TestCase): +class TestSeriesAlterAxes(TestData): def test_setindex(self): # wrong type series = self.series.copy() - self.assertRaises(TypeError, setattr, series, 'index', None) + pytest.raises(TypeError, setattr, series, 'index', None) # wrong length series = self.series.copy() - self.assertRaises(Exception, setattr, series, 'index', - np.arange(len(series) - 1)) + pytest.raises(Exception, setattr, series, 'index', + np.arange(len(series) - 1)) # works series = self.series.copy() series.index = np.arange(len(series)) - tm.assertIsInstance(series.index, Index) + assert isinstance(series.index, Index) def test_rename(self): renamer = lambda x: x.strftime('%Y%m%d') renamed = self.ts.rename(renamer) - self.assertEqual(renamed.index[0], renamer(self.ts.index[0])) + assert renamed.index[0] == renamer(self.ts.index[0]) # dict rename_dict = dict(zip(self.ts.index, renamed.index)) @@ -46,14 +48,14 @@ def test_rename(self): # partial dict s = Series(np.arange(4), index=['a', 'b', 'c', 'd'], dtype='int64') renamed = s.rename({'b': 'foo', 'd': 'bar'}) - self.assert_index_equal(renamed.index, Index(['a', 'foo', 'c', 'bar'])) + tm.assert_index_equal(renamed.index, Index(['a', 'foo', 'c', 'bar'])) # index with name renamer = Series(np.arange(4), index=Index(['a', 'b', 'c', 'd'], name='name'), dtype='int64') renamed = renamer.rename({}) - self.assertEqual(renamed.index.name, renamer.index.name) + assert renamed.index.name == renamer.index.name def test_rename_by_series(self): s = Series(range(5), name='foo') @@ -66,51 +68,56 @@ def test_rename_set_name(self): s = Series(range(4), index=list('abcd')) for name in ['foo', 123, 123., datetime(2001, 11, 11), ('foo',)]: result = s.rename(name) - self.assertEqual(result.name, name) - self.assert_numpy_array_equal(result.index.values, s.index.values) - self.assertTrue(s.name is None) + assert 
result.name == name + tm.assert_numpy_array_equal(result.index.values, s.index.values) + assert s.name is None def test_rename_set_name_inplace(self): s = Series(range(3), index=list('abc')) for name in ['foo', 123, 123., datetime(2001, 11, 11), ('foo',)]: s.rename(name, inplace=True) - self.assertEqual(s.name, name) + assert s.name == name exp = np.array(['a', 'b', 'c'], dtype=np.object_) - self.assert_numpy_array_equal(s.index.values, exp) + tm.assert_numpy_array_equal(s.index.values, exp) + + def test_rename_axis_supported(self): + # Supporting axis for compatibility, detailed in GH-18589 + s = Series(range(5)) + s.rename({}, axis=0) + s.rename({}, axis='index') + with tm.assert_raises_regex(ValueError, 'No axis named 5'): + s.rename({}, axis=5) def test_set_name_attribute(self): s = Series([1, 2, 3]) s2 = Series([1, 2, 3], name='bar') for name in [7, 7., 'name', datetime(2001, 1, 1), (1,), u"\u05D0"]: s.name = name - self.assertEqual(s.name, name) + assert s.name == name s2.name = name - self.assertEqual(s2.name, name) + assert s2.name == name def test_set_name(self): s = Series([1, 2, 3]) s2 = s._set_name('foo') - self.assertEqual(s2.name, 'foo') - self.assertTrue(s.name is None) - self.assertTrue(s is not s2) + assert s2.name == 'foo' + assert s.name is None + assert s is not s2 def test_rename_inplace(self): renamer = lambda x: x.strftime('%Y%m%d') expected = renamer(self.ts.index[0]) self.ts.rename(renamer, inplace=True) - self.assertEqual(self.ts.index[0], expected) + assert self.ts.index[0] == expected def test_set_index_makes_timeseries(self): idx = tm.makeDateIndex(10) s = Series(lrange(10)) s.index = idx - - with tm.assert_produces_warning(FutureWarning): - self.assertTrue(s.is_time_series) - self.assertTrue(s.index.is_all_dates) + assert s.index.is_all_dates def test_reset_index(self): df = tm.makeDataFrame()[:5] @@ -119,10 +126,10 @@ def test_reset_index(self): ser.name = 'value' df = ser.reset_index() - self.assertIn('value', df) + assert 'value' in 
df df = ser.reset_index(name='value2') - self.assertIn('value2', df) + assert 'value2' in df # check inplace s = ser.reset_index(drop=True) @@ -136,17 +143,56 @@ def test_reset_index(self): [0, 1, 0, 1, 0, 1]]) s = Series(np.random.randn(6), index=index) rs = s.reset_index(level=1) - self.assertEqual(len(rs.columns), 2) + assert len(rs.columns) == 2 rs = s.reset_index(level=[0, 2], drop=True) - self.assert_index_equal(rs.index, Index(index.get_level_values(1))) - tm.assertIsInstance(rs, Series) + tm.assert_index_equal(rs.index, Index(index.get_level_values(1))) + assert isinstance(rs, Series) + + def test_reset_index_level(self): + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], + columns=['A', 'B', 'C']) + + for levels in ['A', 'B'], [0, 1]: + # With MultiIndex + s = df.set_index(['A', 'B'])['C'] + + result = s.reset_index(level=levels[0]) + tm.assert_frame_equal(result, df.set_index('B')) + + result = s.reset_index(level=levels[:1]) + tm.assert_frame_equal(result, df.set_index('B')) + + result = s.reset_index(level=levels) + tm.assert_frame_equal(result, df) + + result = df.set_index(['A', 'B']).reset_index(level=levels, + drop=True) + tm.assert_frame_equal(result, df[['C']]) + + with tm.assert_raises_regex(KeyError, 'Level E '): + s.reset_index(level=['A', 'E']) + + # With single-level Index + s = df.set_index('A')['B'] + + result = s.reset_index(level=levels[0]) + tm.assert_frame_equal(result, df[['A', 'B']]) + + result = s.reset_index(level=levels[:1]) + tm.assert_frame_equal(result, df[['A', 'B']]) + + result = s.reset_index(level=levels[0], drop=True) + tm.assert_series_equal(result, df['B']) + + with tm.assert_raises_regex(IndexError, 'Too many levels'): + s.reset_index(level=[0, 1, 2]) def test_reset_index_range(self): # GH 12071 s = pd.Series(range(2), name='A', dtype='int64') series_result = s.reset_index() - tm.assertIsInstance(series_result.index, RangeIndex) + assert isinstance(series_result.index, RangeIndex) series_expected = pd.DataFrame([[0, 0], [1, 
1]], columns=['index', 'A'], index=RangeIndex(stop=2)) @@ -176,13 +222,56 @@ def test_reorder_levels(self): expected = Series(np.arange(6), index=e_idx) assert_series_equal(result, expected) - result = s.reorder_levels([0, 0, 0]) - e_idx = MultiIndex(levels=[['bar'], ['bar'], ['bar']], - labels=[[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0]], - names=['L0', 'L0', 'L0']) - expected = Series(range(6), index=e_idx) - assert_series_equal(result, expected) + def test_rename_axis_inplace(self): + # GH 15704 + series = self.ts.copy() + expected = series.rename_axis('foo') + result = series.copy() + no_return = result.rename_axis('foo', inplace=True) - result = s.reorder_levels(['L0', 'L0', 'L0']) + assert no_return is None assert_series_equal(result, expected) + + def test_set_axis_inplace(self): + # GH14636 + + s = Series(np.arange(4), index=[1, 3, 5, 7], dtype='int64') + + expected = s.copy() + expected.index = list('abcd') + + for axis in 0, 'index': + # inplace=True + # The FutureWarning comes from the fact that we would like to have + # inplace default to False some day + for inplace, warn in (None, FutureWarning), (True, None): + result = s.copy() + kwargs = {'inplace': inplace} + with tm.assert_produces_warning(warn): + result.set_axis(list('abcd'), axis=axis, **kwargs) + tm.assert_series_equal(result, expected) + + # inplace=False + result = s.set_axis(list('abcd'), axis=0, inplace=False) + tm.assert_series_equal(expected, result) + + # omitting the "axis" parameter + with tm.assert_produces_warning(None): + result = s.set_axis(list('abcd'), inplace=False) + tm.assert_series_equal(result, expected) + + # wrong values for the "axis" parameter + for axis in 2, 'foo': + with tm.assert_raises_regex(ValueError, 'No axis named'): + s.set_axis(list('abcd'), axis=axis, inplace=False) + + def test_set_axis_prior_to_deprecation_signature(self): + s = Series(np.arange(4), index=[1, 3, 5, 7], dtype='int64') + + expected = s.copy() + expected.index = 
list('abcd') + + for axis in 0, 'index': + with tm.assert_produces_warning(FutureWarning): + result = s.set_axis(0, list('abcd'), inplace=False) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 222165e9d3633..0e6e44e839464 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -10,13 +10,12 @@ import numpy as np import pandas as pd -from pandas import (Series, Categorical, DataFrame, isnull, notnull, - bdate_range, date_range, _np_version_under1p10) +from pandas import (Series, Categorical, DataFrame, isna, notna, + bdate_range, date_range, _np_version_under1p10, + CategoricalIndex) from pandas.core.index import MultiIndex -from pandas.tseries.index import Timestamp -from pandas.tseries.tdi import Timedelta -import pandas.core.config as cf - +from pandas.core.indexes.datetimes import Timestamp +from pandas.core.indexes.timedeltas import Timedelta import pandas.core.nanops as nanops from pandas.compat import lrange, range @@ -24,96 +23,197 @@ from pandas.util.testing import (assert_series_equal, assert_almost_equal, assert_frame_equal, assert_index_equal) import pandas.util.testing as tm - +import pandas.util._test_decorators as td from .common import TestData -class TestSeriesAnalytics(TestData, tm.TestCase): +class TestSeriesAnalytics(TestData): + + @pytest.mark.parametrize("use_bottleneck", [True, False]) + @pytest.mark.parametrize("method, unit", [ + ("sum", 0.0), + ("prod", 1.0) + ]) + def test_empty(self, method, unit, use_bottleneck): + with pd.option_context("use_bottleneck", use_bottleneck): + # GH 9422 / 18921 + # Entirely empty + s = Series([]) + # NA by default + result = getattr(s, method)() + assert result == unit + + # Explicit + result = getattr(s, method)(min_count=0) + assert result == unit + + result = getattr(s, method)(min_count=1) + assert isna(result) + + # Skipna, default + result = getattr(s, 
method)(skipna=True) + result == unit + + # Skipna, explicit + result = getattr(s, method)(skipna=True, min_count=0) + assert result == unit + + result = getattr(s, method)(skipna=True, min_count=1) + assert isna(result) + + # All-NA + s = Series([np.nan]) + # NA by default + result = getattr(s, method)() + assert result == unit + + # Explicit + result = getattr(s, method)(min_count=0) + assert result == unit + + result = getattr(s, method)(min_count=1) + assert isna(result) + + # Skipna, default + result = getattr(s, method)(skipna=True) + result == unit + + # skipna, explicit + result = getattr(s, method)(skipna=True, min_count=0) + assert result == unit - def test_sum_zero(self): - arr = np.array([]) - self.assertEqual(nanops.nansum(arr), 0) + result = getattr(s, method)(skipna=True, min_count=1) + assert isna(result) - arr = np.empty((10, 0)) - self.assertTrue((nanops.nansum(arr, axis=1) == 0).all()) + # Mix of valid, empty + s = Series([np.nan, 1]) + # Default + result = getattr(s, method)() + assert result == 1.0 + + # Explicit + result = getattr(s, method)(min_count=0) + assert result == 1.0 + + result = getattr(s, method)(min_count=1) + assert result == 1.0 + + # Skipna + result = getattr(s, method)(skipna=True) + assert result == 1.0 + + result = getattr(s, method)(skipna=True, min_count=0) + assert result == 1.0 + + result = getattr(s, method)(skipna=True, min_count=1) + assert result == 1.0 + + # GH #844 (changed in 9422) + df = DataFrame(np.empty((10, 0))) + assert (getattr(df, method)(1) == unit).all() + + s = pd.Series([1]) + result = getattr(s, method)(min_count=2) + assert isna(result) + + s = pd.Series([np.nan]) + result = getattr(s, method)(min_count=2) + assert isna(result) + + s = pd.Series([np.nan, 1]) + result = getattr(s, method)(min_count=2) + assert isna(result) + + @pytest.mark.parametrize('method, unit', [ + ('sum', 0.0), + ('prod', 1.0), + ]) + def test_empty_multi(self, method, unit): + s = pd.Series([1, np.nan, np.nan, np.nan], + 
index=pd.MultiIndex.from_product([('a', 'b'), (0, 1)])) + # 1 / 0 by default + result = getattr(s, method)(level=0) + expected = pd.Series([1, unit], index=['a', 'b']) + tm.assert_series_equal(result, expected) + + # min_count=0 + result = getattr(s, method)(level=0, min_count=0) + expected = pd.Series([1, unit], index=['a', 'b']) + tm.assert_series_equal(result, expected) + + # min_count=1 + result = getattr(s, method)(level=0, min_count=1) + expected = pd.Series([1, np.nan], index=['a', 'b']) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "method", ['mean', 'median', 'std', 'var']) + def test_ops_consistency_on_empty(self, method): - # GH #844 - s = Series([], index=[]) - self.assertEqual(s.sum(), 0) + # GH 7869 + # consistency on empty - df = DataFrame(np.empty((10, 0))) - self.assertTrue((df.sum(1) == 0).all()) + # float + result = getattr(Series(dtype=float), method)() + assert isna(result) + + # timedelta64[ns] + result = getattr(Series(dtype='m8[ns]'), method)() + assert result is pd.NaT def test_nansum_buglet(self): s = Series([1.0, np.nan], index=[0, 1]) result = np.nansum(s) assert_almost_equal(result, 1) - def test_overflow(self): - # GH 6915 - # overflowing on the smaller int dtypes - for dtype in ['int32', 'int64']: - v = np.arange(5000000, dtype=dtype) - s = Series(v) - - # no bottleneck - result = s.sum(skipna=False) - self.assertEqual(int(result), v.sum(dtype='int64')) - result = s.min(skipna=False) - self.assertEqual(int(result), 0) - result = s.max(skipna=False) - self.assertEqual(int(result), v[-1]) - - # use bottleneck if available - result = s.sum() - self.assertEqual(int(result), v.sum(dtype='int64')) - result = s.min() - self.assertEqual(int(result), 0) - result = s.max() - self.assertEqual(int(result), v[-1]) - - for dtype in ['float32', 'float64']: - v = np.arange(5000000, dtype=dtype) - s = Series(v) - - # no bottleneck - result = s.sum(skipna=False) - self.assertEqual(result, v.sum(dtype=dtype)) - result = 
s.min(skipna=False) - self.assertTrue(np.allclose(float(result), 0.0)) - result = s.max(skipna=False) - self.assertTrue(np.allclose(float(result), v[-1])) - - # use bottleneck if available - result = s.sum() - self.assertEqual(result, v.sum(dtype=dtype)) - result = s.min() - self.assertTrue(np.allclose(float(result), 0.0)) - result = s.max() - self.assertTrue(np.allclose(float(result), v[-1])) + @pytest.mark.parametrize("use_bottleneck", [True, False]) + def test_sum_overflow(self, use_bottleneck): + + with pd.option_context('use_bottleneck', use_bottleneck): + # GH 6915 + # overflowing on the smaller int dtypes + for dtype in ['int32', 'int64']: + v = np.arange(5000000, dtype=dtype) + s = Series(v) + + result = s.sum(skipna=False) + assert int(result) == v.sum(dtype='int64') + result = s.min(skipna=False) + assert int(result) == 0 + result = s.max(skipna=False) + assert int(result) == v[-1] + + for dtype in ['float32', 'float64']: + v = np.arange(5000000, dtype=dtype) + s = Series(v) + + result = s.sum(skipna=False) + assert result == v.sum(dtype=dtype) + result = s.min(skipna=False) + assert np.allclose(float(result), 0.0) + result = s.max(skipna=False) + assert np.allclose(float(result), v[-1]) def test_sum(self): - self._check_stat_op('sum', np.sum, check_allna=True) + self._check_stat_op('sum', np.sum, check_allna=False) def test_sum_inf(self): - import pandas.core.nanops as nanops - s = Series(np.random.randn(10)) s2 = s.copy() s[5:8] = np.inf s2[5:8] = np.nan - self.assertTrue(np.isinf(s.sum())) + assert np.isinf(s.sum()) arr = np.random.randn(100, 100).astype('f4') arr[:, 2] = np.inf - with cf.option_context("mode.use_inf_as_null", True): + with pd.option_context("mode.use_inf_as_na", True): assert_almost_equal(s.sum(), s2.sum()) res = nanops.nansum(arr, axis=1) - self.assertTrue(np.isinf(res).all()) + assert np.isinf(res).all() def test_mean(self): self._check_stat_op('mean', np.mean) @@ -123,17 +223,17 @@ def test_median(self): # test with integers, test 
failure int_ts = Series(np.ones(10, dtype=int), index=lrange(10)) - self.assertAlmostEqual(np.median(int_ts), int_ts.median()) + tm.assert_almost_equal(np.median(int_ts), int_ts.median()) def test_mode(self): # No mode should be found. exp = Series([], dtype=np.float64) tm.assert_series_equal(Series([]).mode(), exp) - exp = Series([], dtype=np.int64) + exp = Series([1], dtype=np.int64) tm.assert_series_equal(Series([1]).mode(), exp) - exp = Series([], dtype=np.object) + exp = Series(['a', 'b', 'c'], dtype=np.object) tm.assert_series_equal(Series(['a', 'b', 'c']).mode(), exp) # Test numerical data types. @@ -169,7 +269,8 @@ def test_mode(self): tm.assert_series_equal(s.mode(), exp) # Test datetime types. - exp = Series([], dtype="M8[ns]") + exp = Series(['1900-05-03', '2011-01-03', + '2013-01-02'], dtype='M8[ns]') s = Series(['2011-01-03', '2013-01-02', '1900-05-03'], dtype='M8[ns]') tm.assert_series_equal(s.mode(), exp) @@ -180,7 +281,7 @@ def test_mode(self): tm.assert_series_equal(s.mode(), exp) # gh-5986: Test timedelta types. - exp = Series([], dtype='timedelta64[ns]') + exp = Series(['-1 days', '0 days', '1 days'], dtype='timedelta64[ns]') s = Series(['1 days', '-1 days', '0 days'], dtype='timedelta64[ns]') tm.assert_series_equal(s.mode(), exp) @@ -200,13 +301,13 @@ def test_mode(self): s = Series([1, 2**63, 2**63], dtype=np.uint64) tm.assert_series_equal(s.mode(), exp) - exp = Series([], dtype=np.uint64) + exp = Series([1, 2**63], dtype=np.uint64) s = Series([1, 2**63], dtype=np.uint64) tm.assert_series_equal(s.mode(), exp) # Test category dtype. 
c = Categorical([1, 2]) - exp = Categorical([], categories=[1, 2]) + exp = Categorical([1, 2], categories=[1, 2]) exp = Series(exp, dtype='category') tm.assert_series_equal(Series(c).mode(), exp) @@ -247,10 +348,10 @@ def test_var_std(self): # 1 - element series with ddof=1 s = self.ts.iloc[[0]] result = s.var(ddof=1) - self.assertTrue(isnull(result)) + assert isna(result) result = s.std(ddof=1) - self.assertTrue(isnull(result)) + assert isna(result) def test_sem(self): alt = lambda x: np.std(x, ddof=1) / np.sqrt(len(x)) @@ -264,11 +365,10 @@ def test_sem(self): # 1 - element series with ddof=1 s = self.ts.iloc[[0]] result = s.sem(ddof=1) - self.assertTrue(isnull(result)) + assert isna(result) + @td.skip_if_no_scipy def test_skew(self): - tm._skip_if_no_scipy() - from scipy.stats import skew alt = lambda x: skew(x, bias=False) self._check_stat_op('skew', alt) @@ -280,15 +380,14 @@ def test_skew(self): s = Series(np.ones(i)) df = DataFrame(np.ones((i, i))) if i < min_N: - self.assertTrue(np.isnan(s.skew())) - self.assertTrue(np.isnan(df.skew()).all()) + assert np.isnan(s.skew()) + assert np.isnan(df.skew()).all() else: - self.assertEqual(0, s.skew()) - self.assertTrue((df.skew() == 0).all()) + assert 0 == s.skew() + assert (df.skew() == 0).all() + @td.skip_if_no_scipy def test_kurt(self): - tm._skip_if_no_scipy() - from scipy.stats import kurtosis alt = lambda x: kurtosis(x, bias=False) self._check_stat_op('kurt', alt) @@ -297,7 +396,7 @@ def test_kurt(self): labels=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]]) s = Series(np.random.randn(6), index=index) - self.assertAlmostEqual(s.kurt(), s.kurt(level=0)['bar']) + tm.assert_almost_equal(s.kurt(), s.kurt(level=0)['bar']) # test corner cases, kurt() returns NaN unless there's at least 4 # values @@ -306,11 +405,11 @@ def test_kurt(self): s = Series(np.ones(i)) df = DataFrame(np.ones((i, i))) if i < min_N: - self.assertTrue(np.isnan(s.kurt())) - self.assertTrue(np.isnan(df.kurt()).all()) + assert 
np.isnan(s.kurt()) + assert np.isnan(df.kurt()).all() else: - self.assertEqual(0, s.kurt()) - self.assertTrue((df.kurt() == 0).all()) + assert 0 == s.kurt() + assert (df.kurt() == 0).all() def test_describe(self): s = Series([0, 1, 2, 3, 4], name='int_data') @@ -319,31 +418,31 @@ def test_describe(self): name='int_data', index=['count', 'mean', 'std', 'min', '25%', '50%', '75%', 'max']) - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) s = Series([True, True, False, False, False], name='bool_data') result = s.describe() expected = Series([5, 2, False, 3], name='bool_data', index=['count', 'unique', 'top', 'freq']) - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) s = Series(['a', 'a', 'b', 'c', 'd'], name='str_data') result = s.describe() expected = Series([5, 4, 'a', 2], name='str_data', index=['count', 'unique', 'top', 'freq']) - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_argsort(self): self._check_accum_op('argsort', check_dtype=False) argsorted = self.ts.argsort() - self.assertTrue(issubclass(argsorted.dtype.type, np.integer)) + assert issubclass(argsorted.dtype.type, np.integer) # GH 2967 (introduced bug in 0.11-dev I think) s = Series([Timestamp('201301%02d' % (i + 1)) for i in range(5)]) - self.assertEqual(s.dtype, 'datetime64[ns]') + assert s.dtype == 'datetime64[ns]' shifted = s.shift(-1) - self.assertEqual(shifted.dtype, 'datetime64[ns]') - self.assertTrue(isnull(shifted[4])) + assert shifted.dtype == 'datetime64[ns]' + assert isna(shifted[4]) result = s.argsort() expected = Series(lrange(5), dtype='int64') @@ -361,11 +460,12 @@ def test_argsort_stable(self): mexpected = np.argsort(s.values, kind='mergesort') qexpected = np.argsort(s.values, kind='quicksort') - self.assert_series_equal(mindexer, Series(mexpected), - check_dtype=False) - self.assert_series_equal(qindexer, Series(qexpected), - check_dtype=False) - 
self.assertFalse(np.array_equal(qindexer, mindexer)) + tm.assert_series_equal(mindexer, Series(mexpected), + check_dtype=False) + tm.assert_series_equal(qindexer, Series(qexpected), + check_dtype=False) + pytest.raises(AssertionError, tm.assert_numpy_array_equal, + qindexer, mindexer) def test_cumsum(self): self._check_accum_op('cumsum') @@ -374,24 +474,24 @@ def test_cumprod(self): self._check_accum_op('cumprod') def test_cummin(self): - self.assert_numpy_array_equal(self.ts.cummin().values, - np.minimum.accumulate(np.array(self.ts))) + tm.assert_numpy_array_equal(self.ts.cummin().values, + np.minimum.accumulate(np.array(self.ts))) ts = self.ts.copy() ts[::2] = np.NaN result = ts.cummin()[1::2] - expected = np.minimum.accumulate(ts.valid()) + expected = np.minimum.accumulate(ts.dropna()) - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_cummax(self): - self.assert_numpy_array_equal(self.ts.cummax().values, - np.maximum.accumulate(np.array(self.ts))) + tm.assert_numpy_array_equal(self.ts.cummax().values, + np.maximum.accumulate(np.array(self.ts))) ts = self.ts.copy() ts[::2] = np.NaN result = ts.cummax()[1::2] - expected = np.maximum.accumulate(ts.valid()) + expected = np.maximum.accumulate(ts.dropna()) - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_cummin_datetime64(self): s = pd.Series(pd.to_datetime(['NaT', '2000-1-2', 'NaT', '2000-1-1', @@ -400,13 +500,13 @@ def test_cummin_datetime64(self): expected = pd.Series(pd.to_datetime(['NaT', '2000-1-2', 'NaT', '2000-1-1', 'NaT', '2000-1-1'])) result = s.cummin(skipna=True) - self.assert_series_equal(expected, result) + tm.assert_series_equal(expected, result) expected = pd.Series(pd.to_datetime( ['NaT', '2000-1-2', '2000-1-2', '2000-1-1', '2000-1-1', '2000-1-1' ])) result = s.cummin(skipna=False) - self.assert_series_equal(expected, result) + tm.assert_series_equal(expected, result) def test_cummax_datetime64(self): s = 
pd.Series(pd.to_datetime(['NaT', '2000-1-2', 'NaT', '2000-1-1', @@ -415,13 +515,13 @@ def test_cummax_datetime64(self): expected = pd.Series(pd.to_datetime(['NaT', '2000-1-2', 'NaT', '2000-1-2', 'NaT', '2000-1-3'])) result = s.cummax(skipna=True) - self.assert_series_equal(expected, result) + tm.assert_series_equal(expected, result) expected = pd.Series(pd.to_datetime( ['NaT', '2000-1-2', '2000-1-2', '2000-1-2', '2000-1-2', '2000-1-3' ])) result = s.cummax(skipna=False) - self.assert_series_equal(expected, result) + tm.assert_series_equal(expected, result) def test_cummin_timedelta64(self): s = pd.Series(pd.to_timedelta(['NaT', @@ -438,7 +538,7 @@ def test_cummin_timedelta64(self): 'NaT', '1 min', ])) result = s.cummin(skipna=True) - self.assert_series_equal(expected, result) + tm.assert_series_equal(expected, result) expected = pd.Series(pd.to_timedelta(['NaT', '2 min', @@ -447,7 +547,7 @@ def test_cummin_timedelta64(self): '1 min', '1 min', ])) result = s.cummin(skipna=False) - self.assert_series_equal(expected, result) + tm.assert_series_equal(expected, result) def test_cummax_timedelta64(self): s = pd.Series(pd.to_timedelta(['NaT', @@ -464,7 +564,7 @@ def test_cummax_timedelta64(self): 'NaT', '3 min', ])) result = s.cummax(skipna=True) - self.assert_series_equal(expected, result) + tm.assert_series_equal(expected, result) expected = pd.Series(pd.to_timedelta(['NaT', '2 min', @@ -473,7 +573,7 @@ def test_cummax_timedelta64(self): '2 min', '3 min', ])) result = s.cummax(skipna=False) - self.assert_series_equal(expected, result) + tm.assert_series_equal(expected, result) def test_npdiff(self): pytest.skip("skipping due to Series no longer being an " @@ -487,9 +587,8 @@ def test_npdiff(self): def _check_stat_op(self, name, alternate, check_objects=False, check_allna=False): - import pandas.core.nanops as nanops - def testit(): + with pd.option_context('use_bottleneck', False): f = getattr(Series, name) # add some NaNs @@ -498,11 +597,11 @@ def testit(): # idxmax, 
idxmin, min, and max are valid for dates if name not in ['max', 'min']: ds = Series(date_range('1/1/2001', periods=10)) - self.assertRaises(TypeError, f, ds) + pytest.raises(TypeError, f, ds) # skipna or no - self.assertTrue(notnull(f(self.series))) - self.assertTrue(isnull(f(self.series, skipna=False))) + assert notna(f(self.series)) + assert isna(f(self.series, skipna=False)) # check the result is correct nona = self.series.dropna() @@ -512,15 +611,7 @@ def testit(): allna = self.series * nan if check_allna: - # xref 9422 - # bottleneck >= 1.0 give 0.0 for an allna Series sum - try: - self.assertTrue(nanops._USE_BOTTLENECK) - import bottleneck as bn # noqa - self.assertTrue(bn.__version__ >= LooseVersion('1.0')) - self.assertEqual(f(allna), 0.0) - except: - self.assertTrue(np.isnan(f(allna))) + assert np.isnan(f(allna)) # dtype=object with None, it works! s = Series([1, 2, 3, None, 5]) @@ -537,45 +628,35 @@ def testit(): s = Series(bdate_range('1/1/2000', periods=10)) res = f(s) exp = alternate(s) - self.assertEqual(res, exp) + assert res == exp # check on string data if name not in ['sum', 'min', 'max']: - self.assertRaises(TypeError, f, Series(list('abc'))) + pytest.raises(TypeError, f, Series(list('abc'))) # Invalid axis. - self.assertRaises(ValueError, f, self.series, axis=1) + pytest.raises(ValueError, f, self.series, axis=1) # Unimplemented numeric_only parameter. 
if 'numeric_only' in compat.signature(f).args: - self.assertRaisesRegexp(NotImplementedError, name, f, - self.series, numeric_only=True) - - testit() - - try: - import bottleneck as bn # noqa - nanops._USE_BOTTLENECK = False - testit() - nanops._USE_BOTTLENECK = True - except ImportError: - pass + tm.assert_raises_regex(NotImplementedError, name, f, + self.series, numeric_only=True) def _check_accum_op(self, name, check_dtype=True): func = getattr(np, name) - self.assert_numpy_array_equal(func(self.ts).values, - func(np.array(self.ts)), - check_dtype=check_dtype) + tm.assert_numpy_array_equal(func(self.ts).values, + func(np.array(self.ts)), + check_dtype=check_dtype) # with missing values ts = self.ts.copy() ts[::2] = np.NaN result = func(ts)[1::2] - expected = func(np.array(ts.valid())) + expected = func(np.array(ts.dropna())) - self.assert_numpy_array_equal(result.values, expected, - check_dtype=False) + tm.assert_numpy_array_equal(result.values, expected, + check_dtype=False) def test_compress(self): cond = [True, False, True, False, False] @@ -594,12 +675,12 @@ def test_numpy_compress(self): tm.assert_series_equal(np.compress(cond, s), expected) msg = "the 'axis' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.compress, - cond, s, axis=1) + tm.assert_raises_regex(ValueError, msg, np.compress, + cond, s, axis=1) msg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.compress, - cond, s, out=s) + tm.assert_raises_regex(ValueError, msg, np.compress, + cond, s, out=s) def test_round(self): self.ts.index.name = "index_name" @@ -607,7 +688,7 @@ def test_round(self): expected = Series(np.round(self.ts.values, 2), index=self.ts.index, name='ts') assert_series_equal(result, expected) - self.assertEqual(result.name, self.ts.name) + assert result.name == self.ts.name def test_numpy_round(self): # See gh-12600 @@ -617,47 +698,48 @@ def test_numpy_round(self): assert_series_equal(out, expected) msg = "the 'out' 
parameter is not supported" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): np.round(s, decimals=0, out=s) def test_built_in_round(self): if not compat.PY3: pytest.skip( - 'build in round cannot be overriden prior to Python 3') + 'build in round cannot be overridden prior to Python 3') s = Series([1.123, 2.123, 3.123], index=lrange(3)) result = round(s) expected_rounded0 = Series([1., 2., 3.], index=lrange(3)) - self.assert_series_equal(result, expected_rounded0) + tm.assert_series_equal(result, expected_rounded0) decimals = 2 expected_rounded = Series([1.12, 2.12, 3.12], index=lrange(3)) result = round(s, decimals) - self.assert_series_equal(result, expected_rounded) + tm.assert_series_equal(result, expected_rounded) def test_prod_numpy16_bug(self): s = Series([1., 1., 1.], index=lrange(3)) result = s.prod() - self.assertNotIsInstance(result, Series) + + assert not isinstance(result, Series) def test_all_any(self): ts = tm.makeTimeSeries() bool_series = ts > 0 - self.assertFalse(bool_series.all()) - self.assertTrue(bool_series.any()) + assert not bool_series.all() + assert bool_series.any() # Alternative types, with implicit 'object' dtype. s = Series(['abc', True]) - self.assertEqual('abc', s.any()) # 'abc' || True => 'abc' + assert 'abc' == s.any() # 'abc' || True => 'abc' def test_all_any_params(self): # Check skipna, with implicit 'object' dtype. s1 = Series([np.nan, True]) s2 = Series([np.nan, False]) - self.assertTrue(s1.all(skipna=False)) # nan && True => True - self.assertTrue(s1.all(skipna=True)) - self.assertTrue(np.isnan(s2.any(skipna=False))) # nan || False => nan - self.assertFalse(s2.any(skipna=True)) + assert s1.all(skipna=False) # nan && True => True + assert s1.all(skipna=True) + assert np.isnan(s2.any(skipna=False)) # nan || False => nan + assert not s2.any(skipna=True) # Check level. 
s = pd.Series([False, False, True, True, False, True], @@ -666,12 +748,12 @@ def test_all_any_params(self): assert_series_equal(s.any(level=0), Series([False, True, True])) # bool_only is not implemented with level option. - self.assertRaises(NotImplementedError, s.any, bool_only=True, level=0) - self.assertRaises(NotImplementedError, s.all, bool_only=True, level=0) + pytest.raises(NotImplementedError, s.any, bool_only=True, level=0) + pytest.raises(NotImplementedError, s.all, bool_only=True, level=0) # bool_only is not implemented alone. - self.assertRaises(NotImplementedError, s.any, bool_only=True) - self.assertRaises(NotImplementedError, s.all, bool_only=True) + pytest.raises(NotImplementedError, s.any, bool_only=True) + pytest.raises(NotImplementedError, s.all, bool_only=True) def test_modulo(self): with np.errstate(all='ignore'): @@ -696,7 +778,7 @@ def test_modulo(self): p = p.astype('float64') result = p['first'] % p['second'] result2 = p['second'] % p['first'] - self.assertFalse(np.array_equal(result, result2)) + assert not result.equals(result2) # GH 9144 s = Series([0, 1]) @@ -709,65 +791,38 @@ def test_modulo(self): expected = Series([nan, 0.0]) assert_series_equal(result, expected) - def test_ops_consistency_on_empty(self): - - # GH 7869 - # consistency on empty - - # float - result = Series(dtype=float).sum() - self.assertEqual(result, 0) - - result = Series(dtype=float).mean() - self.assertTrue(isnull(result)) - - result = Series(dtype=float).median() - self.assertTrue(isnull(result)) - - # timedelta64[ns] - result = Series(dtype='m8[ns]').sum() - self.assertEqual(result, Timedelta(0)) - - result = Series(dtype='m8[ns]').mean() - self.assertTrue(result is pd.NaT) - - result = Series(dtype='m8[ns]').median() - self.assertTrue(result is pd.NaT) - + @td.skip_if_no_scipy def test_corr(self): - tm._skip_if_no_scipy() - import scipy.stats as stats # full overlap - self.assertAlmostEqual(self.ts.corr(self.ts), 1) + 
tm.assert_almost_equal(self.ts.corr(self.ts), 1) # partial overlap - self.assertAlmostEqual(self.ts[:15].corr(self.ts[5:]), 1) + tm.assert_almost_equal(self.ts[:15].corr(self.ts[5:]), 1) - self.assertTrue(isnull(self.ts[:15].corr(self.ts[5:], min_periods=12))) + assert isna(self.ts[:15].corr(self.ts[5:], min_periods=12)) ts1 = self.ts[:15].reindex(self.ts.index) ts2 = self.ts[5:].reindex(self.ts.index) - self.assertTrue(isnull(ts1.corr(ts2, min_periods=12))) + assert isna(ts1.corr(ts2, min_periods=12)) # No overlap - self.assertTrue(np.isnan(self.ts[::2].corr(self.ts[1::2]))) + assert np.isnan(self.ts[::2].corr(self.ts[1::2])) # all NA cp = self.ts[:10].copy() cp[:] = np.nan - self.assertTrue(isnull(cp.corr(cp))) + assert isna(cp.corr(cp)) A = tm.makeTimeSeries() B = tm.makeTimeSeries() result = A.corr(B) expected, _ = stats.pearsonr(A, B) - self.assertAlmostEqual(result, expected) + tm.assert_almost_equal(result, expected) + @td.skip_if_no_scipy def test_corr_rank(self): - tm._skip_if_no_scipy() - import scipy import scipy.stats as stats @@ -777,14 +832,14 @@ def test_corr_rank(self): A[-5:] = A[:5] result = A.corr(B, method='kendall') expected = stats.kendalltau(A, B)[0] - self.assertAlmostEqual(result, expected) + tm.assert_almost_equal(result, expected) result = A.corr(B, method='spearman') expected = stats.spearmanr(A, B)[0] - self.assertAlmostEqual(result, expected) + tm.assert_almost_equal(result, expected) # these methods got rewritten in 0.8 - if scipy.__version__ < LooseVersion('0.9'): + if LooseVersion(scipy.__version__) < LooseVersion('0.9'): pytest.skip("skipping corr rank because of scipy version " "{0}".format(scipy.__version__)) @@ -797,38 +852,38 @@ def test_corr_rank(self): 1.17258718, -1.06009347, -0.10222060, -0.89076239, 0.89372375]) kexp = 0.4319297 sexp = 0.5853767 - self.assertAlmostEqual(A.corr(B, method='kendall'), kexp) - self.assertAlmostEqual(A.corr(B, method='spearman'), sexp) + tm.assert_almost_equal(A.corr(B, method='kendall'), kexp) 
+ tm.assert_almost_equal(A.corr(B, method='spearman'), sexp) def test_cov(self): # full overlap - self.assertAlmostEqual(self.ts.cov(self.ts), self.ts.std() ** 2) + tm.assert_almost_equal(self.ts.cov(self.ts), self.ts.std() ** 2) # partial overlap - self.assertAlmostEqual(self.ts[:15].cov(self.ts[5:]), + tm.assert_almost_equal(self.ts[:15].cov(self.ts[5:]), self.ts[5:15].std() ** 2) # No overlap - self.assertTrue(np.isnan(self.ts[::2].cov(self.ts[1::2]))) + assert np.isnan(self.ts[::2].cov(self.ts[1::2])) # all NA cp = self.ts[:10].copy() cp[:] = np.nan - self.assertTrue(isnull(cp.cov(cp))) + assert isna(cp.cov(cp)) # min_periods - self.assertTrue(isnull(self.ts[:15].cov(self.ts[5:], min_periods=12))) + assert isna(self.ts[:15].cov(self.ts[5:], min_periods=12)) ts1 = self.ts[:15].reindex(self.ts.index) ts2 = self.ts[5:].reindex(self.ts.index) - self.assertTrue(isnull(ts1.cov(ts2, min_periods=12))) + assert isna(ts1.cov(ts2, min_periods=12)) def test_count(self): - self.assertEqual(self.ts.count(), len(self.ts)) + assert self.ts.count() == len(self.ts) self.ts[::2] = np.NaN - self.assertEqual(self.ts.count(), np.isfinite(self.ts).sum()) + assert self.ts.count() == np.isfinite(self.ts).sum() mi = MultiIndex.from_arrays([list('aabbcc'), [1, 2, 2, nan, 1, 2]]) ts = Series(np.arange(len(mi)), index=mi) @@ -856,15 +911,15 @@ def test_dot(self): # Check ndarray argument result = a.dot(b.values) - self.assertTrue(np.all(result == expected.values)) + assert np.all(result == expected.values) assert_almost_equal(a.dot(b['2'].values), expected['2']) # Check series argument assert_almost_equal(a.dot(b['1']), expected['1']) assert_almost_equal(a.dot(b2['1']), expected['1']) - self.assertRaises(Exception, a.dot, a.values[:3]) - self.assertRaises(ValueError, a.dot, b.T) + pytest.raises(Exception, a.dot, a.values[:3]) + pytest.raises(ValueError, a.dot, b.T) def test_value_counts_nunique(self): @@ -873,7 +928,13 @@ def test_value_counts_nunique(self): series[20:500] = np.nan 
series[10:20] = 5000 result = series.nunique() - self.assertEqual(result, 11) + assert result == 11 + + # GH 18051 + s = pd.Series(pd.Categorical([])) + assert s.nunique() == 0 + s = pd.Series(pd.Categorical([np.nan])) + assert s.nunique() == 0 def test_unique(self): @@ -881,213 +942,136 @@ def test_unique(self): s = Series([1.2345] * 100) s[::2] = np.nan result = s.unique() - self.assertEqual(len(result), 2) + assert len(result) == 2 s = Series([1.2345] * 100, dtype='f4') s[::2] = np.nan result = s.unique() - self.assertEqual(len(result), 2) + assert len(result) == 2 # NAs in object arrays #714 s = Series(['foo'] * 100, dtype='O') s[::2] = np.nan result = s.unique() - self.assertEqual(len(result), 2) + assert len(result) == 2 # decision about None s = Series([1, 2, 3, None, None, None], dtype=object) result = s.unique() expected = np.array([1, 2, 3, None], dtype=object) - self.assert_numpy_array_equal(result, expected) - - def test_drop_duplicates(self): - # check both int and object - for s in [Series([1, 2, 3, 3]), Series(['1', '2', '3', '3'])]: - expected = Series([False, False, False, True]) - assert_series_equal(s.duplicated(), expected) - assert_series_equal(s.drop_duplicates(), s[~expected]) - sc = s.copy() - sc.drop_duplicates(inplace=True) - assert_series_equal(sc, s[~expected]) - - expected = Series([False, False, True, False]) - assert_series_equal(s.duplicated(keep='last'), expected) - assert_series_equal(s.drop_duplicates(keep='last'), s[~expected]) - sc = s.copy() - sc.drop_duplicates(keep='last', inplace=True) - assert_series_equal(sc, s[~expected]) - - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - assert_series_equal(s.duplicated(take_last=True), expected) - with tm.assert_produces_warning(FutureWarning): - assert_series_equal( - s.drop_duplicates(take_last=True), s[~expected]) - sc = s.copy() - with tm.assert_produces_warning(FutureWarning): - sc.drop_duplicates(take_last=True, inplace=True) - assert_series_equal(sc, 
s[~expected]) - - expected = Series([False, False, True, True]) - assert_series_equal(s.duplicated(keep=False), expected) - assert_series_equal(s.drop_duplicates(keep=False), s[~expected]) - sc = s.copy() - sc.drop_duplicates(keep=False, inplace=True) - assert_series_equal(sc, s[~expected]) - - for s in [Series([1, 2, 3, 5, 3, 2, 4]), - Series(['1', '2', '3', '5', '3', '2', '4'])]: - expected = Series([False, False, False, False, True, True, False]) - assert_series_equal(s.duplicated(), expected) - assert_series_equal(s.drop_duplicates(), s[~expected]) - sc = s.copy() - sc.drop_duplicates(inplace=True) - assert_series_equal(sc, s[~expected]) - - expected = Series([False, True, True, False, False, False, False]) - assert_series_equal(s.duplicated(keep='last'), expected) - assert_series_equal(s.drop_duplicates(keep='last'), s[~expected]) - sc = s.copy() - sc.drop_duplicates(keep='last', inplace=True) - assert_series_equal(sc, s[~expected]) - - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - assert_series_equal(s.duplicated(take_last=True), expected) - with tm.assert_produces_warning(FutureWarning): - assert_series_equal( - s.drop_duplicates(take_last=True), s[~expected]) - sc = s.copy() - with tm.assert_produces_warning(FutureWarning): - sc.drop_duplicates(take_last=True, inplace=True) - assert_series_equal(sc, s[~expected]) - - expected = Series([False, True, True, False, True, True, False]) - assert_series_equal(s.duplicated(keep=False), expected) - assert_series_equal(s.drop_duplicates(keep=False), s[~expected]) - sc = s.copy() - sc.drop_duplicates(keep=False, inplace=True) - assert_series_equal(sc, s[~expected]) - - def test_rank(self): - tm._skip_if_no_scipy() - from scipy.stats import rankdata - - self.ts[::2] = np.nan - self.ts[:10][::3] = 4. 
- - ranks = self.ts.rank() - oranks = self.ts.astype('O').rank() - - assert_series_equal(ranks, oranks) - - mask = np.isnan(self.ts) - filled = self.ts.fillna(np.inf) - - # rankdata returns a ndarray - exp = Series(rankdata(filled), index=filled.index, name='ts') - exp[mask] = np.nan - - tm.assert_series_equal(ranks, exp) - - iseries = Series(np.arange(5).repeat(2)) - - iranks = iseries.rank() - exp = iseries.astype(float).rank() - assert_series_equal(iranks, exp) - iseries = Series(np.arange(5)) + 1.0 - exp = iseries / 5.0 - iranks = iseries.rank(pct=True) - - assert_series_equal(iranks, exp) - - iseries = Series(np.repeat(1, 100)) - exp = Series(np.repeat(0.505, 100)) - iranks = iseries.rank(pct=True) - assert_series_equal(iranks, exp) - - iseries[1] = np.nan - exp = Series(np.repeat(50.0 / 99.0, 100)) - exp[1] = np.nan - iranks = iseries.rank(pct=True) - assert_series_equal(iranks, exp) - - iseries = Series(np.arange(5)) + 1.0 - iseries[4] = np.nan - exp = iseries / 4.0 - iranks = iseries.rank(pct=True) - assert_series_equal(iranks, exp) - - iseries = Series(np.repeat(np.nan, 100)) - exp = iseries.copy() - iranks = iseries.rank(pct=True) - assert_series_equal(iranks, exp) - - iseries = Series(np.arange(5)) + 1 - iseries[4] = np.nan - exp = iseries / 4.0 - iranks = iseries.rank(pct=True) - assert_series_equal(iranks, exp) - - rng = date_range('1/1/1990', periods=5) - iseries = Series(np.arange(5), rng) + 1 - iseries.iloc[4] = np.nan - exp = iseries / 4.0 - iranks = iseries.rank(pct=True) - assert_series_equal(iranks, exp) - - iseries = Series([1e-50, 1e-100, 1e-20, 1e-2, 1e-20 + 1e-30, 1e-1]) - exp = Series([2, 1, 3, 5, 4, 6.0]) - iranks = iseries.rank() - assert_series_equal(iranks, exp) - - # GH 5968 - iseries = Series(['3 day', '1 day 10m', '-2 day', pd.NaT], - dtype='m8[ns]') - exp = Series([3, 2, 1, np.nan]) - iranks = iseries.rank() - assert_series_equal(iranks, exp) - - values = np.array( - [-50, -1, -1e-20, -1e-25, -1e-50, 0, 1e-40, 1e-20, 1e-10, 2, 40 - 
], dtype='float64') - random_order = np.random.permutation(len(values)) - iseries = Series(values[random_order]) - exp = Series(random_order + 1.0, dtype='float64') - iranks = iseries.rank() - assert_series_equal(iranks, exp) - - def test_rank_signature(self): - s = Series([0, 1]) - s.rank(method='average') - self.assertRaises(ValueError, s.rank, 'average') - - def test_rank_inf(self): - pytest.skip('DataFrame.rank does not currently rank ' - 'np.inf and -np.inf properly') - - values = np.array( - [-np.inf, -50, -1, -1e-20, -1e-25, -1e-50, 0, 1e-40, 1e-20, 1e-10, - 2, 40, np.inf], dtype='float64') - random_order = np.random.permutation(len(values)) - iseries = Series(values[random_order]) - exp = Series(random_order + 1.0, dtype='float64') - iranks = iseries.rank() - assert_series_equal(iranks, exp) + tm.assert_numpy_array_equal(result, expected) + + # GH 18051 + s = pd.Series(pd.Categorical([])) + tm.assert_categorical_equal(s.unique(), pd.Categorical([]), + check_dtype=False) + s = pd.Series(pd.Categorical([np.nan])) + tm.assert_categorical_equal(s.unique(), pd.Categorical([np.nan]), + check_dtype=False) + + @pytest.mark.parametrize( + "tc1, tc2", + [ + ( + Series([1, 2, 3, 3], dtype=np.dtype('int_')), + Series([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype('int_')) + ), + ( + Series([1, 2, 3, 3], dtype=np.dtype('uint')), + Series([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype('uint')) + ), + ( + Series([1, 2, 3, 3], dtype=np.dtype('float_')), + Series([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype('float_')) + ), + ( + Series([1, 2, 3, 3], dtype=np.dtype('unicode_')), + Series([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype('unicode_')) + ) + ] + ) + def test_drop_duplicates_non_bool(self, tc1, tc2): + # Test case 1 + expected = Series([False, False, False, True]) + assert_series_equal(tc1.duplicated(), expected) + assert_series_equal(tc1.drop_duplicates(), tc1[~expected]) + sc = tc1.copy() + sc.drop_duplicates(inplace=True) + assert_series_equal(sc, tc1[~expected]) + + expected = Series([False, 
False, True, False]) + assert_series_equal(tc1.duplicated(keep='last'), expected) + assert_series_equal(tc1.drop_duplicates(keep='last'), tc1[~expected]) + sc = tc1.copy() + sc.drop_duplicates(keep='last', inplace=True) + assert_series_equal(sc, tc1[~expected]) + + expected = Series([False, False, True, True]) + assert_series_equal(tc1.duplicated(keep=False), expected) + assert_series_equal(tc1.drop_duplicates(keep=False), tc1[~expected]) + sc = tc1.copy() + sc.drop_duplicates(keep=False, inplace=True) + assert_series_equal(sc, tc1[~expected]) + + # Test case 2 + expected = Series([False, False, False, False, True, True, False]) + assert_series_equal(tc2.duplicated(), expected) + assert_series_equal(tc2.drop_duplicates(), tc2[~expected]) + sc = tc2.copy() + sc.drop_duplicates(inplace=True) + assert_series_equal(sc, tc2[~expected]) + + expected = Series([False, True, True, False, False, False, False]) + assert_series_equal(tc2.duplicated(keep='last'), expected) + assert_series_equal(tc2.drop_duplicates(keep='last'), tc2[~expected]) + sc = tc2.copy() + sc.drop_duplicates(keep='last', inplace=True) + assert_series_equal(sc, tc2[~expected]) + + expected = Series([False, True, True, False, True, True, False]) + assert_series_equal(tc2.duplicated(keep=False), expected) + assert_series_equal(tc2.drop_duplicates(keep=False), tc2[~expected]) + sc = tc2.copy() + sc.drop_duplicates(keep=False, inplace=True) + assert_series_equal(sc, tc2[~expected]) + + def test_drop_duplicates_bool(self): + tc = Series([True, False, True, False]) + + expected = Series([False, False, True, True]) + assert_series_equal(tc.duplicated(), expected) + assert_series_equal(tc.drop_duplicates(), tc[~expected]) + sc = tc.copy() + sc.drop_duplicates(inplace=True) + assert_series_equal(sc, tc[~expected]) + + expected = Series([True, True, False, False]) + assert_series_equal(tc.duplicated(keep='last'), expected) + assert_series_equal(tc.drop_duplicates(keep='last'), tc[~expected]) + sc = tc.copy() + 
sc.drop_duplicates(keep='last', inplace=True) + assert_series_equal(sc, tc[~expected]) + + expected = Series([True, True, True, True]) + assert_series_equal(tc.duplicated(keep=False), expected) + assert_series_equal(tc.drop_duplicates(keep=False), tc[~expected]) + sc = tc.copy() + sc.drop_duplicates(keep=False, inplace=True) + assert_series_equal(sc, tc[~expected]) def test_clip(self): val = self.ts.median() - self.assertEqual(self.ts.clip_lower(val).min(), val) - self.assertEqual(self.ts.clip_upper(val).max(), val) + assert self.ts.clip_lower(val).min() == val + assert self.ts.clip_upper(val).max() == val - self.assertEqual(self.ts.clip(lower=val).min(), val) - self.assertEqual(self.ts.clip(upper=val).max(), val) + assert self.ts.clip(lower=val).min() == val + assert self.ts.clip(upper=val).max() == val result = self.ts.clip(-0.5, 0.5) expected = np.clip(self.ts, -0.5, 0.5) assert_series_equal(result, expected) - tm.assertIsInstance(expected, Series) + assert isinstance(expected, Series) def test_clip_types_and_nulls(self): @@ -1099,10 +1083,21 @@ def test_clip_types_and_nulls(self): thresh = s[2] l = s.clip_lower(thresh) u = s.clip_upper(thresh) - self.assertEqual(l[notnull(l)].min(), thresh) - self.assertEqual(u[notnull(u)].max(), thresh) - self.assertEqual(list(isnull(s)), list(isnull(l))) - self.assertEqual(list(isnull(s)), list(isnull(u))) + assert l[notna(l)].min() == thresh + assert u[notna(u)].max() == thresh + assert list(isna(s)) == list(isna(l)) + assert list(isna(s)) == list(isna(u)) + + def test_clip_with_na_args(self): + """Should process np.nan argument as None """ + # GH # 17276 + s = Series([1, 2, 3]) + + assert_series_equal(s.clip(np.nan), Series([1, 2, 3])) + assert_series_equal(s.clip(upper=[1, 1, np.nan]), Series([1, 2, 3])) + assert_series_equal(s.clip(lower=[1, np.nan, 1]), Series([1, 2, 3])) + assert_series_equal(s.clip(upper=np.nan, lower=np.nan), + Series([1, 2, 3])) def test_clip_against_series(self): # GH #6966 @@ -1115,20 +1110,33 @@ 
def test_clip_against_series(self): lower = Series([1.0, 2.0, 3.0]) upper = Series([1.5, 2.5, 3.5]) + assert_series_equal(s.clip(lower, upper), Series([1.0, 2.0, 3.5])) assert_series_equal(s.clip(1.5, upper), Series([1.5, 1.5, 3.5])) + @pytest.mark.parametrize("inplace", [True, False]) + @pytest.mark.parametrize("upper", [[1, 2, 3], np.asarray([1, 2, 3])]) + def test_clip_against_list_like(self, inplace, upper): + # GH #15390 + original = pd.Series([5, 6, 7]) + result = original.clip(upper=upper, inplace=inplace) + expected = pd.Series([1, 2, 3]) + + if inplace: + result = original + tm.assert_series_equal(result, expected, check_exact=True) + def test_clip_with_datetimes(self): # GH 11838 # naive and tz-aware datetimes t = Timestamp('2015-12-01 09:30:30') - s = Series([Timestamp('2015-12-01 09:30:00'), Timestamp( - '2015-12-01 09:31:00')]) + s = Series([Timestamp('2015-12-01 09:30:00'), + Timestamp('2015-12-01 09:31:00')]) result = s.clip(upper=t) - expected = Series([Timestamp('2015-12-01 09:30:00'), Timestamp( - '2015-12-01 09:30:30')]) + expected = Series([Timestamp('2015-12-01 09:30:00'), + Timestamp('2015-12-01 09:30:30')]) assert_series_equal(result, expected) t = Timestamp('2015-12-01 09:30:30', tz='US/Eastern') @@ -1183,13 +1191,25 @@ def test_isin(self): expected = Series([True, False, True, False, False, False, True, True]) assert_series_equal(result, expected) + # GH: 16012 + # This specific issue has to have a series over 1e6 in len, but the + # comparison array (in_list) must be large enough so that numpy doesn't + # do a manual masking trick that will avoid this issue altogether + s = Series(list('abcdefghijk' * 10 ** 5)) + # If numpy doesn't do the manual comparison/mask, these + # unorderable mixed types are what cause the exception in numpy + in_list = [-1, 'a', 'b', 'G', 'Y', 'Z', 'E', + 'K', 'E', 'S', 'I', 'R', 'R'] * 6 + + assert s.isin(in_list).sum() == 200000 + def test_isin_with_string_scalar(self): # GH4763 s = Series(['A', 'B', 'C', 'a', 
'B', 'B', 'A', 'C']) - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): s.isin('a') - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): s = Series(['aaa', 'b', 'c']) s.isin('aaa') @@ -1226,6 +1246,15 @@ def test_isin_with_i8(self): result = s.isin(s[0:2]) assert_series_equal(result, expected) + @pytest.mark.parametrize("empty", [[], Series(), np.array([])]) + def test_isin_empty(self, empty): + # see gh-16991 + s = Series(["a", "b"]) + expected = Series([False, False]) + + result = s.isin(empty) + tm.assert_series_equal(expected, result) + def test_timedelta64_analytics(self): from pandas import date_range @@ -1234,20 +1263,20 @@ def test_timedelta64_analytics(self): Timestamp('20120101') result = td.idxmin() - self.assertEqual(result, 0) + assert result == 0 result = td.idxmax() - self.assertEqual(result, 2) + assert result == 2 # GH 2982 # with NaT td[0] = np.nan result = td.idxmin() - self.assertEqual(result, 1) + assert result == 1 result = td.idxmax() - self.assertEqual(result, 2) + assert result == 2 # abs s1 = Series(date_range('20120101', periods=3)) @@ -1264,145 +1293,173 @@ def test_timedelta64_analytics(self): # max/min result = td.max() expected = Timedelta('2 days') - self.assertEqual(result, expected) + assert result == expected result = td.min() expected = Timedelta('1 days') - self.assertEqual(result, expected) + assert result == expected def test_idxmin(self): # test idxmin - # _check_stat_op approach can not be used here because of isnull check. + # _check_stat_op approach can not be used here because of isna check. 
# add some NaNs self.series[5:15] = np.NaN # skipna or no - self.assertEqual(self.series[self.series.idxmin()], self.series.min()) - self.assertTrue(isnull(self.series.idxmin(skipna=False))) + assert self.series[self.series.idxmin()] == self.series.min() + assert isna(self.series.idxmin(skipna=False)) # no NaNs nona = self.series.dropna() - self.assertEqual(nona[nona.idxmin()], nona.min()) - self.assertEqual(nona.index.values.tolist().index(nona.idxmin()), - nona.values.argmin()) + assert nona[nona.idxmin()] == nona.min() + assert (nona.index.values.tolist().index(nona.idxmin()) == + nona.values.argmin()) # all NaNs allna = self.series * nan - self.assertTrue(isnull(allna.idxmin())) + assert isna(allna.idxmin()) # datetime64[ns] from pandas import date_range s = Series(date_range('20130102', periods=6)) result = s.idxmin() - self.assertEqual(result, 0) + assert result == 0 s[0] = np.nan result = s.idxmin() - self.assertEqual(result, 1) + assert result == 1 + + def test_numpy_argmin_deprecated(self): + # See gh-16830 + data = np.arange(1, 11) + + s = Series(data, index=data) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + # The deprecation of Series.argmin also causes a deprecation + # warning when calling np.argmin. This behavior is temporary + # until the implementation of Series.argmin is corrected. 
+ result = np.argmin(s) + + assert result == 1 + + with tm.assert_produces_warning(FutureWarning): + # argmin is aliased to idxmin + result = s.argmin() - def test_numpy_argmin(self): - # argmin is aliased to idxmin - data = np.random.randint(0, 11, size=10) - result = np.argmin(Series(data)) - self.assertEqual(result, np.argmin(data)) + assert result == 1 if not _np_version_under1p10: - msg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.argmin, - Series(data), out=data) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + msg = "the 'out' parameter is not supported" + tm.assert_raises_regex(ValueError, msg, np.argmin, + s, out=data) def test_idxmax(self): # test idxmax - # _check_stat_op approach can not be used here because of isnull check. + # _check_stat_op approach can not be used here because of isna check. # add some NaNs self.series[5:15] = np.NaN # skipna or no - self.assertEqual(self.series[self.series.idxmax()], self.series.max()) - self.assertTrue(isnull(self.series.idxmax(skipna=False))) + assert self.series[self.series.idxmax()] == self.series.max() + assert isna(self.series.idxmax(skipna=False)) # no NaNs nona = self.series.dropna() - self.assertEqual(nona[nona.idxmax()], nona.max()) - self.assertEqual(nona.index.values.tolist().index(nona.idxmax()), - nona.values.argmax()) + assert nona[nona.idxmax()] == nona.max() + assert (nona.index.values.tolist().index(nona.idxmax()) == + nona.values.argmax()) # all NaNs allna = self.series * nan - self.assertTrue(isnull(allna.idxmax())) + assert isna(allna.idxmax()) from pandas import date_range s = Series(date_range('20130102', periods=6)) result = s.idxmax() - self.assertEqual(result, 5) + assert result == 5 s[5] = np.nan result = s.idxmax() - self.assertEqual(result, 4) + assert result == 4 # Float64Index # GH 5914 s = pd.Series([1, 2, 3], [1.1, 2.1, 3.1]) result = s.idxmax() - self.assertEqual(result, 3.1) + assert result == 3.1 result = 
s.idxmin() - self.assertEqual(result, 1.1) + assert result == 1.1 s = pd.Series(s.index, s.index) result = s.idxmax() - self.assertEqual(result, 3.1) + assert result == 3.1 result = s.idxmin() - self.assertEqual(result, 1.1) + assert result == 1.1 - def test_numpy_argmax(self): + def test_numpy_argmax_deprecated(self): + # See gh-16830 + data = np.arange(1, 11) - # argmax is aliased to idxmax - data = np.random.randint(0, 11, size=10) - result = np.argmax(Series(data)) - self.assertEqual(result, np.argmax(data)) + s = Series(data, index=data) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + # The deprecation of Series.argmax also causes a deprecation + # warning when calling np.argmax. This behavior is temporary + # until the implementation of Series.argmax is corrected. + result = np.argmax(s) + assert result == 10 + + with tm.assert_produces_warning(FutureWarning): + # argmax is aliased to idxmax + result = s.argmax() + + assert result == 10 if not _np_version_under1p10: - msg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.argmax, - Series(data), out=data) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + msg = "the 'out' parameter is not supported" + tm.assert_raises_regex(ValueError, msg, np.argmax, + s, out=data) def test_ptp(self): N = 1000 arr = np.random.randn(N) ser = Series(arr) - self.assertEqual(np.ptp(ser), np.ptp(arr)) + assert np.ptp(ser) == np.ptp(arr) # GH11163 s = Series([3, 5, np.nan, -3, 10]) - self.assertEqual(s.ptp(), 13) - self.assertTrue(pd.isnull(s.ptp(skipna=False))) + assert s.ptp() == 13 + assert pd.isna(s.ptp(skipna=False)) mi = pd.MultiIndex.from_product([['a', 'b'], [1, 2, 3]]) s = pd.Series([1, np.nan, 7, 3, 5, np.nan], index=mi) expected = pd.Series([6, 2], index=['a', 'b'], dtype=np.float64) - self.assert_series_equal(s.ptp(level=0), expected) + tm.assert_series_equal(s.ptp(level=0), expected) expected = pd.Series([np.nan, np.nan], 
index=['a', 'b']) - self.assert_series_equal(s.ptp(level=0, skipna=False), expected) + tm.assert_series_equal(s.ptp(level=0, skipna=False), expected) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): s.ptp(axis=1) s = pd.Series(['a', 'b', 'c', 'd', 'e']) - with self.assertRaises(TypeError): + with pytest.raises(TypeError): s.ptp() - with self.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): s.ptp(numeric_only=True) def test_empty_timeseries_redections_return_nat(self): # covers #11245 for dtype in ('m8[ns]', 'm8[ns]', 'M8[ns]', 'M8[ns, UTC]'): - self.assertIs(Series([], dtype=dtype).min(), pd.NaT) - self.assertIs(Series([], dtype=dtype).max(), pd.NaT) + assert Series([], dtype=dtype).min() is pd.NaT + assert Series([], dtype=dtype).max() is pd.NaT def test_unique_data_ownership(self): # it works! #1807 @@ -1432,7 +1489,7 @@ def test_numpy_repeat(self): assert_series_equal(np.repeat(s, 2), expected) msg = "the 'axis' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.repeat, s, 2, axis=0) + tm.assert_raises_regex(ValueError, msg, np.repeat, s, 2, axis=0) def test_searchsorted(self): s = Series([1, 2, 3]) @@ -1451,7 +1508,7 @@ def test_searchsorted_numeric_dtypes_scalar(self): s = Series([1, 2, 90, 1000, 3e9]) r = s.searchsorted(30) e = 2 - self.assertEqual(r, e) + assert r == e r = s.searchsorted([30]) e = np.array([2], dtype=np.intp) @@ -1468,7 +1525,7 @@ def test_search_sorted_datetime64_scalar(self): v = pd.Timestamp('20120102') r = s.searchsorted(v) e = 1 - self.assertEqual(r, e) + assert r == e def test_search_sorted_datetime64_list(self): s = Series(pd.date_range('20120101', periods=10, freq='2D')) @@ -1487,111 +1544,26 @@ def test_searchsorted_sorter(self): def test_is_unique(self): # GH11946 s = Series(np.random.randint(0, 10, size=1000)) - self.assertFalse(s.is_unique) + assert not s.is_unique s = Series(np.arange(1000)) - self.assertTrue(s.is_unique) + assert s.is_unique def 
test_is_monotonic(self): s = Series(np.random.randint(0, 10, size=1000)) - self.assertFalse(s.is_monotonic) + assert not s.is_monotonic s = Series(np.arange(1000)) - self.assertTrue(s.is_monotonic) - self.assertTrue(s.is_monotonic_increasing) + assert s.is_monotonic + assert s.is_monotonic_increasing s = Series(np.arange(1000, 0, -1)) - self.assertTrue(s.is_monotonic_decreasing) + assert s.is_monotonic_decreasing s = Series(pd.date_range('20130101', periods=10)) - self.assertTrue(s.is_monotonic) - self.assertTrue(s.is_monotonic_increasing) + assert s.is_monotonic + assert s.is_monotonic_increasing s = Series(list(reversed(s.tolist()))) - self.assertFalse(s.is_monotonic) - self.assertTrue(s.is_monotonic_decreasing) - - def test_nsmallest_nlargest(self): - # float, int, datetime64 (use i8), timedelts64 (same), - # object that are numbers, object that are strings - - base = [3, 2, 1, 2, 5] - - s_list = [ - Series(base, dtype='int8'), - Series(base, dtype='int16'), - Series(base, dtype='int32'), - Series(base, dtype='int64'), - Series(base, dtype='float32'), - Series(base, dtype='float64'), - Series(base, dtype='uint8'), - Series(base, dtype='uint16'), - Series(base, dtype='uint32'), - Series(base, dtype='uint64'), - Series(base).astype('timedelta64[ns]'), - Series(pd.to_datetime(['2003', '2002', '2001', '2002', '2005'])), - ] - - raising = [ - Series([3., 2, 1, 2, '5'], dtype='object'), - Series([3., 2, 1, 2, 5], dtype='object'), - # not supported on some archs - # Series([3., 2, 1, 2, 5], dtype='complex256'), - Series([3., 2, 1, 2, 5], dtype='complex128'), - ] - - for r in raising: - dt = r.dtype - msg = "Cannot use method 'n(larg|small)est' with dtype %s" % dt - args = 2, len(r), 0, -1 - methods = r.nlargest, r.nsmallest - for method, arg in product(methods, args): - with tm.assertRaisesRegexp(TypeError, msg): - method(arg) - - for s in s_list: - - assert_series_equal(s.nsmallest(2), s.iloc[[2, 1]]) - - assert_series_equal(s.nsmallest(2, keep='last'), s.iloc[[2, 
3]]) - with tm.assert_produces_warning(FutureWarning): - assert_series_equal( - s.nsmallest(2, take_last=True), s.iloc[[2, 3]]) - - assert_series_equal(s.nlargest(3), s.iloc[[4, 0, 1]]) - - assert_series_equal(s.nlargest(3, keep='last'), s.iloc[[4, 0, 3]]) - with tm.assert_produces_warning(FutureWarning): - assert_series_equal( - s.nlargest(3, take_last=True), s.iloc[[4, 0, 3]]) - - empty = s.iloc[0:0] - assert_series_equal(s.nsmallest(0), empty) - assert_series_equal(s.nsmallest(-1), empty) - assert_series_equal(s.nlargest(0), empty) - assert_series_equal(s.nlargest(-1), empty) - - assert_series_equal(s.nsmallest(len(s)), s.sort_values()) - assert_series_equal(s.nsmallest(len(s) + 1), s.sort_values()) - assert_series_equal(s.nlargest(len(s)), s.iloc[[4, 0, 1, 3, 2]]) - assert_series_equal(s.nlargest(len(s) + 1), - s.iloc[[4, 0, 1, 3, 2]]) - - s = Series([3., np.nan, 1, 2, 5]) - assert_series_equal(s.nlargest(), s.iloc[[4, 0, 3, 2]]) - assert_series_equal(s.nsmallest(), s.iloc[[2, 3, 0, 4]]) - - msg = 'keep must be either "first", "last"' - with tm.assertRaisesRegexp(ValueError, msg): - s.nsmallest(keep='invalid') - with tm.assertRaisesRegexp(ValueError, msg): - s.nlargest(keep='invalid') - - # GH 13412 - s = Series([1, 4, 3, 2], index=[0, 0, 1, 1]) - result = s.nlargest(3) - expected = s.sort_values(ascending=False).head(3) - assert_series_equal(result, expected) - result = s.nsmallest(3) - expected = s.sort_values().head(3) - assert_series_equal(result, expected) + assert not s.is_monotonic + assert s.is_monotonic_decreasing def test_sort_index_level(self): mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC')) @@ -1627,7 +1599,7 @@ def test_apply_categorical(self): result = s.apply(lambda x: 'A') exp = pd.Series(['A'] * 7, name='XX', index=list('abcdefg')) tm.assert_series_equal(result, exp) - self.assertEqual(result.dtype, np.object) + assert result.dtype == np.object def test_shift_int(self): ts = self.ts.astype(int) @@ -1639,79 +1611,21 @@ def 
test_shift_categorical(self): # GH 9416 s = pd.Series(['a', 'b', 'c', 'd'], dtype='category') - assert_series_equal(s.iloc[:-1], s.shift(1).shift(-1).valid()) + assert_series_equal(s.iloc[:-1], s.shift(1).shift(-1).dropna()) sp1 = s.shift(1) assert_index_equal(s.index, sp1.index) - self.assertTrue(np.all(sp1.values.codes[:1] == -1)) - self.assertTrue(np.all(s.values.codes[:-1] == sp1.values.codes[1:])) + assert np.all(sp1.values.codes[:1] == -1) + assert np.all(s.values.codes[:-1] == sp1.values.codes[1:]) sn2 = s.shift(-2) assert_index_equal(s.index, sn2.index) - self.assertTrue(np.all(sn2.values.codes[-2:] == -1)) - self.assertTrue(np.all(s.values.codes[2:] == sn2.values.codes[:-2])) + assert np.all(sn2.values.codes[-2:] == -1) + assert np.all(s.values.codes[2:] == sn2.values.codes[:-2]) assert_index_equal(s.values.categories, sp1.values.categories) assert_index_equal(s.values.categories, sn2.values.categories) - def test_reshape_deprecate(self): - x = Series(np.random.random(10), name='x') - tm.assert_produces_warning(FutureWarning, x.reshape, x.shape) - - def test_reshape_non_2d(self): - # see gh-4554 - with tm.assert_produces_warning(FutureWarning): - x = Series(np.random.random(201), name='x') - self.assertTrue(x.reshape(x.shape, ) is x) - - # see gh-2719 - with tm.assert_produces_warning(FutureWarning): - a = Series([1, 2, 3, 4]) - result = a.reshape(2, 2) - expected = a.values.reshape(2, 2) - tm.assert_numpy_array_equal(result, expected) - self.assertIsInstance(result, type(expected)) - - def test_reshape_2d_return_array(self): - x = Series(np.random.random(201), name='x') - - with tm.assert_produces_warning(FutureWarning): - result = x.reshape((-1, 1)) - self.assertNotIsInstance(result, Series) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result2 = np.reshape(x, (-1, 1)) - self.assertNotIsInstance(result2, Series) - - with tm.assert_produces_warning(FutureWarning): - result = x[:, None] - expected = x.reshape((-1, 1)) - 
assert_almost_equal(result, expected) - - def test_reshape_bad_kwarg(self): - a = Series([1, 2, 3, 4]) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - msg = "'foo' is an invalid keyword argument for this function" - tm.assertRaisesRegexp(TypeError, msg, a.reshape, (2, 2), foo=2) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - msg = r"reshape\(\) got an unexpected keyword argument 'foo'" - tm.assertRaisesRegexp(TypeError, msg, a.reshape, a.shape, foo=2) - - def test_numpy_reshape(self): - a = Series([1, 2, 3, 4]) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = np.reshape(a, (2, 2)) - expected = a.values.reshape(2, 2) - tm.assert_numpy_array_equal(result, expected) - self.assertIsInstance(result, type(expected)) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = np.reshape(a, a.shape) - tm.assert_series_equal(result, a) - def test_unstack(self): from numpy import nan @@ -1738,7 +1652,7 @@ def test_unstack(self): labels=[[0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]]) expected = DataFrame({'bar': s.values}, index=exp_index).sort_index(level=0) - unstacked = s.unstack(0) + unstacked = s.unstack(0).sort_index() assert_frame_equal(unstacked, expected) # GH5873 @@ -1867,3 +1781,324 @@ def test_value_counts_categorical_not_ordered(self): index=exp_idx, name='xxx') tm.assert_series_equal(s.value_counts(normalize=True), exp) tm.assert_series_equal(idx.value_counts(normalize=True), exp) + + +@pytest.fixture +def s_main_dtypes(): + df = pd.DataFrame( + {'datetime': pd.to_datetime(['2003', '2002', + '2001', '2002', + '2005']), + 'datetimetz': pd.to_datetime( + ['2003', '2002', + '2001', '2002', + '2005']).tz_localize('US/Eastern'), + 'timedelta': pd.to_timedelta(['3d', '2d', '1d', + '2d', '5d'])}) + + for dtype in ['int8', 'int16', 'int32', 'int64', + 'float32', 'float64', + 'uint8', 'uint16', 'uint32', 'uint64']: + df[dtype] = Series([3, 2, 1, 2, 5], 
dtype=dtype) + + return df + + +class TestNLargestNSmallest(object): + + @pytest.mark.parametrize( + "r", [Series([3., 2, 1, 2, '5'], dtype='object'), + Series([3., 2, 1, 2, 5], dtype='object'), + # not supported on some archs + # Series([3., 2, 1, 2, 5], dtype='complex256'), + Series([3., 2, 1, 2, 5], dtype='complex128'), + Series(list('abcde')), + Series(list('abcde'), dtype='category')]) + def test_error(self, r): + dt = r.dtype + msg = ("Cannot use method 'n(larg|small)est' with " + "dtype {dt}".format(dt=dt)) + args = 2, len(r), 0, -1 + methods = r.nlargest, r.nsmallest + for method, arg in product(methods, args): + with tm.assert_raises_regex(TypeError, msg): + method(arg) + + @pytest.mark.parametrize( + "s", + [v for k, v in s_main_dtypes().iteritems()]) + def test_nsmallest_nlargest(self, s): + # float, int, datetime64 (use i8), timedelts64 (same), + # object that are numbers, object that are strings + + assert_series_equal(s.nsmallest(2), s.iloc[[2, 1]]) + assert_series_equal(s.nsmallest(2, keep='last'), s.iloc[[2, 3]]) + + empty = s.iloc[0:0] + assert_series_equal(s.nsmallest(0), empty) + assert_series_equal(s.nsmallest(-1), empty) + assert_series_equal(s.nlargest(0), empty) + assert_series_equal(s.nlargest(-1), empty) + + assert_series_equal(s.nsmallest(len(s)), s.sort_values()) + assert_series_equal(s.nsmallest(len(s) + 1), s.sort_values()) + assert_series_equal(s.nlargest(len(s)), s.iloc[[4, 0, 1, 3, 2]]) + assert_series_equal(s.nlargest(len(s) + 1), + s.iloc[[4, 0, 1, 3, 2]]) + + def test_misc(self): + + s = Series([3., np.nan, 1, 2, 5]) + assert_series_equal(s.nlargest(), s.iloc[[4, 0, 3, 2]]) + assert_series_equal(s.nsmallest(), s.iloc[[2, 3, 0, 4]]) + + msg = 'keep must be either "first", "last"' + with tm.assert_raises_regex(ValueError, msg): + s.nsmallest(keep='invalid') + with tm.assert_raises_regex(ValueError, msg): + s.nlargest(keep='invalid') + + # GH 15297 + s = Series([1] * 5, index=[1, 2, 3, 4, 5]) + expected_first = Series([1] * 3, 
index=[1, 2, 3]) + expected_last = Series([1] * 3, index=[5, 4, 3]) + + result = s.nsmallest(3) + assert_series_equal(result, expected_first) + + result = s.nsmallest(3, keep='last') + assert_series_equal(result, expected_last) + + result = s.nlargest(3) + assert_series_equal(result, expected_first) + + result = s.nlargest(3, keep='last') + assert_series_equal(result, expected_last) + + @pytest.mark.parametrize('n', range(1, 5)) + def test_n(self, n): + + # GH 13412 + s = Series([1, 4, 3, 2], index=[0, 0, 1, 1]) + result = s.nlargest(n) + expected = s.sort_values(ascending=False).head(n) + assert_series_equal(result, expected) + + result = s.nsmallest(n) + expected = s.sort_values().head(n) + assert_series_equal(result, expected) + + +class TestCategoricalSeriesAnalytics(object): + + def test_count(self): + + s = Series(Categorical([np.nan, 1, 2, np.nan], + categories=[5, 4, 3, 2, 1], ordered=True)) + result = s.count() + assert result == 2 + + def test_min_max(self): + # unordered cats have no min/max + cat = Series(Categorical(["a", "b", "c", "d"], ordered=False)) + pytest.raises(TypeError, lambda: cat.min()) + pytest.raises(TypeError, lambda: cat.max()) + + cat = Series(Categorical(["a", "b", "c", "d"], ordered=True)) + _min = cat.min() + _max = cat.max() + assert _min == "a" + assert _max == "d" + + cat = Series(Categorical(["a", "b", "c", "d"], categories=[ + 'd', 'c', 'b', 'a'], ordered=True)) + _min = cat.min() + _max = cat.max() + assert _min == "d" + assert _max == "a" + + cat = Series(Categorical( + [np.nan, "b", "c", np.nan], categories=['d', 'c', 'b', 'a' + ], ordered=True)) + _min = cat.min() + _max = cat.max() + assert np.isnan(_min) + assert _max == "b" + + cat = Series(Categorical( + [np.nan, 1, 2, np.nan], categories=[5, 4, 3, 2, 1], ordered=True)) + _min = cat.min() + _max = cat.max() + assert np.isnan(_min) + assert _max == 1 + + def test_mode(self): + s = Series(Categorical([1, 1, 2, 4, 5, 5, 5], + categories=[5, 4, 3, 2, 1], ordered=True)) + 
res = s.mode() + exp = Series(Categorical([5], categories=[ + 5, 4, 3, 2, 1], ordered=True)) + tm.assert_series_equal(res, exp) + s = Series(Categorical([1, 1, 1, 4, 5, 5, 5], + categories=[5, 4, 3, 2, 1], ordered=True)) + res = s.mode() + exp = Series(Categorical([5, 1], categories=[ + 5, 4, 3, 2, 1], ordered=True)) + tm.assert_series_equal(res, exp) + s = Series(Categorical([1, 2, 3, 4, 5], categories=[5, 4, 3, 2, 1], + ordered=True)) + res = s.mode() + exp = Series(Categorical([5, 4, 3, 2, 1], categories=[5, 4, 3, 2, 1], + ordered=True)) + tm.assert_series_equal(res, exp) + + def test_value_counts(self): + # GH 12835 + cats = Categorical(list('abcccb'), categories=list('cabd')) + s = Series(cats, name='xxx') + res = s.value_counts(sort=False) + + exp_index = CategoricalIndex(list('cabd'), categories=cats.categories) + exp = Series([3, 1, 2, 0], name='xxx', index=exp_index) + tm.assert_series_equal(res, exp) + + res = s.value_counts(sort=True) + + exp_index = CategoricalIndex(list('cbad'), categories=cats.categories) + exp = Series([3, 2, 1, 0], name='xxx', index=exp_index) + tm.assert_series_equal(res, exp) + + # check object dtype handles the Series.name as the same + # (tested in test_base.py) + s = Series(["a", "b", "c", "c", "c", "b"], name='xxx') + res = s.value_counts() + exp = Series([3, 2, 1], name='xxx', index=["c", "b", "a"]) + tm.assert_series_equal(res, exp) + + def test_value_counts_with_nan(self): + # see gh-9443 + + # sanity check + s = Series(["a", "b", "a"], dtype="category") + exp = Series([2, 1], index=CategoricalIndex(["a", "b"])) + + res = s.value_counts(dropna=True) + tm.assert_series_equal(res, exp) + + res = s.value_counts(dropna=True) + tm.assert_series_equal(res, exp) + + # same Series via two different constructions --> same behaviour + series = [ + Series(["a", "b", None, "a", None, None], dtype="category"), + Series(Categorical(["a", "b", None, "a", None, None], + categories=["a", "b"])) + ] + + for s in series: + # None is a NaN 
value, so we exclude its count here + exp = Series([2, 1], index=CategoricalIndex(["a", "b"])) + res = s.value_counts(dropna=True) + tm.assert_series_equal(res, exp) + + # we don't exclude the count of None and sort by counts + exp = Series([3, 2, 1], index=CategoricalIndex([np.nan, "a", "b"])) + res = s.value_counts(dropna=False) + tm.assert_series_equal(res, exp) + + # When we aren't sorting by counts, and np.nan isn't a + # category, it should be last. + exp = Series([2, 1, 3], index=CategoricalIndex(["a", "b", np.nan])) + res = s.value_counts(dropna=False, sort=False) + tm.assert_series_equal(res, exp) + + @pytest.mark.parametrize( + "dtype", + ["int_", "uint", "float_", "unicode_", "timedelta64[h]", + pytest.param("datetime64[D]", + marks=pytest.mark.xfail(reason="issue7996"))] + ) + @pytest.mark.parametrize("is_ordered", [True, False]) + def test_drop_duplicates_categorical_non_bool(self, dtype, is_ordered): + cat_array = np.array([1, 2, 3, 4, 5], dtype=np.dtype(dtype)) + + # Test case 1 + input1 = np.array([1, 2, 3, 3], dtype=np.dtype(dtype)) + tc1 = Series(Categorical(input1, categories=cat_array, + ordered=is_ordered)) + + expected = Series([False, False, False, True]) + tm.assert_series_equal(tc1.duplicated(), expected) + tm.assert_series_equal(tc1.drop_duplicates(), tc1[~expected]) + sc = tc1.copy() + sc.drop_duplicates(inplace=True) + tm.assert_series_equal(sc, tc1[~expected]) + + expected = Series([False, False, True, False]) + tm.assert_series_equal(tc1.duplicated(keep='last'), expected) + tm.assert_series_equal(tc1.drop_duplicates(keep='last'), + tc1[~expected]) + sc = tc1.copy() + sc.drop_duplicates(keep='last', inplace=True) + tm.assert_series_equal(sc, tc1[~expected]) + + expected = Series([False, False, True, True]) + tm.assert_series_equal(tc1.duplicated(keep=False), expected) + tm.assert_series_equal(tc1.drop_duplicates(keep=False), tc1[~expected]) + sc = tc1.copy() + sc.drop_duplicates(keep=False, inplace=True) + tm.assert_series_equal(sc, 
tc1[~expected]) + + # Test case 2 + input2 = np.array([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype(dtype)) + tc2 = Series(Categorical( + input2, categories=cat_array, ordered=is_ordered) + ) + + expected = Series([False, False, False, False, True, True, False]) + tm.assert_series_equal(tc2.duplicated(), expected) + tm.assert_series_equal(tc2.drop_duplicates(), tc2[~expected]) + sc = tc2.copy() + sc.drop_duplicates(inplace=True) + tm.assert_series_equal(sc, tc2[~expected]) + + expected = Series([False, True, True, False, False, False, False]) + tm.assert_series_equal(tc2.duplicated(keep='last'), expected) + tm.assert_series_equal(tc2.drop_duplicates(keep='last'), + tc2[~expected]) + sc = tc2.copy() + sc.drop_duplicates(keep='last', inplace=True) + tm.assert_series_equal(sc, tc2[~expected]) + + expected = Series([False, True, True, False, True, True, False]) + tm.assert_series_equal(tc2.duplicated(keep=False), expected) + tm.assert_series_equal(tc2.drop_duplicates(keep=False), tc2[~expected]) + sc = tc2.copy() + sc.drop_duplicates(keep=False, inplace=True) + tm.assert_series_equal(sc, tc2[~expected]) + + @pytest.mark.parametrize("is_ordered", [True, False]) + def test_drop_duplicates_categorical_bool(self, is_ordered): + tc = Series(Categorical([True, False, True, False], + categories=[True, False], ordered=is_ordered)) + + expected = Series([False, False, True, True]) + tm.assert_series_equal(tc.duplicated(), expected) + tm.assert_series_equal(tc.drop_duplicates(), tc[~expected]) + sc = tc.copy() + sc.drop_duplicates(inplace=True) + tm.assert_series_equal(sc, tc[~expected]) + + expected = Series([True, True, False, False]) + tm.assert_series_equal(tc.duplicated(keep='last'), expected) + tm.assert_series_equal(tc.drop_duplicates(keep='last'), tc[~expected]) + sc = tc.copy() + sc.drop_duplicates(keep='last', inplace=True) + tm.assert_series_equal(sc, tc[~expected]) + + expected = Series([True, True, True, True]) + tm.assert_series_equal(tc.duplicated(keep=False), expected) + 
tm.assert_series_equal(tc.drop_duplicates(keep=False), tc[~expected]) + sc = tc.copy() + sc.drop_duplicates(keep=False, inplace=True) + tm.assert_series_equal(sc, tc[~expected]) diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py new file mode 100644 index 0000000000000..cf8698bc5ed5e --- /dev/null +++ b/pandas/tests/series/test_api.py @@ -0,0 +1,754 @@ +# coding=utf-8 +# pylint: disable-msg=E1101,W0612 +from collections import OrderedDict + +import pytest + +import numpy as np +import pandas as pd + +from pandas import Index, Series, DataFrame, date_range +from pandas.core.indexes.datetimes import Timestamp + +from pandas.compat import range, lzip, isidentifier, string_types +from pandas import (compat, Categorical, period_range, timedelta_range, + DatetimeIndex, PeriodIndex, TimedeltaIndex) +import pandas.io.formats.printing as printing +from pandas.util.testing import (assert_series_equal, + ensure_clean) +import pandas.util.testing as tm + +from .common import TestData + + +class SharedWithSparse(object): + """ + A collection of tests Series and SparseSeries can share. + + In generic tests on this class, use ``self._assert_series_equal()`` + which is implemented in sub-classes. 
+ """ + def _assert_series_equal(self, left, right): + """Dispatch to series class dependent assertion""" + raise NotImplementedError + + def test_scalarop_preserve_name(self): + result = self.ts * 2 + assert result.name == self.ts.name + + def test_copy_name(self): + result = self.ts.copy() + assert result.name == self.ts.name + + def test_copy_index_name_checking(self): + # don't want to be able to modify the index stored elsewhere after + # making a copy + + self.ts.index.name = None + assert self.ts.index.name is None + assert self.ts is self.ts + + cp = self.ts.copy() + cp.index.name = 'foo' + printing.pprint_thing(self.ts.index.name) + assert self.ts.index.name is None + + def test_append_preserve_name(self): + result = self.ts[:5].append(self.ts[5:]) + assert result.name == self.ts.name + + def test_binop_maybe_preserve_name(self): + # names match, preserve + result = self.ts * self.ts + assert result.name == self.ts.name + result = self.ts.mul(self.ts) + assert result.name == self.ts.name + + result = self.ts * self.ts[:-2] + assert result.name == self.ts.name + + # names don't match, don't preserve + cp = self.ts.copy() + cp.name = 'something else' + result = self.ts + cp + assert result.name is None + result = self.ts.add(cp) + assert result.name is None + + ops = ['add', 'sub', 'mul', 'div', 'truediv', 'floordiv', 'mod', 'pow'] + ops = ops + ['r' + op for op in ops] + for op in ops: + # names match, preserve + s = self.ts.copy() + result = getattr(s, op)(s) + assert result.name == self.ts.name + + # names don't match, don't preserve + cp = self.ts.copy() + cp.name = 'changed' + result = getattr(s, op)(cp) + assert result.name is None + + def test_combine_first_name(self): + result = self.ts.combine_first(self.ts[:5]) + assert result.name == self.ts.name + + def test_getitem_preserve_name(self): + result = self.ts[self.ts > 0] + assert result.name == self.ts.name + + result = self.ts[[0, 2, 4]] + assert result.name == self.ts.name + + result = 
self.ts[5:10] + assert result.name == self.ts.name + + def test_pickle(self): + unp_series = self._pickle_roundtrip(self.series) + unp_ts = self._pickle_roundtrip(self.ts) + assert_series_equal(unp_series, self.series) + assert_series_equal(unp_ts, self.ts) + + def _pickle_roundtrip(self, obj): + + with ensure_clean() as path: + obj.to_pickle(path) + unpickled = pd.read_pickle(path) + return unpickled + + def test_argsort_preserve_name(self): + result = self.ts.argsort() + assert result.name == self.ts.name + + def test_sort_index_name(self): + result = self.ts.sort_index(ascending=False) + assert result.name == self.ts.name + + def test_to_sparse_pass_name(self): + result = self.ts.to_sparse() + assert result.name == self.ts.name + + def test_constructor_dict(self): + d = {'a': 0., 'b': 1., 'c': 2.} + result = self.series_klass(d) + expected = self.series_klass(d, index=sorted(d.keys())) + self._assert_series_equal(result, expected) + + result = self.series_klass(d, index=['b', 'c', 'd', 'a']) + expected = self.series_klass([1, 2, np.nan, 0], + index=['b', 'c', 'd', 'a']) + self._assert_series_equal(result, expected) + + def test_constructor_subclass_dict(self): + data = tm.TestSubDict((x, 10.0 * x) for x in range(10)) + series = self.series_klass(data) + expected = self.series_klass(dict(compat.iteritems(data))) + self._assert_series_equal(series, expected) + + def test_constructor_ordereddict(self): + # GH3283 + data = OrderedDict( + ('col%s' % i, np.random.random()) for i in range(12)) + + series = self.series_klass(data) + expected = self.series_klass(list(data.values()), list(data.keys())) + self._assert_series_equal(series, expected) + + # Test with subclass + class A(OrderedDict): + pass + + series = self.series_klass(A(data)) + self._assert_series_equal(series, expected) + + def test_constructor_dict_multiindex(self): + d = {('a', 'a'): 0., ('b', 'a'): 1., ('b', 'c'): 2.} + _d = sorted(d.items()) + result = self.series_klass(d) + expected = 
self.series_klass( + [x[1] for x in _d], + index=pd.MultiIndex.from_tuples([x[0] for x in _d])) + self._assert_series_equal(result, expected) + + d['z'] = 111. + _d.insert(0, ('z', d['z'])) + result = self.series_klass(d) + expected = self.series_klass([x[1] for x in _d], + index=pd.Index([x[0] for x in _d], + tupleize_cols=False)) + result = result.reindex(index=expected.index) + self._assert_series_equal(result, expected) + + def test_constructor_dict_timedelta_index(self): + # GH #12169 : Resample category data with timedelta index + # construct Series from dict as data and TimedeltaIndex as index + # will result NaN in result Series data + expected = self.series_klass( + data=['A', 'B', 'C'], + index=pd.to_timedelta([0, 10, 20], unit='s') + ) + + result = self.series_klass( + data={pd.to_timedelta(0, unit='s'): 'A', + pd.to_timedelta(10, unit='s'): 'B', + pd.to_timedelta(20, unit='s'): 'C'}, + index=pd.to_timedelta([0, 10, 20], unit='s') + ) + self._assert_series_equal(result, expected) + + def test_from_array_deprecated(self): + + with tm.assert_produces_warning(FutureWarning): + self.series_klass.from_array([1, 2, 3]) + + +class TestSeriesMisc(TestData, SharedWithSparse): + + series_klass = Series + # SharedWithSparse tests use generic, series_klass-agnostic assertion + _assert_series_equal = staticmethod(tm.assert_series_equal) + + def test_tab_completion(self): + # GH 9910 + s = Series(list('abcd')) + # Series of str values should have .str but not .dt/.cat in __dir__ + assert 'str' in dir(s) + assert 'dt' not in dir(s) + assert 'cat' not in dir(s) + + # similarly for .dt + s = Series(date_range('1/1/2015', periods=5)) + assert 'dt' in dir(s) + assert 'str' not in dir(s) + assert 'cat' not in dir(s) + + # Similarly for .cat, but with the twist that str and dt should be + # there if the categories are of that type first cat and str. 
+ s = Series(list('abbcd'), dtype="category") + assert 'cat' in dir(s) + assert 'str' in dir(s) # as it is a string categorical + assert 'dt' not in dir(s) + + # similar to cat and str + s = Series(date_range('1/1/2015', periods=5)).astype("category") + assert 'cat' in dir(s) + assert 'str' not in dir(s) + assert 'dt' in dir(s) # as it is a datetime categorical + + def test_tab_completion_with_categorical(self): + # test the tab completion display + ok_for_cat = ['categories', 'codes', 'ordered', 'set_categories', + 'add_categories', 'remove_categories', + 'rename_categories', 'reorder_categories', + 'remove_unused_categories', 'as_ordered', 'as_unordered'] + + def get_dir(s): + results = [r for r in s.cat.__dir__() if not r.startswith('_')] + return list(sorted(set(results))) + + s = Series(list('aabbcde')).astype('category') + results = get_dir(s) + tm.assert_almost_equal(results, list(sorted(set(ok_for_cat)))) + + @pytest.mark.parametrize("index", [ + tm.makeUnicodeIndex(10), + tm.makeStringIndex(10), + tm.makeCategoricalIndex(10), + Index(['foo', 'bar', 'baz'] * 2), + tm.makeDateIndex(10), + tm.makePeriodIndex(10), + tm.makeTimedeltaIndex(10), + tm.makeIntIndex(10), + tm.makeUIntIndex(10), + tm.makeIntIndex(10), + tm.makeFloatIndex(10), + Index([True, False]), + Index(['a{}'.format(i) for i in range(101)]), + pd.MultiIndex.from_tuples(lzip('ABCD', 'EFGH')), + pd.MultiIndex.from_tuples(lzip([0, 1, 2, 3], 'EFGH')), ]) + def test_index_tab_completion(self, index): + # dir contains string-like values of the Index. 
+ s = pd.Series(index=index) + dir_s = dir(s) + for i, x in enumerate(s.index.unique(level=0)): + if i < 100: + assert (not isinstance(x, string_types) or + not isidentifier(x) or x in dir_s) + else: + assert x not in dir_s + + def test_not_hashable(self): + s_empty = Series() + s = Series([1]) + pytest.raises(TypeError, hash, s_empty) + pytest.raises(TypeError, hash, s) + + def test_contains(self): + tm.assert_contains_all(self.ts.index, self.ts) + + def test_iter(self): + for i, val in enumerate(self.series): + assert val == self.series[i] + + for i, val in enumerate(self.ts): + assert val == self.ts[i] + + def test_keys(self): + # HACK: By doing this in two stages, we avoid 2to3 wrapping the call + # to .keys() in a list() + getkeys = self.ts.keys + assert getkeys() is self.ts.index + + def test_values(self): + tm.assert_almost_equal(self.ts.values, self.ts, check_dtype=False) + + def test_iteritems(self): + for idx, val in compat.iteritems(self.series): + assert val == self.series[idx] + + for idx, val in compat.iteritems(self.ts): + assert val == self.ts[idx] + + # assert is lazy (genrators don't define reverse, lists do) + assert not hasattr(self.series.iteritems(), 'reverse') + + def test_items(self): + for idx, val in self.series.items(): + assert val == self.series[idx] + + for idx, val in self.ts.items(): + assert val == self.ts[idx] + + # assert is lazy (genrators don't define reverse, lists do) + assert not hasattr(self.series.items(), 'reverse') + + def test_raise_on_info(self): + s = Series(np.random.randn(10)) + with pytest.raises(AttributeError): + s.info() + + def test_copy(self): + + for deep in [None, False, True]: + s = Series(np.arange(10), dtype='float64') + + # default deep is True + if deep is None: + s2 = s.copy() + else: + s2 = s.copy(deep=deep) + + s2[::2] = np.NaN + + if deep is None or deep is True: + # Did not modify original Series + assert np.isnan(s2[0]) + assert not np.isnan(s[0]) + else: + # we DID modify the original Series + 
assert np.isnan(s2[0]) + assert np.isnan(s[0]) + + # GH 11794 + # copy of tz-aware + expected = Series([Timestamp('2012/01/01', tz='UTC')]) + expected2 = Series([Timestamp('1999/01/01', tz='UTC')]) + + for deep in [None, False, True]: + + s = Series([Timestamp('2012/01/01', tz='UTC')]) + + if deep is None: + s2 = s.copy() + else: + s2 = s.copy(deep=deep) + + s2[0] = pd.Timestamp('1999/01/01', tz='UTC') + + # default deep is True + if deep is None or deep is True: + # Did not modify original Series + assert_series_equal(s2, expected2) + assert_series_equal(s, expected) + else: + # we DID modify the original Series + assert_series_equal(s2, expected2) + assert_series_equal(s, expected2) + + def test_axis_alias(self): + s = Series([1, 2, np.nan]) + assert_series_equal(s.dropna(axis='rows'), s.dropna(axis='index')) + assert s.dropna().sum('rows') == 3 + assert s._get_axis_number('rows') == 0 + assert s._get_axis_name('rows') == 'index' + + def test_class_axis(self): + # https://github.com/pandas-dev/pandas/issues/18147 + Series.index # no exception! + + def test_numpy_unique(self): + # it works! 
+ np.unique(self.ts) + + def test_ndarray_compat(self): + + # test numpy compat with Series as sub-class of NDFrame + tsdf = DataFrame(np.random.randn(1000, 3), columns=['A', 'B', 'C'], + index=date_range('1/1/2000', periods=1000)) + + def f(x): + return x[x.idxmax()] + + result = tsdf.apply(f) + expected = tsdf.max() + tm.assert_series_equal(result, expected) + + # .item() + s = Series([1]) + result = s.item() + assert result == 1 + assert s.item() == s.iloc[0] + + # using an ndarray like function + s = Series(np.random.randn(10)) + result = Series(np.ones_like(s)) + expected = Series(1, index=range(10), dtype='float64') + tm.assert_series_equal(result, expected) + + # ravel + s = Series(np.random.randn(10)) + tm.assert_almost_equal(s.ravel(order='F'), s.values.ravel(order='F')) + + # compress + # GH 6658 + s = Series([0, 1., -1], index=list('abc')) + result = np.compress(s > 0, s) + tm.assert_series_equal(result, Series([1.], index=['b'])) + + result = np.compress(s < -1, s) + # result empty Index(dtype=object) as the same as original + exp = Series([], dtype='float64', index=Index([], dtype='object')) + tm.assert_series_equal(result, exp) + + s = Series([0, 1., -1], index=[.1, .2, .3]) + result = np.compress(s > 0, s) + tm.assert_series_equal(result, Series([1.], index=[.2])) + + result = np.compress(s < -1, s) + # result empty Float64Index as the same as original + exp = Series([], dtype='float64', index=Index([], dtype='float64')) + tm.assert_series_equal(result, exp) + + def test_str_attribute(self): + # GH9068 + methods = ['strip', 'rstrip', 'lstrip'] + s = Series([' jack', 'jill ', ' jesse ', 'frank']) + for method in methods: + expected = Series([getattr(str, method)(x) for x in s.values]) + assert_series_equal(getattr(Series.str, method)(s.str), expected) + + # str accessor only valid with string values + s = Series(range(5)) + with tm.assert_raises_regex(AttributeError, + 'only use .str accessor'): + s.str.repeat(2) + + def test_empty_method(self): + 
s_empty = pd.Series() + assert s_empty.empty + + for full_series in [pd.Series([1]), pd.Series(index=[1])]: + assert not full_series.empty + + def test_tab_complete_warning(self, ip): + # https://github.com/pandas-dev/pandas/issues/16409 + pytest.importorskip('IPython', minversion="6.0.0") + from IPython.core.completer import provisionalcompleter + + code = "import pandas as pd; s = pd.Series()" + ip.run_code(code) + with tm.assert_produces_warning(None): + with provisionalcompleter('ignore'): + list(ip.Completer.completions('s.', 1)) + + +class TestCategoricalSeries(object): + + @pytest.mark.parametrize( + "method", + [ + lambda x: x.cat.set_categories([1, 2, 3]), + lambda x: x.cat.reorder_categories([2, 3, 1], ordered=True), + lambda x: x.cat.rename_categories([1, 2, 3]), + lambda x: x.cat.remove_unused_categories(), + lambda x: x.cat.remove_categories([2]), + lambda x: x.cat.add_categories([4]), + lambda x: x.cat.as_ordered(), + lambda x: x.cat.as_unordered(), + ]) + def test_getname_categorical_accessor(self, method): + # GH 17509 + s = Series([1, 2, 3], name='A').astype('category') + expected = 'A' + result = method(s).name + assert result == expected + + def test_cat_accessor(self): + s = Series(Categorical(["a", "b", np.nan, "a"])) + tm.assert_index_equal(s.cat.categories, Index(["a", "b"])) + assert not s.cat.ordered, False + + exp = Categorical(["a", "b", np.nan, "a"], categories=["b", "a"]) + s.cat.set_categories(["b", "a"], inplace=True) + tm.assert_categorical_equal(s.values, exp) + + res = s.cat.set_categories(["b", "a"]) + tm.assert_categorical_equal(res.values, exp) + + s[:] = "a" + s = s.cat.remove_unused_categories() + tm.assert_index_equal(s.cat.categories, Index(["a"])) + + def test_cat_accessor_api(self): + # GH 9322 + from pandas.core.arrays.categorical import CategoricalAccessor + assert Series.cat is CategoricalAccessor + s = Series(list('aabbcde')).astype('category') + assert isinstance(s.cat, CategoricalAccessor) + + invalid = Series([1]) + 
with tm.assert_raises_regex(AttributeError, + "only use .cat accessor"): + invalid.cat + assert not hasattr(invalid, 'cat') + + def test_cat_accessor_no_new_attributes(self): + # https://github.com/pandas-dev/pandas/issues/10673 + c = Series(list('aabbcde')).astype('category') + with tm.assert_raises_regex(AttributeError, + "You cannot add any new attribute"): + c.cat.xlabel = "a" + + def test_categorical_delegations(self): + + # invalid accessor + pytest.raises(AttributeError, lambda: Series([1, 2, 3]).cat) + tm.assert_raises_regex( + AttributeError, + r"Can only use .cat accessor with a 'category' dtype", + lambda: Series([1, 2, 3]).cat) + pytest.raises(AttributeError, lambda: Series(['a', 'b', 'c']).cat) + pytest.raises(AttributeError, lambda: Series(np.arange(5.)).cat) + pytest.raises(AttributeError, + lambda: Series([Timestamp('20130101')]).cat) + + # Series should delegate calls to '.categories', '.codes', '.ordered' + # and the methods '.set_categories()' 'drop_unused_categories()' to the + # categorical# -*- coding: utf-8 -*- + s = Series(Categorical(["a", "b", "c", "a"], ordered=True)) + exp_categories = Index(["a", "b", "c"]) + tm.assert_index_equal(s.cat.categories, exp_categories) + s.cat.categories = [1, 2, 3] + exp_categories = Index([1, 2, 3]) + tm.assert_index_equal(s.cat.categories, exp_categories) + + exp_codes = Series([0, 1, 2, 0], dtype='int8') + tm.assert_series_equal(s.cat.codes, exp_codes) + + assert s.cat.ordered + s = s.cat.as_unordered() + assert not s.cat.ordered + s.cat.as_ordered(inplace=True) + assert s.cat.ordered + + # reorder + s = Series(Categorical(["a", "b", "c", "a"], ordered=True)) + exp_categories = Index(["c", "b", "a"]) + exp_values = np.array(["a", "b", "c", "a"], dtype=np.object_) + s = s.cat.set_categories(["c", "b", "a"]) + tm.assert_index_equal(s.cat.categories, exp_categories) + tm.assert_numpy_array_equal(s.values.__array__(), exp_values) + tm.assert_numpy_array_equal(s.__array__(), exp_values) + + # remove unused 
categories + s = Series(Categorical(["a", "b", "b", "a"], categories=["a", "b", "c" + ])) + exp_categories = Index(["a", "b"]) + exp_values = np.array(["a", "b", "b", "a"], dtype=np.object_) + s = s.cat.remove_unused_categories() + tm.assert_index_equal(s.cat.categories, exp_categories) + tm.assert_numpy_array_equal(s.values.__array__(), exp_values) + tm.assert_numpy_array_equal(s.__array__(), exp_values) + + # This method is likely to be confused, so test that it raises an error + # on wrong inputs: + def f(): + s.set_categories([4, 3, 2, 1]) + + pytest.raises(Exception, f) + # right: s.cat.set_categories([4,3,2,1]) + + # GH18862 (let Series.cat.rename_categories take callables) + s = Series(Categorical(["a", "b", "c", "a"], ordered=True)) + result = s.cat.rename_categories(lambda x: x.upper()) + expected = Series(Categorical(["A", "B", "C", "A"], + categories=["A", "B", "C"], + ordered=True)) + tm.assert_series_equal(result, expected) + + def test_str_accessor_api_for_categorical(self): + # https://github.com/pandas-dev/pandas/issues/10661 + from pandas.core.strings import StringMethods + s = Series(list('aabb')) + s = s + " " + s + c = s.astype('category') + assert isinstance(c.str, StringMethods) + + # str functions, which need special arguments + special_func_defs = [ + ('cat', (list("zyxw"),), {"sep": ","}), + ('center', (10,), {}), + ('contains', ("a",), {}), + ('count', ("a",), {}), + ('decode', ("UTF-8",), {}), + ('encode', ("UTF-8",), {}), + ('endswith', ("a",), {}), + ('extract', ("([a-z]*) ",), {"expand": False}), + ('extract', ("([a-z]*) ",), {"expand": True}), + ('extractall', ("([a-z]*) ",), {}), + ('find', ("a",), {}), + ('findall', ("a",), {}), + ('index', (" ",), {}), + ('ljust', (10,), {}), + ('match', ("a"), {}), # deprecated... 
+ ('normalize', ("NFC",), {}), + ('pad', (10,), {}), + ('partition', (" ",), {"expand": False}), # not default + ('partition', (" ",), {"expand": True}), # default + ('repeat', (3,), {}), + ('replace', ("a", "z"), {}), + ('rfind', ("a",), {}), + ('rindex', (" ",), {}), + ('rjust', (10,), {}), + ('rpartition', (" ",), {"expand": False}), # not default + ('rpartition', (" ",), {"expand": True}), # default + ('slice', (0, 1), {}), + ('slice_replace', (0, 1, "z"), {}), + ('split', (" ",), {"expand": False}), # default + ('split', (" ",), {"expand": True}), # not default + ('startswith', ("a",), {}), + ('wrap', (2,), {}), + ('zfill', (10,), {}) + ] + _special_func_names = [f[0] for f in special_func_defs] + + # * get, join: they need a individual elements of type lists, but + # we can't make a categorical with lists as individual categories. + # -> `s.str.split(" ").astype("category")` will error! + # * `translate` has different interfaces for py2 vs. py3 + _ignore_names = ["get", "join", "translate"] + + str_func_names = [f for f in dir(s.str) if not ( + f.startswith("_") or + f in _special_func_names or + f in _ignore_names)] + + func_defs = [(f, (), {}) for f in str_func_names] + func_defs.extend(special_func_defs) + + for func, args, kwargs in func_defs: + res = getattr(c.str, func)(*args, **kwargs) + exp = getattr(s.str, func)(*args, **kwargs) + + if isinstance(res, DataFrame): + tm.assert_frame_equal(res, exp) + else: + tm.assert_series_equal(res, exp) + + invalid = Series([1, 2, 3]).astype('category') + with tm.assert_raises_regex(AttributeError, + "Can only use .str " + "accessor with string"): + invalid.str + assert not hasattr(invalid, 'str') + + def test_dt_accessor_api_for_categorical(self): + # https://github.com/pandas-dev/pandas/issues/10661 + from pandas.core.indexes.accessors import Properties + + s_dr = Series(date_range('1/1/2015', periods=5, tz="MET")) + c_dr = s_dr.astype("category") + + s_pr = Series(period_range('1/1/2015', freq='D', periods=5)) + 
c_pr = s_pr.astype("category") + + s_tdr = Series(timedelta_range('1 days', '10 days')) + c_tdr = s_tdr.astype("category") + + # only testing field (like .day) + # and bool (is_month_start) + get_ops = lambda x: x._datetimelike_ops + + test_data = [ + ("Datetime", get_ops(DatetimeIndex), s_dr, c_dr), + ("Period", get_ops(PeriodIndex), s_pr, c_pr), + ("Timedelta", get_ops(TimedeltaIndex), s_tdr, c_tdr)] + + assert isinstance(c_dr.dt, Properties) + + special_func_defs = [ + ('strftime', ("%Y-%m-%d",), {}), + ('tz_convert', ("EST",), {}), + ('round', ("D",), {}), + ('floor', ("D",), {}), + ('ceil', ("D",), {}), + ('asfreq', ("D",), {}), + # ('tz_localize', ("UTC",), {}), + ] + _special_func_names = [f[0] for f in special_func_defs] + + # the series is already localized + _ignore_names = ['tz_localize', 'components'] + + for name, attr_names, s, c in test_data: + func_names = [f + for f in dir(s.dt) + if not (f.startswith("_") or f in attr_names or f in + _special_func_names or f in _ignore_names)] + + func_defs = [(f, (), {}) for f in func_names] + for f_def in special_func_defs: + if f_def[0] in dir(s.dt): + func_defs.append(f_def) + + for func, args, kwargs in func_defs: + res = getattr(c.dt, func)(*args, **kwargs) + exp = getattr(s.dt, func)(*args, **kwargs) + + if isinstance(res, DataFrame): + tm.assert_frame_equal(res, exp) + elif isinstance(res, Series): + tm.assert_series_equal(res, exp) + else: + tm.assert_almost_equal(res, exp) + + for attr in attr_names: + try: + res = getattr(c.dt, attr) + exp = getattr(s.dt, attr) + except Exception as e: + print(name, attr) + raise e + + if isinstance(res, DataFrame): + tm.assert_frame_equal(res, exp) + elif isinstance(res, Series): + tm.assert_series_equal(res, exp) + else: + tm.assert_almost_equal(res, exp) + + invalid = Series([1, 2, 3]).astype('category') + with tm.assert_raises_regex( + AttributeError, "Can only use .dt accessor with datetimelike"): + invalid.dt + assert not hasattr(invalid, 'str') diff --git 
a/pandas/tests/series/test_apply.py b/pandas/tests/series/test_apply.py index 16d1466bb90fe..0780c846a6c19 100644 --- a/pandas/tests/series/test_apply.py +++ b/pandas/tests/series/test_apply.py @@ -1,43 +1,42 @@ # coding=utf-8 # pylint: disable-msg=E1101,W0612 +import pytest + +from collections import Counter, defaultdict, OrderedDict + import numpy as np import pandas as pd -from pandas import (Index, Series, DataFrame, isnull) +from pandas import (Index, Series, DataFrame, isna) from pandas.compat import lrange from pandas import compat -from pandas.util.testing import assert_series_equal +from pandas.util.testing import assert_series_equal, assert_frame_equal import pandas.util.testing as tm from .common import TestData -class TestSeriesApply(TestData, tm.TestCase): +class TestSeriesApply(TestData): def test_apply(self): with np.errstate(all='ignore'): - assert_series_equal(self.ts.apply(np.sqrt), np.sqrt(self.ts)) + tm.assert_series_equal(self.ts.apply(np.sqrt), np.sqrt(self.ts)) - # elementwise-apply + # element-wise apply import math - assert_series_equal(self.ts.apply(math.exp), np.exp(self.ts)) - - # how to handle Series result, #2316 - result = self.ts.apply(lambda x: Series( - [x, x ** 2], index=['x', 'x^2'])) - expected = DataFrame({'x': self.ts, 'x^2': self.ts ** 2}) - tm.assert_frame_equal(result, expected) + tm.assert_series_equal(self.ts.apply(math.exp), np.exp(self.ts)) # empty series s = Series(dtype=object, name='foo', index=pd.Index([], name='bar')) rs = s.apply(lambda x: x) tm.assert_series_equal(s, rs) + # check all metadata (GH 9322) - self.assertIsNot(s, rs) - self.assertIs(s.index, rs.index) - self.assertEqual(s.dtype, rs.dtype) - self.assertEqual(s.name, rs.name) + assert s is not rs + assert s.index is rs.index + assert s.dtype == rs.dtype + assert s.name == rs.name # index but no data s = Series(index=[1, 2, 3]) @@ -62,20 +61,38 @@ def test_apply_dont_convert_dtype(self): f = lambda x: x if x > 0 else np.nan result = s.apply(f, 
convert_dtype=False) - self.assertEqual(result.dtype, object) + assert result.dtype == object + + def test_with_string_args(self): + + for arg in ['sum', 'mean', 'min', 'max', 'std']: + result = self.ts.apply(arg) + expected = getattr(self.ts, arg)() + assert result == expected def test_apply_args(self): s = Series(['foo,bar']) result = s.apply(str.split, args=(',', )) - self.assertEqual(result[0], ['foo', 'bar']) - tm.assertIsInstance(result[0], list) + assert result[0] == ['foo', 'bar'] + assert isinstance(result[0], list) + + def test_series_map_box_timestamps(self): + # GH#2689, GH#2627 + ser = Series(pd.date_range('1/1/2000', periods=10)) + + def func(x): + return (x.hour, x.day, x.month) + + # it works! + ser.map(func) + ser.apply(func) def test_apply_box(self): # ufunc will not be boxed. Same test cases as the test_map_box vals = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')] s = pd.Series(vals) - self.assertEqual(s.dtype, 'datetime64[ns]') + assert s.dtype == 'datetime64[ns]' # boxed value must be Timestamp instance res = s.apply(lambda x: '{0}_{1}_{2}'.format(x.__class__.__name__, x.day, x.tz)) @@ -85,7 +102,7 @@ def test_apply_box(self): vals = [pd.Timestamp('2011-01-01', tz='US/Eastern'), pd.Timestamp('2011-01-02', tz='US/Eastern')] s = pd.Series(vals) - self.assertEqual(s.dtype, 'datetime64[ns, US/Eastern]') + assert s.dtype == 'datetime64[ns, US/Eastern]' res = s.apply(lambda x: '{0}_{1}_{2}'.format(x.__class__.__name__, x.day, x.tz)) exp = pd.Series(['Timestamp_1_US/Eastern', 'Timestamp_2_US/Eastern']) @@ -94,7 +111,7 @@ def test_apply_box(self): # timedelta vals = [pd.Timedelta('1 days'), pd.Timedelta('2 days')] s = pd.Series(vals) - self.assertEqual(s.dtype, 'timedelta64[ns]') + assert s.dtype == 'timedelta64[ns]' res = s.apply(lambda x: '{0}_{1}'.format(x.__class__.__name__, x.days)) exp = pd.Series(['Timedelta_1', 'Timedelta_2']) tm.assert_series_equal(res, exp) @@ -103,7 +120,7 @@ def test_apply_box(self): vals = 
[pd.Period('2011-01-01', freq='M'), pd.Period('2011-01-02', freq='M')] s = pd.Series(vals) - self.assertEqual(s.dtype, 'object') + assert s.dtype == 'object' res = s.apply(lambda x: '{0}_{1}'.format(x.__class__.__name__, x.freqstr)) exp = pd.Series(['Period_M', 'Period_M']) @@ -136,8 +153,186 @@ def f(x): exp = pd.Series(['Asia/Tokyo'] * 25, name='XX') tm.assert_series_equal(result, exp) + def test_apply_dict_depr(self): + + tsdf = pd.DataFrame(np.random.randn(10, 3), + columns=['A', 'B', 'C'], + index=pd.date_range('1/1/2000', periods=10)) + with tm.assert_produces_warning(FutureWarning): + tsdf.A.agg({'foo': ['sum', 'mean']}) + + +class TestSeriesAggregate(TestData): + + def test_transform(self): + # transforming functions + + with np.errstate(all='ignore'): + + f_sqrt = np.sqrt(self.series) + f_abs = np.abs(self.series) + + # ufunc + result = self.series.transform(np.sqrt) + expected = f_sqrt.copy() + assert_series_equal(result, expected) + + result = self.series.apply(np.sqrt) + assert_series_equal(result, expected) + + # list-like + result = self.series.transform([np.sqrt]) + expected = f_sqrt.to_frame().copy() + expected.columns = ['sqrt'] + assert_frame_equal(result, expected) + + result = self.series.transform([np.sqrt]) + assert_frame_equal(result, expected) + + result = self.series.transform(['sqrt']) + assert_frame_equal(result, expected) + + # multiple items in list + # these are in the order as if we are applying both functions per + # series and then concatting + expected = pd.concat([f_sqrt, f_abs], axis=1) + expected.columns = ['sqrt', 'absolute'] + result = self.series.apply([np.sqrt, np.abs]) + assert_frame_equal(result, expected) + + result = self.series.transform(['sqrt', 'abs']) + expected.columns = ['sqrt', 'abs'] + assert_frame_equal(result, expected) + + # dict, provide renaming + expected = pd.concat([f_sqrt, f_abs], axis=1) + expected.columns = ['foo', 'bar'] + expected = expected.unstack().rename('series') + + result = 
self.series.apply({'foo': np.sqrt, 'bar': np.abs}) + assert_series_equal(result.reindex_like(expected), expected) + + def test_transform_and_agg_error(self): + # we are trying to transform with an aggregator + def f(): + self.series.transform(['min', 'max']) + pytest.raises(ValueError, f) + + def f(): + with np.errstate(all='ignore'): + self.series.agg(['sqrt', 'max']) + pytest.raises(ValueError, f) + + def f(): + with np.errstate(all='ignore'): + self.series.transform(['sqrt', 'max']) + pytest.raises(ValueError, f) + + def f(): + with np.errstate(all='ignore'): + self.series.agg({'foo': np.sqrt, 'bar': 'sum'}) + pytest.raises(ValueError, f) + + def test_demo(self): + # demonstration tests + s = Series(range(6), dtype='int64', name='series') + + result = s.agg(['min', 'max']) + expected = Series([0, 5], index=['min', 'max'], name='series') + tm.assert_series_equal(result, expected) + + result = s.agg({'foo': 'min'}) + expected = Series([0], index=['foo'], name='series') + tm.assert_series_equal(result, expected) + + # nested renaming + with tm.assert_produces_warning(FutureWarning): + result = s.agg({'foo': ['min', 'max']}) + + expected = DataFrame( + {'foo': [0, 5]}, + index=['min', 'max']).unstack().rename('series') + tm.assert_series_equal(result, expected) + + def test_multiple_aggregators_with_dict_api(self): + + s = Series(range(6), dtype='int64', name='series') + # nested renaming + with tm.assert_produces_warning(FutureWarning): + result = s.agg({'foo': ['min', 'max'], 'bar': ['sum', 'mean']}) + + expected = DataFrame( + {'foo': [5.0, np.nan, 0.0, np.nan], + 'bar': [np.nan, 2.5, np.nan, 15.0]}, + columns=['foo', 'bar'], + index=['max', 'mean', + 'min', 'sum']).unstack().rename('series') + tm.assert_series_equal(result.reindex_like(expected), expected) + + def test_agg_apply_evaluate_lambdas_the_same(self): + # test that we are evaluating row-by-row first + # before vectorized evaluation + result = self.series.apply(lambda x: str(x)) + expected = 
self.series.agg(lambda x: str(x)) + tm.assert_series_equal(result, expected) + + result = self.series.apply(str) + expected = self.series.agg(str) + tm.assert_series_equal(result, expected) + + def test_with_nested_series(self): + # GH 2316 + # .agg with a reducer and a transform, what to do + result = self.ts.apply(lambda x: Series( + [x, x ** 2], index=['x', 'x^2'])) + expected = DataFrame({'x': self.ts, 'x^2': self.ts ** 2}) + tm.assert_frame_equal(result, expected) + + result = self.ts.agg(lambda x: Series( + [x, x ** 2], index=['x', 'x^2'])) + tm.assert_frame_equal(result, expected) + + def test_replicate_describe(self): + # this also tests a result set that is all scalars + expected = self.series.describe() + result = self.series.apply(OrderedDict( + [('count', 'count'), + ('mean', 'mean'), + ('std', 'std'), + ('min', 'min'), + ('25%', lambda x: x.quantile(0.25)), + ('50%', 'median'), + ('75%', lambda x: x.quantile(0.75)), + ('max', 'max')])) + assert_series_equal(result, expected) + + def test_reduce(self): + # reductions with named functions + result = self.series.agg(['sum', 'mean']) + expected = Series([self.series.sum(), + self.series.mean()], + ['sum', 'mean'], + name=self.series.name) + assert_series_equal(result, expected) + + def test_non_callable_aggregates(self): + # test agg using non-callable series attributes + s = Series([1, 2, None]) + + # Calling agg w/ just a string arg same as calling s.arg + result = s.agg('size') + expected = s.size + assert result == expected + + # test when mixed w/ callable reducers + result = s.agg(['size', 'count', 'mean']) + expected = Series(OrderedDict([('size', 3.0), + ('count', 2.0), + ('mean', 1.5)])) + assert_series_equal(result[expected.index], expected) -class TestSeriesMap(TestData, tm.TestCase): + +class TestSeriesMap(TestData): def test_map(self): index, data = tm.getMixedTypeDict() @@ -148,17 +343,17 @@ def test_map(self): merged = target.map(source) for k, v in compat.iteritems(merged): - 
self.assertEqual(v, source[target[k]]) + assert v == source[target[k]] # input could be a dict merged = target.map(source.to_dict()) for k, v in compat.iteritems(merged): - self.assertEqual(v, source[target[k]]) + assert v == source[target[k]] # function result = self.ts.map(lambda x: x * 2) - self.assert_series_equal(result, self.ts * 2) + tm.assert_series_equal(result, self.ts * 2) # GH 10324 a = Series([1, 2, 3, 4]) @@ -166,9 +361,9 @@ def test_map(self): c = Series(["even", "odd", "even", "odd"]) exp = Series(["odd", "even", "odd", np.nan], dtype="category") - self.assert_series_equal(a.map(b), exp) + tm.assert_series_equal(a.map(b), exp) exp = Series(["odd", "even", "odd", np.nan]) - self.assert_series_equal(a.map(c), exp) + tm.assert_series_equal(a.map(c), exp) a = Series(['a', 'b', 'c', 'd']) b = Series([1, 2, 3, 4], @@ -176,9 +371,9 @@ def test_map(self): c = Series([1, 2, 3, 4], index=Index(['b', 'c', 'd', 'e'])) exp = Series([np.nan, 1, 2, 3]) - self.assert_series_equal(a.map(b), exp) + tm.assert_series_equal(a.map(b), exp) exp = Series([np.nan, 1, 2, 3]) - self.assert_series_equal(a.map(c), exp) + tm.assert_series_equal(a.map(c), exp) a = Series(['a', 'b', 'c', 'd']) b = Series(['B', 'C', 'D', 'E'], dtype='category', @@ -187,9 +382,17 @@ def test_map(self): exp = Series(pd.Categorical([np.nan, 'B', 'C', 'D'], categories=['B', 'C', 'D', 'E'])) - self.assert_series_equal(a.map(b), exp) + tm.assert_series_equal(a.map(b), exp) exp = Series([np.nan, 'B', 'C', 'D']) - self.assert_series_equal(a.map(c), exp) + tm.assert_series_equal(a.map(c), exp) + + @pytest.mark.parametrize("index", tm.all_index_generator(10)) + def test_map_empty(self, index): + s = Series(index) + result = s.map({}) + + expected = pd.Series(np.nan, index=s.index) + tm.assert_series_equal(result, expected) def test_map_compat(self): # related GH 8024 @@ -202,25 +405,25 @@ def test_map_int(self): left = Series({'a': 1., 'b': 2., 'c': 3., 'd': 4}) right = Series({1: 11, 2: 22, 3: 33}) - 
self.assertEqual(left.dtype, np.float_) - self.assertTrue(issubclass(right.dtype.type, np.integer)) + assert left.dtype == np.float_ + assert issubclass(right.dtype.type, np.integer) merged = left.map(right) - self.assertEqual(merged.dtype, np.float_) - self.assertTrue(isnull(merged['d'])) - self.assertTrue(not isnull(merged['c'])) + assert merged.dtype == np.float_ + assert isna(merged['d']) + assert not isna(merged['c']) def test_map_type_inference(self): s = Series(lrange(3)) s2 = s.map(lambda x: np.where(x == 0, 0, 1)) - self.assertTrue(issubclass(s2.dtype.type, np.integer)) + assert issubclass(s2.dtype.type, np.integer) def test_map_decimal(self): from decimal import Decimal result = self.series.map(lambda x: Decimal(str(x))) - self.assertEqual(result.dtype, np.object_) - tm.assertIsInstance(result[0], Decimal) + assert result.dtype == np.object_ + assert isinstance(result[0], Decimal) def test_map_na_exclusion(self): s = Series([1.5, np.nan, 3, np.nan, 5]) @@ -236,18 +439,60 @@ def test_map_dict_with_tuple_keys(self): converted to a multi-index, preventing tuple values from being mapped properly. 
""" + # GH 18496 df = pd.DataFrame({'a': [(1, ), (2, ), (3, 4), (5, 6)]}) label_mappings = {(1, ): 'A', (2, ): 'B', (3, 4): 'A', (5, 6): 'B'} + df['labels'] = df['a'].map(label_mappings) df['expected_labels'] = pd.Series(['A', 'B', 'A', 'B'], index=df.index) # All labels should be filled now tm.assert_series_equal(df['labels'], df['expected_labels'], check_names=False) + def test_map_counter(self): + s = Series(['a', 'b', 'c'], index=[1, 2, 3]) + counter = Counter() + counter['b'] = 5 + counter['c'] += 1 + result = s.map(counter) + expected = Series([0, 5, 1], index=[1, 2, 3]) + assert_series_equal(result, expected) + + def test_map_defaultdict(self): + s = Series([1, 2, 3], index=['a', 'b', 'c']) + default_dict = defaultdict(lambda: 'blank') + default_dict[1] = 'stuff' + result = s.map(default_dict) + expected = Series(['stuff', 'blank', 'blank'], index=['a', 'b', 'c']) + assert_series_equal(result, expected) + + def test_map_dict_subclass_with_missing(self): + """ + Test Series.map with a dictionary subclass that defines __missing__, + i.e. sets a default value (GH #15999). 
+ """ + class DictWithMissing(dict): + def __missing__(self, key): + return 'missing' + s = Series([1, 2, 3]) + dictionary = DictWithMissing({3: 'three'}) + result = s.map(dictionary) + expected = Series(['missing', 'missing', 'three']) + assert_series_equal(result, expected) + + def test_map_dict_subclass_without_missing(self): + class DictWithoutMissing(dict): + pass + s = Series([1, 2, 3]) + dictionary = DictWithoutMissing({3: 'three'}) + result = s.map(dictionary) + expected = Series([np.nan, np.nan, 'three']) + assert_series_equal(result, expected) + def test_map_box(self): vals = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')] s = pd.Series(vals) - self.assertEqual(s.dtype, 'datetime64[ns]') + assert s.dtype == 'datetime64[ns]' # boxed value must be Timestamp instance res = s.map(lambda x: '{0}_{1}_{2}'.format(x.__class__.__name__, x.day, x.tz)) @@ -257,7 +502,7 @@ def test_map_box(self): vals = [pd.Timestamp('2011-01-01', tz='US/Eastern'), pd.Timestamp('2011-01-02', tz='US/Eastern')] s = pd.Series(vals) - self.assertEqual(s.dtype, 'datetime64[ns, US/Eastern]') + assert s.dtype == 'datetime64[ns, US/Eastern]' res = s.map(lambda x: '{0}_{1}_{2}'.format(x.__class__.__name__, x.day, x.tz)) exp = pd.Series(['Timestamp_1_US/Eastern', 'Timestamp_2_US/Eastern']) @@ -266,7 +511,7 @@ def test_map_box(self): # timedelta vals = [pd.Timedelta('1 days'), pd.Timedelta('2 days')] s = pd.Series(vals) - self.assertEqual(s.dtype, 'timedelta64[ns]') + assert s.dtype == 'timedelta64[ns]' res = s.map(lambda x: '{0}_{1}'.format(x.__class__.__name__, x.days)) exp = pd.Series(['Timedelta_1', 'Timedelta_2']) tm.assert_series_equal(res, exp) @@ -275,7 +520,7 @@ def test_map_box(self): vals = [pd.Period('2011-01-01', freq='M'), pd.Period('2011-01-02', freq='M')] s = pd.Series(vals) - self.assertEqual(s.dtype, 'object') + assert s.dtype == 'object' res = s.map(lambda x: '{0}_{1}'.format(x.__class__.__name__, x.freqstr)) exp = pd.Series(['Period_M', 'Period_M']) @@ -296,9 +541,9 
@@ def test_map_categorical(self): result = s.map(lambda x: 'A') exp = pd.Series(['A'] * 7, name='XX', index=list('abcdefg')) tm.assert_series_equal(result, exp) - self.assertEqual(result.dtype, np.object) + assert result.dtype == np.object - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): s.map(lambda x: x, na_action='ignore') def test_map_datetimetz(self): @@ -319,7 +564,7 @@ def test_map_datetimetz(self): exp = pd.Series(list(range(24)) + [0], name='XX', dtype=np.int64) tm.assert_series_equal(result, exp) - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): s.map(lambda x: x, na_action='ignore') # not vectorized diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py new file mode 100644 index 0000000000000..ec0d7296e540e --- /dev/null +++ b/pandas/tests/series/test_arithmetic.py @@ -0,0 +1,868 @@ +# -*- coding: utf-8 -*- +from datetime import datetime, timedelta +import operator +from decimal import Decimal + +import numpy as np +import pytest + +from pandas import Series, Timestamp, Timedelta, Period, NaT +from pandas._libs.tslibs.period import IncompatibleFrequency + +import pandas as pd +import pandas.util.testing as tm + + +@pytest.fixture +def tdser(): + """ + Return a Series with dtype='timedelta64[ns]', including a NaT. 
+ """ + return Series(['59 Days', '59 Days', 'NaT'], dtype='timedelta64[ns]') + + +# ------------------------------------------------------------------ +# Comparisons + +class TestSeriesComparison(object): + def test_compare_invalid(self): + # GH#8058 + # ops testing + a = pd.Series(np.random.randn(5), name=0) + b = pd.Series(np.random.randn(5)) + b.name = pd.Timestamp('2000-01-01') + tm.assert_series_equal(a / b, 1 / (b / a)) + + @pytest.mark.parametrize('opname', ['eq', 'ne', 'gt', 'lt', 'ge', 'le']) + def test_ser_flex_cmp_return_dtypes(self, opname): + # GH#15115 + ser = Series([1, 3, 2], index=range(3)) + const = 2 + + result = getattr(ser, opname)(const).get_dtype_counts() + tm.assert_series_equal(result, Series([1], ['bool'])) + + @pytest.mark.parametrize('opname', ['eq', 'ne', 'gt', 'lt', 'ge', 'le']) + def test_ser_flex_cmp_return_dtypes_empty(self, opname): + # GH#15115 empty Series case + ser = Series([1, 3, 2], index=range(3)) + empty = ser.iloc[:0] + const = 2 + + result = getattr(empty, opname)(const).get_dtype_counts() + tm.assert_series_equal(result, Series([1], ['bool'])) + + @pytest.mark.parametrize('op', [operator.eq, operator.ne, + operator.le, operator.lt, + operator.ge, operator.gt]) + @pytest.mark.parametrize('names', [(None, None, None), + ('foo', 'bar', None), + ('baz', 'baz', 'baz')]) + def test_ser_cmp_result_names(self, names, op): + # datetime64 dtype + dti = pd.date_range('1949-06-07 03:00:00', + freq='H', periods=5, name=names[0]) + ser = Series(dti).rename(names[1]) + result = op(ser, dti) + assert result.name == names[2] + + # datetime64tz dtype + dti = dti.tz_localize('US/Central') + ser = Series(dti).rename(names[1]) + result = op(ser, dti) + assert result.name == names[2] + + # timedelta64 dtype + tdi = dti - dti.shift(1) + ser = Series(tdi).rename(names[1]) + result = op(ser, tdi) + assert result.name == names[2] + + # categorical + if op in [operator.eq, operator.ne]: + # categorical dtype comparisons raise for inequalities + 
cidx = tdi.astype('category') + ser = Series(cidx).rename(names[1]) + result = op(ser, cidx) + assert result.name == names[2] + + +class TestTimestampSeriesComparison(object): + def test_dt64ser_cmp_date_invalid(self): + # GH#19800 datetime.date comparison raises to + # match DatetimeIndex/Timestamp. This also matches the behavior + # of stdlib datetime.datetime + ser = pd.Series(pd.date_range('20010101', periods=10), name='dates') + date = ser.iloc[0].to_pydatetime().date() + assert not (ser == date).any() + assert (ser != date).all() + with pytest.raises(TypeError): + ser > date + with pytest.raises(TypeError): + ser < date + with pytest.raises(TypeError): + ser >= date + with pytest.raises(TypeError): + ser <= date + + def test_dt64ser_cmp_period_scalar(self): + ser = Series(pd.period_range('2000-01-01', periods=10, freq='D')) + val = Period('2000-01-04', freq='D') + result = ser > val + expected = Series([x > val for x in ser]) + tm.assert_series_equal(result, expected) + + val = ser[5] + result = ser > val + expected = Series([x > val for x in ser]) + tm.assert_series_equal(result, expected) + + def test_timestamp_compare_series(self): + # make sure we can compare Timestamps on the right AND left hand side + # GH#4982 + ser = pd.Series(pd.date_range('20010101', periods=10), name='dates') + s_nat = ser.copy(deep=True) + + ser[0] = pd.Timestamp('nat') + ser[3] = pd.Timestamp('nat') + + ops = {'lt': 'gt', 'le': 'ge', 'eq': 'eq', 'ne': 'ne'} + + for left, right in ops.items(): + left_f = getattr(operator, left) + right_f = getattr(operator, right) + + # no nats + expected = left_f(ser, pd.Timestamp('20010109')) + result = right_f(pd.Timestamp('20010109'), ser) + tm.assert_series_equal(result, expected) + + # nats + expected = left_f(ser, pd.Timestamp('nat')) + result = right_f(pd.Timestamp('nat'), ser) + tm.assert_series_equal(result, expected) + + # compare to timestamp with series containing nats + expected = left_f(s_nat, pd.Timestamp('20010109')) + result = 
right_f(pd.Timestamp('20010109'), s_nat) + tm.assert_series_equal(result, expected) + + # compare to nat with series containing nats + expected = left_f(s_nat, pd.Timestamp('nat')) + result = right_f(pd.Timestamp('nat'), s_nat) + tm.assert_series_equal(result, expected) + + def test_timestamp_equality(self): + # GH#11034 + ser = pd.Series([pd.Timestamp('2000-01-29 01:59:00'), 'NaT']) + result = ser != ser + tm.assert_series_equal(result, pd.Series([False, True])) + result = ser != ser[0] + tm.assert_series_equal(result, pd.Series([False, True])) + result = ser != ser[1] + tm.assert_series_equal(result, pd.Series([True, True])) + + result = ser == ser + tm.assert_series_equal(result, pd.Series([True, False])) + result = ser == ser[0] + tm.assert_series_equal(result, pd.Series([True, False])) + result = ser == ser[1] + tm.assert_series_equal(result, pd.Series([False, False])) + + +class TestTimedeltaSeriesComparisons(object): + def test_compare_timedelta_series(self): + # regresssion test for GH5963 + s = pd.Series([timedelta(days=1), timedelta(days=2)]) + actual = s > timedelta(days=1) + expected = pd.Series([False, True]) + tm.assert_series_equal(actual, expected) + + +class TestPeriodSeriesComparisons(object): + @pytest.mark.parametrize('freq', ['M', '2M', '3M']) + def test_cmp_series_period_scalar(self, freq): + # GH 13200 + base = Series([Period(x, freq=freq) for x in + ['2011-01', '2011-02', '2011-03', '2011-04']]) + p = Period('2011-02', freq=freq) + + exp = Series([False, True, False, False]) + tm.assert_series_equal(base == p, exp) + tm.assert_series_equal(p == base, exp) + + exp = Series([True, False, True, True]) + tm.assert_series_equal(base != p, exp) + tm.assert_series_equal(p != base, exp) + + exp = Series([False, False, True, True]) + tm.assert_series_equal(base > p, exp) + tm.assert_series_equal(p < base, exp) + + exp = Series([True, False, False, False]) + tm.assert_series_equal(base < p, exp) + tm.assert_series_equal(p > base, exp) + + exp = 
Series([False, True, True, True]) + tm.assert_series_equal(base >= p, exp) + tm.assert_series_equal(p <= base, exp) + + exp = Series([True, True, False, False]) + tm.assert_series_equal(base <= p, exp) + tm.assert_series_equal(p >= base, exp) + + # different base freq + msg = "Input has different freq=A-DEC from Period" + with tm.assert_raises_regex(IncompatibleFrequency, msg): + base <= Period('2011', freq='A') + + with tm.assert_raises_regex(IncompatibleFrequency, msg): + Period('2011', freq='A') >= base + + @pytest.mark.parametrize('freq', ['M', '2M', '3M']) + def test_cmp_series_period_series(self, freq): + # GH#13200 + base = Series([Period(x, freq=freq) for x in + ['2011-01', '2011-02', '2011-03', '2011-04']]) + + ser = Series([Period(x, freq=freq) for x in + ['2011-02', '2011-01', '2011-03', '2011-05']]) + + exp = Series([False, False, True, False]) + tm.assert_series_equal(base == ser, exp) + + exp = Series([True, True, False, True]) + tm.assert_series_equal(base != ser, exp) + + exp = Series([False, True, False, False]) + tm.assert_series_equal(base > ser, exp) + + exp = Series([True, False, False, True]) + tm.assert_series_equal(base < ser, exp) + + exp = Series([False, True, True, False]) + tm.assert_series_equal(base >= ser, exp) + + exp = Series([True, False, True, True]) + tm.assert_series_equal(base <= ser, exp) + + ser2 = Series([Period(x, freq='A') for x in + ['2011', '2011', '2011', '2011']]) + + # different base freq + msg = "Input has different freq=A-DEC from Period" + with tm.assert_raises_regex(IncompatibleFrequency, msg): + base <= ser2 + + def test_cmp_series_period_series_mixed_freq(self): + # GH#13200 + base = Series([Period('2011', freq='A'), + Period('2011-02', freq='M'), + Period('2013', freq='A'), + Period('2011-04', freq='M')]) + + ser = Series([Period('2012', freq='A'), + Period('2011-01', freq='M'), + Period('2013', freq='A'), + Period('2011-05', freq='M')]) + + exp = Series([False, False, True, False]) + 
tm.assert_series_equal(base == ser, exp) + + exp = Series([True, True, False, True]) + tm.assert_series_equal(base != ser, exp) + + exp = Series([False, True, False, False]) + tm.assert_series_equal(base > ser, exp) + + exp = Series([True, False, False, True]) + tm.assert_series_equal(base < ser, exp) + + exp = Series([False, True, True, False]) + tm.assert_series_equal(base >= ser, exp) + + exp = Series([True, False, True, True]) + tm.assert_series_equal(base <= ser, exp) + + +# ------------------------------------------------------------------ +# Arithmetic + +class TestSeriesDivision(object): + # __div__, __rdiv__, __floordiv__, __rfloordiv__ + # for non-timestamp/timedelta/period dtypes + + def test_divide_decimal(self): + # resolves issue GH#9787 + expected = Series([Decimal(5)]) + + ser = Series([Decimal(10)]) + result = ser / Decimal(2) + + tm.assert_series_equal(result, expected) + + ser = Series([Decimal(10)]) + result = ser // Decimal(2) + + tm.assert_series_equal(result, expected) + + def test_div_equiv_binop(self): + # Test Series.div as well as Series.__div__ + # float/integer issue + # GH#7785 + first = Series([1, 0], name='first') + second = Series([-0.01, -0.02], name='second') + expected = Series([-0.01, -np.inf]) + + result = second.div(first) + tm.assert_series_equal(result, expected, check_names=False) + + result = second / first + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('dtype2', [ + np.int64, np.int32, np.int16, np.int8, + np.float64, np.float32, np.float16, + np.uint64, np.uint32, np.uint16, np.uint8]) + @pytest.mark.parametrize('dtype1', [np.int64, np.float64, np.uint64]) + def test_ser_div_ser(self, dtype1, dtype2): + # no longer do integer div for any ops, but deal with the 0's + first = Series([3, 4, 5, 8], name='first').astype(dtype1) + second = Series([0, 0, 0, 3], name='second').astype(dtype2) + + with np.errstate(all='ignore'): + expected = Series(first.values.astype(np.float64) / second.values, + 
dtype='float64', name=None) + expected.iloc[0:3] = np.inf + + result = first / second + tm.assert_series_equal(result, expected) + assert not result.equals(second / first) + + def test_rdiv_zero_compat(self): + # GH#8674 + zero_array = np.array([0] * 5) + data = np.random.randn(5) + expected = Series([0.] * 5) + + result = zero_array / Series(data) + tm.assert_series_equal(result, expected) + + result = Series(zero_array) / data + tm.assert_series_equal(result, expected) + + result = Series(zero_array) / Series(data) + tm.assert_series_equal(result, expected) + + def test_div_zero_inf_signs(self): + # GH#9144, inf signing + ser = Series([-1, 0, 1], name='first') + expected = Series([-np.inf, np.nan, np.inf], name='first') + + result = ser / 0 + tm.assert_series_equal(result, expected) + + def test_rdiv_zero(self): + # GH#9144 + ser = Series([-1, 0, 1], name='first') + expected = Series([0.0, np.nan, 0.0], name='first') + + result = 0 / ser + tm.assert_series_equal(result, expected) + + def test_floordiv_div(self): + # GH#9144 + ser = Series([-1, 0, 1], name='first') + + result = ser // 0 + expected = Series([-np.inf, np.nan, np.inf], name='first') + tm.assert_series_equal(result, expected) + + +class TestSeriesArithmetic(object): + # Standard, numeric, or otherwise not-Timestamp/Timedelta/Period dtypes + @pytest.mark.parametrize('data', [ + [1, 2, 3], + [1.1, 2.2, 3.3], + [Timestamp('2011-01-01'), Timestamp('2011-01-02'), pd.NaT], + ['x', 'y', 1]]) + @pytest.mark.parametrize('dtype', [None, object]) + def test_series_radd_str_invalid(self, dtype, data): + ser = Series(data, dtype=dtype) + with pytest.raises(TypeError): + 'foo_' + ser + + # TODO: parametrize, better name + def test_object_ser_add_invalid(self): + # invalid ops + obj_ser = tm.makeObjectSeries() + obj_ser.name = 'objects' + with pytest.raises(Exception): + obj_ser + 1 + with pytest.raises(Exception): + obj_ser + np.array(1, dtype=np.int64) + with pytest.raises(Exception): + obj_ser - 1 + with 
pytest.raises(Exception): + obj_ser - np.array(1, dtype=np.int64) + + @pytest.mark.parametrize('dtype', [None, object]) + def test_series_with_dtype_radd_nan(self, dtype): + ser = pd.Series([1, 2, 3], dtype=dtype) + expected = pd.Series([np.nan, np.nan, np.nan], dtype=dtype) + + result = np.nan + ser + tm.assert_series_equal(result, expected) + + result = ser + np.nan + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('dtype', [None, object]) + def test_series_with_dtype_radd_int(self, dtype): + ser = pd.Series([1, 2, 3], dtype=dtype) + expected = pd.Series([2, 3, 4], dtype=dtype) + + result = 1 + ser + tm.assert_series_equal(result, expected) + + result = ser + 1 + tm.assert_series_equal(result, expected) + + def test_series_radd_str(self): + ser = pd.Series(['x', np.nan, 'x']) + tm.assert_series_equal('a' + ser, pd.Series(['ax', np.nan, 'ax'])) + tm.assert_series_equal(ser + 'a', pd.Series(['xa', np.nan, 'xa'])) + + @pytest.mark.parametrize('dtype', [None, object]) + def test_series_with_dtype_radd_timedelta(self, dtype): + # note this test is _not_ aimed at timedelta64-dtyped Series + ser = pd.Series([pd.Timedelta('1 days'), pd.Timedelta('2 days'), + pd.Timedelta('3 days')], dtype=dtype) + expected = pd.Series([pd.Timedelta('4 days'), pd.Timedelta('5 days'), + pd.Timedelta('6 days')]) + + result = pd.Timedelta('3 days') + ser + tm.assert_series_equal(result, expected) + + result = ser + pd.Timedelta('3 days') + tm.assert_series_equal(result, expected) + + +class TestPeriodSeriesArithmetic(object): + def test_ops_series_timedelta(self): + # GH 13043 + ser = pd.Series([pd.Period('2015-01-01', freq='D'), + pd.Period('2015-01-02', freq='D')], name='xxx') + assert ser.dtype == object + + expected = pd.Series([pd.Period('2015-01-02', freq='D'), + pd.Period('2015-01-03', freq='D')], name='xxx') + + result = ser + pd.Timedelta('1 days') + tm.assert_series_equal(result, expected) + + result = pd.Timedelta('1 days') + ser + 
tm.assert_series_equal(result, expected) + + result = ser + pd.tseries.offsets.Day() + tm.assert_series_equal(result, expected) + + result = pd.tseries.offsets.Day() + ser + tm.assert_series_equal(result, expected) + + def test_ops_series_period(self): + # GH 13043 + ser = pd.Series([pd.Period('2015-01-01', freq='D'), + pd.Period('2015-01-02', freq='D')], name='xxx') + assert ser.dtype == object + + per = pd.Period('2015-01-10', freq='D') + # dtype will be object because of original dtype + expected = pd.Series([9, 8], name='xxx', dtype=object) + tm.assert_series_equal(per - ser, expected) + tm.assert_series_equal(ser - per, -1 * expected) + + s2 = pd.Series([pd.Period('2015-01-05', freq='D'), + pd.Period('2015-01-04', freq='D')], name='xxx') + assert s2.dtype == object + + expected = pd.Series([4, 2], name='xxx', dtype=object) + tm.assert_series_equal(s2 - ser, expected) + tm.assert_series_equal(ser - s2, -1 * expected) + + +class TestTimestampSeriesArithmetic(object): + def test_timestamp_sub_series(self): + ser = pd.Series(pd.date_range('2014-03-17', periods=2, freq='D', + tz='US/Eastern')) + ts = ser[0] + + delta_series = pd.Series([np.timedelta64(0, 'D'), + np.timedelta64(1, 'D')]) + tm.assert_series_equal(ser - ts, delta_series) + tm.assert_series_equal(ts - ser, -delta_series) + + def test_dt64ser_sub_datetime_dtype(self): + ts = Timestamp(datetime(1993, 1, 7, 13, 30, 00)) + dt = datetime(1993, 6, 22, 13, 30) + ser = Series([ts]) + result = pd.to_timedelta(np.abs(ser - dt)) + assert result.dtype == 'timedelta64[ns]' + + +class TestTimedeltaSeriesAdditionSubtraction(object): + # Tests for Series[timedelta64[ns]] __add__, __sub__, __radd__, __rsub__ + + # ------------------------------------------------------------------ + # Operations with int-like others + + def test_td64series_add_int_series_invalid(self, tdser): + with pytest.raises(TypeError): + tdser + Series([2, 3, 4]) + + @pytest.mark.xfail(reason='GH#19123 integer interpreted as nanoseconds') + def 
test_td64series_radd_int_series_invalid(self, tdser): + with pytest.raises(TypeError): + Series([2, 3, 4]) + tdser + + def test_td64series_sub_int_series_invalid(self, tdser): + with pytest.raises(TypeError): + tdser - Series([2, 3, 4]) + + @pytest.mark.xfail(reason='GH#19123 integer interpreted as nanoseconds') + def test_td64series_rsub_int_series_invalid(self, tdser): + with pytest.raises(TypeError): + Series([2, 3, 4]) - tdser + + def test_td64_series_add_intlike(self): + # GH#19123 + tdi = pd.TimedeltaIndex(['59 days', '59 days', 'NaT']) + ser = Series(tdi) + + other = Series([20, 30, 40], dtype='uint8') + + pytest.raises(TypeError, ser.__add__, 1) + pytest.raises(TypeError, ser.__sub__, 1) + + pytest.raises(TypeError, ser.__add__, other) + pytest.raises(TypeError, ser.__sub__, other) + + pytest.raises(TypeError, ser.__add__, other.values) + pytest.raises(TypeError, ser.__sub__, other.values) + + pytest.raises(TypeError, ser.__add__, pd.Index(other)) + pytest.raises(TypeError, ser.__sub__, pd.Index(other)) + + @pytest.mark.parametrize('scalar', [1, 1.5, np.array(2)]) + def test_td64series_add_sub_numeric_scalar_invalid(self, scalar, tdser): + with pytest.raises(TypeError): + tdser + scalar + with pytest.raises(TypeError): + scalar + tdser + with pytest.raises(TypeError): + tdser - scalar + with pytest.raises(TypeError): + scalar - tdser + + @pytest.mark.parametrize('dtype', ['int64', 'int32', 'int16', + 'uint64', 'uint32', 'uint16', 'uint8', + 'float64', 'float32', 'float16']) + @pytest.mark.parametrize('vector', [ + np.array([1, 2, 3]), + pd.Index([1, 2, 3]), + pytest.param(Series([1, 2, 3]), + marks=pytest.mark.xfail(reason='GH#19123 integer ' + 'interpreted as nanos')) + ]) + def test_td64series_add_sub_numeric_array_invalid(self, vector, + dtype, tdser): + vector = vector.astype(dtype) + with pytest.raises(TypeError): + tdser + vector + with pytest.raises(TypeError): + vector + tdser + with pytest.raises(TypeError): + tdser - vector + with 
pytest.raises(TypeError): + vector - tdser + + # ------------------------------------------------------------------ + # Operations with datetime-like others + + def test_td64series_add_sub_timestamp(self): + # GH#11925 + tdser = Series(pd.timedelta_range('1 day', periods=3)) + ts = Timestamp('2012-01-01') + expected = Series(pd.date_range('2012-01-02', periods=3)) + tm.assert_series_equal(ts + tdser, expected) + tm.assert_series_equal(tdser + ts, expected) + + expected2 = Series(pd.date_range('2011-12-31', periods=3, freq='-1D')) + tm.assert_series_equal(ts - tdser, expected2) + tm.assert_series_equal(ts + (-tdser), expected2) + + with pytest.raises(TypeError): + tdser - ts + + # ------------------------------------------------------------------ + # Operations with timedelta-like others (including DateOffsets) + + @pytest.mark.parametrize('names', [(None, None, None), + ('Egon', 'Venkman', None), + ('NCC1701D', 'NCC1701D', 'NCC1701D')]) + def test_td64_series_with_tdi(self, names): + # GH#17250 make sure result dtype is correct + # GH#19043 make sure names are propagated correctly + tdi = pd.TimedeltaIndex(['0 days', '1 day'], name=names[0]) + ser = Series([Timedelta(hours=3), Timedelta(hours=4)], name=names[1]) + expected = Series([Timedelta(hours=3), Timedelta(days=1, hours=4)], + name=names[2]) + + result = tdi + ser + tm.assert_series_equal(result, expected) + assert result.dtype == 'timedelta64[ns]' + + result = ser + tdi + tm.assert_series_equal(result, expected) + assert result.dtype == 'timedelta64[ns]' + + expected = Series([Timedelta(hours=-3), Timedelta(days=1, hours=-4)], + name=names[2]) + + result = tdi - ser + tm.assert_series_equal(result, expected) + assert result.dtype == 'timedelta64[ns]' + + result = ser - tdi + tm.assert_series_equal(result, -expected) + assert result.dtype == 'timedelta64[ns]' + + def test_td64_sub_NaT(self): + # GH#18808 + ser = Series([NaT, Timedelta('1s')]) + res = ser - NaT + expected = Series([NaT, NaT], 
dtype='timedelta64[ns]') + tm.assert_series_equal(res, expected) + + +class TestTimedeltaSeriesMultiplicationDivision(object): + # Tests for Series[timedelta64[ns]] + # __mul__, __rmul__, __div__, __rdiv__, __floordiv__, __rfloordiv__ + + # ------------------------------------------------------------------ + # __floordiv__, __rfloordiv__ + + @pytest.mark.parametrize('scalar_td', [ + timedelta(minutes=5, seconds=4), + Timedelta('5m4s'), + Timedelta('5m4s').to_timedelta64()]) + def test_timedelta_floordiv(self, scalar_td): + # GH#18831 + td1 = Series([timedelta(minutes=5, seconds=3)] * 3) + td1.iloc[2] = np.nan + + result = td1 // scalar_td + expected = Series([0, 0, np.nan]) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('scalar_td', [ + timedelta(minutes=5, seconds=4), + Timedelta('5m4s'), + Timedelta('5m4s').to_timedelta64()]) + def test_timedelta_rfloordiv(self, scalar_td): + # GH#18831 + td1 = Series([timedelta(minutes=5, seconds=3)] * 3) + td1.iloc[2] = np.nan + result = scalar_td // td1 + expected = Series([1, 1, np.nan]) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('scalar_td', [ + timedelta(minutes=5, seconds=4), + Timedelta('5m4s'), + Timedelta('5m4s').to_timedelta64()]) + def test_timedelta_rfloordiv_explicit(self, scalar_td): + # GH#18831 + td1 = Series([timedelta(minutes=5, seconds=3)] * 3) + td1.iloc[2] = np.nan + + # We can test __rfloordiv__ using this syntax, + # see `test_timedelta_rfloordiv` + result = td1.__rfloordiv__(scalar_td) + expected = Series([1, 1, np.nan]) + tm.assert_series_equal(result, expected) + + # ------------------------------------------------------------------ + # Operations with int-like others + + @pytest.mark.parametrize('dtype', ['int64', 'int32', 'int16', + 'uint64', 'uint32', 'uint16', 'uint8', + 'float64', 'float32', 'float16']) + @pytest.mark.parametrize('vector', [np.array([20, 30, 40]), + pd.Index([20, 30, 40]), + Series([20, 30, 40])]) + def 
test_td64series_div_numeric_array(self, vector, dtype, tdser): + # GH#4521 + # divide/multiply by integers + vector = vector.astype(dtype) + expected = Series(['2.95D', '1D 23H 12m', 'NaT'], + dtype='timedelta64[ns]') + + result = tdser / vector + tm.assert_series_equal(result, expected) + + with pytest.raises(TypeError): + vector / tdser + + @pytest.mark.parametrize('dtype', ['int64', 'int32', 'int16', + 'uint64', 'uint32', 'uint16', 'uint8', + 'float64', 'float32', 'float16']) + @pytest.mark.parametrize('vector', [np.array([20, 30, 40]), + pd.Index([20, 30, 40]), + Series([20, 30, 40])]) + def test_td64series_mul_numeric_array(self, vector, dtype, tdser): + # GH#4521 + # divide/multiply by integers + vector = vector.astype(dtype) + + expected = Series(['1180 Days', '1770 Days', 'NaT'], + dtype='timedelta64[ns]') + + result = tdser * vector + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('dtype', ['int64', 'int32', 'int16', + 'uint64', 'uint32', 'uint16', 'uint8', + 'float64', 'float32', 'float16']) + @pytest.mark.parametrize('vector', [ + np.array([20, 30, 40]), + pytest.param(pd.Index([20, 30, 40]), + marks=pytest.mark.xfail(reason='__mul__ raises ' + 'instead of returning ' + 'NotImplemented')), + Series([20, 30, 40]) + ]) + def test_td64series_rmul_numeric_array(self, vector, dtype, tdser): + # GH#4521 + # divide/multiply by integers + vector = vector.astype(dtype) + + expected = Series(['1180 Days', '1770 Days', 'NaT'], + dtype='timedelta64[ns]') + + result = vector * tdser + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('one', [1, np.array(1), 1.0, np.array(1.0)]) + def test_td64series_mul_numeric_scalar(self, one, tdser): + # GH#4521 + # divide/multiply by integers + expected = Series(['-59 Days', '-59 Days', 'NaT'], + dtype='timedelta64[ns]') + + result = tdser * (-one) + tm.assert_series_equal(result, expected) + result = (-one) * tdser + tm.assert_series_equal(result, expected) + + expected = Series(['118 
Days', '118 Days', 'NaT'], + dtype='timedelta64[ns]') + + result = tdser * (2 * one) + tm.assert_series_equal(result, expected) + result = (2 * one) * tdser + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('two', [ + 2, 2.0, + pytest.param(np.array(2), + marks=pytest.mark.xfail(reason='GH#19011 is_list_like ' + 'incorrectly True.')), + pytest.param(np.array(2.0), + marks=pytest.mark.xfail(reason='GH#19011 is_list_like ' + 'incorrectly True.')), + ]) + def test_td64series_div_numeric_scalar(self, two, tdser): + # GH#4521 + # divide/multiply by integers + expected = Series(['29.5D', '29.5D', 'NaT'], dtype='timedelta64[ns]') + + result = tdser / two + tm.assert_series_equal(result, expected) + + # ------------------------------------------------------------------ + # Operations with timedelta-like others + + @pytest.mark.parametrize('names', [(None, None, None), + ('Egon', 'Venkman', None), + ('NCC1701D', 'NCC1701D', 'NCC1701D')]) + def test_tdi_mul_int_series(self, names): + # GH#19042 + tdi = pd.TimedeltaIndex(['0days', '1day', '2days', '3days', '4days'], + name=names[0]) + ser = Series([0, 1, 2, 3, 4], dtype=np.int64, name=names[1]) + + expected = Series(['0days', '1day', '4days', '9days', '16days'], + dtype='timedelta64[ns]', + name=names[2]) + + result = ser * tdi + tm.assert_series_equal(result, expected) + + # The direct operation tdi * ser still needs to be fixed. + result = ser.__rmul__(tdi) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('names', [(None, None, None), + ('Egon', 'Venkman', None), + ('NCC1701D', 'NCC1701D', 'NCC1701D')]) + def test_float_series_rdiv_tdi(self, names): + # GH#19042 + # TODO: the direct operation TimedeltaIndex / Series still + # needs to be fixed. 
+ tdi = pd.TimedeltaIndex(['0days', '1day', '2days', '3days', '4days'], + name=names[0]) + ser = Series([1.5, 3, 4.5, 6, 7.5], dtype=np.float64, name=names[1]) + + expected = Series([tdi[n] / ser[n] for n in range(len(ser))], + dtype='timedelta64[ns]', + name=names[2]) + + result = ser.__rdiv__(tdi) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('scalar_td', [ + timedelta(minutes=5, seconds=4), + Timedelta('5m4s'), + Timedelta('5m4s').to_timedelta64()]) + def test_td64series_mul_timedeltalike_invalid(self, scalar_td): + td1 = Series([timedelta(minutes=5, seconds=3)] * 3) + td1.iloc[2] = np.nan + + # check that we are getting a TypeError + # with 'operate' (from core/ops.py) for the ops that are not + # defined + pattern = 'operate|unsupported|cannot|not supported' + with tm.assert_raises_regex(TypeError, pattern): + td1 * scalar_td + with tm.assert_raises_regex(TypeError, pattern): + scalar_td * td1 + + +class TestTimedeltaSeriesInvalidArithmeticOps(object): + @pytest.mark.parametrize('scalar_td', [ + timedelta(minutes=5, seconds=4), + Timedelta('5m4s'), + Timedelta('5m4s').to_timedelta64()]) + def test_td64series_pow_invalid(self, scalar_td): + td1 = Series([timedelta(minutes=5, seconds=3)] * 3) + td1.iloc[2] = np.nan + + # check that we are getting a TypeError + # with 'operate' (from core/ops.py) for the ops that are not + # defined + pattern = 'operate|unsupported|cannot|not supported' + with tm.assert_raises_regex(TypeError, pattern): + scalar_td ** td1 + with tm.assert_raises_regex(TypeError, pattern): + td1 ** scalar_td diff --git a/pandas/tests/series/test_asof.py b/pandas/tests/series/test_asof.py index d2fd8858e7647..3104d85601434 100644 --- a/pandas/tests/series/test_asof.py +++ b/pandas/tests/series/test_asof.py @@ -1,15 +1,17 @@ # coding=utf-8 +import pytest + import numpy as np -from pandas import (offsets, Series, notnull, - isnull, date_range, Timestamp) +from pandas import (offsets, Series, notna, + isna, date_range, 
Timestamp) import pandas.util.testing as tm from .common import TestData -class TestSeriesAsof(TestData, tm.TestCase): +class TestSeriesAsof(TestData): def test_basic(self): @@ -21,21 +23,21 @@ def test_basic(self): dates = date_range('1/1/1990', periods=N * 3, freq='25s') result = ts.asof(dates) - self.assertTrue(notnull(result).all()) + assert notna(result).all() lb = ts.index[14] ub = ts.index[30] result = ts.asof(list(dates)) - self.assertTrue(notnull(result).all()) + assert notna(result).all() lb = ts.index[14] ub = ts.index[30] mask = (result.index >= lb) & (result.index < ub) rs = result[mask] - self.assertTrue((rs == ts[lb]).all()) + assert (rs == ts[lb]).all() val = result[result.index[result.index >= ub][0]] - self.assertEqual(ts[ub], val) + assert ts[ub] == val def test_scalar(self): @@ -48,20 +50,20 @@ def test_scalar(self): val1 = ts.asof(ts.index[7]) val2 = ts.asof(ts.index[19]) - self.assertEqual(val1, ts[4]) - self.assertEqual(val2, ts[14]) + assert val1 == ts[4] + assert val2 == ts[14] # accepts strings val1 = ts.asof(str(ts.index[7])) - self.assertEqual(val1, ts[4]) + assert val1 == ts[4] # in there result = ts.asof(ts.index[3]) - self.assertEqual(result, ts[3]) + assert result == ts[3] # no as of value d = ts.index[0] - offsets.BDay() - self.assertTrue(np.isnan(ts.asof(d))) + assert np.isnan(ts.asof(d)) def test_with_nan(self): # basic asof test @@ -96,19 +98,19 @@ def test_periodindex(self): dates = date_range('1/1/1990', periods=N * 3, freq='37min') result = ts.asof(dates) - self.assertTrue(notnull(result).all()) + assert notna(result).all() lb = ts.index[14] ub = ts.index[30] result = ts.asof(list(dates)) - self.assertTrue(notnull(result).all()) + assert notna(result).all() lb = ts.index[14] ub = ts.index[30] pix = PeriodIndex(result.index.values, freq='H') mask = (pix >= lb) & (pix < ub) rs = result[mask] - self.assertTrue((rs == ts[lb]).all()) + assert (rs == ts[lb]).all() ts[5:10] = np.nan ts[15:20] = np.nan @@ -116,19 +118,19 @@ def 
test_periodindex(self): val1 = ts.asof(ts.index[7]) val2 = ts.asof(ts.index[19]) - self.assertEqual(val1, ts[4]) - self.assertEqual(val2, ts[14]) + assert val1 == ts[4] + assert val2 == ts[14] # accepts strings val1 = ts.asof(str(ts.index[7])) - self.assertEqual(val1, ts[4]) + assert val1 == ts[4] # in there - self.assertEqual(ts.asof(ts.index[3]), ts[3]) + assert ts.asof(ts.index[3]) == ts[3] # no as of value d = ts.index[0].to_timestamp() - offsets.BDay() - self.assertTrue(isnull(ts.asof(d))) + assert isna(ts.asof(d)) def test_errors(self): @@ -138,13 +140,39 @@ def test_errors(self): Timestamp('20130102')]) # non-monotonic - self.assertFalse(s.index.is_monotonic) - with self.assertRaises(ValueError): + assert not s.index.is_monotonic + with pytest.raises(ValueError): s.asof(s.index[0]) # subset with Series N = 10 rng = date_range('1/1/1990', periods=N, freq='53s') s = Series(np.random.randn(N), index=rng) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): s.asof(s.index[0], subset='foo') + + def test_all_nans(self): + # GH 15713 + # series is all nans + result = Series([np.nan]).asof([0]) + expected = Series([np.nan]) + tm.assert_series_equal(result, expected) + + # testing non-default indexes + N = 50 + rng = date_range('1/1/1990', periods=N, freq='53s') + + dates = date_range('1/1/1990', periods=N * 3, freq='25s') + result = Series(np.nan, index=rng).asof(dates) + expected = Series(np.nan, index=dates) + tm.assert_series_equal(result, expected) + + # testing scalar input + date = date_range('1/1/1990', periods=N * 3, freq='25s')[0] + result = Series(np.nan, index=rng).asof(date) + assert isna(result) + + # test name is propagated + result = Series(np.nan, index=[1, 2, 3, 4], name='test').asof([4, 5]) + expected = Series(np.nan, index=[4, 5], name='test') + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py index d4e5d36c15c68..6cf60e818c845 100644 
--- a/pandas/tests/series/test_combine_concat.py +++ b/pandas/tests/series/test_combine_concat.py @@ -1,6 +1,8 @@ # coding=utf-8 # pylint: disable-msg=E1101,W0612 +import pytest + from datetime import datetime from numpy import nan @@ -16,20 +18,20 @@ from .common import TestData -class TestSeriesCombine(TestData, tm.TestCase): +class TestSeriesCombine(TestData): def test_append(self): appendedSeries = self.series.append(self.objSeries) for idx, value in compat.iteritems(appendedSeries): if idx in self.series.index: - self.assertEqual(value, self.series[idx]) + assert value == self.series[idx] elif idx in self.objSeries.index: - self.assertEqual(value, self.objSeries[idx]) + assert value == self.objSeries[idx] else: self.fail("orphaned index!") - self.assertRaises(ValueError, self.ts.append, self.ts, - verify_integrity=True) + pytest.raises(ValueError, self.ts.append, self.ts, + verify_integrity=True) def test_append_many(self): pieces = [self.ts[:5], self.ts[5:10], self.ts[10:]] @@ -53,9 +55,9 @@ def test_append_duplicates(self): exp, check_index_type=True) msg = 'Indexes have overlapping values:' - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): s1.append(s2, verify_integrity=True) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): pd.concat([s1, s2], verify_integrity=True) def test_combine_first(self): @@ -68,14 +70,14 @@ def test_combine_first(self): # nothing used from the input combined = series.combine_first(series_copy) - self.assert_series_equal(combined, series) + tm.assert_series_equal(combined, series) # Holes filled from input combined = series_copy.combine_first(series) - self.assertTrue(np.isfinite(combined).all()) + assert np.isfinite(combined).all() - self.assert_series_equal(combined[::2], series[::2]) - self.assert_series_equal(combined[1::2], series_copy[1::2]) + tm.assert_series_equal(combined[::2], series[::2]) + tm.assert_series_equal(combined[1::2], 
series_copy[1::2]) # mixed types index = tm.makeStringIndex(20) @@ -115,9 +117,9 @@ def test_concat_empty_series_dtypes_roundtrips(self): 'M8[ns]']) for dtype in dtypes: - self.assertEqual(pd.concat([Series(dtype=dtype)]).dtype, dtype) - self.assertEqual(pd.concat([Series(dtype=dtype), - Series(dtype=dtype)]).dtype, dtype) + assert pd.concat([Series(dtype=dtype)]).dtype == dtype + assert pd.concat([Series(dtype=dtype), + Series(dtype=dtype)]).dtype == dtype def int_result_type(dtype, dtype2): typs = set([dtype.kind, dtype2.kind]) @@ -153,58 +155,56 @@ def get_result_type(dtype, dtype2): expected = get_result_type(dtype, dtype2) result = pd.concat([Series(dtype=dtype), Series(dtype=dtype2) ]).dtype - self.assertEqual(result.kind, expected) + assert result.kind == expected def test_concat_empty_series_dtypes(self): - # bools - self.assertEqual(pd.concat([Series(dtype=np.bool_), - Series(dtype=np.int32)]).dtype, np.int32) - self.assertEqual(pd.concat([Series(dtype=np.bool_), - Series(dtype=np.float32)]).dtype, - np.object_) - - # datetimelike - self.assertEqual(pd.concat([Series(dtype='m8[ns]'), - Series(dtype=np.bool)]).dtype, np.object_) - self.assertEqual(pd.concat([Series(dtype='m8[ns]'), - Series(dtype=np.int64)]).dtype, np.object_) - self.assertEqual(pd.concat([Series(dtype='M8[ns]'), - Series(dtype=np.bool)]).dtype, np.object_) - self.assertEqual(pd.concat([Series(dtype='M8[ns]'), - Series(dtype=np.int64)]).dtype, np.object_) - self.assertEqual(pd.concat([Series(dtype='M8[ns]'), - Series(dtype=np.bool_), - Series(dtype=np.int64)]).dtype, np.object_) + # booleans + assert pd.concat([Series(dtype=np.bool_), + Series(dtype=np.int32)]).dtype == np.int32 + assert pd.concat([Series(dtype=np.bool_), + Series(dtype=np.float32)]).dtype == np.object_ + + # datetime-like + assert pd.concat([Series(dtype='m8[ns]'), + Series(dtype=np.bool)]).dtype == np.object_ + assert pd.concat([Series(dtype='m8[ns]'), + Series(dtype=np.int64)]).dtype == np.object_ + assert 
pd.concat([Series(dtype='M8[ns]'), + Series(dtype=np.bool)]).dtype == np.object_ + assert pd.concat([Series(dtype='M8[ns]'), + Series(dtype=np.int64)]).dtype == np.object_ + assert pd.concat([Series(dtype='M8[ns]'), + Series(dtype=np.bool_), + Series(dtype=np.int64)]).dtype == np.object_ # categorical - self.assertEqual(pd.concat([Series(dtype='category'), - Series(dtype='category')]).dtype, - 'category') - self.assertEqual(pd.concat([Series(dtype='category'), - Series(dtype='float64')]).dtype, - 'float64') - self.assertEqual(pd.concat([Series(dtype='category'), - Series(dtype='object')]).dtype, 'object') + assert pd.concat([Series(dtype='category'), + Series(dtype='category')]).dtype == 'category' + # GH 18515 + assert pd.concat([Series(np.array([]), dtype='category'), + Series(dtype='float64')]).dtype == 'float64' + assert pd.concat([Series(dtype='category'), + Series(dtype='object')]).dtype == 'object' # sparse result = pd.concat([Series(dtype='float64').to_sparse(), Series( dtype='float64').to_sparse()]) - self.assertEqual(result.dtype, np.float64) - self.assertEqual(result.ftype, 'float64:sparse') + assert result.dtype == np.float64 + assert result.ftype == 'float64:sparse' result = pd.concat([Series(dtype='float64').to_sparse(), Series( dtype='float64')]) - self.assertEqual(result.dtype, np.float64) - self.assertEqual(result.ftype, 'float64:sparse') + assert result.dtype == np.float64 + assert result.ftype == 'float64:sparse' result = pd.concat([Series(dtype='float64').to_sparse(), Series( dtype='object')]) - self.assertEqual(result.dtype, np.object_) - self.assertEqual(result.ftype, 'object:dense') + assert result.dtype == np.object_ + assert result.ftype == 'object:dense' def test_combine_first_dt64(self): - from pandas.tseries.tools import to_datetime + from pandas.core.tools.datetimes import to_datetime s0 = to_datetime(Series(["2010", np.NaN])) s1 = to_datetime(Series([np.NaN, "2011"])) rs = s0.combine_first(s1) @@ -218,7 +218,7 @@ def 
test_combine_first_dt64(self): assert_series_equal(rs, xp) -class TestTimeseries(tm.TestCase): +class TestTimeseries(object): def test_append_concat(self): rng = date_range('5/8/2012 1:45', periods=10, freq='5T') @@ -243,13 +243,11 @@ def test_append_concat(self): rng2 = rng.copy() rng1.name = 'foo' rng2.name = 'bar' - self.assertEqual(rng1.append(rng1).name, 'foo') - self.assertIsNone(rng1.append(rng2).name) + assert rng1.append(rng1).name == 'foo' + assert rng1.append(rng2).name is None def test_append_concat_tz(self): - # GH 2938 - tm._skip_if_no_pytz() - + # see gh-2938 rng = date_range('5/8/2012 1:45', periods=10, freq='5T', tz='US/Eastern') rng2 = date_range('5/8/2012 2:35', periods=10, freq='5T', @@ -270,8 +268,7 @@ def test_append_concat_tz(self): tm.assert_index_equal(appended, rng3) def test_append_concat_tz_explicit_pytz(self): - # GH 2938 - tm._skip_if_no_pytz() + # see gh-2938 from pytz import timezone as timezone rng = date_range('5/8/2012 1:45', periods=10, freq='5T', @@ -294,8 +291,7 @@ def test_append_concat_tz_explicit_pytz(self): tm.assert_index_equal(appended, rng3) def test_append_concat_tz_dateutil(self): - # GH 2938 - tm._skip_if_no_dateutil() + # see gh-2938 rng = date_range('5/8/2012 1:45', periods=10, freq='5T', tz='dateutil/US/Eastern') rng2 = date_range('5/8/2012 2:35', periods=10, freq='5T', diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index aef4c9269bc62..e0bfe41645a3f 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1,98 +1,121 @@ # coding=utf-8 # pylint: disable-msg=E1101,W0612 +import pytest + from datetime import datetime, timedelta +from collections import OrderedDict from numpy import nan import numpy as np import numpy.ma as ma import pandas as pd -from pandas.types.common import is_categorical_dtype, is_datetime64tz_dtype -from pandas import (Index, Series, isnull, date_range, - period_range, NaT) -from pandas.core.index 
import MultiIndex -from pandas.tseries.index import Timestamp, DatetimeIndex +from pandas.api.types import CategoricalDtype +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_datetime64tz_dtype) +from pandas import (Index, Series, isna, date_range, Timestamp, + NaT, period_range, timedelta_range, MultiIndex, + IntervalIndex, Categorical, DataFrame) -from pandas import lib, tslib +from pandas._libs import lib +from pandas._libs.tslib import iNaT -from pandas.compat import lrange, range, zip, OrderedDict, long -from pandas import compat +from pandas.compat import lrange, range, zip, long, PY36 from pandas.util.testing import assert_series_equal import pandas.util.testing as tm from .common import TestData -class TestSeriesConstructors(TestData, tm.TestCase): +class TestSeriesConstructors(TestData): + + def test_invalid_dtype(self): + # GH15520 + msg = 'not understood' + invalid_list = [pd.Timestamp, 'pd.Timestamp', list] + for dtype in invalid_list: + with tm.assert_raises_regex(TypeError, msg): + Series([], name='time', dtype=dtype) def test_scalar_conversion(self): # Pass in scalar is disabled scalar = Series(0.5) - self.assertNotIsInstance(scalar, float) - - # coercion - self.assertEqual(float(Series([1.])), 1.0) - self.assertEqual(int(Series([1.])), 1) - self.assertEqual(long(Series([1.])), 1) - - def test_TimeSeries_deprecation(self): + assert not isinstance(scalar, float) - # deprecation TimeSeries, #10890 - with tm.assert_produces_warning(FutureWarning): - pd.TimeSeries(1, index=date_range('20130101', periods=3)) + # Coercion + assert float(Series([1.])) == 1.0 + assert int(Series([1.])) == 1 + assert long(Series([1.])) == 1 def test_constructor(self): - # Recognize TimeSeries - with tm.assert_produces_warning(FutureWarning): - self.assertTrue(self.ts.is_time_series) - self.assertTrue(self.ts.index.is_all_dates) + assert self.ts.index.is_all_dates # Pass in Series derived = Series(self.ts) - with tm.assert_produces_warning(FutureWarning): - 
self.assertTrue(derived.is_time_series) - self.assertTrue(derived.index.is_all_dates) + assert derived.index.is_all_dates - self.assertTrue(tm.equalContents(derived.index, self.ts.index)) + assert tm.equalContents(derived.index, self.ts.index) # Ensure new index is not created - self.assertEqual(id(self.ts.index), id(derived.index)) + assert id(self.ts.index) == id(derived.index) # Mixed type Series mixed = Series(['hello', np.NaN], index=[0, 1]) - self.assertEqual(mixed.dtype, np.object_) - self.assertIs(mixed[1], np.NaN) + assert mixed.dtype == np.object_ + assert mixed[1] is np.NaN - with tm.assert_produces_warning(FutureWarning): - self.assertFalse(self.empty.is_time_series) - self.assertFalse(self.empty.index.is_all_dates) - with tm.assert_produces_warning(FutureWarning): - self.assertFalse(Series({}).is_time_series) - self.assertFalse(Series({}).index.is_all_dates) - self.assertRaises(Exception, Series, np.random.randn(3, 3), - index=np.arange(3)) + assert not self.empty.index.is_all_dates + assert not Series({}).index.is_all_dates + pytest.raises(Exception, Series, np.random.randn(3, 3), + index=np.arange(3)) mixed.name = 'Series' rs = Series(mixed).name xp = 'Series' - self.assertEqual(rs, xp) + assert rs == xp # raise on MultiIndex GH4187 m = MultiIndex.from_arrays([[1, 2], [3, 4]]) - self.assertRaises(NotImplementedError, Series, m) + pytest.raises(NotImplementedError, Series, m) - def test_constructor_empty(self): + @pytest.mark.parametrize('input_class', [list, dict, OrderedDict]) + def test_constructor_empty(self, input_class): empty = Series() - empty2 = Series([]) + empty2 = Series(input_class()) - # the are Index() and RangeIndex() which don't compare type equal + # these are Index() and RangeIndex() which don't compare type equal # but are just .equals assert_series_equal(empty, empty2, check_index_type=False) - empty = Series(index=lrange(10)) - empty2 = Series(np.nan, index=lrange(10)) - assert_series_equal(empty, empty2) + # With explicit dtype: 
+ empty = Series(dtype='float64') + empty2 = Series(input_class(), dtype='float64') + assert_series_equal(empty, empty2, check_index_type=False) + + # GH 18515 : with dtype=category: + empty = Series(dtype='category') + empty2 = Series(input_class(), dtype='category') + assert_series_equal(empty, empty2, check_index_type=False) + + if input_class is not list: + # With index: + empty = Series(index=lrange(10)) + empty2 = Series(input_class(), index=lrange(10)) + assert_series_equal(empty, empty2) + + # With index and dtype float64: + empty = Series(np.nan, index=lrange(10)) + empty2 = Series(input_class(), index=lrange(10), dtype='float64') + assert_series_equal(empty, empty2) + + @pytest.mark.parametrize('input_arg', [np.nan, float('nan')]) + def test_constructor_nan(self, input_arg): + empty = Series(dtype='float64', index=lrange(10)) + empty2 = Series(input_arg, index=lrange(10)) + + assert_series_equal(empty, empty2, check_index_type=False) def test_constructor_series(self): index1 = ['d', 'b', 'a', 'c'] @@ -119,6 +142,25 @@ def test_constructor_list_like(self): result = Series(obj, index=[0, 1, 2]) assert_series_equal(result, expected) + @pytest.mark.parametrize('input_vals', [ + ([1, 2]), + ([1.0, 2.0, np.nan]), + (['1', '2']), + (list(pd.date_range('1/1/2011', periods=2, freq='H'))), + (list(pd.date_range('1/1/2011', periods=2, freq='H', + tz='US/Eastern'))), + ([pd.Interval(left=0, right=5)]), + ]) + def test_constructor_list_str(self, input_vals): + # GH 16605 + # Ensure that data elements from a list are converted to strings + # when dtype is str, 'str', or 'U' + + for dtype in ['str', str, 'U']: + result = Series(input_vals, dtype=dtype) + expected = Series(input_vals).astype(dtype) + assert_series_equal(result, expected) + def test_constructor_generator(self): gen = (i for i in range(10)) @@ -151,15 +193,130 @@ def test_constructor_categorical(self): tm.assert_categorical_equal(res.values, cat) # GH12574 - self.assertRaises( + pytest.raises( ValueError, 
lambda: Series(pd.Categorical([1, 2, 3]), dtype='int64')) cat = Series(pd.Categorical([1, 2, 3]), dtype='category') - self.assertTrue(is_categorical_dtype(cat)) - self.assertTrue(is_categorical_dtype(cat.dtype)) + assert is_categorical_dtype(cat) + assert is_categorical_dtype(cat.dtype) s = Series([1, 2, 3], dtype='category') - self.assertTrue(is_categorical_dtype(s)) - self.assertTrue(is_categorical_dtype(s.dtype)) + assert is_categorical_dtype(s) + assert is_categorical_dtype(s.dtype) + + def test_constructor_categorical_with_coercion(self): + factor = Categorical(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']) + # test basic creation / coercion of categoricals + s = Series(factor, name='A') + assert s.dtype == 'category' + assert len(s) == len(factor) + str(s.values) + str(s) + + # in a frame + df = DataFrame({'A': factor}) + result = df['A'] + tm.assert_series_equal(result, s) + result = df.iloc[:, 0] + tm.assert_series_equal(result, s) + assert len(df) == len(factor) + str(df.values) + str(df) + + df = DataFrame({'A': s}) + result = df['A'] + tm.assert_series_equal(result, s) + assert len(df) == len(factor) + str(df.values) + str(df) + + # multiples + df = DataFrame({'A': s, 'B': s, 'C': 1}) + result1 = df['A'] + result2 = df['B'] + tm.assert_series_equal(result1, s) + tm.assert_series_equal(result2, s, check_names=False) + assert result2.name == 'B' + assert len(df) == len(factor) + str(df.values) + str(df) + + # GH8623 + x = DataFrame([[1, 'John P. Doe'], [2, 'Jane Dove'], + [1, 'John P. 
Doe']], + columns=['person_id', 'person_name']) + x['person_name'] = Categorical(x.person_name + ) # doing this breaks transform + + expected = x.iloc[0].person_name + result = x.person_name.iloc[0] + assert result == expected + + result = x.person_name[0] + assert result == expected + + result = x.person_name.loc[0] + assert result == expected + + def test_constructor_categorical_dtype(self): + result = pd.Series(['a', 'b'], + dtype=CategoricalDtype(['a', 'b', 'c'], + ordered=True)) + assert is_categorical_dtype(result) is True + tm.assert_index_equal(result.cat.categories, pd.Index(['a', 'b', 'c'])) + assert result.cat.ordered + + result = pd.Series(['a', 'b'], dtype=CategoricalDtype(['b', 'a'])) + assert is_categorical_dtype(result) + tm.assert_index_equal(result.cat.categories, pd.Index(['b', 'a'])) + assert result.cat.ordered is False + + # GH 19565 - Check broadcasting of scalar with Categorical dtype + result = Series('a', index=[0, 1], + dtype=CategoricalDtype(['a', 'b'], ordered=True)) + expected = Series(['a', 'a'], index=[0, 1], + dtype=CategoricalDtype(['a', 'b'], ordered=True)) + tm.assert_series_equal(result, expected, check_categorical=True) + + def test_categorical_sideeffects_free(self): + # Passing a categorical to a Series and then changing values in either + # the series or the categorical should not change the values in the + # other one, IF you specify copy! 
+ cat = Categorical(["a", "b", "c", "a"]) + s = Series(cat, copy=True) + assert s.cat is not cat + s.cat.categories = [1, 2, 3] + exp_s = np.array([1, 2, 3, 1], dtype=np.int64) + exp_cat = np.array(["a", "b", "c", "a"], dtype=np.object_) + tm.assert_numpy_array_equal(s.__array__(), exp_s) + tm.assert_numpy_array_equal(cat.__array__(), exp_cat) + + # setting + s[0] = 2 + exp_s2 = np.array([2, 2, 3, 1], dtype=np.int64) + tm.assert_numpy_array_equal(s.__array__(), exp_s2) + tm.assert_numpy_array_equal(cat.__array__(), exp_cat) + + # however, copy is False by default + # so this WILL change values + cat = Categorical(["a", "b", "c", "a"]) + s = Series(cat) + assert s.values is cat + s.cat.categories = [1, 2, 3] + exp_s = np.array([1, 2, 3, 1], dtype=np.int64) + tm.assert_numpy_array_equal(s.__array__(), exp_s) + tm.assert_numpy_array_equal(cat.__array__(), exp_s) + + s[0] = 2 + exp_s2 = np.array([2, 2, 3, 1], dtype=np.int64) + tm.assert_numpy_array_equal(s.__array__(), exp_s2) + tm.assert_numpy_array_equal(cat.__array__(), exp_s2) + + def test_unordered_compare_equal(self): + left = pd.Series(['a', 'b', 'c'], + dtype=CategoricalDtype(['a', 'b'])) + right = pd.Series(pd.Categorical(['a', 'b', np.nan], + categories=['a', 'b'])) + tm.assert_series_equal(left, right) def test_constructor_maskedarray(self): data = ma.masked_all((3, ), dtype=float) @@ -215,14 +372,14 @@ def test_constructor_maskedarray(self): data = ma.masked_all((3, ), dtype='M8[ns]') result = Series(data) - expected = Series([tslib.iNaT, tslib.iNaT, tslib.iNaT], dtype='M8[ns]') + expected = Series([iNaT, iNaT, iNaT], dtype='M8[ns]') assert_series_equal(result, expected) data[0] = datetime(2001, 1, 1) data[2] = datetime(2001, 1, 3) index = ['a', 'b', 'c'] result = Series(data, index=index) - expected = Series([datetime(2001, 1, 1), tslib.iNaT, + expected = Series([datetime(2001, 1, 1), iNaT, datetime(2001, 1, 3)], index=index, dtype='M8[ns]') assert_series_equal(result, expected) @@ -234,27 +391,55 @@ def 
test_constructor_maskedarray(self): def test_series_ctor_plus_datetimeindex(self): rng = date_range('20090415', '20090519', freq='B') - data = dict((k, 1) for k in rng) + data = {k: 1 for k in rng} result = Series(data, index=rng) - self.assertIs(result.index, rng) + assert result.index is rng def test_constructor_default_index(self): s = Series([0, 1, 2]) tm.assert_index_equal(s.index, pd.Index(np.arange(3))) + @pytest.mark.parametrize('input', [[1, 2, 3], + (1, 2, 3), + list(range(3)), + pd.Categorical(['a', 'b', 'a']), + (i for i in range(3)), + map(lambda x: x, range(3))]) + def test_constructor_index_mismatch(self, input): + # GH 19342 + # test that construction of a Series with an index of different length + # raises an error + msg = 'Length of passed values is 3, index implies 4' + with pytest.raises(ValueError, message=msg): + Series(input, index=np.arange(4)) + + def test_constructor_numpy_scalar(self): + # GH 19342 + # construction with a numpy scalar + # should not raise + result = Series(np.array(100), index=np.arange(4), dtype='int64') + expected = Series(100, index=np.arange(4), dtype='int64') + tm.assert_series_equal(result, expected) + + def test_constructor_broadcast_list(self): + # GH 19342 + # construction with single-element container and index + # should raise + pytest.raises(ValueError, Series, ['foo'], index=['a', 'b', 'c']) + def test_constructor_corner(self): df = tm.makeTimeDataFrame() objs = [df, df] s = Series(objs, index=[0, 1]) - tm.assertIsInstance(s, Series) + assert isinstance(s, Series) def test_constructor_sanitize(self): s = Series(np.array([1., 1., 8.]), dtype='i8') - self.assertEqual(s.dtype, np.dtype('i8')) + assert s.dtype == np.dtype('i8') s = Series(np.array([1., 1., np.nan]), copy=True, dtype='i8') - self.assertEqual(s.dtype, np.dtype('f8')) + assert s.dtype == np.dtype('f8') def test_constructor_copy(self): # GH15125 @@ -268,16 +453,35 @@ def test_constructor_copy(self): # changes to origin of copy does not affect the 
copy x[0] = 2. - self.assertFalse(x.equals(y)) - self.assertEqual(x[0], 2.) - self.assertEqual(y[0], 1.) + assert not x.equals(y) + assert x[0] == 2. + assert y[0] == 1. + + @pytest.mark.parametrize( + "index", + [ + pd.date_range('20170101', periods=3, tz='US/Eastern'), + pd.date_range('20170101', periods=3), + pd.timedelta_range('1 day', periods=3), + pd.period_range('2012Q1', periods=3, freq='Q'), + pd.Index(list('abc')), + pd.Int64Index([1, 2, 3]), + pd.RangeIndex(0, 3)], + ids=lambda x: type(x).__name__) + def test_constructor_limit_copies(self, index): + # GH 17449 + # limit copies of input + s = pd.Series(index) + + # we make 1 copy; this is just a smoke test here + assert s._data.blocks[0].values is not index def test_constructor_pass_none(self): s = Series(None, index=lrange(5)) - self.assertEqual(s.dtype, np.float64) + assert s.dtype == np.float64 s = Series(None, index=lrange(5), dtype=object) - self.assertEqual(s.dtype, np.object_) + assert s.dtype == np.object_ # GH 7431 # inference on the index @@ -288,12 +492,12 @@ def test_constructor_pass_none(self): def test_constructor_pass_nan_nat(self): # GH 13467 exp = Series([np.nan, np.nan], dtype=np.float64) - self.assertEqual(exp.dtype, np.float64) + assert exp.dtype == np.float64 tm.assert_series_equal(Series([np.nan, np.nan]), exp) tm.assert_series_equal(Series(np.array([np.nan, np.nan])), exp) exp = Series([pd.NaT, pd.NaT]) - self.assertEqual(exp.dtype, 'datetime64[ns]') + assert exp.dtype == 'datetime64[ns]' tm.assert_series_equal(Series([pd.NaT, pd.NaT]), exp) tm.assert_series_equal(Series(np.array([pd.NaT, pd.NaT])), exp) @@ -304,7 +508,7 @@ def test_constructor_pass_nan_nat(self): tm.assert_series_equal(Series(np.array([np.nan, pd.NaT])), exp) def test_constructor_cast(self): - self.assertRaises(ValueError, Series, ['a', 'b', 'c'], dtype=float) + pytest.raises(ValueError, Series, ['a', 'b', 'c'], dtype=float) def test_constructor_dtype_nocast(self): # 1572 @@ -313,17 +517,17 @@ def 
test_constructor_dtype_nocast(self): s2 = Series(s, dtype=np.int64) s2[1] = 5 - self.assertEqual(s[1], 5) + assert s[1] == 5 def test_constructor_datelike_coercion(self): # GH 9477 - # incorrectly infering on dateimelike looking when object dtype is + # incorrectly inferring on dateimelike looking when object dtype is # specified s = Series([Timestamp('20130101'), 'NOV'], dtype=object) - self.assertEqual(s.iloc[0], Timestamp('20130101')) - self.assertEqual(s.iloc[1], 'NOV') - self.assertTrue(s.dtype == object) + assert s.iloc[0] == Timestamp('20130101') + assert s.iloc[1] == 'NOV' + assert s.dtype == object # the dtype was being reset on the slicing and re-inferred to datetime # even thought the blocks are mixed @@ -337,31 +541,38 @@ def test_constructor_datelike_coercion(self): 'mat': mat}, index=belly) result = df.loc['3T19'] - self.assertTrue(result.dtype == object) + assert result.dtype == object result = df.loc['216'] - self.assertTrue(result.dtype == object) + assert result.dtype == object + + def test_constructor_datetimes_with_nulls(self): + # gh-15869 + for arr in [np.array([None, None, None, None, + datetime.now(), None]), + np.array([None, None, datetime.now(), None])]: + result = Series(arr) + assert result.dtype == 'M8[ns]' def test_constructor_dtype_datetime64(self): - import pandas.tslib as tslib - s = Series(tslib.iNaT, dtype='M8[ns]', index=lrange(5)) - self.assertTrue(isnull(s).all()) + s = Series(iNaT, dtype='M8[ns]', index=lrange(5)) + assert isna(s).all() # in theory this should be all nulls, but since # we are not specifying a dtype is ambiguous - s = Series(tslib.iNaT, index=lrange(5)) - self.assertFalse(isnull(s).all()) + s = Series(iNaT, index=lrange(5)) + assert not isna(s).all() s = Series(nan, dtype='M8[ns]', index=lrange(5)) - self.assertTrue(isnull(s).all()) + assert isna(s).all() - s = Series([datetime(2001, 1, 2, 0, 0), tslib.iNaT], dtype='M8[ns]') - self.assertTrue(isnull(s[1])) - self.assertEqual(s.dtype, 'M8[ns]') + s = 
Series([datetime(2001, 1, 2, 0, 0), iNaT], dtype='M8[ns]') + assert isna(s[1]) + assert s.dtype == 'M8[ns]' s = Series([datetime(2001, 1, 2, 0, 0), nan], dtype='M8[ns]') - self.assertTrue(isnull(s[1])) - self.assertEqual(s.dtype, 'M8[ns]') + assert isna(s[1]) + assert s.dtype == 'M8[ns]' # GH3416 dates = [ @@ -371,32 +582,28 @@ def test_constructor_dtype_datetime64(self): ] s = Series(dates) - self.assertEqual(s.dtype, 'M8[ns]') + assert s.dtype == 'M8[ns]' s.iloc[0] = np.nan - self.assertEqual(s.dtype, 'M8[ns]') - - # invalid astypes - for t in ['s', 'D', 'us', 'ms']: - self.assertRaises(TypeError, s.astype, 'M8[%s]' % t) + assert s.dtype == 'M8[ns]' # GH3414 related - self.assertRaises(TypeError, lambda x: Series( + pytest.raises(TypeError, lambda x: Series( Series(dates).astype('int') / 1000000, dtype='M8[ms]')) - self.assertRaises(TypeError, - lambda x: Series(dates, dtype='datetime64')) + pytest.raises(TypeError, + lambda x: Series(dates, dtype='datetime64')) # invalid dates can be help as object result = Series([datetime(2, 1, 1)]) - self.assertEqual(result[0], datetime(2, 1, 1, 0, 0)) + assert result[0] == datetime(2, 1, 1, 0, 0) result = Series([datetime(3000, 1, 1)]) - self.assertEqual(result[0], datetime(3000, 1, 1, 0, 0)) + assert result[0] == datetime(3000, 1, 1, 0, 0) # don't mix types result = Series([Timestamp('20130101'), 1], index=['a', 'b']) - self.assertEqual(result['a'], Timestamp('20130101')) - self.assertEqual(result['b'], 1) + assert result['a'] == Timestamp('20130101') + assert result['b'] == 1 # GH6529 # coerce datetime64 non-ns properly @@ -421,45 +628,45 @@ def test_constructor_dtype_datetime64(self): dates2 = np.array([d.date() for d in dates.to_pydatetime()], dtype=object) series1 = Series(dates2, dates) - self.assert_numpy_array_equal(series1.values, dates2) - self.assertEqual(series1.dtype, object) + tm.assert_numpy_array_equal(series1.values, dates2) + assert series1.dtype == object # these will correctly infer a datetime s = 
Series([None, pd.NaT, '2013-08-05 15:30:00.000001']) - self.assertEqual(s.dtype, 'datetime64[ns]') + assert s.dtype == 'datetime64[ns]' s = Series([np.nan, pd.NaT, '2013-08-05 15:30:00.000001']) - self.assertEqual(s.dtype, 'datetime64[ns]') + assert s.dtype == 'datetime64[ns]' s = Series([pd.NaT, None, '2013-08-05 15:30:00.000001']) - self.assertEqual(s.dtype, 'datetime64[ns]') + assert s.dtype == 'datetime64[ns]' s = Series([pd.NaT, np.nan, '2013-08-05 15:30:00.000001']) - self.assertEqual(s.dtype, 'datetime64[ns]') + assert s.dtype == 'datetime64[ns]' # tz-aware (UTC and other tz's) # GH 8411 dr = date_range('20130101', periods=3) - self.assertTrue(Series(dr).iloc[0].tz is None) + assert Series(dr).iloc[0].tz is None dr = date_range('20130101', periods=3, tz='UTC') - self.assertTrue(str(Series(dr).iloc[0].tz) == 'UTC') + assert str(Series(dr).iloc[0].tz) == 'UTC' dr = date_range('20130101', periods=3, tz='US/Eastern') - self.assertTrue(str(Series(dr).iloc[0].tz) == 'US/Eastern') + assert str(Series(dr).iloc[0].tz) == 'US/Eastern' # non-convertible s = Series([1479596223000, -1479590, pd.NaT]) - self.assertTrue(s.dtype == 'object') - self.assertTrue(s[2] is pd.NaT) - self.assertTrue('NaT' in str(s)) + assert s.dtype == 'object' + assert s[2] is pd.NaT + assert 'NaT' in str(s) # if we passed a NaT it remains s = Series([datetime(2010, 1, 1), datetime(2, 1, 1), pd.NaT]) - self.assertTrue(s.dtype == 'object') - self.assertTrue(s[2] is pd.NaT) - self.assertTrue('NaT' in str(s)) + assert s.dtype == 'object' + assert s[2] is pd.NaT + assert 'NaT' in str(s) # if we passed a nan it remains s = Series([datetime(2010, 1, 1), datetime(2, 1, 1), np.nan]) - self.assertTrue(s.dtype == 'object') - self.assertTrue(s[2] is np.nan) - self.assertTrue('NaN' in str(s)) + assert s.dtype == 'object' + assert s[2] is np.nan + assert 'NaN' in str(s) def test_constructor_with_datetime_tz(self): @@ -468,27 +675,27 @@ def test_constructor_with_datetime_tz(self): dr = date_range('20130101', 
periods=3, tz='US/Eastern') s = Series(dr) - self.assertTrue(s.dtype.name == 'datetime64[ns, US/Eastern]') - self.assertTrue(s.dtype == 'datetime64[ns, US/Eastern]') - self.assertTrue(is_datetime64tz_dtype(s.dtype)) - self.assertTrue('datetime64[ns, US/Eastern]' in str(s)) + assert s.dtype.name == 'datetime64[ns, US/Eastern]' + assert s.dtype == 'datetime64[ns, US/Eastern]' + assert is_datetime64tz_dtype(s.dtype) + assert 'datetime64[ns, US/Eastern]' in str(s) # export result = s.values - self.assertIsInstance(result, np.ndarray) - self.assertTrue(result.dtype == 'datetime64[ns]') + assert isinstance(result, np.ndarray) + assert result.dtype == 'datetime64[ns]' exp = pd.DatetimeIndex(result) exp = exp.tz_localize('UTC').tz_convert(tz=s.dt.tz) - self.assert_index_equal(dr, exp) + tm.assert_index_equal(dr, exp) # indexing result = s.iloc[0] - self.assertEqual(result, Timestamp('2013-01-01 00:00:00-0500', - tz='US/Eastern', freq='D')) + assert result == Timestamp('2013-01-01 00:00:00-0500', + tz='US/Eastern', freq='D') result = s[0] - self.assertEqual(result, Timestamp('2013-01-01 00:00:00-0500', - tz='US/Eastern', freq='D')) + assert result == Timestamp('2013-01-01 00:00:00-0500', + tz='US/Eastern', freq='D') result = s[Series([True, True, False], index=s.index)] assert_series_equal(result, s[0:2]) @@ -500,36 +707,17 @@ def test_constructor_with_datetime_tz(self): result = pd.concat([s.iloc[0:1], s.iloc[1:]]) assert_series_equal(result, s) - # astype - result = s.astype(object) - expected = Series(DatetimeIndex(s._values).asobject) - assert_series_equal(result, expected) - - result = Series(s.values).dt.tz_localize('UTC').dt.tz_convert(s.dt.tz) - assert_series_equal(result, s) - - # astype - datetime64[ns, tz] - result = Series(s.values).astype('datetime64[ns, US/Eastern]') - assert_series_equal(result, s) - - result = Series(s.values).astype(s.dtype) - assert_series_equal(result, s) - - result = s.astype('datetime64[ns, CET]') - expected = 
Series(date_range('20130101 06:00:00', periods=3, tz='CET')) - assert_series_equal(result, expected) - # short str - self.assertTrue('datetime64[ns, US/Eastern]' in str(s)) + assert 'datetime64[ns, US/Eastern]' in str(s) # formatting with NaT result = s.shift() - self.assertTrue('datetime64[ns, US/Eastern]' in str(result)) - self.assertTrue('NaT' in str(result)) + assert 'datetime64[ns, US/Eastern]' in str(result) + assert 'NaT' in str(result) # long str t = Series(date_range('20130101', periods=1000, tz='US/Eastern')) - self.assertTrue('datetime64[ns, US/Eastern]' in str(t)) + assert 'datetime64[ns, US/Eastern]' in str(t) result = pd.DatetimeIndex(s, freq='infer') tm.assert_index_equal(result, dr) @@ -537,19 +725,52 @@ def test_constructor_with_datetime_tz(self): # inference s = Series([pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'), pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Pacific')]) - self.assertTrue(s.dtype == 'datetime64[ns, US/Pacific]') - self.assertTrue(lib.infer_dtype(s) == 'datetime64') + assert s.dtype == 'datetime64[ns, US/Pacific]' + assert lib.infer_dtype(s) == 'datetime64' s = Series([pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'), pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Eastern')]) - self.assertTrue(s.dtype == 'object') - self.assertTrue(lib.infer_dtype(s) == 'datetime') + assert s.dtype == 'object' + assert lib.infer_dtype(s) == 'datetime' # with all NaT s = Series(pd.NaT, index=[0, 1], dtype='datetime64[ns, US/Eastern]') expected = Series(pd.DatetimeIndex(['NaT', 'NaT'], tz='US/Eastern')) assert_series_equal(s, expected) + @pytest.mark.parametrize("arr_dtype", [np.int64, np.float64]) + @pytest.mark.parametrize("dtype", ["M8", "m8"]) + @pytest.mark.parametrize("unit", ['ns', 'us', 'ms', 's', 'h', 'm', 'D']) + def test_construction_to_datetimelike_unit(self, arr_dtype, dtype, unit): + # tests all units + # gh-19223 + dtype = "{}[{}]".format(dtype, unit) + arr = np.array([1, 2, 3], dtype=arr_dtype) + s = 
Series(arr) + result = s.astype(dtype) + expected = Series(arr.astype(dtype)) + + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('arg', + ['2013-01-01 00:00:00', pd.NaT, np.nan, None]) + def test_constructor_with_naive_string_and_datetimetz_dtype(self, arg): + # GH 17415: With naive string + result = Series([arg], dtype='datetime64[ns, CET]') + expected = Series(pd.Timestamp(arg)).dt.tz_localize('CET') + assert_series_equal(result, expected) + + def test_construction_interval(self): + # construction from interval & array of intervals + index = IntervalIndex.from_breaks(np.arange(3), closed='right') + result = Series(index) + repr(result) + str(result) + tm.assert_index_equal(Index(result.values), index) + + result = Series(index.values) + tm.assert_index_equal(Index(result.values), index) + def test_construction_consistency(self): # make sure that we are not re-localizing upon construction @@ -571,10 +792,10 @@ def test_constructor_periodindex(self): pi = period_range('20130101', periods=5, freq='D') s = Series(pi) - expected = Series(pi.asobject) + expected = Series(pi.astype(object)) assert_series_equal(s, expected) - self.assertEqual(s.dtype, 'object') + assert s.dtype == 'object' def test_constructor_dict(self): d = {'a': 0., 'b': 1., 'c': 2.} @@ -590,47 +811,32 @@ def test_constructor_dict(self): expected.iloc[1] = 1 assert_series_equal(result, expected) - def test_constructor_dict_multiindex(self): - check = lambda result, expected: tm.assert_series_equal( - result, expected, check_dtype=True, check_series_type=True) - d = {('a', 'a'): 0., ('b', 'a'): 1., ('b', 'c'): 2.} - _d = sorted(d.items()) - ser = Series(d) - expected = Series([x[1] for x in _d], - index=MultiIndex.from_tuples([x[0] for x in _d])) - check(ser, expected) - - d['z'] = 111. 
- _d.insert(0, ('z', d['z'])) - ser = Series(d) - expected = Series([x[1] for x in _d], index=Index( - [x[0] for x in _d], tupleize_cols=False)) - ser = ser.reindex(index=expected.index) - check(ser, expected) - - def test_constructor_dict_timedelta_index(self): - # GH #12169 : Resample category data with timedelta index - # construct Series from dict as data and TimedeltaIndex as index - # will result NaN in result Series data - expected = Series( - data=['A', 'B', 'C'], - index=pd.to_timedelta([0, 10, 20], unit='s') - ) - - result = Series( - data={pd.to_timedelta(0, unit='s'): 'A', - pd.to_timedelta(10, unit='s'): 'B', - pd.to_timedelta(20, unit='s'): 'C'}, - index=pd.to_timedelta([0, 10, 20], unit='s') - ) - # this should work + def test_constructor_dict_order(self): + # GH19018 + # initialization ordering: by insertion order if python>= 3.6, else + # order by value + d = {'b': 1, 'a': 0, 'c': 2} + result = Series(d) + if PY36: + expected = Series([1, 0, 2], index=list('bac')) + else: + expected = Series([0, 1, 2], index=list('abc')) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("value", [2, np.nan, None, float('nan')]) + def test_constructor_dict_nan_key(self, value): + # GH 18480 + d = {1: 'a', value: 'b', float('nan'): 'c', 4: 'd'} + result = Series(d).sort_values() + expected = Series(['a', 'b', 'c', 'd'], index=[1, value, np.nan, 4]) assert_series_equal(result, expected) - def test_constructor_subclass_dict(self): - data = tm.TestSubDict((x, 10.0 * x) for x in range(10)) - series = Series(data) - refseries = Series(dict(compat.iteritems(data))) - assert_series_equal(refseries, series) + # MultiIndex: + d = {(1, 1): 'a', (2, np.nan): 'b', (3, value): 'c'} + result = Series(d).sort_values() + expected = Series(['a', 'b', 'c'], + index=Index([(1, 1), (2, np.nan), (3, value)])) + assert_series_equal(result, expected) def test_constructor_dict_datetime64_index(self): # GH 9456 @@ -655,164 +861,160 @@ def create_data(constructor): 
assert_series_equal(result_datetime, expected) assert_series_equal(result_Timestamp, expected) - def test_orderedDict_ctor(self): - # GH3283 - import pandas - import random - data = OrderedDict([('col%s' % i, random.random()) for i in range(12)]) - s = pandas.Series(data) - self.assertTrue(all(s.values == list(data.values()))) - - def test_orderedDict_subclass_ctor(self): - # GH3283 - import pandas - import random - - class A(OrderedDict): - pass - - data = A([('col%s' % i, random.random()) for i in range(12)]) - s = pandas.Series(data) - self.assertTrue(all(s.values == list(data.values()))) - def test_constructor_list_of_tuples(self): data = [(1, 1), (2, 2), (2, 3)] s = Series(data) - self.assertEqual(list(s), data) + assert list(s) == data def test_constructor_tuple_of_tuples(self): data = ((1, 1), (2, 2), (2, 3)) s = Series(data) - self.assertEqual(tuple(s), data) + assert tuple(s) == data + + def test_constructor_dict_of_tuples(self): + data = {(1, 2): 3, + (None, 5): 6} + result = Series(data).sort_values() + expected = Series([3, 6], + index=MultiIndex.from_tuples([(1, 2), (None, 5)])) + tm.assert_series_equal(result, expected) def test_constructor_set(self): values = set([1, 2, 3, 4, 5]) - self.assertRaises(TypeError, Series, values) + pytest.raises(TypeError, Series, values) values = frozenset(values) - self.assertRaises(TypeError, Series, values) + pytest.raises(TypeError, Series, values) def test_fromDict(self): data = {'a': 0, 'b': 1, 'c': 2, 'd': 3} series = Series(data) - self.assertTrue(tm.is_sorted(series.index)) + assert tm.is_sorted(series.index) data = {'a': 0, 'b': '1', 'c': '2', 'd': datetime.now()} series = Series(data) - self.assertEqual(series.dtype, np.object_) + assert series.dtype == np.object_ data = {'a': 0, 'b': '1', 'c': '2', 'd': '3'} series = Series(data) - self.assertEqual(series.dtype, np.object_) + assert series.dtype == np.object_ data = {'a': '0', 'b': '1'} series = Series(data, dtype=float) - self.assertEqual(series.dtype, 
np.float64) + assert series.dtype == np.float64 def test_fromValue(self): nans = Series(np.NaN, index=self.ts.index) - self.assertEqual(nans.dtype, np.float_) - self.assertEqual(len(nans), len(self.ts)) + assert nans.dtype == np.float_ + assert len(nans) == len(self.ts) strings = Series('foo', index=self.ts.index) - self.assertEqual(strings.dtype, np.object_) - self.assertEqual(len(strings), len(self.ts)) + assert strings.dtype == np.object_ + assert len(strings) == len(self.ts) d = datetime.now() dates = Series(d, index=self.ts.index) - self.assertEqual(dates.dtype, 'M8[ns]') - self.assertEqual(len(dates), len(self.ts)) + assert dates.dtype == 'M8[ns]' + assert len(dates) == len(self.ts) # GH12336 # Test construction of categorical series from value categorical = Series(0, index=self.ts.index, dtype="category") expected = Series(0, index=self.ts.index).astype("category") - self.assertEqual(categorical.dtype, 'category') - self.assertEqual(len(categorical), len(self.ts)) + assert categorical.dtype == 'category' + assert len(categorical) == len(self.ts) tm.assert_series_equal(categorical, expected) def test_constructor_dtype_timedelta64(self): # basic td = Series([timedelta(days=i) for i in range(3)]) - self.assertEqual(td.dtype, 'timedelta64[ns]') + assert td.dtype == 'timedelta64[ns]' td = Series([timedelta(days=1)]) - self.assertEqual(td.dtype, 'timedelta64[ns]') + assert td.dtype == 'timedelta64[ns]' td = Series([timedelta(days=1), timedelta(days=2), np.timedelta64( 1, 's')]) - self.assertEqual(td.dtype, 'timedelta64[ns]') + assert td.dtype == 'timedelta64[ns]' # mixed with NaT - from pandas import tslib - td = Series([timedelta(days=1), tslib.NaT], dtype='m8[ns]') - self.assertEqual(td.dtype, 'timedelta64[ns]') + td = Series([timedelta(days=1), NaT], dtype='m8[ns]') + assert td.dtype == 'timedelta64[ns]' td = Series([timedelta(days=1), np.nan], dtype='m8[ns]') - self.assertEqual(td.dtype, 'timedelta64[ns]') + assert td.dtype == 'timedelta64[ns]' td = 
Series([np.timedelta64(300000000), pd.NaT], dtype='m8[ns]') - self.assertEqual(td.dtype, 'timedelta64[ns]') + assert td.dtype == 'timedelta64[ns]' # improved inference # GH5689 - td = Series([np.timedelta64(300000000), pd.NaT]) - self.assertEqual(td.dtype, 'timedelta64[ns]') + td = Series([np.timedelta64(300000000), NaT]) + assert td.dtype == 'timedelta64[ns]' # because iNaT is int, not coerced to timedelta - td = Series([np.timedelta64(300000000), tslib.iNaT]) - self.assertEqual(td.dtype, 'object') + td = Series([np.timedelta64(300000000), iNaT]) + assert td.dtype == 'object' td = Series([np.timedelta64(300000000), np.nan]) - self.assertEqual(td.dtype, 'timedelta64[ns]') + assert td.dtype == 'timedelta64[ns]' td = Series([pd.NaT, np.timedelta64(300000000)]) - self.assertEqual(td.dtype, 'timedelta64[ns]') + assert td.dtype == 'timedelta64[ns]' td = Series([np.timedelta64(1, 's')]) - self.assertEqual(td.dtype, 'timedelta64[ns]') + assert td.dtype == 'timedelta64[ns]' # these are frequency conversion astypes # for t in ['s', 'D', 'us', 'ms']: - # self.assertRaises(TypeError, td.astype, 'm8[%s]' % t) + # pytest.raises(TypeError, td.astype, 'm8[%s]' % t) # valid astype td.astype('int64') # invalid casting - self.assertRaises(TypeError, td.astype, 'int32') + pytest.raises(TypeError, td.astype, 'int32') # this is an invalid casting def f(): Series([timedelta(days=1), 'foo'], dtype='m8[ns]') - self.assertRaises(Exception, f) + pytest.raises(Exception, f) # leave as object here td = Series([timedelta(days=i) for i in range(3)] + ['foo']) - self.assertEqual(td.dtype, 'object') + assert td.dtype == 'object' # these will correctly infer a timedelta s = Series([None, pd.NaT, '1 Day']) - self.assertEqual(s.dtype, 'timedelta64[ns]') + assert s.dtype == 'timedelta64[ns]' s = Series([np.nan, pd.NaT, '1 Day']) - self.assertEqual(s.dtype, 'timedelta64[ns]') + assert s.dtype == 'timedelta64[ns]' s = Series([pd.NaT, None, '1 Day']) - self.assertEqual(s.dtype, 'timedelta64[ns]') + 
assert s.dtype == 'timedelta64[ns]' s = Series([pd.NaT, np.nan, '1 Day']) - self.assertEqual(s.dtype, 'timedelta64[ns]') + assert s.dtype == 'timedelta64[ns]' + + # GH 16406 + def test_constructor_mixed_tz(self): + s = Series([Timestamp('20130101'), + Timestamp('20130101', tz='US/Eastern')]) + expected = Series([Timestamp('20130101'), + Timestamp('20130101', tz='US/Eastern')], + dtype='object') + assert_series_equal(s, expected) def test_NaT_scalar(self): - series = Series([0, 1000, 2000, tslib.iNaT], dtype='M8[ns]') + series = Series([0, 1000, 2000, iNaT], dtype='M8[ns]') val = series[3] - self.assertTrue(isnull(val)) + assert isna(val) series[2] = val - self.assertTrue(isnull(series[2])) + assert isna(series[2]) def test_NaT_cast(self): # GH10747 @@ -824,26 +1026,110 @@ def test_constructor_name_hashable(self): for n in [777, 777., 'name', datetime(2001, 11, 11), (1, ), u"\u05D0"]: for data in [[1, 2, 3], np.ones(3), {'a': 0, 'b': 1}]: s = Series(data, name=n) - self.assertEqual(s.name, n) + assert s.name == n def test_constructor_name_unhashable(self): for n in [['name_list'], np.ones(2), {1: 2}]: for data in [['name_list'], np.ones(2), {1: 2}]: - self.assertRaises(TypeError, Series, data, name=n) + pytest.raises(TypeError, Series, data, name=n) def test_auto_conversion(self): series = Series(list(date_range('1/1/2000', periods=10))) - self.assertEqual(series.dtype, 'M8[ns]') + assert series.dtype == 'M8[ns]' - def test_constructor_cant_cast_datetime64(self): - msg = "Cannot cast datetime64 to " - with tm.assertRaisesRegexp(TypeError, msg): - Series(date_range('1/1/2000', periods=10), dtype=float) + def test_convert_non_ns(self): + # convert from a numpy array of non-ns timedelta64 + arr = np.array([1, 2, 3], dtype='timedelta64[s]') + s = Series(arr) + expected = Series(pd.timedelta_range('00:00:01', periods=3, freq='s')) + assert_series_equal(s, expected) + + # convert from a numpy array of non-ns datetime64 + # note that creating a numpy datetime64 is in LOCAL 
time!!!! + # seems to work for M8[D], but not for M8[s] + + s = Series(np.array(['2013-01-01', '2013-01-02', + '2013-01-03'], dtype='datetime64[D]')) + assert_series_equal(s, Series(date_range('20130101', periods=3, + freq='D'))) + + # s = Series(np.array(['2013-01-01 00:00:01','2013-01-01 + # 00:00:02','2013-01-01 00:00:03'],dtype='datetime64[s]')) + + # assert_series_equal(s,date_range('20130101 + # 00:00:01',period=3,freq='s')) + + @pytest.mark.parametrize( + "index", + [ + date_range('1/1/2000', periods=10), + timedelta_range('1 day', periods=10), + period_range('2000-Q1', periods=10, freq='Q')], + ids=lambda x: type(x).__name__) + def test_constructor_cant_cast_datetimelike(self, index): + + # floats are not ok + msg = "Cannot cast {} to ".format(type(index).__name__) + with tm.assert_raises_regex(TypeError, msg): + Series(index, dtype=float) + + # ints are ok + # we test with np.int64 to get similar results on + # windows / 32-bit platforms + result = Series(index, dtype=np.int64) + expected = Series(index.astype(np.int64)) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "index", + [ + date_range('1/1/2000', periods=10), + timedelta_range('1 day', periods=10), + period_range('2000-Q1', periods=10, freq='Q')], + ids=lambda x: type(x).__name__) + def test_constructor_cast_object(self, index): + s = Series(index, dtype=object) + exp = Series(index).astype(object) + tm.assert_series_equal(s, exp) - with tm.assertRaisesRegexp(TypeError, msg): - Series(date_range('1/1/2000', periods=10), dtype=int) + s = Series(pd.Index(index, dtype=object), dtype=object) + exp = Series(index).astype(object) + tm.assert_series_equal(s, exp) - def test_constructor_cast_object(self): - s = Series(date_range('1/1/2000', periods=10), dtype=object) - exp = Series(date_range('1/1/2000', periods=10)) + s = Series(index.astype(object), dtype=object) + exp = Series(index).astype(object) tm.assert_series_equal(s, exp) + + def 
test_constructor_generic_timestamp_deprecated(self): + # see gh-15524 + + with tm.assert_produces_warning(FutureWarning): + dtype = np.timedelta64 + s = Series([], dtype=dtype) + + assert s.empty + assert s.dtype == 'm8[ns]' + + with tm.assert_produces_warning(FutureWarning): + dtype = np.datetime64 + s = Series([], dtype=dtype) + + assert s.empty + assert s.dtype == 'M8[ns]' + + # These timestamps have the wrong frequencies, + # so an Exception should be raised now. + msg = "cannot convert timedeltalike" + with tm.assert_raises_regex(TypeError, msg): + Series([], dtype='m8[ps]') + + msg = "cannot convert datetimelike" + with tm.assert_raises_regex(TypeError, msg): + Series([], dtype='M8[ps]') + + @pytest.mark.parametrize('dtype', [None, 'uint8', 'category']) + def test_constructor_range_dtype(self, dtype): + # GH 16804 + expected = Series([0, 1, 2, 3, 4], dtype=dtype or 'int64') + result = Series(range(5), dtype=dtype) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index 4c697c7e52bb8..3abc0f724db25 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -1,17 +1,19 @@ # coding=utf-8 # pylint: disable-msg=E1101,W0612 +import locale +import calendar +import pytest + from datetime import datetime, date import numpy as np import pandas as pd -from pandas.types.common import is_integer_dtype, is_list_like +from pandas.core.dtypes.common import is_integer_dtype, is_list_like from pandas import (Index, Series, DataFrame, bdate_range, - date_range, period_range, timedelta_range) -from pandas.tseries.period import PeriodIndex -from pandas.tseries.index import Timestamp, DatetimeIndex -from pandas.tseries.tdi import TimedeltaIndex + date_range, period_range, timedelta_range, + PeriodIndex, DatetimeIndex, TimedeltaIndex) import pandas.core.common as com from pandas.util.testing import assert_series_equal @@ -20,28 +22,20 @@ 
from .common import TestData -class TestSeriesDatetimeValues(TestData, tm.TestCase): +class TestSeriesDatetimeValues(TestData): def test_dt_namespace_accessor(self): # GH 7207, 11128 # test .dt namespace accessor - ok_for_base = ['year', 'month', 'day', 'hour', 'minute', 'second', - 'weekofyear', 'week', 'dayofweek', 'weekday', - 'dayofyear', 'quarter', 'freq', 'days_in_month', - 'daysinmonth', 'is_leap_year'] - ok_for_period = ok_for_base + ['qyear', 'start_time', 'end_time'] + ok_for_period = PeriodIndex._datetimelike_ops ok_for_period_methods = ['strftime', 'to_timestamp', 'asfreq'] - ok_for_dt = ok_for_base + ['date', 'time', 'microsecond', 'nanosecond', - 'is_month_start', 'is_month_end', - 'is_quarter_start', 'is_quarter_end', - 'is_year_start', 'is_year_end', 'tz', - 'weekday_name'] + ok_for_dt = DatetimeIndex._datetimelike_ops ok_for_dt_methods = ['to_period', 'to_pydatetime', 'tz_localize', 'tz_convert', 'normalize', 'strftime', 'round', - 'floor', 'ceil', 'weekday_name'] - ok_for_td = ['days', 'seconds', 'microseconds', 'nanoseconds'] + 'floor', 'ceil', 'day_name', 'month_name'] + ok_for_td = TimedeltaIndex._datetimelike_ops ok_for_td_methods = ['components', 'to_pytimedelta', 'total_seconds', 'round', 'floor', 'ceil'] @@ -58,7 +52,7 @@ def compare(s, name): a = getattr(s.dt, prop) b = get_expected(s, prop) if not (is_list_like(a) and is_list_like(b)): - self.assertEqual(a, b) + assert a == b else: tm.assert_series_equal(a, b) @@ -78,8 +72,8 @@ def compare(s, name): getattr(s.dt, prop) result = s.dt.to_pydatetime() - self.assertIsInstance(result, np.ndarray) - self.assertTrue(result.dtype == object) + assert isinstance(result, np.ndarray) + assert result.dtype == object result = s.dt.tz_localize('US/Eastern') exp_values = DatetimeIndex(s.values).tz_localize('US/Eastern') @@ -87,10 +81,9 @@ def compare(s, name): tm.assert_series_equal(result, expected) tz_result = result.dt.tz - self.assertEqual(str(tz_result), 'US/Eastern') + assert str(tz_result) == 
'US/Eastern' freq_result = s.dt.freq - self.assertEqual(freq_result, DatetimeIndex(s.values, - freq='infer').freq) + assert freq_result == DatetimeIndex(s.values, freq='infer').freq # let's localize, then convert result = s.dt.tz_localize('UTC').dt.tz_convert('US/Eastern') @@ -148,8 +141,8 @@ def compare(s, name): getattr(s.dt, prop) result = s.dt.to_pydatetime() - self.assertIsInstance(result, np.ndarray) - self.assertTrue(result.dtype == object) + assert isinstance(result, np.ndarray) + assert result.dtype == object result = s.dt.tz_convert('CET') expected = Series(s._values.tz_convert('CET'), @@ -157,12 +150,11 @@ def compare(s, name): tm.assert_series_equal(result, expected) tz_result = result.dt.tz - self.assertEqual(str(tz_result), 'CET') + assert str(tz_result) == 'CET' freq_result = s.dt.freq - self.assertEqual(freq_result, DatetimeIndex(s.values, - freq='infer').freq) + assert freq_result == DatetimeIndex(s.values, freq='infer').freq - # timedeltaindex + # timedelta index cases = [Series(timedelta_range('1 day', periods=5), index=list('abcde'), name='xxx'), Series(timedelta_range('1 day 01:23:45', periods=5, @@ -179,20 +171,19 @@ def compare(s, name): getattr(s.dt, prop) result = s.dt.components - self.assertIsInstance(result, DataFrame) + assert isinstance(result, DataFrame) tm.assert_index_equal(result.index, s.index) result = s.dt.to_pytimedelta() - self.assertIsInstance(result, np.ndarray) - self.assertTrue(result.dtype == object) + assert isinstance(result, np.ndarray) + assert result.dtype == object result = s.dt.total_seconds() - self.assertIsInstance(result, pd.Series) - self.assertTrue(result.dtype == 'float64') + assert isinstance(result, pd.Series) + assert result.dtype == 'float64' freq_result = s.dt.freq - self.assertEqual(freq_result, TimedeltaIndex(s.values, - freq='infer').freq) + assert freq_result == TimedeltaIndex(s.values, freq='infer').freq # both index = date_range('20130101', periods=3, freq='D') @@ -226,7 +217,7 @@ def compare(s, 
name): getattr(s.dt, prop) freq_result = s.dt.freq - self.assertEqual(freq_result, PeriodIndex(s.values).freq) + assert freq_result == PeriodIndex(s.values).freq # test limited display api def get_dir(s): @@ -239,7 +230,7 @@ def get_dir(s): results, list(sorted(set(ok_for_dt + ok_for_dt_methods)))) s = Series(period_range('20130101', periods=5, - freq='D', name='xxx').asobject) + freq='D', name='xxx').astype(object)) results = get_dir(s) tm.assert_almost_equal( results, list(sorted(set(ok_for_period + ok_for_period_methods)))) @@ -259,7 +250,7 @@ def get_dir(s): # no setting allowed s = Series(date_range('20130101', periods=5, freq='D'), name='xxx') - with tm.assertRaisesRegexp(ValueError, "modifications"): + with tm.assert_raises_regex(ValueError, "modifications"): s.dt.hour = 5 # trying to set a copy @@ -268,15 +259,63 @@ def get_dir(s): def f(): s.dt.hour[0] = 5 - self.assertRaises(com.SettingWithCopyError, f) + pytest.raises(com.SettingWithCopyError, f) + + def test_dt_namespace_accessor_categorical(self): + # GH 19468 + dti = DatetimeIndex(['20171111', '20181212']).repeat(2) + s = Series(pd.Categorical(dti), name='foo') + result = s.dt.year + expected = Series([2017, 2017, 2018, 2018], name='foo') + tm.assert_series_equal(result, expected) def test_dt_accessor_no_new_attributes(self): # https://github.com/pandas-dev/pandas/issues/10673 s = Series(date_range('20130101', periods=5, freq='D')) - with tm.assertRaisesRegexp(AttributeError, - "You cannot add any new attribute"): + with tm.assert_raises_regex(AttributeError, + "You cannot add any new attribute"): s.dt.xlabel = "a" + @pytest.mark.parametrize('time_locale', [ + None] if tm.get_locales() is None else [None] + tm.get_locales()) + def test_dt_accessor_datetime_name_accessors(self, time_locale): + # Test Monday -> Sunday and January -> December, in that sequence + if time_locale is None: + # If the time_locale is None, day-name and month_name should + # return the english attributes + expected_days = 
['Monday', 'Tuesday', 'Wednesday', 'Thursday', + 'Friday', 'Saturday', 'Sunday'] + expected_months = ['January', 'February', 'March', 'April', 'May', + 'June', 'July', 'August', 'September', + 'October', 'November', 'December'] + else: + with tm.set_locale(time_locale, locale.LC_TIME): + expected_days = calendar.day_name[:] + expected_months = calendar.month_name[1:] + + s = Series(DatetimeIndex(freq='D', start=datetime(1998, 1, 1), + periods=365)) + english_days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', + 'Friday', 'Saturday', 'Sunday'] + for day, name, eng_name in zip(range(4, 11), + expected_days, + english_days): + name = name.capitalize() + assert s.dt.weekday_name[day] == eng_name + assert s.dt.day_name(locale=time_locale)[day] == name + s = s.append(Series([pd.NaT])) + assert np.isnan(s.dt.day_name(locale=time_locale).iloc[-1]) + + s = Series(DatetimeIndex(freq='M', start='2012', end='2013')) + result = s.dt.month_name(locale=time_locale) + expected = Series([month.capitalize() for month in expected_months]) + tm.assert_series_equal(result, expected) + for s_date, expected in zip(s, expected_months): + result = s_date.month_name(locale=time_locale) + assert result == expected.capitalize() + s = s.append(Series([pd.NaT])) + assert np.isnan(s.dt.month_name(locale=time_locale).iloc[-1]) + def test_strftime(self): # GH 10086 s = Series(date_range('20130101', periods=5)) @@ -319,13 +358,13 @@ def test_strftime(self): expected = np.array(['2015/03/01', '2015/03/02', '2015/03/03', '2015/03/04', '2015/03/05'], dtype=np.object_) # dtype may be S10 or U10 depending on python version - self.assert_numpy_array_equal(result, expected, check_dtype=False) + tm.assert_numpy_array_equal(result, expected, check_dtype=False) period_index = period_range('20150301', periods=5) result = period_index.strftime("%Y/%m/%d") expected = np.array(['2015/03/01', '2015/03/02', '2015/03/03', '2015/03/04', '2015/03/05'], dtype='=U10') - self.assert_numpy_array_equal(result, expected) 
+ tm.assert_numpy_array_equal(result, expected) s = Series([datetime(2013, 1, 1, 2, 32, 59), datetime(2013, 1, 2, 14, 32, 1)]) @@ -374,31 +413,22 @@ def test_valid_dt_with_missing_values(self): def test_dt_accessor_api(self): # GH 9322 - from pandas.tseries.common import (CombinedDatetimelikeProperties, - DatetimeProperties) - self.assertIs(Series.dt, CombinedDatetimelikeProperties) + from pandas.core.indexes.accessors import ( + CombinedDatetimelikeProperties, DatetimeProperties) + assert Series.dt is CombinedDatetimelikeProperties s = Series(date_range('2000-01-01', periods=3)) - self.assertIsInstance(s.dt, DatetimeProperties) + assert isinstance(s.dt, DatetimeProperties) for s in [Series(np.arange(5)), Series(list('abcde')), Series(np.random.randn(5))]: - with tm.assertRaisesRegexp(AttributeError, - "only use .dt accessor"): + with tm.assert_raises_regex(AttributeError, + "only use .dt accessor"): s.dt - self.assertFalse(hasattr(s, 'dt')) - - def test_sub_of_datetime_from_TimeSeries(self): - from pandas.tseries.timedeltas import to_timedelta - from datetime import datetime - a = Timestamp(datetime(1993, 0o1, 0o7, 13, 30, 00)) - b = datetime(1993, 6, 22, 13, 30) - a = Series([a]) - result = to_timedelta(np.abs(a - b)) - self.assertEqual(result.dtype, 'timedelta64[ns]') + assert not hasattr(s, 'dt') def test_between(self): - s = Series(bdate_range('1/1/2000', periods=20).asobject) + s = Series(bdate_range('1/1/2000', periods=20).astype(object)) s[::2] = np.nan result = s[s.between(s[3], s[17])] @@ -420,3 +450,13 @@ def test_date_tz(self): date(2015, 11, 22)]) assert_series_equal(s.dt.date, expected) assert_series_equal(s.apply(lambda x: x.date()), expected) + + def test_datetime_understood(self): + # Ensures it doesn't fail to create the right series + # reported in issue#16726 + series = pd.Series(pd.date_range("2012-01-01", periods=3)) + offset = pd.offsets.DateOffset(days=6) + result = series - offset + expected = pd.Series(pd.to_datetime([ + '2011-12-26', 
'2011-12-27', '2011-12-28'])) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index 13375ab886d8d..56ff092dd0a27 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -1,163 +1,443 @@ # coding=utf-8 # pylint: disable-msg=E1101,W0612 +import pytest + +from datetime import datetime, timedelta + import sys -from datetime import datetime import string +import warnings from numpy import nan +import pandas as pd import numpy as np -from pandas import Series, Timestamp, Timedelta, DataFrame, date_range +from pandas import ( + Series, Timestamp, Timedelta, DataFrame, date_range, + Categorical, Index +) +from pandas.api.types import CategoricalDtype +import pandas._libs.tslib as tslib from pandas.compat import lrange, range, u from pandas import compat -from pandas.util.testing import assert_series_equal import pandas.util.testing as tm from .common import TestData -class TestSeriesDtypes(TestData, tm.TestCase): +class TestSeriesDtypes(TestData): + + def test_dt64_series_astype_object(self): + dt64ser = Series(date_range('20130101', periods=3)) + result = dt64ser.astype(object) + assert isinstance(result.iloc[0], datetime) + assert result.dtype == np.object_ - def test_astype(self): + def test_td64_series_astype_object(self): + tdser = Series(['59 Days', '59 Days', 'NaT'], dtype='timedelta64[ns]') + result = tdser.astype(object) + assert isinstance(result.iloc[0], timedelta) + assert result.dtype == np.object_ + + @pytest.mark.parametrize("dtype", ["float32", "float64", + "int64", "int32"]) + def test_astype(self, dtype): s = Series(np.random.randn(5), name='foo') + as_typed = s.astype(dtype) - for dtype in ['float32', 'float64', 'int64', 'int32']: - astyped = s.astype(dtype) - self.assertEqual(astyped.dtype, dtype) - self.assertEqual(astyped.name, s.name) + assert as_typed.dtype == dtype + assert as_typed.name == s.name + + def 
test_asobject_deprecated(self): + s = Series(np.random.randn(5), name='foo') + with tm.assert_produces_warning(FutureWarning): + o = s.asobject + assert isinstance(o, np.ndarray) def test_dtype(self): - self.assertEqual(self.ts.dtype, np.dtype('float64')) - self.assertEqual(self.ts.dtypes, np.dtype('float64')) - self.assertEqual(self.ts.ftype, 'float64:dense') - self.assertEqual(self.ts.ftypes, 'float64:dense') - assert_series_equal(self.ts.get_dtype_counts(), Series(1, ['float64'])) - assert_series_equal(self.ts.get_ftype_counts(), Series( - 1, ['float64:dense'])) - - def test_astype_cast_nan_inf_int(self): - # GH14265, check nan and inf raise error when converting to int - types = [np.int32, np.int64] - values = [np.nan, np.inf] + assert self.ts.dtype == np.dtype('float64') + assert self.ts.dtypes == np.dtype('float64') + assert self.ts.ftype == 'float64:dense' + assert self.ts.ftypes == 'float64:dense' + tm.assert_series_equal(self.ts.get_dtype_counts(), + Series(1, ['float64'])) + tm.assert_series_equal(self.ts.get_ftype_counts(), + Series(1, ['float64:dense'])) + + @pytest.mark.parametrize("value", [np.nan, np.inf]) + @pytest.mark.parametrize("dtype", [np.int32, np.int64]) + def test_astype_cast_nan_inf_int(self, dtype, value): + # gh-14265: check NaN and inf raise error when converting to int msg = 'Cannot convert non-finite values \\(NA or inf\\) to integer' + s = Series([value]) - for this_type in types: - for this_val in values: - s = Series([this_val]) - with self.assertRaisesRegexp(ValueError, msg): - s.astype(this_type) + with tm.assert_raises_regex(ValueError, msg): + s.astype(dtype) - def test_astype_cast_object_int(self): + @pytest.mark.parametrize("dtype", [int, np.int8, np.int64]) + def test_astype_cast_object_int_fail(self, dtype): arr = Series(["car", "house", "tree", "1"]) + with pytest.raises(ValueError): + arr.astype(dtype) - self.assertRaises(ValueError, arr.astype, int) - self.assertRaises(ValueError, arr.astype, np.int64) - 
self.assertRaises(ValueError, arr.astype, np.int8) - + def test_astype_cast_object_int(self): arr = Series(['1', '2', '3', '4'], dtype=object) result = arr.astype(int) - self.assert_series_equal(result, Series(np.arange(1, 5))) - def test_astype_datetimes(self): - import pandas.tslib as tslib + tm.assert_series_equal(result, Series(np.arange(1, 5))) + def test_astype_datetime(self): s = Series(tslib.iNaT, dtype='M8[ns]', index=lrange(5)) + s = s.astype('O') - self.assertEqual(s.dtype, np.object_) + assert s.dtype == np.object_ s = Series([datetime(2001, 1, 2, 0, 0)]) + s = s.astype('O') - self.assertEqual(s.dtype, np.object_) + assert s.dtype == np.object_ s = Series([datetime(2001, 1, 2, 0, 0) for i in range(3)]) + s[1] = np.nan - self.assertEqual(s.dtype, 'M8[ns]') - s = s.astype('O') - self.assertEqual(s.dtype, np.object_) + assert s.dtype == 'M8[ns]' - def test_astype_str(self): - # GH4405 - digits = string.digits - s1 = Series([digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)]) - s2 = Series([digits * 10, tm.rands(63), tm.rands(64), nan, 1.0]) - types = (compat.text_type, np.str_) - for typ in types: - for s in (s1, s2): - res = s.astype(typ) - expec = s.map(compat.text_type) - assert_series_equal(res, expec) - - # GH9757 - # Test str and unicode on python 2.x and just str on python 3.x - for tt in set([str, compat.text_type]): - ts = Series([Timestamp('2010-01-04 00:00:00')]) - s = ts.astype(tt) - expected = Series([tt('2010-01-04')]) - assert_series_equal(s, expected) - - ts = Series([Timestamp('2010-01-04 00:00:00', tz='US/Eastern')]) - s = ts.astype(tt) - expected = Series([tt('2010-01-04 00:00:00-05:00')]) - assert_series_equal(s, expected) - - td = Series([Timedelta(1, unit='d')]) - s = td.astype(tt) - expected = Series([tt('1 days 00:00:00.000000000')]) - assert_series_equal(s, expected) + s = s.astype('O') + assert s.dtype == np.object_ + + def test_astype_datetime64tz(self): + s = Series(date_range('20130101', periods=3, tz='US/Eastern')) + + # 
astype + result = s.astype(object) + expected = Series(s.astype(object), dtype=object) + tm.assert_series_equal(result, expected) + + result = Series(s.values).dt.tz_localize('UTC').dt.tz_convert(s.dt.tz) + tm.assert_series_equal(result, s) + + # astype - object, preserves on construction + result = Series(s.astype(object)) + expected = s.astype(object) + tm.assert_series_equal(result, expected) + + # astype - datetime64[ns, tz] + result = Series(s.values).astype('datetime64[ns, US/Eastern]') + tm.assert_series_equal(result, s) + + result = Series(s.values).astype(s.dtype) + tm.assert_series_equal(result, s) + + result = s.astype('datetime64[ns, CET]') + expected = Series(date_range('20130101 06:00:00', periods=3, tz='CET')) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("dtype", [compat.text_type, np.str_]) + @pytest.mark.parametrize("series", [Series([string.digits * 10, + tm.rands(63), + tm.rands(64), + tm.rands(1000)]), + Series([string.digits * 10, + tm.rands(63), + tm.rands(64), nan, 1.0])]) + def test_astype_str_map(self, dtype, series): + # see gh-4405 + result = series.astype(dtype) + expected = series.map(compat.text_type) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("dtype", [str, compat.text_type]) + def test_astype_str_cast(self, dtype): + # see gh-9757: test str and unicode on python 2.x + # and just str on python 3.x + ts = Series([Timestamp('2010-01-04 00:00:00')]) + s = ts.astype(dtype) + + expected = Series([dtype('2010-01-04')]) + tm.assert_series_equal(s, expected) + + ts = Series([Timestamp('2010-01-04 00:00:00', tz='US/Eastern')]) + s = ts.astype(dtype) + + expected = Series([dtype('2010-01-04 00:00:00-05:00')]) + tm.assert_series_equal(s, expected) + + td = Series([Timedelta(1, unit='d')]) + s = td.astype(dtype) + + expected = Series([dtype('1 days 00:00:00.000000000')]) + tm.assert_series_equal(s, expected) def test_astype_unicode(self): - - # GH7758 - # a bit of magic is required to set 
default encoding encoding to utf-8 + # see gh-7758: A bit of magic is required to set + # default encoding to utf-8 digits = string.digits test_series = [ Series([digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)]), Series([u('データーサイエンス、お前はもう死んでいる')]), - ] former_encoding = None + if not compat.PY3: - # in python we can force the default encoding for this test + # In Python, we can force the default encoding for this test former_encoding = sys.getdefaultencoding() reload(sys) # noqa + sys.setdefaultencoding("utf-8") if sys.getdefaultencoding() == "utf-8": test_series.append(Series([u('野菜食べないとやばい') .encode("utf-8")])) + for s in test_series: res = s.astype("unicode") expec = s.map(compat.text_type) - assert_series_equal(res, expec) - # restore the former encoding + tm.assert_series_equal(res, expec) + + # Restore the former encoding if former_encoding is not None and former_encoding != "utf-8": reload(sys) # noqa sys.setdefaultencoding(former_encoding) - def test_astype_dict(self): - # GH7271 + @pytest.mark.parametrize("dtype_class", [dict, Series]) + def test_astype_dict_like(self, dtype_class): + # see gh-7271 s = Series(range(0, 10, 2), name='abc') - result = s.astype({'abc': str}) + dt1 = dtype_class({'abc': str}) + result = s.astype(dt1) expected = Series(['0', '2', '4', '6', '8'], name='abc') - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) - result = s.astype({'abc': 'float64'}) + dt2 = dtype_class({'abc': 'float64'}) + result = s.astype(dt2) expected = Series([0.0, 2.0, 4.0, 6.0, 8.0], dtype='float64', name='abc') - assert_series_equal(result, expected) - - self.assertRaises(KeyError, s.astype, {'abc': str, 'def': str}) - self.assertRaises(KeyError, s.astype, {0: str}) - - def test_complexx(self): - # GH4819 - # complex access for ndarray compat + tm.assert_series_equal(result, expected) + + dt3 = dtype_class({'abc': str, 'def': str}) + with pytest.raises(KeyError): + s.astype(dt3) + + dt4 = dtype_class({0: str}) + 
with pytest.raises(KeyError): + s.astype(dt4) + + # GH16717 + # if dtypes provided is empty, it should error + dt5 = dtype_class({}) + with pytest.raises(KeyError): + s.astype(dt5) + + def test_astype_categories_deprecation(self): + + # deprecated 17636 + s = Series(['a', 'b', 'a']) + expected = s.astype(CategoricalDtype(['a', 'b'], ordered=True)) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = s.astype('category', categories=['a', 'b'], ordered=True) + tm.assert_series_equal(result, expected) + + def test_astype_from_categorical(self): + l = ["a", "b", "c", "a"] + s = Series(l) + exp = Series(Categorical(l)) + res = s.astype('category') + tm.assert_series_equal(res, exp) + + l = [1, 2, 3, 1] + s = Series(l) + exp = Series(Categorical(l)) + res = s.astype('category') + tm.assert_series_equal(res, exp) + + df = DataFrame({"cats": [1, 2, 3, 4, 5, 6], + "vals": [1, 2, 3, 4, 5, 6]}) + cats = Categorical([1, 2, 3, 4, 5, 6]) + exp_df = DataFrame({"cats": cats, "vals": [1, 2, 3, 4, 5, 6]}) + df["cats"] = df["cats"].astype("category") + tm.assert_frame_equal(exp_df, df) + + df = DataFrame({"cats": ['a', 'b', 'b', 'a', 'a', 'd'], + "vals": [1, 2, 3, 4, 5, 6]}) + cats = Categorical(['a', 'b', 'b', 'a', 'a', 'd']) + exp_df = DataFrame({"cats": cats, "vals": [1, 2, 3, 4, 5, 6]}) + df["cats"] = df["cats"].astype("category") + tm.assert_frame_equal(exp_df, df) + + # with keywords + l = ["a", "b", "c", "a"] + s = Series(l) + exp = Series(Categorical(l, ordered=True)) + res = s.astype(CategoricalDtype(None, ordered=True)) + tm.assert_series_equal(res, exp) + + exp = Series(Categorical(l, categories=list('abcdef'), ordered=True)) + res = s.astype(CategoricalDtype(list('abcdef'), ordered=True)) + tm.assert_series_equal(res, exp) + + def test_astype_categorical_to_other(self): + + df = DataFrame({'value': np.random.randint(0, 10000, 100)}) + labels = ["{0} - {1}".format(i, i + 499) for i in range(0, 10000, 500)] + cat_labels = 
Categorical(labels, labels) + + df = df.sort_values(by=['value'], ascending=True) + df['value_group'] = pd.cut(df.value, range(0, 10500, 500), + right=False, labels=cat_labels) + + s = df['value_group'] + expected = s + tm.assert_series_equal(s.astype('category'), expected) + tm.assert_series_equal(s.astype(CategoricalDtype()), expected) + pytest.raises(ValueError, lambda: s.astype('float64')) + + cat = Series(Categorical(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'])) + exp = Series(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']) + tm.assert_series_equal(cat.astype('str'), exp) + s2 = Series(Categorical(['1', '2', '3', '4'])) + exp2 = Series([1, 2, 3, 4]).astype(int) + tm.assert_series_equal(s2.astype('int'), exp2) + + # object don't sort correctly, so just compare that we have the same + # values + def cmp(a, b): + tm.assert_almost_equal( + np.sort(np.unique(a)), np.sort(np.unique(b))) + + expected = Series(np.array(s.values), name='value_group') + cmp(s.astype('object'), expected) + cmp(s.astype(np.object_), expected) + + # array conversion + tm.assert_almost_equal(np.array(s), np.array(s.values)) + + # valid conversion + for valid in [lambda x: x.astype('category'), + lambda x: x.astype(CategoricalDtype()), + lambda x: x.astype('object').astype('category'), + lambda x: x.astype('object').astype( + CategoricalDtype()) + ]: + + result = valid(s) + # compare series values + # internal .categories can't be compared because it is sorted + tm.assert_series_equal(result, s, check_categorical=False) + + # invalid conversion (these are NOT a dtype) + for invalid in [lambda x: x.astype(Categorical), + lambda x: x.astype('object').astype(Categorical)]: + pytest.raises(TypeError, lambda: invalid(s)) + + @pytest.mark.parametrize('name', [None, 'foo']) + @pytest.mark.parametrize('dtype_ordered', [True, False]) + @pytest.mark.parametrize('series_ordered', [True, False]) + def test_astype_categorical_to_categorical(self, name, dtype_ordered, + series_ordered): + # GH 10696/18593 + s_data 
= list('abcaacbab') + s_dtype = CategoricalDtype(list('bac'), ordered=series_ordered) + s = Series(s_data, dtype=s_dtype, name=name) + + # unspecified categories + dtype = CategoricalDtype(ordered=dtype_ordered) + result = s.astype(dtype) + exp_dtype = CategoricalDtype(s_dtype.categories, dtype_ordered) + expected = Series(s_data, name=name, dtype=exp_dtype) + tm.assert_series_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = s.astype('category', ordered=dtype_ordered) + tm.assert_series_equal(result, expected) + + # different categories + dtype = CategoricalDtype(list('adc'), dtype_ordered) + result = s.astype(dtype) + expected = Series(s_data, name=name, dtype=dtype) + tm.assert_series_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = s.astype( + 'category', categories=list('adc'), ordered=dtype_ordered) + tm.assert_series_equal(result, expected) + + if dtype_ordered is False: + # not specifying ordered, so only test once + expected = s + result = s.astype('category') + tm.assert_series_equal(result, expected) + + def test_astype_categoricaldtype(self): + s = Series(['a', 'b', 'a']) + result = s.astype(CategoricalDtype(['a', 'b'], ordered=True)) + expected = Series(Categorical(['a', 'b', 'a'], ordered=True)) + tm.assert_series_equal(result, expected) + + result = s.astype(CategoricalDtype(['a', 'b'], ordered=False)) + expected = Series(Categorical(['a', 'b', 'a'], ordered=False)) + tm.assert_series_equal(result, expected) + + result = s.astype(CategoricalDtype(['a', 'b', 'c'], ordered=False)) + expected = Series(Categorical(['a', 'b', 'a'], + categories=['a', 'b', 'c'], + ordered=False)) + tm.assert_series_equal(result, expected) + tm.assert_index_equal(result.cat.categories, Index(['a', 'b', 'c'])) + + def test_astype_categoricaldtype_with_args(self): + s = Series(['a', 'b']) + type_ = CategoricalDtype(['a', 'b']) + + with 
pytest.raises(TypeError): + s.astype(type_, ordered=True) + with pytest.raises(TypeError): + s.astype(type_, categories=['a', 'b']) + with pytest.raises(TypeError): + s.astype(type_, categories=['a', 'b'], ordered=False) + + def test_astype_generic_timestamp_deprecated(self): + # see gh-15524 + data = [1] + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + s = Series(data) + dtype = np.datetime64 + result = s.astype(dtype) + expected = Series(data, dtype=dtype) + tm.assert_series_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + s = Series(data) + dtype = np.timedelta64 + result = s.astype(dtype) + expected = Series(data, dtype=dtype) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("dtype", np.typecodes['All']) + def test_astype_empty_constructor_equality(self, dtype): + # see gh-15524 + + if dtype not in ('S', 'V'): # poor support (if any) currently + with warnings.catch_warnings(record=True): + # Generic timestamp dtypes ('M' and 'm') are deprecated, + # but we test that already in series/test_constructors.py + + init_empty = Series([], dtype=dtype) + as_type_empty = Series([]).astype(dtype) + tm.assert_series_equal(init_empty, as_type_empty) + + def test_complex(self): + # see gh-4819: complex access for ndarray compat a = np.arange(5, dtype=np.float64) b = Series(a + 4j * a) + tm.assert_numpy_array_equal(a, b.real) tm.assert_numpy_array_equal(4 * a, b.imag) @@ -166,23 +446,22 @@ def test_complexx(self): tm.assert_numpy_array_equal(4 * a, b.imag) def test_arg_for_errors_in_astype(self): - # issue #14878 + # see gh-14878 + s = Series([1, 2, 3]) - sr = Series([1, 2, 3]) - - with self.assertRaises(ValueError): - sr.astype(np.float64, errors=False) + with pytest.raises(ValueError): + s.astype(np.float64, errors=False) with tm.assert_produces_warning(FutureWarning): - sr.astype(np.int8, raise_on_error=True) + s.astype(np.int8, raise_on_error=True) - 
sr.astype(np.int8, errors='raise') + s.astype(np.int8, errors='raise') def test_intercept_astype_object(self): series = Series(date_range('1/1/2000', periods=10)) - # this test no longer makes sense as series is by default already - # M8[ns] + # This test no longer makes sense, as + # Series is by default already M8[ns]. expected = series.astype('object') df = DataFrame({'a': series, @@ -192,9 +471,36 @@ def test_intercept_astype_object(self): tm.assert_series_equal(df.dtypes, exp_dtypes) result = df.values.squeeze() - self.assertTrue((result[:, 0] == expected.values).all()) + assert (result[:, 0] == expected.values).all() df = DataFrame({'a': series, 'b': ['foo'] * len(series)}) result = df.values.squeeze() - self.assertTrue((result[:, 0] == expected.values).all()) + assert (result[:, 0] == expected.values).all() + + def test_series_to_categorical(self): + # see gh-16524: test conversion of Series to Categorical + series = Series(['a', 'b', 'c']) + + result = Series(series, dtype='category') + expected = Series(['a', 'b', 'c'], dtype='category') + + tm.assert_series_equal(result, expected) + + def test_infer_objects_series(self): + # GH 11221 + actual = Series(np.array([1, 2, 3], dtype='O')).infer_objects() + expected = Series([1, 2, 3]) + tm.assert_series_equal(actual, expected) + + actual = Series(np.array([1, 2, 3, None], dtype='O')).infer_objects() + expected = Series([1., 2., 3., np.nan]) + tm.assert_series_equal(actual, expected) + + # only soft conversions, unconvertable pass thru unchanged + actual = (Series(np.array([1, 2, 3, None, 'a'], dtype='O')) + .infer_objects()) + expected = Series([1, 2, 3, None, 'a']) + + assert actual.dtype == 'object' + tm.assert_series_equal(actual, expected) diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py deleted file mode 100644 index a20cb8324d2a3..0000000000000 --- a/pandas/tests/series/test_indexing.py +++ /dev/null @@ -1,2638 +0,0 @@ -# coding=utf-8 -# pylint: 
disable-msg=E1101,W0612 - -from datetime import datetime, timedelta - -from numpy import nan -import numpy as np -import pandas as pd - -import pandas.index as _index -from pandas.types.common import is_integer, is_scalar -from pandas import (Index, Series, DataFrame, isnull, - date_range, NaT, MultiIndex, - Timestamp, DatetimeIndex, Timedelta) -from pandas.core.indexing import IndexingError -from pandas.tseries.offsets import BDay -from pandas import lib, tslib - -from pandas.compat import lrange, range -from pandas import compat -from pandas.util.testing import (slow, - assert_series_equal, - assert_almost_equal, - assert_frame_equal) -import pandas.util.testing as tm - -from pandas.tests.series.common import TestData - -JOIN_TYPES = ['inner', 'outer', 'left', 'right'] - - -class TestSeriesIndexing(TestData, tm.TestCase): - - def test_get(self): - - # GH 6383 - s = Series(np.array([43, 48, 60, 48, 50, 51, 50, 45, 57, 48, 56, 45, - 51, 39, 55, 43, 54, 52, 51, 54])) - - result = s.get(25, 0) - expected = 0 - self.assertEqual(result, expected) - - s = Series(np.array([43, 48, 60, 48, 50, 51, 50, 45, 57, 48, 56, - 45, 51, 39, 55, 43, 54, 52, 51, 54]), - index=pd.Float64Index( - [25.0, 36.0, 49.0, 64.0, 81.0, 100.0, - 121.0, 144.0, 169.0, 196.0, 1225.0, - 1296.0, 1369.0, 1444.0, 1521.0, 1600.0, - 1681.0, 1764.0, 1849.0, 1936.0], - dtype='object')) - - result = s.get(25, 0) - expected = 43 - self.assertEqual(result, expected) - - # GH 7407 - # with a boolean accessor - df = pd.DataFrame({'i': [0] * 3, 'b': [False] * 3}) - vc = df.i.value_counts() - result = vc.get(99, default='Missing') - self.assertEqual(result, 'Missing') - - vc = df.b.value_counts() - result = vc.get(False, default='Missing') - self.assertEqual(result, 3) - - result = vc.get(True, default='Missing') - self.assertEqual(result, 'Missing') - - def test_delitem(self): - - # GH 5542 - # should delete the item inplace - s = Series(lrange(5)) - del s[0] - - expected = Series(lrange(1, 5), index=lrange(1, 
5)) - assert_series_equal(s, expected) - - del s[1] - expected = Series(lrange(2, 5), index=lrange(2, 5)) - assert_series_equal(s, expected) - - # empty - s = Series() - - def f(): - del s[0] - - self.assertRaises(KeyError, f) - - # only 1 left, del, add, del - s = Series(1) - del s[0] - assert_series_equal(s, Series(dtype='int64', index=Index( - [], dtype='int64'))) - s[0] = 1 - assert_series_equal(s, Series(1)) - del s[0] - assert_series_equal(s, Series(dtype='int64', index=Index( - [], dtype='int64'))) - - # Index(dtype=object) - s = Series(1, index=['a']) - del s['a'] - assert_series_equal(s, Series(dtype='int64', index=Index( - [], dtype='object'))) - s['a'] = 1 - assert_series_equal(s, Series(1, index=['a'])) - del s['a'] - assert_series_equal(s, Series(dtype='int64', index=Index( - [], dtype='object'))) - - def test_getitem_setitem_ellipsis(self): - s = Series(np.random.randn(10)) - - np.fix(s) - - result = s[...] - assert_series_equal(result, s) - - s[...] = 5 - self.assertTrue((result == 5).all()) - - def test_getitem_negative_out_of_bounds(self): - s = Series(tm.rands_array(5, 10), index=tm.rands_array(10, 10)) - - self.assertRaises(IndexError, s.__getitem__, -11) - self.assertRaises(IndexError, s.__setitem__, -11, 'foo') - - def test_pop(self): - # GH 6600 - df = DataFrame({'A': 0, 'B': np.arange(5, dtype='int64'), 'C': 0, }) - k = df.iloc[4] - - result = k.pop('B') - self.assertEqual(result, 4) - - expected = Series([0, 0], index=['A', 'C'], name=4) - assert_series_equal(k, expected) - - def test_getitem_get(self): - idx1 = self.series.index[5] - idx2 = self.objSeries.index[5] - - self.assertEqual(self.series[idx1], self.series.get(idx1)) - self.assertEqual(self.objSeries[idx2], self.objSeries.get(idx2)) - - self.assertEqual(self.series[idx1], self.series[5]) - self.assertEqual(self.objSeries[idx2], self.objSeries[5]) - - self.assertEqual( - self.series.get(-1), self.series.get(self.series.index[-1])) - self.assertEqual(self.series[5], 
self.series.get(self.series.index[5])) - - # missing - d = self.ts.index[0] - BDay() - self.assertRaises(KeyError, self.ts.__getitem__, d) - - # None - # GH 5652 - for s in [Series(), Series(index=list('abc'))]: - result = s.get(None) - self.assertIsNone(result) - - def test_iget(self): - - s = Series(np.random.randn(10), index=lrange(0, 20, 2)) - - # 10711, deprecated - with tm.assert_produces_warning(FutureWarning): - s.iget(1) - - # 10711, deprecated - with tm.assert_produces_warning(FutureWarning): - s.irow(1) - - # 10711, deprecated - with tm.assert_produces_warning(FutureWarning): - s.iget_value(1) - - for i in range(len(s)): - result = s.iloc[i] - exp = s[s.index[i]] - assert_almost_equal(result, exp) - - # pass a slice - result = s.iloc[slice(1, 3)] - expected = s.loc[2:4] - assert_series_equal(result, expected) - - # test slice is a view - result[:] = 0 - self.assertTrue((s[1:3] == 0).all()) - - # list of integers - result = s.iloc[[0, 2, 3, 4, 5]] - expected = s.reindex(s.index[[0, 2, 3, 4, 5]]) - assert_series_equal(result, expected) - - def test_iget_nonunique(self): - s = Series([0, 1, 2], index=[0, 1, 0]) - self.assertEqual(s.iloc[2], 2) - - def test_getitem_regression(self): - s = Series(lrange(5), index=lrange(5)) - result = s[lrange(5)] - assert_series_equal(result, s) - - def test_getitem_setitem_slice_bug(self): - s = Series(lrange(10), lrange(10)) - result = s[-12:] - assert_series_equal(result, s) - - result = s[-7:] - assert_series_equal(result, s[3:]) - - result = s[:-12] - assert_series_equal(result, s[:0]) - - s = Series(lrange(10), lrange(10)) - s[-12:] = 0 - self.assertTrue((s == 0).all()) - - s[:-12] = 5 - self.assertTrue((s == 0).all()) - - def test_getitem_int64(self): - idx = np.int64(5) - self.assertEqual(self.ts[idx], self.ts[5]) - - def test_getitem_fancy(self): - slice1 = self.series[[1, 2, 3]] - slice2 = self.objSeries[[1, 2, 3]] - self.assertEqual(self.series.index[2], slice1.index[1]) - self.assertEqual(self.objSeries.index[2], 
slice2.index[1]) - self.assertEqual(self.series[2], slice1[1]) - self.assertEqual(self.objSeries[2], slice2[1]) - - def test_getitem_boolean(self): - s = self.series - mask = s > s.median() - - # passing list is OK - result = s[list(mask)] - expected = s[mask] - assert_series_equal(result, expected) - self.assert_index_equal(result.index, s.index[mask]) - - def test_getitem_boolean_empty(self): - s = Series([], dtype=np.int64) - s.index.name = 'index_name' - s = s[s.isnull()] - self.assertEqual(s.index.name, 'index_name') - self.assertEqual(s.dtype, np.int64) - - # GH5877 - # indexing with empty series - s = Series(['A', 'B']) - expected = Series(np.nan, index=['C'], dtype=object) - result = s[Series(['C'], dtype=object)] - assert_series_equal(result, expected) - - s = Series(['A', 'B']) - expected = Series(dtype=object, index=Index([], dtype='int64')) - result = s[Series([], dtype=object)] - assert_series_equal(result, expected) - - # invalid because of the boolean indexer - # that's empty or not-aligned - def f(): - s[Series([], dtype=bool)] - - self.assertRaises(IndexingError, f) - - def f(): - s[Series([True], dtype=bool)] - - self.assertRaises(IndexingError, f) - - def test_getitem_generator(self): - gen = (x > 0 for x in self.series) - result = self.series[gen] - result2 = self.series[iter(self.series > 0)] - expected = self.series[self.series > 0] - assert_series_equal(result, expected) - assert_series_equal(result2, expected) - - def test_type_promotion(self): - # GH12599 - s = pd.Series() - s["a"] = pd.Timestamp("2016-01-01") - s["b"] = 3.0 - s["c"] = "foo" - expected = Series([pd.Timestamp("2016-01-01"), 3.0, "foo"], - index=["a", "b", "c"]) - assert_series_equal(s, expected) - - def test_getitem_boolean_object(self): - # using column from DataFrame - - s = self.series - mask = s > s.median() - omask = mask.astype(object) - - # getitem - result = s[omask] - expected = s[mask] - assert_series_equal(result, expected) - - # setitem - s2 = s.copy() - cop = 
s.copy() - cop[omask] = 5 - s2[mask] = 5 - assert_series_equal(cop, s2) - - # nans raise exception - omask[5:10] = np.nan - self.assertRaises(Exception, s.__getitem__, omask) - self.assertRaises(Exception, s.__setitem__, omask, 5) - - def test_getitem_setitem_boolean_corner(self): - ts = self.ts - mask_shifted = ts.shift(1, freq=BDay()) > ts.median() - - # these used to raise...?? - - self.assertRaises(Exception, ts.__getitem__, mask_shifted) - self.assertRaises(Exception, ts.__setitem__, mask_shifted, 1) - # ts[mask_shifted] - # ts[mask_shifted] = 1 - - self.assertRaises(Exception, ts.loc.__getitem__, mask_shifted) - self.assertRaises(Exception, ts.loc.__setitem__, mask_shifted, 1) - # ts.loc[mask_shifted] - # ts.loc[mask_shifted] = 2 - - def test_getitem_setitem_slice_integers(self): - s = Series(np.random.randn(8), index=[2, 4, 6, 8, 10, 12, 14, 16]) - - result = s[:4] - expected = s.reindex([2, 4, 6, 8]) - assert_series_equal(result, expected) - - s[:4] = 0 - self.assertTrue((s[:4] == 0).all()) - self.assertTrue(not (s[4:] == 0).any()) - - def test_getitem_setitem_datetime_tz_pytz(self): - tm._skip_if_no_pytz() - from pytz import timezone as tz - - from pandas import date_range - - N = 50 - # testing with timezone, GH #2785 - rng = date_range('1/1/1990', periods=N, freq='H', tz='US/Eastern') - ts = Series(np.random.randn(N), index=rng) - - # also test Timestamp tz handling, GH #2789 - result = ts.copy() - result["1990-01-01 09:00:00+00:00"] = 0 - result["1990-01-01 09:00:00+00:00"] = ts[4] - assert_series_equal(result, ts) - - result = ts.copy() - result["1990-01-01 03:00:00-06:00"] = 0 - result["1990-01-01 03:00:00-06:00"] = ts[4] - assert_series_equal(result, ts) - - # repeat with datetimes - result = ts.copy() - result[datetime(1990, 1, 1, 9, tzinfo=tz('UTC'))] = 0 - result[datetime(1990, 1, 1, 9, tzinfo=tz('UTC'))] = ts[4] - assert_series_equal(result, ts) - - result = ts.copy() - - # comparison dates with datetime MUST be localized! 
- date = tz('US/Central').localize(datetime(1990, 1, 1, 3)) - result[date] = 0 - result[date] = ts[4] - assert_series_equal(result, ts) - - def test_getitem_setitem_datetime_tz_dateutil(self): - tm._skip_if_no_dateutil() - from dateutil.tz import tzutc - from pandas.tslib import _dateutil_gettz as gettz - - tz = lambda x: tzutc() if x == 'UTC' else gettz( - x) # handle special case for utc in dateutil - - from pandas import date_range - - N = 50 - - # testing with timezone, GH #2785 - rng = date_range('1/1/1990', periods=N, freq='H', - tz='America/New_York') - ts = Series(np.random.randn(N), index=rng) - - # also test Timestamp tz handling, GH #2789 - result = ts.copy() - result["1990-01-01 09:00:00+00:00"] = 0 - result["1990-01-01 09:00:00+00:00"] = ts[4] - assert_series_equal(result, ts) - - result = ts.copy() - result["1990-01-01 03:00:00-06:00"] = 0 - result["1990-01-01 03:00:00-06:00"] = ts[4] - assert_series_equal(result, ts) - - # repeat with datetimes - result = ts.copy() - result[datetime(1990, 1, 1, 9, tzinfo=tz('UTC'))] = 0 - result[datetime(1990, 1, 1, 9, tzinfo=tz('UTC'))] = ts[4] - assert_series_equal(result, ts) - - result = ts.copy() - result[datetime(1990, 1, 1, 3, tzinfo=tz('America/Chicago'))] = 0 - result[datetime(1990, 1, 1, 3, tzinfo=tz('America/Chicago'))] = ts[4] - assert_series_equal(result, ts) - - def test_getitem_setitem_datetimeindex(self): - N = 50 - # testing with timezone, GH #2785 - rng = date_range('1/1/1990', periods=N, freq='H', tz='US/Eastern') - ts = Series(np.random.randn(N), index=rng) - - result = ts["1990-01-01 04:00:00"] - expected = ts[4] - self.assertEqual(result, expected) - - result = ts.copy() - result["1990-01-01 04:00:00"] = 0 - result["1990-01-01 04:00:00"] = ts[4] - assert_series_equal(result, ts) - - result = ts["1990-01-01 04:00:00":"1990-01-01 07:00:00"] - expected = ts[4:8] - assert_series_equal(result, expected) - - result = ts.copy() - result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = 0 - 
result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = ts[4:8] - assert_series_equal(result, ts) - - lb = "1990-01-01 04:00:00" - rb = "1990-01-01 07:00:00" - result = ts[(ts.index >= lb) & (ts.index <= rb)] - expected = ts[4:8] - assert_series_equal(result, expected) - - # repeat all the above with naive datetimes - result = ts[datetime(1990, 1, 1, 4)] - expected = ts[4] - self.assertEqual(result, expected) - - result = ts.copy() - result[datetime(1990, 1, 1, 4)] = 0 - result[datetime(1990, 1, 1, 4)] = ts[4] - assert_series_equal(result, ts) - - result = ts[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)] - expected = ts[4:8] - assert_series_equal(result, expected) - - result = ts.copy() - result[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)] = 0 - result[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)] = ts[4:8] - assert_series_equal(result, ts) - - lb = datetime(1990, 1, 1, 4) - rb = datetime(1990, 1, 1, 7) - result = ts[(ts.index >= lb) & (ts.index <= rb)] - expected = ts[4:8] - assert_series_equal(result, expected) - - result = ts[ts.index[4]] - expected = ts[4] - self.assertEqual(result, expected) - - result = ts[ts.index[4:8]] - expected = ts[4:8] - assert_series_equal(result, expected) - - result = ts.copy() - result[ts.index[4:8]] = 0 - result[4:8] = ts[4:8] - assert_series_equal(result, ts) - - # also test partial date slicing - result = ts["1990-01-02"] - expected = ts[24:48] - assert_series_equal(result, expected) - - result = ts.copy() - result["1990-01-02"] = 0 - result["1990-01-02"] = ts[24:48] - assert_series_equal(result, ts) - - def test_getitem_setitem_periodindex(self): - from pandas import period_range - - N = 50 - rng = period_range('1/1/1990', periods=N, freq='H') - ts = Series(np.random.randn(N), index=rng) - - result = ts["1990-01-01 04"] - expected = ts[4] - self.assertEqual(result, expected) - - result = ts.copy() - result["1990-01-01 04"] = 0 - result["1990-01-01 04"] = ts[4] - assert_series_equal(result, ts) - - result = ts["1990-01-01 
04":"1990-01-01 07"] - expected = ts[4:8] - assert_series_equal(result, expected) - - result = ts.copy() - result["1990-01-01 04":"1990-01-01 07"] = 0 - result["1990-01-01 04":"1990-01-01 07"] = ts[4:8] - assert_series_equal(result, ts) - - lb = "1990-01-01 04" - rb = "1990-01-01 07" - result = ts[(ts.index >= lb) & (ts.index <= rb)] - expected = ts[4:8] - assert_series_equal(result, expected) - - # GH 2782 - result = ts[ts.index[4]] - expected = ts[4] - self.assertEqual(result, expected) - - result = ts[ts.index[4:8]] - expected = ts[4:8] - assert_series_equal(result, expected) - - result = ts.copy() - result[ts.index[4:8]] = 0 - result[4:8] = ts[4:8] - assert_series_equal(result, ts) - - def test_getitem_median_slice_bug(self): - index = date_range('20090415', '20090519', freq='2B') - s = Series(np.random.randn(13), index=index) - - indexer = [slice(6, 7, None)] - result = s[indexer] - expected = s[indexer[0]] - assert_series_equal(result, expected) - - def test_getitem_out_of_bounds(self): - # don't segfault, GH #495 - self.assertRaises(IndexError, self.ts.__getitem__, len(self.ts)) - - # GH #917 - s = Series([]) - self.assertRaises(IndexError, s.__getitem__, -1) - - def test_getitem_setitem_integers(self): - # caused bug without test - s = Series([1, 2, 3], ['a', 'b', 'c']) - - self.assertEqual(s.iloc[0], s['a']) - s.iloc[0] = 5 - self.assertAlmostEqual(s['a'], 5) - - def test_getitem_box_float64(self): - value = self.ts[5] - tm.assertIsInstance(value, np.float64) - - def test_getitem_ambiguous_keyerror(self): - s = Series(lrange(10), index=lrange(0, 20, 2)) - self.assertRaises(KeyError, s.__getitem__, 1) - self.assertRaises(KeyError, s.loc.__getitem__, 1) - - def test_getitem_unordered_dup(self): - obj = Series(lrange(5), index=['c', 'a', 'a', 'b', 'b']) - self.assertTrue(is_scalar(obj['c'])) - self.assertEqual(obj['c'], 0) - - def test_getitem_dups_with_missing(self): - - # breaks reindex, so need to use .loc internally - # GH 4246 - s = Series([1, 2, 3, 4], 
['foo', 'bar', 'foo', 'bah']) - expected = s.loc[['foo', 'bar', 'bah', 'bam']] - result = s[['foo', 'bar', 'bah', 'bam']] - assert_series_equal(result, expected) - - def test_getitem_dups(self): - s = Series(range(5), index=['A', 'A', 'B', 'C', 'C'], dtype=np.int64) - expected = Series([3, 4], index=['C', 'C'], dtype=np.int64) - result = s['C'] - assert_series_equal(result, expected) - - def test_getitem_dataframe(self): - rng = list(range(10)) - s = pd.Series(10, index=rng) - df = pd.DataFrame(rng, index=rng) - self.assertRaises(TypeError, s.__getitem__, df > 5) - - def test_getitem_callable(self): - # GH 12533 - s = pd.Series(4, index=list('ABCD')) - result = s[lambda x: 'A'] - self.assertEqual(result, s.loc['A']) - - result = s[lambda x: ['A', 'B']] - tm.assert_series_equal(result, s.loc[['A', 'B']]) - - result = s[lambda x: [True, False, True, True]] - tm.assert_series_equal(result, s.iloc[[0, 2, 3]]) - - def test_setitem_ambiguous_keyerror(self): - s = Series(lrange(10), index=lrange(0, 20, 2)) - - # equivalent of an append - s2 = s.copy() - s2[1] = 5 - expected = s.append(Series([5], index=[1])) - assert_series_equal(s2, expected) - - s2 = s.copy() - s2.loc[1] = 5 - expected = s.append(Series([5], index=[1])) - assert_series_equal(s2, expected) - - def test_setitem_float_labels(self): - # note labels are floats - s = Series(['a', 'b', 'c'], index=[0, 0.5, 1]) - tmp = s.copy() - - s.loc[1] = 'zoo' - tmp.iloc[2] = 'zoo' - - assert_series_equal(s, tmp) - - def test_setitem_callable(self): - # GH 12533 - s = pd.Series([1, 2, 3, 4], index=list('ABCD')) - s[lambda x: 'A'] = -1 - tm.assert_series_equal(s, pd.Series([-1, 2, 3, 4], index=list('ABCD'))) - - def test_setitem_other_callable(self): - # GH 13299 - inc = lambda x: x + 1 - - s = pd.Series([1, 2, -1, 4]) - s[s < 0] = inc - - expected = pd.Series([1, 2, inc, 4]) - tm.assert_series_equal(s, expected) - - def test_slice(self): - numSlice = self.series[10:20] - numSliceEnd = self.series[-10:] - objSlice = 
self.objSeries[10:20] - - self.assertNotIn(self.series.index[9], numSlice.index) - self.assertNotIn(self.objSeries.index[9], objSlice.index) - - self.assertEqual(len(numSlice), len(numSlice.index)) - self.assertEqual(self.series[numSlice.index[0]], - numSlice[numSlice.index[0]]) - - self.assertEqual(numSlice.index[1], self.series.index[11]) - - self.assertTrue(tm.equalContents(numSliceEnd, np.array(self.series)[ - -10:])) - - # test return view - sl = self.series[10:20] - sl[:] = 0 - self.assertTrue((self.series[10:20] == 0).all()) - - def test_slice_can_reorder_not_uniquely_indexed(self): - s = Series(1, index=['a', 'a', 'b', 'b', 'c']) - s[::-1] # it works! - - def test_slice_float_get_set(self): - - self.assertRaises(TypeError, lambda: self.ts[4.0:10.0]) - - def f(): - self.ts[4.0:10.0] = 0 - - self.assertRaises(TypeError, f) - - self.assertRaises(TypeError, self.ts.__getitem__, slice(4.5, 10.0)) - self.assertRaises(TypeError, self.ts.__setitem__, slice(4.5, 10.0), 0) - - def test_slice_floats2(self): - s = Series(np.random.rand(10), index=np.arange(10, 20, dtype=float)) - - self.assertEqual(len(s.loc[12.0:]), 8) - self.assertEqual(len(s.loc[12.5:]), 7) - - i = np.arange(10, 20, dtype=float) - i[2] = 12.2 - s.index = i - self.assertEqual(len(s.loc[12.0:]), 8) - self.assertEqual(len(s.loc[12.5:]), 7) - - def test_slice_float64(self): - - values = np.arange(10., 50., 2) - index = Index(values) - - start, end = values[[5, 15]] - - s = Series(np.random.randn(20), index=index) - - result = s[start:end] - expected = s.iloc[5:16] - assert_series_equal(result, expected) - - result = s.loc[start:end] - assert_series_equal(result, expected) - - df = DataFrame(np.random.randn(20, 3), index=index) - - result = df[start:end] - expected = df.iloc[5:16] - tm.assert_frame_equal(result, expected) - - result = df.loc[start:end] - tm.assert_frame_equal(result, expected) - - def test_setitem(self): - self.ts[self.ts.index[5]] = np.NaN - self.ts[[1, 2, 17]] = np.NaN - self.ts[6] = 
np.NaN - self.assertTrue(np.isnan(self.ts[6])) - self.assertTrue(np.isnan(self.ts[2])) - self.ts[np.isnan(self.ts)] = 5 - self.assertFalse(np.isnan(self.ts[2])) - - # caught this bug when writing tests - series = Series(tm.makeIntIndex(20).astype(float), - index=tm.makeIntIndex(20)) - - series[::2] = 0 - self.assertTrue((series[::2] == 0).all()) - - # set item that's not contained - s = self.series.copy() - s['foobar'] = 1 - - app = Series([1], index=['foobar'], name='series') - expected = self.series.append(app) - assert_series_equal(s, expected) - - # Test for issue #10193 - key = pd.Timestamp('2012-01-01') - series = pd.Series() - series[key] = 47 - expected = pd.Series(47, [key]) - assert_series_equal(series, expected) - - series = pd.Series([], pd.DatetimeIndex([], freq='D')) - series[key] = 47 - expected = pd.Series(47, pd.DatetimeIndex([key], freq='D')) - assert_series_equal(series, expected) - - def test_setitem_dtypes(self): - - # change dtypes - # GH 4463 - expected = Series([np.nan, 2, 3]) - - s = Series([1, 2, 3]) - s.iloc[0] = np.nan - assert_series_equal(s, expected) - - s = Series([1, 2, 3]) - s.loc[0] = np.nan - assert_series_equal(s, expected) - - s = Series([1, 2, 3]) - s[0] = np.nan - assert_series_equal(s, expected) - - s = Series([False]) - s.loc[0] = np.nan - assert_series_equal(s, Series([np.nan])) - - s = Series([False, True]) - s.loc[0] = np.nan - assert_series_equal(s, Series([np.nan, 1.0])) - - def test_set_value(self): - idx = self.ts.index[10] - res = self.ts.set_value(idx, 0) - self.assertIs(res, self.ts) - self.assertEqual(self.ts[idx], 0) - - # equiv - s = self.series.copy() - res = s.set_value('foobar', 0) - self.assertIs(res, s) - self.assertEqual(res.index[-1], 'foobar') - self.assertEqual(res['foobar'], 0) - - s = self.series.copy() - s.loc['foobar'] = 0 - self.assertEqual(s.index[-1], 'foobar') - self.assertEqual(s['foobar'], 0) - - def test_setslice(self): - sl = self.ts[5:20] - self.assertEqual(len(sl), len(sl.index)) - 
self.assertTrue(sl.index.is_unique) - - def test_basic_getitem_setitem_corner(self): - # invalid tuples, e.g. self.ts[:, None] vs. self.ts[:, 2] - with tm.assertRaisesRegexp(ValueError, 'tuple-index'): - self.ts[:, 2] - with tm.assertRaisesRegexp(ValueError, 'tuple-index'): - self.ts[:, 2] = 2 - - # weird lists. [slice(0, 5)] will work but not two slices - result = self.ts[[slice(None, 5)]] - expected = self.ts[:5] - assert_series_equal(result, expected) - - # OK - self.assertRaises(Exception, self.ts.__getitem__, - [5, slice(None, None)]) - self.assertRaises(Exception, self.ts.__setitem__, - [5, slice(None, None)], 2) - - def test_basic_getitem_with_labels(self): - indices = self.ts.index[[5, 10, 15]] - - result = self.ts[indices] - expected = self.ts.reindex(indices) - assert_series_equal(result, expected) - - result = self.ts[indices[0]:indices[2]] - expected = self.ts.loc[indices[0]:indices[2]] - assert_series_equal(result, expected) - - # integer indexes, be careful - s = Series(np.random.randn(10), index=lrange(0, 20, 2)) - inds = [0, 2, 5, 7, 8] - arr_inds = np.array([0, 2, 5, 7, 8]) - result = s[inds] - expected = s.reindex(inds) - assert_series_equal(result, expected) - - result = s[arr_inds] - expected = s.reindex(arr_inds) - assert_series_equal(result, expected) - - # GH12089 - # with tz for values - s = Series(pd.date_range("2011-01-01", periods=3, tz="US/Eastern"), - index=['a', 'b', 'c']) - expected = Timestamp('2011-01-01', tz='US/Eastern') - result = s.loc['a'] - self.assertEqual(result, expected) - result = s.iloc[0] - self.assertEqual(result, expected) - result = s['a'] - self.assertEqual(result, expected) - - def test_basic_setitem_with_labels(self): - indices = self.ts.index[[5, 10, 15]] - - cp = self.ts.copy() - exp = self.ts.copy() - cp[indices] = 0 - exp.loc[indices] = 0 - assert_series_equal(cp, exp) - - cp = self.ts.copy() - exp = self.ts.copy() - cp[indices[0]:indices[2]] = 0 - exp.loc[indices[0]:indices[2]] = 0 - assert_series_equal(cp, 
exp) - - # integer indexes, be careful - s = Series(np.random.randn(10), index=lrange(0, 20, 2)) - inds = [0, 4, 6] - arr_inds = np.array([0, 4, 6]) - - cp = s.copy() - exp = s.copy() - s[inds] = 0 - s.loc[inds] = 0 - assert_series_equal(cp, exp) - - cp = s.copy() - exp = s.copy() - s[arr_inds] = 0 - s.loc[arr_inds] = 0 - assert_series_equal(cp, exp) - - inds_notfound = [0, 4, 5, 6] - arr_inds_notfound = np.array([0, 4, 5, 6]) - self.assertRaises(Exception, s.__setitem__, inds_notfound, 0) - self.assertRaises(Exception, s.__setitem__, arr_inds_notfound, 0) - - # GH12089 - # with tz for values - s = Series(pd.date_range("2011-01-01", periods=3, tz="US/Eastern"), - index=['a', 'b', 'c']) - s2 = s.copy() - expected = Timestamp('2011-01-03', tz='US/Eastern') - s2.loc['a'] = expected - result = s2.loc['a'] - self.assertEqual(result, expected) - - s2 = s.copy() - s2.iloc[0] = expected - result = s2.iloc[0] - self.assertEqual(result, expected) - - s2 = s.copy() - s2['a'] = expected - result = s2['a'] - self.assertEqual(result, expected) - - def test_loc_getitem(self): - inds = self.series.index[[3, 4, 7]] - assert_series_equal(self.series.loc[inds], self.series.reindex(inds)) - assert_series_equal(self.series.iloc[5::2], self.series[5::2]) - - # slice with indices - d1, d2 = self.ts.index[[5, 15]] - result = self.ts.loc[d1:d2] - expected = self.ts.truncate(d1, d2) - assert_series_equal(result, expected) - - # boolean - mask = self.series > self.series.median() - assert_series_equal(self.series.loc[mask], self.series[mask]) - - # ask for index value - self.assertEqual(self.ts.loc[d1], self.ts[d1]) - self.assertEqual(self.ts.loc[d2], self.ts[d2]) - - def test_loc_getitem_not_monotonic(self): - d1, d2 = self.ts.index[[5, 15]] - - ts2 = self.ts[::2][[1, 2, 0]] - - self.assertRaises(KeyError, ts2.loc.__getitem__, slice(d1, d2)) - self.assertRaises(KeyError, ts2.loc.__setitem__, slice(d1, d2), 0) - - def test_loc_getitem_setitem_integer_slice_keyerrors(self): - s = 
Series(np.random.randn(10), index=lrange(0, 20, 2)) - - # this is OK - cp = s.copy() - cp.iloc[4:10] = 0 - self.assertTrue((cp.iloc[4:10] == 0).all()) - - # so is this - cp = s.copy() - cp.iloc[3:11] = 0 - self.assertTrue((cp.iloc[3:11] == 0).values.all()) - - result = s.iloc[2:6] - result2 = s.loc[3:11] - expected = s.reindex([4, 6, 8, 10]) - - assert_series_equal(result, expected) - assert_series_equal(result2, expected) - - # non-monotonic, raise KeyError - s2 = s.iloc[lrange(5) + lrange(5, 10)[::-1]] - self.assertRaises(KeyError, s2.loc.__getitem__, slice(3, 11)) - self.assertRaises(KeyError, s2.loc.__setitem__, slice(3, 11), 0) - - def test_loc_getitem_iterator(self): - idx = iter(self.series.index[:10]) - result = self.series.loc[idx] - assert_series_equal(result, self.series[:10]) - - def test_setitem_with_tz(self): - for tz in ['US/Eastern', 'UTC', 'Asia/Tokyo']: - orig = pd.Series(pd.date_range('2016-01-01', freq='H', periods=3, - tz=tz)) - self.assertEqual(orig.dtype, 'datetime64[ns, {0}]'.format(tz)) - - # scalar - s = orig.copy() - s[1] = pd.Timestamp('2011-01-01', tz=tz) - exp = pd.Series([pd.Timestamp('2016-01-01 00:00', tz=tz), - pd.Timestamp('2011-01-01 00:00', tz=tz), - pd.Timestamp('2016-01-01 02:00', tz=tz)]) - tm.assert_series_equal(s, exp) - - s = orig.copy() - s.loc[1] = pd.Timestamp('2011-01-01', tz=tz) - tm.assert_series_equal(s, exp) - - s = orig.copy() - s.iloc[1] = pd.Timestamp('2011-01-01', tz=tz) - tm.assert_series_equal(s, exp) - - # vector - vals = pd.Series([pd.Timestamp('2011-01-01', tz=tz), - pd.Timestamp('2012-01-01', tz=tz)], index=[1, 2]) - self.assertEqual(vals.dtype, 'datetime64[ns, {0}]'.format(tz)) - - s[[1, 2]] = vals - exp = pd.Series([pd.Timestamp('2016-01-01 00:00', tz=tz), - pd.Timestamp('2011-01-01 00:00', tz=tz), - pd.Timestamp('2012-01-01 00:00', tz=tz)]) - tm.assert_series_equal(s, exp) - - s = orig.copy() - s.loc[[1, 2]] = vals - tm.assert_series_equal(s, exp) - - s = orig.copy() - s.iloc[[1, 2]] = vals - 
tm.assert_series_equal(s, exp) - - def test_setitem_with_tz_dst(self): - # GH XXX - tz = 'US/Eastern' - orig = pd.Series(pd.date_range('2016-11-06', freq='H', periods=3, - tz=tz)) - self.assertEqual(orig.dtype, 'datetime64[ns, {0}]'.format(tz)) - - # scalar - s = orig.copy() - s[1] = pd.Timestamp('2011-01-01', tz=tz) - exp = pd.Series([pd.Timestamp('2016-11-06 00:00', tz=tz), - pd.Timestamp('2011-01-01 00:00', tz=tz), - pd.Timestamp('2016-11-06 02:00', tz=tz)]) - tm.assert_series_equal(s, exp) - - s = orig.copy() - s.loc[1] = pd.Timestamp('2011-01-01', tz=tz) - tm.assert_series_equal(s, exp) - - s = orig.copy() - s.iloc[1] = pd.Timestamp('2011-01-01', tz=tz) - tm.assert_series_equal(s, exp) - - # vector - vals = pd.Series([pd.Timestamp('2011-01-01', tz=tz), - pd.Timestamp('2012-01-01', tz=tz)], index=[1, 2]) - self.assertEqual(vals.dtype, 'datetime64[ns, {0}]'.format(tz)) - - s[[1, 2]] = vals - exp = pd.Series([pd.Timestamp('2016-11-06 00:00', tz=tz), - pd.Timestamp('2011-01-01 00:00', tz=tz), - pd.Timestamp('2012-01-01 00:00', tz=tz)]) - tm.assert_series_equal(s, exp) - - s = orig.copy() - s.loc[[1, 2]] = vals - tm.assert_series_equal(s, exp) - - s = orig.copy() - s.iloc[[1, 2]] = vals - tm.assert_series_equal(s, exp) - - def test_where(self): - s = Series(np.random.randn(5)) - cond = s > 0 - - rs = s.where(cond).dropna() - rs2 = s[cond] - assert_series_equal(rs, rs2) - - rs = s.where(cond, -s) - assert_series_equal(rs, s.abs()) - - rs = s.where(cond) - assert (s.shape == rs.shape) - assert (rs is not s) - - # test alignment - cond = Series([True, False, False, True, False], index=s.index) - s2 = -(s.abs()) - - expected = s2[cond].reindex(s2.index[:3]).reindex(s2.index) - rs = s2.where(cond[:3]) - assert_series_equal(rs, expected) - - expected = s2.abs() - expected.iloc[0] = s2[0] - rs = s2.where(cond[:3], -s2) - assert_series_equal(rs, expected) - - self.assertRaises(ValueError, s.where, 1) - self.assertRaises(ValueError, s.where, cond[:3].values, -s) - - # GH 
2745 - s = Series([1, 2]) - s[[True, False]] = [0, 1] - expected = Series([0, 2]) - assert_series_equal(s, expected) - - # failures - self.assertRaises(ValueError, s.__setitem__, tuple([[[True, False]]]), - [0, 2, 3]) - self.assertRaises(ValueError, s.__setitem__, tuple([[[True, False]]]), - []) - - # unsafe dtype changes - for dtype in [np.int8, np.int16, np.int32, np.int64, np.float16, - np.float32, np.float64]: - s = Series(np.arange(10), dtype=dtype) - mask = s < 5 - s[mask] = lrange(2, 7) - expected = Series(lrange(2, 7) + lrange(5, 10), dtype=dtype) - assert_series_equal(s, expected) - self.assertEqual(s.dtype, expected.dtype) - - # these are allowed operations, but are upcasted - for dtype in [np.int64, np.float64]: - s = Series(np.arange(10), dtype=dtype) - mask = s < 5 - values = [2.5, 3.5, 4.5, 5.5, 6.5] - s[mask] = values - expected = Series(values + lrange(5, 10), dtype='float64') - assert_series_equal(s, expected) - self.assertEqual(s.dtype, expected.dtype) - - # GH 9731 - s = Series(np.arange(10), dtype='int64') - mask = s > 5 - values = [2.5, 3.5, 4.5, 5.5] - s[mask] = values - expected = Series(lrange(6) + values, dtype='float64') - assert_series_equal(s, expected) - - # can't do these as we are forced to change the itemsize of the input - # to something we cannot - for dtype in [np.int8, np.int16, np.int32, np.float16, np.float32]: - s = Series(np.arange(10), dtype=dtype) - mask = s < 5 - values = [2.5, 3.5, 4.5, 5.5, 6.5] - self.assertRaises(Exception, s.__setitem__, tuple(mask), values) - - # GH3235 - s = Series(np.arange(10), dtype='int64') - mask = s < 5 - s[mask] = lrange(2, 7) - expected = Series(lrange(2, 7) + lrange(5, 10), dtype='int64') - assert_series_equal(s, expected) - self.assertEqual(s.dtype, expected.dtype) - - s = Series(np.arange(10), dtype='int64') - mask = s > 5 - s[mask] = [0] * 4 - expected = Series([0, 1, 2, 3, 4, 5] + [0] * 4, dtype='int64') - assert_series_equal(s, expected) - - s = Series(np.arange(10)) - mask = s > 5 - - 
def f(): - s[mask] = [5, 4, 3, 2, 1] - - self.assertRaises(ValueError, f) - - def f(): - s[mask] = [0] * 5 - - self.assertRaises(ValueError, f) - - # dtype changes - s = Series([1, 2, 3, 4]) - result = s.where(s > 2, np.nan) - expected = Series([np.nan, np.nan, 3, 4]) - assert_series_equal(result, expected) - - # GH 4667 - # setting with None changes dtype - s = Series(range(10)).astype(float) - s[8] = None - result = s[8] - self.assertTrue(isnull(result)) - - s = Series(range(10)).astype(float) - s[s > 8] = None - result = s[isnull(s)] - expected = Series(np.nan, index=[9]) - assert_series_equal(result, expected) - - def test_where_setitem_invalid(self): - - # GH 2702 - # make sure correct exceptions are raised on invalid list assignment - - # slice - s = Series(list('abc')) - - def f(): - s[0:3] = list(range(27)) - - self.assertRaises(ValueError, f) - - s[0:3] = list(range(3)) - expected = Series([0, 1, 2]) - assert_series_equal(s.astype(np.int64), expected, ) - - # slice with step - s = Series(list('abcdef')) - - def f(): - s[0:4:2] = list(range(27)) - - self.assertRaises(ValueError, f) - - s = Series(list('abcdef')) - s[0:4:2] = list(range(2)) - expected = Series([0, 'b', 1, 'd', 'e', 'f']) - assert_series_equal(s, expected) - - # neg slices - s = Series(list('abcdef')) - - def f(): - s[:-1] = list(range(27)) - - self.assertRaises(ValueError, f) - - s[-3:-1] = list(range(2)) - expected = Series(['a', 'b', 'c', 0, 1, 'f']) - assert_series_equal(s, expected) - - # list - s = Series(list('abc')) - - def f(): - s[[0, 1, 2]] = list(range(27)) - - self.assertRaises(ValueError, f) - - s = Series(list('abc')) - - def f(): - s[[0, 1, 2]] = list(range(2)) - - self.assertRaises(ValueError, f) - - # scalar - s = Series(list('abc')) - s[0] = list(range(10)) - expected = Series([list(range(10)), 'b', 'c']) - assert_series_equal(s, expected) - - def test_where_broadcast(self): - # Test a variety of differently sized series - for size in range(2, 6): - # Test a variety of 
boolean indices - for selection in [ - # First element should be set - np.resize([True, False, False, False, False], size), - # Set alternating elements] - np.resize([True, False], size), - # No element should be set - np.resize([False], size)]: - - # Test a variety of different numbers as content - for item in [2.0, np.nan, np.finfo(np.float).max, - np.finfo(np.float).min]: - # Test numpy arrays, lists and tuples as the input to be - # broadcast - for arr in [np.array([item]), [item], (item, )]: - data = np.arange(size, dtype=float) - s = Series(data) - s[selection] = arr - # Construct the expected series by taking the source - # data or item based on the selection - expected = Series([item if use_item else data[ - i] for i, use_item in enumerate(selection)]) - assert_series_equal(s, expected) - - s = Series(data) - result = s.where(~selection, arr) - assert_series_equal(result, expected) - - def test_where_inplace(self): - s = Series(np.random.randn(5)) - cond = s > 0 - - rs = s.copy() - - rs.where(cond, inplace=True) - assert_series_equal(rs.dropna(), s[cond]) - assert_series_equal(rs, s.where(cond)) - - rs = s.copy() - rs.where(cond, -s, inplace=True) - assert_series_equal(rs, s.where(cond, -s)) - - def test_where_dups(self): - # GH 4550 - # where crashes with dups in index - s1 = Series(list(range(3))) - s2 = Series(list(range(3))) - comb = pd.concat([s1, s2]) - result = comb.where(comb < 2) - expected = Series([0, 1, np.nan, 0, 1, np.nan], - index=[0, 1, 2, 0, 1, 2]) - assert_series_equal(result, expected) - - # GH 4548 - # inplace updating not working with dups - comb[comb < 1] = 5 - expected = Series([5, 1, 2, 5, 1, 2], index=[0, 1, 2, 0, 1, 2]) - assert_series_equal(comb, expected) - - comb[comb < 2] += 10 - expected = Series([5, 11, 2, 5, 11, 2], index=[0, 1, 2, 0, 1, 2]) - assert_series_equal(comb, expected) - - def test_where_datetime(self): - s = Series(date_range('20130102', periods=2)) - expected = Series([10, 10], dtype='datetime64[ns]') - mask = 
np.array([False, False]) - - rs = s.where(mask, [10, 10]) - assert_series_equal(rs, expected) - - rs = s.where(mask, 10) - assert_series_equal(rs, expected) - - rs = s.where(mask, 10.0) - assert_series_equal(rs, expected) - - rs = s.where(mask, [10.0, 10.0]) - assert_series_equal(rs, expected) - - rs = s.where(mask, [10.0, np.nan]) - expected = Series([10, None], dtype='datetime64[ns]') - assert_series_equal(rs, expected) - - def test_where_timedelta(self): - s = Series([1, 2], dtype='timedelta64[ns]') - expected = Series([10, 10], dtype='timedelta64[ns]') - mask = np.array([False, False]) - - rs = s.where(mask, [10, 10]) - assert_series_equal(rs, expected) - - rs = s.where(mask, 10) - assert_series_equal(rs, expected) - - rs = s.where(mask, 10.0) - assert_series_equal(rs, expected) - - rs = s.where(mask, [10.0, 10.0]) - assert_series_equal(rs, expected) - - rs = s.where(mask, [10.0, np.nan]) - expected = Series([10, None], dtype='timedelta64[ns]') - assert_series_equal(rs, expected) - - def test_mask(self): - # compare with tested results in test_where - s = Series(np.random.randn(5)) - cond = s > 0 - - rs = s.where(~cond, np.nan) - assert_series_equal(rs, s.mask(cond)) - - rs = s.where(~cond) - rs2 = s.mask(cond) - assert_series_equal(rs, rs2) - - rs = s.where(~cond, -s) - rs2 = s.mask(cond, -s) - assert_series_equal(rs, rs2) - - cond = Series([True, False, False, True, False], index=s.index) - s2 = -(s.abs()) - rs = s2.where(~cond[:3]) - rs2 = s2.mask(cond[:3]) - assert_series_equal(rs, rs2) - - rs = s2.where(~cond[:3], -s2) - rs2 = s2.mask(cond[:3], -s2) - assert_series_equal(rs, rs2) - - self.assertRaises(ValueError, s.mask, 1) - self.assertRaises(ValueError, s.mask, cond[:3].values, -s) - - # dtype changes - s = Series([1, 2, 3, 4]) - result = s.mask(s > 2, np.nan) - expected = Series([1, 2, np.nan, np.nan]) - assert_series_equal(result, expected) - - def test_mask_broadcast(self): - # GH 8801 - # copied from test_where_broadcast - for size in range(2, 6): - 
for selection in [ - # First element should be set - np.resize([True, False, False, False, False], size), - # Set alternating elements] - np.resize([True, False], size), - # No element should be set - np.resize([False], size)]: - for item in [2.0, np.nan, np.finfo(np.float).max, - np.finfo(np.float).min]: - for arr in [np.array([item]), [item], (item, )]: - data = np.arange(size, dtype=float) - s = Series(data) - result = s.mask(selection, arr) - expected = Series([item if use_item else data[ - i] for i, use_item in enumerate(selection)]) - assert_series_equal(result, expected) - - def test_mask_inplace(self): - s = Series(np.random.randn(5)) - cond = s > 0 - - rs = s.copy() - rs.mask(cond, inplace=True) - assert_series_equal(rs.dropna(), s[~cond]) - assert_series_equal(rs, s.mask(cond)) - - rs = s.copy() - rs.mask(cond, -s, inplace=True) - assert_series_equal(rs, s.mask(cond, -s)) - - def test_ix_setitem(self): - inds = self.series.index[[3, 4, 7]] - - result = self.series.copy() - result.loc[inds] = 5 - - expected = self.series.copy() - expected[[3, 4, 7]] = 5 - assert_series_equal(result, expected) - - result.iloc[5:10] = 10 - expected[5:10] = 10 - assert_series_equal(result, expected) - - # set slice with indices - d1, d2 = self.series.index[[5, 15]] - result.loc[d1:d2] = 6 - expected[5:16] = 6 # because it's inclusive - assert_series_equal(result, expected) - - # set index value - self.series.loc[d1] = 4 - self.series.loc[d2] = 6 - self.assertEqual(self.series[d1], 4) - self.assertEqual(self.series[d2], 6) - - def test_where_numeric_with_string(self): - # GH 9280 - s = pd.Series([1, 2, 3]) - w = s.where(s > 1, 'X') - - self.assertFalse(is_integer(w[0])) - self.assertTrue(is_integer(w[1])) - self.assertTrue(is_integer(w[2])) - self.assertTrue(isinstance(w[0], str)) - self.assertTrue(w.dtype == 'object') - - w = s.where(s > 1, ['X', 'Y', 'Z']) - self.assertFalse(is_integer(w[0])) - self.assertTrue(is_integer(w[1])) - self.assertTrue(is_integer(w[2])) - 
self.assertTrue(isinstance(w[0], str)) - self.assertTrue(w.dtype == 'object') - - w = s.where(s > 1, np.array(['X', 'Y', 'Z'])) - self.assertFalse(is_integer(w[0])) - self.assertTrue(is_integer(w[1])) - self.assertTrue(is_integer(w[2])) - self.assertTrue(isinstance(w[0], str)) - self.assertTrue(w.dtype == 'object') - - def test_setitem_boolean(self): - mask = self.series > self.series.median() - - # similiar indexed series - result = self.series.copy() - result[mask] = self.series * 2 - expected = self.series * 2 - assert_series_equal(result[mask], expected[mask]) - - # needs alignment - result = self.series.copy() - result[mask] = (self.series * 2)[0:5] - expected = (self.series * 2)[0:5].reindex_like(self.series) - expected[-mask] = self.series[mask] - assert_series_equal(result[mask], expected[mask]) - - def test_ix_setitem_boolean(self): - mask = self.series > self.series.median() - - result = self.series.copy() - result.loc[mask] = 0 - expected = self.series - expected[mask] = 0 - assert_series_equal(result, expected) - - def test_ix_setitem_corner(self): - inds = list(self.series.index[[5, 8, 12]]) - self.series.loc[inds] = 5 - self.assertRaises(Exception, self.series.loc.__setitem__, - inds + ['foo'], 5) - - def test_get_set_boolean_different_order(self): - ordered = self.series.sort_values() - - # setting - copy = self.series.copy() - copy[ordered > 0] = 0 - - expected = self.series.copy() - expected[expected > 0] = 0 - - assert_series_equal(copy, expected) - - # getting - sel = self.series[ordered > 0] - exp = self.series[self.series > 0] - assert_series_equal(sel, exp) - - def test_setitem_na(self): - # these induce dtype changes - expected = Series([np.nan, 3, np.nan, 5, np.nan, 7, np.nan, 9, np.nan]) - s = Series([2, 3, 4, 5, 6, 7, 8, 9, 10]) - s[::2] = np.nan - assert_series_equal(s, expected) - - # get's coerced to float, right? 
- expected = Series([np.nan, 1, np.nan, 0]) - s = Series([True, True, False, False]) - s[::2] = np.nan - assert_series_equal(s, expected) - - expected = Series([np.nan, np.nan, np.nan, np.nan, np.nan, 5, 6, 7, 8, - 9]) - s = Series(np.arange(10)) - s[:5] = np.nan - assert_series_equal(s, expected) - - def test_basic_indexing(self): - s = Series(np.random.randn(5), index=['a', 'b', 'a', 'a', 'b']) - - self.assertRaises(IndexError, s.__getitem__, 5) - self.assertRaises(IndexError, s.__setitem__, 5, 0) - - self.assertRaises(KeyError, s.__getitem__, 'c') - - s = s.sort_index() - - self.assertRaises(IndexError, s.__getitem__, 5) - self.assertRaises(IndexError, s.__setitem__, 5, 0) - - def test_int_indexing(self): - s = Series(np.random.randn(6), index=[0, 0, 1, 1, 2, 2]) - - self.assertRaises(KeyError, s.__getitem__, 5) - - self.assertRaises(KeyError, s.__getitem__, 'c') - - # not monotonic - s = Series(np.random.randn(6), index=[2, 2, 0, 0, 1, 1]) - - self.assertRaises(KeyError, s.__getitem__, 5) - - self.assertRaises(KeyError, s.__getitem__, 'c') - - def test_datetime_indexing(self): - from pandas import date_range - - index = date_range('1/1/2000', '1/7/2000') - index = index.repeat(3) - - s = Series(len(index), index=index) - stamp = Timestamp('1/8/2000') - - self.assertRaises(KeyError, s.__getitem__, stamp) - s[stamp] = 0 - self.assertEqual(s[stamp], 0) - - # not monotonic - s = Series(len(index), index=index) - s = s[::-1] - - self.assertRaises(KeyError, s.__getitem__, stamp) - s[stamp] = 0 - self.assertEqual(s[stamp], 0) - - def test_timedelta_assignment(self): - # GH 8209 - s = Series([]) - s.loc['B'] = timedelta(1) - tm.assert_series_equal(s, Series(Timedelta('1 days'), index=['B'])) - - s = s.reindex(s.index.insert(0, 'A')) - tm.assert_series_equal(s, Series( - [np.nan, Timedelta('1 days')], index=['A', 'B'])) - - result = s.fillna(timedelta(1)) - expected = Series(Timedelta('1 days'), index=['A', 'B']) - tm.assert_series_equal(result, expected) - - s.loc['A'] 
= timedelta(1) - tm.assert_series_equal(s, expected) - - # GH 14155 - s = Series(10 * [np.timedelta64(10, 'm')]) - s.loc[[1, 2, 3]] = np.timedelta64(20, 'm') - expected = pd.Series(10 * [np.timedelta64(10, 'm')]) - expected.loc[[1, 2, 3]] = pd.Timedelta(np.timedelta64(20, 'm')) - tm.assert_series_equal(s, expected) - - def test_underlying_data_conversion(self): - - # GH 4080 - df = DataFrame(dict((c, [1, 2, 3]) for c in ['a', 'b', 'c'])) - df.set_index(['a', 'b', 'c'], inplace=True) - s = Series([1], index=[(2, 2, 2)]) - df['val'] = 0 - df - df['val'].update(s) - - expected = DataFrame( - dict(a=[1, 2, 3], b=[1, 2, 3], c=[1, 2, 3], val=[0, 1, 0])) - expected.set_index(['a', 'b', 'c'], inplace=True) - tm.assert_frame_equal(df, expected) - - # GH 3970 - # these are chained assignments as well - pd.set_option('chained_assignment', None) - df = DataFrame({"aa": range(5), "bb": [2.2] * 5}) - df["cc"] = 0.0 - - ck = [True] * len(df) - - df["bb"].iloc[0] = .13 - - # TODO: unused - df_tmp = df.iloc[ck] # noqa - - df["bb"].iloc[0] = .15 - self.assertEqual(df['bb'].iloc[0], 0.15) - pd.set_option('chained_assignment', 'raise') - - # GH 3217 - df = DataFrame(dict(a=[1, 3], b=[np.nan, 2])) - df['c'] = np.nan - df['c'].update(pd.Series(['foo'], index=[0])) - - expected = DataFrame(dict(a=[1, 3], b=[np.nan, 2], c=['foo', np.nan])) - tm.assert_frame_equal(df, expected) - - def test_preserveRefs(self): - seq = self.ts[[5, 10, 15]] - seq[1] = np.NaN - self.assertFalse(np.isnan(self.ts[10])) - - def test_drop(self): - - # unique - s = Series([1, 2], index=['one', 'two']) - expected = Series([1], index=['one']) - result = s.drop(['two']) - assert_series_equal(result, expected) - result = s.drop('two', axis='rows') - assert_series_equal(result, expected) - - # non-unique - # GH 5248 - s = Series([1, 1, 2], index=['one', 'two', 'one']) - expected = Series([1, 2], index=['one', 'one']) - result = s.drop(['two'], axis=0) - assert_series_equal(result, expected) - result = s.drop('two') - 
assert_series_equal(result, expected) - - expected = Series([1], index=['two']) - result = s.drop(['one']) - assert_series_equal(result, expected) - result = s.drop('one') - assert_series_equal(result, expected) - - # single string/tuple-like - s = Series(range(3), index=list('abc')) - self.assertRaises(ValueError, s.drop, 'bc') - self.assertRaises(ValueError, s.drop, ('a', )) - - # errors='ignore' - s = Series(range(3), index=list('abc')) - result = s.drop('bc', errors='ignore') - assert_series_equal(result, s) - result = s.drop(['a', 'd'], errors='ignore') - expected = s.iloc[1:] - assert_series_equal(result, expected) - - # bad axis - self.assertRaises(ValueError, s.drop, 'one', axis='columns') - - # GH 8522 - s = Series([2, 3], index=[True, False]) - self.assertTrue(s.index.is_object()) - result = s.drop(True) - expected = Series([3], index=[False]) - assert_series_equal(result, expected) - - def test_align(self): - def _check_align(a, b, how='left', fill=None): - aa, ab = a.align(b, join=how, fill_value=fill) - - join_index = a.index.join(b.index, how=how) - if fill is not None: - diff_a = aa.index.difference(join_index) - diff_b = ab.index.difference(join_index) - if len(diff_a) > 0: - self.assertTrue((aa.reindex(diff_a) == fill).all()) - if len(diff_b) > 0: - self.assertTrue((ab.reindex(diff_b) == fill).all()) - - ea = a.reindex(join_index) - eb = b.reindex(join_index) - - if fill is not None: - ea = ea.fillna(fill) - eb = eb.fillna(fill) - - assert_series_equal(aa, ea) - assert_series_equal(ab, eb) - self.assertEqual(aa.name, 'ts') - self.assertEqual(ea.name, 'ts') - self.assertEqual(ab.name, 'ts') - self.assertEqual(eb.name, 'ts') - - for kind in JOIN_TYPES: - _check_align(self.ts[2:], self.ts[:-5], how=kind) - _check_align(self.ts[2:], self.ts[:-5], how=kind, fill=-1) - - # empty left - _check_align(self.ts[:0], self.ts[:-5], how=kind) - _check_align(self.ts[:0], self.ts[:-5], how=kind, fill=-1) - - # empty right - _check_align(self.ts[:-5], self.ts[:0], 
how=kind) - _check_align(self.ts[:-5], self.ts[:0], how=kind, fill=-1) - - # both empty - _check_align(self.ts[:0], self.ts[:0], how=kind) - _check_align(self.ts[:0], self.ts[:0], how=kind, fill=-1) - - def test_align_fill_method(self): - def _check_align(a, b, how='left', method='pad', limit=None): - aa, ab = a.align(b, join=how, method=method, limit=limit) - - join_index = a.index.join(b.index, how=how) - ea = a.reindex(join_index) - eb = b.reindex(join_index) - - ea = ea.fillna(method=method, limit=limit) - eb = eb.fillna(method=method, limit=limit) - - assert_series_equal(aa, ea) - assert_series_equal(ab, eb) - - for kind in JOIN_TYPES: - for meth in ['pad', 'bfill']: - _check_align(self.ts[2:], self.ts[:-5], how=kind, method=meth) - _check_align(self.ts[2:], self.ts[:-5], how=kind, method=meth, - limit=1) - - # empty left - _check_align(self.ts[:0], self.ts[:-5], how=kind, method=meth) - _check_align(self.ts[:0], self.ts[:-5], how=kind, method=meth, - limit=1) - - # empty right - _check_align(self.ts[:-5], self.ts[:0], how=kind, method=meth) - _check_align(self.ts[:-5], self.ts[:0], how=kind, method=meth, - limit=1) - - # both empty - _check_align(self.ts[:0], self.ts[:0], how=kind, method=meth) - _check_align(self.ts[:0], self.ts[:0], how=kind, method=meth, - limit=1) - - def test_align_nocopy(self): - b = self.ts[:5].copy() - - # do copy - a = self.ts.copy() - ra, _ = a.align(b, join='left') - ra[:5] = 5 - self.assertFalse((a[:5] == 5).any()) - - # do not copy - a = self.ts.copy() - ra, _ = a.align(b, join='left', copy=False) - ra[:5] = 5 - self.assertTrue((a[:5] == 5).all()) - - # do copy - a = self.ts.copy() - b = self.ts[:5].copy() - _, rb = a.align(b, join='right') - rb[:3] = 5 - self.assertFalse((b[:3] == 5).any()) - - # do not copy - a = self.ts.copy() - b = self.ts[:5].copy() - _, rb = a.align(b, join='right', copy=False) - rb[:2] = 5 - self.assertTrue((b[:2] == 5).all()) - - def test_align_sameindex(self): - a, b = self.ts.align(self.ts, copy=False) 
- self.assertIs(a.index, self.ts.index) - self.assertIs(b.index, self.ts.index) - - # a, b = self.ts.align(self.ts, copy=True) - # self.assertIsNot(a.index, self.ts.index) - # self.assertIsNot(b.index, self.ts.index) - - def test_align_multiindex(self): - # GH 10665 - - midx = pd.MultiIndex.from_product([range(2), range(3), range(2)], - names=('a', 'b', 'c')) - idx = pd.Index(range(2), name='b') - s1 = pd.Series(np.arange(12, dtype='int64'), index=midx) - s2 = pd.Series(np.arange(2, dtype='int64'), index=idx) - - # these must be the same results (but flipped) - res1l, res1r = s1.align(s2, join='left') - res2l, res2r = s2.align(s1, join='right') - - expl = s1 - tm.assert_series_equal(expl, res1l) - tm.assert_series_equal(expl, res2r) - expr = pd.Series([0, 0, 1, 1, np.nan, np.nan] * 2, index=midx) - tm.assert_series_equal(expr, res1r) - tm.assert_series_equal(expr, res2l) - - res1l, res1r = s1.align(s2, join='right') - res2l, res2r = s2.align(s1, join='left') - - exp_idx = pd.MultiIndex.from_product([range(2), range(2), range(2)], - names=('a', 'b', 'c')) - expl = pd.Series([0, 1, 2, 3, 6, 7, 8, 9], index=exp_idx) - tm.assert_series_equal(expl, res1l) - tm.assert_series_equal(expl, res2r) - expr = pd.Series([0, 0, 1, 1] * 2, index=exp_idx) - tm.assert_series_equal(expr, res1r) - tm.assert_series_equal(expr, res2l) - - def test_reindex(self): - - identity = self.series.reindex(self.series.index) - - # __array_interface__ is not defined for older numpies - # and on some pythons - try: - self.assertTrue(np.may_share_memory(self.series.index, - identity.index)) - except (AttributeError): - pass - - self.assertTrue(identity.index.is_(self.series.index)) - self.assertTrue(identity.index.identical(self.series.index)) - - subIndex = self.series.index[10:20] - subSeries = self.series.reindex(subIndex) - - for idx, val in compat.iteritems(subSeries): - self.assertEqual(val, self.series[idx]) - - subIndex2 = self.ts.index[10:20] - subTS = self.ts.reindex(subIndex2) - - for 
idx, val in compat.iteritems(subTS): - self.assertEqual(val, self.ts[idx]) - stuffSeries = self.ts.reindex(subIndex) - - self.assertTrue(np.isnan(stuffSeries).all()) - - # This is extremely important for the Cython code to not screw up - nonContigIndex = self.ts.index[::2] - subNonContig = self.ts.reindex(nonContigIndex) - for idx, val in compat.iteritems(subNonContig): - self.assertEqual(val, self.ts[idx]) - - # return a copy the same index here - result = self.ts.reindex() - self.assertFalse((result is self.ts)) - - def test_reindex_nan(self): - ts = Series([2, 3, 5, 7], index=[1, 4, nan, 8]) - - i, j = [nan, 1, nan, 8, 4, nan], [2, 0, 2, 3, 1, 2] - assert_series_equal(ts.reindex(i), ts.iloc[j]) - - ts.index = ts.index.astype('object') - - # reindex coerces index.dtype to float, loc/iloc doesn't - assert_series_equal(ts.reindex(i), ts.iloc[j], check_index_type=False) - - def test_reindex_series_add_nat(self): - rng = date_range('1/1/2000 00:00:00', periods=10, freq='10s') - series = Series(rng) - - result = series.reindex(lrange(15)) - self.assertTrue(np.issubdtype(result.dtype, np.dtype('M8[ns]'))) - - mask = result.isnull() - self.assertTrue(mask[-5:].all()) - self.assertFalse(mask[:-5].any()) - - def test_reindex_with_datetimes(self): - rng = date_range('1/1/2000', periods=20) - ts = Series(np.random.randn(20), index=rng) - - result = ts.reindex(list(ts.index[5:10])) - expected = ts[5:10] - tm.assert_series_equal(result, expected) - - result = ts[list(ts.index[5:10])] - tm.assert_series_equal(result, expected) - - def test_reindex_corner(self): - # (don't forget to fix this) I think it's fixed - self.empty.reindex(self.ts.index, method='pad') # it works - - # corner case: pad empty series - reindexed = self.empty.reindex(self.ts.index, method='pad') - - # pass non-Index - reindexed = self.ts.reindex(list(self.ts.index)) - assert_series_equal(self.ts, reindexed) - - # bad fill method - ts = self.ts[::2] - self.assertRaises(Exception, ts.reindex, self.ts.index, 
method='foo') - - def test_reindex_pad(self): - - s = Series(np.arange(10), dtype='int64') - s2 = s[::2] - - reindexed = s2.reindex(s.index, method='pad') - reindexed2 = s2.reindex(s.index, method='ffill') - assert_series_equal(reindexed, reindexed2) - - expected = Series([0, 0, 2, 2, 4, 4, 6, 6, 8, 8], index=np.arange(10)) - assert_series_equal(reindexed, expected) - - # GH4604 - s = Series([1, 2, 3, 4, 5], index=['a', 'b', 'c', 'd', 'e']) - new_index = ['a', 'g', 'c', 'f'] - expected = Series([1, 1, 3, 3], index=new_index) - - # this changes dtype because the ffill happens after - result = s.reindex(new_index).ffill() - assert_series_equal(result, expected.astype('float64')) - - result = s.reindex(new_index).ffill(downcast='infer') - assert_series_equal(result, expected) - - expected = Series([1, 5, 3, 5], index=new_index) - result = s.reindex(new_index, method='ffill') - assert_series_equal(result, expected) - - # inferrence of new dtype - s = Series([True, False, False, True], index=list('abcd')) - new_index = 'agc' - result = s.reindex(list(new_index)).ffill() - expected = Series([True, True, False], index=list(new_index)) - assert_series_equal(result, expected) - - # GH4618 shifted series downcasting - s = Series(False, index=lrange(0, 5)) - result = s.shift(1).fillna(method='bfill') - expected = Series(False, index=lrange(0, 5)) - assert_series_equal(result, expected) - - def test_reindex_nearest(self): - s = Series(np.arange(10, dtype='int64')) - target = [0.1, 0.9, 1.5, 2.0] - actual = s.reindex(target, method='nearest') - expected = Series(np.around(target).astype('int64'), target) - assert_series_equal(expected, actual) - - actual = s.reindex_like(actual, method='nearest') - assert_series_equal(expected, actual) - - actual = s.reindex_like(actual, method='nearest', tolerance=1) - assert_series_equal(expected, actual) - - actual = s.reindex(target, method='nearest', tolerance=0.2) - expected = Series([0, 1, np.nan, 2], target) - 
assert_series_equal(expected, actual) - - def test_reindex_backfill(self): - pass - - def test_reindex_int(self): - ts = self.ts[::2] - int_ts = Series(np.zeros(len(ts), dtype=int), index=ts.index) - - # this should work fine - reindexed_int = int_ts.reindex(self.ts.index) - - # if NaNs introduced - self.assertEqual(reindexed_int.dtype, np.float_) - - # NO NaNs introduced - reindexed_int = int_ts.reindex(int_ts.index[::2]) - self.assertEqual(reindexed_int.dtype, np.int_) - - def test_reindex_bool(self): - - # A series other than float, int, string, or object - ts = self.ts[::2] - bool_ts = Series(np.zeros(len(ts), dtype=bool), index=ts.index) - - # this should work fine - reindexed_bool = bool_ts.reindex(self.ts.index) - - # if NaNs introduced - self.assertEqual(reindexed_bool.dtype, np.object_) - - # NO NaNs introduced - reindexed_bool = bool_ts.reindex(bool_ts.index[::2]) - self.assertEqual(reindexed_bool.dtype, np.bool_) - - def test_reindex_bool_pad(self): - # fail - ts = self.ts[5:] - bool_ts = Series(np.zeros(len(ts), dtype=bool), index=ts.index) - filled_bool = bool_ts.reindex(self.ts.index, method='pad') - self.assertTrue(isnull(filled_bool[:5]).all()) - - def test_reindex_like(self): - other = self.ts[::2] - assert_series_equal(self.ts.reindex(other.index), - self.ts.reindex_like(other)) - - # GH 7179 - day1 = datetime(2013, 3, 5) - day2 = datetime(2013, 5, 5) - day3 = datetime(2014, 3, 5) - - series1 = Series([5, None, None], [day1, day2, day3]) - series2 = Series([None, None], [day1, day3]) - - result = series1.reindex_like(series2, method='pad') - expected = Series([5, np.nan], index=[day1, day3]) - assert_series_equal(result, expected) - - def test_reindex_fill_value(self): - # ----------------------------------------------------------- - # floats - floats = Series([1., 2., 3.]) - result = floats.reindex([1, 2, 3]) - expected = Series([2., 3., np.nan], index=[1, 2, 3]) - assert_series_equal(result, expected) - - result = floats.reindex([1, 2, 3], 
fill_value=0) - expected = Series([2., 3., 0], index=[1, 2, 3]) - assert_series_equal(result, expected) - - # ----------------------------------------------------------- - # ints - ints = Series([1, 2, 3]) - - result = ints.reindex([1, 2, 3]) - expected = Series([2., 3., np.nan], index=[1, 2, 3]) - assert_series_equal(result, expected) - - # don't upcast - result = ints.reindex([1, 2, 3], fill_value=0) - expected = Series([2, 3, 0], index=[1, 2, 3]) - self.assertTrue(issubclass(result.dtype.type, np.integer)) - assert_series_equal(result, expected) - - # ----------------------------------------------------------- - # objects - objects = Series([1, 2, 3], dtype=object) - - result = objects.reindex([1, 2, 3]) - expected = Series([2, 3, np.nan], index=[1, 2, 3], dtype=object) - assert_series_equal(result, expected) - - result = objects.reindex([1, 2, 3], fill_value='foo') - expected = Series([2, 3, 'foo'], index=[1, 2, 3], dtype=object) - assert_series_equal(result, expected) - - # ------------------------------------------------------------ - # bools - bools = Series([True, False, True]) - - result = bools.reindex([1, 2, 3]) - expected = Series([False, True, np.nan], index=[1, 2, 3], dtype=object) - assert_series_equal(result, expected) - - result = bools.reindex([1, 2, 3], fill_value=False) - expected = Series([False, True, False], index=[1, 2, 3]) - assert_series_equal(result, expected) - - def test_select(self): - n = len(self.ts) - result = self.ts.select(lambda x: x >= self.ts.index[n // 2]) - expected = self.ts.reindex(self.ts.index[n // 2:]) - assert_series_equal(result, expected) - - result = self.ts.select(lambda x: x.weekday() == 2) - expected = self.ts[self.ts.index.weekday == 2] - assert_series_equal(result, expected) - - def test_cast_on_putmask(self): - - # GH 2746 - - # need to upcast - s = Series([1, 2], index=[1, 2], dtype='int64') - s[[True, False]] = Series([0], index=[1], dtype='int64') - expected = Series([0, 2], index=[1, 2], dtype='int64') - - 
assert_series_equal(s, expected) - - def test_type_promote_putmask(self): - - # GH8387: test that changing types does not break alignment - ts = Series(np.random.randn(100), index=np.arange(100, 0, -1)).round(5) - left, mask = ts.copy(), ts > 0 - right = ts[mask].copy().map(str) - left[mask] = right - assert_series_equal(left, ts.map(lambda t: str(t) if t > 0 else t)) - - s = Series([0, 1, 2, 0]) - mask = s > 0 - s2 = s[mask].map(str) - s[mask] = s2 - assert_series_equal(s, Series([0, '1', '2', 0])) - - s = Series([0, 'foo', 'bar', 0]) - mask = Series([False, True, True, False]) - s2 = s[mask] - s[mask] = s2 - assert_series_equal(s, Series([0, 'foo', 'bar', 0])) - - def test_head_tail(self): - assert_series_equal(self.series.head(), self.series[:5]) - assert_series_equal(self.series.head(0), self.series[0:0]) - assert_series_equal(self.series.tail(), self.series[-5:]) - assert_series_equal(self.series.tail(0), self.series[0:0]) - - def test_multilevel_preserve_name(self): - index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', - 'three']], - labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], - [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], - names=['first', 'second']) - s = Series(np.random.randn(len(index)), index=index, name='sth') - - result = s['foo'] - result2 = s.loc['foo'] - self.assertEqual(result.name, s.name) - self.assertEqual(result2.name, s.name) - - def test_setitem_scalar_into_readonly_backing_data(self): - # GH14359: test that you cannot mutate a read only buffer - - array = np.zeros(5) - array.flags.writeable = False # make the array immutable - series = Series(array) - - for n in range(len(series)): - with self.assertRaises(ValueError): - series[n] = 1 - - self.assertEqual( - array[n], - 0, - msg='even though the ValueError was raised, the underlying' - ' array was still mutated!', - ) - - def test_setitem_slice_into_readonly_backing_data(self): - # GH14359: test that you cannot mutate a read only buffer - - array = np.zeros(5) - array.flags.writeable = 
False # make the array immutable - series = Series(array) - - with self.assertRaises(ValueError): - series[1:3] = 1 - - self.assertTrue( - not array.any(), - msg='even though the ValueError was raised, the underlying' - ' array was still mutated!', - ) - - -class TestTimeSeriesDuplicates(tm.TestCase): - - def setUp(self): - dates = [datetime(2000, 1, 2), datetime(2000, 1, 2), - datetime(2000, 1, 2), datetime(2000, 1, 3), - datetime(2000, 1, 3), datetime(2000, 1, 3), - datetime(2000, 1, 4), datetime(2000, 1, 4), - datetime(2000, 1, 4), datetime(2000, 1, 5)] - - self.dups = Series(np.random.randn(len(dates)), index=dates) - - def test_constructor(self): - tm.assertIsInstance(self.dups, Series) - tm.assertIsInstance(self.dups.index, DatetimeIndex) - - def test_is_unique_monotonic(self): - self.assertFalse(self.dups.index.is_unique) - - def test_index_unique(self): - uniques = self.dups.index.unique() - expected = DatetimeIndex([datetime(2000, 1, 2), datetime(2000, 1, 3), - datetime(2000, 1, 4), datetime(2000, 1, 5)]) - self.assertEqual(uniques.dtype, 'M8[ns]') # sanity - tm.assert_index_equal(uniques, expected) - self.assertEqual(self.dups.index.nunique(), 4) - - # #2563 - self.assertTrue(isinstance(uniques, DatetimeIndex)) - - dups_local = self.dups.index.tz_localize('US/Eastern') - dups_local.name = 'foo' - result = dups_local.unique() - expected = DatetimeIndex(expected, name='foo') - expected = expected.tz_localize('US/Eastern') - self.assertTrue(result.tz is not None) - self.assertEqual(result.name, 'foo') - tm.assert_index_equal(result, expected) - - # NaT, note this is excluded - arr = [1370745748 + t for t in range(20)] + [tslib.iNaT] - idx = DatetimeIndex(arr * 3) - tm.assert_index_equal(idx.unique(), DatetimeIndex(arr)) - self.assertEqual(idx.nunique(), 20) - self.assertEqual(idx.nunique(dropna=False), 21) - - arr = [Timestamp('2013-06-09 02:42:28') + timedelta(seconds=t) - for t in range(20)] + [NaT] - idx = DatetimeIndex(arr * 3) - 
tm.assert_index_equal(idx.unique(), DatetimeIndex(arr)) - self.assertEqual(idx.nunique(), 20) - self.assertEqual(idx.nunique(dropna=False), 21) - - def test_index_dupes_contains(self): - d = datetime(2011, 12, 5, 20, 30) - ix = DatetimeIndex([d, d]) - self.assertTrue(d in ix) - - def test_duplicate_dates_indexing(self): - ts = self.dups - - uniques = ts.index.unique() - for date in uniques: - result = ts[date] - - mask = ts.index == date - total = (ts.index == date).sum() - expected = ts[mask] - if total > 1: - assert_series_equal(result, expected) - else: - assert_almost_equal(result, expected[0]) - - cp = ts.copy() - cp[date] = 0 - expected = Series(np.where(mask, 0, ts), index=ts.index) - assert_series_equal(cp, expected) - - self.assertRaises(KeyError, ts.__getitem__, datetime(2000, 1, 6)) - - # new index - ts[datetime(2000, 1, 6)] = 0 - self.assertEqual(ts[datetime(2000, 1, 6)], 0) - - def test_range_slice(self): - idx = DatetimeIndex(['1/1/2000', '1/2/2000', '1/2/2000', '1/3/2000', - '1/4/2000']) - - ts = Series(np.random.randn(len(idx)), index=idx) - - result = ts['1/2/2000':] - expected = ts[1:] - assert_series_equal(result, expected) - - result = ts['1/2/2000':'1/3/2000'] - expected = ts[1:4] - assert_series_equal(result, expected) - - def test_groupby_average_dup_values(self): - result = self.dups.groupby(level=0).mean() - expected = self.dups.groupby(self.dups.index).mean() - assert_series_equal(result, expected) - - def test_indexing_over_size_cutoff(self): - import datetime - # #1821 - - old_cutoff = _index._SIZE_CUTOFF - try: - _index._SIZE_CUTOFF = 1000 - - # create large list of non periodic datetime - dates = [] - sec = datetime.timedelta(seconds=1) - half_sec = datetime.timedelta(microseconds=500000) - d = datetime.datetime(2011, 12, 5, 20, 30) - n = 1100 - for i in range(n): - dates.append(d) - dates.append(d + sec) - dates.append(d + sec + half_sec) - dates.append(d + sec + sec + half_sec) - d += 3 * sec - - # duplicate some values in the list - 
duplicate_positions = np.random.randint(0, len(dates) - 1, 20) - for p in duplicate_positions: - dates[p + 1] = dates[p] - - df = DataFrame(np.random.randn(len(dates), 4), - index=dates, - columns=list('ABCD')) - - pos = n * 3 - timestamp = df.index[pos] - self.assertIn(timestamp, df.index) - - # it works! - df.loc[timestamp] - self.assertTrue(len(df.loc[[timestamp]]) > 0) - finally: - _index._SIZE_CUTOFF = old_cutoff - - def test_indexing_unordered(self): - # GH 2437 - rng = date_range(start='2011-01-01', end='2011-01-15') - ts = Series(np.random.rand(len(rng)), index=rng) - ts2 = pd.concat([ts[0:4], ts[-4:], ts[4:-4]]) - - for t in ts.index: - # TODO: unused? - s = str(t) # noqa - - expected = ts[t] - result = ts2[t] - self.assertTrue(expected == result) - - # GH 3448 (ranges) - def compare(slobj): - result = ts2[slobj].copy() - result = result.sort_index() - expected = ts[slobj] - assert_series_equal(result, expected) - - compare(slice('2011-01-01', '2011-01-15')) - compare(slice('2010-12-30', '2011-01-15')) - compare(slice('2011-01-01', '2011-01-16')) - - # partial ranges - compare(slice('2011-01-01', '2011-01-6')) - compare(slice('2011-01-06', '2011-01-8')) - compare(slice('2011-01-06', '2011-01-12')) - - # single values - result = ts2['2011'].sort_index() - expected = ts['2011'] - assert_series_equal(result, expected) - - # diff freq - rng = date_range(datetime(2005, 1, 1), periods=20, freq='M') - ts = Series(np.arange(len(rng)), index=rng) - ts = ts.take(np.random.permutation(20)) - - result = ts['2005'] - for t in result.index: - self.assertTrue(t.year == 2005) - - def test_indexing(self): - - idx = date_range("2001-1-1", periods=20, freq='M') - ts = Series(np.random.rand(len(idx)), index=idx) - - # getting - - # GH 3070, make sure semantics work on Series/Frame - expected = ts['2001'] - expected.name = 'A' - - df = DataFrame(dict(A=ts)) - result = df['2001']['A'] - assert_series_equal(expected, result) - - # setting - ts['2001'] = 1 - expected = ts['2001'] 
- expected.name = 'A' - - df.loc['2001', 'A'] = 1 - - result = df['2001']['A'] - assert_series_equal(expected, result) - - # GH3546 (not including times on the last day) - idx = date_range(start='2013-05-31 00:00', end='2013-05-31 23:00', - freq='H') - ts = Series(lrange(len(idx)), index=idx) - expected = ts['2013-05'] - assert_series_equal(expected, ts) - - idx = date_range(start='2013-05-31 00:00', end='2013-05-31 23:59', - freq='S') - ts = Series(lrange(len(idx)), index=idx) - expected = ts['2013-05'] - assert_series_equal(expected, ts) - - idx = [Timestamp('2013-05-31 00:00'), - Timestamp(datetime(2013, 5, 31, 23, 59, 59, 999999))] - ts = Series(lrange(len(idx)), index=idx) - expected = ts['2013'] - assert_series_equal(expected, ts) - - # GH14826, indexing with a seconds resolution string / datetime object - df = DataFrame(np.random.rand(5, 5), - columns=['open', 'high', 'low', 'close', 'volume'], - index=date_range('2012-01-02 18:01:00', - periods=5, tz='US/Central', freq='s')) - expected = df.loc[[df.index[2]]] - - # this is a single date, so will raise - self.assertRaises(KeyError, df.__getitem__, '2012-01-02 18:01:02', ) - self.assertRaises(KeyError, df.__getitem__, df.index[2], ) - - -class TestDatetimeIndexing(tm.TestCase): - """ - Also test support for datetime64[ns] in Series / DataFrame - """ - - def setUp(self): - dti = DatetimeIndex(start=datetime(2005, 1, 1), - end=datetime(2005, 1, 10), freq='Min') - self.series = Series(np.random.rand(len(dti)), dti) - - def test_fancy_getitem(self): - dti = DatetimeIndex(freq='WOM-1FRI', start=datetime(2005, 1, 1), - end=datetime(2010, 1, 1)) - - s = Series(np.arange(len(dti)), index=dti) - - self.assertEqual(s[48], 48) - self.assertEqual(s['1/2/2009'], 48) - self.assertEqual(s['2009-1-2'], 48) - self.assertEqual(s[datetime(2009, 1, 2)], 48) - self.assertEqual(s[lib.Timestamp(datetime(2009, 1, 2))], 48) - self.assertRaises(KeyError, s.__getitem__, '2009-1-3') - - assert_series_equal(s['3/6/2009':'2009-06-05'], - 
s[datetime(2009, 3, 6):datetime(2009, 6, 5)]) - - def test_fancy_setitem(self): - dti = DatetimeIndex(freq='WOM-1FRI', start=datetime(2005, 1, 1), - end=datetime(2010, 1, 1)) - - s = Series(np.arange(len(dti)), index=dti) - s[48] = -1 - self.assertEqual(s[48], -1) - s['1/2/2009'] = -2 - self.assertEqual(s[48], -2) - s['1/2/2009':'2009-06-05'] = -3 - self.assertTrue((s[48:54] == -3).all()) - - def test_dti_snap(self): - dti = DatetimeIndex(['1/1/2002', '1/2/2002', '1/3/2002', '1/4/2002', - '1/5/2002', '1/6/2002', '1/7/2002'], freq='D') - - res = dti.snap(freq='W-MON') - exp = date_range('12/31/2001', '1/7/2002', freq='w-mon') - exp = exp.repeat([3, 4]) - self.assertTrue((res == exp).all()) - - res = dti.snap(freq='B') - - exp = date_range('1/1/2002', '1/7/2002', freq='b') - exp = exp.repeat([1, 1, 1, 2, 2]) - self.assertTrue((res == exp).all()) - - def test_dti_reset_index_round_trip(self): - dti = DatetimeIndex(start='1/1/2001', end='6/1/2001', freq='D') - d1 = DataFrame({'v': np.random.rand(len(dti))}, index=dti) - d2 = d1.reset_index() - self.assertEqual(d2.dtypes[0], np.dtype('M8[ns]')) - d3 = d2.set_index('index') - assert_frame_equal(d1, d3, check_names=False) - - # #2329 - stamp = datetime(2012, 11, 22) - df = DataFrame([[stamp, 12.1]], columns=['Date', 'Value']) - df = df.set_index('Date') - - self.assertEqual(df.index[0], stamp) - self.assertEqual(df.reset_index()['Date'][0], stamp) - - def test_series_set_value(self): - # #1561 - - dates = [datetime(2001, 1, 1), datetime(2001, 1, 2)] - index = DatetimeIndex(dates) - - s = Series().set_value(dates[0], 1.) 
- s2 = s.set_value(dates[1], np.nan) - - exp = Series([1., np.nan], index=index) - - assert_series_equal(s2, exp) - - # s = Series(index[:1], index[:1]) - # s2 = s.set_value(dates[1], index[1]) - # self.assertEqual(s2.values.dtype, 'M8[ns]') - - @slow - def test_slice_locs_indexerror(self): - times = [datetime(2000, 1, 1) + timedelta(minutes=i * 10) - for i in range(100000)] - s = Series(lrange(100000), times) - s.loc[datetime(1900, 1, 1):datetime(2100, 1, 1)] - - def test_slicing_datetimes(self): - - # GH 7523 - - # unique - df = DataFrame(np.arange(4., dtype='float64'), - index=[datetime(2001, 1, i, 10, 00) - for i in [1, 2, 3, 4]]) - result = df.loc[datetime(2001, 1, 1, 10):] - assert_frame_equal(result, df) - result = df.loc[:datetime(2001, 1, 4, 10)] - assert_frame_equal(result, df) - result = df.loc[datetime(2001, 1, 1, 10):datetime(2001, 1, 4, 10)] - assert_frame_equal(result, df) - - result = df.loc[datetime(2001, 1, 1, 11):] - expected = df.iloc[1:] - assert_frame_equal(result, expected) - result = df.loc['20010101 11':] - assert_frame_equal(result, expected) - - # duplicates - df = pd.DataFrame(np.arange(5., dtype='float64'), - index=[datetime(2001, 1, i, 10, 00) - for i in [1, 2, 2, 3, 4]]) - - result = df.loc[datetime(2001, 1, 1, 10):] - assert_frame_equal(result, df) - result = df.loc[:datetime(2001, 1, 4, 10)] - assert_frame_equal(result, df) - result = df.loc[datetime(2001, 1, 1, 10):datetime(2001, 1, 4, 10)] - assert_frame_equal(result, df) - - result = df.loc[datetime(2001, 1, 1, 11):] - expected = df.iloc[1:] - assert_frame_equal(result, expected) - result = df.loc['20010101 11':] - assert_frame_equal(result, expected) - - def test_frame_datetime64_duplicated(self): - dates = date_range('2010-07-01', end='2010-08-05') - - tst = DataFrame({'symbol': 'AAA', 'date': dates}) - result = tst.duplicated(['date', 'symbol']) - self.assertTrue((-result).all()) - - tst = DataFrame({'date': dates}) - result = tst.duplicated() - 
self.assertTrue((-result).all()) - - -class TestNatIndexing(tm.TestCase): - - def setUp(self): - self.series = Series(date_range('1/1/2000', periods=10)) - - # --------------------------------------------------------------------- - # NaT support - - def test_set_none_nan(self): - self.series[3] = None - self.assertIs(self.series[3], NaT) - - self.series[3:5] = None - self.assertIs(self.series[4], NaT) - - self.series[5] = np.nan - self.assertIs(self.series[5], NaT) - - self.series[5:7] = np.nan - self.assertIs(self.series[6], NaT) - - def test_nat_operations(self): - # GH 8617 - s = Series([0, pd.NaT], dtype='m8[ns]') - exp = s[0] - self.assertEqual(s.median(), exp) - self.assertEqual(s.min(), exp) - self.assertEqual(s.max(), exp) - - def test_round_nat(self): - # GH14940 - s = Series([pd.NaT]) - expected = Series(pd.NaT) - for method in ["round", "floor", "ceil"]: - round_method = getattr(s.dt, method) - for freq in ["s", "5s", "min", "5min", "h", "5h"]: - assert_series_equal(round_method(freq), expected) diff --git a/pandas/tests/series/test_internals.py b/pandas/tests/series/test_internals.py index a3b13ba9b993a..79e23459ac992 100644 --- a/pandas/tests/series/test_internals.py +++ b/pandas/tests/series/test_internals.py @@ -1,20 +1,22 @@ # coding=utf-8 # pylint: disable-msg=E1101,W0612 +import pytest + from datetime import datetime from numpy import nan import numpy as np from pandas import Series -from pandas.tseries.index import Timestamp -import pandas.lib as lib +from pandas.core.indexes.datetimes import Timestamp +import pandas._libs.lib as lib from pandas.util.testing import assert_series_equal import pandas.util.testing as tm -class TestSeriesInternals(tm.TestCase): +class TestSeriesInternals(object): def test_convert_objects(self): @@ -114,7 +116,7 @@ def test_convert_objects(self): # r = s.copy() # r[0] = np.nan # result = r.convert_objects(convert_dates=True,convert_numeric=False) - # self.assertEqual(result.dtype, 'M8[ns]') + # assert result.dtype == 
'M8[ns]' # dateutil parses some single letters into today's value as a date for x in 'abcdefghijklmnopqrstuvwxyz': @@ -280,7 +282,7 @@ def test_convert(self): # r = s.copy() # r[0] = np.nan # result = r._convert(convert_dates=True,convert_numeric=False) - # self.assertEqual(result.dtype, 'M8[ns]') + # assert result.dtype == 'M8[ns]' # dateutil parses some single letters into today's value as a date expected = Series([lib.NaT]) @@ -294,7 +296,7 @@ def test_convert(self): def test_convert_no_arg_error(self): s = Series(['1.0', '2']) - self.assertRaises(ValueError, s._convert) + pytest.raises(ValueError, s._convert) def test_convert_preserve_bool(self): s = Series([1, True, 3, 5], dtype=object) diff --git a/pandas/tests/series/test_io.py b/pandas/tests/series/test_io.py index d514fbfc142f0..62d1372525cc8 100644 --- a/pandas/tests/series/test_io.py +++ b/pandas/tests/series/test_io.py @@ -2,13 +2,15 @@ # pylint: disable-msg=E1101,W0612 from datetime import datetime +import collections +import pytest import numpy as np import pandas as pd from pandas import Series, DataFrame -from pandas.compat import StringIO, u, long +from pandas.compat import StringIO, u from pandas.util.testing import (assert_series_equal, assert_almost_equal, assert_frame_equal, ensure_clean) import pandas.util.testing as tm @@ -16,45 +18,75 @@ from .common import TestData -class TestSeriesToCSV(TestData, tm.TestCase): +class TestSeriesToCSV(TestData): + + def read_csv(self, path, **kwargs): + params = dict(squeeze=True, index_col=0, + header=None, parse_dates=True) + params.update(**kwargs) + + header = params.get("header") + out = pd.read_csv(path, **params) + + if header is None: + out.name = out.index.name = None + + return out + + def test_from_csv_deprecation(self): + # see gh-17812 + with ensure_clean() as path: + self.ts.to_csv(path) + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + ts = self.read_csv(path) + depr_ts = Series.from_csv(path) + 
assert_series_equal(depr_ts, ts) def test_from_csv(self): with ensure_clean() as path: self.ts.to_csv(path) - ts = Series.from_csv(path) + ts = self.read_csv(path) assert_series_equal(self.ts, ts, check_names=False) - self.assertTrue(ts.name is None) - self.assertTrue(ts.index.name is None) - # GH10483 + assert ts.name is None + assert ts.index.name is None + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + depr_ts = Series.from_csv(path) + assert_series_equal(depr_ts, ts) + + # see gh-10483 self.ts.to_csv(path, header=True) - ts_h = Series.from_csv(path, header=0) - self.assertTrue(ts_h.name == 'ts') + ts_h = self.read_csv(path, header=0) + assert ts_h.name == "ts" self.series.to_csv(path) - series = Series.from_csv(path) - self.assertIsNone(series.name) - self.assertIsNone(series.index.name) + series = self.read_csv(path) assert_series_equal(self.series, series, check_names=False) - self.assertTrue(series.name is None) - self.assertTrue(series.index.name is None) + + assert series.name is None + assert series.index.name is None self.series.to_csv(path, header=True) - series_h = Series.from_csv(path, header=0) - self.assertTrue(series_h.name == 'series') + series_h = self.read_csv(path, header=0) + assert series_h.name == "series" - outfile = open(path, 'w') - outfile.write('1998-01-01|1.0\n1999-01-01|2.0') + outfile = open(path, "w") + outfile.write("1998-01-01|1.0\n1999-01-01|2.0") outfile.close() - series = Series.from_csv(path, sep='|') - checkseries = Series({datetime(1998, 1, 1): 1.0, - datetime(1999, 1, 1): 2.0}) - assert_series_equal(checkseries, series) - series = Series.from_csv(path, sep='|', parse_dates=False) - checkseries = Series({'1998-01-01': 1.0, '1999-01-01': 2.0}) - assert_series_equal(checkseries, series) + series = self.read_csv(path, sep="|") + check_series = Series({datetime(1998, 1, 1): 1.0, + datetime(1999, 1, 1): 2.0}) + assert_series_equal(check_series, series) + + series = self.read_csv(path, sep="|", 
parse_dates=False) + check_series = Series({"1998-01-01": 1.0, "1999-01-01": 2.0}) + assert_series_equal(check_series, series) def test_to_csv(self): import io @@ -74,20 +106,19 @@ def test_to_csv_unicode_index(self): buf = StringIO() s = Series([u("\u05d0"), "d2"], index=[u("\u05d0"), u("\u05d1")]) - s.to_csv(buf, encoding='UTF-8') + s.to_csv(buf, encoding="UTF-8") buf.seek(0) - s2 = Series.from_csv(buf, index_col=0, encoding='UTF-8') - + s2 = self.read_csv(buf, index_col=0, encoding="UTF-8") assert_series_equal(s, s2) def test_to_csv_float_format(self): with ensure_clean() as filename: ser = Series([0.123456, 0.234567, 0.567567]) - ser.to_csv(filename, float_format='%.2f') + ser.to_csv(filename, float_format="%.2f") - rs = Series.from_csv(filename) + rs = self.read_csv(filename) xp = Series([0.12, 0.23, 0.57]) assert_series_equal(rs, xp) @@ -105,10 +136,34 @@ def test_to_csv_path_is_none(self): # DataFrame.to_csv() which returned string s = Series([1, 2, 3]) csv_str = s.to_csv(path=None) - self.assertIsInstance(csv_str, str) + assert isinstance(csv_str, str) + + def test_to_csv_compression(self, compression_no_zip): + + s = Series([0.123456, 0.234567, 0.567567], index=['A', 'B', 'C'], + name='X') + + with ensure_clean() as filename: + + s.to_csv(filename, compression=compression_no_zip, header=True) + # test the round trip - to_csv -> read_csv + rs = pd.read_csv(filename, compression=compression_no_zip, + index_col=0, squeeze=True) + assert_series_equal(s, rs) -class TestSeriesIO(TestData, tm.TestCase): + # explicitly ensure file was compressed + with tm.decompress_file(filename, compression_no_zip) as fh: + text = fh.read().decode('utf8') + assert s.name in text + + with tm.decompress_file(filename, compression_no_zip) as fh: + assert_series_equal(s, pd.read_csv(fh, + index_col=0, + squeeze=True)) + + +class TestSeriesIO(TestData): def test_to_frame(self): self.ts.name = None @@ -126,21 +181,18 @@ def test_to_frame(self): dict(testdifferent=self.ts.values), 
index=self.ts.index) assert_frame_equal(rs, xp) - def test_to_dict(self): - self.assert_series_equal(Series(self.ts.to_dict(), name='ts'), self.ts) - def test_timeseries_periodindex(self): # GH2891 from pandas import period_range prng = period_range('1/1/2011', '1/1/2012', freq='M') ts = Series(np.random.randn(len(prng)), prng) - new_ts = self.round_trip_pickle(ts) - self.assertEqual(new_ts.index.freq, 'M') + new_ts = tm.round_trip_pickle(ts) + assert new_ts.index.freq == 'M' def test_pickle_preserve_name(self): for n in [777, 777., 'name', datetime(2001, 11, 11), (1, 2)]: unpickled = self._pickle_roundtrip_name(tm.makeTimeSeries(name=n)) - self.assertEqual(unpickled.name, n) + assert unpickled.name == n def _pickle_roundtrip_name(self, obj): @@ -163,40 +215,19 @@ class SubclassedFrame(DataFrame): s = SubclassedSeries([1, 2, 3], name='X') result = s.to_frame() - self.assertTrue(isinstance(result, SubclassedFrame)) + assert isinstance(result, SubclassedFrame) expected = SubclassedFrame({'X': [1, 2, 3]}) assert_frame_equal(result, expected) - -class TestSeriesToList(TestData, tm.TestCase): - - def test_tolist(self): - rs = self.ts.tolist() - xp = self.ts.values.tolist() - assert_almost_equal(rs, xp) - - # datetime64 - s = Series(self.ts.index) - rs = s.tolist() - self.assertEqual(self.ts.index[0], rs[0]) - - def test_tolist_np_int(self): - # GH10904 - for t in ['int8', 'int16', 'int32', 'int64']: - s = pd.Series([1], dtype=t) - self.assertIsInstance(s.tolist()[0], (int, long)) - - def test_tolist_np_uint(self): - # GH10904 - for t in ['uint8', 'uint16']: - s = pd.Series([1], dtype=t) - self.assertIsInstance(s.tolist()[0], int) - for t in ['uint32', 'uint64']: - s = pd.Series([1], dtype=t) - self.assertIsInstance(s.tolist()[0], long) - - def test_tolist_np_float(self): - # GH10904 - for t in ['float16', 'float32', 'float64']: - s = pd.Series([1], dtype=t) - self.assertIsInstance(s.tolist()[0], float) + @pytest.mark.parametrize('mapping', ( + dict, + 
collections.defaultdict(list), + collections.OrderedDict)) + def test_to_dict(self, mapping): + # GH16122 + ts = TestData().ts + tm.assert_series_equal( + Series(ts.to_dict(mapping), name='ts'), ts) + from_method = Series(ts.to_dict(collections.Counter)) + from_constructor = Series(collections.Counter(ts.iteritems())) + tm.assert_series_equal(from_method, from_constructor) diff --git a/pandas/tests/series/test_misc_api.py b/pandas/tests/series/test_misc_api.py deleted file mode 100644 index 2facbaf1fe31e..0000000000000 --- a/pandas/tests/series/test_misc_api.py +++ /dev/null @@ -1,350 +0,0 @@ -# coding=utf-8 -# pylint: disable-msg=E1101,W0612 - -import numpy as np -import pandas as pd - -from pandas import Index, Series, DataFrame, date_range -from pandas.tseries.index import Timestamp - -from pandas.compat import range -from pandas import compat -import pandas.formats.printing as printing -from pandas.util.testing import (assert_series_equal, - ensure_clean) -import pandas.util.testing as tm - -from .common import TestData - - -class SharedWithSparse(object): - - def test_scalarop_preserve_name(self): - result = self.ts * 2 - self.assertEqual(result.name, self.ts.name) - - def test_copy_name(self): - result = self.ts.copy() - self.assertEqual(result.name, self.ts.name) - - def test_copy_index_name_checking(self): - # don't want to be able to modify the index stored elsewhere after - # making a copy - - self.ts.index.name = None - self.assertIsNone(self.ts.index.name) - self.assertIs(self.ts, self.ts) - - cp = self.ts.copy() - cp.index.name = 'foo' - printing.pprint_thing(self.ts.index.name) - self.assertIsNone(self.ts.index.name) - - def test_append_preserve_name(self): - result = self.ts[:5].append(self.ts[5:]) - self.assertEqual(result.name, self.ts.name) - - def test_binop_maybe_preserve_name(self): - # names match, preserve - result = self.ts * self.ts - self.assertEqual(result.name, self.ts.name) - result = self.ts.mul(self.ts) - self.assertEqual(result.name, 
self.ts.name) - - result = self.ts * self.ts[:-2] - self.assertEqual(result.name, self.ts.name) - - # names don't match, don't preserve - cp = self.ts.copy() - cp.name = 'something else' - result = self.ts + cp - self.assertIsNone(result.name) - result = self.ts.add(cp) - self.assertIsNone(result.name) - - ops = ['add', 'sub', 'mul', 'div', 'truediv', 'floordiv', 'mod', 'pow'] - ops = ops + ['r' + op for op in ops] - for op in ops: - # names match, preserve - s = self.ts.copy() - result = getattr(s, op)(s) - self.assertEqual(result.name, self.ts.name) - - # names don't match, don't preserve - cp = self.ts.copy() - cp.name = 'changed' - result = getattr(s, op)(cp) - self.assertIsNone(result.name) - - def test_combine_first_name(self): - result = self.ts.combine_first(self.ts[:5]) - self.assertEqual(result.name, self.ts.name) - - def test_getitem_preserve_name(self): - result = self.ts[self.ts > 0] - self.assertEqual(result.name, self.ts.name) - - result = self.ts[[0, 2, 4]] - self.assertEqual(result.name, self.ts.name) - - result = self.ts[5:10] - self.assertEqual(result.name, self.ts.name) - - def test_pickle(self): - unp_series = self._pickle_roundtrip(self.series) - unp_ts = self._pickle_roundtrip(self.ts) - assert_series_equal(unp_series, self.series) - assert_series_equal(unp_ts, self.ts) - - def _pickle_roundtrip(self, obj): - - with ensure_clean() as path: - obj.to_pickle(path) - unpickled = pd.read_pickle(path) - return unpickled - - def test_argsort_preserve_name(self): - result = self.ts.argsort() - self.assertEqual(result.name, self.ts.name) - - def test_sort_index_name(self): - result = self.ts.sort_index(ascending=False) - self.assertEqual(result.name, self.ts.name) - - def test_to_sparse_pass_name(self): - result = self.ts.to_sparse() - self.assertEqual(result.name, self.ts.name) - - -class TestSeriesMisc(TestData, SharedWithSparse, tm.TestCase): - - def test_tab_completion(self): - # GH 9910 - s = Series(list('abcd')) - # Series of str values should 
have .str but not .dt/.cat in __dir__ - self.assertTrue('str' in dir(s)) - self.assertTrue('dt' not in dir(s)) - self.assertTrue('cat' not in dir(s)) - - # similiarly for .dt - s = Series(date_range('1/1/2015', periods=5)) - self.assertTrue('dt' in dir(s)) - self.assertTrue('str' not in dir(s)) - self.assertTrue('cat' not in dir(s)) - - # similiarly for .cat, but with the twist that str and dt should be - # there if the categories are of that type first cat and str - s = Series(list('abbcd'), dtype="category") - self.assertTrue('cat' in dir(s)) - self.assertTrue('str' in dir(s)) # as it is a string categorical - self.assertTrue('dt' not in dir(s)) - - # similar to cat and str - s = Series(date_range('1/1/2015', periods=5)).astype("category") - self.assertTrue('cat' in dir(s)) - self.assertTrue('str' not in dir(s)) - self.assertTrue('dt' in dir(s)) # as it is a datetime categorical - - def test_not_hashable(self): - s_empty = Series() - s = Series([1]) - self.assertRaises(TypeError, hash, s_empty) - self.assertRaises(TypeError, hash, s) - - def test_contains(self): - tm.assert_contains_all(self.ts.index, self.ts) - - def test_iter(self): - for i, val in enumerate(self.series): - self.assertEqual(val, self.series[i]) - - for i, val in enumerate(self.ts): - self.assertEqual(val, self.ts[i]) - - def test_iter_box(self): - vals = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')] - s = pd.Series(vals) - self.assertEqual(s.dtype, 'datetime64[ns]') - for res, exp in zip(s, vals): - self.assertIsInstance(res, pd.Timestamp) - self.assertEqual(res, exp) - self.assertIsNone(res.tz) - - vals = [pd.Timestamp('2011-01-01', tz='US/Eastern'), - pd.Timestamp('2011-01-02', tz='US/Eastern')] - s = pd.Series(vals) - self.assertEqual(s.dtype, 'datetime64[ns, US/Eastern]') - for res, exp in zip(s, vals): - self.assertIsInstance(res, pd.Timestamp) - self.assertEqual(res, exp) - self.assertEqual(res.tz, exp.tz) - - # timedelta - vals = [pd.Timedelta('1 days'), pd.Timedelta('2 
days')] - s = pd.Series(vals) - self.assertEqual(s.dtype, 'timedelta64[ns]') - for res, exp in zip(s, vals): - self.assertIsInstance(res, pd.Timedelta) - self.assertEqual(res, exp) - - # period (object dtype, not boxed) - vals = [pd.Period('2011-01-01', freq='M'), - pd.Period('2011-01-02', freq='M')] - s = pd.Series(vals) - self.assertEqual(s.dtype, 'object') - for res, exp in zip(s, vals): - self.assertIsInstance(res, pd.Period) - self.assertEqual(res, exp) - self.assertEqual(res.freq, 'M') - - def test_keys(self): - # HACK: By doing this in two stages, we avoid 2to3 wrapping the call - # to .keys() in a list() - getkeys = self.ts.keys - self.assertIs(getkeys(), self.ts.index) - - def test_values(self): - self.assert_almost_equal(self.ts.values, self.ts, check_dtype=False) - - def test_iteritems(self): - for idx, val in compat.iteritems(self.series): - self.assertEqual(val, self.series[idx]) - - for idx, val in compat.iteritems(self.ts): - self.assertEqual(val, self.ts[idx]) - - # assert is lazy (genrators don't define reverse, lists do) - self.assertFalse(hasattr(self.series.iteritems(), 'reverse')) - - def test_raise_on_info(self): - s = Series(np.random.randn(10)) - with tm.assertRaises(AttributeError): - s.info() - - def test_copy(self): - - for deep in [None, False, True]: - s = Series(np.arange(10), dtype='float64') - - # default deep is True - if deep is None: - s2 = s.copy() - else: - s2 = s.copy(deep=deep) - - s2[::2] = np.NaN - - if deep is None or deep is True: - # Did not modify original Series - self.assertTrue(np.isnan(s2[0])) - self.assertFalse(np.isnan(s[0])) - else: - # we DID modify the original Series - self.assertTrue(np.isnan(s2[0])) - self.assertTrue(np.isnan(s[0])) - - # GH 11794 - # copy of tz-aware - expected = Series([Timestamp('2012/01/01', tz='UTC')]) - expected2 = Series([Timestamp('1999/01/01', tz='UTC')]) - - for deep in [None, False, True]: - - s = Series([Timestamp('2012/01/01', tz='UTC')]) - - if deep is None: - s2 = s.copy() - 
else: - s2 = s.copy(deep=deep) - - s2[0] = pd.Timestamp('1999/01/01', tz='UTC') - - # default deep is True - if deep is None or deep is True: - # Did not modify original Series - assert_series_equal(s2, expected2) - assert_series_equal(s, expected) - else: - # we DID modify the original Series - assert_series_equal(s2, expected2) - assert_series_equal(s, expected2) - - def test_axis_alias(self): - s = Series([1, 2, np.nan]) - assert_series_equal(s.dropna(axis='rows'), s.dropna(axis='index')) - self.assertEqual(s.dropna().sum('rows'), 3) - self.assertEqual(s._get_axis_number('rows'), 0) - self.assertEqual(s._get_axis_name('rows'), 'index') - - def test_numpy_unique(self): - # it works! - np.unique(self.ts) - - def test_ndarray_compat(self): - - # test numpy compat with Series as sub-class of NDFrame - tsdf = DataFrame(np.random.randn(1000, 3), columns=['A', 'B', 'C'], - index=date_range('1/1/2000', periods=1000)) - - def f(x): - return x[x.argmax()] - - result = tsdf.apply(f) - expected = tsdf.max() - assert_series_equal(result, expected) - - # .item() - s = Series([1]) - result = s.item() - self.assertEqual(result, 1) - self.assertEqual(s.item(), s.iloc[0]) - - # using an ndarray like function - s = Series(np.random.randn(10)) - result = np.ones_like(s) - expected = Series(1, index=range(10), dtype='float64') - # assert_series_equal(result,expected) - - # ravel - s = Series(np.random.randn(10)) - tm.assert_almost_equal(s.ravel(order='F'), s.values.ravel(order='F')) - - # compress - # GH 6658 - s = Series([0, 1., -1], index=list('abc')) - result = np.compress(s > 0, s) - assert_series_equal(result, Series([1.], index=['b'])) - - result = np.compress(s < -1, s) - # result empty Index(dtype=object) as the same as original - exp = Series([], dtype='float64', index=Index([], dtype='object')) - assert_series_equal(result, exp) - - s = Series([0, 1., -1], index=[.1, .2, .3]) - result = np.compress(s > 0, s) - assert_series_equal(result, Series([1.], index=[.2])) - - 
result = np.compress(s < -1, s) - # result empty Float64Index as the same as original - exp = Series([], dtype='float64', index=Index([], dtype='float64')) - assert_series_equal(result, exp) - - def test_str_attribute(self): - # GH9068 - methods = ['strip', 'rstrip', 'lstrip'] - s = Series([' jack', 'jill ', ' jesse ', 'frank']) - for method in methods: - expected = Series([getattr(str, method)(x) for x in s.values]) - assert_series_equal(getattr(Series.str, method)(s.str), expected) - - # str accessor only valid with string values - s = Series(range(5)) - with self.assertRaisesRegexp(AttributeError, 'only use .str accessor'): - s.str.repeat(2) - - def test_empty_method(self): - s_empty = pd.Series() - tm.assert_equal(s_empty.empty, True) - - for full_series in [pd.Series([1]), pd.Series(index=[1])]: - tm.assert_equal(full_series.empty, False) diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 405d6c98a5d37..2bc44cb1c683f 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -2,21 +2,34 @@ # pylint: disable-msg=E1101,W0612 import pytz +import pytest + from datetime import timedelta, datetime +from distutils.version import LooseVersion from numpy import nan import numpy as np import pandas as pd -from pandas import (Series, DataFrame, isnull, date_range, - MultiIndex, Index, Timestamp) +from pandas import (Series, DataFrame, isna, date_range, + MultiIndex, Index, Timestamp, NaT, IntervalIndex, + Categorical) from pandas.compat import range -from pandas import tslib +from pandas._libs.tslib import iNaT +from pandas.core.series import remove_na from pandas.util.testing import assert_series_equal, assert_frame_equal import pandas.util.testing as tm +import pandas.util._test_decorators as td from .common import TestData +try: + import scipy + _is_scipy_ge_0190 = (LooseVersion(scipy.__version__) >= + LooseVersion('0.19.0')) +except: + _is_scipy_ge_0190 = False + def _skip_if_no_pchip(): 
try: @@ -39,18 +52,23 @@ def _simple_ts(start, end, freq='D'): return Series(np.random.randn(len(rng)), index=rng) -class TestSeriesMissingData(TestData, tm.TestCase): +class TestSeriesMissingData(TestData): + + def test_remove_na_deprecation(self): + # see gh-16971 + with tm.assert_produces_warning(FutureWarning): + remove_na(Series([])) def test_timedelta_fillna(self): # GH 3371 - s = Series([Timestamp('20130101'), Timestamp('20130101'), Timestamp( - '20130102'), Timestamp('20130103 9:01:01')]) + s = Series([Timestamp('20130101'), Timestamp('20130101'), + Timestamp('20130102'), Timestamp('20130103 9:01:01')]) td = s.diff() # reg fillna result = td.fillna(0) - expected = Series([timedelta(0), timedelta(0), timedelta(1), timedelta( - days=1, seconds=9 * 3600 + 60 + 1)]) + expected = Series([timedelta(0), timedelta(0), timedelta(1), + timedelta(days=1, seconds=9 * 3600 + 60 + 1)]) assert_series_equal(result, expected) # interprested as seconds @@ -60,8 +78,9 @@ def test_timedelta_fillna(self): assert_series_equal(result, expected) result = td.fillna(timedelta(days=1, seconds=1)) - expected = Series([timedelta(days=1, seconds=1), timedelta( - 0), timedelta(1), timedelta(days=1, seconds=9 * 3600 + 60 + 1)]) + expected = Series([timedelta(days=1, seconds=1), timedelta(0), + timedelta(1), + timedelta(days=1, seconds=9 * 3600 + 60 + 1)]) assert_series_equal(result, expected) result = td.fillna(np.timedelta64(int(1e9))) @@ -69,9 +88,8 @@ def test_timedelta_fillna(self): timedelta(days=1, seconds=9 * 3600 + 60 + 1)]) assert_series_equal(result, expected) - from pandas import tslib - result = td.fillna(tslib.NaT) - expected = Series([tslib.NaT, timedelta(0), timedelta(1), + result = td.fillna(NaT) + expected = Series([NaT, timedelta(0), timedelta(1), timedelta(days=1, seconds=9 * 3600 + 60 + 1)], dtype='m8[ns]') assert_series_equal(result, expected) @@ -102,8 +120,7 @@ def test_datetime64_fillna(self): '20130101'), Timestamp('20130104'), Timestamp('20130103 9:01:01')]) 
assert_series_equal(result, expected) - from pandas import tslib - result = s.fillna(tslib.NaT) + result = s.fillna(NaT) expected = s assert_series_equal(result, expected) @@ -131,6 +148,7 @@ def test_datetime64_fillna(self): assert_series_equal(result, expected) def test_datetime64_tz_fillna(self): + for tz in ['US/Eastern', 'Asia/Tokyo']: # DatetimeBlock s = Series([Timestamp('2011-01-01 10:00'), pd.NaT, @@ -142,24 +160,24 @@ def test_datetime64_tz_fillna(self): Timestamp('2011-01-02 10:00'), Timestamp('2011-01-03 10:00'), Timestamp('2011-01-02 10:00')]) - self.assert_series_equal(expected, result) + tm.assert_series_equal(expected, result) # check s is not changed - self.assert_series_equal(pd.isnull(s), null_loc) + tm.assert_series_equal(pd.isna(s), null_loc) result = s.fillna(pd.Timestamp('2011-01-02 10:00', tz=tz)) expected = Series([Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00', tz=tz), Timestamp('2011-01-03 10:00'), Timestamp('2011-01-02 10:00', tz=tz)]) - self.assert_series_equal(expected, result) - self.assert_series_equal(pd.isnull(s), null_loc) + tm.assert_series_equal(expected, result) + tm.assert_series_equal(pd.isna(s), null_loc) result = s.fillna('AAA') expected = Series([Timestamp('2011-01-01 10:00'), 'AAA', Timestamp('2011-01-03 10:00'), 'AAA'], dtype=object) - self.assert_series_equal(expected, result) - self.assert_series_equal(pd.isnull(s), null_loc) + tm.assert_series_equal(expected, result) + tm.assert_series_equal(pd.isna(s), null_loc) result = s.fillna({1: pd.Timestamp('2011-01-02 10:00', tz=tz), 3: pd.Timestamp('2011-01-04 10:00')}) @@ -167,8 +185,8 @@ def test_datetime64_tz_fillna(self): Timestamp('2011-01-02 10:00', tz=tz), Timestamp('2011-01-03 10:00'), Timestamp('2011-01-04 10:00')]) - self.assert_series_equal(expected, result) - self.assert_series_equal(pd.isnull(s), null_loc) + tm.assert_series_equal(expected, result) + tm.assert_series_equal(pd.isna(s), null_loc) result = s.fillna({1: pd.Timestamp('2011-01-02 10:00'), 
3: pd.Timestamp('2011-01-04 10:00')}) @@ -176,31 +194,31 @@ def test_datetime64_tz_fillna(self): Timestamp('2011-01-02 10:00'), Timestamp('2011-01-03 10:00'), Timestamp('2011-01-04 10:00')]) - self.assert_series_equal(expected, result) - self.assert_series_equal(pd.isnull(s), null_loc) + tm.assert_series_equal(expected, result) + tm.assert_series_equal(pd.isna(s), null_loc) # DatetimeBlockTZ idx = pd.DatetimeIndex(['2011-01-01 10:00', pd.NaT, '2011-01-03 10:00', pd.NaT], tz=tz) s = pd.Series(idx) - self.assertEqual(s.dtype, 'datetime64[ns, {0}]'.format(tz)) - self.assert_series_equal(pd.isnull(s), null_loc) + assert s.dtype == 'datetime64[ns, {0}]'.format(tz) + tm.assert_series_equal(pd.isna(s), null_loc) result = s.fillna(pd.Timestamp('2011-01-02 10:00')) expected = Series([Timestamp('2011-01-01 10:00', tz=tz), Timestamp('2011-01-02 10:00'), Timestamp('2011-01-03 10:00', tz=tz), Timestamp('2011-01-02 10:00')]) - self.assert_series_equal(expected, result) - self.assert_series_equal(pd.isnull(s), null_loc) + tm.assert_series_equal(expected, result) + tm.assert_series_equal(pd.isna(s), null_loc) result = s.fillna(pd.Timestamp('2011-01-02 10:00', tz=tz)) idx = pd.DatetimeIndex(['2011-01-01 10:00', '2011-01-02 10:00', '2011-01-03 10:00', '2011-01-02 10:00'], tz=tz) expected = Series(idx) - self.assert_series_equal(expected, result) - self.assert_series_equal(pd.isnull(s), null_loc) + tm.assert_series_equal(expected, result) + tm.assert_series_equal(pd.isna(s), null_loc) result = s.fillna(pd.Timestamp('2011-01-02 10:00', tz=tz).to_pydatetime()) @@ -208,15 +226,15 @@ def test_datetime64_tz_fillna(self): '2011-01-03 10:00', '2011-01-02 10:00'], tz=tz) expected = Series(idx) - self.assert_series_equal(expected, result) - self.assert_series_equal(pd.isnull(s), null_loc) + tm.assert_series_equal(expected, result) + tm.assert_series_equal(pd.isna(s), null_loc) result = s.fillna('AAA') expected = Series([Timestamp('2011-01-01 10:00', tz=tz), 'AAA', Timestamp('2011-01-03 
10:00', tz=tz), 'AAA'], dtype=object) - self.assert_series_equal(expected, result) - self.assert_series_equal(pd.isnull(s), null_loc) + tm.assert_series_equal(expected, result) + tm.assert_series_equal(pd.isna(s), null_loc) result = s.fillna({1: pd.Timestamp('2011-01-02 10:00', tz=tz), 3: pd.Timestamp('2011-01-04 10:00')}) @@ -224,8 +242,8 @@ def test_datetime64_tz_fillna(self): Timestamp('2011-01-02 10:00', tz=tz), Timestamp('2011-01-03 10:00', tz=tz), Timestamp('2011-01-04 10:00')]) - self.assert_series_equal(expected, result) - self.assert_series_equal(pd.isnull(s), null_loc) + tm.assert_series_equal(expected, result) + tm.assert_series_equal(pd.isna(s), null_loc) result = s.fillna({1: pd.Timestamp('2011-01-02 10:00', tz=tz), 3: pd.Timestamp('2011-01-04 10:00', tz=tz)}) @@ -233,8 +251,8 @@ def test_datetime64_tz_fillna(self): Timestamp('2011-01-02 10:00', tz=tz), Timestamp('2011-01-03 10:00', tz=tz), Timestamp('2011-01-04 10:00', tz=tz)]) - self.assert_series_equal(expected, result) - self.assert_series_equal(pd.isnull(s), null_loc) + tm.assert_series_equal(expected, result) + tm.assert_series_equal(pd.isna(s), null_loc) # filling with a naive/other zone, coerce to object result = s.fillna(Timestamp('20130101')) @@ -242,16 +260,62 @@ def test_datetime64_tz_fillna(self): Timestamp('2013-01-01'), Timestamp('2011-01-03 10:00', tz=tz), Timestamp('2013-01-01')]) - self.assert_series_equal(expected, result) - self.assert_series_equal(pd.isnull(s), null_loc) + tm.assert_series_equal(expected, result) + tm.assert_series_equal(pd.isna(s), null_loc) result = s.fillna(Timestamp('20130101', tz='US/Pacific')) expected = Series([Timestamp('2011-01-01 10:00', tz=tz), Timestamp('2013-01-01', tz='US/Pacific'), Timestamp('2011-01-03 10:00', tz=tz), Timestamp('2013-01-01', tz='US/Pacific')]) - self.assert_series_equal(expected, result) - self.assert_series_equal(pd.isnull(s), null_loc) + tm.assert_series_equal(expected, result) + tm.assert_series_equal(pd.isna(s), null_loc) + + # 
with timezone + # GH 15855 + df = pd.Series([pd.Timestamp('2012-11-11 00:00:00+01:00'), pd.NaT]) + exp = pd.Series([pd.Timestamp('2012-11-11 00:00:00+01:00'), + pd.Timestamp('2012-11-11 00:00:00+01:00')]) + assert_series_equal(df.fillna(method='pad'), exp) + + df = pd.Series([pd.NaT, pd.Timestamp('2012-11-11 00:00:00+01:00')]) + exp = pd.Series([pd.Timestamp('2012-11-11 00:00:00+01:00'), + pd.Timestamp('2012-11-11 00:00:00+01:00')]) + assert_series_equal(df.fillna(method='bfill'), exp) + + def test_fillna_consistency(self): + # GH 16402 + # fillna with a tz aware to a tz-naive, should result in object + + s = Series([Timestamp('20130101'), pd.NaT]) + + result = s.fillna(Timestamp('20130101', tz='US/Eastern')) + expected = Series([Timestamp('20130101'), + Timestamp('2013-01-01', tz='US/Eastern')], + dtype='object') + assert_series_equal(result, expected) + + # where (we ignore the errors=) + result = s.where([True, False], + Timestamp('20130101', tz='US/Eastern'), + errors='ignore') + assert_series_equal(result, expected) + + result = s.where([True, False], + Timestamp('20130101', tz='US/Eastern'), + errors='ignore') + assert_series_equal(result, expected) + + # with a non-datetime + result = s.fillna('foo') + expected = Series([Timestamp('20130101'), + 'foo']) + assert_series_equal(result, expected) + + # assignment + s2 = s.copy() + s2[1] = 'foo' + assert_series_equal(s2, expected) def test_datetime64tz_fillna_round_issue(self): # GH 14872 @@ -292,11 +356,81 @@ def test_fillna_int(self): def test_fillna_raise(self): s = Series(np.random.randint(-100, 100, 50)) - self.assertRaises(TypeError, s.fillna, [1, 2]) - self.assertRaises(TypeError, s.fillna, (1, 2)) + pytest.raises(TypeError, s.fillna, [1, 2]) + pytest.raises(TypeError, s.fillna, (1, 2)) + + # related GH 9217, make sure limit is an int and greater than 0 + s = Series([1, 2, 3, None]) + for limit in [-1, 0, 1., 2.]: + for method in ['backfill', 'bfill', 'pad', 'ffill', None]: + with 
pytest.raises(ValueError): + s.fillna(1, limit=limit, method=method) + + def test_categorical_nan_equality(self): + cat = Series(Categorical(["a", "b", "c", np.nan])) + exp = Series([True, True, True, False]) + res = (cat == cat) + tm.assert_series_equal(res, exp) + + def test_categorical_nan_handling(self): + + # NaNs are represented as -1 in labels + s = Series(Categorical(["a", "b", np.nan, "a"])) + tm.assert_index_equal(s.cat.categories, Index(["a", "b"])) + tm.assert_numpy_array_equal(s.values.codes, + np.array([0, 1, -1, 0], dtype=np.int8)) + + @pytest.mark.parametrize('fill_value, expected_output', [ + ('a', ['a', 'a', 'b', 'a', 'a']), + ({1: 'a', 3: 'b', 4: 'b'}, ['a', 'a', 'b', 'b', 'b']), + ({1: 'a'}, ['a', 'a', 'b', np.nan, np.nan]), + ({1: 'a', 3: 'b'}, ['a', 'a', 'b', 'b', np.nan]), + (Series('a'), ['a', np.nan, 'b', np.nan, np.nan]), + (Series('a', index=[1]), ['a', 'a', 'b', np.nan, np.nan]), + (Series({1: 'a', 3: 'b'}), ['a', 'a', 'b', 'b', np.nan]), + (Series(['a', 'b'], index=[3, 4]), ['a', np.nan, 'b', 'a', 'b']) + ]) + def test_fillna_categorical(self, fill_value, expected_output): + # GH 17033 + # Test fillna for a Categorical series + data = ['a', np.nan, 'b', np.nan, np.nan] + s = Series(Categorical(data, categories=['a', 'b'])) + exp = Series(Categorical(expected_output, categories=['a', 'b'])) + tm.assert_series_equal(s.fillna(fill_value), exp) + + def test_fillna_categorical_raise(self): + data = ['a', np.nan, 'b', np.nan, np.nan] + s = Series(Categorical(data, categories=['a', 'b'])) + + with tm.assert_raises_regex(ValueError, + "fill value must be in categories"): + s.fillna('d') + + with tm.assert_raises_regex(ValueError, + "fill value must be in categories"): + s.fillna(Series('d')) + + with tm.assert_raises_regex(ValueError, + "fill value must be in categories"): + s.fillna({1: 'd', 3: 'a'}) + + with tm.assert_raises_regex(TypeError, + '"value" parameter must be a scalar or ' + 'dict, but you passed a "list"'): + s.fillna(['a', 'b']) 
+ + with tm.assert_raises_regex(TypeError, + '"value" parameter must be a scalar or ' + 'dict, but you passed a "tuple"'): + s.fillna(('a', 'b')) + + with tm.assert_raises_regex(TypeError, + '"value" parameter must be a scalar, dict ' + 'or Series, but you passed a "DataFrame"'): + s.fillna(DataFrame({1: ['a'], 3: ['b']})) def test_fillna_nat(self): - series = Series([0, 1, 2, tslib.iNaT], dtype='M8[ns]') + series = Series([0, 1, 2, iNaT], dtype='M8[ns]') filled = series.fillna(method='pad') filled2 = series.fillna(value=series.values[2]) @@ -314,7 +448,7 @@ def test_fillna_nat(self): assert_frame_equal(filled, expected) assert_frame_equal(filled2, expected) - series = Series([tslib.iNaT, 0, 1, 2], dtype='M8[ns]') + series = Series([iNaT, 0, 1, 2], dtype='M8[ns]') filled = series.fillna(method='bfill') filled2 = series.fillna(value=series[1]) @@ -332,11 +466,24 @@ def test_fillna_nat(self): assert_frame_equal(filled, expected) assert_frame_equal(filled2, expected) - def test_isnull_for_inf(self): + def test_isna_for_inf(self): + s = Series(['a', np.inf, np.nan, 1.0]) + with pd.option_context('mode.use_inf_as_na', True): + r = s.isna() + dr = s.dropna() + e = Series([False, True, True, False]) + de = Series(['a', 1.0], index=[0, 3]) + tm.assert_series_equal(r, e) + tm.assert_series_equal(dr, de) + + @tm.capture_stdout + def test_isnull_for_inf_deprecated(self): + # gh-17115 s = Series(['a', np.inf, np.nan, 1.0]) with pd.option_context('mode.use_inf_as_null', True): - r = s.isnull() + r = s.isna() dr = s.dropna() + e = Series([False, True, True, False]) de = Series(['a', 1.0], index=[0, 3]) tm.assert_series_equal(r, e) @@ -345,21 +492,21 @@ def test_isnull_for_inf(self): def test_fillna(self): ts = Series([0., 1., 2., 3., 4.], index=tm.makeDateIndex(5)) - self.assert_series_equal(ts, ts.fillna(method='ffill')) + tm.assert_series_equal(ts, ts.fillna(method='ffill')) ts[2] = np.NaN exp = Series([0., 1., 1., 3., 4.], index=ts.index) - 
self.assert_series_equal(ts.fillna(method='ffill'), exp) + tm.assert_series_equal(ts.fillna(method='ffill'), exp) exp = Series([0., 1., 3., 3., 4.], index=ts.index) - self.assert_series_equal(ts.fillna(method='backfill'), exp) + tm.assert_series_equal(ts.fillna(method='backfill'), exp) exp = Series([0., 1., 5., 3., 4.], index=ts.index) - self.assert_series_equal(ts.fillna(value=5), exp) + tm.assert_series_equal(ts.fillna(value=5), exp) - self.assertRaises(ValueError, ts.fillna) - self.assertRaises(ValueError, self.ts.fillna, value=0, method='ffill') + pytest.raises(ValueError, ts.fillna) + pytest.raises(ValueError, self.ts.fillna, value=0, method='ffill') # GH 5703 s1 = Series([np.nan]) @@ -433,7 +580,7 @@ def test_fillna_invalid_method(self): try: self.ts.fillna(method='ffil') except ValueError as inst: - self.assertIn('ffil', str(inst)) + assert 'ffil' in str(inst) def test_ffill(self): ts = Series([0., 1., 2., 3., 4.], index=tm.makeDateIndex(5)) @@ -453,34 +600,33 @@ def test_bfill(self): def test_timedelta64_nan(self): - from pandas import tslib td = Series([timedelta(days=i) for i in range(10)]) # nan ops on timedeltas td1 = td.copy() td1[0] = np.nan - self.assertTrue(isnull(td1[0])) - self.assertEqual(td1[0].value, tslib.iNaT) + assert isna(td1[0]) + assert td1[0].value == iNaT td1[0] = td[0] - self.assertFalse(isnull(td1[0])) + assert not isna(td1[0]) - td1[1] = tslib.iNaT - self.assertTrue(isnull(td1[1])) - self.assertEqual(td1[1].value, tslib.iNaT) + td1[1] = iNaT + assert isna(td1[1]) + assert td1[1].value == iNaT td1[1] = td[1] - self.assertFalse(isnull(td1[1])) + assert not isna(td1[1]) - td1[2] = tslib.NaT - self.assertTrue(isnull(td1[2])) - self.assertEqual(td1[2].value, tslib.iNaT) + td1[2] = NaT + assert isna(td1[2]) + assert td1[2].value == iNaT td1[2] = td[2] - self.assertFalse(isnull(td1[2])) + assert not isna(td1[2]) # boolean setting # this doesn't work, not sure numpy even supports it # result = td[(td>np.timedelta64(timedelta(days=3))) & # td 
val expected = Series([x > val for x in series]) - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) val = series[5] result = series > val expected = Series([x > val for x in series]) - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_comparisons(self): left = np.random.randn(10) @@ -61,344 +63,518 @@ def test_comparisons(self): assert_series_equal(s == s2, exp) assert_series_equal(s2 == s, exp) - def test_op_method(self): - def check(series, other, check_reverse=False): - simple_ops = ['add', 'sub', 'mul', 'floordiv', 'truediv', 'pow'] - if not compat.PY3: - simple_ops.append('div') + def test_operator_series_comparison_zerorank(self): + # GH 13006 + result = np.float64(0) > pd.Series([1, 2, 3]) + expected = 0.0 > pd.Series([1, 2, 3]) + tm.assert_series_equal(result, expected) + result = pd.Series([1, 2, 3]) < np.float64(0) + expected = pd.Series([1, 2, 3]) < 0.0 + tm.assert_series_equal(result, expected) + result = np.array([0, 1, 2])[0] > pd.Series([0, 1, 2]) + expected = 0.0 > pd.Series([1, 2, 3]) + tm.assert_series_equal(result, expected) - for opname in simple_ops: - op = getattr(Series, opname) - - if op == 'div': - alt = operator.truediv - else: - alt = getattr(operator, opname) - - result = op(series, other) - expected = alt(series, other) - assert_almost_equal(result, expected) - if check_reverse: - rop = getattr(Series, "r" + opname) - result = rop(series, other) - expected = alt(other, series) - assert_almost_equal(result, expected) + def test_object_comparisons(self): + s = Series(['a', 'b', np.nan, 'c', 'a']) - check(self.ts, self.ts * 2) - check(self.ts, self.ts[::2]) - check(self.ts, 5, check_reverse=True) - check(tm.makeFloatSeries(), tm.makeFloatSeries(), check_reverse=True) + result = s == 'a' + expected = Series([True, False, False, False, True]) + assert_series_equal(result, expected) - def test_neg(self): - assert_series_equal(-self.series, -1 * self.series) + 
result = s < 'a' + expected = Series([False, False, False, False, False]) + assert_series_equal(result, expected) - def test_invert(self): - assert_series_equal(-(self.series < 0), ~(self.series < 0)) + result = s != 'a' + expected = -(s == 'a') + assert_series_equal(result, expected) - def test_div(self): - with np.errstate(all='ignore'): - # no longer do integer div for any ops, but deal with the 0's - p = DataFrame({'first': [3, 4, 5, 8], 'second': [0, 0, 0, 3]}) - result = p['first'] / p['second'] - expected = Series( - p['first'].values.astype(float) / p['second'].values, - dtype='float64') - expected.iloc[0:3] = np.inf - assert_series_equal(result, expected) + def test_categorical_comparisons(self): + # GH 8938 + # allow equality comparisons + a = Series(list('abc'), dtype="category") + b = Series(list('abc'), dtype="object") + c = Series(['a', 'b', 'cc'], dtype="object") + d = Series(list('acb'), dtype="object") + e = Categorical(list('abc')) + f = Categorical(list('acb')) + + # vs scalar + assert not (a == 'a').all() + assert ((a != 'a') == ~(a == 'a')).all() + + assert not ('a' == a).all() + assert (a == 'a')[0] + assert ('a' == a)[0] + assert not ('a' != a)[0] + + # vs list-like + assert (a == a).all() + assert not (a != a).all() + + assert (a == list(a)).all() + assert (a == b).all() + assert (b == a).all() + assert ((~(a == b)) == (a != b)).all() + assert ((~(b == a)) == (b != a)).all() + + assert not (a == c).all() + assert not (c == a).all() + assert not (a == d).all() + assert not (d == a).all() + + # vs a cat-like + assert (a == e).all() + assert (e == a).all() + assert not (a == f).all() + assert not (f == a).all() + + assert ((~(a == e) == (a != e)).all()) + assert ((~(e == a) == (e != a)).all()) + assert ((~(a == f) == (a != f)).all()) + assert ((~(f == a) == (f != a)).all()) + + # non-equality is not comparable + pytest.raises(TypeError, lambda: a < b) + pytest.raises(TypeError, lambda: b < a) + pytest.raises(TypeError, lambda: a > b) + 
pytest.raises(TypeError, lambda: b > a) - result = p['first'] / 0 - expected = Series(np.inf, index=p.index, name='first') - assert_series_equal(result, expected) + def test_comparison_tuples(self): + # GH11339 + # comparisons vs tuple + s = Series([(1, 1), (1, 2)]) - p = p.astype('float64') - result = p['first'] / p['second'] - expected = Series(p['first'].values / p['second'].values) - assert_series_equal(result, expected) + result = s == (1, 2) + expected = Series([False, True]) + assert_series_equal(result, expected) - p = DataFrame({'first': [3, 4, 5, 8], 'second': [1, 1, 1, 1]}) - result = p['first'] / p['second'] - assert_series_equal(result, p['first'].astype('float64'), - check_names=False) - self.assertTrue(result.name is None) - self.assertFalse(np.array_equal(result, p['second'] / p['first'])) - - # inf signing - s = Series([np.nan, 1., -1.]) - result = s / 0 - expected = Series([np.nan, np.inf, -np.inf]) - assert_series_equal(result, expected) + result = s != (1, 2) + expected = Series([True, False]) + assert_series_equal(result, expected) - # float/integer issue - # GH 7785 - p = DataFrame({'first': (1, 0), 'second': (-0.01, -0.02)}) - expected = Series([-0.01, -np.inf]) + result = s == (0, 0) + expected = Series([False, False]) + assert_series_equal(result, expected) - result = p['second'].div(p['first']) - assert_series_equal(result, expected, check_names=False) + result = s != (0, 0) + expected = Series([True, True]) + assert_series_equal(result, expected) - result = p['second'] / p['first'] - assert_series_equal(result, expected) + s = Series([(1, 1), (1, 1)]) - # GH 9144 - s = Series([-1, 0, 1]) + result = s == (1, 1) + expected = Series([True, True]) + assert_series_equal(result, expected) - result = 0 / s - expected = Series([0.0, nan, 0.0]) - assert_series_equal(result, expected) + result = s != (1, 1) + expected = Series([False, False]) + assert_series_equal(result, expected) - result = s / 0 - expected = Series([-inf, nan, inf]) - 
assert_series_equal(result, expected) + s = Series([frozenset([1]), frozenset([1, 2])]) - result = s // 0 - expected = Series([-inf, nan, inf]) - assert_series_equal(result, expected) + result = s == frozenset([1]) + expected = Series([True, False]) + assert_series_equal(result, expected) - # GH 8674 - zero_array = np.array([0] * 5) - data = np.random.randn(5) - expected = pd.Series([0.] * 5) - result = zero_array / pd.Series(data) - assert_series_equal(result, expected) + def test_comparison_operators_with_nas(self): + ser = Series(bdate_range('1/1/2000', periods=10), dtype=object) + ser[::2] = np.nan - result = pd.Series(zero_array) / data - assert_series_equal(result, expected) + # test that comparisons work + ops = ['lt', 'le', 'gt', 'ge', 'eq', 'ne'] + for op in ops: + val = ser[5] - result = pd.Series(zero_array) / pd.Series(data) - assert_series_equal(result, expected) + f = getattr(operator, op) + result = f(ser, val) - def test_operators(self): - def _check_op(series, other, op, pos_only=False, - check_dtype=True): - left = np.abs(series) if pos_only else series - right = np.abs(other) if pos_only else other + expected = f(ser.dropna(), val).reindex(ser.index) - cython_or_numpy = op(left, right) - python = left.combine(right, op) - assert_series_equal(cython_or_numpy, python, - check_dtype=check_dtype) + if op == 'ne': + expected = expected.fillna(True).astype(bool) + else: + expected = expected.fillna(False).astype(bool) - def check(series, other): - simple_ops = ['add', 'sub', 'mul', 'truediv', 'floordiv', 'mod'] + assert_series_equal(result, expected) - for opname in simple_ops: - _check_op(series, other, getattr(operator, opname)) + # fffffffuuuuuuuuuuuu + # result = f(val, s) + # expected = f(val, s.dropna()).reindex(s.index) + # assert_series_equal(result, expected) - _check_op(series, other, operator.pow, pos_only=True) + # boolean &, |, ^ should work with object arrays and propagate NAs - _check_op(series, other, lambda x, y: operator.add(y, x)) - 
_check_op(series, other, lambda x, y: operator.sub(y, x)) - _check_op(series, other, lambda x, y: operator.truediv(y, x)) - _check_op(series, other, lambda x, y: operator.floordiv(y, x)) - _check_op(series, other, lambda x, y: operator.mul(y, x)) - _check_op(series, other, lambda x, y: operator.pow(y, x), - pos_only=True) - _check_op(series, other, lambda x, y: operator.mod(y, x)) + ops = ['and_', 'or_', 'xor'] + mask = ser.isna() + for bool_op in ops: + func = getattr(operator, bool_op) - check(self.ts, self.ts * 2) - check(self.ts, self.ts * 0) - check(self.ts, self.ts[::2]) - check(self.ts, 5) + filled = ser.fillna(ser[0]) - def check_comparators(series, other, check_dtype=True): - _check_op(series, other, operator.gt, check_dtype=check_dtype) - _check_op(series, other, operator.ge, check_dtype=check_dtype) - _check_op(series, other, operator.eq, check_dtype=check_dtype) - _check_op(series, other, operator.lt, check_dtype=check_dtype) - _check_op(series, other, operator.le, check_dtype=check_dtype) + result = func(ser < ser[9], ser > ser[3]) - check_comparators(self.ts, 5) - check_comparators(self.ts, self.ts + 1, check_dtype=False) + expected = func(filled < filled[9], filled > filled[3]) + expected[mask] = False + assert_series_equal(result, expected) - def test_divmod(self): - def check(series, other): - results = divmod(series, other) - if isinstance(other, Iterable) and len(series) != len(other): - # if the lengths don't match, this is the test where we use - # `self.ts[::2]`. Pad every other value in `other_np` with nan. 
- other_np = [] - for n in other: - other_np.append(n) - other_np.append(np.nan) - else: - other_np = other - other_np = np.asarray(other_np) - with np.errstate(all='ignore'): - expecteds = divmod(series.values, np.asarray(other_np)) + def test_comparison_object_numeric_nas(self): + ser = Series(np.random.randn(10), dtype=object) + shifted = ser.shift(2) - for result, expected in zip(results, expecteds): - # check the values, name, and index separatly - assert_almost_equal(np.asarray(result), expected) + ops = ['lt', 'le', 'gt', 'ge', 'eq', 'ne'] + for op in ops: + func = getattr(operator, op) - self.assertEqual(result.name, series.name) - assert_index_equal(result.index, series.index) + result = func(ser, shifted) + expected = func(ser.astype(float), shifted.astype(float)) + assert_series_equal(result, expected) - check(self.ts, self.ts * 2) - check(self.ts, self.ts * 0) - check(self.ts, self.ts[::2]) - check(self.ts, 5) + def test_comparison_invalid(self): + # GH4968 + # invalid date/int comparisons + s = Series(range(5)) + s2 = Series(date_range('20010101', periods=5)) - def test_operators_empty_int_corner(self): - s1 = Series([], [], dtype=np.int32) - s2 = Series({'x': 0.}) - assert_series_equal(s1 * s2, Series([np.nan], index=['x'])) + for (x, y) in [(s, s2), (s2, s)]: + pytest.raises(TypeError, lambda: x == y) + pytest.raises(TypeError, lambda: x != y) + pytest.raises(TypeError, lambda: x >= y) + pytest.raises(TypeError, lambda: x > y) + pytest.raises(TypeError, lambda: x < y) + pytest.raises(TypeError, lambda: x <= y) - def test_operators_timedelta64(self): + def test_unequal_categorical_comparison_raises_type_error(self): + # unequal comparison should raise for unordered cats + cat = Series(Categorical(list("abc"))) - # invalid ops - self.assertRaises(Exception, self.objSeries.__add__, 1) - self.assertRaises(Exception, self.objSeries.__add__, - np.array(1, dtype=np.int64)) - self.assertRaises(Exception, self.objSeries.__sub__, 1) - 
self.assertRaises(Exception, self.objSeries.__sub__, - np.array(1, dtype=np.int64)) + def f(): + cat > "b" - # seriese ops - v1 = date_range('2012-1-1', periods=3, freq='D') - v2 = date_range('2012-1-2', periods=3, freq='D') - rs = Series(v2) - Series(v1) - xp = Series(1e9 * 3600 * 24, - rs.index).astype('int64').astype('timedelta64[ns]') - assert_series_equal(rs, xp) - self.assertEqual(rs.dtype, 'timedelta64[ns]') + pytest.raises(TypeError, f) + cat = Series(Categorical(list("abc"), ordered=False)) - df = DataFrame(dict(A=v1)) - td = Series([timedelta(days=i) for i in range(3)]) - self.assertEqual(td.dtype, 'timedelta64[ns]') + def f(): + cat > "b" - # series on the rhs - result = df['A'] - df['A'].shift() - self.assertEqual(result.dtype, 'timedelta64[ns]') + pytest.raises(TypeError, f) - result = df['A'] + td - self.assertEqual(result.dtype, 'M8[ns]') + # https://github.com/pandas-dev/pandas/issues/9836#issuecomment-92123057 + # and following comparisons with scalars not in categories should raise + # for unequal comps, but not for equal/not equal + cat = Series(Categorical(list("abc"), ordered=True)) - # scalar Timestamp on rhs - maxa = df['A'].max() - tm.assertIsInstance(maxa, Timestamp) + pytest.raises(TypeError, lambda: cat < "d") + pytest.raises(TypeError, lambda: cat > "d") + pytest.raises(TypeError, lambda: "d" < cat) + pytest.raises(TypeError, lambda: "d" > cat) - resultb = df['A'] - df['A'].max() - self.assertEqual(resultb.dtype, 'timedelta64[ns]') + tm.assert_series_equal(cat == "d", Series([False, False, False])) + tm.assert_series_equal(cat != "d", Series([True, True, True])) - # timestamp on lhs - result = resultb + df['A'] - values = [Timestamp('20111230'), Timestamp('20120101'), - Timestamp('20120103')] - expected = Series(values, name='A') + @pytest.mark.parametrize('dtype', [None, object]) + def test_more_na_comparisons(self, dtype): + left = Series(['a', np.nan, 'c'], dtype=dtype) + right = Series(['a', np.nan, 'd'], dtype=dtype) + + result = 
left == right + expected = Series([True, False, False]) assert_series_equal(result, expected) - # datetimes on rhs - result = df['A'] - datetime(2001, 1, 1) - expected = Series( - [timedelta(days=4017 + i) for i in range(3)], name='A') + result = left != right + expected = Series([False, True, True]) assert_series_equal(result, expected) - self.assertEqual(result.dtype, 'm8[ns]') - d = datetime(2001, 1, 1, 3, 4) - resulta = df['A'] - d - self.assertEqual(resulta.dtype, 'm8[ns]') + result = left == np.nan + expected = Series([False, False, False]) + assert_series_equal(result, expected) - # roundtrip - resultb = resulta + d - assert_series_equal(df['A'], resultb) + result = left != np.nan + expected = Series([True, True, True]) + assert_series_equal(result, expected) - # timedeltas on rhs - td = timedelta(days=1) - resulta = df['A'] + td - resultb = resulta - td - assert_series_equal(resultb, df['A']) - self.assertEqual(resultb.dtype, 'M8[ns]') + @pytest.mark.parametrize('pair', [ + ([pd.Timestamp('2011-01-01'), NaT, pd.Timestamp('2011-01-03')], + [NaT, NaT, pd.Timestamp('2011-01-03')]), + + ([pd.Timedelta('1 days'), NaT, pd.Timedelta('3 days')], + [NaT, NaT, pd.Timedelta('3 days')]), + + ([pd.Period('2011-01', freq='M'), NaT, pd.Period('2011-03', freq='M')], + [NaT, NaT, pd.Period('2011-03', freq='M')])]) + @pytest.mark.parametrize('reverse', [True, False]) + @pytest.mark.parametrize('box', [Series, Index]) + @pytest.mark.parametrize('dtype', [None, object]) + def test_nat_comparisons(self, dtype, box, reverse, pair): + l, r = pair + if reverse: + # add lhs / rhs switched data + l, r = r, l + + left = Series(l, dtype=dtype) + right = box(r, dtype=dtype) + # Series, Index + + expected = Series([False, False, True]) + assert_series_equal(left == right, expected) + + expected = Series([True, True, False]) + assert_series_equal(left != right, expected) + + expected = Series([False, False, False]) + assert_series_equal(left < right, expected) + + expected = 
Series([False, False, False]) + assert_series_equal(left > right, expected) + + expected = Series([False, False, True]) + assert_series_equal(left >= right, expected) + + expected = Series([False, False, True]) + assert_series_equal(left <= right, expected) + + @pytest.mark.parametrize('data', [ + [pd.Timestamp('2011-01-01'), NaT, pd.Timestamp('2011-01-03')], + [pd.Timedelta('1 days'), NaT, pd.Timedelta('3 days')], + [pd.Period('2011-01', freq='M'), NaT, pd.Period('2011-03', freq='M')] + ]) + @pytest.mark.parametrize('dtype', [None, object]) + def test_nat_comparisons_scalar(self, dtype, data): + left = Series(data, dtype=dtype) + + expected = Series([False, False, False]) + assert_series_equal(left == pd.NaT, expected) + assert_series_equal(pd.NaT == left, expected) + + expected = Series([True, True, True]) + assert_series_equal(left != pd.NaT, expected) + assert_series_equal(pd.NaT != left, expected) + + expected = Series([False, False, False]) + assert_series_equal(left < pd.NaT, expected) + assert_series_equal(pd.NaT > left, expected) + assert_series_equal(left <= pd.NaT, expected) + assert_series_equal(pd.NaT >= left, expected) + + assert_series_equal(left > pd.NaT, expected) + assert_series_equal(pd.NaT < left, expected) + assert_series_equal(left >= pd.NaT, expected) + assert_series_equal(pd.NaT <= left, expected) - # roundtrip - td = timedelta(minutes=5, seconds=3) - resulta = df['A'] + td - resultb = resulta - td - assert_series_equal(df['A'], resultb) - self.assertEqual(resultb.dtype, 'M8[ns]') + def test_comparison_different_length(self): + a = Series(['a', 'b', 'c']) + b = Series(['b', 'a']) + pytest.raises(ValueError, a.__lt__, b) - # inplace - value = rs[2] + np.timedelta64(timedelta(minutes=5, seconds=1)) - rs[2] += np.timedelta64(timedelta(minutes=5, seconds=1)) - self.assertEqual(rs[2], value) + a = Series([1, 2]) + b = Series([2, 3, 4]) + pytest.raises(ValueError, a.__eq__, b) - def test_operator_series_comparison_zerorank(self): - # GH 13006 - 
result = np.float64(0) > pd.Series([1, 2, 3]) - expected = 0.0 > pd.Series([1, 2, 3]) - self.assert_series_equal(result, expected) - result = pd.Series([1, 2, 3]) < np.float64(0) - expected = pd.Series([1, 2, 3]) < 0.0 - self.assert_series_equal(result, expected) - result = np.array([0, 1, 2])[0] > pd.Series([0, 1, 2]) - expected = 0.0 > pd.Series([1, 2, 3]) - self.assert_series_equal(result, expected) + def test_comparison_label_based(self): - def test_timedeltas_with_DateOffset(self): + # GH 4947 + # comparisons should be label based - # GH 4532 - # operate with pd.offsets - s = Series([Timestamp('20130101 9:01'), Timestamp('20130101 9:02')]) + a = Series([True, False, True], list('bca')) + b = Series([False, True, False], list('abc')) - result = s + pd.offsets.Second(5) - result2 = pd.offsets.Second(5) + s - expected = Series([Timestamp('20130101 9:01:05'), Timestamp( - '20130101 9:02:05')]) + expected = Series([False, True, False], list('abc')) + result = a & b assert_series_equal(result, expected) - assert_series_equal(result2, expected) - result = s - pd.offsets.Second(5) - result2 = -pd.offsets.Second(5) + s - expected = Series([Timestamp('20130101 9:00:55'), Timestamp( - '20130101 9:01:55')]) + expected = Series([True, True, False], list('abc')) + result = a | b assert_series_equal(result, expected) - assert_series_equal(result2, expected) - result = s + pd.offsets.Milli(5) - result2 = pd.offsets.Milli(5) + s - expected = Series([Timestamp('20130101 9:01:00.005'), Timestamp( - '20130101 9:02:00.005')]) + expected = Series([True, False, False], list('abc')) + result = a ^ b assert_series_equal(result, expected) - assert_series_equal(result2, expected) - result = s + pd.offsets.Minute(5) + pd.offsets.Milli(5) - expected = Series([Timestamp('20130101 9:06:00.005'), Timestamp( - '20130101 9:07:00.005')]) - assert_series_equal(result, expected) + # rhs is bigger + a = Series([True, False, True], list('bca')) + b = Series([False, True, False, True], list('abcd')) 
- # operate with np.timedelta64 correctly - result = s + np.timedelta64(1, 's') - result2 = np.timedelta64(1, 's') + s - expected = Series([Timestamp('20130101 9:01:01'), Timestamp( - '20130101 9:02:01')]) + expected = Series([False, True, False, False], list('abcd')) + result = a & b assert_series_equal(result, expected) - assert_series_equal(result2, expected) - result = s + np.timedelta64(5, 'ms') - result2 = np.timedelta64(5, 'ms') + s - expected = Series([Timestamp('20130101 9:01:00.005'), Timestamp( - '20130101 9:02:00.005')]) + expected = Series([True, True, False, False], list('abcd')) + result = a | b assert_series_equal(result, expected) - assert_series_equal(result2, expected) - # valid DateOffsets - for do in ['Hour', 'Minute', 'Second', 'Day', 'Micro', 'Milli', - 'Nano']: - op = getattr(pd.offsets, do) - s + op(5) - op(5) + s - - def test_timedelta_series_ops(self): - # GH11925 - - s = Series(timedelta_range('1 day', periods=3)) - ts = Timestamp('2012-01-01') - expected = Series(date_range('2012-01-02', periods=3)) - assert_series_equal(ts + s, expected) - assert_series_equal(s + ts, expected) - - expected2 = Series(date_range('2011-12-31', periods=3, freq='-1D')) - assert_series_equal(ts - s, expected2) - assert_series_equal(ts + (-s), expected2) + # filling + + # vs empty + result = a & Series([]) + expected = Series([False, False, False], list('bca')) + assert_series_equal(result, expected) + + result = a | Series([]) + expected = Series([True, False, True], list('bca')) + assert_series_equal(result, expected) + + # vs non-matching + result = a & Series([1], ['z']) + expected = Series([False, False, False, False], list('abcz')) + assert_series_equal(result, expected) + + result = a | Series([1], ['z']) + expected = Series([True, True, False, False], list('abcz')) + assert_series_equal(result, expected) + + # identity + # we would like s[s|e] == s to hold for any e, whether empty or not + for e in [Series([]), Series([1], ['z']), + Series(np.nan, 
b.index), Series(np.nan, a.index)]: + result = a[a | e] + assert_series_equal(result, a[a]) + + for e in [Series(['z'])]: + if compat.PY3: + with tm.assert_produces_warning(RuntimeWarning): + result = a[a | e] + else: + result = a[a | e] + assert_series_equal(result, a[a]) + + # vs scalars + index = list('bca') + t = Series([True, False, True]) + + for v in [True, 1, 2]: + result = Series([True, False, True], index=index) | v + expected = Series([True, True, True], index=index) + assert_series_equal(result, expected) + + for v in [np.nan, 'foo']: + pytest.raises(TypeError, lambda: t | v) + + for v in [False, 0]: + result = Series([True, False, True], index=index) | v + expected = Series([True, False, True], index=index) + assert_series_equal(result, expected) + + for v in [True, 1]: + result = Series([True, False, True], index=index) & v + expected = Series([True, False, True], index=index) + assert_series_equal(result, expected) + + for v in [False, 0]: + result = Series([True, False, True], index=index) & v + expected = Series([False, False, False], index=index) + assert_series_equal(result, expected) + for v in [np.nan]: + pytest.raises(TypeError, lambda: t & v) + + def test_comparison_flex_basic(self): + left = pd.Series(np.random.randn(10)) + right = pd.Series(np.random.randn(10)) + + assert_series_equal(left.eq(right), left == right) + assert_series_equal(left.ne(right), left != right) + assert_series_equal(left.le(right), left < right) + assert_series_equal(left.lt(right), left <= right) + assert_series_equal(left.gt(right), left > right) + assert_series_equal(left.ge(right), left >= right) + + # axis + for axis in [0, None, 'index']: + assert_series_equal(left.eq(right, axis=axis), left == right) + assert_series_equal(left.ne(right, axis=axis), left != right) + assert_series_equal(left.le(right, axis=axis), left < right) + assert_series_equal(left.lt(right, axis=axis), left <= right) + assert_series_equal(left.gt(right, axis=axis), left > right) + 
assert_series_equal(left.ge(right, axis=axis), left >= right) + + # + msg = 'No axis named 1 for object type' + for op in ['eq', 'ne', 'le', 'le', 'gt', 'ge']: + with tm.assert_raises_regex(ValueError, msg): + getattr(left, op)(right, axis=1) + + def test_comparison_flex_alignment(self): + left = Series([1, 3, 2], index=list('abc')) + right = Series([2, 2, 2], index=list('bcd')) + + exp = pd.Series([False, False, True, False], index=list('abcd')) + assert_series_equal(left.eq(right), exp) + + exp = pd.Series([True, True, False, True], index=list('abcd')) + assert_series_equal(left.ne(right), exp) + + exp = pd.Series([False, False, True, False], index=list('abcd')) + assert_series_equal(left.le(right), exp) + + exp = pd.Series([False, False, False, False], index=list('abcd')) + assert_series_equal(left.lt(right), exp) + + exp = pd.Series([False, True, True, False], index=list('abcd')) + assert_series_equal(left.ge(right), exp) + + exp = pd.Series([False, True, False, False], index=list('abcd')) + assert_series_equal(left.gt(right), exp) + + def test_comparison_flex_alignment_fill(self): + left = Series([1, 3, 2], index=list('abc')) + right = Series([2, 2, 2], index=list('bcd')) + + exp = pd.Series([False, False, True, True], index=list('abcd')) + assert_series_equal(left.eq(right, fill_value=2), exp) + + exp = pd.Series([True, True, False, False], index=list('abcd')) + assert_series_equal(left.ne(right, fill_value=2), exp) + + exp = pd.Series([False, False, True, True], index=list('abcd')) + assert_series_equal(left.le(right, fill_value=0), exp) + + exp = pd.Series([False, False, False, True], index=list('abcd')) + assert_series_equal(left.lt(right, fill_value=0), exp) + + exp = pd.Series([True, True, True, False], index=list('abcd')) + assert_series_equal(left.ge(right, fill_value=0), exp) + + exp = pd.Series([True, True, False, False], index=list('abcd')) + assert_series_equal(left.gt(right, fill_value=0), exp) + + def test_ne(self): + ts = Series([3, 4, 5, 6, 7], 
[3, 4, 5, 6, 7], dtype=float) + expected = [True, True, False, True, True] + assert tm.equalContents(ts.index != 5, expected) + assert tm.equalContents(~(ts.index == 5), expected) + + def test_comp_ops_df_compat(self): + # GH 1134 + s1 = pd.Series([1, 2, 3], index=list('ABC'), name='x') + s2 = pd.Series([2, 2, 2], index=list('ABD'), name='x') + + s3 = pd.Series([1, 2, 3], index=list('ABC'), name='x') + s4 = pd.Series([2, 2, 2, 2], index=list('ABCD'), name='x') + + for left, right in [(s1, s2), (s2, s1), (s3, s4), (s4, s3)]: + + msg = "Can only compare identically-labeled Series objects" + with tm.assert_raises_regex(ValueError, msg): + left == right + + with tm.assert_raises_regex(ValueError, msg): + left != right + + with tm.assert_raises_regex(ValueError, msg): + left < right + + msg = "Can only compare identically-labeled DataFrame objects" + with tm.assert_raises_regex(ValueError, msg): + left.to_frame() == right.to_frame() + + with tm.assert_raises_regex(ValueError, msg): + left.to_frame() != right.to_frame() + + with tm.assert_raises_regex(ValueError, msg): + left.to_frame() < right.to_frame() + + +class TestTimedeltaSeriesArithmetic(object): def test_timedelta64_operations_with_DateOffset(self): # GH 10699 @@ -411,8 +587,9 @@ def test_timedelta64_operations_with_DateOffset(self): expected = Series([timedelta(minutes=4, seconds=3)] * 3) assert_series_equal(result, expected) - result = td + Series([pd.offsets.Minute(1), pd.offsets.Second(3), - pd.offsets.Hour(2)]) + with tm.assert_produces_warning(PerformanceWarning): + result = td + Series([pd.offsets.Minute(1), pd.offsets.Second(3), + pd.offsets.Hour(2)]) expected = Series([timedelta(minutes=6, seconds=3), timedelta( minutes=5, seconds=6), timedelta(hours=2, minutes=5, seconds=3)]) assert_series_equal(result, expected) @@ -431,19 +608,18 @@ def test_timedelta64_operations_with_DateOffset(self): op(5) - td def test_timedelta64_operations_with_timedeltas(self): - # td operate with td td1 = 
Series([timedelta(minutes=5, seconds=3)] * 3) td2 = timedelta(minutes=5, seconds=4) result = td1 - td2 - expected = Series([timedelta(seconds=0)] * 3) - Series([timedelta( - seconds=1)] * 3) - self.assertEqual(result.dtype, 'm8[ns]') + expected = (Series([timedelta(seconds=0)] * 3) - + Series([timedelta(seconds=1)] * 3)) + assert result.dtype == 'm8[ns]' assert_series_equal(result, expected) result2 = td2 - td1 - expected = (Series([timedelta(seconds=1)] * 3) - Series([timedelta( - seconds=0)] * 3)) + expected = (Series([timedelta(seconds=1)] * 3) - + Series([timedelta(seconds=0)] * 3)) assert_series_equal(result2, expected) # roundtrip @@ -454,153 +630,202 @@ def test_timedelta64_operations_with_timedeltas(self): td1 = Series(pd.to_timedelta(['00:05:03'] * 3)) td2 = pd.to_timedelta('00:05:04') result = td1 - td2 - expected = Series([timedelta(seconds=0)] * 3) - Series([timedelta( - seconds=1)] * 3) - self.assertEqual(result.dtype, 'm8[ns]') + expected = (Series([timedelta(seconds=0)] * 3) - + Series([timedelta(seconds=1)] * 3)) + assert result.dtype == 'm8[ns]' assert_series_equal(result, expected) result2 = td2 - td1 - expected = (Series([timedelta(seconds=1)] * 3) - Series([timedelta( - seconds=0)] * 3)) + expected = (Series([timedelta(seconds=1)] * 3) - + Series([timedelta(seconds=0)] * 3)) assert_series_equal(result2, expected) # roundtrip assert_series_equal(result + td2, td1) - def test_timedelta64_operations_with_integers(self): - - # GH 4521 - # divide/multiply by integers - startdate = Series(date_range('2013-01-01', '2013-01-03')) - enddate = Series(date_range('2013-03-01', '2013-03-03')) + def test_operators_timedelta64(self): + # series ops + v1 = date_range('2012-1-1', periods=3, freq='D') + v2 = date_range('2012-1-2', periods=3, freq='D') + rs = Series(v2) - Series(v1) + xp = Series(1e9 * 3600 * 24, + rs.index).astype('int64').astype('timedelta64[ns]') + assert_series_equal(rs, xp) + assert rs.dtype == 'timedelta64[ns]' - s1 = enddate - startdate - 
s1[2] = np.nan - s2 = Series([2, 3, 4]) - expected = Series(s1.values.astype(np.int64) / s2, dtype='m8[ns]') - expected[2] = np.nan - result = s1 / s2 - assert_series_equal(result, expected) + df = DataFrame(dict(A=v1)) + td = Series([timedelta(days=i) for i in range(3)]) + assert td.dtype == 'timedelta64[ns]' - s2 = Series([20, 30, 40]) - expected = Series(s1.values.astype(np.int64) / s2, dtype='m8[ns]') - expected[2] = np.nan - result = s1 / s2 - assert_series_equal(result, expected) + # series on the rhs + result = df['A'] - df['A'].shift() + assert result.dtype == 'timedelta64[ns]' - result = s1 / 2 - expected = Series(s1.values.astype(np.int64) / 2, dtype='m8[ns]') - expected[2] = np.nan - assert_series_equal(result, expected) + result = df['A'] + td + assert result.dtype == 'M8[ns]' - s2 = Series([20, 30, 40]) - expected = Series(s1.values.astype(np.int64) * s2, dtype='m8[ns]') - expected[2] = np.nan - result = s1 * s2 - assert_series_equal(result, expected) + # scalar Timestamp on rhs + maxa = df['A'].max() + assert isinstance(maxa, Timestamp) - for dtype in ['int32', 'int16', 'uint32', 'uint64', 'uint32', 'uint16', - 'uint8']: - s2 = Series([20, 30, 40], dtype=dtype) - expected = Series( - s1.values.astype(np.int64) * s2.astype(np.int64), - dtype='m8[ns]') - expected[2] = np.nan - result = s1 * s2 - assert_series_equal(result, expected) + resultb = df['A'] - df['A'].max() + assert resultb.dtype == 'timedelta64[ns]' - result = s1 * 2 - expected = Series(s1.values.astype(np.int64) * 2, dtype='m8[ns]') - expected[2] = np.nan + # timestamp on lhs + result = resultb + df['A'] + values = [Timestamp('20111230'), Timestamp('20120101'), + Timestamp('20120103')] + expected = Series(values, name='A') assert_series_equal(result, expected) - result = s1 * -1 - expected = Series(s1.values.astype(np.int64) * -1, dtype='m8[ns]') - expected[2] = np.nan + # datetimes on rhs + result = df['A'] - datetime(2001, 1, 1) + expected = Series( + [timedelta(days=4017 + i) for i in 
range(3)], name='A') assert_series_equal(result, expected) + assert result.dtype == 'm8[ns]' - # invalid ops - assert_series_equal(s1 / s2.astype(float), - Series([Timedelta('2 days 22:48:00'), Timedelta( - '1 days 23:12:00'), Timedelta('NaT')])) - assert_series_equal(s1 / 2.0, - Series([Timedelta('29 days 12:00:00'), Timedelta( - '29 days 12:00:00'), Timedelta('NaT')])) - - for op in ['__add__', '__sub__']: - sop = getattr(s1, op, None) - if sop is not None: - self.assertRaises(TypeError, sop, 1) - self.assertRaises(TypeError, sop, s2.values) - - def test_timedelta64_conversions(self): - startdate = Series(date_range('2013-01-01', '2013-01-03')) - enddate = Series(date_range('2013-03-01', '2013-03-03')) + d = datetime(2001, 1, 1, 3, 4) + resulta = df['A'] - d + assert resulta.dtype == 'm8[ns]' - s1 = enddate - startdate - s1[2] = np.nan + # roundtrip + resultb = resulta + d + assert_series_equal(df['A'], resultb) - for m in [1, 3, 10]: - for unit in ['D', 'h', 'm', 's', 'ms', 'us', 'ns']: + # timedeltas on rhs + td = timedelta(days=1) + resulta = df['A'] + td + resultb = resulta - td + assert_series_equal(resultb, df['A']) + assert resultb.dtype == 'M8[ns]' - # op - expected = s1.apply(lambda x: x / np.timedelta64(m, unit)) - result = s1 / np.timedelta64(m, unit) - assert_series_equal(result, expected) + # roundtrip + td = timedelta(minutes=5, seconds=3) + resulta = df['A'] + td + resultb = resulta - td + assert_series_equal(df['A'], resultb) + assert resultb.dtype == 'M8[ns]' - if m == 1 and unit != 'ns': + # inplace + value = rs[2] + np.timedelta64(timedelta(minutes=5, seconds=1)) + rs[2] += np.timedelta64(timedelta(minutes=5, seconds=1)) + assert rs[2] == value - # astype - result = s1.astype("timedelta64[{0}]".format(unit)) - assert_series_equal(result, expected) + def test_timedelta64_ops_nat(self): + # GH 11349 + timedelta_series = Series([NaT, Timedelta('1s')]) + nat_series_dtype_timedelta = Series([NaT, NaT], + dtype='timedelta64[ns]') + 
single_nat_dtype_timedelta = Series([NaT], dtype='timedelta64[ns]') - # reverse op - expected = s1.apply( - lambda x: Timedelta(np.timedelta64(m, unit)) / x) - result = np.timedelta64(m, unit) / s1 + # subtraction + assert_series_equal(timedelta_series - NaT, + nat_series_dtype_timedelta) + assert_series_equal(-NaT + timedelta_series, + nat_series_dtype_timedelta) - # astype - s = Series(date_range('20130101', periods=3)) - result = s.astype(object) - self.assertIsInstance(result.iloc[0], datetime) - self.assertTrue(result.dtype == np.object_) + assert_series_equal(timedelta_series - single_nat_dtype_timedelta, + nat_series_dtype_timedelta) + assert_series_equal(-single_nat_dtype_timedelta + timedelta_series, + nat_series_dtype_timedelta) - result = s1.astype(object) - self.assertIsInstance(result.iloc[0], timedelta) - self.assertTrue(result.dtype == np.object_) + # addition + assert_series_equal(nat_series_dtype_timedelta + NaT, + nat_series_dtype_timedelta) + assert_series_equal(NaT + nat_series_dtype_timedelta, + nat_series_dtype_timedelta) - def test_timedelta64_equal_timedelta_supported_ops(self): - ser = Series([Timestamp('20130301'), Timestamp('20130228 23:00:00'), - Timestamp('20130228 22:00:00'), Timestamp( - '20130228 21:00:00')]) + assert_series_equal(nat_series_dtype_timedelta + + single_nat_dtype_timedelta, + nat_series_dtype_timedelta) + assert_series_equal(single_nat_dtype_timedelta + + nat_series_dtype_timedelta, + nat_series_dtype_timedelta) - intervals = 'D', 'h', 'm', 's', 'us' + assert_series_equal(timedelta_series + NaT, + nat_series_dtype_timedelta) + assert_series_equal(NaT + timedelta_series, + nat_series_dtype_timedelta) - # TODO: unused - # npy16_mappings = {'D': 24 * 60 * 60 * 1000000, - # 'h': 60 * 60 * 1000000, - # 'm': 60 * 1000000, - # 's': 1000000, - # 'us': 1} + assert_series_equal(timedelta_series + single_nat_dtype_timedelta, + nat_series_dtype_timedelta) + assert_series_equal(single_nat_dtype_timedelta + timedelta_series, + 
nat_series_dtype_timedelta) - def timedelta64(*args): - return sum(starmap(np.timedelta64, zip(args, intervals))) + assert_series_equal(nat_series_dtype_timedelta + NaT, + nat_series_dtype_timedelta) + assert_series_equal(NaT + nat_series_dtype_timedelta, + nat_series_dtype_timedelta) - for op, d, h, m, s, us in product([operator.add, operator.sub], - *([range(2)] * 5)): - nptd = timedelta64(d, h, m, s, us) - pytd = timedelta(days=d, hours=h, minutes=m, seconds=s, - microseconds=us) - lhs = op(ser, nptd) - rhs = op(ser, pytd) + assert_series_equal(nat_series_dtype_timedelta + + single_nat_dtype_timedelta, + nat_series_dtype_timedelta) + assert_series_equal(single_nat_dtype_timedelta + + nat_series_dtype_timedelta, + nat_series_dtype_timedelta) + + # multiplication + assert_series_equal(nat_series_dtype_timedelta * 1.0, + nat_series_dtype_timedelta) + assert_series_equal(1.0 * nat_series_dtype_timedelta, + nat_series_dtype_timedelta) + + assert_series_equal(timedelta_series * 1, timedelta_series) + assert_series_equal(1 * timedelta_series, timedelta_series) + + assert_series_equal(timedelta_series * 1.5, + Series([NaT, Timedelta('1.5s')])) + assert_series_equal(1.5 * timedelta_series, + Series([NaT, Timedelta('1.5s')])) + + assert_series_equal(timedelta_series * nan, + nat_series_dtype_timedelta) + assert_series_equal(nan * timedelta_series, + nat_series_dtype_timedelta) + + # division + assert_series_equal(timedelta_series / 2, + Series([NaT, Timedelta('0.5s')])) + assert_series_equal(timedelta_series / 2.0, + Series([NaT, Timedelta('0.5s')])) + assert_series_equal(timedelta_series / nan, + nat_series_dtype_timedelta) + + @pytest.mark.parametrize('scalar_td', [timedelta(minutes=5, seconds=4), + Timedelta(minutes=5, seconds=4), + Timedelta('5m4s').to_timedelta64()]) + def test_operators_timedelta64_with_timedelta(self, scalar_td): + # smoke tests + td1 = Series([timedelta(minutes=5, seconds=3)] * 3) + td1.iloc[2] = np.nan - try: - assert_series_equal(lhs, rhs) - 
except: - raise AssertionError( - "invalid comparsion [op->{0},d->{1},h->{2},m->{3}," - "s->{4},us->{5}]\n{6}\n{7}\n".format(op, d, h, m, s, - us, lhs, rhs)) + td1 + scalar_td + scalar_td + td1 + td1 - scalar_td + scalar_td - td1 + td1 / scalar_td + scalar_td / td1 + + +class TestDatetimeSeriesArithmetic(object): + @pytest.mark.parametrize( + 'box, assert_func', + [(Series, tm.assert_series_equal), + (pd.Index, tm.assert_index_equal)]) + def test_sub_datetime64_not_ns(self, box, assert_func): + # GH#7996 + dt64 = np.datetime64('2013-01-01') + assert dt64.dtype == 'datetime64[D]' + + obj = box(date_range('20130101', periods=3)) + res = obj - dt64 + expected = box([Timedelta(days=0), Timedelta(days=1), + Timedelta(days=2)]) + assert_func(res, expected) + + res = dt64 - obj + assert_func(res, -expected) def test_operators_datetimelike(self): def run_ops(ops, get_ser, test_ser): @@ -610,22 +835,12 @@ def run_ops(ops, get_ser, test_ser): # defined for op_str in ops: op = getattr(get_ser, op_str, None) - with tm.assertRaisesRegexp(TypeError, 'operate'): + with tm.assert_raises_regex(TypeError, 'operate|cannot'): op(test_ser) # ## timedelta64 ### td1 = Series([timedelta(minutes=5, seconds=3)] * 3) td1.iloc[2] = np.nan - td2 = timedelta(minutes=5, seconds=4) - ops = ['__mul__', '__floordiv__', '__pow__', '__rmul__', - '__rfloordiv__', '__rpow__'] - run_ops(ops, td1, td2) - td1 + td2 - td2 + td1 - td1 - td2 - td2 - td1 - td1 / td2 - td2 / td1 # ## datetime64 ### dt1 = Series([Timestamp('20111230'), Timestamp('20120101'), @@ -684,24 +899,23 @@ def run_ops(ops, get_ser, test_ser): assert_series_equal(result, exp) # odd numpy behavior with scalar timedeltas - if not _np_version_under1p8: - result = td1[0] + dt1 - exp = (dt1.dt.tz_localize(None) + td1[0]).dt.tz_localize(tz) - assert_series_equal(result, exp) + result = td1[0] + dt1 + exp = (dt1.dt.tz_localize(None) + td1[0]).dt.tz_localize(tz) + assert_series_equal(result, exp) - result = td2[0] + dt2 - exp = 
(dt2.dt.tz_localize(None) + td2[0]).dt.tz_localize(tz) - assert_series_equal(result, exp) + result = td2[0] + dt2 + exp = (dt2.dt.tz_localize(None) + td2[0]).dt.tz_localize(tz) + assert_series_equal(result, exp) result = dt1 - td1[0] exp = (dt1.dt.tz_localize(None) - td1[0]).dt.tz_localize(tz) assert_series_equal(result, exp) - self.assertRaises(TypeError, lambda: td1[0] - dt1) + pytest.raises(TypeError, lambda: td1[0] - dt1) result = dt2 - td2[0] exp = (dt2.dt.tz_localize(None) - td2[0]).dt.tz_localize(tz) assert_series_equal(result, exp) - self.assertRaises(TypeError, lambda: td2[0] - dt2) + pytest.raises(TypeError, lambda: td2[0] - dt2) result = dt1 + td1 exp = (dt1.dt.tz_localize(None) + td1).dt.tz_localize(tz) @@ -719,18 +933,8 @@ def run_ops(ops, get_ser, test_ser): exp = (dt2.dt.tz_localize(None) - td2).dt.tz_localize(tz) assert_series_equal(result, exp) - self.assertRaises(TypeError, lambda: td1 - dt1) - self.assertRaises(TypeError, lambda: td2 - dt2) - - def test_sub_datetime_compat(self): - # GH 14088 - tm._skip_if_no_pytz() - import pytz - s = Series([datetime(2016, 8, 23, 12, tzinfo=pytz.utc), pd.NaT]) - dt = datetime(2016, 8, 22, 12, tzinfo=pytz.utc) - exp = Series([Timedelta('1 days'), pd.NaT]) - assert_series_equal(s - dt, exp) - assert_series_equal(s - Timestamp(dt), exp) + pytest.raises(TypeError, lambda: td1 - dt1) + pytest.raises(TypeError, lambda: td2 - dt2) def test_sub_single_tz(self): # GH12290 @@ -743,581 +947,454 @@ def test_sub_single_tz(self): expected = Series([Timedelta('-2days')]) assert_series_equal(result, expected) - def test_ops_nat(self): + def test_dt64tz_series_sub_dtitz(self): + # GH#19071 subtracting tzaware DatetimeIndex from tzaware Series + # (with same tz) raises, fixed by #19024 + dti = pd.date_range('1999-09-30', periods=10, tz='US/Pacific') + ser = pd.Series(dti) + expected = pd.Series(pd.TimedeltaIndex(['0days'] * 10)) + + res = dti - ser + tm.assert_series_equal(res, expected) + res = ser - dti + 
tm.assert_series_equal(res, expected) + + def test_sub_datetime_compat(self): + # see gh-14088 + s = Series([datetime(2016, 8, 23, 12, tzinfo=pytz.utc), pd.NaT]) + dt = datetime(2016, 8, 22, 12, tzinfo=pytz.utc) + exp = Series([Timedelta('1 days'), pd.NaT]) + assert_series_equal(s - dt, exp) + assert_series_equal(s - Timestamp(dt), exp) + + def test_dt64_series_with_timedelta(self): + # scalar timedeltas/np.timedelta64 objects + # operate with np.timedelta64 correctly + s = Series([Timestamp('20130101 9:01'), Timestamp('20130101 9:02')]) + + result = s + np.timedelta64(1, 's') + result2 = np.timedelta64(1, 's') + s + expected = Series([Timestamp('20130101 9:01:01'), + Timestamp('20130101 9:02:01')]) + assert_series_equal(result, expected) + assert_series_equal(result2, expected) + + result = s + np.timedelta64(5, 'ms') + result2 = np.timedelta64(5, 'ms') + s + expected = Series([Timestamp('20130101 9:01:00.005'), + Timestamp('20130101 9:02:00.005')]) + assert_series_equal(result, expected) + assert_series_equal(result2, expected) + + def test_dt64_series_add_tick_DateOffset(self): + # GH 4532 + # operate with pd.offsets + ser = Series([Timestamp('20130101 9:01'), Timestamp('20130101 9:02')]) + expected = Series([Timestamp('20130101 9:01:05'), + Timestamp('20130101 9:02:05')]) + + result = ser + pd.offsets.Second(5) + assert_series_equal(result, expected) + + result2 = pd.offsets.Second(5) + ser + assert_series_equal(result2, expected) + + def test_dt64_series_sub_tick_DateOffset(self): + # GH 4532 + # operate with pd.offsets + ser = Series([Timestamp('20130101 9:01'), Timestamp('20130101 9:02')]) + expected = Series([Timestamp('20130101 9:00:55'), + Timestamp('20130101 9:01:55')]) + + result = ser - pd.offsets.Second(5) + assert_series_equal(result, expected) + + result2 = -pd.offsets.Second(5) + ser + assert_series_equal(result2, expected) + + with pytest.raises(TypeError): + pd.offsets.Second(5) - ser + + @pytest.mark.parametrize('cls_name', ['Day', 'Hour', 
'Minute', 'Second', + 'Milli', 'Micro', 'Nano']) + def test_dt64_series_with_tick_DateOffset_smoke(self, cls_name): + # GH 4532 + # smoke tests for valid DateOffsets + ser = Series([Timestamp('20130101 9:01'), Timestamp('20130101 9:02')]) + + offset_cls = getattr(pd.offsets, cls_name) + ser + offset_cls(5) + offset_cls(5) + ser + + def test_dt64_series_add_mixed_tick_DateOffset(self): + # GH 4532 + # operate with pd.offsets + s = Series([Timestamp('20130101 9:01'), Timestamp('20130101 9:02')]) + + result = s + pd.offsets.Milli(5) + result2 = pd.offsets.Milli(5) + s + expected = Series([Timestamp('20130101 9:01:00.005'), + Timestamp('20130101 9:02:00.005')]) + assert_series_equal(result, expected) + assert_series_equal(result2, expected) + + result = s + pd.offsets.Minute(5) + pd.offsets.Milli(5) + expected = Series([Timestamp('20130101 9:06:00.005'), + Timestamp('20130101 9:07:00.005')]) + assert_series_equal(result, expected) + + def test_dt64_series_sub_NaT(self): + # GH#18808 + dti = pd.DatetimeIndex([pd.NaT, pd.Timestamp('19900315')]) + ser = pd.Series(dti) + res = ser - pd.NaT + expected = pd.Series([pd.NaT, pd.NaT], dtype='timedelta64[ns]') + tm.assert_series_equal(res, expected) + + dti_tz = dti.tz_localize('Asia/Tokyo') + ser_tz = pd.Series(dti_tz) + res = ser_tz - pd.NaT + expected = pd.Series([pd.NaT, pd.NaT], dtype='timedelta64[ns]') + tm.assert_series_equal(res, expected) + + def test_datetime64_ops_nat(self): # GH 11349 - timedelta_series = Series([NaT, Timedelta('1s')]) datetime_series = Series([NaT, Timestamp('19900315')]) - nat_series_dtype_timedelta = Series( - [NaT, NaT], dtype='timedelta64[ns]') nat_series_dtype_timestamp = Series([NaT, NaT], dtype='datetime64[ns]') single_nat_dtype_datetime = Series([NaT], dtype='datetime64[ns]') - single_nat_dtype_timedelta = Series([NaT], dtype='timedelta64[ns]') # subtraction - assert_series_equal(timedelta_series - NaT, nat_series_dtype_timedelta) - assert_series_equal(-NaT + timedelta_series, - 
nat_series_dtype_timedelta) - - assert_series_equal(timedelta_series - single_nat_dtype_timedelta, - nat_series_dtype_timedelta) - assert_series_equal(-single_nat_dtype_timedelta + timedelta_series, - nat_series_dtype_timedelta) - - assert_series_equal(datetime_series - NaT, nat_series_dtype_timestamp) assert_series_equal(-NaT + datetime_series, nat_series_dtype_timestamp) - - assert_series_equal(datetime_series - single_nat_dtype_datetime, - nat_series_dtype_timedelta) - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): -single_nat_dtype_datetime + datetime_series - assert_series_equal(datetime_series - single_nat_dtype_timedelta, - nat_series_dtype_timestamp) - assert_series_equal(-single_nat_dtype_timedelta + datetime_series, - nat_series_dtype_timestamp) - - # without a Series wrapping the NaT, it is ambiguous - # whether it is a datetime64 or timedelta64 - # defaults to interpreting it as timedelta64 - assert_series_equal(nat_series_dtype_timestamp - NaT, - nat_series_dtype_timestamp) assert_series_equal(-NaT + nat_series_dtype_timestamp, nat_series_dtype_timestamp) - - assert_series_equal(nat_series_dtype_timestamp - - single_nat_dtype_datetime, - nat_series_dtype_timedelta) - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): -single_nat_dtype_datetime + nat_series_dtype_timestamp - assert_series_equal(nat_series_dtype_timestamp - - single_nat_dtype_timedelta, - nat_series_dtype_timestamp) - assert_series_equal(-single_nat_dtype_timedelta + - nat_series_dtype_timestamp, - nat_series_dtype_timestamp) - - with tm.assertRaises(TypeError): - timedelta_series - single_nat_dtype_datetime - # addition assert_series_equal(nat_series_dtype_timestamp + NaT, nat_series_dtype_timestamp) assert_series_equal(NaT + nat_series_dtype_timestamp, nat_series_dtype_timestamp) - assert_series_equal(nat_series_dtype_timestamp + - single_nat_dtype_timedelta, - nat_series_dtype_timestamp) - assert_series_equal(single_nat_dtype_timedelta + - 
nat_series_dtype_timestamp, - nat_series_dtype_timestamp) - - assert_series_equal(nat_series_dtype_timedelta + NaT, - nat_series_dtype_timedelta) - assert_series_equal(NaT + nat_series_dtype_timedelta, - nat_series_dtype_timedelta) - - assert_series_equal(nat_series_dtype_timedelta + - single_nat_dtype_timedelta, - nat_series_dtype_timedelta) - assert_series_equal(single_nat_dtype_timedelta + - nat_series_dtype_timedelta, - nat_series_dtype_timedelta) - - assert_series_equal(timedelta_series + NaT, nat_series_dtype_timedelta) - assert_series_equal(NaT + timedelta_series, nat_series_dtype_timedelta) - - assert_series_equal(timedelta_series + single_nat_dtype_timedelta, - nat_series_dtype_timedelta) - assert_series_equal(single_nat_dtype_timedelta + timedelta_series, - nat_series_dtype_timedelta) - assert_series_equal(nat_series_dtype_timestamp + NaT, nat_series_dtype_timestamp) assert_series_equal(NaT + nat_series_dtype_timestamp, nat_series_dtype_timestamp) - assert_series_equal(nat_series_dtype_timestamp + - single_nat_dtype_timedelta, - nat_series_dtype_timestamp) - assert_series_equal(single_nat_dtype_timedelta + - nat_series_dtype_timestamp, - nat_series_dtype_timestamp) + @pytest.mark.parametrize('dt64_series', [ + Series([Timestamp('19900315'), Timestamp('19900315')]), + Series([NaT, Timestamp('19900315')]), + Series([NaT, NaT], dtype='datetime64[ns]')]) + @pytest.mark.parametrize('one', [1, 1.0, np.array(1)]) + def test_dt64_mul_div_numeric_invalid(self, one, dt64_series): + # multiplication + with pytest.raises(TypeError): + dt64_series * one + with pytest.raises(TypeError): + one * dt64_series - assert_series_equal(nat_series_dtype_timedelta + NaT, - nat_series_dtype_timedelta) - assert_series_equal(NaT + nat_series_dtype_timedelta, - nat_series_dtype_timedelta) + # division + with pytest.raises(TypeError): + dt64_series / one + with pytest.raises(TypeError): + one / dt64_series + + def test_dt64_series_arith_overflow(self): + # GH#12534, fixed by #19024 + 
dt = pd.Timestamp('1700-01-31') + td = pd.Timedelta('20000 Days') + dti = pd.date_range('1949-09-30', freq='100Y', periods=4) + ser = pd.Series(dti) + with pytest.raises(OverflowError): + ser - dt + with pytest.raises(OverflowError): + dt - ser + with pytest.raises(OverflowError): + ser + td + with pytest.raises(OverflowError): + td + ser + + ser.iloc[-1] = pd.NaT + expected = pd.Series(['2004-10-03', '2104-10-04', '2204-10-04', 'NaT'], + dtype='datetime64[ns]') + res = ser + td + tm.assert_series_equal(res, expected) + res = td + ser + tm.assert_series_equal(res, expected) + + ser.iloc[1:] = pd.NaT + expected = pd.Series(['91279 Days', 'NaT', 'NaT', 'NaT'], + dtype='timedelta64[ns]') + res = ser - dt + tm.assert_series_equal(res, expected) + res = dt - ser + tm.assert_series_equal(res, -expected) + + @pytest.mark.parametrize('tz', [None, 'Asia/Tokyo']) + def test_dt64_series_add_intlike(self, tz): + # GH#19123 + dti = pd.DatetimeIndex(['2016-01-02', '2016-02-03', 'NaT'], tz=tz) + ser = Series(dti) + + other = Series([20, 30, 40], dtype='uint8') + + pytest.raises(TypeError, ser.__add__, 1) + pytest.raises(TypeError, ser.__sub__, 1) + + pytest.raises(TypeError, ser.__add__, other) + pytest.raises(TypeError, ser.__sub__, other) + + pytest.raises(TypeError, ser.__add__, other.values) + pytest.raises(TypeError, ser.__sub__, other.values) + + pytest.raises(TypeError, ser.__add__, pd.Index(other)) + pytest.raises(TypeError, ser.__sub__, pd.Index(other)) + + +class TestSeriesOperators(TestData): + @pytest.mark.parametrize( + 'ts', + [ + (lambda x: x, lambda x: x * 2, False), + (lambda x: x, lambda x: x[::2], False), + (lambda x: x, lambda x: 5, True), + (lambda x: tm.makeFloatSeries(), + lambda x: tm.makeFloatSeries(), + True) + ]) + @pytest.mark.parametrize('opname', ['add', 'sub', 'mul', 'floordiv', + 'truediv', 'div', 'pow']) + def test_op_method(self, opname, ts): + # check that Series.{opname} behaves like Series.__{opname}__, + series = ts[0](self.ts) + other = 
ts[1](self.ts) + check_reverse = ts[2] + + if opname == 'div' and compat.PY3: + pytest.skip('div test only for Py3') + + op = getattr(Series, opname) + + if op == 'div': + alt = operator.truediv + else: + alt = getattr(operator, opname) - assert_series_equal(nat_series_dtype_timedelta + - single_nat_dtype_timedelta, - nat_series_dtype_timedelta) - assert_series_equal(single_nat_dtype_timedelta + - nat_series_dtype_timedelta, - nat_series_dtype_timedelta) + result = op(series, other) + expected = alt(series, other) + assert_almost_equal(result, expected) + if check_reverse: + rop = getattr(Series, "r" + opname) + result = rop(series, other) + expected = alt(other, series) + assert_almost_equal(result, expected) - assert_series_equal(nat_series_dtype_timedelta + - single_nat_dtype_datetime, - nat_series_dtype_timestamp) - assert_series_equal(single_nat_dtype_datetime + - nat_series_dtype_timedelta, - nat_series_dtype_timestamp) + def test_neg(self): + assert_series_equal(-self.series, -1 * self.series) - # multiplication - assert_series_equal(nat_series_dtype_timedelta * 1.0, - nat_series_dtype_timedelta) - assert_series_equal(1.0 * nat_series_dtype_timedelta, - nat_series_dtype_timedelta) + def test_invert(self): + assert_series_equal(-(self.series < 0), ~(self.series < 0)) - assert_series_equal(timedelta_series * 1, timedelta_series) - assert_series_equal(1 * timedelta_series, timedelta_series) + def test_operators(self): + def _check_op(series, other, op, pos_only=False, + check_dtype=True): + left = np.abs(series) if pos_only else series + right = np.abs(other) if pos_only else other - assert_series_equal(timedelta_series * 1.5, - Series([NaT, Timedelta('1.5s')])) - assert_series_equal(1.5 * timedelta_series, - Series([NaT, Timedelta('1.5s')])) + cython_or_numpy = op(left, right) + python = left.combine(right, op) + assert_series_equal(cython_or_numpy, python, + check_dtype=check_dtype) - assert_series_equal(timedelta_series * nan, nat_series_dtype_timedelta) - 
assert_series_equal(nan * timedelta_series, nat_series_dtype_timedelta) + def check(series, other): + simple_ops = ['add', 'sub', 'mul', 'truediv', 'floordiv', 'mod'] - with tm.assertRaises(TypeError): - datetime_series * 1 - with tm.assertRaises(TypeError): - nat_series_dtype_timestamp * 1 - with tm.assertRaises(TypeError): - datetime_series * 1.0 - with tm.assertRaises(TypeError): - nat_series_dtype_timestamp * 1.0 + for opname in simple_ops: + _check_op(series, other, getattr(operator, opname)) - # division - assert_series_equal(timedelta_series / 2, - Series([NaT, Timedelta('0.5s')])) - assert_series_equal(timedelta_series / 2.0, - Series([NaT, Timedelta('0.5s')])) - assert_series_equal(timedelta_series / nan, nat_series_dtype_timedelta) - with tm.assertRaises(TypeError): - nat_series_dtype_timestamp / 1.0 - with tm.assertRaises(TypeError): - nat_series_dtype_timestamp / 1 + _check_op(series, other, operator.pow, pos_only=True) - def test_ops_datetimelike_align(self): - # GH 7500 - # datetimelike ops need to align - dt = Series(date_range('2012-1-1', periods=3, freq='D')) - dt.iloc[2] = np.nan - dt2 = dt[::-1] + _check_op(series, other, lambda x, y: operator.add(y, x)) + _check_op(series, other, lambda x, y: operator.sub(y, x)) + _check_op(series, other, lambda x, y: operator.truediv(y, x)) + _check_op(series, other, lambda x, y: operator.floordiv(y, x)) + _check_op(series, other, lambda x, y: operator.mul(y, x)) + _check_op(series, other, lambda x, y: operator.pow(y, x), + pos_only=True) + _check_op(series, other, lambda x, y: operator.mod(y, x)) - expected = Series([timedelta(0), timedelta(0), pd.NaT]) - # name is reset - result = dt2 - dt - assert_series_equal(result, expected) + check(self.ts, self.ts * 2) + check(self.ts, self.ts * 0) + check(self.ts, self.ts[::2]) + check(self.ts, 5) - expected = Series(expected, name=0) - result = (dt2.to_frame() - dt.to_frame())[0] - assert_series_equal(result, expected) + def check_comparators(series, other, 
check_dtype=True): + _check_op(series, other, operator.gt, check_dtype=check_dtype) + _check_op(series, other, operator.ge, check_dtype=check_dtype) + _check_op(series, other, operator.eq, check_dtype=check_dtype) + _check_op(series, other, operator.lt, check_dtype=check_dtype) + _check_op(series, other, operator.le, check_dtype=check_dtype) - def test_object_comparisons(self): - s = Series(['a', 'b', np.nan, 'c', 'a']) + check_comparators(self.ts, 5) + check_comparators(self.ts, self.ts + 1, check_dtype=False) - result = s == 'a' - expected = Series([True, False, False, False, True]) - assert_series_equal(result, expected) - - result = s < 'a' - expected = Series([False, False, False, False, False]) - assert_series_equal(result, expected) - - result = s != 'a' - expected = -(s == 'a') - assert_series_equal(result, expected) - - def test_comparison_tuples(self): - # GH11339 - # comparisons vs tuple - s = Series([(1, 1), (1, 2)]) - - result = s == (1, 2) - expected = Series([False, True]) - assert_series_equal(result, expected) - - result = s != (1, 2) - expected = Series([True, False]) - assert_series_equal(result, expected) - - result = s == (0, 0) - expected = Series([False, False]) - assert_series_equal(result, expected) - - result = s != (0, 0) - expected = Series([True, True]) - assert_series_equal(result, expected) - - s = Series([(1, 1), (1, 1)]) - - result = s == (1, 1) - expected = Series([True, True]) - assert_series_equal(result, expected) - - result = s != (1, 1) - expected = Series([False, False]) - assert_series_equal(result, expected) - - s = Series([frozenset([1]), frozenset([1, 2])]) - - result = s == frozenset([1]) - expected = Series([True, False]) - assert_series_equal(result, expected) - - def test_comparison_operators_with_nas(self): - s = Series(bdate_range('1/1/2000', periods=10), dtype=object) - s[::2] = np.nan - - # test that comparisons work - ops = ['lt', 'le', 'gt', 'ge', 'eq', 'ne'] - for op in ops: - val = s[5] - - f = 
getattr(operator, op) - result = f(s, val) - - expected = f(s.dropna(), val).reindex(s.index) - - if op == 'ne': - expected = expected.fillna(True).astype(bool) + def test_divmod(self): + def check(series, other): + results = divmod(series, other) + if isinstance(other, Iterable) and len(series) != len(other): + # if the lengths don't match, this is the test where we use + # `self.ts[::2]`. Pad every other value in `other_np` with nan. + other_np = [] + for n in other: + other_np.append(n) + other_np.append(np.nan) else: - expected = expected.fillna(False).astype(bool) - - assert_series_equal(result, expected) - - # fffffffuuuuuuuuuuuu - # result = f(val, s) - # expected = f(val, s.dropna()).reindex(s.index) - # assert_series_equal(result, expected) - - # boolean &, |, ^ should work with object arrays and propagate NAs - - ops = ['and_', 'or_', 'xor'] - mask = s.isnull() - for bool_op in ops: - f = getattr(operator, bool_op) - - filled = s.fillna(s[0]) - - result = f(s < s[9], s > s[3]) - - expected = f(filled < filled[9], filled > filled[3]) - expected[mask] = False - assert_series_equal(result, expected) - - def test_comparison_object_numeric_nas(self): - s = Series(np.random.randn(10), dtype=object) - shifted = s.shift(2) - - ops = ['lt', 'le', 'gt', 'ge', 'eq', 'ne'] - for op in ops: - f = getattr(operator, op) - - result = f(s, shifted) - expected = f(s.astype(float), shifted.astype(float)) - assert_series_equal(result, expected) - - def test_comparison_invalid(self): - - # GH4968 - # invalid date/int comparisons - s = Series(range(5)) - s2 = Series(date_range('20010101', periods=5)) - - for (x, y) in [(s, s2), (s2, s)]: - self.assertRaises(TypeError, lambda: x == y) - self.assertRaises(TypeError, lambda: x != y) - self.assertRaises(TypeError, lambda: x >= y) - self.assertRaises(TypeError, lambda: x > y) - self.assertRaises(TypeError, lambda: x < y) - self.assertRaises(TypeError, lambda: x <= y) - - def test_more_na_comparisons(self): - for dtype in [None, 
object]: - left = Series(['a', np.nan, 'c'], dtype=dtype) - right = Series(['a', np.nan, 'd'], dtype=dtype) - - result = left == right - expected = Series([True, False, False]) - assert_series_equal(result, expected) - - result = left != right - expected = Series([False, True, True]) - assert_series_equal(result, expected) - - result = left == np.nan - expected = Series([False, False, False]) - assert_series_equal(result, expected) - - result = left != np.nan - expected = Series([True, True, True]) - assert_series_equal(result, expected) - - def test_nat_comparisons(self): - data = [([pd.Timestamp('2011-01-01'), pd.NaT, - pd.Timestamp('2011-01-03')], - [pd.NaT, pd.NaT, pd.Timestamp('2011-01-03')]), - - ([pd.Timedelta('1 days'), pd.NaT, - pd.Timedelta('3 days')], - [pd.NaT, pd.NaT, pd.Timedelta('3 days')]), - - ([pd.Period('2011-01', freq='M'), pd.NaT, - pd.Period('2011-03', freq='M')], - [pd.NaT, pd.NaT, pd.Period('2011-03', freq='M')])] - - # add lhs / rhs switched data - data = data + [(r, l) for l, r in data] - - for l, r in data: - for dtype in [None, object]: - left = Series(l, dtype=dtype) - - # Series, Index - for right in [Series(r, dtype=dtype), Index(r, dtype=dtype)]: - expected = Series([False, False, True]) - assert_series_equal(left == right, expected) - - expected = Series([True, True, False]) - assert_series_equal(left != right, expected) - - expected = Series([False, False, False]) - assert_series_equal(left < right, expected) - - expected = Series([False, False, False]) - assert_series_equal(left > right, expected) - - expected = Series([False, False, True]) - assert_series_equal(left >= right, expected) - - expected = Series([False, False, True]) - assert_series_equal(left <= right, expected) - - def test_nat_comparisons_scalar(self): - data = [[pd.Timestamp('2011-01-01'), pd.NaT, - pd.Timestamp('2011-01-03')], - - [pd.Timedelta('1 days'), pd.NaT, pd.Timedelta('3 days')], - - [pd.Period('2011-01', freq='M'), pd.NaT, - pd.Period('2011-03', 
freq='M')]] - - for l in data: - for dtype in [None, object]: - left = Series(l, dtype=dtype) - - expected = Series([False, False, False]) - assert_series_equal(left == pd.NaT, expected) - assert_series_equal(pd.NaT == left, expected) - - expected = Series([True, True, True]) - assert_series_equal(left != pd.NaT, expected) - assert_series_equal(pd.NaT != left, expected) - - expected = Series([False, False, False]) - assert_series_equal(left < pd.NaT, expected) - assert_series_equal(pd.NaT > left, expected) - assert_series_equal(left <= pd.NaT, expected) - assert_series_equal(pd.NaT >= left, expected) - - assert_series_equal(left > pd.NaT, expected) - assert_series_equal(pd.NaT < left, expected) - assert_series_equal(left >= pd.NaT, expected) - assert_series_equal(pd.NaT <= left, expected) - - def test_comparison_different_length(self): - a = Series(['a', 'b', 'c']) - b = Series(['b', 'a']) - self.assertRaises(ValueError, a.__lt__, b) - - a = Series([1, 2]) - b = Series([2, 3, 4]) - self.assertRaises(ValueError, a.__eq__, b) - - def test_comparison_label_based(self): - - # GH 4947 - # comparisons should be label based - - a = Series([True, False, True], list('bca')) - b = Series([False, True, False], list('abc')) - - expected = Series([False, True, False], list('abc')) - result = a & b - assert_series_equal(result, expected) - - expected = Series([True, True, False], list('abc')) - result = a | b - assert_series_equal(result, expected) + other_np = other + other_np = np.asarray(other_np) + with np.errstate(all='ignore'): + expecteds = divmod(series.values, np.asarray(other_np)) - expected = Series([True, False, False], list('abc')) - result = a ^ b - assert_series_equal(result, expected) + for result, expected in zip(results, expecteds): + # check the values, name, and index separately + assert_almost_equal(np.asarray(result), expected) - # rhs is bigger - a = Series([True, False, True], list('bca')) - b = Series([False, True, False, True], list('abcd')) + assert 
result.name == series.name + assert_index_equal(result.index, series.index) - expected = Series([False, True, False, False], list('abcd')) - result = a & b - assert_series_equal(result, expected) + check(self.ts, self.ts * 2) + check(self.ts, self.ts * 0) + check(self.ts, self.ts[::2]) + check(self.ts, 5) - expected = Series([True, True, False, False], list('abcd')) - result = a | b - assert_series_equal(result, expected) + def test_operators_empty_int_corner(self): + s1 = Series([], [], dtype=np.int32) + s2 = Series({'x': 0.}) + assert_series_equal(s1 * s2, Series([np.nan], index=['x'])) - # filling + @pytest.mark.parametrize("m", [1, 3, 10]) + @pytest.mark.parametrize("unit", ['D', 'h', 'm', 's', 'ms', 'us', 'ns']) + def test_timedelta64_conversions(self, m, unit): - # vs empty - result = a & Series([]) - expected = Series([False, False, False], list('bca')) - assert_series_equal(result, expected) + startdate = Series(date_range('2013-01-01', '2013-01-03')) + enddate = Series(date_range('2013-03-01', '2013-03-03')) - result = a | Series([]) - expected = Series([True, False, True], list('bca')) - assert_series_equal(result, expected) + s1 = enddate - startdate + s1[2] = np.nan - # vs non-matching - result = a & Series([1], ['z']) - expected = Series([False, False, False, False], list('abcz')) + # op + expected = s1.apply(lambda x: x / np.timedelta64(m, unit)) + result = s1 / np.timedelta64(m, unit) assert_series_equal(result, expected) - result = a | Series([1], ['z']) - expected = Series([True, True, False, False], list('abcz')) + # reverse op + expected = s1.apply( + lambda x: Timedelta(np.timedelta64(m, unit)) / x) + result = np.timedelta64(m, unit) / s1 assert_series_equal(result, expected) - # identity - # we would like s[s|e] == s to hold for any e, whether empty or not - for e in [Series([]), Series([1], ['z']), - Series(np.nan, b.index), Series(np.nan, a.index)]: - result = a[a | e] - assert_series_equal(result, a[a]) - - for e in [Series(['z'])]: - if 
compat.PY3: - with tm.assert_produces_warning(RuntimeWarning): - result = a[a | e] - else: - result = a[a | e] - assert_series_equal(result, a[a]) - - # vs scalars - index = list('bca') - t = Series([True, False, True]) - - for v in [True, 1, 2]: - result = Series([True, False, True], index=index) | v - expected = Series([True, True, True], index=index) - assert_series_equal(result, expected) - - for v in [np.nan, 'foo']: - self.assertRaises(TypeError, lambda: t | v) - - for v in [False, 0]: - result = Series([True, False, True], index=index) | v - expected = Series([True, False, True], index=index) - assert_series_equal(result, expected) - - for v in [True, 1]: - result = Series([True, False, True], index=index) & v - expected = Series([True, False, True], index=index) - assert_series_equal(result, expected) - - for v in [False, 0]: - result = Series([True, False, True], index=index) & v - expected = Series([False, False, False], index=index) - assert_series_equal(result, expected) - for v in [np.nan]: - self.assertRaises(TypeError, lambda: t & v) - - def test_comparison_flex_basic(self): - left = pd.Series(np.random.randn(10)) - right = pd.Series(np.random.randn(10)) - - assert_series_equal(left.eq(right), left == right) - assert_series_equal(left.ne(right), left != right) - assert_series_equal(left.le(right), left < right) - assert_series_equal(left.lt(right), left <= right) - assert_series_equal(left.gt(right), left > right) - assert_series_equal(left.ge(right), left >= right) - - # axis - for axis in [0, None, 'index']: - assert_series_equal(left.eq(right, axis=axis), left == right) - assert_series_equal(left.ne(right, axis=axis), left != right) - assert_series_equal(left.le(right, axis=axis), left < right) - assert_series_equal(left.lt(right, axis=axis), left <= right) - assert_series_equal(left.gt(right, axis=axis), left > right) - assert_series_equal(left.ge(right, axis=axis), left >= right) + @pytest.mark.parametrize('op', [operator.add, operator.sub]) + 
def test_timedelta64_equal_timedelta_supported_ops(self, op): + ser = Series([Timestamp('20130301'), Timestamp('20130228 23:00:00'), + Timestamp('20130228 22:00:00'), + Timestamp('20130228 21:00:00')]) - # - msg = 'No axis named 1 for object type' - for op in ['eq', 'ne', 'le', 'le', 'gt', 'ge']: - with tm.assertRaisesRegexp(ValueError, msg): - getattr(left, op)(right, axis=1) + intervals = 'D', 'h', 'm', 's', 'us' - def test_comparison_flex_alignment(self): - left = Series([1, 3, 2], index=list('abc')) - right = Series([2, 2, 2], index=list('bcd')) + # TODO: unused + # npy16_mappings = {'D': 24 * 60 * 60 * 1000000, + # 'h': 60 * 60 * 1000000, + # 'm': 60 * 1000000, + # 's': 1000000, + # 'us': 1} - exp = pd.Series([False, False, True, False], index=list('abcd')) - assert_series_equal(left.eq(right), exp) + def timedelta64(*args): + return sum(starmap(np.timedelta64, zip(args, intervals))) - exp = pd.Series([True, True, False, True], index=list('abcd')) - assert_series_equal(left.ne(right), exp) + for d, h, m, s, us in product(*([range(2)] * 5)): + nptd = timedelta64(d, h, m, s, us) + pytd = timedelta(days=d, hours=h, minutes=m, seconds=s, + microseconds=us) + lhs = op(ser, nptd) + rhs = op(ser, pytd) - exp = pd.Series([False, False, True, False], index=list('abcd')) - assert_series_equal(left.le(right), exp) + assert_series_equal(lhs, rhs) - exp = pd.Series([False, False, False, False], index=list('abcd')) - assert_series_equal(left.lt(right), exp) + def test_ops_nat_mixed_datetime64_timedelta64(self): + # GH 11349 + timedelta_series = Series([NaT, Timedelta('1s')]) + datetime_series = Series([NaT, Timestamp('19900315')]) + nat_series_dtype_timedelta = Series([NaT, NaT], + dtype='timedelta64[ns]') + nat_series_dtype_timestamp = Series([NaT, NaT], dtype='datetime64[ns]') + single_nat_dtype_datetime = Series([NaT], dtype='datetime64[ns]') + single_nat_dtype_timedelta = Series([NaT], dtype='timedelta64[ns]') - exp = pd.Series([False, True, True, False], 
index=list('abcd')) - assert_series_equal(left.ge(right), exp) + # subtraction + assert_series_equal(datetime_series - single_nat_dtype_datetime, + nat_series_dtype_timedelta) - exp = pd.Series([False, True, False, False], index=list('abcd')) - assert_series_equal(left.gt(right), exp) + assert_series_equal(datetime_series - single_nat_dtype_timedelta, + nat_series_dtype_timestamp) + assert_series_equal(-single_nat_dtype_timedelta + datetime_series, + nat_series_dtype_timestamp) - def test_comparison_flex_alignment_fill(self): - left = Series([1, 3, 2], index=list('abc')) - right = Series([2, 2, 2], index=list('bcd')) + # without a Series wrapping the NaT, it is ambiguous + # whether it is a datetime64 or timedelta64 + # defaults to interpreting it as timedelta64 + assert_series_equal(nat_series_dtype_timestamp - + single_nat_dtype_datetime, + nat_series_dtype_timedelta) - exp = pd.Series([False, False, True, True], index=list('abcd')) - assert_series_equal(left.eq(right, fill_value=2), exp) + assert_series_equal(nat_series_dtype_timestamp - + single_nat_dtype_timedelta, + nat_series_dtype_timestamp) + assert_series_equal(-single_nat_dtype_timedelta + + nat_series_dtype_timestamp, + nat_series_dtype_timestamp) - exp = pd.Series([True, True, False, False], index=list('abcd')) - assert_series_equal(left.ne(right, fill_value=2), exp) + with pytest.raises(TypeError): + timedelta_series - single_nat_dtype_datetime - exp = pd.Series([False, False, True, True], index=list('abcd')) - assert_series_equal(left.le(right, fill_value=0), exp) + # addition + assert_series_equal(nat_series_dtype_timestamp + + single_nat_dtype_timedelta, + nat_series_dtype_timestamp) + assert_series_equal(single_nat_dtype_timedelta + + nat_series_dtype_timestamp, + nat_series_dtype_timestamp) - exp = pd.Series([False, False, False, True], index=list('abcd')) - assert_series_equal(left.lt(right, fill_value=0), exp) + assert_series_equal(nat_series_dtype_timestamp + + single_nat_dtype_timedelta, + 
nat_series_dtype_timestamp) + assert_series_equal(single_nat_dtype_timedelta + + nat_series_dtype_timestamp, + nat_series_dtype_timestamp) - exp = pd.Series([True, True, True, False], index=list('abcd')) - assert_series_equal(left.ge(right, fill_value=0), exp) + assert_series_equal(nat_series_dtype_timedelta + + single_nat_dtype_datetime, + nat_series_dtype_timestamp) + assert_series_equal(single_nat_dtype_datetime + + nat_series_dtype_timedelta, + nat_series_dtype_timestamp) - exp = pd.Series([True, True, False, False], index=list('abcd')) - assert_series_equal(left.gt(right, fill_value=0), exp) + def test_ops_datetimelike_align(self): + # GH 7500 + # datetimelike ops need to align + dt = Series(date_range('2012-1-1', periods=3, freq='D')) + dt.iloc[2] = np.nan + dt2 = dt[::-1] - def test_return_dtypes_bool_op_costant(self): - # gh15115 - s = pd.Series([1, 3, 2], index=range(3)) - const = 2 - for op in ['eq', 'ne', 'gt', 'lt', 'ge', 'le']: - result = getattr(s, op)(const).get_dtype_counts() - self.assert_series_equal(result, Series([1], ['bool'])) + expected = Series([timedelta(0), timedelta(0), pd.NaT]) + # name is reset + result = dt2 - dt + assert_series_equal(result, expected) - # empty Series - empty = s.iloc[:0] - for op in ['eq', 'ne', 'gt', 'lt', 'ge', 'le']: - result = getattr(empty, op)(const).get_dtype_counts() - self.assert_series_equal(result, Series([1], ['bool'])) + expected = Series(expected, name=0) + result = (dt2.to_frame() - dt.to_frame())[0] + assert_series_equal(result, expected) def test_operators_bitwise(self): # GH 9016: support bitwise op for integer types @@ -1388,11 +1465,11 @@ def test_operators_bitwise(self): expected = Series([1, 1, 3, 3], dtype='int32') assert_series_equal(res, expected) - self.assertRaises(TypeError, lambda: s_1111 & 'a') - self.assertRaises(TypeError, lambda: s_1111 & ['a', 'b', 'c', 'd']) - self.assertRaises(TypeError, lambda: s_0123 & np.NaN) - self.assertRaises(TypeError, lambda: s_0123 & 3.14) - 
self.assertRaises(TypeError, lambda: s_0123 & [0.1, 4, 3.14, 2]) + pytest.raises(TypeError, lambda: s_1111 & 'a') + pytest.raises(TypeError, lambda: s_1111 & ['a', 'b', 'c', 'd']) + pytest.raises(TypeError, lambda: s_0123 & np.NaN) + pytest.raises(TypeError, lambda: s_0123 & 3.14) + pytest.raises(TypeError, lambda: s_0123 & [0.1, 4, 3.14, 2]) # s_0123 will be all false now because of reindexing like s_tft if compat.PY3: @@ -1435,7 +1512,7 @@ def test_scalar_na_cmp_corners(self): def tester(a, b): return a & b - self.assertRaises(TypeError, tester, s, datetime(2005, 1, 1)) + pytest.raises(TypeError, tester, s, datetime(2005, 1, 1)) s = Series([2, 3, 4, 5, 6, 7, 8, 9, datetime(2005, 1, 1)]) s[::2] = np.nan @@ -1452,8 +1529,8 @@ def tester(a, b): # this is an alignment issue; these are equivalent # https://github.com/pandas-dev/pandas/issues/5284 - self.assertRaises(ValueError, lambda: d.__and__(s, axis='columns')) - self.assertRaises(ValueError, tester, s, d) + pytest.raises(ValueError, lambda: d.__and__(s, axis='columns')) + pytest.raises(ValueError, tester, s, d) # this is wrong as its not a boolean result # result = d.__and__(s,axis='index') @@ -1464,10 +1541,10 @@ def test_operators_corner(self): empty = Series([], index=Index([])) result = series + empty - self.assertTrue(np.isnan(result).all()) + assert np.isnan(result).all() result = empty + Series([], index=Index([])) - self.assertEqual(len(result), 0) + assert len(result) == 0 # TODO: this returned NotImplemented earlier, what to do? 
# deltas = Series([timedelta(1)] * 5, index=np.arange(5)) @@ -1480,238 +1557,17 @@ def test_operators_corner(self): added = self.ts + int_ts expected = Series(self.ts.values[:-5] + int_ts.values, index=self.ts.index[:-5], name='ts') - self.assert_series_equal(added[:-5], expected) + tm.assert_series_equal(added[:-5], expected) - def test_operators_reverse_object(self): + @pytest.mark.parametrize('op', [operator.add, operator.sub, operator.mul, + operator.truediv, operator.floordiv]) + def test_operators_reverse_object(self, op): # GH 56 arr = Series(np.random.randn(10), index=np.arange(10), dtype=object) - def _check_op(arr, op): - result = op(1., arr) - expected = op(1., arr.astype(float)) - assert_series_equal(result.astype(float), expected) - - _check_op(arr, operator.add) - _check_op(arr, operator.sub) - _check_op(arr, operator.mul) - _check_op(arr, operator.truediv) - _check_op(arr, operator.floordiv) - - def test_arith_ops_df_compat(self): - # GH 1134 - s1 = pd.Series([1, 2, 3], index=list('ABC'), name='x') - s2 = pd.Series([2, 2, 2], index=list('ABD'), name='x') - - exp = pd.Series([3.0, 4.0, np.nan, np.nan], - index=list('ABCD'), name='x') - assert_series_equal(s1 + s2, exp) - assert_series_equal(s2 + s1, exp) - - exp = pd.DataFrame({'x': [3.0, 4.0, np.nan, np.nan]}, - index=list('ABCD')) - assert_frame_equal(s1.to_frame() + s2.to_frame(), exp) - assert_frame_equal(s2.to_frame() + s1.to_frame(), exp) - - # different length - s3 = pd.Series([1, 2, 3], index=list('ABC'), name='x') - s4 = pd.Series([2, 2, 2, 2], index=list('ABCD'), name='x') - - exp = pd.Series([3, 4, 5, np.nan], - index=list('ABCD'), name='x') - assert_series_equal(s3 + s4, exp) - assert_series_equal(s4 + s3, exp) - - exp = pd.DataFrame({'x': [3, 4, 5, np.nan]}, - index=list('ABCD')) - assert_frame_equal(s3.to_frame() + s4.to_frame(), exp) - assert_frame_equal(s4.to_frame() + s3.to_frame(), exp) - - def test_comp_ops_df_compat(self): - # GH 1134 - s1 = pd.Series([1, 2, 3], index=list('ABC'), 
name='x') - s2 = pd.Series([2, 2, 2], index=list('ABD'), name='x') - - s3 = pd.Series([1, 2, 3], index=list('ABC'), name='x') - s4 = pd.Series([2, 2, 2, 2], index=list('ABCD'), name='x') - - for l, r in [(s1, s2), (s2, s1), (s3, s4), (s4, s3)]: - - msg = "Can only compare identically-labeled Series objects" - with tm.assertRaisesRegexp(ValueError, msg): - l == r - - with tm.assertRaisesRegexp(ValueError, msg): - l != r - - with tm.assertRaisesRegexp(ValueError, msg): - l < r - - msg = "Can only compare identically-labeled DataFrame objects" - with tm.assertRaisesRegexp(ValueError, msg): - l.to_frame() == r.to_frame() - - with tm.assertRaisesRegexp(ValueError, msg): - l.to_frame() != r.to_frame() - - with tm.assertRaisesRegexp(ValueError, msg): - l.to_frame() < r.to_frame() - - def test_bool_ops_df_compat(self): - # GH 1134 - s1 = pd.Series([True, False, True], index=list('ABC'), name='x') - s2 = pd.Series([True, True, False], index=list('ABD'), name='x') - - exp = pd.Series([True, False, False, False], - index=list('ABCD'), name='x') - assert_series_equal(s1 & s2, exp) - assert_series_equal(s2 & s1, exp) - - # True | np.nan => True - exp = pd.Series([True, True, True, False], - index=list('ABCD'), name='x') - assert_series_equal(s1 | s2, exp) - # np.nan | True => np.nan, filled with False - exp = pd.Series([True, True, False, False], - index=list('ABCD'), name='x') - assert_series_equal(s2 | s1, exp) - - # DataFrame doesn't fill nan with False - exp = pd.DataFrame({'x': [True, False, np.nan, np.nan]}, - index=list('ABCD')) - assert_frame_equal(s1.to_frame() & s2.to_frame(), exp) - assert_frame_equal(s2.to_frame() & s1.to_frame(), exp) - - exp = pd.DataFrame({'x': [True, True, np.nan, np.nan]}, - index=list('ABCD')) - assert_frame_equal(s1.to_frame() | s2.to_frame(), exp) - assert_frame_equal(s2.to_frame() | s1.to_frame(), exp) - - # different length - s3 = pd.Series([True, False, True], index=list('ABC'), name='x') - s4 = pd.Series([True, True, True, True], 
index=list('ABCD'), name='x') - - exp = pd.Series([True, False, True, False], - index=list('ABCD'), name='x') - assert_series_equal(s3 & s4, exp) - assert_series_equal(s4 & s3, exp) - - # np.nan | True => np.nan, filled with False - exp = pd.Series([True, True, True, False], - index=list('ABCD'), name='x') - assert_series_equal(s3 | s4, exp) - # True | np.nan => True - exp = pd.Series([True, True, True, True], - index=list('ABCD'), name='x') - assert_series_equal(s4 | s3, exp) - - exp = pd.DataFrame({'x': [True, False, True, np.nan]}, - index=list('ABCD')) - assert_frame_equal(s3.to_frame() & s4.to_frame(), exp) - assert_frame_equal(s4.to_frame() & s3.to_frame(), exp) - - exp = pd.DataFrame({'x': [True, True, True, np.nan]}, - index=list('ABCD')) - assert_frame_equal(s3.to_frame() | s4.to_frame(), exp) - assert_frame_equal(s4.to_frame() | s3.to_frame(), exp) - - def test_series_frame_radd_bug(self): - # GH 353 - vals = Series(tm.rands_array(5, 10)) - result = 'foo_' + vals - expected = vals.map(lambda x: 'foo_' + x) - assert_series_equal(result, expected) - - frame = DataFrame({'vals': vals}) - result = 'foo_' + frame - expected = DataFrame({'vals': vals.map(lambda x: 'foo_' + x)}) - assert_frame_equal(result, expected) - - # really raise this time - with tm.assertRaises(TypeError): - datetime.now() + self.ts - - with tm.assertRaises(TypeError): - self.ts + datetime.now() - - def test_series_radd_more(self): - data = [[1, 2, 3], - [1.1, 2.2, 3.3], - [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02'), - pd.NaT], - ['x', 'y', 1]] - - for d in data: - for dtype in [None, object]: - s = Series(d, dtype=dtype) - with tm.assertRaises(TypeError): - 'foo_' + s - - for dtype in [None, object]: - res = 1 + pd.Series([1, 2, 3], dtype=dtype) - exp = pd.Series([2, 3, 4], dtype=dtype) - assert_series_equal(res, exp) - res = pd.Series([1, 2, 3], dtype=dtype) + 1 - assert_series_equal(res, exp) - - res = np.nan + pd.Series([1, 2, 3], dtype=dtype) - exp = pd.Series([np.nan, 
np.nan, np.nan], dtype=dtype) - assert_series_equal(res, exp) - res = pd.Series([1, 2, 3], dtype=dtype) + np.nan - assert_series_equal(res, exp) - - s = pd.Series([pd.Timedelta('1 days'), pd.Timedelta('2 days'), - pd.Timedelta('3 days')], dtype=dtype) - exp = pd.Series([pd.Timedelta('4 days'), pd.Timedelta('5 days'), - pd.Timedelta('6 days')]) - assert_series_equal(pd.Timedelta('3 days') + s, exp) - assert_series_equal(s + pd.Timedelta('3 days'), exp) - - s = pd.Series(['x', np.nan, 'x']) - assert_series_equal('a' + s, pd.Series(['ax', np.nan, 'ax'])) - assert_series_equal(s + 'a', pd.Series(['xa', np.nan, 'xa'])) - - def test_frame_radd_more(self): - data = [[1, 2, 3], - [1.1, 2.2, 3.3], - [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02'), - pd.NaT], - ['x', 'y', 1]] - - for d in data: - for dtype in [None, object]: - s = DataFrame(d, dtype=dtype) - with tm.assertRaises(TypeError): - 'foo_' + s - - for dtype in [None, object]: - res = 1 + pd.DataFrame([1, 2, 3], dtype=dtype) - exp = pd.DataFrame([2, 3, 4], dtype=dtype) - assert_frame_equal(res, exp) - res = pd.DataFrame([1, 2, 3], dtype=dtype) + 1 - assert_frame_equal(res, exp) - - res = np.nan + pd.DataFrame([1, 2, 3], dtype=dtype) - exp = pd.DataFrame([np.nan, np.nan, np.nan], dtype=dtype) - assert_frame_equal(res, exp) - res = pd.DataFrame([1, 2, 3], dtype=dtype) + np.nan - assert_frame_equal(res, exp) - - df = pd.DataFrame(['x', np.nan, 'x']) - assert_frame_equal('a' + df, pd.DataFrame(['ax', np.nan, 'ax'])) - assert_frame_equal(df + 'a', pd.DataFrame(['xa', np.nan, 'xa'])) - - def test_operators_frame(self): - # rpow does not work with DataFrame - df = DataFrame({'A': self.ts}) - - assert_series_equal(self.ts + self.ts, self.ts + df['A'], - check_names=False) - assert_series_equal(self.ts ** self.ts, self.ts ** df['A'], - check_names=False) - assert_series_equal(self.ts < self.ts, self.ts < df['A'], - check_names=False) - assert_series_equal(self.ts / self.ts, self.ts / df['A'], - check_names=False) + 
result = op(1., arr) + expected = op(1., arr.astype(float)) + assert_series_equal(result.astype(float), expected) def test_operators_combine(self): def _check_fill(meth, op, a, b, fill_value=0): @@ -1719,8 +1575,8 @@ def _check_fill(meth, op, a, b, fill_value=0): a = a.reindex(exp_index) b = b.reindex(exp_index) - amask = isnull(a) - bmask = isnull(b) + amask = isna(a) + bmask = isna(b) exp_values = [] for i in range(len(exp_index)): @@ -1772,12 +1628,6 @@ def _check_fill(meth, op, a, b, fill_value=0): # should accept axis=0 or axis='rows' op(a, b, axis=0) - def test_ne(self): - ts = Series([3, 4, 5, 6, 7], [3, 4, 5, 6, 7], dtype=float) - expected = [True, True, False, True, True] - self.assertTrue(tm.equalContents(ts.index != 5, expected)) - self.assertTrue(tm.equalContents(~(ts.index == 5), expected)) - def test_operators_na_handling(self): from decimal import Decimal from datetime import date @@ -1786,8 +1636,8 @@ def test_operators_na_handling(self): result = s + s.shift(1) result2 = s.shift(1) + s - self.assertTrue(isnull(result[0])) - self.assertTrue(isnull(result2[0])) + assert isna(result[0]) + assert isna(result2[0]) s = Series(['foo', 'bar', 'baz', np.nan]) result = 'prefix_' + s @@ -1798,40 +1648,24 @@ def test_operators_na_handling(self): expected = Series(['foo_suffix', 'bar_suffix', 'baz_suffix', np.nan]) assert_series_equal(result, expected) - def test_divide_decimal(self): - """ resolves issue #9787 """ - from decimal import Decimal - - expected = Series([Decimal(5)]) - - s = Series([Decimal(10)]) - s = s / Decimal(2) - - assert_series_equal(expected, s) - - s = Series([Decimal(10)]) - s = s // Decimal(2) - - assert_series_equal(expected, s) - def test_datetime64_with_index(self): - # arithmetic integer ops with an index - s = Series(np.random.randn(5)) - expected = s - s.index.to_series() - result = s - s.index + ser = Series(np.random.randn(5)) + expected = ser - ser.index.to_series() + result = ser - ser.index assert_series_equal(result, 
expected) # GH 4629 # arithmetic datetime64 ops with an index - s = Series(date_range('20130101', periods=5), - index=date_range('20130101', periods=5)) - expected = s - s.index.to_series() - result = s - s.index + ser = Series(date_range('20130101', periods=5), + index=date_range('20130101', periods=5)) + expected = ser - ser.index.to_series() + result = ser - ser.index assert_series_equal(result, expected) - result = s - s.index.to_period() - assert_series_equal(result, expected) + with pytest.raises(TypeError): + # GH#18850 + result = ser - ser.index.to_period() df = DataFrame(np.random.randn(5, 2), index=date_range('20130101', periods=5)) @@ -1840,15 +1674,6 @@ def test_datetime64_with_index(self): df['result'] = df['date'] - df.index assert_series_equal(df['result'], df['expected'], check_names=False) - def test_dti_tz_convert_to_utc(self): - base = pd.DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], - tz='UTC') - idx1 = base.tz_convert('Asia/Tokyo')[:2] - idx2 = base.tz_convert('US/Eastern')[1:] - - res = Series([1, 2], index=idx1) + Series([1, 1], index=idx2) - assert_series_equal(res, Series([np.nan, 3, np.nan], index=base)) - def test_op_duplicate_index(self): # GH14227 s1 = Series([1, 2], index=[1, 1]) @@ -1856,3 +1681,177 @@ def test_op_duplicate_index(self): result = s1 + s2 expected = pd.Series([11, 12, np.nan], index=[1, 1, 2]) assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "test_input,error_type", + [ + (pd.Series([]), ValueError), + + # For strings, or any Series with dtype 'O' + (pd.Series(['foo', 'bar', 'baz']), TypeError), + (pd.Series([(1,), (2,)]), TypeError), + + # For mixed data types + ( + pd.Series(['foo', 'foo', 'bar', 'bar', None, np.nan, 'baz']), + TypeError + ), + ] + ) + def test_assert_idxminmax_raises(self, test_input, error_type): + """ + Cases where ``Series.argmax`` and related should raise an exception + """ + with pytest.raises(error_type): + test_input.idxmin() + with pytest.raises(error_type): + 
test_input.idxmin(skipna=False) + with pytest.raises(error_type): + test_input.idxmax() + with pytest.raises(error_type): + test_input.idxmax(skipna=False) + + def test_idxminmax_with_inf(self): + # For numeric data with NA and Inf (GH #13595) + s = pd.Series([0, -np.inf, np.inf, np.nan]) + + assert s.idxmin() == 1 + assert np.isnan(s.idxmin(skipna=False)) + + assert s.idxmax() == 2 + assert np.isnan(s.idxmax(skipna=False)) + + # Using old-style behavior that treats floating point nan, -inf, and + # +inf as missing + with pd.option_context('mode.use_inf_as_na', True): + assert s.idxmin() == 0 + assert np.isnan(s.idxmin(skipna=False)) + assert s.idxmax() == 0 + np.isnan(s.idxmax(skipna=False)) + + +class TestSeriesOperationsDataFrameCompat(object): + def test_operators_frame(self): + # rpow does not work with DataFrame + ts = tm.makeTimeSeries() + ts.name = 'ts' + + df = DataFrame({'A': ts}) + + assert_series_equal(ts + ts, ts + df['A'], + check_names=False) + assert_series_equal(ts ** ts, ts ** df['A'], + check_names=False) + assert_series_equal(ts < ts, ts < df['A'], + check_names=False) + assert_series_equal(ts / ts, ts / df['A'], + check_names=False) + + def test_series_frame_radd_bug(self): + # GH#353 + vals = Series(tm.rands_array(5, 10)) + result = 'foo_' + vals + expected = vals.map(lambda x: 'foo_' + x) + assert_series_equal(result, expected) + + frame = DataFrame({'vals': vals}) + result = 'foo_' + frame + expected = DataFrame({'vals': vals.map(lambda x: 'foo_' + x)}) + assert_frame_equal(result, expected) + + ts = tm.makeTimeSeries() + ts.name = 'ts' + + # really raise this time + with pytest.raises(TypeError): + datetime.now() + ts + + with pytest.raises(TypeError): + ts + datetime.now() + + def test_bool_ops_df_compat(self): + # GH 1134 + s1 = pd.Series([True, False, True], index=list('ABC'), name='x') + s2 = pd.Series([True, True, False], index=list('ABD'), name='x') + + exp = pd.Series([True, False, False, False], + index=list('ABCD'), name='x') + 
assert_series_equal(s1 & s2, exp) + assert_series_equal(s2 & s1, exp) + + # True | np.nan => True + exp = pd.Series([True, True, True, False], + index=list('ABCD'), name='x') + assert_series_equal(s1 | s2, exp) + # np.nan | True => np.nan, filled with False + exp = pd.Series([True, True, False, False], + index=list('ABCD'), name='x') + assert_series_equal(s2 | s1, exp) + + # DataFrame doesn't fill nan with False + exp = pd.DataFrame({'x': [True, False, np.nan, np.nan]}, + index=list('ABCD')) + assert_frame_equal(s1.to_frame() & s2.to_frame(), exp) + assert_frame_equal(s2.to_frame() & s1.to_frame(), exp) + + exp = pd.DataFrame({'x': [True, True, np.nan, np.nan]}, + index=list('ABCD')) + assert_frame_equal(s1.to_frame() | s2.to_frame(), exp) + assert_frame_equal(s2.to_frame() | s1.to_frame(), exp) + + # different length + s3 = pd.Series([True, False, True], index=list('ABC'), name='x') + s4 = pd.Series([True, True, True, True], index=list('ABCD'), name='x') + + exp = pd.Series([True, False, True, False], + index=list('ABCD'), name='x') + assert_series_equal(s3 & s4, exp) + assert_series_equal(s4 & s3, exp) + + # np.nan | True => np.nan, filled with False + exp = pd.Series([True, True, True, False], + index=list('ABCD'), name='x') + assert_series_equal(s3 | s4, exp) + # True | np.nan => True + exp = pd.Series([True, True, True, True], + index=list('ABCD'), name='x') + assert_series_equal(s4 | s3, exp) + + exp = pd.DataFrame({'x': [True, False, True, np.nan]}, + index=list('ABCD')) + assert_frame_equal(s3.to_frame() & s4.to_frame(), exp) + assert_frame_equal(s4.to_frame() & s3.to_frame(), exp) + + exp = pd.DataFrame({'x': [True, True, True, np.nan]}, + index=list('ABCD')) + assert_frame_equal(s3.to_frame() | s4.to_frame(), exp) + assert_frame_equal(s4.to_frame() | s3.to_frame(), exp) + + def test_arith_ops_df_compat(self): + # GH#1134 + s1 = pd.Series([1, 2, 3], index=list('ABC'), name='x') + s2 = pd.Series([2, 2, 2], index=list('ABD'), name='x') + + exp = 
pd.Series([3.0, 4.0, np.nan, np.nan], + index=list('ABCD'), name='x') + assert_series_equal(s1 + s2, exp) + assert_series_equal(s2 + s1, exp) + + exp = pd.DataFrame({'x': [3.0, 4.0, np.nan, np.nan]}, + index=list('ABCD')) + assert_frame_equal(s1.to_frame() + s2.to_frame(), exp) + assert_frame_equal(s2.to_frame() + s1.to_frame(), exp) + + # different length + s3 = pd.Series([1, 2, 3], index=list('ABC'), name='x') + s4 = pd.Series([2, 2, 2, 2], index=list('ABCD'), name='x') + + exp = pd.Series([3, 4, 5, np.nan], + index=list('ABCD'), name='x') + assert_series_equal(s3 + s4, exp) + assert_series_equal(s4 + s3, exp) + + exp = pd.DataFrame({'x': [3, 4, 5, np.nan]}, + index=list('ABCD')) + assert_frame_equal(s3.to_frame() + s4.to_frame(), exp) + assert_frame_equal(s4.to_frame() + s3.to_frame(), exp) diff --git a/pandas/tests/series/test_period.py b/pandas/tests/series/test_period.py index f1ae7765648ca..8ff2071e351d0 100644 --- a/pandas/tests/series/test_period.py +++ b/pandas/tests/series/test_period.py @@ -2,43 +2,43 @@ import pandas as pd import pandas.util.testing as tm -import pandas.tseries.period as period -from pandas import Series, period_range, DataFrame, Period +import pandas.core.indexes.period as period +from pandas import Series, period_range, DataFrame def _permute(obj): return obj.take(np.random.permutation(len(obj))) -class TestSeriesPeriod(tm.TestCase): +class TestSeriesPeriod(object): - def setUp(self): + def setup_method(self, method): self.series = Series(period_range('2000-01-01', periods=10, freq='D')) def test_auto_conversion(self): series = Series(list(period_range('2000-01-01', periods=10, freq='D'))) - self.assertEqual(series.dtype, 'object') + assert series.dtype == 'object' series = pd.Series([pd.Period('2011-01-01', freq='D'), pd.Period('2011-02-01', freq='D')]) - self.assertEqual(series.dtype, 'object') + assert series.dtype == 'object' def test_getitem(self): - self.assertEqual(self.series[1], pd.Period('2000-01-02', freq='D')) + assert 
self.series[1] == pd.Period('2000-01-02', freq='D') result = self.series[[2, 4]] exp = pd.Series([pd.Period('2000-01-03', freq='D'), pd.Period('2000-01-05', freq='D')], index=[2, 4]) - self.assert_series_equal(result, exp) - self.assertEqual(result.dtype, 'object') + tm.assert_series_equal(result, exp) + assert result.dtype == 'object' - def test_isnull(self): + def test_isna(self): # GH 13737 s = Series([pd.Period('2011-01', freq='M'), pd.Period('NaT', freq='M')]) - tm.assert_series_equal(s.isnull(), Series([False, True])) - tm.assert_series_equal(s.notnull(), Series([True, False])) + tm.assert_series_equal(s.isna(), Series([False, True])) + tm.assert_series_equal(s.notna(), Series([True, False])) def test_fillna(self): # GH 13737 @@ -49,12 +49,12 @@ def test_fillna(self): exp = Series([pd.Period('2011-01', freq='M'), pd.Period('2012-01', freq='M')]) tm.assert_series_equal(res, exp) - self.assertEqual(res.dtype, 'object') + assert res.dtype == 'object' res = s.fillna('XXX') exp = Series([pd.Period('2011-01', freq='M'), 'XXX']) tm.assert_series_equal(res, exp) - self.assertEqual(res.dtype, 'object') + assert res.dtype == 'object' def test_dropna(self): # GH 13737 @@ -63,17 +63,6 @@ def test_dropna(self): tm.assert_series_equal(s.dropna(), Series([pd.Period('2011-01', freq='M')])) - def test_series_comparison_scalars(self): - val = pd.Period('2000-01-04', freq='D') - result = self.series > val - expected = pd.Series([x > val for x in self.series]) - tm.assert_series_equal(result, expected) - - val = self.series[5] - result = self.series > val - expected = pd.Series([x > val for x in self.series]) - tm.assert_series_equal(result, expected) - def test_between(self): left, right = self.series[[2, 7]] result = self.series.between(left, right) @@ -89,10 +78,10 @@ def test_NaT_scalar(self): series = Series([0, 1000, 2000, iNaT], dtype='period[D]') val = series[3] - self.assertTrue(isnull(val)) + assert isna(val) series[2] = val - self.assertTrue(isnull(series[2])) + 
assert isna(series[2]) def test_NaT_cast(self): result = Series([np.nan]).astype('period[D]') @@ -103,16 +92,16 @@ def test_NaT_cast(self): def test_set_none_nan(self): # currently Period is stored as object dtype, not as NaT self.series[3] = None - self.assertIs(self.series[3], None) + assert self.series[3] is None self.series[3:5] = None - self.assertIs(self.series[4], None) + assert self.series[4] is None self.series[5] = np.nan - self.assertTrue(np.isnan(self.series[5])) + assert np.isnan(self.series[5]) self.series[5:7] = np.nan - self.assertTrue(np.isnan(self.series[6])) + assert np.isnan(self.series[6]) def test_intercept_astype_object(self): expected = self.series.astype('object') @@ -121,112 +110,12 @@ def test_intercept_astype_object(self): 'b': np.random.randn(len(self.series))}) result = df.values.squeeze() - self.assertTrue((result[:, 0] == expected.values).all()) + assert (result[:, 0] == expected.values).all() df = DataFrame({'a': self.series, 'b': ['foo'] * len(self.series)}) result = df.values.squeeze() - self.assertTrue((result[:, 0] == expected.values).all()) - - def test_comp_series_period_scalar(self): - # GH 13200 - for freq in ['M', '2M', '3M']: - base = Series([Period(x, freq=freq) for x in - ['2011-01', '2011-02', '2011-03', '2011-04']]) - p = Period('2011-02', freq=freq) - - exp = pd.Series([False, True, False, False]) - tm.assert_series_equal(base == p, exp) - tm.assert_series_equal(p == base, exp) - - exp = pd.Series([True, False, True, True]) - tm.assert_series_equal(base != p, exp) - tm.assert_series_equal(p != base, exp) - - exp = pd.Series([False, False, True, True]) - tm.assert_series_equal(base > p, exp) - tm.assert_series_equal(p < base, exp) - - exp = pd.Series([True, False, False, False]) - tm.assert_series_equal(base < p, exp) - tm.assert_series_equal(p > base, exp) - - exp = pd.Series([False, True, True, True]) - tm.assert_series_equal(base >= p, exp) - tm.assert_series_equal(p <= base, exp) - - exp = pd.Series([True, True, 
False, False]) - tm.assert_series_equal(base <= p, exp) - tm.assert_series_equal(p >= base, exp) - - # different base freq - msg = "Input has different freq=A-DEC from Period" - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - base <= Period('2011', freq='A') - - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - Period('2011', freq='A') >= base - - def test_comp_series_period_series(self): - # GH 13200 - for freq in ['M', '2M', '3M']: - base = Series([Period(x, freq=freq) for x in - ['2011-01', '2011-02', '2011-03', '2011-04']]) - - s = Series([Period(x, freq=freq) for x in - ['2011-02', '2011-01', '2011-03', '2011-05']]) - - exp = Series([False, False, True, False]) - tm.assert_series_equal(base == s, exp) - - exp = Series([True, True, False, True]) - tm.assert_series_equal(base != s, exp) - - exp = Series([False, True, False, False]) - tm.assert_series_equal(base > s, exp) - - exp = Series([True, False, False, True]) - tm.assert_series_equal(base < s, exp) - - exp = Series([False, True, True, False]) - tm.assert_series_equal(base >= s, exp) - - exp = Series([True, False, True, True]) - tm.assert_series_equal(base <= s, exp) - - s2 = Series([Period(x, freq='A') for x in - ['2011', '2011', '2011', '2011']]) - - # different base freq - msg = "Input has different freq=A-DEC from Period" - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - base <= s2 - - def test_comp_series_period_object(self): - # GH 13200 - base = Series([Period('2011', freq='A'), Period('2011-02', freq='M'), - Period('2013', freq='A'), Period('2011-04', freq='M')]) - - s = Series([Period('2012', freq='A'), Period('2011-01', freq='M'), - Period('2013', freq='A'), Period('2011-05', freq='M')]) - - exp = Series([False, False, True, False]) - tm.assert_series_equal(base == s, exp) - - exp = Series([True, True, False, True]) - tm.assert_series_equal(base != s, exp) - - exp = Series([False, True, False, False]) - tm.assert_series_equal(base > s, exp) - - exp 
= Series([True, False, False, True]) - tm.assert_series_equal(base < s, exp) - - exp = Series([False, True, True, False]) - tm.assert_series_equal(base >= s, exp) - - exp = Series([True, False, True, True]) - tm.assert_series_equal(base <= s, exp) + assert (result[:, 0] == expected.values).all() def test_align_series(self): rng = period_range('1/1/2000', '1/1/2010', freq='A') @@ -244,5 +133,34 @@ def test_align_series(self): for kind in ['inner', 'outer', 'left', 'right']: ts.align(ts[::2], join=kind) msg = "Input has different freq=D from PeriodIndex\\(freq=A-DEC\\)" - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + with tm.assert_raises_regex(period.IncompatibleFrequency, msg): ts + ts.asfreq('D', how="end") + + def test_truncate(self): + # GH 17717 + idx1 = pd.PeriodIndex([ + pd.Period('2017-09-02'), + pd.Period('2017-09-02'), + pd.Period('2017-09-03') + ]) + series1 = pd.Series([1, 2, 3], index=idx1) + result1 = series1.truncate(after='2017-09-02') + + expected_idx1 = pd.PeriodIndex([ + pd.Period('2017-09-02'), + pd.Period('2017-09-02') + ]) + tm.assert_series_equal(result1, pd.Series([1, 2], index=expected_idx1)) + + idx2 = pd.PeriodIndex([ + pd.Period('2017-09-03'), + pd.Period('2017-09-02'), + pd.Period('2017-09-03') + ]) + series2 = pd.Series([1, 2, 3], index=idx2) + result2 = series2.sort_index().truncate(after='2017-09-02') + + expected_idx2 = pd.PeriodIndex([ + pd.Period('2017-09-02') + ]) + tm.assert_series_equal(result2, pd.Series([2], index=expected_idx2)) diff --git a/pandas/tests/series/test_quantile.py b/pandas/tests/series/test_quantile.py index b8d1b92081858..3c93ff1d3f31e 100644 --- a/pandas/tests/series/test_quantile.py +++ b/pandas/tests/series/test_quantile.py @@ -1,59 +1,56 @@ # coding=utf-8 # pylint: disable-msg=E1101,W0612 -import pytest import numpy as np import pandas as pd -from pandas import (Index, Series, _np_version_under1p9) -from pandas.tseries.index import Timestamp -from pandas.types.common import is_integer 
+from pandas import Index, Series +from pandas.core.indexes.datetimes import Timestamp +from pandas.core.dtypes.common import is_integer import pandas.util.testing as tm from .common import TestData -class TestSeriesQuantile(TestData, tm.TestCase): +class TestSeriesQuantile(TestData): def test_quantile(self): - from numpy import percentile q = self.ts.quantile(0.1) - self.assertEqual(q, percentile(self.ts.valid(), 10)) + assert q == np.percentile(self.ts.dropna(), 10) q = self.ts.quantile(0.9) - self.assertEqual(q, percentile(self.ts.valid(), 90)) + assert q == np.percentile(self.ts.dropna(), 90) # object dtype q = Series(self.ts, dtype=object).quantile(0.9) - self.assertEqual(q, percentile(self.ts.valid(), 90)) + assert q == np.percentile(self.ts.dropna(), 90) # datetime64[ns] dtype dts = self.ts.index.to_series() q = dts.quantile(.2) - self.assertEqual(q, Timestamp('2000-01-10 19:12:00')) + assert q == Timestamp('2000-01-10 19:12:00') # timedelta64[ns] dtype tds = dts.diff() q = tds.quantile(.25) - self.assertEqual(q, pd.to_timedelta('24:00:00')) + assert q == pd.to_timedelta('24:00:00') # GH7661 result = Series([np.timedelta64('NaT')]).sum() - self.assertTrue(result is pd.NaT) + assert result == pd.Timedelta(0) msg = 'percentiles should all be in the interval \\[0, 1\\]' for invalid in [-1, 2, [0.5, -1], [0.5, 2]]: - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): self.ts.quantile(invalid) def test_quantile_multi(self): - from numpy import percentile qs = [.1, .9] result = self.ts.quantile(qs) - expected = pd.Series([percentile(self.ts.valid(), 10), - percentile(self.ts.valid(), 90)], + expected = pd.Series([np.percentile(self.ts.dropna(), 10), + np.percentile(self.ts.dropna(), 90)], index=qs, name=self.ts.name) tm.assert_series_equal(result, expected) @@ -71,59 +68,28 @@ def test_quantile_multi(self): tm.assert_series_equal(result, expected) def test_quantile_interpolation(self): - # GH #10174 - if 
_np_version_under1p9: - pytest.skip("Numpy version is under 1.9") - - from numpy import percentile + # see gh-10174 # interpolation = linear (default case) q = self.ts.quantile(0.1, interpolation='linear') - self.assertEqual(q, percentile(self.ts.valid(), 10)) + assert q == np.percentile(self.ts.dropna(), 10) q1 = self.ts.quantile(0.1) - self.assertEqual(q1, percentile(self.ts.valid(), 10)) + assert q1 == np.percentile(self.ts.dropna(), 10) # test with and without interpolation keyword - self.assertEqual(q, q1) + assert q == q1 def test_quantile_interpolation_dtype(self): # GH #10174 - if _np_version_under1p9: - pytest.skip("Numpy version is under 1.9") - - from numpy import percentile # interpolation = linear (default case) q = pd.Series([1, 3, 4]).quantile(0.5, interpolation='lower') - self.assertEqual(q, percentile(np.array([1, 3, 4]), 50)) - self.assertTrue(is_integer(q)) + assert q == np.percentile(np.array([1, 3, 4]), 50) + assert is_integer(q) q = pd.Series([1, 3, 4]).quantile(0.5, interpolation='higher') - self.assertEqual(q, percentile(np.array([1, 3, 4]), 50)) - self.assertTrue(is_integer(q)) - - def test_quantile_interpolation_np_lt_1p9(self): - # GH #10174 - if not _np_version_under1p9: - pytest.skip("Numpy version is greater than 1.9") - - from numpy import percentile - - # interpolation = linear (default case) - q = self.ts.quantile(0.1, interpolation='linear') - self.assertEqual(q, percentile(self.ts.valid(), 10)) - q1 = self.ts.quantile(0.1) - self.assertEqual(q1, percentile(self.ts.valid(), 10)) - - # interpolation other than linear - expErrMsg = "Interpolation methods other than " - with tm.assertRaisesRegexp(ValueError, expErrMsg): - self.ts.quantile(0.9, interpolation='nearest') - - # object dtype - with tm.assertRaisesRegexp(ValueError, expErrMsg): - q = Series(self.ts, dtype=object).quantile(0.7, - interpolation='higher') + assert q == np.percentile(np.array([1, 3, 4]), 50) + assert is_integer(q) def test_quantile_nan(self): @@ -131,14 +97,14 
@@ def test_quantile_nan(self): s = pd.Series([1, 2, 3, 4, np.nan]) result = s.quantile(0.5) expected = 2.5 - self.assertEqual(result, expected) + assert result == expected # all nan/empty cases = [Series([]), Series([np.nan, np.nan])] for s in cases: res = s.quantile(0.5) - self.assertTrue(np.isnan(res)) + assert np.isnan(res) res = s.quantile([0.5]) tm.assert_series_equal(res, pd.Series([np.nan], index=[0.5])) @@ -167,7 +133,7 @@ def test_quantile_box(self): for case in cases: s = pd.Series(case, name='XXX') res = s.quantile(0.5) - self.assertEqual(res, case[1]) + assert res == case[1] res = s.quantile([0.5]) exp = pd.Series([case[1]], index=[0.5], name='XXX') @@ -175,12 +141,12 @@ def test_quantile_box(self): def test_datetime_timedelta_quantiles(self): # covers #9694 - self.assertTrue(pd.isnull(Series([], dtype='M8[ns]').quantile(.5))) - self.assertTrue(pd.isnull(Series([], dtype='m8[ns]').quantile(.5))) + assert pd.isna(Series([], dtype='M8[ns]').quantile(.5)) + assert pd.isna(Series([], dtype='m8[ns]').quantile(.5)) def test_quantile_nat(self): res = Series([pd.NaT, pd.NaT]).quantile(0.5) - self.assertTrue(res is pd.NaT) + assert res is pd.NaT res = Series([pd.NaT, pd.NaT]).quantile([0.5]) tm.assert_series_equal(res, pd.Series([pd.NaT], index=[0.5])) @@ -191,7 +157,7 @@ def test_quantile_empty(self): s = Series([], dtype='float64') res = s.quantile(0.5) - self.assertTrue(np.isnan(res)) + assert np.isnan(res) res = s.quantile([0.5]) exp = Series([np.nan], index=[0.5]) @@ -201,7 +167,7 @@ def test_quantile_empty(self): s = Series([], dtype='int64') res = s.quantile(0.5) - self.assertTrue(np.isnan(res)) + assert np.isnan(res) res = s.quantile([0.5]) exp = Series([np.nan], index=[0.5]) @@ -211,7 +177,7 @@ def test_quantile_empty(self): s = Series([], dtype='datetime64[ns]') res = s.quantile(0.5) - self.assertTrue(res is pd.NaT) + assert res is pd.NaT res = s.quantile([0.5]) exp = Series([pd.NaT], index=[0.5]) diff --git a/pandas/tests/series/test_rank.py 
b/pandas/tests/series/test_rank.py new file mode 100644 index 0000000000000..d15325ca8ef0e --- /dev/null +++ b/pandas/tests/series/test_rank.py @@ -0,0 +1,471 @@ +# -*- coding: utf-8 -*- +from pandas import compat, Timestamp + +import pytest + +from distutils.version import LooseVersion +from numpy import nan +import numpy as np + +from pandas import Series, date_range, NaT +from pandas.api.types import CategoricalDtype + +from pandas.compat import product +from pandas.util.testing import assert_series_equal +import pandas.util.testing as tm +from pandas.tests.series.common import TestData +from pandas._libs.tslib import iNaT +from pandas._libs.algos import Infinity, NegInfinity + + +class TestSeriesRank(TestData): + s = Series([1, 3, 4, 2, nan, 2, 1, 5, nan, 3]) + + results = { + 'average': np.array([1.5, 5.5, 7.0, 3.5, nan, + 3.5, 1.5, 8.0, nan, 5.5]), + 'min': np.array([1, 5, 7, 3, nan, 3, 1, 8, nan, 5]), + 'max': np.array([2, 6, 7, 4, nan, 4, 2, 8, nan, 6]), + 'first': np.array([1, 5, 7, 3, nan, 4, 2, 8, nan, 6]), + 'dense': np.array([1, 3, 4, 2, nan, 2, 1, 5, nan, 3]), + } + + def test_rank(self): + pytest.importorskip('scipy.stats.special') + rankdata = pytest.importorskip('scipy.stats.rankdata') + + self.ts[::2] = np.nan + self.ts[:10][::3] = 4. 
+ + ranks = self.ts.rank() + oranks = self.ts.astype('O').rank() + + assert_series_equal(ranks, oranks) + + mask = np.isnan(self.ts) + filled = self.ts.fillna(np.inf) + + # rankdata returns a ndarray + exp = Series(rankdata(filled), index=filled.index, name='ts') + exp[mask] = np.nan + + tm.assert_series_equal(ranks, exp) + + iseries = Series(np.arange(5).repeat(2)) + + iranks = iseries.rank() + exp = iseries.astype(float).rank() + assert_series_equal(iranks, exp) + iseries = Series(np.arange(5)) + 1.0 + exp = iseries / 5.0 + iranks = iseries.rank(pct=True) + + assert_series_equal(iranks, exp) + + iseries = Series(np.repeat(1, 100)) + exp = Series(np.repeat(0.505, 100)) + iranks = iseries.rank(pct=True) + assert_series_equal(iranks, exp) + + iseries[1] = np.nan + exp = Series(np.repeat(50.0 / 99.0, 100)) + exp[1] = np.nan + iranks = iseries.rank(pct=True) + assert_series_equal(iranks, exp) + + iseries = Series(np.arange(5)) + 1.0 + iseries[4] = np.nan + exp = iseries / 4.0 + iranks = iseries.rank(pct=True) + assert_series_equal(iranks, exp) + + iseries = Series(np.repeat(np.nan, 100)) + exp = iseries.copy() + iranks = iseries.rank(pct=True) + assert_series_equal(iranks, exp) + + iseries = Series(np.arange(5)) + 1 + iseries[4] = np.nan + exp = iseries / 4.0 + iranks = iseries.rank(pct=True) + assert_series_equal(iranks, exp) + + rng = date_range('1/1/1990', periods=5) + iseries = Series(np.arange(5), rng) + 1 + iseries.iloc[4] = np.nan + exp = iseries / 4.0 + iranks = iseries.rank(pct=True) + assert_series_equal(iranks, exp) + + iseries = Series([1e-50, 1e-100, 1e-20, 1e-2, 1e-20 + 1e-30, 1e-1]) + exp = Series([2, 1, 3, 5, 4, 6.0]) + iranks = iseries.rank() + assert_series_equal(iranks, exp) + + # GH 5968 + iseries = Series(['3 day', '1 day 10m', '-2 day', NaT], + dtype='m8[ns]') + exp = Series([3, 2, 1, np.nan]) + iranks = iseries.rank() + assert_series_equal(iranks, exp) + + values = np.array( + [-50, -1, -1e-20, -1e-25, -1e-50, 0, 1e-40, 1e-20, 1e-10, 2, 40 + ], 
dtype='float64') + random_order = np.random.permutation(len(values)) + iseries = Series(values[random_order]) + exp = Series(random_order + 1.0, dtype='float64') + iranks = iseries.rank() + assert_series_equal(iranks, exp) + + def test_rank_categorical(self): + # GH issue #15420 rank incorrectly orders ordered categories + + # Test ascending/descending ranking for ordered categoricals + exp = Series([1., 2., 3., 4., 5., 6.]) + exp_desc = Series([6., 5., 4., 3., 2., 1.]) + ordered = Series( + ['first', 'second', 'third', 'fourth', 'fifth', 'sixth'] + ).astype(CategoricalDtype(categories=['first', 'second', 'third', + 'fourth', 'fifth', 'sixth'], + ordered=True)) + assert_series_equal(ordered.rank(), exp) + assert_series_equal(ordered.rank(ascending=False), exp_desc) + + # Unordered categoricals should be ranked as objects + unordered = Series(['first', 'second', 'third', 'fourth', + 'fifth', 'sixth']).astype( + CategoricalDtype(categories=['first', 'second', 'third', + 'fourth', 'fifth', 'sixth'], + ordered=False)) + exp_unordered = Series([2., 4., 6., 3., 1., 5.]) + res = unordered.rank() + assert_series_equal(res, exp_unordered) + + unordered1 = Series( + [1, 2, 3, 4, 5, 6], + ).astype(CategoricalDtype([1, 2, 3, 4, 5, 6], False)) + exp_unordered1 = Series([1., 2., 3., 4., 5., 6.]) + res1 = unordered1.rank() + assert_series_equal(res1, exp_unordered1) + + # Test na_option for rank data + na_ser = Series( + ['first', 'second', 'third', 'fourth', 'fifth', 'sixth', np.NaN] + ).astype(CategoricalDtype(['first', 'second', 'third', 'fourth', + 'fifth', 'sixth', 'seventh'], True)) + + exp_top = Series([2., 3., 4., 5., 6., 7., 1.]) + exp_bot = Series([1., 2., 3., 4., 5., 6., 7.]) + exp_keep = Series([1., 2., 3., 4., 5., 6., np.NaN]) + + assert_series_equal(na_ser.rank(na_option='top'), exp_top) + assert_series_equal(na_ser.rank(na_option='bottom'), exp_bot) + assert_series_equal(na_ser.rank(na_option='keep'), exp_keep) + + # Test na_option for rank data with ascending 
False + exp_top = Series([7., 6., 5., 4., 3., 2., 1.]) + exp_bot = Series([6., 5., 4., 3., 2., 1., 7.]) + exp_keep = Series([6., 5., 4., 3., 2., 1., np.NaN]) + + assert_series_equal( + na_ser.rank(na_option='top', ascending=False), + exp_top + ) + assert_series_equal( + na_ser.rank(na_option='bottom', ascending=False), + exp_bot + ) + assert_series_equal( + na_ser.rank(na_option='keep', ascending=False), + exp_keep + ) + + # Test with pct=True + na_ser = Series(['first', 'second', 'third', 'fourth', np.NaN]).astype( + CategoricalDtype(['first', 'second', 'third', 'fourth'], True)) + exp_top = Series([0.4, 0.6, 0.8, 1., 0.2]) + exp_bot = Series([0.2, 0.4, 0.6, 0.8, 1.]) + exp_keep = Series([0.25, 0.5, 0.75, 1., np.NaN]) + + assert_series_equal(na_ser.rank(na_option='top', pct=True), exp_top) + assert_series_equal(na_ser.rank(na_option='bottom', pct=True), exp_bot) + assert_series_equal(na_ser.rank(na_option='keep', pct=True), exp_keep) + + def test_rank_signature(self): + s = Series([0, 1]) + s.rank(method='average') + pytest.raises(ValueError, s.rank, 'average') + + @pytest.mark.parametrize('contents,dtype', [ + ([-np.inf, -50, -1, -1e-20, -1e-25, -1e-50, 0, 1e-40, 1e-20, 1e-10, + 2, 40, np.inf], + 'float64'), + ([-np.inf, -50, -1, -1e-20, -1e-25, -1e-45, 0, 1e-40, 1e-20, 1e-10, + 2, 40, np.inf], + 'float32'), + ([np.iinfo(np.uint8).min, 1, 2, 100, np.iinfo(np.uint8).max], + 'uint8'), + pytest.param([np.iinfo(np.int64).min, -100, 0, 1, 9999, 100000, + 1e10, np.iinfo(np.int64).max], + 'int64', + marks=pytest.mark.xfail( + reason="iNaT is equivalent to minimum value of dtype" + "int64 pending issue #16674")), + ([NegInfinity(), '1', 'A', 'BA', 'Ba', 'C', Infinity()], + 'object') + ]) + def test_rank_inf(self, contents, dtype): + dtype_na_map = { + 'float64': np.nan, + 'float32': np.nan, + 'int64': iNaT, + 'object': None + } + # Insert nans at random positions if underlying dtype has missing + # value. 
Then adjust the expected order by adding nans accordingly + # This is for testing whether rank calculation is affected + # when values are interwined with nan values. + values = np.array(contents, dtype=dtype) + exp_order = np.array(range(len(values)), dtype='float64') + 1.0 + if dtype in dtype_na_map: + na_value = dtype_na_map[dtype] + nan_indices = np.random.choice(range(len(values)), 5) + values = np.insert(values, nan_indices, na_value) + exp_order = np.insert(exp_order, nan_indices, np.nan) + # shuffle the testing array and expected results in the same way + random_order = np.random.permutation(len(values)) + iseries = Series(values[random_order]) + exp = Series(exp_order[random_order], dtype='float64') + iranks = iseries.rank() + assert_series_equal(iranks, exp) + + def test_rank_tie_methods(self): + s = self.s + + def _check(s, expected, method='average'): + result = s.rank(method=method) + tm.assert_series_equal(result, Series(expected)) + + dtypes = [None, object] + disabled = set([(object, 'first')]) + results = self.results + + for method, dtype in product(results, dtypes): + if (dtype, method) in disabled: + continue + series = s if dtype is None else s.astype(dtype) + _check(series, results[method], method=method) + + def test_rank_tie_methods_on_infs_nans(self): + dtypes = [('object', None, Infinity(), NegInfinity()), + ('float64', np.nan, np.inf, -np.inf)] + chunk = 3 + disabled = set([('object', 'first')]) + + def _check(s, expected, method='average', na_option='keep'): + result = s.rank(method=method, na_option=na_option) + tm.assert_series_equal(result, Series(expected, dtype='float64')) + + exp_ranks = { + 'average': ([2, 2, 2], [5, 5, 5], [8, 8, 8]), + 'min': ([1, 1, 1], [4, 4, 4], [7, 7, 7]), + 'max': ([3, 3, 3], [6, 6, 6], [9, 9, 9]), + 'first': ([1, 2, 3], [4, 5, 6], [7, 8, 9]), + 'dense': ([1, 1, 1], [2, 2, 2], [3, 3, 3]) + } + na_options = ('top', 'bottom', 'keep') + for dtype, na_value, pos_inf, neg_inf in dtypes: + in_arr = [neg_inf] * 
chunk + [na_value] * chunk + [pos_inf] * chunk + iseries = Series(in_arr, dtype=dtype) + for method, na_opt in product(exp_ranks.keys(), na_options): + ranks = exp_ranks[method] + if (dtype, method) in disabled: + continue + if na_opt == 'top': + order = ranks[1] + ranks[0] + ranks[2] + elif na_opt == 'bottom': + order = ranks[0] + ranks[2] + ranks[1] + else: + order = ranks[0] + [np.nan] * chunk + ranks[1] + _check(iseries, order, method, na_opt) + + def test_rank_methods_series(self): + pytest.importorskip('scipy.stats.special') + rankdata = pytest.importorskip('scipy.stats.rankdata') + import scipy + + xs = np.random.randn(9) + xs = np.concatenate([xs[i:] for i in range(0, 9, 2)]) # add duplicates + np.random.shuffle(xs) + + index = [chr(ord('a') + i) for i in range(len(xs))] + + for vals in [xs, xs + 1e6, xs * 1e-6]: + ts = Series(vals, index=index) + + for m in ['average', 'min', 'max', 'first', 'dense']: + result = ts.rank(method=m) + sprank = rankdata(vals, m if m != 'first' else 'ordinal') + expected = Series(sprank, index=index) + + if LooseVersion(scipy.__version__) >= LooseVersion('0.17.0'): + expected = expected.astype('float64') + tm.assert_series_equal(result, expected) + + def test_rank_dense_method(self): + dtypes = ['O', 'f8', 'i8'] + in_out = [([1], [1]), + ([2], [1]), + ([0], [1]), + ([2, 2], [1, 1]), + ([1, 2, 3], [1, 2, 3]), + ([4, 2, 1], [3, 2, 1],), + ([1, 1, 5, 5, 3], [1, 1, 3, 3, 2]), + ([-5, -4, -3, -2, -1], [1, 2, 3, 4, 5])] + + for ser, exp in in_out: + for dtype in dtypes: + s = Series(ser).astype(dtype) + result = s.rank(method='dense') + expected = Series(exp).astype(result.dtype) + assert_series_equal(result, expected) + + def test_rank_descending(self): + dtypes = ['O', 'f8', 'i8'] + + for dtype, method in product(dtypes, self.results): + if 'i' in dtype: + s = self.s.dropna() + else: + s = self.s.astype(dtype) + + res = s.rank(ascending=False) + expected = (s.max() - s).rank() + assert_series_equal(res, expected) + + if method == 
'first' and dtype == 'O': + continue + + expected = (s.max() - s).rank(method=method) + res2 = s.rank(method=method, ascending=False) + assert_series_equal(res2, expected) + + def test_rank_int(self): + s = self.s.dropna().astype('i8') + + for method, res in compat.iteritems(self.results): + result = s.rank(method=method) + expected = Series(res).dropna() + expected.index = result.index + assert_series_equal(result, expected) + + def test_rank_object_bug(self): + # GH 13445 + + # smoke tests + Series([np.nan] * 32).astype(object).rank(ascending=True) + Series([np.nan] * 32).astype(object).rank(ascending=False) + + def test_rank_modify_inplace(self): + # GH 18521 + # Check rank does not mutate series + s = Series([Timestamp('2017-01-05 10:20:27.569000'), NaT]) + expected = s.copy() + + s.rank() + result = s + assert_series_equal(result, expected) + + +# GH15630, pct should be on 100% basis when method='dense' + +@pytest.mark.parametrize('dtype', ['O', 'f8', 'i8']) +@pytest.mark.parametrize('ser, exp', [ + ([1], [1.]), + ([1, 2], [1. / 2, 2. / 2]), + ([2, 2], [1., 1.]), + ([1, 2, 3], [1. / 3, 2. / 3, 3. / 3]), + ([1, 2, 2], [1. / 2, 2. / 2, 2. / 2]), + ([4, 2, 1], [3. / 3, 2. / 3, 1. / 3],), + ([1, 1, 5, 5, 3], [1. / 3, 1. / 3, 3. / 3, 3. / 3, 2. / 3]), + ([1, 1, 3, 3, 5, 5], [1. / 3, 1. / 3, 2. / 3, 2. / 3, 3. / 3, 3. / 3]), + ([-5, -4, -3, -2, -1], [1. / 5, 2. / 5, 3. / 5, 4. / 5, 5. / 5])]) +def test_rank_dense_pct(dtype, ser, exp): + s = Series(ser).astype(dtype) + result = s.rank(method='dense', pct=True) + expected = Series(exp).astype(result.dtype) + assert_series_equal(result, expected) + + +@pytest.mark.parametrize('dtype', ['O', 'f8', 'i8']) +@pytest.mark.parametrize('ser, exp', [ + ([1], [1.]), + ([1, 2], [1. / 2, 2. / 2]), + ([2, 2], [1. / 2, 1. / 2]), + ([1, 2, 3], [1. / 3, 2. / 3, 3. / 3]), + ([1, 2, 2], [1. / 3, 2. / 3, 2. / 3]), + ([4, 2, 1], [3. / 3, 2. / 3, 1. / 3],), + ([1, 1, 5, 5, 3], [1. / 5, 1. / 5, 4. / 5, 4. / 5, 3. 
/ 5]), + ([1, 1, 3, 3, 5, 5], [1. / 6, 1. / 6, 3. / 6, 3. / 6, 5. / 6, 5. / 6]), + ([-5, -4, -3, -2, -1], [1. / 5, 2. / 5, 3. / 5, 4. / 5, 5. / 5])]) +def test_rank_min_pct(dtype, ser, exp): + s = Series(ser).astype(dtype) + result = s.rank(method='min', pct=True) + expected = Series(exp).astype(result.dtype) + assert_series_equal(result, expected) + + +@pytest.mark.parametrize('dtype', ['O', 'f8', 'i8']) +@pytest.mark.parametrize('ser, exp', [ + ([1], [1.]), + ([1, 2], [1. / 2, 2. / 2]), + ([2, 2], [1., 1.]), + ([1, 2, 3], [1. / 3, 2. / 3, 3. / 3]), + ([1, 2, 2], [1. / 3, 3. / 3, 3. / 3]), + ([4, 2, 1], [3. / 3, 2. / 3, 1. / 3],), + ([1, 1, 5, 5, 3], [2. / 5, 2. / 5, 5. / 5, 5. / 5, 3. / 5]), + ([1, 1, 3, 3, 5, 5], [2. / 6, 2. / 6, 4. / 6, 4. / 6, 6. / 6, 6. / 6]), + ([-5, -4, -3, -2, -1], [1. / 5, 2. / 5, 3. / 5, 4. / 5, 5. / 5])]) +def test_rank_max_pct(dtype, ser, exp): + s = Series(ser).astype(dtype) + result = s.rank(method='max', pct=True) + expected = Series(exp).astype(result.dtype) + assert_series_equal(result, expected) + + +@pytest.mark.parametrize('dtype', ['O', 'f8', 'i8']) +@pytest.mark.parametrize('ser, exp', [ + ([1], [1.]), + ([1, 2], [1. / 2, 2. / 2]), + ([2, 2], [1.5 / 2, 1.5 / 2]), + ([1, 2, 3], [1. / 3, 2. / 3, 3. / 3]), + ([1, 2, 2], [1. / 3, 2.5 / 3, 2.5 / 3]), + ([4, 2, 1], [3. / 3, 2. / 3, 1. / 3],), + ([1, 1, 5, 5, 3], [1.5 / 5, 1.5 / 5, 4.5 / 5, 4.5 / 5, 3. / 5]), + ([1, 1, 3, 3, 5, 5], + [1.5 / 6, 1.5 / 6, 3.5 / 6, 3.5 / 6, 5.5 / 6, 5.5 / 6]), + ([-5, -4, -3, -2, -1], [1. / 5, 2. / 5, 3. / 5, 4. / 5, 5. / 5])]) +def test_rank_average_pct(dtype, ser, exp): + s = Series(ser).astype(dtype) + result = s.rank(method='average', pct=True) + expected = Series(exp).astype(result.dtype) + assert_series_equal(result, expected) + + +@pytest.mark.parametrize('dtype', ['f8', 'i8']) +@pytest.mark.parametrize('ser, exp', [ + ([1], [1.]), + ([1, 2], [1. / 2, 2. / 2]), + ([2, 2], [1. / 2, 2. / 2.]), + ([1, 2, 3], [1. / 3, 2. / 3, 3. 
/ 3]), + ([1, 2, 2], [1. / 3, 2. / 3, 3. / 3]), + ([4, 2, 1], [3. / 3, 2. / 3, 1. / 3],), + ([1, 1, 5, 5, 3], [1. / 5, 2. / 5, 4. / 5, 5. / 5, 3. / 5]), + ([1, 1, 3, 3, 5, 5], [1. / 6, 2. / 6, 3. / 6, 4. / 6, 5. / 6, 6. / 6]), + ([-5, -4, -3, -2, -1], [1. / 5, 2. / 5, 3. / 5, 4. / 5, 5. / 5])]) +def test_rank_first_pct(dtype, ser, exp): + s = Series(ser).astype(dtype) + result = s.rank(method='first', pct=True) + expected = Series(exp).astype(result.dtype) + assert_series_equal(result, expected) diff --git a/pandas/tests/series/test_replace.py b/pandas/tests/series/test_replace.py index 7fe31bab87537..2c07d87865f53 100644 --- a/pandas/tests/series/test_replace.py +++ b/pandas/tests/series/test_replace.py @@ -1,16 +1,17 @@ # coding=utf-8 # pylint: disable-msg=E1101,W0612 +import pytest + import numpy as np import pandas as pd -import pandas.lib as lib +import pandas._libs.lib as lib import pandas.util.testing as tm from .common import TestData -class TestSeriesReplace(TestData, tm.TestCase): - +class TestSeriesReplace(TestData): def test_replace(self): N = 100 ser = pd.Series(np.random.randn(N)) @@ -36,18 +37,18 @@ def test_replace(self): # replace list with a single value rs = ser.replace([np.nan, 'foo', 'bar'], -1) - self.assertTrue((rs[:5] == -1).all()) - self.assertTrue((rs[6:10] == -1).all()) - self.assertTrue((rs[20:30] == -1).all()) - self.assertTrue((pd.isnull(ser[:5])).all()) + assert (rs[:5] == -1).all() + assert (rs[6:10] == -1).all() + assert (rs[20:30] == -1).all() + assert (pd.isna(ser[:5])).all() # replace with different values rs = ser.replace({np.nan: -1, 'foo': -2, 'bar': -3}) - self.assertTrue((rs[:5] == -1).all()) - self.assertTrue((rs[6:10] == -2).all()) - self.assertTrue((rs[20:30] == -3).all()) - self.assertTrue((pd.isnull(ser[:5])).all()) + assert (rs[:5] == -1).all() + assert (rs[6:10] == -2).all() + assert (rs[20:30] == -3).all() + assert (pd.isna(ser[:5])).all() # replace with different values with 2 lists rs2 = ser.replace([np.nan, 'foo', 
'bar'], [-1, -2, -3]) @@ -56,9 +57,9 @@ def test_replace(self): # replace inplace ser.replace([np.nan, 'foo', 'bar'], -1, inplace=True) - self.assertTrue((ser[:5] == -1).all()) - self.assertTrue((ser[6:10] == -1).all()) - self.assertTrue((ser[20:30] == -1).all()) + assert (ser[:5] == -1).all() + assert (ser[6:10] == -1).all() + assert (ser[20:30] == -1).all() ser = pd.Series([np.nan, 0, np.inf]) tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0)) @@ -73,11 +74,11 @@ def test_replace(self): tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0)) # malformed - self.assertRaises(ValueError, ser.replace, [1, 2, 3], [np.nan, 0]) + pytest.raises(ValueError, ser.replace, [1, 2, 3], [np.nan, 0]) # make sure that we aren't just masking a TypeError because bools don't # implement indexing - with tm.assertRaisesRegexp(TypeError, 'Cannot compare types .+'): + with tm.assert_raises_regex(TypeError, 'Cannot compare types .+'): ser.replace([1, 2], [np.nan, 0]) ser = pd.Series([0, 1, 2, 3, 4]) @@ -118,7 +119,7 @@ def test_replace_with_single_list(self): # make sure things don't get corrupted when fillna call fails s = ser.copy() - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): s.replace([1, 2, 3], inplace=True, method='crash_cymbal') tm.assert_series_equal(s, ser) @@ -132,8 +133,8 @@ def check_replace(to_rep, val, expected): tm.assert_series_equal(expected, r) tm.assert_series_equal(expected, sc) - # should NOT upcast to float - e = pd.Series([0, 1, 2, 3, 4]) + # MUST upcast to float + e = pd.Series([0., 1., 2., 3., 4.]) tr, v = [3], [3.0] check_replace(tr, v, e) @@ -152,8 +153,8 @@ def check_replace(to_rep, val, expected): tr, v = [3, 4], [3.5, pd.Timestamp('20130101')] check_replace(tr, v, e) - # casts to float - e = pd.Series([0, 1, 2, 3.5, 1]) + # casts to object + e = pd.Series([0, 1, 2, 3.5, True], dtype='object') tr, v = [3, 4], [3.5, True] check_replace(tr, v, e) @@ -185,7 +186,7 @@ def test_replace_bool_with_bool(self): def 
test_replace_with_dict_with_bool_keys(self): s = pd.Series([True, False, True]) - with tm.assertRaisesRegexp(TypeError, 'Cannot compare types .+'): + with tm.assert_raises_regex(TypeError, 'Cannot compare types .+'): s.replace({'asdf': 'asdb', True: 'yes'}) def test_replace2(self): @@ -199,18 +200,18 @@ def test_replace2(self): # replace list with a single value rs = ser.replace([np.nan, 'foo', 'bar'], -1) - self.assertTrue((rs[:5] == -1).all()) - self.assertTrue((rs[6:10] == -1).all()) - self.assertTrue((rs[20:30] == -1).all()) - self.assertTrue((pd.isnull(ser[:5])).all()) + assert (rs[:5] == -1).all() + assert (rs[6:10] == -1).all() + assert (rs[20:30] == -1).all() + assert (pd.isna(ser[:5])).all() # replace with different values rs = ser.replace({np.nan: -1, 'foo': -2, 'bar': -3}) - self.assertTrue((rs[:5] == -1).all()) - self.assertTrue((rs[6:10] == -2).all()) - self.assertTrue((rs[20:30] == -3).all()) - self.assertTrue((pd.isnull(ser[:5])).all()) + assert (rs[:5] == -1).all() + assert (rs[6:10] == -2).all() + assert (rs[20:30] == -3).all() + assert (pd.isna(ser[:5])).all() # replace with different values with 2 lists rs2 = ser.replace([np.nan, 'foo', 'bar'], [-1, -2, -3]) @@ -218,12 +219,33 @@ def test_replace2(self): # replace inplace ser.replace([np.nan, 'foo', 'bar'], -1, inplace=True) - self.assertTrue((ser[:5] == -1).all()) - self.assertTrue((ser[6:10] == -1).all()) - self.assertTrue((ser[20:30] == -1).all()) + assert (ser[:5] == -1).all() + assert (ser[6:10] == -1).all() + assert (ser[20:30] == -1).all() def test_replace_with_empty_dictlike(self): # GH 15289 s = pd.Series(list('abcd')) tm.assert_series_equal(s, s.replace(dict())) tm.assert_series_equal(s, s.replace(pd.Series([]))) + + def test_replace_string_with_number(self): + # GH 15743 + s = pd.Series([1, 2, 3]) + result = s.replace('2', np.nan) + expected = pd.Series([1, 2, 3]) + tm.assert_series_equal(expected, result) + + def test_replace_unicode_with_number(self): + # GH 15743 + s = pd.Series([1, 
2, 3]) + result = s.replace(u'2', np.nan) + expected = pd.Series([1, 2, 3]) + tm.assert_series_equal(expected, result) + + def test_replace_mixed_types_with_string(self): + # Testing mixed + s = pd.Series([1, 2, 3, '4', 4, 5]) + result = s.replace([2, '4'], np.nan) + expected = pd.Series([1, np.nan, 3, np.nan, 4, 5]) + tm.assert_series_equal(expected, result) diff --git a/pandas/tests/series/test_repr.py b/pandas/tests/series/test_repr.py index 99a406a71b12b..97236f028b1c4 100644 --- a/pandas/tests/series/test_repr.py +++ b/pandas/tests/series/test_repr.py @@ -3,20 +3,23 @@ from datetime import datetime, timedelta +import sys + import numpy as np import pandas as pd -from pandas import (Index, Series, DataFrame, date_range) +from pandas import (Index, Series, DataFrame, date_range, option_context, + Categorical, period_range, timedelta_range) from pandas.core.index import MultiIndex -from pandas.compat import StringIO, lrange, range, u +from pandas.compat import lrange, range, u from pandas import compat import pandas.util.testing as tm from .common import TestData -class TestSeriesRepr(TestData, tm.TestCase): +class TestSeriesRepr(TestData): def test_multilevel_name_print(self): index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', @@ -32,24 +35,29 @@ def test_multilevel_name_print(self): "qux one 7", " two 8", " three 9", "Name: sth, dtype: int64"] expected = "\n".join(expected) - self.assertEqual(repr(s), expected) + assert repr(s) == expected def test_name_printing(self): - # test small series + # Test small Series. s = Series([0, 1, 2]) + s.name = "test" - self.assertIn("Name: test", repr(s)) + assert "Name: test" in repr(s) + s.name = None - self.assertNotIn("Name:", repr(s)) - # test big series (diff code path) + assert "Name:" not in repr(s) + + # Test big Series (diff code path). 
s = Series(lrange(0, 1000)) + s.name = "test" - self.assertIn("Name: test", repr(s)) + assert "Name: test" in repr(s) + s.name = None - self.assertNotIn("Name:", repr(s)) + assert "Name:" not in repr(s) s = Series(index=date_range('20010101', '20020101'), name='test') - self.assertIn("Name: test", repr(s)) + assert "Name: test" in repr(s) def test_repr(self): str(self.ts) @@ -88,44 +96,39 @@ def test_repr(self): # 0 as name ser = Series(np.random.randn(100), name=0) rep_str = repr(ser) - self.assertIn("Name: 0", rep_str) + assert "Name: 0" in rep_str # tidy repr ser = Series(np.random.randn(1001), name=0) rep_str = repr(ser) - self.assertIn("Name: 0", rep_str) + assert "Name: 0" in rep_str ser = Series(["a\n\r\tb"], name="a\n\r\td", index=["a\n\r\tf"]) - self.assertFalse("\t" in repr(ser)) - self.assertFalse("\r" in repr(ser)) - self.assertFalse("a\n" in repr(ser)) + assert "\t" not in repr(ser) + assert "\r" not in repr(ser) + assert "a\n" not in repr(ser) # with empty series (#4651) s = Series([], dtype=np.int64, name='foo') - self.assertEqual(repr(s), 'Series([], Name: foo, dtype: int64)') + assert repr(s) == 'Series([], Name: foo, dtype: int64)' s = Series([], dtype=np.int64, name=None) - self.assertEqual(repr(s), 'Series([], dtype: int64)') + assert repr(s) == 'Series([], dtype: int64)' def test_tidy_repr(self): a = Series([u("\u05d0")] * 1000) a.name = 'title1' repr(a) # should not raise exception + @tm.capture_stderr def test_repr_bool_fails(self): s = Series([DataFrame(np.random.randn(2, 2)) for i in range(5)]) - import sys + # It works (with no Cython exception barf)! + repr(s) - buf = StringIO() - tmp = sys.stderr - sys.stderr = buf - try: - # it works (with no Cython exception barf)! 
- repr(s) - finally: - sys.stderr = tmp - self.assertEqual(buf.getvalue(), '') + output = sys.stderr.getvalue() + assert output == '' def test_repr_name_iterable_indexable(self): s = Series([1, 2, 3], name=np.int64(3)) @@ -137,8 +140,7 @@ def test_repr_name_iterable_indexable(self): repr(s) def test_repr_should_return_str(self): - # http://docs.python.org/py3k/reference/datamodel.html#object.__repr__ - # http://docs.python.org/reference/datamodel.html#object.__repr__ + # https://docs.python.org/3/reference/datamodel.html#object.__repr__ # ...The return value must be a string object. # (str on py2.x, str (unicode) on py3) @@ -146,7 +148,7 @@ def test_repr_should_return_str(self): data = [8, 5, 3, 5] index1 = [u("\u03c3"), u("\u03c4"), u("\u03c5"), u("\u03c6")] df = Series(data, index=index1) - self.assertTrue(type(df.__repr__() == str)) # both py2 / 3 + assert type(df.__repr__() == str) # both py2 / 3 def test_repr_max_rows(self): # GH 6863 @@ -174,7 +176,273 @@ def test_timeseries_repr_object_dtype(self): repr(ts) ts = tm.makeTimeSeries(1000) - self.assertTrue(repr(ts).splitlines()[-1].startswith('Freq:')) + assert repr(ts).splitlines()[-1].startswith('Freq:') ts2 = ts.iloc[np.random.randint(0, len(ts) - 1, 400)] repr(ts2).splitlines()[-1] + + def test_latex_repr(self): + result = r"""\begin{tabular}{ll} +\toprule +{} & 0 \\ +\midrule +0 & $\alpha$ \\ +1 & b \\ +2 & c \\ +\bottomrule +\end{tabular} +""" + with option_context('display.latex.escape', False, + 'display.latex.repr', True): + s = Series([r'$\alpha$', 'b', 'c']) + assert result == s._repr_latex_() + + assert s._repr_latex_() is None + + +class TestCategoricalRepr(object): + + def test_categorical_repr(self): + a = Series(Categorical([1, 2, 3, 4])) + exp = u("0 1\n1 2\n2 3\n3 4\n" + + "dtype: category\nCategories (4, int64): [1, 2, 3, 4]") + + assert exp == a.__unicode__() + + a = Series(Categorical(["a", "b"] * 25)) + exp = u("0 a\n1 b\n" + " ..\n" + "48 a\n49 b\n" + + "Length: 50, dtype: 
category\nCategories (2, object): [a, b]") + with option_context("display.max_rows", 5): + assert exp == repr(a) + + levs = list("abcdefghijklmnopqrstuvwxyz") + a = Series(Categorical(["a", "b"], categories=levs, ordered=True)) + exp = u("0 a\n1 b\n" + "dtype: category\n" + "Categories (26, object): [a < b < c < d ... w < x < y < z]") + assert exp == a.__unicode__() + + def test_categorical_series_repr(self): + s = Series(Categorical([1, 2, 3])) + exp = """0 1 +1 2 +2 3 +dtype: category +Categories (3, int64): [1, 2, 3]""" + + assert repr(s) == exp + + s = Series(Categorical(np.arange(10))) + exp = """0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +dtype: category +Categories (10, int64): [0, 1, 2, 3, ..., 6, 7, 8, 9]""" + + assert repr(s) == exp + + def test_categorical_series_repr_ordered(self): + s = Series(Categorical([1, 2, 3], ordered=True)) + exp = """0 1 +1 2 +2 3 +dtype: category +Categories (3, int64): [1 < 2 < 3]""" + + assert repr(s) == exp + + s = Series(Categorical(np.arange(10), ordered=True)) + exp = """0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +dtype: category +Categories (10, int64): [0 < 1 < 2 < 3 ... 
6 < 7 < 8 < 9]""" + + assert repr(s) == exp + + def test_categorical_series_repr_datetime(self): + idx = date_range('2011-01-01 09:00', freq='H', periods=5) + s = Series(Categorical(idx)) + exp = """0 2011-01-01 09:00:00 +1 2011-01-01 10:00:00 +2 2011-01-01 11:00:00 +3 2011-01-01 12:00:00 +4 2011-01-01 13:00:00 +dtype: category +Categories (5, datetime64[ns]): [2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, + 2011-01-01 12:00:00, 2011-01-01 13:00:00]""" # noqa + + assert repr(s) == exp + + idx = date_range('2011-01-01 09:00', freq='H', periods=5, + tz='US/Eastern') + s = Series(Categorical(idx)) + exp = """0 2011-01-01 09:00:00-05:00 +1 2011-01-01 10:00:00-05:00 +2 2011-01-01 11:00:00-05:00 +3 2011-01-01 12:00:00-05:00 +4 2011-01-01 13:00:00-05:00 +dtype: category +Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, + 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, + 2011-01-01 13:00:00-05:00]""" # noqa + + assert repr(s) == exp + + def test_categorical_series_repr_datetime_ordered(self): + idx = date_range('2011-01-01 09:00', freq='H', periods=5) + s = Series(Categorical(idx, ordered=True)) + exp = """0 2011-01-01 09:00:00 +1 2011-01-01 10:00:00 +2 2011-01-01 11:00:00 +3 2011-01-01 12:00:00 +4 2011-01-01 13:00:00 +dtype: category +Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 < + 2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" # noqa + + assert repr(s) == exp + + idx = date_range('2011-01-01 09:00', freq='H', periods=5, + tz='US/Eastern') + s = Series(Categorical(idx, ordered=True)) + exp = """0 2011-01-01 09:00:00-05:00 +1 2011-01-01 10:00:00-05:00 +2 2011-01-01 11:00:00-05:00 +3 2011-01-01 12:00:00-05:00 +4 2011-01-01 13:00:00-05:00 +dtype: category +Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 < + 2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 < + 2011-01-01 13:00:00-05:00]""" # noqa + 
+ assert repr(s) == exp + + def test_categorical_series_repr_period(self): + idx = period_range('2011-01-01 09:00', freq='H', periods=5) + s = Series(Categorical(idx)) + exp = """0 2011-01-01 09:00 +1 2011-01-01 10:00 +2 2011-01-01 11:00 +3 2011-01-01 12:00 +4 2011-01-01 13:00 +dtype: category +Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, + 2011-01-01 13:00]""" # noqa + + assert repr(s) == exp + + idx = period_range('2011-01', freq='M', periods=5) + s = Series(Categorical(idx)) + exp = """0 2011-01 +1 2011-02 +2 2011-03 +3 2011-04 +4 2011-05 +dtype: category +Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]""" + + assert repr(s) == exp + + def test_categorical_series_repr_period_ordered(self): + idx = period_range('2011-01-01 09:00', freq='H', periods=5) + s = Series(Categorical(idx, ordered=True)) + exp = """0 2011-01-01 09:00 +1 2011-01-01 10:00 +2 2011-01-01 11:00 +3 2011-01-01 12:00 +4 2011-01-01 13:00 +dtype: category +Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 < + 2011-01-01 13:00]""" # noqa + + assert repr(s) == exp + + idx = period_range('2011-01', freq='M', periods=5) + s = Series(Categorical(idx, ordered=True)) + exp = """0 2011-01 +1 2011-02 +2 2011-03 +3 2011-04 +4 2011-05 +dtype: category +Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]""" + + assert repr(s) == exp + + def test_categorical_series_repr_timedelta(self): + idx = timedelta_range('1 days', periods=5) + s = Series(Categorical(idx)) + exp = """0 1 days +1 2 days +2 3 days +3 4 days +4 5 days +dtype: category +Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]""" + + assert repr(s) == exp + + idx = timedelta_range('1 hours', periods=10) + s = Series(Categorical(idx)) + exp = """0 0 days 01:00:00 +1 1 days 01:00:00 +2 2 days 01:00:00 +3 3 days 01:00:00 +4 4 days 01:00:00 +5 5 days 01:00:00 +6 6 days 01:00:00 
+7 7 days 01:00:00 +8 8 days 01:00:00 +9 9 days 01:00:00 +dtype: category +Categories (10, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, + 3 days 01:00:00, ..., 6 days 01:00:00, 7 days 01:00:00, + 8 days 01:00:00, 9 days 01:00:00]""" # noqa + + assert repr(s) == exp + + def test_categorical_series_repr_timedelta_ordered(self): + idx = timedelta_range('1 days', periods=5) + s = Series(Categorical(idx, ordered=True)) + exp = """0 1 days +1 2 days +2 3 days +3 4 days +4 5 days +dtype: category +Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" # noqa + + assert repr(s) == exp + + idx = timedelta_range('1 hours', periods=10) + s = Series(Categorical(idx, ordered=True)) + exp = """0 0 days 01:00:00 +1 1 days 01:00:00 +2 2 days 01:00:00 +3 3 days 01:00:00 +4 4 days 01:00:00 +5 5 days 01:00:00 +6 6 days 01:00:00 +7 7 days 01:00:00 +8 8 days 01:00:00 +9 9 days 01:00:00 +dtype: category +Categories (10, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 < + 3 days 01:00:00 ... 
6 days 01:00:00 < 7 days 01:00:00 < + 8 days 01:00:00 < 9 days 01:00:00]""" # noqa + + assert repr(s) == exp diff --git a/pandas/tests/series/test_sorting.py b/pandas/tests/series/test_sorting.py index db506f12a2293..01b4ea6eaa238 100644 --- a/pandas/tests/series/test_sorting.py +++ b/pandas/tests/series/test_sorting.py @@ -1,56 +1,47 @@ # coding=utf-8 +import pytest + import numpy as np import random -from pandas import (DataFrame, Series, MultiIndex) +from pandas import DataFrame, Series, MultiIndex, IntervalIndex, Categorical -from pandas.util.testing import (assert_series_equal, assert_almost_equal) +from pandas.util.testing import assert_series_equal, assert_almost_equal import pandas.util.testing as tm from .common import TestData -class TestSeriesSorting(TestData, tm.TestCase): - - def test_sort(self): +class TestSeriesSorting(TestData): + def test_sortlevel_deprecated(self): ts = self.ts.copy() - # 9816 deprecated - with tm.assert_produces_warning(FutureWarning): - ts.sort() # sorts inplace - self.assert_series_equal(ts, self.ts.sort_values()) + # see gh-9816 with tm.assert_produces_warning(FutureWarning): ts.sortlevel() - def test_order(self): - - # 9816 deprecated - with tm.assert_produces_warning(FutureWarning): - result = self.ts.order() - self.assert_series_equal(result, self.ts.sort_values()) - def test_sort_values(self): # check indexes are reordered corresponding with the values ser = Series([3, 2, 4, 1], ['A', 'B', 'C', 'D']) expected = Series([1, 2, 3, 4], ['D', 'B', 'A', 'C']) result = ser.sort_values() - self.assert_series_equal(expected, result) + tm.assert_series_equal(expected, result) ts = self.ts.copy() ts[:5] = np.NaN vals = ts.values result = ts.sort_values() - self.assertTrue(np.isnan(result[-5:]).all()) - self.assert_numpy_array_equal(result[:-5].values, np.sort(vals[5:])) + assert np.isnan(result[-5:]).all() + tm.assert_numpy_array_equal(result[:-5].values, np.sort(vals[5:])) # na_position result = ts.sort_values(na_position='first') - 
self.assertTrue(np.isnan(result[:5]).all()) - self.assert_numpy_array_equal(result[5:].values, np.sort(vals[5:])) + assert np.isnan(result[:5]).all() + tm.assert_numpy_array_equal(result[5:].values, np.sort(vals[5:])) # something object-type ser = Series(['A', 'B'], [1, 2]) @@ -59,17 +50,36 @@ def test_sort_values(self): # ascending=False ordered = ts.sort_values(ascending=False) - expected = np.sort(ts.valid().values)[::-1] - assert_almost_equal(expected, ordered.valid().values) + expected = np.sort(ts.dropna().values)[::-1] + assert_almost_equal(expected, ordered.dropna().values) ordered = ts.sort_values(ascending=False, na_position='first') - assert_almost_equal(expected, ordered.valid().values) + assert_almost_equal(expected, ordered.dropna().values) + + # ascending=[False] should behave the same as ascending=False + ordered = ts.sort_values(ascending=[False]) + expected = ts.sort_values(ascending=False) + assert_series_equal(expected, ordered) + ordered = ts.sort_values(ascending=[False], na_position='first') + expected = ts.sort_values(ascending=False, na_position='first') + assert_series_equal(expected, ordered) + + pytest.raises(ValueError, + lambda: ts.sort_values(ascending=None)) + pytest.raises(ValueError, + lambda: ts.sort_values(ascending=[])) + pytest.raises(ValueError, + lambda: ts.sort_values(ascending=[1, 2, 3])) + pytest.raises(ValueError, + lambda: ts.sort_values(ascending=[False, False])) + pytest.raises(ValueError, + lambda: ts.sort_values(ascending='foobar')) # inplace=True ts = self.ts.copy() ts.sort_values(ascending=False, inplace=True) - self.assert_series_equal(ts, self.ts.sort_values(ascending=False)) - self.assert_index_equal(ts.index, - self.ts.sort_values(ascending=False).index) + tm.assert_series_equal(ts, self.ts.sort_values(ascending=False)) + tm.assert_index_equal(ts.index, + self.ts.sort_values(ascending=False).index) # GH 5856/5853 # Series.sort_values operating on a view @@ -79,7 +89,7 @@ def test_sort_values(self): def f(): 
s.sort_values(inplace=True) - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) def test_sort_index(self): rindex = list(self.ts.index) @@ -102,13 +112,13 @@ def test_sort_index(self): sorted_series = random_order.sort_index(axis=0) assert_series_equal(sorted_series, self.ts) - self.assertRaises(ValueError, lambda: random_order.sort_values(axis=1)) + pytest.raises(ValueError, lambda: random_order.sort_values(axis=1)) sorted_series = random_order.sort_index(level=0, axis=0) assert_series_equal(sorted_series, self.ts) - self.assertRaises(ValueError, - lambda: random_order.sort_index(level=0, axis=1)) + pytest.raises(ValueError, + lambda: random_order.sort_index(level=0, axis=1)) def test_sort_index_inplace(self): @@ -119,16 +129,17 @@ def test_sort_index_inplace(self): # descending random_order = self.ts.reindex(rindex) result = random_order.sort_index(ascending=False, inplace=True) - self.assertIs(result, None, - msg='sort_index() inplace should return None') - assert_series_equal(random_order, self.ts.reindex(self.ts.index[::-1])) + + assert result is None + tm.assert_series_equal(random_order, self.ts.reindex( + self.ts.index[::-1])) # ascending random_order = self.ts.reindex(rindex) result = random_order.sort_index(ascending=True, inplace=True) - self.assertIs(result, None, - msg='sort_index() inplace should return None') - assert_series_equal(random_order, self.ts) + + assert result is None + tm.assert_series_equal(random_order, self.ts) def test_sort_index_multiindex(self): @@ -169,3 +180,87 @@ def test_sort_index_na_position(self): expected_series_last = Series(index=[1, 2, 3, 3, 4, np.nan]) index_sorted_series = series.sort_index(na_position='last') assert_series_equal(expected_series_last, index_sorted_series) + + def test_sort_index_intervals(self): + s = Series([np.nan, 1, 2, 3], IntervalIndex.from_arrays( + [0, 1, 2, 3], + [1, 2, 3, 4])) + + result = s.sort_index() + expected = s + assert_series_equal(result, expected) + + result = 
s.sort_index(ascending=False) + expected = Series([3, 2, 1, np.nan], IntervalIndex.from_arrays( + [3, 2, 1, 0], + [4, 3, 2, 1])) + assert_series_equal(result, expected) + + def test_sort_values_categorical(self): + + c = Categorical(["a", "b", "b", "a"], ordered=False) + cat = Series(c.copy()) + + # sort in the categories order + expected = Series( + Categorical(["a", "a", "b", "b"], + ordered=False), index=[0, 3, 1, 2]) + result = cat.sort_values() + tm.assert_series_equal(result, expected) + + cat = Series(Categorical(["a", "c", "b", "d"], ordered=True)) + res = cat.sort_values() + exp = np.array(["a", "b", "c", "d"], dtype=np.object_) + tm.assert_numpy_array_equal(res.__array__(), exp) + + cat = Series(Categorical(["a", "c", "b", "d"], categories=[ + "a", "b", "c", "d"], ordered=True)) + res = cat.sort_values() + exp = np.array(["a", "b", "c", "d"], dtype=np.object_) + tm.assert_numpy_array_equal(res.__array__(), exp) + + res = cat.sort_values(ascending=False) + exp = np.array(["d", "c", "b", "a"], dtype=np.object_) + tm.assert_numpy_array_equal(res.__array__(), exp) + + raw_cat1 = Categorical(["a", "b", "c", "d"], + categories=["a", "b", "c", "d"], ordered=False) + raw_cat2 = Categorical(["a", "b", "c", "d"], + categories=["d", "c", "b", "a"], ordered=True) + s = ["a", "b", "c", "d"] + df = DataFrame({"unsort": raw_cat1, + "sort": raw_cat2, + "string": s, + "values": [1, 2, 3, 4]}) + + # Cats must be sorted in a dataframe + res = df.sort_values(by=["string"], ascending=False) + exp = np.array(["d", "c", "b", "a"], dtype=np.object_) + tm.assert_numpy_array_equal(res["sort"].values.__array__(), exp) + assert res["sort"].dtype == "category" + + res = df.sort_values(by=["sort"], ascending=False) + exp = df.sort_values(by=["string"], ascending=True) + tm.assert_series_equal(res["values"], exp["values"]) + assert res["sort"].dtype == "category" + assert res["unsort"].dtype == "category" + + # unordered cat, but we allow this + df.sort_values(by=["unsort"], 
ascending=False) + + # multi-columns sort + # GH 7848 + df = DataFrame({"id": [6, 5, 4, 3, 2, 1], + "raw_grade": ['a', 'b', 'b', 'a', 'a', 'e']}) + df["grade"] = Categorical(df["raw_grade"], ordered=True) + df['grade'] = df['grade'].cat.set_categories(['b', 'e', 'a']) + + # sorts 'grade' according to the order of the categories + result = df.sort_values(by=['grade']) + expected = df.iloc[[1, 2, 5, 0, 3, 4]] + tm.assert_frame_equal(result, expected) + + # multi + result = df.sort_values(by=['grade', 'id']) + expected = df.iloc[[2, 1, 5, 4, 3, 0]] + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/series/test_subclass.py b/pandas/tests/series/test_subclass.py index 3b1b8aca426e1..60afaa3b821e1 100644 --- a/pandas/tests/series/test_subclass.py +++ b/pandas/tests/series/test_subclass.py @@ -6,63 +6,70 @@ import pandas.util.testing as tm -class TestSeriesSubclassing(tm.TestCase): +class TestSeriesSubclassing(object): def test_indexing_sliced(self): s = tm.SubclassedSeries([1, 2, 3, 4], index=list('abcd')) res = s.loc[['a', 'b']] exp = tm.SubclassedSeries([1, 2], index=list('ab')) tm.assert_series_equal(res, exp) - tm.assertIsInstance(res, tm.SubclassedSeries) res = s.iloc[[2, 3]] exp = tm.SubclassedSeries([3, 4], index=list('cd')) tm.assert_series_equal(res, exp) - tm.assertIsInstance(res, tm.SubclassedSeries) res = s.loc[['a', 'b']] exp = tm.SubclassedSeries([1, 2], index=list('ab')) tm.assert_series_equal(res, exp) - tm.assertIsInstance(res, tm.SubclassedSeries) def test_to_frame(self): s = tm.SubclassedSeries([1, 2, 3, 4], index=list('abcd'), name='xxx') res = s.to_frame() exp = tm.SubclassedDataFrame({'xxx': [1, 2, 3, 4]}, index=list('abcd')) tm.assert_frame_equal(res, exp) - tm.assertIsInstance(res, tm.SubclassedDataFrame) + def test_subclass_unstack(self): + # GH 15564 + s = tm.SubclassedSeries( + [1, 2, 3, 4], index=[list('aabb'), list('xyxy')]) -class TestSparseSeriesSubclassing(tm.TestCase): + res = s.unstack() + exp = tm.SubclassedDataFrame( + 
{'x': [1, 3], 'y': [2, 4]}, index=['a', 'b']) + + tm.assert_frame_equal(res, exp) + + +class TestSparseSeriesSubclassing(object): def test_subclass_sparse_slice(self): # int64 s = tm.SubclassedSparseSeries([1, 2, 3, 4, 5]) exp = tm.SubclassedSparseSeries([2, 3, 4], index=[1, 2, 3]) tm.assert_sp_series_equal(s.loc[1:3], exp) - self.assertEqual(s.loc[1:3].dtype, np.int64) + assert s.loc[1:3].dtype == np.int64 exp = tm.SubclassedSparseSeries([2, 3], index=[1, 2]) tm.assert_sp_series_equal(s.iloc[1:3], exp) - self.assertEqual(s.iloc[1:3].dtype, np.int64) + assert s.iloc[1:3].dtype == np.int64 exp = tm.SubclassedSparseSeries([2, 3], index=[1, 2]) tm.assert_sp_series_equal(s[1:3], exp) - self.assertEqual(s[1:3].dtype, np.int64) + assert s[1:3].dtype == np.int64 # float64 s = tm.SubclassedSparseSeries([1., 2., 3., 4., 5.]) exp = tm.SubclassedSparseSeries([2., 3., 4.], index=[1, 2, 3]) tm.assert_sp_series_equal(s.loc[1:3], exp) - self.assertEqual(s.loc[1:3].dtype, np.float64) + assert s.loc[1:3].dtype == np.float64 exp = tm.SubclassedSparseSeries([2., 3.], index=[1, 2]) tm.assert_sp_series_equal(s.iloc[1:3], exp) - self.assertEqual(s.iloc[1:3].dtype, np.float64) + assert s.iloc[1:3].dtype == np.float64 exp = tm.SubclassedSparseSeries([2., 3.], index=[1, 2]) tm.assert_sp_series_equal(s[1:3], exp) - self.assertEqual(s[1:3].dtype, np.float64) + assert s[1:3].dtype == np.float64 def test_subclass_sparse_addition(self): s1 = tm.SubclassedSparseSeries([1, 3, 5]) diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index e0db813e60c14..baf2619c7b022 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -1,21 +1,26 @@ # coding=utf-8 # pylint: disable-msg=E1101,W0612 +import pytest + import numpy as np from datetime import datetime, timedelta, time import pandas as pd import pandas.util.testing as tm -from pandas.tslib import iNaT +import pandas.util._test_decorators as td +from pandas._libs.tslib 
import iNaT from pandas.compat import lrange, StringIO, product -from pandas.tseries.tdi import TimedeltaIndex -from pandas.tseries.index import DatetimeIndex +from pandas.errors import NullFrequencyError + +from pandas.core.indexes.timedeltas import TimedeltaIndex +from pandas.core.indexes.datetimes import DatetimeIndex from pandas.tseries.offsets import BDay, BMonthEnd from pandas import (Index, Series, date_range, NaT, concat, DataFrame, Timestamp, to_datetime, offsets, timedelta_range) from pandas.util.testing import (assert_series_equal, assert_almost_equal, - assert_frame_equal, _skip_if_has_locale) + assert_frame_equal) from pandas.tests.series.common import TestData @@ -31,7 +36,7 @@ def assert_range_equal(left, right): assert (left.tz == right.tz) -class TestTimeSeries(TestData, tm.TestCase): +class TestTimeSeries(TestData): def test_shift(self): shifted = self.ts.shift(1) @@ -39,7 +44,7 @@ def test_shift(self): tm.assert_index_equal(shifted.index, self.ts.index) tm.assert_index_equal(unshifted.index, self.ts.index) - tm.assert_numpy_array_equal(unshifted.valid().values, + tm.assert_numpy_array_equal(unshifted.dropna().values, self.ts.values[:-1]) offset = BDay() @@ -66,14 +71,14 @@ def test_shift(self): unshifted = shifted.shift(-1) tm.assert_index_equal(shifted.index, ps.index) tm.assert_index_equal(unshifted.index, ps.index) - tm.assert_numpy_array_equal(unshifted.valid().values, ps.values[:-1]) + tm.assert_numpy_array_equal(unshifted.dropna().values, ps.values[:-1]) shifted2 = ps.shift(1, 'B') shifted3 = ps.shift(1, BDay()) assert_series_equal(shifted2, shifted3) assert_series_equal(ps, shifted2.shift(-1, 'B')) - self.assertRaises(ValueError, ps.shift, freq='D') + pytest.raises(ValueError, ps.shift, freq='D') # legacy support shifted4 = ps.shift(1, freq='B') @@ -104,7 +109,7 @@ def test_shift(self): # incompat tz s2 = Series(date_range('2000-01-01 09:00:00', periods=5, tz='CET'), name='foo') - self.assertRaises(ValueError, lambda: s - s2) + 
pytest.raises(TypeError, lambda: s - s2) def test_shift2(self): ts = Series(np.random.randn(5), @@ -120,7 +125,7 @@ def test_shift2(self): tm.assert_index_equal(result.index, exp_index) idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-04']) - self.assertRaises(ValueError, idx.shift, 1) + pytest.raises(NullFrequencyError, idx.shift, 1) def test_shift_dst(self): # GH 13926 @@ -129,25 +134,25 @@ def test_shift_dst(self): res = s.shift(0) tm.assert_series_equal(res, s) - self.assertEqual(res.dtype, 'datetime64[ns, US/Eastern]') + assert res.dtype == 'datetime64[ns, US/Eastern]' res = s.shift(1) - exp_vals = [NaT] + dates.asobject.values.tolist()[:9] + exp_vals = [NaT] + dates.astype(object).values.tolist()[:9] exp = Series(exp_vals) tm.assert_series_equal(res, exp) - self.assertEqual(res.dtype, 'datetime64[ns, US/Eastern]') + assert res.dtype == 'datetime64[ns, US/Eastern]' res = s.shift(-2) - exp_vals = dates.asobject.values.tolist()[2:] + [NaT, NaT] + exp_vals = dates.astype(object).values.tolist()[2:] + [NaT, NaT] exp = Series(exp_vals) tm.assert_series_equal(res, exp) - self.assertEqual(res.dtype, 'datetime64[ns, US/Eastern]') + assert res.dtype == 'datetime64[ns, US/Eastern]' for ex in [10, -10, 20, -20]: res = s.shift(ex) exp = Series([NaT] * 10, dtype='datetime64[ns, US/Eastern]') tm.assert_series_equal(res, exp) - self.assertEqual(res.dtype, 'datetime64[ns, US/Eastern]') + assert res.dtype == 'datetime64[ns, US/Eastern]' def test_tshift(self): # PeriodIndex @@ -163,7 +168,7 @@ def test_tshift(self): shifted3 = ps.tshift(freq=BDay()) assert_series_equal(shifted, shifted3) - self.assertRaises(ValueError, ps.tshift, freq='M') + pytest.raises(ValueError, ps.tshift, freq='M') # DatetimeIndex shifted = self.ts.tshift(1) @@ -182,7 +187,7 @@ def test_tshift(self): assert_series_equal(unshifted, inferred_ts) no_freq = self.ts[[0, 5, 7]] - self.assertRaises(ValueError, no_freq.tshift) + pytest.raises(ValueError, no_freq.tshift) def test_truncate(self): offset = 
BDay() @@ -230,9 +235,25 @@ def test_truncate(self): truncated = ts.truncate(before=self.ts.index[-1] + offset) assert (len(truncated) == 0) - self.assertRaises(ValueError, ts.truncate, - before=self.ts.index[-1] + offset, - after=self.ts.index[0] - offset) + pytest.raises(ValueError, ts.truncate, + before=self.ts.index[-1] + offset, + after=self.ts.index[0] - offset) + + def test_truncate_nonsortedindex(self): + # GH 17935 + + s = pd.Series(['a', 'b', 'c', 'd', 'e'], + index=[5, 3, 2, 9, 0]) + with tm.assert_raises_regex(ValueError, + 'truncate requires a sorted index'): + s.truncate(before=3, after=9) + + rng = pd.date_range('2011-01-01', '2012-01-01', freq='W') + ts = pd.Series(np.random.randn(len(rng)), index=rng) + with tm.assert_raises_regex(ValueError, + 'truncate requires a sorted index'): + ts.sort_values(ascending=False).truncate(before='2011-11', + after='2011-12') def test_asfreq(self): ts = Series([0., 1., 2.], index=[datetime(2009, 10, 30), datetime( @@ -240,25 +261,33 @@ def test_asfreq(self): daily_ts = ts.asfreq('B') monthly_ts = daily_ts.asfreq('BM') - assert_series_equal(monthly_ts, ts) + tm.assert_series_equal(monthly_ts, ts) daily_ts = ts.asfreq('B', method='pad') monthly_ts = daily_ts.asfreq('BM') - assert_series_equal(monthly_ts, ts) + tm.assert_series_equal(monthly_ts, ts) daily_ts = ts.asfreq(BDay()) monthly_ts = daily_ts.asfreq(BMonthEnd()) - assert_series_equal(monthly_ts, ts) + tm.assert_series_equal(monthly_ts, ts) result = ts[:0].asfreq('M') - self.assertEqual(len(result), 0) - self.assertIsNot(result, ts) + assert len(result) == 0 + assert result is not ts daily_ts = ts.asfreq('D', fill_value=-1) result = daily_ts.value_counts().sort_index() expected = Series([60, 1, 1, 1], index=[-1.0, 2.0, 1.0, 0.0]).sort_index() - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) + + def test_asfreq_datetimeindex_empty_series(self): + # GH 14320 + expected = Series(index=pd.DatetimeIndex( + ["2016-09-29 
11:00"])).asfreq('H') + result = Series(index=pd.DatetimeIndex(["2016-09-29 11:00"]), + data=[3]).asfreq('H') + tm.assert_index_equal(expected.index, result.index) def test_diff(self): # Just run the function @@ -270,7 +299,7 @@ def test_diff(self): s = Series([a, b]) rs = s.diff() - self.assertEqual(rs[1], 1) + assert rs[1] == 1 # neg n rs = self.ts.diff(-1) @@ -315,15 +344,43 @@ def test_pct_change(self): rs = self.ts.pct_change(freq='5D') filled = self.ts.fillna(method='pad') - assert_series_equal(rs, filled / filled.shift(freq='5D') - 1) + assert_series_equal(rs, + (filled / filled.shift(freq='5D') - 1) + .reindex_like(filled)) def test_pct_change_shift_over_nas(self): s = Series([1., 1.5, np.nan, 2.5, 3.]) chg = s.pct_change() - expected = Series([np.nan, 0.5, np.nan, 2.5 / 1.5 - 1, .2]) + expected = Series([np.nan, 0.5, 0., 2.5 / 1.5 - 1, .2]) assert_series_equal(chg, expected) + @pytest.mark.parametrize("freq, periods, fill_method, limit", + [('5B', 5, None, None), + ('3B', 3, None, None), + ('3B', 3, 'bfill', None), + ('7B', 7, 'pad', 1), + ('7B', 7, 'bfill', 3), + ('14B', 14, None, None)]) + def test_pct_change_periods_freq(self, freq, periods, fill_method, limit): + # GH 7292 + rs_freq = self.ts.pct_change(freq=freq, + fill_method=fill_method, + limit=limit) + rs_periods = self.ts.pct_change(periods, + fill_method=fill_method, + limit=limit) + assert_series_equal(rs_freq, rs_periods) + + empty_ts = Series(index=self.ts.index) + rs_freq = empty_ts.pct_change(freq=freq, + fill_method=fill_method, + limit=limit) + rs_periods = empty_ts.pct_change(periods, + fill_method=fill_method, + limit=limit) + assert_series_equal(rs_freq, rs_periods) + def test_autocorr(self): # Just run the function corr1 = self.ts.autocorr() @@ -333,10 +390,10 @@ def test_autocorr(self): # corr() with lag needs Series of at least length 2 if len(self.ts) <= 2: - self.assertTrue(np.isnan(corr1)) - self.assertTrue(np.isnan(corr2)) + assert np.isnan(corr1) + assert np.isnan(corr2) else: 
- self.assertEqual(corr1, corr2) + assert corr1 == corr2 # Choose a random lag between 1 and length of Series - 2 # and compare the result with the Series corr() function @@ -346,34 +403,34 @@ def test_autocorr(self): # corr() with lag needs Series of at least length 2 if len(self.ts) <= 2: - self.assertTrue(np.isnan(corr1)) - self.assertTrue(np.isnan(corr2)) + assert np.isnan(corr1) + assert np.isnan(corr2) else: - self.assertEqual(corr1, corr2) + assert corr1 == corr2 def test_first_last_valid(self): ts = self.ts.copy() ts[:5] = np.NaN index = ts.first_valid_index() - self.assertEqual(index, ts.index[5]) + assert index == ts.index[5] ts[-5:] = np.NaN index = ts.last_valid_index() - self.assertEqual(index, ts.index[-6]) + assert index == ts.index[-6] ts[:] = np.nan - self.assertIsNone(ts.last_valid_index()) - self.assertIsNone(ts.first_valid_index()) + assert ts.last_valid_index() is None + assert ts.first_valid_index() is None ser = Series([], index=[]) - self.assertIsNone(ser.last_valid_index()) - self.assertIsNone(ser.first_valid_index()) + assert ser.last_valid_index() is None + assert ser.first_valid_index() is None # GH12800 empty = Series() - self.assertIsNone(empty.last_valid_index()) - self.assertIsNone(empty.first_valid_index()) + assert empty.last_valid_index() is None + assert empty.first_valid_index() is None def test_mpl_compat_hack(self): result = self.ts[:, np.newaxis] @@ -383,10 +440,8 @@ def test_mpl_compat_hack(self): def test_timeseries_coercion(self): idx = tm.makeDateIndex(10000) ser = Series(np.random.randn(len(idx)), idx.astype(object)) - with tm.assert_produces_warning(FutureWarning): - self.assertTrue(ser.is_time_series) - self.assertTrue(ser.index.is_all_dates) - self.assertIsInstance(ser.index, DatetimeIndex) + assert ser.index.is_all_dates + assert isinstance(ser.index, DatetimeIndex) def test_empty_series_ops(self): # see issue #13844 @@ -395,7 +450,7 @@ def test_empty_series_ops(self): assert_series_equal(a, a + b) 
assert_series_equal(a, a - b) assert_series_equal(a, b + a) - self.assertRaises(TypeError, lambda x, y: x - y, b, a) + pytest.raises(TypeError, lambda x, y: x - y, b, a) def test_contiguous_boolean_preserve_freq(self): rng = date_range('1/1/2000', '3/1/2000', freq='B') @@ -405,12 +460,12 @@ def test_contiguous_boolean_preserve_freq(self): masked = rng[mask] expected = rng[10:20] - self.assertIsNotNone(expected.freq) + assert expected.freq is not None assert_range_equal(masked, expected) mask[22] = True masked = rng[mask] - self.assertIsNone(masked.freq) + assert masked.freq is None def test_to_datetime_unit(self): @@ -460,9 +515,9 @@ def test_to_datetime_unit(self): Timestamp('1970-01-03')] + ['NaT'] * 3) tm.assert_index_equal(result, expected) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): to_datetime([1, 2, 'foo'], unit='D') - with self.assertRaises(ValueError): + with pytest.raises(ValueError): to_datetime([1, 2, 111111111], unit='D') # coerce we can process @@ -479,7 +534,7 @@ def test_series_ctor_datetime64(self): dates = np.asarray(rng) series = Series(dates) - self.assertTrue(np.issubdtype(series.dtype, np.dtype('M8[ns]'))) + assert np.issubdtype(series.dtype, np.dtype('M8[ns]')) def test_series_repr_nat(self): series = Series([0, 1000, 2000, iNaT], dtype='M8[ns]') @@ -490,7 +545,7 @@ def test_series_repr_nat(self): '2 1970-01-01 00:00:00.000002\n' '3 NaT\n' 'dtype: datetime64[ns]') - self.assertEqual(result, expected) + assert result == expected def test_asfreq_keep_index_name(self): # GH #9854 @@ -498,8 +553,8 @@ def test_asfreq_keep_index_name(self): index = pd.date_range('20130101', periods=20, name=index_name) df = pd.DataFrame([x for x in range(20)], columns=['foo'], index=index) - self.assertEqual(index_name, df.index.name) - self.assertEqual(index_name, df.asfreq('10D').index.name) + assert index_name == df.index.name + assert index_name == df.asfreq('10D').index.name def test_promote_datetime_date(self): rng = 
date_range('1/1/2000', periods=20) @@ -522,7 +577,7 @@ def test_promote_datetime_date(self): result = rng.get_indexer(ts2.index) expected = rng.get_indexer(ts_slice.index) - self.assert_numpy_array_equal(result, expected) + tm.assert_numpy_array_equal(result, expected) def test_asfreq_normalize(self): rng = date_range('1/1/2000 09:30', periods=20) @@ -547,11 +602,11 @@ def test_asfreq_normalize(self): def test_first_subset(self): ts = _simple_ts('1/1/2000', '1/1/2010', freq='12h') result = ts.first('10d') - self.assertEqual(len(result), 20) + assert len(result) == 20 ts = _simple_ts('1/1/2000', '1/1/2010') result = ts.first('10d') - self.assertEqual(len(result), 10) + assert len(result) == 10 result = ts.first('3M') expected = ts[:'3/31/2000'] @@ -567,11 +622,11 @@ def test_first_subset(self): def test_last_subset(self): ts = _simple_ts('1/1/2000', '1/1/2010', freq='12h') result = ts.last('10d') - self.assertEqual(len(result), 20) + assert len(result) == 20 ts = _simple_ts('1/1/2000', '1/1/2010') result = ts.last('10d') - self.assertEqual(len(result), 10) + assert len(result) == 10 result = ts.last('21D') expected = ts['12/12/2009':] @@ -594,9 +649,9 @@ def test_at_time(self): rng = date_range('1/1/2000', '1/5/2000', freq='5min') ts = Series(np.random.randn(len(rng)), index=rng) rs = ts.at_time(rng[1]) - self.assertTrue((rs.index.hour == rng[1].hour).all()) - self.assertTrue((rs.index.minute == rng[1].minute).all()) - self.assertTrue((rs.index.second == rng[1].second).all()) + assert (rs.index.hour == rng[1].hour).all() + assert (rs.index.minute == rng[1].minute).all() + assert (rs.index.second == rng[1].second).all() result = ts.at_time('9:30') expected = ts.at_time(time(9, 30)) @@ -630,7 +685,7 @@ def test_at_time(self): rng = date_range('1/1/2012', freq='23Min', periods=384) ts = Series(np.random.randn(len(rng)), rng) rs = ts.at_time('16:00') - self.assertEqual(len(rs), 0) + assert len(rs) == 0 def test_between(self): series = Series(date_range('1/1/2000', 
periods=10)) @@ -655,18 +710,18 @@ def test_between_time(self): if not inc_end: exp_len -= 4 - self.assertEqual(len(filtered), exp_len) + assert len(filtered) == exp_len for rs in filtered.index: t = rs.time() if inc_start: - self.assertTrue(t >= stime) + assert t >= stime else: - self.assertTrue(t > stime) + assert t > stime if inc_end: - self.assertTrue(t <= etime) + assert t <= etime else: - self.assertTrue(t < etime) + assert t < etime result = ts.between_time('00:00', '01:00') expected = ts.between_time(stime, etime) @@ -687,37 +742,36 @@ def test_between_time(self): if not inc_end: exp_len -= 4 - self.assertEqual(len(filtered), exp_len) + assert len(filtered) == exp_len for rs in filtered.index: t = rs.time() if inc_start: - self.assertTrue((t >= stime) or (t <= etime)) + assert (t >= stime) or (t <= etime) else: - self.assertTrue((t > stime) or (t <= etime)) + assert (t > stime) or (t <= etime) if inc_end: - self.assertTrue((t <= etime) or (t >= stime)) + assert (t <= etime) or (t >= stime) else: - self.assertTrue((t < etime) or (t >= stime)) + assert (t < etime) or (t >= stime) def test_between_time_types(self): # GH11818 rng = date_range('1/1/2000', '1/5/2000', freq='5min') - self.assertRaises(ValueError, rng.indexer_between_time, - datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) + pytest.raises(ValueError, rng.indexer_between_time, + datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) frame = DataFrame({'A': 0}, index=rng) - self.assertRaises(ValueError, frame.between_time, - datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) + pytest.raises(ValueError, frame.between_time, + datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) series = Series(0, index=rng) - self.assertRaises(ValueError, series.between_time, - datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) + pytest.raises(ValueError, series.between_time, + datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) + @td.skip_if_has_locale def test_between_time_formats(self): # GH11818 - _skip_if_has_locale() - rng 
= date_range('1/1/2000', '1/5/2000', freq='5min') ts = DataFrame(np.random.randn(len(rng), 2), index=rng) @@ -728,12 +782,10 @@ def test_between_time_formats(self): expected_length = 28 for time_string in strings: - self.assertEqual(len(ts.between_time(*time_string)), - expected_length, - "%s - %s" % time_string) + assert len(ts.between_time(*time_string)) == expected_length def test_to_period(self): - from pandas.tseries.period import period_range + from pandas.core.indexes.period import period_range ts = _simple_ts('1/1/2000', '1/1/2001') @@ -788,7 +840,7 @@ def test_to_csv_numpy_16_bug(self): frame.to_csv(buf) result = buf.getvalue() - self.assertIn('2000-01-01', result) + assert '2000-01-01' in result def test_series_map_box_timedelta(self): # GH 11349 @@ -809,31 +861,31 @@ def test_asfreq_resample_set_correct_freq(self): df = df.set_index(pd.to_datetime(df.date)) # testing the settings before calling .asfreq() and .resample() - self.assertEqual(df.index.freq, None) - self.assertEqual(df.index.inferred_freq, 'D') + assert df.index.freq is None + assert df.index.inferred_freq == 'D' # does .asfreq() set .freq correctly? - self.assertEqual(df.asfreq('D').index.freq, 'D') + assert df.asfreq('D').index.freq == 'D' # does .resample() set .freq correctly? 
- self.assertEqual(df.resample('D').asfreq().index.freq, 'D') + assert df.resample('D').asfreq().index.freq == 'D' def test_pickle(self): # GH4606 - p = self.round_trip_pickle(NaT) - self.assertTrue(p is NaT) + p = tm.round_trip_pickle(NaT) + assert p is NaT idx = pd.to_datetime(['2013-01-01', NaT, '2014-01-06']) - idx_p = self.round_trip_pickle(idx) - self.assertTrue(idx_p[0] == idx[0]) - self.assertTrue(idx_p[1] is NaT) - self.assertTrue(idx_p[2] == idx[2]) + idx_p = tm.round_trip_pickle(idx) + assert idx_p[0] == idx[0] + assert idx_p[1] is NaT + assert idx_p[2] == idx[2] # GH11002 # don't infer freq idx = date_range('1750-1-1', '2050-1-1', freq='7D') - idx_p = self.round_trip_pickle(idx) + idx_p = tm.round_trip_pickle(idx) tm.assert_index_equal(idx, idx_p) def test_setops_preserve_freq(self): @@ -841,35 +893,35 @@ def test_setops_preserve_freq(self): rng = date_range('1/1/2000', '1/1/2002', name='idx', tz=tz) result = rng[:50].union(rng[50:100]) - self.assertEqual(result.name, rng.name) - self.assertEqual(result.freq, rng.freq) - self.assertEqual(result.tz, rng.tz) + assert result.name == rng.name + assert result.freq == rng.freq + assert result.tz == rng.tz result = rng[:50].union(rng[30:100]) - self.assertEqual(result.name, rng.name) - self.assertEqual(result.freq, rng.freq) - self.assertEqual(result.tz, rng.tz) + assert result.name == rng.name + assert result.freq == rng.freq + assert result.tz == rng.tz result = rng[:50].union(rng[60:100]) - self.assertEqual(result.name, rng.name) - self.assertIsNone(result.freq) - self.assertEqual(result.tz, rng.tz) + assert result.name == rng.name + assert result.freq is None + assert result.tz == rng.tz result = rng[:50].intersection(rng[25:75]) - self.assertEqual(result.name, rng.name) - self.assertEqual(result.freqstr, 'D') - self.assertEqual(result.tz, rng.tz) + assert result.name == rng.name + assert result.freqstr == 'D' + assert result.tz == rng.tz nofreq = DatetimeIndex(list(rng[25:75]), name='other') result = 
rng[:50].union(nofreq) - self.assertIsNone(result.name) - self.assertEqual(result.freq, rng.freq) - self.assertEqual(result.tz, rng.tz) + assert result.name is None + assert result.freq == rng.freq + assert result.tz == rng.tz result = rng[:50].intersection(nofreq) - self.assertIsNone(result.name) - self.assertEqual(result.freq, rng.freq) - self.assertEqual(result.tz, rng.tz) + assert result.name is None + assert result.freq == rng.freq + assert result.tz == rng.tz def test_min_max(self): rng = date_range('1/1/2000', '12/31/2000') @@ -877,13 +929,13 @@ def test_min_max(self): the_min = rng2.min() the_max = rng2.max() - tm.assertIsInstance(the_min, Timestamp) - tm.assertIsInstance(the_max, Timestamp) - self.assertEqual(the_min, rng[0]) - self.assertEqual(the_max, rng[-1]) + assert isinstance(the_min, Timestamp) + assert isinstance(the_max, Timestamp) + assert the_min == rng[0] + assert the_max == rng[-1] - self.assertEqual(rng.min(), rng[0]) - self.assertEqual(rng.max(), rng[-1]) + assert rng.min() == rng[0] + assert rng.max() == rng[-1] def test_min_max_series(self): rng = date_range('1/1/2000', periods=10, freq='4h') @@ -892,13 +944,13 @@ def test_min_max_series(self): result = df.TS.max() exp = Timestamp(df.TS.iat[-1]) - self.assertTrue(isinstance(result, Timestamp)) - self.assertEqual(result, exp) + assert isinstance(result, Timestamp) + assert result == exp result = df.TS.min() exp = Timestamp(df.TS.iat[0]) - self.assertTrue(isinstance(result, Timestamp)) - self.assertEqual(result, exp) + assert isinstance(result, Timestamp) + assert result == exp def test_from_M8_structured(self): dates = [(datetime(2012, 9, 9, 0, 0), datetime(2012, 9, 8, 15, 10))] @@ -906,15 +958,16 @@ def test_from_M8_structured(self): dtype=[('Date', 'M8[us]'), ('Forecasting', 'M8[us]')]) df = DataFrame(arr) - self.assertEqual(df['Date'][0], dates[0][0]) - self.assertEqual(df['Forecasting'][0], dates[0][1]) + assert df['Date'][0] == dates[0][0] + assert df['Forecasting'][0] == dates[0][1] s 
= Series(arr['Date']) - self.assertTrue(s[0], Timestamp) - self.assertEqual(s[0], dates[0][0]) + assert isinstance(s[0], Timestamp) + assert s[0] == dates[0][0] - s = Series.from_array(arr['Date'], Index([0])) - self.assertEqual(s[0], dates[0][0]) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + s = Series.from_array(arr['Date'], Index([0])) + assert s[0] == dates[0][0] def test_get_level_values_box(self): from pandas import MultiIndex @@ -925,4 +978,4 @@ def test_get_level_values_box(self): index = MultiIndex(levels=levels, labels=labels) - self.assertTrue(isinstance(index.get_level_values(0)[0], Timestamp)) + assert isinstance(index.get_level_values(0)[0], Timestamp) diff --git a/pandas/tests/series/test_timezones.py b/pandas/tests/series/test_timezones.py new file mode 100644 index 0000000000000..b54645d04bd1a --- /dev/null +++ b/pandas/tests/series/test_timezones.py @@ -0,0 +1,302 @@ +# -*- coding: utf-8 -*- +""" +Tests for Series timezone-related methods +""" +from datetime import datetime + +import pytest +import pytz +import numpy as np +from dateutil.tz import tzoffset + +import pandas.util.testing as tm +from pandas._libs import tslib +from pandas._libs.tslibs import timezones +from pandas.compat import lrange +from pandas.core.indexes.datetimes import date_range +from pandas import Series, Timestamp, DatetimeIndex, Index + + +class TestSeriesTimezones(object): + # ----------------------------------------------------------------- + # Series.tz_localize + def test_series_tz_localize(self): + + rng = date_range('1/1/2011', periods=100, freq='H') + ts = Series(1, index=rng) + + result = ts.tz_localize('utc') + assert result.index.tz.zone == 'UTC' + + # Can't localize if already tz-aware + rng = date_range('1/1/2011', periods=100, freq='H', tz='utc') + ts = Series(1, index=rng) + tm.assert_raises_regex(TypeError, 'Already tz-aware', + ts.tz_localize, 'US/Eastern') + + def test_series_tz_localize_ambiguous_bool(self): + # make sure 
that we are correctly accepting bool values as ambiguous + + # GH#14402 + ts = Timestamp('2015-11-01 01:00:03') + expected0 = Timestamp('2015-11-01 01:00:03-0500', tz='US/Central') + expected1 = Timestamp('2015-11-01 01:00:03-0600', tz='US/Central') + + ser = Series([ts]) + expected0 = Series([expected0]) + expected1 = Series([expected1]) + + with pytest.raises(pytz.AmbiguousTimeError): + ser.dt.tz_localize('US/Central') + + result = ser.dt.tz_localize('US/Central', ambiguous=True) + tm.assert_series_equal(result, expected0) + + result = ser.dt.tz_localize('US/Central', ambiguous=[True]) + tm.assert_series_equal(result, expected0) + + result = ser.dt.tz_localize('US/Central', ambiguous=False) + tm.assert_series_equal(result, expected1) + + result = ser.dt.tz_localize('US/Central', ambiguous=[False]) + tm.assert_series_equal(result, expected1) + + @pytest.mark.parametrize('tzstr', ['US/Eastern', 'dateutil/US/Eastern']) + def test_series_tz_localize_empty(self, tzstr): + # GH#2248 + ser = Series() + + ser2 = ser.tz_localize('utc') + assert ser2.index.tz == pytz.utc + + ser2 = ser.tz_localize(tzstr) + timezones.tz_compare(ser2.index.tz, timezones.maybe_get_tz(tzstr)) + + # ----------------------------------------------------------------- + # Series.tz_convert + + def test_series_tz_convert(self): + rng = date_range('1/1/2011', periods=200, freq='D', tz='US/Eastern') + ts = Series(1, index=rng) + + result = ts.tz_convert('Europe/Berlin') + assert result.index.tz.zone == 'Europe/Berlin' + + # can't convert tz-naive + rng = date_range('1/1/2011', periods=200, freq='D') + ts = Series(1, index=rng) + tm.assert_raises_regex(TypeError, "Cannot convert tz-naive", + ts.tz_convert, 'US/Eastern') + + def test_series_tz_convert_to_utc(self): + base = DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], + tz='UTC') + idx1 = base.tz_convert('Asia/Tokyo')[:2] + idx2 = base.tz_convert('US/Eastern')[1:] + + res = Series([1, 2], index=idx1) + Series([1, 1], index=idx2) + 
tm.assert_series_equal(res, Series([np.nan, 3, np.nan], index=base)) + + # ----------------------------------------------------------------- + # Series.append + + def test_series_append_aware(self): + rng1 = date_range('1/1/2011 01:00', periods=1, freq='H', + tz='US/Eastern') + rng2 = date_range('1/1/2011 02:00', periods=1, freq='H', + tz='US/Eastern') + ser1 = Series([1], index=rng1) + ser2 = Series([2], index=rng2) + ts_result = ser1.append(ser2) + + exp_index = DatetimeIndex(['2011-01-01 01:00', '2011-01-01 02:00'], + tz='US/Eastern') + exp = Series([1, 2], index=exp_index) + tm.assert_series_equal(ts_result, exp) + assert ts_result.index.tz == rng1.tz + + rng1 = date_range('1/1/2011 01:00', periods=1, freq='H', tz='UTC') + rng2 = date_range('1/1/2011 02:00', periods=1, freq='H', tz='UTC') + ser1 = Series([1], index=rng1) + ser2 = Series([2], index=rng2) + ts_result = ser1.append(ser2) + + exp_index = DatetimeIndex(['2011-01-01 01:00', '2011-01-01 02:00'], + tz='UTC') + exp = Series([1, 2], index=exp_index) + tm.assert_series_equal(ts_result, exp) + utc = rng1.tz + assert utc == ts_result.index.tz + + # GH#7795 + # different tz coerces to object dtype, not UTC + rng1 = date_range('1/1/2011 01:00', periods=1, freq='H', + tz='US/Eastern') + rng2 = date_range('1/1/2011 02:00', periods=1, freq='H', + tz='US/Central') + ser1 = Series([1], index=rng1) + ser2 = Series([2], index=rng2) + ts_result = ser1.append(ser2) + exp_index = Index([Timestamp('1/1/2011 01:00', tz='US/Eastern'), + Timestamp('1/1/2011 02:00', tz='US/Central')]) + exp = Series([1, 2], index=exp_index) + tm.assert_series_equal(ts_result, exp) + + def test_series_append_aware_naive(self): + rng1 = date_range('1/1/2011 01:00', periods=1, freq='H') + rng2 = date_range('1/1/2011 02:00', periods=1, freq='H', + tz='US/Eastern') + ser1 = Series(np.random.randn(len(rng1)), index=rng1) + ser2 = Series(np.random.randn(len(rng2)), index=rng2) + ts_result = ser1.append(ser2) + + expected = 
ser1.index.astype(object).append(ser2.index.astype(object)) + assert ts_result.index.equals(expected) + + # mixed + rng1 = date_range('1/1/2011 01:00', periods=1, freq='H') + rng2 = lrange(100) + ser1 = Series(np.random.randn(len(rng1)), index=rng1) + ser2 = Series(np.random.randn(len(rng2)), index=rng2) + ts_result = ser1.append(ser2) + + expected = ser1.index.astype(object).append(ser2.index) + assert ts_result.index.equals(expected) + + def test_series_append_dst(self): + rng1 = date_range('1/1/2016 01:00', periods=3, freq='H', + tz='US/Eastern') + rng2 = date_range('8/1/2016 01:00', periods=3, freq='H', + tz='US/Eastern') + ser1 = Series([1, 2, 3], index=rng1) + ser2 = Series([10, 11, 12], index=rng2) + ts_result = ser1.append(ser2) + + exp_index = DatetimeIndex(['2016-01-01 01:00', '2016-01-01 02:00', + '2016-01-01 03:00', '2016-08-01 01:00', + '2016-08-01 02:00', '2016-08-01 03:00'], + tz='US/Eastern') + exp = Series([1, 2, 3, 10, 11, 12], index=exp_index) + tm.assert_series_equal(ts_result, exp) + assert ts_result.index.tz == rng1.tz + + # ----------------------------------------------------------------- + + def test_dateutil_tzoffset_support(self): + values = [188.5, 328.25] + tzinfo = tzoffset(None, 7200) + index = [datetime(2012, 5, 11, 11, tzinfo=tzinfo), + datetime(2012, 5, 11, 12, tzinfo=tzinfo)] + series = Series(data=values, index=index) + + assert series.index.tz == tzinfo + + # it works! #2443 + repr(series.index[0]) + + @pytest.mark.parametrize('tz', ['US/Eastern', 'dateutil/US/Eastern']) + def test_tz_aware_asfreq(self, tz): + dr = date_range('2011-12-01', '2012-07-20', freq='D', tz=tz) + + ser = Series(np.random.randn(len(dr)), index=dr) + + # it works! 
+ ser.asfreq('T') + + @pytest.mark.parametrize('tz', ['US/Eastern', 'dateutil/US/Eastern']) + def test_string_index_alias_tz_aware(self, tz): + rng = date_range('1/1/2000', periods=10, tz=tz) + ser = Series(np.random.randn(len(rng)), index=rng) + + result = ser['1/3/2000'] + tm.assert_almost_equal(result, ser[2]) + + # TODO: De-duplicate with test below + def test_series_add_tz_mismatch_converts_to_utc_duplicate(self): + rng = date_range('1/1/2011', periods=10, freq='H', tz='US/Eastern') + ser = Series(np.random.randn(len(rng)), index=rng) + + ts_moscow = ser.tz_convert('Europe/Moscow') + + result = ser + ts_moscow + assert result.index.tz is pytz.utc + + result = ts_moscow + ser + assert result.index.tz is pytz.utc + + def test_series_add_tz_mismatch_converts_to_utc(self): + rng = date_range('1/1/2011', periods=100, freq='H', tz='utc') + + perm = np.random.permutation(100)[:90] + ser1 = Series(np.random.randn(90), + index=rng.take(perm).tz_convert('US/Eastern')) + + perm = np.random.permutation(100)[:90] + ser2 = Series(np.random.randn(90), + index=rng.take(perm).tz_convert('Europe/Berlin')) + + result = ser1 + ser2 + + uts1 = ser1.tz_convert('utc') + uts2 = ser2.tz_convert('utc') + expected = uts1 + uts2 + + assert result.index.tz == pytz.UTC + tm.assert_series_equal(result, expected) + + def test_series_add_aware_naive_raises(self): + rng = date_range('1/1/2011', periods=10, freq='H') + ser = Series(np.random.randn(len(rng)), index=rng) + + ser_utc = ser.tz_localize('utc') + + with pytest.raises(Exception): + ser + ser_utc + + with pytest.raises(Exception): + ser_utc + ser + + def test_series_align_aware(self): + idx1 = date_range('2001', periods=5, freq='H', tz='US/Eastern') + ser = Series(np.random.randn(len(idx1)), index=idx1) + ser_central = ser.tz_convert('US/Central') + # # different timezones convert to UTC + + new1, new2 = ser.align(ser_central) + assert new1.index.tz == pytz.UTC + assert new2.index.tz == pytz.UTC + + @pytest.mark.parametrize('tzstr', 
['US/Eastern', 'dateutil/US/Eastern']) + def test_localized_at_time_between_time(self, tzstr): + from datetime import time + tz = timezones.maybe_get_tz(tzstr) + + rng = date_range('4/16/2012', '5/1/2012', freq='H') + ts = Series(np.random.randn(len(rng)), index=rng) + + ts_local = ts.tz_localize(tzstr) + + result = ts_local.at_time(time(10, 0)) + expected = ts.at_time(time(10, 0)).tz_localize(tzstr) + tm.assert_series_equal(result, expected) + assert timezones.tz_compare(result.index.tz, tz) + + t1, t2 = time(10, 0), time(11, 0) + result = ts_local.between_time(t1, t2) + expected = ts.between_time(t1, t2).tz_localize(tzstr) + tm.assert_series_equal(result, expected) + assert timezones.tz_compare(result.index.tz, tz) + + @pytest.mark.parametrize('tzstr', ['Europe/Berlin', + 'dateutil/Europe/Berlin']) + def test_getitem_pydatetime_tz(self, tzstr): + tz = timezones.maybe_get_tz(tzstr) + + index = date_range(start='2012-12-24 16:00', end='2012-12-24 18:00', + freq='H', tz=tzstr) + ts = Series(index=index, data=index.hour) + time_pandas = Timestamp('2012-12-24 17:00', tz=tzstr) + + dt = datetime(2012, 12, 24, 17, 0) + time_datetime = tslib._localize_pydatetime(dt, tz) + assert ts[time_pandas] == ts[time_datetime] diff --git a/pandas/tests/series/test_validate.py b/pandas/tests/series/test_validate.py index cf0482b41c80a..a0cde5f81d021 100644 --- a/pandas/tests/series/test_validate.py +++ b/pandas/tests/series/test_validate.py @@ -1,33 +1,27 @@ -from unittest import TestCase from pandas.core.series import Series +import pytest +import pandas.util.testing as tm -class TestSeriesValidate(TestCase): - """Tests for error handling related to data types of method arguments.""" - s = Series([1, 2, 3, 4, 5]) - - def test_validate_bool_args(self): - # Tests for error handling related to boolean arguments. 
- invalid_values = [1, "True", [1, 2, 3], 5.0] - for value in invalid_values: - with self.assertRaises(ValueError): - self.s.reset_index(inplace=value) +@pytest.fixture +def series(): + return Series([1, 2, 3, 4, 5]) - with self.assertRaises(ValueError): - self.s._set_name(name='hello', inplace=value) - with self.assertRaises(ValueError): - self.s.sort_values(inplace=value) - - with self.assertRaises(ValueError): - self.s.sort_index(inplace=value) +class TestSeriesValidate(object): + """Tests for error handling related to data types of method arguments.""" - with self.assertRaises(ValueError): - self.s.sort_index(inplace=value) + @pytest.mark.parametrize("func", ["reset_index", "_set_name", + "sort_values", "sort_index", + "rename", "dropna"]) + @pytest.mark.parametrize("inplace", [1, "True", [1, 2, 3], 5.0]) + def test_validate_bool_args(self, series, func, inplace): + msg = "For argument \"inplace\" expected type bool" + kwargs = dict(inplace=inplace) - with self.assertRaises(ValueError): - self.s.rename(inplace=value) + if func == "_set_name": + kwargs["name"] = "hello" - with self.assertRaises(ValueError): - self.s.dropna(inplace=value) + with tm.assert_raises_regex(ValueError, msg): + getattr(series, func)(**kwargs) diff --git a/pandas/tests/sparse/__init__.py b/pandas/tests/sparse/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/sparse/common.py b/pandas/tests/sparse/common.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/sparse/frame/__init__.py b/pandas/tests/sparse/frame/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/sparse/frame/test_analytics.py b/pandas/tests/sparse/frame/test_analytics.py new file mode 100644 index 0000000000000..ccb30502b862e --- /dev/null +++ b/pandas/tests/sparse/frame/test_analytics.py @@ -0,0 +1,40 @@ +import pytest +import numpy as np +from pandas import SparseDataFrame, DataFrame, SparseSeries +from 
pandas.util import testing as tm + + +@pytest.mark.xfail(reason='Wrong SparseBlock initialization ' + '(GH 17386)') +def test_quantile(): + # GH 17386 + data = [[1, 1], [2, 10], [3, 100], [np.nan, np.nan]] + q = 0.1 + + sparse_df = SparseDataFrame(data) + result = sparse_df.quantile(q) + + dense_df = DataFrame(data) + dense_expected = dense_df.quantile(q) + sparse_expected = SparseSeries(dense_expected) + + tm.assert_series_equal(result, dense_expected) + tm.assert_sp_series_equal(result, sparse_expected) + + +@pytest.mark.xfail(reason='Wrong SparseBlock initialization ' + '(GH 17386)') +def test_quantile_multi(): + # GH 17386 + data = [[1, 1], [2, 10], [3, 100], [np.nan, np.nan]] + q = [0.1, 0.5] + + sparse_df = SparseDataFrame(data) + result = sparse_df.quantile(q) + + dense_df = DataFrame(data) + dense_expected = dense_df.quantile(q) + sparse_expected = SparseDataFrame(dense_expected) + + tm.assert_frame_equal(result, dense_expected) + tm.assert_sp_frame_equal(result, sparse_expected) diff --git a/pandas/tests/sparse/frame/test_apply.py b/pandas/tests/sparse/frame/test_apply.py new file mode 100644 index 0000000000000..07e4b1bf7c913 --- /dev/null +++ b/pandas/tests/sparse/frame/test_apply.py @@ -0,0 +1,92 @@ +import pytest +import numpy as np +from pandas import SparseDataFrame, DataFrame, Series, bdate_range +from pandas.core import nanops +from pandas.util import testing as tm + + +@pytest.fixture +def dates(): + return bdate_range('1/1/2011', periods=10) + + +@pytest.fixture +def empty(): + return SparseDataFrame() + + +@pytest.fixture +def frame(dates): + data = {'A': [np.nan, np.nan, np.nan, 0, 1, 2, 3, 4, 5, 6], + 'B': [0, 1, 2, np.nan, np.nan, np.nan, 3, 4, 5, 6], + 'C': np.arange(10, dtype=np.float64), + 'D': [0, 1, 2, 3, 4, 5, np.nan, np.nan, np.nan, np.nan]} + + return SparseDataFrame(data, index=dates) + + +@pytest.fixture +def fill_frame(frame): + values = frame.values.copy() + values[np.isnan(values)] = 2 + + return SparseDataFrame(values, 
columns=['A', 'B', 'C', 'D'], + default_fill_value=2, + index=frame.index) + + +def test_apply(frame): + applied = frame.apply(np.sqrt) + assert isinstance(applied, SparseDataFrame) + tm.assert_almost_equal(applied.values, np.sqrt(frame.values)) + + # agg / broadcast + with tm.assert_produces_warning(FutureWarning): + broadcasted = frame.apply(np.sum, broadcast=True) + assert isinstance(broadcasted, SparseDataFrame) + + with tm.assert_produces_warning(FutureWarning): + exp = frame.to_dense().apply(np.sum, broadcast=True) + tm.assert_frame_equal(broadcasted.to_dense(), exp) + + applied = frame.apply(np.sum) + tm.assert_series_equal(applied, + frame.to_dense().apply(nanops.nansum)) + + +def test_apply_fill(fill_frame): + applied = fill_frame.apply(np.sqrt) + assert applied['A'].fill_value == np.sqrt(2) + + +def test_apply_empty(empty): + assert empty.apply(np.sqrt) is empty + + +def test_apply_nonuq(): + orig = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], + index=['a', 'a', 'c']) + sparse = orig.to_sparse() + res = sparse.apply(lambda s: s[0], axis=1) + exp = orig.apply(lambda s: s[0], axis=1) + + # dtype must be kept + assert res.dtype == np.int64 + + # ToDo: apply must return subclassed dtype + assert isinstance(res, Series) + tm.assert_series_equal(res.to_dense(), exp) + + # df.T breaks + sparse = orig.T.to_sparse() + res = sparse.apply(lambda s: s[0], axis=0) # noqa + exp = orig.T.apply(lambda s: s[0], axis=0) + + # TODO: no non-unique columns supported in sparse yet + # tm.assert_series_equal(res.to_dense(), exp) + + +def test_applymap(frame): + # just test that it works + result = frame.applymap(lambda x: x * 2) + assert isinstance(result, SparseDataFrame) diff --git a/pandas/sparse/tests/test_frame.py b/pandas/tests/sparse/frame/test_frame.py similarity index 72% rename from pandas/sparse/tests/test_frame.py rename to pandas/tests/sparse/frame/test_frame.py index e3b865492c043..1062de3119efc 100644 --- a/pandas/sparse/tests/test_frame.py +++ 
b/pandas/tests/sparse/frame/test_frame.py @@ -2,28 +2,33 @@ import operator +import pytest +from warnings import catch_warnings from numpy import nan import numpy as np import pandas as pd from pandas import Series, DataFrame, bdate_range, Panel -from pandas.tseries.index import DatetimeIndex +from pandas.core.indexes.datetimes import DatetimeIndex from pandas.tseries.offsets import BDay -import pandas.util.testing as tm +from pandas.util import testing as tm from pandas.compat import lrange from pandas import compat -import pandas.sparse.frame as spf +from pandas.core.sparse import frame as spf -from pandas._sparse import BlockIndex, IntIndex -from pandas.sparse.api import SparseSeries, SparseDataFrame, SparseArray -from pandas.tests.frame.test_misc_api import SharedWithSparse +from pandas._libs.sparse import BlockIndex, IntIndex +from pandas.core.sparse.api import SparseSeries, SparseDataFrame, SparseArray +from pandas.tests.frame.test_api import SharedWithSparse -class TestSparseDataFrame(tm.TestCase, SharedWithSparse): - +class TestSparseDataFrame(SharedWithSparse): klass = SparseDataFrame - def setUp(self): + # SharedWithSparse tests use generic, klass-agnostic assertion + _assert_frame_equal = staticmethod(tm.assert_sp_frame_equal) + _assert_series_equal = staticmethod(tm.assert_sp_series_equal) + + def setup_method(self, method): self.data = {'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6], 'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6], 'C': np.arange(10, dtype=np.float64), @@ -37,6 +42,8 @@ def setUp(self): self.frame = SparseDataFrame(self.data, index=self.dates) self.iframe = SparseDataFrame(self.data, index=self.dates, default_kind='integer') + self.mixed_frame = self.frame.copy(False) + self.mixed_frame['foo'] = pd.SparseArray(['bar'] * len(self.dates)) values = self.frame.values.copy() values[np.isnan(values)] = 0 @@ -66,35 +73,35 @@ def test_fill_value_when_combine_const(self): res = df.add(2, fill_value=0) tm.assert_sp_frame_equal(res, exp) - def 
test_as_matrix(self): - empty = self.empty.as_matrix() - self.assertEqual(empty.shape, (0, 0)) + def test_values(self): + empty = self.empty.values + assert empty.shape == (0, 0) no_cols = SparseDataFrame(index=np.arange(10)) - mat = no_cols.as_matrix() - self.assertEqual(mat.shape, (10, 0)) + mat = no_cols.values + assert mat.shape == (10, 0) no_index = SparseDataFrame(columns=np.arange(10)) - mat = no_index.as_matrix() - self.assertEqual(mat.shape, (0, 10)) + mat = no_index.values + assert mat.shape == (0, 10) def test_copy(self): cp = self.frame.copy() - tm.assertIsInstance(cp, SparseDataFrame) + assert isinstance(cp, SparseDataFrame) tm.assert_sp_frame_equal(cp, self.frame) # as of v0.15.0 # this is now identical (but not is_a ) - self.assertTrue(cp.index.identical(self.frame.index)) + assert cp.index.identical(self.frame.index) def test_constructor(self): for col, series in compat.iteritems(self.frame): - tm.assertIsInstance(series, SparseSeries) + assert isinstance(series, SparseSeries) - tm.assertIsInstance(self.iframe['A'].sp_index, IntIndex) + assert isinstance(self.iframe['A'].sp_index, IntIndex) # constructed zframe from matrix above - self.assertEqual(self.zframe['A'].fill_value, 0) + assert self.zframe['A'].fill_value == 0 tm.assert_numpy_array_equal(pd.SparseArray([1., 2., 3., 4., 5., 6.]), self.zframe['A'].values) tm.assert_numpy_array_equal(np.array([0., 0., 0., 0., 1., 2., @@ -104,7 +111,7 @@ def test_constructor(self): # construct no data sdf = SparseDataFrame(columns=np.arange(10), index=np.arange(10)) for col, series in compat.iteritems(sdf): - tm.assertIsInstance(series, SparseSeries) + assert isinstance(series, SparseSeries) # construct from nested dict data = {} @@ -127,11 +134,23 @@ def test_constructor(self): tm.assert_sp_frame_equal(cons, reindexed, exact_indices=False) # assert level parameter breaks reindex - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): self.frame.reindex(idx, level=0) repr(self.frame) + def 
test_constructor_dict_order(self): + # GH19018 + # initialization ordering: by insertion order if python>= 3.6, else + # order by value + d = {'b': [2, 3], 'a': [0, 1]} + frame = SparseDataFrame(data=d) + if compat.PY36: + expected = SparseDataFrame(data=d, columns=list('ba')) + else: + expected = SparseDataFrame(data=d, columns=list('ab')) + tm.assert_sp_frame_equal(frame, expected) + def test_constructor_ndarray(self): # no index or columns sp = SparseDataFrame(self.frame.values) @@ -141,21 +160,21 @@ def test_constructor_ndarray(self): tm.assert_sp_frame_equal(sp, self.frame.reindex(columns=['A'])) # raise on level argument - self.assertRaises(TypeError, self.frame.reindex, columns=['A'], - level=1) + pytest.raises(TypeError, self.frame.reindex, columns=['A'], + level=1) # wrong length index / columns - with tm.assertRaisesRegexp(ValueError, "^Index length"): + with tm.assert_raises_regex(ValueError, "^Index length"): SparseDataFrame(self.frame.values, index=self.frame.index[:-1]) - with tm.assertRaisesRegexp(ValueError, "^Column length"): + with tm.assert_raises_regex(ValueError, "^Column length"): SparseDataFrame(self.frame.values, columns=self.frame.columns[:-1]) # GH 9272 def test_constructor_empty(self): sp = SparseDataFrame() - self.assertEqual(len(sp.index), 0) - self.assertEqual(len(sp.columns), 0) + assert len(sp.index) == 0 + assert len(sp.columns) == 0 def test_constructor_dataframe(self): dense = self.frame.to_dense() @@ -165,16 +184,16 @@ def test_constructor_dataframe(self): def test_constructor_convert_index_once(self): arr = np.array([1.5, 2.5, 3.5]) sdf = SparseDataFrame(columns=lrange(4), index=arr) - self.assertTrue(sdf[0].index is sdf[1].index) + assert sdf[0].index is sdf[1].index def test_constructor_from_series(self): # GH 2873 x = Series(np.random.randn(10000), name='a') x = x.to_sparse(fill_value=0) - tm.assertIsInstance(x, SparseSeries) + assert isinstance(x, SparseSeries) df = SparseDataFrame(x) - tm.assertIsInstance(df, 
SparseDataFrame) + assert isinstance(df, SparseDataFrame) x = Series(np.random.randn(10000), name='a') y = Series(np.random.randn(10000), name='b') @@ -192,35 +211,58 @@ def test_constructor_from_series(self): # without sparse value raises error # df2 = SparseDataFrame([x2_sparse, y]) + def test_constructor_from_dense_series(self): + # GH 19393 + # series with name + x = Series(np.random.randn(10000), name='a') + result = SparseDataFrame(x) + expected = x.to_frame().to_sparse() + tm.assert_sp_frame_equal(result, expected) + + # series with no name + x = Series(np.random.randn(10000)) + result = SparseDataFrame(x) + expected = x.to_frame().to_sparse() + tm.assert_sp_frame_equal(result, expected) + + def test_constructor_from_unknown_type(self): + # GH 19393 + class Unknown: + pass + with pytest.raises(TypeError, + message='SparseDataFrame called with unknown type ' + '"Unknown" for data argument'): + SparseDataFrame(Unknown()) + def test_constructor_preserve_attr(self): # GH 13866 arr = pd.SparseArray([1, 0, 3, 0], dtype=np.int64, fill_value=0) - self.assertEqual(arr.dtype, np.int64) - self.assertEqual(arr.fill_value, 0) + assert arr.dtype == np.int64 + assert arr.fill_value == 0 df = pd.SparseDataFrame({'x': arr}) - self.assertEqual(df['x'].dtype, np.int64) - self.assertEqual(df['x'].fill_value, 0) + assert df['x'].dtype == np.int64 + assert df['x'].fill_value == 0 s = pd.SparseSeries(arr, name='x') - self.assertEqual(s.dtype, np.int64) - self.assertEqual(s.fill_value, 0) + assert s.dtype == np.int64 + assert s.fill_value == 0 df = pd.SparseDataFrame(s) - self.assertEqual(df['x'].dtype, np.int64) - self.assertEqual(df['x'].fill_value, 0) + assert df['x'].dtype == np.int64 + assert df['x'].fill_value == 0 df = pd.SparseDataFrame({'x': s}) - self.assertEqual(df['x'].dtype, np.int64) - self.assertEqual(df['x'].fill_value, 0) + assert df['x'].dtype == np.int64 + assert df['x'].fill_value == 0 def test_constructor_nan_dataframe(self): # GH 10079 trains = np.arange(100) 
- tresholds = [10, 20, 30, 40, 50, 60] - tuples = [(i, j) for i in trains for j in tresholds] + thresholds = [10, 20, 30, 40, 50, 60] + tuples = [(i, j) for i in trains for j in thresholds] index = pd.MultiIndex.from_tuples(tuples, - names=['trains', 'tresholds']) + names=['trains', 'thresholds']) matrix = np.empty((len(index), len(trains))) matrix.fill(np.nan) df = pd.DataFrame(matrix, index=index, columns=trains, dtype=float) @@ -229,6 +271,18 @@ def test_constructor_nan_dataframe(self): dtype=float) tm.assert_sp_frame_equal(result, expected) + def test_type_coercion_at_construction(self): + # GH 15682 + result = pd.SparseDataFrame( + {'a': [1, 0, 0], 'b': [0, 1, 0], 'c': [0, 0, 1]}, dtype='uint8', + default_fill_value=0) + expected = pd.SparseDataFrame( + {'a': pd.SparseSeries([1, 0, 0], dtype='uint8'), + 'b': pd.SparseSeries([0, 1, 0], dtype='uint8'), + 'c': pd.SparseSeries([0, 0, 1], dtype='uint8')}, + default_fill_value=0) + tm.assert_sp_frame_equal(result, expected) + def test_dtypes(self): df = DataFrame(np.random.randn(10000, 4)) df.loc[:9998] = np.nan @@ -239,11 +293,11 @@ def test_dtypes(self): tm.assert_series_equal(result, expected) def test_shape(self): - # GH 10452 - self.assertEqual(self.frame.shape, (10, 4)) - self.assertEqual(self.iframe.shape, (10, 4)) - self.assertEqual(self.zframe.shape, (10, 4)) - self.assertEqual(self.fill_frame.shape, (10, 4)) + # see gh-10452 + assert self.frame.shape == (10, 4) + assert self.iframe.shape == (10, 4) + assert self.zframe.shape == (10, 4) + assert self.fill_frame.shape == (10, 4) def test_str(self): df = DataFrame(np.random.randn(10000, 4)) @@ -260,7 +314,7 @@ def test_array_interface(self): def test_pickle(self): def _test_roundtrip(frame, orig): - result = self.round_trip_pickle(frame) + result = tm.round_trip_pickle(frame) tm.assert_sp_frame_equal(frame, result) tm.assert_frame_equal(result.to_dense(), orig, check_dtype=False) @@ -271,30 +325,30 @@ def test_dense_to_sparse(self): df = DataFrame({'A': [nan, 
nan, nan, 1, 2], 'B': [1, 2, nan, nan, nan]}) sdf = df.to_sparse() - tm.assertIsInstance(sdf, SparseDataFrame) - self.assertTrue(np.isnan(sdf.default_fill_value)) - tm.assertIsInstance(sdf['A'].sp_index, BlockIndex) + assert isinstance(sdf, SparseDataFrame) + assert np.isnan(sdf.default_fill_value) + assert isinstance(sdf['A'].sp_index, BlockIndex) tm.assert_frame_equal(sdf.to_dense(), df) sdf = df.to_sparse(kind='integer') - tm.assertIsInstance(sdf['A'].sp_index, IntIndex) + assert isinstance(sdf['A'].sp_index, IntIndex) df = DataFrame({'A': [0, 0, 0, 1, 2], 'B': [1, 2, 0, 0, 0]}, dtype=float) sdf = df.to_sparse(fill_value=0) - self.assertEqual(sdf.default_fill_value, 0) + assert sdf.default_fill_value == 0 tm.assert_frame_equal(sdf.to_dense(), df) def test_density(self): df = SparseSeries([nan, nan, nan, 0, 1, 2, 3, 4, 5, 6]) - self.assertEqual(df.density, 0.7) + assert df.density == 0.7 df = SparseDataFrame({'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6], 'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6], 'C': np.arange(10), 'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]}) - self.assertEqual(df.density, 0.75) + assert df.density == 0.75 def test_sparse_to_dense(self): pass @@ -324,7 +378,7 @@ def _compare_to_dense(a, b, da, db, op): if isinstance(a, DataFrame) and isinstance(db, DataFrame): mixed_result = op(a, db) - tm.assertIsInstance(mixed_result, SparseDataFrame) + assert isinstance(mixed_result, SparseDataFrame) tm.assert_sp_frame_equal(mixed_result, sparse_result, exact_indices=False) @@ -367,10 +421,10 @@ def _compare_to_dense(a, b, da, db, op): def test_op_corners(self): empty = self.empty + self.empty - self.assertTrue(empty.empty) + assert empty.empty foo = self.frame + self.empty - tm.assertIsInstance(foo.index, DatetimeIndex) + assert isinstance(foo.index, DatetimeIndex) tm.assert_frame_equal(foo, self.frame * np.nan) foo = self.empty + self.frame @@ -387,42 +441,51 @@ def test_getitem(self): exp = sdf.reindex(columns=['a', 'b']) tm.assert_sp_frame_equal(result, 
exp) - self.assertRaises(Exception, sdf.__getitem__, ['a', 'd']) + pytest.raises(Exception, sdf.__getitem__, ['a', 'd']) - def test_icol(self): - # 10711 deprecated + def test_iloc(self): # 2227 result = self.frame.iloc[:, 0] - self.assertTrue(isinstance(result, SparseSeries)) + assert isinstance(result, SparseSeries) tm.assert_sp_series_equal(result, self.frame['A']) # preserve sparse index type. #2251 data = {'A': [0, 1]} iframe = SparseDataFrame(data, default_kind='integer') - self.assertEqual(type(iframe['A'].sp_index), - type(iframe.iloc[:, 0].sp_index)) + tm.assert_class_equal(iframe['A'].sp_index, + iframe.iloc[:, 0].sp_index) def test_set_value(self): - # ok as the index gets conver to object + # ok, as the index gets converted to object frame = self.frame.copy() - res = frame.set_value('foobar', 'B', 1.5) - self.assertEqual(res.index.dtype, 'object') + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + res = frame.set_value('foobar', 'B', 1.5) + assert res.index.dtype == 'object' res = self.frame res.index = res.index.astype(object) - res = self.frame.set_value('foobar', 'B', 1.5) - self.assertIsNot(res, self.frame) - self.assertEqual(res.index[-1], 'foobar') - self.assertEqual(res.get_value('foobar', 'B'), 1.5) - - res2 = res.set_value('foobar', 'qux', 1.5) - self.assertIsNot(res2, res) - self.assert_index_equal(res2.columns, - pd.Index(list(self.frame.columns) + ['qux'])) - self.assertEqual(res2.get_value('foobar', 'qux'), 1.5) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + res = self.frame.set_value('foobar', 'B', 1.5) + assert res is not self.frame + assert res.index[-1] == 'foobar' + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + assert res.get_value('foobar', 'B') == 1.5 + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + res2 = res.set_value('foobar', 'qux', 1.5) + assert res2 is not res + tm.assert_index_equal(res2.columns, + 
pd.Index(list(self.frame.columns) + ['qux'])) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + assert res2.get_value('foobar', 'qux') == 1.5 def test_fancy_index_misc(self): # axis = 0 @@ -447,8 +510,8 @@ def test_getitem_overload(self): subindex = self.frame.index[indexer] subframe = self.frame[indexer] - self.assert_index_equal(subindex, subframe.index) - self.assertRaises(Exception, self.frame.__getitem__, indexer[:-1]) + tm.assert_index_equal(subindex, subframe.index) + pytest.raises(Exception, self.frame.__getitem__, indexer[:-1]) def test_setitem(self): @@ -457,7 +520,7 @@ def _check_frame(frame, orig): # insert SparseSeries frame['E'] = frame['A'] - tm.assertIsInstance(frame['E'], SparseSeries) + assert isinstance(frame['E'], SparseSeries) tm.assert_sp_series_equal(frame['E'], frame['A'], check_names=False) @@ -467,11 +530,11 @@ def _check_frame(frame, orig): expected = to_insert.to_dense().reindex(frame.index) result = frame['E'].to_dense() tm.assert_series_equal(result, expected, check_names=False) - self.assertEqual(result.name, 'E') + assert result.name == 'E' # insert Series frame['F'] = frame['A'].to_dense() - tm.assertIsInstance(frame['F'], SparseSeries) + assert isinstance(frame['F'], SparseSeries) tm.assert_sp_series_equal(frame['F'], frame['A'], check_names=False) @@ -484,24 +547,24 @@ def _check_frame(frame, orig): # insert ndarray frame['H'] = np.random.randn(N) - tm.assertIsInstance(frame['H'], SparseSeries) + assert isinstance(frame['H'], SparseSeries) to_sparsify = np.random.randn(N) to_sparsify[N // 2:] = frame.default_fill_value frame['I'] = to_sparsify - self.assertEqual(len(frame['I'].sp_values), N // 2) + assert len(frame['I'].sp_values) == N // 2 # insert ndarray wrong size - self.assertRaises(Exception, frame.__setitem__, 'foo', - np.random.randn(N - 1)) + pytest.raises(Exception, frame.__setitem__, 'foo', + np.random.randn(N - 1)) # scalar value frame['J'] = 5 - self.assertEqual(len(frame['J'].sp_values), 
N) - self.assertTrue((frame['J'].sp_values == 5).all()) + assert len(frame['J'].sp_values) == N + assert (frame['J'].sp_values == 5).all() frame['K'] = frame.default_fill_value - self.assertEqual(len(frame['K'].sp_values), 0) + assert len(frame['K'].sp_values) == 0 self._check_all(_check_frame) @@ -523,30 +586,39 @@ def test_setitem_array(self): self.frame['F'].reindex(index), check_names=False) + def test_setitem_chained_no_consolidate(self): + # https://github.com/pandas-dev/pandas/pull/19268 + # issuecomment-361696418 + # chained setitem used to cause consolidation + sdf = pd.SparseDataFrame([[np.nan, 1], [2, np.nan]]) + with pd.option_context('mode.chained_assignment', None): + sdf[0][1] = 2 + assert len(sdf._data.blocks) == 2 + def test_delitem(self): A = self.frame['A'] C = self.frame['C'] del self.frame['B'] - self.assertNotIn('B', self.frame) + assert 'B' not in self.frame tm.assert_sp_series_equal(self.frame['A'], A) tm.assert_sp_series_equal(self.frame['C'], C) del self.frame['D'] - self.assertNotIn('D', self.frame) + assert 'D' not in self.frame del self.frame['A'] - self.assertNotIn('A', self.frame) + assert 'A' not in self.frame def test_set_columns(self): self.frame.columns = self.frame.columns - self.assertRaises(Exception, setattr, self.frame, 'columns', - self.frame.columns[:-1]) + pytest.raises(Exception, setattr, self.frame, 'columns', + self.frame.columns[:-1]) def test_set_index(self): self.frame.index = self.frame.index - self.assertRaises(Exception, setattr, self.frame, 'index', - self.frame.index[:-1]) + pytest.raises(Exception, setattr, self.frame, 'index', + self.frame.index[:-1]) def test_append(self): a = self.frame[:5] @@ -561,59 +633,13 @@ def test_append(self): tm.assert_sp_frame_equal(appended.iloc[:, :3], self.frame.iloc[:, :3], exact_indices=False) - def test_apply(self): - applied = self.frame.apply(np.sqrt) - tm.assertIsInstance(applied, SparseDataFrame) - tm.assert_almost_equal(applied.values, np.sqrt(self.frame.values)) - - 
applied = self.fill_frame.apply(np.sqrt) - self.assertEqual(applied['A'].fill_value, np.sqrt(2)) - - # agg / broadcast - broadcasted = self.frame.apply(np.sum, broadcast=True) - tm.assertIsInstance(broadcasted, SparseDataFrame) - - exp = self.frame.to_dense().apply(np.sum, broadcast=True) - tm.assert_frame_equal(broadcasted.to_dense(), exp) - - self.assertIs(self.empty.apply(np.sqrt), self.empty) - - from pandas.core import nanops - applied = self.frame.apply(np.sum) - tm.assert_series_equal(applied, - self.frame.to_dense().apply(nanops.nansum)) - - def test_apply_nonuq(self): - orig = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], - index=['a', 'a', 'c']) - sparse = orig.to_sparse() - res = sparse.apply(lambda s: s[0], axis=1) - exp = orig.apply(lambda s: s[0], axis=1) - # dtype must be kept - self.assertEqual(res.dtype, np.int64) - # ToDo: apply must return subclassed dtype - self.assertIsInstance(res, pd.Series) - tm.assert_series_equal(res.to_dense(), exp) - - # df.T breaks - sparse = orig.T.to_sparse() - res = sparse.apply(lambda s: s[0], axis=0) # noqa - exp = orig.T.apply(lambda s: s[0], axis=0) - # TODO: no non-unique columns supported in sparse yet - # tm.assert_series_equal(res.to_dense(), exp) - - def test_applymap(self): - # just test that it works - result = self.frame.applymap(lambda x: x * 2) - tm.assertIsInstance(result, SparseDataFrame) - def test_astype(self): sparse = pd.SparseDataFrame({'A': SparseArray([1, 2, 3, 4], dtype=np.int64), 'B': SparseArray([4, 5, 6, 7], dtype=np.int64)}) - self.assertEqual(sparse['A'].dtype, np.int64) - self.assertEqual(sparse['B'].dtype, np.int64) + assert sparse['A'].dtype == np.int64 + assert sparse['B'].dtype == np.int64 res = sparse.astype(np.float64) exp = pd.SparseDataFrame({'A': SparseArray([1., 2., 3., 4.], @@ -622,16 +648,16 @@ def test_astype(self): fill_value=0.)}, default_fill_value=np.nan) tm.assert_sp_frame_equal(res, exp) - self.assertEqual(res['A'].dtype, np.float64) - self.assertEqual(res['B'].dtype, 
np.float64) + assert res['A'].dtype == np.float64 + assert res['B'].dtype == np.float64 sparse = pd.SparseDataFrame({'A': SparseArray([0, 2, 0, 4], dtype=np.int64), 'B': SparseArray([0, 5, 0, 7], dtype=np.int64)}, default_fill_value=0) - self.assertEqual(sparse['A'].dtype, np.int64) - self.assertEqual(sparse['B'].dtype, np.int64) + assert sparse['A'].dtype == np.int64 + assert sparse['B'].dtype == np.int64 res = sparse.astype(np.float64) exp = pd.SparseDataFrame({'A': SparseArray([0., 2., 0., 4.], @@ -640,8 +666,8 @@ def test_astype(self): fill_value=0.)}, default_fill_value=0.) tm.assert_sp_frame_equal(res, exp) - self.assertEqual(res['A'].dtype, np.float64) - self.assertEqual(res['B'].dtype, np.float64) + assert res['A'].dtype == np.float64 + assert res['B'].dtype == np.float64 def test_astype_bool(self): sparse = pd.SparseDataFrame({'A': SparseArray([0, 2, 0, 4], @@ -651,8 +677,8 @@ def test_astype_bool(self): fill_value=0, dtype=np.int64)}, default_fill_value=0) - self.assertEqual(sparse['A'].dtype, np.int64) - self.assertEqual(sparse['B'].dtype, np.int64) + assert sparse['A'].dtype == np.int64 + assert sparse['B'].dtype == np.int64 res = sparse.astype(bool) exp = pd.SparseDataFrame({'A': SparseArray([False, True, False, True], @@ -663,8 +689,8 @@ def test_astype_bool(self): fill_value=False)}, default_fill_value=False) tm.assert_sp_frame_equal(res, exp) - self.assertEqual(res['A'].dtype, np.bool) - self.assertEqual(res['B'].dtype, np.bool) + assert res['A'].dtype == np.bool + assert res['B'].dtype == np.bool def test_fillna(self): df = self.zframe.reindex(lrange(5)) @@ -749,9 +775,18 @@ def test_sparse_frame_fillna_limit(self): tm.assert_frame_equal(result, expected) def test_rename(self): - # just check this works - renamed = self.frame.rename(index=str) # noqa - renamed = self.frame.rename(columns=lambda x: '%s%d' % (x, len(x))) # noqa + result = self.frame.rename(index=str) + expected = SparseDataFrame(self.data, index=self.dates.strftime( + "%Y-%m-%d 
%H:%M:%S")) + tm.assert_sp_frame_equal(result, expected) + + result = self.frame.rename(columns=lambda x: '%s%d' % (x, len(x))) + data = {'A1': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6], + 'B1': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6], + 'C1': np.arange(10, dtype=np.float64), + 'D1': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]} + expected = SparseDataFrame(data, index=self.dates) + tm.assert_sp_frame_equal(result, expected) def test_corr(self): res = self.frame.corr() @@ -770,10 +805,10 @@ def test_join(self): tm.assert_sp_frame_equal(joined, self.frame, exact_indices=False) right = self.frame.loc[:, ['B', 'D']] - self.assertRaises(Exception, left.join, right) + pytest.raises(Exception, left.join, right) - with tm.assertRaisesRegexp(ValueError, - 'Other Series must have a name'): + with tm.assert_raises_regex(ValueError, + 'Other Series must have a name'): self.frame.join(Series( np.random.randn(len(self.frame)), index=self.frame.index)) @@ -803,22 +838,22 @@ def _check_frame(frame): # length zero length_zero = frame.reindex([]) - self.assertEqual(len(length_zero), 0) - self.assertEqual(len(length_zero.columns), len(frame.columns)) - self.assertEqual(len(length_zero['A']), 0) + assert len(length_zero) == 0 + assert len(length_zero.columns) == len(frame.columns) + assert len(length_zero['A']) == 0 # frame being reindexed has length zero length_n = length_zero.reindex(index) - self.assertEqual(len(length_n), len(frame)) - self.assertEqual(len(length_n.columns), len(frame.columns)) - self.assertEqual(len(length_n['A']), len(frame)) + assert len(length_n) == len(frame) + assert len(length_n.columns) == len(frame.columns) + assert len(length_n['A']) == len(frame) # reindex columns reindexed = frame.reindex(columns=['A', 'B', 'Z']) - self.assertEqual(len(reindexed.columns), 3) + assert len(reindexed.columns) == 3 tm.assert_almost_equal(reindexed['Z'].fill_value, frame.default_fill_value) - self.assertTrue(np.isnan(reindexed['Z'].sp_values).all()) + assert 
np.isnan(reindexed['Z'].sp_values).all() _check_frame(self.frame) _check_frame(self.iframe) @@ -828,11 +863,11 @@ def _check_frame(frame): # with copy=False reindexed = self.frame.reindex(self.frame.index, copy=False) reindexed['F'] = reindexed['A'] - self.assertIn('F', self.frame) + assert 'F' in self.frame reindexed = self.frame.reindex(self.frame.index) reindexed['G'] = reindexed['A'] - self.assertNotIn('G', self.frame) + assert 'G' not in self.frame def test_reindex_fill_value(self): rng = bdate_range('20110110', periods=20) @@ -905,11 +940,11 @@ def test_reindex_method(self): tm.assert_sp_frame_equal(result, expected) # method='bfill' - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): sparse.reindex(columns=range(6), method='bfill') # method='ffill' - with tm.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): sparse.reindex(columns=range(6), method='ffill') def test_take(self): @@ -926,23 +961,25 @@ def _check(frame, orig): self._check_all(_check) def test_stack_sparse_frame(self): - def _check(frame): - dense_frame = frame.to_dense() # noqa + with catch_warnings(record=True): + + def _check(frame): + dense_frame = frame.to_dense() # noqa - wp = Panel.from_dict({'foo': frame}) - from_dense_lp = wp.to_frame() + wp = Panel.from_dict({'foo': frame}) + from_dense_lp = wp.to_frame() - from_sparse_lp = spf.stack_sparse_frame(frame) + from_sparse_lp = spf.stack_sparse_frame(frame) - self.assert_numpy_array_equal(from_dense_lp.values, - from_sparse_lp.values) + tm.assert_numpy_array_equal(from_dense_lp.values, + from_sparse_lp.values) - _check(self.frame) - _check(self.iframe) + _check(self.frame) + _check(self.iframe) - # for now - self.assertRaises(Exception, _check, self.zframe) - self.assertRaises(Exception, _check, self.fill_frame) + # for now + pytest.raises(Exception, _check, self.zframe) + pytest.raises(Exception, _check, self.fill_frame) def test_transpose(self): @@ -960,7 +997,6 @@ def 
_check(frame, orig): def test_shift(self): def _check(frame, orig): - shifted = frame.shift(0) exp = orig.shift(0) tm.assert_frame_equal(shifted.to_dense(), exp) @@ -975,12 +1011,14 @@ def _check(frame, orig): shifted = frame.shift(2, freq='B') exp = orig.shift(2, freq='B') - exp = exp.to_sparse(frame.default_fill_value) + exp = exp.to_sparse(frame.default_fill_value, + kind=frame.default_kind) tm.assert_frame_equal(shifted, exp) shifted = frame.shift(2, freq=BDay()) exp = orig.shift(2, freq=BDay()) - exp = exp.to_sparse(frame.default_fill_value) + exp = exp.to_sparse(frame.default_fill_value, + kind=frame.default_kind) tm.assert_frame_equal(shifted, exp) self._check_all(_check) @@ -1015,7 +1053,7 @@ def test_numpy_transpose(self): tm.assert_sp_frame_equal(result, sdf) msg = "the 'axes' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.transpose, sdf, axes=1) + tm.assert_raises_regex(ValueError, msg, np.transpose, sdf, axes=1) def test_combine_first(self): df = self.frame @@ -1053,33 +1091,38 @@ def test_sparse_pow_issue(self): df = SparseDataFrame({'A': [nan, 0, 1]}) # note that 2 ** df works fine, also df ** 1 - result = 1**df + result = 1 ** df r1 = result.take([0], 1)['A'] r2 = result['A'] - self.assertEqual(len(r2.sp_values), len(r1.sp_values)) + assert len(r2.sp_values) == len(r1.sp_values) def test_as_blocks(self): df = SparseDataFrame({'A': [1.1, 3.3], 'B': [nan, -3.9]}, dtype='float64') - df_blocks = df.blocks - self.assertEqual(list(df_blocks.keys()), ['float64']) + # deprecated 0.21.0 + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + df_blocks = df.blocks + assert list(df_blocks.keys()) == ['float64'] tm.assert_frame_equal(df_blocks['float64'], df) + @pytest.mark.xfail(reason='nan column names in _init_dict problematic ' + '(GH 16894)') def test_nan_columnname(self): # GH 8822 nan_colname = DataFrame(Series(1.0, index=[0]), columns=[nan]) nan_colname_sparse = nan_colname.to_sparse() - 
self.assertTrue(np.isnan(nan_colname_sparse.columns[0])) + assert np.isnan(nan_colname_sparse.columns[0]) - def test_isnull(self): + def test_isna(self): # GH 8276 df = pd.SparseDataFrame({'A': [np.nan, np.nan, 1, 2, np.nan], 'B': [0, np.nan, np.nan, 2, np.nan]}) - res = df.isnull() + res = df.isna() exp = pd.SparseDataFrame({'A': [True, True, False, False, True], 'B': [False, True, True, False, True]}, default_fill_value=True) @@ -1090,18 +1133,18 @@ def test_isnull(self): df = pd.SparseDataFrame({'A': [0, 0, 1, 2, np.nan], 'B': [0, np.nan, 0, 2, np.nan]}, default_fill_value=0.) - res = df.isnull() - tm.assertIsInstance(res, pd.SparseDataFrame) + res = df.isna() + assert isinstance(res, pd.SparseDataFrame) exp = pd.DataFrame({'A': [False, False, False, False, True], 'B': [False, True, False, False, True]}) tm.assert_frame_equal(res.to_dense(), exp) - def test_isnotnull(self): + def test_notna(self): # GH 8276 df = pd.SparseDataFrame({'A': [np.nan, np.nan, 1, 2, np.nan], 'B': [0, np.nan, np.nan, 2, np.nan]}) - res = df.isnotnull() + res = df.notna() exp = pd.SparseDataFrame({'A': [False, False, True, True, False], 'B': [True, False, False, True, False]}, default_fill_value=False) @@ -1112,14 +1155,14 @@ def test_isnotnull(self): df = pd.SparseDataFrame({'A': [0, 0, 1, 2, np.nan], 'B': [0, np.nan, 0, 2, np.nan]}, default_fill_value=0.) 
- res = df.isnotnull() - tm.assertIsInstance(res, pd.SparseDataFrame) + res = df.notna() + assert isinstance(res, pd.SparseDataFrame) exp = pd.DataFrame({'A': [True, True, True, True, False], 'B': [True, False, True, True, False]}) tm.assert_frame_equal(res.to_dense(), exp) -class TestSparseDataFrameArithmetic(tm.TestCase): +class TestSparseDataFrameArithmetic(object): def test_numeric_op_scalar(self): df = pd.DataFrame({'A': [nan, nan, 0, 1, ], @@ -1140,17 +1183,16 @@ def test_comparison_op_scalar(self): # comparison changes internal repr, compare with dense res = sparse > 1 - tm.assertIsInstance(res, pd.SparseDataFrame) + assert isinstance(res, pd.SparseDataFrame) tm.assert_frame_equal(res.to_dense(), df > 1) res = sparse != 0 - tm.assertIsInstance(res, pd.SparseDataFrame) + assert isinstance(res, pd.SparseDataFrame) tm.assert_frame_equal(res.to_dense(), df != 0) -class TestSparseDataFrameAnalytics(tm.TestCase): - - def setUp(self): +class TestSparseDataFrameAnalytics(object): + def setup_method(self, method): self.data = {'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6], 'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6], 'C': np.arange(10, dtype=float), @@ -1178,12 +1220,12 @@ def test_numpy_cumsum(self): tm.assert_sp_frame_equal(result, expected) msg = "the 'dtype' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.cumsum, - self.frame, dtype=np.int64) + tm.assert_raises_regex(ValueError, msg, np.cumsum, + self.frame, dtype=np.int64) msg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.cumsum, - self.frame, out=result) + tm.assert_raises_regex(ValueError, msg, np.cumsum, + self.frame, out=result) def test_numpy_func_call(self): # no exception should be raised even though @@ -1193,3 +1235,48 @@ def test_numpy_func_call(self): 'std', 'min', 'max'] for func in funcs: getattr(np, func)(self.frame) + + @pytest.mark.xfail(reason='Wrong SparseBlock initialization ' + '(GH 17386)') + def test_quantile(self): + # GH 17386 + data 
= [[1, 1], [2, 10], [3, 100], [nan, nan]] + q = 0.1 + + sparse_df = SparseDataFrame(data) + result = sparse_df.quantile(q) + + dense_df = DataFrame(data) + dense_expected = dense_df.quantile(q) + sparse_expected = SparseSeries(dense_expected) + + tm.assert_series_equal(result, dense_expected) + tm.assert_sp_series_equal(result, sparse_expected) + + @pytest.mark.xfail(reason='Wrong SparseBlock initialization ' + '(GH 17386)') + def test_quantile_multi(self): + # GH 17386 + data = [[1, 1], [2, 10], [3, 100], [nan, nan]] + q = [0.1, 0.5] + + sparse_df = SparseDataFrame(data) + result = sparse_df.quantile(q) + + dense_df = DataFrame(data) + dense_expected = dense_df.quantile(q) + sparse_expected = SparseDataFrame(dense_expected) + + tm.assert_frame_equal(result, dense_expected) + tm.assert_sp_frame_equal(result, sparse_expected) + + def test_assign_with_sparse_frame(self): + # GH 19163 + df = pd.DataFrame({"a": [1, 2, 3]}) + res = df.to_sparse(fill_value=False).assign(newcol=False) + exp = df.assign(newcol=False).to_sparse(fill_value=False) + + tm.assert_sp_frame_equal(res, exp) + + for column in res.columns: + assert type(res[column]) is SparseSeries diff --git a/pandas/tests/sparse/frame/test_indexing.py b/pandas/tests/sparse/frame/test_indexing.py new file mode 100644 index 0000000000000..1c27d44015c2b --- /dev/null +++ b/pandas/tests/sparse/frame/test_indexing.py @@ -0,0 +1,113 @@ +import pytest +import numpy as np +from pandas import SparseDataFrame, DataFrame +from pandas.util import testing as tm + + +pytestmark = pytest.mark.skip("Wrong SparseBlock initialization (GH 17386)") + + +@pytest.mark.parametrize('data', [ + [[1, 1], [2, 2], [3, 3], [4, 4], [0, 0]], + [[1.0, 1.0], [2.0, 2.0], [3.0, 3.0], [4.0, 4.0], [np.nan, np.nan]], + [ + [1.0, 1.0 + 1.0j], + [2.0 + 2.0j, 2.0], + [3.0, 3.0 + 3.0j], + [4.0 + 4.0j, 4.0], + [np.nan, np.nan] + ] +]) +@pytest.mark.xfail(reason='Wrong SparseBlock initialization ' + '(GH 17386)') +def test_where_with_numeric_data(data): + # 
GH 17386 + lower_bound = 1.5 + + sparse = SparseDataFrame(data) + result = sparse.where(sparse > lower_bound) + + dense = DataFrame(data) + dense_expected = dense.where(dense > lower_bound) + sparse_expected = SparseDataFrame(dense_expected) + + tm.assert_frame_equal(result, dense_expected) + tm.assert_sp_frame_equal(result, sparse_expected) + + +@pytest.mark.parametrize('data', [ + [[1, 1], [2, 2], [3, 3], [4, 4], [0, 0]], + [[1.0, 1.0], [2.0, 2.0], [3.0, 3.0], [4.0, 4.0], [np.nan, np.nan]], + [ + [1.0, 1.0 + 1.0j], + [2.0 + 2.0j, 2.0], + [3.0, 3.0 + 3.0j], + [4.0 + 4.0j, 4.0], + [np.nan, np.nan] + ] +]) +@pytest.mark.parametrize('other', [ + True, + -100, + 0.1, + 100.0 + 100.0j +]) +@pytest.mark.xfail(reason='Wrong SparseBlock initialization ' + '(GH 17386)') +def test_where_with_numeric_data_and_other(data, other): + # GH 17386 + lower_bound = 1.5 + + sparse = SparseDataFrame(data) + result = sparse.where(sparse > lower_bound, other) + + dense = DataFrame(data) + dense_expected = dense.where(dense > lower_bound, other) + sparse_expected = SparseDataFrame(dense_expected, + default_fill_value=other) + + tm.assert_frame_equal(result, dense_expected) + tm.assert_sp_frame_equal(result, sparse_expected) + + +@pytest.mark.xfail(reason='Wrong SparseBlock initialization ' + '(GH 17386)') +def test_where_with_bool_data(): + # GH 17386 + data = [[False, False], [True, True], [False, False]] + cond = True + + sparse = SparseDataFrame(data) + result = sparse.where(sparse == cond) + + dense = DataFrame(data) + dense_expected = dense.where(dense == cond) + sparse_expected = SparseDataFrame(dense_expected) + + tm.assert_frame_equal(result, dense_expected) + tm.assert_sp_frame_equal(result, sparse_expected) + + +@pytest.mark.parametrize('other', [ + True, + 0, + 0.1, + 100.0 + 100.0j +]) +@pytest.mark.xfail(reason='Wrong SparseBlock initialization ' + '(GH 17386)') +def test_where_with_bool_data_and_other(other): + # GH 17386 + data = [[False, False], [True, True], [False, 
False]] + cond = True + + sparse = SparseDataFrame(data) + result = sparse.where(sparse == cond, other) + + dense = DataFrame(data) + dense_expected = dense.where(dense == cond, other) + sparse_expected = SparseDataFrame(dense_expected, + default_fill_value=other) + + tm.assert_frame_equal(result, dense_expected) + tm.assert_sp_frame_equal(result, sparse_expected) diff --git a/pandas/tests/sparse/frame/test_to_csv.py b/pandas/tests/sparse/frame/test_to_csv.py new file mode 100644 index 0000000000000..b0243dfde8d3f --- /dev/null +++ b/pandas/tests/sparse/frame/test_to_csv.py @@ -0,0 +1,20 @@ +import numpy as np +import pytest +from pandas import SparseDataFrame, read_csv +from pandas.util import testing as tm + + +class TestSparseDataFrameToCsv(object): + fill_values = [np.nan, 0, None, 1] + + @pytest.mark.parametrize('fill_value', fill_values) + def test_to_csv_sparse_dataframe(self, fill_value): + # GH19384 + sdf = SparseDataFrame({'a': type(self).fill_values}, + default_fill_value=fill_value) + + with tm.ensure_clean('sparse_df.csv') as path: + sdf.to_csv(path, index=False) + df = read_csv(path, skip_blank_lines=False) + + tm.assert_sp_frame_equal(df.to_sparse(fill_value=fill_value), sdf) diff --git a/pandas/tests/sparse/frame/test_to_from_scipy.py b/pandas/tests/sparse/frame/test_to_from_scipy.py new file mode 100644 index 0000000000000..aef49c84fc2ad --- /dev/null +++ b/pandas/tests/sparse/frame/test_to_from_scipy.py @@ -0,0 +1,168 @@ +import pytest +import numpy as np +from warnings import catch_warnings +from pandas.util import testing as tm +from pandas import SparseDataFrame, SparseSeries +from distutils.version import LooseVersion +from pandas.core.dtypes.common import ( + is_bool_dtype, + is_float_dtype, + is_object_dtype, + is_float) + + +scipy = pytest.importorskip('scipy') + + +@pytest.mark.parametrize('index', [None, list('abc')]) # noqa: F811 +@pytest.mark.parametrize('columns', [None, list('def')]) +@pytest.mark.parametrize('fill_value', [None, 0, 
np.nan]) +@pytest.mark.parametrize('dtype', [bool, int, float, np.uint16]) +def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype): + # GH 4343 + # Make one ndarray and from it one sparse matrix, both to be used for + # constructing frames and comparing results + arr = np.eye(3, dtype=dtype) + # GH 16179 + arr[0, 1] = dtype(2) + try: + spm = spmatrix(arr) + assert spm.dtype == arr.dtype + except (TypeError, AssertionError): + # If conversion to sparse fails for this spmatrix type and arr.dtype, + # then the combination is not currently supported in NumPy, so we + # can just skip testing it thoroughly + return + + sdf = SparseDataFrame(spm, index=index, columns=columns, + default_fill_value=fill_value) + + # Expected result construction is kind of tricky for all + # dtype-fill_value combinations; easiest to cast to something generic + # and except later on + rarr = arr.astype(object) + rarr[arr == 0] = np.nan + expected = SparseDataFrame(rarr, index=index, columns=columns).fillna( + fill_value if fill_value is not None else np.nan) + + # Assert frame is as expected + sdf_obj = sdf.astype(object) + tm.assert_sp_frame_equal(sdf_obj, expected) + tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense()) + + # Assert spmatrices equal + assert dict(sdf.to_coo().todok()) == dict(spm.todok()) + + # Ensure dtype is preserved if possible + was_upcast = ((fill_value is None or is_float(fill_value)) and + not is_object_dtype(dtype) and + not is_float_dtype(dtype)) + res_dtype = (bool if is_bool_dtype(dtype) else + float if was_upcast else + dtype) + tm.assert_contains_all(sdf.dtypes, {np.dtype(res_dtype)}) + assert sdf.to_coo().dtype == res_dtype + + # However, adding a str column results in an upcast to object + sdf['strings'] = np.arange(len(sdf)).astype(str) + assert sdf.to_coo().dtype == np.object_ + + +@pytest.mark.parametrize('fill_value', [None, 0, np.nan]) # noqa: F811 +def test_from_to_scipy_object(spmatrix, fill_value): + # GH 4343 + dtype = object + 
columns = list('cd') + index = list('ab') + + if (spmatrix is scipy.sparse.dok_matrix and LooseVersion( + scipy.__version__) >= LooseVersion('0.19.0')): + pytest.skip("dok_matrix from object does not work in SciPy >= 0.19") + + # Make one ndarray and from it one sparse matrix, both to be used for + # constructing frames and comparing results + arr = np.eye(2, dtype=dtype) + try: + spm = spmatrix(arr) + assert spm.dtype == arr.dtype + except (TypeError, AssertionError): + # If conversion to sparse fails for this spmatrix type and arr.dtype, + # then the combination is not currently supported in NumPy, so we + # can just skip testing it thoroughly + return + + sdf = SparseDataFrame(spm, index=index, columns=columns, + default_fill_value=fill_value) + + # Expected result construction is kind of tricky for all + # dtype-fill_value combinations; easiest to cast to something generic + # and except later on + rarr = arr.astype(object) + rarr[arr == 0] = np.nan + expected = SparseDataFrame(rarr, index=index, columns=columns).fillna( + fill_value if fill_value is not None else np.nan) + + # Assert frame is as expected + sdf_obj = sdf.astype(object) + tm.assert_sp_frame_equal(sdf_obj, expected) + tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense()) + + # Assert spmatrices equal + with catch_warnings(record=True): + assert dict(sdf.to_coo().todok()) == dict(spm.todok()) + + # Ensure dtype is preserved if possible + res_dtype = object + tm.assert_contains_all(sdf.dtypes, {np.dtype(res_dtype)}) + assert sdf.to_coo().dtype == res_dtype + + +def test_from_scipy_correct_ordering(spmatrix): + # GH 16179 + arr = np.arange(1, 5).reshape(2, 2) + try: + spm = spmatrix(arr) + assert spm.dtype == arr.dtype + except (TypeError, AssertionError): + # If conversion to sparse fails for this spmatrix type and arr.dtype, + # then the combination is not currently supported in NumPy, so we + # can just skip testing it thoroughly + return + + sdf = SparseDataFrame(spm) + expected = 
SparseDataFrame(arr) + tm.assert_sp_frame_equal(sdf, expected) + tm.assert_frame_equal(sdf.to_dense(), expected.to_dense()) + + +def test_from_scipy_fillna(spmatrix): + # GH 16112 + arr = np.eye(3) + arr[1:, 0] = np.nan + + try: + spm = spmatrix(arr) + assert spm.dtype == arr.dtype + except (TypeError, AssertionError): + # If conversion to sparse fails for this spmatrix type and arr.dtype, + # then the combination is not currently supported in NumPy, so we + # can just skip testing it thoroughly + return + + sdf = SparseDataFrame(spm).fillna(-1.0) + + # Returning frame should fill all nan values with -1.0 + expected = SparseDataFrame({ + 0: SparseSeries([1., -1, -1]), + 1: SparseSeries([np.nan, 1, np.nan]), + 2: SparseSeries([np.nan, np.nan, 1]), + }, default_fill_value=-1) + + # fill_value is expected to be what .fillna() above was called with + # We don't use -1 as initial fill_value in expected SparseSeries + # construction because this way we obtain "compressed" SparseArrays, + # avoiding having to construct them ourselves + for col in expected: + expected[col].fill_value = -1 + + tm.assert_sp_frame_equal(sdf, expected) diff --git a/pandas/tests/sparse/series/__init__.py b/pandas/tests/sparse/series/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/sparse/series/test_indexing.py b/pandas/tests/sparse/series/test_indexing.py new file mode 100644 index 0000000000000..de01b065a9fa0 --- /dev/null +++ b/pandas/tests/sparse/series/test_indexing.py @@ -0,0 +1,113 @@ +import pytest +import numpy as np +from pandas import SparseSeries, Series +from pandas.util import testing as tm + + +pytestmark = pytest.mark.skip("Wrong SparseBlock initialization (GH 17386)") + + +@pytest.mark.parametrize('data', [ + [1, 1, 2, 2, 3, 3, 4, 4, 0, 0], + [1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0, np.nan, np.nan], + [ + 1.0, 1.0 + 1.0j, + 2.0 + 2.0j, 2.0, + 3.0, 3.0 + 3.0j, + 4.0 + 4.0j, 4.0, + np.nan, np.nan + ] +]) 
+@pytest.mark.xfail(reason='Wrong SparseBlock initialization ' + '(GH 17386)') +def test_where_with_numeric_data(data): + # GH 17386 + lower_bound = 1.5 + + sparse = SparseSeries(data) + result = sparse.where(sparse > lower_bound) + + dense = Series(data) + dense_expected = dense.where(dense > lower_bound) + sparse_expected = SparseSeries(dense_expected) + + tm.assert_series_equal(result, dense_expected) + tm.assert_sp_series_equal(result, sparse_expected) + + +@pytest.mark.parametrize('data', [ + [1, 1, 2, 2, 3, 3, 4, 4, 0, 0], + [1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0, np.nan, np.nan], + [ + 1.0, 1.0 + 1.0j, + 2.0 + 2.0j, 2.0, + 3.0, 3.0 + 3.0j, + 4.0 + 4.0j, 4.0, + np.nan, np.nan + ] +]) +@pytest.mark.parametrize('other', [ + True, + -100, + 0.1, + 100.0 + 100.0j +]) +@pytest.mark.skip(reason='Wrong SparseBlock initialization ' + '(Segfault) ' + '(GH 17386)') +def test_where_with_numeric_data_and_other(data, other): + # GH 17386 + lower_bound = 1.5 + + sparse = SparseSeries(data) + result = sparse.where(sparse > lower_bound, other) + + dense = Series(data) + dense_expected = dense.where(dense > lower_bound, other) + sparse_expected = SparseSeries(dense_expected, fill_value=other) + + tm.assert_series_equal(result, dense_expected) + tm.assert_sp_series_equal(result, sparse_expected) + + +@pytest.mark.xfail(reason='Wrong SparseBlock initialization ' + '(GH 17386)') +def test_where_with_bool_data(): + # GH 17386 + data = [False, False, True, True, False, False] + cond = True + + sparse = SparseSeries(data) + result = sparse.where(sparse == cond) + + dense = Series(data) + dense_expected = dense.where(dense == cond) + sparse_expected = SparseSeries(dense_expected) + + tm.assert_series_equal(result, dense_expected) + tm.assert_sp_series_equal(result, sparse_expected) + + +@pytest.mark.parametrize('other', [ + True, + 0, + 0.1, + 100.0 + 100.0j +]) +@pytest.mark.skip(reason='Wrong SparseBlock initialization ' + '(Segfault) ' + '(GH 17386)') +def 
test_where_with_bool_data_and_other(other): + # GH 17386 + data = [False, False, True, True, False, False] + cond = True + + sparse = SparseSeries(data) + result = sparse.where(sparse == cond, other) + + dense = Series(data) + dense_expected = dense.where(dense == cond, other) + sparse_expected = SparseSeries(dense_expected, fill_value=other) + + tm.assert_series_equal(result, dense_expected) + tm.assert_sp_series_equal(result, sparse_expected) diff --git a/pandas/sparse/tests/test_series.py b/pandas/tests/sparse/series/test_series.py similarity index 80% rename from pandas/sparse/tests/test_series.py rename to pandas/tests/sparse/series/test_series.py index d4543b97af4dd..eb63c87820070 100644 --- a/pandas/sparse/tests/test_series.py +++ b/pandas/tests/sparse/series/test_series.py @@ -1,24 +1,29 @@ # pylint: disable-msg=E1101,W0612 import operator +from datetime import datetime + +import pytest from numpy import nan import numpy as np import pandas as pd -from pandas import Series, DataFrame, bdate_range -from pandas.core.common import isnull +from pandas import (Series, DataFrame, bdate_range, + isna, compat, _np_version_under1p12) from pandas.tseries.offsets import BDay import pandas.util.testing as tm -from pandas.compat import range -from pandas import compat -from pandas.tools.util import cartesian_product +import pandas.util._test_decorators as td +from pandas.compat import range, PY36 +from pandas.core.reshape.util import cartesian_product + +import pandas.core.sparse.frame as spf -import pandas.sparse.frame as spf +from pandas._libs.sparse import BlockIndex, IntIndex +from pandas.core.sparse.api import SparseSeries +from pandas.tests.series.test_api import SharedWithSparse -from pandas._sparse import BlockIndex, IntIndex -from pandas.sparse.api import SparseSeries -from pandas.tests.series.test_misc_api import SharedWithSparse +from itertools import product def _test_data1(): @@ -55,9 +60,13 @@ def _test_data2_zero(): return arr, index -class 
TestSparseSeries(tm.TestCase, SharedWithSparse): +class TestSparseSeries(SharedWithSparse): - def setUp(self): + series_klass = SparseSeries + # SharedWithSparse tests use generic, series_klass-agnostic assertion + _assert_series_equal = staticmethod(tm.assert_sp_series_equal) + + def setup_method(self, method): arr, index = _test_data1() date_index = bdate_range('1/1/2011', periods=len(index)) @@ -87,37 +96,61 @@ def setUp(self): self.ziseries2 = SparseSeries(arr, index=index, kind='integer', fill_value=0) + def test_constructor_dict_input(self): + # gh-16905 + constructor_dict = {1: 1.} + index = [0, 1, 2] + + # Series with index passed in + series = pd.Series(constructor_dict) + expected = SparseSeries(series, index=index) + + result = SparseSeries(constructor_dict, index=index) + tm.assert_sp_series_equal(result, expected) + + # Series with index and dictionary with no index + expected = SparseSeries(series) + + result = SparseSeries(constructor_dict) + tm.assert_sp_series_equal(result, expected) + + def test_constructor_dict_order(self): + # GH19018 + # initialization ordering: by insertion order if python>= 3.6, else + # order by value + d = {'b': 1, 'a': 0, 'c': 2} + result = SparseSeries(d) + if PY36: + expected = SparseSeries([1, 0, 2], index=list('bac')) + else: + expected = SparseSeries([0, 1, 2], index=list('abc')) + tm.assert_sp_series_equal(result, expected) + def test_constructor_dtype(self): arr = SparseSeries([np.nan, 1, 2, np.nan]) - self.assertEqual(arr.dtype, np.float64) - self.assertTrue(np.isnan(arr.fill_value)) + assert arr.dtype == np.float64 + assert np.isnan(arr.fill_value) arr = SparseSeries([np.nan, 1, 2, np.nan], fill_value=0) - self.assertEqual(arr.dtype, np.float64) - self.assertEqual(arr.fill_value, 0) + assert arr.dtype == np.float64 + assert arr.fill_value == 0 arr = SparseSeries([0, 1, 2, 4], dtype=np.int64, fill_value=np.nan) - self.assertEqual(arr.dtype, np.int64) - self.assertTrue(np.isnan(arr.fill_value)) + assert arr.dtype == 
np.int64 + assert np.isnan(arr.fill_value) arr = SparseSeries([0, 1, 2, 4], dtype=np.int64) - self.assertEqual(arr.dtype, np.int64) - self.assertEqual(arr.fill_value, 0) + assert arr.dtype == np.int64 + assert arr.fill_value == 0 arr = SparseSeries([0, 1, 2, 4], fill_value=0, dtype=np.int64) - self.assertEqual(arr.dtype, np.int64) - self.assertEqual(arr.fill_value, 0) + assert arr.dtype == np.int64 + assert arr.fill_value == 0 def test_iteration_and_str(self): [x for x in self.bseries] str(self.bseries) - def test_TimeSeries_deprecation(self): - - # deprecation TimeSeries, #10890 - with tm.assert_produces_warning(FutureWarning): - pd.SparseTimeSeries(1, index=pd.date_range('20130101', periods=3)) - def test_construct_DataFrame_with_sp_series(self): # it works! df = DataFrame({'col': self.bseries}) @@ -140,12 +173,12 @@ def test_construct_DataFrame_with_sp_series(self): def test_constructor_preserve_attr(self): arr = pd.SparseArray([1, 0, 3, 0], dtype=np.int64, fill_value=0) - self.assertEqual(arr.dtype, np.int64) - self.assertEqual(arr.fill_value, 0) + assert arr.dtype == np.int64 + assert arr.fill_value == 0 s = pd.SparseSeries(arr, name='x') - self.assertEqual(s.dtype, np.int64) - self.assertEqual(s.fill_value, 0) + assert s.dtype == np.int64 + assert s.fill_value == 0 def test_series_density(self): # GH2803 @@ -153,7 +186,7 @@ def test_series_density(self): ts[2:-2] = nan sts = ts.to_sparse() density = sts.density # don't die - self.assertEqual(density, 4 / 10.0) + assert density == 4 / 10.0 def test_sparse_to_dense(self): arr, index = _test_data1() @@ -208,12 +241,12 @@ def test_dense_to_sparse(self): iseries = series.to_sparse(kind='integer') tm.assert_sp_series_equal(bseries, self.bseries) tm.assert_sp_series_equal(iseries, self.iseries, check_names=False) - self.assertEqual(iseries.name, self.bseries.name) + assert iseries.name == self.bseries.name - self.assertEqual(len(series), len(bseries)) - self.assertEqual(len(series), len(iseries)) - 
self.assertEqual(series.shape, bseries.shape) - self.assertEqual(series.shape, iseries.shape) + assert len(series) == len(bseries) + assert len(series) == len(iseries) + assert series.shape == bseries.shape + assert series.shape == iseries.shape # non-NaN fill value series = self.zbseries.to_dense() @@ -221,26 +254,26 @@ def test_dense_to_sparse(self): ziseries = series.to_sparse(kind='integer', fill_value=0) tm.assert_sp_series_equal(zbseries, self.zbseries) tm.assert_sp_series_equal(ziseries, self.ziseries, check_names=False) - self.assertEqual(ziseries.name, self.zbseries.name) + assert ziseries.name == self.zbseries.name - self.assertEqual(len(series), len(zbseries)) - self.assertEqual(len(series), len(ziseries)) - self.assertEqual(series.shape, zbseries.shape) - self.assertEqual(series.shape, ziseries.shape) + assert len(series) == len(zbseries) + assert len(series) == len(ziseries) + assert series.shape == zbseries.shape + assert series.shape == ziseries.shape def test_to_dense_preserve_name(self): assert (self.bseries.name is not None) result = self.bseries.to_dense() - self.assertEqual(result.name, self.bseries.name) + assert result.name == self.bseries.name def test_constructor(self): # test setup guys - self.assertTrue(np.isnan(self.bseries.fill_value)) - tm.assertIsInstance(self.bseries.sp_index, BlockIndex) - self.assertTrue(np.isnan(self.iseries.fill_value)) - tm.assertIsInstance(self.iseries.sp_index, IntIndex) + assert np.isnan(self.bseries.fill_value) + assert isinstance(self.bseries.sp_index, BlockIndex) + assert np.isnan(self.iseries.fill_value) + assert isinstance(self.iseries.sp_index, IntIndex) - self.assertEqual(self.zbseries.fill_value, 0) + assert self.zbseries.fill_value == 0 tm.assert_numpy_array_equal(self.zbseries.values.values, self.bseries.to_dense().fillna(0).values) @@ -249,13 +282,13 @@ def _check_const(sparse, name): # use passed series name result = SparseSeries(sparse) tm.assert_sp_series_equal(result, sparse) - 
self.assertEqual(sparse.name, name) - self.assertEqual(result.name, name) + assert sparse.name == name + assert result.name == name # use passed name result = SparseSeries(sparse, name='x') tm.assert_sp_series_equal(result, sparse, check_names=False) - self.assertEqual(result.name, 'x') + assert result.name == 'x' _check_const(self.bseries, 'bseries') _check_const(self.iseries, 'iseries') @@ -264,7 +297,7 @@ def _check_const(sparse, name): # Sparse time series works date_index = bdate_range('1/1/2000', periods=len(self.bseries)) s5 = SparseSeries(self.bseries, index=date_index) - tm.assertIsInstance(s5, SparseSeries) + assert isinstance(s5, SparseSeries) # pass Series bseries2 = SparseSeries(self.bseries.to_dense()) @@ -276,31 +309,31 @@ def _check_const(sparse, name): values = np.ones(self.bseries.npoints) sp = SparseSeries(values, sparse_index=self.bseries.sp_index) sp.sp_values[:5] = 97 - self.assertEqual(values[0], 97) + assert values[0] == 97 - self.assertEqual(len(sp), 20) - self.assertEqual(sp.shape, (20, )) + assert len(sp) == 20 + assert sp.shape == (20, ) # but can make it copy! 
sp = SparseSeries(values, sparse_index=self.bseries.sp_index, copy=True) sp.sp_values[:5] = 100 - self.assertEqual(values[0], 97) + assert values[0] == 97 - self.assertEqual(len(sp), 20) - self.assertEqual(sp.shape, (20, )) + assert len(sp) == 20 + assert sp.shape == (20, ) def test_constructor_scalar(self): data = 5 sp = SparseSeries(data, np.arange(100)) sp = sp.reindex(np.arange(200)) - self.assertTrue((sp.loc[:99] == data).all()) - self.assertTrue(isnull(sp.loc[100:]).all()) + assert (sp.loc[:99] == data).all() + assert isna(sp.loc[100:]).all() data = np.nan sp = SparseSeries(data, np.arange(100)) - self.assertEqual(len(sp), 100) - self.assertEqual(sp.shape, (100, )) + assert len(sp) == 100 + assert sp.shape == (100, ) def test_constructor_ndarray(self): pass @@ -309,20 +342,20 @@ def test_constructor_nonnan(self): arr = [0, 0, 0, nan, nan] sp_series = SparseSeries(arr, fill_value=0) tm.assert_numpy_array_equal(sp_series.values.values, np.array(arr)) - self.assertEqual(len(sp_series), 5) - self.assertEqual(sp_series.shape, (5, )) + assert len(sp_series) == 5 + assert sp_series.shape == (5, ) - # GH 9272 def test_constructor_empty(self): + # see gh-9272 sp = SparseSeries() - self.assertEqual(len(sp.index), 0) - self.assertEqual(sp.shape, (0, )) + assert len(sp.index) == 0 + assert sp.shape == (0, ) def test_copy_astype(self): cop = self.bseries.astype(np.float64) - self.assertIsNot(cop, self.bseries) - self.assertIs(cop.sp_index, self.bseries.sp_index) - self.assertEqual(cop.dtype, np.float64) + assert cop is not self.bseries + assert cop.sp_index is self.bseries.sp_index + assert cop.dtype == np.float64 cop2 = self.iseries.copy() @@ -331,8 +364,8 @@ def test_copy_astype(self): # test that data is copied cop[:5] = 97 - self.assertEqual(cop.sp_values[0], 97) - self.assertNotEqual(self.bseries.sp_values[0], 97) + assert cop.sp_values[0] == 97 + assert self.bseries.sp_values[0] != 97 # correct fill value zbcop = self.zbseries.copy() @@ -344,22 +377,22 @@ def 
test_copy_astype(self): # no deep copy view = self.bseries.copy(deep=False) view.sp_values[:5] = 5 - self.assertTrue((self.bseries.sp_values[:5] == 5).all()) + assert (self.bseries.sp_values[:5] == 5).all() def test_shape(self): - # GH 10452 - self.assertEqual(self.bseries.shape, (20, )) - self.assertEqual(self.btseries.shape, (20, )) - self.assertEqual(self.iseries.shape, (20, )) + # see gh-10452 + assert self.bseries.shape == (20, ) + assert self.btseries.shape == (20, ) + assert self.iseries.shape == (20, ) - self.assertEqual(self.bseries2.shape, (15, )) - self.assertEqual(self.iseries2.shape, (15, )) + assert self.bseries2.shape == (15, ) + assert self.iseries2.shape == (15, ) - self.assertEqual(self.zbseries2.shape, (15, )) - self.assertEqual(self.ziseries2.shape, (15, )) + assert self.zbseries2.shape == (15, ) + assert self.ziseries2.shape == (15, ) def test_astype(self): - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): self.bseries.astype(np.int64) def test_astype_all(self): @@ -370,12 +403,12 @@ def test_astype_all(self): np.int32, np.int16, np.int8] for typ in types: res = s.astype(typ) - self.assertEqual(res.dtype, typ) + assert res.dtype == typ tm.assert_series_equal(res.to_dense(), orig.astype(typ)) def test_kind(self): - self.assertEqual(self.bseries.kind, 'block') - self.assertEqual(self.iseries.kind, 'integer') + assert self.bseries.kind == 'block' + assert self.iseries.kind == 'integer' def test_to_frame(self): # GH 9850 @@ -396,7 +429,7 @@ def test_to_frame(self): def test_pickle(self): def _test_roundtrip(series): - unpickled = self.round_trip_pickle(series) + unpickled = tm.round_trip_pickle(series) tm.assert_sp_series_equal(series, unpickled) tm.assert_series_equal(series.to_dense(), unpickled.to_dense()) @@ -431,44 +464,51 @@ def _check_getitem(sp, dense): _check_getitem(self.ziseries, self.ziseries.to_dense()) # exception handling - self.assertRaises(Exception, self.bseries.__getitem__, - len(self.bseries) + 1) + 
pytest.raises(Exception, self.bseries.__getitem__, + len(self.bseries) + 1) # index not contained - self.assertRaises(Exception, self.btseries.__getitem__, - self.btseries.index[-1] + BDay()) + pytest.raises(Exception, self.btseries.__getitem__, + self.btseries.index[-1] + BDay()) def test_get_get_value(self): tm.assert_almost_equal(self.bseries.get(10), self.bseries[10]) - self.assertIsNone(self.bseries.get(len(self.bseries) + 1)) + assert self.bseries.get(len(self.bseries) + 1) is None dt = self.btseries.index[10] result = self.btseries.get(dt) expected = self.btseries.to_dense()[dt] tm.assert_almost_equal(result, expected) - tm.assert_almost_equal(self.bseries.get_value(10), self.bseries[10]) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + tm.assert_almost_equal( + self.bseries.get_value(10), self.bseries[10]) def test_set_value(self): idx = self.btseries.index[7] - self.btseries.set_value(idx, 0) - self.assertEqual(self.btseries[idx], 0) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + self.btseries.set_value(idx, 0) + assert self.btseries[idx] == 0 - self.iseries.set_value('foobar', 0) - self.assertEqual(self.iseries.index[-1], 'foobar') - self.assertEqual(self.iseries['foobar'], 0) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + self.iseries.set_value('foobar', 0) + assert self.iseries.index[-1] == 'foobar' + assert self.iseries['foobar'] == 0 def test_getitem_slice(self): idx = self.bseries.index res = self.bseries[::2] - tm.assertIsInstance(res, SparseSeries) + assert isinstance(res, SparseSeries) expected = self.bseries.reindex(idx[::2]) tm.assert_sp_series_equal(res, expected) res = self.bseries[:5] - tm.assertIsInstance(res, SparseSeries) + assert isinstance(res, SparseSeries) tm.assert_sp_series_equal(res, self.bseries.reindex(idx[:5])) res = self.bseries[5:] @@ -485,7 +525,7 @@ def _compare_with_dense(sp): def _compare(idx): dense_result = dense.take(idx).values 
sparse_result = sp.take(idx) - self.assertIsInstance(sparse_result, SparseSeries) + assert isinstance(sparse_result, SparseSeries) tm.assert_almost_equal(dense_result, sparse_result.values.values) @@ -495,32 +535,39 @@ def _compare(idx): self._check_all(_compare_with_dense) - self.assertRaises(Exception, self.bseries.take, - [0, len(self.bseries) + 1]) + pytest.raises(Exception, self.bseries.take, + [0, len(self.bseries) + 1]) # Corner case sp = SparseSeries(np.ones(10) * nan) exp = pd.Series(np.repeat(nan, 5)) tm.assert_series_equal(sp.take([0, 1, 2, 3, 4]), exp) + with tm.assert_produces_warning(FutureWarning): + sp.take([1, 5], convert=True) + + with tm.assert_produces_warning(FutureWarning): + sp.take([1, 5], convert=False) + def test_numpy_take(self): sp = SparseSeries([1.0, 2.0, 3.0]) indices = [1, 2] - tm.assert_series_equal(np.take(sp, indices, axis=0).to_dense(), - np.take(sp.to_dense(), indices, axis=0)) + if not _np_version_under1p12: + tm.assert_series_equal(np.take(sp, indices, axis=0).to_dense(), + np.take(sp.to_dense(), indices, axis=0)) - msg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.take, - sp, indices, out=np.empty(sp.shape)) + msg = "the 'out' parameter is not supported" + tm.assert_raises_regex(ValueError, msg, np.take, + sp, indices, out=np.empty(sp.shape)) - msg = "the 'mode' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.take, - sp, indices, mode='clip') + msg = "the 'mode' parameter is not supported" + tm.assert_raises_regex(ValueError, msg, np.take, + sp, indices, out=None, mode='clip') def test_setitem(self): self.bseries[5] = 7. - self.assertEqual(self.bseries[5], 7.) + assert self.bseries[5] == 7. def test_setslice(self): self.bseries[5:10] = 7. 
@@ -597,30 +644,30 @@ def test_abs(self): expected = SparseSeries([1, 2, 3], name='x') result = s.abs() tm.assert_sp_series_equal(result, expected) - self.assertEqual(result.name, 'x') + assert result.name == 'x' result = abs(s) tm.assert_sp_series_equal(result, expected) - self.assertEqual(result.name, 'x') + assert result.name == 'x' result = np.abs(s) tm.assert_sp_series_equal(result, expected) - self.assertEqual(result.name, 'x') + assert result.name == 'x' s = SparseSeries([1, -2, 2, -3], fill_value=-2, name='x') expected = SparseSeries([1, 2, 3], sparse_index=s.sp_index, fill_value=2, name='x') result = s.abs() tm.assert_sp_series_equal(result, expected) - self.assertEqual(result.name, 'x') + assert result.name == 'x' result = abs(s) tm.assert_sp_series_equal(result, expected) - self.assertEqual(result.name, 'x') + assert result.name == 'x' result = np.abs(s) tm.assert_sp_series_equal(result, expected) - self.assertEqual(result.name, 'x') + assert result.name == 'x' def test_reindex(self): def _compare_with_series(sps, new_index): @@ -645,7 +692,7 @@ def _compare_with_series(sps, new_index): # special cases same_index = self.bseries.reindex(self.bseries.index) tm.assert_sp_series_equal(self.bseries, same_index) - self.assertIsNot(same_index, self.bseries) + assert same_index is not self.bseries # corner cases sp = SparseSeries([], index=[]) @@ -656,7 +703,7 @@ def _compare_with_series(sps, new_index): # with copy=False reindexed = self.bseries.reindex(self.bseries.index, copy=True) reindexed.sp_values[:] = 1. - self.assertTrue((self.bseries.sp_values != 1.).all()) + assert (self.bseries.sp_values != 1.).all() reindexed = self.bseries.reindex(self.bseries.index, copy=False) reindexed.sp_values[:] = 1. 
@@ -669,7 +716,7 @@ def _check(values, index1, index2, fill_value): first_series = SparseSeries(values, sparse_index=index1, fill_value=fill_value) reindexed = first_series.sparse_reindex(index2) - self.assertIs(reindexed.sp_index, index2) + assert reindexed.sp_index is index2 int_indices1 = index1.to_int_index().indices int_indices2 = index2.to_int_index().indices @@ -708,8 +755,8 @@ def _check_all(values, first, second): first_series = SparseSeries(values1, sparse_index=IntIndex(length, index1), fill_value=nan) - with tm.assertRaisesRegexp(TypeError, - 'new index must be a SparseIndex'): + with tm.assert_raises_regex(TypeError, + 'new index must be a SparseIndex'): reindexed = first_series.sparse_reindex(0) # noqa def test_repr(self): @@ -734,7 +781,7 @@ def _compare_with_dense(obj, op): sparse_result = getattr(obj, op)() series = obj.to_dense() dense_result = getattr(series, op)() - self.assertEqual(sparse_result, dense_result) + assert sparse_result == dense_result to_compare = ['count', 'sum', 'mean', 'std', 'var', 'skew'] @@ -764,18 +811,18 @@ def _compare_all(obj): def test_dropna(self): sp = SparseSeries([0, 0, 0, nan, nan, 5, 6], fill_value=0) - sp_valid = sp.valid() + sp_valid = sp.dropna() - expected = sp.to_dense().valid() + expected = sp.to_dense().dropna() expected = expected[expected != 0] exp_arr = pd.SparseArray(expected.values, fill_value=0, kind='block') tm.assert_sp_array_equal(sp_valid.values, exp_arr) - self.assert_index_equal(sp_valid.index, expected.index) - self.assertEqual(len(sp_valid.sp_values), 2) + tm.assert_index_equal(sp_valid.index, expected.index) + assert len(sp_valid.sp_values) == 2 result = self.bseries.dropna() expected = self.bseries.to_dense().dropna() - self.assertNotIsInstance(result, SparseSeries) + assert not isinstance(result, SparseSeries) tm.assert_series_equal(result, expected) def test_homogenize(self): @@ -802,7 +849,7 @@ def _check_matches(indices, expected): # must have NaN fill value data = {'a': 
SparseSeries(np.arange(7), sparse_index=expected2, fill_value=0)} - with tm.assertRaisesRegexp(TypeError, "NaN fill value"): + with tm.assert_raises_regex(TypeError, "NaN fill value"): spf.homogenize(data) def test_fill_value_corner(self): @@ -810,13 +857,13 @@ def test_fill_value_corner(self): cop.fill_value = 0 result = self.bseries / cop - self.assertTrue(np.isnan(result.fill_value)) + assert np.isnan(result.fill_value) cop2 = self.zbseries.copy() cop2.fill_value = 1 result = cop2 / cop # 1 / 0 is inf - self.assertTrue(np.isinf(result.fill_value)) + assert np.isinf(result.fill_value) def test_fill_value_when_combine_const(self): # GH12723 @@ -824,13 +871,13 @@ def test_fill_value_when_combine_const(self): exp = s.fillna(0).add(2) res = s.add(2, fill_value=0) - self.assert_series_equal(res, exp) + tm.assert_series_equal(res, exp) def test_shift(self): series = SparseSeries([nan, 1., 2., 3., nan, nan], index=np.arange(6)) shifted = series.shift(0) - self.assertIsNot(shifted, series) + assert shifted is not series tm.assert_sp_series_equal(shifted, series) f = lambda s: s.shift(1) @@ -938,10 +985,21 @@ def test_combine_first(self): tm.assert_sp_series_equal(result, result2) tm.assert_sp_series_equal(result, expected) + @pytest.mark.parametrize('deep,fill_values', [([True, False], + [0, 1, np.nan, None])]) + def test_memory_usage_deep(self, deep, fill_values): + for deep, fill_value in product(deep, fill_values): + sparse_series = SparseSeries(fill_values, fill_value=fill_value) + dense_series = Series(fill_values) + sparse_usage = sparse_series.memory_usage(deep=deep) + dense_usage = dense_series.memory_usage(deep=deep) + + assert sparse_usage < dense_usage + -class TestSparseHandlingMultiIndexes(tm.TestCase): +class TestSparseHandlingMultiIndexes(object): - def setUp(self): + def setup_method(self, method): miindex = pd.MultiIndex.from_product( [["x", "y"], ["10", "20"]], names=['row-foo', 'row-bar']) micol = pd.MultiIndex.from_product( @@ -965,11 +1023,11 @@ def 
test_round_trip_preserve_multiindex_names(self): check_names=True) -class TestSparseSeriesScipyInteraction(tm.TestCase): +@td.skip_if_no_scipy +class TestSparseSeriesScipyInteraction(object): # Issue 8048: add SparseSeries coo methods - def setUp(self): - tm._skip_if_no_scipy() + def setup_method(self, method): import scipy.sparse # SparseSeries inputs used in tests, the tests rely on the order self.sparse_series = [] @@ -1042,25 +1100,25 @@ def test_to_coo_text_names_text_row_levels_nosort(self): def test_to_coo_bad_partition_nonnull_intersection(self): ss = self.sparse_series[0] - self.assertRaises(ValueError, ss.to_coo, ['A', 'B', 'C'], ['C', 'D']) + pytest.raises(ValueError, ss.to_coo, ['A', 'B', 'C'], ['C', 'D']) def test_to_coo_bad_partition_small_union(self): ss = self.sparse_series[0] - self.assertRaises(ValueError, ss.to_coo, ['A'], ['C', 'D']) + pytest.raises(ValueError, ss.to_coo, ['A'], ['C', 'D']) def test_to_coo_nlevels_less_than_two(self): ss = self.sparse_series[0] ss.index = np.arange(len(ss.index)) - self.assertRaises(ValueError, ss.to_coo) + pytest.raises(ValueError, ss.to_coo) def test_to_coo_bad_ilevel(self): ss = self.sparse_series[0] - self.assertRaises(KeyError, ss.to_coo, ['A', 'B'], ['C', 'D', 'E']) + pytest.raises(KeyError, ss.to_coo, ['A', 'B'], ['C', 'D', 'E']) def test_to_coo_duplicate_index_entries(self): ss = pd.concat([self.sparse_series[0], self.sparse_series[0]]).to_sparse() - self.assertRaises(ValueError, ss.to_coo, ['A', 'B'], ['C', 'D']) + pytest.raises(ValueError, ss.to_coo, ['A', 'B'], ['C', 'D']) def test_from_coo_dense_index(self): ss = SparseSeries.from_coo(self.coo_matrices[0], dense_index=True) @@ -1076,7 +1134,6 @@ def test_from_coo_nodense_index(self): def test_from_coo_long_repr(self): # GH 13114 # test it doesn't raise error. 
Formatting is tested in test_format - tm._skip_if_no_scipy() import scipy.sparse sparse = SparseSeries.from_coo(scipy.sparse.rand(350, 18)) @@ -1102,8 +1159,8 @@ def _check_results_to_coo(self, results, check): # or compare directly as difference of sparse # assert(abs(A - A_result).max() < 1e-12) # max is failing in python # 2.6 - self.assertEqual(il, il_result) - self.assertEqual(jl, jl_result) + assert il == il_result + assert jl == jl_result def test_concat(self): val1 = np.array([1, 2, np.nan, np.nan, 0, np.nan]) @@ -1167,7 +1224,7 @@ def test_concat_axis1_different_fill(self): res = pd.concat([sparse1, sparse2], axis=1) exp = pd.concat([pd.Series(val1, name='x'), pd.Series(val2, name='y')], axis=1) - self.assertIsInstance(res, pd.SparseDataFrame) + assert isinstance(res, pd.SparseDataFrame) tm.assert_frame_equal(res.to_dense(), exp) def test_concat_different_kind(self): @@ -1273,11 +1330,11 @@ def test_value_counts_int(self): tm.assert_series_equal(sparse.value_counts(dropna=False), dense.value_counts(dropna=False)) - def test_isnull(self): + def test_isna(self): # GH 8276 s = pd.SparseSeries([np.nan, np.nan, 1, 2, np.nan], name='xxx') - res = s.isnull() + res = s.isna() exp = pd.SparseSeries([True, True, False, False, True], name='xxx', fill_value=True) tm.assert_sp_series_equal(res, exp) @@ -1285,16 +1342,16 @@ def test_isnull(self): # if fill_value is not nan, True can be included in sp_values s = pd.SparseSeries([np.nan, 0., 1., 2., 0.], name='xxx', fill_value=0.) 
- res = s.isnull() - tm.assertIsInstance(res, pd.SparseSeries) + res = s.isna() + assert isinstance(res, pd.SparseSeries) exp = pd.Series([True, False, False, False, False], name='xxx') tm.assert_series_equal(res.to_dense(), exp) - def test_isnotnull(self): + def test_notna(self): # GH 8276 s = pd.SparseSeries([np.nan, np.nan, 1, 2, np.nan], name='xxx') - res = s.isnotnull() + res = s.notna() exp = pd.SparseSeries([False, False, True, True, False], name='xxx', fill_value=False) tm.assert_sp_series_equal(res, exp) @@ -1302,8 +1359,8 @@ def test_isnotnull(self): # if fill_value is not nan, True can be included in sp_values s = pd.SparseSeries([np.nan, 0., 1., 2., 0.], name='xxx', fill_value=0.) - res = s.isnotnull() - tm.assertIsInstance(res, pd.SparseSeries) + res = s.notna() + assert isinstance(res, pd.SparseSeries) exp = pd.Series([False, True, True, True, True], name='xxx') tm.assert_series_equal(res.to_dense(), exp) @@ -1315,9 +1372,9 @@ def _dense_series_compare(s, f): tm.assert_series_equal(result.to_dense(), dense_result) -class TestSparseSeriesAnalytics(tm.TestCase): +class TestSparseSeriesAnalytics(object): - def setUp(self): + def setup_method(self, method): arr, index = _test_data1() self.bseries = SparseSeries(arr, index=index, kind='block', name='bseries') @@ -1337,7 +1394,7 @@ def test_cumsum(self): axis = 1 # Series is 1-D, so only axis = 0 is valid. 
msg = "No axis named {axis}".format(axis=axis) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): self.bseries.cumsum(axis=axis) def test_numpy_cumsum(self): @@ -1350,19 +1407,54 @@ def test_numpy_cumsum(self): tm.assert_series_equal(result, expected) msg = "the 'dtype' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.cumsum, - self.bseries, dtype=np.int64) + tm.assert_raises_regex(ValueError, msg, np.cumsum, + self.bseries, dtype=np.int64) msg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.cumsum, - self.zbseries, out=result) + tm.assert_raises_regex(ValueError, msg, np.cumsum, + self.zbseries, out=result) def test_numpy_func_call(self): # no exception should be raised even though # numpy passes in 'axis=None' or `axis=-1' funcs = ['sum', 'cumsum', 'var', 'mean', 'prod', 'cumprod', 'std', 'argsort', - 'argmin', 'argmax', 'min', 'max'] + 'min', 'max'] for func in funcs: for series in ('bseries', 'zbseries'): getattr(np, func)(getattr(self, series)) + + def test_deprecated_numpy_func_call(self): + # NOTE: These should be add to the 'test_numpy_func_call' test above + # once the behavior of argmin/argmax is corrected. 
+ funcs = ['argmin', 'argmax'] + for func in funcs: + for series in ('bseries', 'zbseries'): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + getattr(np, func)(getattr(self, series)) + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + getattr(getattr(self, series), func)() + + def test_deprecated_reindex_axis(self): + # https://github.com/pandas-dev/pandas/issues/17833 + with tm.assert_produces_warning(FutureWarning) as m: + self.bseries.reindex_axis([0, 1, 2]) + assert 'reindex' in str(m[0].message) + + +@pytest.mark.parametrize( + 'datetime_type', (np.datetime64, + pd.Timestamp, + lambda x: datetime.strptime(x, '%Y-%m-%d'))) +def test_constructor_dict_datetime64_index(datetime_type): + # GH 9456 + dates = ['1984-02-19', '1988-11-06', '1989-12-03', '1990-03-15'] + values = [42544017.198965244, 1234565, 40512335.181958228, -1] + + result = SparseSeries(dict(zip(map(datetime_type, dates), values))) + expected = SparseSeries(values, map(pd.Timestamp, dates)) + + tm.assert_sp_series_equal(result, expected) diff --git a/pandas/sparse/tests/test_arithmetics.py b/pandas/tests/sparse/test_arithmetics.py similarity index 95% rename from pandas/sparse/tests/test_arithmetics.py rename to pandas/tests/sparse/test_arithmetics.py index eb926082a7b7c..f023cd0003910 100644 --- a/pandas/sparse/tests/test_arithmetics.py +++ b/pandas/tests/sparse/test_arithmetics.py @@ -3,7 +3,7 @@ import pandas.util.testing as tm -class TestSparseArrayArithmetics(tm.TestCase): +class TestSparseArrayArithmetics(object): _base = np.array _klass = pd.SparseArray @@ -67,9 +67,9 @@ def _check_numeric_ops(self, a, b, a_dense, b_dense): self._assert((b_dense ** a).to_dense(), b_dense ** a_dense) def _check_bool_result(self, res): - tm.assertIsInstance(res, self._klass) - self.assertEqual(res.dtype, np.bool) - self.assertIsInstance(res.fill_value, bool) + assert isinstance(res, self._klass) + assert res.dtype == np.bool + assert 
isinstance(res.fill_value, bool) def _check_comparison_ops(self, a, b, a_dense, b_dense): with np.errstate(invalid='ignore'): @@ -274,30 +274,30 @@ def test_int_array(self): for kind in ['integer', 'block']: a = self._klass(values, dtype=dtype, kind=kind) - self.assertEqual(a.dtype, dtype) + assert a.dtype == dtype b = self._klass(rvalues, dtype=dtype, kind=kind) - self.assertEqual(b.dtype, dtype) + assert b.dtype == dtype self._check_numeric_ops(a, b, values, rvalues) self._check_numeric_ops(a, b * 0, values, rvalues * 0) a = self._klass(values, fill_value=0, dtype=dtype, kind=kind) - self.assertEqual(a.dtype, dtype) + assert a.dtype == dtype b = self._klass(rvalues, dtype=dtype, kind=kind) - self.assertEqual(b.dtype, dtype) + assert b.dtype == dtype self._check_numeric_ops(a, b, values, rvalues) a = self._klass(values, fill_value=0, dtype=dtype, kind=kind) - self.assertEqual(a.dtype, dtype) + assert a.dtype == dtype b = self._klass(rvalues, fill_value=0, dtype=dtype, kind=kind) - self.assertEqual(b.dtype, dtype) + assert b.dtype == dtype self._check_numeric_ops(a, b, values, rvalues) a = self._klass(values, fill_value=1, dtype=dtype, kind=kind) - self.assertEqual(a.dtype, dtype) + assert a.dtype == dtype b = self._klass(rvalues, fill_value=2, dtype=dtype, kind=kind) - self.assertEqual(b.dtype, dtype) + assert b.dtype == dtype self._check_numeric_ops(a, b, values, rvalues) def test_int_array_comparison(self): @@ -364,24 +364,24 @@ def test_mixed_array_float_int(self): for kind in ['integer', 'block']: a = self._klass(values, kind=kind) b = self._klass(rvalues, kind=kind) - self.assertEqual(b.dtype, rdtype) + assert b.dtype == rdtype self._check_numeric_ops(a, b, values, rvalues) self._check_numeric_ops(a, b * 0, values, rvalues * 0) a = self._klass(values, kind=kind, fill_value=0) b = self._klass(rvalues, kind=kind) - self.assertEqual(b.dtype, rdtype) + assert b.dtype == rdtype self._check_numeric_ops(a, b, values, rvalues) a = self._klass(values, kind=kind, 
fill_value=0) b = self._klass(rvalues, kind=kind, fill_value=0) - self.assertEqual(b.dtype, rdtype) + assert b.dtype == rdtype self._check_numeric_ops(a, b, values, rvalues) a = self._klass(values, kind=kind, fill_value=1) b = self._klass(rvalues, kind=kind, fill_value=2) - self.assertEqual(b.dtype, rdtype) + assert b.dtype == rdtype self._check_numeric_ops(a, b, values, rvalues) def test_mixed_array_comparison(self): @@ -394,24 +394,24 @@ def test_mixed_array_comparison(self): for kind in ['integer', 'block']: a = self._klass(values, kind=kind) b = self._klass(rvalues, kind=kind) - self.assertEqual(b.dtype, rdtype) + assert b.dtype == rdtype self._check_comparison_ops(a, b, values, rvalues) self._check_comparison_ops(a, b * 0, values, rvalues * 0) a = self._klass(values, kind=kind, fill_value=0) b = self._klass(rvalues, kind=kind) - self.assertEqual(b.dtype, rdtype) + assert b.dtype == rdtype self._check_comparison_ops(a, b, values, rvalues) a = self._klass(values, kind=kind, fill_value=0) b = self._klass(rvalues, kind=kind, fill_value=0) - self.assertEqual(b.dtype, rdtype) + assert b.dtype == rdtype self._check_comparison_ops(a, b, values, rvalues) a = self._klass(values, kind=kind, fill_value=1) b = self._klass(rvalues, kind=kind, fill_value=2) - self.assertEqual(b.dtype, rdtype) + assert b.dtype == rdtype self._check_comparison_ops(a, b, values, rvalues) diff --git a/pandas/sparse/tests/test_array.py b/pandas/tests/sparse/test_array.py similarity index 71% rename from pandas/sparse/tests/test_array.py rename to pandas/tests/sparse/test_array.py index 70aaea5b5b1f0..6c0c83cf65ff7 100644 --- a/pandas/sparse/tests/test_array.py +++ b/pandas/tests/sparse/test_array.py @@ -1,107 +1,132 @@ from pandas.compat import range + import re import operator +import pytest import warnings from numpy import nan import numpy as np -from pandas import _np_version_under1p8 -from pandas.sparse.api import SparseArray, SparseSeries -from pandas._sparse import IntIndex -from 
pandas.util.testing import assert_almost_equal, assertRaisesRegexp +from pandas.core.sparse.api import SparseArray, SparseSeries +from pandas._libs.sparse import IntIndex +from pandas.util.testing import assert_almost_equal import pandas.util.testing as tm -class TestSparseArray(tm.TestCase): +class TestSparseArray(object): - def setUp(self): + def setup_method(self, method): self.arr_data = np.array([nan, nan, 1, 2, 3, nan, 4, 5, nan, 6]) self.arr = SparseArray(self.arr_data) self.zarr = SparseArray([0, 0, 1, 2, 3, 0, 4, 5, 0, 6], fill_value=0) def test_constructor_dtype(self): arr = SparseArray([np.nan, 1, 2, np.nan]) - self.assertEqual(arr.dtype, np.float64) - self.assertTrue(np.isnan(arr.fill_value)) + assert arr.dtype == np.float64 + assert np.isnan(arr.fill_value) arr = SparseArray([np.nan, 1, 2, np.nan], fill_value=0) - self.assertEqual(arr.dtype, np.float64) - self.assertEqual(arr.fill_value, 0) + assert arr.dtype == np.float64 + assert arr.fill_value == 0 arr = SparseArray([0, 1, 2, 4], dtype=np.float64) - self.assertEqual(arr.dtype, np.float64) - self.assertTrue(np.isnan(arr.fill_value)) + assert arr.dtype == np.float64 + assert np.isnan(arr.fill_value) arr = SparseArray([0, 1, 2, 4], dtype=np.int64) - self.assertEqual(arr.dtype, np.int64) - self.assertEqual(arr.fill_value, 0) + assert arr.dtype == np.int64 + assert arr.fill_value == 0 arr = SparseArray([0, 1, 2, 4], fill_value=0, dtype=np.int64) - self.assertEqual(arr.dtype, np.int64) - self.assertEqual(arr.fill_value, 0) + assert arr.dtype == np.int64 + assert arr.fill_value == 0 arr = SparseArray([0, 1, 2, 4], dtype=None) - self.assertEqual(arr.dtype, np.int64) - self.assertEqual(arr.fill_value, 0) + assert arr.dtype == np.int64 + assert arr.fill_value == 0 arr = SparseArray([0, 1, 2, 4], fill_value=0, dtype=None) - self.assertEqual(arr.dtype, np.int64) - self.assertEqual(arr.fill_value, 0) + assert arr.dtype == np.int64 + assert arr.fill_value == 0 def test_constructor_object_dtype(self): # GH 11856 
arr = SparseArray(['A', 'A', np.nan, 'B'], dtype=np.object) - self.assertEqual(arr.dtype, np.object) - self.assertTrue(np.isnan(arr.fill_value)) + assert arr.dtype == np.object + assert np.isnan(arr.fill_value) arr = SparseArray(['A', 'A', np.nan, 'B'], dtype=np.object, fill_value='A') - self.assertEqual(arr.dtype, np.object) - self.assertEqual(arr.fill_value, 'A') + assert arr.dtype == np.object + assert arr.fill_value == 'A' + + # GH 17574 + data = [False, 0, 100.0, 0.0] + arr = SparseArray(data, dtype=np.object, fill_value=False) + assert arr.dtype == np.object + assert arr.fill_value is False + arr_expected = np.array(data, dtype=np.object) + it = (type(x) == type(y) and x == y for x, y in zip(arr, arr_expected)) + assert np.fromiter(it, dtype=np.bool).all() def test_constructor_spindex_dtype(self): arr = SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2])) tm.assert_sp_array_equal(arr, SparseArray([np.nan, 1, 2, np.nan])) - self.assertEqual(arr.dtype, np.float64) - self.assertTrue(np.isnan(arr.fill_value)) + assert arr.dtype == np.float64 + assert np.isnan(arr.fill_value) arr = SparseArray(data=[1, 2, 3], sparse_index=IntIndex(4, [1, 2, 3]), dtype=np.int64, fill_value=0) exp = SparseArray([0, 1, 2, 3], dtype=np.int64, fill_value=0) tm.assert_sp_array_equal(arr, exp) - self.assertEqual(arr.dtype, np.int64) - self.assertEqual(arr.fill_value, 0) + assert arr.dtype == np.int64 + assert arr.fill_value == 0 arr = SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2]), fill_value=0, dtype=np.int64) exp = SparseArray([0, 1, 2, 0], fill_value=0, dtype=np.int64) tm.assert_sp_array_equal(arr, exp) - self.assertEqual(arr.dtype, np.int64) - self.assertEqual(arr.fill_value, 0) + assert arr.dtype == np.int64 + assert arr.fill_value == 0 arr = SparseArray(data=[1, 2, 3], sparse_index=IntIndex(4, [1, 2, 3]), dtype=None, fill_value=0) exp = SparseArray([0, 1, 2, 3], dtype=None) tm.assert_sp_array_equal(arr, exp) - self.assertEqual(arr.dtype, np.int64) - 
self.assertEqual(arr.fill_value, 0) + assert arr.dtype == np.int64 + assert arr.fill_value == 0 # scalar input arr = SparseArray(data=1, sparse_index=IntIndex(1, [0]), dtype=None) exp = SparseArray([1], dtype=None) tm.assert_sp_array_equal(arr, exp) - self.assertEqual(arr.dtype, np.int64) - self.assertEqual(arr.fill_value, 0) + assert arr.dtype == np.int64 + assert arr.fill_value == 0 arr = SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2]), fill_value=0, dtype=None) exp = SparseArray([0, 1, 2, 0], fill_value=0, dtype=None) tm.assert_sp_array_equal(arr, exp) - self.assertEqual(arr.dtype, np.int64) - self.assertEqual(arr.fill_value, 0) + assert arr.dtype == np.int64 + assert arr.fill_value == 0 + + @pytest.mark.parametrize('scalar,dtype', [ + (False, bool), + (0.0, 'float64'), + (1, 'int64'), + ('z', 'object')]) + def test_scalar_with_index_infer_dtype(self, scalar, dtype): + # GH 19163 + arr = SparseArray(scalar, index=[1, 2, 3], fill_value=scalar) + exp = SparseArray([scalar, scalar, scalar], fill_value=scalar) + + tm.assert_sp_array_equal(arr, exp) + + assert arr.dtype == dtype + assert exp.dtype == dtype def test_sparseseries_roundtrip(self): # GH 13999 @@ -131,27 +156,25 @@ def test_sparseseries_roundtrip(self): def test_get_item(self): - self.assertTrue(np.isnan(self.arr[1])) - self.assertEqual(self.arr[2], 1) - self.assertEqual(self.arr[7], 5) + assert np.isnan(self.arr[1]) + assert self.arr[2] == 1 + assert self.arr[7] == 5 - self.assertEqual(self.zarr[0], 0) - self.assertEqual(self.zarr[2], 1) - self.assertEqual(self.zarr[7], 5) + assert self.zarr[0] == 0 + assert self.zarr[2] == 1 + assert self.zarr[7] == 5 errmsg = re.compile("bounds") - assertRaisesRegexp(IndexError, errmsg, lambda: self.arr[11]) - assertRaisesRegexp(IndexError, errmsg, lambda: self.arr[-11]) - self.assertEqual(self.arr[-1], self.arr[len(self.arr) - 1]) + tm.assert_raises_regex(IndexError, errmsg, lambda: self.arr[11]) + tm.assert_raises_regex(IndexError, errmsg, lambda: 
self.arr[-11]) + assert self.arr[-1] == self.arr[len(self.arr) - 1] def test_take(self): - self.assertTrue(np.isnan(self.arr.take(0))) - self.assertTrue(np.isscalar(self.arr.take(2))) + assert np.isnan(self.arr.take(0)) + assert np.isscalar(self.arr.take(2)) - # np.take in < 1.8 doesn't support scalar indexing - if not _np_version_under1p8: - self.assertEqual(self.arr.take(2), np.take(self.arr_data, 2)) - self.assertEqual(self.arr.take(6), np.take(self.arr_data, 6)) + assert self.arr.take(2) == np.take(self.arr_data, 2) + assert self.arr.take(6) == np.take(self.arr_data, 6) exp = SparseArray(np.take(self.arr_data, [2, 3])) tm.assert_sp_array_equal(self.arr.take([2, 3]), exp) @@ -177,21 +200,22 @@ def test_take_negative(self): tm.assert_sp_array_equal(self.arr.take([-4, -3, -2]), exp) def test_bad_take(self): - assertRaisesRegexp(IndexError, "bounds", lambda: self.arr.take(11)) - self.assertRaises(IndexError, lambda: self.arr.take(-11)) + tm.assert_raises_regex( + IndexError, "bounds", lambda: self.arr.take(11)) + pytest.raises(IndexError, lambda: self.arr.take(-11)) def test_take_invalid_kwargs(self): msg = r"take\(\) got an unexpected keyword argument 'foo'" - tm.assertRaisesRegexp(TypeError, msg, self.arr.take, - [2, 3], foo=2) + tm.assert_raises_regex(TypeError, msg, self.arr.take, + [2, 3], foo=2) msg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, self.arr.take, - [2, 3], out=self.arr) + tm.assert_raises_regex(ValueError, msg, self.arr.take, + [2, 3], out=self.arr) msg = "the 'mode' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, self.arr.take, - [2, 3], mode='clip') + tm.assert_raises_regex(ValueError, msg, self.arr.take, + [2, 3], mode='clip') def test_take_filling(self): # similar tests as GH 12631 @@ -213,16 +237,16 @@ def test_take_filling(self): msg = ('When allow_fill=True and fill_value is not None, ' 'all indices must be >= -1') - with tm.assertRaisesRegexp(ValueError, msg): + with 
tm.assert_raises_regex(ValueError, msg): sparse.take(np.array([1, 0, -2]), fill_value=True) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): sparse.take(np.array([1, 0, -5]), fill_value=True) - with tm.assertRaises(IndexError): + with pytest.raises(IndexError): sparse.take(np.array([1, -6])) - with tm.assertRaises(IndexError): + with pytest.raises(IndexError): sparse.take(np.array([1, 5])) - with tm.assertRaises(IndexError): + with pytest.raises(IndexError): sparse.take(np.array([1, 5]), fill_value=True) def test_take_filling_fill_value(self): @@ -245,16 +269,16 @@ def test_take_filling_fill_value(self): msg = ('When allow_fill=True and fill_value is not None, ' 'all indices must be >= -1') - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): sparse.take(np.array([1, 0, -2]), fill_value=True) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): sparse.take(np.array([1, 0, -5]), fill_value=True) - with tm.assertRaises(IndexError): + with pytest.raises(IndexError): sparse.take(np.array([1, -6])) - with tm.assertRaises(IndexError): + with pytest.raises(IndexError): sparse.take(np.array([1, 5])) - with tm.assertRaises(IndexError): + with pytest.raises(IndexError): sparse.take(np.array([1, 5]), fill_value=True) def test_take_filling_all_nan(self): @@ -267,11 +291,11 @@ def test_take_filling_all_nan(self): expected = SparseArray([np.nan, np.nan, np.nan]) tm.assert_sp_array_equal(result, expected) - with tm.assertRaises(IndexError): + with pytest.raises(IndexError): sparse.take(np.array([1, -6])) - with tm.assertRaises(IndexError): + with pytest.raises(IndexError): sparse.take(np.array([1, 5])) - with tm.assertRaises(IndexError): + with pytest.raises(IndexError): sparse.take(np.array([1, 5]), fill_value=True) def test_set_item(self): @@ -281,61 +305,61 @@ def setitem(): def setslice(): self.arr[1:5] = 2 - assertRaisesRegexp(TypeError, "item 
assignment", setitem) - assertRaisesRegexp(TypeError, "item assignment", setslice) + tm.assert_raises_regex(TypeError, "item assignment", setitem) + tm.assert_raises_regex(TypeError, "item assignment", setslice) def test_constructor_from_too_large_array(self): - assertRaisesRegexp(TypeError, "expected dimension <= 1 data", - SparseArray, np.arange(10).reshape((2, 5))) + tm.assert_raises_regex(TypeError, "expected dimension <= 1 data", + SparseArray, np.arange(10).reshape((2, 5))) def test_constructor_from_sparse(self): res = SparseArray(self.zarr) - self.assertEqual(res.fill_value, 0) + assert res.fill_value == 0 assert_almost_equal(res.sp_values, self.zarr.sp_values) def test_constructor_copy(self): cp = SparseArray(self.arr, copy=True) cp.sp_values[:3] = 0 - self.assertFalse((self.arr.sp_values[:3] == 0).any()) + assert not (self.arr.sp_values[:3] == 0).any() not_copy = SparseArray(self.arr) not_copy.sp_values[:3] = 0 - self.assertTrue((self.arr.sp_values[:3] == 0).all()) + assert (self.arr.sp_values[:3] == 0).all() def test_constructor_bool(self): # GH 10648 data = np.array([False, False, True, True, False, False]) arr = SparseArray(data, fill_value=False, dtype=bool) - self.assertEqual(arr.dtype, bool) + assert arr.dtype == bool tm.assert_numpy_array_equal(arr.sp_values, np.array([True, True])) tm.assert_numpy_array_equal(arr.sp_values, np.asarray(arr)) tm.assert_numpy_array_equal(arr.sp_index.indices, np.array([2, 3], np.int32)) for dense in [arr.to_dense(), arr.values]: - self.assertEqual(dense.dtype, bool) + assert dense.dtype == bool tm.assert_numpy_array_equal(dense, data) def test_constructor_bool_fill_value(self): arr = SparseArray([True, False, True], dtype=None) - self.assertEqual(arr.dtype, np.bool) - self.assertFalse(arr.fill_value) + assert arr.dtype == np.bool + assert not arr.fill_value arr = SparseArray([True, False, True], dtype=np.bool) - self.assertEqual(arr.dtype, np.bool) - self.assertFalse(arr.fill_value) + assert arr.dtype == np.bool + 
assert not arr.fill_value arr = SparseArray([True, False, True], dtype=np.bool, fill_value=True) - self.assertEqual(arr.dtype, np.bool) - self.assertTrue(arr.fill_value) + assert arr.dtype == np.bool + assert arr.fill_value def test_constructor_float32(self): # GH 10648 data = np.array([1., np.nan, 3], dtype=np.float32) arr = SparseArray(data, dtype=np.float32) - self.assertEqual(arr.dtype, np.float32) + assert arr.dtype == np.float32 tm.assert_numpy_array_equal(arr.sp_values, np.array([1, 3], dtype=np.float32)) tm.assert_numpy_array_equal(arr.sp_values, np.asarray(arr)) @@ -343,25 +367,25 @@ def test_constructor_float32(self): np.array([0, 2], dtype=np.int32)) for dense in [arr.to_dense(), arr.values]: - self.assertEqual(dense.dtype, np.float32) - self.assert_numpy_array_equal(dense, data) + assert dense.dtype == np.float32 + tm.assert_numpy_array_equal(dense, data) def test_astype(self): res = self.arr.astype('f8') res.sp_values[:3] = 27 - self.assertFalse((self.arr.sp_values[:3] == 27).any()) + assert not (self.arr.sp_values[:3] == 27).any() msg = "unable to coerce current fill_value nan to int64 dtype" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): self.arr.astype('i8') arr = SparseArray([0, np.nan, 0, 1]) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): arr.astype('i8') arr = SparseArray([0, np.nan, 0, 1], fill_value=0) msg = 'Cannot convert non-finite values \\(NA or inf\\) to integer' - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): arr.astype('i8') def test_astype_all(self): @@ -372,46 +396,46 @@ def test_astype_all(self): np.int32, np.int16, np.int8] for typ in types: res = arr.astype(typ) - self.assertEqual(res.dtype, typ) - self.assertEqual(res.sp_values.dtype, typ) + assert res.dtype == typ + assert res.sp_values.dtype == typ tm.assert_numpy_array_equal(res.values, vals.astype(typ)) def test_set_fill_value(self): 
arr = SparseArray([1., np.nan, 2.], fill_value=np.nan) arr.fill_value = 2 - self.assertEqual(arr.fill_value, 2) + assert arr.fill_value == 2 arr = SparseArray([1, 0, 2], fill_value=0, dtype=np.int64) arr.fill_value = 2 - self.assertEqual(arr.fill_value, 2) + assert arr.fill_value == 2 # coerces to int msg = "unable to set fill_value 3\\.1 to int64 dtype" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): arr.fill_value = 3.1 msg = "unable to set fill_value nan to int64 dtype" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): arr.fill_value = np.nan arr = SparseArray([True, False, True], fill_value=False, dtype=np.bool) arr.fill_value = True - self.assertTrue(arr.fill_value) + assert arr.fill_value # coerces to bool msg = "unable to set fill_value 0 to bool dtype" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): arr.fill_value = 0 msg = "unable to set fill_value nan to bool dtype" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): arr.fill_value = np.nan # invalid msg = "fill_value must be a scalar" for val in [[1, 2, 3], np.array([1, 2]), (1, 2, 3)]: - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): arr.fill_value = val def test_copy_shallow(self): @@ -496,10 +520,10 @@ def test_getslice_tuple(self): exp = SparseArray(dense[4:, ], fill_value=0) tm.assert_sp_array_equal(res, exp) - with tm.assertRaises(IndexError): + with pytest.raises(IndexError): sparse[4:, :] - with tm.assertRaises(IndexError): + with pytest.raises(IndexError): # check numpy compat dense[4:, :] @@ -521,19 +545,19 @@ def _check_op(op, first, second): res = op(first, second) exp = SparseArray(op(first.values, second.values), fill_value=first.fill_value) - tm.assertIsInstance(res, SparseArray) + assert isinstance(res, SparseArray) assert_almost_equal(res.values, exp.values) res2 
= op(first, second.values) - tm.assertIsInstance(res2, SparseArray) + assert isinstance(res2, SparseArray) tm.assert_sp_array_equal(res, res2) res3 = op(first.values, second) - tm.assertIsInstance(res3, SparseArray) + assert isinstance(res3, SparseArray) tm.assert_sp_array_equal(res, res3) res4 = op(first, 4) - tm.assertIsInstance(res4, SparseArray) + assert isinstance(res4, SparseArray) # ignore this if the actual op raises (e.g. pow) try: @@ -546,7 +570,7 @@ def _check_op(op, first, second): def _check_inplace_op(op): tmp = arr1.copy() - self.assertRaises(NotImplementedError, op, tmp, arr2) + pytest.raises(NotImplementedError, op, tmp, arr2) with np.errstate(all='ignore'): bin_ops = [operator.add, operator.sub, operator.mul, @@ -562,7 +586,7 @@ def _check_inplace_op(op): def test_pickle(self): def _check_roundtrip(obj): - unpickled = self.round_trip_pickle(obj) + unpickled = tm.round_trip_pickle(obj) tm.assert_sp_array_equal(unpickled, obj) _check_roundtrip(self.arr) @@ -618,14 +642,14 @@ def test_fillna(self): # int dtype shouldn't have missing. No changes. s = SparseArray([0, 0, 0, 0]) - self.assertEqual(s.dtype, np.int64) - self.assertEqual(s.fill_value, 0) + assert s.dtype == np.int64 + assert s.fill_value == 0 res = s.fillna(-1) tm.assert_sp_array_equal(res, s) s = SparseArray([0, 0, 0, 0], fill_value=0) - self.assertEqual(s.dtype, np.int64) - self.assertEqual(s.fill_value, 0) + assert s.dtype == np.int64 + assert s.fill_value == 0 res = s.fillna(-1) exp = SparseArray([0, 0, 0, 0], fill_value=0) tm.assert_sp_array_equal(res, exp) @@ -633,8 +657,8 @@ def test_fillna(self): # fill_value can be nan if there is no missing hole. 
# only fill_value will be changed s = SparseArray([0, 0, 0, 0], fill_value=np.nan) - self.assertEqual(s.dtype, np.int64) - self.assertTrue(np.isnan(s.fill_value)) + assert s.dtype == np.int64 + assert np.isnan(s.fill_value) res = s.fillna(-1) exp = SparseArray([0, 0, 0, 0], fill_value=-1) tm.assert_sp_array_equal(res, exp) @@ -653,39 +677,127 @@ def test_fillna_overlap(self): tm.assert_sp_array_equal(res, exp) -class TestSparseArrayAnalytics(tm.TestCase): +class TestSparseArrayAnalytics(object): + + @pytest.mark.parametrize('data,pos,neg', [ + ([True, True, True], True, False), + ([1, 2, 1], 1, 0), + ([1.0, 2.0, 1.0], 1.0, 0.0) + ]) + def test_all(self, data, pos, neg): + # GH 17570 + out = SparseArray(data).all() + assert out + + out = SparseArray(data, fill_value=pos).all() + assert out + + data[1] = neg + out = SparseArray(data).all() + assert not out + + out = SparseArray(data, fill_value=pos).all() + assert not out + + @pytest.mark.parametrize('data,pos,neg', [ + ([True, True, True], True, False), + ([1, 2, 1], 1, 0), + ([1.0, 2.0, 1.0], 1.0, 0.0) + ]) + def test_numpy_all(self, data, pos, neg): + # GH 17570 + out = np.all(SparseArray(data)) + assert out + + out = np.all(SparseArray(data, fill_value=pos)) + assert out + + data[1] = neg + out = np.all(SparseArray(data)) + assert not out + + out = np.all(SparseArray(data, fill_value=pos)) + assert not out + + msg = "the 'out' parameter is not supported" + tm.assert_raises_regex(ValueError, msg, np.all, + SparseArray(data), out=out) + + @pytest.mark.parametrize('data,pos,neg', [ + ([False, True, False], True, False), + ([0, 2, 0], 2, 0), + ([0.0, 2.0, 0.0], 2.0, 0.0) + ]) + def test_any(self, data, pos, neg): + # GH 17570 + out = SparseArray(data).any() + assert out + + out = SparseArray(data, fill_value=pos).any() + assert out + + data[1] = neg + out = SparseArray(data).any() + assert not out + + out = SparseArray(data, fill_value=pos).any() + assert not out + + @pytest.mark.parametrize('data,pos,neg', [ + 
([False, True, False], True, False), + ([0, 2, 0], 2, 0), + ([0.0, 2.0, 0.0], 2.0, 0.0) + ]) + def test_numpy_any(self, data, pos, neg): + # GH 17570 + out = np.any(SparseArray(data)) + assert out + + out = np.any(SparseArray(data, fill_value=pos)) + assert out + + data[1] = neg + out = np.any(SparseArray(data)) + assert not out + + out = np.any(SparseArray(data, fill_value=pos)) + assert not out + + msg = "the 'out' parameter is not supported" + tm.assert_raises_regex(ValueError, msg, np.any, + SparseArray(data), out=out) def test_sum(self): data = np.arange(10).astype(float) out = SparseArray(data).sum() - self.assertEqual(out, 45.0) + assert out == 45.0 data[5] = np.nan out = SparseArray(data, fill_value=2).sum() - self.assertEqual(out, 40.0) + assert out == 40.0 out = SparseArray(data, fill_value=np.nan).sum() - self.assertEqual(out, 40.0) + assert out == 40.0 def test_numpy_sum(self): data = np.arange(10).astype(float) out = np.sum(SparseArray(data)) - self.assertEqual(out, 45.0) + assert out == 45.0 data[5] = np.nan out = np.sum(SparseArray(data, fill_value=2)) - self.assertEqual(out, 40.0) + assert out == 40.0 out = np.sum(SparseArray(data, fill_value=np.nan)) - self.assertEqual(out, 40.0) + assert out == 40.0 msg = "the 'dtype' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.sum, - SparseArray(data), dtype=np.int64) + tm.assert_raises_regex(ValueError, msg, np.sum, + SparseArray(data), dtype=np.int64) msg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.sum, - SparseArray(data), out=out) + tm.assert_raises_regex(ValueError, msg, np.sum, + SparseArray(data), out=out) def test_cumsum(self): non_null_data = np.array([1, 2, 3, 4, 5], dtype=float) @@ -709,7 +821,7 @@ def test_cumsum(self): axis = 1 # SparseArray currently 1-D, so only axis = 0 is valid. 
msg = "axis\\(={axis}\\) out of bounds".format(axis=axis) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): SparseArray(data).cumsum(axis=axis) def test_numpy_cumsum(self): @@ -733,38 +845,38 @@ def test_numpy_cumsum(self): tm.assert_sp_array_equal(out, expected) msg = "the 'dtype' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.cumsum, - SparseArray(data), dtype=np.int64) + tm.assert_raises_regex(ValueError, msg, np.cumsum, + SparseArray(data), dtype=np.int64) msg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.cumsum, - SparseArray(data), out=out) + tm.assert_raises_regex(ValueError, msg, np.cumsum, + SparseArray(data), out=out) def test_mean(self): data = np.arange(10).astype(float) out = SparseArray(data).mean() - self.assertEqual(out, 4.5) + assert out == 4.5 data[5] = np.nan out = SparseArray(data).mean() - self.assertEqual(out, 40.0 / 9) + assert out == 40.0 / 9 def test_numpy_mean(self): data = np.arange(10).astype(float) out = np.mean(SparseArray(data)) - self.assertEqual(out, 4.5) + assert out == 4.5 data[5] = np.nan out = np.mean(SparseArray(data)) - self.assertEqual(out, 40.0 / 9) + assert out == 40.0 / 9 msg = "the 'dtype' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.mean, - SparseArray(data), dtype=np.int64) + tm.assert_raises_regex(ValueError, msg, np.mean, + SparseArray(data), dtype=np.int64) msg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.mean, - SparseArray(data), out=out) + tm.assert_raises_regex(ValueError, msg, np.mean, + SparseArray(data), out=out) def test_ufunc(self): # GH 13853 make sure ufunc is applied to fill_value diff --git a/pandas/sparse/tests/test_combine_concat.py b/pandas/tests/sparse/test_combine_concat.py similarity index 82% rename from pandas/sparse/tests/test_combine_concat.py rename to pandas/tests/sparse/test_combine_concat.py index 
81655daec6164..70fd1da529d46 100644 --- a/pandas/sparse/tests/test_combine_concat.py +++ b/pandas/tests/sparse/test_combine_concat.py @@ -1,11 +1,13 @@ # pylint: disable-msg=E1101,W0612 +import pytest import numpy as np import pandas as pd import pandas.util.testing as tm +import itertools -class TestSparseSeriesConcat(tm.TestCase): +class TestSparseSeriesConcat(object): def test_concat(self): val1 = np.array([1, 2, np.nan, np.nan, 0, np.nan]) @@ -69,7 +71,7 @@ def test_concat_axis1_different_fill(self): res = pd.concat([sparse1, sparse2], axis=1) exp = pd.concat([pd.Series(val1, name='x'), pd.Series(val2, name='y')], axis=1) - self.assertIsInstance(res, pd.SparseDataFrame) + assert isinstance(res, pd.SparseDataFrame) tm.assert_frame_equal(res.to_dense(), exp) def test_concat_different_kind(self): @@ -122,9 +124,9 @@ def test_concat_sparse_dense(self): tm.assert_sp_series_equal(res, exp) -class TestSparseDataFrameConcat(tm.TestCase): +class TestSparseDataFrameConcat(object): - def setUp(self): + def setup_method(self, method): self.dense1 = pd.DataFrame({'A': [0., 1., 2., np.nan], 'B': [0., 0., 0., 0.], @@ -234,12 +236,12 @@ def test_concat_different_columns(self): # each columns keeps its fill_value, thus compare in dense res = pd.concat([sparse, sparse3]) exp = pd.concat([self.dense1, self.dense3]) - self.assertIsInstance(res, pd.SparseDataFrame) + assert isinstance(res, pd.SparseDataFrame) tm.assert_frame_equal(res.to_dense(), exp) res = pd.concat([sparse3, sparse]) exp = pd.concat([self.dense3, self.dense1]) - self.assertIsInstance(res, pd.SparseDataFrame) + assert isinstance(res, pd.SparseDataFrame) tm.assert_frame_equal(res.to_dense(), exp) def test_concat_series(self): @@ -309,45 +311,60 @@ def test_concat_axis1(self): # each columns keeps its fill_value, thus compare in dense res = pd.concat([sparse, sparse3], axis=1) exp = pd.concat([self.dense1, self.dense3], axis=1) - self.assertIsInstance(res, pd.SparseDataFrame) + assert isinstance(res, 
pd.SparseDataFrame) tm.assert_frame_equal(res.to_dense(), exp) res = pd.concat([sparse3, sparse], axis=1) exp = pd.concat([self.dense3, self.dense1], axis=1) - self.assertIsInstance(res, pd.SparseDataFrame) + assert isinstance(res, pd.SparseDataFrame) tm.assert_frame_equal(res.to_dense(), exp) - def test_concat_sparse_dense(self): - sparse = self.dense1.to_sparse() - - res = pd.concat([sparse, self.dense2]) - exp = pd.concat([self.dense1, self.dense2]) - self.assertIsInstance(res, pd.SparseDataFrame) - tm.assert_frame_equal(res.to_dense(), exp) - - res = pd.concat([self.dense2, sparse]) - exp = pd.concat([self.dense2, self.dense1]) - self.assertIsInstance(res, pd.SparseDataFrame) - tm.assert_frame_equal(res.to_dense(), exp) - - sparse = self.dense1.to_sparse(fill_value=0) - - res = pd.concat([sparse, self.dense2]) - exp = pd.concat([self.dense1, self.dense2]) - self.assertIsInstance(res, pd.SparseDataFrame) - tm.assert_frame_equal(res.to_dense(), exp) - - res = pd.concat([self.dense2, sparse]) - exp = pd.concat([self.dense2, self.dense1]) - self.assertIsInstance(res, pd.SparseDataFrame) - tm.assert_frame_equal(res.to_dense(), exp) - - res = pd.concat([self.dense3, sparse], axis=1) - exp = pd.concat([self.dense3, self.dense1], axis=1) - self.assertIsInstance(res, pd.SparseDataFrame) - tm.assert_frame_equal(res, exp) - - res = pd.concat([sparse, self.dense3], axis=1) - exp = pd.concat([self.dense1, self.dense3], axis=1) - self.assertIsInstance(res, pd.SparseDataFrame) - tm.assert_frame_equal(res, exp) + @pytest.mark.parametrize('fill_value,sparse_idx,dense_idx', + itertools.product([None, 0, 1, np.nan], + [0, 1], + [1, 0])) + def test_concat_sparse_dense_rows(self, fill_value, sparse_idx, dense_idx): + frames = [self.dense1, self.dense2] + sparse_frame = [frames[dense_idx], + frames[sparse_idx].to_sparse(fill_value=fill_value)] + dense_frame = [frames[dense_idx], frames[sparse_idx]] + + # This will try both directions sparse + dense and dense + sparse + for _ in 
range(2): + res = pd.concat(sparse_frame) + exp = pd.concat(dense_frame) + + assert isinstance(res, pd.SparseDataFrame) + tm.assert_frame_equal(res.to_dense(), exp) + + sparse_frame = sparse_frame[::-1] + dense_frame = dense_frame[::-1] + + @pytest.mark.parametrize('fill_value,sparse_idx,dense_idx', + itertools.product([None, 0, 1, np.nan], + [0, 1], + [1, 0])) + def test_concat_sparse_dense_cols(self, fill_value, sparse_idx, dense_idx): + # See GH16874, GH18914 and #18686 for why this should be a DataFrame + + frames = [self.dense1, self.dense3] + + sparse_frame = [frames[dense_idx], + frames[sparse_idx].to_sparse(fill_value=fill_value)] + dense_frame = [frames[dense_idx], frames[sparse_idx]] + + # This will try both directions sparse + dense and dense + sparse + for _ in range(2): + res = pd.concat(sparse_frame, axis=1) + exp = pd.concat(dense_frame, axis=1) + + for column in frames[dense_idx].columns: + if dense_idx == sparse_idx: + tm.assert_frame_equal(res[column], exp[column]) + else: + tm.assert_series_equal(res[column], exp[column]) + + tm.assert_frame_equal(res, exp) + + sparse_frame = sparse_frame[::-1] + dense_frame = dense_frame[::-1] diff --git a/pandas/sparse/tests/test_format.py b/pandas/tests/sparse/test_format.py similarity index 77% rename from pandas/sparse/tests/test_format.py rename to pandas/tests/sparse/test_format.py index 0c0e773d19bb9..d983bd209085a 100644 --- a/pandas/sparse/tests/test_format.py +++ b/pandas/tests/sparse/test_format.py @@ -13,7 +13,7 @@ use_32bit_repr = is_platform_windows() or is_platform_32bit() -class TestSparseSeriesFormatting(tm.TestCase): +class TestSparseSeriesFormatting(object): @property def dtype_format_for_platform(self): @@ -27,16 +27,16 @@ def test_sparse_max_row(self): "4 NaN\ndtype: float64\nBlockIndex\n" "Block locations: array([0, 3]{0})\n" "Block lengths: array([1, 1]{0})".format(dfm)) - self.assertEqual(result, exp) + assert result == exp with option_context("display.max_rows", 3): # GH 10560 result = 
repr(s) exp = ("0 1.0\n ... \n4 NaN\n" - "dtype: float64\nBlockIndex\n" + "Length: 5, dtype: float64\nBlockIndex\n" "Block locations: array([0, 3]{0})\n" "Block lengths: array([1, 1]{0})".format(dfm)) - self.assertEqual(result, exp) + assert result == exp def test_sparse_mi_max_row(self): idx = pd.MultiIndex.from_tuples([('A', 0), ('A', 1), ('B', 0), @@ -50,16 +50,17 @@ def test_sparse_mi_max_row(self): "dtype: float64\nBlockIndex\n" "Block locations: array([0, 3]{0})\n" "Block lengths: array([1, 1]{0})".format(dfm)) - self.assertEqual(result, exp) + assert result == exp - with option_context("display.max_rows", 3): + with option_context("display.max_rows", 3, + "display.show_dimensions", False): # GH 13144 result = repr(s) exp = ("A 0 1.0\n ... \nC 2 NaN\n" "dtype: float64\nBlockIndex\n" "Block locations: array([0, 3]{0})\n" "Block lengths: array([1, 1]{0})".format(dfm)) - self.assertEqual(result, exp) + assert result == exp def test_sparse_bool(self): # GH 13110 @@ -72,15 +73,15 @@ def test_sparse_bool(self): "dtype: bool\nBlockIndex\n" "Block locations: array([0, 3]{0})\n" "Block lengths: array([1, 1]{0})".format(dtype)) - self.assertEqual(result, exp) + assert result == exp with option_context("display.max_rows", 3): result = repr(s) exp = ("0 True\n ... 
\n5 False\n" - "dtype: bool\nBlockIndex\n" + "Length: 6, dtype: bool\nBlockIndex\n" "Block locations: array([0, 3]{0})\n" "Block lengths: array([1, 1]{0})".format(dtype)) - self.assertEqual(result, exp) + assert result == exp def test_sparse_int(self): # GH 13110 @@ -92,18 +93,19 @@ def test_sparse_int(self): "5 0\ndtype: int64\nBlockIndex\n" "Block locations: array([1, 4]{0})\n" "Block lengths: array([1, 1]{0})".format(dtype)) - self.assertEqual(result, exp) + assert result == exp - with option_context("display.max_rows", 3): + with option_context("display.max_rows", 3, + "display.show_dimensions", False): result = repr(s) exp = ("0 0\n ..\n5 0\n" "dtype: int64\nBlockIndex\n" "Block locations: array([1, 4]{0})\n" "Block lengths: array([1, 1]{0})".format(dtype)) - self.assertEqual(result, exp) + assert result == exp -class TestSparseDataFrameFormatting(tm.TestCase): +class TestSparseDataFrameFormatting(object): def test_sparse_frame(self): # GH 13110 @@ -112,7 +114,19 @@ def test_sparse_frame(self): 'C': [0, 0, 3, 0, 5], 'D': [np.nan, np.nan, np.nan, 1, 2]}) sparse = df.to_sparse() - self.assertEqual(repr(sparse), repr(df)) + assert repr(sparse) == repr(df) with option_context("display.max_rows", 3): - self.assertEqual(repr(sparse), repr(df)) + assert repr(sparse) == repr(df) + + def test_sparse_repr_after_set(self): + # GH 15488 + sdf = pd.SparseDataFrame([[np.nan, 1], [2, np.nan]]) + res = sdf.copy() + + # Ignore the warning + with pd.option_context('mode.chained_assignment', None): + sdf[0][1] = 2 # This line triggers the bug + + repr(sdf) + tm.assert_sp_frame_equal(sdf, res) diff --git a/pandas/sparse/tests/test_groupby.py b/pandas/tests/sparse/test_groupby.py similarity index 96% rename from pandas/sparse/tests/test_groupby.py rename to pandas/tests/sparse/test_groupby.py index 23bea94a2aef8..c9049ed9743dd 100644 --- a/pandas/sparse/tests/test_groupby.py +++ b/pandas/tests/sparse/test_groupby.py @@ -4,9 +4,9 @@ import pandas.util.testing as tm -class 
TestSparseGroupBy(tm.TestCase): +class TestSparseGroupBy(object): - def setUp(self): + def setup_method(self, method): self.dense = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], 'B': ['one', 'one', 'two', 'three', diff --git a/pandas/sparse/tests/test_indexing.py b/pandas/tests/sparse/test_indexing.py similarity index 82% rename from pandas/sparse/tests/test_indexing.py rename to pandas/tests/sparse/test_indexing.py index 357a7103f4027..37a287af71451 100644 --- a/pandas/sparse/tests/test_indexing.py +++ b/pandas/tests/sparse/test_indexing.py @@ -1,14 +1,14 @@ # pylint: disable-msg=E1101,W0612 -import pytest # noqa +import pytest import numpy as np import pandas as pd import pandas.util.testing as tm -class TestSparseSeriesIndexing(tm.TestCase): +class TestSparseSeriesIndexing(object): - def setUp(self): + def setup_method(self, method): self.orig = pd.Series([1, np.nan, np.nan, 3, np.nan]) self.sparse = self.orig.to_sparse() @@ -16,9 +16,9 @@ def test_getitem(self): orig = self.orig sparse = self.sparse - self.assertEqual(sparse[0], 1) - self.assertTrue(np.isnan(sparse[1])) - self.assertEqual(sparse[3], 3) + assert sparse[0] == 1 + assert np.isnan(sparse[1]) + assert sparse[3] == 3 result = sparse[[1, 3, 4]] exp = orig[[1, 3, 4]].to_sparse() @@ -53,23 +53,23 @@ def test_getitem_int_dtype(self): res = s[::2] exp = pd.SparseSeries([0, 2, 4, 6], index=[0, 2, 4, 6], name='xxx') tm.assert_sp_series_equal(res, exp) - self.assertEqual(res.dtype, np.int64) + assert res.dtype == np.int64 s = pd.SparseSeries([0, 1, 2, 3, 4, 5, 6], fill_value=0, name='xxx') res = s[::2] exp = pd.SparseSeries([0, 2, 4, 6], index=[0, 2, 4, 6], fill_value=0, name='xxx') tm.assert_sp_series_equal(res, exp) - self.assertEqual(res.dtype, np.int64) + assert res.dtype == np.int64 def test_getitem_fill_value(self): orig = pd.Series([1, np.nan, 0, 3, 0]) sparse = orig.to_sparse(fill_value=0) - self.assertEqual(sparse[0], 1) - self.assertTrue(np.isnan(sparse[1])) - 
self.assertEqual(sparse[2], 0) - self.assertEqual(sparse[3], 3) + assert sparse[0] == 1 + assert np.isnan(sparse[1]) + assert sparse[2] == 0 + assert sparse[3] == 3 result = sparse[[1, 3, 4]] exp = orig[[1, 3, 4]].to_sparse(fill_value=0) @@ -113,19 +113,19 @@ def test_loc(self): orig = self.orig sparse = self.sparse - self.assertEqual(sparse.loc[0], 1) - self.assertTrue(np.isnan(sparse.loc[1])) + assert sparse.loc[0] == 1 + assert np.isnan(sparse.loc[1]) result = sparse.loc[[1, 3, 4]] exp = orig.loc[[1, 3, 4]].to_sparse() tm.assert_sp_series_equal(result, exp) # exceeds the bounds - result = sparse.loc[[1, 3, 4, 5]] - exp = orig.loc[[1, 3, 4, 5]].to_sparse() + result = sparse.reindex([1, 3, 4, 5]) + exp = orig.reindex([1, 3, 4, 5]).to_sparse() tm.assert_sp_series_equal(result, exp) # padded with NaN - self.assertTrue(np.isnan(result[-1])) + assert np.isnan(result[-1]) # dense array result = sparse.loc[orig % 2 == 1] @@ -145,8 +145,8 @@ def test_loc_index(self): orig = pd.Series([1, np.nan, np.nan, 3, np.nan], index=list('ABCDE')) sparse = orig.to_sparse() - self.assertEqual(sparse.loc['A'], 1) - self.assertTrue(np.isnan(sparse.loc['B'])) + assert sparse.loc['A'] == 1 + assert np.isnan(sparse.loc['B']) result = sparse.loc[['A', 'C', 'D']] exp = orig.loc[['A', 'C', 'D']].to_sparse() @@ -170,8 +170,8 @@ def test_loc_index_fill_value(self): orig = pd.Series([1, np.nan, 0, 3, 0], index=list('ABCDE')) sparse = orig.to_sparse(fill_value=0) - self.assertEqual(sparse.loc['A'], 1) - self.assertTrue(np.isnan(sparse.loc['B'])) + assert sparse.loc['A'] == 1 + assert np.isnan(sparse.loc['B']) result = sparse.loc[['A', 'C', 'D']] exp = orig.loc[['A', 'C', 'D']].to_sparse(fill_value=0) @@ -209,8 +209,8 @@ def test_iloc(self): orig = self.orig sparse = self.sparse - self.assertEqual(sparse.iloc[3], 3) - self.assertTrue(np.isnan(sparse.iloc[2])) + assert sparse.iloc[3] == 3 + assert np.isnan(sparse.iloc[2]) result = sparse.iloc[[1, 3, 4]] exp = orig.iloc[[1, 3, 4]].to_sparse() @@ 
-220,16 +220,16 @@ def test_iloc(self): exp = orig.iloc[[1, -2, -4]].to_sparse() tm.assert_sp_series_equal(result, exp) - with tm.assertRaises(IndexError): + with pytest.raises(IndexError): sparse.iloc[[1, 3, 5]] def test_iloc_fill_value(self): orig = pd.Series([1, np.nan, 0, 3, 0]) sparse = orig.to_sparse(fill_value=0) - self.assertEqual(sparse.iloc[3], 3) - self.assertTrue(np.isnan(sparse.iloc[1])) - self.assertEqual(sparse.iloc[4], 0) + assert sparse.iloc[3] == 3 + assert np.isnan(sparse.iloc[1]) + assert sparse.iloc[4] == 0 result = sparse.iloc[[1, 3, 4]] exp = orig.iloc[[1, 3, 4]].to_sparse(fill_value=0) @@ -249,74 +249,74 @@ def test_iloc_slice_fill_value(self): def test_at(self): orig = pd.Series([1, np.nan, np.nan, 3, np.nan]) sparse = orig.to_sparse() - self.assertEqual(sparse.at[0], orig.at[0]) - self.assertTrue(np.isnan(sparse.at[1])) - self.assertTrue(np.isnan(sparse.at[2])) - self.assertEqual(sparse.at[3], orig.at[3]) - self.assertTrue(np.isnan(sparse.at[4])) + assert sparse.at[0] == orig.at[0] + assert np.isnan(sparse.at[1]) + assert np.isnan(sparse.at[2]) + assert sparse.at[3] == orig.at[3] + assert np.isnan(sparse.at[4]) orig = pd.Series([1, np.nan, np.nan, 3, np.nan], index=list('abcde')) sparse = orig.to_sparse() - self.assertEqual(sparse.at['a'], orig.at['a']) - self.assertTrue(np.isnan(sparse.at['b'])) - self.assertTrue(np.isnan(sparse.at['c'])) - self.assertEqual(sparse.at['d'], orig.at['d']) - self.assertTrue(np.isnan(sparse.at['e'])) + assert sparse.at['a'] == orig.at['a'] + assert np.isnan(sparse.at['b']) + assert np.isnan(sparse.at['c']) + assert sparse.at['d'] == orig.at['d'] + assert np.isnan(sparse.at['e']) def test_at_fill_value(self): orig = pd.Series([1, np.nan, 0, 3, 0], index=list('abcde')) sparse = orig.to_sparse(fill_value=0) - self.assertEqual(sparse.at['a'], orig.at['a']) - self.assertTrue(np.isnan(sparse.at['b'])) - self.assertEqual(sparse.at['c'], orig.at['c']) - self.assertEqual(sparse.at['d'], orig.at['d']) - 
self.assertEqual(sparse.at['e'], orig.at['e']) + assert sparse.at['a'] == orig.at['a'] + assert np.isnan(sparse.at['b']) + assert sparse.at['c'] == orig.at['c'] + assert sparse.at['d'] == orig.at['d'] + assert sparse.at['e'] == orig.at['e'] def test_iat(self): orig = self.orig sparse = self.sparse - self.assertEqual(sparse.iat[0], orig.iat[0]) - self.assertTrue(np.isnan(sparse.iat[1])) - self.assertTrue(np.isnan(sparse.iat[2])) - self.assertEqual(sparse.iat[3], orig.iat[3]) - self.assertTrue(np.isnan(sparse.iat[4])) + assert sparse.iat[0] == orig.iat[0] + assert np.isnan(sparse.iat[1]) + assert np.isnan(sparse.iat[2]) + assert sparse.iat[3] == orig.iat[3] + assert np.isnan(sparse.iat[4]) - self.assertTrue(np.isnan(sparse.iat[-1])) - self.assertEqual(sparse.iat[-5], orig.iat[-5]) + assert np.isnan(sparse.iat[-1]) + assert sparse.iat[-5] == orig.iat[-5] def test_iat_fill_value(self): orig = pd.Series([1, np.nan, 0, 3, 0]) sparse = orig.to_sparse() - self.assertEqual(sparse.iat[0], orig.iat[0]) - self.assertTrue(np.isnan(sparse.iat[1])) - self.assertEqual(sparse.iat[2], orig.iat[2]) - self.assertEqual(sparse.iat[3], orig.iat[3]) - self.assertEqual(sparse.iat[4], orig.iat[4]) + assert sparse.iat[0] == orig.iat[0] + assert np.isnan(sparse.iat[1]) + assert sparse.iat[2] == orig.iat[2] + assert sparse.iat[3] == orig.iat[3] + assert sparse.iat[4] == orig.iat[4] - self.assertEqual(sparse.iat[-1], orig.iat[-1]) - self.assertEqual(sparse.iat[-5], orig.iat[-5]) + assert sparse.iat[-1] == orig.iat[-1] + assert sparse.iat[-5] == orig.iat[-5] def test_get(self): s = pd.SparseSeries([1, np.nan, np.nan, 3, np.nan]) - self.assertEqual(s.get(0), 1) - self.assertTrue(np.isnan(s.get(1))) - self.assertIsNone(s.get(5)) + assert s.get(0) == 1 + assert np.isnan(s.get(1)) + assert s.get(5) is None s = pd.SparseSeries([1, np.nan, 0, 3, 0], index=list('ABCDE')) - self.assertEqual(s.get('A'), 1) - self.assertTrue(np.isnan(s.get('B'))) - self.assertEqual(s.get('C'), 0) - 
self.assertIsNone(s.get('XX')) + assert s.get('A') == 1 + assert np.isnan(s.get('B')) + assert s.get('C') == 0 + assert s.get('XX') is None s = pd.SparseSeries([1, np.nan, 0, 3, 0], index=list('ABCDE'), fill_value=0) - self.assertEqual(s.get('A'), 1) - self.assertTrue(np.isnan(s.get('B'))) - self.assertEqual(s.get('C'), 0) - self.assertIsNone(s.get('XX')) + assert s.get('A') == 1 + assert np.isnan(s.get('B')) + assert s.get('C') == 0 + assert s.get('XX') is None def test_take(self): orig = pd.Series([1, np.nan, np.nan, 3, np.nan], @@ -366,7 +366,7 @@ def test_reindex(self): exp = orig.reindex(['A', 'E', 'C', 'D']).to_sparse() tm.assert_sp_series_equal(res, exp) - def test_reindex_fill_value(self): + def test_fill_value_reindex(self): orig = pd.Series([1, np.nan, 0, 3, 0], index=list('ABCDE')) sparse = orig.to_sparse(fill_value=0) @@ -397,6 +397,28 @@ def test_reindex_fill_value(self): exp = orig.reindex(['A', 'E', 'C', 'D']).to_sparse(fill_value=0) tm.assert_sp_series_equal(res, exp) + def test_reindex_fill_value(self): + floats = pd.Series([1., 2., 3.]).to_sparse() + result = floats.reindex([1, 2, 3], fill_value=0) + expected = pd.Series([2., 3., 0], index=[1, 2, 3]).to_sparse() + tm.assert_sp_series_equal(result, expected) + + def test_reindex_nearest(self): + s = pd.Series(np.arange(10, dtype='float64')).to_sparse() + target = [0.1, 0.9, 1.5, 2.0] + actual = s.reindex(target, method='nearest') + expected = pd.Series(np.around(target), target).to_sparse() + tm.assert_sp_series_equal(expected, actual) + + actual = s.reindex(target, method='nearest', tolerance=0.2) + expected = pd.Series([0, 1, np.nan, 2], target).to_sparse() + tm.assert_sp_series_equal(expected, actual) + + actual = s.reindex(target, method='nearest', + tolerance=[0.3, 0.01, 0.4, 3]) + expected = pd.Series([0, np.nan, np.nan, 2], target).to_sparse() + tm.assert_sp_series_equal(expected, actual) + def tests_indexing_with_sparse(self): # GH 13985 @@ -423,13 +445,13 @@ def 
tests_indexing_with_sparse(self): msg = ("iLocation based boolean indexing cannot use an " "indexable as a mask") - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): s.iloc[indexer] class TestSparseSeriesMultiIndexing(TestSparseSeriesIndexing): - def setUp(self): + def setup_method(self, method): # Mi with duplicated values idx = pd.MultiIndex.from_tuples([('A', 0), ('A', 1), ('B', 0), ('C', 0), ('C', 1)]) @@ -440,9 +462,9 @@ def test_getitem_multi(self): orig = self.orig sparse = self.sparse - self.assertEqual(sparse[0], orig[0]) - self.assertTrue(np.isnan(sparse[1])) - self.assertEqual(sparse[3], orig[3]) + assert sparse[0] == orig[0] + assert np.isnan(sparse[1]) + assert sparse[3] == orig[3] tm.assert_sp_series_equal(sparse['A'], orig['A'].to_sparse()) tm.assert_sp_series_equal(sparse['B'], orig['B'].to_sparse()) @@ -469,9 +491,9 @@ def test_getitem_multi_tuple(self): orig = self.orig sparse = self.sparse - self.assertEqual(sparse['C', 0], orig['C', 0]) - self.assertTrue(np.isnan(sparse['A', 1])) - self.assertTrue(np.isnan(sparse['B', 0])) + assert sparse['C', 0] == orig['C', 0] + assert np.isnan(sparse['A', 1]) + assert np.isnan(sparse['B', 0]) def test_getitems_slice_multi(self): orig = self.orig @@ -504,6 +526,11 @@ def test_loc(self): exp = orig.loc[[1, 3, 4, 5]].to_sparse() tm.assert_sp_series_equal(result, exp) + # single element list (GH 15447) + result = sparse.loc[['A']] + exp = orig.loc[['A']].to_sparse() + tm.assert_sp_series_equal(result, exp) + # dense array result = sparse.loc[orig % 2 == 1] exp = orig.loc[orig % 2 == 1].to_sparse() @@ -522,9 +549,9 @@ def test_loc_multi_tuple(self): orig = self.orig sparse = self.sparse - self.assertEqual(sparse.loc['C', 0], orig.loc['C', 0]) - self.assertTrue(np.isnan(sparse.loc['A', 1])) - self.assertTrue(np.isnan(sparse.loc['B', 0])) + assert sparse.loc['C', 0] == orig.loc['C', 0] + assert np.isnan(sparse.loc['A', 1]) + assert np.isnan(sparse.loc['B', 0]) def 
test_loc_slice(self): orig = self.orig @@ -537,8 +564,37 @@ def test_loc_slice(self): orig.loc['A':'B'].to_sparse()) tm.assert_sp_series_equal(sparse.loc[:'B'], orig.loc[:'B'].to_sparse()) + def test_reindex(self): + # GH 15447 + orig = self.orig + sparse = self.sparse + + res = sparse.reindex([('A', 0), ('C', 1)]) + exp = orig.reindex([('A', 0), ('C', 1)]).to_sparse() + tm.assert_sp_series_equal(res, exp) + + # On specific level: + res = sparse.reindex(['A', 'C', 'B'], level=0) + exp = orig.reindex(['A', 'C', 'B'], level=0).to_sparse() + tm.assert_sp_series_equal(res, exp) -class TestSparseDataFrameIndexing(tm.TestCase): + # single element list (GH 15447) + res = sparse.reindex(['A'], level=0) + exp = orig.reindex(['A'], level=0).to_sparse() + tm.assert_sp_series_equal(res, exp) + + with pytest.raises(TypeError): + # Incomplete keys are not accepted for reindexing: + sparse.reindex(['A', 'C']) + + # "copy" argument: + res = sparse.reindex(sparse.index, copy=True) + exp = orig.reindex(orig.index, copy=True).to_sparse() + tm.assert_sp_series_equal(res, exp) + assert sparse is not res + + +class TestSparseDataFrameIndexing(object): def test_getitem(self): orig = pd.DataFrame([[1, np.nan, np.nan], @@ -594,9 +650,9 @@ def test_loc(self): columns=list('xyz')) sparse = orig.to_sparse() - self.assertEqual(sparse.loc[0, 'x'], 1) - self.assertTrue(np.isnan(sparse.loc[1, 'z'])) - self.assertEqual(sparse.loc[2, 'z'], 4) + assert sparse.loc[0, 'x'] == 1 + assert np.isnan(sparse.loc[1, 'z']) + assert sparse.loc[2, 'z'] == 4 tm.assert_sp_series_equal(sparse.loc[0], orig.loc[0].to_sparse()) tm.assert_sp_series_equal(sparse.loc[1], orig.loc[1].to_sparse()) @@ -626,8 +682,8 @@ def test_loc(self): tm.assert_sp_frame_equal(result, exp) # exceeds the bounds - result = sparse.loc[[1, 3, 4, 5]] - exp = orig.loc[[1, 3, 4, 5]].to_sparse() + result = sparse.reindex([1, 3, 4, 5]) + exp = orig.reindex([1, 3, 4, 5]).to_sparse() tm.assert_sp_frame_equal(result, exp) # dense array @@ -651,9 
+707,9 @@ def test_loc_index(self): index=list('abc'), columns=list('xyz')) sparse = orig.to_sparse() - self.assertEqual(sparse.loc['a', 'x'], 1) - self.assertTrue(np.isnan(sparse.loc['b', 'z'])) - self.assertEqual(sparse.loc['c', 'z'], 4) + assert sparse.loc['a', 'x'] == 1 + assert np.isnan(sparse.loc['b', 'z']) + assert sparse.loc['c', 'z'] == 4 tm.assert_sp_series_equal(sparse.loc['a'], orig.loc['a'].to_sparse()) tm.assert_sp_series_equal(sparse.loc['b'], orig.loc['b'].to_sparse()) @@ -711,8 +767,8 @@ def test_iloc(self): [np.nan, np.nan, 4]]) sparse = orig.to_sparse() - self.assertEqual(sparse.iloc[1, 1], 3) - self.assertTrue(np.isnan(sparse.iloc[2, 0])) + assert sparse.iloc[1, 1] == 3 + assert np.isnan(sparse.iloc[2, 0]) tm.assert_sp_series_equal(sparse.iloc[0], orig.loc[0].to_sparse()) tm.assert_sp_series_equal(sparse.iloc[1], orig.loc[1].to_sparse()) @@ -741,7 +797,7 @@ def test_iloc(self): exp = orig.iloc[[2], [1, 0]].to_sparse() tm.assert_sp_frame_equal(result, exp) - with tm.assertRaises(IndexError): + with pytest.raises(IndexError): sparse.iloc[[1, 3, 5]] def test_iloc_slice(self): @@ -759,10 +815,10 @@ def test_at(self): [0, np.nan, 5]], index=list('ABCD'), columns=list('xyz')) sparse = orig.to_sparse() - self.assertEqual(sparse.at['A', 'x'], orig.at['A', 'x']) - self.assertTrue(np.isnan(sparse.at['B', 'z'])) - self.assertTrue(np.isnan(sparse.at['C', 'y'])) - self.assertEqual(sparse.at['D', 'x'], orig.at['D', 'x']) + assert sparse.at['A', 'x'] == orig.at['A', 'x'] + assert np.isnan(sparse.at['B', 'z']) + assert np.isnan(sparse.at['C', 'y']) + assert sparse.at['D', 'x'] == orig.at['D', 'x'] def test_at_fill_value(self): orig = pd.DataFrame([[1, np.nan, 0], @@ -771,10 +827,10 @@ def test_at_fill_value(self): [0, np.nan, 5]], index=list('ABCD'), columns=list('xyz')) sparse = orig.to_sparse(fill_value=0) - self.assertEqual(sparse.at['A', 'x'], orig.at['A', 'x']) - self.assertTrue(np.isnan(sparse.at['B', 'z'])) - self.assertTrue(np.isnan(sparse.at['C', 
'y'])) - self.assertEqual(sparse.at['D', 'x'], orig.at['D', 'x']) + assert sparse.at['A', 'x'] == orig.at['A', 'x'] + assert np.isnan(sparse.at['B', 'z']) + assert np.isnan(sparse.at['C', 'y']) + assert sparse.at['D', 'x'] == orig.at['D', 'x'] def test_iat(self): orig = pd.DataFrame([[1, np.nan, 0], @@ -783,13 +839,13 @@ def test_iat(self): [0, np.nan, 5]], index=list('ABCD'), columns=list('xyz')) sparse = orig.to_sparse() - self.assertEqual(sparse.iat[0, 0], orig.iat[0, 0]) - self.assertTrue(np.isnan(sparse.iat[1, 2])) - self.assertTrue(np.isnan(sparse.iat[2, 1])) - self.assertEqual(sparse.iat[2, 0], orig.iat[2, 0]) + assert sparse.iat[0, 0] == orig.iat[0, 0] + assert np.isnan(sparse.iat[1, 2]) + assert np.isnan(sparse.iat[2, 1]) + assert sparse.iat[2, 0] == orig.iat[2, 0] - self.assertTrue(np.isnan(sparse.iat[-1, -2])) - self.assertEqual(sparse.iat[-1, -1], orig.iat[-1, -1]) + assert np.isnan(sparse.iat[-1, -2]) + assert sparse.iat[-1, -1] == orig.iat[-1, -1] def test_iat_fill_value(self): orig = pd.DataFrame([[1, np.nan, 0], @@ -798,13 +854,13 @@ def test_iat_fill_value(self): [0, np.nan, 5]], index=list('ABCD'), columns=list('xyz')) sparse = orig.to_sparse(fill_value=0) - self.assertEqual(sparse.iat[0, 0], orig.iat[0, 0]) - self.assertTrue(np.isnan(sparse.iat[1, 2])) - self.assertTrue(np.isnan(sparse.iat[2, 1])) - self.assertEqual(sparse.iat[2, 0], orig.iat[2, 0]) + assert sparse.iat[0, 0] == orig.iat[0, 0] + assert np.isnan(sparse.iat[1, 2]) + assert np.isnan(sparse.iat[2, 1]) + assert sparse.iat[2, 0] == orig.iat[2, 0] - self.assertTrue(np.isnan(sparse.iat[-1, -2])) - self.assertEqual(sparse.iat[-1, -1], orig.iat[-1, -1]) + assert np.isnan(sparse.iat[-1, -2]) + assert sparse.iat[-1, -1] == orig.iat[-1, -1] def test_take(self): orig = pd.DataFrame([[1, np.nan, 0], @@ -901,9 +957,9 @@ def test_reindex_fill_value(self): tm.assert_sp_frame_equal(res, exp) -class TestMultitype(tm.TestCase): +class TestMultitype(object): - def setUp(self): + def setup_method(self, 
method): self.cols = ['string', 'int', 'float', 'object'] self.string_series = pd.SparseSeries(['a', 'b', 'c']) @@ -921,7 +977,7 @@ def setUp(self): def test_frame_basic_dtypes(self): for _, row in self.sdf.iterrows(): - self.assertEqual(row.dtype, object) + assert row.dtype == object tm.assert_sp_series_equal(self.sdf['string'], self.string_series, check_names=False) tm.assert_sp_series_equal(self.sdf['int'], self.int_series, @@ -963,13 +1019,14 @@ def test_frame_indexing_multiple(self): def test_series_indexing_single(self): for i, idx in enumerate(self.cols): - self.assertEqual(self.ss.iloc[i], self.ss[idx]) - self.assertEqual(type(self.ss.iloc[i]), - type(self.ss[idx])) - self.assertEqual(self.ss['string'], 'a') - self.assertEqual(self.ss['int'], 1) - self.assertEqual(self.ss['float'], 1.1) - self.assertEqual(self.ss['object'], []) + assert self.ss.iloc[i] == self.ss[idx] + tm.assert_class_equal(self.ss.iloc[i], self.ss[idx], + obj="series index") + + assert self.ss['string'] == 'a' + assert self.ss['int'] == 1 + assert self.ss['float'] == 1.1 + assert self.ss['object'] == [] def test_series_indexing_multiple(self): tm.assert_sp_series_equal(self.ss.loc[['string', 'int']], diff --git a/pandas/sparse/tests/test_libsparse.py b/pandas/tests/sparse/test_libsparse.py similarity index 77% rename from pandas/sparse/tests/test_libsparse.py rename to pandas/tests/sparse/test_libsparse.py index 4d5a93d77cf14..7719ea46503fd 100644 --- a/pandas/sparse/tests/test_libsparse.py +++ b/pandas/tests/sparse/test_libsparse.py @@ -4,11 +4,10 @@ import numpy as np import operator import pandas.util.testing as tm +import pandas.util._test_decorators as td -from pandas import compat - -from pandas.sparse.array import IntIndex, BlockIndex, _make_index -import pandas._sparse as splib +from pandas.core.sparse.array import IntIndex, BlockIndex, _make_index +import pandas._libs.sparse as splib TEST_LENGTH = 20 @@ -42,7 +41,7 @@ def _check_case_dict(case): _check_case([], [], [], [], [], 
[]) -class TestSparseIndexUnion(tm.TestCase): +class TestSparseIndexUnion(object): def test_index_make_union(self): def _check_case(xloc, xlen, yloc, ylen, eloc, elen): @@ -162,41 +161,42 @@ def test_intindex_make_union(self): b = IntIndex(5, np.array([0, 2], dtype=np.int32)) res = a.make_union(b) exp = IntIndex(5, np.array([0, 2, 3, 4], np.int32)) - self.assertTrue(res.equals(exp)) + assert res.equals(exp) a = IntIndex(5, np.array([], dtype=np.int32)) b = IntIndex(5, np.array([0, 2], dtype=np.int32)) res = a.make_union(b) exp = IntIndex(5, np.array([0, 2], np.int32)) - self.assertTrue(res.equals(exp)) + assert res.equals(exp) a = IntIndex(5, np.array([], dtype=np.int32)) b = IntIndex(5, np.array([], dtype=np.int32)) res = a.make_union(b) exp = IntIndex(5, np.array([], np.int32)) - self.assertTrue(res.equals(exp)) + assert res.equals(exp) a = IntIndex(5, np.array([0, 1, 2, 3, 4], dtype=np.int32)) b = IntIndex(5, np.array([0, 1, 2, 3, 4], dtype=np.int32)) res = a.make_union(b) exp = IntIndex(5, np.array([0, 1, 2, 3, 4], np.int32)) - self.assertTrue(res.equals(exp)) + assert res.equals(exp) a = IntIndex(5, np.array([0, 1], dtype=np.int32)) b = IntIndex(4, np.array([0, 1], dtype=np.int32)) - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): a.make_union(b) -class TestSparseIndexIntersect(tm.TestCase): +class TestSparseIndexIntersect(object): + @td.skip_if_windows def test_intersect(self): def _check_correct(a, b, expected): result = a.intersect(b) assert (result.equals(expected)) def _check_length_exc(a, longer): - self.assertRaises(Exception, a.intersect, longer) + pytest.raises(Exception, a.intersect, longer) def _check_case(xloc, xlen, yloc, ylen, eloc, elen): xindex = BlockIndex(TEST_LENGTH, xloc, xlen) @@ -212,20 +212,18 @@ def _check_case(xloc, xlen, yloc, ylen, eloc, elen): _check_length_exc(xindex.to_int_index(), longer_index.to_int_index()) - if compat.is_platform_windows(): - pytest.skip("segfaults on win-64 when all tests are run") 
check_cases(_check_case) def test_intersect_empty(self): xindex = IntIndex(4, np.array([], dtype=np.int32)) yindex = IntIndex(4, np.array([2, 3], dtype=np.int32)) - self.assertTrue(xindex.intersect(yindex).equals(xindex)) - self.assertTrue(yindex.intersect(xindex).equals(xindex)) + assert xindex.intersect(yindex).equals(xindex) + assert yindex.intersect(xindex).equals(xindex) xindex = xindex.to_block_index() yindex = yindex.to_block_index() - self.assertTrue(xindex.intersect(yindex).equals(xindex)) - self.assertTrue(yindex.intersect(xindex).equals(xindex)) + assert xindex.intersect(yindex).equals(xindex) + assert yindex.intersect(xindex).equals(xindex) def test_intersect_identical(self): cases = [IntIndex(5, np.array([1, 2], dtype=np.int32)), @@ -234,45 +232,45 @@ def test_intersect_identical(self): IntIndex(5, np.array([], dtype=np.int32))] for case in cases: - self.assertTrue(case.intersect(case).equals(case)) + assert case.intersect(case).equals(case) case = case.to_block_index() - self.assertTrue(case.intersect(case).equals(case)) + assert case.intersect(case).equals(case) -class TestSparseIndexCommon(tm.TestCase): +class TestSparseIndexCommon(object): def test_int_internal(self): idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind='integer') - self.assertIsInstance(idx, IntIndex) - self.assertEqual(idx.npoints, 2) + assert isinstance(idx, IntIndex) + assert idx.npoints == 2 tm.assert_numpy_array_equal(idx.indices, np.array([2, 3], dtype=np.int32)) idx = _make_index(4, np.array([], dtype=np.int32), kind='integer') - self.assertIsInstance(idx, IntIndex) - self.assertEqual(idx.npoints, 0) + assert isinstance(idx, IntIndex) + assert idx.npoints == 0 tm.assert_numpy_array_equal(idx.indices, np.array([], dtype=np.int32)) idx = _make_index(4, np.array([0, 1, 2, 3], dtype=np.int32), kind='integer') - self.assertIsInstance(idx, IntIndex) - self.assertEqual(idx.npoints, 4) + assert isinstance(idx, IntIndex) + assert idx.npoints == 4 
tm.assert_numpy_array_equal(idx.indices, np.array([0, 1, 2, 3], dtype=np.int32)) def test_block_internal(self): idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind='block') - self.assertIsInstance(idx, BlockIndex) - self.assertEqual(idx.npoints, 2) + assert isinstance(idx, BlockIndex) + assert idx.npoints == 2 tm.assert_numpy_array_equal(idx.blocs, np.array([2], dtype=np.int32)) tm.assert_numpy_array_equal(idx.blengths, np.array([2], dtype=np.int32)) idx = _make_index(4, np.array([], dtype=np.int32), kind='block') - self.assertIsInstance(idx, BlockIndex) - self.assertEqual(idx.npoints, 0) + assert isinstance(idx, BlockIndex) + assert idx.npoints == 0 tm.assert_numpy_array_equal(idx.blocs, np.array([], dtype=np.int32)) tm.assert_numpy_array_equal(idx.blengths, @@ -280,8 +278,8 @@ def test_block_internal(self): idx = _make_index(4, np.array([0, 1, 2, 3], dtype=np.int32), kind='block') - self.assertIsInstance(idx, BlockIndex) - self.assertEqual(idx.npoints, 4) + assert isinstance(idx, BlockIndex) + assert idx.npoints == 4 tm.assert_numpy_array_equal(idx.blocs, np.array([0], dtype=np.int32)) tm.assert_numpy_array_equal(idx.blengths, @@ -289,8 +287,8 @@ def test_block_internal(self): idx = _make_index(4, np.array([0, 2, 3], dtype=np.int32), kind='block') - self.assertIsInstance(idx, BlockIndex) - self.assertEqual(idx.npoints, 3) + assert isinstance(idx, BlockIndex) + assert idx.npoints == 3 tm.assert_numpy_array_equal(idx.blocs, np.array([0, 2], dtype=np.int32)) tm.assert_numpy_array_equal(idx.blengths, @@ -299,35 +297,35 @@ def test_block_internal(self): def test_lookup(self): for kind in ['integer', 'block']: idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind=kind) - self.assertEqual(idx.lookup(-1), -1) - self.assertEqual(idx.lookup(0), -1) - self.assertEqual(idx.lookup(1), -1) - self.assertEqual(idx.lookup(2), 0) - self.assertEqual(idx.lookup(3), 1) - self.assertEqual(idx.lookup(4), -1) + assert idx.lookup(-1) == -1 + assert idx.lookup(0) == -1 + 
assert idx.lookup(1) == -1 + assert idx.lookup(2) == 0 + assert idx.lookup(3) == 1 + assert idx.lookup(4) == -1 idx = _make_index(4, np.array([], dtype=np.int32), kind=kind) for i in range(-1, 5): - self.assertEqual(idx.lookup(i), -1) + assert idx.lookup(i) == -1 idx = _make_index(4, np.array([0, 1, 2, 3], dtype=np.int32), kind=kind) - self.assertEqual(idx.lookup(-1), -1) - self.assertEqual(idx.lookup(0), 0) - self.assertEqual(idx.lookup(1), 1) - self.assertEqual(idx.lookup(2), 2) - self.assertEqual(idx.lookup(3), 3) - self.assertEqual(idx.lookup(4), -1) + assert idx.lookup(-1) == -1 + assert idx.lookup(0) == 0 + assert idx.lookup(1) == 1 + assert idx.lookup(2) == 2 + assert idx.lookup(3) == 3 + assert idx.lookup(4) == -1 idx = _make_index(4, np.array([0, 2, 3], dtype=np.int32), kind=kind) - self.assertEqual(idx.lookup(-1), -1) - self.assertEqual(idx.lookup(0), 0) - self.assertEqual(idx.lookup(1), -1) - self.assertEqual(idx.lookup(2), 1) - self.assertEqual(idx.lookup(3), 2) - self.assertEqual(idx.lookup(4), -1) + assert idx.lookup(-1) == -1 + assert idx.lookup(0) == 0 + assert idx.lookup(1) == -1 + assert idx.lookup(2) == 1 + assert idx.lookup(3) == 2 + assert idx.lookup(4) == -1 def test_lookup_array(self): for kind in ['integer', 'block']: @@ -335,11 +333,11 @@ def test_lookup_array(self): res = idx.lookup_array(np.array([-1, 0, 2], dtype=np.int32)) exp = np.array([-1, -1, 0], dtype=np.int32) - self.assert_numpy_array_equal(res, exp) + tm.assert_numpy_array_equal(res, exp) res = idx.lookup_array(np.array([4, 2, 1, 3], dtype=np.int32)) exp = np.array([-1, 0, -1, 1], dtype=np.int32) - self.assert_numpy_array_equal(res, exp) + tm.assert_numpy_array_equal(res, exp) idx = _make_index(4, np.array([], dtype=np.int32), kind=kind) res = idx.lookup_array(np.array([-1, 0, 2, 4], dtype=np.int32)) @@ -349,21 +347,21 @@ def test_lookup_array(self): kind=kind) res = idx.lookup_array(np.array([-1, 0, 2], dtype=np.int32)) exp = np.array([-1, 0, 2], dtype=np.int32) - 
self.assert_numpy_array_equal(res, exp) + tm.assert_numpy_array_equal(res, exp) res = idx.lookup_array(np.array([4, 2, 1, 3], dtype=np.int32)) exp = np.array([-1, 2, 1, 3], dtype=np.int32) - self.assert_numpy_array_equal(res, exp) + tm.assert_numpy_array_equal(res, exp) idx = _make_index(4, np.array([0, 2, 3], dtype=np.int32), kind=kind) res = idx.lookup_array(np.array([2, 1, 3, 0], dtype=np.int32)) exp = np.array([1, -1, 2, 0], dtype=np.int32) - self.assert_numpy_array_equal(res, exp) + tm.assert_numpy_array_equal(res, exp) res = idx.lookup_array(np.array([1, 4, 2, 5], dtype=np.int32)) exp = np.array([-1, -1, 1, -1], dtype=np.int32) - self.assert_numpy_array_equal(res, exp) + tm.assert_numpy_array_equal(res, exp) def test_lookup_basics(self): def _check(index): @@ -387,20 +385,20 @@ def _check(index): # corner cases -class TestBlockIndex(tm.TestCase): +class TestBlockIndex(object): def test_block_internal(self): idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind='block') - self.assertIsInstance(idx, BlockIndex) - self.assertEqual(idx.npoints, 2) + assert isinstance(idx, BlockIndex) + assert idx.npoints == 2 tm.assert_numpy_array_equal(idx.blocs, np.array([2], dtype=np.int32)) tm.assert_numpy_array_equal(idx.blengths, np.array([2], dtype=np.int32)) idx = _make_index(4, np.array([], dtype=np.int32), kind='block') - self.assertIsInstance(idx, BlockIndex) - self.assertEqual(idx.npoints, 0) + assert isinstance(idx, BlockIndex) + assert idx.npoints == 0 tm.assert_numpy_array_equal(idx.blocs, np.array([], dtype=np.int32)) tm.assert_numpy_array_equal(idx.blengths, @@ -408,16 +406,16 @@ def test_block_internal(self): idx = _make_index(4, np.array([0, 1, 2, 3], dtype=np.int32), kind='block') - self.assertIsInstance(idx, BlockIndex) - self.assertEqual(idx.npoints, 4) + assert isinstance(idx, BlockIndex) + assert idx.npoints == 4 tm.assert_numpy_array_equal(idx.blocs, np.array([0], dtype=np.int32)) tm.assert_numpy_array_equal(idx.blengths, np.array([4], 
dtype=np.int32)) idx = _make_index(4, np.array([0, 2, 3], dtype=np.int32), kind='block') - self.assertIsInstance(idx, BlockIndex) - self.assertEqual(idx.npoints, 3) + assert isinstance(idx, BlockIndex) + assert idx.npoints == 3 tm.assert_numpy_array_equal(idx.blocs, np.array([0, 2], dtype=np.int32)) tm.assert_numpy_array_equal(idx.blengths, @@ -436,8 +434,8 @@ def test_make_block_boundary(self): def test_equals(self): index = BlockIndex(10, [0, 4], [2, 5]) - self.assertTrue(index.equals(index)) - self.assertFalse(index.equals(BlockIndex(10, [0, 4], [2, 6]))) + assert index.equals(index) + assert not index.equals(BlockIndex(10, [0, 4], [2, 6])) def test_check_integrity(self): locs = [] @@ -451,10 +449,10 @@ def test_check_integrity(self): index = BlockIndex(1, locs, lengths) # noqa # block extend beyond end - self.assertRaises(Exception, BlockIndex, 10, [5], [10]) + pytest.raises(Exception, BlockIndex, 10, [5], [10]) # block overlap - self.assertRaises(Exception, BlockIndex, 10, [2, 5], [5, 3]) + pytest.raises(Exception, BlockIndex, 10, [2, 5], [5, 3]) def test_to_int_index(self): locs = [0, 10] @@ -469,35 +467,73 @@ def test_to_int_index(self): def test_to_block_index(self): index = BlockIndex(10, [0, 5], [4, 5]) - self.assertIs(index.to_block_index(), index) + assert index.to_block_index() is index + + +class TestIntIndex(object): + + def test_check_integrity(self): + + # Too many indices than specified in self.length + msg = "Too many indices" + + with tm.assert_raises_regex(ValueError, msg): + IntIndex(length=1, indices=[1, 2, 3]) + + # No index can be negative. + msg = "No index can be less than zero" + + with tm.assert_raises_regex(ValueError, msg): + IntIndex(length=5, indices=[1, -2, 3]) + # No index can be negative. + msg = "No index can be less than zero" -class TestIntIndex(tm.TestCase): + with tm.assert_raises_regex(ValueError, msg): + IntIndex(length=5, indices=[1, -2, 3]) + + # All indices must be less than the length. 
+ msg = "All indices must be less than the length" + + with tm.assert_raises_regex(ValueError, msg): + IntIndex(length=5, indices=[1, 2, 5]) + + with tm.assert_raises_regex(ValueError, msg): + IntIndex(length=5, indices=[1, 2, 6]) + + # Indices must be strictly ascending. + msg = "Indices must be strictly increasing" + + with tm.assert_raises_regex(ValueError, msg): + IntIndex(length=5, indices=[1, 3, 2]) + + with tm.assert_raises_regex(ValueError, msg): + IntIndex(length=5, indices=[1, 3, 3]) def test_int_internal(self): idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind='integer') - self.assertIsInstance(idx, IntIndex) - self.assertEqual(idx.npoints, 2) + assert isinstance(idx, IntIndex) + assert idx.npoints == 2 tm.assert_numpy_array_equal(idx.indices, np.array([2, 3], dtype=np.int32)) idx = _make_index(4, np.array([], dtype=np.int32), kind='integer') - self.assertIsInstance(idx, IntIndex) - self.assertEqual(idx.npoints, 0) + assert isinstance(idx, IntIndex) + assert idx.npoints == 0 tm.assert_numpy_array_equal(idx.indices, np.array([], dtype=np.int32)) idx = _make_index(4, np.array([0, 1, 2, 3], dtype=np.int32), kind='integer') - self.assertIsInstance(idx, IntIndex) - self.assertEqual(idx.npoints, 4) + assert isinstance(idx, IntIndex) + assert idx.npoints == 4 tm.assert_numpy_array_equal(idx.indices, np.array([0, 1, 2, 3], dtype=np.int32)) def test_equals(self): index = IntIndex(10, [0, 1, 2, 3, 4]) - self.assertTrue(index.equals(index)) - self.assertFalse(index.equals(IntIndex(10, [0, 1, 2, 3]))) + assert index.equals(index) + assert not index.equals(IntIndex(10, [0, 1, 2, 3])) def test_to_block_index(self): @@ -508,18 +544,18 @@ def _check_case(xloc, xlen, yloc, ylen, eloc, elen): # see if survive the round trip xbindex = xindex.to_int_index().to_block_index() ybindex = yindex.to_int_index().to_block_index() - tm.assertIsInstance(xbindex, BlockIndex) - self.assertTrue(xbindex.equals(xindex)) - self.assertTrue(ybindex.equals(yindex)) + assert 
isinstance(xbindex, BlockIndex) + assert xbindex.equals(xindex) + assert ybindex.equals(yindex) check_cases(_check_case) def test_to_int_index(self): index = IntIndex(10, [2, 3, 4, 5, 6]) - self.assertIs(index.to_int_index(), index) + assert index.to_int_index() is index -class TestSparseOperators(tm.TestCase): +class TestSparseOperators(object): def _op_tests(self, sparse_op, python_op): def _check_case(xloc, xlen, yloc, ylen, eloc, elen): @@ -540,9 +576,9 @@ def _check_case(xloc, xlen, yloc, ylen, eloc, elen): result_int_vals, ri_index, ifill = sparse_op(x, xdindex, xfill, y, ydindex, yfill) - self.assertTrue(rb_index.to_int_index().equals(ri_index)) + assert rb_index.to_int_index().equals(ri_index) tm.assert_numpy_array_equal(result_block_vals, result_int_vals) - self.assertEqual(bfill, ifill) + assert bfill == ifill # check versus Series... xseries = Series(x, xdindex.indices) @@ -560,8 +596,8 @@ def _check_case(xloc, xlen, yloc, ylen, eloc, elen): check_cases(_check_case) -# too cute? oh but how I abhor code duplication +# too cute? 
oh but how I abhor code duplication check_ops = ['add', 'sub', 'mul', 'truediv', 'floordiv'] diff --git a/pandas/sparse/tests/test_pivot.py b/pandas/tests/sparse/test_pivot.py similarity index 97% rename from pandas/sparse/tests/test_pivot.py rename to pandas/tests/sparse/test_pivot.py index 4ff9f20093c67..e7eba63e4e0b3 100644 --- a/pandas/sparse/tests/test_pivot.py +++ b/pandas/tests/sparse/test_pivot.py @@ -3,9 +3,9 @@ import pandas.util.testing as tm -class TestPivotTable(tm.TestCase): +class TestPivotTable(object): - def setUp(self): + def setup_method(self, method): self.dense = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], 'B': ['one', 'one', 'two', 'three', diff --git a/pandas/tests/sparse/test_reshape.py b/pandas/tests/sparse/test_reshape.py new file mode 100644 index 0000000000000..b492c47375bcf --- /dev/null +++ b/pandas/tests/sparse/test_reshape.py @@ -0,0 +1,38 @@ +import pytest +import numpy as np + +import pandas as pd +import pandas.util.testing as tm + + +@pytest.fixture +def sparse_df(): + return pd.SparseDataFrame({0: {0: 1}, 1: {1: 1}, 2: {2: 1}}) # eye + + +@pytest.fixture +def multi_index3(): + return pd.MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)]) + + +def test_sparse_frame_stack(sparse_df, multi_index3): + ss = sparse_df.stack() + expected = pd.SparseSeries(np.ones(3), index=multi_index3) + tm.assert_sp_series_equal(ss, expected) + + +def test_sparse_frame_unstack(sparse_df): + mi = pd.MultiIndex.from_tuples([(0, 0), (1, 0), (1, 2)]) + sparse_df.index = mi + arr = np.array([[1, np.nan, np.nan], + [np.nan, 1, np.nan], + [np.nan, np.nan, 1]]) + unstacked_df = pd.DataFrame(arr, index=mi).unstack() + unstacked_sdf = sparse_df.unstack() + + tm.assert_numpy_array_equal(unstacked_df.values, unstacked_sdf.values) + + +def test_sparse_series_unstack(sparse_df, multi_index3): + frame = pd.SparseSeries(np.ones(3), index=multi_index3).unstack() + tm.assert_sp_frame_equal(frame, sparse_df) diff --git 
a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index fab04f7fa4bf2..884b1eb7342c6 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1,25 +1,31 @@ # -*- coding: utf-8 -*- -from pandas.compat import range import numpy as np +import pytest + from numpy.random import RandomState from numpy import nan from datetime import datetime from itertools import permutations -from pandas import Series, Categorical, CategoricalIndex, Index +from pandas import (Series, Categorical, CategoricalIndex, + Timestamp, DatetimeIndex, Index, IntervalIndex) import pandas as pd from pandas import compat -import pandas.algos as _algos -from pandas.compat import lrange +from pandas._libs import (groupby as libgroupby, algos as libalgos, + hashtable as ht) +from pandas._libs.hashtable import unique_label_indices +from pandas.compat import lrange, range import pandas.core.algorithms as algos +import pandas.core.common as com import pandas.util.testing as tm -import pandas.hashtable as hashtable +import pandas.util._test_decorators as td +from pandas.core.dtypes.dtypes import CategoricalDtype as CDT from pandas.compat.numpy import np_array_datetime64_compat from pandas.util.testing import assert_almost_equal -class TestMatch(tm.TestCase): +class TestMatch(object): def test_ints(self): values = np.array([0, 2, 1]) @@ -27,16 +33,16 @@ def test_ints(self): result = algos.match(to_match, values) expected = np.array([0, 2, 1, 1, 0, 2, -1, 0], dtype=np.int64) - self.assert_numpy_array_equal(result, expected) + tm.assert_numpy_array_equal(result, expected) result = Series(algos.match(to_match, values, np.nan)) expected = Series(np.array([0, 2, 1, 1, 0, 2, np.nan, 0])) tm.assert_series_equal(result, expected) - s = pd.Series(np.arange(5), dtype=np.float32) + s = Series(np.arange(5), dtype=np.float32) result = algos.match(s, [2, 4]) expected = np.array([-1, -1, 0, -1, 1], dtype=np.int64) - self.assert_numpy_array_equal(result, expected) + 
tm.assert_numpy_array_equal(result, expected) result = Series(algos.match(s, [2, 4], np.nan)) expected = Series(np.array([np.nan, np.nan, 0, np.nan, 1])) @@ -48,140 +54,54 @@ def test_strings(self): result = algos.match(to_match, values) expected = np.array([1, 0, -1, 0, 1, 2, -1], dtype=np.int64) - self.assert_numpy_array_equal(result, expected) + tm.assert_numpy_array_equal(result, expected) result = Series(algos.match(to_match, values, np.nan)) expected = Series(np.array([1, 0, np.nan, 0, 1, 2, np.nan])) tm.assert_series_equal(result, expected) -class TestSafeSort(tm.TestCase): - - def test_basic_sort(self): - values = [3, 1, 2, 0, 4] - result = algos.safe_sort(values) - expected = np.array([0, 1, 2, 3, 4]) - tm.assert_numpy_array_equal(result, expected) - - values = list("baaacb") - result = algos.safe_sort(values) - expected = np.array(list("aaabbc")) - tm.assert_numpy_array_equal(result, expected) - - values = [] - result = algos.safe_sort(values) - expected = np.array([]) - tm.assert_numpy_array_equal(result, expected) - - def test_labels(self): - values = [3, 1, 2, 0, 4] - expected = np.array([0, 1, 2, 3, 4]) - - labels = [0, 1, 1, 2, 3, 0, -1, 4] - result, result_labels = algos.safe_sort(values, labels) - expected_labels = np.array([3, 1, 1, 2, 0, 3, -1, 4], dtype=np.intp) - tm.assert_numpy_array_equal(result, expected) - tm.assert_numpy_array_equal(result_labels, expected_labels) - - # na_sentinel - labels = [0, 1, 1, 2, 3, 0, 99, 4] - result, result_labels = algos.safe_sort(values, labels, - na_sentinel=99) - expected_labels = np.array([3, 1, 1, 2, 0, 3, 99, 4], dtype=np.intp) - tm.assert_numpy_array_equal(result, expected) - tm.assert_numpy_array_equal(result_labels, expected_labels) - - # out of bound indices - labels = [0, 101, 102, 2, 3, 0, 99, 4] - result, result_labels = algos.safe_sort(values, labels) - expected_labels = np.array([3, -1, -1, 2, 0, 3, -1, 4], dtype=np.intp) - tm.assert_numpy_array_equal(result, expected) - 
tm.assert_numpy_array_equal(result_labels, expected_labels) - - labels = [] - result, result_labels = algos.safe_sort(values, labels) - expected_labels = np.array([], dtype=np.intp) - tm.assert_numpy_array_equal(result, expected) - tm.assert_numpy_array_equal(result_labels, expected_labels) - - def test_mixed_integer(self): - values = np.array(['b', 1, 0, 'a', 0, 'b'], dtype=object) - result = algos.safe_sort(values) - expected = np.array([0, 0, 1, 'a', 'b', 'b'], dtype=object) - tm.assert_numpy_array_equal(result, expected) - - values = np.array(['b', 1, 0, 'a'], dtype=object) - labels = [0, 1, 2, 3, 0, -1, 1] - result, result_labels = algos.safe_sort(values, labels) - expected = np.array([0, 1, 'a', 'b'], dtype=object) - expected_labels = np.array([3, 1, 0, 2, 3, -1, 1], dtype=np.intp) - tm.assert_numpy_array_equal(result, expected) - tm.assert_numpy_array_equal(result_labels, expected_labels) - - def test_unsortable(self): - # GH 13714 - arr = np.array([1, 2, datetime.now(), 0, 3], dtype=object) - if compat.PY2 and not pd._np_version_under1p10: - # RuntimeWarning: tp_compare didn't return -1 or -2 for exception - with tm.assert_produces_warning(RuntimeWarning): - tm.assertRaises(TypeError, algos.safe_sort, arr) - else: - tm.assertRaises(TypeError, algos.safe_sort, arr) - - def test_exceptions(self): - with tm.assertRaisesRegexp(TypeError, - "Only list-like objects are allowed"): - algos.safe_sort(values=1) - - with tm.assertRaisesRegexp(TypeError, - "Only list-like objects or None"): - algos.safe_sort(values=[0, 1, 2], labels=1) - - with tm.assertRaisesRegexp(ValueError, "values should be unique"): - algos.safe_sort(values=[0, 1, 2, 1], labels=[0, 1]) - - -class TestFactorize(tm.TestCase): +class TestFactorize(object): def test_basic(self): labels, uniques = algos.factorize(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']) - self.assert_numpy_array_equal( + tm.assert_numpy_array_equal( uniques, np.array(['a', 'b', 'c'], dtype=object)) labels, uniques = 
algos.factorize(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'], sort=True) exp = np.array([0, 1, 1, 0, 0, 2, 2, 2], dtype=np.intp) - self.assert_numpy_array_equal(labels, exp) + tm.assert_numpy_array_equal(labels, exp) exp = np.array(['a', 'b', 'c'], dtype=object) - self.assert_numpy_array_equal(uniques, exp) + tm.assert_numpy_array_equal(uniques, exp) labels, uniques = algos.factorize(list(reversed(range(5)))) exp = np.array([0, 1, 2, 3, 4], dtype=np.intp) - self.assert_numpy_array_equal(labels, exp) + tm.assert_numpy_array_equal(labels, exp) exp = np.array([4, 3, 2, 1, 0], dtype=np.int64) - self.assert_numpy_array_equal(uniques, exp) + tm.assert_numpy_array_equal(uniques, exp) labels, uniques = algos.factorize(list(reversed(range(5))), sort=True) exp = np.array([4, 3, 2, 1, 0], dtype=np.intp) - self.assert_numpy_array_equal(labels, exp) + tm.assert_numpy_array_equal(labels, exp) exp = np.array([0, 1, 2, 3, 4], dtype=np.int64) - self.assert_numpy_array_equal(uniques, exp) + tm.assert_numpy_array_equal(uniques, exp) labels, uniques = algos.factorize(list(reversed(np.arange(5.)))) exp = np.array([0, 1, 2, 3, 4], dtype=np.intp) - self.assert_numpy_array_equal(labels, exp) + tm.assert_numpy_array_equal(labels, exp) exp = np.array([4., 3., 2., 1., 0.], dtype=np.float64) - self.assert_numpy_array_equal(uniques, exp) + tm.assert_numpy_array_equal(uniques, exp) labels, uniques = algos.factorize(list(reversed(np.arange(5.))), sort=True) exp = np.array([4, 3, 2, 1, 0], dtype=np.intp) - self.assert_numpy_array_equal(labels, exp) + tm.assert_numpy_array_equal(labels, exp) exp = np.array([0., 1., 2., 3., 4.], dtype=np.float64) - self.assert_numpy_array_equal(uniques, exp) + tm.assert_numpy_array_equal(uniques, exp) def test_mixed(self): @@ -190,34 +110,34 @@ def test_mixed(self): labels, uniques = algos.factorize(x) exp = np.array([0, 0, -1, 1, 2, 3], dtype=np.intp) - self.assert_numpy_array_equal(labels, exp) - exp = pd.Index(['A', 'B', 3.14, np.inf]) + 
tm.assert_numpy_array_equal(labels, exp) + exp = Index(['A', 'B', 3.14, np.inf]) tm.assert_index_equal(uniques, exp) labels, uniques = algos.factorize(x, sort=True) exp = np.array([2, 2, -1, 3, 0, 1], dtype=np.intp) - self.assert_numpy_array_equal(labels, exp) - exp = pd.Index([3.14, np.inf, 'A', 'B']) + tm.assert_numpy_array_equal(labels, exp) + exp = Index([3.14, np.inf, 'A', 'B']) tm.assert_index_equal(uniques, exp) def test_datelike(self): # M8 - v1 = pd.Timestamp('20130101 09:00:00.00004') - v2 = pd.Timestamp('20130101') + v1 = Timestamp('20130101 09:00:00.00004') + v2 = Timestamp('20130101') x = Series([v1, v1, v1, v2, v2, v1]) labels, uniques = algos.factorize(x) exp = np.array([0, 0, 0, 1, 1, 0], dtype=np.intp) - self.assert_numpy_array_equal(labels, exp) - exp = pd.DatetimeIndex([v1, v2]) - self.assert_index_equal(uniques, exp) + tm.assert_numpy_array_equal(labels, exp) + exp = DatetimeIndex([v1, v2]) + tm.assert_index_equal(uniques, exp) labels, uniques = algos.factorize(x, sort=True) exp = np.array([1, 1, 1, 0, 0, 1], dtype=np.intp) - self.assert_numpy_array_equal(labels, exp) - exp = pd.DatetimeIndex([v2, v1]) - self.assert_index_equal(uniques, exp) + tm.assert_numpy_array_equal(labels, exp) + exp = DatetimeIndex([v2, v1]) + tm.assert_index_equal(uniques, exp) # period v1 = pd.Period('201302', freq='M') @@ -227,13 +147,13 @@ def test_datelike(self): # periods are not 'sorted' as they are converted back into an index labels, uniques = algos.factorize(x) exp = np.array([0, 0, 0, 1, 1, 0], dtype=np.intp) - self.assert_numpy_array_equal(labels, exp) - self.assert_index_equal(uniques, pd.PeriodIndex([v1, v2])) + tm.assert_numpy_array_equal(labels, exp) + tm.assert_index_equal(uniques, pd.PeriodIndex([v1, v2])) labels, uniques = algos.factorize(x, sort=True) exp = np.array([0, 0, 0, 1, 1, 0], dtype=np.intp) - self.assert_numpy_array_equal(labels, exp) - self.assert_index_equal(uniques, pd.PeriodIndex([v1, v2])) + tm.assert_numpy_array_equal(labels, exp) + 
tm.assert_index_equal(uniques, pd.PeriodIndex([v1, v2])) # GH 5986 v1 = pd.to_timedelta('1 day 1 min') @@ -241,26 +161,26 @@ def test_datelike(self): x = Series([v1, v2, v1, v1, v2, v2, v1]) labels, uniques = algos.factorize(x) exp = np.array([0, 1, 0, 0, 1, 1, 0], dtype=np.intp) - self.assert_numpy_array_equal(labels, exp) - self.assert_index_equal(uniques, pd.to_timedelta([v1, v2])) + tm.assert_numpy_array_equal(labels, exp) + tm.assert_index_equal(uniques, pd.to_timedelta([v1, v2])) labels, uniques = algos.factorize(x, sort=True) exp = np.array([1, 0, 1, 1, 0, 0, 1], dtype=np.intp) - self.assert_numpy_array_equal(labels, exp) - self.assert_index_equal(uniques, pd.to_timedelta([v2, v1])) + tm.assert_numpy_array_equal(labels, exp) + tm.assert_index_equal(uniques, pd.to_timedelta([v2, v1])) def test_factorize_nan(self): # nan should map to na_sentinel, not reverse_indexer[na_sentinel] # rizer.factorize should not raise an exception if na_sentinel indexes # outside of reverse_indexer key = np.array([1, 2, 1, np.nan], dtype='O') - rizer = hashtable.Factorizer(len(key)) + rizer = ht.Factorizer(len(key)) for na_sentinel in (-1, 20): ids = rizer.factorize(key, sort=True, na_sentinel=na_sentinel) expected = np.array([0, 1, 0, na_sentinel], dtype='int32') - self.assertEqual(len(set(key)), len(set(expected))) - self.assertTrue(np.array_equal( - pd.isnull(key), expected == na_sentinel)) + assert len(set(key)) == len(set(expected)) + tm.assert_numpy_array_equal(pd.isna(key), + expected == na_sentinel) # nan still maps to na_sentinel when sort=False key = np.array([0, np.nan, 1], dtype='O') @@ -270,19 +190,46 @@ def test_factorize_nan(self): ids = rizer.factorize(key, sort=False, na_sentinel=na_sentinel) # noqa expected = np.array([2, -1, 0], dtype='int32') - self.assertEqual(len(set(key)), len(set(expected))) - self.assertTrue( - np.array_equal(pd.isnull(key), expected == na_sentinel)) + assert len(set(key)) == len(set(expected)) + tm.assert_numpy_array_equal(pd.isna(key), 
expected == na_sentinel) + + @pytest.mark.parametrize("data,expected_label,expected_level", [ + ( + [(1, 1), (1, 2), (0, 0), (1, 2), 'nonsense'], + [0, 1, 2, 1, 3], + [(1, 1), (1, 2), (0, 0), 'nonsense'] + ), + ( + [(1, 1), (1, 2), (0, 0), (1, 2), (1, 2, 3)], + [0, 1, 2, 1, 3], + [(1, 1), (1, 2), (0, 0), (1, 2, 3)] + ), + ( + [(1, 1), (1, 2), (0, 0), (1, 2)], + [0, 1, 2, 1], + [(1, 1), (1, 2), (0, 0)] + ) + ]) + def test_factorize_tuple_list(self, data, expected_label, expected_level): + # GH9454 + result = pd.factorize(data) + + tm.assert_numpy_array_equal(result[0], + np.array(expected_label, dtype=np.intp)) + + expected_level_array = com._asarray_tuplesafe(expected_level, + dtype=object) + tm.assert_numpy_array_equal(result[1], expected_level_array) def test_complex_sorting(self): # gh 12666 - check no segfault # Test not valid numpy versions older than 1.11 if pd._np_version_under1p11: - self.skipTest("Test valid only for numpy 1.11+") + pytest.skip("Test valid only for numpy 1.11+") x17 = np.array([complex(i) for i in range(17)], dtype=object) - self.assertRaises(TypeError, algos.factorize, x17[::-1], sort=True) + pytest.raises(TypeError, algos.factorize, x17[::-1], sort=True) def test_uint64_factorize(self): data = np.array([2**63, 1, 2**63], dtype=np.uint64) @@ -301,20 +248,29 @@ def test_uint64_factorize(self): tm.assert_numpy_array_equal(labels, exp_labels) tm.assert_numpy_array_equal(uniques, exp_uniques) + def test_deprecate_order(self): + # gh 19727 - check warning is raised for deprecated keyword, order. + # Test not valid once order keyword is removed. 
+ data = np.array([2**63, 1, 2**63], dtype=np.uint64) + with tm.assert_produces_warning(expected_warning=FutureWarning): + algos.factorize(data, order=True) + with tm.assert_produces_warning(False): + algos.factorize(data) + -class TestUnique(tm.TestCase): +class TestUnique(object): def test_ints(self): arr = np.random.randint(0, 100, size=50) result = algos.unique(arr) - tm.assertIsInstance(result, np.ndarray) + assert isinstance(result, np.ndarray) def test_objects(self): arr = np.random.randint(0, 100, size=50).astype('O') result = algos.unique(arr) - tm.assertIsInstance(result, np.ndarray) + assert isinstance(result, np.ndarray) def test_object_refcount_bug(self): lst = ['A', 'B', 'C', 'D', 'E'] @@ -347,17 +303,17 @@ def test_datetime64_dtype_array_returned(self): '2015-01-01T00:00:00.000000000+0000']) result = algos.unique(dt_index) tm.assert_numpy_array_equal(result, expected) - self.assertEqual(result.dtype, expected.dtype) + assert result.dtype == expected.dtype - s = pd.Series(dt_index) + s = Series(dt_index) result = algos.unique(s) tm.assert_numpy_array_equal(result, expected) - self.assertEqual(result.dtype, expected.dtype) + assert result.dtype == expected.dtype arr = s.values result = algos.unique(arr) tm.assert_numpy_array_equal(result, expected) - self.assertEqual(result.dtype, expected.dtype) + assert result.dtype == expected.dtype def test_timedelta64_dtype_array_returned(self): # GH 9431 @@ -366,31 +322,153 @@ def test_timedelta64_dtype_array_returned(self): td_index = pd.to_timedelta([31200, 45678, 31200, 10000, 45678]) result = algos.unique(td_index) tm.assert_numpy_array_equal(result, expected) - self.assertEqual(result.dtype, expected.dtype) + assert result.dtype == expected.dtype - s = pd.Series(td_index) + s = Series(td_index) result = algos.unique(s) tm.assert_numpy_array_equal(result, expected) - self.assertEqual(result.dtype, expected.dtype) + assert result.dtype == expected.dtype arr = s.values result = algos.unique(arr) 
tm.assert_numpy_array_equal(result, expected) - self.assertEqual(result.dtype, expected.dtype) + assert result.dtype == expected.dtype def test_uint64_overflow(self): - s = pd.Series([1, 2, 2**63, 2**63], dtype=np.uint64) + s = Series([1, 2, 2**63, 2**63], dtype=np.uint64) exp = np.array([1, 2, 2**63], dtype=np.uint64) tm.assert_numpy_array_equal(algos.unique(s), exp) + def test_nan_in_object_array(self): + l = ['a', np.nan, 'c', 'c'] + result = pd.unique(l) + expected = np.array(['a', np.nan, 'c'], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + def test_categorical(self): + + # we are expecting to return in the order + # of appearance + expected = Categorical(list('bac'), categories=list('bac')) + + # we are expecting to return in the order + # of the categories + expected_o = Categorical( + list('bac'), categories=list('abc'), ordered=True) + + # GH 15939 + c = Categorical(list('baabc')) + result = c.unique() + tm.assert_categorical_equal(result, expected) + + result = algos.unique(c) + tm.assert_categorical_equal(result, expected) + + c = Categorical(list('baabc'), ordered=True) + result = c.unique() + tm.assert_categorical_equal(result, expected_o) + + result = algos.unique(c) + tm.assert_categorical_equal(result, expected_o) + + # Series of categorical dtype + s = Series(Categorical(list('baabc')), name='foo') + result = s.unique() + tm.assert_categorical_equal(result, expected) + + result = pd.unique(s) + tm.assert_categorical_equal(result, expected) + + # CI -> return CI + ci = CategoricalIndex(Categorical(list('baabc'), + categories=list('bac'))) + expected = CategoricalIndex(expected) + result = ci.unique() + tm.assert_index_equal(result, expected) + + result = pd.unique(ci) + tm.assert_index_equal(result, expected) + + def test_datetime64tz_aware(self): + # GH 15939 + + result = Series( + Index([Timestamp('20160101', tz='US/Eastern'), + Timestamp('20160101', tz='US/Eastern')])).unique() + expected = np.array([Timestamp('2016-01-01 
00:00:00-0500', + tz='US/Eastern')], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + result = Index([Timestamp('20160101', tz='US/Eastern'), + Timestamp('20160101', tz='US/Eastern')]).unique() + expected = DatetimeIndex(['2016-01-01 00:00:00'], + dtype='datetime64[ns, US/Eastern]', freq=None) + tm.assert_index_equal(result, expected) + + result = pd.unique( + Series(Index([Timestamp('20160101', tz='US/Eastern'), + Timestamp('20160101', tz='US/Eastern')]))) + expected = np.array([Timestamp('2016-01-01 00:00:00-0500', + tz='US/Eastern')], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + result = pd.unique(Index([Timestamp('20160101', tz='US/Eastern'), + Timestamp('20160101', tz='US/Eastern')])) + expected = DatetimeIndex(['2016-01-01 00:00:00'], + dtype='datetime64[ns, US/Eastern]', freq=None) + tm.assert_index_equal(result, expected) + + def test_order_of_appearance(self): + # 9346 + # light testing of guarantee of order of appearance + # these also are the doc-examples + result = pd.unique(Series([2, 1, 3, 3])) + tm.assert_numpy_array_equal(result, + np.array([2, 1, 3], dtype='int64')) + + result = pd.unique(Series([2] + [1] * 5)) + tm.assert_numpy_array_equal(result, + np.array([2, 1], dtype='int64')) + + result = pd.unique(Series([Timestamp('20160101'), + Timestamp('20160101')])) + expected = np.array(['2016-01-01T00:00:00.000000000'], + dtype='datetime64[ns]') + tm.assert_numpy_array_equal(result, expected) + + result = pd.unique(Index( + [Timestamp('20160101', tz='US/Eastern'), + Timestamp('20160101', tz='US/Eastern')])) + expected = DatetimeIndex(['2016-01-01 00:00:00'], + dtype='datetime64[ns, US/Eastern]', + freq=None) + tm.assert_index_equal(result, expected) + + result = pd.unique(list('aabc')) + expected = np.array(['a', 'b', 'c'], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + result = pd.unique(Series(Categorical(list('aabc')))) + expected = Categorical(list('abc')) + 
tm.assert_categorical_equal(result, expected) + + @pytest.mark.parametrize("arg ,expected", [ + (('1', '1', '2'), np.array(['1', '2'], dtype=object)), + (('foo',), np.array(['foo'], dtype=object)) + ]) + def test_tuple_with_strings(self, arg, expected): + # see GH 17108 + result = pd.unique(arg) + tm.assert_numpy_array_equal(result, expected) + -class TestIsin(tm.TestCase): +class TestIsin(object): def test_invalid(self): - self.assertRaises(TypeError, lambda: algos.isin(1, 1)) - self.assertRaises(TypeError, lambda: algos.isin(1, [1])) - self.assertRaises(TypeError, lambda: algos.isin([1], 1)) + pytest.raises(TypeError, lambda: algos.isin(1, 1)) + pytest.raises(TypeError, lambda: algos.isin(1, [1])) + pytest.raises(TypeError, lambda: algos.isin([1], 1)) def test_basic(self): @@ -402,15 +480,15 @@ def test_basic(self): expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) - result = algos.isin(pd.Series([1, 2]), [1]) + result = algos.isin(Series([1, 2]), [1]) expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) - result = algos.isin(pd.Series([1, 2]), pd.Series([1])) + result = algos.isin(Series([1, 2]), Series([1])) expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) - result = algos.isin(pd.Series([1, 2]), set([1])) + result = algos.isin(Series([1, 2]), set([1])) expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) @@ -418,11 +496,11 @@ def test_basic(self): expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) - result = algos.isin(pd.Series(['a', 'b']), pd.Series(['a'])) + result = algos.isin(Series(['a', 'b']), Series(['a'])) expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) - result = algos.isin(pd.Series(['a', 'b']), set(['a'])) + result = algos.isin(Series(['a', 'b']), set(['a'])) expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) @@ -430,6 +508,8 @@ def 
test_basic(self): expected = np.array([False, False]) tm.assert_numpy_array_equal(result, expected) + def test_i8(self): + arr = pd.date_range('20130101', periods=3).values result = algos.isin(arr, [arr[0]]) expected = np.array([True, False, False]) @@ -465,47 +545,67 @@ def test_large(self): expected[1] = True tm.assert_numpy_array_equal(result, expected) + def test_categorical_from_codes(self): + # GH 16639 + vals = np.array([0, 1, 2, 0]) + cats = ['a', 'b', 'c'] + Sd = Series(Categorical(1).from_codes(vals, cats)) + St = Series(Categorical(1).from_codes(np.array([0, 1]), cats)) + expected = np.array([True, True, False, True]) + result = algos.isin(Sd, St) + tm.assert_numpy_array_equal(expected, result) + + @pytest.mark.parametrize("empty", [[], Series(), np.array([])]) + def test_empty(self, empty): + # see gh-16991 + vals = Index(["a", "b"]) + expected = np.array([False, False]) + + result = algos.isin(vals, empty) + tm.assert_numpy_array_equal(expected, result) + -class TestValueCounts(tm.TestCase): +class TestValueCounts(object): def test_value_counts(self): np.random.seed(1234) - from pandas.tools.tile import cut + from pandas.core.reshape.tile import cut arr = np.random.randn(4) factor = cut(arr, 4) - tm.assertIsInstance(factor, Categorical) + # assert isinstance(factor, n) result = algos.value_counts(factor) - cats = ['(-1.194, -0.535]', '(-0.535, 0.121]', '(0.121, 0.777]', - '(0.777, 1.433]'] - expected_index = CategoricalIndex(cats, cats, ordered=True) - expected = Series([1, 1, 1, 1], index=expected_index) + breaks = [-1.194, -0.535, 0.121, 0.777, 1.433] + index = IntervalIndex.from_breaks(breaks).astype(CDT(ordered=True)) + expected = Series([1, 1, 1, 1], index=index) tm.assert_series_equal(result.sort_index(), expected.sort_index()) def test_value_counts_bins(self): s = [1, 2, 3, 4] result = algos.value_counts(s, bins=1) - self.assertEqual(result.tolist(), [4]) - self.assertEqual(result.index[0], 0.997) + expected = Series([4], + 
index=IntervalIndex.from_tuples([(0.996, 4.0)])) + tm.assert_series_equal(result, expected) result = algos.value_counts(s, bins=2, sort=False) - self.assertEqual(result.tolist(), [2, 2]) - self.assertEqual(result.index[0], 0.997) - self.assertEqual(result.index[1], 2.5) + expected = Series([2, 2], + index=IntervalIndex.from_tuples([(0.996, 2.5), + (2.5, 4.0)])) + tm.assert_series_equal(result, expected) def test_value_counts_dtypes(self): result = algos.value_counts([1, 1.]) - self.assertEqual(len(result), 1) + assert len(result) == 1 result = algos.value_counts([1, 1.], bins=1) - self.assertEqual(len(result), 1) + assert len(result) == 1 result = algos.value_counts(Series([1, 1., '1'])) # object - self.assertEqual(len(result), 2) + assert len(result) == 2 - self.assertRaises(TypeError, lambda s: algos.value_counts(s, bins=1), - ['1', 1]) + pytest.raises(TypeError, lambda s: algos.value_counts(s, bins=1), + ['1', 1]) def test_value_counts_nat(self): td = Series([np.timedelta64(10000), pd.NaT], dtype='timedelta64[ns]') @@ -514,36 +614,36 @@ def test_value_counts_nat(self): for s in [td, dt]: vc = algos.value_counts(s) vc_with_na = algos.value_counts(s, dropna=False) - self.assertEqual(len(vc), 1) - self.assertEqual(len(vc_with_na), 2) + assert len(vc) == 1 + assert len(vc_with_na) == 2 - exp_dt = pd.Series({pd.Timestamp('2014-01-01 00:00:00'): 1}) + exp_dt = Series({Timestamp('2014-01-01 00:00:00'): 1}) tm.assert_series_equal(algos.value_counts(dt), exp_dt) # TODO same for (timedelta) def test_value_counts_datetime_outofbounds(self): # GH 13663 - s = pd.Series([datetime(3000, 1, 1), datetime(5000, 1, 1), - datetime(5000, 1, 1), datetime(6000, 1, 1), - datetime(3000, 1, 1), datetime(3000, 1, 1)]) + s = Series([datetime(3000, 1, 1), datetime(5000, 1, 1), + datetime(5000, 1, 1), datetime(6000, 1, 1), + datetime(3000, 1, 1), datetime(3000, 1, 1)]) res = s.value_counts() - exp_index = pd.Index([datetime(3000, 1, 1), datetime(5000, 1, 1), - datetime(6000, 1, 1)], 
dtype=object) - exp = pd.Series([3, 2, 1], index=exp_index) + exp_index = Index([datetime(3000, 1, 1), datetime(5000, 1, 1), + datetime(6000, 1, 1)], dtype=object) + exp = Series([3, 2, 1], index=exp_index) tm.assert_series_equal(res, exp) # GH 12424 - res = pd.to_datetime(pd.Series(['2362-01-01', np.nan]), + res = pd.to_datetime(Series(['2362-01-01', np.nan]), errors='ignore') - exp = pd.Series(['2362-01-01', np.nan], dtype=object) + exp = Series(['2362-01-01', np.nan], dtype=object) tm.assert_series_equal(res, exp) def test_categorical(self): - s = Series(pd.Categorical(list('aaabbc'))) + s = Series(Categorical(list('aaabbc'))) result = s.value_counts() - expected = pd.Series([3, 2, 1], - index=pd.CategoricalIndex(['a', 'b', 'c'])) + expected = Series([3, 2, 1], index=CategoricalIndex(['a', 'b', 'c'])) + tm.assert_series_equal(result, expected, check_index_type=True) # preserve order? @@ -553,38 +653,38 @@ def test_categorical(self): tm.assert_series_equal(result, expected, check_index_type=True) def test_categorical_nans(self): - s = Series(pd.Categorical(list('aaaaabbbcc'))) # 4,3,2,1 (nan) + s = Series(Categorical(list('aaaaabbbcc'))) # 4,3,2,1 (nan) s.iloc[1] = np.nan result = s.value_counts() - expected = pd.Series([4, 3, 2], index=pd.CategoricalIndex( + expected = Series([4, 3, 2], index=CategoricalIndex( ['a', 'b', 'c'], categories=['a', 'b', 'c'])) tm.assert_series_equal(result, expected, check_index_type=True) result = s.value_counts(dropna=False) - expected = pd.Series([ + expected = Series([ 4, 3, 2, 1 - ], index=pd.CategoricalIndex(['a', 'b', 'c', np.nan])) + ], index=CategoricalIndex(['a', 'b', 'c', np.nan])) tm.assert_series_equal(result, expected, check_index_type=True) # out of order - s = Series(pd.Categorical( + s = Series(Categorical( list('aaaaabbbcc'), ordered=True, categories=['b', 'a', 'c'])) s.iloc[1] = np.nan result = s.value_counts() - expected = pd.Series([4, 3, 2], index=pd.CategoricalIndex( + expected = Series([4, 3, 2], 
index=CategoricalIndex( ['a', 'b', 'c'], categories=['b', 'a', 'c'], ordered=True)) tm.assert_series_equal(result, expected, check_index_type=True) result = s.value_counts(dropna=False) - expected = pd.Series([4, 3, 2, 1], index=pd.CategoricalIndex( + expected = Series([4, 3, 2, 1], index=CategoricalIndex( ['a', 'b', 'c', np.nan], categories=['b', 'a', 'c'], ordered=True)) tm.assert_series_equal(result, expected, check_index_type=True) def test_categorical_zeroes(self): # keep the `d` category with 0 - s = Series(pd.Categorical( + s = Series(Categorical( list('bbbaac'), categories=list('abcd'), ordered=True)) result = s.value_counts() - expected = Series([3, 2, 1, 0], index=pd.Categorical( + expected = Series([3, 2, 1, 0], index=Categorical( ['b', 'a', 'c', 'd'], categories=list('abcd'), ordered=True)) tm.assert_series_equal(result, expected, check_index_type=True) @@ -592,34 +692,34 @@ def test_dropna(self): # https://github.com/pandas-dev/pandas/issues/9443#issuecomment-73719328 tm.assert_series_equal( - pd.Series([True, True, False]).value_counts(dropna=True), - pd.Series([2, 1], index=[True, False])) + Series([True, True, False]).value_counts(dropna=True), + Series([2, 1], index=[True, False])) tm.assert_series_equal( - pd.Series([True, True, False]).value_counts(dropna=False), - pd.Series([2, 1], index=[True, False])) + Series([True, True, False]).value_counts(dropna=False), + Series([2, 1], index=[True, False])) tm.assert_series_equal( - pd.Series([True, True, False, None]).value_counts(dropna=True), - pd.Series([2, 1], index=[True, False])) + Series([True, True, False, None]).value_counts(dropna=True), + Series([2, 1], index=[True, False])) tm.assert_series_equal( - pd.Series([True, True, False, None]).value_counts(dropna=False), - pd.Series([2, 1, 1], index=[True, False, np.nan])) + Series([True, True, False, None]).value_counts(dropna=False), + Series([2, 1, 1], index=[True, False, np.nan])) tm.assert_series_equal( - pd.Series([10.3, 5., 
5.]).value_counts(dropna=True), - pd.Series([2, 1], index=[5., 10.3])) + Series([10.3, 5., 5.]).value_counts(dropna=True), + Series([2, 1], index=[5., 10.3])) tm.assert_series_equal( - pd.Series([10.3, 5., 5.]).value_counts(dropna=False), - pd.Series([2, 1], index=[5., 10.3])) + Series([10.3, 5., 5.]).value_counts(dropna=False), + Series([2, 1], index=[5., 10.3])) tm.assert_series_equal( - pd.Series([10.3, 5., 5., None]).value_counts(dropna=True), - pd.Series([2, 1], index=[5., 10.3])) + Series([10.3, 5., 5., None]).value_counts(dropna=True), + Series([2, 1], index=[5., 10.3])) # 32-bit linux has a different ordering if not compat.is_platform_32bit(): - tm.assert_series_equal( - pd.Series([10.3, 5., 5., None]).value_counts(dropna=False), - pd.Series([2, 1, 1], index=[5., 10.3, np.nan])) + result = Series([10.3, 5., 5., None]).value_counts(dropna=False) + expected = Series([2, 1, 1], index=[5., 10.3, np.nan]) + tm.assert_series_equal(result, expected) def test_value_counts_normalized(self): # GH12558 @@ -648,10 +748,12 @@ def test_value_counts_uint64(self): expected = Series([1, 1], index=[-1, 2**63]) result = algos.value_counts(arr) - tm.assert_series_equal(result, expected) + # 32-bit linux has a different ordering + if not compat.is_platform_32bit(): + tm.assert_series_equal(result, expected) -class TestDuplicated(tm.TestCase): +class TestDuplicated(object): def test_duplicated_with_nas(self): keys = np.array([0, 1, np.nan, 0, 2, np.nan], dtype=object) @@ -691,55 +793,57 @@ def test_duplicated_with_nas(self): expected = np.array(trues + trues) tm.assert_numpy_array_equal(result, expected) - def test_numeric_object_likes(self): - cases = [np.array([1, 2, 1, 5, 3, - 2, 4, 1, 5, 6]), - np.array([1.1, 2.2, 1.1, np.nan, 3.3, - 2.2, 4.4, 1.1, np.nan, 6.6]), - np.array([1 + 1j, 2 + 2j, 1 + 1j, 5 + 5j, 3 + 3j, - 2 + 2j, 4 + 4j, 1 + 1j, 5 + 5j, 6 + 6j]), - np.array(['a', 'b', 'a', 'e', 'c', - 'b', 'd', 'a', 'e', 'f'], dtype=object), - np.array([1, 2**63, 1, 3**5, 10, - 
2**63, 39, 1, 3**5, 7], dtype=np.uint64)] - + @pytest.mark.parametrize('case', [ + np.array([1, 2, 1, 5, 3, + 2, 4, 1, 5, 6]), + np.array([1.1, 2.2, 1.1, np.nan, 3.3, + 2.2, 4.4, 1.1, np.nan, 6.6]), + pytest.param(np.array([1 + 1j, 2 + 2j, 1 + 1j, 5 + 5j, 3 + 3j, + 2 + 2j, 4 + 4j, 1 + 1j, 5 + 5j, 6 + 6j]), + marks=pytest.mark.xfail(reason="Complex bug. GH 16399") + ), + np.array(['a', 'b', 'a', 'e', 'c', + 'b', 'd', 'a', 'e', 'f'], dtype=object), + np.array([1, 2**63, 1, 3**5, 10, 2**63, 39, 1, 3**5, 7], + dtype=np.uint64), + ]) + def test_numeric_object_likes(self, case): exp_first = np.array([False, False, True, False, False, True, False, True, True, False]) exp_last = np.array([True, True, True, True, False, False, False, False, False, False]) exp_false = exp_first | exp_last - for case in cases: - res_first = algos.duplicated(case, keep='first') + res_first = algos.duplicated(case, keep='first') + tm.assert_numpy_array_equal(res_first, exp_first) + + res_last = algos.duplicated(case, keep='last') + tm.assert_numpy_array_equal(res_last, exp_last) + + res_false = algos.duplicated(case, keep=False) + tm.assert_numpy_array_equal(res_false, exp_false) + + # index + for idx in [Index(case), Index(case, dtype='category')]: + res_first = idx.duplicated(keep='first') tm.assert_numpy_array_equal(res_first, exp_first) - res_last = algos.duplicated(case, keep='last') + res_last = idx.duplicated(keep='last') tm.assert_numpy_array_equal(res_last, exp_last) - res_false = algos.duplicated(case, keep=False) + res_false = idx.duplicated(keep=False) tm.assert_numpy_array_equal(res_false, exp_false) - # index - for idx in [pd.Index(case), pd.Index(case, dtype='category')]: - res_first = idx.duplicated(keep='first') - tm.assert_numpy_array_equal(res_first, exp_first) + # series + for s in [Series(case), Series(case, dtype='category')]: + res_first = s.duplicated(keep='first') + tm.assert_series_equal(res_first, Series(exp_first)) - res_last = idx.duplicated(keep='last') - 
tm.assert_numpy_array_equal(res_last, exp_last) + res_last = s.duplicated(keep='last') + tm.assert_series_equal(res_last, Series(exp_last)) - res_false = idx.duplicated(keep=False) - tm.assert_numpy_array_equal(res_false, exp_false) - - # series - for s in [pd.Series(case), pd.Series(case, dtype='category')]: - res_first = s.duplicated(keep='first') - tm.assert_series_equal(res_first, pd.Series(exp_first)) - - res_last = s.duplicated(keep='last') - tm.assert_series_equal(res_last, pd.Series(exp_last)) - - res_false = s.duplicated(keep=False) - tm.assert_series_equal(res_false, pd.Series(exp_false)) + res_false = s.duplicated(keep=False) + tm.assert_series_equal(res_false, Series(exp_false)) def test_datetime_likes(self): @@ -748,8 +852,8 @@ def test_datetime_likes(self): td = ['1 days', '2 days', '1 days', 'NaT', '3 days', '2 days', '4 days', '1 days', 'NaT', '6 days'] - cases = [np.array([pd.Timestamp(d) for d in dt]), - np.array([pd.Timestamp(d, tz='US/Eastern') for d in dt]), + cases = [np.array([Timestamp(d) for d in dt]), + np.array([Timestamp(d, tz='US/Eastern') for d in dt]), np.array([pd.Period(d, freq='D') for d in dt]), np.array([np.datetime64(d) for d in dt]), np.array([pd.Timedelta(d) for d in td])] @@ -771,8 +875,8 @@ def test_datetime_likes(self): tm.assert_numpy_array_equal(res_false, exp_false) # index - for idx in [pd.Index(case), pd.Index(case, dtype='category'), - pd.Index(case, dtype=object)]: + for idx in [Index(case), Index(case, dtype='category'), + Index(case, dtype=object)]: res_first = idx.duplicated(keep='first') tm.assert_numpy_array_equal(res_first, exp_first) @@ -783,24 +887,40 @@ def test_datetime_likes(self): tm.assert_numpy_array_equal(res_false, exp_false) # series - for s in [pd.Series(case), pd.Series(case, dtype='category'), - pd.Series(case, dtype=object)]: + for s in [Series(case), Series(case, dtype='category'), + Series(case, dtype=object)]: res_first = s.duplicated(keep='first') - tm.assert_series_equal(res_first, 
pd.Series(exp_first)) + tm.assert_series_equal(res_first, Series(exp_first)) res_last = s.duplicated(keep='last') - tm.assert_series_equal(res_last, pd.Series(exp_last)) + tm.assert_series_equal(res_last, Series(exp_last)) res_false = s.duplicated(keep=False) - tm.assert_series_equal(res_false, pd.Series(exp_false)) + tm.assert_series_equal(res_false, Series(exp_false)) def test_unique_index(self): - cases = [pd.Index([1, 2, 3]), pd.RangeIndex(0, 3)] + cases = [Index([1, 2, 3]), pd.RangeIndex(0, 3)] for case in cases: - self.assertTrue(case.is_unique) + assert case.is_unique tm.assert_numpy_array_equal(case.duplicated(), np.array([False, False, False])) + @pytest.mark.parametrize('arr, unique', [ + ([(0, 0), (0, 1), (1, 0), (1, 1), (0, 0), (0, 1), (1, 0), (1, 1)], + [(0, 0), (0, 1), (1, 0), (1, 1)]), + ([('b', 'c'), ('a', 'b'), ('a', 'b'), ('b', 'c')], + [('b', 'c'), ('a', 'b')]), + ([('a', 1), ('b', 2), ('a', 3), ('a', 1)], + [('a', 1), ('b', 2), ('a', 3)]), + ]) + def test_unique_tuples(self, arr, unique): + # https://github.com/pandas-dev/pandas/issues/16519 + expected = np.empty(len(unique), dtype=object) + expected[:] = unique + + result = pd.unique(arr) + tm.assert_numpy_array_equal(result, expected) + class GroupVarTestMixin(object): @@ -818,7 +938,7 @@ def test_group_var_generic_1d(self): expected_counts = counts + 3 self.algo(out, counts, values, labels) - self.assertTrue(np.allclose(out, expected_out, self.rtol)) + assert np.allclose(out, expected_out, self.rtol) tm.assert_numpy_array_equal(counts, expected_counts) def test_group_var_generic_1d_flat_labels(self): @@ -834,7 +954,7 @@ def test_group_var_generic_1d_flat_labels(self): self.algo(out, counts, values, labels) - self.assertTrue(np.allclose(out, expected_out, self.rtol)) + assert np.allclose(out, expected_out, self.rtol) tm.assert_numpy_array_equal(counts, expected_counts) def test_group_var_generic_2d_all_finite(self): @@ -849,7 +969,7 @@ def test_group_var_generic_2d_all_finite(self): 
expected_counts = counts + 2 self.algo(out, counts, values, labels) - self.assertTrue(np.allclose(out, expected_out, self.rtol)) + assert np.allclose(out, expected_out, self.rtol) tm.assert_numpy_array_equal(counts, expected_counts) def test_group_var_generic_2d_some_nan(self): @@ -881,15 +1001,15 @@ def test_group_var_constant(self): self.algo(out, counts, values, labels) - self.assertEqual(counts[0], 3) - self.assertTrue(out[0, 0] >= 0) + assert counts[0] == 3 + assert out[0, 0] >= 0 tm.assert_almost_equal(out[0, 0], 0.0) -class TestGroupVarFloat64(tm.TestCase, GroupVarTestMixin): +class TestGroupVarFloat64(GroupVarTestMixin): __test__ = True - algo = algos.algos.group_var_float64 + algo = libgroupby.group_var_float64 dtype = np.float64 rtol = 1e-5 @@ -905,62 +1025,72 @@ def test_group_var_large_inputs(self): self.algo(out, counts, values, labels) - self.assertEqual(counts[0], 10 ** 6) + assert counts[0] == 10 ** 6 tm.assert_almost_equal(out[0, 0], 1.0 / 12, check_less_precise=True) -class TestGroupVarFloat32(tm.TestCase, GroupVarTestMixin): +class TestGroupVarFloat32(GroupVarTestMixin): __test__ = True - algo = algos.algos.group_var_float32 + algo = libgroupby.group_var_float32 dtype = np.float32 rtol = 1e-2 -class TestHashTable(tm.TestCase): +class TestHashTable(object): def test_lookup_nan(self): xs = np.array([2.718, 3.14, np.nan, -7, 5, 2, 3]) - m = hashtable.Float64HashTable() + m = ht.Float64HashTable() m.map_locations(xs) - self.assert_numpy_array_equal(m.lookup(xs), - np.arange(len(xs), dtype=np.int64)) + tm.assert_numpy_array_equal(m.lookup(xs), np.arange(len(xs), + dtype=np.int64)) def test_lookup_overflow(self): xs = np.array([1, 2, 2**63], dtype=np.uint64) - m = hashtable.UInt64HashTable() + m = ht.UInt64HashTable() m.map_locations(xs) - self.assert_numpy_array_equal(m.lookup(xs), - np.arange(len(xs), dtype=np.int64)) + tm.assert_numpy_array_equal(m.lookup(xs), np.arange(len(xs), + dtype=np.int64)) def test_get_unique(self): - s = pd.Series([1, 2, 
2**63, 2**63], dtype=np.uint64) + s = Series([1, 2, 2**63, 2**63], dtype=np.uint64) exp = np.array([1, 2, 2**63], dtype=np.uint64) - self.assert_numpy_array_equal(s.unique(), exp) + tm.assert_numpy_array_equal(s.unique(), exp) def test_vector_resize(self): # Test for memory errors after internal vector # reallocations (pull request #7157) - def _test_vector_resize(htable, uniques, dtype, nvals): + def _test_vector_resize(htable, uniques, dtype, nvals, safely_resizes): vals = np.array(np.random.randn(1000), dtype=dtype) - # get_labels appends to the vector + # get_labels may append to uniques htable.get_labels(vals[:nvals], uniques, 0, -1) - # to_array resizes the vector - uniques.to_array() - htable.get_labels(vals, uniques, 0, -1) + # to_array() set an external_view_exists flag on uniques. + tmp = uniques.to_array() + oldshape = tmp.shape + # subsequent get_labels() calls can no longer append to it + # (for all but StringHashTables + ObjectVector) + if safely_resizes: + htable.get_labels(vals, uniques, 0, -1) + else: + with pytest.raises(ValueError) as excinfo: + htable.get_labels(vals, uniques, 0, -1) + assert str(excinfo.value).startswith('external reference') + uniques.to_array() # should not raise here + assert tmp.shape == oldshape test_cases = [ - (hashtable.PyObjectHashTable, hashtable.ObjectVector, 'object'), - (hashtable.StringHashTable, hashtable.ObjectVector, 'object'), - (hashtable.Float64HashTable, hashtable.Float64Vector, 'float64'), - (hashtable.Int64HashTable, hashtable.Int64Vector, 'int64'), - (hashtable.UInt64HashTable, hashtable.UInt64Vector, 'uint64')] + (ht.PyObjectHashTable, ht.ObjectVector, 'object', False), + (ht.StringHashTable, ht.ObjectVector, 'object', True), + (ht.Float64HashTable, ht.Float64Vector, 'float64', False), + (ht.Int64HashTable, ht.Int64Vector, 'int64', False), + (ht.UInt64HashTable, ht.UInt64Vector, 'uint64', False)] - for (tbl, vect, dtype) in test_cases: + for (tbl, vect, dtype, safely_resizes) in test_cases: # resizing 
to empty is a special case - _test_vector_resize(tbl(), vect(), dtype, 0) - _test_vector_resize(tbl(), vect(), dtype, 10) + _test_vector_resize(tbl(), vect(), dtype, 0, safely_resizes) + _test_vector_resize(tbl(), vect(), dtype, 10, safely_resizes) def test_quantile(): @@ -972,7 +1102,6 @@ def test_quantile(): def test_unique_label_indices(): - from pandas.hashtable import unique_label_indices a = np.random.randint(1, 1 << 10, 1 << 15).astype('i8') @@ -989,16 +1118,16 @@ def test_unique_label_indices(): check_dtype=False) -class TestRank(tm.TestCase): +class TestRank(object): + @td.skip_if_no_scipy def test_scipy_compat(self): - tm._skip_if_no_scipy() from scipy.stats import rankdata def _check(arr): mask = ~np.isfinite(arr) arr = arr.copy() - result = _algos.rank_1d_float64(arr) + result = libalgos.rank_1d_float64(arr) arr[mask] = np.inf exp = rankdata(arr) exp[mask] = nan @@ -1025,7 +1154,7 @@ def test_too_many_ndims(self): arr = np.array([[[1, 2, 3], [4, 5, 6], [7, 8, 9]]]) msg = "Array with ndim > 2 are not supported" - with tm.assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): algos.rank(arr) @@ -1034,30 +1163,30 @@ def test_pad_backfill_object_segfault(): old = np.array([], dtype='O') new = np.array([datetime(2010, 12, 31)], dtype='O') - result = _algos.pad_object(old, new) + result = libalgos.pad_object(old, new) expected = np.array([-1], dtype=np.int64) - assert (np.array_equal(result, expected)) + tm.assert_numpy_array_equal(result, expected) - result = _algos.pad_object(new, old) + result = libalgos.pad_object(new, old) expected = np.array([], dtype=np.int64) - assert (np.array_equal(result, expected)) + tm.assert_numpy_array_equal(result, expected) - result = _algos.backfill_object(old, new) + result = libalgos.backfill_object(old, new) expected = np.array([-1], dtype=np.int64) - assert (np.array_equal(result, expected)) + tm.assert_numpy_array_equal(result, expected) - result = _algos.backfill_object(new, old) + result = 
libalgos.backfill_object(new, old) expected = np.array([], dtype=np.int64) - assert (np.array_equal(result, expected)) + tm.assert_numpy_array_equal(result, expected) def test_arrmap(): values = np.array(['foo', 'foo', 'bar', 'bar', 'baz', 'qux'], dtype='O') - result = _algos.arrmap_object(values, lambda x: x in ['foo', 'bar']) + result = libalgos.arrmap_object(values, lambda x: x in ['foo', 'bar']) assert (result.dtype == np.bool_) -class TestTseriesUtil(tm.TestCase): +class TestTseriesUtil(object): def test_combineFunc(self): pass @@ -1065,7 +1194,7 @@ def test_combineFunc(self): def test_reindex(self): pass - def test_isnull(self): + def test_isna(self): pass def test_groupby(self): @@ -1078,36 +1207,36 @@ def test_backfill(self): old = Index([1, 5, 10]) new = Index(lrange(12)) - filler = _algos.backfill_int64(old.values, new.values) + filler = libalgos.backfill_int64(old.values, new.values) expect_filler = np.array([0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, -1], dtype=np.int64) - self.assert_numpy_array_equal(filler, expect_filler) + tm.assert_numpy_array_equal(filler, expect_filler) # corner case old = Index([1, 4]) new = Index(lrange(5, 10)) - filler = _algos.backfill_int64(old.values, new.values) + filler = libalgos.backfill_int64(old.values, new.values) expect_filler = np.array([-1, -1, -1, -1, -1], dtype=np.int64) - self.assert_numpy_array_equal(filler, expect_filler) + tm.assert_numpy_array_equal(filler, expect_filler) def test_pad(self): old = Index([1, 5, 10]) new = Index(lrange(12)) - filler = _algos.pad_int64(old.values, new.values) + filler = libalgos.pad_int64(old.values, new.values) expect_filler = np.array([-1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2], dtype=np.int64) - self.assert_numpy_array_equal(filler, expect_filler) + tm.assert_numpy_array_equal(filler, expect_filler) # corner case old = Index([5, 10]) new = Index(lrange(5)) - filler = _algos.pad_int64(old.values, new.values) + filler = libalgos.pad_int64(old.values, new.values) expect_filler = np.array([-1, 
-1, -1, -1, -1], dtype=np.int64) - self.assert_numpy_array_equal(filler, expect_filler) + tm.assert_numpy_array_equal(filler, expect_filler) def test_is_lexsorted(): @@ -1123,7 +1252,7 @@ def test_is_lexsorted(): 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0]), + 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype='int64'), np.array([30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 30, 29, 28, @@ -1135,35 +1264,34 @@ def test_is_lexsorted(): 7, 6, 5, 4, 3, 2, 1, 0, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, - 4, 3, 2, 1, 0])] - - assert (not _algos.is_lexsorted(failure)) - -# def test_get_group_index(): -# a = np.array([0, 1, 2, 0, 2, 1, 0, 0], dtype=np.int64) -# b = np.array([1, 0, 3, 2, 0, 2, 3, 0], dtype=np.int64) -# expected = np.array([1, 4, 11, 2, 8, 6, 3, 0], dtype=np.int64) + 4, 3, 2, 1, 0], dtype='int64')] -# result = lib.get_group_index([a, b], (3, 4)) - -# assert(np.array_equal(result, expected)) + assert (not libalgos.is_lexsorted(failure)) def test_groupsort_indexer(): a = np.random.randint(0, 1000, 100).astype(np.int64) b = np.random.randint(0, 1000, 100).astype(np.int64) - result = _algos.groupsort_indexer(a, 1000)[0] + result = libalgos.groupsort_indexer(a, 1000)[0] # need to use a stable sort + # np.argsort returns int, groupsort_indexer + # always returns int64 expected = np.argsort(a, kind='mergesort') - assert (np.array_equal(result, expected)) + expected = expected.astype(np.int64) + + tm.assert_numpy_array_equal(result, expected) # compare with lexsort + # np.lexsort returns int, groupsort_indexer + # always returns int64 key = a * 1000 + b - result = _algos.groupsort_indexer(key, 1000000)[0] + result = libalgos.groupsort_indexer(key, 1000000)[0] expected = np.lexsort((b, a)) - assert (np.array_equal(result, expected)) + expected = expected.astype(np.int64) + + 
tm.assert_numpy_array_equal(result, expected) def test_infinity_sort(): @@ -1172,8 +1300,8 @@ def test_infinity_sort(): # itself. Instead, let's give our infinities a self-consistent # ordering, but outside the float extended real line. - Inf = _algos.Infinity() - NegInf = _algos.NegInfinity() + Inf = libalgos.Infinity() + NegInf = libalgos.NegInfinity() ref_nums = [NegInf, float("-inf"), -1e100, 0, 1e100, float("inf"), Inf] @@ -1181,24 +1309,47 @@ def test_infinity_sort(): assert all(Inf > x or x is Inf for x in ref_nums) assert Inf >= Inf and Inf == Inf assert not Inf < Inf and not Inf > Inf + assert libalgos.Infinity() == libalgos.Infinity() + assert not libalgos.Infinity() != libalgos.Infinity() assert all(NegInf <= x for x in ref_nums) assert all(NegInf < x or x is NegInf for x in ref_nums) assert NegInf <= NegInf and NegInf == NegInf assert not NegInf < NegInf and not NegInf > NegInf + assert libalgos.NegInfinity() == libalgos.NegInfinity() + assert not libalgos.NegInfinity() != libalgos.NegInfinity() for perm in permutations(ref_nums): assert sorted(perm) == ref_nums # smoke tests - np.array([_algos.Infinity()] * 32).argsort() - np.array([_algos.NegInfinity()] * 32).argsort() + np.array([libalgos.Infinity()] * 32).argsort() + np.array([libalgos.NegInfinity()] * 32).argsort() + + +def test_infinity_against_nan(): + Inf = libalgos.Infinity() + NegInf = libalgos.NegInfinity() + + assert not Inf > np.nan + assert not Inf >= np.nan + assert not Inf < np.nan + assert not Inf <= np.nan + assert not Inf == np.nan + assert Inf != np.nan + + assert not NegInf > np.nan + assert not NegInf >= np.nan + assert not NegInf < np.nan + assert not NegInf <= np.nan + assert not NegInf == np.nan + assert NegInf != np.nan def test_ensure_platform_int(): arr = np.arange(100, dtype=np.intp) - result = _algos.ensure_platform_int(arr) + result = libalgos.ensure_platform_int(arr) assert (result is arr) @@ -1208,27 +1359,27 @@ def test_int64_add_overflow(): m = np.iinfo(np.int64).max n 
= np.iinfo(np.int64).min - with tm.assertRaisesRegexp(OverflowError, msg): + with tm.assert_raises_regex(OverflowError, msg): algos.checked_add_with_arr(np.array([m, m]), m) - with tm.assertRaisesRegexp(OverflowError, msg): + with tm.assert_raises_regex(OverflowError, msg): algos.checked_add_with_arr(np.array([m, m]), np.array([m, m])) - with tm.assertRaisesRegexp(OverflowError, msg): + with tm.assert_raises_regex(OverflowError, msg): algos.checked_add_with_arr(np.array([n, n]), n) - with tm.assertRaisesRegexp(OverflowError, msg): + with tm.assert_raises_regex(OverflowError, msg): algos.checked_add_with_arr(np.array([n, n]), np.array([n, n])) - with tm.assertRaisesRegexp(OverflowError, msg): + with tm.assert_raises_regex(OverflowError, msg): algos.checked_add_with_arr(np.array([m, n]), np.array([n, n])) - with tm.assertRaisesRegexp(OverflowError, msg): + with tm.assert_raises_regex(OverflowError, msg): algos.checked_add_with_arr(np.array([m, m]), np.array([m, m]), arr_mask=np.array([False, True])) - with tm.assertRaisesRegexp(OverflowError, msg): + with tm.assert_raises_regex(OverflowError, msg): algos.checked_add_with_arr(np.array([m, m]), np.array([m, m]), b_mask=np.array([False, True])) - with tm.assertRaisesRegexp(OverflowError, msg): + with tm.assert_raises_regex(OverflowError, msg): algos.checked_add_with_arr(np.array([m, m]), np.array([m, m]), arr_mask=np.array([False, True]), b_mask=np.array([False, True])) - with tm.assertRaisesRegexp(OverflowError, msg): + with tm.assert_raises_regex(OverflowError, msg): with tm.assert_produces_warning(RuntimeWarning): algos.checked_add_with_arr(np.array([m, m]), np.array([np.nan, m])) @@ -1236,31 +1387,48 @@ def test_int64_add_overflow(): # Check that the nan boolean arrays override whether or not # the addition overflows. We don't check the result but just # the fact that an OverflowError is not raised. 
- with tm.assertRaises(AssertionError): - with tm.assertRaisesRegexp(OverflowError, msg): + with pytest.raises(AssertionError): + with tm.assert_raises_regex(OverflowError, msg): algos.checked_add_with_arr(np.array([m, m]), np.array([m, m]), arr_mask=np.array([True, True])) - with tm.assertRaises(AssertionError): - with tm.assertRaisesRegexp(OverflowError, msg): + with pytest.raises(AssertionError): + with tm.assert_raises_regex(OverflowError, msg): algos.checked_add_with_arr(np.array([m, m]), np.array([m, m]), b_mask=np.array([True, True])) - with tm.assertRaises(AssertionError): - with tm.assertRaisesRegexp(OverflowError, msg): + with pytest.raises(AssertionError): + with tm.assert_raises_regex(OverflowError, msg): algos.checked_add_with_arr(np.array([m, m]), np.array([m, m]), arr_mask=np.array([True, False]), b_mask=np.array([False, True])) -class TestMode(tm.TestCase): +class TestMode(object): def test_no_mode(self): exp = Series([], dtype=np.float64) tm.assert_series_equal(algos.mode([]), exp) - exp = Series([], dtype=np.int) + def test_mode_single(self): + # GH 15714 + exp_single = [1] + data_single = [1] + + exp_multi = [1] + data_multi = [1, 1] + + for dt in np.typecodes['AllInteger'] + np.typecodes['Float']: + s = Series(data_single, dtype=dt) + exp = Series(exp_single, dtype=dt) + tm.assert_series_equal(algos.mode(s), exp) + + s = Series(data_multi, dtype=dt) + exp = Series(exp_multi, dtype=dt) + tm.assert_series_equal(algos.mode(s), exp) + + exp = Series([1], dtype=np.int) tm.assert_series_equal(algos.mode([1]), exp) - exp = Series([], dtype=np.object) + exp = Series(['a', 'b', 'c'], dtype=np.object) tm.assert_series_equal(algos.mode(['a', 'b', 'c']), exp) def test_number_mode(self): @@ -1296,7 +1464,8 @@ def test_strobj_mode(self): tm.assert_series_equal(algos.mode(s), exp) def test_datelike_mode(self): - exp = Series([], dtype="M8[ns]") + exp = Series(['1900-05-03', '2011-01-03', + '2013-01-02'], dtype="M8[ns]") s = Series(['2011-01-03', '2013-01-02', 
'1900-05-03'], dtype='M8[ns]') tm.assert_series_equal(algos.mode(s), exp) @@ -1307,7 +1476,8 @@ def test_datelike_mode(self): tm.assert_series_equal(algos.mode(s), exp) def test_timedelta_mode(self): - exp = Series([], dtype='timedelta64[ns]') + exp = Series(['-1 days', '0 days', '1 days'], + dtype='timedelta64[ns]') s = Series(['1 days', '-1 days', '0 days'], dtype='timedelta64[ns]') tm.assert_series_equal(algos.mode(s), exp) @@ -1327,26 +1497,29 @@ def test_uint64_overflow(self): s = Series([1, 2**63, 2**63], dtype=np.uint64) tm.assert_series_equal(algos.mode(s), exp) - exp = Series([], dtype=np.uint64) + exp = Series([1, 2**63], dtype=np.uint64) s = Series([1, 2**63], dtype=np.uint64) tm.assert_series_equal(algos.mode(s), exp) def test_categorical(self): c = Categorical([1, 2]) - exp = Series([], dtype=np.int64) - tm.assert_series_equal(algos.mode(c), exp) + exp = c + tm.assert_categorical_equal(algos.mode(c), exp) + tm.assert_categorical_equal(c.mode(), exp) c = Categorical([1, 'a', 'a']) - exp = Series(['a'], dtype=object) - tm.assert_series_equal(algos.mode(c), exp) + exp = Categorical(['a'], categories=[1, 'a']) + tm.assert_categorical_equal(algos.mode(c), exp) + tm.assert_categorical_equal(c.mode(), exp) c = Categorical([1, 1, 2, 3, 3]) - exp = Series([1, 3], dtype=np.int64) - tm.assert_series_equal(algos.mode(c), exp) + exp = Categorical([1, 3], categories=[1, 2, 3]) + tm.assert_categorical_equal(algos.mode(c), exp) + tm.assert_categorical_equal(c.mode(), exp) def test_index(self): idx = Index([1, 2, 3]) - exp = Series([], dtype=np.int64) + exp = Series([1, 2, 3], dtype=np.int64) tm.assert_series_equal(algos.mode(idx), exp) idx = Index([1, 'a', 'a']) diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 473f1d81c9532..9f7b06ed2d61c 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -9,16 +9,19 @@ import pandas as pd import pandas.compat as compat -from pandas.types.common import (is_object_dtype, is_datetimetz, - 
needs_i8_conversion) +from pandas.core.dtypes.common import ( + is_object_dtype, is_datetimetz, is_datetime64_dtype, + needs_i8_conversion) import pandas.util.testing as tm -from pandas import (Series, Index, DatetimeIndex, TimedeltaIndex, PeriodIndex, - Timedelta) -from pandas.compat import u, StringIO +from pandas import (Series, Index, DatetimeIndex, TimedeltaIndex, + PeriodIndex, Timedelta, IntervalIndex, Interval, + CategoricalIndex, Timestamp) +from pandas.compat import StringIO, PYPY, long from pandas.compat.numpy import np_array_datetime64_compat -from pandas.core.base import (FrozenList, FrozenNDArray, PandasDelegate, - NoNewAttributesMixin) -from pandas.tseries.base import DatetimeIndexOpsMixin +from pandas.core.accessor import PandasDelegate +from pandas.core.base import PandasObject, NoNewAttributesMixin +from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin +from pandas._libs.tslib import iNaT class CheckStringMixin(object): @@ -44,9 +47,10 @@ class CheckImmutable(object): mutable_regex = re.compile('does not support mutable operations') def check_mutable_error(self, *args, **kwargs): - # pass whatever functions you normally would to assertRaises (after the - # Exception kind) - tm.assertRaisesRegexp(TypeError, self.mutable_regex, *args, **kwargs) + # Pass whatever function you normally would to assert_raises_regex + # (after the Exception kind). 
+ tm.assert_raises_regex( + TypeError, self.mutable_regex, *args, **kwargs) def test_no_mutable_funcs(self): def setitem(): @@ -69,6 +73,7 @@ def delslice(): self.check_mutable_error(delslice) mutable_methods = getattr(self, "mutable_methods", []) + for meth in mutable_methods: self.check_mutable_error(getattr(self.container, meth)) @@ -79,74 +84,11 @@ def test_slicing_maintains_type(self): def check_result(self, result, expected, klass=None): klass = klass or self.klass - self.assertIsInstance(result, klass) - self.assertEqual(result, expected) - - -class TestFrozenList(CheckImmutable, CheckStringMixin, tm.TestCase): - mutable_methods = ('extend', 'pop', 'remove', 'insert') - unicode_container = FrozenList([u("\u05d0"), u("\u05d1"), "c"]) - - def setUp(self): - self.lst = [1, 2, 3, 4, 5] - self.container = FrozenList(self.lst) - self.klass = FrozenList - - def test_add(self): - result = self.container + (1, 2, 3) - expected = FrozenList(self.lst + [1, 2, 3]) - self.check_result(result, expected) + assert isinstance(result, klass) + assert result == expected - result = (1, 2, 3) + self.container - expected = FrozenList([1, 2, 3] + self.lst) - self.check_result(result, expected) - def test_inplace(self): - q = r = self.container - q += [5] - self.check_result(q, self.lst + [5]) - # other shouldn't be mutated - self.check_result(r, self.lst) - - -class TestFrozenNDArray(CheckImmutable, CheckStringMixin, tm.TestCase): - mutable_methods = ('put', 'itemset', 'fill') - unicode_container = FrozenNDArray([u("\u05d0"), u("\u05d1"), "c"]) - - def setUp(self): - self.lst = [3, 5, 7, -2] - self.container = FrozenNDArray(self.lst) - self.klass = FrozenNDArray - - def test_shallow_copying(self): - original = self.container.copy() - self.assertIsInstance(self.container.view(), FrozenNDArray) - self.assertFalse(isinstance( - self.container.view(np.ndarray), FrozenNDArray)) - self.assertIsNot(self.container.view(), self.container) - self.assert_numpy_array_equal(self.container, 
original) - # shallow copy should be the same too - self.assertIsInstance(self.container._shallow_copy(), FrozenNDArray) - - # setting should not be allowed - def testit(container): - container[0] = 16 - - self.check_mutable_error(testit, self.container) - - def test_values(self): - original = self.container.view(np.ndarray).copy() - n = original[0] + 15 - vals = self.container.values() - self.assert_numpy_array_equal(original, vals) - self.assertIsNot(original, vals) - vals[0] = n - self.assertIsInstance(self.container, pd.core.base.FrozenNDArray) - self.assert_numpy_array_equal(self.container.values(), original) - self.assertEqual(vals[0], n) - - -class TestPandasDelegate(tm.TestCase): +class TestPandasDelegate(object): class Delegator(object): _properties = ['foo'] @@ -164,17 +106,18 @@ def bar(self, *args, **kwargs): """ a test bar method """ pass - class Delegate(PandasDelegate): + class Delegate(PandasDelegate, PandasObject): def __init__(self, obj): self.obj = obj - def setUp(self): + def setup_method(self, method): pass - def test_invalida_delgation(self): + def test_invalid_delegation(self): # these show that in order for the delegation to work - # the _delegate_* methods need to be overriden to not raise a TypeError + # the _delegate_* methods need to be overridden to not raise + # a TypeError self.Delegate._add_delegate_accessors( delegate=self.Delegator, @@ -192,18 +135,19 @@ def test_invalida_delgation(self): def f(): delegate.foo - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) def f(): delegate.foo = 5 - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) def f(): delegate.foo() - self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) + @pytest.mark.skipif(PYPY, reason="not relevant for PyPy") def test_memory_usage(self): # Delegate does not implement memory_usage. 
# Check that we fall back to in-built `__sizeof__` @@ -212,7 +156,7 @@ def test_memory_usage(self): sys.getsizeof(delegate) -class Ops(tm.TestCase): +class Ops(object): def _allow_na_ops(self, obj): """Whether to skip test cases including NaN""" @@ -222,7 +166,7 @@ def _allow_na_ops(self, obj): return False return True - def setUp(self): + def setup_method(self, method): self.bool_index = tm.makeBoolIndex(10, name='a') self.int_index = tm.makeIntIndex(10, name='a') self.float_index = tm.makeFloatIndex(10, name='a') @@ -277,22 +221,22 @@ def check_ops_properties(self, props, filter=None, ignore_failures=False): tm.assert_index_equal(result, expected) elif isinstance(result, np.ndarray) and isinstance(expected, np.ndarray): - self.assert_numpy_array_equal(result, expected) + tm.assert_numpy_array_equal(result, expected) else: - self.assertEqual(result, expected) + assert result == expected # freq raises AttributeError on an Int64Index because its not - # defined we mostly care about Series hwere anyhow + # defined we mostly care about Series here anyhow if not ignore_failures: for o in self.not_valid_objs: # an object that is datetimelike will raise a TypeError, # otherwise an AttributeError if issubclass(type(o), DatetimeIndexOpsMixin): - self.assertRaises(TypeError, lambda: getattr(o, op)) + pytest.raises(TypeError, lambda: getattr(o, op)) else: - self.assertRaises(AttributeError, - lambda: getattr(o, op)) + pytest.raises(AttributeError, + lambda: getattr(o, op)) def test_binary_ops_docs(self): from pandas import DataFrame, Panel @@ -310,21 +254,19 @@ def test_binary_ops_docs(self): operand2 = 'other' op = op_map[op_name] expected_str = ' '.join([operand1, op, operand2]) - self.assertTrue(expected_str in getattr(klass, - op_name).__doc__) + assert expected_str in getattr(klass, op_name).__doc__ # reverse version of the binary ops expected_str = ' '.join([operand2, op, operand1]) - self.assertTrue(expected_str in getattr(klass, 'r' + - op_name).__doc__) + assert 
expected_str in getattr(klass, 'r' + op_name).__doc__ class TestIndexOps(Ops): - def setUp(self): - super(TestIndexOps, self).setUp() - self.is_valid_objs = [o for o in self.objs if o._allow_index_ops] - self.not_valid_objs = [o for o in self.objs if not o._allow_index_ops] + def setup_method(self, method): + super(TestIndexOps, self).setup_method(method) + self.is_valid_objs = self.objs + self.not_valid_objs = [] def test_none_comparison(self): @@ -337,55 +279,64 @@ def test_none_comparison(self): # noinspection PyComparisonWithNone result = o == None # noqa - self.assertFalse(result.iat[0]) - self.assertFalse(result.iat[1]) + assert not result.iat[0] + assert not result.iat[1] # noinspection PyComparisonWithNone result = o != None # noqa - self.assertTrue(result.iat[0]) - self.assertTrue(result.iat[1]) + assert result.iat[0] + assert result.iat[1] result = None == o # noqa - self.assertFalse(result.iat[0]) - self.assertFalse(result.iat[1]) + assert not result.iat[0] + assert not result.iat[1] # this fails for numpy < 1.9 # and oddly for *some* platforms # result = None != o # noqa - # self.assertTrue(result.iat[0]) - # self.assertTrue(result.iat[1]) - - result = None > o - self.assertFalse(result.iat[0]) - self.assertFalse(result.iat[1]) + # assert result.iat[0] + # assert result.iat[1] + if (is_datetime64_dtype(o) or is_datetimetz(o)): + # Following DatetimeIndex (and Timestamp) convention, + # inequality comparisons with Series[datetime64] raise + with pytest.raises(TypeError): + None > o + with pytest.raises(TypeError): + o > None + else: + result = None > o + assert not result.iat[0] + assert not result.iat[1] - result = o < None - self.assertFalse(result.iat[0]) - self.assertFalse(result.iat[1]) + result = o < None + assert not result.iat[0] + assert not result.iat[1] def test_ndarray_compat_properties(self): for o in self.objs: + # Check that we work. 
+ for p in ['shape', 'dtype', 'flags', 'T', + 'strides', 'itemsize', 'nbytes']: + assert getattr(o, p, None) is not None - # check that we work - for p in ['shape', 'dtype', 'flags', 'T', 'strides', 'itemsize', - 'nbytes']: - self.assertIsNotNone(getattr(o, p, None)) - self.assertTrue(hasattr(o, 'base')) + assert hasattr(o, 'base') - # if we have a datetimelike dtype then needs a view to work + # If we have a datetime-like dtype then needs a view to work # but the user is responsible for that try: - self.assertIsNotNone(o.data) + assert o.data is not None except ValueError: pass - self.assertRaises(ValueError, o.item) # len > 1 - self.assertEqual(o.ndim, 1) - self.assertEqual(o.size, len(o)) + with pytest.raises(ValueError): + o.item() # len > 1 + + assert o.ndim == 1 + assert o.size == len(o) - self.assertEqual(Index([1]).item(), 1) - self.assertEqual(Series([1]).item(), 1) + assert Index([1]).item() == 1 + assert Series([1]).item() == 1 def test_ops(self): for op in ['max', 'min']: @@ -394,15 +345,16 @@ def test_ops(self): if not isinstance(o, PeriodIndex): expected = getattr(o.values, op)() else: - expected = pd.Period(ordinal=getattr(o._values, op)(), - freq=o.freq) + expected = pd.Period( + ordinal=getattr(o._ndarray_values, op)(), + freq=o.freq) try: - self.assertEqual(result, expected) + assert result == expected except TypeError: # comparing tz-aware series with np.array results in # TypeError expected = expected.astype('M8[ns]').astype('int64') - self.assertEqual(result.value, expected) + assert result.value == expected def test_nanops(self): # GH 7261 @@ -410,43 +362,43 @@ def test_nanops(self): for klass in [Index, Series]: obj = klass([np.nan, 2.0]) - self.assertEqual(getattr(obj, op)(), 2.0) + assert getattr(obj, op)() == 2.0 obj = klass([np.nan]) - self.assertTrue(pd.isnull(getattr(obj, op)())) + assert pd.isna(getattr(obj, op)()) obj = klass([]) - self.assertTrue(pd.isnull(getattr(obj, op)())) + assert pd.isna(getattr(obj, op)()) obj = klass([pd.NaT, 
datetime(2011, 11, 1)]) # check DatetimeIndex monotonic path - self.assertEqual(getattr(obj, op)(), datetime(2011, 11, 1)) + assert getattr(obj, op)() == datetime(2011, 11, 1) obj = klass([pd.NaT, datetime(2011, 11, 1), pd.NaT]) # check DatetimeIndex non-monotonic path - self.assertEqual(getattr(obj, op)(), datetime(2011, 11, 1)) + assert getattr(obj, op)(), datetime(2011, 11, 1) # argmin/max obj = Index(np.arange(5, dtype='int64')) - self.assertEqual(obj.argmin(), 0) - self.assertEqual(obj.argmax(), 4) + assert obj.argmin() == 0 + assert obj.argmax() == 4 obj = Index([np.nan, 1, np.nan, 2]) - self.assertEqual(obj.argmin(), 1) - self.assertEqual(obj.argmax(), 3) + assert obj.argmin() == 1 + assert obj.argmax() == 3 obj = Index([np.nan]) - self.assertEqual(obj.argmin(), -1) - self.assertEqual(obj.argmax(), -1) + assert obj.argmin() == -1 + assert obj.argmax() == -1 obj = Index([pd.NaT, datetime(2011, 11, 1), datetime(2011, 11, 2), pd.NaT]) - self.assertEqual(obj.argmin(), 1) - self.assertEqual(obj.argmax(), 2) + assert obj.argmin() == 1 + assert obj.argmax() == 2 obj = Index([pd.NaT]) - self.assertEqual(obj.argmin(), -1) - self.assertEqual(obj.argmax(), -1) + assert obj.argmin() == -1 + assert obj.argmax() == -1 def test_value_counts_unique_nunique(self): for orig in self.objs: @@ -463,42 +415,42 @@ def test_value_counts_unique_nunique(self): if isinstance(o, Index) and o.is_boolean(): continue elif isinstance(o, Index): - expected_index = pd.Index(o[::-1]) + expected_index = Index(o[::-1]) expected_index.name = None o = o.repeat(range(1, len(o) + 1)) o.name = 'a' else: - expected_index = pd.Index(values[::-1]) + expected_index = Index(values[::-1]) idx = o.index.repeat(range(1, len(o) + 1)) rep = np.repeat(values, range(1, len(o) + 1)) o = klass(rep, index=idx, name='a') # check values has the same dtype as the original - self.assertEqual(o.dtype, orig.dtype) + assert o.dtype == orig.dtype expected_s = Series(range(10, 0, -1), index=expected_index, dtype='int64', 
name='a') result = o.value_counts() tm.assert_series_equal(result, expected_s) - self.assertTrue(result.index.name is None) - self.assertEqual(result.name, 'a') + assert result.index.name is None + assert result.name == 'a' result = o.unique() if isinstance(o, Index): - self.assertTrue(isinstance(result, o.__class__)) - self.assert_index_equal(result, orig) + assert isinstance(result, o.__class__) + tm.assert_index_equal(result, orig) elif is_datetimetz(o): # datetimetz Series returns array of Timestamp - self.assertEqual(result[0], orig[0]) + assert result[0] == orig[0] for r in result: - self.assertIsInstance(r, pd.Timestamp) + assert isinstance(r, Timestamp) tm.assert_numpy_array_equal(result, - orig._values.asobject.values) + orig._values.astype(object).values) else: tm.assert_numpy_array_equal(result, orig.values) - self.assertEqual(o.nunique(), len(np.unique(o.values))) + assert o.nunique() == len(np.unique(o.values)) def test_value_counts_unique_nunique_null(self): @@ -506,7 +458,7 @@ def test_value_counts_unique_nunique_null(self): for orig in self.objs: o = orig.copy() klass = type(o) - values = o._values + values = o._ndarray_values if not self._allow_na_ops(o): continue @@ -515,21 +467,21 @@ def test_value_counts_unique_nunique_null(self): if is_datetimetz(o): if isinstance(o, DatetimeIndex): v = o.asi8 - v[0:2] = pd.tslib.iNaT + v[0:2] = iNaT values = o._shallow_copy(v) else: o = o.copy() - o[0:2] = pd.tslib.iNaT + o[0:2] = iNaT values = o._values elif needs_i8_conversion(o): - values[0:2] = pd.tslib.iNaT + values[0:2] = iNaT values = o._shallow_copy(values) else: values[0:2] = null_obj # check values has the same dtype as the original - self.assertEqual(values.dtype, o.dtype) + assert values.dtype == o.dtype # create repeated values, 'n'th element is repeated by n+1 # times @@ -544,21 +496,21 @@ def test_value_counts_unique_nunique_null(self): if is_datetimetz(o): expected_index = orig._values._shallow_copy(values) else: - expected_index = 
pd.Index(values) + expected_index = Index(values) expected_index.name = None o = o.repeat(range(1, len(o) + 1)) o.name = 'a' # check values has the same dtype as the original - self.assertEqual(o.dtype, orig.dtype) + assert o.dtype == orig.dtype # check values correctly have NaN nanloc = np.zeros(len(o), dtype=np.bool) nanloc[:3] = True if isinstance(o, Index): - self.assert_numpy_array_equal(pd.isnull(o), nanloc) + tm.assert_numpy_array_equal(pd.isna(o), nanloc) else: - exp = pd.Series(nanloc, o.index, name='a') - self.assert_series_equal(pd.isnull(o), exp) + exp = Series(nanloc, o.index, name='a') + tm.assert_series_equal(pd.isna(o), exp) expected_s_na = Series(list(range(10, 2, -1)) + [3], index=expected_index[9:0:-1], @@ -569,12 +521,12 @@ def test_value_counts_unique_nunique_null(self): result_s_na = o.value_counts(dropna=False) tm.assert_series_equal(result_s_na, expected_s_na) - self.assertTrue(result_s_na.index.name is None) - self.assertEqual(result_s_na.name, 'a') + assert result_s_na.index.name is None + assert result_s_na.name == 'a' result_s = o.value_counts() tm.assert_series_equal(o.value_counts(), expected_s) - self.assertTrue(result_s.index.name is None) - self.assertEqual(result_s.name, 'a') + assert result_s.index.name is None + assert result_s.name == 'a' result = o.unique() if isinstance(o, Index): @@ -582,17 +534,17 @@ def test_value_counts_unique_nunique_null(self): Index(values[1:], name='a')) elif is_datetimetz(o): # unable to compare NaT / nan - tm.assert_numpy_array_equal(result[1:], - values[2:].asobject.values) - self.assertIs(result[0], pd.NaT) + vals = values[2:].astype(object).values + tm.assert_numpy_array_equal(result[1:], vals) + assert result[0] is pd.NaT else: tm.assert_numpy_array_equal(result[1:], values[2:]) - self.assertTrue(pd.isnull(result[0])) - self.assertEqual(result.dtype, orig.dtype) + assert pd.isna(result[0]) + assert result.dtype == orig.dtype - self.assertEqual(o.nunique(), 8) - 
self.assertEqual(o.nunique(dropna=False), 9) + assert o.nunique() == 8 + assert o.nunique(dropna=False) == 9 def test_value_counts_inferred(self): klasses = [Index, Series] @@ -609,7 +561,7 @@ def test_value_counts_inferred(self): exp = np.unique(np.array(s_values, dtype=np.object_)) tm.assert_numpy_array_equal(s.unique(), exp) - self.assertEqual(s.nunique(), 4) + assert s.nunique() == 4 # don't sort, have to sort after the fact as not sorting is # platform-dep hist = s.value_counts(sort=False).sort_values() @@ -633,15 +585,14 @@ def test_value_counts_bins(self): s = klass(s_values) # bins - self.assertRaises(TypeError, - lambda bins: s.value_counts(bins=bins), 1) + pytest.raises(TypeError, lambda bins: s.value_counts(bins=bins), 1) s1 = Series([1, 1, 2, 3]) res1 = s1.value_counts(bins=1) - exp1 = Series({0.998: 4}) + exp1 = Series({Interval(0.997, 3.0): 4}) tm.assert_series_equal(res1, exp1) res1n = s1.value_counts(bins=1, normalize=True) - exp1n = Series({0.998: 1.0}) + exp1n = Series({Interval(0.997, 3.0): 1.0}) tm.assert_series_equal(res1n, exp1n) if isinstance(s1, Index): @@ -650,20 +601,22 @@ def test_value_counts_bins(self): exp = np.array([1, 2, 3], dtype=np.int64) tm.assert_numpy_array_equal(s1.unique(), exp) - self.assertEqual(s1.nunique(), 3) + assert s1.nunique() == 3 - res4 = s1.value_counts(bins=4) - exp4 = Series({0.998: 2, - 1.5: 1, - 2.0: 0, - 2.5: 1}, index=[0.998, 2.5, 1.5, 2.0]) + # these return the same + res4 = s1.value_counts(bins=4, dropna=True) + intervals = IntervalIndex.from_breaks([0.997, 1.5, 2.0, 2.5, 3.0]) + exp4 = Series([2, 1, 1, 0], index=intervals.take([0, 3, 1, 2])) tm.assert_series_equal(res4, exp4) + + res4 = s1.value_counts(bins=4, dropna=False) + intervals = IntervalIndex.from_breaks([0.997, 1.5, 2.0, 2.5, 3.0]) + exp4 = Series([2, 1, 1, 0], index=intervals.take([0, 3, 1, 2])) + tm.assert_series_equal(res4, exp4) + res4n = s1.value_counts(bins=4, normalize=True) - exp4n = Series( - {0.998: 0.5, - 1.5: 0.25, - 2.0: 0.0, - 2.5: 
0.25}, index=[0.998, 2.5, 1.5, 2.0]) + exp4n = Series([0.5, 0.25, 0.25, 0], + index=intervals.take([0, 3, 1, 2])) tm.assert_series_equal(res4n, exp4n) # handle NA's properly @@ -679,7 +632,7 @@ def test_value_counts_bins(self): else: exp = np.array(['a', 'b', np.nan, 'd'], dtype=object) tm.assert_numpy_array_equal(s.unique(), exp) - self.assertEqual(s.nunique(), 3) + assert s.nunique() == 3 s = klass({}) expected = Series([], dtype=np.int64) @@ -687,13 +640,12 @@ def test_value_counts_bins(self): check_index_type=False) # returned dtype differs depending on original if isinstance(s, Index): - self.assert_index_equal(s.unique(), Index([]), - exact=False) + tm.assert_index_equal(s.unique(), Index([]), exact=False) else: - self.assert_numpy_array_equal(s.unique(), np.array([]), - check_dtype=False) + tm.assert_numpy_array_equal(s.unique(), np.array([]), + check_dtype=False) - self.assertEqual(s.nunique(), 0) + assert s.nunique() == 0 def test_value_counts_datetime64(self): klasses = [Index, Series] @@ -726,14 +678,14 @@ def test_value_counts_datetime64(self): else: tm.assert_numpy_array_equal(s.unique(), expected) - self.assertEqual(s.nunique(), 3) + assert s.nunique() == 3 # with NaT s = df['dt'].copy() s = klass([v for v in s.values] + [pd.NaT]) result = s.value_counts() - self.assertEqual(result.index.dtype, 'datetime64[ns]') + assert result.index.dtype == 'datetime64[ns]' tm.assert_series_equal(result, expected_s) result = s.value_counts(dropna=False) @@ -741,7 +693,7 @@ def test_value_counts_datetime64(self): tm.assert_series_equal(result, expected_s) unique = s.unique() - self.assertEqual(unique.dtype, 'datetime64[ns]') + assert unique.dtype == 'datetime64[ns]' # numpy_array_equal cannot compare pd.NaT if isinstance(s, Index): @@ -749,10 +701,10 @@ def test_value_counts_datetime64(self): tm.assert_index_equal(unique, exp_idx) else: tm.assert_numpy_array_equal(unique[:3], expected) - self.assertTrue(pd.isnull(unique[3])) + assert pd.isna(unique[3]) - 
self.assertEqual(s.nunique(), 3) - self.assertEqual(s.nunique(dropna=False), 4) + assert s.nunique() == 3 + assert s.nunique(dropna=False) == 4 # timedelta64[ns] td = df.dt - df.dt + timedelta(1) @@ -786,14 +738,14 @@ def test_factorize(self): exp_uniques = o labels, uniques = o.factorize() - self.assert_numpy_array_equal(labels, exp_arr) + tm.assert_numpy_array_equal(labels, exp_arr) if isinstance(o, Series): - self.assert_index_equal(uniques, Index(orig), - check_names=False) + tm.assert_index_equal(uniques, Index(orig), + check_names=False) else: # factorize explicitly resets name - self.assert_index_equal(uniques, exp_uniques, - check_names=False) + tm.assert_index_equal(uniques, exp_uniques, + check_names=False) def test_factorize_repeated(self): for orig in self.objs: @@ -816,24 +768,24 @@ def test_factorize_repeated(self): dtype=np.intp) labels, uniques = n.factorize(sort=True) - self.assert_numpy_array_equal(labels, exp_arr) + tm.assert_numpy_array_equal(labels, exp_arr) if isinstance(o, Series): - self.assert_index_equal(uniques, Index(orig).sort_values(), - check_names=False) + tm.assert_index_equal(uniques, Index(orig).sort_values(), + check_names=False) else: - self.assert_index_equal(uniques, o, check_names=False) + tm.assert_index_equal(uniques, o, check_names=False) exp_arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4], np.intp) labels, uniques = n.factorize(sort=False) - self.assert_numpy_array_equal(labels, exp_arr) + tm.assert_numpy_array_equal(labels, exp_arr) if isinstance(o, Series): expected = Index(o.iloc[5:10].append(o.iloc[:5])) - self.assert_index_equal(uniques, expected, check_names=False) + tm.assert_index_equal(uniques, expected, check_names=False) else: expected = o[5:10].append(o[:5]) - self.assert_index_equal(uniques, expected, check_names=False) + tm.assert_index_equal(uniques, expected, check_names=False) def test_duplicated_drop_duplicates_index(self): # GH 4060 @@ -851,13 +803,13 @@ def 
test_duplicated_drop_duplicates_index(self): expected = np.array([False] * len(original), dtype=bool) duplicated = original.duplicated() tm.assert_numpy_array_equal(duplicated, expected) - self.assertTrue(duplicated.dtype == bool) + assert duplicated.dtype == bool result = original.drop_duplicates() tm.assert_index_equal(result, original) - self.assertFalse(result is original) + assert result is not original # has_duplicates - self.assertFalse(original.has_duplicates) + assert not original.has_duplicates # create repeated values, 3rd and 5th values are duplicated idx = original[list(range(len(original))) + [5, 3]] @@ -865,7 +817,7 @@ def test_duplicated_drop_duplicates_index(self): dtype=bool) duplicated = idx.duplicated() tm.assert_numpy_array_equal(duplicated, expected) - self.assertTrue(duplicated.dtype == bool) + assert duplicated.dtype == bool tm.assert_index_equal(idx.drop_duplicates(), original) base = [False] * len(idx) @@ -875,19 +827,10 @@ def test_duplicated_drop_duplicates_index(self): duplicated = idx.duplicated(keep='last') tm.assert_numpy_array_equal(duplicated, expected) - self.assertTrue(duplicated.dtype == bool) + assert duplicated.dtype == bool result = idx.drop_duplicates(keep='last') tm.assert_index_equal(result, idx[~expected]) - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - duplicated = idx.duplicated(take_last=True) - tm.assert_numpy_array_equal(duplicated, expected) - self.assertTrue(duplicated.dtype == bool) - with tm.assert_produces_warning(FutureWarning): - result = idx.drop_duplicates(take_last=True) - tm.assert_index_equal(result, idx[~expected]) - base = [False] * len(original) + [True, True] base[3] = True base[5] = True @@ -895,11 +838,11 @@ def test_duplicated_drop_duplicates_index(self): duplicated = idx.duplicated(keep=False) tm.assert_numpy_array_equal(duplicated, expected) - self.assertTrue(duplicated.dtype == bool) + assert duplicated.dtype == bool result = idx.drop_duplicates(keep=False) 
tm.assert_index_equal(result, idx[~expected]) - with tm.assertRaisesRegexp( + with tm.assert_raises_regex( TypeError, r"drop_duplicates\(\) got an unexpected " "keyword argument"): idx.drop_duplicates(inplace=True) @@ -910,7 +853,7 @@ def test_duplicated_drop_duplicates_index(self): tm.assert_series_equal(original.duplicated(), expected) result = original.drop_duplicates() tm.assert_series_equal(result, original) - self.assertFalse(result is original) + assert result is not original idx = original.index[list(range(len(original))) + [5, 3]] values = original._values[list(range(len(original))) + [5, 3]] @@ -930,13 +873,6 @@ def test_duplicated_drop_duplicates_index(self): tm.assert_series_equal(s.drop_duplicates(keep='last'), s[~np.array(base)]) - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - tm.assert_series_equal( - s.duplicated(take_last=True), expected) - with tm.assert_produces_warning(FutureWarning): - tm.assert_series_equal(s.drop_duplicates(take_last=True), - s[~np.array(base)]) base = [False] * len(original) + [True, True] base[3] = True base[5] = True @@ -977,11 +913,11 @@ def test_fillna(self): # values will not be changed result = o.fillna(o.astype(object).values[0]) if isinstance(o, Index): - self.assert_index_equal(o, result) + tm.assert_index_equal(o, result) else: - self.assert_series_equal(o, result) + tm.assert_series_equal(o, result) # check shallow_copied - self.assertFalse(o is result) + assert o is not result for null_obj in [np.nan, None]: for orig in self.objs: @@ -1007,16 +943,17 @@ def test_fillna(self): o = klass(values) # check values has the same dtype as the original - self.assertEqual(o.dtype, orig.dtype) + assert o.dtype == orig.dtype result = o.fillna(fill_value) if isinstance(o, Index): - self.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) else: - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) # check shallow_copied - self.assertFalse(o 
is result) + assert o is not result + @pytest.mark.skipif(PYPY, reason="not relevant for PyPy") def test_memory_usage(self): for o in self.objs: res = o.memory_usage() @@ -1025,36 +962,34 @@ def test_memory_usage(self): if (is_object_dtype(o) or (isinstance(o, Series) and is_object_dtype(o.index))): # if there are objects, only deep will pick them up - self.assertTrue(res_deep > res) + assert res_deep > res else: - self.assertEqual(res, res_deep) + assert res == res_deep if isinstance(o, Series): - self.assertEqual( - (o.memory_usage(index=False) + - o.index.memory_usage()), - o.memory_usage(index=True) - ) + assert ((o.memory_usage(index=False) + + o.index.memory_usage()) == + o.memory_usage(index=True)) # sys.getsizeof will call the .memory_usage with # deep=True, and add on some GC overhead diff = res_deep - sys.getsizeof(o) - self.assertTrue(abs(diff) < 100) + assert abs(diff) < 100 def test_searchsorted(self): # See gh-12238 for o in self.objs: index = np.searchsorted(o, max(o)) - self.assertTrue(0 <= index <= len(o)) + assert 0 <= index <= len(o) index = np.searchsorted(o, max(o), sorter=range(len(o))) - self.assertTrue(0 <= index <= len(o)) + assert 0 <= index <= len(o) def test_validate_bool_args(self): invalid_values = [1, "True", [1, 2, 3], 5.0] for value in invalid_values: - with self.assertRaises(ValueError): + with pytest.raises(ValueError): self.int_series.drop_duplicates(inplace=value) @@ -1070,10 +1005,10 @@ def test_transpose(self): def test_transpose_non_default_axes(self): for obj in self.objs: - tm.assertRaisesRegexp(ValueError, self.errmsg, - obj.transpose, 1) - tm.assertRaisesRegexp(ValueError, self.errmsg, - obj.transpose, axes=1) + tm.assert_raises_regex(ValueError, self.errmsg, + obj.transpose, 1) + tm.assert_raises_regex(ValueError, self.errmsg, + obj.transpose, axes=1) def test_numpy_transpose(self): for obj in self.objs: @@ -1082,26 +1017,221 @@ def test_numpy_transpose(self): else: tm.assert_series_equal(np.transpose(obj), obj) - 
tm.assertRaisesRegexp(ValueError, self.errmsg, - np.transpose, obj, axes=1) + tm.assert_raises_regex(ValueError, self.errmsg, + np.transpose, obj, axes=1) -class TestNoNewAttributesMixin(tm.TestCase): +class TestNoNewAttributesMixin(object): def test_mixin(self): class T(NoNewAttributesMixin): pass t = T() - self.assertFalse(hasattr(t, "__frozen")) + assert not hasattr(t, "__frozen") + t.a = "test" - self.assertEqual(t.a, "test") + assert t.a == "test" + t._freeze() - # self.assertTrue("__frozen" not in dir(t)) - self.assertIs(getattr(t, "__frozen"), True) + assert "__frozen" in dir(t) + assert getattr(t, "__frozen") def f(): t.b = "test" - self.assertRaises(AttributeError, f) - self.assertFalse(hasattr(t, "b")) + pytest.raises(AttributeError, f) + assert not hasattr(t, "b") + + +class TestToIterable(object): + # test that we convert an iterable to python types + + dtypes = [ + ('int8', (int, long)), + ('int16', (int, long)), + ('int32', (int, long)), + ('int64', (int, long)), + ('uint8', (int, long)), + ('uint16', (int, long)), + ('uint32', (int, long)), + ('uint64', (int, long)), + ('float16', float), + ('float32', float), + ('float64', float), + ('datetime64[ns]', Timestamp), + ('datetime64[ns, US/Eastern]', Timestamp), + ('timedelta64[ns]', Timedelta)] + + @pytest.mark.parametrize( + 'dtype, rdtype', dtypes) + @pytest.mark.parametrize( + 'method', + [ + lambda x: x.tolist(), + lambda x: list(x), + lambda x: list(x.__iter__()), + ], ids=['tolist', 'list', 'iter']) + @pytest.mark.parametrize('typ', [Series, Index]) + def test_iterable(self, typ, method, dtype, rdtype): + # gh-10904 + # gh-13258 + # coerce iteration to underlying python / pandas types + s = typ([1], dtype=dtype) + result = method(s)[0] + assert isinstance(result, rdtype) + + @pytest.mark.parametrize( + 'dtype, rdtype, obj', + [ + ('object', object, 'a'), + ('object', (int, long), 1), + ('category', object, 'a'), + ('category', (int, long), 1)]) + @pytest.mark.parametrize( + 'method', + [ + lambda 
x: x.tolist(), + lambda x: list(x), + lambda x: list(x.__iter__()), + ], ids=['tolist', 'list', 'iter']) + @pytest.mark.parametrize('typ', [Series, Index]) + def test_iterable_object_and_category(self, typ, method, + dtype, rdtype, obj): + # gh-10904 + # gh-13258 + # coerce iteration to underlying python / pandas types + s = typ([obj], dtype=dtype) + result = method(s)[0] + assert isinstance(result, rdtype) + + @pytest.mark.parametrize( + 'dtype, rdtype', dtypes) + def test_iterable_items(self, dtype, rdtype): + # gh-13258 + # test items / iteritems yields the correct boxed scalars + # this only applies to series + s = Series([1], dtype=dtype) + _, result = list(s.items())[0] + assert isinstance(result, rdtype) + + _, result = list(s.iteritems())[0] + assert isinstance(result, rdtype) + + @pytest.mark.parametrize( + 'dtype, rdtype', + dtypes + [ + ('object', (int, long)), + ('category', (int, long))]) + @pytest.mark.parametrize('typ', [Series, Index]) + def test_iterable_map(self, typ, dtype, rdtype): + # gh-13236 + # coerce iteration to underlying python / pandas types + s = typ([1], dtype=dtype) + result = s.map(type)[0] + if not isinstance(rdtype, tuple): + rdtype = tuple([rdtype]) + assert result in rdtype + + @pytest.mark.parametrize( + 'method', + [ + lambda x: x.tolist(), + lambda x: list(x), + lambda x: list(x.__iter__()), + ], ids=['tolist', 'list', 'iter']) + def test_categorial_datetimelike(self, method): + i = CategoricalIndex([Timestamp('1999-12-31'), + Timestamp('2000-12-31')]) + + result = method(i)[0] + assert isinstance(result, Timestamp) + + def test_iter_box(self): + vals = [Timestamp('2011-01-01'), Timestamp('2011-01-02')] + s = Series(vals) + assert s.dtype == 'datetime64[ns]' + for res, exp in zip(s, vals): + assert isinstance(res, Timestamp) + assert res.tz is None + assert res == exp + + vals = [Timestamp('2011-01-01', tz='US/Eastern'), + Timestamp('2011-01-02', tz='US/Eastern')] + s = Series(vals) + + assert s.dtype == 'datetime64[ns, 
US/Eastern]' + for res, exp in zip(s, vals): + assert isinstance(res, Timestamp) + assert res.tz == exp.tz + assert res == exp + + # timedelta + vals = [Timedelta('1 days'), Timedelta('2 days')] + s = Series(vals) + assert s.dtype == 'timedelta64[ns]' + for res, exp in zip(s, vals): + assert isinstance(res, Timedelta) + assert res == exp + + # period (object dtype, not boxed) + vals = [pd.Period('2011-01-01', freq='M'), + pd.Period('2011-01-02', freq='M')] + s = Series(vals) + assert s.dtype == 'object' + for res, exp in zip(s, vals): + assert isinstance(res, pd.Period) + assert res.freq == 'M' + assert res == exp + + +@pytest.mark.parametrize('array, expected_type, dtype', [ + (np.array([0, 1], dtype=np.int64), np.ndarray, 'int64'), + (np.array(['a', 'b']), np.ndarray, 'object'), + (pd.Categorical(['a', 'b']), pd.Categorical, 'category'), + (pd.DatetimeIndex(['2017', '2018']), np.ndarray, 'datetime64[ns]'), + (pd.DatetimeIndex(['2017', '2018'], tz="US/Central"), pd.DatetimeIndex, + 'datetime64[ns, US/Central]'), + (pd.TimedeltaIndex([10**10]), np.ndarray, 'm8[ns]'), + (pd.PeriodIndex([2018, 2019], freq='A'), np.ndarray, 'object'), + (pd.IntervalIndex.from_breaks([0, 1, 2]), np.ndarray, 'object'), +]) +def test_values_consistent(array, expected_type, dtype): + l_values = pd.Series(array)._values + r_values = pd.Index(array)._values + assert type(l_values) is expected_type + assert type(l_values) is type(r_values) + + if isinstance(l_values, np.ndarray): + tm.assert_numpy_array_equal(l_values, r_values) + elif isinstance(l_values, pd.Index): + tm.assert_index_equal(l_values, r_values) + elif pd.api.types.is_categorical(l_values): + tm.assert_categorical_equal(l_values, r_values) + else: + raise TypeError("Unexpected type {}".format(type(l_values))) + + assert l_values.dtype == dtype + assert r_values.dtype == dtype + + +@pytest.mark.parametrize('array, expected', [ + (np.array([0, 1], dtype=np.int64), np.array([0, 1], dtype=np.int64)), + (np.array(['0', '1']), 
np.array(['0', '1'], dtype=object)), + (pd.Categorical(['a', 'a']), np.array([0, 0], dtype='int8')), + (pd.DatetimeIndex(['2017-01-01T00:00:00']), + np.array(['2017-01-01T00:00:00'], dtype='M8[ns]')), + (pd.DatetimeIndex(['2017-01-01T00:00:00'], tz="US/Eastern"), + np.array(['2017-01-01T05:00:00'], dtype='M8[ns]')), + (pd.TimedeltaIndex([10**10]), np.array([10**10], dtype='m8[ns]')), + pytest.param( + pd.PeriodIndex(['2017', '2018'], freq='D'), + np.array([17167, 17532]), + marks=pytest.mark.xfail(reason="PeriodArray Not implemented") + ), +]) +def test_ndarray_values(array, expected): + l_values = pd.Series(array)._ndarray_values + r_values = pd.Index(array)._ndarray_values + tm.assert_numpy_array_equal(l_values, r_values) + tm.assert_numpy_array_equal(l_values, expected) diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py deleted file mode 100644 index cc99cf0f830aa..0000000000000 --- a/pandas/tests/test_categorical.py +++ /dev/null @@ -1,4578 +0,0 @@ -# -*- coding: utf-8 -*- -# pylint: disable=E1101,E1103,W0232 - -import sys -from datetime import datetime -from distutils.version import LooseVersion - -import numpy as np - -from pandas.types.dtypes import CategoricalDtype -from pandas.types.common import (is_categorical_dtype, - is_object_dtype, - is_float_dtype, - is_integer_dtype) - -import pandas as pd -import pandas.compat as compat -import pandas.util.testing as tm -from pandas import (Categorical, Index, Series, DataFrame, PeriodIndex, - Timestamp, CategoricalIndex, isnull) -from pandas.compat import range, lrange, u, PY3 -from pandas.core.config import option_context - -# GH 12066 -# flake8: noqa - - -class TestCategorical(tm.TestCase): - - def setUp(self): - self.factor = Categorical(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'], - ordered=True) - - def test_getitem(self): - self.assertEqual(self.factor[0], 'a') - self.assertEqual(self.factor[-1], 'c') - - subf = self.factor[[0, 1, 2]] - tm.assert_numpy_array_equal(subf._codes, - 
np.array([0, 1, 1], dtype=np.int8)) - - subf = self.factor[np.asarray(self.factor) == 'c'] - tm.assert_numpy_array_equal(subf._codes, - np.array([2, 2, 2], dtype=np.int8)) - - def test_getitem_listlike(self): - - # GH 9469 - # properly coerce the input indexers - np.random.seed(1) - c = Categorical(np.random.randint(0, 5, size=150000).astype(np.int8)) - result = c.codes[np.array([100000]).astype(np.int64)] - expected = c[np.array([100000]).astype(np.int64)].codes - self.assert_numpy_array_equal(result, expected) - - def test_getitem_category_type(self): - # GH 14580 - # test iloc() on Series with Categorical data - - s = pd.Series([1, 2, 3]).astype('category') - - # get slice - result = s.iloc[0:2] - expected = pd.Series([1, 2]).astype('category', categories=[1, 2, 3]) - tm.assert_series_equal(result, expected) - - # get list of indexes - result = s.iloc[[0, 1]] - expected = pd.Series([1, 2]).astype('category', categories=[1, 2, 3]) - tm.assert_series_equal(result, expected) - - # get boolean array - result = s.iloc[[True, False, False]] - expected = pd.Series([1]).astype('category', categories=[1, 2, 3]) - tm.assert_series_equal(result, expected) - - def test_setitem(self): - - # int/positional - c = self.factor.copy() - c[0] = 'b' - self.assertEqual(c[0], 'b') - c[-1] = 'a' - self.assertEqual(c[-1], 'a') - - # boolean - c = self.factor.copy() - indexer = np.zeros(len(c), dtype='bool') - indexer[0] = True - indexer[-1] = True - c[indexer] = 'c' - expected = Categorical(['c', 'b', 'b', 'a', 'a', 'c', 'c', 'c'], - ordered=True) - - self.assert_categorical_equal(c, expected) - - def test_setitem_listlike(self): - - # GH 9469 - # properly coerce the input indexers - np.random.seed(1) - c = Categorical(np.random.randint(0, 5, size=150000).astype( - np.int8)).add_categories([-1000]) - indexer = np.array([100000]).astype(np.int64) - c[indexer] = -1000 - - # we are asserting the code result here - # which maps to the -1000 category - result = 
c.codes[np.array([100000]).astype(np.int64)] - self.assertEqual(result, np.array([5], dtype='int8')) - - def test_constructor_unsortable(self): - - # it works! - arr = np.array([1, 2, 3, datetime.now()], dtype='O') - factor = Categorical(arr, ordered=False) - self.assertFalse(factor.ordered) - - # this however will raise as cannot be sorted - self.assertRaises( - TypeError, lambda: Categorical(arr, ordered=True)) - - def test_is_equal_dtype(self): - - # test dtype comparisons between cats - - c1 = Categorical(list('aabca'), categories=list('abc'), ordered=False) - c2 = Categorical(list('aabca'), categories=list('cab'), ordered=False) - c3 = Categorical(list('aabca'), categories=list('cab'), ordered=True) - self.assertTrue(c1.is_dtype_equal(c1)) - self.assertTrue(c2.is_dtype_equal(c2)) - self.assertTrue(c3.is_dtype_equal(c3)) - self.assertFalse(c1.is_dtype_equal(c2)) - self.assertFalse(c1.is_dtype_equal(c3)) - self.assertFalse(c1.is_dtype_equal(Index(list('aabca')))) - self.assertFalse(c1.is_dtype_equal(c1.astype(object))) - self.assertTrue(c1.is_dtype_equal(CategoricalIndex(c1))) - self.assertFalse(c1.is_dtype_equal( - CategoricalIndex(c1, categories=list('cab')))) - self.assertFalse(c1.is_dtype_equal(CategoricalIndex(c1, ordered=True))) - - def test_constructor(self): - - exp_arr = np.array(["a", "b", "c", "a", "b", "c"], dtype=np.object_) - c1 = Categorical(exp_arr) - self.assert_numpy_array_equal(c1.__array__(), exp_arr) - c2 = Categorical(exp_arr, categories=["a", "b", "c"]) - self.assert_numpy_array_equal(c2.__array__(), exp_arr) - c2 = Categorical(exp_arr, categories=["c", "b", "a"]) - self.assert_numpy_array_equal(c2.__array__(), exp_arr) - - # categories must be unique - def f(): - Categorical([1, 2], [1, 2, 2]) - - self.assertRaises(ValueError, f) - - def f(): - Categorical(["a", "b"], ["a", "b", "b"]) - - self.assertRaises(ValueError, f) - - def f(): - with tm.assert_produces_warning(FutureWarning): - Categorical([1, 2], [1, 2, np.nan, np.nan]) - - 
self.assertRaises(ValueError, f) - - # The default should be unordered - c1 = Categorical(["a", "b", "c", "a"]) - self.assertFalse(c1.ordered) - - # Categorical as input - c1 = Categorical(["a", "b", "c", "a"]) - c2 = Categorical(c1) - tm.assert_categorical_equal(c1, c2) - - c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) - c2 = Categorical(c1) - tm.assert_categorical_equal(c1, c2) - - c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"]) - c2 = Categorical(c1) - tm.assert_categorical_equal(c1, c2) - - c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"]) - c2 = Categorical(c1, categories=["a", "b", "c"]) - self.assert_numpy_array_equal(c1.__array__(), c2.__array__()) - self.assert_index_equal(c2.categories, Index(["a", "b", "c"])) - - # Series of dtype category - c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) - c2 = Categorical(Series(c1)) - tm.assert_categorical_equal(c1, c2) - - c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"]) - c2 = Categorical(Series(c1)) - tm.assert_categorical_equal(c1, c2) - - # Series - c1 = Categorical(["a", "b", "c", "a"]) - c2 = Categorical(Series(["a", "b", "c", "a"])) - tm.assert_categorical_equal(c1, c2) - - c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) - c2 = Categorical(Series(["a", "b", "c", "a"]), - categories=["a", "b", "c", "d"]) - tm.assert_categorical_equal(c1, c2) - - # This should result in integer categories, not float! 
- cat = pd.Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) - self.assertTrue(is_integer_dtype(cat.categories)) - - # https://github.com/pandas-dev/pandas/issues/3678 - cat = pd.Categorical([np.nan, 1, 2, 3]) - self.assertTrue(is_integer_dtype(cat.categories)) - - # this should result in floats - cat = pd.Categorical([np.nan, 1, 2., 3]) - self.assertTrue(is_float_dtype(cat.categories)) - - cat = pd.Categorical([np.nan, 1., 2., 3.]) - self.assertTrue(is_float_dtype(cat.categories)) - - # Deprecating NaNs in categoires (GH #10748) - # preserve int as far as possible by converting to object if NaN is in - # categories - with tm.assert_produces_warning(FutureWarning): - cat = pd.Categorical([np.nan, 1, 2, 3], - categories=[np.nan, 1, 2, 3]) - self.assertTrue(is_object_dtype(cat.categories)) - - # This doesn't work -> this would probably need some kind of "remember - # the original type" feature to try to cast the array interface result - # to... - - # vals = np.asarray(cat[cat.notnull()]) - # self.assertTrue(is_integer_dtype(vals)) - with tm.assert_produces_warning(FutureWarning): - cat = pd.Categorical([np.nan, "a", "b", "c"], - categories=[np.nan, "a", "b", "c"]) - self.assertTrue(is_object_dtype(cat.categories)) - # but don't do it for floats - with tm.assert_produces_warning(FutureWarning): - cat = pd.Categorical([np.nan, 1., 2., 3.], - categories=[np.nan, 1., 2., 3.]) - self.assertTrue(is_float_dtype(cat.categories)) - - # corner cases - cat = pd.Categorical([1]) - self.assertTrue(len(cat.categories) == 1) - self.assertTrue(cat.categories[0] == 1) - self.assertTrue(len(cat.codes) == 1) - self.assertTrue(cat.codes[0] == 0) - - cat = pd.Categorical(["a"]) - self.assertTrue(len(cat.categories) == 1) - self.assertTrue(cat.categories[0] == "a") - self.assertTrue(len(cat.codes) == 1) - self.assertTrue(cat.codes[0] == 0) - - # Scalars should be converted to lists - cat = pd.Categorical(1) - self.assertTrue(len(cat.categories) == 1) - self.assertTrue(cat.categories[0] 
== 1) - self.assertTrue(len(cat.codes) == 1) - self.assertTrue(cat.codes[0] == 0) - - cat = pd.Categorical([1], categories=1) - self.assertTrue(len(cat.categories) == 1) - self.assertTrue(cat.categories[0] == 1) - self.assertTrue(len(cat.codes) == 1) - self.assertTrue(cat.codes[0] == 0) - - # Catch old style constructor useage: two arrays, codes + categories - # We can only catch two cases: - # - when the first is an integer dtype and the second is not - # - when the resulting codes are all -1/NaN - with tm.assert_produces_warning(RuntimeWarning): - c_old = Categorical([0, 1, 2, 0, 1, 2], - categories=["a", "b", "c"]) # noqa - - with tm.assert_produces_warning(RuntimeWarning): - c_old = Categorical([0, 1, 2, 0, 1, 2], # noqa - categories=[3, 4, 5]) - - # the next one are from the old docs, but unfortunately these don't - # trigger :-( - with tm.assert_produces_warning(None): - c_old2 = Categorical([0, 1, 2, 0, 1, 2], [1, 2, 3]) # noqa - cat = Categorical([1, 2], categories=[1, 2, 3]) - - # this is a legitimate constructor - with tm.assert_produces_warning(None): - c = Categorical(np.array([], dtype='int64'), # noqa - categories=[3, 2, 1], ordered=True) - - def test_constructor_with_index(self): - ci = CategoricalIndex(list('aabbca'), categories=list('cab')) - tm.assert_categorical_equal(ci.values, Categorical(ci)) - - ci = CategoricalIndex(list('aabbca'), categories=list('cab')) - tm.assert_categorical_equal(ci.values, - Categorical(ci.astype(object), - categories=ci.categories)) - - def test_constructor_with_generator(self): - # This was raising an Error in isnull(single_val).any() because isnull - # returned a scalar for a generator - xrange = range - - exp = Categorical([0, 1, 2]) - cat = Categorical((x for x in [0, 1, 2])) - tm.assert_categorical_equal(cat, exp) - cat = Categorical(xrange(3)) - tm.assert_categorical_equal(cat, exp) - - # This uses xrange internally - from pandas.core.index import MultiIndex - MultiIndex.from_product([range(5), ['a', 'b', 'c']]) 
- - # check that categories accept generators and sequences - cat = pd.Categorical([0, 1, 2], categories=(x for x in [0, 1, 2])) - tm.assert_categorical_equal(cat, exp) - cat = pd.Categorical([0, 1, 2], categories=xrange(3)) - tm.assert_categorical_equal(cat, exp) - - def test_constructor_with_datetimelike(self): - - # 12077 - # constructor wwth a datetimelike and NaT - - for dtl in [pd.date_range('1995-01-01 00:00:00', - periods=5, freq='s'), - pd.date_range('1995-01-01 00:00:00', - periods=5, freq='s', tz='US/Eastern'), - pd.timedelta_range('1 day', periods=5, freq='s')]: - - s = Series(dtl) - c = Categorical(s) - expected = type(dtl)(s) - expected.freq = None - tm.assert_index_equal(c.categories, expected) - self.assert_numpy_array_equal(c.codes, np.arange(5, dtype='int8')) - - # with NaT - s2 = s.copy() - s2.iloc[-1] = pd.NaT - c = Categorical(s2) - expected = type(dtl)(s2.dropna()) - expected.freq = None - tm.assert_index_equal(c.categories, expected) - - exp = np.array([0, 1, 2, 3, -1], dtype=np.int8) - self.assert_numpy_array_equal(c.codes, exp) - - result = repr(c) - self.assertTrue('NaT' in result) - - def test_constructor_from_index_series_datetimetz(self): - idx = pd.date_range('2015-01-01 10:00', freq='D', periods=3, - tz='US/Eastern') - result = pd.Categorical(idx) - tm.assert_index_equal(result.categories, idx) - - result = pd.Categorical(pd.Series(idx)) - tm.assert_index_equal(result.categories, idx) - - def test_constructor_from_index_series_timedelta(self): - idx = pd.timedelta_range('1 days', freq='D', periods=3) - result = pd.Categorical(idx) - tm.assert_index_equal(result.categories, idx) - - result = pd.Categorical(pd.Series(idx)) - tm.assert_index_equal(result.categories, idx) - - def test_constructor_from_index_series_period(self): - idx = pd.period_range('2015-01-01', freq='D', periods=3) - result = pd.Categorical(idx) - tm.assert_index_equal(result.categories, idx) - - result = pd.Categorical(pd.Series(idx)) - 
tm.assert_index_equal(result.categories, idx) - - def test_constructor_invariant(self): - # GH 14190 - vals = [ - np.array([1., 1.2, 1.8, np.nan]), - np.array([1, 2, 3], dtype='int64'), - ['a', 'b', 'c', np.nan], - [pd.Period('2014-01'), pd.Period('2014-02'), pd.NaT], - [pd.Timestamp('2014-01-01'), pd.Timestamp('2014-01-02'), pd.NaT], - [pd.Timestamp('2014-01-01', tz='US/Eastern'), - pd.Timestamp('2014-01-02', tz='US/Eastern'), pd.NaT], - ] - for val in vals: - c = Categorical(val) - c2 = Categorical(c) - tm.assert_categorical_equal(c, c2) - - def test_from_codes(self): - - # too few categories - def f(): - Categorical.from_codes([1, 2], [1, 2]) - - self.assertRaises(ValueError, f) - - # no int codes - def f(): - Categorical.from_codes(["a"], [1, 2]) - - self.assertRaises(ValueError, f) - - # no unique categories - def f(): - Categorical.from_codes([0, 1, 2], ["a", "a", "b"]) - - self.assertRaises(ValueError, f) - - # too negative - def f(): - Categorical.from_codes([-2, 1, 2], ["a", "b", "c"]) - - self.assertRaises(ValueError, f) - - exp = Categorical(["a", "b", "c"], ordered=False) - res = Categorical.from_codes([0, 1, 2], ["a", "b", "c"]) - tm.assert_categorical_equal(exp, res) - - # Not available in earlier numpy versions - if hasattr(np.random, "choice"): - codes = np.random.choice([0, 1], 5, p=[0.9, 0.1]) - pd.Categorical.from_codes(codes, categories=["train", "test"]) - - def test_validate_ordered(self): - # see gh-14058 - exp_msg = "'ordered' must either be 'True' or 'False'" - exp_err = TypeError - - # This should be a boolean. 
- ordered = np.array([0, 1, 2]) - - with tm.assertRaisesRegexp(exp_err, exp_msg): - Categorical([1, 2, 3], ordered=ordered) - - with tm.assertRaisesRegexp(exp_err, exp_msg): - Categorical.from_codes([0, 0, 1], categories=['a', 'b', 'c'], - ordered=ordered) - - def test_comparisons(self): - - result = self.factor[self.factor == 'a'] - expected = self.factor[np.asarray(self.factor) == 'a'] - tm.assert_categorical_equal(result, expected) - - result = self.factor[self.factor != 'a'] - expected = self.factor[np.asarray(self.factor) != 'a'] - tm.assert_categorical_equal(result, expected) - - result = self.factor[self.factor < 'c'] - expected = self.factor[np.asarray(self.factor) < 'c'] - tm.assert_categorical_equal(result, expected) - - result = self.factor[self.factor > 'a'] - expected = self.factor[np.asarray(self.factor) > 'a'] - tm.assert_categorical_equal(result, expected) - - result = self.factor[self.factor >= 'b'] - expected = self.factor[np.asarray(self.factor) >= 'b'] - tm.assert_categorical_equal(result, expected) - - result = self.factor[self.factor <= 'b'] - expected = self.factor[np.asarray(self.factor) <= 'b'] - tm.assert_categorical_equal(result, expected) - - n = len(self.factor) - - other = self.factor[np.random.permutation(n)] - result = self.factor == other - expected = np.asarray(self.factor) == np.asarray(other) - self.assert_numpy_array_equal(result, expected) - - result = self.factor == 'd' - expected = np.repeat(False, len(self.factor)) - self.assert_numpy_array_equal(result, expected) - - # comparisons with categoricals - cat_rev = pd.Categorical(["a", "b", "c"], categories=["c", "b", "a"], - ordered=True) - cat_rev_base = pd.Categorical( - ["b", "b", "b"], categories=["c", "b", "a"], ordered=True) - cat = pd.Categorical(["a", "b", "c"], ordered=True) - cat_base = pd.Categorical(["b", "b", "b"], categories=cat.categories, - ordered=True) - - # comparisons need to take categories ordering into account - res_rev = cat_rev > cat_rev_base - exp_rev 
= np.array([True, False, False]) - self.assert_numpy_array_equal(res_rev, exp_rev) - - res_rev = cat_rev < cat_rev_base - exp_rev = np.array([False, False, True]) - self.assert_numpy_array_equal(res_rev, exp_rev) - - res = cat > cat_base - exp = np.array([False, False, True]) - self.assert_numpy_array_equal(res, exp) - - # Only categories with same categories can be compared - def f(): - cat > cat_rev - - self.assertRaises(TypeError, f) - - cat_rev_base2 = pd.Categorical( - ["b", "b", "b"], categories=["c", "b", "a", "d"]) - - def f(): - cat_rev > cat_rev_base2 - - self.assertRaises(TypeError, f) - - # Only categories with same ordering information can be compared - cat_unorderd = cat.set_ordered(False) - self.assertFalse((cat > cat).any()) - - def f(): - cat > cat_unorderd - - self.assertRaises(TypeError, f) - - # comparison (in both directions) with Series will raise - s = Series(["b", "b", "b"]) - self.assertRaises(TypeError, lambda: cat > s) - self.assertRaises(TypeError, lambda: cat_rev > s) - self.assertRaises(TypeError, lambda: s < cat) - self.assertRaises(TypeError, lambda: s < cat_rev) - - # comparison with numpy.array will raise in both direction, but only on - # newer numpy versions - a = np.array(["b", "b", "b"]) - self.assertRaises(TypeError, lambda: cat > a) - self.assertRaises(TypeError, lambda: cat_rev > a) - - # The following work via '__array_priority__ = 1000' - # works only on numpy >= 1.7.1 - if LooseVersion(np.__version__) > "1.7.1": - self.assertRaises(TypeError, lambda: a < cat) - self.assertRaises(TypeError, lambda: a < cat_rev) - - # Make sure that unequal comparison take the categories order in - # account - cat_rev = pd.Categorical( - list("abc"), categories=list("cba"), ordered=True) - exp = np.array([True, False, False]) - res = cat_rev > "b" - self.assert_numpy_array_equal(res, exp) - - def test_argsort(self): - c = Categorical([5, 3, 1, 4, 2], ordered=True) - - expected = np.array([2, 4, 1, 3, 0]) - 
tm.assert_numpy_array_equal(c.argsort(ascending=True), expected, - check_dtype=False) - - expected = expected[::-1] - tm.assert_numpy_array_equal(c.argsort(ascending=False), expected, - check_dtype=False) - - def test_numpy_argsort(self): - c = Categorical([5, 3, 1, 4, 2], ordered=True) - - expected = np.array([2, 4, 1, 3, 0]) - tm.assert_numpy_array_equal(np.argsort(c), expected, - check_dtype=False) - - msg = "the 'kind' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.argsort, - c, kind='mergesort') - - msg = "the 'axis' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.argsort, - c, axis=0) - - msg = "the 'order' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.argsort, - c, order='C') - - def test_na_flags_int_categories(self): - # #1457 - - categories = lrange(10) - labels = np.random.randint(0, 10, 20) - labels[::5] = -1 - - cat = Categorical(labels, categories, fastpath=True) - repr(cat) - - self.assert_numpy_array_equal(isnull(cat), labels == -1) - - def test_categories_none(self): - factor = Categorical(['a', 'b', 'b', 'a', - 'a', 'c', 'c', 'c'], ordered=True) - tm.assert_categorical_equal(factor, self.factor) - - def test_describe(self): - # string type - desc = self.factor.describe() - self.assertTrue(self.factor.ordered) - exp_index = pd.CategoricalIndex(['a', 'b', 'c'], name='categories', - ordered=self.factor.ordered) - expected = DataFrame({'counts': [3, 2, 3], - 'freqs': [3 / 8., 2 / 8., 3 / 8.]}, - index=exp_index) - tm.assert_frame_equal(desc, expected) - - # check unused categories - cat = self.factor.copy() - cat.set_categories(["a", "b", "c", "d"], inplace=True) - desc = cat.describe() - - exp_index = pd.CategoricalIndex(['a', 'b', 'c', 'd'], - ordered=self.factor.ordered, - name='categories') - expected = DataFrame({'counts': [3, 2, 3, 0], - 'freqs': [3 / 8., 2 / 8., 3 / 8., 0]}, - index=exp_index) - tm.assert_frame_equal(desc, expected) - - # check an integer one - cat = 
Categorical([1, 2, 3, 1, 2, 3, 3, 2, 1, 1, 1]) - desc = cat.describe() - exp_index = pd.CategoricalIndex([1, 2, 3], ordered=cat.ordered, - name='categories') - expected = DataFrame({'counts': [5, 3, 3], - 'freqs': [5 / 11., 3 / 11., 3 / 11.]}, - index=exp_index) - tm.assert_frame_equal(desc, expected) - - # https://github.com/pandas-dev/pandas/issues/3678 - # describe should work with NaN - cat = pd.Categorical([np.nan, 1, 2, 2]) - desc = cat.describe() - expected = DataFrame({'counts': [1, 2, 1], - 'freqs': [1 / 4., 2 / 4., 1 / 4.]}, - index=pd.CategoricalIndex([1, 2, np.nan], - categories=[1, 2], - name='categories')) - tm.assert_frame_equal(desc, expected) - - # NA as a category - with tm.assert_produces_warning(FutureWarning): - cat = pd.Categorical(["a", "c", "c", np.nan], - categories=["b", "a", "c", np.nan]) - result = cat.describe() - - expected = DataFrame([[0, 0], [1, 0.25], [2, 0.5], [1, 0.25]], - columns=['counts', 'freqs'], - index=pd.CategoricalIndex(['b', 'a', 'c', np.nan], - name='categories')) - tm.assert_frame_equal(result, expected, check_categorical=False) - - # NA as an unused category - with tm.assert_produces_warning(FutureWarning): - cat = pd.Categorical(["a", "c", "c"], - categories=["b", "a", "c", np.nan]) - result = cat.describe() - - exp_idx = pd.CategoricalIndex( - ['b', 'a', 'c', np.nan], name='categories') - expected = DataFrame([[0, 0], [1, 1 / 3.], [2, 2 / 3.], [0, 0]], - columns=['counts', 'freqs'], index=exp_idx) - tm.assert_frame_equal(result, expected, check_categorical=False) - - def test_print(self): - expected = ["[a, b, b, a, a, c, c, c]", - "Categories (3, object): [a < b < c]"] - expected = "\n".join(expected) - actual = repr(self.factor) - self.assertEqual(actual, expected) - - def test_big_print(self): - factor = Categorical([0, 1, 2, 0, 1, 2] * 100, ['a', 'b', 'c'], - name='cat', fastpath=True) - expected = ["[a, b, c, a, b, ..., b, c, a, b, c]", "Length: 600", - "Categories (3, object): [a, b, c]"] - expected = 
"\n".join(expected) - - actual = repr(factor) - - self.assertEqual(actual, expected) - - def test_empty_print(self): - factor = Categorical([], ["a", "b", "c"]) - expected = ("[], Categories (3, object): [a, b, c]") - # hack because array_repr changed in numpy > 1.6.x - actual = repr(factor) - self.assertEqual(actual, expected) - - self.assertEqual(expected, actual) - factor = Categorical([], ["a", "b", "c"], ordered=True) - expected = ("[], Categories (3, object): [a < b < c]") - actual = repr(factor) - self.assertEqual(expected, actual) - - factor = Categorical([], []) - expected = ("[], Categories (0, object): []") - self.assertEqual(expected, repr(factor)) - - def test_print_none_width(self): - # GH10087 - a = pd.Series(pd.Categorical([1, 2, 3, 4])) - exp = u("0 1\n1 2\n2 3\n3 4\n" + - "dtype: category\nCategories (4, int64): [1, 2, 3, 4]") - - with option_context("display.width", None): - self.assertEqual(exp, repr(a)) - - def test_unicode_print(self): - if PY3: - _rep = repr - else: - _rep = unicode # noqa - - c = pd.Categorical(['aaaaa', 'bb', 'cccc'] * 20) - expected = u"""\ -[aaaaa, bb, cccc, aaaaa, bb, ..., bb, cccc, aaaaa, bb, cccc] -Length: 60 -Categories (3, object): [aaaaa, bb, cccc]""" - - self.assertEqual(_rep(c), expected) - - c = pd.Categorical([u'ああああ', u'いいいいい', u'ううううううう'] - * 20) - expected = u"""\ -[ああああ, いいいいい, ううううううう, ああああ, いいいいい, ..., いいいいい, ううううううう, ああああ, いいいいい, ううううううう] -Length: 60 -Categories (3, object): [ああああ, いいいいい, ううううううう]""" # noqa - - self.assertEqual(_rep(c), expected) - - # unicode option should not affect to Categorical, as it doesn't care - # the repr width - with option_context('display.unicode.east_asian_width', True): - - c = pd.Categorical([u'ああああ', u'いいいいい', u'ううううううう'] - * 20) - expected = u"""[ああああ, いいいいい, ううううううう, ああああ, いいいいい, ..., いいいいい, ううううううう, ああああ, いいいいい, ううううううう] -Length: 60 -Categories (3, object): [ああああ, いいいいい, ううううううう]""" # noqa - - self.assertEqual(_rep(c), expected) - - def test_periodindex(self): - idx1 = 
PeriodIndex(['2014-01', '2014-01', '2014-02', '2014-02', - '2014-03', '2014-03'], freq='M') - - cat1 = Categorical(idx1) - str(cat1) - exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.int8) - exp_idx = PeriodIndex(['2014-01', '2014-02', '2014-03'], freq='M') - self.assert_numpy_array_equal(cat1._codes, exp_arr) - self.assert_index_equal(cat1.categories, exp_idx) - - idx2 = PeriodIndex(['2014-03', '2014-03', '2014-02', '2014-01', - '2014-03', '2014-01'], freq='M') - cat2 = Categorical(idx2, ordered=True) - str(cat2) - exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.int8) - exp_idx2 = PeriodIndex(['2014-01', '2014-02', '2014-03'], freq='M') - self.assert_numpy_array_equal(cat2._codes, exp_arr) - self.assert_index_equal(cat2.categories, exp_idx2) - - idx3 = PeriodIndex(['2013-12', '2013-11', '2013-10', '2013-09', - '2013-08', '2013-07', '2013-05'], freq='M') - cat3 = Categorical(idx3, ordered=True) - exp_arr = np.array([6, 5, 4, 3, 2, 1, 0], dtype=np.int8) - exp_idx = PeriodIndex(['2013-05', '2013-07', '2013-08', '2013-09', - '2013-10', '2013-11', '2013-12'], freq='M') - self.assert_numpy_array_equal(cat3._codes, exp_arr) - self.assert_index_equal(cat3.categories, exp_idx) - - def test_categories_assigments(self): - s = pd.Categorical(["a", "b", "c", "a"]) - exp = np.array([1, 2, 3, 1], dtype=np.int64) - s.categories = [1, 2, 3] - self.assert_numpy_array_equal(s.__array__(), exp) - self.assert_index_equal(s.categories, Index([1, 2, 3])) - - # lengthen - def f(): - s.categories = [1, 2, 3, 4] - - self.assertRaises(ValueError, f) - - # shorten - def f(): - s.categories = [1, 2] - - self.assertRaises(ValueError, f) - - def test_construction_with_ordered(self): - # GH 9347, 9190 - cat = Categorical([0, 1, 2]) - self.assertFalse(cat.ordered) - cat = Categorical([0, 1, 2], ordered=False) - self.assertFalse(cat.ordered) - cat = Categorical([0, 1, 2], ordered=True) - self.assertTrue(cat.ordered) - - def test_ordered_api(self): - # GH 9347 - cat1 = pd.Categorical(["a", "c", 
"b"], ordered=False) - self.assert_index_equal(cat1.categories, Index(['a', 'b', 'c'])) - self.assertFalse(cat1.ordered) - - cat2 = pd.Categorical(["a", "c", "b"], categories=['b', 'c', 'a'], - ordered=False) - self.assert_index_equal(cat2.categories, Index(['b', 'c', 'a'])) - self.assertFalse(cat2.ordered) - - cat3 = pd.Categorical(["a", "c", "b"], ordered=True) - self.assert_index_equal(cat3.categories, Index(['a', 'b', 'c'])) - self.assertTrue(cat3.ordered) - - cat4 = pd.Categorical(["a", "c", "b"], categories=['b', 'c', 'a'], - ordered=True) - self.assert_index_equal(cat4.categories, Index(['b', 'c', 'a'])) - self.assertTrue(cat4.ordered) - - def test_set_ordered(self): - - cat = Categorical(["a", "b", "c", "a"], ordered=True) - cat2 = cat.as_unordered() - self.assertFalse(cat2.ordered) - cat2 = cat.as_ordered() - self.assertTrue(cat2.ordered) - cat2.as_unordered(inplace=True) - self.assertFalse(cat2.ordered) - cat2.as_ordered(inplace=True) - self.assertTrue(cat2.ordered) - - self.assertTrue(cat2.set_ordered(True).ordered) - self.assertFalse(cat2.set_ordered(False).ordered) - cat2.set_ordered(True, inplace=True) - self.assertTrue(cat2.ordered) - cat2.set_ordered(False, inplace=True) - self.assertFalse(cat2.ordered) - - # removed in 0.19.0 - msg = "can\'t set attribute" - with tm.assertRaisesRegexp(AttributeError, msg): - cat.ordered = True - with tm.assertRaisesRegexp(AttributeError, msg): - cat.ordered = False - - def test_set_categories(self): - cat = Categorical(["a", "b", "c", "a"], ordered=True) - exp_categories = Index(["c", "b", "a"]) - exp_values = np.array(["a", "b", "c", "a"], dtype=np.object_) - - res = cat.set_categories(["c", "b", "a"], inplace=True) - self.assert_index_equal(cat.categories, exp_categories) - self.assert_numpy_array_equal(cat.__array__(), exp_values) - self.assertIsNone(res) - - res = cat.set_categories(["a", "b", "c"]) - # cat must be the same as before - self.assert_index_equal(cat.categories, exp_categories) - 
self.assert_numpy_array_equal(cat.__array__(), exp_values) - # only res is changed - exp_categories_back = Index(["a", "b", "c"]) - self.assert_index_equal(res.categories, exp_categories_back) - self.assert_numpy_array_equal(res.__array__(), exp_values) - - # not all "old" included in "new" -> all not included ones are now - # np.nan - cat = Categorical(["a", "b", "c", "a"], ordered=True) - res = cat.set_categories(["a"]) - self.assert_numpy_array_equal(res.codes, - np.array([0, -1, -1, 0], dtype=np.int8)) - - # still not all "old" in "new" - res = cat.set_categories(["a", "b", "d"]) - self.assert_numpy_array_equal(res.codes, - np.array([0, 1, -1, 0], dtype=np.int8)) - self.assert_index_equal(res.categories, Index(["a", "b", "d"])) - - # all "old" included in "new" - cat = cat.set_categories(["a", "b", "c", "d"]) - exp_categories = Index(["a", "b", "c", "d"]) - self.assert_index_equal(cat.categories, exp_categories) - - # internals... - c = Categorical([1, 2, 3, 4, 1], categories=[1, 2, 3, 4], ordered=True) - self.assert_numpy_array_equal(c._codes, - np.array([0, 1, 2, 3, 0], dtype=np.int8)) - self.assert_index_equal(c.categories, Index([1, 2, 3, 4])) - - exp = np.array([1, 2, 3, 4, 1], dtype=np.int64) - self.assert_numpy_array_equal(c.get_values(), exp) - - # all "pointers" to '4' must be changed from 3 to 0,... 
- c = c.set_categories([4, 3, 2, 1]) - - # positions are changed - self.assert_numpy_array_equal(c._codes, - np.array([3, 2, 1, 0, 3], dtype=np.int8)) - - # categories are now in new order - self.assert_index_equal(c.categories, Index([4, 3, 2, 1])) - - # output is the same - exp = np.array([1, 2, 3, 4, 1], dtype=np.int64) - self.assert_numpy_array_equal(c.get_values(), exp) - self.assertTrue(c.min(), 4) - self.assertTrue(c.max(), 1) - - # set_categories should set the ordering if specified - c2 = c.set_categories([4, 3, 2, 1], ordered=False) - self.assertFalse(c2.ordered) - self.assert_numpy_array_equal(c.get_values(), c2.get_values()) - - # set_categories should pass thru the ordering - c2 = c.set_ordered(False).set_categories([4, 3, 2, 1]) - self.assertFalse(c2.ordered) - self.assert_numpy_array_equal(c.get_values(), c2.get_values()) - - def test_rename_categories(self): - cat = pd.Categorical(["a", "b", "c", "a"]) - - # inplace=False: the old one must not be changed - res = cat.rename_categories([1, 2, 3]) - self.assert_numpy_array_equal(res.__array__(), - np.array([1, 2, 3, 1], dtype=np.int64)) - self.assert_index_equal(res.categories, Index([1, 2, 3])) - - exp_cat = np.array(["a", "b", "c", "a"], dtype=np.object_) - self.assert_numpy_array_equal(cat.__array__(), exp_cat) - - exp_cat = Index(["a", "b", "c"]) - self.assert_index_equal(cat.categories, exp_cat) - res = cat.rename_categories([1, 2, 3], inplace=True) - - # and now inplace - self.assertIsNone(res) - self.assert_numpy_array_equal(cat.__array__(), - np.array([1, 2, 3, 1], dtype=np.int64)) - self.assert_index_equal(cat.categories, Index([1, 2, 3])) - - # lengthen - def f(): - cat.rename_categories([1, 2, 3, 4]) - - self.assertRaises(ValueError, f) - - # shorten - def f(): - cat.rename_categories([1, 2]) - - self.assertRaises(ValueError, f) - - def test_reorder_categories(self): - cat = Categorical(["a", "b", "c", "a"], ordered=True) - old = cat.copy() - new = Categorical(["a", "b", "c", "a"], 
categories=["c", "b", "a"], - ordered=True) - - # first inplace == False - res = cat.reorder_categories(["c", "b", "a"]) - # cat must be the same as before - self.assert_categorical_equal(cat, old) - # only res is changed - self.assert_categorical_equal(res, new) - - # inplace == True - res = cat.reorder_categories(["c", "b", "a"], inplace=True) - self.assertIsNone(res) - self.assert_categorical_equal(cat, new) - - # not all "old" included in "new" - cat = Categorical(["a", "b", "c", "a"], ordered=True) - - def f(): - cat.reorder_categories(["a"]) - - self.assertRaises(ValueError, f) - - # still not all "old" in "new" - def f(): - cat.reorder_categories(["a", "b", "d"]) - - self.assertRaises(ValueError, f) - - # all "old" included in "new", but too long - def f(): - cat.reorder_categories(["a", "b", "c", "d"]) - - self.assertRaises(ValueError, f) - - def test_add_categories(self): - cat = Categorical(["a", "b", "c", "a"], ordered=True) - old = cat.copy() - new = Categorical(["a", "b", "c", "a"], - categories=["a", "b", "c", "d"], ordered=True) - - # first inplace == False - res = cat.add_categories("d") - self.assert_categorical_equal(cat, old) - self.assert_categorical_equal(res, new) - - res = cat.add_categories(["d"]) - self.assert_categorical_equal(cat, old) - self.assert_categorical_equal(res, new) - - # inplace == True - res = cat.add_categories("d", inplace=True) - self.assert_categorical_equal(cat, new) - self.assertIsNone(res) - - # new is in old categories - def f(): - cat.add_categories(["d"]) - - self.assertRaises(ValueError, f) - - # GH 9927 - cat = Categorical(list("abc"), ordered=True) - expected = Categorical( - list("abc"), categories=list("abcde"), ordered=True) - # test with Series, np.array, index, list - res = cat.add_categories(Series(["d", "e"])) - self.assert_categorical_equal(res, expected) - res = cat.add_categories(np.array(["d", "e"])) - self.assert_categorical_equal(res, expected) - res = cat.add_categories(Index(["d", "e"])) - 
self.assert_categorical_equal(res, expected) - res = cat.add_categories(["d", "e"]) - self.assert_categorical_equal(res, expected) - - def test_remove_categories(self): - cat = Categorical(["a", "b", "c", "a"], ordered=True) - old = cat.copy() - new = Categorical(["a", "b", np.nan, "a"], categories=["a", "b"], - ordered=True) - - # first inplace == False - res = cat.remove_categories("c") - self.assert_categorical_equal(cat, old) - self.assert_categorical_equal(res, new) - - res = cat.remove_categories(["c"]) - self.assert_categorical_equal(cat, old) - self.assert_categorical_equal(res, new) - - # inplace == True - res = cat.remove_categories("c", inplace=True) - self.assert_categorical_equal(cat, new) - self.assertIsNone(res) - - # removal is not in categories - def f(): - cat.remove_categories(["c"]) - - self.assertRaises(ValueError, f) - - def test_remove_unused_categories(self): - c = Categorical(["a", "b", "c", "d", "a"], - categories=["a", "b", "c", "d", "e"]) - exp_categories_all = Index(["a", "b", "c", "d", "e"]) - exp_categories_dropped = Index(["a", "b", "c", "d"]) - - self.assert_index_equal(c.categories, exp_categories_all) - - res = c.remove_unused_categories() - self.assert_index_equal(res.categories, exp_categories_dropped) - self.assert_index_equal(c.categories, exp_categories_all) - - res = c.remove_unused_categories(inplace=True) - self.assert_index_equal(c.categories, exp_categories_dropped) - self.assertIsNone(res) - - # with NaN values (GH11599) - c = Categorical(["a", "b", "c", np.nan], - categories=["a", "b", "c", "d", "e"]) - res = c.remove_unused_categories() - self.assert_index_equal(res.categories, - Index(np.array(["a", "b", "c"]))) - exp_codes = np.array([0, 1, 2, -1], dtype=np.int8) - self.assert_numpy_array_equal(res.codes, exp_codes) - self.assert_index_equal(c.categories, exp_categories_all) - - val = ['F', np.nan, 'D', 'B', 'D', 'F', np.nan] - cat = pd.Categorical(values=val, categories=list('ABCDEFG')) - out = 
cat.remove_unused_categories() - self.assert_index_equal(out.categories, Index(['B', 'D', 'F'])) - exp_codes = np.array([2, -1, 1, 0, 1, 2, -1], dtype=np.int8) - self.assert_numpy_array_equal(out.codes, exp_codes) - self.assertEqual(out.get_values().tolist(), val) - - alpha = list('abcdefghijklmnopqrstuvwxyz') - val = np.random.choice(alpha[::2], 10000).astype('object') - val[np.random.choice(len(val), 100)] = np.nan - - cat = pd.Categorical(values=val, categories=alpha) - out = cat.remove_unused_categories() - self.assertEqual(out.get_values().tolist(), val.tolist()) - - def test_nan_handling(self): - - # Nans are represented as -1 in codes - c = Categorical(["a", "b", np.nan, "a"]) - self.assert_index_equal(c.categories, Index(["a", "b"])) - self.assert_numpy_array_equal(c._codes, - np.array([0, 1, -1, 0], dtype=np.int8)) - c[1] = np.nan - self.assert_index_equal(c.categories, Index(["a", "b"])) - self.assert_numpy_array_equal(c._codes, - np.array([0, -1, -1, 0], dtype=np.int8)) - - # If categories have nan included, the code should point to that - # instead - with tm.assert_produces_warning(FutureWarning): - c = Categorical(["a", "b", np.nan, "a"], - categories=["a", "b", np.nan]) - self.assert_index_equal(c.categories, Index(["a", "b", np.nan])) - self.assert_numpy_array_equal(c._codes, - np.array([0, 1, 2, 0], dtype=np.int8)) - c[1] = np.nan - self.assert_index_equal(c.categories, Index(["a", "b", np.nan])) - self.assert_numpy_array_equal(c._codes, - np.array([0, 2, 2, 0], dtype=np.int8)) - - # Changing categories should also make the replaced category np.nan - c = Categorical(["a", "b", "c", "a"]) - with tm.assert_produces_warning(FutureWarning): - c.categories = ["a", "b", np.nan] # noqa - - self.assert_index_equal(c.categories, Index(["a", "b", np.nan])) - self.assert_numpy_array_equal(c._codes, - np.array([0, 1, 2, 0], dtype=np.int8)) - - # Adding nan to categories should make assigned nan point to the - # category! 
- c = Categorical(["a", "b", np.nan, "a"]) - self.assert_index_equal(c.categories, Index(["a", "b"])) - self.assert_numpy_array_equal(c._codes, - np.array([0, 1, -1, 0], dtype=np.int8)) - with tm.assert_produces_warning(FutureWarning): - c.set_categories(["a", "b", np.nan], rename=True, inplace=True) - - self.assert_index_equal(c.categories, Index(["a", "b", np.nan])) - self.assert_numpy_array_equal(c._codes, - np.array([0, 1, -1, 0], dtype=np.int8)) - c[1] = np.nan - self.assert_index_equal(c.categories, Index(["a", "b", np.nan])) - self.assert_numpy_array_equal(c._codes, - np.array([0, 2, -1, 0], dtype=np.int8)) - - # Remove null categories (GH 10156) - cases = [([1.0, 2.0, np.nan], [1.0, 2.0]), - (['a', 'b', None], ['a', 'b']), - ([pd.Timestamp('2012-05-01'), pd.NaT], - [pd.Timestamp('2012-05-01')])] - - null_values = [np.nan, None, pd.NaT] - - for with_null, without in cases: - with tm.assert_produces_warning(FutureWarning): - base = Categorical([], with_null) - expected = Categorical([], without) - - for nullval in null_values: - result = base.remove_categories(nullval) - self.assert_categorical_equal(result, expected) - - # Different null values are indistinguishable - for i, j in [(0, 1), (0, 2), (1, 2)]: - nulls = [null_values[i], null_values[j]] - - def f(): - with tm.assert_produces_warning(FutureWarning): - Categorical([], categories=nulls) - - self.assertRaises(ValueError, f) - - def test_isnull(self): - exp = np.array([False, False, True]) - c = Categorical(["a", "b", np.nan]) - res = c.isnull() - self.assert_numpy_array_equal(res, exp) - - with tm.assert_produces_warning(FutureWarning): - c = Categorical(["a", "b", np.nan], categories=["a", "b", np.nan]) - res = c.isnull() - self.assert_numpy_array_equal(res, exp) - - # test both nan in categories and as -1 - exp = np.array([True, False, True]) - c = Categorical(["a", "b", np.nan]) - with tm.assert_produces_warning(FutureWarning): - c.set_categories(["a", "b", np.nan], rename=True, inplace=True) - 
c[0] = np.nan - res = c.isnull() - self.assert_numpy_array_equal(res, exp) - - def test_codes_immutable(self): - - # Codes should be read only - c = Categorical(["a", "b", "c", "a", np.nan]) - exp = np.array([0, 1, 2, 0, -1], dtype='int8') - self.assert_numpy_array_equal(c.codes, exp) - - # Assignments to codes should raise - def f(): - c.codes = np.array([0, 1, 2, 0, 1], dtype='int8') - - self.assertRaises(ValueError, f) - - # changes in the codes array should raise - # np 1.6.1 raises RuntimeError rather than ValueError - codes = c.codes - - def f(): - codes[4] = 1 - - self.assertRaises(ValueError, f) - - # But even after getting the codes, the original array should still be - # writeable! - c[4] = "a" - exp = np.array([0, 1, 2, 0, 0], dtype='int8') - self.assert_numpy_array_equal(c.codes, exp) - c._codes[4] = 2 - exp = np.array([0, 1, 2, 0, 2], dtype='int8') - self.assert_numpy_array_equal(c.codes, exp) - - def test_min_max(self): - - # unordered cats have no min/max - cat = Categorical(["a", "b", "c", "d"], ordered=False) - self.assertRaises(TypeError, lambda: cat.min()) - self.assertRaises(TypeError, lambda: cat.max()) - cat = Categorical(["a", "b", "c", "d"], ordered=True) - _min = cat.min() - _max = cat.max() - self.assertEqual(_min, "a") - self.assertEqual(_max, "d") - cat = Categorical(["a", "b", "c", "d"], - categories=['d', 'c', 'b', 'a'], ordered=True) - _min = cat.min() - _max = cat.max() - self.assertEqual(_min, "d") - self.assertEqual(_max, "a") - cat = Categorical([np.nan, "b", "c", np.nan], - categories=['d', 'c', 'b', 'a'], ordered=True) - _min = cat.min() - _max = cat.max() - self.assertTrue(np.isnan(_min)) - self.assertEqual(_max, "b") - - _min = cat.min(numeric_only=True) - self.assertEqual(_min, "c") - _max = cat.max(numeric_only=True) - self.assertEqual(_max, "b") - - cat = Categorical([np.nan, 1, 2, np.nan], categories=[5, 4, 3, 2, 1], - ordered=True) - _min = cat.min() - _max = cat.max() - self.assertTrue(np.isnan(_min)) - 
self.assertEqual(_max, 1) - - _min = cat.min(numeric_only=True) - self.assertEqual(_min, 2) - _max = cat.max(numeric_only=True) - self.assertEqual(_max, 1) - - def test_unique(self): - # categories are reordered based on value when ordered=False - cat = Categorical(["a", "b"]) - exp = Index(["a", "b"]) - res = cat.unique() - self.assert_index_equal(res.categories, exp) - self.assert_categorical_equal(res, cat) - - cat = Categorical(["a", "b", "a", "a"], categories=["a", "b", "c"]) - res = cat.unique() - self.assert_index_equal(res.categories, exp) - tm.assert_categorical_equal(res, Categorical(exp)) - - cat = Categorical(["c", "a", "b", "a", "a"], - categories=["a", "b", "c"]) - exp = Index(["c", "a", "b"]) - res = cat.unique() - self.assert_index_equal(res.categories, exp) - exp_cat = Categorical(exp, categories=['c', 'a', 'b']) - tm.assert_categorical_equal(res, exp_cat) - - # nan must be removed - cat = Categorical(["b", np.nan, "b", np.nan, "a"], - categories=["a", "b", "c"]) - res = cat.unique() - exp = Index(["b", "a"]) - self.assert_index_equal(res.categories, exp) - exp_cat = Categorical(["b", np.nan, "a"], categories=["b", "a"]) - tm.assert_categorical_equal(res, exp_cat) - - def test_unique_ordered(self): - # keep categories order when ordered=True - cat = Categorical(['b', 'a', 'b'], categories=['a', 'b'], ordered=True) - res = cat.unique() - exp_cat = Categorical(['b', 'a'], categories=['a', 'b'], ordered=True) - tm.assert_categorical_equal(res, exp_cat) - - cat = Categorical(['c', 'b', 'a', 'a'], categories=['a', 'b', 'c'], - ordered=True) - res = cat.unique() - exp_cat = Categorical(['c', 'b', 'a'], categories=['a', 'b', 'c'], - ordered=True) - tm.assert_categorical_equal(res, exp_cat) - - cat = Categorical(['b', 'a', 'a'], categories=['a', 'b', 'c'], - ordered=True) - res = cat.unique() - exp_cat = Categorical(['b', 'a'], categories=['a', 'b'], ordered=True) - tm.assert_categorical_equal(res, exp_cat) - - cat = Categorical(['b', 'b', np.nan, 'a'], 
categories=['a', 'b', 'c'], - ordered=True) - res = cat.unique() - exp_cat = Categorical(['b', np.nan, 'a'], categories=['a', 'b'], - ordered=True) - tm.assert_categorical_equal(res, exp_cat) - - def test_unique_index_series(self): - c = Categorical([3, 1, 2, 2, 1], categories=[3, 2, 1]) - # Categorical.unique sorts categories by appearance order - # if ordered=False - exp = Categorical([3, 1, 2], categories=[3, 1, 2]) - tm.assert_categorical_equal(c.unique(), exp) - - tm.assert_index_equal(Index(c).unique(), Index(exp)) - tm.assert_categorical_equal(pd.Series(c).unique(), exp) - - c = Categorical([1, 1, 2, 2], categories=[3, 2, 1]) - exp = Categorical([1, 2], categories=[1, 2]) - tm.assert_categorical_equal(c.unique(), exp) - tm.assert_index_equal(Index(c).unique(), Index(exp)) - tm.assert_categorical_equal(pd.Series(c).unique(), exp) - - c = Categorical([3, 1, 2, 2, 1], categories=[3, 2, 1], ordered=True) - # Categorical.unique keeps categories order if ordered=True - exp = Categorical([3, 1, 2], categories=[3, 2, 1], ordered=True) - tm.assert_categorical_equal(c.unique(), exp) - - tm.assert_index_equal(Index(c).unique(), Index(exp)) - tm.assert_categorical_equal(pd.Series(c).unique(), exp) - - def test_mode(self): - s = Categorical([1, 1, 2, 4, 5, 5, 5], categories=[5, 4, 3, 2, 1], - ordered=True) - res = s.mode() - exp = Categorical([5], categories=[5, 4, 3, 2, 1], ordered=True) - tm.assert_categorical_equal(res, exp) - s = Categorical([1, 1, 1, 4, 5, 5, 5], categories=[5, 4, 3, 2, 1], - ordered=True) - res = s.mode() - exp = Categorical([5, 1], categories=[5, 4, 3, 2, 1], ordered=True) - tm.assert_categorical_equal(res, exp) - s = Categorical([1, 2, 3, 4, 5], categories=[5, 4, 3, 2, 1], - ordered=True) - res = s.mode() - exp = Categorical([], categories=[5, 4, 3, 2, 1], ordered=True) - tm.assert_categorical_equal(res, exp) - # NaN should not become the mode! 
- s = Categorical([np.nan, np.nan, np.nan, 4, 5], - categories=[5, 4, 3, 2, 1], ordered=True) - res = s.mode() - exp = Categorical([], categories=[5, 4, 3, 2, 1], ordered=True) - tm.assert_categorical_equal(res, exp) - s = Categorical([np.nan, np.nan, np.nan, 4, 5, 4], - categories=[5, 4, 3, 2, 1], ordered=True) - res = s.mode() - exp = Categorical([4], categories=[5, 4, 3, 2, 1], ordered=True) - tm.assert_categorical_equal(res, exp) - s = Categorical([np.nan, np.nan, 4, 5, 4], categories=[5, 4, 3, 2, 1], - ordered=True) - res = s.mode() - exp = Categorical([4], categories=[5, 4, 3, 2, 1], ordered=True) - tm.assert_categorical_equal(res, exp) - - def test_sort_values(self): - - # unordered cats are sortable - cat = Categorical(["a", "b", "b", "a"], ordered=False) - cat.sort_values() - - cat = Categorical(["a", "c", "b", "d"], ordered=True) - - # sort_values - res = cat.sort_values() - exp = np.array(["a", "b", "c", "d"], dtype=object) - self.assert_numpy_array_equal(res.__array__(), exp) - self.assert_index_equal(res.categories, cat.categories) - - cat = Categorical(["a", "c", "b", "d"], - categories=["a", "b", "c", "d"], ordered=True) - res = cat.sort_values() - exp = np.array(["a", "b", "c", "d"], dtype=object) - self.assert_numpy_array_equal(res.__array__(), exp) - self.assert_index_equal(res.categories, cat.categories) - - res = cat.sort_values(ascending=False) - exp = np.array(["d", "c", "b", "a"], dtype=object) - self.assert_numpy_array_equal(res.__array__(), exp) - self.assert_index_equal(res.categories, cat.categories) - - # sort (inplace order) - cat1 = cat.copy() - cat1.sort_values(inplace=True) - exp = np.array(["a", "b", "c", "d"], dtype=object) - self.assert_numpy_array_equal(cat1.__array__(), exp) - self.assert_index_equal(res.categories, cat.categories) - - # reverse - cat = Categorical(["a", "c", "c", "b", "d"], ordered=True) - res = cat.sort_values(ascending=False) - exp_val = np.array(["d", "c", "c", "b", "a"], dtype=object) - exp_categories = 
Index(["a", "b", "c", "d"]) - self.assert_numpy_array_equal(res.__array__(), exp_val) - self.assert_index_equal(res.categories, exp_categories) - - def test_sort_values_na_position(self): - # see gh-12882 - cat = Categorical([5, 2, np.nan, 2, np.nan], ordered=True) - exp_categories = Index([2, 5]) - - exp = np.array([2.0, 2.0, 5.0, np.nan, np.nan]) - res = cat.sort_values() # default arguments - self.assert_numpy_array_equal(res.__array__(), exp) - self.assert_index_equal(res.categories, exp_categories) - - exp = np.array([np.nan, np.nan, 2.0, 2.0, 5.0]) - res = cat.sort_values(ascending=True, na_position='first') - self.assert_numpy_array_equal(res.__array__(), exp) - self.assert_index_equal(res.categories, exp_categories) - - exp = np.array([np.nan, np.nan, 5.0, 2.0, 2.0]) - res = cat.sort_values(ascending=False, na_position='first') - self.assert_numpy_array_equal(res.__array__(), exp) - self.assert_index_equal(res.categories, exp_categories) - - exp = np.array([2.0, 2.0, 5.0, np.nan, np.nan]) - res = cat.sort_values(ascending=True, na_position='last') - self.assert_numpy_array_equal(res.__array__(), exp) - self.assert_index_equal(res.categories, exp_categories) - - exp = np.array([5.0, 2.0, 2.0, np.nan, np.nan]) - res = cat.sort_values(ascending=False, na_position='last') - self.assert_numpy_array_equal(res.__array__(), exp) - self.assert_index_equal(res.categories, exp_categories) - - cat = Categorical(["a", "c", "b", "d", np.nan], ordered=True) - res = cat.sort_values(ascending=False, na_position='last') - exp_val = np.array(["d", "c", "b", "a", np.nan], dtype=object) - exp_categories = Index(["a", "b", "c", "d"]) - self.assert_numpy_array_equal(res.__array__(), exp_val) - self.assert_index_equal(res.categories, exp_categories) - - cat = Categorical(["a", "c", "b", "d", np.nan], ordered=True) - res = cat.sort_values(ascending=False, na_position='first') - exp_val = np.array([np.nan, "d", "c", "b", "a"], dtype=object) - exp_categories = Index(["a", "b", "c", 
"d"]) - self.assert_numpy_array_equal(res.__array__(), exp_val) - self.assert_index_equal(res.categories, exp_categories) - - def test_slicing_directly(self): - cat = Categorical(["a", "b", "c", "d", "a", "b", "c"]) - sliced = cat[3] - self.assertEqual(sliced, "d") - sliced = cat[3:5] - expected = Categorical(["d", "a"], categories=['a', 'b', 'c', 'd']) - self.assert_numpy_array_equal(sliced._codes, expected._codes) - tm.assert_index_equal(sliced.categories, expected.categories) - - def test_set_item_nan(self): - cat = pd.Categorical([1, 2, 3]) - exp = pd.Categorical([1, np.nan, 3], categories=[1, 2, 3]) - cat[1] = np.nan - tm.assert_categorical_equal(cat, exp) - - # if nan in categories, the proper code should be set! - cat = pd.Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) - with tm.assert_produces_warning(FutureWarning): - cat.set_categories([1, 2, 3, np.nan], rename=True, inplace=True) - cat[1] = np.nan - exp = np.array([0, 3, 2, -1], dtype=np.int8) - self.assert_numpy_array_equal(cat.codes, exp) - - cat = pd.Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) - with tm.assert_produces_warning(FutureWarning): - cat.set_categories([1, 2, 3, np.nan], rename=True, inplace=True) - cat[1:3] = np.nan - exp = np.array([0, 3, 3, -1], dtype=np.int8) - self.assert_numpy_array_equal(cat.codes, exp) - - cat = pd.Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) - with tm.assert_produces_warning(FutureWarning): - cat.set_categories([1, 2, 3, np.nan], rename=True, inplace=True) - cat[1:3] = [np.nan, 1] - exp = np.array([0, 3, 0, -1], dtype=np.int8) - self.assert_numpy_array_equal(cat.codes, exp) - - cat = pd.Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) - with tm.assert_produces_warning(FutureWarning): - cat.set_categories([1, 2, 3, np.nan], rename=True, inplace=True) - cat[1:3] = [np.nan, np.nan] - exp = np.array([0, 3, 3, -1], dtype=np.int8) - self.assert_numpy_array_equal(cat.codes, exp) - - cat = pd.Categorical([1, 2, np.nan, 3], categories=[1, 2, 3]) - 
with tm.assert_produces_warning(FutureWarning): - cat.set_categories([1, 2, 3, np.nan], rename=True, inplace=True) - cat[pd.isnull(cat)] = np.nan - exp = np.array([0, 1, 3, 2], dtype=np.int8) - self.assert_numpy_array_equal(cat.codes, exp) - - def test_shift(self): - # GH 9416 - cat = pd.Categorical(['a', 'b', 'c', 'd', 'a']) - - # shift forward - sp1 = cat.shift(1) - xp1 = pd.Categorical([np.nan, 'a', 'b', 'c', 'd']) - self.assert_categorical_equal(sp1, xp1) - self.assert_categorical_equal(cat[:-1], sp1[1:]) - - # shift back - sn2 = cat.shift(-2) - xp2 = pd.Categorical(['c', 'd', 'a', np.nan, np.nan], - categories=['a', 'b', 'c', 'd']) - self.assert_categorical_equal(sn2, xp2) - self.assert_categorical_equal(cat[2:], sn2[:-2]) - - # shift by zero - self.assert_categorical_equal(cat, cat.shift(0)) - - def test_nbytes(self): - cat = pd.Categorical([1, 2, 3]) - exp = cat._codes.nbytes + cat._categories.values.nbytes - self.assertEqual(cat.nbytes, exp) - - def test_memory_usage(self): - cat = pd.Categorical([1, 2, 3]) - - # .categories is an index, so we include the hashtable - self.assertTrue(cat.nbytes > 0 and cat.nbytes <= cat.memory_usage()) - self.assertTrue(cat.nbytes > 0 and - cat.nbytes <= cat.memory_usage(deep=True)) - - cat = pd.Categorical(['foo', 'foo', 'bar']) - self.assertTrue(cat.memory_usage(deep=True) > cat.nbytes) - - # sys.getsizeof will call the .memory_usage with - # deep=True, and add on some GC overhead - diff = cat.memory_usage(deep=True) - sys.getsizeof(cat) - self.assertTrue(abs(diff) < 100) - - def test_searchsorted(self): - # https://github.com/pandas-dev/pandas/issues/8420 - # https://github.com/pandas-dev/pandas/issues/14522 - - c1 = pd.Categorical(['cheese', 'milk', 'apple', 'bread', 'bread'], - categories=['cheese', 'milk', 'apple', 'bread'], - ordered=True) - s1 = pd.Series(c1) - c2 = pd.Categorical(['cheese', 'milk', 'apple', 'bread', 'bread'], - categories=['cheese', 'milk', 'apple', 'bread'], - ordered=False) - s2 = pd.Series(c2) - 
- # Searching for single item argument, side='left' (default) - res_cat = c1.searchsorted('apple') - res_ser = s1.searchsorted('apple') - exp = np.array([2], dtype=np.intp) - self.assert_numpy_array_equal(res_cat, exp) - self.assert_numpy_array_equal(res_ser, exp) - - # Searching for single item array, side='left' (default) - res_cat = c1.searchsorted(['bread']) - res_ser = s1.searchsorted(['bread']) - exp = np.array([3], dtype=np.intp) - self.assert_numpy_array_equal(res_cat, exp) - self.assert_numpy_array_equal(res_ser, exp) - - # Searching for several items array, side='right' - res_cat = c1.searchsorted(['apple', 'bread'], side='right') - res_ser = s1.searchsorted(['apple', 'bread'], side='right') - exp = np.array([3, 5], dtype=np.intp) - self.assert_numpy_array_equal(res_cat, exp) - self.assert_numpy_array_equal(res_ser, exp) - - # Searching for a single value that is not from the Categorical - self.assertRaises(ValueError, lambda: c1.searchsorted('cucumber')) - self.assertRaises(ValueError, lambda: s1.searchsorted('cucumber')) - - # Searching for multiple values one of each is not from the Categorical - self.assertRaises(ValueError, - lambda: c1.searchsorted(['bread', 'cucumber'])) - self.assertRaises(ValueError, - lambda: s1.searchsorted(['bread', 'cucumber'])) - - # searchsorted call for unordered Categorical - self.assertRaises(ValueError, lambda: c2.searchsorted('apple')) - self.assertRaises(ValueError, lambda: s2.searchsorted('apple')) - - with tm.assert_produces_warning(FutureWarning): - res = c1.searchsorted(v=['bread']) - exp = np.array([3], dtype=np.intp) - tm.assert_numpy_array_equal(res, exp) - - def test_deprecated_labels(self): - # TODO: labels is deprecated and should be removed in 0.18 or 2017, - # whatever is earlier - cat = pd.Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) - exp = cat.codes - with tm.assert_produces_warning(FutureWarning): - res = cat.labels - self.assert_numpy_array_equal(res, exp) - - def 
test_deprecated_from_array(self): - # GH13854, `.from_array` is deprecated - with tm.assert_produces_warning(FutureWarning): - Categorical.from_array([0, 1]) - - def test_removed_names_produces_warning(self): - - # 10482 - with tm.assert_produces_warning(UserWarning): - Categorical([0, 1], name="a") - - with tm.assert_produces_warning(UserWarning): - Categorical.from_codes([1, 2], ["a", "b", "c"], name="a") - - def test_datetime_categorical_comparison(self): - dt_cat = pd.Categorical( - pd.date_range('2014-01-01', periods=3), ordered=True) - self.assert_numpy_array_equal(dt_cat > dt_cat[0], - np.array([False, True, True])) - self.assert_numpy_array_equal(dt_cat[0] < dt_cat, - np.array([False, True, True])) - - def test_reflected_comparison_with_scalars(self): - # GH8658 - cat = pd.Categorical([1, 2, 3], ordered=True) - self.assert_numpy_array_equal(cat > cat[0], - np.array([False, True, True])) - self.assert_numpy_array_equal(cat[0] < cat, - np.array([False, True, True])) - - def test_comparison_with_unknown_scalars(self): - # https://github.com/pandas-dev/pandas/issues/9836#issuecomment-92123057 - # and following comparisons with scalars not in categories should raise - # for unequal comps, but not for equal/not equal - cat = pd.Categorical([1, 2, 3], ordered=True) - - self.assertRaises(TypeError, lambda: cat < 4) - self.assertRaises(TypeError, lambda: cat > 4) - self.assertRaises(TypeError, lambda: 4 < cat) - self.assertRaises(TypeError, lambda: 4 > cat) - - self.assert_numpy_array_equal(cat == 4, - np.array([False, False, False])) - self.assert_numpy_array_equal(cat != 4, - np.array([True, True, True])) - - def test_map(self): - c = pd.Categorical(list('ABABC'), categories=list('CBA'), - ordered=True) - result = c.map(lambda x: x.lower()) - exp = pd.Categorical(list('ababc'), categories=list('cba'), - ordered=True) - tm.assert_categorical_equal(result, exp) - - c = pd.Categorical(list('ABABC'), categories=list('ABC'), - ordered=False) - result = c.map(lambda x: 
x.lower()) - exp = pd.Categorical(list('ababc'), categories=list('abc'), - ordered=False) - tm.assert_categorical_equal(result, exp) - - result = c.map(lambda x: 1) - # GH 12766: Return an index not an array - tm.assert_index_equal(result, Index(np.array([1] * 5, dtype=np.int64))) - - def test_validate_inplace(self): - cat = Categorical(['A', 'B', 'B', 'C', 'A']) - invalid_values = [1, "True", [1, 2, 3], 5.0] - - for value in invalid_values: - with self.assertRaises(ValueError): - cat.set_ordered(value=True, inplace=value) - - with self.assertRaises(ValueError): - cat.as_ordered(inplace=value) - - with self.assertRaises(ValueError): - cat.as_unordered(inplace=value) - - with self.assertRaises(ValueError): - cat.set_categories(['X', 'Y', 'Z'], rename=True, inplace=value) - - with self.assertRaises(ValueError): - cat.rename_categories(['X', 'Y', 'Z'], inplace=value) - - with self.assertRaises(ValueError): - cat.reorder_categories( - ['X', 'Y', 'Z'], ordered=True, inplace=value) - - with self.assertRaises(ValueError): - cat.add_categories( - new_categories=['D', 'E', 'F'], inplace=value) - - with self.assertRaises(ValueError): - cat.remove_categories(removals=['D', 'E', 'F'], inplace=value) - - with self.assertRaises(ValueError): - cat.remove_unused_categories(inplace=value) - - with self.assertRaises(ValueError): - cat.sort_values(inplace=value) - - -class TestCategoricalAsBlock(tm.TestCase): - - def setUp(self): - self.factor = Categorical(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']) - - df = DataFrame({'value': np.random.randint(0, 10000, 100)}) - labels = ["{0} - {1}".format(i, i + 499) for i in range(0, 10000, 500)] - - df = df.sort_values(by=['value'], ascending=True) - df['value_group'] = pd.cut(df.value, range(0, 10500, 500), right=False, - labels=labels) - self.cat = df - - def test_dtypes(self): - - # GH8143 - index = ['cat', 'obj', 'num'] - cat = pd.Categorical(['a', 'b', 'c']) - obj = pd.Series(['a', 'b', 'c']) - num = pd.Series([1, 2, 3]) - df = 
pd.concat([pd.Series(cat), obj, num], axis=1, keys=index) - - result = df.dtypes == 'object' - expected = Series([False, True, False], index=index) - tm.assert_series_equal(result, expected) - - result = df.dtypes == 'int64' - expected = Series([False, False, True], index=index) - tm.assert_series_equal(result, expected) - - result = df.dtypes == 'category' - expected = Series([True, False, False], index=index) - tm.assert_series_equal(result, expected) - - def test_codes_dtypes(self): - - # GH 8453 - result = Categorical(['foo', 'bar', 'baz']) - self.assertTrue(result.codes.dtype == 'int8') - - result = Categorical(['foo%05d' % i for i in range(400)]) - self.assertTrue(result.codes.dtype == 'int16') - - result = Categorical(['foo%05d' % i for i in range(40000)]) - self.assertTrue(result.codes.dtype == 'int32') - - # adding cats - result = Categorical(['foo', 'bar', 'baz']) - self.assertTrue(result.codes.dtype == 'int8') - result = result.add_categories(['foo%05d' % i for i in range(400)]) - self.assertTrue(result.codes.dtype == 'int16') - - # removing cats - result = result.remove_categories(['foo%05d' % i for i in range(300)]) - self.assertTrue(result.codes.dtype == 'int8') - - def test_basic(self): - - # test basic creation / coercion of categoricals - s = Series(self.factor, name='A') - self.assertEqual(s.dtype, 'category') - self.assertEqual(len(s), len(self.factor)) - str(s.values) - str(s) - - # in a frame - df = DataFrame({'A': self.factor}) - result = df['A'] - tm.assert_series_equal(result, s) - result = df.iloc[:, 0] - tm.assert_series_equal(result, s) - self.assertEqual(len(df), len(self.factor)) - str(df.values) - str(df) - - df = DataFrame({'A': s}) - result = df['A'] - tm.assert_series_equal(result, s) - self.assertEqual(len(df), len(self.factor)) - str(df.values) - str(df) - - # multiples - df = DataFrame({'A': s, 'B': s, 'C': 1}) - result1 = df['A'] - result2 = df['B'] - tm.assert_series_equal(result1, s) - tm.assert_series_equal(result2, s, 
check_names=False) - self.assertEqual(result2.name, 'B') - self.assertEqual(len(df), len(self.factor)) - str(df.values) - str(df) - - # GH8623 - x = pd.DataFrame([[1, 'John P. Doe'], [2, 'Jane Dove'], - [1, 'John P. Doe']], - columns=['person_id', 'person_name']) - x['person_name'] = pd.Categorical(x.person_name - ) # doing this breaks transform - - expected = x.iloc[0].person_name - result = x.person_name.iloc[0] - self.assertEqual(result, expected) - - result = x.person_name[0] - self.assertEqual(result, expected) - - result = x.person_name.loc[0] - self.assertEqual(result, expected) - - def test_creation_astype(self): - l = ["a", "b", "c", "a"] - s = pd.Series(l) - exp = pd.Series(Categorical(l)) - res = s.astype('category') - tm.assert_series_equal(res, exp) - - l = [1, 2, 3, 1] - s = pd.Series(l) - exp = pd.Series(Categorical(l)) - res = s.astype('category') - tm.assert_series_equal(res, exp) - - df = pd.DataFrame({"cats": [1, 2, 3, 4, 5, 6], - "vals": [1, 2, 3, 4, 5, 6]}) - cats = Categorical([1, 2, 3, 4, 5, 6]) - exp_df = pd.DataFrame({"cats": cats, "vals": [1, 2, 3, 4, 5, 6]}) - df["cats"] = df["cats"].astype("category") - tm.assert_frame_equal(exp_df, df) - - df = pd.DataFrame({"cats": ['a', 'b', 'b', 'a', 'a', 'd'], - "vals": [1, 2, 3, 4, 5, 6]}) - cats = Categorical(['a', 'b', 'b', 'a', 'a', 'd']) - exp_df = pd.DataFrame({"cats": cats, "vals": [1, 2, 3, 4, 5, 6]}) - df["cats"] = df["cats"].astype("category") - tm.assert_frame_equal(exp_df, df) - - # with keywords - l = ["a", "b", "c", "a"] - s = pd.Series(l) - exp = pd.Series(Categorical(l, ordered=True)) - res = s.astype('category', ordered=True) - tm.assert_series_equal(res, exp) - - exp = pd.Series(Categorical( - l, categories=list('abcdef'), ordered=True)) - res = s.astype('category', categories=list('abcdef'), ordered=True) - tm.assert_series_equal(res, exp) - - def test_construction_series(self): - - l = [1, 2, 3, 1] - exp = Series(l).astype('category') - res = Series(l, dtype='category') - 
tm.assert_series_equal(res, exp) - - l = ["a", "b", "c", "a"] - exp = Series(l).astype('category') - res = Series(l, dtype='category') - tm.assert_series_equal(res, exp) - - # insert into frame with different index - # GH 8076 - index = pd.date_range('20000101', periods=3) - expected = Series(Categorical(values=[np.nan, np.nan, np.nan], - categories=['a', 'b', 'c'])) - expected.index = index - - expected = DataFrame({'x': expected}) - df = DataFrame( - {'x': Series(['a', 'b', 'c'], dtype='category')}, index=index) - tm.assert_frame_equal(df, expected) - - def test_construction_frame(self): - - # GH8626 - - # dict creation - df = DataFrame({'A': list('abc')}, dtype='category') - expected = Series(list('abc'), dtype='category', name='A') - tm.assert_series_equal(df['A'], expected) - - # to_frame - s = Series(list('abc'), dtype='category') - result = s.to_frame() - expected = Series(list('abc'), dtype='category', name=0) - tm.assert_series_equal(result[0], expected) - result = s.to_frame(name='foo') - expected = Series(list('abc'), dtype='category', name='foo') - tm.assert_series_equal(result['foo'], expected) - - # list-like creation - df = DataFrame(list('abc'), dtype='category') - expected = Series(list('abc'), dtype='category', name=0) - tm.assert_series_equal(df[0], expected) - - # ndim != 1 - df = DataFrame([pd.Categorical(list('abc'))]) - expected = DataFrame({0: Series(list('abc'), dtype='category')}) - tm.assert_frame_equal(df, expected) - - df = DataFrame([pd.Categorical(list('abc')), pd.Categorical(list( - 'abd'))]) - expected = DataFrame({0: Series(list('abc'), dtype='category'), - 1: Series(list('abd'), dtype='category')}, - columns=[0, 1]) - tm.assert_frame_equal(df, expected) - - # mixed - df = DataFrame([pd.Categorical(list('abc')), list('def')]) - expected = DataFrame({0: Series(list('abc'), dtype='category'), - 1: list('def')}, columns=[0, 1]) - tm.assert_frame_equal(df, expected) - - # invalid (shape) - self.assertRaises( - ValueError, - lambda: 
DataFrame([pd.Categorical(list('abc')), - pd.Categorical(list('abdefg'))])) - - # ndim > 1 - self.assertRaises(NotImplementedError, - lambda: pd.Categorical(np.array([list('abcd')]))) - - def test_reshaping(self): - - p = tm.makePanel() - p['str'] = 'foo' - df = p.to_frame() - df['category'] = df['str'].astype('category') - result = df['category'].unstack() - - c = Categorical(['foo'] * len(p.major_axis)) - expected = DataFrame({'A': c.copy(), - 'B': c.copy(), - 'C': c.copy(), - 'D': c.copy()}, - columns=Index(list('ABCD'), name='minor'), - index=p.major_axis.set_names('major')) - tm.assert_frame_equal(result, expected) - - def test_reindex(self): - - index = pd.date_range('20000101', periods=3) - - # reindexing to an invalid Categorical - s = Series(['a', 'b', 'c'], dtype='category') - result = s.reindex(index) - expected = Series(Categorical(values=[np.nan, np.nan, np.nan], - categories=['a', 'b', 'c'])) - expected.index = index - tm.assert_series_equal(result, expected) - - # partial reindexing - expected = Series(Categorical(values=['b', 'c'], categories=['a', 'b', - 'c'])) - expected.index = [1, 2] - result = s.reindex([1, 2]) - tm.assert_series_equal(result, expected) - - expected = Series(Categorical( - values=['c', np.nan], categories=['a', 'b', 'c'])) - expected.index = [2, 3] - result = s.reindex([2, 3]) - tm.assert_series_equal(result, expected) - - def test_sideeffects_free(self): - # Passing a categorical to a Series and then changing values in either - # the series or the categorical should not change the values in the - # other one, IF you specify copy! 
- cat = Categorical(["a", "b", "c", "a"]) - s = pd.Series(cat, copy=True) - self.assertFalse(s.cat is cat) - s.cat.categories = [1, 2, 3] - exp_s = np.array([1, 2, 3, 1], dtype=np.int64) - exp_cat = np.array(["a", "b", "c", "a"], dtype=np.object_) - self.assert_numpy_array_equal(s.__array__(), exp_s) - self.assert_numpy_array_equal(cat.__array__(), exp_cat) - - # setting - s[0] = 2 - exp_s2 = np.array([2, 2, 3, 1], dtype=np.int64) - self.assert_numpy_array_equal(s.__array__(), exp_s2) - self.assert_numpy_array_equal(cat.__array__(), exp_cat) - - # however, copy is False by default - # so this WILL change values - cat = Categorical(["a", "b", "c", "a"]) - s = pd.Series(cat) - self.assertTrue(s.values is cat) - s.cat.categories = [1, 2, 3] - exp_s = np.array([1, 2, 3, 1], dtype=np.int64) - self.assert_numpy_array_equal(s.__array__(), exp_s) - self.assert_numpy_array_equal(cat.__array__(), exp_s) - - s[0] = 2 - exp_s2 = np.array([2, 2, 3, 1], dtype=np.int64) - self.assert_numpy_array_equal(s.__array__(), exp_s2) - self.assert_numpy_array_equal(cat.__array__(), exp_s2) - - def test_nan_handling(self): - - # Nans are represented as -1 in labels - s = Series(Categorical(["a", "b", np.nan, "a"])) - self.assert_index_equal(s.cat.categories, Index(["a", "b"])) - self.assert_numpy_array_equal(s.values.codes, - np.array([0, 1, -1, 0], dtype=np.int8)) - - # If categories have nan included, the label should point to that - # instead - with tm.assert_produces_warning(FutureWarning): - s2 = Series(Categorical(["a", "b", np.nan, "a"], - categories=["a", "b", np.nan])) - - exp_cat = Index(["a", "b", np.nan]) - self.assert_index_equal(s2.cat.categories, exp_cat) - self.assert_numpy_array_equal(s2.values.codes, - np.array([0, 1, 2, 0], dtype=np.int8)) - - # Changing categories should also make the replaced category np.nan - s3 = Series(Categorical(["a", "b", "c", "a"])) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - s3.cat.categories = ["a", "b", np.nan] 
- - exp_cat = Index(["a", "b", np.nan]) - self.assert_index_equal(s3.cat.categories, exp_cat) - self.assert_numpy_array_equal(s3.values.codes, - np.array([0, 1, 2, 0], dtype=np.int8)) - - def test_cat_accessor(self): - s = Series(Categorical(["a", "b", np.nan, "a"])) - self.assert_index_equal(s.cat.categories, Index(["a", "b"])) - self.assertEqual(s.cat.ordered, False) - exp = Categorical(["a", "b", np.nan, "a"], categories=["b", "a"]) - s.cat.set_categories(["b", "a"], inplace=True) - tm.assert_categorical_equal(s.values, exp) - - res = s.cat.set_categories(["b", "a"]) - tm.assert_categorical_equal(res.values, exp) - - exp = Categorical(["a", "b", np.nan, "a"], categories=["b", "a"]) - s[:] = "a" - s = s.cat.remove_unused_categories() - self.assert_index_equal(s.cat.categories, Index(["a"])) - - def test_sequence_like(self): - - # GH 7839 - # make sure can iterate - df = DataFrame({"id": [1, 2, 3, 4, 5, 6], - "raw_grade": ['a', 'b', 'b', 'a', 'a', 'e']}) - df['grade'] = Categorical(df['raw_grade']) - - # basic sequencing testing - result = list(df.grade.values) - expected = np.array(df.grade.values).tolist() - tm.assert_almost_equal(result, expected) - - # iteration - for t in df.itertuples(index=False): - str(t) - - for row, s in df.iterrows(): - str(s) - - for c, col in df.iteritems(): - str(s) - - def test_series_delegations(self): - - # invalid accessor - self.assertRaises(AttributeError, lambda: Series([1, 2, 3]).cat) - tm.assertRaisesRegexp( - AttributeError, - r"Can only use .cat accessor with a 'category' dtype", - lambda: Series([1, 2, 3]).cat) - self.assertRaises(AttributeError, lambda: Series(['a', 'b', 'c']).cat) - self.assertRaises(AttributeError, lambda: Series(np.arange(5.)).cat) - self.assertRaises(AttributeError, - lambda: Series([Timestamp('20130101')]).cat) - - # Series should delegate calls to '.categories', '.codes', '.ordered' - # and the methods '.set_categories()' 'drop_unused_categories()' to the - # categorical - s = 
Series(Categorical(["a", "b", "c", "a"], ordered=True)) - exp_categories = Index(["a", "b", "c"]) - tm.assert_index_equal(s.cat.categories, exp_categories) - s.cat.categories = [1, 2, 3] - exp_categories = Index([1, 2, 3]) - self.assert_index_equal(s.cat.categories, exp_categories) - - exp_codes = Series([0, 1, 2, 0], dtype='int8') - tm.assert_series_equal(s.cat.codes, exp_codes) - - self.assertEqual(s.cat.ordered, True) - s = s.cat.as_unordered() - self.assertEqual(s.cat.ordered, False) - s.cat.as_ordered(inplace=True) - self.assertEqual(s.cat.ordered, True) - - # reorder - s = Series(Categorical(["a", "b", "c", "a"], ordered=True)) - exp_categories = Index(["c", "b", "a"]) - exp_values = np.array(["a", "b", "c", "a"], dtype=np.object_) - s = s.cat.set_categories(["c", "b", "a"]) - tm.assert_index_equal(s.cat.categories, exp_categories) - self.assert_numpy_array_equal(s.values.__array__(), exp_values) - self.assert_numpy_array_equal(s.__array__(), exp_values) - - # remove unused categories - s = Series(Categorical(["a", "b", "b", "a"], categories=["a", "b", "c" - ])) - exp_categories = Index(["a", "b"]) - exp_values = np.array(["a", "b", "b", "a"], dtype=np.object_) - s = s.cat.remove_unused_categories() - self.assert_index_equal(s.cat.categories, exp_categories) - self.assert_numpy_array_equal(s.values.__array__(), exp_values) - self.assert_numpy_array_equal(s.__array__(), exp_values) - - # This method is likely to be confused, so test that it raises an error - # on wrong inputs: - def f(): - s.set_categories([4, 3, 2, 1]) - - self.assertRaises(Exception, f) - # right: s.cat.set_categories([4,3,2,1]) - - def test_series_functions_no_warnings(self): - df = pd.DataFrame({'value': np.random.randint(0, 100, 20)}) - labels = ["{0} - {1}".format(i, i + 9) for i in range(0, 100, 10)] - with tm.assert_produces_warning(False): - df['group'] = pd.cut(df.value, range(0, 105, 10), right=False, - labels=labels) - - def test_assignment_to_dataframe(self): - # assignment - df = 
DataFrame({'value': np.array(np.random.randint(0, 10000, 100), - dtype='int32')}) - labels = ["{0} - {1}".format(i, i + 499) for i in range(0, 10000, 500)] - - df = df.sort_values(by=['value'], ascending=True) - s = pd.cut(df.value, range(0, 10500, 500), right=False, labels=labels) - d = s.values - df['D'] = d - str(df) - - result = df.dtypes - expected = Series( - [np.dtype('int32'), CategoricalDtype()], index=['value', 'D']) - tm.assert_series_equal(result, expected) - - df['E'] = s - str(df) - - result = df.dtypes - expected = Series([np.dtype('int32'), CategoricalDtype(), - CategoricalDtype()], - index=['value', 'D', 'E']) - tm.assert_series_equal(result, expected) - - result1 = df['D'] - result2 = df['E'] - self.assert_categorical_equal(result1._data._block.values, d) - - # sorting - s.name = 'E' - self.assert_series_equal(result2.sort_index(), s.sort_index()) - - cat = pd.Categorical([1, 2, 3, 10], categories=[1, 2, 3, 4, 10]) - df = pd.DataFrame(pd.Series(cat)) - - def test_describe(self): - - # Categoricals should not show up together with numerical columns - result = self.cat.describe() - self.assertEqual(len(result.columns), 1) - - # In a frame, describe() for the cat should be the same as for string - # arrays (count, unique, top, freq) - - cat = Categorical(["a", "b", "b", "b"], categories=['a', 'b', 'c'], - ordered=True) - s = Series(cat) - result = s.describe() - expected = Series([4, 2, "b", 3], - index=['count', 'unique', 'top', 'freq']) - tm.assert_series_equal(result, expected) - - cat = pd.Series(pd.Categorical(["a", "b", "c", "c"])) - df3 = pd.DataFrame({"cat": cat, "s": ["a", "b", "c", "c"]}) - res = df3.describe() - self.assert_numpy_array_equal(res["cat"].values, res["s"].values) - - def test_repr(self): - a = pd.Series(pd.Categorical([1, 2, 3, 4])) - exp = u("0 1\n1 2\n2 3\n3 4\n" + - "dtype: category\nCategories (4, int64): [1, 2, 3, 4]") - - self.assertEqual(exp, a.__unicode__()) - - a = pd.Series(pd.Categorical(["a", "b"] * 25)) - exp = 
u("0 a\n1 b\n" + " ..\n" + "48 a\n49 b\n" + - "dtype: category\nCategories (2, object): [a, b]") - with option_context("display.max_rows", 5): - self.assertEqual(exp, repr(a)) - - levs = list("abcdefghijklmnopqrstuvwxyz") - a = pd.Series(pd.Categorical( - ["a", "b"], categories=levs, ordered=True)) - exp = u("0 a\n1 b\n" + "dtype: category\n" - "Categories (26, object): [a < b < c < d ... w < x < y < z]") - self.assertEqual(exp, a.__unicode__()) - - def test_categorical_repr(self): - c = pd.Categorical([1, 2, 3]) - exp = """[1, 2, 3] -Categories (3, int64): [1, 2, 3]""" - - self.assertEqual(repr(c), exp) - - c = pd.Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3]) - exp = """[1, 2, 3, 1, 2, 3] -Categories (3, int64): [1, 2, 3]""" - - self.assertEqual(repr(c), exp) - - c = pd.Categorical([1, 2, 3, 4, 5] * 10) - exp = """[1, 2, 3, 4, 5, ..., 1, 2, 3, 4, 5] -Length: 50 -Categories (5, int64): [1, 2, 3, 4, 5]""" - - self.assertEqual(repr(c), exp) - - c = pd.Categorical(np.arange(20)) - exp = """[0, 1, 2, 3, 4, ..., 15, 16, 17, 18, 19] -Length: 20 -Categories (20, int64): [0, 1, 2, 3, ..., 16, 17, 18, 19]""" - - self.assertEqual(repr(c), exp) - - def test_categorical_repr_ordered(self): - c = pd.Categorical([1, 2, 3], ordered=True) - exp = """[1, 2, 3] -Categories (3, int64): [1 < 2 < 3]""" - - self.assertEqual(repr(c), exp) - - c = pd.Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3], - ordered=True) - exp = """[1, 2, 3, 1, 2, 3] -Categories (3, int64): [1 < 2 < 3]""" - - self.assertEqual(repr(c), exp) - - c = pd.Categorical([1, 2, 3, 4, 5] * 10, ordered=True) - exp = """[1, 2, 3, 4, 5, ..., 1, 2, 3, 4, 5] -Length: 50 -Categories (5, int64): [1 < 2 < 3 < 4 < 5]""" - - self.assertEqual(repr(c), exp) - - c = pd.Categorical(np.arange(20), ordered=True) - exp = """[0, 1, 2, 3, 4, ..., 15, 16, 17, 18, 19] -Length: 20 -Categories (20, int64): [0 < 1 < 2 < 3 ... 
16 < 17 < 18 < 19]""" - - self.assertEqual(repr(c), exp) - - def test_categorical_repr_datetime(self): - idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5) - c = pd.Categorical(idx) - - # TODO(wesm): exceeding 80 characters in the console is not good - # behavior - exp = ( - "[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, " - "2011-01-01 12:00:00, 2011-01-01 13:00:00]\n" - "Categories (5, datetime64[ns]): [2011-01-01 09:00:00, " - "2011-01-01 10:00:00, 2011-01-01 11:00:00,\n" - " 2011-01-01 12:00:00, " - "2011-01-01 13:00:00]""") - self.assertEqual(repr(c), exp) - - c = pd.Categorical(idx.append(idx), categories=idx) - exp = ( - "[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, " - "2011-01-01 12:00:00, 2011-01-01 13:00:00, 2011-01-01 09:00:00, " - "2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, " - "2011-01-01 13:00:00]\n" - "Categories (5, datetime64[ns]): [2011-01-01 09:00:00, " - "2011-01-01 10:00:00, 2011-01-01 11:00:00,\n" - " 2011-01-01 12:00:00, " - "2011-01-01 13:00:00]") - - self.assertEqual(repr(c), exp) - - idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5, - tz='US/Eastern') - c = pd.Categorical(idx) - exp = ( - "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, " - "2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, " - "2011-01-01 13:00:00-05:00]\n" - "Categories (5, datetime64[ns, US/Eastern]): " - "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00,\n" - " " - "2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00,\n" - " " - "2011-01-01 13:00:00-05:00]") - - self.assertEqual(repr(c), exp) - - c = pd.Categorical(idx.append(idx), categories=idx) - exp = ( - "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, " - "2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, " - "2011-01-01 13:00:00-05:00, 2011-01-01 09:00:00-05:00, " - "2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, " - "2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00]\n" - "Categories (5, 
datetime64[ns, US/Eastern]): " - "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00,\n" - " " - "2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00,\n" - " " - "2011-01-01 13:00:00-05:00]") - - self.assertEqual(repr(c), exp) - - def test_categorical_repr_datetime_ordered(self): - idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5) - c = pd.Categorical(idx, ordered=True) - exp = """[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00] -Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 < - 2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" # noqa - - self.assertEqual(repr(c), exp) - - c = pd.Categorical(idx.append(idx), categories=idx, ordered=True) - exp = """[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00, 2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00] -Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 < - 2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" # noqa - - self.assertEqual(repr(c), exp) - - idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5, - tz='US/Eastern') - c = pd.Categorical(idx, ordered=True) - exp = """[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00] -Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 < - 2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 < - 2011-01-01 13:00:00-05:00]""" # noqa - - self.assertEqual(repr(c), exp) - - c = pd.Categorical(idx.append(idx), categories=idx, ordered=True) - exp = """[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00, 2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 
12:00:00-05:00, 2011-01-01 13:00:00-05:00] -Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 < - 2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 < - 2011-01-01 13:00:00-05:00]""" - - self.assertEqual(repr(c), exp) - - def test_categorical_repr_period(self): - idx = pd.period_range('2011-01-01 09:00', freq='H', periods=5) - c = pd.Categorical(idx) - exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] -Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, - 2011-01-01 13:00]""" - - self.assertEqual(repr(c), exp) - - c = pd.Categorical(idx.append(idx), categories=idx) - exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] -Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, - 2011-01-01 13:00]""" - - self.assertEqual(repr(c), exp) - - idx = pd.period_range('2011-01', freq='M', periods=5) - c = pd.Categorical(idx) - exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05] -Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]""" - - self.assertEqual(repr(c), exp) - - c = pd.Categorical(idx.append(idx), categories=idx) - exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01, 2011-02, 2011-03, 2011-04, 2011-05] -Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]""" - - self.assertEqual(repr(c), exp) - - def test_categorical_repr_period_ordered(self): - idx = pd.period_range('2011-01-01 09:00', freq='H', periods=5) - c = pd.Categorical(idx, ordered=True) - exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] -Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 < - 2011-01-01 13:00]""" - - 
self.assertEqual(repr(c), exp) - - c = pd.Categorical(idx.append(idx), categories=idx, ordered=True) - exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] -Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 < - 2011-01-01 13:00]""" - - self.assertEqual(repr(c), exp) - - idx = pd.period_range('2011-01', freq='M', periods=5) - c = pd.Categorical(idx, ordered=True) - exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05] -Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]""" - - self.assertEqual(repr(c), exp) - - c = pd.Categorical(idx.append(idx), categories=idx, ordered=True) - exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01, 2011-02, 2011-03, 2011-04, 2011-05] -Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]""" - - self.assertEqual(repr(c), exp) - - def test_categorical_repr_timedelta(self): - idx = pd.timedelta_range('1 days', periods=5) - c = pd.Categorical(idx) - exp = """[1 days, 2 days, 3 days, 4 days, 5 days] -Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]""" - - self.assertEqual(repr(c), exp) - - c = pd.Categorical(idx.append(idx), categories=idx) - exp = """[1 days, 2 days, 3 days, 4 days, 5 days, 1 days, 2 days, 3 days, 4 days, 5 days] -Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]""" - - self.assertEqual(repr(c), exp) - - idx = pd.timedelta_range('1 hours', periods=20) - c = pd.Categorical(idx) - exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00] -Length: 20 -Categories (20, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, - 3 days 01:00:00, ..., 16 days 01:00:00, 17 days 01:00:00, - 18 
days 01:00:00, 19 days 01:00:00]""" - - self.assertEqual(repr(c), exp) - - c = pd.Categorical(idx.append(idx), categories=idx) - exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00] -Length: 40 -Categories (20, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, - 3 days 01:00:00, ..., 16 days 01:00:00, 17 days 01:00:00, - 18 days 01:00:00, 19 days 01:00:00]""" - - self.assertEqual(repr(c), exp) - - def test_categorical_repr_timedelta_ordered(self): - idx = pd.timedelta_range('1 days', periods=5) - c = pd.Categorical(idx, ordered=True) - exp = """[1 days, 2 days, 3 days, 4 days, 5 days] -Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" - - self.assertEqual(repr(c), exp) - - c = pd.Categorical(idx.append(idx), categories=idx, ordered=True) - exp = """[1 days, 2 days, 3 days, 4 days, 5 days, 1 days, 2 days, 3 days, 4 days, 5 days] -Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" - - self.assertEqual(repr(c), exp) - - idx = pd.timedelta_range('1 hours', periods=20) - c = pd.Categorical(idx, ordered=True) - exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00] -Length: 20 -Categories (20, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 < - 3 days 01:00:00 ... 
16 days 01:00:00 < 17 days 01:00:00 < - 18 days 01:00:00 < 19 days 01:00:00]""" - - self.assertEqual(repr(c), exp) - - c = pd.Categorical(idx.append(idx), categories=idx, ordered=True) - exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00] -Length: 40 -Categories (20, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 < - 3 days 01:00:00 ... 16 days 01:00:00 < 17 days 01:00:00 < - 18 days 01:00:00 < 19 days 01:00:00]""" - - self.assertEqual(repr(c), exp) - - def test_categorical_series_repr(self): - s = pd.Series(pd.Categorical([1, 2, 3])) - exp = """0 1 -1 2 -2 3 -dtype: category -Categories (3, int64): [1, 2, 3]""" - - self.assertEqual(repr(s), exp) - - s = pd.Series(pd.Categorical(np.arange(10))) - exp = """0 0 -1 1 -2 2 -3 3 -4 4 -5 5 -6 6 -7 7 -8 8 -9 9 -dtype: category -Categories (10, int64): [0, 1, 2, 3, ..., 6, 7, 8, 9]""" - - self.assertEqual(repr(s), exp) - - def test_categorical_series_repr_ordered(self): - s = pd.Series(pd.Categorical([1, 2, 3], ordered=True)) - exp = """0 1 -1 2 -2 3 -dtype: category -Categories (3, int64): [1 < 2 < 3]""" - - self.assertEqual(repr(s), exp) - - s = pd.Series(pd.Categorical(np.arange(10), ordered=True)) - exp = """0 0 -1 1 -2 2 -3 3 -4 4 -5 5 -6 6 -7 7 -8 8 -9 9 -dtype: category -Categories (10, int64): [0 < 1 < 2 < 3 ... 
6 < 7 < 8 < 9]""" - - self.assertEqual(repr(s), exp) - - def test_categorical_series_repr_datetime(self): - idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5) - s = pd.Series(pd.Categorical(idx)) - exp = """0 2011-01-01 09:00:00 -1 2011-01-01 10:00:00 -2 2011-01-01 11:00:00 -3 2011-01-01 12:00:00 -4 2011-01-01 13:00:00 -dtype: category -Categories (5, datetime64[ns]): [2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, - 2011-01-01 12:00:00, 2011-01-01 13:00:00]""" - - self.assertEqual(repr(s), exp) - - idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5, - tz='US/Eastern') - s = pd.Series(pd.Categorical(idx)) - exp = """0 2011-01-01 09:00:00-05:00 -1 2011-01-01 10:00:00-05:00 -2 2011-01-01 11:00:00-05:00 -3 2011-01-01 12:00:00-05:00 -4 2011-01-01 13:00:00-05:00 -dtype: category -Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, - 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, - 2011-01-01 13:00:00-05:00]""" - - self.assertEqual(repr(s), exp) - - def test_categorical_series_repr_datetime_ordered(self): - idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5) - s = pd.Series(pd.Categorical(idx, ordered=True)) - exp = """0 2011-01-01 09:00:00 -1 2011-01-01 10:00:00 -2 2011-01-01 11:00:00 -3 2011-01-01 12:00:00 -4 2011-01-01 13:00:00 -dtype: category -Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 < - 2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" - - self.assertEqual(repr(s), exp) - - idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5, - tz='US/Eastern') - s = pd.Series(pd.Categorical(idx, ordered=True)) - exp = """0 2011-01-01 09:00:00-05:00 -1 2011-01-01 10:00:00-05:00 -2 2011-01-01 11:00:00-05:00 -3 2011-01-01 12:00:00-05:00 -4 2011-01-01 13:00:00-05:00 -dtype: category -Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 < - 2011-01-01 11:00:00-05:00 < 2011-01-01 
12:00:00-05:00 < - 2011-01-01 13:00:00-05:00]""" - - self.assertEqual(repr(s), exp) - - def test_categorical_series_repr_period(self): - idx = pd.period_range('2011-01-01 09:00', freq='H', periods=5) - s = pd.Series(pd.Categorical(idx)) - exp = """0 2011-01-01 09:00 -1 2011-01-01 10:00 -2 2011-01-01 11:00 -3 2011-01-01 12:00 -4 2011-01-01 13:00 -dtype: category -Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, - 2011-01-01 13:00]""" - - self.assertEqual(repr(s), exp) - - idx = pd.period_range('2011-01', freq='M', periods=5) - s = pd.Series(pd.Categorical(idx)) - exp = """0 2011-01 -1 2011-02 -2 2011-03 -3 2011-04 -4 2011-05 -dtype: category -Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]""" - - self.assertEqual(repr(s), exp) - - def test_categorical_series_repr_period_ordered(self): - idx = pd.period_range('2011-01-01 09:00', freq='H', periods=5) - s = pd.Series(pd.Categorical(idx, ordered=True)) - exp = """0 2011-01-01 09:00 -1 2011-01-01 10:00 -2 2011-01-01 11:00 -3 2011-01-01 12:00 -4 2011-01-01 13:00 -dtype: category -Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 < - 2011-01-01 13:00]""" - - self.assertEqual(repr(s), exp) - - idx = pd.period_range('2011-01', freq='M', periods=5) - s = pd.Series(pd.Categorical(idx, ordered=True)) - exp = """0 2011-01 -1 2011-02 -2 2011-03 -3 2011-04 -4 2011-05 -dtype: category -Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]""" - - self.assertEqual(repr(s), exp) - - def test_categorical_series_repr_timedelta(self): - idx = pd.timedelta_range('1 days', periods=5) - s = pd.Series(pd.Categorical(idx)) - exp = """0 1 days -1 2 days -2 3 days -3 4 days -4 5 days -dtype: category -Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]""" - - self.assertEqual(repr(s), exp) - - idx = pd.timedelta_range('1 hours', periods=10) - s = 
pd.Series(pd.Categorical(idx)) - exp = """0 0 days 01:00:00 -1 1 days 01:00:00 -2 2 days 01:00:00 -3 3 days 01:00:00 -4 4 days 01:00:00 -5 5 days 01:00:00 -6 6 days 01:00:00 -7 7 days 01:00:00 -8 8 days 01:00:00 -9 9 days 01:00:00 -dtype: category -Categories (10, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, - 3 days 01:00:00, ..., 6 days 01:00:00, 7 days 01:00:00, - 8 days 01:00:00, 9 days 01:00:00]""" - - self.assertEqual(repr(s), exp) - - def test_categorical_series_repr_timedelta_ordered(self): - idx = pd.timedelta_range('1 days', periods=5) - s = pd.Series(pd.Categorical(idx, ordered=True)) - exp = """0 1 days -1 2 days -2 3 days -3 4 days -4 5 days -dtype: category -Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" - - self.assertEqual(repr(s), exp) - - idx = pd.timedelta_range('1 hours', periods=10) - s = pd.Series(pd.Categorical(idx, ordered=True)) - exp = """0 0 days 01:00:00 -1 1 days 01:00:00 -2 2 days 01:00:00 -3 3 days 01:00:00 -4 4 days 01:00:00 -5 5 days 01:00:00 -6 6 days 01:00:00 -7 7 days 01:00:00 -8 8 days 01:00:00 -9 9 days 01:00:00 -dtype: category -Categories (10, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 < - 3 days 01:00:00 ... 
6 days 01:00:00 < 7 days 01:00:00 < - 8 days 01:00:00 < 9 days 01:00:00]""" - - self.assertEqual(repr(s), exp) - - def test_categorical_index_repr(self): - idx = pd.CategoricalIndex(pd.Categorical([1, 2, 3])) - exp = """CategoricalIndex([1, 2, 3], categories=[1, 2, 3], ordered=False, dtype='category')""" - self.assertEqual(repr(idx), exp) - - i = pd.CategoricalIndex(pd.Categorical(np.arange(10))) - exp = """CategoricalIndex([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], categories=[0, 1, 2, 3, 4, 5, 6, 7, ...], ordered=False, dtype='category')""" - self.assertEqual(repr(i), exp) - - def test_categorical_index_repr_ordered(self): - i = pd.CategoricalIndex(pd.Categorical([1, 2, 3], ordered=True)) - exp = """CategoricalIndex([1, 2, 3], categories=[1, 2, 3], ordered=True, dtype='category')""" - self.assertEqual(repr(i), exp) - - i = pd.CategoricalIndex(pd.Categorical(np.arange(10), ordered=True)) - exp = """CategoricalIndex([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], categories=[0, 1, 2, 3, 4, 5, 6, 7, ...], ordered=True, dtype='category')""" - self.assertEqual(repr(i), exp) - - def test_categorical_index_repr_datetime(self): - idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5) - i = pd.CategoricalIndex(pd.Categorical(idx)) - exp = """CategoricalIndex(['2011-01-01 09:00:00', '2011-01-01 10:00:00', - '2011-01-01 11:00:00', '2011-01-01 12:00:00', - '2011-01-01 13:00:00'], - categories=[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00], ordered=False, dtype='category')""" - - self.assertEqual(repr(i), exp) - - idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5, - tz='US/Eastern') - i = pd.CategoricalIndex(pd.Categorical(idx)) - exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00', - '2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00', - '2011-01-01 13:00:00-05:00'], - categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 
2011-01-01 13:00:00-05:00], ordered=False, dtype='category')""" - - self.assertEqual(repr(i), exp) - - def test_categorical_index_repr_datetime_ordered(self): - idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5) - i = pd.CategoricalIndex(pd.Categorical(idx, ordered=True)) - exp = """CategoricalIndex(['2011-01-01 09:00:00', '2011-01-01 10:00:00', - '2011-01-01 11:00:00', '2011-01-01 12:00:00', - '2011-01-01 13:00:00'], - categories=[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00], ordered=True, dtype='category')""" - - self.assertEqual(repr(i), exp) - - idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5, - tz='US/Eastern') - i = pd.CategoricalIndex(pd.Categorical(idx, ordered=True)) - exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00', - '2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00', - '2011-01-01 13:00:00-05:00'], - categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=True, dtype='category')""" - - self.assertEqual(repr(i), exp) - - i = pd.CategoricalIndex(pd.Categorical(idx.append(idx), ordered=True)) - exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00', - '2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00', - '2011-01-01 13:00:00-05:00', '2011-01-01 09:00:00-05:00', - '2011-01-01 10:00:00-05:00', '2011-01-01 11:00:00-05:00', - '2011-01-01 12:00:00-05:00', '2011-01-01 13:00:00-05:00'], - categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=True, dtype='category')""" - - self.assertEqual(repr(i), exp) - - def test_categorical_index_repr_period(self): - # test all length - idx = pd.period_range('2011-01-01 09:00', freq='H', periods=1) - i = pd.CategoricalIndex(pd.Categorical(idx)) - exp = 
"""CategoricalIndex(['2011-01-01 09:00'], categories=[2011-01-01 09:00], ordered=False, dtype='category')""" - self.assertEqual(repr(i), exp) - - idx = pd.period_range('2011-01-01 09:00', freq='H', periods=2) - i = pd.CategoricalIndex(pd.Categorical(idx)) - exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00], ordered=False, dtype='category')""" - self.assertEqual(repr(i), exp) - - idx = pd.period_range('2011-01-01 09:00', freq='H', periods=3) - i = pd.CategoricalIndex(pd.Categorical(idx)) - exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00], ordered=False, dtype='category')""" - self.assertEqual(repr(i), exp) - - idx = pd.period_range('2011-01-01 09:00', freq='H', periods=5) - i = pd.CategoricalIndex(pd.Categorical(idx)) - exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00', - '2011-01-01 12:00', '2011-01-01 13:00'], - categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=False, dtype='category')""" - - self.assertEqual(repr(i), exp) - - i = pd.CategoricalIndex(pd.Categorical(idx.append(idx))) - exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00', - '2011-01-01 12:00', '2011-01-01 13:00', '2011-01-01 09:00', - '2011-01-01 10:00', '2011-01-01 11:00', '2011-01-01 12:00', - '2011-01-01 13:00'], - categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=False, dtype='category')""" - - self.assertEqual(repr(i), exp) - - idx = pd.period_range('2011-01', freq='M', periods=5) - i = pd.CategoricalIndex(pd.Categorical(idx)) - exp = """CategoricalIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05'], categories=[2011-01, 2011-02, 2011-03, 2011-04, 2011-05], ordered=False, dtype='category')""" - self.assertEqual(repr(i), exp) - - 
def test_categorical_index_repr_period_ordered(self): - idx = pd.period_range('2011-01-01 09:00', freq='H', periods=5) - i = pd.CategoricalIndex(pd.Categorical(idx, ordered=True)) - exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00', - '2011-01-01 12:00', '2011-01-01 13:00'], - categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=True, dtype='category')""" - - self.assertEqual(repr(i), exp) - - idx = pd.period_range('2011-01', freq='M', periods=5) - i = pd.CategoricalIndex(pd.Categorical(idx, ordered=True)) - exp = """CategoricalIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05'], categories=[2011-01, 2011-02, 2011-03, 2011-04, 2011-05], ordered=True, dtype='category')""" - self.assertEqual(repr(i), exp) - - def test_categorical_index_repr_timedelta(self): - idx = pd.timedelta_range('1 days', periods=5) - i = pd.CategoricalIndex(pd.Categorical(idx)) - exp = """CategoricalIndex(['1 days', '2 days', '3 days', '4 days', '5 days'], categories=[1 days 00:00:00, 2 days 00:00:00, 3 days 00:00:00, 4 days 00:00:00, 5 days 00:00:00], ordered=False, dtype='category')""" - self.assertEqual(repr(i), exp) - - idx = pd.timedelta_range('1 hours', periods=10) - i = pd.CategoricalIndex(pd.Categorical(idx)) - exp = """CategoricalIndex(['0 days 01:00:00', '1 days 01:00:00', '2 days 01:00:00', - '3 days 01:00:00', '4 days 01:00:00', '5 days 01:00:00', - '6 days 01:00:00', '7 days 01:00:00', '8 days 01:00:00', - '9 days 01:00:00'], - categories=[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, 5 days 01:00:00, 6 days 01:00:00, 7 days 01:00:00, ...], ordered=False, dtype='category')""" - - self.assertEqual(repr(i), exp) - - def test_categorical_index_repr_timedelta_ordered(self): - idx = pd.timedelta_range('1 days', periods=5) - i = pd.CategoricalIndex(pd.Categorical(idx, ordered=True)) - exp = """CategoricalIndex(['1 days', '2 days', '3 days', '4 
days', '5 days'], categories=[1 days 00:00:00, 2 days 00:00:00, 3 days 00:00:00, 4 days 00:00:00, 5 days 00:00:00], ordered=True, dtype='category')""" - self.assertEqual(repr(i), exp) - - idx = pd.timedelta_range('1 hours', periods=10) - i = pd.CategoricalIndex(pd.Categorical(idx, ordered=True)) - exp = """CategoricalIndex(['0 days 01:00:00', '1 days 01:00:00', '2 days 01:00:00', - '3 days 01:00:00', '4 days 01:00:00', '5 days 01:00:00', - '6 days 01:00:00', '7 days 01:00:00', '8 days 01:00:00', - '9 days 01:00:00'], - categories=[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, 5 days 01:00:00, 6 days 01:00:00, 7 days 01:00:00, ...], ordered=True, dtype='category')""" - - self.assertEqual(repr(i), exp) - - def test_categorical_frame(self): - # normal DataFrame - dt = pd.date_range('2011-01-01 09:00', freq='H', periods=5, - tz='US/Eastern') - p = pd.period_range('2011-01', freq='M', periods=5) - df = pd.DataFrame({'dt': dt, 'p': p}) - exp = """ dt p -0 2011-01-01 09:00:00-05:00 2011-01 -1 2011-01-01 10:00:00-05:00 2011-02 -2 2011-01-01 11:00:00-05:00 2011-03 -3 2011-01-01 12:00:00-05:00 2011-04 -4 2011-01-01 13:00:00-05:00 2011-05""" - - df = pd.DataFrame({'dt': pd.Categorical(dt), 'p': pd.Categorical(p)}) - self.assertEqual(repr(df), exp) - - def test_info(self): - - # make sure it works - n = 2500 - df = DataFrame({'int64': np.random.randint(100, size=n)}) - df['category'] = Series(np.array(list('abcdefghij')).take( - np.random.randint(0, 10, size=n))).astype('category') - df.isnull() - df.info() - - df2 = df[df['category'] == 'd'] - df2.info() - - def test_groupby_sort(self): - - # http://stackoverflow.com/questions/23814368/sorting-pandas-categorical-labels-after-groupby - # This should result in a properly sorted Series so that the plot - # has a sorted x axis - # self.cat.groupby(['value_group'])['value_group'].count().plot(kind='bar') - - res = self.cat.groupby(['value_group'])['value_group'].count() - exp = 
res[sorted(res.index, key=lambda x: float(x.split()[0]))] - exp.index = pd.CategoricalIndex(exp.index, name=exp.index.name) - tm.assert_series_equal(res, exp) - - def test_min_max(self): - # unordered cats have no min/max - cat = Series(Categorical(["a", "b", "c", "d"], ordered=False)) - self.assertRaises(TypeError, lambda: cat.min()) - self.assertRaises(TypeError, lambda: cat.max()) - - cat = Series(Categorical(["a", "b", "c", "d"], ordered=True)) - _min = cat.min() - _max = cat.max() - self.assertEqual(_min, "a") - self.assertEqual(_max, "d") - - cat = Series(Categorical(["a", "b", "c", "d"], categories=[ - 'd', 'c', 'b', 'a'], ordered=True)) - _min = cat.min() - _max = cat.max() - self.assertEqual(_min, "d") - self.assertEqual(_max, "a") - - cat = Series(Categorical( - [np.nan, "b", "c", np.nan], categories=['d', 'c', 'b', 'a' - ], ordered=True)) - _min = cat.min() - _max = cat.max() - self.assertTrue(np.isnan(_min)) - self.assertEqual(_max, "b") - - cat = Series(Categorical( - [np.nan, 1, 2, np.nan], categories=[5, 4, 3, 2, 1], ordered=True)) - _min = cat.min() - _max = cat.max() - self.assertTrue(np.isnan(_min)) - self.assertEqual(_max, 1) - - def test_mode(self): - s = Series(Categorical([1, 1, 2, 4, 5, 5, 5], - categories=[5, 4, 3, 2, 1], ordered=True)) - res = s.mode() - exp = Series(Categorical([5], categories=[ - 5, 4, 3, 2, 1], ordered=True)) - tm.assert_series_equal(res, exp) - s = Series(Categorical([1, 1, 1, 4, 5, 5, 5], - categories=[5, 4, 3, 2, 1], ordered=True)) - res = s.mode() - exp = Series(Categorical([5, 1], categories=[ - 5, 4, 3, 2, 1], ordered=True)) - tm.assert_series_equal(res, exp) - s = Series(Categorical([1, 2, 3, 4, 5], categories=[5, 4, 3, 2, 1], - ordered=True)) - res = s.mode() - exp = Series(Categorical([], categories=[5, 4, 3, 2, 1], ordered=True)) - tm.assert_series_equal(res, exp) - - def test_value_counts(self): - # GH 12835 - cats = pd.Categorical(["a", "b", "c", "c", "c", "b"], - categories=["c", "a", "b", "d"]) - s = 
pd.Series(cats, name='xxx') - res = s.value_counts(sort=False) - - exp_index = pd.CategoricalIndex(["c", "a", "b", "d"], - categories=cats.categories) - exp = Series([3, 1, 2, 0], name='xxx', index=exp_index) - tm.assert_series_equal(res, exp) - - res = s.value_counts(sort=True) - - exp_index = pd.CategoricalIndex(["c", "b", "a", "d"], - categories=cats.categories) - exp = Series([3, 2, 1, 0], name='xxx', index=exp_index) - tm.assert_series_equal(res, exp) - - # check object dtype handles the Series.name as the same - # (tested in test_base.py) - s = pd.Series(["a", "b", "c", "c", "c", "b"], name='xxx') - res = s.value_counts() - exp = Series([3, 2, 1], name='xxx', index=["c", "b", "a"]) - tm.assert_series_equal(res, exp) - - def test_value_counts_with_nan(self): - # see gh-9443 - - # sanity check - s = pd.Series(["a", "b", "a"], dtype="category") - exp = pd.Series([2, 1], index=pd.CategoricalIndex(["a", "b"])) - - res = s.value_counts(dropna=True) - tm.assert_series_equal(res, exp) - - res = s.value_counts(dropna=True) - tm.assert_series_equal(res, exp) - - # same Series via two different constructions --> same behaviour - series = [ - pd.Series(["a", "b", None, "a", None, None], dtype="category"), - pd.Series(pd.Categorical(["a", "b", None, "a", None, None], - categories=["a", "b"])) - ] - - for s in series: - # None is a NaN value, so we exclude its count here - exp = pd.Series([2, 1], index=pd.CategoricalIndex(["a", "b"])) - res = s.value_counts(dropna=True) - tm.assert_series_equal(res, exp) - - # we don't exclude the count of None and sort by counts - exp = pd.Series( - [3, 2, 1], index=pd.CategoricalIndex([np.nan, "a", "b"])) - res = s.value_counts(dropna=False) - tm.assert_series_equal(res, exp) - - # When we aren't sorting by counts, and np.nan isn't a - # category, it should be last. 
- exp = pd.Series( - [2, 1, 3], index=pd.CategoricalIndex(["a", "b", np.nan])) - res = s.value_counts(dropna=False, sort=False) - tm.assert_series_equal(res, exp) - - def test_groupby(self): - - cats = Categorical(["a", "a", "a", "b", "b", "b", "c", "c", "c"], - categories=["a", "b", "c", "d"], ordered=True) - data = DataFrame({"a": [1, 1, 1, 2, 2, 2, 3, 4, 5], "b": cats}) - - exp_index = pd.CategoricalIndex(['a', 'b', 'c', 'd'], name='b', - ordered=True) - expected = DataFrame({'a': [1, 2, 4, np.nan]}, index=exp_index) - result = data.groupby("b").mean() - tm.assert_frame_equal(result, expected) - - raw_cat1 = Categorical(["a", "a", "b", "b"], - categories=["a", "b", "z"], ordered=True) - raw_cat2 = Categorical(["c", "d", "c", "d"], - categories=["c", "d", "y"], ordered=True) - df = DataFrame({"A": raw_cat1, "B": raw_cat2, "values": [1, 2, 3, 4]}) - - # single grouper - gb = df.groupby("A") - exp_idx = pd.CategoricalIndex(['a', 'b', 'z'], name='A', ordered=True) - expected = DataFrame({'values': Series([3, 7, np.nan], index=exp_idx)}) - result = gb.sum() - tm.assert_frame_equal(result, expected) - - # multiple groupers - gb = df.groupby(['A', 'B']) - exp_index = pd.MultiIndex.from_product( - [Categorical(["a", "b", "z"], ordered=True), - Categorical(["c", "d", "y"], ordered=True)], - names=['A', 'B']) - expected = DataFrame({'values': [1, 2, np.nan, 3, 4, np.nan, - np.nan, np.nan, np.nan]}, - index=exp_index) - result = gb.sum() - tm.assert_frame_equal(result, expected) - - # multiple groupers with a non-cat - df = df.copy() - df['C'] = ['foo', 'bar'] * 2 - gb = df.groupby(['A', 'B', 'C']) - exp_index = pd.MultiIndex.from_product( - [Categorical(["a", "b", "z"], ordered=True), - Categorical(["c", "d", "y"], ordered=True), - ['foo', 'bar']], - names=['A', 'B', 'C']) - expected = DataFrame({'values': Series( - np.nan, index=exp_index)}).sort_index() - expected.iloc[[1, 2, 7, 8], 0] = [1, 2, 3, 4] - result = gb.sum() - tm.assert_frame_equal(result, expected) - - # GH 
8623 - x = pd.DataFrame([[1, 'John P. Doe'], [2, 'Jane Dove'], - [1, 'John P. Doe']], - columns=['person_id', 'person_name']) - x['person_name'] = pd.Categorical(x.person_name) - - g = x.groupby(['person_id']) - result = g.transform(lambda x: x) - tm.assert_frame_equal(result, x[['person_name']]) - - result = x.drop_duplicates('person_name') - expected = x.iloc[[0, 1]] - tm.assert_frame_equal(result, expected) - - def f(x): - return x.drop_duplicates('person_name').iloc[0] - - result = g.apply(f) - expected = x.iloc[[0, 1]].copy() - expected.index = Index([1, 2], name='person_id') - expected['person_name'] = expected['person_name'].astype('object') - tm.assert_frame_equal(result, expected) - - # GH 9921 - # Monotonic - df = DataFrame({"a": [5, 15, 25]}) - c = pd.cut(df.a, bins=[0, 10, 20, 30, 40]) - - result = df.a.groupby(c).transform(sum) - tm.assert_series_equal(result, df['a']) - - tm.assert_series_equal( - df.a.groupby(c).transform(lambda xs: np.sum(xs)), df['a']) - tm.assert_frame_equal(df.groupby(c).transform(sum), df[['a']]) - tm.assert_frame_equal( - df.groupby(c).transform(lambda xs: np.max(xs)), df[['a']]) - - # Filter - tm.assert_series_equal(df.a.groupby(c).filter(np.all), df['a']) - tm.assert_frame_equal(df.groupby(c).filter(np.all), df) - - # Non-monotonic - df = DataFrame({"a": [5, 15, 25, -5]}) - c = pd.cut(df.a, bins=[-10, 0, 10, 20, 30, 40]) - - result = df.a.groupby(c).transform(sum) - tm.assert_series_equal(result, df['a']) - - tm.assert_series_equal( - df.a.groupby(c).transform(lambda xs: np.sum(xs)), df['a']) - tm.assert_frame_equal(df.groupby(c).transform(sum), df[['a']]) - tm.assert_frame_equal( - df.groupby(c).transform(lambda xs: np.sum(xs)), df[['a']]) - - # GH 9603 - df = pd.DataFrame({'a': [1, 0, 0, 0]}) - c = pd.cut(df.a, [0, 1, 2, 3, 4]) - result = df.groupby(c).apply(len) - - exp_index = pd.CategoricalIndex(c.values.categories, - ordered=c.values.ordered) - expected = pd.Series([1, 0, 0, 0], index=exp_index) - expected.index.name = 
'a' - tm.assert_series_equal(result, expected) - - def test_pivot_table(self): - - raw_cat1 = Categorical(["a", "a", "b", "b"], - categories=["a", "b", "z"], ordered=True) - raw_cat2 = Categorical(["c", "d", "c", "d"], - categories=["c", "d", "y"], ordered=True) - df = DataFrame({"A": raw_cat1, "B": raw_cat2, "values": [1, 2, 3, 4]}) - result = pd.pivot_table(df, values='values', index=['A', 'B']) - - exp_index = pd.MultiIndex.from_product( - [Categorical(["a", "b", "z"], ordered=True), - Categorical(["c", "d", "y"], ordered=True)], - names=['A', 'B']) - expected = Series([1, 2, np.nan, 3, 4, np.nan, np.nan, np.nan, np.nan], - index=exp_index, name='values') - tm.assert_series_equal(result, expected) - - def test_count(self): - - s = Series(Categorical([np.nan, 1, 2, np.nan], - categories=[5, 4, 3, 2, 1], ordered=True)) - result = s.count() - self.assertEqual(result, 2) - - def test_sort_values(self): - - c = Categorical(["a", "b", "b", "a"], ordered=False) - cat = Series(c.copy()) - - # 'order' was deprecated in gh-10726 - # 'sort' was deprecated in gh-12882 - for func in ('order', 'sort'): - with tm.assert_produces_warning(FutureWarning): - getattr(c, func)() - - # sort in the categories order - expected = Series( - Categorical(["a", "a", "b", "b"], - ordered=False), index=[0, 3, 1, 2]) - result = cat.sort_values() - tm.assert_series_equal(result, expected) - - cat = Series(Categorical(["a", "c", "b", "d"], ordered=True)) - res = cat.sort_values() - exp = np.array(["a", "b", "c", "d"], dtype=np.object_) - self.assert_numpy_array_equal(res.__array__(), exp) - - cat = Series(Categorical(["a", "c", "b", "d"], categories=[ - "a", "b", "c", "d"], ordered=True)) - res = cat.sort_values() - exp = np.array(["a", "b", "c", "d"], dtype=np.object_) - self.assert_numpy_array_equal(res.__array__(), exp) - - res = cat.sort_values(ascending=False) - exp = np.array(["d", "c", "b", "a"], dtype=np.object_) - self.assert_numpy_array_equal(res.__array__(), exp) - - raw_cat1 = 
Categorical(["a", "b", "c", "d"], - categories=["a", "b", "c", "d"], ordered=False) - raw_cat2 = Categorical(["a", "b", "c", "d"], - categories=["d", "c", "b", "a"], ordered=True) - s = ["a", "b", "c", "d"] - df = DataFrame({"unsort": raw_cat1, - "sort": raw_cat2, - "string": s, - "values": [1, 2, 3, 4]}) - - # Cats must be sorted in a dataframe - res = df.sort_values(by=["string"], ascending=False) - exp = np.array(["d", "c", "b", "a"], dtype=np.object_) - self.assert_numpy_array_equal(res["sort"].values.__array__(), exp) - self.assertEqual(res["sort"].dtype, "category") - - res = df.sort_values(by=["sort"], ascending=False) - exp = df.sort_values(by=["string"], ascending=True) - self.assert_series_equal(res["values"], exp["values"]) - self.assertEqual(res["sort"].dtype, "category") - self.assertEqual(res["unsort"].dtype, "category") - - # unordered cat, but we allow this - df.sort_values(by=["unsort"], ascending=False) - - # multi-columns sort - # GH 7848 - df = DataFrame({"id": [6, 5, 4, 3, 2, 1], - "raw_grade": ['a', 'b', 'b', 'a', 'a', 'e']}) - df["grade"] = pd.Categorical(df["raw_grade"], ordered=True) - df['grade'] = df['grade'].cat.set_categories(['b', 'e', 'a']) - - # sorts 'grade' according to the order of the categories - result = df.sort_values(by=['grade']) - expected = df.iloc[[1, 2, 5, 0, 3, 4]] - tm.assert_frame_equal(result, expected) - - # multi - result = df.sort_values(by=['grade', 'id']) - expected = df.iloc[[2, 1, 5, 4, 3, 0]] - tm.assert_frame_equal(result, expected) - - def test_slicing(self): - cat = Series(Categorical([1, 2, 3, 4])) - reversed = cat[::-1] - exp = np.array([4, 3, 2, 1], dtype=np.int64) - self.assert_numpy_array_equal(reversed.__array__(), exp) - - df = DataFrame({'value': (np.arange(100) + 1).astype('int64')}) - df['D'] = pd.cut(df.value, bins=[0, 25, 50, 75, 100]) - - expected = Series([11, '(0, 25]'], index=['value', 'D'], name=10) - result = df.iloc[10] - tm.assert_series_equal(result, expected) - - expected = 
DataFrame({'value': np.arange(11, 21).astype('int64')}, - index=np.arange(10, 20).astype('int64')) - expected['D'] = pd.cut(expected.value, bins=[0, 25, 50, 75, 100]) - result = df.iloc[10:20] - tm.assert_frame_equal(result, expected) - - expected = Series([9, '(0, 25]'], index=['value', 'D'], name=8) - result = df.loc[8] - tm.assert_series_equal(result, expected) - - def test_slicing_and_getting_ops(self): - - # systematically test the slicing operations: - # for all slicing ops: - # - returning a dataframe - # - returning a column - # - returning a row - # - returning a single value - - cats = pd.Categorical( - ["a", "c", "b", "c", "c", "c", "c"], categories=["a", "b", "c"]) - idx = pd.Index(["h", "i", "j", "k", "l", "m", "n"]) - values = [1, 2, 3, 4, 5, 6, 7] - df = pd.DataFrame({"cats": cats, "values": values}, index=idx) - - # the expected values - cats2 = pd.Categorical(["b", "c"], categories=["a", "b", "c"]) - idx2 = pd.Index(["j", "k"]) - values2 = [3, 4] - - # 2:4,: | "j":"k",: - exp_df = pd.DataFrame({"cats": cats2, "values": values2}, index=idx2) - - # :,"cats" | :,0 - exp_col = pd.Series(cats, index=idx, name='cats') - - # "j",: | 2,: - exp_row = pd.Series(["b", 3], index=["cats", "values"], dtype="object", - name="j") - - # "j","cats | 2,0 - exp_val = "b" - - # iloc - # frame - res_df = df.iloc[2:4, :] - tm.assert_frame_equal(res_df, exp_df) - self.assertTrue(is_categorical_dtype(res_df["cats"])) - - # row - res_row = df.iloc[2, :] - tm.assert_series_equal(res_row, exp_row) - tm.assertIsInstance(res_row["cats"], compat.string_types) - - # col - res_col = df.iloc[:, 0] - tm.assert_series_equal(res_col, exp_col) - self.assertTrue(is_categorical_dtype(res_col)) - - # single value - res_val = df.iloc[2, 0] - self.assertEqual(res_val, exp_val) - - # loc - # frame - res_df = df.loc["j":"k", :] - tm.assert_frame_equal(res_df, exp_df) - self.assertTrue(is_categorical_dtype(res_df["cats"])) - - # row - res_row = df.loc["j", :] - tm.assert_series_equal(res_row, 
exp_row) - tm.assertIsInstance(res_row["cats"], compat.string_types) - - # col - res_col = df.loc[:, "cats"] - tm.assert_series_equal(res_col, exp_col) - self.assertTrue(is_categorical_dtype(res_col)) - - # single value - res_val = df.loc["j", "cats"] - self.assertEqual(res_val, exp_val) - - # ix - # frame - # res_df = df.loc["j":"k",[0,1]] # doesn't work? - res_df = df.loc["j":"k", :] - tm.assert_frame_equal(res_df, exp_df) - self.assertTrue(is_categorical_dtype(res_df["cats"])) - - # row - res_row = df.loc["j", :] - tm.assert_series_equal(res_row, exp_row) - tm.assertIsInstance(res_row["cats"], compat.string_types) - - # col - res_col = df.loc[:, "cats"] - tm.assert_series_equal(res_col, exp_col) - self.assertTrue(is_categorical_dtype(res_col)) - - # single value - res_val = df.loc["j", df.columns[0]] - self.assertEqual(res_val, exp_val) - - # iat - res_val = df.iat[2, 0] - self.assertEqual(res_val, exp_val) - - # at - res_val = df.at["j", "cats"] - self.assertEqual(res_val, exp_val) - - # fancy indexing - exp_fancy = df.iloc[[2]] - - res_fancy = df[df["cats"] == "b"] - tm.assert_frame_equal(res_fancy, exp_fancy) - res_fancy = df[df["values"] == 3] - tm.assert_frame_equal(res_fancy, exp_fancy) - - # get_value - res_val = df.get_value("j", "cats") - self.assertEqual(res_val, exp_val) - - # i : int, slice, or sequence of integers - res_row = df.iloc[2] - tm.assert_series_equal(res_row, exp_row) - tm.assertIsInstance(res_row["cats"], compat.string_types) - - res_df = df.iloc[slice(2, 4)] - tm.assert_frame_equal(res_df, exp_df) - self.assertTrue(is_categorical_dtype(res_df["cats"])) - - res_df = df.iloc[[2, 3]] - tm.assert_frame_equal(res_df, exp_df) - self.assertTrue(is_categorical_dtype(res_df["cats"])) - - res_col = df.iloc[:, 0] - tm.assert_series_equal(res_col, exp_col) - self.assertTrue(is_categorical_dtype(res_col)) - - res_df = df.iloc[:, slice(0, 2)] - tm.assert_frame_equal(res_df, df) - self.assertTrue(is_categorical_dtype(res_df["cats"])) - - res_df = 
df.iloc[:, [0, 1]] - tm.assert_frame_equal(res_df, df) - self.assertTrue(is_categorical_dtype(res_df["cats"])) - - def test_slicing_doc_examples(self): - - # GH 7918 - cats = Categorical(["a", "b", "b", "b", "c", "c", "c"], - categories=["a", "b", "c"]) - idx = Index(["h", "i", "j", "k", "l", "m", "n", ]) - values = [1, 2, 2, 2, 3, 4, 5] - df = DataFrame({"cats": cats, "values": values}, index=idx) - - result = df.iloc[2:4, :] - expected = DataFrame( - {"cats": Categorical(['b', 'b'], categories=['a', 'b', 'c']), - "values": [2, 2]}, index=['j', 'k']) - tm.assert_frame_equal(result, expected) - - result = df.iloc[2:4, :].dtypes - expected = Series(['category', 'int64'], ['cats', 'values']) - tm.assert_series_equal(result, expected) - - result = df.loc["h":"j", "cats"] - expected = Series(Categorical(['a', 'b', 'b'], - categories=['a', 'b', 'c']), - index=['h', 'i', 'j'], name='cats') - tm.assert_series_equal(result, expected) - - result = df.loc["h":"j", df.columns[0:1]] - expected = DataFrame({'cats': Categorical(['a', 'b', 'b'], - categories=['a', 'b', 'c'])}, - index=['h', 'i', 'j']) - tm.assert_frame_equal(result, expected) - - def test_assigning_ops(self): - # systematically test the assigning operations: - # for all slicing ops: - # for value in categories and value not in categories: - - # - assign a single value -> exp_single_cats_value - - # - assign a complete row (mixed values) -> exp_single_row - - # assign multiple rows (mixed values) (-> array) -> exp_multi_row - - # assign a part of a column with dtype == categorical -> - # exp_parts_cats_col - - # assign a part of a column with dtype != categorical -> - # exp_parts_cats_col - - cats = pd.Categorical(["a", "a", "a", "a", "a", "a", "a"], - categories=["a", "b"]) - idx = pd.Index(["h", "i", "j", "k", "l", "m", "n"]) - values = [1, 1, 1, 1, 1, 1, 1] - orig = pd.DataFrame({"cats": cats, "values": values}, index=idx) - - # the expected values - # changed single row - cats1 = pd.Categorical(["a", "a", "b", 
"a", "a", "a", "a"], - categories=["a", "b"]) - idx1 = pd.Index(["h", "i", "j", "k", "l", "m", "n"]) - values1 = [1, 1, 2, 1, 1, 1, 1] - exp_single_row = pd.DataFrame({"cats": cats1, - "values": values1}, index=idx1) - - # changed multiple rows - cats2 = pd.Categorical(["a", "a", "b", "b", "a", "a", "a"], - categories=["a", "b"]) - idx2 = pd.Index(["h", "i", "j", "k", "l", "m", "n"]) - values2 = [1, 1, 2, 2, 1, 1, 1] - exp_multi_row = pd.DataFrame({"cats": cats2, - "values": values2}, index=idx2) - - # changed part of the cats column - cats3 = pd.Categorical( - ["a", "a", "b", "b", "a", "a", "a"], categories=["a", "b"]) - idx3 = pd.Index(["h", "i", "j", "k", "l", "m", "n"]) - values3 = [1, 1, 1, 1, 1, 1, 1] - exp_parts_cats_col = pd.DataFrame( - {"cats": cats3, - "values": values3}, index=idx3) - - # changed single value in cats col - cats4 = pd.Categorical( - ["a", "a", "b", "a", "a", "a", "a"], categories=["a", "b"]) - idx4 = pd.Index(["h", "i", "j", "k", "l", "m", "n"]) - values4 = [1, 1, 1, 1, 1, 1, 1] - exp_single_cats_value = pd.DataFrame( - {"cats": cats4, - "values": values4}, index=idx4) - - # iloc - # ############### - # - assign a single value -> exp_single_cats_value - df = orig.copy() - df.iloc[2, 0] = "b" - tm.assert_frame_equal(df, exp_single_cats_value) - - df = orig.copy() - df.iloc[df.index == "j", 0] = "b" - tm.assert_frame_equal(df, exp_single_cats_value) - - # - assign a single value not in the current categories set - def f(): - df = orig.copy() - df.iloc[2, 0] = "c" - - self.assertRaises(ValueError, f) - - # - assign a complete row (mixed values) -> exp_single_row - df = orig.copy() - df.iloc[2, :] = ["b", 2] - tm.assert_frame_equal(df, exp_single_row) - - # - assign a complete row (mixed values) not in categories set - def f(): - df = orig.copy() - df.iloc[2, :] = ["c", 2] - - self.assertRaises(ValueError, f) - - # - assign multiple rows (mixed values) -> exp_multi_row - df = orig.copy() - df.iloc[2:4, :] = [["b", 2], ["b", 2]] - 
tm.assert_frame_equal(df, exp_multi_row) - - def f(): - df = orig.copy() - df.iloc[2:4, :] = [["c", 2], ["c", 2]] - - self.assertRaises(ValueError, f) - - # assign a part of a column with dtype == categorical -> - # exp_parts_cats_col - df = orig.copy() - df.iloc[2:4, 0] = pd.Categorical(["b", "b"], categories=["a", "b"]) - tm.assert_frame_equal(df, exp_parts_cats_col) - - with tm.assertRaises(ValueError): - # different categories -> not sure if this should fail or pass - df = orig.copy() - df.iloc[2:4, 0] = pd.Categorical( - ["b", "b"], categories=["a", "b", "c"]) - - with tm.assertRaises(ValueError): - # different values - df = orig.copy() - df.iloc[2:4, 0] = pd.Categorical( - ["c", "c"], categories=["a", "b", "c"]) - - # assign a part of a column with dtype != categorical -> - # exp_parts_cats_col - df = orig.copy() - df.iloc[2:4, 0] = ["b", "b"] - tm.assert_frame_equal(df, exp_parts_cats_col) - - with tm.assertRaises(ValueError): - df.iloc[2:4, 0] = ["c", "c"] - - # loc - # ############## - # - assign a single value -> exp_single_cats_value - df = orig.copy() - df.loc["j", "cats"] = "b" - tm.assert_frame_equal(df, exp_single_cats_value) - - df = orig.copy() - df.loc[df.index == "j", "cats"] = "b" - tm.assert_frame_equal(df, exp_single_cats_value) - - # - assign a single value not in the current categories set - def f(): - df = orig.copy() - df.loc["j", "cats"] = "c" - - self.assertRaises(ValueError, f) - - # - assign a complete row (mixed values) -> exp_single_row - df = orig.copy() - df.loc["j", :] = ["b", 2] - tm.assert_frame_equal(df, exp_single_row) - - # - assign a complete row (mixed values) not in categories set - def f(): - df = orig.copy() - df.loc["j", :] = ["c", 2] - - self.assertRaises(ValueError, f) - - # - assign multiple rows (mixed values) -> exp_multi_row - df = orig.copy() - df.loc["j":"k", :] = [["b", 2], ["b", 2]] - tm.assert_frame_equal(df, exp_multi_row) - - def f(): - df = orig.copy() - df.loc["j":"k", :] = [["c", 2], ["c", 2]] - - 
self.assertRaises(ValueError, f) - - # assign a part of a column with dtype == categorical -> - # exp_parts_cats_col - df = orig.copy() - df.loc["j":"k", "cats"] = pd.Categorical( - ["b", "b"], categories=["a", "b"]) - tm.assert_frame_equal(df, exp_parts_cats_col) - - with tm.assertRaises(ValueError): - # different categories -> not sure if this should fail or pass - df = orig.copy() - df.loc["j":"k", "cats"] = pd.Categorical( - ["b", "b"], categories=["a", "b", "c"]) - - with tm.assertRaises(ValueError): - # different values - df = orig.copy() - df.loc["j":"k", "cats"] = pd.Categorical( - ["c", "c"], categories=["a", "b", "c"]) - - # assign a part of a column with dtype != categorical -> - # exp_parts_cats_col - df = orig.copy() - df.loc["j":"k", "cats"] = ["b", "b"] - tm.assert_frame_equal(df, exp_parts_cats_col) - - with tm.assertRaises(ValueError): - df.loc["j":"k", "cats"] = ["c", "c"] - - # loc - # ############## - # - assign a single value -> exp_single_cats_value - df = orig.copy() - df.loc["j", df.columns[0]] = "b" - tm.assert_frame_equal(df, exp_single_cats_value) - - df = orig.copy() - df.loc[df.index == "j", df.columns[0]] = "b" - tm.assert_frame_equal(df, exp_single_cats_value) - - # - assign a single value not in the current categories set - def f(): - df = orig.copy() - df.loc["j", df.columns[0]] = "c" - - self.assertRaises(ValueError, f) - - # - assign a complete row (mixed values) -> exp_single_row - df = orig.copy() - df.loc["j", :] = ["b", 2] - tm.assert_frame_equal(df, exp_single_row) - - # - assign a complete row (mixed values) not in categories set - def f(): - df = orig.copy() - df.loc["j", :] = ["c", 2] - - self.assertRaises(ValueError, f) - - # - assign multiple rows (mixed values) -> exp_multi_row - df = orig.copy() - df.loc["j":"k", :] = [["b", 2], ["b", 2]] - tm.assert_frame_equal(df, exp_multi_row) - - def f(): - df = orig.copy() - df.loc["j":"k", :] = [["c", 2], ["c", 2]] - - self.assertRaises(ValueError, f) - - # assign a part of a 
column with dtype == categorical -> - # exp_parts_cats_col - df = orig.copy() - df.loc["j":"k", df.columns[0]] = pd.Categorical( - ["b", "b"], categories=["a", "b"]) - tm.assert_frame_equal(df, exp_parts_cats_col) - - with tm.assertRaises(ValueError): - # different categories -> not sure if this should fail or pass - df = orig.copy() - df.loc["j":"k", df.columns[0]] = pd.Categorical( - ["b", "b"], categories=["a", "b", "c"]) - - with tm.assertRaises(ValueError): - # different values - df = orig.copy() - df.loc["j":"k", df.columns[0]] = pd.Categorical( - ["c", "c"], categories=["a", "b", "c"]) - - # assign a part of a column with dtype != categorical -> - # exp_parts_cats_col - df = orig.copy() - df.loc["j":"k", df.columns[0]] = ["b", "b"] - tm.assert_frame_equal(df, exp_parts_cats_col) - - with tm.assertRaises(ValueError): - df.loc["j":"k", df.columns[0]] = ["c", "c"] - - # iat - df = orig.copy() - df.iat[2, 0] = "b" - tm.assert_frame_equal(df, exp_single_cats_value) - - # - assign a single value not in the current categories set - def f(): - df = orig.copy() - df.iat[2, 0] = "c" - - self.assertRaises(ValueError, f) - - # at - # - assign a single value -> exp_single_cats_value - df = orig.copy() - df.at["j", "cats"] = "b" - tm.assert_frame_equal(df, exp_single_cats_value) - - # - assign a single value not in the current categories set - def f(): - df = orig.copy() - df.at["j", "cats"] = "c" - - self.assertRaises(ValueError, f) - - # fancy indexing - catsf = pd.Categorical(["a", "a", "c", "c", "a", "a", "a"], - categories=["a", "b", "c"]) - idxf = pd.Index(["h", "i", "j", "k", "l", "m", "n"]) - valuesf = [1, 1, 3, 3, 1, 1, 1] - df = pd.DataFrame({"cats": catsf, "values": valuesf}, index=idxf) - - exp_fancy = exp_multi_row.copy() - exp_fancy["cats"].cat.set_categories(["a", "b", "c"], inplace=True) - - df[df["cats"] == "c"] = ["b", 2] - # category c is kept in .categories - tm.assert_frame_equal(df, exp_fancy) - - # set_value - df = orig.copy() - df.set_value("j", 
"cats", "b") - tm.assert_frame_equal(df, exp_single_cats_value) - - def f(): - df = orig.copy() - df.set_value("j", "cats", "c") - - self.assertRaises(ValueError, f) - - # Assigning a Category to parts of a int/... column uses the values of - # the Catgorical - df = pd.DataFrame({"a": [1, 1, 1, 1, 1], - "b": ["a", "a", "a", "a", "a"]}) - exp = pd.DataFrame({"a": [1, "b", "b", 1, 1], - "b": ["a", "a", "b", "b", "a"]}) - df.loc[1:2, "a"] = pd.Categorical(["b", "b"], categories=["a", "b"]) - df.loc[2:3, "b"] = pd.Categorical(["b", "b"], categories=["a", "b"]) - tm.assert_frame_equal(df, exp) - - # Series - orig = Series(pd.Categorical(["b", "b"], categories=["a", "b"])) - s = orig.copy() - s[:] = "a" - exp = Series(pd.Categorical(["a", "a"], categories=["a", "b"])) - tm.assert_series_equal(s, exp) - - s = orig.copy() - s[1] = "a" - exp = Series(pd.Categorical(["b", "a"], categories=["a", "b"])) - tm.assert_series_equal(s, exp) - - s = orig.copy() - s[s.index > 0] = "a" - exp = Series(pd.Categorical(["b", "a"], categories=["a", "b"])) - tm.assert_series_equal(s, exp) - - s = orig.copy() - s[[False, True]] = "a" - exp = Series(pd.Categorical(["b", "a"], categories=["a", "b"])) - tm.assert_series_equal(s, exp) - - s = orig.copy() - s.index = ["x", "y"] - s["y"] = "a" - exp = Series(pd.Categorical(["b", "a"], categories=["a", "b"]), - index=["x", "y"]) - tm.assert_series_equal(s, exp) - - # ensure that one can set something to np.nan - s = Series(Categorical([1, 2, 3])) - exp = Series(Categorical([1, np.nan, 3], categories=[1, 2, 3])) - s[1] = np.nan - tm.assert_series_equal(s, exp) - - def test_comparisons(self): - tests_data = [(list("abc"), list("cba"), list("bbb")), - ([1, 2, 3], [3, 2, 1], [2, 2, 2])] - for data, reverse, base in tests_data: - cat_rev = pd.Series(pd.Categorical(data, categories=reverse, - ordered=True)) - cat_rev_base = pd.Series(pd.Categorical(base, categories=reverse, - ordered=True)) - cat = pd.Series(pd.Categorical(data, ordered=True)) - cat_base 
= pd.Series(pd.Categorical( - base, categories=cat.cat.categories, ordered=True)) - s = Series(base) - a = np.array(base) - - # comparisons need to take categories ordering into account - res_rev = cat_rev > cat_rev_base - exp_rev = Series([True, False, False]) - tm.assert_series_equal(res_rev, exp_rev) - - res_rev = cat_rev < cat_rev_base - exp_rev = Series([False, False, True]) - tm.assert_series_equal(res_rev, exp_rev) - - res = cat > cat_base - exp = Series([False, False, True]) - tm.assert_series_equal(res, exp) - - scalar = base[1] - res = cat > scalar - exp = Series([False, False, True]) - exp2 = cat.values > scalar - tm.assert_series_equal(res, exp) - tm.assert_numpy_array_equal(res.values, exp2) - res_rev = cat_rev > scalar - exp_rev = Series([True, False, False]) - exp_rev2 = cat_rev.values > scalar - tm.assert_series_equal(res_rev, exp_rev) - tm.assert_numpy_array_equal(res_rev.values, exp_rev2) - - # Only categories with same categories can be compared - def f(): - cat > cat_rev - - self.assertRaises(TypeError, f) - - # categorical cannot be compared to Series or numpy array, and also - # not the other way around - self.assertRaises(TypeError, lambda: cat > s) - self.assertRaises(TypeError, lambda: cat_rev > s) - self.assertRaises(TypeError, lambda: cat > a) - self.assertRaises(TypeError, lambda: cat_rev > a) - - self.assertRaises(TypeError, lambda: s < cat) - self.assertRaises(TypeError, lambda: s < cat_rev) - - self.assertRaises(TypeError, lambda: a < cat) - self.assertRaises(TypeError, lambda: a < cat_rev) - - # unequal comparison should raise for unordered cats - cat = Series(Categorical(list("abc"))) - - def f(): - cat > "b" - - self.assertRaises(TypeError, f) - cat = Series(Categorical(list("abc"), ordered=False)) - - def f(): - cat > "b" - - self.assertRaises(TypeError, f) - - # https://github.com/pandas-dev/pandas/issues/9836#issuecomment-92123057 - # and following comparisons with scalars not in categories should raise - # for unequal comps, 
but not for equal/not equal - cat = Series(Categorical(list("abc"), ordered=True)) - - self.assertRaises(TypeError, lambda: cat < "d") - self.assertRaises(TypeError, lambda: cat > "d") - self.assertRaises(TypeError, lambda: "d" < cat) - self.assertRaises(TypeError, lambda: "d" > cat) - - self.assert_series_equal(cat == "d", Series([False, False, False])) - self.assert_series_equal(cat != "d", Series([True, True, True])) - - # And test NaN handling... - cat = Series(Categorical(["a", "b", "c", np.nan])) - exp = Series([True, True, True, False]) - res = (cat == cat) - tm.assert_series_equal(res, exp) - - def test_cat_equality(self): - - # GH 8938 - # allow equality comparisons - a = Series(list('abc'), dtype="category") - b = Series(list('abc'), dtype="object") - c = Series(['a', 'b', 'cc'], dtype="object") - d = Series(list('acb'), dtype="object") - e = Categorical(list('abc')) - f = Categorical(list('acb')) - - # vs scalar - self.assertFalse((a == 'a').all()) - self.assertTrue(((a != 'a') == ~(a == 'a')).all()) - - self.assertFalse(('a' == a).all()) - self.assertTrue((a == 'a')[0]) - self.assertTrue(('a' == a)[0]) - self.assertFalse(('a' != a)[0]) - - # vs list-like - self.assertTrue((a == a).all()) - self.assertFalse((a != a).all()) - - self.assertTrue((a == list(a)).all()) - self.assertTrue((a == b).all()) - self.assertTrue((b == a).all()) - self.assertTrue(((~(a == b)) == (a != b)).all()) - self.assertTrue(((~(b == a)) == (b != a)).all()) - - self.assertFalse((a == c).all()) - self.assertFalse((c == a).all()) - self.assertFalse((a == d).all()) - self.assertFalse((d == a).all()) - - # vs a cat-like - self.assertTrue((a == e).all()) - self.assertTrue((e == a).all()) - self.assertFalse((a == f).all()) - self.assertFalse((f == a).all()) - - self.assertTrue(((~(a == e) == (a != e)).all())) - self.assertTrue(((~(e == a) == (e != a)).all())) - self.assertTrue(((~(a == f) == (a != f)).all())) - self.assertTrue(((~(f == a) == (f != a)).all())) - - # non-equality is not 
comparable - self.assertRaises(TypeError, lambda: a < b) - self.assertRaises(TypeError, lambda: b < a) - self.assertRaises(TypeError, lambda: a > b) - self.assertRaises(TypeError, lambda: b > a) - - def test_concat_append(self): - cat = pd.Categorical(["a", "b"], categories=["a", "b"]) - vals = [1, 2] - df = pd.DataFrame({"cats": cat, "vals": vals}) - cat2 = pd.Categorical(["a", "b", "a", "b"], categories=["a", "b"]) - vals2 = [1, 2, 1, 2] - exp = pd.DataFrame({"cats": cat2, - "vals": vals2}, index=pd.Index([0, 1, 0, 1])) - - tm.assert_frame_equal(pd.concat([df, df]), exp) - tm.assert_frame_equal(df.append(df), exp) - - # GH 13524 can concat different categories - cat3 = pd.Categorical(["a", "b"], categories=["a", "b", "c"]) - vals3 = [1, 2] - df_different_categories = pd.DataFrame({"cats": cat3, "vals": vals3}) - - res = pd.concat([df, df_different_categories], ignore_index=True) - exp = pd.DataFrame({"cats": list('abab'), "vals": [1, 2, 1, 2]}) - tm.assert_frame_equal(res, exp) - - res = df.append(df_different_categories, ignore_index=True) - tm.assert_frame_equal(res, exp) - - def test_concat_append_gh7864(self): - # GH 7864 - # make sure ordering is preserverd - df = pd.DataFrame({"id": [1, 2, 3, 4, 5, 6], - "raw_grade": ['a', 'b', 'b', 'a', 'a', 'e']}) - df["grade"] = pd.Categorical(df["raw_grade"]) - df['grade'].cat.set_categories(['e', 'a', 'b']) - - df1 = df[0:3] - df2 = df[3:] - - self.assert_index_equal(df['grade'].cat.categories, - df1['grade'].cat.categories) - self.assert_index_equal(df['grade'].cat.categories, - df2['grade'].cat.categories) - - dfx = pd.concat([df1, df2]) - self.assert_index_equal(df['grade'].cat.categories, - dfx['grade'].cat.categories) - - dfa = df1.append(df2) - self.assert_index_equal(df['grade'].cat.categories, - dfa['grade'].cat.categories) - - def test_concat_preserve(self): - - # GH 8641 series concat not preserving category dtype - # GH 13524 can concat different categories - s = Series(list('abc'), dtype='category') - s2 = 
Series(list('abd'), dtype='category') - - exp = Series(list('abcabd')) - res = pd.concat([s, s2], ignore_index=True) - tm.assert_series_equal(res, exp) - - exp = Series(list('abcabc'), dtype='category') - res = pd.concat([s, s], ignore_index=True) - tm.assert_series_equal(res, exp) - - exp = Series(list('abcabc'), index=[0, 1, 2, 0, 1, 2], - dtype='category') - res = pd.concat([s, s]) - tm.assert_series_equal(res, exp) - - a = Series(np.arange(6, dtype='int64')) - b = Series(list('aabbca')) - - df2 = DataFrame({'A': a, - 'B': b.astype('category', categories=list('cab'))}) - res = pd.concat([df2, df2]) - exp = DataFrame({'A': pd.concat([a, a]), - 'B': pd.concat([b, b]).astype( - 'category', categories=list('cab'))}) - tm.assert_frame_equal(res, exp) - - def test_categorical_index_preserver(self): - - a = Series(np.arange(6, dtype='int64')) - b = Series(list('aabbca')) - - df2 = DataFrame({'A': a, - 'B': b.astype('category', categories=list('cab')) - }).set_index('B') - result = pd.concat([df2, df2]) - expected = DataFrame({'A': pd.concat([a, a]), - 'B': pd.concat([b, b]).astype( - 'category', categories=list('cab')) - }).set_index('B') - tm.assert_frame_equal(result, expected) - - # wrong catgories - df3 = DataFrame({'A': a, - 'B': pd.Categorical(b, categories=list('abc')) - }).set_index('B') - self.assertRaises(TypeError, lambda: pd.concat([df2, df3])) - - def test_merge(self): - # GH 9426 - - right = DataFrame({'c': {0: 'a', - 1: 'b', - 2: 'c', - 3: 'd', - 4: 'e'}, - 'd': {0: 'null', - 1: 'null', - 2: 'null', - 3: 'null', - 4: 'null'}}) - left = DataFrame({'a': {0: 'f', - 1: 'f', - 2: 'f', - 3: 'f', - 4: 'f'}, - 'b': {0: 'g', - 1: 'g', - 2: 'g', - 3: 'g', - 4: 'g'}}) - df = pd.merge(left, right, how='left', left_on='b', right_on='c') - - # object-object - expected = df.copy() - - # object-cat - cright = right.copy() - cright['d'] = cright['d'].astype('category') - result = pd.merge(left, cright, how='left', left_on='b', right_on='c') - 
tm.assert_frame_equal(result, expected) - - # cat-object - cleft = left.copy() - cleft['b'] = cleft['b'].astype('category') - result = pd.merge(cleft, cright, how='left', left_on='b', right_on='c') - tm.assert_frame_equal(result, expected) - - # cat-cat - cright = right.copy() - cright['d'] = cright['d'].astype('category') - cleft = left.copy() - cleft['b'] = cleft['b'].astype('category') - result = pd.merge(cleft, cright, how='left', left_on='b', right_on='c') - tm.assert_frame_equal(result, expected) - - def test_repeat(self): - # GH10183 - cat = pd.Categorical(["a", "b"], categories=["a", "b"]) - exp = pd.Categorical(["a", "a", "b", "b"], categories=["a", "b"]) - res = cat.repeat(2) - self.assert_categorical_equal(res, exp) - - def test_numpy_repeat(self): - cat = pd.Categorical(["a", "b"], categories=["a", "b"]) - exp = pd.Categorical(["a", "a", "b", "b"], categories=["a", "b"]) - self.assert_categorical_equal(np.repeat(cat, 2), exp) - - msg = "the 'axis' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.repeat, cat, 2, axis=1) - - def test_reshape(self): - cat = pd.Categorical([], categories=["a", "b"]) - tm.assert_produces_warning(FutureWarning, cat.reshape, 0) - - with tm.assert_produces_warning(FutureWarning): - cat = pd.Categorical([], categories=["a", "b"]) - self.assert_categorical_equal(cat.reshape(0), cat) - - with tm.assert_produces_warning(FutureWarning): - cat = pd.Categorical([], categories=["a", "b"]) - self.assert_categorical_equal(cat.reshape((5, -1)), cat) - - with tm.assert_produces_warning(FutureWarning): - cat = pd.Categorical(["a", "b"], categories=["a", "b"]) - self.assert_categorical_equal(cat.reshape(cat.shape), cat) - - with tm.assert_produces_warning(FutureWarning): - cat = pd.Categorical(["a", "b"], categories=["a", "b"]) - self.assert_categorical_equal(cat.reshape(cat.size), cat) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - msg = "can only specify one unknown dimension" - cat = 
pd.Categorical(["a", "b"], categories=["a", "b"]) - tm.assertRaisesRegexp(ValueError, msg, cat.reshape, (-2, -1)) - - def test_numpy_reshape(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - cat = pd.Categorical(["a", "b"], categories=["a", "b"]) - self.assert_categorical_equal(np.reshape(cat, cat.shape), cat) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - msg = "the 'order' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.reshape, - cat, cat.shape, order='F') - - def test_na_actions(self): - - cat = pd.Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) - vals = ["a", "b", np.nan, "d"] - df = pd.DataFrame({"cats": cat, "vals": vals}) - cat2 = pd.Categorical([1, 2, 3, 3], categories=[1, 2, 3]) - vals2 = ["a", "b", "b", "d"] - df_exp_fill = pd.DataFrame({"cats": cat2, "vals": vals2}) - cat3 = pd.Categorical([1, 2, 3], categories=[1, 2, 3]) - vals3 = ["a", "b", np.nan] - df_exp_drop_cats = pd.DataFrame({"cats": cat3, "vals": vals3}) - cat4 = pd.Categorical([1, 2], categories=[1, 2, 3]) - vals4 = ["a", "b"] - df_exp_drop_all = pd.DataFrame({"cats": cat4, "vals": vals4}) - - # fillna - res = df.fillna(value={"cats": 3, "vals": "b"}) - tm.assert_frame_equal(res, df_exp_fill) - - def f(): - df.fillna(value={"cats": 4, "vals": "c"}) - - self.assertRaises(ValueError, f) - - res = df.fillna(method='pad') - tm.assert_frame_equal(res, df_exp_fill) - - res = df.dropna(subset=["cats"]) - tm.assert_frame_equal(res, df_exp_drop_cats) - - res = df.dropna() - tm.assert_frame_equal(res, df_exp_drop_all) - - # make sure that fillna takes missing values into account - c = Categorical([np.nan, "b", np.nan], categories=["a", "b"]) - df = pd.DataFrame({"cats": c, "vals": [1, 2, 3]}) - - cat_exp = Categorical(["a", "b", "a"], categories=["a", "b"]) - df_exp = pd.DataFrame({"cats": cat_exp, "vals": [1, 2, 3]}) - - res = df.fillna("a") - tm.assert_frame_equal(res, df_exp) - - # GH 14021 - # np.nan should 
always be a is a valid filler - cat = Categorical([np.nan, 2, np.nan]) - val = Categorical([np.nan, np.nan, np.nan]) - df = DataFrame({"cats": cat, "vals": val}) - res = df.fillna(df.median()) - v_exp = [np.nan, np.nan, np.nan] - df_exp = pd.DataFrame({"cats": [2, 2, 2], "vals": v_exp}, - dtype='category') - tm.assert_frame_equal(res, df_exp) - - result = df.cats.fillna(np.nan) - tm.assert_series_equal(result, df.cats) - result = df.vals.fillna(np.nan) - tm.assert_series_equal(result, df.vals) - - idx = pd.DatetimeIndex(['2011-01-01 09:00', '2016-01-01 23:45', - '2011-01-01 09:00', pd.NaT, pd.NaT]) - df = DataFrame({'a': pd.Categorical(idx)}) - tm.assert_frame_equal(df.fillna(value=pd.NaT), df) - - idx = pd.PeriodIndex(['2011-01', '2011-01', '2011-01', - pd.NaT, pd.NaT], freq='M') - df = DataFrame({'a': pd.Categorical(idx)}) - tm.assert_frame_equal(df.fillna(value=pd.NaT), df) - - idx = pd.TimedeltaIndex(['1 days', '2 days', - '1 days', pd.NaT, pd.NaT]) - df = pd.DataFrame({'a': pd.Categorical(idx)}) - tm.assert_frame_equal(df.fillna(value=pd.NaT), df) - - def test_astype_to_other(self): - - s = self.cat['value_group'] - expected = s - tm.assert_series_equal(s.astype('category'), expected) - tm.assert_series_equal(s.astype(CategoricalDtype()), expected) - self.assertRaises(ValueError, lambda: s.astype('float64')) - - cat = Series(Categorical(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'])) - exp = Series(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']) - tm.assert_series_equal(cat.astype('str'), exp) - s2 = Series(Categorical(['1', '2', '3', '4'])) - exp2 = Series([1, 2, 3, 4]).astype(int) - tm.assert_series_equal(s2.astype('int'), exp2) - - # object don't sort correctly, so just compare that we have the same - # values - def cmp(a, b): - tm.assert_almost_equal( - np.sort(np.unique(a)), np.sort(np.unique(b))) - - expected = Series(np.array(s.values), name='value_group') - cmp(s.astype('object'), expected) - cmp(s.astype(np.object_), expected) - - # array conversion - 
tm.assert_almost_equal(np.array(s), np.array(s.values)) - - # valid conversion - for valid in [lambda x: x.astype('category'), - lambda x: x.astype(CategoricalDtype()), - lambda x: x.astype('object').astype('category'), - lambda x: x.astype('object').astype( - CategoricalDtype()) - ]: - - result = valid(s) - # compare series values - # internal .categories can't be compared because it is sorted - tm.assert_series_equal(result, s, check_categorical=False) - - # invalid conversion (these are NOT a dtype) - for invalid in [lambda x: x.astype(pd.Categorical), - lambda x: x.astype('object').astype(pd.Categorical)]: - self.assertRaises(TypeError, lambda: invalid(s)) - - def test_astype_categorical(self): - - cat = Categorical(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']) - tm.assert_categorical_equal(cat, cat.astype('category')) - tm.assert_almost_equal(np.array(cat), cat.astype('object')) - - self.assertRaises(ValueError, lambda: cat.astype(float)) - - def test_to_records(self): - - # GH8626 - - # dict creation - df = DataFrame({'A': list('abc')}, dtype='category') - expected = Series(list('abc'), dtype='category', name='A') - tm.assert_series_equal(df['A'], expected) - - # list-like creation - df = DataFrame(list('abc'), dtype='category') - expected = Series(list('abc'), dtype='category', name=0) - tm.assert_series_equal(df[0], expected) - - # to record array - # this coerces - result = df.to_records() - expected = np.rec.array([(0, 'a'), (1, 'b'), (2, 'c')], - dtype=[('index', '=i8'), ('0', 'O')]) - tm.assert_almost_equal(result, expected) - - def test_numeric_like_ops(self): - - # numeric ops should not succeed - for op in ['__add__', '__sub__', '__mul__', '__truediv__']: - self.assertRaises(TypeError, - lambda: getattr(self.cat, op)(self.cat)) - - # reduction ops should not succeed (unless specifically defined, e.g. 
- # min/max) - s = self.cat['value_group'] - for op in ['kurt', 'skew', 'var', 'std', 'mean', 'sum', 'median']: - self.assertRaises(TypeError, - lambda: getattr(s, op)(numeric_only=False)) - - # mad technically works because it takes always the numeric data - - # numpy ops - s = pd.Series(pd.Categorical([1, 2, 3, 4])) - self.assertRaises(TypeError, lambda: np.sum(s)) - - # numeric ops on a Series - for op in ['__add__', '__sub__', '__mul__', '__truediv__']: - self.assertRaises(TypeError, lambda: getattr(s, op)(2)) - - # invalid ufunc - self.assertRaises(TypeError, lambda: np.log(s)) - - def test_cat_tab_completition(self): - # test the tab completion display - ok_for_cat = ['categories', 'codes', 'ordered', 'set_categories', - 'add_categories', 'remove_categories', - 'rename_categories', 'reorder_categories', - 'remove_unused_categories', 'as_ordered', 'as_unordered'] - - def get_dir(s): - results = [r for r in s.cat.__dir__() if not r.startswith('_')] - return list(sorted(set(results))) - - s = Series(list('aabbcde')).astype('category') - results = get_dir(s) - tm.assert_almost_equal(results, list(sorted(set(ok_for_cat)))) - - def test_cat_accessor_api(self): - # GH 9322 - from pandas.core.categorical import CategoricalAccessor - self.assertIs(Series.cat, CategoricalAccessor) - s = Series(list('aabbcde')).astype('category') - self.assertIsInstance(s.cat, CategoricalAccessor) - - invalid = Series([1]) - with tm.assertRaisesRegexp(AttributeError, "only use .cat accessor"): - invalid.cat - self.assertFalse(hasattr(invalid, 'cat')) - - def test_cat_accessor_no_new_attributes(self): - # https://github.com/pandas-dev/pandas/issues/10673 - c = Series(list('aabbcde')).astype('category') - with tm.assertRaisesRegexp(AttributeError, - "You cannot add any new attribute"): - c.cat.xlabel = "a" - - def test_str_accessor_api_for_categorical(self): - # https://github.com/pandas-dev/pandas/issues/10661 - from pandas.core.strings import StringMethods - s = Series(list('aabb')) - s 
= s + " " + s - c = s.astype('category') - self.assertIsInstance(c.str, StringMethods) - - # str functions, which need special arguments - special_func_defs = [ - ('cat', (list("zyxw"),), {"sep": ","}), - ('center', (10,), {}), - ('contains', ("a",), {}), - ('count', ("a",), {}), - ('decode', ("UTF-8",), {}), - ('encode', ("UTF-8",), {}), - ('endswith', ("a",), {}), - ('extract', ("([a-z]*) ",), {"expand": False}), - ('extract', ("([a-z]*) ",), {"expand": True}), - ('extractall', ("([a-z]*) ",), {}), - ('find', ("a",), {}), - ('findall', ("a",), {}), - ('index', (" ",), {}), - ('ljust', (10,), {}), - ('match', ("a"), {}), # deprecated... - ('normalize', ("NFC",), {}), - ('pad', (10,), {}), - ('partition', (" ",), {"expand": False}), # not default - ('partition', (" ",), {"expand": True}), # default - ('repeat', (3,), {}), - ('replace', ("a", "z"), {}), - ('rfind', ("a",), {}), - ('rindex', (" ",), {}), - ('rjust', (10,), {}), - ('rpartition', (" ",), {"expand": False}), # not default - ('rpartition', (" ",), {"expand": True}), # default - ('slice', (0, 1), {}), - ('slice_replace', (0, 1, "z"), {}), - ('split', (" ",), {"expand": False}), # default - ('split', (" ",), {"expand": True}), # not default - ('startswith', ("a",), {}), - ('wrap', (2,), {}), - ('zfill', (10,), {}) - ] - _special_func_names = [f[0] for f in special_func_defs] - - # * get, join: they need a individual elements of type lists, but - # we can't make a categorical with lists as individual categories. - # -> `s.str.split(" ").astype("category")` will error! - # * `translate` has different interfaces for py2 vs. 
py3 - _ignore_names = ["get", "join", "translate"] - - str_func_names = [f - for f in dir(s.str) - if not (f.startswith("_") or f in _special_func_names - or f in _ignore_names)] - - func_defs = [(f, (), {}) for f in str_func_names] - func_defs.extend(special_func_defs) - - for func, args, kwargs in func_defs: - res = getattr(c.str, func)(*args, **kwargs) - exp = getattr(s.str, func)(*args, **kwargs) - - if isinstance(res, pd.DataFrame): - tm.assert_frame_equal(res, exp) - else: - tm.assert_series_equal(res, exp) - - invalid = Series([1, 2, 3]).astype('category') - with tm.assertRaisesRegexp(AttributeError, - "Can only use .str accessor with string"): - invalid.str - self.assertFalse(hasattr(invalid, 'str')) - - def test_dt_accessor_api_for_categorical(self): - # https://github.com/pandas-dev/pandas/issues/10661 - from pandas.tseries.common import Properties - from pandas.tseries.index import date_range, DatetimeIndex - from pandas.tseries.period import period_range, PeriodIndex - from pandas.tseries.tdi import timedelta_range, TimedeltaIndex - - s_dr = Series(date_range('1/1/2015', periods=5, tz="MET")) - c_dr = s_dr.astype("category") - - s_pr = Series(period_range('1/1/2015', freq='D', periods=5)) - c_pr = s_pr.astype("category") - - s_tdr = Series(timedelta_range('1 days', '10 days')) - c_tdr = s_tdr.astype("category") - - test_data = [ - ("Datetime", DatetimeIndex._datetimelike_ops, s_dr, c_dr), - ("Period", PeriodIndex._datetimelike_ops, s_pr, c_pr), - ("Timedelta", TimedeltaIndex._datetimelike_ops, s_tdr, c_tdr)] - - self.assertIsInstance(c_dr.dt, Properties) - - special_func_defs = [ - ('strftime', ("%Y-%m-%d",), {}), - ('tz_convert', ("EST",), {}), - ('round', ("D",), {}), - ('floor', ("D",), {}), - ('ceil', ("D",), {}), - # ('tz_localize', ("UTC",), {}), - ] - _special_func_names = [f[0] for f in special_func_defs] - - # the series is already localized - _ignore_names = ['tz_localize'] - - for name, attr_names, s, c in test_data: - func_names = [f - for f 
in dir(s.dt) - if not (f.startswith("_") or f in attr_names or f in - _special_func_names or f in _ignore_names)] - - func_defs = [(f, (), {}) for f in func_names] - for f_def in special_func_defs: - if f_def[0] in dir(s.dt): - func_defs.append(f_def) - - for func, args, kwargs in func_defs: - res = getattr(c.dt, func)(*args, **kwargs) - exp = getattr(s.dt, func)(*args, **kwargs) - - if isinstance(res, pd.DataFrame): - tm.assert_frame_equal(res, exp) - elif isinstance(res, pd.Series): - tm.assert_series_equal(res, exp) - else: - tm.assert_numpy_array_equal(res, exp) - - for attr in attr_names: - try: - res = getattr(c.dt, attr) - exp = getattr(s.dt, attr) - except Exception as e: - print(name, attr) - raise e - - if isinstance(res, pd.DataFrame): - tm.assert_frame_equal(res, exp) - elif isinstance(res, pd.Series): - tm.assert_series_equal(res, exp) - else: - tm.assert_numpy_array_equal(res, exp) - - invalid = Series([1, 2, 3]).astype('category') - with tm.assertRaisesRegexp( - AttributeError, "Can only use .dt accessor with datetimelike"): - invalid.dt - self.assertFalse(hasattr(invalid, 'str')) - - def test_concat_categorical(self): - # See GH 10177 - df1 = pd.DataFrame(np.arange(18, dtype='int64').reshape(6, 3), - columns=["a", "b", "c"]) - - df2 = pd.DataFrame(np.arange(14, dtype='int64').reshape(7, 2), - columns=["a", "c"]) - - cat_values = ["one", "one", "two", "one", "two", "two", "one"] - df2['h'] = pd.Series(pd.Categorical(cat_values)) - - res = pd.concat((df1, df2), axis=0, ignore_index=True) - exp = pd.DataFrame({'a': [0, 3, 6, 9, 12, 15, 0, 2, 4, 6, 8, 10, 12], - 'b': [1, 4, 7, 10, 13, 16, np.nan, np.nan, - np.nan, np.nan, np.nan, np.nan, np.nan], - 'c': [2, 5, 8, 11, 14, 17, 1, 3, 5, 7, 9, 11, 13], - 'h': [None] * 6 + cat_values}) - tm.assert_frame_equal(res, exp) - - -class TestCategoricalSubclassing(tm.TestCase): - - def test_constructor(self): - sc = tm.SubclassedCategorical(['a', 'b', 'c']) - self.assertIsInstance(sc, tm.SubclassedCategorical) - 
tm.assert_categorical_equal(sc, Categorical(['a', 'b', 'c'])) - - def test_from_array(self): - sc = tm.SubclassedCategorical.from_codes([1, 0, 2], ['a', 'b', 'c']) - self.assertIsInstance(sc, tm.SubclassedCategorical) - exp = Categorical.from_codes([1, 0, 2], ['a', 'b', 'c']) - tm.assert_categorical_equal(sc, exp) - - def test_map(self): - sc = tm.SubclassedCategorical(['a', 'b', 'c']) - res = sc.map(lambda x: x.upper()) - self.assertIsInstance(res, tm.SubclassedCategorical) - exp = Categorical(['A', 'B', 'C']) - tm.assert_categorical_equal(res, exp) - - def test_map(self): - sc = tm.SubclassedCategorical(['a', 'b', 'c']) - res = sc.map(lambda x: x.upper()) - self.assertIsInstance(res, tm.SubclassedCategorical) - exp = Categorical(['A', 'B', 'C']) - tm.assert_categorical_equal(res, exp) diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 90b1157572be1..0b329f64dafa3 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -1,19 +1,25 @@ # -*- coding: utf-8 -*- +import pytest +import collections +from functools import partial + import numpy as np from pandas import Series, Timestamp from pandas.compat import range, lmap import pandas.core.common as com +from pandas.core import ops import pandas.util.testing as tm def test_mut_exclusive(): msg = "mutually exclusive arguments: '[ab]' and '[ab]'" - with tm.assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): com._mut_exclusive(a=1, b=2) assert com._mut_exclusive(a=1, b=None) == 1 assert com._mut_exclusive(major=None, major_axis=None) is None + assert com._mut_exclusive(a=None, b=2) == 2 def test_get_callable_name(): @@ -142,46 +148,46 @@ def test_random_state(): import numpy.random as npr # Check with seed state = com._random_state(5) - tm.assert_equal(state.uniform(), npr.RandomState(5).uniform()) + assert state.uniform() == npr.RandomState(5).uniform() # Check with random state object state2 = npr.RandomState(10) - tm.assert_equal( - 
com._random_state(state2).uniform(), npr.RandomState(10).uniform()) + assert (com._random_state(state2).uniform() == + npr.RandomState(10).uniform()) # check with no arg random state assert com._random_state() is np.random # Error for floats or strings - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): com._random_state('test') - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): com._random_state(5.5) def test_maybe_match_name(): - matched = com._maybe_match_name( + matched = ops._maybe_match_name( Series([1], name='x'), Series( [2], name='x')) assert (matched == 'x') - matched = com._maybe_match_name( + matched = ops._maybe_match_name( Series([1], name='x'), Series( [2], name='y')) assert (matched is None) - matched = com._maybe_match_name(Series([1]), Series([2], name='x')) + matched = ops._maybe_match_name(Series([1]), Series([2], name='x')) assert (matched is None) - matched = com._maybe_match_name(Series([1], name='x'), Series([2])) + matched = ops._maybe_match_name(Series([1], name='x'), Series([2])) assert (matched is None) - matched = com._maybe_match_name(Series([1], name='x'), [2]) + matched = ops._maybe_match_name(Series([1], name='x'), [2]) assert (matched == 'x') - matched = com._maybe_match_name([1], Series([2], name='y')) + matched = ops._maybe_match_name([1], Series([2], name='y')) assert (matched == 'y') @@ -193,3 +199,26 @@ def test_dict_compat(): assert (com._dict_compat(data_datetime64) == expected) assert (com._dict_compat(expected) == expected) assert (com._dict_compat(data_unchanged) == data_unchanged) + + +def test_standardize_mapping(): + # No uninitialized defaultdicts + with pytest.raises(TypeError): + com.standardize_mapping(collections.defaultdict) + + # No non-mapping subtypes, instance + with pytest.raises(TypeError): + com.standardize_mapping([]) + + # No non-mapping subtypes, class + with pytest.raises(TypeError): + com.standardize_mapping(list) + + fill = {'bad': 'data'} + assert 
(com.standardize_mapping(fill) == dict) + + # Convert instance to type + assert (com.standardize_mapping({}) == dict) + + dd = collections.defaultdict(list) + assert isinstance(com.standardize_mapping(dd), partial) diff --git a/pandas/tests/test_compat.py b/pandas/tests/test_compat.py index 68c0b81eb18ce..ead9ba1e26e2d 100644 --- a/pandas/tests/test_compat.py +++ b/pandas/tests/test_compat.py @@ -3,24 +3,27 @@ Testing that functions from compat work as expected """ +import pytest from pandas.compat import (range, zip, map, filter, lrange, lzip, lmap, lfilter, builtins, iterkeys, itervalues, iteritems, - next) -import pandas.util.testing as tm + next, get_range_parameters, PY2) -class TestBuiltinIterators(tm.TestCase): +class TestBuiltinIterators(object): - def check_result(self, actual, expected, lengths): + @classmethod + def check_result(cls, actual, expected, lengths): for (iter_res, list_res), exp, length in zip(actual, expected, lengths): - self.assertNotIsInstance(iter_res, list) - tm.assertIsInstance(list_res, list) + assert not isinstance(iter_res, list) + assert isinstance(list_res, list) + iter_res = list(iter_res) - self.assertEqual(len(list_res), length) - self.assertEqual(len(iter_res), length) - self.assertEqual(iter_res, exp) - self.assertEqual(list_res, exp) + + assert len(list_res) == length + assert len(iter_res) == length + assert iter_res == exp + assert list_res == exp def test_range(self): actual1 = range(10) @@ -64,6 +67,25 @@ def test_zip(self): self.check_result(actual, expected, lengths) def test_dict_iterators(self): - self.assertEqual(next(itervalues({1: 2})), 2) - self.assertEqual(next(iterkeys({1: 2})), 1) - self.assertEqual(next(iteritems({1: 2})), (1, 2)) + assert next(itervalues({1: 2})) == 2 + assert next(iterkeys({1: 2})) == 1 + assert next(iteritems({1: 2})) == (1, 2) + + +class TestCompatFunctions(object): + + @pytest.mark.parametrize( + 'start,stop,step', [(0, 10, 2), (11, -2, -1), (0, -5, 1), (2, 4, 8)]) + def 
test_get_range_parameters(self, start, stop, step): + rng = range(start, stop, step) + if PY2 and len(rng) == 0: + start_expected, stop_expected, step_expected = 0, 0, 1 + elif PY2 and len(rng) == 1: + start_expected, stop_expected, step_expected = start, start + 1, 1 + else: + start_expected, stop_expected, step_expected = start, stop, step + + start_result, stop_result, step_result = get_range_parameters(rng) + assert start_result == start_expected + assert stop_result == stop_expected + assert step_result == step_expected diff --git a/pandas/tests/test_config.py b/pandas/tests/test_config.py index c58aada193b15..91ce65dcce9b2 100644 --- a/pandas/tests/test_config.py +++ b/pandas/tests/test_config.py @@ -1,28 +1,36 @@ # -*- coding: utf-8 -*- +import pytest + import pandas as pd -import unittest -import warnings +import warnings -class TestConfig(unittest.TestCase): - def __init__(self, *args): - super(TestConfig, self).__init__(*args) +class TestConfig(object): + @classmethod + def setup_class(cls): from copy import deepcopy - self.cf = pd.core.config - self.gc = deepcopy(getattr(self.cf, '_global_config')) - self.do = deepcopy(getattr(self.cf, '_deprecated_options')) - self.ro = deepcopy(getattr(self.cf, '_registered_options')) - def setUp(self): + cls.cf = pd.core.config + cls.gc = deepcopy(getattr(cls.cf, '_global_config')) + cls.do = deepcopy(getattr(cls.cf, '_deprecated_options')) + cls.ro = deepcopy(getattr(cls.cf, '_registered_options')) + + def setup_method(self, method): setattr(self.cf, '_global_config', {}) - setattr( - self.cf, 'options', self.cf.DictWrapper(self.cf._global_config)) + setattr(self.cf, 'options', self.cf.DictWrapper( + self.cf._global_config)) setattr(self.cf, '_deprecated_options', {}) setattr(self.cf, '_registered_options', {}) - def tearDown(self): + # Our test fixture in conftest.py sets "chained_assignment" + # to "raise" only after all test methods have been setup. 
+ # However, after this setup, there is no longer any + # "chained_assignment" option, so re-register it. + self.cf.register_option('chained_assignment', 'raise') + + def teardown_method(self, method): setattr(self.cf, '_global_config', self.gc) setattr(self.cf, '_deprecated_options', self.do) setattr(self.cf, '_registered_options', self.ro) @@ -30,36 +38,36 @@ def tearDown(self): def test_api(self): # the pandas object exposes the user API - self.assertTrue(hasattr(pd, 'get_option')) - self.assertTrue(hasattr(pd, 'set_option')) - self.assertTrue(hasattr(pd, 'reset_option')) - self.assertTrue(hasattr(pd, 'describe_option')) + assert hasattr(pd, 'get_option') + assert hasattr(pd, 'set_option') + assert hasattr(pd, 'reset_option') + assert hasattr(pd, 'describe_option') def test_is_one_of_factory(self): v = self.cf.is_one_of_factory([None, 12]) v(12) v(None) - self.assertRaises(ValueError, v, 1.1) + pytest.raises(ValueError, v, 1.1) def test_register_option(self): self.cf.register_option('a', 1, 'doc') # can't register an already registered option - self.assertRaises(KeyError, self.cf.register_option, 'a', 1, 'doc') + pytest.raises(KeyError, self.cf.register_option, 'a', 1, 'doc') # can't register an already registered option - self.assertRaises(KeyError, self.cf.register_option, 'a.b.c.d1', 1, - 'doc') - self.assertRaises(KeyError, self.cf.register_option, 'a.b.c.d2', 1, - 'doc') + pytest.raises(KeyError, self.cf.register_option, 'a.b.c.d1', 1, + 'doc') + pytest.raises(KeyError, self.cf.register_option, 'a.b.c.d2', 1, + 'doc') # no python keywords - self.assertRaises(ValueError, self.cf.register_option, 'for', 0) - self.assertRaises(ValueError, self.cf.register_option, 'a.for.b', 0) + pytest.raises(ValueError, self.cf.register_option, 'for', 0) + pytest.raises(ValueError, self.cf.register_option, 'a.for.b', 0) # must be valid identifier (ensure attribute access works) - self.assertRaises(ValueError, self.cf.register_option, - 'Oh my Goddess!', 0) + 
pytest.raises(ValueError, self.cf.register_option, + 'Oh my Goddess!', 0) # we can register options several levels deep # without predefining the intermediate steps @@ -82,56 +90,42 @@ def test_describe_option(self): self.cf.register_option('l', "foo") # non-existent keys raise KeyError - self.assertRaises(KeyError, self.cf.describe_option, 'no.such.key') + pytest.raises(KeyError, self.cf.describe_option, 'no.such.key') # we can get the description for any key we registered - self.assertTrue( - 'doc' in self.cf.describe_option('a', _print_desc=False)) - self.assertTrue( - 'doc2' in self.cf.describe_option('b', _print_desc=False)) - self.assertTrue( - 'precated' in self.cf.describe_option('b', _print_desc=False)) - - self.assertTrue( - 'doc3' in self.cf.describe_option('c.d.e1', _print_desc=False)) - self.assertTrue( - 'doc4' in self.cf.describe_option('c.d.e2', _print_desc=False)) + assert 'doc' in self.cf.describe_option('a', _print_desc=False) + assert 'doc2' in self.cf.describe_option('b', _print_desc=False) + assert 'precated' in self.cf.describe_option('b', _print_desc=False) + assert 'doc3' in self.cf.describe_option('c.d.e1', _print_desc=False) + assert 'doc4' in self.cf.describe_option('c.d.e2', _print_desc=False) # if no doc is specified we get a default message # saying "description not available" - self.assertTrue( - 'vailable' in self.cf.describe_option('f', _print_desc=False)) - self.assertTrue( - 'vailable' in self.cf.describe_option('g.h', _print_desc=False)) - self.assertTrue( - 'precated' in self.cf.describe_option('g.h', _print_desc=False)) - self.assertTrue( - 'k' in self.cf.describe_option('g.h', _print_desc=False)) + assert 'vailable' in self.cf.describe_option('f', _print_desc=False) + assert 'vailable' in self.cf.describe_option('g.h', _print_desc=False) + assert 'precated' in self.cf.describe_option('g.h', _print_desc=False) + assert 'k' in self.cf.describe_option('g.h', _print_desc=False) # default is reported - self.assertTrue( - 'foo' in 
self.cf.describe_option('l', _print_desc=False)) + assert 'foo' in self.cf.describe_option('l', _print_desc=False) # current value is reported - self.assertFalse( - 'bar' in self.cf.describe_option('l', _print_desc=False)) + assert 'bar' not in self.cf.describe_option('l', _print_desc=False) self.cf.set_option("l", "bar") - self.assertTrue( - 'bar' in self.cf.describe_option('l', _print_desc=False)) + assert 'bar' in self.cf.describe_option('l', _print_desc=False) def test_case_insensitive(self): self.cf.register_option('KanBAN', 1, 'doc') - self.assertTrue( - 'doc' in self.cf.describe_option('kanbaN', _print_desc=False)) - self.assertEqual(self.cf.get_option('kanBaN'), 1) + assert 'doc' in self.cf.describe_option('kanbaN', _print_desc=False) + assert self.cf.get_option('kanBaN') == 1 self.cf.set_option('KanBan', 2) - self.assertEqual(self.cf.get_option('kAnBaN'), 2) + assert self.cf.get_option('kAnBaN') == 2 # gets of non-existent keys fail - self.assertRaises(KeyError, self.cf.get_option, 'no_such_option') + pytest.raises(KeyError, self.cf.get_option, 'no_such_option') self.cf.deprecate_option('KanBan') - self.assertTrue(self.cf._is_deprecated('kAnBaN')) + assert self.cf._is_deprecated('kAnBaN') def test_get_option(self): self.cf.register_option('a', 1, 'doc') @@ -139,118 +133,118 @@ def test_get_option(self): self.cf.register_option('b.b', None, 'doc2') # gets of existing keys succeed - self.assertEqual(self.cf.get_option('a'), 1) - self.assertEqual(self.cf.get_option('b.c'), 'hullo') - self.assertTrue(self.cf.get_option('b.b') is None) + assert self.cf.get_option('a') == 1 + assert self.cf.get_option('b.c') == 'hullo' + assert self.cf.get_option('b.b') is None # gets of non-existent keys fail - self.assertRaises(KeyError, self.cf.get_option, 'no_such_option') + pytest.raises(KeyError, self.cf.get_option, 'no_such_option') def test_set_option(self): self.cf.register_option('a', 1, 'doc') self.cf.register_option('b.c', 'hullo', 'doc2') 
self.cf.register_option('b.b', None, 'doc2') - self.assertEqual(self.cf.get_option('a'), 1) - self.assertEqual(self.cf.get_option('b.c'), 'hullo') - self.assertTrue(self.cf.get_option('b.b') is None) + assert self.cf.get_option('a') == 1 + assert self.cf.get_option('b.c') == 'hullo' + assert self.cf.get_option('b.b') is None self.cf.set_option('a', 2) self.cf.set_option('b.c', 'wurld') self.cf.set_option('b.b', 1.1) - self.assertEqual(self.cf.get_option('a'), 2) - self.assertEqual(self.cf.get_option('b.c'), 'wurld') - self.assertEqual(self.cf.get_option('b.b'), 1.1) + assert self.cf.get_option('a') == 2 + assert self.cf.get_option('b.c') == 'wurld' + assert self.cf.get_option('b.b') == 1.1 - self.assertRaises(KeyError, self.cf.set_option, 'no.such.key', None) + pytest.raises(KeyError, self.cf.set_option, 'no.such.key', None) def test_set_option_empty_args(self): - self.assertRaises(ValueError, self.cf.set_option) + pytest.raises(ValueError, self.cf.set_option) def test_set_option_uneven_args(self): - self.assertRaises(ValueError, self.cf.set_option, 'a.b', 2, 'b.c') + pytest.raises(ValueError, self.cf.set_option, 'a.b', 2, 'b.c') def test_set_option_invalid_single_argument_type(self): - self.assertRaises(ValueError, self.cf.set_option, 2) + pytest.raises(ValueError, self.cf.set_option, 2) def test_set_option_multiple(self): self.cf.register_option('a', 1, 'doc') self.cf.register_option('b.c', 'hullo', 'doc2') self.cf.register_option('b.b', None, 'doc2') - self.assertEqual(self.cf.get_option('a'), 1) - self.assertEqual(self.cf.get_option('b.c'), 'hullo') - self.assertTrue(self.cf.get_option('b.b') is None) + assert self.cf.get_option('a') == 1 + assert self.cf.get_option('b.c') == 'hullo' + assert self.cf.get_option('b.b') is None self.cf.set_option('a', '2', 'b.c', None, 'b.b', 10.0) - self.assertEqual(self.cf.get_option('a'), '2') - self.assertTrue(self.cf.get_option('b.c') is None) - self.assertEqual(self.cf.get_option('b.b'), 10.0) + assert 
self.cf.get_option('a') == '2' + assert self.cf.get_option('b.c') is None + assert self.cf.get_option('b.b') == 10.0 def test_validation(self): self.cf.register_option('a', 1, 'doc', validator=self.cf.is_int) self.cf.register_option('b.c', 'hullo', 'doc2', validator=self.cf.is_text) - self.assertRaises(ValueError, self.cf.register_option, 'a.b.c.d2', - 'NO', 'doc', validator=self.cf.is_int) + pytest.raises(ValueError, self.cf.register_option, 'a.b.c.d2', + 'NO', 'doc', validator=self.cf.is_int) self.cf.set_option('a', 2) # int is_int self.cf.set_option('b.c', 'wurld') # str is_str - self.assertRaises( + pytest.raises( ValueError, self.cf.set_option, 'a', None) # None not is_int - self.assertRaises(ValueError, self.cf.set_option, 'a', 'ab') - self.assertRaises(ValueError, self.cf.set_option, 'b.c', 1) + pytest.raises(ValueError, self.cf.set_option, 'a', 'ab') + pytest.raises(ValueError, self.cf.set_option, 'b.c', 1) validator = self.cf.is_one_of_factory([None, self.cf.is_callable]) self.cf.register_option('b', lambda: None, 'doc', validator=validator) self.cf.set_option('b', '%.1f'.format) # Formatter is callable self.cf.set_option('b', None) # Formatter is none (default) - self.assertRaises(ValueError, self.cf.set_option, 'b', '%.1f') + pytest.raises(ValueError, self.cf.set_option, 'b', '%.1f') def test_reset_option(self): self.cf.register_option('a', 1, 'doc', validator=self.cf.is_int) self.cf.register_option('b.c', 'hullo', 'doc2', validator=self.cf.is_str) - self.assertEqual(self.cf.get_option('a'), 1) - self.assertEqual(self.cf.get_option('b.c'), 'hullo') + assert self.cf.get_option('a') == 1 + assert self.cf.get_option('b.c') == 'hullo' self.cf.set_option('a', 2) self.cf.set_option('b.c', 'wurld') - self.assertEqual(self.cf.get_option('a'), 2) - self.assertEqual(self.cf.get_option('b.c'), 'wurld') + assert self.cf.get_option('a') == 2 + assert self.cf.get_option('b.c') == 'wurld' self.cf.reset_option('a') - self.assertEqual(self.cf.get_option('a'), 1) - 
self.assertEqual(self.cf.get_option('b.c'), 'wurld') + assert self.cf.get_option('a') == 1 + assert self.cf.get_option('b.c') == 'wurld' self.cf.reset_option('b.c') - self.assertEqual(self.cf.get_option('a'), 1) - self.assertEqual(self.cf.get_option('b.c'), 'hullo') + assert self.cf.get_option('a') == 1 + assert self.cf.get_option('b.c') == 'hullo' def test_reset_option_all(self): self.cf.register_option('a', 1, 'doc', validator=self.cf.is_int) self.cf.register_option('b.c', 'hullo', 'doc2', validator=self.cf.is_str) - self.assertEqual(self.cf.get_option('a'), 1) - self.assertEqual(self.cf.get_option('b.c'), 'hullo') + assert self.cf.get_option('a') == 1 + assert self.cf.get_option('b.c') == 'hullo' self.cf.set_option('a', 2) self.cf.set_option('b.c', 'wurld') - self.assertEqual(self.cf.get_option('a'), 2) - self.assertEqual(self.cf.get_option('b.c'), 'wurld') + assert self.cf.get_option('a') == 2 + assert self.cf.get_option('b.c') == 'wurld' self.cf.reset_option("all") - self.assertEqual(self.cf.get_option('a'), 1) - self.assertEqual(self.cf.get_option('b.c'), 'hullo') + assert self.cf.get_option('a') == 1 + assert self.cf.get_option('b.c') == 'hullo' def test_deprecate_option(self): # we can deprecate non-existent options self.cf.deprecate_option('foo') - self.assertTrue(self.cf._is_deprecated('foo')) + assert self.cf._is_deprecated('foo') with warnings.catch_warnings(record=True) as w: warnings.simplefilter('always') try: @@ -260,9 +254,8 @@ def test_deprecate_option(self): else: self.fail("Nonexistent option didn't raise KeyError") - self.assertEqual(len(w), 1) # should have raised one warning - self.assertTrue( - 'deprecated' in str(w[-1])) # we get the default message + assert len(w) == 1 # should have raised one warning + assert 'deprecated' in str(w[-1]) # we get the default message self.cf.register_option('a', 1, 'doc', validator=self.cf.is_int) self.cf.register_option('b.c', 'hullo', 'doc2') @@ -273,13 +266,11 @@ def test_deprecate_option(self): 
warnings.simplefilter('always') self.cf.get_option('a') - self.assertEqual(len(w), 1) # should have raised one warning - self.assertTrue( - 'eprecated' in str(w[-1])) # we get the default message - self.assertTrue( - 'nifty_ver' in str(w[-1])) # with the removal_ver quoted + assert len(w) == 1 # should have raised one warning + assert 'eprecated' in str(w[-1]) # we get the default message + assert 'nifty_ver' in str(w[-1]) # with the removal_ver quoted - self.assertRaises( + pytest.raises( KeyError, self.cf.deprecate_option, 'a') # can't depr. twice self.cf.deprecate_option('b.c', 'zounds!') @@ -287,66 +278,60 @@ def test_deprecate_option(self): warnings.simplefilter('always') self.cf.get_option('b.c') - self.assertEqual(len(w), 1) # should have raised one warning - self.assertTrue( - 'zounds!' in str(w[-1])) # we get the custom message + assert len(w) == 1 # should have raised one warning + assert 'zounds!' in str(w[-1]) # we get the custom message # test rerouting keys self.cf.register_option('d.a', 'foo', 'doc2') self.cf.register_option('d.dep', 'bar', 'doc2') - self.assertEqual(self.cf.get_option('d.a'), 'foo') - self.assertEqual(self.cf.get_option('d.dep'), 'bar') + assert self.cf.get_option('d.a') == 'foo' + assert self.cf.get_option('d.dep') == 'bar' self.cf.deprecate_option('d.dep', rkey='d.a') # reroute d.dep to d.a with warnings.catch_warnings(record=True) as w: warnings.simplefilter('always') - self.assertEqual(self.cf.get_option('d.dep'), 'foo') + assert self.cf.get_option('d.dep') == 'foo' - self.assertEqual(len(w), 1) # should have raised one warning - self.assertTrue( - 'eprecated' in str(w[-1])) # we get the custom message + assert len(w) == 1 # should have raised one warning + assert 'eprecated' in str(w[-1]) # we get the custom message with warnings.catch_warnings(record=True) as w: warnings.simplefilter('always') self.cf.set_option('d.dep', 'baz') # should overwrite "d.a" - self.assertEqual(len(w), 1) # should have raised one warning - 
self.assertTrue( - 'eprecated' in str(w[-1])) # we get the custom message + assert len(w) == 1 # should have raised one warning + assert 'eprecated' in str(w[-1]) # we get the custom message with warnings.catch_warnings(record=True) as w: warnings.simplefilter('always') - self.assertEqual(self.cf.get_option('d.dep'), 'baz') + assert self.cf.get_option('d.dep') == 'baz' - self.assertEqual(len(w), 1) # should have raised one warning - self.assertTrue( - 'eprecated' in str(w[-1])) # we get the custom message + assert len(w) == 1 # should have raised one warning + assert 'eprecated' in str(w[-1]) # we get the custom message def test_config_prefix(self): with self.cf.config_prefix("base"): self.cf.register_option('a', 1, "doc1") self.cf.register_option('b', 2, "doc2") - self.assertEqual(self.cf.get_option('a'), 1) - self.assertEqual(self.cf.get_option('b'), 2) + assert self.cf.get_option('a') == 1 + assert self.cf.get_option('b') == 2 self.cf.set_option('a', 3) self.cf.set_option('b', 4) - self.assertEqual(self.cf.get_option('a'), 3) - self.assertEqual(self.cf.get_option('b'), 4) + assert self.cf.get_option('a') == 3 + assert self.cf.get_option('b') == 4 - self.assertEqual(self.cf.get_option('base.a'), 3) - self.assertEqual(self.cf.get_option('base.b'), 4) - self.assertTrue( - 'doc1' in self.cf.describe_option('base.a', _print_desc=False)) - self.assertTrue( - 'doc2' in self.cf.describe_option('base.b', _print_desc=False)) + assert self.cf.get_option('base.a') == 3 + assert self.cf.get_option('base.b') == 4 + assert 'doc1' in self.cf.describe_option('base.a', _print_desc=False) + assert 'doc2' in self.cf.describe_option('base.b', _print_desc=False) self.cf.reset_option('base.a') self.cf.reset_option('base.b') with self.cf.config_prefix("base"): - self.assertEqual(self.cf.get_option('a'), 1) - self.assertEqual(self.cf.get_option('b'), 2) + assert self.cf.get_option('a') == 1 + assert self.cf.get_option('b') == 2 def test_callback(self): k = [None] @@ -361,21 +346,21 @@ 
def callback(key): del k[-1], v[-1] self.cf.set_option("d.a", "fooz") - self.assertEqual(k[-1], "d.a") - self.assertEqual(v[-1], "fooz") + assert k[-1] == "d.a" + assert v[-1] == "fooz" del k[-1], v[-1] self.cf.set_option("d.b", "boo") - self.assertEqual(k[-1], "d.b") - self.assertEqual(v[-1], "boo") + assert k[-1] == "d.b" + assert v[-1] == "boo" del k[-1], v[-1] self.cf.reset_option("d.b") - self.assertEqual(k[-1], "d.b") + assert k[-1] == "d.b" def test_set_ContextManager(self): def eq(val): - self.assertEqual(self.cf.get_option("a"), val) + assert self.cf.get_option("a") == val self.cf.register_option('a', 0) eq(0) @@ -405,22 +390,22 @@ def f3(key): self.cf.register_option('c', 0, cb=f3) options = self.cf.options - self.assertEqual(options.a, 0) + assert options.a == 0 with self.cf.option_context("a", 15): - self.assertEqual(options.a, 15) + assert options.a == 15 options.a = 500 - self.assertEqual(self.cf.get_option("a"), 500) + assert self.cf.get_option("a") == 500 self.cf.reset_option("a") - self.assertEqual(options.a, self.cf.get_option("a", 0)) + assert options.a == self.cf.get_option("a", 0) - self.assertRaises(KeyError, f) - self.assertRaises(KeyError, f2) + pytest.raises(KeyError, f) + pytest.raises(KeyError, f2) # make sure callback kicks when using this form of setting options.c = 1 - self.assertEqual(len(holder), 1) + assert len(holder) == 1 def test_option_context_scope(self): # Ensure that creating a context does not affect the existing @@ -435,11 +420,17 @@ def test_option_context_scope(self): # Ensure creating contexts didn't affect the current context. ctx = self.cf.option_context(option_name, context_value) - self.assertEqual(self.cf.get_option(option_name), original_value) + assert self.cf.get_option(option_name) == original_value # Ensure the correct value is available inside the context. 
with ctx: - self.assertEqual(self.cf.get_option(option_name), context_value) + assert self.cf.get_option(option_name) == context_value # Ensure the current context is reset - self.assertEqual(self.cf.get_option(option_name), original_value) + assert self.cf.get_option(option_name) == original_value + + def test_dictwrapper_getattr(self): + options = self.cf.options + # GH 19789 + pytest.raises(self.cf.OptionError, getattr, options, 'bananas') + assert not hasattr(options, 'bananas') diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py new file mode 100644 index 0000000000000..a595d9f18d6b8 --- /dev/null +++ b/pandas/tests/test_downstream.py @@ -0,0 +1,110 @@ +# -*- coding: utf-8 -*- +""" +Testing that we work in the downstream packages +""" +import pytest +import numpy as np # noqa +from pandas import DataFrame +from pandas.compat import PY36 +from pandas.util import testing as tm +import importlib + + +def import_module(name): + # we *only* want to skip if the module is truly not available + # and NOT just an actual import error because of pandas changes + + if PY36: + try: + return importlib.import_module(name) + except ModuleNotFoundError: # noqa + pytest.skip("skipping as {} not available".format(name)) + + else: + try: + return importlib.import_module(name) + except ImportError as e: + if "No module named" in str(e) and name in str(e): + pytest.skip("skipping as {} not available".format(name)) + raise + + +@pytest.fixture +def df(): + return DataFrame({'A': [1, 2, 3]}) + + +def test_dask(df): + + toolz = import_module('toolz') # noqa + dask = import_module('dask') # noqa + + import dask.dataframe as dd + + ddf = dd.from_pandas(df, npartitions=3) + assert ddf.A is not None + assert ddf.compute() is not None + + +def test_xarray(df): + + xarray = import_module('xarray') # noqa + + assert df.to_xarray() is not None + + +@tm.network +def test_statsmodels(): + + statsmodels = import_module('statsmodels') # noqa + import statsmodels.api as 
sm + import statsmodels.formula.api as smf + df = sm.datasets.get_rdataset("Guerry", "HistData").data + smf.ols('Lottery ~ Literacy + np.log(Pop1831)', data=df).fit() + + +def test_scikit_learn(df): + + sklearn = import_module('sklearn') # noqa + from sklearn import svm, datasets + + digits = datasets.load_digits() + clf = svm.SVC(gamma=0.001, C=100.) + clf.fit(digits.data[:-1], digits.target[:-1]) + clf.predict(digits.data[-1:]) + + +@tm.network +def test_seaborn(): + + seaborn = import_module('seaborn') + tips = seaborn.load_dataset("tips") + seaborn.stripplot(x="day", y="total_bill", data=tips) + + +def test_pandas_gbq(df): + + pandas_gbq = import_module('pandas_gbq') # noqa + + +@tm.network +def test_pandas_datareader(): + + pandas_datareader = import_module('pandas_datareader') # noqa + pandas_datareader.DataReader( + 'F', 'quandl', '2017-01-01', '2017-02-01') + + +def test_geopandas(): + + geopandas = import_module('geopandas') # noqa + fp = geopandas.datasets.get_path('naturalearth_lowres') + assert geopandas.read_file(fp) is not None + + +def test_pyarrow(df): + + pyarrow = import_module('pyarrow') # noqa + table = pyarrow.Table.from_pandas(df) + result = table.to_pandas() + tm.assert_frame_equal(result, df) diff --git a/pandas/tests/test_errors.py b/pandas/tests/test_errors.py new file mode 100644 index 0000000000000..e2a142366a89e --- /dev/null +++ b/pandas/tests/test_errors.py @@ -0,0 +1,81 @@ +# -*- coding: utf-8 -*- + +import pytest +from warnings import catch_warnings +import pandas # noqa +import pandas as pd +from pandas.errors import AbstractMethodError +import pandas.util.testing as tm + + +@pytest.mark.parametrize( + "exc", ['UnsupportedFunctionCall', 'UnsortedIndexError', + 'OutOfBoundsDatetime', + 'ParserError', 'PerformanceWarning', 'DtypeWarning', + 'EmptyDataError', 'ParserWarning', 'MergeError']) +def test_exception_importable(exc): + from pandas import errors + e = getattr(errors, exc) + assert e is not None + + # check that we can raise 
on them + with pytest.raises(e): + raise e() + + +def test_catch_oob(): + from pandas import errors + + try: + pd.Timestamp('15000101') + except errors.OutOfBoundsDatetime: + pass + + +def test_error_rename(): + # see gh-12665 + from pandas.errors import ParserError + from pandas.io.common import CParserError + + try: + raise CParserError() + except ParserError: + pass + + try: + raise ParserError() + except CParserError: + pass + + with catch_warnings(record=True): + try: + raise ParserError() + except pd.parser.CParserError: + pass + + +class Foo: + @classmethod + def classmethod(cls): + raise AbstractMethodError(cls, methodtype='classmethod') + + @property + def property(self): + raise AbstractMethodError(self, methodtype='property') + + def method(self): + raise AbstractMethodError(self) + + +def test_AbstractMethodError_classmethod(): + xpr = "This classmethod must be defined in the concrete class Foo" + with tm.assert_raises_regex(AbstractMethodError, xpr): + Foo.classmethod() + + xpr = "This property must be defined in the concrete class Foo" + with tm.assert_raises_regex(AbstractMethodError, xpr): + Foo().property + + xpr = "This method must be defined in the concrete class Foo" + with tm.assert_raises_regex(AbstractMethodError, xpr): + Foo().method() diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index 0318757f76a11..56e00fa8af23d 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -2,6 +2,7 @@ from __future__ import print_function # pylint: disable-msg=W0612,E1101 +from warnings import catch_warnings import re import operator import pytest @@ -11,18 +12,14 @@ import numpy as np from pandas.core.api import DataFrame, Panel -from pandas.computation import expressions as expr -from pandas import compat, _np_version_under1p12 +from pandas.core.computation import expressions as expr +from pandas import compat, _np_version_under1p11, _np_version_under1p13 from pandas.util.testing import 
(assert_almost_equal, assert_series_equal, - assert_frame_equal, assert_panel_equal, - assert_panel4d_equal, slow) -from pandas.formats.printing import pprint_thing + assert_frame_equal, assert_panel_equal) +from pandas.io.formats.printing import pprint_thing import pandas.util.testing as tm -if not expr._USE_NUMEXPR: - numexpr = pytest.importorskip('numexpr') - _frame = DataFrame(randn(10000, 4), columns=list('ABCD'), dtype='float64') _frame2 = DataFrame(randn(100, 4), columns=list('ABCD'), dtype='float64') _mixed = DataFrame({'A': _frame['A'].copy(), @@ -35,24 +32,32 @@ 'D': _frame2['D'].astype('int32')}) _integer = DataFrame( np.random.randint(1, 100, - size=(10001, 4)), columns=list('ABCD'), dtype='int64') + size=(10001, 4)), + columns=list('ABCD'), dtype='int64') _integer2 = DataFrame(np.random.randint(1, 100, size=(101, 4)), columns=list('ABCD'), dtype='int64') -_frame_panel = Panel(dict(ItemA=_frame.copy(), ItemB=( - _frame.copy() + 3), ItemC=_frame.copy(), ItemD=_frame.copy())) -_frame2_panel = Panel(dict(ItemA=_frame2.copy(), ItemB=(_frame2.copy() + 3), - ItemC=_frame2.copy(), ItemD=_frame2.copy())) -_integer_panel = Panel(dict(ItemA=_integer, ItemB=(_integer + 34).astype( - 'int64'))) -_integer2_panel = Panel(dict(ItemA=_integer2, ItemB=(_integer2 + 34).astype( - 'int64'))) -_mixed_panel = Panel(dict(ItemA=_mixed, ItemB=(_mixed + 3))) -_mixed2_panel = Panel(dict(ItemA=_mixed2, ItemB=(_mixed2 + 3))) + +with catch_warnings(record=True): + _frame_panel = Panel(dict(ItemA=_frame.copy(), + ItemB=(_frame.copy() + 3), + ItemC=_frame.copy(), + ItemD=_frame.copy())) + _frame2_panel = Panel(dict(ItemA=_frame2.copy(), + ItemB=(_frame2.copy() + 3), + ItemC=_frame2.copy(), + ItemD=_frame2.copy())) + _integer_panel = Panel(dict(ItemA=_integer, + ItemB=(_integer + 34).astype('int64'))) + _integer2_panel = Panel(dict(ItemA=_integer2, + ItemB=(_integer2 + 34).astype('int64'))) + _mixed_panel = Panel(dict(ItemA=_mixed, ItemB=(_mixed + 3))) + _mixed2_panel = 
Panel(dict(ItemA=_mixed2, ItemB=(_mixed2 + 3))) -class TestExpressions(tm.TestCase): +@pytest.mark.skipif(not expr._USE_NUMEXPR, reason='not using numexpr') +class TestExpressions(object): - def setUp(self): + def setup_method(self, method): self.frame = _frame.copy() self.frame2 = _frame2.copy() @@ -61,23 +66,17 @@ def setUp(self): self.integer = _integer.copy() self._MIN_ELEMENTS = expr._MIN_ELEMENTS - def tearDown(self): + def teardown_method(self, method): expr._MIN_ELEMENTS = self._MIN_ELEMENTS def run_arithmetic(self, df, other, assert_func, check_dtype=False, test_flex=True): expr._MIN_ELEMENTS = 0 - operations = ['add', 'sub', 'mul', 'mod', 'truediv', 'floordiv', 'pow'] + operations = ['add', 'sub', 'mul', 'mod', 'truediv', 'floordiv'] if not compat.PY3: operations.append('div') for arith in operations: - # numpy >= 1.12 doesn't handle integers - # raised to integer powers - # https://github.com/pandas-dev/pandas/issues/15363 - if arith == 'pow' and not _np_version_under1p12: - continue - operator_name = arith if arith == 'div': operator_name = 'truediv' @@ -118,6 +117,7 @@ def run_binary(self, df, other, assert_func, test_flex=False, expr._MIN_ELEMENTS = 0 expr.set_test_mode(True) operations = ['gt', 'lt', 'ge', 'le', 'eq', 'ne'] + for arith in operations: if test_flex: op = lambda x, y: getattr(df, arith)(y) @@ -190,7 +190,7 @@ def test_integer_arithmetic_frame(self): def test_integer_arithmetic_series(self): self.run_series(self.integer.iloc[:, 0], self.integer.iloc[:, 0]) - @slow + @pytest.mark.slow def test_integer_panel(self): self.run_panel(_integer2_panel, np.random.randint(1, 100)) @@ -200,16 +200,10 @@ def test_float_arithemtic_frame(self): def test_float_arithmetic_series(self): self.run_series(self.frame2.iloc[:, 0], self.frame2.iloc[:, 0]) - @slow + @pytest.mark.slow def test_float_panel(self): self.run_panel(_frame2_panel, np.random.randn() + 0.1, binary_comp=0.8) - @slow - def test_panel4d(self): - with 
tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self.run_panel(tm.makePanel4D(), np.random.randn() + 0.5, - assert_func=assert_panel4d_equal, binary_comp=3) - def test_mixed_arithmetic_frame(self): # TODO: FIGURE OUT HOW TO GET IT TO WORK... # can't do arithmetic because comparison methods try to do *entire* @@ -220,7 +214,7 @@ def test_mixed_arithmetic_series(self): for col in self.mixed2.columns: self.run_series(self.mixed2[col], self.mixed2[col], binary_comp=4) - @slow + @pytest.mark.slow def test_mixed_panel(self): self.run_panel(_mixed2_panel, np.random.randint(1, 100), binary_comp=-2) @@ -248,22 +242,22 @@ def test_invalid(self): # no op result = expr._can_use_numexpr(operator.add, None, self.frame, self.frame, 'evaluate') - self.assertFalse(result) + assert not result # mixed result = expr._can_use_numexpr(operator.add, '+', self.mixed, self.frame, 'evaluate') - self.assertFalse(result) + assert not result # min elements result = expr._can_use_numexpr(operator.add, '+', self.frame2, self.frame2, 'evaluate') - self.assertFalse(result) + assert not result # ok, we only check on first part of expression result = expr._can_use_numexpr(operator.add, '+', self.frame, self.frame2, 'evaluate') - self.assertTrue(result) + assert result def test_binary_ops(self): def testit(): @@ -274,10 +268,10 @@ def testit(): for op, op_str in [('add', '+'), ('sub', '-'), ('mul', '*'), ('div', '/'), ('pow', '**')]: - # numpy >= 1.12 doesn't handle integers + # numpy >= 1.11 doesn't handle integers # raised to integer powers # https://github.com/pandas-dev/pandas/issues/15363 - if op == 'pow' and not _np_version_under1p12: + if op == 'pow' and not _np_version_under1p11: continue if op == 'div': @@ -287,7 +281,7 @@ def testit(): if op is not None: result = expr._can_use_numexpr(op, op_str, f, f, 'evaluate') - self.assertNotEqual(result, f._is_mixed_type) + assert result != f._is_mixed_type result = expr.evaluate(op, op_str, f, f, use_numexpr=True) @@ -302,7 
+296,7 @@ def testit(): result = expr._can_use_numexpr(op, op_str, f2, f2, 'evaluate') - self.assertFalse(result) + assert not result expr.set_use_numexpr(False) testit() @@ -330,7 +324,7 @@ def testit(): result = expr._can_use_numexpr(op, op_str, f11, f12, 'evaluate') - self.assertNotEqual(result, f11._is_mixed_type) + assert result != f11._is_mixed_type result = expr.evaluate(op, op_str, f11, f12, use_numexpr=True) @@ -343,7 +337,7 @@ def testit(): result = expr._can_use_numexpr(op, op_str, f21, f22, 'evaluate') - self.assertFalse(result) + assert not result expr.set_use_numexpr(False) testit() @@ -384,22 +378,22 @@ def test_bool_ops_raise_on_arithmetic(self): f = getattr(operator, name) err_msg = re.escape(msg % op) - with tm.assertRaisesRegexp(NotImplementedError, err_msg): + with tm.assert_raises_regex(NotImplementedError, err_msg): f(df, df) - with tm.assertRaisesRegexp(NotImplementedError, err_msg): + with tm.assert_raises_regex(NotImplementedError, err_msg): f(df.a, df.b) - with tm.assertRaisesRegexp(NotImplementedError, err_msg): + with tm.assert_raises_regex(NotImplementedError, err_msg): f(df.a, True) - with tm.assertRaisesRegexp(NotImplementedError, err_msg): + with tm.assert_raises_regex(NotImplementedError, err_msg): f(False, df.a) - with tm.assertRaisesRegexp(TypeError, err_msg): + with tm.assert_raises_regex(TypeError, err_msg): f(False, df) - with tm.assertRaisesRegexp(TypeError, err_msg): + with tm.assert_raises_regex(TypeError, err_msg): f(df, True) def test_bool_ops_warn_on_arithmetic(self): @@ -414,6 +408,10 @@ def test_bool_ops_warn_on_arithmetic(self): f = getattr(operator, name) fe = getattr(operator, sub_funcs[subs[op]]) + # >= 1.13.0 these are now TypeErrors + if op == '-' and not _np_version_under1p13: + continue + with tm.use_numexpr(True, min_elements=5): with tm.assert_produces_warning(check_stacklevel=False): r = f(df, df) diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py deleted file mode 100644 index 
28f1dc61533c1..0000000000000 --- a/pandas/tests/test_generic.py +++ /dev/null @@ -1,2032 +0,0 @@ -# -*- coding: utf-8 -*- -# pylint: disable-msg=E1101,W0612 - -from operator import methodcaller -import pytest -import numpy as np -from numpy import nan -import pandas as pd - -from distutils.version import LooseVersion -from pandas.types.common import is_scalar -from pandas import (Index, Series, DataFrame, Panel, isnull, - date_range, period_range, Panel4D) -from pandas.core.index import MultiIndex - -import pandas.formats.printing as printing - -from pandas.compat import range, zip, PY3 -from pandas import compat -from pandas.util.testing import (assertRaisesRegexp, - assert_series_equal, - assert_frame_equal, - assert_panel_equal, - assert_panel4d_equal, - assert_almost_equal) - -import pandas.util.testing as tm - - -# ---------------------------------------------------------------------- -# Generic types test cases - - -class Generic(object): - - def setUp(self): - pass - - @property - def _ndim(self): - return self._typ._AXIS_LEN - - def _axes(self): - """ return the axes for my object typ """ - return self._typ._AXIS_ORDERS - - def _construct(self, shape, value=None, dtype=None, **kwargs): - """ construct an object for the given shape - if value is specified use that if its a scalar - if value is an array, repeat it as needed """ - - if isinstance(shape, int): - shape = tuple([shape] * self._ndim) - if value is not None: - if is_scalar(value): - if value == 'empty': - arr = None - - # remove the info axis - kwargs.pop(self._typ._info_axis_name, None) - else: - arr = np.empty(shape, dtype=dtype) - arr.fill(value) - else: - fshape = np.prod(shape) - arr = value.ravel() - new_shape = fshape / arr.shape[0] - if fshape % arr.shape[0] != 0: - raise Exception("invalid value passed in _construct") - - arr = np.repeat(arr, new_shape).reshape(shape) - else: - arr = np.random.randn(*shape) - return self._typ(arr, dtype=dtype, **kwargs) - - def _compare(self, result, 
expected): - self._comparator(result, expected) - - def test_rename(self): - - # single axis - idx = list('ABCD') - # relabeling values passed into self.rename - args = [ - str.lower, - {x: x.lower() for x in idx}, - Series({x: x.lower() for x in idx}), - ] - - for axis in self._axes(): - kwargs = {axis: idx} - obj = self._construct(4, **kwargs) - - for arg in args: - # rename a single axis - result = obj.rename(**{axis: arg}) - expected = obj.copy() - setattr(expected, axis, list('abcd')) - self._compare(result, expected) - - # multiple axes at once - - def test_rename_axis(self): - idx = list('ABCD') - # relabeling values passed into self.rename - args = [ - str.lower, - {x: x.lower() for x in idx}, - Series({x: x.lower() for x in idx}), - ] - - for axis in self._axes(): - kwargs = {axis: idx} - obj = self._construct(4, **kwargs) - - for arg in args: - # rename a single axis - result = obj.rename_axis(arg, axis=axis) - expected = obj.copy() - setattr(expected, axis, list('abcd')) - self._compare(result, expected) - # scalar values - for arg in ['foo', None]: - result = obj.rename_axis(arg, axis=axis) - expected = obj.copy() - getattr(expected, axis).name = arg - self._compare(result, expected) - - def test_get_numeric_data(self): - - n = 4 - kwargs = {} - for i in range(self._ndim): - kwargs[self._typ._AXIS_NAMES[i]] = list(range(n)) - - # get the numeric data - o = self._construct(n, **kwargs) - result = o._get_numeric_data() - self._compare(result, o) - - # non-inclusion - result = o._get_bool_data() - expected = self._construct(n, value='empty', **kwargs) - self._compare(result, expected) - - # get the bool data - arr = np.array([True, True, False, True]) - o = self._construct(n, value=arr, **kwargs) - result = o._get_numeric_data() - self._compare(result, o) - - # _get_numeric_data is includes _get_bool_data, so can't test for - # non-inclusion - - def test_get_default(self): - - # GH 7725 - d0 = "a", "b", "c", "d" - d1 = np.arange(4, dtype='int64') - others 
= "e", 10 - - for data, index in ((d0, d1), (d1, d0)): - s = Series(data, index=index) - for i, d in zip(index, data): - self.assertEqual(s.get(i), d) - self.assertEqual(s.get(i, d), d) - self.assertEqual(s.get(i, "z"), d) - for other in others: - self.assertEqual(s.get(other, "z"), "z") - self.assertEqual(s.get(other, other), other) - - def test_nonzero(self): - - # GH 4633 - # look at the boolean/nonzero behavior for objects - obj = self._construct(shape=4) - self.assertRaises(ValueError, lambda: bool(obj == 0)) - self.assertRaises(ValueError, lambda: bool(obj == 1)) - self.assertRaises(ValueError, lambda: bool(obj)) - - obj = self._construct(shape=4, value=1) - self.assertRaises(ValueError, lambda: bool(obj == 0)) - self.assertRaises(ValueError, lambda: bool(obj == 1)) - self.assertRaises(ValueError, lambda: bool(obj)) - - obj = self._construct(shape=4, value=np.nan) - self.assertRaises(ValueError, lambda: bool(obj == 0)) - self.assertRaises(ValueError, lambda: bool(obj == 1)) - self.assertRaises(ValueError, lambda: bool(obj)) - - # empty - obj = self._construct(shape=0) - self.assertRaises(ValueError, lambda: bool(obj)) - - # invalid behaviors - - obj1 = self._construct(shape=4, value=1) - obj2 = self._construct(shape=4, value=1) - - def f(): - if obj1: - printing.pprint_thing("this works and shouldn't") - - self.assertRaises(ValueError, f) - self.assertRaises(ValueError, lambda: obj1 and obj2) - self.assertRaises(ValueError, lambda: obj1 or obj2) - self.assertRaises(ValueError, lambda: not obj1) - - def test_numpy_1_7_compat_numeric_methods(self): - # GH 4435 - # numpy in 1.7 tries to pass addtional arguments to pandas functions - - o = self._construct(shape=4) - for op in ['min', 'max', 'max', 'var', 'std', 'prod', 'sum', 'cumsum', - 'cumprod', 'median', 'skew', 'kurt', 'compound', 'cummax', - 'cummin', 'all', 'any']: - f = getattr(np, op, None) - if f is not None: - f(o) - - def test_downcast(self): - # test close downcasting - - o = self._construct(shape=4, 
value=9, dtype=np.int64) - result = o.copy() - result._data = o._data.downcast(dtypes='infer') - self._compare(result, o) - - o = self._construct(shape=4, value=9.) - expected = o.astype(np.int64) - result = o.copy() - result._data = o._data.downcast(dtypes='infer') - self._compare(result, expected) - - o = self._construct(shape=4, value=9.5) - result = o.copy() - result._data = o._data.downcast(dtypes='infer') - self._compare(result, o) - - # are close - o = self._construct(shape=4, value=9.000000000005) - result = o.copy() - result._data = o._data.downcast(dtypes='infer') - expected = o.astype(np.int64) - self._compare(result, expected) - - def test_constructor_compound_dtypes(self): - # GH 5191 - # compound dtypes should raise not-implementederror - - def f(dtype): - return self._construct(shape=3, dtype=dtype) - - self.assertRaises(NotImplementedError, f, [("A", "datetime64[h]"), - ("B", "str"), - ("C", "int32")]) - - # these work (though results may be unexpected) - f('int64') - f('float64') - f('M8[ns]') - - def check_metadata(self, x, y=None): - for m in x._metadata: - v = getattr(x, m, None) - if y is None: - self.assertIsNone(v) - else: - self.assertEqual(v, getattr(y, m, None)) - - def test_metadata_propagation(self): - # check that the metadata matches up on the resulting ops - - o = self._construct(shape=3) - o.name = 'foo' - o2 = self._construct(shape=3) - o2.name = 'bar' - - # TODO - # Once panel can do non-trivial combine operations - # (currently there is an a raise in the Panel arith_ops to prevent - # this, though it actually does work) - # can remove all of these try: except: blocks on the actual operations - - # ---------- - # preserving - # ---------- - - # simple ops with scalars - for op in ['__add__', '__sub__', '__truediv__', '__mul__']: - result = getattr(o, op)(1) - self.check_metadata(o, result) - - # ops with like - for op in ['__add__', '__sub__', '__truediv__', '__mul__']: - try: - result = getattr(o, op)(o) - self.check_metadata(o, 
result) - except (ValueError, AttributeError): - pass - - # simple boolean - for op in ['__eq__', '__le__', '__ge__']: - v1 = getattr(o, op)(o) - self.check_metadata(o, v1) - - try: - self.check_metadata(o, v1 & v1) - except (ValueError): - pass - - try: - self.check_metadata(o, v1 | v1) - except (ValueError): - pass - - # combine_first - try: - result = o.combine_first(o2) - self.check_metadata(o, result) - except (AttributeError): - pass - - # --------------------------- - # non-preserving (by default) - # --------------------------- - - # add non-like - try: - result = o + o2 - self.check_metadata(result) - except (ValueError, AttributeError): - pass - - # simple boolean - for op in ['__eq__', '__le__', '__ge__']: - - # this is a name matching op - v1 = getattr(o, op)(o) - - v2 = getattr(o, op)(o2) - self.check_metadata(v2) - - try: - self.check_metadata(v1 & v2) - except (ValueError): - pass - - try: - self.check_metadata(v1 | v2) - except (ValueError): - pass - - def test_head_tail(self): - # GH5370 - - o = self._construct(shape=10) - - # check all index types - for index in [tm.makeFloatIndex, tm.makeIntIndex, tm.makeStringIndex, - tm.makeUnicodeIndex, tm.makeDateIndex, - tm.makePeriodIndex]: - axis = o._get_axis_name(0) - setattr(o, axis, index(len(getattr(o, axis)))) - - # Panel + dims - try: - o.head() - except (NotImplementedError): - pytest.skip('not implemented on {0}'.format( - o.__class__.__name__)) - - self._compare(o.head(), o.iloc[:5]) - self._compare(o.tail(), o.iloc[-5:]) - - # 0-len - self._compare(o.head(0), o.iloc[0:0]) - self._compare(o.tail(0), o.iloc[0:0]) - - # bounded - self._compare(o.head(len(o) + 1), o) - self._compare(o.tail(len(o) + 1), o) - - # neg index - self._compare(o.head(-3), o.head(7)) - self._compare(o.tail(-3), o.tail(7)) - - def test_sample(self): - # Fixes issue: 2419 - - o = self._construct(shape=10) - - ### - # Check behavior of random_state argument - ### - - # Check for stability when receives seed or random state -- 
run 10 - # times. - for test in range(10): - seed = np.random.randint(0, 100) - self._compare( - o.sample(n=4, random_state=seed), o.sample(n=4, - random_state=seed)) - self._compare( - o.sample(frac=0.7, random_state=seed), o.sample( - frac=0.7, random_state=seed)) - - self._compare( - o.sample(n=4, random_state=np.random.RandomState(test)), - o.sample(n=4, random_state=np.random.RandomState(test))) - - self._compare( - o.sample(frac=0.7, random_state=np.random.RandomState(test)), - o.sample(frac=0.7, random_state=np.random.RandomState(test))) - - os1, os2 = [], [] - for _ in range(2): - np.random.seed(test) - os1.append(o.sample(n=4)) - os2.append(o.sample(frac=0.7)) - self._compare(*os1) - self._compare(*os2) - - # Check for error when random_state argument invalid. - with tm.assertRaises(ValueError): - o.sample(random_state='astring!') - - ### - # Check behavior of `frac` and `N` - ### - - # Giving both frac and N throws error - with tm.assertRaises(ValueError): - o.sample(n=3, frac=0.3) - - # Check that raises right error for negative lengths - with tm.assertRaises(ValueError): - o.sample(n=-3) - with tm.assertRaises(ValueError): - o.sample(frac=-0.3) - - # Make sure float values of `n` give error - with tm.assertRaises(ValueError): - o.sample(n=3.2) - - # Check lengths are right - self.assertTrue(len(o.sample(n=4) == 4)) - self.assertTrue(len(o.sample(frac=0.34) == 3)) - self.assertTrue(len(o.sample(frac=0.36) == 4)) - - ### - # Check weights - ### - - # Weight length must be right - with tm.assertRaises(ValueError): - o.sample(n=3, weights=[0, 1]) - - with tm.assertRaises(ValueError): - bad_weights = [0.5] * 11 - o.sample(n=3, weights=bad_weights) - - with tm.assertRaises(ValueError): - bad_weight_series = Series([0, 0, 0.2]) - o.sample(n=4, weights=bad_weight_series) - - # Check won't accept negative weights - with tm.assertRaises(ValueError): - bad_weights = [-0.1] * 10 - o.sample(n=3, weights=bad_weights) - - # Check inf and -inf throw errors: - with 
tm.assertRaises(ValueError): - weights_with_inf = [0.1] * 10 - weights_with_inf[0] = np.inf - o.sample(n=3, weights=weights_with_inf) - - with tm.assertRaises(ValueError): - weights_with_ninf = [0.1] * 10 - weights_with_ninf[0] = -np.inf - o.sample(n=3, weights=weights_with_ninf) - - # All zeros raises errors - zero_weights = [0] * 10 - with tm.assertRaises(ValueError): - o.sample(n=3, weights=zero_weights) - - # All missing weights - nan_weights = [np.nan] * 10 - with tm.assertRaises(ValueError): - o.sample(n=3, weights=nan_weights) - - # Check np.nan are replaced by zeros. - weights_with_nan = [np.nan] * 10 - weights_with_nan[5] = 0.5 - self._compare( - o.sample(n=1, axis=0, weights=weights_with_nan), o.iloc[5:6]) - - # Check None are also replaced by zeros. - weights_with_None = [None] * 10 - weights_with_None[5] = 0.5 - self._compare( - o.sample(n=1, axis=0, weights=weights_with_None), o.iloc[5:6]) - - def test_size_compat(self): - # GH8846 - # size property should be defined - - o = self._construct(shape=10) - self.assertTrue(o.size == np.prod(o.shape)) - self.assertTrue(o.size == 10 ** len(o.axes)) - - def test_split_compat(self): - # xref GH8846 - o = self._construct(shape=10) - self.assertTrue(len(np.array_split(o, 5)) == 5) - self.assertTrue(len(np.array_split(o, 2)) == 2) - - def test_unexpected_keyword(self): # GH8597 - df = DataFrame(np.random.randn(5, 2), columns=['jim', 'joe']) - ca = pd.Categorical([0, 0, 2, 2, 3, np.nan]) - ts = df['joe'].copy() - ts[2] = np.nan - - with assertRaisesRegexp(TypeError, 'unexpected keyword'): - df.drop('joe', axis=1, in_place=True) - - with assertRaisesRegexp(TypeError, 'unexpected keyword'): - df.reindex([1, 0], inplace=True) - - with assertRaisesRegexp(TypeError, 'unexpected keyword'): - ca.fillna(0, inplace=True) - - with assertRaisesRegexp(TypeError, 'unexpected keyword'): - ts.fillna(0, in_place=True) - - # See gh-12301 - def test_stat_unexpected_keyword(self): - obj = self._construct(5) - starwars = 'Star Wars' - 
errmsg = 'unexpected keyword' - - with assertRaisesRegexp(TypeError, errmsg): - obj.max(epic=starwars) # stat_function - with assertRaisesRegexp(TypeError, errmsg): - obj.var(epic=starwars) # stat_function_ddof - with assertRaisesRegexp(TypeError, errmsg): - obj.sum(epic=starwars) # cum_function - with assertRaisesRegexp(TypeError, errmsg): - obj.any(epic=starwars) # logical_function - - def test_api_compat(self): - - # GH 12021 - # compat for __name__, __qualname__ - - obj = self._construct(5) - for func in ['sum', 'cumsum', 'any', 'var']: - f = getattr(obj, func) - self.assertEqual(f.__name__, func) - if PY3: - self.assertTrue(f.__qualname__.endswith(func)) - - def test_stat_non_defaults_args(self): - obj = self._construct(5) - out = np.array([0]) - errmsg = "the 'out' parameter is not supported" - - with assertRaisesRegexp(ValueError, errmsg): - obj.max(out=out) # stat_function - with assertRaisesRegexp(ValueError, errmsg): - obj.var(out=out) # stat_function_ddof - with assertRaisesRegexp(ValueError, errmsg): - obj.sum(out=out) # cum_function - with assertRaisesRegexp(ValueError, errmsg): - obj.any(out=out) # logical_function - - def test_clip(self): - lower = 1 - upper = 3 - col = np.arange(5) - - obj = self._construct(len(col), value=col) - - if isinstance(obj, Panel): - msg = "clip is not supported yet for panels" - tm.assertRaisesRegexp(NotImplementedError, msg, - obj.clip, lower=lower, - upper=upper) - - else: - out = obj.clip(lower=lower, upper=upper) - expected = self._construct(len(col), value=col - .clip(lower, upper)) - self._compare(out, expected) - - bad_axis = 'foo' - msg = ('No axis named {axis} ' - 'for object').format(axis=bad_axis) - assertRaisesRegexp(ValueError, msg, obj.clip, - lower=lower, upper=upper, - axis=bad_axis) - - def test_truncate_out_of_bounds(self): - # GH11382 - - # small - shape = [int(2e3)] + ([1] * (self._ndim - 1)) - small = self._construct(shape, dtype='int8') - self._compare(small.truncate(), small) - 
self._compare(small.truncate(before=0, after=3e3), small) - self._compare(small.truncate(before=-1, after=2e3), small) - - # big - shape = [int(2e6)] + ([1] * (self._ndim - 1)) - big = self._construct(shape, dtype='int8') - self._compare(big.truncate(), big) - self._compare(big.truncate(before=0, after=3e6), big) - self._compare(big.truncate(before=-1, after=2e6), big) - - def test_numpy_clip(self): - lower = 1 - upper = 3 - col = np.arange(5) - - obj = self._construct(len(col), value=col) - - if isinstance(obj, Panel): - msg = "clip is not supported yet for panels" - tm.assertRaisesRegexp(NotImplementedError, msg, - np.clip, obj, - lower, upper) - else: - out = np.clip(obj, lower, upper) - expected = self._construct(len(col), value=col - .clip(lower, upper)) - self._compare(out, expected) - - msg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, - np.clip, obj, - lower, upper, out=col) - - def test_validate_bool_args(self): - df = DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}) - invalid_values = [1, "True", [1, 2, 3], 5.0] - - for value in invalid_values: - with self.assertRaises(ValueError): - super(DataFrame, df).rename_axis(mapper={'a': 'x', 'b': 'y'}, - axis=1, inplace=value) - - with self.assertRaises(ValueError): - super(DataFrame, df).drop('a', axis=1, inplace=value) - - with self.assertRaises(ValueError): - super(DataFrame, df).sort_index(inplace=value) - - with self.assertRaises(ValueError): - super(DataFrame, df).consolidate(inplace=value) - - with self.assertRaises(ValueError): - super(DataFrame, df).fillna(value=0, inplace=value) - - with self.assertRaises(ValueError): - super(DataFrame, df).replace(to_replace=1, value=7, - inplace=value) - - with self.assertRaises(ValueError): - super(DataFrame, df).interpolate(inplace=value) - - with self.assertRaises(ValueError): - super(DataFrame, df)._where(cond=df.a > 2, inplace=value) - - with self.assertRaises(ValueError): - super(DataFrame, df).mask(cond=df.a > 2, inplace=value) - 
- -class TestSeries(tm.TestCase, Generic): - _typ = Series - _comparator = lambda self, x, y: assert_series_equal(x, y) - - def setUp(self): - self.ts = tm.makeTimeSeries() # Was at top level in test_series - self.ts.name = 'ts' - - self.series = tm.makeStringSeries() - self.series.name = 'series' - - def test_rename_mi(self): - s = Series([11, 21, 31], - index=MultiIndex.from_tuples( - [("A", x) for x in ["a", "B", "c"]])) - s.rename(str.lower) - - def test_set_axis_name(self): - s = Series([1, 2, 3], index=['a', 'b', 'c']) - funcs = ['rename_axis', '_set_axis_name'] - name = 'foo' - for func in funcs: - result = methodcaller(func, name)(s) - self.assertTrue(s.index.name is None) - self.assertEqual(result.index.name, name) - - def test_set_axis_name_mi(self): - s = Series([11, 21, 31], index=MultiIndex.from_tuples( - [("A", x) for x in ["a", "B", "c"]], - names=['l1', 'l2']) - ) - funcs = ['rename_axis', '_set_axis_name'] - for func in funcs: - result = methodcaller(func, ['L1', 'L2'])(s) - self.assertTrue(s.index.name is None) - self.assertEqual(s.index.names, ['l1', 'l2']) - self.assertTrue(result.index.name is None) - self.assertTrue(result.index.names, ['L1', 'L2']) - - def test_set_axis_name_raises(self): - s = pd.Series([1]) - with tm.assertRaises(ValueError): - s._set_axis_name(name='a', axis=1) - - def test_get_numeric_data_preserve_dtype(self): - - # get the numeric data - o = Series([1, 2, 3]) - result = o._get_numeric_data() - self._compare(result, o) - - o = Series([1, '2', 3.]) - result = o._get_numeric_data() - expected = Series([], dtype=object, index=pd.Index([], dtype=object)) - self._compare(result, expected) - - o = Series([True, False, True]) - result = o._get_numeric_data() - self._compare(result, o) - - o = Series([True, False, True]) - result = o._get_bool_data() - self._compare(result, o) - - o = Series(date_range('20130101', periods=3)) - result = o._get_numeric_data() - expected = Series([], dtype='M8[ns]', index=pd.Index([], 
dtype=object)) - self._compare(result, expected) - - def test_nonzero_single_element(self): - - # allow single item via bool method - s = Series([True]) - self.assertTrue(s.bool()) - - s = Series([False]) - self.assertFalse(s.bool()) - - # single item nan to raise - for s in [Series([np.nan]), Series([pd.NaT]), Series([True]), - Series([False])]: - self.assertRaises(ValueError, lambda: bool(s)) - - for s in [Series([np.nan]), Series([pd.NaT])]: - self.assertRaises(ValueError, lambda: s.bool()) - - # multiple bool are still an error - for s in [Series([True, True]), Series([False, False])]: - self.assertRaises(ValueError, lambda: bool(s)) - self.assertRaises(ValueError, lambda: s.bool()) - - # single non-bool are an error - for s in [Series([1]), Series([0]), Series(['a']), Series([0.0])]: - self.assertRaises(ValueError, lambda: bool(s)) - self.assertRaises(ValueError, lambda: s.bool()) - - def test_metadata_propagation_indiv(self): - # check that the metadata matches up on the resulting ops - - o = Series(range(3), range(3)) - o.name = 'foo' - o2 = Series(range(3), range(3)) - o2.name = 'bar' - - result = o.T - self.check_metadata(o, result) - - # resample - ts = Series(np.random.rand(1000), - index=date_range('20130101', periods=1000, freq='s'), - name='foo') - result = ts.resample('1T').mean() - self.check_metadata(ts, result) - - result = ts.resample('1T').min() - self.check_metadata(ts, result) - - result = ts.resample('1T').apply(lambda x: x.sum()) - self.check_metadata(ts, result) - - _metadata = Series._metadata - _finalize = Series.__finalize__ - Series._metadata = ['name', 'filename'] - o.filename = 'foo' - o2.filename = 'bar' - - def finalize(self, other, method=None, **kwargs): - for name in self._metadata: - if method == 'concat' and name == 'filename': - value = '+'.join([getattr( - o, name) for o in other.objs if getattr(o, name, None) - ]) - object.__setattr__(self, name, value) - else: - object.__setattr__(self, name, getattr(other, name, None)) - - 
return self - - Series.__finalize__ = finalize - - result = pd.concat([o, o2]) - self.assertEqual(result.filename, 'foo+bar') - self.assertIsNone(result.name) - - # reset - Series._metadata = _metadata - Series.__finalize__ = _finalize - - def test_describe(self): - self.series.describe() - self.ts.describe() - - def test_describe_objects(self): - s = Series(['a', 'b', 'b', np.nan, np.nan, np.nan, 'c', 'd', 'a', 'a']) - result = s.describe() - expected = Series({'count': 7, 'unique': 4, - 'top': 'a', 'freq': 3, 'second': 'b', - 'second_freq': 2}, index=result.index) - assert_series_equal(result, expected) - - dt = list(self.ts.index) - dt.append(dt[0]) - ser = Series(dt) - rs = ser.describe() - min_date = min(dt) - max_date = max(dt) - xp = Series({'count': len(dt), - 'unique': len(self.ts.index), - 'first': min_date, 'last': max_date, 'freq': 2, - 'top': min_date}, index=rs.index) - assert_series_equal(rs, xp) - - def test_describe_empty(self): - result = pd.Series().describe() - - self.assertEqual(result['count'], 0) - self.assertTrue(result.drop('count').isnull().all()) - - nanSeries = Series([np.nan]) - nanSeries.name = 'NaN' - result = nanSeries.describe() - self.assertEqual(result['count'], 0) - self.assertTrue(result.drop('count').isnull().all()) - - def test_describe_none(self): - noneSeries = Series([None]) - noneSeries.name = 'None' - expected = Series([0, 0], index=['count', 'unique'], name='None') - assert_series_equal(noneSeries.describe(), expected) - - def test_to_xarray(self): - - tm._skip_if_no_xarray() - import xarray - from xarray import DataArray - - s = Series([]) - s.index.name = 'foo' - result = s.to_xarray() - self.assertEqual(len(result), 0) - self.assertEqual(len(result.coords), 1) - assert_almost_equal(list(result.coords.keys()), ['foo']) - self.assertIsInstance(result, DataArray) - - def testit(index, check_index_type=True, check_categorical=True): - s = Series(range(6), index=index(6)) - s.index.name = 'foo' - result = s.to_xarray() - 
repr(result) - self.assertEqual(len(result), 6) - self.assertEqual(len(result.coords), 1) - assert_almost_equal(list(result.coords.keys()), ['foo']) - self.assertIsInstance(result, DataArray) - - # idempotency - assert_series_equal(result.to_series(), s, - check_index_type=check_index_type, - check_categorical=check_categorical) - - l = [tm.makeFloatIndex, tm.makeIntIndex, - tm.makeStringIndex, tm.makeUnicodeIndex, - tm.makeDateIndex, tm.makePeriodIndex, - tm.makeTimedeltaIndex] - - if LooseVersion(xarray.__version__) >= '0.8.0': - l.append(tm.makeCategoricalIndex) - - for index in l: - testit(index) - - s = Series(range(6)) - s.index.name = 'foo' - s.index = pd.MultiIndex.from_product([['a', 'b'], range(3)], - names=['one', 'two']) - result = s.to_xarray() - self.assertEqual(len(result), 2) - assert_almost_equal(list(result.coords.keys()), ['one', 'two']) - self.assertIsInstance(result, DataArray) - assert_series_equal(result.to_series(), s) - - -class TestDataFrame(tm.TestCase, Generic): - _typ = DataFrame - _comparator = lambda self, x, y: assert_frame_equal(x, y) - - def test_rename_mi(self): - df = DataFrame([ - 11, 21, 31 - ], index=MultiIndex.from_tuples([("A", x) for x in ["a", "B", "c"]])) - df.rename(str.lower) - - def test_set_axis_name(self): - df = pd.DataFrame([[1, 2], [3, 4]]) - funcs = ['_set_axis_name', 'rename_axis'] - for func in funcs: - result = methodcaller(func, 'foo')(df) - self.assertTrue(df.index.name is None) - self.assertEqual(result.index.name, 'foo') - - result = methodcaller(func, 'cols', axis=1)(df) - self.assertTrue(df.columns.name is None) - self.assertEqual(result.columns.name, 'cols') - - def test_set_axis_name_mi(self): - df = DataFrame( - np.empty((3, 3)), - index=MultiIndex.from_tuples([("A", x) for x in list('aBc')]), - columns=MultiIndex.from_tuples([('C', x) for x in list('xyz')]) - ) - - level_names = ['L1', 'L2'] - funcs = ['_set_axis_name', 'rename_axis'] - for func in funcs: - result = methodcaller(func, 
level_names)(df) - self.assertEqual(result.index.names, level_names) - self.assertEqual(result.columns.names, [None, None]) - - result = methodcaller(func, level_names, axis=1)(df) - self.assertEqual(result.columns.names, ["L1", "L2"]) - self.assertEqual(result.index.names, [None, None]) - - def test_nonzero_single_element(self): - - # allow single item via bool method - df = DataFrame([[True]]) - self.assertTrue(df.bool()) - - df = DataFrame([[False]]) - self.assertFalse(df.bool()) - - df = DataFrame([[False, False]]) - self.assertRaises(ValueError, lambda: df.bool()) - self.assertRaises(ValueError, lambda: bool(df)) - - def test_get_numeric_data_preserve_dtype(self): - - # get the numeric data - o = DataFrame({'A': [1, '2', 3.]}) - result = o._get_numeric_data() - expected = DataFrame(index=[0, 1, 2], dtype=object) - self._compare(result, expected) - - def test_describe(self): - tm.makeDataFrame().describe() - tm.makeMixedDataFrame().describe() - tm.makeTimeDataFrame().describe() - - def test_describe_percentiles_percent_or_raw(self): - msg = 'percentiles should all be in the interval \\[0, 1\\]' - - df = tm.makeDataFrame() - with tm.assertRaisesRegexp(ValueError, msg): - df.describe(percentiles=[10, 50, 100]) - - with tm.assertRaisesRegexp(ValueError, msg): - df.describe(percentiles=[2]) - - with tm.assertRaisesRegexp(ValueError, msg): - df.describe(percentiles=[-2]) - - def test_describe_percentiles_equivalence(self): - df = tm.makeDataFrame() - d1 = df.describe() - d2 = df.describe(percentiles=[.25, .75]) - assert_frame_equal(d1, d2) - - def test_describe_percentiles_insert_median(self): - df = tm.makeDataFrame() - d1 = df.describe(percentiles=[.25, .75]) - d2 = df.describe(percentiles=[.25, .5, .75]) - assert_frame_equal(d1, d2) - self.assertTrue('25%' in d1.index) - self.assertTrue('75%' in d2.index) - - # none above - d1 = df.describe(percentiles=[.25, .45]) - d2 = df.describe(percentiles=[.25, .45, .5]) - assert_frame_equal(d1, d2) - self.assertTrue('25%' 
in d1.index) - self.assertTrue('45%' in d2.index) - - # none below - d1 = df.describe(percentiles=[.75, 1]) - d2 = df.describe(percentiles=[.5, .75, 1]) - assert_frame_equal(d1, d2) - self.assertTrue('75%' in d1.index) - self.assertTrue('100%' in d2.index) - - # edge - d1 = df.describe(percentiles=[0, 1]) - d2 = df.describe(percentiles=[0, .5, 1]) - assert_frame_equal(d1, d2) - self.assertTrue('0%' in d1.index) - self.assertTrue('100%' in d2.index) - - def test_describe_percentiles_insert_median_ndarray(self): - # GH14908 - df = tm.makeDataFrame() - result = df.describe(percentiles=np.array([.25, .75])) - expected = df.describe(percentiles=[.25, .75]) - assert_frame_equal(result, expected) - - def test_describe_percentiles_unique(self): - # GH13104 - df = tm.makeDataFrame() - with self.assertRaises(ValueError): - df.describe(percentiles=[0.1, 0.2, 0.4, 0.5, 0.2, 0.6]) - with self.assertRaises(ValueError): - df.describe(percentiles=[0.1, 0.2, 0.4, 0.2, 0.6]) - - def test_describe_percentiles_formatting(self): - # GH13104 - df = tm.makeDataFrame() - - # default - result = df.describe().index - expected = Index(['count', 'mean', 'std', 'min', '25%', '50%', '75%', - 'max'], - dtype='object') - tm.assert_index_equal(result, expected) - - result = df.describe(percentiles=[0.0001, 0.0005, 0.001, 0.999, - 0.9995, 0.9999]).index - expected = Index(['count', 'mean', 'std', 'min', '0.01%', '0.05%', - '0.1%', '50%', '99.9%', '99.95%', '99.99%', 'max'], - dtype='object') - tm.assert_index_equal(result, expected) - - result = df.describe(percentiles=[0.00499, 0.005, 0.25, 0.50, - 0.75]).index - expected = Index(['count', 'mean', 'std', 'min', '0.499%', '0.5%', - '25%', '50%', '75%', 'max'], - dtype='object') - tm.assert_index_equal(result, expected) - - result = df.describe(percentiles=[0.00499, 0.01001, 0.25, 0.50, - 0.75]).index - expected = Index(['count', 'mean', 'std', 'min', '0.5%', '1.0%', - '25%', '50%', '75%', 'max'], - dtype='object') - tm.assert_index_equal(result, 
expected) - - def test_describe_column_index_type(self): - # GH13288 - df = pd.DataFrame([1, 2, 3, 4]) - df.columns = pd.Index([0], dtype=object) - result = df.describe().columns - expected = Index([0], dtype=object) - tm.assert_index_equal(result, expected) - - df = pd.DataFrame({'A': list("BCDE"), 0: [1, 2, 3, 4]}) - result = df.describe().columns - expected = Index([0], dtype=object) - tm.assert_index_equal(result, expected) - - def test_describe_no_numeric(self): - df = DataFrame({'A': ['foo', 'foo', 'bar'] * 8, - 'B': ['a', 'b', 'c', 'd'] * 6}) - desc = df.describe() - expected = DataFrame(dict((k, v.describe()) - for k, v in compat.iteritems(df)), - columns=df.columns) - assert_frame_equal(desc, expected) - - ts = tm.makeTimeSeries() - df = DataFrame({'time': ts.index}) - desc = df.describe() - self.assertEqual(desc.time['first'], min(ts.index)) - - def test_describe_empty(self): - df = DataFrame() - tm.assertRaisesRegexp(ValueError, 'DataFrame without columns', - df.describe) - - df = DataFrame(columns=['A', 'B']) - result = df.describe() - expected = DataFrame(0, columns=['A', 'B'], index=['count', 'unique']) - tm.assert_frame_equal(result, expected) - - def test_describe_empty_int_columns(self): - df = DataFrame([[0, 1], [1, 2]]) - desc = df[df[0] < 0].describe() # works - assert_series_equal(desc.xs('count'), - Series([0, 0], dtype=float, name='count')) - self.assertTrue(isnull(desc.iloc[1:]).all().all()) - - def test_describe_objects(self): - df = DataFrame({"C1": ['a', 'a', 'c'], "C2": ['d', 'd', 'f']}) - result = df.describe() - expected = DataFrame({"C1": [3, 2, 'a', 2], "C2": [3, 2, 'd', 2]}, - index=['count', 'unique', 'top', 'freq']) - assert_frame_equal(result, expected) - - df = DataFrame({"C1": pd.date_range('2010-01-01', periods=4, freq='D') - }) - df.loc[4] = pd.Timestamp('2010-01-04') - result = df.describe() - expected = DataFrame({"C1": [5, 4, pd.Timestamp('2010-01-04'), 2, - pd.Timestamp('2010-01-01'), - pd.Timestamp('2010-01-04')]}, - 
index=['count', 'unique', 'top', 'freq', - 'first', 'last']) - assert_frame_equal(result, expected) - - # mix time and str - df['C2'] = ['a', 'a', 'b', 'c', 'a'] - result = df.describe() - expected['C2'] = [5, 3, 'a', 3, np.nan, np.nan] - assert_frame_equal(result, expected) - - # just str - expected = DataFrame({'C2': [5, 3, 'a', 4]}, - index=['count', 'unique', 'top', 'freq']) - result = df[['C2']].describe() - - # mix of time, str, numeric - df['C3'] = [2, 4, 6, 8, 2] - result = df.describe() - expected = DataFrame({"C3": [5., 4.4, 2.607681, 2., 2., 4., 6., 8.]}, - index=['count', 'mean', 'std', 'min', '25%', - '50%', '75%', 'max']) - assert_frame_equal(result, expected) - assert_frame_equal(df.describe(), df[['C3']].describe()) - - assert_frame_equal(df[['C1', 'C3']].describe(), df[['C3']].describe()) - assert_frame_equal(df[['C2', 'C3']].describe(), df[['C3']].describe()) - - def test_describe_typefiltering(self): - df = DataFrame({'catA': ['foo', 'foo', 'bar'] * 8, - 'catB': ['a', 'b', 'c', 'd'] * 6, - 'numC': np.arange(24, dtype='int64'), - 'numD': np.arange(24.) 
+ .5, - 'ts': tm.makeTimeSeries()[:24].index}) - - descN = df.describe() - expected_cols = ['numC', 'numD', ] - expected = DataFrame(dict((k, df[k].describe()) - for k in expected_cols), - columns=expected_cols) - assert_frame_equal(descN, expected) - - desc = df.describe(include=['number']) - assert_frame_equal(desc, descN) - desc = df.describe(exclude=['object', 'datetime']) - assert_frame_equal(desc, descN) - desc = df.describe(include=['float']) - assert_frame_equal(desc, descN.drop('numC', 1)) - - descC = df.describe(include=['O']) - expected_cols = ['catA', 'catB'] - expected = DataFrame(dict((k, df[k].describe()) - for k in expected_cols), - columns=expected_cols) - assert_frame_equal(descC, expected) - - descD = df.describe(include=['datetime']) - assert_series_equal(descD.ts, df.ts.describe()) - - desc = df.describe(include=['object', 'number', 'datetime']) - assert_frame_equal(desc.loc[:, ["numC", "numD"]].dropna(), descN) - assert_frame_equal(desc.loc[:, ["catA", "catB"]].dropna(), descC) - descDs = descD.sort_index() # the index order change for mixed-types - assert_frame_equal(desc.loc[:, "ts":].dropna().sort_index(), descDs) - - desc = df.loc[:, 'catA':'catB'].describe(include='all') - assert_frame_equal(desc, descC) - desc = df.loc[:, 'numC':'numD'].describe(include='all') - assert_frame_equal(desc, descN) - - desc = df.describe(percentiles=[], include='all') - cnt = Series(data=[4, 4, 6, 6, 6], - index=['catA', 'catB', 'numC', 'numD', 'ts']) - assert_series_equal(desc.count(), cnt) - self.assertTrue('count' in desc.index) - self.assertTrue('unique' in desc.index) - self.assertTrue('50%' in desc.index) - self.assertTrue('first' in desc.index) - - desc = df.drop("ts", 1).describe(percentiles=[], include='all') - assert_series_equal(desc.count(), cnt.drop("ts")) - self.assertTrue('first' not in desc.index) - desc = df.drop(["numC", "numD"], 1).describe(percentiles=[], - include='all') - assert_series_equal(desc.count(), cnt.drop(["numC", "numD"])) - 
self.assertTrue('50%' not in desc.index) - - def test_describe_typefiltering_category_bool(self): - df = DataFrame({'A_cat': pd.Categorical(['foo', 'foo', 'bar'] * 8), - 'B_str': ['a', 'b', 'c', 'd'] * 6, - 'C_bool': [True] * 12 + [False] * 12, - 'D_num': np.arange(24.) + .5, - 'E_ts': tm.makeTimeSeries()[:24].index}) - - desc = df.describe() - expected_cols = ['D_num'] - expected = DataFrame(dict((k, df[k].describe()) - for k in expected_cols), - columns=expected_cols) - assert_frame_equal(desc, expected) - - desc = df.describe(include=["category"]) - self.assertTrue(desc.columns.tolist() == ["A_cat"]) - - # 'all' includes numpy-dtypes + category - desc1 = df.describe(include="all") - desc2 = df.describe(include=[np.generic, "category"]) - assert_frame_equal(desc1, desc2) - - def test_describe_timedelta(self): - df = DataFrame({"td": pd.to_timedelta(np.arange(24) % 20, "D")}) - self.assertTrue(df.describe().loc["mean"][0] == pd.to_timedelta( - "8d4h")) - - def test_describe_typefiltering_dupcol(self): - df = DataFrame({'catA': ['foo', 'foo', 'bar'] * 8, - 'catB': ['a', 'b', 'c', 'd'] * 6, - 'numC': np.arange(24), - 'numD': np.arange(24.) + .5, - 'ts': tm.makeTimeSeries()[:24].index}) - s = df.describe(include='all').shape[1] - df = pd.concat([df, df], axis=1) - s2 = df.describe(include='all').shape[1] - self.assertTrue(s2 == 2 * s) - - def test_describe_typefiltering_groupby(self): - df = DataFrame({'catA': ['foo', 'foo', 'bar'] * 8, - 'catB': ['a', 'b', 'c', 'd'] * 6, - 'numC': np.arange(24), - 'numD': np.arange(24.) 
+ .5, - 'ts': tm.makeTimeSeries()[:24].index}) - G = df.groupby('catA') - self.assertTrue(G.describe(include=['number']).shape == (2, 16)) - self.assertTrue(G.describe(include=['number', 'object']).shape == (2, - 33)) - self.assertTrue(G.describe(include='all').shape == (2, 52)) - - def test_describe_multi_index_df_column_names(self): - """ Test that column names persist after the describe operation.""" - - df = pd.DataFrame( - {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], - 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], - 'C': np.random.randn(8), - 'D': np.random.randn(8)}) - - # GH 11517 - # test for hierarchical index - hierarchical_index_df = df.groupby(['A', 'B']).mean().T - self.assertTrue(hierarchical_index_df.columns.names == ['A', 'B']) - self.assertTrue(hierarchical_index_df.describe().columns.names == - ['A', 'B']) - - # test for non-hierarchical index - non_hierarchical_index_df = df.groupby(['A']).mean().T - self.assertTrue(non_hierarchical_index_df.columns.names == ['A']) - self.assertTrue(non_hierarchical_index_df.describe().columns.names == - ['A']) - - def test_metadata_propagation_indiv(self): - - # groupby - df = DataFrame( - {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], - 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], - 'C': np.random.randn(8), - 'D': np.random.randn(8)}) - result = df.groupby('A').sum() - self.check_metadata(df, result) - - # resample - df = DataFrame(np.random.randn(1000, 2), - index=date_range('20130101', periods=1000, freq='s')) - result = df.resample('1T') - self.check_metadata(df, result) - - # merging with override - # GH 6923 - _metadata = DataFrame._metadata - _finalize = DataFrame.__finalize__ - - np.random.seed(10) - df1 = DataFrame(np.random.randint(0, 4, (3, 2)), columns=['a', 'b']) - df2 = DataFrame(np.random.randint(0, 4, (3, 2)), columns=['c', 'd']) - DataFrame._metadata = ['filename'] - df1.filename = 'fname1.csv' - df2.filename = 
'fname2.csv' - - def finalize(self, other, method=None, **kwargs): - - for name in self._metadata: - if method == 'merge': - left, right = other.left, other.right - value = getattr(left, name, '') + '|' + getattr(right, - name, '') - object.__setattr__(self, name, value) - else: - object.__setattr__(self, name, getattr(other, name, '')) - - return self - - DataFrame.__finalize__ = finalize - result = df1.merge(df2, left_on=['a'], right_on=['c'], how='inner') - self.assertEqual(result.filename, 'fname1.csv|fname2.csv') - - # concat - # GH 6927 - DataFrame._metadata = ['filename'] - df1 = DataFrame(np.random.randint(0, 4, (3, 2)), columns=list('ab')) - df1.filename = 'foo' - - def finalize(self, other, method=None, **kwargs): - for name in self._metadata: - if method == 'concat': - value = '+'.join([getattr( - o, name) for o in other.objs if getattr(o, name, None) - ]) - object.__setattr__(self, name, value) - else: - object.__setattr__(self, name, getattr(other, name, None)) - - return self - - DataFrame.__finalize__ = finalize - - result = pd.concat([df1, df1]) - self.assertEqual(result.filename, 'foo+foo') - - # reset - DataFrame._metadata = _metadata - DataFrame.__finalize__ = _finalize - - def test_tz_convert_and_localize(self): - l0 = date_range('20140701', periods=5, freq='D') - - # TODO: l1 should be a PeriodIndex for testing - # after GH2106 is addressed - with tm.assertRaises(NotImplementedError): - period_range('20140701', periods=1).tz_convert('UTC') - with tm.assertRaises(NotImplementedError): - period_range('20140701', periods=1).tz_localize('UTC') - # l1 = period_range('20140701', periods=5, freq='D') - l1 = date_range('20140701', periods=5, freq='D') - - int_idx = Index(range(5)) - - for fn in ['tz_localize', 'tz_convert']: - - if fn == 'tz_convert': - l0 = l0.tz_localize('UTC') - l1 = l1.tz_localize('UTC') - - for idx in [l0, l1]: - - l0_expected = getattr(idx, fn)('US/Pacific') - l1_expected = getattr(idx, fn)('US/Pacific') - - df1 = 
DataFrame(np.ones(5), index=l0) - df1 = getattr(df1, fn)('US/Pacific') - self.assert_index_equal(df1.index, l0_expected) - - # MultiIndex - # GH7846 - df2 = DataFrame(np.ones(5), MultiIndex.from_arrays([l0, l1])) - - df3 = getattr(df2, fn)('US/Pacific', level=0) - self.assertFalse(df3.index.levels[0].equals(l0)) - self.assert_index_equal(df3.index.levels[0], l0_expected) - self.assert_index_equal(df3.index.levels[1], l1) - self.assertFalse(df3.index.levels[1].equals(l1_expected)) - - df3 = getattr(df2, fn)('US/Pacific', level=1) - self.assert_index_equal(df3.index.levels[0], l0) - self.assertFalse(df3.index.levels[0].equals(l0_expected)) - self.assert_index_equal(df3.index.levels[1], l1_expected) - self.assertFalse(df3.index.levels[1].equals(l1)) - - df4 = DataFrame(np.ones(5), - MultiIndex.from_arrays([int_idx, l0])) - - # TODO: untested - df5 = getattr(df4, fn)('US/Pacific', level=1) # noqa - - self.assert_index_equal(df3.index.levels[0], l0) - self.assertFalse(df3.index.levels[0].equals(l0_expected)) - self.assert_index_equal(df3.index.levels[1], l1_expected) - self.assertFalse(df3.index.levels[1].equals(l1)) - - # Bad Inputs - for fn in ['tz_localize', 'tz_convert']: - # Not DatetimeIndex / PeriodIndex - with tm.assertRaisesRegexp(TypeError, 'DatetimeIndex'): - df = DataFrame(index=int_idx) - df = getattr(df, fn)('US/Pacific') - - # Not DatetimeIndex / PeriodIndex - with tm.assertRaisesRegexp(TypeError, 'DatetimeIndex'): - df = DataFrame(np.ones(5), - MultiIndex.from_arrays([int_idx, l0])) - df = getattr(df, fn)('US/Pacific', level=0) - - # Invalid level - with tm.assertRaisesRegexp(ValueError, 'not valid'): - df = DataFrame(index=l0) - df = getattr(df, fn)('US/Pacific', level=1) - - def test_set_attribute(self): - # Test for consistent setattr behavior when an attribute and a column - # have the same name (Issue #8994) - df = DataFrame({'x': [1, 2, 3]}) - - df.y = 2 - df['y'] = [2, 4, 6] - df.y = 5 - - self.assertEqual(df.y, 5) - assert_series_equal(df['y'], 
Series([2, 4, 6], name='y')) - - def test_pct_change(self): - # GH 11150 - pnl = DataFrame([np.arange(0, 40, 10), np.arange(0, 40, 10), np.arange( - 0, 40, 10)]).astype(np.float64) - pnl.iat[1, 0] = np.nan - pnl.iat[1, 1] = np.nan - pnl.iat[2, 3] = 60 - - mask = pnl.isnull() - - for axis in range(2): - expected = pnl.ffill(axis=axis) / pnl.ffill(axis=axis).shift( - axis=axis) - 1 - expected[mask] = np.nan - result = pnl.pct_change(axis=axis, fill_method='pad') - - self.assert_frame_equal(result, expected) - - def test_to_xarray(self): - - tm._skip_if_no_xarray() - from xarray import Dataset - - df = DataFrame({'a': list('abc'), - 'b': list(range(1, 4)), - 'c': np.arange(3, 6).astype('u1'), - 'd': np.arange(4.0, 7.0, dtype='float64'), - 'e': [True, False, True], - 'f': pd.Categorical(list('abc')), - 'g': pd.date_range('20130101', periods=3), - 'h': pd.date_range('20130101', - periods=3, - tz='US/Eastern')} - ) - - df.index.name = 'foo' - result = df[0:0].to_xarray() - self.assertEqual(result.dims['foo'], 0) - self.assertIsInstance(result, Dataset) - - for index in [tm.makeFloatIndex, tm.makeIntIndex, - tm.makeStringIndex, tm.makeUnicodeIndex, - tm.makeDateIndex, tm.makePeriodIndex, - tm.makeCategoricalIndex, tm.makeTimedeltaIndex]: - df.index = index(3) - df.index.name = 'foo' - df.columns.name = 'bar' - result = df.to_xarray() - self.assertEqual(result.dims['foo'], 3) - self.assertEqual(len(result.coords), 1) - self.assertEqual(len(result.data_vars), 8) - assert_almost_equal(list(result.coords.keys()), ['foo']) - self.assertIsInstance(result, Dataset) - - # idempotency - # categoricals are not preserved - # datetimes w/tz are not preserved - # column names are lost - expected = df.copy() - expected['f'] = expected['f'].astype(object) - expected['h'] = expected['h'].astype('datetime64[ns]') - expected.columns.name = None - assert_frame_equal(result.to_dataframe(), expected, - check_index_type=False, check_categorical=False) - - # available in 0.7.1 - # MultiIndex - 
df.index = pd.MultiIndex.from_product([['a'], range(3)], - names=['one', 'two']) - result = df.to_xarray() - self.assertEqual(result.dims['one'], 1) - self.assertEqual(result.dims['two'], 3) - self.assertEqual(len(result.coords), 2) - self.assertEqual(len(result.data_vars), 8) - assert_almost_equal(list(result.coords.keys()), ['one', 'two']) - self.assertIsInstance(result, Dataset) - - result = result.to_dataframe() - expected = df.copy() - expected['f'] = expected['f'].astype(object) - expected['h'] = expected['h'].astype('datetime64[ns]') - expected.columns.name = None - assert_frame_equal(result, - expected, - check_index_type=False) - - -class TestPanel(tm.TestCase, Generic): - _typ = Panel - _comparator = lambda self, x, y: assert_panel_equal(x, y, by_blocks=True) - - def test_to_xarray(self): - - tm._skip_if_no_xarray() - from xarray import DataArray - - p = tm.makePanel() - - result = p.to_xarray() - self.assertIsInstance(result, DataArray) - self.assertEqual(len(result.coords), 3) - assert_almost_equal(list(result.coords.keys()), - ['items', 'major_axis', 'minor_axis']) - self.assertEqual(len(result.dims), 3) - - # idempotency - assert_panel_equal(result.to_pandas(), p) - - -class TestPanel4D(tm.TestCase, Generic): - _typ = Panel4D - _comparator = lambda self, x, y: assert_panel4d_equal(x, y, by_blocks=True) - - def test_sample(self): - pytest.skip("sample on Panel4D") - - def test_to_xarray(self): - - tm._skip_if_no_xarray() - from xarray import DataArray - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - p = tm.makePanel4D() - - result = p.to_xarray() - self.assertIsInstance(result, DataArray) - self.assertEqual(len(result.coords), 4) - assert_almost_equal(list(result.coords.keys()), - ['labels', 'items', 'major_axis', - 'minor_axis']) - self.assertEqual(len(result.dims), 4) - - # non-convertible - self.assertRaises(ValueError, lambda: result.to_pandas()) - -# run all the tests, but wrap each in a warning catcher -for t in 
['test_rename', 'test_rename_axis', 'test_get_numeric_data', - 'test_get_default', 'test_nonzero', - 'test_numpy_1_7_compat_numeric_methods', - 'test_downcast', 'test_constructor_compound_dtypes', - 'test_head_tail', - 'test_size_compat', 'test_split_compat', - 'test_unexpected_keyword', - 'test_stat_unexpected_keyword', 'test_api_compat', - 'test_stat_non_defaults_args', - 'test_clip', 'test_truncate_out_of_bounds', 'test_numpy_clip', - 'test_metadata_propagation']: - - def f(): - def tester(self): - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - return getattr(super(TestPanel4D, self), t)() - return tester - - setattr(TestPanel4D, t, f()) - - -class TestNDFrame(tm.TestCase): - # tests that don't fit elsewhere - - def test_sample(sel): - # Fixes issue: 2419 - # additional specific object based tests - - # A few dataframe test with degenerate weights. - easy_weight_list = [0] * 10 - easy_weight_list[5] = 1 - - df = pd.DataFrame({'col1': range(10, 20), - 'col2': range(20, 30), - 'colString': ['a'] * 10, - 'easyweights': easy_weight_list}) - sample1 = df.sample(n=1, weights='easyweights') - assert_frame_equal(sample1, df.iloc[5:6]) - - # Ensure proper error if string given as weight for Series, panel, or - # DataFrame with axis = 1. - s = Series(range(10)) - with tm.assertRaises(ValueError): - s.sample(n=3, weights='weight_column') - - panel = pd.Panel(items=[0, 1, 2], major_axis=[2, 3, 4], - minor_axis=[3, 4, 5]) - with tm.assertRaises(ValueError): - panel.sample(n=1, weights='weight_column') - - with tm.assertRaises(ValueError): - df.sample(n=1, weights='weight_column', axis=1) - - # Check weighting key error - with tm.assertRaises(KeyError): - df.sample(n=3, weights='not_a_real_column_name') - - # Check that re-normalizes weights that don't sum to one. 
- weights_less_than_1 = [0] * 10 - weights_less_than_1[0] = 0.5 - tm.assert_frame_equal( - df.sample(n=1, weights=weights_less_than_1), df.iloc[:1]) - - ### - # Test axis argument - ### - - # Test axis argument - df = pd.DataFrame({'col1': range(10), 'col2': ['a'] * 10}) - second_column_weight = [0, 1] - assert_frame_equal( - df.sample(n=1, axis=1, weights=second_column_weight), df[['col2']]) - - # Different axis arg types - assert_frame_equal(df.sample(n=1, axis='columns', - weights=second_column_weight), - df[['col2']]) - - weight = [0] * 10 - weight[5] = 0.5 - assert_frame_equal(df.sample(n=1, axis='rows', weights=weight), - df.iloc[5:6]) - assert_frame_equal(df.sample(n=1, axis='index', weights=weight), - df.iloc[5:6]) - - # Check out of range axis values - with tm.assertRaises(ValueError): - df.sample(n=1, axis=2) - - with tm.assertRaises(ValueError): - df.sample(n=1, axis='not_a_name') - - with tm.assertRaises(ValueError): - s = pd.Series(range(10)) - s.sample(n=1, axis=1) - - # Test weight length compared to correct axis - with tm.assertRaises(ValueError): - df.sample(n=1, axis=1, weights=[0.5] * 10) - - # Check weights with axis = 1 - easy_weight_list = [0] * 3 - easy_weight_list[2] = 1 - - df = pd.DataFrame({'col1': range(10, 20), - 'col2': range(20, 30), - 'colString': ['a'] * 10}) - sample1 = df.sample(n=1, axis=1, weights=easy_weight_list) - assert_frame_equal(sample1, df[['colString']]) - - # Test default axes - p = pd.Panel(items=['a', 'b', 'c'], major_axis=[2, 4, 6], - minor_axis=[1, 3, 5]) - assert_panel_equal( - p.sample(n=3, random_state=42), p.sample(n=3, axis=1, - random_state=42)) - assert_frame_equal( - df.sample(n=3, random_state=42), df.sample(n=3, axis=0, - random_state=42)) - - # Test that function aligns weights with frame - df = DataFrame( - {'col1': [5, 6, 7], - 'col2': ['a', 'b', 'c'], }, index=[9, 5, 3]) - s = Series([1, 0, 0], index=[3, 5, 9]) - assert_frame_equal(df.loc[[3]], df.sample(1, weights=s)) - - # Weights have index values 
to be dropped because not in - # sampled DataFrame - s2 = Series([0.001, 0, 10000], index=[3, 5, 10]) - assert_frame_equal(df.loc[[3]], df.sample(1, weights=s2)) - - # Weights have empty values to be filed with zeros - s3 = Series([0.01, 0], index=[3, 5]) - assert_frame_equal(df.loc[[3]], df.sample(1, weights=s3)) - - # No overlap in weight and sampled DataFrame indices - s4 = Series([1, 0], index=[1, 2]) - with tm.assertRaises(ValueError): - df.sample(1, weights=s4) - - def test_squeeze(self): - # noop - for s in [tm.makeFloatSeries(), tm.makeStringSeries(), - tm.makeObjectSeries()]: - tm.assert_series_equal(s.squeeze(), s) - for df in [tm.makeTimeDataFrame()]: - tm.assert_frame_equal(df.squeeze(), df) - for p in [tm.makePanel()]: - tm.assert_panel_equal(p.squeeze(), p) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - for p4d in [tm.makePanel4D()]: - tm.assert_panel4d_equal(p4d.squeeze(), p4d) - - # squeezing - df = tm.makeTimeDataFrame().reindex(columns=['A']) - tm.assert_series_equal(df.squeeze(), df['A']) - - p = tm.makePanel().reindex(items=['ItemA']) - tm.assert_frame_equal(p.squeeze(), p['ItemA']) - - p = tm.makePanel().reindex(items=['ItemA'], minor_axis=['A']) - tm.assert_series_equal(p.squeeze(), p.loc['ItemA', :, 'A']) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - p4d = tm.makePanel4D().reindex(labels=['label1']) - tm.assert_panel_equal(p4d.squeeze(), p4d['label1']) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - p4d = tm.makePanel4D().reindex(labels=['label1'], items=['ItemA']) - tm.assert_frame_equal(p4d.squeeze(), p4d.loc['label1', 'ItemA']) - - # don't fail with 0 length dimensions GH11229 & GH8999 - empty_series = pd.Series([], name='five') - empty_frame = pd.DataFrame([empty_series]) - empty_panel = pd.Panel({'six': empty_frame}) - - [tm.assert_series_equal(empty_series, higher_dim.squeeze()) - for higher_dim in [empty_series, empty_frame, empty_panel]] - - # 
axis argument - df = tm.makeTimeDataFrame(nper=1).iloc[:, :1] - tm.assert_equal(df.shape, (1, 1)) - tm.assert_series_equal(df.squeeze(axis=0), df.iloc[0]) - tm.assert_series_equal(df.squeeze(axis='index'), df.iloc[0]) - tm.assert_series_equal(df.squeeze(axis=1), df.iloc[:, 0]) - tm.assert_series_equal(df.squeeze(axis='columns'), df.iloc[:, 0]) - tm.assert_equal(df.squeeze(), df.iloc[0, 0]) - tm.assertRaises(ValueError, df.squeeze, axis=2) - tm.assertRaises(ValueError, df.squeeze, axis='x') - - df = tm.makeTimeDataFrame(3) - tm.assert_frame_equal(df.squeeze(axis=0), df) - - def test_numpy_squeeze(self): - s = tm.makeFloatSeries() - tm.assert_series_equal(np.squeeze(s), s) - - df = tm.makeTimeDataFrame().reindex(columns=['A']) - tm.assert_series_equal(np.squeeze(df), df['A']) - - def test_transpose(self): - msg = (r"transpose\(\) got multiple values for " - r"keyword argument 'axes'") - for s in [tm.makeFloatSeries(), tm.makeStringSeries(), - tm.makeObjectSeries()]: - # calls implementation in pandas/core/base.py - tm.assert_series_equal(s.transpose(), s) - for df in [tm.makeTimeDataFrame()]: - tm.assert_frame_equal(df.transpose().transpose(), df) - for p in [tm.makePanel()]: - tm.assert_panel_equal(p.transpose(2, 0, 1) - .transpose(1, 2, 0), p) - tm.assertRaisesRegexp(TypeError, msg, p.transpose, - 2, 0, 1, axes=(2, 0, 1)) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - for p4d in [tm.makePanel4D()]: - tm.assert_panel4d_equal(p4d.transpose(2, 0, 3, 1) - .transpose(1, 3, 0, 2), p4d) - tm.assertRaisesRegexp(TypeError, msg, p4d.transpose, - 2, 0, 3, 1, axes=(2, 0, 3, 1)) - - def test_numpy_transpose(self): - msg = "the 'axes' parameter is not supported" - - s = tm.makeFloatSeries() - tm.assert_series_equal( - np.transpose(s), s) - tm.assertRaisesRegexp(ValueError, msg, - np.transpose, s, axes=1) - - df = tm.makeTimeDataFrame() - tm.assert_frame_equal(np.transpose( - np.transpose(df)), df) - tm.assertRaisesRegexp(ValueError, msg, - 
np.transpose, df, axes=1) - - p = tm.makePanel() - tm.assert_panel_equal(np.transpose( - np.transpose(p, axes=(2, 0, 1)), - axes=(1, 2, 0)), p) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - p4d = tm.makePanel4D() - tm.assert_panel4d_equal(np.transpose( - np.transpose(p4d, axes=(2, 0, 3, 1)), - axes=(1, 3, 0, 2)), p4d) - - def test_take(self): - indices = [1, 5, -2, 6, 3, -1] - for s in [tm.makeFloatSeries(), tm.makeStringSeries(), - tm.makeObjectSeries()]: - out = s.take(indices) - expected = Series(data=s.values.take(indices), - index=s.index.take(indices)) - tm.assert_series_equal(out, expected) - for df in [tm.makeTimeDataFrame()]: - out = df.take(indices) - expected = DataFrame(data=df.values.take(indices, axis=0), - index=df.index.take(indices), - columns=df.columns) - tm.assert_frame_equal(out, expected) - - indices = [-3, 2, 0, 1] - for p in [tm.makePanel()]: - out = p.take(indices) - expected = Panel(data=p.values.take(indices, axis=0), - items=p.items.take(indices), - major_axis=p.major_axis, - minor_axis=p.minor_axis) - tm.assert_panel_equal(out, expected) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - for p4d in [tm.makePanel4D()]: - out = p4d.take(indices) - expected = Panel4D(data=p4d.values.take(indices, axis=0), - labels=p4d.labels.take(indices), - major_axis=p4d.major_axis, - minor_axis=p4d.minor_axis, - items=p4d.items) - tm.assert_panel4d_equal(out, expected) - - def test_take_invalid_kwargs(self): - indices = [-3, 2, 0, 1] - s = tm.makeFloatSeries() - df = tm.makeTimeDataFrame() - p = tm.makePanel() - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - p4d = tm.makePanel4D() - - for obj in (s, df, p, p4d): - msg = r"take\(\) got an unexpected keyword argument 'foo'" - tm.assertRaisesRegexp(TypeError, msg, obj.take, - indices, foo=2) - - msg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, obj.take, - indices, out=indices) - - 
msg = "the 'mode' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, obj.take, - indices, mode='clip') - - def test_equals(self): - s1 = pd.Series([1, 2, 3], index=[0, 2, 1]) - s2 = s1.copy() - self.assertTrue(s1.equals(s2)) - - s1[1] = 99 - self.assertFalse(s1.equals(s2)) - - # NaNs compare as equal - s1 = pd.Series([1, np.nan, 3, np.nan], index=[0, 2, 1, 3]) - s2 = s1.copy() - self.assertTrue(s1.equals(s2)) - - s2[0] = 9.9 - self.assertFalse(s1.equals(s2)) - - idx = MultiIndex.from_tuples([(0, 'a'), (1, 'b'), (2, 'c')]) - s1 = Series([1, 2, np.nan], index=idx) - s2 = s1.copy() - self.assertTrue(s1.equals(s2)) - - # Add object dtype column with nans - index = np.random.random(10) - df1 = DataFrame( - np.random.random(10, ), index=index, columns=['floats']) - df1['text'] = 'the sky is so blue. we could use more chocolate.'.split( - ) - df1['start'] = date_range('2000-1-1', periods=10, freq='T') - df1['end'] = date_range('2000-1-1', periods=10, freq='D') - df1['diff'] = df1['end'] - df1['start'] - df1['bool'] = (np.arange(10) % 3 == 0) - df1.loc[::2] = nan - df2 = df1.copy() - self.assertTrue(df1['text'].equals(df2['text'])) - self.assertTrue(df1['start'].equals(df2['start'])) - self.assertTrue(df1['end'].equals(df2['end'])) - self.assertTrue(df1['diff'].equals(df2['diff'])) - self.assertTrue(df1['bool'].equals(df2['bool'])) - self.assertTrue(df1.equals(df2)) - self.assertFalse(df1.equals(object)) - - # different dtype - different = df1.copy() - different['floats'] = different['floats'].astype('float32') - self.assertFalse(df1.equals(different)) - - # different index - different_index = -index - different = df2.set_index(different_index) - self.assertFalse(df1.equals(different)) - - # different columns - different = df2.copy() - different.columns = df2.columns[::-1] - self.assertFalse(df1.equals(different)) - - # DatetimeIndex - index = pd.date_range('2000-1-1', periods=10, freq='T') - df1 = df1.set_index(index) - df2 = df1.copy() - 
self.assertTrue(df1.equals(df2)) - - # MultiIndex - df3 = df1.set_index(['text'], append=True) - df2 = df1.set_index(['text'], append=True) - self.assertTrue(df3.equals(df2)) - - df2 = df1.set_index(['floats'], append=True) - self.assertFalse(df3.equals(df2)) - - # NaN in index - df3 = df1.set_index(['floats'], append=True) - df2 = df1.set_index(['floats'], append=True) - self.assertTrue(df3.equals(df2)) - - # GH 8437 - a = pd.Series([False, np.nan]) - b = pd.Series([False, np.nan]) - c = pd.Series(index=range(2)) - d = pd.Series(index=range(2)) - e = pd.Series(index=range(2)) - f = pd.Series(index=range(2)) - c[:-1] = d[:-1] = e[0] = f[0] = False - self.assertTrue(a.equals(a)) - self.assertTrue(a.equals(b)) - self.assertTrue(a.equals(c)) - self.assertTrue(a.equals(d)) - self.assertFalse(a.equals(e)) - self.assertTrue(e.equals(f)) - - def test_describe_raises(self): - with tm.assertRaises(NotImplementedError): - tm.makePanel().describe() - - def test_pipe(self): - df = DataFrame({'A': [1, 2, 3]}) - f = lambda x, y: x ** y - result = df.pipe(f, 2) - expected = DataFrame({'A': [1, 4, 9]}) - self.assert_frame_equal(result, expected) - - result = df.A.pipe(f, 2) - self.assert_series_equal(result, expected.A) - - def test_pipe_tuple(self): - df = DataFrame({'A': [1, 2, 3]}) - f = lambda x, y: y - result = df.pipe((f, 'y'), 0) - self.assert_frame_equal(result, df) - - result = df.A.pipe((f, 'y'), 0) - self.assert_series_equal(result, df.A) - - def test_pipe_tuple_error(self): - df = DataFrame({"A": [1, 2, 3]}) - f = lambda x, y: y - with tm.assertRaises(ValueError): - df.pipe((f, 'y'), x=1, y=0) - - with tm.assertRaises(ValueError): - df.A.pipe((f, 'y'), x=1, y=0) - - def test_pipe_panel(self): - wp = Panel({'r1': DataFrame({"A": [1, 2, 3]})}) - f = lambda x, y: x + y - result = wp.pipe(f, 2) - expected = wp + 2 - assert_panel_equal(result, expected) - - result = wp.pipe((f, 'y'), x=1) - expected = wp + 1 - assert_panel_equal(result, expected) - - with 
tm.assertRaises(ValueError): - result = wp.pipe((f, 'y'), x=1, y=1) diff --git a/pandas/tests/test_join.py b/pandas/tests/test_join.py index 2a16d7663b0cf..af946436b55c7 100644 --- a/pandas/tests/test_join.py +++ b/pandas/tests/test_join.py @@ -1,14 +1,14 @@ # -*- coding: utf-8 -*- import numpy as np -from pandas import Index +from pandas import Index, DataFrame, Categorical, merge -import pandas._join as _join +from pandas._libs import join as _join import pandas.util.testing as tm -from pandas.util.testing import assert_almost_equal +from pandas.util.testing import assert_almost_equal, assert_frame_equal -class TestIndexer(tm.TestCase): +class TestIndexer(object): def test_outer_join_indexer(self): typemap = [('int32', _join.outer_join_indexer_int32), @@ -23,9 +23,9 @@ def test_outer_join_indexer(self): empty = np.array([], dtype=dtype) result, lindexer, rindexer = indexer(left, right) - tm.assertIsInstance(result, np.ndarray) - tm.assertIsInstance(lindexer, np.ndarray) - tm.assertIsInstance(rindexer, np.ndarray) + assert isinstance(result, np.ndarray) + assert isinstance(lindexer, np.ndarray) + assert isinstance(rindexer, np.ndarray) tm.assert_numpy_array_equal(result, np.arange(5, dtype=dtype)) exp = np.array([0, 1, 2, -1, -1], dtype=np.int64) tm.assert_numpy_array_equal(lindexer, exp) @@ -53,7 +53,7 @@ def test_left_join_indexer_unique(): result = _join.left_join_indexer_unique_int64(b, a) expected = np.array([1, 1, 2, 3, 3], dtype=np.int64) - assert (np.array_equal(result, expected)) + tm.assert_numpy_array_equal(result, expected) def test_left_outer_join_bug(): @@ -69,13 +69,14 @@ def test_left_outer_join_bug(): lidx, ridx = _join.left_outer_join(left, right, max_groups, sort=False) - exp_lidx = np.arange(len(left)) - exp_ridx = -np.ones(len(left)) + exp_lidx = np.arange(len(left), dtype=np.int64) + exp_ridx = -np.ones(len(left), dtype=np.int64) + exp_ridx[left == 1] = 1 exp_ridx[left == 3] = 0 - assert (np.array_equal(lidx, exp_lidx)) - assert 
(np.array_equal(ridx, exp_ridx)) + tm.assert_numpy_array_equal(lidx, exp_lidx) + tm.assert_numpy_array_equal(ridx, exp_ridx) def test_inner_join_indexer(): @@ -192,3 +193,43 @@ def test_inner_join_indexer2(): exp_ridx = np.array([0, 1, 2, 3], dtype=np.int64) assert_almost_equal(ridx, exp_ridx) + + +def test_merge_join_categorical_multiindex(): + # From issue 16627 + a = {'Cat1': Categorical(['a', 'b', 'a', 'c', 'a', 'b'], + ['a', 'b', 'c']), + 'Int1': [0, 1, 0, 1, 0, 0]} + a = DataFrame(a) + + b = {'Cat': Categorical(['a', 'b', 'c', 'a', 'b', 'c'], + ['a', 'b', 'c']), + 'Int': [0, 0, 0, 1, 1, 1], + 'Factor': [1.1, 1.2, 1.3, 1.4, 1.5, 1.6]} + b = DataFrame(b).set_index(['Cat', 'Int'])['Factor'] + + expected = merge(a, b.reset_index(), left_on=['Cat1', 'Int1'], + right_on=['Cat', 'Int'], how='left') + result = a.join(b, on=['Cat1', 'Int1']) + expected = expected.drop(['Cat', 'Int'], axis=1) + assert_frame_equal(expected, result) + + # Same test, but with ordered categorical + a = {'Cat1': Categorical(['a', 'b', 'a', 'c', 'a', 'b'], + ['b', 'a', 'c'], + ordered=True), + 'Int1': [0, 1, 0, 1, 0, 0]} + a = DataFrame(a) + + b = {'Cat': Categorical(['a', 'b', 'c', 'a', 'b', 'c'], + ['b', 'a', 'c'], + ordered=True), + 'Int': [0, 0, 0, 1, 1, 1], + 'Factor': [1.1, 1.2, 1.3, 1.4, 1.5, 1.6]} + b = DataFrame(b).set_index(['Cat', 'Int'])['Factor'] + + expected = merge(a, b.reset_index(), left_on=['Cat1', 'Int1'], + right_on=['Cat', 'Int'], how='left') + result = a.join(b, on=['Cat1', 'Int1']) + expected = expected.drop(['Cat', 'Int'], axis=1) + assert_frame_equal(expected, result) diff --git a/pandas/tests/test_lib.py b/pandas/tests/test_lib.py index 2381c52ef14b6..3e34b48fb6795 100644 --- a/pandas/tests/test_lib.py +++ b/pandas/tests/test_lib.py @@ -1,29 +1,31 @@ # -*- coding: utf-8 -*- +import pytest + import numpy as np -import pandas as pd -import pandas.lib as lib +from pandas import Index +from pandas._libs import lib, writers as libwriters import pandas.util.testing as tm 
-class TestMisc(tm.TestCase): +class TestMisc(object): def test_max_len_string_array(self): arr = a = np.array(['foo', 'b', np.nan], dtype='object') - self.assertTrue(lib.max_len_string_array(arr), 3) + assert libwriters.max_len_string_array(arr) == 3 # unicode arr = a.astype('U').astype(object) - self.assertTrue(lib.max_len_string_array(arr), 3) + assert libwriters.max_len_string_array(arr) == 3 # bytes for python3 arr = a.astype('S').astype(object) - self.assertTrue(lib.max_len_string_array(arr), 3) + assert libwriters.max_len_string_array(arr) == 3 # raises - tm.assertRaises(TypeError, - lambda: lib.max_len_string_array(arr.astype('U'))) + pytest.raises(TypeError, + lambda: libwriters.max_len_string_array(arr.astype('U'))) def test_fast_unique_multiple_list_gen_sort(self): keys = [['p', 'a'], ['n', 'd'], ['a', 's']] @@ -39,7 +41,7 @@ def test_fast_unique_multiple_list_gen_sort(self): tm.assert_numpy_array_equal(np.array(out), expected) -class TestIndexing(tm.TestCase): +class TestIndexing(object): def test_maybe_indices_to_slice_left_edge(self): target = np.arange(100) @@ -47,32 +49,36 @@ def test_maybe_indices_to_slice_left_edge(self): # slice indices = np.array([], dtype=np.int64) maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) - self.assertTrue(isinstance(maybe_slice, slice)) - self.assert_numpy_array_equal(target[indices], target[maybe_slice]) + + assert isinstance(maybe_slice, slice) + tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) for end in [1, 2, 5, 20, 99]: for step in [1, 2, 4]: indices = np.arange(0, end, step, dtype=np.int64) maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) - self.assertTrue(isinstance(maybe_slice, slice)) - self.assert_numpy_array_equal(target[indices], - target[maybe_slice]) + + assert isinstance(maybe_slice, slice) + tm.assert_numpy_array_equal(target[indices], + target[maybe_slice]) # reverse indices = indices[::-1] maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) - 
self.assertTrue(isinstance(maybe_slice, slice)) - self.assert_numpy_array_equal(target[indices], - target[maybe_slice]) + + assert isinstance(maybe_slice, slice) + tm.assert_numpy_array_equal(target[indices], + target[maybe_slice]) # not slice for case in [[2, 1, 2, 0], [2, 2, 1, 0], [0, 1, 2, 1], [-2, 0, 2], [2, 0, -2]]: indices = np.array(case, dtype=np.int64) maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) - self.assertFalse(isinstance(maybe_slice, slice)) - self.assert_numpy_array_equal(maybe_slice, indices) - self.assert_numpy_array_equal(target[indices], target[maybe_slice]) + + assert not isinstance(maybe_slice, slice) + tm.assert_numpy_array_equal(maybe_slice, indices) + tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) def test_maybe_indices_to_slice_right_edge(self): target = np.arange(100) @@ -82,42 +88,49 @@ def test_maybe_indices_to_slice_right_edge(self): for step in [1, 2, 4]: indices = np.arange(start, 99, step, dtype=np.int64) maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) - self.assertTrue(isinstance(maybe_slice, slice)) - self.assert_numpy_array_equal(target[indices], - target[maybe_slice]) + + assert isinstance(maybe_slice, slice) + tm.assert_numpy_array_equal(target[indices], + target[maybe_slice]) # reverse indices = indices[::-1] maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) - self.assertTrue(isinstance(maybe_slice, slice)) - self.assert_numpy_array_equal(target[indices], - target[maybe_slice]) + + assert isinstance(maybe_slice, slice) + tm.assert_numpy_array_equal(target[indices], + target[maybe_slice]) # not slice indices = np.array([97, 98, 99, 100], dtype=np.int64) maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) - self.assertFalse(isinstance(maybe_slice, slice)) - self.assert_numpy_array_equal(maybe_slice, indices) - with self.assertRaises(IndexError): + + assert not isinstance(maybe_slice, slice) + tm.assert_numpy_array_equal(maybe_slice, indices) + + with 
pytest.raises(IndexError): target[indices] - with self.assertRaises(IndexError): + with pytest.raises(IndexError): target[maybe_slice] indices = np.array([100, 99, 98, 97], dtype=np.int64) maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) - self.assertFalse(isinstance(maybe_slice, slice)) - self.assert_numpy_array_equal(maybe_slice, indices) - with self.assertRaises(IndexError): + + assert not isinstance(maybe_slice, slice) + tm.assert_numpy_array_equal(maybe_slice, indices) + + with pytest.raises(IndexError): target[indices] - with self.assertRaises(IndexError): + with pytest.raises(IndexError): target[maybe_slice] for case in [[99, 97, 99, 96], [99, 99, 98, 97], [98, 98, 97, 96]]: indices = np.array(case, dtype=np.int64) maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) - self.assertFalse(isinstance(maybe_slice, slice)) - self.assert_numpy_array_equal(maybe_slice, indices) - self.assert_numpy_array_equal(target[indices], target[maybe_slice]) + + assert not isinstance(maybe_slice, slice) + tm.assert_numpy_array_equal(maybe_slice, indices) + tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) def test_maybe_indices_to_slice_both_edges(self): target = np.arange(10) @@ -126,22 +139,22 @@ def test_maybe_indices_to_slice_both_edges(self): for step in [1, 2, 4, 5, 8, 9]: indices = np.arange(0, 9, step, dtype=np.int64) maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) - self.assertTrue(isinstance(maybe_slice, slice)) - self.assert_numpy_array_equal(target[indices], target[maybe_slice]) + assert isinstance(maybe_slice, slice) + tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) # reverse indices = indices[::-1] maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) - self.assertTrue(isinstance(maybe_slice, slice)) - self.assert_numpy_array_equal(target[indices], target[maybe_slice]) + assert isinstance(maybe_slice, slice) + tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) # not 
slice for case in [[4, 2, 0, -2], [2, 2, 1, 0], [0, 1, 2, 1]]: indices = np.array(case, dtype=np.int64) maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) - self.assertFalse(isinstance(maybe_slice, slice)) - self.assert_numpy_array_equal(maybe_slice, indices) - self.assert_numpy_array_equal(target[indices], target[maybe_slice]) + assert not isinstance(maybe_slice, slice) + tm.assert_numpy_array_equal(maybe_slice, indices) + tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) def test_maybe_indices_to_slice_middle(self): target = np.arange(100) @@ -151,84 +164,43 @@ def test_maybe_indices_to_slice_middle(self): for step in [1, 2, 4, 20]: indices = np.arange(start, end, step, dtype=np.int64) maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) - self.assertTrue(isinstance(maybe_slice, slice)) - self.assert_numpy_array_equal(target[indices], - target[maybe_slice]) + + assert isinstance(maybe_slice, slice) + tm.assert_numpy_array_equal(target[indices], + target[maybe_slice]) # reverse indices = indices[::-1] maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) - self.assertTrue(isinstance(maybe_slice, slice)) - self.assert_numpy_array_equal(target[indices], - target[maybe_slice]) + + assert isinstance(maybe_slice, slice) + tm.assert_numpy_array_equal(target[indices], + target[maybe_slice]) # not slice for case in [[14, 12, 10, 12], [12, 12, 11, 10], [10, 11, 12, 11]]: indices = np.array(case, dtype=np.int64) maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) - self.assertFalse(isinstance(maybe_slice, slice)) - self.assert_numpy_array_equal(maybe_slice, indices) - self.assert_numpy_array_equal(target[indices], target[maybe_slice]) + + assert not isinstance(maybe_slice, slice) + tm.assert_numpy_array_equal(maybe_slice, indices) + tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) def test_maybe_booleans_to_slice(self): arr = np.array([0, 0, 1, 1, 1, 0, 1], dtype=np.uint8) result = 
lib.maybe_booleans_to_slice(arr) - self.assertTrue(result.dtype == np.bool_) + assert result.dtype == np.bool_ result = lib.maybe_booleans_to_slice(arr[:0]) - self.assertTrue(result == slice(0, 0)) + assert result == slice(0, 0) def test_get_reverse_indexer(self): indexer = np.array([-1, -1, 1, 2, 0, -1, 3, 4], dtype=np.int64) result = lib.get_reverse_indexer(indexer, 5) expected = np.array([4, 2, 3, 6, 7], dtype=np.int64) - self.assertTrue(np.array_equal(result, expected)) - - -class TestNullObj(tm.TestCase): - - _1d_methods = ['isnullobj', 'isnullobj_old'] - _2d_methods = ['isnullobj2d', 'isnullobj2d_old'] - - def _check_behavior(self, arr, expected): - for method in TestNullObj._1d_methods: - result = getattr(lib, method)(arr) - tm.assert_numpy_array_equal(result, expected) - - arr = np.atleast_2d(arr) - expected = np.atleast_2d(expected) - - for method in TestNullObj._2d_methods: - result = getattr(lib, method)(arr) - tm.assert_numpy_array_equal(result, expected) - - def test_basic(self): - arr = np.array([1, None, 'foo', -5.1, pd.NaT, np.nan]) - expected = np.array([False, True, False, False, True, True]) - - self._check_behavior(arr, expected) - - def test_non_obj_dtype(self): - arr = np.array([1, 3, np.nan, 5], dtype=float) - expected = np.array([False, False, True, False]) - - self._check_behavior(arr, expected) - - def test_empty_arr(self): - arr = np.array([]) - expected = np.array([], dtype=bool) - - self._check_behavior(arr, expected) - - def test_empty_str_inp(self): - arr = np.array([""]) # empty but not null - expected = np.array([False]) - - self._check_behavior(arr, expected) + tm.assert_numpy_array_equal(result, expected) - def test_empty_like(self): - # see gh-13717: no segfaults! 
- arr = np.empty_like([None]) - expected = np.array([True]) - self._check_behavior(arr, expected) +def test_cache_readonly_preserve_docstrings(): + # GH18197 + assert Index.hasnans.__doc__ is not None diff --git a/pandas/tests/test_msgpack/test_except.py b/pandas/tests/test_msgpack/test_except.py deleted file mode 100644 index 76b91bb375bbc..0000000000000 --- a/pandas/tests/test_msgpack/test_except.py +++ /dev/null @@ -1,33 +0,0 @@ -# coding: utf-8 - -import unittest -from pandas.msgpack import packb, unpackb - - -class DummyException(Exception): - pass - - -class TestExceptions(unittest.TestCase): - - def test_raise_on_find_unsupported_value(self): - import datetime - self.assertRaises(TypeError, packb, datetime.datetime.now()) - - def test_raise_from_object_hook(self): - def hook(obj): - raise DummyException - - self.assertRaises(DummyException, unpackb, packb({}), object_hook=hook) - self.assertRaises(DummyException, unpackb, packb({'fizz': 'buzz'}), - object_hook=hook) - self.assertRaises(DummyException, unpackb, packb({'fizz': 'buzz'}), - object_pairs_hook=hook) - self.assertRaises(DummyException, unpackb, - packb({'fizz': {'buzz': 'spam'}}), object_hook=hook) - self.assertRaises(DummyException, unpackb, - packb({'fizz': {'buzz': 'spam'}}), - object_pairs_hook=hook) - - def test_invalidvalue(self): - self.assertRaises(ValueError, unpackb, b'\xd9\x97#DL_') diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py old mode 100755 new mode 100644 index 8e0628eefa392..79e05c90a21b0 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -4,28 +4,26 @@ import datetime import itertools import pytest +import pytz from numpy.random import randn import numpy as np from pandas.core.index import Index, MultiIndex -from pandas import Panel, DataFrame, Series, notnull, isnull, Timestamp +from pandas import Panel, DataFrame, Series, notna, isna, Timestamp -from pandas.types.common import is_float_dtype, is_integer_dtype -from 
pandas.util.testing import (assert_almost_equal, assert_series_equal, - assert_frame_equal, assertRaisesRegexp) +from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype import pandas.core.common as com import pandas.util.testing as tm from pandas.compat import (range, lrange, StringIO, lzip, u, product as cart_product, zip) import pandas as pd +import pandas._libs.index as _index -import pandas.index as _index +class Base(object): -class TestMultiLevel(tm.TestCase): - - def setUp(self): + def setup_method(self, method): index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', 'three']], @@ -58,6 +56,9 @@ def setUp(self): inplace=True) self.ymd.index.set_names(['year', 'month', 'day'], inplace=True) + +class TestMultiLevel(Base): + def test_append(self): a, b = self.frame[:5], self.frame[5:] @@ -68,8 +69,6 @@ def test_append(self): tm.assert_series_equal(result, self.frame['A']) def test_append_index(self): - tm._skip_if_no_pytz() - idx1 = Index([1.1, 1.2, 1.3]) idx2 = pd.date_range('2011-01-01', freq='D', periods=3, tz='Asia/Tokyo') @@ -80,58 +79,57 @@ def test_append_index(self): result = idx1.append(midx_lv2) - # GH 7112 - import pytz + # see gh-7112 tz = pytz.timezone('Asia/Tokyo') - expected_tuples = [(1.1, datetime.datetime(2011, 1, 1, tzinfo=tz)), - (1.2, datetime.datetime(2011, 1, 2, tzinfo=tz)), - (1.3, datetime.datetime(2011, 1, 3, tzinfo=tz))] + expected_tuples = [(1.1, tz.localize(datetime.datetime(2011, 1, 1))), + (1.2, tz.localize(datetime.datetime(2011, 1, 2))), + (1.3, tz.localize(datetime.datetime(2011, 1, 3)))] expected = Index([1.1, 1.2, 1.3] + expected_tuples) - self.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) result = midx_lv2.append(idx1) expected = Index(expected_tuples + [1.1, 1.2, 1.3]) - self.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) result = midx_lv2.append(midx_lv2) expected = MultiIndex.from_arrays([idx1.append(idx1), idx2.append(idx2)]) 
- self.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) result = midx_lv2.append(midx_lv3) - self.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) result = midx_lv3.append(midx_lv2) expected = Index._simple_new( - np.array([(1.1, datetime.datetime(2011, 1, 1, tzinfo=tz), 'A'), - (1.2, datetime.datetime(2011, 1, 2, tzinfo=tz), 'B'), - (1.3, datetime.datetime(2011, 1, 3, tzinfo=tz), 'C')] + + np.array([(1.1, tz.localize(datetime.datetime(2011, 1, 1)), 'A'), + (1.2, tz.localize(datetime.datetime(2011, 1, 2)), 'B'), + (1.3, tz.localize(datetime.datetime(2011, 1, 3)), 'C')] + expected_tuples), None) - self.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) def test_dataframe_constructor(self): multi = DataFrame(np.random.randn(4, 4), index=[np.array(['a', 'a', 'b', 'b']), np.array(['x', 'y', 'x', 'y'])]) - tm.assertIsInstance(multi.index, MultiIndex) - self.assertNotIsInstance(multi.columns, MultiIndex) + assert isinstance(multi.index, MultiIndex) + assert not isinstance(multi.columns, MultiIndex) multi = DataFrame(np.random.randn(4, 4), columns=[['a', 'a', 'b', 'b'], ['x', 'y', 'x', 'y']]) - tm.assertIsInstance(multi.columns, MultiIndex) + assert isinstance(multi.columns, MultiIndex) def test_series_constructor(self): multi = Series(1., index=[np.array(['a', 'a', 'b', 'b']), np.array( ['x', 'y', 'x', 'y'])]) - tm.assertIsInstance(multi.index, MultiIndex) + assert isinstance(multi.index, MultiIndex) multi = Series(1., index=[['a', 'a', 'b', 'b'], ['x', 'y', 'x', 'y']]) - tm.assertIsInstance(multi.index, MultiIndex) + assert isinstance(multi.index, MultiIndex) multi = Series(lrange(4), index=[['a', 'a', 'b', 'b'], ['x', 'y', 'x', 'y']]) - tm.assertIsInstance(multi.index, MultiIndex) + assert isinstance(multi.index, MultiIndex) def test_reindex_level(self): # axis=0 @@ -139,18 +137,18 @@ def test_reindex_level(self): result = month_sums.reindex(self.ymd.index, level=1) expected = 
self.ymd.groupby(level='month').transform(np.sum) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # Series result = month_sums['A'].reindex(self.ymd.index, level=1) expected = self.ymd['A'].groupby(level='month').transform(np.sum) - assert_series_equal(result, expected, check_names=False) + tm.assert_series_equal(result, expected, check_names=False) # axis=1 month_sums = self.ymd.T.sum(axis=1, level='month') result = month_sums.reindex(columns=self.ymd.index, level=1) expected = self.ymd.groupby(level='month').transform(np.sum).T - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_binops_level(self): def _check_op(opname): @@ -160,7 +158,7 @@ def _check_op(opname): broadcasted = self.ymd.groupby(level='month').transform(np.sum) expected = op(self.ymd, broadcasted) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # Series op = getattr(Series, opname) @@ -169,7 +167,7 @@ def _check_op(opname): np.sum) expected = op(self.ymd['A'], broadcasted) expected.name = 'A' - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) _check_op('sub') _check_op('add') @@ -178,8 +176,8 @@ def _check_op(opname): def test_pickle(self): def _test_roundtrip(frame): - unpickled = self.round_trip_pickle(frame) - assert_frame_equal(frame, unpickled) + unpickled = tm.round_trip_pickle(frame) + tm.assert_frame_equal(frame, unpickled) _test_roundtrip(self.frame) _test_roundtrip(self.frame.T) @@ -189,74 +187,30 @@ def _test_roundtrip(frame): def test_reindex(self): expected = self.frame.iloc[[0, 3]] reindexed = self.frame.loc[[('foo', 'one'), ('bar', 'one')]] - assert_frame_equal(reindexed, expected) + tm.assert_frame_equal(reindexed, expected) with catch_warnings(record=True): reindexed = self.frame.ix[[('foo', 'one'), ('bar', 'one')]] - assert_frame_equal(reindexed, expected) + tm.assert_frame_equal(reindexed, expected) def test_reindex_preserve_levels(self): 
new_index = self.ymd.index[::10] chunk = self.ymd.reindex(new_index) - self.assertIs(chunk.index, new_index) + assert chunk.index is new_index chunk = self.ymd.loc[new_index] - self.assertIs(chunk.index, new_index) + assert chunk.index is new_index with catch_warnings(record=True): chunk = self.ymd.ix[new_index] - self.assertIs(chunk.index, new_index) + assert chunk.index is new_index ymdT = self.ymd.T chunk = ymdT.reindex(columns=new_index) - self.assertIs(chunk.columns, new_index) + assert chunk.columns is new_index chunk = ymdT.loc[:, new_index] - self.assertIs(chunk.columns, new_index) - - def test_sort_index_preserve_levels(self): - result = self.frame.sort_index() - self.assertEqual(result.index.names, self.frame.index.names) - - def test_sorting_repr_8017(self): - - np.random.seed(0) - data = np.random.randn(3, 4) - - for gen, extra in [([1., 3., 2., 5.], 4.), ([1, 3, 2, 5], 4), - ([Timestamp('20130101'), Timestamp('20130103'), - Timestamp('20130102'), Timestamp('20130105')], - Timestamp('20130104')), - (['1one', '3one', '2one', '5one'], '4one')]: - columns = MultiIndex.from_tuples([('red', i) for i in gen]) - df = DataFrame(data, index=list('def'), columns=columns) - df2 = pd.concat([df, - DataFrame('world', index=list('def'), - columns=MultiIndex.from_tuples( - [('red', extra)]))], axis=1) - - # check that the repr is good - # make sure that we have a correct sparsified repr - # e.g. 
only 1 header of read - self.assertEqual(str(df2).splitlines()[0].split(), ['red']) - - # GH 8017 - # sorting fails after columns added - - # construct single-dtype then sort - result = df.copy().sort_index(axis=1) - expected = df.iloc[:, [0, 2, 1, 3]] - assert_frame_equal(result, expected) - - result = df2.sort_index(axis=1) - expected = df2.iloc[:, [0, 2, 1, 4, 3]] - assert_frame_equal(result, expected) - - # setitem then sort - result = df.copy() - result[('red', extra)] = 'world' - result = result.sort_index(axis=1) - assert_frame_equal(result, expected) + assert chunk.columns is new_index def test_repr_to_string(self): repr(self.frame) @@ -277,15 +231,17 @@ def test_repr_name_coincide(self): df = DataFrame({'value': [0, 1]}, index=index) lines = repr(df).split('\n') - self.assertTrue(lines[2].startswith('a 0 foo')) + assert lines[2].startswith('a 0 foo') def test_getitem_simple(self): df = self.frame.T col = df['foo', 'one'] - assert_almost_equal(col.values, df.values[:, 0]) - self.assertRaises(KeyError, df.__getitem__, ('foo', 'four')) - self.assertRaises(KeyError, df.__getitem__, 'foobar') + tm.assert_almost_equal(col.values, df.values[:, 0]) + with pytest.raises(KeyError): + df[('foo', 'four')] + with pytest.raises(KeyError): + df['foobar'] def test_series_getitem(self): s = self.ymd['A'] @@ -297,46 +253,46 @@ def test_series_getitem(self): expected = s.reindex(s.index[42:65]) expected.index = expected.index.droplevel(0).droplevel(0) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) result = s[2000, 3, 10] expected = s[49] - self.assertEqual(result, expected) + assert result == expected # fancy expected = s.reindex(s.index[49:51]) result = s.loc[[(2000, 3, 10), (2000, 3, 13)]] - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) with catch_warnings(record=True): result = s.ix[[(2000, 3, 10), (2000, 3, 13)]] - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) # key 
error - self.assertRaises(KeyError, s.__getitem__, (2000, 3, 4)) + pytest.raises(KeyError, s.__getitem__, (2000, 3, 4)) def test_series_getitem_corner(self): s = self.ymd['A'] # don't segfault, GH #495 # out of bounds access - self.assertRaises(IndexError, s.__getitem__, len(self.ymd)) + pytest.raises(IndexError, s.__getitem__, len(self.ymd)) # generator result = s[(x > 0 for x in s)] expected = s[s > 0] - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_series_setitem(self): s = self.ymd['A'] s[2000, 3] = np.nan - self.assertTrue(isnull(s.values[42:65]).all()) - self.assertTrue(notnull(s.values[:42]).all()) - self.assertTrue(notnull(s.values[65:]).all()) + assert isna(s.values[42:65]).all() + assert notna(s.values[:42]).all() + assert notna(s.values[65:]).all() s[2000, 3, 10] = np.nan - self.assertTrue(isnull(s[49])) + assert isna(s[49]) def test_series_slice_partial(self): pass @@ -347,36 +303,36 @@ def test_frame_getitem_setitem_boolean(self): result = df[df > 0] expected = df.where(df > 0) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) df[df > 0] = 5 values[values > 0] = 5 - assert_almost_equal(df.values, values) + tm.assert_almost_equal(df.values, values) df[df == 5] = 0 values[values == 5] = 0 - assert_almost_equal(df.values, values) + tm.assert_almost_equal(df.values, values) # a df that needs alignment first df[df[:-1] < 0] = 2 np.putmask(values[:-1], values[:-1] < 0, 2) - assert_almost_equal(df.values, values) + tm.assert_almost_equal(df.values, values) - with assertRaisesRegexp(TypeError, 'boolean values only'): + with tm.assert_raises_regex(TypeError, 'boolean values only'): df[df * 0] = 2 def test_frame_getitem_setitem_slice(self): # getitem result = self.frame.iloc[:4] expected = self.frame[:4] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # setitem cp = self.frame.copy() cp.iloc[:4] = 0 - self.assertTrue((cp.values[:4] == 0).all()) - 
self.assertTrue((cp.values[4:] != 0).all()) + assert (cp.values[:4] == 0).all() + assert (cp.values[4:] != 0).all() def test_frame_getitem_setitem_multislice(self): levels = [['t1', 't2'], ['a', 'b', 'c']] @@ -385,25 +341,25 @@ def test_frame_getitem_setitem_multislice(self): df = DataFrame({'value': [1, 2, 3, 7, 8]}, index=midx) result = df.loc[:, 'value'] - assert_series_equal(df['value'], result) + tm.assert_series_equal(df['value'], result) with catch_warnings(record=True): result = df.ix[:, 'value'] - assert_series_equal(df['value'], result) + tm.assert_series_equal(df['value'], result) result = df.loc[df.index[1:3], 'value'] - assert_series_equal(df['value'][1:3], result) + tm.assert_series_equal(df['value'][1:3], result) result = df.loc[:, :] - assert_frame_equal(df, result) + tm.assert_frame_equal(df, result) result = df df.loc[:, 'value'] = 10 result['value'] = 10 - assert_frame_equal(df, result) + tm.assert_frame_equal(df, result) df.loc[:, :] = 10 - assert_frame_equal(df, result) + tm.assert_frame_equal(df, result) def test_frame_getitem_multicolumn_empty_level(self): f = DataFrame({'a': ['1', '2', '3'], 'b': ['2', '3', '4']}) @@ -413,7 +369,7 @@ def test_frame_getitem_multicolumn_empty_level(self): result = f['level1 item1'] expected = DataFrame([['1'], ['2'], ['3']], index=f.index, columns=['level3 item1']) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_frame_setitem_multi_column(self): df = DataFrame(randn(10, 4), columns=[['a', 'a', 'b', 'b'], @@ -421,12 +377,12 @@ def test_frame_setitem_multi_column(self): cp = df.copy() cp['a'] = cp['b'] - assert_frame_equal(cp['a'], cp['b']) + tm.assert_frame_equal(cp['a'], cp['b']) # set with ndarray cp = df.copy() cp['a'] = cp['b'].values - assert_frame_equal(cp['a'], cp['b']) + tm.assert_frame_equal(cp['a'], cp['b']) # --------------------------------------- # #1803 @@ -435,7 +391,7 @@ def test_frame_setitem_multi_column(self): # Works, but adds a column instead of 
updating the two existing ones df['A'] = 0.0 # Doesn't work - self.assertTrue((df['A'].values == 0).all()) + assert (df['A'].values == 0).all() # it broadcasts df['B', '1'] = [1, 2, 3] @@ -444,11 +400,11 @@ def test_frame_setitem_multi_column(self): sliced_a1 = df['A', '1'] sliced_a2 = df['A', '2'] sliced_b1 = df['B', '1'] - assert_series_equal(sliced_a1, sliced_b1, check_names=False) - assert_series_equal(sliced_a2, sliced_b1, check_names=False) - self.assertEqual(sliced_a1.name, ('A', '1')) - self.assertEqual(sliced_a2.name, ('A', '2')) - self.assertEqual(sliced_b1.name, ('B', '1')) + tm.assert_series_equal(sliced_a1, sliced_b1, check_names=False) + tm.assert_series_equal(sliced_a2, sliced_b1, check_names=False) + assert sliced_a1.name == ('A', '1') + assert sliced_a2.name == ('A', '2') + assert sliced_b1.name == ('B', '1') def test_getitem_tuple_plus_slice(self): # GH #671 @@ -465,9 +421,9 @@ def test_getitem_tuple_plus_slice(self): with catch_warnings(record=True): expected3 = idf.ix[0, 0] - assert_series_equal(result, expected) - assert_series_equal(result, expected2) - assert_series_equal(result, expected3) + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected2) + tm.assert_series_equal(result, expected3) def test_getitem_setitem_tuple_plus_columns(self): # GH #1013 @@ -476,26 +432,14 @@ def test_getitem_setitem_tuple_plus_columns(self): result = df.loc[(2000, 1, 6), ['A', 'B', 'C']] expected = df.loc[2000, 1, 6][['A', 'B', 'C']] - assert_series_equal(result, expected) - - def test_getitem_multilevel_index_tuple_unsorted(self): - index_columns = list("abc") - df = DataFrame([[0, 1, 0, "x"], [0, 0, 1, "y"]], - columns=index_columns + ["data"]) - df = df.set_index(index_columns) - query_index = df.index[:1] - rs = df.loc[query_index, "data"] - - xp_idx = MultiIndex.from_tuples([(0, 1, 0)], names=['a', 'b', 'c']) - xp = Series(['x'], index=xp_idx, name='data') - assert_series_equal(rs, xp) + tm.assert_series_equal(result, expected) 
def test_xs(self): xs = self.frame.xs(('bar', 'two')) xs2 = self.frame.loc[('bar', 'two')] - assert_series_equal(xs, xs2) - assert_almost_equal(xs.values, self.frame.values[4]) + tm.assert_series_equal(xs, xs2) + tm.assert_almost_equal(xs.values, self.frame.values[4]) # GH 6574 # missing values in returned index should be preserrved @@ -514,18 +458,18 @@ def test_xs(self): ['xbcde', np.nan, 'zbcde', 'ybcde'], name='a2')) result = df.xs('z', level='a1') - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_xs_partial(self): result = self.frame.xs('foo') result2 = self.frame.loc['foo'] expected = self.frame.T['foo'].T - assert_frame_equal(result, expected) - assert_frame_equal(result, result2) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, result2) result = self.ymd.xs((2000, 4)) expected = self.ymd.loc[2000, 4] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # ex from #1796 index = MultiIndex(levels=[['foo', 'bar'], ['one', 'two'], [-1, 1]], @@ -537,14 +481,14 @@ def test_xs_partial(self): result = df.xs(['foo', 'one']) expected = df.loc['foo', 'one'] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_xs_level(self): result = self.frame.xs('two', level='second') expected = self.frame[self.frame.index.get_level_values(1) == 'two'] expected.index = expected.index.droplevel(1) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) index = MultiIndex.from_tuples([('x', 'y', 'z'), ('a', 'b', 'c'), ( 'p', 'q', 'r')]) @@ -552,7 +496,7 @@ def test_xs_level(self): result = df.xs('c', level=2) expected = df[1:2] expected.index = expected.index.droplevel(2) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # this is a copy in 0.14 result = self.frame.xs('two', level='second') @@ -562,7 +506,7 @@ def test_xs_level(self): def f(x): x[:] = 10 - self.assertRaises(com.SettingWithCopyError, f, 
result) + pytest.raises(com.SettingWithCopyError, f, result) def test_xs_level_multiple(self): from pandas import read_table @@ -576,7 +520,7 @@ def test_xs_level_multiple(self): result = df.xs(('a', 4), level=['one', 'four']) expected = df.xs('a').xs(4, level='four') - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # this is a copy in 0.14 result = df.xs(('a', 4), level=['one', 'four']) @@ -586,7 +530,7 @@ def test_xs_level_multiple(self): def f(x): x[:] = 10 - self.assertRaises(com.SettingWithCopyError, f, result) + pytest.raises(com.SettingWithCopyError, f, result) # GH2107 dates = lrange(20111201, 20111205) @@ -597,7 +541,7 @@ def f(x): rs = df.xs(20111201, level='date') xp = df.loc[20111201, :] - assert_frame_equal(rs, xp) + tm.assert_frame_equal(rs, xp) def test_xs_level0(self): from pandas import read_table @@ -611,29 +555,29 @@ def test_xs_level0(self): result = df.xs('a', level=0) expected = df.xs('a') - self.assertEqual(len(result), 2) - assert_frame_equal(result, expected) + assert len(result) == 2 + tm.assert_frame_equal(result, expected) def test_xs_level_series(self): s = self.frame['A'] result = s[:, 'two'] expected = self.frame.xs('two', level=1)['A'] - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) s = self.ymd['A'] result = s[2000, 5] expected = self.ymd.loc[2000, 5]['A'] - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) # not implementing this for now - self.assertRaises(TypeError, s.__getitem__, (2000, slice(3, 4))) + pytest.raises(TypeError, s.__getitem__, (2000, slice(3, 4))) # result = s[2000, 3:4] # lv =s.index.get_level_values(1) # expected = s[(lv == 3) | (lv == 4)] # expected.index = expected.index.droplevel(0) - # assert_series_equal(result, expected) + # tm.assert_series_equal(result, expected) # can do this though @@ -649,15 +593,15 @@ def test_getitem_toplevel(self): result = df['foo'] expected = df.reindex(columns=df.columns[:3]) 
expected.columns = expected.columns.droplevel(0) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = df['bar'] result2 = df.loc[:, 'bar'] expected = df.reindex(columns=df.columns[3:5]) expected.columns = expected.columns.droplevel(0) - assert_frame_equal(result, expected) - assert_frame_equal(result, result2) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, result2) def test_getitem_setitem_slice_integers(self): index = MultiIndex(levels=[[0, 1, 2], [0, 2]], @@ -667,19 +611,19 @@ def test_getitem_setitem_slice_integers(self): columns=['a', 'b', 'c', 'd']) res = frame.loc[1:2] exp = frame.reindex(frame.index[2:]) - assert_frame_equal(res, exp) + tm.assert_frame_equal(res, exp) frame.loc[1:2] = 7 - self.assertTrue((frame.loc[1:2] == 7).values.all()) + assert (frame.loc[1:2] == 7).values.all() series = Series(np.random.randn(len(index)), index=index) res = series.loc[1:2] exp = series.reindex(series.index[2:]) - assert_series_equal(res, exp) + tm.assert_series_equal(res, exp) series.loc[1:2] = 7 - self.assertTrue((series.loc[1:2] == 7).values.all()) + assert (series.loc[1:2] == 7).values.all() def test_getitem_int(self): levels = [[0, 1], [0, 1, 2]] @@ -691,15 +635,15 @@ def test_getitem_int(self): result = frame.loc[1] expected = frame[-3:] expected.index = expected.index.droplevel(0) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # raises exception - self.assertRaises(KeyError, frame.loc.__getitem__, 3) + pytest.raises(KeyError, frame.loc.__getitem__, 3) # however this will work result = self.frame.iloc[2] expected = self.frame.xs(self.frame.index[2]) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_getitem_partial(self): ymd = self.ymd.T @@ -707,51 +651,43 @@ def test_getitem_partial(self): expected = ymd.reindex(columns=ymd.columns[ymd.columns.labels[1] == 1]) expected.columns = expected.columns.droplevel(0).droplevel(0) - 
assert_frame_equal(result, expected) - - def test_getitem_slice_not_sorted(self): - df = self.frame.sort_index(level=1).T - - # buglet with int typechecking - result = df.iloc[:, :np.int32(3)] - expected = df.reindex(columns=df.columns[:3]) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_setitem_change_dtype(self): dft = self.frame.T s = dft['foo', 'two'] dft['foo', 'two'] = s > s.median() - assert_series_equal(dft['foo', 'two'], s > s.median()) - # tm.assertIsInstance(dft._data.blocks[1].items, MultiIndex) + tm.assert_series_equal(dft['foo', 'two'], s > s.median()) + # assert isinstance(dft._data.blocks[1].items, MultiIndex) reindexed = dft.reindex(columns=[('foo', 'two')]) - assert_series_equal(reindexed['foo', 'two'], s > s.median()) + tm.assert_series_equal(reindexed['foo', 'two'], s > s.median()) def test_frame_setitem_ix(self): self.frame.loc[('bar', 'two'), 'B'] = 5 - self.assertEqual(self.frame.loc[('bar', 'two'), 'B'], 5) + assert self.frame.loc[('bar', 'two'), 'B'] == 5 # with integer labels df = self.frame.copy() df.columns = lrange(3) df.loc[('bar', 'two'), 1] = 7 - self.assertEqual(df.loc[('bar', 'two'), 1], 7) + assert df.loc[('bar', 'two'), 1] == 7 with catch_warnings(record=True): df = self.frame.copy() df.columns = lrange(3) df.ix[('bar', 'two'), 1] = 7 - self.assertEqual(df.loc[('bar', 'two'), 1], 7) + assert df.loc[('bar', 'two'), 1] == 7 def test_fancy_slice_partial(self): result = self.frame.loc['bar':'baz'] expected = self.frame[3:7] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = self.ymd.loc[(2000, 2):(2000, 4)] lev = self.ymd.index.labels[1] expected = self.ymd[(lev >= 1) & (lev <= 3)] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_getitem_partial_column_select(self): idx = MultiIndex(labels=[[0, 0, 0], [0, 1, 1], [1, 0, 1]], @@ -760,54 +696,18 @@ def test_getitem_partial_column_select(self): result = df.loc[('a', 
'y'), :] expected = df.loc[('a', 'y')] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = df.loc[('a', 'y'), [1, 0]] expected = df.loc[('a', 'y')][[1, 0]] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) with catch_warnings(record=True): result = df.ix[('a', 'y'), [1, 0]] - assert_frame_equal(result, expected) - - self.assertRaises(KeyError, df.loc.__getitem__, - (('a', 'foo'), slice(None, None))) - - def test_sort_index_level(self): - df = self.frame.copy() - df.index = np.arange(len(df)) - - # axis=1 - - # series - a_sorted = self.frame['A'].sort_index(level=0) - - # preserve names - self.assertEqual(a_sorted.index.names, self.frame.index.names) - - # inplace - rs = self.frame.copy() - rs.sort_index(level=0, inplace=True) - assert_frame_equal(rs, self.frame.sort_index(level=0)) - - def test_sort_index_level_large_cardinality(self): - - # #2684 (int64) - index = MultiIndex.from_arrays([np.arange(4000)] * 3) - df = DataFrame(np.random.randn(4000), index=index, dtype=np.int64) - - # it works! - result = df.sort_index(level=0) - self.assertTrue(result.index.lexsort_depth == 3) + tm.assert_frame_equal(result, expected) - # #2684 (int32) - index = MultiIndex.from_arrays([np.arange(4000)] * 3) - df = DataFrame(np.random.randn(4000), index=index, dtype=np.int32) - - # it works! 
- result = df.sort_index(level=0) - self.assertTrue((result.dtypes.values == df.dtypes.values).all()) - self.assertTrue(result.index.lexsort_depth == 3) + pytest.raises(KeyError, df.loc.__getitem__, + (('a', 'foo'), slice(None, None))) def test_delevel_infer_dtype(self): tuples = [tuple @@ -817,42 +717,19 @@ def test_delevel_infer_dtype(self): df = DataFrame(np.random.randn(8, 3), columns=['A', 'B', 'C'], index=index) deleveled = df.reset_index() - self.assertTrue(is_integer_dtype(deleveled['prm1'])) - self.assertTrue(is_float_dtype(deleveled['prm2'])) + assert is_integer_dtype(deleveled['prm1']) + assert is_float_dtype(deleveled['prm2']) def test_reset_index_with_drop(self): deleveled = self.ymd.reset_index(drop=True) - self.assertEqual(len(deleveled.columns), len(self.ymd.columns)) + assert len(deleveled.columns) == len(self.ymd.columns) deleveled = self.series.reset_index() - tm.assertIsInstance(deleveled, DataFrame) - self.assertEqual(len(deleveled.columns), - len(self.series.index.levels) + 1) + assert isinstance(deleveled, DataFrame) + assert len(deleveled.columns) == len(self.series.index.levels) + 1 deleveled = self.series.reset_index(drop=True) - tm.assertIsInstance(deleveled, Series) - - def test_sort_index_level_by_name(self): - self.frame.index.names = ['first', 'second'] - result = self.frame.sort_index(level='second') - expected = self.frame.sort_index(level=1) - assert_frame_equal(result, expected) - - def test_sort_index_level_mixed(self): - sorted_before = self.frame.sort_index(level=1) - - df = self.frame.copy() - df['foo'] = 'bar' - sorted_after = df.sort_index(level=1) - assert_frame_equal(sorted_before, sorted_after.drop(['foo'], axis=1)) - - dft = self.frame.T - sorted_before = dft.sort_index(level=1, axis=1) - dft['foo', 'three'] = 'bar' - - sorted_after = dft.sort_index(level=1, axis=1) - assert_frame_equal(sorted_before.drop([('foo', 'three')], axis=1), - sorted_after.drop([('foo', 'three')], axis=1)) + assert isinstance(deleveled, Series) 
def test_count_level(self): def _check_counts(frame, axis=0): @@ -861,7 +738,7 @@ def _check_counts(frame, axis=0): result = frame.count(axis=axis, level=i) expected = frame.groupby(axis=axis, level=i).count() expected = expected.reindex_like(result).astype('i8') - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) self.frame.iloc[1, [1, 2]] = np.nan self.frame.iloc[7, [0, 1]] = np.nan @@ -875,12 +752,12 @@ def _check_counts(frame, axis=0): # can't call with level on regular DataFrame df = tm.makeTimeDataFrame() - assertRaisesRegexp(TypeError, 'hierarchical', df.count, level=0) + tm.assert_raises_regex( + TypeError, 'hierarchical', df.count, level=0) self.frame['D'] = 'foo' result = self.frame.count(level=0, numeric_only=True) - tm.assert_index_equal(result.columns, - pd.Index(['A', 'B', 'C'], name='exp')) + tm.assert_index_equal(result.columns, Index(list('ABC'), name='exp')) def test_count_level_series(self): index = MultiIndex(levels=[['foo', 'bar', 'baz'], ['one', 'two', @@ -891,30 +768,31 @@ def test_count_level_series(self): result = s.count(level=0) expected = s.groupby(level=0).count() - assert_series_equal(result.astype('f8'), - expected.reindex(result.index).fillna(0)) + tm.assert_series_equal( + result.astype('f8'), expected.reindex(result.index).fillna(0)) result = s.count(level=1) expected = s.groupby(level=1).count() - assert_series_equal(result.astype('f8'), - expected.reindex(result.index).fillna(0)) + tm.assert_series_equal( + result.astype('f8'), expected.reindex(result.index).fillna(0)) def test_count_level_corner(self): s = self.frame['A'][:0] result = s.count(level=0) expected = Series(0, index=s.index.levels[0], name='A') - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) df = self.frame[:0] result = df.count(level=0) expected = DataFrame({}, index=s.index.levels[0], columns=df.columns).fillna(0).astype(np.int64) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, 
expected) def test_get_level_number_out_of_bounds(self): - with assertRaisesRegexp(IndexError, "Too many levels"): + with tm.assert_raises_regex(IndexError, "Too many levels"): self.frame.index._get_level_number(2) - with assertRaisesRegexp(IndexError, "not a valid level number"): + with tm.assert_raises_regex(IndexError, + "not a valid level number"): self.frame.index._get_level_number(-3) def test_unstack(self): @@ -936,56 +814,56 @@ def test_unstack_multiple_no_empty_columns(self): unstacked = s.unstack([1, 2]) expected = unstacked.dropna(axis=1, how='all') - assert_frame_equal(unstacked, expected) + tm.assert_frame_equal(unstacked, expected) def test_stack(self): # regular roundtrip unstacked = self.ymd.unstack() restacked = unstacked.stack() - assert_frame_equal(restacked, self.ymd) + tm.assert_frame_equal(restacked, self.ymd) unlexsorted = self.ymd.sort_index(level=2) unstacked = unlexsorted.unstack(2) restacked = unstacked.stack() - assert_frame_equal(restacked.sort_index(level=0), self.ymd) + tm.assert_frame_equal(restacked.sort_index(level=0), self.ymd) unlexsorted = unlexsorted[::-1] unstacked = unlexsorted.unstack(1) restacked = unstacked.stack().swaplevel(1, 2) - assert_frame_equal(restacked.sort_index(level=0), self.ymd) + tm.assert_frame_equal(restacked.sort_index(level=0), self.ymd) unlexsorted = unlexsorted.swaplevel(0, 1) unstacked = unlexsorted.unstack(0).swaplevel(0, 1, axis=1) restacked = unstacked.stack(0).swaplevel(1, 2) - assert_frame_equal(restacked.sort_index(level=0), self.ymd) + tm.assert_frame_equal(restacked.sort_index(level=0), self.ymd) # columns unsorted unstacked = self.ymd.unstack() unstacked = unstacked.sort_index(axis=1, ascending=False) restacked = unstacked.stack() - assert_frame_equal(restacked, self.ymd) + tm.assert_frame_equal(restacked, self.ymd) # more than 2 levels in the columns unstacked = self.ymd.unstack(1).unstack(1) result = unstacked.stack(1) expected = self.ymd.unstack() - assert_frame_equal(result, expected) + 
tm.assert_frame_equal(result, expected) result = unstacked.stack(2) expected = self.ymd.unstack(1) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = unstacked.stack(0) expected = self.ymd.stack().unstack(1).unstack(1) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # not all levels present in each echelon unstacked = self.ymd.unstack(2).loc[:, ::3] stacked = unstacked.stack().stack() ymd_stacked = self.ymd.stack() - assert_series_equal(stacked, ymd_stacked.reindex(stacked.index)) + tm.assert_series_equal(stacked, ymd_stacked.reindex(stacked.index)) # stack with negative number result = self.ymd.unstack(0).stack(-2) @@ -993,8 +871,8 @@ def test_stack(self): # GH10417 def check(left, right): - assert_series_equal(left, right) - self.assertFalse(left.index.is_unique) + tm.assert_series_equal(left, right) + assert not left.index.is_unique li, ri = left.index, right.index tm.assert_index_equal(li, ri) @@ -1049,7 +927,7 @@ def test_unstack_odd_failure(self): result = df.unstack(2) recons = result.stack() - assert_frame_equal(recons, df) + tm.assert_frame_equal(recons, df) def test_stack_mixed_dtype(self): df = self.frame.T @@ -1057,10 +935,10 @@ def test_stack_mixed_dtype(self): df = df.sort_index(level=1, axis=1) stacked = df.stack() - result = df['foo'].stack() - assert_series_equal(stacked['foo'], result, check_names=False) - self.assertIs(result.name, None) - self.assertEqual(stacked['bar'].dtype, np.float_) + result = df['foo'].stack().sort_index() + tm.assert_series_equal(stacked['foo'], result, check_names=False) + assert result.name is None + assert stacked['bar'].dtype == np.float_ def test_unstack_bug(self): df = DataFrame({'state': ['naive', 'naive', 'naive', 'activ', 'activ', @@ -1074,73 +952,74 @@ def test_unstack_bug(self): unstacked = result.unstack() restacked = unstacked.stack() - assert_series_equal(restacked, - result.reindex(restacked.index).astype(float)) + 
tm.assert_series_equal( + restacked, result.reindex(restacked.index).astype(float)) def test_stack_unstack_preserve_names(self): unstacked = self.frame.unstack() - self.assertEqual(unstacked.index.name, 'first') - self.assertEqual(unstacked.columns.names, ['exp', 'second']) + assert unstacked.index.name == 'first' + assert unstacked.columns.names == ['exp', 'second'] restacked = unstacked.stack() - self.assertEqual(restacked.index.names, self.frame.index.names) + assert restacked.index.names == self.frame.index.names def test_unstack_level_name(self): result = self.frame.unstack('second') expected = self.frame.unstack(level=1) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_stack_level_name(self): unstacked = self.frame.unstack('second') result = unstacked.stack('exp') expected = self.frame.unstack().stack(0) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = self.frame.stack('exp') expected = self.frame.stack() - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_stack_unstack_multiple(self): unstacked = self.ymd.unstack(['year', 'month']) expected = self.ymd.unstack('year').unstack('month') - assert_frame_equal(unstacked, expected) - self.assertEqual(unstacked.columns.names, expected.columns.names) + tm.assert_frame_equal(unstacked, expected) + assert unstacked.columns.names == expected.columns.names # series s = self.ymd['A'] s_unstacked = s.unstack(['year', 'month']) - assert_frame_equal(s_unstacked, expected['A']) + tm.assert_frame_equal(s_unstacked, expected['A']) restacked = unstacked.stack(['year', 'month']) restacked = restacked.swaplevel(0, 1).swaplevel(1, 2) restacked = restacked.sort_index(level=0) - assert_frame_equal(restacked, self.ymd) - self.assertEqual(restacked.index.names, self.ymd.index.names) + tm.assert_frame_equal(restacked, self.ymd) + assert restacked.index.names == self.ymd.index.names # GH #451 unstacked = 
self.ymd.unstack([1, 2]) expected = self.ymd.unstack(1).unstack(1).dropna(axis=1, how='all') - assert_frame_equal(unstacked, expected) + tm.assert_frame_equal(unstacked, expected) unstacked = self.ymd.unstack([2, 1]) expected = self.ymd.unstack(2).unstack(1).dropna(axis=1, how='all') - assert_frame_equal(unstacked, expected.loc[:, unstacked.columns]) + tm.assert_frame_equal(unstacked, expected.loc[:, unstacked.columns]) def test_stack_names_and_numbers(self): unstacked = self.ymd.unstack(['year', 'month']) # Can't use mixture of names and numbers to stack - with assertRaisesRegexp(ValueError, "level should contain"): + with tm.assert_raises_regex(ValueError, "level should contain"): unstacked.stack([0, 'month']) def test_stack_multiple_out_of_bounds(self): # nlevels == 3 unstacked = self.ymd.unstack(['year', 'month']) - with assertRaisesRegexp(IndexError, "Too many levels"): + with tm.assert_raises_regex(IndexError, "Too many levels"): unstacked.stack([2, 3]) - with assertRaisesRegexp(IndexError, "not a valid level number"): + with tm.assert_raises_regex(IndexError, + "not a valid level number"): unstacked.stack([-4, -3]) def test_unstack_period_series(self): @@ -1163,16 +1042,16 @@ def test_unstack_period_series(self): columns=['A', 'B']) expected.columns.name = 'str' - assert_frame_equal(result1, expected) - assert_frame_equal(result2, expected) - assert_frame_equal(result3, expected.T) + tm.assert_frame_equal(result1, expected) + tm.assert_frame_equal(result2, expected) + tm.assert_frame_equal(result3, expected.T) idx1 = pd.PeriodIndex(['2013-01', '2013-01', '2013-02', '2013-02', '2013-03', '2013-03'], freq='M', name='period1') idx2 = pd.PeriodIndex(['2013-12', '2013-11', '2013-10', '2013-09', '2013-08', '2013-07'], freq='M', name='period2') - idx = pd.MultiIndex.from_arrays([idx1, idx2]) + idx = MultiIndex.from_arrays([idx1, idx2]) s = Series(value, index=idx) result1 = s.unstack() @@ -1189,9 +1068,9 @@ def test_unstack_period_series(self): [6, 5, np.nan, 
np.nan, np.nan, np.nan]], index=e_idx, columns=e_cols) - assert_frame_equal(result1, expected) - assert_frame_equal(result2, expected) - assert_frame_equal(result3, expected.T) + tm.assert_frame_equal(result1, expected) + tm.assert_frame_equal(result2, expected) + tm.assert_frame_equal(result3, expected.T) def test_unstack_period_frame(self): # GH 4342 @@ -1202,8 +1081,8 @@ def test_unstack_period_frame(self): '2013-10', '2014-02'], freq='M', name='period2') value = {'A': [1, 2, 3, 4, 5, 6], 'B': [6, 5, 4, 3, 2, 1]} - idx = pd.MultiIndex.from_arrays([idx1, idx2]) - df = pd.DataFrame(value, index=idx) + idx = MultiIndex.from_arrays([idx1, idx2]) + df = DataFrame(value, index=idx) result1 = df.unstack() result2 = df.unstack(level=1) @@ -1212,22 +1091,22 @@ def test_unstack_period_frame(self): e_1 = pd.PeriodIndex(['2014-01', '2014-02'], freq='M', name='period1') e_2 = pd.PeriodIndex(['2013-10', '2013-12', '2014-02', '2013-10', '2013-12', '2014-02'], freq='M', name='period2') - e_cols = pd.MultiIndex.from_arrays(['A A A B B B'.split(), e_2]) + e_cols = MultiIndex.from_arrays(['A A A B B B'.split(), e_2]) expected = DataFrame([[5, 1, 6, 2, 6, 1], [4, 2, 3, 3, 5, 4]], index=e_1, columns=e_cols) - assert_frame_equal(result1, expected) - assert_frame_equal(result2, expected) + tm.assert_frame_equal(result1, expected) + tm.assert_frame_equal(result2, expected) e_1 = pd.PeriodIndex(['2014-01', '2014-02', '2014-01', '2014-02'], freq='M', name='period1') e_2 = pd.PeriodIndex( ['2013-10', '2013-12', '2014-02'], freq='M', name='period2') - e_cols = pd.MultiIndex.from_arrays(['A A B B'.split(), e_1]) + e_cols = MultiIndex.from_arrays(['A A B B'.split(), e_1]) expected = DataFrame([[5, 4, 2, 3], [1, 2, 6, 5], [6, 3, 1, 4]], index=e_2, columns=e_cols) - assert_frame_equal(result3, expected) + tm.assert_frame_equal(result3, expected) def test_stack_multiple_bug(self): """ bug when some uniques are not present in the data #3170""" @@ -1245,18 +1124,18 @@ def 
test_stack_multiple_bug(self): rs = down.stack('ID') xp = unst.loc[:, ['VAR1']].resample('W-THU').mean().stack('ID') xp.columns.name = 'Params' - assert_frame_equal(rs, xp) + tm.assert_frame_equal(rs, xp) def test_stack_dropna(self): # GH #3997 - df = pd.DataFrame({'A': ['a1', 'a2'], 'B': ['b1', 'b2'], 'C': [1, 1]}) + df = DataFrame({'A': ['a1', 'a2'], 'B': ['b1', 'b2'], 'C': [1, 1]}) df = df.set_index(['A', 'B']) stacked = df.unstack().stack(dropna=False) - self.assertTrue(len(stacked) > len(stacked.dropna())) + assert len(stacked) > len(stacked.dropna()) stacked = df.unstack().stack(dropna=True) - assert_frame_equal(stacked, stacked.dropna()) + tm.assert_frame_equal(stacked, stacked.dropna()) def test_unstack_multiple_hierarchical(self): df = DataFrame(index=[[0, 0, 0, 0, 1, 1, 1, 1], @@ -1279,7 +1158,7 @@ def test_groupby_transform(self): applied = grouped.apply(lambda x: x * 2) expected = grouped.transform(lambda x: x * 2) result = applied.reindex(expected.index) - assert_series_equal(result, expected, check_names=False) + tm.assert_series_equal(result, expected, check_names=False) def test_unstack_sparse_keyspace(self): # memory problems with naive impl #2278 @@ -1308,10 +1187,41 @@ def test_unstack_unobserved_keys(self): df = DataFrame(np.random.randn(4, 2), index=index) result = df.unstack() - self.assertEqual(len(result.columns), 4) + assert len(result.columns) == 4 recons = result.stack() - assert_frame_equal(recons, df) + tm.assert_frame_equal(recons, df) + + def test_stack_order_with_unsorted_levels(self): + # GH 16323 + + def manual_compare_stacked(df, df_stacked, lev0, lev1): + assert all(df.loc[row, col] == + df_stacked.loc[(row, col[lev0]), col[lev1]] + for row in df.index for col in df.columns) + + # deep check for 1-row case + for width in [2, 3]: + levels_poss = itertools.product( + itertools.permutations([0, 1, 2], width), + repeat=2) + + for levels in levels_poss: + columns = MultiIndex(levels=levels, + labels=[[0, 0, 1, 1], + [0, 1, 0, 1]]) + 
df = DataFrame(columns=columns, data=[range(4)]) + for stack_lev in range(2): + df_stacked = df.stack(stack_lev) + manual_compare_stacked(df, df_stacked, + stack_lev, 1 - stack_lev) + + # check multi-row case + mi = MultiIndex(levels=[["A", "C", "B"], ["B", "A", "C"]], + labels=[np.repeat(range(3), 3), np.tile(range(3), 3)]) + df = DataFrame(columns=mi, index=range(5), + data=np.arange(5 * len(mi)).reshape(5, -1)) + manual_compare_stacked(df, df.stack(0), 0, 1) def test_groupby_corner(self): midx = MultiIndex(levels=[['foo'], ['bar'], ['baz']], @@ -1328,11 +1238,12 @@ def test_groupby_level_no_obs(self): 'f2', 's1'), ('f2', 's2'), ('f3', 's1'), ('f3', 's2')]) df = DataFrame( [[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]], columns=midx) - df1 = df.select(lambda u: u[0] in ['f2', 'f3'], axis=1) + df1 = df.loc(axis=1)[df.columns.map( + lambda u: u[0] in ['f2', 'f3'])] grouped = df1.groupby(axis=1, level=0) result = grouped.sum() - self.assertTrue((result.columns == ['f2', 'f3']).all()) + assert (result.columns == ['f2', 'f3']).all() def test_join(self): a = self.frame.loc[self.frame.index[:5], ['A']] @@ -1342,69 +1253,70 @@ def test_join(self): expected = self.frame.copy() expected.values[np.isnan(joined.values)] = np.nan - self.assertFalse(np.isnan(joined.values).all()) + assert not np.isnan(joined.values).all() - assert_frame_equal(joined, expected, check_names=False - ) # TODO what should join do with names ? + # TODO what should join do with names ? 
+ tm.assert_frame_equal(joined, expected, check_names=False) def test_swaplevel(self): swapped = self.frame['A'].swaplevel() swapped2 = self.frame['A'].swaplevel(0) swapped3 = self.frame['A'].swaplevel(0, 1) swapped4 = self.frame['A'].swaplevel('first', 'second') - self.assertFalse(swapped.index.equals(self.frame.index)) - assert_series_equal(swapped, swapped2) - assert_series_equal(swapped, swapped3) - assert_series_equal(swapped, swapped4) + assert not swapped.index.equals(self.frame.index) + tm.assert_series_equal(swapped, swapped2) + tm.assert_series_equal(swapped, swapped3) + tm.assert_series_equal(swapped, swapped4) back = swapped.swaplevel() back2 = swapped.swaplevel(0) back3 = swapped.swaplevel(0, 1) back4 = swapped.swaplevel('second', 'first') - self.assertTrue(back.index.equals(self.frame.index)) - assert_series_equal(back, back2) - assert_series_equal(back, back3) - assert_series_equal(back, back4) + assert back.index.equals(self.frame.index) + tm.assert_series_equal(back, back2) + tm.assert_series_equal(back, back3) + tm.assert_series_equal(back, back4) ft = self.frame.T swapped = ft.swaplevel('first', 'second', axis=1) exp = self.frame.swaplevel('first', 'second').T - assert_frame_equal(swapped, exp) + tm.assert_frame_equal(swapped, exp) def test_swaplevel_panel(self): - panel = Panel({'ItemA': self.frame, 'ItemB': self.frame * 2}) - expected = panel.copy() - expected.major_axis = expected.major_axis.swaplevel(0, 1) + with catch_warnings(record=True): + panel = Panel({'ItemA': self.frame, 'ItemB': self.frame * 2}) + expected = panel.copy() + expected.major_axis = expected.major_axis.swaplevel(0, 1) - for result in (panel.swaplevel(axis='major'), - panel.swaplevel(0, axis='major'), - panel.swaplevel(0, 1, axis='major')): - tm.assert_panel_equal(result, expected) + for result in (panel.swaplevel(axis='major'), + panel.swaplevel(0, axis='major'), + panel.swaplevel(0, 1, axis='major')): + tm.assert_panel_equal(result, expected) def 
test_reorder_levels(self): result = self.ymd.reorder_levels(['month', 'day', 'year']) expected = self.ymd.swaplevel(0, 1).swaplevel(1, 2) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = self.ymd['A'].reorder_levels(['month', 'day', 'year']) expected = self.ymd['A'].swaplevel(0, 1).swaplevel(1, 2) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) result = self.ymd.T.reorder_levels(['month', 'day', 'year'], axis=1) expected = self.ymd.T.swaplevel(0, 1, axis=1).swaplevel(1, 2, axis=1) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) - with assertRaisesRegexp(TypeError, 'hierarchical axis'): + with tm.assert_raises_regex(TypeError, 'hierarchical axis'): self.ymd.reorder_levels([1, 2], axis=1) - with assertRaisesRegexp(IndexError, 'Too many levels'): + with tm.assert_raises_regex(IndexError, 'Too many levels'): self.ymd.index.reorder_levels([1, 2, 3]) def test_insert_index(self): df = self.ymd[:5].T df[2000, 1, 10] = df[2000, 1, 7] - tm.assertIsInstance(df.columns, MultiIndex) - self.assertTrue((df[2000, 1, 10] == df[2000, 1, 7]).all()) + assert isinstance(df.columns, MultiIndex) + assert (df[2000, 1, 10] == df[2000, 1, 7]).all() def test_alignment(self): x = Series(data=[1, 2, 3], index=MultiIndex.from_tuples([("A", 1), ( @@ -1416,29 +1328,13 @@ def test_alignment(self): res = x - y exp_index = x.index.union(y.index) exp = x.reindex(exp_index) - y.reindex(exp_index) - assert_series_equal(res, exp) + tm.assert_series_equal(res, exp) # hit non-monotonic code path res = x[::-1] - y[::-1] exp_index = x.index.union(y.index) exp = x.reindex(exp_index) - y.reindex(exp_index) - assert_series_equal(res, exp) - - def test_is_lexsorted(self): - levels = [[0, 1], [0, 1, 2]] - - index = MultiIndex(levels=levels, - labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]) - self.assertTrue(index.is_lexsorted()) - - index = MultiIndex(levels=levels, - labels=[[0, 0, 0, 1, 1, 1], [0, 1, 
2, 0, 2, 1]]) - self.assertFalse(index.is_lexsorted()) - - index = MultiIndex(levels=levels, - labels=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]]) - self.assertFalse(index.is_lexsorted()) - self.assertEqual(index.lexsort_depth, 0) + tm.assert_series_equal(res, exp) def test_frame_getitem_view(self): df = self.frame.T.copy() @@ -1446,7 +1342,7 @@ def test_frame_getitem_view(self): # this works because we are modifying the underlying array # really a no-no df['foo'].values[:] = 0 - self.assertTrue((df['foo'].values == 0).all()) + assert (df['foo'].values == 0).all() # but not if it's mixed-type df['foo', 'four'] = 'foo' @@ -1457,50 +1353,13 @@ def f(): df['foo']['one'] = 2 return df - self.assertRaises(com.SettingWithCopyError, f) + pytest.raises(com.SettingWithCopyError, f) try: df = f() except: pass - self.assertTrue((df['foo', 'one'] == 0).all()) - - def test_frame_getitem_not_sorted(self): - df = self.frame.T - df['foo', 'four'] = 'foo' - - arrays = [np.array(x) for x in zip(*df.columns._tuple_index)] - - result = df['foo'] - result2 = df.loc[:, 'foo'] - expected = df.reindex(columns=df.columns[arrays[0] == 'foo']) - expected.columns = expected.columns.droplevel(0) - assert_frame_equal(result, expected) - assert_frame_equal(result2, expected) - - df = df.T - result = df.xs('foo') - result2 = df.loc['foo'] - expected = df.reindex(df.index[arrays[0] == 'foo']) - expected.index = expected.index.droplevel(0) - assert_frame_equal(result, expected) - assert_frame_equal(result2, expected) - - def test_series_getitem_not_sorted(self): - arrays = [['bar', 'bar', 'baz', 'baz', 'qux', 'qux', 'foo', 'foo'], - ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] - tuples = lzip(*arrays) - index = MultiIndex.from_tuples(tuples) - s = Series(randn(8), index=index) - - arrays = [np.array(x) for x in zip(*index._tuple_index)] - - result = s['qux'] - result2 = s.loc['qux'] - expected = s[arrays[0] == 'qux'] - expected.index = expected.index.droplevel(0) - 
assert_series_equal(result, expected) - assert_series_equal(result2, expected) + assert (df['foo', 'one'] == 0).all() def test_count(self): frame = self.frame.copy() @@ -1508,54 +1367,61 @@ def test_count(self): result = frame.count(level='b') expect = self.frame.count(level=1) - assert_frame_equal(result, expect, check_names=False) + tm.assert_frame_equal(result, expect, check_names=False) result = frame.count(level='a') expect = self.frame.count(level=0) - assert_frame_equal(result, expect, check_names=False) + tm.assert_frame_equal(result, expect, check_names=False) series = self.series.copy() series.index.names = ['a', 'b'] result = series.count(level='b') expect = self.series.count(level=1) - assert_series_equal(result, expect, check_names=False) - self.assertEqual(result.index.name, 'b') + tm.assert_series_equal(result, expect, check_names=False) + assert result.index.name == 'b' result = series.count(level='a') expect = self.series.count(level=0) - assert_series_equal(result, expect, check_names=False) - self.assertEqual(result.index.name, 'a') + tm.assert_series_equal(result, expect, check_names=False) + assert result.index.name == 'a' - self.assertRaises(KeyError, series.count, 'x') - self.assertRaises(KeyError, frame.count, level='x') + pytest.raises(KeyError, series.count, 'x') + pytest.raises(KeyError, frame.count, level='x') AGG_FUNCTIONS = ['sum', 'prod', 'min', 'max', 'median', 'mean', 'skew', 'mad', 'std', 'var', 'sem'] - def test_series_group_min_max(self): + @pytest.mark.parametrize('sort', [True, False]) + def test_series_group_min_max(self, sort): + # GH 17537 for op, level, skipna in cart_product(self.AGG_FUNCTIONS, lrange(2), [False, True]): - grouped = self.series.groupby(level=level) + grouped = self.series.groupby(level=level, sort=sort) aggf = lambda x: getattr(x, op)(skipna=skipna) # skipna=True leftside = grouped.agg(aggf) rightside = getattr(self.series, op)(level=level, skipna=skipna) - assert_series_equal(leftside, rightside) + if 
sort: + rightside = rightside.sort_index(level=level) + tm.assert_series_equal(leftside, rightside) - def test_frame_group_ops(self): + @pytest.mark.parametrize('sort', [True, False]) + def test_frame_group_ops(self, sort): + # GH 17537 self.frame.iloc[1, [1, 2]] = np.nan self.frame.iloc[7, [0, 1]] = np.nan for op, level, axis, skipna in cart_product(self.AGG_FUNCTIONS, lrange(2), lrange(2), [False, True]): + if axis == 0: frame = self.frame else: frame = self.frame.T - grouped = frame.groupby(level=level, axis=axis) + grouped = frame.groupby(level=level, axis=axis, sort=sort) pieces = [] @@ -1566,21 +1432,24 @@ def aggf(x): leftside = grouped.agg(aggf) rightside = getattr(frame, op)(level=level, axis=axis, skipna=skipna) + if sort: + rightside = rightside.sort_index(level=level, axis=axis) + frame = frame.sort_index(level=level, axis=axis) # for good measure, groupby detail level_index = frame._get_axis(axis).levels[level] - self.assert_index_equal(leftside._get_axis(axis), level_index) - self.assert_index_equal(rightside._get_axis(axis), level_index) + tm.assert_index_equal(leftside._get_axis(axis), level_index) + tm.assert_index_equal(rightside._get_axis(axis), level_index) - assert_frame_equal(leftside, rightside) + tm.assert_frame_equal(leftside, rightside) def test_stat_op_corner(self): obj = Series([10.0], index=MultiIndex.from_tuples([(2, 3)])) result = obj.sum(level=0) expected = Series([10.0], index=[2]) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_frame_any_all_group(self): df = DataFrame( @@ -1591,11 +1460,11 @@ def test_frame_any_all_group(self): result = df.any(level=0) ex = DataFrame({'data': [False, True]}, index=['one', 'two']) - assert_frame_equal(result, ex) + tm.assert_frame_equal(result, ex) result = df.all(level=0) ex = DataFrame({'data': [False, False]}, index=['one', 'two']) - assert_frame_equal(result, ex) + tm.assert_frame_equal(result, ex) def test_std_var_pass_ddof(self): index = 
MultiIndex.from_arrays([np.arange(5).repeat(10), np.tile( @@ -1608,20 +1477,20 @@ def test_std_var_pass_ddof(self): result = getattr(df[0], meth)(level=0, ddof=ddof) expected = df[0].groupby(level=0).agg(alt) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) result = getattr(df, meth)(level=0, ddof=ddof) expected = df.groupby(level=0).agg(alt) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_frame_series_agg_multiple_levels(self): result = self.ymd.sum(level=['year', 'month']) expected = self.ymd.groupby(level=['year', 'month']).sum() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = self.ymd['A'].sum(level=['year', 'month']) expected = self.ymd['A'].groupby(level=['year', 'month']).sum() - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_groupby_multilevel(self): result = self.ymd.groupby(level=[0, 1]).mean() @@ -1631,12 +1500,12 @@ def test_groupby_multilevel(self): expected = self.ymd.groupby([k1, k2]).mean() - assert_frame_equal(result, expected, check_names=False - ) # TODO groupby with level_values drops names - self.assertEqual(result.index.names, self.ymd.index.names[:2]) + # TODO groupby with level_values drops names + tm.assert_frame_equal(result, expected, check_names=False) + assert result.index.names == self.ymd.index.names[:2] result2 = self.ymd.groupby(level=self.ymd.index.names[:2]).mean() - assert_frame_equal(result, result2) + tm.assert_frame_equal(result, result2) def test_groupby_multilevel_with_transform(self): pass @@ -1646,18 +1515,18 @@ def test_multilevel_consolidate(self): 'bar', 'one'), ('bar', 'two')]) df = DataFrame(np.random.randn(4, 4), index=index, columns=index) df['Totals', ''] = df.sum(1) - df = df.consolidate() + df = df._consolidate() def test_ix_preserve_names(self): result = self.ymd.loc[2000] result2 = self.ymd['A'].loc[2000] - self.assertEqual(result.index.names, 
self.ymd.index.names[1:]) - self.assertEqual(result2.index.names, self.ymd.index.names[1:]) + assert result.index.names == self.ymd.index.names[1:] + assert result2.index.names == self.ymd.index.names[1:] result = self.ymd.loc[2000, 2] result2 = self.ymd['A'].loc[2000, 2] - self.assertEqual(result.index.name, self.ymd.index.names[2]) - self.assertEqual(result2.index.name, self.ymd.index.names[2]) + assert result.index.name == self.ymd.index.names[2] + assert result2.index.name == self.ymd.index.names[2] def test_partial_set(self): # GH #397 @@ -1665,19 +1534,19 @@ def test_partial_set(self): exp = self.ymd.copy() df.loc[2000, 4] = 0 exp.loc[2000, 4].values[:] = 0 - assert_frame_equal(df, exp) + tm.assert_frame_equal(df, exp) df['A'].loc[2000, 4] = 1 exp['A'].loc[2000, 4].values[:] = 1 - assert_frame_equal(df, exp) + tm.assert_frame_equal(df, exp) df.loc[2000] = 5 exp.loc[2000].values[:] = 5 - assert_frame_equal(df, exp) + tm.assert_frame_equal(df, exp) # this works...for now df['A'].iloc[14] = 5 - self.assertEqual(df['A'][14], 5) + assert df['A'][14] == 5 def test_unstack_preserve_types(self): # GH #403 @@ -1685,9 +1554,9 @@ def test_unstack_preserve_types(self): self.ymd['F'] = 2 unstacked = self.ymd.unstack('month') - self.assertEqual(unstacked['A', 1].dtype, np.float64) - self.assertEqual(unstacked['E', 1].dtype, np.object_) - self.assertEqual(unstacked['F', 1].dtype, np.float64) + assert unstacked['A', 1].dtype == np.float64 + assert unstacked['E', 1].dtype == np.object_ + assert unstacked['F', 1].dtype == np.float64 def test_unstack_group_index_overflow(self): labels = np.tile(np.arange(500), 2) @@ -1698,11 +1567,11 @@ def test_unstack_group_index_overflow(self): s = Series(np.arange(1000), index=index) result = s.unstack() - self.assertEqual(result.shape, (500, 2)) + assert result.shape == (500, 2) # test roundtrip stacked = result.stack() - assert_series_equal(s, stacked.reindex(s.index)) + tm.assert_series_equal(s, stacked.reindex(s.index)) # put it at 
beginning index = MultiIndex(levels=[[0, 1]] + [level] * 8, @@ -1710,7 +1579,7 @@ def test_unstack_group_index_overflow(self): s = Series(np.arange(1000), index=index) result = s.unstack(0) - self.assertEqual(result.shape, (500, 2)) + assert result.shape == (500, 2) # put it in middle index = MultiIndex(levels=[level] * 4 + [[0, 1]] + [level] * 4, @@ -1719,15 +1588,47 @@ def test_unstack_group_index_overflow(self): s = Series(np.arange(1000), index=index) result = s.unstack(4) - self.assertEqual(result.shape, (500, 2)) + assert result.shape == (500, 2) + + def test_pyint_engine(self): + # GH 18519 : when combinations of codes cannot be represented in 64 + # bits, the index underlying the MultiIndex engine works with Python + # integers, rather than uint64. + N = 5 + keys = [tuple(l) for l in [[0] * 10 * N, + [1] * 10 * N, + [2] * 10 * N, + [np.nan] * N + [2] * 9 * N, + [0] * N + [2] * 9 * N, + [np.nan] * N + [2] * 8 * N + [0] * N]] + # Each level contains 4 elements (including NaN), so it is represented + # in 2 bits, for a total of 2*N*10 = 100 > 64 bits. If we were using a + # 64 bit engine and truncating the first levels, the fourth and fifth + # keys would collide; if truncating the last levels, the fifth and + # sixth; if rotating bits rather than shifting, the third and fifth. 
+ + for idx in range(len(keys)): + index = MultiIndex.from_tuples(keys) + assert index.get_loc(keys[idx]) == idx + + expected = np.arange(idx + 1, dtype=np.intp) + result = index.get_indexer([keys[i] for i in expected]) + tm.assert_numpy_array_equal(result, expected) + + # With missing key: + idces = range(len(keys)) + expected = np.array([-1] + list(idces), dtype=np.intp) + missing = tuple([0, 1] * 5 * N) + result = index.get_indexer([missing] + [keys[i] for i in idces]) + tm.assert_numpy_array_equal(result, expected) def test_getitem_lowerdim_corner(self): - self.assertRaises(KeyError, self.frame.loc.__getitem__, - (('bar', 'three'), 'B')) + pytest.raises(KeyError, self.frame.loc.__getitem__, + (('bar', 'three'), 'B')) # in theory should be inserting in a sorted space???? self.frame.loc[('bar', 'three'), 'B'] = 0 - self.assertEqual(self.frame.sort_index().loc[('bar', 'three'), 'B'], 0) + assert self.frame.sort_index().loc[('bar', 'three'), 'B'] == 0 # --------------------------------------------------------------------- # AMBIGUOUS CASES! @@ -1737,16 +1638,16 @@ def test_partial_ix_missing(self): result = self.ymd.loc[2000, 0] expected = self.ymd.loc[2000]['A'] - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) # need to put in some work here # self.ymd.loc[2000, 0] = 0 - # self.assertTrue((self.ymd.loc[2000]['A'] == 0).all()) + # assert (self.ymd.loc[2000]['A'] == 0).all() # Pretty sure the second (and maybe even the first) is already wrong. 
- self.assertRaises(Exception, self.ymd.loc.__getitem__, (2000, 6)) - self.assertRaises(Exception, self.ymd.loc.__getitem__, (2000, 6), 0) + pytest.raises(Exception, self.ymd.loc.__getitem__, (2000, 6)) + pytest.raises(Exception, self.ymd.loc.__getitem__, (2000, 6), 0) # --------------------------------------------------------------------- @@ -1767,17 +1668,17 @@ def test_level_with_tuples(self): result2 = series.loc[('foo', 'bar', 0)] expected = series[:2] expected.index = expected.index.droplevel(0) - assert_series_equal(result, expected) - assert_series_equal(result2, expected) + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result2, expected) - self.assertRaises(KeyError, series.__getitem__, (('foo', 'bar', 0), 2)) + pytest.raises(KeyError, series.__getitem__, (('foo', 'bar', 0), 2)) result = frame.loc[('foo', 'bar', 0)] result2 = frame.xs(('foo', 'bar', 0)) expected = frame[:2] expected.index = expected.index.droplevel(0) - assert_frame_equal(result, expected) - assert_frame_equal(result2, expected) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result2, expected) index = MultiIndex(levels=[[('foo', 'bar'), ('foo', 'baz'), ( 'foo', 'qux')], [0, 1]], @@ -1790,49 +1691,56 @@ def test_level_with_tuples(self): result2 = series.loc[('foo', 'bar')] expected = series[:2] expected.index = expected.index.droplevel(0) - assert_series_equal(result, expected) - assert_series_equal(result2, expected) + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result2, expected) result = frame.loc[('foo', 'bar')] result2 = frame.xs(('foo', 'bar')) expected = frame[:2] expected.index = expected.index.droplevel(0) - assert_frame_equal(result, expected) - assert_frame_equal(result2, expected) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result2, expected) def test_int_series_slicing(self): s = self.ymd['A'] result = s[5:] expected = s.reindex(s.index[5:]) - assert_series_equal(result, expected) + 
tm.assert_series_equal(result, expected) exp = self.ymd['A'].copy() s[5:] = 0 exp.values[5:] = 0 - self.assert_numpy_array_equal(s.values, exp.values) + tm.assert_numpy_array_equal(s.values, exp.values) result = self.ymd[5:] expected = self.ymd.reindex(s.index[5:]) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize('unicode_strings', [True, False]) + def test_mixed_depth_get(self, unicode_strings): + # If unicode_strings is True, the column labels in dataframe + # construction will use unicode strings in Python 2 (pull request + # #17099). - def test_mixed_depth_get(self): arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'], ['', 'OD', 'OD', 'result1', 'result2', 'result1'], ['', 'wx', 'wy', '', '', '']] + if unicode_strings: + arrays = [[u(s) for s in arr] for arr in arrays] + tuples = sorted(zip(*arrays)) index = MultiIndex.from_tuples(tuples) - df = DataFrame(randn(4, 6), columns=index) + df = DataFrame(np.random.randn(4, 6), columns=index) result = df['a'] - expected = df['a', '', ''] - assert_series_equal(result, expected, check_names=False) - self.assertEqual(result.name, 'a') + expected = df['a', '', ''].rename('a') + tm.assert_series_equal(result, expected) result = df['routine1', 'result1'] expected = df['routine1', 'result1', ''] - assert_series_equal(result, expected, check_names=False) - self.assertEqual(result.name, ('routine1', 'result1')) + expected = expected.rename(('routine1', 'result1')) + tm.assert_series_equal(result, expected) def test_mixed_depth_insert(self): arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'], @@ -1847,7 +1755,7 @@ def test_mixed_depth_insert(self): expected = df.copy() result['b'] = [1, 2, 3, 4] expected['b', '', ''] = [1, 2, 3, 4] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_mixed_depth_drop(self): arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'], @@ -1860,16 +1768,16 @@ def 
test_mixed_depth_drop(self): result = df.drop('a', axis=1) expected = df.drop([('a', '', '')], axis=1) - assert_frame_equal(expected, result) + tm.assert_frame_equal(expected, result) result = df.drop(['top'], axis=1) expected = df.drop([('top', 'OD', 'wx')], axis=1) expected = expected.drop([('top', 'OD', 'wy')], axis=1) - assert_frame_equal(expected, result) + tm.assert_frame_equal(expected, result) result = df.drop(('top', 'OD', 'wx'), axis=1) expected = df.drop([('top', 'OD', 'wx')], axis=1) - assert_frame_equal(expected, result) + tm.assert_frame_equal(expected, result) expected = df.drop([('top', 'OD', 'wy')], axis=1) expected = df.drop('top', axis=1) @@ -1877,7 +1785,7 @@ def test_mixed_depth_drop(self): result = df.drop('result1', level=1, axis=1) expected = df.drop([('routine1', 'result1', ''), ('routine2', 'result1', '')], axis=1) - assert_frame_equal(expected, result) + tm.assert_frame_equal(expected, result) def test_drop_nonunique(self): df = DataFrame([["x-a", "x", "a", 1.5], ["x-a", "x", "a", 1.2], @@ -1898,7 +1806,7 @@ def test_drop_nonunique(self): result.index = expected.index - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_mixed_depth_pop(self): arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'], @@ -1913,32 +1821,32 @@ def test_mixed_depth_pop(self): df2 = df.copy() result = df1.pop('a') expected = df2.pop(('a', '', '')) - assert_series_equal(expected, result, check_names=False) - assert_frame_equal(df1, df2) - self.assertEqual(result.name, 'a') + tm.assert_series_equal(expected, result, check_names=False) + tm.assert_frame_equal(df1, df2) + assert result.name == 'a' expected = df1['top'] df1 = df1.drop(['top'], axis=1) result = df2.pop('top') - assert_frame_equal(expected, result) - assert_frame_equal(df1, df2) + tm.assert_frame_equal(expected, result) + tm.assert_frame_equal(df1, df2) def test_reindex_level_partial_selection(self): result = self.frame.reindex(['foo', 'qux'], level=0) 
expected = self.frame.iloc[[0, 1, 2, 7, 8, 9]] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) - result = self.frame.T.reindex_axis(['foo', 'qux'], axis=1, level=0) - assert_frame_equal(result, expected.T) + result = self.frame.T.reindex(['foo', 'qux'], axis=1, level=0) + tm.assert_frame_equal(result, expected.T) result = self.frame.loc[['foo', 'qux']] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = self.frame['A'].loc[['foo', 'qux']] - assert_series_equal(result, expected['A']) + tm.assert_series_equal(result, expected['A']) result = self.frame.T.loc[:, ['foo', 'qux']] - assert_frame_equal(result, expected.T) + tm.assert_frame_equal(result, expected.T) def test_setitem_multiple_partial(self): expected = self.frame.copy() @@ -1946,49 +1854,49 @@ def test_setitem_multiple_partial(self): result.loc[['foo', 'bar']] = 0 expected.loc['foo'] = 0 expected.loc['bar'] = 0 - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) expected = self.frame.copy() result = self.frame.copy() result.loc['foo':'bar'] = 0 expected.loc['foo'] = 0 expected.loc['bar'] = 0 - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) expected = self.frame['A'].copy() result = self.frame['A'].copy() result.loc[['foo', 'bar']] = 0 expected.loc['foo'] = 0 expected.loc['bar'] = 0 - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) expected = self.frame['A'].copy() result = self.frame['A'].copy() result.loc['foo':'bar'] = 0 expected.loc['foo'] = 0 expected.loc['bar'] = 0 - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_drop_level(self): result = self.frame.drop(['bar', 'qux'], level='first') expected = self.frame.iloc[[0, 1, 2, 5, 6]] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = self.frame.drop(['two'], level='second') expected = self.frame.iloc[[0, 2, 3, 6, 7, 
9]] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = self.frame.T.drop(['bar', 'qux'], axis=1, level='first') expected = self.frame.iloc[[0, 1, 2, 5, 6]].T - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = self.frame.T.drop(['two'], axis=1, level='second') expected = self.frame.iloc[[0, 2, 3, 6, 7, 9]].T - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_drop_level_nonunique_datetime(self): # GH 12701 - idx = pd.Index([2, 3, 4, 4, 5], name='id') + idx = Index([2, 3, 4, 4, 5], name='id') idxdt = pd.to_datetime(['201603231400', '201603231500', '201603231600', @@ -1998,12 +1906,12 @@ def test_drop_level_nonunique_datetime(self): columns=list('ab'), index=idx) df['tstamp'] = idxdt df = df.set_index('tstamp', append=True) - ts = pd.Timestamp('201603231600') - self.assertFalse(df.index.is_unique) + ts = Timestamp('201603231600') + assert not df.index.is_unique result = df.drop(ts, level='tstamp') expected = df.loc[idx != 4] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_drop_preserve_names(self): index = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1], @@ -2013,7 +1921,7 @@ def test_drop_preserve_names(self): df = DataFrame(np.random.randn(6, 3), index=index) result = df.drop([(0, 2)]) - self.assertEqual(result.index.names, ('one', 'two')) + assert result.index.names == ('one', 'two') def test_unicode_repr_issues(self): levels = [Index([u('a/\u03c3'), u('b/\u03c3'), u('c/\u03c3')]), @@ -2042,7 +1950,7 @@ def test_dataframe_insert_column_all_na(self): df = DataFrame([[1, 2], [3, 4], [5, 6]], index=mix) s = Series({(1, 1): 1, (1, 2): 2}) df['new'] = s - self.assertTrue(df['new'].isnull().all()) + assert df['new'].isna().all() def test_join_segfault(self): # 1532 @@ -2058,11 +1966,11 @@ def test_set_column_scalar_with_ix(self): subset = self.frame.index[[1, 4, 5]] self.frame.loc[subset] = 99 - 
self.assertTrue((self.frame.loc[subset].values == 99).all()) + assert (self.frame.loc[subset].values == 99).all() col = self.frame['B'] col[subset] = 97 - self.assertTrue((self.frame.loc[subset, 'B'] == 97).all()) + assert (self.frame.loc[subset, 'B'] == 97).all() def test_frame_dict_constructor_empty_series(self): s1 = Series([ @@ -2088,8 +1996,8 @@ def test_indexing_ambiguity_bug_1678(self): result = frame.iloc[:, 1] exp = frame.loc[:, ('Ohio', 'Red')] - tm.assertIsInstance(result, Series) - assert_series_equal(result, exp) + assert isinstance(result, Series) + tm.assert_series_equal(result, exp) def test_nonunique_assignment_1750(self): df = DataFrame([[1, 1, "x", "X"], [1, 1, "y", "Y"], [1, 2, "z", "Z"]], @@ -2100,7 +2008,7 @@ def test_nonunique_assignment_1750(self): df.loc[ix, "C"] = '_' - self.assertTrue((df.xs((1, 1))['C'] == '_').all()) + assert (df.xs((1, 1))['C'] == '_').all() def test_indexing_over_hashtable_size_cutoff(self): n = 10000 @@ -2112,9 +2020,9 @@ def test_indexing_over_hashtable_size_cutoff(self): MultiIndex.from_arrays((["a"] * n, np.arange(n)))) # hai it works! 
- self.assertEqual(s[("a", 5)], 5) - self.assertEqual(s[("a", 6)], 6) - self.assertEqual(s[("a", 7)], 7) + assert s[("a", 5)] == 5 + assert s[("a", 6)] == 6 + assert s[("a", 7)] == 7 _index._SIZE_CUTOFF = old_cutoff @@ -2154,8 +2062,8 @@ def test_tuples_have_na(self): labels=[[1, 1, 1, 1, -1, 0, 0, 0], [0, 1, 2, 3, 0, 1, 2, 3]]) - self.assertTrue(isnull(index[4][0])) - self.assertTrue(isnull(index.values[4][0])) + assert isna(index[4][0]) + assert isna(index.values[4][0]) def test_duplicate_groupby_issues(self): idx_tp = [('600809', '20061231'), ('600809', '20070331'), @@ -2166,7 +2074,7 @@ def test_duplicate_groupby_issues(self): s = Series(dt, index=idx) result = s.groupby(s.index).first() - self.assertEqual(len(result), 3) + assert len(result) == 3 def test_duplicate_mi(self): # GH 4516 @@ -2181,7 +2089,7 @@ def test_duplicate_mi(self): ['foo', 'bar', 5.0, 5]], columns=list('ABCD')).set_index(['A', 'B']) result = df.loc[('foo', 'bar')] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_duplicated_drop_duplicates(self): # GH 4060 @@ -2191,35 +2099,24 @@ def test_duplicated_drop_duplicates(self): [False, False, False, True, False, False], dtype=bool) duplicated = idx.duplicated() tm.assert_numpy_array_equal(duplicated, expected) - self.assertTrue(duplicated.dtype == bool) + assert duplicated.dtype == bool expected = MultiIndex.from_arrays(([1, 2, 3, 2, 3], [1, 1, 1, 2, 2])) tm.assert_index_equal(idx.drop_duplicates(), expected) expected = np.array([True, False, False, False, False, False]) duplicated = idx.duplicated(keep='last') tm.assert_numpy_array_equal(duplicated, expected) - self.assertTrue(duplicated.dtype == bool) + assert duplicated.dtype == bool expected = MultiIndex.from_arrays(([2, 3, 1, 2, 3], [1, 1, 1, 2, 2])) tm.assert_index_equal(idx.drop_duplicates(keep='last'), expected) expected = np.array([True, False, False, True, False, False]) duplicated = idx.duplicated(keep=False) 
tm.assert_numpy_array_equal(duplicated, expected) - self.assertTrue(duplicated.dtype == bool) + assert duplicated.dtype == bool expected = MultiIndex.from_arrays(([2, 3, 2, 3], [1, 1, 2, 2])) tm.assert_index_equal(idx.drop_duplicates(keep=False), expected) - # deprecate take_last - expected = np.array([True, False, False, False, False, False]) - with tm.assert_produces_warning(FutureWarning): - duplicated = idx.duplicated(take_last=True) - tm.assert_numpy_array_equal(duplicated, expected) - self.assertTrue(duplicated.dtype == bool) - expected = MultiIndex.from_arrays(([2, 3, 1, 2, 3], [1, 1, 1, 2, 2])) - with tm.assert_produces_warning(FutureWarning): - tm.assert_index_equal( - idx.drop_duplicates(take_last=True), expected) - def test_multiindex_set_index(self): # segfault in #3308 d = {'t1': [2, 2.5, 3], 't2': [4, 5, 6]} @@ -2242,8 +2139,8 @@ def test_datetimeindex(self): expected1 = pd.DatetimeIndex(['2013-04-01 9:00', '2013-04-02 9:00', '2013-04-03 9:00'], tz='Asia/Tokyo') - self.assert_index_equal(idx.levels[0], expected1) - self.assert_index_equal(idx.levels[1], idx2) + tm.assert_index_equal(idx.levels[0], expected1) + tm.assert_index_equal(idx.levels[1], idx2) # from datetime combos # GH 7888 @@ -2253,9 +2150,9 @@ def test_datetimeindex(self): for d1, d2 in itertools.product( [date1, date2, date3], [date1, date2, date3]): - index = pd.MultiIndex.from_product([[d1], [d2]]) - self.assertIsInstance(index.levels[0], pd.DatetimeIndex) - self.assertIsInstance(index.levels[1], pd.DatetimeIndex) + index = MultiIndex.from_product([[d1], [d2]]) + assert isinstance(index.levels[0], pd.DatetimeIndex) + assert isinstance(index.levels[1], pd.DatetimeIndex) def test_constructor_with_tz(self): @@ -2274,14 +2171,14 @@ def test_constructor_with_tz(self): def test_set_index_datetime(self): # GH 3950 - df = pd.DataFrame( + df = DataFrame( {'label': ['a', 'a', 'a', 'b', 'b', 'b'], 'datetime': ['2011-07-19 07:00:00', '2011-07-19 08:00:00', '2011-07-19 09:00:00', '2011-07-19 
07:00:00', '2011-07-19 08:00:00', '2011-07-19 09:00:00'], 'value': range(6)}) df.index = pd.to_datetime(df.pop('datetime'), utc=True) - df.index = df.index.tz_localize('UTC').tz_convert('US/Pacific') + df.index = df.index.tz_convert('US/Pacific') expected = pd.DatetimeIndex(['2011-07-19 07:00:00', '2011-07-19 08:00:00', @@ -2289,14 +2186,14 @@ def test_set_index_datetime(self): expected = expected.tz_localize('UTC').tz_convert('US/Pacific') df = df.set_index('label', append=True) - self.assert_index_equal(df.index.levels[0], expected) - self.assert_index_equal(df.index.levels[1], - pd.Index(['a', 'b'], name='label')) + tm.assert_index_equal(df.index.levels[0], expected) + tm.assert_index_equal(df.index.levels[1], + Index(['a', 'b'], name='label')) df = df.swaplevel(0, 1) - self.assert_index_equal(df.index.levels[0], - pd.Index(['a', 'b'], name='label')) - self.assert_index_equal(df.index.levels[1], expected) + tm.assert_index_equal(df.index.levels[0], + Index(['a', 'b'], name='label')) + tm.assert_index_equal(df.index.levels[1], expected) df = DataFrame(np.random.random(6)) idx1 = pd.DatetimeIndex(['2011-07-19 07:00:00', '2011-07-19 08:00:00', @@ -2319,103 +2216,152 @@ def test_set_index_datetime(self): expected2 = pd.DatetimeIndex(['2012-04-01 09:00', '2012-04-02 09:00'], tz='US/Eastern') - self.assert_index_equal(df.index.levels[0], expected1) - self.assert_index_equal(df.index.levels[1], expected2) - self.assert_index_equal(df.index.levels[2], idx3) + tm.assert_index_equal(df.index.levels[0], expected1) + tm.assert_index_equal(df.index.levels[1], expected2) + tm.assert_index_equal(df.index.levels[2], idx3) # GH 7092 - self.assert_index_equal(df.index.get_level_values(0), idx1) - self.assert_index_equal(df.index.get_level_values(1), idx2) - self.assert_index_equal(df.index.get_level_values(2), idx3) + tm.assert_index_equal(df.index.get_level_values(0), idx1) + tm.assert_index_equal(df.index.get_level_values(1), idx2) + 
tm.assert_index_equal(df.index.get_level_values(2), idx3) def test_reset_index_datetime(self): # GH 3950 for tz in ['UTC', 'Asia/Tokyo', 'US/Eastern']: idx1 = pd.date_range('1/1/2011', periods=5, freq='D', tz=tz, name='idx1') - idx2 = pd.Index(range(5), name='idx2', dtype='int64') - idx = pd.MultiIndex.from_arrays([idx1, idx2]) - df = pd.DataFrame( + idx2 = Index(range(5), name='idx2', dtype='int64') + idx = MultiIndex.from_arrays([idx1, idx2]) + df = DataFrame( {'a': np.arange(5, dtype='int64'), 'b': ['A', 'B', 'C', 'D', 'E']}, index=idx) - expected = pd.DataFrame({'idx1': [datetime.datetime(2011, 1, 1), - datetime.datetime(2011, 1, 2), - datetime.datetime(2011, 1, 3), - datetime.datetime(2011, 1, 4), - datetime.datetime(2011, 1, 5)], - 'idx2': np.arange(5, dtype='int64'), - 'a': np.arange(5, dtype='int64'), - 'b': ['A', 'B', 'C', 'D', 'E']}, - columns=['idx1', 'idx2', 'a', 'b']) + expected = DataFrame({'idx1': [datetime.datetime(2011, 1, 1), + datetime.datetime(2011, 1, 2), + datetime.datetime(2011, 1, 3), + datetime.datetime(2011, 1, 4), + datetime.datetime(2011, 1, 5)], + 'idx2': np.arange(5, dtype='int64'), + 'a': np.arange(5, dtype='int64'), + 'b': ['A', 'B', 'C', 'D', 'E']}, + columns=['idx1', 'idx2', 'a', 'b']) expected['idx1'] = expected['idx1'].apply( - lambda d: pd.Timestamp(d, tz=tz)) + lambda d: Timestamp(d, tz=tz)) - assert_frame_equal(df.reset_index(), expected) + tm.assert_frame_equal(df.reset_index(), expected) idx3 = pd.date_range('1/1/2012', periods=5, freq='MS', tz='Europe/Paris', name='idx3') - idx = pd.MultiIndex.from_arrays([idx1, idx2, idx3]) - df = pd.DataFrame( + idx = MultiIndex.from_arrays([idx1, idx2, idx3]) + df = DataFrame( {'a': np.arange(5, dtype='int64'), 'b': ['A', 'B', 'C', 'D', 'E']}, index=idx) - expected = pd.DataFrame({'idx1': [datetime.datetime(2011, 1, 1), - datetime.datetime(2011, 1, 2), - datetime.datetime(2011, 1, 3), - datetime.datetime(2011, 1, 4), - datetime.datetime(2011, 1, 5)], - 'idx2': np.arange(5, 
dtype='int64'), - 'idx3': [datetime.datetime(2012, 1, 1), - datetime.datetime(2012, 2, 1), - datetime.datetime(2012, 3, 1), - datetime.datetime(2012, 4, 1), - datetime.datetime(2012, 5, 1)], - 'a': np.arange(5, dtype='int64'), - 'b': ['A', 'B', 'C', 'D', 'E']}, - columns=['idx1', 'idx2', 'idx3', 'a', 'b']) + expected = DataFrame({'idx1': [datetime.datetime(2011, 1, 1), + datetime.datetime(2011, 1, 2), + datetime.datetime(2011, 1, 3), + datetime.datetime(2011, 1, 4), + datetime.datetime(2011, 1, 5)], + 'idx2': np.arange(5, dtype='int64'), + 'idx3': [datetime.datetime(2012, 1, 1), + datetime.datetime(2012, 2, 1), + datetime.datetime(2012, 3, 1), + datetime.datetime(2012, 4, 1), + datetime.datetime(2012, 5, 1)], + 'a': np.arange(5, dtype='int64'), + 'b': ['A', 'B', 'C', 'D', 'E']}, + columns=['idx1', 'idx2', 'idx3', 'a', 'b']) expected['idx1'] = expected['idx1'].apply( - lambda d: pd.Timestamp(d, tz=tz)) + lambda d: Timestamp(d, tz=tz)) expected['idx3'] = expected['idx3'].apply( - lambda d: pd.Timestamp(d, tz='Europe/Paris')) - assert_frame_equal(df.reset_index(), expected) + lambda d: Timestamp(d, tz='Europe/Paris')) + tm.assert_frame_equal(df.reset_index(), expected) # GH 7793 - idx = pd.MultiIndex.from_product([['a', 'b'], pd.date_range( + idx = MultiIndex.from_product([['a', 'b'], pd.date_range( '20130101', periods=3, tz=tz)]) - df = pd.DataFrame( + df = DataFrame( np.arange(6, dtype='int64').reshape( 6, 1), columns=['a'], index=idx) - expected = pd.DataFrame({'level_0': 'a a a b b b'.split(), - 'level_1': [ - datetime.datetime(2013, 1, 1), - datetime.datetime(2013, 1, 2), - datetime.datetime(2013, 1, 3)] * 2, - 'a': np.arange(6, dtype='int64')}, - columns=['level_0', 'level_1', 'a']) + expected = DataFrame({'level_0': 'a a a b b b'.split(), + 'level_1': [ + datetime.datetime(2013, 1, 1), + datetime.datetime(2013, 1, 2), + datetime.datetime(2013, 1, 3)] * 2, + 'a': np.arange(6, dtype='int64')}, + columns=['level_0', 'level_1', 'a']) expected['level_1'] = 
expected['level_1'].apply( - lambda d: pd.Timestamp(d, freq='D', tz=tz)) - assert_frame_equal(df.reset_index(), expected) + lambda d: Timestamp(d, freq='D', tz=tz)) + tm.assert_frame_equal(df.reset_index(), expected) def test_reset_index_period(self): # GH 7746 - idx = pd.MultiIndex.from_product([pd.period_range('20130101', - periods=3, freq='M'), - ['a', 'b', 'c']], - names=['month', 'feature']) - - df = pd.DataFrame(np.arange(9, dtype='int64') - .reshape(-1, 1), - index=idx, columns=['a']) - expected = pd.DataFrame({ + idx = MultiIndex.from_product( + [pd.period_range('20130101', periods=3, freq='M'), list('abc')], + names=['month', 'feature']) + + df = DataFrame(np.arange(9, dtype='int64').reshape(-1, 1), + index=idx, columns=['a']) + expected = DataFrame({ 'month': ([pd.Period('2013-01', freq='M')] * 3 + [pd.Period('2013-02', freq='M')] * 3 + [pd.Period('2013-03', freq='M')] * 3), 'feature': ['a', 'b', 'c'] * 3, 'a': np.arange(9, dtype='int64') }, columns=['month', 'feature', 'a']) - assert_frame_equal(df.reset_index(), expected) + tm.assert_frame_equal(df.reset_index(), expected) + + def test_reset_index_multiindex_columns(self): + levels = [['A', ''], ['B', 'b']] + df = DataFrame([[0, 2], [1, 3]], + columns=MultiIndex.from_tuples(levels)) + result = df[['B']].rename_axis('A').reset_index() + tm.assert_frame_equal(result, df) + + # gh-16120: already existing column + with tm.assert_raises_regex(ValueError, + (r"cannot insert \('A', ''\), " + "already exists")): + df.rename_axis('A').reset_index() + + # gh-16164: multiindex (tuple) full key + result = df.set_index([('A', '')]).reset_index() + tm.assert_frame_equal(result, df) + + # with additional (unnamed) index level + idx_col = DataFrame([[0], [1]], + columns=MultiIndex.from_tuples([('level_0', '')])) + expected = pd.concat([idx_col, df[[('B', 'b'), ('A', '')]]], axis=1) + result = df.set_index([('B', 'b')], append=True).reset_index() + tm.assert_frame_equal(result, expected) + + # with index name which is a 
too long tuple... + with tm.assert_raises_regex(ValueError, + ("Item must have length equal to number " + "of levels.")): + df.rename_axis([('C', 'c', 'i')]).reset_index() + + # or too short... + levels = [['A', 'a', ''], ['B', 'b', 'i']] + df2 = DataFrame([[0, 2], [1, 3]], + columns=MultiIndex.from_tuples(levels)) + idx_col = DataFrame([[0], [1]], + columns=MultiIndex.from_tuples([('C', 'c', 'ii')])) + expected = pd.concat([idx_col, df2], axis=1) + result = df2.rename_axis([('C', 'c')]).reset_index(col_fill='ii') + tm.assert_frame_equal(result, expected) + + # ... which is incompatible with col_fill=None + with tm.assert_raises_regex(ValueError, + ("col_fill=None is incompatible with " + r"incomplete column name \('C', 'c'\)")): + df2.rename_axis([('C', 'c')]).reset_index(col_fill=None) + + # with col_level != 0 + result = df2.rename_axis([('c', 'ii')]).reset_index(col_level=1, + col_fill='C') + tm.assert_frame_equal(result, expected) def test_set_index_period(self): # GH 6631 @@ -2433,46 +2379,456 @@ def test_set_index_period(self): expected1 = pd.period_range('2011-01-01', periods=3, freq='M') expected2 = pd.period_range('2013-01-01 09:00', periods=2, freq='H') - self.assert_index_equal(df.index.levels[0], expected1) - self.assert_index_equal(df.index.levels[1], expected2) - self.assert_index_equal(df.index.levels[2], idx3) + tm.assert_index_equal(df.index.levels[0], expected1) + tm.assert_index_equal(df.index.levels[1], expected2) + tm.assert_index_equal(df.index.levels[2], idx3) - self.assert_index_equal(df.index.get_level_values(0), idx1) - self.assert_index_equal(df.index.get_level_values(1), idx2) - self.assert_index_equal(df.index.get_level_values(2), idx3) + tm.assert_index_equal(df.index.get_level_values(0), idx1) + tm.assert_index_equal(df.index.get_level_values(1), idx2) + tm.assert_index_equal(df.index.get_level_values(2), idx3) def test_repeat(self): # GH 9361 # fixed by # GH 7891 - m_idx = pd.MultiIndex.from_tuples([(1, 2), (3, 4), (5, 6), (7, 8)]) 
+ m_idx = MultiIndex.from_tuples([(1, 2), (3, 4), (5, 6), (7, 8)]) data = ['a', 'b', 'c', 'd'] - m_df = pd.Series(data, index=m_idx) + m_df = Series(data, index=m_idx) assert m_df.repeat(3).shape == (3 * len(data), ) def test_iloc_mi(self): # GH 13797 # Test if iloc can handle integer locations in MultiIndexed DataFrame - data = [ - ['str00', 'str01'], - ['str10', 'str11'], - ['str20', 'srt21'], - ['str30', 'str31'], - ['str40', 'str41'] - ] + data = [['str00', 'str01'], ['str10', 'str11'], ['str20', 'srt21'], + ['str30', 'str31'], ['str40', 'str41']] + + mi = MultiIndex.from_tuples( + [('CC', 'A'), ('CC', 'B'), ('CC', 'B'), ('BB', 'a'), ('BB', 'b')]) + + expected = DataFrame(data) + df_mi = DataFrame(data, index=mi) + + result = DataFrame([[df_mi.iloc[r, c] for c in range(2)] + for r in range(5)]) + + tm.assert_frame_equal(result, expected) + + +class TestSorted(Base): + """ everything you wanted to test about sorting """ + + def test_sort_index_preserve_levels(self): + result = self.frame.sort_index() + assert result.index.names == self.frame.index.names + + def test_sorting_repr_8017(self): + + np.random.seed(0) + data = np.random.randn(3, 4) + + for gen, extra in [([1., 3., 2., 5.], 4.), ([1, 3, 2, 5], 4), + ([Timestamp('20130101'), Timestamp('20130103'), + Timestamp('20130102'), Timestamp('20130105')], + Timestamp('20130104')), + (['1one', '3one', '2one', '5one'], '4one')]: + columns = MultiIndex.from_tuples([('red', i) for i in gen]) + df = DataFrame(data, index=list('def'), columns=columns) + df2 = pd.concat([df, + DataFrame('world', index=list('def'), + columns=MultiIndex.from_tuples( + [('red', extra)]))], axis=1) + + # check that the repr is good + # make sure that we have a correct sparsified repr + # e.g. 
only 1 header of read + assert str(df2).splitlines()[0].split() == ['red'] + + # GH 8017 + # sorting fails after columns added + + # construct single-dtype then sort + result = df.copy().sort_index(axis=1) + expected = df.iloc[:, [0, 2, 1, 3]] + tm.assert_frame_equal(result, expected) + + result = df2.sort_index(axis=1) + expected = df2.iloc[:, [0, 2, 1, 4, 3]] + tm.assert_frame_equal(result, expected) + + # setitem then sort + result = df.copy() + result[('red', extra)] = 'world' + + result = result.sort_index(axis=1) + tm.assert_frame_equal(result, expected) + + def test_sort_index_level(self): + df = self.frame.copy() + df.index = np.arange(len(df)) + + # axis=1 + + # series + a_sorted = self.frame['A'].sort_index(level=0) + + # preserve names + assert a_sorted.index.names == self.frame.index.names + + # inplace + rs = self.frame.copy() + rs.sort_index(level=0, inplace=True) + tm.assert_frame_equal(rs, self.frame.sort_index(level=0)) + + def test_sort_index_level_large_cardinality(self): + + # #2684 (int64) + index = MultiIndex.from_arrays([np.arange(4000)] * 3) + df = DataFrame(np.random.randn(4000), index=index, dtype=np.int64) + + # it works! + result = df.sort_index(level=0) + assert result.index.lexsort_depth == 3 + + # #2684 (int32) + index = MultiIndex.from_arrays([np.arange(4000)] * 3) + df = DataFrame(np.random.randn(4000), index=index, dtype=np.int32) + + # it works! 
+ result = df.sort_index(level=0) + assert (result.dtypes.values == df.dtypes.values).all() + assert result.index.lexsort_depth == 3 + + def test_sort_index_level_by_name(self): + self.frame.index.names = ['first', 'second'] + result = self.frame.sort_index(level='second') + expected = self.frame.sort_index(level=1) + tm.assert_frame_equal(result, expected) + + def test_sort_index_level_mixed(self): + sorted_before = self.frame.sort_index(level=1) + + df = self.frame.copy() + df['foo'] = 'bar' + sorted_after = df.sort_index(level=1) + tm.assert_frame_equal(sorted_before, + sorted_after.drop(['foo'], axis=1)) + + dft = self.frame.T + sorted_before = dft.sort_index(level=1, axis=1) + dft['foo', 'three'] = 'bar' + + sorted_after = dft.sort_index(level=1, axis=1) + tm.assert_frame_equal(sorted_before.drop([('foo', 'three')], axis=1), + sorted_after.drop([('foo', 'three')], axis=1)) + + def test_is_lexsorted(self): + levels = [[0, 1], [0, 1, 2]] + + index = MultiIndex(levels=levels, + labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]) + assert index.is_lexsorted() + + index = MultiIndex(levels=levels, + labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]]) + assert not index.is_lexsorted() + + index = MultiIndex(levels=levels, + labels=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]]) + assert not index.is_lexsorted() + assert index.lexsort_depth == 0 + + def test_getitem_multilevel_index_tuple_not_sorted(self): + index_columns = list("abc") + df = DataFrame([[0, 1, 0, "x"], [0, 0, 1, "y"]], + columns=index_columns + ["data"]) + df = df.set_index(index_columns) + query_index = df.index[:1] + rs = df.loc[query_index, "data"] + + xp_idx = MultiIndex.from_tuples([(0, 1, 0)], names=['a', 'b', 'c']) + xp = Series(['x'], index=xp_idx, name='data') + tm.assert_series_equal(rs, xp) + + def test_getitem_slice_not_sorted(self): + df = self.frame.sort_index(level=1).T + + # buglet with int typechecking + result = df.iloc[:, :np.int32(3)] + expected = df.reindex(columns=df.columns[:3]) + 
tm.assert_frame_equal(result, expected) + + def test_frame_getitem_not_sorted2(self): + # 13431 + df = DataFrame({'col1': ['b', 'd', 'b', 'a'], + 'col2': [3, 1, 1, 2], + 'data': ['one', 'two', 'three', 'four']}) + + df2 = df.set_index(['col1', 'col2']) + df2_original = df2.copy() + + df2.index.set_levels(['b', 'd', 'a'], level='col1', inplace=True) + df2.index.set_labels([0, 1, 0, 2], level='col1', inplace=True) + assert not df2.index.is_lexsorted() + assert not df2.index.is_monotonic + + assert df2_original.index.equals(df2.index) + expected = df2.sort_index() + assert expected.index.is_lexsorted() + assert expected.index.is_monotonic + + result = df2.sort_index(level=0) + assert result.index.is_lexsorted() + assert result.index.is_monotonic + tm.assert_frame_equal(result, expected) + + def test_frame_getitem_not_sorted(self): + df = self.frame.T + df['foo', 'four'] = 'foo' + + arrays = [np.array(x) for x in zip(*df.columns.values)] + + result = df['foo'] + result2 = df.loc[:, 'foo'] + expected = df.reindex(columns=df.columns[arrays[0] == 'foo']) + expected.columns = expected.columns.droplevel(0) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result2, expected) - mi = pd.MultiIndex.from_tuples( - [('CC', 'A'), - ('CC', 'B'), - ('CC', 'B'), - ('BB', 'a'), - ('BB', 'b') - ]) + df = df.T + result = df.xs('foo') + result2 = df.loc['foo'] + expected = df.reindex(df.index[arrays[0] == 'foo']) + expected.index = expected.index.droplevel(0) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result2, expected) - expected = pd.DataFrame(data) - df_mi = pd.DataFrame(data, index=mi) + def test_series_getitem_not_sorted(self): + arrays = [['bar', 'bar', 'baz', 'baz', 'qux', 'qux', 'foo', 'foo'], + ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] + tuples = lzip(*arrays) + index = MultiIndex.from_tuples(tuples) + s = Series(randn(8), index=index) - result = pd.DataFrame([[df_mi.iloc[r, c] for c in range(2)] - for r in range(5)]) + 
arrays = [np.array(x) for x in zip(*index.values)] - assert_frame_equal(result, expected) + result = s['qux'] + result2 = s.loc['qux'] + expected = s[arrays[0] == 'qux'] + expected.index = expected.index.droplevel(0) + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result2, expected) + + def test_sort_index_and_reconstruction(self): + + # 15622 + # lexsortedness should be identical + # across MultiIndex consruction methods + + df = DataFrame([[1, 1], [2, 2]], index=list('ab')) + expected = DataFrame([[1, 1], [2, 2], [1, 1], [2, 2]], + index=MultiIndex.from_tuples([(0.5, 'a'), + (0.5, 'b'), + (0.8, 'a'), + (0.8, 'b')])) + assert expected.index.is_lexsorted() + + result = DataFrame( + [[1, 1], [2, 2], [1, 1], [2, 2]], + index=MultiIndex.from_product([[0.5, 0.8], list('ab')])) + result = result.sort_index() + assert result.index.is_lexsorted() + assert result.index.is_monotonic + + tm.assert_frame_equal(result, expected) + + result = DataFrame( + [[1, 1], [2, 2], [1, 1], [2, 2]], + index=MultiIndex(levels=[[0.5, 0.8], ['a', 'b']], + labels=[[0, 0, 1, 1], [0, 1, 0, 1]])) + result = result.sort_index() + assert result.index.is_lexsorted() + + tm.assert_frame_equal(result, expected) + + concatted = pd.concat([df, df], keys=[0.8, 0.5]) + result = concatted.sort_index() + + assert result.index.is_lexsorted() + assert result.index.is_monotonic + + tm.assert_frame_equal(result, expected) + + # 14015 + df = DataFrame([[1, 2], [6, 7]], + columns=MultiIndex.from_tuples( + [(0, '20160811 12:00:00'), + (0, '20160809 12:00:00')], + names=['l1', 'Date'])) + + df.columns.set_levels(pd.to_datetime(df.columns.levels[1]), + level=1, + inplace=True) + assert not df.columns.is_lexsorted() + assert not df.columns.is_monotonic + result = df.sort_index(axis=1) + assert result.columns.is_lexsorted() + assert result.columns.is_monotonic + result = df.sort_index(axis=1, level=1) + assert result.columns.is_lexsorted() + assert result.columns.is_monotonic + + def 
test_sort_index_and_reconstruction_doc_example(self): + # doc example + df = DataFrame({'value': [1, 2, 3, 4]}, + index=MultiIndex( + levels=[['a', 'b'], ['bb', 'aa']], + labels=[[0, 0, 1, 1], [0, 1, 0, 1]])) + assert df.index.is_lexsorted() + assert not df.index.is_monotonic + + # sort it + expected = DataFrame({'value': [2, 1, 4, 3]}, + index=MultiIndex( + levels=[['a', 'b'], ['aa', 'bb']], + labels=[[0, 0, 1, 1], [0, 1, 0, 1]])) + result = df.sort_index() + assert result.index.is_lexsorted() + assert result.index.is_monotonic + + tm.assert_frame_equal(result, expected) + + # reconstruct + result = df.sort_index().copy() + result.index = result.index._sort_levels_monotonic() + assert result.index.is_lexsorted() + assert result.index.is_monotonic + + tm.assert_frame_equal(result, expected) + + def test_sort_index_reorder_on_ops(self): + # 15687 + df = DataFrame( + np.random.randn(8, 2), + index=MultiIndex.from_product( + [['a', 'b'], ['big', 'small'], ['red', 'blu']], + names=['letter', 'size', 'color']), + columns=['near', 'far']) + df = df.sort_index() + + def my_func(group): + group.index = ['newz', 'newa'] + return group + + result = df.groupby(level=['letter', 'size']).apply( + my_func).sort_index() + expected = MultiIndex.from_product( + [['a', 'b'], ['big', 'small'], ['newa', 'newz']], + names=['letter', 'size', None]) + + tm.assert_index_equal(result.index, expected) + + def test_sort_non_lexsorted(self): + # degenerate case where we sort but don't + # have a satisfying result :< + # GH 15797 + idx = MultiIndex([['A', 'B', 'C'], + ['c', 'b', 'a']], + [[0, 1, 2, 0, 1, 2], + [0, 2, 1, 1, 0, 2]]) + + df = DataFrame({'col': range(len(idx))}, + index=idx, + dtype='int64') + assert df.index.is_lexsorted() is False + assert df.index.is_monotonic is False + + sorted = df.sort_index() + assert sorted.index.is_lexsorted() is True + assert sorted.index.is_monotonic is True + + expected = DataFrame( + {'col': [1, 4, 5, 2]}, + index=MultiIndex.from_tuples([('B', 'a'), 
('B', 'c'), + ('C', 'a'), ('C', 'b')]), + dtype='int64') + result = sorted.loc[pd.IndexSlice['B':'C', 'a':'c'], :] + tm.assert_frame_equal(result, expected) + + def test_sort_index_nan(self): + # GH 14784 + # incorrect sorting w.r.t. nans + tuples = [[12, 13], [np.nan, np.nan], [np.nan, 3], [1, 2]] + mi = MultiIndex.from_tuples(tuples) + + df = DataFrame(np.arange(16).reshape(4, 4), + index=mi, columns=list('ABCD')) + s = Series(np.arange(4), index=mi) + + df2 = DataFrame({ + 'date': pd.to_datetime([ + '20121002', '20121007', '20130130', '20130202', '20130305', + '20121002', '20121207', '20130130', '20130202', '20130305', + '20130202', '20130305' + ]), + 'user_id': [1, 1, 1, 1, 1, 3, 3, 3, 5, 5, 5, 5], + 'whole_cost': [1790, np.nan, 280, 259, np.nan, 623, 90, 312, + np.nan, 301, 359, 801], + 'cost': [12, 15, 10, 24, 39, 1, 0, np.nan, 45, 34, 1, 12] + }).set_index(['date', 'user_id']) + + # sorting frame, default nan position is last + result = df.sort_index() + expected = df.iloc[[3, 0, 2, 1], :] + tm.assert_frame_equal(result, expected) + + # sorting frame, nan position last + result = df.sort_index(na_position='last') + expected = df.iloc[[3, 0, 2, 1], :] + tm.assert_frame_equal(result, expected) + + # sorting frame, nan position first + result = df.sort_index(na_position='first') + expected = df.iloc[[1, 2, 3, 0], :] + tm.assert_frame_equal(result, expected) + + # sorting frame with removed rows + result = df2.dropna().sort_index() + expected = df2.sort_index().dropna() + tm.assert_frame_equal(result, expected) + + # sorting series, default nan position is last + result = s.sort_index() + expected = s.iloc[[3, 0, 2, 1]] + tm.assert_series_equal(result, expected) + + # sorting series, nan position last + result = s.sort_index(na_position='last') + expected = s.iloc[[3, 0, 2, 1]] + tm.assert_series_equal(result, expected) + + # sorting series, nan position first + result = s.sort_index(na_position='first') + expected = s.iloc[[1, 2, 3, 0]] + 
tm.assert_series_equal(result, expected) + + def test_sort_ascending_list(self): + # GH: 16934 + + # Set up a Series with a three level MultiIndex + arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], + ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two'], + [4, 3, 2, 1, 4, 3, 2, 1]] + tuples = lzip(*arrays) + mi = MultiIndex.from_tuples(tuples, names=['first', 'second', 'third']) + s = Series(range(8), index=mi) + + # Sort with boolean ascending + result = s.sort_index(level=['third', 'first'], ascending=False) + expected = s.iloc[[4, 0, 5, 1, 6, 2, 7, 3]] + tm.assert_series_equal(result, expected) + + # Sort with list of boolean ascending + result = s.sort_index(level=['third', 'first'], + ascending=[False, True]) + expected = s.iloc[[0, 4, 1, 5, 2, 6, 3, 7]] + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index 937c20d009b6b..dffb303af6ae1 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -3,19 +3,24 @@ from functools import partial +import pytest import warnings import numpy as np -from pandas import Series, isnull -from pandas.types.common import is_integer_dtype + +import pandas as pd +from pandas import Series, isna +from pandas.core.dtypes.common import is_integer_dtype import pandas.core.nanops as nanops import pandas.util.testing as tm +import pandas.util._test_decorators as td +from pandas.compat.numpy import _np_version_under1p13 use_bn = nanops._USE_BOTTLENECK -class TestnanopsDataFrame(tm.TestCase): +class TestnanopsDataFrame(object): - def setUp(self): + def setup_method(self, method): np.random.seed(11235) nanops._USE_BOTTLENECK = False @@ -115,7 +120,7 @@ def setUp(self): self.arr_float_nan_inf_1d = self.arr_float_nan_inf[:, 0, 0] self.arr_nan_nan_inf_1d = self.arr_nan_nan_inf[:, 0, 0] - def tearDown(self): + def teardown_method(self, method): nanops._USE_BOTTLENECK = use_bn def check_results(self, targ, res, axis, check_dtype=True): @@ 
-178,12 +183,17 @@ def _coerce_tds(targ, res): check_dtype=check_dtype) def check_fun_data(self, testfunc, targfunc, testarval, targarval, - targarnanval, check_dtype=True, **kwargs): + targarnanval, check_dtype=True, empty_targfunc=None, + **kwargs): for axis in list(range(targarval.ndim)) + [None]: for skipna in [False, True]: targartempval = targarval if skipna else targarnanval - try: + if skipna and empty_targfunc and isna(targartempval).all(): + targ = empty_targfunc(targartempval, axis=axis, **kwargs) + else: targ = targfunc(targartempval, axis=axis, **kwargs) + + try: res = testfunc(testarval, axis=axis, skipna=skipna, **kwargs) self.check_results(targ, res, axis, @@ -215,10 +225,11 @@ def check_fun_data(self, testfunc, targfunc, testarval, targarval, except ValueError: return self.check_fun_data(testfunc, targfunc, testarval2, targarval2, - targarnanval2, check_dtype=check_dtype, **kwargs) + targarnanval2, check_dtype=check_dtype, + empty_targfunc=empty_targfunc, **kwargs) def check_fun(self, testfunc, targfunc, testar, targar=None, - targarnan=None, **kwargs): + targarnan=None, empty_targfunc=None, **kwargs): if targar is None: targar = testar if targarnan is None: @@ -228,7 +239,8 @@ def check_fun(self, testfunc, targfunc, testar, targar=None, targarnanval = getattr(self, targarnan) try: self.check_fun_data(testfunc, targfunc, testarval, targarval, - targarnanval, **kwargs) + targarnanval, empty_targfunc=empty_targfunc, + **kwargs) except BaseException as exc: exc.args += ('testar: %s' % testar, 'targar: %s' % targar, 'targarnan: %s' % targarnan) @@ -325,7 +337,8 @@ def test_nanall(self): def test_nansum(self): self.check_funs(nanops.nansum, np.sum, allow_str=False, - allow_date=False, allow_tdelta=True, check_dtype=False) + allow_date=False, allow_tdelta=True, check_dtype=False, + empty_targfunc=np.nansum) def test_nanmean(self): self.check_funs(nanops.nanmean, np.mean, allow_complex=False, @@ -337,16 +350,13 @@ def test_nanmean_overflow(self): # In the 
previous implementation mean can overflow for int dtypes, it # is now consistent with numpy - # numpy < 1.9.0 is not computing this correctly - from distutils.version import LooseVersion - if LooseVersion(np.__version__) >= '1.9.0': - for a in [2 ** 55, -2 ** 55, 20150515061816532]: - s = Series(a, index=range(500), dtype=np.int64) - result = s.mean() - np_result = s.values.mean() - self.assertEqual(result, a) - self.assertEqual(result, np_result) - self.assertTrue(result.dtype == np.float64) + for a in [2 ** 55, -2 ** 55, 20150515061816532]: + s = Series(a, index=range(500), dtype=np.int64) + result = s.mean() + np_result = s.values.mean() + assert result == a + assert result == np_result + assert result.dtype == np.float64 def test_returned_dtype(self): @@ -361,15 +371,9 @@ def test_returned_dtype(self): for method in group_a + group_b: result = getattr(s, method)() if is_integer_dtype(dtype) and method in group_a: - self.assertTrue( - result.dtype == np.float64, - "return dtype expected from %s is np.float64, " - "got %s instead" % (method, result.dtype)) + assert result.dtype == np.float64 else: - self.assertTrue( - result.dtype == dtype, - "return dtype expected from %s is %s, " - "got %s instead" % (method, dtype, result.dtype)) + assert result.dtype == dtype def test_nanmedian(self): with warnings.catch_warnings(record=True): @@ -387,13 +391,13 @@ def test_nanstd(self): allow_str=False, allow_date=False, allow_tdelta=True, allow_obj='convert') + @td.skip_if_no('scipy', min_version='0.17.0') def test_nansem(self): - tm.skip_if_no_package('scipy.stats') - tm._skip_if_scipy_0_17() from scipy.stats import sem - self.check_funs_ddof(nanops.nansem, sem, allow_complex=False, - allow_str=False, allow_date=False, - allow_tdelta=True, allow_obj='convert') + with np.errstate(invalid='ignore'): + self.check_funs_ddof(nanops.nansem, sem, allow_complex=False, + allow_str=False, allow_date=False, + allow_tdelta=False, allow_obj='convert') def _minmax_wrap(self, value, 
axis=None, func=None): res = func(value, axis) @@ -412,7 +416,7 @@ def test_nanmax(self): def _argminmax_wrap(self, value, axis=None, func=None): res = func(value, axis) nans = np.min(value, axis) - nullnan = isnull(nans) + nullnan = isna(nans) if res.ndim: res[nullnan] = -1 elif (hasattr(nullnan, 'all') and nullnan.all() or @@ -447,26 +451,30 @@ def _skew_kurt_wrap(self, values, axis=None, func=None): return 0. return result + @td.skip_if_no('scipy', min_version='0.17.0') def test_nanskew(self): - tm.skip_if_no_package('scipy.stats') - tm._skip_if_scipy_0_17() from scipy.stats import skew func = partial(self._skew_kurt_wrap, func=skew) - self.check_funs(nanops.nanskew, func, allow_complex=False, - allow_str=False, allow_date=False, allow_tdelta=False) + with np.errstate(invalid='ignore'): + self.check_funs(nanops.nanskew, func, allow_complex=False, + allow_str=False, allow_date=False, + allow_tdelta=False) + @td.skip_if_no('scipy', min_version='0.17.0') def test_nankurt(self): - tm.skip_if_no_package('scipy.stats') - tm._skip_if_scipy_0_17() from scipy.stats import kurtosis func1 = partial(kurtosis, fisher=True) func = partial(self._skew_kurt_wrap, func=func1) - self.check_funs(nanops.nankurt, func, allow_complex=False, - allow_str=False, allow_date=False, allow_tdelta=False) + with np.errstate(invalid='ignore'): + self.check_funs(nanops.nankurt, func, allow_complex=False, + allow_str=False, allow_date=False, + allow_tdelta=False) + @td.skip_if_no("numpy", min_version="1.10.0") def test_nanprod(self): self.check_funs(nanops.nanprod, np.prod, allow_str=False, - allow_date=False, allow_tdelta=False) + allow_date=False, allow_tdelta=False, + empty_targfunc=np.nanprod) def check_nancorr_nancov_2d(self, checkfun, targ0, targ1, **kwargs): res00 = checkfun(self.arr_float_2d, self.arr_float1_2d, **kwargs) @@ -553,8 +561,8 @@ def test_nancorr_pearson(self): self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1, method='pearson') + @td.skip_if_no_scipy def 
test_nancorr_kendall(self): - tm.skip_if_no_package('scipy.stats') from scipy.stats import kendalltau targ0 = kendalltau(self.arr_float_2d, self.arr_float1_2d)[0] targ1 = kendalltau(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0] @@ -565,8 +573,8 @@ def test_nancorr_kendall(self): self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1, method='kendall') + @td.skip_if_no_scipy def test_nancorr_spearman(self): - tm.skip_if_no_package('scipy.stats') from scipy.stats import spearmanr targ0 = spearmanr(self.arr_float_2d, self.arr_float1_2d)[0] targ1 = spearmanr(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0] @@ -654,9 +662,9 @@ def check_bool(self, func, value, correct, *args, **kwargs): try: res0 = func(value, *args, **kwargs) if correct: - self.assertTrue(res0) + assert res0 else: - self.assertFalse(res0) + assert not res0 except BaseException as exc: exc.args += ('dim: %s' % getattr(value, 'ndim', value), ) raise @@ -733,67 +741,62 @@ def test__isfinite(self): raise def test__bn_ok_dtype(self): - self.assertTrue(nanops._bn_ok_dtype(self.arr_float.dtype, 'test')) - self.assertTrue(nanops._bn_ok_dtype(self.arr_complex.dtype, 'test')) - self.assertTrue(nanops._bn_ok_dtype(self.arr_int.dtype, 'test')) - self.assertTrue(nanops._bn_ok_dtype(self.arr_bool.dtype, 'test')) - self.assertTrue(nanops._bn_ok_dtype(self.arr_str.dtype, 'test')) - self.assertTrue(nanops._bn_ok_dtype(self.arr_utf.dtype, 'test')) - self.assertFalse(nanops._bn_ok_dtype(self.arr_date.dtype, 'test')) - self.assertFalse(nanops._bn_ok_dtype(self.arr_tdelta.dtype, 'test')) - self.assertFalse(nanops._bn_ok_dtype(self.arr_obj.dtype, 'test')) + assert nanops._bn_ok_dtype(self.arr_float.dtype, 'test') + assert nanops._bn_ok_dtype(self.arr_complex.dtype, 'test') + assert nanops._bn_ok_dtype(self.arr_int.dtype, 'test') + assert nanops._bn_ok_dtype(self.arr_bool.dtype, 'test') + assert nanops._bn_ok_dtype(self.arr_str.dtype, 'test') + assert nanops._bn_ok_dtype(self.arr_utf.dtype, 'test') + assert not 
nanops._bn_ok_dtype(self.arr_date.dtype, 'test') + assert not nanops._bn_ok_dtype(self.arr_tdelta.dtype, 'test') + assert not nanops._bn_ok_dtype(self.arr_obj.dtype, 'test') -class TestEnsureNumeric(tm.TestCase): +class TestEnsureNumeric(object): def test_numeric_values(self): # Test integer - self.assertEqual(nanops._ensure_numeric(1), 1, 'Failed for int') + assert nanops._ensure_numeric(1) == 1 + # Test float - self.assertEqual(nanops._ensure_numeric(1.1), 1.1, 'Failed for float') + assert nanops._ensure_numeric(1.1) == 1.1 + # Test complex - self.assertEqual(nanops._ensure_numeric(1 + 2j), 1 + 2j, - 'Failed for complex') + assert nanops._ensure_numeric(1 + 2j) == 1 + 2j def test_ndarray(self): # Test numeric ndarray values = np.array([1, 2, 3]) - self.assertTrue(np.allclose(nanops._ensure_numeric(values), values), - 'Failed for numeric ndarray') + assert np.allclose(nanops._ensure_numeric(values), values) # Test object ndarray o_values = values.astype(object) - self.assertTrue(np.allclose(nanops._ensure_numeric(o_values), values), - 'Failed for object ndarray') + assert np.allclose(nanops._ensure_numeric(o_values), values) # Test convertible string ndarray s_values = np.array(['1', '2', '3'], dtype=object) - self.assertTrue(np.allclose(nanops._ensure_numeric(s_values), values), - 'Failed for convertible string ndarray') + assert np.allclose(nanops._ensure_numeric(s_values), values) # Test non-convertible string ndarray s_values = np.array(['foo', 'bar', 'baz'], dtype=object) - self.assertRaises(ValueError, lambda: nanops._ensure_numeric(s_values)) + pytest.raises(ValueError, lambda: nanops._ensure_numeric(s_values)) def test_convertable_values(self): - self.assertTrue(np.allclose(nanops._ensure_numeric('1'), 1.0), - 'Failed for convertible integer string') - self.assertTrue(np.allclose(nanops._ensure_numeric('1.1'), 1.1), - 'Failed for convertible float string') - self.assertTrue(np.allclose(nanops._ensure_numeric('1+1j'), 1 + 1j), - 'Failed for convertible 
complex string') + assert np.allclose(nanops._ensure_numeric('1'), 1.0) + assert np.allclose(nanops._ensure_numeric('1.1'), 1.1) + assert np.allclose(nanops._ensure_numeric('1+1j'), 1 + 1j) def test_non_convertable_values(self): - self.assertRaises(TypeError, lambda: nanops._ensure_numeric('foo')) - self.assertRaises(TypeError, lambda: nanops._ensure_numeric({})) - self.assertRaises(TypeError, lambda: nanops._ensure_numeric([])) + pytest.raises(TypeError, lambda: nanops._ensure_numeric('foo')) + pytest.raises(TypeError, lambda: nanops._ensure_numeric({})) + pytest.raises(TypeError, lambda: nanops._ensure_numeric([])) -class TestNanvarFixedValues(tm.TestCase): +class TestNanvarFixedValues(object): # xref GH10242 - def setUp(self): + def setup_method(self, method): # Samples from a normal distribution. self.variance = variance = 3.0 self.samples = self.prng.normal(scale=variance ** 0.5, size=100000) @@ -880,14 +883,14 @@ def test_ground_truth(self): for ddof in range(3): var = nanops.nanvar(samples, skipna=True, axis=axis, ddof=ddof) tm.assert_almost_equal(var[:3], variance[axis, ddof]) - self.assertTrue(np.isnan(var[3])) + assert np.isnan(var[3]) # Test nanstd. for axis in range(2): for ddof in range(3): std = nanops.nanstd(samples, skipna=True, axis=axis, ddof=ddof) tm.assert_almost_equal(std[:3], variance[axis, ddof] ** 0.5) - self.assertTrue(np.isnan(std[3])) + assert np.isnan(std[3]) def test_nanstd_roundoff(self): # Regression test for GH 10242 (test data taken from GH 10489). 
Ensure @@ -895,18 +898,18 @@ def test_nanstd_roundoff(self): data = Series(766897346 * np.ones(10)) for ddof in range(3): result = data.std(ddof=ddof) - self.assertEqual(result, 0.0) + assert result == 0.0 @property def prng(self): return np.random.RandomState(1234) -class TestNanskewFixedValues(tm.TestCase): +class TestNanskewFixedValues(object): # xref GH 11974 - def setUp(self): + def setup_method(self, method): # Test data + skewness value (computed with scipy.stats.skew) self.samples = np.sin(np.linspace(0, 1, 200)) self.actual_skew = -0.1875895205961754 @@ -916,20 +919,20 @@ def test_constant_series(self): for val in [3075.2, 3075.3, 3075.5]: data = val * np.ones(300) skew = nanops.nanskew(data) - self.assertEqual(skew, 0.0) + assert skew == 0.0 def test_all_finite(self): alpha, beta = 0.3, 0.1 left_tailed = self.prng.beta(alpha, beta, size=100) - self.assertLess(nanops.nanskew(left_tailed), 0) + assert nanops.nanskew(left_tailed) < 0 alpha, beta = 0.1, 0.3 right_tailed = self.prng.beta(alpha, beta, size=100) - self.assertGreater(nanops.nanskew(right_tailed), 0) + assert nanops.nanskew(right_tailed) > 0 def test_ground_truth(self): skew = nanops.nanskew(self.samples) - self.assertAlmostEqual(skew, self.actual_skew) + tm.assert_almost_equal(skew, self.actual_skew) def test_axis(self): samples = np.vstack([self.samples, @@ -940,7 +943,7 @@ def test_axis(self): def test_nans(self): samples = np.hstack([self.samples, np.nan]) skew = nanops.nanskew(samples, skipna=False) - self.assertTrue(np.isnan(skew)) + assert np.isnan(skew) def test_nans_skipna(self): samples = np.hstack([self.samples, np.nan]) @@ -952,11 +955,11 @@ def prng(self): return np.random.RandomState(1234) -class TestNankurtFixedValues(tm.TestCase): +class TestNankurtFixedValues(object): # xref GH 11974 - def setUp(self): + def setup_method(self, method): # Test data + kurtosis value (computed with scipy.stats.kurtosis) self.samples = np.sin(np.linspace(0, 1, 200)) self.actual_kurt = 
-1.2058303433799713 @@ -966,20 +969,20 @@ def test_constant_series(self): for val in [3075.2, 3075.3, 3075.5]: data = val * np.ones(300) kurt = nanops.nankurt(data) - self.assertEqual(kurt, 0.0) + assert kurt == 0.0 def test_all_finite(self): alpha, beta = 0.3, 0.1 left_tailed = self.prng.beta(alpha, beta, size=100) - self.assertLess(nanops.nankurt(left_tailed), 0) + assert nanops.nankurt(left_tailed) < 0 alpha, beta = 0.1, 0.3 right_tailed = self.prng.beta(alpha, beta, size=100) - self.assertGreater(nanops.nankurt(right_tailed), 0) + assert nanops.nankurt(right_tailed) > 0 def test_ground_truth(self): kurt = nanops.nankurt(self.samples) - self.assertAlmostEqual(kurt, self.actual_kurt) + tm.assert_almost_equal(kurt, self.actual_kurt) def test_axis(self): samples = np.vstack([self.samples, @@ -990,7 +993,7 @@ def test_axis(self): def test_nans(self): samples = np.hstack([self.samples, np.nan]) kurt = nanops.nankurt(samples, skipna=False) - self.assertTrue(np.isnan(kurt)) + assert np.isnan(kurt) def test_nans_skipna(self): samples = np.hstack([self.samples, np.nan]) @@ -1000,3 +1003,47 @@ def test_nans_skipna(self): @property def prng(self): return np.random.RandomState(1234) + + +def test_use_bottleneck(): + + if nanops._BOTTLENECK_INSTALLED: + + pd.set_option('use_bottleneck', True) + assert pd.get_option('use_bottleneck') + + pd.set_option('use_bottleneck', False) + assert not pd.get_option('use_bottleneck') + + pd.set_option('use_bottleneck', use_bn) + + +@pytest.mark.parametrize("numpy_op, expected", [ + (np.sum, 10), + (np.nansum, 10), + (np.mean, 2.5), + (np.nanmean, 2.5), + (np.median, 2.5), + (np.nanmedian, 2.5), + (np.min, 1), + (np.max, 4), +]) +def test_numpy_ops(numpy_op, expected): + # GH8383 + result = numpy_op(pd.Series([1, 2, 3, 4])) + assert result == expected + + +@pytest.mark.parametrize("numpy_op, expected", [ + (np.nanmin, 1), + (np.nanmax, 4), +]) +def test_numpy_ops_np_version_under1p13(numpy_op, expected): + # GH8383 + result = 
numpy_op(pd.Series([1, 2, 3, 4])) + if _np_version_under1p13: + # bug for numpy < 1.13, where result is a series, should be a scalar + with pytest.raises(ValueError): + assert result == expected + else: + assert result == expected diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 2f329f241a5b8..301a7fc437fcf 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1,54 +1,64 @@ # -*- coding: utf-8 -*- # pylint: disable=W0612,E1101 +from warnings import catch_warnings from datetime import datetime - import operator import pytest import numpy as np -import pandas as pd -from pandas.types.common import is_float_dtype -from pandas import (Series, DataFrame, Index, date_range, isnull, notnull, +from pandas.core.dtypes.common import is_float_dtype +from pandas import (Series, DataFrame, Index, date_range, isna, notna, pivot, MultiIndex) from pandas.core.nanops import nanall, nanany from pandas.core.panel import Panel -from pandas.core.series import remove_na -from pandas.formats.printing import pprint_thing +from pandas.io.formats.printing import pprint_thing from pandas import compat from pandas.compat import range, lrange, StringIO, OrderedDict, signature from pandas.tseries.offsets import BDay, MonthEnd from pandas.util.testing import (assert_panel_equal, assert_frame_equal, assert_series_equal, assert_almost_equal, - ensure_clean, assertRaisesRegexp, - makeCustomDataframe as mkdf, - makeMixedDataFrame) + ensure_clean, makeMixedDataFrame, + makeCustomDataframe as mkdf) import pandas.core.panel as panelm import pandas.util.testing as tm +import pandas.util._test_decorators as td + + +def make_test_panel(): + with catch_warnings(record=True): + _panel = tm.makePanel() + tm.add_nans(_panel) + _panel = _panel.copy() + return _panel class PanelTests(object): panel = None def test_pickle(self): - unpickled = self.round_trip_pickle(self.panel) - assert_frame_equal(unpickled['ItemA'], self.panel['ItemA']) + with 
catch_warnings(record=True): + unpickled = tm.round_trip_pickle(self.panel) + assert_frame_equal(unpickled['ItemA'], self.panel['ItemA']) def test_rank(self): - self.assertRaises(NotImplementedError, lambda: self.panel.rank()) + with catch_warnings(record=True): + pytest.raises(NotImplementedError, lambda: self.panel.rank()) def test_cumsum(self): - cumsum = self.panel.cumsum() - assert_frame_equal(cumsum['ItemA'], self.panel['ItemA'].cumsum()) + with catch_warnings(record=True): + cumsum = self.panel.cumsum() + assert_frame_equal(cumsum['ItemA'], self.panel['ItemA'].cumsum()) def not_hashable(self): - c_empty = Panel() - c = Panel(Panel([[[1]]])) - self.assertRaises(TypeError, hash, c_empty) - self.assertRaises(TypeError, hash, c) + with catch_warnings(record=True): + c_empty = Panel() + c = Panel(Panel([[[1]]])) + pytest.raises(TypeError, hash, c_empty) + pytest.raises(TypeError, hash, c) class SafeForLongAndSparse(object): @@ -57,31 +67,33 @@ def test_repr(self): repr(self.panel) def test_copy_names(self): - for attr in ('major_axis', 'minor_axis'): - getattr(self.panel, attr).name = None - cp = self.panel.copy() - getattr(cp, attr).name = 'foo' - self.assertIsNone(getattr(self.panel, attr).name) + with catch_warnings(record=True): + for attr in ('major_axis', 'minor_axis'): + getattr(self.panel, attr).name = None + cp = self.panel.copy() + getattr(cp, attr).name = 'foo' + assert getattr(self.panel, attr).name is None def test_iter(self): tm.equalContents(list(self.panel), self.panel.items) def test_count(self): - f = lambda s: notnull(s).sum() + f = lambda s: notna(s).sum() self._check_stat_op('count', f, obj=self.panel, has_skipna=False) def test_sum(self): - self._check_stat_op('sum', np.sum) + self._check_stat_op('sum', np.sum, skipna_alternative=np.nansum) def test_mean(self): self._check_stat_op('mean', np.mean) + @td.skip_if_no("numpy", min_version="1.10.0") def test_prod(self): - self._check_stat_op('prod', np.prod) + self._check_stat_op('prod', np.prod, 
skipna_alternative=np.nanprod) def test_median(self): def wrapper(x): - if isnull(x).any(): + if isna(x).any(): return np.nan return np.median(x) @@ -93,11 +105,9 @@ def test_min(self): def test_max(self): self._check_stat_op('max', np.max) + @td.skip_if_no_scipy def test_skew(self): - try: - from scipy.stats import skew - except ImportError: - pytest.skip("no scipy.stats.skew") + from scipy.stats import skew def this_skew(x): if len(x) < 3: @@ -106,10 +116,6 @@ def this_skew(x): self._check_stat_op('skew', this_skew) - # def test_mad(self): - # f = lambda x: np.abs(x - x.mean()).mean() - # self._check_stat_op('mad', f) - def test_var(self): def alt(x): if len(x) < 2: @@ -134,7 +140,8 @@ def alt(x): self._check_stat_op('sem', alt) - def _check_stat_op(self, name, alternative, obj=None, has_skipna=True): + def _check_stat_op(self, name, alternative, obj=None, has_skipna=True, + skipna_alternative=None): if obj is None: obj = self.panel @@ -146,11 +153,8 @@ def _check_stat_op(self, name, alternative, obj=None, has_skipna=True): if has_skipna: - def skipna_wrapper(x): - nona = remove_na(x) - if len(nona) == 0: - return np.nan - return alternative(nona) + skipna_wrapper = tm._make_skipna_wrapper(alternative, + skipna_alternative) def wrapper(x): return alternative(np.asarray(x)) @@ -164,15 +168,15 @@ def wrapper(x): for i in range(obj.ndim): result = f(axis=i) - if not tm._incompat_bottleneck_version(name): + if name in ['sum', 'prod']: assert_frame_equal(result, obj.apply(skipna_wrapper, axis=i)) - self.assertRaises(Exception, f, axis=obj.ndim) + pytest.raises(Exception, f, axis=obj.ndim) # Unimplemented numeric_only parameter. 
if 'numeric_only' in signature(f).args: - self.assertRaisesRegexp(NotImplementedError, name, f, - numeric_only=True) + tm.assert_raises_regex(NotImplementedError, name, f, + numeric_only=True) class SafeForSparse(object): @@ -196,38 +200,38 @@ def test_set_axis(self): self.panel.items = new_items if hasattr(self.panel, '_item_cache'): - self.assertNotIn('ItemA', self.panel._item_cache) - self.assertIs(self.panel.items, new_items) + assert 'ItemA' not in self.panel._item_cache + assert self.panel.items is new_items # TODO: unused? item = self.panel[0] # noqa self.panel.major_axis = new_major - self.assertIs(self.panel[0].index, new_major) - self.assertIs(self.panel.major_axis, new_major) + assert self.panel[0].index is new_major + assert self.panel.major_axis is new_major # TODO: unused? item = self.panel[0] # noqa self.panel.minor_axis = new_minor - self.assertIs(self.panel[0].columns, new_minor) - self.assertIs(self.panel.minor_axis, new_minor) + assert self.panel[0].columns is new_minor + assert self.panel.minor_axis is new_minor def test_get_axis_number(self): - self.assertEqual(self.panel._get_axis_number('items'), 0) - self.assertEqual(self.panel._get_axis_number('major'), 1) - self.assertEqual(self.panel._get_axis_number('minor'), 2) + assert self.panel._get_axis_number('items') == 0 + assert self.panel._get_axis_number('major') == 1 + assert self.panel._get_axis_number('minor') == 2 - with tm.assertRaisesRegexp(ValueError, "No axis named foo"): + with tm.assert_raises_regex(ValueError, "No axis named foo"): self.panel._get_axis_number('foo') - with tm.assertRaisesRegexp(ValueError, "No axis named foo"): + with tm.assert_raises_regex(ValueError, "No axis named foo"): self.panel.__ge__(self.panel, axis='foo') def test_get_axis_name(self): - self.assertEqual(self.panel._get_axis_name(0), 'items') - self.assertEqual(self.panel._get_axis_name(1), 'major_axis') - self.assertEqual(self.panel._get_axis_name(2), 'minor_axis') + assert self.panel._get_axis_name(0) == 
'items' + assert self.panel._get_axis_name(1) == 'major_axis' + assert self.panel._get_axis_name(2) == 'minor_axis' def test_get_plane_axes(self): # what to do here? @@ -238,47 +242,48 @@ def test_get_plane_axes(self): index, columns = self.panel._get_plane_axes(0) def test_truncate(self): - dates = self.panel.major_axis - start, end = dates[1], dates[5] + with catch_warnings(record=True): + dates = self.panel.major_axis + start, end = dates[1], dates[5] - trunced = self.panel.truncate(start, end, axis='major') - expected = self.panel['ItemA'].truncate(start, end) + trunced = self.panel.truncate(start, end, axis='major') + expected = self.panel['ItemA'].truncate(start, end) - assert_frame_equal(trunced['ItemA'], expected) + assert_frame_equal(trunced['ItemA'], expected) - trunced = self.panel.truncate(before=start, axis='major') - expected = self.panel['ItemA'].truncate(before=start) + trunced = self.panel.truncate(before=start, axis='major') + expected = self.panel['ItemA'].truncate(before=start) - assert_frame_equal(trunced['ItemA'], expected) + assert_frame_equal(trunced['ItemA'], expected) - trunced = self.panel.truncate(after=end, axis='major') - expected = self.panel['ItemA'].truncate(after=end) + trunced = self.panel.truncate(after=end, axis='major') + expected = self.panel['ItemA'].truncate(after=end) - assert_frame_equal(trunced['ItemA'], expected) - - # XXX test other axes + assert_frame_equal(trunced['ItemA'], expected) def test_arith(self): - self._test_op(self.panel, operator.add) - self._test_op(self.panel, operator.sub) - self._test_op(self.panel, operator.mul) - self._test_op(self.panel, operator.truediv) - self._test_op(self.panel, operator.floordiv) - self._test_op(self.panel, operator.pow) - - self._test_op(self.panel, lambda x, y: y + x) - self._test_op(self.panel, lambda x, y: y - x) - self._test_op(self.panel, lambda x, y: y * x) - self._test_op(self.panel, lambda x, y: y / x) - self._test_op(self.panel, lambda x, y: y ** x) - - 
self._test_op(self.panel, lambda x, y: x + y) # panel + 1 - self._test_op(self.panel, lambda x, y: x - y) # panel - 1 - self._test_op(self.panel, lambda x, y: x * y) # panel * 1 - self._test_op(self.panel, lambda x, y: x / y) # panel / 1 - self._test_op(self.panel, lambda x, y: x ** y) # panel ** 1 - - self.assertRaises(Exception, self.panel.__add__, self.panel['ItemA']) + with catch_warnings(record=True): + self._test_op(self.panel, operator.add) + self._test_op(self.panel, operator.sub) + self._test_op(self.panel, operator.mul) + self._test_op(self.panel, operator.truediv) + self._test_op(self.panel, operator.floordiv) + self._test_op(self.panel, operator.pow) + + self._test_op(self.panel, lambda x, y: y + x) + self._test_op(self.panel, lambda x, y: y - x) + self._test_op(self.panel, lambda x, y: y * x) + self._test_op(self.panel, lambda x, y: y / x) + self._test_op(self.panel, lambda x, y: y ** x) + + self._test_op(self.panel, lambda x, y: x + y) # panel + 1 + self._test_op(self.panel, lambda x, y: x - y) # panel - 1 + self._test_op(self.panel, lambda x, y: x * y) # panel * 1 + self._test_op(self.panel, lambda x, y: x / y) # panel / 1 + self._test_op(self.panel, lambda x, y: x ** y) # panel ** 1 + + pytest.raises(Exception, self.panel.__add__, + self.panel['ItemA']) @staticmethod def _test_op(panel, op): @@ -294,318 +299,342 @@ def test_iteritems(self): for k, v in self.panel.iteritems(): pass - self.assertEqual(len(list(self.panel.iteritems())), - len(self.panel.items)) + assert len(list(self.panel.iteritems())) == len(self.panel.items) def test_combineFrame(self): - def check_op(op, name): - # items - df = self.panel['ItemA'] + with catch_warnings(record=True): + def check_op(op, name): + # items + df = self.panel['ItemA'] - func = getattr(self.panel, name) + func = getattr(self.panel, name) - result = func(df, axis='items') + result = func(df, axis='items') - assert_frame_equal(result['ItemB'], op(self.panel['ItemB'], df)) + assert_frame_equal( + 
result['ItemB'], op(self.panel['ItemB'], df)) - # major - xs = self.panel.major_xs(self.panel.major_axis[0]) - result = func(xs, axis='major') + # major + xs = self.panel.major_xs(self.panel.major_axis[0]) + result = func(xs, axis='major') - idx = self.panel.major_axis[1] + idx = self.panel.major_axis[1] - assert_frame_equal(result.major_xs(idx), - op(self.panel.major_xs(idx), xs)) + assert_frame_equal(result.major_xs(idx), + op(self.panel.major_xs(idx), xs)) - # minor - xs = self.panel.minor_xs(self.panel.minor_axis[0]) - result = func(xs, axis='minor') + # minor + xs = self.panel.minor_xs(self.panel.minor_axis[0]) + result = func(xs, axis='minor') - idx = self.panel.minor_axis[1] + idx = self.panel.minor_axis[1] - assert_frame_equal(result.minor_xs(idx), - op(self.panel.minor_xs(idx), xs)) + assert_frame_equal(result.minor_xs(idx), + op(self.panel.minor_xs(idx), xs)) - ops = ['add', 'sub', 'mul', 'truediv', 'floordiv', 'pow', 'mod'] - if not compat.PY3: - ops.append('div') + ops = ['add', 'sub', 'mul', 'truediv', 'floordiv', 'pow', 'mod'] + if not compat.PY3: + ops.append('div') - for op in ops: - try: - check_op(getattr(operator, op), op) - except: - pprint_thing("Failing operation: %r" % op) - raise - if compat.PY3: - try: - check_op(operator.truediv, 'div') - except: - pprint_thing("Failing operation: %r" % 'div') - raise + for op in ops: + try: + check_op(getattr(operator, op), op) + except: + pprint_thing("Failing operation: %r" % op) + raise + if compat.PY3: + try: + check_op(operator.truediv, 'div') + except: + pprint_thing("Failing operation: %r" % 'div') + raise def test_combinePanel(self): - result = self.panel.add(self.panel) - self.assert_panel_equal(result, self.panel * 2) + with catch_warnings(record=True): + result = self.panel.add(self.panel) + assert_panel_equal(result, self.panel * 2) def test_neg(self): - self.assert_panel_equal(-self.panel, self.panel * -1) + with catch_warnings(record=True): + assert_panel_equal(-self.panel, self.panel * -1) 
# issue 7692 def test_raise_when_not_implemented(self): - p = Panel(np.arange(3 * 4 * 5).reshape(3, 4, 5), - items=['ItemA', 'ItemB', 'ItemC'], - major_axis=pd.date_range('20130101', periods=4), - minor_axis=list('ABCDE')) - d = p.sum(axis=1).iloc[0] - ops = ['add', 'sub', 'mul', 'truediv', 'floordiv', 'div', 'mod', 'pow'] - for op in ops: - with self.assertRaises(NotImplementedError): - getattr(p, op)(d, axis=0) + with catch_warnings(record=True): + p = Panel(np.arange(3 * 4 * 5).reshape(3, 4, 5), + items=['ItemA', 'ItemB', 'ItemC'], + major_axis=date_range('20130101', periods=4), + minor_axis=list('ABCDE')) + d = p.sum(axis=1).iloc[0] + ops = ['add', 'sub', 'mul', 'truediv', + 'floordiv', 'div', 'mod', 'pow'] + for op in ops: + with pytest.raises(NotImplementedError): + getattr(p, op)(d, axis=0) def test_select(self): - p = self.panel + with catch_warnings(record=True): + p = self.panel - # select items - result = p.select(lambda x: x in ('ItemA', 'ItemC'), axis='items') - expected = p.reindex(items=['ItemA', 'ItemC']) - self.assert_panel_equal(result, expected) + # select items + result = p.select(lambda x: x in ('ItemA', 'ItemC'), axis='items') + expected = p.reindex(items=['ItemA', 'ItemC']) + assert_panel_equal(result, expected) - # select major_axis - result = p.select(lambda x: x >= datetime(2000, 1, 15), axis='major') - new_major = p.major_axis[p.major_axis >= datetime(2000, 1, 15)] - expected = p.reindex(major=new_major) - self.assert_panel_equal(result, expected) + # select major_axis + result = p.select(lambda x: x >= datetime( + 2000, 1, 15), axis='major') + new_major = p.major_axis[p.major_axis >= datetime(2000, 1, 15)] + expected = p.reindex(major=new_major) + assert_panel_equal(result, expected) - # select minor_axis - result = p.select(lambda x: x in ('D', 'A'), axis=2) - expected = p.reindex(minor=['A', 'D']) - self.assert_panel_equal(result, expected) + # select minor_axis + result = p.select(lambda x: x in ('D', 'A'), axis=2) + expected = 
p.reindex(minor=['A', 'D']) + assert_panel_equal(result, expected) - # corner case, empty thing - result = p.select(lambda x: x in ('foo', ), axis='items') - self.assert_panel_equal(result, p.reindex(items=[])) + # corner case, empty thing + result = p.select(lambda x: x in ('foo', ), axis='items') + assert_panel_equal(result, p.reindex(items=[])) def test_get_value(self): for item in self.panel.items: for mjr in self.panel.major_axis[::2]: for mnr in self.panel.minor_axis: - result = self.panel.get_value(item, mjr, mnr) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = self.panel.get_value(item, mjr, mnr) expected = self.panel[item][mnr][mjr] assert_almost_equal(result, expected) def test_abs(self): - result = self.panel.abs() - result2 = abs(self.panel) - expected = np.abs(self.panel) - self.assert_panel_equal(result, expected) - self.assert_panel_equal(result2, expected) - - df = self.panel['ItemA'] - result = df.abs() - result2 = abs(df) - expected = np.abs(df) - assert_frame_equal(result, expected) - assert_frame_equal(result2, expected) + with catch_warnings(record=True): + result = self.panel.abs() + result2 = abs(self.panel) + expected = np.abs(self.panel) + assert_panel_equal(result, expected) + assert_panel_equal(result2, expected) - s = df['A'] - result = s.abs() - result2 = abs(s) - expected = np.abs(s) - assert_series_equal(result, expected) - assert_series_equal(result2, expected) - self.assertEqual(result.name, 'A') - self.assertEqual(result2.name, 'A') + df = self.panel['ItemA'] + result = df.abs() + result2 = abs(df) + expected = np.abs(df) + assert_frame_equal(result, expected) + assert_frame_equal(result2, expected) + + s = df['A'] + result = s.abs() + result2 = abs(s) + expected = np.abs(s) + assert_series_equal(result, expected) + assert_series_equal(result2, expected) + assert result.name == 'A' + assert result2.name == 'A' class CheckIndexing(object): def test_getitem(self): - self.assertRaises(Exception, 
self.panel.__getitem__, 'ItemQ') + pytest.raises(Exception, self.panel.__getitem__, 'ItemQ') def test_delitem_and_pop(self): - expected = self.panel['ItemA'] - result = self.panel.pop('ItemA') - assert_frame_equal(expected, result) - self.assertNotIn('ItemA', self.panel.items) + with catch_warnings(record=True): + expected = self.panel['ItemA'] + result = self.panel.pop('ItemA') + assert_frame_equal(expected, result) + assert 'ItemA' not in self.panel.items - del self.panel['ItemB'] - self.assertNotIn('ItemB', self.panel.items) - self.assertRaises(Exception, self.panel.__delitem__, 'ItemB') + del self.panel['ItemB'] + assert 'ItemB' not in self.panel.items + pytest.raises(Exception, self.panel.__delitem__, 'ItemB') - values = np.empty((3, 3, 3)) - values[0] = 0 - values[1] = 1 - values[2] = 2 + values = np.empty((3, 3, 3)) + values[0] = 0 + values[1] = 1 + values[2] = 2 - panel = Panel(values, lrange(3), lrange(3), lrange(3)) + panel = Panel(values, lrange(3), lrange(3), lrange(3)) - # did we delete the right row? + # did we delete the right row? 
- panelc = panel.copy() - del panelc[0] - assert_frame_equal(panelc[1], panel[1]) - assert_frame_equal(panelc[2], panel[2]) + panelc = panel.copy() + del panelc[0] + tm.assert_frame_equal(panelc[1], panel[1]) + tm.assert_frame_equal(panelc[2], panel[2]) - panelc = panel.copy() - del panelc[1] - assert_frame_equal(panelc[0], panel[0]) - assert_frame_equal(panelc[2], panel[2]) + panelc = panel.copy() + del panelc[1] + tm.assert_frame_equal(panelc[0], panel[0]) + tm.assert_frame_equal(panelc[2], panel[2]) - panelc = panel.copy() - del panelc[2] - assert_frame_equal(panelc[1], panel[1]) - assert_frame_equal(panelc[0], panel[0]) + panelc = panel.copy() + del panelc[2] + tm.assert_frame_equal(panelc[1], panel[1]) + tm.assert_frame_equal(panelc[0], panel[0]) def test_setitem(self): - # LongPanel with one item - lp = self.panel.filter(['ItemA', 'ItemB']).to_frame() - with tm.assertRaises(ValueError): - self.panel['ItemE'] = lp + with catch_warnings(record=True): + + # LongPanel with one item + lp = self.panel.filter(['ItemA', 'ItemB']).to_frame() + with pytest.raises(ValueError): + self.panel['ItemE'] = lp - # DataFrame - df = self.panel['ItemA'][2:].filter(items=['A', 'B']) - self.panel['ItemF'] = df - self.panel['ItemE'] = df + # DataFrame + df = self.panel['ItemA'][2:].filter(items=['A', 'B']) + self.panel['ItemF'] = df + self.panel['ItemE'] = df - df2 = self.panel['ItemF'] + df2 = self.panel['ItemF'] - assert_frame_equal(df, df2.reindex(index=df.index, columns=df.columns)) + assert_frame_equal(df, df2.reindex( + index=df.index, columns=df.columns)) - # scalar - self.panel['ItemG'] = 1 - self.panel['ItemE'] = True - self.assertEqual(self.panel['ItemG'].values.dtype, np.int64) - self.assertEqual(self.panel['ItemE'].values.dtype, np.bool_) + # scalar + self.panel['ItemG'] = 1 + self.panel['ItemE'] = True + assert self.panel['ItemG'].values.dtype == np.int64 + assert self.panel['ItemE'].values.dtype == np.bool_ - # object dtype - self.panel['ItemQ'] = 'foo' - 
self.assertEqual(self.panel['ItemQ'].values.dtype, np.object_) + # object dtype + self.panel['ItemQ'] = 'foo' + assert self.panel['ItemQ'].values.dtype == np.object_ - # boolean dtype - self.panel['ItemP'] = self.panel['ItemA'] > 0 - self.assertEqual(self.panel['ItemP'].values.dtype, np.bool_) + # boolean dtype + self.panel['ItemP'] = self.panel['ItemA'] > 0 + assert self.panel['ItemP'].values.dtype == np.bool_ - self.assertRaises(TypeError, self.panel.__setitem__, 'foo', + pytest.raises(TypeError, self.panel.__setitem__, 'foo', self.panel.loc[['ItemP']]) - # bad shape - p = Panel(np.random.randn(4, 3, 2)) - with tm.assertRaisesRegexp(ValueError, - r"shape of value must be \(3, 2\), " - r"shape of given object was \(4, 2\)"): - p[0] = np.random.randn(4, 2) + # bad shape + p = Panel(np.random.randn(4, 3, 2)) + with tm.assert_raises_regex(ValueError, + r"shape of value must be " + r"\(3, 2\), shape of given " + r"object was \(4, 2\)"): + p[0] = np.random.randn(4, 2) def test_setitem_ndarray(self): - timeidx = date_range(start=datetime(2009, 1, 1), - end=datetime(2009, 12, 31), - freq=MonthEnd()) - lons_coarse = np.linspace(-177.5, 177.5, 72) - lats_coarse = np.linspace(-87.5, 87.5, 36) - P = Panel(items=timeidx, major_axis=lons_coarse, - minor_axis=lats_coarse) - data = np.random.randn(72 * 36).reshape((72, 36)) - key = datetime(2009, 2, 28) - P[key] = data - - assert_almost_equal(P[key].values, data) + with catch_warnings(record=True): + timeidx = date_range(start=datetime(2009, 1, 1), + end=datetime(2009, 12, 31), + freq=MonthEnd()) + lons_coarse = np.linspace(-177.5, 177.5, 72) + lats_coarse = np.linspace(-87.5, 87.5, 36) + P = Panel(items=timeidx, major_axis=lons_coarse, + minor_axis=lats_coarse) + data = np.random.randn(72 * 36).reshape((72, 36)) + key = datetime(2009, 2, 28) + P[key] = data + + assert_almost_equal(P[key].values, data) def test_set_minor_major(self): - # GH 11014 - df1 = DataFrame(['a', 'a', 'a', np.nan, 'a', np.nan]) - df2 = DataFrame([1.0, 
np.nan, 1.0, np.nan, 1.0, 1.0]) - panel = Panel({'Item1': df1, 'Item2': df2}) - - newminor = notnull(panel.iloc[:, :, 0]) - panel.loc[:, :, 'NewMinor'] = newminor - assert_frame_equal(panel.loc[:, :, 'NewMinor'], - newminor.astype(object)) - - newmajor = notnull(panel.iloc[:, 0, :]) - panel.loc[:, 'NewMajor', :] = newmajor - assert_frame_equal(panel.loc[:, 'NewMajor', :], - newmajor.astype(object)) + with catch_warnings(record=True): + # GH 11014 + df1 = DataFrame(['a', 'a', 'a', np.nan, 'a', np.nan]) + df2 = DataFrame([1.0, np.nan, 1.0, np.nan, 1.0, 1.0]) + panel = Panel({'Item1': df1, 'Item2': df2}) + + newminor = notna(panel.iloc[:, :, 0]) + panel.loc[:, :, 'NewMinor'] = newminor + assert_frame_equal(panel.loc[:, :, 'NewMinor'], + newminor.astype(object)) + + newmajor = notna(panel.iloc[:, 0, :]) + panel.loc[:, 'NewMajor', :] = newmajor + assert_frame_equal(panel.loc[:, 'NewMajor', :], + newmajor.astype(object)) def test_major_xs(self): - ref = self.panel['ItemA'] + with catch_warnings(record=True): + ref = self.panel['ItemA'] - idx = self.panel.major_axis[5] - xs = self.panel.major_xs(idx) + idx = self.panel.major_axis[5] + xs = self.panel.major_xs(idx) - result = xs['ItemA'] - assert_series_equal(result, ref.xs(idx), check_names=False) - self.assertEqual(result.name, 'ItemA') + result = xs['ItemA'] + assert_series_equal(result, ref.xs(idx), check_names=False) + assert result.name == 'ItemA' - # not contained - idx = self.panel.major_axis[0] - BDay() - self.assertRaises(Exception, self.panel.major_xs, idx) + # not contained + idx = self.panel.major_axis[0] - BDay() + pytest.raises(Exception, self.panel.major_xs, idx) def test_major_xs_mixed(self): - self.panel['ItemD'] = 'foo' - xs = self.panel.major_xs(self.panel.major_axis[0]) - self.assertEqual(xs['ItemA'].dtype, np.float64) - self.assertEqual(xs['ItemD'].dtype, np.object_) + with catch_warnings(record=True): + self.panel['ItemD'] = 'foo' + xs = self.panel.major_xs(self.panel.major_axis[0]) + assert 
xs['ItemA'].dtype == np.float64 + assert xs['ItemD'].dtype == np.object_ def test_minor_xs(self): - ref = self.panel['ItemA'] + with catch_warnings(record=True): + ref = self.panel['ItemA'] - idx = self.panel.minor_axis[1] - xs = self.panel.minor_xs(idx) + idx = self.panel.minor_axis[1] + xs = self.panel.minor_xs(idx) - assert_series_equal(xs['ItemA'], ref[idx], check_names=False) + assert_series_equal(xs['ItemA'], ref[idx], check_names=False) - # not contained - self.assertRaises(Exception, self.panel.minor_xs, 'E') + # not contained + pytest.raises(Exception, self.panel.minor_xs, 'E') def test_minor_xs_mixed(self): - self.panel['ItemD'] = 'foo' + with catch_warnings(record=True): + self.panel['ItemD'] = 'foo' - xs = self.panel.minor_xs('D') - self.assertEqual(xs['ItemA'].dtype, np.float64) - self.assertEqual(xs['ItemD'].dtype, np.object_) + xs = self.panel.minor_xs('D') + assert xs['ItemA'].dtype == np.float64 + assert xs['ItemD'].dtype == np.object_ def test_xs(self): - itemA = self.panel.xs('ItemA', axis=0) - expected = self.panel['ItemA'] - assert_frame_equal(itemA, expected) + with catch_warnings(record=True): + itemA = self.panel.xs('ItemA', axis=0) + expected = self.panel['ItemA'] + tm.assert_frame_equal(itemA, expected) - # get a view by default - itemA_view = self.panel.xs('ItemA', axis=0) - itemA_view.values[:] = np.nan - self.assertTrue(np.isnan(self.panel['ItemA'].values).all()) + # Get a view by default. + itemA_view = self.panel.xs('ItemA', axis=0) + itemA_view.values[:] = np.nan - # mixed-type yields a copy - self.panel['strings'] = 'foo' - result = self.panel.xs('D', axis=2) - self.assertIsNotNone(result.is_copy) + assert np.isnan(self.panel['ItemA'].values).all() + + # Mixed-type yields a copy. 
+ self.panel['strings'] = 'foo' + result = self.panel.xs('D', axis=2) + assert result._is_copy is not None def test_getitem_fancy_labels(self): - p = self.panel + with catch_warnings(record=True): + p = self.panel - items = p.items[[1, 0]] - dates = p.major_axis[::2] - cols = ['D', 'C', 'F'] + items = p.items[[1, 0]] + dates = p.major_axis[::2] + cols = ['D', 'C', 'F'] - # all 3 specified - assert_panel_equal(p.loc[items, dates, cols], - p.reindex(items=items, major=dates, minor=cols)) + # all 3 specified + assert_panel_equal(p.loc[items, dates, cols], + p.reindex(items=items, major=dates, minor=cols)) - # 2 specified - assert_panel_equal(p.loc[:, dates, cols], - p.reindex(major=dates, minor=cols)) + # 2 specified + assert_panel_equal(p.loc[:, dates, cols], + p.reindex(major=dates, minor=cols)) - assert_panel_equal(p.loc[items, :, cols], - p.reindex(items=items, minor=cols)) + assert_panel_equal(p.loc[items, :, cols], + p.reindex(items=items, minor=cols)) - assert_panel_equal(p.loc[items, dates, :], - p.reindex(items=items, major=dates)) + assert_panel_equal(p.loc[items, dates, :], + p.reindex(items=items, major=dates)) - # only 1 - assert_panel_equal(p.loc[items, :, :], p.reindex(items=items)) + # only 1 + assert_panel_equal(p.loc[items, :, :], p.reindex(items=items)) - assert_panel_equal(p.loc[:, dates, :], p.reindex(major=dates)) + assert_panel_equal(p.loc[:, dates, :], p.reindex(major=dates)) - assert_panel_equal(p.loc[:, :, cols], p.reindex(minor=cols)) + assert_panel_equal(p.loc[:, :, cols], p.reindex(minor=cols)) def test_getitem_fancy_slice(self): pass @@ -645,518 +674,554 @@ def test_getitem_fancy_xs(self): assert_series_equal(p.loc[:, date, col], p.major_xs(date).loc[col]) def test_getitem_fancy_xs_check_view(self): - item = 'ItemB' - date = self.panel.major_axis[5] - - # make sure it's always a view - NS = slice(None, None) - - # DataFrames - comp = assert_frame_equal - self._check_view(item, comp) - self._check_view((item, NS), comp) - 
self._check_view((item, NS, NS), comp) - self._check_view((NS, date), comp) - self._check_view((NS, date, NS), comp) - self._check_view((NS, NS, 'C'), comp) - - # Series - comp = assert_series_equal - self._check_view((item, date), comp) - self._check_view((item, date, NS), comp) - self._check_view((item, NS, 'C'), comp) - self._check_view((NS, date, 'C'), comp) + with catch_warnings(record=True): + item = 'ItemB' + date = self.panel.major_axis[5] + + # make sure it's always a view + NS = slice(None, None) + + # DataFrames + comp = assert_frame_equal + self._check_view(item, comp) + self._check_view((item, NS), comp) + self._check_view((item, NS, NS), comp) + self._check_view((NS, date), comp) + self._check_view((NS, date, NS), comp) + self._check_view((NS, NS, 'C'), comp) + + # Series + comp = assert_series_equal + self._check_view((item, date), comp) + self._check_view((item, date, NS), comp) + self._check_view((item, NS, 'C'), comp) + self._check_view((NS, date, 'C'), comp) def test_getitem_callable(self): - p = self.panel - # GH 12533 + with catch_warnings(record=True): + p = self.panel + # GH 12533 - assert_frame_equal(p[lambda x: 'ItemB'], p.loc['ItemB']) - assert_panel_equal(p[lambda x: ['ItemB', 'ItemC']], - p.loc[['ItemB', 'ItemC']]) + assert_frame_equal(p[lambda x: 'ItemB'], p.loc['ItemB']) + assert_panel_equal(p[lambda x: ['ItemB', 'ItemC']], + p.loc[['ItemB', 'ItemC']]) def test_ix_setitem_slice_dataframe(self): - a = Panel(items=[1, 2, 3], major_axis=[11, 22, 33], - minor_axis=[111, 222, 333]) - b = DataFrame(np.random.randn(2, 3), index=[111, 333], - columns=[1, 2, 3]) + with catch_warnings(record=True): + a = Panel(items=[1, 2, 3], major_axis=[11, 22, 33], + minor_axis=[111, 222, 333]) + b = DataFrame(np.random.randn(2, 3), index=[111, 333], + columns=[1, 2, 3]) - a.loc[:, 22, [111, 333]] = b + a.loc[:, 22, [111, 333]] = b - assert_frame_equal(a.loc[:, 22, [111, 333]], b) + assert_frame_equal(a.loc[:, 22, [111, 333]], b) def test_ix_align(self): - 
from pandas import Series - b = Series(np.random.randn(10), name=0) - b.sort() - df_orig = Panel(np.random.randn(3, 10, 2)) - df = df_orig.copy() + with catch_warnings(record=True): + from pandas import Series + b = Series(np.random.randn(10), name=0) + b.sort_values() + df_orig = Panel(np.random.randn(3, 10, 2)) + df = df_orig.copy() - df.loc[0, :, 0] = b - assert_series_equal(df.loc[0, :, 0].reindex(b.index), b) + df.loc[0, :, 0] = b + assert_series_equal(df.loc[0, :, 0].reindex(b.index), b) - df = df_orig.swapaxes(0, 1) - df.loc[:, 0, 0] = b - assert_series_equal(df.loc[:, 0, 0].reindex(b.index), b) + df = df_orig.swapaxes(0, 1) + df.loc[:, 0, 0] = b + assert_series_equal(df.loc[:, 0, 0].reindex(b.index), b) - df = df_orig.swapaxes(1, 2) - df.loc[0, 0, :] = b - assert_series_equal(df.loc[0, 0, :].reindex(b.index), b) + df = df_orig.swapaxes(1, 2) + df.loc[0, 0, :] = b + assert_series_equal(df.loc[0, 0, :].reindex(b.index), b) def test_ix_frame_align(self): - p_orig = tm.makePanel() - df = p_orig.iloc[0].copy() - assert_frame_equal(p_orig['ItemA'], df) - - p = p_orig.copy() - p.iloc[0, :, :] = df - assert_panel_equal(p, p_orig) - - p = p_orig.copy() - p.iloc[0] = df - assert_panel_equal(p, p_orig) - - p = p_orig.copy() - p.iloc[0, :, :] = df - assert_panel_equal(p, p_orig) - - p = p_orig.copy() - p.iloc[0] = df - assert_panel_equal(p, p_orig) - - p = p_orig.copy() - p.loc['ItemA'] = df - assert_panel_equal(p, p_orig) - - p = p_orig.copy() - p.loc['ItemA', :, :] = df - assert_panel_equal(p, p_orig) - - p = p_orig.copy() - p['ItemA'] = df - assert_panel_equal(p, p_orig) - - p = p_orig.copy() - p.iloc[0, [0, 1, 3, 5], -2:] = df - out = p.iloc[0, [0, 1, 3, 5], -2:] - assert_frame_equal(out, df.iloc[[0, 1, 3, 5], [2, 3]]) - - # GH3830, panel assignent by values/frame - for dtype in ['float64', 'int64']: - - panel = Panel(np.arange(40).reshape((2, 4, 5)), - items=['a1', 'a2'], dtype=dtype) - df1 = panel.iloc[0] - df2 = panel.iloc[1] - - 
tm.assert_frame_equal(panel.loc['a1'], df1) - tm.assert_frame_equal(panel.loc['a2'], df2) - - # Assignment by Value Passes for 'a2' - panel.loc['a2'] = df1.values - tm.assert_frame_equal(panel.loc['a1'], df1) - tm.assert_frame_equal(panel.loc['a2'], df1) - - # Assignment by DataFrame Ok w/o loc 'a2' - panel['a2'] = df2 - tm.assert_frame_equal(panel.loc['a1'], df1) - tm.assert_frame_equal(panel.loc['a2'], df2) - - # Assignment by DataFrame Fails for 'a2' - panel.loc['a2'] = df2 - tm.assert_frame_equal(panel.loc['a1'], df1) - tm.assert_frame_equal(panel.loc['a2'], df2) + with catch_warnings(record=True): + p_orig = tm.makePanel() + df = p_orig.iloc[0].copy() + assert_frame_equal(p_orig['ItemA'], df) + + p = p_orig.copy() + p.iloc[0, :, :] = df + assert_panel_equal(p, p_orig) + + p = p_orig.copy() + p.iloc[0] = df + assert_panel_equal(p, p_orig) + + p = p_orig.copy() + p.iloc[0, :, :] = df + assert_panel_equal(p, p_orig) + + p = p_orig.copy() + p.iloc[0] = df + assert_panel_equal(p, p_orig) + + p = p_orig.copy() + p.loc['ItemA'] = df + assert_panel_equal(p, p_orig) + + p = p_orig.copy() + p.loc['ItemA', :, :] = df + assert_panel_equal(p, p_orig) + + p = p_orig.copy() + p['ItemA'] = df + assert_panel_equal(p, p_orig) + + p = p_orig.copy() + p.iloc[0, [0, 1, 3, 5], -2:] = df + out = p.iloc[0, [0, 1, 3, 5], -2:] + assert_frame_equal(out, df.iloc[[0, 1, 3, 5], [2, 3]]) + + # GH3830, panel assignent by values/frame + for dtype in ['float64', 'int64']: + + panel = Panel(np.arange(40).reshape((2, 4, 5)), + items=['a1', 'a2'], dtype=dtype) + df1 = panel.iloc[0] + df2 = panel.iloc[1] + + tm.assert_frame_equal(panel.loc['a1'], df1) + tm.assert_frame_equal(panel.loc['a2'], df2) + + # Assignment by Value Passes for 'a2' + panel.loc['a2'] = df1.values + tm.assert_frame_equal(panel.loc['a1'], df1) + tm.assert_frame_equal(panel.loc['a2'], df1) + + # Assignment by DataFrame Ok w/o loc 'a2' + panel['a2'] = df2 + tm.assert_frame_equal(panel.loc['a1'], df1) + 
tm.assert_frame_equal(panel.loc['a2'], df2) + + # Assignment by DataFrame Fails for 'a2' + panel.loc['a2'] = df2 + tm.assert_frame_equal(panel.loc['a1'], df1) + tm.assert_frame_equal(panel.loc['a2'], df2) def _check_view(self, indexer, comp): cp = self.panel.copy() obj = cp.loc[indexer] obj.values[:] = 0 - self.assertTrue((obj.values == 0).all()) + assert (obj.values == 0).all() comp(cp.loc[indexer].reindex_like(obj), obj) def test_logical_with_nas(self): - d = Panel({'ItemA': {'a': [np.nan, False]}, - 'ItemB': {'a': [True, True]}}) + with catch_warnings(record=True): + d = Panel({'ItemA': {'a': [np.nan, False]}, + 'ItemB': {'a': [True, True]}}) - result = d['ItemA'] | d['ItemB'] - expected = DataFrame({'a': [np.nan, True]}) - assert_frame_equal(result, expected) + result = d['ItemA'] | d['ItemB'] + expected = DataFrame({'a': [np.nan, True]}) + assert_frame_equal(result, expected) - # this is autodowncasted here - result = d['ItemA'].fillna(False) | d['ItemB'] - expected = DataFrame({'a': [True, True]}) - assert_frame_equal(result, expected) + # this is autodowncasted here + result = d['ItemA'].fillna(False) | d['ItemB'] + expected = DataFrame({'a': [True, True]}) + assert_frame_equal(result, expected) def test_neg(self): - # what to do? 
- assert_panel_equal(-self.panel, -1 * self.panel) + with catch_warnings(record=True): + assert_panel_equal(-self.panel, -1 * self.panel) def test_invert(self): - assert_panel_equal(-(self.panel < 0), ~(self.panel < 0)) + with catch_warnings(record=True): + assert_panel_equal(-(self.panel < 0), ~(self.panel < 0)) def test_comparisons(self): - p1 = tm.makePanel() - p2 = tm.makePanel() + with catch_warnings(record=True): + p1 = tm.makePanel() + p2 = tm.makePanel() - tp = p1.reindex(items=p1.items + ['foo']) - df = p1[p1.items[0]] + tp = p1.reindex(items=p1.items + ['foo']) + df = p1[p1.items[0]] - def test_comp(func): + def test_comp(func): - # versus same index - result = func(p1, p2) - self.assert_numpy_array_equal(result.values, - func(p1.values, p2.values)) + # versus same index + result = func(p1, p2) + tm.assert_numpy_array_equal(result.values, + func(p1.values, p2.values)) - # versus non-indexed same objs - self.assertRaises(Exception, func, p1, tp) + # versus non-indexed same objs + pytest.raises(Exception, func, p1, tp) - # versus different objs - self.assertRaises(Exception, func, p1, df) + # versus different objs + pytest.raises(Exception, func, p1, df) - # versus scalar - result3 = func(self.panel, 0) - self.assert_numpy_array_equal(result3.values, - func(self.panel.values, 0)) + # versus scalar + result3 = func(self.panel, 0) + tm.assert_numpy_array_equal(result3.values, + func(self.panel.values, 0)) - with np.errstate(invalid='ignore'): - test_comp(operator.eq) - test_comp(operator.ne) - test_comp(operator.lt) - test_comp(operator.gt) - test_comp(operator.ge) - test_comp(operator.le) + with np.errstate(invalid='ignore'): + test_comp(operator.eq) + test_comp(operator.ne) + test_comp(operator.lt) + test_comp(operator.gt) + test_comp(operator.ge) + test_comp(operator.le) def test_get_value(self): - for item in self.panel.items: - for mjr in self.panel.major_axis[::2]: - for mnr in self.panel.minor_axis: - result = self.panel.get_value(item, mjr, mnr) - 
expected = self.panel[item][mnr][mjr] - assert_almost_equal(result, expected) - with tm.assertRaisesRegexp(TypeError, - "There must be an argument for each axis"): - self.panel.get_value('a') + with catch_warnings(record=True): + for item in self.panel.items: + for mjr in self.panel.major_axis[::2]: + for mnr in self.panel.minor_axis: + result = self.panel.get_value(item, mjr, mnr) + expected = self.panel[item][mnr][mjr] + assert_almost_equal(result, expected) + with tm.assert_raises_regex(TypeError, + "There must be an argument " + "for each axis"): + self.panel.get_value('a') def test_set_value(self): - for item in self.panel.items: - for mjr in self.panel.major_axis[::2]: - for mnr in self.panel.minor_axis: - self.panel.set_value(item, mjr, mnr, 1.) - assert_almost_equal(self.panel[item][mnr][mjr], 1.) - - # resize - res = self.panel.set_value('ItemE', 'foo', 'bar', 1.5) - tm.assertIsInstance(res, Panel) - self.assertIsNot(res, self.panel) - self.assertEqual(res.get_value('ItemE', 'foo', 'bar'), 1.5) + with catch_warnings(record=True): + for item in self.panel.items: + for mjr in self.panel.major_axis[::2]: + for mnr in self.panel.minor_axis: + self.panel.set_value(item, mjr, mnr, 1.) + tm.assert_almost_equal(self.panel[item][mnr][mjr], 1.) 
- res3 = self.panel.set_value('ItemE', 'foobar', 'baz', 5) - self.assertTrue(is_float_dtype(res3['ItemE'].values)) - with tm.assertRaisesRegexp(TypeError, - "There must be an argument for each axis" - " plus the value provided"): - self.panel.set_value('a') + # resize + res = self.panel.set_value('ItemE', 'foo', 'bar', 1.5) + assert isinstance(res, Panel) + assert res is not self.panel + assert res.get_value('ItemE', 'foo', 'bar') == 1.5 + res3 = self.panel.set_value('ItemE', 'foobar', 'baz', 5) + assert is_float_dtype(res3['ItemE'].values) -_panel = tm.makePanel() -tm.add_nans(_panel) + msg = ("There must be an argument for each " + "axis plus the value provided") + with tm.assert_raises_regex(TypeError, msg): + self.panel.set_value('a') -class TestPanel(tm.TestCase, PanelTests, CheckIndexing, SafeForLongAndSparse, +class TestPanel(PanelTests, CheckIndexing, SafeForLongAndSparse, SafeForSparse): @classmethod def assert_panel_equal(cls, x, y): assert_panel_equal(x, y) - def setUp(self): - self.panel = _panel.copy() + def setup_method(self, method): + self.panel = make_test_panel() self.panel.major_axis.name = None self.panel.minor_axis.name = None self.panel.items.name = None def test_constructor(self): - # with BlockManager - wp = Panel(self.panel._data) - self.assertIs(wp._data, self.panel._data) - - wp = Panel(self.panel._data, copy=True) - self.assertIsNot(wp._data, self.panel._data) - assert_panel_equal(wp, self.panel) - - # strings handled prop - wp = Panel([[['foo', 'foo', 'foo', ], ['foo', 'foo', 'foo']]]) - self.assertEqual(wp.values.dtype, np.object_) - - vals = self.panel.values - - # no copy - wp = Panel(vals) - self.assertIs(wp.values, vals) - - # copy - wp = Panel(vals, copy=True) - self.assertIsNot(wp.values, vals) - - # GH #8285, test when scalar data is used to construct a Panel - # if dtype is not passed, it should be inferred - value_and_dtype = [(1, 'int64'), (3.14, 'float64'), - ('foo', np.object_)] - for (val, dtype) in value_and_dtype: - wp = 
Panel(val, items=range(2), major_axis=range(3), - minor_axis=range(4)) - vals = np.empty((2, 3, 4), dtype=dtype) - vals.fill(val) - assert_panel_equal(wp, Panel(vals, dtype=dtype)) - - # test the case when dtype is passed - wp = Panel(1, items=range(2), major_axis=range(3), minor_axis=range(4), - dtype='float32') - vals = np.empty((2, 3, 4), dtype='float32') - vals.fill(1) - assert_panel_equal(wp, Panel(vals, dtype='float32')) + with catch_warnings(record=True): + # with BlockManager + wp = Panel(self.panel._data) + assert wp._data is self.panel._data + + wp = Panel(self.panel._data, copy=True) + assert wp._data is not self.panel._data + tm.assert_panel_equal(wp, self.panel) + + # strings handled prop + wp = Panel([[['foo', 'foo', 'foo', ], ['foo', 'foo', 'foo']]]) + assert wp.values.dtype == np.object_ + + vals = self.panel.values + + # no copy + wp = Panel(vals) + assert wp.values is vals + + # copy + wp = Panel(vals, copy=True) + assert wp.values is not vals + + # GH #8285, test when scalar data is used to construct a Panel + # if dtype is not passed, it should be inferred + value_and_dtype = [(1, 'int64'), (3.14, 'float64'), + ('foo', np.object_)] + for (val, dtype) in value_and_dtype: + wp = Panel(val, items=range(2), major_axis=range(3), + minor_axis=range(4)) + vals = np.empty((2, 3, 4), dtype=dtype) + vals.fill(val) + + tm.assert_panel_equal(wp, Panel(vals, dtype=dtype)) + + # test the case when dtype is passed + wp = Panel(1, items=range(2), major_axis=range(3), + minor_axis=range(4), + dtype='float32') + vals = np.empty((2, 3, 4), dtype='float32') + vals.fill(1) + + tm.assert_panel_equal(wp, Panel(vals, dtype='float32')) def test_constructor_cast(self): - zero_filled = self.panel.fillna(0) + with catch_warnings(record=True): + zero_filled = self.panel.fillna(0) - casted = Panel(zero_filled._data, dtype=int) - casted2 = Panel(zero_filled.values, dtype=int) + casted = Panel(zero_filled._data, dtype=int) + casted2 = Panel(zero_filled.values, dtype=int) - 
exp_values = zero_filled.values.astype(int) - assert_almost_equal(casted.values, exp_values) - assert_almost_equal(casted2.values, exp_values) + exp_values = zero_filled.values.astype(int) + assert_almost_equal(casted.values, exp_values) + assert_almost_equal(casted2.values, exp_values) - casted = Panel(zero_filled._data, dtype=np.int32) - casted2 = Panel(zero_filled.values, dtype=np.int32) + casted = Panel(zero_filled._data, dtype=np.int32) + casted2 = Panel(zero_filled.values, dtype=np.int32) - exp_values = zero_filled.values.astype(np.int32) - assert_almost_equal(casted.values, exp_values) - assert_almost_equal(casted2.values, exp_values) + exp_values = zero_filled.values.astype(np.int32) + assert_almost_equal(casted.values, exp_values) + assert_almost_equal(casted2.values, exp_values) - # can't cast - data = [[['foo', 'bar', 'baz']]] - self.assertRaises(ValueError, Panel, data, dtype=float) + # can't cast + data = [[['foo', 'bar', 'baz']]] + pytest.raises(ValueError, Panel, data, dtype=float) def test_constructor_empty_panel(self): - empty = Panel() - self.assertEqual(len(empty.items), 0) - self.assertEqual(len(empty.major_axis), 0) - self.assertEqual(len(empty.minor_axis), 0) + with catch_warnings(record=True): + empty = Panel() + assert len(empty.items) == 0 + assert len(empty.major_axis) == 0 + assert len(empty.minor_axis) == 0 def test_constructor_observe_dtype(self): - # GH #411 - panel = Panel(items=lrange(3), major_axis=lrange(3), - minor_axis=lrange(3), dtype='O') - self.assertEqual(panel.values.dtype, np.object_) + with catch_warnings(record=True): + # GH #411 + panel = Panel(items=lrange(3), major_axis=lrange(3), + minor_axis=lrange(3), dtype='O') + assert panel.values.dtype == np.object_ def test_constructor_dtypes(self): - # GH #797 - - def _check_dtype(panel, dtype): - for i in panel.items: - self.assertEqual(panel[i].values.dtype.name, dtype) - - # only nan holding types allowed here - for dtype in ['float64', 'float32', 'object']: - panel = 
Panel(items=lrange(2), major_axis=lrange(10), - minor_axis=lrange(5), dtype=dtype) - _check_dtype(panel, dtype) - - for dtype in ['float64', 'float32', 'int64', 'int32', 'object']: - panel = Panel(np.array(np.random.randn(2, 10, 5), dtype=dtype), - items=lrange(2), - major_axis=lrange(10), - minor_axis=lrange(5), dtype=dtype) - _check_dtype(panel, dtype) - - for dtype in ['float64', 'float32', 'int64', 'int32', 'object']: - panel = Panel(np.array(np.random.randn(2, 10, 5), dtype='O'), - items=lrange(2), - major_axis=lrange(10), - minor_axis=lrange(5), dtype=dtype) - _check_dtype(panel, dtype) - - for dtype in ['float64', 'float32', 'int64', 'int32', 'object']: - panel = Panel(np.random.randn(2, 10, 5), items=lrange( - 2), major_axis=lrange(10), minor_axis=lrange(5), dtype=dtype) - _check_dtype(panel, dtype) - - for dtype in ['float64', 'float32', 'int64', 'int32', 'object']: - df1 = DataFrame(np.random.randn(2, 5), - index=lrange(2), columns=lrange(5)) - df2 = DataFrame(np.random.randn(2, 5), - index=lrange(2), columns=lrange(5)) - panel = Panel.from_dict({'a': df1, 'b': df2}, dtype=dtype) - _check_dtype(panel, dtype) + with catch_warnings(record=True): + # GH #797 + + def _check_dtype(panel, dtype): + for i in panel.items: + assert panel[i].values.dtype.name == dtype + + # only nan holding types allowed here + for dtype in ['float64', 'float32', 'object']: + panel = Panel(items=lrange(2), major_axis=lrange(10), + minor_axis=lrange(5), dtype=dtype) + _check_dtype(panel, dtype) + + for dtype in ['float64', 'float32', 'int64', 'int32', 'object']: + panel = Panel(np.array(np.random.randn(2, 10, 5), dtype=dtype), + items=lrange(2), + major_axis=lrange(10), + minor_axis=lrange(5), dtype=dtype) + _check_dtype(panel, dtype) + + for dtype in ['float64', 'float32', 'int64', 'int32', 'object']: + panel = Panel(np.array(np.random.randn(2, 10, 5), dtype='O'), + items=lrange(2), + major_axis=lrange(10), + minor_axis=lrange(5), dtype=dtype) + _check_dtype(panel, dtype) + + for 
dtype in ['float64', 'float32', 'int64', 'int32', 'object']: + panel = Panel( + np.random.randn(2, 10, 5), + items=lrange(2), major_axis=lrange(10), + minor_axis=lrange(5), + dtype=dtype) + _check_dtype(panel, dtype) + + for dtype in ['float64', 'float32', 'int64', 'int32', 'object']: + df1 = DataFrame(np.random.randn(2, 5), + index=lrange(2), columns=lrange(5)) + df2 = DataFrame(np.random.randn(2, 5), + index=lrange(2), columns=lrange(5)) + panel = Panel.from_dict({'a': df1, 'b': df2}, dtype=dtype) + _check_dtype(panel, dtype) def test_constructor_fails_with_not_3d_input(self): - with tm.assertRaisesRegexp(ValueError, - "The number of dimensions required is 3"): - Panel(np.random.randn(10, 2)) + with catch_warnings(record=True): + with tm.assert_raises_regex(ValueError, "The number of dimensions required is 3"): # noqa + Panel(np.random.randn(10, 2)) def test_consolidate(self): - self.assertTrue(self.panel._data.is_consolidated()) + with catch_warnings(record=True): + assert self.panel._data.is_consolidated() - self.panel['foo'] = 1. - self.assertFalse(self.panel._data.is_consolidated()) + self.panel['foo'] = 1. 
+ assert not self.panel._data.is_consolidated() - panel = self.panel.consolidate() - self.assertTrue(panel._data.is_consolidated()) + panel = self.panel._consolidate() + assert panel._data.is_consolidated() def test_ctor_dict(self): - itema = self.panel['ItemA'] - itemb = self.panel['ItemB'] + with catch_warnings(record=True): + itema = self.panel['ItemA'] + itemb = self.panel['ItemB'] - d = {'A': itema, 'B': itemb[5:]} - d2 = {'A': itema._series, 'B': itemb[5:]._series} - d3 = {'A': None, - 'B': DataFrame(itemb[5:]._series), - 'C': DataFrame(itema._series)} + d = {'A': itema, 'B': itemb[5:]} + d2 = {'A': itema._series, 'B': itemb[5:]._series} + d3 = {'A': None, + 'B': DataFrame(itemb[5:]._series), + 'C': DataFrame(itema._series)} - wp = Panel.from_dict(d) - wp2 = Panel.from_dict(d2) # nested Dict + wp = Panel.from_dict(d) + wp2 = Panel.from_dict(d2) # nested Dict - # TODO: unused? - wp3 = Panel.from_dict(d3) # noqa + # TODO: unused? + wp3 = Panel.from_dict(d3) # noqa - self.assert_index_equal(wp.major_axis, self.panel.major_axis) - assert_panel_equal(wp, wp2) + tm.assert_index_equal(wp.major_axis, self.panel.major_axis) + assert_panel_equal(wp, wp2) - # intersect - wp = Panel.from_dict(d, intersect=True) - self.assert_index_equal(wp.major_axis, itemb.index[5:]) + # intersect + wp = Panel.from_dict(d, intersect=True) + tm.assert_index_equal(wp.major_axis, itemb.index[5:]) - # use constructor - assert_panel_equal(Panel(d), Panel.from_dict(d)) - assert_panel_equal(Panel(d2), Panel.from_dict(d2)) - assert_panel_equal(Panel(d3), Panel.from_dict(d3)) + # use constructor + assert_panel_equal(Panel(d), Panel.from_dict(d)) + assert_panel_equal(Panel(d2), Panel.from_dict(d2)) + assert_panel_equal(Panel(d3), Panel.from_dict(d3)) - # a pathological case - d4 = {'A': None, 'B': None} + # a pathological case + d4 = {'A': None, 'B': None} - # TODO: unused? - wp4 = Panel.from_dict(d4) # noqa + # TODO: unused? 
+ wp4 = Panel.from_dict(d4) # noqa - assert_panel_equal(Panel(d4), Panel(items=['A', 'B'])) + assert_panel_equal(Panel(d4), Panel(items=['A', 'B'])) - # cast - dcasted = dict((k, v.reindex(wp.major_axis).fillna(0)) - for k, v in compat.iteritems(d)) - result = Panel(dcasted, dtype=int) - expected = Panel(dict((k, v.astype(int)) - for k, v in compat.iteritems(dcasted))) - assert_panel_equal(result, expected) + # cast + dcasted = {k: v.reindex(wp.major_axis).fillna(0) + for k, v in compat.iteritems(d)} + result = Panel(dcasted, dtype=int) + expected = Panel({k: v.astype(int) + for k, v in compat.iteritems(dcasted)}) + assert_panel_equal(result, expected) - result = Panel(dcasted, dtype=np.int32) - expected = Panel(dict((k, v.astype(np.int32)) - for k, v in compat.iteritems(dcasted))) - assert_panel_equal(result, expected) + result = Panel(dcasted, dtype=np.int32) + expected = Panel({k: v.astype(np.int32) + for k, v in compat.iteritems(dcasted)}) + assert_panel_equal(result, expected) def test_constructor_dict_mixed(self): - data = dict((k, v.values) for k, v in self.panel.iteritems()) - result = Panel(data) - exp_major = Index(np.arange(len(self.panel.major_axis))) - self.assert_index_equal(result.major_axis, exp_major) + with catch_warnings(record=True): + data = {k: v.values for k, v in self.panel.iteritems()} + result = Panel(data) + exp_major = Index(np.arange(len(self.panel.major_axis))) + tm.assert_index_equal(result.major_axis, exp_major) - result = Panel(data, items=self.panel.items, - major_axis=self.panel.major_axis, - minor_axis=self.panel.minor_axis) - assert_panel_equal(result, self.panel) + result = Panel(data, items=self.panel.items, + major_axis=self.panel.major_axis, + minor_axis=self.panel.minor_axis) + assert_panel_equal(result, self.panel) - data['ItemC'] = self.panel['ItemC'] - result = Panel(data) - assert_panel_equal(result, self.panel) + data['ItemC'] = self.panel['ItemC'] + result = Panel(data) + assert_panel_equal(result, self.panel) - # 
corner, blow up - data['ItemB'] = data['ItemB'][:-1] - self.assertRaises(Exception, Panel, data) + # corner, blow up + data['ItemB'] = data['ItemB'][:-1] + pytest.raises(Exception, Panel, data) - data['ItemB'] = self.panel['ItemB'].values[:, :-1] - self.assertRaises(Exception, Panel, data) + data['ItemB'] = self.panel['ItemB'].values[:, :-1] + pytest.raises(Exception, Panel, data) def test_ctor_orderedDict(self): - keys = list(set(np.random.randint(0, 5000, 100)))[ - :50] # unique random int keys - d = OrderedDict([(k, mkdf(10, 5)) for k in keys]) - p = Panel(d) - self.assertTrue(list(p.items) == keys) + with catch_warnings(record=True): + keys = list(set(np.random.randint(0, 5000, 100)))[ + :50] # unique random int keys + d = OrderedDict([(k, mkdf(10, 5)) for k in keys]) + p = Panel(d) + assert list(p.items) == keys - p = Panel.from_dict(d) - self.assertTrue(list(p.items) == keys) + p = Panel.from_dict(d) + assert list(p.items) == keys def test_constructor_resize(self): - data = self.panel._data - items = self.panel.items[:-1] - major = self.panel.major_axis[:-1] - minor = self.panel.minor_axis[:-1] - - result = Panel(data, items=items, major_axis=major, minor_axis=minor) - expected = self.panel.reindex(items=items, major=major, minor=minor) - assert_panel_equal(result, expected) + with catch_warnings(record=True): + data = self.panel._data + items = self.panel.items[:-1] + major = self.panel.major_axis[:-1] + minor = self.panel.minor_axis[:-1] + + result = Panel(data, items=items, + major_axis=major, minor_axis=minor) + expected = self.panel.reindex( + items=items, major=major, minor=minor) + assert_panel_equal(result, expected) - result = Panel(data, items=items, major_axis=major) - expected = self.panel.reindex(items=items, major=major) - assert_panel_equal(result, expected) + result = Panel(data, items=items, major_axis=major) + expected = self.panel.reindex(items=items, major=major) + assert_panel_equal(result, expected) - result = Panel(data, items=items) - 
expected = self.panel.reindex(items=items) - assert_panel_equal(result, expected) + result = Panel(data, items=items) + expected = self.panel.reindex(items=items) + assert_panel_equal(result, expected) - result = Panel(data, minor_axis=minor) - expected = self.panel.reindex(minor=minor) - assert_panel_equal(result, expected) + result = Panel(data, minor_axis=minor) + expected = self.panel.reindex(minor=minor) + assert_panel_equal(result, expected) def test_from_dict_mixed_orient(self): - df = tm.makeDataFrame() - df['foo'] = 'bar' + with catch_warnings(record=True): + df = tm.makeDataFrame() + df['foo'] = 'bar' - data = {'k1': df, 'k2': df} + data = {'k1': df, 'k2': df} - panel = Panel.from_dict(data, orient='minor') + panel = Panel.from_dict(data, orient='minor') - self.assertEqual(panel['foo'].values.dtype, np.object_) - self.assertEqual(panel['A'].values.dtype, np.float64) + assert panel['foo'].values.dtype == np.object_ + assert panel['A'].values.dtype == np.float64 def test_constructor_error_msgs(self): - def testit(): - Panel(np.random.randn(3, 4, 5), lrange(4), lrange(5), lrange(5)) - - assertRaisesRegexp(ValueError, - r"Shape of passed values is \(3, 4, 5\), " - r"indices imply \(4, 5, 5\)", - testit) - - def testit(): - Panel(np.random.randn(3, 4, 5), lrange(5), lrange(4), lrange(5)) - - assertRaisesRegexp(ValueError, - r"Shape of passed values is \(3, 4, 5\), " - r"indices imply \(5, 4, 5\)", - testit) - - def testit(): - Panel(np.random.randn(3, 4, 5), lrange(5), lrange(5), lrange(4)) - - assertRaisesRegexp(ValueError, - r"Shape of passed values is \(3, 4, 5\), " - r"indices imply \(5, 5, 4\)", - testit) + with catch_warnings(record=True): + def testit(): + Panel(np.random.randn(3, 4, 5), + lrange(4), lrange(5), lrange(5)) + + tm.assert_raises_regex(ValueError, + r"Shape of passed values is " + r"\(3, 4, 5\), indices imply " + r"\(4, 5, 5\)", + testit) + + def testit(): + Panel(np.random.randn(3, 4, 5), + lrange(5), lrange(4), lrange(5)) + + 
tm.assert_raises_regex(ValueError, + r"Shape of passed values is " + r"\(3, 4, 5\), indices imply " + r"\(5, 4, 5\)", + testit) + + def testit(): + Panel(np.random.randn(3, 4, 5), + lrange(5), lrange(5), lrange(4)) + + tm.assert_raises_regex(ValueError, + r"Shape of passed values is " + r"\(3, 4, 5\), indices imply " + r"\(5, 5, 4\)", + testit) def test_conform(self): - df = self.panel['ItemA'][:-5].filter(items=['A', 'B']) - conformed = self.panel.conform(df) + with catch_warnings(record=True): + df = self.panel['ItemA'][:-5].filter(items=['A', 'B']) + conformed = self.panel.conform(df) - tm.assert_index_equal(conformed.index, self.panel.major_axis) - tm.assert_index_equal(conformed.columns, self.panel.minor_axis) + tm.assert_index_equal(conformed.index, self.panel.major_axis) + tm.assert_index_equal(conformed.columns, self.panel.minor_axis) def test_convert_objects(self): + with catch_warnings(record=True): - # GH 4937 - p = Panel(dict(A=dict(a=['1', '1.0']))) - expected = Panel(dict(A=dict(a=[1, 1.0]))) - result = p._convert(numeric=True, coerce=True) - assert_panel_equal(result, expected) + # GH 4937 + p = Panel(dict(A=dict(a=['1', '1.0']))) + expected = Panel(dict(A=dict(a=[1, 1.0]))) + result = p._convert(numeric=True, coerce=True) + assert_panel_equal(result, expected) def test_dtypes(self): @@ -1165,875 +1230,964 @@ def test_dtypes(self): assert_series_equal(result, expected) def test_astype(self): - # GH7271 - data = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]]) - panel = Panel(data, ['a', 'b'], ['c', 'd'], ['e', 'f']) + with catch_warnings(record=True): + # GH7271 + data = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]]) + panel = Panel(data, ['a', 'b'], ['c', 'd'], ['e', 'f']) - str_data = np.array([[['1', '2'], ['3', '4']], - [['5', '6'], ['7', '8']]]) - expected = Panel(str_data, ['a', 'b'], ['c', 'd'], ['e', 'f']) - assert_panel_equal(panel.astype(str), expected) + str_data = np.array([[['1', '2'], ['3', '4']], + [['5', '6'], ['7', '8']]]) + expected = 
Panel(str_data, ['a', 'b'], ['c', 'd'], ['e', 'f']) + assert_panel_equal(panel.astype(str), expected) - self.assertRaises(NotImplementedError, panel.astype, {0: str}) + pytest.raises(NotImplementedError, panel.astype, {0: str}) def test_apply(self): - # GH1148 - - # ufunc - applied = self.panel.apply(np.sqrt) - with np.errstate(invalid='ignore'): - expected = np.sqrt(self.panel.values) - assert_almost_equal(applied.values, expected) - - # ufunc same shape - result = self.panel.apply(lambda x: x * 2, axis='items') - expected = self.panel * 2 - assert_panel_equal(result, expected) - result = self.panel.apply(lambda x: x * 2, axis='major_axis') - expected = self.panel * 2 - assert_panel_equal(result, expected) - result = self.panel.apply(lambda x: x * 2, axis='minor_axis') - expected = self.panel * 2 - assert_panel_equal(result, expected) - - # reduction to DataFrame - result = self.panel.apply(lambda x: x.dtype, axis='items') - expected = DataFrame(np.dtype('float64'), index=self.panel.major_axis, - columns=self.panel.minor_axis) - assert_frame_equal(result, expected) - result = self.panel.apply(lambda x: x.dtype, axis='major_axis') - expected = DataFrame(np.dtype('float64'), index=self.panel.minor_axis, - columns=self.panel.items) - assert_frame_equal(result, expected) - result = self.panel.apply(lambda x: x.dtype, axis='minor_axis') - expected = DataFrame(np.dtype('float64'), index=self.panel.major_axis, - columns=self.panel.items) - assert_frame_equal(result, expected) - - # reductions via other dims - expected = self.panel.sum(0) - result = self.panel.apply(lambda x: x.sum(), axis='items') - assert_frame_equal(result, expected) - expected = self.panel.sum(1) - result = self.panel.apply(lambda x: x.sum(), axis='major_axis') - assert_frame_equal(result, expected) - expected = self.panel.sum(2) - result = self.panel.apply(lambda x: x.sum(), axis='minor_axis') - assert_frame_equal(result, expected) + with catch_warnings(record=True): + # GH1148 + + # ufunc + applied 
= self.panel.apply(np.sqrt) + with np.errstate(invalid='ignore'): + expected = np.sqrt(self.panel.values) + assert_almost_equal(applied.values, expected) + + # ufunc same shape + result = self.panel.apply(lambda x: x * 2, axis='items') + expected = self.panel * 2 + assert_panel_equal(result, expected) + result = self.panel.apply(lambda x: x * 2, axis='major_axis') + expected = self.panel * 2 + assert_panel_equal(result, expected) + result = self.panel.apply(lambda x: x * 2, axis='minor_axis') + expected = self.panel * 2 + assert_panel_equal(result, expected) - # pass kwargs - result = self.panel.apply(lambda x, y: x.sum() + y, axis='items', y=5) - expected = self.panel.sum(0) + 5 - assert_frame_equal(result, expected) + # reduction to DataFrame + result = self.panel.apply(lambda x: x.dtype, axis='items') + expected = DataFrame(np.dtype('float64'), + index=self.panel.major_axis, + columns=self.panel.minor_axis) + assert_frame_equal(result, expected) + result = self.panel.apply(lambda x: x.dtype, axis='major_axis') + expected = DataFrame(np.dtype('float64'), + index=self.panel.minor_axis, + columns=self.panel.items) + assert_frame_equal(result, expected) + result = self.panel.apply(lambda x: x.dtype, axis='minor_axis') + expected = DataFrame(np.dtype('float64'), + index=self.panel.major_axis, + columns=self.panel.items) + assert_frame_equal(result, expected) + + # reductions via other dims + expected = self.panel.sum(0) + result = self.panel.apply(lambda x: x.sum(), axis='items') + assert_frame_equal(result, expected) + expected = self.panel.sum(1) + result = self.panel.apply(lambda x: x.sum(), axis='major_axis') + assert_frame_equal(result, expected) + expected = self.panel.sum(2) + result = self.panel.apply(lambda x: x.sum(), axis='minor_axis') + assert_frame_equal(result, expected) + + # pass kwargs + result = self.panel.apply( + lambda x, y: x.sum() + y, axis='items', y=5) + expected = self.panel.sum(0) + 5 + assert_frame_equal(result, expected) def 
test_apply_slabs(self): + with catch_warnings(record=True): - # same shape as original - result = self.panel.apply(lambda x: x * 2, - axis=['items', 'major_axis']) - expected = (self.panel * 2).transpose('minor_axis', 'major_axis', - 'items') - assert_panel_equal(result, expected) - result = self.panel.apply(lambda x: x * 2, - axis=['major_axis', 'items']) - assert_panel_equal(result, expected) - - result = self.panel.apply(lambda x: x * 2, - axis=['items', 'minor_axis']) - expected = (self.panel * 2).transpose('major_axis', 'minor_axis', - 'items') - assert_panel_equal(result, expected) - result = self.panel.apply(lambda x: x * 2, - axis=['minor_axis', 'items']) - assert_panel_equal(result, expected) - - result = self.panel.apply(lambda x: x * 2, - axis=['major_axis', 'minor_axis']) - expected = self.panel * 2 - assert_panel_equal(result, expected) - result = self.panel.apply(lambda x: x * 2, - axis=['minor_axis', 'major_axis']) - assert_panel_equal(result, expected) - - # reductions - result = self.panel.apply(lambda x: x.sum(0), axis=[ - 'items', 'major_axis' - ]) - expected = self.panel.sum(1).T - assert_frame_equal(result, expected) + # same shape as original + result = self.panel.apply(lambda x: x * 2, + axis=['items', 'major_axis']) + expected = (self.panel * 2).transpose('minor_axis', 'major_axis', + 'items') + assert_panel_equal(result, expected) + result = self.panel.apply(lambda x: x * 2, + axis=['major_axis', 'items']) + assert_panel_equal(result, expected) - result = self.panel.apply(lambda x: x.sum(1), axis=[ - 'items', 'major_axis' - ]) - expected = self.panel.sum(0) - assert_frame_equal(result, expected) + result = self.panel.apply(lambda x: x * 2, + axis=['items', 'minor_axis']) + expected = (self.panel * 2).transpose('major_axis', 'minor_axis', + 'items') + assert_panel_equal(result, expected) + result = self.panel.apply(lambda x: x * 2, + axis=['minor_axis', 'items']) + assert_panel_equal(result, expected) - # transforms - f = lambda x: ((x.T - 
x.mean(1)) / x.std(1)).T + result = self.panel.apply(lambda x: x * 2, + axis=['major_axis', 'minor_axis']) + expected = self.panel * 2 + assert_panel_equal(result, expected) + result = self.panel.apply(lambda x: x * 2, + axis=['minor_axis', 'major_axis']) + assert_panel_equal(result, expected) + + # reductions + result = self.panel.apply(lambda x: x.sum(0), axis=[ + 'items', 'major_axis' + ]) + expected = self.panel.sum(1).T + assert_frame_equal(result, expected) # make sure that we don't trigger any warnings - with tm.assert_produces_warning(False): + with catch_warnings(record=True): + result = self.panel.apply(lambda x: x.sum(1), axis=[ + 'items', 'major_axis' + ]) + expected = self.panel.sum(0) + assert_frame_equal(result, expected) + + # transforms + f = lambda x: ((x.T - x.mean(1)) / x.std(1)).T + + # make sure that we don't trigger any warnings result = self.panel.apply(f, axis=['items', 'major_axis']) - expected = Panel(dict([(ax, f(self.panel.loc[:, :, ax])) - for ax in self.panel.minor_axis])) + expected = Panel({ax: f(self.panel.loc[:, :, ax]) + for ax in self.panel.minor_axis}) assert_panel_equal(result, expected) - result = self.panel.apply(f, axis=['major_axis', 'minor_axis']) - expected = Panel(dict([(ax, f(self.panel.loc[ax])) - for ax in self.panel.items])) - assert_panel_equal(result, expected) - - result = self.panel.apply(f, axis=['minor_axis', 'items']) - expected = Panel(dict([(ax, f(self.panel.loc[:, ax])) - for ax in self.panel.major_axis])) - assert_panel_equal(result, expected) - - # with multi-indexes - # GH7469 - index = MultiIndex.from_tuples([('one', 'a'), ('one', 'b'), ( - 'two', 'a'), ('two', 'b')]) - dfa = DataFrame(np.array(np.arange(12, dtype='int64')).reshape( - 4, 3), columns=list("ABC"), index=index) - dfb = DataFrame(np.array(np.arange(10, 22, dtype='int64')).reshape( - 4, 3), columns=list("ABC"), index=index) - p = Panel({'f': dfa, 'g': dfb}) - result = p.apply(lambda x: x.sum(), axis=0) - - # on windows this will be in32 - 
result = result.astype('int64') - expected = p.sum(0) - assert_frame_equal(result, expected) + result = self.panel.apply(f, axis=['major_axis', 'minor_axis']) + expected = Panel({ax: f(self.panel.loc[ax]) + for ax in self.panel.items}) + assert_panel_equal(result, expected) + + result = self.panel.apply(f, axis=['minor_axis', 'items']) + expected = Panel({ax: f(self.panel.loc[:, ax]) + for ax in self.panel.major_axis}) + assert_panel_equal(result, expected) + + # with multi-indexes + # GH7469 + index = MultiIndex.from_tuples([('one', 'a'), ('one', 'b'), ( + 'two', 'a'), ('two', 'b')]) + dfa = DataFrame(np.array(np.arange(12, dtype='int64')).reshape( + 4, 3), columns=list("ABC"), index=index) + dfb = DataFrame(np.array(np.arange(10, 22, dtype='int64')).reshape( + 4, 3), columns=list("ABC"), index=index) + p = Panel({'f': dfa, 'g': dfb}) + result = p.apply(lambda x: x.sum(), axis=0) + + # on windows this will be in32 + result = result.astype('int64') + expected = p.sum(0) + assert_frame_equal(result, expected) def test_apply_no_or_zero_ndim(self): - # GH10332 - self.panel = Panel(np.random.rand(5, 5, 5)) + with catch_warnings(record=True): + # GH10332 + self.panel = Panel(np.random.rand(5, 5, 5)) - result_int = self.panel.apply(lambda df: 0, axis=[1, 2]) - result_float = self.panel.apply(lambda df: 0.0, axis=[1, 2]) - result_int64 = self.panel.apply(lambda df: np.int64(0), axis=[1, 2]) - result_float64 = self.panel.apply(lambda df: np.float64(0.0), - axis=[1, 2]) + result_int = self.panel.apply(lambda df: 0, axis=[1, 2]) + result_float = self.panel.apply(lambda df: 0.0, axis=[1, 2]) + result_int64 = self.panel.apply( + lambda df: np.int64(0), axis=[1, 2]) + result_float64 = self.panel.apply(lambda df: np.float64(0.0), + axis=[1, 2]) - expected_int = expected_int64 = Series([0] * 5) - expected_float = expected_float64 = Series([0.0] * 5) + expected_int = expected_int64 = Series([0] * 5) + expected_float = expected_float64 = Series([0.0] * 5) - 
assert_series_equal(result_int, expected_int) - assert_series_equal(result_int64, expected_int64) - assert_series_equal(result_float, expected_float) - assert_series_equal(result_float64, expected_float64) + assert_series_equal(result_int, expected_int) + assert_series_equal(result_int64, expected_int64) + assert_series_equal(result_float, expected_float) + assert_series_equal(result_float64, expected_float64) def test_reindex(self): - ref = self.panel['ItemB'] + with catch_warnings(record=True): + ref = self.panel['ItemB'] - # items - result = self.panel.reindex(items=['ItemA', 'ItemB']) - assert_frame_equal(result['ItemB'], ref) + # items + result = self.panel.reindex(items=['ItemA', 'ItemB']) + assert_frame_equal(result['ItemB'], ref) - # major - new_major = list(self.panel.major_axis[:10]) - result = self.panel.reindex(major=new_major) - assert_frame_equal(result['ItemB'], ref.reindex(index=new_major)) + # major + new_major = list(self.panel.major_axis[:10]) + result = self.panel.reindex(major=new_major) + assert_frame_equal(result['ItemB'], ref.reindex(index=new_major)) - # raise exception put both major and major_axis - self.assertRaises(Exception, self.panel.reindex, major_axis=new_major, + # raise exception put both major and major_axis + pytest.raises(Exception, self.panel.reindex, + major_axis=new_major, major=new_major) - # minor - new_minor = list(self.panel.minor_axis[:2]) - result = self.panel.reindex(minor=new_minor) - assert_frame_equal(result['ItemB'], ref.reindex(columns=new_minor)) + # minor + new_minor = list(self.panel.minor_axis[:2]) + result = self.panel.reindex(minor=new_minor) + assert_frame_equal(result['ItemB'], ref.reindex(columns=new_minor)) - # this ok - result = self.panel.reindex() - assert_panel_equal(result, self.panel) - self.assertFalse(result is self.panel) + # raise exception put both major and major_axis + pytest.raises(Exception, self.panel.reindex, + minor_axis=new_minor, + minor=new_minor) - # with filling - smaller_major = 
self.panel.major_axis[::5] - smaller = self.panel.reindex(major=smaller_major) + # this ok + result = self.panel.reindex() + assert_panel_equal(result, self.panel) + assert result is not self.panel - larger = smaller.reindex(major=self.panel.major_axis, method='pad') + # with filling + smaller_major = self.panel.major_axis[::5] + smaller = self.panel.reindex(major=smaller_major) - assert_frame_equal(larger.major_xs(self.panel.major_axis[1]), - smaller.major_xs(smaller_major[0])) + larger = smaller.reindex(major=self.panel.major_axis, method='pad') - # don't necessarily copy - result = self.panel.reindex(major=self.panel.major_axis, copy=False) - assert_panel_equal(result, self.panel) - self.assertTrue(result is self.panel) + assert_frame_equal(larger.major_xs(self.panel.major_axis[1]), + smaller.major_xs(smaller_major[0])) - def test_reindex_multi(self): + # don't necessarily copy + result = self.panel.reindex( + major=self.panel.major_axis, copy=False) + assert_panel_equal(result, self.panel) + assert result is self.panel - # with and without copy full reindexing - result = self.panel.reindex(items=self.panel.items, - major=self.panel.major_axis, - minor=self.panel.minor_axis, copy=False) - - self.assertIs(result.items, self.panel.items) - self.assertIs(result.major_axis, self.panel.major_axis) - self.assertIs(result.minor_axis, self.panel.minor_axis) - - result = self.panel.reindex(items=self.panel.items, - major=self.panel.major_axis, - minor=self.panel.minor_axis, copy=False) - assert_panel_equal(result, self.panel) - - # multi-axis indexing consistency - # GH 5900 - df = DataFrame(np.random.randn(4, 3)) - p = Panel({'Item1': df}) - expected = Panel({'Item1': df}) - expected['Item2'] = np.nan - - items = ['Item1', 'Item2'] - major_axis = np.arange(4) - minor_axis = np.arange(3) - - results = [] - results.append(p.reindex(items=items, major_axis=major_axis, - copy=True)) - results.append(p.reindex(items=items, major_axis=major_axis, - copy=False)) - 
results.append(p.reindex(items=items, minor_axis=minor_axis, - copy=True)) - results.append(p.reindex(items=items, minor_axis=minor_axis, - copy=False)) - results.append(p.reindex(items=items, major_axis=major_axis, - minor_axis=minor_axis, copy=True)) - results.append(p.reindex(items=items, major_axis=major_axis, - minor_axis=minor_axis, copy=False)) - - for i, r in enumerate(results): - assert_panel_equal(expected, r) + def test_reindex_axis_style(self): + with catch_warnings(record=True): + panel = Panel(np.random.rand(5, 5, 5)) + expected0 = Panel(panel.values).iloc[[0, 1]] + expected1 = Panel(panel.values).iloc[:, [0, 1]] + expected2 = Panel(panel.values).iloc[:, :, [0, 1]] - def test_reindex_like(self): - # reindex_like - smaller = self.panel.reindex(items=self.panel.items[:-1], - major=self.panel.major_axis[:-1], - minor=self.panel.minor_axis[:-1]) - smaller_like = self.panel.reindex_like(smaller) - assert_panel_equal(smaller, smaller_like) + result = panel.reindex([0, 1], axis=0) + assert_panel_equal(result, expected0) - def test_take(self): - # axis == 0 - result = self.panel.take([2, 0, 1], axis=0) - expected = self.panel.reindex(items=['ItemC', 'ItemA', 'ItemB']) - assert_panel_equal(result, expected) + result = panel.reindex([0, 1], axis=1) + assert_panel_equal(result, expected1) - # axis >= 1 - result = self.panel.take([3, 0, 1, 2], axis=2) - expected = self.panel.reindex(minor=['D', 'A', 'B', 'C']) - assert_panel_equal(result, expected) + result = panel.reindex([0, 1], axis=2) + assert_panel_equal(result, expected2) - # neg indicies ok - expected = self.panel.reindex(minor=['D', 'D', 'B', 'C']) - result = self.panel.take([3, -1, 1, 2], axis=2) - assert_panel_equal(result, expected) + result = panel.reindex([0, 1], axis=2) + assert_panel_equal(result, expected2) - self.assertRaises(Exception, self.panel.take, [4, 0, 1, 2], axis=2) + def test_reindex_multi(self): + with catch_warnings(record=True): + + # with and without copy full reindexing + result = 
self.panel.reindex( + items=self.panel.items, + major=self.panel.major_axis, + minor=self.panel.minor_axis, copy=False) + + assert result.items is self.panel.items + assert result.major_axis is self.panel.major_axis + assert result.minor_axis is self.panel.minor_axis + + result = self.panel.reindex( + items=self.panel.items, + major=self.panel.major_axis, + minor=self.panel.minor_axis, copy=False) + assert_panel_equal(result, self.panel) + + # multi-axis indexing consistency + # GH 5900 + df = DataFrame(np.random.randn(4, 3)) + p = Panel({'Item1': df}) + expected = Panel({'Item1': df}) + expected['Item2'] = np.nan + + items = ['Item1', 'Item2'] + major_axis = np.arange(4) + minor_axis = np.arange(3) + + results = [] + results.append(p.reindex(items=items, major_axis=major_axis, + copy=True)) + results.append(p.reindex(items=items, major_axis=major_axis, + copy=False)) + results.append(p.reindex(items=items, minor_axis=minor_axis, + copy=True)) + results.append(p.reindex(items=items, minor_axis=minor_axis, + copy=False)) + results.append(p.reindex(items=items, major_axis=major_axis, + minor_axis=minor_axis, copy=True)) + results.append(p.reindex(items=items, major_axis=major_axis, + minor_axis=minor_axis, copy=False)) + + for i, r in enumerate(results): + assert_panel_equal(expected, r) - def test_sort_index(self): - import random + def test_reindex_like(self): + with catch_warnings(record=True): + # reindex_like + smaller = self.panel.reindex(items=self.panel.items[:-1], + major=self.panel.major_axis[:-1], + minor=self.panel.minor_axis[:-1]) + smaller_like = self.panel.reindex_like(smaller) + assert_panel_equal(smaller, smaller_like) - ritems = list(self.panel.items) - rmajor = list(self.panel.major_axis) - rminor = list(self.panel.minor_axis) - random.shuffle(ritems) - random.shuffle(rmajor) - random.shuffle(rminor) + def test_take(self): + with catch_warnings(record=True): + # axis == 0 + result = self.panel.take([2, 0, 1], axis=0) + expected = 
self.panel.reindex(items=['ItemC', 'ItemA', 'ItemB']) + assert_panel_equal(result, expected) - random_order = self.panel.reindex(items=ritems) - sorted_panel = random_order.sort_index(axis=0) - assert_panel_equal(sorted_panel, self.panel) + # axis >= 1 + result = self.panel.take([3, 0, 1, 2], axis=2) + expected = self.panel.reindex(minor=['D', 'A', 'B', 'C']) + assert_panel_equal(result, expected) - # descending - random_order = self.panel.reindex(items=ritems) - sorted_panel = random_order.sort_index(axis=0, ascending=False) - assert_panel_equal(sorted_panel, - self.panel.reindex(items=self.panel.items[::-1])) + # neg indicies ok + expected = self.panel.reindex(minor=['D', 'D', 'B', 'C']) + result = self.panel.take([3, -1, 1, 2], axis=2) + assert_panel_equal(result, expected) - random_order = self.panel.reindex(major=rmajor) - sorted_panel = random_order.sort_index(axis=1) - assert_panel_equal(sorted_panel, self.panel) + pytest.raises(Exception, self.panel.take, [4, 0, 1, 2], axis=2) - random_order = self.panel.reindex(minor=rminor) - sorted_panel = random_order.sort_index(axis=2) - assert_panel_equal(sorted_panel, self.panel) + def test_sort_index(self): + with catch_warnings(record=True): + import random + + ritems = list(self.panel.items) + rmajor = list(self.panel.major_axis) + rminor = list(self.panel.minor_axis) + random.shuffle(ritems) + random.shuffle(rmajor) + random.shuffle(rminor) + + random_order = self.panel.reindex(items=ritems) + sorted_panel = random_order.sort_index(axis=0) + assert_panel_equal(sorted_panel, self.panel) + + # descending + random_order = self.panel.reindex(items=ritems) + sorted_panel = random_order.sort_index(axis=0, ascending=False) + assert_panel_equal( + sorted_panel, + self.panel.reindex(items=self.panel.items[::-1])) + + random_order = self.panel.reindex(major=rmajor) + sorted_panel = random_order.sort_index(axis=1) + assert_panel_equal(sorted_panel, self.panel) + + random_order = self.panel.reindex(minor=rminor) + 
sorted_panel = random_order.sort_index(axis=2) + assert_panel_equal(sorted_panel, self.panel) def test_fillna(self): - filled = self.panel.fillna(0) - self.assertTrue(np.isfinite(filled.values).all()) - - filled = self.panel.fillna(method='backfill') - assert_frame_equal(filled['ItemA'], - self.panel['ItemA'].fillna(method='backfill')) - - panel = self.panel.copy() - panel['str'] = 'foo' - - filled = panel.fillna(method='backfill') - assert_frame_equal(filled['ItemA'], - panel['ItemA'].fillna(method='backfill')) - - empty = self.panel.reindex(items=[]) - filled = empty.fillna(0) - assert_panel_equal(filled, empty) - - self.assertRaises(ValueError, self.panel.fillna) - self.assertRaises(ValueError, self.panel.fillna, 5, method='ffill') - - self.assertRaises(TypeError, self.panel.fillna, [1, 2]) - self.assertRaises(TypeError, self.panel.fillna, (1, 2)) - - # limit not implemented when only value is specified - p = Panel(np.random.randn(3, 4, 5)) - p.iloc[0:2, 0:2, 0:2] = np.nan - self.assertRaises(NotImplementedError, lambda: p.fillna(999, limit=1)) - - # Test in place fillNA - # Expected result - expected = Panel([[[0, 1], [2, 1]], [[10, 11], [12, 11]]], - items=['a', 'b'], minor_axis=['x', 'y'], - dtype=np.float64) - # method='ffill' - p1 = Panel([[[0, 1], [2, np.nan]], [[10, 11], [12, np.nan]]], - items=['a', 'b'], minor_axis=['x', 'y'], - dtype=np.float64) - p1.fillna(method='ffill', inplace=True) - assert_panel_equal(p1, expected) - - # method='bfill' - p2 = Panel([[[0, np.nan], [2, 1]], [[10, np.nan], [12, 11]]], - items=['a', 'b'], minor_axis=['x', 'y'], dtype=np.float64) - p2.fillna(method='bfill', inplace=True) - assert_panel_equal(p2, expected) + with catch_warnings(record=True): + filled = self.panel.fillna(0) + assert np.isfinite(filled.values).all() + + filled = self.panel.fillna(method='backfill') + assert_frame_equal(filled['ItemA'], + self.panel['ItemA'].fillna(method='backfill')) + + panel = self.panel.copy() + panel['str'] = 'foo' + + filled = 
panel.fillna(method='backfill') + assert_frame_equal(filled['ItemA'], + panel['ItemA'].fillna(method='backfill')) + + empty = self.panel.reindex(items=[]) + filled = empty.fillna(0) + assert_panel_equal(filled, empty) + + pytest.raises(ValueError, self.panel.fillna) + pytest.raises(ValueError, self.panel.fillna, 5, method='ffill') + + pytest.raises(TypeError, self.panel.fillna, [1, 2]) + pytest.raises(TypeError, self.panel.fillna, (1, 2)) + + # limit not implemented when only value is specified + p = Panel(np.random.randn(3, 4, 5)) + p.iloc[0:2, 0:2, 0:2] = np.nan + pytest.raises(NotImplementedError, + lambda: p.fillna(999, limit=1)) + + # Test in place fillNA + # Expected result + expected = Panel([[[0, 1], [2, 1]], [[10, 11], [12, 11]]], + items=['a', 'b'], minor_axis=['x', 'y'], + dtype=np.float64) + # method='ffill' + p1 = Panel([[[0, 1], [2, np.nan]], [[10, 11], [12, np.nan]]], + items=['a', 'b'], minor_axis=['x', 'y'], + dtype=np.float64) + p1.fillna(method='ffill', inplace=True) + assert_panel_equal(p1, expected) + + # method='bfill' + p2 = Panel([[[0, np.nan], [2, 1]], [[10, np.nan], [12, 11]]], + items=['a', 'b'], minor_axis=['x', 'y'], + dtype=np.float64) + p2.fillna(method='bfill', inplace=True) + assert_panel_equal(p2, expected) def test_ffill_bfill(self): - assert_panel_equal(self.panel.ffill(), - self.panel.fillna(method='ffill')) - assert_panel_equal(self.panel.bfill(), - self.panel.fillna(method='bfill')) + with catch_warnings(record=True): + assert_panel_equal(self.panel.ffill(), + self.panel.fillna(method='ffill')) + assert_panel_equal(self.panel.bfill(), + self.panel.fillna(method='bfill')) def test_truncate_fillna_bug(self): - # #1823 - result = self.panel.truncate(before=None, after=None, axis='items') + with catch_warnings(record=True): + # #1823 + result = self.panel.truncate(before=None, after=None, axis='items') - # it works! - result.fillna(value=0.0) + # it works! 
+ result.fillna(value=0.0) def test_swapaxes(self): - result = self.panel.swapaxes('items', 'minor') - self.assertIs(result.items, self.panel.minor_axis) + with catch_warnings(record=True): + result = self.panel.swapaxes('items', 'minor') + assert result.items is self.panel.minor_axis - result = self.panel.swapaxes('items', 'major') - self.assertIs(result.items, self.panel.major_axis) + result = self.panel.swapaxes('items', 'major') + assert result.items is self.panel.major_axis - result = self.panel.swapaxes('major', 'minor') - self.assertIs(result.major_axis, self.panel.minor_axis) + result = self.panel.swapaxes('major', 'minor') + assert result.major_axis is self.panel.minor_axis - panel = self.panel.copy() - result = panel.swapaxes('major', 'minor') - panel.values[0, 0, 1] = np.nan - expected = panel.swapaxes('major', 'minor') - assert_panel_equal(result, expected) + panel = self.panel.copy() + result = panel.swapaxes('major', 'minor') + panel.values[0, 0, 1] = np.nan + expected = panel.swapaxes('major', 'minor') + assert_panel_equal(result, expected) - # this should also work - result = self.panel.swapaxes(0, 1) - self.assertIs(result.items, self.panel.major_axis) + # this should also work + result = self.panel.swapaxes(0, 1) + assert result.items is self.panel.major_axis - # this works, but return a copy - result = self.panel.swapaxes('items', 'items') - assert_panel_equal(self.panel, result) - self.assertNotEqual(id(self.panel), id(result)) + # this works, but return a copy + result = self.panel.swapaxes('items', 'items') + assert_panel_equal(self.panel, result) + assert id(self.panel) != id(result) def test_transpose(self): - result = self.panel.transpose('minor', 'major', 'items') - expected = self.panel.swapaxes('items', 'minor') - assert_panel_equal(result, expected) + with catch_warnings(record=True): + result = self.panel.transpose('minor', 'major', 'items') + expected = self.panel.swapaxes('items', 'minor') + assert_panel_equal(result, expected) - # 
test kwargs - result = self.panel.transpose(items='minor', major='major', - minor='items') - expected = self.panel.swapaxes('items', 'minor') - assert_panel_equal(result, expected) + # test kwargs + result = self.panel.transpose(items='minor', major='major', + minor='items') + expected = self.panel.swapaxes('items', 'minor') + assert_panel_equal(result, expected) - # text mixture of args - result = self.panel.transpose('minor', major='major', minor='items') - expected = self.panel.swapaxes('items', 'minor') - assert_panel_equal(result, expected) + # text mixture of args + result = self.panel.transpose( + 'minor', major='major', minor='items') + expected = self.panel.swapaxes('items', 'minor') + assert_panel_equal(result, expected) - result = self.panel.transpose('minor', 'major', minor='items') - expected = self.panel.swapaxes('items', 'minor') - assert_panel_equal(result, expected) + result = self.panel.transpose('minor', + 'major', + minor='items') + expected = self.panel.swapaxes('items', 'minor') + assert_panel_equal(result, expected) - # duplicate axes - with tm.assertRaisesRegexp(TypeError, - 'not enough/duplicate arguments'): - self.panel.transpose('minor', maj='major', minor='items') + # duplicate axes + with tm.assert_raises_regex(TypeError, + 'not enough/duplicate arguments'): + self.panel.transpose('minor', maj='major', minor='items') - with tm.assertRaisesRegexp(ValueError, 'repeated axis in transpose'): - self.panel.transpose('minor', 'major', major='minor', - minor='items') + with tm.assert_raises_regex(ValueError, + 'repeated axis in transpose'): + self.panel.transpose('minor', 'major', major='minor', + minor='items') - result = self.panel.transpose(2, 1, 0) - assert_panel_equal(result, expected) + result = self.panel.transpose(2, 1, 0) + assert_panel_equal(result, expected) - result = self.panel.transpose('minor', 'items', 'major') - expected = self.panel.swapaxes('items', 'minor') - expected = expected.swapaxes('major', 'minor') - 
assert_panel_equal(result, expected) + result = self.panel.transpose('minor', 'items', 'major') + expected = self.panel.swapaxes('items', 'minor') + expected = expected.swapaxes('major', 'minor') + assert_panel_equal(result, expected) - result = self.panel.transpose(2, 0, 1) - assert_panel_equal(result, expected) + result = self.panel.transpose(2, 0, 1) + assert_panel_equal(result, expected) - self.assertRaises(ValueError, self.panel.transpose, 0, 0, 1) + pytest.raises(ValueError, self.panel.transpose, 0, 0, 1) def test_transpose_copy(self): - panel = self.panel.copy() - result = panel.transpose(2, 0, 1, copy=True) - expected = panel.swapaxes('items', 'minor') - expected = expected.swapaxes('major', 'minor') - assert_panel_equal(result, expected) + with catch_warnings(record=True): + panel = self.panel.copy() + result = panel.transpose(2, 0, 1, copy=True) + expected = panel.swapaxes('items', 'minor') + expected = expected.swapaxes('major', 'minor') + assert_panel_equal(result, expected) - panel.values[0, 1, 1] = np.nan - self.assertTrue(notnull(result.values[1, 0, 1])) + panel.values[0, 1, 1] = np.nan + assert notna(result.values[1, 0, 1]) def test_to_frame(self): - # filtered - filtered = self.panel.to_frame() - expected = self.panel.to_frame().dropna(how='any') - assert_frame_equal(filtered, expected) - - # unfiltered - unfiltered = self.panel.to_frame(filter_observations=False) - assert_panel_equal(unfiltered.to_panel(), self.panel) - - # names - self.assertEqual(unfiltered.index.names, ('major', 'minor')) - - # unsorted, round trip - df = self.panel.to_frame(filter_observations=False) - unsorted = df.take(np.random.permutation(len(df))) - pan = unsorted.to_panel() - assert_panel_equal(pan, self.panel) - - # preserve original index names - df = DataFrame(np.random.randn(6, 2), - index=[['a', 'a', 'b', 'b', 'c', 'c'], - [0, 1, 0, 1, 0, 1]], - columns=['one', 'two']) - df.index.names = ['foo', 'bar'] - df.columns.name = 'baz' - - rdf = df.to_panel().to_frame() - 
self.assertEqual(rdf.index.names, df.index.names) - self.assertEqual(rdf.columns.names, df.columns.names) + with catch_warnings(record=True): + # filtered + filtered = self.panel.to_frame() + expected = self.panel.to_frame().dropna(how='any') + assert_frame_equal(filtered, expected) + + # unfiltered + unfiltered = self.panel.to_frame(filter_observations=False) + assert_panel_equal(unfiltered.to_panel(), self.panel) + + # names + assert unfiltered.index.names == ('major', 'minor') + + # unsorted, round trip + df = self.panel.to_frame(filter_observations=False) + unsorted = df.take(np.random.permutation(len(df))) + pan = unsorted.to_panel() + assert_panel_equal(pan, self.panel) + + # preserve original index names + df = DataFrame(np.random.randn(6, 2), + index=[['a', 'a', 'b', 'b', 'c', 'c'], + [0, 1, 0, 1, 0, 1]], + columns=['one', 'two']) + df.index.names = ['foo', 'bar'] + df.columns.name = 'baz' + + rdf = df.to_panel().to_frame() + assert rdf.index.names == df.index.names + assert rdf.columns.names == df.columns.names def test_to_frame_mixed(self): - panel = self.panel.fillna(0) - panel['str'] = 'foo' - panel['bool'] = panel['ItemA'] > 0 - - lp = panel.to_frame() - wp = lp.to_panel() - self.assertEqual(wp['bool'].values.dtype, np.bool_) - # Previously, this was mutating the underlying index and changing its - # name - assert_frame_equal(wp['bool'], panel['bool'], check_names=False) - - # GH 8704 - # with categorical - df = panel.to_frame() - df['category'] = df['str'].astype('category') - - # to_panel - # TODO: this converts back to object - p = df.to_panel() - expected = panel.copy() - expected['category'] = 'foo' - assert_panel_equal(p, expected) + with catch_warnings(record=True): + panel = self.panel.fillna(0) + panel['str'] = 'foo' + panel['bool'] = panel['ItemA'] > 0 + + lp = panel.to_frame() + wp = lp.to_panel() + assert wp['bool'].values.dtype == np.bool_ + # Previously, this was mutating the underlying + # index and changing its name + 
assert_frame_equal(wp['bool'], panel['bool'], check_names=False) + + # GH 8704 + # with categorical + df = panel.to_frame() + df['category'] = df['str'].astype('category') + + # to_panel + # TODO: this converts back to object + p = df.to_panel() + expected = panel.copy() + expected['category'] = 'foo' + assert_panel_equal(p, expected) def test_to_frame_multi_major(self): - idx = MultiIndex.from_tuples([(1, 'one'), (1, 'two'), (2, 'one'), ( - 2, 'two')]) - df = DataFrame([[1, 'a', 1], [2, 'b', 1], [3, 'c', 1], [4, 'd', 1]], - columns=['A', 'B', 'C'], index=idx) - wp = Panel({'i1': df, 'i2': df}) - expected_idx = MultiIndex.from_tuples( - [ - (1, 'one', 'A'), (1, 'one', 'B'), - (1, 'one', 'C'), (1, 'two', 'A'), - (1, 'two', 'B'), (1, 'two', 'C'), - (2, 'one', 'A'), (2, 'one', 'B'), - (2, 'one', 'C'), (2, 'two', 'A'), - (2, 'two', 'B'), (2, 'two', 'C') - ], - names=[None, None, 'minor']) - expected = DataFrame({'i1': [1, 'a', 1, 2, 'b', 1, 3, - 'c', 1, 4, 'd', 1], - 'i2': [1, 'a', 1, 2, 'b', - 1, 3, 'c', 1, 4, 'd', 1]}, - index=expected_idx) - result = wp.to_frame() - assert_frame_equal(result, expected) - - wp.iloc[0, 0].iloc[0] = np.nan # BUG on setting. 
GH #5773 - result = wp.to_frame() - assert_frame_equal(result, expected[1:]) - - idx = MultiIndex.from_tuples([(1, 'two'), (1, 'one'), (2, 'one'), ( - np.nan, 'two')]) - df = DataFrame([[1, 'a', 1], [2, 'b', 1], [3, 'c', 1], [4, 'd', 1]], - columns=['A', 'B', 'C'], index=idx) - wp = Panel({'i1': df, 'i2': df}) - ex_idx = MultiIndex.from_tuples([(1, 'two', 'A'), (1, 'two', 'B'), - (1, 'two', 'C'), - (1, 'one', 'A'), - (1, 'one', 'B'), - (1, 'one', 'C'), - (2, 'one', 'A'), - (2, 'one', 'B'), - (2, 'one', 'C'), - (np.nan, 'two', 'A'), - (np.nan, 'two', 'B'), - (np.nan, 'two', 'C')], - names=[None, None, 'minor']) - expected.index = ex_idx - result = wp.to_frame() - assert_frame_equal(result, expected) + with catch_warnings(record=True): + idx = MultiIndex.from_tuples( + [(1, 'one'), (1, 'two'), (2, 'one'), (2, 'two')]) + df = DataFrame([[1, 'a', 1], [2, 'b', 1], + [3, 'c', 1], [4, 'd', 1]], + columns=['A', 'B', 'C'], index=idx) + wp = Panel({'i1': df, 'i2': df}) + expected_idx = MultiIndex.from_tuples( + [ + (1, 'one', 'A'), (1, 'one', 'B'), + (1, 'one', 'C'), (1, 'two', 'A'), + (1, 'two', 'B'), (1, 'two', 'C'), + (2, 'one', 'A'), (2, 'one', 'B'), + (2, 'one', 'C'), (2, 'two', 'A'), + (2, 'two', 'B'), (2, 'two', 'C') + ], + names=[None, None, 'minor']) + expected = DataFrame({'i1': [1, 'a', 1, 2, 'b', 1, 3, + 'c', 1, 4, 'd', 1], + 'i2': [1, 'a', 1, 2, 'b', + 1, 3, 'c', 1, 4, 'd', 1]}, + index=expected_idx) + result = wp.to_frame() + assert_frame_equal(result, expected) + + wp.iloc[0, 0].iloc[0] = np.nan # BUG on setting. 
GH #5773 + result = wp.to_frame() + assert_frame_equal(result, expected[1:]) + + idx = MultiIndex.from_tuples( + [(1, 'two'), (1, 'one'), (2, 'one'), (np.nan, 'two')]) + df = DataFrame([[1, 'a', 1], [2, 'b', 1], + [3, 'c', 1], [4, 'd', 1]], + columns=['A', 'B', 'C'], index=idx) + wp = Panel({'i1': df, 'i2': df}) + ex_idx = MultiIndex.from_tuples([(1, 'two', 'A'), (1, 'two', 'B'), + (1, 'two', 'C'), + (1, 'one', 'A'), + (1, 'one', 'B'), + (1, 'one', 'C'), + (2, 'one', 'A'), + (2, 'one', 'B'), + (2, 'one', 'C'), + (np.nan, 'two', 'A'), + (np.nan, 'two', 'B'), + (np.nan, 'two', 'C')], + names=[None, None, 'minor']) + expected.index = ex_idx + result = wp.to_frame() + assert_frame_equal(result, expected) def test_to_frame_multi_major_minor(self): - cols = MultiIndex(levels=[['C_A', 'C_B'], ['C_1', 'C_2']], - labels=[[0, 0, 1, 1], [0, 1, 0, 1]]) - idx = MultiIndex.from_tuples([(1, 'one'), (1, 'two'), (2, 'one'), ( - 2, 'two'), (3, 'three'), (4, 'four')]) - df = DataFrame([[1, 2, 11, 12], [3, 4, 13, 14], - ['a', 'b', 'w', 'x'], - ['c', 'd', 'y', 'z'], [-1, -2, -3, -4], - [-5, -6, -7, -8]], columns=cols, index=idx) - wp = Panel({'i1': df, 'i2': df}) - - exp_idx = MultiIndex.from_tuples( - [(1, 'one', 'C_A', 'C_1'), (1, 'one', 'C_A', 'C_2'), - (1, 'one', 'C_B', 'C_1'), (1, 'one', 'C_B', 'C_2'), - (1, 'two', 'C_A', 'C_1'), (1, 'two', 'C_A', 'C_2'), - (1, 'two', 'C_B', 'C_1'), (1, 'two', 'C_B', 'C_2'), - (2, 'one', 'C_A', 'C_1'), (2, 'one', 'C_A', 'C_2'), - (2, 'one', 'C_B', 'C_1'), (2, 'one', 'C_B', 'C_2'), - (2, 'two', 'C_A', 'C_1'), (2, 'two', 'C_A', 'C_2'), - (2, 'two', 'C_B', 'C_1'), (2, 'two', 'C_B', 'C_2'), - (3, 'three', 'C_A', 'C_1'), (3, 'three', 'C_A', 'C_2'), - (3, 'three', 'C_B', 'C_1'), (3, 'three', 'C_B', 'C_2'), - (4, 'four', 'C_A', 'C_1'), (4, 'four', 'C_A', 'C_2'), - (4, 'four', 'C_B', 'C_1'), (4, 'four', 'C_B', 'C_2')], - names=[None, None, None, None]) - exp_val = [[1, 1], [2, 2], [11, 11], [12, 12], [3, 3], [4, 4], - [13, 13], [14, 14], ['a', 'a'], ['b', 
'b'], ['w', 'w'], - ['x', 'x'], ['c', 'c'], ['d', 'd'], ['y', 'y'], ['z', 'z'], - [-1, -1], [-2, -2], [-3, -3], [-4, -4], [-5, -5], [-6, -6], - [-7, -7], [-8, -8]] - result = wp.to_frame() - expected = DataFrame(exp_val, columns=['i1', 'i2'], index=exp_idx) - assert_frame_equal(result, expected) + with catch_warnings(record=True): + cols = MultiIndex(levels=[['C_A', 'C_B'], ['C_1', 'C_2']], + labels=[[0, 0, 1, 1], [0, 1, 0, 1]]) + idx = MultiIndex.from_tuples([(1, 'one'), (1, 'two'), (2, 'one'), ( + 2, 'two'), (3, 'three'), (4, 'four')]) + df = DataFrame([[1, 2, 11, 12], [3, 4, 13, 14], + ['a', 'b', 'w', 'x'], + ['c', 'd', 'y', 'z'], [-1, -2, -3, -4], + [-5, -6, -7, -8]], columns=cols, index=idx) + wp = Panel({'i1': df, 'i2': df}) + + exp_idx = MultiIndex.from_tuples( + [(1, 'one', 'C_A', 'C_1'), (1, 'one', 'C_A', 'C_2'), + (1, 'one', 'C_B', 'C_1'), (1, 'one', 'C_B', 'C_2'), + (1, 'two', 'C_A', 'C_1'), (1, 'two', 'C_A', 'C_2'), + (1, 'two', 'C_B', 'C_1'), (1, 'two', 'C_B', 'C_2'), + (2, 'one', 'C_A', 'C_1'), (2, 'one', 'C_A', 'C_2'), + (2, 'one', 'C_B', 'C_1'), (2, 'one', 'C_B', 'C_2'), + (2, 'two', 'C_A', 'C_1'), (2, 'two', 'C_A', 'C_2'), + (2, 'two', 'C_B', 'C_1'), (2, 'two', 'C_B', 'C_2'), + (3, 'three', 'C_A', 'C_1'), (3, 'three', 'C_A', 'C_2'), + (3, 'three', 'C_B', 'C_1'), (3, 'three', 'C_B', 'C_2'), + (4, 'four', 'C_A', 'C_1'), (4, 'four', 'C_A', 'C_2'), + (4, 'four', 'C_B', 'C_1'), (4, 'four', 'C_B', 'C_2')], + names=[None, None, None, None]) + exp_val = [[1, 1], [2, 2], [11, 11], [12, 12], + [3, 3], [4, 4], + [13, 13], [14, 14], ['a', 'a'], + ['b', 'b'], ['w', 'w'], + ['x', 'x'], ['c', 'c'], ['d', 'd'], [ + 'y', 'y'], ['z', 'z'], + [-1, -1], [-2, -2], [-3, -3], [-4, -4], + [-5, -5], [-6, -6], + [-7, -7], [-8, -8]] + result = wp.to_frame() + expected = DataFrame(exp_val, columns=['i1', 'i2'], index=exp_idx) + assert_frame_equal(result, expected) def test_to_frame_multi_drop_level(self): - idx = MultiIndex.from_tuples([(1, 'one'), (2, 'one'), (2, 'two')]) - 
df = DataFrame({'A': [np.nan, 1, 2]}, index=idx) - wp = Panel({'i1': df, 'i2': df}) - result = wp.to_frame() - exp_idx = MultiIndex.from_tuples([(2, 'one', 'A'), (2, 'two', 'A')], - names=[None, None, 'minor']) - expected = DataFrame({'i1': [1., 2], 'i2': [1., 2]}, index=exp_idx) - assert_frame_equal(result, expected) + with catch_warnings(record=True): + idx = MultiIndex.from_tuples([(1, 'one'), (2, 'one'), (2, 'two')]) + df = DataFrame({'A': [np.nan, 1, 2]}, index=idx) + wp = Panel({'i1': df, 'i2': df}) + result = wp.to_frame() + exp_idx = MultiIndex.from_tuples( + [(2, 'one', 'A'), (2, 'two', 'A')], + names=[None, None, 'minor']) + expected = DataFrame({'i1': [1., 2], 'i2': [1., 2]}, index=exp_idx) + assert_frame_equal(result, expected) def test_to_panel_na_handling(self): - df = DataFrame(np.random.randint(0, 10, size=20).reshape((10, 2)), - index=[[0, 0, 0, 0, 0, 0, 1, 1, 1, 1], - [0, 1, 2, 3, 4, 5, 2, 3, 4, 5]]) + with catch_warnings(record=True): + df = DataFrame(np.random.randint(0, 10, size=20).reshape((10, 2)), + index=[[0, 0, 0, 0, 0, 0, 1, 1, 1, 1], + [0, 1, 2, 3, 4, 5, 2, 3, 4, 5]]) - panel = df.to_panel() - self.assertTrue(isnull(panel[0].loc[1, [0, 1]]).all()) + panel = df.to_panel() + assert isna(panel[0].loc[1, [0, 1]]).all() def test_to_panel_duplicates(self): # #2441 - df = DataFrame({'a': [0, 0, 1], 'b': [1, 1, 1], 'c': [1, 2, 3]}) - idf = df.set_index(['a', 'b']) - assertRaisesRegexp(ValueError, 'non-uniquely indexed', idf.to_panel) + with catch_warnings(record=True): + df = DataFrame({'a': [0, 0, 1], 'b': [1, 1, 1], 'c': [1, 2, 3]}) + idf = df.set_index(['a', 'b']) + tm.assert_raises_regex( + ValueError, 'non-uniquely indexed', idf.to_panel) def test_panel_dups(self): + with catch_warnings(record=True): - # GH 4960 - # duplicates in an index + # GH 4960 + # duplicates in an index - # items - data = np.random.randn(5, 100, 5) - no_dup_panel = Panel(data, items=list("ABCDE")) - panel = Panel(data, items=list("AACDE")) + # items + data = 
np.random.randn(5, 100, 5) + no_dup_panel = Panel(data, items=list("ABCDE")) + panel = Panel(data, items=list("AACDE")) - expected = no_dup_panel['A'] - result = panel.iloc[0] - assert_frame_equal(result, expected) + expected = no_dup_panel['A'] + result = panel.iloc[0] + assert_frame_equal(result, expected) - expected = no_dup_panel['E'] - result = panel.loc['E'] - assert_frame_equal(result, expected) + expected = no_dup_panel['E'] + result = panel.loc['E'] + assert_frame_equal(result, expected) - expected = no_dup_panel.loc[['A', 'B']] - expected.items = ['A', 'A'] - result = panel.loc['A'] - assert_panel_equal(result, expected) + expected = no_dup_panel.loc[['A', 'B']] + expected.items = ['A', 'A'] + result = panel.loc['A'] + assert_panel_equal(result, expected) - # major - data = np.random.randn(5, 5, 5) - no_dup_panel = Panel(data, major_axis=list("ABCDE")) - panel = Panel(data, major_axis=list("AACDE")) + # major + data = np.random.randn(5, 5, 5) + no_dup_panel = Panel(data, major_axis=list("ABCDE")) + panel = Panel(data, major_axis=list("AACDE")) - expected = no_dup_panel.loc[:, 'A'] - result = panel.iloc[:, 0] - assert_frame_equal(result, expected) + expected = no_dup_panel.loc[:, 'A'] + result = panel.iloc[:, 0] + assert_frame_equal(result, expected) - expected = no_dup_panel.loc[:, 'E'] - result = panel.loc[:, 'E'] - assert_frame_equal(result, expected) + expected = no_dup_panel.loc[:, 'E'] + result = panel.loc[:, 'E'] + assert_frame_equal(result, expected) - expected = no_dup_panel.loc[:, ['A', 'B']] - expected.major_axis = ['A', 'A'] - result = panel.loc[:, 'A'] - assert_panel_equal(result, expected) + expected = no_dup_panel.loc[:, ['A', 'B']] + expected.major_axis = ['A', 'A'] + result = panel.loc[:, 'A'] + assert_panel_equal(result, expected) - # minor - data = np.random.randn(5, 100, 5) - no_dup_panel = Panel(data, minor_axis=list("ABCDE")) - panel = Panel(data, minor_axis=list("AACDE")) + # minor + data = np.random.randn(5, 100, 5) + no_dup_panel = 
Panel(data, minor_axis=list("ABCDE")) + panel = Panel(data, minor_axis=list("AACDE")) - expected = no_dup_panel.loc[:, :, 'A'] - result = panel.iloc[:, :, 0] - assert_frame_equal(result, expected) + expected = no_dup_panel.loc[:, :, 'A'] + result = panel.iloc[:, :, 0] + assert_frame_equal(result, expected) - expected = no_dup_panel.loc[:, :, 'E'] - result = panel.loc[:, :, 'E'] - assert_frame_equal(result, expected) + expected = no_dup_panel.loc[:, :, 'E'] + result = panel.loc[:, :, 'E'] + assert_frame_equal(result, expected) - expected = no_dup_panel.loc[:, :, ['A', 'B']] - expected.minor_axis = ['A', 'A'] - result = panel.loc[:, :, 'A'] - assert_panel_equal(result, expected) + expected = no_dup_panel.loc[:, :, ['A', 'B']] + expected.minor_axis = ['A', 'A'] + result = panel.loc[:, :, 'A'] + assert_panel_equal(result, expected) def test_filter(self): pass def test_compound(self): - compounded = self.panel.compound() + with catch_warnings(record=True): + compounded = self.panel.compound() - assert_series_equal(compounded['ItemA'], - (1 + self.panel['ItemA']).product(0) - 1, - check_names=False) + assert_series_equal(compounded['ItemA'], + (1 + self.panel['ItemA']).product(0) - 1, + check_names=False) def test_shift(self): - # major - idx = self.panel.major_axis[0] - idx_lag = self.panel.major_axis[1] - shifted = self.panel.shift(1) - assert_frame_equal(self.panel.major_xs(idx), shifted.major_xs(idx_lag)) - - # minor - idx = self.panel.minor_axis[0] - idx_lag = self.panel.minor_axis[1] - shifted = self.panel.shift(1, axis='minor') - assert_frame_equal(self.panel.minor_xs(idx), shifted.minor_xs(idx_lag)) - - # items - idx = self.panel.items[0] - idx_lag = self.panel.items[1] - shifted = self.panel.shift(1, axis='items') - assert_frame_equal(self.panel[idx], shifted[idx_lag]) - - # negative numbers, #2164 - result = self.panel.shift(-1) - expected = Panel(dict((i, f.shift(-1)[:-1]) - for i, f in self.panel.iteritems())) - assert_panel_equal(result, expected) - - # 
mixed dtypes #6959 - data = [('item ' + ch, makeMixedDataFrame()) for ch in list('abcde')] - data = dict(data) - mixed_panel = Panel.from_dict(data, orient='minor') - shifted = mixed_panel.shift(1) - assert_series_equal(mixed_panel.dtypes, shifted.dtypes) + with catch_warnings(record=True): + # major + idx = self.panel.major_axis[0] + idx_lag = self.panel.major_axis[1] + shifted = self.panel.shift(1) + assert_frame_equal(self.panel.major_xs(idx), + shifted.major_xs(idx_lag)) + + # minor + idx = self.panel.minor_axis[0] + idx_lag = self.panel.minor_axis[1] + shifted = self.panel.shift(1, axis='minor') + assert_frame_equal(self.panel.minor_xs(idx), + shifted.minor_xs(idx_lag)) + + # items + idx = self.panel.items[0] + idx_lag = self.panel.items[1] + shifted = self.panel.shift(1, axis='items') + assert_frame_equal(self.panel[idx], shifted[idx_lag]) + + # negative numbers, #2164 + result = self.panel.shift(-1) + expected = Panel({i: f.shift(-1)[:-1] + for i, f in self.panel.iteritems()}) + assert_panel_equal(result, expected) + + # mixed dtypes #6959 + data = [('item ' + ch, makeMixedDataFrame()) + for ch in list('abcde')] + data = dict(data) + mixed_panel = Panel.from_dict(data, orient='minor') + shifted = mixed_panel.shift(1) + assert_series_equal(mixed_panel.dtypes, shifted.dtypes) def test_tshift(self): # PeriodIndex - ps = tm.makePeriodPanel() - shifted = ps.tshift(1) - unshifted = shifted.tshift(-1) + with catch_warnings(record=True): + ps = tm.makePeriodPanel() + shifted = ps.tshift(1) + unshifted = shifted.tshift(-1) - assert_panel_equal(unshifted, ps) + assert_panel_equal(unshifted, ps) - shifted2 = ps.tshift(freq='B') - assert_panel_equal(shifted, shifted2) + shifted2 = ps.tshift(freq='B') + assert_panel_equal(shifted, shifted2) - shifted3 = ps.tshift(freq=BDay()) - assert_panel_equal(shifted, shifted3) + shifted3 = ps.tshift(freq=BDay()) + assert_panel_equal(shifted, shifted3) - assertRaisesRegexp(ValueError, 'does not match', ps.tshift, freq='M') + 
tm.assert_raises_regex(ValueError, 'does not match', + ps.tshift, freq='M') - # DatetimeIndex - panel = _panel - shifted = panel.tshift(1) - unshifted = shifted.tshift(-1) + # DatetimeIndex + panel = make_test_panel() + shifted = panel.tshift(1) + unshifted = shifted.tshift(-1) - assert_panel_equal(panel, unshifted) + assert_panel_equal(panel, unshifted) - shifted2 = panel.tshift(freq=panel.major_axis.freq) - assert_panel_equal(shifted, shifted2) + shifted2 = panel.tshift(freq=panel.major_axis.freq) + assert_panel_equal(shifted, shifted2) - inferred_ts = Panel(panel.values, items=panel.items, - major_axis=Index(np.asarray(panel.major_axis)), - minor_axis=panel.minor_axis) - shifted = inferred_ts.tshift(1) - unshifted = shifted.tshift(-1) - assert_panel_equal(shifted, panel.tshift(1)) - assert_panel_equal(unshifted, inferred_ts) + inferred_ts = Panel(panel.values, items=panel.items, + major_axis=Index(np.asarray(panel.major_axis)), + minor_axis=panel.minor_axis) + shifted = inferred_ts.tshift(1) + unshifted = shifted.tshift(-1) + assert_panel_equal(shifted, panel.tshift(1)) + assert_panel_equal(unshifted, inferred_ts) - no_freq = panel.iloc[:, [0, 5, 7], :] - self.assertRaises(ValueError, no_freq.tshift) + no_freq = panel.iloc[:, [0, 5, 7], :] + pytest.raises(ValueError, no_freq.tshift) def test_pct_change(self): - df1 = DataFrame({'c1': [1, 2, 5], 'c2': [3, 4, 6]}) - df2 = df1 + 1 - df3 = DataFrame({'c1': [3, 4, 7], 'c2': [5, 6, 8]}) - wp = Panel({'i1': df1, 'i2': df2, 'i3': df3}) - # major, 1 - result = wp.pct_change() # axis='major' - expected = Panel({'i1': df1.pct_change(), - 'i2': df2.pct_change(), - 'i3': df3.pct_change()}) - assert_panel_equal(result, expected) - result = wp.pct_change(axis=1) - assert_panel_equal(result, expected) - # major, 2 - result = wp.pct_change(periods=2) - expected = Panel({'i1': df1.pct_change(2), - 'i2': df2.pct_change(2), - 'i3': df3.pct_change(2)}) - assert_panel_equal(result, expected) - # minor, 1 - result = 
wp.pct_change(axis='minor') - expected = Panel({'i1': df1.pct_change(axis=1), - 'i2': df2.pct_change(axis=1), - 'i3': df3.pct_change(axis=1)}) - assert_panel_equal(result, expected) - result = wp.pct_change(axis=2) - assert_panel_equal(result, expected) - # minor, 2 - result = wp.pct_change(periods=2, axis='minor') - expected = Panel({'i1': df1.pct_change(periods=2, axis=1), - 'i2': df2.pct_change(periods=2, axis=1), - 'i3': df3.pct_change(periods=2, axis=1)}) - assert_panel_equal(result, expected) - # items, 1 - result = wp.pct_change(axis='items') - expected = Panel({'i1': DataFrame({'c1': [np.nan, np.nan, np.nan], - 'c2': [np.nan, np.nan, np.nan]}), - 'i2': DataFrame({'c1': [1, 0.5, .2], - 'c2': [1. / 3, 0.25, 1. / 6]}), - 'i3': DataFrame({'c1': [.5, 1. / 3, 1. / 6], - 'c2': [.25, .2, 1. / 7]})}) - assert_panel_equal(result, expected) - result = wp.pct_change(axis=0) - assert_panel_equal(result, expected) - # items, 2 - result = wp.pct_change(periods=2, axis='items') - expected = Panel({'i1': DataFrame({'c1': [np.nan, np.nan, np.nan], - 'c2': [np.nan, np.nan, np.nan]}), - 'i2': DataFrame({'c1': [np.nan, np.nan, np.nan], - 'c2': [np.nan, np.nan, np.nan]}), - 'i3': DataFrame({'c1': [2, 1, .4], - 'c2': [2. / 3, .5, 1. 
/ 3]})}) - assert_panel_equal(result, expected) + with catch_warnings(record=True): + df1 = DataFrame({'c1': [1, 2, 5], 'c2': [3, 4, 6]}) + df2 = df1 + 1 + df3 = DataFrame({'c1': [3, 4, 7], 'c2': [5, 6, 8]}) + wp = Panel({'i1': df1, 'i2': df2, 'i3': df3}) + # major, 1 + result = wp.pct_change() # axis='major' + expected = Panel({'i1': df1.pct_change(), + 'i2': df2.pct_change(), + 'i3': df3.pct_change()}) + assert_panel_equal(result, expected) + result = wp.pct_change(axis=1) + assert_panel_equal(result, expected) + # major, 2 + result = wp.pct_change(periods=2) + expected = Panel({'i1': df1.pct_change(2), + 'i2': df2.pct_change(2), + 'i3': df3.pct_change(2)}) + assert_panel_equal(result, expected) + # minor, 1 + result = wp.pct_change(axis='minor') + expected = Panel({'i1': df1.pct_change(axis=1), + 'i2': df2.pct_change(axis=1), + 'i3': df3.pct_change(axis=1)}) + assert_panel_equal(result, expected) + result = wp.pct_change(axis=2) + assert_panel_equal(result, expected) + # minor, 2 + result = wp.pct_change(periods=2, axis='minor') + expected = Panel({'i1': df1.pct_change(periods=2, axis=1), + 'i2': df2.pct_change(periods=2, axis=1), + 'i3': df3.pct_change(periods=2, axis=1)}) + assert_panel_equal(result, expected) + # items, 1 + result = wp.pct_change(axis='items') + expected = Panel( + {'i1': DataFrame({'c1': [np.nan, np.nan, np.nan], + 'c2': [np.nan, np.nan, np.nan]}), + 'i2': DataFrame({'c1': [1, 0.5, .2], + 'c2': [1. / 3, 0.25, 1. / 6]}), + 'i3': DataFrame({'c1': [.5, 1. / 3, 1. / 6], + 'c2': [.25, .2, 1. / 7]})}) + assert_panel_equal(result, expected) + result = wp.pct_change(axis=0) + assert_panel_equal(result, expected) + # items, 2 + result = wp.pct_change(periods=2, axis='items') + expected = Panel( + {'i1': DataFrame({'c1': [np.nan, np.nan, np.nan], + 'c2': [np.nan, np.nan, np.nan]}), + 'i2': DataFrame({'c1': [np.nan, np.nan, np.nan], + 'c2': [np.nan, np.nan, np.nan]}), + 'i3': DataFrame({'c1': [2, 1, .4], + 'c2': [2. / 3, .5, 1. 
/ 3]})}) + assert_panel_equal(result, expected) def test_round(self): - values = [[[-3.2, 2.2], [0, -4.8213], [3.123, 123.12], - [-1566.213, 88.88], [-12, 94.5]], - [[-5.82, 3.5], [6.21, -73.272], [-9.087, 23.12], - [272.212, -99.99], [23, -76.5]]] - evalues = [[[float(np.around(i)) for i in j] for j in k] - for k in values] - p = Panel(values, items=['Item1', 'Item2'], - major_axis=pd.date_range('1/1/2000', periods=5), - minor_axis=['A', 'B']) - expected = Panel(evalues, items=['Item1', 'Item2'], - major_axis=pd.date_range('1/1/2000', periods=5), - minor_axis=['A', 'B']) - result = p.round() - self.assert_panel_equal(expected, result) + with catch_warnings(record=True): + values = [[[-3.2, 2.2], [0, -4.8213], [3.123, 123.12], + [-1566.213, 88.88], [-12, 94.5]], + [[-5.82, 3.5], [6.21, -73.272], [-9.087, 23.12], + [272.212, -99.99], [23, -76.5]]] + evalues = [[[float(np.around(i)) for i in j] for j in k] + for k in values] + p = Panel(values, items=['Item1', 'Item2'], + major_axis=date_range('1/1/2000', periods=5), + minor_axis=['A', 'B']) + expected = Panel(evalues, items=['Item1', 'Item2'], + major_axis=date_range('1/1/2000', periods=5), + minor_axis=['A', 'B']) + result = p.round() + assert_panel_equal(expected, result) def test_numpy_round(self): - values = [[[-3.2, 2.2], [0, -4.8213], [3.123, 123.12], - [-1566.213, 88.88], [-12, 94.5]], - [[-5.82, 3.5], [6.21, -73.272], [-9.087, 23.12], - [272.212, -99.99], [23, -76.5]]] - evalues = [[[float(np.around(i)) for i in j] for j in k] - for k in values] - p = Panel(values, items=['Item1', 'Item2'], - major_axis=pd.date_range('1/1/2000', periods=5), - minor_axis=['A', 'B']) - expected = Panel(evalues, items=['Item1', 'Item2'], - major_axis=pd.date_range('1/1/2000', periods=5), - minor_axis=['A', 'B']) - result = np.round(p) - self.assert_panel_equal(expected, result) - - msg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.round, p, out=p) + with catch_warnings(record=True): + 
values = [[[-3.2, 2.2], [0, -4.8213], [3.123, 123.12], + [-1566.213, 88.88], [-12, 94.5]], + [[-5.82, 3.5], [6.21, -73.272], [-9.087, 23.12], + [272.212, -99.99], [23, -76.5]]] + evalues = [[[float(np.around(i)) for i in j] for j in k] + for k in values] + p = Panel(values, items=['Item1', 'Item2'], + major_axis=date_range('1/1/2000', periods=5), + minor_axis=['A', 'B']) + expected = Panel(evalues, items=['Item1', 'Item2'], + major_axis=date_range('1/1/2000', periods=5), + minor_axis=['A', 'B']) + result = np.round(p) + assert_panel_equal(expected, result) + + msg = "the 'out' parameter is not supported" + tm.assert_raises_regex(ValueError, msg, np.round, p, out=p) def test_multiindex_get(self): - ind = MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1), ('b', 2)], - names=['first', 'second']) - wp = Panel(np.random.random((4, 5, 5)), - items=ind, - major_axis=np.arange(5), - minor_axis=np.arange(5)) - f1 = wp['a'] - f2 = wp.loc['a'] - assert_panel_equal(f1, f2) - - self.assertTrue((f1.items == [1, 2]).all()) - self.assertTrue((f2.items == [1, 2]).all()) - - ind = MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1)], - names=['first', 'second']) + with catch_warnings(record=True): + ind = MultiIndex.from_tuples( + [('a', 1), ('a', 2), ('b', 1), ('b', 2)], + names=['first', 'second']) + wp = Panel(np.random.random((4, 5, 5)), + items=ind, + major_axis=np.arange(5), + minor_axis=np.arange(5)) + f1 = wp['a'] + f2 = wp.loc['a'] + assert_panel_equal(f1, f2) + + assert (f1.items == [1, 2]).all() + assert (f2.items == [1, 2]).all() + + ind = MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1)], + names=['first', 'second']) def test_multiindex_blocks(self): - ind = MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1)], - names=['first', 'second']) - wp = Panel(self.panel._data) - wp.items = ind - f1 = wp['a'] - self.assertTrue((f1.items == [1, 2]).all()) + with catch_warnings(record=True): + ind = MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1)], + names=['first', 
'second']) + wp = Panel(self.panel._data) + wp.items = ind + f1 = wp['a'] + assert (f1.items == [1, 2]).all() - f1 = wp[('b', 1)] - self.assertTrue((f1.columns == ['A', 'B', 'C', 'D']).all()) + f1 = wp[('b', 1)] + assert (f1.columns == ['A', 'B', 'C', 'D']).all() def test_repr_empty(self): - empty = Panel() - repr(empty) + with catch_warnings(record=True): + empty = Panel() + repr(empty) def test_rename(self): - mapper = {'ItemA': 'foo', 'ItemB': 'bar', 'ItemC': 'baz'} + with catch_warnings(record=True): + mapper = {'ItemA': 'foo', 'ItemB': 'bar', 'ItemC': 'baz'} - renamed = self.panel.rename_axis(mapper, axis=0) - exp = Index(['foo', 'bar', 'baz']) - self.assert_index_equal(renamed.items, exp) + renamed = self.panel.rename_axis(mapper, axis=0) + exp = Index(['foo', 'bar', 'baz']) + tm.assert_index_equal(renamed.items, exp) - renamed = self.panel.rename_axis(str.lower, axis=2) - exp = Index(['a', 'b', 'c', 'd']) - self.assert_index_equal(renamed.minor_axis, exp) + renamed = self.panel.rename_axis(str.lower, axis=2) + exp = Index(['a', 'b', 'c', 'd']) + tm.assert_index_equal(renamed.minor_axis, exp) - # don't copy - renamed_nocopy = self.panel.rename_axis(mapper, axis=0, copy=False) - renamed_nocopy['foo'] = 3. - self.assertTrue((self.panel['ItemA'].values == 3).all()) + # don't copy + renamed_nocopy = self.panel.rename_axis(mapper, axis=0, copy=False) + renamed_nocopy['foo'] = 3. 
+ assert (self.panel['ItemA'].values == 3).all() def test_get_attr(self): assert_frame_equal(self.panel['ItemA'], self.panel.ItemA) @@ -2045,12 +2199,13 @@ def test_get_attr(self): assert_frame_equal(self.panel['i'], self.panel.i) def test_from_frame_level1_unsorted(self): - tuples = [('MSFT', 3), ('MSFT', 2), ('AAPL', 2), ('AAPL', 1), - ('MSFT', 1)] - midx = MultiIndex.from_tuples(tuples) - df = DataFrame(np.random.rand(5, 4), index=midx) - p = df.to_panel() - assert_frame_equal(p.minor_xs(2), df.xs(2, level=1).sort_index()) + with catch_warnings(record=True): + tuples = [('MSFT', 3), ('MSFT', 2), ('AAPL', 2), ('AAPL', 1), + ('MSFT', 1)] + midx = MultiIndex.from_tuples(tuples) + df = DataFrame(np.random.rand(5, 4), index=midx) + p = df.to_panel() + assert_frame_equal(p.minor_xs(2), df.xs(2, level=1).sort_index()) def test_to_excel(self): try: @@ -2093,282 +2248,323 @@ def test_to_excel_xlsxwriter(self): assert_frame_equal(df, recdf) def test_dropna(self): - p = Panel(np.random.randn(4, 5, 6), major_axis=list('abcde')) - p.loc[:, ['b', 'd'], 0] = np.nan + with catch_warnings(record=True): + p = Panel(np.random.randn(4, 5, 6), major_axis=list('abcde')) + p.loc[:, ['b', 'd'], 0] = np.nan - result = p.dropna(axis=1) - exp = p.loc[:, ['a', 'c', 'e'], :] - assert_panel_equal(result, exp) - inp = p.copy() - inp.dropna(axis=1, inplace=True) - assert_panel_equal(inp, exp) + result = p.dropna(axis=1) + exp = p.loc[:, ['a', 'c', 'e'], :] + assert_panel_equal(result, exp) + inp = p.copy() + inp.dropna(axis=1, inplace=True) + assert_panel_equal(inp, exp) - result = p.dropna(axis=1, how='all') - assert_panel_equal(result, p) + result = p.dropna(axis=1, how='all') + assert_panel_equal(result, p) - p.loc[:, ['b', 'd'], :] = np.nan - result = p.dropna(axis=1, how='all') - exp = p.loc[:, ['a', 'c', 'e'], :] - assert_panel_equal(result, exp) + p.loc[:, ['b', 'd'], :] = np.nan + result = p.dropna(axis=1, how='all') + exp = p.loc[:, ['a', 'c', 'e'], :] + assert_panel_equal(result, 
exp) - p = Panel(np.random.randn(4, 5, 6), items=list('abcd')) - p.loc[['b'], :, 0] = np.nan + p = Panel(np.random.randn(4, 5, 6), items=list('abcd')) + p.loc[['b'], :, 0] = np.nan - result = p.dropna() - exp = p.loc[['a', 'c', 'd']] - assert_panel_equal(result, exp) + result = p.dropna() + exp = p.loc[['a', 'c', 'd']] + assert_panel_equal(result, exp) - result = p.dropna(how='all') - assert_panel_equal(result, p) + result = p.dropna(how='all') + assert_panel_equal(result, p) - p.loc['b'] = np.nan - result = p.dropna(how='all') - exp = p.loc[['a', 'c', 'd']] - assert_panel_equal(result, exp) + p.loc['b'] = np.nan + result = p.dropna(how='all') + exp = p.loc[['a', 'c', 'd']] + assert_panel_equal(result, exp) def test_drop(self): - df = DataFrame({"A": [1, 2], "B": [3, 4]}) - panel = Panel({"One": df, "Two": df}) + with catch_warnings(record=True): + df = DataFrame({"A": [1, 2], "B": [3, 4]}) + panel = Panel({"One": df, "Two": df}) - def check_drop(drop_val, axis_number, aliases, expected): - try: - actual = panel.drop(drop_val, axis=axis_number) - assert_panel_equal(actual, expected) - for alias in aliases: - actual = panel.drop(drop_val, axis=alias) + def check_drop(drop_val, axis_number, aliases, expected): + try: + actual = panel.drop(drop_val, axis=axis_number) assert_panel_equal(actual, expected) - except AssertionError: - pprint_thing("Failed with axis_number %d and aliases: %s" % - (axis_number, aliases)) - raise - # Items - expected = Panel({"One": df}) - check_drop('Two', 0, ['items'], expected) - - self.assertRaises(ValueError, panel.drop, 'Three') - - # errors = 'ignore' - dropped = panel.drop('Three', errors='ignore') - assert_panel_equal(dropped, panel) - dropped = panel.drop(['Two', 'Three'], errors='ignore') - expected = Panel({"One": df}) - assert_panel_equal(dropped, expected) - - # Major - exp_df = DataFrame({"A": [2], "B": [4]}, index=[1]) - expected = Panel({"One": exp_df, "Two": exp_df}) - check_drop(0, 1, ['major_axis', 'major'], expected) - - 
exp_df = DataFrame({"A": [1], "B": [3]}, index=[0]) - expected = Panel({"One": exp_df, "Two": exp_df}) - check_drop([1], 1, ['major_axis', 'major'], expected) - - # Minor - exp_df = df[['B']] - expected = Panel({"One": exp_df, "Two": exp_df}) - check_drop(["A"], 2, ['minor_axis', 'minor'], expected) - - exp_df = df[['A']] - expected = Panel({"One": exp_df, "Two": exp_df}) - check_drop("B", 2, ['minor_axis', 'minor'], expected) + for alias in aliases: + actual = panel.drop(drop_val, axis=alias) + assert_panel_equal(actual, expected) + except AssertionError: + pprint_thing("Failed with axis_number %d and aliases: %s" % + (axis_number, aliases)) + raise + # Items + expected = Panel({"One": df}) + check_drop('Two', 0, ['items'], expected) + + pytest.raises(KeyError, panel.drop, 'Three') + + # errors = 'ignore' + dropped = panel.drop('Three', errors='ignore') + assert_panel_equal(dropped, panel) + dropped = panel.drop(['Two', 'Three'], errors='ignore') + expected = Panel({"One": df}) + assert_panel_equal(dropped, expected) + + # Major + exp_df = DataFrame({"A": [2], "B": [4]}, index=[1]) + expected = Panel({"One": exp_df, "Two": exp_df}) + check_drop(0, 1, ['major_axis', 'major'], expected) + + exp_df = DataFrame({"A": [1], "B": [3]}, index=[0]) + expected = Panel({"One": exp_df, "Two": exp_df}) + check_drop([1], 1, ['major_axis', 'major'], expected) + + # Minor + exp_df = df[['B']] + expected = Panel({"One": exp_df, "Two": exp_df}) + check_drop(["A"], 2, ['minor_axis', 'minor'], expected) + + exp_df = df[['A']] + expected = Panel({"One": exp_df, "Two": exp_df}) + check_drop("B", 2, ['minor_axis', 'minor'], expected) def test_update(self): - pan = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.], [1.5, np.nan, 3.], - [1.5, np.nan, 3.]], - [[1.5, np.nan, 3.], [1.5, np.nan, 3.], [1.5, np.nan, 3.], - [1.5, np.nan, 3.]]]) + with catch_warnings(record=True): + pan = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.], + [1.5, np.nan, 3.], + [1.5, np.nan, 3.]], + [[1.5, np.nan, 3.], 
[1.5, np.nan, 3.], + [1.5, np.nan, 3.], + [1.5, np.nan, 3.]]]) - other = Panel([[[3.6, 2., np.nan], [np.nan, np.nan, 7]]], items=[1]) + other = Panel( + [[[3.6, 2., np.nan], [np.nan, np.nan, 7]]], items=[1]) - pan.update(other) + pan.update(other) - expected = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.], - [1.5, np.nan, 3.], [1.5, np.nan, 3.]], - [[3.6, 2., 3], [1.5, np.nan, 7], [1.5, np.nan, 3.], - [1.5, np.nan, 3.]]]) + expected = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.], + [1.5, np.nan, 3.], [1.5, np.nan, 3.]], + [[3.6, 2., 3], [1.5, np.nan, 7], + [1.5, np.nan, 3.], + [1.5, np.nan, 3.]]]) - assert_panel_equal(pan, expected) + assert_panel_equal(pan, expected) def test_update_from_dict(self): - pan = Panel({'one': DataFrame([[1.5, np.nan, 3], [1.5, np.nan, 3], - [1.5, np.nan, 3.], [1.5, np.nan, 3.]]), - 'two': DataFrame([[1.5, np.nan, 3.], [1.5, np.nan, 3.], - [1.5, np.nan, 3.], [1.5, np.nan, 3.]])}) - - other = {'two': DataFrame([[3.6, 2., np.nan], [np.nan, np.nan, 7]])} - - pan.update(other) - - expected = Panel( - {'two': DataFrame([[3.6, 2., 3], [1.5, np.nan, 7], - [1.5, np.nan, 3.], [1.5, np.nan, 3.]]), - 'one': DataFrame([[1.5, np.nan, 3.], [1.5, np.nan, 3.], - [1.5, np.nan, 3.], [1.5, np.nan, 3.]])}) - - assert_panel_equal(pan, expected) + with catch_warnings(record=True): + pan = Panel({'one': DataFrame([[1.5, np.nan, 3], + [1.5, np.nan, 3], + [1.5, np.nan, 3.], + [1.5, np.nan, 3.]]), + 'two': DataFrame([[1.5, np.nan, 3.], + [1.5, np.nan, 3.], + [1.5, np.nan, 3.], + [1.5, np.nan, 3.]])}) + + other = {'two': DataFrame( + [[3.6, 2., np.nan], [np.nan, np.nan, 7]])} + + pan.update(other) + + expected = Panel( + {'one': DataFrame([[1.5, np.nan, 3.], + [1.5, np.nan, 3.], + [1.5, np.nan, 3.], + [1.5, np.nan, 3.]]), + 'two': DataFrame([[3.6, 2., 3], + [1.5, np.nan, 7], + [1.5, np.nan, 3.], + [1.5, np.nan, 3.]]) + } + ) + + assert_panel_equal(pan, expected) def test_update_nooverwrite(self): - pan = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.], [1.5, np.nan, 
3.], - [1.5, np.nan, 3.]], - [[1.5, np.nan, 3.], [1.5, np.nan, 3.], [1.5, np.nan, 3.], - [1.5, np.nan, 3.]]]) + with catch_warnings(record=True): + pan = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.], + [1.5, np.nan, 3.], + [1.5, np.nan, 3.]], + [[1.5, np.nan, 3.], [1.5, np.nan, 3.], + [1.5, np.nan, 3.], + [1.5, np.nan, 3.]]]) - other = Panel([[[3.6, 2., np.nan], [np.nan, np.nan, 7]]], items=[1]) + other = Panel( + [[[3.6, 2., np.nan], [np.nan, np.nan, 7]]], items=[1]) - pan.update(other, overwrite=False) + pan.update(other, overwrite=False) - expected = Panel([[[1.5, np.nan, 3], [1.5, np.nan, 3], - [1.5, np.nan, 3.], [1.5, np.nan, 3.]], - [[1.5, 2., 3.], [1.5, np.nan, 3.], [1.5, np.nan, 3.], - [1.5, np.nan, 3.]]]) + expected = Panel([[[1.5, np.nan, 3], [1.5, np.nan, 3], + [1.5, np.nan, 3.], [1.5, np.nan, 3.]], + [[1.5, 2., 3.], [1.5, np.nan, 3.], + [1.5, np.nan, 3.], + [1.5, np.nan, 3.]]]) - assert_panel_equal(pan, expected) + assert_panel_equal(pan, expected) def test_update_filtered(self): - pan = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.], [1.5, np.nan, 3.], - [1.5, np.nan, 3.]], - [[1.5, np.nan, 3.], [1.5, np.nan, 3.], [1.5, np.nan, 3.], - [1.5, np.nan, 3.]]]) + with catch_warnings(record=True): + pan = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.], + [1.5, np.nan, 3.], + [1.5, np.nan, 3.]], + [[1.5, np.nan, 3.], [1.5, np.nan, 3.], + [1.5, np.nan, 3.], + [1.5, np.nan, 3.]]]) - other = Panel([[[3.6, 2., np.nan], [np.nan, np.nan, 7]]], items=[1]) + other = Panel( + [[[3.6, 2., np.nan], [np.nan, np.nan, 7]]], items=[1]) - pan.update(other, filter_func=lambda x: x > 2) + pan.update(other, filter_func=lambda x: x > 2) - expected = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.], - [1.5, np.nan, 3.], [1.5, np.nan, 3.]], - [[1.5, np.nan, 3], [1.5, np.nan, 7], - [1.5, np.nan, 3.], [1.5, np.nan, 3.]]]) + expected = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.], + [1.5, np.nan, 3.], [1.5, np.nan, 3.]], + [[1.5, np.nan, 3], [1.5, np.nan, 7], + [1.5, np.nan, 3.], [1.5, 
np.nan, 3.]]]) - assert_panel_equal(pan, expected) + assert_panel_equal(pan, expected) def test_update_raise(self): - pan = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.], [1.5, np.nan, 3.], - [1.5, np.nan, 3.]], - [[1.5, np.nan, 3.], [1.5, np.nan, 3.], [1.5, np.nan, 3.], - [1.5, np.nan, 3.]]]) - - self.assertRaises(Exception, pan.update, *(pan, ), + with catch_warnings(record=True): + pan = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.], + [1.5, np.nan, 3.], + [1.5, np.nan, 3.]], + [[1.5, np.nan, 3.], [1.5, np.nan, 3.], + [1.5, np.nan, 3.], + [1.5, np.nan, 3.]]]) + + pytest.raises(Exception, pan.update, *(pan, ), **{'raise_conflict': True}) def test_all_any(self): - self.assertTrue((self.panel.all(axis=0).values == nanall( - self.panel, axis=0)).all()) - self.assertTrue((self.panel.all(axis=1).values == nanall( - self.panel, axis=1).T).all()) - self.assertTrue((self.panel.all(axis=2).values == nanall( - self.panel, axis=2).T).all()) - self.assertTrue((self.panel.any(axis=0).values == nanany( - self.panel, axis=0)).all()) - self.assertTrue((self.panel.any(axis=1).values == nanany( - self.panel, axis=1).T).all()) - self.assertTrue((self.panel.any(axis=2).values == nanany( - self.panel, axis=2).T).all()) + assert (self.panel.all(axis=0).values == nanall( + self.panel, axis=0)).all() + assert (self.panel.all(axis=1).values == nanall( + self.panel, axis=1).T).all() + assert (self.panel.all(axis=2).values == nanall( + self.panel, axis=2).T).all() + assert (self.panel.any(axis=0).values == nanany( + self.panel, axis=0)).all() + assert (self.panel.any(axis=1).values == nanany( + self.panel, axis=1).T).all() + assert (self.panel.any(axis=2).values == nanany( + self.panel, axis=2).T).all() def test_all_any_unhandled(self): - self.assertRaises(NotImplementedError, self.panel.all, bool_only=True) - self.assertRaises(NotImplementedError, self.panel.any, bool_only=True) + pytest.raises(NotImplementedError, self.panel.all, bool_only=True) + pytest.raises(NotImplementedError, 
self.panel.any, bool_only=True) + + # GH issue 15960 + def test_sort_values(self): + pytest.raises(NotImplementedError, self.panel.sort_values) + pytest.raises(NotImplementedError, self.panel.sort_values, 'ItemA') -class TestLongPanel(tm.TestCase): +class TestLongPanel(object): """ LongPanel no longer exists, but... """ - def setUp(self): - import warnings - warnings.filterwarnings(action='ignore', category=FutureWarning) - - panel = tm.makePanel() - tm.add_nans(panel) - + def setup_method(self, method): + panel = make_test_panel() self.panel = panel.to_frame() self.unfiltered_panel = panel.to_frame(filter_observations=False) def test_ops_differently_indexed(self): - # trying to set non-identically indexed panel - wp = self.panel.to_panel() - wp2 = wp.reindex(major=wp.major_axis[:-1]) - lp2 = wp2.to_frame() + with catch_warnings(record=True): + # trying to set non-identically indexed panel + wp = self.panel.to_panel() + wp2 = wp.reindex(major=wp.major_axis[:-1]) + lp2 = wp2.to_frame() - result = self.panel + lp2 - assert_frame_equal(result.reindex(lp2.index), lp2 * 2) + result = self.panel + lp2 + assert_frame_equal(result.reindex(lp2.index), lp2 * 2) - # careful, mutation - self.panel['foo'] = lp2['ItemA'] - assert_series_equal(self.panel['foo'].reindex(lp2.index), lp2['ItemA'], - check_names=False) + # careful, mutation + self.panel['foo'] = lp2['ItemA'] + assert_series_equal(self.panel['foo'].reindex(lp2.index), + lp2['ItemA'], + check_names=False) def test_ops_scalar(self): - result = self.panel.mul(2) - expected = DataFrame.__mul__(self.panel, 2) - assert_frame_equal(result, expected) + with catch_warnings(record=True): + result = self.panel.mul(2) + expected = DataFrame.__mul__(self.panel, 2) + assert_frame_equal(result, expected) def test_combineFrame(self): - wp = self.panel.to_panel() - result = self.panel.add(wp['ItemA'].stack(), axis=0) - assert_frame_equal(result.to_panel()['ItemA'], wp['ItemA'] * 2) + with catch_warnings(record=True): + wp = 
self.panel.to_panel() + result = self.panel.add(wp['ItemA'].stack(), axis=0) + assert_frame_equal(result.to_panel()['ItemA'], wp['ItemA'] * 2) def test_combinePanel(self): - wp = self.panel.to_panel() - result = self.panel.add(self.panel) - wide_result = result.to_panel() - assert_frame_equal(wp['ItemA'] * 2, wide_result['ItemA']) + with catch_warnings(record=True): + wp = self.panel.to_panel() + result = self.panel.add(self.panel) + wide_result = result.to_panel() + assert_frame_equal(wp['ItemA'] * 2, wide_result['ItemA']) - # one item - result = self.panel.add(self.panel.filter(['ItemA'])) + # one item + result = self.panel.add(self.panel.filter(['ItemA'])) def test_combine_scalar(self): - result = self.panel.mul(2) - expected = DataFrame(self.panel._data) * 2 - assert_frame_equal(result, expected) + with catch_warnings(record=True): + result = self.panel.mul(2) + expected = DataFrame(self.panel._data) * 2 + assert_frame_equal(result, expected) def test_combine_series(self): - s = self.panel['ItemA'][:10] - result = self.panel.add(s, axis=0) - expected = DataFrame.add(self.panel, s, axis=0) - assert_frame_equal(result, expected) + with catch_warnings(record=True): + s = self.panel['ItemA'][:10] + result = self.panel.add(s, axis=0) + expected = DataFrame.add(self.panel, s, axis=0) + assert_frame_equal(result, expected) - s = self.panel.iloc[5] - result = self.panel + s - expected = DataFrame.add(self.panel, s, axis=1) - assert_frame_equal(result, expected) + s = self.panel.iloc[5] + result = self.panel + s + expected = DataFrame.add(self.panel, s, axis=1) + assert_frame_equal(result, expected) def test_operators(self): - wp = self.panel.to_panel() - result = (self.panel + 1).to_panel() - assert_frame_equal(wp['ItemA'] + 1, result['ItemA']) + with catch_warnings(record=True): + wp = self.panel.to_panel() + result = (self.panel + 1).to_panel() + assert_frame_equal(wp['ItemA'] + 1, result['ItemA']) def test_arith_flex_panel(self): - ops = ['add', 'sub', 'mul', 'div', 
'truediv', 'pow', 'floordiv', 'mod'] - if not compat.PY3: - aliases = {} - else: - aliases = {'div': 'truediv'} - self.panel = self.panel.to_panel() - - for n in [np.random.randint(-50, -1), np.random.randint(1, 50), 0]: - for op in ops: - alias = aliases.get(op, op) - f = getattr(operator, alias) - exp = f(self.panel, n) - result = getattr(self.panel, op)(n) - assert_panel_equal(result, exp, check_panel_type=True) - - # rops - r_f = lambda x, y: f(y, x) - exp = r_f(self.panel, n) - result = getattr(self.panel, 'r' + op)(n) - assert_panel_equal(result, exp) + with catch_warnings(record=True): + ops = ['add', 'sub', 'mul', 'div', + 'truediv', 'pow', 'floordiv', 'mod'] + if not compat.PY3: + aliases = {} + else: + aliases = {'div': 'truediv'} + self.panel = self.panel.to_panel() + + for n in [np.random.randint(-50, -1), np.random.randint(1, 50), 0]: + for op in ops: + alias = aliases.get(op, op) + f = getattr(operator, alias) + exp = f(self.panel, n) + result = getattr(self.panel, op)(n) + assert_panel_equal(result, exp, check_panel_type=True) + + # rops + r_f = lambda x, y: f(y, x) + exp = r_f(self.panel, n) + result = getattr(self.panel, 'r' + op)(n) + assert_panel_equal(result, exp) def test_sort(self): def is_sorted(arr): return (arr[1:] > arr[:-1]).any() sorted_minor = self.panel.sort_index(level=1) - self.assertTrue(is_sorted(sorted_minor.index.labels[1])) + assert is_sorted(sorted_minor.index.labels[1]) sorted_major = sorted_minor.sort_index(level=0) - self.assertTrue(is_sorted(sorted_major.index.labels[0])) + assert is_sorted(sorted_major.index.labels[0]) def test_to_string(self): buf = StringIO() @@ -2377,153 +2573,140 @@ def test_to_string(self): def test_to_sparse(self): if isinstance(self.panel, Panel): msg = 'sparsifying is not supported' - tm.assertRaisesRegexp(NotImplementedError, msg, - self.panel.to_sparse) + tm.assert_raises_regex(NotImplementedError, msg, + self.panel.to_sparse) def test_truncate(self): - dates = self.panel.index.levels[0] - start, 
end = dates[1], dates[5] + with catch_warnings(record=True): + dates = self.panel.index.levels[0] + start, end = dates[1], dates[5] - trunced = self.panel.truncate(start, end).to_panel() - expected = self.panel.to_panel()['ItemA'].truncate(start, end) + trunced = self.panel.truncate(start, end).to_panel() + expected = self.panel.to_panel()['ItemA'].truncate(start, end) - # TODO trucate drops index.names - assert_frame_equal(trunced['ItemA'], expected, check_names=False) + # TODO truncate drops index.names + assert_frame_equal(trunced['ItemA'], expected, check_names=False) - trunced = self.panel.truncate(before=start).to_panel() - expected = self.panel.to_panel()['ItemA'].truncate(before=start) + trunced = self.panel.truncate(before=start).to_panel() + expected = self.panel.to_panel()['ItemA'].truncate(before=start) - # TODO trucate drops index.names - assert_frame_equal(trunced['ItemA'], expected, check_names=False) + # TODO truncate drops index.names + assert_frame_equal(trunced['ItemA'], expected, check_names=False) - trunced = self.panel.truncate(after=end).to_panel() - expected = self.panel.to_panel()['ItemA'].truncate(after=end) + trunced = self.panel.truncate(after=end).to_panel() + expected = self.panel.to_panel()['ItemA'].truncate(after=end) - # TODO trucate drops index.names - assert_frame_equal(trunced['ItemA'], expected, check_names=False) + # TODO truncate drops index.names + assert_frame_equal(trunced['ItemA'], expected, check_names=False) - # truncate on dates that aren't in there - wp = self.panel.to_panel() - new_index = wp.major_axis[::5] + # truncate on dates that aren't in there + wp = self.panel.to_panel() + new_index = wp.major_axis[::5] - wp2 = wp.reindex(major=new_index) + wp2 = wp.reindex(major=new_index) - lp2 = wp2.to_frame() - lp_trunc = lp2.truncate(wp.major_axis[2], wp.major_axis[-2]) + lp2 = wp2.to_frame() + lp_trunc = lp2.truncate(wp.major_axis[2], wp.major_axis[-2]) - wp_trunc = wp2.truncate(wp.major_axis[2], wp.major_axis[-2]) + 
wp_trunc = wp2.truncate(wp.major_axis[2], wp.major_axis[-2]) - assert_panel_equal(wp_trunc, lp_trunc.to_panel()) + assert_panel_equal(wp_trunc, lp_trunc.to_panel()) - # throw proper exception - self.assertRaises(Exception, lp2.truncate, wp.major_axis[-2], + # throw proper exception + pytest.raises(Exception, lp2.truncate, wp.major_axis[-2], wp.major_axis[2]) def test_axis_dummies(self): - from pandas.core.reshape import make_axis_dummies + from pandas.core.reshape.reshape import make_axis_dummies minor_dummies = make_axis_dummies(self.panel, 'minor').astype(np.uint8) - self.assertEqual(len(minor_dummies.columns), - len(self.panel.index.levels[1])) + assert len(minor_dummies.columns) == len(self.panel.index.levels[1]) major_dummies = make_axis_dummies(self.panel, 'major').astype(np.uint8) - self.assertEqual(len(major_dummies.columns), - len(self.panel.index.levels[0])) + assert len(major_dummies.columns) == len(self.panel.index.levels[0]) mapping = {'A': 'one', 'B': 'one', 'C': 'two', 'D': 'two'} transformed = make_axis_dummies(self.panel, 'minor', transform=mapping.get).astype(np.uint8) - self.assertEqual(len(transformed.columns), 2) - self.assert_index_equal(transformed.columns, Index(['one', 'two'])) + assert len(transformed.columns) == 2 + tm.assert_index_equal(transformed.columns, Index(['one', 'two'])) # TODO: test correctness def test_get_dummies(self): - from pandas.core.reshape import get_dummies, make_axis_dummies + from pandas.core.reshape.reshape import get_dummies, make_axis_dummies self.panel['Label'] = self.panel.index.labels[1] minor_dummies = make_axis_dummies(self.panel, 'minor').astype(np.uint8) dummies = get_dummies(self.panel['Label']) - self.assert_numpy_array_equal(dummies.values, minor_dummies.values) + tm.assert_numpy_array_equal(dummies.values, minor_dummies.values) def test_mean(self): - means = self.panel.mean(level='minor') + with catch_warnings(record=True): + means = self.panel.mean(level='minor') - # test versus Panel version - 
wide_means = self.panel.to_panel().mean('major') - assert_frame_equal(means, wide_means) + # test versus Panel version + wide_means = self.panel.to_panel().mean('major') + assert_frame_equal(means, wide_means) def test_sum(self): - sums = self.panel.sum(level='minor') + with catch_warnings(record=True): + sums = self.panel.sum(level='minor') - # test versus Panel version - wide_sums = self.panel.to_panel().sum('major') - assert_frame_equal(sums, wide_sums) + # test versus Panel version + wide_sums = self.panel.to_panel().sum('major') + assert_frame_equal(sums, wide_sums) def test_count(self): - index = self.panel.index + with catch_warnings(record=True): + index = self.panel.index - major_count = self.panel.count(level=0)['ItemA'] - labels = index.labels[0] - for i, idx in enumerate(index.levels[0]): - self.assertEqual(major_count[i], (labels == i).sum()) + major_count = self.panel.count(level=0)['ItemA'] + labels = index.labels[0] + for i, idx in enumerate(index.levels[0]): + assert major_count[i] == (labels == i).sum() - minor_count = self.panel.count(level=1)['ItemA'] - labels = index.labels[1] - for i, idx in enumerate(index.levels[1]): - self.assertEqual(minor_count[i], (labels == i).sum()) + minor_count = self.panel.count(level=1)['ItemA'] + labels = index.labels[1] + for i, idx in enumerate(index.levels[1]): + assert minor_count[i] == (labels == i).sum() def test_join(self): - lp1 = self.panel.filter(['ItemA', 'ItemB']) - lp2 = self.panel.filter(['ItemC']) + with catch_warnings(record=True): + lp1 = self.panel.filter(['ItemA', 'ItemB']) + lp2 = self.panel.filter(['ItemC']) - joined = lp1.join(lp2) + joined = lp1.join(lp2) - self.assertEqual(len(joined.columns), 3) + assert len(joined.columns) == 3 - self.assertRaises(Exception, lp1.join, + pytest.raises(Exception, lp1.join, self.panel.filter(['ItemB', 'ItemC'])) def test_pivot(self): - from pandas.core.reshape import _slow_pivot - - one, two, three = (np.array([1, 2, 3, 4, 5]), - np.array(['a', 'b', 'c', 
'd', 'e']), - np.array([1, 2, 3, 5, 4.])) - df = pivot(one, two, three) - self.assertEqual(df['a'][1], 1) - self.assertEqual(df['b'][2], 2) - self.assertEqual(df['c'][3], 3) - self.assertEqual(df['d'][4], 5) - self.assertEqual(df['e'][5], 4) - assert_frame_equal(df, _slow_pivot(one, two, three)) - - # weird overlap, TODO: test? - a, b, c = (np.array([1, 2, 3, 4, 4]), - np.array(['a', 'a', 'a', 'a', 'a']), - np.array([1., 2., 3., 4., 5.])) - self.assertRaises(Exception, pivot, a, b, c) - - # corner case, empty - df = pivot(np.array([]), np.array([]), np.array([])) - - -def test_monotonic(): - pos = np.array([1, 2, 3, 5]) - - def _monotonic(arr): - return not (arr[1:] < arr[:-1]).any() - - assert _monotonic(pos) - - neg = np.array([1, 2, 3, 4, 3]) - - assert not _monotonic(neg) - - neg2 = np.array([5, 1, 2, 3, 4, 5]) - - assert not _monotonic(neg2) + with catch_warnings(record=True): + from pandas.core.reshape.reshape import _slow_pivot + + one, two, three = (np.array([1, 2, 3, 4, 5]), + np.array(['a', 'b', 'c', 'd', 'e']), + np.array([1, 2, 3, 5, 4.])) + df = pivot(one, two, three) + assert df['a'][1] == 1 + assert df['b'][2] == 2 + assert df['c'][3] == 3 + assert df['d'][4] == 5 + assert df['e'][5] == 4 + assert_frame_equal(df, _slow_pivot(one, two, three)) + + # weird overlap, TODO: test? 
+ a, b, c = (np.array([1, 2, 3, 4, 4]), + np.array(['a', 'a', 'a', 'a', 'a']), + np.array([1., 2., 3., 4., 5.])) + pytest.raises(Exception, pivot, a, b, c) + + # corner case, empty + df = pivot(np.array([]), np.array([]), np.array([])) def test_panel_index(): diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py deleted file mode 100644 index 902b42e7d77d7..0000000000000 --- a/pandas/tests/test_panel4d.py +++ /dev/null @@ -1,943 +0,0 @@ -# -*- coding: utf-8 -*- -from datetime import datetime -from pandas.compat import range, lrange -import operator -import pytest - -import numpy as np - -from pandas.types.common import is_float_dtype -from pandas import Series, Index, isnull, notnull -from pandas.core.panel import Panel -from pandas.core.panel4d import Panel4D -from pandas.core.series import remove_na -from pandas.tseries.offsets import BDay - -from pandas.util.testing import (assert_panel_equal, - assert_panel4d_equal, - assert_frame_equal, - assert_series_equal, - assert_almost_equal) -import pandas.util.testing as tm - - -def add_nans(panel4d): - for l, label in enumerate(panel4d.labels): - panel = panel4d[label] - tm.add_nans(panel) - - -class SafeForLongAndSparse(object): - - def test_repr(self): - repr(self.panel4d) - - def test_iter(self): - tm.equalContents(list(self.panel4d), self.panel4d.labels) - - def test_count(self): - f = lambda s: notnull(s).sum() - self._check_stat_op('count', f, obj=self.panel4d, has_skipna=False) - - def test_sum(self): - self._check_stat_op('sum', np.sum) - - def test_mean(self): - self._check_stat_op('mean', np.mean) - - def test_prod(self): - self._check_stat_op('prod', np.prod) - - def test_median(self): - def wrapper(x): - if isnull(x).any(): - return np.nan - return np.median(x) - - self._check_stat_op('median', wrapper) - - def test_min(self): - self._check_stat_op('min', np.min) - - def test_max(self): - self._check_stat_op('max', np.max) - - def test_skew(self): - try: - from scipy.stats import skew - 
except ImportError: - pytest.skip("no scipy.stats.skew") - - def this_skew(x): - if len(x) < 3: - return np.nan - return skew(x, bias=False) - self._check_stat_op('skew', this_skew) - - # def test_mad(self): - # f = lambda x: np.abs(x - x.mean()).mean() - # self._check_stat_op('mad', f) - - def test_var(self): - def alt(x): - if len(x) < 2: - return np.nan - return np.var(x, ddof=1) - self._check_stat_op('var', alt) - - def test_std(self): - def alt(x): - if len(x) < 2: - return np.nan - return np.std(x, ddof=1) - self._check_stat_op('std', alt) - - def test_sem(self): - def alt(x): - if len(x) < 2: - return np.nan - return np.std(x, ddof=1) / np.sqrt(len(x)) - self._check_stat_op('sem', alt) - - # def test_skew(self): - # from scipy.stats import skew - - # def alt(x): - # if len(x) < 3: - # return np.nan - # return skew(x, bias=False) - - # self._check_stat_op('skew', alt) - - def _check_stat_op(self, name, alternative, obj=None, has_skipna=True): - if obj is None: - obj = self.panel4d - - # # set some NAs - # obj.loc[5:10] = np.nan - # obj.loc[15:20, -2:] = np.nan - - f = getattr(obj, name) - - if has_skipna: - def skipna_wrapper(x): - nona = remove_na(x) - if len(nona) == 0: - return np.nan - return alternative(nona) - - def wrapper(x): - return alternative(np.asarray(x)) - - for i in range(obj.ndim): - result = f(axis=i, skipna=False) - assert_panel_equal(result, obj.apply(wrapper, axis=i)) - else: - skipna_wrapper = alternative - wrapper = alternative - - for i in range(obj.ndim): - result = f(axis=i) - if not tm._incompat_bottleneck_version(name): - assert_panel_equal(result, obj.apply(skipna_wrapper, axis=i)) - - self.assertRaises(Exception, f, axis=obj.ndim) - - -class SafeForSparse(object): - - @classmethod - def assert_panel_equal(cls, x, y): - assert_panel_equal(x, y) - - @classmethod - def assert_panel4d_equal(cls, x, y): - assert_panel4d_equal(x, y) - - def test_get_axis(self): - assert(self.panel4d._get_axis(0) is self.panel4d.labels) - 
assert(self.panel4d._get_axis(1) is self.panel4d.items) - assert(self.panel4d._get_axis(2) is self.panel4d.major_axis) - assert(self.panel4d._get_axis(3) is self.panel4d.minor_axis) - - def test_set_axis(self): - new_labels = Index(np.arange(len(self.panel4d.labels))) - - # TODO: unused? - # new_items = Index(np.arange(len(self.panel4d.items))) - - new_major = Index(np.arange(len(self.panel4d.major_axis))) - new_minor = Index(np.arange(len(self.panel4d.minor_axis))) - - # ensure propagate to potentially prior-cached items too - - # TODO: unused? - # label = self.panel4d['l1'] - - self.panel4d.labels = new_labels - - if hasattr(self.panel4d, '_item_cache'): - self.assertNotIn('l1', self.panel4d._item_cache) - self.assertIs(self.panel4d.labels, new_labels) - - self.panel4d.major_axis = new_major - self.assertIs(self.panel4d[0].major_axis, new_major) - self.assertIs(self.panel4d.major_axis, new_major) - - self.panel4d.minor_axis = new_minor - self.assertIs(self.panel4d[0].minor_axis, new_minor) - self.assertIs(self.panel4d.minor_axis, new_minor) - - def test_get_axis_number(self): - self.assertEqual(self.panel4d._get_axis_number('labels'), 0) - self.assertEqual(self.panel4d._get_axis_number('items'), 1) - self.assertEqual(self.panel4d._get_axis_number('major'), 2) - self.assertEqual(self.panel4d._get_axis_number('minor'), 3) - - def test_get_axis_name(self): - self.assertEqual(self.panel4d._get_axis_name(0), 'labels') - self.assertEqual(self.panel4d._get_axis_name(1), 'items') - self.assertEqual(self.panel4d._get_axis_name(2), 'major_axis') - self.assertEqual(self.panel4d._get_axis_name(3), 'minor_axis') - - def test_arith(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self._test_op(self.panel4d, operator.add) - self._test_op(self.panel4d, operator.sub) - self._test_op(self.panel4d, operator.mul) - self._test_op(self.panel4d, operator.truediv) - self._test_op(self.panel4d, operator.floordiv) - self._test_op(self.panel4d, 
operator.pow) - - self._test_op(self.panel4d, lambda x, y: y + x) - self._test_op(self.panel4d, lambda x, y: y - x) - self._test_op(self.panel4d, lambda x, y: y * x) - self._test_op(self.panel4d, lambda x, y: y / x) - self._test_op(self.panel4d, lambda x, y: y ** x) - - self.assertRaises(Exception, self.panel4d.__add__, - self.panel4d['l1']) - - @staticmethod - def _test_op(panel4d, op): - result = op(panel4d, 1) - assert_panel_equal(result['l1'], op(panel4d['l1'], 1)) - - def test_keys(self): - tm.equalContents(list(self.panel4d.keys()), self.panel4d.labels) - - def test_iteritems(self): - """Test panel4d.iteritems()""" - - self.assertEqual(len(list(self.panel4d.iteritems())), - len(self.panel4d.labels)) - - def test_combinePanel4d(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = self.panel4d.add(self.panel4d) - self.assert_panel4d_equal(result, self.panel4d * 2) - - def test_neg(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self.assert_panel4d_equal(-self.panel4d, self.panel4d * -1) - - def test_select(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - - p = self.panel4d - - # select labels - result = p.select(lambda x: x in ('l1', 'l3'), axis='labels') - expected = p.reindex(labels=['l1', 'l3']) - self.assert_panel4d_equal(result, expected) - - # select items - result = p.select(lambda x: x in ('ItemA', 'ItemC'), axis='items') - expected = p.reindex(items=['ItemA', 'ItemC']) - self.assert_panel4d_equal(result, expected) - - # select major_axis - result = p.select(lambda x: x >= datetime(2000, 1, 15), - axis='major') - new_major = p.major_axis[p.major_axis >= datetime(2000, 1, 15)] - expected = p.reindex(major=new_major) - self.assert_panel4d_equal(result, expected) - - # select minor_axis - result = p.select(lambda x: x in ('D', 'A'), axis=3) - expected = p.reindex(minor=['A', 'D']) - self.assert_panel4d_equal(result, expected) - - # corner case, empty 
thing - result = p.select(lambda x: x in ('foo',), axis='items') - self.assert_panel4d_equal(result, p.reindex(items=[])) - - def test_get_value(self): - - for item in self.panel.items: - for mjr in self.panel.major_axis[::2]: - for mnr in self.panel.minor_axis: - result = self.panel.get_value(item, mjr, mnr) - expected = self.panel[item][mnr][mjr] - assert_almost_equal(result, expected) - - def test_abs(self): - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = self.panel4d.abs() - expected = np.abs(self.panel4d) - self.assert_panel4d_equal(result, expected) - - p = self.panel4d['l1'] - result = p.abs() - expected = np.abs(p) - assert_panel_equal(result, expected) - - df = p['ItemA'] - result = df.abs() - expected = np.abs(df) - assert_frame_equal(result, expected) - - -class CheckIndexing(object): - - def test_getitem(self): - self.assertRaises(Exception, self.panel4d.__getitem__, 'ItemQ') - - def test_delitem_and_pop(self): - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - expected = self.panel4d['l2'] - result = self.panel4d.pop('l2') - assert_panel_equal(expected, result) - self.assertNotIn('l2', self.panel4d.labels) - - del self.panel4d['l3'] - self.assertNotIn('l3', self.panel4d.labels) - self.assertRaises(Exception, self.panel4d.__delitem__, 'l3') - - values = np.empty((4, 4, 4, 4)) - values[0] = 0 - values[1] = 1 - values[2] = 2 - values[3] = 3 - - panel4d = Panel4D(values, lrange(4), lrange(4), - lrange(4), lrange(4)) - - # did we delete the right row? 
- panel4dc = panel4d.copy() - del panel4dc[0] - assert_panel_equal(panel4dc[1], panel4d[1]) - assert_panel_equal(panel4dc[2], panel4d[2]) - assert_panel_equal(panel4dc[3], panel4d[3]) - - panel4dc = panel4d.copy() - del panel4dc[1] - assert_panel_equal(panel4dc[0], panel4d[0]) - assert_panel_equal(panel4dc[2], panel4d[2]) - assert_panel_equal(panel4dc[3], panel4d[3]) - - panel4dc = panel4d.copy() - del panel4dc[2] - assert_panel_equal(panel4dc[1], panel4d[1]) - assert_panel_equal(panel4dc[0], panel4d[0]) - assert_panel_equal(panel4dc[3], panel4d[3]) - - panel4dc = panel4d.copy() - del panel4dc[3] - assert_panel_equal(panel4dc[1], panel4d[1]) - assert_panel_equal(panel4dc[2], panel4d[2]) - assert_panel_equal(panel4dc[0], panel4d[0]) - - def test_setitem(self): - # LongPanel with one item - # lp = self.panel.filter(['ItemA', 'ItemB']).to_frame() - # self.assertRaises(Exception, self.panel.__setitem__, - # 'ItemE', lp) - - # Panel - p = Panel(dict( - ItemA=self.panel4d['l1']['ItemA'][2:].filter(items=['A', 'B']))) - self.panel4d['l4'] = p - self.panel4d['l5'] = p - - p2 = self.panel4d['l4'] - - assert_panel_equal(p, p2.reindex(items=p.items, - major_axis=p.major_axis, - minor_axis=p.minor_axis)) - - # scalar - self.panel4d['lG'] = 1 - self.panel4d['lE'] = True - self.assertEqual(self.panel4d['lG'].values.dtype, np.int64) - self.assertEqual(self.panel4d['lE'].values.dtype, np.bool_) - - # object dtype - self.panel4d['lQ'] = 'foo' - self.assertEqual(self.panel4d['lQ'].values.dtype, np.object_) - - # boolean dtype - self.panel4d['lP'] = self.panel4d['l1'] > 0 - self.assertEqual(self.panel4d['lP'].values.dtype, np.bool_) - - def test_setitem_by_indexer(self): - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - - # Panel - panel4dc = self.panel4d.copy() - p = panel4dc.iloc[0] - - def func(): - self.panel4d.iloc[0] = p - self.assertRaises(NotImplementedError, func) - - # DataFrame - panel4dc = self.panel4d.copy() - df = panel4dc.iloc[0, 0] - 
df.iloc[:] = 1 - panel4dc.iloc[0, 0] = df - self.assertTrue((panel4dc.iloc[0, 0].values == 1).all()) - - # Series - panel4dc = self.panel4d.copy() - s = panel4dc.iloc[0, 0, :, 0] - s.iloc[:] = 1 - panel4dc.iloc[0, 0, :, 0] = s - self.assertTrue((panel4dc.iloc[0, 0, :, 0].values == 1).all()) - - # scalar - panel4dc = self.panel4d.copy() - panel4dc.iloc[0] = 1 - panel4dc.iloc[1] = True - panel4dc.iloc[2] = 'foo' - self.assertTrue((panel4dc.iloc[0].values == 1).all()) - self.assertTrue(panel4dc.iloc[1].values.all()) - self.assertTrue((panel4dc.iloc[2].values == 'foo').all()) - - def test_setitem_by_indexer_mixed_type(self): - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - # GH 8702 - self.panel4d['foo'] = 'bar' - - # scalar - panel4dc = self.panel4d.copy() - panel4dc.iloc[0] = 1 - panel4dc.iloc[1] = True - panel4dc.iloc[2] = 'foo' - self.assertTrue((panel4dc.iloc[0].values == 1).all()) - self.assertTrue(panel4dc.iloc[1].values.all()) - self.assertTrue((panel4dc.iloc[2].values == 'foo').all()) - - def test_comparisons(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - p1 = tm.makePanel4D() - p2 = tm.makePanel4D() - - tp = p1.reindex(labels=p1.labels.tolist() + ['foo']) - p = p1[p1.labels[0]] - - def test_comp(func): - result = func(p1, p2) - self.assert_numpy_array_equal(result.values, - func(p1.values, p2.values)) - - # versus non-indexed same objs - self.assertRaises(Exception, func, p1, tp) - - # versus different objs - self.assertRaises(Exception, func, p1, p) - - result3 = func(self.panel4d, 0) - self.assert_numpy_array_equal(result3.values, - func(self.panel4d.values, 0)) - - with np.errstate(invalid='ignore'): - test_comp(operator.eq) - test_comp(operator.ne) - test_comp(operator.lt) - test_comp(operator.gt) - test_comp(operator.ge) - test_comp(operator.le) - - def test_major_xs(self): - ref = self.panel4d['l1']['ItemA'] - - idx = self.panel4d.major_axis[5] - xs = self.panel4d.major_xs(idx) - - 
assert_series_equal(xs['l1'].T['ItemA'], - ref.xs(idx), check_names=False) - - # not contained - idx = self.panel4d.major_axis[0] - BDay() - self.assertRaises(Exception, self.panel4d.major_xs, idx) - - def test_major_xs_mixed(self): - self.panel4d['l4'] = 'foo' - xs = self.panel4d.major_xs(self.panel4d.major_axis[0]) - self.assertEqual(xs['l1']['A'].dtype, np.float64) - self.assertEqual(xs['l4']['A'].dtype, np.object_) - - def test_minor_xs(self): - ref = self.panel4d['l1']['ItemA'] - - idx = self.panel4d.minor_axis[1] - xs = self.panel4d.minor_xs(idx) - - assert_series_equal(xs['l1'].T['ItemA'], ref[idx], check_names=False) - - # not contained - self.assertRaises(Exception, self.panel4d.minor_xs, 'E') - - def test_minor_xs_mixed(self): - self.panel4d['l4'] = 'foo' - - xs = self.panel4d.minor_xs('D') - self.assertEqual(xs['l1'].T['ItemA'].dtype, np.float64) - self.assertEqual(xs['l4'].T['ItemA'].dtype, np.object_) - - def test_xs(self): - l1 = self.panel4d.xs('l1', axis=0) - expected = self.panel4d['l1'] - assert_panel_equal(l1, expected) - - # view if possible - l1_view = self.panel4d.xs('l1', axis=0) - l1_view.values[:] = np.nan - self.assertTrue(np.isnan(self.panel4d['l1'].values).all()) - - # mixed-type - self.panel4d['strings'] = 'foo' - result = self.panel4d.xs('D', axis=3) - self.assertIsNotNone(result.is_copy) - - def test_getitem_fancy_labels(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - panel4d = self.panel4d - - labels = panel4d.labels[[1, 0]] - items = panel4d.items[[1, 0]] - dates = panel4d.major_axis[::2] - cols = ['D', 'C', 'F'] - - # all 4 specified - assert_panel4d_equal(panel4d.loc[labels, items, dates, cols], - panel4d.reindex(labels=labels, items=items, - major=dates, minor=cols)) - - # 3 specified - assert_panel4d_equal(panel4d.loc[:, items, dates, cols], - panel4d.reindex(items=items, major=dates, - minor=cols)) - - # 2 specified - assert_panel4d_equal(panel4d.loc[:, :, dates, cols], - 
panel4d.reindex(major=dates, minor=cols)) - - assert_panel4d_equal(panel4d.loc[:, items, :, cols], - panel4d.reindex(items=items, minor=cols)) - - assert_panel4d_equal(panel4d.loc[:, items, dates, :], - panel4d.reindex(items=items, major=dates)) - - # only 1 - assert_panel4d_equal(panel4d.loc[:, items, :, :], - panel4d.reindex(items=items)) - - assert_panel4d_equal(panel4d.loc[:, :, dates, :], - panel4d.reindex(major=dates)) - - assert_panel4d_equal(panel4d.loc[:, :, :, cols], - panel4d.reindex(minor=cols)) - - def test_getitem_fancy_slice(self): - pass - - def test_getitem_fancy_ints(self): - pass - - def test_get_value(self): - for label in self.panel4d.labels: - for item in self.panel4d.items: - for mjr in self.panel4d.major_axis[::2]: - for mnr in self.panel4d.minor_axis: - result = self.panel4d.get_value( - label, item, mjr, mnr) - expected = self.panel4d[label][item][mnr][mjr] - assert_almost_equal(result, expected) - - def test_set_value(self): - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - - for label in self.panel4d.labels: - for item in self.panel4d.items: - for mjr in self.panel4d.major_axis[::2]: - for mnr in self.panel4d.minor_axis: - self.panel4d.set_value(label, item, mjr, mnr, 1.) - assert_almost_equal( - self.panel4d[label][item][mnr][mjr], 1.) 
- - res3 = self.panel4d.set_value('l4', 'ItemE', 'foobar', 'baz', 5) - self.assertTrue(is_float_dtype(res3['l4'].values)) - - # resize - res = self.panel4d.set_value('l4', 'ItemE', 'foo', 'bar', 1.5) - tm.assertIsInstance(res, Panel4D) - self.assertIsNot(res, self.panel4d) - self.assertEqual(res.get_value('l4', 'ItemE', 'foo', 'bar'), 1.5) - - res3 = self.panel4d.set_value('l4', 'ItemE', 'foobar', 'baz', 5) - self.assertTrue(is_float_dtype(res3['l4'].values)) - - -class TestPanel4d(tm.TestCase, CheckIndexing, SafeForSparse, - SafeForLongAndSparse): - - @classmethod - def assert_panel4d_equal(cls, x, y): - assert_panel4d_equal(x, y) - - def setUp(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self.panel4d = tm.makePanel4D(nper=8) - add_nans(self.panel4d) - - def test_constructor(self): - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - panel4d = Panel4D(self.panel4d._data) - self.assertIs(panel4d._data, self.panel4d._data) - - panel4d = Panel4D(self.panel4d._data, copy=True) - self.assertIsNot(panel4d._data, self.panel4d._data) - assert_panel4d_equal(panel4d, self.panel4d) - - vals = self.panel4d.values - - # no copy - panel4d = Panel4D(vals) - self.assertIs(panel4d.values, vals) - - # copy - panel4d = Panel4D(vals, copy=True) - self.assertIsNot(panel4d.values, vals) - - # GH #8285, test when scalar data is used to construct a Panel4D - # if dtype is not passed, it should be inferred - value_and_dtype = [(1, 'int64'), (3.14, 'float64'), - ('foo', np.object_)] - for (val, dtype) in value_and_dtype: - panel4d = Panel4D(val, labels=range(2), items=range( - 3), major_axis=range(4), minor_axis=range(5)) - vals = np.empty((2, 3, 4, 5), dtype=dtype) - vals.fill(val) - expected = Panel4D(vals, dtype=dtype) - assert_panel4d_equal(panel4d, expected) - - # test the case when dtype is passed - panel4d = Panel4D(1, labels=range(2), items=range( - 3), major_axis=range(4), minor_axis=range(5), dtype='float32') - vals = 
np.empty((2, 3, 4, 5), dtype='float32') - vals.fill(1) - - expected = Panel4D(vals, dtype='float32') - assert_panel4d_equal(panel4d, expected) - - def test_constructor_cast(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - zero_filled = self.panel4d.fillna(0) - - casted = Panel4D(zero_filled._data, dtype=int) - casted2 = Panel4D(zero_filled.values, dtype=int) - - exp_values = zero_filled.values.astype(int) - assert_almost_equal(casted.values, exp_values) - assert_almost_equal(casted2.values, exp_values) - - casted = Panel4D(zero_filled._data, dtype=np.int32) - casted2 = Panel4D(zero_filled.values, dtype=np.int32) - - exp_values = zero_filled.values.astype(np.int32) - assert_almost_equal(casted.values, exp_values) - assert_almost_equal(casted2.values, exp_values) - - # can't cast - data = [[['foo', 'bar', 'baz']]] - self.assertRaises(ValueError, Panel, data, dtype=float) - - def test_consolidate(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self.assertTrue(self.panel4d._data.is_consolidated()) - - self.panel4d['foo'] = 1. 
- self.assertFalse(self.panel4d._data.is_consolidated()) - - panel4d = self.panel4d.consolidate() - self.assertTrue(panel4d._data.is_consolidated()) - - def test_ctor_dict(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - l1 = self.panel4d['l1'] - l2 = self.panel4d['l2'] - - d = {'A': l1, 'B': l2.loc[['ItemB'], :, :]} - panel4d = Panel4D(d) - - assert_panel_equal(panel4d['A'], self.panel4d['l1']) - assert_frame_equal(panel4d.loc['B', 'ItemB', :, :], - self.panel4d.loc['l2', ['ItemB'], - :, :]['ItemB']) - - def test_constructor_dict_mixed(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - data = dict((k, v.values) for k, v in self.panel4d.iteritems()) - result = Panel4D(data) - - exp_major = Index(np.arange(len(self.panel4d.major_axis))) - self.assert_index_equal(result.major_axis, exp_major) - - result = Panel4D(data, - labels=self.panel4d.labels, - items=self.panel4d.items, - major_axis=self.panel4d.major_axis, - minor_axis=self.panel4d.minor_axis) - assert_panel4d_equal(result, self.panel4d) - - data['l2'] = self.panel4d['l2'] - - result = Panel4D(data) - assert_panel4d_equal(result, self.panel4d) - - # corner, blow up - data['l2'] = data['l2']['ItemB'] - self.assertRaises(Exception, Panel4D, data) - - data['l2'] = self.panel4d['l2'].values[:, :, :-1] - self.assertRaises(Exception, Panel4D, data) - - def test_constructor_resize(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - data = self.panel4d._data - labels = self.panel4d.labels[:-1] - items = self.panel4d.items[:-1] - major = self.panel4d.major_axis[:-1] - minor = self.panel4d.minor_axis[:-1] - - result = Panel4D(data, labels=labels, items=items, - major_axis=major, minor_axis=minor) - expected = self.panel4d.reindex( - labels=labels, items=items, major=major, minor=minor) - assert_panel4d_equal(result, expected) - - result = Panel4D(data, items=items, major_axis=major) - expected = 
self.panel4d.reindex(items=items, major=major) - assert_panel4d_equal(result, expected) - - result = Panel4D(data, items=items) - expected = self.panel4d.reindex(items=items) - assert_panel4d_equal(result, expected) - - result = Panel4D(data, minor_axis=minor) - expected = self.panel4d.reindex(minor=minor) - assert_panel4d_equal(result, expected) - - def test_conform(self): - - p = self.panel4d['l1'].filter(items=['ItemA', 'ItemB']) - conformed = self.panel4d.conform(p) - - tm.assert_index_equal(conformed.items, self.panel4d.labels) - tm.assert_index_equal(conformed.major_axis, self.panel4d.major_axis) - tm.assert_index_equal(conformed.minor_axis, self.panel4d.minor_axis) - - def test_reindex(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - ref = self.panel4d['l2'] - - # labels - result = self.panel4d.reindex(labels=['l1', 'l2']) - assert_panel_equal(result['l2'], ref) - - # items - result = self.panel4d.reindex(items=['ItemA', 'ItemB']) - assert_frame_equal(result['l2']['ItemB'], ref['ItemB']) - - # major - new_major = list(self.panel4d.major_axis[:10]) - result = self.panel4d.reindex(major=new_major) - assert_frame_equal( - result['l2']['ItemB'], ref['ItemB'].reindex(index=new_major)) - - # raise exception put both major and major_axis - self.assertRaises(Exception, self.panel4d.reindex, - major_axis=new_major, major=new_major) - - # minor - new_minor = list(self.panel4d.minor_axis[:2]) - result = self.panel4d.reindex(minor=new_minor) - assert_frame_equal( - result['l2']['ItemB'], ref['ItemB'].reindex(columns=new_minor)) - - result = self.panel4d.reindex(labels=self.panel4d.labels, - items=self.panel4d.items, - major=self.panel4d.major_axis, - minor=self.panel4d.minor_axis) - - # don't necessarily copy - result = self.panel4d.reindex() - assert_panel4d_equal(result, self.panel4d) - self.assertFalse(result is self.panel4d) - - # with filling - smaller_major = self.panel4d.major_axis[::5] - smaller = 
self.panel4d.reindex(major=smaller_major) - - larger = smaller.reindex(major=self.panel4d.major_axis, - method='pad') - - assert_panel_equal(larger.loc[:, :, self.panel4d.major_axis[1], :], - smaller.loc[:, :, smaller_major[0], :]) - - # don't necessarily copy - result = self.panel4d.reindex( - major=self.panel4d.major_axis, copy=False) - assert_panel4d_equal(result, self.panel4d) - self.assertTrue(result is self.panel4d) - - def test_not_hashable(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - p4D_empty = Panel4D() - self.assertRaises(TypeError, hash, p4D_empty) - self.assertRaises(TypeError, hash, self.panel4d) - - def test_reindex_like(self): - # reindex_like - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - smaller = self.panel4d.reindex(labels=self.panel4d.labels[:-1], - items=self.panel4d.items[:-1], - major=self.panel4d.major_axis[:-1], - minor=self.panel4d.minor_axis[:-1]) - smaller_like = self.panel4d.reindex_like(smaller) - assert_panel4d_equal(smaller, smaller_like) - - def test_sort_index(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - import random - - rlabels = list(self.panel4d.labels) - ritems = list(self.panel4d.items) - rmajor = list(self.panel4d.major_axis) - rminor = list(self.panel4d.minor_axis) - random.shuffle(rlabels) - random.shuffle(ritems) - random.shuffle(rmajor) - random.shuffle(rminor) - - random_order = self.panel4d.reindex(labels=rlabels) - sorted_panel4d = random_order.sort_index(axis=0) - assert_panel4d_equal(sorted_panel4d, self.panel4d) - - def test_fillna(self): - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self.assertFalse(np.isfinite(self.panel4d.values).all()) - filled = self.panel4d.fillna(0) - self.assertTrue(np.isfinite(filled.values).all()) - - self.assertRaises(NotImplementedError, - self.panel4d.fillna, method='pad') - - def test_swapaxes(self): - with tm.assert_produces_warning(FutureWarning, 
check_stacklevel=False): - result = self.panel4d.swapaxes('labels', 'items') - self.assertIs(result.items, self.panel4d.labels) - - result = self.panel4d.swapaxes('labels', 'minor') - self.assertIs(result.labels, self.panel4d.minor_axis) - - result = self.panel4d.swapaxes('items', 'minor') - self.assertIs(result.items, self.panel4d.minor_axis) - - result = self.panel4d.swapaxes('items', 'major') - self.assertIs(result.items, self.panel4d.major_axis) - - result = self.panel4d.swapaxes('major', 'minor') - self.assertIs(result.major_axis, self.panel4d.minor_axis) - - # this should also work - result = self.panel4d.swapaxes(0, 1) - self.assertIs(result.labels, self.panel4d.items) - - # this works, but return a copy - result = self.panel4d.swapaxes('items', 'items') - assert_panel4d_equal(self.panel4d, result) - self.assertNotEqual(id(self.panel4d), id(result)) - - def test_update(self): - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - p4d = Panel4D([[[[1.5, np.nan, 3.], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.]], - [[1.5, np.nan, 3.], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.]]]]) - - other = Panel4D([[[[3.6, 2., np.nan]], - [[np.nan, np.nan, 7]]]]) - - p4d.update(other) - - expected = Panel4D([[[[3.6, 2, 3.], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.]], - [[1.5, np.nan, 7], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.]]]]) - - assert_panel4d_equal(p4d, expected) - - def test_dtypes(self): - - result = self.panel4d.dtypes - expected = Series(np.dtype('float64'), index=self.panel4d.labels) - assert_series_equal(result, expected) - - def test_repr_empty(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - empty = Panel4D() - repr(empty) - - def test_rename(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - - mapper = {'l1': 'foo', - 'l2': 'bar', - 'l3': 'baz'} - - renamed = self.panel4d.rename_axis(mapper, 
axis=0) - exp = Index(['foo', 'bar', 'baz']) - self.assert_index_equal(renamed.labels, exp) - - renamed = self.panel4d.rename_axis(str.lower, axis=3) - exp = Index(['a', 'b', 'c', 'd']) - self.assert_index_equal(renamed.minor_axis, exp) - - # don't copy - renamed_nocopy = self.panel4d.rename_axis(mapper, - axis=0, - copy=False) - renamed_nocopy['foo'] = 3. - self.assertTrue((self.panel4d['l1'].values == 3).all()) - - def test_get_attr(self): - assert_panel_equal(self.panel4d['l1'], self.panel4d.l1) diff --git a/pandas/tests/test_panelnd.py b/pandas/tests/test_panelnd.py deleted file mode 100644 index 6a578d85d3ee3..0000000000000 --- a/pandas/tests/test_panelnd.py +++ /dev/null @@ -1,101 +0,0 @@ -# -*- coding: utf-8 -*- -from pandas.core import panelnd -from pandas.core.panel import Panel - -from pandas.util.testing import assert_panel_equal -import pandas.util.testing as tm - - -class TestPanelnd(tm.TestCase): - - def setUp(self): - pass - - def test_4d_construction(self): - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - - # create a 4D - Panel4D = panelnd.create_nd_panel_factory( - klass_name='Panel4D', - orders=['labels', 'items', 'major_axis', 'minor_axis'], - slices={'items': 'items', 'major_axis': 'major_axis', - 'minor_axis': 'minor_axis'}, - slicer=Panel, - aliases={'major': 'major_axis', 'minor': 'minor_axis'}, - stat_axis=2) - - p4d = Panel4D(dict(L1=tm.makePanel(), L2=tm.makePanel())) # noqa - - def test_4d_construction_alt(self): - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - - # create a 4D - Panel4D = panelnd.create_nd_panel_factory( - klass_name='Panel4D', - orders=['labels', 'items', 'major_axis', 'minor_axis'], - slices={'items': 'items', 'major_axis': 'major_axis', - 'minor_axis': 'minor_axis'}, - slicer='Panel', - aliases={'major': 'major_axis', 'minor': 'minor_axis'}, - stat_axis=2) - - p4d = Panel4D(dict(L1=tm.makePanel(), L2=tm.makePanel())) # noqa - - def 
test_4d_construction_error(self): - - # create a 4D - self.assertRaises(Exception, - panelnd.create_nd_panel_factory, - klass_name='Panel4D', - orders=['labels', 'items', 'major_axis', - 'minor_axis'], - slices={'items': 'items', - 'major_axis': 'major_axis', - 'minor_axis': 'minor_axis'}, - slicer='foo', - aliases={'major': 'major_axis', - 'minor': 'minor_axis'}, - stat_axis=2) - - def test_5d_construction(self): - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - - # create a 4D - Panel4D = panelnd.create_nd_panel_factory( - klass_name='Panel4D', - orders=['labels1', 'items', 'major_axis', 'minor_axis'], - slices={'items': 'items', 'major_axis': 'major_axis', - 'minor_axis': 'minor_axis'}, - slicer=Panel, - aliases={'major': 'major_axis', 'minor': 'minor_axis'}, - stat_axis=2) - - # deprecation GH13564 - p4d = Panel4D(dict(L1=tm.makePanel(), L2=tm.makePanel())) - - # create a 5D - Panel5D = panelnd.create_nd_panel_factory( - klass_name='Panel5D', - orders=['cool1', 'labels1', 'items', 'major_axis', - 'minor_axis'], - slices={'labels1': 'labels1', 'items': 'items', - 'major_axis': 'major_axis', - 'minor_axis': 'minor_axis'}, - slicer=Panel4D, - aliases={'major': 'major_axis', 'minor': 'minor_axis'}, - stat_axis=2) - - # deprecation GH13564 - p5d = Panel5D(dict(C1=p4d)) - - # slice back to 4d - results = p5d.iloc[p5d.cool1.get_loc('C1'), :, :, 0:3, :] - expected = p4d.iloc[:, :, 0:3, :] - assert_panel_equal(results['L1'], expected['L1']) - - # test a transpose - # results = p5d.transpose(1,2,3,4,0) - # expected = diff --git a/pandas/tests/test_register_accessor.py b/pandas/tests/test_register_accessor.py new file mode 100644 index 0000000000000..fe0cf4c9b38af --- /dev/null +++ b/pandas/tests/test_register_accessor.py @@ -0,0 +1,87 @@ +import contextlib + +import pytest + +import pandas as pd +import pandas.util.testing as tm + + +@contextlib.contextmanager +def ensure_removed(obj, attr): + """Ensure that an attribute added to 'obj' during 
the test is + removed when we're done""" + try: + yield + finally: + try: + delattr(obj, attr) + except AttributeError: + pass + + +class MyAccessor(object): + + def __init__(self, obj): + self.obj = obj + self.item = 'item' + + @property + def prop(self): + return self.item + + def method(self): + return self.item + + +@pytest.mark.parametrize('obj, registrar', [ + (pd.Series, pd.api.extensions.register_series_accessor), + (pd.DataFrame, pd.api.extensions.register_dataframe_accessor), + (pd.Index, pd.api.extensions.register_index_accessor) +]) +def test_series_register(obj, registrar): + with ensure_removed(obj, 'mine'): + before = set(dir(obj)) + registrar('mine')(MyAccessor) + assert obj([]).mine.prop == 'item' + after = set(dir(obj)) + assert (before ^ after) == {'mine'} + + +def test_accessor_works(): + with ensure_removed(pd.Series, 'mine'): + pd.api.extensions.register_series_accessor('mine')(MyAccessor) + + s = pd.Series([1, 2]) + assert s.mine.obj is s + + assert s.mine.prop == 'item' + assert s.mine.method() == 'item' + + +def test_overwrite_warns(): + # Need to restore mean + mean = pd.Series.mean + try: + with tm.assert_produces_warning(UserWarning) as w: + pd.api.extensions.register_series_accessor('mean')(MyAccessor) + s = pd.Series([1, 2]) + assert s.mean.prop == 'item' + msg = str(w[0].message) + assert 'mean' in msg + assert 'MyAccessor' in msg + assert 'Series' in msg + finally: + pd.Series.mean = mean + + +def test_raises_attribute_error(): + + with ensure_removed(pd.Series, 'bad'): + + @pd.api.extensions.register_series_accessor("bad") + class Bad(object): + def __init__(self, data): + raise AttributeError("whoops") + + with tm.assert_raises_regex(AttributeError, "whoops"): + pd.Series([]).bad diff --git a/pandas/tests/tseries/test_resample.py b/pandas/tests/test_resample.py old mode 100755 new mode 100644 similarity index 72% rename from pandas/tests/tseries/test_resample.py rename to pandas/tests/test_resample.py index 
afb44887fe7d1..23cc18de34778 --- a/pandas/tests/tseries/test_resample.py +++ b/pandas/tests/test_resample.py @@ -1,32 +1,40 @@ # pylint: disable=E1101 +from warnings import catch_warnings from datetime import datetime, timedelta from functools import partial +from textwrap import dedent +from operator import methodcaller +import pytz +import pytest +import dateutil import numpy as np import pandas as pd import pandas.tseries.offsets as offsets import pandas.util.testing as tm -from pandas import (Series, DataFrame, Panel, Index, isnull, - notnull, Timestamp) +import pandas.util._test_decorators as td +from pandas import (Series, DataFrame, Panel, Index, isna, + notna, Timestamp) -from pandas.types.generic import ABCSeries, ABCDataFrame +from pandas.core.dtypes.generic import ABCSeries, ABCDataFrame from pandas.compat import range, lrange, zip, product, OrderedDict -from pandas.core.base import SpecificationError -from pandas.core.common import UnsupportedFunctionCall +from pandas.errors import UnsupportedFunctionCall from pandas.core.groupby import DataError -from pandas.tseries.frequencies import MONTHS, DAYS +import pandas.core.common as com + from pandas.tseries.frequencies import to_offset -from pandas.tseries.index import date_range +from pandas.core.indexes.datetimes import date_range from pandas.tseries.offsets import Minute, BDay -from pandas.tseries.period import period_range, PeriodIndex, Period -from pandas.tseries.resample import (DatetimeIndex, TimeGrouper, - DatetimeIndexResampler) -from pandas.tseries.tdi import timedelta_range, TimedeltaIndex +from pandas.core.indexes.period import period_range, PeriodIndex, Period +from pandas.core.resample import (DatetimeIndex, TimeGrouper, + DatetimeIndexResampler) +from pandas.core.indexes.timedeltas import timedelta_range, TimedeltaIndex from pandas.util.testing import (assert_series_equal, assert_almost_equal, assert_frame_equal, assert_index_equal) -from pandas._period import IncompatibleFrequency +from 
pandas._libs.tslibs.period import IncompatibleFrequency +from pandas._libs.tslibs.ccalendar import DAYS, MONTHS bday = BDay() @@ -48,9 +56,9 @@ def _simple_pts(start, end, freq='D'): return Series(np.random.randn(len(rng)), index=rng) -class TestResampleAPI(tm.TestCase): +class TestResampleAPI(object): - def setUp(self): + def setup_method(self, method): dti = DatetimeIndex(start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq='Min') @@ -61,21 +69,20 @@ def setUp(self): def test_str(self): r = self.series.resample('H') - self.assertTrue( - 'DatetimeIndexResampler [freq=, axis=0, closed=left, ' - 'label=left, convention=start, base=0]' in str(r)) + assert ('DatetimeIndexResampler [freq=, axis=0, closed=left, ' + 'label=left, convention=start, base=0]' in str(r)) def test_api(self): r = self.series.resample('H') result = r.mean() - self.assertIsInstance(result, Series) - self.assertEqual(len(result), 217) + assert isinstance(result, Series) + assert len(result) == 217 r = self.series.to_frame().resample('H') result = r.mean() - self.assertIsInstance(result, DataFrame) - self.assertEqual(len(result), 217) + assert isinstance(result, DataFrame) + assert len(result) == 217 def test_api_changes_v018(self): @@ -83,7 +90,7 @@ def test_api_changes_v018(self): # to .resample(......).how() r = self.series.resample('H') - self.assertIsInstance(r, DatetimeIndexResampler) + assert isinstance(r, DatetimeIndexResampler) for how in ['sum', 'mean', 'prod', 'min', 'max', 'var', 'std']: with tm.assert_produces_warning(FutureWarning, @@ -99,25 +106,25 @@ def test_api_changes_v018(self): tm.assert_frame_equal(result, expected) # compat for pandas-like methods - for how in ['sort_values', 'isnull']: + for how in ['sort_values', 'isna']: with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): getattr(r, how)() # invalids as these can be setting operations r = self.series.resample('H') - self.assertRaises(ValueError, lambda: r.iloc[0]) - self.assertRaises(ValueError, 
lambda: r.iat[0]) - self.assertRaises(ValueError, lambda: r.loc[0]) - self.assertRaises(ValueError, lambda: r.loc[ + pytest.raises(ValueError, lambda: r.iloc[0]) + pytest.raises(ValueError, lambda: r.iat[0]) + pytest.raises(ValueError, lambda: r.loc[0]) + pytest.raises(ValueError, lambda: r.loc[ Timestamp('2013-01-01 00:00:00', offset='H')]) - self.assertRaises(ValueError, lambda: r.at[ + pytest.raises(ValueError, lambda: r.at[ Timestamp('2013-01-01 00:00:00', offset='H')]) def f(): r[0] = 5 - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) # str/repr r = self.series.resample('H') @@ -131,10 +138,10 @@ def f(): tm.assert_numpy_array_equal(np.array(r), np.array(r.mean())) # masquerade as Series/DataFrame as needed for API compat - self.assertTrue(isinstance(self.series.resample('H'), ABCSeries)) - self.assertFalse(isinstance(self.frame.resample('H'), ABCSeries)) - self.assertFalse(isinstance(self.series.resample('H'), ABCDataFrame)) - self.assertTrue(isinstance(self.frame.resample('H'), ABCDataFrame)) + assert isinstance(self.series.resample('H'), ABCSeries) + assert not isinstance(self.frame.resample('H'), ABCSeries) + assert not isinstance(self.series.resample('H'), ABCDataFrame) + assert isinstance(self.frame.resample('H'), ABCDataFrame) # bin numeric ops for op in ['__add__', '__mul__', '__truediv__', '__div__', '__sub__']: @@ -145,7 +152,7 @@ def f(): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self.assertIsInstance(getattr(r, op)(2), pd.Series) + assert isinstance(getattr(r, op)(2), Series) # unary numeric ops for op in ['__pos__', '__neg__', '__abs__', '__inv__']: @@ -156,7 +163,7 @@ def f(): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self.assertIsInstance(getattr(r, op)(), pd.Series) + assert isinstance(getattr(r, op)(), Series) # comparison ops for op in ['__lt__', '__le__', '__gt__', '__ge__', '__eq__', '__ne__']: @@ -164,7 +171,7 @@ def f(): with 
tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self.assertIsInstance(getattr(r, op)(2), pd.Series) + assert isinstance(getattr(r, op)(2), Series) # IPython introspection shouldn't trigger warning GH 13618 for op in ['_repr_json', '_repr_latex', @@ -177,7 +184,7 @@ def f(): df = self.series.to_frame('foo') # same as prior versions for DataFrame - self.assertRaises(KeyError, lambda: df.resample('H')[0]) + pytest.raises(KeyError, lambda: df.resample('H')[0]) # compat for Series # but we cannot be sure that we need a warning here @@ -185,13 +192,13 @@ def f(): check_stacklevel=False): result = self.series.resample('H')[0] expected = self.series.resample('H').mean()[0] - self.assertEqual(result, expected) + assert result == expected with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = self.series.resample('H')['2005-01-09 23:00:00'] expected = self.series.resample('H').mean()['2005-01-09 23:00:00'] - self.assertEqual(result, expected) + assert result == expected def test_groupby_resample_api(self): @@ -220,18 +227,32 @@ def test_groupby_resample_on_api(self): # GH 15021 # .groupby(...).resample(on=...) results in an unexpected # keyword warning. 
- df = pd.DataFrame({'key': ['A', 'B'] * 5, - 'dates': pd.date_range('2016-01-01', periods=10), - 'values': np.random.randn(10)}) + df = DataFrame({'key': ['A', 'B'] * 5, + 'dates': pd.date_range('2016-01-01', periods=10), + 'values': np.random.randn(10)}) expected = df.set_index('dates').groupby('key').resample('D').mean() result = df.groupby('key').resample('D', on='dates').mean() assert_frame_equal(result, expected) - def test_plot_api(self): - tm._skip_if_no_mpl() + def test_pipe(self): + # GH17905 + + # series + r = self.series.resample('H') + expected = r.max() - r.mean() + result = r.pipe(lambda x: x.max() - x.mean()) + tm.assert_series_equal(result, expected) + + # dataframe + r = self.frame.resample('H') + expected = r.max() - r.mean() + result = r.pipe(lambda x: x.max() - x.mean()) + tm.assert_frame_equal(result, expected) + @td.skip_if_no_mpl + def test_plot_api(self): # .resample(....).plot(...) # hitting warnings # GH 12448 @@ -253,7 +274,7 @@ def test_getitem(self): tm.assert_index_equal(r._selected_obj.columns, self.frame.columns) r = self.frame.resample('H')['B'] - self.assertEqual(r._selected_obj.name, self.frame.columns[1]) + assert r._selected_obj.name == self.frame.columns[1] # technically this is allowed r = self.frame.resample('H')['A', 'B'] @@ -267,10 +288,10 @@ def test_getitem(self): def test_select_bad_cols(self): g = self.frame.resample('H') - self.assertRaises(KeyError, g.__getitem__, ['D']) + pytest.raises(KeyError, g.__getitem__, ['D']) - self.assertRaises(KeyError, g.__getitem__, ['A', 'D']) - with tm.assertRaisesRegexp(KeyError, '^[^A]+$'): + pytest.raises(KeyError, g.__getitem__, ['A', 'D']) + with tm.assert_raises_regex(KeyError, '^[^A]+$'): # A should not be referenced as a bad column... # will have to rethink regex if you change message! 
g[['A', 'D']] @@ -281,14 +302,13 @@ def test_attribute_access(self): tm.assert_series_equal(r.A.sum(), r['A'].sum()) # getting - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self.assertRaises(AttributeError, lambda: r.F) + pytest.raises(AttributeError, lambda: r.F) # setting def f(): r.F = 'bah' - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) def test_api_compat_before_use(self): @@ -296,7 +316,7 @@ def test_api_compat_before_use(self): # on these attributes for attr in ['groups', 'ngroups', 'indices']: rng = pd.date_range('1/1/2012', periods=100, freq='S') - ts = pd.Series(np.arange(len(rng)), index=rng) + ts = Series(np.arange(len(rng)), index=rng) rs = ts.resample('30s') # before use @@ -323,7 +343,7 @@ def test_downsample_but_actually_upsampling(self): # this is reindex / asfreq rng = pd.date_range('1/1/2012', periods=100, freq='S') - ts = pd.Series(np.arange(len(rng), dtype='int64'), index=rng) + ts = Series(np.arange(len(rng), dtype='int64'), index=rng) result = ts.resample('20s').asfreq() expected = Series([0, 20, 40, 60, 80], index=pd.date_range('2012-01-01 00:00:00', @@ -338,7 +358,7 @@ def test_combined_up_downsampling_of_irregular(self): # preserve these semantics rng = pd.date_range('1/1/2012', periods=100, freq='S') - ts = pd.Series(np.arange(len(rng)), index=rng) + ts = Series(np.arange(len(rng)), index=rng) ts2 = ts.iloc[[0, 1, 2, 3, 5, 7, 11, 15, 16, 25, 30]] with tm.assert_produces_warning(FutureWarning, @@ -359,7 +379,7 @@ def test_fillna(self): # need to upsample here rng = pd.date_range('1/1/2012', periods=10, freq='2S') - ts = pd.Series(np.arange(len(rng), dtype='int64'), index=rng) + ts = Series(np.arange(len(rng), dtype='int64'), index=rng) r = ts.resample('s') expected = r.ffill() @@ -370,7 +390,7 @@ def test_fillna(self): result = r.fillna(method='bfill') assert_series_equal(result, expected) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): r.fillna(0) def 
test_apply_without_aggregation(self): @@ -393,8 +413,10 @@ def test_agg_consistency(self): r = df.resample('3T') - expected = r[['A', 'B', 'C']].agg({'r1': 'mean', 'r2': 'sum'}) - result = r.agg({'r1': 'mean', 'r2': 'sum'}) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + expected = r[['A', 'B', 'C']].agg({'r1': 'mean', 'r2': 'sum'}) + result = r.agg({'r1': 'mean', 'r2': 'sum'}) assert_frame_equal(result, expected) # TODO: once GH 14008 is fixed, move these tests into @@ -406,9 +428,7 @@ def test_agg(self): index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq='D') index.name = 'date' - df = pd.DataFrame(np.random.rand(10, 2), - columns=list('AB'), - index=index) + df = DataFrame(np.random.rand(10, 2), columns=list('AB'), index=index) df_col = df.reset_index() df_mult = df_col.copy() df_mult.index = pd.MultiIndex.from_arrays([range(10), df.index], @@ -458,7 +478,9 @@ def test_agg(self): expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'), ('A', 'sum')]) for t in cases: - result = t.aggregate({'A': {'mean': 'mean', 'sum': 'sum'}}) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = t.aggregate({'A': {'mean': 'mean', 'sum': 'sum'}}) assert_frame_equal(result, expected, check_like=True) expected = pd.concat([a_mean, a_sum, b_mean, b_sum], axis=1) @@ -467,8 +489,10 @@ def test_agg(self): ('B', 'mean2'), ('B', 'sum2')]) for t in cases: - result = t.aggregate({'A': {'mean': 'mean', 'sum': 'sum'}, - 'B': {'mean2': 'mean', 'sum2': 'sum'}}) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = t.aggregate({'A': {'mean': 'mean', 'sum': 'sum'}, + 'B': {'mean2': 'mean', 'sum2': 'sum'}}) assert_frame_equal(result, expected, check_like=True) expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1) @@ -494,9 +518,7 @@ def test_agg_misc(self): index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq='D') index.name = 'date' - df = 
pd.DataFrame(np.random.rand(10, 2), - columns=list('AB'), - index=index) + df = DataFrame(np.random.rand(10, 2), columns=list('AB'), index=index) df_col = df.reset_index() df_mult = df_col.copy() df_mult.index = pd.MultiIndex.from_arrays([range(10), df.index], @@ -528,9 +550,12 @@ def test_agg_misc(self): ('result1', 'B'), ('result2', 'A'), ('result2', 'B')]) + for t in cases: - result = t[['A', 'B']].agg(OrderedDict([('result1', np.sum), - ('result2', np.mean)])) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = t[['A', 'B']].agg(OrderedDict([('result1', np.sum), + ('result2', np.mean)])) assert_frame_equal(result, expected, check_like=True) # agg with different hows @@ -556,7 +581,9 @@ def test_agg_misc(self): # series like aggs for t in cases: - result = t['A'].agg({'A': ['sum', 'std']}) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = t['A'].agg({'A': ['sum', 'std']}) expected = pd.concat([t['A'].sum(), t['A'].std()], axis=1) @@ -571,17 +598,22 @@ def test_agg_misc(self): ('A', 'std'), ('B', 'mean'), ('B', 'std')]) - result = t['A'].agg({'A': ['sum', 'std'], 'B': ['mean', 'std']}) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = t['A'].agg({'A': ['sum', 'std'], + 'B': ['mean', 'std']}) assert_frame_equal(result, expected, check_like=True) # errors # invalid names in the agg specification for t in cases: def f(): - t[['A']].agg({'A': ['sum', 'std'], - 'B': ['mean', 'std']}) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + t[['A']].agg({'A': ['sum', 'std'], + 'B': ['mean', 'std']}) - self.assertRaises(SpecificationError, f) + pytest.raises(KeyError, f) def test_agg_nested_dicts(self): @@ -589,9 +621,7 @@ def test_agg_nested_dicts(self): index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq='D') index.name = 'date' - df = pd.DataFrame(np.random.rand(10, 2), - columns=list('AB'), - index=index) + df = 
DataFrame(np.random.rand(10, 2), columns=list('AB'), index=index) df_col = df.reset_index() df_mult = df_col.copy() df_mult.index = pd.MultiIndex.from_arrays([range(10), df.index], @@ -608,7 +638,7 @@ def test_agg_nested_dicts(self): def f(): t.aggregate({'r1': {'A': ['mean', 'sum']}, 'r2': {'B': ['mean', 'sum']}}) - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) for t in cases: expected = pd.concat([t['A'].mean(), t['A'].std(), t['B'].mean(), @@ -616,44 +646,62 @@ def f(): expected.columns = pd.MultiIndex.from_tuples([('ra', 'mean'), ( 'ra', 'std'), ('rb', 'mean'), ('rb', 'std')]) - result = t[['A', 'B']].agg({'A': {'ra': ['mean', 'std']}, - 'B': {'rb': ['mean', 'std']}}) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = t[['A', 'B']].agg({'A': {'ra': ['mean', 'std']}, + 'B': {'rb': ['mean', 'std']}}) assert_frame_equal(result, expected, check_like=True) - result = t.agg({'A': {'ra': ['mean', 'std']}, - 'B': {'rb': ['mean', 'std']}}) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = t.agg({'A': {'ra': ['mean', 'std']}, + 'B': {'rb': ['mean', 'std']}}) assert_frame_equal(result, expected, check_like=True) + def test_try_aggregate_non_existing_column(self): + # GH 16766 + data = [ + {'dt': datetime(2017, 6, 1, 0), 'x': 1.0, 'y': 2.0}, + {'dt': datetime(2017, 6, 1, 1), 'x': 2.0, 'y': 2.0}, + {'dt': datetime(2017, 6, 1, 2), 'x': 3.0, 'y': 1.5} + ] + df = DataFrame(data).set_index('dt') + + # Error as we don't have 'z' column + with pytest.raises(KeyError): + df.resample('30T').agg({'x': ['mean'], + 'y': ['median'], + 'z': ['sum']}) + def test_selection_api_validation(self): # GH 13500 index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq='D') - df = pd.DataFrame({'date': index, - 'a': np.arange(len(index), dtype=np.int64)}, - index=pd.MultiIndex.from_arrays([ - np.arange(len(index), dtype=np.int64), - index], names=['v', 'd'])) - df_exp = pd.DataFrame({'a': 
np.arange(len(index), dtype=np.int64)}, - index=index) + + rng = np.arange(len(index), dtype=np.int64) + df = DataFrame({'date': index, 'a': rng}, + index=pd.MultiIndex.from_arrays([rng, index], + names=['v', 'd'])) + df_exp = DataFrame({'a': rng}, index=index) # non DatetimeIndex - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): df.resample('2D', level='v') - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df.resample('2D', on='date', level='d') - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): df.resample('2D', on=['a', 'date']) - with tm.assertRaises(KeyError): + with pytest.raises(KeyError): df.resample('2D', level=['a', 'date']) # upsampling not allowed - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df.resample('2D', level='d').asfreq() - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): df.resample('2D', on='date').asfreq() exp = df_exp.resample('2D').sum() @@ -675,35 +723,58 @@ def create_index(self, *args, **kwargs): factory = self._index_factory() return factory(*args, **kwargs) - def test_asfreq_downsample(self): - s = self.create_series() - - result = s.resample('2D').asfreq() - expected = s.reindex(s.index.take(np.arange(0, len(s.index), 2))) - expected.index.freq = to_offset('2D') - assert_series_equal(result, expected) - - frame = s.to_frame('value') - result = frame.resample('2D').asfreq() - expected = frame.reindex( - frame.index.take(np.arange(0, len(frame.index), 2))) - expected.index.freq = to_offset('2D') - assert_frame_equal(result, expected) - - def test_asfreq_upsample(self): - s = self.create_series() - - result = s.resample('1H').asfreq() - new_index = self.create_index(s.index[0], s.index[-1], freq='1H') - expected = s.reindex(new_index) - assert_series_equal(result, expected) - - frame = s.to_frame('value') - result = frame.resample('1H').asfreq() - new_index = self.create_index(frame.index[0], - frame.index[-1], freq='1H') - expected = 
frame.reindex(new_index) - assert_frame_equal(result, expected) + @pytest.fixture + def _index_start(self): + return datetime(2005, 1, 1) + + @pytest.fixture + def _index_end(self): + return datetime(2005, 1, 10) + + @pytest.fixture + def _index_freq(self): + return 'D' + + @pytest.fixture + def index(self, _index_start, _index_end, _index_freq): + return self.create_index(_index_start, _index_end, freq=_index_freq) + + @pytest.fixture + def _series_name(self): + raise com.AbstractMethodError(self) + + @pytest.fixture + def _static_values(self, index): + return np.arange(len(index)) + + @pytest.fixture + def series(self, index, _series_name, _static_values): + return Series(_static_values, index=index, name=_series_name) + + @pytest.fixture + def frame(self, index, _static_values): + return DataFrame({'value': _static_values}, index=index) + + @pytest.fixture(params=[Series, DataFrame]) + def series_and_frame(self, request, index, _series_name, _static_values): + if request.param == Series: + return Series(_static_values, index=index, name=_series_name) + if request.param == DataFrame: + return DataFrame({'value': _static_values}, index=index) + + @pytest.mark.parametrize('freq', ['2D', '1H']) + def test_asfreq(self, series_and_frame, freq): + obj = series_and_frame + + result = obj.resample(freq).asfreq() + if freq == '2D': + new_index = obj.index.take(np.arange(0, len(obj.index), 2)) + new_index.freq = to_offset('2D') + else: + new_index = self.create_index(obj.index[0], obj.index[-1], + freq=freq) + expected = obj.reindex(new_index) + assert_almost_equal(result, expected) def test_asfreq_fill_value(self): # test for fill value during resampling, issue 3715 @@ -733,7 +804,7 @@ def test_resample_interpolate(self): def test_raises_on_non_datetimelike_index(self): # this is a non datetimelike index xp = DataFrame() - self.assertRaises(TypeError, lambda: xp.resample('A').mean()) + pytest.raises(TypeError, lambda: xp.resample('A').mean()) def 
test_resample_empty_series(self): # GH12771 & GH12868 @@ -750,19 +821,8 @@ def test_resample_empty_series(self): expected = s.copy() expected.index = s.index._shallow_copy(freq=freq) assert_index_equal(result.index, expected.index) - self.assertEqual(result.index.freq, expected.index.freq) - - if (method == 'size' and - isinstance(result.index, PeriodIndex) and - freq in ['M', 'D']): - # GH12871 - TODO: name should propagate, but currently - # doesn't on lower / same frequency with PeriodIndex - assert_series_equal(result, expected, check_dtype=False, - check_names=False) - # this assert will break when fixed - self.assertTrue(result.name is None) - else: - assert_series_equal(result, expected, check_dtype=False) + assert result.index.freq == expected.index.freq + assert_series_equal(result, expected, check_dtype=False) def test_resample_empty_dataframe(self): # GH13212 @@ -771,33 +831,39 @@ def test_resample_empty_dataframe(self): for freq in ['M', 'D', 'H']: # count retains dimensions too - methods = downsample_methods + ['count'] + methods = downsample_methods + upsample_methods for method in methods: result = getattr(f.resample(freq), method)() + if method != 'size': + expected = f.copy() + else: + # GH14962 + expected = Series([]) - expected = f.copy() expected.index = f.index._shallow_copy(freq=freq) assert_index_equal(result.index, expected.index) - self.assertEqual(result.index.freq, expected.index.freq) - assert_frame_equal(result, expected, check_dtype=False) + assert result.index.freq == expected.index.freq + assert_almost_equal(result, expected, check_dtype=False) # test size for GH13212 (currently stays as df) - def test_resample_empty_dtypes(self): + @pytest.mark.parametrize("index", tm.all_timeseries_index_generator(0)) + @pytest.mark.parametrize( + "dtype", + [np.float, np.int, np.object, 'datetime64[ns]']) + def test_resample_empty_dtypes(self, index, dtype): # Empty series were sometimes causing a segfault (for the functions # with Cython 
bounds-checking disabled) or an IndexError. We just run # them to ensure they no longer do. (GH #10228) - for index in tm.all_timeseries_index_generator(0): - for dtype in (np.float, np.int, np.object, 'datetime64[ns]'): - for how in downsample_methods + upsample_methods: - empty_series = pd.Series([], index, dtype) - try: - getattr(empty_series.resample('d'), how)() - except DataError: - # Ignore these since some combinations are invalid - # (ex: doing mean with dtype of np.object) - pass + for how in downsample_methods + upsample_methods: + empty_series = Series([], index, dtype) + try: + getattr(empty_series.resample('d'), how)() + except DataError: + # Ignore these since some combinations are invalid + # (ex: doing mean with dtype of np.object) + pass def test_resample_loffset_arg_type(self): # GH 13218, 15002 @@ -808,7 +874,7 @@ def test_resample_loffset_arg_type(self): periods=len(df.index) / 2, freq='2D') - # loffset coreces PeriodIndex to DateTimeIndex + # loffset coerces PeriodIndex to DateTimeIndex if isinstance(expected_index, PeriodIndex): expected_index = expected_index.to_timestamp() @@ -829,18 +895,32 @@ def test_resample_loffset_arg_type(self): # GH 13022, 7687 - TODO: fix resample w/ TimedeltaIndex if isinstance(expected.index, TimedeltaIndex): - with tm.assertRaises(AssertionError): + with pytest.raises(AssertionError): assert_frame_equal(result_agg, expected) assert_frame_equal(result_how, expected) else: assert_frame_equal(result_agg, expected) assert_frame_equal(result_how, expected) + def test_apply_to_empty_series(self): + # GH 14313 + series = self.create_series()[:0] + + for freq in ['M', 'D', 'H']: + result = series.resample(freq).apply(lambda x: 1) + expected = series.resample(freq).apply(np.sum) + + assert_series_equal(result, expected, check_dtype=False) -class TestDatetimeIndex(Base, tm.TestCase): + +class TestDatetimeIndex(Base): _index_factory = lambda x: date_range - def setUp(self): + @pytest.fixture + def _series_name(self): + 
return 'dti' + + def setup_method(self, method): dti = DatetimeIndex(start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq='Min') @@ -874,8 +954,8 @@ def test_custom_grouper(self): for f in funcs: g._cython_agg_general(f) - self.assertEqual(g.ngroups, 2593) - self.assertTrue(notnull(g.mean()).all()) + assert g.ngroups == 2593 + assert notna(g.mean()).all() # construct expected val arr = [1] + [5] * 2592 @@ -891,20 +971,21 @@ def test_custom_grouper(self): index=dti, dtype='float64') r = df.groupby(b).agg(np.sum) - self.assertEqual(len(r.columns), 10) - self.assertEqual(len(r.index), 2593) + assert len(r.columns) == 10 + assert len(r.index) == 2593 def test_resample_basic(self): rng = date_range('1/1/2000 00:00:00', '1/1/2000 00:13:00', freq='min', name='index') s = Series(np.random.randn(14), index=rng) + result = s.resample('5min', closed='right', label='right').mean() exp_idx = date_range('1/1/2000', periods=4, freq='5min', name='index') expected = Series([s[0], s[1:6].mean(), s[6:11].mean(), s[11:].mean()], index=exp_idx) assert_series_equal(result, expected) - self.assertEqual(result.index.name, 'index') + assert result.index.name == 'index' result = s.resample('5min', closed='left', label='right').mean() @@ -920,6 +1001,20 @@ def test_resample_basic(self): expect = s.groupby(grouper).agg(lambda x: x[-1]) assert_series_equal(result, expect) + def test_resample_string_kwargs(self): + # Test for issue #19303 + rng = date_range('1/1/2000 00:00:00', '1/1/2000 00:13:00', freq='min', + name='index') + s = Series(np.random.randn(14), index=rng) + + # Check that wrong keyword argument strings raise an error + with pytest.raises(ValueError): + s.resample('5min', label='righttt').mean() + with pytest.raises(ValueError): + s.resample('5min', closed='righttt').mean() + with pytest.raises(ValueError): + s.resample('5min', convention='starttt').mean() + def test_resample_how(self): rng = date_range('1/1/2000 00:00:00', '1/1/2000 00:13:00', freq='min', name='index') @@ 
-932,7 +1027,7 @@ def test_resample_how(self): args = downsample_methods def _ohlc(group): - if isnull(group).all(): + if isna(group).all(): return np.repeat(np.nan, 4) return [group[0], group.max(), group.min(), group[-1]] @@ -948,7 +1043,7 @@ def _ohlc(group): '5min', closed='right', label='right'), arg)() expected = s.groupby(grouplist).agg(func) - self.assertEqual(result.index.name, 'index') + assert result.index.name == 'index' if arg == 'ohlc': expected = DataFrame(expected.values.tolist()) expected.columns = ['open', 'high', 'low', 'close'] @@ -972,17 +1067,17 @@ def test_numpy_compat(self): for func in ('min', 'max', 'sum', 'prod', 'mean', 'var', 'std'): - tm.assertRaisesRegexp(UnsupportedFunctionCall, msg, - getattr(r, func), - func, 1, 2, 3) - tm.assertRaisesRegexp(UnsupportedFunctionCall, msg, - getattr(r, func), axis=1) + tm.assert_raises_regex(UnsupportedFunctionCall, msg, + getattr(r, func), + func, 1, 2, 3) + tm.assert_raises_regex(UnsupportedFunctionCall, msg, + getattr(r, func), axis=1) def test_resample_how_callables(self): # GH 7929 data = np.arange(5, dtype=np.int64) ind = pd.DatetimeIndex(start='2014-01-01', periods=len(data), freq='d') - df = pd.DataFrame({"A": data, "B": data}, index=ind) + df = DataFrame({"A": data, "B": data}, index=ind) def fn(x, a=1): return str(type(x)) @@ -1032,7 +1127,7 @@ def test_resample_timedelta_idempotency(self): # GH 12072 index = pd.timedelta_range('0', periods=9, freq='10L') - series = pd.Series(range(9), index=index) + series = Series(range(9), index=index) result = series.resample('10L').mean() expected = series assert_series_equal(result, expected) @@ -1106,52 +1201,51 @@ def test_resample_basic_from_daily(self): # to weekly result = s.resample('w-sun').last() - self.assertEqual(len(result), 3) - self.assertTrue((result.index.dayofweek == [6, 6, 6]).all()) - self.assertEqual(result.iloc[0], s['1/2/2005']) - self.assertEqual(result.iloc[1], s['1/9/2005']) - self.assertEqual(result.iloc[2], s.iloc[-1]) + 
assert len(result) == 3 + assert (result.index.dayofweek == [6, 6, 6]).all() + assert result.iloc[0] == s['1/2/2005'] + assert result.iloc[1] == s['1/9/2005'] + assert result.iloc[2] == s.iloc[-1] result = s.resample('W-MON').last() - self.assertEqual(len(result), 2) - self.assertTrue((result.index.dayofweek == [0, 0]).all()) - self.assertEqual(result.iloc[0], s['1/3/2005']) - self.assertEqual(result.iloc[1], s['1/10/2005']) + assert len(result) == 2 + assert (result.index.dayofweek == [0, 0]).all() + assert result.iloc[0] == s['1/3/2005'] + assert result.iloc[1] == s['1/10/2005'] result = s.resample('W-TUE').last() - self.assertEqual(len(result), 2) - self.assertTrue((result.index.dayofweek == [1, 1]).all()) - self.assertEqual(result.iloc[0], s['1/4/2005']) - self.assertEqual(result.iloc[1], s['1/10/2005']) + assert len(result) == 2 + assert (result.index.dayofweek == [1, 1]).all() + assert result.iloc[0] == s['1/4/2005'] + assert result.iloc[1] == s['1/10/2005'] result = s.resample('W-WED').last() - self.assertEqual(len(result), 2) - self.assertTrue((result.index.dayofweek == [2, 2]).all()) - self.assertEqual(result.iloc[0], s['1/5/2005']) - self.assertEqual(result.iloc[1], s['1/10/2005']) + assert len(result) == 2 + assert (result.index.dayofweek == [2, 2]).all() + assert result.iloc[0] == s['1/5/2005'] + assert result.iloc[1] == s['1/10/2005'] result = s.resample('W-THU').last() - self.assertEqual(len(result), 2) - self.assertTrue((result.index.dayofweek == [3, 3]).all()) - self.assertEqual(result.iloc[0], s['1/6/2005']) - self.assertEqual(result.iloc[1], s['1/10/2005']) + assert len(result) == 2 + assert (result.index.dayofweek == [3, 3]).all() + assert result.iloc[0] == s['1/6/2005'] + assert result.iloc[1] == s['1/10/2005'] result = s.resample('W-FRI').last() - self.assertEqual(len(result), 2) - self.assertTrue((result.index.dayofweek == [4, 4]).all()) - self.assertEqual(result.iloc[0], s['1/7/2005']) - self.assertEqual(result.iloc[1], s['1/10/2005']) + 
assert len(result) == 2 + assert (result.index.dayofweek == [4, 4]).all() + assert result.iloc[0] == s['1/7/2005'] + assert result.iloc[1] == s['1/10/2005'] # to biz day result = s.resample('B').last() - self.assertEqual(len(result), 7) - self.assertTrue((result.index.dayofweek == [ - 4, 0, 1, 2, 3, 4, 0 - ]).all()) - self.assertEqual(result.iloc[0], s['1/2/2005']) - self.assertEqual(result.iloc[1], s['1/3/2005']) - self.assertEqual(result.iloc[5], s['1/9/2005']) - self.assertEqual(result.index.name, 'index') + assert len(result) == 7 + assert (result.index.dayofweek == [4, 0, 1, 2, 3, 4, 0]).all() + + assert result.iloc[0] == s['1/2/2005'] + assert result.iloc[1] == s['1/3/2005'] + assert result.iloc[5] == s['1/9/2005'] + assert result.index.name == 'index' def test_resample_upsampling_picked_but_not_correct(self): @@ -1160,7 +1254,7 @@ def test_resample_upsampling_picked_but_not_correct(self): series = Series(1, index=dates) result = series.resample('D').mean() - self.assertEqual(result.index[0], dates[0]) + assert result.index[0] == dates[0] # GH 5955 # incorrect deciding to upsample when the axis frequency matches the @@ -1221,7 +1315,7 @@ def test_resample_loffset(self): loffset=Minute(1)).mean() assert_series_equal(result, expected) - self.assertEqual(result.index.freq, Minute(5)) + assert result.index.freq == Minute(5) # from daily dti = DatetimeIndex(start=datetime(2005, 1, 1), @@ -1231,7 +1325,7 @@ def test_resample_loffset(self): # to weekly result = ser.resample('w-sun').last() expected = ser.resample('w-sun', loffset=-bday).last() - self.assertEqual(result.index[0] - bday, expected.index[0]) + assert result.index[0] - bday == expected.index[0] def test_resample_loffset_count(self): # GH 12725 @@ -1245,7 +1339,7 @@ def test_resample_loffset_count(self): date_range(start_time, periods=10, freq='10S') + timedelta(seconds=1) ) - expected = pd.Series(10, index=expected_index) + expected = Series(10, index=expected_index) assert_series_equal(result, expected) 
@@ -1264,25 +1358,25 @@ def test_resample_upsample(self): # to minutely, by padding result = s.resample('Min').pad() - self.assertEqual(len(result), 12961) - self.assertEqual(result[0], s[0]) - self.assertEqual(result[-1], s[-1]) + assert len(result) == 12961 + assert result[0] == s[0] + assert result[-1] == s[-1] - self.assertEqual(result.index.name, 'index') + assert result.index.name == 'index' def test_resample_how_method(self): # GH9915 - s = pd.Series([11, 22], - index=[Timestamp('2015-03-31 21:48:52.672000'), - Timestamp('2015-03-31 21:49:52.739000')]) - expected = pd.Series([11, np.NaN, np.NaN, np.NaN, np.NaN, np.NaN, 22], - index=[Timestamp('2015-03-31 21:48:50'), - Timestamp('2015-03-31 21:49:00'), - Timestamp('2015-03-31 21:49:10'), - Timestamp('2015-03-31 21:49:20'), - Timestamp('2015-03-31 21:49:30'), - Timestamp('2015-03-31 21:49:40'), - Timestamp('2015-03-31 21:49:50')]) + s = Series([11, 22], + index=[Timestamp('2015-03-31 21:48:52.672000'), + Timestamp('2015-03-31 21:49:52.739000')]) + expected = Series([11, np.NaN, np.NaN, np.NaN, np.NaN, np.NaN, 22], + index=[Timestamp('2015-03-31 21:48:50'), + Timestamp('2015-03-31 21:49:00'), + Timestamp('2015-03-31 21:49:10'), + Timestamp('2015-03-31 21:49:20'), + Timestamp('2015-03-31 21:49:30'), + Timestamp('2015-03-31 21:49:40'), + Timestamp('2015-03-31 21:49:50')]) assert_series_equal(s.resample("10S").mean(), expected) def test_resample_extra_index_point(self): @@ -1304,6 +1398,14 @@ def test_upsample_with_limit(self): expected = ts.reindex(result.index, method='ffill', limit=2) assert_series_equal(result, expected) + def test_nearest_upsample_with_limit(self): + rng = date_range('1/1/2000', periods=3, freq='5t') + ts = Series(np.random.randn(len(rng)), rng) + + result = ts.resample('t').nearest(limit=2) + expected = ts.reindex(result.index, method='nearest', limit=2) + assert_series_equal(result, expected) + def test_resample_ohlc(self): s = self.series @@ -1311,20 +1413,20 @@ def 
test_resample_ohlc(self): expect = s.groupby(grouper).agg(lambda x: x[-1]) result = s.resample('5Min').ohlc() - self.assertEqual(len(result), len(expect)) - self.assertEqual(len(result.columns), 4) + assert len(result) == len(expect) + assert len(result.columns) == 4 xs = result.iloc[-2] - self.assertEqual(xs['open'], s[-6]) - self.assertEqual(xs['high'], s[-6:-1].max()) - self.assertEqual(xs['low'], s[-6:-1].min()) - self.assertEqual(xs['close'], s[-2]) + assert xs['open'] == s[-6] + assert xs['high'] == s[-6:-1].max() + assert xs['low'] == s[-6:-1].min() + assert xs['close'] == s[-2] xs = result.iloc[0] - self.assertEqual(xs['open'], s[0]) - self.assertEqual(xs['high'], s[:5].max()) - self.assertEqual(xs['low'], s[:5].min()) - self.assertEqual(xs['close'], s[4]) + assert xs['open'] == s[0] + assert xs['high'] == s[:5].max() + assert xs['low'] == s[:5].min() + assert xs['close'] == s[4] def test_resample_ohlc_result(self): @@ -1334,10 +1436,10 @@ def test_resample_ohlc_result(self): s = Series(range(len(index)), index=index) a = s.loc[:'4-15-2000'].resample('30T').ohlc() - self.assertIsInstance(a, DataFrame) + assert isinstance(a, DataFrame) b = s.loc[:'4-14-2000'].resample('30T').ohlc() - self.assertIsInstance(b, DataFrame) + assert isinstance(b, DataFrame) # GH12348 # raising on odd period @@ -1353,7 +1455,7 @@ def test_resample_ohlc_result(self): def test_resample_ohlc_dataframe(self): df = ( - pd.DataFrame({ + DataFrame({ 'PRICE': { Timestamp('2011-01-06 10:59:05', tz=None): 24990, Timestamp('2011-01-06 12:43:33', tz=None): 25499, @@ -1362,7 +1464,7 @@ def test_resample_ohlc_dataframe(self): Timestamp('2011-01-06 10:59:05', tz=None): 1500000000, Timestamp('2011-01-06 12:43:33', tz=None): 5000000000, Timestamp('2011-01-06 12:54:09', tz=None): 100000000}}) - ).reindex_axis(['VOLUME', 'PRICE'], axis=1) + ).reindex(['VOLUME', 'PRICE'], axis=1) res = df.resample('H').ohlc() exp = pd.concat([df['VOLUME'].resample('H').ohlc(), df['PRICE'].resample('H').ohlc()], @@ 
-1401,9 +1503,9 @@ def test_resample_reresample(self): s = Series(np.random.rand(len(dti)), dti) bs = s.resample('B', closed='right', label='right').mean() result = bs.resample('8H').mean() - self.assertEqual(len(result), 22) - tm.assertIsInstance(result.index.freq, offsets.DateOffset) - self.assertEqual(result.index.freq, offsets.Hour(8)) + assert len(result) == 22 + assert isinstance(result.index.freq, offsets.DateOffset) + assert result.index.freq == offsets.Hour(8) def test_resample_timestamp_to_period(self): ts = _simple_ts('1/1/1990', '1/1/2000') @@ -1430,7 +1532,7 @@ def test_resample_timestamp_to_period(self): def test_ohlc_5min(self): def _ohlc(group): - if isnull(group).all(): + if isna(group).all(): return np.repeat(np.nan, 4) return [group[0], group.max(), group.min(), group[-1]] @@ -1440,13 +1542,13 @@ def _ohlc(group): resampled = ts.resample('5min', closed='right', label='right').ohlc() - self.assertTrue((resampled.loc['1/1/2000 00:00'] == ts[0]).all()) + assert (resampled.loc['1/1/2000 00:00'] == ts[0]).all() exp = _ohlc(ts[1:31]) - self.assertTrue((resampled.loc['1/1/2000 00:05'] == exp).all()) + assert (resampled.loc['1/1/2000 00:05'] == exp).all() exp = _ohlc(ts['1/1/2000 5:55:01':]) - self.assertTrue((resampled.loc['1/1/2000 6:00:00'] == exp).all()) + assert (resampled.loc['1/1/2000 6:00:00'] == exp).all() def test_downsample_non_unique(self): rng = date_range('1/1/2000', '2/29/2000') @@ -1456,7 +1558,7 @@ def test_downsample_non_unique(self): result = ts.resample('M').mean() expected = ts.groupby(lambda x: x.month).mean() - self.assertEqual(len(result), 2) + assert len(result) == 2 assert_almost_equal(result[0], expected[1]) assert_almost_equal(result[1], expected[2]) @@ -1466,7 +1568,7 @@ def test_asfreq_non_unique(self): rng2 = rng.repeat(2).values ts = Series(np.random.randn(len(rng2)), index=rng2) - self.assertRaises(Exception, ts.asfreq, 'B') + pytest.raises(Exception, ts.asfreq, 'B') def test_resample_axis1(self): rng = 
date_range('1/1/2000', '2/29/2000') @@ -1481,44 +1583,47 @@ def test_resample_panel(self): rng = date_range('1/1/2000', '6/30/2000') n = len(rng) - panel = Panel(np.random.randn(3, n, 5), - items=['one', 'two', 'three'], - major_axis=rng, - minor_axis=['a', 'b', 'c', 'd', 'e']) + with catch_warnings(record=True): + panel = Panel(np.random.randn(3, n, 5), + items=['one', 'two', 'three'], + major_axis=rng, + minor_axis=['a', 'b', 'c', 'd', 'e']) - result = panel.resample('M', axis=1).mean() + result = panel.resample('M', axis=1).mean() - def p_apply(panel, f): - result = {} - for item in panel.items: - result[item] = f(panel[item]) - return Panel(result, items=panel.items) + def p_apply(panel, f): + result = {} + for item in panel.items: + result[item] = f(panel[item]) + return Panel(result, items=panel.items) - expected = p_apply(panel, lambda x: x.resample('M').mean()) - tm.assert_panel_equal(result, expected) + expected = p_apply(panel, lambda x: x.resample('M').mean()) + tm.assert_panel_equal(result, expected) - panel2 = panel.swapaxes(1, 2) - result = panel2.resample('M', axis=2).mean() - expected = p_apply(panel2, lambda x: x.resample('M', axis=1).mean()) - tm.assert_panel_equal(result, expected) + panel2 = panel.swapaxes(1, 2) + result = panel2.resample('M', axis=2).mean() + expected = p_apply(panel2, + lambda x: x.resample('M', axis=1).mean()) + tm.assert_panel_equal(result, expected) def test_resample_panel_numpy(self): rng = date_range('1/1/2000', '6/30/2000') n = len(rng) - panel = Panel(np.random.randn(3, n, 5), - items=['one', 'two', 'three'], - major_axis=rng, - minor_axis=['a', 'b', 'c', 'd', 'e']) + with catch_warnings(record=True): + panel = Panel(np.random.randn(3, n, 5), + items=['one', 'two', 'three'], + major_axis=rng, + minor_axis=['a', 'b', 'c', 'd', 'e']) - result = panel.resample('M', axis=1).apply(lambda x: x.mean(1)) - expected = panel.resample('M', axis=1).mean() - tm.assert_panel_equal(result, expected) + result = panel.resample('M', 
axis=1).apply(lambda x: x.mean(1)) + expected = panel.resample('M', axis=1).mean() + tm.assert_panel_equal(result, expected) - panel = panel.swapaxes(1, 2) - result = panel.resample('M', axis=2).apply(lambda x: x.mean(2)) - expected = panel.resample('M', axis=2).mean() - tm.assert_panel_equal(result, expected) + panel = panel.swapaxes(1, 2) + result = panel.resample('M', axis=2).apply(lambda x: x.mean(2)) + expected = panel.resample('M', axis=2).mean() + tm.assert_panel_equal(result, expected) def test_resample_anchored_ticks(self): # If a fixed delta (5 minute, 4 hour) evenly divides a day, we should @@ -1563,7 +1668,7 @@ def test_resample_base(self): resampled = ts.resample('5min', base=2).mean() exp_rng = date_range('12/31/1999 23:57:00', '1/1/2000 01:57', freq='5min') - self.assert_index_equal(resampled.index, exp_rng) + tm.assert_index_equal(resampled.index, exp_rng) def test_resample_base_with_timedeltaindex(self): @@ -1577,8 +1682,8 @@ def test_resample_base_with_timedeltaindex(self): exp_without_base = timedelta_range(start='0s', end='25s', freq='2s') exp_with_base = timedelta_range(start='5s', end='29s', freq='2s') - self.assert_index_equal(without_base.index, exp_without_base) - self.assert_index_equal(with_base.index, exp_with_base) + tm.assert_index_equal(without_base.index, exp_without_base) + tm.assert_index_equal(with_base.index, exp_with_base) def test_resample_categorical_data_with_timedeltaindex(self): # GH #12169 @@ -1589,7 +1694,7 @@ def test_resample_categorical_data_with_timedeltaindex(self): expected = DataFrame({'Group_obj': ['A', 'A'], 'Group': ['A', 'A']}, index=pd.to_timedelta([0, 10], unit='s')) - expected = expected.reindex_axis(['Group_obj', 'Group'], 1) + expected = expected.reindex(['Group_obj', 'Group'], axis=1) tm.assert_frame_equal(result, expected) def test_resample_daily_anchored(self): @@ -1609,16 +1714,14 @@ def test_resample_to_period_monthly_buglet(self): result = ts.resample('M', kind='period').mean() exp_index = 
period_range('Jan-2000', 'Dec-2000', freq='M') - self.assert_index_equal(result.index, exp_index) + tm.assert_index_equal(result.index, exp_index) def test_period_with_agg(self): # aggregate a period resampler with a lambda - s2 = pd.Series(np.random.randint(0, 5, 50), - index=pd.period_range('2012-01-01', - freq='H', - periods=50), - dtype='float64') + s2 = Series(np.random.randint(0, 5, 50), + index=pd.period_range('2012-01-01', freq='H', periods=50), + dtype='float64') expected = s2.to_timestamp().resample('D').mean().to_period() result = s2.resample('D').agg(lambda x: x.mean()) @@ -1633,9 +1736,9 @@ def test_resample_segfault(self): (2, datetime(2013, 10, 1, 18, 15), 1, 0), (2, datetime(2013, 10, 1, 16, 10, 31), 1, 0)] - df = pd.DataFrame.from_records(all_wins_and_wagers, - columns=("ID", "timestamp", "A", "B") - ).set_index("timestamp") + df = DataFrame.from_records(all_wins_and_wagers, + columns=("ID", "timestamp", "A", "B") + ).set_index("timestamp") result = df.groupby("ID").resample("5min").sum() expected = df.groupby("ID").apply(lambda x: x.resample("5min").sum()) assert_frame_equal(result, expected) @@ -1653,10 +1756,31 @@ def test_resample_dtype_preservation(self): ).set_index('date') result = df.resample('1D').ffill() - self.assertEqual(result.val.dtype, np.int32) + assert result.val.dtype == np.int32 result = df.groupby('group').resample('1D').ffill() - self.assertEqual(result.val.dtype, np.int32) + assert result.val.dtype == np.int32 + + def test_resample_dtype_coerceion(self): + + pytest.importorskip('scipy.interpolate') + + # GH 16361 + df = {"a": [1, 3, 1, 4]} + df = DataFrame(df, index=pd.date_range("2017-01-01", "2017-01-04")) + + expected = (df.astype("float64") + .resample("H") + .mean() + ["a"] + .interpolate("cubic") + ) + + result = df.resample("H")["a"].mean().interpolate("cubic") + tm.assert_series_equal(result, expected) + + result = df.resample("H").mean()["a"].interpolate("cubic") + tm.assert_series_equal(result, expected) def 
test_weekly_resample_buglet(self): # #1327 @@ -1684,7 +1808,7 @@ def test_nanosecond_resample_error(self): periods=10, freq='100n' ) - ts = pd.Series(range(len(indx)), index=indx) + ts = Series(range(len(indx)), index=indx) r = ts.resample(pd.tseries.offsets.Nano(100)) result = r.agg('mean') @@ -1693,7 +1817,7 @@ def test_nanosecond_resample_error(self): periods=10, freq='100n' ) - exp = pd.Series(range(len(exp_indx)), index=exp_indx) + exp = Series(range(len(exp_indx)), index=exp_indx) assert_series_equal(result, exp) @@ -1730,7 +1854,7 @@ def test_resample_anchored_intraday(self): ts = _simple_ts('2012-04-29 23:00', '2012-04-30 5:00', freq='h') resampled = ts.resample('M').mean() - self.assertEqual(len(resampled), 1) + assert len(resampled) == 1 def test_resample_anchored_monthstart(self): ts = _simple_ts('1/1/2000', '12/31/2002') @@ -1752,17 +1876,15 @@ def test_resample_anchored_multiday(self): ) | pd.date_range( '2014-10-15 23:00:00', periods=2, freq='2200L') - s = pd.Series(np.random.randn(5), index=index) + s = Series(np.random.randn(5), index=index) # Ensure left closing works result = s.resample('2200L').mean() - self.assertEqual(result.index[-1], - pd.Timestamp('2014-10-15 23:00:02.000')) + assert result.index[-1] == Timestamp('2014-10-15 23:00:02.000') # Ensure right closing works result = s.resample('2200L', label='right').mean() - self.assertEqual(result.index[-1], - pd.Timestamp('2014-10-15 23:00:04.200')) + assert result.index[-1] == Timestamp('2014-10-15 23:00:04.200') def test_corner_cases(self): # miscellaneous test coverage @@ -1772,18 +1894,18 @@ def test_corner_cases(self): result = ts.resample('5t', closed='right', label='left').mean() ex_index = date_range('1999-12-31 23:55', periods=4, freq='5t') - self.assert_index_equal(result.index, ex_index) + tm.assert_index_equal(result.index, ex_index) len0pts = _simple_pts('2007-01', '2010-05', freq='M')[:0] # it works result = len0pts.resample('A-DEC').mean() - self.assertEqual(len(result), 0) + 
assert len(result) == 0 # resample to periods ts = _simple_ts('2000-04-28', '2000-04-30 11:00', freq='h') result = ts.resample('M', kind='period').mean() - self.assertEqual(len(result), 1) - self.assertEqual(result.index[0], Period('2000-04', freq='M')) + assert len(result) == 1 + assert result.index[0] == Period('2000-04', freq='M') def test_anchored_lowercase_buglet(self): dates = date_range('4/16/2012 20:00', periods=50000, freq='s') @@ -1798,7 +1920,7 @@ def test_upsample_apply_functions(self): ts = Series(np.random.randn(len(rng)), index=rng) result = ts.resample('20min').aggregate(['mean', 'sum']) - tm.assertIsInstance(result, DataFrame) + assert isinstance(result, DataFrame) def test_resample_not_monotonic(self): rng = pd.date_range('2012-06-12', periods=200, freq='h') @@ -1844,10 +1966,12 @@ def test_how_lambda_functions(self): tm.assert_series_equal(result['foo'], foo_exp) tm.assert_series_equal(result['bar'], bar_exp) + # this is a MI Series, so comparing the names of the results + # doesn't make sense result = ts.resample('M').aggregate({'foo': lambda x: x.mean(), 'bar': lambda x: x.std(ddof=1)}) - tm.assert_series_equal(result['foo'], foo_exp) - tm.assert_series_equal(result['bar'], bar_exp) + tm.assert_series_equal(result['foo'], foo_exp, check_names=False) + tm.assert_series_equal(result['bar'], bar_exp, check_names=False) def test_resample_unequal_times(self): # #1772 @@ -1866,7 +1990,7 @@ def test_resample_consistency(self): # resample with bfill / limit / reindex consistency i30 = pd.date_range('2002-02-02', periods=4, freq='30T') - s = pd.Series(np.arange(4.), index=i30) + s = Series(np.arange(4.), index=i30) s[2] = np.NaN # Upsample by factor 3 with reindex() and resample() methods: @@ -1919,15 +2043,15 @@ def test_resample_nunique(self): # GH 12352 df = DataFrame({ - 'ID': {pd.Timestamp('2015-06-05 00:00:00'): '0010100903', - pd.Timestamp('2015-06-08 00:00:00'): '0010150847'}, - 'DATE': {pd.Timestamp('2015-06-05 00:00:00'): '2015-06-05', - 
pd.Timestamp('2015-06-08 00:00:00'): '2015-06-08'}}) + 'ID': {Timestamp('2015-06-05 00:00:00'): '0010100903', + Timestamp('2015-06-08 00:00:00'): '0010150847'}, + 'DATE': {Timestamp('2015-06-05 00:00:00'): '2015-06-05', + Timestamp('2015-06-08 00:00:00'): '2015-06-08'}}) r = df.resample('D') g = df.groupby(pd.Grouper(freq='D')) - expected = df.groupby(pd.TimeGrouper('D')).ID.apply(lambda x: - x.nunique()) - self.assertEqual(expected.name, 'ID') + expected = df.groupby(pd.Grouper(freq='D')).ID.apply(lambda x: + x.nunique()) + assert expected.name == 'ID' for t in [r, g]: result = r.ID.nunique() @@ -1939,6 +2063,26 @@ def test_resample_nunique(self): result = df.ID.groupby(pd.Grouper(freq='D')).nunique() assert_series_equal(result, expected) + def test_resample_nunique_with_date_gap(self): + # GH 13453 + index = pd.date_range('1-1-2000', '2-15-2000', freq='h') + index2 = pd.date_range('4-15-2000', '5-15-2000', freq='h') + index3 = index.append(index2) + s = Series(range(len(index3)), index=index3, dtype='int64') + r = s.resample('M') + + # Since all elements are unique, these should all be the same + results = [ + r.count(), + r.nunique(), + r.agg(Series.nunique), + r.agg('nunique') + ] + + assert_series_equal(results[0], results[1]) + assert_series_equal(results[0], results[2]) + assert_series_equal(results[0], results[3]) + def test_resample_group_info(self): # GH10914 for n, k in product((10000, 100000), (10, 100, 1000)): dr = date_range(start='2015-08-27', periods=n // 10, freq='T') @@ -2133,60 +2277,38 @@ def test_resample_datetime_values(self): tm.assert_series_equal(res, exp) -class TestPeriodIndex(Base, tm.TestCase): +class TestPeriodIndex(Base): _index_factory = lambda x: period_range + @pytest.fixture + def _series_name(self): + return 'pi' + def create_series(self): + # TODO: replace calls to .create_series() by injecting the series + # fixture i = period_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq='D') return Series(np.arange(len(i)), index=i, 
name='pi') - def test_asfreq_downsample(self): - - # series - s = self.create_series() - expected = s.reindex(s.index.take(np.arange(0, len(s.index), 2))) - expected.index = expected.index.to_timestamp() - expected.index.freq = to_offset('2D') - - # this is a bug, this *should* return a PeriodIndex - # directly - # GH 12884 - result = s.resample('2D').asfreq() - assert_series_equal(result, expected) - - # frame - frame = s.to_frame('value') - expected = frame.reindex( - frame.index.take(np.arange(0, len(frame.index), 2))) - expected.index = expected.index.to_timestamp() - expected.index.freq = to_offset('2D') - result = frame.resample('2D').asfreq() - assert_frame_equal(result, expected) - - def test_asfreq_upsample(self): - - # this is a bug, this *should* return a PeriodIndex - # directly - # GH 12884 - s = self.create_series() - new_index = date_range(s.index[0].to_timestamp(how='start'), - (s.index[-1] + 1).to_timestamp(how='start'), - freq='1H', - closed='left') - expected = s.to_timestamp().reindex(new_index).to_period() - result = s.resample('1H').asfreq() - assert_series_equal(result, expected) - - frame = s.to_frame('value') - new_index = date_range(frame.index[0].to_timestamp(how='start'), - (frame.index[-1] + 1).to_timestamp(how='start'), - freq='1H', - closed='left') - expected = frame.to_timestamp().reindex(new_index).to_period() - result = frame.resample('1H').asfreq() - assert_frame_equal(result, expected) + @pytest.mark.parametrize('freq', ['2D', '1H', '2H']) + @pytest.mark.parametrize('kind', ['period', None, 'timestamp']) + def test_asfreq(self, series_and_frame, freq, kind): + # GH 12884, 15944 + # make sure .asfreq() returns PeriodIndex (except kind='timestamp') + + obj = series_and_frame + if kind == 'timestamp': + expected = obj.to_timestamp().resample(freq).asfreq() + else: + start = obj.index[0].to_timestamp(how='start') + end = (obj.index[-1] + 1).to_timestamp(how='start') + new_index = date_range(start=start, end=end, freq=freq, + 
closed='left') + expected = obj.to_timestamp().reindex(new_index).to_period(freq) + result = obj.resample(freq, kind=kind).asfreq() + assert_almost_equal(result, expected) def test_asfreq_fill_value(self): # test for fill value during resampling, issue 3715 @@ -2207,21 +2329,19 @@ def test_asfreq_fill_value(self): result = frame.resample('1H', kind='timestamp').asfreq(fill_value=3.0) assert_frame_equal(result, expected) - def test_selection(self): - index = self.create_series().index + @pytest.mark.parametrize('freq', ['H', '12H', '2D', 'W']) + @pytest.mark.parametrize('kind', [None, 'period', 'timestamp']) + def test_selection(self, index, freq, kind): # This is a bug, these should be implemented # GH 14008 - df = pd.DataFrame({'date': index, - 'a': np.arange(len(index), dtype=np.int64)}, - index=pd.MultiIndex.from_arrays([ - np.arange(len(index), dtype=np.int64), - index], names=['v', 'd'])) - - with tm.assertRaises(NotImplementedError): - df.resample('2D', on='date') - - with tm.assertRaises(NotImplementedError): - df.resample('2D', level='d') + rng = np.arange(len(index), dtype=np.int64) + df = DataFrame({'date': index, 'a': rng}, + index=pd.MultiIndex.from_arrays([rng, index], + names=['v', 'd'])) + with pytest.raises(NotImplementedError): + df.resample(freq, on='date', kind=kind) + with pytest.raises(NotImplementedError): + df.resample(freq, level='d', kind=kind) def test_annual_upsample_D_s_f(self): self._check_annual_upsample_cases('D', 'start', 'ffill') @@ -2283,20 +2403,19 @@ def test_basic_downsample(self): def test_not_subperiod(self): # These are incompatible period rules for resampling ts = _simple_pts('1/1/1990', '6/30/1995', freq='w-wed') - self.assertRaises(ValueError, lambda: ts.resample('a-dec').mean()) - self.assertRaises(ValueError, lambda: ts.resample('q-mar').mean()) - self.assertRaises(ValueError, lambda: ts.resample('M').mean()) - self.assertRaises(ValueError, lambda: ts.resample('w-thu').mean()) + pytest.raises(ValueError, lambda: 
ts.resample('a-dec').mean()) + pytest.raises(ValueError, lambda: ts.resample('q-mar').mean()) + pytest.raises(ValueError, lambda: ts.resample('M').mean()) + pytest.raises(ValueError, lambda: ts.resample('w-thu').mean()) - def test_basic_upsample(self): + @pytest.mark.parametrize('freq', ['D', '2D']) + def test_basic_upsample(self, freq): ts = _simple_pts('1/1/1990', '6/30/1995', freq='M') result = ts.resample('a-dec').mean() - resampled = result.resample('D', convention='end').ffill() - - expected = result.to_timestamp('D', how='end') - expected = expected.asfreq('D', 'ffill').to_period() - + resampled = result.resample(freq, convention='end').ffill() + expected = result.to_timestamp(freq, how='end') + expected = expected.asfreq(freq, 'ffill').to_period(freq) assert_series_equal(resampled, expected) def test_upsample_with_limit(self): @@ -2362,22 +2481,21 @@ def test_resample_basic(self): result2 = s.resample('T', kind='period').mean() assert_series_equal(result2, expected) - def test_resample_count(self): - + @pytest.mark.parametrize('freq,expected_vals', [('M', [31, 29, 31, 9]), + ('2M', [31 + 29, 31 + 9])]) + def test_resample_count(self, freq, expected_vals): # GH12774 - series = pd.Series(1, index=pd.period_range(start='2000', - periods=100)) - result = series.resample('M').count() - - expected_index = pd.period_range(start='2000', freq='M', periods=4) - expected = pd.Series([31, 29, 31, 9], index=expected_index) - + series = Series(1, index=pd.period_range(start='2000', periods=100)) + result = series.resample(freq).count() + expected_index = pd.period_range(start='2000', freq=freq, + periods=len(expected_vals)) + expected = Series(expected_vals, index=expected_index) assert_series_equal(result, expected) def test_resample_same_freq(self): # GH12770 - series = pd.Series(range(3), index=pd.period_range( + series = Series(range(3), index=pd.period_range( start='2000', periods=3, freq='M')) expected = series @@ -2387,15 +2505,12 @@ def 
test_resample_same_freq(self): def test_resample_incompat_freq(self): - with self.assertRaises(IncompatibleFrequency): - pd.Series(range(3), index=pd.period_range( + with pytest.raises(IncompatibleFrequency): + Series(range(3), index=pd.period_range( start='2000', periods=3, freq='M')).resample('W').mean() def test_with_local_timezone_pytz(self): - # GH5430 - tm._skip_if_no_pytz() - import pytz - + # see gh-5430 local_timezone = pytz.timezone('America/Los_Angeles') start = datetime(year=2013, month=11, day=1, hour=0, minute=0, @@ -2406,7 +2521,7 @@ def test_with_local_timezone_pytz(self): index = pd.date_range(start, end, freq='H') - series = pd.Series(1, index=index) + series = Series(1, index=index) series = series.tz_convert(local_timezone) result = series.resample('D', kind='period').mean() @@ -2414,14 +2529,11 @@ def test_with_local_timezone_pytz(self): # Index is moved back a day with the timezone conversion from UTC to # Pacific expected_index = (pd.period_range(start=start, end=end, freq='D') - 1) - expected = pd.Series(1, index=expected_index) + expected = Series(1, index=expected_index) assert_series_equal(result, expected) def test_with_local_timezone_dateutil(self): - # GH5430 - tm._skip_if_no_dateutil() - import dateutil - + # see gh-5430 local_timezone = 'dateutil/America/Los_Angeles' start = datetime(year=2013, month=11, day=1, hour=0, minute=0, @@ -2432,7 +2544,7 @@ def test_with_local_timezone_dateutil(self): index = pd.date_range(start, end, freq='H', name='idx') - series = pd.Series(1, index=index) + series = Series(1, index=index) series = series.tz_convert(local_timezone) result = series.resample('D', kind='period').mean() @@ -2441,7 +2553,7 @@ def test_with_local_timezone_dateutil(self): # Pacific expected_index = (pd.period_range(start=start, end=end, freq='D', name='idx') - 1) - expected = pd.Series(1, index=expected_index) + expected = Series(1, index=expected_index) assert_series_equal(result, expected) def 
test_fill_method_and_how_upsample(self): @@ -2513,14 +2625,17 @@ def test_resample_fill_missing(self): def test_cant_fill_missing_dups(self): rng = PeriodIndex([2000, 2005, 2005, 2007, 2007], freq='A') s = Series(np.random.randn(5), index=rng) - self.assertRaises(Exception, lambda: s.resample('A').ffill()) + pytest.raises(Exception, lambda: s.resample('A').ffill()) - def test_resample_5minute(self): + @pytest.mark.parametrize('freq', ['5min']) + @pytest.mark.parametrize('kind', ['period', None, 'timestamp']) + def test_resample_5minute(self, freq, kind): rng = period_range('1/1/2000', '1/5/2000', freq='T') ts = Series(np.random.randn(len(rng)), index=rng) - - result = ts.resample('5min').mean() - expected = ts.to_timestamp().resample('5min').mean() + expected = ts.to_timestamp().resample(freq).mean() + if kind != 'timestamp': + expected = expected.to_period(freq) + result = ts.resample(freq, kind=kind).mean() assert_series_equal(result, expected) def test_upsample_daily_business_daily(self): @@ -2552,7 +2667,7 @@ def test_resample_weekly_all_na(self): result = ts.resample('W-THU').asfreq() - self.assertTrue(result.isnull().all()) + assert result.isna().all() result = ts.resample('W-THU').asfreq().ffill()[:-1] expected = ts.asfreq('W-THU').ffill() @@ -2630,7 +2745,7 @@ def test_closed_left_corner(self): ex_index = date_range(start='1/1/2012 9:30', freq='10min', periods=3) - self.assert_index_equal(result.index, ex_index) + tm.assert_index_equal(result.index, ex_index) assert_series_equal(result, exp) def test_quarterly_resampling(self): @@ -2652,13 +2767,41 @@ def test_resample_weekly_bug_1726(self): # it works! 
df.resample('W-MON', closed='left', label='left').first() + def test_resample_with_dst_time_change(self): + # GH 15549 + index = pd.DatetimeIndex([1457537600000000000, 1458059600000000000], + tz='UTC').tz_convert('America/Chicago') + df = pd.DataFrame([1, 2], index=index) + result = df.resample('12h', closed='right', + label='right').last().ffill() + + expected_index_values = ['2016-03-09 12:00:00-06:00', + '2016-03-10 00:00:00-06:00', + '2016-03-10 12:00:00-06:00', + '2016-03-11 00:00:00-06:00', + '2016-03-11 12:00:00-06:00', + '2016-03-12 00:00:00-06:00', + '2016-03-12 12:00:00-06:00', + '2016-03-13 00:00:00-06:00', + '2016-03-13 13:00:00-05:00', + '2016-03-14 01:00:00-05:00', + '2016-03-14 13:00:00-05:00', + '2016-03-15 01:00:00-05:00', + '2016-03-15 13:00:00-05:00'] + index = pd.DatetimeIndex(expected_index_values, + tz='UTC').tz_convert('America/Chicago') + expected = pd.DataFrame([1.0, 1.0, 1.0, 1.0, 1.0, + 1.0, 1.0, 1.0, 1.0, 1.0, + 1.0, 1.0, 2.0], index=index) + assert_frame_equal(result, expected) + def test_resample_bms_2752(self): # GH2753 - foo = pd.Series(index=pd.bdate_range('20000101', '20000201')) + foo = Series(index=pd.bdate_range('20000101', '20000201')) res1 = foo.resample("BMS").mean() res2 = foo.resample("BMS").mean().resample("B").mean() - self.assertEqual(res1.index[0], Timestamp('20000103')) - self.assertEqual(res1.index[0], res2.index[0]) + assert res1.index[0] == Timestamp('20000103') + assert res1.index[0] == res2.index[0] # def test_monthly_convention_span(self): # rng = period_range('2000-01', periods=3, freq='M') @@ -2740,10 +2883,96 @@ def test_evenly_divisible_with_no_extra_bins(self): result = df.resample('7D').sum() assert_frame_equal(result, expected) + @pytest.mark.parametrize('kind', ['period', None, 'timestamp']) + @pytest.mark.parametrize('agg_arg', ['mean', {'value': 'mean'}, ['mean']]) + def test_loffset_returns_datetimeindex(self, frame, kind, agg_arg): + # make sure passing loffset returns DatetimeIndex in all cases + # 
basic method taken from Base.test_resample_loffset_arg_type() + df = frame + expected_means = [df.values[i:i + 2].mean() + for i in range(0, len(df.values), 2)] + expected_index = self.create_index(df.index[0], + periods=len(df.index) / 2, + freq='2D') + + # loffset coerces PeriodIndex to DateTimeIndex + expected_index = expected_index.to_timestamp() + expected_index += timedelta(hours=2) + expected = DataFrame({'value': expected_means}, index=expected_index) + + result_agg = df.resample('2D', loffset='2H', kind=kind).agg(agg_arg) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result_how = df.resample('2D', how=agg_arg, loffset='2H', + kind=kind) + if isinstance(agg_arg, list): + expected.columns = pd.MultiIndex.from_tuples([('value', 'mean')]) + assert_frame_equal(result_agg, expected) + assert_frame_equal(result_how, expected) + + @pytest.mark.parametrize('freq, period_mult', [('H', 24), ('12H', 2)]) + @pytest.mark.parametrize('kind', [None, 'period']) + def test_upsampling_ohlc(self, freq, period_mult, kind): + # GH 13083 + pi = PeriodIndex(start='2000', freq='D', periods=10) + s = Series(range(len(pi)), index=pi) + expected = s.to_timestamp().resample(freq).ohlc().to_period(freq) + + # timestamp-based resampling doesn't include all sub-periods + # of the last original period, so extend accordingly: + new_index = PeriodIndex(start='2000', freq=freq, + periods=period_mult * len(pi)) + expected = expected.reindex(new_index) + result = s.resample(freq, kind=kind).ohlc() + assert_frame_equal(result, expected) + + @pytest.mark.parametrize('periods, values', + [([pd.NaT, '1970-01-01 00:00:00', pd.NaT, + '1970-01-01 00:00:02', '1970-01-01 00:00:03'], + [2, 3, 5, 7, 11]), + ([pd.NaT, pd.NaT, '1970-01-01 00:00:00', pd.NaT, + pd.NaT, pd.NaT, '1970-01-01 00:00:02', + '1970-01-01 00:00:03', pd.NaT, pd.NaT], + [1, 2, 3, 5, 6, 8, 7, 11, 12, 13])]) + @pytest.mark.parametrize('freq, expected_values', + [('1s', [3, np.NaN, 7, 11]), + ('2s', [3, 
int((7 + 11) / 2)]), + ('3s', [int((3 + 7) / 2), 11])]) + def test_resample_with_nat(self, periods, values, freq, expected_values): + # GH 13224 + index = PeriodIndex(periods, freq='S') + frame = DataFrame(values, index=index) + + expected_index = period_range('1970-01-01 00:00:00', + periods=len(expected_values), freq=freq) + expected = DataFrame(expected_values, index=expected_index) + result = frame.resample(freq).mean() + assert_frame_equal(result, expected) + + def test_resample_with_only_nat(self): + # GH 13224 + pi = PeriodIndex([pd.NaT] * 3, freq='S') + frame = DataFrame([2, 3, 5], index=pi) + expected_index = PeriodIndex(data=[], freq=pi.freq) + expected = DataFrame([], index=expected_index) + result = frame.resample('1s').mean() + assert_frame_equal(result, expected) + -class TestTimedeltaIndex(Base, tm.TestCase): +class TestTimedeltaIndex(Base): _index_factory = lambda x: timedelta_range + @pytest.fixture + def _index_start(self): + return '1 day' + + @pytest.fixture + def _index_end(self): + return '10 day' + + @pytest.fixture + def _series_name(self): + return 'tdi' + def create_series(self): i = timedelta_range('1 day', '10 day', freq='D') @@ -2762,9 +2991,9 @@ def test_asfreq_bug(self): assert_frame_equal(result, expected) -class TestResamplerGrouper(tm.TestCase): +class TestResamplerGrouper(object): - def setUp(self): + def setup_method(self, method): self.frame = DataFrame({'A': [1] * 20 + [2] * 12 + [3] * 8, 'B': np.arange(40)}, index=date_range('1/1/2000', @@ -2788,6 +3017,19 @@ def test_back_compat_v180(self): expected = df.groupby('A').resample('4s').mean().ffill() assert_frame_equal(result, expected) + def test_tab_complete_ipython6_warning(self, ip): + from IPython.core.completer import provisionalcompleter + code = dedent("""\ + import pandas.util.testing as tm + s = tm.makeTimeSeries() + rs = s.resample("D") + """) + ip.run_code(code) + + with tm.assert_produces_warning(None): + with provisionalcompleter('ignore'): + 
list(ip.Completer.completions('rs.', 1)) + def test_deferred_with_groupby(self): # GH 12486 @@ -2835,20 +3077,47 @@ def test_getitem_multiple(self): # GH 13174 # multiple calls after selection causing an issue with aliasing data = [{'id': 1, 'buyer': 'A'}, {'id': 2, 'buyer': 'B'}] - df = pd.DataFrame(data, index=pd.date_range('2016-01-01', periods=2)) + df = DataFrame(data, index=pd.date_range('2016-01-01', periods=2)) r = df.groupby('id').resample('1D') result = r['buyer'].count() - expected = pd.Series([1, 1], - index=pd.MultiIndex.from_tuples( - [(1, pd.Timestamp('2016-01-01')), - (2, pd.Timestamp('2016-01-02'))], - names=['id', None]), - name='buyer') + expected = Series([1, 1], + index=pd.MultiIndex.from_tuples( + [(1, Timestamp('2016-01-01')), + (2, Timestamp('2016-01-02'))], + names=['id', None]), + name='buyer') assert_series_equal(result, expected) result = r['buyer'].count() assert_series_equal(result, expected) + def test_groupby_resample_on_api_with_getitem(self): + # GH 17813 + df = pd.DataFrame({'id': list('aabbb'), + 'date': pd.date_range('1-1-2016', periods=5), + 'data': 1}) + exp = df.set_index('date').groupby('id').resample('2D')['data'].sum() + result = df.groupby('id').resample('2D', on='date')['data'].sum() + assert_series_equal(result, exp) + + def test_nearest(self): + + # GH 17496 + # Resample nearest + index = pd.date_range('1/1/2000', periods=3, freq='T') + result = Series(range(3), index=index).resample('20s').nearest() + + expected = Series( + [0, 0, 1, 1, 1, 2, 2], + index=pd.DatetimeIndex( + ['2000-01-01 00:00:00', '2000-01-01 00:00:20', + '2000-01-01 00:00:40', '2000-01-01 00:01:00', + '2000-01-01 00:01:20', '2000-01-01 00:01:40', + '2000-01-01 00:02:00'], + dtype='datetime64[ns]', + freq='20S')) + assert_series_equal(result, expected) + def test_methods(self): g = self.frame.groupby('A') r = g.resample('2s') @@ -2875,7 +3144,7 @@ def test_methods(self): expected = g.B.apply(lambda x: getattr(x.resample('2s'), f)()) 
assert_series_equal(result, expected) - for f in ['backfill', 'ffill', 'asfreq']: + for f in ['nearest', 'backfill', 'ffill', 'asfreq']: result = getattr(r, f)() expected = g.apply(lambda x: getattr(x.resample('2s'), f)()) assert_frame_equal(result, expected) @@ -2909,6 +3178,25 @@ def f(x): result = g.apply(f) assert_frame_equal(result, expected) + def test_apply_with_mutated_index(self): + # GH 15169 + index = pd.date_range('1-1-2015', '12-31-15', freq='D') + df = DataFrame(data={'col1': np.random.rand(len(index))}, index=index) + + def f(x): + s = Series([1, 2], index=['a', 'b']) + return s + + expected = df.groupby(pd.Grouper(freq='M')).apply(f) + + result = df.resample('M').apply(f) + assert_frame_equal(result, expected) + + # A case for series + expected = df['col1'].groupby(pd.Grouper(freq='M')).apply(f) + result = df['col1'].resample('M').apply(f) + assert_series_equal(result, expected) + def test_resample_groupby_with_label(self): # GH 13235 index = date_range('2000-01-01', freq='2D', periods=5) @@ -2935,20 +3223,19 @@ def test_consistency_with_window(self): df = self.frame expected = pd.Int64Index([1, 2, 3], name='A') result = df.groupby('A').resample('2s').mean() - self.assertEqual(result.index.nlevels, 2) + assert result.index.nlevels == 2 tm.assert_index_equal(result.index.levels[0], expected) result = df.groupby('A').rolling(20).mean() - self.assertEqual(result.index.nlevels, 2) + assert result.index.nlevels == 2 tm.assert_index_equal(result.index.levels[0], expected) def test_median_duplicate_columns(self): # GH 14233 - df = pd.DataFrame(np.random.randn(20, 3), - columns=list('aaa'), - index=pd.date_range('2012-01-01', - periods=20, freq='s')) + df = DataFrame(np.random.randn(20, 3), + columns=list('aaa'), + index=pd.date_range('2012-01-01', periods=20, freq='s')) df2 = df.copy() df2.columns = ['a', 'b', 'c'] expected = df2.resample('5s').median() @@ -2957,14 +3244,16 @@ def test_median_duplicate_columns(self): assert_frame_equal(result, expected) 
-class TestTimeGrouper(tm.TestCase): +class TestTimeGrouper(object): - def setUp(self): + def setup_method(self, method): self.ts = Series(np.random.randn(1000), index=date_range('1/1/2000', periods=1000)) def test_apply(self): - grouper = TimeGrouper('A', label='right', closed='right') + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + grouper = pd.TimeGrouper(freq='A', label='right', closed='right') grouped = self.ts.groupby(grouper) @@ -2982,7 +3271,9 @@ def test_count(self): expected = self.ts.groupby(lambda x: x.year).count() - grouper = TimeGrouper('A', label='right', closed='right') + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + grouper = pd.TimeGrouper(freq='A', label='right', closed='right') result = self.ts.groupby(grouper).count() expected.index = result.index assert_series_equal(result, expected) @@ -3014,25 +3305,27 @@ def test_apply_iteration(self): # it works! result = grouped.apply(f) - self.assert_index_equal(result.index, df.index) + tm.assert_index_equal(result.index, df.index) def test_panel_aggregation(self): ind = pd.date_range('1/1/2000', periods=100) data = np.random.randn(2, len(ind), 4) - wp = pd.Panel(data, items=['Item1', 'Item2'], major_axis=ind, - minor_axis=['A', 'B', 'C', 'D']) - tg = TimeGrouper('M', axis=1) - _, grouper, _ = tg._get_grouper(wp) - bingrouped = wp.groupby(grouper) - binagg = bingrouped.mean() + with catch_warnings(record=True): + wp = Panel(data, items=['Item1', 'Item2'], major_axis=ind, + minor_axis=['A', 'B', 'C', 'D']) - def f(x): - assert (isinstance(x, Panel)) - return x.mean(1) + tg = TimeGrouper('M', axis=1) + _, grouper, _ = tg._get_grouper(wp) + bingrouped = wp.groupby(grouper) + binagg = bingrouped.mean() + + def f(x): + assert (isinstance(x, Panel)) + return x.mean(1) - result = bingrouped.agg(f) - tm.assert_panel_equal(result, binagg) + result = bingrouped.agg(f) + tm.assert_panel_equal(result, binagg) def test_fails_on_no_datetime_index(self): 
index_names = ('Int64Index', 'Index', 'Float64Index', 'MultiIndex') @@ -3043,19 +3336,13 @@ def test_fails_on_no_datetime_index(self): for name, func in zip(index_names, index_funcs): index = func(n) df = DataFrame({'a': np.random.randn(n)}, index=index) - with tm.assertRaisesRegexp(TypeError, - "Only valid with DatetimeIndex, " - "TimedeltaIndex or PeriodIndex, " - "but got an instance of %r" % name): + with tm.assert_raises_regex(TypeError, + "Only valid with " + "DatetimeIndex, TimedeltaIndex " + "or PeriodIndex, but got an " + "instance of %r" % name): df.groupby(TimeGrouper('D')) - # PeriodIndex gives a specific error message - df = DataFrame({'a': np.random.randn(n)}, index=tm.makePeriodIndex(n)) - with tm.assertRaisesRegexp(TypeError, - "axis must be a DatetimeIndex, but " - "got an instance of 'PeriodIndex'"): - df.groupby(TimeGrouper('D')) - def test_aaa_group_order(self): # GH 12840 # check TimeGrouper perform stable sorts @@ -3135,8 +3422,45 @@ def test_aggregate_normal(self): assert_frame_equal(expected, dt_result) """ - def test_aggregate_with_nat(self): + @pytest.mark.parametrize('method, unit', [ + ('sum', 0), + ('prod', 1), + ]) + def test_resample_entirly_nat_window(self, method, unit): + s = pd.Series([0] * 2 + [np.nan] * 2, + index=pd.date_range('2017', periods=4)) + # 0 / 1 by default + result = methodcaller(method)(s.resample("2d")) + expected = pd.Series([0.0, unit], + index=pd.to_datetime(['2017-01-01', + '2017-01-03'])) + tm.assert_series_equal(result, expected) + + # min_count=0 + result = methodcaller(method, min_count=0)(s.resample("2d")) + expected = pd.Series([0.0, unit], + index=pd.to_datetime(['2017-01-01', + '2017-01-03'])) + tm.assert_series_equal(result, expected) + + # min_count=1 + result = methodcaller(method, min_count=1)(s.resample("2d")) + expected = pd.Series([0.0, np.nan], + index=pd.to_datetime(['2017-01-01', + '2017-01-03'])) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('func, fill_value', [ + 
('min', np.nan), + ('max', np.nan), + ('sum', 0), + ('prod', 1), + ('count', 0), + ]) + def test_aggregate_with_nat(self, func, fill_value): # check TimeGrouper's aggregation is identical as normal groupby + # if NaT is included, 'var', 'std', 'mean', 'first','last' + # and 'nth' doesn't work yet n = 20 data = np.random.randn(n, 4).astype('int64') @@ -3150,39 +3474,78 @@ def test_aggregate_with_nat(self): normal_grouped = normal_df.groupby('key') dt_grouped = dt_df.groupby(TimeGrouper(key='key', freq='D')) - for func in ['min', 'max', 'sum', 'prod']: - normal_result = getattr(normal_grouped, func)() - dt_result = getattr(dt_grouped, func)() - pad = DataFrame([[np.nan, np.nan, np.nan, np.nan]], index=[3], - columns=['A', 'B', 'C', 'D']) - expected = normal_result.append(pad) - expected = expected.sort_index() - expected.index = date_range(start='2013-01-01', freq='D', - periods=5, name='key') - assert_frame_equal(expected, dt_result) + normal_result = getattr(normal_grouped, func)() + dt_result = getattr(dt_grouped, func)() - for func in ['count']: - normal_result = getattr(normal_grouped, func)() - pad = DataFrame([[0, 0, 0, 0]], index=[3], - columns=['A', 'B', 'C', 'D']) - expected = normal_result.append(pad) - expected = expected.sort_index() - expected.index = date_range(start='2013-01-01', freq='D', - periods=5, name='key') - dt_result = getattr(dt_grouped, func)() - assert_frame_equal(expected, dt_result) + pad = DataFrame([[fill_value] * 4], index=[3], + columns=['A', 'B', 'C', 'D']) + expected = normal_result.append(pad) + expected = expected.sort_index() + expected.index = date_range(start='2013-01-01', freq='D', + periods=5, name='key') + assert_frame_equal(expected, dt_result) + assert dt_result.index.name == 'key' - for func in ['size']: - normal_result = getattr(normal_grouped, func)() - pad = Series([0], index=[3]) - expected = normal_result.append(pad) - expected = expected.sort_index() - expected.index = date_range(start='2013-01-01', freq='D', - 
periods=5, name='key') - dt_result = getattr(dt_grouped, func)() - assert_series_equal(expected, dt_result) - # GH 9925 - self.assertEqual(dt_result.index.name, 'key') + def test_aggregate_with_nat_size(self): + # GH 9925 + n = 20 + data = np.random.randn(n, 4).astype('int64') + normal_df = DataFrame(data, columns=['A', 'B', 'C', 'D']) + normal_df['key'] = [1, 2, np.nan, 4, 5] * 4 + + dt_df = DataFrame(data, columns=['A', 'B', 'C', 'D']) + dt_df['key'] = [datetime(2013, 1, 1), datetime(2013, 1, 2), pd.NaT, + datetime(2013, 1, 4), datetime(2013, 1, 5)] * 4 + + normal_grouped = normal_df.groupby('key') + dt_grouped = dt_df.groupby(TimeGrouper(key='key', freq='D')) - # if NaT is included, 'var', 'std', 'mean', 'first','last' - # and 'nth' doesn't work yet + normal_result = normal_grouped.size() + dt_result = dt_grouped.size() + + pad = Series([0], index=[3]) + expected = normal_result.append(pad) + expected = expected.sort_index() + expected.index = date_range(start='2013-01-01', freq='D', + periods=5, name='key') + assert_series_equal(expected, dt_result) + assert dt_result.index.name == 'key' + + def test_repr(self): + # GH18203 + result = repr(TimeGrouper(key='A', freq='H')) + expected = ("TimeGrouper(key='A', freq=, axis=0, sort=True, " + "closed='left', label='left', how='mean', " + "convention='e', base=0)") + assert result == expected + + @pytest.mark.parametrize('method, unit', [ + ('sum', 0), + ('prod', 1), + ]) + def test_upsample_sum(self, method, unit): + s = pd.Series(1, index=pd.date_range("2017", periods=2, freq="H")) + resampled = s.resample("30T") + index = pd.to_datetime(['2017-01-01T00:00:00', + '2017-01-01T00:30:00', + '2017-01-01T01:00:00']) + + # 0 / 1 by default + result = methodcaller(method)(resampled) + expected = pd.Series([1, unit, 1], index=index) + tm.assert_series_equal(result, expected) + + # min_count=0 + result = methodcaller(method, min_count=0)(resampled) + expected = pd.Series([1, unit, 1], index=index) + 
tm.assert_series_equal(result, expected) + + # min_count=1 + result = methodcaller(method, min_count=1)(resampled) + expected = pd.Series([1, np.nan, 1], index=index) + tm.assert_series_equal(result, expected) + + # min_count>1 + result = methodcaller(method, min_count=2)(resampled) + expected = pd.Series([np.nan, np.nan, np.nan], index=index) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/test_reshape.py b/pandas/tests/test_reshape.py deleted file mode 100644 index d587e4ea6a1fa..0000000000000 --- a/pandas/tests/test_reshape.py +++ /dev/null @@ -1,952 +0,0 @@ -# -*- coding: utf-8 -*- -# pylint: disable-msg=W0612,E1101 - -from pandas import DataFrame, Series -from pandas.core.sparse import SparseDataFrame -import pandas as pd - -from numpy import nan -import numpy as np - -from pandas.util.testing import assert_frame_equal - -from pandas.core.reshape import (melt, lreshape, get_dummies, wide_to_long) -import pandas.util.testing as tm -from pandas.compat import range, u - - -class TestMelt(tm.TestCase): - - def setUp(self): - self.df = tm.makeTimeDataFrame()[:10] - self.df['id1'] = (self.df['A'] > 0).astype(np.int64) - self.df['id2'] = (self.df['B'] > 0).astype(np.int64) - - self.var_name = 'var' - self.value_name = 'val' - - self.df1 = pd.DataFrame([[1.067683, -1.110463, 0.20867 - ], [-1.321405, 0.368915, -1.055342], - [-0.807333, 0.08298, -0.873361]]) - self.df1.columns = [list('ABC'), list('abc')] - self.df1.columns.names = ['CAP', 'low'] - - def test_default_col_names(self): - result = melt(self.df) - self.assertEqual(result.columns.tolist(), ['variable', 'value']) - - result1 = melt(self.df, id_vars=['id1']) - self.assertEqual(result1.columns.tolist(), ['id1', 'variable', 'value' - ]) - - result2 = melt(self.df, id_vars=['id1', 'id2']) - self.assertEqual(result2.columns.tolist(), ['id1', 'id2', 'variable', - 'value']) - - def test_value_vars(self): - result3 = melt(self.df, id_vars=['id1', 'id2'], value_vars='A') - 
self.assertEqual(len(result3), 10) - - result4 = melt(self.df, id_vars=['id1', 'id2'], value_vars=['A', 'B']) - expected4 = DataFrame({'id1': self.df['id1'].tolist() * 2, - 'id2': self.df['id2'].tolist() * 2, - 'variable': ['A'] * 10 + ['B'] * 10, - 'value': (self.df['A'].tolist() + - self.df['B'].tolist())}, - columns=['id1', 'id2', 'variable', 'value']) - tm.assert_frame_equal(result4, expected4) - - def test_value_vars_types(self): - # GH 15348 - expected = DataFrame({'id1': self.df['id1'].tolist() * 2, - 'id2': self.df['id2'].tolist() * 2, - 'variable': ['A'] * 10 + ['B'] * 10, - 'value': (self.df['A'].tolist() + - self.df['B'].tolist())}, - columns=['id1', 'id2', 'variable', 'value']) - - for type_ in (tuple, list, np.array): - result = melt(self.df, id_vars=['id1', 'id2'], - value_vars=type_(('A', 'B'))) - tm.assert_frame_equal(result, expected) - - def test_vars_work_with_multiindex(self): - expected = DataFrame({ - ('A', 'a'): self.df1[('A', 'a')], - 'CAP': ['B'] * len(self.df1), - 'low': ['b'] * len(self.df1), - 'value': self.df1[('B', 'b')], - }, columns=[('A', 'a'), 'CAP', 'low', 'value']) - - result = melt(self.df1, id_vars=[('A', 'a')], value_vars=[('B', 'b')]) - tm.assert_frame_equal(result, expected) - - def test_tuple_vars_fail_with_multiindex(self): - # melt should fail with an informative error message if - # the columns have a MultiIndex and a tuple is passed - # for id_vars or value_vars. 
- tuple_a = ('A', 'a') - list_a = [tuple_a] - tuple_b = ('B', 'b') - list_b = [tuple_b] - - for id_vars, value_vars in ((tuple_a, list_b), (list_a, tuple_b), - (tuple_a, tuple_b)): - with tm.assertRaisesRegexp(ValueError, r'MultiIndex'): - melt(self.df1, id_vars=id_vars, value_vars=value_vars) - - def test_custom_var_name(self): - result5 = melt(self.df, var_name=self.var_name) - self.assertEqual(result5.columns.tolist(), ['var', 'value']) - - result6 = melt(self.df, id_vars=['id1'], var_name=self.var_name) - self.assertEqual(result6.columns.tolist(), ['id1', 'var', 'value']) - - result7 = melt(self.df, id_vars=['id1', 'id2'], var_name=self.var_name) - self.assertEqual(result7.columns.tolist(), ['id1', 'id2', 'var', - 'value']) - - result8 = melt(self.df, id_vars=['id1', 'id2'], value_vars='A', - var_name=self.var_name) - self.assertEqual(result8.columns.tolist(), ['id1', 'id2', 'var', - 'value']) - - result9 = melt(self.df, id_vars=['id1', 'id2'], value_vars=['A', 'B'], - var_name=self.var_name) - expected9 = DataFrame({'id1': self.df['id1'].tolist() * 2, - 'id2': self.df['id2'].tolist() * 2, - self.var_name: ['A'] * 10 + ['B'] * 10, - 'value': (self.df['A'].tolist() + - self.df['B'].tolist())}, - columns=['id1', 'id2', self.var_name, 'value']) - tm.assert_frame_equal(result9, expected9) - - def test_custom_value_name(self): - result10 = melt(self.df, value_name=self.value_name) - self.assertEqual(result10.columns.tolist(), ['variable', 'val']) - - result11 = melt(self.df, id_vars=['id1'], value_name=self.value_name) - self.assertEqual(result11.columns.tolist(), ['id1', 'variable', 'val']) - - result12 = melt(self.df, id_vars=['id1', 'id2'], - value_name=self.value_name) - self.assertEqual(result12.columns.tolist(), ['id1', 'id2', 'variable', - 'val']) - - result13 = melt(self.df, id_vars=['id1', 'id2'], value_vars='A', - value_name=self.value_name) - self.assertEqual(result13.columns.tolist(), ['id1', 'id2', 'variable', - 'val']) - - result14 = melt(self.df, 
id_vars=['id1', 'id2'], value_vars=['A', 'B'], - value_name=self.value_name) - expected14 = DataFrame({'id1': self.df['id1'].tolist() * 2, - 'id2': self.df['id2'].tolist() * 2, - 'variable': ['A'] * 10 + ['B'] * 10, - self.value_name: (self.df['A'].tolist() + - self.df['B'].tolist())}, - columns=['id1', 'id2', 'variable', - self.value_name]) - tm.assert_frame_equal(result14, expected14) - - def test_custom_var_and_value_name(self): - - result15 = melt(self.df, var_name=self.var_name, - value_name=self.value_name) - self.assertEqual(result15.columns.tolist(), ['var', 'val']) - - result16 = melt(self.df, id_vars=['id1'], var_name=self.var_name, - value_name=self.value_name) - self.assertEqual(result16.columns.tolist(), ['id1', 'var', 'val']) - - result17 = melt(self.df, id_vars=['id1', 'id2'], - var_name=self.var_name, value_name=self.value_name) - self.assertEqual(result17.columns.tolist(), ['id1', 'id2', 'var', 'val' - ]) - - result18 = melt(self.df, id_vars=['id1', 'id2'], value_vars='A', - var_name=self.var_name, value_name=self.value_name) - self.assertEqual(result18.columns.tolist(), ['id1', 'id2', 'var', 'val' - ]) - - result19 = melt(self.df, id_vars=['id1', 'id2'], value_vars=['A', 'B'], - var_name=self.var_name, value_name=self.value_name) - expected19 = DataFrame({'id1': self.df['id1'].tolist() * 2, - 'id2': self.df['id2'].tolist() * 2, - self.var_name: ['A'] * 10 + ['B'] * 10, - self.value_name: (self.df['A'].tolist() + - self.df['B'].tolist())}, - columns=['id1', 'id2', self.var_name, - self.value_name]) - tm.assert_frame_equal(result19, expected19) - - df20 = self.df.copy() - df20.columns.name = 'foo' - result20 = melt(df20) - self.assertEqual(result20.columns.tolist(), ['foo', 'value']) - - def test_col_level(self): - res1 = melt(self.df1, col_level=0) - res2 = melt(self.df1, col_level='CAP') - self.assertEqual(res1.columns.tolist(), ['CAP', 'value']) - self.assertEqual(res2.columns.tolist(), ['CAP', 'value']) - - def test_multiindex(self): - res = 
pd.melt(self.df1) - self.assertEqual(res.columns.tolist(), ['CAP', 'low', 'value']) - - -class TestGetDummies(tm.TestCase): - - sparse = False - - def setUp(self): - self.df = DataFrame({'A': ['a', 'b', 'a'], - 'B': ['b', 'b', 'c'], - 'C': [1, 2, 3]}) - - def test_basic(self): - s_list = list('abc') - s_series = Series(s_list) - s_series_index = Series(s_list, list('ABC')) - - expected = DataFrame({'a': {0: 1, - 1: 0, - 2: 0}, - 'b': {0: 0, - 1: 1, - 2: 0}, - 'c': {0: 0, - 1: 0, - 2: 1}}, dtype=np.uint8) - assert_frame_equal(get_dummies(s_list, sparse=self.sparse), expected) - assert_frame_equal(get_dummies(s_series, sparse=self.sparse), expected) - - expected.index = list('ABC') - assert_frame_equal( - get_dummies(s_series_index, sparse=self.sparse), expected) - - def test_basic_types(self): - # GH 10531 - s_list = list('abc') - s_series = Series(s_list) - s_df = DataFrame({'a': [0, 1, 0, 1, 2], - 'b': ['A', 'A', 'B', 'C', 'C'], - 'c': [2, 3, 3, 3, 2]}) - - if not self.sparse: - exp_df_type = DataFrame - exp_blk_type = pd.core.internals.IntBlock - else: - exp_df_type = SparseDataFrame - exp_blk_type = pd.core.internals.SparseBlock - - self.assertEqual( - type(get_dummies(s_list, sparse=self.sparse)), exp_df_type) - self.assertEqual( - type(get_dummies(s_series, sparse=self.sparse)), exp_df_type) - - r = get_dummies(s_df, sparse=self.sparse, columns=s_df.columns) - self.assertEqual(type(r), exp_df_type) - - r = get_dummies(s_df, sparse=self.sparse, columns=['a']) - self.assertEqual(type(r[['a_0']]._data.blocks[0]), exp_blk_type) - self.assertEqual(type(r[['a_1']]._data.blocks[0]), exp_blk_type) - self.assertEqual(type(r[['a_2']]._data.blocks[0]), exp_blk_type) - - def test_just_na(self): - just_na_list = [np.nan] - just_na_series = Series(just_na_list) - just_na_series_index = Series(just_na_list, index=['A']) - - res_list = get_dummies(just_na_list, sparse=self.sparse) - res_series = get_dummies(just_na_series, sparse=self.sparse) - res_series_index = 
get_dummies(just_na_series_index, - sparse=self.sparse) - - self.assertEqual(res_list.empty, True) - self.assertEqual(res_series.empty, True) - self.assertEqual(res_series_index.empty, True) - - self.assertEqual(res_list.index.tolist(), [0]) - self.assertEqual(res_series.index.tolist(), [0]) - self.assertEqual(res_series_index.index.tolist(), ['A']) - - def test_include_na(self): - s = ['a', 'b', np.nan] - res = get_dummies(s, sparse=self.sparse) - exp = DataFrame({'a': {0: 1, 1: 0, 2: 0}, - 'b': {0: 0, 1: 1, 2: 0}}, dtype=np.uint8) - assert_frame_equal(res, exp) - - # Sparse dataframes do not allow nan labelled columns, see #GH8822 - res_na = get_dummies(s, dummy_na=True, sparse=self.sparse) - exp_na = DataFrame({nan: {0: 0, 1: 0, 2: 1}, - 'a': {0: 1, 1: 0, 2: 0}, - 'b': {0: 0, 1: 1, 2: 0}}, - dtype=np.uint8) - exp_na = exp_na.reindex_axis(['a', 'b', nan], 1) - # hack (NaN handling in assert_index_equal) - exp_na.columns = res_na.columns - assert_frame_equal(res_na, exp_na) - - res_just_na = get_dummies([nan], dummy_na=True, sparse=self.sparse) - exp_just_na = DataFrame(Series(1, index=[0]), columns=[nan], - dtype=np.uint8) - tm.assert_numpy_array_equal(res_just_na.values, exp_just_na.values) - - def test_unicode(self - ): # See GH 6885 - get_dummies chokes on unicode values - import unicodedata - e = 'e' - eacute = unicodedata.lookup('LATIN SMALL LETTER E WITH ACUTE') - s = [e, eacute, eacute] - res = get_dummies(s, prefix='letter', sparse=self.sparse) - exp = DataFrame({'letter_e': {0: 1, - 1: 0, - 2: 0}, - u('letter_%s') % eacute: {0: 0, - 1: 1, - 2: 1}}, - dtype=np.uint8) - assert_frame_equal(res, exp) - - def test_dataframe_dummies_all_obj(self): - df = self.df[['A', 'B']] - result = get_dummies(df, sparse=self.sparse) - expected = DataFrame({'A_a': [1, 0, 1], - 'A_b': [0, 1, 0], - 'B_b': [1, 1, 0], - 'B_c': [0, 0, 1]}, dtype=np.uint8) - assert_frame_equal(result, expected) - - def test_dataframe_dummies_mix_default(self): - df = self.df - result = 
get_dummies(df, sparse=self.sparse) - expected = DataFrame({'C': [1, 2, 3], - 'A_a': [1, 0, 1], - 'A_b': [0, 1, 0], - 'B_b': [1, 1, 0], - 'B_c': [0, 0, 1]}) - cols = ['A_a', 'A_b', 'B_b', 'B_c'] - expected[cols] = expected[cols].astype(np.uint8) - expected = expected[['C', 'A_a', 'A_b', 'B_b', 'B_c']] - assert_frame_equal(result, expected) - - def test_dataframe_dummies_prefix_list(self): - prefixes = ['from_A', 'from_B'] - df = DataFrame({'A': ['a', 'b', 'a'], - 'B': ['b', 'b', 'c'], - 'C': [1, 2, 3]}) - result = get_dummies(df, prefix=prefixes, sparse=self.sparse) - expected = DataFrame({'C': [1, 2, 3], - 'from_A_a': [1, 0, 1], - 'from_A_b': [0, 1, 0], - 'from_B_b': [1, 1, 0], - 'from_B_c': [0, 0, 1]}) - cols = expected.columns[1:] - expected[cols] = expected[cols].astype(np.uint8) - expected = expected[['C', 'from_A_a', 'from_A_b', 'from_B_b', - 'from_B_c']] - assert_frame_equal(result, expected) - - def test_dataframe_dummies_prefix_str(self): - # not that you should do this... - df = self.df - result = get_dummies(df, prefix='bad', sparse=self.sparse) - expected = DataFrame([[1, 1, 0, 1, 0], - [2, 0, 1, 1, 0], - [3, 1, 0, 0, 1]], - columns=['C', 'bad_a', 'bad_b', 'bad_b', 'bad_c'], - dtype=np.uint8) - expected = expected.astype({"C": np.int64}) - assert_frame_equal(result, expected) - - def test_dataframe_dummies_subset(self): - df = self.df - result = get_dummies(df, prefix=['from_A'], columns=['A'], - sparse=self.sparse) - expected = DataFrame({'from_A_a': [1, 0, 1], - 'from_A_b': [0, 1, 0], - 'B': ['b', 'b', 'c'], - 'C': [1, 2, 3]}) - cols = ['from_A_a', 'from_A_b'] - expected[cols] = expected[cols].astype(np.uint8) - assert_frame_equal(result, expected) - - def test_dataframe_dummies_prefix_sep(self): - df = self.df - result = get_dummies(df, prefix_sep='..', sparse=self.sparse) - expected = DataFrame({'C': [1, 2, 3], - 'A..a': [1, 0, 1], - 'A..b': [0, 1, 0], - 'B..b': [1, 1, 0], - 'B..c': [0, 0, 1]}) - expected = expected[['C', 'A..a', 'A..b', 'B..b', 
'B..c']] - cols = expected.columns[1:] - expected[cols] = expected[cols].astype(np.uint8) - assert_frame_equal(result, expected) - - result = get_dummies(df, prefix_sep=['..', '__'], sparse=self.sparse) - expected = expected.rename(columns={'B..b': 'B__b', 'B..c': 'B__c'}) - assert_frame_equal(result, expected) - - result = get_dummies(df, prefix_sep={'A': '..', - 'B': '__'}, sparse=self.sparse) - assert_frame_equal(result, expected) - - def test_dataframe_dummies_prefix_bad_length(self): - with tm.assertRaises(ValueError): - get_dummies(self.df, prefix=['too few'], sparse=self.sparse) - - def test_dataframe_dummies_prefix_sep_bad_length(self): - with tm.assertRaises(ValueError): - get_dummies(self.df, prefix_sep=['bad'], sparse=self.sparse) - - def test_dataframe_dummies_prefix_dict(self): - prefixes = {'A': 'from_A', 'B': 'from_B'} - df = DataFrame({'A': ['a', 'b', 'a'], - 'B': ['b', 'b', 'c'], - 'C': [1, 2, 3]}) - result = get_dummies(df, prefix=prefixes, sparse=self.sparse) - expected = DataFrame({'from_A_a': [1, 0, 1], - 'from_A_b': [0, 1, 0], - 'from_B_b': [1, 1, 0], - 'from_B_c': [0, 0, 1], - 'C': [1, 2, 3]}) - cols = ['from_A_a', 'from_A_b', 'from_B_b', 'from_B_c'] - expected[cols] = expected[cols].astype(np.uint8) - assert_frame_equal(result, expected) - - def test_dataframe_dummies_with_na(self): - df = self.df - df.loc[3, :] = [np.nan, np.nan, np.nan] - result = get_dummies(df, dummy_na=True, sparse=self.sparse) - expected = DataFrame({'C': [1, 2, 3, np.nan], - 'A_a': [1, 0, 1, 0], - 'A_b': [0, 1, 0, 0], - 'A_nan': [0, 0, 0, 1], - 'B_b': [1, 1, 0, 0], - 'B_c': [0, 0, 1, 0], - 'B_nan': [0, 0, 0, 1]}) - cols = ['A_a', 'A_b', 'A_nan', 'B_b', 'B_c', 'B_nan'] - expected[cols] = expected[cols].astype(np.uint8) - expected = expected[['C', 'A_a', 'A_b', 'A_nan', - 'B_b', 'B_c', 'B_nan']] - assert_frame_equal(result, expected) - - result = get_dummies(df, dummy_na=False, sparse=self.sparse) - expected = expected[['C', 'A_a', 'A_b', 'B_b', 'B_c']] - 
assert_frame_equal(result, expected) - - def test_dataframe_dummies_with_categorical(self): - df = self.df - df['cat'] = pd.Categorical(['x', 'y', 'y']) - result = get_dummies(df, sparse=self.sparse) - expected = DataFrame({'C': [1, 2, 3], - 'A_a': [1, 0, 1], - 'A_b': [0, 1, 0], - 'B_b': [1, 1, 0], - 'B_c': [0, 0, 1], - 'cat_x': [1, 0, 0], - 'cat_y': [0, 1, 1]}) - cols = ['A_a', 'A_b', 'B_b', 'B_c', 'cat_x', 'cat_y'] - expected[cols] = expected[cols].astype(np.uint8) - expected = expected[['C', 'A_a', 'A_b', 'B_b', 'B_c', - 'cat_x', 'cat_y']] - assert_frame_equal(result, expected) - - # GH12402 Add a new parameter `drop_first` to avoid collinearity - def test_basic_drop_first(self): - # Basic case - s_list = list('abc') - s_series = Series(s_list) - s_series_index = Series(s_list, list('ABC')) - - expected = DataFrame({'b': {0: 0, - 1: 1, - 2: 0}, - 'c': {0: 0, - 1: 0, - 2: 1}}, dtype=np.uint8) - - result = get_dummies(s_list, sparse=self.sparse, drop_first=True) - assert_frame_equal(result, expected) - - result = get_dummies(s_series, sparse=self.sparse, drop_first=True) - assert_frame_equal(result, expected) - - expected.index = list('ABC') - result = get_dummies(s_series_index, sparse=self.sparse, - drop_first=True) - assert_frame_equal(result, expected) - - def test_basic_drop_first_one_level(self): - # Test the case that categorical variable only has one level. 
- s_list = list('aaa') - s_series = Series(s_list) - s_series_index = Series(s_list, list('ABC')) - - expected = DataFrame(index=np.arange(3)) - - result = get_dummies(s_list, sparse=self.sparse, drop_first=True) - assert_frame_equal(result, expected) - - result = get_dummies(s_series, sparse=self.sparse, drop_first=True) - assert_frame_equal(result, expected) - - expected = DataFrame(index=list('ABC')) - result = get_dummies(s_series_index, sparse=self.sparse, - drop_first=True) - assert_frame_equal(result, expected) - - def test_basic_drop_first_NA(self): - # Test NA hadling together with drop_first - s_NA = ['a', 'b', np.nan] - res = get_dummies(s_NA, sparse=self.sparse, drop_first=True) - exp = DataFrame({'b': {0: 0, - 1: 1, - 2: 0}}, dtype=np.uint8) - assert_frame_equal(res, exp) - - res_na = get_dummies(s_NA, dummy_na=True, sparse=self.sparse, - drop_first=True) - exp_na = DataFrame({'b': {0: 0, - 1: 1, - 2: 0}, - nan: {0: 0, - 1: 0, - 2: 1}}, dtype=np.uint8).reindex_axis( - ['b', nan], 1) - assert_frame_equal(res_na, exp_na) - - res_just_na = get_dummies([nan], dummy_na=True, sparse=self.sparse, - drop_first=True) - exp_just_na = DataFrame(index=np.arange(1)) - assert_frame_equal(res_just_na, exp_just_na) - - def test_dataframe_dummies_drop_first(self): - df = self.df[['A', 'B']] - result = get_dummies(df, sparse=self.sparse, drop_first=True) - expected = DataFrame({'A_b': [0, 1, 0], - 'B_c': [0, 0, 1]}, dtype=np.uint8) - assert_frame_equal(result, expected) - - def test_dataframe_dummies_drop_first_with_categorical(self): - df = self.df - df['cat'] = pd.Categorical(['x', 'y', 'y']) - result = get_dummies(df, sparse=self.sparse, drop_first=True) - expected = DataFrame({'C': [1, 2, 3], - 'A_b': [0, 1, 0], - 'B_c': [0, 0, 1], - 'cat_y': [0, 1, 1]}) - cols = ['A_b', 'B_c', 'cat_y'] - expected[cols] = expected[cols].astype(np.uint8) - expected = expected[['C', 'A_b', 'B_c', 'cat_y']] - assert_frame_equal(result, expected) - - def 
test_dataframe_dummies_drop_first_with_na(self): - df = self.df - df.loc[3, :] = [np.nan, np.nan, np.nan] - result = get_dummies(df, dummy_na=True, sparse=self.sparse, - drop_first=True) - expected = DataFrame({'C': [1, 2, 3, np.nan], - 'A_b': [0, 1, 0, 0], - 'A_nan': [0, 0, 0, 1], - 'B_c': [0, 0, 1, 0], - 'B_nan': [0, 0, 0, 1]}) - cols = ['A_b', 'A_nan', 'B_c', 'B_nan'] - expected[cols] = expected[cols].astype(np.uint8) - - expected = expected[['C', 'A_b', 'A_nan', 'B_c', 'B_nan']] - assert_frame_equal(result, expected) - - result = get_dummies(df, dummy_na=False, sparse=self.sparse, - drop_first=True) - expected = expected[['C', 'A_b', 'B_c']] - assert_frame_equal(result, expected) - - def test_int_int(self): - data = Series([1, 2, 1]) - result = pd.get_dummies(data) - expected = DataFrame([[1, 0], [0, 1], [1, 0]], columns=[1, 2], - dtype=np.uint8) - tm.assert_frame_equal(result, expected) - - data = Series(pd.Categorical(['a', 'b', 'a'])) - result = pd.get_dummies(data) - expected = DataFrame([[1, 0], [0, 1], [1, 0]], - columns=pd.Categorical(['a', 'b']), - dtype=np.uint8) - tm.assert_frame_equal(result, expected) - - def test_int_df(self): - data = DataFrame( - {'A': [1, 2, 1], - 'B': pd.Categorical(['a', 'b', 'a']), - 'C': [1, 2, 1], - 'D': [1., 2., 1.] 
- } - ) - columns = ['C', 'D', 'A_1', 'A_2', 'B_a', 'B_b'] - expected = DataFrame([ - [1, 1., 1, 0, 1, 0], - [2, 2., 0, 1, 0, 1], - [1, 1., 1, 0, 1, 0] - ], columns=columns) - expected[columns[2:]] = expected[columns[2:]].astype(np.uint8) - result = pd.get_dummies(data, columns=['A', 'B']) - tm.assert_frame_equal(result, expected) - - def test_dataframe_dummies_preserve_categorical_dtype(self): - # GH13854 - for ordered in [False, True]: - cat = pd.Categorical(list("xy"), categories=list("xyz"), - ordered=ordered) - result = get_dummies(cat) - - data = np.array([[1, 0, 0], [0, 1, 0]], dtype=np.uint8) - cols = pd.CategoricalIndex(cat.categories, - categories=cat.categories, - ordered=ordered) - expected = DataFrame(data, columns=cols) - - tm.assert_frame_equal(result, expected) - - -class TestGetDummiesSparse(TestGetDummies): - sparse = True - - -class TestMakeAxisDummies(tm.TestCase): - - def test_preserve_categorical_dtype(self): - # GH13854 - for ordered in [False, True]: - cidx = pd.CategoricalIndex(list("xyz"), ordered=ordered) - midx = pd.MultiIndex(levels=[['a'], cidx], - labels=[[0, 0], [0, 1]]) - df = DataFrame([[10, 11]], index=midx) - - expected = DataFrame([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]], - index=midx, columns=cidx) - - from pandas.core.reshape import make_axis_dummies - result = make_axis_dummies(df) - tm.assert_frame_equal(result, expected) - - result = make_axis_dummies(df, transform=lambda x: x) - tm.assert_frame_equal(result, expected) - - -class TestLreshape(tm.TestCase): - - def test_pairs(self): - data = {'birthdt': ['08jan2009', '20dec2008', '30dec2008', '21dec2008', - '11jan2009'], - 'birthwt': [1766, 3301, 1454, 3139, 4133], - 'id': [101, 102, 103, 104, 105], - 'sex': ['Male', 'Female', 'Female', 'Female', 'Female'], - 'visitdt1': ['11jan2009', '22dec2008', '04jan2009', - '29dec2008', '20jan2009'], - 'visitdt2': - ['21jan2009', nan, '22jan2009', '31dec2008', '03feb2009'], - 'visitdt3': ['05feb2009', nan, nan, '02jan2009', '15feb2009'], - 
'wt1': [1823, 3338, 1549, 3298, 4306], - 'wt2': [2011.0, nan, 1892.0, 3338.0, 4575.0], - 'wt3': [2293.0, nan, nan, 3377.0, 4805.0]} - - df = DataFrame(data) - - spec = {'visitdt': ['visitdt%d' % i for i in range(1, 4)], - 'wt': ['wt%d' % i for i in range(1, 4)]} - result = lreshape(df, spec) - - exp_data = {'birthdt': - ['08jan2009', '20dec2008', '30dec2008', '21dec2008', - '11jan2009', '08jan2009', '30dec2008', '21dec2008', - '11jan2009', '08jan2009', '21dec2008', '11jan2009'], - 'birthwt': [1766, 3301, 1454, 3139, 4133, 1766, 1454, 3139, - 4133, 1766, 3139, 4133], - 'id': [101, 102, 103, 104, 105, 101, 103, 104, 105, 101, - 104, 105], - 'sex': ['Male', 'Female', 'Female', 'Female', 'Female', - 'Male', 'Female', 'Female', 'Female', 'Male', - 'Female', 'Female'], - 'visitdt': ['11jan2009', '22dec2008', '04jan2009', - '29dec2008', '20jan2009', '21jan2009', - '22jan2009', '31dec2008', '03feb2009', - '05feb2009', '02jan2009', '15feb2009'], - 'wt': [1823.0, 3338.0, 1549.0, 3298.0, 4306.0, 2011.0, - 1892.0, 3338.0, 4575.0, 2293.0, 3377.0, 4805.0]} - exp = DataFrame(exp_data, columns=result.columns) - tm.assert_frame_equal(result, exp) - - result = lreshape(df, spec, dropna=False) - exp_data = {'birthdt': - ['08jan2009', '20dec2008', '30dec2008', '21dec2008', - '11jan2009', '08jan2009', '20dec2008', '30dec2008', - '21dec2008', '11jan2009', '08jan2009', '20dec2008', - '30dec2008', '21dec2008', '11jan2009'], - 'birthwt': [1766, 3301, 1454, 3139, 4133, 1766, 3301, 1454, - 3139, 4133, 1766, 3301, 1454, 3139, 4133], - 'id': [101, 102, 103, 104, 105, 101, 102, 103, 104, 105, - 101, 102, 103, 104, 105], - 'sex': ['Male', 'Female', 'Female', 'Female', 'Female', - 'Male', 'Female', 'Female', 'Female', 'Female', - 'Male', 'Female', 'Female', 'Female', 'Female'], - 'visitdt': ['11jan2009', '22dec2008', '04jan2009', - '29dec2008', '20jan2009', '21jan2009', nan, - '22jan2009', '31dec2008', '03feb2009', - '05feb2009', nan, nan, '02jan2009', - '15feb2009'], - 'wt': [1823.0, 3338.0, 
1549.0, 3298.0, 4306.0, 2011.0, nan, - 1892.0, 3338.0, 4575.0, 2293.0, nan, nan, 3377.0, - 4805.0]} - exp = DataFrame(exp_data, columns=result.columns) - tm.assert_frame_equal(result, exp) - - spec = {'visitdt': ['visitdt%d' % i for i in range(1, 3)], - 'wt': ['wt%d' % i for i in range(1, 4)]} - self.assertRaises(ValueError, lreshape, df, spec) - - -class TestWideToLong(tm.TestCase): - - def test_simple(self): - np.random.seed(123) - x = np.random.randn(3) - df = pd.DataFrame({"A1970": {0: "a", - 1: "b", - 2: "c"}, - "A1980": {0: "d", - 1: "e", - 2: "f"}, - "B1970": {0: 2.5, - 1: 1.2, - 2: .7}, - "B1980": {0: 3.2, - 1: 1.3, - 2: .1}, - "X": dict(zip( - range(3), x))}) - df["id"] = df.index - exp_data = {"X": x.tolist() + x.tolist(), - "A": ['a', 'b', 'c', 'd', 'e', 'f'], - "B": [2.5, 1.2, 0.7, 3.2, 1.3, 0.1], - "year": ['1970', '1970', '1970', '1980', '1980', '1980'], - "id": [0, 1, 2, 0, 1, 2]} - exp_frame = DataFrame(exp_data) - exp_frame = exp_frame.set_index(['id', 'year'])[["X", "A", "B"]] - long_frame = wide_to_long(df, ["A", "B"], i="id", j="year") - tm.assert_frame_equal(long_frame, exp_frame) - - def test_stubs(self): - # GH9204 - df = pd.DataFrame([[0, 1, 2, 3, 8], [4, 5, 6, 7, 9]]) - df.columns = ['id', 'inc1', 'inc2', 'edu1', 'edu2'] - stubs = ['inc', 'edu'] - - # TODO: unused? 
- df_long = pd.wide_to_long(df, stubs, i='id', j='age') # noqa - - self.assertEqual(stubs, ['inc', 'edu']) - - def test_separating_character(self): - # GH14779 - np.random.seed(123) - x = np.random.randn(3) - df = pd.DataFrame({"A.1970": {0: "a", - 1: "b", - 2: "c"}, - "A.1980": {0: "d", - 1: "e", - 2: "f"}, - "B.1970": {0: 2.5, - 1: 1.2, - 2: .7}, - "B.1980": {0: 3.2, - 1: 1.3, - 2: .1}, - "X": dict(zip( - range(3), x))}) - df["id"] = df.index - exp_data = {"X": x.tolist() + x.tolist(), - "A": ['a', 'b', 'c', 'd', 'e', 'f'], - "B": [2.5, 1.2, 0.7, 3.2, 1.3, 0.1], - "year": ['1970', '1970', '1970', '1980', '1980', '1980'], - "id": [0, 1, 2, 0, 1, 2]} - exp_frame = DataFrame(exp_data) - exp_frame = exp_frame.set_index(['id', 'year'])[["X", "A", "B"]] - long_frame = wide_to_long(df, ["A", "B"], i="id", j="year", sep=".") - tm.assert_frame_equal(long_frame, exp_frame) - - def test_escapable_characters(self): - np.random.seed(123) - x = np.random.randn(3) - df = pd.DataFrame({"A(quarterly)1970": {0: "a", - 1: "b", - 2: "c"}, - "A(quarterly)1980": {0: "d", - 1: "e", - 2: "f"}, - "B(quarterly)1970": {0: 2.5, - 1: 1.2, - 2: .7}, - "B(quarterly)1980": {0: 3.2, - 1: 1.3, - 2: .1}, - "X": dict(zip( - range(3), x))}) - df["id"] = df.index - exp_data = {"X": x.tolist() + x.tolist(), - "A(quarterly)": ['a', 'b', 'c', 'd', 'e', 'f'], - "B(quarterly)": [2.5, 1.2, 0.7, 3.2, 1.3, 0.1], - "year": ['1970', '1970', '1970', '1980', '1980', '1980'], - "id": [0, 1, 2, 0, 1, 2]} - exp_frame = DataFrame(exp_data) - exp_frame = exp_frame.set_index( - ['id', 'year'])[["X", "A(quarterly)", "B(quarterly)"]] - long_frame = wide_to_long(df, ["A(quarterly)", "B(quarterly)"], - i="id", j="year") - tm.assert_frame_equal(long_frame, exp_frame) - - def test_unbalanced(self): - # test that we can have a varying amount of time variables - df = pd.DataFrame({'A2010': [1.0, 2.0], - 'A2011': [3.0, 4.0], - 'B2010': [5.0, 6.0], - 'X': ['X1', 'X2']}) - df['id'] = df.index - exp_data = {'X': ['X1', 'X1', 
'X2', 'X2'], - 'A': [1.0, 3.0, 2.0, 4.0], - 'B': [5.0, np.nan, 6.0, np.nan], - 'id': [0, 0, 1, 1], - 'year': ['2010', '2011', '2010', '2011']} - exp_frame = pd.DataFrame(exp_data) - exp_frame = exp_frame.set_index(['id', 'year'])[["X", "A", "B"]] - long_frame = wide_to_long(df, ['A', 'B'], i='id', j='year') - tm.assert_frame_equal(long_frame, exp_frame) - - def test_character_overlap(self): - # Test we handle overlapping characters in both id_vars and value_vars - df = pd.DataFrame({ - 'A11': ['a11', 'a22', 'a33'], - 'A12': ['a21', 'a22', 'a23'], - 'B11': ['b11', 'b12', 'b13'], - 'B12': ['b21', 'b22', 'b23'], - 'BB11': [1, 2, 3], - 'BB12': [4, 5, 6], - 'BBBX': [91, 92, 93], - 'BBBZ': [91, 92, 93] - }) - df['id'] = df.index - exp_frame = pd.DataFrame({ - 'BBBX': [91, 92, 93, 91, 92, 93], - 'BBBZ': [91, 92, 93, 91, 92, 93], - 'A': ['a11', 'a22', 'a33', 'a21', 'a22', 'a23'], - 'B': ['b11', 'b12', 'b13', 'b21', 'b22', 'b23'], - 'BB': [1, 2, 3, 4, 5, 6], - 'id': [0, 1, 2, 0, 1, 2], - 'year': ['11', '11', '11', '12', '12', '12']}) - exp_frame = exp_frame.set_index(['id', 'year'])[ - ['BBBX', 'BBBZ', 'A', 'B', 'BB']] - long_frame = wide_to_long(df, ['A', 'B', 'BB'], i='id', j='year') - tm.assert_frame_equal(long_frame.sort_index(axis=1), - exp_frame.sort_index(axis=1)) - - def test_invalid_separator(self): - # if an invalid separator is supplied a empty data frame is returned - sep = 'nope!' 
- df = pd.DataFrame({'A2010': [1.0, 2.0], - 'A2011': [3.0, 4.0], - 'B2010': [5.0, 6.0], - 'X': ['X1', 'X2']}) - df['id'] = df.index - exp_data = {'X': '', - 'A2010': [], - 'A2011': [], - 'B2010': [], - 'id': [], - 'year': [], - 'A': [], - 'B': []} - exp_frame = pd.DataFrame(exp_data) - exp_frame = exp_frame.set_index(['id', 'year'])[[ - 'X', 'A2010', 'A2011', 'B2010', 'A', 'B']] - exp_frame.index.set_levels([[0, 1], []], inplace=True) - long_frame = wide_to_long(df, ['A', 'B'], i='id', j='year', sep=sep) - tm.assert_frame_equal(long_frame.sort_index(axis=1), - exp_frame.sort_index(axis=1)) - - def test_num_string_disambiguation(self): - # Test that we can disambiguate number value_vars from - # string value_vars - df = pd.DataFrame({ - 'A11': ['a11', 'a22', 'a33'], - 'A12': ['a21', 'a22', 'a23'], - 'B11': ['b11', 'b12', 'b13'], - 'B12': ['b21', 'b22', 'b23'], - 'BB11': [1, 2, 3], - 'BB12': [4, 5, 6], - 'Arating': [91, 92, 93], - 'Arating_old': [91, 92, 93] - }) - df['id'] = df.index - exp_frame = pd.DataFrame({ - 'Arating': [91, 92, 93, 91, 92, 93], - 'Arating_old': [91, 92, 93, 91, 92, 93], - 'A': ['a11', 'a22', 'a33', 'a21', 'a22', 'a23'], - 'B': ['b11', 'b12', 'b13', 'b21', 'b22', 'b23'], - 'BB': [1, 2, 3, 4, 5, 6], - 'id': [0, 1, 2, 0, 1, 2], - 'year': ['11', '11', '11', '12', '12', '12']}) - exp_frame = exp_frame.set_index(['id', 'year'])[ - ['Arating', 'Arating_old', 'A', 'B', 'BB']] - long_frame = wide_to_long(df, ['A', 'B', 'BB'], i='id', j='year') - tm.assert_frame_equal(long_frame.sort_index(axis=1), - exp_frame.sort_index(axis=1)) - - def test_invalid_suffixtype(self): - # If all stubs names end with a string, but a numeric suffix is - # assumed, an empty data frame is returned - df = pd.DataFrame({'Aone': [1.0, 2.0], - 'Atwo': [3.0, 4.0], - 'Bone': [5.0, 6.0], - 'X': ['X1', 'X2']}) - df['id'] = df.index - exp_data = {'X': '', - 'Aone': [], - 'Atwo': [], - 'Bone': [], - 'id': [], - 'year': [], - 'A': [], - 'B': []} - exp_frame = pd.DataFrame(exp_data) - 
exp_frame = exp_frame.set_index(['id', 'year'])[[ - 'X', 'Aone', 'Atwo', 'Bone', 'A', 'B']] - exp_frame.index.set_levels([[0, 1], []], inplace=True) - long_frame = wide_to_long(df, ['A', 'B'], i='id', j='year') - tm.assert_frame_equal(long_frame.sort_index(axis=1), - exp_frame.sort_index(axis=1)) - - def test_multiple_id_columns(self): - # Taken from http://www.ats.ucla.edu/stat/stata/modules/reshapel.htm - df = pd.DataFrame({ - 'famid': [1, 1, 1, 2, 2, 2, 3, 3, 3], - 'birth': [1, 2, 3, 1, 2, 3, 1, 2, 3], - 'ht1': [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1], - 'ht2': [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9] - }) - exp_frame = pd.DataFrame({ - 'ht': [2.8, 3.4, 2.9, 3.8, 2.2, 2.9, 2.0, 3.2, 1.8, - 2.8, 1.9, 2.4, 2.2, 3.3, 2.3, 3.4, 2.1, 2.9], - 'famid': [1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3], - 'birth': [1, 1, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3], - 'age': ['1', '2', '1', '2', '1', '2', '1', '2', '1', - '2', '1', '2', '1', '2', '1', '2', '1', '2'] - }) - exp_frame = exp_frame.set_index(['famid', 'birth', 'age'])[['ht']] - long_frame = wide_to_long(df, 'ht', i=['famid', 'birth'], j='age') - tm.assert_frame_equal(long_frame, exp_frame) diff --git a/pandas/tests/test_sorting.py b/pandas/tests/test_sorting.py new file mode 100644 index 0000000000000..d0350ba252329 --- /dev/null +++ b/pandas/tests/test_sorting.py @@ -0,0 +1,437 @@ +import pytest +from itertools import product +from collections import defaultdict +import warnings +from datetime import datetime + +import numpy as np +from numpy import nan +from pandas.core import common as com +from pandas import (DataFrame, MultiIndex, merge, concat, Series, compat, + _np_version_under1p10) +from pandas.util import testing as tm +from pandas.util.testing import assert_frame_equal, assert_series_equal +from pandas.core.sorting import (is_int64_overflow_possible, + decons_group_index, + get_group_index, + nargsort, + lexsort_indexer, + safe_sort) + + +class TestSorting(object): + + 
@pytest.mark.slow + def test_int64_overflow(self): + + B = np.concatenate((np.arange(1000), np.arange(1000), np.arange(500))) + A = np.arange(2500) + df = DataFrame({'A': A, + 'B': B, + 'C': A, + 'D': B, + 'E': A, + 'F': B, + 'G': A, + 'H': B, + 'values': np.random.randn(2500)}) + + lg = df.groupby(['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H']) + rg = df.groupby(['H', 'G', 'F', 'E', 'D', 'C', 'B', 'A']) + + left = lg.sum()['values'] + right = rg.sum()['values'] + + exp_index, _ = left.index.sortlevel() + tm.assert_index_equal(left.index, exp_index) + + exp_index, _ = right.index.sortlevel(0) + tm.assert_index_equal(right.index, exp_index) + + tups = list(map(tuple, df[['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H' + ]].values)) + tups = com._asarray_tuplesafe(tups) + + expected = df.groupby(tups).sum()['values'] + + for k, v in compat.iteritems(expected): + assert left[k] == right[k[::-1]] + assert left[k] == v + assert len(left) == len(right) + + def test_int64_overflow_moar(self): + + # GH9096 + values = range(55109) + data = DataFrame.from_dict( + {'a': values, 'b': values, 'c': values, 'd': values}) + grouped = data.groupby(['a', 'b', 'c', 'd']) + assert len(grouped) == len(values) + + arr = np.random.randint(-1 << 12, 1 << 12, (1 << 15, 5)) + i = np.random.choice(len(arr), len(arr) * 4) + arr = np.vstack((arr, arr[i])) # add sume duplicate rows + + i = np.random.permutation(len(arr)) + arr = arr[i] # shuffle rows + + df = DataFrame(arr, columns=list('abcde')) + df['jim'], df['joe'] = np.random.randn(2, len(df)) * 10 + gr = df.groupby(list('abcde')) + + # verify this is testing what it is supposed to test! 
+ assert is_int64_overflow_possible(gr.grouper.shape) + + # manually compute groupings + jim, joe = defaultdict(list), defaultdict(list) + for key, a, b in zip(map(tuple, arr), df['jim'], df['joe']): + jim[key].append(a) + joe[key].append(b) + + assert len(gr) == len(jim) + mi = MultiIndex.from_tuples(jim.keys(), names=list('abcde')) + + def aggr(func): + f = lambda a: np.fromiter(map(func, a), dtype='f8') + arr = np.vstack((f(jim.values()), f(joe.values()))).T + res = DataFrame(arr, columns=['jim', 'joe'], index=mi) + return res.sort_index() + + assert_frame_equal(gr.mean(), aggr(np.mean)) + assert_frame_equal(gr.median(), aggr(np.median)) + + def test_lexsort_indexer(self): + keys = [[nan] * 5 + list(range(100)) + [nan] * 5] + # orders=True, na_position='last' + result = lexsort_indexer(keys, orders=True, na_position='last') + exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) + tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) + + # orders=True, na_position='first' + result = lexsort_indexer(keys, orders=True, na_position='first') + exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) + tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) + + # orders=False, na_position='last' + result = lexsort_indexer(keys, orders=False, na_position='last') + exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) + tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) + + # orders=False, na_position='first' + result = lexsort_indexer(keys, orders=False, na_position='first') + exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) + tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) + + def test_nargsort(self): + # np.argsort(items) places NaNs last + items = [nan] * 5 + list(range(100)) + [nan] * 5 + # np.argsort(items2) may not place NaNs first + items2 = np.array(items, dtype='O') + + try: + # GH 2785; due to a regression in NumPy1.6.2 + 
np.argsort(np.array([[1, 2], [1, 3], [1, 2]], dtype='i')) + np.argsort(items2, kind='mergesort') + except TypeError: + pytest.skip('requested sort not available for type') + + # mergesort is the most difficult to get right because we want it to be + # stable. + + # According to numpy/core/tests/test_multiarray, """The number of + # sorted items must be greater than ~50 to check the actual algorithm + # because quick and merge sort fall over to insertion sort for small + # arrays.""" + + # mergesort, ascending=True, na_position='last' + result = nargsort(items, kind='mergesort', ascending=True, + na_position='last') + exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=True, na_position='first' + result = nargsort(items, kind='mergesort', ascending=True, + na_position='first') + exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=False, na_position='last' + result = nargsort(items, kind='mergesort', ascending=False, + na_position='last') + exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=False, na_position='first' + result = nargsort(items, kind='mergesort', ascending=False, + na_position='first') + exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=True, na_position='last' + result = nargsort(items2, kind='mergesort', ascending=True, + na_position='last') + exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=True, na_position='first' + result = nargsort(items2, kind='mergesort', ascending=True, 
+ na_position='first') + exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=False, na_position='last' + result = nargsort(items2, kind='mergesort', ascending=False, + na_position='last') + exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=False, na_position='first' + result = nargsort(items2, kind='mergesort', ascending=False, + na_position='first') + exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + +class TestMerge(object): + + @pytest.mark.slow + def test_int64_overflow_issues(self): + + # #2690, combinatorial explosion + df1 = DataFrame(np.random.randn(1000, 7), + columns=list('ABCDEF') + ['G1']) + df2 = DataFrame(np.random.randn(1000, 7), + columns=list('ABCDEF') + ['G2']) + + # it works! 
+ result = merge(df1, df2, how='outer') + assert len(result) == 2000 + + low, high, n = -1 << 10, 1 << 10, 1 << 20 + left = DataFrame(np.random.randint(low, high, (n, 7)), + columns=list('ABCDEFG')) + left['left'] = left.sum(axis=1) + + # one-2-one match + i = np.random.permutation(len(left)) + right = left.iloc[i].copy() + right.columns = right.columns[:-1].tolist() + ['right'] + right.index = np.arange(len(right)) + right['right'] *= -1 + + out = merge(left, right, how='outer') + assert len(out) == len(left) + assert_series_equal(out['left'], - out['right'], check_names=False) + result = out.iloc[:, :-2].sum(axis=1) + assert_series_equal(out['left'], result, check_names=False) + assert result.name is None + + out.sort_values(out.columns.tolist(), inplace=True) + out.index = np.arange(len(out)) + for how in ['left', 'right', 'outer', 'inner']: + assert_frame_equal(out, merge(left, right, how=how, sort=True)) + + # check that left merge w/ sort=False maintains left frame order + out = merge(left, right, how='left', sort=False) + assert_frame_equal(left, out[left.columns.tolist()]) + + out = merge(right, left, how='left', sort=False) + assert_frame_equal(right, out[right.columns.tolist()]) + + # one-2-many/none match + n = 1 << 11 + left = DataFrame(np.random.randint(low, high, (n, 7)).astype('int64'), + columns=list('ABCDEFG')) + + # confirm that this is checking what it is supposed to check + shape = left.apply(Series.nunique).values + assert is_int64_overflow_possible(shape) + + # add duplicates to left frame + left = concat([left, left], ignore_index=True) + + right = DataFrame(np.random.randint(low, high, (n // 2, 7)) + .astype('int64'), + columns=list('ABCDEFG')) + + # add duplicates & overlap with left to the right frame + i = np.random.choice(len(left), n) + right = concat([right, right, left.iloc[i]], ignore_index=True) + + left['left'] = np.random.randn(len(left)) + right['right'] = np.random.randn(len(right)) + + # shuffle left & right frames + i = 
np.random.permutation(len(left)) + left = left.iloc[i].copy() + left.index = np.arange(len(left)) + + i = np.random.permutation(len(right)) + right = right.iloc[i].copy() + right.index = np.arange(len(right)) + + # manually compute outer merge + ldict, rdict = defaultdict(list), defaultdict(list) + + for idx, row in left.set_index(list('ABCDEFG')).iterrows(): + ldict[idx].append(row['left']) + + for idx, row in right.set_index(list('ABCDEFG')).iterrows(): + rdict[idx].append(row['right']) + + vals = [] + for k, lval in ldict.items(): + rval = rdict.get(k, [np.nan]) + for lv, rv in product(lval, rval): + vals.append(k + tuple([lv, rv])) + + for k, rval in rdict.items(): + if k not in ldict: + for rv in rval: + vals.append(k + tuple([np.nan, rv])) + + def align(df): + df = df.sort_values(df.columns.tolist()) + df.index = np.arange(len(df)) + return df + + def verify_order(df): + kcols = list('ABCDEFG') + assert_frame_equal(df[kcols].copy(), + df[kcols].sort_values(kcols, kind='mergesort')) + + out = DataFrame(vals, columns=list('ABCDEFG') + ['left', 'right']) + out = align(out) + + jmask = {'left': out['left'].notna(), + 'right': out['right'].notna(), + 'inner': out['left'].notna() & out['right'].notna(), + 'outer': np.ones(len(out), dtype='bool')} + + for how in 'left', 'right', 'outer', 'inner': + mask = jmask[how] + frame = align(out[mask].copy()) + assert mask.all() ^ mask.any() or how == 'outer' + + for sort in [False, True]: + res = merge(left, right, how=how, sort=sort) + if sort: + verify_order(res) + + # as in GH9092 dtypes break with outer/right join + assert_frame_equal(frame, align(res), + check_dtype=how not in ('right', 'outer')) + + +def test_decons(): + + def testit(label_list, shape): + group_index = get_group_index(label_list, shape, sort=True, xnull=True) + label_list2 = decons_group_index(group_index, shape) + + for a, b in zip(label_list, label_list2): + tm.assert_numpy_array_equal(a, b) + + shape = (4, 5, 6) + label_list = [np.tile([0, 1, 2, 3, 
0, 1, 2, 3], 100).astype(np.int64), + np.tile([0, 2, 4, 3, 0, 1, 2, 3], 100).astype(np.int64), + np.tile([5, 1, 0, 2, 3, 0, 5, 4], 100).astype(np.int64)] + testit(label_list, shape) + + shape = (10000, 10000) + label_list = [np.tile(np.arange(10000, dtype=np.int64), 5), + np.tile(np.arange(10000, dtype=np.int64), 5)] + testit(label_list, shape) + + +class TestSafeSort(object): + + def test_basic_sort(self): + values = [3, 1, 2, 0, 4] + result = safe_sort(values) + expected = np.array([0, 1, 2, 3, 4]) + tm.assert_numpy_array_equal(result, expected) + + values = list("baaacb") + result = safe_sort(values) + expected = np.array(list("aaabbc"), dtype='object') + tm.assert_numpy_array_equal(result, expected) + + values = [] + result = safe_sort(values) + expected = np.array([]) + tm.assert_numpy_array_equal(result, expected) + + def test_labels(self): + values = [3, 1, 2, 0, 4] + expected = np.array([0, 1, 2, 3, 4]) + + labels = [0, 1, 1, 2, 3, 0, -1, 4] + result, result_labels = safe_sort(values, labels) + expected_labels = np.array([3, 1, 1, 2, 0, 3, -1, 4], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + tm.assert_numpy_array_equal(result_labels, expected_labels) + + # na_sentinel + labels = [0, 1, 1, 2, 3, 0, 99, 4] + result, result_labels = safe_sort(values, labels, + na_sentinel=99) + expected_labels = np.array([3, 1, 1, 2, 0, 3, 99, 4], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + tm.assert_numpy_array_equal(result_labels, expected_labels) + + # out of bound indices + labels = [0, 101, 102, 2, 3, 0, 99, 4] + result, result_labels = safe_sort(values, labels) + expected_labels = np.array([3, -1, -1, 2, 0, 3, -1, 4], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + tm.assert_numpy_array_equal(result_labels, expected_labels) + + labels = [] + result, result_labels = safe_sort(values, labels) + expected_labels = np.array([], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + 
tm.assert_numpy_array_equal(result_labels, expected_labels) + + def test_mixed_integer(self): + values = np.array(['b', 1, 0, 'a', 0, 'b'], dtype=object) + result = safe_sort(values) + expected = np.array([0, 0, 1, 'a', 'b', 'b'], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + values = np.array(['b', 1, 0, 'a'], dtype=object) + labels = [0, 1, 2, 3, 0, -1, 1] + result, result_labels = safe_sort(values, labels) + expected = np.array([0, 1, 'a', 'b'], dtype=object) + expected_labels = np.array([3, 1, 0, 2, 3, -1, 1], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + tm.assert_numpy_array_equal(result_labels, expected_labels) + + def test_mixed_integer_from_list(self): + values = ['b', 1, 0, 'a', 0, 'b'] + result = safe_sort(values) + expected = np.array([0, 0, 1, 'a', 'b', 'b'], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + def test_unsortable(self): + # GH 13714 + arr = np.array([1, 2, datetime.now(), 0, 3], dtype=object) + if compat.PY2 and not _np_version_under1p10: + # RuntimeWarning: tp_compare didn't return -1 or -2 for exception + with warnings.catch_warnings(): + pytest.raises(TypeError, safe_sort, arr) + else: + pytest.raises(TypeError, safe_sort, arr) + + def test_exceptions(self): + with tm.assert_raises_regex(TypeError, + "Only list-like objects are allowed"): + safe_sort(values=1) + + with tm.assert_raises_regex(TypeError, + "Only list-like objects or None"): + safe_sort(values=[0, 1, 2], labels=1) + + with tm.assert_raises_regex(ValueError, + "values should be unique"): + safe_sort(values=[0, 1, 2, 1], labels=[0, 1]) diff --git a/pandas/tests/test_stats.py b/pandas/tests/test_stats.py deleted file mode 100644 index 118c4147a2019..0000000000000 --- a/pandas/tests/test_stats.py +++ /dev/null @@ -1,185 +0,0 @@ -# -*- coding: utf-8 -*- -from pandas import compat - -from distutils.version import LooseVersion -from numpy import nan -import numpy as np - -from pandas import Series, DataFrame - -from 
pandas.compat import product -from pandas.util.testing import (assert_frame_equal, assert_series_equal) -import pandas.util.testing as tm - - -class TestRank(tm.TestCase): - s = Series([1, 3, 4, 2, nan, 2, 1, 5, nan, 3]) - df = DataFrame({'A': s, 'B': s}) - - results = { - 'average': np.array([1.5, 5.5, 7.0, 3.5, nan, - 3.5, 1.5, 8.0, nan, 5.5]), - 'min': np.array([1, 5, 7, 3, nan, 3, 1, 8, nan, 5]), - 'max': np.array([2, 6, 7, 4, nan, 4, 2, 8, nan, 6]), - 'first': np.array([1, 5, 7, 3, nan, 4, 2, 8, nan, 6]), - 'dense': np.array([1, 3, 4, 2, nan, 2, 1, 5, nan, 3]), - } - - def test_rank_tie_methods(self): - s = self.s - - def _check(s, expected, method='average'): - result = s.rank(method=method) - tm.assert_series_equal(result, Series(expected)) - - dtypes = [None, object] - disabled = set([(object, 'first')]) - results = self.results - - for method, dtype in product(results, dtypes): - if (dtype, method) in disabled: - continue - series = s if dtype is None else s.astype(dtype) - _check(series, results[method], method=method) - - def test_rank_methods_series(self): - tm.skip_if_no_package('scipy', '0.13', 'scipy.stats.rankdata') - import scipy - from scipy.stats import rankdata - - xs = np.random.randn(9) - xs = np.concatenate([xs[i:] for i in range(0, 9, 2)]) # add duplicates - np.random.shuffle(xs) - - index = [chr(ord('a') + i) for i in range(len(xs))] - - for vals in [xs, xs + 1e6, xs * 1e-6]: - ts = Series(vals, index=index) - - for m in ['average', 'min', 'max', 'first', 'dense']: - result = ts.rank(method=m) - sprank = rankdata(vals, m if m != 'first' else 'ordinal') - expected = Series(sprank, index=index) - - if LooseVersion(scipy.__version__) >= '0.17.0': - expected = expected.astype('float64') - tm.assert_series_equal(result, expected) - - def test_rank_methods_frame(self): - tm.skip_if_no_package('scipy', '0.13', 'scipy.stats.rankdata') - import scipy - from scipy.stats import rankdata - - xs = np.random.randint(0, 21, (100, 26)) - xs = (xs - 10.0) / 
10.0 - cols = [chr(ord('z') - i) for i in range(xs.shape[1])] - - for vals in [xs, xs + 1e6, xs * 1e-6]: - df = DataFrame(vals, columns=cols) - - for ax in [0, 1]: - for m in ['average', 'min', 'max', 'first', 'dense']: - result = df.rank(axis=ax, method=m) - sprank = np.apply_along_axis( - rankdata, ax, vals, - m if m != 'first' else 'ordinal') - sprank = sprank.astype(np.float64) - expected = DataFrame(sprank, columns=cols) - - if LooseVersion(scipy.__version__) >= '0.17.0': - expected = expected.astype('float64') - tm.assert_frame_equal(result, expected) - - def test_rank_dense_method(self): - dtypes = ['O', 'f8', 'i8'] - in_out = [([1], [1]), - ([2], [1]), - ([0], [1]), - ([2, 2], [1, 1]), - ([1, 2, 3], [1, 2, 3]), - ([4, 2, 1], [3, 2, 1],), - ([1, 1, 5, 5, 3], [1, 1, 3, 3, 2]), - ([-5, -4, -3, -2, -1], [1, 2, 3, 4, 5])] - - for ser, exp in in_out: - for dtype in dtypes: - s = Series(ser).astype(dtype) - result = s.rank(method='dense') - expected = Series(exp).astype(result.dtype) - assert_series_equal(result, expected) - - def test_rank_descending(self): - dtypes = ['O', 'f8', 'i8'] - - for dtype, method in product(dtypes, self.results): - if 'i' in dtype: - s = self.s.dropna() - df = self.df.dropna() - else: - s = self.s.astype(dtype) - df = self.df.astype(dtype) - - res = s.rank(ascending=False) - expected = (s.max() - s).rank() - assert_series_equal(res, expected) - - res = df.rank(ascending=False) - expected = (df.max() - df).rank() - assert_frame_equal(res, expected) - - if method == 'first' and dtype == 'O': - continue - - expected = (s.max() - s).rank(method=method) - res2 = s.rank(method=method, ascending=False) - assert_series_equal(res2, expected) - - expected = (df.max() - df).rank(method=method) - - if dtype != 'O': - res2 = df.rank(method=method, ascending=False, - numeric_only=True) - assert_frame_equal(res2, expected) - - res3 = df.rank(method=method, ascending=False, - numeric_only=False) - assert_frame_equal(res3, expected) - - def 
test_rank_2d_tie_methods(self): - df = self.df - - def _check2d(df, expected, method='average', axis=0): - exp_df = DataFrame({'A': expected, 'B': expected}) - - if axis == 1: - df = df.T - exp_df = exp_df.T - - result = df.rank(method=method, axis=axis) - assert_frame_equal(result, exp_df) - - dtypes = [None, object] - disabled = set([(object, 'first')]) - results = self.results - - for method, axis, dtype in product(results, [0, 1], dtypes): - if (dtype, method) in disabled: - continue - frame = df if dtype is None else df.astype(dtype) - _check2d(frame, results[method], method=method, axis=axis) - - def test_rank_int(self): - s = self.s.dropna().astype('i8') - - for method, res in compat.iteritems(self.results): - result = s.rank(method=method) - expected = Series(res).dropna() - expected.index = result.index - assert_series_equal(result, expected) - - def test_rank_object_bug(self): - # GH 13445 - - # smoke tests - Series([np.nan] * 32).astype(object).rank(ascending=True) - Series([np.nan] * 32).astype(object).rank(ascending=False) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index ce97b09b7e3ca..a878d6ed7b052 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -2,6 +2,7 @@ # pylint: disable-msg=E1101,W0612 from datetime import datetime, timedelta +import pytest import re from numpy import nan as NA @@ -10,7 +11,7 @@ from pandas.compat import range, u import pandas.compat as compat -from pandas import (Index, Series, DataFrame, isnull, MultiIndex, notnull) +from pandas import Index, Series, DataFrame, isna, MultiIndex, notna from pandas.util.testing import assert_series_equal import pandas.util.testing as tm @@ -18,19 +19,20 @@ import pandas.core.strings as strings -class TestStringMethods(tm.TestCase): +class TestStringMethods(object): def test_api(self): # GH 6106, GH 9322 - self.assertIs(Series.str, strings.StringMethods) - self.assertIsInstance(Series(['']).str, strings.StringMethods) + assert 
Series.str is strings.StringMethods + assert isinstance(Series(['']).str, strings.StringMethods) # GH 9184 invalid = Series([1]) - with tm.assertRaisesRegexp(AttributeError, "only use .str accessor"): + with tm.assert_raises_regex(AttributeError, + "only use .str accessor"): invalid.str - self.assertFalse(hasattr(invalid, 'str')) + assert not hasattr(invalid, 'str') def test_iter(self): # GH3638 @@ -39,7 +41,7 @@ def test_iter(self): for s in ds.str: # iter must yield a Series - tm.assertIsInstance(s, Series) + assert isinstance(s, Series) # indices of each yielded Series should be equal to the index of # the original Series @@ -47,13 +49,12 @@ def test_iter(self): for el in s: # each element of the series is either a basestring/str or nan - self.assertTrue(isinstance(el, compat.string_types) or - isnull(el)) + assert isinstance(el, compat.string_types) or isna(el) # desired behavior is to iterate until everything would be nan on the # next iter so make sure the last element of the iterator was 'l' in # this case since 'wikitravel' is the longest string - self.assertEqual(s.dropna().values.item(), 'l') + assert s.dropna().values.item() == 'l' def test_iter_empty(self): ds = Series([], dtype=object) @@ -65,8 +66,8 @@ def test_iter_empty(self): # nothing to iterate over so nothing defined values should remain # unchanged - self.assertEqual(i, 100) - self.assertEqual(s, 1) + assert i == 100 + assert s == 1 def test_iter_single_element(self): ds = Series(['a']) @@ -74,7 +75,7 @@ def test_iter_single_element(self): for i, s in enumerate(ds.str): pass - self.assertFalse(i) + assert not i assert_series_equal(ds, s) def test_iter_object_try_string(self): @@ -86,8 +87,8 @@ def test_iter_object_try_string(self): for i, s in enumerate(ds.str): pass - self.assertEqual(i, 100) - self.assertEqual(s, 'h') + assert i == 100 + assert s == 'h' def test_cat(self): one = np.array(['a', 'a', 'b', 'b', 'c', NA], dtype=np.object_) @@ -96,29 +97,29 @@ def test_cat(self): # single array 
result = strings.str_cat(one) exp = 'aabbc' - self.assertEqual(result, exp) + assert result == exp result = strings.str_cat(one, na_rep='NA') exp = 'aabbcNA' - self.assertEqual(result, exp) + assert result == exp result = strings.str_cat(one, na_rep='-') exp = 'aabbc-' - self.assertEqual(result, exp) + assert result == exp result = strings.str_cat(one, sep='_', na_rep='NA') exp = 'a_a_b_b_c_NA' - self.assertEqual(result, exp) + assert result == exp result = strings.str_cat(two, sep='-') exp = 'a-b-d-foo' - self.assertEqual(result, exp) + assert result == exp # Multiple arrays result = strings.str_cat(one, [two], na_rep='NA') exp = np.array(['aa', 'aNA', 'bb', 'bd', 'cfoo', 'NANA'], dtype=np.object_) - self.assert_numpy_array_equal(result, exp) + tm.assert_numpy_array_equal(result, exp) result = strings.str_cat(one, two) exp = np.array(['aa', NA, 'bb', 'bd', 'cfoo', NA], dtype=np.object_) @@ -134,7 +135,7 @@ def test_count(self): result = Series(values).str.count('f[o]+') exp = Series([1, 2, NA, 4]) - tm.assertIsInstance(result, Series) + assert isinstance(result, Series) tm.assert_series_equal(result, exp) # mixed @@ -145,7 +146,7 @@ def test_count(self): rs = Series(mixed).str.count('a') xp = Series([1, NA, 0, NA, NA, 0, NA, NA, NA]) - tm.assertIsInstance(rs, Series) + assert isinstance(rs, Series) tm.assert_series_equal(rs, xp) # unicode @@ -157,7 +158,7 @@ def test_count(self): result = Series(values).str.count('f[o]+') exp = Series([1, 2, NA, 4]) - tm.assertIsInstance(result, Series) + assert isinstance(result, Series) tm.assert_series_equal(result, exp) def test_contains(self): @@ -176,7 +177,7 @@ def test_contains(self): values = ['foo', 'xyz', 'fooommm__foo', 'mmm_'] result = strings.str_contains(values, pat) expected = np.array([False, False, True, True]) - self.assertEqual(result.dtype, np.bool_) + assert result.dtype == np.bool_ tm.assert_numpy_array_equal(result, expected) # case insensitive using regex @@ -199,7 +200,7 @@ def test_contains(self): rs = 
Series(mixed).str.contains('o') xp = Series([False, NA, False, NA, NA, True, NA, NA, NA]) - tm.assertIsInstance(rs, Series) + assert isinstance(rs, Series) tm.assert_series_equal(rs, xp) # unicode @@ -219,13 +220,13 @@ def test_contains(self): dtype=np.object_) result = strings.str_contains(values, pat) expected = np.array([False, False, True, True]) - self.assertEqual(result.dtype, np.bool_) + assert result.dtype == np.bool_ tm.assert_numpy_array_equal(result, expected) # na values = Series(['om', 'foo', np.nan]) res = values.str.contains('foo', na="foo") - self.assertEqual(res.loc[2], "foo") + assert res.loc[2] == "foo" def test_startswith(self): values = Series(['om', NA, 'foo_nom', 'nom', 'bar_foo', NA, 'foo']) @@ -243,7 +244,7 @@ def test_startswith(self): tm.assert_numpy_array_equal(rs, xp) rs = Series(mixed).str.startswith('f') - tm.assertIsInstance(rs, Series) + assert isinstance(rs, Series) xp = Series([False, NA, False, NA, NA, True, NA, NA, NA]) tm.assert_series_equal(rs, xp) @@ -274,7 +275,7 @@ def test_endswith(self): rs = Series(mixed).str.endswith('f') xp = Series([False, NA, False, NA, NA, False, NA, NA, NA]) - tm.assertIsInstance(rs, Series) + assert isinstance(rs, Series) tm.assert_series_equal(rs, xp) # unicode @@ -326,7 +327,7 @@ def test_lower_upper(self): mixed = mixed.str.upper() rs = Series(mixed).str.lower() xp = Series(['a', NA, 'b', NA, NA, 'foo', NA, NA, NA]) - tm.assertIsInstance(rs, Series) + assert isinstance(rs, Series) tm.assert_series_equal(rs, xp) # unicode @@ -380,13 +381,11 @@ def test_swapcase(self): def test_casemethods(self): values = ['aaa', 'bbb', 'CCC', 'Dddd', 'eEEE'] s = Series(values) - self.assertEqual(s.str.lower().tolist(), [v.lower() for v in values]) - self.assertEqual(s.str.upper().tolist(), [v.upper() for v in values]) - self.assertEqual(s.str.title().tolist(), [v.title() for v in values]) - self.assertEqual(s.str.capitalize().tolist(), [ - v.capitalize() for v in values]) - 
self.assertEqual(s.str.swapcase().tolist(), [ - v.swapcase() for v in values]) + assert s.str.lower().tolist() == [v.lower() for v in values] + assert s.str.upper().tolist() == [v.upper() for v in values] + assert s.str.title().tolist() == [v.title() for v in values] + assert s.str.capitalize().tolist() == [v.capitalize() for v in values] + assert s.str.swapcase().tolist() == [v.swapcase() for v in values] def test_replace(self): values = Series(['fooBAD__barBAD', NA]) @@ -405,7 +404,7 @@ def test_replace(self): rs = Series(mixed).str.replace('BAD[_]*', '') xp = Series(['a', NA, 'b', NA, NA, 'foo', NA, NA, NA]) - tm.assertIsInstance(rs, Series) + assert isinstance(rs, Series) tm.assert_almost_equal(rs, xp) # unicode @@ -430,7 +429,7 @@ def test_replace(self): for repl in (None, 3, {'a': 'b'}): for data in (['a', 'b', None], ['a', 'b', 'c', 'ad']): values = klass(data) - self.assertRaises(TypeError, values.str.replace, 'a', repl) + pytest.raises(TypeError, values.str.replace, 'a', repl) def test_replace_callable(self): # GH 15055 @@ -450,15 +449,15 @@ def test_replace_callable(self): r'(?(3)required )positional arguments?') repl = lambda: None - with tm.assertRaisesRegexp(TypeError, p_err): + with tm.assert_raises_regex(TypeError, p_err): values.str.replace('a', repl) repl = lambda m, x: None - with tm.assertRaisesRegexp(TypeError, p_err): + with tm.assert_raises_regex(TypeError, p_err): values.str.replace('a', repl) repl = lambda m, x, y=None: None - with tm.assertRaisesRegexp(TypeError, p_err): + with tm.assert_raises_regex(TypeError, p_err): values.str.replace('a', repl) # test regex named groups @@ -469,6 +468,89 @@ def test_replace_callable(self): exp = Series(['bAR', NA]) tm.assert_series_equal(result, exp) + def test_replace_compiled_regex(self): + # GH 15446 + values = Series(['fooBAD__barBAD', NA]) + + # test with compiled regex + pat = re.compile(r'BAD[_]*') + result = values.str.replace(pat, '') + exp = Series(['foobar', NA]) + 
tm.assert_series_equal(result, exp) + + # mixed + mixed = Series(['aBAD', NA, 'bBAD', True, datetime.today(), 'fooBAD', + None, 1, 2.]) + + rs = Series(mixed).str.replace(pat, '') + xp = Series(['a', NA, 'b', NA, NA, 'foo', NA, NA, NA]) + assert isinstance(rs, Series) + tm.assert_almost_equal(rs, xp) + + # unicode + values = Series([u('fooBAD__barBAD'), NA]) + + result = values.str.replace(pat, '') + exp = Series([u('foobar'), NA]) + tm.assert_series_equal(result, exp) + + result = values.str.replace(pat, '', n=1) + exp = Series([u('foobarBAD'), NA]) + tm.assert_series_equal(result, exp) + + # flags + unicode + values = Series([b"abcd,\xc3\xa0".decode("utf-8")]) + exp = Series([b"abcd, \xc3\xa0".decode("utf-8")]) + pat = re.compile(r"(?<=\w),(?=\w)", flags=re.UNICODE) + result = values.str.replace(pat, ", ") + tm.assert_series_equal(result, exp) + + # case and flags provided to str.replace will have no effect + # and will produce warnings + values = Series(['fooBAD__barBAD__bad', NA]) + pat = re.compile(r'BAD[_]*') + + with tm.assert_raises_regex(ValueError, + "case and flags cannot be"): + result = values.str.replace(pat, '', flags=re.IGNORECASE) + + with tm.assert_raises_regex(ValueError, + "case and flags cannot be"): + result = values.str.replace(pat, '', case=False) + + with tm.assert_raises_regex(ValueError, + "case and flags cannot be"): + result = values.str.replace(pat, '', case=True) + + # test with callable + values = Series(['fooBAD__barBAD', NA]) + repl = lambda m: m.group(0).swapcase() + pat = re.compile('[a-z][A-Z]{2}') + result = values.str.replace(pat, repl, n=2) + exp = Series(['foObaD__baRbaD', NA]) + tm.assert_series_equal(result, exp) + + def test_replace_literal(self): + # GH16808 literal replace (regex=False vs regex=True) + values = Series(['f.o', 'foo', NA]) + exp = Series(['bao', 'bao', NA]) + result = values.str.replace('f.', 'ba') + tm.assert_series_equal(result, exp) + + exp = Series(['bao', 'foo', NA]) + result = 
values.str.replace('f.', 'ba', regex=False) + tm.assert_series_equal(result, exp) + + # Cannot do a literal replace if given a callable repl or compiled + # pattern + callable_repl = lambda m: m.group(0).swapcase() + compiled_pat = re.compile('[a-z][A-Z]{2}') + + pytest.raises(ValueError, values.str.replace, 'abc', callable_repl, + regex=False) + pytest.raises(ValueError, values.str.replace, compiled_pat, '', + regex=False) + def test_repeat(self): values = Series(['a', 'b', NA, 'c', NA, 'd']) @@ -486,7 +568,7 @@ def test_repeat(self): rs = Series(mixed).str.repeat(3) xp = Series(['aaa', NA, 'bbb', NA, NA, 'foofoofoo', NA, NA, NA]) - tm.assertIsInstance(rs, Series) + assert isinstance(rs, Series) tm.assert_series_equal(rs, xp) # unicode @@ -500,64 +582,44 @@ def test_repeat(self): exp = Series([u('a'), u('bb'), NA, u('cccc'), NA, u('dddddd')]) tm.assert_series_equal(result, exp) - def test_deprecated_match(self): - # Old match behavior, deprecated (but still default) in 0.13 + def test_match(self): + # New match behavior introduced in 0.13 values = Series(['fooBAD__barBAD', NA, 'foo']) - - with tm.assert_produces_warning(): - result = values.str.match('.*(BAD[_]+).*(BAD)') - exp = Series([('BAD__', 'BAD'), NA, []]) - tm.assert_series_equal(result, exp) - - # mixed - mixed = Series(['aBAD_BAD', NA, 'BAD_b_BAD', True, datetime.today(), - 'foo', None, 1, 2.]) - - with tm.assert_produces_warning(): - rs = Series(mixed).str.match('.*(BAD[_]+).*(BAD)') - xp = Series([('BAD_', 'BAD'), NA, ('BAD_', 'BAD'), - NA, NA, [], NA, NA, NA]) - tm.assertIsInstance(rs, Series) - tm.assert_series_equal(rs, xp) - - # unicode - values = Series([u('fooBAD__barBAD'), NA, u('foo')]) - - with tm.assert_produces_warning(): - result = values.str.match('.*(BAD[_]+).*(BAD)') - exp = Series([(u('BAD__'), u('BAD')), NA, []]) + result = values.str.match('.*(BAD[_]+).*(BAD)') + exp = Series([True, NA, False]) tm.assert_series_equal(result, exp) - def test_match(self): - # New match behavior 
introduced in 0.13 values = Series(['fooBAD__barBAD', NA, 'foo']) - with tm.assert_produces_warning(): - result = values.str.match('.*(BAD[_]+).*(BAD)', as_indexer=True) + result = values.str.match('.*BAD[_]+.*BAD') exp = Series([True, NA, False]) tm.assert_series_equal(result, exp) - # If no groups, use new behavior even when as_indexer is False. - # (Old behavior is pretty much useless in this case.) + # test passing as_indexer still works but is ignored values = Series(['fooBAD__barBAD', NA, 'foo']) - result = values.str.match('.*BAD[_]+.*BAD', as_indexer=False) exp = Series([True, NA, False]) + with tm.assert_produces_warning(FutureWarning): + result = values.str.match('.*BAD[_]+.*BAD', as_indexer=True) + tm.assert_series_equal(result, exp) + with tm.assert_produces_warning(FutureWarning): + result = values.str.match('.*BAD[_]+.*BAD', as_indexer=False) + tm.assert_series_equal(result, exp) + with tm.assert_produces_warning(FutureWarning): + result = values.str.match('.*(BAD[_]+).*(BAD)', as_indexer=True) tm.assert_series_equal(result, exp) + pytest.raises(ValueError, values.str.match, '.*(BAD[_]+).*(BAD)', + as_indexer=False) # mixed mixed = Series(['aBAD_BAD', NA, 'BAD_b_BAD', True, datetime.today(), 'foo', None, 1, 2.]) - - with tm.assert_produces_warning(): - rs = Series(mixed).str.match('.*(BAD[_]+).*(BAD)', as_indexer=True) + rs = Series(mixed).str.match('.*(BAD[_]+).*(BAD)') xp = Series([True, NA, True, NA, NA, False, NA, NA, NA]) - tm.assertIsInstance(rs, Series) + assert isinstance(rs, Series) tm.assert_series_equal(rs, xp) # unicode values = Series([u('fooBAD__barBAD'), NA, u('foo')]) - - with tm.assert_produces_warning(): - result = values.str.match('.*(BAD[_]+).*(BAD)', as_indexer=True) + result = values.str.match('.*(BAD[_]+).*(BAD)') exp = Series([True, NA, False]) tm.assert_series_equal(result, exp) @@ -571,13 +633,16 @@ def test_match(self): def test_extract_expand_None(self): values = Series(['fooBAD__barBAD', NA, 'foo']) - with 
tm.assert_produces_warning(FutureWarning): + with tm.assert_raises_regex(ValueError, + 'expand must be True or False'): values.str.extract('.*(BAD[_]+).*(BAD)', expand=None) def test_extract_expand_unspecified(self): values = Series(['fooBAD__barBAD', NA, 'foo']) - with tm.assert_produces_warning(FutureWarning): - values.str.extract('.*(BAD[_]+).*(BAD)') + result_unspecified = values.str.extract('.*(BAD[_]+).*') + assert isinstance(result_unspecified, DataFrame) + result_true = values.str.extract('.*(BAD[_]+).*', expand=True) + tm.assert_frame_equal(result_unspecified, result_true) def test_extract_expand_False(self): # Contains tests like those in test_match and some others. @@ -608,7 +673,7 @@ def test_extract_expand_False(self): # Index only works with one regex group since # multi-group would expand to a frame idx = Index(['A1', 'A2', 'A3', 'A4', 'B5']) - with tm.assertRaisesRegexp(ValueError, "supported"): + with tm.assert_raises_regex(ValueError, "supported"): idx.str.extract('([AB])([123])', expand=False) # these should work for both Series and Index @@ -616,16 +681,16 @@ def test_extract_expand_False(self): # no groups s_or_idx = klass(['A1', 'B2', 'C3']) f = lambda: s_or_idx.str.extract('[ABC][123]', expand=False) - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) # only non-capturing groups f = lambda: s_or_idx.str.extract('(?:[AB]).*', expand=False) - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) # single group renames series/index properly s_or_idx = klass(['A1', 'A2']) result = s_or_idx.str.extract(r'(?PA)\d', expand=False) - self.assertEqual(result.name, 'uno') + assert result.name == 'uno' exp = klass(['A', 'A'], name='uno') if klass == Series: @@ -729,7 +794,7 @@ def check_index(index): r = s.str.extract(r'(?P[a-z])', expand=False) e = Series(['a', 'b', 'c'], name='sue') tm.assert_series_equal(r, e) - self.assertEqual(r.name, e.name) + assert r.name == e.name def test_extract_expand_True(self): # Contains tests like 
those in test_match and some others. @@ -761,16 +826,16 @@ def test_extract_expand_True(self): # no groups s_or_idx = klass(['A1', 'B2', 'C3']) f = lambda: s_or_idx.str.extract('[ABC][123]', expand=True) - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) # only non-capturing groups f = lambda: s_or_idx.str.extract('(?:[AB]).*', expand=True) - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) # single group renames series/index properly s_or_idx = klass(['A1', 'A2']) result_df = s_or_idx.str.extract(r'(?PA)\d', expand=True) - tm.assertIsInstance(result_df, DataFrame) + assert isinstance(result_df, DataFrame) result_series = result_df['uno'] assert_series_equal(result_series, Series(['A', 'A'], name='uno')) @@ -1031,28 +1096,50 @@ def test_extractall_single_group_with_quantifier(self): e = DataFrame(['ab', 'abc', 'd', 'cd'], i) tm.assert_frame_equal(r, e) - def test_extractall_no_matches(self): - s = Series(['a3', 'b3', 'd4c2'], name='series_name') + @pytest.mark.parametrize('data, names', [ + ([], (None, )), + ([], ('i1', )), + ([], (None, 'i2')), + ([], ('i1', 'i2')), + (['a3', 'b3', 'd4c2'], (None, )), + (['a3', 'b3', 'd4c2'], ('i1', 'i2')), + (['a3', 'b3', 'd4c2'], (None, 'i2')), + (['a3', 'b3', 'd4c2'], ('i1', 'i2')), + ]) + def test_extractall_no_matches(self, data, names): + # GH19075 extractall with no matches should return a valid MultiIndex + n = len(data) + if len(names) == 1: + i = Index(range(n), name=names[0]) + else: + a = (tuple([i] * (n - 1)) for i in range(n)) + i = MultiIndex.from_tuples(a, names=names) + s = Series(data, name='series_name', index=i, dtype='object') + ei = MultiIndex.from_tuples([], names=(names + ('match',))) + # one un-named group. r = s.str.extractall('(z)') - e = DataFrame(columns=[0]) + e = DataFrame(columns=[0], index=ei) tm.assert_frame_equal(r, e) + # two un-named groups. 
r = s.str.extractall('(z)(z)') - e = DataFrame(columns=[0, 1]) + e = DataFrame(columns=[0, 1], index=ei) tm.assert_frame_equal(r, e) + # one named group. r = s.str.extractall('(?Pz)') - e = DataFrame(columns=["first"]) + e = DataFrame(columns=["first"], index=ei) tm.assert_frame_equal(r, e) + # two named groups. r = s.str.extractall('(?Pz)(?Pz)') - e = DataFrame(columns=["first", "second"]) + e = DataFrame(columns=["first", "second"], index=ei) tm.assert_frame_equal(r, e) + # one named, one un-named. r = s.str.extractall('(z)(?Pz)') - e = DataFrame(columns=[0, - "second"]) + e = DataFrame(columns=[0, "second"], index=ei) tm.assert_frame_equal(r, e) def test_extractall_stringindex(self): @@ -1084,7 +1171,7 @@ def test_extractall_errors(self): # no capture groups. (it returns DataFrame with one column for # each capture group) s = Series(['a3', 'b3', 'd4c2'], name='series_name') - with tm.assertRaisesRegexp(ValueError, "no capture groups"): + with tm.assert_raises_regex(ValueError, "no capture groups"): s.str.extractall(r'[a-z]') def test_extract_index_one_two_groups(self): @@ -1168,17 +1255,16 @@ def test_extractall_same_as_extract_subject_index(self): tm.assert_frame_equal(extract_one_noname, no_match_index) def test_empty_str_methods(self): - empty_str = empty = Series(dtype=str) + empty_str = empty = Series(dtype=object) empty_int = Series(dtype=int) empty_bool = Series(dtype=bool) - empty_list = Series(dtype=list) empty_bytes = Series(dtype=object) # GH7241 # (extract) on empty series tm.assert_series_equal(empty_str, empty.str.cat(empty)) - self.assertEqual('', empty.str.cat()) + assert '' == empty.str.cat() tm.assert_series_equal(empty_str, empty.str.title()) tm.assert_series_equal(empty_int, empty.str.count('a')) tm.assert_series_equal(empty_bool, empty.str.contains('a')) @@ -1202,25 +1288,24 @@ def test_empty_str_methods(self): DataFrame(columns=[0, 1], dtype=str), empty.str.extract('()()', expand=False)) tm.assert_frame_equal(DataFrame(dtype=str), 
empty.str.get_dummies()) - tm.assert_series_equal(empty_str, empty_list.str.join('')) + tm.assert_series_equal(empty_str, empty_str.str.join('')) tm.assert_series_equal(empty_int, empty.str.len()) - tm.assert_series_equal(empty_list, empty_list.str.findall('a')) + tm.assert_series_equal(empty_str, empty_str.str.findall('a')) tm.assert_series_equal(empty_int, empty.str.find('a')) tm.assert_series_equal(empty_int, empty.str.rfind('a')) tm.assert_series_equal(empty_str, empty.str.pad(42)) tm.assert_series_equal(empty_str, empty.str.center(42)) - tm.assert_series_equal(empty_list, empty.str.split('a')) - tm.assert_series_equal(empty_list, empty.str.rsplit('a')) - tm.assert_series_equal(empty_list, + tm.assert_series_equal(empty_str, empty.str.split('a')) + tm.assert_series_equal(empty_str, empty.str.rsplit('a')) + tm.assert_series_equal(empty_str, empty.str.partition('a', expand=False)) - tm.assert_series_equal(empty_list, + tm.assert_series_equal(empty_str, empty.str.rpartition('a', expand=False)) tm.assert_series_equal(empty_str, empty.str.slice(stop=1)) tm.assert_series_equal(empty_str, empty.str.slice(step=1)) tm.assert_series_equal(empty_str, empty.str.strip()) tm.assert_series_equal(empty_str, empty.str.lstrip()) tm.assert_series_equal(empty_str, empty.str.rstrip()) - tm.assert_series_equal(empty_str, empty.str.rstrip()) tm.assert_series_equal(empty_str, empty.str.wrap(42)) tm.assert_series_equal(empty_str, empty.str.get(0)) tm.assert_series_equal(empty_str, empty_bytes.str.decode('ascii')) @@ -1281,20 +1366,13 @@ def test_ismethods(self): tm.assert_series_equal(str_s.str.isupper(), Series(upper_e)) tm.assert_series_equal(str_s.str.istitle(), Series(title_e)) - self.assertEqual(str_s.str.isalnum().tolist(), [v.isalnum() - for v in values]) - self.assertEqual(str_s.str.isalpha().tolist(), [v.isalpha() - for v in values]) - self.assertEqual(str_s.str.isdigit().tolist(), [v.isdigit() - for v in values]) - self.assertEqual(str_s.str.isspace().tolist(), [v.isspace() - 
for v in values]) - self.assertEqual(str_s.str.islower().tolist(), [v.islower() - for v in values]) - self.assertEqual(str_s.str.isupper().tolist(), [v.isupper() - for v in values]) - self.assertEqual(str_s.str.istitle().tolist(), [v.istitle() - for v in values]) + assert str_s.str.isalnum().tolist() == [v.isalnum() for v in values] + assert str_s.str.isalpha().tolist() == [v.isalpha() for v in values] + assert str_s.str.isdigit().tolist() == [v.isdigit() for v in values] + assert str_s.str.isspace().tolist() == [v.isspace() for v in values] + assert str_s.str.islower().tolist() == [v.islower() for v in values] + assert str_s.str.isupper().tolist() == [v.isupper() for v in values] + assert str_s.str.istitle().tolist() == [v.istitle() for v in values] def test_isnumeric(self): # 0x00bc: ¼ VULGAR FRACTION ONE QUARTER @@ -1309,10 +1387,8 @@ def test_isnumeric(self): tm.assert_series_equal(s.str.isdecimal(), Series(decimal_e)) unicodes = [u'A', u'3', u'¼', u'★', u'፸', u'3', u'four'] - self.assertEqual(s.str.isnumeric().tolist(), [ - v.isnumeric() for v in unicodes]) - self.assertEqual(s.str.isdecimal().tolist(), [ - v.isdecimal() for v in unicodes]) + assert s.str.isnumeric().tolist() == [v.isnumeric() for v in unicodes] + assert s.str.isdecimal().tolist() == [v.isdecimal() for v in unicodes] values = ['A', np.nan, u'¼', u'★', np.nan, u'3', 'four'] s = Series(values) @@ -1371,7 +1447,7 @@ def test_join(self): rs = Series(mixed).str.split('_').str.join('_') xp = Series(['a_b', NA, 'asdf_cas_asdf', NA, NA, 'foo', NA, NA, NA]) - tm.assertIsInstance(rs, Series) + assert isinstance(rs, Series) tm.assert_almost_equal(rs, xp) # unicode @@ -1383,7 +1459,7 @@ def test_len(self): values = Series(['foo', 'fooo', 'fooooo', np.nan, 'fooooooo']) result = values.str.len() - exp = values.map(lambda x: len(x) if notnull(x) else NA) + exp = values.map(lambda x: len(x) if notna(x) else NA) tm.assert_series_equal(result, exp) # mixed @@ -1393,7 +1469,7 @@ def test_len(self): rs = 
Series(mixed).str.len() xp = Series([3, NA, 13, NA, NA, 3, NA, NA, NA]) - tm.assertIsInstance(rs, Series) + assert isinstance(rs, Series) tm.assert_almost_equal(rs, xp) # unicode @@ -1401,7 +1477,7 @@ def test_len(self): 'fooooooo')]) result = values.str.len() - exp = values.map(lambda x: len(x) if notnull(x) else NA) + exp = values.map(lambda x: len(x) if notna(x) else NA) tm.assert_series_equal(result, exp) def test_findall(self): @@ -1418,7 +1494,7 @@ def test_findall(self): rs = Series(mixed).str.findall('BAD[_]*') xp = Series([['BAD__', 'BAD'], NA, [], NA, NA, ['BAD'], NA, NA, NA]) - tm.assertIsInstance(rs, Series) + assert isinstance(rs, Series) tm.assert_almost_equal(rs, xp) # unicode @@ -1466,12 +1542,12 @@ def test_find(self): dtype=np.int64) tm.assert_numpy_array_equal(result.values, expected) - with tm.assertRaisesRegexp(TypeError, - "expected a string object, not int"): + with tm.assert_raises_regex(TypeError, + "expected a string object, not int"): result = values.str.find(0) - with tm.assertRaisesRegexp(TypeError, - "expected a string object, not int"): + with tm.assert_raises_regex(TypeError, + "expected a string object, not int"): result = values.str.rfind(0) def test_find_nan(self): @@ -1541,11 +1617,13 @@ def _check(result, expected): dtype=np.int64) tm.assert_numpy_array_equal(result.values, expected) - with tm.assertRaisesRegexp(ValueError, "substring not found"): + with tm.assert_raises_regex(ValueError, + "substring not found"): result = s.str.index('DE') - with tm.assertRaisesRegexp(TypeError, - "expected a string object, not int"): + with tm.assert_raises_regex(TypeError, + "expected a string " + "object, not int"): result = s.str.index(0) # test with nan @@ -1577,7 +1655,7 @@ def test_pad(self): rs = Series(mixed).str.pad(5, side='left') xp = Series([' a', NA, ' b', NA, NA, ' ee', NA, NA, NA]) - tm.assertIsInstance(rs, Series) + assert isinstance(rs, Series) tm.assert_almost_equal(rs, xp) mixed = Series(['a', NA, 'b', True, 
datetime.today(), 'ee', None, 1, 2. @@ -1586,7 +1664,7 @@ def test_pad(self): rs = Series(mixed).str.pad(5, side='right') xp = Series(['a ', NA, 'b ', NA, NA, 'ee ', NA, NA, NA]) - tm.assertIsInstance(rs, Series) + assert isinstance(rs, Series) tm.assert_almost_equal(rs, xp) mixed = Series(['a', NA, 'b', True, datetime.today(), 'ee', None, 1, 2. @@ -1595,7 +1673,7 @@ def test_pad(self): rs = Series(mixed).str.pad(5, side='both') xp = Series([' a ', NA, ' b ', NA, NA, ' ee ', NA, NA, NA]) - tm.assertIsInstance(rs, Series) + assert isinstance(rs, Series) tm.assert_almost_equal(rs, xp) # unicode @@ -1629,12 +1707,14 @@ def test_pad_fillchar(self): exp = Series(['XXaXX', 'XXbXX', NA, 'XXcXX', NA, 'eeeeee']) tm.assert_almost_equal(result, exp) - with tm.assertRaisesRegexp(TypeError, - "fillchar must be a character, not str"): + with tm.assert_raises_regex(TypeError, + "fillchar must be a " + "character, not str"): result = values.str.pad(5, fillchar='XY') - with tm.assertRaisesRegexp(TypeError, - "fillchar must be a character, not int"): + with tm.assert_raises_regex(TypeError, + "fillchar must be a " + "character, not int"): result = values.str.pad(5, fillchar=5) def test_pad_width(self): @@ -1642,8 +1722,9 @@ def test_pad_width(self): s = Series(['1', '22', 'a', 'bb']) for f in ['center', 'ljust', 'rjust', 'zfill', 'pad']: - with tm.assertRaisesRegexp(TypeError, - "width must be of integer type, not*"): + with tm.assert_raises_regex(TypeError, + "width must be of " + "integer type, not*"): getattr(s.str, f)('f') def test_translate(self): @@ -1675,7 +1756,7 @@ def _check(result, expected): expected = klass(['abcde', 'abcc', 'cddd', 'cde']) _check(result, expected) else: - with tm.assertRaisesRegexp( + with tm.assert_raises_regex( ValueError, "deletechars is not a valid argument"): result = s.str.translate(table, deletechars='fg') @@ -1707,19 +1788,19 @@ def test_center_ljust_rjust(self): rs = Series(mixed).str.center(5) xp = Series([' a ', NA, ' b ', NA, NA, ' c ', ' 
eee ', NA, NA, NA ]) - tm.assertIsInstance(rs, Series) + assert isinstance(rs, Series) tm.assert_almost_equal(rs, xp) rs = Series(mixed).str.ljust(5) xp = Series(['a ', NA, 'b ', NA, NA, 'c ', 'eee ', NA, NA, NA ]) - tm.assertIsInstance(rs, Series) + assert isinstance(rs, Series) tm.assert_almost_equal(rs, xp) rs = Series(mixed).str.rjust(5) xp = Series([' a', NA, ' b', NA, NA, ' c', ' eee', NA, NA, NA ]) - tm.assertIsInstance(rs, Series) + assert isinstance(rs, Series) tm.assert_almost_equal(rs, xp) # unicode @@ -1764,28 +1845,34 @@ def test_center_ljust_rjust_fillchar(self): # If fillchar is not a charatter, normal str raises TypeError # 'aaa'.ljust(5, 'XY') # TypeError: must be char, not str - with tm.assertRaisesRegexp(TypeError, - "fillchar must be a character, not str"): + with tm.assert_raises_regex(TypeError, + "fillchar must be a " + "character, not str"): result = values.str.center(5, fillchar='XY') - with tm.assertRaisesRegexp(TypeError, - "fillchar must be a character, not str"): + with tm.assert_raises_regex(TypeError, + "fillchar must be a " + "character, not str"): result = values.str.ljust(5, fillchar='XY') - with tm.assertRaisesRegexp(TypeError, - "fillchar must be a character, not str"): + with tm.assert_raises_regex(TypeError, + "fillchar must be a " + "character, not str"): result = values.str.rjust(5, fillchar='XY') - with tm.assertRaisesRegexp(TypeError, - "fillchar must be a character, not int"): + with tm.assert_raises_regex(TypeError, + "fillchar must be a " + "character, not int"): result = values.str.center(5, fillchar=1) - with tm.assertRaisesRegexp(TypeError, - "fillchar must be a character, not int"): + with tm.assert_raises_regex(TypeError, + "fillchar must be a " + "character, not int"): result = values.str.ljust(5, fillchar=1) - with tm.assertRaisesRegexp(TypeError, - "fillchar must be a character, not int"): + with tm.assert_raises_regex(TypeError, + "fillchar must be a " + "character, not int"): result = values.str.rjust(5, 
fillchar=1) def test_zfill(self): @@ -1831,11 +1918,11 @@ def test_split(self): result = mixed.str.split('_') exp = Series([['a', 'b', 'c'], NA, ['d', 'e', 'f'], NA, NA, NA, NA, NA ]) - tm.assertIsInstance(result, Series) + assert isinstance(result, Series) tm.assert_almost_equal(result, exp) result = mixed.str.split('_', expand=False) - tm.assertIsInstance(result, Series) + assert isinstance(result, Series) tm.assert_almost_equal(result, exp) # unicode @@ -1876,11 +1963,11 @@ def test_rsplit(self): result = mixed.str.rsplit('_') exp = Series([['a', 'b', 'c'], NA, ['d', 'e', 'f'], NA, NA, NA, NA, NA ]) - tm.assertIsInstance(result, Series) + assert isinstance(result, Series) tm.assert_almost_equal(result, exp) result = mixed.str.rsplit('_', expand=False) - tm.assertIsInstance(result, Series) + assert isinstance(result, Series) tm.assert_almost_equal(result, exp) # unicode @@ -1910,9 +1997,9 @@ def test_split_noargs(self): s = Series(['Wes McKinney', 'Travis Oliphant']) result = s.str.split() expected = ['Travis', 'Oliphant'] - self.assertEqual(result[1], expected) + assert result[1] == expected result = s.str.rsplit() - self.assertEqual(result[1], expected) + assert result[1] == expected def test_split_maxsplit(self): # re.split 0, str.split -1 @@ -1967,7 +2054,7 @@ def test_split_to_dataframe(self): index=['preserve', 'me']) tm.assert_frame_equal(result, exp) - with tm.assertRaisesRegexp(ValueError, "expand must be"): + with tm.assert_raises_regex(ValueError, "expand must be"): s.str.split('_', expand="not_a_boolean") def test_split_to_multiindex_expand(self): @@ -1975,14 +2062,14 @@ def test_split_to_multiindex_expand(self): result = idx.str.split('_', expand=True) exp = idx tm.assert_index_equal(result, exp) - self.assertEqual(result.nlevels, 1) + assert result.nlevels == 1 idx = Index(['some_equal_splits', 'with_no_nans']) result = idx.str.split('_', expand=True) exp = MultiIndex.from_tuples([('some', 'equal', 'splits'), ( 'with', 'no', 'nans')]) 
tm.assert_index_equal(result, exp) - self.assertEqual(result.nlevels, 3) + assert result.nlevels == 3 idx = Index(['some_unequal_splits', 'one_of_these_things_is_not']) result = idx.str.split('_', expand=True) @@ -1990,9 +2077,9 @@ def test_split_to_multiindex_expand(self): ), ('one', 'of', 'these', 'things', 'is', 'not')]) tm.assert_index_equal(result, exp) - self.assertEqual(result.nlevels, 6) + assert result.nlevels == 6 - with tm.assertRaisesRegexp(ValueError, "expand must be"): + with tm.assert_raises_regex(ValueError, "expand must be"): idx.str.split('_', expand="not_a_boolean") def test_rsplit_to_dataframe_expand(self): @@ -2029,21 +2116,33 @@ def test_rsplit_to_multiindex_expand(self): result = idx.str.rsplit('_', expand=True) exp = idx tm.assert_index_equal(result, exp) - self.assertEqual(result.nlevels, 1) + assert result.nlevels == 1 idx = Index(['some_equal_splits', 'with_no_nans']) result = idx.str.rsplit('_', expand=True) exp = MultiIndex.from_tuples([('some', 'equal', 'splits'), ( 'with', 'no', 'nans')]) tm.assert_index_equal(result, exp) - self.assertEqual(result.nlevels, 3) + assert result.nlevels == 3 idx = Index(['some_equal_splits', 'with_no_nans']) result = idx.str.rsplit('_', expand=True, n=1) exp = MultiIndex.from_tuples([('some_equal', 'splits'), ('with_no', 'nans')]) tm.assert_index_equal(result, exp) - self.assertEqual(result.nlevels, 2) + assert result.nlevels == 2 + + def test_split_nan_expand(self): + # gh-18450 + s = Series(["foo,bar,baz", NA]) + result = s.str.split(",", expand=True) + exp = DataFrame([["foo", "bar", "baz"], [NA, NA, NA]]) + tm.assert_frame_equal(result, exp) + + # check that these are actually np.nan and not None + # TODO see GH 18463 + # tm.assert_frame_equal does not differentiate + assert all(np.isnan(x) for x in result.iloc[1]) def test_split_with_name(self): # GH 12617 @@ -2061,12 +2160,12 @@ def test_split_with_name(self): idx = Index(['a,b', 'c,d'], name='xxx') res = idx.str.split(',') exp = Index([['a', 'b'], 
['c', 'd']], name='xxx') - self.assertTrue(res.nlevels, 1) + assert res.nlevels == 1 tm.assert_index_equal(res, exp) res = idx.str.split(',', expand=True) exp = MultiIndex.from_tuples([('a', 'b'), ('c', 'd')]) - self.assertTrue(res.nlevels, 2) + assert res.nlevels == 2 tm.assert_index_equal(res, exp) def test_partition_series(self): @@ -2132,9 +2231,9 @@ def test_partition_series(self): # compare to standard lib values = Series(['A_B_C', 'B_C_D', 'E_F_G', 'EFGHEF']) result = values.str.partition('_', expand=False).tolist() - self.assertEqual(result, [v.partition('_') for v in values]) + assert result == [v.partition('_') for v in values] result = values.str.rpartition('_', expand=False).tolist() - self.assertEqual(result, [v.rpartition('_') for v in values]) + assert result == [v.rpartition('_') for v in values] def test_partition_index(self): values = Index(['a_b_c', 'c_d_e', 'f_g_h']) @@ -2143,25 +2242,25 @@ def test_partition_index(self): exp = Index(np.array([('a', '_', 'b_c'), ('c', '_', 'd_e'), ('f', '_', 'g_h')])) tm.assert_index_equal(result, exp) - self.assertEqual(result.nlevels, 1) + assert result.nlevels == 1 result = values.str.rpartition('_', expand=False) exp = Index(np.array([('a_b', '_', 'c'), ('c_d', '_', 'e'), ( 'f_g', '_', 'h')])) tm.assert_index_equal(result, exp) - self.assertEqual(result.nlevels, 1) + assert result.nlevels == 1 result = values.str.partition('_') exp = Index([('a', '_', 'b_c'), ('c', '_', 'd_e'), ('f', '_', 'g_h')]) tm.assert_index_equal(result, exp) - self.assertTrue(isinstance(result, MultiIndex)) - self.assertEqual(result.nlevels, 3) + assert isinstance(result, MultiIndex) + assert result.nlevels == 3 result = values.str.rpartition('_') exp = Index([('a_b', '_', 'c'), ('c_d', '_', 'e'), ('f_g', '_', 'h')]) tm.assert_index_equal(result, exp) - self.assertTrue(isinstance(result, MultiIndex)) - self.assertEqual(result.nlevels, 3) + assert isinstance(result, MultiIndex) + assert result.nlevels == 3 def 
test_partition_to_dataframe(self): values = Series(['a_b_c', 'c_d_e', NA, 'f_g_h']) @@ -2206,13 +2305,13 @@ def test_partition_with_name(self): idx = Index(['a,b', 'c,d'], name='xxx') res = idx.str.partition(',') exp = MultiIndex.from_tuples([('a', ',', 'b'), ('c', ',', 'd')]) - self.assertTrue(res.nlevels, 3) + assert res.nlevels == 3 tm.assert_index_equal(res, exp) # should preserve name res = idx.str.partition(',', expand=False) exp = Index(np.array([('a', ',', 'b'), ('c', ',', 'd')]), name='xxx') - self.assertTrue(res.nlevels, 1) + assert res.nlevels == 1 tm.assert_index_equal(res, exp) def test_pipe_failures(self): @@ -2240,7 +2339,7 @@ def test_slice(self): (3, 0, -1)]: try: result = values.str.slice(start, stop, step) - expected = Series([s[start:stop:step] if not isnull(s) else NA + expected = Series([s[start:stop:step] if not isna(s) else NA for s in values]) tm.assert_series_equal(result, expected) except: @@ -2254,7 +2353,7 @@ def test_slice(self): rs = Series(mixed).str.slice(2, 5) xp = Series(['foo', NA, 'bar', NA, NA, NA, NA, NA]) - tm.assertIsInstance(rs, Series) + assert isinstance(rs, Series) tm.assert_almost_equal(rs, xp) rs = Series(mixed).str.slice(2, 5, -1) @@ -2332,19 +2431,19 @@ def test_strip_lstrip_rstrip_mixed(self): rs = Series(mixed).str.strip() xp = Series(['aa', NA, 'bb', NA, NA, NA, NA, NA]) - tm.assertIsInstance(rs, Series) + assert isinstance(rs, Series) tm.assert_almost_equal(rs, xp) rs = Series(mixed).str.lstrip() xp = Series(['aa ', NA, 'bb \t\n', NA, NA, NA, NA, NA]) - tm.assertIsInstance(rs, Series) + assert isinstance(rs, Series) tm.assert_almost_equal(rs, xp) rs = Series(mixed).str.rstrip() xp = Series([' aa', NA, ' bb', NA, NA, NA, NA, NA]) - tm.assertIsInstance(rs, Series) + assert isinstance(rs, Series) tm.assert_almost_equal(rs, xp) def test_strip_lstrip_rstrip_unicode(self): @@ -2433,7 +2532,7 @@ def test_get(self): rs = Series(mixed).str.split('_').str.get(1) xp = Series(['b', NA, 'd', NA, NA, NA, NA, NA]) - 
tm.assertIsInstance(rs, Series) + assert isinstance(rs, Series) tm.assert_almost_equal(rs, xp) # unicode @@ -2443,6 +2542,19 @@ def test_get(self): expected = Series([u('b'), u('d'), np.nan, u('g')]) tm.assert_series_equal(result, expected) + # bounds testing + values = Series(['1_2_3_4_5', '6_7_8_9_10', '11_12']) + + # positive index + result = values.str.split('_').str.get(2) + expected = Series(['3', '8', np.nan]) + tm.assert_series_equal(result, expected) + + # negative index + result = values.str.split('_').str.get(-3) + expected = Series(['3', '8', np.nan]) + tm.assert_series_equal(result, expected) + def test_more_contains(self): # PR #1179 s = Series(['A', 'B', 'C', 'Aaba', 'Baca', '', NA, @@ -2551,20 +2663,21 @@ def test_match_findall_flags(self): pat = r'([A-Z0-9._%+-]+)@([A-Z0-9.-]+)\.([A-Z]{2,4})' - with tm.assert_produces_warning(FutureWarning): - result = data.str.match(pat, flags=re.IGNORECASE) + result = data.str.extract(pat, flags=re.IGNORECASE, expand=True) + assert result.iloc[0].tolist() == ['dave', 'google', 'com'] - self.assertEqual(result[0], ('dave', 'google', 'com')) + result = data.str.match(pat, flags=re.IGNORECASE) + assert result[0] result = data.str.findall(pat, flags=re.IGNORECASE) - self.assertEqual(result[0][0], ('dave', 'google', 'com')) + assert result[0][0] == ('dave', 'google', 'com') result = data.str.count(pat, flags=re.IGNORECASE) - self.assertEqual(result[0], 1) + assert result[0] == 1 with tm.assert_produces_warning(UserWarning): result = data.str.contains(pat, flags=re.IGNORECASE) - self.assertEqual(result[0], True) + assert result[0] def test_encode_decode(self): base = Series([u('a'), u('b'), u('a\xe4')]) @@ -2579,7 +2692,7 @@ def test_encode_decode(self): def test_encode_decode_errors(self): encodeBase = Series([u('a'), u('b'), u('a\x9d')]) - self.assertRaises(UnicodeEncodeError, encodeBase.str.encode, 'cp1252') + pytest.raises(UnicodeEncodeError, encodeBase.str.encode, 'cp1252') f = lambda x: x.encode('cp1252', 
'ignore') result = encodeBase.str.encode('cp1252', 'ignore') @@ -2588,7 +2701,7 @@ def test_encode_decode_errors(self): decodeBase = Series([b'a', b'b', b'a\x9d']) - self.assertRaises(UnicodeDecodeError, decodeBase.str.decode, 'cp1252') + pytest.raises(UnicodeDecodeError, decodeBase.str.decode, 'cp1252') f = lambda x: x.decode('cp1252', 'ignore') result = decodeBase.str.decode('cp1252', 'ignore') @@ -2612,7 +2725,8 @@ def test_normalize(self): result = s.str.normalize('NFC') tm.assert_series_equal(result, expected) - with tm.assertRaisesRegexp(ValueError, "invalid normalization form"): + with tm.assert_raises_regex(ValueError, + "invalid normalization form"): s.str.normalize('xxx') s = Index([u'ABC', u'123', u'アイエ']) @@ -2631,19 +2745,19 @@ def test_cat_on_filtered_index(self): str_month = df.month.astype('str') str_both = str_year.str.cat(str_month, sep=' ') - self.assertEqual(str_both.loc[1], '2011 2') + assert str_both.loc[1] == '2011 2' str_multiple = str_year.str.cat([str_month, str_month], sep=' ') - self.assertEqual(str_multiple.loc[1], '2011 2 2') + assert str_multiple.loc[1] == '2011 2 2' def test_str_cat_raises_intuitive_error(self): # https://github.com/pandas-dev/pandas/issues/11334 s = Series(['a', 'b', 'c', 'd']) message = "Did you mean to supply a `sep` keyword?" 
- with tm.assertRaisesRegexp(ValueError, message): + with tm.assert_raises_regex(ValueError, message): s.str.cat('|') - with tm.assertRaisesRegexp(ValueError, message): + with tm.assert_raises_regex(ValueError, message): s.str.cat(' ') def test_index_str_accessor_visibility(self): @@ -2665,15 +2779,15 @@ def test_index_str_accessor_visibility(self): (['aa', datetime(2011, 1, 1)], 'mixed')] for values, tp in cases: idx = Index(values) - self.assertTrue(isinstance(Series(values).str, StringMethods)) - self.assertTrue(isinstance(idx.str, StringMethods)) - self.assertEqual(idx.inferred_type, tp) + assert isinstance(Series(values).str, StringMethods) + assert isinstance(idx.str, StringMethods) + assert idx.inferred_type == tp for values, tp in cases: idx = Index(values) - self.assertTrue(isinstance(Series(values).str, StringMethods)) - self.assertTrue(isinstance(idx.str, StringMethods)) - self.assertEqual(idx.inferred_type, tp) + assert isinstance(Series(values).str, StringMethods) + assert isinstance(idx.str, StringMethods) + assert idx.inferred_type == tp cases = [([1, np.nan], 'floating'), ([datetime(2011, 1, 1)], 'datetime64'), @@ -2681,31 +2795,31 @@ def test_index_str_accessor_visibility(self): for values, tp in cases: idx = Index(values) message = 'Can only use .str accessor with string values' - with self.assertRaisesRegexp(AttributeError, message): + with tm.assert_raises_regex(AttributeError, message): Series(values).str - with self.assertRaisesRegexp(AttributeError, message): + with tm.assert_raises_regex(AttributeError, message): idx.str - self.assertEqual(idx.inferred_type, tp) + assert idx.inferred_type == tp # MultiIndex has mixed dtype, but not allow to use accessor idx = MultiIndex.from_tuples([('a', 'b'), ('a', 'b')]) - self.assertEqual(idx.inferred_type, 'mixed') + assert idx.inferred_type == 'mixed' message = 'Can only use .str accessor with Index, not MultiIndex' - with self.assertRaisesRegexp(AttributeError, message): + with 
tm.assert_raises_regex(AttributeError, message): idx.str def test_str_accessor_no_new_attributes(self): # https://github.com/pandas-dev/pandas/issues/10673 s = Series(list('aabbcde')) - with tm.assertRaisesRegexp(AttributeError, - "You cannot add any new attribute"): + with tm.assert_raises_regex(AttributeError, + "You cannot add any new attribute"): s.str.xlabel = "a" def test_method_on_bytes(self): lhs = Series(np.array(list('abc'), 'S1').astype(object)) rhs = Series(np.array(list('def'), 'S1').astype(object)) if compat.PY3: - self.assertRaises(TypeError, lhs.str.cat, rhs) + pytest.raises(TypeError, lhs.str.cat, rhs) else: result = lhs.str.cat(rhs) expected = Series(np.array( diff --git a/pandas/tests/test_take.py b/pandas/tests/test_take.py index 3aed22c140ffe..7b97b0e975df3 100644 --- a/pandas/tests/test_take.py +++ b/pandas/tests/test_take.py @@ -6,10 +6,10 @@ from pandas.compat import long import pandas.core.algorithms as algos import pandas.util.testing as tm -from pandas.tslib import iNaT +from pandas._libs.tslib import iNaT -class TestTake(tm.TestCase): +class TestTake(object): # standard incompatible fill error fill_error = re.compile("Incompatible type for fill_value") @@ -32,7 +32,7 @@ def _test_dtype(dtype, can_hold_na, writeable=True): expected[3] = np.nan tm.assert_almost_equal(out, expected) else: - with tm.assertRaisesRegexp(TypeError, self.fill_error): + with tm.assert_raises_regex(TypeError, self.fill_error): algos.take_1d(data, indexer, out=out) # no exception o/w data.take(indexer, out=out) @@ -123,7 +123,8 @@ def _test_dtype(dtype, can_hold_na, writeable=True): tm.assert_almost_equal(out1, expected1) else: for i, out in enumerate([out0, out1]): - with tm.assertRaisesRegexp(TypeError, self.fill_error): + with tm.assert_raises_regex(TypeError, + self.fill_error): algos.take_nd(data, indexer, out=out, axis=i) # no exception o/w data.take(indexer, out=out, axis=i) @@ -235,7 +236,8 @@ def _test_dtype(dtype, can_hold_na): 
tm.assert_almost_equal(out2, expected2) else: for i, out in enumerate([out0, out1, out2]): - with tm.assertRaisesRegexp(TypeError, self.fill_error): + with tm.assert_raises_regex(TypeError, + self.fill_error): algos.take_nd(data, indexer, out=out, axis=i) # no exception o/w data.take(indexer, out=out, axis=i) @@ -348,24 +350,24 @@ def test_1d_bool(self): result = algos.take_1d(arr, [0, 2, 2, 1]) expected = arr.take([0, 2, 2, 1]) - self.assert_numpy_array_equal(result, expected) + tm.assert_numpy_array_equal(result, expected) result = algos.take_1d(arr, [0, 2, -1]) - self.assertEqual(result.dtype, np.object_) + assert result.dtype == np.object_ def test_2d_bool(self): arr = np.array([[0, 1, 0], [1, 0, 1], [0, 1, 1]], dtype=bool) result = algos.take_nd(arr, [0, 2, 2, 1]) expected = arr.take([0, 2, 2, 1], axis=0) - self.assert_numpy_array_equal(result, expected) + tm.assert_numpy_array_equal(result, expected) result = algos.take_nd(arr, [0, 2, 2, 1], axis=1) expected = arr.take([0, 2, 2, 1], axis=1) - self.assert_numpy_array_equal(result, expected) + tm.assert_numpy_array_equal(result, expected) result = algos.take_nd(arr, [0, 2, -1]) - self.assertEqual(result.dtype, np.object_) + assert result.dtype == np.object_ def test_2d_float32(self): arr = np.random.randn(4, 3).astype(np.float32) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 3add568c1ea99..dabdb1e8e689c 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -1,23 +1,23 @@ from itertools import product import pytest -import sys import warnings +from warnings import catch_warnings -from datetime import datetime +from datetime import datetime, timedelta from numpy.random import randn import numpy as np -from distutils.version import LooseVersion import pandas as pd -from pandas import (Series, DataFrame, Panel, bdate_range, isnull, - notnull, concat, Timestamp) -import pandas.stats.moments as mom +from pandas import (Series, DataFrame, bdate_range, + isna, 
notna, concat, Timestamp, Index) import pandas.core.window as rwindow import pandas.tseries.offsets as offsets from pandas.core.base import SpecificationError -from pandas.core.common import UnsupportedFunctionCall +from pandas.errors import UnsupportedFunctionCall +from pandas.core.sorting import safe_sort import pandas.util.testing as tm -from pandas.compat import range, zip, PY3 +import pandas.util._test_decorators as td +from pandas.compat import range, zip N, K = 100, 10 @@ -29,7 +29,7 @@ def assert_equal(left, right): tm.assert_frame_equal(left, right) -class Base(tm.TestCase): +class Base(object): _nan_locs = np.arange(20, 40) _inf_locs = np.array([]) @@ -47,7 +47,7 @@ def _create_data(self): class TestApi(Base): - def setUp(self): + def setup_method(self, method): self._create_data() def test_getitem(self): @@ -56,7 +56,7 @@ def test_getitem(self): tm.assert_index_equal(r._selected_obj.columns, self.frame.columns) r = self.frame.rolling(window=5)[1] - self.assertEqual(r._selected_obj.name, self.frame.columns[1]) + assert r._selected_obj.name == self.frame.columns[1] # technically this is allowed r = self.frame.rolling(window=5)[1, 3] @@ -70,10 +70,10 @@ def test_getitem(self): def test_select_bad_cols(self): df = DataFrame([[1, 2]], columns=['A', 'B']) g = df.rolling(window=5) - self.assertRaises(KeyError, g.__getitem__, ['C']) # g[['C']] + pytest.raises(KeyError, g.__getitem__, ['C']) # g[['C']] - self.assertRaises(KeyError, g.__getitem__, ['A', 'C']) # g[['A', 'C']] - with tm.assertRaisesRegexp(KeyError, '^[^A]+$'): + pytest.raises(KeyError, g.__getitem__, ['A', 'C']) # g[['A', 'C']] + with tm.assert_raises_regex(KeyError, '^[^A]+$'): # A should not be referenced as a bad column... # will have to rethink regex if you change message! 
g[['A', 'C']] @@ -83,7 +83,7 @@ def test_attribute_access(self): df = DataFrame([[1, 2]], columns=['A', 'B']) r = df.rolling(window=5) tm.assert_series_equal(r.A.sum(), r['A'].sum()) - self.assertRaises(AttributeError, lambda: r.F) + pytest.raises(AttributeError, lambda: r.F) def tests_skip_nuisance(self): @@ -96,7 +96,7 @@ def tests_skip_nuisance(self): columns=list('AB')) tm.assert_frame_equal(result, expected) - expected = pd.concat([r[['A', 'B']].sum(), df[['C']]], axis=1) + expected = concat([r[['A', 'B']].sum(), df[['C']]], axis=1) result = r.sum() tm.assert_frame_equal(result, expected, check_like=True) @@ -112,44 +112,46 @@ def test_agg(self): b_sum = r['B'].sum() result = r.aggregate([np.mean, np.std]) - expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1) + expected = concat([a_mean, a_std, b_mean, b_std], axis=1) expected.columns = pd.MultiIndex.from_product([['A', 'B'], ['mean', 'std']]) tm.assert_frame_equal(result, expected) result = r.aggregate({'A': np.mean, 'B': np.std}) - expected = pd.concat([a_mean, b_std], axis=1) + expected = concat([a_mean, b_std], axis=1) tm.assert_frame_equal(result, expected, check_like=True) result = r.aggregate({'A': ['mean', 'std']}) - expected = pd.concat([a_mean, a_std], axis=1) + expected = concat([a_mean, a_std], axis=1) expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'), ('A', 'std')]) tm.assert_frame_equal(result, expected) result = r['A'].aggregate(['mean', 'sum']) - expected = pd.concat([a_mean, a_sum], axis=1) + expected = concat([a_mean, a_sum], axis=1) expected.columns = ['mean', 'sum'] tm.assert_frame_equal(result, expected) - result = r.aggregate({'A': {'mean': 'mean', 'sum': 'sum'}}) - expected = pd.concat([a_mean, a_sum], axis=1) + with catch_warnings(record=True): + result = r.aggregate({'A': {'mean': 'mean', 'sum': 'sum'}}) + expected = concat([a_mean, a_sum], axis=1) expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'), ('A', 'sum')]) tm.assert_frame_equal(result, expected, 
check_like=True) - result = r.aggregate({'A': {'mean': 'mean', - 'sum': 'sum'}, - 'B': {'mean2': 'mean', - 'sum2': 'sum'}}) - expected = pd.concat([a_mean, a_sum, b_mean, b_sum], axis=1) + with catch_warnings(record=True): + result = r.aggregate({'A': {'mean': 'mean', + 'sum': 'sum'}, + 'B': {'mean2': 'mean', + 'sum2': 'sum'}}) + expected = concat([a_mean, a_sum, b_mean, b_sum], axis=1) exp_cols = [('A', 'mean'), ('A', 'sum'), ('B', 'mean2'), ('B', 'sum2')] expected.columns = pd.MultiIndex.from_tuples(exp_cols) tm.assert_frame_equal(result, expected, check_like=True) result = r.aggregate({'A': ['mean', 'std'], 'B': ['mean', 'std']}) - expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1) + expected = concat([a_mean, a_std, b_mean, b_std], axis=1) exp_cols = [('A', 'mean'), ('A', 'std'), ('B', 'mean'), ('B', 'std')] expected.columns = pd.MultiIndex.from_tuples(exp_cols) @@ -158,7 +160,7 @@ def test_agg(self): # passed lambda result = r.agg({'A': np.sum, 'B': lambda x: np.std(x, ddof=1)}) rcustom = r['B'].apply(lambda x: np.std(x, ddof=1)) - expected = pd.concat([a_sum, rcustom], axis=1) + expected = concat([a_sum, rcustom], axis=1) tm.assert_frame_equal(result, expected, check_like=True) def test_agg_consistency(self): @@ -171,7 +173,7 @@ def test_agg_consistency(self): tm.assert_index_equal(result, expected) result = r['A'].agg([np.sum, np.mean]).columns - expected = pd.Index(['sum', 'mean']) + expected = Index(['sum', 'mean']) tm.assert_index_equal(result, expected) result = r.agg({'A': [np.sum, np.mean]}).columns @@ -188,18 +190,20 @@ def f(): r.aggregate({'r1': {'A': ['mean', 'sum']}, 'r2': {'B': ['mean', 'sum']}}) - self.assertRaises(SpecificationError, f) + pytest.raises(SpecificationError, f) - expected = pd.concat([r['A'].mean(), r['A'].std(), r['B'].mean(), - r['B'].std()], axis=1) + expected = concat([r['A'].mean(), r['A'].std(), + r['B'].mean(), r['B'].std()], axis=1) expected.columns = pd.MultiIndex.from_tuples([('ra', 'mean'), ( 'ra', 'std'), 
('rb', 'mean'), ('rb', 'std')]) - result = r[['A', 'B']].agg({'A': {'ra': ['mean', 'std']}, - 'B': {'rb': ['mean', 'std']}}) + with catch_warnings(record=True): + result = r[['A', 'B']].agg({'A': {'ra': ['mean', 'std']}, + 'B': {'rb': ['mean', 'std']}}) tm.assert_frame_equal(result, expected, check_like=True) - result = r.agg({'A': {'ra': ['mean', 'std']}, - 'B': {'rb': ['mean', 'std']}}) + with catch_warnings(record=True): + result = r.agg({'A': {'ra': ['mean', 'std']}, + 'B': {'rb': ['mean', 'std']}}) expected.columns = pd.MultiIndex.from_tuples([('A', 'ra', 'mean'), ( 'A', 'ra', 'std'), ('B', 'rb', 'mean'), ('B', 'rb', 'std')]) tm.assert_frame_equal(result, expected, check_like=True) @@ -220,8 +224,8 @@ def test_count_nonnumeric_types(self): 'fl_inf': [1., 2., np.Inf], 'fl_nan': [1., 2., np.NaN], 'str_nan': ['aa', 'bb', np.NaN], - 'dt_nat': [pd.Timestamp('20170101'), pd.Timestamp('20170203'), - pd.Timestamp(None)], + 'dt_nat': [Timestamp('20170101'), Timestamp('20170203'), + Timestamp(None)], 'periods_nat': [pd.Period('2012-01'), pd.Period('2012-02'), pd.Period(None)]}, columns=cols) @@ -244,16 +248,15 @@ def test_count_nonnumeric_types(self): tm.assert_frame_equal(result, expected) result = df.rolling(1).count() - expected = df.notnull().astype(float) + expected = df.notna().astype(float) tm.assert_frame_equal(result, expected) + @td.skip_if_no_scipy def test_window_with_args(self): - tm._skip_if_no_scipy() - # make sure that we are aggregating window functions correctly with arg r = Series(np.random.randn(100)).rolling(window=10, min_periods=1, win_type='gaussian') - expected = pd.concat([r.mean(std=10), r.mean(std=.01)], axis=1) + expected = concat([r.mean(std=10), r.mean(std=.01)], axis=1) expected.columns = ['', ''] result = r.aggregate([lambda x: x.mean(std=10), lambda x: x.mean(std=.01)]) @@ -265,7 +268,7 @@ def a(x): def b(x): return x.mean(std=0.01) - expected = pd.concat([r.mean(std=10), r.mean(std=.01)], axis=1) + expected = concat([r.mean(std=10), 
r.mean(std=.01)], axis=1) expected.columns = ['a', 'b'] result = r.aggregate([a, b]) tm.assert_frame_equal(result, expected) @@ -276,47 +279,18 @@ def test_preserve_metadata(self): s2 = s.rolling(30).sum() s3 = s.rolling(20).sum() - self.assertEqual(s2.name, 'foo') - self.assertEqual(s3.name, 'foo') - - def test_how_compat(self): - # in prior versions, we would allow how to be used in the resample - # now that its deprecated, we need to handle this in the actual - # aggregation functions - s = pd.Series( - np.random.randn(20), - index=pd.date_range('1/1/2000', periods=20, freq='12H')) - - for how in ['min', 'max', 'median']: - for op in ['mean', 'sum', 'std', 'var', 'kurt', 'skew']: - for t in ['rolling', 'expanding']: - - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - - dfunc = getattr(pd, "{0}_{1}".format(t, op)) - if dfunc is None: - continue - - if t == 'rolling': - kwargs = {'window': 5} - else: - kwargs = {} - result = dfunc(s, freq='D', how=how, **kwargs) - - expected = getattr( - getattr(s, t)(freq='D', **kwargs), op)(how=how) - tm.assert_series_equal(result, expected) + assert s2.name == 'foo' + assert s3.name == 'foo' class TestWindow(Base): - def setUp(self): + def setup_method(self, method): self._create_data() + @td.skip_if_no_scipy def test_constructor(self): # GH 12669 - tm._skip_if_no_scipy() for o in [self.series, self.frame]: c = o.rolling @@ -332,13 +306,13 @@ def test_constructor(self): # not valid for w in [2., 'foo', np.array([2])]: - with self.assertRaises(ValueError): + with pytest.raises(ValueError): c(win_type='boxcar', window=2, min_periods=w) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): c(win_type='boxcar', window=2, min_periods=1, center=w) for wt in ['foobar', 1]: - with self.assertRaises(ValueError): + with pytest.raises(ValueError): c(win_type=wt, window=2) def test_numpy_compat(self): @@ -348,15 +322,15 @@ def test_numpy_compat(self): msg = "numpy operations are not valid with 
window objects" for func in ('sum', 'mean'): - tm.assertRaisesRegexp(UnsupportedFunctionCall, msg, - getattr(w, func), 1, 2, 3) - tm.assertRaisesRegexp(UnsupportedFunctionCall, msg, - getattr(w, func), dtype=np.float64) + tm.assert_raises_regex(UnsupportedFunctionCall, msg, + getattr(w, func), 1, 2, 3) + tm.assert_raises_regex(UnsupportedFunctionCall, msg, + getattr(w, func), dtype=np.float64) class TestRolling(Base): - def setUp(self): + def setup_method(self, method): self._create_data() def test_doc_string(self): @@ -380,27 +354,58 @@ def test_constructor(self): # GH 13383 c(0) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): c(-1) # not valid for w in [2., 'foo', np.array([2])]: - with self.assertRaises(ValueError): + with pytest.raises(ValueError): c(window=w) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): c(window=2, min_periods=w) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): c(window=2, min_periods=1, center=w) + @td.skip_if_no_scipy def test_constructor_with_win_type(self): # GH 13383 - tm._skip_if_no_scipy() for o in [self.series, self.frame]: c = o.rolling c(0, win_type='boxcar') - with self.assertRaises(ValueError): + with pytest.raises(ValueError): c(-1, win_type='boxcar') + def test_constructor_with_timedelta_window(self): + # GH 15440 + n = 10 + df = DataFrame({'value': np.arange(n)}, + index=pd.date_range('2015-12-24', periods=n, freq="D")) + expected_data = np.append([0., 1.], np.arange(3., 27., 3)) + for window in [timedelta(days=3), pd.Timedelta(days=3)]: + result = df.rolling(window=window).sum() + expected = DataFrame({'value': expected_data}, + index=pd.date_range('2015-12-24', periods=n, + freq="D")) + tm.assert_frame_equal(result, expected) + expected = df.rolling('3D').sum() + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + 'window', [timedelta(days=3), pd.Timedelta(days=3), '3D']) + def 
test_constructor_with_timedelta_window_and_minperiods(self, window): + # GH 15305 + n = 10 + df = DataFrame({'value': np.arange(n)}, + index=pd.date_range('2017-08-08', periods=n, freq="D")) + expected = DataFrame( + {'value': np.append([np.NaN, 1.], np.arange(3., 27., 3))}, + index=pd.date_range('2017-08-08', periods=n, freq="D")) + result_roll_sum = df.rolling(window=window, min_periods=2).sum() + result_roll_generic = df.rolling(window=window, + min_periods=2).apply(sum) + tm.assert_frame_equal(result_roll_sum, expected) + tm.assert_frame_equal(result_roll_generic, expected) + def test_numpy_compat(self): # see gh-12811 r = rwindow.Rolling(Series([2, 4, 6]), window=2) @@ -408,15 +413,68 @@ def test_numpy_compat(self): msg = "numpy operations are not valid with window objects" for func in ('std', 'mean', 'sum', 'max', 'min', 'var'): - tm.assertRaisesRegexp(UnsupportedFunctionCall, msg, - getattr(r, func), 1, 2, 3) - tm.assertRaisesRegexp(UnsupportedFunctionCall, msg, - getattr(r, func), dtype=np.float64) + tm.assert_raises_regex(UnsupportedFunctionCall, msg, + getattr(r, func), 1, 2, 3) + tm.assert_raises_regex(UnsupportedFunctionCall, msg, + getattr(r, func), dtype=np.float64) + + def test_closed(self): + df = DataFrame({'A': [0, 1, 2, 3, 4]}) + # closed only allowed for datetimelike + with pytest.raises(ValueError): + df.rolling(window=3, closed='neither') + + @pytest.mark.parametrize('roller', ['1s', 1]) + def tests_empty_df_rolling(self, roller): + # GH 15819 Verifies that datetime and integer rolling windows can be + # applied to empty DataFrames + expected = DataFrame() + result = DataFrame().rolling(roller).sum() + tm.assert_frame_equal(result, expected) + + # Verifies that datetime and integer rolling windows can be applied to + # empty DataFrames with datetime index + expected = DataFrame(index=pd.DatetimeIndex([])) + result = DataFrame(index=pd.DatetimeIndex([])).rolling(roller).sum() + tm.assert_frame_equal(result, expected) + + def 
test_missing_minp_zero(self): + # https://github.com/pandas-dev/pandas/pull/18921 + # minp=0 + x = pd.Series([np.nan]) + result = x.rolling(1, min_periods=0).sum() + expected = pd.Series([0.0]) + tm.assert_series_equal(result, expected) + + # minp=1 + result = x.rolling(1, min_periods=1).sum() + expected = pd.Series([np.nan]) + tm.assert_series_equal(result, expected) + + def test_missing_minp_zero_variable(self): + # https://github.com/pandas-dev/pandas/pull/18921 + x = pd.Series([np.nan] * 4, + index=pd.DatetimeIndex(['2017-01-01', '2017-01-04', + '2017-01-06', '2017-01-07'])) + result = x.rolling(pd.Timedelta("2d"), min_periods=0).sum() + expected = pd.Series(0.0, index=x.index) + tm.assert_series_equal(result, expected) + + def test_multi_index_names(self): + + # GH 16789, 16825 + cols = pd.MultiIndex.from_product([['A', 'B'], ['C', 'D', 'E']], + names=['1', '2']) + df = DataFrame(np.ones((10, 6)), columns=cols) + result = df.rolling(3).cov() + + tm.assert_index_equal(result.columns, df.columns) + assert result.index.names == [None, '1', '2'] class TestExpanding(Base): - def setUp(self): + def setup_method(self, method): self._create_data() def test_doc_string(self): @@ -438,9 +496,9 @@ def test_constructor(self): # not valid for w in [2., 'foo', np.array([2])]: - with self.assertRaises(ValueError): + with pytest.raises(ValueError): c(min_periods=w) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): c(min_periods=1, center=w) def test_numpy_compat(self): @@ -450,15 +508,48 @@ def test_numpy_compat(self): msg = "numpy operations are not valid with window objects" for func in ('std', 'mean', 'sum', 'max', 'min', 'var'): - tm.assertRaisesRegexp(UnsupportedFunctionCall, msg, - getattr(e, func), 1, 2, 3) - tm.assertRaisesRegexp(UnsupportedFunctionCall, msg, - getattr(e, func), dtype=np.float64) + tm.assert_raises_regex(UnsupportedFunctionCall, msg, + getattr(e, func), 1, 2, 3) + tm.assert_raises_regex(UnsupportedFunctionCall, msg, + getattr(e, 
func), dtype=np.float64) + + @pytest.mark.parametrize( + 'expander', + [1, pytest.param('ls', marks=pytest.mark.xfail( + reason='GH 16425 expanding with ' + 'offset not supported'))]) + def test_empty_df_expanding(self, expander): + # GH 15819 Verifies that datetime and integer expanding windows can be + # applied to empty DataFrames + + expected = DataFrame() + result = DataFrame().expanding(expander).sum() + tm.assert_frame_equal(result, expected) + + # Verifies that datetime and integer expanding windows can be applied + # to empty DataFrames with datetime index + expected = DataFrame(index=pd.DatetimeIndex([])) + result = DataFrame( + index=pd.DatetimeIndex([])).expanding(expander).sum() + tm.assert_frame_equal(result, expected) + + def test_missing_minp_zero(self): + # https://github.com/pandas-dev/pandas/pull/18921 + # minp=0 + x = pd.Series([np.nan]) + result = x.expanding(min_periods=0).sum() + expected = pd.Series([0.0]) + tm.assert_series_equal(result, expected) + + # minp=1 + result = x.expanding(min_periods=1).sum() + expected = pd.Series([np.nan]) + tm.assert_series_equal(result, expected) class TestEWM(Base): - def setUp(self): + def setup_method(self, method): self._create_data() def test_doc_string(self): @@ -481,28 +572,28 @@ def test_constructor(self): c(halflife=0.75, alpha=None) # not valid: mutually exclusive - with self.assertRaises(ValueError): + with pytest.raises(ValueError): c(com=0.5, alpha=0.5) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): c(span=1.5, halflife=0.75) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): c(alpha=0.5, span=1.5) # not valid: com < 0 - with self.assertRaises(ValueError): + with pytest.raises(ValueError): c(com=-0.5) # not valid: span < 1 - with self.assertRaises(ValueError): + with pytest.raises(ValueError): c(span=0.5) # not valid: halflife <= 0 - with self.assertRaises(ValueError): + with pytest.raises(ValueError): c(halflife=0) # not valid: alpha <= 0 or alpha > 
1 for alpha in (-0.5, 1.5): - with self.assertRaises(ValueError): + with pytest.raises(ValueError): c(alpha=alpha) def test_numpy_compat(self): @@ -512,30 +603,17 @@ def test_numpy_compat(self): msg = "numpy operations are not valid with window objects" for func in ('std', 'mean', 'var'): - tm.assertRaisesRegexp(UnsupportedFunctionCall, msg, - getattr(e, func), 1, 2, 3) - tm.assertRaisesRegexp(UnsupportedFunctionCall, msg, - getattr(e, func), dtype=np.float64) - - -class TestDeprecations(Base): - """ test that we are catching deprecation warnings """ - - def setUp(self): - self._create_data() - - def test_deprecations(self): + tm.assert_raises_regex(UnsupportedFunctionCall, msg, + getattr(e, func), 1, 2, 3) + tm.assert_raises_regex(UnsupportedFunctionCall, msg, + getattr(e, func), dtype=np.float64) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - mom.rolling_mean(np.ones(10), 3, center=True, axis=0) - mom.rolling_mean(Series(np.ones(10)), 3, center=True, axis=0) - -# GH #12373 : rolling functions error on float32 data +# gh-12373 : rolling functions error on float32 data # make sure rolling functions works for different dtypes # -# NOTE that these are yielded tests and so _create_data is -# explicity called, nor do these inherit from unittest.TestCase +# NOTE that these are yielded tests and so _create_data +# is explicitly called. 
# # further note that we are only checking rolling for fully dtype # compliance (though both expanding and ewm inherit) @@ -605,8 +683,8 @@ def get_expects(self): return expects def _create_dtype_data(self, dtype): - sr1 = Series(range(5), dtype=dtype) - sr2 = Series(range(10, 0, -2), dtype=dtype) + sr1 = Series(np.arange(5), dtype=dtype) + sr2 = Series(np.arange(10, 0, -2), dtype=dtype) df = DataFrame(np.arange(10).reshape((5, 2)), dtype=dtype) data = { @@ -628,7 +706,7 @@ def test_dtypes(self): f = self.funcs[f_name] d = self.data[d_name] exp = self.expects[d_name][f_name] - yield self.check_dtypes, f, f_name, d, d_name, exp + self.check_dtypes(f, f_name, d, d_name, exp) def check_dtypes(self, f, f_name, d, d_name, exp): roll = d.rolling(window=self.window) @@ -747,7 +825,7 @@ def _create_data(self): class TestMoments(Base): - def setUp(self): + def setup_method(self, method): self._create_data() def test_centered_axis_validation(self): @@ -756,7 +834,7 @@ def test_centered_axis_validation(self): Series(np.ones(10)).rolling(window=3, center=True, axis=0).mean() # bad axis - with self.assertRaises(ValueError): + with pytest.raises(ValueError): Series(np.ones(10)).rolling(window=3, center=True, axis=1).mean() # ok ok @@ -766,84 +844,64 @@ def test_centered_axis_validation(self): axis=1).mean() # bad axis - with self.assertRaises(ValueError): + with pytest.raises(ValueError): (DataFrame(np.ones((10, 10))) .rolling(window=3, center=True, axis=2).mean()) def test_rolling_sum(self): - self._check_moment_func(mom.rolling_sum, np.sum, name='sum') + self._check_moment_func(np.nansum, name='sum', + zero_min_periods_equal=False) def test_rolling_count(self): counter = lambda x: np.isfinite(x).astype(float).sum() - self._check_moment_func(mom.rolling_count, counter, name='count', - has_min_periods=False, preserve_nan=False, + self._check_moment_func(counter, name='count', has_min_periods=False, fill_value=0) def test_rolling_mean(self): - 
self._check_moment_func(mom.rolling_mean, np.mean, name='mean') + self._check_moment_func(np.mean, name='mean') + @td.skip_if_no_scipy def test_cmov_mean(self): # GH 8238 - tm._skip_if_no_scipy() - vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48]) - xp = np.array([np.nan, np.nan, 9.962, 11.27, 11.564, 12.516, 12.818, - 12.952, np.nan, np.nan]) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - rs = mom.rolling_mean(vals, 5, center=True) - tm.assert_almost_equal(xp, rs) - - xp = Series(rs) - rs = Series(vals).rolling(5, center=True).mean() - tm.assert_series_equal(xp, rs) + result = Series(vals).rolling(5, center=True).mean() + expected = Series([np.nan, np.nan, 9.962, 11.27, 11.564, 12.516, + 12.818, 12.952, np.nan, np.nan]) + tm.assert_series_equal(expected, result) + @td.skip_if_no_scipy def test_cmov_window(self): # GH 8238 - tm._skip_if_no_scipy() - vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48]) - xp = np.array([np.nan, np.nan, 9.962, 11.27, 11.564, 12.516, 12.818, - 12.952, np.nan, np.nan]) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - rs = mom.rolling_window(vals, 5, 'boxcar', center=True) - tm.assert_almost_equal(xp, rs) - - xp = Series(rs) - rs = Series(vals).rolling(5, win_type='boxcar', center=True).mean() - tm.assert_series_equal(xp, rs) + result = Series(vals).rolling(5, win_type='boxcar', center=True).mean() + expected = Series([np.nan, np.nan, 9.962, 11.27, 11.564, 12.516, + 12.818, 12.952, np.nan, np.nan]) + tm.assert_series_equal(expected, result) + @td.skip_if_no_scipy def test_cmov_window_corner(self): # GH 8238 - tm._skip_if_no_scipy() - # all nan - vals = np.empty(10, dtype=float) - vals.fill(np.nan) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - rs = mom.rolling_window(vals, 5, 'boxcar', center=True) - self.assertTrue(np.isnan(rs).all()) + vals = pd.Series([np.nan] * 10) + result = 
vals.rolling(5, center=True, win_type='boxcar').mean() + assert np.isnan(result).all() # empty - vals = np.array([]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - rs = mom.rolling_window(vals, 5, 'boxcar', center=True) - self.assertEqual(len(rs), 0) + vals = pd.Series([]) + result = vals.rolling(5, center=True, win_type='boxcar').mean() + assert len(result) == 0 # shorter than window - vals = np.random.randn(5) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - rs = mom.rolling_window(vals, 10, 'boxcar') - self.assertTrue(np.isnan(rs).all()) - self.assertEqual(len(rs), 5) + vals = pd.Series(np.random.randn(5)) + result = vals.rolling(10, win_type='boxcar').mean() + assert np.isnan(result).all() + assert len(result) == 5 + @td.skip_if_no_scipy def test_cmov_window_frame(self): # Gh 8238 - tm._skip_if_no_scipy() - vals = np.array([[12.18, 3.64], [10.18, 9.16], [13.24, 14.61], [4.51, 8.11], [6.15, 11.44], [9.14, 6.21], [11.31, 10.67], [2.94, 6.51], [9.42, 8.39], [12.44, @@ -859,7 +917,7 @@ def test_cmov_window_frame(self): tm.assert_frame_equal(DataFrame(xp), rs) # invalid method - with self.assertRaises(AttributeError): + with pytest.raises(AttributeError): (DataFrame(vals).rolling(5, win_type='boxcar', center=True) .std()) @@ -872,9 +930,8 @@ def test_cmov_window_frame(self): rs = DataFrame(vals).rolling(5, win_type='boxcar', center=True).sum() tm.assert_frame_equal(DataFrame(xp), rs) + @td.skip_if_no_scipy def test_cmov_window_na_min_periods(self): - tm._skip_if_no_scipy() - # min_periods vals = Series(np.random.randn(10)) vals[4] = np.nan @@ -885,10 +942,9 @@ def test_cmov_window_na_min_periods(self): center=True).mean() tm.assert_series_equal(xp, rs) + @td.skip_if_no_scipy def test_cmov_window_regular(self): # GH 8238 - tm._skip_if_no_scipy() - win_types = ['triang', 'blackman', 'hamming', 'bartlett', 'bohman', 'blackmanharris', 'nuttall', 'barthann'] @@ -918,10 +974,9 @@ def test_cmov_window_regular(self): 
rs = Series(vals).rolling(5, win_type=wt, center=True).mean() tm.assert_series_equal(xp, rs) + @td.skip_if_no_scipy def test_cmov_window_regular_linear_range(self): # GH 8238 - tm._skip_if_no_scipy() - win_types = ['triang', 'blackman', 'hamming', 'bartlett', 'bohman', 'blackmanharris', 'nuttall', 'barthann'] @@ -935,10 +990,9 @@ def test_cmov_window_regular_linear_range(self): rs = Series(vals).rolling(5, win_type=wt, center=True).mean() tm.assert_series_equal(xp, rs) + @td.skip_if_no_scipy def test_cmov_window_regular_missing_data(self): # GH 8238 - tm._skip_if_no_scipy() - win_types = ['triang', 'blackman', 'hamming', 'bartlett', 'bohman', 'blackmanharris', 'nuttall', 'barthann'] @@ -968,10 +1022,9 @@ def test_cmov_window_regular_missing_data(self): rs = Series(vals).rolling(5, win_type=wt, min_periods=3).mean() tm.assert_series_equal(xp, rs) + @td.skip_if_no_scipy def test_cmov_window_special(self): # GH 8238 - tm._skip_if_no_scipy() - win_types = ['kaiser', 'gaussian', 'general_gaussian', 'slepian'] kwds = [{'beta': 1.}, {'std': 1.}, {'power': 2., 'width': 2.}, {'width': 0.5}] @@ -995,10 +1048,9 @@ def test_cmov_window_special(self): rs = Series(vals).rolling(5, win_type=wt, center=True).mean(**k) tm.assert_series_equal(xp, rs) + @td.skip_if_no_scipy def test_cmov_window_special_linear_range(self): # GH 8238 - tm._skip_if_no_scipy() - win_types = ['kaiser', 'gaussian', 'general_gaussian', 'slepian'] kwds = [{'beta': 1.}, {'std': 1.}, {'power': 2., 'width': 2.}, {'width': 0.5}] @@ -1014,57 +1066,89 @@ def test_cmov_window_special_linear_range(self): tm.assert_series_equal(xp, rs) def test_rolling_median(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self._check_moment_func(mom.rolling_median, np.median, - name='median') + self._check_moment_func(np.median, name='median') def test_rolling_min(self): + self._check_moment_func(np.min, name='min') - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - 
self._check_moment_func(mom.rolling_min, np.min, name='min') - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - a = np.array([1, 2, 3, 4, 5]) - b = mom.rolling_min(a, window=100, min_periods=1) - tm.assert_almost_equal(b, np.ones(len(a))) + a = pd.Series([1, 2, 3, 4, 5]) + result = a.rolling(window=100, min_periods=1).min() + expected = pd.Series(np.ones(len(a))) + tm.assert_series_equal(result, expected) - self.assertRaises(ValueError, mom.rolling_min, np.array([1, 2, 3]), - window=3, min_periods=5) + with pytest.raises(ValueError): + pd.Series([1, 2, 3]).rolling(window=3, min_periods=5).min() def test_rolling_max(self): + self._check_moment_func(np.max, name='max') - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self._check_moment_func(mom.rolling_max, np.max, name='max') + a = pd.Series([1, 2, 3, 4, 5], dtype=np.float64) + b = a.rolling(window=100, min_periods=1).max() + tm.assert_almost_equal(a, b) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - a = np.array([1, 2, 3, 4, 5], dtype=np.float64) - b = mom.rolling_max(a, window=100, min_periods=1) - tm.assert_almost_equal(a, b) + with pytest.raises(ValueError): + pd.Series([1, 2, 3]).rolling(window=3, min_periods=5).max() - self.assertRaises(ValueError, mom.rolling_max, np.array([1, 2, 3]), - window=3, min_periods=5) - - def test_rolling_quantile(self): - qs = [.1, .5, .9] + @pytest.mark.parametrize('q', [0.0, .1, .5, .9, 1.0]) + def test_rolling_quantile(self, q): def scoreatpercentile(a, per): values = np.sort(a, axis=0) - idx = per / 1. * (values.shape[0] - 1) - return values[int(idx)] + idx = int(per / 1. 
* (values.shape[0] - 1)) + + if idx == values.shape[0] - 1: + retval = values[-1] + + else: + qlow = float(idx) / float(values.shape[0] - 1) + qhig = float(idx + 1) / float(values.shape[0] - 1) + vlow = values[idx] + vhig = values[idx + 1] + retval = vlow + (vhig - vlow) * (per - qlow) / (qhig - qlow) + + return retval + + def quantile_func(x): + return scoreatpercentile(x, q) + + self._check_moment_func(quantile_func, name='quantile', + quantile=q) + + def test_rolling_quantile_np_percentile(self): + # #9413: Tests that rolling window's quantile default behavior + # is analogus to Numpy's percentile + row = 10 + col = 5 + idx = pd.date_range('20100101', periods=row, freq='B') + df = DataFrame(np.random.rand(row * col).reshape((row, -1)), index=idx) + + df_quantile = df.quantile([0.25, 0.5, 0.75], axis=0) + np_percentile = np.percentile(df, [25, 50, 75], axis=0) + + tm.assert_almost_equal(df_quantile.values, np.array(np_percentile)) + + def test_rolling_quantile_series(self): + # #16211: Tests that rolling window's quantile default behavior + # is analogus to Series' quantile + arr = np.arange(100) + s = Series(arr) + q1 = s.quantile(0.1) + q2 = s.rolling(100).quantile(0.1).iloc[-1] + + tm.assert_almost_equal(q1, q2) - for q in qs: + def test_rolling_quantile_param(self): + ser = Series([0.0, .1, .5, .9, 1.0]) - def f(x, window, quantile, min_periods=None, freq=None, - center=False): - return mom.rolling_quantile(x, window, quantile, - min_periods=min_periods, freq=freq, - center=center) + with pytest.raises(ValueError): + ser.rolling(3).quantile(-0.1) - def alt(x): - return scoreatpercentile(x, q) + with pytest.raises(ValueError): + ser.rolling(3).quantile(10.0) - self._check_moment_func(f, alt, name='quantile', quantile=q) + with pytest.raises(TypeError): + ser.rolling(3).quantile('foo') def test_rolling_apply(self): # suppress warnings about empty slices, as we are deliberately testing @@ -1078,15 +1162,10 @@ def test_rolling_apply(self): 
tm.assert_series_equal(ser, ser.rolling(10).apply(lambda x: x.mean())) - f = lambda x: x[np.isfinite(x)].mean() + def f(x): + return x[np.isfinite(x)].mean() - def roll_mean(x, window, min_periods=None, freq=None, center=False, - **kwargs): - return mom.rolling_apply(x, window, func=f, - min_periods=min_periods, freq=freq, - center=center) - - self._check_moment_func(roll_mean, np.mean, name='apply', func=f) + self._check_moment_func(np.mean, name='apply', func=f) # GH 8080 s = Series([None, None, None]) @@ -1099,267 +1178,104 @@ def roll_mean(x, window, min_periods=None, freq=None, center=False, def test_rolling_apply_out_of_bounds(self): # #1850 - arr = np.arange(4) + vals = pd.Series([1, 2, 3, 4]) - # it works! - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = mom.rolling_apply(arr, 10, np.sum) - self.assertTrue(isnull(result).all()) + result = vals.rolling(10).apply(np.sum) + assert result.isna().all() - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = mom.rolling_apply(arr, 10, np.sum, min_periods=1) - tm.assert_almost_equal(result, result) + result = vals.rolling(10, min_periods=1).apply(np.sum) + expected = pd.Series([1, 3, 6, 10], dtype=float) + tm.assert_almost_equal(result, expected) def test_rolling_std(self): - self._check_moment_func(mom.rolling_std, lambda x: np.std(x, ddof=1), + self._check_moment_func(lambda x: np.std(x, ddof=1), name='std') - self._check_moment_func(mom.rolling_std, lambda x: np.std(x, ddof=0), + self._check_moment_func(lambda x: np.std(x, ddof=0), name='std', ddof=0) def test_rolling_std_1obs(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = mom.rolling_std(np.array([1., 2., 3., 4., 5.]), - 1, min_periods=1) - expected = np.array([np.nan] * 5) - tm.assert_almost_equal(result, expected) + vals = pd.Series([1., 2., 3., 4., 5.]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = 
mom.rolling_std(np.array([1., 2., 3., 4., 5.]), - 1, min_periods=1, ddof=0) - expected = np.zeros(5) - tm.assert_almost_equal(result, expected) + result = vals.rolling(1, min_periods=1).std() + expected = pd.Series([np.nan] * 5) + tm.assert_series_equal(result, expected) + + result = vals.rolling(1, min_periods=1).std(ddof=0) + expected = pd.Series([0.] * 5) + tm.assert_series_equal(result, expected) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = mom.rolling_std(np.array([np.nan, np.nan, 3., 4., 5.]), - 3, min_periods=2) - self.assertTrue(np.isnan(result[2])) + result = (pd.Series([np.nan, np.nan, 3, 4, 5]) + .rolling(3, min_periods=2).std()) + assert np.isnan(result[2]) def test_rolling_std_neg_sqrt(self): # unit test from Bottleneck # Test move_nanstd for neg sqrt. - a = np.array([0.0011448196318903589, 0.00028718669878572767, - 0.00028718669878572767, 0.00028718669878572767, - 0.00028718669878572767]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - b = mom.rolling_std(a, window=3) - self.assertTrue(np.isfinite(b[2:]).all()) + a = pd.Series([0.0011448196318903589, 0.00028718669878572767, + 0.00028718669878572767, 0.00028718669878572767, + 0.00028718669878572767]) + b = a.rolling(window=3).std() + assert np.isfinite(b[2:]).all() - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - b = mom.ewmstd(a, span=3) - self.assertTrue(np.isfinite(b[2:]).all()) + b = a.ewm(span=3).std() + assert np.isfinite(b[2:]).all() def test_rolling_var(self): - self._check_moment_func(mom.rolling_var, lambda x: np.var(x, ddof=1), - test_stable=True, name='var') - self._check_moment_func(mom.rolling_var, lambda x: np.var(x, ddof=0), + self._check_moment_func(lambda x: np.var(x, ddof=1), + name='var') + self._check_moment_func(lambda x: np.var(x, ddof=0), name='var', ddof=0) + @td.skip_if_no_scipy def test_rolling_skew(self): - try: - from scipy.stats import skew - except ImportError: - pytest.skip('no 
scipy') - self._check_moment_func(mom.rolling_skew, - lambda x: skew(x, bias=False), name='skew') + from scipy.stats import skew + self._check_moment_func(lambda x: skew(x, bias=False), name='skew') + @td.skip_if_no_scipy def test_rolling_kurt(self): - try: - from scipy.stats import kurtosis - except ImportError: - pytest.skip('no scipy') - self._check_moment_func(mom.rolling_kurt, - lambda x: kurtosis(x, bias=False), name='kurt') - - def test_fperr_robustness(self): - # TODO: remove this once python 2.5 out of picture - if PY3: - pytest.skip("doesn't work on python 3") - - # #2114 - data = '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1a@\xaa\xaa\xaa\xaa\xaa\xaa\x02@8\x8e\xe38\x8e\xe3\xe8?z\t\xed%\xb4\x97\xd0?\xa2\x0c<\xdd\x9a\x1f\xb6?\x82\xbb\xfa&y\x7f\x9d?\xac\'\xa7\xc4P\xaa\x83?\x90\xdf\xde\xb0k8j?`\xea\xe9u\xf2zQ?*\xe37\x9d\x98N7?\xe2.\xf5&v\x13\x1f?\xec\xc9\xf8\x19\xa4\xb7\x04?\x90b\xf6w\x85\x9f\xeb>\xb5A\xa4\xfaXj\xd2>F\x02\xdb\xf8\xcb\x8d\xb8>.\xac<\xfb\x87^\xa0>\xe8:\xa6\xf9_\xd3\x85>\xfb?\xe2cUU\xfd?\xfc\x7fA\xed8\x8e\xe3?\xa5\xaa\xac\x91\xf6\x12\xca?n\x1cs\xb6\xf9a\xb1?\xe8%D\xf3L-\x97?5\xddZD\x11\xe7~?#>\xe7\x82\x0b\x9ad?\xd9R4Y\x0fxK?;7x;\nP2?N\xf4JO\xb8j\x18?4\xf81\x8a%G\x00?\x9a\xf5\x97\r2\xb4\xe5>\xcd\x9c\xca\xbcB\xf0\xcc>3\x13\x87(\xd7J\xb3>\x99\x19\xb4\xe0\x1e\xb9\x99>ff\xcd\x95\x14&\x81>\x88\x88\xbc\xc7p\xddf>`\x0b\xa6_\x96|N>@\xb2n\xea\x0eS4>U\x98\x938i\x19\x1b>\x8eeb\xd0\xf0\x10\x02>\xbd\xdc-k\x96\x16\xe8=(\x93\x1e\xf2\x0e\x0f\xd0=\xe0n\xd3Bii\xb5=*\xe9\x19Y\x8c\x8c\x9c=\xc6\xf0\xbb\x90]\x08\x83=]\x96\xfa\xc0|`i=>d\xfc\xd5\xfd\xeaP=R0\xfb\xc7\xa7\x8e6=\xc2\x95\xf9_\x8a\x13\x1e=\xd6c\xa6\xea\x06\r\x04=r\xda\xdd8\t\xbc\xea<\xf6\xe6\x93\xd0\xb0\xd2\xd1<\x9d\xdeok\x96\xc3\xb7<&~\xea9s\xaf\x9f\xb8\x02@\xc6\xd2&\xfd\xa8\xf5\xe8?\xd9\xe1\x19\xfe\xc5\xa3\xd0?v\x82"\xa8\xb2/\xb6?\x9dX\x835\xee\x94\x9d?h\x90W\xce\x9e\xb8\x83?\x8a\xc0th~Kj?\\\x80\xf8\x9a\xa9\x87Q?%\xab\xa0\xce\x8c
_7?1\xe4\x80\x13\x11*\x1f? \x98\x00\r\xb6\xc6\x04?\x80u\xabf\x9d\xb3\xeb>UNrD\xbew\xd2>\x1c\x13C[\xa8\x9f\xb8>\x12b\xd7m-\x1fQ@\xe3\x85>\xe6\x91)l\x00/m>Da\xc6\xf2\xaatS>\x05\xd7]\xee\xe3\xf09>' # noqa - - arr = np.frombuffer(data, dtype='= 0).all()) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = mom.rolling_mean(arr, 2) - self.assertTrue((result[1:] >= 0).all()) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = mom.rolling_var(arr, 2) - self.assertTrue((result[1:] >= 0).all()) - - # #2527, ugh - arr = np.array([0.00012456, 0.0003, 0]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = mom.rolling_mean(arr, 1) - self.assertTrue(result[-1] >= 0) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = mom.rolling_mean(-arr, 1) - self.assertTrue(result[-1] <= 0) - - def _check_moment_func(self, f, static_comp, name=None, window=50, - has_min_periods=True, has_center=True, - has_time_rule=True, preserve_nan=True, - fill_value=None, test_stable=False, **kwargs): - - with warnings.catch_warnings(record=True): - self._check_ndarray(f, static_comp, window=window, - has_min_periods=has_min_periods, - preserve_nan=preserve_nan, - has_center=has_center, fill_value=fill_value, - test_stable=test_stable, **kwargs) - - with warnings.catch_warnings(record=True): - self._check_structures(f, static_comp, - has_min_periods=has_min_periods, - has_time_rule=has_time_rule, - fill_value=fill_value, - has_center=has_center, **kwargs) - - # new API - if name is not None: - self._check_structures(f, static_comp, name=name, - has_min_periods=has_min_periods, - has_time_rule=has_time_rule, - fill_value=fill_value, - has_center=has_center, **kwargs) - - def _check_ndarray(self, f, static_comp, window=50, has_min_periods=True, - preserve_nan=True, has_center=True, fill_value=None, - test_stable=False, test_window=True, **kwargs): - def get_result(arr, 
window, min_periods=None, center=False): - return f(arr, window, min_periods=min_periods, center=center, ** - kwargs) - - result = get_result(self.arr, window) - tm.assert_almost_equal(result[-1], static_comp(self.arr[-50:])) + from scipy.stats import kurtosis + self._check_moment_func(lambda x: kurtosis(x, bias=False), + name='kurt') - if preserve_nan: - assert (np.isnan(result[self._nan_locs]).all()) + def _check_moment_func(self, static_comp, name, has_min_periods=True, + has_center=True, has_time_rule=True, + fill_value=None, zero_min_periods_equal=True, + **kwargs): - # excluding NaNs correctly - arr = randn(50) - arr[:10] = np.NaN - arr[-10:] = np.NaN - - if has_min_periods: - result = get_result(arr, 50, min_periods=30) - tm.assert_almost_equal(result[-1], static_comp(arr[10:-10])) - - # min_periods is working correctly - result = get_result(arr, 20, min_periods=15) - self.assertTrue(np.isnan(result[23])) - self.assertFalse(np.isnan(result[24])) - - self.assertFalse(np.isnan(result[-6])) - self.assertTrue(np.isnan(result[-5])) - - arr2 = randn(20) - result = get_result(arr2, 10, min_periods=5) - self.assertTrue(isnull(result[3])) - self.assertTrue(notnull(result[4])) - - # min_periods=0 - result0 = get_result(arr, 20, min_periods=0) - result1 = get_result(arr, 20, min_periods=1) - tm.assert_almost_equal(result0, result1) - else: - result = get_result(arr, 50) - tm.assert_almost_equal(result[-1], static_comp(arr[10:-10])) - - # GH 7925 - if has_center: - if has_min_periods: - result = get_result(arr, 20, min_periods=15, center=True) - expected = get_result( - np.concatenate((arr, np.array([np.NaN] * 9))), 20, - min_periods=15)[9:] - else: - result = get_result(arr, 20, center=True) - expected = get_result( - np.concatenate((arr, np.array([np.NaN] * 9))), 20)[9:] - - self.assert_numpy_array_equal(result, expected) - - if test_stable: - result = get_result(self.arr + 1e9, window) - tm.assert_almost_equal(result[-1], - static_comp(self.arr[-50:] + 1e9)) - - # 
Test window larger than array, #7297 - if test_window: - if has_min_periods: - for minp in (0, len(self.arr) - 1, len(self.arr)): - result = get_result(self.arr, len(self.arr) + 1, - min_periods=minp) - expected = get_result(self.arr, len(self.arr), - min_periods=minp) - nan_mask = np.isnan(result) - self.assertTrue(np.array_equal(nan_mask, np.isnan( - expected))) - nan_mask = ~nan_mask - tm.assert_almost_equal(result[nan_mask], - expected[nan_mask]) - else: - result = get_result(self.arr, len(self.arr) + 1) - expected = get_result(self.arr, len(self.arr)) - nan_mask = np.isnan(result) - self.assertTrue(np.array_equal(nan_mask, np.isnan(expected))) - nan_mask = ~nan_mask - tm.assert_almost_equal(result[nan_mask], expected[nan_mask]) - - def _check_structures(self, f, static_comp, name=None, - has_min_periods=True, has_time_rule=True, - has_center=True, fill_value=None, **kwargs): - def get_result(obj, window, min_periods=None, freq=None, center=False): - - # check via the API calls if name is provided - if name is not None: - - # catch a freq deprecation warning if freq is provided and not - # None - w = FutureWarning if freq is not None else None - with tm.assert_produces_warning(w, check_stacklevel=False): - r = obj.rolling(window=window, min_periods=min_periods, - freq=freq, center=center) - return getattr(r, name)(**kwargs) - - # check via the moments API - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - return f(obj, window=window, min_periods=min_periods, - freq=freq, center=center, **kwargs) + def get_result(obj, window, min_periods=None, center=False): + r = obj.rolling(window=window, min_periods=min_periods, + center=center) + return getattr(r, name)(**kwargs) series_result = get_result(self.series, window=50) - frame_result = get_result(self.frame, window=50) + assert isinstance(series_result, Series) + tm.assert_almost_equal(series_result.iloc[-1], + static_comp(self.series[-50:])) - tm.assertIsInstance(series_result, Series) 
- self.assertEqual(type(frame_result), DataFrame) + frame_result = get_result(self.frame, window=50) + assert isinstance(frame_result, DataFrame) + tm.assert_series_equal(frame_result.iloc[-1, :], + self.frame.iloc[-50:, :].apply(static_comp, + axis=0), + check_names=False) # check time_rule works if has_time_rule: win = 25 minp = 10 + series = self.series[::2].resample('B').mean() + frame = self.frame[::2].resample('B').mean() if has_min_periods: - series_result = get_result(self.series[::2], window=win, - min_periods=minp, freq='B') - frame_result = get_result(self.frame[::2], window=win, - min_periods=minp, freq='B') + series_result = get_result(series, window=win, + min_periods=minp) + frame_result = get_result(frame, window=win, + min_periods=minp) else: - series_result = get_result(self.series[::2], window=win, - freq='B') - frame_result = get_result(self.frame[::2], window=win, - freq='B') + series_result = get_result(series, window=win) + frame_result = get_result(frame, window=win) last_date = series_result.index[-1] prev_date = last_date - 24 * offsets.BDay() @@ -1367,15 +1283,79 @@ def get_result(obj, window, min_periods=None, freq=None, center=False): trunc_series = self.series[::2].truncate(prev_date, last_date) trunc_frame = self.frame[::2].truncate(prev_date, last_date) - self.assertAlmostEqual(series_result[-1], + tm.assert_almost_equal(series_result[-1], static_comp(trunc_series)) tm.assert_series_equal(frame_result.xs(last_date), trunc_frame.apply(static_comp), check_names=False) - # GH 7925 + # excluding NaNs correctly + obj = Series(randn(50)) + obj[:10] = np.NaN + obj[-10:] = np.NaN + if has_min_periods: + result = get_result(obj, 50, min_periods=30) + tm.assert_almost_equal(result.iloc[-1], static_comp(obj[10:-10])) + + # min_periods is working correctly + result = get_result(obj, 20, min_periods=15) + assert isna(result.iloc[23]) + assert not isna(result.iloc[24]) + + assert not isna(result.iloc[-6]) + assert isna(result.iloc[-5]) + + obj2 = 
Series(randn(20)) + result = get_result(obj2, 10, min_periods=5) + assert isna(result.iloc[3]) + assert notna(result.iloc[4]) + + if zero_min_periods_equal: + # min_periods=0 may be equivalent to min_periods=1 + result0 = get_result(obj, 20, min_periods=0) + result1 = get_result(obj, 20, min_periods=1) + tm.assert_almost_equal(result0, result1) + else: + result = get_result(obj, 50) + tm.assert_almost_equal(result.iloc[-1], static_comp(obj[10:-10])) + + # window larger than series length (#7297) + if has_min_periods: + for minp in (0, len(self.series) - 1, len(self.series)): + result = get_result(self.series, len(self.series) + 1, + min_periods=minp) + expected = get_result(self.series, len(self.series), + min_periods=minp) + nan_mask = isna(result) + tm.assert_series_equal(nan_mask, isna(expected)) + + nan_mask = ~nan_mask + tm.assert_almost_equal(result[nan_mask], + expected[nan_mask]) + else: + result = get_result(self.series, len(self.series) + 1) + expected = get_result(self.series, len(self.series)) + nan_mask = isna(result) + tm.assert_series_equal(nan_mask, isna(expected)) + + nan_mask = ~nan_mask + tm.assert_almost_equal(result[nan_mask], expected[nan_mask]) + + # check center=True if has_center: + if has_min_periods: + result = get_result(obj, 20, min_periods=15, center=True) + expected = get_result( + pd.concat([obj, Series([np.NaN] * 9)]), 20, + min_periods=15)[9:].reset_index(drop=True) + else: + result = get_result(obj, 20, center=True) + expected = get_result( + pd.concat([obj, Series([np.NaN] * 9)]), + 20)[9:].reset_index(drop=True) + + tm.assert_series_equal(result, expected) # shifter index s = ['x%d' % x for x in range(12)] @@ -1415,13 +1395,12 @@ def get_result(obj, window, min_periods=None, freq=None, center=False): tm.assert_frame_equal(frame_xp, frame_rs) def test_ewma(self): - self._check_ew(mom.ewma, name='mean') + self._check_ew(name='mean') - arr = np.zeros(1000) - arr[5] = 1 - with tm.assert_produces_warning(FutureWarning, 
check_stacklevel=False): - result = mom.ewma(arr, span=100, adjust=False).sum() - self.assertTrue(np.abs(result - 1) < 1e-2) + vals = pd.Series(np.zeros(1000)) + vals[5] = 1 + result = vals.ewm(span=100, adjust=False).mean().sum() + assert np.abs(result - 1) < 1e-2 s = Series([1.0, 2.0, 4.0, 8.0]) @@ -1500,55 +1479,34 @@ def simple_wma(s, w): tm.assert_series_equal(result, expected) def test_ewmvar(self): - self._check_ew(mom.ewmvar, name='var') + self._check_ew(name='var') def test_ewmvol(self): - self._check_ew(mom.ewmvol, name='vol') + self._check_ew(name='vol') def test_ewma_span_com_args(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - A = mom.ewma(self.arr, com=9.5) - B = mom.ewma(self.arr, span=20) - tm.assert_almost_equal(A, B) + A = self.series.ewm(com=9.5).mean() + B = self.series.ewm(span=20).mean() + tm.assert_almost_equal(A, B) - self.assertRaises(ValueError, mom.ewma, self.arr, com=9.5, span=20) - self.assertRaises(ValueError, mom.ewma, self.arr) + with pytest.raises(ValueError): + self.series.ewm(com=9.5, span=20) + with pytest.raises(ValueError): + self.series.ewm().mean() def test_ewma_halflife_arg(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - A = mom.ewma(self.arr, com=13.932726172912965) - B = mom.ewma(self.arr, halflife=10.0) - tm.assert_almost_equal(A, B) - - self.assertRaises(ValueError, mom.ewma, self.arr, span=20, - halflife=50) - self.assertRaises(ValueError, mom.ewma, self.arr, com=9.5, - halflife=50) - self.assertRaises(ValueError, mom.ewma, self.arr, com=9.5, span=20, - halflife=50) - self.assertRaises(ValueError, mom.ewma, self.arr) - - def test_ewma_alpha_old_api(self): - # GH 10789 - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - a = mom.ewma(self.arr, alpha=0.61722699889169674) - b = mom.ewma(self.arr, com=0.62014947789973052) - c = mom.ewma(self.arr, span=2.240298955799461) - d = mom.ewma(self.arr, halflife=0.721792864318) - 
tm.assert_numpy_array_equal(a, b) - tm.assert_numpy_array_equal(a, c) - tm.assert_numpy_array_equal(a, d) - - def test_ewma_alpha_arg_old_api(self): - # GH 10789 - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self.assertRaises(ValueError, mom.ewma, self.arr) - self.assertRaises(ValueError, mom.ewma, self.arr, - com=10.0, alpha=0.5) - self.assertRaises(ValueError, mom.ewma, self.arr, - span=10.0, alpha=0.5) - self.assertRaises(ValueError, mom.ewma, self.arr, - halflife=10.0, alpha=0.5) + A = self.series.ewm(com=13.932726172912965).mean() + B = self.series.ewm(halflife=10.0).mean() + tm.assert_almost_equal(A, B) + + with pytest.raises(ValueError): + self.series.ewm(span=20, halflife=50) + with pytest.raises(ValueError): + self.series.ewm(com=9.5, halflife=50) + with pytest.raises(ValueError): + self.series.ewm(com=9.5, span=20, halflife=50) + with pytest.raises(ValueError): + self.series.ewm() def test_ewm_alpha(self): # GH 10789 @@ -1563,55 +1521,59 @@ def test_ewm_alpha(self): def test_ewm_alpha_arg(self): # GH 10789 - s = Series(self.arr) - self.assertRaises(ValueError, s.ewm) - self.assertRaises(ValueError, s.ewm, com=10.0, alpha=0.5) - self.assertRaises(ValueError, s.ewm, span=10.0, alpha=0.5) - self.assertRaises(ValueError, s.ewm, halflife=10.0, alpha=0.5) + s = self.series + with pytest.raises(ValueError): + s.ewm() + with pytest.raises(ValueError): + s.ewm(com=10.0, alpha=0.5) + with pytest.raises(ValueError): + s.ewm(span=10.0, alpha=0.5) + with pytest.raises(ValueError): + s.ewm(halflife=10.0, alpha=0.5) def test_ewm_domain_checks(self): # GH 12492 s = Series(self.arr) # com must satisfy: com >= 0 - self.assertRaises(ValueError, s.ewm, com=-0.1) + pytest.raises(ValueError, s.ewm, com=-0.1) s.ewm(com=0.0) s.ewm(com=0.1) # span must satisfy: span >= 1 - self.assertRaises(ValueError, s.ewm, span=-0.1) - self.assertRaises(ValueError, s.ewm, span=0.0) - self.assertRaises(ValueError, s.ewm, span=0.9) + pytest.raises(ValueError, s.ewm, 
span=-0.1) + pytest.raises(ValueError, s.ewm, span=0.0) + pytest.raises(ValueError, s.ewm, span=0.9) s.ewm(span=1.0) s.ewm(span=1.1) # halflife must satisfy: halflife > 0 - self.assertRaises(ValueError, s.ewm, halflife=-0.1) - self.assertRaises(ValueError, s.ewm, halflife=0.0) + pytest.raises(ValueError, s.ewm, halflife=-0.1) + pytest.raises(ValueError, s.ewm, halflife=0.0) s.ewm(halflife=0.1) # alpha must satisfy: 0 < alpha <= 1 - self.assertRaises(ValueError, s.ewm, alpha=-0.1) - self.assertRaises(ValueError, s.ewm, alpha=0.0) + pytest.raises(ValueError, s.ewm, alpha=-0.1) + pytest.raises(ValueError, s.ewm, alpha=0.0) s.ewm(alpha=0.1) s.ewm(alpha=1.0) - self.assertRaises(ValueError, s.ewm, alpha=1.1) + pytest.raises(ValueError, s.ewm, alpha=1.1) - def test_ew_empty_arrays(self): - arr = np.array([], dtype=np.float64) + def test_ew_empty_series(self): + vals = pd.Series([], dtype=np.float64) - funcs = [mom.ewma, mom.ewmvol, mom.ewmvar] + ewm = vals.ewm(3) + funcs = ['mean', 'vol', 'var'] for f in funcs: - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - result = f(arr, 3) - tm.assert_almost_equal(result, arr) - - def _check_ew(self, func, name=None): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self._check_ew_ndarray(func, name=name) - self._check_ew_structures(func, name=name) - - def _check_ew_ndarray(self, func, preserve_nan=False, name=None): - result = func(self.arr, com=10) + result = getattr(ewm, f)() + tm.assert_almost_equal(result, vals) + + def _check_ew(self, name=None, preserve_nan=False): + series_result = getattr(self.series.ewm(com=10), name)() + assert isinstance(series_result, Series) + + frame_result = getattr(self.frame.ewm(com=10), name)() + assert type(frame_result) == DataFrame + + result = getattr(self.series.ewm(com=10), name)() if preserve_nan: - assert (np.isnan(result[self._nan_locs]).all()) + assert result[self._nan_locs].isna().all() # excluding NaNs correctly arr = randn(50) @@ 
-1621,44 +1583,196 @@ def _check_ew_ndarray(self, func, preserve_nan=False, name=None): # check min_periods # GH 7898 - result = func(s, 50, min_periods=2) - self.assertTrue(np.isnan(result.values[:11]).all()) - self.assertFalse(np.isnan(result.values[11:]).any()) + result = getattr(s.ewm(com=50, min_periods=2), name)() + assert result[:11].isna().all() + assert not result[11:].isna().any() for min_periods in (0, 1): - result = func(s, 50, min_periods=min_periods) - if func == mom.ewma: - self.assertTrue(np.isnan(result.values[:10]).all()) - self.assertFalse(np.isnan(result.values[10:]).any()) + result = getattr(s.ewm(com=50, min_periods=min_periods), name)() + if name == 'mean': + assert result[:10].isna().all() + assert not result[10:].isna().any() else: - # ewmstd, ewmvol, ewmvar (with bias=False) require at least two - # values - self.assertTrue(np.isnan(result.values[:11]).all()) - self.assertFalse(np.isnan(result.values[11:]).any()) + # ewm.std, ewm.vol, ewm.var (with bias=False) require at least + # two values + assert result[:11].isna().all() + assert not result[11:].isna().any() # check series of length 0 - result = func(Series([]), 50, min_periods=min_periods) - tm.assert_series_equal(result, Series([])) + result = getattr(Series().ewm(com=50, min_periods=min_periods), + name)() + tm.assert_series_equal(result, Series()) # check series of length 1 - result = func(Series([1.]), 50, min_periods=min_periods) - if func == mom.ewma: + result = getattr(Series([1.]).ewm(50, min_periods=min_periods), + name)() + if name == 'mean': tm.assert_series_equal(result, Series([1.])) else: - # ewmstd, ewmvol, ewmvar with bias=False require at least two - # values + # ewm.std, ewm.vol, ewm.var with bias=False require at least + # two values tm.assert_series_equal(result, Series([np.NaN])) # pass in ints - result2 = func(np.arange(50), span=10) - self.assertEqual(result2.dtype, np.float_) - - def _check_ew_structures(self, func, name): - series_result = 
getattr(self.series.ewm(com=10), name)() - tm.assertIsInstance(series_result, Series) - - frame_result = getattr(self.frame.ewm(com=10), name)() - self.assertEqual(type(frame_result), DataFrame) + result2 = getattr(Series(np.arange(50)).ewm(span=10), name)() + assert result2.dtype == np.float_ + + +class TestPairwise(object): + + # GH 7738 + df1s = [DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[0, 1]), + DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1, 0]), + DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1, 1]), + DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], + columns=['C', 'C']), + DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1., 0]), + DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[0., 1]), + DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=['C', 1]), + DataFrame([[2., 4.], [1., 2.], [5., 2.], [8., 1.]], + columns=[1, 0.]), + DataFrame([[2, 4.], [1, 2.], [5, 2.], [8, 1.]], + columns=[0, 1.]), + DataFrame([[2, 4], [1, 2], [5, 2], [8, 1.]], + columns=[1., 'X']), ] + df2 = DataFrame([[None, 1, 1], [None, 1, 2], + [None, 3, 2], [None, 8, 1]], columns=['Y', 'Z', 'X']) + s = Series([1, 1, 3, 8]) + + def compare(self, result, expected): + + # since we have sorted the results + # we can only compare non-nans + result = result.dropna().values + expected = expected.dropna().values + + tm.assert_numpy_array_equal(result, expected, check_dtype=False) + + @pytest.mark.parametrize('f', [lambda x: x.cov(), lambda x: x.corr()]) + def test_no_flex(self, f): + + # DataFrame methods (which do not call _flex_binary_moment()) + + results = [f(df) for df in self.df1s] + for (df, result) in zip(self.df1s, results): + tm.assert_index_equal(result.index, df.columns) + tm.assert_index_equal(result.columns, df.columns) + for i, result in enumerate(results): + if i > 0: + self.compare(result, results[0]) + + @pytest.mark.parametrize( + 'f', [lambda x: x.expanding().cov(pairwise=True), + lambda x: x.expanding().corr(pairwise=True), + lambda x: 
x.rolling(window=3).cov(pairwise=True), + lambda x: x.rolling(window=3).corr(pairwise=True), + lambda x: x.ewm(com=3).cov(pairwise=True), + lambda x: x.ewm(com=3).corr(pairwise=True)]) + def test_pairwise_with_self(self, f): + + # DataFrame with itself, pairwise=True + # note that we may construct the 1st level of the MI + # in a non-motononic way, so compare accordingly + results = [] + for i, df in enumerate(self.df1s): + result = f(df) + tm.assert_index_equal(result.index.levels[0], + df.index, + check_names=False) + tm.assert_numpy_array_equal(safe_sort(result.index.levels[1]), + safe_sort(df.columns.unique())) + tm.assert_index_equal(result.columns, df.columns) + results.append(df) + + for i, result in enumerate(results): + if i > 0: + self.compare(result, results[0]) + + @pytest.mark.parametrize( + 'f', [lambda x: x.expanding().cov(pairwise=False), + lambda x: x.expanding().corr(pairwise=False), + lambda x: x.rolling(window=3).cov(pairwise=False), + lambda x: x.rolling(window=3).corr(pairwise=False), + lambda x: x.ewm(com=3).cov(pairwise=False), + lambda x: x.ewm(com=3).corr(pairwise=False), ]) + def test_no_pairwise_with_self(self, f): + + # DataFrame with itself, pairwise=False + results = [f(df) for df in self.df1s] + for (df, result) in zip(self.df1s, results): + tm.assert_index_equal(result.index, df.index) + tm.assert_index_equal(result.columns, df.columns) + for i, result in enumerate(results): + if i > 0: + self.compare(result, results[0]) + + @pytest.mark.parametrize( + 'f', [lambda x, y: x.expanding().cov(y, pairwise=True), + lambda x, y: x.expanding().corr(y, pairwise=True), + lambda x, y: x.rolling(window=3).cov(y, pairwise=True), + lambda x, y: x.rolling(window=3).corr(y, pairwise=True), + lambda x, y: x.ewm(com=3).cov(y, pairwise=True), + lambda x, y: x.ewm(com=3).corr(y, pairwise=True), ]) + def test_pairwise_with_other(self, f): + + # DataFrame with another DataFrame, pairwise=True + results = [f(df, self.df2) for df in self.df1s] + for (df, 
result) in zip(self.df1s, results): + tm.assert_index_equal(result.index.levels[0], + df.index, + check_names=False) + tm.assert_numpy_array_equal(safe_sort(result.index.levels[1]), + safe_sort(self.df2.columns.unique())) + for i, result in enumerate(results): + if i > 0: + self.compare(result, results[0]) + + @pytest.mark.parametrize( + 'f', [lambda x, y: x.expanding().cov(y, pairwise=False), + lambda x, y: x.expanding().corr(y, pairwise=False), + lambda x, y: x.rolling(window=3).cov(y, pairwise=False), + lambda x, y: x.rolling(window=3).corr(y, pairwise=False), + lambda x, y: x.ewm(com=3).cov(y, pairwise=False), + lambda x, y: x.ewm(com=3).corr(y, pairwise=False), ]) + def test_no_pairwise_with_other(self, f): + + # DataFrame with another DataFrame, pairwise=False + results = [f(df, self.df2) if df.columns.is_unique else None + for df in self.df1s] + for (df, result) in zip(self.df1s, results): + if result is not None: + with catch_warnings(record=True): + # we can have int and str columns + expected_index = df.index.union(self.df2.index) + expected_columns = df.columns.union(self.df2.columns) + tm.assert_index_equal(result.index, expected_index) + tm.assert_index_equal(result.columns, expected_columns) + else: + tm.assert_raises_regex( + ValueError, "'arg1' columns are not unique", f, df, + self.df2) + tm.assert_raises_regex( + ValueError, "'arg2' columns are not unique", f, + self.df2, df) + + @pytest.mark.parametrize( + 'f', [lambda x, y: x.expanding().cov(y), + lambda x, y: x.expanding().corr(y), + lambda x, y: x.rolling(window=3).cov(y), + lambda x, y: x.rolling(window=3).corr(y), + lambda x, y: x.ewm(com=3).cov(y), + lambda x, y: x.ewm(com=3).corr(y), ]) + def test_pairwise_with_series(self, f): + + # DataFrame with a Series + results = ([f(df, self.s) for df in self.df1s] + + [f(self.s, df) for df in self.df1s]) + for (df, result) in zip(self.df1s, results): + tm.assert_index_equal(result.index, df.index) + tm.assert_index_equal(result.columns, df.columns) 
+ for i, result in enumerate(results): + if i > 0: + self.compare(result, results[0]) # create the data only once as we are not setting it @@ -1703,10 +1817,10 @@ def create_dataframes(): def is_constant(x): values = x.values.ravel() - return len(set(values[notnull(values)])) == 1 + return len(set(values[notna(values)])) == 1 def no_nans(x): - return x.notnull().all().all() + return x.notna().all().all() # data is a tuple(object, is_contant, no_nans) data = create_series() + create_dataframes() @@ -1717,6 +1831,15 @@ def no_nans(x): _consistency_data = _create_consistency_data() +def _rolling_consistency_cases(): + for window in [1, 2, 3, 10, 20]: + for min_periods in set([0, 1, 2, 3, 4, window]): + if min_periods and (min_periods > window): + continue + for center in [False, True]: + yield window, min_periods, center + + class TestMomentsConsistency(Base): base_functions = [ (lambda v: Series(v).count(), None, 'count'), @@ -1733,9 +1856,6 @@ class TestMomentsConsistency(Base): # lambda v: Series(v).skew(), 3, 'skew'), # (lambda v: Series(v).kurt(), 4, 'kurt'), - # (lambda x, min_periods: mom.expanding_quantile(x, 0.3, - # min_periods=min_periods, 'quantile'), - # restore once GH 8084 is fixed # lambda v: Series(v).quantile(0.3), None, 'quantile'), @@ -1743,15 +1863,11 @@ class TestMomentsConsistency(Base): (np.nanmax, 1, 'max'), (np.nanmin, 1, 'min'), (np.nansum, 1, 'sum'), + (np.nanmean, 1, 'mean'), + (lambda v: np.nanstd(v, ddof=1), 1, 'std'), + (lambda v: np.nanvar(v, ddof=1), 1, 'var'), + (np.nanmedian, 1, 'median'), ] - if np.__version__ >= LooseVersion('1.8.0'): - base_functions += [ - (np.nanmean, 1, 'mean'), - (lambda v: np.nanstd(v, ddof=1), 1, 'std'), - (lambda v: np.nanvar(v, ddof=1), 1, 'var'), - ] - if np.__version__ >= LooseVersion('1.9.0'): - base_functions += [(np.nanmedian, 1, 'median'), ] no_nan_functions = [ (np.max, None, 'max'), (np.min, None, 'min'), @@ -1766,7 +1882,7 @@ def _create_data(self): super(TestMomentsConsistency, 
self)._create_data() self.data = _consistency_data - def setUp(self): + def setup_method(self, method): self._create_data() def _test_moments_consistency(self, min_periods, count, mean, mock_mean, @@ -1776,7 +1892,7 @@ def _test_moments_consistency(self, min_periods, count, mean, mock_mean, var_debiasing_factors=None): def _non_null_values(x): values = x.values.ravel() - return set(values[notnull(values)].tolist()) + return set(values[notna(values)].tolist()) for (x, is_constant, no_nans) in self.data: count_x = count(x) @@ -1789,7 +1905,8 @@ def _non_null_values(x): # check that correlation of a series with itself is either 1 or NaN corr_x_x = corr(x, x) - # self.assertTrue(_non_null_values(corr_x_x).issubset(set([1.]))) # + + # assert _non_null_values(corr_x_x).issubset(set([1.])) # restore once rolling_cov(x, x) is identically equal to var(x) if is_constant: @@ -1819,11 +1936,11 @@ def _non_null_values(x): # check that var(x), std(x), and cov(x) are all >= 0 var_x = var(x) std_x = std(x) - self.assertFalse((var_x < 0).any().any()) - self.assertFalse((std_x < 0).any().any()) + assert not (var_x < 0).any().any() + assert not (std_x < 0).any().any() if cov: cov_x_x = cov(x, x) - self.assertFalse((cov_x_x < 0).any().any()) + assert not (cov_x_x < 0).any().any() # check that var(x) == cov(x, x) assert_equal(var_x, cov_x_x) @@ -1838,7 +1955,7 @@ def _non_null_values(x): if is_constant: # check that variance of constant series is identically 0 - self.assertFalse((var_x > 0).any().any()) + assert not (var_x > 0).any().any() expected = x * np.nan expected[count_x >= max(min_periods, 1)] = 0. 
if var is var_unbiased: @@ -1847,7 +1964,7 @@ def _non_null_values(x): if isinstance(x, Series): for (y, is_constant, no_nans) in self.data: - if not x.isnull().equals(y.isnull()): + if not x.isna().equals(y.isna()): # can only easily test two Series with similar # structure continue @@ -1883,8 +2000,12 @@ def _non_null_values(x): assert_equal(cov_x_y, mean_x_times_y - (mean_x * mean_y)) - @tm.slow - def test_ewm_consistency(self): + @pytest.mark.slow + @pytest.mark.parametrize( + 'min_periods, adjust, ignore_na', product([0, 1, 2, 3, 4], + [True, False], + [False, True])) + def test_ewm_consistency(self, min_periods, adjust, ignore_na): def _weights(s, com, adjust, ignore_na): if isinstance(s, DataFrame): if not len(s.columns): @@ -1900,8 +2021,8 @@ def _weights(s, com, adjust, ignore_na): w = Series(np.nan, index=s.index) alpha = 1. / (1. + com) if ignore_na: - w[s.notnull()] = _weights(s[s.notnull()], com=com, - adjust=adjust, ignore_na=False) + w[s.notna()] = _weights(s[s.notna()], com=com, + adjust=adjust, ignore_na=False) elif adjust: for i in range(len(s)): if s.iat[i] == s.iat[i]: @@ -1938,52 +2059,51 @@ def _ewma(s, com, min_periods, adjust, ignore_na): return result com = 3. 
- for min_periods, adjust, ignore_na in product([0, 1, 2, 3, 4], - [True, False], - [False, True]): - # test consistency between different ewm* moments - self._test_moments_consistency( - min_periods=min_periods, - count=lambda x: x.expanding().count(), - mean=lambda x: x.ewm(com=com, min_periods=min_periods, - adjust=adjust, - ignore_na=ignore_na).mean(), - mock_mean=lambda x: _ewma(x, com=com, - min_periods=min_periods, - adjust=adjust, - ignore_na=ignore_na), - corr=lambda x, y: x.ewm(com=com, min_periods=min_periods, - adjust=adjust, - ignore_na=ignore_na).corr(y), - var_unbiased=lambda x: ( - x.ewm(com=com, min_periods=min_periods, - adjust=adjust, - ignore_na=ignore_na).var(bias=False)), - std_unbiased=lambda x: ( - x.ewm(com=com, min_periods=min_periods, - adjust=adjust, ignore_na=ignore_na) - .std(bias=False)), - cov_unbiased=lambda x, y: ( - x.ewm(com=com, min_periods=min_periods, - adjust=adjust, ignore_na=ignore_na) - .cov(y, bias=False)), - var_biased=lambda x: ( - x.ewm(com=com, min_periods=min_periods, - adjust=adjust, ignore_na=ignore_na) - .var(bias=True)), - std_biased=lambda x: x.ewm(com=com, min_periods=min_periods, - adjust=adjust, - ignore_na=ignore_na).std(bias=True), - cov_biased=lambda x, y: ( - x.ewm(com=com, min_periods=min_periods, - adjust=adjust, ignore_na=ignore_na) - .cov(y, bias=True)), - var_debiasing_factors=lambda x: ( - _variance_debiasing_factors(x, com=com, adjust=adjust, - ignore_na=ignore_na))) - - @tm.slow - def test_expanding_consistency(self): + # test consistency between different ewm* moments + self._test_moments_consistency( + min_periods=min_periods, + count=lambda x: x.expanding().count(), + mean=lambda x: x.ewm(com=com, min_periods=min_periods, + adjust=adjust, + ignore_na=ignore_na).mean(), + mock_mean=lambda x: _ewma(x, com=com, + min_periods=min_periods, + adjust=adjust, + ignore_na=ignore_na), + corr=lambda x, y: x.ewm(com=com, min_periods=min_periods, + adjust=adjust, + ignore_na=ignore_na).corr(y), + 
var_unbiased=lambda x: ( + x.ewm(com=com, min_periods=min_periods, + adjust=adjust, + ignore_na=ignore_na).var(bias=False)), + std_unbiased=lambda x: ( + x.ewm(com=com, min_periods=min_periods, + adjust=adjust, ignore_na=ignore_na) + .std(bias=False)), + cov_unbiased=lambda x, y: ( + x.ewm(com=com, min_periods=min_periods, + adjust=adjust, ignore_na=ignore_na) + .cov(y, bias=False)), + var_biased=lambda x: ( + x.ewm(com=com, min_periods=min_periods, + adjust=adjust, ignore_na=ignore_na) + .var(bias=True)), + std_biased=lambda x: x.ewm(com=com, min_periods=min_periods, + adjust=adjust, + ignore_na=ignore_na).std(bias=True), + cov_biased=lambda x, y: ( + x.ewm(com=com, min_periods=min_periods, + adjust=adjust, ignore_na=ignore_na) + .cov(y, bias=True)), + var_debiasing_factors=lambda x: ( + _variance_debiasing_factors(x, com=com, adjust=adjust, + ignore_na=ignore_na))) + + @pytest.mark.slow + @pytest.mark.parametrize( + 'min_periods', [0, 1, 2, 3, 4]) + def test_expanding_consistency(self, min_periods): # suppress warnings about empty slices, as we are deliberately testing # with empty/0-length Series/DataFrames @@ -1992,87 +2112,73 @@ def test_expanding_consistency(self): message=".*(empty slice|0 for slice).*", category=RuntimeWarning) - for min_periods in [0, 1, 2, 3, 4]: - - # test consistency between different expanding_* moments - self._test_moments_consistency( - min_periods=min_periods, - count=lambda x: x.expanding().count(), - mean=lambda x: x.expanding( - min_periods=min_periods).mean(), - mock_mean=lambda x: x.expanding( - min_periods=min_periods).sum() / x.expanding().count(), - corr=lambda x, y: x.expanding( - min_periods=min_periods).corr(y), - var_unbiased=lambda x: x.expanding( - min_periods=min_periods).var(), - std_unbiased=lambda x: x.expanding( - min_periods=min_periods).std(), - cov_unbiased=lambda x, y: x.expanding( - min_periods=min_periods).cov(y), - var_biased=lambda x: x.expanding( - min_periods=min_periods).var(ddof=0), - std_biased=lambda 
x: x.expanding( - min_periods=min_periods).std(ddof=0), - cov_biased=lambda x, y: x.expanding( - min_periods=min_periods).cov(y, ddof=0), - var_debiasing_factors=lambda x: ( - x.expanding().count() / - (x.expanding().count() - 1.) - .replace(0., np.nan))) - - # test consistency between expanding_xyz() and either (a) - # expanding_apply of Series.xyz(), or (b) expanding_apply of - # np.nanxyz() - for (x, is_constant, no_nans) in self.data: - functions = self.base_functions - - # GH 8269 - if no_nans: - functions = self.base_functions + self.no_nan_functions - for (f, require_min_periods, name) in functions: - expanding_f = getattr( - x.expanding(min_periods=min_periods), name) - - if (require_min_periods and - (min_periods is not None) and - (min_periods < require_min_periods)): - continue - - if name == 'count': - expanding_f_result = expanding_f() - expanding_apply_f_result = x.expanding( - min_periods=0).apply(func=f) + # test consistency between different expanding_* moments + self._test_moments_consistency( + min_periods=min_periods, + count=lambda x: x.expanding().count(), + mean=lambda x: x.expanding( + min_periods=min_periods).mean(), + mock_mean=lambda x: x.expanding( + min_periods=min_periods).sum() / x.expanding().count(), + corr=lambda x, y: x.expanding( + min_periods=min_periods).corr(y), + var_unbiased=lambda x: x.expanding( + min_periods=min_periods).var(), + std_unbiased=lambda x: x.expanding( + min_periods=min_periods).std(), + cov_unbiased=lambda x, y: x.expanding( + min_periods=min_periods).cov(y), + var_biased=lambda x: x.expanding( + min_periods=min_periods).var(ddof=0), + std_biased=lambda x: x.expanding( + min_periods=min_periods).std(ddof=0), + cov_biased=lambda x, y: x.expanding( + min_periods=min_periods).cov(y, ddof=0), + var_debiasing_factors=lambda x: ( + x.expanding().count() / + (x.expanding().count() - 1.) 
+ .replace(0., np.nan))) + + # test consistency between expanding_xyz() and either (a) + # expanding_apply of Series.xyz(), or (b) expanding_apply of + # np.nanxyz() + for (x, is_constant, no_nans) in self.data: + functions = self.base_functions + + # GH 8269 + if no_nans: + functions = self.base_functions + self.no_nan_functions + for (f, require_min_periods, name) in functions: + expanding_f = getattr( + x.expanding(min_periods=min_periods), name) + + if (require_min_periods and + (min_periods is not None) and + (min_periods < require_min_periods)): + continue + + if name == 'count': + expanding_f_result = expanding_f() + expanding_apply_f_result = x.expanding( + min_periods=0).apply(func=f) + else: + if name in ['cov', 'corr']: + expanding_f_result = expanding_f( + pairwise=False) else: - if name in ['cov', 'corr']: - expanding_f_result = expanding_f( - pairwise=False) - else: - expanding_f_result = expanding_f() - expanding_apply_f_result = x.expanding( - min_periods=min_periods).apply(func=f) - - if not tm._incompat_bottleneck_version(name): - assert_equal(expanding_f_result, - expanding_apply_f_result) - - if (name in ['cov', 'corr']) and isinstance(x, - DataFrame): - # test pairwise=True - expanding_f_result = expanding_f(x, pairwise=True) - expected = Panel(items=x.index, - major_axis=x.columns, - minor_axis=x.columns) - for i, _ in enumerate(x.columns): - for j, _ in enumerate(x.columns): - expected.iloc[:, i, j] = getattr( - x.iloc[:, i].expanding( - min_periods=min_periods), - name)(x.iloc[:, j]) - tm.assert_panel_equal(expanding_f_result, expected) - - @tm.slow - def test_rolling_consistency(self): + expanding_f_result = expanding_f() + expanding_apply_f_result = x.expanding( + min_periods=min_periods).apply(func=f) + + # GH 9422 + if name in ['sum', 'prod']: + assert_equal(expanding_f_result, + expanding_apply_f_result) + + @pytest.mark.slow + @pytest.mark.parametrize( + 'window,min_periods,center', list(_rolling_consistency_cases())) + def 
test_rolling_consistency(self, window, min_periods, center): # suppress warnings about empty slices, as we are deliberately testing # with empty/0-length Series/DataFrames @@ -2081,119 +2187,93 @@ def test_rolling_consistency(self): message=".*(empty slice|0 for slice).*", category=RuntimeWarning) - def cases(): - for window in [1, 2, 3, 10, 20]: - for min_periods in set([0, 1, 2, 3, 4, window]): - if min_periods and (min_periods > window): - continue - for center in [False, True]: - yield window, min_periods, center - - for window, min_periods, center in cases(): - # test consistency between different rolling_* moments - self._test_moments_consistency( - min_periods=min_periods, - count=lambda x: ( - x.rolling(window=window, center=center) - .count()), - mean=lambda x: ( - x.rolling(window=window, min_periods=min_periods, - center=center).mean()), - mock_mean=lambda x: ( - x.rolling(window=window, - min_periods=min_periods, - center=center).sum() - .divide(x.rolling(window=window, - min_periods=min_periods, - center=center).count())), - corr=lambda x, y: ( - x.rolling(window=window, min_periods=min_periods, - center=center).corr(y)), - - var_unbiased=lambda x: ( - x.rolling(window=window, min_periods=min_periods, - center=center).var()), - - std_unbiased=lambda x: ( - x.rolling(window=window, min_periods=min_periods, - center=center).std()), - - cov_unbiased=lambda x, y: ( - x.rolling(window=window, min_periods=min_periods, - center=center).cov(y)), - - var_biased=lambda x: ( - x.rolling(window=window, min_periods=min_periods, - center=center).var(ddof=0)), - - std_biased=lambda x: ( - x.rolling(window=window, min_periods=min_periods, - center=center).std(ddof=0)), - - cov_biased=lambda x, y: ( - x.rolling(window=window, min_periods=min_periods, - center=center).cov(y, ddof=0)), - var_debiasing_factors=lambda x: ( - x.rolling(window=window, center=center).count() - .divide((x.rolling(window=window, center=center) - .count() - 1.) 
- .replace(0., np.nan)))) - - # test consistency between rolling_xyz() and either (a) - # rolling_apply of Series.xyz(), or (b) rolling_apply of - # np.nanxyz() - for (x, is_constant, no_nans) in self.data: - functions = self.base_functions - - # GH 8269 - if no_nans: - functions = self.base_functions + self.no_nan_functions - for (f, require_min_periods, name) in functions: - rolling_f = getattr( - x.rolling(window=window, center=center, - min_periods=min_periods), name) - - if require_min_periods and ( - min_periods is not None) and ( - min_periods < require_min_periods): - continue + # test consistency between different rolling_* moments + self._test_moments_consistency( + min_periods=min_periods, + count=lambda x: ( + x.rolling(window=window, center=center) + .count()), + mean=lambda x: ( + x.rolling(window=window, min_periods=min_periods, + center=center).mean()), + mock_mean=lambda x: ( + x.rolling(window=window, + min_periods=min_periods, + center=center).sum() + .divide(x.rolling(window=window, + min_periods=min_periods, + center=center).count())), + corr=lambda x, y: ( + x.rolling(window=window, min_periods=min_periods, + center=center).corr(y)), - if name == 'count': - rolling_f_result = rolling_f() - rolling_apply_f_result = x.rolling( - window=window, min_periods=0, - center=center).apply(func=f) + var_unbiased=lambda x: ( + x.rolling(window=window, min_periods=min_periods, + center=center).var()), + + std_unbiased=lambda x: ( + x.rolling(window=window, min_periods=min_periods, + center=center).std()), + + cov_unbiased=lambda x, y: ( + x.rolling(window=window, min_periods=min_periods, + center=center).cov(y)), + + var_biased=lambda x: ( + x.rolling(window=window, min_periods=min_periods, + center=center).var(ddof=0)), + + std_biased=lambda x: ( + x.rolling(window=window, min_periods=min_periods, + center=center).std(ddof=0)), + + cov_biased=lambda x, y: ( + x.rolling(window=window, min_periods=min_periods, + center=center).cov(y, ddof=0)), + 
var_debiasing_factors=lambda x: ( + x.rolling(window=window, center=center).count() + .divide((x.rolling(window=window, center=center) + .count() - 1.) + .replace(0., np.nan)))) + + # test consistency between rolling_xyz() and either (a) + # rolling_apply of Series.xyz(), or (b) rolling_apply of + # np.nanxyz() + for (x, is_constant, no_nans) in self.data: + functions = self.base_functions + + # GH 8269 + if no_nans: + functions = self.base_functions + self.no_nan_functions + for (f, require_min_periods, name) in functions: + rolling_f = getattr( + x.rolling(window=window, center=center, + min_periods=min_periods), name) + + if require_min_periods and ( + min_periods is not None) and ( + min_periods < require_min_periods): + continue + + if name == 'count': + rolling_f_result = rolling_f() + rolling_apply_f_result = x.rolling( + window=window, min_periods=0, + center=center).apply(func=f) + else: + if name in ['cov', 'corr']: + rolling_f_result = rolling_f( + pairwise=False) else: - if name in ['cov', 'corr']: - rolling_f_result = rolling_f( - pairwise=False) - else: - rolling_f_result = rolling_f() - rolling_apply_f_result = x.rolling( - window=window, min_periods=min_periods, - center=center).apply(func=f) - if not tm._incompat_bottleneck_version(name): - assert_equal(rolling_f_result, - rolling_apply_f_result) - - if (name in ['cov', 'corr']) and isinstance( - x, DataFrame): - # test pairwise=True - rolling_f_result = rolling_f(x, - pairwise=True) - expected = Panel(items=x.index, - major_axis=x.columns, - minor_axis=x.columns) - for i, _ in enumerate(x.columns): - for j, _ in enumerate(x.columns): - expected.iloc[:, i, j] = ( - getattr( - x.iloc[:, i] - .rolling(window=window, - min_periods=min_periods, - center=center), - name)(x.iloc[:, j])) - tm.assert_panel_equal(rolling_f_result, expected) + rolling_f_result = rolling_f() + rolling_apply_f_result = x.rolling( + window=window, min_periods=min_periods, + center=center).apply(func=f) + + # GH 9422 + if name 
in ['sum', 'prod']: + assert_equal(rolling_f_result, + rolling_apply_f_result) # binary moments def test_rolling_cov(self): @@ -2226,20 +2306,28 @@ def test_rolling_corr_pairwise(self): self._check_pairwise_moment('rolling', 'corr', window=10, min_periods=5) + @pytest.mark.parametrize('window', range(7)) + def test_rolling_corr_with_zero_variance(self, window): + # GH 18430 + s = pd.Series(np.zeros(20)) + other = pd.Series(np.arange(20)) + + assert s.rolling(window=window).corr(other=other).isna().all() + def _check_pairwise_moment(self, dispatch, name, **kwargs): def get_result(obj, obj2=None): return getattr(getattr(obj, dispatch)(**kwargs), name)(obj2) - panel = get_result(self.frame) - actual = panel.loc[:, 1, 5] + result = get_result(self.frame) + result = result.loc[(slice(None), 1), 5] + result.index = result.index.droplevel(1) expected = get_result(self.frame[1], self.frame[5]) - tm.assert_series_equal(actual, expected, check_names=False) - self.assertEqual(actual.name, 5) + tm.assert_series_equal(result, expected, check_names=False) def test_flex_binary_moment(self): # GH3155 # don't blow the stack - self.assertRaises(TypeError, rwindow._flex_binary_moment, 5, 6, None) + pytest.raises(TypeError, rwindow._flex_binary_moment, 5, 6, None) def test_corr_sanity(self): # GH 3155 @@ -2249,16 +2337,15 @@ def test_corr_sanity(self): [0.84780328, 0.33394331], [0.78369152, 0.63919667]])) res = df[0].rolling(5, center=True).corr(df[1]) - self.assertTrue(all([np.abs(np.nan_to_num(x)) <= 1 for x in res])) + assert all(np.abs(np.nan_to_num(x)) <= 1 for x in res) # and some fuzzing - for i in range(10): + for _ in range(10): df = DataFrame(np.random.rand(30, 2)) res = df[0].rolling(5, center=True).corr(df[1]) try: - self.assertTrue(all([np.abs(np.nan_to_num(x)) <= 1 for x in res - ])) - except: + assert all(np.abs(np.nan_to_num(x)) <= 1 for x in res) + except AssertionError: print(res) def test_flex_binary_frame(self): @@ -2308,16 +2395,16 @@ def func(A, B, com, 
**kwargs): B[-10:] = np.NaN result = func(A, B, 20, min_periods=5) - self.assertTrue(np.isnan(result.values[:14]).all()) - self.assertFalse(np.isnan(result.values[14:]).any()) + assert np.isnan(result.values[:14]).all() + assert not np.isnan(result.values[14:]).any() # GH 7898 for min_periods in (0, 1, 2): result = func(A, B, 20, min_periods=min_periods) # binary functions (ewmcov, ewmcorr) with bias=False require at # least two values - self.assertTrue(np.isnan(result.values[:11]).all()) - self.assertFalse(np.isnan(result.values[11:]).any()) + assert np.isnan(result.values[:11]).all() + assert not np.isnan(result.values[11:]).any() # check series of length 0 result = func(Series([]), Series([]), 50, min_periods=min_periods) @@ -2328,23 +2415,7 @@ def func(A, B, com, **kwargs): Series([1.]), Series([1.]), 50, min_periods=min_periods) tm.assert_series_equal(result, Series([np.NaN])) - self.assertRaises(Exception, func, A, randn(50), 20, min_periods=5) - - def test_expanding_apply(self): - ser = Series([]) - tm.assert_series_equal(ser, ser.expanding().apply(lambda x: x.mean())) - - def expanding_mean(x, min_periods=1, freq=None): - return mom.expanding_apply(x, lambda x: x.mean(), - min_periods=min_periods, freq=freq) - - self._check_expanding(expanding_mean, np.mean) - - # GH 8080 - s = Series([None, None, None]) - result = s.expanding(min_periods=0).apply(lambda x: len(x)) - expected = Series([1., 2., 3.]) - tm.assert_series_equal(result, expected) + pytest.raises(Exception, func, A, randn(50), 20, min_periods=5) def test_expanding_apply_args_kwargs(self): def mean_w_arg(x, const): @@ -2393,26 +2464,20 @@ def test_expanding_cov(self): tm.assert_almost_equal(rolling_result, result) - def test_expanding_max(self): - self._check_expanding(mom.expanding_max, np.max, preserve_nan=False) - def test_expanding_cov_pairwise(self): result = self.frame.expanding().corr() rolling_result = self.frame.rolling(window=len(self.frame), min_periods=1).corr() - for i in result.items: 
- tm.assert_almost_equal(result[i], rolling_result[i]) + tm.assert_frame_equal(result, rolling_result) def test_expanding_corr_pairwise(self): result = self.frame.expanding().corr() rolling_result = self.frame.rolling(window=len(self.frame), min_periods=1).corr() - - for i in result.items: - tm.assert_almost_equal(result[i], rolling_result[i]) + tm.assert_frame_equal(result, rolling_result) def test_expanding_cov_diff_index(self): # GH 7512 @@ -2480,8 +2545,6 @@ def test_rolling_functions_window_non_shrinkage(self): s_expected = Series(np.nan, index=s.index) df = DataFrame([[1, 5], [3, 2], [3, 9], [-1, 0]], columns=['A', 'B']) df_expected = DataFrame(np.nan, index=df.index, columns=df.columns) - df_expected_panel = Panel(items=df.index, major_axis=df.columns, - minor_axis=df.columns) functions = [lambda x: (x.rolling(window=10, min_periods=5) .cov(x, pairwise=False)), @@ -2513,13 +2576,24 @@ def test_rolling_functions_window_non_shrinkage(self): # scipy needed for rolling_window continue + def test_rolling_functions_window_non_shrinkage_binary(self): + + # corr/cov return a MI DataFrame + df = DataFrame([[1, 5], [3, 2], [3, 9], [-1, 0]], + columns=Index(['A', 'B'], name='foo'), + index=Index(range(4), name='bar')) + df_expected = DataFrame( + columns=Index(['A', 'B'], name='foo'), + index=pd.MultiIndex.from_product([df.index, df.columns], + names=['bar', 'foo']), + dtype='float64') functions = [lambda x: (x.rolling(window=10, min_periods=5) .cov(x, pairwise=True)), lambda x: (x.rolling(window=10, min_periods=5) .corr(x, pairwise=True))] for f in functions: - df_result_panel = f(df) - tm.assert_panel_equal(df_result_panel, df_expected_panel) + df_result = f(df) + tm.assert_frame_equal(df_result, df_expected) def test_moment_functions_zero_length(self): # GH 8056 @@ -2527,13 +2601,9 @@ def test_moment_functions_zero_length(self): s_expected = s df1 = DataFrame() df1_expected = df1 - df1_expected_panel = Panel(items=df1.index, major_axis=df1.columns, - 
minor_axis=df1.columns) df2 = DataFrame(columns=['a']) df2['a'] = df2['a'].astype('float64') df2_expected = df2 - df2_expected_panel = Panel(items=df2.index, major_axis=df2.columns, - minor_axis=df2.columns) functions = [lambda x: x.expanding().count(), lambda x: x.expanding(min_periods=5).cov( @@ -2586,6 +2656,23 @@ def test_moment_functions_zero_length(self): # scipy needed for rolling_window continue + def test_moment_functions_zero_length_pairwise(self): + + df1 = DataFrame() + df1_expected = df1 + df2 = DataFrame(columns=Index(['a'], name='foo'), + index=Index([], name='bar')) + df2['a'] = df2['a'].astype('float64') + + df1_expected = DataFrame( + index=pd.MultiIndex.from_product([df1.index, df1.columns]), + columns=Index([])) + df2_expected = DataFrame( + index=pd.MultiIndex.from_product([df2.index, df2.columns], + names=['bar', 'foo']), + columns=Index(['a'], name='foo'), + dtype='float64') + functions = [lambda x: (x.expanding(min_periods=5) .cov(x, pairwise=True)), lambda x: (x.expanding(min_periods=5) @@ -2596,24 +2683,33 @@ def test_moment_functions_zero_length(self): .corr(x, pairwise=True)), ] for f in functions: - df1_result_panel = f(df1) - tm.assert_panel_equal(df1_result_panel, df1_expected_panel) + df1_result = f(df1) + tm.assert_frame_equal(df1_result, df1_expected) - df2_result_panel = f(df2) - tm.assert_panel_equal(df2_result_panel, df2_expected_panel) + df2_result = f(df2) + tm.assert_frame_equal(df2_result, df2_expected) def test_expanding_cov_pairwise_diff_length(self): # GH 7512 - df1 = DataFrame([[1, 5], [3, 2], [3, 9]], columns=['A', 'B']) - df1a = DataFrame([[1, 5], [3, 9]], index=[0, 2], columns=['A', 'B']) - df2 = DataFrame([[5, 6], [None, None], [2, 1]], columns=['X', 'Y']) - df2a = DataFrame([[5, 6], [2, 1]], index=[0, 2], columns=['X', 'Y']) - result1 = df1.expanding().cov(df2a, pairwise=True)[2] - result2 = df1.expanding().cov(df2a, pairwise=True)[2] - result3 = df1a.expanding().cov(df2, pairwise=True)[2] - result4 = 
df1a.expanding().cov(df2a, pairwise=True)[2] - expected = DataFrame([[-3., -5.], [-6., -10.]], index=['A', 'B'], - columns=['X', 'Y']) + df1 = DataFrame([[1, 5], [3, 2], [3, 9]], + columns=Index(['A', 'B'], name='foo')) + df1a = DataFrame([[1, 5], [3, 9]], + index=[0, 2], + columns=Index(['A', 'B'], name='foo')) + df2 = DataFrame([[5, 6], [None, None], [2, 1]], + columns=Index(['X', 'Y'], name='foo')) + df2a = DataFrame([[5, 6], [2, 1]], + index=[0, 2], + columns=Index(['X', 'Y'], name='foo')) + # TODO: xref gh-15826 + # .loc is not preserving the names + result1 = df1.expanding().cov(df2a, pairwise=True).loc[2] + result2 = df1.expanding().cov(df2a, pairwise=True).loc[2] + result3 = df1a.expanding().cov(df2, pairwise=True).loc[2] + result4 = df1a.expanding().cov(df2a, pairwise=True).loc[2] + expected = DataFrame([[-3.0, -6.0], [-5.0, -10.0]], + columns=Index(['A', 'B'], name='foo'), + index=Index(['X', 'Y'], name='foo')) tm.assert_frame_equal(result1, expected) tm.assert_frame_equal(result2, expected) tm.assert_frame_equal(result3, expected) @@ -2621,149 +2717,30 @@ def test_expanding_cov_pairwise_diff_length(self): def test_expanding_corr_pairwise_diff_length(self): # GH 7512 - df1 = DataFrame([[1, 2], [3, 2], [3, 4]], columns=['A', 'B']) - df1a = DataFrame([[1, 2], [3, 4]], index=[0, 2], columns=['A', 'B']) - df2 = DataFrame([[5, 6], [None, None], [2, 1]], columns=['X', 'Y']) - df2a = DataFrame([[5, 6], [2, 1]], index=[0, 2], columns=['X', 'Y']) - result1 = df1.expanding().corr(df2, pairwise=True)[2] - result2 = df1.expanding().corr(df2a, pairwise=True)[2] - result3 = df1a.expanding().corr(df2, pairwise=True)[2] - result4 = df1a.expanding().corr(df2a, pairwise=True)[2] - expected = DataFrame([[-1.0, -1.0], [-1.0, -1.0]], index=['A', 'B'], - columns=['X', 'Y']) + df1 = DataFrame([[1, 2], [3, 2], [3, 4]], + columns=['A', 'B'], + index=Index(range(3), name='bar')) + df1a = DataFrame([[1, 2], [3, 4]], + index=Index([0, 2], name='bar'), + columns=['A', 'B']) + df2 = 
DataFrame([[5, 6], [None, None], [2, 1]], + columns=['X', 'Y'], + index=Index(range(3), name='bar')) + df2a = DataFrame([[5, 6], [2, 1]], + index=Index([0, 2], name='bar'), + columns=['X', 'Y']) + result1 = df1.expanding().corr(df2, pairwise=True).loc[2] + result2 = df1.expanding().corr(df2a, pairwise=True).loc[2] + result3 = df1a.expanding().corr(df2, pairwise=True).loc[2] + result4 = df1a.expanding().corr(df2a, pairwise=True).loc[2] + expected = DataFrame([[-1.0, -1.0], [-1.0, -1.0]], + columns=['A', 'B'], + index=Index(['X', 'Y'])) tm.assert_frame_equal(result1, expected) tm.assert_frame_equal(result2, expected) tm.assert_frame_equal(result3, expected) tm.assert_frame_equal(result4, expected) - def test_pairwise_stats_column_names_order(self): - # GH 7738 - df1s = [DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[0, 1]), - DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1, 0]), - DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1, 1]), - DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], - columns=['C', 'C']), - DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1., 0]), - DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[0., 1]), - DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=['C', 1]), - DataFrame([[2., 4.], [1., 2.], [5., 2.], [8., 1.]], - columns=[1, 0.]), - DataFrame([[2, 4.], [1, 2.], [5, 2.], [8, 1.]], - columns=[0, 1.]), - DataFrame([[2, 4], [1, 2], [5, 2], [8, 1.]], - columns=[1., 'X']), ] - df2 = DataFrame([[None, 1, 1], [None, 1, 2], - [None, 3, 2], [None, 8, 1]], columns=['Y', 'Z', 'X']) - s = Series([1, 1, 3, 8]) - - # suppress warnings about incomparable objects, as we are deliberately - # testing with such column labels - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", - message=".*incomparable objects.*", - category=RuntimeWarning) - - # DataFrame methods (which do not call _flex_binary_moment()) - for f in [lambda x: x.cov(), lambda x: x.corr(), ]: - results = [f(df) for df in df1s] - for (df, result) in 
zip(df1s, results): - tm.assert_index_equal(result.index, df.columns) - tm.assert_index_equal(result.columns, df.columns) - for i, result in enumerate(results): - if i > 0: - # compare internal values, as columns can be different - self.assert_numpy_array_equal(result.values, - results[0].values) - - # DataFrame with itself, pairwise=True - for f in [lambda x: x.expanding().cov(pairwise=True), - lambda x: x.expanding().corr(pairwise=True), - lambda x: x.rolling(window=3).cov(pairwise=True), - lambda x: x.rolling(window=3).corr(pairwise=True), - lambda x: x.ewm(com=3).cov(pairwise=True), - lambda x: x.ewm(com=3).corr(pairwise=True), ]: - results = [f(df) for df in df1s] - for (df, result) in zip(df1s, results): - tm.assert_index_equal(result.items, df.index) - tm.assert_index_equal(result.major_axis, df.columns) - tm.assert_index_equal(result.minor_axis, df.columns) - for i, result in enumerate(results): - if i > 0: - self.assert_numpy_array_equal(result.values, - results[0].values) - - # DataFrame with itself, pairwise=False - for f in [lambda x: x.expanding().cov(pairwise=False), - lambda x: x.expanding().corr(pairwise=False), - lambda x: x.rolling(window=3).cov(pairwise=False), - lambda x: x.rolling(window=3).corr(pairwise=False), - lambda x: x.ewm(com=3).cov(pairwise=False), - lambda x: x.ewm(com=3).corr(pairwise=False), ]: - results = [f(df) for df in df1s] - for (df, result) in zip(df1s, results): - tm.assert_index_equal(result.index, df.index) - tm.assert_index_equal(result.columns, df.columns) - for i, result in enumerate(results): - if i > 0: - self.assert_numpy_array_equal(result.values, - results[0].values) - - # DataFrame with another DataFrame, pairwise=True - for f in [lambda x, y: x.expanding().cov(y, pairwise=True), - lambda x, y: x.expanding().corr(y, pairwise=True), - lambda x, y: x.rolling(window=3).cov(y, pairwise=True), - lambda x, y: x.rolling(window=3).corr(y, pairwise=True), - lambda x, y: x.ewm(com=3).cov(y, pairwise=True), - lambda x, y: 
x.ewm(com=3).corr(y, pairwise=True), ]: - results = [f(df, df2) for df in df1s] - for (df, result) in zip(df1s, results): - tm.assert_index_equal(result.items, df.index) - tm.assert_index_equal(result.major_axis, df.columns) - tm.assert_index_equal(result.minor_axis, df2.columns) - for i, result in enumerate(results): - if i > 0: - self.assert_numpy_array_equal(result.values, - results[0].values) - - # DataFrame with another DataFrame, pairwise=False - for f in [lambda x, y: x.expanding().cov(y, pairwise=False), - lambda x, y: x.expanding().corr(y, pairwise=False), - lambda x, y: x.rolling(window=3).cov(y, pairwise=False), - lambda x, y: x.rolling(window=3).corr(y, pairwise=False), - lambda x, y: x.ewm(com=3).cov(y, pairwise=False), - lambda x, y: x.ewm(com=3).corr(y, pairwise=False), ]: - results = [f(df, df2) if df.columns.is_unique else None - for df in df1s] - for (df, result) in zip(df1s, results): - if result is not None: - expected_index = df.index.union(df2.index) - expected_columns = df.columns.union(df2.columns) - tm.assert_index_equal(result.index, expected_index) - tm.assert_index_equal(result.columns, expected_columns) - else: - tm.assertRaisesRegexp( - ValueError, "'arg1' columns are not unique", f, df, - df2) - tm.assertRaisesRegexp( - ValueError, "'arg2' columns are not unique", f, - df2, df) - - # DataFrame with a Series - for f in [lambda x, y: x.expanding().cov(y), - lambda x, y: x.expanding().corr(y), - lambda x, y: x.rolling(window=3).cov(y), - lambda x, y: x.rolling(window=3).corr(y), - lambda x, y: x.ewm(com=3).cov(y), - lambda x, y: x.ewm(com=3).corr(y), ]: - results = [f(df, s) for df in df1s] + [f(s, df) for df in df1s] - for (df, result) in zip(df1s, results): - tm.assert_index_equal(result.index, df.index) - tm.assert_index_equal(result.columns, df.columns) - for i, result in enumerate(results): - if i > 0: - self.assert_numpy_array_equal(result.values, - results[0].values) - def test_rolling_skew_edge_cases(self): all_nan = 
Series([np.NaN] * 5) @@ -2806,55 +2783,83 @@ def test_rolling_kurt_edge_cases(self): x = d.rolling(window=4).kurt() tm.assert_series_equal(expected, x) - def _check_expanding_ndarray(self, func, static_comp, has_min_periods=True, - has_time_rule=True, preserve_nan=True): - result = func(self.arr) + def test_rolling_skew_eq_value_fperr(self): + # #18804 all rolling skew for all equal values should return Nan + a = Series([1.1] * 15).rolling(window=10).skew() + assert np.isnan(a).all() + + def test_rolling_kurt_eq_value_fperr(self): + # #18804 all rolling kurt for all equal values should return Nan + a = Series([1.1] * 15).rolling(window=10).kurt() + assert np.isnan(a).all() + + @pytest.mark.parametrize('func,static_comp', [('sum', np.sum), + ('mean', np.mean), + ('max', np.max), + ('min', np.min)], + ids=['sum', 'mean', 'max', 'min']) + def test_expanding_func(self, func, static_comp): + def expanding_func(x, min_periods=1, center=False, axis=0): + exp = x.expanding(min_periods=min_periods, + center=center, axis=axis) + return getattr(exp, func)() + self._check_expanding(expanding_func, static_comp, preserve_nan=False) + + def test_expanding_apply(self): + + def expanding_mean(x, min_periods=1): + exp = x.expanding(min_periods=min_periods) + return exp.apply(lambda x: x.mean()) + + self._check_expanding(expanding_mean, np.mean) + + ser = Series([]) + tm.assert_series_equal(ser, ser.expanding().apply(lambda x: x.mean())) + + # GH 8080 + s = Series([None, None, None]) + result = s.expanding(min_periods=0).apply(lambda x: len(x)) + expected = Series([1., 2., 3.]) + tm.assert_series_equal(result, expected) + + def _check_expanding(self, func, static_comp, has_min_periods=True, + has_time_rule=True, preserve_nan=True): + + series_result = func(self.series) + assert isinstance(series_result, Series) + frame_result = func(self.frame) + assert isinstance(frame_result, DataFrame) - tm.assert_almost_equal(result[10], static_comp(self.arr[:11])) + result = func(self.series) + 
tm.assert_almost_equal(result[10], static_comp(self.series[:11])) if preserve_nan: - assert (np.isnan(result[self._nan_locs]).all()) + assert result.iloc[self._nan_locs].isna().all() - arr = randn(50) + ser = Series(randn(50)) if has_min_periods: - result = func(arr, min_periods=30) - assert (np.isnan(result[:29]).all()) - tm.assert_almost_equal(result[-1], static_comp(arr[:50])) + result = func(ser, min_periods=30) + assert result[:29].isna().all() + tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50])) # min_periods is working correctly - result = func(arr, min_periods=15) - self.assertTrue(np.isnan(result[13])) - self.assertFalse(np.isnan(result[14])) + result = func(ser, min_periods=15) + assert isna(result.iloc[13]) + assert notna(result.iloc[14]) - arr2 = randn(20) - result = func(arr2, min_periods=5) - self.assertTrue(isnull(result[3])) - self.assertTrue(notnull(result[4])) + ser2 = Series(randn(20)) + result = func(ser2, min_periods=5) + assert isna(result[3]) + assert notna(result[4]) # min_periods=0 - result0 = func(arr, min_periods=0) - result1 = func(arr, min_periods=1) + result0 = func(ser, min_periods=0) + result1 = func(ser, min_periods=1) tm.assert_almost_equal(result0, result1) else: - result = func(arr) - tm.assert_almost_equal(result[-1], static_comp(arr[:50])) - - def _check_expanding_structures(self, func): - series_result = func(self.series) - tm.assertIsInstance(series_result, Series) - frame_result = func(self.frame) - self.assertEqual(type(frame_result), DataFrame) - - def _check_expanding(self, func, static_comp, has_min_periods=True, - has_time_rule=True, preserve_nan=True): - with warnings.catch_warnings(record=True): - self._check_expanding_ndarray(func, static_comp, - has_min_periods=has_min_periods, - has_time_rule=has_time_rule, - preserve_nan=preserve_nan) - with warnings.catch_warnings(record=True): - self._check_expanding_structures(func) + result = func(ser) + tm.assert_almost_equal(result.iloc[-1], 
static_comp(ser[:50])) def test_rolling_max_gh6297(self): """Replicate result expected in GH #6297""" @@ -2870,11 +2875,10 @@ def test_rolling_max_gh6297(self): expected = Series([1.0, 2.0, 6.0, 4.0, 5.0], index=[datetime(1975, 1, i, 0) for i in range(1, 6)]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - x = series.rolling(window=1, freq='D').max() + x = series.resample('D').max().rolling(window=1).max() tm.assert_series_equal(expected, x) - def test_rolling_max_how_resample(self): + def test_rolling_max_resample(self): indices = [datetime(1975, 1, i) for i in range(1, 6)] # So that we can have 3 datapoints on last day (4, 10, and 20) @@ -2889,26 +2893,23 @@ def test_rolling_max_how_resample(self): # Default how should be max expected = Series([0.0, 1.0, 2.0, 3.0, 20.0], index=[datetime(1975, 1, i, 0) for i in range(1, 6)]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - x = series.rolling(window=1, freq='D').max() + x = series.resample('D').max().rolling(window=1).max() tm.assert_series_equal(expected, x) # Now specify median (10.0) expected = Series([0.0, 1.0, 2.0, 3.0, 10.0], index=[datetime(1975, 1, i, 0) for i in range(1, 6)]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - x = series.rolling(window=1, freq='D').max(how='median') + x = series.resample('D').median().rolling(window=1).max() tm.assert_series_equal(expected, x) # Now specify mean (4+10+20)/3 v = (4.0 + 10.0 + 20.0) / 3.0 expected = Series([0.0, 1.0, 2.0, 3.0, v], index=[datetime(1975, 1, i, 0) for i in range(1, 6)]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - x = series.rolling(window=1, freq='D').max(how='mean') - tm.assert_series_equal(expected, x) + x = series.resample('D').mean().rolling(window=1).max() + tm.assert_series_equal(expected, x) - def test_rolling_min_how_resample(self): + def test_rolling_min_resample(self): indices = [datetime(1975, 1, i) for i in range(1, 6)] # 
So that we can have 3 datapoints on last day (4, 10, and 20) @@ -2923,11 +2924,10 @@ def test_rolling_min_how_resample(self): # Default how should be min expected = Series([0.0, 1.0, 2.0, 3.0, 4.0], index=[datetime(1975, 1, i, 0) for i in range(1, 6)]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - r = series.rolling(window=1, freq='D') - tm.assert_series_equal(expected, r.min()) + r = series.resample('D').min().rolling(window=1) + tm.assert_series_equal(expected, r.min()) - def test_rolling_median_how_resample(self): + def test_rolling_median_resample(self): indices = [datetime(1975, 1, i) for i in range(1, 6)] # So that we can have 3 datapoints on last day (4, 10, and 20) @@ -2942,9 +2942,8 @@ def test_rolling_median_how_resample(self): # Default how should be median expected = Series([0.0, 1.0, 2.0, 3.0, 10], index=[datetime(1975, 1, i, 0) for i in range(1, 6)]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - x = series.rolling(window=1, freq='D').median() - tm.assert_series_equal(expected, x) + x = series.resample('D').median().rolling(window=1).median() + tm.assert_series_equal(expected, x) def test_rolling_median_memory_error(self): # GH11722 @@ -2964,15 +2963,15 @@ def test_rolling_min_max_numeric_types(self): # correctness result = (DataFrame(np.arange(20, dtype=data_type)) .rolling(window=5).max()) - self.assertEqual(result.dtypes[0], np.dtype("f8")) + assert result.dtypes[0] == np.dtype("f8") result = (DataFrame(np.arange(20, dtype=data_type)) .rolling(window=5).min()) - self.assertEqual(result.dtypes[0], np.dtype("f8")) + assert result.dtypes[0] == np.dtype("f8") -class TestGrouperGrouping(tm.TestCase): +class TestGrouperGrouping(object): - def setUp(self): + def setup_method(self, method): self.series = Series(np.arange(10)) self.frame = DataFrame({'A': [1] * 20 + [2] * 12 + [3] * 8, 'B': np.arange(40)}) @@ -2981,12 +2980,12 @@ def test_mutated(self): def f(): self.frame.groupby('A', foo=1) - 
self.assertRaises(TypeError, f) + pytest.raises(TypeError, f) g = self.frame.groupby('A') - self.assertFalse(g.mutated) + assert not g.mutated g = self.frame.groupby('A', mutated=True) - self.assertTrue(g.mutated) + assert g.mutated def test_getitem(self): g = self.frame.groupby('A') @@ -3115,12 +3114,12 @@ def test_expanding_apply(self): tm.assert_frame_equal(result, expected) -class TestRollingTS(tm.TestCase): +class TestRollingTS(object): # rolling time-series friendly # xref GH13327 - def setUp(self): + def setup_method(self, method): self.regular = DataFrame({'A': pd.date_range('20130101', periods=5, @@ -3150,16 +3149,16 @@ def test_valid(self): df = self.regular # not a valid freq - with self.assertRaises(ValueError): + with pytest.raises(ValueError): df.rolling(window='foobar') # not a datetimelike index - with self.assertRaises(ValueError): + with pytest.raises(ValueError): df.reset_index().rolling(window='foobar') # non-fixed freqs for freq in ['2MS', pd.offsets.MonthBegin(2)]: - with self.assertRaises(ValueError): + with pytest.raises(ValueError): df.rolling(window=freq) for freq in ['1D', pd.offsets.Day(2), '2ms']: @@ -3167,11 +3166,11 @@ def test_valid(self): # non-integer min_periods for minp in [1.0, 'foo', np.array([1, 2, 3])]: - with self.assertRaises(ValueError): + with pytest.raises(ValueError): df.rolling(window='1D', min_periods=minp) # center is not implemented - with self.assertRaises(NotImplementedError): + with pytest.raises(NotImplementedError): df.rolling(window='1D', center=True) def test_on(self): @@ -3179,7 +3178,7 @@ def test_on(self): df = self.regular # not a valid column - with self.assertRaises(ValueError): + with pytest.raises(ValueError): df.rolling(window='2s', on='foobar') # column is valid @@ -3188,7 +3187,7 @@ def test_on(self): df.rolling(window='2d', on='C').sum() # invalid columns - with self.assertRaises(ValueError): + with pytest.raises(ValueError): df.rolling(window='2d', on='B') # ok even though on non-selected @@ 
-3202,22 +3201,22 @@ def test_monotonic_on(self): freq='s'), 'B': range(5)}) - self.assertTrue(df.A.is_monotonic) + assert df.A.is_monotonic df.rolling('2s', on='A').sum() df = df.set_index('A') - self.assertTrue(df.index.is_monotonic) + assert df.index.is_monotonic df.rolling('2s').sum() # non-monotonic df.index = reversed(df.index.tolist()) - self.assertFalse(df.index.is_monotonic) + assert not df.index.is_monotonic - with self.assertRaises(ValueError): + with pytest.raises(ValueError): df.rolling('2s').sum() df = df.reset_index() - with self.assertRaises(ValueError): + with pytest.raises(ValueError): df.rolling('2s', on='A').sum() def test_frame_on(self): @@ -3249,7 +3248,7 @@ def test_frame_on(self): # test as a frame # we should be ignoring the 'on' as an aggregation column - # note that the expected is setting, computing, and reseting + # note that the expected is setting, computing, and resetting # so the columns need to be switched compared # to the actual result where they are ordered as in the # original @@ -3269,11 +3268,11 @@ def test_frame_on2(self): # using multiple aggregation columns df = DataFrame({'A': [0, 1, 2, 3, 4], 'B': [0, 1, 2, np.nan, 4], - 'C': pd.Index([pd.Timestamp('20130101 09:00:00'), - pd.Timestamp('20130101 09:00:02'), - pd.Timestamp('20130101 09:00:03'), - pd.Timestamp('20130101 09:00:05'), - pd.Timestamp('20130101 09:00:06')])}, + 'C': Index([Timestamp('20130101 09:00:00'), + Timestamp('20130101 09:00:02'), + Timestamp('20130101 09:00:03'), + Timestamp('20130101 09:00:05'), + Timestamp('20130101 09:00:06')])}, columns=['A', 'C', 'B']) expected1 = DataFrame({'A': [0., 1, 3, 3, 7], @@ -3329,6 +3328,45 @@ def test_min_periods(self): result = df.rolling('2s', min_periods=1).sum() tm.assert_frame_equal(result, expected) + def test_closed(self): + + # xref GH13965 + + df = DataFrame({'A': [1] * 5}, + index=[Timestamp('20130101 09:00:01'), + Timestamp('20130101 09:00:02'), + Timestamp('20130101 09:00:03'), + Timestamp('20130101 
09:00:04'), + Timestamp('20130101 09:00:06')]) + + # closed must be 'right', 'left', 'both', 'neither' + with pytest.raises(ValueError): + self.regular.rolling(window='2s', closed="blabla") + + expected = df.copy() + expected["A"] = [1.0, 2, 2, 2, 1] + result = df.rolling('2s', closed='right').sum() + tm.assert_frame_equal(result, expected) + + # default should be 'right' + result = df.rolling('2s').sum() + tm.assert_frame_equal(result, expected) + + expected = df.copy() + expected["A"] = [1.0, 2, 3, 3, 2] + result = df.rolling('2s', closed='both').sum() + tm.assert_frame_equal(result, expected) + + expected = df.copy() + expected["A"] = [np.nan, 1.0, 2, 2, 1] + result = df.rolling('2s', closed='left').sum() + tm.assert_frame_equal(result, expected) + + expected = df.copy() + expected["A"] = [np.nan, 1.0, 1, 1, np.nan] + result = df.rolling('2s', closed='neither').sum() + tm.assert_frame_equal(result, expected) + def test_ragged_sum(self): df = self.ragged @@ -3408,7 +3446,7 @@ def test_ragged_quantile(self): result = df.rolling(window='2s', min_periods=1).quantile(0.5) expected = df.copy() - expected['B'] = [0.0, 1, 1.0, 3.0, 3.0] + expected['B'] = [0.0, 1, 1.5, 3.0, 3.5] tm.assert_frame_equal(result, expected) def test_ragged_std(self): @@ -3561,11 +3599,11 @@ def test_perf_min(self): freq='s')) expected = dfp.rolling(2, min_periods=1).min() result = dfp.rolling('2s').min() - self.assertTrue(((result - expected) < 0.01).all().bool()) + assert ((result - expected) < 0.01).all().bool() expected = dfp.rolling(200, min_periods=1).min() result = dfp.rolling('200s').min() - self.assertTrue(((result - expected) < 0.01).all().bool()) + assert ((result - expected) < 0.01).all().bool() def test_ragged_max(self): @@ -3608,7 +3646,7 @@ def test_ragged_apply(self): def test_all(self): - # simple comparision of integer vs time-based windowing + # simple comparison of integer vs time-based windowing df = self.regular * 2 er = df.rolling(window=1) r = df.rolling(window='1s') @@ 
-3630,7 +3668,7 @@ def test_all(self): def test_all2(self): - # more sophisticated comparision of integer vs. + # more sophisticated comparison of integer vs. # time-based windowing df = DataFrame({'B': np.arange(50)}, index=pd.date_range('20130101', @@ -3670,10 +3708,48 @@ def test_groupby_monotonic(self): ['Ryan', '3/31/2016', 50], ['Joe', '7/1/2015', 100], ['Joe', '9/9/2015', 500], ['Joe', '10/15/2015', 50]] - df = pd.DataFrame(data=data, columns=['name', 'date', 'amount']) + df = DataFrame(data=data, columns=['name', 'date', 'amount']) df['date'] = pd.to_datetime(df['date']) expected = df.set_index('date').groupby('name').apply( lambda x: x.rolling('180D')['amount'].sum()) result = df.groupby('name').rolling('180D', on='date')['amount'].sum() tm.assert_series_equal(result, expected) + + def test_non_monotonic(self): + # GH 13966 (similar to #15130, closed by #15175) + + dates = pd.date_range(start='2016-01-01 09:30:00', + periods=20, freq='s') + df = DataFrame({'A': [1] * 20 + [2] * 12 + [3] * 8, + 'B': np.concatenate((dates, dates)), + 'C': np.arange(40)}) + + result = df.groupby('A').rolling('4s', on='B').C.mean() + expected = df.set_index('B').groupby('A').apply( + lambda x: x.rolling('4s')['C'].mean()) + tm.assert_series_equal(result, expected) + + df2 = df.sort_values('B') + result = df2.groupby('A').rolling('4s', on='B').C.mean() + tm.assert_series_equal(result, expected) + + def test_rolling_cov_offset(self): + # GH16058 + + idx = pd.date_range('2017-01-01', periods=24, freq='1h') + ss = Series(np.arange(len(idx)), index=idx) + + result = ss.rolling('2h').cov() + expected = Series([np.nan] + [0.5] * (len(idx) - 1), index=idx) + tm.assert_series_equal(result, expected) + + expected2 = ss.rolling(2, min_periods=1).cov() + tm.assert_series_equal(result, expected2) + + result = ss.rolling('3h').cov() + expected = Series([np.nan, 0.5] + [1.0] * (len(idx) - 2), index=idx) + tm.assert_series_equal(result, expected) + + expected2 = ss.rolling(3, 
min_periods=1).cov() + tm.assert_series_equal(result, expected2) diff --git a/pandas/tests/tools/__init__.py b/pandas/tests/tools/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tools/tests/test_util.py b/pandas/tests/tools/test_numeric.py similarity index 71% rename from pandas/tools/tests/test_util.py rename to pandas/tests/tools/test_numeric.py index 2672db13a959f..b306dba0be7f1 100644 --- a/pandas/tools/tests/test_util.py +++ b/pandas/tests/tools/test_numeric.py @@ -1,129 +1,30 @@ -import os -import locale -import codecs import pytest import decimal import numpy as np +import pandas as pd +from pandas import to_numeric + +from pandas.util import testing as tm from numpy import iinfo -import pandas as pd -from pandas import (date_range, Index, _np_version_under1p9) -import pandas.util.testing as tm -from pandas.tools.util import cartesian_product, to_numeric - -CURRENT_LOCALE = locale.getlocale() -LOCALE_OVERRIDE = os.environ.get('LOCALE_OVERRIDE', None) - - -class TestCartesianProduct(tm.TestCase): - - def test_simple(self): - x, y = list('ABC'), [1, 22] - result1, result2 = cartesian_product([x, y]) - expected1 = np.array(['A', 'A', 'B', 'B', 'C', 'C']) - expected2 = np.array([1, 22, 1, 22, 1, 22]) - tm.assert_numpy_array_equal(result1, expected1) - tm.assert_numpy_array_equal(result2, expected2) - - def test_datetimeindex(self): - # regression test for GitHub issue #6439 - # make sure that the ordering on datetimeindex is consistent - x = date_range('2000-01-01', periods=2) - result1, result2 = [Index(y).day for y in cartesian_product([x, x])] - expected1 = np.array([1, 1, 2, 2], dtype=np.int32) - expected2 = np.array([1, 2, 1, 2], dtype=np.int32) - tm.assert_numpy_array_equal(result1, expected1) - tm.assert_numpy_array_equal(result2, expected2) + +class TestToNumeric(object): def test_empty(self): - # product of empty factors - X = [[], [0, 1], []] - Y = [[], [], ['a', 'b', 'c']] - for x, y in zip(X, Y): - expected1 = 
np.array([], dtype=np.asarray(x).dtype) - expected2 = np.array([], dtype=np.asarray(y).dtype) - result1, result2 = cartesian_product([x, y]) - tm.assert_numpy_array_equal(result1, expected1) - tm.assert_numpy_array_equal(result2, expected2) - - # empty product (empty input): - result = cartesian_product([]) - expected = [] - tm.assert_equal(result, expected) - - def test_invalid_input(self): - invalid_inputs = [1, [1], [1, 2], [[1], 2], - 'a', ['a'], ['a', 'b'], [['a'], 'b']] - msg = "Input must be a list-like of list-likes" - for X in invalid_inputs: - tm.assertRaisesRegexp(TypeError, msg, cartesian_product, X=X) - - -class TestLocaleUtils(tm.TestCase): - - @classmethod - def setUpClass(cls): - super(TestLocaleUtils, cls).setUpClass() - cls.locales = tm.get_locales() - - if not cls.locales: - pytest.skip("No locales found") - - tm._skip_if_windows() - - @classmethod - def tearDownClass(cls): - super(TestLocaleUtils, cls).tearDownClass() - del cls.locales - - def test_get_locales(self): - # all systems should have at least a single locale - assert len(tm.get_locales()) > 0 - - def test_get_locales_prefix(self): - if len(self.locales) == 1: - pytest.skip("Only a single locale found, no point in " - "trying to test filtering locale prefixes") - first_locale = self.locales[0] - assert len(tm.get_locales(prefix=first_locale[:2])) > 0 - - def test_set_locale(self): - if len(self.locales) == 1: - pytest.skip("Only a single locale found, no point in " - "trying to test setting another locale") - - if all(x is None for x in CURRENT_LOCALE): - # Not sure why, but on some travis runs with pytest, - # getlocale() returned (None, None). 
- pytest.skip("CURRENT_LOCALE is not set.") - - if LOCALE_OVERRIDE is None: - lang, enc = 'it_CH', 'UTF-8' - elif LOCALE_OVERRIDE == 'C': - lang, enc = 'en_US', 'ascii' - else: - lang, enc = LOCALE_OVERRIDE.split('.') - - enc = codecs.lookup(enc).name - new_locale = lang, enc - - if not tm._can_set_locale(new_locale): - with tm.assertRaises(locale.Error): - with tm.set_locale(new_locale): - pass - else: - with tm.set_locale(new_locale) as normalized_locale: - new_lang, new_enc = normalized_locale.split('.') - new_enc = codecs.lookup(enc).name - normalized_locale = new_lang, new_enc - self.assertEqual(normalized_locale, new_locale) - - current_locale = locale.getlocale() - self.assertEqual(current_locale, CURRENT_LOCALE) - - -class TestToNumeric(tm.TestCase): + # see gh-16302 + s = pd.Series([], dtype=object) + + res = to_numeric(s) + expected = pd.Series([], dtype=np.int64) + + tm.assert_series_equal(res, expected) + + # Original issue example + res = to_numeric(s, errors='coerce', downcast='integer') + expected = pd.Series([], dtype=np.int8) + + tm.assert_series_equal(res, expected) def test_series(self): s = pd.Series(['1', '-3.14', '7']) @@ -153,7 +54,7 @@ def test_series_numeric(self): def test_error(self): s = pd.Series([1, -3.14, 'apple']) msg = 'Unable to parse string "apple" at position 2' - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): to_numeric(s, errors='raise') res = to_numeric(s, errors='ignore') @@ -166,13 +67,13 @@ def test_error(self): s = pd.Series(['orange', 1, -3.14, 'apple']) msg = 'Unable to parse string "orange" at position 0' - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): to_numeric(s, errors='raise') def test_error_seen_bool(self): s = pd.Series([True, False, 'apple']) msg = 'Unable to parse string "apple" at position 2' - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): to_numeric(s, errors='raise') res 
= to_numeric(s, errors='ignore') @@ -263,24 +164,24 @@ def test_all_nan(self): def test_type_check(self): # GH 11776 df = pd.DataFrame({'a': [1, -3.14, 7], 'b': ['4', '5', '6']}) - with tm.assertRaisesRegexp(TypeError, "1-d array"): + with tm.assert_raises_regex(TypeError, "1-d array"): to_numeric(df) for errors in ['ignore', 'raise', 'coerce']: - with tm.assertRaisesRegexp(TypeError, "1-d array"): + with tm.assert_raises_regex(TypeError, "1-d array"): to_numeric(df, errors=errors) def test_scalar(self): - self.assertEqual(pd.to_numeric(1), 1) - self.assertEqual(pd.to_numeric(1.1), 1.1) + assert pd.to_numeric(1) == 1 + assert pd.to_numeric(1.1) == 1.1 - self.assertEqual(pd.to_numeric('1'), 1) - self.assertEqual(pd.to_numeric('1.1'), 1.1) + assert pd.to_numeric('1') == 1 + assert pd.to_numeric('1.1') == 1.1 - with tm.assertRaises(ValueError): + with pytest.raises(ValueError): to_numeric('XX', errors='raise') - self.assertEqual(to_numeric('XX', errors='ignore'), 'XX') - self.assertTrue(np.isnan(to_numeric('XX', errors='coerce'))) + assert to_numeric('XX', errors='ignore') == 'XX' + assert np.isnan(to_numeric('XX', errors='coerce')) def test_numeric_dtypes(self): idx = pd.Index([1, 2, 3], name='xxx') @@ -367,7 +268,7 @@ def test_non_hashable(self): res = pd.to_numeric(s, errors='ignore') tm.assert_series_equal(res, pd.Series([[10.0, 2], 1.0, 'apple'])) - with self.assertRaisesRegexp(TypeError, "Invalid object type"): + with tm.assert_raises_regex(TypeError, "Invalid object type"): pd.to_numeric(s) def test_downcast(self): @@ -388,7 +289,7 @@ def test_downcast(self): smallest_float_dtype = float_32_char for data in (mixed_data, int_data, date_data): - with self.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): pd.to_numeric(data, downcast=invalid_downcast) expected = np.array([1, 2, 3], dtype=np.int64) @@ -454,9 +355,6 @@ def test_downcast(self): def test_downcast_limits(self): # Test the limits of each downcast. Bug: #14401. 
- # Check to make sure numpy is new enough to run this test. - if _np_version_under1p9: - pytest.skip("Numpy version is under 1.9") i = 'integer' u = 'unsigned' @@ -482,4 +380,29 @@ def test_downcast_limits(self): for dtype, downcast, min_max in dtype_downcast_min_max: series = pd.to_numeric(pd.Series(min_max), downcast=downcast) - tm.assert_equal(series.dtype, dtype) + assert series.dtype == dtype + + def test_coerce_uint64_conflict(self): + # see gh-17007 and gh-17125 + # + # Still returns float despite the uint64-nan conflict, + # which would normally force the casting to object. + df = pd.DataFrame({"a": [200, 300, "", "NaN", 30000000000000000000]}) + expected = pd.Series([200, 300, np.nan, np.nan, + 30000000000000000000], dtype=float, name="a") + result = to_numeric(df["a"], errors="coerce") + tm.assert_series_equal(result, expected) + + s = pd.Series(["12345678901234567890", "1234567890", "ITEM"]) + expected = pd.Series([12345678901234567890, + 1234567890, np.nan], dtype=float) + result = to_numeric(s, errors="coerce") + tm.assert_series_equal(result, expected) + + # For completeness, check against "ignore" and "raise" + result = to_numeric(s, errors="ignore") + tm.assert_series_equal(result, s) + + msg = "Unable to parse string" + with tm.assert_raises_regex(ValueError, msg): + to_numeric(s, errors="raise") diff --git a/pandas/tests/tseries/conftest.py b/pandas/tests/tseries/conftest.py new file mode 100644 index 0000000000000..fc1ecf21c5446 --- /dev/null +++ b/pandas/tests/tseries/conftest.py @@ -0,0 +1,7 @@ +import pytest + + +@pytest.fixture(params=[None, 'UTC', 'Asia/Tokyo', 'US/Eastern', + 'dateutil/Asia/Tokyo', 'dateutil/US/Pacific']) +def tz(request): + return request.param diff --git a/pandas/tests/tseries/offsets/__init__.py b/pandas/tests/tseries/offsets/__init__.py new file mode 100644 index 0000000000000..40a96afc6ff09 --- /dev/null +++ b/pandas/tests/tseries/offsets/__init__.py @@ -0,0 +1 @@ +# -*- coding: utf-8 -*- diff --git 
a/pandas/tests/tseries/offsets/common.py b/pandas/tests/tseries/offsets/common.py new file mode 100644 index 0000000000000..2e8eb224bca7f --- /dev/null +++ b/pandas/tests/tseries/offsets/common.py @@ -0,0 +1,25 @@ +# -*- coding: utf-8 -*- +""" +Assertion helpers for offsets tests +""" + + +def assert_offset_equal(offset, base, expected): + actual = offset + base + actual_swapped = base + offset + actual_apply = offset.apply(base) + try: + assert actual == expected + assert actual_swapped == expected + assert actual_apply == expected + except AssertionError: + raise AssertionError("\nExpected: %s\nActual: %s\nFor Offset: %s)" + "\nAt Date: %s" % + (expected, actual, offset, base)) + + +def assert_onOffset(offset, date, expected): + actual = offset.onOffset(date) + assert actual == expected, ("\nExpected: %s\nActual: %s\nFor Offset: %s)" + "\nAt Date: %s" % + (expected, actual, offset, date)) diff --git a/pandas/tests/tseries/offsets/conftest.py b/pandas/tests/tseries/offsets/conftest.py new file mode 100644 index 0000000000000..76f24123ea0e1 --- /dev/null +++ b/pandas/tests/tseries/offsets/conftest.py @@ -0,0 +1,26 @@ +import pytest +import pandas.tseries.offsets as offsets + + +@pytest.fixture(params=[getattr(offsets, o) for o in offsets.__all__]) +def offset_types(request): + return request.param + + +@pytest.fixture(params=[getattr(offsets, o) for o in offsets.__all__ if + issubclass(getattr(offsets, o), offsets.MonthOffset) + and o != 'MonthOffset']) +def month_classes(request): + return request.param + + +@pytest.fixture(params=[getattr(offsets, o) for o in offsets.__all__ if + issubclass(getattr(offsets, o), offsets.Tick)]) +def tick_classes(request): + return request.param + + +@pytest.fixture(params=[None, 'UTC', 'Asia/Tokyo', 'US/Eastern', + 'dateutil/Asia/Tokyo', 'dateutil/US/Pacific']) +def tz(request): + return request.param diff --git a/pandas/tests/tseries/data/cday-0.14.1.pickle b/pandas/tests/tseries/offsets/data/cday-0.14.1.pickle similarity index 
100% rename from pandas/tests/tseries/data/cday-0.14.1.pickle rename to pandas/tests/tseries/offsets/data/cday-0.14.1.pickle diff --git a/pandas/tests/tseries/data/dateoffset_0_15_2.pickle b/pandas/tests/tseries/offsets/data/dateoffset_0_15_2.pickle similarity index 100% rename from pandas/tests/tseries/data/dateoffset_0_15_2.pickle rename to pandas/tests/tseries/offsets/data/dateoffset_0_15_2.pickle diff --git a/pandas/tests/tseries/offsets/test_fiscal.py b/pandas/tests/tseries/offsets/test_fiscal.py new file mode 100644 index 0000000000000..c084cccbb74ac --- /dev/null +++ b/pandas/tests/tseries/offsets/test_fiscal.py @@ -0,0 +1,658 @@ +# -*- coding: utf-8 -*- +""" +Tests for Fiscal Year and Fiscal Quarter offset classes +""" +from datetime import datetime + +from dateutil.relativedelta import relativedelta +import pytest + +import pandas.util.testing as tm + +from pandas import Timestamp +from pandas.tseries.frequencies import get_offset +from pandas._libs.tslibs.frequencies import _INVALID_FREQ_ERROR +from pandas.tseries.offsets import FY5253Quarter, FY5253 +from pandas._libs.tslibs.offsets import WeekDay + +from .common import assert_offset_equal, assert_onOffset +from .test_offsets import Base + + +def makeFY5253LastOfMonthQuarter(*args, **kwds): + return FY5253Quarter(*args, variation="last", **kwds) + + +def makeFY5253NearestEndMonthQuarter(*args, **kwds): + return FY5253Quarter(*args, variation="nearest", **kwds) + + +def makeFY5253NearestEndMonth(*args, **kwds): + return FY5253(*args, variation="nearest", **kwds) + + +def makeFY5253LastOfMonth(*args, **kwds): + return FY5253(*args, variation="last", **kwds) + + +def test_get_offset_name(): + assert (makeFY5253LastOfMonthQuarter( + weekday=1, startingMonth=3, + qtr_with_extra_week=4).freqstr == "REQ-L-MAR-TUE-4") + assert (makeFY5253NearestEndMonthQuarter( + weekday=1, startingMonth=3, + qtr_with_extra_week=3).freqstr == "REQ-N-MAR-TUE-3") + + +def test_get_offset(): + with 
tm.assert_raises_regex(ValueError, _INVALID_FREQ_ERROR): + get_offset('gibberish') + with tm.assert_raises_regex(ValueError, _INVALID_FREQ_ERROR): + get_offset('QS-JAN-B') + + pairs = [ + ("RE-N-DEC-MON", + makeFY5253NearestEndMonth(weekday=0, startingMonth=12)), + ("RE-L-DEC-TUE", + makeFY5253LastOfMonth(weekday=1, startingMonth=12)), + ("REQ-L-MAR-TUE-4", + makeFY5253LastOfMonthQuarter(weekday=1, + startingMonth=3, + qtr_with_extra_week=4)), + ("REQ-L-DEC-MON-3", + makeFY5253LastOfMonthQuarter(weekday=0, + startingMonth=12, + qtr_with_extra_week=3)), + ("REQ-N-DEC-MON-3", + makeFY5253NearestEndMonthQuarter(weekday=0, + startingMonth=12, + qtr_with_extra_week=3))] + + for name, expected in pairs: + offset = get_offset(name) + assert offset == expected, ("Expected %r to yield %r (actual: %r)" % + (name, expected, offset)) + + +class TestFY5253LastOfMonth(Base): + offset_lom_sat_aug = makeFY5253LastOfMonth(1, startingMonth=8, + weekday=WeekDay.SAT) + offset_lom_sat_sep = makeFY5253LastOfMonth(1, startingMonth=9, + weekday=WeekDay.SAT) + + on_offset_cases = [ + # From Wikipedia (see: + # http://en.wikipedia.org/wiki/4%E2%80%934%E2%80%935_calendar#Last_Saturday_of_the_month_at_fiscal_year_end) + (offset_lom_sat_aug, datetime(2006, 8, 26), True), + (offset_lom_sat_aug, datetime(2007, 8, 25), True), + (offset_lom_sat_aug, datetime(2008, 8, 30), True), + (offset_lom_sat_aug, datetime(2009, 8, 29), True), + (offset_lom_sat_aug, datetime(2010, 8, 28), True), + (offset_lom_sat_aug, datetime(2011, 8, 27), True), + (offset_lom_sat_aug, datetime(2012, 8, 25), True), + (offset_lom_sat_aug, datetime(2013, 8, 31), True), + (offset_lom_sat_aug, datetime(2014, 8, 30), True), + (offset_lom_sat_aug, datetime(2015, 8, 29), True), + (offset_lom_sat_aug, datetime(2016, 8, 27), True), + (offset_lom_sat_aug, datetime(2017, 8, 26), True), + (offset_lom_sat_aug, datetime(2018, 8, 25), True), + (offset_lom_sat_aug, datetime(2019, 8, 31), True), + + (offset_lom_sat_aug, datetime(2006, 8, 27), 
False), + (offset_lom_sat_aug, datetime(2007, 8, 28), False), + (offset_lom_sat_aug, datetime(2008, 8, 31), False), + (offset_lom_sat_aug, datetime(2009, 8, 30), False), + (offset_lom_sat_aug, datetime(2010, 8, 29), False), + (offset_lom_sat_aug, datetime(2011, 8, 28), False), + + (offset_lom_sat_aug, datetime(2006, 8, 25), False), + (offset_lom_sat_aug, datetime(2007, 8, 24), False), + (offset_lom_sat_aug, datetime(2008, 8, 29), False), + (offset_lom_sat_aug, datetime(2009, 8, 28), False), + (offset_lom_sat_aug, datetime(2010, 8, 27), False), + (offset_lom_sat_aug, datetime(2011, 8, 26), False), + (offset_lom_sat_aug, datetime(2019, 8, 30), False), + + # From GMCR (see for example: + # http://yahoo.brand.edgar-online.com/Default.aspx? + # companyid=3184&formtypeID=7) + (offset_lom_sat_sep, datetime(2010, 9, 25), True), + (offset_lom_sat_sep, datetime(2011, 9, 24), True), + (offset_lom_sat_sep, datetime(2012, 9, 29), True)] + + @pytest.mark.parametrize('case', on_offset_cases) + def test_onOffset(self, case): + offset, dt, expected = case + assert_onOffset(offset, dt, expected) + + def test_apply(self): + offset_lom_aug_sat = makeFY5253LastOfMonth(startingMonth=8, + weekday=WeekDay.SAT) + offset_lom_aug_sat_1 = makeFY5253LastOfMonth(n=1, startingMonth=8, + weekday=WeekDay.SAT) + + date_seq_lom_aug_sat = [datetime(2006, 8, 26), datetime(2007, 8, 25), + datetime(2008, 8, 30), datetime(2009, 8, 29), + datetime(2010, 8, 28), datetime(2011, 8, 27), + datetime(2012, 8, 25), datetime(2013, 8, 31), + datetime(2014, 8, 30), datetime(2015, 8, 29), + datetime(2016, 8, 27)] + + tests = [ + (offset_lom_aug_sat, date_seq_lom_aug_sat), + (offset_lom_aug_sat_1, date_seq_lom_aug_sat), + (offset_lom_aug_sat, [ + datetime(2006, 8, 25)] + date_seq_lom_aug_sat), + (offset_lom_aug_sat_1, [ + datetime(2006, 8, 27)] + date_seq_lom_aug_sat[1:]), + (makeFY5253LastOfMonth(n=-1, startingMonth=8, + weekday=WeekDay.SAT), + list(reversed(date_seq_lom_aug_sat))), + ] + for test in tests: + 
offset, data = test + current = data[0] + for datum in data[1:]: + current = current + offset + assert current == datum + + +class TestFY5253NearestEndMonth(Base): + + def test_get_year_end(self): + assert (makeFY5253NearestEndMonth( + startingMonth=8, weekday=WeekDay.SAT).get_year_end( + datetime(2013, 1, 1)) == datetime(2013, 8, 31)) + assert (makeFY5253NearestEndMonth( + startingMonth=8, weekday=WeekDay.SUN).get_year_end( + datetime(2013, 1, 1)) == datetime(2013, 9, 1)) + assert (makeFY5253NearestEndMonth( + startingMonth=8, weekday=WeekDay.FRI).get_year_end( + datetime(2013, 1, 1)) == datetime(2013, 8, 30)) + + offset_n = FY5253(weekday=WeekDay.TUE, startingMonth=12, + variation="nearest") + assert (offset_n.get_year_end(datetime(2012, 1, 1)) == + datetime(2013, 1, 1)) + assert (offset_n.get_year_end(datetime(2012, 1, 10)) == + datetime(2013, 1, 1)) + + assert (offset_n.get_year_end(datetime(2013, 1, 1)) == + datetime(2013, 12, 31)) + assert (offset_n.get_year_end(datetime(2013, 1, 2)) == + datetime(2013, 12, 31)) + assert (offset_n.get_year_end(datetime(2013, 1, 3)) == + datetime(2013, 12, 31)) + assert (offset_n.get_year_end(datetime(2013, 1, 10)) == + datetime(2013, 12, 31)) + + JNJ = FY5253(n=1, startingMonth=12, weekday=6, variation="nearest") + assert (JNJ.get_year_end(datetime(2006, 1, 1)) == + datetime(2006, 12, 31)) + + offset_lom_aug_sat = makeFY5253NearestEndMonth(1, startingMonth=8, + weekday=WeekDay.SAT) + offset_lom_aug_thu = makeFY5253NearestEndMonth(1, startingMonth=8, + weekday=WeekDay.THU) + offset_n = FY5253(weekday=WeekDay.TUE, startingMonth=12, + variation="nearest") + + on_offset_cases = [ + # From Wikipedia (see: + # http://en.wikipedia.org/wiki/4%E2%80%934%E2%80%935_calendar + # #Saturday_nearest_the_end_of_month) + # 2006-09-02 2006 September 2 + # 2007-09-01 2007 September 1 + # 2008-08-30 2008 August 30 (leap year) + # 2009-08-29 2009 August 29 + # 2010-08-28 2010 August 28 + # 2011-09-03 2011 September 3 + # 2012-09-01 2012 September 
1 (leap year) + # 2013-08-31 2013 August 31 + # 2014-08-30 2014 August 30 + # 2015-08-29 2015 August 29 + # 2016-09-03 2016 September 3 (leap year) + # 2017-09-02 2017 September 2 + # 2018-09-01 2018 September 1 + # 2019-08-31 2019 August 31 + (offset_lom_aug_sat, datetime(2006, 9, 2), True), + (offset_lom_aug_sat, datetime(2007, 9, 1), True), + (offset_lom_aug_sat, datetime(2008, 8, 30), True), + (offset_lom_aug_sat, datetime(2009, 8, 29), True), + (offset_lom_aug_sat, datetime(2010, 8, 28), True), + (offset_lom_aug_sat, datetime(2011, 9, 3), True), + + (offset_lom_aug_sat, datetime(2016, 9, 3), True), + (offset_lom_aug_sat, datetime(2017, 9, 2), True), + (offset_lom_aug_sat, datetime(2018, 9, 1), True), + (offset_lom_aug_sat, datetime(2019, 8, 31), True), + + (offset_lom_aug_sat, datetime(2006, 8, 27), False), + (offset_lom_aug_sat, datetime(2007, 8, 28), False), + (offset_lom_aug_sat, datetime(2008, 8, 31), False), + (offset_lom_aug_sat, datetime(2009, 8, 30), False), + (offset_lom_aug_sat, datetime(2010, 8, 29), False), + (offset_lom_aug_sat, datetime(2011, 8, 28), False), + + (offset_lom_aug_sat, datetime(2006, 8, 25), False), + (offset_lom_aug_sat, datetime(2007, 8, 24), False), + (offset_lom_aug_sat, datetime(2008, 8, 29), False), + (offset_lom_aug_sat, datetime(2009, 8, 28), False), + (offset_lom_aug_sat, datetime(2010, 8, 27), False), + (offset_lom_aug_sat, datetime(2011, 8, 26), False), + (offset_lom_aug_sat, datetime(2019, 8, 30), False), + + # From Micron, see: + # http://google.brand.edgar-online.com/?sym=MU&formtypeID=7 + (offset_lom_aug_thu, datetime(2012, 8, 30), True), + (offset_lom_aug_thu, datetime(2011, 9, 1), True), + + (offset_n, datetime(2012, 12, 31), False), + (offset_n, datetime(2013, 1, 1), True), + (offset_n, datetime(2013, 1, 2), False)] + + @pytest.mark.parametrize('case', on_offset_cases) + def test_onOffset(self, case): + offset, dt, expected = case + assert_onOffset(offset, dt, expected) + + def test_apply(self): + 
date_seq_nem_8_sat = [datetime(2006, 9, 2), datetime(2007, 9, 1), + datetime(2008, 8, 30), datetime(2009, 8, 29), + datetime(2010, 8, 28), datetime(2011, 9, 3)] + + JNJ = [datetime(2005, 1, 2), datetime(2006, 1, 1), + datetime(2006, 12, 31), datetime(2007, 12, 30), + datetime(2008, 12, 28), datetime(2010, 1, 3), + datetime(2011, 1, 2), datetime(2012, 1, 1), + datetime(2012, 12, 30)] + + DEC_SAT = FY5253(n=-1, startingMonth=12, weekday=5, + variation="nearest") + + tests = [ + (makeFY5253NearestEndMonth(startingMonth=8, + weekday=WeekDay.SAT), + date_seq_nem_8_sat), + (makeFY5253NearestEndMonth(n=1, startingMonth=8, + weekday=WeekDay.SAT), + date_seq_nem_8_sat), + (makeFY5253NearestEndMonth(startingMonth=8, weekday=WeekDay.SAT), + [datetime(2006, 9, 1)] + date_seq_nem_8_sat), + (makeFY5253NearestEndMonth(n=1, startingMonth=8, + weekday=WeekDay.SAT), + [datetime(2006, 9, 3)] + date_seq_nem_8_sat[1:]), + (makeFY5253NearestEndMonth(n=-1, startingMonth=8, + weekday=WeekDay.SAT), + list(reversed(date_seq_nem_8_sat))), + (makeFY5253NearestEndMonth(n=1, startingMonth=12, + weekday=WeekDay.SUN), JNJ), + (makeFY5253NearestEndMonth(n=-1, startingMonth=12, + weekday=WeekDay.SUN), + list(reversed(JNJ))), + (makeFY5253NearestEndMonth(n=1, startingMonth=12, + weekday=WeekDay.SUN), + [datetime(2005, 1, 2), datetime(2006, 1, 1)]), + (makeFY5253NearestEndMonth(n=1, startingMonth=12, + weekday=WeekDay.SUN), + [datetime(2006, 1, 2), datetime(2006, 12, 31)]), + (DEC_SAT, [datetime(2013, 1, 15), datetime(2012, 12, 29)]) + ] + for test in tests: + offset, data = test + current = data[0] + for datum in data[1:]: + current = current + offset + assert current == datum + + +class TestFY5253LastOfMonthQuarter(Base): + + def test_isAnchored(self): + assert makeFY5253LastOfMonthQuarter( + startingMonth=1, weekday=WeekDay.SAT, + qtr_with_extra_week=4).isAnchored() + assert makeFY5253LastOfMonthQuarter( + weekday=WeekDay.SAT, startingMonth=3, + qtr_with_extra_week=4).isAnchored() + assert not 
makeFY5253LastOfMonthQuarter( + 2, startingMonth=1, weekday=WeekDay.SAT, + qtr_with_extra_week=4).isAnchored() + + def test_equality(self): + assert (makeFY5253LastOfMonthQuarter( + startingMonth=1, weekday=WeekDay.SAT, + qtr_with_extra_week=4) == makeFY5253LastOfMonthQuarter( + startingMonth=1, weekday=WeekDay.SAT, qtr_with_extra_week=4)) + assert (makeFY5253LastOfMonthQuarter( + startingMonth=1, weekday=WeekDay.SAT, + qtr_with_extra_week=4) != makeFY5253LastOfMonthQuarter( + startingMonth=1, weekday=WeekDay.SUN, qtr_with_extra_week=4)) + assert (makeFY5253LastOfMonthQuarter( + startingMonth=1, weekday=WeekDay.SAT, + qtr_with_extra_week=4) != makeFY5253LastOfMonthQuarter( + startingMonth=2, weekday=WeekDay.SAT, qtr_with_extra_week=4)) + + def test_offset(self): + offset = makeFY5253LastOfMonthQuarter(1, startingMonth=9, + weekday=WeekDay.SAT, + qtr_with_extra_week=4) + offset2 = makeFY5253LastOfMonthQuarter(2, startingMonth=9, + weekday=WeekDay.SAT, + qtr_with_extra_week=4) + offset4 = makeFY5253LastOfMonthQuarter(4, startingMonth=9, + weekday=WeekDay.SAT, + qtr_with_extra_week=4) + + offset_neg1 = makeFY5253LastOfMonthQuarter(-1, startingMonth=9, + weekday=WeekDay.SAT, + qtr_with_extra_week=4) + offset_neg2 = makeFY5253LastOfMonthQuarter(-2, startingMonth=9, + weekday=WeekDay.SAT, + qtr_with_extra_week=4) + + GMCR = [datetime(2010, 3, 27), datetime(2010, 6, 26), + datetime(2010, 9, 25), datetime(2010, 12, 25), + datetime(2011, 3, 26), datetime(2011, 6, 25), + datetime(2011, 9, 24), datetime(2011, 12, 24), + datetime(2012, 3, 24), datetime(2012, 6, 23), + datetime(2012, 9, 29), datetime(2012, 12, 29), + datetime(2013, 3, 30), datetime(2013, 6, 29)] + + assert_offset_equal(offset, base=GMCR[0], expected=GMCR[1]) + assert_offset_equal(offset, base=GMCR[0] + relativedelta(days=-1), + expected=GMCR[0]) + assert_offset_equal(offset, base=GMCR[1], expected=GMCR[2]) + + assert_offset_equal(offset2, base=GMCR[0], expected=GMCR[2]) + assert_offset_equal(offset4, 
base=GMCR[0], expected=GMCR[4]) + + assert_offset_equal(offset_neg1, base=GMCR[-1], expected=GMCR[-2]) + assert_offset_equal(offset_neg1, + base=GMCR[-1] + relativedelta(days=+1), + expected=GMCR[-1]) + assert_offset_equal(offset_neg2, base=GMCR[-1], expected=GMCR[-3]) + + date = GMCR[0] + relativedelta(days=-1) + for expected in GMCR: + assert_offset_equal(offset, date, expected) + date = date + offset + + date = GMCR[-1] + relativedelta(days=+1) + for expected in reversed(GMCR): + assert_offset_equal(offset_neg1, date, expected) + date = date + offset_neg1 + + lomq_aug_sat_4 = makeFY5253LastOfMonthQuarter(1, startingMonth=8, + weekday=WeekDay.SAT, + qtr_with_extra_week=4) + lomq_sep_sat_4 = makeFY5253LastOfMonthQuarter(1, startingMonth=9, + weekday=WeekDay.SAT, + qtr_with_extra_week=4) + + on_offset_cases = [ + # From Wikipedia + (lomq_aug_sat_4, datetime(2006, 8, 26), True), + (lomq_aug_sat_4, datetime(2007, 8, 25), True), + (lomq_aug_sat_4, datetime(2008, 8, 30), True), + (lomq_aug_sat_4, datetime(2009, 8, 29), True), + (lomq_aug_sat_4, datetime(2010, 8, 28), True), + (lomq_aug_sat_4, datetime(2011, 8, 27), True), + (lomq_aug_sat_4, datetime(2019, 8, 31), True), + + (lomq_aug_sat_4, datetime(2006, 8, 27), False), + (lomq_aug_sat_4, datetime(2007, 8, 28), False), + (lomq_aug_sat_4, datetime(2008, 8, 31), False), + (lomq_aug_sat_4, datetime(2009, 8, 30), False), + (lomq_aug_sat_4, datetime(2010, 8, 29), False), + (lomq_aug_sat_4, datetime(2011, 8, 28), False), + + (lomq_aug_sat_4, datetime(2006, 8, 25), False), + (lomq_aug_sat_4, datetime(2007, 8, 24), False), + (lomq_aug_sat_4, datetime(2008, 8, 29), False), + (lomq_aug_sat_4, datetime(2009, 8, 28), False), + (lomq_aug_sat_4, datetime(2010, 8, 27), False), + (lomq_aug_sat_4, datetime(2011, 8, 26), False), + (lomq_aug_sat_4, datetime(2019, 8, 30), False), + + # From GMCR + (lomq_sep_sat_4, datetime(2010, 9, 25), True), + (lomq_sep_sat_4, datetime(2011, 9, 24), True), + (lomq_sep_sat_4, datetime(2012, 9, 29), 
True), + + (lomq_sep_sat_4, datetime(2013, 6, 29), True), + (lomq_sep_sat_4, datetime(2012, 6, 23), True), + (lomq_sep_sat_4, datetime(2012, 6, 30), False), + + (lomq_sep_sat_4, datetime(2013, 3, 30), True), + (lomq_sep_sat_4, datetime(2012, 3, 24), True), + + (lomq_sep_sat_4, datetime(2012, 12, 29), True), + (lomq_sep_sat_4, datetime(2011, 12, 24), True), + + # INTC (extra week in Q1) + # See: http://www.intc.com/releasedetail.cfm?ReleaseID=542844 + (makeFY5253LastOfMonthQuarter(1, startingMonth=12, + weekday=WeekDay.SAT, + qtr_with_extra_week=1), + datetime(2011, 4, 2), True), + + # see: http://google.brand.edgar-online.com/?sym=INTC&formtypeID=7 + (makeFY5253LastOfMonthQuarter(1, startingMonth=12, + weekday=WeekDay.SAT, + qtr_with_extra_week=1), + datetime(2012, 12, 29), True), + (makeFY5253LastOfMonthQuarter(1, startingMonth=12, + weekday=WeekDay.SAT, + qtr_with_extra_week=1), + datetime(2011, 12, 31), True), + (makeFY5253LastOfMonthQuarter(1, startingMonth=12, + weekday=WeekDay.SAT, + qtr_with_extra_week=1), + datetime(2010, 12, 25), True)] + + @pytest.mark.parametrize('case', on_offset_cases) + def test_onOffset(self, case): + offset, dt, expected = case + assert_onOffset(offset, dt, expected) + + def test_year_has_extra_week(self): + # End of long Q1 + assert makeFY5253LastOfMonthQuarter( + 1, startingMonth=12, weekday=WeekDay.SAT, + qtr_with_extra_week=1).year_has_extra_week(datetime(2011, 4, 2)) + + # Start of long Q1 + assert makeFY5253LastOfMonthQuarter( + 1, startingMonth=12, weekday=WeekDay.SAT, + qtr_with_extra_week=1).year_has_extra_week(datetime(2010, 12, 26)) + + # End of year before year with long Q1 + assert not makeFY5253LastOfMonthQuarter( + 1, startingMonth=12, weekday=WeekDay.SAT, + qtr_with_extra_week=1).year_has_extra_week(datetime(2010, 12, 25)) + + for year in [x + for x in range(1994, 2011 + 1) + if x not in [2011, 2005, 2000, 1994]]: + assert not makeFY5253LastOfMonthQuarter( + 1, startingMonth=12, weekday=WeekDay.SAT, + 
qtr_with_extra_week=1).year_has_extra_week( + datetime(year, 4, 2)) + + # Other long years + assert makeFY5253LastOfMonthQuarter( + 1, startingMonth=12, weekday=WeekDay.SAT, + qtr_with_extra_week=1).year_has_extra_week(datetime(2005, 4, 2)) + + assert makeFY5253LastOfMonthQuarter( + 1, startingMonth=12, weekday=WeekDay.SAT, + qtr_with_extra_week=1).year_has_extra_week(datetime(2000, 4, 2)) + + assert makeFY5253LastOfMonthQuarter( + 1, startingMonth=12, weekday=WeekDay.SAT, + qtr_with_extra_week=1).year_has_extra_week(datetime(1994, 4, 2)) + + def test_get_weeks(self): + sat_dec_1 = makeFY5253LastOfMonthQuarter(1, startingMonth=12, + weekday=WeekDay.SAT, + qtr_with_extra_week=1) + sat_dec_4 = makeFY5253LastOfMonthQuarter(1, startingMonth=12, + weekday=WeekDay.SAT, + qtr_with_extra_week=4) + + assert sat_dec_1.get_weeks(datetime(2011, 4, 2)) == [14, 13, 13, 13] + assert sat_dec_4.get_weeks(datetime(2011, 4, 2)) == [13, 13, 13, 14] + assert sat_dec_1.get_weeks(datetime(2010, 12, 25)) == [13, 13, 13, 13] + + +class TestFY5253NearestEndMonthQuarter(Base): + + offset_nem_sat_aug_4 = makeFY5253NearestEndMonthQuarter( + 1, startingMonth=8, weekday=WeekDay.SAT, + qtr_with_extra_week=4) + offset_nem_thu_aug_4 = makeFY5253NearestEndMonthQuarter( + 1, startingMonth=8, weekday=WeekDay.THU, + qtr_with_extra_week=4) + offset_n = FY5253(weekday=WeekDay.TUE, startingMonth=12, + variation="nearest") + + on_offset_cases = [ + # From Wikipedia + (offset_nem_sat_aug_4, datetime(2006, 9, 2), True), + (offset_nem_sat_aug_4, datetime(2007, 9, 1), True), + (offset_nem_sat_aug_4, datetime(2008, 8, 30), True), + (offset_nem_sat_aug_4, datetime(2009, 8, 29), True), + (offset_nem_sat_aug_4, datetime(2010, 8, 28), True), + (offset_nem_sat_aug_4, datetime(2011, 9, 3), True), + + (offset_nem_sat_aug_4, datetime(2016, 9, 3), True), + (offset_nem_sat_aug_4, datetime(2017, 9, 2), True), + (offset_nem_sat_aug_4, datetime(2018, 9, 1), True), + (offset_nem_sat_aug_4, datetime(2019, 8, 31), True), + + 
(offset_nem_sat_aug_4, datetime(2006, 8, 27), False), + (offset_nem_sat_aug_4, datetime(2007, 8, 28), False), + (offset_nem_sat_aug_4, datetime(2008, 8, 31), False), + (offset_nem_sat_aug_4, datetime(2009, 8, 30), False), + (offset_nem_sat_aug_4, datetime(2010, 8, 29), False), + (offset_nem_sat_aug_4, datetime(2011, 8, 28), False), + + (offset_nem_sat_aug_4, datetime(2006, 8, 25), False), + (offset_nem_sat_aug_4, datetime(2007, 8, 24), False), + (offset_nem_sat_aug_4, datetime(2008, 8, 29), False), + (offset_nem_sat_aug_4, datetime(2009, 8, 28), False), + (offset_nem_sat_aug_4, datetime(2010, 8, 27), False), + (offset_nem_sat_aug_4, datetime(2011, 8, 26), False), + (offset_nem_sat_aug_4, datetime(2019, 8, 30), False), + + # From Micron, see: + # http://google.brand.edgar-online.com/?sym=MU&formtypeID=7 + (offset_nem_thu_aug_4, datetime(2012, 8, 30), True), + (offset_nem_thu_aug_4, datetime(2011, 9, 1), True), + + # See: http://google.brand.edgar-online.com/?sym=MU&formtypeID=13 + (offset_nem_thu_aug_4, datetime(2013, 5, 30), True), + (offset_nem_thu_aug_4, datetime(2013, 2, 28), True), + (offset_nem_thu_aug_4, datetime(2012, 11, 29), True), + (offset_nem_thu_aug_4, datetime(2012, 5, 31), True), + (offset_nem_thu_aug_4, datetime(2007, 3, 1), True), + (offset_nem_thu_aug_4, datetime(1994, 3, 3), True), + + (offset_n, datetime(2012, 12, 31), False), + (offset_n, datetime(2013, 1, 1), True), + (offset_n, datetime(2013, 1, 2), False)] + + @pytest.mark.parametrize('case', on_offset_cases) + def test_onOffset(self, case): + offset, dt, expected = case + assert_onOffset(offset, dt, expected) + + def test_offset(self): + offset = makeFY5253NearestEndMonthQuarter(1, startingMonth=8, + weekday=WeekDay.THU, + qtr_with_extra_week=4) + + MU = [datetime(2012, 5, 31), + datetime(2012, 8, 30), datetime(2012, 11, 29), + datetime(2013, 2, 28), datetime(2013, 5, 30)] + + date = MU[0] + relativedelta(days=-1) + for expected in MU: + assert_offset_equal(offset, date, expected) + date = 
date + offset + + assert_offset_equal(offset, + datetime(2012, 5, 31), + datetime(2012, 8, 30)) + assert_offset_equal(offset, + datetime(2012, 5, 30), + datetime(2012, 5, 31)) + + offset2 = FY5253Quarter(weekday=5, startingMonth=12, variation="last", + qtr_with_extra_week=4) + + assert_offset_equal(offset2, + datetime(2013, 1, 15), + datetime(2013, 3, 30)) + + +def test_bunched_yearends(): + # GH#14774 cases with two fiscal year-ends in the same calendar-year + fy = FY5253(n=1, weekday=5, startingMonth=12, variation='nearest') + dt = Timestamp('2004-01-01') + assert fy.rollback(dt) == Timestamp('2002-12-28') + assert (-fy).apply(dt) == Timestamp('2002-12-28') + assert dt - fy == Timestamp('2002-12-28') + + assert fy.rollforward(dt) == Timestamp('2004-01-03') + assert fy.apply(dt) == Timestamp('2004-01-03') + assert fy + dt == Timestamp('2004-01-03') + assert dt + fy == Timestamp('2004-01-03') + + # Same thing, but starting from a Timestamp in the previous year. + dt = Timestamp('2003-12-31') + assert fy.rollback(dt) == Timestamp('2002-12-28') + assert (-fy).apply(dt) == Timestamp('2002-12-28') + assert dt - fy == Timestamp('2002-12-28') + + +def test_fy5253_last_onoffset(): + # GH#18877 dates on the year-end but not normalized to midnight + offset = FY5253(n=-5, startingMonth=5, variation="last", weekday=0) + ts = Timestamp('1984-05-28 06:29:43.955911354+0200', + tz='Europe/San_Marino') + fast = offset.onOffset(ts) + slow = (ts + offset) - offset == ts + assert fast == slow + + +def test_fy5253_nearest_onoffset(): + # GH#18877 dates on the year-end but not normalized to midnight + offset = FY5253(n=3, startingMonth=7, variation="nearest", weekday=2) + ts = Timestamp('2032-07-28 00:12:59.035729419+0000', tz='Africa/Dakar') + fast = offset.onOffset(ts) + slow = (ts + offset) - offset == ts + assert fast == slow + + +def test_fy5253qtr_onoffset_nearest(): + # GH#19036 + ts = Timestamp('1985-09-02 23:57:46.232550356-0300', + tz='Atlantic/Bermuda') + offset = 
FY5253Quarter(n=3, qtr_with_extra_week=1, startingMonth=2, + variation="nearest", weekday=0) + fast = offset.onOffset(ts) + slow = (ts + offset) - offset == ts + assert fast == slow + + +def test_fy5253qtr_onoffset_last(): + # GH#19036 + offset = FY5253Quarter(n=-2, qtr_with_extra_week=1, + startingMonth=7, variation="last", weekday=2) + ts = Timestamp('2011-01-26 19:03:40.331096129+0200', + tz='Africa/Windhoek') + slow = (ts + offset) - offset == ts + fast = offset.onOffset(ts) + assert fast == slow diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py new file mode 100644 index 0000000000000..d96ebab615d12 --- /dev/null +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -0,0 +1,3181 @@ +import os +from distutils.version import LooseVersion +from datetime import date, datetime, timedelta + +import pytest +from pandas.compat import range +from pandas import compat + +import numpy as np + +from pandas.compat.numpy import np_datetime64_compat + +from pandas.core.series import Series +from pandas._libs.tslibs.frequencies import (get_freq_code, get_freq_str, + _INVALID_FREQ_ERROR) +from pandas.tseries.frequencies import _offset_map, get_offset +from pandas.core.indexes.datetimes import ( + _to_m8, DatetimeIndex, _daterange_cache) +import pandas._libs.tslibs.offsets as liboffsets +from pandas._libs.tslibs.offsets import WeekDay, CacheableOffset +from pandas.tseries.offsets import (BDay, CDay, BQuarterEnd, BMonthEnd, + BusinessHour, WeekOfMonth, CBMonthEnd, + CustomBusinessHour, + CBMonthBegin, BYearEnd, MonthEnd, + MonthBegin, SemiMonthBegin, SemiMonthEnd, + BYearBegin, QuarterBegin, BQuarterBegin, + BMonthBegin, DateOffset, Week, YearBegin, + YearEnd, Day, + QuarterEnd, BusinessMonthEnd, FY5253, + Nano, Easter, FY5253Quarter, + LastWeekOfMonth) +from pandas.core.tools.datetimes import format, ole2datetime +import pandas.tseries.offsets as offsets +from pandas.io.pickle import read_pickle +from pandas._libs.tslibs 
import timezones +from pandas._libs.tslib import normalize_date, NaT, Timestamp +import pandas._libs.tslib as tslib +import pandas.util.testing as tm +from pandas.tseries.holiday import USFederalHolidayCalendar + +from .common import assert_offset_equal, assert_onOffset + + +def test_monthrange(): + import calendar + for y in range(2000, 2013): + for m in range(1, 13): + assert tslib.monthrange(y, m) == calendar.monthrange(y, m) + +#### +# Misc function tests +#### + + +def test_format(): + actual = format(datetime(2008, 1, 15)) + assert actual == '20080115' + + +def test_ole2datetime(): + actual = ole2datetime(60000) + assert actual == datetime(2064, 4, 8) + + with pytest.raises(ValueError): + ole2datetime(60) + + +def test_normalize_date(): + actual = normalize_date(datetime(2007, 10, 1, 1, 12, 5, 10)) + assert actual == datetime(2007, 10, 1) + + +def test_to_m8(): + valb = datetime(2007, 10, 1) + valu = _to_m8(valb) + assert isinstance(valu, np.datetime64) + # assert valu == np.datetime64(datetime(2007,10,1)) + + # def test_datetime64_box(): + # valu = np.datetime64(datetime(2007,10,1)) + # valb = _dt_box(valu) + # assert type(valb) == datetime + # assert valb == datetime(2007,10,1) + + ##### + # DateOffset Tests + ##### + + +class Base(object): + _offset = None + + timezones = [None, 'UTC', 'Asia/Tokyo', 'US/Eastern', + 'dateutil/Asia/Tokyo', 'dateutil/US/Pacific'] + + def _get_offset(self, klass, value=1, normalize=False): + # create instance from offset class + if klass is FY5253: + klass = klass(n=value, startingMonth=1, weekday=1, + variation='last', normalize=normalize) + elif klass is FY5253Quarter: + klass = klass(n=value, startingMonth=1, weekday=1, + qtr_with_extra_week=1, variation='last', + normalize=normalize) + elif klass is LastWeekOfMonth: + klass = klass(n=value, weekday=5, normalize=normalize) + elif klass is WeekOfMonth: + klass = klass(n=value, week=1, weekday=5, normalize=normalize) + elif klass is Week: + klass = klass(n=value, weekday=5, 
normalize=normalize) + elif klass is DateOffset: + klass = klass(days=value, normalize=normalize) + else: + try: + klass = klass(value, normalize=normalize) + except Exception: + klass = klass(normalize=normalize) + return klass + + def test_apply_out_of_range(self, tz): + if self._offset is None: + return + + # try to create an out-of-bounds result timestamp; if we can't create + # the offset skip + try: + if self._offset in (BusinessHour, CustomBusinessHour): + # Using 10000 in BusinessHour fails in tz check because of DST + # difference + offset = self._get_offset(self._offset, value=100000) + else: + offset = self._get_offset(self._offset, value=10000) + + result = Timestamp('20080101') + offset + assert isinstance(result, datetime) + assert result.tzinfo is None + + # Check tz is preserved + t = Timestamp('20080101', tz=tz) + result = t + offset + assert isinstance(result, datetime) + assert t.tzinfo == result.tzinfo + + except tslib.OutOfBoundsDatetime: + raise + except (ValueError, KeyError): + # we are creating an invalid offset + # so ignore + pass + + +class TestCommon(Base): + # exected value created by Base._get_offset + # are applied to 2011/01/01 09:00 (Saturday) + # used for .apply and .rollforward + expecteds = {'Day': Timestamp('2011-01-02 09:00:00'), + 'DateOffset': Timestamp('2011-01-02 09:00:00'), + 'BusinessDay': Timestamp('2011-01-03 09:00:00'), + 'CustomBusinessDay': Timestamp('2011-01-03 09:00:00'), + 'CustomBusinessMonthEnd': Timestamp('2011-01-31 09:00:00'), + 'CustomBusinessMonthBegin': Timestamp('2011-01-03 09:00:00'), + 'MonthBegin': Timestamp('2011-02-01 09:00:00'), + 'BusinessMonthBegin': Timestamp('2011-01-03 09:00:00'), + 'MonthEnd': Timestamp('2011-01-31 09:00:00'), + 'SemiMonthEnd': Timestamp('2011-01-15 09:00:00'), + 'SemiMonthBegin': Timestamp('2011-01-15 09:00:00'), + 'BusinessMonthEnd': Timestamp('2011-01-31 09:00:00'), + 'YearBegin': Timestamp('2012-01-01 09:00:00'), + 'BYearBegin': Timestamp('2011-01-03 09:00:00'), + 
'YearEnd': Timestamp('2011-12-31 09:00:00'), + 'BYearEnd': Timestamp('2011-12-30 09:00:00'), + 'QuarterBegin': Timestamp('2011-03-01 09:00:00'), + 'BQuarterBegin': Timestamp('2011-03-01 09:00:00'), + 'QuarterEnd': Timestamp('2011-03-31 09:00:00'), + 'BQuarterEnd': Timestamp('2011-03-31 09:00:00'), + 'BusinessHour': Timestamp('2011-01-03 10:00:00'), + 'CustomBusinessHour': Timestamp('2011-01-03 10:00:00'), + 'WeekOfMonth': Timestamp('2011-01-08 09:00:00'), + 'LastWeekOfMonth': Timestamp('2011-01-29 09:00:00'), + 'FY5253Quarter': Timestamp('2011-01-25 09:00:00'), + 'FY5253': Timestamp('2011-01-25 09:00:00'), + 'Week': Timestamp('2011-01-08 09:00:00'), + 'Easter': Timestamp('2011-04-24 09:00:00'), + 'Hour': Timestamp('2011-01-01 10:00:00'), + 'Minute': Timestamp('2011-01-01 09:01:00'), + 'Second': Timestamp('2011-01-01 09:00:01'), + 'Milli': Timestamp('2011-01-01 09:00:00.001000'), + 'Micro': Timestamp('2011-01-01 09:00:00.000001'), + 'Nano': Timestamp(np_datetime64_compat( + '2011-01-01T09:00:00.000000001Z'))} + + def test_return_type(self, offset_types): + offset = self._get_offset(offset_types) + + # make sure that we are returning a Timestamp + result = Timestamp('20080101') + offset + assert isinstance(result, Timestamp) + + # make sure that we are returning NaT + assert NaT + offset is NaT + assert offset + NaT is NaT + + assert NaT - offset is NaT + assert (-offset).apply(NaT) is NaT + + def test_offset_n(self, offset_types): + offset = self._get_offset(offset_types) + assert offset.n == 1 + + neg_offset = offset * -1 + assert neg_offset.n == -1 + + mul_offset = offset * 3 + assert mul_offset.n == 3 + + def test_offset_freqstr(self, offset_types): + offset = self._get_offset(offset_types) + + freqstr = offset.freqstr + if freqstr not in ('', + "", + 'LWOM-SAT', ): + code = get_offset(freqstr) + assert offset.rule_code == code + + def _check_offsetfunc_works(self, offset, funcname, dt, expected, + normalize=False): + offset_s = self._get_offset(offset, 
normalize=normalize) + func = getattr(offset_s, funcname) + + result = func(dt) + assert isinstance(result, Timestamp) + assert result == expected + + result = func(Timestamp(dt)) + assert isinstance(result, Timestamp) + assert result == expected + + # see gh-14101 + exp_warning = None + ts = Timestamp(dt) + Nano(5) + + if (offset_s.__class__.__name__ == 'DateOffset' and + (funcname == 'apply' or normalize) and + ts.nanosecond > 0): + exp_warning = UserWarning + + # test nanosecond is preserved + with tm.assert_produces_warning(exp_warning, + check_stacklevel=False): + result = func(ts) + assert isinstance(result, Timestamp) + if normalize is False: + assert result == expected + Nano(5) + else: + assert result == expected + + if isinstance(dt, np.datetime64): + # test tz when input is datetime or Timestamp + return + + for tz in self.timezones: + expected_localize = expected.tz_localize(tz) + tz_obj = timezones.maybe_get_tz(tz) + dt_tz = tslib._localize_pydatetime(dt, tz_obj) + + result = func(dt_tz) + assert isinstance(result, Timestamp) + assert result == expected_localize + + result = func(Timestamp(dt, tz=tz)) + assert isinstance(result, Timestamp) + assert result == expected_localize + + # see gh-14101 + exp_warning = None + ts = Timestamp(dt, tz=tz) + Nano(5) + + if (offset_s.__class__.__name__ == 'DateOffset' and + (funcname == 'apply' or normalize) and + ts.nanosecond > 0): + exp_warning = UserWarning + + # test nanosecond is preserved + with tm.assert_produces_warning(exp_warning, + check_stacklevel=False): + result = func(ts) + assert isinstance(result, Timestamp) + if normalize is False: + assert result == expected_localize + Nano(5) + else: + assert result == expected_localize + + def test_apply(self, offset_types): + sdt = datetime(2011, 1, 1, 9, 0) + ndt = np_datetime64_compat('2011-01-01 09:00Z') + + for dt in [sdt, ndt]: + expected = self.expecteds[offset_types.__name__] + self._check_offsetfunc_works(offset_types, 'apply', dt, expected) + + 
expected = Timestamp(expected.date()) + self._check_offsetfunc_works(offset_types, 'apply', dt, expected, + normalize=True) + + def test_rollforward(self, offset_types): + expecteds = self.expecteds.copy() + + # result will not be changed if the target is on the offset + no_changes = ['Day', 'MonthBegin', 'SemiMonthBegin', 'YearBegin', + 'Week', 'Hour', 'Minute', 'Second', 'Milli', 'Micro', + 'Nano', 'DateOffset'] + for n in no_changes: + expecteds[n] = Timestamp('2011/01/01 09:00') + + expecteds['BusinessHour'] = Timestamp('2011-01-03 09:00:00') + expecteds['CustomBusinessHour'] = Timestamp('2011-01-03 09:00:00') + + # but be changed when normalize=True + norm_expected = expecteds.copy() + for k in norm_expected: + norm_expected[k] = Timestamp(norm_expected[k].date()) + + normalized = {'Day': Timestamp('2011-01-02 00:00:00'), + 'DateOffset': Timestamp('2011-01-02 00:00:00'), + 'MonthBegin': Timestamp('2011-02-01 00:00:00'), + 'SemiMonthBegin': Timestamp('2011-01-15 00:00:00'), + 'YearBegin': Timestamp('2012-01-01 00:00:00'), + 'Week': Timestamp('2011-01-08 00:00:00'), + 'Hour': Timestamp('2011-01-01 00:00:00'), + 'Minute': Timestamp('2011-01-01 00:00:00'), + 'Second': Timestamp('2011-01-01 00:00:00'), + 'Milli': Timestamp('2011-01-01 00:00:00'), + 'Micro': Timestamp('2011-01-01 00:00:00')} + norm_expected.update(normalized) + + sdt = datetime(2011, 1, 1, 9, 0) + ndt = np_datetime64_compat('2011-01-01 09:00Z') + + for dt in [sdt, ndt]: + expected = expecteds[offset_types.__name__] + self._check_offsetfunc_works(offset_types, 'rollforward', dt, + expected) + expected = norm_expected[offset_types.__name__] + self._check_offsetfunc_works(offset_types, 'rollforward', dt, + expected, normalize=True) + + def test_rollback(self, offset_types): + expecteds = {'BusinessDay': Timestamp('2010-12-31 09:00:00'), + 'CustomBusinessDay': Timestamp('2010-12-31 09:00:00'), + 'CustomBusinessMonthEnd': + Timestamp('2010-12-31 09:00:00'), + 'CustomBusinessMonthBegin': + 
Timestamp('2010-12-01 09:00:00'), + 'BusinessMonthBegin': Timestamp('2010-12-01 09:00:00'), + 'MonthEnd': Timestamp('2010-12-31 09:00:00'), + 'SemiMonthEnd': Timestamp('2010-12-31 09:00:00'), + 'BusinessMonthEnd': Timestamp('2010-12-31 09:00:00'), + 'BYearBegin': Timestamp('2010-01-01 09:00:00'), + 'YearEnd': Timestamp('2010-12-31 09:00:00'), + 'BYearEnd': Timestamp('2010-12-31 09:00:00'), + 'QuarterBegin': Timestamp('2010-12-01 09:00:00'), + 'BQuarterBegin': Timestamp('2010-12-01 09:00:00'), + 'QuarterEnd': Timestamp('2010-12-31 09:00:00'), + 'BQuarterEnd': Timestamp('2010-12-31 09:00:00'), + 'BusinessHour': Timestamp('2010-12-31 17:00:00'), + 'CustomBusinessHour': Timestamp('2010-12-31 17:00:00'), + 'WeekOfMonth': Timestamp('2010-12-11 09:00:00'), + 'LastWeekOfMonth': Timestamp('2010-12-25 09:00:00'), + 'FY5253Quarter': Timestamp('2010-10-26 09:00:00'), + 'FY5253': Timestamp('2010-01-26 09:00:00'), + 'Easter': Timestamp('2010-04-04 09:00:00')} + + # result will not be changed if the target is on the offset + for n in ['Day', 'MonthBegin', 'SemiMonthBegin', 'YearBegin', 'Week', + 'Hour', 'Minute', 'Second', 'Milli', 'Micro', 'Nano', + 'DateOffset']: + expecteds[n] = Timestamp('2011/01/01 09:00') + + # but be changed when normalize=True + norm_expected = expecteds.copy() + for k in norm_expected: + norm_expected[k] = Timestamp(norm_expected[k].date()) + + normalized = {'Day': Timestamp('2010-12-31 00:00:00'), + 'DateOffset': Timestamp('2010-12-31 00:00:00'), + 'MonthBegin': Timestamp('2010-12-01 00:00:00'), + 'SemiMonthBegin': Timestamp('2010-12-15 00:00:00'), + 'YearBegin': Timestamp('2010-01-01 00:00:00'), + 'Week': Timestamp('2010-12-25 00:00:00'), + 'Hour': Timestamp('2011-01-01 00:00:00'), + 'Minute': Timestamp('2011-01-01 00:00:00'), + 'Second': Timestamp('2011-01-01 00:00:00'), + 'Milli': Timestamp('2011-01-01 00:00:00'), + 'Micro': Timestamp('2011-01-01 00:00:00')} + norm_expected.update(normalized) + + sdt = datetime(2011, 1, 1, 9, 0) + ndt = 
np_datetime64_compat('2011-01-01 09:00Z') + + for dt in [sdt, ndt]: + expected = expecteds[offset_types.__name__] + self._check_offsetfunc_works(offset_types, 'rollback', dt, + expected) + + expected = norm_expected[offset_types.__name__] + self._check_offsetfunc_works(offset_types, 'rollback', dt, + expected, normalize=True) + + def test_onOffset(self, offset_types): + dt = self.expecteds[offset_types.__name__] + offset_s = self._get_offset(offset_types) + assert offset_s.onOffset(dt) + + # when normalize=True, onOffset checks time is 00:00:00 + offset_n = self._get_offset(offset_types, normalize=True) + assert not offset_n.onOffset(dt) + + if offset_types in (BusinessHour, CustomBusinessHour): + # In default BusinessHour (9:00-17:00), normalized time + # cannot be in business hour range + return + date = datetime(dt.year, dt.month, dt.day) + assert offset_n.onOffset(date) + + def test_add(self, offset_types, tz): + dt = datetime(2011, 1, 1, 9, 0) + + offset_s = self._get_offset(offset_types) + expected = self.expecteds[offset_types.__name__] + + result_dt = dt + offset_s + result_ts = Timestamp(dt) + offset_s + for result in [result_dt, result_ts]: + assert isinstance(result, Timestamp) + assert result == expected + + expected_localize = expected.tz_localize(tz) + result = Timestamp(dt, tz=tz) + offset_s + assert isinstance(result, Timestamp) + assert result == expected_localize + + # normalize=True + offset_s = self._get_offset(offset_types, normalize=True) + expected = Timestamp(expected.date()) + + result_dt = dt + offset_s + result_ts = Timestamp(dt) + offset_s + for result in [result_dt, result_ts]: + assert isinstance(result, Timestamp) + assert result == expected + + expected_localize = expected.tz_localize(tz) + result = Timestamp(dt, tz=tz) + offset_s + assert isinstance(result, Timestamp) + assert result == expected_localize + + def test_pickle_v0_15_2(self): + offsets = {'DateOffset': DateOffset(years=1), + 'MonthBegin': MonthBegin(1), + 'Day': Day(1), 
+ 'YearBegin': YearBegin(1), + 'Week': Week(1)} + pickle_path = os.path.join(tm.get_data_path(), + 'dateoffset_0_15_2.pickle') + # This code was executed once on v0.15.2 to generate the pickle: + # with open(pickle_path, 'wb') as f: pickle.dump(offsets, f) + # + tm.assert_dict_equal(offsets, read_pickle(pickle_path)) + + +class TestDateOffset(Base): + + def setup_method(self, method): + self.d = Timestamp(datetime(2008, 1, 2)) + _offset_map.clear() + + def test_repr(self): + repr(DateOffset()) + repr(DateOffset(2)) + repr(2 * DateOffset()) + repr(2 * DateOffset(months=2)) + + def test_mul(self): + assert DateOffset(2) == 2 * DateOffset(1) + assert DateOffset(2) == DateOffset(1) * 2 + + def test_constructor(self): + + assert ((self.d + DateOffset(months=2)) == datetime(2008, 3, 2)) + assert ((self.d - DateOffset(months=2)) == datetime(2007, 11, 2)) + + assert ((self.d + DateOffset(2)) == datetime(2008, 1, 4)) + + assert not DateOffset(2).isAnchored() + assert DateOffset(1).isAnchored() + + d = datetime(2008, 1, 31) + assert ((d + DateOffset(months=1)) == datetime(2008, 2, 29)) + + def test_copy(self): + assert (DateOffset(months=2).copy() == DateOffset(months=2)) + + def test_eq(self): + offset1 = DateOffset(days=1) + offset2 = DateOffset(days=365) + + assert offset1 != offset2 + + +class TestBusinessDay(Base): + _offset = BDay + + def setup_method(self, method): + self.d = datetime(2008, 1, 1) + + self.offset = BDay() + self.offset2 = BDay(2) + + def test_different_normalize_equals(self): + # equivalent in this special case + offset = BDay() + offset2 = BDay() + offset2.normalize = True + assert offset == offset2 + + def test_repr(self): + assert repr(self.offset) == '' + assert repr(self.offset2) == '<2 * BusinessDays>' + + expected = '' + assert repr(self.offset + timedelta(1)) == expected + + def test_with_offset(self): + offset = self.offset + timedelta(hours=2) + + assert (self.d + offset) == datetime(2008, 1, 2, 2) + + def testEQ(self): + assert self.offset2 
== self.offset2 + + def test_mul(self): + pass + + def test_hash(self): + assert hash(self.offset2) == hash(self.offset2) + + def testCall(self): + assert self.offset2(self.d) == datetime(2008, 1, 3) + + def testRAdd(self): + assert self.d + self.offset2 == self.offset2 + self.d + + def testSub(self): + off = self.offset2 + pytest.raises(Exception, off.__sub__, self.d) + assert 2 * off - off == off + + assert self.d - self.offset2 == self.d + BDay(-2) + + def testRSub(self): + assert self.d - self.offset2 == (-self.offset2).apply(self.d) + + def testMult1(self): + assert self.d + 10 * self.offset == self.d + BDay(10) + + def testMult2(self): + assert self.d + (-5 * BDay(-10)) == self.d + BDay(50) + + def testRollback1(self): + assert BDay(10).rollback(self.d) == self.d + + def testRollback2(self): + assert (BDay(10).rollback(datetime(2008, 1, 5)) == + datetime(2008, 1, 4)) + + def testRollforward1(self): + assert BDay(10).rollforward(self.d) == self.d + + def testRollforward2(self): + assert (BDay(10).rollforward(datetime(2008, 1, 5)) == + datetime(2008, 1, 7)) + + def test_roll_date_object(self): + offset = BDay() + + dt = date(2012, 9, 15) + + result = offset.rollback(dt) + assert result == datetime(2012, 9, 14) + + result = offset.rollforward(dt) + assert result == datetime(2012, 9, 17) + + offset = offsets.Day() + result = offset.rollback(dt) + assert result == datetime(2012, 9, 15) + + result = offset.rollforward(dt) + assert result == datetime(2012, 9, 15) + + def test_onOffset(self): + tests = [(BDay(), datetime(2008, 1, 1), True), + (BDay(), datetime(2008, 1, 5), False)] + + for offset, d, expected in tests: + assert_onOffset(offset, d, expected) + + apply_cases = [] + apply_cases.append((BDay(), { + datetime(2008, 1, 1): datetime(2008, 1, 2), + datetime(2008, 1, 4): datetime(2008, 1, 7), + datetime(2008, 1, 5): datetime(2008, 1, 7), + datetime(2008, 1, 6): datetime(2008, 1, 7), + datetime(2008, 1, 7): datetime(2008, 1, 8)})) + + apply_cases.append((2 * 
BDay(), { + datetime(2008, 1, 1): datetime(2008, 1, 3), + datetime(2008, 1, 4): datetime(2008, 1, 8), + datetime(2008, 1, 5): datetime(2008, 1, 8), + datetime(2008, 1, 6): datetime(2008, 1, 8), + datetime(2008, 1, 7): datetime(2008, 1, 9)})) + + apply_cases.append((-BDay(), { + datetime(2008, 1, 1): datetime(2007, 12, 31), + datetime(2008, 1, 4): datetime(2008, 1, 3), + datetime(2008, 1, 5): datetime(2008, 1, 4), + datetime(2008, 1, 6): datetime(2008, 1, 4), + datetime(2008, 1, 7): datetime(2008, 1, 4), + datetime(2008, 1, 8): datetime(2008, 1, 7)})) + + apply_cases.append((-2 * BDay(), { + datetime(2008, 1, 1): datetime(2007, 12, 28), + datetime(2008, 1, 4): datetime(2008, 1, 2), + datetime(2008, 1, 5): datetime(2008, 1, 3), + datetime(2008, 1, 6): datetime(2008, 1, 3), + datetime(2008, 1, 7): datetime(2008, 1, 3), + datetime(2008, 1, 8): datetime(2008, 1, 4), + datetime(2008, 1, 9): datetime(2008, 1, 7)})) + + apply_cases.append((BDay(0), { + datetime(2008, 1, 1): datetime(2008, 1, 1), + datetime(2008, 1, 4): datetime(2008, 1, 4), + datetime(2008, 1, 5): datetime(2008, 1, 7), + datetime(2008, 1, 6): datetime(2008, 1, 7), + datetime(2008, 1, 7): datetime(2008, 1, 7)})) + + @pytest.mark.parametrize('case', apply_cases) + def test_apply(self, case): + offset, cases = case + for base, expected in compat.iteritems(cases): + assert_offset_equal(offset, base, expected) + + def test_apply_large_n(self): + dt = datetime(2012, 10, 23) + + result = dt + BDay(10) + assert result == datetime(2012, 11, 6) + + result = dt + BDay(100) - BDay(100) + assert result == dt + + off = BDay() * 6 + rs = datetime(2012, 1, 1) - off + xp = datetime(2011, 12, 23) + assert rs == xp + + st = datetime(2011, 12, 18) + rs = st + off + xp = datetime(2011, 12, 26) + assert rs == xp + + off = BDay() * 10 + rs = datetime(2014, 1, 5) + off # see #5890 + xp = datetime(2014, 1, 17) + assert rs == xp + + def test_apply_corner(self): + pytest.raises(TypeError, BDay().apply, BMonthEnd()) + + def 
test_offsets_compare_equal(self): + # root cause of #456 + offset1 = BDay() + offset2 = BDay() + assert not offset1 != offset2 + + +class TestBusinessHour(Base): + _offset = BusinessHour + + def setup_method(self, method): + self.d = datetime(2014, 7, 1, 10, 00) + + self.offset1 = BusinessHour() + self.offset2 = BusinessHour(n=3) + + self.offset3 = BusinessHour(n=-1) + self.offset4 = BusinessHour(n=-4) + + from datetime import time as dt_time + self.offset5 = BusinessHour(start=dt_time(11, 0), end=dt_time(14, 30)) + self.offset6 = BusinessHour(start='20:00', end='05:00') + self.offset7 = BusinessHour(n=-2, start=dt_time(21, 30), + end=dt_time(6, 30)) + + def test_constructor_errors(self): + from datetime import time as dt_time + with pytest.raises(ValueError): + BusinessHour(start=dt_time(11, 0, 5)) + with pytest.raises(ValueError): + BusinessHour(start='AAA') + with pytest.raises(ValueError): + BusinessHour(start='14:00:05') + + def test_different_normalize_equals(self): + # equivalent in this special case + offset = self._offset() + offset2 = self._offset() + offset2.normalize = True + assert offset == offset2 + + def test_repr(self): + assert repr(self.offset1) == '' + assert repr(self.offset2) == '<3 * BusinessHours: BH=09:00-17:00>' + assert repr(self.offset3) == '<-1 * BusinessHour: BH=09:00-17:00>' + assert repr(self.offset4) == '<-4 * BusinessHours: BH=09:00-17:00>' + + assert repr(self.offset5) == '' + assert repr(self.offset6) == '' + assert repr(self.offset7) == '<-2 * BusinessHours: BH=21:30-06:30>' + + def test_with_offset(self): + expected = Timestamp('2014-07-01 13:00') + + assert self.d + BusinessHour() * 3 == expected + assert self.d + BusinessHour(n=3) == expected + + def testEQ(self): + for offset in [self.offset1, self.offset2, self.offset3, self.offset4]: + assert offset == offset + + assert BusinessHour() != BusinessHour(-1) + assert BusinessHour(start='09:00') == BusinessHour() + assert BusinessHour(start='09:00') != 
BusinessHour(start='09:01') + assert (BusinessHour(start='09:00', end='17:00') != + BusinessHour(start='17:00', end='09:01')) + + def test_hash(self): + for offset in [self.offset1, self.offset2, self.offset3, self.offset4]: + assert hash(offset) == hash(offset) + + def testCall(self): + assert self.offset1(self.d) == datetime(2014, 7, 1, 11) + assert self.offset2(self.d) == datetime(2014, 7, 1, 13) + assert self.offset3(self.d) == datetime(2014, 6, 30, 17) + assert self.offset4(self.d) == datetime(2014, 6, 30, 14) + + def testRAdd(self): + assert self.d + self.offset2 == self.offset2 + self.d + + def testSub(self): + off = self.offset2 + pytest.raises(Exception, off.__sub__, self.d) + assert 2 * off - off == off + + assert self.d - self.offset2 == self.d + self._offset(-3) + + def testRSub(self): + assert self.d - self.offset2 == (-self.offset2).apply(self.d) + + def testMult1(self): + assert self.d + 5 * self.offset1 == self.d + self._offset(5) + + def testMult2(self): + assert self.d + (-3 * self._offset(-2)) == self.d + self._offset(6) + + def testRollback1(self): + assert self.offset1.rollback(self.d) == self.d + assert self.offset2.rollback(self.d) == self.d + assert self.offset3.rollback(self.d) == self.d + assert self.offset4.rollback(self.d) == self.d + assert self.offset5.rollback(self.d) == datetime(2014, 6, 30, 14, 30) + assert self.offset6.rollback(self.d) == datetime(2014, 7, 1, 5, 0) + assert self.offset7.rollback(self.d) == datetime(2014, 7, 1, 6, 30) + + d = datetime(2014, 7, 1, 0) + assert self.offset1.rollback(d) == datetime(2014, 6, 30, 17) + assert self.offset2.rollback(d) == datetime(2014, 6, 30, 17) + assert self.offset3.rollback(d) == datetime(2014, 6, 30, 17) + assert self.offset4.rollback(d) == datetime(2014, 6, 30, 17) + assert self.offset5.rollback(d) == datetime(2014, 6, 30, 14, 30) + assert self.offset6.rollback(d) == d + assert self.offset7.rollback(d) == d + + assert self._offset(5).rollback(self.d) == self.d + + def 
testRollback2(self): + assert (self._offset(-3).rollback(datetime(2014, 7, 5, 15, 0)) == + datetime(2014, 7, 4, 17, 0)) + + def testRollforward1(self): + assert self.offset1.rollforward(self.d) == self.d + assert self.offset2.rollforward(self.d) == self.d + assert self.offset3.rollforward(self.d) == self.d + assert self.offset4.rollforward(self.d) == self.d + assert (self.offset5.rollforward(self.d) == + datetime(2014, 7, 1, 11, 0)) + assert (self.offset6.rollforward(self.d) == + datetime(2014, 7, 1, 20, 0)) + assert (self.offset7.rollforward(self.d) == + datetime(2014, 7, 1, 21, 30)) + + d = datetime(2014, 7, 1, 0) + assert self.offset1.rollforward(d) == datetime(2014, 7, 1, 9) + assert self.offset2.rollforward(d) == datetime(2014, 7, 1, 9) + assert self.offset3.rollforward(d) == datetime(2014, 7, 1, 9) + assert self.offset4.rollforward(d) == datetime(2014, 7, 1, 9) + assert self.offset5.rollforward(d) == datetime(2014, 7, 1, 11) + assert self.offset6.rollforward(d) == d + assert self.offset7.rollforward(d) == d + + assert self._offset(5).rollforward(self.d) == self.d + + def testRollforward2(self): + assert (self._offset(-3).rollforward(datetime(2014, 7, 5, 16, 0)) == + datetime(2014, 7, 7, 9)) + + def test_roll_date_object(self): + offset = BusinessHour() + + dt = datetime(2014, 7, 6, 15, 0) + + result = offset.rollback(dt) + assert result == datetime(2014, 7, 4, 17) + + result = offset.rollforward(dt) + assert result == datetime(2014, 7, 7, 9) + + normalize_cases = [] + normalize_cases.append((BusinessHour(normalize=True), { + datetime(2014, 7, 1, 8): datetime(2014, 7, 1), + datetime(2014, 7, 1, 17): datetime(2014, 7, 2), + datetime(2014, 7, 1, 16): datetime(2014, 7, 2), + datetime(2014, 7, 1, 23): datetime(2014, 7, 2), + datetime(2014, 7, 1, 0): datetime(2014, 7, 1), + datetime(2014, 7, 4, 15): datetime(2014, 7, 4), + datetime(2014, 7, 4, 15, 59): datetime(2014, 7, 4), + datetime(2014, 7, 4, 16, 30): datetime(2014, 7, 7), + datetime(2014, 7, 5, 23): 
datetime(2014, 7, 7), + datetime(2014, 7, 6, 10): datetime(2014, 7, 7)})) + + normalize_cases.append((BusinessHour(-1, normalize=True), { + datetime(2014, 7, 1, 8): datetime(2014, 6, 30), + datetime(2014, 7, 1, 17): datetime(2014, 7, 1), + datetime(2014, 7, 1, 16): datetime(2014, 7, 1), + datetime(2014, 7, 1, 10): datetime(2014, 6, 30), + datetime(2014, 7, 1, 0): datetime(2014, 6, 30), + datetime(2014, 7, 7, 10): datetime(2014, 7, 4), + datetime(2014, 7, 7, 10, 1): datetime(2014, 7, 7), + datetime(2014, 7, 5, 23): datetime(2014, 7, 4), + datetime(2014, 7, 6, 10): datetime(2014, 7, 4)})) + + normalize_cases.append((BusinessHour(1, normalize=True, start='17:00', + end='04:00'), { + datetime(2014, 7, 1, 8): datetime(2014, 7, 1), + datetime(2014, 7, 1, 17): datetime(2014, 7, 1), + datetime(2014, 7, 1, 23): datetime(2014, 7, 2), + datetime(2014, 7, 2, 2): datetime(2014, 7, 2), + datetime(2014, 7, 2, 3): datetime(2014, 7, 2), + datetime(2014, 7, 4, 23): datetime(2014, 7, 5), + datetime(2014, 7, 5, 2): datetime(2014, 7, 5), + datetime(2014, 7, 7, 2): datetime(2014, 7, 7), + datetime(2014, 7, 7, 17): datetime(2014, 7, 7)})) + + @pytest.mark.parametrize('case', normalize_cases) + def test_normalize(self, case): + offset, cases = case + for dt, expected in compat.iteritems(cases): + assert offset.apply(dt) == expected + + on_offset_cases = [] + on_offset_cases.append((BusinessHour(), { + datetime(2014, 7, 1, 9): True, + datetime(2014, 7, 1, 8, 59): False, + datetime(2014, 7, 1, 8): False, + datetime(2014, 7, 1, 17): True, + datetime(2014, 7, 1, 17, 1): False, + datetime(2014, 7, 1, 18): False, + datetime(2014, 7, 5, 9): False, + datetime(2014, 7, 6, 12): False})) + + on_offset_cases.append((BusinessHour(start='10:00', end='15:00'), { + datetime(2014, 7, 1, 9): False, + datetime(2014, 7, 1, 10): True, + datetime(2014, 7, 1, 15): True, + datetime(2014, 7, 1, 15, 1): False, + datetime(2014, 7, 5, 12): False, + datetime(2014, 7, 6, 12): False})) + + 
@pytest.mark.parametrize('case', on_offset_cases)
def test_onOffset(self, case):
    """Check ``onOffset`` for every timestamp of one ``on_offset_cases`` entry.

    ``case`` is an ``(offset, {timestamp: bool})`` pair; the boolean is the
    value ``offset.onOffset(timestamp)`` must equal.
    """
    offset, verdicts = case
    for stamp, on_hours in compat.iteritems(verdicts):
        answer = offset.onOffset(stamp)
        assert answer == on_hours
9), + datetime(2014, 7, 4, 9)), + datetime(2014, 7, 7, 9, 1): (datetime(2014, 7, 8, 9), + datetime(2014, 7, 7, 9))})) + + opening_time_cases.append(([BusinessHour(start='11:15'), + BusinessHour(n=2, start='11:15'), + BusinessHour(n=3, start='11:15'), + BusinessHour(start='11:15', end='10:00'), + BusinessHour(n=2, start='11:15', end='4:00'), + BusinessHour(n=3, start='11:15', + end='15:00')], { + datetime(2014, 7, 1, 11): (datetime(2014, 7, 1, 11, 15), + datetime(2014, 6, 30, 11, 15)), + datetime(2014, 7, 1, 18): (datetime(2014, 7, 2, 11, 15), + datetime(2014, 7, 1, 11, 15)), + datetime(2014, 7, 1, 23): (datetime(2014, 7, 2, 11, 15), + datetime(2014, 7, 1, 11, 15)), + datetime(2014, 7, 2, 8): (datetime(2014, 7, 2, 11, 15), + datetime(2014, 7, 1, 11, 15)), + datetime(2014, 7, 2, 9): (datetime(2014, 7, 2, 11, 15), + datetime(2014, 7, 1, 11, 15)), + datetime(2014, 7, 2, 10): (datetime(2014, 7, 2, 11, 15), + datetime(2014, 7, 1, 11, 15)), + datetime(2014, 7, 2, 11, 15): (datetime(2014, 7, 2, 11, 15), + datetime(2014, 7, 2, 11, 15)), + datetime(2014, 7, 2, 11, 15, 1): (datetime(2014, 7, 3, 11, 15), + datetime(2014, 7, 2, 11, 15)), + datetime(2014, 7, 5, 10): (datetime(2014, 7, 7, 11, 15), + datetime(2014, 7, 4, 11, 15)), + datetime(2014, 7, 4, 10): (datetime(2014, 7, 4, 11, 15), + datetime(2014, 7, 3, 11, 15)), + datetime(2014, 7, 4, 23): (datetime(2014, 7, 7, 11, 15), + datetime(2014, 7, 4, 11, 15)), + datetime(2014, 7, 6, 10): (datetime(2014, 7, 7, 11, 15), + datetime(2014, 7, 4, 11, 15)), + datetime(2014, 7, 7, 5): (datetime(2014, 7, 7, 11, 15), + datetime(2014, 7, 4, 11, 15)), + datetime(2014, 7, 7, 9, 1): (datetime(2014, 7, 7, 11, 15), + datetime(2014, 7, 4, 11, 15))})) + + opening_time_cases.append(([BusinessHour(-1), BusinessHour(n=-2), + BusinessHour(n=-4), + BusinessHour(n=-1, end='10:00'), + BusinessHour(n=-2, end='4:00'), + BusinessHour(n=-4, end='15:00')], { + datetime(2014, 7, 1, 11): (datetime(2014, 7, 1, 9), + datetime(2014, 7, 2, 9)), + datetime(2014, 7, 
1, 18): (datetime(2014, 7, 1, 9), + datetime(2014, 7, 2, 9)), + datetime(2014, 7, 1, 23): (datetime(2014, 7, 1, 9), + datetime(2014, 7, 2, 9)), + datetime(2014, 7, 2, 8): (datetime(2014, 7, 1, 9), + datetime(2014, 7, 2, 9)), + datetime(2014, 7, 2, 9): (datetime(2014, 7, 2, 9), + datetime(2014, 7, 2, 9)), + datetime(2014, 7, 2, 10): (datetime(2014, 7, 2, 9), + datetime(2014, 7, 3, 9)), + datetime(2014, 7, 5, 10): (datetime(2014, 7, 4, 9), + datetime(2014, 7, 7, 9)), + datetime(2014, 7, 4, 10): (datetime(2014, 7, 4, 9), + datetime(2014, 7, 7, 9)), + datetime(2014, 7, 4, 23): (datetime(2014, 7, 4, 9), + datetime(2014, 7, 7, 9)), + datetime(2014, 7, 6, 10): (datetime(2014, 7, 4, 9), + datetime(2014, 7, 7, 9)), + datetime(2014, 7, 7, 5): (datetime(2014, 7, 4, 9), + datetime(2014, 7, 7, 9)), + datetime(2014, 7, 7, 9): (datetime(2014, 7, 7, 9), + datetime(2014, 7, 7, 9)), + datetime(2014, 7, 7, 9, 1): (datetime(2014, 7, 7, 9), + datetime(2014, 7, 8, 9))})) + + opening_time_cases.append(([BusinessHour(start='17:00', end='05:00'), + BusinessHour(n=3, start='17:00', + end='03:00')], { + datetime(2014, 7, 1, 11): (datetime(2014, 7, 1, 17), + datetime(2014, 6, 30, 17)), + datetime(2014, 7, 1, 18): (datetime(2014, 7, 2, 17), + datetime(2014, 7, 1, 17)), + datetime(2014, 7, 1, 23): (datetime(2014, 7, 2, 17), + datetime(2014, 7, 1, 17)), + datetime(2014, 7, 2, 8): (datetime(2014, 7, 2, 17), + datetime(2014, 7, 1, 17)), + datetime(2014, 7, 2, 9): (datetime(2014, 7, 2, 17), + datetime(2014, 7, 1, 17)), + datetime(2014, 7, 4, 17): (datetime(2014, 7, 4, 17), + datetime(2014, 7, 4, 17)), + datetime(2014, 7, 5, 10): (datetime(2014, 7, 7, 17), + datetime(2014, 7, 4, 17)), + datetime(2014, 7, 4, 10): (datetime(2014, 7, 4, 17), + datetime(2014, 7, 3, 17)), + datetime(2014, 7, 4, 23): (datetime(2014, 7, 7, 17), + datetime(2014, 7, 4, 17)), + datetime(2014, 7, 6, 10): (datetime(2014, 7, 7, 17), + datetime(2014, 7, 4, 17)), + datetime(2014, 7, 7, 5): (datetime(2014, 7, 7, 17), + 
@pytest.mark.parametrize('case', opening_time_cases)
def test_opening_time(self, case):
    """Check next/previous opening times for every offset in ``case``.

    ``case`` is a ``(list_of_offsets, {timestamp: (next, prev)})`` pair.
    Each offset in the list is checked against the same table through
    ``_next_opening_time`` and ``_prev_opening_time``.
    """
    candidates, table = case
    for candidate in candidates:
        for stamp, openings in compat.iteritems(table):
            following, preceding = openings
            assert candidate._next_opening_time(stamp) == following
            assert candidate._prev_opening_time(stamp) == preceding
datetime(2014, 7, 2, 10), + datetime(2014, 7, 2, 11): datetime(2014, 7, 2, 12), + # out of business hours + datetime(2014, 7, 2, 8): datetime(2014, 7, 2, 10), + datetime(2014, 7, 2, 19): datetime(2014, 7, 3, 10), + datetime(2014, 7, 2, 23): datetime(2014, 7, 3, 10), + datetime(2014, 7, 3, 0): datetime(2014, 7, 3, 10), + # saturday + datetime(2014, 7, 5, 15): datetime(2014, 7, 7, 10), + datetime(2014, 7, 4, 17): datetime(2014, 7, 7, 10), + datetime(2014, 7, 4, 16, 30): datetime(2014, 7, 7, 9, 30), + datetime(2014, 7, 4, 16, 30, 30): datetime(2014, 7, 7, 9, 30, 30)})) + + apply_cases.append((BusinessHour(4), { + datetime(2014, 7, 1, 11): datetime(2014, 7, 1, 15), + datetime(2014, 7, 1, 13): datetime(2014, 7, 2, 9), + datetime(2014, 7, 1, 15): datetime(2014, 7, 2, 11), + datetime(2014, 7, 1, 16): datetime(2014, 7, 2, 12), + datetime(2014, 7, 1, 17): datetime(2014, 7, 2, 13), + datetime(2014, 7, 2, 11): datetime(2014, 7, 2, 15), + datetime(2014, 7, 2, 8): datetime(2014, 7, 2, 13), + datetime(2014, 7, 2, 19): datetime(2014, 7, 3, 13), + datetime(2014, 7, 2, 23): datetime(2014, 7, 3, 13), + datetime(2014, 7, 3, 0): datetime(2014, 7, 3, 13), + datetime(2014, 7, 5, 15): datetime(2014, 7, 7, 13), + datetime(2014, 7, 4, 17): datetime(2014, 7, 7, 13), + datetime(2014, 7, 4, 16, 30): datetime(2014, 7, 7, 12, 30), + datetime(2014, 7, 4, 16, 30, 30): datetime(2014, 7, 7, 12, 30, 30)})) + + apply_cases.append((BusinessHour(-1), { + datetime(2014, 7, 1, 11): datetime(2014, 7, 1, 10), + datetime(2014, 7, 1, 13): datetime(2014, 7, 1, 12), + datetime(2014, 7, 1, 15): datetime(2014, 7, 1, 14), + datetime(2014, 7, 1, 16): datetime(2014, 7, 1, 15), + datetime(2014, 7, 1, 10): datetime(2014, 6, 30, 17), + datetime(2014, 7, 1, 16, 30, 15): datetime(2014, 7, 1, 15, 30, 15), + datetime(2014, 7, 1, 9, 30, 15): datetime(2014, 6, 30, 16, 30, 15), + datetime(2014, 7, 1, 17): datetime(2014, 7, 1, 16), + datetime(2014, 7, 1, 5): datetime(2014, 6, 30, 16), + datetime(2014, 7, 2, 11): 
datetime(2014, 7, 2, 10), + # out of business hours + datetime(2014, 7, 2, 8): datetime(2014, 7, 1, 16), + datetime(2014, 7, 2, 19): datetime(2014, 7, 2, 16), + datetime(2014, 7, 2, 23): datetime(2014, 7, 2, 16), + datetime(2014, 7, 3, 0): datetime(2014, 7, 2, 16), + # saturday + datetime(2014, 7, 5, 15): datetime(2014, 7, 4, 16), + datetime(2014, 7, 7, 9): datetime(2014, 7, 4, 16), + datetime(2014, 7, 7, 9, 30): datetime(2014, 7, 4, 16, 30), + datetime(2014, 7, 7, 9, 30, 30): datetime(2014, 7, 4, 16, 30, 30)})) + + apply_cases.append((BusinessHour(-4), { + datetime(2014, 7, 1, 11): datetime(2014, 6, 30, 15), + datetime(2014, 7, 1, 13): datetime(2014, 6, 30, 17), + datetime(2014, 7, 1, 15): datetime(2014, 7, 1, 11), + datetime(2014, 7, 1, 16): datetime(2014, 7, 1, 12), + datetime(2014, 7, 1, 17): datetime(2014, 7, 1, 13), + datetime(2014, 7, 2, 11): datetime(2014, 7, 1, 15), + datetime(2014, 7, 2, 8): datetime(2014, 7, 1, 13), + datetime(2014, 7, 2, 19): datetime(2014, 7, 2, 13), + datetime(2014, 7, 2, 23): datetime(2014, 7, 2, 13), + datetime(2014, 7, 3, 0): datetime(2014, 7, 2, 13), + datetime(2014, 7, 5, 15): datetime(2014, 7, 4, 13), + datetime(2014, 7, 4, 18): datetime(2014, 7, 4, 13), + datetime(2014, 7, 7, 9, 30): datetime(2014, 7, 4, 13, 30), + datetime(2014, 7, 7, 9, 30, 30): datetime(2014, 7, 4, 13, 30, 30)})) + + apply_cases.append((BusinessHour(start='13:00', end='16:00'), { + datetime(2014, 7, 1, 11): datetime(2014, 7, 1, 14), + datetime(2014, 7, 1, 13): datetime(2014, 7, 1, 14), + datetime(2014, 7, 1, 15): datetime(2014, 7, 2, 13), + datetime(2014, 7, 1, 19): datetime(2014, 7, 2, 14), + datetime(2014, 7, 1, 16): datetime(2014, 7, 2, 14), + datetime(2014, 7, 1, 15, 30, 15): datetime(2014, 7, 2, 13, 30, 15), + datetime(2014, 7, 5, 15): datetime(2014, 7, 7, 14), + datetime(2014, 7, 4, 17): datetime(2014, 7, 7, 14)})) + + apply_cases.append((BusinessHour(n=2, start='13:00', end='16:00'), { + datetime(2014, 7, 1, 17): datetime(2014, 7, 2, 15), + 
datetime(2014, 7, 2, 14): datetime(2014, 7, 3, 13), + datetime(2014, 7, 2, 8): datetime(2014, 7, 2, 15), + datetime(2014, 7, 2, 19): datetime(2014, 7, 3, 15), + datetime(2014, 7, 2, 14, 30): datetime(2014, 7, 3, 13, 30), + datetime(2014, 7, 3, 0): datetime(2014, 7, 3, 15), + datetime(2014, 7, 5, 15): datetime(2014, 7, 7, 15), + datetime(2014, 7, 4, 17): datetime(2014, 7, 7, 15), + datetime(2014, 7, 4, 14, 30): datetime(2014, 7, 7, 13, 30), + datetime(2014, 7, 4, 14, 30, 30): datetime(2014, 7, 7, 13, 30, 30)})) + + apply_cases.append((BusinessHour(n=-1, start='13:00', end='16:00'), { + datetime(2014, 7, 2, 11): datetime(2014, 7, 1, 15), + datetime(2014, 7, 2, 13): datetime(2014, 7, 1, 15), + datetime(2014, 7, 2, 14): datetime(2014, 7, 1, 16), + datetime(2014, 7, 2, 15): datetime(2014, 7, 2, 14), + datetime(2014, 7, 2, 19): datetime(2014, 7, 2, 15), + datetime(2014, 7, 2, 16): datetime(2014, 7, 2, 15), + datetime(2014, 7, 2, 13, 30, 15): datetime(2014, 7, 1, 15, 30, 15), + datetime(2014, 7, 5, 15): datetime(2014, 7, 4, 15), + datetime(2014, 7, 7, 11): datetime(2014, 7, 4, 15)})) + + apply_cases.append((BusinessHour(n=-3, start='10:00', end='16:00'), { + datetime(2014, 7, 1, 17): datetime(2014, 7, 1, 13), + datetime(2014, 7, 2, 14): datetime(2014, 7, 2, 11), + datetime(2014, 7, 2, 8): datetime(2014, 7, 1, 13), + datetime(2014, 7, 2, 13): datetime(2014, 7, 1, 16), + datetime(2014, 7, 2, 19): datetime(2014, 7, 2, 13), + datetime(2014, 7, 2, 11, 30): datetime(2014, 7, 1, 14, 30), + datetime(2014, 7, 3, 0): datetime(2014, 7, 2, 13), + datetime(2014, 7, 4, 10): datetime(2014, 7, 3, 13), + datetime(2014, 7, 5, 15): datetime(2014, 7, 4, 13), + datetime(2014, 7, 4, 16): datetime(2014, 7, 4, 13), + datetime(2014, 7, 4, 12, 30): datetime(2014, 7, 3, 15, 30), + datetime(2014, 7, 4, 12, 30, 30): datetime(2014, 7, 3, 15, 30, 30)})) + + apply_cases.append((BusinessHour(start='19:00', end='05:00'), { + datetime(2014, 7, 1, 17): datetime(2014, 7, 1, 20), + datetime(2014, 7, 2, 14): 
@pytest.mark.parametrize('case', apply_cases)
def test_apply(self, case):
    """Run ``assert_offset_equal`` over one ``apply_cases`` entry.

    ``case`` is an ``(offset, {base: expected})`` pair; each pair of the
    mapping is handed to ``assert_offset_equal`` together with the offset.
    """
    offset, table = case
    for start, wanted in compat.iteritems(table):
        assert_offset_equal(offset, start, wanted)
16), + datetime(2014, 7, 1, 17): datetime(2014, 7, 9, 9), + datetime(2014, 7, 2, 11): datetime(2014, 7, 9, 11), + datetime(2014, 7, 2, 8): datetime(2014, 7, 9, 9), + datetime(2014, 7, 2, 19): datetime(2014, 7, 10, 9), + datetime(2014, 7, 2, 23): datetime(2014, 7, 10, 9), + datetime(2014, 7, 3, 0): datetime(2014, 7, 10, 9), + datetime(2014, 7, 5, 15): datetime(2014, 7, 14, 9), + datetime(2014, 7, 4, 18): datetime(2014, 7, 14, 9), + datetime(2014, 7, 7, 9, 30): datetime(2014, 7, 14, 9, 30), + datetime(2014, 7, 7, 9, 30, 30): datetime(2014, 7, 14, 9, 30, 30)})) + + # 3 days and 1 hour before + apply_large_n_cases.append((BusinessHour(-25), { + datetime(2014, 7, 1, 11): datetime(2014, 6, 26, 10), + datetime(2014, 7, 1, 13): datetime(2014, 6, 26, 12), + datetime(2014, 7, 1, 9): datetime(2014, 6, 25, 16), + datetime(2014, 7, 1, 10): datetime(2014, 6, 25, 17), + datetime(2014, 7, 3, 11): datetime(2014, 6, 30, 10), + datetime(2014, 7, 3, 8): datetime(2014, 6, 27, 16), + datetime(2014, 7, 3, 19): datetime(2014, 6, 30, 16), + datetime(2014, 7, 3, 23): datetime(2014, 6, 30, 16), + datetime(2014, 7, 4, 9): datetime(2014, 6, 30, 16), + datetime(2014, 7, 5, 15): datetime(2014, 7, 1, 16), + datetime(2014, 7, 6, 18): datetime(2014, 7, 1, 16), + datetime(2014, 7, 7, 9, 30): datetime(2014, 7, 1, 16, 30), + datetime(2014, 7, 7, 10, 30, 30): datetime(2014, 7, 2, 9, 30, 30)})) + + # 5 days and 3 hours later + apply_large_n_cases.append((BusinessHour(28, start='21:00', end='02:00'), { + datetime(2014, 7, 1, 11): datetime(2014, 7, 9, 0), + datetime(2014, 7, 1, 22): datetime(2014, 7, 9, 1), + datetime(2014, 7, 1, 23): datetime(2014, 7, 9, 21), + datetime(2014, 7, 2, 2): datetime(2014, 7, 10, 0), + datetime(2014, 7, 3, 21): datetime(2014, 7, 11, 0), + datetime(2014, 7, 4, 1): datetime(2014, 7, 11, 23), + datetime(2014, 7, 4, 2): datetime(2014, 7, 12, 0), + datetime(2014, 7, 4, 3): datetime(2014, 7, 12, 0), + datetime(2014, 7, 5, 1): datetime(2014, 7, 14, 23), + datetime(2014, 7, 5, 15): 
def test_apply_nanoseconds(self):
    """Apply ``BusinessHour`` to timestamps shifted by 5 nanoseconds.

    Each table maps an input timestamp to the value expected after applying
    the offset; the pairs are verified with ``assert_offset_equal``.
    """
    forward_cases = {
        Timestamp('2014-07-04 15:00') + Nano(5):
            Timestamp('2014-07-04 16:00') + Nano(5),
        Timestamp('2014-07-04 16:00') + Nano(5):
            Timestamp('2014-07-07 09:00') + Nano(5),
        Timestamp('2014-07-04 16:00') - Nano(5):
            Timestamp('2014-07-04 17:00') - Nano(5),
    }
    backward_cases = {
        Timestamp('2014-07-04 15:00') + Nano(5):
            Timestamp('2014-07-04 14:00') + Nano(5),
        Timestamp('2014-07-04 10:00') + Nano(5):
            Timestamp('2014-07-04 09:00') + Nano(5),
        Timestamp('2014-07-04 10:00') - Nano(5):
            Timestamp('2014-07-03 17:00') - Nano(5),
    }
    scenarios = [
        (BusinessHour(), forward_cases),
        (BusinessHour(-1), backward_cases),
    ]

    for offset, table in scenarios:
        for start, wanted in compat.iteritems(table):
            assert_offset_equal(offset, start, wanted)
class TestCustomBusinessHour(Base):
    """Tests for the ``CustomBusinessHour`` offset."""

    _offset = CustomBusinessHour

    def setup_method(self, method):
        """Build the reference datetime and two customised offsets."""
        # 2014 Calendar to check custom holidays
        #   Sun Mon Tue Wed Thu Fri Sat
        #  6/22  23  24  25  26  27  28
        #    29  30 7/1   2   3   4   5
        #     6   7   8   9  10  11  12
        self.d = datetime(2014, 7, 1, 10, 00)
        # offset1 restricts the business week to Tuesday-Friday.
        self.offset1 = CustomBusinessHour(weekmask='Tue Wed Thu Fri')

        # The holidays are given as a str, a datetime and a np.datetime64.
        self.holidays = ['2014-06-27', datetime(2014, 6, 30),
                         np.datetime64('2014-07-02')]
        self.offset2 = CustomBusinessHour(holidays=self.holidays)

    def test_constructor_errors(self):
        """``start`` values with seconds or unparsable text raise ValueError."""
        from datetime import time as dt_time
        with pytest.raises(ValueError):
            CustomBusinessHour(start=dt_time(11, 0, 5))
        with pytest.raises(ValueError):
            CustomBusinessHour(start='AAA')
        with pytest.raises(ValueError):
            CustomBusinessHour(start='14:00:05')

    def test_different_normalize_equals(self):
        """Offsets differing only in ``normalize`` compare equal here."""
        # equivalent in this special case
        offset = self._offset()
        offset2 = self._offset()
        offset2.normalize = True
        assert offset == offset2

    def test_repr(self):
        """Both offsets are expected to show the same business-hour window."""
        # Neither offset overrides start/end, so the two reprs are identical.
        expected = '<CustomBusinessHour: CBH=09:00-17:00>'
        assert repr(self.offset1) == expected
        assert repr(self.offset2) == expected

    def test_with_offset(self):
        """Three business hours after ``self.d`` is 13:00 the same day."""
        expected = Timestamp('2014-07-01 13:00')

        assert self.d + CustomBusinessHour() * 3 == expected
        assert self.d + CustomBusinessHour(n=3) == expected

    def testEQ(self):
        """Equality depends on n, start, end, weekmask and holidays."""
        for offset in [self.offset1, self.offset2]:
            assert offset == offset

        assert CustomBusinessHour() != CustomBusinessHour(-1)
        assert (CustomBusinessHour(start='09:00') ==
                CustomBusinessHour())
        assert (CustomBusinessHour(start='09:00') !=
                CustomBusinessHour(start='09:01'))
        assert (CustomBusinessHour(start='09:00', end='17:00') !=
                CustomBusinessHour(start='17:00', end='09:01'))

        assert (CustomBusinessHour(weekmask='Tue Wed Thu Fri') !=
                CustomBusinessHour(weekmask='Mon Tue Wed Thu Fri'))
        assert (CustomBusinessHour(holidays=['2014-06-27']) !=
                CustomBusinessHour(holidays=['2014-06-28']))

    def test_hash(self):
        """Hashing the same offset twice gives the same value."""
        assert hash(self.offset1) == hash(self.offset1)
        assert hash(self.offset2) == hash(self.offset2)

    def testCall(self):
        """Calling an offset on ``self.d`` moves it one hour forward."""
        assert self.offset1(self.d) == datetime(2014, 7, 1, 11)
        assert self.offset2(self.d) == datetime(2014, 7, 1, 11)

    def testRAdd(self):
        """Addition with a datetime is commutative."""
        assert self.d + self.offset2 == self.offset2 + self.d

    def testSub(self):
        """``offset - datetime`` raises; offset and datetime arithmetic agree."""
        off = self.offset2
        pytest.raises(Exception, off.__sub__, self.d)
        assert 2 * off - off == off

        assert self.d - self.offset2 == self.d - (2 * off - off)

    def testRSub(self):
        """``datetime - offset`` equals applying the negated offset."""
        assert self.d - self.offset2 == (-self.offset2).apply(self.d)

    def testMult1(self):
        """Multiplying an offset by 5 matches constructing it with n=5."""
        assert self.d + 5 * self.offset1 == self.d + self._offset(5)

    def testMult2(self):
        """Negative multiplier times negative n gives the positive product."""
        assert self.d + (-3 * self._offset(-2)) == self.d + self._offset(6)

    def testRollback1(self):
        """Rollback keeps on-offset times and skips masked days/holidays."""
        assert self.offset1.rollback(self.d) == self.d
        assert self.offset2.rollback(self.d) == self.d

        d = datetime(2014, 7, 1, 0)

        # 2014/07/01 is Tuesday, 06/30 is Monday(holiday)
        assert self.offset1.rollback(d) == datetime(2014, 6, 27, 17)

        # 2014/6/30 and 2014/6/27 are holidays
        assert self.offset2.rollback(d) == datetime(2014, 6, 26, 17)

    def testRollback2(self):
        """Rolling back from Saturday afternoon lands on Friday 17:00."""
        assert (self._offset(-3).rollback(datetime(2014, 7, 5, 15, 0)) ==
                datetime(2014, 7, 4, 17, 0))

    def testRollforward1(self):
        """Rollforward keeps on-offset times and moves midnight to 09:00."""
        assert self.offset1.rollforward(self.d) == self.d
        assert self.offset2.rollforward(self.d) == self.d

        d = datetime(2014, 7, 1, 0)
        assert self.offset1.rollforward(d) == datetime(2014, 7, 1, 9)
        assert self.offset2.rollforward(d) == datetime(2014, 7, 1, 9)

    def testRollforward2(self):
        """Rolling forward from Saturday lands on Monday 09:00."""
        assert (self._offset(-3).rollforward(datetime(2014, 7, 5, 16, 0)) ==
                datetime(2014, 7, 7, 9))

    def test_roll_date_object(self):
        """Roll a Sunday datetime back to Friday and forward to Monday."""
        # Use the class under test (``_offset``) rather than BusinessHour so
        # this case actually exercises CustomBusinessHour.
        offset = self._offset()

        dt = datetime(2014, 7, 6, 15, 0)

        result = offset.rollback(dt)
        assert result == datetime(2014, 7, 4, 17)

        result = offset.rollforward(dt)
        assert result == datetime(2014, 7, 7, 9)

    def test_normalize(self):
        """With ``normalize=True`` every applied result falls on midnight."""
        tests = [
            (CustomBusinessHour(normalize=True,
                                holidays=self.holidays),
             {datetime(2014, 7, 1, 8): datetime(2014, 7, 1),
              datetime(2014, 7, 1, 17): datetime(2014, 7, 3),
              datetime(2014, 7, 1, 16): datetime(2014, 7, 3),
              datetime(2014, 7, 1, 23): datetime(2014, 7, 3),
              datetime(2014, 7, 1, 0): datetime(2014, 7, 1),
              datetime(2014, 7, 4, 15): datetime(2014, 7, 4),
              datetime(2014, 7, 4, 15, 59): datetime(2014, 7, 4),
              datetime(2014, 7, 4, 16, 30): datetime(2014, 7, 7),
              datetime(2014, 7, 5, 23): datetime(2014, 7, 7),
              datetime(2014, 7, 6, 10): datetime(2014, 7, 7)}),

            (CustomBusinessHour(-1, normalize=True,
                                holidays=self.holidays),
             {datetime(2014, 7, 1, 8): datetime(2014, 6, 26),
              datetime(2014, 7, 1, 17): datetime(2014, 7, 1),
              datetime(2014, 7, 1, 16): datetime(2014, 7, 1),
              datetime(2014, 7, 1, 10): datetime(2014, 6, 26),
              datetime(2014, 7, 1, 0): datetime(2014, 6, 26),
              datetime(2014, 7, 7, 10): datetime(2014, 7, 4),
              datetime(2014, 7, 7, 10, 1): datetime(2014, 7, 7),
              datetime(2014, 7, 5, 23): datetime(2014, 7, 4),
              datetime(2014, 7, 6, 10): datetime(2014, 7, 4)}),

            # Business hours that run past midnight (17:00 -> 04:00).
            (CustomBusinessHour(1, normalize=True, start='17:00',
                                end='04:00', holidays=self.holidays),
             {datetime(2014, 7, 1, 8): datetime(2014, 7, 1),
              datetime(2014, 7, 1, 17): datetime(2014, 7, 1),
              datetime(2014, 7, 1, 23): datetime(2014, 7, 2),
              datetime(2014, 7, 2, 2): datetime(2014, 7, 2),
              datetime(2014, 7, 2, 3): datetime(2014, 7, 3),
              datetime(2014, 7, 4, 23): datetime(2014, 7, 5),
              datetime(2014, 7, 5, 2): datetime(2014, 7, 5),
              datetime(2014, 7, 7, 2): datetime(2014, 7, 7),
              datetime(2014, 7, 7, 17): datetime(2014, 7, 7)}),
        ]

        for offset, cases in tests:
            for dt, expected in compat.iteritems(cases):
                assert offset.apply(dt) == expected

    def test_onOffset(self):
        """``onOffset`` is True only inside 10:00-15:00 on business days."""
        tests = [
            (CustomBusinessHour(start='10:00', end='15:00',
                                holidays=self.holidays),
             {datetime(2014, 7, 1, 9): False,
              datetime(2014, 7, 1, 10): True,
              datetime(2014, 7, 1, 15): True,
              datetime(2014, 7, 1, 15, 1): False,
              datetime(2014, 7, 5, 12): False,
              datetime(2014, 7, 6, 12): False}),
        ]

        for offset, cases in tests:
            for dt, expected in compat.iteritems(cases):
                assert offset.onOffset(dt) == expected

    def test_apply(self):
        """Apply 1- and 4-hour offsets; the 2014-07-02 holiday is skipped."""
        tests = [
            (CustomBusinessHour(holidays=self.holidays),
             {datetime(2014, 7, 1, 11): datetime(2014, 7, 1, 12),
              datetime(2014, 7, 1, 13): datetime(2014, 7, 1, 14),
              datetime(2014, 7, 1, 15): datetime(2014, 7, 1, 16),
              datetime(2014, 7, 1, 19): datetime(2014, 7, 3, 10),
              datetime(2014, 7, 1, 16): datetime(2014, 7, 3, 9),
              datetime(2014, 7, 1, 16, 30, 15): datetime(2014, 7, 3, 9, 30,
                                                         15),
              datetime(2014, 7, 1, 17): datetime(2014, 7, 3, 10),
              datetime(2014, 7, 2, 11): datetime(2014, 7, 3, 10),
              # out of business hours
              datetime(2014, 7, 2, 8): datetime(2014, 7, 3, 10),
              datetime(2014, 7, 2, 19): datetime(2014, 7, 3, 10),
              datetime(2014, 7, 2, 23): datetime(2014, 7, 3, 10),
              datetime(2014, 7, 3, 0): datetime(2014, 7, 3, 10),
              # saturday
              datetime(2014, 7, 5, 15): datetime(2014, 7, 7, 10),
              datetime(2014, 7, 4, 17): datetime(2014, 7, 7, 10),
              datetime(2014, 7, 4, 16, 30): datetime(2014, 7, 7, 9, 30),
              datetime(2014, 7, 4, 16, 30, 30): datetime(2014, 7, 7, 9, 30,
                                                         30)}),

            (CustomBusinessHour(4, holidays=self.holidays),
             {datetime(2014, 7, 1, 11): datetime(2014, 7, 1, 15),
              datetime(2014, 7, 1, 13): datetime(2014, 7, 3, 9),
              datetime(2014, 7, 1, 15): datetime(2014, 7, 3, 11),
              datetime(2014, 7, 1, 16): datetime(2014, 7, 3, 12),
              datetime(2014, 7, 1, 17): datetime(2014, 7, 3, 13),
              datetime(2014, 7, 2, 11): datetime(2014, 7, 3, 13),
              datetime(2014, 7, 2, 8): datetime(2014, 7, 3, 13),
              datetime(2014, 7, 2, 19): datetime(2014, 7, 3, 13),
              datetime(2014, 7, 2, 23): datetime(2014, 7, 3, 13),
              datetime(2014, 7, 3, 0): datetime(2014, 7, 3, 13),
              datetime(2014, 7, 5, 15): datetime(2014, 7, 7, 13),
              datetime(2014, 7, 4, 17): datetime(2014, 7, 7, 13),
              datetime(2014, 7, 4, 16, 30): datetime(2014, 7, 7, 12, 30),
              datetime(2014, 7, 4, 16, 30, 30): datetime(2014, 7, 7, 12, 30,
                                                         30)}),
        ]

        for offset, cases in tests:
            for base, expected in compat.iteritems(cases):
                assert_offset_equal(offset, base, expected)

    def test_apply_nanoseconds(self):
        """Apply the offset to timestamps shifted by 5 nanoseconds."""
        tests = [
            (CustomBusinessHour(holidays=self.holidays),
             {Timestamp('2014-07-01 15:00') + Nano(5):
                  Timestamp('2014-07-01 16:00') + Nano(5),
              Timestamp('2014-07-01 16:00') + Nano(5):
                  Timestamp('2014-07-03 09:00') + Nano(5),
              Timestamp('2014-07-01 16:00') - Nano(5):
                  Timestamp('2014-07-01 17:00') - Nano(5)}),

            (CustomBusinessHour(-1, holidays=self.holidays),
             {Timestamp('2014-07-01 15:00') + Nano(5):
                  Timestamp('2014-07-01 14:00') + Nano(5),
              Timestamp('2014-07-01 10:00') + Nano(5):
                  Timestamp('2014-07-01 09:00') + Nano(5),
              Timestamp('2014-07-01 10:00') - Nano(5):
                  Timestamp('2014-06-26 17:00') - Nano(5)}),
        ]

        for offset, cases in tests:
            for base, expected in compat.iteritems(cases):
                assert_offset_equal(offset, base, expected)
class TestCustomBusinessDay(Base):
    """Tests for the ``CDay`` (custom business day) offset."""

    _offset = CDay

    def setup_method(self, method):
        """Build the reference dates and the n=1 / n=2 offsets."""
        self.d = datetime(2008, 1, 1)
        self.nd = np_datetime64_compat('2008-01-01 00:00:00Z')

        self.offset = CDay()
        self.offset2 = CDay(2)

    def test_different_normalize_equals(self):
        """Offsets differing only in ``normalize`` compare equal here."""
        # equivalent in this special case
        offset = CDay()
        offset2 = CDay()
        offset2.normalize = True
        assert offset == offset2

    def test_repr(self):
        """Check the repr for n=1, n=2 and for an offset plus a timedelta."""
        assert repr(self.offset) == '<CustomBusinessDay>'
        assert repr(self.offset2) == '<2 * CustomBusinessDays>'

        expected = '<BusinessDay: offset=datetime.timedelta(1)>'
        assert repr(self.offset + timedelta(1)) == expected

    def test_with_offset(self):
        """An extra two-hour timedelta is carried through the addition."""
        offset = self.offset + timedelta(hours=2)

        assert (self.d + offset) == datetime(2008, 1, 2, 2)

    def testEQ(self):
        """An offset compares equal to itself."""
        assert self.offset2 == self.offset2

    def test_mul(self):
        """Intentionally empty."""
        pass

    def test_hash(self):
        """Hashing the same offset twice gives the same value."""
        assert hash(self.offset2) == hash(self.offset2)

    def testCall(self):
        """Calling the n=2 offset works for datetime and datetime64 input."""
        assert self.offset2(self.d) == datetime(2008, 1, 3)
        assert self.offset2(self.nd) == datetime(2008, 1, 3)

    def testRAdd(self):
        """Addition with a datetime is commutative."""
        assert self.d + self.offset2 == self.offset2 + self.d

    def testSub(self):
        """``offset - datetime`` raises; subtraction equals adding ``CDay(-2)``."""
        off = self.offset2
        pytest.raises(Exception, off.__sub__, self.d)
        assert 2 * off - off == off

        assert self.d - self.offset2 == self.d + CDay(-2)

    def testRSub(self):
        """``datetime - offset`` equals applying the negated offset."""
        assert self.d - self.offset2 == (-self.offset2).apply(self.d)

    def testMult1(self):
        """Multiplying an offset by 10 matches constructing it with n=10."""
        assert self.d + 10 * self.offset == self.d + CDay(10)

    def testMult2(self):
        """Negative multiplier times negative n gives the positive product."""
        assert self.d + (-5 * CDay(-10)) == self.d + CDay(50)

    def testRollback1(self):
        """Rollback leaves ``self.d`` unchanged."""
        assert CDay(10).rollback(self.d) == self.d

    def testRollback2(self):
        """Rollback from 2008-01-05 lands on 2008-01-04."""
        assert (CDay(10).rollback(datetime(2008, 1, 5)) ==
                datetime(2008, 1, 4))

    def testRollforward1(self):
        """Rollforward leaves ``self.d`` unchanged."""
        assert CDay(10).rollforward(self.d) == self.d

    def testRollforward2(self):
        """Rollforward from 2008-01-05 lands on 2008-01-07."""
        assert (CDay(10).rollforward(datetime(2008, 1, 5)) ==
                datetime(2008, 1, 7))

    def test_roll_date_object(self):
        """Rolling a ``date`` object returns values equal to datetimes."""
        offset = CDay()

        dt = date(2012, 9, 15)

        result = offset.rollback(dt)
        assert result == datetime(2012, 9, 14)

        result = offset.rollforward(dt)
        assert result == datetime(2012, 9, 17)

        # A plain calendar-day offset leaves the same date where it is.
        offset = offsets.Day()
        result = offset.rollback(dt)
        assert result == datetime(2012, 9, 15)

        result = offset.rollforward(dt)
        assert result == datetime(2012, 9, 15)

    # (offset, date, expected onOffset result)
    on_offset_cases = [(CDay(), datetime(2008, 1, 1), True),
                       (CDay(), datetime(2008, 1, 5), False)]

    @pytest.mark.parametrize('case', on_offset_cases)
    def test_onOffset(self, case):
        """Check one ``(offset, date, expected)`` triple via ``assert_onOffset``."""
        offset, d, expected = case
        assert_onOffset(offset, d, expected)

    # (offset, {base date: expected date after applying the offset})
    apply_cases = [
        (CDay(), {
            datetime(2008, 1, 1): datetime(2008, 1, 2),
            datetime(2008, 1, 4): datetime(2008, 1, 7),
            datetime(2008, 1, 5): datetime(2008, 1, 7),
            datetime(2008, 1, 6): datetime(2008, 1, 7),
            datetime(2008, 1, 7): datetime(2008, 1, 8)}),

        (2 * CDay(), {
            datetime(2008, 1, 1): datetime(2008, 1, 3),
            datetime(2008, 1, 4): datetime(2008, 1, 8),
            datetime(2008, 1, 5): datetime(2008, 1, 8),
            datetime(2008, 1, 6): datetime(2008, 1, 8),
            datetime(2008, 1, 7): datetime(2008, 1, 9)}),

        (-CDay(), {
            datetime(2008, 1, 1): datetime(2007, 12, 31),
            datetime(2008, 1, 4): datetime(2008, 1, 3),
            datetime(2008, 1, 5): datetime(2008, 1, 4),
            datetime(2008, 1, 6): datetime(2008, 1, 4),
            datetime(2008, 1, 7): datetime(2008, 1, 4),
            datetime(2008, 1, 8): datetime(2008, 1, 7)}),

        (-2 * CDay(), {
            datetime(2008, 1, 1): datetime(2007, 12, 28),
            datetime(2008, 1, 4): datetime(2008, 1, 2),
            datetime(2008, 1, 5): datetime(2008, 1, 3),
            datetime(2008, 1, 6): datetime(2008, 1, 3),
            datetime(2008, 1, 7): datetime(2008, 1, 3),
            datetime(2008, 1, 8): datetime(2008, 1, 4),
            datetime(2008, 1, 9): datetime(2008, 1, 7)}),

        (CDay(0), {
            datetime(2008, 1, 1): datetime(2008, 1, 1),
            datetime(2008, 1, 4): datetime(2008, 1, 4),
            datetime(2008, 1, 5): datetime(2008, 1, 7),
            datetime(2008, 1, 6): datetime(2008, 1, 7),
            datetime(2008, 1, 7): datetime(2008, 1, 7)}),
    ]

    @pytest.mark.parametrize('case', apply_cases)
    def test_apply(self, case):
        """Run ``assert_offset_equal`` over one ``apply_cases`` entry."""
        offset, cases = case
        for base, expected in compat.iteritems(cases):
            assert_offset_equal(offset, base, expected)

    def test_apply_large_n(self):
        """Check arithmetic with larger multiples of the offset."""
        dt = datetime(2012, 10, 23)

        result = dt + CDay(10)
        assert result == datetime(2012, 11, 6)

        result = dt + CDay(100) - CDay(100)
        assert result == dt

        off = CDay() * 6
        rs = datetime(2012, 1, 1) - off
        xp = datetime(2011, 12, 23)
        assert rs == xp

        st = datetime(2011, 12, 18)
        rs = st + off
        xp = datetime(2011, 12, 26)
        assert rs == xp

    def test_apply_corner(self):
        """Applying the offset to another offset raises."""
        pytest.raises(Exception, CDay().apply, BMonthEnd())

    def test_offsets_compare_equal(self):
        """Two default-constructed offsets are not unequal."""
        # root cause of #456
        offset1 = CDay()
        offset2 = CDay()
        assert not offset1 != offset2

    def test_holidays(self):
        """May 1st is skipped when listed as a holiday in any accepted type."""
        # Define a TradingDay offset
        holidays = ['2012-05-01', datetime(2013, 5, 1),
                    np.datetime64('2014-05-01')]
        tday = CDay(holidays=holidays)
        for year in range(2012, 2015):
            dt = datetime(year, 4, 30)
            xp = datetime(year, 5, 2)
            rs = dt + tday
            assert rs == xp

    def test_weekmask(self):
        """``weekmask`` is accepted as day names, a bit string or a list."""
        weekmask_saudi = 'Sat Sun Mon Tue Wed'  # Thu-Fri Weekend
        weekmask_uae = '1111001'  # Fri-Sat Weekend
        weekmask_egypt = [1, 1, 1, 1, 0, 0, 1]  # Fri-Sat Weekend
        bday_saudi = CDay(weekmask=weekmask_saudi)
        bday_uae = CDay(weekmask=weekmask_uae)
        bday_egypt = CDay(weekmask=weekmask_egypt)
        dt = datetime(2013, 5, 1)
        xp_saudi = datetime(2013, 5, 4)
        xp_uae = datetime(2013, 5, 2)
        xp_egypt = datetime(2013, 5, 2)
        assert xp_saudi == dt + bday_saudi
        assert xp_uae == dt + bday_uae
        assert xp_egypt == dt + bday_egypt
        xp2 = datetime(2013, 5, 5)
        assert xp2 == dt + 2 * bday_saudi
        assert xp2 == dt + 2 * bday_uae
        assert xp2 == dt + 2 * bday_egypt

    def test_weekmask_and_holidays(self):
        """``weekmask`` and ``holidays`` can be combined in one offset."""
        weekmask_egypt = 'Sun Mon Tue Wed Thu'  # Fri-Sat Weekend
        holidays = ['2012-05-01', datetime(2013, 5, 1),
                    np.datetime64('2014-05-01')]
        bday_egypt = CDay(holidays=holidays, weekmask=weekmask_egypt)
        dt = datetime(2013, 4, 30)
        xp_egypt = datetime(2013, 5, 5)
        assert xp_egypt == dt + 2 * bday_egypt

    def test_calendar(self):
        """A holiday calendar object can be passed through ``calendar``."""
        calendar = USFederalHolidayCalendar()
        dt = datetime(2014, 1, 17)
        assert_offset_equal(CDay(calendar=calendar), dt, datetime(2014, 1, 21))

    def test_roundtrip_pickle(self):
        """Offsets survive a pickle round trip unchanged."""
        def _check_roundtrip(obj):
            unpickled = tm.round_trip_pickle(obj)
            assert unpickled == obj

        _check_roundtrip(self.offset)
        _check_roundtrip(self.offset2)
        _check_roundtrip(self.offset * 2)

    def test_pickle_compat_0_14_1(self):
        """The stored ``cday-0.14.1.pickle`` equals a freshly built offset."""
        hdays = [datetime(2013, 1, 1) for _ in range(4)]

        pth = tm.get_data_path()

        cday0_14_1 = read_pickle(os.path.join(pth, 'cday-0.14.1.pickle'))
        cday = CDay(holidays=hdays)
        assert cday == cday0_14_1
USFederalHolidayCalendar() + dt = datetime(2014, 1, 17) + assert_offset_equal(CDay(calendar=calendar), dt, datetime(2014, 1, 21)) + + def test_roundtrip_pickle(self): + def _check_roundtrip(obj): + unpickled = tm.round_trip_pickle(obj) + assert unpickled == obj + + _check_roundtrip(self.offset) + _check_roundtrip(self.offset2) + _check_roundtrip(self.offset * 2) + + def test_pickle_compat_0_14_1(self): + hdays = [datetime(2013, 1, 1) for ele in range(4)] + + pth = tm.get_data_path() + + cday0_14_1 = read_pickle(os.path.join(pth, 'cday-0.14.1.pickle')) + cday = CDay(holidays=hdays) + assert cday == cday0_14_1 + + +class CustomBusinessMonthBase(object): + + def setup_method(self, method): + self.d = datetime(2008, 1, 1) + + self.offset = self._object() + self.offset2 = self._object(2) + + def testEQ(self): + assert self.offset2 == self.offset2 + + def test_mul(self): + pass + + def test_hash(self): + assert hash(self.offset2) == hash(self.offset2) + + def testRAdd(self): + assert self.d + self.offset2 == self.offset2 + self.d + + def testSub(self): + off = self.offset2 + pytest.raises(Exception, off.__sub__, self.d) + assert 2 * off - off == off + + assert self.d - self.offset2 == self.d + self._object(-2) + + def testRSub(self): + assert self.d - self.offset2 == (-self.offset2).apply(self.d) + + def testMult1(self): + assert self.d + 10 * self.offset == self.d + self._object(10) + + def testMult2(self): + assert self.d + (-5 * self._object(-10)) == self.d + self._object(50) + + def test_offsets_compare_equal(self): + offset1 = self._object() + offset2 = self._object() + assert not offset1 != offset2 + + def test_roundtrip_pickle(self): + def _check_roundtrip(obj): + unpickled = tm.round_trip_pickle(obj) + assert unpickled == obj + + _check_roundtrip(self._object()) + _check_roundtrip(self._object(2)) + _check_roundtrip(self._object() * 2) + + def test_copy(self): + # GH 17452 + off = self._object(weekmask='Mon Wed Fri') + assert off == off.copy() + + +class 
TestCustomBusinessMonthEnd(CustomBusinessMonthBase, Base): + _object = CBMonthEnd + + def test_different_normalize_equals(self): + # equivalent in this special case + offset = CBMonthEnd() + offset2 = CBMonthEnd() + offset2.normalize = True + assert offset == offset2 + + def test_repr(self): + assert repr(self.offset) == '' + assert repr(self.offset2) == '<2 * CustomBusinessMonthEnds>' + + def testCall(self): + assert self.offset2(self.d) == datetime(2008, 2, 29) + + def testRollback1(self): + assert (CDay(10).rollback(datetime(2007, 12, 31)) == + datetime(2007, 12, 31)) + + def testRollback2(self): + assert CBMonthEnd(10).rollback(self.d) == datetime(2007, 12, 31) + + def testRollforward1(self): + assert CBMonthEnd(10).rollforward(self.d) == datetime(2008, 1, 31) + + def test_roll_date_object(self): + offset = CBMonthEnd() + + dt = date(2012, 9, 15) + + result = offset.rollback(dt) + assert result == datetime(2012, 8, 31) + + result = offset.rollforward(dt) + assert result == datetime(2012, 9, 28) + + offset = offsets.Day() + result = offset.rollback(dt) + assert result == datetime(2012, 9, 15) + + result = offset.rollforward(dt) + assert result == datetime(2012, 9, 15) + + on_offset_cases = [(CBMonthEnd(), datetime(2008, 1, 31), True), + (CBMonthEnd(), datetime(2008, 1, 1), False)] + + @pytest.mark.parametrize('case', on_offset_cases) + def test_onOffset(self, case): + offset, d, expected = case + assert_onOffset(offset, d, expected) + + apply_cases = [] + apply_cases.append((CBMonthEnd(), { + datetime(2008, 1, 1): datetime(2008, 1, 31), + datetime(2008, 2, 7): datetime(2008, 2, 29)})) + + apply_cases.append((2 * CBMonthEnd(), { + datetime(2008, 1, 1): datetime(2008, 2, 29), + datetime(2008, 2, 7): datetime(2008, 3, 31)})) + + apply_cases.append((-CBMonthEnd(), { + datetime(2008, 1, 1): datetime(2007, 12, 31), + datetime(2008, 2, 8): datetime(2008, 1, 31)})) + + apply_cases.append((-2 * CBMonthEnd(), { + datetime(2008, 1, 1): datetime(2007, 11, 30), + 
datetime(2008, 2, 9): datetime(2007, 12, 31)})) + + apply_cases.append((CBMonthEnd(0), { + datetime(2008, 1, 1): datetime(2008, 1, 31), + datetime(2008, 2, 7): datetime(2008, 2, 29)})) + + @pytest.mark.parametrize('case', apply_cases) + def test_apply(self, case): + offset, cases = case + for base, expected in compat.iteritems(cases): + assert_offset_equal(offset, base, expected) + + def test_apply_large_n(self): + dt = datetime(2012, 10, 23) + + result = dt + CBMonthEnd(10) + assert result == datetime(2013, 7, 31) + + result = dt + CDay(100) - CDay(100) + assert result == dt + + off = CBMonthEnd() * 6 + rs = datetime(2012, 1, 1) - off + xp = datetime(2011, 7, 29) + assert rs == xp + + st = datetime(2011, 12, 18) + rs = st + off + xp = datetime(2012, 5, 31) + assert rs == xp + + def test_holidays(self): + # Define a TradingDay offset + holidays = ['2012-01-31', datetime(2012, 2, 28), + np.datetime64('2012-02-29')] + bm_offset = CBMonthEnd(holidays=holidays) + dt = datetime(2012, 1, 1) + assert dt + bm_offset == datetime(2012, 1, 30) + assert dt + 2 * bm_offset == datetime(2012, 2, 27) + + def test_datetimeindex(self): + from pandas.tseries.holiday import USFederalHolidayCalendar + hcal = USFederalHolidayCalendar() + freq = CBMonthEnd(calendar=hcal) + + assert (DatetimeIndex(start='20120101', end='20130101', + freq=freq).tolist()[0] == datetime(2012, 1, 31)) + + +class TestCustomBusinessMonthBegin(CustomBusinessMonthBase, Base): + _object = CBMonthBegin + + def test_different_normalize_equals(self): + # equivalent in this special case + offset = CBMonthBegin() + offset2 = CBMonthBegin() + offset2.normalize = True + assert offset == offset2 + + def test_repr(self): + assert repr(self.offset) == '' + assert repr(self.offset2) == '<2 * CustomBusinessMonthBegins>' + + def testCall(self): + assert self.offset2(self.d) == datetime(2008, 3, 3) + + def testRollback1(self): + assert (CDay(10).rollback(datetime(2007, 12, 31)) == + datetime(2007, 12, 31)) + + def 
testRollback2(self): + assert CBMonthBegin(10).rollback(self.d) == datetime(2008, 1, 1) + + def testRollforward1(self): + assert CBMonthBegin(10).rollforward(self.d) == datetime(2008, 1, 1) + + def test_roll_date_object(self): + offset = CBMonthBegin() + + dt = date(2012, 9, 15) + + result = offset.rollback(dt) + assert result == datetime(2012, 9, 3) + + result = offset.rollforward(dt) + assert result == datetime(2012, 10, 1) + + offset = offsets.Day() + result = offset.rollback(dt) + assert result == datetime(2012, 9, 15) + + result = offset.rollforward(dt) + assert result == datetime(2012, 9, 15) + + on_offset_cases = [(CBMonthBegin(), datetime(2008, 1, 1), True), + (CBMonthBegin(), datetime(2008, 1, 31), False)] + + @pytest.mark.parametrize('case', on_offset_cases) + def test_onOffset(self, case): + offset, dt, expected = case + assert_onOffset(offset, dt, expected) + + apply_cases = [] + apply_cases.append((CBMonthBegin(), { + datetime(2008, 1, 1): datetime(2008, 2, 1), + datetime(2008, 2, 7): datetime(2008, 3, 3)})) + + apply_cases.append((2 * CBMonthBegin(), { + datetime(2008, 1, 1): datetime(2008, 3, 3), + datetime(2008, 2, 7): datetime(2008, 4, 1)})) + + apply_cases.append((-CBMonthBegin(), { + datetime(2008, 1, 1): datetime(2007, 12, 3), + datetime(2008, 2, 8): datetime(2008, 2, 1)})) + + apply_cases.append((-2 * CBMonthBegin(), { + datetime(2008, 1, 1): datetime(2007, 11, 1), + datetime(2008, 2, 9): datetime(2008, 1, 1)})) + + apply_cases.append((CBMonthBegin(0), { + datetime(2008, 1, 1): datetime(2008, 1, 1), + datetime(2008, 1, 7): datetime(2008, 2, 1)})) + + @pytest.mark.parametrize('case', apply_cases) + def test_apply(self, case): + offset, cases = case + for base, expected in compat.iteritems(cases): + assert_offset_equal(offset, base, expected) + + def test_apply_large_n(self): + dt = datetime(2012, 10, 23) + + result = dt + CBMonthBegin(10) + assert result == datetime(2013, 8, 1) + + result = dt + CDay(100) - CDay(100) + assert result == dt + + 
off = CBMonthBegin() * 6 + rs = datetime(2012, 1, 1) - off + xp = datetime(2011, 7, 1) + assert rs == xp + + st = datetime(2011, 12, 18) + rs = st + off + + xp = datetime(2012, 6, 1) + assert rs == xp + + def test_holidays(self): + # Define a TradingDay offset + holidays = ['2012-02-01', datetime(2012, 2, 2), + np.datetime64('2012-03-01')] + bm_offset = CBMonthBegin(holidays=holidays) + dt = datetime(2012, 1, 1) + + assert dt + bm_offset == datetime(2012, 1, 2) + assert dt + 2 * bm_offset == datetime(2012, 2, 3) + + def test_datetimeindex(self): + hcal = USFederalHolidayCalendar() + cbmb = CBMonthBegin(calendar=hcal) + assert (DatetimeIndex(start='20120101', end='20130101', + freq=cbmb).tolist()[0] == datetime(2012, 1, 3)) + + +class TestWeek(Base): + _offset = Week + + def test_repr(self): + assert repr(Week(weekday=0)) == "" + assert repr(Week(n=-1, weekday=0)) == "<-1 * Week: weekday=0>" + assert repr(Week(n=-2, weekday=0)) == "<-2 * Weeks: weekday=0>" + + def test_corner(self): + pytest.raises(ValueError, Week, weekday=7) + tm.assert_raises_regex( + ValueError, "Day must be", Week, weekday=-1) + + def test_isAnchored(self): + assert Week(weekday=0).isAnchored() + assert not Week().isAnchored() + assert not Week(2, weekday=2).isAnchored() + assert not Week(2).isAnchored() + + offset_cases = [] + # not business week + offset_cases.append((Week(), { + datetime(2008, 1, 1): datetime(2008, 1, 8), + datetime(2008, 1, 4): datetime(2008, 1, 11), + datetime(2008, 1, 5): datetime(2008, 1, 12), + datetime(2008, 1, 6): datetime(2008, 1, 13), + datetime(2008, 1, 7): datetime(2008, 1, 14)})) + + # Mon + offset_cases.append((Week(weekday=0), { + datetime(2007, 12, 31): datetime(2008, 1, 7), + datetime(2008, 1, 4): datetime(2008, 1, 7), + datetime(2008, 1, 5): datetime(2008, 1, 7), + datetime(2008, 1, 6): datetime(2008, 1, 7), + datetime(2008, 1, 7): datetime(2008, 1, 14)})) + + # n=0 -> roll forward. 
Mon + offset_cases.append((Week(0, weekday=0), { + datetime(2007, 12, 31): datetime(2007, 12, 31), + datetime(2008, 1, 4): datetime(2008, 1, 7), + datetime(2008, 1, 5): datetime(2008, 1, 7), + datetime(2008, 1, 6): datetime(2008, 1, 7), + datetime(2008, 1, 7): datetime(2008, 1, 7)})) + + # n=0 -> roll forward. Mon + offset_cases.append((Week(-2, weekday=1), { + datetime(2010, 4, 6): datetime(2010, 3, 23), + datetime(2010, 4, 8): datetime(2010, 3, 30), + datetime(2010, 4, 5): datetime(2010, 3, 23)})) + + @pytest.mark.parametrize('case', offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in compat.iteritems(cases): + assert_offset_equal(offset, base, expected) + + def test_onOffset(self): + for weekday in range(7): + offset = Week(weekday=weekday) + + for day in range(1, 8): + date = datetime(2008, 1, day) + + if day % 7 == weekday: + expected = True + else: + expected = False + assert_onOffset(offset, date, expected) + + def test_offsets_compare_equal(self): + # root cause of #456 + offset1 = Week() + offset2 = Week() + assert not offset1 != offset2 + + +class TestWeekOfMonth(Base): + _offset = WeekOfMonth + + def test_constructor(self): + tm.assert_raises_regex(ValueError, "^N cannot be 0", + WeekOfMonth, n=0, week=1, weekday=1) + tm.assert_raises_regex(ValueError, "^Week", WeekOfMonth, + n=1, week=4, weekday=0) + tm.assert_raises_regex(ValueError, "^Week", WeekOfMonth, + n=1, week=-1, weekday=0) + tm.assert_raises_regex(ValueError, "^Day", WeekOfMonth, + n=1, week=0, weekday=-1) + tm.assert_raises_regex(ValueError, "^Day", WeekOfMonth, + n=1, week=0, weekday=7) + + def test_repr(self): + assert (repr(WeekOfMonth(weekday=1, week=2)) == + "") + + def test_offset(self): + date1 = datetime(2011, 1, 4) # 1st Tuesday of Month + date2 = datetime(2011, 1, 11) # 2nd Tuesday of Month + date3 = datetime(2011, 1, 18) # 3rd Tuesday of Month + date4 = datetime(2011, 1, 25) # 4th Tuesday of Month + + # see for loop for structure + test_cases 
= [ + (-2, 2, 1, date1, datetime(2010, 11, 16)), + (-2, 2, 1, date2, datetime(2010, 11, 16)), + (-2, 2, 1, date3, datetime(2010, 11, 16)), + (-2, 2, 1, date4, datetime(2010, 12, 21)), + + (-1, 2, 1, date1, datetime(2010, 12, 21)), + (-1, 2, 1, date2, datetime(2010, 12, 21)), + (-1, 2, 1, date3, datetime(2010, 12, 21)), + (-1, 2, 1, date4, datetime(2011, 1, 18)), + + (1, 0, 0, date1, datetime(2011, 2, 7)), + (1, 0, 0, date2, datetime(2011, 2, 7)), + (1, 0, 0, date3, datetime(2011, 2, 7)), + (1, 0, 0, date4, datetime(2011, 2, 7)), + (1, 0, 1, date1, datetime(2011, 2, 1)), + (1, 0, 1, date2, datetime(2011, 2, 1)), + (1, 0, 1, date3, datetime(2011, 2, 1)), + (1, 0, 1, date4, datetime(2011, 2, 1)), + (1, 0, 2, date1, datetime(2011, 1, 5)), + (1, 0, 2, date2, datetime(2011, 2, 2)), + (1, 0, 2, date3, datetime(2011, 2, 2)), + (1, 0, 2, date4, datetime(2011, 2, 2)), + + (1, 2, 1, date1, datetime(2011, 1, 18)), + (1, 2, 1, date2, datetime(2011, 1, 18)), + (1, 2, 1, date3, datetime(2011, 2, 15)), + (1, 2, 1, date4, datetime(2011, 2, 15)), + + (2, 2, 1, date1, datetime(2011, 2, 15)), + (2, 2, 1, date2, datetime(2011, 2, 15)), + (2, 2, 1, date3, datetime(2011, 3, 15)), + (2, 2, 1, date4, datetime(2011, 3, 15))] + + for n, week, weekday, dt, expected in test_cases: + offset = WeekOfMonth(n, week=week, weekday=weekday) + assert_offset_equal(offset, dt, expected) + + # try subtracting + result = datetime(2011, 2, 1) - WeekOfMonth(week=1, weekday=2) + assert result == datetime(2011, 1, 12) + + result = datetime(2011, 2, 3) - WeekOfMonth(week=0, weekday=2) + assert result == datetime(2011, 2, 2) + + on_offset_cases = [(0, 0, datetime(2011, 2, 7), True), + (0, 0, datetime(2011, 2, 6), False), + (0, 0, datetime(2011, 2, 14), False), + (1, 0, datetime(2011, 2, 14), True), + (0, 1, datetime(2011, 2, 1), True), + (0, 1, datetime(2011, 2, 8), False)] + + @pytest.mark.parametrize('case', on_offset_cases) + def test_onOffset(self, case): + week, weekday, dt, expected = case + offset = 
WeekOfMonth(week=week, weekday=weekday) + assert offset.onOffset(dt) == expected + + +class TestLastWeekOfMonth(Base): + _offset = LastWeekOfMonth + + def test_constructor(self): + tm.assert_raises_regex(ValueError, "^N cannot be 0", + LastWeekOfMonth, n=0, weekday=1) + + tm.assert_raises_regex(ValueError, "^Day", LastWeekOfMonth, n=1, + weekday=-1) + tm.assert_raises_regex( + ValueError, "^Day", LastWeekOfMonth, n=1, weekday=7) + + def test_offset(self): + # Saturday + last_sat = datetime(2013, 8, 31) + next_sat = datetime(2013, 9, 28) + offset_sat = LastWeekOfMonth(n=1, weekday=5) + + one_day_before = (last_sat + timedelta(days=-1)) + assert one_day_before + offset_sat == last_sat + + one_day_after = (last_sat + timedelta(days=+1)) + assert one_day_after + offset_sat == next_sat + + # Test On that day + assert last_sat + offset_sat == next_sat + + # Thursday + + offset_thur = LastWeekOfMonth(n=1, weekday=3) + last_thurs = datetime(2013, 1, 31) + next_thurs = datetime(2013, 2, 28) + + one_day_before = last_thurs + timedelta(days=-1) + assert one_day_before + offset_thur == last_thurs + + one_day_after = last_thurs + timedelta(days=+1) + assert one_day_after + offset_thur == next_thurs + + # Test on that day + assert last_thurs + offset_thur == next_thurs + + three_before = last_thurs + timedelta(days=-3) + assert three_before + offset_thur == last_thurs + + two_after = last_thurs + timedelta(days=+2) + assert two_after + offset_thur == next_thurs + + offset_sunday = LastWeekOfMonth(n=1, weekday=WeekDay.SUN) + assert datetime(2013, 7, 31) + offset_sunday == datetime(2013, 8, 25) + + on_offset_cases = [ + (WeekDay.SUN, datetime(2013, 1, 27), True), + (WeekDay.SAT, datetime(2013, 3, 30), True), + (WeekDay.MON, datetime(2013, 2, 18), False), # Not the last Mon + (WeekDay.SUN, datetime(2013, 2, 25), False), # Not a SUN + (WeekDay.MON, datetime(2013, 2, 25), True), + (WeekDay.SAT, datetime(2013, 11, 30), True), + + (WeekDay.SAT, datetime(2006, 8, 26), True), + 
(WeekDay.SAT, datetime(2007, 8, 25), True), + (WeekDay.SAT, datetime(2008, 8, 30), True), + (WeekDay.SAT, datetime(2009, 8, 29), True), + (WeekDay.SAT, datetime(2010, 8, 28), True), + (WeekDay.SAT, datetime(2011, 8, 27), True), + (WeekDay.SAT, datetime(2019, 8, 31), True)] + + @pytest.mark.parametrize('case', on_offset_cases) + def test_onOffset(self, case): + weekday, dt, expected = case + offset = LastWeekOfMonth(weekday=weekday) + assert offset.onOffset(dt) == expected + + +class TestSemiMonthEnd(Base): + _offset = SemiMonthEnd + + def test_offset_whole_year(self): + dates = (datetime(2007, 12, 31), + datetime(2008, 1, 15), + datetime(2008, 1, 31), + datetime(2008, 2, 15), + datetime(2008, 2, 29), + datetime(2008, 3, 15), + datetime(2008, 3, 31), + datetime(2008, 4, 15), + datetime(2008, 4, 30), + datetime(2008, 5, 15), + datetime(2008, 5, 31), + datetime(2008, 6, 15), + datetime(2008, 6, 30), + datetime(2008, 7, 15), + datetime(2008, 7, 31), + datetime(2008, 8, 15), + datetime(2008, 8, 31), + datetime(2008, 9, 15), + datetime(2008, 9, 30), + datetime(2008, 10, 15), + datetime(2008, 10, 31), + datetime(2008, 11, 15), + datetime(2008, 11, 30), + datetime(2008, 12, 15), + datetime(2008, 12, 31)) + + for base, exp_date in zip(dates[:-1], dates[1:]): + assert_offset_equal(SemiMonthEnd(), base, exp_date) + + # ensure .apply_index works as expected + s = DatetimeIndex(dates[:-1]) + result = SemiMonthEnd().apply_index(s) + exp = DatetimeIndex(dates[1:]) + tm.assert_index_equal(result, exp) + + # ensure generating a range with DatetimeIndex gives same result + result = DatetimeIndex(start=dates[0], end=dates[-1], freq='SM') + exp = DatetimeIndex(dates) + tm.assert_index_equal(result, exp) + + offset_cases = [] + offset_cases.append((SemiMonthEnd(), { + datetime(2008, 1, 1): datetime(2008, 1, 15), + datetime(2008, 1, 15): datetime(2008, 1, 31), + datetime(2008, 1, 31): datetime(2008, 2, 15), + datetime(2006, 12, 14): datetime(2006, 12, 15), + datetime(2006, 12, 29): 
datetime(2006, 12, 31), + datetime(2006, 12, 31): datetime(2007, 1, 15), + datetime(2007, 1, 1): datetime(2007, 1, 15), + datetime(2006, 12, 1): datetime(2006, 12, 15), + datetime(2006, 12, 15): datetime(2006, 12, 31)})) + + offset_cases.append((SemiMonthEnd(day_of_month=20), { + datetime(2008, 1, 1): datetime(2008, 1, 20), + datetime(2008, 1, 15): datetime(2008, 1, 20), + datetime(2008, 1, 21): datetime(2008, 1, 31), + datetime(2008, 1, 31): datetime(2008, 2, 20), + datetime(2006, 12, 14): datetime(2006, 12, 20), + datetime(2006, 12, 29): datetime(2006, 12, 31), + datetime(2006, 12, 31): datetime(2007, 1, 20), + datetime(2007, 1, 1): datetime(2007, 1, 20), + datetime(2006, 12, 1): datetime(2006, 12, 20), + datetime(2006, 12, 15): datetime(2006, 12, 20)})) + + offset_cases.append((SemiMonthEnd(0), { + datetime(2008, 1, 1): datetime(2008, 1, 15), + datetime(2008, 1, 16): datetime(2008, 1, 31), + datetime(2008, 1, 15): datetime(2008, 1, 15), + datetime(2008, 1, 31): datetime(2008, 1, 31), + datetime(2006, 12, 29): datetime(2006, 12, 31), + datetime(2006, 12, 31): datetime(2006, 12, 31), + datetime(2007, 1, 1): datetime(2007, 1, 15)})) + + offset_cases.append((SemiMonthEnd(0, day_of_month=16), { + datetime(2008, 1, 1): datetime(2008, 1, 16), + datetime(2008, 1, 16): datetime(2008, 1, 16), + datetime(2008, 1, 15): datetime(2008, 1, 16), + datetime(2008, 1, 31): datetime(2008, 1, 31), + datetime(2006, 12, 29): datetime(2006, 12, 31), + datetime(2006, 12, 31): datetime(2006, 12, 31), + datetime(2007, 1, 1): datetime(2007, 1, 16)})) + + offset_cases.append((SemiMonthEnd(2), { + datetime(2008, 1, 1): datetime(2008, 1, 31), + datetime(2008, 1, 31): datetime(2008, 2, 29), + datetime(2006, 12, 29): datetime(2007, 1, 15), + datetime(2006, 12, 31): datetime(2007, 1, 31), + datetime(2007, 1, 1): datetime(2007, 1, 31), + datetime(2007, 1, 16): datetime(2007, 2, 15), + datetime(2006, 11, 1): datetime(2006, 11, 30)})) + + offset_cases.append((SemiMonthEnd(-1), { + datetime(2007, 1, 
1): datetime(2006, 12, 31), + datetime(2008, 6, 30): datetime(2008, 6, 15), + datetime(2008, 12, 31): datetime(2008, 12, 15), + datetime(2006, 12, 29): datetime(2006, 12, 15), + datetime(2006, 12, 30): datetime(2006, 12, 15), + datetime(2007, 1, 1): datetime(2006, 12, 31)})) + + offset_cases.append((SemiMonthEnd(-1, day_of_month=4), { + datetime(2007, 1, 1): datetime(2006, 12, 31), + datetime(2007, 1, 4): datetime(2006, 12, 31), + datetime(2008, 6, 30): datetime(2008, 6, 4), + datetime(2008, 12, 31): datetime(2008, 12, 4), + datetime(2006, 12, 5): datetime(2006, 12, 4), + datetime(2006, 12, 30): datetime(2006, 12, 4), + datetime(2007, 1, 1): datetime(2006, 12, 31)})) + + offset_cases.append((SemiMonthEnd(-2), { + datetime(2007, 1, 1): datetime(2006, 12, 15), + datetime(2008, 6, 30): datetime(2008, 5, 31), + datetime(2008, 3, 15): datetime(2008, 2, 15), + datetime(2008, 12, 31): datetime(2008, 11, 30), + datetime(2006, 12, 29): datetime(2006, 11, 30), + datetime(2006, 12, 14): datetime(2006, 11, 15), + datetime(2007, 1, 1): datetime(2006, 12, 15)})) + + @pytest.mark.parametrize('case', offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in compat.iteritems(cases): + assert_offset_equal(offset, base, expected) + + @pytest.mark.parametrize('case', offset_cases) + def test_apply_index(self, case): + offset, cases = case + s = DatetimeIndex(cases.keys()) + result = offset.apply_index(s) + exp = DatetimeIndex(cases.values()) + tm.assert_index_equal(result, exp) + + on_offset_cases = [(datetime(2007, 12, 31), True), + (datetime(2007, 12, 15), True), + (datetime(2007, 12, 14), False), + (datetime(2007, 12, 1), False), + (datetime(2008, 2, 29), True)] + + @pytest.mark.parametrize('case', on_offset_cases) + def test_onOffset(self, case): + dt, expected = case + assert_onOffset(SemiMonthEnd(), dt, expected) + + @pytest.mark.parametrize('klass,assert_func', + [(Series, tm.assert_series_equal), + (DatetimeIndex, tm.assert_index_equal)]) + 
def test_vectorized_offset_addition(self, klass, assert_func): + s = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'), + Timestamp('2000-02-15', tz='US/Central')], name='a') + + result = s + SemiMonthEnd() + result2 = SemiMonthEnd() + s + exp = klass([Timestamp('2000-01-31 00:15:00', tz='US/Central'), + Timestamp('2000-02-29', tz='US/Central')], name='a') + assert_func(result, exp) + assert_func(result2, exp) + + s = klass([Timestamp('2000-01-01 00:15:00', tz='US/Central'), + Timestamp('2000-02-01', tz='US/Central')], name='a') + result = s + SemiMonthEnd() + result2 = SemiMonthEnd() + s + exp = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'), + Timestamp('2000-02-15', tz='US/Central')], name='a') + assert_func(result, exp) + assert_func(result2, exp) + + +class TestSemiMonthBegin(Base): + _offset = SemiMonthBegin + + def test_offset_whole_year(self): + dates = (datetime(2007, 12, 15), + datetime(2008, 1, 1), + datetime(2008, 1, 15), + datetime(2008, 2, 1), + datetime(2008, 2, 15), + datetime(2008, 3, 1), + datetime(2008, 3, 15), + datetime(2008, 4, 1), + datetime(2008, 4, 15), + datetime(2008, 5, 1), + datetime(2008, 5, 15), + datetime(2008, 6, 1), + datetime(2008, 6, 15), + datetime(2008, 7, 1), + datetime(2008, 7, 15), + datetime(2008, 8, 1), + datetime(2008, 8, 15), + datetime(2008, 9, 1), + datetime(2008, 9, 15), + datetime(2008, 10, 1), + datetime(2008, 10, 15), + datetime(2008, 11, 1), + datetime(2008, 11, 15), + datetime(2008, 12, 1), + datetime(2008, 12, 15)) + + for base, exp_date in zip(dates[:-1], dates[1:]): + assert_offset_equal(SemiMonthBegin(), base, exp_date) + + # ensure .apply_index works as expected + s = DatetimeIndex(dates[:-1]) + result = SemiMonthBegin().apply_index(s) + exp = DatetimeIndex(dates[1:]) + tm.assert_index_equal(result, exp) + + # ensure generating a range with DatetimeIndex gives same result + result = DatetimeIndex(start=dates[0], end=dates[-1], freq='SMS') + exp = DatetimeIndex(dates) + 
tm.assert_index_equal(result, exp) + + offset_cases = [] + offset_cases.append((SemiMonthBegin(), { + datetime(2008, 1, 1): datetime(2008, 1, 15), + datetime(2008, 1, 15): datetime(2008, 2, 1), + datetime(2008, 1, 31): datetime(2008, 2, 1), + datetime(2006, 12, 14): datetime(2006, 12, 15), + datetime(2006, 12, 29): datetime(2007, 1, 1), + datetime(2006, 12, 31): datetime(2007, 1, 1), + datetime(2007, 1, 1): datetime(2007, 1, 15), + datetime(2006, 12, 1): datetime(2006, 12, 15), + datetime(2006, 12, 15): datetime(2007, 1, 1)})) + + offset_cases.append((SemiMonthBegin(day_of_month=20), { + datetime(2008, 1, 1): datetime(2008, 1, 20), + datetime(2008, 1, 15): datetime(2008, 1, 20), + datetime(2008, 1, 21): datetime(2008, 2, 1), + datetime(2008, 1, 31): datetime(2008, 2, 1), + datetime(2006, 12, 14): datetime(2006, 12, 20), + datetime(2006, 12, 29): datetime(2007, 1, 1), + datetime(2006, 12, 31): datetime(2007, 1, 1), + datetime(2007, 1, 1): datetime(2007, 1, 20), + datetime(2006, 12, 1): datetime(2006, 12, 20), + datetime(2006, 12, 15): datetime(2006, 12, 20)})) + + offset_cases.append((SemiMonthBegin(0), { + datetime(2008, 1, 1): datetime(2008, 1, 1), + datetime(2008, 1, 16): datetime(2008, 2, 1), + datetime(2008, 1, 15): datetime(2008, 1, 15), + datetime(2008, 1, 31): datetime(2008, 2, 1), + datetime(2006, 12, 29): datetime(2007, 1, 1), + datetime(2006, 12, 2): datetime(2006, 12, 15), + datetime(2007, 1, 1): datetime(2007, 1, 1)})) + + offset_cases.append((SemiMonthBegin(0, day_of_month=16), { + datetime(2008, 1, 1): datetime(2008, 1, 1), + datetime(2008, 1, 16): datetime(2008, 1, 16), + datetime(2008, 1, 15): datetime(2008, 1, 16), + datetime(2008, 1, 31): datetime(2008, 2, 1), + datetime(2006, 12, 29): datetime(2007, 1, 1), + datetime(2006, 12, 31): datetime(2007, 1, 1), + datetime(2007, 1, 5): datetime(2007, 1, 16), + datetime(2007, 1, 1): datetime(2007, 1, 1)})) + + offset_cases.append((SemiMonthBegin(2), { + datetime(2008, 1, 1): datetime(2008, 2, 1), + 
datetime(2008, 1, 31): datetime(2008, 2, 15), + datetime(2006, 12, 1): datetime(2007, 1, 1), + datetime(2006, 12, 29): datetime(2007, 1, 15), + datetime(2006, 12, 15): datetime(2007, 1, 15), + datetime(2007, 1, 1): datetime(2007, 2, 1), + datetime(2007, 1, 16): datetime(2007, 2, 15), + datetime(2006, 11, 1): datetime(2006, 12, 1)})) + + offset_cases.append((SemiMonthBegin(-1), { + datetime(2007, 1, 1): datetime(2006, 12, 15), + datetime(2008, 6, 30): datetime(2008, 6, 15), + datetime(2008, 6, 14): datetime(2008, 6, 1), + datetime(2008, 12, 31): datetime(2008, 12, 15), + datetime(2006, 12, 29): datetime(2006, 12, 15), + datetime(2006, 12, 15): datetime(2006, 12, 1), + datetime(2007, 1, 1): datetime(2006, 12, 15)})) + + offset_cases.append((SemiMonthBegin(-1, day_of_month=4), { + datetime(2007, 1, 1): datetime(2006, 12, 4), + datetime(2007, 1, 4): datetime(2007, 1, 1), + datetime(2008, 6, 30): datetime(2008, 6, 4), + datetime(2008, 12, 31): datetime(2008, 12, 4), + datetime(2006, 12, 5): datetime(2006, 12, 4), + datetime(2006, 12, 30): datetime(2006, 12, 4), + datetime(2006, 12, 2): datetime(2006, 12, 1), + datetime(2007, 1, 1): datetime(2006, 12, 4)})) + + offset_cases.append((SemiMonthBegin(-2), { + datetime(2007, 1, 1): datetime(2006, 12, 1), + datetime(2008, 6, 30): datetime(2008, 6, 1), + datetime(2008, 6, 14): datetime(2008, 5, 15), + datetime(2008, 12, 31): datetime(2008, 12, 1), + datetime(2006, 12, 29): datetime(2006, 12, 1), + datetime(2006, 12, 15): datetime(2006, 11, 15), + datetime(2007, 1, 1): datetime(2006, 12, 1)})) + + @pytest.mark.parametrize('case', offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in compat.iteritems(cases): + assert_offset_equal(offset, base, expected) + + @pytest.mark.parametrize('case', offset_cases) + def test_apply_index(self, case): + offset, cases = case + s = DatetimeIndex(cases.keys()) + result = offset.apply_index(s) + exp = DatetimeIndex(cases.values()) + 
tm.assert_index_equal(result, exp) + + on_offset_cases = [(datetime(2007, 12, 1), True), + (datetime(2007, 12, 15), True), + (datetime(2007, 12, 14), False), + (datetime(2007, 12, 31), False), + (datetime(2008, 2, 15), True)] + + @pytest.mark.parametrize('case', on_offset_cases) + def test_onOffset(self, case): + dt, expected = case + assert_onOffset(SemiMonthBegin(), dt, expected) + + @pytest.mark.parametrize('klass,assert_func', + [(Series, tm.assert_series_equal), + (DatetimeIndex, tm.assert_index_equal)]) + def test_vectorized_offset_addition(self, klass, assert_func): + s = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'), + Timestamp('2000-02-15', tz='US/Central')], name='a') + result = s + SemiMonthBegin() + result2 = SemiMonthBegin() + s + exp = klass([Timestamp('2000-02-01 00:15:00', tz='US/Central'), + Timestamp('2000-03-01', tz='US/Central')], name='a') + assert_func(result, exp) + assert_func(result2, exp) + + s = klass([Timestamp('2000-01-01 00:15:00', tz='US/Central'), + Timestamp('2000-02-01', tz='US/Central')], name='a') + result = s + SemiMonthBegin() + result2 = SemiMonthBegin() + s + exp = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'), + Timestamp('2000-02-15', tz='US/Central')], name='a') + assert_func(result, exp) + assert_func(result2, exp) + + +def test_Easter(): + assert_offset_equal(Easter(), datetime(2010, 1, 1), datetime(2010, 4, 4)) + assert_offset_equal(Easter(), datetime(2010, 4, 5), datetime(2011, 4, 24)) + assert_offset_equal(Easter(2), datetime(2010, 1, 1), datetime(2011, 4, 24)) + + assert_offset_equal(Easter(), datetime(2010, 4, 4), datetime(2011, 4, 24)) + assert_offset_equal(Easter(2), datetime(2010, 4, 4), datetime(2012, 4, 8)) + + assert_offset_equal(-Easter(), datetime(2011, 1, 1), datetime(2010, 4, 4)) + assert_offset_equal(-Easter(), datetime(2010, 4, 5), datetime(2010, 4, 4)) + assert_offset_equal(-Easter(2), + datetime(2011, 1, 1), + datetime(2009, 4, 12)) + + assert_offset_equal(-Easter(), 
datetime(2010, 4, 4), datetime(2009, 4, 12)) + assert_offset_equal(-Easter(2), + datetime(2010, 4, 4), + datetime(2008, 3, 23)) + + +class TestOffsetNames(object): + + def test_get_offset_name(self): + assert BDay().freqstr == 'B' + assert BDay(2).freqstr == '2B' + assert BMonthEnd().freqstr == 'BM' + assert Week(weekday=0).freqstr == 'W-MON' + assert Week(weekday=1).freqstr == 'W-TUE' + assert Week(weekday=2).freqstr == 'W-WED' + assert Week(weekday=3).freqstr == 'W-THU' + assert Week(weekday=4).freqstr == 'W-FRI' + + assert LastWeekOfMonth(weekday=WeekDay.SUN).freqstr == "LWOM-SUN" + + +def test_get_offset(): + with tm.assert_raises_regex(ValueError, _INVALID_FREQ_ERROR): + get_offset('gibberish') + with tm.assert_raises_regex(ValueError, _INVALID_FREQ_ERROR): + get_offset('QS-JAN-B') + + pairs = [ + ('B', BDay()), ('b', BDay()), ('bm', BMonthEnd()), + ('Bm', BMonthEnd()), ('W-MON', Week(weekday=0)), + ('W-TUE', Week(weekday=1)), ('W-WED', Week(weekday=2)), + ('W-THU', Week(weekday=3)), ('W-FRI', Week(weekday=4))] + + for name, expected in pairs: + offset = get_offset(name) + assert offset == expected, ("Expected %r to yield %r (actual: %r)" % + (name, expected, offset)) + + +def test_get_offset_legacy(): + pairs = [('w@Sat', Week(weekday=5))] + for name, expected in pairs: + with tm.assert_raises_regex(ValueError, _INVALID_FREQ_ERROR): + get_offset(name) + + +class TestOffsetAliases(object): + + def setup_method(self, method): + _offset_map.clear() + + def test_alias_equality(self): + for k, v in compat.iteritems(_offset_map): + if v is None: + continue + assert k == v.copy() + + def test_rule_code(self): + lst = ['M', 'MS', 'BM', 'BMS', 'D', 'B', 'H', 'T', 'S', 'L', 'U'] + for k in lst: + assert k == get_offset(k).rule_code + # should be cached - this is kind of an internals test... 
+ assert k in _offset_map + assert k == (get_offset(k) * 3).rule_code + + suffix_lst = ['MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN'] + base = 'W' + for v in suffix_lst: + alias = '-'.join([base, v]) + assert alias == get_offset(alias).rule_code + assert alias == (get_offset(alias) * 5).rule_code + + suffix_lst = ['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL', 'AUG', + 'SEP', 'OCT', 'NOV', 'DEC'] + base_lst = ['A', 'AS', 'BA', 'BAS', 'Q', 'QS', 'BQ', 'BQS'] + for base in base_lst: + for v in suffix_lst: + alias = '-'.join([base, v]) + assert alias == get_offset(alias).rule_code + assert alias == (get_offset(alias) * 5).rule_code + + lst = ['M', 'D', 'B', 'H', 'T', 'S', 'L', 'U'] + for k in lst: + code, stride = get_freq_code('3' + k) + assert isinstance(code, int) + assert stride == 3 + assert k == get_freq_str(code) + + +def test_dateoffset_misc(): + oset = offsets.DateOffset(months=2, days=4) + # it works + oset.freqstr + + assert (not offsets.DateOffset(months=2) == 2) + + +def test_freq_offsets(): + off = BDay(1, offset=timedelta(0, 1800)) + assert (off.freqstr == 'B+30Min') + + off = BDay(1, offset=timedelta(0, -1800)) + assert (off.freqstr == 'B-30Min') + + +def get_all_subclasses(cls): + ret = set() + this_subclasses = cls.__subclasses__() + ret = ret | set(this_subclasses) + for this_subclass in this_subclasses: + ret | get_all_subclasses(this_subclass) + return ret + + +class TestCaching(object): + + # as of GH 6479 (in 0.14.0), offset caching is turned off + # as of v0.12.0 only BusinessMonth/Quarter were actually caching + + def setup_method(self, method): + _daterange_cache.clear() + _offset_map.clear() + + def run_X_index_creation(self, cls): + inst1 = cls() + if not inst1.isAnchored(): + assert not inst1._should_cache(), cls + return + + assert inst1._should_cache(), cls + + DatetimeIndex(start=datetime(2013, 1, 31), end=datetime(2013, 3, 31), + freq=inst1, normalize=True) + assert cls() in _daterange_cache, cls + + def 
test_should_cache_month_end(self): + assert not MonthEnd()._should_cache() + + def test_should_cache_bmonth_end(self): + assert not BusinessMonthEnd()._should_cache() + + def test_should_cache_week_month(self): + assert not WeekOfMonth(weekday=1, week=2)._should_cache() + + def test_all_cacheableoffsets(self): + for subclass in get_all_subclasses(CacheableOffset): + if subclass.__name__[0] == "_" \ + or subclass in TestCaching.no_simple_ctr: + continue + self.run_X_index_creation(subclass) + + def test_month_end_index_creation(self): + DatetimeIndex(start=datetime(2013, 1, 31), end=datetime(2013, 3, 31), + freq=MonthEnd(), normalize=True) + assert not MonthEnd() in _daterange_cache + + def test_bmonth_end_index_creation(self): + DatetimeIndex(start=datetime(2013, 1, 31), end=datetime(2013, 3, 29), + freq=BusinessMonthEnd(), normalize=True) + assert not BusinessMonthEnd() in _daterange_cache + + def test_week_of_month_index_creation(self): + inst1 = WeekOfMonth(weekday=1, week=2) + DatetimeIndex(start=datetime(2013, 1, 31), end=datetime(2013, 3, 29), + freq=inst1, normalize=True) + inst2 = WeekOfMonth(weekday=1, week=2) + assert inst2 not in _daterange_cache + + +class TestReprNames(object): + + def test_str_for_named_is_name(self): + # look at all the amazing combinations! 
+ month_prefixes = ['A', 'AS', 'BA', 'BAS', 'Q', 'BQ', 'BQS', 'QS'] + names = [prefix + '-' + month + for prefix in month_prefixes + for month in ['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL', + 'AUG', 'SEP', 'OCT', 'NOV', 'DEC']] + days = ['MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN'] + names += ['W-' + day for day in days] + names += ['WOM-' + week + day + for week in ('1', '2', '3', '4') for day in days] + _offset_map.clear() + for name in names: + offset = get_offset(name) + assert offset.freqstr == name + + +def get_utc_offset_hours(ts): + # take a Timestamp and compute total hours of utc offset + o = ts.utcoffset() + return (o.days * 24 * 3600 + o.seconds) / 3600.0 + + +class TestDST(object): + """ + test DateOffset additions over Daylight Savings Time + """ + # one microsecond before the DST transition + ts_pre_fallback = "2013-11-03 01:59:59.999999" + ts_pre_springfwd = "2013-03-10 01:59:59.999999" + + # test both basic names and dateutil timezones + timezone_utc_offsets = { + 'US/Eastern': dict(utc_offset_daylight=-4, + utc_offset_standard=-5, ), + 'dateutil/US/Pacific': dict(utc_offset_daylight=-7, + utc_offset_standard=-8, ) + } + valid_date_offsets_singular = [ + 'weekday', 'day', 'hour', 'minute', 'second', 'microsecond' + ] + valid_date_offsets_plural = [ + 'weeks', 'days', + 'hours', 'minutes', 'seconds', + 'milliseconds', 'microseconds' + ] + + def _test_all_offsets(self, n, **kwds): + valid_offsets = self.valid_date_offsets_plural if n > 1 \ + else self.valid_date_offsets_singular + + for name in valid_offsets: + self._test_offset(offset_name=name, offset_n=n, **kwds) + + def _test_offset(self, offset_name, offset_n, tstart, expected_utc_offset): + offset = DateOffset(**{offset_name: offset_n}) + + t = tstart + offset + if expected_utc_offset is not None: + assert get_utc_offset_hours(t) == expected_utc_offset + + if offset_name == 'weeks': + # dates should match + assert t.date() == timedelta(days=7 * offset.kwds[ + 'weeks']) + tstart.date() 
+ # expect the same day of week, hour of day, minute, second, ... + assert (t.dayofweek == tstart.dayofweek and + t.hour == tstart.hour and + t.minute == tstart.minute and + t.second == tstart.second) + elif offset_name == 'days': + # dates should match + assert timedelta(offset.kwds['days']) + tstart.date() == t.date() + # expect the same hour of day, minute, second, ... + assert (t.hour == tstart.hour and + t.minute == tstart.minute and + t.second == tstart.second) + elif offset_name in self.valid_date_offsets_singular: + # expect the singular offset value to match between tstart and t + datepart_offset = getattr(t, offset_name + if offset_name != 'weekday' else + 'dayofweek') + assert datepart_offset == offset.kwds[offset_name] + else: + # the offset should be the same as if it was done in UTC + assert (t == (tstart.tz_convert('UTC') + offset) + .tz_convert('US/Pacific')) + + def _make_timestamp(self, string, hrs_offset, tz): + if hrs_offset >= 0: + offset_string = '{hrs:02d}00'.format(hrs=hrs_offset) + else: + offset_string = '-{hrs:02d}00'.format(hrs=-1 * hrs_offset) + return Timestamp(string + offset_string).tz_convert(tz) + + def test_fallback_plural(self): + # test moving from daylight savings to standard time + import dateutil + for tz, utc_offsets in self.timezone_utc_offsets.items(): + hrs_pre = utc_offsets['utc_offset_daylight'] + hrs_post = utc_offsets['utc_offset_standard'] + + if LooseVersion(dateutil.__version__) < LooseVersion('2.6.0'): + # buggy ambiguous behavior in 2.6.0 + # GH 14621 + # https://github.com/dateutil/dateutil/issues/321 + self._test_all_offsets( + n=3, tstart=self._make_timestamp(self.ts_pre_fallback, + hrs_pre, tz), + expected_utc_offset=hrs_post) + elif LooseVersion(dateutil.__version__) > LooseVersion('2.6.0'): + # fixed, but skip the test + continue + + def test_springforward_plural(self): + # test moving from standard to daylight savings + for tz, utc_offsets in self.timezone_utc_offsets.items(): + hrs_pre = 
utc_offsets['utc_offset_standard'] + hrs_post = utc_offsets['utc_offset_daylight'] + self._test_all_offsets( + n=3, tstart=self._make_timestamp(self.ts_pre_springfwd, + hrs_pre, tz), + expected_utc_offset=hrs_post) + + def test_fallback_singular(self): + # in the case of singular offsets, we don't necessarily know which utc + # offset the new Timestamp will wind up in (the tz for 1 month may be + # different from 1 second) so we don't specify an expected_utc_offset + for tz, utc_offsets in self.timezone_utc_offsets.items(): + hrs_pre = utc_offsets['utc_offset_standard'] + self._test_all_offsets(n=1, tstart=self._make_timestamp( + self.ts_pre_fallback, hrs_pre, tz), expected_utc_offset=None) + + def test_springforward_singular(self): + for tz, utc_offsets in self.timezone_utc_offsets.items(): + hrs_pre = utc_offsets['utc_offset_standard'] + self._test_all_offsets(n=1, tstart=self._make_timestamp( + self.ts_pre_springfwd, hrs_pre, tz), expected_utc_offset=None) + + offset_classes = {MonthBegin: ['11/2/2012', '12/1/2012'], + MonthEnd: ['11/2/2012', '11/30/2012'], + BMonthBegin: ['11/2/2012', '12/3/2012'], + BMonthEnd: ['11/2/2012', '11/30/2012'], + CBMonthBegin: ['11/2/2012', '12/3/2012'], + CBMonthEnd: ['11/2/2012', '11/30/2012'], + SemiMonthBegin: ['11/2/2012', '11/15/2012'], + SemiMonthEnd: ['11/2/2012', '11/15/2012'], + Week: ['11/2/2012', '11/9/2012'], + YearBegin: ['11/2/2012', '1/1/2013'], + YearEnd: ['11/2/2012', '12/31/2012'], + BYearBegin: ['11/2/2012', '1/1/2013'], + BYearEnd: ['11/2/2012', '12/31/2012'], + QuarterBegin: ['11/2/2012', '12/1/2012'], + QuarterEnd: ['11/2/2012', '12/31/2012'], + BQuarterBegin: ['11/2/2012', '12/3/2012'], + BQuarterEnd: ['11/2/2012', '12/31/2012'], + Day: ['11/4/2012', '11/4/2012 23:00']}.items() + + @pytest.mark.parametrize('tup', offset_classes) + def test_all_offset_classes(self, tup): + offset, test_values = tup + + first = Timestamp(test_values[0], tz='US/Eastern') + offset() + second = Timestamp(test_values[1], 
tz='US/Eastern') + assert first == second + + +# --------------------------------------------------------------------- +def test_get_offset_day_error(): + # subclass of _BaseOffset must override _day_opt attribute, or we should + # get a NotImplementedError + + with pytest.raises(NotImplementedError): + DateOffset()._get_offset_day(datetime.now()) + + +def test_valid_default_arguments(offset_types): + # GH#19142 check that the calling the constructors without passing + # any keyword arguments produce valid offsets + cls = offset_types + cls() + + +@pytest.mark.parametrize('kwd', sorted(list(liboffsets.relativedelta_kwds))) +def test_valid_month_attributes(kwd, month_classes): + # GH#18226 + cls = month_classes + # check that we cannot create e.g. MonthEnd(weeks=3) + with pytest.raises(TypeError): + cls(**{kwd: 3}) + + +@pytest.mark.parametrize('kwd', sorted(list(liboffsets.relativedelta_kwds))) +def test_valid_tick_attributes(kwd, tick_classes): + # GH#18226 + cls = tick_classes + # check that we cannot create e.g. 
Hour(weeks=3) + with pytest.raises(TypeError): + cls(**{kwd: 3}) + + +def test_validate_n_error(): + with pytest.raises(TypeError): + DateOffset(n='Doh!') + + with pytest.raises(TypeError): + MonthBegin(n=timedelta(1)) + + with pytest.raises(TypeError): + BDay(n=np.array([1, 2], dtype=np.int64)) + + +def test_require_integers(offset_types): + cls = offset_types + with pytest.raises(ValueError): + cls(n=1.5) + + +def test_weeks_onoffset(): + # GH#18510 Week with weekday = None, normalize = False should always + # be onOffset + offset = Week(n=2, weekday=None) + ts = Timestamp('1862-01-13 09:03:34.873477378+0210', tz='Africa/Lusaka') + fast = offset.onOffset(ts) + slow = (ts + offset) - offset == ts + assert fast == slow + + # negative n + offset = Week(n=2, weekday=None) + ts = Timestamp('1856-10-24 16:18:36.556360110-0717', tz='Pacific/Easter') + fast = offset.onOffset(ts) + slow = (ts + offset) - offset == ts + assert fast == slow + + +def test_weekofmonth_onoffset(): + # GH#18864 + # Make sure that nanoseconds don't trip up onOffset (and with it apply) + offset = WeekOfMonth(n=2, week=2, weekday=0) + ts = Timestamp('1916-05-15 01:14:49.583410462+0422', tz='Asia/Qyzylorda') + fast = offset.onOffset(ts) + slow = (ts + offset) - offset == ts + assert fast == slow + + # negative n + offset = WeekOfMonth(n=-3, week=1, weekday=0) + ts = Timestamp('1980-12-08 03:38:52.878321185+0500', tz='Asia/Oral') + fast = offset.onOffset(ts) + slow = (ts + offset) - offset == ts + assert fast == slow + + +def test_last_week_of_month_on_offset(): + # GH#19036, GH#18977 _adjust_dst was incorrect for LastWeekOfMonth + offset = LastWeekOfMonth(n=4, weekday=6) + ts = Timestamp('1917-05-27 20:55:27.084284178+0200', + tz='Europe/Warsaw') + slow = (ts + offset) - offset == ts + fast = offset.onOffset(ts) + assert fast == slow + + # negative n + offset = LastWeekOfMonth(n=-4, weekday=5) + ts = Timestamp('2005-08-27 05:01:42.799392561-0500', + tz='America/Rainy_River') + slow = (ts + offset) 
- offset == ts + fast = offset.onOffset(ts) + assert fast == slow diff --git a/pandas/tests/tseries/offsets/test_ticks.py b/pandas/tests/tseries/offsets/test_ticks.py new file mode 100644 index 0000000000000..24033d4ff6cbd --- /dev/null +++ b/pandas/tests/tseries/offsets/test_ticks.py @@ -0,0 +1,236 @@ +# -*- coding: utf-8 -*- +""" +Tests for offsets.Tick and subclasses +""" +from datetime import datetime, timedelta + +import pytest +import numpy as np + +from pandas import Timedelta, Timestamp +from pandas.tseries import offsets +from pandas.tseries.offsets import Hour, Minute, Second, Milli, Micro, Nano + +from .common import assert_offset_equal + +# --------------------------------------------------------------------- +# Test Helpers + +tick_classes = [Hour, Minute, Second, Milli, Micro, Nano] + + +# --------------------------------------------------------------------- + + +def test_apply_ticks(): + result = offsets.Hour(3).apply(offsets.Hour(4)) + exp = offsets.Hour(7) + assert (result == exp) + + +def test_delta_to_tick(): + delta = timedelta(3) + + tick = offsets._delta_to_tick(delta) + assert (tick == offsets.Day(3)) + + +# --------------------------------------------------------------------- + + +def test_Hour(): + assert_offset_equal(Hour(), + datetime(2010, 1, 1), datetime(2010, 1, 1, 1)) + assert_offset_equal(Hour(-1), + datetime(2010, 1, 1, 1), datetime(2010, 1, 1)) + assert_offset_equal(2 * Hour(), + datetime(2010, 1, 1), datetime(2010, 1, 1, 2)) + assert_offset_equal(-1 * Hour(), + datetime(2010, 1, 1, 1), datetime(2010, 1, 1)) + + assert Hour(3) + Hour(2) == Hour(5) + assert Hour(3) - Hour(2) == Hour() + + assert Hour(4) != Hour(1) + + +def test_Minute(): + assert_offset_equal(Minute(), + datetime(2010, 1, 1), datetime(2010, 1, 1, 0, 1)) + assert_offset_equal(Minute(-1), + datetime(2010, 1, 1, 0, 1), datetime(2010, 1, 1)) + assert_offset_equal(2 * Minute(), + datetime(2010, 1, 1), datetime(2010, 1, 1, 0, 2)) + assert_offset_equal(-1 * Minute(), + 
datetime(2010, 1, 1, 0, 1), datetime(2010, 1, 1)) + + assert Minute(3) + Minute(2) == Minute(5) + assert Minute(3) - Minute(2) == Minute() + assert Minute(5) != Minute() + + +def test_Second(): + assert_offset_equal(Second(), + datetime(2010, 1, 1), + datetime(2010, 1, 1, 0, 0, 1)) + assert_offset_equal(Second(-1), + datetime(2010, 1, 1, 0, 0, 1), + datetime(2010, 1, 1)) + assert_offset_equal(2 * Second(), + datetime(2010, 1, 1), + datetime(2010, 1, 1, 0, 0, 2)) + assert_offset_equal(-1 * Second(), + datetime(2010, 1, 1, 0, 0, 1), + datetime(2010, 1, 1)) + + assert Second(3) + Second(2) == Second(5) + assert Second(3) - Second(2) == Second() + + +def test_Millisecond(): + assert_offset_equal(Milli(), + datetime(2010, 1, 1), + datetime(2010, 1, 1, 0, 0, 0, 1000)) + assert_offset_equal(Milli(-1), + datetime(2010, 1, 1, 0, 0, 0, 1000), + datetime(2010, 1, 1)) + assert_offset_equal(Milli(2), + datetime(2010, 1, 1), + datetime(2010, 1, 1, 0, 0, 0, 2000)) + assert_offset_equal(2 * Milli(), + datetime(2010, 1, 1), + datetime(2010, 1, 1, 0, 0, 0, 2000)) + assert_offset_equal(-1 * Milli(), + datetime(2010, 1, 1, 0, 0, 0, 1000), + datetime(2010, 1, 1)) + + assert Milli(3) + Milli(2) == Milli(5) + assert Milli(3) - Milli(2) == Milli() + + +def test_MillisecondTimestampArithmetic(): + assert_offset_equal(Milli(), + Timestamp('2010-01-01'), + Timestamp('2010-01-01 00:00:00.001')) + assert_offset_equal(Milli(-1), + Timestamp('2010-01-01 00:00:00.001'), + Timestamp('2010-01-01')) + + +def test_Microsecond(): + assert_offset_equal(Micro(), + datetime(2010, 1, 1), + datetime(2010, 1, 1, 0, 0, 0, 1)) + assert_offset_equal(Micro(-1), + datetime(2010, 1, 1, 0, 0, 0, 1), + datetime(2010, 1, 1)) + + assert_offset_equal(2 * Micro(), + datetime(2010, 1, 1), + datetime(2010, 1, 1, 0, 0, 0, 2)) + assert_offset_equal(-1 * Micro(), + datetime(2010, 1, 1, 0, 0, 0, 1), + datetime(2010, 1, 1)) + + assert Micro(3) + Micro(2) == Micro(5) + assert Micro(3) - Micro(2) == Micro() + + +def 
test_NanosecondGeneric(): + timestamp = Timestamp(datetime(2010, 1, 1)) + assert timestamp.nanosecond == 0 + + result = timestamp + Nano(10) + assert result.nanosecond == 10 + + reverse_result = Nano(10) + timestamp + assert reverse_result.nanosecond == 10 + + +def test_Nanosecond(): + timestamp = Timestamp(datetime(2010, 1, 1)) + assert_offset_equal(Nano(), + timestamp, + timestamp + np.timedelta64(1, 'ns')) + assert_offset_equal(Nano(-1), + timestamp + np.timedelta64(1, 'ns'), + timestamp) + assert_offset_equal(2 * Nano(), + timestamp, + timestamp + np.timedelta64(2, 'ns')) + assert_offset_equal(-1 * Nano(), + timestamp + np.timedelta64(1, 'ns'), + timestamp) + + assert Nano(3) + Nano(2) == Nano(5) + assert Nano(3) - Nano(2) == Nano() + + # GH9284 + assert Nano(1) + Nano(10) == Nano(11) + assert Nano(5) + Micro(1) == Nano(1005) + assert Micro(5) + Nano(1) == Nano(5001) + + +@pytest.mark.parametrize('kls, expected', + [(Hour, Timedelta(hours=5)), + (Minute, Timedelta(hours=2, minutes=3)), + (Second, Timedelta(hours=2, seconds=3)), + (Milli, Timedelta(hours=2, milliseconds=3)), + (Micro, Timedelta(hours=2, microseconds=3)), + (Nano, Timedelta(hours=2, nanoseconds=3))]) +def test_tick_addition(kls, expected): + offset = kls(3) + result = offset + Timedelta(hours=2) + assert isinstance(result, Timedelta) + assert result == expected + + +@pytest.mark.parametrize('cls1', tick_classes) +@pytest.mark.parametrize('cls2', tick_classes) +def test_tick_zero(cls1, cls2): + assert cls1(0) == cls2(0) + assert cls1(0) + cls2(0) == cls1(0) + + if cls1 is not Nano: + assert cls1(2) + cls2(0) == cls1(2) + + if cls1 is Nano: + assert cls1(2) + Nano(0) == cls1(2) + + +@pytest.mark.parametrize('cls', tick_classes) +def test_tick_equalities(cls): + assert cls(3) == cls(3) + assert cls() == cls(1) + + # not equals + assert cls(3) != cls(2) + assert cls(3) != cls(-3) + + +@pytest.mark.parametrize('cls', tick_classes) +def test_tick_operators(cls): + assert cls(3) + cls(2) == cls(5) + 
assert cls(3) - cls(2) == cls(1) + assert cls(800) + cls(300) == cls(1100) + assert cls(1000) - cls(5) == cls(995) + + +@pytest.mark.parametrize('cls', tick_classes) +def test_tick_offset(cls): + assert not cls().isAnchored() + + +@pytest.mark.parametrize('cls', tick_classes) +def test_compare_ticks(cls): + three = cls(3) + four = cls(4) + + # TODO: WTF? What is this range(10) supposed to do? + for _ in range(10): + assert three < cls(4) + assert cls(3) < four + assert four > cls(3) + assert cls(4) > three + assert cls(3) == cls(3) + assert cls(3) != cls(4) diff --git a/pandas/tests/tseries/offsets/test_yqm_offsets.py b/pandas/tests/tseries/offsets/test_yqm_offsets.py new file mode 100644 index 0000000000000..22b8cf6119d18 --- /dev/null +++ b/pandas/tests/tseries/offsets/test_yqm_offsets.py @@ -0,0 +1,1030 @@ +# -*- coding: utf-8 -*- +""" +Tests for Year, Quarter, and Month-based DateOffset subclasses +""" +from datetime import datetime + +import pytest + +import pandas as pd +from pandas import Timestamp +from pandas import compat + +from pandas.tseries.offsets import (BMonthBegin, BMonthEnd, + MonthBegin, MonthEnd, + YearEnd, YearBegin, BYearEnd, BYearBegin, + QuarterEnd, QuarterBegin, + BQuarterEnd, BQuarterBegin) + +from .test_offsets import Base +from .common import assert_offset_equal, assert_onOffset + + +# -------------------------------------------------------------------- +# Misc + +def test_quarterly_dont_normalize(): + date = datetime(2012, 3, 31, 5, 30) + + offsets = (QuarterBegin, QuarterEnd, BQuarterEnd, BQuarterBegin) + + for klass in offsets: + result = date + klass() + assert (result.time() == date.time()) + + +@pytest.mark.parametrize('n', [-2, 1]) +@pytest.mark.parametrize('cls', [MonthBegin, MonthEnd, + BMonthBegin, BMonthEnd, + QuarterBegin, QuarterEnd, + BQuarterBegin, BQuarterEnd, + YearBegin, YearEnd, + BYearBegin, BYearEnd]) +def test_apply_index(cls, n): + offset = cls(n=n) + rng = pd.date_range(start='1/1/2000', periods=100000, freq='T') 
+ ser = pd.Series(rng) + + res = rng + offset + res_v2 = offset.apply_index(rng) + assert (res == res_v2).all() + assert res[0] == rng[0] + offset + assert res[-1] == rng[-1] + offset + res2 = ser + offset + # apply_index is only for indexes, not series, so no res2_v2 + assert res2.iloc[0] == ser.iloc[0] + offset + assert res2.iloc[-1] == ser.iloc[-1] + offset + + +@pytest.mark.parametrize('offset', [QuarterBegin(), QuarterEnd(), + BQuarterBegin(), BQuarterEnd()]) +def test_on_offset(offset): + dates = [datetime(2016, m, d) + for m in [10, 11, 12] + for d in [1, 2, 3, 28, 29, 30, 31] if not (m == 11 and d == 31)] + for date in dates: + res = offset.onOffset(date) + slow_version = date == (date + offset) - offset + assert res == slow_version + + +# -------------------------------------------------------------------- +# Months + +class TestMonthBegin(Base): + _offset = MonthBegin + + offset_cases = [] + # NOTE: I'm not entirely happy with the logic here for Begin -ss + # see thread 'offset conventions' on the ML + offset_cases.append((MonthBegin(), { + datetime(2008, 1, 31): datetime(2008, 2, 1), + datetime(2008, 2, 1): datetime(2008, 3, 1), + datetime(2006, 12, 31): datetime(2007, 1, 1), + datetime(2006, 12, 1): datetime(2007, 1, 1), + datetime(2007, 1, 31): datetime(2007, 2, 1)})) + + offset_cases.append((MonthBegin(0), { + datetime(2008, 1, 31): datetime(2008, 2, 1), + datetime(2008, 1, 1): datetime(2008, 1, 1), + datetime(2006, 12, 3): datetime(2007, 1, 1), + datetime(2007, 1, 31): datetime(2007, 2, 1)})) + + offset_cases.append((MonthBegin(2), { + datetime(2008, 2, 29): datetime(2008, 4, 1), + datetime(2008, 1, 31): datetime(2008, 3, 1), + datetime(2006, 12, 31): datetime(2007, 2, 1), + datetime(2007, 12, 28): datetime(2008, 2, 1), + datetime(2007, 1, 1): datetime(2007, 3, 1), + datetime(2006, 11, 1): datetime(2007, 1, 1)})) + + offset_cases.append((MonthBegin(-1), { + datetime(2007, 1, 1): datetime(2006, 12, 1), + datetime(2008, 5, 31): datetime(2008, 5, 1), + 
datetime(2008, 12, 31): datetime(2008, 12, 1), + datetime(2006, 12, 29): datetime(2006, 12, 1), + datetime(2006, 1, 2): datetime(2006, 1, 1)})) + + @pytest.mark.parametrize('case', offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in compat.iteritems(cases): + assert_offset_equal(offset, base, expected) + + +class TestMonthEnd(Base): + _offset = MonthEnd + + def test_day_of_month(self): + dt = datetime(2007, 1, 1) + offset = MonthEnd() + + result = dt + offset + assert result == Timestamp(2007, 1, 31) + + result = result + offset + assert result == Timestamp(2007, 2, 28) + + def test_normalize(self): + dt = datetime(2007, 1, 1, 3) + + result = dt + MonthEnd(normalize=True) + expected = dt.replace(hour=0) + MonthEnd() + assert result == expected + + offset_cases = [] + offset_cases.append((MonthEnd(), { + datetime(2008, 1, 1): datetime(2008, 1, 31), + datetime(2008, 1, 31): datetime(2008, 2, 29), + datetime(2006, 12, 29): datetime(2006, 12, 31), + datetime(2006, 12, 31): datetime(2007, 1, 31), + datetime(2007, 1, 1): datetime(2007, 1, 31), + datetime(2006, 12, 1): datetime(2006, 12, 31)})) + + offset_cases.append((MonthEnd(0), { + datetime(2008, 1, 1): datetime(2008, 1, 31), + datetime(2008, 1, 31): datetime(2008, 1, 31), + datetime(2006, 12, 29): datetime(2006, 12, 31), + datetime(2006, 12, 31): datetime(2006, 12, 31), + datetime(2007, 1, 1): datetime(2007, 1, 31)})) + + offset_cases.append((MonthEnd(2), { + datetime(2008, 1, 1): datetime(2008, 2, 29), + datetime(2008, 1, 31): datetime(2008, 3, 31), + datetime(2006, 12, 29): datetime(2007, 1, 31), + datetime(2006, 12, 31): datetime(2007, 2, 28), + datetime(2007, 1, 1): datetime(2007, 2, 28), + datetime(2006, 11, 1): datetime(2006, 12, 31)})) + + offset_cases.append((MonthEnd(-1), { + datetime(2007, 1, 1): datetime(2006, 12, 31), + datetime(2008, 6, 30): datetime(2008, 5, 31), + datetime(2008, 12, 31): datetime(2008, 11, 30), + datetime(2006, 12, 29): datetime(2006, 11, 30), + 
datetime(2006, 12, 30): datetime(2006, 11, 30), + datetime(2007, 1, 1): datetime(2006, 12, 31)})) + + @pytest.mark.parametrize('case', offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in compat.iteritems(cases): + assert_offset_equal(offset, base, expected) + + on_offset_cases = [(MonthEnd(), datetime(2007, 12, 31), True), + (MonthEnd(), datetime(2008, 1, 1), False)] + + @pytest.mark.parametrize('case', on_offset_cases) + def test_onOffset(self, case): + offset, dt, expected = case + assert_onOffset(offset, dt, expected) + + +class TestBMonthBegin(Base): + _offset = BMonthBegin + + def test_offsets_compare_equal(self): + # root cause of #456 + offset1 = BMonthBegin() + offset2 = BMonthBegin() + assert not offset1 != offset2 + + offset_cases = [] + offset_cases.append((BMonthBegin(), { + datetime(2008, 1, 1): datetime(2008, 2, 1), + datetime(2008, 1, 31): datetime(2008, 2, 1), + datetime(2006, 12, 29): datetime(2007, 1, 1), + datetime(2006, 12, 31): datetime(2007, 1, 1), + datetime(2006, 9, 1): datetime(2006, 10, 2), + datetime(2007, 1, 1): datetime(2007, 2, 1), + datetime(2006, 12, 1): datetime(2007, 1, 1)})) + + offset_cases.append((BMonthBegin(0), { + datetime(2008, 1, 1): datetime(2008, 1, 1), + datetime(2006, 10, 2): datetime(2006, 10, 2), + datetime(2008, 1, 31): datetime(2008, 2, 1), + datetime(2006, 12, 29): datetime(2007, 1, 1), + datetime(2006, 12, 31): datetime(2007, 1, 1), + datetime(2006, 9, 15): datetime(2006, 10, 2)})) + + offset_cases.append((BMonthBegin(2), { + datetime(2008, 1, 1): datetime(2008, 3, 3), + datetime(2008, 1, 15): datetime(2008, 3, 3), + datetime(2006, 12, 29): datetime(2007, 2, 1), + datetime(2006, 12, 31): datetime(2007, 2, 1), + datetime(2007, 1, 1): datetime(2007, 3, 1), + datetime(2006, 11, 1): datetime(2007, 1, 1)})) + + offset_cases.append((BMonthBegin(-1), { + datetime(2007, 1, 1): datetime(2006, 12, 1), + datetime(2008, 6, 30): datetime(2008, 6, 2), + datetime(2008, 6, 1): 
datetime(2008, 5, 1), + datetime(2008, 3, 10): datetime(2008, 3, 3), + datetime(2008, 12, 31): datetime(2008, 12, 1), + datetime(2006, 12, 29): datetime(2006, 12, 1), + datetime(2006, 12, 30): datetime(2006, 12, 1), + datetime(2007, 1, 1): datetime(2006, 12, 1)})) + + @pytest.mark.parametrize('case', offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in compat.iteritems(cases): + assert_offset_equal(offset, base, expected) + + on_offset_cases = [(BMonthBegin(), datetime(2007, 12, 31), False), + (BMonthBegin(), datetime(2008, 1, 1), True), + (BMonthBegin(), datetime(2001, 4, 2), True), + (BMonthBegin(), datetime(2008, 3, 3), True)] + + @pytest.mark.parametrize('case', on_offset_cases) + def test_onOffset(self, case): + offset, dt, expected = case + assert_onOffset(offset, dt, expected) + + +class TestBMonthEnd(Base): + _offset = BMonthEnd + + def test_normalize(self): + dt = datetime(2007, 1, 1, 3) + + result = dt + BMonthEnd(normalize=True) + expected = dt.replace(hour=0) + BMonthEnd() + assert result == expected + + def test_offsets_compare_equal(self): + # root cause of #456 + offset1 = BMonthEnd() + offset2 = BMonthEnd() + assert not offset1 != offset2 + + offset_cases = [] + offset_cases.append((BMonthEnd(), { + datetime(2008, 1, 1): datetime(2008, 1, 31), + datetime(2008, 1, 31): datetime(2008, 2, 29), + datetime(2006, 12, 29): datetime(2007, 1, 31), + datetime(2006, 12, 31): datetime(2007, 1, 31), + datetime(2007, 1, 1): datetime(2007, 1, 31), + datetime(2006, 12, 1): datetime(2006, 12, 29)})) + + offset_cases.append((BMonthEnd(0), { + datetime(2008, 1, 1): datetime(2008, 1, 31), + datetime(2008, 1, 31): datetime(2008, 1, 31), + datetime(2006, 12, 29): datetime(2006, 12, 29), + datetime(2006, 12, 31): datetime(2007, 1, 31), + datetime(2007, 1, 1): datetime(2007, 1, 31)})) + + offset_cases.append((BMonthEnd(2), { + datetime(2008, 1, 1): datetime(2008, 2, 29), + datetime(2008, 1, 31): datetime(2008, 3, 31), + datetime(2006, 
12, 29): datetime(2007, 2, 28), + datetime(2006, 12, 31): datetime(2007, 2, 28), + datetime(2007, 1, 1): datetime(2007, 2, 28), + datetime(2006, 11, 1): datetime(2006, 12, 29)})) + + offset_cases.append((BMonthEnd(-1), { + datetime(2007, 1, 1): datetime(2006, 12, 29), + datetime(2008, 6, 30): datetime(2008, 5, 30), + datetime(2008, 12, 31): datetime(2008, 11, 28), + datetime(2006, 12, 29): datetime(2006, 11, 30), + datetime(2006, 12, 30): datetime(2006, 12, 29), + datetime(2007, 1, 1): datetime(2006, 12, 29)})) + + @pytest.mark.parametrize('case', offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in compat.iteritems(cases): + assert_offset_equal(offset, base, expected) + + on_offset_cases = [(BMonthEnd(), datetime(2007, 12, 31), True), + (BMonthEnd(), datetime(2008, 1, 1), False)] + + @pytest.mark.parametrize('case', on_offset_cases) + def test_onOffset(self, case): + offset, dt, expected = case + assert_onOffset(offset, dt, expected) + +# -------------------------------------------------------------------- +# Quarters + + +class TestQuarterBegin(Base): + + def test_repr(self): + expected = "" + assert repr(QuarterBegin()) == expected + expected = "" + assert repr(QuarterBegin(startingMonth=3)) == expected + expected = "" + assert repr(QuarterBegin(startingMonth=1)) == expected + + def test_isAnchored(self): + assert QuarterBegin(startingMonth=1).isAnchored() + assert QuarterBegin().isAnchored() + assert not QuarterBegin(2, startingMonth=1).isAnchored() + + def test_offset_corner_case(self): + # corner + offset = QuarterBegin(n=-1, startingMonth=1) + assert datetime(2010, 2, 1) + offset == datetime(2010, 1, 1) + + offset_cases = [] + offset_cases.append((QuarterBegin(startingMonth=1), { + datetime(2007, 12, 1): datetime(2008, 1, 1), + datetime(2008, 1, 1): datetime(2008, 4, 1), + datetime(2008, 2, 15): datetime(2008, 4, 1), + datetime(2008, 2, 29): datetime(2008, 4, 1), + datetime(2008, 3, 15): datetime(2008, 4, 1), + 
datetime(2008, 3, 31): datetime(2008, 4, 1), + datetime(2008, 4, 15): datetime(2008, 7, 1), + datetime(2008, 4, 1): datetime(2008, 7, 1)})) + + offset_cases.append((QuarterBegin(startingMonth=2), { + datetime(2008, 1, 1): datetime(2008, 2, 1), + datetime(2008, 1, 31): datetime(2008, 2, 1), + datetime(2008, 1, 15): datetime(2008, 2, 1), + datetime(2008, 2, 29): datetime(2008, 5, 1), + datetime(2008, 3, 15): datetime(2008, 5, 1), + datetime(2008, 3, 31): datetime(2008, 5, 1), + datetime(2008, 4, 15): datetime(2008, 5, 1), + datetime(2008, 4, 30): datetime(2008, 5, 1)})) + + offset_cases.append((QuarterBegin(startingMonth=1, n=0), { + datetime(2008, 1, 1): datetime(2008, 1, 1), + datetime(2008, 12, 1): datetime(2009, 1, 1), + datetime(2008, 1, 1): datetime(2008, 1, 1), + datetime(2008, 2, 15): datetime(2008, 4, 1), + datetime(2008, 2, 29): datetime(2008, 4, 1), + datetime(2008, 3, 15): datetime(2008, 4, 1), + datetime(2008, 3, 31): datetime(2008, 4, 1), + datetime(2008, 4, 15): datetime(2008, 7, 1), + datetime(2008, 4, 30): datetime(2008, 7, 1)})) + + offset_cases.append((QuarterBegin(startingMonth=1, n=-1), { + datetime(2008, 1, 1): datetime(2007, 10, 1), + datetime(2008, 1, 31): datetime(2008, 1, 1), + datetime(2008, 2, 15): datetime(2008, 1, 1), + datetime(2008, 2, 29): datetime(2008, 1, 1), + datetime(2008, 3, 15): datetime(2008, 1, 1), + datetime(2008, 3, 31): datetime(2008, 1, 1), + datetime(2008, 4, 15): datetime(2008, 4, 1), + datetime(2008, 4, 30): datetime(2008, 4, 1), + datetime(2008, 7, 1): datetime(2008, 4, 1)})) + + offset_cases.append((QuarterBegin(startingMonth=1, n=2), { + datetime(2008, 1, 1): datetime(2008, 7, 1), + datetime(2008, 2, 15): datetime(2008, 7, 1), + datetime(2008, 2, 29): datetime(2008, 7, 1), + datetime(2008, 3, 15): datetime(2008, 7, 1), + datetime(2008, 3, 31): datetime(2008, 7, 1), + datetime(2008, 4, 15): datetime(2008, 10, 1), + datetime(2008, 4, 1): datetime(2008, 10, 1)})) + + @pytest.mark.parametrize('case', offset_cases) + def 
test_offset(self, case): + offset, cases = case + for base, expected in compat.iteritems(cases): + assert_offset_equal(offset, base, expected) + + +class TestQuarterEnd(Base): + _offset = QuarterEnd + + def test_repr(self): + expected = "" + assert repr(QuarterEnd()) == expected + expected = "" + assert repr(QuarterEnd(startingMonth=3)) == expected + expected = "" + assert repr(QuarterEnd(startingMonth=1)) == expected + + def test_isAnchored(self): + assert QuarterEnd(startingMonth=1).isAnchored() + assert QuarterEnd().isAnchored() + assert not QuarterEnd(2, startingMonth=1).isAnchored() + + def test_offset_corner_case(self): + # corner + offset = QuarterEnd(n=-1, startingMonth=1) + assert datetime(2010, 2, 1) + offset == datetime(2010, 1, 31) + + offset_cases = [] + offset_cases.append((QuarterEnd(startingMonth=1), { + datetime(2008, 1, 1): datetime(2008, 1, 31), + datetime(2008, 1, 31): datetime(2008, 4, 30), + datetime(2008, 2, 15): datetime(2008, 4, 30), + datetime(2008, 2, 29): datetime(2008, 4, 30), + datetime(2008, 3, 15): datetime(2008, 4, 30), + datetime(2008, 3, 31): datetime(2008, 4, 30), + datetime(2008, 4, 15): datetime(2008, 4, 30), + datetime(2008, 4, 30): datetime(2008, 7, 31)})) + + offset_cases.append((QuarterEnd(startingMonth=2), { + datetime(2008, 1, 1): datetime(2008, 2, 29), + datetime(2008, 1, 31): datetime(2008, 2, 29), + datetime(2008, 2, 15): datetime(2008, 2, 29), + datetime(2008, 2, 29): datetime(2008, 5, 31), + datetime(2008, 3, 15): datetime(2008, 5, 31), + datetime(2008, 3, 31): datetime(2008, 5, 31), + datetime(2008, 4, 15): datetime(2008, 5, 31), + datetime(2008, 4, 30): datetime(2008, 5, 31)})) + + offset_cases.append((QuarterEnd(startingMonth=1, n=0), { + datetime(2008, 1, 1): datetime(2008, 1, 31), + datetime(2008, 1, 31): datetime(2008, 1, 31), + datetime(2008, 2, 15): datetime(2008, 4, 30), + datetime(2008, 2, 29): datetime(2008, 4, 30), + datetime(2008, 3, 15): datetime(2008, 4, 30), + datetime(2008, 3, 31): datetime(2008, 4, 
30), + datetime(2008, 4, 15): datetime(2008, 4, 30), + datetime(2008, 4, 30): datetime(2008, 4, 30)})) + + offset_cases.append((QuarterEnd(startingMonth=1, n=-1), { + datetime(2008, 1, 1): datetime(2007, 10, 31), + datetime(2008, 1, 31): datetime(2007, 10, 31), + datetime(2008, 2, 15): datetime(2008, 1, 31), + datetime(2008, 2, 29): datetime(2008, 1, 31), + datetime(2008, 3, 15): datetime(2008, 1, 31), + datetime(2008, 3, 31): datetime(2008, 1, 31), + datetime(2008, 4, 15): datetime(2008, 1, 31), + datetime(2008, 4, 30): datetime(2008, 1, 31), + datetime(2008, 7, 1): datetime(2008, 4, 30)})) + + offset_cases.append((QuarterEnd(startingMonth=1, n=2), { + datetime(2008, 1, 31): datetime(2008, 7, 31), + datetime(2008, 2, 15): datetime(2008, 7, 31), + datetime(2008, 2, 29): datetime(2008, 7, 31), + datetime(2008, 3, 15): datetime(2008, 7, 31), + datetime(2008, 3, 31): datetime(2008, 7, 31), + datetime(2008, 4, 15): datetime(2008, 7, 31), + datetime(2008, 4, 30): datetime(2008, 10, 31)})) + + @pytest.mark.parametrize('case', offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in compat.iteritems(cases): + assert_offset_equal(offset, base, expected) + + on_offset_cases = [ + (QuarterEnd(1, startingMonth=1), datetime(2008, 1, 31), True), + (QuarterEnd(1, startingMonth=1), datetime(2007, 12, 31), False), + (QuarterEnd(1, startingMonth=1), datetime(2008, 2, 29), False), + (QuarterEnd(1, startingMonth=1), datetime(2007, 3, 30), False), + (QuarterEnd(1, startingMonth=1), datetime(2007, 3, 31), False), + (QuarterEnd(1, startingMonth=1), datetime(2008, 4, 30), True), + (QuarterEnd(1, startingMonth=1), datetime(2008, 5, 30), False), + (QuarterEnd(1, startingMonth=1), datetime(2008, 5, 31), False), + (QuarterEnd(1, startingMonth=1), datetime(2007, 6, 29), False), + (QuarterEnd(1, startingMonth=1), datetime(2007, 6, 30), False), + (QuarterEnd(1, startingMonth=2), datetime(2008, 1, 31), False), + (QuarterEnd(1, startingMonth=2), datetime(2007, 
12, 31), False), + (QuarterEnd(1, startingMonth=2), datetime(2008, 2, 29), True), + (QuarterEnd(1, startingMonth=2), datetime(2007, 3, 30), False), + (QuarterEnd(1, startingMonth=2), datetime(2007, 3, 31), False), + (QuarterEnd(1, startingMonth=2), datetime(2008, 4, 30), False), + (QuarterEnd(1, startingMonth=2), datetime(2008, 5, 30), False), + (QuarterEnd(1, startingMonth=2), datetime(2008, 5, 31), True), + (QuarterEnd(1, startingMonth=2), datetime(2007, 6, 29), False), + (QuarterEnd(1, startingMonth=2), datetime(2007, 6, 30), False), + (QuarterEnd(1, startingMonth=3), datetime(2008, 1, 31), False), + (QuarterEnd(1, startingMonth=3), datetime(2007, 12, 31), True), + (QuarterEnd(1, startingMonth=3), datetime(2008, 2, 29), False), + (QuarterEnd(1, startingMonth=3), datetime(2007, 3, 30), False), + (QuarterEnd(1, startingMonth=3), datetime(2007, 3, 31), True), + (QuarterEnd(1, startingMonth=3), datetime(2008, 4, 30), False), + (QuarterEnd(1, startingMonth=3), datetime(2008, 5, 30), False), + (QuarterEnd(1, startingMonth=3), datetime(2008, 5, 31), False), + (QuarterEnd(1, startingMonth=3), datetime(2007, 6, 29), False), + (QuarterEnd(1, startingMonth=3), datetime(2007, 6, 30), True)] + + @pytest.mark.parametrize('case', on_offset_cases) + def test_onOffset(self, case): + offset, dt, expected = case + assert_onOffset(offset, dt, expected) + + +class TestBQuarterBegin(Base): + _offset = BQuarterBegin + + def test_repr(self): + expected = "" + assert repr(BQuarterBegin()) == expected + expected = "" + assert repr(BQuarterBegin(startingMonth=3)) == expected + expected = "" + assert repr(BQuarterBegin(startingMonth=1)) == expected + + def test_isAnchored(self): + assert BQuarterBegin(startingMonth=1).isAnchored() + assert BQuarterBegin().isAnchored() + assert not BQuarterBegin(2, startingMonth=1).isAnchored() + + def test_offset_corner_case(self): + # corner + offset = BQuarterBegin(n=-1, startingMonth=1) + assert datetime(2007, 4, 3) + offset == datetime(2007, 4, 2) + + 
offset_cases = [] + offset_cases.append((BQuarterBegin(startingMonth=1), { + datetime(2008, 1, 1): datetime(2008, 4, 1), + datetime(2008, 1, 31): datetime(2008, 4, 1), + datetime(2008, 2, 15): datetime(2008, 4, 1), + datetime(2008, 2, 29): datetime(2008, 4, 1), + datetime(2008, 3, 15): datetime(2008, 4, 1), + datetime(2008, 3, 31): datetime(2008, 4, 1), + datetime(2008, 4, 15): datetime(2008, 7, 1), + datetime(2007, 3, 15): datetime(2007, 4, 2), + datetime(2007, 2, 28): datetime(2007, 4, 2), + datetime(2007, 1, 1): datetime(2007, 4, 2), + datetime(2007, 4, 15): datetime(2007, 7, 2), + datetime(2007, 7, 1): datetime(2007, 7, 2), + datetime(2007, 4, 1): datetime(2007, 4, 2), + datetime(2007, 4, 2): datetime(2007, 7, 2), + datetime(2008, 4, 30): datetime(2008, 7, 1)})) + + offset_cases.append((BQuarterBegin(startingMonth=2), { + datetime(2008, 1, 1): datetime(2008, 2, 1), + datetime(2008, 1, 31): datetime(2008, 2, 1), + datetime(2008, 1, 15): datetime(2008, 2, 1), + datetime(2008, 2, 29): datetime(2008, 5, 1), + datetime(2008, 3, 15): datetime(2008, 5, 1), + datetime(2008, 3, 31): datetime(2008, 5, 1), + datetime(2008, 4, 15): datetime(2008, 5, 1), + datetime(2008, 8, 15): datetime(2008, 11, 3), + datetime(2008, 9, 15): datetime(2008, 11, 3), + datetime(2008, 11, 1): datetime(2008, 11, 3), + datetime(2008, 4, 30): datetime(2008, 5, 1)})) + + offset_cases.append((BQuarterBegin(startingMonth=1, n=0), { + datetime(2008, 1, 1): datetime(2008, 1, 1), + datetime(2007, 12, 31): datetime(2008, 1, 1), + datetime(2008, 2, 15): datetime(2008, 4, 1), + datetime(2008, 2, 29): datetime(2008, 4, 1), + datetime(2008, 1, 15): datetime(2008, 4, 1), + datetime(2008, 2, 27): datetime(2008, 4, 1), + datetime(2008, 3, 15): datetime(2008, 4, 1), + datetime(2007, 4, 1): datetime(2007, 4, 2), + datetime(2007, 4, 2): datetime(2007, 4, 2), + datetime(2007, 7, 1): datetime(2007, 7, 2), + datetime(2007, 4, 15): datetime(2007, 7, 2), + datetime(2007, 7, 2): datetime(2007, 7, 2)})) + + 
offset_cases.append((BQuarterBegin(startingMonth=1, n=-1), { + datetime(2008, 1, 1): datetime(2007, 10, 1), + datetime(2008, 1, 31): datetime(2008, 1, 1), + datetime(2008, 2, 15): datetime(2008, 1, 1), + datetime(2008, 2, 29): datetime(2008, 1, 1), + datetime(2008, 3, 15): datetime(2008, 1, 1), + datetime(2008, 3, 31): datetime(2008, 1, 1), + datetime(2008, 4, 15): datetime(2008, 4, 1), + datetime(2007, 7, 3): datetime(2007, 7, 2), + datetime(2007, 4, 3): datetime(2007, 4, 2), + datetime(2007, 7, 2): datetime(2007, 4, 2), + datetime(2008, 4, 1): datetime(2008, 1, 1)})) + + offset_cases.append((BQuarterBegin(startingMonth=1, n=2), { + datetime(2008, 1, 1): datetime(2008, 7, 1), + datetime(2008, 1, 15): datetime(2008, 7, 1), + datetime(2008, 2, 29): datetime(2008, 7, 1), + datetime(2008, 3, 15): datetime(2008, 7, 1), + datetime(2007, 3, 31): datetime(2007, 7, 2), + datetime(2007, 4, 15): datetime(2007, 10, 1), + datetime(2008, 4, 30): datetime(2008, 10, 1)})) + + @pytest.mark.parametrize('case', offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in compat.iteritems(cases): + assert_offset_equal(offset, base, expected) + + +class TestBQuarterEnd(Base): + _offset = BQuarterEnd + + def test_repr(self): + expected = "" + assert repr(BQuarterEnd()) == expected + expected = "" + assert repr(BQuarterEnd(startingMonth=3)) == expected + expected = "" + assert repr(BQuarterEnd(startingMonth=1)) == expected + + def test_isAnchored(self): + assert BQuarterEnd(startingMonth=1).isAnchored() + assert BQuarterEnd().isAnchored() + assert not BQuarterEnd(2, startingMonth=1).isAnchored() + + def test_offset_corner_case(self): + # corner + offset = BQuarterEnd(n=-1, startingMonth=1) + assert datetime(2010, 1, 31) + offset == datetime(2010, 1, 29) + + offset_cases = [] + offset_cases.append((BQuarterEnd(startingMonth=1), { + datetime(2008, 1, 1): datetime(2008, 1, 31), + datetime(2008, 1, 31): datetime(2008, 4, 30), + datetime(2008, 2, 15): 
datetime(2008, 4, 30), + datetime(2008, 2, 29): datetime(2008, 4, 30), + datetime(2008, 3, 15): datetime(2008, 4, 30), + datetime(2008, 3, 31): datetime(2008, 4, 30), + datetime(2008, 4, 15): datetime(2008, 4, 30), + datetime(2008, 4, 30): datetime(2008, 7, 31)})) + + offset_cases.append((BQuarterEnd(startingMonth=2), { + datetime(2008, 1, 1): datetime(2008, 2, 29), + datetime(2008, 1, 31): datetime(2008, 2, 29), + datetime(2008, 2, 15): datetime(2008, 2, 29), + datetime(2008, 2, 29): datetime(2008, 5, 30), + datetime(2008, 3, 15): datetime(2008, 5, 30), + datetime(2008, 3, 31): datetime(2008, 5, 30), + datetime(2008, 4, 15): datetime(2008, 5, 30), + datetime(2008, 4, 30): datetime(2008, 5, 30)})) + + offset_cases.append((BQuarterEnd(startingMonth=1, n=0), { + datetime(2008, 1, 1): datetime(2008, 1, 31), + datetime(2008, 1, 31): datetime(2008, 1, 31), + datetime(2008, 2, 15): datetime(2008, 4, 30), + datetime(2008, 2, 29): datetime(2008, 4, 30), + datetime(2008, 3, 15): datetime(2008, 4, 30), + datetime(2008, 3, 31): datetime(2008, 4, 30), + datetime(2008, 4, 15): datetime(2008, 4, 30), + datetime(2008, 4, 30): datetime(2008, 4, 30)})) + + offset_cases.append((BQuarterEnd(startingMonth=1, n=-1), { + datetime(2008, 1, 1): datetime(2007, 10, 31), + datetime(2008, 1, 31): datetime(2007, 10, 31), + datetime(2008, 2, 15): datetime(2008, 1, 31), + datetime(2008, 2, 29): datetime(2008, 1, 31), + datetime(2008, 3, 15): datetime(2008, 1, 31), + datetime(2008, 3, 31): datetime(2008, 1, 31), + datetime(2008, 4, 15): datetime(2008, 1, 31), + datetime(2008, 4, 30): datetime(2008, 1, 31)})) + + offset_cases.append((BQuarterEnd(startingMonth=1, n=2), { + datetime(2008, 1, 31): datetime(2008, 7, 31), + datetime(2008, 2, 15): datetime(2008, 7, 31), + datetime(2008, 2, 29): datetime(2008, 7, 31), + datetime(2008, 3, 15): datetime(2008, 7, 31), + datetime(2008, 3, 31): datetime(2008, 7, 31), + datetime(2008, 4, 15): datetime(2008, 7, 31), + datetime(2008, 4, 30): datetime(2008, 10, 
31)})) + + @pytest.mark.parametrize('case', offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in compat.iteritems(cases): + assert_offset_equal(offset, base, expected) + + on_offset_cases = [ + (BQuarterEnd(1, startingMonth=1), datetime(2008, 1, 31), True), + (BQuarterEnd(1, startingMonth=1), datetime(2007, 12, 31), False), + (BQuarterEnd(1, startingMonth=1), datetime(2008, 2, 29), False), + (BQuarterEnd(1, startingMonth=1), datetime(2007, 3, 30), False), + (BQuarterEnd(1, startingMonth=1), datetime(2007, 3, 31), False), + (BQuarterEnd(1, startingMonth=1), datetime(2008, 4, 30), True), + (BQuarterEnd(1, startingMonth=1), datetime(2008, 5, 30), False), + (BQuarterEnd(1, startingMonth=1), datetime(2007, 6, 29), False), + (BQuarterEnd(1, startingMonth=1), datetime(2007, 6, 30), False), + (BQuarterEnd(1, startingMonth=2), datetime(2008, 1, 31), False), + (BQuarterEnd(1, startingMonth=2), datetime(2007, 12, 31), False), + (BQuarterEnd(1, startingMonth=2), datetime(2008, 2, 29), True), + (BQuarterEnd(1, startingMonth=2), datetime(2007, 3, 30), False), + (BQuarterEnd(1, startingMonth=2), datetime(2007, 3, 31), False), + (BQuarterEnd(1, startingMonth=2), datetime(2008, 4, 30), False), + (BQuarterEnd(1, startingMonth=2), datetime(2008, 5, 30), True), + (BQuarterEnd(1, startingMonth=2), datetime(2007, 6, 29), False), + (BQuarterEnd(1, startingMonth=2), datetime(2007, 6, 30), False), + (BQuarterEnd(1, startingMonth=3), datetime(2008, 1, 31), False), + (BQuarterEnd(1, startingMonth=3), datetime(2007, 12, 31), True), + (BQuarterEnd(1, startingMonth=3), datetime(2008, 2, 29), False), + (BQuarterEnd(1, startingMonth=3), datetime(2007, 3, 30), True), + (BQuarterEnd(1, startingMonth=3), datetime(2007, 3, 31), False), + (BQuarterEnd(1, startingMonth=3), datetime(2008, 4, 30), False), + (BQuarterEnd(1, startingMonth=3), datetime(2008, 5, 30), False), + (BQuarterEnd(1, startingMonth=3), datetime(2007, 6, 29), True), + (BQuarterEnd(1, 
startingMonth=3), datetime(2007, 6, 30), False)] + + @pytest.mark.parametrize('case', on_offset_cases) + def test_onOffset(self, case): + offset, dt, expected = case + assert_onOffset(offset, dt, expected) + +# -------------------------------------------------------------------- +# Years + + +class TestYearBegin(Base): + _offset = YearBegin + + def test_misspecified(self): + pytest.raises(ValueError, YearBegin, month=13) + + offset_cases = [] + offset_cases.append((YearBegin(), { + datetime(2008, 1, 1): datetime(2009, 1, 1), + datetime(2008, 6, 30): datetime(2009, 1, 1), + datetime(2008, 12, 31): datetime(2009, 1, 1), + datetime(2005, 12, 30): datetime(2006, 1, 1), + datetime(2005, 12, 31): datetime(2006, 1, 1)})) + + offset_cases.append((YearBegin(0), { + datetime(2008, 1, 1): datetime(2008, 1, 1), + datetime(2008, 6, 30): datetime(2009, 1, 1), + datetime(2008, 12, 31): datetime(2009, 1, 1), + datetime(2005, 12, 30): datetime(2006, 1, 1), + datetime(2005, 12, 31): datetime(2006, 1, 1)})) + + offset_cases.append((YearBegin(3), { + datetime(2008, 1, 1): datetime(2011, 1, 1), + datetime(2008, 6, 30): datetime(2011, 1, 1), + datetime(2008, 12, 31): datetime(2011, 1, 1), + datetime(2005, 12, 30): datetime(2008, 1, 1), + datetime(2005, 12, 31): datetime(2008, 1, 1)})) + + offset_cases.append((YearBegin(-1), { + datetime(2007, 1, 1): datetime(2006, 1, 1), + datetime(2007, 1, 15): datetime(2007, 1, 1), + datetime(2008, 6, 30): datetime(2008, 1, 1), + datetime(2008, 12, 31): datetime(2008, 1, 1), + datetime(2006, 12, 29): datetime(2006, 1, 1), + datetime(2006, 12, 30): datetime(2006, 1, 1), + datetime(2007, 1, 1): datetime(2006, 1, 1)})) + + offset_cases.append((YearBegin(-2), { + datetime(2007, 1, 1): datetime(2005, 1, 1), + datetime(2008, 6, 30): datetime(2007, 1, 1), + datetime(2008, 12, 31): datetime(2007, 1, 1)})) + + offset_cases.append((YearBegin(month=4), { + datetime(2007, 4, 1): datetime(2008, 4, 1), + datetime(2007, 4, 15): datetime(2008, 4, 1), + datetime(2007, 
3, 1): datetime(2007, 4, 1), + datetime(2007, 12, 15): datetime(2008, 4, 1), + datetime(2012, 1, 31): datetime(2012, 4, 1)})) + + offset_cases.append((YearBegin(0, month=4), { + datetime(2007, 4, 1): datetime(2007, 4, 1), + datetime(2007, 3, 1): datetime(2007, 4, 1), + datetime(2007, 12, 15): datetime(2008, 4, 1), + datetime(2012, 1, 31): datetime(2012, 4, 1)})) + + offset_cases.append((YearBegin(4, month=4), { + datetime(2007, 4, 1): datetime(2011, 4, 1), + datetime(2007, 4, 15): datetime(2011, 4, 1), + datetime(2007, 3, 1): datetime(2010, 4, 1), + datetime(2007, 12, 15): datetime(2011, 4, 1), + datetime(2012, 1, 31): datetime(2015, 4, 1)})) + + offset_cases.append((YearBegin(-1, month=4), { + datetime(2007, 4, 1): datetime(2006, 4, 1), + datetime(2007, 3, 1): datetime(2006, 4, 1), + datetime(2007, 12, 15): datetime(2007, 4, 1), + datetime(2012, 1, 31): datetime(2011, 4, 1)})) + + offset_cases.append((YearBegin(-3, month=4), { + datetime(2007, 4, 1): datetime(2004, 4, 1), + datetime(2007, 3, 1): datetime(2004, 4, 1), + datetime(2007, 12, 15): datetime(2005, 4, 1), + datetime(2012, 1, 31): datetime(2009, 4, 1)})) + + @pytest.mark.parametrize('case', offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in compat.iteritems(cases): + assert_offset_equal(offset, base, expected) + + on_offset_cases = [(YearBegin(), datetime(2007, 1, 3), False), + (YearBegin(), datetime(2008, 1, 1), True), + (YearBegin(), datetime(2006, 12, 31), False), + (YearBegin(), datetime(2006, 1, 2), False)] + + @pytest.mark.parametrize('case', on_offset_cases) + def test_onOffset(self, case): + offset, dt, expected = case + assert_onOffset(offset, dt, expected) + + +class TestYearEnd(Base): + _offset = YearEnd + + def test_misspecified(self): + pytest.raises(ValueError, YearEnd, month=13) + + offset_cases = [] + offset_cases.append((YearEnd(), { + datetime(2008, 1, 1): datetime(2008, 12, 31), + datetime(2008, 6, 30): datetime(2008, 12, 31), + datetime(2008, 12, 
31): datetime(2009, 12, 31), + datetime(2005, 12, 30): datetime(2005, 12, 31), + datetime(2005, 12, 31): datetime(2006, 12, 31)})) + + offset_cases.append((YearEnd(0), { + datetime(2008, 1, 1): datetime(2008, 12, 31), + datetime(2008, 6, 30): datetime(2008, 12, 31), + datetime(2008, 12, 31): datetime(2008, 12, 31), + datetime(2005, 12, 30): datetime(2005, 12, 31)})) + + offset_cases.append((YearEnd(-1), { + datetime(2007, 1, 1): datetime(2006, 12, 31), + datetime(2008, 6, 30): datetime(2007, 12, 31), + datetime(2008, 12, 31): datetime(2007, 12, 31), + datetime(2006, 12, 29): datetime(2005, 12, 31), + datetime(2006, 12, 30): datetime(2005, 12, 31), + datetime(2007, 1, 1): datetime(2006, 12, 31)})) + + offset_cases.append((YearEnd(-2), { + datetime(2007, 1, 1): datetime(2005, 12, 31), + datetime(2008, 6, 30): datetime(2006, 12, 31), + datetime(2008, 12, 31): datetime(2006, 12, 31)})) + + @pytest.mark.parametrize('case', offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in compat.iteritems(cases): + assert_offset_equal(offset, base, expected) + + on_offset_cases = [(YearEnd(), datetime(2007, 12, 31), True), + (YearEnd(), datetime(2008, 1, 1), False), + (YearEnd(), datetime(2006, 12, 31), True), + (YearEnd(), datetime(2006, 12, 29), False)] + + @pytest.mark.parametrize('case', on_offset_cases) + def test_onOffset(self, case): + offset, dt, expected = case + assert_onOffset(offset, dt, expected) + + +class TestYearEndDiffMonth(Base): + offset_cases = [] + offset_cases.append((YearEnd(month=3), + {datetime(2008, 1, 1): datetime(2008, 3, 31), + datetime(2008, 2, 15): datetime(2008, 3, 31), + datetime(2008, 3, 31): datetime(2009, 3, 31), + datetime(2008, 3, 30): datetime(2008, 3, 31), + datetime(2005, 3, 31): datetime(2006, 3, 31), + datetime(2006, 7, 30): datetime(2007, 3, 31)})) + + offset_cases.append((YearEnd(0, month=3), + {datetime(2008, 1, 1): datetime(2008, 3, 31), + datetime(2008, 2, 28): datetime(2008, 3, 31), + 
datetime(2008, 3, 31): datetime(2008, 3, 31), + datetime(2005, 3, 30): datetime(2005, 3, 31)})) + + offset_cases.append((YearEnd(-1, month=3), + {datetime(2007, 1, 1): datetime(2006, 3, 31), + datetime(2008, 2, 28): datetime(2007, 3, 31), + datetime(2008, 3, 31): datetime(2007, 3, 31), + datetime(2006, 3, 29): datetime(2005, 3, 31), + datetime(2006, 3, 30): datetime(2005, 3, 31), + datetime(2007, 3, 1): datetime(2006, 3, 31)})) + + offset_cases.append((YearEnd(-2, month=3), + {datetime(2007, 1, 1): datetime(2005, 3, 31), + datetime(2008, 6, 30): datetime(2007, 3, 31), + datetime(2008, 3, 31): datetime(2006, 3, 31)})) + + @pytest.mark.parametrize('case', offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in compat.iteritems(cases): + assert_offset_equal(offset, base, expected) + + on_offset_cases = [(YearEnd(month=3), datetime(2007, 3, 31), True), + (YearEnd(month=3), datetime(2008, 1, 1), False), + (YearEnd(month=3), datetime(2006, 3, 31), True), + (YearEnd(month=3), datetime(2006, 3, 29), False)] + + @pytest.mark.parametrize('case', on_offset_cases) + def test_onOffset(self, case): + offset, dt, expected = case + assert_onOffset(offset, dt, expected) + + +class TestBYearBegin(Base): + _offset = BYearBegin + + def test_misspecified(self): + pytest.raises(ValueError, BYearBegin, month=13) + pytest.raises(ValueError, BYearEnd, month=13) + + offset_cases = [] + offset_cases.append((BYearBegin(), { + datetime(2008, 1, 1): datetime(2009, 1, 1), + datetime(2008, 6, 30): datetime(2009, 1, 1), + datetime(2008, 12, 31): datetime(2009, 1, 1), + datetime(2011, 1, 1): datetime(2011, 1, 3), + datetime(2011, 1, 3): datetime(2012, 1, 2), + datetime(2005, 12, 30): datetime(2006, 1, 2), + datetime(2005, 12, 31): datetime(2006, 1, 2)})) + + offset_cases.append((BYearBegin(0), { + datetime(2008, 1, 1): datetime(2008, 1, 1), + datetime(2008, 6, 30): datetime(2009, 1, 1), + datetime(2008, 12, 31): datetime(2009, 1, 1), + datetime(2005, 12, 30): 
datetime(2006, 1, 2), + datetime(2005, 12, 31): datetime(2006, 1, 2)})) + + offset_cases.append((BYearBegin(-1), { + datetime(2007, 1, 1): datetime(2006, 1, 2), + datetime(2009, 1, 4): datetime(2009, 1, 1), + datetime(2009, 1, 1): datetime(2008, 1, 1), + datetime(2008, 6, 30): datetime(2008, 1, 1), + datetime(2008, 12, 31): datetime(2008, 1, 1), + datetime(2006, 12, 29): datetime(2006, 1, 2), + datetime(2006, 12, 30): datetime(2006, 1, 2), + datetime(2006, 1, 1): datetime(2005, 1, 3)})) + + offset_cases.append((BYearBegin(-2), { + datetime(2007, 1, 1): datetime(2005, 1, 3), + datetime(2007, 6, 30): datetime(2006, 1, 2), + datetime(2008, 12, 31): datetime(2007, 1, 1)})) + + @pytest.mark.parametrize('case', offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in compat.iteritems(cases): + assert_offset_equal(offset, base, expected) + + +class TestBYearEnd(Base): + _offset = BYearEnd + + offset_cases = [] + offset_cases.append((BYearEnd(), { + datetime(2008, 1, 1): datetime(2008, 12, 31), + datetime(2008, 6, 30): datetime(2008, 12, 31), + datetime(2008, 12, 31): datetime(2009, 12, 31), + datetime(2005, 12, 30): datetime(2006, 12, 29), + datetime(2005, 12, 31): datetime(2006, 12, 29)})) + + offset_cases.append((BYearEnd(0), { + datetime(2008, 1, 1): datetime(2008, 12, 31), + datetime(2008, 6, 30): datetime(2008, 12, 31), + datetime(2008, 12, 31): datetime(2008, 12, 31), + datetime(2005, 12, 31): datetime(2006, 12, 29)})) + + offset_cases.append((BYearEnd(-1), { + datetime(2007, 1, 1): datetime(2006, 12, 29), + datetime(2008, 6, 30): datetime(2007, 12, 31), + datetime(2008, 12, 31): datetime(2007, 12, 31), + datetime(2006, 12, 29): datetime(2005, 12, 30), + datetime(2006, 12, 30): datetime(2006, 12, 29), + datetime(2007, 1, 1): datetime(2006, 12, 29)})) + + offset_cases.append((BYearEnd(-2), { + datetime(2007, 1, 1): datetime(2005, 12, 30), + datetime(2008, 6, 30): datetime(2006, 12, 29), + datetime(2008, 12, 31): datetime(2006, 12, 
29)})) + + @pytest.mark.parametrize('case', offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in compat.iteritems(cases): + assert_offset_equal(offset, base, expected) + + on_offset_cases = [(BYearEnd(), datetime(2007, 12, 31), True), + (BYearEnd(), datetime(2008, 1, 1), False), + (BYearEnd(), datetime(2006, 12, 31), False), + (BYearEnd(), datetime(2006, 12, 29), True)] + + @pytest.mark.parametrize('case', on_offset_cases) + def test_onOffset(self, case): + offset, dt, expected = case + assert_onOffset(offset, dt, expected) + + +class TestBYearEndLagged(Base): + _offset = BYearEnd + + def test_bad_month_fail(self): + pytest.raises(Exception, BYearEnd, month=13) + pytest.raises(Exception, BYearEnd, month=0) + + offset_cases = [] + offset_cases.append((BYearEnd(month=6), { + datetime(2008, 1, 1): datetime(2008, 6, 30), + datetime(2007, 6, 30): datetime(2008, 6, 30)})) + + offset_cases.append((BYearEnd(n=-1, month=6), { + datetime(2008, 1, 1): datetime(2007, 6, 29), + datetime(2007, 6, 30): datetime(2007, 6, 29)})) + + @pytest.mark.parametrize('case', offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in compat.iteritems(cases): + assert_offset_equal(offset, base, expected) + + def test_roll(self): + offset = BYearEnd(month=6) + date = datetime(2009, 11, 30) + + assert offset.rollforward(date) == datetime(2010, 6, 30) + assert offset.rollback(date) == datetime(2009, 6, 30) + + on_offset_cases = [(BYearEnd(month=2), datetime(2007, 2, 28), True), + (BYearEnd(month=6), datetime(2007, 6, 30), False)] + + @pytest.mark.parametrize('case', on_offset_cases) + def test_onOffset(self, case): + offset, dt, expected = case + assert_onOffset(offset, dt, expected) diff --git a/pandas/tests/tseries/test_converter.py b/pandas/tests/tseries/test_converter.py deleted file mode 100644 index 5351e26f0e62b..0000000000000 --- a/pandas/tests/tseries/test_converter.py +++ /dev/null @@ -1,199 +0,0 @@ -import pytest 
-from datetime import datetime, date - -import numpy as np -from pandas import Timestamp, Period, Index -from pandas.compat import u -import pandas.util.testing as tm -from pandas.tseries.offsets import Second, Milli, Micro, Day -from pandas.compat.numpy import np_datetime64_compat - -converter = pytest.importorskip('pandas.tseries.converter') - - -def test_timtetonum_accepts_unicode(): - assert (converter.time2num("00:01") == converter.time2num(u("00:01"))) - - -class TestDateTimeConverter(tm.TestCase): - - def setUp(self): - self.dtc = converter.DatetimeConverter() - self.tc = converter.TimeFormatter(None) - - def test_convert_accepts_unicode(self): - r1 = self.dtc.convert("12:22", None, None) - r2 = self.dtc.convert(u("12:22"), None, None) - assert (r1 == r2), "DatetimeConverter.convert should accept unicode" - - def test_conversion(self): - rs = self.dtc.convert(['2012-1-1'], None, None)[0] - xp = datetime(2012, 1, 1).toordinal() - self.assertEqual(rs, xp) - - rs = self.dtc.convert('2012-1-1', None, None) - self.assertEqual(rs, xp) - - rs = self.dtc.convert(date(2012, 1, 1), None, None) - self.assertEqual(rs, xp) - - rs = self.dtc.convert(datetime(2012, 1, 1).toordinal(), None, None) - self.assertEqual(rs, xp) - - rs = self.dtc.convert('2012-1-1', None, None) - self.assertEqual(rs, xp) - - rs = self.dtc.convert(Timestamp('2012-1-1'), None, None) - self.assertEqual(rs, xp) - - # also testing datetime64 dtype (GH8614) - rs = self.dtc.convert(np_datetime64_compat('2012-01-01'), None, None) - self.assertEqual(rs, xp) - - rs = self.dtc.convert(np_datetime64_compat( - '2012-01-01 00:00:00+0000'), None, None) - self.assertEqual(rs, xp) - - rs = self.dtc.convert(np.array([ - np_datetime64_compat('2012-01-01 00:00:00+0000'), - np_datetime64_compat('2012-01-02 00:00:00+0000')]), None, None) - self.assertEqual(rs[0], xp) - - # we have a tz-aware date (constructed to that when we turn to utc it - # is the same as our sample) - ts = (Timestamp('2012-01-01') - 
.tz_localize('UTC') - .tz_convert('US/Eastern') - ) - rs = self.dtc.convert(ts, None, None) - self.assertEqual(rs, xp) - - rs = self.dtc.convert(ts.to_pydatetime(), None, None) - self.assertEqual(rs, xp) - - rs = self.dtc.convert(Index([ts - Day(1), ts]), None, None) - self.assertEqual(rs[1], xp) - - rs = self.dtc.convert(Index([ts - Day(1), ts]).to_pydatetime(), - None, None) - self.assertEqual(rs[1], xp) - - def test_conversion_float(self): - decimals = 9 - - rs = self.dtc.convert( - Timestamp('2012-1-1 01:02:03', tz='UTC'), None, None) - xp = converter.dates.date2num(Timestamp('2012-1-1 01:02:03', tz='UTC')) - tm.assert_almost_equal(rs, xp, decimals) - - rs = self.dtc.convert( - Timestamp('2012-1-1 09:02:03', tz='Asia/Hong_Kong'), None, None) - tm.assert_almost_equal(rs, xp, decimals) - - rs = self.dtc.convert(datetime(2012, 1, 1, 1, 2, 3), None, None) - tm.assert_almost_equal(rs, xp, decimals) - - def test_conversion_outofbounds_datetime(self): - # 2579 - values = [date(1677, 1, 1), date(1677, 1, 2)] - rs = self.dtc.convert(values, None, None) - xp = converter.dates.date2num(values) - tm.assert_numpy_array_equal(rs, xp) - rs = self.dtc.convert(values[0], None, None) - xp = converter.dates.date2num(values[0]) - self.assertEqual(rs, xp) - - values = [datetime(1677, 1, 1, 12), datetime(1677, 1, 2, 12)] - rs = self.dtc.convert(values, None, None) - xp = converter.dates.date2num(values) - tm.assert_numpy_array_equal(rs, xp) - rs = self.dtc.convert(values[0], None, None) - xp = converter.dates.date2num(values[0]) - self.assertEqual(rs, xp) - - def test_time_formatter(self): - self.tc(90000) - - def test_dateindex_conversion(self): - decimals = 9 - - for freq in ('B', 'L', 'S'): - dateindex = tm.makeDateIndex(k=10, freq=freq) - rs = self.dtc.convert(dateindex, None, None) - xp = converter.dates.date2num(dateindex._mpl_repr()) - tm.assert_almost_equal(rs, xp, decimals) - - def test_resolution(self): - def _assert_less(ts1, ts2): - val1 = self.dtc.convert(ts1, None, 
None) - val2 = self.dtc.convert(ts2, None, None) - if not val1 < val2: - raise AssertionError('{0} is not less than {1}.'.format(val1, - val2)) - - # Matplotlib's time representation using floats cannot distinguish - # intervals smaller than ~10 microsecond in the common range of years. - ts = Timestamp('2012-1-1') - _assert_less(ts, ts + Second()) - _assert_less(ts, ts + Milli()) - _assert_less(ts, ts + Micro(50)) - - -class TestPeriodConverter(tm.TestCase): - - def setUp(self): - self.pc = converter.PeriodConverter() - - class Axis(object): - pass - - self.axis = Axis() - self.axis.freq = 'D' - - def test_convert_accepts_unicode(self): - r1 = self.pc.convert("2012-1-1", None, self.axis) - r2 = self.pc.convert(u("2012-1-1"), None, self.axis) - self.assert_equal(r1, r2, - "PeriodConverter.convert should accept unicode") - - def test_conversion(self): - rs = self.pc.convert(['2012-1-1'], None, self.axis)[0] - xp = Period('2012-1-1').ordinal - self.assertEqual(rs, xp) - - rs = self.pc.convert('2012-1-1', None, self.axis) - self.assertEqual(rs, xp) - - rs = self.pc.convert([date(2012, 1, 1)], None, self.axis)[0] - self.assertEqual(rs, xp) - - rs = self.pc.convert(date(2012, 1, 1), None, self.axis) - self.assertEqual(rs, xp) - - rs = self.pc.convert([Timestamp('2012-1-1')], None, self.axis)[0] - self.assertEqual(rs, xp) - - rs = self.pc.convert(Timestamp('2012-1-1'), None, self.axis) - self.assertEqual(rs, xp) - - # FIXME - # rs = self.pc.convert( - # np_datetime64_compat('2012-01-01'), None, self.axis) - # self.assertEqual(rs, xp) - # - # rs = self.pc.convert( - # np_datetime64_compat('2012-01-01 00:00:00+0000'), - # None, self.axis) - # self.assertEqual(rs, xp) - # - # rs = self.pc.convert(np.array([ - # np_datetime64_compat('2012-01-01 00:00:00+0000'), - # np_datetime64_compat('2012-01-02 00:00:00+0000')]), - # None, self.axis) - # self.assertEqual(rs[0], xp) - - def test_integer_passthrough(self): - # GH9012 - rs = self.pc.convert([0, 1], None, self.axis) - xp = 
[0, 1] - self.assertEqual(rs, xp) diff --git a/pandas/tests/tseries/test_frequencies.py b/pandas/tests/tseries/test_frequencies.py index 5fbef465ca8fc..92d7eb15c929c 100644 --- a/pandas/tests/tseries/test_frequencies.py +++ b/pandas/tests/tseries/test_frequencies.py @@ -1,16 +1,21 @@ from datetime import datetime, timedelta from pandas.compat import range +import pytest import numpy as np from pandas import (Index, DatetimeIndex, Timestamp, Series, date_range, period_range) +from pandas._libs.tslibs.frequencies import (_period_code_map, + _INVALID_FREQ_ERROR) +from pandas._libs.tslibs.ccalendar import MONTHS +from pandas._libs.tslibs import resolution import pandas.tseries.frequencies as frequencies -from pandas.tseries.tools import to_datetime +from pandas.core.tools.datetimes import to_datetime import pandas.tseries.offsets as offsets -from pandas.tseries.period import PeriodIndex +from pandas.core.indexes.period import PeriodIndex import pandas.compat as compat from pandas.compat import is_platform_windows @@ -18,7 +23,7 @@ from pandas import Timedelta -class TestToOffset(tm.TestCase): +class TestToOffset(object): def test_to_offset_multiple(self): freqstr = '2h30min' @@ -100,7 +105,8 @@ def test_to_offset_multiple(self): assert (result == expected) # malformed - with tm.assertRaisesRegexp(ValueError, 'Invalid frequency: 2h20m'): + with tm.assert_raises_regex(ValueError, + 'Invalid frequency: 2h20m'): frequencies.to_offset('2h20m') def test_to_offset_negative(self): @@ -122,17 +128,23 @@ def test_to_offset_negative(self): def test_to_offset_invalid(self): # GH 13930 - with tm.assertRaisesRegexp(ValueError, 'Invalid frequency: U1'): + with tm.assert_raises_regex(ValueError, + 'Invalid frequency: U1'): frequencies.to_offset('U1') - with tm.assertRaisesRegexp(ValueError, 'Invalid frequency: -U'): + with tm.assert_raises_regex(ValueError, + 'Invalid frequency: -U'): frequencies.to_offset('-U') - with tm.assertRaisesRegexp(ValueError, 'Invalid frequency: 3U1'): + 
with tm.assert_raises_regex(ValueError, + 'Invalid frequency: 3U1'): frequencies.to_offset('3U1') - with tm.assertRaisesRegexp(ValueError, 'Invalid frequency: -2-3U'): + with tm.assert_raises_regex(ValueError, + 'Invalid frequency: -2-3U'): frequencies.to_offset('-2-3U') - with tm.assertRaisesRegexp(ValueError, 'Invalid frequency: -2D:3H'): + with tm.assert_raises_regex(ValueError, + 'Invalid frequency: -2D:3H'): frequencies.to_offset('-2D:3H') - with tm.assertRaisesRegexp(ValueError, 'Invalid frequency: 1.5.0S'): + with tm.assert_raises_regex(ValueError, + 'Invalid frequency: 1.5.0S'): frequencies.to_offset('1.5.0S') # split offsets with spaces are valid @@ -145,10 +157,11 @@ def test_to_offset_invalid(self): # special cases assert frequencies.to_offset('2SMS-15') == offsets.SemiMonthBegin(2) - with tm.assertRaisesRegexp(ValueError, - 'Invalid frequency: 2SMS-15-15'): + with tm.assert_raises_regex(ValueError, + 'Invalid frequency: 2SMS-15-15'): frequencies.to_offset('2SMS-15-15') - with tm.assertRaisesRegexp(ValueError, 'Invalid frequency: 2SMS-15D'): + with tm.assert_raises_regex(ValueError, + 'Invalid frequency: 2SMS-15D'): frequencies.to_offset('2SMS-15D') def test_to_offset_leading_zero(self): @@ -160,6 +173,19 @@ def test_to_offset_leading_zero(self): result = frequencies.to_offset(freqstr) assert (result.n == -194) + def test_to_offset_leading_plus(self): + freqstr = '+1d' + result = frequencies.to_offset(freqstr) + assert (result.n == 1) + + freqstr = '+2h30min' + result = frequencies.to_offset(freqstr) + assert (result.n == 150) + + for bad_freq in ['+-1d', '-+1h', '+1', '-7', '+d', '-m']: + with tm.assert_raises_regex(ValueError, 'Invalid frequency:'): + frequencies.to_offset(bad_freq) + def test_to_offset_pd_timedelta(self): # Tests for #9064 td = Timedelta(days=1, seconds=1) @@ -198,7 +224,7 @@ def test_to_offset_pd_timedelta(self): assert (expected == result) td = Timedelta(microseconds=0) - tm.assertRaises(ValueError, lambda: 
frequencies.to_offset(td)) + pytest.raises(ValueError, lambda: frequencies.to_offset(td)) def test_anchored_shortcuts(self): result = frequencies.to_offset('W') @@ -239,11 +265,13 @@ def test_anchored_shortcuts(self): # ensure invalid cases fail as expected invalid_anchors = ['SM-0', 'SM-28', 'SM-29', - 'SM-FOO', 'BSM', 'SM--1' + 'SM-FOO', 'BSM', 'SM--1', 'SMS-1', 'SMS-28', 'SMS-30', - 'SMS-BAR', 'BSMS', 'SMS--2'] + 'SMS-BAR', 'SMS-BYR' 'BSMS', + 'SMS--2'] for invalid_anchor in invalid_anchors: - with tm.assertRaisesRegexp(ValueError, 'Invalid frequency: '): + with tm.assert_raises_regex(ValueError, + 'Invalid frequency: '): frequencies.to_offset(invalid_anchor) @@ -259,259 +287,225 @@ def test_rule_aliases(): assert rule == offsets.Micro(10) -def test_get_rule_month(): - result = frequencies._get_rule_month('W') - assert (result == 'DEC') - result = frequencies._get_rule_month(offsets.Week()) - assert (result == 'DEC') - - result = frequencies._get_rule_month('D') - assert (result == 'DEC') - result = frequencies._get_rule_month(offsets.Day()) - assert (result == 'DEC') - - result = frequencies._get_rule_month('Q') - assert (result == 'DEC') - result = frequencies._get_rule_month(offsets.QuarterEnd(startingMonth=12)) - print(result == 'DEC') - - result = frequencies._get_rule_month('Q-JAN') - assert (result == 'JAN') - result = frequencies._get_rule_month(offsets.QuarterEnd(startingMonth=1)) - assert (result == 'JAN') - - result = frequencies._get_rule_month('A-DEC') - assert (result == 'DEC') - result = frequencies._get_rule_month(offsets.YearEnd()) - assert (result == 'DEC') - - result = frequencies._get_rule_month('A-MAY') - assert (result == 'MAY') - result = frequencies._get_rule_month(offsets.YearEnd(month=5)) - assert (result == 'MAY') - - -def test_period_str_to_code(): - assert (frequencies._period_str_to_code('A') == 1000) - assert (frequencies._period_str_to_code('A-DEC') == 1000) - assert (frequencies._period_str_to_code('A-JAN') == 1001) - assert 
(frequencies._period_str_to_code('Q') == 2000) - assert (frequencies._period_str_to_code('Q-DEC') == 2000) - assert (frequencies._period_str_to_code('Q-FEB') == 2002) - - def _assert_depr(freq, expected, aliases): - assert isinstance(aliases, list) - assert (frequencies._period_str_to_code(freq) == expected) - - msg = frequencies._INVALID_FREQ_ERROR - for alias in aliases: - with tm.assertRaisesRegexp(ValueError, msg): - frequencies._period_str_to_code(alias) - - _assert_depr("M", 3000, ["MTH", "MONTH", "MONTHLY"]) - - assert (frequencies._period_str_to_code('W') == 4000) - assert (frequencies._period_str_to_code('W-SUN') == 4000) - assert (frequencies._period_str_to_code('W-FRI') == 4005) - - _assert_depr("B", 5000, ["BUS", "BUSINESS", "BUSINESSLY", "WEEKDAY"]) - _assert_depr("D", 6000, ["DAY", "DLY", "DAILY"]) - _assert_depr("H", 7000, ["HR", "HOUR", "HRLY", "HOURLY"]) - - _assert_depr("T", 8000, ["minute", "MINUTE", "MINUTELY"]) - assert (frequencies._period_str_to_code('Min') == 8000) - - _assert_depr("S", 9000, ["sec", "SEC", "SECOND", "SECONDLY"]) - _assert_depr("L", 10000, ["MILLISECOND", "MILLISECONDLY"]) - assert (frequencies._period_str_to_code('ms') == 10000) - - _assert_depr("U", 11000, ["MICROSECOND", "MICROSECONDLY"]) - assert (frequencies._period_str_to_code('US') == 11000) - - _assert_depr("N", 12000, ["NANOSECOND", "NANOSECONDLY"]) - assert (frequencies._period_str_to_code('NS') == 12000) - - -class TestFrequencyCode(tm.TestCase): +class TestFrequencyCode(object): def test_freq_code(self): - self.assertEqual(frequencies.get_freq('A'), 1000) - self.assertEqual(frequencies.get_freq('3A'), 1000) - self.assertEqual(frequencies.get_freq('-1A'), 1000) + assert frequencies.get_freq('A') == 1000 + assert frequencies.get_freq('3A') == 1000 + assert frequencies.get_freq('-1A') == 1000 + + assert frequencies.get_freq('Y') == 1000 + assert frequencies.get_freq('3Y') == 1000 + assert frequencies.get_freq('-1Y') == 1000 - 
self.assertEqual(frequencies.get_freq('W'), 4000) - self.assertEqual(frequencies.get_freq('W-MON'), 4001) - self.assertEqual(frequencies.get_freq('W-FRI'), 4005) + assert frequencies.get_freq('W') == 4000 + assert frequencies.get_freq('W-MON') == 4001 + assert frequencies.get_freq('W-FRI') == 4005 - for freqstr, code in compat.iteritems(frequencies._period_code_map): + for freqstr, code in compat.iteritems(_period_code_map): result = frequencies.get_freq(freqstr) - self.assertEqual(result, code) + assert result == code - result = frequencies.get_freq_group(freqstr) - self.assertEqual(result, code // 1000 * 1000) + result = resolution.get_freq_group(freqstr) + assert result == code // 1000 * 1000 - result = frequencies.get_freq_group(code) - self.assertEqual(result, code // 1000 * 1000) + result = resolution.get_freq_group(code) + assert result == code // 1000 * 1000 def test_freq_group(self): - self.assertEqual(frequencies.get_freq_group('A'), 1000) - self.assertEqual(frequencies.get_freq_group('3A'), 1000) - self.assertEqual(frequencies.get_freq_group('-1A'), 1000) - self.assertEqual(frequencies.get_freq_group('A-JAN'), 1000) - self.assertEqual(frequencies.get_freq_group('A-MAY'), 1000) - self.assertEqual(frequencies.get_freq_group(offsets.YearEnd()), 1000) - self.assertEqual(frequencies.get_freq_group( - offsets.YearEnd(month=1)), 1000) - self.assertEqual(frequencies.get_freq_group( - offsets.YearEnd(month=5)), 1000) - - self.assertEqual(frequencies.get_freq_group('W'), 4000) - self.assertEqual(frequencies.get_freq_group('W-MON'), 4000) - self.assertEqual(frequencies.get_freq_group('W-FRI'), 4000) - self.assertEqual(frequencies.get_freq_group(offsets.Week()), 4000) - self.assertEqual(frequencies.get_freq_group( - offsets.Week(weekday=1)), 4000) - self.assertEqual(frequencies.get_freq_group( - offsets.Week(weekday=5)), 4000) + assert resolution.get_freq_group('A') == 1000 + assert resolution.get_freq_group('3A') == 1000 + assert resolution.get_freq_group('-1A') == 
1000 + assert resolution.get_freq_group('A-JAN') == 1000 + assert resolution.get_freq_group('A-MAY') == 1000 + + assert resolution.get_freq_group('Y') == 1000 + assert resolution.get_freq_group('3Y') == 1000 + assert resolution.get_freq_group('-1Y') == 1000 + assert resolution.get_freq_group('Y-JAN') == 1000 + assert resolution.get_freq_group('Y-MAY') == 1000 + + assert resolution.get_freq_group(offsets.YearEnd()) == 1000 + assert resolution.get_freq_group(offsets.YearEnd(month=1)) == 1000 + assert resolution.get_freq_group(offsets.YearEnd(month=5)) == 1000 + + assert resolution.get_freq_group('W') == 4000 + assert resolution.get_freq_group('W-MON') == 4000 + assert resolution.get_freq_group('W-FRI') == 4000 + assert resolution.get_freq_group(offsets.Week()) == 4000 + assert resolution.get_freq_group(offsets.Week(weekday=1)) == 4000 + assert resolution.get_freq_group(offsets.Week(weekday=5)) == 4000 def test_get_to_timestamp_base(self): tsb = frequencies.get_to_timestamp_base - self.assertEqual(tsb(frequencies.get_freq_code('D')[0]), - frequencies.get_freq_code('D')[0]) - self.assertEqual(tsb(frequencies.get_freq_code('W')[0]), - frequencies.get_freq_code('D')[0]) - self.assertEqual(tsb(frequencies.get_freq_code('M')[0]), - frequencies.get_freq_code('D')[0]) + assert (tsb(frequencies.get_freq_code('D')[0]) == + frequencies.get_freq_code('D')[0]) + assert (tsb(frequencies.get_freq_code('W')[0]) == + frequencies.get_freq_code('D')[0]) + assert (tsb(frequencies.get_freq_code('M')[0]) == + frequencies.get_freq_code('D')[0]) - self.assertEqual(tsb(frequencies.get_freq_code('S')[0]), - frequencies.get_freq_code('S')[0]) - self.assertEqual(tsb(frequencies.get_freq_code('T')[0]), - frequencies.get_freq_code('S')[0]) - self.assertEqual(tsb(frequencies.get_freq_code('H')[0]), - frequencies.get_freq_code('S')[0]) + assert (tsb(frequencies.get_freq_code('S')[0]) == + frequencies.get_freq_code('S')[0]) + assert (tsb(frequencies.get_freq_code('T')[0]) == + 
frequencies.get_freq_code('S')[0]) + assert (tsb(frequencies.get_freq_code('H')[0]) == + frequencies.get_freq_code('S')[0]) def test_freq_to_reso(self): Reso = frequencies.Resolution - self.assertEqual(Reso.get_str_from_freq('A'), 'year') - self.assertEqual(Reso.get_str_from_freq('Q'), 'quarter') - self.assertEqual(Reso.get_str_from_freq('M'), 'month') - self.assertEqual(Reso.get_str_from_freq('D'), 'day') - self.assertEqual(Reso.get_str_from_freq('H'), 'hour') - self.assertEqual(Reso.get_str_from_freq('T'), 'minute') - self.assertEqual(Reso.get_str_from_freq('S'), 'second') - self.assertEqual(Reso.get_str_from_freq('L'), 'millisecond') - self.assertEqual(Reso.get_str_from_freq('U'), 'microsecond') - self.assertEqual(Reso.get_str_from_freq('N'), 'nanosecond') + assert Reso.get_str_from_freq('A') == 'year' + assert Reso.get_str_from_freq('Q') == 'quarter' + assert Reso.get_str_from_freq('M') == 'month' + assert Reso.get_str_from_freq('D') == 'day' + assert Reso.get_str_from_freq('H') == 'hour' + assert Reso.get_str_from_freq('T') == 'minute' + assert Reso.get_str_from_freq('S') == 'second' + assert Reso.get_str_from_freq('L') == 'millisecond' + assert Reso.get_str_from_freq('U') == 'microsecond' + assert Reso.get_str_from_freq('N') == 'nanosecond' for freq in ['A', 'Q', 'M', 'D', 'H', 'T', 'S', 'L', 'U', 'N']: # check roundtrip result = Reso.get_freq(Reso.get_str_from_freq(freq)) - self.assertEqual(freq, result) + assert freq == result for freq in ['D', 'H', 'T', 'S', 'L', 'U']: result = Reso.get_freq(Reso.get_str(Reso.get_reso_from_freq(freq))) - self.assertEqual(freq, result) + assert freq == result def test_resolution_bumping(self): - # GH 14378 + # see gh-14378 Reso = frequencies.Resolution - self.assertEqual(Reso.get_stride_from_decimal(1.5, 'T'), (90, 'S')) - self.assertEqual(Reso.get_stride_from_decimal(62.4, 'T'), (3744, 'S')) - self.assertEqual(Reso.get_stride_from_decimal(1.04, 'H'), (3744, 'S')) - self.assertEqual(Reso.get_stride_from_decimal(1, 'D'), (1, 
'D')) - self.assertEqual(Reso.get_stride_from_decimal(0.342931, 'H'), - (1234551600, 'U')) - self.assertEqual(Reso.get_stride_from_decimal(1.2345, 'D'), - (106660800, 'L')) + assert Reso.get_stride_from_decimal(1.5, 'T') == (90, 'S') + assert Reso.get_stride_from_decimal(62.4, 'T') == (3744, 'S') + assert Reso.get_stride_from_decimal(1.04, 'H') == (3744, 'S') + assert Reso.get_stride_from_decimal(1, 'D') == (1, 'D') + assert (Reso.get_stride_from_decimal(0.342931, 'H') == + (1234551600, 'U')) + assert Reso.get_stride_from_decimal(1.2345, 'D') == (106660800, 'L') - with self.assertRaises(ValueError): + with pytest.raises(ValueError): Reso.get_stride_from_decimal(0.5, 'N') # too much precision in the input can prevent - with self.assertRaises(ValueError): + with pytest.raises(ValueError): Reso.get_stride_from_decimal(0.3429324798798269273987982, 'H') def test_get_freq_code(self): - # freqstr - self.assertEqual(frequencies.get_freq_code('A'), - (frequencies.get_freq('A'), 1)) - self.assertEqual(frequencies.get_freq_code('3D'), - (frequencies.get_freq('D'), 3)) - self.assertEqual(frequencies.get_freq_code('-2M'), - (frequencies.get_freq('M'), -2)) + # frequency str + assert (frequencies.get_freq_code('A') == + (frequencies.get_freq('A'), 1)) + assert (frequencies.get_freq_code('3D') == + (frequencies.get_freq('D'), 3)) + assert (frequencies.get_freq_code('-2M') == + (frequencies.get_freq('M'), -2)) # tuple - self.assertEqual(frequencies.get_freq_code(('D', 1)), - (frequencies.get_freq('D'), 1)) - self.assertEqual(frequencies.get_freq_code(('A', 3)), - (frequencies.get_freq('A'), 3)) - self.assertEqual(frequencies.get_freq_code(('M', -2)), - (frequencies.get_freq('M'), -2)) + assert (frequencies.get_freq_code(('D', 1)) == + (frequencies.get_freq('D'), 1)) + assert (frequencies.get_freq_code(('A', 3)) == + (frequencies.get_freq('A'), 3)) + assert (frequencies.get_freq_code(('M', -2)) == + (frequencies.get_freq('M'), -2)) + # numeric tuple - 
self.assertEqual(frequencies.get_freq_code((1000, 1)), (1000, 1)) + assert frequencies.get_freq_code((1000, 1)) == (1000, 1) # offsets - self.assertEqual(frequencies.get_freq_code(offsets.Day()), - (frequencies.get_freq('D'), 1)) - self.assertEqual(frequencies.get_freq_code(offsets.Day(3)), - (frequencies.get_freq('D'), 3)) - self.assertEqual(frequencies.get_freq_code(offsets.Day(-2)), - (frequencies.get_freq('D'), -2)) - - self.assertEqual(frequencies.get_freq_code(offsets.MonthEnd()), - (frequencies.get_freq('M'), 1)) - self.assertEqual(frequencies.get_freq_code(offsets.MonthEnd(3)), - (frequencies.get_freq('M'), 3)) - self.assertEqual(frequencies.get_freq_code(offsets.MonthEnd(-2)), - (frequencies.get_freq('M'), -2)) - - self.assertEqual(frequencies.get_freq_code(offsets.Week()), - (frequencies.get_freq('W'), 1)) - self.assertEqual(frequencies.get_freq_code(offsets.Week(3)), - (frequencies.get_freq('W'), 3)) - self.assertEqual(frequencies.get_freq_code(offsets.Week(-2)), - (frequencies.get_freq('W'), -2)) - - # monday is weekday=0 - self.assertEqual(frequencies.get_freq_code(offsets.Week(weekday=1)), - (frequencies.get_freq('W-TUE'), 1)) - self.assertEqual(frequencies.get_freq_code(offsets.Week(3, weekday=0)), - (frequencies.get_freq('W-MON'), 3)) - self.assertEqual( - frequencies.get_freq_code(offsets.Week(-2, weekday=4)), - (frequencies.get_freq('W-FRI'), -2)) + assert (frequencies.get_freq_code(offsets.Day()) == + (frequencies.get_freq('D'), 1)) + assert (frequencies.get_freq_code(offsets.Day(3)) == + (frequencies.get_freq('D'), 3)) + assert (frequencies.get_freq_code(offsets.Day(-2)) == + (frequencies.get_freq('D'), -2)) + + assert (frequencies.get_freq_code(offsets.MonthEnd()) == + (frequencies.get_freq('M'), 1)) + assert (frequencies.get_freq_code(offsets.MonthEnd(3)) == + (frequencies.get_freq('M'), 3)) + assert (frequencies.get_freq_code(offsets.MonthEnd(-2)) == + (frequencies.get_freq('M'), -2)) + + assert (frequencies.get_freq_code(offsets.Week()) == + 
(frequencies.get_freq('W'), 1)) + assert (frequencies.get_freq_code(offsets.Week(3)) == + (frequencies.get_freq('W'), 3)) + assert (frequencies.get_freq_code(offsets.Week(-2)) == + (frequencies.get_freq('W'), -2)) + + # Monday is weekday=0 + assert (frequencies.get_freq_code(offsets.Week(weekday=1)) == + (frequencies.get_freq('W-TUE'), 1)) + assert (frequencies.get_freq_code(offsets.Week(3, weekday=0)) == + (frequencies.get_freq('W-MON'), 3)) + assert (frequencies.get_freq_code(offsets.Week(-2, weekday=4)) == + (frequencies.get_freq('W-FRI'), -2)) + + def test_frequency_misc(self): + assert (resolution.get_freq_group('T') == + frequencies.FreqGroup.FR_MIN) + + code, stride = frequencies.get_freq_code(offsets.Hour()) + assert code == frequencies.FreqGroup.FR_HR + + code, stride = frequencies.get_freq_code((5, 'T')) + assert code == frequencies.FreqGroup.FR_MIN + assert stride == 5 + + offset = offsets.Hour() + result = frequencies.to_offset(offset) + assert result == offset + + result = frequencies.to_offset((5, 'T')) + expected = offsets.Minute(5) + assert result == expected + + with tm.assert_raises_regex(ValueError, 'Invalid frequency'): + frequencies.get_freq_code((5, 'baz')) + + with tm.assert_raises_regex(ValueError, 'Invalid frequency'): + frequencies.to_offset('100foo') + + with tm.assert_raises_regex(ValueError, 'Could not evaluate'): + frequencies.to_offset(('', '')) _dti = DatetimeIndex -class TestFrequencyInference(tm.TestCase): +class TestFrequencyInference(object): def test_raise_if_period_index(self): index = PeriodIndex(start="1/1/1990", periods=20, freq="M") - self.assertRaises(TypeError, frequencies.infer_freq, index) + pytest.raises(TypeError, frequencies.infer_freq, index) def test_raise_if_too_few(self): index = _dti(['12/31/1998', '1/3/1999']) - self.assertRaises(ValueError, frequencies.infer_freq, index) + pytest.raises(ValueError, frequencies.infer_freq, index) def test_business_daily(self): + index = _dti(['01/01/1999', '1/4/1999', 
'1/5/1999']) + assert frequencies.infer_freq(index) == 'B' + + def test_business_daily_look_alike(self): + # GH 16624, do not infer 'B' when 'weekend' (2-day gap) in wrong place index = _dti(['12/31/1998', '1/3/1999', '1/4/1999']) - self.assertEqual(frequencies.infer_freq(index), 'B') + assert frequencies.infer_freq(index) is None def test_day(self): self._check_tick(timedelta(1), 'D') def test_day_corner(self): index = _dti(['1/1/2000', '1/2/2000', '1/3/2000']) - self.assertEqual(frequencies.infer_freq(index), 'D') + assert frequencies.infer_freq(index) == 'D' def test_non_datetimeindex(self): dates = to_datetime(['1/1/2000', '1/2/2000', '1/3/2000']) - self.assertEqual(frequencies.infer_freq(dates), 'D') + assert frequencies.infer_freq(dates) == 'D' def test_hour(self): self._check_tick(timedelta(hours=1), 'H') @@ -540,16 +534,16 @@ def _check_tick(self, base_delta, code): exp_freq = '%d%s' % (i, code) else: exp_freq = code - self.assertEqual(frequencies.infer_freq(index), exp_freq) + assert frequencies.infer_freq(index) == exp_freq index = _dti([b + base_delta * 7] + [b + base_delta * j for j in range( 3)]) - self.assertIsNone(frequencies.infer_freq(index)) + assert frequencies.infer_freq(index) is None index = _dti([b + base_delta * j for j in range(3)] + [b + base_delta * 7]) - self.assertIsNone(frequencies.infer_freq(index)) + assert frequencies.infer_freq(index) is None def test_weekly(self): days = ['MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN'] @@ -567,7 +561,7 @@ def test_week_of_month(self): def test_fifth_week_of_month(self): # Only supports freq up to WOM-4. See #9425 func = lambda: date_range('2014-01-01', freq='WOM-5MON') - self.assertRaises(ValueError, func) + pytest.raises(ValueError, func) def test_fifth_week_of_month_infer(self): # Only attempts to infer up to WOM-4. 
See #9425 @@ -585,7 +579,7 @@ def test_monthly(self): def test_monthly_ambiguous(self): rng = _dti(['1/31/2000', '2/29/2000', '3/31/2000']) - self.assertEqual(rng.inferred_freq, 'M') + assert rng.inferred_freq == 'M' def test_business_monthly(self): self._check_generated_range('1/1/2000', 'BM') @@ -607,7 +601,7 @@ def test_business_annual(self): def test_annual_ambiguous(self): rng = _dti(['1/31/2000', '1/31/2001', '1/31/2002']) - self.assertEqual(rng.inferred_freq, 'A-JAN') + assert rng.inferred_freq == 'A-JAN' def _check_generated_range(self, start, freq): freq = freq.upper() @@ -615,41 +609,45 @@ def _check_generated_range(self, start, freq): gen = date_range(start, periods=7, freq=freq) index = _dti(gen.values) if not freq.startswith('Q-'): - self.assertEqual(frequencies.infer_freq(index), gen.freqstr) + assert frequencies.infer_freq(index) == gen.freqstr else: inf_freq = frequencies.infer_freq(index) - self.assertTrue((inf_freq == 'Q-DEC' and gen.freqstr in ( - 'Q', 'Q-DEC', 'Q-SEP', 'Q-JUN', 'Q-MAR')) or ( - inf_freq == 'Q-NOV' and gen.freqstr in ( - 'Q-NOV', 'Q-AUG', 'Q-MAY', 'Q-FEB')) or ( - inf_freq == 'Q-OCT' and gen.freqstr in ( - 'Q-OCT', 'Q-JUL', 'Q-APR', 'Q-JAN'))) + is_dec_range = inf_freq == 'Q-DEC' and gen.freqstr in ( + 'Q', 'Q-DEC', 'Q-SEP', 'Q-JUN', 'Q-MAR') + is_nov_range = inf_freq == 'Q-NOV' and gen.freqstr in ( + 'Q-NOV', 'Q-AUG', 'Q-MAY', 'Q-FEB') + is_oct_range = inf_freq == 'Q-OCT' and gen.freqstr in ( + 'Q-OCT', 'Q-JUL', 'Q-APR', 'Q-JAN') + assert is_dec_range or is_nov_range or is_oct_range gen = date_range(start, periods=5, freq=freq) index = _dti(gen.values) + if not freq.startswith('Q-'): - self.assertEqual(frequencies.infer_freq(index), gen.freqstr) + assert frequencies.infer_freq(index) == gen.freqstr else: inf_freq = frequencies.infer_freq(index) - self.assertTrue((inf_freq == 'Q-DEC' and gen.freqstr in ( - 'Q', 'Q-DEC', 'Q-SEP', 'Q-JUN', 'Q-MAR')) or ( - inf_freq == 'Q-NOV' and gen.freqstr in ( - 'Q-NOV', 'Q-AUG', 'Q-MAY', 
'Q-FEB')) or ( - inf_freq == 'Q-OCT' and gen.freqstr in ( - 'Q-OCT', 'Q-JUL', 'Q-APR', 'Q-JAN'))) + is_dec_range = inf_freq == 'Q-DEC' and gen.freqstr in ( + 'Q', 'Q-DEC', 'Q-SEP', 'Q-JUN', 'Q-MAR') + is_nov_range = inf_freq == 'Q-NOV' and gen.freqstr in ( + 'Q-NOV', 'Q-AUG', 'Q-MAY', 'Q-FEB') + is_oct_range = inf_freq == 'Q-OCT' and gen.freqstr in ( + 'Q-OCT', 'Q-JUL', 'Q-APR', 'Q-JAN') + + assert is_dec_range or is_nov_range or is_oct_range def test_infer_freq(self): rng = period_range('1959Q2', '2009Q3', freq='Q') - rng = Index(rng.to_timestamp('D', how='e').asobject) - self.assertEqual(rng.inferred_freq, 'Q-DEC') + rng = Index(rng.to_timestamp('D', how='e').astype(object)) + assert rng.inferred_freq == 'Q-DEC' rng = period_range('1959Q2', '2009Q3', freq='Q-NOV') - rng = Index(rng.to_timestamp('D', how='e').asobject) - self.assertEqual(rng.inferred_freq, 'Q-NOV') + rng = Index(rng.to_timestamp('D', how='e').astype(object)) + assert rng.inferred_freq == 'Q-NOV' rng = period_range('1959Q2', '2009Q3', freq='Q-OCT') - rng = Index(rng.to_timestamp('D', how='e').asobject) - self.assertEqual(rng.inferred_freq, 'Q-OCT') + rng = Index(rng.to_timestamp('D', how='e').astype(object)) + assert rng.inferred_freq == 'Q-OCT' def test_infer_freq_tz(self): @@ -669,7 +667,7 @@ def test_infer_freq_tz(self): 'US/Pacific', 'US/Eastern']: for expected, dates in compat.iteritems(freqs): idx = DatetimeIndex(dates, tz=tz) - self.assertEqual(idx.inferred_freq, expected) + assert idx.inferred_freq == expected def test_infer_freq_tz_transition(self): # Tests for #8772 @@ -685,11 +683,11 @@ def test_infer_freq_tz_transition(self): for freq in freqs: idx = date_range(date_pair[0], date_pair[ 1], freq=freq, tz=tz) - self.assertEqual(idx.inferred_freq, freq) + assert idx.inferred_freq == freq index = date_range("2013-11-03", periods=5, freq="3H").tz_localize("America/Chicago") - self.assertIsNone(index.inferred_freq) + assert index.inferred_freq is None def test_infer_freq_businesshour(self): # 
GH 7905 @@ -697,21 +695,21 @@ def test_infer_freq_businesshour(self): ['2014-07-01 09:00', '2014-07-01 10:00', '2014-07-01 11:00', '2014-07-01 12:00', '2014-07-01 13:00', '2014-07-01 14:00']) # hourly freq in a day must result in 'H' - self.assertEqual(idx.inferred_freq, 'H') + assert idx.inferred_freq == 'H' idx = DatetimeIndex( ['2014-07-01 09:00', '2014-07-01 10:00', '2014-07-01 11:00', '2014-07-01 12:00', '2014-07-01 13:00', '2014-07-01 14:00', '2014-07-01 15:00', '2014-07-01 16:00', '2014-07-02 09:00', '2014-07-02 10:00', '2014-07-02 11:00']) - self.assertEqual(idx.inferred_freq, 'BH') + assert idx.inferred_freq == 'BH' idx = DatetimeIndex( ['2014-07-04 09:00', '2014-07-04 10:00', '2014-07-04 11:00', '2014-07-04 12:00', '2014-07-04 13:00', '2014-07-04 14:00', '2014-07-04 15:00', '2014-07-04 16:00', '2014-07-07 09:00', '2014-07-07 10:00', '2014-07-07 11:00']) - self.assertEqual(idx.inferred_freq, 'BH') + assert idx.inferred_freq == 'BH' idx = DatetimeIndex( ['2014-07-04 09:00', '2014-07-04 10:00', '2014-07-04 11:00', @@ -722,12 +720,12 @@ def test_infer_freq_businesshour(self): '2014-07-07 16:00', '2014-07-08 09:00', '2014-07-08 10:00', '2014-07-08 11:00', '2014-07-08 12:00', '2014-07-08 13:00', '2014-07-08 14:00', '2014-07-08 15:00', '2014-07-08 16:00']) - self.assertEqual(idx.inferred_freq, 'BH') + assert idx.inferred_freq == 'BH' def test_not_monotonic(self): rng = _dti(['1/31/2000', '1/31/2001', '1/31/2002']) rng = rng[::-1] - self.assertEqual(rng.inferred_freq, '-1A-JAN') + assert rng.inferred_freq == '-1A-JAN' def test_non_datetimeindex2(self): rng = _dti(['1/31/2000', '1/31/2001', '1/31/2002']) @@ -735,21 +733,20 @@ def test_non_datetimeindex2(self): vals = rng.to_pydatetime() result = frequencies.infer_freq(vals) - self.assertEqual(result, rng.inferred_freq) + assert result == rng.inferred_freq def test_invalid_index_types(self): # test all index types for i in [tm.makeIntIndex(10), tm.makeFloatIndex(10), tm.makePeriodIndex(10)]: - 
self.assertRaises(TypeError, lambda: frequencies.infer_freq(i)) + pytest.raises(TypeError, lambda: frequencies.infer_freq(i)) # GH 10822 # odd error message on conversions to datetime for unicode if not is_platform_windows(): for i in [tm.makeStringIndex(10), tm.makeUnicodeIndex(10)]: - self.assertRaises(ValueError, - lambda: frequencies.infer_freq(i)) + pytest.raises(ValueError, lambda: frequencies.infer_freq(i)) def test_string_datetimelike_compat(self): @@ -758,7 +755,7 @@ def test_string_datetimelike_compat(self): '2004-04']) result = frequencies.infer_freq(Index(['2004-01', '2004-02', '2004-03', '2004-04'])) - self.assertEqual(result, expected) + assert result == expected def test_series(self): @@ -767,79 +764,43 @@ def test_series(self): # invalid type of Series for s in [Series(np.arange(10)), Series(np.arange(10.))]: - self.assertRaises(TypeError, lambda: frequencies.infer_freq(s)) + pytest.raises(TypeError, lambda: frequencies.infer_freq(s)) # a non-convertible string - self.assertRaises(ValueError, - lambda: frequencies.infer_freq( - Series(['foo', 'bar']))) + pytest.raises(ValueError, lambda: frequencies.infer_freq( + Series(['foo', 'bar']))) # cannot infer on PeriodIndex for freq in [None, 'L']: s = Series(period_range('2013', periods=10, freq=freq)) - self.assertRaises(TypeError, lambda: frequencies.infer_freq(s)) - for freq in ['Y']: - - msg = frequencies._INVALID_FREQ_ERROR - with tm.assertRaisesRegexp(ValueError, msg): - s = Series(period_range('2013', periods=10, freq=freq)) - self.assertRaises(TypeError, lambda: frequencies.infer_freq(s)) + pytest.raises(TypeError, lambda: frequencies.infer_freq(s)) # DateTimeIndex for freq in ['M', 'L', 'S']: s = Series(date_range('20130101', periods=10, freq=freq)) inferred = frequencies.infer_freq(s) - self.assertEqual(inferred, freq) + assert inferred == freq s = Series(date_range('20130101', '20130110')) inferred = frequencies.infer_freq(s) - self.assertEqual(inferred, 'D') + assert inferred == 'D' def 
test_legacy_offset_warnings(self): freqs = ['WEEKDAY', 'EOM', 'W@MON', 'W@TUE', 'W@WED', 'W@THU', 'W@FRI', 'W@SAT', 'W@SUN', 'Q@JAN', 'Q@FEB', 'Q@MAR', 'A@JAN', 'A@FEB', 'A@MAR', 'A@APR', 'A@MAY', 'A@JUN', 'A@JUL', 'A@AUG', 'A@SEP', 'A@OCT', 'A@NOV', 'A@DEC', - 'WOM@1MON', 'WOM@2MON', 'WOM@3MON', 'WOM@4MON', - 'WOM@1TUE', 'WOM@2TUE', 'WOM@3TUE', 'WOM@4TUE', - 'WOM@1WED', 'WOM@2WED', 'WOM@3WED', 'WOM@4WED', - 'WOM@1THU', 'WOM@2THU', 'WOM@3THU', 'WOM@4THU' - 'WOM@1FRI', 'WOM@2FRI', 'WOM@3FRI', 'WOM@4FRI'] - - msg = frequencies._INVALID_FREQ_ERROR + 'Y@JAN', 'WOM@1MON', 'WOM@2MON', 'WOM@3MON', + 'WOM@4MON', 'WOM@1TUE', 'WOM@2TUE', 'WOM@3TUE', + 'WOM@4TUE', 'WOM@1WED', 'WOM@2WED', 'WOM@3WED', + 'WOM@4WED', 'WOM@1THU', 'WOM@2THU', 'WOM@3THU', + 'WOM@4THU', 'WOM@1FRI', 'WOM@2FRI', 'WOM@3FRI', + 'WOM@4FRI'] + + msg = _INVALID_FREQ_ERROR for freq in freqs: - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): frequencies.get_offset(freq) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): date_range('2011-01-01', periods=5, freq=freq) - - -MONTHS = ['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL', 'AUG', 'SEP', 'OCT', - 'NOV', 'DEC'] - - -def test_is_superperiod_subperiod(): - - # input validation - assert not (frequencies.is_superperiod(offsets.YearEnd(), None)) - assert not (frequencies.is_subperiod(offsets.MonthEnd(), None)) - assert not (frequencies.is_superperiod(None, offsets.YearEnd())) - assert not (frequencies.is_subperiod(None, offsets.MonthEnd())) - assert not (frequencies.is_superperiod(None, None)) - assert not (frequencies.is_subperiod(None, None)) - - assert (frequencies.is_superperiod(offsets.YearEnd(), offsets.MonthEnd())) - assert (frequencies.is_subperiod(offsets.MonthEnd(), offsets.YearEnd())) - - assert (frequencies.is_superperiod(offsets.Hour(), offsets.Minute())) - assert (frequencies.is_subperiod(offsets.Minute(), offsets.Hour())) - - assert 
(frequencies.is_superperiod(offsets.Second(), offsets.Milli())) - assert (frequencies.is_subperiod(offsets.Milli(), offsets.Second())) - - assert (frequencies.is_superperiod(offsets.Milli(), offsets.Micro())) - assert (frequencies.is_subperiod(offsets.Micro(), offsets.Milli())) - - assert (frequencies.is_superperiod(offsets.Micro(), offsets.Nano())) - assert (frequencies.is_subperiod(offsets.Nano(), offsets.Micro())) diff --git a/pandas/tests/tseries/test_holiday.py b/pandas/tests/tseries/test_holiday.py index 2adf28a506c53..3ea7e5b8620f2 100644 --- a/pandas/tests/tseries/test_holiday.py +++ b/pandas/tests/tseries/test_holiday.py @@ -1,3 +1,5 @@ +import pytest + from datetime import datetime import pandas.util.testing as tm from pandas import compat @@ -17,9 +19,9 @@ from pytz import utc -class TestCalendar(tm.TestCase): +class TestCalendar(object): - def setUp(self): + def setup_method(self, method): self.holiday_list = [ datetime(2012, 1, 2), datetime(2012, 1, 16), @@ -47,9 +49,9 @@ def test_calendar(self): Timestamp(self.start_date), Timestamp(self.end_date)) - self.assertEqual(list(holidays.to_pydatetime()), self.holiday_list) - self.assertEqual(list(holidays_1.to_pydatetime()), self.holiday_list) - self.assertEqual(list(holidays_2.to_pydatetime()), self.holiday_list) + assert list(holidays.to_pydatetime()) == self.holiday_list + assert list(holidays_1.to_pydatetime()) == self.holiday_list + assert list(holidays_2.to_pydatetime()) == self.holiday_list def test_calendar_caching(self): # Test for issue #9552 @@ -80,28 +82,22 @@ def test_calendar_observance_dates(self): def test_rule_from_name(self): USFedCal = get_calendar('USFederalHolidayCalendar') - self.assertEqual(USFedCal.rule_from_name( - 'Thanksgiving'), USThanksgivingDay) + assert USFedCal.rule_from_name('Thanksgiving') == USThanksgivingDay -class TestHoliday(tm.TestCase): +class TestHoliday(object): - def setUp(self): + def setup_method(self, method): self.start_date = datetime(2011, 1, 1) self.end_date 
= datetime(2020, 12, 31) def check_results(self, holiday, start, end, expected): - self.assertEqual(list(holiday.dates(start, end)), expected) + assert list(holiday.dates(start, end)) == expected + # Verify that timezone info is preserved. - self.assertEqual( - list( - holiday.dates( - utc.localize(Timestamp(start)), - utc.localize(Timestamp(end)), - ) - ), - [utc.localize(dt) for dt in expected], - ) + assert (list(holiday.dates(utc.localize(Timestamp(start)), + utc.localize(Timestamp(end)))) == + [utc.localize(dt) for dt in expected]) def test_usmemorialday(self): self.check_results(holiday=USMemorialDay, @@ -232,7 +228,7 @@ def test_holidays_within_dates(self): for rule, dates in compat.iteritems(holidays): empty_dates = rule.dates(start_date, end_date) - self.assertEqual(empty_dates.tolist(), []) + assert empty_dates.tolist() == [] if isinstance(dates, tuple): dates = [dates] @@ -253,8 +249,8 @@ def test_argument_types(self): Timestamp(self.start_date), Timestamp(self.end_date)) - self.assert_index_equal(holidays, holidays_1) - self.assert_index_equal(holidays, holidays_2) + tm.assert_index_equal(holidays, holidays_1) + tm.assert_index_equal(holidays, holidays_2) def test_special_holidays(self): base_date = [datetime(2012, 5, 28)] @@ -264,17 +260,15 @@ def test_special_holidays(self): end_date=datetime(2012, 12, 31), offset=DateOffset(weekday=MO(1))) - self.assertEqual(base_date, - holiday_1.dates(self.start_date, self.end_date)) - self.assertEqual(base_date, - holiday_2.dates(self.start_date, self.end_date)) + assert base_date == holiday_1.dates(self.start_date, self.end_date) + assert base_date == holiday_2.dates(self.start_date, self.end_date) def test_get_calendar(self): class TestCalendar(AbstractHolidayCalendar): rules = [] calendar = get_calendar('TestCalendar') - self.assertEqual(TestCalendar, calendar.__class__) + assert TestCalendar == calendar.__class__ def test_factory(self): class_1 = HolidayCalendarFactory('MemorialDay', @@ -285,14 +279,14 @@ def 
test_factory(self): USThanksgivingDay) class_3 = HolidayCalendarFactory('Combined', class_1, class_2) - self.assertEqual(len(class_1.rules), 1) - self.assertEqual(len(class_2.rules), 1) - self.assertEqual(len(class_3.rules), 2) + assert len(class_1.rules) == 1 + assert len(class_2.rules) == 1 + assert len(class_3.rules) == 2 -class TestObservanceRules(tm.TestCase): +class TestObservanceRules(object): - def setUp(self): + def setup_method(self, method): self.we = datetime(2014, 4, 9) self.th = datetime(2014, 4, 10) self.fr = datetime(2014, 4, 11) @@ -302,64 +296,65 @@ def setUp(self): self.tu = datetime(2014, 4, 15) def test_next_monday(self): - self.assertEqual(next_monday(self.sa), self.mo) - self.assertEqual(next_monday(self.su), self.mo) + assert next_monday(self.sa) == self.mo + assert next_monday(self.su) == self.mo def test_next_monday_or_tuesday(self): - self.assertEqual(next_monday_or_tuesday(self.sa), self.mo) - self.assertEqual(next_monday_or_tuesday(self.su), self.tu) - self.assertEqual(next_monday_or_tuesday(self.mo), self.tu) + assert next_monday_or_tuesday(self.sa) == self.mo + assert next_monday_or_tuesday(self.su) == self.tu + assert next_monday_or_tuesday(self.mo) == self.tu def test_previous_friday(self): - self.assertEqual(previous_friday(self.sa), self.fr) - self.assertEqual(previous_friday(self.su), self.fr) + assert previous_friday(self.sa) == self.fr + assert previous_friday(self.su) == self.fr def test_sunday_to_monday(self): - self.assertEqual(sunday_to_monday(self.su), self.mo) + assert sunday_to_monday(self.su) == self.mo def test_nearest_workday(self): - self.assertEqual(nearest_workday(self.sa), self.fr) - self.assertEqual(nearest_workday(self.su), self.mo) - self.assertEqual(nearest_workday(self.mo), self.mo) + assert nearest_workday(self.sa) == self.fr + assert nearest_workday(self.su) == self.mo + assert nearest_workday(self.mo) == self.mo def test_weekend_to_monday(self): - self.assertEqual(weekend_to_monday(self.sa), self.mo) - 
self.assertEqual(weekend_to_monday(self.su), self.mo) - self.assertEqual(weekend_to_monday(self.mo), self.mo) + assert weekend_to_monday(self.sa) == self.mo + assert weekend_to_monday(self.su) == self.mo + assert weekend_to_monday(self.mo) == self.mo def test_next_workday(self): - self.assertEqual(next_workday(self.sa), self.mo) - self.assertEqual(next_workday(self.su), self.mo) - self.assertEqual(next_workday(self.mo), self.tu) + assert next_workday(self.sa) == self.mo + assert next_workday(self.su) == self.mo + assert next_workday(self.mo) == self.tu def test_previous_workday(self): - self.assertEqual(previous_workday(self.sa), self.fr) - self.assertEqual(previous_workday(self.su), self.fr) - self.assertEqual(previous_workday(self.tu), self.mo) + assert previous_workday(self.sa) == self.fr + assert previous_workday(self.su) == self.fr + assert previous_workday(self.tu) == self.mo def test_before_nearest_workday(self): - self.assertEqual(before_nearest_workday(self.sa), self.th) - self.assertEqual(before_nearest_workday(self.su), self.fr) - self.assertEqual(before_nearest_workday(self.tu), self.mo) + assert before_nearest_workday(self.sa) == self.th + assert before_nearest_workday(self.su) == self.fr + assert before_nearest_workday(self.tu) == self.mo def test_after_nearest_workday(self): - self.assertEqual(after_nearest_workday(self.sa), self.mo) - self.assertEqual(after_nearest_workday(self.su), self.tu) - self.assertEqual(after_nearest_workday(self.fr), self.mo) + assert after_nearest_workday(self.sa) == self.mo + assert after_nearest_workday(self.su) == self.tu + assert after_nearest_workday(self.fr) == self.mo -class TestFederalHolidayCalendar(tm.TestCase): +class TestFederalHolidayCalendar(object): - # Test for issue 10278 - def test_no_mlk_before_1984(self): + def test_no_mlk_before_1986(self): + # see gh-10278 class MLKCalendar(AbstractHolidayCalendar): rules = [USMartinLutherKingJr] holidays = MLKCalendar().holidays(start='1984', 
end='1988').to_pydatetime().tolist() + # Testing to make sure holiday is not incorrectly observed before 1986 - self.assertEqual(holidays, [datetime(1986, 1, 20, 0, 0), datetime( - 1987, 1, 19, 0, 0)]) + assert holidays == [datetime(1986, 1, 20, 0, 0), + datetime(1987, 1, 19, 0, 0)] def test_memorial_day(self): class MemorialDay(AbstractHolidayCalendar): @@ -367,24 +362,24 @@ class MemorialDay(AbstractHolidayCalendar): holidays = MemorialDay().holidays(start='1971', end='1980').to_pydatetime().tolist() - # Fixes 5/31 error and checked manually against wikipedia - self.assertEqual(holidays, [datetime(1971, 5, 31, 0, 0), - datetime(1972, 5, 29, 0, 0), - datetime(1973, 5, 28, 0, 0), - datetime(1974, 5, 27, 0, - 0), datetime(1975, 5, 26, 0, 0), - datetime(1976, 5, 31, 0, - 0), datetime(1977, 5, 30, 0, 0), - datetime(1978, 5, 29, 0, - 0), datetime(1979, 5, 28, 0, 0)]) + # Fixes 5/31 error and checked manually against Wikipedia + assert holidays == [datetime(1971, 5, 31, 0, 0), + datetime(1972, 5, 29, 0, 0), + datetime(1973, 5, 28, 0, 0), + datetime(1974, 5, 27, 0, 0), + datetime(1975, 5, 26, 0, 0), + datetime(1976, 5, 31, 0, 0), + datetime(1977, 5, 30, 0, 0), + datetime(1978, 5, 29, 0, 0), + datetime(1979, 5, 28, 0, 0)] -class TestHolidayConflictingArguments(tm.TestCase): - # GH 10217 +class TestHolidayConflictingArguments(object): def test_both_offset_observance_raises(self): - with self.assertRaises(NotImplementedError): + # see gh-10217 + with pytest.raises(NotImplementedError): Holiday("Cyber Monday", month=11, day=1, offset=[DateOffset(weekday=SA(4))], observance=next_monday) diff --git a/pandas/tests/tseries/test_offsets.py b/pandas/tests/tseries/test_offsets.py deleted file mode 100644 index dfa1e94e4dc11..0000000000000 --- a/pandas/tests/tseries/test_offsets.py +++ /dev/null @@ -1,4962 +0,0 @@ -import os -from distutils.version import LooseVersion -from datetime import date, datetime, timedelta -from dateutil.relativedelta import relativedelta - -import pytest 
-from pandas.compat import range, iteritems -from pandas import compat - -import numpy as np - -from pandas.compat.numpy import np_datetime64_compat - -from pandas.core.series import Series -from pandas.tseries.frequencies import (_offset_map, get_freq_code, - _get_freq_str, _INVALID_FREQ_ERROR, - get_offset, get_standard_freq) -from pandas.tseries.index import _to_m8, DatetimeIndex, _daterange_cache -from pandas.tseries.offsets import (BDay, CDay, BQuarterEnd, BMonthEnd, - BusinessHour, WeekOfMonth, CBMonthEnd, - CustomBusinessHour, WeekDay, - CBMonthBegin, BYearEnd, MonthEnd, - MonthBegin, SemiMonthBegin, SemiMonthEnd, - BYearBegin, QuarterBegin, BQuarterBegin, - BMonthBegin, DateOffset, Week, YearBegin, - YearEnd, Hour, Minute, Second, Day, Micro, - QuarterEnd, BusinessMonthEnd, FY5253, - Milli, Nano, Easter, FY5253Quarter, - LastWeekOfMonth, CacheableOffset) -from pandas.tseries.tools import (format, ole2datetime, parse_time_string, - to_datetime, DateParseError) -import pandas.tseries.offsets as offsets -from pandas.io.pickle import read_pickle -from pandas.tslib import normalize_date, NaT, Timestamp, Timedelta -import pandas.tslib as tslib -from pandas.util.testing import assertRaisesRegexp -import pandas.util.testing as tm -from pandas.tseries.holiday import USFederalHolidayCalendar - - -def test_monthrange(): - import calendar - for y in range(2000, 2013): - for m in range(1, 13): - assert tslib.monthrange(y, m) == calendar.monthrange(y, m) - -#### -# Misc function tests -#### - - -def test_format(): - actual = format(datetime(2008, 1, 15)) - assert actual == '20080115' - - -def test_ole2datetime(): - actual = ole2datetime(60000) - assert actual == datetime(2064, 4, 8) - - with pytest.raises(ValueError): - ole2datetime(60) - - -def test_to_datetime1(): - actual = to_datetime(datetime(2008, 1, 15)) - assert actual == datetime(2008, 1, 15) - - actual = to_datetime('20080115') - assert actual == datetime(2008, 1, 15) - - # unparseable - s = 'Month 1, 1999' - 
assert to_datetime(s, errors='ignore') == s - - -def test_normalize_date(): - actual = normalize_date(datetime(2007, 10, 1, 1, 12, 5, 10)) - assert actual == datetime(2007, 10, 1) - - -def test_to_m8(): - valb = datetime(2007, 10, 1) - valu = _to_m8(valb) - tm.assertIsInstance(valu, np.datetime64) - # assert valu == np.datetime64(datetime(2007,10,1)) - - # def test_datetime64_box(): - # valu = np.datetime64(datetime(2007,10,1)) - # valb = _dt_box(valu) - # assert type(valb) == datetime - # assert valb == datetime(2007,10,1) - - ##### - # DateOffset Tests - ##### - - -class Base(tm.TestCase): - _offset = None - - _offset_types = [getattr(offsets, o) for o in offsets.__all__] - - timezones = [None, 'UTC', 'Asia/Tokyo', 'US/Eastern', - 'dateutil/Asia/Tokyo', 'dateutil/US/Pacific'] - - @property - def offset_types(self): - return self._offset_types - - def _get_offset(self, klass, value=1, normalize=False): - # create instance from offset class - if klass is FY5253 or klass is FY5253Quarter: - klass = klass(n=value, startingMonth=1, weekday=1, - qtr_with_extra_week=1, variation='last', - normalize=normalize) - elif klass is LastWeekOfMonth: - klass = klass(n=value, weekday=5, normalize=normalize) - elif klass is WeekOfMonth: - klass = klass(n=value, week=1, weekday=5, normalize=normalize) - elif klass is Week: - klass = klass(n=value, weekday=5, normalize=normalize) - elif klass is DateOffset: - klass = klass(days=value, normalize=normalize) - else: - try: - klass = klass(value, normalize=normalize) - except: - klass = klass(normalize=normalize) - return klass - - def test_apply_out_of_range(self): - if self._offset is None: - return - - # try to create an out-of-bounds result timestamp; if we can't create - # the offset skip - try: - if self._offset in (BusinessHour, CustomBusinessHour): - # Using 10000 in BusinessHour fails in tz check because of DST - # difference - offset = self._get_offset(self._offset, value=100000) - else: - offset = 
self._get_offset(self._offset, value=10000) - - result = Timestamp('20080101') + offset - self.assertIsInstance(result, datetime) - self.assertIsNone(result.tzinfo) - - tm._skip_if_no_pytz() - tm._skip_if_no_dateutil() - # Check tz is preserved - for tz in self.timezones: - t = Timestamp('20080101', tz=tz) - result = t + offset - self.assertIsInstance(result, datetime) - self.assertEqual(t.tzinfo, result.tzinfo) - - except (tslib.OutOfBoundsDatetime): - raise - except (ValueError, KeyError) as e: - pytest.skip( - "cannot create out_of_range offset: {0} {1}".format( - str(self).split('.')[-1], e)) - - -class TestCommon(Base): - - def setUp(self): - # exected value created by Base._get_offset - # are applied to 2011/01/01 09:00 (Saturday) - # used for .apply and .rollforward - self.expecteds = {'Day': Timestamp('2011-01-02 09:00:00'), - 'DateOffset': Timestamp('2011-01-02 09:00:00'), - 'BusinessDay': Timestamp('2011-01-03 09:00:00'), - 'CustomBusinessDay': - Timestamp('2011-01-03 09:00:00'), - 'CustomBusinessMonthEnd': - Timestamp('2011-01-31 09:00:00'), - 'CustomBusinessMonthBegin': - Timestamp('2011-01-03 09:00:00'), - 'MonthBegin': Timestamp('2011-02-01 09:00:00'), - 'BusinessMonthBegin': - Timestamp('2011-01-03 09:00:00'), - 'MonthEnd': Timestamp('2011-01-31 09:00:00'), - 'SemiMonthEnd': Timestamp('2011-01-15 09:00:00'), - 'SemiMonthBegin': Timestamp('2011-01-15 09:00:00'), - 'BusinessMonthEnd': Timestamp('2011-01-31 09:00:00'), - 'YearBegin': Timestamp('2012-01-01 09:00:00'), - 'BYearBegin': Timestamp('2011-01-03 09:00:00'), - 'YearEnd': Timestamp('2011-12-31 09:00:00'), - 'BYearEnd': Timestamp('2011-12-30 09:00:00'), - 'QuarterBegin': Timestamp('2011-03-01 09:00:00'), - 'BQuarterBegin': Timestamp('2011-03-01 09:00:00'), - 'QuarterEnd': Timestamp('2011-03-31 09:00:00'), - 'BQuarterEnd': Timestamp('2011-03-31 09:00:00'), - 'BusinessHour': Timestamp('2011-01-03 10:00:00'), - 'CustomBusinessHour': - Timestamp('2011-01-03 10:00:00'), - 'WeekOfMonth': 
Timestamp('2011-01-08 09:00:00'), - 'LastWeekOfMonth': Timestamp('2011-01-29 09:00:00'), - 'FY5253Quarter': Timestamp('2011-01-25 09:00:00'), - 'FY5253': Timestamp('2011-01-25 09:00:00'), - 'Week': Timestamp('2011-01-08 09:00:00'), - 'Easter': Timestamp('2011-04-24 09:00:00'), - 'Hour': Timestamp('2011-01-01 10:00:00'), - 'Minute': Timestamp('2011-01-01 09:01:00'), - 'Second': Timestamp('2011-01-01 09:00:01'), - 'Milli': Timestamp('2011-01-01 09:00:00.001000'), - 'Micro': Timestamp('2011-01-01 09:00:00.000001'), - 'Nano': Timestamp(np_datetime64_compat( - '2011-01-01T09:00:00.000000001Z'))} - - def test_return_type(self): - for offset in self.offset_types: - offset = self._get_offset(offset) - - # make sure that we are returning a Timestamp - result = Timestamp('20080101') + offset - self.assertIsInstance(result, Timestamp) - - # make sure that we are returning NaT - self.assertTrue(NaT + offset is NaT) - self.assertTrue(offset + NaT is NaT) - - self.assertTrue(NaT - offset is NaT) - self.assertTrue((-offset).apply(NaT) is NaT) - - def test_offset_n(self): - for offset_klass in self.offset_types: - offset = self._get_offset(offset_klass) - self.assertEqual(offset.n, 1) - - neg_offset = offset * -1 - self.assertEqual(neg_offset.n, -1) - - mul_offset = offset * 3 - self.assertEqual(mul_offset.n, 3) - - def test_offset_freqstr(self): - for offset_klass in self.offset_types: - offset = self._get_offset(offset_klass) - - freqstr = offset.freqstr - if freqstr not in ('', - "", - 'LWOM-SAT', ): - code = get_offset(freqstr) - self.assertEqual(offset.rule_code, code) - - def _check_offsetfunc_works(self, offset, funcname, dt, expected, - normalize=False): - offset_s = self._get_offset(offset, normalize=normalize) - func = getattr(offset_s, funcname) - - result = func(dt) - self.assertTrue(isinstance(result, Timestamp)) - self.assertEqual(result, expected) - - result = func(Timestamp(dt)) - self.assertTrue(isinstance(result, Timestamp)) - self.assertEqual(result, expected) - 
- # see gh-14101 - exp_warning = None - ts = Timestamp(dt) + Nano(5) - - if (offset_s.__class__.__name__ == 'DateOffset' and - (funcname == 'apply' or normalize) and - ts.nanosecond > 0): - exp_warning = UserWarning - - # test nanosecond is preserved - with tm.assert_produces_warning(exp_warning, - check_stacklevel=False): - result = func(ts) - self.assertTrue(isinstance(result, Timestamp)) - if normalize is False: - self.assertEqual(result, expected + Nano(5)) - else: - self.assertEqual(result, expected) - - if isinstance(dt, np.datetime64): - # test tz when input is datetime or Timestamp - return - - tm._skip_if_no_pytz() - tm._skip_if_no_dateutil() - - for tz in self.timezones: - expected_localize = expected.tz_localize(tz) - tz_obj = tslib.maybe_get_tz(tz) - dt_tz = tslib._localize_pydatetime(dt, tz_obj) - - result = func(dt_tz) - self.assertTrue(isinstance(result, Timestamp)) - self.assertEqual(result, expected_localize) - - result = func(Timestamp(dt, tz=tz)) - self.assertTrue(isinstance(result, Timestamp)) - self.assertEqual(result, expected_localize) - - # see gh-14101 - exp_warning = None - ts = Timestamp(dt, tz=tz) + Nano(5) - - if (offset_s.__class__.__name__ == 'DateOffset' and - (funcname == 'apply' or normalize) and - ts.nanosecond > 0): - exp_warning = UserWarning - - # test nanosecond is preserved - with tm.assert_produces_warning(exp_warning, - check_stacklevel=False): - result = func(ts) - self.assertTrue(isinstance(result, Timestamp)) - if normalize is False: - self.assertEqual(result, expected_localize + Nano(5)) - else: - self.assertEqual(result, expected_localize) - - def test_apply(self): - sdt = datetime(2011, 1, 1, 9, 0) - ndt = np_datetime64_compat('2011-01-01 09:00Z') - - for offset in self.offset_types: - for dt in [sdt, ndt]: - expected = self.expecteds[offset.__name__] - self._check_offsetfunc_works(offset, 'apply', dt, expected) - - expected = Timestamp(expected.date()) - self._check_offsetfunc_works(offset, 'apply', dt, expected, - 
normalize=True) - - def test_rollforward(self): - expecteds = self.expecteds.copy() - - # result will not be changed if the target is on the offset - no_changes = ['Day', 'MonthBegin', 'SemiMonthBegin', 'YearBegin', - 'Week', 'Hour', 'Minute', 'Second', 'Milli', 'Micro', - 'Nano', 'DateOffset'] - for n in no_changes: - expecteds[n] = Timestamp('2011/01/01 09:00') - - expecteds['BusinessHour'] = Timestamp('2011-01-03 09:00:00') - expecteds['CustomBusinessHour'] = Timestamp('2011-01-03 09:00:00') - - # but be changed when normalize=True - norm_expected = expecteds.copy() - for k in norm_expected: - norm_expected[k] = Timestamp(norm_expected[k].date()) - - normalized = {'Day': Timestamp('2011-01-02 00:00:00'), - 'DateOffset': Timestamp('2011-01-02 00:00:00'), - 'MonthBegin': Timestamp('2011-02-01 00:00:00'), - 'SemiMonthBegin': Timestamp('2011-01-15 00:00:00'), - 'YearBegin': Timestamp('2012-01-01 00:00:00'), - 'Week': Timestamp('2011-01-08 00:00:00'), - 'Hour': Timestamp('2011-01-01 00:00:00'), - 'Minute': Timestamp('2011-01-01 00:00:00'), - 'Second': Timestamp('2011-01-01 00:00:00'), - 'Milli': Timestamp('2011-01-01 00:00:00'), - 'Micro': Timestamp('2011-01-01 00:00:00')} - norm_expected.update(normalized) - - sdt = datetime(2011, 1, 1, 9, 0) - ndt = np_datetime64_compat('2011-01-01 09:00Z') - - for offset in self.offset_types: - for dt in [sdt, ndt]: - expected = expecteds[offset.__name__] - self._check_offsetfunc_works(offset, 'rollforward', dt, - expected) - expected = norm_expected[offset.__name__] - self._check_offsetfunc_works(offset, 'rollforward', dt, - expected, normalize=True) - - def test_rollback(self): - expecteds = {'BusinessDay': Timestamp('2010-12-31 09:00:00'), - 'CustomBusinessDay': Timestamp('2010-12-31 09:00:00'), - 'CustomBusinessMonthEnd': - Timestamp('2010-12-31 09:00:00'), - 'CustomBusinessMonthBegin': - Timestamp('2010-12-01 09:00:00'), - 'BusinessMonthBegin': Timestamp('2010-12-01 09:00:00'), - 'MonthEnd': Timestamp('2010-12-31 09:00:00'), 
- 'SemiMonthEnd': Timestamp('2010-12-31 09:00:00'), - 'BusinessMonthEnd': Timestamp('2010-12-31 09:00:00'), - 'BYearBegin': Timestamp('2010-01-01 09:00:00'), - 'YearEnd': Timestamp('2010-12-31 09:00:00'), - 'BYearEnd': Timestamp('2010-12-31 09:00:00'), - 'QuarterBegin': Timestamp('2010-12-01 09:00:00'), - 'BQuarterBegin': Timestamp('2010-12-01 09:00:00'), - 'QuarterEnd': Timestamp('2010-12-31 09:00:00'), - 'BQuarterEnd': Timestamp('2010-12-31 09:00:00'), - 'BusinessHour': Timestamp('2010-12-31 17:00:00'), - 'CustomBusinessHour': Timestamp('2010-12-31 17:00:00'), - 'WeekOfMonth': Timestamp('2010-12-11 09:00:00'), - 'LastWeekOfMonth': Timestamp('2010-12-25 09:00:00'), - 'FY5253Quarter': Timestamp('2010-10-26 09:00:00'), - 'FY5253': Timestamp('2010-01-26 09:00:00'), - 'Easter': Timestamp('2010-04-04 09:00:00')} - - # result will not be changed if the target is on the offset - for n in ['Day', 'MonthBegin', 'SemiMonthBegin', 'YearBegin', 'Week', - 'Hour', 'Minute', 'Second', 'Milli', 'Micro', 'Nano', - 'DateOffset']: - expecteds[n] = Timestamp('2011/01/01 09:00') - - # but be changed when normalize=True - norm_expected = expecteds.copy() - for k in norm_expected: - norm_expected[k] = Timestamp(norm_expected[k].date()) - - normalized = {'Day': Timestamp('2010-12-31 00:00:00'), - 'DateOffset': Timestamp('2010-12-31 00:00:00'), - 'MonthBegin': Timestamp('2010-12-01 00:00:00'), - 'SemiMonthBegin': Timestamp('2010-12-15 00:00:00'), - 'YearBegin': Timestamp('2010-01-01 00:00:00'), - 'Week': Timestamp('2010-12-25 00:00:00'), - 'Hour': Timestamp('2011-01-01 00:00:00'), - 'Minute': Timestamp('2011-01-01 00:00:00'), - 'Second': Timestamp('2011-01-01 00:00:00'), - 'Milli': Timestamp('2011-01-01 00:00:00'), - 'Micro': Timestamp('2011-01-01 00:00:00')} - norm_expected.update(normalized) - - sdt = datetime(2011, 1, 1, 9, 0) - ndt = np_datetime64_compat('2011-01-01 09:00Z') - - for offset in self.offset_types: - for dt in [sdt, ndt]: - expected = expecteds[offset.__name__] - 
self._check_offsetfunc_works(offset, 'rollback', dt, expected) - - expected = norm_expected[offset.__name__] - self._check_offsetfunc_works(offset, 'rollback', dt, expected, - normalize=True) - - def test_onOffset(self): - for offset in self.offset_types: - dt = self.expecteds[offset.__name__] - offset_s = self._get_offset(offset) - self.assertTrue(offset_s.onOffset(dt)) - - # when normalize=True, onOffset checks time is 00:00:00 - offset_n = self._get_offset(offset, normalize=True) - self.assertFalse(offset_n.onOffset(dt)) - - if offset in (BusinessHour, CustomBusinessHour): - # In default BusinessHour (9:00-17:00), normalized time - # cannot be in business hour range - continue - date = datetime(dt.year, dt.month, dt.day) - self.assertTrue(offset_n.onOffset(date)) - - def test_add(self): - dt = datetime(2011, 1, 1, 9, 0) - - for offset in self.offset_types: - offset_s = self._get_offset(offset) - expected = self.expecteds[offset.__name__] - - result_dt = dt + offset_s - result_ts = Timestamp(dt) + offset_s - for result in [result_dt, result_ts]: - self.assertTrue(isinstance(result, Timestamp)) - self.assertEqual(result, expected) - - tm._skip_if_no_pytz() - for tz in self.timezones: - expected_localize = expected.tz_localize(tz) - result = Timestamp(dt, tz=tz) + offset_s - self.assertTrue(isinstance(result, Timestamp)) - self.assertEqual(result, expected_localize) - - # normalize=True - offset_s = self._get_offset(offset, normalize=True) - expected = Timestamp(expected.date()) - - result_dt = dt + offset_s - result_ts = Timestamp(dt) + offset_s - for result in [result_dt, result_ts]: - self.assertTrue(isinstance(result, Timestamp)) - self.assertEqual(result, expected) - - for tz in self.timezones: - expected_localize = expected.tz_localize(tz) - result = Timestamp(dt, tz=tz) + offset_s - self.assertTrue(isinstance(result, Timestamp)) - self.assertEqual(result, expected_localize) - - def test_pickle_v0_15_2(self): - offsets = {'DateOffset': DateOffset(years=1), - 
'MonthBegin': MonthBegin(1), - 'Day': Day(1), - 'YearBegin': YearBegin(1), - 'Week': Week(1)} - pickle_path = os.path.join(tm.get_data_path(), - 'dateoffset_0_15_2.pickle') - # This code was executed once on v0.15.2 to generate the pickle: - # with open(pickle_path, 'wb') as f: pickle.dump(offsets, f) - # - tm.assert_dict_equal(offsets, read_pickle(pickle_path)) - - -class TestDateOffset(Base): - - def setUp(self): - self.d = Timestamp(datetime(2008, 1, 2)) - _offset_map.clear() - - def test_repr(self): - repr(DateOffset()) - repr(DateOffset(2)) - repr(2 * DateOffset()) - repr(2 * DateOffset(months=2)) - - def test_mul(self): - assert DateOffset(2) == 2 * DateOffset(1) - assert DateOffset(2) == DateOffset(1) * 2 - - def test_constructor(self): - - assert ((self.d + DateOffset(months=2)) == datetime(2008, 3, 2)) - assert ((self.d - DateOffset(months=2)) == datetime(2007, 11, 2)) - - assert ((self.d + DateOffset(2)) == datetime(2008, 1, 4)) - - assert not DateOffset(2).isAnchored() - assert DateOffset(1).isAnchored() - - d = datetime(2008, 1, 31) - assert ((d + DateOffset(months=1)) == datetime(2008, 2, 29)) - - def test_copy(self): - assert (DateOffset(months=2).copy() == DateOffset(months=2)) - - def test_eq(self): - offset1 = DateOffset(days=1) - offset2 = DateOffset(days=365) - - self.assertNotEqual(offset1, offset2) - - -class TestBusinessDay(Base): - _offset = BDay - - def setUp(self): - self.d = datetime(2008, 1, 1) - - self.offset = BDay() - self.offset2 = BDay(2) - - def test_different_normalize_equals(self): - # equivalent in this special case - offset = BDay() - offset2 = BDay() - offset2.normalize = True - self.assertEqual(offset, offset2) - - def test_repr(self): - self.assertEqual(repr(self.offset), '') - assert repr(self.offset2) == '<2 * BusinessDays>' - - expected = '' - assert repr(self.offset + timedelta(1)) == expected - - def test_with_offset(self): - offset = self.offset + timedelta(hours=2) - - assert (self.d + offset) == datetime(2008, 1, 2, 
2) - - def testEQ(self): - self.assertEqual(self.offset2, self.offset2) - - def test_mul(self): - pass - - def test_hash(self): - self.assertEqual(hash(self.offset2), hash(self.offset2)) - - def testCall(self): - self.assertEqual(self.offset2(self.d), datetime(2008, 1, 3)) - - def testRAdd(self): - self.assertEqual(self.d + self.offset2, self.offset2 + self.d) - - def testSub(self): - off = self.offset2 - self.assertRaises(Exception, off.__sub__, self.d) - self.assertEqual(2 * off - off, off) - - self.assertEqual(self.d - self.offset2, self.d + BDay(-2)) - - def testRSub(self): - self.assertEqual(self.d - self.offset2, (-self.offset2).apply(self.d)) - - def testMult1(self): - self.assertEqual(self.d + 10 * self.offset, self.d + BDay(10)) - - def testMult2(self): - self.assertEqual(self.d + (-5 * BDay(-10)), self.d + BDay(50)) - - def testRollback1(self): - self.assertEqual(BDay(10).rollback(self.d), self.d) - - def testRollback2(self): - self.assertEqual( - BDay(10).rollback(datetime(2008, 1, 5)), datetime(2008, 1, 4)) - - def testRollforward1(self): - self.assertEqual(BDay(10).rollforward(self.d), self.d) - - def testRollforward2(self): - self.assertEqual( - BDay(10).rollforward(datetime(2008, 1, 5)), datetime(2008, 1, 7)) - - def test_roll_date_object(self): - offset = BDay() - - dt = date(2012, 9, 15) - - result = offset.rollback(dt) - self.assertEqual(result, datetime(2012, 9, 14)) - - result = offset.rollforward(dt) - self.assertEqual(result, datetime(2012, 9, 17)) - - offset = offsets.Day() - result = offset.rollback(dt) - self.assertEqual(result, datetime(2012, 9, 15)) - - result = offset.rollforward(dt) - self.assertEqual(result, datetime(2012, 9, 15)) - - def test_onOffset(self): - tests = [(BDay(), datetime(2008, 1, 1), True), - (BDay(), datetime(2008, 1, 5), False)] - - for offset, d, expected in tests: - assertOnOffset(offset, d, expected) - - def test_apply(self): - tests = [] - - tests.append((BDay(), {datetime(2008, 1, 1): datetime(2008, 1, 2), - 
datetime(2008, 1, 4): datetime(2008, 1, 7), - datetime(2008, 1, 5): datetime(2008, 1, 7), - datetime(2008, 1, 6): datetime(2008, 1, 7), - datetime(2008, 1, 7): datetime(2008, 1, 8)})) - - tests.append((2 * BDay(), {datetime(2008, 1, 1): datetime(2008, 1, 3), - datetime(2008, 1, 4): datetime(2008, 1, 8), - datetime(2008, 1, 5): datetime(2008, 1, 8), - datetime(2008, 1, 6): datetime(2008, 1, 8), - datetime(2008, 1, 7): datetime(2008, 1, 9)} - )) - - tests.append((-BDay(), {datetime(2008, 1, 1): datetime(2007, 12, 31), - datetime(2008, 1, 4): datetime(2008, 1, 3), - datetime(2008, 1, 5): datetime(2008, 1, 4), - datetime(2008, 1, 6): datetime(2008, 1, 4), - datetime(2008, 1, 7): datetime(2008, 1, 4), - datetime(2008, 1, 8): datetime(2008, 1, 7)} - )) - - tests.append((-2 * BDay(), { - datetime(2008, 1, 1): datetime(2007, 12, 28), - datetime(2008, 1, 4): datetime(2008, 1, 2), - datetime(2008, 1, 5): datetime(2008, 1, 3), - datetime(2008, 1, 6): datetime(2008, 1, 3), - datetime(2008, 1, 7): datetime(2008, 1, 3), - datetime(2008, 1, 8): datetime(2008, 1, 4), - datetime(2008, 1, 9): datetime(2008, 1, 7)} - )) - - tests.append((BDay(0), {datetime(2008, 1, 1): datetime(2008, 1, 1), - datetime(2008, 1, 4): datetime(2008, 1, 4), - datetime(2008, 1, 5): datetime(2008, 1, 7), - datetime(2008, 1, 6): datetime(2008, 1, 7), - datetime(2008, 1, 7): datetime(2008, 1, 7)} - )) - - for offset, cases in tests: - for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) - - def test_apply_large_n(self): - dt = datetime(2012, 10, 23) - - result = dt + BDay(10) - self.assertEqual(result, datetime(2012, 11, 6)) - - result = dt + BDay(100) - BDay(100) - self.assertEqual(result, dt) - - off = BDay() * 6 - rs = datetime(2012, 1, 1) - off - xp = datetime(2011, 12, 23) - self.assertEqual(rs, xp) - - st = datetime(2011, 12, 18) - rs = st + off - xp = datetime(2011, 12, 26) - self.assertEqual(rs, xp) - - off = BDay() * 10 - rs = datetime(2014, 1, 5) + off # see #5890 - xp = 
datetime(2014, 1, 17) - self.assertEqual(rs, xp) - - def test_apply_corner(self): - self.assertRaises(TypeError, BDay().apply, BMonthEnd()) - - def test_offsets_compare_equal(self): - # root cause of #456 - offset1 = BDay() - offset2 = BDay() - self.assertFalse(offset1 != offset2) - - -class TestBusinessHour(Base): - _offset = BusinessHour - - def setUp(self): - self.d = datetime(2014, 7, 1, 10, 00) - - self.offset1 = BusinessHour() - self.offset2 = BusinessHour(n=3) - - self.offset3 = BusinessHour(n=-1) - self.offset4 = BusinessHour(n=-4) - - from datetime import time as dt_time - self.offset5 = BusinessHour(start=dt_time(11, 0), end=dt_time(14, 30)) - self.offset6 = BusinessHour(start='20:00', end='05:00') - self.offset7 = BusinessHour(n=-2, start=dt_time(21, 30), - end=dt_time(6, 30)) - - def test_constructor_errors(self): - from datetime import time as dt_time - with tm.assertRaises(ValueError): - BusinessHour(start=dt_time(11, 0, 5)) - with tm.assertRaises(ValueError): - BusinessHour(start='AAA') - with tm.assertRaises(ValueError): - BusinessHour(start='14:00:05') - - def test_different_normalize_equals(self): - # equivalent in this special case - offset = self._offset() - offset2 = self._offset() - offset2.normalize = True - self.assertEqual(offset, offset2) - - def test_repr(self): - self.assertEqual(repr(self.offset1), '') - self.assertEqual(repr(self.offset2), - '<3 * BusinessHours: BH=09:00-17:00>') - self.assertEqual(repr(self.offset3), - '<-1 * BusinessHour: BH=09:00-17:00>') - self.assertEqual(repr(self.offset4), - '<-4 * BusinessHours: BH=09:00-17:00>') - - self.assertEqual(repr(self.offset5), '') - self.assertEqual(repr(self.offset6), '') - self.assertEqual(repr(self.offset7), - '<-2 * BusinessHours: BH=21:30-06:30>') - - def test_with_offset(self): - expected = Timestamp('2014-07-01 13:00') - - self.assertEqual(self.d + BusinessHour() * 3, expected) - self.assertEqual(self.d + BusinessHour(n=3), expected) - - def testEQ(self): - for offset in 
[self.offset1, self.offset2, self.offset3, self.offset4]: - self.assertEqual(offset, offset) - - self.assertNotEqual(BusinessHour(), BusinessHour(-1)) - self.assertEqual(BusinessHour(start='09:00'), BusinessHour()) - self.assertNotEqual(BusinessHour(start='09:00'), - BusinessHour(start='09:01')) - self.assertNotEqual(BusinessHour(start='09:00', end='17:00'), - BusinessHour(start='17:00', end='09:01')) - - def test_hash(self): - for offset in [self.offset1, self.offset2, self.offset3, self.offset4]: - self.assertEqual(hash(offset), hash(offset)) - - def testCall(self): - self.assertEqual(self.offset1(self.d), datetime(2014, 7, 1, 11)) - self.assertEqual(self.offset2(self.d), datetime(2014, 7, 1, 13)) - self.assertEqual(self.offset3(self.d), datetime(2014, 6, 30, 17)) - self.assertEqual(self.offset4(self.d), datetime(2014, 6, 30, 14)) - - def testRAdd(self): - self.assertEqual(self.d + self.offset2, self.offset2 + self.d) - - def testSub(self): - off = self.offset2 - self.assertRaises(Exception, off.__sub__, self.d) - self.assertEqual(2 * off - off, off) - - self.assertEqual(self.d - self.offset2, self.d + self._offset(-3)) - - def testRSub(self): - self.assertEqual(self.d - self.offset2, (-self.offset2).apply(self.d)) - - def testMult1(self): - self.assertEqual(self.d + 5 * self.offset1, self.d + self._offset(5)) - - def testMult2(self): - self.assertEqual(self.d + (-3 * self._offset(-2)), - self.d + self._offset(6)) - - def testRollback1(self): - self.assertEqual(self.offset1.rollback(self.d), self.d) - self.assertEqual(self.offset2.rollback(self.d), self.d) - self.assertEqual(self.offset3.rollback(self.d), self.d) - self.assertEqual(self.offset4.rollback(self.d), self.d) - self.assertEqual(self.offset5.rollback(self.d), - datetime(2014, 6, 30, 14, 30)) - self.assertEqual(self.offset6.rollback( - self.d), datetime(2014, 7, 1, 5, 0)) - self.assertEqual(self.offset7.rollback( - self.d), datetime(2014, 7, 1, 6, 30)) - - d = datetime(2014, 7, 1, 0) - 
self.assertEqual(self.offset1.rollback(d), datetime(2014, 6, 30, 17)) - self.assertEqual(self.offset2.rollback(d), datetime(2014, 6, 30, 17)) - self.assertEqual(self.offset3.rollback(d), datetime(2014, 6, 30, 17)) - self.assertEqual(self.offset4.rollback(d), datetime(2014, 6, 30, 17)) - self.assertEqual(self.offset5.rollback( - d), datetime(2014, 6, 30, 14, 30)) - self.assertEqual(self.offset6.rollback(d), d) - self.assertEqual(self.offset7.rollback(d), d) - - self.assertEqual(self._offset(5).rollback(self.d), self.d) - - def testRollback2(self): - self.assertEqual(self._offset(-3) - .rollback(datetime(2014, 7, 5, 15, 0)), - datetime(2014, 7, 4, 17, 0)) - - def testRollforward1(self): - self.assertEqual(self.offset1.rollforward(self.d), self.d) - self.assertEqual(self.offset2.rollforward(self.d), self.d) - self.assertEqual(self.offset3.rollforward(self.d), self.d) - self.assertEqual(self.offset4.rollforward(self.d), self.d) - self.assertEqual(self.offset5.rollforward( - self.d), datetime(2014, 7, 1, 11, 0)) - self.assertEqual(self.offset6.rollforward( - self.d), datetime(2014, 7, 1, 20, 0)) - self.assertEqual(self.offset7.rollforward( - self.d), datetime(2014, 7, 1, 21, 30)) - - d = datetime(2014, 7, 1, 0) - self.assertEqual(self.offset1.rollforward(d), datetime(2014, 7, 1, 9)) - self.assertEqual(self.offset2.rollforward(d), datetime(2014, 7, 1, 9)) - self.assertEqual(self.offset3.rollforward(d), datetime(2014, 7, 1, 9)) - self.assertEqual(self.offset4.rollforward(d), datetime(2014, 7, 1, 9)) - self.assertEqual(self.offset5.rollforward(d), datetime(2014, 7, 1, 11)) - self.assertEqual(self.offset6.rollforward(d), d) - self.assertEqual(self.offset7.rollforward(d), d) - - self.assertEqual(self._offset(5).rollforward(self.d), self.d) - - def testRollforward2(self): - self.assertEqual(self._offset(-3) - .rollforward(datetime(2014, 7, 5, 16, 0)), - datetime(2014, 7, 7, 9)) - - def test_roll_date_object(self): - offset = BusinessHour() - - dt = datetime(2014, 7, 6, 15, 0) 
- - result = offset.rollback(dt) - self.assertEqual(result, datetime(2014, 7, 4, 17)) - - result = offset.rollforward(dt) - self.assertEqual(result, datetime(2014, 7, 7, 9)) - - def test_normalize(self): - tests = [] - - tests.append((BusinessHour(normalize=True), - {datetime(2014, 7, 1, 8): datetime(2014, 7, 1), - datetime(2014, 7, 1, 17): datetime(2014, 7, 2), - datetime(2014, 7, 1, 16): datetime(2014, 7, 2), - datetime(2014, 7, 1, 23): datetime(2014, 7, 2), - datetime(2014, 7, 1, 0): datetime(2014, 7, 1), - datetime(2014, 7, 4, 15): datetime(2014, 7, 4), - datetime(2014, 7, 4, 15, 59): datetime(2014, 7, 4), - datetime(2014, 7, 4, 16, 30): datetime(2014, 7, 7), - datetime(2014, 7, 5, 23): datetime(2014, 7, 7), - datetime(2014, 7, 6, 10): datetime(2014, 7, 7)})) - - tests.append((BusinessHour(-1, normalize=True), - {datetime(2014, 7, 1, 8): datetime(2014, 6, 30), - datetime(2014, 7, 1, 17): datetime(2014, 7, 1), - datetime(2014, 7, 1, 16): datetime(2014, 7, 1), - datetime(2014, 7, 1, 10): datetime(2014, 6, 30), - datetime(2014, 7, 1, 0): datetime(2014, 6, 30), - datetime(2014, 7, 7, 10): datetime(2014, 7, 4), - datetime(2014, 7, 7, 10, 1): datetime(2014, 7, 7), - datetime(2014, 7, 5, 23): datetime(2014, 7, 4), - datetime(2014, 7, 6, 10): datetime(2014, 7, 4)})) - - tests.append((BusinessHour(1, normalize=True, start='17:00', - end='04:00'), - {datetime(2014, 7, 1, 8): datetime(2014, 7, 1), - datetime(2014, 7, 1, 17): datetime(2014, 7, 1), - datetime(2014, 7, 1, 23): datetime(2014, 7, 2), - datetime(2014, 7, 2, 2): datetime(2014, 7, 2), - datetime(2014, 7, 2, 3): datetime(2014, 7, 2), - datetime(2014, 7, 4, 23): datetime(2014, 7, 5), - datetime(2014, 7, 5, 2): datetime(2014, 7, 5), - datetime(2014, 7, 7, 2): datetime(2014, 7, 7), - datetime(2014, 7, 7, 17): datetime(2014, 7, 7)})) - - for offset, cases in tests: - for dt, expected in compat.iteritems(cases): - self.assertEqual(offset.apply(dt), expected) - - def test_onOffset(self): - tests = [] - - 
tests.append((BusinessHour(), {datetime(2014, 7, 1, 9): True, - datetime(2014, 7, 1, 8, 59): False, - datetime(2014, 7, 1, 8): False, - datetime(2014, 7, 1, 17): True, - datetime(2014, 7, 1, 17, 1): False, - datetime(2014, 7, 1, 18): False, - datetime(2014, 7, 5, 9): False, - datetime(2014, 7, 6, 12): False})) - - tests.append((BusinessHour(start='10:00', end='15:00'), - {datetime(2014, 7, 1, 9): False, - datetime(2014, 7, 1, 10): True, - datetime(2014, 7, 1, 15): True, - datetime(2014, 7, 1, 15, 1): False, - datetime(2014, 7, 5, 12): False, - datetime(2014, 7, 6, 12): False})) - - tests.append((BusinessHour(start='19:00', end='05:00'), - {datetime(2014, 7, 1, 9, 0): False, - datetime(2014, 7, 1, 10, 0): False, - datetime(2014, 7, 1, 15): False, - datetime(2014, 7, 1, 15, 1): False, - datetime(2014, 7, 5, 12, 0): False, - datetime(2014, 7, 6, 12, 0): False, - datetime(2014, 7, 1, 19, 0): True, - datetime(2014, 7, 2, 0, 0): True, - datetime(2014, 7, 4, 23): True, - datetime(2014, 7, 5, 1): True, - datetime(2014, 7, 5, 5, 0): True, - datetime(2014, 7, 6, 23, 0): False, - datetime(2014, 7, 7, 3, 0): False})) - - for offset, cases in tests: - for dt, expected in compat.iteritems(cases): - self.assertEqual(offset.onOffset(dt), expected) - - def test_opening_time(self): - tests = [] - - # opening time should be affected by sign of n, not by n's value and - # end - tests.append(( - [BusinessHour(), BusinessHour(n=2), BusinessHour( - n=4), BusinessHour(end='10:00'), BusinessHour(n=2, end='4:00'), - BusinessHour(n=4, end='15:00')], - {datetime(2014, 7, 1, 11): (datetime(2014, 7, 2, 9), datetime( - 2014, 7, 1, 9)), - datetime(2014, 7, 1, 18): (datetime(2014, 7, 2, 9), datetime( - 2014, 7, 1, 9)), - datetime(2014, 7, 1, 23): (datetime(2014, 7, 2, 9), datetime( - 2014, 7, 1, 9)), - datetime(2014, 7, 2, 8): (datetime(2014, 7, 2, 9), datetime( - 2014, 7, 1, 9)), - # if timestamp is on opening time, next opening time is - # as it is - datetime(2014, 7, 2, 9): (datetime(2014, 7, 
2, 9), datetime( - 2014, 7, 2, 9)), - datetime(2014, 7, 2, 10): (datetime(2014, 7, 3, 9), datetime( - 2014, 7, 2, 9)), - # 2014-07-05 is saturday - datetime(2014, 7, 5, 10): (datetime(2014, 7, 7, 9), datetime( - 2014, 7, 4, 9)), - datetime(2014, 7, 4, 10): (datetime(2014, 7, 7, 9), datetime( - 2014, 7, 4, 9)), - datetime(2014, 7, 4, 23): (datetime(2014, 7, 7, 9), datetime( - 2014, 7, 4, 9)), - datetime(2014, 7, 6, 10): (datetime(2014, 7, 7, 9), datetime( - 2014, 7, 4, 9)), - datetime(2014, 7, 7, 5): (datetime(2014, 7, 7, 9), datetime( - 2014, 7, 4, 9)), - datetime(2014, 7, 7, 9, 1): (datetime(2014, 7, 8, 9), datetime( - 2014, 7, 7, 9))})) - - tests.append(([BusinessHour(start='11:15'), - BusinessHour(n=2, start='11:15'), - BusinessHour(n=3, start='11:15'), - BusinessHour(start='11:15', end='10:00'), - BusinessHour(n=2, start='11:15', end='4:00'), - BusinessHour(n=3, start='11:15', end='15:00')], - {datetime(2014, 7, 1, 11): (datetime( - 2014, 7, 1, 11, 15), datetime(2014, 6, 30, 11, 15)), - datetime(2014, 7, 1, 18): (datetime( - 2014, 7, 2, 11, 15), datetime(2014, 7, 1, 11, 15)), - datetime(2014, 7, 1, 23): (datetime( - 2014, 7, 2, 11, 15), datetime(2014, 7, 1, 11, 15)), - datetime(2014, 7, 2, 8): (datetime(2014, 7, 2, 11, 15), - datetime(2014, 7, 1, 11, 15)), - datetime(2014, 7, 2, 9): (datetime(2014, 7, 2, 11, 15), - datetime(2014, 7, 1, 11, 15)), - datetime(2014, 7, 2, 10): (datetime( - 2014, 7, 2, 11, 15), datetime(2014, 7, 1, 11, 15)), - datetime(2014, 7, 2, 11, 15): (datetime( - 2014, 7, 2, 11, 15), datetime(2014, 7, 2, 11, 15)), - datetime(2014, 7, 2, 11, 15, 1): (datetime( - 2014, 7, 3, 11, 15), datetime(2014, 7, 2, 11, 15)), - datetime(2014, 7, 5, 10): (datetime( - 2014, 7, 7, 11, 15), datetime(2014, 7, 4, 11, 15)), - datetime(2014, 7, 4, 10): (datetime( - 2014, 7, 4, 11, 15), datetime(2014, 7, 3, 11, 15)), - datetime(2014, 7, 4, 23): (datetime( - 2014, 7, 7, 11, 15), datetime(2014, 7, 4, 11, 15)), - datetime(2014, 7, 6, 10): (datetime( - 2014, 7, 7, 11, 
15), datetime(2014, 7, 4, 11, 15)), - datetime(2014, 7, 7, 5): (datetime(2014, 7, 7, 11, 15), - datetime(2014, 7, 4, 11, 15)), - datetime(2014, 7, 7, 9, 1): ( - datetime(2014, 7, 7, 11, 15), - datetime(2014, 7, 4, 11, 15))})) - - tests.append(([BusinessHour(-1), BusinessHour(n=-2), - BusinessHour(n=-4), - BusinessHour(n=-1, end='10:00'), - BusinessHour(n=-2, end='4:00'), - BusinessHour(n=-4, end='15:00')], - {datetime(2014, 7, 1, 11): (datetime(2014, 7, 1, 9), - datetime(2014, 7, 2, 9)), - datetime(2014, 7, 1, 18): (datetime(2014, 7, 1, 9), - datetime(2014, 7, 2, 9)), - datetime(2014, 7, 1, 23): (datetime(2014, 7, 1, 9), - datetime(2014, 7, 2, 9)), - datetime(2014, 7, 2, 8): (datetime(2014, 7, 1, 9), - datetime(2014, 7, 2, 9)), - datetime(2014, 7, 2, 9): (datetime(2014, 7, 2, 9), - datetime(2014, 7, 2, 9)), - datetime(2014, 7, 2, 10): (datetime(2014, 7, 2, 9), - datetime(2014, 7, 3, 9)), - datetime(2014, 7, 5, 10): (datetime(2014, 7, 4, 9), - datetime(2014, 7, 7, 9)), - datetime(2014, 7, 4, 10): (datetime(2014, 7, 4, 9), - datetime(2014, 7, 7, 9)), - datetime(2014, 7, 4, 23): (datetime(2014, 7, 4, 9), - datetime(2014, 7, 7, 9)), - datetime(2014, 7, 6, 10): (datetime(2014, 7, 4, 9), - datetime(2014, 7, 7, 9)), - datetime(2014, 7, 7, 5): (datetime(2014, 7, 4, 9), - datetime(2014, 7, 7, 9)), - datetime(2014, 7, 7, 9): (datetime(2014, 7, 7, 9), - datetime(2014, 7, 7, 9)), - datetime(2014, 7, 7, 9, 1): (datetime(2014, 7, 7, 9), - datetime(2014, 7, 8, 9))})) - - tests.append(([BusinessHour(start='17:00', end='05:00'), - BusinessHour(n=3, start='17:00', end='03:00')], - {datetime(2014, 7, 1, 11): (datetime(2014, 7, 1, 17), - datetime(2014, 6, 30, 17)), - datetime(2014, 7, 1, 18): (datetime(2014, 7, 2, 17), - datetime(2014, 7, 1, 17)), - datetime(2014, 7, 1, 23): (datetime(2014, 7, 2, 17), - datetime(2014, 7, 1, 17)), - datetime(2014, 7, 2, 8): (datetime(2014, 7, 2, 17), - datetime(2014, 7, 1, 17)), - datetime(2014, 7, 2, 9): (datetime(2014, 7, 2, 17), - datetime(2014, 7, 
1, 17)), - datetime(2014, 7, 4, 17): (datetime(2014, 7, 4, 17), - datetime(2014, 7, 4, 17)), - datetime(2014, 7, 5, 10): (datetime(2014, 7, 7, 17), - datetime(2014, 7, 4, 17)), - datetime(2014, 7, 4, 10): (datetime(2014, 7, 4, 17), - datetime(2014, 7, 3, 17)), - datetime(2014, 7, 4, 23): (datetime(2014, 7, 7, 17), - datetime(2014, 7, 4, 17)), - datetime(2014, 7, 6, 10): (datetime(2014, 7, 7, 17), - datetime(2014, 7, 4, 17)), - datetime(2014, 7, 7, 5): (datetime(2014, 7, 7, 17), - datetime(2014, 7, 4, 17)), - datetime(2014, 7, 7, 17, 1): (datetime( - 2014, 7, 8, 17), datetime(2014, 7, 7, 17)), })) - - tests.append(([BusinessHour(-1, start='17:00', end='05:00'), - BusinessHour(n=-2, start='17:00', end='03:00')], - {datetime(2014, 7, 1, 11): (datetime(2014, 6, 30, 17), - datetime(2014, 7, 1, 17)), - datetime(2014, 7, 1, 18): (datetime(2014, 7, 1, 17), - datetime(2014, 7, 2, 17)), - datetime(2014, 7, 1, 23): (datetime(2014, 7, 1, 17), - datetime(2014, 7, 2, 17)), - datetime(2014, 7, 2, 8): (datetime(2014, 7, 1, 17), - datetime(2014, 7, 2, 17)), - datetime(2014, 7, 2, 9): (datetime(2014, 7, 1, 17), - datetime(2014, 7, 2, 17)), - datetime(2014, 7, 2, 16, 59): (datetime( - 2014, 7, 1, 17), datetime(2014, 7, 2, 17)), - datetime(2014, 7, 5, 10): (datetime(2014, 7, 4, 17), - datetime(2014, 7, 7, 17)), - datetime(2014, 7, 4, 10): (datetime(2014, 7, 3, 17), - datetime(2014, 7, 4, 17)), - datetime(2014, 7, 4, 23): (datetime(2014, 7, 4, 17), - datetime(2014, 7, 7, 17)), - datetime(2014, 7, 6, 10): (datetime(2014, 7, 4, 17), - datetime(2014, 7, 7, 17)), - datetime(2014, 7, 7, 5): (datetime(2014, 7, 4, 17), - datetime(2014, 7, 7, 17)), - datetime(2014, 7, 7, 18): (datetime(2014, 7, 7, 17), - datetime(2014, 7, 8, 17))})) - - for _offsets, cases in tests: - for offset in _offsets: - for dt, (exp_next, exp_prev) in compat.iteritems(cases): - self.assertEqual(offset._next_opening_time(dt), exp_next) - self.assertEqual(offset._prev_opening_time(dt), exp_prev) - - def test_apply(self): 
- tests = [] - - tests.append(( - BusinessHour(), - {datetime(2014, 7, 1, 11): datetime(2014, 7, 1, 12), - datetime(2014, 7, 1, 13): datetime(2014, 7, 1, 14), - datetime(2014, 7, 1, 15): datetime(2014, 7, 1, 16), - datetime(2014, 7, 1, 19): datetime(2014, 7, 2, 10), - datetime(2014, 7, 1, 16): datetime(2014, 7, 2, 9), - datetime(2014, 7, 1, 16, 30, 15): datetime(2014, 7, 2, 9, 30, 15), - datetime(2014, 7, 1, 17): datetime(2014, 7, 2, 10), - datetime(2014, 7, 2, 11): datetime(2014, 7, 2, 12), - # out of business hours - datetime(2014, 7, 2, 8): datetime(2014, 7, 2, 10), - datetime(2014, 7, 2, 19): datetime(2014, 7, 3, 10), - datetime(2014, 7, 2, 23): datetime(2014, 7, 3, 10), - datetime(2014, 7, 3, 0): datetime(2014, 7, 3, 10), - # saturday - datetime(2014, 7, 5, 15): datetime(2014, 7, 7, 10), - datetime(2014, 7, 4, 17): datetime(2014, 7, 7, 10), - datetime(2014, 7, 4, 16, 30): datetime(2014, 7, 7, 9, 30), - datetime(2014, 7, 4, 16, 30, 30): datetime(2014, 7, 7, 9, 30, - 30)})) - - tests.append((BusinessHour( - 4), {datetime(2014, 7, 1, 11): datetime(2014, 7, 1, 15), - datetime(2014, 7, 1, 13): datetime(2014, 7, 2, 9), - datetime(2014, 7, 1, 15): datetime(2014, 7, 2, 11), - datetime(2014, 7, 1, 16): datetime(2014, 7, 2, 12), - datetime(2014, 7, 1, 17): datetime(2014, 7, 2, 13), - datetime(2014, 7, 2, 11): datetime(2014, 7, 2, 15), - datetime(2014, 7, 2, 8): datetime(2014, 7, 2, 13), - datetime(2014, 7, 2, 19): datetime(2014, 7, 3, 13), - datetime(2014, 7, 2, 23): datetime(2014, 7, 3, 13), - datetime(2014, 7, 3, 0): datetime(2014, 7, 3, 13), - datetime(2014, 7, 5, 15): datetime(2014, 7, 7, 13), - datetime(2014, 7, 4, 17): datetime(2014, 7, 7, 13), - datetime(2014, 7, 4, 16, 30): datetime(2014, 7, 7, 12, 30), - datetime(2014, 7, 4, 16, 30, 30): datetime(2014, 7, 7, 12, 30, - 30)})) - - tests.append( - (BusinessHour(-1), - {datetime(2014, 7, 1, 11): datetime(2014, 7, 1, 10), - datetime(2014, 7, 1, 13): datetime(2014, 7, 1, 12), - datetime(2014, 7, 1, 15): 
datetime(2014, 7, 1, 14), - datetime(2014, 7, 1, 16): datetime(2014, 7, 1, 15), - datetime(2014, 7, 1, 10): datetime(2014, 6, 30, 17), - datetime(2014, 7, 1, 16, 30, 15): datetime( - 2014, 7, 1, 15, 30, 15), - datetime(2014, 7, 1, 9, 30, 15): datetime( - 2014, 6, 30, 16, 30, 15), - datetime(2014, 7, 1, 17): datetime(2014, 7, 1, 16), - datetime(2014, 7, 1, 5): datetime(2014, 6, 30, 16), - datetime(2014, 7, 2, 11): datetime(2014, 7, 2, 10), - # out of business hours - datetime(2014, 7, 2, 8): datetime(2014, 7, 1, 16), - datetime(2014, 7, 2, 19): datetime(2014, 7, 2, 16), - datetime(2014, 7, 2, 23): datetime(2014, 7, 2, 16), - datetime(2014, 7, 3, 0): datetime(2014, 7, 2, 16), - # saturday - datetime(2014, 7, 5, 15): datetime(2014, 7, 4, 16), - datetime(2014, 7, 7, 9): datetime(2014, 7, 4, 16), - datetime(2014, 7, 7, 9, 30): datetime(2014, 7, 4, 16, 30), - datetime(2014, 7, 7, 9, 30, 30): datetime(2014, 7, 4, 16, 30, - 30)})) - - tests.append((BusinessHour( - -4), {datetime(2014, 7, 1, 11): datetime(2014, 6, 30, 15), - datetime(2014, 7, 1, 13): datetime(2014, 6, 30, 17), - datetime(2014, 7, 1, 15): datetime(2014, 7, 1, 11), - datetime(2014, 7, 1, 16): datetime(2014, 7, 1, 12), - datetime(2014, 7, 1, 17): datetime(2014, 7, 1, 13), - datetime(2014, 7, 2, 11): datetime(2014, 7, 1, 15), - datetime(2014, 7, 2, 8): datetime(2014, 7, 1, 13), - datetime(2014, 7, 2, 19): datetime(2014, 7, 2, 13), - datetime(2014, 7, 2, 23): datetime(2014, 7, 2, 13), - datetime(2014, 7, 3, 0): datetime(2014, 7, 2, 13), - datetime(2014, 7, 5, 15): datetime(2014, 7, 4, 13), - datetime(2014, 7, 4, 18): datetime(2014, 7, 4, 13), - datetime(2014, 7, 7, 9, 30): datetime(2014, 7, 4, 13, 30), - datetime(2014, 7, 7, 9, 30, 30): datetime(2014, 7, 4, 13, 30, - 30)})) - - tests.append((BusinessHour(start='13:00', end='16:00'), - {datetime(2014, 7, 1, 11): datetime(2014, 7, 1, 14), - datetime(2014, 7, 1, 13): datetime(2014, 7, 1, 14), - datetime(2014, 7, 1, 15): datetime(2014, 7, 2, 13), - datetime(2014, 7, 
1, 19): datetime(2014, 7, 2, 14), - datetime(2014, 7, 1, 16): datetime(2014, 7, 2, 14), - datetime(2014, 7, 1, 15, 30, 15): datetime(2014, 7, 2, - 13, 30, 15), - datetime(2014, 7, 5, 15): datetime(2014, 7, 7, 14), - datetime(2014, 7, 4, 17): datetime(2014, 7, 7, 14)})) - - tests.append((BusinessHour(n=2, start='13:00', end='16:00'), { - datetime(2014, 7, 1, 17): datetime(2014, 7, 2, 15), - datetime(2014, 7, 2, 14): datetime(2014, 7, 3, 13), - datetime(2014, 7, 2, 8): datetime(2014, 7, 2, 15), - datetime(2014, 7, 2, 19): datetime(2014, 7, 3, 15), - datetime(2014, 7, 2, 14, 30): datetime(2014, 7, 3, 13, 30), - datetime(2014, 7, 3, 0): datetime(2014, 7, 3, 15), - datetime(2014, 7, 5, 15): datetime(2014, 7, 7, 15), - datetime(2014, 7, 4, 17): datetime(2014, 7, 7, 15), - datetime(2014, 7, 4, 14, 30): datetime(2014, 7, 7, 13, 30), - datetime(2014, 7, 4, 14, 30, 30): datetime(2014, 7, 7, 13, 30, 30) - })) - - tests.append((BusinessHour(n=-1, start='13:00', end='16:00'), - {datetime(2014, 7, 2, 11): datetime(2014, 7, 1, 15), - datetime(2014, 7, 2, 13): datetime(2014, 7, 1, 15), - datetime(2014, 7, 2, 14): datetime(2014, 7, 1, 16), - datetime(2014, 7, 2, 15): datetime(2014, 7, 2, 14), - datetime(2014, 7, 2, 19): datetime(2014, 7, 2, 15), - datetime(2014, 7, 2, 16): datetime(2014, 7, 2, 15), - datetime(2014, 7, 2, 13, 30, 15): datetime(2014, 7, 1, - 15, 30, 15), - datetime(2014, 7, 5, 15): datetime(2014, 7, 4, 15), - datetime(2014, 7, 7, 11): datetime(2014, 7, 4, 15)})) - - tests.append((BusinessHour(n=-3, start='10:00', end='16:00'), { - datetime(2014, 7, 1, 17): datetime(2014, 7, 1, 13), - datetime(2014, 7, 2, 14): datetime(2014, 7, 2, 11), - datetime(2014, 7, 2, 8): datetime(2014, 7, 1, 13), - datetime(2014, 7, 2, 13): datetime(2014, 7, 1, 16), - datetime(2014, 7, 2, 19): datetime(2014, 7, 2, 13), - datetime(2014, 7, 2, 11, 30): datetime(2014, 7, 1, 14, 30), - datetime(2014, 7, 3, 0): datetime(2014, 7, 2, 13), - datetime(2014, 7, 4, 10): datetime(2014, 7, 3, 13), - 
datetime(2014, 7, 5, 15): datetime(2014, 7, 4, 13), - datetime(2014, 7, 4, 16): datetime(2014, 7, 4, 13), - datetime(2014, 7, 4, 12, 30): datetime(2014, 7, 3, 15, 30), - datetime(2014, 7, 4, 12, 30, 30): datetime(2014, 7, 3, 15, 30, 30) - })) - - tests.append((BusinessHour(start='19:00', end='05:00'), { - datetime(2014, 7, 1, 17): datetime(2014, 7, 1, 20), - datetime(2014, 7, 2, 14): datetime(2014, 7, 2, 20), - datetime(2014, 7, 2, 8): datetime(2014, 7, 2, 20), - datetime(2014, 7, 2, 13): datetime(2014, 7, 2, 20), - datetime(2014, 7, 2, 19): datetime(2014, 7, 2, 20), - datetime(2014, 7, 2, 4, 30): datetime(2014, 7, 2, 19, 30), - datetime(2014, 7, 3, 0): datetime(2014, 7, 3, 1), - datetime(2014, 7, 4, 10): datetime(2014, 7, 4, 20), - datetime(2014, 7, 4, 23): datetime(2014, 7, 5, 0), - datetime(2014, 7, 5, 0): datetime(2014, 7, 5, 1), - datetime(2014, 7, 5, 4): datetime(2014, 7, 7, 19), - datetime(2014, 7, 5, 4, 30): datetime(2014, 7, 7, 19, 30), - datetime(2014, 7, 5, 4, 30, 30): datetime(2014, 7, 7, 19, 30, 30) - })) - - tests.append((BusinessHour(n=-1, start='19:00', end='05:00'), { - datetime(2014, 7, 1, 17): datetime(2014, 7, 1, 4), - datetime(2014, 7, 2, 14): datetime(2014, 7, 2, 4), - datetime(2014, 7, 2, 8): datetime(2014, 7, 2, 4), - datetime(2014, 7, 2, 13): datetime(2014, 7, 2, 4), - datetime(2014, 7, 2, 20): datetime(2014, 7, 2, 5), - datetime(2014, 7, 2, 19): datetime(2014, 7, 2, 4), - datetime(2014, 7, 2, 19, 30): datetime(2014, 7, 2, 4, 30), - datetime(2014, 7, 3, 0): datetime(2014, 7, 2, 23), - datetime(2014, 7, 3, 6): datetime(2014, 7, 3, 4), - datetime(2014, 7, 4, 23): datetime(2014, 7, 4, 22), - datetime(2014, 7, 5, 0): datetime(2014, 7, 4, 23), - datetime(2014, 7, 5, 4): datetime(2014, 7, 5, 3), - datetime(2014, 7, 7, 19, 30): datetime(2014, 7, 5, 4, 30), - datetime(2014, 7, 7, 19, 30, 30): datetime(2014, 7, 5, 4, 30, 30) - })) - - for offset, cases in tests: - for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) - - 
def test_apply_large_n(self): - tests = [] - - tests.append( - (BusinessHour(40), # A week later - {datetime(2014, 7, 1, 11): datetime(2014, 7, 8, 11), - datetime(2014, 7, 1, 13): datetime(2014, 7, 8, 13), - datetime(2014, 7, 1, 15): datetime(2014, 7, 8, 15), - datetime(2014, 7, 1, 16): datetime(2014, 7, 8, 16), - datetime(2014, 7, 1, 17): datetime(2014, 7, 9, 9), - datetime(2014, 7, 2, 11): datetime(2014, 7, 9, 11), - datetime(2014, 7, 2, 8): datetime(2014, 7, 9, 9), - datetime(2014, 7, 2, 19): datetime(2014, 7, 10, 9), - datetime(2014, 7, 2, 23): datetime(2014, 7, 10, 9), - datetime(2014, 7, 3, 0): datetime(2014, 7, 10, 9), - datetime(2014, 7, 5, 15): datetime(2014, 7, 14, 9), - datetime(2014, 7, 4, 18): datetime(2014, 7, 14, 9), - datetime(2014, 7, 7, 9, 30): datetime(2014, 7, 14, 9, 30), - datetime(2014, 7, 7, 9, 30, 30): datetime(2014, 7, 14, 9, 30, - 30)})) - - tests.append( - (BusinessHour(-25), # 3 days and 1 hour before - {datetime(2014, 7, 1, 11): datetime(2014, 6, 26, 10), - datetime(2014, 7, 1, 13): datetime(2014, 6, 26, 12), - datetime(2014, 7, 1, 9): datetime(2014, 6, 25, 16), - datetime(2014, 7, 1, 10): datetime(2014, 6, 25, 17), - datetime(2014, 7, 3, 11): datetime(2014, 6, 30, 10), - datetime(2014, 7, 3, 8): datetime(2014, 6, 27, 16), - datetime(2014, 7, 3, 19): datetime(2014, 6, 30, 16), - datetime(2014, 7, 3, 23): datetime(2014, 6, 30, 16), - datetime(2014, 7, 4, 9): datetime(2014, 6, 30, 16), - datetime(2014, 7, 5, 15): datetime(2014, 7, 1, 16), - datetime(2014, 7, 6, 18): datetime(2014, 7, 1, 16), - datetime(2014, 7, 7, 9, 30): datetime(2014, 7, 1, 16, 30), - datetime(2014, 7, 7, 10, 30, 30): datetime(2014, 7, 2, 9, 30, - 30)})) - - # 5 days and 3 hours later - tests.append((BusinessHour(28, start='21:00', end='02:00'), - {datetime(2014, 7, 1, 11): datetime(2014, 7, 9, 0), - datetime(2014, 7, 1, 22): datetime(2014, 7, 9, 1), - datetime(2014, 7, 1, 23): datetime(2014, 7, 9, 21), - datetime(2014, 7, 2, 2): datetime(2014, 7, 10, 0), - 
datetime(2014, 7, 3, 21): datetime(2014, 7, 11, 0), - datetime(2014, 7, 4, 1): datetime(2014, 7, 11, 23), - datetime(2014, 7, 4, 2): datetime(2014, 7, 12, 0), - datetime(2014, 7, 4, 3): datetime(2014, 7, 12, 0), - datetime(2014, 7, 5, 1): datetime(2014, 7, 14, 23), - datetime(2014, 7, 5, 15): datetime(2014, 7, 15, 0), - datetime(2014, 7, 6, 18): datetime(2014, 7, 15, 0), - datetime(2014, 7, 7, 1): datetime(2014, 7, 15, 0), - datetime(2014, 7, 7, 23, 30): datetime(2014, 7, 15, 21, - 30)})) - - for offset, cases in tests: - for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) - - def test_apply_nanoseconds(self): - tests = [] - - tests.append((BusinessHour(), - {Timestamp('2014-07-04 15:00') + Nano(5): Timestamp( - '2014-07-04 16:00') + Nano(5), - Timestamp('2014-07-04 16:00') + Nano(5): Timestamp( - '2014-07-07 09:00') + Nano(5), - Timestamp('2014-07-04 16:00') - Nano(5): Timestamp( - '2014-07-04 17:00') - Nano(5)})) - - tests.append((BusinessHour(-1), - {Timestamp('2014-07-04 15:00') + Nano(5): Timestamp( - '2014-07-04 14:00') + Nano(5), - Timestamp('2014-07-04 10:00') + Nano(5): Timestamp( - '2014-07-04 09:00') + Nano(5), - Timestamp('2014-07-04 10:00') - Nano(5): Timestamp( - '2014-07-03 17:00') - Nano(5), })) - - for offset, cases in tests: - for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) - - def test_offsets_compare_equal(self): - # root cause of #456 - offset1 = self._offset() - offset2 = self._offset() - self.assertFalse(offset1 != offset2) - - def test_datetimeindex(self): - idx1 = DatetimeIndex(start='2014-07-04 15:00', end='2014-07-08 10:00', - freq='BH') - idx2 = DatetimeIndex(start='2014-07-04 15:00', periods=12, freq='BH') - idx3 = DatetimeIndex(end='2014-07-08 10:00', periods=12, freq='BH') - expected = DatetimeIndex(['2014-07-04 15:00', '2014-07-04 16:00', - '2014-07-07 09:00', - '2014-07-07 10:00', '2014-07-07 11:00', - '2014-07-07 12:00', - '2014-07-07 13:00', '2014-07-07 14:00', - 
'2014-07-07 15:00', - '2014-07-07 16:00', '2014-07-08 09:00', - '2014-07-08 10:00'], - freq='BH') - for idx in [idx1, idx2, idx3]: - tm.assert_index_equal(idx, expected) - - idx1 = DatetimeIndex(start='2014-07-04 15:45', end='2014-07-08 10:45', - freq='BH') - idx2 = DatetimeIndex(start='2014-07-04 15:45', periods=12, freq='BH') - idx3 = DatetimeIndex(end='2014-07-08 10:45', periods=12, freq='BH') - - expected = DatetimeIndex(['2014-07-04 15:45', '2014-07-04 16:45', - '2014-07-07 09:45', - '2014-07-07 10:45', '2014-07-07 11:45', - '2014-07-07 12:45', - '2014-07-07 13:45', '2014-07-07 14:45', - '2014-07-07 15:45', - '2014-07-07 16:45', '2014-07-08 09:45', - '2014-07-08 10:45'], - freq='BH') - expected = idx1 - for idx in [idx1, idx2, idx3]: - tm.assert_index_equal(idx, expected) - - -class TestCustomBusinessHour(Base): - _offset = CustomBusinessHour - - def setUp(self): - # 2014 Calendar to check custom holidays - # Sun Mon Tue Wed Thu Fri Sat - # 6/22 23 24 25 26 27 28 - # 29 30 7/1 2 3 4 5 - # 6 7 8 9 10 11 12 - self.d = datetime(2014, 7, 1, 10, 00) - self.offset1 = CustomBusinessHour(weekmask='Tue Wed Thu Fri') - - self.holidays = ['2014-06-27', datetime(2014, 6, 30), - np.datetime64('2014-07-02')] - self.offset2 = CustomBusinessHour(holidays=self.holidays) - - def test_constructor_errors(self): - from datetime import time as dt_time - with tm.assertRaises(ValueError): - CustomBusinessHour(start=dt_time(11, 0, 5)) - with tm.assertRaises(ValueError): - CustomBusinessHour(start='AAA') - with tm.assertRaises(ValueError): - CustomBusinessHour(start='14:00:05') - - def test_different_normalize_equals(self): - # equivalent in this special case - offset = self._offset() - offset2 = self._offset() - offset2.normalize = True - self.assertEqual(offset, offset2) - - def test_repr(self): - self.assertEqual(repr(self.offset1), - '') - self.assertEqual(repr(self.offset2), - '') - - def test_with_offset(self): - expected = Timestamp('2014-07-01 13:00') - - 
self.assertEqual(self.d + CustomBusinessHour() * 3, expected) - self.assertEqual(self.d + CustomBusinessHour(n=3), expected) - - def testEQ(self): - for offset in [self.offset1, self.offset2]: - self.assertEqual(offset, offset) - - self.assertNotEqual(CustomBusinessHour(), CustomBusinessHour(-1)) - self.assertEqual(CustomBusinessHour(start='09:00'), - CustomBusinessHour()) - self.assertNotEqual(CustomBusinessHour(start='09:00'), - CustomBusinessHour(start='09:01')) - self.assertNotEqual(CustomBusinessHour(start='09:00', end='17:00'), - CustomBusinessHour(start='17:00', end='09:01')) - - self.assertNotEqual(CustomBusinessHour(weekmask='Tue Wed Thu Fri'), - CustomBusinessHour(weekmask='Mon Tue Wed Thu Fri')) - self.assertNotEqual(CustomBusinessHour(holidays=['2014-06-27']), - CustomBusinessHour(holidays=['2014-06-28'])) - - def test_hash(self): - self.assertEqual(hash(self.offset1), hash(self.offset1)) - self.assertEqual(hash(self.offset2), hash(self.offset2)) - - def testCall(self): - self.assertEqual(self.offset1(self.d), datetime(2014, 7, 1, 11)) - self.assertEqual(self.offset2(self.d), datetime(2014, 7, 1, 11)) - - def testRAdd(self): - self.assertEqual(self.d + self.offset2, self.offset2 + self.d) - - def testSub(self): - off = self.offset2 - self.assertRaises(Exception, off.__sub__, self.d) - self.assertEqual(2 * off - off, off) - - self.assertEqual(self.d - self.offset2, self.d - (2 * off - off)) - - def testRSub(self): - self.assertEqual(self.d - self.offset2, (-self.offset2).apply(self.d)) - - def testMult1(self): - self.assertEqual(self.d + 5 * self.offset1, self.d + self._offset(5)) - - def testMult2(self): - self.assertEqual(self.d + (-3 * self._offset(-2)), - self.d + self._offset(6)) - - def testRollback1(self): - self.assertEqual(self.offset1.rollback(self.d), self.d) - self.assertEqual(self.offset2.rollback(self.d), self.d) - - d = datetime(2014, 7, 1, 0) - # 2014/07/01 is Tuesday, 06/30 is Monday(holiday) - self.assertEqual(self.offset1.rollback(d), 
datetime(2014, 6, 27, 17)) - - # 2014/6/30 and 2014/6/27 are holidays - self.assertEqual(self.offset2.rollback(d), datetime(2014, 6, 26, 17)) - - def testRollback2(self): - self.assertEqual(self._offset(-3) - .rollback(datetime(2014, 7, 5, 15, 0)), - datetime(2014, 7, 4, 17, 0)) - - def testRollforward1(self): - self.assertEqual(self.offset1.rollforward(self.d), self.d) - self.assertEqual(self.offset2.rollforward(self.d), self.d) - - d = datetime(2014, 7, 1, 0) - self.assertEqual(self.offset1.rollforward(d), datetime(2014, 7, 1, 9)) - self.assertEqual(self.offset2.rollforward(d), datetime(2014, 7, 1, 9)) - - def testRollforward2(self): - self.assertEqual(self._offset(-3) - .rollforward(datetime(2014, 7, 5, 16, 0)), - datetime(2014, 7, 7, 9)) - - def test_roll_date_object(self): - offset = BusinessHour() - - dt = datetime(2014, 7, 6, 15, 0) - - result = offset.rollback(dt) - self.assertEqual(result, datetime(2014, 7, 4, 17)) - - result = offset.rollforward(dt) - self.assertEqual(result, datetime(2014, 7, 7, 9)) - - def test_normalize(self): - tests = [] - - tests.append((CustomBusinessHour(normalize=True, - holidays=self.holidays), - {datetime(2014, 7, 1, 8): datetime(2014, 7, 1), - datetime(2014, 7, 1, 17): datetime(2014, 7, 3), - datetime(2014, 7, 1, 16): datetime(2014, 7, 3), - datetime(2014, 7, 1, 23): datetime(2014, 7, 3), - datetime(2014, 7, 1, 0): datetime(2014, 7, 1), - datetime(2014, 7, 4, 15): datetime(2014, 7, 4), - datetime(2014, 7, 4, 15, 59): datetime(2014, 7, 4), - datetime(2014, 7, 4, 16, 30): datetime(2014, 7, 7), - datetime(2014, 7, 5, 23): datetime(2014, 7, 7), - datetime(2014, 7, 6, 10): datetime(2014, 7, 7)})) - - tests.append((CustomBusinessHour(-1, normalize=True, - holidays=self.holidays), - {datetime(2014, 7, 1, 8): datetime(2014, 6, 26), - datetime(2014, 7, 1, 17): datetime(2014, 7, 1), - datetime(2014, 7, 1, 16): datetime(2014, 7, 1), - datetime(2014, 7, 1, 10): datetime(2014, 6, 26), - datetime(2014, 7, 1, 0): datetime(2014, 6, 26), - 
datetime(2014, 7, 7, 10): datetime(2014, 7, 4), - datetime(2014, 7, 7, 10, 1): datetime(2014, 7, 7), - datetime(2014, 7, 5, 23): datetime(2014, 7, 4), - datetime(2014, 7, 6, 10): datetime(2014, 7, 4)})) - - tests.append((CustomBusinessHour(1, normalize=True, start='17:00', - end='04:00', holidays=self.holidays), - {datetime(2014, 7, 1, 8): datetime(2014, 7, 1), - datetime(2014, 7, 1, 17): datetime(2014, 7, 1), - datetime(2014, 7, 1, 23): datetime(2014, 7, 2), - datetime(2014, 7, 2, 2): datetime(2014, 7, 2), - datetime(2014, 7, 2, 3): datetime(2014, 7, 3), - datetime(2014, 7, 4, 23): datetime(2014, 7, 5), - datetime(2014, 7, 5, 2): datetime(2014, 7, 5), - datetime(2014, 7, 7, 2): datetime(2014, 7, 7), - datetime(2014, 7, 7, 17): datetime(2014, 7, 7)})) - - for offset, cases in tests: - for dt, expected in compat.iteritems(cases): - self.assertEqual(offset.apply(dt), expected) - - def test_onOffset(self): - tests = [] - - tests.append((CustomBusinessHour(start='10:00', end='15:00', - holidays=self.holidays), - {datetime(2014, 7, 1, 9): False, - datetime(2014, 7, 1, 10): True, - datetime(2014, 7, 1, 15): True, - datetime(2014, 7, 1, 15, 1): False, - datetime(2014, 7, 5, 12): False, - datetime(2014, 7, 6, 12): False})) - - for offset, cases in tests: - for dt, expected in compat.iteritems(cases): - self.assertEqual(offset.onOffset(dt), expected) - - def test_apply(self): - tests = [] - - tests.append(( - CustomBusinessHour(holidays=self.holidays), - {datetime(2014, 7, 1, 11): datetime(2014, 7, 1, 12), - datetime(2014, 7, 1, 13): datetime(2014, 7, 1, 14), - datetime(2014, 7, 1, 15): datetime(2014, 7, 1, 16), - datetime(2014, 7, 1, 19): datetime(2014, 7, 3, 10), - datetime(2014, 7, 1, 16): datetime(2014, 7, 3, 9), - datetime(2014, 7, 1, 16, 30, 15): datetime(2014, 7, 3, 9, 30, 15), - datetime(2014, 7, 1, 17): datetime(2014, 7, 3, 10), - datetime(2014, 7, 2, 11): datetime(2014, 7, 3, 10), - # out of business hours - datetime(2014, 7, 2, 8): datetime(2014, 7, 3, 10), - 
datetime(2014, 7, 2, 19): datetime(2014, 7, 3, 10), - datetime(2014, 7, 2, 23): datetime(2014, 7, 3, 10), - datetime(2014, 7, 3, 0): datetime(2014, 7, 3, 10), - # saturday - datetime(2014, 7, 5, 15): datetime(2014, 7, 7, 10), - datetime(2014, 7, 4, 17): datetime(2014, 7, 7, 10), - datetime(2014, 7, 4, 16, 30): datetime(2014, 7, 7, 9, 30), - datetime(2014, 7, 4, 16, 30, 30): datetime(2014, 7, 7, 9, 30, - 30)})) - - tests.append(( - CustomBusinessHour(4, holidays=self.holidays), - {datetime(2014, 7, 1, 11): datetime(2014, 7, 1, 15), - datetime(2014, 7, 1, 13): datetime(2014, 7, 3, 9), - datetime(2014, 7, 1, 15): datetime(2014, 7, 3, 11), - datetime(2014, 7, 1, 16): datetime(2014, 7, 3, 12), - datetime(2014, 7, 1, 17): datetime(2014, 7, 3, 13), - datetime(2014, 7, 2, 11): datetime(2014, 7, 3, 13), - datetime(2014, 7, 2, 8): datetime(2014, 7, 3, 13), - datetime(2014, 7, 2, 19): datetime(2014, 7, 3, 13), - datetime(2014, 7, 2, 23): datetime(2014, 7, 3, 13), - datetime(2014, 7, 3, 0): datetime(2014, 7, 3, 13), - datetime(2014, 7, 5, 15): datetime(2014, 7, 7, 13), - datetime(2014, 7, 4, 17): datetime(2014, 7, 7, 13), - datetime(2014, 7, 4, 16, 30): datetime(2014, 7, 7, 12, 30), - datetime(2014, 7, 4, 16, 30, 30): datetime(2014, 7, 7, 12, 30, - 30)})) - - for offset, cases in tests: - for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) - - def test_apply_nanoseconds(self): - tests = [] - - tests.append((CustomBusinessHour(holidays=self.holidays), - {Timestamp('2014-07-01 15:00') + Nano(5): Timestamp( - '2014-07-01 16:00') + Nano(5), - Timestamp('2014-07-01 16:00') + Nano(5): Timestamp( - '2014-07-03 09:00') + Nano(5), - Timestamp('2014-07-01 16:00') - Nano(5): Timestamp( - '2014-07-01 17:00') - Nano(5)})) - - tests.append((CustomBusinessHour(-1, holidays=self.holidays), - {Timestamp('2014-07-01 15:00') + Nano(5): Timestamp( - '2014-07-01 14:00') + Nano(5), - Timestamp('2014-07-01 10:00') + Nano(5): Timestamp( - '2014-07-01 09:00') + Nano(5), - 
Timestamp('2014-07-01 10:00') - Nano(5): Timestamp( - '2014-06-26 17:00') - Nano(5), })) - - for offset, cases in tests: - for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) - - -class TestCustomBusinessDay(Base): - _offset = CDay - - def setUp(self): - self.d = datetime(2008, 1, 1) - self.nd = np_datetime64_compat('2008-01-01 00:00:00Z') - - self.offset = CDay() - self.offset2 = CDay(2) - - def test_different_normalize_equals(self): - # equivalent in this special case - offset = CDay() - offset2 = CDay() - offset2.normalize = True - self.assertEqual(offset, offset2) - - def test_repr(self): - assert repr(self.offset) == '' - assert repr(self.offset2) == '<2 * CustomBusinessDays>' - - expected = '' - assert repr(self.offset + timedelta(1)) == expected - - def test_with_offset(self): - offset = self.offset + timedelta(hours=2) - - assert (self.d + offset) == datetime(2008, 1, 2, 2) - - def testEQ(self): - self.assertEqual(self.offset2, self.offset2) - - def test_mul(self): - pass - - def test_hash(self): - self.assertEqual(hash(self.offset2), hash(self.offset2)) - - def testCall(self): - self.assertEqual(self.offset2(self.d), datetime(2008, 1, 3)) - self.assertEqual(self.offset2(self.nd), datetime(2008, 1, 3)) - - def testRAdd(self): - self.assertEqual(self.d + self.offset2, self.offset2 + self.d) - - def testSub(self): - off = self.offset2 - self.assertRaises(Exception, off.__sub__, self.d) - self.assertEqual(2 * off - off, off) - - self.assertEqual(self.d - self.offset2, self.d + CDay(-2)) - - def testRSub(self): - self.assertEqual(self.d - self.offset2, (-self.offset2).apply(self.d)) - - def testMult1(self): - self.assertEqual(self.d + 10 * self.offset, self.d + CDay(10)) - - def testMult2(self): - self.assertEqual(self.d + (-5 * CDay(-10)), self.d + CDay(50)) - - def testRollback1(self): - self.assertEqual(CDay(10).rollback(self.d), self.d) - - def testRollback2(self): - self.assertEqual( - CDay(10).rollback(datetime(2008, 1, 5)), 
datetime(2008, 1, 4)) - - def testRollforward1(self): - self.assertEqual(CDay(10).rollforward(self.d), self.d) - - def testRollforward2(self): - self.assertEqual( - CDay(10).rollforward(datetime(2008, 1, 5)), datetime(2008, 1, 7)) - - def test_roll_date_object(self): - offset = CDay() - - dt = date(2012, 9, 15) - - result = offset.rollback(dt) - self.assertEqual(result, datetime(2012, 9, 14)) - - result = offset.rollforward(dt) - self.assertEqual(result, datetime(2012, 9, 17)) - - offset = offsets.Day() - result = offset.rollback(dt) - self.assertEqual(result, datetime(2012, 9, 15)) - - result = offset.rollforward(dt) - self.assertEqual(result, datetime(2012, 9, 15)) - - def test_onOffset(self): - tests = [(CDay(), datetime(2008, 1, 1), True), - (CDay(), datetime(2008, 1, 5), False)] - - for offset, d, expected in tests: - assertOnOffset(offset, d, expected) - - def test_apply(self): - tests = [] - - tests.append((CDay(), {datetime(2008, 1, 1): datetime(2008, 1, 2), - datetime(2008, 1, 4): datetime(2008, 1, 7), - datetime(2008, 1, 5): datetime(2008, 1, 7), - datetime(2008, 1, 6): datetime(2008, 1, 7), - datetime(2008, 1, 7): datetime(2008, 1, 8)})) - - tests.append((2 * CDay(), { - datetime(2008, 1, 1): datetime(2008, 1, 3), - datetime(2008, 1, 4): datetime(2008, 1, 8), - datetime(2008, 1, 5): datetime(2008, 1, 8), - datetime(2008, 1, 6): datetime(2008, 1, 8), - datetime(2008, 1, 7): datetime(2008, 1, 9)} - )) - - tests.append((-CDay(), { - datetime(2008, 1, 1): datetime(2007, 12, 31), - datetime(2008, 1, 4): datetime(2008, 1, 3), - datetime(2008, 1, 5): datetime(2008, 1, 4), - datetime(2008, 1, 6): datetime(2008, 1, 4), - datetime(2008, 1, 7): datetime(2008, 1, 4), - datetime(2008, 1, 8): datetime(2008, 1, 7)} - )) - - tests.append((-2 * CDay(), { - datetime(2008, 1, 1): datetime(2007, 12, 28), - datetime(2008, 1, 4): datetime(2008, 1, 2), - datetime(2008, 1, 5): datetime(2008, 1, 3), - datetime(2008, 1, 6): datetime(2008, 1, 3), - datetime(2008, 1, 7): 
datetime(2008, 1, 3), - datetime(2008, 1, 8): datetime(2008, 1, 4), - datetime(2008, 1, 9): datetime(2008, 1, 7)} - )) - - tests.append((CDay(0), {datetime(2008, 1, 1): datetime(2008, 1, 1), - datetime(2008, 1, 4): datetime(2008, 1, 4), - datetime(2008, 1, 5): datetime(2008, 1, 7), - datetime(2008, 1, 6): datetime(2008, 1, 7), - datetime(2008, 1, 7): datetime(2008, 1, 7)})) - - for offset, cases in tests: - for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) - - def test_apply_large_n(self): - dt = datetime(2012, 10, 23) - - result = dt + CDay(10) - self.assertEqual(result, datetime(2012, 11, 6)) - - result = dt + CDay(100) - CDay(100) - self.assertEqual(result, dt) - - off = CDay() * 6 - rs = datetime(2012, 1, 1) - off - xp = datetime(2011, 12, 23) - self.assertEqual(rs, xp) - - st = datetime(2011, 12, 18) - rs = st + off - xp = datetime(2011, 12, 26) - self.assertEqual(rs, xp) - - def test_apply_corner(self): - self.assertRaises(Exception, CDay().apply, BMonthEnd()) - - def test_offsets_compare_equal(self): - # root cause of #456 - offset1 = CDay() - offset2 = CDay() - self.assertFalse(offset1 != offset2) - - def test_holidays(self): - # Define a TradingDay offset - holidays = ['2012-05-01', datetime(2013, 5, 1), - np.datetime64('2014-05-01')] - tday = CDay(holidays=holidays) - for year in range(2012, 2015): - dt = datetime(year, 4, 30) - xp = datetime(year, 5, 2) - rs = dt + tday - self.assertEqual(rs, xp) - - def test_weekmask(self): - weekmask_saudi = 'Sat Sun Mon Tue Wed' # Thu-Fri Weekend - weekmask_uae = '1111001' # Fri-Sat Weekend - weekmask_egypt = [1, 1, 1, 1, 0, 0, 1] # Fri-Sat Weekend - bday_saudi = CDay(weekmask=weekmask_saudi) - bday_uae = CDay(weekmask=weekmask_uae) - bday_egypt = CDay(weekmask=weekmask_egypt) - dt = datetime(2013, 5, 1) - xp_saudi = datetime(2013, 5, 4) - xp_uae = datetime(2013, 5, 2) - xp_egypt = datetime(2013, 5, 2) - self.assertEqual(xp_saudi, dt + bday_saudi) - self.assertEqual(xp_uae, dt + 
bday_uae) - self.assertEqual(xp_egypt, dt + bday_egypt) - xp2 = datetime(2013, 5, 5) - self.assertEqual(xp2, dt + 2 * bday_saudi) - self.assertEqual(xp2, dt + 2 * bday_uae) - self.assertEqual(xp2, dt + 2 * bday_egypt) - - def test_weekmask_and_holidays(self): - weekmask_egypt = 'Sun Mon Tue Wed Thu' # Fri-Sat Weekend - holidays = ['2012-05-01', datetime(2013, 5, 1), - np.datetime64('2014-05-01')] - bday_egypt = CDay(holidays=holidays, weekmask=weekmask_egypt) - dt = datetime(2013, 4, 30) - xp_egypt = datetime(2013, 5, 5) - self.assertEqual(xp_egypt, dt + 2 * bday_egypt) - - def test_calendar(self): - calendar = USFederalHolidayCalendar() - dt = datetime(2014, 1, 17) - assertEq(CDay(calendar=calendar), dt, datetime(2014, 1, 21)) - - def test_roundtrip_pickle(self): - def _check_roundtrip(obj): - unpickled = self.round_trip_pickle(obj) - self.assertEqual(unpickled, obj) - - _check_roundtrip(self.offset) - _check_roundtrip(self.offset2) - _check_roundtrip(self.offset * 2) - - def test_pickle_compat_0_14_1(self): - hdays = [datetime(2013, 1, 1) for ele in range(4)] - - pth = tm.get_data_path() - - cday0_14_1 = read_pickle(os.path.join(pth, 'cday-0.14.1.pickle')) - cday = CDay(holidays=hdays) - self.assertEqual(cday, cday0_14_1) - - -class CustomBusinessMonthBase(object): - - def setUp(self): - self.d = datetime(2008, 1, 1) - - self.offset = self._object() - self.offset2 = self._object(2) - - def testEQ(self): - self.assertEqual(self.offset2, self.offset2) - - def test_mul(self): - pass - - def test_hash(self): - self.assertEqual(hash(self.offset2), hash(self.offset2)) - - def testRAdd(self): - self.assertEqual(self.d + self.offset2, self.offset2 + self.d) - - def testSub(self): - off = self.offset2 - self.assertRaises(Exception, off.__sub__, self.d) - self.assertEqual(2 * off - off, off) - - self.assertEqual(self.d - self.offset2, self.d + self._object(-2)) - - def testRSub(self): - self.assertEqual(self.d - self.offset2, (-self.offset2).apply(self.d)) - - def 
testMult1(self): - self.assertEqual(self.d + 10 * self.offset, self.d + self._object(10)) - - def testMult2(self): - self.assertEqual(self.d + (-5 * self._object(-10)), - self.d + self._object(50)) - - def test_offsets_compare_equal(self): - offset1 = self._object() - offset2 = self._object() - self.assertFalse(offset1 != offset2) - - def test_roundtrip_pickle(self): - def _check_roundtrip(obj): - unpickled = self.round_trip_pickle(obj) - self.assertEqual(unpickled, obj) - - _check_roundtrip(self._object()) - _check_roundtrip(self._object(2)) - _check_roundtrip(self._object() * 2) - - -class TestCustomBusinessMonthEnd(CustomBusinessMonthBase, Base): - _object = CBMonthEnd - - def test_different_normalize_equals(self): - # equivalent in this special case - offset = CBMonthEnd() - offset2 = CBMonthEnd() - offset2.normalize = True - self.assertEqual(offset, offset2) - - def test_repr(self): - assert repr(self.offset) == '' - assert repr(self.offset2) == '<2 * CustomBusinessMonthEnds>' - - def testCall(self): - self.assertEqual(self.offset2(self.d), datetime(2008, 2, 29)) - - def testRollback1(self): - self.assertEqual( - CDay(10).rollback(datetime(2007, 12, 31)), datetime(2007, 12, 31)) - - def testRollback2(self): - self.assertEqual(CBMonthEnd(10).rollback(self.d), - datetime(2007, 12, 31)) - - def testRollforward1(self): - self.assertEqual(CBMonthEnd(10).rollforward( - self.d), datetime(2008, 1, 31)) - - def test_roll_date_object(self): - offset = CBMonthEnd() - - dt = date(2012, 9, 15) - - result = offset.rollback(dt) - self.assertEqual(result, datetime(2012, 8, 31)) - - result = offset.rollforward(dt) - self.assertEqual(result, datetime(2012, 9, 28)) - - offset = offsets.Day() - result = offset.rollback(dt) - self.assertEqual(result, datetime(2012, 9, 15)) - - result = offset.rollforward(dt) - self.assertEqual(result, datetime(2012, 9, 15)) - - def test_onOffset(self): - tests = [(CBMonthEnd(), datetime(2008, 1, 31), True), - (CBMonthEnd(), datetime(2008, 1, 1), 
False)] - - for offset, d, expected in tests: - assertOnOffset(offset, d, expected) - - def test_apply(self): - cbm = CBMonthEnd() - tests = [] - - tests.append((cbm, {datetime(2008, 1, 1): datetime(2008, 1, 31), - datetime(2008, 2, 7): datetime(2008, 2, 29)})) - - tests.append((2 * cbm, {datetime(2008, 1, 1): datetime(2008, 2, 29), - datetime(2008, 2, 7): datetime(2008, 3, 31)})) - - tests.append((-cbm, {datetime(2008, 1, 1): datetime(2007, 12, 31), - datetime(2008, 2, 8): datetime(2008, 1, 31)})) - - tests.append((-2 * cbm, {datetime(2008, 1, 1): datetime(2007, 11, 30), - datetime(2008, 2, 9): datetime(2007, 12, 31)} - )) - - tests.append((CBMonthEnd(0), - {datetime(2008, 1, 1): datetime(2008, 1, 31), - datetime(2008, 2, 7): datetime(2008, 2, 29)})) - - for offset, cases in tests: - for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) - - def test_apply_large_n(self): - dt = datetime(2012, 10, 23) - - result = dt + CBMonthEnd(10) - self.assertEqual(result, datetime(2013, 7, 31)) - - result = dt + CDay(100) - CDay(100) - self.assertEqual(result, dt) - - off = CBMonthEnd() * 6 - rs = datetime(2012, 1, 1) - off - xp = datetime(2011, 7, 29) - self.assertEqual(rs, xp) - - st = datetime(2011, 12, 18) - rs = st + off - xp = datetime(2012, 5, 31) - self.assertEqual(rs, xp) - - def test_holidays(self): - # Define a TradingDay offset - holidays = ['2012-01-31', datetime(2012, 2, 28), - np.datetime64('2012-02-29')] - bm_offset = CBMonthEnd(holidays=holidays) - dt = datetime(2012, 1, 1) - self.assertEqual(dt + bm_offset, datetime(2012, 1, 30)) - self.assertEqual(dt + 2 * bm_offset, datetime(2012, 2, 27)) - - def test_datetimeindex(self): - from pandas.tseries.holiday import USFederalHolidayCalendar - hcal = USFederalHolidayCalendar() - freq = CBMonthEnd(calendar=hcal) - - self.assertEqual(DatetimeIndex(start='20120101', end='20130101', - freq=freq).tolist()[0], - datetime(2012, 1, 31)) - - -class 
TestCustomBusinessMonthBegin(CustomBusinessMonthBase, Base): - _object = CBMonthBegin - - def test_different_normalize_equals(self): - # equivalent in this special case - offset = CBMonthBegin() - offset2 = CBMonthBegin() - offset2.normalize = True - self.assertEqual(offset, offset2) - - def test_repr(self): - assert repr(self.offset) == '' - assert repr(self.offset2) == '<2 * CustomBusinessMonthBegins>' - - def testCall(self): - self.assertEqual(self.offset2(self.d), datetime(2008, 3, 3)) - - def testRollback1(self): - self.assertEqual( - CDay(10).rollback(datetime(2007, 12, 31)), datetime(2007, 12, 31)) - - def testRollback2(self): - self.assertEqual(CBMonthBegin(10).rollback(self.d), - datetime(2008, 1, 1)) - - def testRollforward1(self): - self.assertEqual(CBMonthBegin(10).rollforward( - self.d), datetime(2008, 1, 1)) - - def test_roll_date_object(self): - offset = CBMonthBegin() - - dt = date(2012, 9, 15) - - result = offset.rollback(dt) - self.assertEqual(result, datetime(2012, 9, 3)) - - result = offset.rollforward(dt) - self.assertEqual(result, datetime(2012, 10, 1)) - - offset = offsets.Day() - result = offset.rollback(dt) - self.assertEqual(result, datetime(2012, 9, 15)) - - result = offset.rollforward(dt) - self.assertEqual(result, datetime(2012, 9, 15)) - - def test_onOffset(self): - tests = [(CBMonthBegin(), datetime(2008, 1, 1), True), - (CBMonthBegin(), datetime(2008, 1, 31), False)] - - for offset, dt, expected in tests: - assertOnOffset(offset, dt, expected) - - def test_apply(self): - cbm = CBMonthBegin() - tests = [] - - tests.append((cbm, {datetime(2008, 1, 1): datetime(2008, 2, 1), - datetime(2008, 2, 7): datetime(2008, 3, 3)})) - - tests.append((2 * cbm, {datetime(2008, 1, 1): datetime(2008, 3, 3), - datetime(2008, 2, 7): datetime(2008, 4, 1)})) - - tests.append((-cbm, {datetime(2008, 1, 1): datetime(2007, 12, 3), - datetime(2008, 2, 8): datetime(2008, 2, 1)})) - - tests.append((-2 * cbm, {datetime(2008, 1, 1): datetime(2007, 11, 1), - 
datetime(2008, 2, 9): datetime(2008, 1, 1)})) - - tests.append((CBMonthBegin(0), - {datetime(2008, 1, 1): datetime(2008, 1, 1), - datetime(2008, 1, 7): datetime(2008, 2, 1)})) - - for offset, cases in tests: - for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) - - def test_apply_large_n(self): - dt = datetime(2012, 10, 23) - - result = dt + CBMonthBegin(10) - self.assertEqual(result, datetime(2013, 8, 1)) - - result = dt + CDay(100) - CDay(100) - self.assertEqual(result, dt) - - off = CBMonthBegin() * 6 - rs = datetime(2012, 1, 1) - off - xp = datetime(2011, 7, 1) - self.assertEqual(rs, xp) - - st = datetime(2011, 12, 18) - rs = st + off - xp = datetime(2012, 6, 1) - self.assertEqual(rs, xp) - - def test_holidays(self): - # Define a TradingDay offset - holidays = ['2012-02-01', datetime(2012, 2, 2), - np.datetime64('2012-03-01')] - bm_offset = CBMonthBegin(holidays=holidays) - dt = datetime(2012, 1, 1) - self.assertEqual(dt + bm_offset, datetime(2012, 1, 2)) - self.assertEqual(dt + 2 * bm_offset, datetime(2012, 2, 3)) - - def test_datetimeindex(self): - hcal = USFederalHolidayCalendar() - cbmb = CBMonthBegin(calendar=hcal) - self.assertEqual(DatetimeIndex(start='20120101', end='20130101', - freq=cbmb).tolist()[0], - datetime(2012, 1, 3)) - - -def assertOnOffset(offset, date, expected): - actual = offset.onOffset(date) - assert actual == expected, ("\nExpected: %s\nActual: %s\nFor Offset: %s)" - "\nAt Date: %s" % - (expected, actual, offset, date)) - - -class TestWeek(Base): - _offset = Week - - def test_repr(self): - self.assertEqual(repr(Week(weekday=0)), "") - self.assertEqual(repr(Week(n=-1, weekday=0)), "<-1 * Week: weekday=0>") - self.assertEqual(repr(Week(n=-2, weekday=0)), - "<-2 * Weeks: weekday=0>") - - def test_corner(self): - self.assertRaises(ValueError, Week, weekday=7) - assertRaisesRegexp(ValueError, "Day must be", Week, weekday=-1) - - def test_isAnchored(self): - self.assertTrue(Week(weekday=0).isAnchored()) - 
self.assertFalse(Week().isAnchored()) - self.assertFalse(Week(2, weekday=2).isAnchored()) - self.assertFalse(Week(2).isAnchored()) - - def test_offset(self): - tests = [] - - tests.append((Week(), # not business week - {datetime(2008, 1, 1): datetime(2008, 1, 8), - datetime(2008, 1, 4): datetime(2008, 1, 11), - datetime(2008, 1, 5): datetime(2008, 1, 12), - datetime(2008, 1, 6): datetime(2008, 1, 13), - datetime(2008, 1, 7): datetime(2008, 1, 14)})) - - tests.append((Week(weekday=0), # Mon - {datetime(2007, 12, 31): datetime(2008, 1, 7), - datetime(2008, 1, 4): datetime(2008, 1, 7), - datetime(2008, 1, 5): datetime(2008, 1, 7), - datetime(2008, 1, 6): datetime(2008, 1, 7), - datetime(2008, 1, 7): datetime(2008, 1, 14)})) - - tests.append((Week(0, weekday=0), # n=0 -> roll forward. Mon - {datetime(2007, 12, 31): datetime(2007, 12, 31), - datetime(2008, 1, 4): datetime(2008, 1, 7), - datetime(2008, 1, 5): datetime(2008, 1, 7), - datetime(2008, 1, 6): datetime(2008, 1, 7), - datetime(2008, 1, 7): datetime(2008, 1, 7)})) - - tests.append((Week(-2, weekday=1), # n=0 -> roll forward. 
Mon - {datetime(2010, 4, 6): datetime(2010, 3, 23), - datetime(2010, 4, 8): datetime(2010, 3, 30), - datetime(2010, 4, 5): datetime(2010, 3, 23)})) - - for offset, cases in tests: - for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) - - def test_onOffset(self): - for weekday in range(7): - offset = Week(weekday=weekday) - - for day in range(1, 8): - date = datetime(2008, 1, day) - - if day % 7 == weekday: - expected = True - else: - expected = False - assertOnOffset(offset, date, expected) - - def test_offsets_compare_equal(self): - # root cause of #456 - offset1 = Week() - offset2 = Week() - self.assertFalse(offset1 != offset2) - - -class TestWeekOfMonth(Base): - _offset = WeekOfMonth - - def test_constructor(self): - assertRaisesRegexp(ValueError, "^N cannot be 0", WeekOfMonth, n=0, - week=1, weekday=1) - assertRaisesRegexp(ValueError, "^Week", WeekOfMonth, n=1, week=4, - weekday=0) - assertRaisesRegexp(ValueError, "^Week", WeekOfMonth, n=1, week=-1, - weekday=0) - assertRaisesRegexp(ValueError, "^Day", WeekOfMonth, n=1, week=0, - weekday=-1) - assertRaisesRegexp(ValueError, "^Day", WeekOfMonth, n=1, week=0, - weekday=7) - - def test_repr(self): - self.assertEqual(repr(WeekOfMonth(weekday=1, week=2)), - "") - - def test_offset(self): - date1 = datetime(2011, 1, 4) # 1st Tuesday of Month - date2 = datetime(2011, 1, 11) # 2nd Tuesday of Month - date3 = datetime(2011, 1, 18) # 3rd Tuesday of Month - date4 = datetime(2011, 1, 25) # 4th Tuesday of Month - - # see for loop for structure - test_cases = [ - (-2, 2, 1, date1, datetime(2010, 11, 16)), - (-2, 2, 1, date2, datetime(2010, 11, 16)), - (-2, 2, 1, date3, datetime(2010, 11, 16)), - (-2, 2, 1, date4, datetime(2010, 12, 21)), - - (-1, 2, 1, date1, datetime(2010, 12, 21)), - (-1, 2, 1, date2, datetime(2010, 12, 21)), - (-1, 2, 1, date3, datetime(2010, 12, 21)), - (-1, 2, 1, date4, datetime(2011, 1, 18)), - - (1, 0, 0, date1, datetime(2011, 2, 7)), - (1, 0, 0, date2, datetime(2011, 2, 
7)), - (1, 0, 0, date3, datetime(2011, 2, 7)), - (1, 0, 0, date4, datetime(2011, 2, 7)), - (1, 0, 1, date1, datetime(2011, 2, 1)), - (1, 0, 1, date2, datetime(2011, 2, 1)), - (1, 0, 1, date3, datetime(2011, 2, 1)), - (1, 0, 1, date4, datetime(2011, 2, 1)), - (1, 0, 2, date1, datetime(2011, 1, 5)), - (1, 0, 2, date2, datetime(2011, 2, 2)), - (1, 0, 2, date3, datetime(2011, 2, 2)), - (1, 0, 2, date4, datetime(2011, 2, 2)), - - (1, 2, 1, date1, datetime(2011, 1, 18)), - (1, 2, 1, date2, datetime(2011, 1, 18)), - (1, 2, 1, date3, datetime(2011, 2, 15)), - (1, 2, 1, date4, datetime(2011, 2, 15)), - - (2, 2, 1, date1, datetime(2011, 2, 15)), - (2, 2, 1, date2, datetime(2011, 2, 15)), - (2, 2, 1, date3, datetime(2011, 3, 15)), - (2, 2, 1, date4, datetime(2011, 3, 15)), - ] - - for n, week, weekday, dt, expected in test_cases: - offset = WeekOfMonth(n, week=week, weekday=weekday) - assertEq(offset, dt, expected) - - # try subtracting - result = datetime(2011, 2, 1) - WeekOfMonth(week=1, weekday=2) - self.assertEqual(result, datetime(2011, 1, 12)) - result = datetime(2011, 2, 3) - WeekOfMonth(week=0, weekday=2) - self.assertEqual(result, datetime(2011, 2, 2)) - - def test_onOffset(self): - test_cases = [ - (0, 0, datetime(2011, 2, 7), True), - (0, 0, datetime(2011, 2, 6), False), - (0, 0, datetime(2011, 2, 14), False), - (1, 0, datetime(2011, 2, 14), True), - (0, 1, datetime(2011, 2, 1), True), - (0, 1, datetime(2011, 2, 8), False), - ] - - for week, weekday, dt, expected in test_cases: - offset = WeekOfMonth(week=week, weekday=weekday) - self.assertEqual(offset.onOffset(dt), expected) - - -class TestLastWeekOfMonth(Base): - _offset = LastWeekOfMonth - - def test_constructor(self): - assertRaisesRegexp(ValueError, "^N cannot be 0", LastWeekOfMonth, n=0, - weekday=1) - - assertRaisesRegexp(ValueError, "^Day", LastWeekOfMonth, n=1, - weekday=-1) - assertRaisesRegexp(ValueError, "^Day", LastWeekOfMonth, n=1, weekday=7) - - def test_offset(self): - # Saturday - last_sat = 
datetime(2013, 8, 31) - next_sat = datetime(2013, 9, 28) - offset_sat = LastWeekOfMonth(n=1, weekday=5) - - one_day_before = (last_sat + timedelta(days=-1)) - self.assertEqual(one_day_before + offset_sat, last_sat) - - one_day_after = (last_sat + timedelta(days=+1)) - self.assertEqual(one_day_after + offset_sat, next_sat) - - # Test On that day - self.assertEqual(last_sat + offset_sat, next_sat) - - # Thursday - - offset_thur = LastWeekOfMonth(n=1, weekday=3) - last_thurs = datetime(2013, 1, 31) - next_thurs = datetime(2013, 2, 28) - - one_day_before = last_thurs + timedelta(days=-1) - self.assertEqual(one_day_before + offset_thur, last_thurs) - - one_day_after = last_thurs + timedelta(days=+1) - self.assertEqual(one_day_after + offset_thur, next_thurs) - - # Test on that day - self.assertEqual(last_thurs + offset_thur, next_thurs) - - three_before = last_thurs + timedelta(days=-3) - self.assertEqual(three_before + offset_thur, last_thurs) - - two_after = last_thurs + timedelta(days=+2) - self.assertEqual(two_after + offset_thur, next_thurs) - - offset_sunday = LastWeekOfMonth(n=1, weekday=WeekDay.SUN) - self.assertEqual(datetime(2013, 7, 31) + - offset_sunday, datetime(2013, 8, 25)) - - def test_onOffset(self): - test_cases = [ - (WeekDay.SUN, datetime(2013, 1, 27), True), - (WeekDay.SAT, datetime(2013, 3, 30), True), - (WeekDay.MON, datetime(2013, 2, 18), False), # Not the last Mon - (WeekDay.SUN, datetime(2013, 2, 25), False), # Not a SUN - (WeekDay.MON, datetime(2013, 2, 25), True), - (WeekDay.SAT, datetime(2013, 11, 30), True), - - (WeekDay.SAT, datetime(2006, 8, 26), True), - (WeekDay.SAT, datetime(2007, 8, 25), True), - (WeekDay.SAT, datetime(2008, 8, 30), True), - (WeekDay.SAT, datetime(2009, 8, 29), True), - (WeekDay.SAT, datetime(2010, 8, 28), True), - (WeekDay.SAT, datetime(2011, 8, 27), True), - (WeekDay.SAT, datetime(2019, 8, 31), True), - ] - - for weekday, dt, expected in test_cases: - offset = LastWeekOfMonth(weekday=weekday) - 
self.assertEqual(offset.onOffset(dt), expected, msg=date) - - -class TestBMonthBegin(Base): - _offset = BMonthBegin - - def test_offset(self): - tests = [] - - tests.append((BMonthBegin(), - {datetime(2008, 1, 1): datetime(2008, 2, 1), - datetime(2008, 1, 31): datetime(2008, 2, 1), - datetime(2006, 12, 29): datetime(2007, 1, 1), - datetime(2006, 12, 31): datetime(2007, 1, 1), - datetime(2006, 9, 1): datetime(2006, 10, 2), - datetime(2007, 1, 1): datetime(2007, 2, 1), - datetime(2006, 12, 1): datetime(2007, 1, 1)})) - - tests.append((BMonthBegin(0), - {datetime(2008, 1, 1): datetime(2008, 1, 1), - datetime(2006, 10, 2): datetime(2006, 10, 2), - datetime(2008, 1, 31): datetime(2008, 2, 1), - datetime(2006, 12, 29): datetime(2007, 1, 1), - datetime(2006, 12, 31): datetime(2007, 1, 1), - datetime(2006, 9, 15): datetime(2006, 10, 2)})) - - tests.append((BMonthBegin(2), - {datetime(2008, 1, 1): datetime(2008, 3, 3), - datetime(2008, 1, 15): datetime(2008, 3, 3), - datetime(2006, 12, 29): datetime(2007, 2, 1), - datetime(2006, 12, 31): datetime(2007, 2, 1), - datetime(2007, 1, 1): datetime(2007, 3, 1), - datetime(2006, 11, 1): datetime(2007, 1, 1)})) - - tests.append((BMonthBegin(-1), - {datetime(2007, 1, 1): datetime(2006, 12, 1), - datetime(2008, 6, 30): datetime(2008, 6, 2), - datetime(2008, 6, 1): datetime(2008, 5, 1), - datetime(2008, 3, 10): datetime(2008, 3, 3), - datetime(2008, 12, 31): datetime(2008, 12, 1), - datetime(2006, 12, 29): datetime(2006, 12, 1), - datetime(2006, 12, 30): datetime(2006, 12, 1), - datetime(2007, 1, 1): datetime(2006, 12, 1)})) - - for offset, cases in tests: - for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) - - def test_onOffset(self): - - tests = [(BMonthBegin(), datetime(2007, 12, 31), False), - (BMonthBegin(), datetime(2008, 1, 1), True), - (BMonthBegin(), datetime(2001, 4, 2), True), - (BMonthBegin(), datetime(2008, 3, 3), True)] - - for offset, dt, expected in tests: - assertOnOffset(offset, dt, 
expected) - - def test_offsets_compare_equal(self): - # root cause of #456 - offset1 = BMonthBegin() - offset2 = BMonthBegin() - self.assertFalse(offset1 != offset2) - - -class TestBMonthEnd(Base): - _offset = BMonthEnd - - def test_offset(self): - tests = [] - - tests.append((BMonthEnd(), - {datetime(2008, 1, 1): datetime(2008, 1, 31), - datetime(2008, 1, 31): datetime(2008, 2, 29), - datetime(2006, 12, 29): datetime(2007, 1, 31), - datetime(2006, 12, 31): datetime(2007, 1, 31), - datetime(2007, 1, 1): datetime(2007, 1, 31), - datetime(2006, 12, 1): datetime(2006, 12, 29)})) - - tests.append((BMonthEnd(0), - {datetime(2008, 1, 1): datetime(2008, 1, 31), - datetime(2008, 1, 31): datetime(2008, 1, 31), - datetime(2006, 12, 29): datetime(2006, 12, 29), - datetime(2006, 12, 31): datetime(2007, 1, 31), - datetime(2007, 1, 1): datetime(2007, 1, 31)})) - - tests.append((BMonthEnd(2), - {datetime(2008, 1, 1): datetime(2008, 2, 29), - datetime(2008, 1, 31): datetime(2008, 3, 31), - datetime(2006, 12, 29): datetime(2007, 2, 28), - datetime(2006, 12, 31): datetime(2007, 2, 28), - datetime(2007, 1, 1): datetime(2007, 2, 28), - datetime(2006, 11, 1): datetime(2006, 12, 29)})) - - tests.append((BMonthEnd(-1), - {datetime(2007, 1, 1): datetime(2006, 12, 29), - datetime(2008, 6, 30): datetime(2008, 5, 30), - datetime(2008, 12, 31): datetime(2008, 11, 28), - datetime(2006, 12, 29): datetime(2006, 11, 30), - datetime(2006, 12, 30): datetime(2006, 12, 29), - datetime(2007, 1, 1): datetime(2006, 12, 29)})) - - for offset, cases in tests: - for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) - - def test_normalize(self): - dt = datetime(2007, 1, 1, 3) - - result = dt + BMonthEnd(normalize=True) - expected = dt.replace(hour=0) + BMonthEnd() - self.assertEqual(result, expected) - - def test_onOffset(self): - - tests = [(BMonthEnd(), datetime(2007, 12, 31), True), - (BMonthEnd(), datetime(2008, 1, 1), False)] - - for offset, dt, expected in tests: - 
assertOnOffset(offset, dt, expected) - - def test_offsets_compare_equal(self): - # root cause of #456 - offset1 = BMonthEnd() - offset2 = BMonthEnd() - self.assertFalse(offset1 != offset2) - - -class TestMonthBegin(Base): - _offset = MonthBegin - - def test_offset(self): - tests = [] - - # NOTE: I'm not entirely happy with the logic here for Begin -ss - # see thread 'offset conventions' on the ML - tests.append((MonthBegin(), - {datetime(2008, 1, 31): datetime(2008, 2, 1), - datetime(2008, 2, 1): datetime(2008, 3, 1), - datetime(2006, 12, 31): datetime(2007, 1, 1), - datetime(2006, 12, 1): datetime(2007, 1, 1), - datetime(2007, 1, 31): datetime(2007, 2, 1)})) - - tests.append((MonthBegin(0), - {datetime(2008, 1, 31): datetime(2008, 2, 1), - datetime(2008, 1, 1): datetime(2008, 1, 1), - datetime(2006, 12, 3): datetime(2007, 1, 1), - datetime(2007, 1, 31): datetime(2007, 2, 1)})) - - tests.append((MonthBegin(2), - {datetime(2008, 2, 29): datetime(2008, 4, 1), - datetime(2008, 1, 31): datetime(2008, 3, 1), - datetime(2006, 12, 31): datetime(2007, 2, 1), - datetime(2007, 12, 28): datetime(2008, 2, 1), - datetime(2007, 1, 1): datetime(2007, 3, 1), - datetime(2006, 11, 1): datetime(2007, 1, 1)})) - - tests.append((MonthBegin(-1), - {datetime(2007, 1, 1): datetime(2006, 12, 1), - datetime(2008, 5, 31): datetime(2008, 5, 1), - datetime(2008, 12, 31): datetime(2008, 12, 1), - datetime(2006, 12, 29): datetime(2006, 12, 1), - datetime(2006, 1, 2): datetime(2006, 1, 1)})) - - for offset, cases in tests: - for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) - - -class TestMonthEnd(Base): - _offset = MonthEnd - - def test_offset(self): - tests = [] - - tests.append((MonthEnd(), - {datetime(2008, 1, 1): datetime(2008, 1, 31), - datetime(2008, 1, 31): datetime(2008, 2, 29), - datetime(2006, 12, 29): datetime(2006, 12, 31), - datetime(2006, 12, 31): datetime(2007, 1, 31), - datetime(2007, 1, 1): datetime(2007, 1, 31), - datetime(2006, 12, 1): 
datetime(2006, 12, 31)})) - - tests.append((MonthEnd(0), - {datetime(2008, 1, 1): datetime(2008, 1, 31), - datetime(2008, 1, 31): datetime(2008, 1, 31), - datetime(2006, 12, 29): datetime(2006, 12, 31), - datetime(2006, 12, 31): datetime(2006, 12, 31), - datetime(2007, 1, 1): datetime(2007, 1, 31)})) - - tests.append((MonthEnd(2), - {datetime(2008, 1, 1): datetime(2008, 2, 29), - datetime(2008, 1, 31): datetime(2008, 3, 31), - datetime(2006, 12, 29): datetime(2007, 1, 31), - datetime(2006, 12, 31): datetime(2007, 2, 28), - datetime(2007, 1, 1): datetime(2007, 2, 28), - datetime(2006, 11, 1): datetime(2006, 12, 31)})) - - tests.append((MonthEnd(-1), - {datetime(2007, 1, 1): datetime(2006, 12, 31), - datetime(2008, 6, 30): datetime(2008, 5, 31), - datetime(2008, 12, 31): datetime(2008, 11, 30), - datetime(2006, 12, 29): datetime(2006, 11, 30), - datetime(2006, 12, 30): datetime(2006, 11, 30), - datetime(2007, 1, 1): datetime(2006, 12, 31)})) - - for offset, cases in tests: - for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) - - # def test_day_of_month(self): - # dt = datetime(2007, 1, 1) - - # offset = MonthEnd(day=20) - - # result = dt + offset - # self.assertEqual(result, datetime(2007, 1, 20)) - - # result = result + offset - # self.assertEqual(result, datetime(2007, 2, 20)) - - def test_normalize(self): - dt = datetime(2007, 1, 1, 3) - - result = dt + MonthEnd(normalize=True) - expected = dt.replace(hour=0) + MonthEnd() - self.assertEqual(result, expected) - - def test_onOffset(self): - - tests = [(MonthEnd(), datetime(2007, 12, 31), True), - (MonthEnd(), datetime(2008, 1, 1), False)] - - for offset, dt, expected in tests: - assertOnOffset(offset, dt, expected) - - -class TestSemiMonthEnd(Base): - _offset = SemiMonthEnd - - def _get_tests(self): - tests = [] - - tests.append((SemiMonthEnd(), - {datetime(2008, 1, 1): datetime(2008, 1, 15), - datetime(2008, 1, 15): datetime(2008, 1, 31), - datetime(2008, 1, 31): datetime(2008, 2, 
15), - datetime(2006, 12, 14): datetime(2006, 12, 15), - datetime(2006, 12, 29): datetime(2006, 12, 31), - datetime(2006, 12, 31): datetime(2007, 1, 15), - datetime(2007, 1, 1): datetime(2007, 1, 15), - datetime(2006, 12, 1): datetime(2006, 12, 15), - datetime(2006, 12, 15): datetime(2006, 12, 31)})) - - tests.append((SemiMonthEnd(day_of_month=20), - {datetime(2008, 1, 1): datetime(2008, 1, 20), - datetime(2008, 1, 15): datetime(2008, 1, 20), - datetime(2008, 1, 21): datetime(2008, 1, 31), - datetime(2008, 1, 31): datetime(2008, 2, 20), - datetime(2006, 12, 14): datetime(2006, 12, 20), - datetime(2006, 12, 29): datetime(2006, 12, 31), - datetime(2006, 12, 31): datetime(2007, 1, 20), - datetime(2007, 1, 1): datetime(2007, 1, 20), - datetime(2006, 12, 1): datetime(2006, 12, 20), - datetime(2006, 12, 15): datetime(2006, 12, 20)})) - - tests.append((SemiMonthEnd(0), - {datetime(2008, 1, 1): datetime(2008, 1, 15), - datetime(2008, 1, 16): datetime(2008, 1, 31), - datetime(2008, 1, 15): datetime(2008, 1, 15), - datetime(2008, 1, 31): datetime(2008, 1, 31), - datetime(2006, 12, 29): datetime(2006, 12, 31), - datetime(2006, 12, 31): datetime(2006, 12, 31), - datetime(2007, 1, 1): datetime(2007, 1, 15)})) - - tests.append((SemiMonthEnd(0, day_of_month=16), - {datetime(2008, 1, 1): datetime(2008, 1, 16), - datetime(2008, 1, 16): datetime(2008, 1, 16), - datetime(2008, 1, 15): datetime(2008, 1, 16), - datetime(2008, 1, 31): datetime(2008, 1, 31), - datetime(2006, 12, 29): datetime(2006, 12, 31), - datetime(2006, 12, 31): datetime(2006, 12, 31), - datetime(2007, 1, 1): datetime(2007, 1, 16)})) - - tests.append((SemiMonthEnd(2), - {datetime(2008, 1, 1): datetime(2008, 1, 31), - datetime(2008, 1, 31): datetime(2008, 2, 29), - datetime(2006, 12, 29): datetime(2007, 1, 15), - datetime(2006, 12, 31): datetime(2007, 1, 31), - datetime(2007, 1, 1): datetime(2007, 1, 31), - datetime(2007, 1, 16): datetime(2007, 2, 15), - datetime(2006, 11, 1): datetime(2006, 11, 30)})) - - 
tests.append((SemiMonthEnd(-1), - {datetime(2007, 1, 1): datetime(2006, 12, 31), - datetime(2008, 6, 30): datetime(2008, 6, 15), - datetime(2008, 12, 31): datetime(2008, 12, 15), - datetime(2006, 12, 29): datetime(2006, 12, 15), - datetime(2006, 12, 30): datetime(2006, 12, 15), - datetime(2007, 1, 1): datetime(2006, 12, 31)})) - - tests.append((SemiMonthEnd(-1, day_of_month=4), - {datetime(2007, 1, 1): datetime(2006, 12, 31), - datetime(2007, 1, 4): datetime(2006, 12, 31), - datetime(2008, 6, 30): datetime(2008, 6, 4), - datetime(2008, 12, 31): datetime(2008, 12, 4), - datetime(2006, 12, 5): datetime(2006, 12, 4), - datetime(2006, 12, 30): datetime(2006, 12, 4), - datetime(2007, 1, 1): datetime(2006, 12, 31)})) - - tests.append((SemiMonthEnd(-2), - {datetime(2007, 1, 1): datetime(2006, 12, 15), - datetime(2008, 6, 30): datetime(2008, 5, 31), - datetime(2008, 3, 15): datetime(2008, 2, 15), - datetime(2008, 12, 31): datetime(2008, 11, 30), - datetime(2006, 12, 29): datetime(2006, 11, 30), - datetime(2006, 12, 14): datetime(2006, 11, 15), - datetime(2007, 1, 1): datetime(2006, 12, 15)})) - - return tests - - def test_offset_whole_year(self): - dates = (datetime(2007, 12, 31), - datetime(2008, 1, 15), - datetime(2008, 1, 31), - datetime(2008, 2, 15), - datetime(2008, 2, 29), - datetime(2008, 3, 15), - datetime(2008, 3, 31), - datetime(2008, 4, 15), - datetime(2008, 4, 30), - datetime(2008, 5, 15), - datetime(2008, 5, 31), - datetime(2008, 6, 15), - datetime(2008, 6, 30), - datetime(2008, 7, 15), - datetime(2008, 7, 31), - datetime(2008, 8, 15), - datetime(2008, 8, 31), - datetime(2008, 9, 15), - datetime(2008, 9, 30), - datetime(2008, 10, 15), - datetime(2008, 10, 31), - datetime(2008, 11, 15), - datetime(2008, 11, 30), - datetime(2008, 12, 15), - datetime(2008, 12, 31)) - - for base, exp_date in zip(dates[:-1], dates[1:]): - assertEq(SemiMonthEnd(), base, exp_date) - - # ensure .apply_index works as expected - s = DatetimeIndex(dates[:-1]) - result = 
SemiMonthEnd().apply_index(s) - exp = DatetimeIndex(dates[1:]) - tm.assert_index_equal(result, exp) - - # ensure generating a range with DatetimeIndex gives same result - result = DatetimeIndex(start=dates[0], end=dates[-1], freq='SM') - exp = DatetimeIndex(dates) - tm.assert_index_equal(result, exp) - - def test_offset(self): - for offset, cases in self._get_tests(): - for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) - - def test_apply_index(self): - for offset, cases in self._get_tests(): - s = DatetimeIndex(cases.keys()) - result = offset.apply_index(s) - exp = DatetimeIndex(cases.values()) - tm.assert_index_equal(result, exp) - - def test_onOffset(self): - - tests = [(datetime(2007, 12, 31), True), - (datetime(2007, 12, 15), True), - (datetime(2007, 12, 14), False), - (datetime(2007, 12, 1), False), - (datetime(2008, 2, 29), True)] - - for dt, expected in tests: - assertOnOffset(SemiMonthEnd(), dt, expected) - - def test_vectorized_offset_addition(self): - for klass, assert_func in zip([Series, DatetimeIndex], - [self.assert_series_equal, - tm.assert_index_equal]): - s = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'), - Timestamp('2000-02-15', tz='US/Central')], name='a') - - result = s + SemiMonthEnd() - result2 = SemiMonthEnd() + s - exp = klass([Timestamp('2000-01-31 00:15:00', tz='US/Central'), - Timestamp('2000-02-29', tz='US/Central')], name='a') - assert_func(result, exp) - assert_func(result2, exp) - - s = klass([Timestamp('2000-01-01 00:15:00', tz='US/Central'), - Timestamp('2000-02-01', tz='US/Central')], name='a') - result = s + SemiMonthEnd() - result2 = SemiMonthEnd() + s - exp = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'), - Timestamp('2000-02-15', tz='US/Central')], name='a') - assert_func(result, exp) - assert_func(result2, exp) - - -class TestSemiMonthBegin(Base): - _offset = SemiMonthBegin - - def _get_tests(self): - tests = [] - - tests.append((SemiMonthBegin(), - {datetime(2008, 1, 1): 
datetime(2008, 1, 15), - datetime(2008, 1, 15): datetime(2008, 2, 1), - datetime(2008, 1, 31): datetime(2008, 2, 1), - datetime(2006, 12, 14): datetime(2006, 12, 15), - datetime(2006, 12, 29): datetime(2007, 1, 1), - datetime(2006, 12, 31): datetime(2007, 1, 1), - datetime(2007, 1, 1): datetime(2007, 1, 15), - datetime(2006, 12, 1): datetime(2006, 12, 15), - datetime(2006, 12, 15): datetime(2007, 1, 1)})) - - tests.append((SemiMonthBegin(day_of_month=20), - {datetime(2008, 1, 1): datetime(2008, 1, 20), - datetime(2008, 1, 15): datetime(2008, 1, 20), - datetime(2008, 1, 21): datetime(2008, 2, 1), - datetime(2008, 1, 31): datetime(2008, 2, 1), - datetime(2006, 12, 14): datetime(2006, 12, 20), - datetime(2006, 12, 29): datetime(2007, 1, 1), - datetime(2006, 12, 31): datetime(2007, 1, 1), - datetime(2007, 1, 1): datetime(2007, 1, 20), - datetime(2006, 12, 1): datetime(2006, 12, 20), - datetime(2006, 12, 15): datetime(2006, 12, 20)})) - - tests.append((SemiMonthBegin(0), - {datetime(2008, 1, 1): datetime(2008, 1, 1), - datetime(2008, 1, 16): datetime(2008, 2, 1), - datetime(2008, 1, 15): datetime(2008, 1, 15), - datetime(2008, 1, 31): datetime(2008, 2, 1), - datetime(2006, 12, 29): datetime(2007, 1, 1), - datetime(2006, 12, 2): datetime(2006, 12, 15), - datetime(2007, 1, 1): datetime(2007, 1, 1)})) - - tests.append((SemiMonthBegin(0, day_of_month=16), - {datetime(2008, 1, 1): datetime(2008, 1, 1), - datetime(2008, 1, 16): datetime(2008, 1, 16), - datetime(2008, 1, 15): datetime(2008, 1, 16), - datetime(2008, 1, 31): datetime(2008, 2, 1), - datetime(2006, 12, 29): datetime(2007, 1, 1), - datetime(2006, 12, 31): datetime(2007, 1, 1), - datetime(2007, 1, 5): datetime(2007, 1, 16), - datetime(2007, 1, 1): datetime(2007, 1, 1)})) - - tests.append((SemiMonthBegin(2), - {datetime(2008, 1, 1): datetime(2008, 2, 1), - datetime(2008, 1, 31): datetime(2008, 2, 15), - datetime(2006, 12, 1): datetime(2007, 1, 1), - datetime(2006, 12, 29): datetime(2007, 1, 15), - datetime(2006, 12, 
15): datetime(2007, 1, 15), - datetime(2007, 1, 1): datetime(2007, 2, 1), - datetime(2007, 1, 16): datetime(2007, 2, 15), - datetime(2006, 11, 1): datetime(2006, 12, 1)})) - - tests.append((SemiMonthBegin(-1), - {datetime(2007, 1, 1): datetime(2006, 12, 15), - datetime(2008, 6, 30): datetime(2008, 6, 15), - datetime(2008, 6, 14): datetime(2008, 6, 1), - datetime(2008, 12, 31): datetime(2008, 12, 15), - datetime(2006, 12, 29): datetime(2006, 12, 15), - datetime(2006, 12, 15): datetime(2006, 12, 1), - datetime(2007, 1, 1): datetime(2006, 12, 15)})) - - tests.append((SemiMonthBegin(-1, day_of_month=4), - {datetime(2007, 1, 1): datetime(2006, 12, 4), - datetime(2007, 1, 4): datetime(2007, 1, 1), - datetime(2008, 6, 30): datetime(2008, 6, 4), - datetime(2008, 12, 31): datetime(2008, 12, 4), - datetime(2006, 12, 5): datetime(2006, 12, 4), - datetime(2006, 12, 30): datetime(2006, 12, 4), - datetime(2006, 12, 2): datetime(2006, 12, 1), - datetime(2007, 1, 1): datetime(2006, 12, 4)})) - - tests.append((SemiMonthBegin(-2), - {datetime(2007, 1, 1): datetime(2006, 12, 1), - datetime(2008, 6, 30): datetime(2008, 6, 1), - datetime(2008, 6, 14): datetime(2008, 5, 15), - datetime(2008, 12, 31): datetime(2008, 12, 1), - datetime(2006, 12, 29): datetime(2006, 12, 1), - datetime(2006, 12, 15): datetime(2006, 11, 15), - datetime(2007, 1, 1): datetime(2006, 12, 1)})) - - return tests - - def test_offset_whole_year(self): - dates = (datetime(2007, 12, 15), - datetime(2008, 1, 1), - datetime(2008, 1, 15), - datetime(2008, 2, 1), - datetime(2008, 2, 15), - datetime(2008, 3, 1), - datetime(2008, 3, 15), - datetime(2008, 4, 1), - datetime(2008, 4, 15), - datetime(2008, 5, 1), - datetime(2008, 5, 15), - datetime(2008, 6, 1), - datetime(2008, 6, 15), - datetime(2008, 7, 1), - datetime(2008, 7, 15), - datetime(2008, 8, 1), - datetime(2008, 8, 15), - datetime(2008, 9, 1), - datetime(2008, 9, 15), - datetime(2008, 10, 1), - datetime(2008, 10, 15), - datetime(2008, 11, 1), - datetime(2008, 11, 
15), - datetime(2008, 12, 1), - datetime(2008, 12, 15)) - - for base, exp_date in zip(dates[:-1], dates[1:]): - assertEq(SemiMonthBegin(), base, exp_date) - - # ensure .apply_index works as expected - s = DatetimeIndex(dates[:-1]) - result = SemiMonthBegin().apply_index(s) - exp = DatetimeIndex(dates[1:]) - tm.assert_index_equal(result, exp) - - # ensure generating a range with DatetimeIndex gives same result - result = DatetimeIndex(start=dates[0], end=dates[-1], freq='SMS') - exp = DatetimeIndex(dates) - tm.assert_index_equal(result, exp) - - def test_offset(self): - for offset, cases in self._get_tests(): - for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) - - def test_apply_index(self): - for offset, cases in self._get_tests(): - s = DatetimeIndex(cases.keys()) - result = offset.apply_index(s) - exp = DatetimeIndex(cases.values()) - tm.assert_index_equal(result, exp) - - def test_onOffset(self): - tests = [(datetime(2007, 12, 1), True), - (datetime(2007, 12, 15), True), - (datetime(2007, 12, 14), False), - (datetime(2007, 12, 31), False), - (datetime(2008, 2, 15), True)] - - for dt, expected in tests: - assertOnOffset(SemiMonthBegin(), dt, expected) - - def test_vectorized_offset_addition(self): - for klass, assert_func in zip([Series, DatetimeIndex], - [self.assert_series_equal, - tm.assert_index_equal]): - - s = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'), - Timestamp('2000-02-15', tz='US/Central')], name='a') - result = s + SemiMonthBegin() - result2 = SemiMonthBegin() + s - exp = klass([Timestamp('2000-02-01 00:15:00', tz='US/Central'), - Timestamp('2000-03-01', tz='US/Central')], name='a') - assert_func(result, exp) - assert_func(result2, exp) - - s = klass([Timestamp('2000-01-01 00:15:00', tz='US/Central'), - Timestamp('2000-02-01', tz='US/Central')], name='a') - result = s + SemiMonthBegin() - result2 = SemiMonthBegin() + s - exp = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'), - 
Timestamp('2000-02-15', tz='US/Central')], name='a') - assert_func(result, exp) - assert_func(result2, exp) - - -class TestBQuarterBegin(Base): - _offset = BQuarterBegin - - def test_repr(self): - self.assertEqual(repr(BQuarterBegin()), - "") - self.assertEqual(repr(BQuarterBegin(startingMonth=3)), - "") - self.assertEqual(repr(BQuarterBegin(startingMonth=1)), - "") - - def test_isAnchored(self): - self.assertTrue(BQuarterBegin(startingMonth=1).isAnchored()) - self.assertTrue(BQuarterBegin().isAnchored()) - self.assertFalse(BQuarterBegin(2, startingMonth=1).isAnchored()) - - def test_offset(self): - tests = [] - - tests.append((BQuarterBegin(startingMonth=1), - {datetime(2008, 1, 1): datetime(2008, 4, 1), - datetime(2008, 1, 31): datetime(2008, 4, 1), - datetime(2008, 2, 15): datetime(2008, 4, 1), - datetime(2008, 2, 29): datetime(2008, 4, 1), - datetime(2008, 3, 15): datetime(2008, 4, 1), - datetime(2008, 3, 31): datetime(2008, 4, 1), - datetime(2008, 4, 15): datetime(2008, 7, 1), - datetime(2007, 3, 15): datetime(2007, 4, 2), - datetime(2007, 2, 28): datetime(2007, 4, 2), - datetime(2007, 1, 1): datetime(2007, 4, 2), - datetime(2007, 4, 15): datetime(2007, 7, 2), - datetime(2007, 7, 1): datetime(2007, 7, 2), - datetime(2007, 4, 1): datetime(2007, 4, 2), - datetime(2007, 4, 2): datetime(2007, 7, 2), - datetime(2008, 4, 30): datetime(2008, 7, 1), })) - - tests.append((BQuarterBegin(startingMonth=2), - {datetime(2008, 1, 1): datetime(2008, 2, 1), - datetime(2008, 1, 31): datetime(2008, 2, 1), - datetime(2008, 1, 15): datetime(2008, 2, 1), - datetime(2008, 2, 29): datetime(2008, 5, 1), - datetime(2008, 3, 15): datetime(2008, 5, 1), - datetime(2008, 3, 31): datetime(2008, 5, 1), - datetime(2008, 4, 15): datetime(2008, 5, 1), - datetime(2008, 8, 15): datetime(2008, 11, 3), - datetime(2008, 9, 15): datetime(2008, 11, 3), - datetime(2008, 11, 1): datetime(2008, 11, 3), - datetime(2008, 4, 30): datetime(2008, 5, 1), })) - - tests.append((BQuarterBegin(startingMonth=1, 
n=0), - {datetime(2008, 1, 1): datetime(2008, 1, 1), - datetime(2007, 12, 31): datetime(2008, 1, 1), - datetime(2008, 2, 15): datetime(2008, 4, 1), - datetime(2008, 2, 29): datetime(2008, 4, 1), - datetime(2008, 1, 15): datetime(2008, 4, 1), - datetime(2008, 2, 27): datetime(2008, 4, 1), - datetime(2008, 3, 15): datetime(2008, 4, 1), - datetime(2007, 4, 1): datetime(2007, 4, 2), - datetime(2007, 4, 2): datetime(2007, 4, 2), - datetime(2007, 7, 1): datetime(2007, 7, 2), - datetime(2007, 4, 15): datetime(2007, 7, 2), - datetime(2007, 7, 2): datetime(2007, 7, 2), })) - - tests.append((BQuarterBegin(startingMonth=1, n=-1), - {datetime(2008, 1, 1): datetime(2007, 10, 1), - datetime(2008, 1, 31): datetime(2008, 1, 1), - datetime(2008, 2, 15): datetime(2008, 1, 1), - datetime(2008, 2, 29): datetime(2008, 1, 1), - datetime(2008, 3, 15): datetime(2008, 1, 1), - datetime(2008, 3, 31): datetime(2008, 1, 1), - datetime(2008, 4, 15): datetime(2008, 4, 1), - datetime(2007, 7, 3): datetime(2007, 7, 2), - datetime(2007, 4, 3): datetime(2007, 4, 2), - datetime(2007, 7, 2): datetime(2007, 4, 2), - datetime(2008, 4, 1): datetime(2008, 1, 1), })) - - tests.append((BQuarterBegin(startingMonth=1, n=2), - {datetime(2008, 1, 1): datetime(2008, 7, 1), - datetime(2008, 1, 15): datetime(2008, 7, 1), - datetime(2008, 2, 29): datetime(2008, 7, 1), - datetime(2008, 3, 15): datetime(2008, 7, 1), - datetime(2007, 3, 31): datetime(2007, 7, 2), - datetime(2007, 4, 15): datetime(2007, 10, 1), - datetime(2008, 4, 30): datetime(2008, 10, 1), })) - - for offset, cases in tests: - for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) - - # corner - offset = BQuarterBegin(n=-1, startingMonth=1) - self.assertEqual(datetime(2007, 4, 3) + offset, datetime(2007, 4, 2)) - - -class TestBQuarterEnd(Base): - _offset = BQuarterEnd - - def test_repr(self): - self.assertEqual(repr(BQuarterEnd()), - "") - self.assertEqual(repr(BQuarterEnd(startingMonth=3)), - "") - 
self.assertEqual(repr(BQuarterEnd(startingMonth=1)), - "") - - def test_isAnchored(self): - self.assertTrue(BQuarterEnd(startingMonth=1).isAnchored()) - self.assertTrue(BQuarterEnd().isAnchored()) - self.assertFalse(BQuarterEnd(2, startingMonth=1).isAnchored()) - - def test_offset(self): - tests = [] - - tests.append((BQuarterEnd(startingMonth=1), - {datetime(2008, 1, 1): datetime(2008, 1, 31), - datetime(2008, 1, 31): datetime(2008, 4, 30), - datetime(2008, 2, 15): datetime(2008, 4, 30), - datetime(2008, 2, 29): datetime(2008, 4, 30), - datetime(2008, 3, 15): datetime(2008, 4, 30), - datetime(2008, 3, 31): datetime(2008, 4, 30), - datetime(2008, 4, 15): datetime(2008, 4, 30), - datetime(2008, 4, 30): datetime(2008, 7, 31), })) - - tests.append((BQuarterEnd(startingMonth=2), - {datetime(2008, 1, 1): datetime(2008, 2, 29), - datetime(2008, 1, 31): datetime(2008, 2, 29), - datetime(2008, 2, 15): datetime(2008, 2, 29), - datetime(2008, 2, 29): datetime(2008, 5, 30), - datetime(2008, 3, 15): datetime(2008, 5, 30), - datetime(2008, 3, 31): datetime(2008, 5, 30), - datetime(2008, 4, 15): datetime(2008, 5, 30), - datetime(2008, 4, 30): datetime(2008, 5, 30), })) - - tests.append((BQuarterEnd(startingMonth=1, n=0), - {datetime(2008, 1, 1): datetime(2008, 1, 31), - datetime(2008, 1, 31): datetime(2008, 1, 31), - datetime(2008, 2, 15): datetime(2008, 4, 30), - datetime(2008, 2, 29): datetime(2008, 4, 30), - datetime(2008, 3, 15): datetime(2008, 4, 30), - datetime(2008, 3, 31): datetime(2008, 4, 30), - datetime(2008, 4, 15): datetime(2008, 4, 30), - datetime(2008, 4, 30): datetime(2008, 4, 30), })) - - tests.append((BQuarterEnd(startingMonth=1, n=-1), - {datetime(2008, 1, 1): datetime(2007, 10, 31), - datetime(2008, 1, 31): datetime(2007, 10, 31), - datetime(2008, 2, 15): datetime(2008, 1, 31), - datetime(2008, 2, 29): datetime(2008, 1, 31), - datetime(2008, 3, 15): datetime(2008, 1, 31), - datetime(2008, 3, 31): datetime(2008, 1, 31), - datetime(2008, 4, 15): datetime(2008, 
1, 31), - datetime(2008, 4, 30): datetime(2008, 1, 31), })) - - tests.append((BQuarterEnd(startingMonth=1, n=2), - {datetime(2008, 1, 31): datetime(2008, 7, 31), - datetime(2008, 2, 15): datetime(2008, 7, 31), - datetime(2008, 2, 29): datetime(2008, 7, 31), - datetime(2008, 3, 15): datetime(2008, 7, 31), - datetime(2008, 3, 31): datetime(2008, 7, 31), - datetime(2008, 4, 15): datetime(2008, 7, 31), - datetime(2008, 4, 30): datetime(2008, 10, 31), })) - - for offset, cases in tests: - for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) - - # corner - offset = BQuarterEnd(n=-1, startingMonth=1) - self.assertEqual(datetime(2010, 1, 31) + offset, datetime(2010, 1, 29)) - - def test_onOffset(self): - - tests = [ - (BQuarterEnd(1, startingMonth=1), datetime(2008, 1, 31), True), - (BQuarterEnd(1, startingMonth=1), datetime(2007, 12, 31), False), - (BQuarterEnd(1, startingMonth=1), datetime(2008, 2, 29), False), - (BQuarterEnd(1, startingMonth=1), datetime(2007, 3, 30), False), - (BQuarterEnd(1, startingMonth=1), datetime(2007, 3, 31), False), - (BQuarterEnd(1, startingMonth=1), datetime(2008, 4, 30), True), - (BQuarterEnd(1, startingMonth=1), datetime(2008, 5, 30), False), - (BQuarterEnd(1, startingMonth=1), datetime(2007, 6, 29), False), - (BQuarterEnd(1, startingMonth=1), datetime(2007, 6, 30), False), - (BQuarterEnd(1, startingMonth=2), datetime(2008, 1, 31), False), - (BQuarterEnd(1, startingMonth=2), datetime(2007, 12, 31), False), - (BQuarterEnd(1, startingMonth=2), datetime(2008, 2, 29), True), - (BQuarterEnd(1, startingMonth=2), datetime(2007, 3, 30), False), - (BQuarterEnd(1, startingMonth=2), datetime(2007, 3, 31), False), - (BQuarterEnd(1, startingMonth=2), datetime(2008, 4, 30), False), - (BQuarterEnd(1, startingMonth=2), datetime(2008, 5, 30), True), - (BQuarterEnd(1, startingMonth=2), datetime(2007, 6, 29), False), - (BQuarterEnd(1, startingMonth=2), datetime(2007, 6, 30), False), - (BQuarterEnd(1, startingMonth=3), 
datetime(2008, 1, 31), False), - (BQuarterEnd(1, startingMonth=3), datetime(2007, 12, 31), True), - (BQuarterEnd(1, startingMonth=3), datetime(2008, 2, 29), False), - (BQuarterEnd(1, startingMonth=3), datetime(2007, 3, 30), True), - (BQuarterEnd(1, startingMonth=3), datetime(2007, 3, 31), False), - (BQuarterEnd(1, startingMonth=3), datetime(2008, 4, 30), False), - (BQuarterEnd(1, startingMonth=3), datetime(2008, 5, 30), False), - (BQuarterEnd(1, startingMonth=3), datetime(2007, 6, 29), True), - (BQuarterEnd(1, startingMonth=3), datetime(2007, 6, 30), False), - ] - - for offset, dt, expected in tests: - assertOnOffset(offset, dt, expected) - - -def makeFY5253LastOfMonthQuarter(*args, **kwds): - return FY5253Quarter(*args, variation="last", **kwds) - - -def makeFY5253NearestEndMonthQuarter(*args, **kwds): - return FY5253Quarter(*args, variation="nearest", **kwds) - - -def makeFY5253NearestEndMonth(*args, **kwds): - return FY5253(*args, variation="nearest", **kwds) - - -def makeFY5253LastOfMonth(*args, **kwds): - return FY5253(*args, variation="last", **kwds) - - -class TestFY5253LastOfMonth(Base): - - def test_onOffset(self): - - offset_lom_sat_aug = makeFY5253LastOfMonth(1, startingMonth=8, - weekday=WeekDay.SAT) - offset_lom_sat_sep = makeFY5253LastOfMonth(1, startingMonth=9, - weekday=WeekDay.SAT) - - tests = [ - # From Wikipedia (see: - # http://en.wikipedia.org/wiki/4%E2%80%934%E2%80%935_calendar#Last_Saturday_of_the_month_at_fiscal_year_end) - (offset_lom_sat_aug, datetime(2006, 8, 26), True), - (offset_lom_sat_aug, datetime(2007, 8, 25), True), - (offset_lom_sat_aug, datetime(2008, 8, 30), True), - (offset_lom_sat_aug, datetime(2009, 8, 29), True), - (offset_lom_sat_aug, datetime(2010, 8, 28), True), - (offset_lom_sat_aug, datetime(2011, 8, 27), True), - (offset_lom_sat_aug, datetime(2012, 8, 25), True), - (offset_lom_sat_aug, datetime(2013, 8, 31), True), - (offset_lom_sat_aug, datetime(2014, 8, 30), True), - (offset_lom_sat_aug, datetime(2015, 8, 29), True), 
- (offset_lom_sat_aug, datetime(2016, 8, 27), True), - (offset_lom_sat_aug, datetime(2017, 8, 26), True), - (offset_lom_sat_aug, datetime(2018, 8, 25), True), - (offset_lom_sat_aug, datetime(2019, 8, 31), True), - - (offset_lom_sat_aug, datetime(2006, 8, 27), False), - (offset_lom_sat_aug, datetime(2007, 8, 28), False), - (offset_lom_sat_aug, datetime(2008, 8, 31), False), - (offset_lom_sat_aug, datetime(2009, 8, 30), False), - (offset_lom_sat_aug, datetime(2010, 8, 29), False), - (offset_lom_sat_aug, datetime(2011, 8, 28), False), - - (offset_lom_sat_aug, datetime(2006, 8, 25), False), - (offset_lom_sat_aug, datetime(2007, 8, 24), False), - (offset_lom_sat_aug, datetime(2008, 8, 29), False), - (offset_lom_sat_aug, datetime(2009, 8, 28), False), - (offset_lom_sat_aug, datetime(2010, 8, 27), False), - (offset_lom_sat_aug, datetime(2011, 8, 26), False), - (offset_lom_sat_aug, datetime(2019, 8, 30), False), - - # From GMCR (see for example: - # http://yahoo.brand.edgar-online.com/Default.aspx? 
- # companyid=3184&formtypeID=7) - (offset_lom_sat_sep, datetime(2010, 9, 25), True), - (offset_lom_sat_sep, datetime(2011, 9, 24), True), - (offset_lom_sat_sep, datetime(2012, 9, 29), True), - - ] - - for offset, dt, expected in tests: - assertOnOffset(offset, dt, expected) - - def test_apply(self): - offset_lom_aug_sat = makeFY5253LastOfMonth(startingMonth=8, - weekday=WeekDay.SAT) - offset_lom_aug_sat_1 = makeFY5253LastOfMonth(n=1, startingMonth=8, - weekday=WeekDay.SAT) - - date_seq_lom_aug_sat = [datetime(2006, 8, 26), datetime(2007, 8, 25), - datetime(2008, 8, 30), datetime(2009, 8, 29), - datetime(2010, 8, 28), datetime(2011, 8, 27), - datetime(2012, 8, 25), datetime(2013, 8, 31), - datetime(2014, 8, 30), datetime(2015, 8, 29), - datetime(2016, 8, 27)] - - tests = [ - (offset_lom_aug_sat, date_seq_lom_aug_sat), - (offset_lom_aug_sat_1, date_seq_lom_aug_sat), - (offset_lom_aug_sat, [ - datetime(2006, 8, 25)] + date_seq_lom_aug_sat), - (offset_lom_aug_sat_1, [ - datetime(2006, 8, 27)] + date_seq_lom_aug_sat[1:]), - (makeFY5253LastOfMonth(n=-1, startingMonth=8, - weekday=WeekDay.SAT), - list(reversed(date_seq_lom_aug_sat))), - ] - for test in tests: - offset, data = test - current = data[0] - for datum in data[1:]: - current = current + offset - self.assertEqual(current, datum) - - -class TestFY5253NearestEndMonth(Base): - - def test_get_target_month_end(self): - self.assertEqual(makeFY5253NearestEndMonth(startingMonth=8, - weekday=WeekDay.SAT) - .get_target_month_end( - datetime(2013, 1, 1)), datetime(2013, 8, 31)) - self.assertEqual(makeFY5253NearestEndMonth(startingMonth=12, - weekday=WeekDay.SAT) - .get_target_month_end(datetime(2013, 1, 1)), - datetime(2013, 12, 31)) - self.assertEqual(makeFY5253NearestEndMonth(startingMonth=2, - weekday=WeekDay.SAT) - .get_target_month_end(datetime(2013, 1, 1)), - datetime(2013, 2, 28)) - - def test_get_year_end(self): - self.assertEqual(makeFY5253NearestEndMonth(startingMonth=8, - weekday=WeekDay.SAT) - 
.get_year_end(datetime(2013, 1, 1)), - datetime(2013, 8, 31)) - self.assertEqual(makeFY5253NearestEndMonth(startingMonth=8, - weekday=WeekDay.SUN) - .get_year_end(datetime(2013, 1, 1)), - datetime(2013, 9, 1)) - self.assertEqual(makeFY5253NearestEndMonth(startingMonth=8, - weekday=WeekDay.FRI) - .get_year_end(datetime(2013, 1, 1)), - datetime(2013, 8, 30)) - - offset_n = FY5253(weekday=WeekDay.TUE, startingMonth=12, - variation="nearest") - self.assertEqual(offset_n.get_year_end( - datetime(2012, 1, 1)), datetime(2013, 1, 1)) - self.assertEqual(offset_n.get_year_end( - datetime(2012, 1, 10)), datetime(2013, 1, 1)) - - self.assertEqual(offset_n.get_year_end( - datetime(2013, 1, 1)), datetime(2013, 12, 31)) - self.assertEqual(offset_n.get_year_end( - datetime(2013, 1, 2)), datetime(2013, 12, 31)) - self.assertEqual(offset_n.get_year_end( - datetime(2013, 1, 3)), datetime(2013, 12, 31)) - self.assertEqual(offset_n.get_year_end( - datetime(2013, 1, 10)), datetime(2013, 12, 31)) - - JNJ = FY5253(n=1, startingMonth=12, weekday=6, variation="nearest") - self.assertEqual(JNJ.get_year_end( - datetime(2006, 1, 1)), datetime(2006, 12, 31)) - - def test_onOffset(self): - offset_lom_aug_sat = makeFY5253NearestEndMonth(1, startingMonth=8, - weekday=WeekDay.SAT) - offset_lom_aug_thu = makeFY5253NearestEndMonth(1, startingMonth=8, - weekday=WeekDay.THU) - offset_n = FY5253(weekday=WeekDay.TUE, startingMonth=12, - variation="nearest") - - tests = [ - # From Wikipedia (see: - # http://en.wikipedia.org/wiki/4%E2%80%934%E2%80%935_calendar - # #Saturday_nearest_the_end_of_month) - # 2006-09-02 2006 September 2 - # 2007-09-01 2007 September 1 - # 2008-08-30 2008 August 30 (leap year) - # 2009-08-29 2009 August 29 - # 2010-08-28 2010 August 28 - # 2011-09-03 2011 September 3 - # 2012-09-01 2012 September 1 (leap year) - # 2013-08-31 2013 August 31 - # 2014-08-30 2014 August 30 - # 2015-08-29 2015 August 29 - # 2016-09-03 2016 September 3 (leap year) - # 2017-09-02 2017 September 2 - # 
2018-09-01 2018 September 1 - # 2019-08-31 2019 August 31 - (offset_lom_aug_sat, datetime(2006, 9, 2), True), - (offset_lom_aug_sat, datetime(2007, 9, 1), True), - (offset_lom_aug_sat, datetime(2008, 8, 30), True), - (offset_lom_aug_sat, datetime(2009, 8, 29), True), - (offset_lom_aug_sat, datetime(2010, 8, 28), True), - (offset_lom_aug_sat, datetime(2011, 9, 3), True), - - (offset_lom_aug_sat, datetime(2016, 9, 3), True), - (offset_lom_aug_sat, datetime(2017, 9, 2), True), - (offset_lom_aug_sat, datetime(2018, 9, 1), True), - (offset_lom_aug_sat, datetime(2019, 8, 31), True), - - (offset_lom_aug_sat, datetime(2006, 8, 27), False), - (offset_lom_aug_sat, datetime(2007, 8, 28), False), - (offset_lom_aug_sat, datetime(2008, 8, 31), False), - (offset_lom_aug_sat, datetime(2009, 8, 30), False), - (offset_lom_aug_sat, datetime(2010, 8, 29), False), - (offset_lom_aug_sat, datetime(2011, 8, 28), False), - - (offset_lom_aug_sat, datetime(2006, 8, 25), False), - (offset_lom_aug_sat, datetime(2007, 8, 24), False), - (offset_lom_aug_sat, datetime(2008, 8, 29), False), - (offset_lom_aug_sat, datetime(2009, 8, 28), False), - (offset_lom_aug_sat, datetime(2010, 8, 27), False), - (offset_lom_aug_sat, datetime(2011, 8, 26), False), - (offset_lom_aug_sat, datetime(2019, 8, 30), False), - - # From Micron, see: - # http://google.brand.edgar-online.com/?sym=MU&formtypeID=7 - (offset_lom_aug_thu, datetime(2012, 8, 30), True), - (offset_lom_aug_thu, datetime(2011, 9, 1), True), - - (offset_n, datetime(2012, 12, 31), False), - (offset_n, datetime(2013, 1, 1), True), - (offset_n, datetime(2013, 1, 2), False), - ] - - for offset, dt, expected in tests: - assertOnOffset(offset, dt, expected) - - def test_apply(self): - date_seq_nem_8_sat = [datetime(2006, 9, 2), datetime(2007, 9, 1), - datetime(2008, 8, 30), datetime(2009, 8, 29), - datetime(2010, 8, 28), datetime(2011, 9, 3)] - - JNJ = [datetime(2005, 1, 2), datetime(2006, 1, 1), - datetime(2006, 12, 31), datetime(2007, 12, 30), - 
datetime(2008, 12, 28), datetime(2010, 1, 3), - datetime(2011, 1, 2), datetime(2012, 1, 1), - datetime(2012, 12, 30)] - - DEC_SAT = FY5253(n=-1, startingMonth=12, weekday=5, - variation="nearest") - - tests = [ - (makeFY5253NearestEndMonth(startingMonth=8, - weekday=WeekDay.SAT), - date_seq_nem_8_sat), - (makeFY5253NearestEndMonth(n=1, startingMonth=8, - weekday=WeekDay.SAT), - date_seq_nem_8_sat), - (makeFY5253NearestEndMonth(startingMonth=8, weekday=WeekDay.SAT), - [datetime(2006, 9, 1)] + date_seq_nem_8_sat), - (makeFY5253NearestEndMonth(n=1, startingMonth=8, - weekday=WeekDay.SAT), - [datetime(2006, 9, 3)] + date_seq_nem_8_sat[1:]), - (makeFY5253NearestEndMonth(n=-1, startingMonth=8, - weekday=WeekDay.SAT), - list(reversed(date_seq_nem_8_sat))), - (makeFY5253NearestEndMonth(n=1, startingMonth=12, - weekday=WeekDay.SUN), JNJ), - (makeFY5253NearestEndMonth(n=-1, startingMonth=12, - weekday=WeekDay.SUN), - list(reversed(JNJ))), - (makeFY5253NearestEndMonth(n=1, startingMonth=12, - weekday=WeekDay.SUN), - [datetime(2005, 1, 2), datetime(2006, 1, 1)]), - (makeFY5253NearestEndMonth(n=1, startingMonth=12, - weekday=WeekDay.SUN), - [datetime(2006, 1, 2), datetime(2006, 12, 31)]), - (DEC_SAT, [datetime(2013, 1, 15), datetime(2012, 12, 29)]) - ] - for test in tests: - offset, data = test - current = data[0] - for datum in data[1:]: - current = current + offset - self.assertEqual(current, datum) - - -class TestFY5253LastOfMonthQuarter(Base): - - def test_isAnchored(self): - self.assertTrue( - makeFY5253LastOfMonthQuarter(startingMonth=1, weekday=WeekDay.SAT, - qtr_with_extra_week=4).isAnchored()) - self.assertTrue( - makeFY5253LastOfMonthQuarter(weekday=WeekDay.SAT, startingMonth=3, - qtr_with_extra_week=4).isAnchored()) - self.assertFalse(makeFY5253LastOfMonthQuarter( - 2, startingMonth=1, weekday=WeekDay.SAT, - qtr_with_extra_week=4).isAnchored()) - - def test_equality(self): - self.assertEqual(makeFY5253LastOfMonthQuarter(startingMonth=1, - weekday=WeekDay.SAT, - 
qtr_with_extra_week=4), - makeFY5253LastOfMonthQuarter(startingMonth=1, - weekday=WeekDay.SAT, - qtr_with_extra_week=4)) - self.assertNotEqual( - makeFY5253LastOfMonthQuarter( - startingMonth=1, weekday=WeekDay.SAT, - qtr_with_extra_week=4), - makeFY5253LastOfMonthQuarter( - startingMonth=1, weekday=WeekDay.SUN, - qtr_with_extra_week=4)) - self.assertNotEqual( - makeFY5253LastOfMonthQuarter( - startingMonth=1, weekday=WeekDay.SAT, - qtr_with_extra_week=4), - makeFY5253LastOfMonthQuarter( - startingMonth=2, weekday=WeekDay.SAT, - qtr_with_extra_week=4)) - - def test_offset(self): - offset = makeFY5253LastOfMonthQuarter(1, startingMonth=9, - weekday=WeekDay.SAT, - qtr_with_extra_week=4) - offset2 = makeFY5253LastOfMonthQuarter(2, startingMonth=9, - weekday=WeekDay.SAT, - qtr_with_extra_week=4) - offset4 = makeFY5253LastOfMonthQuarter(4, startingMonth=9, - weekday=WeekDay.SAT, - qtr_with_extra_week=4) - - offset_neg1 = makeFY5253LastOfMonthQuarter(-1, startingMonth=9, - weekday=WeekDay.SAT, - qtr_with_extra_week=4) - offset_neg2 = makeFY5253LastOfMonthQuarter(-2, startingMonth=9, - weekday=WeekDay.SAT, - qtr_with_extra_week=4) - - GMCR = [datetime(2010, 3, 27), datetime(2010, 6, 26), - datetime(2010, 9, 25), datetime(2010, 12, 25), - datetime(2011, 3, 26), datetime(2011, 6, 25), - datetime(2011, 9, 24), datetime(2011, 12, 24), - datetime(2012, 3, 24), datetime(2012, 6, 23), - datetime(2012, 9, 29), datetime(2012, 12, 29), - datetime(2013, 3, 30), datetime(2013, 6, 29)] - - assertEq(offset, base=GMCR[0], expected=GMCR[1]) - assertEq(offset, base=GMCR[0] + relativedelta(days=-1), - expected=GMCR[0]) - assertEq(offset, base=GMCR[1], expected=GMCR[2]) - - assertEq(offset2, base=GMCR[0], expected=GMCR[2]) - assertEq(offset4, base=GMCR[0], expected=GMCR[4]) - - assertEq(offset_neg1, base=GMCR[-1], expected=GMCR[-2]) - assertEq(offset_neg1, base=GMCR[-1] + relativedelta(days=+1), - expected=GMCR[-1]) - assertEq(offset_neg2, base=GMCR[-1], expected=GMCR[-3]) - - date = 
GMCR[0] + relativedelta(days=-1) - for expected in GMCR: - assertEq(offset, date, expected) - date = date + offset - - date = GMCR[-1] + relativedelta(days=+1) - for expected in reversed(GMCR): - assertEq(offset_neg1, date, expected) - date = date + offset_neg1 - - def test_onOffset(self): - lomq_aug_sat_4 = makeFY5253LastOfMonthQuarter(1, startingMonth=8, - weekday=WeekDay.SAT, - qtr_with_extra_week=4) - lomq_sep_sat_4 = makeFY5253LastOfMonthQuarter(1, startingMonth=9, - weekday=WeekDay.SAT, - qtr_with_extra_week=4) - - tests = [ - # From Wikipedia - (lomq_aug_sat_4, datetime(2006, 8, 26), True), - (lomq_aug_sat_4, datetime(2007, 8, 25), True), - (lomq_aug_sat_4, datetime(2008, 8, 30), True), - (lomq_aug_sat_4, datetime(2009, 8, 29), True), - (lomq_aug_sat_4, datetime(2010, 8, 28), True), - (lomq_aug_sat_4, datetime(2011, 8, 27), True), - (lomq_aug_sat_4, datetime(2019, 8, 31), True), - - (lomq_aug_sat_4, datetime(2006, 8, 27), False), - (lomq_aug_sat_4, datetime(2007, 8, 28), False), - (lomq_aug_sat_4, datetime(2008, 8, 31), False), - (lomq_aug_sat_4, datetime(2009, 8, 30), False), - (lomq_aug_sat_4, datetime(2010, 8, 29), False), - (lomq_aug_sat_4, datetime(2011, 8, 28), False), - - (lomq_aug_sat_4, datetime(2006, 8, 25), False), - (lomq_aug_sat_4, datetime(2007, 8, 24), False), - (lomq_aug_sat_4, datetime(2008, 8, 29), False), - (lomq_aug_sat_4, datetime(2009, 8, 28), False), - (lomq_aug_sat_4, datetime(2010, 8, 27), False), - (lomq_aug_sat_4, datetime(2011, 8, 26), False), - (lomq_aug_sat_4, datetime(2019, 8, 30), False), - - # From GMCR - (lomq_sep_sat_4, datetime(2010, 9, 25), True), - (lomq_sep_sat_4, datetime(2011, 9, 24), True), - (lomq_sep_sat_4, datetime(2012, 9, 29), True), - - (lomq_sep_sat_4, datetime(2013, 6, 29), True), - (lomq_sep_sat_4, datetime(2012, 6, 23), True), - (lomq_sep_sat_4, datetime(2012, 6, 30), False), - - (lomq_sep_sat_4, datetime(2013, 3, 30), True), - (lomq_sep_sat_4, datetime(2012, 3, 24), True), - - (lomq_sep_sat_4, 
datetime(2012, 12, 29), True), - (lomq_sep_sat_4, datetime(2011, 12, 24), True), - - # INTC (extra week in Q1) - # See: http://www.intc.com/releasedetail.cfm?ReleaseID=542844 - (makeFY5253LastOfMonthQuarter(1, startingMonth=12, - weekday=WeekDay.SAT, - qtr_with_extra_week=1), - datetime(2011, 4, 2), True), - - # see: http://google.brand.edgar-online.com/?sym=INTC&formtypeID=7 - (makeFY5253LastOfMonthQuarter(1, startingMonth=12, - weekday=WeekDay.SAT, - qtr_with_extra_week=1), - datetime(2012, 12, 29), True), - (makeFY5253LastOfMonthQuarter(1, startingMonth=12, - weekday=WeekDay.SAT, - qtr_with_extra_week=1), - datetime(2011, 12, 31), True), - (makeFY5253LastOfMonthQuarter(1, startingMonth=12, - weekday=WeekDay.SAT, - qtr_with_extra_week=1), - datetime(2010, 12, 25), True), - ] - - for offset, dt, expected in tests: - assertOnOffset(offset, dt, expected) - - def test_year_has_extra_week(self): - # End of long Q1 - self.assertTrue( - makeFY5253LastOfMonthQuarter(1, startingMonth=12, - weekday=WeekDay.SAT, - qtr_with_extra_week=1) - .year_has_extra_week(datetime(2011, 4, 2))) - - # Start of long Q1 - self.assertTrue( - makeFY5253LastOfMonthQuarter( - 1, startingMonth=12, weekday=WeekDay.SAT, - qtr_with_extra_week=1) - .year_has_extra_week(datetime(2010, 12, 26))) - - # End of year before year with long Q1 - self.assertFalse( - makeFY5253LastOfMonthQuarter( - 1, startingMonth=12, weekday=WeekDay.SAT, - qtr_with_extra_week=1) - .year_has_extra_week(datetime(2010, 12, 25))) - - for year in [x - for x in range(1994, 2011 + 1) - if x not in [2011, 2005, 2000, 1994]]: - self.assertFalse( - makeFY5253LastOfMonthQuarter( - 1, startingMonth=12, weekday=WeekDay.SAT, - qtr_with_extra_week=1) - .year_has_extra_week(datetime(year, 4, 2))) - - # Other long years - self.assertTrue( - makeFY5253LastOfMonthQuarter( - 1, startingMonth=12, weekday=WeekDay.SAT, - qtr_with_extra_week=1) - .year_has_extra_week(datetime(2005, 4, 2))) - - self.assertTrue( - makeFY5253LastOfMonthQuarter( - 1, 
startingMonth=12, weekday=WeekDay.SAT, - qtr_with_extra_week=1) - .year_has_extra_week(datetime(2000, 4, 2))) - - self.assertTrue( - makeFY5253LastOfMonthQuarter( - 1, startingMonth=12, weekday=WeekDay.SAT, - qtr_with_extra_week=1) - .year_has_extra_week(datetime(1994, 4, 2))) - - def test_get_weeks(self): - sat_dec_1 = makeFY5253LastOfMonthQuarter(1, startingMonth=12, - weekday=WeekDay.SAT, - qtr_with_extra_week=1) - sat_dec_4 = makeFY5253LastOfMonthQuarter(1, startingMonth=12, - weekday=WeekDay.SAT, - qtr_with_extra_week=4) - - self.assertEqual(sat_dec_1.get_weeks( - datetime(2011, 4, 2)), [14, 13, 13, 13]) - self.assertEqual(sat_dec_4.get_weeks( - datetime(2011, 4, 2)), [13, 13, 13, 14]) - self.assertEqual(sat_dec_1.get_weeks( - datetime(2010, 12, 25)), [13, 13, 13, 13]) - - -class TestFY5253NearestEndMonthQuarter(Base): - - def test_onOffset(self): - - offset_nem_sat_aug_4 = makeFY5253NearestEndMonthQuarter( - 1, startingMonth=8, weekday=WeekDay.SAT, - qtr_with_extra_week=4) - offset_nem_thu_aug_4 = makeFY5253NearestEndMonthQuarter( - 1, startingMonth=8, weekday=WeekDay.THU, - qtr_with_extra_week=4) - offset_n = FY5253(weekday=WeekDay.TUE, startingMonth=12, - variation="nearest", qtr_with_extra_week=4) - - tests = [ - # From Wikipedia - (offset_nem_sat_aug_4, datetime(2006, 9, 2), True), - (offset_nem_sat_aug_4, datetime(2007, 9, 1), True), - (offset_nem_sat_aug_4, datetime(2008, 8, 30), True), - (offset_nem_sat_aug_4, datetime(2009, 8, 29), True), - (offset_nem_sat_aug_4, datetime(2010, 8, 28), True), - (offset_nem_sat_aug_4, datetime(2011, 9, 3), True), - - (offset_nem_sat_aug_4, datetime(2016, 9, 3), True), - (offset_nem_sat_aug_4, datetime(2017, 9, 2), True), - (offset_nem_sat_aug_4, datetime(2018, 9, 1), True), - (offset_nem_sat_aug_4, datetime(2019, 8, 31), True), - - (offset_nem_sat_aug_4, datetime(2006, 8, 27), False), - (offset_nem_sat_aug_4, datetime(2007, 8, 28), False), - (offset_nem_sat_aug_4, datetime(2008, 8, 31), False), - (offset_nem_sat_aug_4, 
datetime(2009, 8, 30), False), - (offset_nem_sat_aug_4, datetime(2010, 8, 29), False), - (offset_nem_sat_aug_4, datetime(2011, 8, 28), False), - - (offset_nem_sat_aug_4, datetime(2006, 8, 25), False), - (offset_nem_sat_aug_4, datetime(2007, 8, 24), False), - (offset_nem_sat_aug_4, datetime(2008, 8, 29), False), - (offset_nem_sat_aug_4, datetime(2009, 8, 28), False), - (offset_nem_sat_aug_4, datetime(2010, 8, 27), False), - (offset_nem_sat_aug_4, datetime(2011, 8, 26), False), - (offset_nem_sat_aug_4, datetime(2019, 8, 30), False), - - # From Micron, see: - # http://google.brand.edgar-online.com/?sym=MU&formtypeID=7 - (offset_nem_thu_aug_4, datetime(2012, 8, 30), True), - (offset_nem_thu_aug_4, datetime(2011, 9, 1), True), - - # See: http://google.brand.edgar-online.com/?sym=MU&formtypeID=13 - (offset_nem_thu_aug_4, datetime(2013, 5, 30), True), - (offset_nem_thu_aug_4, datetime(2013, 2, 28), True), - (offset_nem_thu_aug_4, datetime(2012, 11, 29), True), - (offset_nem_thu_aug_4, datetime(2012, 5, 31), True), - (offset_nem_thu_aug_4, datetime(2007, 3, 1), True), - (offset_nem_thu_aug_4, datetime(1994, 3, 3), True), - - (offset_n, datetime(2012, 12, 31), False), - (offset_n, datetime(2013, 1, 1), True), - (offset_n, datetime(2013, 1, 2), False) - ] - - for offset, dt, expected in tests: - assertOnOffset(offset, dt, expected) - - def test_offset(self): - offset = makeFY5253NearestEndMonthQuarter(1, startingMonth=8, - weekday=WeekDay.THU, - qtr_with_extra_week=4) - - MU = [datetime(2012, 5, 31), datetime(2012, 8, 30), datetime(2012, 11, - 29), - datetime(2013, 2, 28), datetime(2013, 5, 30)] - - date = MU[0] + relativedelta(days=-1) - for expected in MU: - assertEq(offset, date, expected) - date = date + offset - - assertEq(offset, datetime(2012, 5, 31), datetime(2012, 8, 30)) - assertEq(offset, datetime(2012, 5, 30), datetime(2012, 5, 31)) - - offset2 = FY5253Quarter(weekday=5, startingMonth=12, variation="last", - qtr_with_extra_week=4) - - assertEq(offset2, 
datetime(2013, 1, 15), datetime(2013, 3, 30)) - - -class TestQuarterBegin(Base): - - def test_repr(self): - self.assertEqual(repr(QuarterBegin()), - "") - self.assertEqual(repr(QuarterBegin(startingMonth=3)), - "") - self.assertEqual(repr(QuarterBegin(startingMonth=1)), - "") - - def test_isAnchored(self): - self.assertTrue(QuarterBegin(startingMonth=1).isAnchored()) - self.assertTrue(QuarterBegin().isAnchored()) - self.assertFalse(QuarterBegin(2, startingMonth=1).isAnchored()) - - def test_offset(self): - tests = [] - - tests.append((QuarterBegin(startingMonth=1), - {datetime(2007, 12, 1): datetime(2008, 1, 1), - datetime(2008, 1, 1): datetime(2008, 4, 1), - datetime(2008, 2, 15): datetime(2008, 4, 1), - datetime(2008, 2, 29): datetime(2008, 4, 1), - datetime(2008, 3, 15): datetime(2008, 4, 1), - datetime(2008, 3, 31): datetime(2008, 4, 1), - datetime(2008, 4, 15): datetime(2008, 7, 1), - datetime(2008, 4, 1): datetime(2008, 7, 1), })) - - tests.append((QuarterBegin(startingMonth=2), - {datetime(2008, 1, 1): datetime(2008, 2, 1), - datetime(2008, 1, 31): datetime(2008, 2, 1), - datetime(2008, 1, 15): datetime(2008, 2, 1), - datetime(2008, 2, 29): datetime(2008, 5, 1), - datetime(2008, 3, 15): datetime(2008, 5, 1), - datetime(2008, 3, 31): datetime(2008, 5, 1), - datetime(2008, 4, 15): datetime(2008, 5, 1), - datetime(2008, 4, 30): datetime(2008, 5, 1), })) - - tests.append((QuarterBegin(startingMonth=1, n=0), - {datetime(2008, 1, 1): datetime(2008, 1, 1), - datetime(2008, 12, 1): datetime(2009, 1, 1), - datetime(2008, 1, 1): datetime(2008, 1, 1), - datetime(2008, 2, 15): datetime(2008, 4, 1), - datetime(2008, 2, 29): datetime(2008, 4, 1), - datetime(2008, 3, 15): datetime(2008, 4, 1), - datetime(2008, 3, 31): datetime(2008, 4, 1), - datetime(2008, 4, 15): datetime(2008, 7, 1), - datetime(2008, 4, 30): datetime(2008, 7, 1), })) - - tests.append((QuarterBegin(startingMonth=1, n=-1), - {datetime(2008, 1, 1): datetime(2007, 10, 1), - datetime(2008, 1, 31): 
datetime(2008, 1, 1), - datetime(2008, 2, 15): datetime(2008, 1, 1), - datetime(2008, 2, 29): datetime(2008, 1, 1), - datetime(2008, 3, 15): datetime(2008, 1, 1), - datetime(2008, 3, 31): datetime(2008, 1, 1), - datetime(2008, 4, 15): datetime(2008, 4, 1), - datetime(2008, 4, 30): datetime(2008, 4, 1), - datetime(2008, 7, 1): datetime(2008, 4, 1)})) - - tests.append((QuarterBegin(startingMonth=1, n=2), - {datetime(2008, 1, 1): datetime(2008, 7, 1), - datetime(2008, 2, 15): datetime(2008, 7, 1), - datetime(2008, 2, 29): datetime(2008, 7, 1), - datetime(2008, 3, 15): datetime(2008, 7, 1), - datetime(2008, 3, 31): datetime(2008, 7, 1), - datetime(2008, 4, 15): datetime(2008, 10, 1), - datetime(2008, 4, 1): datetime(2008, 10, 1), })) - - for offset, cases in tests: - for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) - - # corner - offset = QuarterBegin(n=-1, startingMonth=1) - self.assertEqual(datetime(2010, 2, 1) + offset, datetime(2010, 1, 1)) - - -class TestQuarterEnd(Base): - _offset = QuarterEnd - - def test_repr(self): - self.assertEqual(repr(QuarterEnd()), "") - self.assertEqual(repr(QuarterEnd(startingMonth=3)), - "") - self.assertEqual(repr(QuarterEnd(startingMonth=1)), - "") - - def test_isAnchored(self): - self.assertTrue(QuarterEnd(startingMonth=1).isAnchored()) - self.assertTrue(QuarterEnd().isAnchored()) - self.assertFalse(QuarterEnd(2, startingMonth=1).isAnchored()) - - def test_offset(self): - tests = [] - - tests.append((QuarterEnd(startingMonth=1), - {datetime(2008, 1, 1): datetime(2008, 1, 31), - datetime(2008, 1, 31): datetime(2008, 4, 30), - datetime(2008, 2, 15): datetime(2008, 4, 30), - datetime(2008, 2, 29): datetime(2008, 4, 30), - datetime(2008, 3, 15): datetime(2008, 4, 30), - datetime(2008, 3, 31): datetime(2008, 4, 30), - datetime(2008, 4, 15): datetime(2008, 4, 30), - datetime(2008, 4, 30): datetime(2008, 7, 31), })) - - tests.append((QuarterEnd(startingMonth=2), - {datetime(2008, 1, 1): datetime(2008, 2, 
29), - datetime(2008, 1, 31): datetime(2008, 2, 29), - datetime(2008, 2, 15): datetime(2008, 2, 29), - datetime(2008, 2, 29): datetime(2008, 5, 31), - datetime(2008, 3, 15): datetime(2008, 5, 31), - datetime(2008, 3, 31): datetime(2008, 5, 31), - datetime(2008, 4, 15): datetime(2008, 5, 31), - datetime(2008, 4, 30): datetime(2008, 5, 31), })) - - tests.append((QuarterEnd(startingMonth=1, n=0), - {datetime(2008, 1, 1): datetime(2008, 1, 31), - datetime(2008, 1, 31): datetime(2008, 1, 31), - datetime(2008, 2, 15): datetime(2008, 4, 30), - datetime(2008, 2, 29): datetime(2008, 4, 30), - datetime(2008, 3, 15): datetime(2008, 4, 30), - datetime(2008, 3, 31): datetime(2008, 4, 30), - datetime(2008, 4, 15): datetime(2008, 4, 30), - datetime(2008, 4, 30): datetime(2008, 4, 30), })) - - tests.append((QuarterEnd(startingMonth=1, n=-1), - {datetime(2008, 1, 1): datetime(2007, 10, 31), - datetime(2008, 1, 31): datetime(2007, 10, 31), - datetime(2008, 2, 15): datetime(2008, 1, 31), - datetime(2008, 2, 29): datetime(2008, 1, 31), - datetime(2008, 3, 15): datetime(2008, 1, 31), - datetime(2008, 3, 31): datetime(2008, 1, 31), - datetime(2008, 4, 15): datetime(2008, 1, 31), - datetime(2008, 4, 30): datetime(2008, 1, 31), - datetime(2008, 7, 1): datetime(2008, 4, 30)})) - - tests.append((QuarterEnd(startingMonth=1, n=2), - {datetime(2008, 1, 31): datetime(2008, 7, 31), - datetime(2008, 2, 15): datetime(2008, 7, 31), - datetime(2008, 2, 29): datetime(2008, 7, 31), - datetime(2008, 3, 15): datetime(2008, 7, 31), - datetime(2008, 3, 31): datetime(2008, 7, 31), - datetime(2008, 4, 15): datetime(2008, 7, 31), - datetime(2008, 4, 30): datetime(2008, 10, 31), })) - - for offset, cases in tests: - for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) - - # corner - offset = QuarterEnd(n=-1, startingMonth=1) - self.assertEqual(datetime(2010, 2, 1) + offset, datetime(2010, 1, 31)) - - def test_onOffset(self): - - tests = [(QuarterEnd(1, startingMonth=1), 
datetime(2008, 1, 31), True), - (QuarterEnd(1, startingMonth=1), datetime(2007, 12, 31), - False), - (QuarterEnd(1, startingMonth=1), datetime(2008, 2, 29), - False), - (QuarterEnd(1, startingMonth=1), datetime(2007, 3, 30), - False), - (QuarterEnd(1, startingMonth=1), datetime(2007, 3, 31), - False), - (QuarterEnd(1, startingMonth=1), datetime(2008, 4, 30), True), - (QuarterEnd(1, startingMonth=1), datetime(2008, 5, 30), - False), - (QuarterEnd(1, startingMonth=1), datetime(2008, 5, 31), - False), - (QuarterEnd(1, startingMonth=1), datetime(2007, 6, 29), - False), - (QuarterEnd(1, startingMonth=1), datetime(2007, 6, 30), - False), - (QuarterEnd(1, startingMonth=2), datetime(2008, 1, 31), - False), - (QuarterEnd(1, startingMonth=2), datetime(2007, 12, 31), - False), - (QuarterEnd(1, startingMonth=2), datetime(2008, 2, 29), True), - (QuarterEnd(1, startingMonth=2), datetime(2007, 3, 30), - False), - (QuarterEnd(1, startingMonth=2), datetime(2007, 3, 31), - False), - (QuarterEnd(1, startingMonth=2), datetime(2008, 4, 30), - False), - (QuarterEnd(1, startingMonth=2), datetime(2008, 5, 30), - False), - (QuarterEnd(1, startingMonth=2), datetime(2008, 5, 31), True), - (QuarterEnd(1, startingMonth=2), datetime(2007, 6, 29), - False), - (QuarterEnd(1, startingMonth=2), datetime(2007, 6, 30), - False), - (QuarterEnd(1, startingMonth=3), datetime(2008, 1, 31), - False), - (QuarterEnd(1, startingMonth=3), datetime(2007, 12, 31), - True), - (QuarterEnd(1, startingMonth=3), datetime(2008, 2, 29), - False), - (QuarterEnd(1, startingMonth=3), datetime(2007, 3, 30), - False), - (QuarterEnd(1, startingMonth=3), datetime(2007, 3, 31), True), - (QuarterEnd(1, startingMonth=3), datetime(2008, 4, 30), - False), - (QuarterEnd(1, startingMonth=3), datetime(2008, 5, 30), - False), - (QuarterEnd(1, startingMonth=3), datetime(2008, 5, 31), - False), - (QuarterEnd(1, startingMonth=3), datetime(2007, 6, 29), - False), - (QuarterEnd(1, startingMonth=3), datetime(2007, 6, 30), - True), ] - - 
for offset, dt, expected in tests: - assertOnOffset(offset, dt, expected) - - -class TestBYearBegin(Base): - _offset = BYearBegin - - def test_misspecified(self): - self.assertRaises(ValueError, BYearBegin, month=13) - self.assertRaises(ValueError, BYearEnd, month=13) - - def test_offset(self): - tests = [] - - tests.append((BYearBegin(), - {datetime(2008, 1, 1): datetime(2009, 1, 1), - datetime(2008, 6, 30): datetime(2009, 1, 1), - datetime(2008, 12, 31): datetime(2009, 1, 1), - datetime(2011, 1, 1): datetime(2011, 1, 3), - datetime(2011, 1, 3): datetime(2012, 1, 2), - datetime(2005, 12, 30): datetime(2006, 1, 2), - datetime(2005, 12, 31): datetime(2006, 1, 2)})) - - tests.append((BYearBegin(0), - {datetime(2008, 1, 1): datetime(2008, 1, 1), - datetime(2008, 6, 30): datetime(2009, 1, 1), - datetime(2008, 12, 31): datetime(2009, 1, 1), - datetime(2005, 12, 30): datetime(2006, 1, 2), - datetime(2005, 12, 31): datetime(2006, 1, 2), })) - - tests.append((BYearBegin(-1), - {datetime(2007, 1, 1): datetime(2006, 1, 2), - datetime(2009, 1, 4): datetime(2009, 1, 1), - datetime(2009, 1, 1): datetime(2008, 1, 1), - datetime(2008, 6, 30): datetime(2008, 1, 1), - datetime(2008, 12, 31): datetime(2008, 1, 1), - datetime(2006, 12, 29): datetime(2006, 1, 2), - datetime(2006, 12, 30): datetime(2006, 1, 2), - datetime(2006, 1, 1): datetime(2005, 1, 3), })) - - tests.append((BYearBegin(-2), - {datetime(2007, 1, 1): datetime(2005, 1, 3), - datetime(2007, 6, 30): datetime(2006, 1, 2), - datetime(2008, 12, 31): datetime(2007, 1, 1), })) - - for offset, cases in tests: - for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) - - -class TestYearBegin(Base): - _offset = YearBegin - - def test_misspecified(self): - self.assertRaises(ValueError, YearBegin, month=13) - - def test_offset(self): - tests = [] - - tests.append((YearBegin(), - {datetime(2008, 1, 1): datetime(2009, 1, 1), - datetime(2008, 6, 30): datetime(2009, 1, 1), - datetime(2008, 12, 31): 
datetime(2009, 1, 1), - datetime(2005, 12, 30): datetime(2006, 1, 1), - datetime(2005, 12, 31): datetime(2006, 1, 1), })) - - tests.append((YearBegin(0), - {datetime(2008, 1, 1): datetime(2008, 1, 1), - datetime(2008, 6, 30): datetime(2009, 1, 1), - datetime(2008, 12, 31): datetime(2009, 1, 1), - datetime(2005, 12, 30): datetime(2006, 1, 1), - datetime(2005, 12, 31): datetime(2006, 1, 1), })) - - tests.append((YearBegin(3), - {datetime(2008, 1, 1): datetime(2011, 1, 1), - datetime(2008, 6, 30): datetime(2011, 1, 1), - datetime(2008, 12, 31): datetime(2011, 1, 1), - datetime(2005, 12, 30): datetime(2008, 1, 1), - datetime(2005, 12, 31): datetime(2008, 1, 1), })) - - tests.append((YearBegin(-1), - {datetime(2007, 1, 1): datetime(2006, 1, 1), - datetime(2007, 1, 15): datetime(2007, 1, 1), - datetime(2008, 6, 30): datetime(2008, 1, 1), - datetime(2008, 12, 31): datetime(2008, 1, 1), - datetime(2006, 12, 29): datetime(2006, 1, 1), - datetime(2006, 12, 30): datetime(2006, 1, 1), - datetime(2007, 1, 1): datetime(2006, 1, 1), })) - - tests.append((YearBegin(-2), - {datetime(2007, 1, 1): datetime(2005, 1, 1), - datetime(2008, 6, 30): datetime(2007, 1, 1), - datetime(2008, 12, 31): datetime(2007, 1, 1), })) - - tests.append((YearBegin(month=4), - {datetime(2007, 4, 1): datetime(2008, 4, 1), - datetime(2007, 4, 15): datetime(2008, 4, 1), - datetime(2007, 3, 1): datetime(2007, 4, 1), - datetime(2007, 12, 15): datetime(2008, 4, 1), - datetime(2012, 1, 31): datetime(2012, 4, 1), })) - - tests.append((YearBegin(0, month=4), - {datetime(2007, 4, 1): datetime(2007, 4, 1), - datetime(2007, 3, 1): datetime(2007, 4, 1), - datetime(2007, 12, 15): datetime(2008, 4, 1), - datetime(2012, 1, 31): datetime(2012, 4, 1), })) - - tests.append((YearBegin(4, month=4), - {datetime(2007, 4, 1): datetime(2011, 4, 1), - datetime(2007, 4, 15): datetime(2011, 4, 1), - datetime(2007, 3, 1): datetime(2010, 4, 1), - datetime(2007, 12, 15): datetime(2011, 4, 1), - datetime(2012, 1, 31): datetime(2015, 4, 
1), })) - - tests.append((YearBegin(-1, month=4), - {datetime(2007, 4, 1): datetime(2006, 4, 1), - datetime(2007, 3, 1): datetime(2006, 4, 1), - datetime(2007, 12, 15): datetime(2007, 4, 1), - datetime(2012, 1, 31): datetime(2011, 4, 1), })) - - tests.append((YearBegin(-3, month=4), - {datetime(2007, 4, 1): datetime(2004, 4, 1), - datetime(2007, 3, 1): datetime(2004, 4, 1), - datetime(2007, 12, 15): datetime(2005, 4, 1), - datetime(2012, 1, 31): datetime(2009, 4, 1), })) - - for offset, cases in tests: - for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) - - def test_onOffset(self): - - tests = [ - (YearBegin(), datetime(2007, 1, 3), False), - (YearBegin(), datetime(2008, 1, 1), True), - (YearBegin(), datetime(2006, 12, 31), False), - (YearBegin(), datetime(2006, 1, 2), False), - ] - - for offset, dt, expected in tests: - assertOnOffset(offset, dt, expected) - - -class TestBYearEndLagged(Base): - - def test_bad_month_fail(self): - self.assertRaises(Exception, BYearEnd, month=13) - self.assertRaises(Exception, BYearEnd, month=0) - - def test_offset(self): - tests = [] - - tests.append((BYearEnd(month=6), - {datetime(2008, 1, 1): datetime(2008, 6, 30), - datetime(2007, 6, 30): datetime(2008, 6, 30)}, )) - - tests.append((BYearEnd(n=-1, month=6), - {datetime(2008, 1, 1): datetime(2007, 6, 29), - datetime(2007, 6, 30): datetime(2007, 6, 29)}, )) - - for offset, cases in tests: - for base, expected in compat.iteritems(cases): - self.assertEqual(base + offset, expected) - - def test_roll(self): - offset = BYearEnd(month=6) - date = datetime(2009, 11, 30) - - self.assertEqual(offset.rollforward(date), datetime(2010, 6, 30)) - self.assertEqual(offset.rollback(date), datetime(2009, 6, 30)) - - def test_onOffset(self): - - tests = [ - (BYearEnd(month=2), datetime(2007, 2, 28), True), - (BYearEnd(month=6), datetime(2007, 6, 30), False), - ] - - for offset, dt, expected in tests: - assertOnOffset(offset, dt, expected) - - -class 
TestBYearEnd(Base): - _offset = BYearEnd - - def test_offset(self): - tests = [] - - tests.append((BYearEnd(), - {datetime(2008, 1, 1): datetime(2008, 12, 31), - datetime(2008, 6, 30): datetime(2008, 12, 31), - datetime(2008, 12, 31): datetime(2009, 12, 31), - datetime(2005, 12, 30): datetime(2006, 12, 29), - datetime(2005, 12, 31): datetime(2006, 12, 29), })) - - tests.append((BYearEnd(0), - {datetime(2008, 1, 1): datetime(2008, 12, 31), - datetime(2008, 6, 30): datetime(2008, 12, 31), - datetime(2008, 12, 31): datetime(2008, 12, 31), - datetime(2005, 12, 31): datetime(2006, 12, 29), })) - - tests.append((BYearEnd(-1), - {datetime(2007, 1, 1): datetime(2006, 12, 29), - datetime(2008, 6, 30): datetime(2007, 12, 31), - datetime(2008, 12, 31): datetime(2007, 12, 31), - datetime(2006, 12, 29): datetime(2005, 12, 30), - datetime(2006, 12, 30): datetime(2006, 12, 29), - datetime(2007, 1, 1): datetime(2006, 12, 29), })) - - tests.append((BYearEnd(-2), - {datetime(2007, 1, 1): datetime(2005, 12, 30), - datetime(2008, 6, 30): datetime(2006, 12, 29), - datetime(2008, 12, 31): datetime(2006, 12, 29), })) - - for offset, cases in tests: - for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) - - def test_onOffset(self): - - tests = [ - (BYearEnd(), datetime(2007, 12, 31), True), - (BYearEnd(), datetime(2008, 1, 1), False), - (BYearEnd(), datetime(2006, 12, 31), False), - (BYearEnd(), datetime(2006, 12, 29), True), - ] - - for offset, dt, expected in tests: - assertOnOffset(offset, dt, expected) - - -class TestYearEnd(Base): - _offset = YearEnd - - def test_misspecified(self): - self.assertRaises(ValueError, YearEnd, month=13) - - def test_offset(self): - tests = [] - - tests.append((YearEnd(), - {datetime(2008, 1, 1): datetime(2008, 12, 31), - datetime(2008, 6, 30): datetime(2008, 12, 31), - datetime(2008, 12, 31): datetime(2009, 12, 31), - datetime(2005, 12, 30): datetime(2005, 12, 31), - datetime(2005, 12, 31): datetime(2006, 12, 31), })) - - 
tests.append((YearEnd(0), - {datetime(2008, 1, 1): datetime(2008, 12, 31), - datetime(2008, 6, 30): datetime(2008, 12, 31), - datetime(2008, 12, 31): datetime(2008, 12, 31), - datetime(2005, 12, 30): datetime(2005, 12, 31), })) - - tests.append((YearEnd(-1), - {datetime(2007, 1, 1): datetime(2006, 12, 31), - datetime(2008, 6, 30): datetime(2007, 12, 31), - datetime(2008, 12, 31): datetime(2007, 12, 31), - datetime(2006, 12, 29): datetime(2005, 12, 31), - datetime(2006, 12, 30): datetime(2005, 12, 31), - datetime(2007, 1, 1): datetime(2006, 12, 31), })) - - tests.append((YearEnd(-2), - {datetime(2007, 1, 1): datetime(2005, 12, 31), - datetime(2008, 6, 30): datetime(2006, 12, 31), - datetime(2008, 12, 31): datetime(2006, 12, 31), })) - - for offset, cases in tests: - for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) - - def test_onOffset(self): - - tests = [ - (YearEnd(), datetime(2007, 12, 31), True), - (YearEnd(), datetime(2008, 1, 1), False), - (YearEnd(), datetime(2006, 12, 31), True), - (YearEnd(), datetime(2006, 12, 29), False), - ] - - for offset, dt, expected in tests: - assertOnOffset(offset, dt, expected) - - -class TestYearEndDiffMonth(Base): - - def test_offset(self): - tests = [] - - tests.append((YearEnd(month=3), - {datetime(2008, 1, 1): datetime(2008, 3, 31), - datetime(2008, 2, 15): datetime(2008, 3, 31), - datetime(2008, 3, 31): datetime(2009, 3, 31), - datetime(2008, 3, 30): datetime(2008, 3, 31), - datetime(2005, 3, 31): datetime(2006, 3, 31), - datetime(2006, 7, 30): datetime(2007, 3, 31)})) - - tests.append((YearEnd(0, month=3), - {datetime(2008, 1, 1): datetime(2008, 3, 31), - datetime(2008, 2, 28): datetime(2008, 3, 31), - datetime(2008, 3, 31): datetime(2008, 3, 31), - datetime(2005, 3, 30): datetime(2005, 3, 31), })) - - tests.append((YearEnd(-1, month=3), - {datetime(2007, 1, 1): datetime(2006, 3, 31), - datetime(2008, 2, 28): datetime(2007, 3, 31), - datetime(2008, 3, 31): datetime(2007, 3, 31), - 
datetime(2006, 3, 29): datetime(2005, 3, 31), - datetime(2006, 3, 30): datetime(2005, 3, 31), - datetime(2007, 3, 1): datetime(2006, 3, 31), })) - - tests.append((YearEnd(-2, month=3), - {datetime(2007, 1, 1): datetime(2005, 3, 31), - datetime(2008, 6, 30): datetime(2007, 3, 31), - datetime(2008, 3, 31): datetime(2006, 3, 31), })) - - for offset, cases in tests: - for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) - - def test_onOffset(self): - - tests = [ - (YearEnd(month=3), datetime(2007, 3, 31), True), - (YearEnd(month=3), datetime(2008, 1, 1), False), - (YearEnd(month=3), datetime(2006, 3, 31), True), - (YearEnd(month=3), datetime(2006, 3, 29), False), - ] - - for offset, dt, expected in tests: - assertOnOffset(offset, dt, expected) - - -def assertEq(offset, base, expected): - actual = offset + base - actual_swapped = base + offset - actual_apply = offset.apply(base) - try: - assert actual == expected - assert actual_swapped == expected - assert actual_apply == expected - except AssertionError: - raise AssertionError("\nExpected: %s\nActual: %s\nFor Offset: %s)" - "\nAt Date: %s" % - (expected, actual, offset, base)) - - -def test_Easter(): - assertEq(Easter(), datetime(2010, 1, 1), datetime(2010, 4, 4)) - assertEq(Easter(), datetime(2010, 4, 5), datetime(2011, 4, 24)) - assertEq(Easter(2), datetime(2010, 1, 1), datetime(2011, 4, 24)) - - assertEq(Easter(), datetime(2010, 4, 4), datetime(2011, 4, 24)) - assertEq(Easter(2), datetime(2010, 4, 4), datetime(2012, 4, 8)) - - assertEq(-Easter(), datetime(2011, 1, 1), datetime(2010, 4, 4)) - assertEq(-Easter(), datetime(2010, 4, 5), datetime(2010, 4, 4)) - assertEq(-Easter(2), datetime(2011, 1, 1), datetime(2009, 4, 12)) - - assertEq(-Easter(), datetime(2010, 4, 4), datetime(2009, 4, 12)) - assertEq(-Easter(2), datetime(2010, 4, 4), datetime(2008, 3, 23)) - - -class TestTicks(tm.TestCase): - - ticks = [Hour, Minute, Second, Milli, Micro, Nano] - - def test_ticks(self): - offsets = 
[(Hour, Timedelta(hours=5)), - (Minute, Timedelta(hours=2, minutes=3)), - (Second, Timedelta(hours=2, seconds=3)), - (Milli, Timedelta(hours=2, milliseconds=3)), - (Micro, Timedelta(hours=2, microseconds=3)), - (Nano, Timedelta(hours=2, nanoseconds=3))] - - for kls, expected in offsets: - offset = kls(3) - result = offset + Timedelta(hours=2) - self.assertTrue(isinstance(result, Timedelta)) - self.assertEqual(result, expected) - - def test_Hour(self): - assertEq(Hour(), datetime(2010, 1, 1), datetime(2010, 1, 1, 1)) - assertEq(Hour(-1), datetime(2010, 1, 1, 1), datetime(2010, 1, 1)) - assertEq(2 * Hour(), datetime(2010, 1, 1), datetime(2010, 1, 1, 2)) - assertEq(-1 * Hour(), datetime(2010, 1, 1, 1), datetime(2010, 1, 1)) - - self.assertEqual(Hour(3) + Hour(2), Hour(5)) - self.assertEqual(Hour(3) - Hour(2), Hour()) - - self.assertNotEqual(Hour(4), Hour(1)) - - def test_Minute(self): - assertEq(Minute(), datetime(2010, 1, 1), datetime(2010, 1, 1, 0, 1)) - assertEq(Minute(-1), datetime(2010, 1, 1, 0, 1), datetime(2010, 1, 1)) - assertEq(2 * Minute(), datetime(2010, 1, 1), - datetime(2010, 1, 1, 0, 2)) - assertEq(-1 * Minute(), datetime(2010, 1, 1, 0, 1), - datetime(2010, 1, 1)) - - self.assertEqual(Minute(3) + Minute(2), Minute(5)) - self.assertEqual(Minute(3) - Minute(2), Minute()) - self.assertNotEqual(Minute(5), Minute()) - - def test_Second(self): - assertEq(Second(), datetime(2010, 1, 1), datetime(2010, 1, 1, 0, 0, 1)) - assertEq(Second(-1), datetime(2010, 1, 1, - 0, 0, 1), datetime(2010, 1, 1)) - assertEq(2 * Second(), datetime(2010, 1, 1), - datetime(2010, 1, 1, 0, 0, 2)) - assertEq(-1 * Second(), datetime(2010, 1, 1, 0, 0, 1), - datetime(2010, 1, 1)) - - self.assertEqual(Second(3) + Second(2), Second(5)) - self.assertEqual(Second(3) - Second(2), Second()) - - def test_Millisecond(self): - assertEq(Milli(), datetime(2010, 1, 1), - datetime(2010, 1, 1, 0, 0, 0, 1000)) - assertEq(Milli(-1), datetime(2010, 1, 1, 0, - 0, 0, 1000), datetime(2010, 1, 1)) - 
assertEq(Milli(2), datetime(2010, 1, 1), - datetime(2010, 1, 1, 0, 0, 0, 2000)) - assertEq(2 * Milli(), datetime(2010, 1, 1), - datetime(2010, 1, 1, 0, 0, 0, 2000)) - assertEq(-1 * Milli(), datetime(2010, 1, 1, 0, 0, 0, 1000), - datetime(2010, 1, 1)) - - self.assertEqual(Milli(3) + Milli(2), Milli(5)) - self.assertEqual(Milli(3) - Milli(2), Milli()) - - def test_MillisecondTimestampArithmetic(self): - assertEq(Milli(), Timestamp('2010-01-01'), - Timestamp('2010-01-01 00:00:00.001')) - assertEq(Milli(-1), Timestamp('2010-01-01 00:00:00.001'), - Timestamp('2010-01-01')) - - def test_Microsecond(self): - assertEq(Micro(), datetime(2010, 1, 1), - datetime(2010, 1, 1, 0, 0, 0, 1)) - assertEq(Micro(-1), datetime(2010, 1, 1, - 0, 0, 0, 1), datetime(2010, 1, 1)) - assertEq(2 * Micro(), datetime(2010, 1, 1), - datetime(2010, 1, 1, 0, 0, 0, 2)) - assertEq(-1 * Micro(), datetime(2010, 1, 1, 0, 0, 0, 1), - datetime(2010, 1, 1)) - - self.assertEqual(Micro(3) + Micro(2), Micro(5)) - self.assertEqual(Micro(3) - Micro(2), Micro()) - - def test_NanosecondGeneric(self): - timestamp = Timestamp(datetime(2010, 1, 1)) - self.assertEqual(timestamp.nanosecond, 0) - - result = timestamp + Nano(10) - self.assertEqual(result.nanosecond, 10) - - reverse_result = Nano(10) + timestamp - self.assertEqual(reverse_result.nanosecond, 10) - - def test_Nanosecond(self): - timestamp = Timestamp(datetime(2010, 1, 1)) - assertEq(Nano(), timestamp, timestamp + np.timedelta64(1, 'ns')) - assertEq(Nano(-1), timestamp + np.timedelta64(1, 'ns'), timestamp) - assertEq(2 * Nano(), timestamp, timestamp + np.timedelta64(2, 'ns')) - assertEq(-1 * Nano(), timestamp + np.timedelta64(1, 'ns'), timestamp) - - self.assertEqual(Nano(3) + Nano(2), Nano(5)) - self.assertEqual(Nano(3) - Nano(2), Nano()) - - # GH9284 - self.assertEqual(Nano(1) + Nano(10), Nano(11)) - self.assertEqual(Nano(5) + Micro(1), Nano(1005)) - self.assertEqual(Micro(5) + Nano(1), Nano(5001)) - - def test_tick_zero(self): - for t1 in self.ticks: - 
for t2 in self.ticks: - self.assertEqual(t1(0), t2(0)) - self.assertEqual(t1(0) + t2(0), t1(0)) - - if t1 is not Nano: - self.assertEqual(t1(2) + t2(0), t1(2)) - if t1 is Nano: - self.assertEqual(t1(2) + Nano(0), t1(2)) - - def test_tick_equalities(self): - for t in self.ticks: - self.assertEqual(t(3), t(3)) - self.assertEqual(t(), t(1)) - - # not equals - self.assertNotEqual(t(3), t(2)) - self.assertNotEqual(t(3), t(-3)) - - def test_tick_operators(self): - for t in self.ticks: - self.assertEqual(t(3) + t(2), t(5)) - self.assertEqual(t(3) - t(2), t(1)) - self.assertEqual(t(800) + t(300), t(1100)) - self.assertEqual(t(1000) - t(5), t(995)) - - def test_tick_offset(self): - for t in self.ticks: - self.assertFalse(t().isAnchored()) - - def test_compare_ticks(self): - for kls in self.ticks: - three = kls(3) - four = kls(4) - - for _ in range(10): - self.assertTrue(three < kls(4)) - self.assertTrue(kls(3) < four) - self.assertTrue(four > kls(3)) - self.assertTrue(kls(4) > three) - self.assertTrue(kls(3) == kls(3)) - self.assertTrue(kls(3) != kls(4)) - - -class TestOffsetNames(tm.TestCase): - - def test_get_offset_name(self): - self.assertEqual(BDay().freqstr, 'B') - self.assertEqual(BDay(2).freqstr, '2B') - self.assertEqual(BMonthEnd().freqstr, 'BM') - self.assertEqual(Week(weekday=0).freqstr, 'W-MON') - self.assertEqual(Week(weekday=1).freqstr, 'W-TUE') - self.assertEqual(Week(weekday=2).freqstr, 'W-WED') - self.assertEqual(Week(weekday=3).freqstr, 'W-THU') - self.assertEqual(Week(weekday=4).freqstr, 'W-FRI') - - self.assertEqual(LastWeekOfMonth( - weekday=WeekDay.SUN).freqstr, "LWOM-SUN") - self.assertEqual( - makeFY5253LastOfMonthQuarter(weekday=1, startingMonth=3, - qtr_with_extra_week=4).freqstr, - "REQ-L-MAR-TUE-4") - self.assertEqual( - makeFY5253NearestEndMonthQuarter(weekday=1, startingMonth=3, - qtr_with_extra_week=3).freqstr, - "REQ-N-MAR-TUE-3") - - -def test_get_offset(): - with tm.assertRaisesRegexp(ValueError, _INVALID_FREQ_ERROR): - 
get_offset('gibberish') - with tm.assertRaisesRegexp(ValueError, _INVALID_FREQ_ERROR): - get_offset('QS-JAN-B') - - pairs = [ - ('B', BDay()), ('b', BDay()), ('bm', BMonthEnd()), - ('Bm', BMonthEnd()), ('W-MON', Week(weekday=0)), - ('W-TUE', Week(weekday=1)), ('W-WED', Week(weekday=2)), - ('W-THU', Week(weekday=3)), ('W-FRI', Week(weekday=4)), - ("RE-N-DEC-MON", makeFY5253NearestEndMonth(weekday=0, - startingMonth=12)), - ("RE-L-DEC-TUE", makeFY5253LastOfMonth(weekday=1, startingMonth=12)), - ("REQ-L-MAR-TUE-4", makeFY5253LastOfMonthQuarter( - weekday=1, startingMonth=3, qtr_with_extra_week=4)), - ("REQ-L-DEC-MON-3", makeFY5253LastOfMonthQuarter( - weekday=0, startingMonth=12, qtr_with_extra_week=3)), - ("REQ-N-DEC-MON-3", makeFY5253NearestEndMonthQuarter( - weekday=0, startingMonth=12, qtr_with_extra_week=3)), - ] - - for name, expected in pairs: - offset = get_offset(name) - assert offset == expected, ("Expected %r to yield %r (actual: %r)" % - (name, expected, offset)) - - -def test_get_offset_legacy(): - pairs = [('w@Sat', Week(weekday=5))] - for name, expected in pairs: - with tm.assertRaisesRegexp(ValueError, _INVALID_FREQ_ERROR): - get_offset(name) - - -class TestParseTimeString(tm.TestCase): - - def test_parse_time_string(self): - (date, parsed, reso) = parse_time_string('4Q1984') - (date_lower, parsed_lower, reso_lower) = parse_time_string('4q1984') - self.assertEqual(date, date_lower) - self.assertEqual(parsed, parsed_lower) - self.assertEqual(reso, reso_lower) - - def test_parse_time_quarter_w_dash(self): - # https://github.com/pandas-dev/pandas/issue/9688 - pairs = [('1988-Q2', '1988Q2'), ('2Q-1988', '2Q1988'), ] - - for dashed, normal in pairs: - (date_dash, parsed_dash, reso_dash) = parse_time_string(dashed) - (date, parsed, reso) = parse_time_string(normal) - - self.assertEqual(date_dash, date) - self.assertEqual(parsed_dash, parsed) - self.assertEqual(reso_dash, reso) - - self.assertRaises(DateParseError, parse_time_string, "-2Q1992") - 
self.assertRaises(DateParseError, parse_time_string, "2-Q1992") - self.assertRaises(DateParseError, parse_time_string, "4-4Q1992") - - -def test_get_standard_freq(): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - fstr = get_standard_freq('W') - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - assert fstr == get_standard_freq('w') - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - assert fstr == get_standard_freq('1w') - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - assert fstr == get_standard_freq(('W', 1)) - - with tm.assertRaisesRegexp(ValueError, _INVALID_FREQ_ERROR): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - get_standard_freq('WeEk') - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - fstr = get_standard_freq('5Q') - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - assert fstr == get_standard_freq('5q') - - with tm.assertRaisesRegexp(ValueError, _INVALID_FREQ_ERROR): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - get_standard_freq('5QuarTer') - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - assert fstr == get_standard_freq(('q', 5)) - - -def test_quarterly_dont_normalize(): - date = datetime(2012, 3, 31, 5, 30) - - offsets = (QuarterBegin, QuarterEnd, BQuarterEnd, BQuarterBegin) - - for klass in offsets: - result = date + klass() - assert (result.time() == date.time()) - - -class TestOffsetAliases(tm.TestCase): - - def setUp(self): - _offset_map.clear() - - def test_alias_equality(self): - for k, v in compat.iteritems(_offset_map): - if v is None: - continue - self.assertEqual(k, v.copy()) - - def test_rule_code(self): - lst = ['M', 'MS', 'BM', 'BMS', 'D', 'B', 'H', 'T', 'S', 'L', 'U'] - for k in lst: - self.assertEqual(k, get_offset(k).rule_code) - # should be cached - this is kind of an internals test... 
- assert k in _offset_map - self.assertEqual(k, (get_offset(k) * 3).rule_code) - - suffix_lst = ['MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN'] - base = 'W' - for v in suffix_lst: - alias = '-'.join([base, v]) - self.assertEqual(alias, get_offset(alias).rule_code) - self.assertEqual(alias, (get_offset(alias) * 5).rule_code) - - suffix_lst = ['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL', 'AUG', - 'SEP', 'OCT', 'NOV', 'DEC'] - base_lst = ['A', 'AS', 'BA', 'BAS', 'Q', 'QS', 'BQ', 'BQS'] - for base in base_lst: - for v in suffix_lst: - alias = '-'.join([base, v]) - self.assertEqual(alias, get_offset(alias).rule_code) - self.assertEqual(alias, (get_offset(alias) * 5).rule_code) - - lst = ['M', 'D', 'B', 'H', 'T', 'S', 'L', 'U'] - for k in lst: - code, stride = get_freq_code('3' + k) - self.assertTrue(isinstance(code, int)) - self.assertEqual(stride, 3) - self.assertEqual(k, _get_freq_str(code)) - - -def test_apply_ticks(): - result = offsets.Hour(3).apply(offsets.Hour(4)) - exp = offsets.Hour(7) - assert (result == exp) - - -def test_delta_to_tick(): - delta = timedelta(3) - - tick = offsets._delta_to_tick(delta) - assert (tick == offsets.Day(3)) - - -def test_dateoffset_misc(): - oset = offsets.DateOffset(months=2, days=4) - # it works - oset.freqstr - - assert (not offsets.DateOffset(months=2) == 2) - - -def test_freq_offsets(): - off = BDay(1, offset=timedelta(0, 1800)) - assert (off.freqstr == 'B+30Min') - - off = BDay(1, offset=timedelta(0, -1800)) - assert (off.freqstr == 'B-30Min') - - -def get_all_subclasses(cls): - ret = set() - this_subclasses = cls.__subclasses__() - ret = ret | set(this_subclasses) - for this_subclass in this_subclasses: - ret | get_all_subclasses(this_subclass) - return ret - - -class TestCaching(tm.TestCase): - - # as of GH 6479 (in 0.14.0), offset caching is turned off - # as of v0.12.0 only BusinessMonth/Quarter were actually caching - - def setUp(self): - _daterange_cache.clear() - _offset_map.clear() - - def 
run_X_index_creation(self, cls): - inst1 = cls() - if not inst1.isAnchored(): - self.assertFalse(inst1._should_cache(), cls) - return - - self.assertTrue(inst1._should_cache(), cls) - - DatetimeIndex(start=datetime(2013, 1, 31), end=datetime(2013, 3, 31), - freq=inst1, normalize=True) - self.assertTrue(cls() in _daterange_cache, cls) - - def test_should_cache_month_end(self): - self.assertFalse(MonthEnd()._should_cache()) - - def test_should_cache_bmonth_end(self): - self.assertFalse(BusinessMonthEnd()._should_cache()) - - def test_should_cache_week_month(self): - self.assertFalse(WeekOfMonth(weekday=1, week=2)._should_cache()) - - def test_all_cacheableoffsets(self): - for subclass in get_all_subclasses(CacheableOffset): - if subclass.__name__[0] == "_" \ - or subclass in TestCaching.no_simple_ctr: - continue - self.run_X_index_creation(subclass) - - def test_month_end_index_creation(self): - DatetimeIndex(start=datetime(2013, 1, 31), end=datetime(2013, 3, 31), - freq=MonthEnd(), normalize=True) - self.assertFalse(MonthEnd() in _daterange_cache) - - def test_bmonth_end_index_creation(self): - DatetimeIndex(start=datetime(2013, 1, 31), end=datetime(2013, 3, 29), - freq=BusinessMonthEnd(), normalize=True) - self.assertFalse(BusinessMonthEnd() in _daterange_cache) - - def test_week_of_month_index_creation(self): - inst1 = WeekOfMonth(weekday=1, week=2) - DatetimeIndex(start=datetime(2013, 1, 31), end=datetime(2013, 3, 29), - freq=inst1, normalize=True) - inst2 = WeekOfMonth(weekday=1, week=2) - self.assertFalse(inst2 in _daterange_cache) - - -class TestReprNames(tm.TestCase): - - def test_str_for_named_is_name(self): - # look at all the amazing combinations! 
- month_prefixes = ['A', 'AS', 'BA', 'BAS', 'Q', 'BQ', 'BQS', 'QS'] - names = [prefix + '-' + month - for prefix in month_prefixes - for month in ['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL', - 'AUG', 'SEP', 'OCT', 'NOV', 'DEC']] - days = ['MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN'] - names += ['W-' + day for day in days] - names += ['WOM-' + week + day - for week in ('1', '2', '3', '4') for day in days] - _offset_map.clear() - for name in names: - offset = get_offset(name) - self.assertEqual(offset.freqstr, name) - - -def get_utc_offset_hours(ts): - # take a Timestamp and compute total hours of utc offset - o = ts.utcoffset() - return (o.days * 24 * 3600 + o.seconds) / 3600.0 - - -class TestDST(tm.TestCase): - """ - test DateOffset additions over Daylight Savings Time - """ - # one microsecond before the DST transition - ts_pre_fallback = "2013-11-03 01:59:59.999999" - ts_pre_springfwd = "2013-03-10 01:59:59.999999" - - # test both basic names and dateutil timezones - timezone_utc_offsets = { - 'US/Eastern': dict(utc_offset_daylight=-4, - utc_offset_standard=-5, ), - 'dateutil/US/Pacific': dict(utc_offset_daylight=-7, - utc_offset_standard=-8, ) - } - valid_date_offsets_singular = [ - 'weekday', 'day', 'hour', 'minute', 'second', 'microsecond' - ] - valid_date_offsets_plural = [ - 'weeks', 'days', - 'hours', 'minutes', 'seconds', - 'milliseconds', 'microseconds' - ] - - def _test_all_offsets(self, n, **kwds): - valid_offsets = self.valid_date_offsets_plural if n > 1 \ - else self.valid_date_offsets_singular - - for name in valid_offsets: - self._test_offset(offset_name=name, offset_n=n, **kwds) - - def _test_offset(self, offset_name, offset_n, tstart, expected_utc_offset): - offset = DateOffset(**{offset_name: offset_n}) - - t = tstart + offset - if expected_utc_offset is not None: - self.assertTrue(get_utc_offset_hours(t) == expected_utc_offset) - - if offset_name == 'weeks': - # dates should match - self.assertTrue(t.date() == timedelta(days=7 * 
offset.kwds[ - 'weeks']) + tstart.date()) - # expect the same day of week, hour of day, minute, second, ... - self.assertTrue(t.dayofweek == tstart.dayofweek and t.hour == - tstart.hour and t.minute == tstart.minute and - t.second == tstart.second) - elif offset_name == 'days': - # dates should match - self.assertTrue(timedelta(offset.kwds['days']) + tstart.date() == - t.date()) - # expect the same hour of day, minute, second, ... - self.assertTrue(t.hour == tstart.hour and - t.minute == tstart.minute and - t.second == tstart.second) - elif offset_name in self.valid_date_offsets_singular: - # expect the signular offset value to match between tstart and t - datepart_offset = getattr(t, offset_name - if offset_name != 'weekday' else - 'dayofweek') - self.assertTrue(datepart_offset == offset.kwds[offset_name]) - else: - # the offset should be the same as if it was done in UTC - self.assertTrue(t == (tstart.tz_convert('UTC') + offset - ).tz_convert('US/Pacific')) - - def _make_timestamp(self, string, hrs_offset, tz): - if hrs_offset >= 0: - offset_string = '{hrs:02d}00'.format(hrs=hrs_offset) - else: - offset_string = '-{hrs:02d}00'.format(hrs=-1 * hrs_offset) - return Timestamp(string + offset_string).tz_convert(tz) - - def test_fallback_plural(self): - # test moving from daylight savings to standard time - import dateutil - for tz, utc_offsets in self.timezone_utc_offsets.items(): - hrs_pre = utc_offsets['utc_offset_daylight'] - hrs_post = utc_offsets['utc_offset_standard'] - - if dateutil.__version__ != LooseVersion('2.6.0'): - # buggy ambiguous behavior in 2.6.0 - # GH 14621 - # https://github.com/dateutil/dateutil/issues/321 - self._test_all_offsets( - n=3, tstart=self._make_timestamp(self.ts_pre_fallback, - hrs_pre, tz), - expected_utc_offset=hrs_post) - - def test_springforward_plural(self): - # test moving from standard to daylight savings - for tz, utc_offsets in self.timezone_utc_offsets.items(): - hrs_pre = utc_offsets['utc_offset_standard'] - hrs_post = 
utc_offsets['utc_offset_daylight'] - self._test_all_offsets( - n=3, tstart=self._make_timestamp(self.ts_pre_springfwd, - hrs_pre, tz), - expected_utc_offset=hrs_post) - - def test_fallback_singular(self): - # in the case of signular offsets, we dont neccesarily know which utc - # offset the new Timestamp will wind up in (the tz for 1 month may be - # different from 1 second) so we don't specify an expected_utc_offset - for tz, utc_offsets in self.timezone_utc_offsets.items(): - hrs_pre = utc_offsets['utc_offset_standard'] - self._test_all_offsets(n=1, tstart=self._make_timestamp( - self.ts_pre_fallback, hrs_pre, tz), expected_utc_offset=None) - - def test_springforward_singular(self): - for tz, utc_offsets in self.timezone_utc_offsets.items(): - hrs_pre = utc_offsets['utc_offset_standard'] - self._test_all_offsets(n=1, tstart=self._make_timestamp( - self.ts_pre_springfwd, hrs_pre, tz), expected_utc_offset=None) - - def test_all_offset_classes(self): - tests = {MonthBegin: ['11/2/2012', '12/1/2012'], - MonthEnd: ['11/2/2012', '11/30/2012'], - BMonthBegin: ['11/2/2012', '12/3/2012'], - BMonthEnd: ['11/2/2012', '11/30/2012'], - CBMonthBegin: ['11/2/2012', '12/3/2012'], - CBMonthEnd: ['11/2/2012', '11/30/2012'], - SemiMonthBegin: ['11/2/2012', '11/15/2012'], - SemiMonthEnd: ['11/2/2012', '11/15/2012'], - Week: ['11/2/2012', '11/9/2012'], - YearBegin: ['11/2/2012', '1/1/2013'], - YearEnd: ['11/2/2012', '12/31/2012'], - BYearBegin: ['11/2/2012', '1/1/2013'], - BYearEnd: ['11/2/2012', '12/31/2012'], - QuarterBegin: ['11/2/2012', '12/1/2012'], - QuarterEnd: ['11/2/2012', '12/31/2012'], - BQuarterBegin: ['11/2/2012', '12/3/2012'], - BQuarterEnd: ['11/2/2012', '12/31/2012'], - Day: ['11/4/2012', '11/4/2012 23:00']} - - for offset, test_values in iteritems(tests): - first = Timestamp(test_values[0], tz='US/Eastern') + offset() - second = Timestamp(test_values[1], tz='US/Eastern') - self.assertEqual(first, second, msg=str(offset)) diff --git 
a/pandas/tests/tseries/test_timezones.py b/pandas/tests/tseries/test_timezones.py deleted file mode 100644 index 771fb2f50c410..0000000000000 --- a/pandas/tests/tseries/test_timezones.py +++ /dev/null @@ -1,1727 +0,0 @@ -# pylint: disable-msg=E1101,W0612 -import pytz -import numpy as np -from distutils.version import LooseVersion -from datetime import datetime, timedelta, tzinfo, date -from pytz import NonExistentTimeError - -import pandas.util.testing as tm -import pandas.tseries.tools as tools -import pandas.tseries.offsets as offsets -from pandas.compat import lrange, zip -from pandas.tseries.index import bdate_range, date_range -from pandas.types.dtypes import DatetimeTZDtype -from pandas import (Index, Series, DataFrame, isnull, Timestamp, tslib, NaT, - DatetimeIndex, to_datetime) -from pandas.util.testing import (assert_frame_equal, assert_series_equal, - set_timezone) - -try: - import pytz # noqa -except ImportError: - pass - -try: - import dateutil -except ImportError: - pass - - -class FixedOffset(tzinfo): - """Fixed offset in minutes east from UTC.""" - - def __init__(self, offset, name): - self.__offset = timedelta(minutes=offset) - self.__name = name - - def utcoffset(self, dt): - return self.__offset - - def tzname(self, dt): - return self.__name - - def dst(self, dt): - return timedelta(0) - - -fixed_off = FixedOffset(-420, '-07:00') -fixed_off_no_name = FixedOffset(-330, None) - - -class TestTimeZoneSupportPytz(tm.TestCase): - - def setUp(self): - tm._skip_if_no_pytz() - - def tz(self, tz): - # Construct a timezone object from a string. Overridden in subclass to - # parameterize tests. - return pytz.timezone(tz) - - def tzstr(self, tz): - # Construct a timezone string from a string. Overridden in subclass to - # parameterize tests. - return tz - - def localize(self, tz, x): - return tz.localize(x) - - def cmptz(self, tz1, tz2): - # Compare two timezones. Overridden in subclass to parameterize - # tests. 
- return tz1.zone == tz2.zone - - def test_utc_to_local_no_modify(self): - rng = date_range('3/11/2012', '3/12/2012', freq='H', tz='utc') - rng_eastern = rng.tz_convert(self.tzstr('US/Eastern')) - - # Values are unmodified - self.assertTrue(np.array_equal(rng.asi8, rng_eastern.asi8)) - - self.assertTrue(self.cmptz(rng_eastern.tz, self.tz('US/Eastern'))) - - def test_utc_to_local_no_modify_explicit(self): - rng = date_range('3/11/2012', '3/12/2012', freq='H', tz='utc') - rng_eastern = rng.tz_convert(self.tz('US/Eastern')) - - # Values are unmodified - self.assert_numpy_array_equal(rng.asi8, rng_eastern.asi8) - - self.assertEqual(rng_eastern.tz, self.tz('US/Eastern')) - - def test_localize_utc_conversion(self): - # Localizing to time zone should: - # 1) check for DST ambiguities - # 2) convert to UTC - - rng = date_range('3/10/2012', '3/11/2012', freq='30T') - - converted = rng.tz_localize(self.tzstr('US/Eastern')) - expected_naive = rng + offsets.Hour(5) - self.assert_numpy_array_equal(converted.asi8, expected_naive.asi8) - - # DST ambiguity, this should fail - rng = date_range('3/11/2012', '3/12/2012', freq='30T') - # Is this really how it should fail?? - self.assertRaises(NonExistentTimeError, rng.tz_localize, - self.tzstr('US/Eastern')) - - def test_localize_utc_conversion_explicit(self): - # Localizing to time zone should: - # 1) check for DST ambiguities - # 2) convert to UTC - - rng = date_range('3/10/2012', '3/11/2012', freq='30T') - converted = rng.tz_localize(self.tz('US/Eastern')) - expected_naive = rng + offsets.Hour(5) - self.assertTrue(np.array_equal(converted.asi8, expected_naive.asi8)) - - # DST ambiguity, this should fail - rng = date_range('3/11/2012', '3/12/2012', freq='30T') - # Is this really how it should fail?? 
- self.assertRaises(NonExistentTimeError, rng.tz_localize, - self.tz('US/Eastern')) - - def test_timestamp_tz_localize(self): - stamp = Timestamp('3/11/2012 04:00') - - result = stamp.tz_localize(self.tzstr('US/Eastern')) - expected = Timestamp('3/11/2012 04:00', tz=self.tzstr('US/Eastern')) - self.assertEqual(result.hour, expected.hour) - self.assertEqual(result, expected) - - def test_timestamp_tz_localize_explicit(self): - stamp = Timestamp('3/11/2012 04:00') - - result = stamp.tz_localize(self.tz('US/Eastern')) - expected = Timestamp('3/11/2012 04:00', tz=self.tz('US/Eastern')) - self.assertEqual(result.hour, expected.hour) - self.assertEqual(result, expected) - - def test_timestamp_constructed_by_date_and_tz(self): - # Fix Issue 2993, Timestamp cannot be constructed by datetime.date - # and tz correctly - - result = Timestamp(date(2012, 3, 11), tz=self.tzstr('US/Eastern')) - - expected = Timestamp('3/11/2012', tz=self.tzstr('US/Eastern')) - self.assertEqual(result.hour, expected.hour) - self.assertEqual(result, expected) - - def test_timestamp_constructed_by_date_and_tz_explicit(self): - # Fix Issue 2993, Timestamp cannot be constructed by datetime.date - # and tz correctly - - result = Timestamp(date(2012, 3, 11), tz=self.tz('US/Eastern')) - - expected = Timestamp('3/11/2012', tz=self.tz('US/Eastern')) - self.assertEqual(result.hour, expected.hour) - self.assertEqual(result, expected) - - def test_timestamp_to_datetime_tzoffset(self): - # tzoffset - from dateutil.tz import tzoffset - tzinfo = tzoffset(None, 7200) - expected = Timestamp('3/11/2012 04:00', tz=tzinfo) - result = Timestamp(expected.to_pydatetime()) - self.assertEqual(expected, result) - - def test_timedelta_push_over_dst_boundary(self): - # #1389 - - # 4 hours before DST transition - stamp = Timestamp('3/10/2012 22:00', tz=self.tzstr('US/Eastern')) - - result = stamp + timedelta(hours=6) - - # spring forward, + "7" hours - expected = Timestamp('3/11/2012 05:00', tz=self.tzstr('US/Eastern')) - - 
self.assertEqual(result, expected) - - def test_timedelta_push_over_dst_boundary_explicit(self): - # #1389 - - # 4 hours before DST transition - stamp = Timestamp('3/10/2012 22:00', tz=self.tz('US/Eastern')) - - result = stamp + timedelta(hours=6) - - # spring forward, + "7" hours - expected = Timestamp('3/11/2012 05:00', tz=self.tz('US/Eastern')) - - self.assertEqual(result, expected) - - def test_tz_localize_dti(self): - dti = DatetimeIndex(start='1/1/2005', end='1/1/2005 0:00:30.256', - freq='L') - dti2 = dti.tz_localize(self.tzstr('US/Eastern')) - - dti_utc = DatetimeIndex(start='1/1/2005 05:00', - end='1/1/2005 5:00:30.256', freq='L', tz='utc') - - self.assert_numpy_array_equal(dti2.values, dti_utc.values) - - dti3 = dti2.tz_convert(self.tzstr('US/Pacific')) - self.assert_numpy_array_equal(dti3.values, dti_utc.values) - - dti = DatetimeIndex(start='11/6/2011 1:59', end='11/6/2011 2:00', - freq='L') - self.assertRaises(pytz.AmbiguousTimeError, dti.tz_localize, - self.tzstr('US/Eastern')) - - dti = DatetimeIndex(start='3/13/2011 1:59', end='3/13/2011 2:00', - freq='L') - self.assertRaises(pytz.NonExistentTimeError, dti.tz_localize, - self.tzstr('US/Eastern')) - - def test_tz_localize_empty_series(self): - # #2248 - - ts = Series() - - ts2 = ts.tz_localize('utc') - self.assertTrue(ts2.index.tz == pytz.utc) - - ts2 = ts.tz_localize(self.tzstr('US/Eastern')) - self.assertTrue(self.cmptz(ts2.index.tz, self.tz('US/Eastern'))) - - def test_astimezone(self): - utc = Timestamp('3/11/2012 22:00', tz='UTC') - expected = utc.tz_convert(self.tzstr('US/Eastern')) - result = utc.astimezone(self.tzstr('US/Eastern')) - self.assertEqual(expected, result) - tm.assertIsInstance(result, Timestamp) - - def test_create_with_tz(self): - stamp = Timestamp('3/11/2012 05:00', tz=self.tzstr('US/Eastern')) - self.assertEqual(stamp.hour, 5) - - rng = date_range('3/11/2012 04:00', periods=10, freq='H', - tz=self.tzstr('US/Eastern')) - - self.assertEqual(stamp, rng[1]) - - utc_stamp = 
Timestamp('3/11/2012 05:00', tz='utc') - self.assertIs(utc_stamp.tzinfo, pytz.utc) - self.assertEqual(utc_stamp.hour, 5) - - stamp = Timestamp('3/11/2012 05:00').tz_localize('utc') - self.assertEqual(utc_stamp.hour, 5) - - def test_create_with_fixed_tz(self): - off = FixedOffset(420, '+07:00') - start = datetime(2012, 3, 11, 5, 0, 0, tzinfo=off) - end = datetime(2012, 6, 11, 5, 0, 0, tzinfo=off) - rng = date_range(start=start, end=end) - self.assertEqual(off, rng.tz) - - rng2 = date_range(start, periods=len(rng), tz=off) - self.assert_index_equal(rng, rng2) - - rng3 = date_range('3/11/2012 05:00:00+07:00', - '6/11/2012 05:00:00+07:00') - self.assertTrue((rng.values == rng3.values).all()) - - def test_create_with_fixedoffset_noname(self): - off = fixed_off_no_name - start = datetime(2012, 3, 11, 5, 0, 0, tzinfo=off) - end = datetime(2012, 6, 11, 5, 0, 0, tzinfo=off) - rng = date_range(start=start, end=end) - self.assertEqual(off, rng.tz) - - idx = Index([start, end]) - self.assertEqual(off, idx.tz) - - def test_date_range_localize(self): - rng = date_range('3/11/2012 03:00', periods=15, freq='H', - tz='US/Eastern') - rng2 = DatetimeIndex(['3/11/2012 03:00', '3/11/2012 04:00'], - tz='US/Eastern') - rng3 = date_range('3/11/2012 03:00', periods=15, freq='H') - rng3 = rng3.tz_localize('US/Eastern') - - self.assert_index_equal(rng, rng3) - - # DST transition time - val = rng[0] - exp = Timestamp('3/11/2012 03:00', tz='US/Eastern') - - self.assertEqual(val.hour, 3) - self.assertEqual(exp.hour, 3) - self.assertEqual(val, exp) # same UTC value - self.assert_index_equal(rng[:2], rng2) - - # Right before the DST transition - rng = date_range('3/11/2012 00:00', periods=2, freq='H', - tz='US/Eastern') - rng2 = DatetimeIndex(['3/11/2012 00:00', '3/11/2012 01:00'], - tz='US/Eastern') - self.assert_index_equal(rng, rng2) - exp = Timestamp('3/11/2012 00:00', tz='US/Eastern') - self.assertEqual(exp.hour, 0) - self.assertEqual(rng[0], exp) - exp = Timestamp('3/11/2012 01:00', 
tz='US/Eastern') - self.assertEqual(exp.hour, 1) - self.assertEqual(rng[1], exp) - - rng = date_range('3/11/2012 00:00', periods=10, freq='H', - tz='US/Eastern') - self.assertEqual(rng[2].hour, 3) - - def test_utc_box_timestamp_and_localize(self): - rng = date_range('3/11/2012', '3/12/2012', freq='H', tz='utc') - rng_eastern = rng.tz_convert(self.tzstr('US/Eastern')) - - tz = self.tz('US/Eastern') - expected = rng[-1].astimezone(tz) - - stamp = rng_eastern[-1] - self.assertEqual(stamp, expected) - self.assertEqual(stamp.tzinfo, expected.tzinfo) - - # right tzinfo - rng = date_range('3/13/2012', '3/14/2012', freq='H', tz='utc') - rng_eastern = rng.tz_convert(self.tzstr('US/Eastern')) - # test not valid for dateutil timezones. - # self.assertIn('EDT', repr(rng_eastern[0].tzinfo)) - self.assertTrue('EDT' in repr(rng_eastern[0].tzinfo) or 'tzfile' in - repr(rng_eastern[0].tzinfo)) - - def test_timestamp_tz_convert(self): - strdates = ['1/1/2012', '3/1/2012', '4/1/2012'] - idx = DatetimeIndex(strdates, tz=self.tzstr('US/Eastern')) - - conv = idx[0].tz_convert(self.tzstr('US/Pacific')) - expected = idx.tz_convert(self.tzstr('US/Pacific'))[0] - - self.assertEqual(conv, expected) - - def test_pass_dates_localize_to_utc(self): - strdates = ['1/1/2012', '3/1/2012', '4/1/2012'] - - idx = DatetimeIndex(strdates) - conv = idx.tz_localize(self.tzstr('US/Eastern')) - - fromdates = DatetimeIndex(strdates, tz=self.tzstr('US/Eastern')) - - self.assertEqual(conv.tz, fromdates.tz) - self.assert_numpy_array_equal(conv.values, fromdates.values) - - def test_field_access_localize(self): - strdates = ['1/1/2012', '3/1/2012', '4/1/2012'] - rng = DatetimeIndex(strdates, tz=self.tzstr('US/Eastern')) - self.assertTrue((rng.hour == 0).all()) - - # a more unusual time zone, #1946 - dr = date_range('2011-10-02 00:00', freq='h', periods=10, - tz=self.tzstr('America/Atikokan')) - - expected = np.arange(10, dtype=np.int32) - self.assert_numpy_array_equal(dr.hour, expected) - - def 
test_with_tz(self): - tz = self.tz('US/Central') - - # just want it to work - start = datetime(2011, 3, 12, tzinfo=pytz.utc) - dr = bdate_range(start, periods=50, freq=offsets.Hour()) - self.assertIs(dr.tz, pytz.utc) - - # DateRange with naive datetimes - dr = bdate_range('1/1/2005', '1/1/2009', tz=pytz.utc) - dr = bdate_range('1/1/2005', '1/1/2009', tz=tz) - - # normalized - central = dr.tz_convert(tz) - self.assertIs(central.tz, tz) - comp = self.localize(tz, central[0].to_pydatetime().replace( - tzinfo=None)).tzinfo - self.assertIs(central[0].tz, comp) - - # compare vs a localized tz - comp = self.localize(tz, - dr[0].to_pydatetime().replace(tzinfo=None)).tzinfo - self.assertIs(central[0].tz, comp) - - # datetimes with tzinfo set - dr = bdate_range(datetime(2005, 1, 1, tzinfo=pytz.utc), - '1/1/2009', tz=pytz.utc) - - self.assertRaises(Exception, bdate_range, - datetime(2005, 1, 1, tzinfo=pytz.utc), '1/1/2009', - tz=tz) - - def test_tz_localize(self): - dr = bdate_range('1/1/2009', '1/1/2010') - dr_utc = bdate_range('1/1/2009', '1/1/2010', tz=pytz.utc) - localized = dr.tz_localize(pytz.utc) - self.assert_index_equal(dr_utc, localized) - - def test_with_tz_ambiguous_times(self): - tz = self.tz('US/Eastern') - - # March 13, 2011, spring forward, skip from 2 AM to 3 AM - dr = date_range(datetime(2011, 3, 13, 1, 30), periods=3, - freq=offsets.Hour()) - self.assertRaises(pytz.NonExistentTimeError, dr.tz_localize, tz) - - # after dst transition, it works - dr = date_range(datetime(2011, 3, 13, 3, 30), periods=3, - freq=offsets.Hour(), tz=tz) - - # November 6, 2011, fall back, repeat 2 AM hour - dr = date_range(datetime(2011, 11, 6, 1, 30), periods=3, - freq=offsets.Hour()) - self.assertRaises(pytz.AmbiguousTimeError, dr.tz_localize, tz) - - # UTC is OK - dr = date_range(datetime(2011, 3, 13), periods=48, - freq=offsets.Minute(30), tz=pytz.utc) - - def test_ambiguous_infer(self): - # November 6, 2011, fall back, repeat 2 AM hour - # With no repeated hours, we cannot 
infer the transition - tz = self.tz('US/Eastern') - dr = date_range(datetime(2011, 11, 6, 0), periods=5, - freq=offsets.Hour()) - self.assertRaises(pytz.AmbiguousTimeError, dr.tz_localize, tz) - - # With repeated hours, we can infer the transition - dr = date_range(datetime(2011, 11, 6, 0), periods=5, - freq=offsets.Hour(), tz=tz) - times = ['11/06/2011 00:00', '11/06/2011 01:00', '11/06/2011 01:00', - '11/06/2011 02:00', '11/06/2011 03:00'] - di = DatetimeIndex(times) - localized = di.tz_localize(tz, ambiguous='infer') - self.assert_index_equal(dr, localized) - with tm.assert_produces_warning(FutureWarning): - localized_old = di.tz_localize(tz, infer_dst=True) - self.assert_index_equal(dr, localized_old) - self.assert_index_equal(dr, DatetimeIndex(times, tz=tz, - ambiguous='infer')) - - # When there is no dst transition, nothing special happens - dr = date_range(datetime(2011, 6, 1, 0), periods=10, - freq=offsets.Hour()) - localized = dr.tz_localize(tz) - localized_infer = dr.tz_localize(tz, ambiguous='infer') - self.assert_index_equal(localized, localized_infer) - with tm.assert_produces_warning(FutureWarning): - localized_infer_old = dr.tz_localize(tz, infer_dst=True) - self.assert_index_equal(localized, localized_infer_old) - - def test_ambiguous_flags(self): - # November 6, 2011, fall back, repeat 2 AM hour - tz = self.tz('US/Eastern') - - # Pass in flags to determine right dst transition - dr = date_range(datetime(2011, 11, 6, 0), periods=5, - freq=offsets.Hour(), tz=tz) - times = ['11/06/2011 00:00', '11/06/2011 01:00', '11/06/2011 01:00', - '11/06/2011 02:00', '11/06/2011 03:00'] - - # Test tz_localize - di = DatetimeIndex(times) - is_dst = [1, 1, 0, 0, 0] - localized = di.tz_localize(tz, ambiguous=is_dst) - self.assert_index_equal(dr, localized) - self.assert_index_equal(dr, DatetimeIndex(times, tz=tz, - ambiguous=is_dst)) - - localized = di.tz_localize(tz, ambiguous=np.array(is_dst)) - self.assert_index_equal(dr, localized) - - localized = 
di.tz_localize(tz, - ambiguous=np.array(is_dst).astype('bool')) - self.assert_index_equal(dr, localized) - - # Test constructor - localized = DatetimeIndex(times, tz=tz, ambiguous=is_dst) - self.assert_index_equal(dr, localized) - - # Test duplicate times where infer_dst fails - times += times - di = DatetimeIndex(times) - - # When the sizes are incompatible, make sure error is raised - self.assertRaises(Exception, di.tz_localize, tz, ambiguous=is_dst) - - # When sizes are compatible and there are repeats ('infer' won't work) - is_dst = np.hstack((is_dst, is_dst)) - localized = di.tz_localize(tz, ambiguous=is_dst) - dr = dr.append(dr) - self.assert_index_equal(dr, localized) - - # When there is no dst transition, nothing special happens - dr = date_range(datetime(2011, 6, 1, 0), periods=10, - freq=offsets.Hour()) - is_dst = np.array([1] * 10) - localized = dr.tz_localize(tz) - localized_is_dst = dr.tz_localize(tz, ambiguous=is_dst) - self.assert_index_equal(localized, localized_is_dst) - - # construction with an ambiguous end-point - # GH 11626 - tz = self.tzstr("Europe/London") - - def f(): - date_range("2013-10-26 23:00", "2013-10-27 01:00", - tz="Europe/London", freq="H") - self.assertRaises(pytz.AmbiguousTimeError, f) - - times = date_range("2013-10-26 23:00", "2013-10-27 01:00", freq="H", - tz=tz, ambiguous='infer') - self.assertEqual(times[0], Timestamp('2013-10-26 23:00', tz=tz, - freq="H")) - if dateutil.__version__ != LooseVersion('2.6.0'): - # GH 14621 - self.assertEqual(times[-1], Timestamp('2013-10-27 01:00', tz=tz, - freq="H")) - - def test_ambiguous_nat(self): - tz = self.tz('US/Eastern') - times = ['11/06/2011 00:00', '11/06/2011 01:00', '11/06/2011 01:00', - '11/06/2011 02:00', '11/06/2011 03:00'] - di = DatetimeIndex(times) - localized = di.tz_localize(tz, ambiguous='NaT') - - times = ['11/06/2011 00:00', np.NaN, np.NaN, '11/06/2011 02:00', - '11/06/2011 03:00'] - di_test = DatetimeIndex(times, tz='US/Eastern') - - # left dtype is datetime64[ns, 
US/Eastern] - # right is datetime64[ns, tzfile('/usr/share/zoneinfo/US/Eastern')] - self.assert_numpy_array_equal(di_test.values, localized.values) - - def test_ambiguous_bool(self): - # make sure that we are correctly accepting bool values as ambiguous - - # gh-14402 - t = Timestamp('2015-11-01 01:00:03') - expected0 = Timestamp('2015-11-01 01:00:03-0500', tz='US/Central') - expected1 = Timestamp('2015-11-01 01:00:03-0600', tz='US/Central') - - def f(): - t.tz_localize('US/Central') - self.assertRaises(pytz.AmbiguousTimeError, f) - - result = t.tz_localize('US/Central', ambiguous=True) - self.assertEqual(result, expected0) - - result = t.tz_localize('US/Central', ambiguous=False) - self.assertEqual(result, expected1) - - s = Series([t]) - expected0 = Series([expected0]) - expected1 = Series([expected1]) - - def f(): - s.dt.tz_localize('US/Central') - self.assertRaises(pytz.AmbiguousTimeError, f) - - result = s.dt.tz_localize('US/Central', ambiguous=True) - assert_series_equal(result, expected0) - - result = s.dt.tz_localize('US/Central', ambiguous=[True]) - assert_series_equal(result, expected0) - - result = s.dt.tz_localize('US/Central', ambiguous=False) - assert_series_equal(result, expected1) - - result = s.dt.tz_localize('US/Central', ambiguous=[False]) - assert_series_equal(result, expected1) - - def test_nonexistent_raise_coerce(self): - # See issue 13057 - from pytz.exceptions import NonExistentTimeError - times = ['2015-03-08 01:00', '2015-03-08 02:00', '2015-03-08 03:00'] - index = DatetimeIndex(times) - tz = 'US/Eastern' - self.assertRaises(NonExistentTimeError, - index.tz_localize, tz=tz) - self.assertRaises(NonExistentTimeError, - index.tz_localize, tz=tz, errors='raise') - result = index.tz_localize(tz=tz, errors='coerce') - test_times = ['2015-03-08 01:00-05:00', 'NaT', - '2015-03-08 03:00-04:00'] - expected = DatetimeIndex(test_times)\ - .tz_localize('UTC').tz_convert('US/Eastern') - tm.assert_index_equal(result, expected) - - # test utility methods 
- def test_infer_tz(self): - eastern = self.tz('US/Eastern') - utc = pytz.utc - - _start = datetime(2001, 1, 1) - _end = datetime(2009, 1, 1) - - start = self.localize(eastern, _start) - end = self.localize(eastern, _end) - assert (tools._infer_tzinfo(start, end) is self.localize( - eastern, _start).tzinfo) - assert (tools._infer_tzinfo(start, None) is self.localize( - eastern, _start).tzinfo) - assert (tools._infer_tzinfo(None, end) is self.localize(eastern, - _end).tzinfo) - - start = utc.localize(_start) - end = utc.localize(_end) - assert (tools._infer_tzinfo(start, end) is utc) - - end = self.localize(eastern, _end) - self.assertRaises(Exception, tools._infer_tzinfo, start, end) - self.assertRaises(Exception, tools._infer_tzinfo, end, start) - - def test_tz_string(self): - result = date_range('1/1/2000', periods=10, - tz=self.tzstr('US/Eastern')) - expected = date_range('1/1/2000', periods=10, tz=self.tz('US/Eastern')) - - self.assert_index_equal(result, expected) - - def test_take_dont_lose_meta(self): - tm._skip_if_no_pytz() - rng = date_range('1/1/2000', periods=20, tz=self.tzstr('US/Eastern')) - - result = rng.take(lrange(5)) - self.assertEqual(result.tz, rng.tz) - self.assertEqual(result.freq, rng.freq) - - def test_index_with_timezone_repr(self): - rng = date_range('4/13/2010', '5/6/2010') - - rng_eastern = rng.tz_localize(self.tzstr('US/Eastern')) - - rng_repr = repr(rng_eastern) - self.assertIn('2010-04-13 00:00:00', rng_repr) - - def test_index_astype_asobject_tzinfos(self): - # #1345 - - # dates around a dst transition - rng = date_range('2/13/2010', '5/6/2010', tz=self.tzstr('US/Eastern')) - - objs = rng.asobject - for i, x in enumerate(objs): - exval = rng[i] - self.assertEqual(x, exval) - self.assertEqual(x.tzinfo, exval.tzinfo) - - objs = rng.astype(object) - for i, x in enumerate(objs): - exval = rng[i] - self.assertEqual(x, exval) - self.assertEqual(x.tzinfo, exval.tzinfo) - - def test_localized_at_time_between_time(self): - from datetime 
import time - - rng = date_range('4/16/2012', '5/1/2012', freq='H') - ts = Series(np.random.randn(len(rng)), index=rng) - - ts_local = ts.tz_localize(self.tzstr('US/Eastern')) - - result = ts_local.at_time(time(10, 0)) - expected = ts.at_time(time(10, 0)).tz_localize(self.tzstr( - 'US/Eastern')) - assert_series_equal(result, expected) - self.assertTrue(self.cmptz(result.index.tz, self.tz('US/Eastern'))) - - t1, t2 = time(10, 0), time(11, 0) - result = ts_local.between_time(t1, t2) - expected = ts.between_time(t1, - t2).tz_localize(self.tzstr('US/Eastern')) - assert_series_equal(result, expected) - self.assertTrue(self.cmptz(result.index.tz, self.tz('US/Eastern'))) - - def test_string_index_alias_tz_aware(self): - rng = date_range('1/1/2000', periods=10, tz=self.tzstr('US/Eastern')) - ts = Series(np.random.randn(len(rng)), index=rng) - - result = ts['1/3/2000'] - self.assertAlmostEqual(result, ts[2]) - - def test_fixed_offset(self): - dates = [datetime(2000, 1, 1, tzinfo=fixed_off), - datetime(2000, 1, 2, tzinfo=fixed_off), - datetime(2000, 1, 3, tzinfo=fixed_off)] - result = to_datetime(dates) - self.assertEqual(result.tz, fixed_off) - - def test_fixedtz_topydatetime(self): - dates = np.array([datetime(2000, 1, 1, tzinfo=fixed_off), - datetime(2000, 1, 2, tzinfo=fixed_off), - datetime(2000, 1, 3, tzinfo=fixed_off)]) - result = to_datetime(dates).to_pydatetime() - self.assert_numpy_array_equal(dates, result) - result = to_datetime(dates)._mpl_repr() - self.assert_numpy_array_equal(dates, result) - - def test_convert_tz_aware_datetime_datetime(self): - # #1581 - - tz = self.tz('US/Eastern') - - dates = [datetime(2000, 1, 1), datetime(2000, 1, 2), - datetime(2000, 1, 3)] - - dates_aware = [self.localize(tz, x) for x in dates] - result = to_datetime(dates_aware) - self.assertTrue(self.cmptz(result.tz, self.tz('US/Eastern'))) - - converted = to_datetime(dates_aware, utc=True) - ex_vals = np.array([Timestamp(x).value for x in dates_aware]) - 
self.assert_numpy_array_equal(converted.asi8, ex_vals) - self.assertIs(converted.tz, pytz.utc) - - def test_to_datetime_utc(self): - from dateutil.parser import parse - arr = np.array([parse('2012-06-13T01:39:00Z')], dtype=object) - - result = to_datetime(arr, utc=True) - self.assertIs(result.tz, pytz.utc) - - def test_to_datetime_tzlocal(self): - from dateutil.parser import parse - from dateutil.tz import tzlocal - dt = parse('2012-06-13T01:39:00Z') - dt = dt.replace(tzinfo=tzlocal()) - - arr = np.array([dt], dtype=object) - - result = to_datetime(arr, utc=True) - self.assertIs(result.tz, pytz.utc) - - rng = date_range('2012-11-03 03:00', '2012-11-05 03:00', tz=tzlocal()) - arr = rng.to_pydatetime() - result = to_datetime(arr, utc=True) - self.assertIs(result.tz, pytz.utc) - - def test_frame_no_datetime64_dtype(self): - - # after 7822 - # these retain the timezones on dict construction - - dr = date_range('2011/1/1', '2012/1/1', freq='W-FRI') - dr_tz = dr.tz_localize(self.tzstr('US/Eastern')) - e = DataFrame({'A': 'foo', 'B': dr_tz}, index=dr) - tz_expected = DatetimeTZDtype('ns', dr_tz.tzinfo) - self.assertEqual(e['B'].dtype, tz_expected) - - # GH 2810 (with timezones) - datetimes_naive = [ts.to_pydatetime() for ts in dr] - datetimes_with_tz = [ts.to_pydatetime() for ts in dr_tz] - df = DataFrame({'dr': dr, - 'dr_tz': dr_tz, - 'datetimes_naive': datetimes_naive, - 'datetimes_with_tz': datetimes_with_tz}) - result = df.get_dtype_counts().sort_index() - expected = Series({'datetime64[ns]': 2, - str(tz_expected): 2}).sort_index() - assert_series_equal(result, expected) - - def test_hongkong_tz_convert(self): - # #1673 - dr = date_range('2012-01-01', '2012-01-10', freq='D', tz='Hongkong') - - # it works! 
- dr.hour - - def test_tz_convert_unsorted(self): - dr = date_range('2012-03-09', freq='H', periods=100, tz='utc') - dr = dr.tz_convert(self.tzstr('US/Eastern')) - - result = dr[::-1].hour - exp = dr.hour[::-1] - tm.assert_almost_equal(result, exp) - - def test_shift_localized(self): - dr = date_range('2011/1/1', '2012/1/1', freq='W-FRI') - dr_tz = dr.tz_localize(self.tzstr('US/Eastern')) - - result = dr_tz.shift(1, '10T') - self.assertEqual(result.tz, dr_tz.tz) - - def test_tz_aware_asfreq(self): - dr = date_range('2011-12-01', '2012-07-20', freq='D', - tz=self.tzstr('US/Eastern')) - - s = Series(np.random.randn(len(dr)), index=dr) - - # it works! - s.asfreq('T') - - def test_static_tzinfo(self): - # it works! - index = DatetimeIndex([datetime(2012, 1, 1)], tz=self.tzstr('EST')) - index.hour - index[0] - - def test_tzaware_datetime_to_index(self): - d = [datetime(2012, 8, 19, tzinfo=self.tz('US/Eastern'))] - - index = DatetimeIndex(d) - self.assertTrue(self.cmptz(index.tz, self.tz('US/Eastern'))) - - def test_date_range_span_dst_transition(self): - # #1778 - - # Standard -> Daylight Savings Time - dr = date_range('03/06/2012 00:00', periods=200, freq='W-FRI', - tz='US/Eastern') - - self.assertTrue((dr.hour == 0).all()) - - dr = date_range('2012-11-02', periods=10, tz=self.tzstr('US/Eastern')) - self.assertTrue((dr.hour == 0).all()) - - def test_convert_datetime_list(self): - dr = date_range('2012-06-02', periods=10, - tz=self.tzstr('US/Eastern'), name='foo') - dr2 = DatetimeIndex(list(dr), name='foo') - self.assert_index_equal(dr, dr2) - self.assertEqual(dr.tz, dr2.tz) - self.assertEqual(dr2.name, 'foo') - - def test_frame_from_records_utc(self): - rec = {'datum': 1.5, - 'begin_time': datetime(2006, 4, 27, tzinfo=pytz.utc)} - - # it works - DataFrame.from_records([rec], index='begin_time') - - def test_frame_reset_index(self): - dr = date_range('2012-06-02', periods=10, tz=self.tzstr('US/Eastern')) - df = DataFrame(np.random.randn(len(dr)), dr) - roundtripped = 
df.reset_index().set_index('index') - xp = df.index.tz - rs = roundtripped.index.tz - self.assertEqual(xp, rs) - - def test_dateutil_tzoffset_support(self): - from dateutil.tz import tzoffset - values = [188.5, 328.25] - tzinfo = tzoffset(None, 7200) - index = [datetime(2012, 5, 11, 11, tzinfo=tzinfo), - datetime(2012, 5, 11, 12, tzinfo=tzinfo)] - series = Series(data=values, index=index) - - self.assertEqual(series.index.tz, tzinfo) - - # it works! #2443 - repr(series.index[0]) - - def test_getitem_pydatetime_tz(self): - index = date_range(start='2012-12-24 16:00', end='2012-12-24 18:00', - freq='H', tz=self.tzstr('Europe/Berlin')) - ts = Series(index=index, data=index.hour) - time_pandas = Timestamp('2012-12-24 17:00', - tz=self.tzstr('Europe/Berlin')) - time_datetime = self.localize( - self.tz('Europe/Berlin'), datetime(2012, 12, 24, 17, 0)) - self.assertEqual(ts[time_pandas], ts[time_datetime]) - - def test_index_drop_dont_lose_tz(self): - # #2621 - ind = date_range("2012-12-01", periods=10, tz="utc") - ind = ind.drop(ind[-1]) - - self.assertTrue(ind.tz is not None) - - def test_datetimeindex_tz(self): - """ Test different DatetimeIndex constructions with timezone - Follow-up of #4229 - """ - - arr = ['11/10/2005 08:00:00', '11/10/2005 09:00:00'] - - idx1 = to_datetime(arr).tz_localize(self.tzstr('US/Eastern')) - idx2 = DatetimeIndex(start="2005-11-10 08:00:00", freq='H', periods=2, - tz=self.tzstr('US/Eastern')) - idx3 = DatetimeIndex(arr, tz=self.tzstr('US/Eastern')) - idx4 = DatetimeIndex(np.array(arr), tz=self.tzstr('US/Eastern')) - - for other in [idx2, idx3, idx4]: - self.assert_index_equal(idx1, other) - - def test_datetimeindex_tz_nat(self): - idx = to_datetime([Timestamp("2013-1-1", tz=self.tzstr('US/Eastern')), - NaT]) - - self.assertTrue(isnull(idx[1])) - self.assertTrue(idx[0].tzinfo is not None) - - -class TestTimeZoneSupportDateutil(TestTimeZoneSupportPytz): - - def setUp(self): - tm._skip_if_no_dateutil() - - def tz(self, tz): - """ - Construct a 
dateutil timezone. - Use tslib.maybe_get_tz so that we get the filename on the tz right - on windows. See #7337. - """ - return tslib.maybe_get_tz('dateutil/' + tz) - - def tzstr(self, tz): - """ Construct a timezone string from a string. Overridden in subclass - to parameterize tests. """ - return 'dateutil/' + tz - - def cmptz(self, tz1, tz2): - """ Compare two timezones. Overridden in subclass to parameterize - tests. """ - return tz1 == tz2 - - def localize(self, tz, x): - return x.replace(tzinfo=tz) - - def test_utc_with_system_utc(self): - # Skipped on win32 due to dateutil bug - tm._skip_if_windows() - - from pandas.tslib import maybe_get_tz - - # from system utc to real utc - ts = Timestamp('2001-01-05 11:56', tz=maybe_get_tz('dateutil/UTC')) - # check that the time hasn't changed. - self.assertEqual(ts, ts.tz_convert(dateutil.tz.tzutc())) - - # from system utc to real utc - ts = Timestamp('2001-01-05 11:56', tz=maybe_get_tz('dateutil/UTC')) - # check that the time hasn't changed. 
- self.assertEqual(ts, ts.tz_convert(dateutil.tz.tzutc())) - - def test_tz_convert_hour_overflow_dst(self): - # Regression test for: - # https://github.com/pandas-dev/pandas/issues/13306 - - # sorted case US/Eastern -> UTC - ts = ['2008-05-12 09:50:00', - '2008-12-12 09:50:35', - '2009-05-12 09:50:32'] - tt = to_datetime(ts).tz_localize('US/Eastern') - ut = tt.tz_convert('UTC') - expected = np.array([13, 14, 13], dtype=np.int32) - self.assert_numpy_array_equal(ut.hour, expected) - - # sorted case UTC -> US/Eastern - ts = ['2008-05-12 13:50:00', - '2008-12-12 14:50:35', - '2009-05-12 13:50:32'] - tt = to_datetime(ts).tz_localize('UTC') - ut = tt.tz_convert('US/Eastern') - expected = np.array([9, 9, 9], dtype=np.int32) - self.assert_numpy_array_equal(ut.hour, expected) - - # unsorted case US/Eastern -> UTC - ts = ['2008-05-12 09:50:00', - '2008-12-12 09:50:35', - '2008-05-12 09:50:32'] - tt = to_datetime(ts).tz_localize('US/Eastern') - ut = tt.tz_convert('UTC') - expected = np.array([13, 14, 13], dtype=np.int32) - self.assert_numpy_array_equal(ut.hour, expected) - - # unsorted case UTC -> US/Eastern - ts = ['2008-05-12 13:50:00', - '2008-12-12 14:50:35', - '2008-05-12 13:50:32'] - tt = to_datetime(ts).tz_localize('UTC') - ut = tt.tz_convert('US/Eastern') - expected = np.array([9, 9, 9], dtype=np.int32) - self.assert_numpy_array_equal(ut.hour, expected) - - def test_tz_convert_hour_overflow_dst_timestamps(self): - # Regression test for: - # https://github.com/pandas-dev/pandas/issues/13306 - - tz = self.tzstr('US/Eastern') - - # sorted case US/Eastern -> UTC - ts = [Timestamp('2008-05-12 09:50:00', tz=tz), - Timestamp('2008-12-12 09:50:35', tz=tz), - Timestamp('2009-05-12 09:50:32', tz=tz)] - tt = to_datetime(ts) - ut = tt.tz_convert('UTC') - expected = np.array([13, 14, 13], dtype=np.int32) - self.assert_numpy_array_equal(ut.hour, expected) - - # sorted case UTC -> US/Eastern - ts = [Timestamp('2008-05-12 13:50:00', tz='UTC'), - Timestamp('2008-12-12 14:50:35', 
tz='UTC'), - Timestamp('2009-05-12 13:50:32', tz='UTC')] - tt = to_datetime(ts) - ut = tt.tz_convert('US/Eastern') - expected = np.array([9, 9, 9], dtype=np.int32) - self.assert_numpy_array_equal(ut.hour, expected) - - # unsorted case US/Eastern -> UTC - ts = [Timestamp('2008-05-12 09:50:00', tz=tz), - Timestamp('2008-12-12 09:50:35', tz=tz), - Timestamp('2008-05-12 09:50:32', tz=tz)] - tt = to_datetime(ts) - ut = tt.tz_convert('UTC') - expected = np.array([13, 14, 13], dtype=np.int32) - self.assert_numpy_array_equal(ut.hour, expected) - - # unsorted case UTC -> US/Eastern - ts = [Timestamp('2008-05-12 13:50:00', tz='UTC'), - Timestamp('2008-12-12 14:50:35', tz='UTC'), - Timestamp('2008-05-12 13:50:32', tz='UTC')] - tt = to_datetime(ts) - ut = tt.tz_convert('US/Eastern') - expected = np.array([9, 9, 9], dtype=np.int32) - self.assert_numpy_array_equal(ut.hour, expected) - - def test_tslib_tz_convert_trans_pos_plus_1__bug(self): - # Regression test for tslib.tz_convert(vals, tz1, tz2). - # See https://github.com/pandas-dev/pandas/issues/4496 for details. 
- for freq, n in [('H', 1), ('T', 60), ('S', 3600)]: - idx = date_range(datetime(2011, 3, 26, 23), - datetime(2011, 3, 27, 1), freq=freq) - idx = idx.tz_localize('UTC') - idx = idx.tz_convert('Europe/Moscow') - - expected = np.repeat(np.array([3, 4, 5], dtype=np.int32), - np.array([n, n, 1])) - self.assert_numpy_array_equal(idx.hour, expected) - - def test_tslib_tz_convert_dst(self): - for freq, n in [('H', 1), ('T', 60), ('S', 3600)]: - # Start DST - idx = date_range('2014-03-08 23:00', '2014-03-09 09:00', freq=freq, - tz='UTC') - idx = idx.tz_convert('US/Eastern') - expected = np.repeat(np.array([18, 19, 20, 21, 22, 23, - 0, 1, 3, 4, 5], dtype=np.int32), - np.array([n, n, n, n, n, n, n, n, n, n, 1])) - self.assert_numpy_array_equal(idx.hour, expected) - - idx = date_range('2014-03-08 18:00', '2014-03-09 05:00', freq=freq, - tz='US/Eastern') - idx = idx.tz_convert('UTC') - expected = np.repeat(np.array([23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9], - dtype=np.int32), - np.array([n, n, n, n, n, n, n, n, n, n, 1])) - self.assert_numpy_array_equal(idx.hour, expected) - - # End DST - idx = date_range('2014-11-01 23:00', '2014-11-02 09:00', freq=freq, - tz='UTC') - idx = idx.tz_convert('US/Eastern') - expected = np.repeat(np.array([19, 20, 21, 22, 23, - 0, 1, 1, 2, 3, 4], dtype=np.int32), - np.array([n, n, n, n, n, n, n, n, n, n, 1])) - self.assert_numpy_array_equal(idx.hour, expected) - - idx = date_range('2014-11-01 18:00', '2014-11-02 05:00', freq=freq, - tz='US/Eastern') - idx = idx.tz_convert('UTC') - expected = np.repeat(np.array([22, 23, 0, 1, 2, 3, 4, 5, 6, - 7, 8, 9, 10], dtype=np.int32), - np.array([n, n, n, n, n, n, n, n, n, - n, n, n, 1])) - self.assert_numpy_array_equal(idx.hour, expected) - - # daily - # Start DST - idx = date_range('2014-03-08 00:00', '2014-03-09 00:00', freq='D', - tz='UTC') - idx = idx.tz_convert('US/Eastern') - self.assert_numpy_array_equal(idx.hour, - np.array([19, 19], dtype=np.int32)) - - idx = date_range('2014-03-08 00:00', '2014-03-09 
00:00', freq='D', - tz='US/Eastern') - idx = idx.tz_convert('UTC') - self.assert_numpy_array_equal(idx.hour, - np.array([5, 5], dtype=np.int32)) - - # End DST - idx = date_range('2014-11-01 00:00', '2014-11-02 00:00', freq='D', - tz='UTC') - idx = idx.tz_convert('US/Eastern') - self.assert_numpy_array_equal(idx.hour, - np.array([20, 20], dtype=np.int32)) - - idx = date_range('2014-11-01 00:00', '2014-11-02 000:00', freq='D', - tz='US/Eastern') - idx = idx.tz_convert('UTC') - self.assert_numpy_array_equal(idx.hour, - np.array([4, 4], dtype=np.int32)) - - def test_tzlocal(self): - # GH 13583 - ts = Timestamp('2011-01-01', tz=dateutil.tz.tzlocal()) - self.assertEqual(ts.tz, dateutil.tz.tzlocal()) - self.assertTrue("tz='tzlocal()')" in repr(ts)) - - tz = tslib.maybe_get_tz('tzlocal()') - self.assertEqual(tz, dateutil.tz.tzlocal()) - - # get offset using normal datetime for test - offset = dateutil.tz.tzlocal().utcoffset(datetime(2011, 1, 1)) - offset = offset.total_seconds() * 1000000000 - self.assertEqual(ts.value + offset, Timestamp('2011-01-01').value) - - def test_tz_localize_tzlocal(self): - # GH 13583 - offset = dateutil.tz.tzlocal().utcoffset(datetime(2011, 1, 1)) - offset = int(offset.total_seconds() * 1000000000) - - dti = date_range(start='2001-01-01', end='2001-03-01') - dti2 = dti.tz_localize(dateutil.tz.tzlocal()) - tm.assert_numpy_array_equal(dti2.asi8 + offset, dti.asi8) - - dti = date_range(start='2001-01-01', end='2001-03-01', - tz=dateutil.tz.tzlocal()) - dti2 = dti.tz_localize(None) - tm.assert_numpy_array_equal(dti2.asi8 - offset, dti.asi8) - - def test_tz_convert_tzlocal(self): - # GH 13583 - # tz_convert doesn't affect to internal - dti = date_range(start='2001-01-01', end='2001-03-01', tz='UTC') - dti2 = dti.tz_convert(dateutil.tz.tzlocal()) - tm.assert_numpy_array_equal(dti2.asi8, dti.asi8) - - dti = date_range(start='2001-01-01', end='2001-03-01', - tz=dateutil.tz.tzlocal()) - dti2 = dti.tz_convert(None) - tm.assert_numpy_array_equal(dti2.asi8, 
dti.asi8) - - -class TestTimeZoneCacheKey(tm.TestCase): - - def test_cache_keys_are_distinct_for_pytz_vs_dateutil(self): - tzs = pytz.common_timezones - for tz_name in tzs: - if tz_name == 'UTC': - # skip utc as it's a special case in dateutil - continue - tz_p = tslib.maybe_get_tz(tz_name) - tz_d = tslib.maybe_get_tz('dateutil/' + tz_name) - if tz_d is None: - # skip timezones that dateutil doesn't know about. - continue - self.assertNotEqual(tslib._p_tz_cache_key( - tz_p), tslib._p_tz_cache_key(tz_d)) - - -class TestTimeZones(tm.TestCase): - timezones = ['UTC', 'Asia/Tokyo', 'US/Eastern', 'dateutil/US/Pacific'] - - def setUp(self): - tm._skip_if_no_pytz() - - def test_replace(self): - # GH 14621 - # GH 7825 - # replacing datetime components with and w/o presence of a timezone - dt = Timestamp('2016-01-01 09:00:00') - result = dt.replace(hour=0) - expected = Timestamp('2016-01-01 00:00:00') - self.assertEqual(result, expected) - - for tz in self.timezones: - dt = Timestamp('2016-01-01 09:00:00', tz=tz) - result = dt.replace(hour=0) - expected = Timestamp('2016-01-01 00:00:00', tz=tz) - self.assertEqual(result, expected) - - # we preserve nanoseconds - dt = Timestamp('2016-01-01 09:00:00.000000123', tz=tz) - result = dt.replace(hour=0) - expected = Timestamp('2016-01-01 00:00:00.000000123', tz=tz) - self.assertEqual(result, expected) - - # test all - dt = Timestamp('2016-01-01 09:00:00.000000123', tz=tz) - result = dt.replace(year=2015, month=2, day=2, hour=0, minute=5, - second=5, microsecond=5, nanosecond=5) - expected = Timestamp('2015-02-02 00:05:05.000005005', tz=tz) - self.assertEqual(result, expected) - - # error - def f(): - dt.replace(foo=5) - self.assertRaises(TypeError, f) - - def f(): - dt.replace(hour=0.1) - self.assertRaises(ValueError, f) - - # assert conversion to naive is the same as replacing tzinfo with None - dt = Timestamp('2013-11-03 01:59:59.999999-0400', tz='US/Eastern') - self.assertEqual(dt.tz_localize(None), dt.replace(tzinfo=None)) - - 
def test_ambiguous_compat(self): - # validate that pytz and dateutil are compat for dst - # when the transition happens - tm._skip_if_no_dateutil() - tm._skip_if_no_pytz() - - pytz_zone = 'Europe/London' - dateutil_zone = 'dateutil/Europe/London' - result_pytz = (Timestamp('2013-10-27 01:00:00') - .tz_localize(pytz_zone, ambiguous=0)) - result_dateutil = (Timestamp('2013-10-27 01:00:00') - .tz_localize(dateutil_zone, ambiguous=0)) - self.assertEqual(result_pytz.value, result_dateutil.value) - self.assertEqual(result_pytz.value, 1382835600000000000) - - # dateutil 2.6 buggy w.r.t. ambiguous=0 - if dateutil.__version__ != LooseVersion('2.6.0'): - # GH 14621 - # https://github.com/dateutil/dateutil/issues/321 - self.assertEqual(result_pytz.to_pydatetime().tzname(), - result_dateutil.to_pydatetime().tzname()) - self.assertEqual(str(result_pytz), str(result_dateutil)) - - # 1 hour difference - result_pytz = (Timestamp('2013-10-27 01:00:00') - .tz_localize(pytz_zone, ambiguous=1)) - result_dateutil = (Timestamp('2013-10-27 01:00:00') - .tz_localize(dateutil_zone, ambiguous=1)) - self.assertEqual(result_pytz.value, result_dateutil.value) - self.assertEqual(result_pytz.value, 1382832000000000000) - - # dateutil < 2.6 is buggy w.r.t. 
ambiguous timezones - if dateutil.__version__ > LooseVersion('2.5.3'): - # GH 14621 - self.assertEqual(str(result_pytz), str(result_dateutil)) - self.assertEqual(result_pytz.to_pydatetime().tzname(), - result_dateutil.to_pydatetime().tzname()) - - def test_index_equals_with_tz(self): - left = date_range('1/1/2011', periods=100, freq='H', tz='utc') - right = date_range('1/1/2011', periods=100, freq='H', tz='US/Eastern') - - self.assertFalse(left.equals(right)) - - def test_tz_localize_naive(self): - rng = date_range('1/1/2011', periods=100, freq='H') - - conv = rng.tz_localize('US/Pacific') - exp = date_range('1/1/2011', periods=100, freq='H', tz='US/Pacific') - - self.assert_index_equal(conv, exp) - - def test_tz_localize_roundtrip(self): - for tz in self.timezones: - idx1 = date_range(start='2014-01-01', end='2014-12-31', freq='M') - idx2 = date_range(start='2014-01-01', end='2014-12-31', freq='D') - idx3 = date_range(start='2014-01-01', end='2014-03-01', freq='H') - idx4 = date_range(start='2014-08-01', end='2014-10-31', freq='T') - for idx in [idx1, idx2, idx3, idx4]: - localized = idx.tz_localize(tz) - expected = date_range(start=idx[0], end=idx[-1], freq=idx.freq, - tz=tz) - tm.assert_index_equal(localized, expected) - - with tm.assertRaises(TypeError): - localized.tz_localize(tz) - - reset = localized.tz_localize(None) - tm.assert_index_equal(reset, idx) - self.assertTrue(reset.tzinfo is None) - - def test_series_frame_tz_localize(self): - - rng = date_range('1/1/2011', periods=100, freq='H') - ts = Series(1, index=rng) - - result = ts.tz_localize('utc') - self.assertEqual(result.index.tz.zone, 'UTC') - - df = DataFrame({'a': 1}, index=rng) - result = df.tz_localize('utc') - expected = DataFrame({'a': 1}, rng.tz_localize('UTC')) - self.assertEqual(result.index.tz.zone, 'UTC') - assert_frame_equal(result, expected) - - df = df.T - result = df.tz_localize('utc', axis=1) - self.assertEqual(result.columns.tz.zone, 'UTC') - assert_frame_equal(result, expected.T) - 
- # Can't localize if already tz-aware - rng = date_range('1/1/2011', periods=100, freq='H', tz='utc') - ts = Series(1, index=rng) - tm.assertRaisesRegexp(TypeError, 'Already tz-aware', ts.tz_localize, - 'US/Eastern') - - def test_series_frame_tz_convert(self): - rng = date_range('1/1/2011', periods=200, freq='D', tz='US/Eastern') - ts = Series(1, index=rng) - - result = ts.tz_convert('Europe/Berlin') - self.assertEqual(result.index.tz.zone, 'Europe/Berlin') - - df = DataFrame({'a': 1}, index=rng) - result = df.tz_convert('Europe/Berlin') - expected = DataFrame({'a': 1}, rng.tz_convert('Europe/Berlin')) - self.assertEqual(result.index.tz.zone, 'Europe/Berlin') - assert_frame_equal(result, expected) - - df = df.T - result = df.tz_convert('Europe/Berlin', axis=1) - self.assertEqual(result.columns.tz.zone, 'Europe/Berlin') - assert_frame_equal(result, expected.T) - - # can't convert tz-naive - rng = date_range('1/1/2011', periods=200, freq='D') - ts = Series(1, index=rng) - tm.assertRaisesRegexp(TypeError, "Cannot convert tz-naive", - ts.tz_convert, 'US/Eastern') - - def test_tz_convert_roundtrip(self): - for tz in self.timezones: - idx1 = date_range(start='2014-01-01', end='2014-12-31', freq='M', - tz='UTC') - exp1 = date_range(start='2014-01-01', end='2014-12-31', freq='M') - - idx2 = date_range(start='2014-01-01', end='2014-12-31', freq='D', - tz='UTC') - exp2 = date_range(start='2014-01-01', end='2014-12-31', freq='D') - - idx3 = date_range(start='2014-01-01', end='2014-03-01', freq='H', - tz='UTC') - exp3 = date_range(start='2014-01-01', end='2014-03-01', freq='H') - - idx4 = date_range(start='2014-08-01', end='2014-10-31', freq='T', - tz='UTC') - exp4 = date_range(start='2014-08-01', end='2014-10-31', freq='T') - - for idx, expected in [(idx1, exp1), (idx2, exp2), (idx3, exp3), - (idx4, exp4)]: - converted = idx.tz_convert(tz) - reset = converted.tz_convert(None) - tm.assert_index_equal(reset, expected) - self.assertTrue(reset.tzinfo is None) - 
tm.assert_index_equal(reset, converted.tz_convert( - 'UTC').tz_localize(None)) - - def test_join_utc_convert(self): - rng = date_range('1/1/2011', periods=100, freq='H', tz='utc') - - left = rng.tz_convert('US/Eastern') - right = rng.tz_convert('Europe/Berlin') - - for how in ['inner', 'outer', 'left', 'right']: - result = left.join(left[:-5], how=how) - tm.assertIsInstance(result, DatetimeIndex) - self.assertEqual(result.tz, left.tz) - - result = left.join(right[:-5], how=how) - tm.assertIsInstance(result, DatetimeIndex) - self.assertEqual(result.tz.zone, 'UTC') - - def test_join_aware(self): - rng = date_range('1/1/2011', periods=10, freq='H') - ts = Series(np.random.randn(len(rng)), index=rng) - - ts_utc = ts.tz_localize('utc') - - self.assertRaises(Exception, ts.__add__, ts_utc) - self.assertRaises(Exception, ts_utc.__add__, ts) - - test1 = DataFrame(np.zeros((6, 3)), - index=date_range("2012-11-15 00:00:00", periods=6, - freq="100L", tz="US/Central")) - test2 = DataFrame(np.zeros((3, 3)), - index=date_range("2012-11-15 00:00:00", periods=3, - freq="250L", tz="US/Central"), - columns=lrange(3, 6)) - - result = test1.join(test2, how='outer') - ex_index = test1.index.union(test2.index) - - self.assert_index_equal(result.index, ex_index) - self.assertTrue(result.index.tz.zone == 'US/Central') - - # non-overlapping - rng = date_range("2012-11-15 00:00:00", periods=6, freq="H", - tz="US/Central") - - rng2 = date_range("2012-11-15 12:00:00", periods=6, freq="H", - tz="US/Eastern") - - result = rng.union(rng2) - self.assertTrue(result.tz.zone == 'UTC') - - def test_align_aware(self): - idx1 = date_range('2001', periods=5, freq='H', tz='US/Eastern') - idx2 = date_range('2001', periods=5, freq='2H', tz='US/Eastern') - df1 = DataFrame(np.random.randn(len(idx1), 3), idx1) - df2 = DataFrame(np.random.randn(len(idx2), 3), idx2) - new1, new2 = df1.align(df2) - self.assertEqual(df1.index.tz, new1.index.tz) - self.assertEqual(df2.index.tz, new2.index.tz) - - # # different 
timezones convert to UTC - - # frame - df1_central = df1.tz_convert('US/Central') - new1, new2 = df1.align(df1_central) - self.assertEqual(new1.index.tz, pytz.UTC) - self.assertEqual(new2.index.tz, pytz.UTC) - - # series - new1, new2 = df1[0].align(df1_central[0]) - self.assertEqual(new1.index.tz, pytz.UTC) - self.assertEqual(new2.index.tz, pytz.UTC) - - # combination - new1, new2 = df1.align(df1_central[0], axis=0) - self.assertEqual(new1.index.tz, pytz.UTC) - self.assertEqual(new2.index.tz, pytz.UTC) - - df1[0].align(df1_central, axis=0) - self.assertEqual(new1.index.tz, pytz.UTC) - self.assertEqual(new2.index.tz, pytz.UTC) - - def test_append_aware(self): - rng1 = date_range('1/1/2011 01:00', periods=1, freq='H', - tz='US/Eastern') - rng2 = date_range('1/1/2011 02:00', periods=1, freq='H', - tz='US/Eastern') - ts1 = Series([1], index=rng1) - ts2 = Series([2], index=rng2) - ts_result = ts1.append(ts2) - - exp_index = DatetimeIndex(['2011-01-01 01:00', '2011-01-01 02:00'], - tz='US/Eastern') - exp = Series([1, 2], index=exp_index) - assert_series_equal(ts_result, exp) - self.assertEqual(ts_result.index.tz, rng1.tz) - - rng1 = date_range('1/1/2011 01:00', periods=1, freq='H', tz='UTC') - rng2 = date_range('1/1/2011 02:00', periods=1, freq='H', tz='UTC') - ts1 = Series([1], index=rng1) - ts2 = Series([2], index=rng2) - ts_result = ts1.append(ts2) - - exp_index = DatetimeIndex(['2011-01-01 01:00', '2011-01-01 02:00'], - tz='UTC') - exp = Series([1, 2], index=exp_index) - assert_series_equal(ts_result, exp) - utc = rng1.tz - self.assertEqual(utc, ts_result.index.tz) - - # GH 7795 - # different tz coerces to object dtype, not UTC - rng1 = date_range('1/1/2011 01:00', periods=1, freq='H', - tz='US/Eastern') - rng2 = date_range('1/1/2011 02:00', periods=1, freq='H', - tz='US/Central') - ts1 = Series([1], index=rng1) - ts2 = Series([2], index=rng2) - ts_result = ts1.append(ts2) - exp_index = Index([Timestamp('1/1/2011 01:00', tz='US/Eastern'), - Timestamp('1/1/2011 
02:00', tz='US/Central')]) - exp = Series([1, 2], index=exp_index) - assert_series_equal(ts_result, exp) - - def test_append_dst(self): - rng1 = date_range('1/1/2016 01:00', periods=3, freq='H', - tz='US/Eastern') - rng2 = date_range('8/1/2016 01:00', periods=3, freq='H', - tz='US/Eastern') - ts1 = Series([1, 2, 3], index=rng1) - ts2 = Series([10, 11, 12], index=rng2) - ts_result = ts1.append(ts2) - - exp_index = DatetimeIndex(['2016-01-01 01:00', '2016-01-01 02:00', - '2016-01-01 03:00', '2016-08-01 01:00', - '2016-08-01 02:00', '2016-08-01 03:00'], - tz='US/Eastern') - exp = Series([1, 2, 3, 10, 11, 12], index=exp_index) - assert_series_equal(ts_result, exp) - self.assertEqual(ts_result.index.tz, rng1.tz) - - def test_append_aware_naive(self): - rng1 = date_range('1/1/2011 01:00', periods=1, freq='H') - rng2 = date_range('1/1/2011 02:00', periods=1, freq='H', - tz='US/Eastern') - ts1 = Series(np.random.randn(len(rng1)), index=rng1) - ts2 = Series(np.random.randn(len(rng2)), index=rng2) - ts_result = ts1.append(ts2) - - self.assertTrue(ts_result.index.equals(ts1.index.asobject.append( - ts2.index.asobject))) - - # mixed - rng1 = date_range('1/1/2011 01:00', periods=1, freq='H') - rng2 = lrange(100) - ts1 = Series(np.random.randn(len(rng1)), index=rng1) - ts2 = Series(np.random.randn(len(rng2)), index=rng2) - ts_result = ts1.append(ts2) - self.assertTrue(ts_result.index.equals(ts1.index.asobject.append( - ts2.index))) - - def test_equal_join_ensure_utc(self): - rng = date_range('1/1/2011', periods=10, freq='H', tz='US/Eastern') - ts = Series(np.random.randn(len(rng)), index=rng) - - ts_moscow = ts.tz_convert('Europe/Moscow') - - result = ts + ts_moscow - self.assertIs(result.index.tz, pytz.utc) - - result = ts_moscow + ts - self.assertIs(result.index.tz, pytz.utc) - - df = DataFrame({'a': ts}) - df_moscow = df.tz_convert('Europe/Moscow') - result = df + df_moscow - self.assertIs(result.index.tz, pytz.utc) - - result = df_moscow + df - self.assertIs(result.index.tz, 
pytz.utc) - - def test_arith_utc_convert(self): - rng = date_range('1/1/2011', periods=100, freq='H', tz='utc') - - perm = np.random.permutation(100)[:90] - ts1 = Series(np.random.randn(90), - index=rng.take(perm).tz_convert('US/Eastern')) - - perm = np.random.permutation(100)[:90] - ts2 = Series(np.random.randn(90), - index=rng.take(perm).tz_convert('Europe/Berlin')) - - result = ts1 + ts2 - - uts1 = ts1.tz_convert('utc') - uts2 = ts2.tz_convert('utc') - expected = uts1 + uts2 - - self.assertEqual(result.index.tz, pytz.UTC) - assert_series_equal(result, expected) - - def test_intersection(self): - rng = date_range('1/1/2011', periods=100, freq='H', tz='utc') - - left = rng[10:90][::-1] - right = rng[20:80][::-1] - - self.assertEqual(left.tz, rng.tz) - result = left.intersection(right) - self.assertEqual(result.tz, left.tz) - - def test_timestamp_equality_different_timezones(self): - utc_range = date_range('1/1/2000', periods=20, tz='UTC') - eastern_range = utc_range.tz_convert('US/Eastern') - berlin_range = utc_range.tz_convert('Europe/Berlin') - - for a, b, c in zip(utc_range, eastern_range, berlin_range): - self.assertEqual(a, b) - self.assertEqual(b, c) - self.assertEqual(a, c) - - self.assertTrue((utc_range == eastern_range).all()) - self.assertTrue((utc_range == berlin_range).all()) - self.assertTrue((berlin_range == eastern_range).all()) - - def test_datetimeindex_tz(self): - rng = date_range('03/12/2012 00:00', periods=10, freq='W-FRI', - tz='US/Eastern') - rng2 = DatetimeIndex(data=rng, tz='US/Eastern') - self.assert_index_equal(rng, rng2) - - def test_normalize_tz(self): - rng = date_range('1/1/2000 9:30', periods=10, freq='D', - tz='US/Eastern') - - result = rng.normalize() - expected = date_range('1/1/2000', periods=10, freq='D', - tz='US/Eastern') - self.assert_index_equal(result, expected) - - self.assertTrue(result.is_normalized) - self.assertFalse(rng.is_normalized) - - rng = date_range('1/1/2000 9:30', periods=10, freq='D', tz='UTC') - - result = 
rng.normalize() - expected = date_range('1/1/2000', periods=10, freq='D', tz='UTC') - self.assert_index_equal(result, expected) - - self.assertTrue(result.is_normalized) - self.assertFalse(rng.is_normalized) - - from dateutil.tz import tzlocal - rng = date_range('1/1/2000 9:30', periods=10, freq='D', tz=tzlocal()) - result = rng.normalize() - expected = date_range('1/1/2000', periods=10, freq='D', tz=tzlocal()) - self.assert_index_equal(result, expected) - - self.assertTrue(result.is_normalized) - self.assertFalse(rng.is_normalized) - - def test_normalize_tz_local(self): - # GH 13459 - from dateutil.tz import tzlocal - - timezones = ['US/Pacific', 'US/Eastern', 'UTC', 'Asia/Kolkata', - 'Asia/Shanghai', 'Australia/Canberra'] - - for timezone in timezones: - with set_timezone(timezone): - rng = date_range('1/1/2000 9:30', periods=10, freq='D', - tz=tzlocal()) - - result = rng.normalize() - expected = date_range('1/1/2000', periods=10, freq='D', - tz=tzlocal()) - self.assert_index_equal(result, expected) - - self.assertTrue(result.is_normalized) - self.assertFalse(rng.is_normalized) - - def test_tzaware_offset(self): - dates = date_range('2012-11-01', periods=3, tz='US/Pacific') - offset = dates + offsets.Hour(5) - self.assertEqual(dates[0] + offsets.Hour(5), offset[0]) - - # GH 6818 - for tz in ['UTC', 'US/Pacific', 'Asia/Tokyo']: - dates = date_range('2010-11-01 00:00', periods=3, tz=tz, freq='H') - expected = DatetimeIndex(['2010-11-01 05:00', '2010-11-01 06:00', - '2010-11-01 07:00'], freq='H', tz=tz) - - offset = dates + offsets.Hour(5) - self.assert_index_equal(offset, expected) - offset = dates + np.timedelta64(5, 'h') - self.assert_index_equal(offset, expected) - offset = dates + timedelta(hours=5) - self.assert_index_equal(offset, expected) - - def test_nat(self): - # GH 5546 - dates = [NaT] - idx = DatetimeIndex(dates) - idx = idx.tz_localize('US/Pacific') - self.assert_index_equal(idx, DatetimeIndex(dates, tz='US/Pacific')) - idx = 
idx.tz_convert('US/Eastern') - self.assert_index_equal(idx, DatetimeIndex(dates, tz='US/Eastern')) - idx = idx.tz_convert('UTC') - self.assert_index_equal(idx, DatetimeIndex(dates, tz='UTC')) - - dates = ['2010-12-01 00:00', '2010-12-02 00:00', NaT] - idx = DatetimeIndex(dates) - idx = idx.tz_localize('US/Pacific') - self.assert_index_equal(idx, DatetimeIndex(dates, tz='US/Pacific')) - idx = idx.tz_convert('US/Eastern') - expected = ['2010-12-01 03:00', '2010-12-02 03:00', NaT] - self.assert_index_equal(idx, DatetimeIndex(expected, tz='US/Eastern')) - - idx = idx + offsets.Hour(5) - expected = ['2010-12-01 08:00', '2010-12-02 08:00', NaT] - self.assert_index_equal(idx, DatetimeIndex(expected, tz='US/Eastern')) - idx = idx.tz_convert('US/Pacific') - expected = ['2010-12-01 05:00', '2010-12-02 05:00', NaT] - self.assert_index_equal(idx, DatetimeIndex(expected, tz='US/Pacific')) - - idx = idx + np.timedelta64(3, 'h') - expected = ['2010-12-01 08:00', '2010-12-02 08:00', NaT] - self.assert_index_equal(idx, DatetimeIndex(expected, tz='US/Pacific')) - - idx = idx.tz_convert('US/Eastern') - expected = ['2010-12-01 11:00', '2010-12-02 11:00', NaT] - self.assert_index_equal(idx, DatetimeIndex(expected, tz='US/Eastern')) - - -class TestTslib(tm.TestCase): - - def test_tslib_tz_convert(self): - def compare_utc_to_local(tz_didx, utc_didx): - f = lambda x: tslib.tz_convert_single(x, 'UTC', tz_didx.tz) - result = tslib.tz_convert(tz_didx.asi8, 'UTC', tz_didx.tz) - result_single = np.vectorize(f)(tz_didx.asi8) - self.assert_numpy_array_equal(result, result_single) - - def compare_local_to_utc(tz_didx, utc_didx): - f = lambda x: tslib.tz_convert_single(x, tz_didx.tz, 'UTC') - result = tslib.tz_convert(utc_didx.asi8, tz_didx.tz, 'UTC') - result_single = np.vectorize(f)(utc_didx.asi8) - self.assert_numpy_array_equal(result, result_single) - - for tz in ['UTC', 'Asia/Tokyo', 'US/Eastern', 'Europe/Moscow']: - # US: 2014-03-09 - 2014-11-11 - # MOSCOW: 2014-10-26 / 2014-12-31 - tz_didx 
= date_range('2014-03-01', '2015-01-10', freq='H', tz=tz) - utc_didx = date_range('2014-03-01', '2015-01-10', freq='H') - compare_utc_to_local(tz_didx, utc_didx) - # local tz to UTC can be differ in hourly (or higher) freqs because - # of DST - compare_local_to_utc(tz_didx, utc_didx) - - tz_didx = date_range('2000-01-01', '2020-01-01', freq='D', tz=tz) - utc_didx = date_range('2000-01-01', '2020-01-01', freq='D') - compare_utc_to_local(tz_didx, utc_didx) - compare_local_to_utc(tz_didx, utc_didx) - - tz_didx = date_range('2000-01-01', '2100-01-01', freq='A', tz=tz) - utc_didx = date_range('2000-01-01', '2100-01-01', freq='A') - compare_utc_to_local(tz_didx, utc_didx) - compare_local_to_utc(tz_didx, utc_didx) - - # Check empty array - result = tslib.tz_convert(np.array([], dtype=np.int64), - tslib.maybe_get_tz('US/Eastern'), - tslib.maybe_get_tz('Asia/Tokyo')) - self.assert_numpy_array_equal(result, np.array([], dtype=np.int64)) - - # Check all-NaT array - result = tslib.tz_convert(np.array([tslib.iNaT], dtype=np.int64), - tslib.maybe_get_tz('US/Eastern'), - tslib.maybe_get_tz('Asia/Tokyo')) - self.assert_numpy_array_equal(result, np.array( - [tslib.iNaT], dtype=np.int64)) diff --git a/pandas/tests/tslibs/__init__.py b/pandas/tests/tslibs/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py new file mode 100644 index 0000000000000..eb77e52e7c91d --- /dev/null +++ b/pandas/tests/tslibs/test_array_to_datetime.py @@ -0,0 +1,145 @@ +# -*- coding: utf-8 -*- +from datetime import datetime, date + +import numpy as np +import pytest + +from pandas._libs import tslib +from pandas.compat.numpy import np_array_datetime64_compat +import pandas.util.testing as tm + + +class TestParseISO8601(object): + @pytest.mark.parametrize('date_str, exp', [ + ('2011-01-02', datetime(2011, 1, 2)), + ('2011-1-2', datetime(2011, 1, 2)), + ('2011-01', datetime(2011, 1, 1)), + 
('2011-1', datetime(2011, 1, 1)), + ('2011 01 02', datetime(2011, 1, 2)), + ('2011.01.02', datetime(2011, 1, 2)), + ('2011/01/02', datetime(2011, 1, 2)), + ('2011\\01\\02', datetime(2011, 1, 2)), + ('2013-01-01 05:30:00', datetime(2013, 1, 1, 5, 30)), + ('2013-1-1 5:30:00', datetime(2013, 1, 1, 5, 30))]) + def test_parsers_iso8601(self, date_str, exp): + # GH#12060 + # test only the iso parser - flexibility to different + # separators and leadings 0s + # Timestamp construction falls back to dateutil + actual = tslib._test_parse_iso8601(date_str) + assert actual == exp + + @pytest.mark.parametrize( + 'date_str', + ['2011-01/02', '2011^11^11', + '201401', '201111', '200101', + # mixed separated and unseparated + '2005-0101', '200501-01', + '20010101 12:3456', + '20010101 1234:56', + # HHMMSS must have two digits in + # each component if unseparated + '20010101 1', '20010101 123', + '20010101 12345', '20010101 12345Z', + # wrong separator for HHMMSS + '2001-01-01 12-34-56']) + def test_parsers_iso8601_invalid(self, date_str): + # separators must all match - YYYYMM not valid + with pytest.raises(ValueError): + tslib._test_parse_iso8601(date_str) + + +class TestArrayToDatetime(object): + def test_parsing_valid_dates(self): + arr = np.array(['01-01-2013', '01-02-2013'], dtype=object) + result = tslib.array_to_datetime(arr) + expected = ['2013-01-01T00:00:00.000000000-0000', + '2013-01-02T00:00:00.000000000-0000'] + tm.assert_numpy_array_equal( + result, + np_array_datetime64_compat(expected, dtype='M8[ns]')) + + arr = np.array(['Mon Sep 16 2013', 'Tue Sep 17 2013'], dtype=object) + result = tslib.array_to_datetime(arr) + expected = ['2013-09-16T00:00:00.000000000-0000', + '2013-09-17T00:00:00.000000000-0000'] + tm.assert_numpy_array_equal( + result, + np_array_datetime64_compat(expected, dtype='M8[ns]')) + + @pytest.mark.parametrize('dt_string', [ + '01-01-2013 08:00:00+08:00', + '2013-01-01T08:00:00.000000000+0800', + '2012-12-31T16:00:00.000000000-0800', + '12-31-2012 
23:00:00-01:00']) + def test_parsing_timezone_offsets(self, dt_string): + # All of these datetime strings with offsets are equivalent + # to the same datetime after the timezone offset is added + arr = np.array(['01-01-2013 00:00:00'], dtype=object) + expected = tslib.array_to_datetime(arr) + + arr = np.array([dt_string], dtype=object) + result = tslib.array_to_datetime(arr) + tm.assert_numpy_array_equal(result, expected) + + def test_number_looking_strings_not_into_datetime(self): + # GH#4601 + # These strings don't look like datetimes so they shouldn't be + # attempted to be converted + arr = np.array(['-352.737091', '183.575577'], dtype=object) + result = tslib.array_to_datetime(arr, errors='ignore') + tm.assert_numpy_array_equal(result, arr) + + arr = np.array(['1', '2', '3', '4', '5'], dtype=object) + result = tslib.array_to_datetime(arr, errors='ignore') + tm.assert_numpy_array_equal(result, arr) + + @pytest.mark.parametrize('invalid_date', [ + date(1000, 1, 1), + datetime(1000, 1, 1), + '1000-01-01', + 'Jan 1, 1000', + np.datetime64('1000-01-01')]) + def test_coerce_outside_ns_bounds(self, invalid_date): + arr = np.array([invalid_date], dtype='object') + with pytest.raises(ValueError): + tslib.array_to_datetime(arr, errors='raise') + + result = tslib.array_to_datetime(arr, errors='coerce') + expected = np.array([tslib.iNaT], dtype='M8[ns]') + tm.assert_numpy_array_equal(result, expected) + + def test_coerce_outside_ns_bounds_one_valid(self): + arr = np.array(['1/1/1000', '1/1/2000'], dtype=object) + result = tslib.array_to_datetime(arr, errors='coerce') + expected = [tslib.iNaT, + '2000-01-01T00:00:00.000000000-0000'] + tm.assert_numpy_array_equal( + result, + np_array_datetime64_compat(expected, dtype='M8[ns]')) + + def test_coerce_of_invalid_datetimes(self): + arr = np.array(['01-01-2013', 'not_a_date', '1'], dtype=object) + + # Without coercing, the presence of any invalid dates prevents + # any values from being converted + result = 
tslib.array_to_datetime(arr, errors='ignore') + tm.assert_numpy_array_equal(result, arr) + + # With coercing, the invalid dates becomes iNaT + result = tslib.array_to_datetime(arr, errors='coerce') + expected = ['2013-01-01T00:00:00.000000000-0000', + tslib.iNaT, + tslib.iNaT] + + tm.assert_numpy_array_equal( + result, + np_array_datetime64_compat(expected, dtype='M8[ns]')) + + def test_to_datetime_barely_out_of_bounds(self): + # GH#19529 + # GH#19382 close enough to bounds that dropping nanos would result + # in an in-bounds datetime + arr = np.array(['2262-04-11 23:47:16.854775808'], dtype=object) + with pytest.raises(tslib.OutOfBoundsDatetime): + tslib.array_to_datetime(arr) diff --git a/pandas/tests/tslibs/test_ccalendar.py b/pandas/tests/tslibs/test_ccalendar.py new file mode 100644 index 0000000000000..b5d562a7b5a9c --- /dev/null +++ b/pandas/tests/tslibs/test_ccalendar.py @@ -0,0 +1,18 @@ +# -*- coding: utf-8 -*- +from datetime import datetime + +import numpy as np + +from pandas._libs.tslibs import ccalendar + + +def test_get_day_of_year(): + assert ccalendar.get_day_of_year(2001, 3, 1) == 60 + assert ccalendar.get_day_of_year(2004, 3, 1) == 61 + assert ccalendar.get_day_of_year(1907, 12, 31) == 365 + assert ccalendar.get_day_of_year(2004, 12, 31) == 366 + + dt = datetime.fromordinal(1 + np.random.randint(365 * 4000)) + result = ccalendar.get_day_of_year(dt.year, dt.month, dt.day) + expected = (dt - dt.replace(month=1, day=1)).days + 1 + assert result == expected diff --git a/pandas/tests/tslibs/test_conversion.py b/pandas/tests/tslibs/test_conversion.py new file mode 100644 index 0000000000000..76038136c26cb --- /dev/null +++ b/pandas/tests/tslibs/test_conversion.py @@ -0,0 +1,57 @@ +# -*- coding: utf-8 -*- + +import numpy as np +import pytest + +import pandas.util.testing as tm +from pandas import date_range +from pandas._libs.tslib import iNaT +from pandas._libs.tslibs import conversion, timezones + + +def compare_utc_to_local(tz_didx, utc_didx): + f = 
lambda x: conversion.tz_convert_single(x, 'UTC', tz_didx.tz) + result = conversion.tz_convert(tz_didx.asi8, 'UTC', tz_didx.tz) + result_single = np.vectorize(f)(tz_didx.asi8) + tm.assert_numpy_array_equal(result, result_single) + + +def compare_local_to_utc(tz_didx, utc_didx): + f = lambda x: conversion.tz_convert_single(x, tz_didx.tz, 'UTC') + result = conversion.tz_convert(utc_didx.asi8, tz_didx.tz, 'UTC') + result_single = np.vectorize(f)(utc_didx.asi8) + tm.assert_numpy_array_equal(result, result_single) + + +class TestTZConvert(object): + + @pytest.mark.parametrize('tz', ['UTC', 'Asia/Tokyo', + 'US/Eastern', 'Europe/Moscow']) + def test_tz_convert_single_matches_tz_convert_hourly(self, tz): + # US: 2014-03-09 - 2014-11-11 + # MOSCOW: 2014-10-26 / 2014-12-31 + tz_didx = date_range('2014-03-01', '2015-01-10', freq='H', tz=tz) + utc_didx = date_range('2014-03-01', '2015-01-10', freq='H') + compare_utc_to_local(tz_didx, utc_didx) + + # local tz to UTC can be differ in hourly (or higher) freqs because + # of DST + compare_local_to_utc(tz_didx, utc_didx) + + @pytest.mark.parametrize('tz', ['UTC', 'Asia/Tokyo', + 'US/Eastern', 'Europe/Moscow']) + @pytest.mark.parametrize('freq', ['D', 'A']) + def test_tz_convert_single_matches_tz_convert(self, tz, freq): + tz_didx = date_range('2000-01-01', '2020-01-01', freq=freq, tz=tz) + utc_didx = date_range('2000-01-01', '2020-01-01', freq=freq) + compare_utc_to_local(tz_didx, utc_didx) + compare_local_to_utc(tz_didx, utc_didx) + + @pytest.mark.parametrize('arr', [ + pytest.param(np.array([], dtype=np.int64), id='empty'), + pytest.param(np.array([iNaT], dtype=np.int64), id='all_nat')]) + def test_tz_convert_corner(self, arr): + result = conversion.tz_convert(arr, + timezones.maybe_get_tz('US/Eastern'), + timezones.maybe_get_tz('Asia/Tokyo')) + tm.assert_numpy_array_equal(result, arr) diff --git a/pandas/tests/tslibs/test_libfrequencies.py b/pandas/tests/tslibs/test_libfrequencies.py new file mode 100644 index 
0000000000000..601d542da3095 --- /dev/null +++ b/pandas/tests/tslibs/test_libfrequencies.py @@ -0,0 +1,116 @@ +# -*- coding: utf-8 -*- + +import pandas.util.testing as tm + +from pandas.tseries import offsets +from pandas._libs.tslibs.frequencies import (get_rule_month, + _period_str_to_code, + _INVALID_FREQ_ERROR, + is_superperiod, is_subperiod) + + +def assert_aliases_deprecated(freq, expected, aliases): + assert isinstance(aliases, list) + assert (_period_str_to_code(freq) == expected) + + for alias in aliases: + with tm.assert_raises_regex(ValueError, _INVALID_FREQ_ERROR): + _period_str_to_code(alias) + + +def test_get_rule_month(): + result = get_rule_month('W') + assert (result == 'DEC') + result = get_rule_month(offsets.Week()) + assert (result == 'DEC') + + result = get_rule_month('D') + assert (result == 'DEC') + result = get_rule_month(offsets.Day()) + assert (result == 'DEC') + + result = get_rule_month('Q') + assert (result == 'DEC') + result = get_rule_month(offsets.QuarterEnd(startingMonth=12)) + + result = get_rule_month('Q-JAN') + assert (result == 'JAN') + result = get_rule_month(offsets.QuarterEnd(startingMonth=1)) + assert (result == 'JAN') + + result = get_rule_month('A-DEC') + assert (result == 'DEC') + result = get_rule_month('Y-DEC') + assert (result == 'DEC') + result = get_rule_month(offsets.YearEnd()) + assert (result == 'DEC') + + result = get_rule_month('A-MAY') + assert (result == 'MAY') + result = get_rule_month('Y-MAY') + assert (result == 'MAY') + result = get_rule_month(offsets.YearEnd(month=5)) + assert (result == 'MAY') + + +def test_period_str_to_code(): + assert (_period_str_to_code('A') == 1000) + assert (_period_str_to_code('A-DEC') == 1000) + assert (_period_str_to_code('A-JAN') == 1001) + assert (_period_str_to_code('Y') == 1000) + assert (_period_str_to_code('Y-DEC') == 1000) + assert (_period_str_to_code('Y-JAN') == 1001) + + assert (_period_str_to_code('Q') == 2000) + assert (_period_str_to_code('Q-DEC') == 2000) + assert 
(_period_str_to_code('Q-FEB') == 2002) + + assert_aliases_deprecated("M", 3000, ["MTH", "MONTH", "MONTHLY"]) + + assert (_period_str_to_code('W') == 4000) + assert (_period_str_to_code('W-SUN') == 4000) + assert (_period_str_to_code('W-FRI') == 4005) + + assert_aliases_deprecated("B", 5000, ["BUS", "BUSINESS", + "BUSINESSLY", "WEEKDAY"]) + assert_aliases_deprecated("D", 6000, ["DAY", "DLY", "DAILY"]) + assert_aliases_deprecated("H", 7000, ["HR", "HOUR", "HRLY", "HOURLY"]) + + assert_aliases_deprecated("T", 8000, ["minute", "MINUTE", "MINUTELY"]) + assert (_period_str_to_code('Min') == 8000) + + assert_aliases_deprecated("S", 9000, ["sec", "SEC", "SECOND", "SECONDLY"]) + assert_aliases_deprecated("L", 10000, ["MILLISECOND", "MILLISECONDLY"]) + assert (_period_str_to_code('ms') == 10000) + + assert_aliases_deprecated("U", 11000, ["MICROSECOND", "MICROSECONDLY"]) + assert (_period_str_to_code('US') == 11000) + + assert_aliases_deprecated("N", 12000, ["NANOSECOND", "NANOSECONDLY"]) + assert (_period_str_to_code('NS') == 12000) + + +def test_is_superperiod_subperiod(): + + # input validation + assert not (is_superperiod(offsets.YearEnd(), None)) + assert not (is_subperiod(offsets.MonthEnd(), None)) + assert not (is_superperiod(None, offsets.YearEnd())) + assert not (is_subperiod(None, offsets.MonthEnd())) + assert not (is_superperiod(None, None)) + assert not (is_subperiod(None, None)) + + assert (is_superperiod(offsets.YearEnd(), offsets.MonthEnd())) + assert (is_subperiod(offsets.MonthEnd(), offsets.YearEnd())) + + assert (is_superperiod(offsets.Hour(), offsets.Minute())) + assert (is_subperiod(offsets.Minute(), offsets.Hour())) + + assert (is_superperiod(offsets.Second(), offsets.Milli())) + assert (is_subperiod(offsets.Milli(), offsets.Second())) + + assert (is_superperiod(offsets.Milli(), offsets.Micro())) + assert (is_subperiod(offsets.Micro(), offsets.Milli())) + + assert (is_superperiod(offsets.Micro(), offsets.Nano())) + assert (is_subperiod(offsets.Nano(), 
offsets.Micro())) diff --git a/pandas/tests/tslibs/test_liboffsets.py b/pandas/tests/tslibs/test_liboffsets.py new file mode 100644 index 0000000000000..a31a79d2f68ed --- /dev/null +++ b/pandas/tests/tslibs/test_liboffsets.py @@ -0,0 +1,172 @@ +# -*- coding: utf-8 -*- +""" +Tests for helper functions in the cython tslibs.offsets +""" +from datetime import datetime + +import pytest + +from pandas import Timestamp + +import pandas._libs.tslibs.offsets as liboffsets +from pandas._libs.tslibs.offsets import roll_qtrday + + +def test_get_lastbday(): + dt = datetime(2017, 11, 30) + assert dt.weekday() == 3 # i.e. this is a business day + assert liboffsets.get_lastbday(dt.year, dt.month) == 30 + + dt = datetime(1993, 10, 31) + assert dt.weekday() == 6 # i.e. this is not a business day + assert liboffsets.get_lastbday(dt.year, dt.month) == 29 + + +def test_get_firstbday(): + dt = datetime(2017, 4, 1) + assert dt.weekday() == 5 # i.e. not a weekday + assert liboffsets.get_firstbday(dt.year, dt.month) == 3 + + dt = datetime(1993, 10, 1) + assert dt.weekday() == 4 # i.e. 
a business day + assert liboffsets.get_firstbday(dt.year, dt.month) == 1 + + +def test_shift_month(): + dt = datetime(2017, 11, 30) + assert liboffsets.shift_month(dt, 0, 'business_end') == dt + assert liboffsets.shift_month(dt, 0, + 'business_start') == datetime(2017, 11, 1) + + ts = Timestamp('1929-05-05') + assert liboffsets.shift_month(ts, 1, 'start') == Timestamp('1929-06-01') + assert liboffsets.shift_month(ts, -3, 'end') == Timestamp('1929-02-28') + + assert liboffsets.shift_month(ts, 25, None) == Timestamp('1931-06-5') + + # Try to shift to April 31, then shift back to Apr 30 to get a real date + assert liboffsets.shift_month(ts, -1, 31) == Timestamp('1929-04-30') + + dt = datetime(2017, 11, 15) + + assert liboffsets.shift_month(dt, 0, day_opt=None) == dt + assert liboffsets.shift_month(dt, 0, day_opt=15) == dt + + assert liboffsets.shift_month(dt, 1, + day_opt='start') == datetime(2017, 12, 1) + + assert liboffsets.shift_month(dt, -145, + day_opt='end') == datetime(2005, 10, 31) + + with pytest.raises(ValueError): + liboffsets.shift_month(dt, 3, day_opt='this should raise') + + +def test_get_day_of_month(): + # get_day_of_month is not directly exposed; we test it via roll_yearday + dt = datetime(2017, 11, 15) + + with pytest.raises(ValueError): + # To hit the raising case we need month == dt.month and n > 0 + liboffsets.roll_yearday(dt, n=3, month=11, day_opt='foo') + + +def test_roll_yearday(): + # Copied from doctest examples + month = 3 + day_opt = 'start' # `other` will be compared to March 1 + other = datetime(2017, 2, 10) # before March 1 + assert liboffsets.roll_yearday(other, 2, month, day_opt) == 1 + assert liboffsets.roll_yearday(other, -7, month, day_opt) == -7 + assert liboffsets.roll_yearday(other, 0, month, day_opt) == 0 + + other = Timestamp('2014-03-15', tz='US/Eastern') # after March 1 + assert liboffsets.roll_yearday(other, 2, month, day_opt) == 2 + assert liboffsets.roll_yearday(other, -7, month, day_opt) == -6 + assert 
liboffsets.roll_yearday(other, 0, month, day_opt) == 1 + + month = 6 + day_opt = 'end' # `other` will be compared to June 30 + other = datetime(1999, 6, 29) # before June 30 + assert liboffsets.roll_yearday(other, 5, month, day_opt) == 4 + assert liboffsets.roll_yearday(other, -7, month, day_opt) == -7 + assert liboffsets.roll_yearday(other, 0, month, day_opt) == 0 + + other = Timestamp(2072, 8, 24, 6, 17, 18) # after June 30 + assert liboffsets.roll_yearday(other, 5, month, day_opt) == 5 + assert liboffsets.roll_yearday(other, -7, month, day_opt) == -6 + assert liboffsets.roll_yearday(other, 0, month, day_opt) == 1 + + +def test_roll_qtrday(): + other = Timestamp(2072, 10, 1, 6, 17, 18) # Saturday + for day_opt in ['start', 'end', 'business_start', 'business_end']: + # as long as (other.month % 3) != (month % 3), day_opt is irrelevant + # the `day_opt` doesn't matter. + month = 5 # (other.month % 3) < (month % 3) + assert roll_qtrday(other, 4, month, day_opt, modby=3) == 3 + assert roll_qtrday(other, -3, month, day_opt, modby=3) == -3 + + month = 3 # (other.month % 3) > (month % 3) + assert roll_qtrday(other, 4, month, day_opt, modby=3) == 4 + assert roll_qtrday(other, -3, month, day_opt, modby=3) == -2 + + month = 2 + other = datetime(1999, 5, 31) # Monday + # has (other.month % 3) == (month % 3) + + n = 2 + assert roll_qtrday(other, n, month, 'start', modby=3) == n + assert roll_qtrday(other, n, month, 'end', modby=3) == n + assert roll_qtrday(other, n, month, 'business_start', modby=3) == n + assert roll_qtrday(other, n, month, 'business_end', modby=3) == n + + n = -1 + assert roll_qtrday(other, n, month, 'start', modby=3) == n + 1 + assert roll_qtrday(other, n, month, 'end', modby=3) == n + assert roll_qtrday(other, n, month, 'business_start', modby=3) == n + 1 + assert roll_qtrday(other, n, month, 'business_end', modby=3) == n + + other = Timestamp(2072, 10, 1, 6, 17, 18) # Saturday + month = 4 # (other.month % 3) == (month % 3) + n = 2 + assert 
roll_qtrday(other, n, month, 'start', modby=3) == n + assert roll_qtrday(other, n, month, 'end', modby=3) == n - 1 + assert roll_qtrday(other, n, month, 'business_start', modby=3) == n - 1 + assert roll_qtrday(other, n, month, 'business_end', modby=3) == n - 1 + + n = -1 + assert roll_qtrday(other, n, month, 'start', modby=3) == n + assert roll_qtrday(other, n, month, 'end', modby=3) == n + assert roll_qtrday(other, n, month, 'business_start', modby=3) == n + assert roll_qtrday(other, n, month, 'business_end', modby=3) == n + + other = Timestamp(2072, 10, 3, 6, 17, 18) # First businessday + month = 4 # (other.month % 3) == (month % 3) + n = 2 + assert roll_qtrday(other, n, month, 'start', modby=3) == n + assert roll_qtrday(other, n, month, 'end', modby=3) == n - 1 + assert roll_qtrday(other, n, month, 'business_start', modby=3) == n + assert roll_qtrday(other, n, month, 'business_end', modby=3) == n - 1 + + n = -1 + assert roll_qtrday(other, n, month, 'start', modby=3) == n + 1 + assert roll_qtrday(other, n, month, 'end', modby=3) == n + assert roll_qtrday(other, n, month, 'business_start', modby=3) == n + assert roll_qtrday(other, n, month, 'business_end', modby=3) == n + + +def test_roll_convention(): + other = 29 + before = 1 + after = 31 + + n = 42 + assert liboffsets.roll_convention(other, n, other) == n + assert liboffsets.roll_convention(other, n, before) == n + assert liboffsets.roll_convention(other, n, after) == n - 1 + + n = -4 + assert liboffsets.roll_convention(other, n, other) == n + assert liboffsets.roll_convention(other, n, before) == n + 1 + assert liboffsets.roll_convention(other, n, after) == n diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py new file mode 100644 index 0000000000000..34cce088a8b42 --- /dev/null +++ b/pandas/tests/tslibs/test_parsing.py @@ -0,0 +1,229 @@ +# -*- coding: utf-8 -*- +""" +Tests for Timestamp parsing, aimed at pandas/_libs/tslibs/parsing.pyx +""" +from datetime import datetime 
+import numpy as np +import pytest +from dateutil.parser import parse + +import pandas.util._test_decorators as td +from pandas.conftest import is_dateutil_le_261, is_dateutil_gt_261 +from pandas import compat +from pandas.util import testing as tm +from pandas._libs.tslibs import parsing +from pandas._libs.tslibs.parsing import parse_time_string + + +class TestParseQuarters(object): + + def test_parse_time_string(self): + (date, parsed, reso) = parse_time_string('4Q1984') + (date_lower, parsed_lower, reso_lower) = parse_time_string('4q1984') + assert date == date_lower + assert parsed == parsed_lower + assert reso == reso_lower + + def test_parse_time_quarter_w_dash(self): + # https://github.com/pandas-dev/pandas/issue/9688 + pairs = [('1988-Q2', '1988Q2'), ('2Q-1988', '2Q1988')] + + for dashed, normal in pairs: + (date_dash, parsed_dash, reso_dash) = parse_time_string(dashed) + (date, parsed, reso) = parse_time_string(normal) + + assert date_dash == date + assert parsed_dash == parsed + assert reso_dash == reso + + pytest.raises(parsing.DateParseError, parse_time_string, "-2Q1992") + pytest.raises(parsing.DateParseError, parse_time_string, "2-Q1992") + pytest.raises(parsing.DateParseError, parse_time_string, "4-4Q1992") + + +class TestDatetimeParsingWrappers(object): + def test_does_not_convert_mixed_integer(self): + bad_date_strings = ('-50000', '999', '123.1234', 'm', 'T') + + for bad_date_string in bad_date_strings: + assert not parsing._does_string_look_like_datetime(bad_date_string) + + good_date_strings = ('2012-01-01', + '01/01/2012', + 'Mon Sep 16, 2013', + '01012012', + '0101', + '1-1') + + for good_date_string in good_date_strings: + assert parsing._does_string_look_like_datetime(good_date_string) + + def test_parsers_quarterly_with_freq(self): + msg = ('Incorrect quarterly string is given, quarter ' + 'must be between 1 and 4: 2013Q5') + with tm.assert_raises_regex(parsing.DateParseError, msg): + parsing.parse_time_string('2013Q5') + + # GH 5418 + msg 
= ('Unable to retrieve month information from given freq: ' + 'INVLD-L-DEC-SAT') + with tm.assert_raises_regex(parsing.DateParseError, msg): + parsing.parse_time_string('2013Q1', freq='INVLD-L-DEC-SAT') + + cases = {('2013Q2', None): datetime(2013, 4, 1), + ('2013Q2', 'A-APR'): datetime(2012, 8, 1), + ('2013-Q2', 'A-DEC'): datetime(2013, 4, 1)} + + for (date_str, freq), exp in compat.iteritems(cases): + result, _, _ = parsing.parse_time_string(date_str, freq=freq) + assert result == exp + + def test_parsers_quarter_invalid(self): + + cases = ['2Q 2005', '2Q-200A', '2Q-200', '22Q2005', '6Q-20', '2Q200.'] + for case in cases: + pytest.raises(ValueError, parsing.parse_time_string, case) + + def test_parsers_monthfreq(self): + cases = {'201101': datetime(2011, 1, 1, 0, 0), + '200005': datetime(2000, 5, 1, 0, 0)} + + for date_str, expected in compat.iteritems(cases): + result1, _, _ = parsing.parse_time_string(date_str, freq='M') + assert result1 == expected + + +class TestGuessDatetimeFormat(object): + + @td.skip_if_not_us_locale + @is_dateutil_le_261 + @pytest.mark.parametrize( + "string, format", + [ + ('20111230', '%Y%m%d'), + ('2011-12-30', '%Y-%m-%d'), + ('30-12-2011', '%d-%m-%Y'), + ('2011-12-30 00:00:00', '%Y-%m-%d %H:%M:%S'), + ('2011-12-30T00:00:00', '%Y-%m-%dT%H:%M:%S'), + ('2011-12-30 00:00:00.000000', + '%Y-%m-%d %H:%M:%S.%f')]) + def test_guess_datetime_format_with_parseable_formats( + self, string, format): + result = parsing._guess_datetime_format(string) + assert result == format + + @td.skip_if_not_us_locale + @is_dateutil_gt_261 + @pytest.mark.parametrize( + "string", + ['20111230', '2011-12-30', '30-12-2011', + '2011-12-30 00:00:00', '2011-12-30T00:00:00', + '2011-12-30 00:00:00.000000']) + def test_guess_datetime_format_with_parseable_formats_gt_261( + self, string): + result = parsing._guess_datetime_format(string) + assert result is None + + @is_dateutil_le_261 + @pytest.mark.parametrize( + "dayfirst, expected", + [ + (True, "%d/%m/%Y"), + (False, 
"%m/%d/%Y")]) + def test_guess_datetime_format_with_dayfirst(self, dayfirst, expected): + ambiguous_string = '01/01/2011' + result = parsing._guess_datetime_format( + ambiguous_string, dayfirst=dayfirst) + assert result == expected + + @is_dateutil_gt_261 + @pytest.mark.parametrize( + "dayfirst", [True, False]) + def test_guess_datetime_format_with_dayfirst_gt_261(self, dayfirst): + ambiguous_string = '01/01/2011' + result = parsing._guess_datetime_format( + ambiguous_string, dayfirst=dayfirst) + assert result is None + + @td.skip_if_has_locale + @is_dateutil_le_261 + @pytest.mark.parametrize( + "string, format", + [ + ('30/Dec/2011', '%d/%b/%Y'), + ('30/December/2011', '%d/%B/%Y'), + ('30/Dec/2011 00:00:00', '%d/%b/%Y %H:%M:%S')]) + def test_guess_datetime_format_with_locale_specific_formats( + self, string, format): + result = parsing._guess_datetime_format(string) + assert result == format + + @td.skip_if_has_locale + @is_dateutil_gt_261 + @pytest.mark.parametrize( + "string", + [ + '30/Dec/2011', + '30/December/2011', + '30/Dec/2011 00:00:00']) + def test_guess_datetime_format_with_locale_specific_formats_gt_261( + self, string): + result = parsing._guess_datetime_format(string) + assert result is None + + def test_guess_datetime_format_invalid_inputs(self): + # A datetime string must include a year, month and a day for it + # to be guessable, in addition to being a string that looks like + # a datetime + invalid_dts = [ + '2013', + '01/2013', + '12:00:00', + '1/1/1/1', + 'this_is_not_a_datetime', + '51a', + 9, + datetime(2011, 1, 1), + ] + + for invalid_dt in invalid_dts: + assert parsing._guess_datetime_format(invalid_dt) is None + + @is_dateutil_le_261 + @pytest.mark.parametrize( + "string, format", + [ + ('2011-1-1', '%Y-%m-%d'), + ('30-1-2011', '%d-%m-%Y'), + ('1/1/2011', '%m/%d/%Y'), + ('2011-1-1 00:00:00', '%Y-%m-%d %H:%M:%S'), + ('2011-1-1 0:0:0', '%Y-%m-%d %H:%M:%S'), + ('2011-1-3T00:00:0', '%Y-%m-%dT%H:%M:%S')]) + def 
test_guess_datetime_format_nopadding(self, string, format): + # GH 11142 + result = parsing._guess_datetime_format(string) + assert result == format + + @is_dateutil_gt_261 + @pytest.mark.parametrize( + "string", + [ + '2011-1-1', + '30-1-2011', + '1/1/2011', + '2011-1-1 00:00:00', + '2011-1-1 0:0:0', + '2011-1-3T00:00:0']) + def test_guess_datetime_format_nopadding_gt_261(self, string): + # GH 11142 + result = parsing._guess_datetime_format(string) + assert result is None + + +class TestArrayToDatetime(object): + def test_try_parse_dates(self): + arr = np.array(['5/1/2000', '6/1/2000', '7/1/2000'], dtype=object) + + result = parsing.try_parse_dates(arr, dayfirst=True) + expected = np.array([parse(d, dayfirst=True) for d in arr]) + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/tslibs/test_period_asfreq.py b/pandas/tests/tslibs/test_period_asfreq.py new file mode 100644 index 0000000000000..61737083e22ea --- /dev/null +++ b/pandas/tests/tslibs/test_period_asfreq.py @@ -0,0 +1,82 @@ +# -*- coding: utf-8 -*- + +from pandas._libs.tslibs.frequencies import get_freq +from pandas._libs.tslibs.period import period_ordinal, period_asfreq + + +class TestPeriodFreqConversion(object): + + def test_intraday_conversion_factors(self): + assert period_asfreq(1, get_freq('D'), get_freq('H'), False) == 24 + assert period_asfreq(1, get_freq('D'), get_freq('T'), False) == 1440 + assert period_asfreq(1, get_freq('D'), get_freq('S'), False) == 86400 + assert period_asfreq(1, get_freq('D'), + get_freq('L'), False) == 86400000 + assert period_asfreq(1, get_freq('D'), + get_freq('U'), False) == 86400000000 + assert period_asfreq(1, get_freq('D'), + get_freq('N'), False) == 86400000000000 + + assert period_asfreq(1, get_freq('H'), get_freq('T'), False) == 60 + assert period_asfreq(1, get_freq('H'), get_freq('S'), False) == 3600 + assert period_asfreq(1, get_freq('H'), + get_freq('L'), False) == 3600000 + assert period_asfreq(1, get_freq('H'), + get_freq('U'), 
False) == 3600000000 + assert period_asfreq(1, get_freq('H'), + get_freq('N'), False) == 3600000000000 + + assert period_asfreq(1, get_freq('T'), get_freq('S'), False) == 60 + assert period_asfreq(1, get_freq('T'), get_freq('L'), False) == 60000 + assert period_asfreq(1, get_freq('T'), + get_freq('U'), False) == 60000000 + assert period_asfreq(1, get_freq('T'), + get_freq('N'), False) == 60000000000 + + assert period_asfreq(1, get_freq('S'), get_freq('L'), False) == 1000 + assert period_asfreq(1, get_freq('S'), + get_freq('U'), False) == 1000000 + assert period_asfreq(1, get_freq('S'), + get_freq('N'), False) == 1000000000 + + assert period_asfreq(1, get_freq('L'), get_freq('U'), False) == 1000 + assert period_asfreq(1, get_freq('L'), + get_freq('N'), False) == 1000000 + + assert period_asfreq(1, get_freq('U'), get_freq('N'), False) == 1000 + + def test_period_ordinal_start_values(self): + # information for 1.1.1970 + assert period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, get_freq('A')) == 0 + assert period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, get_freq('M')) == 0 + assert period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, get_freq('W')) == 1 + assert period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, get_freq('D')) == 0 + assert period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, get_freq('B')) == 0 + + def test_period_ordinal_week(self): + assert period_ordinal(1970, 1, 4, 0, 0, 0, 0, 0, get_freq('W')) == 1 + assert period_ordinal(1970, 1, 5, 0, 0, 0, 0, 0, get_freq('W')) == 2 + assert period_ordinal(2013, 10, 6, 0, + 0, 0, 0, 0, get_freq('W')) == 2284 + assert period_ordinal(2013, 10, 7, 0, + 0, 0, 0, 0, get_freq('W')) == 2285 + + def test_period_ordinal_business_day(self): + # Thursday + assert period_ordinal(2013, 10, 3, 0, + 0, 0, 0, 0, get_freq('B')) == 11415 + # Friday + assert period_ordinal(2013, 10, 4, 0, + 0, 0, 0, 0, get_freq('B')) == 11416 + # Saturday + assert period_ordinal(2013, 10, 5, 0, + 0, 0, 0, 0, get_freq('B')) == 11417 + # Sunday + assert period_ordinal(2013, 10, 6, 0, + 0, 0, 
0, 0, get_freq('B')) == 11417 + # Monday + assert period_ordinal(2013, 10, 7, 0, + 0, 0, 0, 0, get_freq('B')) == 11417 + # Tuesday + assert period_ordinal(2013, 10, 8, 0, + 0, 0, 0, 0, get_freq('B')) == 11418 diff --git a/pandas/tests/tslibs/test_timezones.py b/pandas/tests/tslibs/test_timezones.py new file mode 100644 index 0000000000000..1bb355f267938 --- /dev/null +++ b/pandas/tests/tslibs/test_timezones.py @@ -0,0 +1,68 @@ +# -*- coding: utf-8 -*- +from datetime import datetime + +import pytest +import pytz +import dateutil.tz + +from pandas._libs import tslib +from pandas._libs.tslibs import timezones +from pandas import Timestamp + + +@pytest.mark.parametrize('tz_name', list(pytz.common_timezones)) +def test_cache_keys_are_distinct_for_pytz_vs_dateutil(tz_name): + if tz_name == 'UTC': + # skip utc as it's a special case in dateutil + return + tz_p = timezones.maybe_get_tz(tz_name) + tz_d = timezones.maybe_get_tz('dateutil/' + tz_name) + if tz_d is None: + # skip timezones that dateutil doesn't know about. 
+ return + assert timezones._p_tz_cache_key(tz_p) != timezones._p_tz_cache_key(tz_d) + + +def test_tzlocal(): + # GH#13583 + ts = Timestamp('2011-01-01', tz=dateutil.tz.tzlocal()) + assert ts.tz == dateutil.tz.tzlocal() + assert "tz='tzlocal()')" in repr(ts) + + tz = timezones.maybe_get_tz('tzlocal()') + assert tz == dateutil.tz.tzlocal() + + # get offset using normal datetime for test + offset = dateutil.tz.tzlocal().utcoffset(datetime(2011, 1, 1)) + offset = offset.total_seconds() * 1000000000 + assert ts.value + offset == Timestamp('2011-01-01').value + + +@pytest.mark.parametrize('eastern, localize', [ + (pytz.timezone('US/Eastern'), lambda tz, x: tz.localize(x)), + (dateutil.tz.gettz('US/Eastern'), lambda tz, x: x.replace(tzinfo=tz))]) +def test_infer_tz(eastern, localize): + utc = pytz.utc + + start_naive = datetime(2001, 1, 1) + end_naive = datetime(2009, 1, 1) + + start = localize(eastern, start_naive) + end = localize(eastern, end_naive) + + assert (timezones.infer_tzinfo(start, end) is + tslib._localize_pydatetime(start_naive, eastern).tzinfo) + assert (timezones.infer_tzinfo(start, None) is + tslib._localize_pydatetime(start_naive, eastern).tzinfo) + assert (timezones.infer_tzinfo(None, end) is + tslib._localize_pydatetime(end_naive, eastern).tzinfo) + + start = utc.localize(start_naive) + end = utc.localize(end_naive) + assert timezones.infer_tzinfo(start, end) is utc + + end = tslib._localize_pydatetime(end_naive, eastern) + with pytest.raises(Exception): + timezones.infer_tzinfo(start, end) + with pytest.raises(Exception): + timezones.infer_tzinfo(end, start) diff --git a/pandas/tests/types/test_cast.py b/pandas/tests/types/test_cast.py deleted file mode 100644 index 497130b117289..0000000000000 --- a/pandas/tests/types/test_cast.py +++ /dev/null @@ -1,276 +0,0 @@ -# -*- coding: utf-8 -*- - -""" -These test the private routines in types/cast.py - -""" - -from datetime import datetime -import numpy as np - -from pandas import Timedelta, Timestamp -from 
pandas.types.cast import (_possibly_downcast_to_dtype, - _possibly_convert_objects, - _infer_dtype_from_scalar, - _maybe_convert_string_to_object, - _maybe_convert_scalar, - _find_common_type) -from pandas.types.dtypes import (CategoricalDtype, - DatetimeTZDtype, PeriodDtype) -from pandas.util import testing as tm - - -class TestPossiblyDowncast(tm.TestCase): - - def test_downcast_conv(self): - # test downcasting - - arr = np.array([8.5, 8.6, 8.7, 8.8, 8.9999999999995]) - result = _possibly_downcast_to_dtype(arr, 'infer') - assert (np.array_equal(result, arr)) - - arr = np.array([8., 8., 8., 8., 8.9999999999995]) - result = _possibly_downcast_to_dtype(arr, 'infer') - expected = np.array([8, 8, 8, 8, 9]) - assert (np.array_equal(result, expected)) - - arr = np.array([8., 8., 8., 8., 9.0000000000005]) - result = _possibly_downcast_to_dtype(arr, 'infer') - expected = np.array([8, 8, 8, 8, 9]) - assert (np.array_equal(result, expected)) - - # conversions - - expected = np.array([1, 2]) - for dtype in [np.float64, object, np.int64]: - arr = np.array([1.0, 2.0], dtype=dtype) - result = _possibly_downcast_to_dtype(arr, 'infer') - tm.assert_almost_equal(result, expected, check_dtype=False) - - for dtype in [np.float64, object]: - expected = np.array([1.0, 2.0, np.nan], dtype=dtype) - arr = np.array([1.0, 2.0, np.nan], dtype=dtype) - result = _possibly_downcast_to_dtype(arr, 'infer') - tm.assert_almost_equal(result, expected) - - # empties - for dtype in [np.int32, np.float64, np.float32, np.bool_, - np.int64, object]: - arr = np.array([], dtype=dtype) - result = _possibly_downcast_to_dtype(arr, 'int64') - tm.assert_almost_equal(result, np.array([], dtype=np.int64)) - assert result.dtype == np.int64 - - def test_datetimelikes_nan(self): - arr = np.array([1, 2, np.nan]) - exp = np.array([1, 2, np.datetime64('NaT')], dtype='datetime64[ns]') - res = _possibly_downcast_to_dtype(arr, 'datetime64[ns]') - tm.assert_numpy_array_equal(res, exp) - - exp = np.array([1, 2, 
np.timedelta64('NaT')], dtype='timedelta64[ns]') - res = _possibly_downcast_to_dtype(arr, 'timedelta64[ns]') - tm.assert_numpy_array_equal(res, exp) - - -class TestInferDtype(tm.TestCase): - - def test_infer_dtype_from_scalar(self): - # Test that _infer_dtype_from_scalar is returning correct dtype for int - # and float. - - for dtypec in [np.uint8, np.int8, np.uint16, np.int16, np.uint32, - np.int32, np.uint64, np.int64]: - data = dtypec(12) - dtype, val = _infer_dtype_from_scalar(data) - self.assertEqual(dtype, type(data)) - - data = 12 - dtype, val = _infer_dtype_from_scalar(data) - self.assertEqual(dtype, np.int64) - - for dtypec in [np.float16, np.float32, np.float64]: - data = dtypec(12) - dtype, val = _infer_dtype_from_scalar(data) - self.assertEqual(dtype, dtypec) - - data = np.float(12) - dtype, val = _infer_dtype_from_scalar(data) - self.assertEqual(dtype, np.float64) - - for data in [True, False]: - dtype, val = _infer_dtype_from_scalar(data) - self.assertEqual(dtype, np.bool_) - - for data in [np.complex64(1), np.complex128(1)]: - dtype, val = _infer_dtype_from_scalar(data) - self.assertEqual(dtype, np.complex_) - - import datetime - for data in [np.datetime64(1, 'ns'), Timestamp(1), - datetime.datetime(2000, 1, 1, 0, 0)]: - dtype, val = _infer_dtype_from_scalar(data) - self.assertEqual(dtype, 'M8[ns]') - - for data in [np.timedelta64(1, 'ns'), Timedelta(1), - datetime.timedelta(1)]: - dtype, val = _infer_dtype_from_scalar(data) - self.assertEqual(dtype, 'm8[ns]') - - for data in [datetime.date(2000, 1, 1), - Timestamp(1, tz='US/Eastern'), 'foo']: - dtype, val = _infer_dtype_from_scalar(data) - self.assertEqual(dtype, np.object_) - - -class TestMaybe(tm.TestCase): - - def test_maybe_convert_string_to_array(self): - result = _maybe_convert_string_to_object('x') - tm.assert_numpy_array_equal(result, np.array(['x'], dtype=object)) - self.assertTrue(result.dtype == object) - - result = _maybe_convert_string_to_object(1) - self.assertEqual(result, 1) - - arr 
= np.array(['x', 'y'], dtype=str) - result = _maybe_convert_string_to_object(arr) - tm.assert_numpy_array_equal(result, np.array(['x', 'y'], dtype=object)) - self.assertTrue(result.dtype == object) - - # unicode - arr = np.array(['x', 'y']).astype('U') - result = _maybe_convert_string_to_object(arr) - tm.assert_numpy_array_equal(result, np.array(['x', 'y'], dtype=object)) - self.assertTrue(result.dtype == object) - - # object - arr = np.array(['x', 2], dtype=object) - result = _maybe_convert_string_to_object(arr) - tm.assert_numpy_array_equal(result, np.array(['x', 2], dtype=object)) - self.assertTrue(result.dtype == object) - - def test_maybe_convert_scalar(self): - - # pass thru - result = _maybe_convert_scalar('x') - self.assertEqual(result, 'x') - result = _maybe_convert_scalar(np.array([1])) - self.assertEqual(result, np.array([1])) - - # leave scalar dtype - result = _maybe_convert_scalar(np.int64(1)) - self.assertEqual(result, np.int64(1)) - result = _maybe_convert_scalar(np.int32(1)) - self.assertEqual(result, np.int32(1)) - result = _maybe_convert_scalar(np.float32(1)) - self.assertEqual(result, np.float32(1)) - result = _maybe_convert_scalar(np.int64(1)) - self.assertEqual(result, np.float64(1)) - - # coerce - result = _maybe_convert_scalar(1) - self.assertEqual(result, np.int64(1)) - result = _maybe_convert_scalar(1.0) - self.assertEqual(result, np.float64(1)) - result = _maybe_convert_scalar(Timestamp('20130101')) - self.assertEqual(result, Timestamp('20130101').value) - result = _maybe_convert_scalar(datetime(2013, 1, 1)) - self.assertEqual(result, Timestamp('20130101').value) - result = _maybe_convert_scalar(Timedelta('1 day 1 min')) - self.assertEqual(result, Timedelta('1 day 1 min').value) - - -class TestConvert(tm.TestCase): - - def test_possibly_convert_objects_copy(self): - values = np.array([1, 2]) - - out = _possibly_convert_objects(values, copy=False) - self.assertTrue(values is out) - - out = _possibly_convert_objects(values, copy=True) - 
self.assertTrue(values is not out) - - values = np.array(['apply', 'banana']) - out = _possibly_convert_objects(values, copy=False) - self.assertTrue(values is out) - - out = _possibly_convert_objects(values, copy=True) - self.assertTrue(values is not out) - - -class TestCommonTypes(tm.TestCase): - - def test_numpy_dtypes(self): - # (source_types, destination_type) - testcases = ( - # identity - ((np.int64,), np.int64), - ((np.uint64,), np.uint64), - ((np.float32,), np.float32), - ((np.object,), np.object), - - # into ints - ((np.int16, np.int64), np.int64), - ((np.int32, np.uint32), np.int64), - ((np.uint16, np.uint64), np.uint64), - - # into floats - ((np.float16, np.float32), np.float32), - ((np.float16, np.int16), np.float32), - ((np.float32, np.int16), np.float32), - ((np.uint64, np.int64), np.float64), - ((np.int16, np.float64), np.float64), - ((np.float16, np.int64), np.float64), - - # into others - ((np.complex128, np.int32), np.complex128), - ((np.object, np.float32), np.object), - ((np.object, np.int16), np.object), - - ((np.dtype('datetime64[ns]'), np.dtype('datetime64[ns]')), - np.dtype('datetime64[ns]')), - ((np.dtype('timedelta64[ns]'), np.dtype('timedelta64[ns]')), - np.dtype('timedelta64[ns]')), - - ((np.dtype('datetime64[ns]'), np.dtype('datetime64[ms]')), - np.dtype('datetime64[ns]')), - ((np.dtype('timedelta64[ms]'), np.dtype('timedelta64[ns]')), - np.dtype('timedelta64[ns]')), - - ((np.dtype('datetime64[ns]'), np.dtype('timedelta64[ns]')), - np.object), - ((np.dtype('datetime64[ns]'), np.int64), np.object) - ) - for src, common in testcases: - self.assertEqual(_find_common_type(src), common) - - with tm.assertRaises(ValueError): - # empty - _find_common_type([]) - - def test_categorical_dtype(self): - dtype = CategoricalDtype() - self.assertEqual(_find_common_type([dtype]), 'category') - self.assertEqual(_find_common_type([dtype, dtype]), 'category') - self.assertEqual(_find_common_type([np.object, dtype]), np.object) - - def 
test_datetimetz_dtype(self): - dtype = DatetimeTZDtype(unit='ns', tz='US/Eastern') - self.assertEqual(_find_common_type([dtype, dtype]), - 'datetime64[ns, US/Eastern]') - - for dtype2 in [DatetimeTZDtype(unit='ns', tz='Asia/Tokyo'), - np.dtype('datetime64[ns]'), np.object, np.int64]: - self.assertEqual(_find_common_type([dtype, dtype2]), np.object) - self.assertEqual(_find_common_type([dtype2, dtype]), np.object) - - def test_period_dtype(self): - dtype = PeriodDtype(freq='D') - self.assertEqual(_find_common_type([dtype, dtype]), 'period[D]') - - for dtype2 in [DatetimeTZDtype(unit='ns', tz='Asia/Tokyo'), - PeriodDtype(freq='2D'), PeriodDtype(freq='H'), - np.dtype('datetime64[ns]'), np.object, np.int64]: - self.assertEqual(_find_common_type([dtype, dtype2]), np.object) - self.assertEqual(_find_common_type([dtype2, dtype]), np.object) diff --git a/pandas/tests/types/test_common.py b/pandas/tests/types/test_common.py deleted file mode 100644 index 4667bbd47ad18..0000000000000 --- a/pandas/tests/types/test_common.py +++ /dev/null @@ -1,54 +0,0 @@ -# -*- coding: utf-8 -*- - -import numpy as np - -from pandas.types.dtypes import DatetimeTZDtype, PeriodDtype, CategoricalDtype -from pandas.types.common import pandas_dtype, is_dtype_equal - -import pandas.util.testing as tm - - -class TestPandasDtype(tm.TestCase): - - def test_numpy_dtype(self): - for dtype in ['M8[ns]', 'm8[ns]', 'object', 'float64', 'int64']: - self.assertEqual(pandas_dtype(dtype), np.dtype(dtype)) - - def test_numpy_string_dtype(self): - # do not parse freq-like string as period dtype - self.assertEqual(pandas_dtype('U'), np.dtype('U')) - self.assertEqual(pandas_dtype('S'), np.dtype('S')) - - def test_datetimetz_dtype(self): - for dtype in ['datetime64[ns, US/Eastern]', - 'datetime64[ns, Asia/Tokyo]', - 'datetime64[ns, UTC]']: - self.assertIs(pandas_dtype(dtype), DatetimeTZDtype(dtype)) - self.assertEqual(pandas_dtype(dtype), DatetimeTZDtype(dtype)) - self.assertEqual(pandas_dtype(dtype), dtype) - - def 
test_categorical_dtype(self): - self.assertEqual(pandas_dtype('category'), CategoricalDtype()) - - def test_period_dtype(self): - for dtype in ['period[D]', 'period[3M]', 'period[U]', - 'Period[D]', 'Period[3M]', 'Period[U]']: - self.assertIs(pandas_dtype(dtype), PeriodDtype(dtype)) - self.assertEqual(pandas_dtype(dtype), PeriodDtype(dtype)) - self.assertEqual(pandas_dtype(dtype), dtype) - - -def test_dtype_equal(): - assert is_dtype_equal(np.int64, np.int64) - assert not is_dtype_equal(np.int64, np.float64) - - p1 = PeriodDtype('D') - p2 = PeriodDtype('D') - assert is_dtype_equal(p1, p2) - assert not is_dtype_equal(np.int64, p1) - - p3 = PeriodDtype('2D') - assert not is_dtype_equal(p1, p3) - - assert not DatetimeTZDtype.is_dtype(np.int64) - assert not PeriodDtype.is_dtype(np.int64) diff --git a/pandas/tests/types/test_dtypes.py b/pandas/tests/types/test_dtypes.py deleted file mode 100644 index 8ef2868ae324f..0000000000000 --- a/pandas/tests/types/test_dtypes.py +++ /dev/null @@ -1,352 +0,0 @@ -# -*- coding: utf-8 -*- -from itertools import product - -import numpy as np -import pandas as pd -from pandas import Series, Categorical, date_range - -from pandas.types.dtypes import DatetimeTZDtype, PeriodDtype, CategoricalDtype -from pandas.types.common import (is_categorical_dtype, is_categorical, - is_datetime64tz_dtype, is_datetimetz, - is_period_dtype, is_period, - is_dtype_equal, is_datetime64_ns_dtype, - is_datetime64_dtype, - is_datetime64_any_dtype, is_string_dtype, - _coerce_to_dtype) -import pandas.util.testing as tm - - -class Base(object): - - def test_hash(self): - hash(self.dtype) - - def test_equality_invalid(self): - self.assertRaises(self.dtype == 'foo') - self.assertFalse(is_dtype_equal(self.dtype, np.int64)) - - def test_numpy_informed(self): - - # np.dtype doesn't know about our new dtype - def f(): - np.dtype(self.dtype) - - self.assertRaises(TypeError, f) - - self.assertNotEqual(self.dtype, np.str_) - self.assertNotEqual(np.str_, self.dtype) - - 
def test_pickle(self): - result = self.round_trip_pickle(self.dtype) - self.assertEqual(result, self.dtype) - - -class TestCategoricalDtype(Base, tm.TestCase): - - def setUp(self): - self.dtype = CategoricalDtype() - - def test_hash_vs_equality(self): - # make sure that we satisfy is semantics - dtype = self.dtype - dtype2 = CategoricalDtype() - self.assertTrue(dtype == dtype2) - self.assertTrue(dtype2 == dtype) - self.assertTrue(dtype is dtype2) - self.assertTrue(dtype2 is dtype) - self.assertTrue(hash(dtype) == hash(dtype2)) - - def test_equality(self): - self.assertTrue(is_dtype_equal(self.dtype, 'category')) - self.assertTrue(is_dtype_equal(self.dtype, CategoricalDtype())) - self.assertFalse(is_dtype_equal(self.dtype, 'foo')) - - def test_construction_from_string(self): - result = CategoricalDtype.construct_from_string('category') - self.assertTrue(is_dtype_equal(self.dtype, result)) - self.assertRaises( - TypeError, lambda: CategoricalDtype.construct_from_string('foo')) - - def test_is_dtype(self): - self.assertTrue(CategoricalDtype.is_dtype(self.dtype)) - self.assertTrue(CategoricalDtype.is_dtype('category')) - self.assertTrue(CategoricalDtype.is_dtype(CategoricalDtype())) - self.assertFalse(CategoricalDtype.is_dtype('foo')) - self.assertFalse(CategoricalDtype.is_dtype(np.float64)) - - def test_basic(self): - - self.assertTrue(is_categorical_dtype(self.dtype)) - - factor = Categorical(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']) - - s = Series(factor, name='A') - - # dtypes - self.assertTrue(is_categorical_dtype(s.dtype)) - self.assertTrue(is_categorical_dtype(s)) - self.assertFalse(is_categorical_dtype(np.dtype('float64'))) - - self.assertTrue(is_categorical(s.dtype)) - self.assertTrue(is_categorical(s)) - self.assertFalse(is_categorical(np.dtype('float64'))) - self.assertFalse(is_categorical(1.0)) - - -class TestDatetimeTZDtype(Base, tm.TestCase): - - def setUp(self): - self.dtype = DatetimeTZDtype('ns', 'US/Eastern') - - def test_hash_vs_equality(self): - # 
make sure that we satisfy is semantics - dtype = self.dtype - dtype2 = DatetimeTZDtype('ns', 'US/Eastern') - dtype3 = DatetimeTZDtype(dtype2) - self.assertTrue(dtype == dtype2) - self.assertTrue(dtype2 == dtype) - self.assertTrue(dtype3 == dtype) - self.assertTrue(dtype is dtype2) - self.assertTrue(dtype2 is dtype) - self.assertTrue(dtype3 is dtype) - self.assertTrue(hash(dtype) == hash(dtype2)) - self.assertTrue(hash(dtype) == hash(dtype3)) - - def test_construction(self): - self.assertRaises(ValueError, - lambda: DatetimeTZDtype('ms', 'US/Eastern')) - - def test_subclass(self): - a = DatetimeTZDtype('datetime64[ns, US/Eastern]') - b = DatetimeTZDtype('datetime64[ns, CET]') - - self.assertTrue(issubclass(type(a), type(a))) - self.assertTrue(issubclass(type(a), type(b))) - - def test_coerce_to_dtype(self): - self.assertEqual(_coerce_to_dtype('datetime64[ns, US/Eastern]'), - DatetimeTZDtype('ns', 'US/Eastern')) - self.assertEqual(_coerce_to_dtype('datetime64[ns, Asia/Tokyo]'), - DatetimeTZDtype('ns', 'Asia/Tokyo')) - - def test_compat(self): - self.assertTrue(is_datetime64tz_dtype(self.dtype)) - self.assertTrue(is_datetime64tz_dtype('datetime64[ns, US/Eastern]')) - self.assertTrue(is_datetime64_any_dtype(self.dtype)) - self.assertTrue(is_datetime64_any_dtype('datetime64[ns, US/Eastern]')) - self.assertTrue(is_datetime64_ns_dtype(self.dtype)) - self.assertTrue(is_datetime64_ns_dtype('datetime64[ns, US/Eastern]')) - self.assertFalse(is_datetime64_dtype(self.dtype)) - self.assertFalse(is_datetime64_dtype('datetime64[ns, US/Eastern]')) - - def test_construction_from_string(self): - result = DatetimeTZDtype('datetime64[ns, US/Eastern]') - self.assertTrue(is_dtype_equal(self.dtype, result)) - result = DatetimeTZDtype.construct_from_string( - 'datetime64[ns, US/Eastern]') - self.assertTrue(is_dtype_equal(self.dtype, result)) - self.assertRaises(TypeError, - lambda: DatetimeTZDtype.construct_from_string('foo')) - - def test_is_dtype(self): - 
self.assertTrue(DatetimeTZDtype.is_dtype(self.dtype)) - self.assertTrue(DatetimeTZDtype.is_dtype('datetime64[ns, US/Eastern]')) - self.assertFalse(DatetimeTZDtype.is_dtype('foo')) - self.assertTrue(DatetimeTZDtype.is_dtype(DatetimeTZDtype( - 'ns', 'US/Pacific'))) - self.assertFalse(DatetimeTZDtype.is_dtype(np.float64)) - - def test_equality(self): - self.assertTrue(is_dtype_equal(self.dtype, - 'datetime64[ns, US/Eastern]')) - self.assertTrue(is_dtype_equal(self.dtype, DatetimeTZDtype( - 'ns', 'US/Eastern'))) - self.assertFalse(is_dtype_equal(self.dtype, 'foo')) - self.assertFalse(is_dtype_equal(self.dtype, DatetimeTZDtype('ns', - 'CET'))) - self.assertFalse(is_dtype_equal( - DatetimeTZDtype('ns', 'US/Eastern'), DatetimeTZDtype( - 'ns', 'US/Pacific'))) - - # numpy compat - self.assertTrue(is_dtype_equal(np.dtype("M8[ns]"), "datetime64[ns]")) - - def test_basic(self): - - self.assertTrue(is_datetime64tz_dtype(self.dtype)) - - dr = date_range('20130101', periods=3, tz='US/Eastern') - s = Series(dr, name='A') - - # dtypes - self.assertTrue(is_datetime64tz_dtype(s.dtype)) - self.assertTrue(is_datetime64tz_dtype(s)) - self.assertFalse(is_datetime64tz_dtype(np.dtype('float64'))) - self.assertFalse(is_datetime64tz_dtype(1.0)) - - self.assertTrue(is_datetimetz(s)) - self.assertTrue(is_datetimetz(s.dtype)) - self.assertFalse(is_datetimetz(np.dtype('float64'))) - self.assertFalse(is_datetimetz(1.0)) - - def test_dst(self): - - dr1 = date_range('2013-01-01', periods=3, tz='US/Eastern') - s1 = Series(dr1, name='A') - self.assertTrue(is_datetimetz(s1)) - - dr2 = date_range('2013-08-01', periods=3, tz='US/Eastern') - s2 = Series(dr2, name='A') - self.assertTrue(is_datetimetz(s2)) - self.assertEqual(s1.dtype, s2.dtype) - - def test_parser(self): - # pr #11245 - for tz, constructor in product(('UTC', 'US/Eastern'), - ('M8', 'datetime64')): - self.assertEqual( - DatetimeTZDtype('%s[ns, %s]' % (constructor, tz)), - DatetimeTZDtype('ns', tz), - ) - - def test_empty(self): - dt = 
DatetimeTZDtype() - with tm.assertRaises(AttributeError): - str(dt) - - -class TestPeriodDtype(Base, tm.TestCase): - - def setUp(self): - self.dtype = PeriodDtype('D') - - def test_construction(self): - with tm.assertRaises(ValueError): - PeriodDtype('xx') - - for s in ['period[D]', 'Period[D]', 'D']: - dt = PeriodDtype(s) - self.assertEqual(dt.freq, pd.tseries.offsets.Day()) - self.assertTrue(is_period_dtype(dt)) - - for s in ['period[3D]', 'Period[3D]', '3D']: - dt = PeriodDtype(s) - self.assertEqual(dt.freq, pd.tseries.offsets.Day(3)) - self.assertTrue(is_period_dtype(dt)) - - for s in ['period[26H]', 'Period[26H]', '26H', - 'period[1D2H]', 'Period[1D2H]', '1D2H']: - dt = PeriodDtype(s) - self.assertEqual(dt.freq, pd.tseries.offsets.Hour(26)) - self.assertTrue(is_period_dtype(dt)) - - def test_subclass(self): - a = PeriodDtype('period[D]') - b = PeriodDtype('period[3D]') - - self.assertTrue(issubclass(type(a), type(a))) - self.assertTrue(issubclass(type(a), type(b))) - - def test_identity(self): - self.assertEqual(PeriodDtype('period[D]'), - PeriodDtype('period[D]')) - self.assertIs(PeriodDtype('period[D]'), - PeriodDtype('period[D]')) - - self.assertEqual(PeriodDtype('period[3D]'), - PeriodDtype('period[3D]')) - self.assertIs(PeriodDtype('period[3D]'), - PeriodDtype('period[3D]')) - - self.assertEqual(PeriodDtype('period[1S1U]'), - PeriodDtype('period[1000001U]')) - self.assertIs(PeriodDtype('period[1S1U]'), - PeriodDtype('period[1000001U]')) - - def test_coerce_to_dtype(self): - self.assertEqual(_coerce_to_dtype('period[D]'), - PeriodDtype('period[D]')) - self.assertEqual(_coerce_to_dtype('period[3M]'), - PeriodDtype('period[3M]')) - - def test_compat(self): - self.assertFalse(is_datetime64_ns_dtype(self.dtype)) - self.assertFalse(is_datetime64_ns_dtype('period[D]')) - self.assertFalse(is_datetime64_dtype(self.dtype)) - self.assertFalse(is_datetime64_dtype('period[D]')) - - def test_construction_from_string(self): - result = PeriodDtype('period[D]') - 
self.assertTrue(is_dtype_equal(self.dtype, result)) - result = PeriodDtype.construct_from_string('period[D]') - self.assertTrue(is_dtype_equal(self.dtype, result)) - with tm.assertRaises(TypeError): - PeriodDtype.construct_from_string('foo') - with tm.assertRaises(TypeError): - PeriodDtype.construct_from_string('period[foo]') - with tm.assertRaises(TypeError): - PeriodDtype.construct_from_string('foo[D]') - - with tm.assertRaises(TypeError): - PeriodDtype.construct_from_string('datetime64[ns]') - with tm.assertRaises(TypeError): - PeriodDtype.construct_from_string('datetime64[ns, US/Eastern]') - - def test_is_dtype(self): - self.assertTrue(PeriodDtype.is_dtype(self.dtype)) - self.assertTrue(PeriodDtype.is_dtype('period[D]')) - self.assertTrue(PeriodDtype.is_dtype('period[3D]')) - self.assertTrue(PeriodDtype.is_dtype(PeriodDtype('3D'))) - self.assertTrue(PeriodDtype.is_dtype('period[U]')) - self.assertTrue(PeriodDtype.is_dtype('period[S]')) - self.assertTrue(PeriodDtype.is_dtype(PeriodDtype('U'))) - self.assertTrue(PeriodDtype.is_dtype(PeriodDtype('S'))) - - self.assertFalse(PeriodDtype.is_dtype('D')) - self.assertFalse(PeriodDtype.is_dtype('3D')) - self.assertFalse(PeriodDtype.is_dtype('U')) - self.assertFalse(PeriodDtype.is_dtype('S')) - self.assertFalse(PeriodDtype.is_dtype('foo')) - self.assertFalse(PeriodDtype.is_dtype(np.object_)) - self.assertFalse(PeriodDtype.is_dtype(np.int64)) - self.assertFalse(PeriodDtype.is_dtype(np.float64)) - - def test_equality(self): - self.assertTrue(is_dtype_equal(self.dtype, 'period[D]')) - self.assertTrue(is_dtype_equal(self.dtype, PeriodDtype('D'))) - self.assertTrue(is_dtype_equal(self.dtype, PeriodDtype('D'))) - self.assertTrue(is_dtype_equal(PeriodDtype('D'), PeriodDtype('D'))) - - self.assertFalse(is_dtype_equal(self.dtype, 'D')) - self.assertFalse(is_dtype_equal(PeriodDtype('D'), PeriodDtype('2D'))) - - def test_basic(self): - self.assertTrue(is_period_dtype(self.dtype)) - - pidx = pd.period_range('2013-01-01 09:00', 
periods=5, freq='H') - - self.assertTrue(is_period_dtype(pidx.dtype)) - self.assertTrue(is_period_dtype(pidx)) - self.assertTrue(is_period(pidx)) - - s = Series(pidx, name='A') - # dtypes - # series results in object dtype currently, - # is_period checks period_arraylike - self.assertFalse(is_period_dtype(s.dtype)) - self.assertFalse(is_period_dtype(s)) - self.assertTrue(is_period(s)) - - self.assertFalse(is_period_dtype(np.dtype('float64'))) - self.assertFalse(is_period_dtype(1.0)) - self.assertFalse(is_period(np.dtype('float64'))) - self.assertFalse(is_period(1.0)) - - def test_empty(self): - dt = PeriodDtype() - with tm.assertRaises(AttributeError): - str(dt) - - def test_not_string(self): - # though PeriodDtype has object kind, it cannot be string - self.assertFalse(is_string_dtype(PeriodDtype('D'))) diff --git a/pandas/tests/types/test_generic.py b/pandas/tests/types/test_generic.py deleted file mode 100644 index c7c8b0becad63..0000000000000 --- a/pandas/tests/types/test_generic.py +++ /dev/null @@ -1,40 +0,0 @@ -# -*- coding: utf-8 -*- - -import numpy as np -import pandas as pd -import pandas.util.testing as tm -from pandas.types import generic as gt - - -class TestABCClasses(tm.TestCase): - tuples = [[1, 2, 2], ['red', 'blue', 'red']] - multi_index = pd.MultiIndex.from_arrays(tuples, names=('number', 'color')) - datetime_index = pd.to_datetime(['2000/1/1', '2010/1/1']) - timedelta_index = pd.to_timedelta(np.arange(5), unit='s') - period_index = pd.period_range('2000/1/1', '2010/1/1/', freq='M') - categorical = pd.Categorical([1, 2, 3], categories=[2, 3, 1]) - categorical_df = pd.DataFrame({"values": [1, 2, 3]}, index=categorical) - df = pd.DataFrame({'names': ['a', 'b', 'c']}, index=multi_index) - sparse_series = pd.Series([1, 2, 3]).to_sparse() - sparse_array = pd.SparseArray(np.random.randn(10)) - - def test_abc_types(self): - self.assertIsInstance(pd.Index(['a', 'b', 'c']), gt.ABCIndex) - self.assertIsInstance(pd.Int64Index([1, 2, 3]), gt.ABCInt64Index) - 
self.assertIsInstance(pd.UInt64Index([1, 2, 3]), gt.ABCUInt64Index) - self.assertIsInstance(pd.Float64Index([1, 2, 3]), gt.ABCFloat64Index) - self.assertIsInstance(self.multi_index, gt.ABCMultiIndex) - self.assertIsInstance(self.datetime_index, gt.ABCDatetimeIndex) - self.assertIsInstance(self.timedelta_index, gt.ABCTimedeltaIndex) - self.assertIsInstance(self.period_index, gt.ABCPeriodIndex) - self.assertIsInstance(self.categorical_df.index, - gt.ABCCategoricalIndex) - self.assertIsInstance(pd.Index(['a', 'b', 'c']), gt.ABCIndexClass) - self.assertIsInstance(pd.Int64Index([1, 2, 3]), gt.ABCIndexClass) - self.assertIsInstance(pd.Series([1, 2, 3]), gt.ABCSeries) - self.assertIsInstance(self.df, gt.ABCDataFrame) - self.assertIsInstance(self.df.to_panel(), gt.ABCPanel) - self.assertIsInstance(self.sparse_series, gt.ABCSparseSeries) - self.assertIsInstance(self.sparse_array, gt.ABCSparseArray) - self.assertIsInstance(self.categorical, gt.ABCCategorical) - self.assertIsInstance(pd.Period('2012', freq='A-DEC'), gt.ABCPeriod) diff --git a/pandas/tests/types/test_inference.py b/pandas/tests/types/test_inference.py deleted file mode 100644 index 629aa63f4a0ae..0000000000000 --- a/pandas/tests/types/test_inference.py +++ /dev/null @@ -1,966 +0,0 @@ -# -*- coding: utf-8 -*- - -""" -These the test the public routines exposed in types/common.py -related to inference and not otherwise tested in types/test_common.py - -""" - -import collections -import re -from datetime import datetime, date, timedelta, time -import numpy as np -import pytz - -import pandas as pd -from pandas import lib, tslib -from pandas import (Series, Index, DataFrame, Timedelta, - DatetimeIndex, TimedeltaIndex, Timestamp, - Panel, Period, Categorical) -from pandas.compat import u, PY2, lrange -from pandas.types import inference -from pandas.types.common import (is_timedelta64_dtype, - is_timedelta64_ns_dtype, - is_datetime64_dtype, - is_datetime64_ns_dtype, - is_datetime64_any_dtype, - is_datetime64tz_dtype, 
- is_number, - is_integer, - is_float, - is_bool, - is_scalar, - _ensure_int32, - _ensure_categorical) -from pandas.types.missing import isnull -from pandas.util import testing as tm - - -def test_is_sequence(): - is_seq = inference.is_sequence - assert (is_seq((1, 2))) - assert (is_seq([1, 2])) - assert (not is_seq("abcd")) - assert (not is_seq(u("abcd"))) - assert (not is_seq(np.int64)) - - class A(object): - - def __getitem__(self): - return 1 - - assert (not is_seq(A())) - - -def test_is_list_like(): - passes = ([], [1], (1, ), (1, 2), {'a': 1}, set([1, 'a']), Series([1]), - Series([]), Series(['a']).str) - fails = (1, '2', object()) - - for p in passes: - assert inference.is_list_like(p) - - for f in fails: - assert not inference.is_list_like(f) - - -def test_is_dict_like(): - passes = [{}, {'A': 1}, Series([1])] - fails = ['1', 1, [1, 2], (1, 2), range(2), Index([1])] - - for p in passes: - assert inference.is_dict_like(p) - - for f in fails: - assert not inference.is_dict_like(f) - - -def test_is_named_tuple(): - passes = (collections.namedtuple('Test', list('abc'))(1, 2, 3), ) - fails = ((1, 2, 3), 'a', Series({'pi': 3.14})) - - for p in passes: - assert inference.is_named_tuple(p) - - for f in fails: - assert not inference.is_named_tuple(f) - - -def test_is_hashable(): - - # all new-style classes are hashable by default - class HashableClass(object): - pass - - class UnhashableClass1(object): - __hash__ = None - - class UnhashableClass2(object): - - def __hash__(self): - raise TypeError("Not hashable") - - hashable = (1, - 3.14, - np.float64(3.14), - 'a', - tuple(), - (1, ), - HashableClass(), ) - not_hashable = ([], UnhashableClass1(), ) - abc_hashable_not_really_hashable = (([], ), UnhashableClass2(), ) - - for i in hashable: - assert inference.is_hashable(i) - for i in not_hashable: - assert not inference.is_hashable(i) - for i in abc_hashable_not_really_hashable: - assert not inference.is_hashable(i) - - # numpy.array is no longer collections.Hashable 
as of - # https://github.com/numpy/numpy/pull/5326, just test - # is_hashable() - assert not inference.is_hashable(np.array([])) - - # old-style classes in Python 2 don't appear hashable to - # collections.Hashable but also seem to support hash() by default - if PY2: - - class OldStyleClass(): - pass - - c = OldStyleClass() - assert not isinstance(c, collections.Hashable) - assert inference.is_hashable(c) - hash(c) # this will not raise - - -def test_is_re(): - passes = re.compile('ad'), - fails = 'x', 2, 3, object() - - for p in passes: - assert inference.is_re(p) - - for f in fails: - assert not inference.is_re(f) - - -def test_is_recompilable(): - passes = (r'a', u('x'), r'asdf', re.compile('adsf'), u(r'\u2233\s*'), - re.compile(r'')) - fails = 1, [], object() - - for p in passes: - assert inference.is_re_compilable(p) - - for f in fails: - assert not inference.is_re_compilable(f) - - -class TestInference(tm.TestCase): - - def test_infer_dtype_bytes(self): - compare = 'string' if PY2 else 'bytes' - - # string array of bytes - arr = np.array(list('abc'), dtype='S1') - self.assertEqual(lib.infer_dtype(arr), compare) - - # object array of bytes - arr = arr.astype(object) - self.assertEqual(lib.infer_dtype(arr), compare) - - def test_isinf_scalar(self): - # GH 11352 - self.assertTrue(lib.isposinf_scalar(float('inf'))) - self.assertTrue(lib.isposinf_scalar(np.inf)) - self.assertFalse(lib.isposinf_scalar(-np.inf)) - self.assertFalse(lib.isposinf_scalar(1)) - self.assertFalse(lib.isposinf_scalar('a')) - - self.assertTrue(lib.isneginf_scalar(float('-inf'))) - self.assertTrue(lib.isneginf_scalar(-np.inf)) - self.assertFalse(lib.isneginf_scalar(np.inf)) - self.assertFalse(lib.isneginf_scalar(1)) - self.assertFalse(lib.isneginf_scalar('a')) - - def test_maybe_convert_numeric_infinities(self): - # see gh-13274 - infinities = ['inf', 'inF', 'iNf', 'Inf', - 'iNF', 'InF', 'INf', 'INF'] - na_values = set(['', 'NULL', 'nan']) - - pos = np.array(['inf'], dtype=np.float64) - neg = 
np.array(['-inf'], dtype=np.float64) - - msg = "Unable to parse string" - - for infinity in infinities: - for maybe_int in (True, False): - out = lib.maybe_convert_numeric( - np.array([infinity], dtype=object), - na_values, maybe_int) - tm.assert_numpy_array_equal(out, pos) - - out = lib.maybe_convert_numeric( - np.array(['-' + infinity], dtype=object), - na_values, maybe_int) - tm.assert_numpy_array_equal(out, neg) - - out = lib.maybe_convert_numeric( - np.array([u(infinity)], dtype=object), - na_values, maybe_int) - tm.assert_numpy_array_equal(out, pos) - - out = lib.maybe_convert_numeric( - np.array(['+' + infinity], dtype=object), - na_values, maybe_int) - tm.assert_numpy_array_equal(out, pos) - - # too many characters - with tm.assertRaisesRegexp(ValueError, msg): - lib.maybe_convert_numeric( - np.array(['foo_' + infinity], dtype=object), - na_values, maybe_int) - - def test_maybe_convert_numeric_post_floatify_nan(self): - # see gh-13314 - data = np.array(['1.200', '-999.000', '4.500'], dtype=object) - expected = np.array([1.2, np.nan, 4.5], dtype=np.float64) - nan_values = set([-999, -999.0]) - - for coerce_type in (True, False): - out = lib.maybe_convert_numeric(data, nan_values, coerce_type) - tm.assert_numpy_array_equal(out, expected) - - def test_convert_infs(self): - arr = np.array(['inf', 'inf', 'inf'], dtype='O') - result = lib.maybe_convert_numeric(arr, set(), False) - self.assertTrue(result.dtype == np.float64) - - arr = np.array(['-inf', '-inf', '-inf'], dtype='O') - result = lib.maybe_convert_numeric(arr, set(), False) - self.assertTrue(result.dtype == np.float64) - - def test_scientific_no_exponent(self): - # See PR 12215 - arr = np.array(['42E', '2E', '99e', '6e'], dtype='O') - result = lib.maybe_convert_numeric(arr, set(), False, True) - self.assertTrue(np.all(np.isnan(result))) - - def test_convert_non_hashable(self): - # GH13324 - # make sure that we are handing non-hashables - arr = np.array([[10.0, 2], 1.0, 'apple']) - result = 
lib.maybe_convert_numeric(arr, set(), False, True) - tm.assert_numpy_array_equal(result, np.array([np.nan, 1.0, np.nan])) - - def test_convert_numeric_uint64(self): - arr = np.array([2**63], dtype=object) - exp = np.array([2**63], dtype=np.uint64) - tm.assert_numpy_array_equal(lib.maybe_convert_numeric(arr, set()), exp) - - arr = np.array([str(2**63)], dtype=object) - exp = np.array([2**63], dtype=np.uint64) - tm.assert_numpy_array_equal(lib.maybe_convert_numeric(arr, set()), exp) - - arr = np.array([np.uint64(2**63)], dtype=object) - exp = np.array([2**63], dtype=np.uint64) - tm.assert_numpy_array_equal(lib.maybe_convert_numeric(arr, set()), exp) - - def test_convert_numeric_uint64_nan(self): - msg = 'uint64 array detected' - cases = [(np.array([2**63, np.nan], dtype=object), set()), - (np.array([str(2**63), np.nan], dtype=object), set()), - (np.array([np.nan, 2**63], dtype=object), set()), - (np.array([np.nan, str(2**63)], dtype=object), set()), - (np.array([2**63, 2**63 + 1], dtype=object), set([2**63])), - (np.array([str(2**63), str(2**63 + 1)], - dtype=object), set([2**63]))] - - for coerce in (True, False): - for arr, na_values in cases: - if coerce: - with tm.assertRaisesRegexp(ValueError, msg): - lib.maybe_convert_numeric(arr, na_values, - coerce_numeric=coerce) - else: - tm.assert_numpy_array_equal(lib.maybe_convert_numeric( - arr, na_values), arr) - - def test_convert_numeric_int64_uint64(self): - msg = 'uint64 and negative values detected' - cases = [np.array([2**63, -1], dtype=object), - np.array([str(2**63), -1], dtype=object), - np.array([str(2**63), str(-1)], dtype=object), - np.array([-1, 2**63], dtype=object), - np.array([-1, str(2**63)], dtype=object), - np.array([str(-1), str(2**63)], dtype=object)] - - for coerce in (True, False): - for case in cases: - if coerce: - with tm.assertRaisesRegexp(ValueError, msg): - lib.maybe_convert_numeric(case, set(), - coerce_numeric=coerce) - else: - tm.assert_numpy_array_equal(lib.maybe_convert_numeric( - 
case, set()), case) - - def test_maybe_convert_objects_uint64(self): - # see gh-4471 - arr = np.array([2**63], dtype=object) - exp = np.array([2**63], dtype=np.uint64) - tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp) - - # NumPy bug: can't compare uint64 to int64, as that - # results in both casting to float64, so we should - # make sure that this function is robust against it - arr = np.array([np.uint64(2**63)], dtype=object) - exp = np.array([2**63], dtype=np.uint64) - tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp) - - arr = np.array([2, -1], dtype=object) - exp = np.array([2, -1], dtype=np.int64) - tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp) - - arr = np.array([2**63, -1], dtype=object) - exp = np.array([2**63, -1], dtype=object) - tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp) - - def test_mixed_dtypes_remain_object_array(self): - # GH14956 - array = np.array([datetime(2015, 1, 1, tzinfo=pytz.utc), 1], - dtype=object) - result = lib.maybe_convert_objects(array, convert_datetime=1) - tm.assert_numpy_array_equal(result, array) - - -class TestTypeInference(tm.TestCase): - - def test_length_zero(self): - result = lib.infer_dtype(np.array([], dtype='i4')) - self.assertEqual(result, 'integer') - - result = lib.infer_dtype([]) - self.assertEqual(result, 'empty') - - def test_integers(self): - arr = np.array([1, 2, 3, np.int64(4), np.int32(5)], dtype='O') - result = lib.infer_dtype(arr) - self.assertEqual(result, 'integer') - - arr = np.array([1, 2, 3, np.int64(4), np.int32(5), 'foo'], dtype='O') - result = lib.infer_dtype(arr) - self.assertEqual(result, 'mixed-integer') - - arr = np.array([1, 2, 3, 4, 5], dtype='i4') - result = lib.infer_dtype(arr) - self.assertEqual(result, 'integer') - - def test_bools(self): - arr = np.array([True, False, True, True, True], dtype='O') - result = lib.infer_dtype(arr) - self.assertEqual(result, 'boolean') - - arr = np.array([np.bool_(True), np.bool_(False)], 
dtype='O') - result = lib.infer_dtype(arr) - self.assertEqual(result, 'boolean') - - arr = np.array([True, False, True, 'foo'], dtype='O') - result = lib.infer_dtype(arr) - self.assertEqual(result, 'mixed') - - arr = np.array([True, False, True], dtype=bool) - result = lib.infer_dtype(arr) - self.assertEqual(result, 'boolean') - - def test_floats(self): - arr = np.array([1., 2., 3., np.float64(4), np.float32(5)], dtype='O') - result = lib.infer_dtype(arr) - self.assertEqual(result, 'floating') - - arr = np.array([1, 2, 3, np.float64(4), np.float32(5), 'foo'], - dtype='O') - result = lib.infer_dtype(arr) - self.assertEqual(result, 'mixed-integer') - - arr = np.array([1, 2, 3, 4, 5], dtype='f4') - result = lib.infer_dtype(arr) - self.assertEqual(result, 'floating') - - arr = np.array([1, 2, 3, 4, 5], dtype='f8') - result = lib.infer_dtype(arr) - self.assertEqual(result, 'floating') - - def test_string(self): - pass - - def test_unicode(self): - pass - - def test_datetime(self): - - dates = [datetime(2012, 1, x) for x in range(1, 20)] - index = Index(dates) - self.assertEqual(index.inferred_type, 'datetime64') - - def test_infer_dtype_datetime(self): - - arr = np.array([Timestamp('2011-01-01'), - Timestamp('2011-01-02')]) - self.assertEqual(lib.infer_dtype(arr), 'datetime') - - arr = np.array([np.datetime64('2011-01-01'), - np.datetime64('2011-01-01')], dtype=object) - self.assertEqual(lib.infer_dtype(arr), 'datetime64') - - arr = np.array([datetime(2011, 1, 1), datetime(2012, 2, 1)]) - self.assertEqual(lib.infer_dtype(arr), 'datetime') - - # starts with nan - for n in [pd.NaT, np.nan]: - arr = np.array([n, pd.Timestamp('2011-01-02')]) - self.assertEqual(lib.infer_dtype(arr), 'datetime') - - arr = np.array([n, np.datetime64('2011-01-02')]) - self.assertEqual(lib.infer_dtype(arr), 'datetime64') - - arr = np.array([n, datetime(2011, 1, 1)]) - self.assertEqual(lib.infer_dtype(arr), 'datetime') - - arr = np.array([n, pd.Timestamp('2011-01-02'), n]) - 
self.assertEqual(lib.infer_dtype(arr), 'datetime') - - arr = np.array([n, np.datetime64('2011-01-02'), n]) - self.assertEqual(lib.infer_dtype(arr), 'datetime64') - - arr = np.array([n, datetime(2011, 1, 1), n]) - self.assertEqual(lib.infer_dtype(arr), 'datetime') - - # different type of nat - arr = np.array([np.timedelta64('nat'), - np.datetime64('2011-01-02')], dtype=object) - self.assertEqual(lib.infer_dtype(arr), 'mixed') - - arr = np.array([np.datetime64('2011-01-02'), - np.timedelta64('nat')], dtype=object) - self.assertEqual(lib.infer_dtype(arr), 'mixed') - - # mixed datetime - arr = np.array([datetime(2011, 1, 1), - pd.Timestamp('2011-01-02')]) - self.assertEqual(lib.infer_dtype(arr), 'datetime') - - # should be datetime? - arr = np.array([np.datetime64('2011-01-01'), - pd.Timestamp('2011-01-02')]) - self.assertEqual(lib.infer_dtype(arr), 'mixed') - - arr = np.array([pd.Timestamp('2011-01-02'), - np.datetime64('2011-01-01')]) - self.assertEqual(lib.infer_dtype(arr), 'mixed') - - arr = np.array([np.nan, pd.Timestamp('2011-01-02'), 1]) - self.assertEqual(lib.infer_dtype(arr), 'mixed-integer') - - arr = np.array([np.nan, pd.Timestamp('2011-01-02'), 1.1]) - self.assertEqual(lib.infer_dtype(arr), 'mixed') - - arr = np.array([np.nan, '2011-01-01', pd.Timestamp('2011-01-02')]) - self.assertEqual(lib.infer_dtype(arr), 'mixed') - - def test_infer_dtype_timedelta(self): - - arr = np.array([pd.Timedelta('1 days'), - pd.Timedelta('2 days')]) - self.assertEqual(lib.infer_dtype(arr), 'timedelta') - - arr = np.array([np.timedelta64(1, 'D'), - np.timedelta64(2, 'D')], dtype=object) - self.assertEqual(lib.infer_dtype(arr), 'timedelta') - - arr = np.array([timedelta(1), timedelta(2)]) - self.assertEqual(lib.infer_dtype(arr), 'timedelta') - - # starts with nan - for n in [pd.NaT, np.nan]: - arr = np.array([n, Timedelta('1 days')]) - self.assertEqual(lib.infer_dtype(arr), 'timedelta') - - arr = np.array([n, np.timedelta64(1, 'D')]) - self.assertEqual(lib.infer_dtype(arr), 
'timedelta') - - arr = np.array([n, timedelta(1)]) - self.assertEqual(lib.infer_dtype(arr), 'timedelta') - - arr = np.array([n, pd.Timedelta('1 days'), n]) - self.assertEqual(lib.infer_dtype(arr), 'timedelta') - - arr = np.array([n, np.timedelta64(1, 'D'), n]) - self.assertEqual(lib.infer_dtype(arr), 'timedelta') - - arr = np.array([n, timedelta(1), n]) - self.assertEqual(lib.infer_dtype(arr), 'timedelta') - - # different type of nat - arr = np.array([np.datetime64('nat'), np.timedelta64(1, 'D')], - dtype=object) - self.assertEqual(lib.infer_dtype(arr), 'mixed') - - arr = np.array([np.timedelta64(1, 'D'), np.datetime64('nat')], - dtype=object) - self.assertEqual(lib.infer_dtype(arr), 'mixed') - - def test_infer_dtype_period(self): - # GH 13664 - arr = np.array([pd.Period('2011-01', freq='D'), - pd.Period('2011-02', freq='D')]) - self.assertEqual(pd.lib.infer_dtype(arr), 'period') - - arr = np.array([pd.Period('2011-01', freq='D'), - pd.Period('2011-02', freq='M')]) - self.assertEqual(pd.lib.infer_dtype(arr), 'period') - - # starts with nan - for n in [pd.NaT, np.nan]: - arr = np.array([n, pd.Period('2011-01', freq='D')]) - self.assertEqual(pd.lib.infer_dtype(arr), 'period') - - arr = np.array([n, pd.Period('2011-01', freq='D'), n]) - self.assertEqual(pd.lib.infer_dtype(arr), 'period') - - # different type of nat - arr = np.array([np.datetime64('nat'), pd.Period('2011-01', freq='M')], - dtype=object) - self.assertEqual(pd.lib.infer_dtype(arr), 'mixed') - - arr = np.array([pd.Period('2011-01', freq='M'), np.datetime64('nat')], - dtype=object) - self.assertEqual(pd.lib.infer_dtype(arr), 'mixed') - - def test_infer_dtype_all_nan_nat_like(self): - arr = np.array([np.nan, np.nan]) - self.assertEqual(lib.infer_dtype(arr), 'floating') - - # nan and None mix are result in mixed - arr = np.array([np.nan, np.nan, None]) - self.assertEqual(lib.infer_dtype(arr), 'mixed') - - arr = np.array([None, np.nan, np.nan]) - self.assertEqual(lib.infer_dtype(arr), 'mixed') - - # pd.NaT - 
arr = np.array([pd.NaT]) - self.assertEqual(lib.infer_dtype(arr), 'datetime') - - arr = np.array([pd.NaT, np.nan]) - self.assertEqual(lib.infer_dtype(arr), 'datetime') - - arr = np.array([np.nan, pd.NaT]) - self.assertEqual(lib.infer_dtype(arr), 'datetime') - - arr = np.array([np.nan, pd.NaT, np.nan]) - self.assertEqual(lib.infer_dtype(arr), 'datetime') - - arr = np.array([None, pd.NaT, None]) - self.assertEqual(lib.infer_dtype(arr), 'datetime') - - # np.datetime64(nat) - arr = np.array([np.datetime64('nat')]) - self.assertEqual(lib.infer_dtype(arr), 'datetime64') - - for n in [np.nan, pd.NaT, None]: - arr = np.array([n, np.datetime64('nat'), n]) - self.assertEqual(lib.infer_dtype(arr), 'datetime64') - - arr = np.array([pd.NaT, n, np.datetime64('nat'), n]) - self.assertEqual(lib.infer_dtype(arr), 'datetime64') - - arr = np.array([np.timedelta64('nat')], dtype=object) - self.assertEqual(lib.infer_dtype(arr), 'timedelta') - - for n in [np.nan, pd.NaT, None]: - arr = np.array([n, np.timedelta64('nat'), n]) - self.assertEqual(lib.infer_dtype(arr), 'timedelta') - - arr = np.array([pd.NaT, n, np.timedelta64('nat'), n]) - self.assertEqual(lib.infer_dtype(arr), 'timedelta') - - # datetime / timedelta mixed - arr = np.array([pd.NaT, np.datetime64('nat'), - np.timedelta64('nat'), np.nan]) - self.assertEqual(lib.infer_dtype(arr), 'mixed') - - arr = np.array([np.timedelta64('nat'), np.datetime64('nat')], - dtype=object) - self.assertEqual(lib.infer_dtype(arr), 'mixed') - - def test_is_datetimelike_array_all_nan_nat_like(self): - arr = np.array([np.nan, pd.NaT, np.datetime64('nat')]) - self.assertTrue(lib.is_datetime_array(arr)) - self.assertTrue(lib.is_datetime64_array(arr)) - self.assertFalse(lib.is_timedelta_array(arr)) - self.assertFalse(lib.is_timedelta64_array(arr)) - self.assertFalse(lib.is_timedelta_or_timedelta64_array(arr)) - - arr = np.array([np.nan, pd.NaT, np.timedelta64('nat')]) - self.assertFalse(lib.is_datetime_array(arr)) - 
self.assertFalse(lib.is_datetime64_array(arr)) - self.assertTrue(lib.is_timedelta_array(arr)) - self.assertTrue(lib.is_timedelta64_array(arr)) - self.assertTrue(lib.is_timedelta_or_timedelta64_array(arr)) - - arr = np.array([np.nan, pd.NaT, np.datetime64('nat'), - np.timedelta64('nat')]) - self.assertFalse(lib.is_datetime_array(arr)) - self.assertFalse(lib.is_datetime64_array(arr)) - self.assertFalse(lib.is_timedelta_array(arr)) - self.assertFalse(lib.is_timedelta64_array(arr)) - self.assertFalse(lib.is_timedelta_or_timedelta64_array(arr)) - - arr = np.array([np.nan, pd.NaT]) - self.assertTrue(lib.is_datetime_array(arr)) - self.assertTrue(lib.is_datetime64_array(arr)) - self.assertTrue(lib.is_timedelta_array(arr)) - self.assertTrue(lib.is_timedelta64_array(arr)) - self.assertTrue(lib.is_timedelta_or_timedelta64_array(arr)) - - arr = np.array([np.nan, np.nan], dtype=object) - self.assertFalse(lib.is_datetime_array(arr)) - self.assertFalse(lib.is_datetime64_array(arr)) - self.assertFalse(lib.is_timedelta_array(arr)) - self.assertFalse(lib.is_timedelta64_array(arr)) - self.assertFalse(lib.is_timedelta_or_timedelta64_array(arr)) - - def test_date(self): - - dates = [date(2012, 1, x) for x in range(1, 20)] - index = Index(dates) - self.assertEqual(index.inferred_type, 'date') - - def test_to_object_array_tuples(self): - r = (5, 6) - values = [r] - result = lib.to_object_array_tuples(values) - - try: - # make sure record array works - from collections import namedtuple - record = namedtuple('record', 'x y') - r = record(5, 6) - values = [r] - result = lib.to_object_array_tuples(values) # noqa - except ImportError: - pass - - def test_object(self): - - # GH 7431 - # cannot infer more than this as only a single element - arr = np.array([None], dtype='O') - result = lib.infer_dtype(arr) - self.assertEqual(result, 'mixed') - - def test_to_object_array_width(self): - # see gh-13320 - rows = [[1, 2, 3], [4, 5, 6]] - - expected = np.array(rows, dtype=object) - out = 
lib.to_object_array(rows) - tm.assert_numpy_array_equal(out, expected) - - expected = np.array(rows, dtype=object) - out = lib.to_object_array(rows, min_width=1) - tm.assert_numpy_array_equal(out, expected) - - expected = np.array([[1, 2, 3, None, None], - [4, 5, 6, None, None]], dtype=object) - out = lib.to_object_array(rows, min_width=5) - tm.assert_numpy_array_equal(out, expected) - - def test_is_period(self): - self.assertTrue(lib.is_period(pd.Period('2011-01', freq='M'))) - self.assertFalse(lib.is_period(pd.PeriodIndex(['2011-01'], freq='M'))) - self.assertFalse(lib.is_period(pd.Timestamp('2011-01'))) - self.assertFalse(lib.is_period(1)) - self.assertFalse(lib.is_period(np.nan)) - - def test_categorical(self): - - # GH 8974 - from pandas import Categorical, Series - arr = Categorical(list('abc')) - result = lib.infer_dtype(arr) - self.assertEqual(result, 'categorical') - - result = lib.infer_dtype(Series(arr)) - self.assertEqual(result, 'categorical') - - arr = Categorical(list('abc'), categories=['cegfab'], ordered=True) - result = lib.infer_dtype(arr) - self.assertEqual(result, 'categorical') - - result = lib.infer_dtype(Series(arr)) - self.assertEqual(result, 'categorical') - - -class TestNumberScalar(tm.TestCase): - - def test_is_number(self): - - self.assertTrue(is_number(True)) - self.assertTrue(is_number(1)) - self.assertTrue(is_number(1.1)) - self.assertTrue(is_number(1 + 3j)) - self.assertTrue(is_number(np.bool(False))) - self.assertTrue(is_number(np.int64(1))) - self.assertTrue(is_number(np.float64(1.1))) - self.assertTrue(is_number(np.complex128(1 + 3j))) - self.assertTrue(is_number(np.nan)) - - self.assertFalse(is_number(None)) - self.assertFalse(is_number('x')) - self.assertFalse(is_number(datetime(2011, 1, 1))) - self.assertFalse(is_number(np.datetime64('2011-01-01'))) - self.assertFalse(is_number(Timestamp('2011-01-01'))) - self.assertFalse(is_number(Timestamp('2011-01-01', - tz='US/Eastern'))) - self.assertFalse(is_number(timedelta(1000))) - 
self.assertFalse(is_number(Timedelta('1 days'))) - - # questionable - self.assertFalse(is_number(np.bool_(False))) - self.assertTrue(is_number(np.timedelta64(1, 'D'))) - - def test_is_bool(self): - self.assertTrue(is_bool(True)) - self.assertTrue(is_bool(np.bool(False))) - self.assertTrue(is_bool(np.bool_(False))) - - self.assertFalse(is_bool(1)) - self.assertFalse(is_bool(1.1)) - self.assertFalse(is_bool(1 + 3j)) - self.assertFalse(is_bool(np.int64(1))) - self.assertFalse(is_bool(np.float64(1.1))) - self.assertFalse(is_bool(np.complex128(1 + 3j))) - self.assertFalse(is_bool(np.nan)) - self.assertFalse(is_bool(None)) - self.assertFalse(is_bool('x')) - self.assertFalse(is_bool(datetime(2011, 1, 1))) - self.assertFalse(is_bool(np.datetime64('2011-01-01'))) - self.assertFalse(is_bool(Timestamp('2011-01-01'))) - self.assertFalse(is_bool(Timestamp('2011-01-01', - tz='US/Eastern'))) - self.assertFalse(is_bool(timedelta(1000))) - self.assertFalse(is_bool(np.timedelta64(1, 'D'))) - self.assertFalse(is_bool(Timedelta('1 days'))) - - def test_is_integer(self): - self.assertTrue(is_integer(1)) - self.assertTrue(is_integer(np.int64(1))) - - self.assertFalse(is_integer(True)) - self.assertFalse(is_integer(1.1)) - self.assertFalse(is_integer(1 + 3j)) - self.assertFalse(is_integer(np.bool(False))) - self.assertFalse(is_integer(np.bool_(False))) - self.assertFalse(is_integer(np.float64(1.1))) - self.assertFalse(is_integer(np.complex128(1 + 3j))) - self.assertFalse(is_integer(np.nan)) - self.assertFalse(is_integer(None)) - self.assertFalse(is_integer('x')) - self.assertFalse(is_integer(datetime(2011, 1, 1))) - self.assertFalse(is_integer(np.datetime64('2011-01-01'))) - self.assertFalse(is_integer(Timestamp('2011-01-01'))) - self.assertFalse(is_integer(Timestamp('2011-01-01', - tz='US/Eastern'))) - self.assertFalse(is_integer(timedelta(1000))) - self.assertFalse(is_integer(Timedelta('1 days'))) - - # questionable - self.assertTrue(is_integer(np.timedelta64(1, 'D'))) - - def 
test_is_float(self): - self.assertTrue(is_float(1.1)) - self.assertTrue(is_float(np.float64(1.1))) - self.assertTrue(is_float(np.nan)) - - self.assertFalse(is_float(True)) - self.assertFalse(is_float(1)) - self.assertFalse(is_float(1 + 3j)) - self.assertFalse(is_float(np.bool(False))) - self.assertFalse(is_float(np.bool_(False))) - self.assertFalse(is_float(np.int64(1))) - self.assertFalse(is_float(np.complex128(1 + 3j))) - self.assertFalse(is_float(None)) - self.assertFalse(is_float('x')) - self.assertFalse(is_float(datetime(2011, 1, 1))) - self.assertFalse(is_float(np.datetime64('2011-01-01'))) - self.assertFalse(is_float(Timestamp('2011-01-01'))) - self.assertFalse(is_float(Timestamp('2011-01-01', - tz='US/Eastern'))) - self.assertFalse(is_float(timedelta(1000))) - self.assertFalse(is_float(np.timedelta64(1, 'D'))) - self.assertFalse(is_float(Timedelta('1 days'))) - - def test_is_datetime_dtypes(self): - - ts = pd.date_range('20130101', periods=3) - tsa = pd.date_range('20130101', periods=3, tz='US/Eastern') - - self.assertTrue(is_datetime64_dtype('datetime64')) - self.assertTrue(is_datetime64_dtype('datetime64[ns]')) - self.assertTrue(is_datetime64_dtype(ts)) - self.assertFalse(is_datetime64_dtype(tsa)) - - self.assertFalse(is_datetime64_ns_dtype('datetime64')) - self.assertTrue(is_datetime64_ns_dtype('datetime64[ns]')) - self.assertTrue(is_datetime64_ns_dtype(ts)) - self.assertTrue(is_datetime64_ns_dtype(tsa)) - - self.assertTrue(is_datetime64_any_dtype('datetime64')) - self.assertTrue(is_datetime64_any_dtype('datetime64[ns]')) - self.assertTrue(is_datetime64_any_dtype(ts)) - self.assertTrue(is_datetime64_any_dtype(tsa)) - - self.assertFalse(is_datetime64tz_dtype('datetime64')) - self.assertFalse(is_datetime64tz_dtype('datetime64[ns]')) - self.assertFalse(is_datetime64tz_dtype(ts)) - self.assertTrue(is_datetime64tz_dtype(tsa)) - - for tz in ['US/Eastern', 'UTC']: - dtype = 'datetime64[ns, {}]'.format(tz) - self.assertFalse(is_datetime64_dtype(dtype)) - 
self.assertTrue(is_datetime64tz_dtype(dtype)) - self.assertTrue(is_datetime64_ns_dtype(dtype)) - self.assertTrue(is_datetime64_any_dtype(dtype)) - - def test_is_timedelta(self): - self.assertTrue(is_timedelta64_dtype('timedelta64')) - self.assertTrue(is_timedelta64_dtype('timedelta64[ns]')) - self.assertFalse(is_timedelta64_ns_dtype('timedelta64')) - self.assertTrue(is_timedelta64_ns_dtype('timedelta64[ns]')) - - tdi = TimedeltaIndex([1e14, 2e14], dtype='timedelta64') - self.assertTrue(is_timedelta64_dtype(tdi)) - self.assertTrue(is_timedelta64_ns_dtype(tdi)) - self.assertTrue(is_timedelta64_ns_dtype(tdi.astype('timedelta64[ns]'))) - - # Conversion to Int64Index: - self.assertFalse(is_timedelta64_ns_dtype(tdi.astype('timedelta64'))) - self.assertFalse(is_timedelta64_ns_dtype(tdi.astype('timedelta64[h]'))) - - -class Testisscalar(tm.TestCase): - - def test_isscalar_builtin_scalars(self): - self.assertTrue(is_scalar(None)) - self.assertTrue(is_scalar(True)) - self.assertTrue(is_scalar(False)) - self.assertTrue(is_scalar(0.)) - self.assertTrue(is_scalar(np.nan)) - self.assertTrue(is_scalar('foobar')) - self.assertTrue(is_scalar(b'foobar')) - self.assertTrue(is_scalar(u('efoobar'))) - self.assertTrue(is_scalar(datetime(2014, 1, 1))) - self.assertTrue(is_scalar(date(2014, 1, 1))) - self.assertTrue(is_scalar(time(12, 0))) - self.assertTrue(is_scalar(timedelta(hours=1))) - self.assertTrue(is_scalar(pd.NaT)) - - def test_isscalar_builtin_nonscalars(self): - self.assertFalse(is_scalar({})) - self.assertFalse(is_scalar([])) - self.assertFalse(is_scalar([1])) - self.assertFalse(is_scalar(())) - self.assertFalse(is_scalar((1, ))) - self.assertFalse(is_scalar(slice(None))) - self.assertFalse(is_scalar(Ellipsis)) - - def test_isscalar_numpy_array_scalars(self): - self.assertTrue(is_scalar(np.int64(1))) - self.assertTrue(is_scalar(np.float64(1.))) - self.assertTrue(is_scalar(np.int32(1))) - self.assertTrue(is_scalar(np.object_('foobar'))) - 
self.assertTrue(is_scalar(np.str_('foobar'))) - self.assertTrue(is_scalar(np.unicode_(u('foobar')))) - self.assertTrue(is_scalar(np.bytes_(b'foobar'))) - self.assertTrue(is_scalar(np.datetime64('2014-01-01'))) - self.assertTrue(is_scalar(np.timedelta64(1, 'h'))) - - def test_isscalar_numpy_zerodim_arrays(self): - for zerodim in [np.array(1), np.array('foobar'), - np.array(np.datetime64('2014-01-01')), - np.array(np.timedelta64(1, 'h')), - np.array(np.datetime64('NaT'))]: - self.assertFalse(is_scalar(zerodim)) - self.assertTrue(is_scalar(lib.item_from_zerodim(zerodim))) - - def test_isscalar_numpy_arrays(self): - self.assertFalse(is_scalar(np.array([]))) - self.assertFalse(is_scalar(np.array([[]]))) - self.assertFalse(is_scalar(np.matrix('1; 2'))) - - def test_isscalar_pandas_scalars(self): - self.assertTrue(is_scalar(Timestamp('2014-01-01'))) - self.assertTrue(is_scalar(Timedelta(hours=1))) - self.assertTrue(is_scalar(Period('2014-01-01'))) - - def test_lisscalar_pandas_containers(self): - self.assertFalse(is_scalar(Series())) - self.assertFalse(is_scalar(Series([1]))) - self.assertFalse(is_scalar(DataFrame())) - self.assertFalse(is_scalar(DataFrame([[1]]))) - self.assertFalse(is_scalar(Panel())) - self.assertFalse(is_scalar(Panel([[[1]]]))) - self.assertFalse(is_scalar(Index([]))) - self.assertFalse(is_scalar(Index([1]))) - - -def test_datetimeindex_from_empty_datetime64_array(): - for unit in ['ms', 'us', 'ns']: - idx = DatetimeIndex(np.array([], dtype='datetime64[%s]' % unit)) - assert (len(idx) == 0) - - -def test_nan_to_nat_conversions(): - - df = DataFrame(dict({ - 'A': np.asarray( - lrange(10), dtype='float64'), - 'B': Timestamp('20010101') - })) - df.iloc[3:6, :] = np.nan - result = df.loc[4, 'B'].value - assert (result == tslib.iNaT) - - s = df['B'].copy() - s._data = s._data.setitem(indexer=tuple([slice(8, 9)]), value=np.nan) - assert (isnull(s[8])) - - # numpy < 1.7.0 is wrong - from distutils.version import LooseVersion - if LooseVersion(np.__version__) 
>= '1.7.0': - assert (s[8].value == np.datetime64('NaT').astype(np.int64)) - - -def test_ensure_int32(): - values = np.arange(10, dtype=np.int32) - result = _ensure_int32(values) - assert (result.dtype == np.int32) - - values = np.arange(10, dtype=np.int64) - result = _ensure_int32(values) - assert (result.dtype == np.int32) - - -def test_ensure_categorical(): - values = np.arange(10, dtype=np.int32) - result = _ensure_categorical(values) - assert (result.dtype == 'category') - - values = Categorical(values) - result = _ensure_categorical(values) - tm.assert_categorical_equal(result, values) diff --git a/pandas/tests/types/test_io.py b/pandas/tests/types/test_io.py deleted file mode 100644 index ce8e23342bf5a..0000000000000 --- a/pandas/tests/types/test_io.py +++ /dev/null @@ -1,109 +0,0 @@ -# -*- coding: utf-8 -*- - -import numpy as np -import pandas.lib as lib -import pandas.util.testing as tm - -from pandas.compat import long, u - - -class TestParseSQL(tm.TestCase): - - def test_convert_sql_column_floats(self): - arr = np.array([1.5, None, 3, 4.2], dtype=object) - result = lib.convert_sql_column(arr) - expected = np.array([1.5, np.nan, 3, 4.2], dtype='f8') - self.assert_numpy_array_equal(result, expected) - - def test_convert_sql_column_strings(self): - arr = np.array(['1.5', None, '3', '4.2'], dtype=object) - result = lib.convert_sql_column(arr) - expected = np.array(['1.5', np.nan, '3', '4.2'], dtype=object) - self.assert_numpy_array_equal(result, expected) - - def test_convert_sql_column_unicode(self): - arr = np.array([u('1.5'), None, u('3'), u('4.2')], - dtype=object) - result = lib.convert_sql_column(arr) - expected = np.array([u('1.5'), np.nan, u('3'), u('4.2')], - dtype=object) - self.assert_numpy_array_equal(result, expected) - - def test_convert_sql_column_ints(self): - arr = np.array([1, 2, 3, 4], dtype='O') - arr2 = np.array([1, 2, 3, 4], dtype='i4').astype('O') - result = lib.convert_sql_column(arr) - result2 = lib.convert_sql_column(arr2) - 
expected = np.array([1, 2, 3, 4], dtype='i8') - self.assert_numpy_array_equal(result, expected) - self.assert_numpy_array_equal(result2, expected) - - arr = np.array([1, 2, 3, None, 4], dtype='O') - result = lib.convert_sql_column(arr) - expected = np.array([1, 2, 3, np.nan, 4], dtype='f8') - self.assert_numpy_array_equal(result, expected) - - def test_convert_sql_column_longs(self): - arr = np.array([long(1), long(2), long(3), long(4)], dtype='O') - result = lib.convert_sql_column(arr) - expected = np.array([1, 2, 3, 4], dtype='i8') - self.assert_numpy_array_equal(result, expected) - - arr = np.array([long(1), long(2), long(3), None, long(4)], dtype='O') - result = lib.convert_sql_column(arr) - expected = np.array([1, 2, 3, np.nan, 4], dtype='f8') - self.assert_numpy_array_equal(result, expected) - - def test_convert_sql_column_bools(self): - arr = np.array([True, False, True, False], dtype='O') - result = lib.convert_sql_column(arr) - expected = np.array([True, False, True, False], dtype=bool) - self.assert_numpy_array_equal(result, expected) - - arr = np.array([True, False, None, False], dtype='O') - result = lib.convert_sql_column(arr) - expected = np.array([True, False, np.nan, False], dtype=object) - self.assert_numpy_array_equal(result, expected) - - def test_convert_sql_column_decimals(self): - from decimal import Decimal - arr = np.array([Decimal('1.5'), None, Decimal('3'), Decimal('4.2')]) - result = lib.convert_sql_column(arr) - expected = np.array([1.5, np.nan, 3, 4.2], dtype='f8') - self.assert_numpy_array_equal(result, expected) - - def test_convert_downcast_int64(self): - from pandas.parser import na_values - - arr = np.array([1, 2, 7, 8, 10], dtype=np.int64) - expected = np.array([1, 2, 7, 8, 10], dtype=np.int8) - - # default argument - result = lib.downcast_int64(arr, na_values) - self.assert_numpy_array_equal(result, expected) - - result = lib.downcast_int64(arr, na_values, use_unsigned=False) - self.assert_numpy_array_equal(result, expected) - - 
expected = np.array([1, 2, 7, 8, 10], dtype=np.uint8) - result = lib.downcast_int64(arr, na_values, use_unsigned=True) - self.assert_numpy_array_equal(result, expected) - - # still cast to int8 despite use_unsigned=True - # because of the negative number as an element - arr = np.array([1, 2, -7, 8, 10], dtype=np.int64) - expected = np.array([1, 2, -7, 8, 10], dtype=np.int8) - result = lib.downcast_int64(arr, na_values, use_unsigned=True) - self.assert_numpy_array_equal(result, expected) - - arr = np.array([1, 2, 7, 8, 300], dtype=np.int64) - expected = np.array([1, 2, 7, 8, 300], dtype=np.int16) - result = lib.downcast_int64(arr, na_values) - self.assert_numpy_array_equal(result, expected) - - int8_na = na_values[np.int8] - int64_na = na_values[np.int64] - arr = np.array([int64_na, 2, 3, 10, 15], dtype=np.int64) - expected = np.array([int8_na, 2, 3, 10, 15], dtype=np.int8) - result = lib.downcast_int64(arr, na_values) - self.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/util/__init__.py b/pandas/tests/util/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tools/tests/test_hashing.py b/pandas/tests/util/test_hashing.py similarity index 70% rename from pandas/tools/tests/test_hashing.py rename to pandas/tests/util/test_hashing.py index 05a352f259e8b..fe8d75539879e 100644 --- a/pandas/tools/tests/test_hashing.py +++ b/pandas/tests/util/test_hashing.py @@ -1,14 +1,19 @@ +import pytest +import datetime + +from warnings import catch_warnings import numpy as np import pandas as pd from pandas import DataFrame, Series, Index, MultiIndex -from pandas.tools.hashing import hash_array, hash_tuples, hash_pandas_object +from pandas.util import hash_array, hash_pandas_object +from pandas.core.util.hashing import hash_tuples, hash_tuple, _hash_scalar import pandas.util.testing as tm -class TestHashing(tm.TestCase): +class TestHashing(object): - def setUp(self): + def setup_method(self, method): self.df = DataFrame( 
{'i32': np.array([1, 2, 3] * 3, dtype='int32'), 'f32': np.array([None, 2.5, 3.5] * 3, dtype='float32'), @@ -44,7 +49,7 @@ def test_hash_array_mixed(self): def test_hash_array_errors(self): for val in [5, 'foo', pd.Timestamp('20130101')]: - self.assertRaises(TypeError, hash_array, val) + pytest.raises(TypeError, hash_array, val) def check_equal(self, obj, **kwargs): a = hash_pandas_object(obj, **kwargs) @@ -64,28 +69,78 @@ def check_not_equal_with_index(self, obj): a = hash_pandas_object(obj, index=True) b = hash_pandas_object(obj, index=False) if len(obj): - self.assertFalse((a == b).all()) + assert not (a == b).all() def test_hash_tuples(self): tups = [(1, 'one'), (1, 'two'), (2, 'one')] result = hash_tuples(tups) expected = hash_pandas_object(MultiIndex.from_tuples(tups)).values - self.assert_numpy_array_equal(result, expected) + tm.assert_numpy_array_equal(result, expected) result = hash_tuples(tups[0]) - self.assertEqual(result, expected[0]) + assert result == expected[0] + + def test_hash_tuple(self): + # test equivalence between hash_tuples and hash_tuple + for tup in [(1, 'one'), (1, np.nan), (1.0, pd.NaT, 'A'), + ('A', pd.Timestamp("2012-01-01"))]: + result = hash_tuple(tup) + expected = hash_tuples([tup])[0] + assert result == expected + + def test_hash_scalar(self): + for val in [1, 1.4, 'A', b'A', u'A', pd.Timestamp("2012-01-01"), + pd.Timestamp("2012-01-01", tz='Europe/Brussels'), + datetime.datetime(2012, 1, 1), + pd.Timestamp("2012-01-01", tz='EST').to_pydatetime(), + pd.Timedelta('1 days'), datetime.timedelta(1), + pd.Period('2012-01-01', freq='D'), pd.Interval(0, 1), + np.nan, pd.NaT, None]: + result = _hash_scalar(val) + expected = hash_array(np.array([val], dtype=object), + categorize=True) + assert result[0] == expected[0] def test_hash_tuples_err(self): for val in [5, 'foo', pd.Timestamp('20130101')]: - self.assertRaises(TypeError, hash_tuples, val) + pytest.raises(TypeError, hash_tuples, val) def test_multiindex_unique(self): mi = 
MultiIndex.from_tuples([(118, 472), (236, 118), (51, 204), (102, 51)]) - self.assertTrue(mi.is_unique) + assert mi.is_unique result = hash_pandas_object(mi) - self.assertTrue(result.is_unique) + assert result.is_unique + + def test_multiindex_objects(self): + mi = MultiIndex(levels=[['b', 'd', 'a'], [1, 2, 3]], + labels=[[0, 1, 0, 2], [2, 0, 0, 1]], + names=['col1', 'col2']) + recons = mi._sort_levels_monotonic() + + # these are equal + assert mi.equals(recons) + assert Index(mi.values).equals(Index(recons.values)) + + # _hashed_values and hash_pandas_object(..., index=False) + # equivalency + expected = hash_pandas_object( + mi, index=False).values + result = mi._hashed_values + tm.assert_numpy_array_equal(result, expected) + + expected = hash_pandas_object( + recons, index=False).values + result = recons._hashed_values + tm.assert_numpy_array_equal(result, expected) + + expected = mi._hashed_values + result = recons._hashed_values + + # values should match, but in different order + tm.assert_numpy_array_equal(np.sort(result), + np.sort(expected)) def test_hash_pandas_object(self): @@ -152,13 +207,28 @@ def test_categorical_consistency(self): tm.assert_series_equal(h1, h2) tm.assert_series_equal(h1, h3) + def test_categorical_with_nan_consistency(self): + c = pd.Categorical.from_codes( + [-1, 0, 1, 2, 3, 4], + categories=pd.date_range('2012-01-01', periods=5, name='B')) + expected = hash_array(c, categorize=False) + c = pd.Categorical.from_codes( + [-1, 0], + categories=[pd.Timestamp('2012-01-01')]) + result = hash_array(c, categorize=False) + assert result[0] in expected + assert result[1] in expected + def test_pandas_errors(self): - for obj in [pd.Timestamp('20130101'), tm.makePanel()]: - def f(): - hash_pandas_object(f) + for obj in [pd.Timestamp('20130101')]: + with pytest.raises(TypeError): + hash_pandas_object(obj) - self.assertRaises(TypeError, f) + with catch_warnings(record=True): + obj = tm.makePanel() + with pytest.raises(TypeError): + 
hash_pandas_object(obj) def test_hash_keys(self): # using different hash keys, should have different hashes @@ -168,13 +238,13 @@ def test_hash_keys(self): obj = Series(list('abc')) a = hash_pandas_object(obj, hash_key='9876543210123456') b = hash_pandas_object(obj, hash_key='9876543210123465') - self.assertTrue((a != b).all()) + assert (a != b).all() def test_invalid_key(self): # this only matters for object dtypes def f(): hash_pandas_object(Series(list('abc')), hash_key='foo') - self.assertRaises(ValueError, f) + pytest.raises(ValueError, f) def test_alread_encoded(self): # if already encoded then ok @@ -193,13 +263,13 @@ def test_same_len_hash_collisions(self): length = 2**(l + 8) + 1 s = tm.rands_array(length, 2) result = hash_array(s, 'utf8') - self.assertFalse(result[0] == result[1]) + assert not result[0] == result[1] for l in range(8): length = 2**(l + 8) s = tm.rands_array(length, 2) result = hash_array(s, 'utf8') - self.assertFalse(result[0] == result[1]) + assert not result[0] == result[1] def test_hash_collisions(self): @@ -211,12 +281,12 @@ def test_hash_collisions(self): # these should be different! 
result1 = hash_array(np.asarray(L[0:1], dtype=object), 'utf8') expected1 = np.array([14963968704024874985], dtype=np.uint64) - self.assert_numpy_array_equal(result1, expected1) + tm.assert_numpy_array_equal(result1, expected1) result2 = hash_array(np.asarray(L[1:2], dtype=object), 'utf8') expected2 = np.array([16428432627716348016], dtype=np.uint64) - self.assert_numpy_array_equal(result2, expected2) + tm.assert_numpy_array_equal(result2, expected2) result = hash_array(np.asarray(L, dtype=object), 'utf8') - self.assert_numpy_array_equal( + tm.assert_numpy_array_equal( result, np.concatenate([expected1, expected2], axis=0)) diff --git a/pandas/tests/test_testing.py b/pandas/tests/util/test_testing.py similarity index 74% rename from pandas/tests/test_testing.py rename to pandas/tests/util/test_testing.py index 07bfdc8fc9078..1c878604b11a2 100644 --- a/pandas/tests/test_testing.py +++ b/pandas/tests/util/test_testing.py @@ -1,30 +1,26 @@ # -*- coding: utf-8 -*- import pandas as pd -import unittest import pytest import numpy as np import sys from pandas import Series, DataFrame import pandas.util.testing as tm -from pandas.util.testing import (assert_almost_equal, assertRaisesRegexp, - raise_with_traceback, assert_index_equal, - assert_series_equal, assert_frame_equal, - assert_numpy_array_equal, +import pandas.util._test_decorators as td +from pandas.util.testing import (assert_almost_equal, raise_with_traceback, + assert_index_equal, assert_series_equal, + assert_frame_equal, assert_numpy_array_equal, RNGContext) -from pandas.compat import is_platform_windows -# let's get meta. 
- -class TestAssertAlmostEqual(tm.TestCase): +class TestAssertAlmostEqual(object): def _assert_almost_equal_both(self, a, b, **kwargs): assert_almost_equal(a, b, **kwargs) assert_almost_equal(b, a, **kwargs) def _assert_not_almost_equal_both(self, a, b, **kwargs): - self.assertRaises(AssertionError, assert_almost_equal, a, b, **kwargs) - self.assertRaises(AssertionError, assert_almost_equal, b, a, **kwargs) + pytest.raises(AssertionError, assert_almost_equal, a, b, **kwargs) + pytest.raises(AssertionError, assert_almost_equal, b, a, **kwargs) def test_assert_almost_equal_numbers(self): self._assert_almost_equal_both(1.1, 1.1) @@ -52,12 +48,18 @@ def test_assert_almost_equal_numbers_with_mixed(self): self._assert_not_almost_equal_both(1, [1, ]) self._assert_not_almost_equal_both(1, object()) - def test_assert_almost_equal_edge_case_ndarrays(self): - self._assert_almost_equal_both(np.array([], dtype='M8[ns]'), - np.array([], dtype='float64'), - check_dtype=False) - self._assert_almost_equal_both(np.array([], dtype=str), - np.array([], dtype='int64'), + @pytest.mark.parametrize( + "left_dtype", + ['M8[ns]', 'm8[ns]', 'float64', 'int64', 'object']) + @pytest.mark.parametrize( + "right_dtype", + ['M8[ns]', 'm8[ns]', 'float64', 'int64', 'object']) + def test_assert_almost_equal_edge_case_ndarrays( + self, left_dtype, right_dtype): + + # empty compare + self._assert_almost_equal_both(np.array([], dtype=left_dtype), + np.array([], dtype=right_dtype), check_dtype=False) def test_assert_almost_equal_dicts(self): @@ -130,12 +132,12 @@ def test_assert_almost_equal_inf(self): dtype=np.object_)) def test_assert_almost_equal_pandas(self): - self.assert_almost_equal(pd.Index([1., 1.1]), - pd.Index([1., 1.100001])) - self.assert_almost_equal(pd.Series([1., 1.1]), - pd.Series([1., 1.100001])) - self.assert_almost_equal(pd.DataFrame({'a': [1., 1.1]}), - pd.DataFrame({'a': [1., 1.100001]})) + tm.assert_almost_equal(pd.Index([1., 1.1]), + pd.Index([1., 1.100001])) + 
tm.assert_almost_equal(pd.Series([1., 1.1]), + pd.Series([1., 1.100001])) + tm.assert_almost_equal(pd.DataFrame({'a': [1., 1.1]}), + pd.DataFrame({'a': [1., 1.100001]})) def test_assert_almost_equal_object(self): a = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-01')] @@ -143,16 +145,16 @@ def test_assert_almost_equal_object(self): self._assert_almost_equal_both(a, b) -class TestUtilTesting(tm.TestCase): +class TestUtilTesting(object): def test_raise_with_traceback(self): - with assertRaisesRegexp(LookupError, "error_text"): + with tm.assert_raises_regex(LookupError, "error_text"): try: raise ValueError("THIS IS AN ERROR") except ValueError as e: e = LookupError("error_text") raise_with_traceback(e) - with assertRaisesRegexp(LookupError, "error_text"): + with tm.assert_raises_regex(LookupError, "error_text"): try: raise ValueError("This is another error") except ValueError: @@ -161,32 +163,29 @@ def test_raise_with_traceback(self): raise_with_traceback(e, traceback) -class TestAssertNumpyArrayEqual(tm.TestCase): +class TestAssertNumpyArrayEqual(object): + @td.skip_if_windows def test_numpy_array_equal_message(self): - if is_platform_windows(): - pytest.skip("windows has incomparable line-endings " - "and uses L on the shape") - expected = """numpy array are different numpy array shapes are different \\[left\\]: \\(2,\\) \\[right\\]: \\(3,\\)""" - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_numpy_array_equal(np.array([1, 2]), np.array([3, 4, 5])) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_almost_equal(np.array([1, 2]), np.array([3, 4, 5])) # scalar comparison expected = """Expected type """ - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_numpy_array_equal(1, 2) expected = """expected 2\\.00000 but got 1\\.00000, with decimal 5""" - with 
assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_almost_equal(1, 2) # array / scalar array comparison @@ -196,10 +195,10 @@ def test_numpy_array_equal_message(self): \\[left\\]: ndarray \\[right\\]: int""" - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): # numpy_array_equal only accepts np.ndarray assert_numpy_array_equal(np.array([1]), 1) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_almost_equal(np.array([1]), 1) # scalar / array comparison @@ -209,9 +208,9 @@ def test_numpy_array_equal_message(self): \\[left\\]: int \\[right\\]: ndarray""" - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_numpy_array_equal(1, np.array([1])) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_almost_equal(1, np.array([1])) expected = """numpy array are different @@ -220,10 +219,10 @@ def test_numpy_array_equal_message(self): \\[left\\]: \\[nan, 2\\.0, 3\\.0\\] \\[right\\]: \\[1\\.0, nan, 3\\.0\\]""" - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_numpy_array_equal(np.array([np.nan, 2, 3]), np.array([1, np.nan, 3])) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_almost_equal(np.array([np.nan, 2, 3]), np.array([1, np.nan, 3])) @@ -233,9 +232,9 @@ def test_numpy_array_equal_message(self): \\[left\\]: \\[1, 2\\] \\[right\\]: \\[1, 3\\]""" - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_numpy_array_equal(np.array([1, 2]), np.array([1, 3])) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): 
assert_almost_equal(np.array([1, 2]), np.array([1, 3])) expected = """numpy array are different @@ -244,7 +243,7 @@ def test_numpy_array_equal_message(self): \\[left\\]: \\[1\\.1, 2\\.000001\\] \\[right\\]: \\[1\\.1, 2.0\\]""" - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_numpy_array_equal( np.array([1.1, 2.000001]), np.array([1.1, 2.0])) @@ -257,10 +256,10 @@ def test_numpy_array_equal_message(self): \\[left\\]: \\[\\[1, 2\\], \\[3, 4\\], \\[5, 6\\]\\] \\[right\\]: \\[\\[1, 3\\], \\[3, 4\\], \\[5, 6\\]\\]""" - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_numpy_array_equal(np.array([[1, 2], [3, 4], [5, 6]]), np.array([[1, 3], [3, 4], [5, 6]])) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_almost_equal(np.array([[1, 2], [3, 4], [5, 6]]), np.array([[1, 3], [3, 4], [5, 6]])) @@ -270,10 +269,10 @@ def test_numpy_array_equal_message(self): \\[left\\]: \\[\\[1, 2\\], \\[3, 4\\]\\] \\[right\\]: \\[\\[1, 3\\], \\[3, 4\\]\\]""" - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_numpy_array_equal(np.array([[1, 2], [3, 4]]), np.array([[1, 3], [3, 4]])) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_almost_equal(np.array([[1, 2], [3, 4]]), np.array([[1, 3], [3, 4]])) @@ -284,19 +283,16 @@ def test_numpy_array_equal_message(self): \\[left\\]: \\(2,\\) \\[right\\]: \\(3,\\)""" - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_numpy_array_equal(np.array([1, 2]), np.array([3, 4, 5]), obj='Index') - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_almost_equal(np.array([1, 2]), np.array([3, 4, 5]), 
obj='Index') + @td.skip_if_windows def test_numpy_array_equal_object_message(self): - if is_platform_windows(): - pytest.skip("windows has incomparable line-endings " - "and uses L on the shape") - a = np.array([pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-01')]) b = np.array([pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')]) @@ -306,9 +302,9 @@ def test_numpy_array_equal_object_message(self): \\[left\\]: \\[2011-01-01 00:00:00, 2011-01-01 00:00:00\\] \\[right\\]: \\[2011-01-01 00:00:00, 2011-01-02 00:00:00\\]""" - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_numpy_array_equal(a, b) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_almost_equal(a, b) def test_numpy_array_equal_copy_flag(self): @@ -316,10 +312,10 @@ def test_numpy_array_equal_copy_flag(self): b = a.copy() c = a.view() expected = r'array\(\[1, 2, 3\]\) is not array\(\[1, 2, 3\]\)' - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_numpy_array_equal(a, b, check_same='same') expected = r'array\(\[1, 2, 3\]\) is array\(\[1, 2, 3\]\)' - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_numpy_array_equal(a, c, check_same='copy') def test_assert_almost_equal_iterable_message(self): @@ -330,7 +326,7 @@ def test_assert_almost_equal_iterable_message(self): \\[left\\]: 2 \\[right\\]: 3""" - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_almost_equal([1, 2], [3, 4, 5]) expected = """Iterable are different @@ -339,11 +335,11 @@ def test_assert_almost_equal_iterable_message(self): \\[left\\]: \\[1, 2\\] \\[right\\]: \\[1, 3\\]""" - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_almost_equal([1, 2], [1, 
3]) -class TestAssertIndexEqual(unittest.TestCase): +class TestAssertIndexEqual(object): def test_index_equal_message(self): @@ -357,7 +353,7 @@ def test_index_equal_message(self): idx1 = pd.Index([1, 2, 3]) idx2 = pd.MultiIndex.from_tuples([('A', 1), ('A', 2), ('B', 3), ('B', 4)]) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_index_equal(idx1, idx2, exact=False) expected = """MultiIndex level \\[1\\] are different @@ -370,9 +366,9 @@ def test_index_equal_message(self): ('B', 3), ('B', 4)]) idx2 = pd.MultiIndex.from_tuples([('A', 1), ('A', 2), ('B', 3), ('B', 4)]) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_index_equal(idx1, idx2) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_index_equal(idx1, idx2, check_exact=False) expected = """Index are different @@ -383,9 +379,9 @@ def test_index_equal_message(self): idx1 = pd.Index([1, 2, 3]) idx2 = pd.Index([1, 2, 3, 4]) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_index_equal(idx1, idx2) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_index_equal(idx1, idx2, check_exact=False) expected = """Index are different @@ -396,9 +392,9 @@ def test_index_equal_message(self): idx1 = pd.Index([1, 2, 3]) idx2 = pd.Index([1, 2, 3.0]) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_index_equal(idx1, idx2, exact=True) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_index_equal(idx1, idx2, exact=True, check_exact=False) expected = """Index are different @@ -409,7 +405,7 @@ def test_index_equal_message(self): idx1 = pd.Index([1, 2, 3.]) idx2 = pd.Index([1, 
2, 3.0000000001]) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_index_equal(idx1, idx2) # must success @@ -423,9 +419,9 @@ def test_index_equal_message(self): idx1 = pd.Index([1, 2, 3.]) idx2 = pd.Index([1, 2, 3.0001]) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_index_equal(idx1, idx2) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_index_equal(idx1, idx2, check_exact=False) # must success assert_index_equal(idx1, idx2, check_exact=False, @@ -439,9 +435,9 @@ def test_index_equal_message(self): idx1 = pd.Index([1, 2, 3]) idx2 = pd.Index([1, 2, 4]) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_index_equal(idx1, idx2) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_index_equal(idx1, idx2, check_less_precise=True) expected = """MultiIndex level \\[1\\] are different @@ -454,9 +450,9 @@ def test_index_equal_message(self): ('B', 3), ('B', 4)]) idx2 = pd.MultiIndex.from_tuples([('A', 1), ('A', 2), ('B', 3), ('B', 4)]) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_index_equal(idx1, idx2) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_index_equal(idx1, idx2, check_exact=False) def test_index_equal_metadata_message(self): @@ -469,7 +465,7 @@ def test_index_equal_metadata_message(self): idx1 = pd.Index([1, 2, 3]) idx2 = pd.Index([1, 2, 3], name='x') - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_index_equal(idx1, idx2) # same name, should pass @@ -486,19 +482,19 @@ def test_index_equal_metadata_message(self): idx1 = 
pd.Index([1, 2, 3], name=np.nan) idx2 = pd.Index([1, 2, 3], name=pd.NaT) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_index_equal(idx1, idx2) -class TestAssertSeriesEqual(tm.TestCase): +class TestAssertSeriesEqual(object): def _assert_equal(self, x, y, **kwargs): assert_series_equal(x, y, **kwargs) assert_series_equal(y, x, **kwargs) def _assert_not_equal(self, a, b, **kwargs): - self.assertRaises(AssertionError, assert_series_equal, a, b, **kwargs) - self.assertRaises(AssertionError, assert_series_equal, b, a, **kwargs) + pytest.raises(AssertionError, assert_series_equal, a, b, **kwargs) + pytest.raises(AssertionError, assert_series_equal, b, a, **kwargs) def test_equal(self): self._assert_equal(Series(range(3)), Series(range(3))) @@ -522,27 +518,27 @@ def test_less_precise(self): s1 = Series([0.12345], dtype='float64') s2 = Series([0.12346], dtype='float64') - self.assertRaises(AssertionError, assert_series_equal, s1, s2) + pytest.raises(AssertionError, assert_series_equal, s1, s2) self._assert_equal(s1, s2, check_less_precise=True) for i in range(4): self._assert_equal(s1, s2, check_less_precise=i) - self.assertRaises(AssertionError, assert_series_equal, s1, s2, 10) + pytest.raises(AssertionError, assert_series_equal, s1, s2, 10) s1 = Series([0.12345], dtype='float32') s2 = Series([0.12346], dtype='float32') - self.assertRaises(AssertionError, assert_series_equal, s1, s2) + pytest.raises(AssertionError, assert_series_equal, s1, s2) self._assert_equal(s1, s2, check_less_precise=True) for i in range(4): self._assert_equal(s1, s2, check_less_precise=i) - self.assertRaises(AssertionError, assert_series_equal, s1, s2, 10) + pytest.raises(AssertionError, assert_series_equal, s1, s2, 10) # even less than less precise s1 = Series([0.1235], dtype='float32') s2 = Series([0.1236], dtype='float32') - self.assertRaises(AssertionError, assert_series_equal, s1, s2) - self.assertRaises(AssertionError, 
assert_series_equal, s1, s2, True) + pytest.raises(AssertionError, assert_series_equal, s1, s2) + pytest.raises(AssertionError, assert_series_equal, s1, s2, True) def test_index_dtype(self): df1 = DataFrame.from_records( @@ -568,7 +564,7 @@ def test_series_equal_message(self): \\[left\\]: 3, RangeIndex\\(start=0, stop=3, step=1\\) \\[right\\]: 4, RangeIndex\\(start=0, stop=4, step=1\\)""" - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_series_equal(pd.Series([1, 2, 3]), pd.Series([1, 2, 3, 4])) expected = """Series are different @@ -577,22 +573,36 @@ def test_series_equal_message(self): \\[left\\]: \\[1, 2, 3\\] \\[right\\]: \\[1, 2, 4\\]""" - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_series_equal(pd.Series([1, 2, 3]), pd.Series([1, 2, 4])) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_series_equal(pd.Series([1, 2, 3]), pd.Series([1, 2, 4]), check_less_precise=True) -class TestAssertFrameEqual(tm.TestCase): +class TestAssertFrameEqual(object): def _assert_equal(self, x, y, **kwargs): assert_frame_equal(x, y, **kwargs) assert_frame_equal(y, x, **kwargs) def _assert_not_equal(self, a, b, **kwargs): - self.assertRaises(AssertionError, assert_frame_equal, a, b, **kwargs) - self.assertRaises(AssertionError, assert_frame_equal, b, a, **kwargs) + pytest.raises(AssertionError, assert_frame_equal, a, b, **kwargs) + pytest.raises(AssertionError, assert_frame_equal, b, a, **kwargs) + + def test_equal_with_different_row_order(self): + # check_like=True ignores row-column orderings + df1 = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, + index=['a', 'b', 'c']) + df2 = pd.DataFrame({'A': [3, 2, 1], 'B': [6, 5, 4]}, + index=['c', 'b', 'a']) + + self._assert_equal(df1, df2, check_like=True) + self._assert_not_equal(df1, df2) + + def 
test_not_equal_with_different_shape(self): + self._assert_not_equal(pd.DataFrame({'A': [1, 2, 3]}), + pd.DataFrame({'A': [1, 2, 3, 4]})) def test_index_dtype(self): df1 = DataFrame.from_records( @@ -621,21 +631,11 @@ def test_frame_equal_message(self): expected = """DataFrame are different -DataFrame shape \\(number of rows\\) are different -\\[left\\]: 3, RangeIndex\\(start=0, stop=3, step=1\\) -\\[right\\]: 4, RangeIndex\\(start=0, stop=4, step=1\\)""" - - with assertRaisesRegexp(AssertionError, expected): - assert_frame_equal(pd.DataFrame({'A': [1, 2, 3]}), - pd.DataFrame({'A': [1, 2, 3, 4]})) - - expected = """DataFrame are different - -DataFrame shape \\(number of columns\\) are different -\\[left\\]: 2, Index\\(\\[u?'A', u?'B'\\], dtype='object'\\) -\\[right\\]: 1, Index\\(\\[u?'A'\\], dtype='object'\\)""" +DataFrame shape mismatch +\\[left\\]: \\(3, 2\\) +\\[right\\]: \\(3, 1\\)""" - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_frame_equal(pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}), pd.DataFrame({'A': [1, 2, 3]})) @@ -645,7 +645,7 @@ def test_frame_equal_message(self): \\[left\\]: Index\\(\\[u?'a', u?'b', u?'c'\\], dtype='object'\\) \\[right\\]: Index\\(\\[u?'a', u?'b', u?'d'\\], dtype='object'\\)""" - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_frame_equal(pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=['a', 'b', 'c']), pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, @@ -657,7 +657,7 @@ def test_frame_equal_message(self): \\[left\\]: Index\\(\\[u?'A', u?'B'\\], dtype='object'\\) \\[right\\]: Index\\(\\[u?'A', u?'b'\\], dtype='object'\\)""" - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_frame_equal(pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=['a', 'b', 'c']), pd.DataFrame({'A': [1, 2, 3], 'b': [4, 5, 6]}, @@ -669,32 +669,17 @@ def 
test_frame_equal_message(self): \\[left\\]: \\[4, 5, 6\\] \\[right\\]: \\[4, 5, 7\\]""" - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_frame_equal(pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}), pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 7]})) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): assert_frame_equal(pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}), pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 7]}), by_blocks=True) -class TestIsInstance(tm.TestCase): - - def test_isinstance(self): - - expected = "Expected type " - with assertRaisesRegexp(AssertionError, expected): - tm.assertIsInstance(1, pd.Series) - - def test_notisinstance(self): - - expected = "Input must not be type " - with assertRaisesRegexp(AssertionError, expected): - tm.assertNotIsInstance(pd.Series([1]), pd.Series) - - -class TestAssertCategoricalEqual(unittest.TestCase): +class TestAssertCategoricalEqual(object): def test_categorical_equal_message(self): @@ -706,7 +691,7 @@ def test_categorical_equal_message(self): a = pd.Categorical([1, 2, 3, 4]) b = pd.Categorical([1, 2, 3, 5]) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): tm.assert_categorical_equal(a, b) expected = """Categorical\\.codes are different @@ -717,7 +702,7 @@ def test_categorical_equal_message(self): a = pd.Categorical([1, 2, 4, 3], categories=[1, 2, 3, 4]) b = pd.Categorical([1, 2, 3, 4], categories=[1, 2, 3, 4]) - with assertRaisesRegexp(AssertionError, expected): + with tm.assert_raises_regex(AssertionError, expected): tm.assert_categorical_equal(a, b) expected = """Categorical are different @@ -728,11 +713,11 @@ def test_categorical_equal_message(self): a = pd.Categorical([1, 2, 3, 4], ordered=False) b = pd.Categorical([1, 2, 3, 4], ordered=True) - with assertRaisesRegexp(AssertionError, expected): + with 
tm.assert_raises_regex(AssertionError, expected): tm.assert_categorical_equal(a, b) -class TestRNGContext(unittest.TestCase): +class TestRNGContext(object): def test_RNGContext(self): expected0 = 1.764052345967664 @@ -740,34 +725,11 @@ def test_RNGContext(self): with RNGContext(0): with RNGContext(1): - self.assertEqual(np.random.randn(), expected1) - self.assertEqual(np.random.randn(), expected0) - - -class TestDeprecatedTests(tm.TestCase): - - def test_warning(self): - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self.assertEquals(1, 1) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self.assertNotEquals(1, 2) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self.assert_(True) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self.assertAlmostEquals(1.0, 1.0000000001) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self.assertNotAlmostEquals(1, 2) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - tm.assert_isinstance(Series([1, 2]), Series, msg='xxx') + assert np.random.randn() == expected1 + assert np.random.randn() == expected0 -class TestLocale(tm.TestCase): +class TestLocale(object): def test_locale(self): if sys.platform == 'win32': @@ -776,4 +738,4 @@ def test_locale(self): # GH9744 locales = tm.get_locales() - self.assertTrue(len(locales) >= 1) + assert len(locales) >= 1 diff --git a/pandas/tests/test_util.py b/pandas/tests/util/test_util.py similarity index 65% rename from pandas/tests/test_util.py rename to pandas/tests/util/test_util.py index 1bf9f4da45bff..2bc017ef226ce 100644 --- a/pandas/tests/test_util.py +++ b/pandas/tests/util/test_util.py @@ -1,20 +1,27 @@ # -*- coding: utf-8 -*- -from collections import OrderedDict +import os +import locale +import codecs import sys -import unittest from uuid import uuid4 +from collections import OrderedDict + +import pytest +from 
pandas.compat import intern +import pandas.core.common as com from pandas.util._move import move_into_mutable_buffer, BadMove, stolenbuf -from pandas.util.decorators import deprecate_kwarg -from pandas.util.validators import (validate_args, validate_kwargs, - validate_args_and_kwargs, - validate_bool_kwarg) +from pandas.util._decorators import deprecate_kwarg, make_signature +from pandas.util._validators import (validate_args, validate_kwargs, + validate_args_and_kwargs, + validate_bool_kwarg) import pandas.util.testing as tm +import pandas.util._test_decorators as td -class TestDecorators(tm.TestCase): +class TestDecorators(object): - def setUp(self): + def setup_method(self, method): @deprecate_kwarg('old', 'new') def _f1(new=False): return new @@ -27,15 +34,20 @@ def _f2(new=False): def _f3(new=0): return new + @deprecate_kwarg('old', None) + def _f4(old=True, unchanged=True): + return old + self.f1 = _f1 self.f2 = _f2 self.f3 = _f3 + self.f4 = _f4 def test_deprecate_kwarg(self): x = 78 with tm.assert_produces_warning(FutureWarning): result = self.f1(old=x) - self.assertIs(result, x) + assert result is x with tm.assert_produces_warning(None): self.f1(new=x) @@ -43,28 +55,37 @@ def test_dict_deprecate_kwarg(self): x = 'yes' with tm.assert_produces_warning(FutureWarning): result = self.f2(old=x) - self.assertEqual(result, True) + assert result def test_missing_deprecate_kwarg(self): x = 'bogus' with tm.assert_produces_warning(FutureWarning): result = self.f2(old=x) - self.assertEqual(result, 'bogus') + assert result == 'bogus' def test_callable_deprecate_kwarg(self): x = 5 with tm.assert_produces_warning(FutureWarning): result = self.f3(old=x) - self.assertEqual(result, x + 1) - with tm.assertRaises(TypeError): + assert result == x + 1 + with pytest.raises(TypeError): self.f3(old='hello') def test_bad_deprecate_kwarg(self): - with tm.assertRaises(TypeError): + with pytest.raises(TypeError): @deprecate_kwarg('old', 'new', 0) def f4(new=None): pass + def 
test_deprecate_keyword(self): + x = 9 + with tm.assert_produces_warning(FutureWarning): + result = self.f4(old=x) + assert result is x + with tm.assert_produces_warning(None): + result = self.f4(unchanged=x) + assert result is True + def test_rands(): r = tm.rands(10) @@ -81,12 +102,12 @@ def test_rands_array(): assert(len(arr[1, 1]) == 7) -class TestValidateArgs(tm.TestCase): +class TestValidateArgs(object): fname = 'func' def test_bad_min_fname_arg_count(self): msg = "'max_fname_arg_count' must be non-negative" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): validate_args(self.fname, (None,), -1, 'foo') def test_bad_arg_length_max_value_single(self): @@ -101,7 +122,7 @@ def test_bad_arg_length_max_value_single(self): .format(fname=self.fname, max_length=max_length, actual_length=actual_length)) - with tm.assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): validate_args(self.fname, args, min_fname_arg_count, compat_args) @@ -118,7 +139,7 @@ def test_bad_arg_length_max_value_multiple(self): .format(fname=self.fname, max_length=max_length, actual_length=actual_length)) - with tm.assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): validate_args(self.fname, args, min_fname_arg_count, compat_args) @@ -137,7 +158,7 @@ def test_not_all_defaults(self): arg_vals = (1, -1, 3) for i in range(1, 3): - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): validate_args(self.fname, arg_vals[:i], 2, compat_args) def test_validation(self): @@ -151,7 +172,7 @@ def test_validation(self): validate_args(self.fname, (1, None), 2, compat_args) -class TestValidateKwargs(tm.TestCase): +class TestValidateKwargs(object): fname = 'func' def test_bad_kwarg(self): @@ -166,7 +187,7 @@ def test_bad_kwarg(self): r"keyword argument '{arg}'".format( fname=self.fname, arg=badarg)) - with tm.assertRaisesRegexp(TypeError, msg): + with 
tm.assert_raises_regex(TypeError, msg): validate_kwargs(self.fname, kwargs, compat_args) def test_not_all_none(self): @@ -187,7 +208,7 @@ def test_not_all_none(self): kwargs = dict(zip(kwarg_keys[:i], kwarg_vals[:i])) - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assert_raises_regex(ValueError, msg): validate_kwargs(self.fname, kwargs, compat_args) def test_validation(self): @@ -206,17 +227,18 @@ def test_validate_bool_kwarg(self): for name in arg_names: for value in invalid_values: - with tm.assertRaisesRegexp(ValueError, - ("For argument \"%s\" expected " - "type bool, received type %s") % - (name, type(value).__name__)): + with tm.assert_raises_regex(ValueError, + "For argument \"%s\" " + "expected type bool, " + "received type %s" % + (name, type(value).__name__)): validate_bool_kwarg(value, name) for value in valid_values: - tm.assert_equal(validate_bool_kwarg(value, name), value) + assert validate_bool_kwarg(value, name) == value -class TestValidateKwargsAndArgs(tm.TestCase): +class TestValidateKwargsAndArgs(object): fname = 'func' def test_invalid_total_length_max_length_one(self): @@ -232,7 +254,7 @@ def test_invalid_total_length_max_length_one(self): .format(fname=self.fname, max_length=max_length, actual_length=actual_length)) - with tm.assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): validate_args_and_kwargs(self.fname, args, kwargs, min_fname_arg_count, compat_args) @@ -250,7 +272,7 @@ def test_invalid_total_length_max_length_multiple(self): .format(fname=self.fname, max_length=max_length, actual_length=actual_length)) - with tm.assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): validate_args_and_kwargs(self.fname, args, kwargs, min_fname_arg_count, compat_args) @@ -269,17 +291,17 @@ def test_no_args_with_kwargs(self): args = () kwargs = {'foo': -5, bad_arg: 2} - tm.assertRaisesRegexp(ValueError, msg, - validate_args_and_kwargs, - self.fname, args, kwargs, - 
min_fname_arg_count, compat_args) + tm.assert_raises_regex(ValueError, msg, + validate_args_and_kwargs, + self.fname, args, kwargs, + min_fname_arg_count, compat_args) args = (-5, 2) kwargs = {} - tm.assertRaisesRegexp(ValueError, msg, - validate_args_and_kwargs, - self.fname, args, kwargs, - min_fname_arg_count, compat_args) + tm.assert_raises_regex(ValueError, msg, + validate_args_and_kwargs, + self.fname, args, kwargs, + min_fname_arg_count, compat_args) def test_duplicate_argument(self): min_fname_arg_count = 2 @@ -293,7 +315,7 @@ def test_duplicate_argument(self): msg = (r"{fname}\(\) got multiple values for keyword " r"argument '{arg}'".format(fname=self.fname, arg='foo')) - with tm.assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): validate_args_and_kwargs(self.fname, args, kwargs, min_fname_arg_count, compat_args) @@ -313,14 +335,14 @@ def test_validation(self): compat_args) -class TestMove(tm.TestCase): +class TestMove(object): def test_cannot_create_instance_of_stolenbuffer(self): """Stolen buffers need to be created through the smart constructor ``move_into_mutable_buffer`` which has a bunch of checks in it. 
""" msg = "cannot create 'pandas.util._move.stolenbuf' instances" - with tm.assertRaisesRegexp(TypeError, msg): + with tm.assert_raises_regex(TypeError, msg): stolenbuf() def test_more_than_one_ref(self): @@ -329,9 +351,9 @@ def test_more_than_one_ref(self): """ b = b'testing' - with tm.assertRaises(BadMove) as e: + with pytest.raises(BadMove) as e: def handle_success(type_, value, tb): - self.assertIs(value.args[0], b) + assert value.args[0] is b return type(e).handle_success(e, type_, value, tb) # super e.handle_success = handle_success @@ -350,11 +372,11 @@ def test_exactly_one_ref(self): as_stolen_buf = move_into_mutable_buffer(b[:-3]) # materialize as bytearray to show that it is mutable - self.assertEqual(bytearray(as_stolen_buf), b'test') + assert bytearray(as_stolen_buf) == b'test' - @unittest.skipIf( + @pytest.mark.skipif( sys.version_info[0] > 2, - 'bytes objects cannot be interned in py3', + reason='bytes objects cannot be interned in py3', ) def test_interned(self): salt = uuid4().hex @@ -378,19 +400,14 @@ def ref_capture(ob): refcount[0] = sys.getrefcount(ob) - 2 return ob - with tm.assertRaises(BadMove): + with pytest.raises(BadMove): # If we intern the string it will still have one reference but now # it is in the intern table so if other people intern the same # string while the mutable buffer holds the first string they will # be the same instance. move_into_mutable_buffer(ref_capture(intern(make_string()))) # noqa - self.assertEqual( - refcount[0], - 1, - msg='The BadMove was probably raised for refcount reasons instead' - ' of interning reasons', - ) + assert refcount[0] == 1 def test_numpy_errstate_is_default(): @@ -400,4 +417,99 @@ def test_numpy_errstate_is_default(): import numpy as np from pandas.compat import numpy # noqa # The errstate should be unchanged after that import. 
- tm.assert_equal(np.geterr(), expected) + assert np.geterr() == expected + + +@td.skip_if_windows +class TestLocaleUtils(object): + + @classmethod + def setup_class(cls): + cls.locales = tm.get_locales() + cls.current_locale = locale.getlocale() + + if not cls.locales: + pytest.skip("No locales found") + + @classmethod + def teardown_class(cls): + del cls.locales + del cls.current_locale + + def test_get_locales(self): + # all systems should have at least a single locale + assert len(tm.get_locales()) > 0 + + def test_get_locales_prefix(self): + if len(self.locales) == 1: + pytest.skip("Only a single locale found, no point in " + "trying to test filtering locale prefixes") + first_locale = self.locales[0] + assert len(tm.get_locales(prefix=first_locale[:2])) > 0 + + def test_set_locale(self): + if len(self.locales) == 1: + pytest.skip("Only a single locale found, no point in " + "trying to test setting another locale") + + if com._all_none(*self.current_locale): + # Not sure why, but on some travis runs with pytest, + # getlocale() returned (None, None). 
+ pytest.skip("Current locale is not set.") + + locale_override = os.environ.get('LOCALE_OVERRIDE', None) + + if locale_override is None: + lang, enc = 'it_CH', 'UTF-8' + elif locale_override == 'C': + lang, enc = 'en_US', 'ascii' + else: + lang, enc = locale_override.split('.') + + enc = codecs.lookup(enc).name + new_locale = lang, enc + + if not tm._can_set_locale(new_locale): + with pytest.raises(locale.Error): + with tm.set_locale(new_locale): + pass + else: + with tm.set_locale(new_locale) as normalized_locale: + new_lang, new_enc = normalized_locale.split('.') + new_enc = codecs.lookup(enc).name + normalized_locale = new_lang, new_enc + assert normalized_locale == new_locale + + current_locale = locale.getlocale() + assert current_locale == self.current_locale + + +def test_make_signature(): + # See GH 17608 + # Case where the func does not have default kwargs + sig = make_signature(validate_kwargs) + assert sig == (['fname', 'kwargs', 'compat_args'], + ['fname', 'kwargs', 'compat_args']) + + # Case where the func does have default kwargs + sig = make_signature(deprecate_kwarg) + assert sig == (['old_arg_name', 'new_arg_name', + 'mapping=None', 'stacklevel=2'], + ['old_arg_name', 'new_arg_name', 'mapping', 'stacklevel']) + + +def test_safe_import(monkeypatch): + assert not td.safe_import("foo") + assert not td.safe_import("pandas", min_version="99.99.99") + + # Create dummy module to be imported + import types + import sys + mod_name = "hello123" + mod = types.ModuleType(mod_name) + mod.__version__ = "1.5" + + assert not td.safe_import(mod_name) + monkeypatch.setitem(sys.modules, mod_name, mod) + assert not td.safe_import(mod_name, min_version="2.0") + assert td.safe_import(mod_name, min_version="1.0") diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index d938c2eeacbef..cd58aa2c7f923 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -1,44 +1,4 @@ -""" -SQL-style merge routines -""" - -import copy import warnings -import string - 
-import numpy as np -from pandas.compat import range, lzip, zip, map, filter -import pandas.compat as compat - -import pandas as pd -from pandas import (Categorical, Series, DataFrame, - Index, MultiIndex, Timedelta) -from pandas.core.frame import _merge_doc -from pandas.types.common import (is_datetime64tz_dtype, - is_datetime64_dtype, - needs_i8_conversion, - is_int64_dtype, - is_integer_dtype, - is_float_dtype, - is_integer, - is_int_or_datetime_dtype, - is_dtype_equal, - is_bool, - is_list_like, - _ensure_int64, - _ensure_float64, - _ensure_object, - _get_dtype) -from pandas.types.missing import na_value_for_dtype -from pandas.core.internals import (items_overlap_with_suffix, - concatenate_block_managers) -from pandas.util.decorators import Appender, Substitution - -import pandas.core.algorithms as algos -import pandas.core.common as com - -import pandas._join as _join -import pandas.hashtable as _hash # back-compat of pseudo-public API @@ -49,1389 +9,9 @@ def wrapper(*args, **kwargs): "import from the public API: " "pandas.concat instead", FutureWarning, stacklevel=3) + import pandas as pd return pd.concat(*args, **kwargs) return wrapper -concat = concat_wrap() - - -@Substitution('\nleft : DataFrame') -@Appender(_merge_doc, indents=0) -def merge(left, right, how='inner', on=None, left_on=None, right_on=None, - left_index=False, right_index=False, sort=False, - suffixes=('_x', '_y'), copy=True, indicator=False): - op = _MergeOperation(left, right, how=how, on=on, left_on=left_on, - right_on=right_on, left_index=left_index, - right_index=right_index, sort=sort, suffixes=suffixes, - copy=copy, indicator=indicator) - return op.get_result() -if __debug__: - merge.__doc__ = _merge_doc % '\nleft : DataFrame' - - -class MergeError(ValueError): - pass - - -def _groupby_and_merge(by, on, left, right, _merge_pieces, - check_duplicates=True): - """ - groupby & merge; we are always performing a left-by type operation - - Parameters - ---------- - by: field to group - on: 
duplicates field - left: left frame - right: right frame - _merge_pieces: function for merging - check_duplicates: boolean, default True - should we check & clean duplicates - """ - - pieces = [] - if not isinstance(by, (list, tuple)): - by = [by] - - lby = left.groupby(by, sort=False) - - # if we can groupby the rhs - # then we can get vastly better perf - try: - - # we will check & remove duplicates if indicated - if check_duplicates: - if on is None: - on = [] - elif not isinstance(on, (list, tuple)): - on = [on] - - if right.duplicated(by + on).any(): - right = right.drop_duplicates(by + on, keep='last') - rby = right.groupby(by, sort=False) - except KeyError: - rby = None - - for key, lhs in lby: - - if rby is None: - rhs = right - else: - try: - rhs = right.take(rby.indices[key]) - except KeyError: - # key doesn't exist in left - lcols = lhs.columns.tolist() - cols = lcols + [r for r in right.columns - if r not in set(lcols)] - merged = lhs.reindex(columns=cols) - merged.index = range(len(merged)) - pieces.append(merged) - continue - - merged = _merge_pieces(lhs, rhs) - - # make sure join keys are in the merged - # TODO, should _merge_pieces do this? 
- for k in by: - try: - if k in merged: - merged[k] = key - except: - pass - - pieces.append(merged) - - # preserve the original order - # if we have a missing piece this can be reset - from pandas.tools.concat import concat - result = concat(pieces, ignore_index=True) - result = result.reindex(columns=pieces[0].columns, copy=False) - return result, lby - - -def ordered_merge(left, right, on=None, - left_on=None, right_on=None, - left_by=None, right_by=None, - fill_method=None, suffixes=('_x', '_y')): - - warnings.warn("ordered_merge is deprecated and replaced by merge_ordered", - FutureWarning, stacklevel=2) - return merge_ordered(left, right, on=on, - left_on=left_on, right_on=right_on, - left_by=left_by, right_by=right_by, - fill_method=fill_method, suffixes=suffixes) - - -def merge_ordered(left, right, on=None, - left_on=None, right_on=None, - left_by=None, right_by=None, - fill_method=None, suffixes=('_x', '_y'), - how='outer'): - """Perform merge with optional filling/interpolation designed for ordered - data like time series data. Optionally perform group-wise merge (see - examples) - - Parameters - ---------- - left : DataFrame - right : DataFrame - on : label or list - Field names to join on. Must be found in both DataFrames. - left_on : label or list, or array-like - Field names to join on in left DataFrame. 
Can be a vector or list of - vectors of the length of the DataFrame to use a particular vector as - the join key instead of columns - right_on : label or list, or array-like - Field names to join on in right DataFrame or vector/list of vectors per - left_on docs - left_by : column name or list of column names - Group left DataFrame by group columns and merge piece by piece with - right DataFrame - right_by : column name or list of column names - Group right DataFrame by group columns and merge piece by piece with - left DataFrame - fill_method : {'ffill', None}, default None - Interpolation method for data - suffixes : 2-length sequence (tuple, list, ...) - Suffix to apply to overlapping column names in the left and right - side, respectively - how : {'left', 'right', 'outer', 'inner'}, default 'outer' - * left: use only keys from left frame (SQL: left outer join) - * right: use only keys from right frame (SQL: right outer join) - * outer: use union of keys from both frames (SQL: full outer join) - * inner: use intersection of keys from both frames (SQL: inner join) - - .. versionadded:: 0.19.0 - - Examples - -------- - >>> A >>> B - key lvalue group key rvalue - 0 a 1 a 0 b 1 - 1 c 2 a 1 c 2 - 2 e 3 a 2 d 3 - 3 a 1 b - 4 c 2 b - 5 e 3 b - - >>> ordered_merge(A, B, fill_method='ffill', left_by='group') - key lvalue group rvalue - 0 a 1 a NaN - 1 b 1 a 1 - 2 c 2 a 2 - 3 d 2 a 3 - 4 e 3 a 3 - 5 f 3 a 4 - 6 a 1 b NaN - 7 b 1 b 1 - 8 c 2 b 2 - 9 d 2 b 3 - 10 e 3 b 3 - 11 f 3 b 4 - - Returns - ------- - merged : DataFrame - The output type will the be same as 'left', if it is a subclass - of DataFrame. 
- - See also - -------- - merge - merge_asof - - """ - def _merger(x, y): - # perform the ordered merge operation - op = _OrderedMerge(x, y, on=on, left_on=left_on, right_on=right_on, - suffixes=suffixes, fill_method=fill_method, - how=how) - return op.get_result() - - if left_by is not None and right_by is not None: - raise ValueError('Can only group either left or right frames') - elif left_by is not None: - result, _ = _groupby_and_merge(left_by, on, left, right, - lambda x, y: _merger(x, y), - check_duplicates=False) - elif right_by is not None: - result, _ = _groupby_and_merge(right_by, on, right, left, - lambda x, y: _merger(y, x), - check_duplicates=False) - else: - result = _merger(left, right) - return result - -ordered_merge.__doc__ = merge_ordered.__doc__ - - -def merge_asof(left, right, on=None, - left_on=None, right_on=None, - left_index=False, right_index=False, - by=None, left_by=None, right_by=None, - suffixes=('_x', '_y'), - tolerance=None, - allow_exact_matches=True, - direction='backward'): - """Perform an asof merge. This is similar to a left-join except that we - match on nearest key rather than equal keys. - - Both DataFrames must be sorted by the key. - - For each row in the left DataFrame: - - - A "backward" search selects the last row in the right DataFrame whose - 'on' key is less than or equal to the left's key. - - - A "forward" search selects the first row in the right DataFrame whose - 'on' key is greater than or equal to the left's key. - - - A "nearest" search selects the row in the right DataFrame whose 'on' - key is closest in absolute distance to the left's key. - - The default is "backward" and is the compatible in versions below 0.20.0. - The direction parameter was added in version 0.20.0 and introduces - "forward" and "nearest". - - Optionally match on equivalent keys with 'by' before searching with 'on'. - - .. 
versionadded:: 0.19.0 - - Parameters - ---------- - left : DataFrame - right : DataFrame - on : label - Field name to join on. Must be found in both DataFrames. - The data MUST be ordered. Furthermore this must be a numeric column, - such as datetimelike, integer, or float. On or left_on/right_on - must be given. - left_on : label - Field name to join on in left DataFrame. - right_on : label - Field name to join on in right DataFrame. - left_index : boolean - Use the index of the left DataFrame as the join key. - - .. versionadded:: 0.19.2 - - right_index : boolean - Use the index of the right DataFrame as the join key. - - .. versionadded:: 0.19.2 - - by : column name or list of column names - Match on these columns before performing merge operation. - left_by : column name - Field names to match on in the left DataFrame. - - .. versionadded:: 0.19.2 - - right_by : column name - Field names to match on in the right DataFrame. - - .. versionadded:: 0.19.2 - - suffixes : 2-length sequence (tuple, list, ...) - Suffix to apply to overlapping column names in the left and right - side, respectively - tolerance : integer or Timedelta, optional, default None - select asof tolerance within this range; must be compatible - to the merge index. - allow_exact_matches : boolean, default True - - - If True, allow matching the same 'on' value - (i.e. less-than-or-equal-to / greater-than-or-equal-to) - - If False, don't match the same 'on' value - (i.e., stricly less-than / strictly greater-than) - - direction : 'backward' (default), 'forward', or 'nearest' - Whether to search for prior, subsequent, or closest matches. - - .. 
versionadded:: 0.20.0 - - Returns - ------- - merged : DataFrame - - Examples - -------- - >>> left - a left_val - 0 1 a - 1 5 b - 2 10 c - - >>> right - a right_val - 0 1 1 - 1 2 2 - 2 3 3 - 3 6 6 - 4 7 7 - - >>> pd.merge_asof(left, right, on='a') - a left_val right_val - 0 1 a 1 - 1 5 b 3 - 2 10 c 7 - - >>> pd.merge_asof(left, right, on='a', allow_exact_matches=False) - a left_val right_val - 0 1 a NaN - 1 5 b 3.0 - 2 10 c 7.0 - - >>> pd.merge_asof(left, right, on='a', direction='forward') - a left_val right_val - 0 1 a 1.0 - 1 5 b 6.0 - 2 10 c NaN - - >>> pd.merge_asof(left, right, on='a', direction='nearest') - a left_val right_val - 0 1 a 1 - 1 5 b 6 - 2 10 c 7 - - We can use indexed DataFrames as well. - - >>> left - left_val - 1 a - 5 b - 10 c - - >>> right - right_val - 1 1 - 2 2 - 3 3 - 6 6 - 7 7 - - >>> pd.merge_asof(left, right, left_index=True, right_index=True) - left_val right_val - 1 a 1 - 5 b 3 - 10 c 7 - - Here is a real-world times-series example - - >>> quotes - time ticker bid ask - 0 2016-05-25 13:30:00.023 GOOG 720.50 720.93 - 1 2016-05-25 13:30:00.023 MSFT 51.95 51.96 - 2 2016-05-25 13:30:00.030 MSFT 51.97 51.98 - 3 2016-05-25 13:30:00.041 MSFT 51.99 52.00 - 4 2016-05-25 13:30:00.048 GOOG 720.50 720.93 - 5 2016-05-25 13:30:00.049 AAPL 97.99 98.01 - 6 2016-05-25 13:30:00.072 GOOG 720.50 720.88 - 7 2016-05-25 13:30:00.075 MSFT 52.01 52.03 - - >>> trades - time ticker price quantity - 0 2016-05-25 13:30:00.023 MSFT 51.95 75 - 1 2016-05-25 13:30:00.038 MSFT 51.95 155 - 2 2016-05-25 13:30:00.048 GOOG 720.77 100 - 3 2016-05-25 13:30:00.048 GOOG 720.92 100 - 4 2016-05-25 13:30:00.048 AAPL 98.00 100 - - By default we are taking the asof of the quotes - - >>> pd.merge_asof(trades, quotes, - ... on='time', - ... 
by='ticker') - time ticker price quantity bid ask - 0 2016-05-25 13:30:00.023 MSFT 51.95 75 51.95 51.96 - 1 2016-05-25 13:30:00.038 MSFT 51.95 155 51.97 51.98 - 2 2016-05-25 13:30:00.048 GOOG 720.77 100 720.50 720.93 - 3 2016-05-25 13:30:00.048 GOOG 720.92 100 720.50 720.93 - 4 2016-05-25 13:30:00.048 AAPL 98.00 100 NaN NaN - - We only asof within 2ms betwen the quote time and the trade time - - >>> pd.merge_asof(trades, quotes, - ... on='time', - ... by='ticker', - ... tolerance=pd.Timedelta('2ms')) - time ticker price quantity bid ask - 0 2016-05-25 13:30:00.023 MSFT 51.95 75 51.95 51.96 - 1 2016-05-25 13:30:00.038 MSFT 51.95 155 NaN NaN - 2 2016-05-25 13:30:00.048 GOOG 720.77 100 720.50 720.93 - 3 2016-05-25 13:30:00.048 GOOG 720.92 100 720.50 720.93 - 4 2016-05-25 13:30:00.048 AAPL 98.00 100 NaN NaN - - We only asof within 10ms betwen the quote time and the trade time - and we exclude exact matches on time. However *prior* data will - propogate forward - - >>> pd.merge_asof(trades, quotes, - ... on='time', - ... by='ticker', - ... tolerance=pd.Timedelta('10ms'), - ... allow_exact_matches=False) - time ticker price quantity bid ask - 0 2016-05-25 13:30:00.023 MSFT 51.95 75 NaN NaN - 1 2016-05-25 13:30:00.038 MSFT 51.95 155 51.97 51.98 - 2 2016-05-25 13:30:00.048 GOOG 720.77 100 720.50 720.93 - 3 2016-05-25 13:30:00.048 GOOG 720.92 100 720.50 720.93 - 4 2016-05-25 13:30:00.048 AAPL 98.00 100 NaN NaN - - See also - -------- - merge - merge_ordered - - """ - op = _AsOfMerge(left, right, - on=on, left_on=left_on, right_on=right_on, - left_index=left_index, right_index=right_index, - by=by, left_by=left_by, right_by=right_by, - suffixes=suffixes, - how='asof', tolerance=tolerance, - allow_exact_matches=allow_exact_matches, - direction=direction) - return op.get_result() - - -# TODO: transformations?? 
-# TODO: only copy DataFrames when modification necessary -class _MergeOperation(object): - """ - Perform a database (SQL) merge operation between two DataFrame objects - using either columns as keys or their row indexes - """ - _merge_type = 'merge' - - def __init__(self, left, right, how='inner', on=None, - left_on=None, right_on=None, axis=1, - left_index=False, right_index=False, sort=True, - suffixes=('_x', '_y'), copy=True, indicator=False): - self.left = self.orig_left = left - self.right = self.orig_right = right - self.how = how - self.axis = axis - - self.on = com._maybe_make_list(on) - self.left_on = com._maybe_make_list(left_on) - self.right_on = com._maybe_make_list(right_on) - - self.copy = copy - self.suffixes = suffixes - self.sort = sort - - self.left_index = left_index - self.right_index = right_index - - self.indicator = indicator - - if isinstance(self.indicator, compat.string_types): - self.indicator_name = self.indicator - elif isinstance(self.indicator, bool): - self.indicator_name = '_merge' if self.indicator else None - else: - raise ValueError( - 'indicator option can only accept boolean or string arguments') - - if not isinstance(left, DataFrame): - raise ValueError( - 'can not merge DataFrame with instance of ' - 'type {0}'.format(type(left))) - if not isinstance(right, DataFrame): - raise ValueError( - 'can not merge DataFrame with instance of ' - 'type {0}'.format(type(right))) - - if not is_bool(left_index): - raise ValueError( - 'left_index parameter must be of type bool, not ' - '{0}'.format(type(left_index))) - if not is_bool(right_index): - raise ValueError( - 'right_index parameter must be of type bool, not ' - '{0}'.format(type(right_index))) - - # warn user when merging between different levels - if left.columns.nlevels != right.columns.nlevels: - msg = ('merging between different levels can give an unintended ' - 'result ({0} levels on the left, {1} on the right)') - msg = msg.format(left.columns.nlevels, 
right.columns.nlevels) - warnings.warn(msg, UserWarning) - - self._validate_specification() - - # note this function has side effects - (self.left_join_keys, - self.right_join_keys, - self.join_names) = self._get_merge_keys() - - def get_result(self): - if self.indicator: - self.left, self.right = self._indicator_pre_merge( - self.left, self.right) - - join_index, left_indexer, right_indexer = self._get_join_info() - - ldata, rdata = self.left._data, self.right._data - lsuf, rsuf = self.suffixes - - llabels, rlabels = items_overlap_with_suffix(ldata.items, lsuf, - rdata.items, rsuf) - - lindexers = {1: left_indexer} if left_indexer is not None else {} - rindexers = {1: right_indexer} if right_indexer is not None else {} - - result_data = concatenate_block_managers( - [(ldata, lindexers), (rdata, rindexers)], - axes=[llabels.append(rlabels), join_index], - concat_axis=0, copy=self.copy) - - typ = self.left._constructor - result = typ(result_data).__finalize__(self, method=self._merge_type) - - if self.indicator: - result = self._indicator_post_merge(result) - - self._maybe_add_join_keys(result, left_indexer, right_indexer) - - return result - - def _indicator_pre_merge(self, left, right): - - columns = left.columns.union(right.columns) - - for i in ['_left_indicator', '_right_indicator']: - if i in columns: - raise ValueError("Cannot use `indicator=True` option when " - "data contains a column named {}".format(i)) - if self.indicator_name in columns: - raise ValueError( - "Cannot use name of an existing column for indicator column") - - left = left.copy() - right = right.copy() - - left['_left_indicator'] = 1 - left['_left_indicator'] = left['_left_indicator'].astype('int8') - - right['_right_indicator'] = 2 - right['_right_indicator'] = right['_right_indicator'].astype('int8') - - return left, right - - def _indicator_post_merge(self, result): - - result['_left_indicator'] = result['_left_indicator'].fillna(0) - result['_right_indicator'] = 
result['_right_indicator'].fillna(0) - - result[self.indicator_name] = Categorical((result['_left_indicator'] + - result['_right_indicator']), - categories=[1, 2, 3]) - result[self.indicator_name] = ( - result[self.indicator_name] - .cat.rename_categories(['left_only', 'right_only', 'both'])) - - result = result.drop(labels=['_left_indicator', '_right_indicator'], - axis=1) - return result - - def _maybe_add_join_keys(self, result, left_indexer, right_indexer): - - left_has_missing = None - right_has_missing = None - - keys = zip(self.join_names, self.left_on, self.right_on) - for i, (name, lname, rname) in enumerate(keys): - if not _should_fill(lname, rname): - continue - - take_left, take_right = None, None - - if name in result: - - if left_indexer is not None and right_indexer is not None: - if name in self.left: - - if left_has_missing is None: - left_has_missing = (left_indexer == -1).any() - - if left_has_missing: - take_right = self.right_join_keys[i] - - if not is_dtype_equal(result[name].dtype, - self.left[name].dtype): - take_left = self.left[name]._values - - elif name in self.right: - - if right_has_missing is None: - right_has_missing = (right_indexer == -1).any() - - if right_has_missing: - take_left = self.left_join_keys[i] - - if not is_dtype_equal(result[name].dtype, - self.right[name].dtype): - take_right = self.right[name]._values - - elif left_indexer is not None \ - and isinstance(self.left_join_keys[i], np.ndarray): - - take_left = self.left_join_keys[i] - take_right = self.right_join_keys[i] - - if take_left is not None or take_right is not None: - - if take_left is None: - lvals = result[name]._values - else: - lfill = na_value_for_dtype(take_left.dtype) - lvals = algos.take_1d(take_left, left_indexer, - fill_value=lfill) - - if take_right is None: - rvals = result[name]._values - else: - rfill = na_value_for_dtype(take_right.dtype) - rvals = algos.take_1d(take_right, right_indexer, - fill_value=rfill) - - # if we have an all missing 
left_indexer - # make sure to just use the right values - mask = left_indexer == -1 - if mask.all(): - key_col = rvals - else: - key_col = Index(lvals).where(~mask, rvals) - - if name in result: - result[name] = key_col - else: - result.insert(i, name or 'key_%d' % i, key_col) - - def _get_join_indexers(self): - """ return the join indexers """ - return _get_join_indexers(self.left_join_keys, - self.right_join_keys, - sort=self.sort, - how=self.how) - - def _get_join_info(self): - left_ax = self.left._data.axes[self.axis] - right_ax = self.right._data.axes[self.axis] - - if self.left_index and self.right_index and self.how != 'asof': - join_index, left_indexer, right_indexer = \ - left_ax.join(right_ax, how=self.how, return_indexers=True) - elif self.right_index and self.how == 'left': - join_index, left_indexer, right_indexer = \ - _left_join_on_index(left_ax, right_ax, self.left_join_keys, - sort=self.sort) - elif self.left_index and self.how == 'right': - join_index, right_indexer, left_indexer = \ - _left_join_on_index(right_ax, left_ax, self.right_join_keys, - sort=self.sort) - else: - (left_indexer, - right_indexer) = self._get_join_indexers() - - if self.right_index: - if len(self.left) > 0: - join_index = self.left.index.take(left_indexer) - else: - join_index = self.right.index.take(right_indexer) - left_indexer = np.array([-1] * len(join_index)) - elif self.left_index: - if len(self.right) > 0: - join_index = self.right.index.take(right_indexer) - else: - join_index = self.left.index.take(left_indexer) - right_indexer = np.array([-1] * len(join_index)) - else: - join_index = Index(np.arange(len(left_indexer))) - - if len(join_index) == 0: - join_index = join_index.astype(object) - return join_index, left_indexer, right_indexer - - def _get_merge_data(self): - """ - Handles overlapping column names etc. 
- """ - ldata, rdata = self.left._data, self.right._data - lsuf, rsuf = self.suffixes - - llabels, rlabels = items_overlap_with_suffix( - ldata.items, lsuf, rdata.items, rsuf) - - if not llabels.equals(ldata.items): - ldata = ldata.copy(deep=False) - ldata.set_axis(0, llabels) - - if not rlabels.equals(rdata.items): - rdata = rdata.copy(deep=False) - rdata.set_axis(0, rlabels) - - return ldata, rdata - - def _get_merge_keys(self): - """ - Note: has side effects (copy/delete key columns) - - Parameters - ---------- - left - right - on - - Returns - ------- - left_keys, right_keys - """ - left_keys = [] - right_keys = [] - join_names = [] - right_drop = [] - left_drop = [] - left, right = self.left, self.right - - is_lkey = lambda x: isinstance( - x, (np.ndarray, Series)) and len(x) == len(left) - is_rkey = lambda x: isinstance( - x, (np.ndarray, Series)) and len(x) == len(right) - - # Note that pd.merge_asof() has separate 'on' and 'by' parameters. A - # user could, for example, request 'left_index' and 'left_by'. In a - # regular pd.merge(), users cannot specify both 'left_index' and - # 'left_on'. (Instead, users have a MultiIndex). That means the - # self.left_on in this function is always empty in a pd.merge(), but - # a pd.merge_asof(left_index=True, left_by=...) will result in a - # self.left_on array with a None in the middle of it. This requires - # a work-around as designated in the code below. - # See _validate_specification() for where this happens. - - # ugh, spaghetti re #733 - if _any(self.left_on) and _any(self.right_on): - for lk, rk in zip(self.left_on, self.right_on): - if is_lkey(lk): - left_keys.append(lk) - if is_rkey(rk): - right_keys.append(rk) - join_names.append(None) # what to do? 
- else: - if rk is not None: - right_keys.append(right[rk]._values) - join_names.append(rk) - else: - # work-around for merge_asof(right_index=True) - right_keys.append(right.index) - join_names.append(right.index.name) - else: - if not is_rkey(rk): - if rk is not None: - right_keys.append(right[rk]._values) - else: - # work-around for merge_asof(right_index=True) - right_keys.append(right.index) - if lk is not None and lk == rk: - # avoid key upcast in corner case (length-0) - if len(left) > 0: - right_drop.append(rk) - else: - left_drop.append(lk) - else: - right_keys.append(rk) - if lk is not None: - left_keys.append(left[lk]._values) - join_names.append(lk) - else: - # work-around for merge_asof(left_index=True) - left_keys.append(left.index) - join_names.append(left.index.name) - elif _any(self.left_on): - for k in self.left_on: - if is_lkey(k): - left_keys.append(k) - join_names.append(None) - else: - left_keys.append(left[k]._values) - join_names.append(k) - if isinstance(self.right.index, MultiIndex): - right_keys = [lev._values.take(lab) - for lev, lab in zip(self.right.index.levels, - self.right.index.labels)] - else: - right_keys = [self.right.index.values] - elif _any(self.right_on): - for k in self.right_on: - if is_rkey(k): - right_keys.append(k) - join_names.append(None) - else: - right_keys.append(right[k]._values) - join_names.append(k) - if isinstance(self.left.index, MultiIndex): - left_keys = [lev._values.take(lab) - for lev, lab in zip(self.left.index.levels, - self.left.index.labels)] - else: - left_keys = [self.left.index.values] - - if left_drop: - self.left = self.left.drop(left_drop, axis=1) - - if right_drop: - self.right = self.right.drop(right_drop, axis=1) - - return left_keys, right_keys, join_names - - def _validate_specification(self): - # Hm, any way to make this logic less complicated?? 
- if self.on is None and self.left_on is None and self.right_on is None: - - if self.left_index and self.right_index: - self.left_on, self.right_on = (), () - elif self.left_index: - if self.right_on is None: - raise MergeError('Must pass right_on or right_index=True') - elif self.right_index: - if self.left_on is None: - raise MergeError('Must pass left_on or left_index=True') - else: - # use the common columns - common_cols = self.left.columns.intersection( - self.right.columns) - if len(common_cols) == 0: - raise MergeError('No common columns to perform merge on') - if not common_cols.is_unique: - raise MergeError("Data columns not unique: %s" - % repr(common_cols)) - self.left_on = self.right_on = common_cols - elif self.on is not None: - if self.left_on is not None or self.right_on is not None: - raise MergeError('Can only pass argument "on" OR "left_on" ' - 'and "right_on", not a combination of both.') - self.left_on = self.right_on = self.on - elif self.left_on is not None: - n = len(self.left_on) - if self.right_index: - if len(self.left_on) != self.right.index.nlevels: - raise ValueError('len(left_on) must equal the number ' - 'of levels in the index of "right"') - self.right_on = [None] * n - elif self.right_on is not None: - n = len(self.right_on) - if self.left_index: - if len(self.right_on) != self.left.index.nlevels: - raise ValueError('len(right_on) must equal the number ' - 'of levels in the index of "left"') - self.left_on = [None] * n - if len(self.right_on) != len(self.left_on): - raise ValueError("len(right_on) must equal len(left_on)") - - -def _get_join_indexers(left_keys, right_keys, sort=False, how='inner', - **kwargs): - """ - - Parameters - ---------- - - Returns - ------- - - """ - from functools import partial - - assert len(left_keys) == len(right_keys), \ - 'left_key and right_keys must be the same length' - - # bind `sort` arg. of _factorize_keys - fkeys = partial(_factorize_keys, sort=sort) - - # get left & right join labels and num. 
of levels at each location - llab, rlab, shape = map(list, zip(* map(fkeys, left_keys, right_keys))) - - # get flat i8 keys from label lists - lkey, rkey = _get_join_keys(llab, rlab, shape, sort) - - # factorize keys to a dense i8 space - # `count` is the num. of unique keys - # set(lkey) | set(rkey) == range(count) - lkey, rkey, count = fkeys(lkey, rkey) - - # preserve left frame order if how == 'left' and sort == False - kwargs = copy.copy(kwargs) - if how == 'left': - kwargs['sort'] = sort - join_func = _join_functions[how] - - return join_func(lkey, rkey, count, **kwargs) - - -class _OrderedMerge(_MergeOperation): - _merge_type = 'ordered_merge' - - def __init__(self, left, right, on=None, left_on=None, right_on=None, - left_index=False, right_index=False, axis=1, - suffixes=('_x', '_y'), copy=True, - fill_method=None, how='outer'): - - self.fill_method = fill_method - _MergeOperation.__init__(self, left, right, on=on, left_on=left_on, - left_index=left_index, - right_index=right_index, - right_on=right_on, axis=axis, - how=how, suffixes=suffixes, - sort=True # factorize sorts - ) - - def get_result(self): - join_index, left_indexer, right_indexer = self._get_join_info() - - # this is a bit kludgy - ldata, rdata = self.left._data, self.right._data - lsuf, rsuf = self.suffixes - - llabels, rlabels = items_overlap_with_suffix(ldata.items, lsuf, - rdata.items, rsuf) - - if self.fill_method == 'ffill': - left_join_indexer = _join.ffill_indexer(left_indexer) - right_join_indexer = _join.ffill_indexer(right_indexer) - else: - left_join_indexer = left_indexer - right_join_indexer = right_indexer - - lindexers = { - 1: left_join_indexer} if left_join_indexer is not None else {} - rindexers = { - 1: right_join_indexer} if right_join_indexer is not None else {} - - result_data = concatenate_block_managers( - [(ldata, lindexers), (rdata, rindexers)], - axes=[llabels.append(rlabels), join_index], - concat_axis=0, copy=self.copy) - - typ = self.left._constructor - result = 
typ(result_data).__finalize__(self, method=self._merge_type) - - self._maybe_add_join_keys(result, left_indexer, right_indexer) - - return result - - -def _asof_function(direction, on_type): - return getattr(_join, 'asof_join_%s_%s' % (direction, on_type), None) - - -def _asof_by_function(direction, on_type, by_type): - return getattr(_join, 'asof_join_%s_%s_by_%s' % - (direction, on_type, by_type), None) - - -_type_casters = { - 'int64_t': _ensure_int64, - 'double': _ensure_float64, - 'object': _ensure_object, -} - -_cython_types = { - 'uint8': 'uint8_t', - 'uint32': 'uint32_t', - 'uint16': 'uint16_t', - 'uint64': 'uint64_t', - 'int8': 'int8_t', - 'int32': 'int32_t', - 'int16': 'int16_t', - 'int64': 'int64_t', - 'float16': 'error', - 'float32': 'float', - 'float64': 'double', -} - - -def _get_cython_type(dtype): - """ Given a dtype, return a C name like 'int64_t' or 'double' """ - type_name = _get_dtype(dtype).name - ctype = _cython_types.get(type_name, 'object') - if ctype == 'error': - raise MergeError('unsupported type: ' + type_name) - return ctype - - -def _get_cython_type_upcast(dtype): - """ Upcast a dtype to 'int64_t', 'double', or 'object' """ - if is_integer_dtype(dtype): - return 'int64_t' - elif is_float_dtype(dtype): - return 'double' - else: - return 'object' - - -class _AsOfMerge(_OrderedMerge): - _merge_type = 'asof_merge' - - def __init__(self, left, right, on=None, left_on=None, right_on=None, - left_index=False, right_index=False, - by=None, left_by=None, right_by=None, - axis=1, suffixes=('_x', '_y'), copy=True, - fill_method=None, - how='asof', tolerance=None, - allow_exact_matches=True, - direction='backward'): - - self.by = by - self.left_by = left_by - self.right_by = right_by - self.tolerance = tolerance - self.allow_exact_matches = allow_exact_matches - self.direction = direction - - _OrderedMerge.__init__(self, left, right, on=on, left_on=left_on, - right_on=right_on, left_index=left_index, - right_index=right_index, axis=axis, - 
how=how, suffixes=suffixes, - fill_method=fill_method) - - def _validate_specification(self): - super(_AsOfMerge, self)._validate_specification() - - # we only allow on to be a single item for on - if len(self.left_on) != 1 and not self.left_index: - raise MergeError("can only asof on a key for left") - - if len(self.right_on) != 1 and not self.right_index: - raise MergeError("can only asof on a key for right") - - if self.left_index and isinstance(self.left.index, MultiIndex): - raise MergeError("left can only have one index") - - if self.right_index and isinstance(self.right.index, MultiIndex): - raise MergeError("right can only have one index") - - # set 'by' columns - if self.by is not None: - if self.left_by is not None or self.right_by is not None: - raise MergeError('Can only pass by OR left_by ' - 'and right_by') - self.left_by = self.right_by = self.by - if self.left_by is None and self.right_by is not None: - raise MergeError('missing left_by') - if self.left_by is not None and self.right_by is None: - raise MergeError('missing right_by') - - # add by to our key-list so we can have it in the - # output as a key - if self.left_by is not None: - if not is_list_like(self.left_by): - self.left_by = [self.left_by] - if not is_list_like(self.right_by): - self.right_by = [self.right_by] - - self.left_on = self.left_by + list(self.left_on) - self.right_on = self.right_by + list(self.right_on) - - # check 'direction' is valid - if self.direction not in ['backward', 'forward', 'nearest']: - raise MergeError('direction invalid: ' + self.direction) - - @property - def _asof_key(self): - """ This is our asof key, the 'on' """ - return self.left_on[-1] - - def _get_merge_keys(self): - - # note this function has side effects - (left_join_keys, - right_join_keys, - join_names) = super(_AsOfMerge, self)._get_merge_keys() - - # validate index types are the same - for lk, rk in zip(left_join_keys, right_join_keys): - if not is_dtype_equal(lk.dtype, rk.dtype): - raise 
MergeError("incompatible merge keys, " - "must be the same type") - - # validate tolerance; must be a Timedelta if we have a DTI - if self.tolerance is not None: - - if self.left_index: - lt = self.left.index - else: - lt = left_join_keys[-1] - - msg = "incompatible tolerance, must be compat " \ - "with type {0}".format(type(lt)) - - if is_datetime64_dtype(lt) or is_datetime64tz_dtype(lt): - if not isinstance(self.tolerance, Timedelta): - raise MergeError(msg) - if self.tolerance < Timedelta(0): - raise MergeError("tolerance must be positive") - - elif is_int64_dtype(lt): - if not is_integer(self.tolerance): - raise MergeError(msg) - if self.tolerance < 0: - raise MergeError("tolerance must be positive") - - else: - raise MergeError("key must be integer or timestamp") - - # validate allow_exact_matches - if not is_bool(self.allow_exact_matches): - raise MergeError("allow_exact_matches must be boolean, " - "passed {0}".format(self.allow_exact_matches)) - - return left_join_keys, right_join_keys, join_names - - def _get_join_indexers(self): - """ return the join indexers """ - - def flip(xs): - """ unlike np.transpose, this returns an array of tuples """ - labels = list(string.ascii_lowercase[:len(xs)]) - dtypes = [x.dtype for x in xs] - labeled_dtypes = list(zip(labels, dtypes)) - return np.array(lzip(*xs), labeled_dtypes) - - # values to compare - left_values = (self.left.index.values if self.left_index else - self.left_join_keys[-1]) - right_values = (self.right.index.values if self.right_index else - self.right_join_keys[-1]) - tolerance = self.tolerance - - # we required sortedness in the join keys - msg = " keys must be sorted" - if not Index(left_values).is_monotonic: - raise ValueError('left' + msg) - if not Index(right_values).is_monotonic: - raise ValueError('right' + msg) - - # initial type conversion as needed - if needs_i8_conversion(left_values): - left_values = left_values.view('i8') - right_values = right_values.view('i8') - if tolerance is not None: 
- tolerance = tolerance.value - - # a "by" parameter requires special handling - if self.left_by is not None: - if len(self.left_join_keys) > 2: - # get tuple representation of values if more than one - left_by_values = flip(self.left_join_keys[0:-1]) - right_by_values = flip(self.right_join_keys[0:-1]) - else: - left_by_values = self.left_join_keys[0] - right_by_values = self.right_join_keys[0] - - # upcast 'by' parameter because HashTable is limited - by_type = _get_cython_type_upcast(left_by_values.dtype) - by_type_caster = _type_casters[by_type] - left_by_values = by_type_caster(left_by_values) - right_by_values = by_type_caster(right_by_values) - - # choose appropriate function by type - on_type = _get_cython_type(left_values.dtype) - func = _asof_by_function(self.direction, on_type, by_type) - return func(left_values, - right_values, - left_by_values, - right_by_values, - self.allow_exact_matches, - tolerance) - else: - # choose appropriate function by type - on_type = _get_cython_type(left_values.dtype) - func = _asof_function(self.direction, on_type) - return func(left_values, - right_values, - self.allow_exact_matches, - tolerance) - - -def _get_multiindex_indexer(join_keys, index, sort): - from functools import partial - - # bind `sort` argument - fkeys = partial(_factorize_keys, sort=sort) - - # left & right join labels and num. 
of levels at each location - rlab, llab, shape = map(list, zip(* map(fkeys, index.levels, join_keys))) - if sort: - rlab = list(map(np.take, rlab, index.labels)) - else: - i8copy = lambda a: a.astype('i8', subok=False, copy=True) - rlab = list(map(i8copy, index.labels)) - - # fix right labels if there were any nulls - for i in range(len(join_keys)): - mask = index.labels[i] == -1 - if mask.any(): - # check if there already was any nulls at this location - # if there was, it is factorized to `shape[i] - 1` - a = join_keys[i][llab[i] == shape[i] - 1] - if a.size == 0 or not a[0] != a[0]: - shape[i] += 1 - - rlab[i][mask] = shape[i] - 1 - - # get flat i8 join keys - lkey, rkey = _get_join_keys(llab, rlab, shape, sort) - - # factorize keys to a dense i8 space - lkey, rkey, count = fkeys(lkey, rkey) - - return _join.left_outer_join(lkey, rkey, count, sort=sort) - - -def _get_single_indexer(join_key, index, sort=False): - left_key, right_key, count = _factorize_keys(join_key, index, sort=sort) - - left_indexer, right_indexer = _join.left_outer_join( - _ensure_int64(left_key), - _ensure_int64(right_key), - count, sort=sort) - - return left_indexer, right_indexer - - -def _left_join_on_index(left_ax, right_ax, join_keys, sort=False): - if len(join_keys) > 1: - if not ((isinstance(right_ax, MultiIndex) and - len(join_keys) == right_ax.nlevels)): - raise AssertionError("If more than one join key is given then " - "'right_ax' must be a MultiIndex and the " - "number of join keys must be the number of " - "levels in right_ax") - - left_indexer, right_indexer = \ - _get_multiindex_indexer(join_keys, right_ax, sort=sort) - else: - jkey = join_keys[0] - - left_indexer, right_indexer = \ - _get_single_indexer(jkey, right_ax, sort=sort) - - if sort or len(left_ax) != len(left_indexer): - # if asked to sort or there are 1-to-many matches - join_index = left_ax.take(left_indexer) - return join_index, left_indexer, right_indexer - - # left frame preserves order & length of its index - 
return left_ax, None, right_indexer - - -def _right_outer_join(x, y, max_groups): - right_indexer, left_indexer = _join.left_outer_join(y, x, max_groups) - return left_indexer, right_indexer - -_join_functions = { - 'inner': _join.inner_join, - 'left': _join.left_outer_join, - 'right': _right_outer_join, - 'outer': _join.full_outer_join, -} - - -def _factorize_keys(lk, rk, sort=True): - if is_datetime64tz_dtype(lk) and is_datetime64tz_dtype(rk): - lk = lk.values - rk = rk.values - if is_int_or_datetime_dtype(lk) and is_int_or_datetime_dtype(rk): - klass = _hash.Int64Factorizer - lk = _ensure_int64(com._values_from_object(lk)) - rk = _ensure_int64(com._values_from_object(rk)) - else: - klass = _hash.Factorizer - lk = _ensure_object(lk) - rk = _ensure_object(rk) - - rizer = klass(max(len(lk), len(rk))) - - llab = rizer.factorize(lk) - rlab = rizer.factorize(rk) - - count = rizer.get_count() - - if sort: - uniques = rizer.uniques.to_array() - llab, rlab = _sort_labels(uniques, llab, rlab) - - # NA group - lmask = llab == -1 - lany = lmask.any() - rmask = rlab == -1 - rany = rmask.any() - - if lany or rany: - if lany: - np.putmask(llab, lmask, count) - if rany: - np.putmask(rlab, rmask, count) - count += 1 - - return llab, rlab, count - - -def _sort_labels(uniques, left, right): - if not isinstance(uniques, np.ndarray): - # tuplesafe - uniques = Index(uniques).values - - l = len(left) - labels = np.concatenate([left, right]) - - _, new_labels = algos.safe_sort(uniques, labels, na_sentinel=-1) - new_labels = _ensure_int64(new_labels) - new_left, new_right = new_labels[:l], new_labels[l:] - - return new_left, new_right - - -def _get_join_keys(llab, rlab, shape, sort): - from pandas.core.groupby import _int64_overflow_possible - - # how many levels can be done without overflow - pred = lambda i: not _int64_overflow_possible(shape[:i]) - nlev = next(filter(pred, range(len(shape), 0, -1))) - - # get keys for the first `nlev` levels - stride = np.prod(shape[1:nlev], 
dtype='i8') - lkey = stride * llab[0].astype('i8', subok=False, copy=False) - rkey = stride * rlab[0].astype('i8', subok=False, copy=False) - - for i in range(1, nlev): - stride //= shape[i] - lkey += llab[i] * stride - rkey += rlab[i] * stride - - if nlev == len(shape): # all done! - return lkey, rkey - - # densify current keys to avoid overflow - lkey, rkey, count = _factorize_keys(lkey, rkey, sort=sort) - - llab = [lkey] + llab[nlev:] - rlab = [rkey] + rlab[nlev:] - shape = [count] + shape[nlev:] - - return _get_join_keys(llab, rlab, shape, sort) - - -def _should_fill(lname, rname): - if (not isinstance(lname, compat.string_types) or - not isinstance(rname, compat.string_types)): - return True - return lname == rname - - -def _any(x): - return x is not None and len(x) > 0 and any([y is not None for y in x]) +concat = concat_wrap() diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index 0b1ced97d2b81..a68da67a219e2 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -1,4005 +1,20 @@ -# being a bit too dynamic -# pylint: disable=E1101 -from __future__ import division - +import sys import warnings -import re -from math import ceil -from collections import namedtuple -from contextlib import contextmanager -from distutils.version import LooseVersion - -import numpy as np - -from pandas.types.common import (is_list_like, - is_integer, - is_number, - is_hashable, - is_iterator) -from pandas.types.missing import isnull, notnull - -from pandas.util.decorators import cache_readonly, deprecate_kwarg -from pandas.core.base import PandasObject - -from pandas.core.common import AbstractMethodError, _try_sort -from pandas.core.generic import _shared_docs, _shared_doc_kwargs -from pandas.core.index import Index, MultiIndex -from pandas.core.series import Series, remove_na -from pandas.tseries.period import PeriodIndex -from pandas.compat import range, lrange, lmap, map, zip, string_types -import pandas.compat as compat -from 
pandas.formats.printing import pprint_thing -from pandas.util.decorators import Appender -try: # mpl optional - import pandas.tseries.converter as conv - conv.register() # needs to override so set_xlim works with str/number -except ImportError: - pass - - -# Extracted from https://gist.github.com/huyng/816622 -# this is the rcParams set when setting display.with_mpl_style -# to True. -mpl_stylesheet = { - 'axes.axisbelow': True, - 'axes.color_cycle': ['#348ABD', - '#7A68A6', - '#A60628', - '#467821', - '#CF4457', - '#188487', - '#E24A33'], - 'axes.edgecolor': '#bcbcbc', - 'axes.facecolor': '#eeeeee', - 'axes.grid': True, - 'axes.labelcolor': '#555555', - 'axes.labelsize': 'large', - 'axes.linewidth': 1.0, - 'axes.titlesize': 'x-large', - 'figure.edgecolor': 'white', - 'figure.facecolor': 'white', - 'figure.figsize': (6.0, 4.0), - 'figure.subplot.hspace': 0.5, - 'font.family': 'monospace', - 'font.monospace': ['Andale Mono', - 'Nimbus Mono L', - 'Courier New', - 'Courier', - 'Fixed', - 'Terminal', - 'monospace'], - 'font.size': 10, - 'interactive': True, - 'keymap.all_axes': ['a'], - 'keymap.back': ['left', 'c', 'backspace'], - 'keymap.forward': ['right', 'v'], - 'keymap.fullscreen': ['f'], - 'keymap.grid': ['g'], - 'keymap.home': ['h', 'r', 'home'], - 'keymap.pan': ['p'], - 'keymap.save': ['s'], - 'keymap.xscale': ['L', 'k'], - 'keymap.yscale': ['l'], - 'keymap.zoom': ['o'], - 'legend.fancybox': True, - 'lines.antialiased': True, - 'lines.linewidth': 1.0, - 'patch.antialiased': True, - 'patch.edgecolor': '#EEEEEE', - 'patch.facecolor': '#348ABD', - 'patch.linewidth': 0.5, - 'toolbar': 'toolbar2', - 'xtick.color': '#555555', - 'xtick.direction': 'in', - 'xtick.major.pad': 6.0, - 'xtick.major.size': 0.0, - 'xtick.minor.pad': 6.0, - 'xtick.minor.size': 0.0, - 'ytick.color': '#555555', - 'ytick.direction': 'in', - 'ytick.major.pad': 6.0, - 'ytick.major.size': 0.0, - 'ytick.minor.pad': 6.0, - 'ytick.minor.size': 0.0 -} - - -def _mpl_le_1_2_1(): - try: - import 
matplotlib as mpl - return (str(mpl.__version__) <= LooseVersion('1.2.1') and - str(mpl.__version__)[0] != '0') - except ImportError: - return False - - -def _mpl_ge_1_3_1(): - try: - import matplotlib - # The or v[0] == '0' is because their versioneer is - # messed up on dev - return (matplotlib.__version__ >= LooseVersion('1.3.1') or - matplotlib.__version__[0] == '0') - except ImportError: - return False - - -def _mpl_ge_1_4_0(): - try: - import matplotlib - return (matplotlib.__version__ >= LooseVersion('1.4') or - matplotlib.__version__[0] == '0') - except ImportError: - return False - - -def _mpl_ge_1_5_0(): - try: - import matplotlib - return (matplotlib.__version__ >= LooseVersion('1.5') or - matplotlib.__version__[0] == '0') - except ImportError: - return False - - -def _mpl_ge_2_0_0(): - try: - import matplotlib - return matplotlib.__version__ >= LooseVersion('2.0') - except ImportError: - return False - -if _mpl_ge_1_5_0(): - # Compat with mp 1.5, which uses cycler. - import cycler - colors = mpl_stylesheet.pop('axes.color_cycle') - mpl_stylesheet['axes.prop_cycle'] = cycler.cycler('color', colors) - - -def _get_standard_kind(kind): - return {'density': 'kde'}.get(kind, kind) - - -def _get_standard_colors(num_colors=None, colormap=None, color_type='default', - color=None): - import matplotlib.pyplot as plt - - if color is None and colormap is not None: - if isinstance(colormap, compat.string_types): - import matplotlib.cm as cm - cmap = colormap - colormap = cm.get_cmap(colormap) - if colormap is None: - raise ValueError("Colormap {0} is not recognized".format(cmap)) - colors = lmap(colormap, np.linspace(0, 1, num=num_colors)) - elif color is not None: - if colormap is not None: - warnings.warn("'color' and 'colormap' cannot be used " - "simultaneously. 
Using 'color'") - colors = list(color) if is_list_like(color) else color - else: - if color_type == 'default': - # need to call list() on the result to copy so we don't - # modify the global rcParams below - try: - colors = [c['color'] - for c in list(plt.rcParams['axes.prop_cycle'])] - except KeyError: - colors = list(plt.rcParams.get('axes.color_cycle', - list('bgrcmyk'))) - if isinstance(colors, compat.string_types): - colors = list(colors) - elif color_type == 'random': - import random - - def random_color(column): - random.seed(column) - return [random.random() for _ in range(3)] - - colors = lmap(random_color, lrange(num_colors)) - else: - raise ValueError("color_type must be either 'default' or 'random'") - - if isinstance(colors, compat.string_types): - import matplotlib.colors - conv = matplotlib.colors.ColorConverter() - - def _maybe_valid_colors(colors): - try: - [conv.to_rgba(c) for c in colors] - return True - except ValueError: - return False - - # check whether the string can be convertable to single color - maybe_single_color = _maybe_valid_colors([colors]) - # check whether each character can be convertable to colors - maybe_color_cycle = _maybe_valid_colors(list(colors)) - if maybe_single_color and maybe_color_cycle and len(colors) > 1: - msg = ("'{0}' can be parsed as both single color and " - "color cycle. Specify each color using a list " - "like ['{0}'] or {1}") - raise ValueError(msg.format(colors, list(colors))) - elif maybe_single_color: - colors = [colors] - else: - # ``colors`` is regarded as color cycle. - # mpl will raise error any of them is invalid - pass - - if len(colors) != num_colors: - multiple = num_colors // len(colors) - 1 - mod = num_colors % len(colors) - - colors += multiple * colors - colors += colors[:mod] - - return colors - - -class _Options(dict): - """ - Stores pandas plotting options. 
- Allows for parameter aliasing so you can just use parameter names that are - the same as the plot function parameters, but is stored in a canonical - format that makes it easy to breakdown into groups later - """ - - # alias so the names are same as plotting method parameter names - _ALIASES = {'x_compat': 'xaxis.compat'} - _DEFAULT_KEYS = ['xaxis.compat'] - - def __init__(self): - self['xaxis.compat'] = False - - def __getitem__(self, key): - key = self._get_canonical_key(key) - if key not in self: - raise ValueError('%s is not a valid pandas plotting option' % key) - return super(_Options, self).__getitem__(key) - - def __setitem__(self, key, value): - key = self._get_canonical_key(key) - return super(_Options, self).__setitem__(key, value) - - def __delitem__(self, key): - key = self._get_canonical_key(key) - if key in self._DEFAULT_KEYS: - raise ValueError('Cannot remove default parameter %s' % key) - return super(_Options, self).__delitem__(key) - - def __contains__(self, key): - key = self._get_canonical_key(key) - return super(_Options, self).__contains__(key) - - def reset(self): - """ - Reset the option store to its initial state - - Returns - ------- - None - """ - self.__init__() - - def _get_canonical_key(self, key): - return self._ALIASES.get(key, key) - - @contextmanager - def use(self, key, value): - """ - Temporarily set a parameter value using the with statement. - Aliasing allowed. - """ - old_value = self[key] - try: - self[key] = value - yield self - finally: - self[key] = old_value - - -plot_params = _Options() - - -def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False, - diagonal='hist', marker='.', density_kwds=None, - hist_kwds=None, range_padding=0.05, **kwds): - """ - Draw a matrix of scatter plots. 
- - Parameters - ---------- - frame : DataFrame - alpha : float, optional - amount of transparency applied - figsize : (float,float), optional - a tuple (width, height) in inches - ax : Matplotlib axis object, optional - grid : bool, optional - setting this to True will show the grid - diagonal : {'hist', 'kde'} - pick between 'kde' and 'hist' for - either Kernel Density Estimation or Histogram - plot in the diagonal - marker : str, optional - Matplotlib marker type, default '.' - hist_kwds : other plotting keyword arguments - To be passed to hist function - density_kwds : other plotting keyword arguments - To be passed to kernel density estimate plot - range_padding : float, optional - relative extension of axis range in x and y - with respect to (x_max - x_min) or (y_max - y_min), - default 0.05 - kwds : other plotting keyword arguments - To be passed to scatter function - - Examples - -------- - >>> df = DataFrame(np.random.randn(1000, 4), columns=['A','B','C','D']) - >>> scatter_matrix(df, alpha=0.2) - """ - import matplotlib.pyplot as plt - - df = frame._get_numeric_data() - n = df.columns.size - naxes = n * n - fig, axes = _subplots(naxes=naxes, figsize=figsize, ax=ax, - squeeze=False) - - # no gaps between subplots - fig.subplots_adjust(wspace=0, hspace=0) - - mask = notnull(df) - - marker = _get_marker_compat(marker) - - hist_kwds = hist_kwds or {} - density_kwds = density_kwds or {} - - # workaround because `c='b'` is hardcoded in matplotlibs scatter method - kwds.setdefault('c', plt.rcParams['patch.facecolor']) - - boundaries_list = [] - for a in df.columns: - values = df[a].values[mask[a].values] - rmin_, rmax_ = np.min(values), np.max(values) - rdelta_ext = (rmax_ - rmin_) * range_padding / 2. 
- boundaries_list.append((rmin_ - rdelta_ext, rmax_ + rdelta_ext)) - - for i, a in zip(lrange(n), df.columns): - for j, b in zip(lrange(n), df.columns): - ax = axes[i, j] - - if i == j: - values = df[a].values[mask[a].values] - - # Deal with the diagonal by drawing a histogram there. - if diagonal == 'hist': - ax.hist(values, **hist_kwds) - - elif diagonal in ('kde', 'density'): - from scipy.stats import gaussian_kde - y = values - gkde = gaussian_kde(y) - ind = np.linspace(y.min(), y.max(), 1000) - ax.plot(ind, gkde.evaluate(ind), **density_kwds) - - ax.set_xlim(boundaries_list[i]) - - else: - common = (mask[a] & mask[b]).values - - ax.scatter(df[b][common], df[a][common], - marker=marker, alpha=alpha, **kwds) - - ax.set_xlim(boundaries_list[j]) - ax.set_ylim(boundaries_list[i]) - - ax.set_xlabel(b) - ax.set_ylabel(a) - - if j != 0: - ax.yaxis.set_visible(False) - if i != n - 1: - ax.xaxis.set_visible(False) - - if len(df.columns) > 1: - lim1 = boundaries_list[0] - locs = axes[0][1].yaxis.get_majorticklocs() - locs = locs[(lim1[0] <= locs) & (locs <= lim1[1])] - adj = (locs - lim1[0]) / (lim1[1] - lim1[0]) - - lim0 = axes[0][0].get_ylim() - adj = adj * (lim0[1] - lim0[0]) + lim0[0] - axes[0][0].yaxis.set_ticks(adj) - - if np.all(locs == locs.astype(int)): - # if all ticks are int - locs = locs.astype(int) - axes[0][0].yaxis.set_ticklabels(locs) - - _set_ticks_props(axes, xlabelsize=8, xrot=90, ylabelsize=8, yrot=0) - - return axes - - -def _gca(): - import matplotlib.pyplot as plt - return plt.gca() - - -def _gcf(): - import matplotlib.pyplot as plt - return plt.gcf() - - -def _get_marker_compat(marker): - import matplotlib.lines as mlines - import matplotlib as mpl - if mpl.__version__ < '1.1.0' and marker == '.': - return 'o' - if marker not in mlines.lineMarkers: - return 'o' - return marker - - -def radviz(frame, class_column, ax=None, color=None, colormap=None, **kwds): - """RadViz - a multivariate data visualization algorithm - - Parameters: - ----------- - 
frame: DataFrame - class_column: str - Column name containing class names - ax: Matplotlib axis object, optional - color: list or tuple, optional - Colors to use for the different classes - colormap : str or matplotlib colormap object, default None - Colormap to select colors from. If string, load colormap with that name - from matplotlib. - kwds: keywords - Options to pass to matplotlib scatter plotting method - - Returns: - -------- - ax: Matplotlib axis object - """ - import matplotlib.pyplot as plt - import matplotlib.patches as patches - - def normalize(series): - a = min(series) - b = max(series) - return (series - a) / (b - a) - - n = len(frame) - classes = frame[class_column].drop_duplicates() - class_col = frame[class_column] - df = frame.drop(class_column, axis=1).apply(normalize) - - if ax is None: - ax = plt.gca(xlim=[-1, 1], ylim=[-1, 1]) - - to_plot = {} - colors = _get_standard_colors(num_colors=len(classes), colormap=colormap, - color_type='random', color=color) - - for kls in classes: - to_plot[kls] = [[], []] - - m = len(frame.columns) - 1 - s = np.array([(np.cos(t), np.sin(t)) - for t in [2.0 * np.pi * (i / float(m)) - for i in range(m)]]) - - for i in range(n): - row = df.iloc[i].values - row_ = np.repeat(np.expand_dims(row, axis=1), 2, axis=1) - y = (s * row_).sum(axis=0) / row.sum() - kls = class_col.iat[i] - to_plot[kls][0].append(y[0]) - to_plot[kls][1].append(y[1]) - - for i, kls in enumerate(classes): - ax.scatter(to_plot[kls][0], to_plot[kls][1], color=colors[i], - label=pprint_thing(kls), **kwds) - ax.legend() - - ax.add_patch(patches.Circle((0.0, 0.0), radius=1.0, facecolor='none')) - - for xy, name in zip(s, df.columns): - - ax.add_patch(patches.Circle(xy, radius=0.025, facecolor='gray')) - - if xy[0] < 0.0 and xy[1] < 0.0: - ax.text(xy[0] - 0.025, xy[1] - 0.025, name, - ha='right', va='top', size='small') - elif xy[0] < 0.0 and xy[1] >= 0.0: - ax.text(xy[0] - 0.025, xy[1] + 0.025, name, - ha='right', va='bottom', size='small') - elif 
xy[0] >= 0.0 and xy[1] < 0.0: - ax.text(xy[0] + 0.025, xy[1] - 0.025, name, - ha='left', va='top', size='small') - elif xy[0] >= 0.0 and xy[1] >= 0.0: - ax.text(xy[0] + 0.025, xy[1] + 0.025, name, - ha='left', va='bottom', size='small') - - ax.axis('equal') - return ax - - -@deprecate_kwarg(old_arg_name='data', new_arg_name='frame') -def andrews_curves(frame, class_column, ax=None, samples=200, color=None, - colormap=None, **kwds): - """ - Generates a matplotlib plot of Andrews curves, for visualising clusters of - multivariate data. - - Andrews curves have the functional form: - - f(t) = x_1/sqrt(2) + x_2 sin(t) + x_3 cos(t) + - x_4 sin(2t) + x_5 cos(2t) + ... - - Where x coefficients correspond to the values of each dimension and t is - linearly spaced between -pi and +pi. Each row of frame then corresponds to - a single curve. - - Parameters: - ----------- - frame : DataFrame - Data to be plotted, preferably normalized to (0.0, 1.0) - class_column : Name of the column containing class names - ax : matplotlib axes object, default None - samples : Number of points to plot in each curve - color: list or tuple, optional - Colors to use for the different classes - colormap : str or matplotlib colormap object, default None - Colormap to select colors from. If string, load colormap with that name - from matplotlib. - kwds: keywords - Options to pass to matplotlib plotting method - - Returns: - -------- - ax: Matplotlib axis object - - """ - from math import sqrt, pi - import matplotlib.pyplot as plt - - def function(amplitudes): - def f(t): - x1 = amplitudes[0] - result = x1 / sqrt(2.0) - - # Take the rest of the coefficients and resize them - # appropriately. Take a copy of amplitudes as otherwise numpy - # deletes the element from amplitudes itself. - coeffs = np.delete(np.copy(amplitudes), 0) - coeffs.resize(int((coeffs.size + 1) / 2), 2) - - # Generate the harmonics and arguments for the sin and cos - # functions. 
- harmonics = np.arange(0, coeffs.shape[0]) + 1 - trig_args = np.outer(harmonics, t) - - result += np.sum(coeffs[:, 0, np.newaxis] * np.sin(trig_args) + - coeffs[:, 1, np.newaxis] * np.cos(trig_args), - axis=0) - return result - return f - - n = len(frame) - class_col = frame[class_column] - classes = frame[class_column].drop_duplicates() - df = frame.drop(class_column, axis=1) - t = np.linspace(-pi, pi, samples) - used_legends = set([]) - - color_values = _get_standard_colors(num_colors=len(classes), - colormap=colormap, color_type='random', - color=color) - colors = dict(zip(classes, color_values)) - if ax is None: - ax = plt.gca(xlim=(-pi, pi)) - for i in range(n): - row = df.iloc[i].values - f = function(row) - y = f(t) - kls = class_col.iat[i] - label = pprint_thing(kls) - if label not in used_legends: - used_legends.add(label) - ax.plot(t, y, color=colors[kls], label=label, **kwds) - else: - ax.plot(t, y, color=colors[kls], **kwds) - - ax.legend(loc='upper right') - ax.grid() - return ax - - -def bootstrap_plot(series, fig=None, size=50, samples=500, **kwds): - """Bootstrap plot. 
- - Parameters: - ----------- - series: Time series - fig: matplotlib figure object, optional - size: number of data points to consider during each sampling - samples: number of times the bootstrap procedure is performed - kwds: optional keyword arguments for plotting commands, must be accepted - by both hist and plot - - Returns: - -------- - fig: matplotlib figure - """ - import random - import matplotlib.pyplot as plt - - # random.sample(ndarray, int) fails on python 3.3, sigh - data = list(series.values) - samplings = [random.sample(data, size) for _ in range(samples)] - - means = np.array([np.mean(sampling) for sampling in samplings]) - medians = np.array([np.median(sampling) for sampling in samplings]) - midranges = np.array([(min(sampling) + max(sampling)) * 0.5 - for sampling in samplings]) - if fig is None: - fig = plt.figure() - x = lrange(samples) - axes = [] - ax1 = fig.add_subplot(2, 3, 1) - ax1.set_xlabel("Sample") - axes.append(ax1) - ax1.plot(x, means, **kwds) - ax2 = fig.add_subplot(2, 3, 2) - ax2.set_xlabel("Sample") - axes.append(ax2) - ax2.plot(x, medians, **kwds) - ax3 = fig.add_subplot(2, 3, 3) - ax3.set_xlabel("Sample") - axes.append(ax3) - ax3.plot(x, midranges, **kwds) - ax4 = fig.add_subplot(2, 3, 4) - ax4.set_xlabel("Mean") - axes.append(ax4) - ax4.hist(means, **kwds) - ax5 = fig.add_subplot(2, 3, 5) - ax5.set_xlabel("Median") - axes.append(ax5) - ax5.hist(medians, **kwds) - ax6 = fig.add_subplot(2, 3, 6) - ax6.set_xlabel("Midrange") - axes.append(ax6) - ax6.hist(midranges, **kwds) - for axis in axes: - plt.setp(axis.get_xticklabels(), fontsize=8) - plt.setp(axis.get_yticklabels(), fontsize=8) - return fig - - -@deprecate_kwarg(old_arg_name='colors', new_arg_name='color') -@deprecate_kwarg(old_arg_name='data', new_arg_name='frame', stacklevel=3) -def parallel_coordinates(frame, class_column, cols=None, ax=None, color=None, - use_columns=False, xticks=None, colormap=None, - axvlines=True, axvlines_kwds=None, **kwds): - """Parallel 
coordinates plotting. - - Parameters - ---------- - frame: DataFrame - class_column: str - Column name containing class names - cols: list, optional - A list of column names to use - ax: matplotlib.axis, optional - matplotlib axis object - color: list or tuple, optional - Colors to use for the different classes - use_columns: bool, optional - If true, columns will be used as xticks - xticks: list or tuple, optional - A list of values to use for xticks - colormap: str or matplotlib colormap, default None - Colormap to use for line colors. - axvlines: bool, optional - If true, vertical lines will be added at each xtick - axvlines_kwds: keywords, optional - Options to be passed to axvline method for vertical lines - kwds: keywords - Options to pass to matplotlib plotting method - - Returns - ------- - ax: matplotlib axis object - - Examples - -------- - >>> from pandas import read_csv - >>> from pandas.tools.plotting import parallel_coordinates - >>> from matplotlib import pyplot as plt - >>> df = read_csv('https://raw.github.com/pandas-dev/pandas/master' - '/pandas/tests/data/iris.csv') - >>> parallel_coordinates(df, 'Name', color=('#556270', - '#4ECDC4', '#C7F464')) - >>> plt.show() - """ - if axvlines_kwds is None: - axvlines_kwds = {'linewidth': 1, 'color': 'black'} - import matplotlib.pyplot as plt - - n = len(frame) - classes = frame[class_column].drop_duplicates() - class_col = frame[class_column] - - if cols is None: - df = frame.drop(class_column, axis=1) - else: - df = frame[cols] - - used_legends = set([]) - - ncols = len(df.columns) - - # determine values to use for xticks - if use_columns is True: - if not np.all(np.isreal(list(df.columns))): - raise ValueError('Columns must be numeric to be used as xticks') - x = df.columns - elif xticks is not None: - if not np.all(np.isreal(xticks)): - raise ValueError('xticks specified must be numeric') - elif len(xticks) != ncols: - raise ValueError('Length of xticks must match number of columns') - x = xticks - 
else: - x = lrange(ncols) - - if ax is None: - ax = plt.gca() - - color_values = _get_standard_colors(num_colors=len(classes), - colormap=colormap, color_type='random', - color=color) - - colors = dict(zip(classes, color_values)) - - for i in range(n): - y = df.iloc[i].values - kls = class_col.iat[i] - label = pprint_thing(kls) - if label not in used_legends: - used_legends.add(label) - ax.plot(x, y, color=colors[kls], label=label, **kwds) - else: - ax.plot(x, y, color=colors[kls], **kwds) - - if axvlines: - for i in x: - ax.axvline(i, **axvlines_kwds) - - ax.set_xticks(x) - ax.set_xticklabels(df.columns) - ax.set_xlim(x[0], x[-1]) - ax.legend(loc='upper right') - ax.grid() - return ax - - -def lag_plot(series, lag=1, ax=None, **kwds): - """Lag plot for time series. - - Parameters: - ----------- - series: Time series - lag: lag of the scatter plot, default 1 - ax: Matplotlib axis object, optional - kwds: Matplotlib scatter method keyword arguments, optional - - Returns: - -------- - ax: Matplotlib axis object - """ - import matplotlib.pyplot as plt - - # workaround because `c='b'` is hardcoded in matplotlibs scatter method - kwds.setdefault('c', plt.rcParams['patch.facecolor']) - - data = series.values - y1 = data[:-lag] - y2 = data[lag:] - if ax is None: - ax = plt.gca() - ax.set_xlabel("y(t)") - ax.set_ylabel("y(t + %s)" % lag) - ax.scatter(y1, y2, **kwds) - return ax - - -def autocorrelation_plot(series, ax=None, **kwds): - """Autocorrelation plot for time series. 
- - Parameters: - ----------- - series: Time series - ax: Matplotlib axis object, optional - kwds : keywords - Options to pass to matplotlib plotting method - - Returns: - ----------- - ax: Matplotlib axis object - """ - import matplotlib.pyplot as plt - n = len(series) - data = np.asarray(series) - if ax is None: - ax = plt.gca(xlim=(1, n), ylim=(-1.0, 1.0)) - mean = np.mean(data) - c0 = np.sum((data - mean) ** 2) / float(n) - - def r(h): - return ((data[:n - h] - mean) * - (data[h:] - mean)).sum() / float(n) / c0 - x = np.arange(n) + 1 - y = lmap(r, x) - z95 = 1.959963984540054 - z99 = 2.5758293035489004 - ax.axhline(y=z99 / np.sqrt(n), linestyle='--', color='grey') - ax.axhline(y=z95 / np.sqrt(n), color='grey') - ax.axhline(y=0.0, color='black') - ax.axhline(y=-z95 / np.sqrt(n), color='grey') - ax.axhline(y=-z99 / np.sqrt(n), linestyle='--', color='grey') - ax.set_xlabel("Lag") - ax.set_ylabel("Autocorrelation") - ax.plot(x, y, **kwds) - if 'label' in kwds: - ax.legend() - ax.grid() - return ax - - -class MPLPlot(object): - """ - Base class for assembling a pandas plot using matplotlib - - Parameters - ---------- - data : - - """ - - @property - def _kind(self): - """Specify kind str. 
Must be overridden in child class""" - raise NotImplementedError - - _layout_type = 'vertical' - _default_rot = 0 - orientation = None - _pop_attributes = ['label', 'style', 'logy', 'logx', 'loglog', - 'mark_right', 'stacked'] - _attr_defaults = {'logy': False, 'logx': False, 'loglog': False, - 'mark_right': True, 'stacked': False} - - def __init__(self, data, kind=None, by=None, subplots=False, sharex=None, - sharey=False, use_index=True, - figsize=None, grid=None, legend=True, rot=None, - ax=None, fig=None, title=None, xlim=None, ylim=None, - xticks=None, yticks=None, - sort_columns=False, fontsize=None, - secondary_y=False, colormap=None, - table=False, layout=None, **kwds): - - self.data = data - self.by = by - - self.kind = kind - - self.sort_columns = sort_columns - - self.subplots = subplots - - if sharex is None: - if ax is None: - self.sharex = True - else: - # if we get an axis, the users should do the visibility - # setting... - self.sharex = False - else: - self.sharex = sharex - - self.sharey = sharey - self.figsize = figsize - self.layout = layout - - self.xticks = xticks - self.yticks = yticks - self.xlim = xlim - self.ylim = ylim - self.title = title - self.use_index = use_index - - self.fontsize = fontsize - - if rot is not None: - self.rot = rot - # need to know for format_date_labels since it's rotated to 30 by - # default - self._rot_set = True - else: - self._rot_set = False - self.rot = self._default_rot - - if grid is None: - grid = False if secondary_y else self.plt.rcParams['axes.grid'] - - self.grid = grid - self.legend = legend - self.legend_handles = [] - self.legend_labels = [] - - for attr in self._pop_attributes: - value = kwds.pop(attr, self._attr_defaults.get(attr, None)) - setattr(self, attr, value) - - self.ax = ax - self.fig = fig - self.axes = None - - # parse errorbar input if given - xerr = kwds.pop('xerr', None) - yerr = kwds.pop('yerr', None) - self.errors = {} - for kw, err in zip(['xerr', 'yerr'], [xerr, yerr]): - 
self.errors[kw] = self._parse_errorbars(kw, err) - - if not isinstance(secondary_y, (bool, tuple, list, np.ndarray, Index)): - secondary_y = [secondary_y] - self.secondary_y = secondary_y - - # ugly TypeError if user passes matplotlib's `cmap` name. - # Probably better to accept either. - if 'cmap' in kwds and colormap: - raise TypeError("Only specify one of `cmap` and `colormap`.") - elif 'cmap' in kwds: - self.colormap = kwds.pop('cmap') - else: - self.colormap = colormap - - self.table = table - - self.kwds = kwds - - self._validate_color_args() - - def _validate_color_args(self): - if 'color' not in self.kwds and 'colors' in self.kwds: - warnings.warn(("'colors' is being deprecated. Please use 'color'" - "instead of 'colors'")) - colors = self.kwds.pop('colors') - self.kwds['color'] = colors - - if ('color' in self.kwds and self.nseries == 1): - # support series.plot(color='green') - self.kwds['color'] = [self.kwds['color']] - - if ('color' in self.kwds or 'colors' in self.kwds) and \ - self.colormap is not None: - warnings.warn("'color' and 'colormap' cannot be used " - "simultaneously. Using 'color'") - - if 'color' in self.kwds and self.style is not None: - if is_list_like(self.style): - styles = self.style - else: - styles = [self.style] - # need only a single match - for s in styles: - if re.match('^[a-z]+?', s) is not None: - raise ValueError( - "Cannot pass 'style' string with a color " - "symbol and 'color' keyword argument. Please" - " use one or the other or pass 'style' " - "without a color symbol") - - def _iter_data(self, data=None, keep_index=False, fillna=None): - if data is None: - data = self.data - if fillna is not None: - data = data.fillna(fillna) - - # TODO: unused? 
- # if self.sort_columns: - # columns = _try_sort(data.columns) - # else: - # columns = data.columns - - for col, values in data.iteritems(): - if keep_index is True: - yield col, values - else: - yield col, values.values - - @property - def nseries(self): - if self.data.ndim == 1: - return 1 - else: - return self.data.shape[1] - - def draw(self): - self.plt.draw_if_interactive() - - def generate(self): - self._args_adjust() - self._compute_plot_data() - self._setup_subplots() - self._make_plot() - self._add_table() - self._make_legend() - self._adorn_subplots() - - for ax in self.axes: - self._post_plot_logic_common(ax, self.data) - self._post_plot_logic(ax, self.data) - - def _args_adjust(self): - pass - - def _has_plotted_object(self, ax): - """check whether ax has data""" - return (len(ax.lines) != 0 or - len(ax.artists) != 0 or - len(ax.containers) != 0) - - def _maybe_right_yaxis(self, ax, axes_num): - if not self.on_right(axes_num): - # secondary axes may be passed via ax kw - return self._get_ax_layer(ax) - - if hasattr(ax, 'right_ax'): - # if it has right_ax proparty, ``ax`` must be left axes - return ax.right_ax - elif hasattr(ax, 'left_ax'): - # if it has left_ax proparty, ``ax`` must be right axes - return ax - else: - # otherwise, create twin axes - orig_ax, new_ax = ax, ax.twinx() - # TODO: use Matplotlib public API when available - new_ax._get_lines = orig_ax._get_lines - new_ax._get_patches_for_fill = orig_ax._get_patches_for_fill - orig_ax.right_ax, new_ax.left_ax = new_ax, orig_ax - - if not self._has_plotted_object(orig_ax): # no data on left y - orig_ax.get_yaxis().set_visible(False) - return new_ax - - def _setup_subplots(self): - if self.subplots: - fig, axes = _subplots(naxes=self.nseries, - sharex=self.sharex, sharey=self.sharey, - figsize=self.figsize, ax=self.ax, - layout=self.layout, - layout_type=self._layout_type) - else: - if self.ax is None: - fig = self.plt.figure(figsize=self.figsize) - axes = fig.add_subplot(111) - else: - fig = 
self.ax.get_figure() - if self.figsize is not None: - fig.set_size_inches(self.figsize) - axes = self.ax - - axes = _flatten(axes) - - if self.logx or self.loglog: - [a.set_xscale('log') for a in axes] - if self.logy or self.loglog: - [a.set_yscale('log') for a in axes] - - self.fig = fig - self.axes = axes - - @property - def result(self): - """ - Return result axes - """ - if self.subplots: - if self.layout is not None and not is_list_like(self.ax): - return self.axes.reshape(*self.layout) - else: - return self.axes - else: - sec_true = isinstance(self.secondary_y, bool) and self.secondary_y - all_sec = (is_list_like(self.secondary_y) and - len(self.secondary_y) == self.nseries) - if (sec_true or all_sec): - # if all data is plotted on secondary, return right axes - return self._get_ax_layer(self.axes[0], primary=False) - else: - return self.axes[0] - - def _compute_plot_data(self): - data = self.data - - if isinstance(data, Series): - label = self.label - if label is None and data.name is None: - label = 'None' - data = data.to_frame(name=label) - - numeric_data = data._convert(datetime=True)._get_numeric_data() - - try: - is_empty = numeric_data.empty - except AttributeError: - is_empty = not len(numeric_data) - - # no empty frames or series allowed - if is_empty: - raise TypeError('Empty {0!r}: no numeric data to ' - 'plot'.format(numeric_data.__class__.__name__)) - - self.data = numeric_data - - def _make_plot(self): - raise AbstractMethodError(self) - - def _add_table(self): - if self.table is False: - return - elif self.table is True: - data = self.data.transpose() - else: - data = self.table - ax = self._get_ax(0) - table(ax, data) - - def _post_plot_logic_common(self, ax, data): - """Common post process for each axes""" - labels = [pprint_thing(key) for key in data.index] - labels = dict(zip(range(len(data.index)), labels)) - - if self.orientation == 'vertical' or self.orientation is None: - if self._need_to_set_index: - xticklabels = [labels.get(x, '') 
for x in ax.get_xticks()] - ax.set_xticklabels(xticklabels) - self._apply_axis_properties(ax.xaxis, rot=self.rot, - fontsize=self.fontsize) - self._apply_axis_properties(ax.yaxis, fontsize=self.fontsize) - elif self.orientation == 'horizontal': - if self._need_to_set_index: - yticklabels = [labels.get(y, '') for y in ax.get_yticks()] - ax.set_yticklabels(yticklabels) - self._apply_axis_properties(ax.yaxis, rot=self.rot, - fontsize=self.fontsize) - self._apply_axis_properties(ax.xaxis, fontsize=self.fontsize) - else: # pragma no cover - raise ValueError - - def _post_plot_logic(self, ax, data): - """Post process for each axes. Overridden in child classes""" - pass - - def _adorn_subplots(self): - """Common post process unrelated to data""" - if len(self.axes) > 0: - all_axes = self._get_subplots() - nrows, ncols = self._get_axes_layout() - _handle_shared_axes(axarr=all_axes, nplots=len(all_axes), - naxes=nrows * ncols, nrows=nrows, - ncols=ncols, sharex=self.sharex, - sharey=self.sharey) - - for ax in self.axes: - if self.yticks is not None: - ax.set_yticks(self.yticks) - - if self.xticks is not None: - ax.set_xticks(self.xticks) - - if self.ylim is not None: - ax.set_ylim(self.ylim) - - if self.xlim is not None: - ax.set_xlim(self.xlim) - - ax.grid(self.grid) - - if self.title: - if self.subplots: - if is_list_like(self.title): - if len(self.title) != self.nseries: - msg = ('The length of `title` must equal the number ' - 'of columns if using `title` of type `list` ' - 'and `subplots=True`.\n' - 'length of title = {}\n' - 'number of columns = {}').format( - len(self.title), self.nseries) - raise ValueError(msg) - - for (ax, title) in zip(self.axes, self.title): - ax.set_title(title) - else: - self.fig.suptitle(self.title) - else: - if is_list_like(self.title): - msg = ('Using `title` of type `list` is not supported ' - 'unless `subplots=True` is passed') - raise ValueError(msg) - self.axes[0].set_title(self.title) - - def _apply_axis_properties(self, axis, 
rot=None, fontsize=None): - labels = axis.get_majorticklabels() + axis.get_minorticklabels() - for label in labels: - if rot is not None: - label.set_rotation(rot) - if fontsize is not None: - label.set_fontsize(fontsize) - - @property - def legend_title(self): - if not isinstance(self.data.columns, MultiIndex): - name = self.data.columns.name - if name is not None: - name = pprint_thing(name) - return name - else: - stringified = map(pprint_thing, - self.data.columns.names) - return ','.join(stringified) - - def _add_legend_handle(self, handle, label, index=None): - if label is not None: - if self.mark_right and index is not None: - if self.on_right(index): - label = label + ' (right)' - self.legend_handles.append(handle) - self.legend_labels.append(label) - - def _make_legend(self): - ax, leg = self._get_ax_legend(self.axes[0]) - - handles = [] - labels = [] - title = '' - - if not self.subplots: - if leg is not None: - title = leg.get_title().get_text() - handles = leg.legendHandles - labels = [x.get_text() for x in leg.get_texts()] - - if self.legend: - if self.legend == 'reverse': - self.legend_handles = reversed(self.legend_handles) - self.legend_labels = reversed(self.legend_labels) - - handles += self.legend_handles - labels += self.legend_labels - if self.legend_title is not None: - title = self.legend_title - - if len(handles) > 0: - ax.legend(handles, labels, loc='best', title=title) - - elif self.subplots and self.legend: - for ax in self.axes: - if ax.get_visible(): - ax.legend(loc='best') - - def _get_ax_legend(self, ax): - leg = ax.get_legend() - other_ax = (getattr(ax, 'left_ax', None) or - getattr(ax, 'right_ax', None)) - other_leg = None - if other_ax is not None: - other_leg = other_ax.get_legend() - if leg is None and other_leg is not None: - leg = other_leg - ax = other_ax - return ax, leg - - @cache_readonly - def plt(self): - import matplotlib.pyplot as plt - return plt - - @staticmethod - def mpl_ge_1_3_1(): - return _mpl_ge_1_3_1() - - 
@staticmethod - def mpl_ge_1_5_0(): - return _mpl_ge_1_5_0() - - _need_to_set_index = False - - def _get_xticks(self, convert_period=False): - index = self.data.index - is_datetype = index.inferred_type in ('datetime', 'date', - 'datetime64', 'time') - - if self.use_index: - if convert_period and isinstance(index, PeriodIndex): - self.data = self.data.reindex(index=index.sort_values()) - x = self.data.index.to_timestamp()._mpl_repr() - elif index.is_numeric(): - """ - Matplotlib supports numeric values or datetime objects as - xaxis values. Taking LBYL approach here, by the time - matplotlib raises exception when using non numeric/datetime - values for xaxis, several actions are already taken by plt. - """ - x = index._mpl_repr() - elif is_datetype: - self.data = self.data.sort_index() - x = self.data.index._mpl_repr() - else: - self._need_to_set_index = True - x = lrange(len(index)) - else: - x = lrange(len(index)) - - return x - - @classmethod - def _plot(cls, ax, x, y, style=None, is_errorbar=False, **kwds): - mask = isnull(y) - if mask.any(): - y = np.ma.array(y) - y = np.ma.masked_where(mask, y) - - if isinstance(x, Index): - x = x._mpl_repr() - - if is_errorbar: - if 'xerr' in kwds: - kwds['xerr'] = np.array(kwds.get('xerr')) - if 'yerr' in kwds: - kwds['yerr'] = np.array(kwds.get('yerr')) - return ax.errorbar(x, y, **kwds) - else: - # prevent style kwarg from going to errorbar, where it is - # unsupported - if style is not None: - args = (x, y, style) - else: - args = (x, y) - return ax.plot(*args, **kwds) - - def _get_index_name(self): - if isinstance(self.data.index, MultiIndex): - name = self.data.index.names - if any(x is not None for x in name): - name = ','.join([pprint_thing(x) for x in name]) - else: - name = None - else: - name = self.data.index.name - if name is not None: - name = pprint_thing(name) - - return name - - @classmethod - def _get_ax_layer(cls, ax, primary=True): - """get left (primary) or right (secondary) axes""" - if primary: - 
return getattr(ax, 'left_ax', ax) - else: - return getattr(ax, 'right_ax', ax) - - def _get_ax(self, i): - # get the twinx ax if appropriate - if self.subplots: - ax = self.axes[i] - ax = self._maybe_right_yaxis(ax, i) - self.axes[i] = ax - else: - ax = self.axes[0] - ax = self._maybe_right_yaxis(ax, i) - - ax.get_yaxis().set_visible(True) - return ax - - def on_right(self, i): - if isinstance(self.secondary_y, bool): - return self.secondary_y - - if isinstance(self.secondary_y, (tuple, list, np.ndarray, Index)): - return self.data.columns[i] in self.secondary_y - - def _apply_style_colors(self, colors, kwds, col_num, label): - """ - Manage style and color based on column number and its label. - Returns tuple of appropriate style and kwds which "color" may be added. - """ - style = None - if self.style is not None: - if isinstance(self.style, list): - try: - style = self.style[col_num] - except IndexError: - pass - elif isinstance(self.style, dict): - style = self.style.get(label, style) - else: - style = self.style - - has_color = 'color' in kwds or self.colormap is not None - nocolor_style = style is None or re.match('[a-z]+', style) is None - if (has_color or self.subplots) and nocolor_style: - kwds['color'] = colors[col_num % len(colors)] - return style, kwds - - def _get_colors(self, num_colors=None, color_kwds='color'): - if num_colors is None: - num_colors = self.nseries - - return _get_standard_colors(num_colors=num_colors, - colormap=self.colormap, - color=self.kwds.get(color_kwds)) - - def _parse_errorbars(self, label, err): - """ - Look for error keyword arguments and return the actual errorbar data - or return the error DataFrame/dict - - Error bars can be specified in several ways: - Series: the user provides a pandas.Series object of the same - length as the data - ndarray: provides a np.ndarray of the same length as the data - DataFrame/dict: error values are paired with keys matching the - key in the plotted DataFrame - str: the name of the column 
within the plotted DataFrame - """ - - if err is None: - return None - - from pandas import DataFrame, Series - - def match_labels(data, e): - e = e.reindex_axis(data.index) - return e - - # key-matched DataFrame - if isinstance(err, DataFrame): - - err = match_labels(self.data, err) - # key-matched dict - elif isinstance(err, dict): - pass - - # Series of error values - elif isinstance(err, Series): - # broadcast error series across data - err = match_labels(self.data, err) - err = np.atleast_2d(err) - err = np.tile(err, (self.nseries, 1)) - - # errors are a column in the dataframe - elif isinstance(err, string_types): - evalues = self.data[err].values - self.data = self.data[self.data.columns.drop(err)] - err = np.atleast_2d(evalues) - err = np.tile(err, (self.nseries, 1)) - - elif is_list_like(err): - if is_iterator(err): - err = np.atleast_2d(list(err)) - else: - # raw error values - err = np.atleast_2d(err) - - err_shape = err.shape - - # asymmetrical error bars - if err.ndim == 3: - if (err_shape[0] != self.nseries) or \ - (err_shape[1] != 2) or \ - (err_shape[2] != len(self.data)): - msg = "Asymmetrical error bars should be provided " + \ - "with the shape (%u, 2, %u)" % \ - (self.nseries, len(self.data)) - raise ValueError(msg) - - # broadcast errors to each data series - if len(err) == 1: - err = np.tile(err, (self.nseries, 1)) - - elif is_number(err): - err = np.tile([err], (self.nseries, len(self.data))) - - else: - msg = "No valid %s detected" % label - raise ValueError(msg) - - return err - - def _get_errorbars(self, label=None, index=None, xerr=True, yerr=True): - from pandas import DataFrame - errors = {} - - for kw, flag in zip(['xerr', 'yerr'], [xerr, yerr]): - if flag: - err = self.errors[kw] - # user provided label-matched dataframe of errors - if isinstance(err, (DataFrame, dict)): - if label is not None and label in err.keys(): - err = err[label] - else: - err = None - elif index is not None and err is not None: - err = err[index] - - if err is 
not None: - errors[kw] = err - return errors - - def _get_subplots(self): - from matplotlib.axes import Subplot - return [ax for ax in self.axes[0].get_figure().get_axes() - if isinstance(ax, Subplot)] - - def _get_axes_layout(self): - axes = self._get_subplots() - x_set = set() - y_set = set() - for ax in axes: - # check axes coordinates to estimate layout - points = ax.get_position().get_points() - x_set.add(points[0][0]) - y_set.add(points[0][1]) - return (len(y_set), len(x_set)) - - -class PlanePlot(MPLPlot): - """ - Abstract class for plotting on plane, currently scatter and hexbin. - """ - - _layout_type = 'single' - - def __init__(self, data, x, y, **kwargs): - MPLPlot.__init__(self, data, **kwargs) - if x is None or y is None: - raise ValueError(self._kind + ' requires and x and y column') - if is_integer(x) and not self.data.columns.holds_integer(): - x = self.data.columns[x] - if is_integer(y) and not self.data.columns.holds_integer(): - y = self.data.columns[y] - self.x = x - self.y = y - - @property - def nseries(self): - return 1 - - def _post_plot_logic(self, ax, data): - x, y = self.x, self.y - ax.set_ylabel(pprint_thing(y)) - ax.set_xlabel(pprint_thing(x)) - - -class ScatterPlot(PlanePlot): - _kind = 'scatter' - - def __init__(self, data, x, y, s=None, c=None, **kwargs): - if s is None: - # hide the matplotlib default for size, in case we want to change - # the handling of this argument later - s = 20 - super(ScatterPlot, self).__init__(data, x, y, s=s, **kwargs) - if is_integer(c) and not self.data.columns.holds_integer(): - c = self.data.columns[c] - self.c = c - - def _make_plot(self): - x, y, c, data = self.x, self.y, self.c, self.data - ax = self.axes[0] - - c_is_column = is_hashable(c) and c in self.data.columns - - # plot a colorbar only if a colormap is provided or necessary - cb = self.kwds.pop('colorbar', self.colormap or c_is_column) - - # pandas uses colormap, matplotlib uses cmap. 
- cmap = self.colormap or 'Greys' - cmap = self.plt.cm.get_cmap(cmap) - color = self.kwds.pop("color", None) - if c is not None and color is not None: - raise TypeError('Specify exactly one of `c` and `color`') - elif c is None and color is None: - c_values = self.plt.rcParams['patch.facecolor'] - elif color is not None: - c_values = color - elif c_is_column: - c_values = self.data[c].values - else: - c_values = c - - if self.legend and hasattr(self, 'label'): - label = self.label - else: - label = None - scatter = ax.scatter(data[x].values, data[y].values, c=c_values, - label=label, cmap=cmap, **self.kwds) - if cb: - img = ax.collections[0] - kws = dict(ax=ax) - if self.mpl_ge_1_3_1(): - kws['label'] = c if c_is_column else '' - self.fig.colorbar(img, **kws) - - if label is not None: - self._add_legend_handle(scatter, label) - else: - self.legend = False - - errors_x = self._get_errorbars(label=x, index=0, yerr=False) - errors_y = self._get_errorbars(label=y, index=0, xerr=False) - if len(errors_x) > 0 or len(errors_y) > 0: - err_kwds = dict(errors_x, **errors_y) - err_kwds['ecolor'] = scatter.get_facecolor()[0] - ax.errorbar(data[x].values, data[y].values, - linestyle='none', **err_kwds) - - -class HexBinPlot(PlanePlot): - _kind = 'hexbin' - - def __init__(self, data, x, y, C=None, **kwargs): - super(HexBinPlot, self).__init__(data, x, y, **kwargs) - if is_integer(C) and not self.data.columns.holds_integer(): - C = self.data.columns[C] - self.C = C - - def _make_plot(self): - x, y, data, C = self.x, self.y, self.data, self.C - ax = self.axes[0] - # pandas uses colormap, matplotlib uses cmap. 
- cmap = self.colormap or 'BuGn' - cmap = self.plt.cm.get_cmap(cmap) - cb = self.kwds.pop('colorbar', True) - - if C is None: - c_values = None - else: - c_values = data[C].values - - ax.hexbin(data[x].values, data[y].values, C=c_values, cmap=cmap, - **self.kwds) - if cb: - img = ax.collections[0] - self.fig.colorbar(img, ax=ax) - - def _make_legend(self): - pass - - -class LinePlot(MPLPlot): - _kind = 'line' - _default_rot = 0 - orientation = 'vertical' - - def __init__(self, data, **kwargs): - MPLPlot.__init__(self, data, **kwargs) - if self.stacked: - self.data = self.data.fillna(value=0) - self.x_compat = plot_params['x_compat'] - if 'x_compat' in self.kwds: - self.x_compat = bool(self.kwds.pop('x_compat')) - - def _is_ts_plot(self): - # this is slightly deceptive - return not self.x_compat and self.use_index and self._use_dynamic_x() - - def _use_dynamic_x(self): - from pandas.tseries.plotting import _use_dynamic_x - return _use_dynamic_x(self._get_ax(0), self.data) - - def _make_plot(self): - if self._is_ts_plot(): - from pandas.tseries.plotting import _maybe_convert_index - data = _maybe_convert_index(self._get_ax(0), self.data) - - x = data.index # dummy, not used - plotf = self._ts_plot - it = self._iter_data(data=data, keep_index=True) - else: - x = self._get_xticks(convert_period=True) - plotf = self._plot - it = self._iter_data() - - stacking_id = self._get_stacking_id() - is_errorbar = any(e is not None for e in self.errors.values()) - - colors = self._get_colors() - for i, (label, y) in enumerate(it): - ax = self._get_ax(i) - kwds = self.kwds.copy() - style, kwds = self._apply_style_colors(colors, kwds, i, label) - - errors = self._get_errorbars(label=label, index=i) - kwds = dict(kwds, **errors) - - label = pprint_thing(label) # .encode('utf-8') - kwds['label'] = label - - newlines = plotf(ax, x, y, style=style, column_num=i, - stacking_id=stacking_id, - is_errorbar=is_errorbar, - **kwds) - self._add_legend_handle(newlines[0], label, index=i) - - 
lines = _get_all_lines(ax) - left, right = _get_xlim(lines) - ax.set_xlim(left, right) - - @classmethod - def _plot(cls, ax, x, y, style=None, column_num=None, - stacking_id=None, **kwds): - # column_num is used to get the target column from protf in line and - # area plots - if column_num == 0: - cls._initialize_stacker(ax, stacking_id, len(y)) - y_values = cls._get_stacked_values(ax, stacking_id, y, kwds['label']) - lines = MPLPlot._plot(ax, x, y_values, style=style, **kwds) - cls._update_stacker(ax, stacking_id, y) - return lines - - @classmethod - def _ts_plot(cls, ax, x, data, style=None, **kwds): - from pandas.tseries.plotting import (_maybe_resample, - _decorate_axes, - format_dateaxis) - # accept x to be consistent with normal plot func, - # x is not passed to tsplot as it uses data.index as x coordinate - # column_num must be in kwds for stacking purpose - freq, data = _maybe_resample(data, ax, kwds) - - # Set ax with freq info - _decorate_axes(ax, freq, kwds) - # digging deeper - if hasattr(ax, 'left_ax'): - _decorate_axes(ax.left_ax, freq, kwds) - if hasattr(ax, 'right_ax'): - _decorate_axes(ax.right_ax, freq, kwds) - ax._plot_data.append((data, cls._kind, kwds)) - - lines = cls._plot(ax, data.index, data.values, style=style, **kwds) - # set date formatter, locators and rescale limits - format_dateaxis(ax, ax.freq) - return lines - - def _get_stacking_id(self): - if self.stacked: - return id(self.data) - else: - return None - - @classmethod - def _initialize_stacker(cls, ax, stacking_id, n): - if stacking_id is None: - return - if not hasattr(ax, '_stacker_pos_prior'): - ax._stacker_pos_prior = {} - if not hasattr(ax, '_stacker_neg_prior'): - ax._stacker_neg_prior = {} - ax._stacker_pos_prior[stacking_id] = np.zeros(n) - ax._stacker_neg_prior[stacking_id] = np.zeros(n) - - @classmethod - def _get_stacked_values(cls, ax, stacking_id, values, label): - if stacking_id is None: - return values - if not hasattr(ax, '_stacker_pos_prior'): - # stacker may not 
be initialized for subplots - cls._initialize_stacker(ax, stacking_id, len(values)) - - if (values >= 0).all(): - return ax._stacker_pos_prior[stacking_id] + values - elif (values <= 0).all(): - return ax._stacker_neg_prior[stacking_id] + values - - raise ValueError('When stacked is True, each column must be either ' - 'all positive or negative.' - '{0} contains both positive and negative values' - .format(label)) - - @classmethod - def _update_stacker(cls, ax, stacking_id, values): - if stacking_id is None: - return - if (values >= 0).all(): - ax._stacker_pos_prior[stacking_id] += values - elif (values <= 0).all(): - ax._stacker_neg_prior[stacking_id] += values - - def _post_plot_logic(self, ax, data): - condition = (not self._use_dynamic_x() and - data.index.is_all_dates and - not self.subplots or - (self.subplots and self.sharex)) - - index_name = self._get_index_name() - - if condition: - # irregular TS rotated 30 deg. by default - # probably a better place to check / set this. - if not self._rot_set: - self.rot = 30 - format_date_labels(ax, rot=self.rot) - - if index_name is not None and self.use_index: - ax.set_xlabel(index_name) - - -class AreaPlot(LinePlot): - _kind = 'area' - - def __init__(self, data, **kwargs): - kwargs.setdefault('stacked', True) - data = data.fillna(value=0) - LinePlot.__init__(self, data, **kwargs) - - if not self.stacked: - # use smaller alpha to distinguish overlap - self.kwds.setdefault('alpha', 0.5) - - if self.logy or self.loglog: - raise ValueError("Log-y scales are not supported in area plot") - - @classmethod - def _plot(cls, ax, x, y, style=None, column_num=None, - stacking_id=None, is_errorbar=False, **kwds): - - if column_num == 0: - cls._initialize_stacker(ax, stacking_id, len(y)) - y_values = cls._get_stacked_values(ax, stacking_id, y, kwds['label']) - - # need to remove label, because subplots uses mpl legend as it is - line_kwds = kwds.copy() - if cls.mpl_ge_1_5_0(): - line_kwds.pop('label') - lines = MPLPlot._plot(ax, 
x, y_values, style=style, **line_kwds) - - # get data from the line to get coordinates for fill_between - xdata, y_values = lines[0].get_data(orig=False) - - # unable to use ``_get_stacked_values`` here to get starting point - if stacking_id is None: - start = np.zeros(len(y)) - elif (y >= 0).all(): - start = ax._stacker_pos_prior[stacking_id] - elif (y <= 0).all(): - start = ax._stacker_neg_prior[stacking_id] - else: - start = np.zeros(len(y)) - - if 'color' not in kwds: - kwds['color'] = lines[0].get_color() - - rect = ax.fill_between(xdata, start, y_values, **kwds) - cls._update_stacker(ax, stacking_id, y) - - # LinePlot expects list of artists - res = [rect] if cls.mpl_ge_1_5_0() else lines - return res - - def _add_legend_handle(self, handle, label, index=None): - if not self.mpl_ge_1_5_0(): - from matplotlib.patches import Rectangle - # Because fill_between isn't supported in legend, - # specifically add Rectangle handle here - alpha = self.kwds.get('alpha', None) - handle = Rectangle((0, 0), 1, 1, fc=handle.get_color(), - alpha=alpha) - LinePlot._add_legend_handle(self, handle, label, index=index) - - def _post_plot_logic(self, ax, data): - LinePlot._post_plot_logic(self, ax, data) - - if self.ylim is None: - if (data >= 0).all().all(): - ax.set_ylim(0, None) - elif (data <= 0).all().all(): - ax.set_ylim(None, 0) - - -class BarPlot(MPLPlot): - _kind = 'bar' - _default_rot = 90 - orientation = 'vertical' - - def __init__(self, data, **kwargs): - self.bar_width = kwargs.pop('width', 0.5) - pos = kwargs.pop('position', 0.5) - kwargs.setdefault('align', 'center') - self.tick_pos = np.arange(len(data)) - - self.bottom = kwargs.pop('bottom', 0) - self.left = kwargs.pop('left', 0) - - self.log = kwargs.pop('log', False) - MPLPlot.__init__(self, data, **kwargs) - - if self.stacked or self.subplots: - self.tickoffset = self.bar_width * pos - if kwargs['align'] == 'edge': - self.lim_offset = self.bar_width / 2 - else: - self.lim_offset = 0 - else: - if kwargs['align'] 
== 'edge': - w = self.bar_width / self.nseries - self.tickoffset = self.bar_width * (pos - 0.5) + w * 0.5 - self.lim_offset = w * 0.5 - else: - self.tickoffset = self.bar_width * pos - self.lim_offset = 0 - - self.ax_pos = self.tick_pos - self.tickoffset - - def _args_adjust(self): - if is_list_like(self.bottom): - self.bottom = np.array(self.bottom) - if is_list_like(self.left): - self.left = np.array(self.left) - - @classmethod - def _plot(cls, ax, x, y, w, start=0, log=False, **kwds): - return ax.bar(x, y, w, bottom=start, log=log, **kwds) - - @property - def _start_base(self): - return self.bottom - - def _make_plot(self): - import matplotlib as mpl - - colors = self._get_colors() - ncolors = len(colors) - - pos_prior = neg_prior = np.zeros(len(self.data)) - K = self.nseries - - for i, (label, y) in enumerate(self._iter_data(fillna=0)): - ax = self._get_ax(i) - kwds = self.kwds.copy() - kwds['color'] = colors[i % ncolors] - - errors = self._get_errorbars(label=label, index=i) - kwds = dict(kwds, **errors) - - label = pprint_thing(label) - - if (('yerr' in kwds) or ('xerr' in kwds)) \ - and (kwds.get('ecolor') is None): - kwds['ecolor'] = mpl.rcParams['xtick.color'] - - start = 0 - if self.log and (y >= 1).all(): - start = 1 - start = start + self._start_base - - if self.subplots: - w = self.bar_width / 2 - rect = self._plot(ax, self.ax_pos + w, y, self.bar_width, - start=start, label=label, - log=self.log, **kwds) - ax.set_title(label) - elif self.stacked: - mask = y > 0 - start = np.where(mask, pos_prior, neg_prior) + self._start_base - w = self.bar_width / 2 - rect = self._plot(ax, self.ax_pos + w, y, self.bar_width, - start=start, label=label, - log=self.log, **kwds) - pos_prior = pos_prior + np.where(mask, y, 0) - neg_prior = neg_prior + np.where(mask, 0, y) - else: - w = self.bar_width / K - rect = self._plot(ax, self.ax_pos + (i + 0.5) * w, y, w, - start=start, label=label, - log=self.log, **kwds) - self._add_legend_handle(rect, label, index=i) - - def 
_post_plot_logic(self, ax, data): - if self.use_index: - str_index = [pprint_thing(key) for key in data.index] - else: - str_index = [pprint_thing(key) for key in range(data.shape[0])] - name = self._get_index_name() - - s_edge = self.ax_pos[0] - 0.25 + self.lim_offset - e_edge = self.ax_pos[-1] + 0.25 + self.bar_width + self.lim_offset - - self._decorate_ticks(ax, name, str_index, s_edge, e_edge) - - def _decorate_ticks(self, ax, name, ticklabels, start_edge, end_edge): - ax.set_xlim((start_edge, end_edge)) - ax.set_xticks(self.tick_pos) - ax.set_xticklabels(ticklabels) - if name is not None and self.use_index: - ax.set_xlabel(name) - - -class BarhPlot(BarPlot): - _kind = 'barh' - _default_rot = 0 - orientation = 'horizontal' - - @property - def _start_base(self): - return self.left - - @classmethod - def _plot(cls, ax, x, y, w, start=0, log=False, **kwds): - return ax.barh(x, y, w, left=start, log=log, **kwds) - - def _decorate_ticks(self, ax, name, ticklabels, start_edge, end_edge): - # horizontal bars - ax.set_ylim((start_edge, end_edge)) - ax.set_yticks(self.tick_pos) - ax.set_yticklabels(ticklabels) - if name is not None and self.use_index: - ax.set_ylabel(name) - - -class HistPlot(LinePlot): - _kind = 'hist' - - def __init__(self, data, bins=10, bottom=0, **kwargs): - self.bins = bins # use mpl default - self.bottom = bottom - # Do not call LinePlot.__init__ which may fill nan - MPLPlot.__init__(self, data, **kwargs) - - def _args_adjust(self): - if is_integer(self.bins): - # create common bin edge - values = (self.data._convert(datetime=True)._get_numeric_data()) - values = np.ravel(values) - values = values[~isnull(values)] - - hist, self.bins = np.histogram( - values, bins=self.bins, - range=self.kwds.get('range', None), - weights=self.kwds.get('weights', None)) - - if is_list_like(self.bottom): - self.bottom = np.array(self.bottom) - - @classmethod - def _plot(cls, ax, y, style=None, bins=None, bottom=0, column_num=0, - stacking_id=None, **kwds): - if 
column_num == 0: - cls._initialize_stacker(ax, stacking_id, len(bins) - 1) - y = y[~isnull(y)] - - base = np.zeros(len(bins) - 1) - bottom = bottom + \ - cls._get_stacked_values(ax, stacking_id, base, kwds['label']) - # ignore style - n, bins, patches = ax.hist(y, bins=bins, bottom=bottom, **kwds) - cls._update_stacker(ax, stacking_id, n) - return patches - - def _make_plot(self): - colors = self._get_colors() - stacking_id = self._get_stacking_id() - - for i, (label, y) in enumerate(self._iter_data()): - ax = self._get_ax(i) - - kwds = self.kwds.copy() - - label = pprint_thing(label) - kwds['label'] = label - - style, kwds = self._apply_style_colors(colors, kwds, i, label) - if style is not None: - kwds['style'] = style - - kwds = self._make_plot_keywords(kwds, y) - artists = self._plot(ax, y, column_num=i, - stacking_id=stacking_id, **kwds) - self._add_legend_handle(artists[0], label, index=i) - - def _make_plot_keywords(self, kwds, y): - """merge BoxPlot/KdePlot properties to passed kwds""" - # y is required for KdePlot - kwds['bottom'] = self.bottom - kwds['bins'] = self.bins - return kwds - - def _post_plot_logic(self, ax, data): - if self.orientation == 'horizontal': - ax.set_xlabel('Frequency') - else: - ax.set_ylabel('Frequency') - - @property - def orientation(self): - if self.kwds.get('orientation', None) == 'horizontal': - return 'horizontal' - else: - return 'vertical' - - -class KdePlot(HistPlot): - _kind = 'kde' - orientation = 'vertical' - - def __init__(self, data, bw_method=None, ind=None, **kwargs): - MPLPlot.__init__(self, data, **kwargs) - self.bw_method = bw_method - self.ind = ind - - def _args_adjust(self): - pass - - def _get_ind(self, y): - if self.ind is None: - # np.nanmax() and np.nanmin() ignores the missing values - sample_range = np.nanmax(y) - np.nanmin(y) - ind = np.linspace(np.nanmin(y) - 0.5 * sample_range, - np.nanmax(y) + 0.5 * sample_range, 1000) - else: - ind = self.ind - return ind - - @classmethod - def _plot(cls, ax, y, 
style=None, bw_method=None, ind=None, - column_num=None, stacking_id=None, **kwds): - from scipy.stats import gaussian_kde - from scipy import __version__ as spv - - y = remove_na(y) - - if LooseVersion(spv) >= '0.11.0': - gkde = gaussian_kde(y, bw_method=bw_method) - else: - gkde = gaussian_kde(y) - if bw_method is not None: - msg = ('bw_method was added in Scipy 0.11.0.' + - ' Scipy version in use is %s.' % spv) - warnings.warn(msg) - - y = gkde.evaluate(ind) - lines = MPLPlot._plot(ax, ind, y, style=style, **kwds) - return lines - - def _make_plot_keywords(self, kwds, y): - kwds['bw_method'] = self.bw_method - kwds['ind'] = self._get_ind(y) - return kwds - - def _post_plot_logic(self, ax, data): - ax.set_ylabel('Density') - - -class PiePlot(MPLPlot): - _kind = 'pie' - _layout_type = 'horizontal' - - def __init__(self, data, kind=None, **kwargs): - data = data.fillna(value=0) - if (data < 0).any().any(): - raise ValueError("{0} doesn't allow negative values".format(kind)) - MPLPlot.__init__(self, data, kind=kind, **kwargs) - - def _args_adjust(self): - self.grid = False - self.logy = False - self.logx = False - self.loglog = False - - def _validate_color_args(self): - pass - - def _make_plot(self): - colors = self._get_colors( - num_colors=len(self.data), color_kwds='colors') - self.kwds.setdefault('colors', colors) - - for i, (label, y) in enumerate(self._iter_data()): - ax = self._get_ax(i) - if label is not None: - label = pprint_thing(label) - ax.set_ylabel(label) - - kwds = self.kwds.copy() - - def blank_labeler(label, value): - if value == 0: - return '' - else: - return label - - idx = [pprint_thing(v) for v in self.data.index] - labels = kwds.pop('labels', idx) - # labels is used for each wedge's labels - # Blank out labels for values of 0 so they don't overlap - # with nonzero wedges - if labels is not None: - blabels = [blank_labeler(l, value) for - l, value in zip(labels, y)] - else: - blabels = None - results = ax.pie(y, labels=blabels, **kwds) - - if 
kwds.get('autopct', None) is not None: - patches, texts, autotexts = results - else: - patches, texts = results - autotexts = [] - - if self.fontsize is not None: - for t in texts + autotexts: - t.set_fontsize(self.fontsize) - - # leglabels is used for legend labels - leglabels = labels if labels is not None else idx - for p, l in zip(patches, leglabels): - self._add_legend_handle(p, l) - - -class BoxPlot(LinePlot): - _kind = 'box' - _layout_type = 'horizontal' - - _valid_return_types = (None, 'axes', 'dict', 'both') - # namedtuple to hold results - BP = namedtuple("Boxplot", ['ax', 'lines']) - - def __init__(self, data, return_type='axes', **kwargs): - # Do not call LinePlot.__init__ which may fill nan - if return_type not in self._valid_return_types: - raise ValueError( - "return_type must be {None, 'axes', 'dict', 'both'}") - - self.return_type = return_type - MPLPlot.__init__(self, data, **kwargs) - - def _args_adjust(self): - if self.subplots: - # Disable label ax sharing. Otherwise, all subplots shows last - # column label - if self.orientation == 'vertical': - self.sharex = False - else: - self.sharey = False - - @classmethod - def _plot(cls, ax, y, column_num=None, return_type='axes', **kwds): - if y.ndim == 2: - y = [remove_na(v) for v in y] - # Boxplot fails with empty arrays, so need to add a NaN - # if any cols are empty - # GH 8181 - y = [v if v.size > 0 else np.array([np.nan]) for v in y] - else: - y = remove_na(y) - bp = ax.boxplot(y, **kwds) - - if return_type == 'dict': - return bp, bp - elif return_type == 'both': - return cls.BP(ax=ax, lines=bp), bp - else: - return ax, bp - - def _validate_color_args(self): - if 'color' in self.kwds: - if self.colormap is not None: - warnings.warn("'color' and 'colormap' cannot be used " - "simultaneously. 
Using 'color'") - self.color = self.kwds.pop('color') - - if isinstance(self.color, dict): - valid_keys = ['boxes', 'whiskers', 'medians', 'caps'] - for key, values in compat.iteritems(self.color): - if key not in valid_keys: - raise ValueError("color dict contains invalid " - "key '{0}' " - "The key must be either {1}" - .format(key, valid_keys)) - else: - self.color = None - - # get standard colors for default - colors = _get_standard_colors(num_colors=3, - colormap=self.colormap, - color=None) - # use 2 colors by default, for box/whisker and median - # flier colors isn't needed here - # because it can be specified by ``sym`` kw - self._boxes_c = colors[0] - self._whiskers_c = colors[0] - self._medians_c = colors[2] - self._caps_c = 'k' # mpl default - - def _get_colors(self, num_colors=None, color_kwds='color'): - pass - - def maybe_color_bp(self, bp): - if isinstance(self.color, dict): - boxes = self.color.get('boxes', self._boxes_c) - whiskers = self.color.get('whiskers', self._whiskers_c) - medians = self.color.get('medians', self._medians_c) - caps = self.color.get('caps', self._caps_c) - else: - # Other types are forwarded to matplotlib - # If None, use default colors - boxes = self.color or self._boxes_c - whiskers = self.color or self._whiskers_c - medians = self.color or self._medians_c - caps = self.color or self._caps_c - - from matplotlib.artist import setp - setp(bp['boxes'], color=boxes, alpha=1) - setp(bp['whiskers'], color=whiskers, alpha=1) - setp(bp['medians'], color=medians, alpha=1) - setp(bp['caps'], color=caps, alpha=1) - - def _make_plot(self): - if self.subplots: - self._return_obj = Series() - - for i, (label, y) in enumerate(self._iter_data()): - ax = self._get_ax(i) - kwds = self.kwds.copy() - - ret, bp = self._plot(ax, y, column_num=i, - return_type=self.return_type, **kwds) - self.maybe_color_bp(bp) - self._return_obj[label] = ret - - label = [pprint_thing(label)] - self._set_ticklabels(ax, label) - else: - y = self.data.values.T - ax 
= self._get_ax(0) - kwds = self.kwds.copy() - - ret, bp = self._plot(ax, y, column_num=0, - return_type=self.return_type, **kwds) - self.maybe_color_bp(bp) - self._return_obj = ret - - labels = [l for l, _ in self._iter_data()] - labels = [pprint_thing(l) for l in labels] - if not self.use_index: - labels = [pprint_thing(key) for key in range(len(labels))] - self._set_ticklabels(ax, labels) - - def _set_ticklabels(self, ax, labels): - if self.orientation == 'vertical': - ax.set_xticklabels(labels) - else: - ax.set_yticklabels(labels) - - def _make_legend(self): - pass - - def _post_plot_logic(self, ax, data): - pass - - @property - def orientation(self): - if self.kwds.get('vert', True): - return 'vertical' - else: - return 'horizontal' - - @property - def result(self): - if self.return_type is None: - return super(BoxPlot, self).result - else: - return self._return_obj - - -# kinds supported by both dataframe and series -_common_kinds = ['line', 'bar', 'barh', - 'kde', 'density', 'area', 'hist', 'box'] -# kinds supported by dataframe -_dataframe_kinds = ['scatter', 'hexbin'] -# kinds supported only by series or dataframe single column -_series_kinds = ['pie'] -_all_kinds = _common_kinds + _dataframe_kinds + _series_kinds - -_klasses = [LinePlot, BarPlot, BarhPlot, KdePlot, HistPlot, BoxPlot, - ScatterPlot, HexBinPlot, AreaPlot, PiePlot] - -_plot_klass = {} -for klass in _klasses: - _plot_klass[klass._kind] = klass - - -def _plot(data, x=None, y=None, subplots=False, - ax=None, kind='line', **kwds): - kind = _get_standard_kind(kind.lower().strip()) - if kind in _all_kinds: - klass = _plot_klass[kind] - else: - raise ValueError("%r is not a valid plot kind" % kind) - - from pandas import DataFrame - if kind in _dataframe_kinds: - if isinstance(data, DataFrame): - plot_obj = klass(data, x=x, y=y, subplots=subplots, ax=ax, - kind=kind, **kwds) - else: - raise ValueError("plot kind %r can only be used for data frames" - % kind) - - elif kind in _series_kinds: - if 
isinstance(data, DataFrame): - if y is None and subplots is False: - msg = "{0} requires either y column or 'subplots=True'" - raise ValueError(msg.format(kind)) - elif y is not None: - if is_integer(y) and not data.columns.holds_integer(): - y = data.columns[y] - # converted to series actually. copy to not modify - data = data[y].copy() - data.index.name = y - plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds) - else: - if isinstance(data, DataFrame): - if x is not None: - if is_integer(x) and not data.columns.holds_integer(): - x = data.columns[x] - data = data.set_index(x) - - if y is not None: - if is_integer(y) and not data.columns.holds_integer(): - y = data.columns[y] - label = kwds['label'] if 'label' in kwds else y - series = data[y].copy() # Don't modify - series.name = label - - for kw in ['xerr', 'yerr']: - if (kw in kwds) and \ - (isinstance(kwds[kw], string_types) or - is_integer(kwds[kw])): - try: - kwds[kw] = data[kwds[kw]] - except (IndexError, KeyError, TypeError): - pass - data = series - plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds) - - plot_obj.generate() - plot_obj.draw() - return plot_obj.result - - -df_kind = """- 'scatter' : scatter plot - - 'hexbin' : hexbin plot""" -series_kind = "" - -df_coord = """x : label or position, default None - y : label or position, default None - Allows plotting of one column versus another""" -series_coord = "" - -df_unique = """stacked : boolean, default False in line and - bar plots, and True in area plot. If True, create stacked plot. 
- sort_columns : boolean, default False - Sort column names to determine plot ordering - secondary_y : boolean or sequence, default False - Whether to plot on the secondary y-axis - If a list/tuple, which columns to plot on secondary y-axis""" -series_unique = """label : label argument to provide to plot - secondary_y : boolean or sequence of ints, default False - If True then y-axis will be on the right""" - -df_ax = """ax : matplotlib axes object, default None - subplots : boolean, default False - Make separate subplots for each column - sharex : boolean, default True if ax is None else False - In case subplots=True, share x axis and set some x axis labels to - invisible; defaults to True if ax is None otherwise False if an ax - is passed in; Be aware, that passing in both an ax and sharex=True - will alter all x axis labels for all axis in a figure! - sharey : boolean, default False - In case subplots=True, share y axis and set some y axis labels to - invisible - layout : tuple (optional) - (rows, columns) for the layout of subplots""" -series_ax = """ax : matplotlib axes object - If not passed, uses gca()""" - -df_note = """- If `kind` = 'scatter' and the argument `c` is the name of a dataframe - column, the values of that column are used to color each point. - - If `kind` = 'hexbin', you can control the size of the bins with the - `gridsize` argument. By default, a histogram of the counts around each - `(x, y)` point is computed. You can specify alternative aggregations - by passing values to the `C` and `reduce_C_function` arguments. - `C` specifies the value at each `(x, y)` point and `reduce_C_function` - is a function of one argument that reduces all the values in a bin to - a single number (e.g. 
`mean`, `max`, `sum`, `std`).""" -series_note = "" - -_shared_doc_df_kwargs = dict(klass='DataFrame', klass_obj='df', - klass_kind=df_kind, klass_coord=df_coord, - klass_ax=df_ax, klass_unique=df_unique, - klass_note=df_note) -_shared_doc_series_kwargs = dict(klass='Series', klass_obj='s', - klass_kind=series_kind, - klass_coord=series_coord, klass_ax=series_ax, - klass_unique=series_unique, - klass_note=series_note) - -_shared_docs['plot'] = """ - Make plots of %(klass)s using matplotlib / pylab. - - *New in version 0.17.0:* Each plot kind has a corresponding method on the - ``%(klass)s.plot`` accessor: - ``%(klass_obj)s.plot(kind='line')`` is equivalent to - ``%(klass_obj)s.plot.line()``. - - Parameters - ---------- - data : %(klass)s - %(klass_coord)s - kind : str - - 'line' : line plot (default) - - 'bar' : vertical bar plot - - 'barh' : horizontal bar plot - - 'hist' : histogram - - 'box' : boxplot - - 'kde' : Kernel Density Estimation plot - - 'density' : same as 'kde' - - 'area' : area plot - - 'pie' : pie plot - %(klass_kind)s - %(klass_ax)s - figsize : a tuple (width, height) in inches - use_index : boolean, default True - Use index as ticks for x axis - title : string or list - Title to use for the plot. If a string is passed, print the string at - the top of the figure. If a list is passed and `subplots` is True, - print each item in the list above the corresponding subplot. 
- grid : boolean, default None (matlab style default) - Axis grid lines - legend : False/True/'reverse' - Place legend on axis subplots - style : list or dict - matplotlib line style per column - logx : boolean, default False - Use log scaling on x axis - logy : boolean, default False - Use log scaling on y axis - loglog : boolean, default False - Use log scaling on both x and y axes - xticks : sequence - Values to use for the xticks - yticks : sequence - Values to use for the yticks - xlim : 2-tuple/list - ylim : 2-tuple/list - rot : int, default None - Rotation for ticks (xticks for vertical, yticks for horizontal plots) - fontsize : int, default None - Font size for xticks and yticks - colormap : str or matplotlib colormap object, default None - Colormap to select colors from. If string, load colormap with that name - from matplotlib. - colorbar : boolean, optional - If True, plot colorbar (only relevant for 'scatter' and 'hexbin' plots) - position : float - Specify relative alignments for bar plot layout. - From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 (center) - layout : tuple (optional) - (rows, columns) for the layout of the plot - table : boolean, Series or DataFrame, default False - If True, draw a table using the data in the DataFrame and the data will - be transposed to meet matplotlib's default layout. - If a Series or DataFrame is passed, use passed data to draw a table. - yerr : DataFrame, Series, array-like, dict and str - See :ref:`Plotting with Error Bars ` for - detail. - xerr : same types as yerr. 
- %(klass_unique)s - mark_right : boolean, default True - When using a secondary_y axis, automatically mark the column - labels with "(right)" in the legend - kwds : keywords - Options to pass to matplotlib plotting method - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - - Notes - ----- - - - See matplotlib documentation online for more on this subject - - If `kind` = 'bar' or 'barh', you can specify relative alignments - for bar plot layout by `position` keyword. - From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 (center) - %(klass_note)s - - """ - - -@Appender(_shared_docs['plot'] % _shared_doc_df_kwargs) -def plot_frame(data, x=None, y=None, kind='line', ax=None, - subplots=False, sharex=None, sharey=False, layout=None, - figsize=None, use_index=True, title=None, grid=None, - legend=True, style=None, logx=False, logy=False, loglog=False, - xticks=None, yticks=None, xlim=None, ylim=None, - rot=None, fontsize=None, colormap=None, table=False, - yerr=None, xerr=None, - secondary_y=False, sort_columns=False, - **kwds): - return _plot(data, kind=kind, x=x, y=y, ax=ax, - subplots=subplots, sharex=sharex, sharey=sharey, - layout=layout, figsize=figsize, use_index=use_index, - title=title, grid=grid, legend=legend, - style=style, logx=logx, logy=logy, loglog=loglog, - xticks=xticks, yticks=yticks, xlim=xlim, ylim=ylim, - rot=rot, fontsize=fontsize, colormap=colormap, table=table, - yerr=yerr, xerr=xerr, - secondary_y=secondary_y, sort_columns=sort_columns, - **kwds) - - -@Appender(_shared_docs['plot'] % _shared_doc_series_kwargs) -def plot_series(data, kind='line', ax=None, # Series unique - figsize=None, use_index=True, title=None, grid=None, - legend=False, style=None, logx=False, logy=False, loglog=False, - xticks=None, yticks=None, xlim=None, ylim=None, - rot=None, fontsize=None, colormap=None, table=False, - yerr=None, xerr=None, - label=None, secondary_y=False, # Series unique - **kwds): - - import matplotlib.pyplot as plt - 
""" - If no axes is specified, check whether there are existing figures - If there is no existing figures, _gca() will - create a figure with the default figsize, causing the figsize=parameter to - be ignored. - """ - if ax is None and len(plt.get_fignums()) > 0: - ax = _gca() - ax = MPLPlot._get_ax_layer(ax) - return _plot(data, kind=kind, ax=ax, - figsize=figsize, use_index=use_index, title=title, - grid=grid, legend=legend, - style=style, logx=logx, logy=logy, loglog=loglog, - xticks=xticks, yticks=yticks, xlim=xlim, ylim=ylim, - rot=rot, fontsize=fontsize, colormap=colormap, table=table, - yerr=yerr, xerr=xerr, - label=label, secondary_y=secondary_y, - **kwds) - - -_shared_docs['boxplot'] = """ - Make a box plot from DataFrame column optionally grouped by some columns or - other inputs - - Parameters - ---------- - data : the pandas object holding the data - column : column name or list of names, or vector - Can be any valid input to groupby - by : string or sequence - Column in the DataFrame to group by - ax : Matplotlib axes object, optional - fontsize : int or string - rot : label rotation angle - figsize : A tuple (width, height) in inches - grid : Setting this to True will show the grid - layout : tuple (optional) - (rows, columns) for the layout of the plot - return_type : {None, 'axes', 'dict', 'both'}, default None - The kind of object to return. The default is ``axes`` - 'axes' returns the matplotlib axes the boxplot is drawn on; - 'dict' returns a dictionary whose values are the matplotlib - Lines of the boxplot; - 'both' returns a namedtuple with the axes and dict. - - When grouping with ``by``, a Series mapping columns to ``return_type`` - is returned, unless ``return_type`` is None, in which case a NumPy - array of axes is returned with the same shape as ``layout``. - See the prose documentation for more. 
- - kwds : other plotting keyword arguments to be passed to matplotlib boxplot - function - - Returns - ------- - lines : dict - ax : matplotlib Axes - (ax, lines): namedtuple - - Notes - ----- - Use ``return_type='dict'`` when you want to tweak the appearance - of the lines after plotting. In this case a dict containing the Lines - making up the boxes, caps, fliers, medians, and whiskers is returned. - """ - - -@Appender(_shared_docs['boxplot'] % _shared_doc_kwargs) -def boxplot(data, column=None, by=None, ax=None, fontsize=None, - rot=0, grid=True, figsize=None, layout=None, return_type=None, - **kwds): - - # validate return_type: - if return_type not in BoxPlot._valid_return_types: - raise ValueError("return_type must be {'axes', 'dict', 'both'}") - - from pandas import Series, DataFrame - if isinstance(data, Series): - data = DataFrame({'x': data}) - column = 'x' - - def _get_colors(): - return _get_standard_colors(color=kwds.get('color'), num_colors=1) - - def maybe_color_bp(bp): - if 'color' not in kwds: - from matplotlib.artist import setp - setp(bp['boxes'], color=colors[0], alpha=1) - setp(bp['whiskers'], color=colors[0], alpha=1) - setp(bp['medians'], color=colors[2], alpha=1) - - def plot_group(keys, values, ax): - keys = [pprint_thing(x) for x in keys] - values = [remove_na(v) for v in values] - bp = ax.boxplot(values, **kwds) - if fontsize is not None: - ax.tick_params(axis='both', labelsize=fontsize) - if kwds.get('vert', 1): - ax.set_xticklabels(keys, rotation=rot) - else: - ax.set_yticklabels(keys, rotation=rot) - maybe_color_bp(bp) - - # Return axes in multiplot case, maybe revisit later # 985 - if return_type == 'dict': - return bp - elif return_type == 'both': - return BoxPlot.BP(ax=ax, lines=bp) - else: - return ax - - colors = _get_colors() - if column is None: - columns = None - else: - if isinstance(column, (list, tuple)): - columns = column - else: - columns = [column] - - if by is not None: - # Prefer array return type for 2-D plots to 
match the subplot layout - # https://github.com/pandas-dev/pandas/pull/12216#issuecomment-241175580 - result = _grouped_plot_by_column(plot_group, data, columns=columns, - by=by, grid=grid, figsize=figsize, - ax=ax, layout=layout, - return_type=return_type) - else: - if return_type is None: - return_type = 'axes' - if layout is not None: - raise ValueError("The 'layout' keyword is not supported when " - "'by' is None") - - if ax is None: - ax = _gca() - data = data._get_numeric_data() - if columns is None: - columns = data.columns - else: - data = data[columns] - - result = plot_group(columns, data.values.T, ax) - ax.grid(grid) - - return result - - -def format_date_labels(ax, rot): - # mini version of autofmt_xdate - try: - for label in ax.get_xticklabels(): - label.set_ha('right') - label.set_rotation(rot) - fig = ax.get_figure() - fig.subplots_adjust(bottom=0.2) - except Exception: # pragma: no cover - pass - - -def scatter_plot(data, x, y, by=None, ax=None, figsize=None, grid=False, - **kwargs): - """ - Make a scatter plot from two DataFrame columns - - Parameters - ---------- - data : DataFrame - x : Column name for the x-axis values - y : Column name for the y-axis values - ax : Matplotlib axis object - figsize : A tuple (width, height) in inches - grid : Setting this to True will show the grid - kwargs : other plotting keyword arguments - To be passed to scatter function - - Returns - ------- - fig : matplotlib.Figure - """ - import matplotlib.pyplot as plt - - # workaround because `c='b'` is hardcoded in matplotlibs scatter method - kwargs.setdefault('c', plt.rcParams['patch.facecolor']) - - def plot_group(group, ax): - xvals = group[x].values - yvals = group[y].values - ax.scatter(xvals, yvals, **kwargs) - ax.grid(grid) - - if by is not None: - fig = _grouped_plot(plot_group, data, by=by, figsize=figsize, ax=ax) - else: - if ax is None: - fig = plt.figure() - ax = fig.add_subplot(111) - else: - fig = ax.get_figure() - plot_group(data, ax) - 
ax.set_ylabel(pprint_thing(y)) - ax.set_xlabel(pprint_thing(x)) - - ax.grid(grid) - - return fig - - -def hist_frame(data, column=None, by=None, grid=True, xlabelsize=None, - xrot=None, ylabelsize=None, yrot=None, ax=None, sharex=False, - sharey=False, figsize=None, layout=None, bins=10, **kwds): - """ - Draw histogram of the DataFrame's series using matplotlib / pylab. - - Parameters - ---------- - data : DataFrame - column : string or sequence - If passed, will be used to limit data to a subset of columns - by : object, optional - If passed, then used to form histograms for separate groups - grid : boolean, default True - Whether to show axis grid lines - xlabelsize : int, default None - If specified changes the x-axis label size - xrot : float, default None - rotation of x axis labels - ylabelsize : int, default None - If specified changes the y-axis label size - yrot : float, default None - rotation of y axis labels - ax : matplotlib axes object, default None - sharex : boolean, default True if ax is None else False - In case subplots=True, share x axis and set some x axis labels to - invisible; defaults to True if ax is None otherwise False if an ax - is passed in; Be aware, that passing in both an ax and sharex=True - will alter all x axis labels for all subplots in a figure! 
- sharey : boolean, default False - In case subplots=True, share y axis and set some y axis labels to - invisible - figsize : tuple - The size of the figure to create in inches by default - layout : tuple, optional - Tuple of (rows, columns) for the layout of the histograms - bins : integer, default 10 - Number of histogram bins to be used - kwds : other plotting keyword arguments - To be passed to hist function - """ - - if by is not None: - axes = grouped_hist(data, column=column, by=by, ax=ax, grid=grid, - figsize=figsize, sharex=sharex, sharey=sharey, - layout=layout, bins=bins, xlabelsize=xlabelsize, - xrot=xrot, ylabelsize=ylabelsize, - yrot=yrot, **kwds) - return axes - - if column is not None: - if not isinstance(column, (list, np.ndarray, Index)): - column = [column] - data = data[column] - data = data._get_numeric_data() - naxes = len(data.columns) - - fig, axes = _subplots(naxes=naxes, ax=ax, squeeze=False, - sharex=sharex, sharey=sharey, figsize=figsize, - layout=layout) - _axes = _flatten(axes) - - for i, col in enumerate(_try_sort(data.columns)): - ax = _axes[i] - ax.hist(data[col].dropna().values, bins=bins, **kwds) - ax.set_title(col) - ax.grid(grid) - - _set_ticks_props(axes, xlabelsize=xlabelsize, xrot=xrot, - ylabelsize=ylabelsize, yrot=yrot) - fig.subplots_adjust(wspace=0.3, hspace=0.3) - - return axes - - -def hist_series(self, by=None, ax=None, grid=True, xlabelsize=None, - xrot=None, ylabelsize=None, yrot=None, figsize=None, - bins=10, **kwds): - """ - Draw histogram of the input series using matplotlib - - Parameters - ---------- - by : object, optional - If passed, then used to form histograms for separate groups - ax : matplotlib axis object - If not passed, uses gca() - grid : boolean, default True - Whether to show axis grid lines - xlabelsize : int, default None - If specified changes the x-axis label size - xrot : float, default None - rotation of x axis labels - ylabelsize : int, default None - If specified changes the y-axis label 
size - yrot : float, default None - rotation of y axis labels - figsize : tuple, default None - figure size in inches by default - bins: integer, default 10 - Number of histogram bins to be used - kwds : keywords - To be passed to the actual plotting function - - Notes - ----- - See matplotlib documentation online for more on this - - """ - import matplotlib.pyplot as plt - - if by is None: - if kwds.get('layout', None) is not None: - raise ValueError("The 'layout' keyword is not supported when " - "'by' is None") - # hack until the plotting interface is a bit more unified - fig = kwds.pop('figure', plt.gcf() if plt.get_fignums() else - plt.figure(figsize=figsize)) - if (figsize is not None and tuple(figsize) != - tuple(fig.get_size_inches())): - fig.set_size_inches(*figsize, forward=True) - if ax is None: - ax = fig.gca() - elif ax.get_figure() != fig: - raise AssertionError('passed axis not bound to passed figure') - values = self.dropna().values - - ax.hist(values, bins=bins, **kwds) - ax.grid(grid) - axes = np.array([ax]) - - _set_ticks_props(axes, xlabelsize=xlabelsize, xrot=xrot, - ylabelsize=ylabelsize, yrot=yrot) - - else: - if 'figure' in kwds: - raise ValueError("Cannot pass 'figure' when using the " - "'by' argument, since a new 'Figure' instance " - "will be created") - axes = grouped_hist(self, by=by, ax=ax, grid=grid, figsize=figsize, - bins=bins, xlabelsize=xlabelsize, xrot=xrot, - ylabelsize=ylabelsize, yrot=yrot, **kwds) - - if hasattr(axes, 'ndim'): - if axes.ndim == 1 and len(axes) == 1: - return axes[0] - return axes - - -def grouped_hist(data, column=None, by=None, ax=None, bins=50, figsize=None, - layout=None, sharex=False, sharey=False, rot=90, grid=True, - xlabelsize=None, xrot=None, ylabelsize=None, yrot=None, - **kwargs): - """ - Grouped histogram - - Parameters - ---------- - data: Series/DataFrame - column: object, optional - by: object, optional - ax: axes, optional - bins: int, default 50 - figsize: tuple, optional - layout: optional - 
sharex: boolean, default False - sharey: boolean, default False - rot: int, default 90 - grid: bool, default True - kwargs: dict, keyword arguments passed to matplotlib.Axes.hist - - Returns - ------- - axes: collection of Matplotlib Axes - """ - def plot_group(group, ax): - ax.hist(group.dropna().values, bins=bins, **kwargs) - - xrot = xrot or rot - - fig, axes = _grouped_plot(plot_group, data, column=column, - by=by, sharex=sharex, sharey=sharey, ax=ax, - figsize=figsize, layout=layout, rot=rot) - - _set_ticks_props(axes, xlabelsize=xlabelsize, xrot=xrot, - ylabelsize=ylabelsize, yrot=yrot) - - fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9, - hspace=0.5, wspace=0.3) - return axes - - -def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None, - rot=0, grid=True, ax=None, figsize=None, - layout=None, **kwds): - """ - Make box plots from DataFrameGroupBy data. - - Parameters - ---------- - grouped : Grouped DataFrame - subplots : - * ``False`` - no subplots will be used - * ``True`` - create a subplot for each group - column : column name or list of names, or vector - Can be any valid input to groupby - fontsize : int or string - rot : label rotation angle - grid : Setting this to True will show the grid - ax : Matplotlib axis object, default None - figsize : A tuple (width, height) in inches - layout : tuple (optional) - (rows, columns) for the layout of the plot - kwds : other plotting keyword arguments to be passed to matplotlib boxplot - function - - Returns - ------- - dict of key/value = group key/DataFrame.boxplot return value - or DataFrame.boxplot return value in case subplots=figures=False - - Examples - -------- - >>> import pandas - >>> import numpy as np - >>> import itertools - >>> - >>> tuples = [t for t in itertools.product(range(1000), range(4))] - >>> index = pandas.MultiIndex.from_tuples(tuples, names=['lvl0', 'lvl1']) - >>> data = np.random.randn(len(index),4) - >>> df = pandas.DataFrame(data, 
columns=list('ABCD'), index=index) - >>> - >>> grouped = df.groupby(level='lvl1') - >>> boxplot_frame_groupby(grouped) - >>> - >>> grouped = df.unstack(level='lvl1').groupby(level=0, axis=1) - >>> boxplot_frame_groupby(grouped, subplots=False) - """ - if subplots is True: - naxes = len(grouped) - fig, axes = _subplots(naxes=naxes, squeeze=False, - ax=ax, sharex=False, sharey=True, - figsize=figsize, layout=layout) - axes = _flatten(axes) - - ret = Series() - for (key, group), ax in zip(grouped, axes): - d = group.boxplot(ax=ax, column=column, fontsize=fontsize, - rot=rot, grid=grid, **kwds) - ax.set_title(pprint_thing(key)) - ret.loc[key] = d - fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, - right=0.9, wspace=0.2) - else: - from pandas.tools.concat import concat - keys, frames = zip(*grouped) - if grouped.axis == 0: - df = concat(frames, keys=keys, axis=1) - else: - if len(frames) > 1: - df = frames[0].join(frames[1::]) - else: - df = frames[0] - ret = df.boxplot(column=column, fontsize=fontsize, rot=rot, - grid=grid, ax=ax, figsize=figsize, - layout=layout, **kwds) - return ret - - -def _grouped_plot(plotf, data, column=None, by=None, numeric_only=True, - figsize=None, sharex=True, sharey=True, layout=None, - rot=0, ax=None, **kwargs): - from pandas import DataFrame - - if figsize == 'default': - # allowed to specify mpl default with 'default' - warnings.warn("figsize='default' is deprecated. 
Specify figure" - "size by tuple instead", FutureWarning, stacklevel=4) - figsize = None - - grouped = data.groupby(by) - if column is not None: - grouped = grouped[column] - - naxes = len(grouped) - fig, axes = _subplots(naxes=naxes, figsize=figsize, - sharex=sharex, sharey=sharey, ax=ax, - layout=layout) - - _axes = _flatten(axes) - - for i, (key, group) in enumerate(grouped): - ax = _axes[i] - if numeric_only and isinstance(group, DataFrame): - group = group._get_numeric_data() - plotf(group, ax, **kwargs) - ax.set_title(pprint_thing(key)) - - return fig, axes - - -def _grouped_plot_by_column(plotf, data, columns=None, by=None, - numeric_only=True, grid=False, - figsize=None, ax=None, layout=None, - return_type=None, **kwargs): - grouped = data.groupby(by) - if columns is None: - if not isinstance(by, (list, tuple)): - by = [by] - columns = data._get_numeric_data().columns.difference(by) - naxes = len(columns) - fig, axes = _subplots(naxes=naxes, sharex=True, sharey=True, - figsize=figsize, ax=ax, layout=layout) - - _axes = _flatten(axes) - - result = Series() - ax_values = [] - - for i, col in enumerate(columns): - ax = _axes[i] - gp_col = grouped[col] - keys, values = zip(*gp_col) - re_plotf = plotf(keys, values, ax, **kwargs) - ax.set_title(col) - ax.set_xlabel(pprint_thing(by)) - ax_values.append(re_plotf) - ax.grid(grid) - - result = Series(ax_values, index=columns) - - # Return axes in multiplot case, maybe revisit later # 985 - if return_type is None: - result = axes - - byline = by[0] if len(by) == 1 else by - fig.suptitle('Boxplot grouped by %s' % byline) - fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9, wspace=0.2) - - return result - - -def table(ax, data, rowLabels=None, colLabels=None, - **kwargs): - """ - Helper function to convert DataFrame and Series to matplotlib.table - - Parameters - ---------- - `ax`: Matplotlib axes object - `data`: DataFrame or Series - data for table contents - `kwargs`: keywords, optional - keyword 
arguments which passed to matplotlib.table.table. - If `rowLabels` or `colLabels` is not specified, data index or column - name will be used. - - Returns - ------- - matplotlib table object - """ - from pandas import DataFrame - if isinstance(data, Series): - data = DataFrame(data, columns=[data.name]) - elif isinstance(data, DataFrame): - pass - else: - raise ValueError('Input data must be DataFrame or Series') - - if rowLabels is None: - rowLabels = data.index - - if colLabels is None: - colLabels = data.columns - - cellText = data.values - - import matplotlib.table - table = matplotlib.table.table(ax, cellText=cellText, - rowLabels=rowLabels, - colLabels=colLabels, **kwargs) - return table - - -def _get_layout(nplots, layout=None, layout_type='box'): - if layout is not None: - if not isinstance(layout, (tuple, list)) or len(layout) != 2: - raise ValueError('Layout must be a tuple of (rows, columns)') - - nrows, ncols = layout - - # Python 2 compat - ceil_ = lambda x: int(ceil(x)) - if nrows == -1 and ncols > 0: - layout = nrows, ncols = (ceil_(float(nplots) / ncols), ncols) - elif ncols == -1 and nrows > 0: - layout = nrows, ncols = (nrows, ceil_(float(nplots) / nrows)) - elif ncols <= 0 and nrows <= 0: - msg = "At least one dimension of layout must be positive" - raise ValueError(msg) - - if nrows * ncols < nplots: - raise ValueError('Layout of %sx%s must be larger than ' - 'required size %s' % (nrows, ncols, nplots)) - - return layout - - if layout_type == 'single': - return (1, 1) - elif layout_type == 'horizontal': - return (1, nplots) - elif layout_type == 'vertical': - return (nplots, 1) - - layouts = {1: (1, 1), 2: (1, 2), 3: (2, 2), 4: (2, 2)} - try: - return layouts[nplots] - except KeyError: - k = 1 - while k ** 2 < nplots: - k += 1 - - if (k - 1) * k >= nplots: - return k, (k - 1) - else: - return k, k - -# copied from matplotlib/pyplot.py and modified for pandas.plotting - - -def _subplots(naxes=None, sharex=False, sharey=False, squeeze=True, - 
subplot_kw=None, ax=None, layout=None, layout_type='box', - **fig_kw): - """Create a figure with a set of subplots already made. - - This utility wrapper makes it convenient to create common layouts of - subplots, including the enclosing figure object, in a single call. - - Keyword arguments: - - naxes : int - Number of required axes. Exceeded axes are set invisible. Default is - nrows * ncols. - - sharex : bool - If True, the X axis will be shared amongst all subplots. - - sharey : bool - If True, the Y axis will be shared amongst all subplots. - - squeeze : bool - - If True, extra dimensions are squeezed out from the returned axis object: - - if only one subplot is constructed (nrows=ncols=1), the resulting - single Axis object is returned as a scalar. - - for Nx1 or 1xN subplots, the returned object is a 1-d numpy object - array of Axis objects are returned as numpy 1-d arrays. - - for NxM subplots with N>1 and M>1 are returned as a 2d array. - - If False, no squeezing at all is done: the returned axis object is always - a 2-d array containing Axis instances, even if it ends up being 1x1. - - subplot_kw : dict - Dict with keywords passed to the add_subplot() call used to create each - subplots. - - ax : Matplotlib axis object, optional - - layout : tuple - Number of rows and columns of the subplot grid. - If not specified, calculated from naxes and layout_type - - layout_type : {'box', 'horziontal', 'vertical'}, default 'box' - Specify how to layout the subplot grid. - - fig_kw : Other keyword arguments to be passed to the figure() call. - Note that all keywords not recognized above will be - automatically included here. - - Returns: - - fig, ax : tuple - - fig is the Matplotlib Figure object - - ax can be either a single axis object or an array of axis objects if - more than one subplot was created. The dimensions of the resulting array - can be controlled with the squeeze keyword, see above. 
- - **Examples:** - - x = np.linspace(0, 2*np.pi, 400) - y = np.sin(x**2) - - # Just a figure and one subplot - f, ax = plt.subplots() - ax.plot(x, y) - ax.set_title('Simple plot') - - # Two subplots, unpack the output array immediately - f, (ax1, ax2) = plt.subplots(1, 2, sharey=True) - ax1.plot(x, y) - ax1.set_title('Sharing Y axis') - ax2.scatter(x, y) - - # Four polar axes - plt.subplots(2, 2, subplot_kw=dict(polar=True)) - """ - import matplotlib.pyplot as plt - - if subplot_kw is None: - subplot_kw = {} - - if ax is None: - fig = plt.figure(**fig_kw) - else: - if is_list_like(ax): - ax = _flatten(ax) - if layout is not None: - warnings.warn("When passing multiple axes, layout keyword is " - "ignored", UserWarning) - if sharex or sharey: - warnings.warn("When passing multiple axes, sharex and sharey " - "are ignored. These settings must be specified " - "when creating axes", UserWarning, - stacklevel=4) - if len(ax) == naxes: - fig = ax[0].get_figure() - return fig, ax - else: - raise ValueError("The number of passed axes must be {0}, the " - "same as the output plot".format(naxes)) - - fig = ax.get_figure() - # if ax is passed and a number of subplots is 1, return ax as it is - if naxes == 1: - if squeeze: - return fig, ax - else: - return fig, _flatten(ax) - else: - warnings.warn("To output multiple subplots, the figure containing " - "the passed axes is being cleared", UserWarning, - stacklevel=4) - fig.clear() - - nrows, ncols = _get_layout(naxes, layout=layout, layout_type=layout_type) - nplots = nrows * ncols - - # Create empty object array to hold all axes. 
It's easiest to make it 1-d - # so we can just append subplots upon creation, and then - axarr = np.empty(nplots, dtype=object) - - # Create first subplot separately, so we can share it if requested - ax0 = fig.add_subplot(nrows, ncols, 1, **subplot_kw) - - if sharex: - subplot_kw['sharex'] = ax0 - if sharey: - subplot_kw['sharey'] = ax0 - axarr[0] = ax0 - - # Note off-by-one counting because add_subplot uses the MATLAB 1-based - # convention. - for i in range(1, nplots): - kwds = subplot_kw.copy() - # Set sharex and sharey to None for blank/dummy axes, these can - # interfere with proper axis limits on the visible axes if - # they share axes e.g. issue #7528 - if i >= naxes: - kwds['sharex'] = None - kwds['sharey'] = None - ax = fig.add_subplot(nrows, ncols, i + 1, **kwds) - axarr[i] = ax - - if naxes != nplots: - for ax in axarr[naxes:]: - ax.set_visible(False) - - _handle_shared_axes(axarr, nplots, naxes, nrows, ncols, sharex, sharey) - - if squeeze: - # Reshape the array to have the final desired dimension (nrow,ncol), - # though discarding unneeded dimensions that equal 1. If we only have - # one subplot, just return it instead of a 1-element array. 
- if nplots == 1: - axes = axarr[0] - else: - axes = axarr.reshape(nrows, ncols).squeeze() - else: - # returned axis array will be always 2-d, even if nrows=ncols=1 - axes = axarr.reshape(nrows, ncols) - - return fig, axes - - -def _remove_labels_from_axis(axis): - for t in axis.get_majorticklabels(): - t.set_visible(False) - - try: - # set_visible will not be effective if - # minor axis has NullLocator and NullFormattor (default) - import matplotlib.ticker as ticker - if isinstance(axis.get_minor_locator(), ticker.NullLocator): - axis.set_minor_locator(ticker.AutoLocator()) - if isinstance(axis.get_minor_formatter(), ticker.NullFormatter): - axis.set_minor_formatter(ticker.FormatStrFormatter('')) - for t in axis.get_minorticklabels(): - t.set_visible(False) - except Exception: # pragma no cover - raise - axis.get_label().set_visible(False) - - -def _handle_shared_axes(axarr, nplots, naxes, nrows, ncols, sharex, sharey): - if nplots > 1: - - if nrows > 1: - try: - # first find out the ax layout, - # so that we can correctly handle 'gaps" - layout = np.zeros((nrows + 1, ncols + 1), dtype=np.bool) - for ax in axarr: - layout[ax.rowNum, ax.colNum] = ax.get_visible() - - for ax in axarr: - # only the last row of subplots should get x labels -> all - # other off layout handles the case that the subplot is - # the last in the column, because below is no subplot/gap. - if not layout[ax.rowNum + 1, ax.colNum]: - continue - if sharex or len(ax.get_shared_x_axes() - .get_siblings(ax)) > 1: - _remove_labels_from_axis(ax.xaxis) - - except IndexError: - # if gridspec is used, ax.rowNum and ax.colNum may different - # from layout shape. 
in this case, use last_row logic - for ax in axarr: - if ax.is_last_row(): - continue - if sharex or len(ax.get_shared_x_axes() - .get_siblings(ax)) > 1: - _remove_labels_from_axis(ax.xaxis) - - if ncols > 1: - for ax in axarr: - # only the first column should get y labels -> set all other to - # off as we only have labels in teh first column and we always - # have a subplot there, we can skip the layout test - if ax.is_first_col(): - continue - if sharey or len(ax.get_shared_y_axes().get_siblings(ax)) > 1: - _remove_labels_from_axis(ax.yaxis) - - -def _flatten(axes): - if not is_list_like(axes): - return np.array([axes]) - elif isinstance(axes, (np.ndarray, Index)): - return axes.ravel() - return np.array(axes) - - -def _get_all_lines(ax): - lines = ax.get_lines() - - if hasattr(ax, 'right_ax'): - lines += ax.right_ax.get_lines() - - if hasattr(ax, 'left_ax'): - lines += ax.left_ax.get_lines() - - return lines - - -def _get_xlim(lines): - left, right = np.inf, -np.inf - for l in lines: - x = l.get_xdata(orig=False) - left = min(x[0], left) - right = max(x[-1], right) - return left, right - - -def _set_ticks_props(axes, xlabelsize=None, xrot=None, - ylabelsize=None, yrot=None): - import matplotlib.pyplot as plt - - for ax in _flatten(axes): - if xlabelsize is not None: - plt.setp(ax.get_xticklabels(), fontsize=xlabelsize) - if xrot is not None: - plt.setp(ax.get_xticklabels(), rotation=xrot) - if ylabelsize is not None: - plt.setp(ax.get_yticklabels(), fontsize=ylabelsize) - if yrot is not None: - plt.setp(ax.get_yticklabels(), rotation=yrot) - return axes - - -class BasePlotMethods(PandasObject): - - def __init__(self, data): - self._data = data - - def __call__(self, *args, **kwargs): - raise NotImplementedError - - -class SeriesPlotMethods(BasePlotMethods): - """Series plotting accessor and method - - Examples - -------- - >>> s.plot.line() - >>> s.plot.bar() - >>> s.plot.hist() - - Plotting methods can also be accessed by calling the accessor as a method - with 
the ``kind`` argument: - ``s.plot(kind='line')`` is equivalent to ``s.plot.line()`` - """ - - def __call__(self, kind='line', ax=None, - figsize=None, use_index=True, title=None, grid=None, - legend=False, style=None, logx=False, logy=False, - loglog=False, xticks=None, yticks=None, - xlim=None, ylim=None, - rot=None, fontsize=None, colormap=None, table=False, - yerr=None, xerr=None, - label=None, secondary_y=False, **kwds): - return plot_series(self._data, kind=kind, ax=ax, figsize=figsize, - use_index=use_index, title=title, grid=grid, - legend=legend, style=style, logx=logx, logy=logy, - loglog=loglog, xticks=xticks, yticks=yticks, - xlim=xlim, ylim=ylim, rot=rot, fontsize=fontsize, - colormap=colormap, table=table, yerr=yerr, - xerr=xerr, label=label, secondary_y=secondary_y, - **kwds) - __call__.__doc__ = plot_series.__doc__ - - def line(self, **kwds): - """ - Line plot - - .. versionadded:: 0.17.0 - - Parameters - ---------- - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.Series.plot`. - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='line', **kwds) - - def bar(self, **kwds): - """ - Vertical bar plot - - .. versionadded:: 0.17.0 - - Parameters - ---------- - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.Series.plot`. - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='bar', **kwds) - - def barh(self, **kwds): - """ - Horizontal bar plot - - .. versionadded:: 0.17.0 - - Parameters - ---------- - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.Series.plot`. - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='barh', **kwds) - - def box(self, **kwds): - """ - Boxplot - - .. versionadded:: 0.17.0 - - Parameters - ---------- - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.Series.plot`. 
- - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='box', **kwds) - - def hist(self, bins=10, **kwds): - """ - Histogram - - .. versionadded:: 0.17.0 - - Parameters - ---------- - bins: integer, default 10 - Number of histogram bins to be used - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.Series.plot`. - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='hist', bins=bins, **kwds) - - def kde(self, **kwds): - """ - Kernel Density Estimate plot - - .. versionadded:: 0.17.0 - - Parameters - ---------- - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.Series.plot`. - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='kde', **kwds) - - density = kde - - def area(self, **kwds): - """ - Area plot - - .. versionadded:: 0.17.0 - - Parameters - ---------- - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.Series.plot`. - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='area', **kwds) - - def pie(self, **kwds): - """ - Pie chart - - .. versionadded:: 0.17.0 - - Parameters - ---------- - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.Series.plot`. 
- - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='pie', **kwds) - - -class FramePlotMethods(BasePlotMethods): - """DataFrame plotting accessor and method - - Examples - -------- - >>> df.plot.line() - >>> df.plot.scatter('x', 'y') - >>> df.plot.hexbin() - - These plotting methods can also be accessed by calling the accessor as a - method with the ``kind`` argument: - ``df.plot(kind='line')`` is equivalent to ``df.plot.line()`` - """ - - def __call__(self, x=None, y=None, kind='line', ax=None, - subplots=False, sharex=None, sharey=False, layout=None, - figsize=None, use_index=True, title=None, grid=None, - legend=True, style=None, logx=False, logy=False, loglog=False, - xticks=None, yticks=None, xlim=None, ylim=None, - rot=None, fontsize=None, colormap=None, table=False, - yerr=None, xerr=None, - secondary_y=False, sort_columns=False, **kwds): - return plot_frame(self._data, kind=kind, x=x, y=y, ax=ax, - subplots=subplots, sharex=sharex, sharey=sharey, - layout=layout, figsize=figsize, use_index=use_index, - title=title, grid=grid, legend=legend, style=style, - logx=logx, logy=logy, loglog=loglog, xticks=xticks, - yticks=yticks, xlim=xlim, ylim=ylim, rot=rot, - fontsize=fontsize, colormap=colormap, table=table, - yerr=yerr, xerr=xerr, secondary_y=secondary_y, - sort_columns=sort_columns, **kwds) - __call__.__doc__ = plot_frame.__doc__ - - def line(self, x=None, y=None, **kwds): - """ - Line plot - - .. versionadded:: 0.17.0 - - Parameters - ---------- - x, y : label or position, optional - Coordinates for each point. - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='line', x=x, y=y, **kwds) - - def bar(self, x=None, y=None, **kwds): - """ - Vertical bar plot - - .. versionadded:: 0.17.0 - - Parameters - ---------- - x, y : label or position, optional - Coordinates for each point. 
- **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='bar', x=x, y=y, **kwds) - - def barh(self, x=None, y=None, **kwds): - """ - Horizontal bar plot - - .. versionadded:: 0.17.0 - - Parameters - ---------- - x, y : label or position, optional - Coordinates for each point. - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='barh', x=x, y=y, **kwds) - - def box(self, by=None, **kwds): - """ - Boxplot - - .. versionadded:: 0.17.0 - - Parameters - ---------- - by : string or sequence - Column in the DataFrame to group by. - \*\*kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='box', by=by, **kwds) - - def hist(self, by=None, bins=10, **kwds): - """ - Histogram - - .. versionadded:: 0.17.0 - - Parameters - ---------- - by : string or sequence - Column in the DataFrame to group by. - bins: integer, default 10 - Number of histogram bins to be used - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='hist', by=by, bins=bins, **kwds) - - def kde(self, **kwds): - """ - Kernel Density Estimate plot - - .. versionadded:: 0.17.0 - - Parameters - ---------- - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='kde', **kwds) - - density = kde - - def area(self, x=None, y=None, **kwds): - """ - Area plot - - .. 
versionadded:: 0.17.0 - - Parameters - ---------- - x, y : label or position, optional - Coordinates for each point. - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='area', x=x, y=y, **kwds) - - def pie(self, y=None, **kwds): - """ - Pie chart - - .. versionadded:: 0.17.0 - - Parameters - ---------- - y : label or position, optional - Column to plot. - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='pie', y=y, **kwds) - - def scatter(self, x, y, s=None, c=None, **kwds): - """ - Scatter plot - - .. versionadded:: 0.17.0 - - Parameters - ---------- - x, y : label or position, optional - Coordinates for each point. - s : scalar or array_like, optional - Size of each point. - c : label or position, optional - Color of each point. - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='scatter', x=x, y=y, c=c, s=s, **kwds) +import pandas.plotting as _plotting - def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, - **kwds): - """ - Hexbin plot +# back-compat of public API +# deprecate these functions +m = sys.modules['pandas.tools.plotting'] +for t in [t for t in dir(_plotting) if not t.startswith('_')]: - .. versionadded:: 0.17.0 + def outer(t=t): - Parameters - ---------- - x, y : label or position, optional - Coordinates for each point. - C : label or position, optional - The value at each `(x, y)` point. - reduce_C_function : callable, optional - Function of one argument that reduces all the values in a bin to - a single number (e.g. `mean`, `max`, `sum`, `std`). - gridsize : int, optional - Number of bins. 
- **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. + def wrapper(*args, **kwargs): + warnings.warn("'pandas.tools.plotting.{t}' is deprecated, " + "import 'pandas.plotting.{t}' instead.".format(t=t), + FutureWarning, stacklevel=2) + return getattr(_plotting, t)(*args, **kwargs) + return wrapper - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - if reduce_C_function is not None: - kwds['reduce_C_function'] = reduce_C_function - if gridsize is not None: - kwds['gridsize'] = gridsize - return self(kind='hexbin', x=x, y=y, C=C, **kwds) + setattr(m, t, outer(t)) diff --git a/pandas/tools/tests/test_tile.py b/pandas/tools/tests/test_tile.py deleted file mode 100644 index de44eadc15751..0000000000000 --- a/pandas/tools/tests/test_tile.py +++ /dev/null @@ -1,352 +0,0 @@ -import os - -import numpy as np -from pandas.compat import zip - -from pandas import Series, Index -import pandas.util.testing as tm -from pandas.util.testing import assertRaisesRegexp -import pandas.core.common as com - -from pandas.core.algorithms import quantile -from pandas.tools.tile import cut, qcut -import pandas.tools.tile as tmod -from pandas import to_datetime, DatetimeIndex, Timestamp - - -class TestCut(tm.TestCase): - - def test_simple(self): - data = np.ones(5) - result = cut(data, 4, labels=False) - desired = np.array([1, 1, 1, 1, 1]) - tm.assert_numpy_array_equal(result, desired, - check_dtype=False) - - def test_bins(self): - data = np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1]) - result, bins = cut(data, 3, retbins=True) - - exp_codes = np.array([0, 0, 0, 1, 2, 0], dtype=np.int8) - tm.assert_numpy_array_equal(result.codes, exp_codes) - exp = np.array([0.1905, 3.36666667, 6.53333333, 9.7]) - tm.assert_almost_equal(bins, exp) - - def test_right(self): - data = np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1, 2.575]) - result, bins = cut(data, 4, right=True, retbins=True) - exp_codes = np.array([0, 0, 0, 2, 3, 0, 0], dtype=np.int8) - 
tm.assert_numpy_array_equal(result.codes, exp_codes) - exp = np.array([0.1905, 2.575, 4.95, 7.325, 9.7]) - tm.assert_numpy_array_equal(bins, exp) - - def test_noright(self): - data = np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1, 2.575]) - result, bins = cut(data, 4, right=False, retbins=True) - exp_codes = np.array([0, 0, 0, 2, 3, 0, 1], dtype=np.int8) - tm.assert_numpy_array_equal(result.codes, exp_codes) - exp = np.array([0.2, 2.575, 4.95, 7.325, 9.7095]) - tm.assert_almost_equal(bins, exp) - - def test_arraylike(self): - data = [.2, 1.4, 2.5, 6.2, 9.7, 2.1] - result, bins = cut(data, 3, retbins=True) - exp_codes = np.array([0, 0, 0, 1, 2, 0], dtype=np.int8) - tm.assert_numpy_array_equal(result.codes, exp_codes) - exp = np.array([0.1905, 3.36666667, 6.53333333, 9.7]) - tm.assert_almost_equal(bins, exp) - - def test_bins_not_monotonic(self): - data = [.2, 1.4, 2.5, 6.2, 9.7, 2.1] - self.assertRaises(ValueError, cut, data, [0.1, 1.5, 1, 10]) - - def test_wrong_num_labels(self): - data = [.2, 1.4, 2.5, 6.2, 9.7, 2.1] - self.assertRaises(ValueError, cut, data, [0, 1, 10], - labels=['foo', 'bar', 'baz']) - - def test_cut_corner(self): - # h3h - self.assertRaises(ValueError, cut, [], 2) - - self.assertRaises(ValueError, cut, [1, 2, 3], 0.5) - - def test_cut_out_of_range_more(self): - # #1511 - s = Series([0, -1, 0, 1, -3], name='x') - ind = cut(s, [0, 1], labels=False) - exp = Series([np.nan, np.nan, np.nan, 0, np.nan], name='x') - tm.assert_series_equal(ind, exp) - - def test_labels(self): - arr = np.tile(np.arange(0, 1.01, 0.1), 4) - - result, bins = cut(arr, 4, retbins=True) - ex_levels = Index(['(-0.001, 0.25]', '(0.25, 0.5]', '(0.5, 0.75]', - '(0.75, 1]']) - self.assert_index_equal(result.categories, ex_levels) - - result, bins = cut(arr, 4, retbins=True, right=False) - ex_levels = Index(['[0, 0.25)', '[0.25, 0.5)', '[0.5, 0.75)', - '[0.75, 1.001)']) - self.assert_index_equal(result.categories, ex_levels) - - def test_cut_pass_series_name_to_factor(self): - s = 
Series(np.random.randn(100), name='foo') - - factor = cut(s, 4) - self.assertEqual(factor.name, 'foo') - - def test_label_precision(self): - arr = np.arange(0, 0.73, 0.01) - - result = cut(arr, 4, precision=2) - ex_levels = Index(['(-0.00072, 0.18]', '(0.18, 0.36]', - '(0.36, 0.54]', '(0.54, 0.72]']) - self.assert_index_equal(result.categories, ex_levels) - - def test_na_handling(self): - arr = np.arange(0, 0.75, 0.01) - arr[::3] = np.nan - - result = cut(arr, 4) - - result_arr = np.asarray(result) - - ex_arr = np.where(com.isnull(arr), np.nan, result_arr) - - tm.assert_almost_equal(result_arr, ex_arr) - - result = cut(arr, 4, labels=False) - ex_result = np.where(com.isnull(arr), np.nan, result) - tm.assert_almost_equal(result, ex_result) - - def test_inf_handling(self): - data = np.arange(6) - data_ser = Series(data, dtype='int64') - - result = cut(data, [-np.inf, 2, 4, np.inf]) - result_ser = cut(data_ser, [-np.inf, 2, 4, np.inf]) - - ex_categories = Index(['(-inf, 2]', '(2, 4]', '(4, inf]']) - - tm.assert_index_equal(result.categories, ex_categories) - tm.assert_index_equal(result_ser.cat.categories, ex_categories) - self.assertEqual(result[5], '(4, inf]') - self.assertEqual(result[0], '(-inf, 2]') - self.assertEqual(result_ser[5], '(4, inf]') - self.assertEqual(result_ser[0], '(-inf, 2]') - - def test_qcut(self): - arr = np.random.randn(1000) - - labels, bins = qcut(arr, 4, retbins=True) - ex_bins = quantile(arr, [0, .25, .5, .75, 1.]) - tm.assert_almost_equal(bins, ex_bins) - - ex_levels = cut(arr, ex_bins, include_lowest=True) - self.assert_categorical_equal(labels, ex_levels) - - def test_qcut_bounds(self): - arr = np.random.randn(1000) - - factor = qcut(arr, 10, labels=False) - self.assertEqual(len(np.unique(factor)), 10) - - def test_qcut_specify_quantiles(self): - arr = np.random.randn(100) - - factor = qcut(arr, [0, .25, .5, .75, 1.]) - expected = qcut(arr, 4) - tm.assert_categorical_equal(factor, expected) - - def test_qcut_all_bins_same(self): - 
assertRaisesRegexp(ValueError, "edges.*unique", qcut, - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 3) - - def test_cut_out_of_bounds(self): - arr = np.random.randn(100) - - result = cut(arr, [-1, 0, 1]) - - mask = result.codes == -1 - ex_mask = (arr < -1) | (arr > 1) - self.assert_numpy_array_equal(mask, ex_mask) - - def test_cut_pass_labels(self): - arr = [50, 5, 10, 15, 20, 30, 70] - bins = [0, 25, 50, 100] - labels = ['Small', 'Medium', 'Large'] - - result = cut(arr, bins, labels=labels) - - exp = cut(arr, bins) - exp.categories = labels - - tm.assert_categorical_equal(result, exp) - - def test_qcut_include_lowest(self): - values = np.arange(10) - - cats = qcut(values, 4) - - ex_levels = ['[0, 2.25]', '(2.25, 4.5]', '(4.5, 6.75]', '(6.75, 9]'] - self.assertTrue((cats.categories == ex_levels).all()) - - def test_qcut_nas(self): - arr = np.random.randn(100) - arr[:20] = np.nan - - result = qcut(arr, 4) - self.assertTrue(com.isnull(result[:20]).all()) - - def test_label_formatting(self): - self.assertEqual(tmod._trim_zeros('1.000'), '1') - - # it works - result = cut(np.arange(11.), 2) - - result = cut(np.arange(11.) 
/ 1e10, 2) - - # #1979, negative numbers - - result = tmod._format_label(-117.9998, precision=3) - self.assertEqual(result, '-118') - result = tmod._format_label(117.9998, precision=3) - self.assertEqual(result, '118') - - def test_qcut_binning_issues(self): - # #1978, 1979 - path = os.path.join(tm.get_data_path(), 'cut_data.csv') - arr = np.loadtxt(path) - - result = qcut(arr, 20) - - starts = [] - ends = [] - for lev in result.categories: - s, e = lev[1:-1].split(',') - - self.assertTrue(s != e) - - starts.append(float(s)) - ends.append(float(e)) - - for (sp, sn), (ep, en) in zip(zip(starts[:-1], starts[1:]), - zip(ends[:-1], ends[1:])): - self.assertTrue(sp < sn) - self.assertTrue(ep < en) - self.assertTrue(ep <= sn) - - def test_cut_return_categorical(self): - from pandas import Categorical - s = Series([0, 1, 2, 3, 4, 5, 6, 7, 8]) - res = cut(s, 3) - exp = Series(Categorical.from_codes([0, 0, 0, 1, 1, 1, 2, 2, 2], - ["(-0.008, 2.667]", - "(2.667, 5.333]", "(5.333, 8]"], - ordered=True)) - tm.assert_series_equal(res, exp) - - def test_qcut_return_categorical(self): - from pandas import Categorical - s = Series([0, 1, 2, 3, 4, 5, 6, 7, 8]) - res = qcut(s, [0, 0.333, 0.666, 1]) - exp = Series(Categorical.from_codes([0, 0, 0, 1, 1, 1, 2, 2, 2], - ["[0, 2.664]", - "(2.664, 5.328]", "(5.328, 8]"], - ordered=True)) - tm.assert_series_equal(res, exp) - - def test_series_retbins(self): - # GH 8589 - s = Series(np.arange(4)) - result, bins = cut(s, 2, retbins=True) - tm.assert_numpy_array_equal(result.cat.codes.values, - np.array([0, 0, 1, 1], dtype=np.int8)) - tm.assert_numpy_array_equal(bins, np.array([-0.003, 1.5, 3])) - - result, bins = qcut(s, 2, retbins=True) - tm.assert_numpy_array_equal(result.cat.codes.values, - np.array([0, 0, 1, 1], dtype=np.int8)) - tm.assert_numpy_array_equal(bins, np.array([0, 1.5, 3])) - - def test_qcut_duplicates_bin(self): - # GH 7751 - values = [0, 0, 0, 0, 1, 2, 3] - result_levels = ['[0, 1]', '(1, 3]'] - - cats = qcut(values, 3, 
duplicates='drop') - self.assertTrue((cats.categories == result_levels).all()) - - self.assertRaises(ValueError, qcut, values, 3) - self.assertRaises(ValueError, qcut, values, 3, duplicates='raise') - - # invalid - self.assertRaises(ValueError, qcut, values, 3, duplicates='foo') - - def test_single_bin(self): - # issue 14652 - expected = Series([0, 0]) - - s = Series([9., 9.]) - result = cut(s, 1, labels=False) - tm.assert_series_equal(result, expected) - - s = Series([-9., -9.]) - result = cut(s, 1, labels=False) - tm.assert_series_equal(result, expected) - - def test_datetime_cut(self): - # GH 14714 - # testing for time data to be present as series - data = to_datetime(Series(['2013-01-01', '2013-01-02', '2013-01-03'])) - result, bins = cut(data, 3, retbins=True) - expected = Series(['(2012-12-31 23:57:07.200000, 2013-01-01 16:00:00]', - '(2013-01-01 16:00:00, 2013-01-02 08:00:00]', - '(2013-01-02 08:00:00, 2013-01-03 00:00:00]'], - ).astype("category", ordered=True) - tm.assert_series_equal(result, expected) - - # testing for time data to be present as list - data = [np.datetime64('2013-01-01'), np.datetime64('2013-01-02'), - np.datetime64('2013-01-03')] - result, bins = cut(data, 3, retbins=True) - tm.assert_series_equal(Series(result), expected) - - # testing for time data to be present as ndarray - data = np.array([np.datetime64('2013-01-01'), - np.datetime64('2013-01-02'), - np.datetime64('2013-01-03')]) - result, bins = cut(data, 3, retbins=True) - tm.assert_series_equal(Series(result), expected) - - # testing for time data to be present as datetime index - data = DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03']) - result, bins = cut(data, 3, retbins=True) - tm.assert_series_equal(Series(result), expected) - - def test_datetime_bin(self): - data = [np.datetime64('2012-12-13'), np.datetime64('2012-12-15')] - bin_data = ['2012-12-12', '2012-12-14', '2012-12-16'] - expected = Series(['(2012-12-12 00:00:00, 2012-12-14 00:00:00]', - '(2012-12-14 
00:00:00, 2012-12-16 00:00:00]'], - ).astype("category", ordered=True) - - for conv in [Timestamp, Timestamp, np.datetime64]: - bins = [conv(v) for v in bin_data] - result = cut(data, bins=bins) - tm.assert_series_equal(Series(result), expected) - - bin_pydatetime = [Timestamp(v).to_pydatetime() for v in bin_data] - result = cut(data, bins=bin_pydatetime) - tm.assert_series_equal(Series(result), expected) - - bins = to_datetime(bin_data) - result = cut(data, bins=bin_pydatetime) - tm.assert_series_equal(Series(result), expected) - - -def curpath(): - pth, _ = os.path.split(os.path.abspath(__file__)) - return pth diff --git a/pandas/tseries/api.py b/pandas/tseries/api.py index 9a07983b4d951..2094791ecdc60 100644 --- a/pandas/tseries/api.py +++ b/pandas/tseries/api.py @@ -1,14 +1,8 @@ """ - +Timeseries API """ # flake8: noqa -from pandas.tseries.index import DatetimeIndex, date_range, bdate_range from pandas.tseries.frequencies import infer_freq -from pandas.tseries.tdi import Timedelta, TimedeltaIndex, timedelta_range -from pandas.tseries.period import Period, PeriodIndex, period_range, pnow -from pandas.tseries.resample import TimeGrouper -from pandas.tseries.timedeltas import to_timedelta -from pandas.lib import NaT import pandas.tseries.offsets as offsets diff --git a/pandas/tseries/common.py b/pandas/tseries/common.py deleted file mode 100644 index 46e8bd43e8ff8..0000000000000 --- a/pandas/tseries/common.py +++ /dev/null @@ -1,241 +0,0 @@ -""" -datetimelike delegation -""" - -import numpy as np - -from pandas.types.common import (_NS_DTYPE, _TD_DTYPE, - is_period_arraylike, - is_datetime_arraylike, is_integer_dtype, - is_datetime64_dtype, is_datetime64tz_dtype, - is_timedelta64_dtype, is_categorical_dtype, - is_list_like) - -from pandas.core.base import PandasDelegate, NoNewAttributesMixin -from pandas.tseries.index import DatetimeIndex -from pandas._period import IncompatibleFrequency # flake8: noqa -from pandas.tseries.period import PeriodIndex -from 
pandas.tseries.tdi import TimedeltaIndex -from pandas import tslib -from pandas.core.algorithms import take_1d - - -def is_datetimelike(data): - """ - return a boolean if we can be successfully converted to a datetimelike - """ - try: - maybe_to_datetimelike(data) - return True - except (Exception): - pass - return False - - -def maybe_to_datetimelike(data, copy=False): - """ - return a DelegatedClass of a Series that is datetimelike - (e.g. datetime64[ns],timedelta64[ns] dtype or a Series of Periods) - raise TypeError if this is not possible. - - Parameters - ---------- - data : Series - copy : boolean, default False - copy the input data - - Returns - ------- - DelegatedClass - - """ - from pandas import Series - - if not isinstance(data, Series): - raise TypeError("cannot convert an object of type {0} to a " - "datetimelike index".format(type(data))) - - index = data.index - name = data.name - orig = data if is_categorical_dtype(data) else None - if orig is not None: - data = orig.values.categories - - if is_datetime64_dtype(data.dtype): - return DatetimeProperties(DatetimeIndex(data, copy=copy, freq='infer'), - index, name=name, orig=orig) - elif is_datetime64tz_dtype(data.dtype): - return DatetimeProperties(DatetimeIndex(data, copy=copy, freq='infer', - ambiguous='infer'), - index, data.name, orig=orig) - elif is_timedelta64_dtype(data.dtype): - return TimedeltaProperties(TimedeltaIndex(data, copy=copy, - freq='infer'), index, - name=name, orig=orig) - else: - if is_period_arraylike(data): - return PeriodProperties(PeriodIndex(data, copy=copy), index, - name=name, orig=orig) - if is_datetime_arraylike(data): - return DatetimeProperties(DatetimeIndex(data, copy=copy, - freq='infer'), index, - name=name, orig=orig) - - raise TypeError("cannot convert an object of type {0} to a " - "datetimelike index".format(type(data))) - - -class Properties(PandasDelegate, NoNewAttributesMixin): - - def __init__(self, values, index, name, orig=None): - self.values = values - 
self.index = index - self.name = name - self.orig = orig - self._freeze() - - def _delegate_property_get(self, name): - from pandas import Series - - result = getattr(self.values, name) - - # maybe need to upcast (ints) - if isinstance(result, np.ndarray): - if is_integer_dtype(result): - result = result.astype('int64') - elif not is_list_like(result): - return result - - # blow up if we operate on categories - if self.orig is not None: - result = take_1d(result, self.orig.cat.codes) - - # return the result as a Series, which is by definition a copy - result = Series(result, index=self.index, name=self.name) - - # setting this object will show a SettingWithCopyWarning/Error - result.is_copy = ("modifications to a property of a datetimelike " - "object are not supported and are discarded. " - "Change values on the original.") - - return result - - def _delegate_property_set(self, name, value, *args, **kwargs): - raise ValueError("modifications to a property of a datetimelike " - "object are not supported. Change values on the " - "original.") - - def _delegate_method(self, name, *args, **kwargs): - from pandas import Series - - method = getattr(self.values, name) - result = method(*args, **kwargs) - - if not is_list_like(result): - return result - - result = Series(result, index=self.index, name=self.name) - - # setting this object will show a SettingWithCopyWarning/Error - result.is_copy = ("modifications to a method of a datetimelike object " - "are not supported and are discarded. Change " - "values on the original.") - - return result - - -class DatetimeProperties(Properties): - """ - Accessor object for datetimelike properties of the Series values. - - Examples - -------- - >>> s.dt.hour - >>> s.dt.second - >>> s.dt.quarter - - Returns a Series indexed like the original Series. - Raises TypeError if the Series does not contain datetimelike values. 
- """ - - def to_pydatetime(self): - return self.values.to_pydatetime() - -DatetimeProperties._add_delegate_accessors( - delegate=DatetimeIndex, - accessors=DatetimeIndex._datetimelike_ops, - typ='property') -DatetimeProperties._add_delegate_accessors( - delegate=DatetimeIndex, - accessors=["to_period", "tz_localize", "tz_convert", - "normalize", "strftime", "round", "floor", "ceil"], - typ='method') - - -class TimedeltaProperties(Properties): - """ - Accessor object for datetimelike properties of the Series values. - - Examples - -------- - >>> s.dt.hours - >>> s.dt.seconds - - Returns a Series indexed like the original Series. - Raises TypeError if the Series does not contain datetimelike values. - """ - - def to_pytimedelta(self): - return self.values.to_pytimedelta() - - @property - def components(self): - """ - Return a dataframe of the components (days, hours, minutes, - seconds, milliseconds, microseconds, nanoseconds) of the Timedeltas. - - Returns - ------- - a DataFrame - - """ - return self.values.components.set_index(self.index) - -TimedeltaProperties._add_delegate_accessors( - delegate=TimedeltaIndex, - accessors=TimedeltaIndex._datetimelike_ops, - typ='property') -TimedeltaProperties._add_delegate_accessors( - delegate=TimedeltaIndex, - accessors=["to_pytimedelta", "total_seconds", "round", "floor", "ceil"], - typ='method') - - -class PeriodProperties(Properties): - """ - Accessor object for datetimelike properties of the Series values. - - Examples - -------- - >>> s.dt.hour - >>> s.dt.second - >>> s.dt.quarter - - Returns a Series indexed like the original Series. - Raises TypeError if the Series does not contain datetimelike values. 
- """ - -PeriodProperties._add_delegate_accessors( - delegate=PeriodIndex, - accessors=PeriodIndex._datetimelike_ops, - typ='property') -PeriodProperties._add_delegate_accessors(delegate=PeriodIndex, - accessors=["strftime"], - typ='method') - - -class CombinedDatetimelikeProperties(DatetimeProperties, TimedeltaProperties): - # This class is never instantiated, and exists solely for the benefit of - # the Series.dt class property. For Series objects, .dt will always be one - # of the more specific classes above. - __doc__ = DatetimeProperties.__doc__ diff --git a/pandas/tseries/converter.py b/pandas/tseries/converter.py index 95ff9578fa3ee..26d3f3cb85edc 100644 --- a/pandas/tseries/converter.py +++ b/pandas/tseries/converter.py @@ -1,1002 +1,20 @@ -from datetime import datetime, timedelta -import datetime as pydt -import numpy as np +# flake8: noqa +import warnings -from dateutil.relativedelta import relativedelta - -import matplotlib.units as units -import matplotlib.dates as dates - -from matplotlib.ticker import Formatter, AutoLocator, Locator -from matplotlib.transforms import nonsingular - - -from pandas.types.common import (is_float, is_integer, - is_integer_dtype, - is_float_dtype, - is_datetime64_ns_dtype, - is_period_arraylike, - ) - -from pandas.compat import lrange -import pandas.compat as compat -import pandas.lib as lib -import pandas.core.common as com -from pandas.core.index import Index - -from pandas.core.series import Series -from pandas.tseries.index import date_range -import pandas.tseries.tools as tools -import pandas.tseries.frequencies as frequencies -from pandas.tseries.frequencies import FreqGroup -from pandas.tseries.period import Period, PeriodIndex - -# constants -HOURS_PER_DAY = 24. -MIN_PER_HOUR = 60. -SEC_PER_MIN = 60. 
- -SEC_PER_HOUR = SEC_PER_MIN * MIN_PER_HOUR -SEC_PER_DAY = SEC_PER_HOUR * HOURS_PER_DAY - -MUSEC_PER_DAY = 1e6 * SEC_PER_DAY - - -def _mpl_le_2_0_0(): - try: - import matplotlib - return matplotlib.compare_versions('2.0.0', matplotlib.__version__) - except ImportError: - return False +from pandas.plotting._converter import (time2num, + TimeConverter, TimeFormatter, + PeriodConverter, get_datevalue, + DatetimeConverter, + PandasAutoDateFormatter, + PandasAutoDateLocator, + MilliSecondLocator, get_finder, + TimeSeries_DateLocator, + TimeSeries_DateFormatter) def register(): - units.registry[lib.Timestamp] = DatetimeConverter() - units.registry[Period] = PeriodConverter() - units.registry[pydt.datetime] = DatetimeConverter() - units.registry[pydt.date] = DatetimeConverter() - units.registry[pydt.time] = TimeConverter() - units.registry[np.datetime64] = DatetimeConverter() - - -def _to_ordinalf(tm): - tot_sec = (tm.hour * 3600 + tm.minute * 60 + tm.second + - float(tm.microsecond / 1e6)) - return tot_sec - - -def time2num(d): - if isinstance(d, compat.string_types): - parsed = tools.to_datetime(d) - if not isinstance(parsed, datetime): - raise ValueError('Could not parse time %s' % d) - return _to_ordinalf(parsed.time()) - if isinstance(d, pydt.time): - return _to_ordinalf(d) - return d - - -class TimeConverter(units.ConversionInterface): - - @staticmethod - def convert(value, unit, axis): - valid_types = (str, pydt.time) - if (isinstance(value, valid_types) or is_integer(value) or - is_float(value)): - return time2num(value) - if isinstance(value, Index): - return value.map(time2num) - if isinstance(value, (list, tuple, np.ndarray, Index)): - return [time2num(x) for x in value] - return value - - @staticmethod - def axisinfo(unit, axis): - if unit != 'time': - return None - - majloc = AutoLocator() - majfmt = TimeFormatter(majloc) - return units.AxisInfo(majloc=majloc, majfmt=majfmt, label='time') - - @staticmethod - def default_units(x, axis): - return 'time' - - -# 
time formatter -class TimeFormatter(Formatter): - - def __init__(self, locs): - self.locs = locs - - def __call__(self, x, pos=0): - fmt = '%H:%M:%S' - s = int(x) - ms = int((x - s) * 1e3) - us = int((x - s) * 1e6 - ms) - m, s = divmod(s, 60) - h, m = divmod(m, 60) - _, h = divmod(h, 24) - if us != 0: - fmt += '.%6f' - elif ms != 0: - fmt += '.%3f' - - return pydt.time(h, m, s, us).strftime(fmt) - - -# Period Conversion - - -class PeriodConverter(dates.DateConverter): - - @staticmethod - def convert(values, units, axis): - if not hasattr(axis, 'freq'): - raise TypeError('Axis must have `freq` set to convert to Periods') - valid_types = (compat.string_types, datetime, - Period, pydt.date, pydt.time) - if (isinstance(values, valid_types) or is_integer(values) or - is_float(values)): - return get_datevalue(values, axis.freq) - if isinstance(values, PeriodIndex): - return values.asfreq(axis.freq)._values - if isinstance(values, Index): - return values.map(lambda x: get_datevalue(x, axis.freq)) - if is_period_arraylike(values): - return PeriodIndex(values, freq=axis.freq)._values - if isinstance(values, (list, tuple, np.ndarray, Index)): - return [get_datevalue(x, axis.freq) for x in values] - return values - - -def get_datevalue(date, freq): - if isinstance(date, Period): - return date.asfreq(freq).ordinal - elif isinstance(date, (compat.string_types, datetime, - pydt.date, pydt.time)): - return Period(date, freq).ordinal - elif (is_integer(date) or is_float(date) or - (isinstance(date, (np.ndarray, Index)) and (date.size == 1))): - return date - elif date is None: - return None - raise ValueError("Unrecognizable date '%s'" % date) - - -def _dt_to_float_ordinal(dt): - """ - Convert :mod:`datetime` to the Gregorian date as UTC float days, - preserving hours, minutes, seconds and microseconds. Return value - is a :func:`float`. 
- """ - if (isinstance(dt, (np.ndarray, Index, Series) - ) and is_datetime64_ns_dtype(dt)): - base = dates.epoch2num(dt.asi8 / 1.0E9) - else: - base = dates.date2num(dt) - return base - - -# Datetime Conversion -class DatetimeConverter(dates.DateConverter): - - @staticmethod - def convert(values, unit, axis): - def try_parse(values): - try: - return _dt_to_float_ordinal(tools.to_datetime(values)) - except Exception: - return values - - if isinstance(values, (datetime, pydt.date)): - return _dt_to_float_ordinal(values) - elif isinstance(values, np.datetime64): - return _dt_to_float_ordinal(lib.Timestamp(values)) - elif isinstance(values, pydt.time): - return dates.date2num(values) - elif (is_integer(values) or is_float(values)): - return values - elif isinstance(values, compat.string_types): - return try_parse(values) - elif isinstance(values, (list, tuple, np.ndarray, Index)): - if isinstance(values, Index): - values = values.values - if not isinstance(values, np.ndarray): - values = com._asarray_tuplesafe(values) - - if is_integer_dtype(values) or is_float_dtype(values): - return values - - try: - values = tools.to_datetime(values) - if isinstance(values, Index): - values = _dt_to_float_ordinal(values) - else: - values = [_dt_to_float_ordinal(x) for x in values] - except Exception: - values = _dt_to_float_ordinal(values) - - return values - - @staticmethod - def axisinfo(unit, axis): - """ - Return the :class:`~matplotlib.units.AxisInfo` for *unit*. - - *unit* is a tzinfo instance or None. - The *axis* argument is required but not used. 
- """ - tz = unit - - majloc = PandasAutoDateLocator(tz=tz) - majfmt = PandasAutoDateFormatter(majloc, tz=tz) - datemin = pydt.date(2000, 1, 1) - datemax = pydt.date(2010, 1, 1) - - return units.AxisInfo(majloc=majloc, majfmt=majfmt, label='', - default_limits=(datemin, datemax)) - - -class PandasAutoDateFormatter(dates.AutoDateFormatter): - - def __init__(self, locator, tz=None, defaultfmt='%Y-%m-%d'): - dates.AutoDateFormatter.__init__(self, locator, tz, defaultfmt) - # matplotlib.dates._UTC has no _utcoffset called by pandas - if self._tz is dates.UTC: - self._tz._utcoffset = self._tz.utcoffset(None) - - # For mpl > 2.0 the format strings are controlled via rcparams - # so do not mess with them. For mpl < 2.0 change the second - # break point and add a musec break point - if _mpl_le_2_0_0(): - self.scaled[1. / SEC_PER_DAY] = '%H:%M:%S' - self.scaled[1. / MUSEC_PER_DAY] = '%H:%M:%S.%f' - - -class PandasAutoDateLocator(dates.AutoDateLocator): - - def get_locator(self, dmin, dmax): - 'Pick the best locator based on a distance.' - delta = relativedelta(dmax, dmin) - - num_days = ((delta.years * 12.0) + delta.months * 31.0) + delta.days - num_sec = (delta.hours * 60.0 + delta.minutes) * 60.0 + delta.seconds - tot_sec = num_days * 86400. + num_sec - - if abs(tot_sec) < self.minticks: - self._freq = -1 - locator = MilliSecondLocator(self.tz) - locator.set_axis(self.axis) - - locator.set_view_interval(*self.axis.get_view_interval()) - locator.set_data_interval(*self.axis.get_data_interval()) - return locator - - return dates.AutoDateLocator.get_locator(self, dmin, dmax) - - def _get_unit(self): - return MilliSecondLocator.get_unit_generic(self._freq) - - -class MilliSecondLocator(dates.DateLocator): - - UNIT = 1. / (24 * 3600 * 1000) - - def __init__(self, tz): - dates.DateLocator.__init__(self, tz) - self._interval = 1. 
- - def _get_unit(self): - return self.get_unit_generic(-1) - - @staticmethod - def get_unit_generic(freq): - unit = dates.RRuleLocator.get_unit_generic(freq) - if unit < 0: - return MilliSecondLocator.UNIT - return unit - - def __call__(self): - # if no data have been set, this will tank with a ValueError - try: - dmin, dmax = self.viewlim_to_dt() - except ValueError: - return [] - - if dmin > dmax: - dmax, dmin = dmin, dmax - # We need to cap at the endpoints of valid datetime - - # TODO(wesm) unused? - # delta = relativedelta(dmax, dmin) - # try: - # start = dmin - delta - # except ValueError: - # start = _from_ordinal(1.0) - - # try: - # stop = dmax + delta - # except ValueError: - # # The magic number! - # stop = _from_ordinal(3652059.9999999) - - nmax, nmin = dates.date2num((dmax, dmin)) - - num = (nmax - nmin) * 86400 * 1000 - max_millis_ticks = 6 - for interval in [1, 10, 50, 100, 200, 500]: - if num <= interval * (max_millis_ticks - 1): - self._interval = interval - break - else: - # We went through the whole loop without breaking, default to 1 - self._interval = 1000. - - estimate = (nmax - nmin) / (self._get_unit() * self._get_interval()) - - if estimate > self.MAXTICKS * 2: - raise RuntimeError(('MillisecondLocator estimated to generate %d ' - 'ticks from %s to %s: exceeds Locator.MAXTICKS' - '* 2 (%d) ') % - (estimate, dmin, dmax, self.MAXTICKS * 2)) - - freq = '%dL' % self._get_interval() - tz = self.tz.tzname(None) - st = _from_ordinal(dates.date2num(dmin)) # strip tz - ed = _from_ordinal(dates.date2num(dmax)) - all_dates = date_range(start=st, end=ed, freq=freq, tz=tz).asobject - - try: - if len(all_dates) > 0: - locs = self.raise_if_exceeds(dates.date2num(all_dates)) - return locs - except Exception: # pragma: no cover - pass - - lims = dates.date2num([dmin, dmax]) - return lims - - def _get_interval(self): - return self._interval - - def autoscale(self): - """ - Set the view limits to include the data range. 
- """ - dmin, dmax = self.datalim_to_dt() - if dmin > dmax: - dmax, dmin = dmin, dmax - - # We need to cap at the endpoints of valid datetime - - # TODO(wesm): unused? - - # delta = relativedelta(dmax, dmin) - # try: - # start = dmin - delta - # except ValueError: - # start = _from_ordinal(1.0) - - # try: - # stop = dmax + delta - # except ValueError: - # # The magic number! - # stop = _from_ordinal(3652059.9999999) - - dmin, dmax = self.datalim_to_dt() - - vmin = dates.date2num(dmin) - vmax = dates.date2num(dmax) - - return self.nonsingular(vmin, vmax) - - -def _from_ordinal(x, tz=None): - ix = int(x) - dt = datetime.fromordinal(ix) - remainder = float(x) - ix - hour, remainder = divmod(24 * remainder, 1) - minute, remainder = divmod(60 * remainder, 1) - second, remainder = divmod(60 * remainder, 1) - microsecond = int(1e6 * remainder) - if microsecond < 10: - microsecond = 0 # compensate for rounding errors - dt = datetime(dt.year, dt.month, dt.day, int(hour), int(minute), - int(second), microsecond) - if tz is not None: - dt = dt.astimezone(tz) - - if microsecond > 999990: # compensate for rounding errors - dt += timedelta(microseconds=1e6 - microsecond) - - return dt - -# Fixed frequency dynamic tick locators and formatters - -# ------------------------------------------------------------------------- -# --- Locators --- -# ------------------------------------------------------------------------- - - -def _get_default_annual_spacing(nyears): - """ - Returns a default spacing between consecutive ticks for annual data. 
- """ - if nyears < 11: - (min_spacing, maj_spacing) = (1, 1) - elif nyears < 20: - (min_spacing, maj_spacing) = (1, 2) - elif nyears < 50: - (min_spacing, maj_spacing) = (1, 5) - elif nyears < 100: - (min_spacing, maj_spacing) = (5, 10) - elif nyears < 200: - (min_spacing, maj_spacing) = (5, 25) - elif nyears < 600: - (min_spacing, maj_spacing) = (10, 50) - else: - factor = nyears // 1000 + 1 - (min_spacing, maj_spacing) = (factor * 20, factor * 100) - return (min_spacing, maj_spacing) - - -def period_break(dates, period): - """ - Returns the indices where the given period changes. - - Parameters - ---------- - dates : PeriodIndex - Array of intervals to monitor. - period : string - Name of the period to monitor. - """ - current = getattr(dates, period) - previous = getattr(dates - 1, period) - return (current - previous).nonzero()[0] - - -def has_level_label(label_flags, vmin): - """ - Returns true if the ``label_flags`` indicate there is at least one label - for this level. - - if the minimum view limit is not an exact integer, then the first tick - label won't be shown, so we must adjust for that. 
- """ - if label_flags.size == 0 or (label_flags.size == 1 and - label_flags[0] == 0 and - vmin % 1 > 0.0): - return False - else: - return True - - -def _daily_finder(vmin, vmax, freq): - periodsperday = -1 - - if freq >= FreqGroup.FR_HR: - if freq == FreqGroup.FR_NS: - periodsperday = 24 * 60 * 60 * 1000000000 - elif freq == FreqGroup.FR_US: - periodsperday = 24 * 60 * 60 * 1000000 - elif freq == FreqGroup.FR_MS: - periodsperday = 24 * 60 * 60 * 1000 - elif freq == FreqGroup.FR_SEC: - periodsperday = 24 * 60 * 60 - elif freq == FreqGroup.FR_MIN: - periodsperday = 24 * 60 - elif freq == FreqGroup.FR_HR: - periodsperday = 24 - else: # pragma: no cover - raise ValueError("unexpected frequency: %s" % freq) - periodsperyear = 365 * periodsperday - periodspermonth = 28 * periodsperday - - elif freq == FreqGroup.FR_BUS: - periodsperyear = 261 - periodspermonth = 19 - elif freq == FreqGroup.FR_DAY: - periodsperyear = 365 - periodspermonth = 28 - elif frequencies.get_freq_group(freq) == FreqGroup.FR_WK: - periodsperyear = 52 - periodspermonth = 3 - else: # pragma: no cover - raise ValueError("unexpected frequency") - - # save this for later usage - vmin_orig = vmin - - (vmin, vmax) = (Period(ordinal=int(vmin), freq=freq), - Period(ordinal=int(vmax), freq=freq)) - span = vmax.ordinal - vmin.ordinal + 1 - dates_ = PeriodIndex(start=vmin, end=vmax, freq=freq) - # Initialize the output - info = np.zeros(span, - dtype=[('val', np.int64), ('maj', bool), - ('min', bool), ('fmt', '|S20')]) - info['val'][:] = dates_._values - info['fmt'][:] = '' - info['maj'][[0, -1]] = True - # .. and set some shortcuts - info_maj = info['maj'] - info_min = info['min'] - info_fmt = info['fmt'] - - def first_label(label_flags): - if (label_flags[0] == 0) and (label_flags.size > 1) and \ - ((vmin_orig % 1) > 0.0): - return label_flags[1] - else: - return label_flags[0] - - # Case 1. 
Less than a month - if span <= periodspermonth: - day_start = period_break(dates_, 'day') - month_start = period_break(dates_, 'month') - - def _hour_finder(label_interval, force_year_start): - _hour = dates_.hour - _prev_hour = (dates_ - 1).hour - hour_start = (_hour - _prev_hour) != 0 - info_maj[day_start] = True - info_min[hour_start & (_hour % label_interval == 0)] = True - year_start = period_break(dates_, 'year') - info_fmt[hour_start & (_hour % label_interval == 0)] = '%H:%M' - info_fmt[day_start] = '%H:%M\n%d-%b' - info_fmt[year_start] = '%H:%M\n%d-%b\n%Y' - if force_year_start and not has_level_label(year_start, vmin_orig): - info_fmt[first_label(day_start)] = '%H:%M\n%d-%b\n%Y' - - def _minute_finder(label_interval): - hour_start = period_break(dates_, 'hour') - _minute = dates_.minute - _prev_minute = (dates_ - 1).minute - minute_start = (_minute - _prev_minute) != 0 - info_maj[hour_start] = True - info_min[minute_start & (_minute % label_interval == 0)] = True - year_start = period_break(dates_, 'year') - info_fmt = info['fmt'] - info_fmt[minute_start & (_minute % label_interval == 0)] = '%H:%M' - info_fmt[day_start] = '%H:%M\n%d-%b' - info_fmt[year_start] = '%H:%M\n%d-%b\n%Y' - - def _second_finder(label_interval): - minute_start = period_break(dates_, 'minute') - _second = dates_.second - _prev_second = (dates_ - 1).second - second_start = (_second - _prev_second) != 0 - info['maj'][minute_start] = True - info['min'][second_start & (_second % label_interval == 0)] = True - year_start = period_break(dates_, 'year') - info_fmt = info['fmt'] - info_fmt[second_start & (_second % - label_interval == 0)] = '%H:%M:%S' - info_fmt[day_start] = '%H:%M:%S\n%d-%b' - info_fmt[year_start] = '%H:%M:%S\n%d-%b\n%Y' - - if span < periodsperday / 12000.0: - _second_finder(1) - elif span < periodsperday / 6000.0: - _second_finder(2) - elif span < periodsperday / 2400.0: - _second_finder(5) - elif span < periodsperday / 1200.0: - _second_finder(10) - elif span < 
periodsperday / 800.0: - _second_finder(15) - elif span < periodsperday / 400.0: - _second_finder(30) - elif span < periodsperday / 150.0: - _minute_finder(1) - elif span < periodsperday / 70.0: - _minute_finder(2) - elif span < periodsperday / 24.0: - _minute_finder(5) - elif span < periodsperday / 12.0: - _minute_finder(15) - elif span < periodsperday / 6.0: - _minute_finder(30) - elif span < periodsperday / 2.5: - _hour_finder(1, False) - elif span < periodsperday / 1.5: - _hour_finder(2, False) - elif span < periodsperday * 1.25: - _hour_finder(3, False) - elif span < periodsperday * 2.5: - _hour_finder(6, True) - elif span < periodsperday * 4: - _hour_finder(12, True) - else: - info_maj[month_start] = True - info_min[day_start] = True - year_start = period_break(dates_, 'year') - info_fmt = info['fmt'] - info_fmt[day_start] = '%d' - info_fmt[month_start] = '%d\n%b' - info_fmt[year_start] = '%d\n%b\n%Y' - if not has_level_label(year_start, vmin_orig): - if not has_level_label(month_start, vmin_orig): - info_fmt[first_label(day_start)] = '%d\n%b\n%Y' - else: - info_fmt[first_label(month_start)] = '%d\n%b\n%Y' - - # Case 2. Less than three months - elif span <= periodsperyear // 4: - month_start = period_break(dates_, 'month') - info_maj[month_start] = True - if freq < FreqGroup.FR_HR: - info['min'] = True - else: - day_start = period_break(dates_, 'day') - info['min'][day_start] = True - week_start = period_break(dates_, 'week') - year_start = period_break(dates_, 'year') - info_fmt[week_start] = '%d' - info_fmt[month_start] = '\n\n%b' - info_fmt[year_start] = '\n\n%b\n%Y' - if not has_level_label(year_start, vmin_orig): - if not has_level_label(month_start, vmin_orig): - info_fmt[first_label(week_start)] = '\n\n%b\n%Y' - else: - info_fmt[first_label(month_start)] = '\n\n%b\n%Y' - # Case 3. Less than 14 months ............... 
- elif span <= 1.15 * periodsperyear: - year_start = period_break(dates_, 'year') - month_start = period_break(dates_, 'month') - week_start = period_break(dates_, 'week') - info_maj[month_start] = True - info_min[week_start] = True - info_min[year_start] = False - info_min[month_start] = False - info_fmt[month_start] = '%b' - info_fmt[year_start] = '%b\n%Y' - if not has_level_label(year_start, vmin_orig): - info_fmt[first_label(month_start)] = '%b\n%Y' - # Case 4. Less than 2.5 years ............... - elif span <= 2.5 * periodsperyear: - year_start = period_break(dates_, 'year') - quarter_start = period_break(dates_, 'quarter') - month_start = period_break(dates_, 'month') - info_maj[quarter_start] = True - info_min[month_start] = True - info_fmt[quarter_start] = '%b' - info_fmt[year_start] = '%b\n%Y' - # Case 4. Less than 4 years ................. - elif span <= 4 * periodsperyear: - year_start = period_break(dates_, 'year') - month_start = period_break(dates_, 'month') - info_maj[year_start] = True - info_min[month_start] = True - info_min[year_start] = False - - month_break = dates_[month_start].month - jan_or_jul = month_start[(month_break == 1) | (month_break == 7)] - info_fmt[jan_or_jul] = '%b' - info_fmt[year_start] = '%b\n%Y' - # Case 5. Less than 11 years ................ - elif span <= 11 * periodsperyear: - year_start = period_break(dates_, 'year') - quarter_start = period_break(dates_, 'quarter') - info_maj[year_start] = True - info_min[quarter_start] = True - info_min[year_start] = False - info_fmt[year_start] = '%Y' - # Case 6. More than 12 years ................ 
- else: - year_start = period_break(dates_, 'year') - year_break = dates_[year_start].year - nyears = span / periodsperyear - (min_anndef, maj_anndef) = _get_default_annual_spacing(nyears) - major_idx = year_start[(year_break % maj_anndef == 0)] - info_maj[major_idx] = True - minor_idx = year_start[(year_break % min_anndef == 0)] - info_min[minor_idx] = True - info_fmt[major_idx] = '%Y' - - return info - - -def _monthly_finder(vmin, vmax, freq): - periodsperyear = 12 - - vmin_orig = vmin - (vmin, vmax) = (int(vmin), int(vmax)) - span = vmax - vmin + 1 - - # Initialize the output - info = np.zeros(span, - dtype=[('val', int), ('maj', bool), ('min', bool), - ('fmt', '|S8')]) - info['val'] = np.arange(vmin, vmax + 1) - dates_ = info['val'] - info['fmt'] = '' - year_start = (dates_ % 12 == 0).nonzero()[0] - info_maj = info['maj'] - info_fmt = info['fmt'] - - if span <= 1.15 * periodsperyear: - info_maj[year_start] = True - info['min'] = True - - info_fmt[:] = '%b' - info_fmt[year_start] = '%b\n%Y' - - if not has_level_label(year_start, vmin_orig): - if dates_.size > 1: - idx = 1 - else: - idx = 0 - info_fmt[idx] = '%b\n%Y' - - elif span <= 2.5 * periodsperyear: - quarter_start = (dates_ % 3 == 0).nonzero() - info_maj[year_start] = True - # TODO: Check the following : is it really info['fmt'] ? 
- info['fmt'][quarter_start] = True - info['min'] = True - - info_fmt[quarter_start] = '%b' - info_fmt[year_start] = '%b\n%Y' - - elif span <= 4 * periodsperyear: - info_maj[year_start] = True - info['min'] = True - - jan_or_jul = (dates_ % 12 == 0) | (dates_ % 12 == 6) - info_fmt[jan_or_jul] = '%b' - info_fmt[year_start] = '%b\n%Y' - - elif span <= 11 * periodsperyear: - quarter_start = (dates_ % 3 == 0).nonzero() - info_maj[year_start] = True - info['min'][quarter_start] = True - - info_fmt[year_start] = '%Y' - - else: - nyears = span / periodsperyear - (min_anndef, maj_anndef) = _get_default_annual_spacing(nyears) - years = dates_[year_start] // 12 + 1 - major_idx = year_start[(years % maj_anndef == 0)] - info_maj[major_idx] = True - info['min'][year_start[(years % min_anndef == 0)]] = True - - info_fmt[major_idx] = '%Y' - - return info - - -def _quarterly_finder(vmin, vmax, freq): - periodsperyear = 4 - vmin_orig = vmin - (vmin, vmax) = (int(vmin), int(vmax)) - span = vmax - vmin + 1 - - info = np.zeros(span, - dtype=[('val', int), ('maj', bool), ('min', bool), - ('fmt', '|S8')]) - info['val'] = np.arange(vmin, vmax + 1) - info['fmt'] = '' - dates_ = info['val'] - info_maj = info['maj'] - info_fmt = info['fmt'] - year_start = (dates_ % 4 == 0).nonzero()[0] - - if span <= 3.5 * periodsperyear: - info_maj[year_start] = True - info['min'] = True - - info_fmt[:] = 'Q%q' - info_fmt[year_start] = 'Q%q\n%F' - if not has_level_label(year_start, vmin_orig): - if dates_.size > 1: - idx = 1 - else: - idx = 0 - info_fmt[idx] = 'Q%q\n%F' - - elif span <= 11 * periodsperyear: - info_maj[year_start] = True - info['min'] = True - info_fmt[year_start] = '%F' - - else: - years = dates_[year_start] // 4 + 1 - nyears = span / periodsperyear - (min_anndef, maj_anndef) = _get_default_annual_spacing(nyears) - major_idx = year_start[(years % maj_anndef == 0)] - info_maj[major_idx] = True - info['min'][year_start[(years % min_anndef == 0)]] = True - info_fmt[major_idx] = '%F' - - 
return info - - -def _annual_finder(vmin, vmax, freq): - (vmin, vmax) = (int(vmin), int(vmax + 1)) - span = vmax - vmin + 1 - - info = np.zeros(span, - dtype=[('val', int), ('maj', bool), ('min', bool), - ('fmt', '|S8')]) - info['val'] = np.arange(vmin, vmax + 1) - info['fmt'] = '' - dates_ = info['val'] - - (min_anndef, maj_anndef) = _get_default_annual_spacing(span) - major_idx = dates_ % maj_anndef == 0 - info['maj'][major_idx] = True - info['min'][(dates_ % min_anndef == 0)] = True - info['fmt'][major_idx] = '%Y' - - return info - - -def get_finder(freq): - if isinstance(freq, compat.string_types): - freq = frequencies.get_freq(freq) - fgroup = frequencies.get_freq_group(freq) - - if fgroup == FreqGroup.FR_ANN: - return _annual_finder - elif fgroup == FreqGroup.FR_QTR: - return _quarterly_finder - elif freq == FreqGroup.FR_MTH: - return _monthly_finder - elif ((freq >= FreqGroup.FR_BUS) or fgroup == FreqGroup.FR_WK): - return _daily_finder - else: # pragma: no cover - errmsg = "Unsupported frequency: %s" % (freq) - raise NotImplementedError(errmsg) - - -class TimeSeries_DateLocator(Locator): - """ - Locates the ticks along an axis controlled by a :class:`Series`. - - Parameters - ---------- - freq : {var} - Valid frequency specifier. - minor_locator : {False, True}, optional - Whether the locator is for minor ticks (True) or not. - dynamic_mode : {True, False}, optional - Whether the locator should work in dynamic mode. 
- base : {int}, optional - quarter : {int}, optional - month : {int}, optional - day : {int}, optional - """ - - def __init__(self, freq, minor_locator=False, dynamic_mode=True, - base=1, quarter=1, month=1, day=1, plot_obj=None): - if isinstance(freq, compat.string_types): - freq = frequencies.get_freq(freq) - self.freq = freq - self.base = base - (self.quarter, self.month, self.day) = (quarter, month, day) - self.isminor = minor_locator - self.isdynamic = dynamic_mode - self.offset = 0 - self.plot_obj = plot_obj - self.finder = get_finder(freq) - - def _get_default_locs(self, vmin, vmax): - "Returns the default locations of ticks." - - if self.plot_obj.date_axis_info is None: - self.plot_obj.date_axis_info = self.finder(vmin, vmax, self.freq) - - locator = self.plot_obj.date_axis_info - - if self.isminor: - return np.compress(locator['min'], locator['val']) - return np.compress(locator['maj'], locator['val']) - - def __call__(self): - 'Return the locations of the ticks.' - # axis calls Locator.set_axis inside set_m_formatter - vi = tuple(self.axis.get_view_interval()) - if vi != self.plot_obj.view_interval: - self.plot_obj.date_axis_info = None - self.plot_obj.view_interval = vi - vmin, vmax = vi - if vmax < vmin: - vmin, vmax = vmax, vmin - if self.isdynamic: - locs = self._get_default_locs(vmin, vmax) - else: # pragma: no cover - base = self.base - (d, m) = divmod(vmin, base) - vmin = (d + 1) * base - locs = lrange(vmin, vmax + 1, base) - return locs - - def autoscale(self): - """ - Sets the view limits to the nearest multiples of base that contain the - data. 
- """ - # requires matplotlib >= 0.98.0 - (vmin, vmax) = self.axis.get_data_interval() - - locs = self._get_default_locs(vmin, vmax) - (vmin, vmax) = locs[[0, -1]] - if vmin == vmax: - vmin -= 1 - vmax += 1 - return nonsingular(vmin, vmax) - -# ------------------------------------------------------------------------- -# --- Formatter --- -# ------------------------------------------------------------------------- - - -class TimeSeries_DateFormatter(Formatter): - """ - Formats the ticks along an axis controlled by a :class:`PeriodIndex`. - - Parameters - ---------- - freq : {int, string} - Valid frequency specifier. - minor_locator : {False, True} - Whether the current formatter should apply to minor ticks (True) or - major ticks (False). - dynamic_mode : {True, False} - Whether the formatter works in dynamic mode or not. - """ - - def __init__(self, freq, minor_locator=False, dynamic_mode=True, - plot_obj=None): - if isinstance(freq, compat.string_types): - freq = frequencies.get_freq(freq) - self.format = None - self.freq = freq - self.locs = [] - self.formatdict = None - self.isminor = minor_locator - self.isdynamic = dynamic_mode - self.offset = 0 - self.plot_obj = plot_obj - self.finder = get_finder(freq) - - def _set_default_format(self, vmin, vmax): - "Returns the default ticks spacing." - - if self.plot_obj.date_axis_info is None: - self.plot_obj.date_axis_info = self.finder(vmin, vmax, self.freq) - info = self.plot_obj.date_axis_info - - if self.isminor: - format = np.compress(info['min'] & np.logical_not(info['maj']), - info) - else: - format = np.compress(info['maj'], info) - self.formatdict = dict([(x, f) for (x, _, _, f) in format]) - return self.formatdict - - def set_locs(self, locs): - 'Sets the locations of the ticks' - # don't actually use the locs. This is just needed to work with - # matplotlib. 
Force to use vmin, vmax - self.locs = locs - - (vmin, vmax) = vi = tuple(self.axis.get_view_interval()) - if vi != self.plot_obj.view_interval: - self.plot_obj.date_axis_info = None - self.plot_obj.view_interval = vi - if vmax < vmin: - (vmin, vmax) = (vmax, vmin) - self._set_default_format(vmin, vmax) - - def __call__(self, x, pos=0): - if self.formatdict is None: - return '' - else: - fmt = self.formatdict.pop(x, '') - return Period(ordinal=int(x), freq=self.freq).strftime(fmt) + from pandas.plotting._converter import register as register_ + msg = ("'pandas.tseries.converter.register' has been moved and renamed to " + "'pandas.plotting.register_matplotlib_converters'. ") + warnings.warn(msg, FutureWarning, stacklevel=2) + register_() diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index e0c602bf5a037..0cffd818202ed 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -1,41 +1,32 @@ +# -*- coding: utf-8 -*- from datetime import timedelta -from pandas.compat import long, zip +from pandas.compat import zip from pandas import compat import re -import warnings import numpy as np -from pandas.types.generic import ABCSeries -from pandas.types.common import (is_integer, - is_period_arraylike, - is_timedelta64_dtype, - is_datetime64_dtype) +from pandas.core.dtypes.generic import ABCSeries +from pandas.core.dtypes.common import ( + is_period_arraylike, + is_timedelta64_dtype, + is_datetime64_dtype) -import pandas.core.algorithms as algos -from pandas.core.algorithms import unique from pandas.tseries.offsets import DateOffset -from pandas.util.decorators import cache_readonly, deprecate_kwarg -import pandas.tseries.offsets as offsets -import pandas.lib as lib -import pandas.tslib as tslib -from pandas.tslib import Timedelta -from pytz import AmbiguousTimeError +from pandas._libs.tslib import Timedelta + +import pandas._libs.tslibs.frequencies as libfreqs +from pandas._libs.tslibs.frequencies import ( # noqa, 
semi-public API + get_freq, get_base_alias, get_to_timestamp_base, get_freq_code, + FreqGroup, + is_subperiod, is_superperiod) -class FreqGroup(object): - FR_ANN = 1000 - FR_QTR = 2000 - FR_MTH = 3000 - FR_WK = 4000 - FR_BUS = 5000 - FR_DAY = 6000 - FR_HR = 7000 - FR_MIN = 8000 - FR_SEC = 9000 - FR_MS = 10000 - FR_US = 11000 - FR_NS = 12000 +from pandas._libs.tslibs.resolution import (Resolution, + _FrequencyInferer, + _TimedeltaFrequencyInferer) + +from pytz import AmbiguousTimeError RESO_NS = 0 @@ -46,320 +37,10 @@ class FreqGroup(object): RESO_HR = 5 RESO_DAY = 6 - -class Resolution(object): - - RESO_US = RESO_US - RESO_MS = RESO_MS - RESO_SEC = RESO_SEC - RESO_MIN = RESO_MIN - RESO_HR = RESO_HR - RESO_DAY = RESO_DAY - - _reso_str_map = { - RESO_NS: 'nanosecond', - RESO_US: 'microsecond', - RESO_MS: 'millisecond', - RESO_SEC: 'second', - RESO_MIN: 'minute', - RESO_HR: 'hour', - RESO_DAY: 'day' - } - - # factor to multiply a value by to convert it to the next finer grained - # resolution - _reso_mult_map = { - RESO_NS: None, - RESO_US: 1000, - RESO_MS: 1000, - RESO_SEC: 1000, - RESO_MIN: 60, - RESO_HR: 60, - RESO_DAY: 24 - } - - _reso_str_bump_map = { - 'D': 'H', - 'H': 'T', - 'T': 'S', - 'S': 'L', - 'L': 'U', - 'U': 'N', - 'N': None - } - - _str_reso_map = dict([(v, k) for k, v in compat.iteritems(_reso_str_map)]) - - _reso_freq_map = { - 'year': 'A', - 'quarter': 'Q', - 'month': 'M', - 'day': 'D', - 'hour': 'H', - 'minute': 'T', - 'second': 'S', - 'millisecond': 'L', - 'microsecond': 'U', - 'nanosecond': 'N'} - - _freq_reso_map = dict([(v, k) - for k, v in compat.iteritems(_reso_freq_map)]) - - @classmethod - def get_str(cls, reso): - """ - Return resolution str against resolution code. - - Example - ------- - >>> Resolution.get_str(Resolution.RESO_SEC) - 'second' - """ - return cls._reso_str_map.get(reso, 'day') - - @classmethod - def get_reso(cls, resostr): - """ - Return resolution str against resolution code. 
- - Example - ------- - >>> Resolution.get_reso('second') - 2 - - >>> Resolution.get_reso('second') == Resolution.RESO_SEC - True - """ - return cls._str_reso_map.get(resostr, cls.RESO_DAY) - - @classmethod - def get_freq_group(cls, resostr): - """ - Return frequency str against resolution str. - - Example - ------- - >>> f.Resolution.get_freq_group('day') - 4000 - """ - return get_freq_group(cls.get_freq(resostr)) - - @classmethod - def get_freq(cls, resostr): - """ - Return frequency str against resolution str. - - Example - ------- - >>> f.Resolution.get_freq('day') - 'D' - """ - return cls._reso_freq_map[resostr] - - @classmethod - def get_str_from_freq(cls, freq): - """ - Return resolution str against frequency str. - - Example - ------- - >>> Resolution.get_str_from_freq('H') - 'hour' - """ - return cls._freq_reso_map.get(freq, 'day') - - @classmethod - def get_reso_from_freq(cls, freq): - """ - Return resolution code against frequency str. - - Example - ------- - >>> Resolution.get_reso_from_freq('H') - 4 - - >>> Resolution.get_reso_from_freq('H') == Resolution.RESO_HR - True - """ - return cls.get_reso(cls.get_str_from_freq(freq)) - - @classmethod - def get_stride_from_decimal(cls, value, freq): - """ - Convert freq with decimal stride into a higher freq with integer stride - - Parameters - ---------- - value : integer or float - freq : string - Frequency string - - Raises - ------ - ValueError - If the float cannot be converted to an integer at any resolution. 
- - Example - ------- - >>> Resolution.get_stride_from_decimal(1.5, 'T') - (90, 'S') - - >>> Resolution.get_stride_from_decimal(1.04, 'H') - (3744, 'S') - - >>> Resolution.get_stride_from_decimal(1, 'D') - (1, 'D') - """ - - if np.isclose(value % 1, 0): - return int(value), freq - else: - start_reso = cls.get_reso_from_freq(freq) - if start_reso == 0: - raise ValueError( - "Could not convert to integer offset at any resolution" - ) - - next_value = cls._reso_mult_map[start_reso] * value - next_name = cls._reso_str_bump_map[freq] - return cls.get_stride_from_decimal(next_value, next_name) - - -def get_to_timestamp_base(base): - """ - Return frequency code group used for base of to_timestamp against - frequency code. - - Example - ------- - # Return day freq code against longer freq than day - >>> get_to_timestamp_base(get_freq_code('D')[0]) - 6000 - >>> get_to_timestamp_base(get_freq_code('W')[0]) - 6000 - >>> get_to_timestamp_base(get_freq_code('M')[0]) - 6000 - - # Return second freq code against hour between second - >>> get_to_timestamp_base(get_freq_code('H')[0]) - 9000 - >>> get_to_timestamp_base(get_freq_code('S')[0]) - 9000 - """ - if base < FreqGroup.FR_BUS: - return FreqGroup.FR_DAY - if FreqGroup.FR_HR <= base <= FreqGroup.FR_SEC: - return FreqGroup.FR_SEC - return base - - -def get_freq_group(freq): - """ - Return frequency code group of given frequency str or offset. - - Example - ------- - >>> get_freq_group('W-MON') - 4000 - - >>> get_freq_group('W-FRI') - 4000 - """ - if isinstance(freq, offsets.DateOffset): - freq = freq.rule_code - - if isinstance(freq, compat.string_types): - base, mult = get_freq_code(freq) - freq = base - elif isinstance(freq, int): - pass - else: - raise ValueError('input must be str, offset or int') - return (freq // 1000) * 1000 - - -def get_freq(freq): - """ - Return frequency code of given frequency str. - If input is not string, return input as it is. 
- - Example - ------- - >>> get_freq('A') - 1000 - - >>> get_freq('3A') - 1000 - """ - if isinstance(freq, compat.string_types): - base, mult = get_freq_code(freq) - freq = base - return freq - - -def get_freq_code(freqstr): - """ - Return freq str or tuple to freq code and stride (mult) - - Parameters - ---------- - freqstr : str or tuple - - Returns - ------- - return : tuple of base frequency code and stride (mult) - - Example - ------- - >>> get_freq_code('3D') - (6000, 3) - - >>> get_freq_code('D') - (6000, 1) - - >>> get_freq_code(('D', 3)) - (6000, 3) - """ - if isinstance(freqstr, DateOffset): - freqstr = (freqstr.rule_code, freqstr.n) - - if isinstance(freqstr, tuple): - if (is_integer(freqstr[0]) and - is_integer(freqstr[1])): - # e.g., freqstr = (2000, 1) - return freqstr - else: - # e.g., freqstr = ('T', 5) - try: - code = _period_str_to_code(freqstr[0]) - stride = freqstr[1] - except: - if is_integer(freqstr[1]): - raise - code = _period_str_to_code(freqstr[1]) - stride = freqstr[0] - return code, stride - - if is_integer(freqstr): - return (freqstr, 1) - - base, stride = _base_and_stride(freqstr) - code = _period_str_to_code(base) - - return code, stride - - -def _get_freq_str(base, mult=1): - code = _reverse_period_code_map.get(base) - if mult == 1: - return code - return str(mult) + code - - # --------------------------------------------------------------------- # Offset names ("time rules") and related functions - +from pandas._libs.tslibs.offsets import _offset_to_period_map # noqa:E402 from pandas.tseries.offsets import (Nano, Micro, Milli, Second, # noqa Minute, Hour, Day, BDay, CDay, Week, MonthBegin, @@ -375,69 +56,12 @@ def _get_freq_str(base, mult=1): #: cache of previously seen offsets _offset_map = {} -_offset_to_period_map = { - 'WEEKDAY': 'D', - 'EOM': 'M', - 'BM': 'M', - 'BQS': 'Q', - 'QS': 'Q', - 'BQ': 'Q', - 'BA': 'A', - 'AS': 'A', - 'BAS': 'A', - 'MS': 'M', - 'D': 'D', - 'C': 'C', - 'B': 'B', - 'T': 'T', - 'S': 'S', - 'L': 'L', - 
'U': 'U', - 'N': 'N', - 'H': 'H', - 'Q': 'Q', - 'A': 'A', - 'W': 'W', - 'M': 'M' -} - -need_suffix = ['QS', 'BQ', 'BQS', 'AS', 'BA', 'BAS'] -for __prefix in need_suffix: - for _m in tslib._MONTHS: - _offset_to_period_map['%s-%s' % (__prefix, _m)] = \ - _offset_to_period_map[__prefix] -for __prefix in ['A', 'Q']: - for _m in tslib._MONTHS: - _alias = '%s-%s' % (__prefix, _m) - _offset_to_period_map[_alias] = _alias - -_days = ['MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN'] -for _d in _days: - _offset_to_period_map['W-%s' % _d] = 'W-%s' % _d - def get_period_alias(offset_str): """ alias to closest period strings BQ->Q etc""" return _offset_to_period_map.get(offset_str, None) -_lite_rule_alias = { - 'W': 'W-SUN', - 'Q': 'Q-DEC', - - 'A': 'A-DEC', # YearEnd(month=12), - 'AS': 'AS-JAN', # YearBegin(month=1), - 'BA': 'BA-DEC', # BYearEnd(month=12), - 'BAS': 'BAS-JAN', # BYearBegin(month=1), - - 'Min': 'T', - 'min': 'T', - 'ms': 'L', - 'us': 'U', - 'ns': 'N' -} - - _name_to_offset_map = {'days': Day(1), 'hours': Hour(1), 'minutes': Minute(1), @@ -447,10 +71,6 @@ def get_period_alias(offset_str): 'nanoseconds': Nano(1)} -_INVALID_FREQ_ERROR = "Invalid frequency: {0}" - - -@deprecate_kwarg(old_arg_name='freqstr', new_arg_name='freq') def to_offset(freq): """ Return DateOffset object from string or tuple representation @@ -505,7 +125,7 @@ def to_offset(freq): stride = freq[1] if isinstance(stride, compat.string_types): name, stride = stride, name - name, _ = _base_and_stride(name) + name, _ = libfreqs._base_and_stride(name) delta = get_offset(name) * stride elif isinstance(freq, timedelta): @@ -522,13 +142,13 @@ def to_offset(freq): else: delta = delta + offset except Exception: - raise ValueError(_INVALID_FREQ_ERROR.format(freq)) + raise ValueError(libfreqs._INVALID_FREQ_ERROR.format(freq)) else: delta = None stride_sign = None try: - splitted = re.split(opattern, freq) + splitted = re.split(libfreqs.opattern, freq) if splitted[-1] != '' and not splitted[-1].isspace(): # 
the last element must be blank raise ValueError('last element must be blank') @@ -536,7 +156,7 @@ def to_offset(freq): splitted[2::4]): if sep != '' and not sep.isspace(): raise ValueError('separator must be spaces') - prefix = _lite_rule_alias.get(name) or name + prefix = libfreqs._lite_rule_alias.get(name) or name if stride_sign is None: stride_sign = -1 if stride.startswith('-') else 1 if not stride: @@ -553,55 +173,14 @@ def to_offset(freq): else: delta = delta + offset except Exception: - raise ValueError(_INVALID_FREQ_ERROR.format(freq)) + raise ValueError(libfreqs._INVALID_FREQ_ERROR.format(freq)) if delta is None: - raise ValueError(_INVALID_FREQ_ERROR.format(freq)) + raise ValueError(libfreqs._INVALID_FREQ_ERROR.format(freq)) return delta -# hack to handle WOM-1MON -opattern = re.compile( - r'([\-]?\d*|[\-]?\d*\.\d*)\s*([A-Za-z]+([\-][\dA-Za-z\-]+)?)' -) - - -def _base_and_stride(freqstr): - """ - Return base freq and stride info from string representation - - Examples - -------- - _freq_and_stride('5Min') -> 'Min', 5 - """ - groups = opattern.match(freqstr) - - if not groups: - raise ValueError("Could not evaluate %s" % freqstr) - - stride = groups.group(1) - - if len(stride): - stride = int(stride) - else: - stride = 1 - - base = groups.group(2) - - return (base, stride) - - -def get_base_alias(freqstr): - """ - Returns the base frequency alias, e.g., '5D' -> 'D' - """ - return _base_and_stride(freqstr)[0] - - -_dont_uppercase = set(('MS', 'ms')) - - def get_offset(name): """ Return DateOffset object associated with rule name @@ -610,12 +189,12 @@ def get_offset(name): -------- get_offset('EOM') --> BMonthEnd(1) """ - if name not in _dont_uppercase: + if name not in libfreqs._dont_uppercase: name = name.upper() - name = _lite_rule_alias.get(name, name) - name = _lite_rule_alias.get(name.lower(), name) + name = libfreqs._lite_rule_alias.get(name, name) + name = libfreqs._lite_rule_alias.get(name.lower(), name) else: - name = _lite_rule_alias.get(name, 
name) + name = libfreqs._lite_rule_alias.get(name, name) if name not in _offset_map: try: @@ -626,7 +205,7 @@ def get_offset(name): offset = klass._from_name(*split[1:]) except (ValueError, TypeError, KeyError): # bad prefix or suffix - raise ValueError(_INVALID_FREQ_ERROR.format(name)) + raise ValueError(libfreqs._INVALID_FREQ_ERROR.format(name)) # cache _offset_map[name] = offset # do not return cache because it's mutable @@ -635,114 +214,9 @@ def get_offset(name): getOffset = get_offset - -def get_offset_name(offset): - """ - Return rule name associated with a DateOffset object - - Examples - -------- - get_offset_name(BMonthEnd(1)) --> 'EOM' - """ - - msg = "get_offset_name(offset) is deprecated. Use offset.freqstr instead" - warnings.warn(msg, FutureWarning, stacklevel=2) - return offset.freqstr - - -def get_standard_freq(freq): - """ - Return the standardized frequency string - """ - - msg = ("get_standard_freq is deprecated. Use to_offset(freq).rule_code " - "instead.") - warnings.warn(msg, FutureWarning, stacklevel=2) - return to_offset(freq).rule_code - # --------------------------------------------------------------------- # Period codes -# period frequency constants corresponding to scikits timeseries -# originals -_period_code_map = { - # Annual freqs with various fiscal year ends. - # eg, 2005 for A-FEB runs Mar 1, 2004 to Feb 28, 2005 - "A-DEC": 1000, # Annual - December year end - "A-JAN": 1001, # Annual - January year end - "A-FEB": 1002, # Annual - February year end - "A-MAR": 1003, # Annual - March year end - "A-APR": 1004, # Annual - April year end - "A-MAY": 1005, # Annual - May year end - "A-JUN": 1006, # Annual - June year end - "A-JUL": 1007, # Annual - July year end - "A-AUG": 1008, # Annual - August year end - "A-SEP": 1009, # Annual - September year end - "A-OCT": 1010, # Annual - October year end - "A-NOV": 1011, # Annual - November year end - - # Quarterly frequencies with various fiscal year ends. 
- # eg, Q42005 for Q-OCT runs Aug 1, 2005 to Oct 31, 2005 - "Q-DEC": 2000, # Quarterly - December year end - "Q-JAN": 2001, # Quarterly - January year end - "Q-FEB": 2002, # Quarterly - February year end - "Q-MAR": 2003, # Quarterly - March year end - "Q-APR": 2004, # Quarterly - April year end - "Q-MAY": 2005, # Quarterly - May year end - "Q-JUN": 2006, # Quarterly - June year end - "Q-JUL": 2007, # Quarterly - July year end - "Q-AUG": 2008, # Quarterly - August year end - "Q-SEP": 2009, # Quarterly - September year end - "Q-OCT": 2010, # Quarterly - October year end - "Q-NOV": 2011, # Quarterly - November year end - - "M": 3000, # Monthly - - "W-SUN": 4000, # Weekly - Sunday end of week - "W-MON": 4001, # Weekly - Monday end of week - "W-TUE": 4002, # Weekly - Tuesday end of week - "W-WED": 4003, # Weekly - Wednesday end of week - "W-THU": 4004, # Weekly - Thursday end of week - "W-FRI": 4005, # Weekly - Friday end of week - "W-SAT": 4006, # Weekly - Saturday end of week - - "B": 5000, # Business days - "D": 6000, # Daily - "H": 7000, # Hourly - "T": 8000, # Minutely - "S": 9000, # Secondly - "L": 10000, # Millisecondly - "U": 11000, # Microsecondly - "N": 12000, # Nanosecondly -} - -_reverse_period_code_map = {} -for _k, _v in compat.iteritems(_period_code_map): - _reverse_period_code_map[_v] = _k - -# Additional aliases -_period_code_map.update({ - "Q": 2000, # Quarterly - December year end (default quarterly) - "A": 1000, # Annual - "W": 4000, # Weekly - "C": 5000, # Custom Business Day -}) - - -def _period_str_to_code(freqstr): - freqstr = _lite_rule_alias.get(freqstr, freqstr) - - if freqstr not in _dont_uppercase: - lower = freqstr.lower() - freqstr = _lite_rule_alias.get(lower, freqstr) - - if freqstr not in _dont_uppercase: - freqstr = freqstr.upper() - try: - return _period_code_map[freqstr] - except KeyError: - raise ValueError(_INVALID_FREQ_ERROR.format(freqstr)) - def infer_freq(index, warn=True): """ @@ -769,8 +243,8 @@ def infer_freq(index, 
warn=True): if not (is_datetime64_dtype(values) or is_timedelta64_dtype(values) or values.dtype == object): - raise TypeError("cannot infer freq from a non-convertible " - "dtype on a Series of {0}".format(index.dtype)) + raise TypeError("cannot infer freq from a non-convertible dtype " + "on a Series of {dtype}".format(dtype=index.dtype)) index = values if is_period_arraylike(index): @@ -783,7 +257,7 @@ def infer_freq(index, warn=True): if isinstance(index, pd.Index) and not isinstance(index, pd.DatetimeIndex): if isinstance(index, (pd.Int64Index, pd.Float64Index)): raise TypeError("cannot infer freq from a non-convertible index " - "type {0}".format(type(index))) + "type {type}".format(type=type(index))) index = index.values if not isinstance(index, pd.DatetimeIndex): @@ -794,429 +268,3 @@ def infer_freq(index, warn=True): inferer = _FrequencyInferer(index, warn=warn) return inferer.get_freq() - -_ONE_MICRO = long(1000) -_ONE_MILLI = _ONE_MICRO * 1000 -_ONE_SECOND = _ONE_MILLI * 1000 -_ONE_MINUTE = 60 * _ONE_SECOND -_ONE_HOUR = 60 * _ONE_MINUTE -_ONE_DAY = 24 * _ONE_HOUR - - -class _FrequencyInferer(object): - """ - Not sure if I can avoid the state machine here - """ - - def __init__(self, index, warn=True): - self.index = index - self.values = np.asarray(index).view('i8') - - # This moves the values, which are implicitly in UTC, to the - # the timezone so they are in local time - if hasattr(index, 'tz'): - if index.tz is not None: - self.values = tslib.tz_convert(self.values, 'UTC', index.tz) - - self.warn = warn - - if len(index) < 3: - raise ValueError('Need at least 3 dates to infer frequency') - - self.is_monotonic = (self.index.is_monotonic_increasing or - self.index.is_monotonic_decreasing) - - @cache_readonly - def deltas(self): - return tslib.unique_deltas(self.values) - - @cache_readonly - def deltas_asi8(self): - return tslib.unique_deltas(self.index.asi8) - - @cache_readonly - def is_unique(self): - return len(self.deltas) == 1 - - @cache_readonly - 
def is_unique_asi8(self): - return len(self.deltas_asi8) == 1 - - def get_freq(self): - if not self.is_monotonic or not self.index.is_unique: - return None - - delta = self.deltas[0] - if _is_multiple(delta, _ONE_DAY): - return self._infer_daily_rule() - else: - # Business hourly, maybe. 17: one day / 65: one weekend - if self.hour_deltas in ([1, 17], [1, 65], [1, 17, 65]): - return 'BH' - # Possibly intraday frequency. Here we use the - # original .asi8 values as the modified values - # will not work around DST transitions. See #8772 - elif not self.is_unique_asi8: - return None - delta = self.deltas_asi8[0] - if _is_multiple(delta, _ONE_HOUR): - # Hours - return _maybe_add_count('H', delta / _ONE_HOUR) - elif _is_multiple(delta, _ONE_MINUTE): - # Minutes - return _maybe_add_count('T', delta / _ONE_MINUTE) - elif _is_multiple(delta, _ONE_SECOND): - # Seconds - return _maybe_add_count('S', delta / _ONE_SECOND) - elif _is_multiple(delta, _ONE_MILLI): - # Milliseconds - return _maybe_add_count('L', delta / _ONE_MILLI) - elif _is_multiple(delta, _ONE_MICRO): - # Microseconds - return _maybe_add_count('U', delta / _ONE_MICRO) - else: - # Nanoseconds - return _maybe_add_count('N', delta) - - @cache_readonly - def day_deltas(self): - return [x / _ONE_DAY for x in self.deltas] - - @cache_readonly - def hour_deltas(self): - return [x / _ONE_HOUR for x in self.deltas] - - @cache_readonly - def fields(self): - return tslib.build_field_sarray(self.values) - - @cache_readonly - def rep_stamp(self): - return lib.Timestamp(self.values[0]) - - def month_position_check(self): - # TODO: cythonize this, very slow - calendar_end = True - business_end = True - calendar_start = True - business_start = True - - years = self.fields['Y'] - months = self.fields['M'] - days = self.fields['D'] - weekdays = self.index.dayofweek - - from calendar import monthrange - for y, m, d, wd in zip(years, months, days, weekdays): - - if calendar_start: - calendar_start &= d == 1 - if business_start: - 
business_start &= d == 1 or (d <= 3 and wd == 0) - - if calendar_end or business_end: - _, daysinmonth = monthrange(y, m) - cal = d == daysinmonth - if calendar_end: - calendar_end &= cal - if business_end: - business_end &= cal or (daysinmonth - d < 3 and wd == 4) - elif not calendar_start and not business_start: - break - - if calendar_end: - return 'ce' - elif business_end: - return 'be' - elif calendar_start: - return 'cs' - elif business_start: - return 'bs' - else: - return None - - @cache_readonly - def mdiffs(self): - nmonths = self.fields['Y'] * 12 + self.fields['M'] - return tslib.unique_deltas(nmonths.astype('i8')) - - @cache_readonly - def ydiffs(self): - return tslib.unique_deltas(self.fields['Y'].astype('i8')) - - def _infer_daily_rule(self): - annual_rule = self._get_annual_rule() - if annual_rule: - nyears = self.ydiffs[0] - month = _month_aliases[self.rep_stamp.month] - return _maybe_add_count('%s-%s' % (annual_rule, month), nyears) - - quarterly_rule = self._get_quarterly_rule() - if quarterly_rule: - nquarters = self.mdiffs[0] / 3 - mod_dict = {0: 12, 2: 11, 1: 10} - month = _month_aliases[mod_dict[self.rep_stamp.month % 3]] - return _maybe_add_count('%s-%s' % (quarterly_rule, month), - nquarters) - - monthly_rule = self._get_monthly_rule() - if monthly_rule: - return _maybe_add_count(monthly_rule, self.mdiffs[0]) - - if self.is_unique: - days = self.deltas[0] / _ONE_DAY - if days % 7 == 0: - # Weekly - alias = _weekday_rule_aliases[self.rep_stamp.weekday()] - return _maybe_add_count('W-%s' % alias, days / 7) - else: - return _maybe_add_count('D', days) - - # Business daily. 
Maybe - if self.day_deltas == [1, 3]: - return 'B' - - wom_rule = self._get_wom_rule() - if wom_rule: - return wom_rule - - def _get_annual_rule(self): - if len(self.ydiffs) > 1: - return None - - if len(algos.unique(self.fields['M'])) > 1: - return None - - pos_check = self.month_position_check() - return {'cs': 'AS', 'bs': 'BAS', - 'ce': 'A', 'be': 'BA'}.get(pos_check) - - def _get_quarterly_rule(self): - if len(self.mdiffs) > 1: - return None - - if not self.mdiffs[0] % 3 == 0: - return None - - pos_check = self.month_position_check() - return {'cs': 'QS', 'bs': 'BQS', - 'ce': 'Q', 'be': 'BQ'}.get(pos_check) - - def _get_monthly_rule(self): - if len(self.mdiffs) > 1: - return None - pos_check = self.month_position_check() - return {'cs': 'MS', 'bs': 'BMS', - 'ce': 'M', 'be': 'BM'}.get(pos_check) - - def _get_wom_rule(self): - # wdiffs = unique(np.diff(self.index.week)) - # We also need -47, -49, -48 to catch index spanning year boundary - # if not lib.ismember(wdiffs, set([4, 5, -47, -49, -48])).all(): - # return None - - weekdays = unique(self.index.weekday) - if len(weekdays) > 1: - return None - - week_of_months = unique((self.index.day - 1) // 7) - # Only attempt to infer up to WOM-4. 
See #9425 - week_of_months = week_of_months[week_of_months < 4] - if len(week_of_months) == 0 or len(week_of_months) > 1: - return None - - # get which week - week = week_of_months[0] + 1 - wd = _weekday_rule_aliases[weekdays[0]] - - return 'WOM-%d%s' % (week, wd) - - -class _TimedeltaFrequencyInferer(_FrequencyInferer): - - def _infer_daily_rule(self): - if self.is_unique: - days = self.deltas[0] / _ONE_DAY - if days % 7 == 0: - # Weekly - alias = _weekday_rule_aliases[self.rep_stamp.weekday()] - return _maybe_add_count('W-%s' % alias, days / 7) - else: - return _maybe_add_count('D', days) - - -def _maybe_add_count(base, count): - if count != 1: - return '%d%s' % (count, base) - else: - return base - - -def _maybe_coerce_freq(code): - """ we might need to coerce a code to a rule_code - and uppercase it - - Parameters - ---------- - source : string - Frequency converting from - - Returns - ------- - string code - """ - - assert code is not None - if isinstance(code, offsets.DateOffset): - code = code.rule_code - return code.upper() - - -def is_subperiod(source, target): - """ - Returns True if downsampling is possible between source and target - frequencies - - Parameters - ---------- - source : string - Frequency converting from - target : string - Frequency converting to - - Returns - ------- - is_subperiod : boolean - """ - - if target is None or source is None: - return False - source = _maybe_coerce_freq(source) - target = _maybe_coerce_freq(target) - - if _is_annual(target): - if _is_quarterly(source): - return _quarter_months_conform(_get_rule_month(source), - _get_rule_month(target)) - return source in ['D', 'C', 'B', 'M', 'H', 'T', 'S', 'L', 'U', 'N'] - elif _is_quarterly(target): - return source in ['D', 'C', 'B', 'M', 'H', 'T', 'S', 'L', 'U', 'N'] - elif _is_monthly(target): - return source in ['D', 'C', 'B', 'H', 'T', 'S', 'L', 'U', 'N'] - elif _is_weekly(target): - return source in [target, 'D', 'C', 'B', 'H', 'T', 'S', 'L', 'U', 'N'] - elif target == 
'B': - return source in ['B', 'H', 'T', 'S', 'L', 'U', 'N'] - elif target == 'C': - return source in ['C', 'H', 'T', 'S', 'L', 'U', 'N'] - elif target == 'D': - return source in ['D', 'H', 'T', 'S', 'L', 'U', 'N'] - elif target == 'H': - return source in ['H', 'T', 'S', 'L', 'U', 'N'] - elif target == 'T': - return source in ['T', 'S', 'L', 'U', 'N'] - elif target == 'S': - return source in ['S', 'L', 'U', 'N'] - elif target == 'L': - return source in ['L', 'U', 'N'] - elif target == 'U': - return source in ['U', 'N'] - elif target == 'N': - return source in ['N'] - - -def is_superperiod(source, target): - """ - Returns True if upsampling is possible between source and target - frequencies - - Parameters - ---------- - source : string - Frequency converting from - target : string - Frequency converting to - - Returns - ------- - is_superperiod : boolean - """ - if target is None or source is None: - return False - source = _maybe_coerce_freq(source) - target = _maybe_coerce_freq(target) - - if _is_annual(source): - if _is_annual(target): - return _get_rule_month(source) == _get_rule_month(target) - - if _is_quarterly(target): - smonth = _get_rule_month(source) - tmonth = _get_rule_month(target) - return _quarter_months_conform(smonth, tmonth) - return target in ['D', 'C', 'B', 'M', 'H', 'T', 'S', 'L', 'U', 'N'] - elif _is_quarterly(source): - return target in ['D', 'C', 'B', 'M', 'H', 'T', 'S', 'L', 'U', 'N'] - elif _is_monthly(source): - return target in ['D', 'C', 'B', 'H', 'T', 'S', 'L', 'U', 'N'] - elif _is_weekly(source): - return target in [source, 'D', 'C', 'B', 'H', 'T', 'S', 'L', 'U', 'N'] - elif source == 'B': - return target in ['D', 'C', 'B', 'H', 'T', 'S', 'L', 'U', 'N'] - elif source == 'C': - return target in ['D', 'C', 'B', 'H', 'T', 'S', 'L', 'U', 'N'] - elif source == 'D': - return target in ['D', 'C', 'B', 'H', 'T', 'S', 'L', 'U', 'N'] - elif source == 'H': - return target in ['H', 'T', 'S', 'L', 'U', 'N'] - elif source == 'T': - return target in 
['T', 'S', 'L', 'U', 'N'] - elif source == 'S': - return target in ['S', 'L', 'U', 'N'] - elif source == 'L': - return target in ['L', 'U', 'N'] - elif source == 'U': - return target in ['U', 'N'] - elif source == 'N': - return target in ['N'] - - -_get_rule_month = tslib._get_rule_month - - -def _is_annual(rule): - rule = rule.upper() - return rule == 'A' or rule.startswith('A-') - - -def _quarter_months_conform(source, target): - snum = _month_numbers[source] - tnum = _month_numbers[target] - return snum % 3 == tnum % 3 - - -def _is_quarterly(rule): - rule = rule.upper() - return rule == 'Q' or rule.startswith('Q-') or rule.startswith('BQ') - - -def _is_monthly(rule): - rule = rule.upper() - return rule == 'M' or rule == 'BM' - - -def _is_weekly(rule): - rule = rule.upper() - return rule == 'W' or rule.startswith('W-') - - -DAYS = ['MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN'] - -MONTHS = tslib._MONTHS -_month_numbers = tslib._MONTH_NUMBERS -_month_aliases = tslib._MONTH_ALIASES -_weekday_rule_aliases = dict((k, v) for k, v in enumerate(DAYS)) - - -def _is_multiple(us, mult): - return us % mult == 0 diff --git a/pandas/tseries/holiday.py b/pandas/tseries/holiday.py index 31e40c6bcbb2c..4e874eac9e6c6 100644 --- a/pandas/tseries/holiday.py +++ b/pandas/tseries/holiday.py @@ -133,7 +133,7 @@ def __init__(self, name, year=None, month=None, day=None, offset=None, Name of the holiday , defaults to class name offset : array of pandas.tseries.offsets or class from pandas.tseries.offsets - computes offset from date + computes offset from date observance: function computes when holiday is given a pandas Timestamp days_of_week: @@ -174,16 +174,16 @@ class from pandas.tseries.offsets def __repr__(self): info = '' if self.year is not None: - info += 'year=%s, ' % self.year - info += 'month=%s, day=%s, ' % (self.month, self.day) + info += 'year={year}, '.format(year=self.year) + info += 'month={mon}, day={day}, '.format(mon=self.month, day=self.day) if self.offset is not 
None: - info += 'offset=%s' % self.offset + info += 'offset={offset}'.format(offset=self.offset) if self.observance is not None: - info += 'observance=%s' % self.observance + info += 'observance={obs}'.format(obs=self.observance) - repr = 'Holiday: %s (%s)' % (self.name, info) + repr = 'Holiday: {name} ({info})'.format(name=self.name, info=info) return repr def dates(self, start_date, end_date, return_name=False): @@ -286,6 +286,7 @@ def _apply_rule(self, dates): dates += offset return dates + holiday_calendars = {} @@ -364,7 +365,7 @@ def holidays(self, start=None, end=None, return_name=False): ---------- start : starting date, datetime-like, optional end : ending date, datetime-like, optional - return_names : bool, optional + return_name : bool, optional If True, return a series that has dates and holiday names. False will only return a DatetimeIndex of dates. @@ -373,8 +374,8 @@ def holidays(self, start=None, end=None, return_name=False): DatetimeIndex of holidays """ if self.rules is None: - raise Exception('Holiday Calendar %s does not have any ' - 'rules specified' % self.name) + raise Exception('Holiday Calendar {name} does not have any ' + 'rules specified'.format(name=self.name)) if start is None: start = AbstractHolidayCalendar.start_date @@ -429,7 +430,7 @@ def merge_class(base, other): if not isinstance(other, list): other = [other] - other_holidays = dict((holiday.name, holiday) for holiday in other) + other_holidays = {holiday.name: holiday for holiday in other} try: base = base.rules @@ -438,7 +439,7 @@ def merge_class(base, other): if not isinstance(base, list): base = [base] - base_holidays = dict([(holiday.name, holiday) for holiday in base]) + base_holidays = {holiday.name: holiday for holiday in base} other_holidays.update(base_holidays) return list(other_holidays.values()) @@ -461,6 +462,7 @@ def merge(self, other, inplace=False): else: return holidays + USMemorialDay = Holiday('MemorialDay', month=5, day=31, offset=DateOffset(weekday=MO(-1))) 
USLaborDay = Holiday('Labor Day', month=9, day=1, diff --git a/pandas/tseries/interval.py b/pandas/tseries/interval.py deleted file mode 100644 index 6698c7e924758..0000000000000 --- a/pandas/tseries/interval.py +++ /dev/null @@ -1,38 +0,0 @@ - -from pandas.core.index import Index - - -class Interval(object): - """ - Represents an interval of time defined by two timestamps - """ - - def __init__(self, start, end): - self.start = start - self.end = end - - -class PeriodInterval(object): - """ - Represents an interval of time defined by two Period objects (time - ordinals) - """ - - def __init__(self, start, end): - self.start = start - self.end = end - - -class IntervalIndex(Index): - """ - - """ - - def __new__(self, starts, ends): - pass - - def dtype(self): - return self.values.dtype - -if __name__ == '__main__': - pass diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 370dd00762896..2e4be7fbdeebf 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -1,20 +1,34 @@ +# -*- coding: utf-8 -*- from datetime import date, datetime, timedelta +import functools +import operator + from pandas.compat import range from pandas import compat import numpy as np -from pandas.types.generic import ABCSeries, ABCDatetimeIndex, ABCPeriod -from pandas.tseries.tools import to_datetime, normalize_date -from pandas.core.common import AbstractMethodError +from pandas.core.dtypes.generic import ABCSeries, ABCDatetimeIndex, ABCPeriod +from pandas.core.tools.datetimes import to_datetime +import pandas.core.common as com # import after tools, dateutil check -from dateutil.relativedelta import relativedelta, weekday from dateutil.easter import easter -import pandas.tslib as tslib -from pandas.tslib import Timestamp, OutOfBoundsDatetime, Timedelta +from pandas._libs import tslib, Timestamp, OutOfBoundsDatetime, Timedelta +from pandas.util._decorators import cache_readonly + +from pandas._libs.tslibs import ccalendar, frequencies as libfrequencies 
+from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds +import pandas._libs.tslibs.offsets as liboffsets +from pandas._libs.tslibs.offsets import ( + ApplyTypeError, + as_datetime, _is_normalized, + _get_calendar, _to_dt64, + _determine_offset, + apply_index_wraps, + roll_yearday, + shift_month, + BaseOffset) -import functools -import operator __all__ = ['Day', 'BusinessDay', 'BDay', 'CustomBusinessDay', 'CDay', 'CBMonthEnd', 'CBMonthBegin', @@ -42,13 +56,6 @@ def as_timestamp(obj): return obj -def as_datetime(obj): - f = getattr(obj, 'to_pydatetime', None) - if f is not None: - obj = f() - return obj - - def apply_wraps(func): @functools.wraps(func) def wrapper(self, other): @@ -95,7 +102,7 @@ def wrapper(self, other): if self.normalize: # normalize_date returns normal datetime - result = normalize_date(result) + result = tslib.normalize_date(result) if tz is not None and result.tzinfo is None: result = tslib._localize_pydatetime(result, tz) @@ -104,36 +111,36 @@ def wrapper(self, other): return wrapper -def apply_index_wraps(func): - @functools.wraps(func) - def wrapper(self, other): - result = func(self, other) - if self.normalize: - result = result.to_period('D').to_timestamp() - return result - return wrapper - - -def _is_normalized(dt): - if (dt.hour != 0 or dt.minute != 0 or dt.second != 0 or - dt.microsecond != 0 or getattr(dt, 'nanosecond', 0) != 0): - return False - return True +def shift_day(other, days): + """ + Increment the datetime `other` by the given number of days, retaining + the time-portion of the datetime. For tz-naive datetimes this is + equivalent to adding a timedelta. For tz-aware datetimes it is similar to + dateutil's relativedelta.__add__, but handles pytz tzinfo objects. 
-# --------------------------------------------------------------------- -# DateOffset + Parameters + ---------- + other : datetime or Timestamp + days : int + Returns + ------- + shifted: datetime or Timestamp + """ + if other.tzinfo is None: + return other + timedelta(days=days) -class ApplyTypeError(TypeError): - # sentinel class for catching the apply error to return NotImplemented - pass + tz = other.tzinfo + naive = other.replace(tzinfo=None) + shifted = naive + timedelta(days=days) + return tslib._localize_pydatetime(shifted, tz) -class CacheableOffset(object): - _cacheable = True +# --------------------------------------------------------------------- +# DateOffset -class DateOffset(object): +class DateOffset(BaseOffset): """ Standard kind of date increment used for a date range. @@ -176,45 +183,20 @@ def __add__(date): Since 0 is a bit weird, we suggest avoiding its use. """ - _cacheable = False - _normalize_cache = True - _kwds_use_relativedelta = ( - 'years', 'months', 'weeks', 'days', - 'year', 'month', 'week', 'day', 'weekday', - 'hour', 'minute', 'second', 'microsecond' - ) _use_relativedelta = False _adjust_dst = False + _attributes = frozenset(['n', 'normalize'] + + list(liboffsets.relativedelta_kwds)) # default for prior pickles normalize = False def __init__(self, n=1, normalize=False, **kwds): - self.n = int(n) + self.n = self._validate_n(n) self.normalize = normalize - self.kwds = kwds - self._offset, self._use_relativedelta = self._determine_offset() - - def _determine_offset(self): - # timedelta is used for sub-daily plural offsets and all singular - # offsets relativedelta is used for plural offsets of daily length or - # more nanosecond(s) are handled by apply_wraps - kwds_no_nanos = dict( - (k, v) for k, v in self.kwds.items() - if k not in ('nanosecond', 'nanoseconds') - ) - use_relativedelta = False - - if len(kwds_no_nanos) > 0: - if any(k in self._kwds_use_relativedelta for k in kwds_no_nanos): - use_relativedelta = True - offset = 
relativedelta(**kwds_no_nanos) - else: - # sub-daily offset - use timedelta (tz-aware) - offset = timedelta(**kwds_no_nanos) - else: - offset = timedelta(1) - return offset, use_relativedelta + + self._offset, self._use_relativedelta = _determine_offset(kwds) + self.__dict__.update(kwds) @apply_wraps def apply(self, other): @@ -249,8 +231,6 @@ def apply_index(self, i): raises NotImplentedError for offsets without a vectorized implementation - .. versionadded:: 0.17.0 - Parameters ---------- i : DatetimeIndex @@ -260,32 +240,33 @@ def apply_index(self, i): y : DatetimeIndex """ - if not type(self) is DateOffset: - raise NotImplementedError("DateOffset subclass %s " + if type(self) is not DateOffset: + raise NotImplementedError("DateOffset subclass {name} " "does not have a vectorized " - "implementation" - % (self.__class__.__name__,)) + "implementation".format( + name=self.__class__.__name__)) + kwds = self.kwds relativedelta_fast = set(['years', 'months', 'weeks', 'days', 'hours', 'minutes', 'seconds', 'microseconds']) # relativedelta/_offset path only valid for base DateOffset if (self._use_relativedelta and - set(self.kwds).issubset(relativedelta_fast)): + set(kwds).issubset(relativedelta_fast)): - months = ((self.kwds.get('years', 0) * 12 + - self.kwds.get('months', 0)) * self.n) + months = ((kwds.get('years', 0) * 12 + + kwds.get('months', 0)) * self.n) if months: - shifted = tslib.shift_months(i.asi8, months) + shifted = liboffsets.shift_months(i.asi8, months) i = i._shallow_copy(shifted) - weeks = (self.kwds.get('weeks', 0)) * self.n + weeks = (kwds.get('weeks', 0)) * self.n if weeks: i = (i.to_period('W') + weeks).to_timestamp() + \ i.to_perioddelta('W') - timedelta_kwds = dict((k, v) for k, v in self.kwds.items() - if k in ['days', 'hours', 'minutes', - 'seconds', 'microseconds']) + timedelta_kwds = {k: v for k, v in kwds.items() + if k in ['days', 'hours', 'minutes', + 'seconds', 'microseconds']} if timedelta_kwds: delta = Timedelta(**timedelta_kwds) i = 
i + (self.n * delta) @@ -295,22 +276,18 @@ def apply_index(self, i): return i + (self._offset * self.n) else: # relativedelta with other keywords + kwd = set(kwds) - relativedelta_fast raise NotImplementedError("DateOffset with relativedelta " - "keyword(s) %s not able to be " - "applied vectorized" % - (set(self.kwds) - relativedelta_fast),) + "keyword(s) {kwd} not able to be " + "applied vectorized".format(kwd=kwd)) def isAnchored(self): + # TODO: Does this make sense for the general case? It would help + # if there were a canonical docstring for what isAnchored means. return (self.n == 1) - def copy(self): - return self.__class__(self.n, normalize=self.normalize, **self.kwds) - - def _should_cache(self): - return self.isAnchored() and self._cacheable - def _params(self): - all_paras = dict(list(vars(self).items()) + list(self.kwds.items())) + all_paras = self.__dict__.copy() if 'holidays' in all_paras and not all_paras['holidays']: all_paras.pop('holidays') exclude = ['kwds', 'name', 'normalize', 'calendar'] @@ -320,38 +297,22 @@ def _params(self): params = tuple([str(self.__class__)] + attrs) return params - def __repr__(self): - className = getattr(self, '_outputName', type(self).__name__) + # TODO: Combine this with BusinessMixin version by defining a whitelisted + # set of attributes on each object rather than the existing behavior of + # iterating over internal ``__dict__`` + def _repr_attrs(self): exclude = set(['n', 'inc', 'normalize']) attrs = [] for attr in sorted(self.__dict__): - if ((attr == 'kwds' and len(self.kwds) == 0) or - attr.startswith('_')): + if attr.startswith('_') or attr == 'kwds': continue - elif attr == 'kwds': - kwds_new = {} - for key in self.kwds: - if not hasattr(self, key): - kwds_new[key] = self.kwds[key] - if len(kwds_new) > 0: - attrs.append('='.join((attr, repr(kwds_new)))) - else: - if attr not in exclude: - attrs.append('='.join((attr, repr(getattr(self, attr))))) - - if abs(self.n) != 1: - plural = 's' - else: - plural = '' 
+ elif attr not in exclude: + value = getattr(self, attr) + attrs.append('{attr}={value}'.format(attr=attr, value=value)) - n_str = "" - if self.n != 1: - n_str = "%s * " % self.n - - out = '<%s' % n_str + className + plural + out = '' if attrs: out += ': ' + ', '.join(attrs) - out += '>' return out @property @@ -378,9 +339,6 @@ def __ne__(self, other): def __hash__(self): return hash(self._params()) - def __call__(self, other): - return self.apply(other) - def __add__(self, other): if isinstance(other, (ABCDatetimeIndex, ABCSeries)): return other + self @@ -391,9 +349,6 @@ def __add__(self, other): except ApplyTypeError: return NotImplemented - def __radd__(self, other): - return self.__add__(other) - def __sub__(self, other): if isinstance(other, datetime): raise TypeError('Cannot subtract datetime from offset.') @@ -403,22 +358,6 @@ def __sub__(self, other): else: # pragma: no cover return NotImplemented - def __rsub__(self, other): - if isinstance(other, (ABCDatetimeIndex, ABCSeries)): - return other - self - return self.__class__(-self.n, normalize=self.normalize, - **self.kwds) + other - - def __mul__(self, someInt): - return self.__class__(n=someInt * self.n, normalize=self.normalize, - **self.kwds) - - def __rmul__(self, someInt): - return self.__mul__(someInt) - - def __neg__(self): - return self.__class__(-self.n, normalize=self.normalize, **self.kwds) - def rollback(self, dt): """Roll provided date backward to next offset only if not on offset""" dt = as_timestamp(dt) @@ -447,43 +386,6 @@ def onOffset(self, dt): b = ((dt + self) - self) return a == b - # helpers for vectorized offsets - def _beg_apply_index(self, i, freq): - """Offsets index to beginning of Period frequency""" - - off = i.to_perioddelta('D') - - from pandas.tseries.frequencies import get_freq_code - base, mult = get_freq_code(freq) - base_period = i.to_period(base) - if self.n <= 0: - # when subtracting, dates on start roll to prior - roll = np.where(base_period.to_timestamp() == i - 
off, - self.n, self.n + 1) - else: - roll = self.n - - base = (base_period + roll).to_timestamp() - return base + off - - def _end_apply_index(self, i, freq): - """Offsets index to end of Period frequency""" - - off = i.to_perioddelta('D') - - from pandas.tseries.frequencies import get_freq_code - base, mult = get_freq_code(freq) - base_period = i.to_period(base) - if self.n > 0: - # when adding, dates on end roll to next - roll = np.where(base_period.to_timestamp(how='end') == i - off, - self.n, self.n - 1) - else: - roll = self.n - - base = (base_period + roll).to_timestamp(how='end') - return base + off - # way to get around weirdness with rule_code @property def _prefix(self): @@ -501,51 +403,90 @@ def freqstr(self): return repr(self) if self.n != 1: - fstr = '%d%s' % (self.n, code) + fstr = '{n}{code}'.format(n=self.n, code=code) else: fstr = code + try: + if self._offset: + fstr += self._offset_str() + except AttributeError: + # TODO: standardize `_offset` vs `offset` naming convention + pass + return fstr + def _offset_str(self): + return '' + @property def nanos(self): - raise ValueError("{0} is a non-fixed frequency".format(self)) + raise ValueError("{name} is a non-fixed frequency".format(name=self)) + + def __setstate__(self, state): + """Reconstruct an instance from a pickled state""" + if 'offset' in state: + # Older (<0.22.0) versions have offset attribute instead of _offset + if '_offset' in state: # pragma: no cover + raise AssertionError('Unexpected key `_offset`') + state['_offset'] = state.pop('offset') + state['kwds']['offset'] = state['_offset'] + + if '_offset' in state and not isinstance(state['_offset'], timedelta): + # relativedelta, we need to populate using its kwds + offset = state['_offset'] + odict = offset.__dict__ + kwds = {key: odict[key] for key in odict if odict[key]} + state.update(kwds) + + self.__dict__ = state + if 'weekmask' in state and 'holidays' in state: + calendar, holidays = _get_calendar(weekmask=self.weekmask, + 
holidays=self.holidays, + calendar=None) + self.calendar = calendar + self.holidays = holidays class SingleConstructorOffset(DateOffset): - @classmethod def _from_name(cls, suffix=None): # default _from_name calls cls with no args if suffix: - raise ValueError("Bad freq suffix %s" % suffix) + raise ValueError("Bad freq suffix {suffix}".format(suffix=suffix)) return cls() -class BusinessMixin(object): - """ mixin to business types to provide related functions """ +class _CustomMixin(object): + """ + Mixin for classes that define and validate calendar, holidays, + and weekdays attributes + """ + def __init__(self, weekmask, holidays, calendar): + calendar, holidays = _get_calendar(weekmask=weekmask, + holidays=holidays, + calendar=calendar) + # Custom offset instances are identified by the + # following two attributes. See DateOffset._params() + # holidays, weekmask - # TODO: Combine this with DateOffset by defining a whitelisted set of - # attributes on each object rather than the existing behavior of iterating - # over internal ``__dict__`` - def __repr__(self): - className = getattr(self, '_outputName', self.__class__.__name__) + self.weekmask = weekmask + self.holidays = holidays + self.calendar = calendar - if abs(self.n) != 1: - plural = 's' - else: - plural = '' - n_str = "" - if self.n != 1: - n_str = "%s * " % self.n +class BusinessMixin(object): + """ Mixin to business types to provide related functions """ - out = '<%s' % n_str + className + plural + self._repr_attrs() + '>' - return out + @property + def offset(self): + """Alias for self._offset""" + # Alias for backward compat + return self._offset def _repr_attrs(self): if self.offset: - attrs = ['offset=%s' % repr(self.offset)] + attrs = ['offset={offset!r}'.format(offset=self.offset)] else: attrs = None out = '' @@ -568,17 +509,6 @@ def __getstate__(self): return state - def __setstate__(self, state): - """Reconstruct an instance from a pickled state""" - self.__dict__ = state - if 'weekmask' in state 
and 'holidays' in state: - calendar, holidays = self.get_calendar(weekmask=self.weekmask, - holidays=self.holidays, - calendar=None) - self.kwds['calendar'] = self.calendar = calendar - self.kwds['holidays'] = self.holidays = holidays - self.kwds['weekmask'] = state['weekmask'] - class BusinessDay(BusinessMixin, SingleConstructorOffset): """ @@ -586,29 +516,12 @@ class BusinessDay(BusinessMixin, SingleConstructorOffset): """ _prefix = 'B' _adjust_dst = True + _attributes = frozenset(['n', 'normalize', 'offset']) - def __init__(self, n=1, normalize=False, **kwds): - self.n = int(n) + def __init__(self, n=1, normalize=False, offset=timedelta(0)): + self.n = self._validate_n(n) self.normalize = normalize - self.kwds = kwds - self.offset = kwds.get('offset', timedelta(0)) - - @property - def freqstr(self): - try: - code = self.rule_code - except NotImplementedError: - return repr(self) - - if self.n != 1: - fstr = '%d%s' % (self.n, code) - else: - fstr = code - - if self.offset: - fstr += self._offset_str() - - return fstr + self._offset = offset def _offset_str(self): def get_str(td): @@ -641,35 +554,35 @@ def get_str(td): else: return '+' + repr(self.offset) - def isAnchored(self): - return (self.n == 1) - @apply_wraps def apply(self, other): if isinstance(other, datetime): n = self.n + wday = other.weekday() - if n == 0 and other.weekday() > 4: - n = 1 - - result = other - - # avoid slowness below - if abs(n) > 5: - k = n // 5 - result = result + timedelta(7 * k) - if n < 0 and result.weekday() > 4: - n += 1 - n -= 5 * k - if n == 0 and result.weekday() > 4: - n -= 1 + # avoid slowness below by operating on weeks first + weeks = n // 5 + if n <= 0 and wday > 4: + # roll forward + n += 1 - while n != 0: - k = n // abs(n) - result = result + timedelta(k) - if result.weekday() < 5: - n -= k + n -= 5 * weeks + + # n is always >= 0 at this point + if n == 0 and wday > 4: + # roll back + days = 4 - wday + elif wday > 4: + # roll forward + days = (7 - wday) + (n - 1) + 
elif wday + n <= 4: + # shift by n days without leaving the current week + days = n + else: + # shift by n days plus 2 to get past the weekend + days = n + 2 + result = other + timedelta(days=7 * weeks + days) if self.offset: result = result + self.offset return result @@ -702,32 +615,29 @@ def onOffset(self, dt): class BusinessHourMixin(BusinessMixin): - def __init__(self, **kwds): + def __init__(self, start='09:00', end='17:00', offset=timedelta(0)): # must be validated here to equality check - kwds['start'] = self._validate_time(kwds.get('start', '09:00')) - kwds['end'] = self._validate_time(kwds.get('end', '17:00')) - self.kwds = kwds - self.offset = kwds.get('offset', timedelta(0)) - self.start = kwds.get('start', '09:00') - self.end = kwds.get('end', '17:00') - - def _validate_time(self, t_input): - from datetime import time as dt_time - import time - if isinstance(t_input, compat.string_types): - try: - t = time.strptime(t_input, '%H:%M') - return dt_time(hour=t.tm_hour, minute=t.tm_min) - except ValueError: - raise ValueError("time data must match '%H:%M' format") - elif isinstance(t_input, dt_time): - if t_input.second != 0 or t_input.microsecond != 0: - raise ValueError( - "time data must be specified only with hour and minute") - return t_input + self.start = liboffsets._validate_business_time(start) + self.end = liboffsets._validate_business_time(end) + self._offset = offset + + @cache_readonly + def next_bday(self): + """used for moving to next businessday""" + if self.n >= 0: + nb_offset = 1 + else: + nb_offset = -1 + if self._prefix.startswith('C'): + # CustomBusinessHour + return CustomBusinessDay(n=nb_offset, + weekmask=self.weekmask, + holidays=self.holidays, + calendar=self.calendar) else: - raise ValueError("time data must be string or datetime.time") + return BusinessDay(n=nb_offset) + # TODO: Cache this once offsets are immutable def _get_daytime_flag(self): if self.start == self.end: raise ValueError('start and end must not be the same') @@ 
-769,20 +679,21 @@ def _prev_opening_time(self, other): return datetime(other.year, other.month, other.day, self.start.hour, self.start.minute) + # TODO: cache this once offsets are immutable def _get_business_hours_by_sec(self): """ Return business hours in a day by seconds. """ if self._get_daytime_flag(): - # create dummy datetime to calcurate businesshours in a day + # create dummy datetime to calculate businesshours in a day dtstart = datetime(2014, 4, 1, self.start.hour, self.start.minute) until = datetime(2014, 4, 1, self.end.hour, self.end.minute) - return tslib.tot_seconds(until - dtstart) + return (until - dtstart).total_seconds() else: self.daytime = False dtstart = datetime(2014, 4, 1, self.start.hour, self.start.minute) until = datetime(2014, 4, 2, self.end.hour, self.end.minute) - return tslib.tot_seconds(until - dtstart) + return (until - dtstart).total_seconds() @apply_wraps def rollback(self, dt): @@ -809,7 +720,7 @@ def rollforward(self, dt): @apply_wraps def apply(self, other): - # calcurate here because offset is not immutable + # calculate here because offset is not immutable daytime = self._get_daytime_flag() businesshours = self._get_business_hours_by_sec() bhdelta = timedelta(seconds=businesshours) @@ -858,7 +769,7 @@ def apply(self, other): if n >= 0: bday_edge = self._prev_opening_time(other) bday_edge = bday_edge + bhdelta - # calcurate remainder + # calculate remainder bday_remain = result - bday_edge result = self._next_opening_time(other) result += bday_remain @@ -879,6 +790,7 @@ def apply(self, other): return result else: + # TODO: Figure out the end of this sente raise ApplyTypeError( 'Only know how to combine business hour with ') @@ -896,7 +808,7 @@ def onOffset(self, dt): def _onOffset(self, dt, businesshours): """ - Slight speedups using calcurated values + Slight speedups using calculated values """ # if self.normalize and not _is_normalized(dt): # return False @@ -906,7 +818,7 @@ def _onOffset(self, dt, businesshours): op = 
self._prev_opening_time(dt) else: op = self._next_opening_time(dt) - span = tslib.tot_seconds(dt - op) + span = (dt - op).total_seconds() if span <= businesshours: return True else: @@ -926,35 +838,25 @@ class BusinessHour(BusinessHourMixin, SingleConstructorOffset): """ DateOffset subclass representing possibly n business days - .. versionadded: 0.16.1 + .. versionadded:: 0.16.1 """ _prefix = 'BH' _anchor = 0 + _attributes = frozenset(['n', 'normalize', 'start', 'end', 'offset']) - def __init__(self, n=1, normalize=False, **kwds): - self.n = int(n) + def __init__(self, n=1, normalize=False, start='09:00', + end='17:00', offset=timedelta(0)): + self.n = self._validate_n(n) self.normalize = normalize - super(BusinessHour, self).__init__(**kwds) - - # used for moving to next businessday - if self.n >= 0: - nb_offset = 1 - else: - nb_offset = -1 - self.next_bday = BusinessDay(n=nb_offset) + super(BusinessHour, self).__init__(start=start, end=end, offset=offset) -class CustomBusinessDay(BusinessDay): +class CustomBusinessDay(_CustomMixin, BusinessDay): """ - **EXPERIMENTAL** DateOffset subclass representing possibly n business days + DateOffset subclass representing possibly n custom business days, excluding holidays - .. warning:: EXPERIMENTAL - - This class is not officially supported and the API is likely to change - in future versions. Use this at your own risk. 
- Parameters ---------- n : int, default 1 @@ -970,53 +872,16 @@ class CustomBusinessDay(BusinessDay): """ _cacheable = False _prefix = 'C' + _attributes = frozenset(['n', 'normalize', + 'weekmask', 'holidays', 'calendar', 'offset']) def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri', - holidays=None, calendar=None, **kwds): - self.n = int(n) + holidays=None, calendar=None, offset=timedelta(0)): + self.n = self._validate_n(n) self.normalize = normalize - self.kwds = kwds - self.offset = kwds.get('offset', timedelta(0)) - calendar, holidays = self.get_calendar(weekmask=weekmask, - holidays=holidays, - calendar=calendar) - # CustomBusinessDay instances are identified by the - # following two attributes. See DateOffset._params() - # holidays, weekmask - - self.kwds['weekmask'] = self.weekmask = weekmask - self.kwds['holidays'] = self.holidays = holidays - self.kwds['calendar'] = self.calendar = calendar - - def get_calendar(self, weekmask, holidays, calendar): - """Generate busdaycalendar""" - if isinstance(calendar, np.busdaycalendar): - if not holidays: - holidays = tuple(calendar.holidays) - elif not isinstance(holidays, tuple): - holidays = tuple(holidays) - else: - # trust that calendar.holidays and holidays are - # consistent - pass - return calendar, holidays + self._offset = offset - if holidays is None: - holidays = [] - try: - holidays = holidays + calendar.holidays().tolist() - except AttributeError: - pass - holidays = [self._to_dt64(dt, dtype='datetime64[D]') for dt in - holidays] - holidays = tuple(sorted(holidays)) - - kwargs = {'weekmask': weekmask} - if holidays: - kwargs['holidays'] = holidays - - busdaycalendar = np.busdaycalendar(**kwargs) - return busdaycalendar, holidays + _CustomMixin.__init__(self, weekmask, holidays, calendar) @apply_wraps def apply(self, other): @@ -1049,140 +914,212 @@ def apply(self, other): def apply_index(self, i): raise NotImplementedError - @staticmethod - def _to_dt64(dt, dtype='datetime64'): - # 
Currently - # > np.datetime64(dt.datetime(2013,5,1),dtype='datetime64[D]') - # numpy.datetime64('2013-05-01T02:00:00.000000+0200') - # Thus astype is needed to cast datetime to datetime64[D] - if getattr(dt, 'tzinfo', None) is not None: - i8 = tslib.pydt_to_i8(dt) - dt = tslib.tz_convert_single(i8, 'UTC', dt.tzinfo) - dt = Timestamp(dt) - dt = np.datetime64(dt) - if dt.dtype.name != dtype: - dt = dt.astype(dtype) - return dt - def onOffset(self, dt): if self.normalize and not _is_normalized(dt): return False - day64 = self._to_dt64(dt, 'datetime64[D]') + day64 = _to_dt64(dt, 'datetime64[D]') return np.is_busday(day64, busdaycal=self.calendar) -class CustomBusinessHour(BusinessHourMixin, SingleConstructorOffset): +class CustomBusinessHour(_CustomMixin, BusinessHourMixin, + SingleConstructorOffset): """ DateOffset subclass representing possibly n custom business days - .. versionadded: 0.18.1 + .. versionadded:: 0.18.1 """ _prefix = 'CBH' _anchor = 0 + _attributes = frozenset(['n', 'normalize', + 'weekmask', 'holidays', 'calendar', + 'start', 'end', 'offset']) def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri', - holidays=None, calendar=None, **kwds): - self.n = int(n) + holidays=None, calendar=None, + start='09:00', end='17:00', offset=timedelta(0)): + self.n = self._validate_n(n) self.normalize = normalize - super(CustomBusinessHour, self).__init__(**kwds) - # used for moving to next businessday - if self.n >= 0: - nb_offset = 1 - else: - nb_offset = -1 - self.next_bday = CustomBusinessDay(n=nb_offset, - weekmask=weekmask, - holidays=holidays, - calendar=calendar) + self._offset = offset + + _CustomMixin.__init__(self, weekmask, holidays, calendar) + BusinessHourMixin.__init__(self, start=start, end=end, offset=offset) - self.kwds['weekmask'] = self.next_bday.weekmask - self.kwds['holidays'] = self.next_bday.holidays - self.kwds['calendar'] = self.next_bday.calendar + +# --------------------------------------------------------------------- +# 
Month-Based Offset Classes class MonthOffset(SingleConstructorOffset): _adjust_dst = True + _attributes = frozenset(['n', 'normalize']) + + def __init__(self, n=1, normalize=False): + self.n = self._validate_n(n) + self.normalize = normalize @property def name(self): if self.isAnchored: return self.rule_code else: - return "%s-%s" % (self.rule_code, _int_to_month[self.n]) + month = ccalendar.MONTH_ALIASES[self.n] + return "{code}-{month}".format(code=self.rule_code, + month=month) - -class MonthEnd(MonthOffset): - """DateOffset of one month end""" + def onOffset(self, dt): + if self.normalize and not _is_normalized(dt): + return False + return dt.day == self._get_offset_day(dt) @apply_wraps def apply(self, other): - n = self.n - _, days_in_month = tslib.monthrange(other.year, other.month) - if other.day != days_in_month: - other = other + relativedelta(months=-1, day=31) - if n <= 0: - n = n + 1 - other = other + relativedelta(months=n, day=31) - return other + compare_day = self._get_offset_day(other) + n = liboffsets.roll_convention(other.day, self.n, compare_day) + return shift_month(other, n, self._day_opt) @apply_index_wraps def apply_index(self, i): - shifted = tslib.shift_months(i.asi8, self.n, 'end') + shifted = liboffsets.shift_months(i.asi8, self.n, self._day_opt) return i._shallow_copy(shifted) - def onOffset(self, dt): - if self.normalize and not _is_normalized(dt): - return False - days_in_month = tslib.monthrange(dt.year, dt.month)[1] - return dt.day == days_in_month +class MonthEnd(MonthOffset): + """DateOffset of one month end""" _prefix = 'M' + _day_opt = 'end' class MonthBegin(MonthOffset): """DateOffset of one month at beginning""" + _prefix = 'MS' + _day_opt = 'start' + + +class BusinessMonthEnd(MonthOffset): + """DateOffset increments between business EOM dates""" + _prefix = 'BM' + _day_opt = 'business_end' + + +class BusinessMonthBegin(MonthOffset): + """DateOffset of one business month at beginning""" + _prefix = 'BMS' + _day_opt = 
'business_start' + + +class _CustomBusinessMonth(_CustomMixin, BusinessMixin, MonthOffset): + """ + DateOffset subclass representing one custom business month, incrementing + between [BEGIN/END] of month dates + + Parameters + ---------- + n : int, default 1 + offset : timedelta, default timedelta(0) + normalize : bool, default False + Normalize start/end dates to midnight before generating date range + weekmask : str, Default 'Mon Tue Wed Thu Fri' + weekmask of valid business days, passed to ``numpy.busdaycalendar`` + holidays : list + list/array of dates to exclude from the set of valid business days, + passed to ``numpy.busdaycalendar`` + calendar : pd.HolidayCalendar or np.busdaycalendar + """ + _cacheable = False + _attributes = frozenset(['n', 'normalize', + 'weekmask', 'holidays', 'calendar', 'offset']) + + onOffset = DateOffset.onOffset # override MonthOffset method + apply_index = DateOffset.apply_index # override MonthOffset method + + def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri', + holidays=None, calendar=None, offset=timedelta(0)): + self.n = self._validate_n(n) + self.normalize = normalize + self._offset = offset + + _CustomMixin.__init__(self, weekmask, holidays, calendar) + + @cache_readonly + def cbday_roll(self): + """Define default roll function to be called in apply method""" + cbday = CustomBusinessDay(n=self.n, normalize=False, **self.kwds) + + if self._prefix.endswith('S'): + # MonthBegin + roll_func = cbday.rollforward + else: + # MonthEnd + roll_func = cbday.rollback + return roll_func + + @cache_readonly + def m_offset(self): + if self._prefix.endswith('S'): + # MonthBegin + moff = MonthBegin(n=1, normalize=False) + else: + # MonthEnd + moff = MonthEnd(n=1, normalize=False) + return moff + + @cache_readonly + def month_roll(self): + """Define default roll function to be called in apply method""" + if self._prefix.endswith('S'): + # MonthBegin + roll_func = self.m_offset.rollback + else: + # MonthEnd + roll_func = 
self.m_offset.rollforward + return roll_func @apply_wraps def apply(self, other): - n = self.n + # First move to month offset + cur_month_offset_date = self.month_roll(other) - if other.day > 1 and n <= 0: # then roll forward if n<=0 - n += 1 + # Find this custom month offset + compare_date = self.cbday_roll(cur_month_offset_date) + n = liboffsets.roll_convention(other.day, self.n, compare_date.day) - return other + relativedelta(months=n, day=1) + new = cur_month_offset_date + n * self.m_offset + result = self.cbday_roll(new) + return result - @apply_index_wraps - def apply_index(self, i): - shifted = tslib.shift_months(i.asi8, self.n, 'start') - return i._shallow_copy(shifted) - def onOffset(self, dt): - if self.normalize and not _is_normalized(dt): - return False - return dt.day == 1 +class CustomBusinessMonthEnd(_CustomBusinessMonth): + __doc__ = _CustomBusinessMonth.__doc__.replace('[BEGIN/END]', 'end') + _prefix = 'CBM' - _prefix = 'MS' + +class CustomBusinessMonthBegin(_CustomBusinessMonth): + __doc__ = _CustomBusinessMonth.__doc__.replace('[BEGIN/END]', 'beginning') + _prefix = 'CBMS' +# --------------------------------------------------------------------- +# Semi-Month Based Offset Classes + class SemiMonthOffset(DateOffset): _adjust_dst = True _default_day_of_month = 15 _min_day_of_month = 2 + _attributes = frozenset(['n', 'normalize', 'day_of_month']) - def __init__(self, n=1, day_of_month=None, normalize=False, **kwds): + def __init__(self, n=1, normalize=False, day_of_month=None): if day_of_month is None: self.day_of_month = self._default_day_of_month else: self.day_of_month = int(day_of_month) if not self._min_day_of_month <= self.day_of_month <= 27: - raise ValueError('day_of_month must be ' - '{}<=day_of_month<=27, got {}'.format( - self._min_day_of_month, self.day_of_month)) - self.n = int(n) + msg = 'day_of_month must be {min}<=day_of_month<=27, got {day}' + raise ValueError(msg.format(min=self._min_day_of_month, + day=self.day_of_month)) + + 
self.n = self._validate_n(n) self.normalize = normalize - self.kwds = kwds - self.kwds['day_of_month'] = self.day_of_month @classmethod def _from_name(cls, suffix=None): @@ -1190,32 +1127,32 @@ def _from_name(cls, suffix=None): @property def rule_code(self): - suffix = '-{}'.format(self.day_of_month) + suffix = '-{day_of_month}'.format(day_of_month=self.day_of_month) return self._prefix + suffix @apply_wraps def apply(self, other): - n = self.n - if not self.onOffset(other): - _, days_in_month = tslib.monthrange(other.year, other.month) - if 1 < other.day < self.day_of_month: - other += relativedelta(day=self.day_of_month) - if n > 0: - # rollforward so subtract 1 - n -= 1 - elif self.day_of_month < other.day < days_in_month: - other += relativedelta(day=self.day_of_month) - if n < 0: - # rollforward in the negative direction so add 1 - n += 1 - elif n == 0: - n = 1 + # shift `other` to self.day_of_month, incrementing `n` if necessary + n = liboffsets.roll_convention(other.day, self.n, self.day_of_month) + + days_in_month = tslib.monthrange(other.year, other.month)[1] + + # For SemiMonthBegin on other.day == 1 and + # SemiMonthEnd on other.day == days_in_month, + # shifting `other` to `self.day_of_month` _always_ requires + # incrementing/decrementing `n`, regardless of whether it is + # initially positive. + if type(self) is SemiMonthBegin and (self.n <= 0 and other.day == 1): + n -= 1 + elif type(self) is SemiMonthEnd and (self.n > 0 and + other.day == days_in_month): + n += 1 return self._apply(n, other) def _apply(self, n, other): """Handle specific apply logic for child classes""" - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) @apply_index_wraps def apply_index(self, i): @@ -1249,11 +1186,11 @@ def _get_roll(self, i, before_day_of_month, after_day_of_month): The roll array is based on the fact that i gets rolled back to the first day of the month. 
""" - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) def _apply_index_days(self, i, roll): """Apply the correct day for each date in i""" - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) class SemiMonthEnd(SemiMonthOffset): @@ -1279,21 +1216,9 @@ def onOffset(self, dt): return dt.day in (self.day_of_month, days_in_month) def _apply(self, n, other): - # if other.day is not day_of_month move to day_of_month and update n - if other.day < self.day_of_month: - other += relativedelta(day=self.day_of_month) - if n > 0: - n -= 1 - elif other.day > self.day_of_month: - other += relativedelta(day=self.day_of_month) - if n == 0: - n = 1 - else: - n += 1 - months = n // 2 day = 31 if n % 2 else self.day_of_month - return other + relativedelta(months=months, day=day) + return shift_month(other, months, day) def _get_roll(self, i, before_day_of_month, after_day_of_month): n = self.n @@ -1311,7 +1236,19 @@ def _get_roll(self, i, before_day_of_month, after_day_of_month): return roll def _apply_index_days(self, i, roll): - i += (roll % 2) * Timedelta(days=self.day_of_month).value + """Add days portion of offset to DatetimeIndex i + + Parameters + ---------- + i : DatetimeIndex + roll : ndarray[int64_t] + + Returns + ------- + result : DatetimeIndex + """ + nanos = (roll % 2) * Timedelta(days=self.day_of_month).value + i += nanos.astype('timedelta64[ns]') return i + Timedelta(days=-1) @@ -1336,23 +1273,9 @@ def onOffset(self, dt): return dt.day in (1, self.day_of_month) def _apply(self, n, other): - # if other.day is not day_of_month move to day_of_month and update n - if other.day < self.day_of_month: - other += relativedelta(day=self.day_of_month) - if n == 0: - n = -1 - else: - n -= 1 - elif other.day > self.day_of_month: - other += relativedelta(day=self.day_of_month) - if n == 0: - n = 1 - elif n < 0: - n += 1 - months = n // 2 + n % 2 day = 1 if n % 2 else self.day_of_month - return other + relativedelta(months=months, day=day) + 
return shift_month(other, months, day) def _get_roll(self, i, before_day_of_month, after_day_of_month): n = self.n @@ -1370,193 +1293,23 @@ def _get_roll(self, i, before_day_of_month, after_day_of_month): return roll def _apply_index_days(self, i, roll): - return i + (roll % 2) * Timedelta(days=self.day_of_month - 1).value + """Add days portion of offset to DatetimeIndex i + Parameters + ---------- + i : DatetimeIndex + roll : ndarray[int64_t] -class BusinessMonthEnd(MonthOffset): - """DateOffset increments between business EOM dates""" + Returns + ------- + result : DatetimeIndex + """ + nanos = (roll % 2) * Timedelta(days=self.day_of_month - 1).value + return i + nanos.astype('timedelta64[ns]') - def isAnchored(self): - return (self.n == 1) - - @apply_wraps - def apply(self, other): - n = self.n - wkday, days_in_month = tslib.monthrange(other.year, other.month) - lastBDay = days_in_month - max(((wkday + days_in_month - 1) - % 7) - 4, 0) - - if n > 0 and not other.day >= lastBDay: - n = n - 1 - elif n <= 0 and other.day > lastBDay: - n = n + 1 - other = other + relativedelta(months=n, day=31) - - if other.weekday() > 4: - other = other - BDay() - return other - - _prefix = 'BM' - - -class BusinessMonthBegin(MonthOffset): - """DateOffset of one business month at beginning""" - - @apply_wraps - def apply(self, other): - n = self.n - wkday, _ = tslib.monthrange(other.year, other.month) - first = _get_firstbday(wkday) - - if other.day > first and n <= 0: - # as if rolled forward already - n += 1 - elif other.day < first and n > 0: - other = other + timedelta(days=first - other.day) - n -= 1 - - other = other + relativedelta(months=n) - wkday, _ = tslib.monthrange(other.year, other.month) - first = _get_firstbday(wkday) - result = datetime(other.year, other.month, first, - other.hour, other.minute, - other.second, other.microsecond) - return result - - def onOffset(self, dt): - if self.normalize and not _is_normalized(dt): - return False - first_weekday, _ = 
tslib.monthrange(dt.year, dt.month) - if first_weekday == 5: - return dt.day == 3 - elif first_weekday == 6: - return dt.day == 2 - else: - return dt.day == 1 - - _prefix = 'BMS' - - -class CustomBusinessMonthEnd(BusinessMixin, MonthOffset): - """ - **EXPERIMENTAL** DateOffset of one custom business month - - .. warning:: EXPERIMENTAL - - This class is not officially supported and the API is likely to change - in future versions. Use this at your own risk. - - Parameters - ---------- - n : int, default 1 - offset : timedelta, default timedelta(0) - normalize : bool, default False - Normalize start/end dates to midnight before generating date range - weekmask : str, Default 'Mon Tue Wed Thu Fri' - weekmask of valid business days, passed to ``numpy.busdaycalendar`` - holidays : list - list/array of dates to exclude from the set of valid business days, - passed to ``numpy.busdaycalendar`` - calendar : pd.HolidayCalendar or np.busdaycalendar - """ - - _cacheable = False - _prefix = 'CBM' - - def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri', - holidays=None, calendar=None, **kwds): - self.n = int(n) - self.normalize = normalize - self.kwds = kwds - self.offset = kwds.get('offset', timedelta(0)) - self.cbday = CustomBusinessDay(n=self.n, normalize=normalize, - weekmask=weekmask, holidays=holidays, - calendar=calendar, **kwds) - self.m_offset = MonthEnd(n=1, normalize=normalize, **kwds) - self.kwds['calendar'] = self.cbday.calendar # cache numpy calendar - - @apply_wraps - def apply(self, other): - n = self.n - # First move to month offset - cur_mend = self.m_offset.rollforward(other) - # Find this custom month offset - cur_cmend = self.cbday.rollback(cur_mend) - - # handle zero case. 
arbitrarily rollforward - if n == 0 and other != cur_cmend: - n += 1 - - if other < cur_cmend and n >= 1: - n -= 1 - elif other > cur_cmend and n <= -1: - n += 1 - - new = cur_mend + n * self.m_offset - result = self.cbday.rollback(new) - return result - - -class CustomBusinessMonthBegin(BusinessMixin, MonthOffset): - """ - **EXPERIMENTAL** DateOffset of one custom business month - - .. warning:: EXPERIMENTAL - - This class is not officially supported and the API is likely to change - in future versions. Use this at your own risk. - - Parameters - ---------- - n : int, default 1 - offset : timedelta, default timedelta(0) - normalize : bool, default False - Normalize start/end dates to midnight before generating date range - weekmask : str, Default 'Mon Tue Wed Thu Fri' - weekmask of valid business days, passed to ``numpy.busdaycalendar`` - holidays : list - list/array of dates to exclude from the set of valid business days, - passed to ``numpy.busdaycalendar`` - calendar : pd.HolidayCalendar or np.busdaycalendar - """ - - _cacheable = False - _prefix = 'CBMS' - - def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri', - holidays=None, calendar=None, **kwds): - self.n = int(n) - self.normalize = normalize - self.kwds = kwds - self.offset = kwds.get('offset', timedelta(0)) - self.cbday = CustomBusinessDay(n=self.n, normalize=normalize, - weekmask=weekmask, holidays=holidays, - calendar=calendar, **kwds) - self.m_offset = MonthBegin(n=1, normalize=normalize, **kwds) - self.kwds['calendar'] = self.cbday.calendar # cache numpy calendar - - @apply_wraps - def apply(self, other): - n = self.n - dt_in = other - # First move to month offset - cur_mbegin = self.m_offset.rollback(dt_in) - # Find this custom month offset - cur_cmbegin = self.cbday.rollforward(cur_mbegin) - - # handle zero case. 
arbitrarily rollforward - if n == 0 and dt_in != cur_cmbegin: - n += 1 - - if dt_in > cur_cmbegin and n <= -1: - n += 1 - elif dt_in < cur_cmbegin and n >= 1: - n -= 1 - - new = cur_mbegin + n * self.m_offset - result = self.cbday.rollforward(new) - return result +# --------------------------------------------------------------------- +# Week-Based Offset Classes class Week(DateOffset): """ @@ -1568,49 +1321,36 @@ class Week(DateOffset): Always generate specific day of week. 0 for Monday """ _adjust_dst = True + _inc = timedelta(weeks=1) + _prefix = 'W' + _attributes = frozenset(['n', 'normalize', 'weekday']) - def __init__(self, n=1, normalize=False, **kwds): - self.n = n + def __init__(self, n=1, normalize=False, weekday=None): + self.n = self._validate_n(n) self.normalize = normalize - self.weekday = kwds.get('weekday', None) + self.weekday = weekday if self.weekday is not None: if self.weekday < 0 or self.weekday > 6: - raise ValueError('Day must be 0<=day<=6, got %d' % - self.weekday) - - self._inc = timedelta(weeks=1) - self.kwds = kwds + raise ValueError('Day must be 0<=day<=6, got {day}' + .format(day=self.weekday)) def isAnchored(self): return (self.n == 1 and self.weekday is not None) @apply_wraps def apply(self, other): - base = other if self.weekday is None: return other + self.n * self._inc - if self.n > 0: - k = self.n - otherDay = other.weekday() - if otherDay != self.weekday: - other = other + timedelta((self.weekday - otherDay) % 7) - k = k - 1 - other = other - for i in range(k): - other = other + self._inc - else: - k = self.n - otherDay = other.weekday() - if otherDay != self.weekday: - other = other + timedelta((self.weekday - otherDay) % 7) - for i in range(-k): - other = other - self._inc + k = self.n + otherDay = other.weekday() + if otherDay != self.weekday: + other = other + timedelta((self.weekday - otherDay) % 7) + if k > 0: + k -= 1 - other = datetime(other.year, other.month, other.day, - base.hour, base.minute, base.second, 
base.microsecond) - return other + return other + timedelta(weeks=k) @apply_index_wraps def apply_index(self, i): @@ -1618,20 +1358,48 @@ def apply_index(self, i): return ((i.to_period('W') + self.n).to_timestamp() + i.to_perioddelta('W')) else: - return self._end_apply_index(i, self.freqstr) + return self._end_apply_index(i) + + def _end_apply_index(self, dtindex): + """Add self to the given DatetimeIndex, specialized for case where + self.weekday is non-null. + + Parameters + ---------- + dtindex : DatetimeIndex + + Returns + ------- + result : DatetimeIndex + """ + off = dtindex.to_perioddelta('D') + + base, mult = libfrequencies.get_freq_code(self.freqstr) + base_period = dtindex.to_period(base) + if self.n > 0: + # when adding, dates on end roll to next + normed = dtindex - off + roll = np.where(base_period.to_timestamp(how='end') == normed, + self.n, self.n - 1) + else: + roll = self.n + + base = (base_period + roll).to_timestamp(how='end') + return base + off def onOffset(self, dt): if self.normalize and not _is_normalized(dt): return False + elif self.weekday is None: + return True return dt.weekday() == self.weekday - _prefix = 'W' - @property def rule_code(self): suffix = '' if self.weekday is not None: - suffix = '-%s' % (_int_to_weekday[self.weekday]) + weekday = ccalendar.int_to_weekday[self.weekday] + suffix = '-{weekday}'.format(weekday=weekday) return self._prefix + suffix @classmethod @@ -1639,42 +1407,42 @@ def _from_name(cls, suffix=None): if not suffix: weekday = None else: - weekday = _weekday_to_int[suffix] + weekday = ccalendar.weekday_to_int[suffix] return cls(weekday=weekday) -class WeekDay(object): - MON = 0 - TUE = 1 - WED = 2 - THU = 3 - FRI = 4 - SAT = 5 - SUN = 6 +class _WeekOfMonthMixin(object): + """Mixin for methods common to WeekOfMonth and LastWeekOfMonth""" + @apply_wraps + def apply(self, other): + compare_day = self._get_offset_day(other) -_int_to_weekday = { - WeekDay.MON: 'MON', - WeekDay.TUE: 'TUE', - WeekDay.WED: 'WED', - 
WeekDay.THU: 'THU', - WeekDay.FRI: 'FRI', - WeekDay.SAT: 'SAT', - WeekDay.SUN: 'SUN' -} + months = self.n + if months > 0 and compare_day > other.day: + months -= 1 + elif months <= 0 and compare_day < other.day: + months += 1 -_weekday_to_int = dict((v, k) for k, v in _int_to_weekday.items()) + shifted = shift_month(other, months, 'start') + to_day = self._get_offset_day(shifted) + return shift_day(shifted, to_day - shifted.day) + def onOffset(self, dt): + if self.normalize and not _is_normalized(dt): + return False + return dt.day == self._get_offset_day(dt) -class WeekOfMonth(DateOffset): + +class WeekOfMonth(_WeekOfMonthMixin, DateOffset): """ Describes monthly dates like "the Tuesday of the 2nd week of each month" Parameters ---------- n : int - week : {0, 1, 2, 3, ...} + week : {0, 1, 2, 3, ...}, default 0 0 is 1st week of month, 1 2nd week, etc. - weekday : {0, 1, ..., 6} + weekday : {0, 1, ..., 6}, default 0 0: Mondays 1: Tuesdays 2: Wednesdays @@ -1683,94 +1451,72 @@ class WeekOfMonth(DateOffset): 5: Saturdays 6: Sundays """ - + _prefix = 'WOM' _adjust_dst = True + _attributes = frozenset(['n', 'normalize', 'week', 'weekday']) - def __init__(self, n=1, normalize=False, **kwds): - self.n = n + def __init__(self, n=1, normalize=False, week=0, weekday=0): + self.n = self._validate_n(n) self.normalize = normalize - self.weekday = kwds['weekday'] - self.week = kwds['week'] + self.weekday = weekday + self.week = week if self.n == 0: raise ValueError('N cannot be 0') if self.weekday < 0 or self.weekday > 6: - raise ValueError('Day must be 0<=day<=6, got %d' % - self.weekday) + raise ValueError('Day must be 0<=day<=6, got {day}' + .format(day=self.weekday)) if self.week < 0 or self.week > 3: - raise ValueError('Week must be 0<=day<=3, got %d' % - self.week) - - self.kwds = kwds - - @apply_wraps - def apply(self, other): - base = other - offsetOfMonth = self.getOffsetOfMonth(other) + raise ValueError('Week must be 0<=week<=3, got {week}' + .format(week=self.week)) 
- if offsetOfMonth > other: - if self.n > 0: - months = self.n - 1 - else: - months = self.n - elif offsetOfMonth == other: - months = self.n - else: - if self.n > 0: - months = self.n - else: - months = self.n + 1 - - other = self.getOffsetOfMonth( - other + relativedelta(months=months, day=1)) - other = datetime(other.year, other.month, other.day, base.hour, - base.minute, base.second, base.microsecond) - return other - - def getOffsetOfMonth(self, dt): - w = Week(weekday=self.weekday) - d = datetime(dt.year, dt.month, 1, tzinfo=dt.tzinfo) - d = w.rollforward(d) - - for i in range(self.week): - d = w.apply(d) + def _get_offset_day(self, other): + """ + Find the day in the same month as other that has the same + weekday as self.weekday and is the self.week'th such day in the month. - return d + Parameters + ---------- + other: datetime - def onOffset(self, dt): - if self.normalize and not _is_normalized(dt): - return False - d = datetime(dt.year, dt.month, dt.day, tzinfo=dt.tzinfo) - return d == self.getOffsetOfMonth(dt) + Returns + ------- + day: int + """ + mstart = datetime(other.year, other.month, 1) + wday = mstart.weekday() + shift_days = (self.weekday - wday) % 7 + return 1 + shift_days + self.week * 7 @property def rule_code(self): - return '%s-%d%s' % (self._prefix, self.week + 1, - _int_to_weekday.get(self.weekday, '')) - - _prefix = 'WOM' + weekday = ccalendar.int_to_weekday.get(self.weekday, '') + return '{prefix}-{week}{weekday}'.format(prefix=self._prefix, + week=self.week + 1, + weekday=weekday) @classmethod def _from_name(cls, suffix=None): if not suffix: - raise ValueError("Prefix %r requires a suffix." % (cls._prefix)) + raise ValueError("Prefix {prefix!r} requires a suffix." + .format(prefix=cls._prefix)) # TODO: handle n here... # only one digit weeks (1 --> week 0, 2 --> week 1, etc.) 
week = int(suffix[0]) - 1 - weekday = _weekday_to_int[suffix[1:]] + weekday = ccalendar.weekday_to_int[suffix[1:]] return cls(week=week, weekday=weekday) -class LastWeekOfMonth(DateOffset): +class LastWeekOfMonth(_WeekOfMonthMixin, DateOffset): """ Describes monthly dates in last week of month like "the last Tuesday of each month" Parameters ---------- - n : int - weekday : {0, 1, ..., 6} + n : int, default 1 + weekday : {0, 1, ..., 6}, default 0 0: Mondays 1: Tuesdays 2: Wednesdays @@ -1778,88 +1524,78 @@ class LastWeekOfMonth(DateOffset): 4: Fridays 5: Saturdays 6: Sundays + """ + _prefix = 'LWOM' + _adjust_dst = True + _attributes = frozenset(['n', 'normalize', 'weekday']) - def __init__(self, n=1, normalize=False, **kwds): - self.n = n + def __init__(self, n=1, normalize=False, weekday=0): + self.n = self._validate_n(n) self.normalize = normalize - self.weekday = kwds['weekday'] + self.weekday = weekday if self.n == 0: raise ValueError('N cannot be 0') if self.weekday < 0 or self.weekday > 6: - raise ValueError('Day must be 0<=day<=6, got %d' % - self.weekday) - - self.kwds = kwds + raise ValueError('Day must be 0<=day<=6, got {day}' + .format(day=self.weekday)) - @apply_wraps - def apply(self, other): - offsetOfMonth = self.getOffsetOfMonth(other) - - if offsetOfMonth > other: - if self.n > 0: - months = self.n - 1 - else: - months = self.n - elif offsetOfMonth == other: - months = self.n - else: - if self.n > 0: - months = self.n - else: - months = self.n + 1 - - return self.getOffsetOfMonth( - other + relativedelta(months=months, day=1)) + def _get_offset_day(self, other): + """ + Find the day in the same month as other that has the same + weekday as self.weekday and is the last such day in the month. 
- def getOffsetOfMonth(self, dt): - m = MonthEnd() - d = datetime(dt.year, dt.month, 1, dt.hour, dt.minute, - dt.second, dt.microsecond, tzinfo=dt.tzinfo) - eom = m.rollforward(d) - w = Week(weekday=self.weekday) - return w.rollback(eom) + Parameters + ---------- + other: datetime - def onOffset(self, dt): - if self.normalize and not _is_normalized(dt): - return False - return dt == self.getOffsetOfMonth(dt) + Returns + ------- + day: int + """ + dim = ccalendar.get_days_in_month(other.year, other.month) + mend = datetime(other.year, other.month, dim) + wday = mend.weekday() + shift_days = (wday - self.weekday) % 7 + return dim - shift_days @property def rule_code(self): - return '%s-%s' % (self._prefix, _int_to_weekday.get(self.weekday, '')) - - _prefix = 'LWOM' + weekday = ccalendar.int_to_weekday.get(self.weekday, '') + return '{prefix}-{weekday}'.format(prefix=self._prefix, + weekday=weekday) @classmethod def _from_name(cls, suffix=None): if not suffix: - raise ValueError("Prefix %r requires a suffix." % (cls._prefix)) + raise ValueError("Prefix {prefix!r} requires a suffix." + .format(prefix=cls._prefix)) # TODO: handle n here... - weekday = _weekday_to_int[suffix] + weekday = ccalendar.weekday_to_int[suffix] return cls(weekday=weekday) +# --------------------------------------------------------------------- +# Quarter-Based Offset Classes + class QuarterOffset(DateOffset): """Quarter representation - doesn't call super""" - - #: default month for __init__ _default_startingMonth = None - #: default month in _from_name _from_name_startingMonth = None _adjust_dst = True + _attributes = frozenset(['n', 'normalize', 'startingMonth']) # TODO: Consider combining QuarterOffset and YearOffset __init__ at some - # point + # point. 
Also apply_index, onOffset, rule_code if + # startingMonth vs month attr names are resolved - def __init__(self, n=1, normalize=False, **kwds): - self.n = n + def __init__(self, n=1, normalize=False, startingMonth=None): + self.n = self._validate_n(n) self.normalize = normalize - self.startingMonth = kwds.get('startingMonth', - self._default_startingMonth) - - self.kwds = kwds + if startingMonth is None: + startingMonth = self._default_startingMonth + self.startingMonth = startingMonth def isAnchored(self): return (self.n == 1 and self.startingMonth is not None) @@ -1868,7 +1604,7 @@ def isAnchored(self): def _from_name(cls, suffix=None): kwargs = {} if suffix: - kwargs['startingMonth'] = _month_to_int[suffix] + kwargs['startingMonth'] = ccalendar.MONTH_TO_CAL_NUM[suffix] else: if cls._from_name_startingMonth is not None: kwargs['startingMonth'] = cls._from_name_startingMonth @@ -1876,7 +1612,33 @@ def _from_name(cls, suffix=None): @property def rule_code(self): - return '%s-%s' % (self._prefix, _int_to_month[self.startingMonth]) + month = ccalendar.MONTH_ALIASES[self.startingMonth] + return '{prefix}-{month}'.format(prefix=self._prefix, month=month) + + @apply_wraps + def apply(self, other): + # months_since: find the calendar quarter containing other.month, + # e.g. if other.month == 8, the calendar quarter is [Jul, Aug, Sep]. + # Then find the month in that quarter containing an onOffset date for + # self. `months_since` is the number of months to shift other.month + # to get to this on-offset month. 
+ months_since = other.month % 3 - self.startingMonth % 3 + qtrs = liboffsets.roll_qtrday(other, self.n, self.startingMonth, + day_opt=self._day_opt, modby=3) + months = qtrs * 3 - months_since + return shift_month(other, months, self._day_opt) + + def onOffset(self, dt): + if self.normalize and not _is_normalized(dt): + return False + mod_month = (dt.month - self.startingMonth) % 3 + return mod_month == 0 and dt.day == self._get_offset_day(dt) + + @apply_index_wraps + def apply_index(self, dtindex): + shifted = liboffsets.shift_quarters(dtindex.asi8, self.n, + self.startingMonth, self._day_opt) + return dtindex._shallow_copy(shifted) class BQuarterEnd(QuarterOffset): @@ -1887,45 +1649,9 @@ class BQuarterEnd(QuarterOffset): """ _outputName = 'BusinessQuarterEnd' _default_startingMonth = 3 - # 'BQ' _from_name_startingMonth = 12 _prefix = 'BQ' - - @apply_wraps - def apply(self, other): - n = self.n - base = other - other = datetime(other.year, other.month, other.day, - other.hour, other.minute, other.second, - other.microsecond) - - wkday, days_in_month = tslib.monthrange(other.year, other.month) - lastBDay = days_in_month - max(((wkday + days_in_month - 1) - % 7) - 4, 0) - - monthsToGo = 3 - ((other.month - self.startingMonth) % 3) - if monthsToGo == 3: - monthsToGo = 0 - - if n > 0 and not (other.day >= lastBDay and monthsToGo == 0): - n = n - 1 - elif n <= 0 and other.day > lastBDay and monthsToGo == 0: - n = n + 1 - - other = other + relativedelta(months=monthsToGo + 3 * n, day=31) - other = tslib._localize_pydatetime(other, base.tzinfo) - if other.weekday() > 4: - other = other - BDay() - return other - - def onOffset(self, dt): - if self.normalize and not _is_normalized(dt): - return False - modMonth = (dt.month - self.startingMonth) % 3 - return BMonthEnd().onOffset(dt) and modMonth == 0 - -_int_to_month = tslib._MONTH_ALIASES -_month_to_int = dict((v, k) for k, v in _int_to_month.items()) + _day_opt = 'business_end' # TODO: This is basically the same as 
BQuarterEnd @@ -1935,34 +1661,7 @@ class BQuarterBegin(QuarterOffset): _default_startingMonth = 3 _from_name_startingMonth = 1 _prefix = 'BQS' - - @apply_wraps - def apply(self, other): - n = self.n - wkday, _ = tslib.monthrange(other.year, other.month) - - first = _get_firstbday(wkday) - - monthsSince = (other.month - self.startingMonth) % 3 - - if n <= 0 and monthsSince != 0: # make sure to roll forward so negate - monthsSince = monthsSince - 3 - - # roll forward if on same month later than first bday - if n <= 0 and (monthsSince == 0 and other.day > first): - n = n + 1 - # pretend to roll back if on same month but before firstbday - elif n > 0 and (monthsSince == 0 and other.day < first): - n = n - 1 - - # get the first bday for result - other = other + relativedelta(months=3 * n - monthsSince) - wkday, _ = tslib.monthrange(other.year, other.month) - first = _get_firstbday(wkday) - result = datetime(other.year, other.month, first, - other.hour, other.minute, other.second, - other.microsecond) - return result + _day_opt = 'business_start' class QuarterEnd(QuarterOffset): @@ -1974,44 +1673,7 @@ class QuarterEnd(QuarterOffset): _outputName = 'QuarterEnd' _default_startingMonth = 3 _prefix = 'Q' - - def __init__(self, n=1, normalize=False, **kwds): - self.n = n - self.normalize = normalize - self.startingMonth = kwds.get('startingMonth', 3) - - self.kwds = kwds - - def isAnchored(self): - return (self.n == 1 and self.startingMonth is not None) - - @apply_wraps - def apply(self, other): - n = self.n - other = datetime(other.year, other.month, other.day, - other.hour, other.minute, other.second, - other.microsecond) - wkday, days_in_month = tslib.monthrange(other.year, other.month) - - monthsToGo = 3 - ((other.month - self.startingMonth) % 3) - if monthsToGo == 3: - monthsToGo = 0 - - if n > 0 and not (other.day >= days_in_month and monthsToGo == 0): - n = n - 1 - - other = other + relativedelta(months=monthsToGo + 3 * n, day=31) - return other - - @apply_index_wraps 
- def apply_index(self, i): - return self._end_apply_index(i, self.freqstr) - - def onOffset(self, dt): - if self.normalize and not _is_normalized(dt): - return False - modMonth = (dt.month - self.startingMonth) % 3 - return MonthEnd().onOffset(dt) and modMonth == 0 + _day_opt = 'end' class QuarterBegin(QuarterOffset): @@ -2019,58 +1681,62 @@ class QuarterBegin(QuarterOffset): _default_startingMonth = 3 _from_name_startingMonth = 1 _prefix = 'QS' + _day_opt = 'start' - def isAnchored(self): - return (self.n == 1 and self.startingMonth is not None) - - @apply_wraps - def apply(self, other): - n = self.n - wkday, days_in_month = tslib.monthrange(other.year, other.month) - monthsSince = (other.month - self.startingMonth) % 3 +# --------------------------------------------------------------------- +# Year-Based Offset Classes - if n <= 0 and monthsSince != 0: - # make sure you roll forward, so negate - monthsSince = monthsSince - 3 +class YearOffset(DateOffset): + """DateOffset that just needs a month""" + _adjust_dst = True + _attributes = frozenset(['n', 'normalize', 'month']) - if n <= 0 and (monthsSince == 0 and other.day > 1): - # after start, so come back an extra period as if rolled forward - n = n + 1 + def _get_offset_day(self, other): + # override BaseOffset method to use self.month instead of other.month + # TODO: there may be a more performant way to do this + return liboffsets.get_day_of_month(other.replace(month=self.month), + self._day_opt) - other = other + relativedelta(months=3 * n - monthsSince, day=1) - return other + @apply_wraps + def apply(self, other): + years = roll_yearday(other, self.n, self.month, self._day_opt) + months = years * 12 + (self.month - other.month) + return shift_month(other, months, self._day_opt) @apply_index_wraps - def apply_index(self, i): - freq_month = 12 if self.startingMonth == 1 else self.startingMonth - 1 - # freq_month = self.startingMonth - freqstr = 'Q-%s' % (_int_to_month[freq_month],) - return 
self._beg_apply_index(i, freqstr) + def apply_index(self, dtindex): + shifted = liboffsets.shift_quarters(dtindex.asi8, self.n, + self.month, self._day_opt, + modby=12) + return dtindex._shallow_copy(shifted) + def onOffset(self, dt): + if self.normalize and not _is_normalized(dt): + return False + return dt.month == self.month and dt.day == self._get_offset_day(dt) -class YearOffset(DateOffset): - """DateOffset that just needs a month""" - _adjust_dst = True + def __init__(self, n=1, normalize=False, month=None): + self.n = self._validate_n(n) + self.normalize = normalize - def __init__(self, n=1, normalize=False, **kwds): - self.month = kwds.get('month', self._default_month) + month = month if month is not None else self._default_month + self.month = month if self.month < 1 or self.month > 12: raise ValueError('Month must go from 1 to 12') - DateOffset.__init__(self, n=n, normalize=normalize, **kwds) - @classmethod def _from_name(cls, suffix=None): kwargs = {} if suffix: - kwargs['month'] = _month_to_int[suffix] + kwargs['month'] = ccalendar.MONTH_TO_CAL_NUM[suffix] return cls(**kwargs) @property def rule_code(self): - return '%s-%s' % (self._prefix, _int_to_month[self.month]) + month = ccalendar.MONTH_ALIASES[self.month] + return '{prefix}-{month}'.format(prefix=self._prefix, month=month) class BYearEnd(YearOffset): @@ -2078,35 +1744,7 @@ class BYearEnd(YearOffset): _outputName = 'BusinessYearEnd' _default_month = 12 _prefix = 'BA' - - @apply_wraps - def apply(self, other): - n = self.n - wkday, days_in_month = tslib.monthrange(other.year, self.month) - lastBDay = (days_in_month - - max(((wkday + days_in_month - 1) % 7) - 4, 0)) - - years = n - if n > 0: - if (other.month < self.month or - (other.month == self.month and other.day < lastBDay)): - years -= 1 - elif n <= 0: - if (other.month > self.month or - (other.month == self.month and other.day > lastBDay)): - years += 1 - - other = other + relativedelta(years=years) - - _, days_in_month = 
tslib.monthrange(other.year, self.month) - result = datetime(other.year, self.month, days_in_month, - other.hour, other.minute, other.second, - other.microsecond) - - if result.weekday() > 4: - result = result - BDay() - - return result + _day_opt = 'business_end' class BYearBegin(YearOffset): @@ -2114,145 +1752,25 @@ class BYearBegin(YearOffset): _outputName = 'BusinessYearBegin' _default_month = 1 _prefix = 'BAS' - - @apply_wraps - def apply(self, other): - n = self.n - wkday, days_in_month = tslib.monthrange(other.year, self.month) - - first = _get_firstbday(wkday) - - years = n - - if n > 0: # roll back first for positive n - if (other.month < self.month or - (other.month == self.month and other.day < first)): - years -= 1 - elif n <= 0: # roll forward - if (other.month > self.month or - (other.month == self.month and other.day > first)): - years += 1 - - # set first bday for result - other = other + relativedelta(years=years) - wkday, days_in_month = tslib.monthrange(other.year, self.month) - first = _get_firstbday(wkday) - return datetime(other.year, self.month, first, other.hour, - other.minute, other.second, other.microsecond) + _day_opt = 'business_start' class YearEnd(YearOffset): """DateOffset increments between calendar year ends""" _default_month = 12 _prefix = 'A' - - @apply_wraps - def apply(self, other): - def _increment(date): - if date.month == self.month: - _, days_in_month = tslib.monthrange(date.year, self.month) - if date.day != days_in_month: - year = date.year - else: - year = date.year + 1 - elif date.month < self.month: - year = date.year - else: - year = date.year + 1 - _, days_in_month = tslib.monthrange(year, self.month) - return datetime(year, self.month, days_in_month, - date.hour, date.minute, date.second, - date.microsecond) - - def _decrement(date): - year = date.year if date.month > self.month else date.year - 1 - _, days_in_month = tslib.monthrange(year, self.month) - return datetime(year, self.month, days_in_month, - date.hour, 
date.minute, date.second, - date.microsecond) - - def _rollf(date): - if date.month != self.month or\ - date.day < tslib.monthrange(date.year, date.month)[1]: - date = _increment(date) - return date - - n = self.n - result = other - if n > 0: - while n > 0: - result = _increment(result) - n -= 1 - elif n < 0: - while n < 0: - result = _decrement(result) - n += 1 - else: - # n == 0, roll forward - result = _rollf(result) - return result - - @apply_index_wraps - def apply_index(self, i): - # convert month anchor to annual period tuple - return self._end_apply_index(i, self.freqstr) - - def onOffset(self, dt): - if self.normalize and not _is_normalized(dt): - return False - wkday, days_in_month = tslib.monthrange(dt.year, self.month) - return self.month == dt.month and dt.day == days_in_month + _day_opt = 'end' class YearBegin(YearOffset): """DateOffset increments between calendar year begin dates""" _default_month = 1 _prefix = 'AS' + _day_opt = 'start' - @apply_wraps - def apply(self, other): - def _increment(date, n): - year = date.year + n - 1 - if date.month >= self.month: - year += 1 - return datetime(year, self.month, 1, date.hour, date.minute, - date.second, date.microsecond) - - def _decrement(date, n): - year = date.year + n + 1 - if date.month < self.month or (date.month == self.month and - date.day == 1): - year -= 1 - return datetime(year, self.month, 1, date.hour, date.minute, - date.second, date.microsecond) - - def _rollf(date): - if (date.month != self.month) or date.day > 1: - date = _increment(date, 1) - return date - - n = self.n - result = other - if n > 0: - result = _increment(result, n) - elif n < 0: - result = _decrement(result, n) - else: - # n == 0, roll forward - result = _rollf(result) - return result - - @apply_index_wraps - def apply_index(self, i): - freq_month = 12 if self.month == 1 else self.month - 1 - freqstr = 'A-%s' % (_int_to_month[freq_month],) - return self._beg_apply_index(i, freqstr) - - def onOffset(self, dt): - if 
self.normalize and not _is_normalized(dt): - return False - return dt.month == self.month and dt.day == 1 +# --------------------------------------------------------------------- +# Special Offset Classes class FY5253(DateOffset): """ @@ -2266,8 +1784,7 @@ class FY5253(DateOffset): such as retail, manufacturing and parking industry. For more information see: - http://en.wikipedia.org/wiki/4%E2%80%934%E2%80%935_calendar - + http://en.wikipedia.org/wiki/4-4-5_calendar The year may either: - end on the last X day of the Y month. @@ -2291,39 +1808,30 @@ class FY5253(DateOffset): variation : str {"nearest", "last"} for "LastOfMonth" or "NearestEndMonth" """ - _prefix = 'RE' - _suffix_prefix_last = 'L' - _suffix_prefix_nearest = 'N' _adjust_dst = True + _attributes = frozenset(['weekday', 'startingMonth', 'variation']) - def __init__(self, n=1, normalize=False, **kwds): - self.n = n + def __init__(self, n=1, normalize=False, weekday=0, startingMonth=1, + variation="nearest"): + self.n = self._validate_n(n) self.normalize = normalize - self.startingMonth = kwds['startingMonth'] - self.weekday = kwds["weekday"] + self.startingMonth = startingMonth + self.weekday = weekday - self.variation = kwds["variation"] - - self.kwds = kwds + self.variation = variation if self.n == 0: raise ValueError('N cannot be 0') if self.variation not in ["nearest", "last"]: - raise ValueError('%s is not a valid variation' % self.variation) - - if self.variation == "nearest": - weekday_offset = weekday(self.weekday) - self._rd_forward = relativedelta(weekday=weekday_offset) - self._rd_backward = relativedelta(weekday=weekday_offset(-1)) - else: - self._offset_lwom = LastWeekOfMonth(n=1, weekday=self.weekday) + raise ValueError('{variation} is not a valid variation' + .format(variation=self.variation)) def isAnchored(self): - return self.n == 1 \ - and self.startingMonth is not None \ - and self.weekday is not None + return (self.n == 1 and + self.startingMonth is not None and + self.weekday is 
not None) def onOffset(self, dt): if self.normalize and not _is_normalized(dt): @@ -2333,13 +1841,15 @@ def onOffset(self, dt): if self.variation == "nearest": # We have to check the year end of "this" cal year AND the previous - return year_end == dt or \ - self.get_year_end(dt - relativedelta(months=1)) == dt + return (year_end == dt or + self.get_year_end(shift_month(dt, -1, None)) == dt) else: return year_end == dt @apply_wraps def apply(self, other): + norm = Timestamp(other).normalize() + n = self.n prev_year = self.get_year_end( datetime(other.year - 1, self.startingMonth, 1)) @@ -2347,112 +1857,89 @@ def apply(self, other): datetime(other.year, self.startingMonth, 1)) next_year = self.get_year_end( datetime(other.year + 1, self.startingMonth, 1)) + prev_year = tslib._localize_pydatetime(prev_year, other.tzinfo) cur_year = tslib._localize_pydatetime(cur_year, other.tzinfo) next_year = tslib._localize_pydatetime(next_year, other.tzinfo) - if n > 0: - if other == prev_year: - year = other.year - 1 - elif other == cur_year: - year = other.year - elif other == next_year: - year = other.year + 1 - elif other < prev_year: - year = other.year - 1 - n -= 1 - elif other < cur_year: - year = other.year - n -= 1 - elif other < next_year: - year = other.year + 1 + # Note: next_year.year == other.year + 1, so we will always + # have other < next_year + if norm == prev_year: + n -= 1 + elif norm == cur_year: + pass + elif n > 0: + if norm < prev_year: + n -= 2 + elif prev_year < norm < cur_year: n -= 1 - else: - assert False - - result = self.get_year_end( - datetime(year + n, self.startingMonth, 1)) - - result = datetime(result.year, result.month, result.day, - other.hour, other.minute, other.second, - other.microsecond) - return result + elif cur_year < norm < next_year: + pass else: - n = -n - if other == prev_year: - year = other.year - 1 - elif other == cur_year: - year = other.year - elif other == next_year: - year = other.year + 1 - elif other > next_year: - year = 
other.year + 1 - n -= 1 - elif other > cur_year: - year = other.year - n -= 1 - elif other > prev_year: - year = other.year - 1 + if cur_year < norm < next_year: + n += 1 + elif prev_year < norm < cur_year: + pass + elif (norm.year == prev_year.year and norm < prev_year and + prev_year - norm <= timedelta(6)): + # GH#14774, error when next_year.year == cur_year.year + # e.g. prev_year == datetime(2004, 1, 3), + # other == datetime(2004, 1, 1) n -= 1 else: assert False - result = self.get_year_end( - datetime(year - n, self.startingMonth, 1)) - - result = datetime(result.year, result.month, result.day, - other.hour, other.minute, other.second, - other.microsecond) - return result + shifted = datetime(other.year + n, self.startingMonth, 1) + result = self.get_year_end(shifted) + result = datetime(result.year, result.month, result.day, + other.hour, other.minute, other.second, + other.microsecond) + return result def get_year_end(self, dt): - if self.variation == "nearest": - return self._get_year_end_nearest(dt) - else: - return self._get_year_end_last(dt) - - def get_target_month_end(self, dt): - target_month = datetime( - dt.year, self.startingMonth, 1, tzinfo=dt.tzinfo) - next_month_first_of = target_month + relativedelta(months=+1) - return next_month_first_of + relativedelta(days=-1) + assert dt.tzinfo is None - def _get_year_end_nearest(self, dt): - target_date = self.get_target_month_end(dt) - if target_date.weekday() == self.weekday: + dim = ccalendar.get_days_in_month(dt.year, self.startingMonth) + target_date = datetime(dt.year, self.startingMonth, dim) + wkday_diff = self.weekday - target_date.weekday() + if wkday_diff == 0: + # year_end is the same for "last" and "nearest" cases return target_date - else: - forward = target_date + self._rd_forward - backward = target_date + self._rd_backward - if forward - target_date < target_date - backward: - return forward - else: - return backward + if self.variation == "last": + days_forward = (wkday_diff % 7) - 7 - 
def _get_year_end_last(self, dt): - current_year = datetime( - dt.year, self.startingMonth, 1, tzinfo=dt.tzinfo) - return current_year + self._offset_lwom + # days_forward is always negative, so we always end up + # in the same year as dt + return target_date + timedelta(days=days_forward) + else: + # variation == "nearest": + days_forward = wkday_diff % 7 + if days_forward <= 3: + # The upcoming self.weekday is closer than the previous one + return target_date + timedelta(days_forward) + else: + # The previous self.weekday is closer than the upcoming one + return target_date + timedelta(days_forward - 7) @property def rule_code(self): + prefix = self._prefix suffix = self.get_rule_code_suffix() - return "%s-%s" % (self._get_prefix(), suffix) - - def _get_prefix(self): - return self._prefix + return "{prefix}-{suffix}".format(prefix=prefix, suffix=suffix) def _get_suffix_prefix(self): if self.variation == "nearest": - return self._suffix_prefix_nearest + return 'N' else: - return self._suffix_prefix_last + return 'L' def get_rule_code_suffix(self): - return '%s-%s-%s' % (self._get_suffix_prefix(), - _int_to_month[self.startingMonth], - _int_to_weekday[self.weekday]) + prefix = self._get_suffix_prefix() + month = ccalendar.MONTH_ALIASES[self.startingMonth] + weekday = ccalendar.int_to_weekday[self.weekday] + return '{prefix}-{month}-{weekday}'.format(prefix=prefix, month=month, + weekday=weekday) @classmethod def _parse_suffix(cls, varion_code, startingMonth_code, weekday_code): @@ -2461,17 +1948,15 @@ def _parse_suffix(cls, varion_code, startingMonth_code, weekday_code): elif varion_code == "L": variation = "last" else: - raise ValueError( - "Unable to parse varion_code: %s" % (varion_code,)) + raise ValueError("Unable to parse varion_code: " + "{code}".format(code=varion_code)) - startingMonth = _month_to_int[startingMonth_code] - weekday = _weekday_to_int[weekday_code] + startingMonth = ccalendar.MONTH_TO_CAL_NUM[startingMonth_code] + weekday = 
ccalendar.weekday_to_int[weekday_code] - return { - "weekday": weekday, - "startingMonth": startingMonth, - "variation": variation, - } + return {"weekday": weekday, + "startingMonth": startingMonth, + "variation": variation} @classmethod def _from_name(cls, *args): @@ -2491,7 +1976,7 @@ class FY5253Quarter(DateOffset): such as retail, manufacturing and parking industry. For more information see: - http://en.wikipedia.org/wiki/4%E2%80%934%E2%80%935_calendar + http://en.wikipedia.org/wiki/4-4-5_calendar The year may either: - end on the last X day of the Y month. @@ -2524,66 +2009,102 @@ class FY5253Quarter(DateOffset): _prefix = 'REQ' _adjust_dst = True + _attributes = frozenset(['weekday', 'startingMonth', 'qtr_with_extra_week', + 'variation']) - def __init__(self, n=1, normalize=False, **kwds): - self.n = n + def __init__(self, n=1, normalize=False, weekday=0, startingMonth=1, + qtr_with_extra_week=1, variation="nearest"): + self.n = self._validate_n(n) self.normalize = normalize - self.qtr_with_extra_week = kwds["qtr_with_extra_week"] - - self.kwds = kwds + self.weekday = weekday + self.startingMonth = startingMonth + self.qtr_with_extra_week = qtr_with_extra_week + self.variation = variation if self.n == 0: raise ValueError('N cannot be 0') - self._offset = FY5253( - startingMonth=kwds['startingMonth'], - weekday=kwds["weekday"], - variation=kwds["variation"]) + @cache_readonly + def _offset(self): + return FY5253(startingMonth=self.startingMonth, + weekday=self.weekday, + variation=self.variation) def isAnchored(self): return self.n == 1 and self._offset.isAnchored() + def _rollback_to_year(self, other): + """roll `other` back to the most recent date that was on a fiscal year + end. Return the date of that year-end, the number of full quarters + elapsed between that year-end and other, and the remaining Timedelta + since the most recent quarter-end. 
+ + Parameters + ---------- + other : datetime or Timestamp + + Returns + ------- + tuple of + prev_year_end : Timestamp giving most recent fiscal year end + num_qtrs : int + tdelta : Timedelta + """ + num_qtrs = 0 + + norm = Timestamp(other).tz_localize(None) + start = self._offset.rollback(norm) + # Note: start <= norm and self._offset.onOffset(start) + + if start < norm: + # roll adjustment + qtr_lens = self.get_weeks(norm) + + # check thet qtr_lens is consistent with self._offset addition + end = shift_day(start, days=7 * sum(qtr_lens)) + assert self._offset.onOffset(end), (start, end, qtr_lens) + + tdelta = norm - start + for qlen in qtr_lens: + if qlen * 7 <= tdelta.days: + num_qtrs += 1 + tdelta -= Timedelta(days=qlen * 7) + else: + break + else: + tdelta = Timedelta(0) + + # Note: we always have tdelta.value >= 0 + return start, num_qtrs, tdelta + @apply_wraps def apply(self, other): - base = other + # Note: self.n == 0 is not allowed. n = self.n - if n > 0: - while n > 0: - if not self._offset.onOffset(other): - qtr_lens = self.get_weeks(other) - start = other - self._offset - else: - start = other - qtr_lens = self.get_weeks(other + self._offset) + prev_year_end, num_qtrs, tdelta = self._rollback_to_year(other) + res = prev_year_end + n += num_qtrs + if self.n <= 0 and tdelta.value > 0: + n += 1 - for weeks in qtr_lens: - start += relativedelta(weeks=weeks) - if start > other: - other = start - n -= 1 - break + # Possible speedup by handling years first. 
+ years = n // 4 + if years: + res += self._offset * years + n -= years * 4 - else: - n = -n - while n > 0: - if not self._offset.onOffset(other): - qtr_lens = self.get_weeks(other) - end = other + self._offset - else: - end = other - qtr_lens = self.get_weeks(other) - - for weeks in reversed(qtr_lens): - end -= relativedelta(weeks=weeks) - if end < other: - other = end - n -= 1 - break - other = datetime(other.year, other.month, other.day, - base.hour, base.minute, base.second, base.microsecond) - return other + # Add an extra day to make *sure* we are getting the quarter lengths + # for the upcoming year, not the previous year + qtr_lens = self.get_weeks(res + Timedelta(days=1)) + + # Note: we always have 0 <= n < 4 + weeks = sum(qtr_lens[:n]) + if weeks: + res = shift_day(res, days=weeks * 7) + + return res def get_weeks(self, dt): ret = [13] * 4 @@ -2596,16 +2117,15 @@ def get_weeks(self, dt): return ret def year_has_extra_week(self, dt): - if self._offset.onOffset(dt): - prev_year_end = dt - self._offset - next_year_end = dt - else: - next_year_end = dt + self._offset - prev_year_end = dt - self._offset - - week_in_year = (next_year_end - prev_year_end).days / 7 + # Avoid round-down errors --> normalize to get + # e.g. 
'370D' instead of '360D23H' + norm = Timestamp(dt).normalize().tz_localize(None) - return week_in_year == 53 + next_year_end = self._offset.rollforward(norm) + prev_year_end = norm - self._offset + weeks_in_year = (next_year_end - prev_year_end).days / 7 + assert weeks_in_year in [52, 53], weeks_in_year + return weeks_in_year == 53 def onOffset(self, dt): if self.normalize and not _is_normalized(dt): @@ -2618,8 +2138,8 @@ def onOffset(self, dt): qtr_lens = self.get_weeks(dt) current = next_year_end - for qtr_len in qtr_lens[0:4]: - current += relativedelta(weeks=qtr_len) + for qtr_len in qtr_lens: + current = shift_day(current, days=qtr_len * 7) if dt == current: return True return False @@ -2627,8 +2147,9 @@ def onOffset(self, dt): @property def rule_code(self): suffix = self._offset.get_rule_code_suffix() - return "%s-%s" % (self._prefix, - "%s-%d" % (suffix, self.qtr_with_extra_week)) + qtr = self.qtr_with_extra_week + return "{prefix}-{suffix}-{qtr}".format(prefix=self._prefix, + suffix=suffix, qtr=qtr) @classmethod def _from_name(cls, *args): @@ -2644,30 +2165,30 @@ class Easter(DateOffset): 1583-4099. 
""" _adjust_dst = True + _attributes = frozenset(['n', 'normalize']) - def __init__(self, n=1, **kwds): - super(Easter, self).__init__(n, **kwds) + def __init__(self, n=1, normalize=False): + self.n = self._validate_n(n) + self.normalize = normalize @apply_wraps def apply(self, other): - currentEaster = easter(other.year) - currentEaster = datetime( - currentEaster.year, currentEaster.month, currentEaster.day) - currentEaster = tslib._localize_pydatetime(currentEaster, other.tzinfo) + current_easter = easter(other.year) + current_easter = datetime(current_easter.year, + current_easter.month, current_easter.day) + current_easter = tslib._localize_pydatetime(current_easter, + other.tzinfo) + + n = self.n + if n >= 0 and other < current_easter: + n -= 1 + elif n < 0 and other > current_easter: + n += 1 + # TODO: Why does this handle the 0 case the opposite of others? # NOTE: easter returns a datetime.date so we have to convert to type of # other - if self.n >= 0: - if other >= currentEaster: - new = easter(other.year + self.n) - else: - new = easter(other.year + self.n - 1) - else: - if other > currentEaster: - new = easter(other.year + self.n + 1) - else: - new = easter(other.year + self.n) - + new = easter(other.year + n) new = datetime(new.year, new.month, new.day, other.hour, other.minute, other.second, other.microsecond) return new @@ -2690,6 +2211,13 @@ def f(self, other): class Tick(SingleConstructorOffset): _inc = Timedelta(microseconds=1000) + _prefix = 'undefined' + _attributes = frozenset(['n', 'normalize']) + + def __init__(self, n=1, normalize=False): + # TODO: do Tick classes with normalize=True make sense? 
+ self.n = self._validate_n(n) + self.normalize = normalize __gt__ = _tick_comp(operator.gt) __ge__ = _tick_comp(operator.ge) @@ -2711,8 +2239,8 @@ def __add__(self, other): except ApplyTypeError: return NotImplemented except OverflowError: - raise OverflowError("the add operation between {} and {} " - "will overflow".format(self, other)) + raise OverflowError("the add operation between {self} and {other} " + "will overflow".format(self=self, other=other)) def __eq__(self, other): if isinstance(other, compat.string_types): @@ -2723,7 +2251,8 @@ def __eq__(self, other): if isinstance(other, Tick): return self.delta == other.delta else: - return DateOffset.__eq__(self, other) + # TODO: Are there cases where this should raise TypeError? + return False # This is identical to DateOffset.__hash__, but has to be redefined here # for Python 3, because we've redefined __eq__. @@ -2739,7 +2268,8 @@ def __ne__(self, other): if isinstance(other, Tick): return self.delta != other.delta else: - return DateOffset.__ne__(self, other) + # TODO: Are there cases where this should raise TypeError? + return True @property def delta(self): @@ -2747,8 +2277,9 @@ def delta(self): @property def nanos(self): - return _delta_to_nanoseconds(self.delta) + return delta_to_nanoseconds(self.delta) + # TODO: Should Tick have its own apply_index? 
def apply(self, other): # Timestamp can handle tz and nano sec, thus no need to use apply_wraps if isinstance(other, Timestamp): @@ -2770,9 +2301,8 @@ def apply(self, other): elif isinstance(other, type(self)): return type(self)(self.n + other.n) - raise ApplyTypeError('Unhandled type: %s' % type(other).__name__) - - _prefix = 'undefined' + raise ApplyTypeError('Unhandled type: {type_str}' + .format(type_str=type(other).__name__)) def isAnchored(self): return False @@ -2791,7 +2321,7 @@ def _delta_to_tick(delta): else: return Second(seconds) else: - nanos = _delta_to_nanoseconds(delta) + nanos = delta_to_nanoseconds(delta) if nanos % 1000000 == 0: return Milli(nanos // 1000000) elif nanos % 1000 == 0: @@ -2799,8 +2329,6 @@ def _delta_to_tick(delta): else: # pragma: no cover return Nano(nanos) -_delta_to_nanoseconds = tslib._delta_to_nanoseconds - class Day(Tick): _inc = Timedelta(days=1) @@ -2844,19 +2372,7 @@ class Nano(Tick): CBMonthBegin = CustomBusinessMonthBegin CDay = CustomBusinessDay - -def _get_firstbday(wkday): - """ - wkday is the result of monthrange(year, month) - - If it's a saturday or sunday, increment first business day to reflect this - """ - first = 1 - if wkday == 5: # on Saturday - first = 3 - elif wkday == 6: # on Sunday - first = 2 - return first +# --------------------------------------------------------------------- def generate_range(start=None, end=None, periods=None, @@ -2919,7 +2435,8 @@ def generate_range(start=None, end=None, periods=None, # faster than cur + offset next_date = offset.apply(cur) if next_date <= cur: - raise ValueError('Offset %s did not increment date' % offset) + raise ValueError('Offset {offset} did not increment date' + .format(offset=offset)) cur = next_date else: while cur >= end: @@ -2928,9 +2445,11 @@ def generate_range(start=None, end=None, periods=None, # faster than cur + offset next_date = offset.apply(cur) if next_date >= cur: - raise ValueError('Offset %s did not decrement date' % offset) + raise 
ValueError('Offset {offset} did not decrement date' + .format(offset=offset)) cur = next_date + prefix_mapping = dict((offset._prefix, offset) for offset in [ YearBegin, # 'AS' YearEnd, # 'A' @@ -2948,6 +2467,7 @@ def generate_range(start=None, end=None, periods=None, CustomBusinessHour, # 'CBH' MonthEnd, # 'M' MonthBegin, # 'MS' + Nano, # 'N' SemiMonthEnd, # 'SM' SemiMonthBegin, # 'SMS' Week, # 'W' @@ -2963,5 +2483,3 @@ def generate_range(start=None, end=None, periods=None, FY5253, FY5253Quarter, ]) - -prefix_mapping['N'] = Nano diff --git a/pandas/tseries/plotting.py b/pandas/tseries/plotting.py index 89aecf2acc07e..302016907635d 100644 --- a/pandas/tseries/plotting.py +++ b/pandas/tseries/plotting.py @@ -1,313 +1,3 @@ -""" -Period formatters and locators adapted from scikits.timeseries by -Pierre GF Gerard-Marchant & Matt Knox -""" +# flake8: noqa -# TODO: Use the fact that axis can have units to simplify the process - -import numpy as np - -from matplotlib import pylab -from pandas.tseries.period import Period -from pandas.tseries.offsets import DateOffset -import pandas.tseries.frequencies as frequencies -from pandas.tseries.index import DatetimeIndex -from pandas.formats.printing import pprint_thing -import pandas.compat as compat - -from pandas.tseries.converter import (TimeSeries_DateLocator, - TimeSeries_DateFormatter) - -# --------------------------------------------------------------------- -# Plotting functions and monkey patches - - -def tsplot(series, plotf, ax=None, **kwargs): - """ - Plots a Series on the given Matplotlib axes or the current axes - - Parameters - ---------- - axes : Axes - series : Series - - Notes - _____ - Supports same kwargs as Axes.plot - - """ - # Used inferred freq is possible, need a test case for inferred - if ax is None: - import matplotlib.pyplot as plt - ax = plt.gca() - - freq, series = _maybe_resample(series, ax, kwargs) - - # Set ax with freq info - _decorate_axes(ax, freq, kwargs) - ax._plot_data.append((series, 
plotf, kwargs)) - lines = plotf(ax, series.index._mpl_repr(), series.values, **kwargs) - - # set date formatter, locators and rescale limits - format_dateaxis(ax, ax.freq) - return lines - - -def _maybe_resample(series, ax, kwargs): - # resample against axes freq if necessary - freq, ax_freq = _get_freq(ax, series) - - if freq is None: # pragma: no cover - raise ValueError('Cannot use dynamic axis without frequency info') - - # Convert DatetimeIndex to PeriodIndex - if isinstance(series.index, DatetimeIndex): - series = series.to_period(freq=freq) - - if ax_freq is not None and freq != ax_freq: - if frequencies.is_superperiod(freq, ax_freq): # upsample input - series = series.copy() - series.index = series.index.asfreq(ax_freq, how='s') - freq = ax_freq - elif _is_sup(freq, ax_freq): # one is weekly - how = kwargs.pop('how', 'last') - series = getattr(series.resample('D'), how)().dropna() - series = getattr(series.resample(ax_freq), how)().dropna() - freq = ax_freq - elif frequencies.is_subperiod(freq, ax_freq) or _is_sub(freq, ax_freq): - _upsample_others(ax, freq, kwargs) - ax_freq = freq - else: # pragma: no cover - raise ValueError('Incompatible frequency conversion') - return freq, series - - -def _is_sub(f1, f2): - return ((f1.startswith('W') and frequencies.is_subperiod('D', f2)) or - (f2.startswith('W') and frequencies.is_subperiod(f1, 'D'))) - - -def _is_sup(f1, f2): - return ((f1.startswith('W') and frequencies.is_superperiod('D', f2)) or - (f2.startswith('W') and frequencies.is_superperiod(f1, 'D'))) - - -def _upsample_others(ax, freq, kwargs): - legend = ax.get_legend() - lines, labels = _replot_ax(ax, freq, kwargs) - _replot_ax(ax, freq, kwargs) - - other_ax = None - if hasattr(ax, 'left_ax'): - other_ax = ax.left_ax - if hasattr(ax, 'right_ax'): - other_ax = ax.right_ax - - if other_ax is not None: - rlines, rlabels = _replot_ax(other_ax, freq, kwargs) - lines.extend(rlines) - labels.extend(rlabels) - - if (legend is not None and kwargs.get('legend', 
True) and - len(lines) > 0): - title = legend.get_title().get_text() - if title == 'None': - title = None - ax.legend(lines, labels, loc='best', title=title) - - -def _replot_ax(ax, freq, kwargs): - data = getattr(ax, '_plot_data', None) - - # clear current axes and data - ax._plot_data = [] - ax.clear() - - _decorate_axes(ax, freq, kwargs) - - lines = [] - labels = [] - if data is not None: - for series, plotf, kwds in data: - series = series.copy() - idx = series.index.asfreq(freq, how='S') - series.index = idx - ax._plot_data.append((series, plotf, kwds)) - - # for tsplot - if isinstance(plotf, compat.string_types): - from pandas.tools.plotting import _plot_klass - plotf = _plot_klass[plotf]._plot - - lines.append(plotf(ax, series.index._mpl_repr(), - series.values, **kwds)[0]) - labels.append(pprint_thing(series.name)) - - return lines, labels - - -def _decorate_axes(ax, freq, kwargs): - """Initialize axes for time-series plotting""" - if not hasattr(ax, '_plot_data'): - ax._plot_data = [] - - ax.freq = freq - xaxis = ax.get_xaxis() - xaxis.freq = freq - if not hasattr(ax, 'legendlabels'): - ax.legendlabels = [kwargs.get('label', None)] - else: - ax.legendlabels.append(kwargs.get('label', None)) - ax.view_interval = None - ax.date_axis_info = None - - -def _get_ax_freq(ax): - """ - Get the freq attribute of the ax object if set. 
- Also checks shared axes (eg when using secondary yaxis, sharex=True - or twinx) - """ - ax_freq = getattr(ax, 'freq', None) - if ax_freq is None: - # check for left/right ax in case of secondary yaxis - if hasattr(ax, 'left_ax'): - ax_freq = getattr(ax.left_ax, 'freq', None) - elif hasattr(ax, 'right_ax'): - ax_freq = getattr(ax.right_ax, 'freq', None) - if ax_freq is None: - # check if a shared ax (sharex/twinx) has already freq set - shared_axes = ax.get_shared_x_axes().get_siblings(ax) - if len(shared_axes) > 1: - for shared_ax in shared_axes: - ax_freq = getattr(shared_ax, 'freq', None) - if ax_freq is not None: - break - return ax_freq - - -def _get_freq(ax, series): - # get frequency from data - freq = getattr(series.index, 'freq', None) - if freq is None: - freq = getattr(series.index, 'inferred_freq', None) - - ax_freq = _get_ax_freq(ax) - - # use axes freq if no data freq - if freq is None: - freq = ax_freq - - # get the period frequency - if isinstance(freq, DateOffset): - freq = freq.rule_code - else: - freq = frequencies.get_base_alias(freq) - - freq = frequencies.get_period_alias(freq) - return freq, ax_freq - - -def _use_dynamic_x(ax, data): - freq = _get_index_freq(data) - ax_freq = _get_ax_freq(ax) - - if freq is None: # convert irregular if axes has freq info - freq = ax_freq - else: # do not use tsplot if irregular was plotted first - if (ax_freq is None) and (len(ax.get_lines()) > 0): - return False - - if freq is None: - return False - - if isinstance(freq, DateOffset): - freq = freq.rule_code - else: - freq = frequencies.get_base_alias(freq) - freq = frequencies.get_period_alias(freq) - - if freq is None: - return False - - # hack this for 0.10.1, creating more technical debt...sigh - if isinstance(data.index, DatetimeIndex): - base = frequencies.get_freq(freq) - x = data.index - if (base <= frequencies.FreqGroup.FR_DAY): - return x[:1].is_normalized - return Period(x[0], freq).to_timestamp(tz=x.tz) == x[0] - return True - - -def 
_get_index_freq(data): - freq = getattr(data.index, 'freq', None) - if freq is None: - freq = getattr(data.index, 'inferred_freq', None) - if freq == 'B': - weekdays = np.unique(data.index.dayofweek) - if (5 in weekdays) or (6 in weekdays): - freq = None - return freq - - -def _maybe_convert_index(ax, data): - # tsplot converts automatically, but don't want to convert index - # over and over for DataFrames - if isinstance(data.index, DatetimeIndex): - freq = getattr(data.index, 'freq', None) - - if freq is None: - freq = getattr(data.index, 'inferred_freq', None) - if isinstance(freq, DateOffset): - freq = freq.rule_code - - if freq is None: - freq = _get_ax_freq(ax) - - if freq is None: - raise ValueError('Could not get frequency alias for plotting') - - freq = frequencies.get_base_alias(freq) - freq = frequencies.get_period_alias(freq) - - data = data.to_period(freq=freq) - return data - - -# Patch methods for subplot. Only format_dateaxis is currently used. -# Do we need the rest for convenience? - - -def format_dateaxis(subplot, freq): - """ - Pretty-formats the date axis (x-axis). - - Major and minor ticks are automatically set for the frequency of the - current underlying series. As the dynamic mode is activated by - default, changing the limits of the x axis will intelligently change - the positions of the ticks. 
- """ - majlocator = TimeSeries_DateLocator(freq, dynamic_mode=True, - minor_locator=False, - plot_obj=subplot) - minlocator = TimeSeries_DateLocator(freq, dynamic_mode=True, - minor_locator=True, - plot_obj=subplot) - subplot.xaxis.set_major_locator(majlocator) - subplot.xaxis.set_minor_locator(minlocator) - - majformatter = TimeSeries_DateFormatter(freq, dynamic_mode=True, - minor_locator=False, - plot_obj=subplot) - minformatter = TimeSeries_DateFormatter(freq, dynamic_mode=True, - minor_locator=True, - plot_obj=subplot) - subplot.xaxis.set_major_formatter(majformatter) - subplot.xaxis.set_minor_formatter(minformatter) - - # x and y coord info - subplot.format_coord = lambda t, y: ( - "t = {0} y = {1:8f}".format(Period(ordinal=int(t), freq=freq), y)) - - pylab.draw_if_interactive() +from pandas.plotting._timeseries import tsplot diff --git a/pandas/tseries/util.py b/pandas/tseries/util.py deleted file mode 100644 index dc460dee8415b..0000000000000 --- a/pandas/tseries/util.py +++ /dev/null @@ -1,104 +0,0 @@ -import warnings - -from pandas.compat import lrange -import numpy as np -from pandas.types.common import _ensure_platform_int -from pandas.core.frame import DataFrame -import pandas.core.algorithms as algorithms - - -def pivot_annual(series, freq=None): - """ - Deprecated. Use ``pivot_table`` instead. - - Group a series by years, taking leap years into account. - - The output has as many rows as distinct years in the original series, - and as many columns as the length of a leap year in the units corresponding - to the original frequency (366 for daily frequency, 366*24 for hourly...). - The fist column of the output corresponds to Jan. 1st, 00:00:00, - while the last column corresponds to Dec, 31st, 23:59:59. - Entries corresponding to Feb. 29th are masked for non-leap years. - - For example, if the initial series has a daily frequency, the 59th column - of the output always corresponds to Feb. 28th, the 61st column to Mar. 
1st, - and the 60th column is masked for non-leap years. - With a hourly initial frequency, the (59*24)th column of the output always - correspond to Feb. 28th 23:00, the (61*24)th column to Mar. 1st, 00:00, and - the 24 columns between (59*24) and (61*24) are masked. - - If the original frequency is less than daily, the output is equivalent to - ``series.convert('A', func=None)``. - - Parameters - ---------- - series : Series - freq : string or None, default None - - Returns - ------- - annual : DataFrame - """ - - msg = "pivot_annual is deprecated. Use pivot_table instead" - warnings.warn(msg, FutureWarning) - - index = series.index - year = index.year - years = algorithms.unique1d(year) - - if freq is not None: - freq = freq.upper() - else: - freq = series.index.freq - - if freq == 'D': - width = 366 - offset = index.dayofyear - 1 - - # adjust for leap year - offset[(~isleapyear(year)) & (offset >= 59)] += 1 - - columns = lrange(1, 367) - # todo: strings like 1/1, 1/25, etc.? - elif freq in ('M', 'BM'): - width = 12 - offset = index.month - 1 - columns = lrange(1, 13) - elif freq == 'H': - width = 8784 - grouped = series.groupby(series.index.year) - defaulted = grouped.apply(lambda x: x.reset_index(drop=True)) - defaulted.index = defaulted.index.droplevel(0) - offset = np.asarray(defaulted.index) - offset[~isleapyear(year) & (offset >= 1416)] += 24 - columns = lrange(1, 8785) - else: - raise NotImplementedError(freq) - - flat_index = (year - years.min()) * width + offset - flat_index = _ensure_platform_int(flat_index) - - values = np.empty((len(years), width)) - values.fill(np.nan) - values.put(flat_index, series.values) - - return DataFrame(values, index=years, columns=columns) - - -def isleapyear(year): - """ - Returns true if year is a leap year. - - Parameters - ---------- - year : integer / sequence - A given (list of) year(s). - """ - - msg = "isleapyear is deprecated. 
Use .is_leap_year property instead" - warnings.warn(msg, FutureWarning) - - year = np.asarray(year) - return np.logical_or(year % 400 == 0, - np.logical_and(year % 4 == 0, year % 100 > 0)) diff --git a/pandas/tslib.pxd b/pandas/tslib.pxd deleted file mode 100644 index aa8cbcb2cedc7..0000000000000 --- a/pandas/tslib.pxd +++ /dev/null @@ -1,10 +0,0 @@ -from numpy cimport ndarray, int64_t - -cdef convert_to_tsobject(object, object, object, bint, bint) -cpdef convert_to_timedelta64(object, object) -cpdef object maybe_get_tz(object) -cdef bint _is_utc(object) -cdef bint _is_tzlocal(object) -cdef object _get_dst_info(object) -cdef bint _nat_scalar_rules[6] -cdef bint _check_all_nulls(obj) diff --git a/pandas/tslib.py b/pandas/tslib.py new file mode 100644 index 0000000000000..c06b34c1b0483 --- /dev/null +++ b/pandas/tslib.py @@ -0,0 +1,7 @@ +# flake8: noqa + +import warnings +warnings.warn("The pandas.tslib module is deprecated and will be " + "removed in a future version.", FutureWarning, stacklevel=2) +from pandas._libs.tslib import Timestamp, Timedelta, OutOfBoundsDatetime +from pandas._libs.tslibs.nattype import NaT, NaTType diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx deleted file mode 100644 index fc6e689a35d81..0000000000000 --- a/pandas/tslib.pyx +++ /dev/null @@ -1,5615 +0,0 @@ -# cython: profile=False - -import warnings - -cimport numpy as np -from numpy cimport (int8_t, int32_t, int64_t, import_array, ndarray, - NPY_INT64, NPY_DATETIME, NPY_TIMEDELTA) -from datetime cimport get_datetime64_value, get_timedelta64_value -import numpy as np - -import sys -cdef bint PY3 = (sys.version_info[0] >= 3) - -from cpython cimport ( - PyTypeObject, - PyFloat_Check, - PyLong_Check, - PyObject_RichCompareBool, - PyObject_RichCompare, - Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE, - PyUnicode_Check, - PyUnicode_AsUTF8String, -) - -cdef extern from "Python.h": - cdef PyTypeObject *Py_TYPE(object) - -cdef extern from "datetime_helper.h": - double total_seconds(object) - -# this 
is our datetime.pxd -from datetime cimport cmp_pandas_datetimestruct -from libc.stdlib cimport free - -from util cimport (is_integer_object, is_float_object, is_datetime64_object, - is_timedelta64_object, INT64_MAX) -cimport util - -from datetime cimport * -from khash cimport * -cimport cython - -from datetime import timedelta, datetime -from datetime import time as datetime_time - -import re - -# dateutil compat -from dateutil.tz import (tzoffset, tzlocal as _dateutil_tzlocal, - tzfile as _dateutil_tzfile, - tzutc as _dateutil_tzutc, - tzstr as _dateutil_tzstr) - -from pandas.compat import is_platform_windows -if is_platform_windows(): - from dateutil.zoneinfo import gettz as _dateutil_gettz -else: - from dateutil.tz import gettz as _dateutil_gettz -from dateutil.relativedelta import relativedelta -from dateutil.parser import DEFAULTPARSER - -from pytz.tzinfo import BaseTzInfo as _pytz_BaseTzInfo -from pandas.compat import (parse_date, string_types, iteritems, - StringIO, callable) - -import operator -import collections -import warnings - -# initialize numpy -import_array() -#import_ufunc() - -# import datetime C API -PyDateTime_IMPORT - -# in numpy 1.7, will prob need the following: -# numpy_pydatetime_import - -cdef int64_t NPY_NAT = util.get_nat() -iNaT = NPY_NAT - -# < numpy 1.7 compat for NaT -compat_NaT = np.array([NPY_NAT]).astype('m8[ns]').item() - - -try: - basestring -except NameError: # py3 - basestring = str - - -cdef inline object create_timestamp_from_ts( - int64_t value, pandas_datetimestruct dts, - object tz, object freq): - """ convenience routine to construct a Timestamp from its parts """ - cdef _Timestamp ts_base - ts_base = _Timestamp.__new__(Timestamp, dts.year, dts.month, - dts.day, dts.hour, dts.min, - dts.sec, dts.us, tz) - ts_base.value = value - ts_base.freq = freq - ts_base.nanosecond = dts.ps / 1000 - - return ts_base - - -cdef inline object create_datetime_from_ts( - int64_t value, pandas_datetimestruct dts, - object tz, object freq): 
- """ convenience routine to construct a datetime.datetime from its parts """ - return datetime(dts.year, dts.month, dts.day, dts.hour, - dts.min, dts.sec, dts.us, tz) - - -def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None, box=False): - # convert an i8 repr to an ndarray of datetimes or Timestamp (if box == - # True) - - cdef: - Py_ssize_t i, n = len(arr) - pandas_datetimestruct dts - object dt - int64_t value - ndarray[object] result = np.empty(n, dtype=object) - object (*func_create)(int64_t, pandas_datetimestruct, object, object) - - if box and util.is_string_object(freq): - from pandas.tseries.frequencies import to_offset - freq = to_offset(freq) - - if box: - func_create = create_timestamp_from_ts - else: - func_create = create_datetime_from_ts - - if tz is not None: - if _is_utc(tz): - for i in range(n): - value = arr[i] - if value == NPY_NAT: - result[i] = NaT - else: - pandas_datetime_to_datetimestruct( - value, PANDAS_FR_ns, &dts) - result[i] = func_create(value, dts, tz, freq) - elif _is_tzlocal(tz) or _is_fixed_offset(tz): - for i in range(n): - value = arr[i] - if value == NPY_NAT: - result[i] = NaT - else: - pandas_datetime_to_datetimestruct( - value, PANDAS_FR_ns, &dts) - dt = create_datetime_from_ts(value, dts, tz, freq) - dt = dt + tz.utcoffset(dt) - if box: - dt = Timestamp(dt) - result[i] = dt - else: - trans, deltas, typ = _get_dst_info(tz) - - for i in range(n): - - value = arr[i] - if value == NPY_NAT: - result[i] = NaT - else: - - # Adjust datetime64 timestamp, recompute datetimestruct - pos = trans.searchsorted(value, side='right') - 1 - if _treat_tz_as_pytz(tz): - # find right representation of dst etc in pytz timezone - new_tz = tz._tzinfos[tz._transition_info[pos]] - else: - # no zone-name change for dateutil tzs - dst etc - # represented in single object. 
- new_tz = tz - - pandas_datetime_to_datetimestruct( - value + deltas[pos], PANDAS_FR_ns, &dts) - result[i] = func_create(value, dts, new_tz, freq) - else: - for i in range(n): - - value = arr[i] - if value == NPY_NAT: - result[i] = NaT - else: - pandas_datetime_to_datetimestruct(value, PANDAS_FR_ns, &dts) - result[i] = func_create(value, dts, None, freq) - - return result - - -def ints_to_pytimedelta(ndarray[int64_t] arr, box=False): - # convert an i8 repr to an ndarray of timedelta or Timedelta (if box == - # True) - - cdef: - Py_ssize_t i, n = len(arr) - int64_t value - ndarray[object] result = np.empty(n, dtype=object) - - for i in range(n): - - value = arr[i] - if value == NPY_NAT: - result[i] = NaT - else: - if box: - result[i] = Timedelta(value) - else: - result[i] = timedelta(microseconds=int(value) / 1000) - - return result - - -cdef inline bint _is_tzlocal(object tz): - return isinstance(tz, _dateutil_tzlocal) - - -cdef inline bint _is_fixed_offset(object tz): - if _treat_tz_as_dateutil(tz): - if len(tz._trans_idx) == 0 and len(tz._trans_list) == 0: - return 1 - else: - return 0 - elif _treat_tz_as_pytz(tz): - if (len(tz._transition_info) == 0 - and len(tz._utc_transition_times) == 0): - return 1 - else: - return 0 - return 1 - -_zero_time = datetime_time(0, 0) -_no_input = object() - -# Python front end to C extension type _Timestamp -# This serves as the box for datetime64 - - -class Timestamp(_Timestamp): - """TimeStamp is the pandas equivalent of python's Datetime - and is interchangable with it in most cases. It's the type used - for the entries that make up a DatetimeIndex, and other timeseries - oriented data structures in pandas. - - There are essentially three calling conventions for the constructor. The - primary form accepts four parameters. They can be passed by position or - keyword. 
- - Parameters - ---------- - ts_input : datetime-like, str, int, float - Value to be converted to Timestamp - freq : str, DateOffset - Offset which Timestamp will have - tz : string, pytz.timezone, dateutil.tz.tzfile or None - Time zone for time which Timestamp will have. - unit : string - numpy unit used for conversion, if ts_input is int or float - offset : str, DateOffset - Deprecated, use freq - - The other two forms mimic the parameters from ``datetime.datetime``. They - can be passed by either position or keyword, but not both mixed together. - - :func:`datetime.datetime` Parameters - ------------------------------------ - - .. versionadded:: 0.19.0 - - year : int - month : int - day : int - hour : int, optional, default is 0 - minute : int, optional, default is 0 - second : int, optional, default is 0 - microsecond : int, optional, default is 0 - tzinfo : datetime.tzinfo, optional, default is None - """ - - @classmethod - def fromordinal(cls, ordinal, freq=None, tz=None, offset=None): - """ - passed an ordinal, translate and convert to a ts - note: by definition there cannot be any tz info on the ordinal itself - - Parameters - ---------- - ordinal : int - date corresponding to a proleptic Gregorian ordinal - freq : str, DateOffset - Offset which Timestamp will have - tz : string, pytz.timezone, dateutil.tz.tzfile or None - Time zone for time which Timestamp will have. - offset : str, DateOffset - Deprecated, use freq - """ - return cls(datetime.fromordinal(ordinal), - freq=freq, tz=tz, offset=offset) - - @classmethod - def now(cls, tz=None): - """ - Return the current time in the local timezone. Equivalent - to datetime.now([tz]) - - Parameters - ---------- - tz : string / timezone object, default None - Timezone to localize to - """ - if isinstance(tz, basestring): - tz = maybe_get_tz(tz) - return cls(datetime.now(tz)) - - @classmethod - def today(cls, tz=None): - """ - Return the current time in the local timezone. 
This differs - from datetime.today() in that it can be localized to a - passed timezone. - - Parameters - ---------- - tz : string / timezone object, default None - Timezone to localize to - """ - return cls.now(tz) - - @classmethod - def utcnow(cls): - return cls.now('UTC') - - @classmethod - def utcfromtimestamp(cls, ts): - return cls(datetime.utcfromtimestamp(ts)) - - @classmethod - def fromtimestamp(cls, ts): - return cls(datetime.fromtimestamp(ts)) - - @classmethod - def combine(cls, date, time): - return cls(datetime.combine(date, time)) - - def __new__(cls, object ts_input=_no_input, - object freq=None, tz=None, unit=None, - year=None, month=None, day=None, - hour=None, minute=None, second=None, microsecond=None, - tzinfo=None, - object offset=None): - # The parameter list folds together legacy parameter names (the first - # four) and positional and keyword parameter names from pydatetime. - # - # There are three calling forms: - # - # - In the legacy form, the first parameter, ts_input, is required - # and may be datetime-like, str, int, or float. The second - # parameter, offset, is optional and may be str or DateOffset. - # - # - ints in the first, second, and third arguments indicate - # pydatetime positional arguments. Only the first 8 arguments - # (standing in for year, month, day, hour, minute, second, - # microsecond, tzinfo) may be non-None. As a shortcut, we just - # check that the second argument is an int. - # - # - Nones for the first four (legacy) arguments indicate pydatetime - # keyword arguments. year, month, and day are required. As a - # shortcut, we just check that the first argument was not passed. - # - # Mixing pydatetime positional and keyword arguments is forbidden! - - cdef _TSObject ts - - if offset is not None: - # deprecate offset kwd in 0.19.0, GH13593 - if freq is not None: - msg = "Can only specify freq or offset, not both" - raise TypeError(msg) - warnings.warn("offset is deprecated. 
Use freq instead", - FutureWarning) - freq = offset - - if ts_input is _no_input: - # User passed keyword arguments. - return Timestamp(datetime(year, month, day, hour or 0, - minute or 0, second or 0, - microsecond or 0, tzinfo), - tz=tzinfo) - elif is_integer_object(freq): - # User passed positional arguments: - # Timestamp(year, month, day[, hour[, minute[, second[, - # microsecond[, tzinfo]]]]]) - return Timestamp(datetime(ts_input, freq, tz, unit or 0, - year or 0, month or 0, day or 0, - hour), tz=hour) - - ts = convert_to_tsobject(ts_input, tz, unit, 0, 0) - - if ts.value == NPY_NAT: - return NaT - - if util.is_string_object(freq): - from pandas.tseries.frequencies import to_offset - freq = to_offset(freq) - - return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq) - - def _round(self, freq, rounder): - - cdef int64_t unit - cdef object result, value - - from pandas.tseries.frequencies import to_offset - unit = to_offset(freq).nanos - if self.tz is not None: - value = self.tz_localize(None).value - else: - value = self.value - result = Timestamp(unit * rounder(value / float(unit)), unit='ns') - if self.tz is not None: - result = result.tz_localize(self.tz) - return result - - def round(self, freq): - """ - Round the Timestamp to the specified resolution - - Returns - ------- - a new Timestamp rounded to the given resolution of `freq` - - Parameters - ---------- - freq : a freq string indicating the rounding resolution - - Raises - ------ - ValueError if the freq cannot be converted - """ - return self._round(freq, np.round) - - def floor(self, freq): - """ - return a new Timestamp floored to this resolution - - Parameters - ---------- - freq : a freq string indicating the flooring resolution - """ - return self._round(freq, np.floor) - - def ceil(self, freq): - """ - return a new Timestamp ceiled to this resolution - - Parameters - ---------- - freq : a freq string indicating the ceiling resolution - """ - return self._round(freq, np.ceil) - - 
@property - def tz(self): - """ - Alias for tzinfo - """ - return self.tzinfo - - @property - def offset(self): - warnings.warn(".offset is deprecated. Use .freq instead", - FutureWarning) - return self.freq - - def __setstate__(self, state): - self.value = state[0] - self.freq = state[1] - self.tzinfo = state[2] - - def __reduce__(self): - object_state = self.value, self.freq, self.tzinfo - return (Timestamp, object_state) - - def to_period(self, freq=None): - """ - Return an period of which this timestamp is an observation. - """ - from pandas.tseries.period import Period - - if freq is None: - freq = self.freq - - return Period(self, freq=freq) - - @property - def dayofweek(self): - return self.weekday() - - @property - def weekday_name(self): - out = get_date_name_field( - np.array([self.value], dtype=np.int64), 'weekday_name') - return out[0] - - @property - def dayofyear(self): - return self._get_field('doy') - - @property - def week(self): - return self._get_field('woy') - - weekofyear = week - - @property - def microsecond(self): - return self._get_field('us') - - @property - def quarter(self): - return self._get_field('q') - - @property - def days_in_month(self): - return self._get_field('dim') - - daysinmonth = days_in_month - - @property - def freqstr(self): - return getattr(self.freq, 'freqstr', self.freq) - - @property - def is_month_start(self): - return self._get_start_end_field('is_month_start') - - @property - def is_month_end(self): - return self._get_start_end_field('is_month_end') - - @property - def is_quarter_start(self): - return self._get_start_end_field('is_quarter_start') - - @property - def is_quarter_end(self): - return self._get_start_end_field('is_quarter_end') - - @property - def is_year_start(self): - return self._get_start_end_field('is_year_start') - - @property - def is_year_end(self): - return self._get_start_end_field('is_year_end') - - @property - def is_leap_year(self): - return bool(is_leapyear(self.year)) - - def 
tz_localize(self, tz, ambiguous='raise', errors='raise'): - """ - Convert naive Timestamp to local time zone, or remove - timezone from tz-aware Timestamp. - - Parameters - ---------- - tz : string, pytz.timezone, dateutil.tz.tzfile or None - Time zone for time which Timestamp will be converted to. - None will remove timezone holding local time. - ambiguous : bool, 'NaT', default 'raise' - - bool contains flags to determine if time is dst or not (note - that this flag is only applicable for ambiguous fall dst dates) - - 'NaT' will return NaT for an ambiguous time - - 'raise' will raise an AmbiguousTimeError for an ambiguous time - errors : 'raise', 'coerce', default 'raise' - - 'raise' will raise a NonExistentTimeError if a timestamp is not - valid in the specified timezone (e.g. due to a transition from - or to DST time) - - 'coerce' will return NaT if the timestamp can not be converted - into the specified timezone - - .. versionadded:: 0.19.0 - - Returns - ------- - localized : Timestamp - - Raises - ------ - TypeError - If the Timestamp is tz-aware and tz is not None. - """ - if ambiguous == 'infer': - raise ValueError('Cannot infer offset with only one time.') - - if self.tzinfo is None: - # tz naive, localize - tz = maybe_get_tz(tz) - if not isinstance(ambiguous, basestring): - ambiguous = [ambiguous] - value = tz_localize_to_utc(np.array([self.value], dtype='i8'), tz, - ambiguous=ambiguous, errors=errors)[0] - return Timestamp(value, tz=tz) - else: - if tz is None: - # reset tz - value = tz_convert_single(self.value, 'UTC', self.tz) - return Timestamp(value, tz=None) - else: - raise TypeError('Cannot localize tz-aware Timestamp, use ' - 'tz_convert for conversions') - - def tz_convert(self, tz): - """ - Convert tz-aware Timestamp to another time zone. - - Parameters - ---------- - tz : string, pytz.timezone, dateutil.tz.tzfile or None - Time zone for time which Timestamp will be converted to. - None will remove timezone holding UTC time. 
- - Returns - ------- - converted : Timestamp - - Raises - ------ - TypeError - If Timestamp is tz-naive. - """ - if self.tzinfo is None: - # tz naive, use tz_localize - raise TypeError('Cannot convert tz-naive Timestamp, use ' - 'tz_localize to localize') - else: - # Same UTC timestamp, different time zone - return Timestamp(self.value, tz=tz) - - astimezone = tz_convert - - def replace(self, year=None, month=None, day=None, - hour=None, minute=None, second=None, microsecond=None, - nanosecond=None, tzinfo=object, fold=0): - """ - implements datetime.replace, handles nanoseconds - - Parameters - ---------- - year : int, optional - month : int, optional - day : int, optional - hour : int, optional - minute : int, optional - second : int, optional - microsecond : int, optional - nanosecond: int, optional - tzinfo : tz-convertible, optional - fold : int, optional, default is 0 - added in 3.6, NotImplemented - - Returns - ------- - Timestamp with fields replaced - """ - - cdef: - pandas_datetimestruct dts - int64_t value - object _tzinfo, result, k, v - _TSObject ts - - # set to naive if needed - _tzinfo = self.tzinfo - value = self.value - if _tzinfo is not None: - value = tz_convert_single(value, 'UTC', _tzinfo) - - # setup components - pandas_datetime_to_datetimestruct(value, PANDAS_FR_ns, &dts) - dts.ps = self.nanosecond * 1000 - - # replace - def validate(k, v): - """ validate integers """ - if not is_integer_object(v): - raise ValueError("value must be an integer, received " - "{v} for {k}".format(v=type(v), k=k)) - return v - - if year is not None: - dts.year = validate('year', year) - if month is not None: - dts.month = validate('month', month) - if day is not None: - dts.day = validate('day', day) - if hour is not None: - dts.hour = validate('hour', hour) - if minute is not None: - dts.min = validate('minute', minute) - if second is not None: - dts.sec = validate('second', second) - if microsecond is not None: - dts.us = validate('microsecond', microsecond) - 
if nanosecond is not None: - dts.ps = validate('nanosecond', nanosecond) * 1000 - if tzinfo is not object: - _tzinfo = tzinfo - - # reconstruct & check bounds - value = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts) - if value != NPY_NAT: - _check_dts_bounds(&dts) - - # set tz if needed - if _tzinfo is not None: - value = tz_convert_single(value, _tzinfo, 'UTC') - - result = create_timestamp_from_ts(value, dts, _tzinfo, self.freq) - return result - - def isoformat(self, sep='T'): - base = super(_Timestamp, self).isoformat(sep=sep) - if self.nanosecond == 0: - return base - - if self.tzinfo is not None: - base1, base2 = base[:-6], base[-6:] - else: - base1, base2 = base, "" - - if self.microsecond != 0: - base1 += "%.3d" % self.nanosecond - else: - base1 += ".%.9d" % self.nanosecond - - return base1 + base2 - - def _has_time_component(self): - """ - Returns if the Timestamp has a time component - in addition to the date part - """ - return (self.time() != _zero_time - or self.tzinfo is not None - or self.nanosecond != 0) - - def to_julian_date(self): - """ - Convert TimeStamp to a Julian Date. - 0 Julian date is noon January 1, 4713 BC. - """ - year = self.year - month = self.month - day = self.day - if month <= 2: - year -= 1 - month += 12 - return (day + - np.fix((153 * month - 457) / 5) + - 365 * year + - np.floor(year / 4) - - np.floor(year / 100) + - np.floor(year / 400) + - 1721118.5 + - (self.hour + - self.minute / 60.0 + - self.second / 3600.0 + - self.microsecond / 3600.0 / 1e+6 + - self.nanosecond / 3600.0 / 1e+9 - ) / 24.0) - - def normalize(self): - """ - Normalize Timestamp to midnight, preserving - tz information. 
- """ - normalized_value = date_normalize( - np.array([self.value], dtype='i8'), tz=self.tz)[0] - return Timestamp(normalized_value).tz_localize(self.tz) - - def __radd__(self, other): - # __radd__ on cython extension types like _Timestamp is not used, so - # define it here instead - return self + other - - -_nat_strings = set(['NaT', 'nat', 'NAT', 'nan', 'NaN', 'NAN']) - - -class NaTType(_NaT): - """(N)ot-(A)-(T)ime, the time equivalent of NaN""" - - def __new__(cls): - cdef _NaT base - - base = _NaT.__new__(cls, 1, 1, 1) - base._day = -1 - base._month = -1 - base.value = NPY_NAT - - return base - - def __repr__(self): - return 'NaT' - - def __str__(self): - return 'NaT' - - def isoformat(self, sep='T'): - # This allows Timestamp(ts.isoformat()) to always correctly roundtrip. - return 'NaT' - - def __hash__(self): - return NPY_NAT - - def __int__(self): - return NPY_NAT - - def __long__(self): - return NPY_NAT - - def __reduce__(self): - return (__nat_unpickle, (None, )) - - def total_seconds(self): - # GH 10939 - return np.nan - - @property - def is_leap_year(self): - return False - - def __rdiv__(self, other): - return _nat_rdivide_op(self, other) - - def __rtruediv__(self, other): - return _nat_rdivide_op(self, other) - - def __rfloordiv__(self, other): - return _nat_rdivide_op(self, other) - - def __rmul__(self, other): - if is_integer_object(other) or is_float_object(other): - return NaT - return NotImplemented - - -def __nat_unpickle(*args): - # return constant defined in the module - return NaT - -NaT = NaTType() - -cdef inline bint _checknull_with_nat(object val): - """ utility to check if a value is a nat or not """ - return val is None or ( - PyFloat_Check(val) and val != val) or val is NaT - -cdef inline bint _check_all_nulls(object val): - """ utility to check if a value is any type of null """ - cdef bint res - if PyFloat_Check(val): - res = val != val - elif val is NaT: - res = 1 - elif val is None: - res = 1 - elif is_datetime64_object(val): - res = 
get_datetime64_value(val) == NPY_NAT - elif is_timedelta64_object(val): - res = get_timedelta64_value(val) == NPY_NAT - else: - res = 0 - return res - -cdef inline bint _cmp_nat_dt(_NaT lhs, _Timestamp rhs, int op) except -1: - return _nat_scalar_rules[op] - - -cpdef object get_value_box(ndarray arr, object loc): - cdef: - Py_ssize_t i, sz - void* data_ptr - - if util.is_float_object(loc): - casted = int(loc) - if casted == loc: - loc = casted - i = loc - sz = np.PyArray_SIZE(arr) - - if i < 0 and sz > 0: - i += sz - - if i >= sz or sz == 0 or i < 0: - raise IndexError('index out of bounds') - - if arr.descr.type_num == NPY_DATETIME: - return Timestamp(util.get_value_1d(arr, i)) - elif arr.descr.type_num == NPY_TIMEDELTA: - return Timedelta(util.get_value_1d(arr, i)) - else: - return util.get_value_1d(arr, i) - - -# Add the min and max fields at the class level -cdef int64_t _NS_UPPER_BOUND = INT64_MAX -# the smallest value we could actually represent is -# INT64_MIN + 1 == -9223372036854775807 -# but to allow overflow free conversion with a microsecond resolution -# use the smallest value with a 0 nanosecond unit (0s in last 3 digits) -cdef int64_t _NS_LOWER_BOUND = -9223372036854775000 - -cdef pandas_datetimestruct _NS_MIN_DTS, _NS_MAX_DTS -pandas_datetime_to_datetimestruct(_NS_LOWER_BOUND, PANDAS_FR_ns, &_NS_MIN_DTS) -pandas_datetime_to_datetimestruct(_NS_UPPER_BOUND, PANDAS_FR_ns, &_NS_MAX_DTS) - -# Resolution is in nanoseconds -Timestamp.min = Timestamp(_NS_LOWER_BOUND) -Timestamp.max = Timestamp(_NS_UPPER_BOUND) - - -#---------------------------------------------------------------------- -# Frequency inference - -def unique_deltas(ndarray[int64_t] arr): - cdef: - Py_ssize_t i, n = len(arr) - int64_t val - khiter_t k - kh_int64_t *table - int ret = 0 - list uniques = [] - - table = kh_init_int64() - kh_resize_int64(table, 10) - for i in range(n - 1): - val = arr[i + 1] - arr[i] - k = kh_get_int64(table, val) - if k == table.n_buckets: - kh_put_int64(table, 
val, &ret) - uniques.append(val) - kh_destroy_int64(table) - - result = np.array(uniques, dtype=np.int64) - result.sort() - return result - - -cdef inline bint _is_multiple(int64_t us, int64_t mult): - return us % mult == 0 - - -cdef inline bint _cmp_scalar(int64_t lhs, int64_t rhs, int op) except -1: - if op == Py_EQ: - return lhs == rhs - elif op == Py_NE: - return lhs != rhs - elif op == Py_LT: - return lhs < rhs - elif op == Py_LE: - return lhs <= rhs - elif op == Py_GT: - return lhs > rhs - elif op == Py_GE: - return lhs >= rhs - - -cdef int _reverse_ops[6] - -_reverse_ops[Py_LT] = Py_GT -_reverse_ops[Py_LE] = Py_GE -_reverse_ops[Py_EQ] = Py_EQ -_reverse_ops[Py_NE] = Py_NE -_reverse_ops[Py_GT] = Py_LT -_reverse_ops[Py_GE] = Py_LE - - -cdef str _NDIM_STRING = "ndim" - -# This is PITA. Because we inherit from datetime, which has very specific -# construction requirements, we need to do object instantiation in python -# (see Timestamp class above). This will serve as a C extension type that -# shadows the python class, where we do any heavy lifting. 
-cdef class _Timestamp(datetime): - - cdef readonly: - int64_t value, nanosecond - object freq # frequency reference - - def __hash__(_Timestamp self): - if self.nanosecond: - return hash(self.value) - return datetime.__hash__(self) - - def __richcmp__(_Timestamp self, object other, int op): - cdef: - _Timestamp ots - int ndim - - if isinstance(other, _Timestamp): - if isinstance(other, _NaT): - return _cmp_nat_dt(other, self, _reverse_ops[op]) - ots = other - elif isinstance(other, datetime): - if self.nanosecond == 0: - val = self.to_pydatetime() - return PyObject_RichCompareBool(val, other, op) - - try: - ots = Timestamp(other) - except ValueError: - return self._compare_outside_nanorange(other, op) - else: - ndim = getattr(other, _NDIM_STRING, -1) - - if ndim != -1: - if ndim == 0: - if isinstance(other, np.datetime64): - other = Timestamp(other) - else: - if op == Py_EQ: - return False - elif op == Py_NE: - return True - - # only allow ==, != ops - raise TypeError('Cannot compare type %r with type %r' % - (type(self).__name__, - type(other).__name__)) - return PyObject_RichCompare(other, self, _reverse_ops[op]) - else: - if op == Py_EQ: - return False - elif op == Py_NE: - return True - raise TypeError('Cannot compare type %r with type %r' % - (type(self).__name__, type(other).__name__)) - - self._assert_tzawareness_compat(other) - return _cmp_scalar(self.value, ots.value, op) - - def __reduce_ex__(self, protocol): - # python 3.6 compat - # http://bugs.python.org/issue28730 - # now __reduce_ex__ is defined and higher priority than __reduce__ - return self.__reduce__() - - def __repr__(self): - stamp = self._repr_base - zone = None - - try: - stamp += self.strftime('%z') - if self.tzinfo: - zone = _get_zone(self.tzinfo) - except ValueError: - year2000 = self.replace(year=2000) - stamp += year2000.strftime('%z') - if self.tzinfo: - zone = _get_zone(self.tzinfo) - - try: - stamp += zone.strftime(' %%Z') - except: - pass - - tz = ", tz='{0}'".format(zone) if zone 
is not None else "" - freq = ", freq='{0}'".format( - self.freq.freqstr) if self.freq is not None else "" - - return "Timestamp('{stamp}'{tz}{freq})".format( - stamp=stamp, tz=tz, freq=freq) - - cdef bint _compare_outside_nanorange(_Timestamp self, datetime other, - int op) except -1: - cdef datetime dtval = self.to_pydatetime() - - self._assert_tzawareness_compat(other) - - if self.nanosecond == 0: - return PyObject_RichCompareBool(dtval, other, op) - else: - if op == Py_EQ: - return False - elif op == Py_NE: - return True - elif op == Py_LT: - return dtval < other - elif op == Py_LE: - return dtval < other - elif op == Py_GT: - return dtval >= other - elif op == Py_GE: - return dtval >= other - - cdef int _assert_tzawareness_compat(_Timestamp self, - object other) except -1: - if self.tzinfo is None: - if other.tzinfo is not None: - raise TypeError('Cannot compare tz-naive and tz-aware ' - 'timestamps') - elif other.tzinfo is None: - raise TypeError('Cannot compare tz-naive and tz-aware timestamps') - - cpdef datetime to_datetime(_Timestamp self): - """ - DEPRECATED: use :meth:`to_pydatetime` instead. - - Convert a Timestamp object to a native Python datetime object. - """ - warnings.warn("to_datetime is deprecated. Use self.to_pydatetime()", - FutureWarning, stacklevel=2) - return self.to_pydatetime(warn=False) - - cpdef datetime to_pydatetime(_Timestamp self, warn=True): - """ - Convert a Timestamp object to a native Python datetime object. - - If warn=True, issue a warning if nanoseconds is nonzero. 
- """ - cdef: - pandas_datetimestruct dts - _TSObject ts - - if self.nanosecond != 0 and warn: - warnings.warn("Discarding nonzero nanoseconds in conversion", - UserWarning, stacklevel=2) - ts = convert_to_tsobject(self, self.tzinfo, None, 0, 0) - dts = ts.dts - return datetime(dts.year, dts.month, dts.day, - dts.hour, dts.min, dts.sec, - dts.us, ts.tzinfo) - - cpdef to_datetime64(self): - """ Returns a numpy.datetime64 object with 'ns' precision """ - return np.datetime64(self.value, 'ns') - - def __add__(self, other): - cdef int64_t other_int - - if is_timedelta64_object(other): - other_int = other.astype('timedelta64[ns]').view('i8') - return Timestamp(self.value + other_int, - tz=self.tzinfo, freq=self.freq) - - elif is_integer_object(other): - if self is NaT: - # to be compat with Period - return NaT - elif self.freq is None: - raise ValueError("Cannot add integral value to Timestamp " - "without freq.") - return Timestamp((self.freq * other).apply(self), freq=self.freq) - - elif isinstance(other, timedelta) or hasattr(other, 'delta'): - nanos = _delta_to_nanoseconds(other) - result = Timestamp(self.value + nanos, - tz=self.tzinfo, freq=self.freq) - if getattr(other, 'normalize', False): - result = Timestamp(normalize_date(result)) - return result - - # index/series like - elif hasattr(other, '_typ'): - return NotImplemented - - result = datetime.__add__(self, other) - if isinstance(result, datetime): - result = Timestamp(result) - result.nanosecond = self.nanosecond - return result - - def __sub__(self, other): - if is_timedelta64_object(other) or is_integer_object(other) \ - or isinstance(other, timedelta) or hasattr(other, 'delta'): - neg_other = -other - return self + neg_other - - # a Timestamp-DatetimeIndex -> yields a negative TimedeltaIndex - elif getattr(other, '_typ', None) == 'datetimeindex': - # timezone comparison is performed in DatetimeIndex._sub_datelike - return -other.__sub__(self) - - # a Timestamp-TimedeltaIndex -> yields a negative 
TimedeltaIndex - elif getattr(other, '_typ', None) == 'timedeltaindex': - return (-other).__add__(self) - - elif other is NaT: - return NaT - - # coerce if necessary if we are a Timestamp-like - if (isinstance(self, datetime) - and (isinstance(other, datetime) - or is_datetime64_object(other))): - self = Timestamp(self) - other = Timestamp(other) - - # validate tz's - if get_timezone(self.tzinfo) != get_timezone(other.tzinfo): - raise TypeError( - "Timestamp subtraction must have the " - "same timezones or no timezones") - - # scalar Timestamp/datetime - Timestamp/datetime -> yields a - # Timedelta - try: - return Timedelta(self.value -other.value) - except (OverflowError, OutOfBoundsDatetime): - pass - - # scalar Timestamp/datetime - Timedelta -> yields a Timestamp (with - # same timezone if specified) - return datetime.__sub__(self, other) - - cpdef _get_field(self, field): - out = get_date_field(np.array([self.value], dtype=np.int64), field) - return int(out[0]) - - cpdef _get_start_end_field(self, field): - month_kw = self.freq.kwds.get( - 'startingMonth', self.freq.kwds.get( - 'month', 12)) if self.freq else 12 - freqstr = self.freqstr if self.freq else None - out = get_start_end_field( - np.array([self.value], dtype=np.int64), field, freqstr, month_kw) - return out[0] - - property _repr_base: - def __get__(self): - return '%s %s' % (self._date_repr, self._time_repr) - - property _date_repr: - def __get__(self): - # Ideal here would be self.strftime("%Y-%m-%d"), but - # the datetime strftime() methods require year >= 1900 - return '%d-%.2d-%.2d' % (self.year, self.month, self.day) - - property _time_repr: - def __get__(self): - result = '%.2d:%.2d:%.2d' % (self.hour, self.minute, self.second) - - if self.nanosecond != 0: - result += '.%.9d' % (self.nanosecond + 1000 * self.microsecond) - elif self.microsecond != 0: - result += '.%.6d' % self.microsecond - - return result - - property asm8: - def __get__(self): - return np.datetime64(self.value, 'ns') - - -cdef 
PyTypeObject* ts_type = Timestamp - - -cdef inline bint is_timestamp(object o): - return Py_TYPE(o) == ts_type # isinstance(o, Timestamp) - - -cdef bint _nat_scalar_rules[6] - -_nat_scalar_rules[Py_EQ] = False -_nat_scalar_rules[Py_NE] = True -_nat_scalar_rules[Py_LT] = False -_nat_scalar_rules[Py_LE] = False -_nat_scalar_rules[Py_GT] = False -_nat_scalar_rules[Py_GE] = False - - -cdef _nat_divide_op(self, other): - if isinstance(other, (Timedelta, np.timedelta64)) or other is NaT: - return np.nan - if is_integer_object(other) or is_float_object(other): - return NaT - return NotImplemented - -cdef _nat_rdivide_op(self, other): - if isinstance(other, Timedelta): - return np.nan - return NotImplemented - -cdef class _NaT(_Timestamp): - - def __hash__(_NaT self): - # py3k needs this defined here - return hash(self.value) - - def __richcmp__(_NaT self, object other, int op): - cdef int ndim = getattr(other, 'ndim', -1) - - if ndim == -1: - return _nat_scalar_rules[op] - - if ndim == 0: - if isinstance(other, np.datetime64): - other = Timestamp(other) - else: - raise TypeError('Cannot compare type %r with type %r' % - (type(self).__name__, type(other).__name__)) - return PyObject_RichCompare(other, self, _reverse_ops[op]) - - def __add__(self, other): - try: - if isinstance(other, datetime): - return NaT - result = _Timestamp.__add__(self, other) - # Timestamp.__add__ doesn't return DatetimeIndex/TimedeltaIndex - if result is NotImplemented: - return result - except (OverflowError, OutOfBoundsDatetime): - pass - return NaT - - def __sub__(self, other): - if isinstance(other, (datetime, timedelta)): - return NaT - try: - result = _Timestamp.__sub__(self, other) - # Timestamp.__sub__ may return DatetimeIndex/TimedeltaIndex - if result is NotImplemented or hasattr(result, '_typ'): - return result - except (OverflowError, OutOfBoundsDatetime): - pass - return NaT - - def __pos__(self): - return NaT - - def __neg__(self): - return NaT - - def __div__(self, other): - return 
_nat_divide_op(self, other) - - def __truediv__(self, other): - return _nat_divide_op(self, other) - - def __floordiv__(self, other): - return _nat_divide_op(self, other) - - def __mul__(self, other): - if is_integer_object(other) or is_float_object(other): - return NaT - return NotImplemented - - -# lightweight C object to hold datetime & int64 pair -cdef class _TSObject: - cdef: - pandas_datetimestruct dts # pandas_datetimestruct - int64_t value # numpy dt64 - object tzinfo - - property value: - def __get__(self): - return self.value - -cpdef _get_utcoffset(tzinfo, obj): - try: - return tzinfo._utcoffset - except AttributeError: - return tzinfo.utcoffset(obj) - -# helper to extract datetime and int64 from several different possibilities -cdef convert_to_tsobject(object ts, object tz, object unit, - bint dayfirst, bint yearfirst): - """ - Extract datetime and int64 from any of: - - np.int64 (with unit providing a possible modifier) - - np.datetime64 - - a float (with unit providing a possible modifier) - - python int or long object (with unit providing a possible modifier) - - iso8601 string object - - python datetime object - - another timestamp object - """ - cdef: - _TSObject obj - bint utc_convert = 1 - int out_local = 0, out_tzoffset = 0 - - if tz is not None: - tz = maybe_get_tz(tz) - - obj = _TSObject() - - if util.is_string_object(ts): - return convert_str_to_tsobject(ts, tz, unit, dayfirst, yearfirst) - - if ts is None or ts is NaT: - obj.value = NPY_NAT - elif is_datetime64_object(ts): - if ts.view('i8') == NPY_NAT: - obj.value = NPY_NAT - else: - obj.value = _get_datetime64_nanos(ts) - pandas_datetime_to_datetimestruct( - obj.value, PANDAS_FR_ns, &obj.dts) - elif is_integer_object(ts): - if ts == NPY_NAT: - obj.value = NPY_NAT - else: - ts = ts * cast_from_unit(None, unit) - obj.value = ts - pandas_datetime_to_datetimestruct(ts, PANDAS_FR_ns, &obj.dts) - elif util.is_float_object(ts): - if ts != ts or ts == NPY_NAT: - obj.value = NPY_NAT - else: - ts = 
cast_from_unit(ts, unit) - obj.value = ts - pandas_datetime_to_datetimestruct(ts, PANDAS_FR_ns, &obj.dts) - elif PyDateTime_Check(ts): - if tz is not None: - # sort of a temporary hack - if ts.tzinfo is not None: - if (hasattr(tz, 'normalize') and - hasattr(ts.tzinfo, '_utcoffset')): - ts = tz.normalize(ts) - obj.value = _pydatetime_to_dts(ts, &obj.dts) - obj.tzinfo = ts.tzinfo - else: #tzoffset - try: - tz = ts.astimezone(tz).tzinfo - except: - pass - obj.value = _pydatetime_to_dts(ts, &obj.dts) - ts_offset = _get_utcoffset(ts.tzinfo, ts) - obj.value -= _delta_to_nanoseconds(ts_offset) - tz_offset = _get_utcoffset(tz, ts) - obj.value += _delta_to_nanoseconds(tz_offset) - pandas_datetime_to_datetimestruct(obj.value, - PANDAS_FR_ns, &obj.dts) - obj.tzinfo = tz - elif not _is_utc(tz): - ts = _localize_pydatetime(ts, tz) - obj.value = _pydatetime_to_dts(ts, &obj.dts) - obj.tzinfo = ts.tzinfo - else: - # UTC - obj.value = _pydatetime_to_dts(ts, &obj.dts) - obj.tzinfo = pytz.utc - else: - obj.value = _pydatetime_to_dts(ts, &obj.dts) - obj.tzinfo = ts.tzinfo - - if obj.tzinfo is not None and not _is_utc(obj.tzinfo): - offset = _get_utcoffset(obj.tzinfo, ts) - obj.value -= _delta_to_nanoseconds(offset) - - if is_timestamp(ts): - obj.value += ts.nanosecond - obj.dts.ps = ts.nanosecond * 1000 - _check_dts_bounds(&obj.dts) - return obj - elif PyDate_Check(ts): - # Keep the converter same as PyDateTime's - ts = datetime.combine(ts, datetime_time()) - return convert_to_tsobject(ts, tz, None, 0, 0) - elif getattr(ts, '_typ', None) == 'period': - raise ValueError( - "Cannot convert Period to Timestamp " - "unambiguously. 
Use to_timestamp") - else: - raise TypeError('Cannot convert input [{}] of type {} to ' - 'Timestamp'.format(ts, type(ts))) - - if obj.value != NPY_NAT: - _check_dts_bounds(&obj.dts) - - if tz is not None: - _localize_tso(obj, tz) - - return obj - - -cpdef convert_str_to_tsobject(object ts, object tz, object unit, - dayfirst=False, yearfirst=False): - """ ts must be a string """ - - cdef: - _TSObject obj - int out_local = 0, out_tzoffset = 0 - - if tz is not None: - tz = maybe_get_tz(tz) - - obj = _TSObject() - - assert util.is_string_object(ts) - - if len(ts) == 0 or ts in _nat_strings: - ts = NaT - elif ts == 'now': - # Issue 9000, we short-circuit rather than going - # into np_datetime_strings which returns utc - ts = Timestamp.now(tz) - elif ts == 'today': - # Issue 9000, we short-circuit rather than going - # into np_datetime_strings which returns a normalized datetime - ts = Timestamp.today(tz) - else: - try: - _string_to_dts(ts, &obj.dts, &out_local, &out_tzoffset) - obj.value = pandas_datetimestruct_to_datetime( - PANDAS_FR_ns, &obj.dts) - _check_dts_bounds(&obj.dts) - if out_local == 1: - obj.tzinfo = pytz.FixedOffset(out_tzoffset) - obj.value = tz_convert_single(obj.value, obj.tzinfo, 'UTC') - if tz is None: - _check_dts_bounds(&obj.dts) - return obj - else: - # Keep the converter same as PyDateTime's - ts = Timestamp(obj.value, tz=obj.tzinfo) - else: - ts = obj.value - if tz is not None: - # shift for _localize_tso - ts = tz_convert_single(ts, tz, 'UTC') - except ValueError: - try: - ts = parse_datetime_string( - ts, dayfirst=dayfirst, yearfirst=yearfirst) - except Exception: - raise ValueError("could not convert string to Timestamp") - - return convert_to_tsobject(ts, tz, unit, dayfirst, yearfirst) - - -def _test_parse_iso8601(object ts): - """ - TESTING ONLY: Parse string into Timestamp using iso8601 parser. 
Used - only for testing, actual construction uses `convert_str_to_tsobject` - """ - cdef: - _TSObject obj - int out_local = 0, out_tzoffset = 0 - - obj = _TSObject() - - _string_to_dts(ts, &obj.dts, &out_local, &out_tzoffset) - obj.value = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &obj.dts) - _check_dts_bounds(&obj.dts) - if out_local == 1: - obj.tzinfo = pytz.FixedOffset(out_tzoffset) - obj.value = tz_convert_single(obj.value, obj.tzinfo, 'UTC') - return Timestamp(obj.value, tz=obj.tzinfo) - else: - return Timestamp(obj.value) - -cdef inline void _localize_tso(_TSObject obj, object tz): - """ - Take a TSObject in UTC and localizes to timezone tz. - """ - if _is_utc(tz): - obj.tzinfo = tz - elif _is_tzlocal(tz): - pandas_datetime_to_datetimestruct(obj.value, PANDAS_FR_ns, &obj.dts) - dt = datetime(obj.dts.year, obj.dts.month, obj.dts.day, obj.dts.hour, - obj.dts.min, obj.dts.sec, obj.dts.us, tz) - delta = int(total_seconds(_get_utcoffset(tz, dt))) * 1000000000 - if obj.value != NPY_NAT: - pandas_datetime_to_datetimestruct(obj.value + delta, - PANDAS_FR_ns, &obj.dts) - else: - pandas_datetime_to_datetimestruct(obj.value, - PANDAS_FR_ns, &obj.dts) - obj.tzinfo = tz - else: - # Adjust datetime64 timestamp, recompute datetimestruct - trans, deltas, typ = _get_dst_info(tz) - - pos = trans.searchsorted(obj.value, side='right') - 1 - - # static/pytz/dateutil specific code - if _is_fixed_offset(tz): - # statictzinfo - if len(deltas) > 0 and obj.value != NPY_NAT: - pandas_datetime_to_datetimestruct(obj.value + deltas[0], - PANDAS_FR_ns, &obj.dts) - else: - pandas_datetime_to_datetimestruct( - obj.value, PANDAS_FR_ns, &obj.dts) - obj.tzinfo = tz - elif _treat_tz_as_pytz(tz): - inf = tz._transition_info[pos] - if obj.value != NPY_NAT: - pandas_datetime_to_datetimestruct(obj.value + deltas[pos], - PANDAS_FR_ns, &obj.dts) - else: - pandas_datetime_to_datetimestruct(obj.value, - PANDAS_FR_ns, &obj.dts) - obj.tzinfo = tz._tzinfos[inf] - elif _treat_tz_as_dateutil(tz): - if 
obj.value != NPY_NAT: - pandas_datetime_to_datetimestruct(obj.value + deltas[pos], - PANDAS_FR_ns, &obj.dts) - else: - pandas_datetime_to_datetimestruct(obj.value, - PANDAS_FR_ns, &obj.dts) - obj.tzinfo = tz - else: - obj.tzinfo = tz - - -def _localize_pydatetime(object dt, object tz): - """ - Take a datetime/Timestamp in UTC and localizes to timezone tz. - """ - if tz is None: - return dt - elif isinstance(dt, Timestamp): - return dt.tz_localize(tz) - elif tz == 'UTC' or tz is UTC: - return UTC.localize(dt) - try: - # datetime.replace with pytz may be incorrect result - return tz.localize(dt) - except AttributeError: - return dt.replace(tzinfo=tz) - - -def get_timezone(tz): - return _get_zone(tz) - -cdef inline bint _is_utc(object tz): - return tz is UTC or isinstance(tz, _dateutil_tzutc) - -cdef inline object _get_zone(object tz): - """ - We need to do several things here: - 1) Distinguish between pytz and dateutil timezones - 2) Not be over-specific (e.g. US/Eastern with/without DST is same *zone* - but a different tz object) - 3) Provide something to serialize when we're storing a datetime object - in pytables. - - We return a string prefaced with dateutil if it's a dateutil tz, else just - the tz name. It needs to be a string so that we can serialize it with - UJSON/pytables. maybe_get_tz (below) is the inverse of this process. - """ - if _is_utc(tz): - return 'UTC' - else: - if _treat_tz_as_dateutil(tz): - if '.tar.gz' in tz._filename: - raise ValueError( - 'Bad tz filename. Dateutil on python 3 on windows has a ' - 'bug which causes tzfile._filename to be the same for all ' - 'timezone files. Please construct dateutil timezones ' - 'implicitly by passing a string like "dateutil/Europe' - '/London" when you construct your pandas objects instead ' - 'of passing a timezone object. See ' - 'https://github.com/pandas-dev/pandas/pull/7362') - return 'dateutil/' + tz._filename - else: - # tz is a pytz timezone or unknown. 
- try: - zone = tz.zone - if zone is None: - return tz - return zone - except AttributeError: - return tz - - -cpdef inline object maybe_get_tz(object tz): - """ - (Maybe) Construct a timezone object from a string. If tz is a string, use - it to construct a timezone object. Otherwise, just return tz. - """ - if isinstance(tz, string_types): - if tz == 'tzlocal()': - tz = _dateutil_tzlocal() - elif tz.startswith('dateutil/'): - zone = tz[9:] - tz = _dateutil_gettz(zone) - # On Python 3 on Windows, the filename is not always set correctly. - if isinstance(tz, _dateutil_tzfile) and '.tar.gz' in tz._filename: - tz._filename = zone - else: - tz = pytz.timezone(tz) - elif is_integer_object(tz): - tz = pytz.FixedOffset(tz / 60) - return tz - - -class OutOfBoundsDatetime(ValueError): - pass - -cdef inline _check_dts_bounds(pandas_datetimestruct *dts): - cdef: - bint error = False - - if dts.year <= 1677 and cmp_pandas_datetimestruct(dts, &_NS_MIN_DTS) == -1: - error = True - elif ( - dts.year >= 2262 and - cmp_pandas_datetimestruct(dts, &_NS_MAX_DTS) == 1): - error = True - - if error: - fmt = '%d-%.2d-%.2d %.2d:%.2d:%.2d' % (dts.year, dts.month, - dts.day, dts.hour, - dts.min, dts.sec) - - raise OutOfBoundsDatetime( - 'Out of bounds nanosecond timestamp: %s' % fmt) - - -def datetime_to_datetime64(ndarray[object] values): - cdef: - Py_ssize_t i, n = len(values) - object val, inferred_tz = None - ndarray[int64_t] iresult - pandas_datetimestruct dts - _TSObject _ts - - result = np.empty(n, dtype='M8[ns]') - iresult = result.view('i8') - for i in range(n): - val = values[i] - if _checknull_with_nat(val): - iresult[i] = NPY_NAT - elif PyDateTime_Check(val): - if val.tzinfo is not None: - if inferred_tz is not None: - if _get_zone(val.tzinfo) != inferred_tz: - raise ValueError('Array must be all same time zone') - else: - inferred_tz = _get_zone(val.tzinfo) - - _ts = convert_to_tsobject(val, None, None, 0, 0) - iresult[i] = _ts.value - _check_dts_bounds(&_ts.dts) - else: - if 
inferred_tz is not None: - raise ValueError( - 'Cannot mix tz-aware with tz-naive values') - iresult[i] = _pydatetime_to_dts(val, &dts) - _check_dts_bounds(&dts) - else: - raise TypeError('Unrecognized value type: %s' % type(val)) - - return result, inferred_tz - -cdef: - set _not_datelike_strings = set(['a', 'A', 'm', 'M', 'p', 'P', 't', 'T']) - -cpdef bint _does_string_look_like_datetime(object date_string): - if date_string.startswith('0'): - # Strings starting with 0 are more consistent with a - # date-like string than a number - return True - - try: - if float(date_string) < 1000: - return False - except ValueError: - pass - - if date_string in _not_datelike_strings: - return False - - return True - - -def format_array_from_datetime(ndarray[int64_t] values, object tz=None, - object format=None, object na_rep=None): - """ - return a np object array of the string formatted values - - Parameters - ---------- - values : a 1-d i8 array - tz : the timezone (or None) - format : optional, default is None - a strftime capable string - na_rep : optional, default is None - a nat format - - """ - cdef: - int64_t val, ns, N = len(values) - ndarray[int64_t] consider_values - bint show_ms = 0, show_us = 0, show_ns = 0, basic_format = 0 - ndarray[object] result = np.empty(N, dtype=object) - object ts, res - pandas_datetimestruct dts - - if na_rep is None: - na_rep = 'NaT' - - # if we don't have a format nor tz, then choose - # a format based on precision - basic_format = format is None and tz is None - if basic_format: - consider_values = values[values != NPY_NAT] - show_ns = (consider_values%1000).any() - - if not show_ns: - consider_values //= 1000 - show_us = (consider_values%1000).any() - - if not show_ms: - consider_values //= 1000 - show_ms = (consider_values%1000).any() - - for i in range(N): - val = values[i] - - if val == NPY_NAT: - result[i] = na_rep - elif basic_format: - - pandas_datetime_to_datetimestruct(val, PANDAS_FR_ns, &dts) - res = '%d-%.2d-%.2d 
%.2d:%.2d:%.2d' % (dts.year, - dts.month, - dts.day, - dts.hour, - dts.min, - dts.sec) - - if show_ns: - ns = dts.ps / 1000 - res += '.%.9d' % (ns + 1000 * dts.us) - elif show_us: - res += '.%.6d' % dts.us - elif show_ms: - res += '.%.3d' % (dts.us /1000) - - result[i] = res - - else: - - ts = Timestamp(val, tz=tz) - if format is None: - result[i] = str(ts) - else: - - # invalid format string - # requires dates > 1900 - try: - result[i] = ts.strftime(format) - except ValueError: - result[i] = str(ts) - - return result - - -class DateParseError(ValueError): - pass - - -cdef object _TIMEPAT = re.compile(r'^([01]?[0-9]|2[0-3]):([0-5][0-9])') - - -def parse_datetime_string(object date_string, object freq=None, - dayfirst=False, yearfirst=False, **kwargs): - """parse datetime string, only returns datetime. - Also cares special handling matching time patterns. - - Returns - ------- - datetime - """ - - cdef: - object dt - - if not _does_string_look_like_datetime(date_string): - raise ValueError('Given date string not likely a datetime.') - - if _TIMEPAT.match(date_string): - # use current datetime as default, not pass _DEFAULT_DATETIME - dt = parse_date(date_string, dayfirst=dayfirst, - yearfirst=yearfirst, **kwargs) - return dt - try: - dt, _, _ = _parse_dateabbr_string(date_string, _DEFAULT_DATETIME, freq) - return dt - except DateParseError: - raise - except ValueError: - pass - - try: - dt = parse_date(date_string, default=_DEFAULT_DATETIME, - dayfirst=dayfirst, yearfirst=yearfirst, **kwargs) - except TypeError: - # following may be raised from dateutil - # TypeError: 'NoneType' object is not iterable - raise ValueError('Given date string not likely a datetime.') - - return dt - - -def parse_datetime_string_with_reso(object date_string, object freq=None, - dayfirst=False, yearfirst=False, **kwargs): - """parse datetime string, only returns datetime - - Returns - ------- - datetime - """ - - cdef: - object parsed, reso - - if not 
_does_string_look_like_datetime(date_string): - raise ValueError('Given date string not likely a datetime.') - - try: - return _parse_dateabbr_string(date_string, _DEFAULT_DATETIME, freq) - except DateParseError: - raise - except ValueError: - pass - - try: - parsed, reso = dateutil_parse(date_string, _DEFAULT_DATETIME, - dayfirst=dayfirst, yearfirst=yearfirst) - except Exception as e: - # TODO: allow raise of errors within instead - raise DateParseError(e) - if parsed is None: - raise DateParseError("Could not parse %s" % date_string) - return parsed, parsed, reso - - -cdef inline object _parse_dateabbr_string(object date_string, object default, - object freq): - cdef: - object ret - int year, quarter = -1, month, mnum, date_len - - # special handling for possibilities eg, 2Q2005, 2Q05, 2005Q1, 05Q1 - assert util.is_string_object(date_string) - - # len(date_string) == 0 - # should be NaT??? - - if date_string in _nat_strings: - return NaT, NaT, '' - - date_string = date_string.upper() - date_len = len(date_string) - - if date_len == 4: - # parse year only like 2000 - try: - ret = default.replace(year=int(date_string)) - return ret, ret, 'year' - except ValueError: - pass - - try: - if 4 <= date_len <= 7: - i = date_string.index('Q', 1, 6) - if i == 1: - quarter = int(date_string[0]) - if date_len == 4 or (date_len == 5 - and date_string[i + 1] == '-'): - # r'(\d)Q-?(\d\d)') - year = 2000 + int(date_string[-2:]) - elif date_len == 6 or (date_len == 7 - and date_string[i + 1] == '-'): - # r'(\d)Q-?(\d\d\d\d)') - year = int(date_string[-4:]) - else: - raise ValueError - elif i == 2 or i == 3: - # r'(\d\d)-?Q(\d)' - if date_len == 4 or (date_len == 5 - and date_string[i - 1] == '-'): - quarter = int(date_string[-1]) - year = 2000 + int(date_string[:2]) - else: - raise ValueError - elif i == 4 or i == 5: - if date_len == 6 or (date_len == 7 - and date_string[i - 1] == '-'): - # r'(\d\d\d\d)-?Q(\d)' - quarter = int(date_string[-1]) - year = int(date_string[:4]) - else: 
- raise ValueError - - if not (1 <= quarter <= 4): - msg = ('Incorrect quarterly string is given, quarter must be ' - 'between 1 and 4: {0}') - raise DateParseError(msg.format(date_string)) - - if freq is not None: - # hack attack, #1228 - try: - mnum = _MONTH_NUMBERS[_get_rule_month(freq)] + 1 - except (KeyError, ValueError): - msg = ('Unable to retrieve month information from given ' - 'freq: {0}').format(freq) - raise DateParseError(msg) - - month = (mnum + (quarter - 1) * 3) % 12 + 1 - if month > mnum: - year -= 1 - else: - month = (quarter - 1) * 3 + 1 - - ret = default.replace(year=year, month=month) - return ret, ret, 'quarter' - - except DateParseError: - raise - except ValueError: - pass - - if date_len == 6 and (freq == 'M' or getattr( - freq, 'rule_code', None) == 'M'): - year = int(date_string[:4]) - month = int(date_string[4:6]) - try: - ret = default.replace(year=year, month=month) - return ret, ret, 'month' - except ValueError: - pass - - for pat in ['%Y-%m', '%m-%Y', '%b %Y', '%b-%Y']: - try: - ret = datetime.strptime(date_string, pat) - return ret, ret, 'month' - except ValueError: - pass - - raise ValueError('Unable to parse {0}'.format(date_string)) - - -def dateutil_parse(object timestr, object default, ignoretz=False, - tzinfos=None, **kwargs): - """ lifted from dateutil to get resolution""" - - cdef: - object fobj, res, attr, ret, tzdata - object reso = None - dict repl = {} - - fobj = StringIO(str(timestr)) - res = DEFAULTPARSER._parse(fobj, **kwargs) - - # dateutil 2.2 compat - if isinstance(res, tuple): - res, _ = res - - if res is None: - msg = "Unknown datetime string format, unable to parse: {0}" - raise ValueError(msg.format(timestr)) - - for attr in ["year", "month", "day", "hour", - "minute", "second", "microsecond"]: - value = getattr(res, attr) - if value is not None: - repl[attr] = value - reso = attr - - if reso is None: - msg = "Unable to parse datetime string: {0}" - raise ValueError(msg.format(timestr)) - - if reso == 
'microsecond': - if repl['microsecond'] == 0: - reso = 'second' - elif repl['microsecond'] % 1000 == 0: - reso = 'millisecond' - - ret = default.replace(**repl) - if res.weekday is not None and not res.day: - ret = ret + relativedelta.relativedelta(weekday=res.weekday) - if not ignoretz: - if callable(tzinfos) or tzinfos and res.tzname in tzinfos: - if callable(tzinfos): - tzdata = tzinfos(res.tzname, res.tzoffset) - else: - tzdata = tzinfos.get(res.tzname) - if isinstance(tzdata, datetime.tzinfo): - tzinfo = tzdata - elif isinstance(tzdata, string_types): - tzinfo = _dateutil_tzstr(tzdata) - elif isinstance(tzdata, int): - tzinfo = tzoffset(res.tzname, tzdata) - else: - raise ValueError("offset must be tzinfo subclass, " - "tz string, or int offset") - ret = ret.replace(tzinfo=tzinfo) - elif res.tzname and res.tzname in time.tzname: - ret = ret.replace(tzinfo=_dateutil_tzlocal()) - elif res.tzoffset == 0: - ret = ret.replace(tzinfo=_dateutil_tzutc()) - elif res.tzoffset: - ret = ret.replace(tzinfo=tzoffset(res.tzname, res.tzoffset)) - return ret, reso - - -# const for parsers - -_DEFAULT_DATETIME = datetime(1, 1, 1).replace( - hour=0, minute=0, second=0, microsecond=0) -_MONTHS = ['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL', - 'AUG', 'SEP', 'OCT', 'NOV', 'DEC'] -_MONTH_NUMBERS = {k: i for i, k in enumerate(_MONTHS)} -_MONTH_ALIASES = {(k + 1): v for k, v in enumerate(_MONTHS)} - - -cpdef object _get_rule_month(object source, object default='DEC'): - """ - Return starting month of given freq, default is December. 
- - Example - ------- - >>> _get_rule_month('D') - 'DEC' - - >>> _get_rule_month('A-JAN') - 'JAN' - """ - if hasattr(source, 'freqstr'): - source = source.freqstr - source = source.upper() - if '-' not in source: - return default - else: - return source.split('-')[1] - - -cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'): - """ - convert the ndarray according to the unit - if errors: - - raise: return converted values or raise OutOfBoundsDatetime - if out of range on the conversion or - ValueError for other conversions (e.g. a string) - - ignore: return non-convertible values as the same unit - - coerce: NaT for non-convertibles - - """ - cdef: - Py_ssize_t i, j, n=len(values) - int64_t m - ndarray[float64_t] fvalues - ndarray mask - bint is_ignore = errors=='ignore' - bint is_coerce = errors=='coerce' - bint is_raise = errors=='raise' - bint need_to_iterate=True - ndarray[int64_t] iresult - ndarray[object] oresult - - assert is_ignore or is_coerce or is_raise - - if unit == 'ns': - if issubclass(values.dtype.type, np.integer): - return values.astype('M8[ns]') - return array_to_datetime(values.astype(object), errors=errors) - - m = cast_from_unit(None, unit) - - if is_raise: - - # try a quick conversion to i8 - # if we have nulls that are not type-compat - # then need to iterate - try: - iresult = values.astype('i8', casting='same_kind', copy=False) - mask = iresult == iNaT - iresult[mask] = 0 - fvalues = iresult.astype('f8') * m - need_to_iterate=False - except: - pass - - # check the bounds - if not need_to_iterate: - - if ((fvalues < _NS_LOWER_BOUND).any() - or (fvalues > _NS_UPPER_BOUND).any()): - raise OutOfBoundsDatetime( - "cannot convert input with unit '{0}'".format(unit)) - result = (iresult *m).astype('M8[ns]') - iresult = result.view('i8') - iresult[mask] = iNaT - return result - - result = np.empty(n, dtype='M8[ns]') - iresult = result.view('i8') - - try: - for i in range(n): - val = values[i] - - if _checknull_with_nat(val): - 
iresult[i] = NPY_NAT - - elif is_integer_object(val) or is_float_object(val): - - if val != val or val == NPY_NAT: - iresult[i] = NPY_NAT - else: - try: - iresult[i] = cast_from_unit(val, unit) - except OverflowError: - if is_raise: - raise OutOfBoundsDatetime( - "cannot convert input {0} with the unit " - "'{1}'".format(val, unit)) - elif is_ignore: - raise AssertionError - iresult[i] = NPY_NAT - - elif util.is_string_object(val): - if len(val) == 0 or val in _nat_strings: - iresult[i] = NPY_NAT - - else: - try: - iresult[i] = cast_from_unit(float(val), unit) - except ValueError: - if is_raise: - raise ValueError( - "non convertible value {0} with the unit " - "'{1}'".format(val, unit)) - elif is_ignore: - raise AssertionError - iresult[i] = NPY_NAT - except: - if is_raise: - raise OutOfBoundsDatetime( - "cannot convert input {0} with the unit " - "'{1}'".format(val, unit)) - elif is_ignore: - raise AssertionError - iresult[i] = NPY_NAT - - else: - - if is_raise: - raise ValueError("non convertible value {0}" - "with the unit '{1}'".format( - val, - unit)) - if is_ignore: - raise AssertionError - - iresult[i] = NPY_NAT - - return result - - except AssertionError: - pass - - # we have hit an exception - # and are in ignore mode - # redo as object - - oresult = np.empty(n, dtype=object) - for i in range(n): - val = values[i] - - if _checknull_with_nat(val): - oresult[i] = NaT - elif is_integer_object(val) or is_float_object(val): - - if val != val or val == NPY_NAT: - oresult[i] = NaT - else: - try: - oresult[i] = Timestamp(cast_from_unit(val, unit)) - except: - oresult[i] = val - - elif util.is_string_object(val): - if len(val) == 0 or val in _nat_strings: - oresult[i] = NaT - - else: - oresult[i] = val - - return oresult - - -cpdef array_to_datetime(ndarray[object] values, errors='raise', - dayfirst=False, yearfirst=False, - format=None, utc=None, - require_iso8601=False): - cdef: - Py_ssize_t i, n = len(values) - object val, py_dt - ndarray[int64_t] iresult - 
ndarray[object] oresult - pandas_datetimestruct dts - bint utc_convert = bool(utc) - bint seen_integer = 0 - bint seen_string = 0 - bint seen_datetime = 0 - bint is_raise = errors=='raise' - bint is_ignore = errors=='ignore' - bint is_coerce = errors=='coerce' - _TSObject _ts - int out_local=0, out_tzoffset=0 - - # specify error conditions - assert is_raise or is_ignore or is_coerce - - try: - result = np.empty(n, dtype='M8[ns]') - iresult = result.view('i8') - for i in range(n): - val = values[i] - - if _checknull_with_nat(val): - iresult[i] = NPY_NAT - - elif PyDateTime_Check(val): - seen_datetime=1 - if val.tzinfo is not None: - if utc_convert: - _ts = convert_to_tsobject(val, None, 'ns', 0, 0) - iresult[i] = _ts.value - try: - _check_dts_bounds(&_ts.dts) - except ValueError: - if is_coerce: - iresult[i] = NPY_NAT - continue - raise - else: - raise ValueError('Tz-aware datetime.datetime cannot ' - 'be converted to datetime64 unless ' - 'utc=True') - else: - iresult[i] = _pydatetime_to_dts(val, &dts) - if is_timestamp(val): - iresult[i] += (<_Timestamp>val).nanosecond - try: - _check_dts_bounds(&dts) - except ValueError: - if is_coerce: - iresult[i] = NPY_NAT - continue - raise - - elif PyDate_Check(val): - iresult[i] = _date_to_datetime64(val, &dts) - try: - _check_dts_bounds(&dts) - seen_datetime=1 - except ValueError: - if is_coerce: - iresult[i] = NPY_NAT - continue - raise - - elif util.is_datetime64_object(val): - if get_datetime64_value(val) == NPY_NAT: - iresult[i] = NPY_NAT - else: - try: - iresult[i] = _get_datetime64_nanos(val) - seen_datetime=1 - except ValueError: - if is_coerce: - iresult[i] = NPY_NAT - continue - raise - - elif is_integer_object(val) or is_float_object(val): - # these must be ns unit by-definition - - if val != val or val == NPY_NAT: - iresult[i] = NPY_NAT - elif is_raise or is_ignore: - iresult[i] = val - seen_integer=1 - else: - # coerce - # we now need to parse this as if unit='ns' - # we can ONLY accept integers at this point - 
# if we have previously (or in future accept - # datetimes/strings, then we must coerce) - seen_integer = 1 - try: - iresult[i] = cast_from_unit(val, 'ns') - except: - iresult[i] = NPY_NAT - - elif util.is_string_object(val): - # string - - try: - if len(val) == 0 or val in _nat_strings: - iresult[i] = NPY_NAT - continue - - seen_string=1 - _string_to_dts(val, &dts, &out_local, &out_tzoffset) - value = pandas_datetimestruct_to_datetime( - PANDAS_FR_ns, &dts) - if out_local == 1: - tz = pytz.FixedOffset(out_tzoffset) - value = tz_convert_single(value, tz, 'UTC') - iresult[i] = value - _check_dts_bounds(&dts) - except ValueError: - # if requiring iso8601 strings, skip trying other formats - if require_iso8601: - if is_coerce: - iresult[i] = NPY_NAT - continue - elif is_raise: - raise ValueError( - "time data %r doesn't match format " - "specified" % (val,)) - else: - return values - - try: - py_dt = parse_datetime_string(val, dayfirst=dayfirst, - yearfirst=yearfirst) - except Exception: - if is_coerce: - iresult[i] = NPY_NAT - continue - raise TypeError("invalid string coercion to datetime") - - try: - _ts = convert_to_tsobject(py_dt, None, None, 0, 0) - iresult[i] = _ts.value - except ValueError: - if is_coerce: - iresult[i] = NPY_NAT - continue - raise - except: - if is_coerce: - iresult[i] = NPY_NAT - continue - raise - else: - if is_coerce: - iresult[i] = NPY_NAT - else: - raise TypeError("{0} is not convertible to datetime" - .format(type(val))) - - if seen_datetime and seen_integer: - # we have mixed datetimes & integers - - if is_coerce: - # coerce all of the integers/floats to NaT, preserve - # the datetimes and other convertibles - for i in range(n): - val = values[i] - if is_integer_object(val) or is_float_object(val): - result[i] = NPY_NAT - elif is_raise: - raise ValueError( - "mixed datetimes and integers in passed array") - else: - raise TypeError - - return result - except OutOfBoundsDatetime: - if is_raise: - raise - - oresult = np.empty(n, 
dtype=object) - for i in range(n): - val = values[i] - - # set as nan except if its a NaT - if _checknull_with_nat(val): - if PyFloat_Check(val): - oresult[i] = np.nan - else: - oresult[i] = NaT - elif util.is_datetime64_object(val): - if get_datetime64_value(val) == NPY_NAT: - oresult[i] = NaT - else: - oresult[i] = val.item() - else: - oresult[i] = val - return oresult - except TypeError: - oresult = np.empty(n, dtype=object) - - for i in range(n): - val = values[i] - if _checknull_with_nat(val): - oresult[i] = val - elif util.is_string_object(val): - - if len(val) == 0 or val in _nat_strings: - oresult[i] = 'NaT' - continue - - try: - oresult[i] = parse_datetime_string(val, dayfirst=dayfirst, - yearfirst=yearfirst) - _pydatetime_to_dts(oresult[i], &dts) - _check_dts_bounds(&dts) - except Exception: - if is_raise: - raise - return values - # oresult[i] = val - else: - if is_raise: - raise - return values - - return oresult - - -# Similar to Timestamp/datetime, this is a construction requirement for -# timedeltas that we need to do object instantiation in python. This will -# serve as a C extension type that shadows the Python class, where we do any -# heavy lifting. 
-cdef class _Timedelta(timedelta): - - cdef readonly: - int64_t value # nanoseconds - object freq # frequency reference - bint is_populated # are my components populated - int64_t _sign, _d, _h, _m, _s, _ms, _us, _ns - - def __hash__(_Timedelta self): - if self._has_ns(): - return hash(self.value) - else: - return timedelta.__hash__(self) - - def __richcmp__(_Timedelta self, object other, int op): - cdef: - _Timedelta ots - int ndim - - if isinstance(other, _Timedelta): - if isinstance(other, _NaT): - return _cmp_nat_dt(other, self, _reverse_ops[op]) - ots = other - elif isinstance(other, timedelta): - ots = Timedelta(other) - else: - ndim = getattr(other, _NDIM_STRING, -1) - - if ndim != -1: - if ndim == 0: - if isinstance(other, np.timedelta64): - other = Timedelta(other) - else: - if op == Py_EQ: - return False - elif op == Py_NE: - return True - - # only allow ==, != ops - raise TypeError('Cannot compare type %r with type %r' % - (type(self).__name__, - type(other).__name__)) - if isinstance(other, np.ndarray): - return PyObject_RichCompare(np.array([self]), other, op) - return PyObject_RichCompare(other, self, _reverse_ops[op]) - else: - if op == Py_EQ: - return False - elif op == Py_NE: - return True - raise TypeError('Cannot compare type %r with type %r' % - (type(self).__name__, type(other).__name__)) - - return _cmp_scalar(self.value, ots.value, op) - - def _ensure_components(_Timedelta self): - """ - compute the components - """ - cdef int64_t sfrac, ifrac, frac, ivalue = self.value - - if self.is_populated: - return - - # put frac in seconds - frac = ivalue /(1000 *1000 *1000) - if frac < 0: - self._sign = -1 - - # even fraction - if (-frac % 86400) != 0: - self._d = -frac /86400 + 1 - frac += 86400 *self._d - else: - frac = -frac - else: - self._sign = 1 - self._d = 0 - - if frac >= 86400: - self._d += frac / 86400 - frac -= self._d * 86400 - - if frac >= 3600: - self._h = frac / 3600 - frac -= self._h * 3600 - else: - self._h = 0 - - if frac >= 60: - 
self._m = frac / 60 - frac -= self._m * 60 - else: - self._m = 0 - - if frac >= 0: - self._s = frac - frac -= self._s - else: - self._s = 0 - - sfrac = (self._h * 3600 + self._m * 60 - + self._s) * (1000 * 1000 * 1000) - if self._sign < 0: - ifrac = ivalue + self._d *DAY_NS - sfrac - else: - ifrac = ivalue - (self._d *DAY_NS + sfrac) - - if ifrac != 0: - self._ms = ifrac /(1000 *1000) - ifrac -= self._ms *1000 *1000 - self._us = ifrac /1000 - ifrac -= self._us *1000 - self._ns = ifrac - else: - self._ms = 0 - self._us = 0 - self._ns = 0 - - self.is_populated = 1 - - cpdef timedelta to_pytimedelta(_Timedelta self): - """ - return an actual datetime.timedelta object - note: we lose nanosecond resolution if any - """ - return timedelta(microseconds=int(self.value) /1000) - - cpdef bint _has_ns(self): - return self.value % 1000 != 0 - -# components named tuple -Components = collections.namedtuple('Components', [ - 'days', 'hours', 'minutes', 'seconds', - 'milliseconds', 'microseconds', 'nanoseconds']) - -# Python front end to C extension type _Timedelta -# This serves as the box for timedelta64 - - -class Timedelta(_Timedelta): - """ - Represents a duration, the difference between two dates or times. - - Timedelta is the pandas equivalent of python's ``datetime.timedelta`` - and is interchangable with it in most cases. - - Parameters - ---------- - value : Timedelta, timedelta, np.timedelta64, string, or integer - unit : string, [D,h,m,s,ms,us,ns] - Denote the unit of the input, if input is an integer. Default 'ns'. - days, seconds, microseconds, - milliseconds, minutes, hours, weeks : numeric, optional - Values for construction in compat with datetime.timedelta. - np ints and floats will be coereced to python ints and floats. - - Notes - ----- - The ``.value`` attribute is always in ns. 
- - """ - - def __new__(cls, object value=_no_input, unit=None, **kwargs): - cdef _Timedelta td_base - - if value is _no_input: - if not len(kwargs): - raise ValueError( - "cannot construct a Timedelta without a value/unit or " - "descriptive keywords (days,seconds....)") - - def _to_py_int_float(v): - if is_integer_object(v): - return int(v) - elif is_float_object(v): - return float(v) - raise TypeError( - "Invalid type {0}. Must be int or float.".format(type(v))) - - kwargs = dict([ (k, _to_py_int_float(v)) - for k, v in iteritems(kwargs) ]) - - try: - nano = kwargs.pop('nanoseconds', 0) - value = convert_to_timedelta64( - timedelta(**kwargs), 'ns') + nano - except TypeError as e: - raise ValueError("cannot construct a Timedelta from the " - "passed arguments, allowed keywords are " - "[weeks, days, hours, minutes, seconds, " - "milliseconds, microseconds, nanoseconds]") - - if isinstance(value, Timedelta): - value = value.value - elif util.is_string_object(value): - value = np.timedelta64(parse_timedelta_string(value)) - elif isinstance(value, timedelta): - value = convert_to_timedelta64(value, 'ns') - elif isinstance(value, np.timedelta64): - if unit is not None: - value = value.astype('timedelta64[{0}]'.format(unit)) - value = value.astype('timedelta64[ns]') - elif hasattr(value, 'delta'): - value = np.timedelta64(_delta_to_nanoseconds(value.delta), 'ns') - elif is_integer_object(value) or util.is_float_object(value): - # unit=None is de-facto 'ns' - value = convert_to_timedelta64(value, unit) - elif _checknull_with_nat(value): - return NaT - else: - raise ValueError( - "Value must be Timedelta, string, integer, " - "float, timedelta or convertible") - - if isinstance(value, np.timedelta64): - value = value.view('i8') - - # nat - if value == NPY_NAT: - return NaT - - # make timedelta happy - td_base = _Timedelta.__new__(cls, microseconds=int(value) /1000) - td_base.value = value - td_base.is_populated = 0 - return td_base - - @property - def delta(self): - """ 
return out delta in ns (for internal compat) """ - return self.value - - @property - def asm8(self): - """ return a numpy timedelta64 array view of myself """ - return np.int64(self.value).view('m8[ns]') - - @property - def resolution(self): - """ return a string representing the lowest resolution that we have """ - - self._ensure_components() - if self._ns: - return "N" - elif self._us: - return "U" - elif self._ms: - return "L" - elif self._s: - return "S" - elif self._m: - return "T" - elif self._h: - return "H" - else: - return "D" - - def _round(self, freq, rounder): - - cdef int64_t result, unit - - from pandas.tseries.frequencies import to_offset - unit = to_offset(freq).nanos - result = unit *rounder(self.value /float(unit)) - return Timedelta(result, unit='ns') - - def round(self, freq): - """ - Round the Timedelta to the specified resolution - - Returns - ------- - a new Timedelta rounded to the given resolution of `freq` - - Parameters - ---------- - freq : a freq string indicating the rounding resolution - - Raises - ------ - ValueError if the freq cannot be converted - """ - return self._round(freq, np.round) - - def floor(self, freq): - """ - return a new Timedelta floored to this resolution - - Parameters - ---------- - freq : a freq string indicating the flooring resolution - """ - return self._round(freq, np.floor) - - def ceil(self, freq): - """ - return a new Timedelta ceiled to this resolution - - Parameters - ---------- - freq : a freq string indicating the ceiling resolution - """ - return self._round(freq, np.ceil) - - def _repr_base(self, format=None): - """ - - Parameters - ---------- - format : None|all|even_day|sub_day|long - - Returns - ------- - converted : string of a Timedelta - - """ - cdef object sign_pretty, sign2_pretty, seconds_pretty, subs - - self._ensure_components() - - if self._sign < 0: - sign_pretty = "-" - sign2_pretty = " +" - else: - sign_pretty = "" - sign2_pretty = " " - - # show everything - if format == 'all': - 
seconds_pretty = "%02d.%03d%03d%03d" % ( - self._s, self._ms, self._us, self._ns) - return "%s%d days%s%02d:%02d:%s" % (sign_pretty, self._d, - sign2_pretty, self._h, - self._m, seconds_pretty) - - # by default not showing nano - if self._ms or self._us or self._ns: - seconds_pretty = "%02d.%03d%03d" % (self._s, self._ms, self._us) - else: - seconds_pretty = "%02d" % self._s - - # if we have a partial day - subs = (self._h or self._m or self._s or - self._ms or self._us or self._ns) - - if format == 'even_day': - if not subs: - return "%s%d days" % (sign_pretty, self._d) - - elif format == 'sub_day': - if not self._d: - - # degenerate, don't need the extra space - if self._sign > 0: - sign2_pretty = "" - return "%s%s%02d:%02d:%s" % (sign_pretty, sign2_pretty, - self._h, self._m, seconds_pretty) - - if subs or format=='long': - return "%s%d days%s%02d:%02d:%s" % (sign_pretty, self._d, - sign2_pretty, self._h, - self._m, seconds_pretty) - return "%s%d days" % (sign_pretty, self._d) - - def __repr__(self): - return "Timedelta('{0}')".format(self._repr_base(format='long')) - def __str__(self): - return self._repr_base(format='long') - - @property - def components(self): - """ Return a Components NamedTuple-like """ - self._ensure_components() - if self._sign < 0: - return Components(-self._d, self._h, self._m, self._s, - self._ms, self._us, self._ns) - - # return the named tuple - return Components(self._d, self._h, self._m, self._s, - self._ms, self._us, self._ns) - - @property - def days(self): - """ - Number of Days - - .components will return the shown components - """ - self._ensure_components() - if self._sign < 0: - return -1 *self._d - return self._d - - @property - def seconds(self): - """ - Number of seconds (>= 0 and less than 1 day). 
- - .components will return the shown components - """ - self._ensure_components() - return self._h *3600 + self._m *60 + self._s - - @property - def microseconds(self): - """ - Number of microseconds (>= 0 and less than 1 second). - - .components will return the shown components - """ - self._ensure_components() - return self._ms *1000 + self._us - - @property - def nanoseconds(self): - """ - Number of nanoseconds (>= 0 and less than 1 microsecond). - - .components will return the shown components - """ - self._ensure_components() - return self._ns - - def total_seconds(self): - """ - Total duration of timedelta in seconds (to ns precision) - """ - return 1e-9 *self.value - - def isoformat(self): - """ - Format Timedelta as ISO 8601 Duration like - `P[n]Y[n]M[n]DT[n]H[n]M[n]S`, where the `[n]`s are replaced by the - values. See https://en.wikipedia.org/wiki/ISO_8601#Durations - - .. versionadded:: 0.20.0 - - Returns - ------- - formatted : str - - Notes - ----- - The longest component is days, whose value may be larger than - 365. - Every component is always included, even if its value is 0. - Pandas uses nanosecond precision, so up to 9 decimal places may - be included in the seconds component. - Trailing 0's are removed from the seconds component after the decimal. - We do not 0 pad components, so it's `...T5H...`, not `...T05H...` - - Examples - -------- - >>> td = pd.Timedelta(days=6, minutes=50, seconds=3, - ... 
milliseconds=10, microseconds=10, nanoseconds=12) - >>> td.isoformat() - 'P6DT0H50M3.010010012S' - >>> pd.Timedelta(hours=1, seconds=10).isoformat() - 'P0DT0H0M10S' - >>> pd.Timedelta(hours=1, seconds=10).isoformat() - 'P0DT0H0M10S' - >>> pd.Timedelta(days=500.5).isoformat() - 'P500DT12H0MS' - - See Also - -------- - Timestamp.isoformat - """ - components = self.components - seconds = '{}.{:0>3}{:0>3}{:0>3}'.format(components.seconds, - components.milliseconds, - components.microseconds, - components.nanoseconds) - # Trim unnecessary 0s, 1.000000000 -> 1 - seconds = seconds.rstrip('0').rstrip('.') - tpl = 'P{td.days}DT{td.hours}H{td.minutes}M{seconds}S'.format( - td=components, seconds=seconds) - return tpl - - def __setstate__(self, state): - (value) = state - self.value = value - - def __reduce__(self): - object_state = self.value, - return (Timedelta, object_state) - - def view(self, dtype): - """ array view compat """ - return np.timedelta64(self.value).view(dtype) - - def to_timedelta64(self): - """ Returns a numpy.timedelta64 object with 'ns' precision """ - return np.timedelta64(self.value, 'ns') - - def _validate_ops_compat(self, other): - # return True if we are compat with operating - if _checknull_with_nat(other): - return True - elif isinstance(other, (Timedelta, timedelta, np.timedelta64)): - return True - elif util.is_string_object(other): - return True - elif hasattr(other, 'delta'): - return True - return False - - # higher than np.ndarray and np.matrix - __array_priority__ = 100 - - def _binary_op_method_timedeltalike(op, name): - # define a binary operation that only works if the other argument is - # timedelta like or an array of timedeltalike - def f(self, other): - # an offset - if hasattr(other, 'delta') and not isinstance(other, Timedelta): - return op(self, other.delta) - - # a datetimelike - if (isinstance(other, (datetime, np.datetime64)) - and not isinstance(other, (Timestamp, NaTType))): - return op(self, Timestamp(other)) - - # nd-array 
like - if hasattr(other, 'dtype'): - if other.dtype.kind not in ['m', 'M']: - # raise rathering than letting numpy return wrong answer - return NotImplemented - return op(self.to_timedelta64(), other) - - if not self._validate_ops_compat(other): - return NotImplemented - - if other is NaT: - return NaT - - try: - other = Timedelta(other) - except ValueError: - # failed to parse as timedelta - return NotImplemented - - return Timedelta(op(self.value, other.value), unit='ns') - - f.__name__ = name - return f - - __add__ = _binary_op_method_timedeltalike(lambda x, y: x + y, '__add__') - __radd__ = _binary_op_method_timedeltalike(lambda x, y: x + y, '__radd__') - __sub__ = _binary_op_method_timedeltalike(lambda x, y: x - y, '__sub__') - __rsub__ = _binary_op_method_timedeltalike(lambda x, y: y - x, '__rsub__') - - def __mul__(self, other): - - # nd-array like - if hasattr(other, 'dtype'): - return other * self.to_timedelta64() - - if other is NaT: - return NaT - - # only integers and floats allowed - if not (is_integer_object(other) or is_float_object(other)): - return NotImplemented - - return Timedelta(other * self.value, unit='ns') - - __rmul__ = __mul__ - - def __truediv__(self, other): - - if hasattr(other, 'dtype'): - return self.to_timedelta64() / other - - # integers or floats - if is_integer_object(other) or is_float_object(other): - return Timedelta(self.value /other, unit='ns') - - if not self._validate_ops_compat(other): - return NotImplemented - - other = Timedelta(other) - if other is NaT: - return np.nan - return self.value /float(other.value) - - def __rtruediv__(self, other): - if hasattr(other, 'dtype'): - return other / self.to_timedelta64() - - if not self._validate_ops_compat(other): - return NotImplemented - - other = Timedelta(other) - if other is NaT: - return NaT - return float(other.value) / self.value - - if not PY3: - __div__ = __truediv__ - __rdiv__ = __rtruediv__ - - def _not_implemented(self, *args, **kwargs): - return NotImplemented - - 
__floordiv__ = _not_implemented - __rfloordiv__ = _not_implemented - - def _op_unary_method(func, name): - - def f(self): - return Timedelta(func(self.value), unit='ns') - f.__name__ = name - return f - - __inv__ = _op_unary_method(lambda x: -x, '__inv__') - __neg__ = _op_unary_method(lambda x: -x, '__neg__') - __pos__ = _op_unary_method(lambda x: x, '__pos__') - __abs__ = _op_unary_method(lambda x: abs(x), '__abs__') - -# resolution in ns -Timedelta.min = Timedelta(np.iinfo(np.int64).min +1) -Timedelta.max = Timedelta(np.iinfo(np.int64).max) - -cdef PyTypeObject* td_type = Timedelta - - -cdef inline bint is_timedelta(object o): - return Py_TYPE(o) == td_type # isinstance(o, Timedelta) - - -cpdef array_to_timedelta64(ndarray[object] values, unit='ns', errors='raise'): - """ - Convert an ndarray to an array of timedeltas. If errors == 'coerce', - coerce non-convertible objects to NaT. Otherwise, raise. - """ - - cdef: - Py_ssize_t i, n - ndarray[int64_t] iresult - - if errors not in ('ignore', 'raise', 'coerce'): - raise ValueError("errors must be one of 'ignore', " - "'raise', or 'coerce'}") - - n = values.shape[0] - result = np.empty(n, dtype='m8[ns]') - iresult = result.view('i8') - - # Usually, we have all strings. If so, we hit the fast path. - # If this path fails, we try conversion a different way, and - # this is where all of the error handling will take place. 
- try: - for i in range(n): - result[i] = parse_timedelta_string(values[i]) - except: - for i in range(n): - try: - result[i] = convert_to_timedelta64(values[i], unit) - except ValueError: - if errors == 'coerce': - result[i] = NPY_NAT - else: - raise - - return iresult - -cdef dict timedelta_abbrevs = { 'D': 'd', - 'd': 'd', - 'days': 'd', - 'day': 'd', - 'hours': 'h', - 'hour': 'h', - 'hr': 'h', - 'h': 'h', - 'm': 'm', - 'minute': 'm', - 'min': 'm', - 'minutes': 'm', - 's': 's', - 'seconds': 's', - 'sec': 's', - 'second': 's', - 'ms': 'ms', - 'milliseconds': 'ms', - 'millisecond': 'ms', - 'milli': 'ms', - 'millis': 'ms', - 'us': 'us', - 'microseconds': 'us', - 'microsecond': 'us', - 'micro': 'us', - 'micros': 'us', - 'ns': 'ns', - 'nanoseconds': 'ns', - 'nano': 'ns', - 'nanos': 'ns', - 'nanosecond': 'ns', - } -timedelta_abbrevs_map = timedelta_abbrevs - -cdef inline int64_t timedelta_as_neg(int64_t value, bint neg): - """ - - Parameters - ---------- - value : int64_t of the timedelta value - neg : boolean if the a negative value - """ - if neg: - return -value - return value - -cdef inline timedelta_from_spec(object number, object frac, object unit): - """ - - Parameters - ---------- - number : a list of number digits - frac : a list of frac digits - unit : a list of unit characters - """ - cdef object n - - try: - unit = ''.join(unit) - unit = timedelta_abbrevs[unit.lower()] - except KeyError: - raise ValueError("invalid abbreviation: {0}".format(unit)) - - n = ''.join(number) + '.' + ''.join(frac) - return cast_from_unit(float(n), unit) - -cdef inline parse_timedelta_string(object ts): - """ - Parse a regular format timedelta string. Return an int64_t (in ns) - or raise a ValueError on an invalid parse. 
- """ - - cdef: - unicode c - bint neg=0, have_dot=0, have_value=0, have_hhmmss=0 - object current_unit=None - int64_t result=0, m=0, r - list number=[], frac=[], unit=[] - - # neg : tracks if we have a leading negative for the value - # have_dot : tracks if we are processing a dot (either post hhmmss or - # inside an expression) - # have_value : track if we have at least 1 leading unit - # have_hhmmss : tracks if we have a regular format hh:mm:ss - - if len(ts) == 0 or ts in _nat_strings: - return NPY_NAT - - # decode ts if necessary - if not PyUnicode_Check(ts) and not PY3: - ts = str(ts).decode('utf-8') - - for c in ts: - - # skip whitespace / commas - if c == ' ' or c == ',': - pass - - # positive signs are ignored - elif c == '+': - pass - - # neg - elif c == '-': - - if neg or have_value or have_hhmmss: - raise ValueError("only leading negative signs are allowed") - - neg = 1 - - # number (ascii codes) - elif ord(c) >= 48 and ord(c) <= 57: - - if have_dot: - - # we found a dot, but now its just a fraction - if len(unit): - number.append(c) - have_dot = 0 - else: - frac.append(c) - - elif not len(unit): - number.append(c) - - else: - r = timedelta_from_spec(number, frac, unit) - unit, number, frac = [], [c], [] - - result += timedelta_as_neg(r, neg) - - # hh:mm:ss. 
- elif c == ':': - - # we flip this off if we have a leading value - if have_value: - neg = 0 - - # we are in the pattern hh:mm:ss pattern - if len(number): - if current_unit is None: - current_unit = 'h' - m = 1000000000L * 3600 - elif current_unit == 'h': - current_unit = 'm' - m = 1000000000L * 60 - elif current_unit == 'm': - current_unit = 's' - m = 1000000000L - r = int(''.join(number)) * m - result += timedelta_as_neg(r, neg) - have_hhmmss = 1 - else: - raise ValueError("expecting hh:mm:ss format, " - "received: {0}".format(ts)) - - unit, number = [], [] - - # after the decimal point - elif c == '.': - - if len(number) and current_unit is not None: - - # by definition we had something like - # so we need to evaluate the final field from a - # hh:mm:ss (so current_unit is 'm') - if current_unit != 'm': - raise ValueError("expected hh:mm:ss format before .") - m = 1000000000L - r = int(''.join(number)) * m - result += timedelta_as_neg(r, neg) - have_value = 1 - unit, number, frac = [], [], [] - - have_dot = 1 - - # unit - else: - unit.append(c) - have_value = 1 - have_dot = 0 - - # we had a dot, but we have a fractional - # value since we have an unit - if have_dot and len(unit): - r = timedelta_from_spec(number, frac, unit) - result += timedelta_as_neg(r, neg) - - # we have a dot as part of a regular format - # e.g. 
hh:mm:ss.fffffff - elif have_dot: - - if ((len(number) or len(frac)) and not len(unit) - and current_unit is None): - raise ValueError("no units specified") - - if len(frac) > 0 and len(frac) <= 3: - m = 10**(3 -len(frac)) * 1000L * 1000L - elif len(frac) > 3 and len(frac) <= 6: - m = 10**(6 -len(frac)) * 1000L - else: - m = 10**(9 -len(frac)) - - r = int(''.join(frac)) * m - result += timedelta_as_neg(r, neg) - - # we have a regular format - # we must have seconds at this point (hence the unit is still 'm') - elif current_unit is not None: - if current_unit != 'm': - raise ValueError("expected hh:mm:ss format") - m = 1000000000L - r = int(''.join(number)) * m - result += timedelta_as_neg(r, neg) - - # we have a last abbreviation - elif len(unit): - if len(number): - r = timedelta_from_spec(number, frac, unit) - result += timedelta_as_neg(r, neg) - else: - raise ValueError("unit abbreviation w/o a number") - - # treat as nanoseconds - # but only if we don't have anything else - else: - if have_value: - raise ValueError("have leftover units") - if len(number): - r = timedelta_from_spec(number, frac, 'ns') - result += timedelta_as_neg(r, neg) - - return result - -cpdef convert_to_timedelta64(object ts, object unit): - """ - Convert an incoming object to a timedelta64 if possible - - Handle these types of objects: - - timedelta/Timedelta - - timedelta64 - - an offset - - np.int64 (with unit providing a possible modifier) - - None/NaT - - Return an ns based int64 - - # kludgy here until we have a timedelta scalar - # handle the numpy < 1.7 case - """ - if _checknull_with_nat(ts): - return np.timedelta64(NPY_NAT) - elif isinstance(ts, Timedelta): - # already in the proper format - ts = np.timedelta64(ts.value) - elif util.is_datetime64_object(ts): - # only accept a NaT here - if ts.astype('int64') == NPY_NAT: - return np.timedelta64(NPY_NAT) - elif isinstance(ts, np.timedelta64): - ts = ts.astype("m8[{0}]".format(unit.lower())) - elif is_integer_object(ts): - if ts == 
NPY_NAT: - return np.timedelta64(NPY_NAT) - else: - if util.is_array(ts): - ts = ts.astype('int64').item() - if unit in ['Y', 'M', 'W']: - ts = np.timedelta64(ts, unit) - else: - ts = cast_from_unit(ts, unit) - ts = np.timedelta64(ts) - elif is_float_object(ts): - if util.is_array(ts): - ts = ts.astype('int64').item() - if unit in ['Y', 'M', 'W']: - ts = np.timedelta64(int(ts), unit) - else: - ts = cast_from_unit(ts, unit) - ts = np.timedelta64(ts) - elif util.is_string_object(ts): - ts = np.timedelta64(parse_timedelta_string(ts)) - elif hasattr(ts, 'delta'): - ts = np.timedelta64(_delta_to_nanoseconds(ts), 'ns') - - if isinstance(ts, timedelta): - ts = np.timedelta64(ts) - elif not isinstance(ts, np.timedelta64): - raise ValueError("Invalid type for timedelta " - "scalar: %s" % type(ts)) - return ts.astype('timedelta64[ns]') - - -def array_strptime(ndarray[object] values, object fmt, - bint exact=True, errors='raise'): - """ - Parameters - ---------- - values : ndarray of string-like objects - fmt : string-like regex - exact : matches must be exact if True, search if False - coerce : if invalid values found, coerce to NaT - """ - - cdef: - Py_ssize_t i, n = len(values) - pandas_datetimestruct dts - ndarray[int64_t] iresult - int year, month, day, minute, hour, second, weekday, julian, tz - int week_of_year, week_of_year_start - int64_t us, ns - object val, group_key, ampm, found - dict found_key - bint is_raise = errors=='raise' - bint is_ignore = errors=='ignore' - bint is_coerce = errors=='coerce' - - assert is_raise or is_ignore or is_coerce - - global _TimeRE_cache, _regex_cache - with _cache_lock: - if _getlang() != _TimeRE_cache.locale_time.lang: - _TimeRE_cache = TimeRE() - _regex_cache.clear() - if len(_regex_cache) > _CACHE_MAX_SIZE: - _regex_cache.clear() - locale_time = _TimeRE_cache.locale_time - format_regex = _regex_cache.get(fmt) - if not format_regex: - try: - format_regex = _TimeRE_cache.compile(fmt) - # KeyError raised when a bad format is found; 
can be specified as - # \\, in which case it was a stray % but with a space after it - except KeyError, err: - bad_directive = err.args[0] - if bad_directive == "\\": - bad_directive = "%" - del err - raise ValueError("'%s' is a bad directive in format '%s'" % - (bad_directive, fmt)) - # IndexError only occurs when the format string is "%" - except IndexError: - raise ValueError("stray %% in format '%s'" % fmt) - _regex_cache[fmt] = format_regex - - result = np.empty(n, dtype='M8[ns]') - iresult = result.view('i8') - - dts.us = dts.ps = dts.as = 0 - - cdef dict _parse_code_table = { - 'y': 0, - 'Y': 1, - 'm': 2, - 'B': 3, - 'b': 4, - 'd': 5, - 'H': 6, - 'I': 7, - 'M': 8, - 'S': 9, - 'f': 10, - 'A': 11, - 'a': 12, - 'w': 13, - 'j': 14, - 'U': 15, - 'W': 16, - 'Z': 17, - 'p': 18 # just an additional key, works only with I - } - cdef int parse_code - - for i in range(n): - val = values[i] - if util.is_string_object(val): - if val in _nat_strings: - iresult[i] = NPY_NAT - continue - else: - if _checknull_with_nat(val): - iresult[i] = NPY_NAT - continue - else: - val = str(val) - - # exact matching - if exact: - found = format_regex.match(val) - if not found: - if is_coerce: - iresult[i] = NPY_NAT - continue - raise ValueError("time data %r does not match " - "format %r (match)" % (values[i], fmt)) - if len(val) != found.end(): - if is_coerce: - iresult[i] = NPY_NAT - continue - raise ValueError("unconverted data remains: %s" % - values[i][found.end():]) - - # search - else: - found = format_regex.search(val) - if not found: - if is_coerce: - iresult[i] = NPY_NAT - continue - raise ValueError("time data %r does not match format " - "%r (search)" % (values[i], fmt)) - - year = 1900 - month = day = 1 - hour = minute = second = ns = us = 0 - tz = -1 - # Default to -1 to signify that values not known; not critical to have, - # though - week_of_year = -1 - week_of_year_start = -1 - # weekday and julian defaulted to -1 so as to signal need to calculate - # values - weekday = 
julian = -1 - found_dict = found.groupdict() - for group_key in found_dict.iterkeys(): - # Directives not explicitly handled below: - # c, x, X - # handled by making out of other directives - # U, W - # worthless without day of the week - parse_code = _parse_code_table[group_key] - - if parse_code == 0: - year = int(found_dict['y']) - # Open Group specification for strptime() states that a %y - #value in the range of [00, 68] is in the century 2000, while - #[69,99] is in the century 1900 - if year <= 68: - year += 2000 - else: - year += 1900 - elif parse_code == 1: - year = int(found_dict['Y']) - elif parse_code == 2: - month = int(found_dict['m']) - elif parse_code == 3: - # elif group_key == 'B': - month = locale_time.f_month.index(found_dict['B'].lower()) - elif parse_code == 4: - # elif group_key == 'b': - month = locale_time.a_month.index(found_dict['b'].lower()) - elif parse_code == 5: - # elif group_key == 'd': - day = int(found_dict['d']) - elif parse_code == 6: - # elif group_key == 'H': - hour = int(found_dict['H']) - elif parse_code == 7: - hour = int(found_dict['I']) - ampm = found_dict.get('p', '').lower() - # If there was no AM/PM indicator, we'll treat this like AM - if ampm in ('', locale_time.am_pm[0]): - # We're in AM so the hour is correct unless we're - # looking at 12 midnight. - # 12 midnight == 12 AM == hour 0 - if hour == 12: - hour = 0 - elif ampm == locale_time.am_pm[1]: - # We're in PM so we need to add 12 to the hour unless - # we're looking at 12 noon. 
- # 12 noon == 12 PM == hour 12 - if hour != 12: - hour += 12 - elif parse_code == 8: - minute = int(found_dict['M']) - elif parse_code == 9: - second = int(found_dict['S']) - elif parse_code == 10: - s = found_dict['f'] - # Pad to always return nanoseconds - s += "0" * (9 - len(s)) - us = long(s) - ns = us % 1000 - us = us / 1000 - elif parse_code == 11: - weekday = locale_time.f_weekday.index(found_dict['A'].lower()) - elif parse_code == 12: - weekday = locale_time.a_weekday.index(found_dict['a'].lower()) - elif parse_code == 13: - weekday = int(found_dict['w']) - if weekday == 0: - weekday = 6 - else: - weekday -= 1 - elif parse_code == 14: - julian = int(found_dict['j']) - elif parse_code == 15 or parse_code == 16: - week_of_year = int(found_dict[group_key]) - if group_key == 'U': - # U starts week on Sunday. - week_of_year_start = 6 - else: - # W starts week on Monday. - week_of_year_start = 0 - elif parse_code == 17: - # Since -1 is default value only need to worry about setting tz - # if it can be something other than -1. - found_zone = found_dict['Z'].lower() - for value, tz_values in enumerate(locale_time.timezone): - if found_zone in tz_values: - # Deal w/ bad locale setup where timezone names are the - # same and yet time.daylight is true; too ambiguous to - # be able to tell what timezone has daylight savings - if (time.tzname[0] == time.tzname[1] and - time.daylight and found_zone not in ( - "utc", "gmt")): - break - else: - tz = value - break - # If we know the wk of the year and what day of that wk, we can figure - # out the Julian day of the year. - if julian == -1 and week_of_year != -1 and weekday != -1: - week_starts_Mon = True if week_of_year_start == 0 else False - julian = _calc_julian_from_U_or_W(year, week_of_year, weekday, - week_starts_Mon) - # Cannot pre-calculate datetime_date() since can change in Julian - # calculation and thus could have different value for the day of the wk - # calculation. 
- try: - if julian == -1: - # Need to add 1 to result since first day of the year is 1, not - # 0. - julian = datetime_date(year, month, day).toordinal() - \ - datetime_date(year, 1, 1).toordinal() + 1 - else: # Assume that if they bothered to include Julian day it will - # be accurate. - datetime_result = datetime_date.fromordinal( - (julian - 1) + datetime_date(year, 1, 1).toordinal()) - year = datetime_result.year - month = datetime_result.month - day = datetime_result.day - except ValueError: - if is_coerce: - iresult[i] = NPY_NAT - continue - raise - if weekday == -1: - weekday = datetime_date(year, month, day).weekday() - - dts.year = year - dts.month = month - dts.day = day - dts.hour = hour - dts.min = minute - dts.sec = second - dts.us = us - dts.ps = ns * 1000 - - iresult[i] = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts) - try: - _check_dts_bounds(&dts) - except ValueError: - if is_coerce: - iresult[i] = NPY_NAT - continue - raise - - return result - - -#---------------------------------------------------------------------- -# NaT methods/property setups - - -# inject the Timestamp field properties -# these by definition return np.nan -fields = ['year', 'quarter', 'month', 'day', 'hour', - 'minute', 'second', 'millisecond', 'microsecond', 'nanosecond', - 'week', 'dayofyear', 'days_in_month', 'daysinmonth', 'dayofweek', - 'weekday_name'] -for field in fields: - prop = property(fget=lambda self: np.nan) - setattr(NaTType, field, prop) - - -# define how we are handling NaT methods & inject -# to the NaTType class; these can return NaT, np.nan -# or raise respectively -_nat_methods = ['date', 'now', 'replace', 'to_pydatetime', - 'today', 'round', 'floor', 'ceil'] -_nan_methods = ['weekday', 'isoweekday', 'total_seconds'] -_implemented_methods = ['to_datetime', 'to_datetime64', 'isoformat'] -_implemented_methods.extend(_nat_methods) -_implemented_methods.extend(_nan_methods) - - -def _get_docstring(_method_name): - # NaT serves double duty as 
Timestamp & Timedelta - # missing value, so need to acquire doc-strings for both - - try: - return getattr(Timestamp, _method_name).__doc__ - except AttributeError: - pass - - try: - return getattr(Timedelta, _method_name).__doc__ - except AttributeError: - pass - - return None - - -for _method_name in _nat_methods: - - def _make_nat_func(func_name): - def f(*args, **kwargs): - return NaT - f.__name__ = func_name - f.__doc__ = _get_docstring(_method_name) - return f - - setattr(NaTType, _method_name, _make_nat_func(_method_name)) - - -for _method_name in _nan_methods: - - def _make_nan_func(func_name): - def f(*args, **kwargs): - return np.nan - f.__name__ = func_name - f.__doc__ = _get_docstring(_method_name) - return f - - setattr(NaTType, _method_name, _make_nan_func(_method_name)) - - -# GH9513 NaT methods (except to_datetime64) to raise, return np.nan, or -# return NaT create functions that raise, for binding to NaTType -for _maybe_method_name in dir(NaTType): - _maybe_method = getattr(NaTType, _maybe_method_name) - if (callable(_maybe_method) - and not _maybe_method_name.startswith("_") - and _maybe_method_name not in _implemented_methods): - - def _make_error_func(func_name): - def f(*args, **kwargs): - raise ValueError("NaTType does not support " + func_name) - f.__name__ = func_name - f.__doc__ = _get_docstring(_method_name) - return f - - setattr(NaTType, _maybe_method_name, - _make_error_func(_maybe_method_name)) - - -#---------------------------------------------------------------------- -# Conversion routines - - -def _delta_to_nanoseconds(delta): - if isinstance(delta, np.ndarray): - return delta.astype('m8[ns]').astype('int64') - if hasattr(delta, 'nanos'): - return delta.nanos - if hasattr(delta, 'delta'): - delta = delta.delta - if is_timedelta64_object(delta): - return delta.astype("timedelta64[ns]").item() - if is_integer_object(delta): - return delta - return (delta.days * 24 * 60 * 60 * 1000000 - + delta.seconds * 1000000 - + 
delta.microseconds) * 1000 - - -cdef inline _get_datetime64_nanos(object val): - cdef: - pandas_datetimestruct dts - PANDAS_DATETIMEUNIT unit - npy_datetime ival - - unit = get_datetime64_unit(val) - ival = get_datetime64_value(val) - - if unit != PANDAS_FR_ns: - pandas_datetime_to_datetimestruct(ival, unit, &dts) - _check_dts_bounds(&dts) - return pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts) - else: - return ival - -cpdef inline int64_t cast_from_unit(object ts, object unit) except? -1: - """ return a casting of the unit represented to nanoseconds - round the fractional part of a float to our precision, p """ - cdef: - int64_t m - int p - - if unit == 'D' or unit == 'd': - m = 1000000000L * 86400 - p = 6 - elif unit == 'h': - m = 1000000000L * 3600 - p = 6 - elif unit == 'm': - m = 1000000000L * 60 - p = 6 - elif unit == 's': - m = 1000000000L - p = 6 - elif unit == 'ms': - m = 1000000L - p = 3 - elif unit == 'us': - m = 1000L - p = 0 - elif unit == 'ns' or unit is None: - m = 1L - p = 0 - else: - raise ValueError("cannot cast unit {0}".format(unit)) - - # just give me the unit back - if ts is None: - return m - - # cast the unit, multiply base/frace separately - # to avoid precision issues from float -> int - base = ts - frac = ts -base - if p: - frac = round(frac, p) - return (base *m) + (frac *m) - - -def cast_to_nanoseconds(ndarray arr): - cdef: - Py_ssize_t i, n = arr.size - ndarray[int64_t] ivalues, iresult - PANDAS_DATETIMEUNIT unit - pandas_datetimestruct dts - - shape = ( arr).shape - - ivalues = arr.view(np.int64).ravel() - - result = np.empty(shape, dtype='M8[ns]') - iresult = result.ravel().view(np.int64) - - if len(iresult) == 0: - return result - - unit = get_datetime64_unit(arr.flat[0]) - for i in range(n): - if ivalues[i] != NPY_NAT: - pandas_datetime_to_datetimestruct(ivalues[i], unit, &dts) - iresult[i] = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts) - _check_dts_bounds(&dts) - else: - iresult[i] = NPY_NAT - - return result - - 
-def pydt_to_i8(object pydt): - """ - Convert to int64 representation compatible with numpy datetime64; converts - to UTC - """ - cdef: - _TSObject ts - - ts = convert_to_tsobject(pydt, None, None, 0, 0) - - return ts.value - - -def i8_to_pydt(int64_t i8, object tzinfo = None): - """ - Inverse of pydt_to_i8 - """ - return Timestamp(i8) - -#---------------------------------------------------------------------- -# time zone conversion helpers - -try: - import pytz - UTC = pytz.utc - have_pytz = True -except: - have_pytz = False - - -def tz_convert(ndarray[int64_t] vals, object tz1, object tz2): - cdef: - ndarray[int64_t] utc_dates, tt, result, trans, deltas - Py_ssize_t i, j, pos, n = len(vals) - ndarray[Py_ssize_t] posn - int64_t v, offset, delta - pandas_datetimestruct dts - - if not have_pytz: - import pytz - - if len(vals) == 0: - return np.array([], dtype=np.int64) - - # Convert to UTC - if _get_zone(tz1) != 'UTC': - utc_dates = np.empty(n, dtype=np.int64) - if _is_tzlocal(tz1): - for i in range(n): - v = vals[i] - if v == NPY_NAT: - utc_dates[i] = NPY_NAT - else: - pandas_datetime_to_datetimestruct(v, PANDAS_FR_ns, &dts) - dt = datetime(dts.year, dts.month, dts.day, dts.hour, - dts.min, dts.sec, dts.us, tz1) - delta = (int(total_seconds(_get_utcoffset(tz1, dt))) - * 1000000000) - utc_dates[i] = v - delta - else: - trans, deltas, typ = _get_dst_info(tz1) - - # all-NaT - tt = vals[vals!=NPY_NAT] - if not len(tt): - return vals - - posn = trans.searchsorted(tt, side='right') - j = 0 - for i in range(n): - v = vals[i] - if v == NPY_NAT: - utc_dates[i] = NPY_NAT - else: - pos = posn[j] - 1 - j = j + 1 - if pos < 0: - raise ValueError('First time before start of DST info') - offset = deltas[pos] - utc_dates[i] = v - offset - else: - utc_dates = vals - - if _get_zone(tz2) == 'UTC': - return utc_dates - - result = np.zeros(n, dtype=np.int64) - if _is_tzlocal(tz2): - for i in range(n): - v = utc_dates[i] - if v == NPY_NAT: - result[i] = NPY_NAT - else: - 
pandas_datetime_to_datetimestruct(v, PANDAS_FR_ns, &dts) - dt = datetime(dts.year, dts.month, dts.day, dts.hour, - dts.min, dts.sec, dts.us, tz2) - delta = int(total_seconds( - _get_utcoffset(tz2, dt))) * 1000000000 - result[i] = v + delta - return result - - # Convert UTC to other timezone - trans, deltas, typ = _get_dst_info(tz2) - - # use first non-NaT element - # if all-NaT, return all-NaT - if (result==NPY_NAT).all(): - return result - - # if all NaT, return all NaT - tt = utc_dates[utc_dates!=NPY_NAT] - if not len(tt): - return utc_dates - - posn = trans.searchsorted(tt, side='right') - - j = 0 - for i in range(n): - v = utc_dates[i] - if vals[i] == NPY_NAT: - result[i] = vals[i] - else: - pos = posn[j] - 1 - j = j + 1 - if pos < 0: - raise ValueError('First time before start of DST info') - offset = deltas[pos] - result[i] = v + offset - return result - - -def tz_convert_single(int64_t val, object tz1, object tz2): - cdef: - ndarray[int64_t] trans, deltas - Py_ssize_t pos - int64_t v, offset, utc_date - pandas_datetimestruct dts - - if not have_pytz: - import pytz - - if val == NPY_NAT: - return val - - # Convert to UTC - if _is_tzlocal(tz1): - pandas_datetime_to_datetimestruct(val, PANDAS_FR_ns, &dts) - dt = datetime(dts.year, dts.month, dts.day, dts.hour, - dts.min, dts.sec, dts.us, tz1) - delta = int(total_seconds(_get_utcoffset(tz1, dt))) * 1000000000 - utc_date = val - delta - elif _get_zone(tz1) != 'UTC': - trans, deltas, typ = _get_dst_info(tz1) - pos = trans.searchsorted(val, side='right') - 1 - if pos < 0: - raise ValueError('First time before start of DST info') - offset = deltas[pos] - utc_date = val - offset - else: - utc_date = val - - if _get_zone(tz2) == 'UTC': - return utc_date - if _is_tzlocal(tz2): - pandas_datetime_to_datetimestruct(val, PANDAS_FR_ns, &dts) - dt = datetime(dts.year, dts.month, dts.day, dts.hour, - dts.min, dts.sec, dts.us, tz2) - delta = int(total_seconds(_get_utcoffset(tz2, dt))) * 1000000000 - return utc_date + delta - - 
# Convert UTC to other timezone - trans, deltas, typ = _get_dst_info(tz2) - - pos = trans.searchsorted(utc_date, side='right') - 1 - if pos < 0: - raise ValueError('First time before start of DST info') - - offset = deltas[pos] - return utc_date + offset - -# Timezone data caches, key is the pytz string or dateutil file name. -dst_cache = {} - -cdef inline bint _treat_tz_as_pytz(object tz): - return hasattr(tz, '_utc_transition_times') and hasattr( - tz, '_transition_info') - -cdef inline bint _treat_tz_as_dateutil(object tz): - return hasattr(tz, '_trans_list') and hasattr(tz, '_trans_idx') - - -def _p_tz_cache_key(tz): - """ Python interface for cache function to facilitate testing.""" - return _tz_cache_key(tz) - - -cdef inline object _tz_cache_key(object tz): - """ - Return the key in the cache for the timezone info object or None - if unknown. - - The key is currently the tz string for pytz timezones, the filename for - dateutil timezones. - - Notes - ===== - This cannot just be the hash of a timezone object. Unfortunately, the - hashes of two dateutil tz objects which represent the same timezone are - not equal (even though the tz objects will compare equal and represent - the same tz file). Also, pytz objects are not always hashable so we use - str(tz) instead. - """ - if isinstance(tz, _pytz_BaseTzInfo): - return tz.zone - elif isinstance(tz, _dateutil_tzfile): - if '.tar.gz' in tz._filename: - raise ValueError('Bad tz filename. Dateutil on python 3 on ' - 'windows has a bug which causes tzfile._filename ' - 'to be the same for all timezone files. Please ' - 'construct dateutil timezones implicitly by ' - 'passing a string like "dateutil/Europe/London" ' - 'when you construct your pandas objects instead ' - 'of passing a timezone object. 
See ' - 'https://github.com/pandas-dev/pandas/pull/7362') - return 'dateutil' + tz._filename - else: - return None - - -cdef object _get_dst_info(object tz): - """ - return a tuple of : - (UTC times of DST transitions, - UTC offsets in microseconds corresponding to DST transitions, - string of type of transitions) - - """ - cache_key = _tz_cache_key(tz) - if cache_key is None: - num = int(total_seconds(_get_utcoffset(tz, None))) * 1000000000 - return (np.array([NPY_NAT + 1], dtype=np.int64), - np.array([num], dtype=np.int64), - None) - - if cache_key not in dst_cache: - if _treat_tz_as_pytz(tz): - trans = np.array(tz._utc_transition_times, dtype='M8[ns]') - trans = trans.view('i8') - try: - if tz._utc_transition_times[0].year == 1: - trans[0] = NPY_NAT + 1 - except Exception: - pass - deltas = _unbox_utcoffsets(tz._transition_info) - typ = 'pytz' - - elif _treat_tz_as_dateutil(tz): - if len(tz._trans_list): - # get utc trans times - trans_list = _get_utc_trans_times_from_dateutil_tz(tz) - trans = np.hstack([ - np.array([0], dtype='M8[s]'), # place holder for first item - np.array(trans_list, dtype='M8[s]')]).astype( - 'M8[ns]') # all trans listed - trans = trans.view('i8') - trans[0] = NPY_NAT + 1 - - # deltas - deltas = np.array([v.offset for v in ( - tz._ttinfo_before,) + tz._trans_idx], dtype='i8') - deltas *= 1000000000 - typ = 'dateutil' - - elif _is_fixed_offset(tz): - trans = np.array([NPY_NAT + 1], dtype=np.int64) - deltas = np.array([tz._ttinfo_std.offset], - dtype='i8') * 1000000000 - typ = 'fixed' - else: - trans = np.array([], dtype='M8[ns]') - deltas = np.array([], dtype='i8') - typ = None - - else: - # static tzinfo - trans = np.array([NPY_NAT + 1], dtype=np.int64) - num = int(total_seconds(_get_utcoffset(tz, None))) * 1000000000 - deltas = np.array([num], dtype=np.int64) - typ = 'static' - - dst_cache[cache_key] = (trans, deltas, typ) - - return dst_cache[cache_key] - -cdef object _get_utc_trans_times_from_dateutil_tz(object tz): - """ - Transition 
times in dateutil timezones are stored in local non-dst - time. This code converts them to UTC. It's the reverse of the code - in dateutil.tz.tzfile.__init__. - """ - new_trans = list(tz._trans_list) - last_std_offset = 0 - for i, (trans, tti) in enumerate(zip(tz._trans_list, tz._trans_idx)): - if not tti.isdst: - last_std_offset = tti.offset - new_trans[i] = trans - last_std_offset - return new_trans - - -def tot_seconds(td): - return total_seconds(td) - -cpdef ndarray _unbox_utcoffsets(object transinfo): - cdef: - Py_ssize_t i, sz - ndarray[int64_t] arr - - sz = len(transinfo) - arr = np.empty(sz, dtype='i8') - - for i in range(sz): - arr[i] = int(total_seconds(transinfo[i][0])) * 1000000000 - - return arr - - -@cython.boundscheck(False) -@cython.wraparound(False) -def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, - object errors='raise'): - """ - Localize tzinfo-naive DateRange to given time zone (using pytz). If - there are ambiguities in the values, raise AmbiguousTimeError. 
- - Returns - ------- - localized : DatetimeIndex - """ - cdef: - ndarray[int64_t] trans, deltas, idx_shifted - ndarray ambiguous_array - Py_ssize_t i, idx, pos, ntrans, n = len(vals) - int64_t *tdata - int64_t v, left, right - ndarray[int64_t] result, result_a, result_b, dst_hours - pandas_datetimestruct dts - bint infer_dst = False, is_dst = False, fill = False - bint is_coerce = errors == 'coerce', is_raise = errors == 'raise' - - # Vectorized version of DstTzInfo.localize - - assert is_coerce or is_raise - - if not have_pytz: - raise Exception("Could not find pytz module") - - if tz == UTC or tz is None: - return vals - - result = np.empty(n, dtype=np.int64) - - if _is_tzlocal(tz): - for i in range(n): - v = vals[i] - pandas_datetime_to_datetimestruct(v, PANDAS_FR_ns, &dts) - dt = datetime(dts.year, dts.month, dts.day, dts.hour, - dts.min, dts.sec, dts.us, tz) - delta = int(total_seconds(_get_utcoffset(tz, dt))) * 1000000000 - result[i] = v - delta - return result - - if isinstance(ambiguous, string_types): - if ambiguous == 'infer': - infer_dst = True - elif ambiguous == 'NaT': - fill = True - elif isinstance(ambiguous, bool): - is_dst = True - if ambiguous: - ambiguous_array = np.ones(len(vals), dtype=bool) - else: - ambiguous_array = np.zeros(len(vals), dtype=bool) - elif hasattr(ambiguous, '__iter__'): - is_dst = True - if len(ambiguous) != len(vals): - raise ValueError( - "Length of ambiguous bool-array must be the same size as vals") - ambiguous_array = np.asarray(ambiguous) - - trans, deltas, typ = _get_dst_info(tz) - - tdata = trans.data - ntrans = len(trans) - - result_a = np.empty(n, dtype=np.int64) - result_b = np.empty(n, dtype=np.int64) - result_a.fill(NPY_NAT) - result_b.fill(NPY_NAT) - - # left side - idx_shifted = (np.maximum(0, trans.searchsorted( - vals - DAY_NS, side='right') - 1)).astype(np.int64) - - for i in range(n): - v = vals[i] - deltas[idx_shifted[i]] - pos = bisect_right_i8(tdata, v, ntrans) - 1 - - # timestamp falls to the left side 
of the DST transition - if v + deltas[pos] == vals[i]: - result_a[i] = v - - # right side - idx_shifted = (np.maximum(0, trans.searchsorted( - vals + DAY_NS, side='right') - 1)).astype(np.int64) - - for i in range(n): - v = vals[i] - deltas[idx_shifted[i]] - pos = bisect_right_i8(tdata, v, ntrans) - 1 - - # timestamp falls to the right side of the DST transition - if v + deltas[pos] == vals[i]: - result_b[i] = v - - if infer_dst: - dst_hours = np.empty(n, dtype=np.int64) - dst_hours.fill(NPY_NAT) - - # Get the ambiguous hours (given the above, these are the hours - # where result_a != result_b and neither of them are NAT) - both_nat = np.logical_and(result_a != NPY_NAT, result_b != NPY_NAT) - both_eq = result_a == result_b - trans_idx = np.squeeze(np.nonzero(np.logical_and(both_nat, ~both_eq))) - if trans_idx.size == 1: - stamp = Timestamp(vals[trans_idx]) - raise pytz.AmbiguousTimeError( - "Cannot infer dst time from %s as there " - "are no repeated times" % stamp) - # Split the array into contiguous chunks (where the difference between - # indices is 1). These are effectively dst transitions in different - # years which is useful for checking that there is not an ambiguous - # transition in an individual year. - if trans_idx.size > 0: - one_diff = np.where(np.diff(trans_idx) != 1)[0] +1 - trans_grp = np.array_split(trans_idx, one_diff) - - # Iterate through each day, if there are no hours where the - # delta is negative (indicates a repeat of hour) the switch - # cannot be inferred - for grp in trans_grp: - - delta = np.diff(result_a[grp]) - if grp.size == 1 or np.all(delta > 0): - stamp = Timestamp(vals[grp[0]]) - raise pytz.AmbiguousTimeError(stamp) - - # Find the index for the switch and pull from a for dst and b - # for standard - switch_idx = (delta <= 0).nonzero()[0] - if switch_idx.size > 1: - raise pytz.AmbiguousTimeError( - "There are %i dst switches when " - "there should only be 1." 
% switch_idx.size) - switch_idx = switch_idx[0] + 1 # Pull the only index and adjust - a_idx = grp[:switch_idx] - b_idx = grp[switch_idx:] - dst_hours[grp] = np.hstack((result_a[a_idx], result_b[b_idx])) - - for i in range(n): - left = result_a[i] - right = result_b[i] - if vals[i] == NPY_NAT: - result[i] = vals[i] - elif left != NPY_NAT and right != NPY_NAT: - if left == right: - result[i] = left - else: - if infer_dst and dst_hours[i] != NPY_NAT: - result[i] = dst_hours[i] - elif is_dst: - if ambiguous_array[i]: - result[i] = left - else: - result[i] = right - elif fill: - result[i] = NPY_NAT - else: - stamp = Timestamp(vals[i]) - raise pytz.AmbiguousTimeError( - "Cannot infer dst time from %r, try using the " - "'ambiguous' argument" % stamp) - elif left != NPY_NAT: - result[i] = left - elif right != NPY_NAT: - result[i] = right - else: - if is_coerce: - result[i] = NPY_NAT - else: - stamp = Timestamp(vals[i]) - raise pytz.NonExistentTimeError(stamp) - - return result - -cdef inline bisect_right_i8(int64_t *data, int64_t val, Py_ssize_t n): - cdef Py_ssize_t pivot, left = 0, right = n - - # edge cases - if val > data[n - 1]: - return n - - if val < data[0]: - return 0 - - while left < right: - pivot = left + (right - left) // 2 - - if data[pivot] <= val: - left = pivot + 1 - else: - right = pivot - - return left - - -# Accessors -#---------------------------------------------------------------------- - -def build_field_sarray(ndarray[int64_t] dtindex): - """ - Datetime as int64 representation to a structured array of fields - """ - cdef: - Py_ssize_t i, count = 0 - int isleap - pandas_datetimestruct dts - ndarray[int32_t] years, months, days, hours, minutes, seconds, mus - - count = len(dtindex) - - sa_dtype = [('Y', 'i4'), # year - ('M', 'i4'), # month - ('D', 'i4'), # day - ('h', 'i4'), # hour - ('m', 'i4'), # min - ('s', 'i4'), # second - ('u', 'i4')] # microsecond - - out = np.empty(count, dtype=sa_dtype) - - years = out['Y'] - months = out['M'] - days = 
out['D'] - hours = out['h'] - minutes = out['m'] - seconds = out['s'] - mus = out['u'] - - for i in range(count): - pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts) - years[i] = dts.year - months[i] = dts.month - days[i] = dts.day - hours[i] = dts.hour - minutes[i] = dts.min - seconds[i] = dts.sec - mus[i] = dts.us - - return out - - -def get_time_micros(ndarray[int64_t] dtindex): - """ - Datetime as int64 representation to a structured array of fields - """ - cdef: - Py_ssize_t i, n = len(dtindex) - pandas_datetimestruct dts - ndarray[int64_t] micros - - micros = np.empty(n, dtype=np.int64) - - for i in range(n): - pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts) - micros[i] = 1000000LL * (dts.hour * 60 * 60 + - 60 * dts.min + dts.sec) + dts.us - - return micros - - -@cython.wraparound(False) -@cython.boundscheck(False) -def get_date_field(ndarray[int64_t] dtindex, object field): - """ - Given a int64-based datetime index, extract the year, month, etc., - field and return an array of these values. 
- """ - cdef: - _TSObject ts - Py_ssize_t i, count = 0 - ndarray[int32_t] out - ndarray[int32_t, ndim=2] _month_offset - int isleap, isleap_prev - pandas_datetimestruct dts - int mo_off, doy, dow, woy - - _month_offset = np.array( - [[ 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 ], - [ 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 ]], - dtype=np.int32 ) - - count = len(dtindex) - out = np.empty(count, dtype='i4') - - if field == 'Y': - with nogil: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - out[i] = dts.year - return out - - elif field == 'M': - with nogil: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - out[i] = dts.month - return out - - elif field == 'D': - with nogil: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - out[i] = dts.day - return out - - elif field == 'h': - with nogil: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - out[i] = dts.hour - return out - - elif field == 'm': - with nogil: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - out[i] = dts.min - return out - - elif field == 's': - with nogil: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - out[i] = dts.sec - return out - - elif field == 'us': - with nogil: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - out[i] = dts.us - return out - - elif field == 'ns': - with 
nogil: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - out[i] = dts.ps / 1000 - return out - elif field == 'doy': - with nogil: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - isleap = is_leapyear(dts.year) - out[i] = _month_offset[isleap, dts.month -1] + dts.day - return out - - elif field == 'dow': - with nogil: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - out[i] = dayofweek(dts.year, dts.month, dts.day) - return out - - elif field == 'woy': - with nogil: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - isleap = is_leapyear(dts.year) - isleap_prev = is_leapyear(dts.year - 1) - mo_off = _month_offset[isleap, dts.month - 1] - doy = mo_off + dts.day - dow = dayofweek(dts.year, dts.month, dts.day) - - #estimate - woy = (doy - 1) - dow + 3 - if woy >= 0: - woy = woy / 7 + 1 - - # verify - if woy < 0: - if (woy > -2) or (woy == -2 and isleap_prev): - woy = 53 - else: - woy = 52 - elif woy == 53: - if 31 - dts.day + dow < 3: - woy = 1 - - out[i] = woy - return out - - elif field == 'q': - with nogil: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - out[i] = dts.month - out[i] = ((out[i] - 1) / 3) + 1 - return out - - elif field == 'dim': - with nogil: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - out[i] = days_in_month(dts) - return out - elif field == 'is_leap_year': - return _isleapyear_arr(get_date_field(dtindex, 'Y')) - - raise ValueError("Field %s not supported" 
% field) - - -@cython.wraparound(False) -def get_start_end_field(ndarray[int64_t] dtindex, object field, - object freqstr=None, int month_kw=12): - """ - Given an int64-based datetime index return array of indicators - of whether timestamps are at the start/end of the month/quarter/year - (defined by frequency). - """ - cdef: - _TSObject ts - Py_ssize_t i - int count = 0 - bint is_business = 0 - int end_month = 12 - int start_month = 1 - ndarray[int8_t] out - ndarray[int32_t, ndim=2] _month_offset - bint isleap - pandas_datetimestruct dts - int mo_off, dom, doy, dow, ldom - - _month_offset = np.array( - [[ 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 ], - [ 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 ]], - dtype=np.int32 ) - - count = len(dtindex) - out = np.zeros(count, dtype='int8') - - if freqstr: - if freqstr == 'C': - raise ValueError( - "Custom business days is not supported by %s" % field) - is_business = freqstr[0] == 'B' - - # YearBegin(), BYearBegin() use month = starting month of year. - # QuarterBegin(), BQuarterBegin() use startingMonth = starting - # month of year. Other offests use month, startingMonth as ending - # month of year. 
- - if (freqstr[0:2] in ['MS', 'QS', 'AS']) or ( - freqstr[1:3] in ['MS', 'QS', 'AS']): - end_month = 12 if month_kw == 1 else month_kw - 1 - start_month = month_kw - else: - end_month = month_kw - start_month = (end_month % 12) + 1 - else: - end_month = 12 - start_month = 1 - - if field == 'is_month_start': - if is_business: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - ts = convert_to_tsobject(dtindex[i], None, None, 0, 0) - dom = dts.day - dow = ts_dayofweek(ts) - - if (dom == 1 and dow < 5) or (dom <= 3 and dow == 0): - out[i] = 1 - return out.view(bool) - else: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - dom = dts.day - - if dom == 1: - out[i] = 1 - return out.view(bool) - - elif field == 'is_month_end': - if is_business: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - ts = convert_to_tsobject(dtindex[i], None, None, 0, 0) - isleap = is_leapyear(dts.year) - mo_off = _month_offset[isleap, dts.month - 1] - dom = dts.day - doy = mo_off + dom - ldom = _month_offset[isleap, dts.month] - dow = ts_dayofweek(ts) - - if (ldom == doy and dow < 5) or ( - dow == 4 and (ldom - doy <= 2)): - out[i] = 1 - return out.view(bool) - else: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - isleap = is_leapyear(dts.year) - mo_off = _month_offset[isleap, dts.month - 1] - dom = dts.day - doy = mo_off + dom - ldom = _month_offset[isleap, dts.month] - - if ldom == doy: - out[i] = 1 - return out.view(bool) - - elif field == 'is_quarter_start': - if is_business: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - 
dtindex[i], PANDAS_FR_ns, &dts) - ts = convert_to_tsobject(dtindex[i], None, None, 0, 0) - dom = dts.day - dow = ts_dayofweek(ts) - - if ((dts.month - start_month) % 3 == 0) and ( - (dom == 1 and dow < 5) or (dom <= 3 and dow == 0)): - out[i] = 1 - return out.view(bool) - else: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - dom = dts.day - - if ((dts.month - start_month) % 3 == 0) and dom == 1: - out[i] = 1 - return out.view(bool) - - elif field == 'is_quarter_end': - if is_business: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - ts = convert_to_tsobject(dtindex[i], None, None, 0, 0) - isleap = is_leapyear(dts.year) - mo_off = _month_offset[isleap, dts.month - 1] - dom = dts.day - doy = mo_off + dom - ldom = _month_offset[isleap, dts.month] - dow = ts_dayofweek(ts) - - if ((dts.month - end_month) % 3 == 0) and ( - (ldom == doy and dow < 5) or ( - dow == 4 and (ldom - doy <= 2))): - out[i] = 1 - return out.view(bool) - else: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - isleap = is_leapyear(dts.year) - mo_off = _month_offset[isleap, dts.month - 1] - dom = dts.day - doy = mo_off + dom - ldom = _month_offset[isleap, dts.month] - - if ((dts.month - end_month) % 3 == 0) and (ldom == doy): - out[i] = 1 - return out.view(bool) - - elif field == 'is_year_start': - if is_business: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - ts = convert_to_tsobject(dtindex[i], None, None, 0, 0) - dom = dts.day - dow = ts_dayofweek(ts) - - if (dts.month == start_month) and ( - (dom == 1 and dow < 5) or (dom <= 3 and dow == 0)): - out[i] = 1 - return out.view(bool) - else: - for i in 
range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - dom = dts.day - - if (dts.month == start_month) and dom == 1: - out[i] = 1 - return out.view(bool) - - elif field == 'is_year_end': - if is_business: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - ts = convert_to_tsobject(dtindex[i], None, None, 0, 0) - isleap = is_leapyear(dts.year) - dom = dts.day - mo_off = _month_offset[isleap, dts.month - 1] - doy = mo_off + dom - dow = ts_dayofweek(ts) - ldom = _month_offset[isleap, dts.month] - - if (dts.month == end_month) and ( - (ldom == doy and dow < 5) or ( - dow == 4 and (ldom - doy <= 2))): - out[i] = 1 - return out.view(bool) - else: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - ts = convert_to_tsobject(dtindex[i], None, None, 0, 0) - isleap = is_leapyear(dts.year) - mo_off = _month_offset[isleap, dts.month - 1] - dom = dts.day - doy = mo_off + dom - ldom = _month_offset[isleap, dts.month] - - if (dts.month == end_month) and (ldom == doy): - out[i] = 1 - return out.view(bool) - - raise ValueError("Field %s not supported" % field) - - -@cython.wraparound(False) -@cython.boundscheck(False) -def get_date_name_field(ndarray[int64_t] dtindex, object field): - """ - Given a int64-based datetime index, return array of strings of date - name based on requested field (e.g. 
weekday_name) - """ - cdef: - _TSObject ts - Py_ssize_t i, count = 0 - ndarray[object] out - pandas_datetimestruct dts - int dow - - _dayname = np.array( - ['Monday', 'Tuesday', 'Wednesday', 'Thursday', - 'Friday', 'Saturday', 'Sunday'], - dtype=np.object_ ) - - count = len(dtindex) - out = np.empty(count, dtype=object) - - if field == 'weekday_name': - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = np.nan; continue - - pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts) - dow = dayofweek(dts.year, dts.month, dts.day) - out[i] = _dayname[dow] - return out - - raise ValueError("Field %s not supported" % field) - - -cdef inline int m8_weekday(int64_t val): - ts = convert_to_tsobject(val, None, None, 0, 0) - return ts_dayofweek(ts) - -cdef int64_t DAY_NS = 86400000000000LL - - -@cython.wraparound(False) -@cython.boundscheck(False) -def date_normalize(ndarray[int64_t] stamps, tz=None): - cdef: - Py_ssize_t i, n = len(stamps) - pandas_datetimestruct dts - _TSObject tso - ndarray[int64_t] result = np.empty(n, dtype=np.int64) - - if tz is not None: - tso = _TSObject() - tz = maybe_get_tz(tz) - result = _normalize_local(stamps, tz) - else: - with nogil: - for i in range(n): - if stamps[i] == NPY_NAT: - result[i] = NPY_NAT - continue - pandas_datetime_to_datetimestruct( - stamps[i], PANDAS_FR_ns, &dts) - result[i] = _normalized_stamp(&dts) - - return result - - -@cython.wraparound(False) -@cython.boundscheck(False) -cdef _normalize_local(ndarray[int64_t] stamps, object tz): - cdef: - Py_ssize_t n = len(stamps) - ndarray[int64_t] result = np.empty(n, dtype=np.int64) - ndarray[int64_t] trans, deltas, pos - pandas_datetimestruct dts - - if _is_utc(tz): - with nogil: - for i in range(n): - if stamps[i] == NPY_NAT: - result[i] = NPY_NAT - continue - pandas_datetime_to_datetimestruct( - stamps[i], PANDAS_FR_ns, &dts) - result[i] = _normalized_stamp(&dts) - elif _is_tzlocal(tz): - for i in range(n): - if stamps[i] == NPY_NAT: - result[i] = NPY_NAT - 
continue - pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_ns, &dts) - dt = datetime(dts.year, dts.month, dts.day, dts.hour, - dts.min, dts.sec, dts.us, tz) - delta = int(total_seconds(_get_utcoffset(tz, dt))) * 1000000000 - pandas_datetime_to_datetimestruct(stamps[i] + delta, - PANDAS_FR_ns, &dts) - result[i] = _normalized_stamp(&dts) - else: - # Adjust datetime64 timestamp, recompute datetimestruct - trans, deltas, typ = _get_dst_info(tz) - - _pos = trans.searchsorted(stamps, side='right') - 1 - if _pos.dtype != np.int64: - _pos = _pos.astype(np.int64) - pos = _pos - - # statictzinfo - if typ not in ['pytz', 'dateutil']: - for i in range(n): - if stamps[i] == NPY_NAT: - result[i] = NPY_NAT - continue - pandas_datetime_to_datetimestruct(stamps[i] + deltas[0], - PANDAS_FR_ns, &dts) - result[i] = _normalized_stamp(&dts) - else: - for i in range(n): - if stamps[i] == NPY_NAT: - result[i] = NPY_NAT - continue - pandas_datetime_to_datetimestruct(stamps[i] + deltas[pos[i]], - PANDAS_FR_ns, &dts) - result[i] = _normalized_stamp(&dts) - - return result - -cdef inline int64_t _normalized_stamp(pandas_datetimestruct *dts) nogil: - dts.hour = 0 - dts.min = 0 - dts.sec = 0 - dts.us = 0 - dts.ps = 0 - return pandas_datetimestruct_to_datetime(PANDAS_FR_ns, dts) - - -def dates_normalized(ndarray[int64_t] stamps, tz=None): - cdef: - Py_ssize_t i, n = len(stamps) - pandas_datetimestruct dts - - if tz is None or _is_utc(tz): - for i in range(n): - pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_ns, &dts) - if (dts.hour + dts.min + dts.sec + dts.us) > 0: - return False - elif _is_tzlocal(tz): - for i in range(n): - pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_ns, &dts) - dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, - dts.sec, dts.us, tz) - dt = dt + tz.utcoffset(dt) - if (dt.hour + dt.minute + dt.second + dt.microsecond) > 0: - return False - else: - trans, deltas, typ = _get_dst_info(tz) - - for i in range(n): - # Adjust datetime64 
timestamp, recompute datetimestruct - pos = trans.searchsorted(stamps[i]) - 1 - inf = tz._transition_info[pos] - - pandas_datetime_to_datetimestruct(stamps[i] + deltas[pos], - PANDAS_FR_ns, &dts) - if (dts.hour + dts.min + dts.sec + dts.us) > 0: - return False - - return True - -# Some general helper functions -#---------------------------------------------------------------------- - - -cpdef _isleapyear_arr(ndarray years): - cdef: - ndarray[int8_t] out - - # to make NaT result as False - out = np.zeros(len(years), dtype='int8') - out[np.logical_or(years % 400 == 0, - np.logical_and(years % 4 == 0, - years % 100 > 0))] = 1 - return out.view(bool) - - -def monthrange(int64_t year, int64_t month): - cdef: - int64_t days - int64_t day_of_week - - if month < 1 or month > 12: - raise ValueError("bad month number 0; must be 1-12") - - days = days_per_month_table[is_leapyear(year)][month -1] - - return (dayofweek(year, month, 1), days) - -cdef inline int64_t ts_dayofweek(_TSObject ts): - return dayofweek(ts.dts.year, ts.dts.month, ts.dts.day) - -cdef inline int days_in_month(pandas_datetimestruct dts) nogil: - return days_per_month_table[is_leapyear(dts.year)][dts.month -1] - -cpdef normalize_date(object dt): - """ - Normalize datetime.datetime value to midnight. 
Returns datetime.date as a - datetime.datetime at midnight - - Returns - ------- - normalized : datetime.datetime or Timestamp - """ - if is_timestamp(dt): - return dt.replace(hour=0, minute=0, second=0, microsecond=0, - nanosecond=0) - elif PyDateTime_Check(dt): - return dt.replace(hour=0, minute=0, second=0, microsecond=0) - elif PyDate_Check(dt): - return datetime(dt.year, dt.month, dt.day) - else: - raise TypeError('Unrecognized type: %s' % type(dt)) - - -cdef inline int _year_add_months(pandas_datetimestruct dts, - int months) nogil: - """new year number after shifting pandas_datetimestruct number of months""" - return dts.year + (dts.month + months - 1) / 12 - -cdef inline int _month_add_months(pandas_datetimestruct dts, - int months) nogil: - """ - New month number after shifting pandas_datetimestruct - number of months. - """ - cdef int new_month = (dts.month + months) % 12 - return 12 if new_month == 0 else new_month - - -@cython.wraparound(False) -@cython.boundscheck(False) -def shift_months(int64_t[:] dtindex, int months, object day=None): - """ - Given an int64-based datetime index, shift all elements - specified number of months using DateOffset semantics - - day: {None, 'start', 'end'} - * None: day of month - * 'start' 1st day of month - * 'end' last day of month - """ - cdef: - Py_ssize_t i - pandas_datetimestruct dts - int count = len(dtindex) - int months_to_roll - bint roll_check - int64_t[:] out = np.empty(count, dtype='int64') - - if day is None: - with nogil: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = NPY_NAT; continue - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - dts.year = _year_add_months(dts, months) - dts.month = _month_add_months(dts, months) - - dts.day = min(dts.day, days_in_month(dts)) - out[i] = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts) - elif day == 'start': - roll_check = False - if months <= 0: - months += 1 - roll_check = True - with nogil: - for i in range(count): - if 
dtindex[i] == NPY_NAT: out[i] = NPY_NAT; continue - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - months_to_roll = months - - # offset semantics - if on the anchor point and going backwards - # shift to next - if roll_check and dts.day == 1: - months_to_roll -= 1 - - dts.year = _year_add_months(dts, months_to_roll) - dts.month = _month_add_months(dts, months_to_roll) - dts.day = 1 - - out[i] = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts) - elif day == 'end': - roll_check = False - if months > 0: - months -= 1 - roll_check = True - with nogil: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = NPY_NAT; continue - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - months_to_roll = months - - # similar semantics - when adding shift forward by one - # month if already at an end of month - if roll_check and dts.day == days_in_month(dts): - months_to_roll += 1 - - dts.year = _year_add_months(dts, months_to_roll) - dts.month = _month_add_months(dts, months_to_roll) - - dts.day = days_in_month(dts) - out[i] = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts) - else: - raise ValueError("day must be None, 'start' or 'end'") - - return np.asarray(out) - -#---------------------------------------------------------------------- -# Don't even ask - -"""Strptime-related classes and functions. 
- -CLASSES: - LocaleTime -- Discovers and stores locale-specific time information - TimeRE -- Creates regexes for pattern matching a string of text containing - time information - -FUNCTIONS: - _getlang -- Figure out what language is being used for the locale - strptime -- Calculates the time struct represented by the passed-in string - -""" -import time -import locale -import calendar -from re import compile as re_compile -from re import IGNORECASE -from re import escape as re_escape -from datetime import date as datetime_date - -# Python 2 vs Python 3 -try: - from thread import allocate_lock as _thread_allocate_lock -except: - try: - from _thread import allocate_lock as _thread_allocate_lock - except: - try: - from dummy_thread import allocate_lock as _thread_allocate_lock - except: - from _dummy_thread import allocate_lock as _thread_allocate_lock - -__all__ = [] - - -def _getlang(): - # Figure out what the current language is set to. - return locale.getlocale(locale.LC_TIME) - - -class LocaleTime(object): - """Stores and handles locale-specific information related to time. - - ATTRIBUTES: - f_weekday -- full weekday names (7-item list) - a_weekday -- abbreviated weekday names (7-item list) - f_month -- full month names (13-item list; dummy value in [0], which - is added by code) - a_month -- abbreviated month names (13-item list, dummy value in - [0], which is added by code) - am_pm -- AM/PM representation (2-item list) - LC_date_time -- format string for date/time representation (string) - LC_date -- format string for date representation (string) - LC_time -- format string for time representation (string) - timezone -- daylight- and non-daylight-savings timezone representation - (2-item list of sets) - lang -- Language used by instance (2-item tuple) - """ - - def __init__(self): - """Set all attributes. - - Order of methods called matters for dependency reasons. - - The locale language is set at the offset and then checked again before - exiting. 
This is to make sure that the attributes were not set with a - mix of information from more than one locale. This would most likely - happen when using threads where one thread calls a locale-dependent - function while another thread changes the locale while the function in - the other thread is still running. Proper coding would call for - locks to prevent changing the locale while locale-dependent code is - running. The check here is done in case someone does not think about - doing this. - - Only other possible issue is if someone changed the timezone and did - not call tz.tzset . That is an issue for the programmer, though, - since changing the timezone is worthless without that call. - - """ - self.lang = _getlang() - self.__calc_weekday() - self.__calc_month() - self.__calc_am_pm() - self.__calc_timezone() - self.__calc_date_time() - if _getlang() != self.lang: - raise ValueError("locale changed during initialization") - - def __pad(self, seq, front): - # Add '' to seq to either the front (is True), else the back. - seq = list(seq) - if front: - seq.insert(0, '') - else: - seq.append('') - return seq - - def __calc_weekday(self): - # Set self.a_weekday and self.f_weekday using the calendar - # module. - a_weekday = [calendar.day_abbr[i].lower() for i in range(7)] - f_weekday = [calendar.day_name[i].lower() for i in range(7)] - self.a_weekday = a_weekday - self.f_weekday = f_weekday - - def __calc_month(self): - # Set self.f_month and self.a_month using the calendar module. - a_month = [calendar.month_abbr[i].lower() for i in range(13)] - f_month = [calendar.month_name[i].lower() for i in range(13)] - self.a_month = a_month - self.f_month = f_month - - def __calc_am_pm(self): - # Set self.am_pm by using time.strftime(). - - # The magic date (1999,3,17,hour,44,55,2,76,0) is not really that - # magical; just happened to have used it everywhere else where a - # static date was needed. 
- am_pm = [] - for hour in (01, 22): - time_tuple = time.struct_time( - (1999, 3, 17, hour, 44, 55, 2, 76, 0)) - am_pm.append(time.strftime("%p", time_tuple).lower()) - self.am_pm = am_pm - - def __calc_date_time(self): - # Set self.date_time, self.date, & self.time by using - # time.strftime(). - - # Use (1999,3,17,22,44,55,2,76,0) for magic date because the amount of - # overloaded numbers is minimized. The order in which searches for - # values within the format string is very important; it eliminates - # possible ambiguity for what something represents. - time_tuple = time.struct_time((1999, 3, 17, 22, 44, 55, 2, 76, 0)) - date_time = [None, None, None] - date_time[0] = time.strftime("%c", time_tuple).lower() - date_time[1] = time.strftime("%x", time_tuple).lower() - date_time[2] = time.strftime("%X", time_tuple).lower() - replacement_pairs = [('%', '%%'), (self.f_weekday[2], '%A'), - (self.f_month[3], - '%B'), (self.a_weekday[2], '%a'), - (self.a_month[3], '%b'), (self.am_pm[1], '%p'), - ('1999', '%Y'), ('99', '%y'), ('22', '%H'), - ('44', '%M'), ('55', '%S'), ('76', '%j'), - ('17', '%d'), ('03', '%m'), ('3', '%m'), - # '3' needed for when no leading zero. - ('2', '%w'), ('10', '%I')] - replacement_pairs.extend([(tz, "%Z") for tz_values in self.timezone - for tz in tz_values]) - for offset, directive in ((0, '%c'), (1, '%x'), (2, '%X')): - current_format = date_time[offset] - for old, new in replacement_pairs: - # Must deal with possible lack of locale info - # manifesting itself as the empty string (e.g., Swedish's - # lack of AM/PM info) or a platform returning a tuple of empty - # strings (e.g., MacOS 9 having timezone as ('','')). - if old: - current_format = current_format.replace(old, new) - # If %W is used, then Sunday, 2005-01-03 will fall on week 0 since - # 2005-01-03 occurs before the first Monday of the year. Otherwise - # %U is used. 
- time_tuple = time.struct_time((1999, 1, 3, 1, 1, 1, 6, 3, 0)) - if '00' in time.strftime(directive, time_tuple): - U_W = '%W' - else: - U_W = '%U' - date_time[offset] = current_format.replace('11', U_W) - self.LC_date_time = date_time[0] - self.LC_date = date_time[1] - self.LC_time = date_time[2] - - def __calc_timezone(self): - # Set self.timezone by using time.tzname. - # Do not worry about possibility of time.tzname[0] == timetzname[1] - # and time.daylight; handle that in strptime . - try: - time.tzset() - except AttributeError: - pass - no_saving = frozenset(["utc", "gmt", time.tzname[0].lower()]) - if time.daylight: - has_saving = frozenset([time.tzname[1].lower()]) - else: - has_saving = frozenset() - self.timezone = (no_saving, has_saving) - - -class TimeRE(dict): - """Handle conversion from format directives to regexes.""" - - def __init__(self, locale_time=None): - """Create keys/values. - - Order of execution is important for dependency reasons. - - """ - if locale_time: - self.locale_time = locale_time - else: - self.locale_time = LocaleTime() - base = super(TimeRE, self) - base.__init__({ - # The " \d" part of the regex is to make %c from ANSI C work - 'd': r"(?P3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])", - 'f': r"(?P[0-9]{1,9})", - 'H': r"(?P2[0-3]|[0-1]\d|\d)", - 'I': r"(?P1[0-2]|0[1-9]|[1-9])", - 'j': (r"(?P36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|" - r"[1-9]\d|0[1-9]|[1-9])"), - 'm': r"(?P1[0-2]|0[1-9]|[1-9])", - 'M': r"(?P[0-5]\d|\d)", - 'S': r"(?P6[0-1]|[0-5]\d|\d)", - 'U': r"(?P5[0-3]|[0-4]\d|\d)", - 'w': r"(?P[0-6])", - # W is set below by using 'U' - 'y': r"(?P\d\d)", - #XXX: Does 'Y' need to worry about having less or more than - # 4 digits? 
- 'Y': r"(?P\d\d\d\d)", - 'A': self.__seqToRE(self.locale_time.f_weekday, 'A'), - 'a': self.__seqToRE(self.locale_time.a_weekday, 'a'), - 'B': self.__seqToRE(self.locale_time.f_month[1:], 'B'), - 'b': self.__seqToRE(self.locale_time.a_month[1:], 'b'), - 'p': self.__seqToRE(self.locale_time.am_pm, 'p'), - 'Z': self.__seqToRE([tz for tz_names in self.locale_time.timezone - for tz in tz_names], - 'Z'), - '%': '%'}) - base.__setitem__('W', base.__getitem__('U').replace('U', 'W')) - base.__setitem__('c', self.pattern(self.locale_time.LC_date_time)) - base.__setitem__('x', self.pattern(self.locale_time.LC_date)) - base.__setitem__('X', self.pattern(self.locale_time.LC_time)) - - def __seqToRE(self, to_convert, directive): - """Convert a list to a regex string for matching a directive. - - Want possible matching values to be from longest to shortest. This - prevents the possibility of a match occuring for a value that also - a substring of a larger value that should have matched (e.g., 'abc' - matching when 'abcdef' should have been the match). - - """ - to_convert = sorted(to_convert, key=len, reverse=True) - for value in to_convert: - if value != '': - break - else: - return '' - regex = '|'.join([re_escape(stuff) for stuff in to_convert]) - regex = '(?P<%s>%s' % (directive, regex) - return '%s)' % regex - - def pattern(self, format): - """Return regex pattern for the format string. - - Need to make sure that any characters that might be interpreted as - regex syntax are escaped. - - """ - processed_format = '' - # The sub() call escapes all characters that might be misconstrued - # as regex syntax. Cannot use re.escape since we have to deal with - # format directives (%m, etc.). 
- regex_chars = re_compile(r"([\\.^$*+?\(\){}\[\]|])") - format = regex_chars.sub(r"\\\1", format) - whitespace_replacement = re_compile(r'\s+') - format = whitespace_replacement.sub(r'\\s+', format) - while '%' in format: - directive_index = format.index('%') +1 - processed_format = "%s%s%s" % (processed_format, - format[:directive_index -1], - self[format[directive_index]]) - format = format[directive_index +1:] - return "%s%s" % (processed_format, format) - - def compile(self, format): - """Return a compiled re object for the format string.""" - return re_compile(self.pattern(format), IGNORECASE) - -_cache_lock = _thread_allocate_lock() -# DO NOT modify _TimeRE_cache or _regex_cache without acquiring the cache lock -# first! -_TimeRE_cache = TimeRE() -_CACHE_MAX_SIZE = 5 # Max number of regexes stored in _regex_cache -_regex_cache = {} - -cdef _calc_julian_from_U_or_W(int year, int week_of_year, - int day_of_week, int week_starts_Mon): - """Calculate the Julian day based on the year, week of the year, and day of - the week, with week_start_day representing whether the week of the year - assumes the week starts on Sunday or Monday (6 or 0).""" - - cdef: - int first_weekday, week_0_length, days_to_week - - first_weekday = datetime_date(year, 1, 1).weekday() - # If we are dealing with the %U directive (week starts on Sunday), it's - # easier to just shift the view to Sunday being the first day of the - # week. - if not week_starts_Mon: - first_weekday = (first_weekday + 1) % 7 - day_of_week = (day_of_week + 1) % 7 - # Need to watch out for a week 0 (when the first day of the year is not - # the same as that specified by %U or %W). 
- week_0_length = (7 - first_weekday) % 7 - if week_of_year == 0: - return 1 + day_of_week - first_weekday - else: - days_to_week = week_0_length + (7 * (week_of_year - 1)) - return 1 + days_to_week + day_of_week - -# def _strptime_time(data_string, format="%a %b %d %H:%M:%S %Y"): -# return _strptime(data_string, format)[0] diff --git a/pandas/types/common.py b/pandas/types/common.py index e58e0826ea49a..a125c27d04596 100644 --- a/pandas/types/common.py +++ b/pandas/types/common.py @@ -1,493 +1,8 @@ -""" common type operations """ +import warnings -import numpy as np -from pandas.compat import (string_types, text_type, binary_type, - PY3, PY36) -from pandas import lib, algos -from .dtypes import (CategoricalDtype, CategoricalDtypeType, - DatetimeTZDtype, DatetimeTZDtypeType, - PeriodDtype, PeriodDtypeType, - ExtensionDtype) -from .generic import (ABCCategorical, ABCPeriodIndex, - ABCDatetimeIndex, ABCSeries, - ABCSparseArray, ABCSparseSeries) -from .inference import is_string_like -from .inference import * # noqa +warnings.warn("pandas.types.common is deprecated and will be " + "removed in a future version, import " + "from pandas.api.types", + DeprecationWarning, stacklevel=3) - -_POSSIBLY_CAST_DTYPES = set([np.dtype(t).name - for t in ['O', 'int8', 'uint8', 'int16', 'uint16', - 'int32', 'uint32', 'int64', 'uint64']]) - -_NS_DTYPE = np.dtype('M8[ns]') -_TD_DTYPE = np.dtype('m8[ns]') -_INT64_DTYPE = np.dtype(np.int64) - -_ensure_float64 = algos.ensure_float64 -_ensure_float32 = algos.ensure_float32 - - -def _ensure_float(arr): - if issubclass(arr.dtype.type, (np.integer, np.bool_)): - arr = arr.astype(float) - return arr - - -_ensure_uint64 = algos.ensure_uint64 -_ensure_int64 = algos.ensure_int64 -_ensure_int32 = algos.ensure_int32 -_ensure_int16 = algos.ensure_int16 -_ensure_int8 = algos.ensure_int8 -_ensure_platform_int = algos.ensure_platform_int -_ensure_object = algos.ensure_object - - -def _ensure_categorical(arr): - if not is_categorical(arr): - from pandas 
import Categorical - arr = Categorical(arr) - return arr - - -def is_object_dtype(arr_or_dtype): - tipo = _get_dtype_type(arr_or_dtype) - return issubclass(tipo, np.object_) - - -def is_sparse(array): - """ return if we are a sparse array """ - return isinstance(array, (ABCSparseArray, ABCSparseSeries)) - - -def is_categorical(array): - """ return if we are a categorical possibility """ - return isinstance(array, ABCCategorical) or is_categorical_dtype(array) - - -def is_datetimetz(array): - """ return if we are a datetime with tz array """ - return ((isinstance(array, ABCDatetimeIndex) and - getattr(array, 'tz', None) is not None) or - is_datetime64tz_dtype(array)) - - -def is_period(array): - """ return if we are a period array """ - return isinstance(array, ABCPeriodIndex) or is_period_arraylike(array) - - -def is_datetime64_dtype(arr_or_dtype): - try: - tipo = _get_dtype_type(arr_or_dtype) - except TypeError: - return False - return issubclass(tipo, np.datetime64) - - -def is_datetime64tz_dtype(arr_or_dtype): - return DatetimeTZDtype.is_dtype(arr_or_dtype) - - -def is_timedelta64_dtype(arr_or_dtype): - tipo = _get_dtype_type(arr_or_dtype) - return issubclass(tipo, np.timedelta64) - - -def is_period_dtype(arr_or_dtype): - return PeriodDtype.is_dtype(arr_or_dtype) - - -def is_categorical_dtype(arr_or_dtype): - return CategoricalDtype.is_dtype(arr_or_dtype) - - -def is_string_dtype(arr_or_dtype): - dtype = _get_dtype(arr_or_dtype) - return dtype.kind in ('O', 'S', 'U') and not is_period_dtype(dtype) - - -def is_period_arraylike(arr): - """ return if we are period arraylike / PeriodIndex """ - if isinstance(arr, ABCPeriodIndex): - return True - elif isinstance(arr, (np.ndarray, ABCSeries)): - return arr.dtype == object and lib.infer_dtype(arr) == 'period' - return getattr(arr, 'inferred_type', None) == 'period' - - -def is_datetime_arraylike(arr): - """ return if we are datetime arraylike / DatetimeIndex """ - if isinstance(arr, ABCDatetimeIndex): - return True - 
elif isinstance(arr, (np.ndarray, ABCSeries)): - return arr.dtype == object and lib.infer_dtype(arr) == 'datetime' - return getattr(arr, 'inferred_type', None) == 'datetime' - - -def is_datetimelike(arr): - return (is_datetime64_dtype(arr) or is_datetime64tz_dtype(arr) or - is_timedelta64_dtype(arr) or - isinstance(arr, ABCPeriodIndex) or - is_datetimetz(arr)) - - -def is_dtype_equal(source, target): - """ return a boolean if the dtypes are equal """ - try: - source = _get_dtype(source) - target = _get_dtype(target) - return source == target - except (TypeError, AttributeError): - - # invalid comparison - # object == category will hit this - return False - - -def is_any_int_dtype(arr_or_dtype): - tipo = _get_dtype_type(arr_or_dtype) - return issubclass(tipo, np.integer) - - -def is_integer_dtype(arr_or_dtype): - tipo = _get_dtype_type(arr_or_dtype) - return (issubclass(tipo, np.integer) and - not issubclass(tipo, (np.datetime64, np.timedelta64))) - - -def is_signed_integer_dtype(arr_or_dtype): - tipo = _get_dtype_type(arr_or_dtype) - return (issubclass(tipo, np.signedinteger) and - not issubclass(tipo, (np.datetime64, np.timedelta64))) - - -def is_unsigned_integer_dtype(arr_or_dtype): - tipo = _get_dtype_type(arr_or_dtype) - return (issubclass(tipo, np.unsignedinteger) and - not issubclass(tipo, (np.datetime64, np.timedelta64))) - - -def is_int64_dtype(arr_or_dtype): - tipo = _get_dtype_type(arr_or_dtype) - return issubclass(tipo, np.int64) - - -def is_int_or_datetime_dtype(arr_or_dtype): - tipo = _get_dtype_type(arr_or_dtype) - return (issubclass(tipo, np.integer) or - issubclass(tipo, (np.datetime64, np.timedelta64))) - - -def is_datetime64_any_dtype(arr_or_dtype): - return (is_datetime64_dtype(arr_or_dtype) or - is_datetime64tz_dtype(arr_or_dtype)) - - -def is_datetime64_ns_dtype(arr_or_dtype): - try: - tipo = _get_dtype(arr_or_dtype) - except TypeError: - if is_datetime64tz_dtype(arr_or_dtype): - tipo = _get_dtype(arr_or_dtype.dtype) - else: - return False - 
return tipo == _NS_DTYPE or getattr(tipo, 'base', None) == _NS_DTYPE - - -def is_timedelta64_ns_dtype(arr_or_dtype): - tipo = _get_dtype(arr_or_dtype) - return tipo == _TD_DTYPE - - -def is_datetime_or_timedelta_dtype(arr_or_dtype): - tipo = _get_dtype_type(arr_or_dtype) - return issubclass(tipo, (np.datetime64, np.timedelta64)) - - -def _is_unorderable_exception(e): - """ - return a boolean if we an unorderable exception error message - - These are different error message for PY>=3<=3.5 and PY>=3.6 - """ - if PY36: - return "'>' not supported between instances of" in str(e) - - elif PY3: - return 'unorderable' in str(e) - return False - - -def is_numeric_v_string_like(a, b): - """ - numpy doesn't like to compare numeric arrays vs scalar string-likes - - return a boolean result if this is the case for a,b or b,a - - """ - is_a_array = isinstance(a, np.ndarray) - is_b_array = isinstance(b, np.ndarray) - - is_a_numeric_array = is_a_array and is_numeric_dtype(a) - is_b_numeric_array = is_b_array and is_numeric_dtype(b) - is_a_string_array = is_a_array and is_string_like_dtype(a) - is_b_string_array = is_b_array and is_string_like_dtype(b) - - is_a_scalar_string_like = not is_a_array and is_string_like(a) - is_b_scalar_string_like = not is_b_array and is_string_like(b) - - return ((is_a_numeric_array and is_b_scalar_string_like) or - (is_b_numeric_array and is_a_scalar_string_like) or - (is_a_numeric_array and is_b_string_array) or - (is_b_numeric_array and is_a_string_array)) - - -def is_datetimelike_v_numeric(a, b): - # return if we have an i8 convertible and numeric comparison - if not hasattr(a, 'dtype'): - a = np.asarray(a) - if not hasattr(b, 'dtype'): - b = np.asarray(b) - - def is_numeric(x): - return is_integer_dtype(x) or is_float_dtype(x) - - is_datetimelike = needs_i8_conversion - return ((is_datetimelike(a) and is_numeric(b)) or - (is_datetimelike(b) and is_numeric(a))) - - -def is_datetimelike_v_object(a, b): - # return if we have an i8 convertible and 
object comparsion - if not hasattr(a, 'dtype'): - a = np.asarray(a) - if not hasattr(b, 'dtype'): - b = np.asarray(b) - - def f(x): - return is_object_dtype(x) - - def is_object(x): - return is_integer_dtype(x) or is_float_dtype(x) - - is_datetimelike = needs_i8_conversion - return ((is_datetimelike(a) and is_object(b)) or - (is_datetimelike(b) and is_object(a))) - - -def needs_i8_conversion(arr_or_dtype): - return (is_datetime_or_timedelta_dtype(arr_or_dtype) or - is_datetime64tz_dtype(arr_or_dtype) or - is_period_dtype(arr_or_dtype)) - - -def is_numeric_dtype(arr_or_dtype): - tipo = _get_dtype_type(arr_or_dtype) - return (issubclass(tipo, (np.number, np.bool_)) and - not issubclass(tipo, (np.datetime64, np.timedelta64))) - - -def is_string_like_dtype(arr_or_dtype): - # exclude object as its a mixed dtype - dtype = _get_dtype(arr_or_dtype) - return dtype.kind in ('S', 'U') - - -def is_float_dtype(arr_or_dtype): - tipo = _get_dtype_type(arr_or_dtype) - return issubclass(tipo, np.floating) - - -def is_floating_dtype(arr_or_dtype): - tipo = _get_dtype_type(arr_or_dtype) - return isinstance(tipo, np.floating) - - -def is_bool_dtype(arr_or_dtype): - try: - tipo = _get_dtype_type(arr_or_dtype) - except ValueError: - # this isn't even a dtype - return False - return issubclass(tipo, np.bool_) - - -def is_extension_type(value): - """ - if we are a klass that is preserved by the internals - these are internal klasses that we represent (and don't use a np.array) - """ - if is_categorical(value): - return True - elif is_sparse(value): - return True - elif is_datetimetz(value): - return True - return False - - -def is_complex_dtype(arr_or_dtype): - tipo = _get_dtype_type(arr_or_dtype) - return issubclass(tipo, np.complexfloating) - - -def _coerce_to_dtype(dtype): - """ coerce a string / np.dtype to a dtype """ - if is_categorical_dtype(dtype): - dtype = CategoricalDtype() - elif is_datetime64tz_dtype(dtype): - dtype = DatetimeTZDtype(dtype) - elif is_period_dtype(dtype): - 
dtype = PeriodDtype(dtype) - else: - dtype = np.dtype(dtype) - return dtype - - -def _get_dtype(arr_or_dtype): - if isinstance(arr_or_dtype, np.dtype): - return arr_or_dtype - elif isinstance(arr_or_dtype, type): - return np.dtype(arr_or_dtype) - elif isinstance(arr_or_dtype, CategoricalDtype): - return arr_or_dtype - elif isinstance(arr_or_dtype, DatetimeTZDtype): - return arr_or_dtype - elif isinstance(arr_or_dtype, PeriodDtype): - return arr_or_dtype - elif isinstance(arr_or_dtype, string_types): - if is_categorical_dtype(arr_or_dtype): - return CategoricalDtype.construct_from_string(arr_or_dtype) - elif is_datetime64tz_dtype(arr_or_dtype): - return DatetimeTZDtype.construct_from_string(arr_or_dtype) - elif is_period_dtype(arr_or_dtype): - return PeriodDtype.construct_from_string(arr_or_dtype) - - if hasattr(arr_or_dtype, 'dtype'): - arr_or_dtype = arr_or_dtype.dtype - return np.dtype(arr_or_dtype) - - -def _get_dtype_type(arr_or_dtype): - if isinstance(arr_or_dtype, np.dtype): - return arr_or_dtype.type - elif isinstance(arr_or_dtype, type): - return np.dtype(arr_or_dtype).type - elif isinstance(arr_or_dtype, CategoricalDtype): - return CategoricalDtypeType - elif isinstance(arr_or_dtype, DatetimeTZDtype): - return DatetimeTZDtypeType - elif isinstance(arr_or_dtype, PeriodDtype): - return PeriodDtypeType - elif isinstance(arr_or_dtype, string_types): - if is_categorical_dtype(arr_or_dtype): - return CategoricalDtypeType - elif is_datetime64tz_dtype(arr_or_dtype): - return DatetimeTZDtypeType - elif is_period_dtype(arr_or_dtype): - return PeriodDtypeType - return _get_dtype_type(np.dtype(arr_or_dtype)) - try: - return arr_or_dtype.dtype.type - except AttributeError: - return type(None) - - -def _get_dtype_from_object(dtype): - """Get a numpy dtype.type-style object. This handles the datetime64[ns] - and datetime64[ns, TZ] compat - - Notes - ----- - If nothing can be found, returns ``object``. 
- """ - - # type object from a dtype - if isinstance(dtype, type) and issubclass(dtype, np.generic): - return dtype - elif is_categorical(dtype): - return CategoricalDtype().type - elif is_datetimetz(dtype): - return DatetimeTZDtype(dtype).type - elif isinstance(dtype, np.dtype): # dtype object - try: - _validate_date_like_dtype(dtype) - except TypeError: - # should still pass if we don't have a datelike - pass - return dtype.type - elif isinstance(dtype, string_types): - if dtype in ['datetimetz', 'datetime64tz']: - return DatetimeTZDtype.type - elif dtype in ['period']: - raise NotImplementedError - - if dtype == 'datetime' or dtype == 'timedelta': - dtype += '64' - - try: - return _get_dtype_from_object(getattr(np, dtype)) - except (AttributeError, TypeError): - # handles cases like _get_dtype(int) - # i.e., python objects that are valid dtypes (unlike user-defined - # types, in general) - # TypeError handles the float16 typecode of 'e' - # further handle internal types - pass - - return _get_dtype_from_object(np.dtype(dtype)) - - -def _validate_date_like_dtype(dtype): - try: - typ = np.datetime_data(dtype)[0] - except ValueError as e: - raise TypeError('%s' % e) - if typ != 'generic' and typ != 'ns': - raise ValueError('%r is too specific of a frequency, try passing %r' % - (dtype.name, dtype.type.__name__)) - - -_string_dtypes = frozenset(map(_get_dtype_from_object, (binary_type, - text_type))) - - -def pandas_dtype(dtype): - """ - Converts input into a pandas only dtype object or a numpy dtype object. 
- - Parameters - ---------- - dtype : object to be converted - - Returns - ------- - np.dtype or a pandas dtype - """ - if isinstance(dtype, DatetimeTZDtype): - return dtype - elif isinstance(dtype, PeriodDtype): - return dtype - elif isinstance(dtype, CategoricalDtype): - return dtype - elif isinstance(dtype, string_types): - try: - return DatetimeTZDtype.construct_from_string(dtype) - except TypeError: - pass - - if dtype.startswith('period[') or dtype.startswith('Period['): - # do not parse string like U as period[U] - try: - return PeriodDtype.construct_from_string(dtype) - except TypeError: - pass - - try: - return CategoricalDtype.construct_from_string(dtype) - except TypeError: - pass - elif isinstance(dtype, ExtensionDtype): - return dtype - - return np.dtype(dtype) +from pandas.core.dtypes.common import * # noqa diff --git a/pandas/types/concat.py b/pandas/types/concat.py index 827eb160c452d..477156b38d56d 100644 --- a/pandas/types/concat.py +++ b/pandas/types/concat.py @@ -1,480 +1,11 @@ -""" -Utility functions related to concat -""" +import warnings -import numpy as np -import pandas.tslib as tslib -from pandas import compat -from pandas.core.algorithms import take_1d -from .common import (is_categorical_dtype, - is_sparse, - is_datetimetz, - is_datetime64_dtype, - is_timedelta64_dtype, - is_period_dtype, - is_object_dtype, - is_bool_dtype, - is_dtype_equal, - _NS_DTYPE, - _TD_DTYPE) -from pandas.types.generic import (ABCDatetimeIndex, ABCTimedeltaIndex, - ABCPeriodIndex) - -def get_dtype_kinds(l): - """ - Parameters - ---------- - l : list of arrays - - Returns - ------- - a set of kinds that exist in this list of arrays - """ - - typs = set() - for arr in l: - - dtype = arr.dtype - if is_categorical_dtype(dtype): - typ = 'category' - elif is_sparse(arr): - typ = 'sparse' - elif is_datetimetz(arr): - # if to_concat contains different tz, - # the result must be object dtype - typ = str(arr.dtype) - elif is_datetime64_dtype(dtype): - typ = 'datetime' - 
elif is_timedelta64_dtype(dtype): - typ = 'timedelta' - elif is_object_dtype(dtype): - typ = 'object' - elif is_bool_dtype(dtype): - typ = 'bool' - elif is_period_dtype(dtype): - typ = str(arr.dtype) - else: - typ = dtype.kind - typs.add(typ) - return typs - - -def _get_series_result_type(result): - """ - return appropriate class of Series concat - input is either dict or array-like - """ - if isinstance(result, dict): - # concat Series with axis 1 - if all(is_sparse(c) for c in compat.itervalues(result)): - from pandas.sparse.api import SparseDataFrame - return SparseDataFrame - else: - from pandas.core.frame import DataFrame - return DataFrame - - elif is_sparse(result): - # concat Series with axis 1 - from pandas.sparse.api import SparseSeries - return SparseSeries - else: - from pandas.core.series import Series - return Series - - -def _get_frame_result_type(result, objs): - """ - return appropriate class of DataFrame-like concat - if any block is SparseBlock, return SparseDataFrame - otherwise, return 1st obj - """ - if any(b.is_sparse for b in result.blocks): - from pandas.sparse.api import SparseDataFrame - return SparseDataFrame - else: - return objs[0] - - -def _concat_compat(to_concat, axis=0): - """ - provide concatenation of an array of arrays each of which is a single - 'normalized' dtypes (in that for example, if it's object, then it is a - non-datetimelike and provide a combined dtype for the resulting array that - preserves the overall dtype if possible) - - Parameters - ---------- - to_concat : array of arrays - axis : axis to provide concatenation - - Returns - ------- - a single array, preserving the combined dtypes - """ - - # filter empty arrays - # 1-d dtypes always are included here - def is_nonempty(x): - try: - return x.shape[axis] > 0 - except Exception: - return True - - nonempty = [x for x in to_concat if is_nonempty(x)] - - # If all arrays are empty, there's nothing to convert, just short-cut to - # the concatenation, #3121. 
- # - # Creating an empty array directly is tempting, but the winnings would be - # marginal given that it would still require shape & dtype calculation and - # np.concatenate which has them both implemented is compiled. - - typs = get_dtype_kinds(to_concat) - - _contains_datetime = any(typ.startswith('datetime') for typ in typs) - _contains_period = any(typ.startswith('period') for typ in typs) - - if 'category' in typs: - # this must be priort to _concat_datetime, - # to support Categorical + datetime-like - return _concat_categorical(to_concat, axis=axis) - - elif _contains_datetime or 'timedelta' in typs or _contains_period: - return _concat_datetime(to_concat, axis=axis, typs=typs) - - # these are mandated to handle empties as well - elif 'sparse' in typs: - return _concat_sparse(to_concat, axis=axis, typs=typs) - - if not nonempty: - # we have all empties, but may need to coerce the result dtype to - # object if we have non-numeric type operands (numpy would otherwise - # cast this to float) - typs = get_dtype_kinds(to_concat) - if len(typs) != 1: - - if (not len(typs - set(['i', 'u', 'f'])) or - not len(typs - set(['bool', 'i', 'u']))): - # let numpy coerce - pass - else: - # coerce to object - to_concat = [x.astype('object') for x in to_concat] - - return np.concatenate(to_concat, axis=axis) - - -def _concat_categorical(to_concat, axis=0): - """Concatenate an object/categorical array of arrays, each of which is a - single dtype - - Parameters - ---------- - to_concat : array of arrays - axis : int - Axis to provide concatenation in the current implementation this is - always 0, e.g. 
we only have 1D categoricals - - Returns - ------- - Categorical - A single array, preserving the combined dtypes - """ - - def _concat_asobject(to_concat): - to_concat = [x.get_values() if is_categorical_dtype(x.dtype) - else x.ravel() for x in to_concat] - res = _concat_compat(to_concat) - if axis == 1: - return res.reshape(1, len(res)) - else: - return res - - # we could have object blocks and categoricals here - # if we only have a single categoricals then combine everything - # else its a non-compat categorical - categoricals = [x for x in to_concat if is_categorical_dtype(x.dtype)] - - # validate the categories - if len(categoricals) != len(to_concat): - pass - else: - # when all categories are identical - first = to_concat[0] - if all(first.is_dtype_equal(other) for other in to_concat[1:]): - return union_categoricals(categoricals) - - return _concat_asobject(to_concat) - - -def union_categoricals(to_union, sort_categories=False): - """ - Combine list-like of Categorical-like, unioning categories. All - categories must have the same dtype. - - .. versionadded:: 0.19.0 - - Parameters - ---------- - to_union : list-like of Categorical, CategoricalIndex, - or Series with dtype='category' - sort_categories : boolean, default False - If true, resulting categories will be lexsorted, otherwise - they will be ordered as they appear in the data. 
- - Returns - ------- - result : Categorical - - Raises - ------ - TypeError - - all inputs do not have the same dtype - - all inputs do not have the same ordered property - - all inputs are ordered and their categories are not identical - - sort_categories=True and Categoricals are ordered - ValueError - Emmpty list of categoricals passed - """ - from pandas import Index, Categorical, CategoricalIndex, Series - - if len(to_union) == 0: - raise ValueError('No Categoricals to union') - - def _maybe_unwrap(x): - if isinstance(x, (CategoricalIndex, Series)): - return x.values - elif isinstance(x, Categorical): - return x - else: - raise TypeError("all components to combine must be Categorical") - - to_union = [_maybe_unwrap(x) for x in to_union] - first = to_union[0] - - if not all(is_dtype_equal(other.categories.dtype, first.categories.dtype) - for other in to_union[1:]): - raise TypeError("dtype of categories must be the same") - - ordered = False - if all(first.is_dtype_equal(other) for other in to_union[1:]): - # identical categories - fastpath - categories = first.categories - ordered = first.ordered - new_codes = np.concatenate([c.codes for c in to_union]) - - if sort_categories and ordered: - raise TypeError("Cannot use sort_categories=True with " - "ordered Categoricals") - - if sort_categories and not categories.is_monotonic_increasing: - categories = categories.sort_values() - indexer = categories.get_indexer(first.categories) - new_codes = take_1d(indexer, new_codes, fill_value=-1) - elif all(not c.ordered for c in to_union): - # different categories - union and recode - cats = first.categories.append([c.categories for c in to_union[1:]]) - categories = Index(cats.unique()) - if sort_categories: - categories = categories.sort_values() - - new_codes = [] - for c in to_union: - if len(c.categories) > 0: - indexer = categories.get_indexer(c.categories) - new_codes.append(take_1d(indexer, c.codes, fill_value=-1)) - else: - # must be all NaN - 
new_codes.append(c.codes) - new_codes = np.concatenate(new_codes) - else: - # ordered - to show a proper error message - if all(c.ordered for c in to_union): - msg = ("to union ordered Categoricals, " - "all categories must be the same") - raise TypeError(msg) - else: - raise TypeError('Categorical.ordered must be the same') - - return Categorical(new_codes, categories=categories, ordered=ordered, - fastpath=True) - - -def _concat_datetime(to_concat, axis=0, typs=None): - """ - provide concatenation of an datetimelike array of arrays each of which is a - single M8[ns], datetimet64[ns, tz] or m8[ns] dtype - - Parameters - ---------- - to_concat : array of arrays - axis : axis to provide concatenation - typs : set of to_concat dtypes - - Returns - ------- - a single array, preserving the combined dtypes - """ - - def convert_to_pydatetime(x, axis): - # coerce to an object dtype - - # if dtype is of datetimetz or timezone - if x.dtype.kind == _NS_DTYPE.kind: - if getattr(x, 'tz', None) is not None: - x = x.asobject.values - else: - shape = x.shape - x = tslib.ints_to_pydatetime(x.view(np.int64).ravel(), - box=True) - x = x.reshape(shape) - - elif x.dtype == _TD_DTYPE: - shape = x.shape - x = tslib.ints_to_pytimedelta(x.view(np.int64).ravel(), box=True) - x = x.reshape(shape) - - if axis == 1: - x = np.atleast_2d(x) - return x - - if typs is None: - typs = get_dtype_kinds(to_concat) - - # must be single dtype - if len(typs) == 1: - _contains_datetime = any(typ.startswith('datetime') for typ in typs) - _contains_period = any(typ.startswith('period') for typ in typs) - - if _contains_datetime: - - if 'datetime' in typs: - new_values = np.concatenate([x.view(np.int64) for x in - to_concat], axis=axis) - return new_values.view(_NS_DTYPE) - else: - # when to_concat has different tz, len(typs) > 1. 
- # thus no need to care - return _concat_datetimetz(to_concat) - - elif 'timedelta' in typs: - new_values = np.concatenate([x.view(np.int64) for x in to_concat], - axis=axis) - return new_values.view(_TD_DTYPE) - - elif _contains_period: - # PeriodIndex must be handled by PeriodIndex, - # Thus can't meet this condition ATM - # Must be changed when we adding PeriodDtype - raise NotImplementedError - - # need to coerce to object - to_concat = [convert_to_pydatetime(x, axis) for x in to_concat] - return np.concatenate(to_concat, axis=axis) - - -def _concat_datetimetz(to_concat, name=None): - """ - concat DatetimeIndex with the same tz - all inputs must be DatetimeIndex - it is used in DatetimeIndex.append also - """ - # do not pass tz to set because tzlocal cannot be hashed - if len(set([str(x.dtype) for x in to_concat])) != 1: - raise ValueError('to_concat must have the same tz') - tz = to_concat[0].tz - # no need to localize because internal repr will not be changed - new_values = np.concatenate([x.asi8 for x in to_concat]) - return to_concat[0]._simple_new(new_values, tz=tz, name=name) - - -def _concat_index_asobject(to_concat, name=None): - """ - concat all inputs as object. 
DatetimeIndex, TimedeltaIndex and - PeriodIndex are converted to object dtype before concatenation - """ - - klasses = ABCDatetimeIndex, ABCTimedeltaIndex, ABCPeriodIndex - to_concat = [x.asobject if isinstance(x, klasses) else x - for x in to_concat] - - from pandas import Index - self = to_concat[0] - attribs = self._get_attributes_dict() - attribs['name'] = name - - to_concat = [x._values if isinstance(x, Index) else x - for x in to_concat] - return self._shallow_copy_with_infer(np.concatenate(to_concat), **attribs) - - -def _concat_sparse(to_concat, axis=0, typs=None): - """ - provide concatenation of an sparse/dense array of arrays each of which is a - single dtype - - Parameters - ---------- - to_concat : array of arrays - axis : axis to provide concatenation - typs : set of to_concat dtypes - - Returns - ------- - a single array, preserving the combined dtypes - """ - - from pandas.sparse.array import SparseArray, _make_index - - def convert_sparse(x, axis): - # coerce to native type - if isinstance(x, SparseArray): - x = x.get_values() - x = x.ravel() - if axis > 0: - x = np.atleast_2d(x) - return x - - if typs is None: - typs = get_dtype_kinds(to_concat) - - if len(typs) == 1: - # concat input as it is if all inputs are sparse - # and have the same fill_value - fill_values = set(c.fill_value for c in to_concat) - if len(fill_values) == 1: - sp_values = [c.sp_values for c in to_concat] - indexes = [c.sp_index.to_int_index() for c in to_concat] - - indices = [] - loc = 0 - for idx in indexes: - indices.append(idx.indices + loc) - loc += idx.length - sp_values = np.concatenate(sp_values) - indices = np.concatenate(indices) - sp_index = _make_index(loc, indices, kind=to_concat[0].sp_index) - - return SparseArray(sp_values, sparse_index=sp_index, - fill_value=to_concat[0].fill_value) - - # input may be sparse / dense mixed and may have different fill_value - # input must contain sparse at least 1 - sparses = [c for c in to_concat if is_sparse(c)] - fill_values 
= [c.fill_value for c in sparses] - sp_indexes = [c.sp_index for c in sparses] - - # densify and regular concat - to_concat = [convert_sparse(x, axis) for x in to_concat] - result = np.concatenate(to_concat, axis=axis) - - if not len(typs - set(['sparse', 'f', 'i'])): - # sparsify if inputs are sparse and dense numerics - # first sparse input's fill_value and SparseIndex is used - result = SparseArray(result.ravel(), fill_value=fill_values[0], - kind=sp_indexes[0]) - else: - # coerce to object if needed - result = result.astype('object') - return result +def union_categoricals(to_union, sort_categories=False, ignore_order=False): + warnings.warn("pandas.types.concat.union_categoricals is " + "deprecated and will be removed in a future version.\n" + "use pandas.api.types.union_categoricals", + FutureWarning, stacklevel=2) + from pandas.api.types import union_categoricals + return union_categoricals( + to_union, sort_categories=sort_categories, ignore_order=ignore_order) diff --git a/pandas/types/dtypes.py b/pandas/types/dtypes.py deleted file mode 100644 index 5b6d7905d4095..0000000000000 --- a/pandas/types/dtypes.py +++ /dev/null @@ -1,367 +0,0 @@ -""" define extension dtypes """ - -import re -import numpy as np -from pandas import compat - - -class ExtensionDtype(object): - """ - A np.dtype duck-typed class, suitable for holding a custom dtype. - - THIS IS NOT A REAL NUMPY DTYPE - """ - name = None - names = None - type = None - subdtype = None - kind = None - str = None - num = 100 - shape = tuple() - itemsize = 8 - base = None - isbuiltin = 0 - isnative = 0 - _metadata = [] - - def __unicode__(self): - return self.name - - def __str__(self): - """ - Return a string representation for a particular Object - - Invoked by str(df) in both py2/py3. - Yields Bytestring in Py2, Unicode String in py3. - """ - - if compat.PY3: - return self.__unicode__() - return self.__bytes__() - - def __bytes__(self): - """ - Return a string representation for a particular object. 
- - Invoked by bytes(obj) in py3 only. - Yields a bytestring in both py2/py3. - """ - from pandas.core.config import get_option - - encoding = get_option("display.encoding") - return self.__unicode__().encode(encoding, 'replace') - - def __repr__(self): - """ - Return a string representation for a particular object. - - Yields Bytestring in Py2, Unicode String in py3. - """ - return str(self) - - def __hash__(self): - raise NotImplementedError("sub-classes should implement an __hash__ " - "method") - - def __eq__(self, other): - raise NotImplementedError("sub-classes should implement an __eq__ " - "method") - - def __ne__(self, other): - return not self.__eq__(other) - - @classmethod - def is_dtype(cls, dtype): - """ Return a boolean if we if the passed type is an actual dtype that - we can match (via string or type) - """ - if hasattr(dtype, 'dtype'): - dtype = dtype.dtype - if isinstance(dtype, cls): - return True - elif isinstance(dtype, np.dtype): - return False - try: - return cls.construct_from_string(dtype) is not None - except: - return False - - -class CategoricalDtypeType(type): - """ - the type of CategoricalDtype, this metaclass determines subclass ability - """ - pass - - -class CategoricalDtype(ExtensionDtype): - - """ - A np.dtype duck-typed class, suitable for holding a custom categorical - dtype. 
- - THIS IS NOT A REAL NUMPY DTYPE, but essentially a sub-class of np.object - """ - name = 'category' - type = CategoricalDtypeType - kind = 'O' - str = '|O08' - base = np.dtype('O') - _cache = {} - - def __new__(cls): - - try: - return cls._cache[cls.name] - except KeyError: - c = object.__new__(cls) - cls._cache[cls.name] = c - return c - - def __hash__(self): - # make myself hashable - return hash(str(self)) - - def __eq__(self, other): - if isinstance(other, compat.string_types): - return other == self.name - - return isinstance(other, CategoricalDtype) - - @classmethod - def construct_from_string(cls, string): - """ attempt to construct this type from a string, raise a TypeError if - it's not possible """ - try: - if string == 'category': - return cls() - except: - pass - - raise TypeError("cannot construct a CategoricalDtype") - - -class DatetimeTZDtypeType(type): - """ - the type of DatetimeTZDtype, this metaclass determines subclass ability - """ - pass - - -class DatetimeTZDtype(ExtensionDtype): - - """ - A np.dtype duck-typed class, suitable for holding a custom datetime with tz - dtype. 
- - THIS IS NOT A REAL NUMPY DTYPE, but essentially a sub-class of - np.datetime64[ns] - """ - type = DatetimeTZDtypeType - kind = 'M' - str = '|M8[ns]' - num = 101 - base = np.dtype('M8[ns]') - _metadata = ['unit', 'tz'] - _match = re.compile("(datetime64|M8)\[(?P.+), (?P.+)\]") - _cache = {} - - def __new__(cls, unit=None, tz=None): - """ Create a new unit if needed, otherwise return from the cache - - Parameters - ---------- - unit : string unit that this represents, currently must be 'ns' - tz : string tz that this represents - """ - - if isinstance(unit, DatetimeTZDtype): - unit, tz = unit.unit, unit.tz - - elif unit is None: - # we are called as an empty constructor - # generally for pickle compat - return object.__new__(cls) - - elif tz is None: - - # we were passed a string that we can construct - try: - m = cls._match.search(unit) - if m is not None: - unit = m.groupdict()['unit'] - tz = m.groupdict()['tz'] - except: - raise ValueError("could not construct DatetimeTZDtype") - - elif isinstance(unit, compat.string_types): - - if unit != 'ns': - raise ValueError("DatetimeTZDtype only supports ns units") - - unit = unit - tz = tz - - if tz is None: - raise ValueError("DatetimeTZDtype constructor must have a tz " - "supplied") - - # set/retrieve from cache - key = (unit, str(tz)) - try: - return cls._cache[key] - except KeyError: - u = object.__new__(cls) - u.unit = unit - u.tz = tz - cls._cache[key] = u - return u - - @classmethod - def construct_from_string(cls, string): - """ attempt to construct this type from a string, raise a TypeError if - it's not possible - """ - try: - return cls(unit=string) - except ValueError: - raise TypeError("could not construct DatetimeTZDtype") - - def __unicode__(self): - # format the tz - return "datetime64[{unit}, {tz}]".format(unit=self.unit, tz=self.tz) - - @property - def name(self): - return str(self) - - def __hash__(self): - # make myself hashable - return hash(str(self)) - - def __eq__(self, other): - if 
isinstance(other, compat.string_types): - return other == self.name - - return (isinstance(other, DatetimeTZDtype) and - self.unit == other.unit and - str(self.tz) == str(other.tz)) - - -class PeriodDtypeType(type): - """ - the type of PeriodDtype, this metaclass determines subclass ability - """ - pass - - -class PeriodDtype(ExtensionDtype): - __metaclass__ = PeriodDtypeType - """ - A Period duck-typed class, suitable for holding a period with freq dtype. - - THIS IS NOT A REAL NUMPY DTYPE, but essentially a sub-class of np.int64. - """ - type = PeriodDtypeType - kind = 'O' - str = '|O08' - base = np.dtype('O') - num = 102 - _metadata = ['freq'] - _match = re.compile("(P|p)eriod\[(?P.+)\]") - _cache = {} - - def __new__(cls, freq=None): - """ - Parameters - ---------- - freq : frequency - """ - - if isinstance(freq, PeriodDtype): - return freq - - elif freq is None: - # empty constructor for pickle compat - return object.__new__(cls) - - from pandas.tseries.offsets import DateOffset - if not isinstance(freq, DateOffset): - freq = cls._parse_dtype_strict(freq) - - try: - return cls._cache[freq.freqstr] - except KeyError: - u = object.__new__(cls) - u.freq = freq - cls._cache[freq.freqstr] = u - return u - - @classmethod - def _parse_dtype_strict(cls, freq): - if isinstance(freq, compat.string_types): - if freq.startswith('period[') or freq.startswith('Period['): - m = cls._match.search(freq) - if m is not None: - freq = m.group('freq') - from pandas.tseries.frequencies import to_offset - freq = to_offset(freq) - if freq is not None: - return freq - - raise ValueError("could not construct PeriodDtype") - - @classmethod - def construct_from_string(cls, string): - """ - attempt to construct this type from a string, raise a TypeError - if its not possible - """ - from pandas.tseries.offsets import DateOffset - if isinstance(string, (compat.string_types, DateOffset)): - # avoid tuple to be regarded as freq - try: - return cls(freq=string) - except ValueError: - pass - 
raise TypeError("could not construct PeriodDtype") - - def __unicode__(self): - return "period[{freq}]".format(freq=self.freq.freqstr) - - @property - def name(self): - return str(self) - - def __hash__(self): - # make myself hashable - return hash(str(self)) - - def __eq__(self, other): - if isinstance(other, compat.string_types): - return other == self.name or other == self.name.title() - - return isinstance(other, PeriodDtype) and self.freq == other.freq - - @classmethod - def is_dtype(cls, dtype): - """ - Return a boolean if we if the passed type is an actual dtype that we - can match (via string or type) - """ - - if isinstance(dtype, compat.string_types): - # PeriodDtype can be instanciated from freq string like "U", - # but dosn't regard freq str like "U" as dtype. - if dtype.startswith('period[') or dtype.startswith('Period['): - try: - if cls._parse_dtype_strict(dtype) is not None: - return True - else: - return False - except ValueError: - return False - else: - return False - return super(PeriodDtype, cls).is_dtype(dtype) diff --git a/pandas/types/inference.py b/pandas/types/inference.py deleted file mode 100644 index d2a2924b27659..0000000000000 --- a/pandas/types/inference.py +++ /dev/null @@ -1,106 +0,0 @@ -""" basic inference routines """ - -import collections -import re -import numpy as np -from numbers import Number -from pandas.compat import (string_types, text_type, - string_and_binary_types) -from pandas import lib - -is_bool = lib.is_bool - -is_integer = lib.is_integer - -is_float = lib.is_float - -is_complex = lib.is_complex - -is_scalar = lib.isscalar - -is_decimal = lib.is_decimal - - -def is_number(obj): - return isinstance(obj, (Number, np.number)) - - -def is_string_like(obj): - return isinstance(obj, (text_type, string_types)) - - -def _iterable_not_string(x): - return (isinstance(x, collections.Iterable) and - not isinstance(x, string_types)) - - -def is_iterator(obj): - # python 3 generators have __next__ instead of next - return 
hasattr(obj, 'next') or hasattr(obj, '__next__') - - -def is_re(obj): - return isinstance(obj, re._pattern_type) - - -def is_re_compilable(obj): - try: - re.compile(obj) - except TypeError: - return False - else: - return True - - -def is_list_like(arg): - return (hasattr(arg, '__iter__') and - not isinstance(arg, string_and_binary_types)) - - -def is_dict_like(arg): - return hasattr(arg, '__getitem__') and hasattr(arg, 'keys') - - -def is_named_tuple(arg): - return isinstance(arg, tuple) and hasattr(arg, '_fields') - - -def is_hashable(arg): - """Return True if hash(arg) will succeed, False otherwise. - - Some types will pass a test against collections.Hashable but fail when they - are actually hashed with hash(). - - Distinguish between these and other types by trying the call to hash() and - seeing if they raise TypeError. - - Examples - -------- - >>> a = ([],) - >>> isinstance(a, collections.Hashable) - True - >>> is_hashable(a) - False - """ - # unfortunately, we can't use isinstance(arg, collections.Hashable), which - # can be faster than calling hash, because numpy scalars on Python 3 fail - # this test - - # reconsider this decision once this numpy bug is fixed: - # https://github.com/numpy/numpy/issues/5562 - - try: - hash(arg) - except TypeError: - return False - else: - return True - - -def is_sequence(x): - try: - iter(x) - len(x) # it has a length - return not isinstance(x, string_and_binary_types) - except (TypeError, AttributeError): - return False diff --git a/pandas/util/__init__.py b/pandas/util/__init__.py index e69de29bb2d1d..202e58c916e47 100644 --- a/pandas/util/__init__.py +++ b/pandas/util/__init__.py @@ -0,0 +1,2 @@ +from pandas.util._decorators import Appender, Substitution, cache_readonly # noqa +from pandas.core.util.hashing import hash_pandas_object, hash_array # noqa diff --git a/pandas/util/_decorators.py b/pandas/util/_decorators.py new file mode 100644 index 0000000000000..1753bc8b8fc33 --- /dev/null +++ 
b/pandas/util/_decorators.py @@ -0,0 +1,380 @@ +from pandas.compat import callable, signature, PY2 +from pandas._libs.properties import cache_readonly # noqa +import inspect +import types +import warnings +from textwrap import dedent, wrap +from functools import wraps, update_wrapper + + +def deprecate(name, alternative, version, alt_name=None, + klass=None, stacklevel=2, msg=None): + """Return a new function that emits a deprecation warning on use. + + To use this method for a deprecated function, another function + `alternative` with the same signature must exist. The deprecated + function will emit a deprecation warning, and in the docstring + it will contain the deprecation directive with the provided version + so it can be detected for future removal. + + Parameters + ---------- + name : str + Name of function to deprecate + alternative : str + Name of function to use instead + version : str + Version of pandas in which the method has been deprecated + alt_name : str, optional + Name to use in preference of alternative.__name__ + klass : Warning, default FutureWarning + stacklevel : int, default 2 + msg : str + The message to display in the warning. + Default is '{name} is deprecated. Use {alt_name} instead.' + """ + + alt_name = alt_name or alternative.__name__ + klass = klass or FutureWarning + warning_msg = msg or '{} is deprecated, use {} instead'.format(name, + alt_name) + + @wraps(alternative) + def wrapper(*args, **kwargs): + warnings.warn(warning_msg, klass, stacklevel=stacklevel) + return alternative(*args, **kwargs) + + # adding deprecated directive to the docstring + msg = msg or 'Use `{alt_name}` instead.' + docstring = '.. 
deprecated:: {}\n'.format(version) + docstring += dedent(' ' + ('\n'.join(wrap(msg, 70)))) + + if getattr(wrapper, '__doc__') is not None: + docstring += dedent(wrapper.__doc__) + + wrapper.__doc__ = docstring + + return wrapper + + +def deprecate_kwarg(old_arg_name, new_arg_name, mapping=None, stacklevel=2): + """ + Decorator to deprecate a keyword argument of a function. + + Parameters + ---------- + old_arg_name : str + Name of argument in function to deprecate + new_arg_name : str or None + Name of preferred argument in function. Use None to raise warning that + ``old_arg_name`` keyword is deprecated. + mapping : dict or callable + If mapping is present, use it to translate old arguments to + new arguments. A callable must do its own value checking; + values not found in a dict will be forwarded unchanged. + + Examples + -------- + The following deprecates 'cols', using 'columns' instead + + >>> @deprecate_kwarg(old_arg_name='cols', new_arg_name='columns') + ... def f(columns=''): + ... print(columns) + ... + >>> f(columns='should work ok') + should work ok + + >>> f(cols='should raise warning') + FutureWarning: cols is deprecated, use columns instead + warnings.warn(msg, FutureWarning) + should raise warning + + >>> f(cols='should error', columns="can\'t pass do both") + TypeError: Can only specify 'cols' or 'columns', not both + + >>> @deprecate_kwarg('old', 'new', {'yes': True, 'no': False}) + ... def f(new=False): + ... print('yes!' if new else 'no!') + ... + >>> f(old='yes') + FutureWarning: old='yes' is deprecated, use new=True instead + warnings.warn(msg, FutureWarning) + yes! + + + To raise a warning that a keyword will be removed entirely in the future + + >>> @deprecate_kwarg(old_arg_name='cols', new_arg_name=None) + ... def f(cols='', another_param=''): + ... print(cols) + ... 
+ >>> f(cols='should raise warning') + FutureWarning: the 'cols' keyword is deprecated and will be removed in a + future version please takes steps to stop use of 'cols' + should raise warning + >>> f(another_param='should not raise warning') + should not raise warning + + >>> f(cols='should raise warning', another_param='') + FutureWarning: the 'cols' keyword is deprecated and will be removed in a + future version please takes steps to stop use of 'cols' + should raise warning + """ + + if mapping is not None and not hasattr(mapping, 'get') and \ + not callable(mapping): + raise TypeError("mapping from old to new argument values " + "must be dict or callable!") + + def _deprecate_kwarg(func): + @wraps(func) + def wrapper(*args, **kwargs): + old_arg_value = kwargs.pop(old_arg_name, None) + + if new_arg_name is None and old_arg_value is not None: + msg = ( + "the '{old_name}' keyword is deprecated and will be " + "removed in a future version " + "please takes steps to stop use of '{old_name}'" + ).format(old_name=old_arg_name) + warnings.warn(msg, FutureWarning, stacklevel=stacklevel) + kwargs[old_arg_name] = old_arg_value + return func(*args, **kwargs) + + if old_arg_value is not None: + if mapping is not None: + if hasattr(mapping, 'get'): + new_arg_value = mapping.get(old_arg_value, + old_arg_value) + else: + new_arg_value = mapping(old_arg_value) + msg = ("the {old_name}={old_val!r} keyword is deprecated, " + "use {new_name}={new_val!r} instead" + ).format(old_name=old_arg_name, + old_val=old_arg_value, + new_name=new_arg_name, + new_val=new_arg_value) + else: + new_arg_value = old_arg_value + msg = ("the '{old_name}' keyword is deprecated, " + "use '{new_name}' instead" + ).format(old_name=old_arg_name, + new_name=new_arg_name) + + warnings.warn(msg, FutureWarning, stacklevel=stacklevel) + if kwargs.get(new_arg_name, None) is not None: + msg = ("Can only specify '{old_name}' or '{new_name}', " + "not both").format(old_name=old_arg_name, + new_name=new_arg_name) 
+ raise TypeError(msg) + else: + kwargs[new_arg_name] = new_arg_value + return func(*args, **kwargs) + return wrapper + return _deprecate_kwarg + + +def rewrite_axis_style_signature(name, extra_params): + def decorate(func): + @wraps(func) + def wrapper(*args, **kwargs): + return func(*args, **kwargs) + + if not PY2: + kind = inspect.Parameter.POSITIONAL_OR_KEYWORD + params = [ + inspect.Parameter('self', kind), + inspect.Parameter(name, kind, default=None), + inspect.Parameter('index', kind, default=None), + inspect.Parameter('columns', kind, default=None), + inspect.Parameter('axis', kind, default=None), + ] + + for pname, default in extra_params: + params.append(inspect.Parameter(pname, kind, default=default)) + + sig = inspect.Signature(params) + + func.__signature__ = sig + return wrapper + return decorate + +# Substitution and Appender are derived from matplotlib.docstring (1.1.0) +# module http://matplotlib.org/users/license.html + + +class Substitution(object): + """ + A decorator to take a function's docstring and perform string + substitution on it. + + This decorator should be robust even if func.__doc__ is None + (for example, if -OO was passed to the interpreter) + + Usage: construct a docstring.Substitution with a sequence or + dictionary suitable for performing substitution; then + decorate a suitable function with the constructed object. e.g. + + sub_author_name = Substitution(author='Jason') + + @sub_author_name + def some_function(x): + "%(author)s wrote this function" + + # note that some_function.__doc__ is now "Jason wrote this function" + + One can also use positional arguments. 
+ + sub_first_last_names = Substitution('Edgar Allen', 'Poe') + + @sub_first_last_names + def some_function(x): + "%s %s wrote the Raven" + """ + + def __init__(self, *args, **kwargs): + if (args and kwargs): + raise AssertionError("Only positional or keyword args are allowed") + + self.params = args or kwargs + + def __call__(self, func): + func.__doc__ = func.__doc__ and func.__doc__ % self.params + return func + + def update(self, *args, **kwargs): + """ + Update self.params with supplied args. + + If called, we assume self.params is a dict. + """ + + self.params.update(*args, **kwargs) + + @classmethod + def from_params(cls, params): + """ + In the case where the params is a mutable sequence (list or dictionary) + and it may change before this class is called, one may explicitly use a + reference to the params rather than using *args or **kwargs which will + copy the values and not reference them. + """ + result = cls() + result.params = params + return result + + +class Appender(object): + """ + A function decorator that will append an addendum to the docstring + of the target function. + + This decorator should be robust even if func.__doc__ is None + (for example, if -OO was passed to the interpreter). + + Usage: construct a docstring.Appender with a string to be joined to + the original docstring. An optional 'join' parameter may be supplied + which will be used to join the docstring and addendum. e.g. 
+ + add_copyright = Appender("Copyright (c) 2009", join='\n') + + @add_copyright + def my_dog(has='fleas'): + "This docstring will have a copyright below" + pass + """ + + def __init__(self, addendum, join='', indents=0): + if indents > 0: + self.addendum = indent(addendum, indents=indents) + else: + self.addendum = addendum + self.join = join + + def __call__(self, func): + func.__doc__ = func.__doc__ if func.__doc__ else '' + self.addendum = self.addendum if self.addendum else '' + docitems = [func.__doc__, self.addendum] + func.__doc__ = dedent(self.join.join(docitems)) + return func + + +def indent(text, indents=1): + if not text or not isinstance(text, str): + return '' + jointext = ''.join(['\n'] + [' '] * indents) + return jointext.join(text.split('\n')) + + +def make_signature(func): + """ + Returns a string repr of the arg list of a func call, with any defaults. + + Examples + -------- + >>> def f(a,b,c=2) : + >>> return a*b*c + >>> print(_make_signature(f)) + a,b,c=2 + """ + + spec = signature(func) + if spec.defaults is None: + n_wo_defaults = len(spec.args) + defaults = ('',) * n_wo_defaults + else: + n_wo_defaults = len(spec.args) - len(spec.defaults) + defaults = ('',) * n_wo_defaults + tuple(spec.defaults) + args = [] + for i, (var, default) in enumerate(zip(spec.args, defaults)): + args.append(var if default == '' else var + '=' + repr(default)) + if spec.varargs: + args.append('*' + spec.varargs) + if spec.keywords: + args.append('**' + spec.keywords) + return args, spec.args + + +class docstring_wrapper(object): + """ + Decorator to wrap a function and provide + a dynamically evaluated doc-string. 
+ + Parameters + ---------- + func : callable + creator : callable + return the doc-string + default : str, optional + return this doc-string on error + """ + _attrs = ['__module__', '__name__', + '__qualname__', '__annotations__'] + + def __init__(self, func, creator, default=None): + self.func = func + self.creator = creator + self.default = default + update_wrapper( + self, func, [attr for attr in self._attrs + if hasattr(func, attr)]) + + def __get__(self, instance, cls=None): + + # we are called with a class + if instance is None: + return self + + # we want to return the actual passed instance + return types.MethodType(self, instance) + + def __call__(self, *args, **kwargs): + return self.func(*args, **kwargs) + + @property + def __doc__(self): + try: + return self.creator() + except Exception as exc: + msg = self.default or str(exc) + return msg diff --git a/pandas/util/_depr_module.py b/pandas/util/_depr_module.py new file mode 100644 index 0000000000000..9c648b76fdad1 --- /dev/null +++ b/pandas/util/_depr_module.py @@ -0,0 +1,103 @@ +""" +This module houses a utility class for mocking deprecated modules. +It is for internal use only and should not be used beyond this purpose. +""" + +import warnings +import importlib + + +class _DeprecatedModule(object): + """ Class for mocking deprecated modules. + + Parameters + ---------- + deprmod : name of module to be deprecated. + deprmodto : name of module as a replacement, optional. + If not given, the __module__ attribute will + be used when needed. + removals : objects or methods in module that will no longer be + accessible once module is removed. 
+ moved : dict, optional + dictionary of function name -> new location for moved + objects + """ + + def __init__(self, deprmod, deprmodto=None, removals=None, + moved=None): + self.deprmod = deprmod + self.deprmodto = deprmodto + self.removals = removals + if self.removals is not None: + self.removals = frozenset(self.removals) + self.moved = moved + + # For introspection purposes. + self.self_dir = frozenset(dir(self.__class__)) + + def __dir__(self): + deprmodule = self._import_deprmod() + return dir(deprmodule) + + def __repr__(self): + deprmodule = self._import_deprmod() + return repr(deprmodule) + + __str__ = __repr__ + + def __getattr__(self, name): + if name in self.self_dir: + return object.__getattribute__(self, name) + + try: + deprmodule = self._import_deprmod(self.deprmod) + except ImportError: + if self.deprmodto is None: + raise + + # a rename + deprmodule = self._import_deprmod(self.deprmodto) + + obj = getattr(deprmodule, name) + + if self.removals is not None and name in self.removals: + warnings.warn( + "{deprmod}.{name} is deprecated and will be removed in " + "a future version.".format(deprmod=self.deprmod, name=name), + FutureWarning, stacklevel=2) + elif self.moved is not None and name in self.moved: + warnings.warn( + "{deprmod} is deprecated and will be removed in " + "a future version.\nYou can access {name} as {moved}".format( + deprmod=self.deprmod, + name=name, + moved=self.moved[name]), + FutureWarning, stacklevel=2) + else: + deprmodto = self.deprmodto + if deprmodto is False: + warnings.warn( + "{deprmod}.{name} is deprecated and will be removed in " + "a future version.".format( + deprmod=self.deprmod, name=name), + FutureWarning, stacklevel=2) + else: + if deprmodto is None: + deprmodto = obj.__module__ + # The object is actually located in another module. + warnings.warn( + "{deprmod}.{name} is deprecated. 
Please use " + "{deprmodto}.{name} instead.".format( + deprmod=self.deprmod, name=name, deprmodto=deprmodto), + FutureWarning, stacklevel=2) + + return obj + + def _import_deprmod(self, mod=None): + if mod is None: + mod = self.deprmod + + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', category=FutureWarning) + deprmodule = importlib.import_module(mod) + return deprmodule diff --git a/pandas/util/doctools.py b/pandas/util/_doctools.py similarity index 88% rename from pandas/util/doctools.py rename to pandas/util/_doctools.py index 62dcba1405581..667c5d9526563 100644 --- a/pandas/util/doctools.py +++ b/pandas/util/_doctools.py @@ -15,17 +15,23 @@ def __init__(self, cell_width=0.37, cell_height=0.25, font_size=7.5): self.font_size = font_size def _shape(self, df): - """Calcurate table chape considering index levels""" + """ + Calculate table chape considering index levels. + """ + row, col = df.shape return row + df.columns.nlevels, col + df.index.nlevels def _get_cells(self, left, right, vertical): - """Calcurate appropriate figure size based on left and right data""" + """ + Calculate appropriate figure size based on left and right data. 
+ """ + if vertical: - # calcurate required number of cells - vcells = max(sum([self._shape(l)[0] for l in left]), + # calculate required number of cells + vcells = max(sum(self._shape(l)[0] for l in left), self._shape(right)[0]) - hcells = (max([self._shape(l)[1] for l in left]) + + hcells = (max(self._shape(l)[1] for l in left) + self._shape(right)[1]) else: vcells = max([self._shape(l)[0] for l in left] + @@ -66,8 +72,8 @@ def plot(self, left, right, labels=None, vertical=True): if vertical: gs = gridspec.GridSpec(len(left), hcells) # left - max_left_cols = max([self._shape(l)[1] for l in left]) - max_left_rows = max([self._shape(l)[0] for l in left]) + max_left_cols = max(self._shape(l)[1] for l in left) + max_left_rows = max(self._shape(l)[0] for l in left) for i, (l, label) in enumerate(zip(left, labels)): ax = fig.add_subplot(gs[i, 0:max_left_cols]) self._make_table(ax, l, title=label, @@ -77,7 +83,7 @@ def plot(self, left, right, labels=None, vertical=True): self._make_table(ax, right, title='Result', height=1.05 / vcells) fig.subplots_adjust(top=0.9, bottom=0.05, left=0.05, right=0.95) else: - max_rows = max([self._shape(df)[0] for df in left + [right]]) + max_rows = max(self._shape(df)[0] for df in left + [right]) height = 1.0 / np.max(max_rows) gs = gridspec.GridSpec(1, hcells) # left @@ -113,12 +119,12 @@ def _insert_index(self, data): else: for i in range(idx_nlevels): data.insert(i, 'Index{0}'.format(i), - data.index.get_level_values(i)) + data.index._get_level_values(i)) col_nlevels = data.columns.nlevels if col_nlevels > 1: - col = data.columns.get_level_values(0) - values = [data.columns.get_level_values(i).values + col = data.columns._get_level_values(0) + values = [data.columns._get_level_values(i).values for i in range(1, col_nlevels)] col_df = pd.DataFrame(values) data.columns = col_df.columns @@ -131,7 +137,7 @@ def _make_table(self, ax, df, title, height=None): ax.set_visible(False) return - import pandas.tools.plotting as plotting + import 
pandas.plotting as plotting idx_nlevels = df.index.nlevels col_nlevels = df.columns.nlevels diff --git a/pandas/util/print_versions.py b/pandas/util/_print_versions.py similarity index 80% rename from pandas/util/print_versions.py rename to pandas/util/_print_versions.py index 7c5148caf7e74..83c1433bf5c39 100644 --- a/pandas/util/print_versions.py +++ b/pandas/util/_print_versions.py @@ -38,18 +38,17 @@ def get_sys_info(): (sysname, nodename, release, version, machine, processor) = platform.uname() blob.extend([ - ("python", "%d.%d.%d.%s.%s" % sys.version_info[:]), + ("python", '.'.join(map(str, sys.version_info))), ("python-bits", struct.calcsize("P") * 8), - ("OS", "%s" % (sysname)), - ("OS-release", "%s" % (release)), - # ("Version", "%s" % (version)), - ("machine", "%s" % (machine)), - ("processor", "%s" % (processor)), - ("byteorder", "%s" % sys.byteorder), - ("LC_ALL", "%s" % os.environ.get('LC_ALL', "None")), - ("LANG", "%s" % os.environ.get('LANG', "None")), - ("LOCALE", "%s.%s" % locale.getlocale()), - + ("OS", "{sysname}".format(sysname=sysname)), + ("OS-release", "{release}".format(release=release)), + # ("Version", "{version}".format(version=version)), + ("machine", "{machine}".format(machine=machine)), + ("processor", "{processor}".format(processor=processor)), + ("byteorder", "{byteorder}".format(byteorder=sys.byteorder)), + ("LC_ALL", "{lc}".format(lc=os.environ.get('LC_ALL', "None"))), + ("LANG", "{lang}".format(lang=os.environ.get('LANG', "None"))), + ("LOCALE", '.'.join(map(str, locale.getlocale()))), ]) except: pass @@ -69,6 +68,7 @@ def show_versions(as_json=False): ("Cython", lambda mod: mod.__version__), ("numpy", lambda mod: mod.version.version), ("scipy", lambda mod: mod.version.version), + ("pyarrow", lambda mod: mod.__version__), ("xarray", lambda mod: mod.__version__), ("IPython", lambda mod: mod.__version__), ("sphinx", lambda mod: mod.__version__), @@ -88,14 +88,14 @@ def show_versions(as_json=False): ("lxml", lambda mod: 
mod.etree.__version__), ("bs4", lambda mod: mod.__version__), ("html5lib", lambda mod: mod.__version__), - ("httplib2", lambda mod: mod.__version__), - ("apiclient", lambda mod: mod.__version__), ("sqlalchemy", lambda mod: mod.__version__), ("pymysql", lambda mod: mod.__version__), ("psycopg2", lambda mod: mod.__version__), ("jinja2", lambda mod: mod.__version__), ("s3fs", lambda mod: mod.__version__), - ("pandas_datareader", lambda mod: mod.__version__) + ("fastparquet", lambda mod: mod.__version__), + ("pandas_gbq", lambda mod: mod.__version__), + ("pandas_datareader", lambda mod: mod.__version__), ] deps_blob = list() @@ -130,11 +130,11 @@ def show_versions(as_json=False): print("------------------") for k, stat in sys_info: - print("%s: %s" % (k, stat)) + print("{k}: {stat}".format(k=k, stat=stat)) print("") for k, stat in deps_blob: - print("%s: %s" % (k, stat)) + print("{k}: {stat}".format(k=k, stat=stat)) def main(): @@ -153,5 +153,6 @@ def main(): return 0 + if __name__ == "__main__": sys.exit(main()) diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py new file mode 100644 index 0000000000000..8ad73538fbec1 --- /dev/null +++ b/pandas/util/_test_decorators.py @@ -0,0 +1,189 @@ +""" +This module provides decorator functions which can be applied to test objects +in order to skip those objects when certain conditions occur. A sample use case +is to detect if the platform is missing ``matplotlib``. If so, any test objects +which require ``matplotlib`` and decorated with ``@td.skip_if_no_mpl`` will be +skipped by ``pytest`` during the execution of the test suite. + +To illustrate, after importing this module: + +import pandas.util._test_decorators as td + +The decorators can be applied to classes: + +@td.skip_if_some_reason +class Foo(): + ... + +Or individual functions: + +@td.skip_if_some_reason +def test_foo(): + ... + +For more information, refer to the ``pytest`` documentation on ``skipif``. 
+""" + +import pytest +import locale +from distutils.version import LooseVersion + +from pandas.compat import (is_platform_windows, is_platform_32bit, PY3, + import_lzma) +from pandas.core.computation.expressions import (_USE_NUMEXPR, + _NUMEXPR_INSTALLED) + + +def safe_import(mod_name, min_version=None): + """ + Parameters: + ----------- + mod_name : str + Name of the module to be imported + min_version : str, default None + Minimum required version of the specified mod_name + + Returns: + -------- + object + The imported module if successful, or False + """ + try: + mod = __import__(mod_name) + except ImportError: + return False + + if not min_version: + return mod + else: + import sys + try: + version = getattr(sys.modules[mod_name], '__version__') + except AttributeError: + # xlrd uses a capitalized attribute name + version = getattr(sys.modules[mod_name], '__VERSION__') + if version: + from distutils.version import LooseVersion + if LooseVersion(version) >= LooseVersion(min_version): + return mod + + return False + + +def _skip_if_no_mpl(): + mod = safe_import("matplotlib") + if mod: + mod.use("Agg", warn=False) + else: + return True + + +def _skip_if_mpl_1_5(): + mod = safe_import("matplotlib") + + if mod: + v = mod.__version__ + if LooseVersion(v) > LooseVersion('1.4.3') or str(v)[0] == '0': + return True + else: + mod.use("Agg", warn=False) + + +def _skip_if_mpl_2_2(): + mod = safe_import("matplotlib") + + if mod: + v = mod.__version__ + if LooseVersion(v) > LooseVersion('2.1.2'): + return True + else: + mod.use("Agg", warn=False) + + +def _skip_if_has_locale(): + lang, _ = locale.getlocale() + if lang is not None: + return True + + +def _skip_if_not_us_locale(): + lang, _ = locale.getlocale() + if lang != 'en_US': + return True + + +def _skip_if_no_scipy(): + return not (safe_import('scipy.stats') and safe_import('scipy.sparse') and + safe_import('scipy.interpolate')) + + +def _skip_if_no_lzma(): + try: + import_lzma() + except ImportError: + return True + 
+ +def skip_if_no(package, min_version=None): + """ + Generic function to help skip test functions when required packages are not + present on the testing system. + + Intended for use as a decorator, this function will wrap the decorated + function with a pytest ``skip_if`` mark. During a pytest test suite + execution, that mark will attempt to import the specified ``package`` and + optionally ensure it meets the ``min_version``. If the import and version + check are unsuccessful, then the decorated function will be skipped. + + Parameters + ---------- + package: str + The name of the package required by the decorated function + min_version: str or None, default None + Optional minimum version of the package required by the decorated + function + + Returns + ------- + decorated_func: function + The decorated function wrapped within a pytest ``skip_if`` mark + """ + def decorated_func(func): + msg = "Could not import '{}'".format(package) + if min_version: + msg += " satisfying a min_version of {}".format(min_version) + return pytest.mark.skipif( + not safe_import(package, min_version=min_version), reason=msg + )(func) + return decorated_func + + +skip_if_no_mpl = pytest.mark.skipif(_skip_if_no_mpl(), + reason="Missing matplotlib dependency") +skip_if_mpl_1_5 = pytest.mark.skipif(_skip_if_mpl_1_5(), + reason="matplotlib 1.5") +xfail_if_mpl_2_2 = pytest.mark.xfail(_skip_if_mpl_2_2(), + reason="matplotlib 2.2") +skip_if_32bit = pytest.mark.skipif(is_platform_32bit(), + reason="skipping for 32 bit") +skip_if_windows = pytest.mark.skipif(is_platform_windows(), + reason="Running on Windows") +skip_if_windows_python_3 = pytest.mark.skipif(is_platform_windows() and PY3, + reason=("not used on python3/" + "win32")) +skip_if_has_locale = pytest.mark.skipif(_skip_if_has_locale(), + reason="Specific locale is set {lang}" + .format(lang=locale.getlocale()[0])) +skip_if_not_us_locale = pytest.mark.skipif(_skip_if_not_us_locale(), + reason="Specific locale is set " + 
"{lang}".format( + lang=locale.getlocale()[0])) +skip_if_no_scipy = pytest.mark.skipif(_skip_if_no_scipy(), + reason="Missing SciPy requirement") +skip_if_no_lzma = pytest.mark.skipif(_skip_if_no_lzma(), + reason="need backports.lzma to run") +skip_if_no_ne = pytest.mark.skipif(not _USE_NUMEXPR, + reason="numexpr enabled->{enabled}, " + "installed->{installed}".format( + enabled=_USE_NUMEXPR, + installed=_NUMEXPR_INSTALLED)) diff --git a/pandas/util/_tester.py b/pandas/util/_tester.py index 8d9701e0b4672..d18467f17ec5b 100644 --- a/pandas/util/_tester.py +++ b/pandas/util/_tester.py @@ -2,25 +2,24 @@ Entrypoint for testing from the top-level namespace """ import os +import sys PKG = os.path.dirname(os.path.dirname(__file__)) -try: - import pytest -except ImportError: - def test(): +def test(extra_args=None): + try: + import pytest + except ImportError: raise ImportError("Need pytest>=3.0 to run tests") -else: - def test(extra_args=None): - cmd = ['--skip-slow', '--skip-network'] - if extra_args: - if not isinstance(extra_args, list): - extra_args = [extra_args] - cmd = extra_args - cmd += [PKG] - print("running: pytest {}".format(' '.join(cmd))) - pytest.main(cmd) + cmd = ['--skip-slow', '--skip-network'] + if extra_args: + if not isinstance(extra_args, list): + extra_args = [extra_args] + cmd = extra_args + cmd += [PKG] + print("running: pytest {}".format(' '.join(cmd))) + sys.exit(pytest.main(cmd)) __all__ = ['test'] diff --git a/pandas/util/validators.py b/pandas/util/_validators.py similarity index 60% rename from pandas/util/validators.py rename to pandas/util/_validators.py index f22412a2bcd17..a96563051e7de 100644 --- a/pandas/util/validators.py +++ b/pandas/util/_validators.py @@ -2,8 +2,9 @@ Module that contains many useful utilities for validating data or function arguments """ +import warnings -from pandas.types.common import is_bool +from pandas.core.dtypes.common import is_bool def _check_arg_length(fname, args, max_fname_arg_count, compat_args): @@ 
-39,7 +40,7 @@ def _check_for_default_values(fname, arg_val_dict, compat_args): """ for key in arg_val_dict: # try checking equality directly with '=' operator, - # as comparison may have been overriden for the left + # as comparison may have been overridden for the left # hand object try: v1 = arg_val_dict[key] @@ -220,7 +221,138 @@ def validate_args_and_kwargs(fname, args, kwargs, def validate_bool_kwarg(value, arg_name): """ Ensures that argument passed in arg_name is of type bool. """ if not (is_bool(value) or value is None): - raise ValueError('For argument "%s" expected type bool, ' - 'received type %s.' % - (arg_name, type(value).__name__)) + raise ValueError('For argument "{arg}" expected type bool, received ' + 'type {typ}.'.format(arg=arg_name, + typ=type(value).__name__)) return value + + +def validate_axis_style_args(data, args, kwargs, arg_name, method_name): + """Argument handler for mixed index, columns / axis functions + + In an attempt to handle both `.method(index, columns)`, and + `.method(arg, axis=.)`, we have to do some bad things to argument + parsing. This translates all arguments to `{index=., columns=.}` style. + + Parameters + ---------- + data : DataFrame or Panel + arg : tuple + All positional arguments from the user + kwargs : dict + All keyword arguments from the user + arg_name, method_name : str + Used for better error messages + + Returns + ------- + kwargs : dict + A dictionary of keyword arguments. Doesn't modify ``kwargs`` + inplace, so update them with the return value here. + + Examples + -------- + >>> df._validate_axis_style_args((str.upper,), {'columns': id}, + ... 'mapper', 'rename') + {'columns': , 'index': } + + This emits a warning + >>> df._validate_axis_style_args((str.upper, id), {}, + ... 
'mapper', 'rename') + {'columns': , 'index': } + """ + # TODO(PY3): Change to keyword-only args and remove all this + + out = {} + # Goal: fill 'out' with index/columns-style arguments + # like out = {'index': foo, 'columns': bar} + + # Start by validating for consistency + if 'axis' in kwargs and any(x in kwargs for x in data._AXIS_NUMBERS): + msg = "Cannot specify both 'axis' and any of 'index' or 'columns'." + raise TypeError(msg) + + # First fill with explicit values provided by the user... + if arg_name in kwargs: + if args: + msg = ("{} got multiple values for argument " + "'{}'".format(method_name, arg_name)) + raise TypeError(msg) + + axis = data._get_axis_name(kwargs.get('axis', 0)) + out[axis] = kwargs[arg_name] + + # More user-provided arguments, now from kwargs + for k, v in kwargs.items(): + try: + ax = data._get_axis_name(k) + except ValueError: + pass + else: + out[ax] = v + + # All user-provided kwargs have been handled now. + # Now we supplement with positional arguments, emitting warnings + # when there's ambiguity and raising when there's conflicts + + if len(args) == 0: + pass # It's up to the function to decide if this is valid + elif len(args) == 1: + axis = data._get_axis_name(kwargs.get('axis', 0)) + out[axis] = args[0] + elif len(args) == 2: + if 'axis' in kwargs: + # Unambiguously wrong + msg = ("Cannot specify both 'axis' and any of 'index' " + "or 'columns'") + raise TypeError(msg) + + msg = ("Interpreting call\n\t'.{method_name}(a, b)' as " + "\n\t'.{method_name}(index=a, columns=b)'.\nUse named " + "arguments to remove any ambiguity. In the future, using " + "positional arguments for 'index' or 'columns' will raise " + " a 'TypeError'.") + warnings.warn(msg.format(method_name=method_name,), FutureWarning, + stacklevel=4) + out[data._AXIS_NAMES[0]] = args[0] + out[data._AXIS_NAMES[1]] = args[1] + else: + msg = "Cannot specify all of '{}', 'index', 'columns'." 
+ raise TypeError(msg.format(arg_name)) + return out + + +def validate_fillna_kwargs(value, method, validate_scalar_dict_value=True): + """Validate the keyword arguments to 'fillna'. + + This checks that exactly one of 'value' and 'method' is specified. + If 'method' is specified, this validates that it's a valid method. + + Parameters + ---------- + value, method : object + The 'value' and 'method' keyword arguments for 'fillna'. + validate_scalar_dict_value : bool, default True + Whether to validate that 'value' is a scalar or dict. Specifically, + validate that it is not a list or tuple. + + Returns + ------- + value, method : object + """ + from pandas.core.missing import clean_fill_method + + if value is None and method is None: + raise ValueError("Must specify a fill 'value' or 'method'.") + elif value is None and method is not None: + method = clean_fill_method(method) + + elif value is not None and method is None: + if validate_scalar_dict_value and isinstance(value, (list, tuple)): + raise TypeError('"value" parameter must be a scalar or dict, but ' + 'you passed a "{0}"'.format(type(value).__name__)) + + elif value is not None and method is not None: + raise ValueError("Cannot specify both 'value' and 'method'.") + + return value, method diff --git a/pandas/util/decorators.py b/pandas/util/decorators.py index 1b501eb1d9bda..54bb834e829f3 100644 --- a/pandas/util/decorators.py +++ b/pandas/util/decorators.py @@ -1,235 +1,8 @@ -from pandas.compat import StringIO, callable, signature -from pandas.lib import cache_readonly # noqa -import sys import warnings -from textwrap import dedent -from functools import wraps +warnings.warn("pandas.util.decorators is deprecated and will be " + "removed in a future version, import " + "from pandas.util", + DeprecationWarning, stacklevel=3) -def deprecate(name, alternative, alt_name=None): - alt_name = alt_name or alternative.__name__ - - def wrapper(*args, **kwargs): - warnings.warn("%s is deprecated. 
Use %s instead" % (name, alt_name), - FutureWarning, stacklevel=2) - return alternative(*args, **kwargs) - return wrapper - - -def deprecate_kwarg(old_arg_name, new_arg_name, mapping=None, stacklevel=2): - """Decorator to deprecate a keyword argument of a function - - Parameters - ---------- - old_arg_name : str - Name of argument in function to deprecate - new_arg_name : str - Name of prefered argument in function - mapping : dict or callable - If mapping is present, use it to translate old arguments to - new arguments. A callable must do its own value checking; - values not found in a dict will be forwarded unchanged. - - Examples - -------- - The following deprecates 'cols', using 'columns' instead - - >>> @deprecate_kwarg(old_arg_name='cols', new_arg_name='columns') - ... def f(columns=''): - ... print(columns) - ... - >>> f(columns='should work ok') - should work ok - >>> f(cols='should raise warning') - FutureWarning: cols is deprecated, use columns instead - warnings.warn(msg, FutureWarning) - should raise warning - >>> f(cols='should error', columns="can\'t pass do both") - TypeError: Can only specify 'cols' or 'columns', not both - >>> @deprecate_kwarg('old', 'new', {'yes': True, 'no': False}) - ... def f(new=False): - ... print('yes!' if new else 'no!') - ... - >>> f(old='yes') - FutureWarning: old='yes' is deprecated, use new=True instead - warnings.warn(msg, FutureWarning) - yes! 
- - """ - if mapping is not None and not hasattr(mapping, 'get') and \ - not callable(mapping): - raise TypeError("mapping from old to new argument values " - "must be dict or callable!") - - def _deprecate_kwarg(func): - @wraps(func) - def wrapper(*args, **kwargs): - old_arg_value = kwargs.pop(old_arg_name, None) - if old_arg_value is not None: - if mapping is not None: - if hasattr(mapping, 'get'): - new_arg_value = mapping.get(old_arg_value, - old_arg_value) - else: - new_arg_value = mapping(old_arg_value) - msg = "the %s=%r keyword is deprecated, " \ - "use %s=%r instead" % \ - (old_arg_name, old_arg_value, - new_arg_name, new_arg_value) - else: - new_arg_value = old_arg_value - msg = "the '%s' keyword is deprecated, " \ - "use '%s' instead" % (old_arg_name, new_arg_name) - - warnings.warn(msg, FutureWarning, stacklevel=stacklevel) - if kwargs.get(new_arg_name, None) is not None: - msg = ("Can only specify '%s' or '%s', not both" % - (old_arg_name, new_arg_name)) - raise TypeError(msg) - else: - kwargs[new_arg_name] = new_arg_value - return func(*args, **kwargs) - return wrapper - return _deprecate_kwarg - - -# Substitution and Appender are derived from matplotlib.docstring (1.1.0) -# module http://matplotlib.org/users/license.html - - -class Substitution(object): - """ - A decorator to take a function's docstring and perform string - substitution on it. - - This decorator should be robust even if func.__doc__ is None - (for example, if -OO was passed to the interpreter) - - Usage: construct a docstring.Substitution with a sequence or - dictionary suitable for performing substitution; then - decorate a suitable function with the constructed object. e.g. - - sub_author_name = Substitution(author='Jason') - - @sub_author_name - def some_function(x): - "%(author)s wrote this function" - - # note that some_function.__doc__ is now "Jason wrote this function" - - One can also use positional arguments. 
- - sub_first_last_names = Substitution('Edgar Allen', 'Poe') - - @sub_first_last_names - def some_function(x): - "%s %s wrote the Raven" - """ - - def __init__(self, *args, **kwargs): - if (args and kwargs): - raise AssertionError("Only positional or keyword args are allowed") - - self.params = args or kwargs - - def __call__(self, func): - func.__doc__ = func.__doc__ and func.__doc__ % self.params - return func - - def update(self, *args, **kwargs): - "Assume self.params is a dict and update it with supplied args" - self.params.update(*args, **kwargs) - - @classmethod - def from_params(cls, params): - """ - In the case where the params is a mutable sequence (list or dictionary) - and it may change before this class is called, one may explicitly use a - reference to the params rather than using *args or **kwargs which will - copy the values and not reference them. - """ - result = cls() - result.params = params - return result - - -class Appender(object): - """ - A function decorator that will append an addendum to the docstring - of the target function. - - This decorator should be robust even if func.__doc__ is None - (for example, if -OO was passed to the interpreter). - - Usage: construct a docstring.Appender with a string to be joined to - the original docstring. An optional 'join' parameter may be supplied - which will be used to join the docstring and addendum. e.g. 
- - add_copyright = Appender("Copyright (c) 2009", join='\n') - - @add_copyright - def my_dog(has='fleas'): - "This docstring will have a copyright below" - pass - """ - - def __init__(self, addendum, join='', indents=0): - if indents > 0: - self.addendum = indent(addendum, indents=indents) - else: - self.addendum = addendum - self.join = join - - def __call__(self, func): - func.__doc__ = func.__doc__ if func.__doc__ else '' - self.addendum = self.addendum if self.addendum else '' - docitems = [func.__doc__, self.addendum] - func.__doc__ = dedent(self.join.join(docitems)) - return func - - -def indent(text, indents=1): - if not text or not isinstance(text, str): - return '' - jointext = ''.join(['\n'] + [' '] * indents) - return jointext.join(text.split('\n')) - - -def suppress_stdout(f): - def wrapped(*args, **kwargs): - try: - sys.stdout = StringIO() - f(*args, **kwargs) - finally: - sys.stdout = sys.__stdout__ - - return wrapped - - -def make_signature(func): - """ - Returns a string repr of the arg list of a func call, with any defaults - - Examples - -------- - - >>> def f(a,b,c=2) : - >>> return a*b*c - >>> print(_make_signature(f)) - a,b,c=2 - """ - spec = signature(func) - if spec.defaults is None: - n_wo_defaults = len(spec.args) - defaults = ('',) * n_wo_defaults - else: - n_wo_defaults = len(spec.args) - len(spec.defaults) - defaults = ('',) * n_wo_defaults + spec.defaults - args = [] - for i, (var, default) in enumerate(zip(spec.args, defaults)): - args.append(var if default == '' else var + '=' + repr(default)) - if spec.varargs: - args.append('*' + spec.varargs) - if spec.keywords: - args.append('**' + spec.keywords) - return args, spec.args +from pandas.util._decorators import * # noqa diff --git a/pandas/util/depr_module.py b/pandas/util/depr_module.py deleted file mode 100644 index cf8b0f7960f17..0000000000000 --- a/pandas/util/depr_module.py +++ /dev/null @@ -1,65 +0,0 @@ -""" -This module houses a utility class for mocking deprecated modules. 
-It is for internal use only and should not be used beyond this purpose. -""" - -import warnings -import importlib - - -class _DeprecatedModule(object): - """ Class for mocking deprecated modules. - - Parameters - ---------- - deprmod : name of module to be deprecated. - removals : objects or methods in module that will no longer be - accessible once module is removed. - """ - - def __init__(self, deprmod, removals=None): - self.deprmod = deprmod - self.removals = removals - if self.removals is not None: - self.removals = frozenset(self.removals) - - # For introspection purposes. - self.self_dir = frozenset(dir(self.__class__)) - - def __dir__(self): - deprmodule = self._import_deprmod() - return dir(deprmodule) - - def __repr__(self): - deprmodule = self._import_deprmod() - return repr(deprmodule) - - __str__ = __repr__ - - def __getattr__(self, name): - if name in self.self_dir: - return object.__getattribute__(self, name) - - deprmodule = self._import_deprmod() - obj = getattr(deprmodule, name) - - if self.removals is not None and name in self.removals: - warnings.warn( - "{deprmod}.{name} is deprecated and will be removed in " - "a future version.".format(deprmod=self.deprmod, name=name), - FutureWarning, stacklevel=2) - else: - # The object is actually located in another module. - warnings.warn( - "{deprmod}.{name} is deprecated. 
Please use " - "{modname}.{name} instead.".format( - deprmod=self.deprmod, modname=obj.__module__, name=name), - FutureWarning, stacklevel=2) - - return obj - - def _import_deprmod(self): - with warnings.catch_warnings(): - warnings.filterwarnings('ignore', category=FutureWarning) - deprmodule = importlib.import_module(self.deprmod) - return deprmodule diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 566ceec027b2b..a223e4d8fd23e 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -10,53 +10,48 @@ import os import subprocess import locale -import unittest import traceback from datetime import datetime -from functools import wraps, partial +from functools import wraps from contextlib import contextmanager -from distutils.version import LooseVersion from numpy.random import randn, rand -import pytest import numpy as np import pandas as pd -from pandas.types.missing import array_equivalent -from pandas.types.common import (is_datetimelike_v_numeric, - is_datetimelike_v_object, - is_number, is_bool, - needs_i8_conversion, - is_categorical_dtype, - is_sequence, - is_list_like) -from pandas.formats.printing import pprint_thing +from pandas.core.dtypes.missing import array_equivalent +from pandas.core.dtypes.common import ( + is_datetimelike_v_numeric, + is_datetimelike_v_object, + is_number, is_bool, + needs_i8_conversion, + is_categorical_dtype, + is_interval_dtype, + is_sequence, + is_list_like) +from pandas.io.formats.printing import pprint_thing from pandas.core.algorithms import take_1d +import pandas.core.common as com import pandas.compat as compat from pandas.compat import ( filter, map, zip, range, unichr, lrange, lmap, lzip, u, callable, Counter, - raise_with_traceback, httplib, is_platform_windows, is_platform_32bit, - PY3 -) + raise_with_traceback, httplib, StringIO, PY3) -from pandas.computation import expressions as expr +from pandas import (bdate_range, CategoricalIndex, Categorical, IntervalIndex, + DatetimeIndex, 
TimedeltaIndex, PeriodIndex, RangeIndex, + Index, MultiIndex, + Series, DataFrame, Panel) -from pandas import (bdate_range, CategoricalIndex, Categorical, DatetimeIndex, - TimedeltaIndex, PeriodIndex, RangeIndex, Index, MultiIndex, - Series, DataFrame, Panel, Panel4D) -from pandas.util.decorators import deprecate -from pandas import _testing +from pandas._libs import testing as _testing from pandas.io.common import urlopen -slow = pytest.mark.slow N = 30 K = 4 _RAISE_NETWORK_ERROR_DEFAULT = False - # set testing_mode _testing_mode_warnings = (DeprecationWarning, compat.ResourceWarning) @@ -74,62 +69,156 @@ def reset_testing_mode(): if 'deprecate' in testing_mode: warnings.simplefilter('ignore', _testing_mode_warnings) + set_testing_mode() -class TestCase(unittest.TestCase): +def reset_display_options(): + """ + Reset the display options for printing and representing objects. + """ + + pd.reset_option('^display.', silent=True) + - @classmethod - def setUpClass(cls): - pd.set_option('chained_assignment', 'raise') +def round_trip_pickle(obj, path=None): + """ + Pickle an object and then read it again. - @classmethod - def tearDownClass(cls): - pass + Parameters + ---------- + obj : pandas object + The object to pickle and then re-read. + path : str, default None + The path where the pickled object is written and then read. + + Returns + ------- + round_trip_pickled_object : pandas object + The original object that was pickled and then re-read. + """ + + if path is None: + path = u('__{random_bytes}__.pickle'.format(random_bytes=rands(10))) + with ensure_clean(path) as path: + pd.to_pickle(obj, path) + return pd.read_pickle(path) + + +def round_trip_pathlib(writer, reader, path=None): + """ + Write an object to file specified by a pathlib.Path and read it back + + Parameters + ---------- + writer : callable bound to pandas object + IO writing function (e.g. DataFrame.to_csv ) + reader : callable + IO reading function (e.g. 
pd.read_csv ) + path : str, default None + The path where the object is written and then read. + + Returns + ------- + round_trip_object : pandas object + The original object that was serialized and then re-read. + """ + + import pytest + Path = pytest.importorskip('pathlib').Path + if path is None: + path = '___pathlib___' + with ensure_clean(path) as path: + writer(Path(path)) + obj = reader(Path(path)) + return obj + + +def round_trip_localpath(writer, reader, path=None): + """ + Write an object to file specified by a py.path LocalPath and read it back + + Parameters + ---------- + writer : callable bound to pandas object + IO writing function (e.g. DataFrame.to_csv ) + reader : callable + IO reading function (e.g. pd.read_csv ) + path : str, default None + The path where the object is written and then read. + + Returns + ------- + round_trip_object : pandas object + The original object that was serialized and then re-read. + """ + import pytest + LocalPath = pytest.importorskip('py.path').local + if path is None: + path = '___localpath___' + with ensure_clean(path) as path: + writer(LocalPath(path)) + obj = reader(LocalPath(path)) + return obj - def reset_display_options(self): - # reset the display options - pd.reset_option('^display.', silent=True) - def round_trip_pickle(self, obj, path=None): - if path is None: - path = u('__%s__.pickle' % rands(10)) - with ensure_clean(path) as path: - pd.to_pickle(obj, path) - return pd.read_pickle(path) +@contextmanager +def decompress_file(path, compression): + """ + Open a compressed file and return a file object - # https://docs.python.org/3/library/unittest.html#deprecated-aliases - def assertEquals(self, *args, **kwargs): - return deprecate('assertEquals', - self.assertEqual)(*args, **kwargs) + Parameters + ---------- + path : str + The path where the file is read from - def assertNotEquals(self, *args, **kwargs): - return deprecate('assertNotEquals', - self.assertNotEqual)(*args, **kwargs) + compression : {'gzip', 
'bz2', 'xz', None} + Name of the decompression to use - def assert_(self, *args, **kwargs): - return deprecate('assert_', - self.assertTrue)(*args, **kwargs) + Returns + ------- + f : file object + """ - def assertAlmostEquals(self, *args, **kwargs): - return deprecate('assertAlmostEquals', - self.assertAlmostEqual)(*args, **kwargs) + if compression is None: + f = open(path, 'rb') + elif compression == 'gzip': + import gzip + f = gzip.open(path, 'rb') + elif compression == 'bz2': + import bz2 + f = bz2.BZ2File(path, 'rb') + elif compression == 'xz': + lzma = compat.import_lzma() + f = lzma.LZMAFile(path, 'rb') + elif compression == 'zip': + import zipfile + zip_file = zipfile.ZipFile(path) + zip_names = zip_file.namelist() + if len(zip_names) == 1: + f = zip_file.open(zip_names.pop()) + else: + raise ValueError('ZIP file {} error. Only one file per ZIP.' + .format(path)) + else: + msg = 'Unrecognized compression type: {}'.format(compression) + raise ValueError(msg) - def assertNotAlmostEquals(self, *args, **kwargs): - return deprecate('assertNotAlmostEquals', - self.assertNotAlmostEqual)(*args, **kwargs) + yield f + f.close() def assert_almost_equal(left, right, check_exact=False, check_dtype='equiv', check_less_precise=False, **kwargs): - """Check that left and right Index are equal. + """ + Check that the left and right objects are approximately equal. Parameters ---------- left : object right : object - check_exact : bool, default True + check_exact : bool, default False Whether to compare number exactly. check_dtype: bool, default True check dtype if both a and b are the same type @@ -179,11 +268,37 @@ def assert_almost_equal(left, right, check_exact=False, **kwargs) -def assert_dict_equal(left, right, compare_keys=True): +def _check_isinstance(left, right, cls): + """ + Helper method for our assert_* methods that ensures that + the two objects being compared have the right type before + proceeding with the comparison. 
+ + Parameters + ---------- + left : The first object being compared. + right : The second object being compared. + cls : The class type to check against. - assertIsInstance(left, dict, '[dict] ') - assertIsInstance(right, dict, '[dict] ') + Raises + ------ + AssertionError : Either `left` or `right` is not an instance of `cls`. + """ + + err_msg = "{name} Expected type {exp_type}, found {act_type} instead" + cls_name = cls.__name__ + + if not isinstance(left, cls): + raise AssertionError(err_msg.format(name=cls_name, exp_type=cls, + act_type=type(left))) + if not isinstance(right, cls): + raise AssertionError(err_msg.format(name=cls_name, exp_type=cls, + act_type=type(right))) + + +def assert_dict_equal(left, right, compare_keys=True): + _check_isinstance(left, right, dict) return _testing.assert_dict_equal(left, right, compare_keys=compare_keys) @@ -247,173 +362,6 @@ def close(fignum=None): _close(fignum) -def _skip_if_32bit(): - import pytest - if is_platform_32bit(): - pytest.skip("skipping for 32 bit") - - -def mplskip(cls): - """Skip a TestCase instance if matplotlib isn't installed""" - - @classmethod - def setUpClass(cls): - try: - import matplotlib as mpl - mpl.use("Agg", warn=False) - except ImportError: - import pytest - pytest.skip("matplotlib not installed") - - cls.setUpClass = setUpClass - return cls - - -def _skip_if_no_mpl(): - try: - import matplotlib # noqa - except ImportError: - import pytest - pytest.skip("matplotlib not installed") - - -def _skip_if_mpl_1_5(): - import matplotlib - v = matplotlib.__version__ - if v > LooseVersion('1.4.3') or v[0] == '0': - import pytest - pytest.skip("matplotlib 1.5") - - -def _skip_if_no_scipy(): - try: - import scipy.stats # noqa - except ImportError: - import pytest - pytest.skip("no scipy.stats module") - try: - import scipy.interpolate # noqa - except ImportError: - import pytest - pytest.skip('scipy.interpolate missing') - - -def _skip_if_scipy_0_17(): - import scipy - v = scipy.__version__ - if v >= 
LooseVersion("0.17.0"): - import pytest - pytest.skip("scipy 0.17") - - -def _skip_if_no_lzma(): - try: - return compat.import_lzma() - except ImportError: - import pytest - pytest.skip('need backports.lzma to run') - - -def _skip_if_no_xarray(): - try: - import xarray - except ImportError: - import pytest - pytest.skip("xarray not installed") - - v = xarray.__version__ - if v < LooseVersion('0.7.0'): - import pytest - pytest.skip("xarray not version is too low: {0}".format(v)) - - -def _skip_if_no_pytz(): - try: - import pytz # noqa - except ImportError: - import pytest - pytest.skip("pytz not installed") - - -def _skip_if_no_dateutil(): - try: - import dateutil # noqa - except ImportError: - import pytest - pytest.skip("dateutil not installed") - - -def _skip_if_windows_python_3(): - if PY3 and is_platform_windows(): - import pytest - pytest.skip("not used on python 3/win32") - - -def _skip_if_windows(): - if is_platform_windows(): - import pytest - pytest.skip("Running on Windows") - - -def _skip_if_no_pathlib(): - try: - from pathlib import Path # noqa - except ImportError: - import pytest - pytest.skip("pathlib not available") - - -def _skip_if_no_localpath(): - try: - from py.path import local as LocalPath # noqa - except ImportError: - import pytest - pytest.skip("py.path not installed") - - -def _incompat_bottleneck_version(method): - """ skip if we have bottleneck installed - and its >= 1.0 - as we don't match the nansum/nanprod behavior for all-nan - ops, see GH9422 - """ - if method not in ['sum', 'prod']: - return False - try: - import bottleneck as bn - return bn.__version__ >= LooseVersion('1.0') - except ImportError: - return False - - -def skip_if_no_ne(engine='numexpr'): - from pandas.computation.expressions import (_USE_NUMEXPR, - _NUMEXPR_INSTALLED) - - if engine == 'numexpr': - if not _USE_NUMEXPR: - import pytest - pytest.skip("numexpr enabled->{enabled}, " - "installed->{installed}".format( - enabled=_USE_NUMEXPR, - 
installed=_NUMEXPR_INSTALLED)) - - -def _skip_if_has_locale(): - import locale - lang, _ = locale.getlocale() - if lang is not None: - import pytest - pytest.skip("Specific locale is set {0}".format(lang)) - - -def _skip_if_not_us_locale(): - import locale - lang, _ = locale.getlocale() - if lang != 'en_US': - import pytest - pytest.skip("Specific locale is set {0}".format(lang)) - # ----------------------------------------------------------------------------- # locale utilities @@ -457,8 +405,8 @@ def _default_locale_getter(): try: raw_locales = check_output(['locale -a'], shell=True) except subprocess.CalledProcessError as e: - raise type(e)("%s, the 'locale -a' command cannot be found on your " - "system" % e) + raise type(e)("{exception}, the 'locale -a' command cannot be found " + "on your system".format(exception=e)) return raw_locales @@ -493,7 +441,7 @@ def get_locales(prefix=None, normalize=True, """ try: raw_locales = locale_getter() - except: + except Exception: return None try: @@ -515,7 +463,8 @@ def get_locales(prefix=None, normalize=True, if prefix is None: return _valid_locales(out_locales, normalize) - found = re.compile('%s.*' % prefix).findall('\n'.join(out_locales)) + found = re.compile('{prefix}.*'.format(prefix=prefix)) \ + .findall('\n'.join(out_locales)) return _valid_locales(found, normalize) @@ -546,7 +495,7 @@ def set_locale(new_locale, lc_var=locale.LC_ALL): except ValueError: yield new_locale else: - if all(lc is not None for lc in normalized_locale): + if com._all_not_none(*normalized_locale): yield '.'.join(normalized_locale) else: yield new_locale @@ -599,6 +548,105 @@ def _valid_locales(locales, normalize): return list(filter(_can_set_locale, map(normalizer, locales))) +# ----------------------------------------------------------------------------- +# Stdout / stderr decorators + + +def capture_stdout(f): + """ + Decorator to capture stdout in a buffer so that it can be checked + (or suppressed) during testing. 
+ + Parameters + ---------- + f : callable + The test that is capturing stdout. + + Returns + ------- + f : callable + The decorated test ``f``, which captures stdout. + + Examples + -------- + + >>> from pandas.util.testing import capture_stdout + >>> + >>> import sys + >>> + >>> @capture_stdout + ... def test_print_pass(): + ... print("foo") + ... out = sys.stdout.getvalue() + ... assert out == "foo\n" + >>> + >>> @capture_stdout + ... def test_print_fail(): + ... print("foo") + ... out = sys.stdout.getvalue() + ... assert out == "bar\n" + ... + AssertionError: assert 'foo\n' == 'bar\n' + """ + + @wraps(f) + def wrapper(*args, **kwargs): + try: + sys.stdout = StringIO() + f(*args, **kwargs) + finally: + sys.stdout = sys.__stdout__ + + return wrapper + + +def capture_stderr(f): + """ + Decorator to capture stderr in a buffer so that it can be checked + (or suppressed) during testing. + + Parameters + ---------- + f : callable + The test that is capturing stderr. + + Returns + ------- + f : callable + The decorated test ``f``, which captures stderr. + + Examples + -------- + + >>> from pandas.util.testing import capture_stderr + >>> + >>> import sys + >>> + >>> @capture_stderr + ... def test_stderr_pass(): + ... sys.stderr.write("foo") + ... out = sys.stderr.getvalue() + ... assert out == "foo\n" + >>> + >>> @capture_stderr + ... def test_stderr_fail(): + ... sys.stderr.write("foo") + ... out = sys.stderr.getvalue() + ... assert out == "bar\n" + ... 
+ AssertionError: assert 'foo\n' == 'bar\n' + """ + + @wraps(f) + def wrapper(*args, **kwargs): + try: + sys.stderr = StringIO() + f(*args, **kwargs) + finally: + sys.stderr = sys.__stderr__ + + return wrapper + # ----------------------------------------------------------------------------- # Console debugging tools @@ -624,7 +672,7 @@ def set_trace(): from IPython.core.debugger import Pdb try: Pdb(color_scheme='Linux').set_trace(sys._getframe().f_back) - except: + except Exception: from pdb import Pdb as OldPdb OldPdb().set_trace(sys._getframe().f_back) @@ -671,13 +719,13 @@ def ensure_clean(filename=None, return_filelike=False): try: os.close(fd) except Exception as e: - print("Couldn't close file descriptor: %d (file: %s)" % - (fd, filename)) + print("Couldn't close file descriptor: {fdesc} (file: {fname})" + .format(fdesc=fd, fname=filename)) try: if os.path.exists(filename): os.remove(filename) except Exception as e: - print("Exception on removing file: %s" % e) + print("Exception on removing file: {error}".format(error=e)) def get_data_path(f=''): @@ -699,23 +747,6 @@ def equalContents(arr1, arr2): return frozenset(arr1) == frozenset(arr2) -def assert_equal(a, b, msg=""): - """asserts that a equals b, like nose's assert_equal, - but allows custom message to start. Passes a and b to - format string as well. So you can use '{0}' and '{1}' - to display a and b. - - Examples - -------- - >>> assert_equal(2, 2, "apples") - >>> assert_equal(5.2, 1.2, "{0} was really a dead parrot") - Traceback (most recent call last): - ... 
- AssertionError: 5.2 was really a dead parrot: 5.2 != 1.2 - """ - assert a == b, "%s: %r != %r" % (msg.format(a, b), a, b) - - def assert_index_equal(left, right, exact='equiv', check_names=True, check_less_precise=False, check_exact=True, check_categorical=True, obj='Index'): @@ -727,8 +758,8 @@ def assert_index_equal(left, right, exact='equiv', check_names=True, right : Index exact : bool / string {'equiv'}, default False Whether to check the Index class, dtype and inferred_type - are identical. If 'equiv', then RangeIndex can be substitued for - Int64Index as well + are identical. If 'equiv', then RangeIndex can be substituted for + Int64Index as well. check_names : bool, default True Whether to check the names attribute. check_less_precise : bool or int, default False @@ -750,7 +781,7 @@ def _check_types(l, r, obj='Index'): assert_attr_equal('dtype', l, r, obj=obj) # allow string-like to have different inferred_types if l.inferred_type in ('string', 'unicode'): - assertIn(r.inferred_type, ('string', 'unicode')) + assert r.inferred_type in ('string', 'unicode') else: assert_attr_equal('inferred_type', l, r, obj=obj) @@ -763,23 +794,24 @@ def _get_ilevel_values(index, level): return values # instance validation - assertIsInstance(left, Index, '[index] ') - assertIsInstance(right, Index, '[index] ') + _check_isinstance(left, right, Index) # class / dtype comparison _check_types(left, right, obj=obj) # level comparison if left.nlevels != right.nlevels: - raise_assert_detail(obj, '{0} levels are different'.format(obj), - '{0}, {1}'.format(left.nlevels, left), - '{0}, {1}'.format(right.nlevels, right)) + msg1 = '{obj} levels are different'.format(obj=obj) + msg2 = '{nlevels}, {left}'.format(nlevels=left.nlevels, left=left) + msg3 = '{nlevels}, {right}'.format(nlevels=right.nlevels, right=right) + raise_assert_detail(obj, msg1, msg2, msg3) # length comparison if len(left) != len(right): - raise_assert_detail(obj, '{0} length are different'.format(obj), - '{0}, 
{1}'.format(len(left), left), - '{0}, {1}'.format(len(right), right)) + msg1 = '{obj} length are different'.format(obj=obj) + msg2 = '{length}, {left}'.format(length=len(left), left=left) + msg3 = '{length}, {right}'.format(length=len(right), right=right) + raise_assert_detail(obj, msg1, msg2, msg3) # MultiIndex special comparison for little-friendly error messages if left.nlevels > 1: @@ -788,7 +820,7 @@ def _get_ilevel_values(index, level): llevel = _get_ilevel_values(left, level) rlevel = _get_ilevel_values(right, level) - lobj = 'MultiIndex level [{0}]'.format(level) + lobj = 'MultiIndex level [{level}]'.format(level=level) assert_index_equal(llevel, rlevel, exact=exact, check_names=check_names, check_less_precise=check_less_precise, @@ -800,8 +832,8 @@ def _get_ilevel_values(index, level): if not left.equals(right): diff = np.sum((left.values != right.values) .astype(int)) * 100.0 / len(left) - msg = '{0} values are different ({1} %)'\ - .format(obj, np.round(diff, 5)) + msg = '{obj} values are different ({pct} %)'.format( + obj=obj, pct=np.round(diff, 5)) raise_assert_detail(obj, msg, left, right) else: _testing.assert_almost_equal(left.values, right.values, @@ -814,11 +846,14 @@ def _get_ilevel_values(index, level): assert_attr_equal('names', left, right, obj=obj) if isinstance(left, pd.PeriodIndex) or isinstance(right, pd.PeriodIndex): assert_attr_equal('freq', left, right, obj=obj) + if (isinstance(left, pd.IntervalIndex) or + isinstance(right, pd.IntervalIndex)): + assert_attr_equal('closed', left, right, obj=obj) if check_categorical: if is_categorical_dtype(left) or is_categorical_dtype(right): assert_categorical_equal(left.values, right.values, - obj='{0} category'.format(obj)) + obj='{obj} category'.format(obj=obj)) def assert_class_equal(left, right, exact=True, obj='Input'): @@ -839,12 +874,12 @@ def repr_class(x): # allow equivalence of Int64Index/RangeIndex types = set([type(left).__name__, type(right).__name__]) if len(types - set(['Int64Index', 
'RangeIndex'])): - msg = '{0} classes are not equivalent'.format(obj) + msg = '{obj} classes are not equivalent'.format(obj=obj) raise_assert_detail(obj, msg, repr_class(left), repr_class(right)) elif exact: if type(left) != type(right): - msg = '{0} classes are different'.format(obj) + msg = '{obj} classes are different'.format(obj=obj) raise_assert_detail(obj, msg, repr_class(left), repr_class(right)) @@ -884,23 +919,22 @@ def assert_attr_equal(attr, left, right, obj='Attributes'): if result: return True else: - raise_assert_detail(obj, 'Attribute "{0}" are different'.format(attr), - left_attr, right_attr) + msg = 'Attribute "{attr}" are different'.format(attr=attr) + raise_assert_detail(obj, msg, left_attr, right_attr) def assert_is_valid_plot_return_object(objs): import matplotlib.pyplot as plt if isinstance(objs, (pd.Series, np.ndarray)): for el in objs.ravel(): - msg = ('one of \'objs\' is not a matplotlib Axes instance, ' - 'type encountered {0!r}') - assert isinstance(el, (plt.Axes, dict)), msg.format( - el.__class__.__name__) + msg = ('one of \'objs\' is not a matplotlib Axes instance, type ' + 'encountered {name!r}').format(name=el.__class__.__name__) + assert isinstance(el, (plt.Axes, dict)), msg else: assert isinstance(objs, (plt.Artist, tuple, dict)), \ ('objs is neither an ndarray of Artist instances nor a ' - 'single Artist instance, tuple, or dict, "objs" is a {0!r} ' - ''.format(objs.__class__.__name__)) + 'single Artist instance, tuple, or dict, "objs" is a {name!r}' + ).format(name=objs.__class__.__name__) def isiterable(obj): @@ -914,66 +948,9 @@ def is_sorted(seq): return assert_numpy_array_equal(seq, np.sort(np.array(seq))) -def assertIs(first, second, msg=''): - """Checks that 'first' is 'second'""" - a, b = first, second - assert a is b, "%s: %r is not %r" % (msg.format(a, b), a, b) - - -def assertIsNot(first, second, msg=''): - """Checks that 'first' is not 'second'""" - a, b = first, second - assert a is not b, "%s: %r is %r" % 
(msg.format(a, b), a, b) - - -def assertIn(first, second, msg=''): - """Checks that 'first' is in 'second'""" - a, b = first, second - assert a in b, "%s: %r is not in %r" % (msg.format(a, b), a, b) - - -def assertNotIn(first, second, msg=''): - """Checks that 'first' is not in 'second'""" - a, b = first, second - assert a not in b, "%s: %r is in %r" % (msg.format(a, b), a, b) - - -def assertIsNone(expr, msg=''): - """Checks that 'expr' is None""" - return assertIs(expr, None, msg) - - -def assertIsNotNone(expr, msg=''): - """Checks that 'expr' is not None""" - return assertIsNot(expr, None, msg) - - -def assertIsInstance(obj, cls, msg=''): - """Test that obj is an instance of cls - (which can be a class or a tuple of classes, - as supported by isinstance()).""" - if not isinstance(obj, cls): - err_msg = "{0}Expected type {1}, found {2} instead" - raise AssertionError(err_msg.format(msg, cls, type(obj))) - - -def assert_isinstance(obj, class_type_or_tuple, msg=''): - return deprecate('assert_isinstance', assertIsInstance)( - obj, class_type_or_tuple, msg=msg) - - -def assertNotIsInstance(obj, cls, msg=''): - """Test that obj is not an instance of cls - (which can be a class or a tuple of classes, - as supported by isinstance()).""" - if isinstance(obj, cls): - err_msg = "{0}Input must not be type {1}" - raise AssertionError(err_msg.format(msg, cls)) - - def assert_categorical_equal(left, right, check_dtype=True, obj='Categorical', check_category_order=True): - """Test that categoricals are eqivalent + """Test that Categoricals are equivalent. Parameters ---------- @@ -990,22 +967,21 @@ def assert_categorical_equal(left, right, check_dtype=True, values are compared. The ordered attribute is checked regardless. 
""" - assertIsInstance(left, pd.Categorical, '[Categorical] ') - assertIsInstance(right, pd.Categorical, '[Categorical] ') + _check_isinstance(left, right, Categorical) if check_category_order: assert_index_equal(left.categories, right.categories, - obj='{0}.categories'.format(obj)) + obj='{obj}.categories'.format(obj=obj)) assert_numpy_array_equal(left.codes, right.codes, check_dtype=check_dtype, - obj='{0}.codes'.format(obj)) + obj='{obj}.codes'.format(obj=obj)) else: assert_index_equal(left.categories.sort_values(), right.categories.sort_values(), - obj='{0}.categories'.format(obj)) + obj='{obj}.categories'.format(obj=obj)) assert_index_equal(left.categories.take(left.codes), right.categories.take(right.codes), - obj='{0}.values'.format(obj)) + obj='{obj}.values'.format(obj=obj)) assert_attr_equal('ordered', left, right, obj=obj) @@ -1013,17 +989,21 @@ def assert_categorical_equal(left, right, check_dtype=True, def raise_assert_detail(obj, message, left, right, diff=None): if isinstance(left, np.ndarray): left = pprint_thing(left) + elif is_categorical_dtype(left): + left = repr(left) if isinstance(right, np.ndarray): right = pprint_thing(right) + elif is_categorical_dtype(right): + right = repr(right) - msg = """{0} are different + msg = """{obj} are different -{1} -[left]: {2} -[right]: {3}""".format(obj, message, left, right) +{message} +[left]: {left} +[right]: {right}""".format(obj=obj, message=message, left=left, right=right) if diff is not None: - msg = msg + "\n[diff]: {diff}".format(diff=diff) + msg += "\n[diff]: {diff}".format(diff=diff) raise AssertionError(msg) @@ -1051,25 +1031,33 @@ def assert_numpy_array_equal(left, right, strict_nan=False, """ # instance validation - # to show a detailed erorr message when classes are different + # Show a detailed error message when classes are different assert_class_equal(left, right, obj=obj) # both classes must be an np.ndarray - assertIsInstance(left, np.ndarray, '[ndarray] ') - assertIsInstance(right, 
np.ndarray, '[ndarray] ') + _check_isinstance(left, right, np.ndarray) def _get_base(obj): return obj.base if getattr(obj, 'base', None) is not None else obj + left_base = _get_base(left) + right_base = _get_base(right) + if check_same == 'same': - assertIs(_get_base(left), _get_base(right)) + if left_base is not right_base: + msg = "{left!r} is not {right!r}".format( + left=left_base, right=right_base) + raise AssertionError(msg) elif check_same == 'copy': - assertIsNot(_get_base(left), _get_base(right)) + if left_base is right_base: + msg = "{left!r} is {right!r}".format( + left=left_base, right=right_base) + raise AssertionError(msg) def _raise(left, right, err_msg): if err_msg is None: if left.shape != right.shape: - raise_assert_detail(obj, '{0} shapes are different' - .format(obj), left.shape, right.shape) + raise_assert_detail(obj, '{obj} shapes are different' + .format(obj=obj), left.shape, right.shape) diff = 0 for l, r in zip(left, right): @@ -1078,8 +1066,8 @@ def _raise(left, right, err_msg): diff += 1 diff = diff * 100.0 / left.size - msg = '{0} values are different ({1} %)'\ - .format(obj, np.round(diff, 5)) + msg = '{obj} values are different ({pct} %)'.format( + obj=obj, pct=np.round(diff, 5)) raise_assert_detail(obj, msg, left, right) raise AssertionError(err_msg) @@ -1126,7 +1114,7 @@ def assert_series_equal(left, right, check_dtype=True, Whether to compare number exactly. check_names : bool, default True Whether to check the Series and Index names attribute. - check_dateteimelike_compat : bool, default False + check_datetimelike_compat : bool, default False Compare datetime-like which is comparable ignoring dtype. check_categorical : bool, default True Whether to compare internal Categorical exactly. 
@@ -1136,20 +1124,19 @@ def assert_series_equal(left, right, check_dtype=True, """ # instance validation - assertIsInstance(left, Series, '[Series] ') - assertIsInstance(right, Series, '[Series] ') + _check_isinstance(left, right, Series) if check_series_type: # ToDo: There are some tests using rhs is sparse # lhs is dense. Should use assert_class_equal in future - assertIsInstance(left, type(right)) + assert isinstance(left, type(right)) # assert_class_equal(left, right, obj=obj) # length comparison if len(left) != len(right): - raise_assert_detail(obj, 'Series length are different', - '{0}, {1}'.format(len(left), left.index), - '{0}, {1}'.format(len(right), right.index)) + msg1 = '{len}, {left}'.format(len=len(left), left=left.index) + msg2 = '{len}, {right}'.format(len=len(right), right=right.index) + raise_assert_detail(obj, 'Series length are different', msg1, msg2) # index comparison assert_index_equal(left.index, right.index, exact=check_index_type, @@ -1157,15 +1144,22 @@ def assert_series_equal(left, right, check_dtype=True, check_less_precise=check_less_precise, check_exact=check_exact, check_categorical=check_categorical, - obj='{0}.index'.format(obj)) + obj='{obj}.index'.format(obj=obj)) if check_dtype: - assert_attr_equal('dtype', left, right) + # We want to skip exact dtype checking when `check_categorical` + # is False. We'll still raise if only one is a `Categorical`, + # regardless of `check_categorical` + if (is_categorical_dtype(left) and is_categorical_dtype(right) and + not check_categorical): + pass + else: + assert_attr_equal('dtype', left, right) if check_exact: assert_numpy_array_equal(left.get_values(), right.get_values(), check_dtype=check_dtype, - obj='{0}'.format(obj),) + obj='{obj}'.format(obj=obj),) elif check_datetimelike_compat: # we want to check only if we have compat dtypes # e.g. 
integer and M|m are NOT compat, but we can simply check @@ -1178,16 +1172,23 @@ def assert_series_equal(left, right, check_dtype=True, # datetimelike may have different objects (e.g. datetime.datetime # vs Timestamp) but will compare equal if not Index(left.values).equals(Index(right.values)): - msg = '[datetimelike_compat=True] {0} is not equal to {1}.' - raise AssertionError(msg.format(left.values, right.values)) + msg = ('[datetimelike_compat=True] {left} is not equal to ' + '{right}.').format(left=left.values, right=right.values) + raise AssertionError(msg) else: assert_numpy_array_equal(left.get_values(), right.get_values(), check_dtype=check_dtype) + elif is_interval_dtype(left) or is_interval_dtype(right): + # TODO: big hack here + left = pd.IntervalIndex(left) + right = pd.IntervalIndex(right) + assert_index_equal(left, right, obj='{obj}.index'.format(obj=obj)) + else: _testing.assert_almost_equal(left.get_values(), right.get_values(), check_less_precise=check_less_precise, check_dtype=check_dtype, - obj='{0}'.format(obj)) + obj='{obj}'.format(obj=obj)) # metadata comparison if check_names: @@ -1196,7 +1197,7 @@ def assert_series_equal(left, right, check_dtype=True, if check_categorical: if is_categorical_dtype(left) or is_categorical_dtype(right): assert_categorical_equal(left.values, right.values, - obj='{0} category'.format(obj)) + obj='{obj} category'.format(obj=obj)) # This could be refactored to use the NDFrame.equals method @@ -1239,53 +1240,43 @@ def assert_frame_equal(left, right, check_dtype=True, If True, compare by blocks. check_exact : bool, default False Whether to compare number exactly. - check_dateteimelike_compat : bool, default False + check_datetimelike_compat : bool, default False Compare datetime-like which is comparable ignoring dtype. check_categorical : bool, default True Whether to compare internal Categorical exactly. 
check_like : bool, default False - If true, then reindex_like operands + If true, ignore the order of rows & columns obj : str, default 'DataFrame' Specify object name being compared, internally used to show appropriate assertion message """ # instance validation - assertIsInstance(left, DataFrame, '[DataFrame] ') - assertIsInstance(right, DataFrame, '[DataFrame] ') + _check_isinstance(left, right, DataFrame) if check_frame_type: # ToDo: There are some tests using rhs is SparseDataFrame # lhs is DataFrame. Should use assert_class_equal in future - assertIsInstance(left, type(right)) + assert isinstance(left, type(right)) # assert_class_equal(left, right, obj=obj) + # shape comparison + if left.shape != right.shape: + raise_assert_detail(obj, + 'DataFrame shape mismatch', + '{shape!r}'.format(shape=left.shape), + '{shape!r}'.format(shape=right.shape)) + if check_like: left, right = left.reindex_like(right), right - # shape comparison (row) - if left.shape[0] != right.shape[0]: - raise_assert_detail(obj, - 'DataFrame shape (number of rows) are different', - '{0}, {1}'.format(left.shape[0], left.index), - '{0}, {1}'.format(right.shape[0], right.index)) - # shape comparison (columns) - if left.shape[1] != right.shape[1]: - raise_assert_detail(obj, - 'DataFrame shape (number of columns) ' - 'are different', - '{0}, {1}'.format(left.shape[1], - left.columns), - '{0}, {1}'.format(right.shape[1], - right.columns)) - # index comparison assert_index_equal(left.index, right.index, exact=check_index_type, check_names=check_names, check_less_precise=check_less_precise, check_exact=check_exact, check_categorical=check_categorical, - obj='{0}.index'.format(obj)) + obj='{obj}.index'.format(obj=obj)) # column comparison assert_index_equal(left.columns, right.columns, exact=check_column_type, @@ -1293,12 +1284,12 @@ def assert_frame_equal(left, right, check_dtype=True, check_less_precise=check_less_precise, check_exact=check_exact, check_categorical=check_categorical, - 
obj='{0}.columns'.format(obj)) + obj='{obj}.columns'.format(obj=obj)) # compare by blocks if by_blocks: - rblocks = right.blocks - lblocks = left.blocks + rblocks = right._to_dict_of_blocks() + lblocks = left._to_dict_of_blocks() for dtype in list(set(list(lblocks.keys()) + list(rblocks.keys()))): assert dtype in lblocks assert dtype in rblocks @@ -1318,17 +1309,16 @@ def assert_frame_equal(left, right, check_dtype=True, check_exact=check_exact, check_names=check_names, check_datetimelike_compat=check_datetimelike_compat, check_categorical=check_categorical, - obj='DataFrame.iloc[:, {0}]'.format(i)) + obj='DataFrame.iloc[:, {idx}]'.format(idx=i)) -def assert_panelnd_equal(left, right, - check_dtype=True, - check_panel_type=False, - check_less_precise=False, - assert_func=assert_frame_equal, - check_names=False, - by_blocks=False, - obj='Panel'): +def assert_panel_equal(left, right, + check_dtype=True, + check_panel_type=False, + check_less_precise=False, + check_names=False, + by_blocks=False, + obj='Panel'): """Check that left and right Panels are equal. Parameters @@ -1343,7 +1333,6 @@ def assert_panelnd_equal(left, right, Specify comparison precision. Only used when check_exact is False. 5 digits (False) or 3 digits (True) after decimal points are compared. If int, then specify the digits to compare - assert_func : function for comparing data check_names : bool, default True Whether to check the Index names attribute. 
by_blocks : bool, default False @@ -1363,8 +1352,8 @@ def assert_panelnd_equal(left, right, assert_index_equal(left_ind, right_ind, check_names=check_names) if by_blocks: - rblocks = right.blocks - lblocks = left.blocks + rblocks = right._to_dict_of_blocks() + lblocks = left._to_dict_of_blocks() for dtype in list(set(list(lblocks.keys()) + list(rblocks.keys()))): assert dtype in lblocks assert dtype in rblocks @@ -1373,20 +1362,17 @@ def assert_panelnd_equal(left, right, # can potentially be slow for i, item in enumerate(left._get_axis(0)): - assert item in right, "non-matching item (right) '%s'" % item + msg = "non-matching item (right) '{item}'".format(item=item) + assert item in right, msg litem = left.iloc[i] ritem = right.iloc[i] - assert_func(litem, ritem, check_less_precise=check_less_precise) + assert_frame_equal(litem, ritem, + check_less_precise=check_less_precise, + check_names=check_names) for i, item in enumerate(right._get_axis(0)): - assert item in left, "non-matching item (left) '%s'" % item - -# TODO: strangely check_names fails in py3 ? -_panel_frame_equal = partial(assert_frame_equal, check_names=False) -assert_panel_equal = partial(assert_panelnd_equal, - assert_func=_panel_frame_equal) -assert_panel4d_equal = partial(assert_panelnd_equal, - assert_func=assert_panel_equal) + msg = "non-matching item (left) '{item}'".format(item=item) + assert item in left, msg # ----------------------------------------------------------------------------- @@ -1404,15 +1390,14 @@ def assert_sp_array_equal(left, right, check_dtype=True): Whether to check the data dtype is identical. 
""" - assertIsInstance(left, pd.SparseArray, '[SparseArray]') - assertIsInstance(right, pd.SparseArray, '[SparseArray]') + _check_isinstance(left, right, pd.SparseArray) assert_numpy_array_equal(left.sp_values, right.sp_values, check_dtype=check_dtype) # SparseIndex comparison - assertIsInstance(left.sp_index, pd._sparse.SparseIndex, '[SparseIndex]') - assertIsInstance(right.sp_index, pd._sparse.SparseIndex, '[SparseIndex]') + assert isinstance(left.sp_index, pd._libs.sparse.SparseIndex) + assert isinstance(right.sp_index, pd._libs.sparse.SparseIndex) if not left.sp_index.equals(right.sp_index): raise_assert_detail('SparseArray.index', 'index are not equal', @@ -1445,14 +1430,13 @@ def assert_sp_series_equal(left, right, check_dtype=True, exact_indices=True, Specify the object name being compared, internally used to show the appropriate assertion message. """ - assertIsInstance(left, pd.SparseSeries, '[SparseSeries]') - assertIsInstance(right, pd.SparseSeries, '[SparseSeries]') + _check_isinstance(left, right, pd.SparseSeries) if check_series_type: assert_class_equal(left, right, obj=obj) assert_index_equal(left.index, right.index, - obj='{0}.index'.format(obj)) + obj='{obj}.index'.format(obj=obj)) assert_sp_array_equal(left.block.values, right.block.values) @@ -1483,16 +1467,15 @@ def assert_sp_frame_equal(left, right, check_dtype=True, exact_indices=True, Specify the object name being compared, internally used to show the appropriate assertion message. 
""" - assertIsInstance(left, pd.SparseDataFrame, '[SparseDataFrame]') - assertIsInstance(right, pd.SparseDataFrame, '[SparseDataFrame]') + _check_isinstance(left, right, pd.SparseDataFrame) if check_frame_type: assert_class_equal(left, right, obj=obj) assert_index_equal(left.index, right.index, - obj='{0}.index'.format(obj)) + obj='{obj}.index'.format(obj=obj)) assert_index_equal(left.columns, right.columns, - obj='{0}.columns'.format(obj)) + obj='{obj}.columns'.format(obj=obj)) for col, series in compat.iteritems(left): assert (col in right) @@ -1513,20 +1496,13 @@ def assert_sp_frame_equal(left, right, check_dtype=True, exact_indices=True, for col in right: assert (col in left) - -def assert_sp_list_equal(left, right): - assertIsInstance(left, pd.SparseList, '[SparseList]') - assertIsInstance(right, pd.SparseList, '[SparseList]') - - assert_sp_array_equal(left.to_array(), right.to_array()) - # ----------------------------------------------------------------------------- # Others def assert_contains_all(iterable, dic): for k in iterable: - assert k in dic, "Did not contain item: '%r'" % k + assert k in dic, "Did not contain item: '{key!r}'".format(key=k) def assert_copy(iter1, iter2, **eql_kwargs): @@ -1540,10 +1516,10 @@ def assert_copy(iter1, iter2, **eql_kwargs): """ for elem1, elem2 in zip(iter1, iter2): assert_almost_equal(elem1, elem2, **eql_kwargs) - assert elem1 is not elem2, ("Expected object %r and " - "object %r to be different " - "objects, were same." - % (type(elem1), type(elem2))) + msg = ("Expected object {obj1!r} and object {obj2!r} to be " + "different objects, but they were the same object." 
+ ).format(obj1=type(elem1), obj2=type(elem2)) + assert elem1 is not elem2, msg def getCols(k): @@ -1563,10 +1539,16 @@ def makeUnicodeIndex(k=10, name=None): return Index(randu_array(nchars=10, size=k), name=name) -def makeCategoricalIndex(k=10, n=3, name=None): +def makeCategoricalIndex(k=10, n=3, name=None, **kwargs): """ make a length k index or n categories """ x = rands_array(nchars=4, size=n) - return CategoricalIndex(np.random.choice(x, k), name=name) + return CategoricalIndex(np.random.choice(x, k), name=name, **kwargs) + + +def makeIntervalIndex(k=10, name=None, **kwargs): + """ make a length k IntervalIndex """ + x = np.linspace(0, 100, num=(k + 1)) + return IntervalIndex.from_breaks(x, name=name, **kwargs) def makeBoolIndex(k=10, name=None): @@ -1585,8 +1567,8 @@ def makeUIntIndex(k=10, name=None): return Index([2**63 + i for i in lrange(k)], name=name) -def makeRangeIndex(k=10, name=None): - return RangeIndex(0, k, 1, name=name) +def makeRangeIndex(k=10, name=None, **kwargs): + return RangeIndex(0, k, 1, name=name, **kwargs) def makeFloatIndex(k=10, name=None): @@ -1594,22 +1576,28 @@ def makeFloatIndex(k=10, name=None): return Index(values * (10 ** np.random.randint(0, 9)), name=name) -def makeDateIndex(k=10, freq='B', name=None): +def makeDateIndex(k=10, freq='B', name=None, **kwargs): dt = datetime(2000, 1, 1) dr = bdate_range(dt, periods=k, freq=freq, name=name) - return DatetimeIndex(dr, name=name) + return DatetimeIndex(dr, name=name, **kwargs) -def makeTimedeltaIndex(k=10, freq='D', name=None): - return TimedeltaIndex(start='1 day', periods=k, freq=freq, name=name) +def makeTimedeltaIndex(k=10, freq='D', name=None, **kwargs): + return TimedeltaIndex(start='1 day', periods=k, freq=freq, + name=name, **kwargs) -def makePeriodIndex(k=10, name=None): +def makePeriodIndex(k=10, name=None, **kwargs): dt = datetime(2000, 1, 1) - dr = PeriodIndex(start=dt, periods=k, freq='B', name=name) + dr = PeriodIndex(start=dt, periods=k, freq='B', name=name, 
**kwargs) return dr +def makeMultiIndex(k=10, names=None, **kwargs): + return MultiIndex.from_product( + (('foo', 'bar'), (1, 2)), names=names, **kwargs) + + def all_index_generator(k=10): """Generator which can be iterated over to get instances of all the various index classes. @@ -1620,12 +1608,24 @@ def all_index_generator(k=10): """ all_make_index_funcs = [makeIntIndex, makeFloatIndex, makeStringIndex, makeUnicodeIndex, makeDateIndex, makePeriodIndex, - makeTimedeltaIndex, makeBoolIndex, + makeTimedeltaIndex, makeBoolIndex, makeRangeIndex, + makeIntervalIndex, makeCategoricalIndex] for make_index_func in all_make_index_funcs: yield make_index_func(k=k) +def index_subclass_makers_generator(): + make_index_funcs = [ + makeDateIndex, makePeriodIndex, + makeTimedeltaIndex, makeRangeIndex, + makeIntervalIndex, makeCategoricalIndex, + makeMultiIndex + ] + for make_index_func in make_index_funcs: + yield make_index_func + + def all_timeseries_index_generator(k=10): """Generator which can be iterated over to get instances of all the classes which represent time-seires. 
@@ -1659,7 +1659,7 @@ def makeObjectSeries(name=None): def getSeriesData(): index = makeStringIndex(N) - return dict((c, Series(randn(N), index=index)) for c in getCols(K)) + return {c: Series(randn(N), index=index) for c in getCols(K)} def makeTimeSeries(nper=None, freq='B', name=None): @@ -1675,11 +1675,11 @@ def makePeriodSeries(nper=None, name=None): def getTimeSeriesData(nper=None, freq='B'): - return dict((c, makeTimeSeries(nper, freq)) for c in getCols(K)) + return {c: makeTimeSeries(nper, freq) for c in getCols(K)} def getPeriodData(nper=None): - return dict((c, makePeriodSeries(nper)) for c in getCols(K)) + return {c: makePeriodSeries(nper) for c in getCols(K)} # make frame @@ -1716,20 +1716,17 @@ def makePeriodFrame(nper=None): def makePanel(nper=None): - cols = ['Item' + c for c in string.ascii_uppercase[:K - 1]] - data = dict((c, makeTimeDataFrame(nper)) for c in cols) - return Panel.fromDict(data) + with warnings.catch_warnings(record=True): + cols = ['Item' + c for c in string.ascii_uppercase[:K - 1]] + data = {c: makeTimeDataFrame(nper) for c in cols} + return Panel.fromDict(data) def makePeriodPanel(nper=None): - cols = ['Item' + c for c in string.ascii_uppercase[:K - 1]] - data = dict((c, makePeriodFrame(nper)) for c in cols) - return Panel.fromDict(data) - - -def makePanel4D(nper=None): - return Panel4D(dict(l1=makePanel(nper), l2=makePanel(nper), - l3=makePanel(nper))) + with warnings.catch_warnings(record=True): + cols = ['Item' + c for c in string.ascii_uppercase[:K - 1]] + data = {c: makePeriodFrame(nper) for c in cols} + return Panel.fromDict(data) def makeCustomIndex(nentries, nlevels, prefix='#', names=False, ndupe_l=None, @@ -1787,27 +1784,28 @@ def makeCustomIndex(nentries, nlevels, prefix='#', names=False, ndupe_l=None, idx.name = names[0] return idx elif idx_type is not None: - raise ValueError('"%s" is not a legal value for `idx_type`, use ' - '"i"/"f"/"s"/"u"/"dt/"p"/"td".' 
% idx_type) + raise ValueError('"{idx_type}" is not a legal value for `idx_type`, ' + 'use "i"/"f"/"s"/"u"/"dt/"p"/"td".' + .format(idx_type=idx_type)) if len(ndupe_l) < nlevels: ndupe_l.extend([1] * (nlevels - len(ndupe_l))) assert len(ndupe_l) == nlevels - assert all([x > 0 for x in ndupe_l]) + assert all(x > 0 for x in ndupe_l) tuples = [] for i in range(nlevels): def keyfunc(x): import re - numeric_tuple = re.sub("[^\d_]_?", "", x).split("_") + numeric_tuple = re.sub(r"[^\d_]_?", "", x).split("_") return lmap(int, numeric_tuple) # build a list of lists to create the index from div_factor = nentries // ndupe_l[i] + 1 cnt = Counter() for j in range(div_factor): - label = prefix + '_l%d_g' % i + str(j) + label = '{prefix}_l{i}_g{j}'.format(prefix=prefix, i=i, j=j) cnt[label] = ndupe_l[i] # cute Counter trick result = list(sorted(cnt.elements(), key=keyfunc))[:nentries] @@ -1817,7 +1815,11 @@ def keyfunc(x): # convert tuples to index if nentries == 1: + # we have a single level of tuples, i.e. a regular Index index = Index(tuples[0], name=names[0]) + elif nlevels == 1: + name = None if names is None else names[0] + index = Index((x[0] for x in tuples), name=name) else: index = MultiIndex.from_tuples(tuples, names=names) return index @@ -1830,8 +1832,8 @@ def makeCustomDataframe(nrows, ncols, c_idx_names=True, r_idx_names=True, """ nrows, ncols - number of data rows/cols c_idx_names, idx_names - False/True/list of strings, yields No names , - default names or uses the provided names for the levels of the - corresponding index. You can provide a single string when + default names or uses the provided names for the levels of the + corresponding index. You can provide a single string when c_idx_nlevels ==1. c_idx_nlevels - number of levels in columns index. > 1 will yield MultiIndex r_idx_nlevels - number of levels in rows index. 
> 1 will yield MultiIndex @@ -1900,7 +1902,7 @@ def makeCustomDataframe(nrows, ncols, c_idx_names=True, r_idx_names=True, # by default, generate data based on location if data_gen_f is None: - data_gen_f = lambda r, c: "R%dC%d" % (r, c) + data_gen_f = lambda r, c: "R{rows}C{cols}".format(rows=r, cols=c) data = [[data_gen_f(r, c) for c in range(ncols)] for r in range(nrows)] @@ -1995,62 +1997,6 @@ def __init__(self, *args, **kwargs): dict.__init__(self, *args, **kwargs) -# Dependency checks. Copied this from Nipy/Nipype (Copyright of -# respective developers, license: BSD-3) -def package_check(pkg_name, version=None, app='pandas', checker=LooseVersion): - """Check that the minimal version of the required package is installed. - - Parameters - ---------- - pkg_name : string - Name of the required package. - version : string, optional - Minimal version number for required package. - app : string, optional - Application that is performing the check. For instance, the - name of the tutorial being executed that depends on specific - packages. - checker : object, optional - The class that will perform the version checking. Default is - distutils.version.LooseVersion. 
- - Examples - -------- - package_check('numpy', '1.3') - package_check('networkx', '1.0', 'tutorial1') - - """ - - import pytest - if app: - msg = '%s requires %s' % (app, pkg_name) - else: - msg = 'module requires %s' % pkg_name - if version: - msg += ' with version >= %s' % (version,) - try: - mod = __import__(pkg_name) - except ImportError: - mod = None - try: - have_version = mod.__version__ - except AttributeError: - pytest.skip('Cannot find version for %s' % pkg_name) - if version and checker(have_version) < checker(version): - pytest.skip(msg) - - -def skip_if_no_package(*args, **kwargs): - """pytest.skip() if package_check fails - - Parameters - ---------- - *args Positional parameters passed to `package_check` - *kwargs Keyword parameters passed to `package_check` - """ - package_check(*args, **kwargs) - - def optional_args(decorator): """allows a decorator to take optional positional and keyword arguments. Assumes that taking a single, callable, positional argument means that @@ -2076,6 +2022,7 @@ def dec(f): return wrapper + # skip tests on exceptions with this message _network_error_messages = ( # 'urlopen error timed out', @@ -2093,6 +2040,7 @@ def dec(f): 'Temporary failure in name resolution', 'Name or service not known', 'Connection refused', + 'certificate verify', ) # or this e.errno/e.reason.errno @@ -2181,7 +2129,7 @@ def network(t, url="http://www.google.com", _skip_on_messages: iterable of string any exception e for which one of the strings is a substring of str(e) will be skipped with an appropriate - message. Intended to supress errors where an errno isn't available. + message. Intended to suppress errors where an errno isn't available. 
Notes ----- @@ -2234,7 +2182,7 @@ def network(t, url="http://www.google.com", from pytest import skip t.network = True - @wraps(t) + @compat.wraps(t) def wrapper(*args, **kwargs): if check_before_test and not raise_on_error: if not can_connect(url, error_classes): @@ -2248,16 +2196,16 @@ def wrapper(*args, **kwargs): if errno in skip_errnos: skip("Skipping test due to known errno" - " and error %s" % e) + " and error {error}".format(error=e)) try: e_str = traceback.format_exc(e) - except: + except Exception: e_str = str(e) - if any([m.lower() in e_str.lower() for m in _skip_on_messages]): + if any(m.lower() in e_str.lower() for m in _skip_on_messages): skip("Skipping test because exception " - "message is known and error %s" % e) + "message is known and error {error}".format(error=e)) if not isinstance(e, error_classes): raise @@ -2266,7 +2214,7 @@ def wrapper(*args, **kwargs): raise else: skip("Skipping test due to lack of connectivity" - " and error %s" % e) + " and error {error}".format(e)) return wrapper @@ -2327,78 +2275,37 @@ def stdin_encoding(encoding=None): sys.stdin = _stdin -def assertRaises(_exception, _callable=None, *args, **kwargs): - """assertRaises that is usable as context manager or in a with statement - - Exceptions that don't match the given Exception type fall through:: - - >>> with assertRaises(ValueError): - ... raise TypeError("banana") - ... - Traceback (most recent call last): - ... - TypeError: banana +def assert_raises_regex(_exception, _regexp, _callable=None, + *args, **kwargs): + r""" + Check that the specified Exception is raised and that the error message + matches a given regular expression pattern. This may be a regular + expression object or a string containing a regular expression suitable + for use by `re.search()`. This is a port of the `assertRaisesRegexp` + function from unittest in Python 2.7. - If it raises the given Exception type, the test passes - >>> with assertRaises(KeyError): - ... dct = dict() - ... 
dct["apple"] - - If the expected error doesn't occur, it raises an error. - >>> with assertRaises(KeyError): - ... dct = {'apple':True} - ... dct["apple"] - Traceback (most recent call last): - ... - AssertionError: KeyError not raised. - - In addition to using it as a contextmanager, you can also use it as a - function, just like the normal assertRaises - - >>> assertRaises(TypeError, ",".join, [1, 3, 5]) - """ - manager = _AssertRaisesContextmanager(exception=_exception) - # don't return anything if used in function form - if _callable is not None: - with manager: - _callable(*args, **kwargs) - else: - return manager - - -def assertRaisesRegexp(_exception, _regexp, _callable=None, *args, **kwargs): - """ Port of assertRaisesRegexp from unittest in - Python 2.7 - used in with statement. - - Explanation from standard library: - Like assertRaises() but also tests that regexp matches on the - string representation of the raised exception. regexp may be a - regular expression object or a string containing a regular - expression suitable for use by re.search(). - - You can pass either a regular expression - or a compiled regular expression object. - >>> assertRaisesRegexp(ValueError, 'invalid literal for.*XYZ', - ... int, 'XYZ') + Examples + -------- + >>> assert_raises_regex(ValueError, 'invalid literal for.*XYZ', int, 'XYZ') >>> import re - >>> assertRaisesRegexp(ValueError, re.compile('literal'), int, 'XYZ') + >>> assert_raises_regex(ValueError, re.compile('literal'), int, 'XYZ') If an exception of a different type is raised, it bubbles up. - >>> assertRaisesRegexp(TypeError, 'literal', int, 'XYZ') + >>> assert_raises_regex(TypeError, 'literal', int, 'XYZ') Traceback (most recent call last): ... ValueError: invalid literal for int() with base 10: 'XYZ' >>> dct = dict() - >>> assertRaisesRegexp(KeyError, 'pear', dct.__getitem__, 'apple') + >>> assert_raises_regex(KeyError, 'pear', dct.__getitem__, 'apple') Traceback (most recent call last): ... 
AssertionError: "pear" does not match "'apple'" You can also use this in a with statement. - >>> with assertRaisesRegexp(TypeError, 'unsupported operand type\(s\)'): + >>> with assert_raises_regex(TypeError, 'unsupported operand type\(s\)'): ... 1 + {} - >>> with assertRaisesRegexp(TypeError, 'banana'): + >>> with assert_raises_regex(TypeError, 'banana'): ... 'apple'[0] = 'b' Traceback (most recent call last): ... @@ -2415,52 +2322,124 @@ def assertRaisesRegexp(_exception, _regexp, _callable=None, *args, **kwargs): class _AssertRaisesContextmanager(object): """ - Handles the behind the scenes work - for assertRaises and assertRaisesRegexp + Context manager behind `assert_raises_regex`. """ - def __init__(self, exception, regexp=None, *args, **kwargs): + def __init__(self, exception, regexp=None): + """ + Initialize an _AssertRaisesContextManager instance. + + Parameters + ---------- + exception : class + The expected Exception class. + regexp : str, default None + The regex to compare against the Exception message. 
+ """ + self.exception = exception + if regexp is not None and not hasattr(regexp, "search"): regexp = re.compile(regexp, re.DOTALL) + self.regexp = regexp def __enter__(self): return self - def __exit__(self, exc_type, exc_value, traceback): + def __exit__(self, exc_type, exc_value, trace_back): expected = self.exception - if not exc_type: - name = getattr(expected, "__name__", str(expected)) - raise AssertionError("{0} not raised.".format(name)) - if issubclass(exc_type, expected): - return self.handle_success(exc_type, exc_value, traceback) - return self.handle_failure(exc_type, exc_value, traceback) - - def handle_failure(*args, **kwargs): - # Failed, so allow Exception to bubble up - return False - def handle_success(self, exc_type, exc_value, traceback): - if self.regexp is not None: - val = str(exc_value) - if not self.regexp.search(val): - e = AssertionError('"%s" does not match "%s"' % - (self.regexp.pattern, str(val))) - raise_with_traceback(e, traceback) - return True + if not exc_type: + exp_name = getattr(expected, "__name__", str(expected)) + raise AssertionError("{name} not raised.".format(name=exp_name)) + + return self.exception_matches(exc_type, exc_value, trace_back) + + def exception_matches(self, exc_type, exc_value, trace_back): + """ + Check that the Exception raised matches the expected Exception + and expected error message regular expression. + + Parameters + ---------- + exc_type : class + The type of Exception raised. + exc_value : Exception + The instance of `exc_type` raised. + trace_back : stack trace object + The traceback object associated with `exc_value`. + + Returns + ------- + is_matched : bool + Whether or not the Exception raised matches the expected + Exception class and expected error message regular expression. + + Raises + ------ + AssertionError : The error message provided does not match + the expected error message regular expression. 
+ """ + + if issubclass(exc_type, self.exception): + if self.regexp is not None: + val = str(exc_value) + + if not self.regexp.search(val): + msg = '"{pat}" does not match "{val}"'.format( + pat=self.regexp.pattern, val=val) + e = AssertionError(msg) + raise_with_traceback(e, trace_back) + + return True + else: + # Failed, so allow Exception to bubble up. + return False @contextmanager def assert_produces_warning(expected_warning=Warning, filter_level="always", clear=None, check_stacklevel=True): """ - Context manager for running code that expects to raise (or not raise) - warnings. Checks that code raises the expected warning and only the - expected warning. Pass ``False`` or ``None`` to check that it does *not* - raise a warning. Defaults to ``exception.Warning``, baseclass of all - Warnings. (basically a wrapper around ``warnings.catch_warnings``). + Context manager for running code expected to either raise a specific + warning, or not raise any warnings. Verifies that the code raises the + expected warning, and that it does not raise any other unexpected + warnings. It is basically a wrapper around ``warnings.catch_warnings``. + Parameters + ---------- + expected_warning : {Warning, False, None}, default Warning + The type of Exception raised. ``exception.Warning`` is the base + class for all warnings. To check that no warning is returned, + specify ``False`` or ``None``. + filter_level : str, default "always" + Specifies whether warnings are ignored, displayed, or turned + into errors. 
+ Valid values are: + + * "error" - turns matching warnings into exceptions + * "ignore" - discard the warning + * "always" - always emit a warning + * "default" - print the warning the first time it is generated + from each location + * "module" - print the warning the first time it is generated + from each module + * "once" - print the warning the first time it is generated + + clear : str, default None + If not ``None`` then remove any previously raised warnings from + the ``__warningsregistry__`` to ensure that no warning messages are + suppressed by this context manager. If ``None`` is specified, + the ``__warningsregistry__`` keeps track of which warnings have been + shown, and does not show them again. + check_stacklevel : bool, default True + If True, displays the line that called the function containing + the warning to show were the function is called. Otherwise, the + line that implements the function is displayed. + + Examples + -------- >>> import warnings >>> with assert_produces_warning(): ... warnings.warn(UserWarning()) @@ -2490,7 +2469,7 @@ def assert_produces_warning(expected_warning=Warning, filter_level="always", for m in clear: try: m.__warningregistry__.clear() - except: + except Exception: pass saw_warning = False @@ -2509,18 +2488,20 @@ def assert_produces_warning(expected_warning=Warning, filter_level="always", from inspect import getframeinfo, stack caller = getframeinfo(stack()[2][0]) msg = ("Warning not set with correct stacklevel. " - "File where warning is raised: {0} != {1}. " - "Warning message: {2}".format( - actual_warning.filename, caller.filename, - actual_warning.message)) + "File where warning is raised: {actual} != " + "{caller}. 
Warning message: {message}" + ).format(actual=actual_warning.filename, + caller=caller.filename, + message=actual_warning.message) assert actual_warning.filename == caller.filename, msg else: extra_warnings.append(actual_warning.category.__name__) if expected_warning: - assert saw_warning, ("Did not see expected warning of class %r." - % expected_warning.__name__) - assert not extra_warnings, ("Caused unexpected warning(s): %r." - % extra_warnings) + msg = "Did not see expected warning of class {name!r}.".format( + name=expected_warning.__name__) + assert saw_warning, msg + assert not extra_warnings, ("Caused unexpected warning(s): {extra!r}." + ).format(extra=extra_warnings) class RNGContext(object): @@ -2554,7 +2535,11 @@ def __exit__(self, exc_type, exc_value, traceback): @contextmanager -def use_numexpr(use, min_elements=expr._MIN_ELEMENTS): +def use_numexpr(use, min_elements=None): + from pandas.core.computation import expressions as expr + if min_elements is None: + min_elements = expr._MIN_ELEMENTS + olduse = expr._USE_NUMEXPR oldmin = expr._MIN_ELEMENTS expr.set_use_numexpr(use) @@ -2564,12 +2549,6 @@ def use_numexpr(use, min_elements=expr._MIN_ELEMENTS): expr.set_use_numexpr(olduse) -# Also provide all assert_* functions in the TestCase class -for name, obj in inspect.getmembers(sys.modules[__name__]): - if inspect.isfunction(obj) and name.startswith('assert'): - setattr(TestCase, name, staticmethod(obj)) - - def test_parallel(num_threads=2, kwargs_list=None): """Decorator to run the same function multiple times in parallel. @@ -2746,9 +2725,6 @@ def set_timezone(tz): ... 
'EDT' """ - if is_platform_windows(): - import pytest - pytest.skip("timezone setting not supported on windows") import os import time @@ -2757,7 +2733,7 @@ def setTZ(tz): if tz is None: try: del os.environ['TZ'] - except: + except KeyError: pass else: os.environ['TZ'] = tz @@ -2769,3 +2745,31 @@ def setTZ(tz): yield finally: setTZ(orig_tz) + + +def _make_skipna_wrapper(alternative, skipna_alternative=None): + """Create a function for calling on an array. + + Parameters + ---------- + alternative : function + The function to be called on the array with no NaNs. + Only used when 'skipna_alternative' is None. + skipna_alternative : function + The function to be called on the original array + + Returns + ------- + skipna_wrapper : function + """ + if skipna_alternative: + def skipna_wrapper(x): + return skipna_alternative(x.values) + else: + def skipna_wrapper(x): + nona = x.dropna() + if len(nona) == 0: + return np.nan + return alternative(nona) + + return skipna_wrapper diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000000000..f0d57d1d808a2 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,9 @@ +[build-system] +requires = [ + "wheel", + "setuptools", + "Cython", # required for VCS build, optional for released source + "numpy==1.9.3; python_version=='3.5'", + "numpy==1.12.1; python_version=='3.6'", + "numpy==1.13.1; python_version>='3.7'", +] diff --git a/scripts/announce.py b/scripts/announce.py new file mode 100755 index 0000000000000..7b7933eba54dd --- /dev/null +++ b/scripts/announce.py @@ -0,0 +1,124 @@ +#!/usr/bin/env python +# -*- encoding:utf-8 -*- +""" +Script to generate contributor and pull request lists + +This script generates contributor and pull request lists for release +announcements using Github v3 protocol. Use requires an authentication token in +order to have sufficient bandwidth, you can get one following the directions at +`_ +Don't add any scope, as the default is read access to public information. 
The +token may be stored in an environment variable as you only get one chance to +see it. + +Usage:: + + $ ./scripts/announce.py + +The output is utf8 rst. + +Dependencies +------------ + +- gitpython +- pygithub + +Some code was copied from scipy `tools/gh_lists.py` and `tools/authors.py`. + +Examples +-------- + +From the bash command line with $GITHUB token. + + $ ./scripts/announce.py $GITHUB v1.11.0..v1.11.1 > announce.rst + +""" +from __future__ import print_function, division + +import os +import re +import codecs +from git import Repo + +UTF8Writer = codecs.getwriter('utf8') +this_repo = Repo(os.path.join(os.path.dirname(__file__), "..")) + +author_msg = """\ +A total of %d people contributed to this release. People with a "+" by their +names contributed a patch for the first time. +""" + +pull_request_msg = """\ +A total of %d pull requests were merged for this release. +""" + + +def get_authors(revision_range): + pat = u'^.*\\t(.*)$' + lst_release, cur_release = [r.strip() for r in revision_range.split('..')] + + # authors, in current release and previous to current release. + cur = set(re.findall(pat, this_repo.git.shortlog('-s', revision_range), + re.M)) + pre = set(re.findall(pat, this_repo.git.shortlog('-s', lst_release), + re.M)) + + # Homu is the author of auto merges, clean him out. + cur.discard('Homu') + pre.discard('Homu') + + # Append '+' to new authors. + authors = [s + u' +' for s in cur - pre] + [s for s in cur & pre] + authors.sort() + return authors + + +def get_pull_requests(repo, revision_range): + prnums = [] + + # From regular merges + merges = this_repo.git.log( + '--oneline', '--merges', revision_range) + issues = re.findall(u"Merge pull request \\#(\\d*)", merges) + prnums.extend(int(s) for s in issues) + + # From Homu merges (Auto merges) + issues = re. 
findall(u"Auto merge of \\#(\\d*)", merges) + prnums.extend(int(s) for s in issues) + + # From fast forward squash-merges + commits = this_repo.git.log( + '--oneline', '--no-merges', '--first-parent', revision_range) + issues = re.findall(u'^.*\\(\\#(\\d+)\\)$', commits, re.M) + prnums.extend(int(s) for s in issues) + + # get PR data from github repo + prnums.sort() + prs = [repo.get_pull(n) for n in prnums] + return prs + + +def main(revision_range, repo): + lst_release, cur_release = [r.strip() for r in revision_range.split('..')] + + # document authors + authors = get_authors(revision_range) + heading = u"Contributors" + print() + print(heading) + print(u"=" * len(heading)) + print(author_msg % len(authors)) + + for s in authors: + print(u'* ' + s) + + +if __name__ == "__main__": + from argparse import ArgumentParser + + parser = ArgumentParser(description="Generate author lists for release") + parser.add_argument('revision_range', help='..') + parser.add_argument('--repo', help="Github org/repository", + default="pandas-dev/pandas") + args = parser.parse_args() + main(args.revision_range, args.repo) diff --git a/scripts/api_rst_coverage.py b/scripts/api_rst_coverage.py deleted file mode 100644 index cc456f03c02ec..0000000000000 --- a/scripts/api_rst_coverage.py +++ /dev/null @@ -1,43 +0,0 @@ -import pandas as pd -import inspect -import re - -def main(): - # classes whose members to check - classes = [pd.Series, pd.DataFrame, pd.Panel, pd.Panel4D] - - def class_name_sort_key(x): - if x.startswith('Series'): - # make sure Series precedes DataFrame, Panel, and Panel4D - return ' ' + x - else: - return x - - # class members - class_members = set() - for cls in classes: - class_members.update([cls.__name__ + '.' 
+ x[0] for x in inspect.getmembers(cls)]) - - # class members referenced in api.rst - api_rst_members = set() - file_name = '../doc/source/api.rst' - with open(file_name, 'r') as f: - pattern = re.compile('({})\.(\w+)'.format('|'.join([cls.__name__ for cls in classes]))) - for line in f: - match = pattern.search(line) - if match: - api_rst_members.add(match.group(0)) - - print() - print("Documented members in api.rst that aren't actual class members:") - for x in sorted(api_rst_members.difference(class_members), key=class_name_sort_key): - print(x) - - print() - print("Class members (other than those beginning with '_') missing from api.rst:") - for x in sorted(class_members.difference(api_rst_members), key=class_name_sort_key): - if '._' not in x: - print(x) - -if __name__ == "__main__": - main() diff --git a/scripts/bench_join.R b/scripts/bench_join.R deleted file mode 100644 index edba277f0295c..0000000000000 --- a/scripts/bench_join.R +++ /dev/null @@ -1,50 +0,0 @@ -library(xts) - -iterations <- 50 - -ns = c(100, 1000, 10000, 100000, 1000000) -kinds = c("outer", "left", "inner") - -result = matrix(0, nrow=3, ncol=length(ns)) -n <- 100000 -pct.overlap <- 0.2 - -k <- 1 - -for (ni in 1:length(ns)){ - n <- ns[ni] - rng1 <- 1:n - offset <- as.integer(n * pct.overlap) - rng2 <- rng1 + offset - x <- xts(matrix(rnorm(n * k), nrow=n, ncol=k), - as.POSIXct(Sys.Date()) + rng1) - y <- xts(matrix(rnorm(n * k), nrow=n, ncol=k), - as.POSIXct(Sys.Date()) + rng2) - timing <- numeric() - for (i in 1:3) { - kind = kinds[i] - for(j in 1:iterations) { - gc() # just to be sure - timing[j] <- system.time(merge(x,y,join=kind))[3] - } - #timing <- system.time(for (j in 1:iterations) merge.xts(x, y, join=kind), - # gcFirst=F) - #timing <- as.list(timing) - result[i, ni] <- mean(timing) * 1000 - #result[i, ni] = (timing$elapsed / iterations) * 1000 - } -} - -rownames(result) <- kinds -colnames(result) <- log10(ns) - -mat <- matrix(rnorm(500000), nrow=100000, ncol=5) -set.seed(12345) 
-indexer <- sample(1:100000) - -timing <- rep(0, 10) -for (i in 1:10) { - gc() - timing[i] = system.time(mat[indexer,])[3] -} - diff --git a/scripts/bench_join.py b/scripts/bench_join.py deleted file mode 100644 index 1ce5c94130e85..0000000000000 --- a/scripts/bench_join.py +++ /dev/null @@ -1,211 +0,0 @@ -from pandas.compat import range, lrange -import numpy as np -import pandas.lib as lib -from pandas import * -from copy import deepcopy -import time - -n = 1000000 -K = 1 -pct_overlap = 0.2 - -a = np.arange(n, dtype=np.int64) -b = np.arange(n * pct_overlap, n * (1 + pct_overlap), dtype=np.int64) - -dr1 = DatetimeIndex('1/1/2000', periods=n, offset=offsets.Minute()) -dr2 = DatetimeIndex( - dr1[int(pct_overlap * n)], periods=n, offset=offsets.Minute(2)) - -aobj = a.astype(object) -bobj = b.astype(object) - -av = np.random.randn(n) -bv = np.random.randn(n) - -avf = np.random.randn(n, K) -bvf = np.random.randn(n, K) - -a_series = Series(av, index=a) -b_series = Series(bv, index=b) - -a_frame = DataFrame(avf, index=a, columns=lrange(K)) -b_frame = DataFrame(bvf, index=b, columns=lrange(K, 2 * K)) - - -def do_left_join(a, b, av, bv): - out = np.empty((len(a), 2)) - lib.left_join_1d(a, b, av, bv, out) - return out - - -def do_outer_join(a, b, av, bv): - result_index, aindexer, bindexer = lib.outer_join_indexer(a, b) - result = np.empty((2, len(result_index))) - lib.take_1d(av, aindexer, result[0]) - lib.take_1d(bv, bindexer, result[1]) - return result_index, result - - -def do_inner_join(a, b, av, bv): - result_index, aindexer, bindexer = lib.inner_join_indexer(a, b) - result = np.empty((2, len(result_index))) - lib.take_1d(av, aindexer, result[0]) - lib.take_1d(bv, bindexer, result[1]) - return result_index, result - -from line_profiler import LineProfiler -prof = LineProfiler() - -from pandas.util.testing import set_trace - - -def do_left_join_python(a, b, av, bv): - indexer, mask = lib.ordered_left_join_int64(a, b) - - n, ak = av.shape - _, bk = bv.shape - 
result_width = ak + bk - - result = np.empty((result_width, n), dtype=np.float64) - result[:ak] = av.T - - bchunk = result[ak:] - _take_multi(bv.T, indexer, bchunk) - np.putmask(bchunk, np.tile(mask, bk), np.nan) - return result - - -def _take_multi(data, indexer, out): - if not data.flags.c_contiguous: - data = data.copy() - for i in range(data.shape[0]): - data[i].take(indexer, out=out[i]) - - -def do_left_join_multi(a, b, av, bv): - n, ak = av.shape - _, bk = bv.shape - result = np.empty((n, ak + bk), dtype=np.float64) - lib.left_join_2d(a, b, av, bv, result) - return result - - -def do_outer_join_multi(a, b, av, bv): - n, ak = av.shape - _, bk = bv.shape - result_index, rindexer, lindexer = lib.outer_join_indexer(a, b) - result = np.empty((len(result_index), ak + bk), dtype=np.float64) - lib.take_join_contiguous(av, bv, lindexer, rindexer, result) - # result = np.empty((ak + bk, len(result_index)), dtype=np.float64) - # lib.take_axis0(av, rindexer, out=result[:ak].T) - # lib.take_axis0(bv, lindexer, out=result[ak:].T) - return result_index, result - - -def do_inner_join_multi(a, b, av, bv): - n, ak = av.shape - _, bk = bv.shape - result_index, rindexer, lindexer = lib.inner_join_indexer(a, b) - result = np.empty((len(result_index), ak + bk), dtype=np.float64) - lib.take_join_contiguous(av, bv, lindexer, rindexer, result) - # result = np.empty((ak + bk, len(result_index)), dtype=np.float64) - # lib.take_axis0(av, rindexer, out=result[:ak].T) - # lib.take_axis0(bv, lindexer, out=result[ak:].T) - return result_index, result - - -def do_left_join_multi_v2(a, b, av, bv): - indexer, mask = lib.ordered_left_join_int64(a, b) - bv_taken = bv.take(indexer, axis=0) - np.putmask(bv_taken, mask.repeat(bv.shape[1]), np.nan) - return np.concatenate((av, bv_taken), axis=1) - - -def do_left_join_series(a, b): - return b.reindex(a.index) - - -def do_left_join_frame(a, b): - a.index._indexMap = None - b.index._indexMap = None - return a.join(b, how='left') - - -# a = np.array([1, 
2, 3, 4, 5], dtype=np.int64) -# b = np.array([0, 3, 5, 7, 9], dtype=np.int64) -# print(lib.inner_join_indexer(a, b)) - -out = np.empty((10, 120000)) - - -def join(a, b, av, bv, how="left"): - func_dict = {'left': do_left_join_multi, - 'outer': do_outer_join_multi, - 'inner': do_inner_join_multi} - - f = func_dict[how] - return f(a, b, av, bv) - - -def bench_python(n=100000, pct_overlap=0.20, K=1): - import gc - ns = [2, 3, 4, 5, 6] - iterations = 200 - pct_overlap = 0.2 - kinds = ['outer', 'left', 'inner'] - - all_results = {} - for logn in ns: - n = 10 ** logn - a = np.arange(n, dtype=np.int64) - b = np.arange(n * pct_overlap, n * pct_overlap + n, dtype=np.int64) - - avf = np.random.randn(n, K) - bvf = np.random.randn(n, K) - - a_frame = DataFrame(avf, index=a, columns=lrange(K)) - b_frame = DataFrame(bvf, index=b, columns=lrange(K, 2 * K)) - - all_results[logn] = result = {} - - for kind in kinds: - gc.disable() - elapsed = 0 - _s = time.clock() - for i in range(iterations): - if i % 10 == 0: - elapsed += time.clock() - _s - gc.collect() - _s = time.clock() - a_frame.join(b_frame, how=kind) - # join(a, b, avf, bvf, how=kind) - elapsed += time.clock() - _s - gc.enable() - result[kind] = (elapsed / iterations) * 1000 - - return DataFrame(all_results, index=kinds) - - -def bench_xts(n=100000, pct_overlap=0.20): - from pandas.rpy.common import r - r('a <- 5') - - xrng = '1:%d' % n - - start = n * pct_overlap + 1 - end = n + start - 1 - yrng = '%d:%d' % (start, end) - - r('library(xts)') - - iterations = 500 - - kinds = ['left', 'outer', 'inner'] - result = {} - for kind in kinds: - r('x <- xts(rnorm(%d), as.POSIXct(Sys.Date()) + %s)' % (n, xrng)) - r('y <- xts(rnorm(%d), as.POSIXct(Sys.Date()) + %s)' % (n, yrng)) - stmt = 'for (i in 1:%d) merge(x, y, join="%s")' % (iterations, kind) - elapsed = r('as.list(system.time(%s, gcFirst=F))$elapsed' % stmt)[0] - result[kind] = (elapsed / iterations) * 1000 - return Series(result) diff --git a/scripts/bench_join_multi.py 
b/scripts/bench_join_multi.py deleted file mode 100644 index 7b93112b7f869..0000000000000 --- a/scripts/bench_join_multi.py +++ /dev/null @@ -1,32 +0,0 @@ -from pandas import * - -import numpy as np -from pandas.compat import zip, range, lzip -from pandas.util.testing import rands -import pandas.lib as lib - -N = 100000 - -key1 = [rands(10) for _ in range(N)] -key2 = [rands(10) for _ in range(N)] - -zipped = lzip(key1, key2) - - -def _zip(*args): - arr = np.empty(N, dtype=object) - arr[:] = lzip(*args) - return arr - - -def _zip2(*args): - return lib.list_to_object_array(lzip(*args)) - -index = MultiIndex.from_arrays([key1, key2]) -to_join = DataFrame({'j1': np.random.randn(100000)}, index=index) - -data = DataFrame({'A': np.random.randn(500000), - 'key1': np.repeat(key1, 5), - 'key2': np.repeat(key2, 5)}) - -# data.join(to_join, on=['key1', 'key2']) diff --git a/scripts/bench_refactor.py b/scripts/bench_refactor.py deleted file mode 100644 index dafba371e995a..0000000000000 --- a/scripts/bench_refactor.py +++ /dev/null @@ -1,51 +0,0 @@ -from pandas import * -from pandas.compat import range -try: - import pandas.core.internals as internals - reload(internals) - import pandas.core.frame as frame - reload(frame) - from pandas.core.frame import DataFrame as DataMatrix -except ImportError: - pass - -N = 1000 -K = 500 - - -def horribly_unconsolidated(): - index = np.arange(N) - - df = DataMatrix(index=index) - - for i in range(K): - df[i] = float(K) - - return df - - -def bench_reindex_index(df, it=100): - new_idx = np.arange(0, N, 2) - for i in range(it): - df.reindex(new_idx) - - -def bench_reindex_columns(df, it=100): - new_cols = np.arange(0, K, 2) - for i in range(it): - df.reindex(columns=new_cols) - - -def bench_join_index(df, it=10): - left = df.reindex(index=np.arange(0, N, 2), - columns=np.arange(K // 2)) - right = df.reindex(columns=np.arange(K // 2 + 1, K)) - for i in range(it): - joined = left.join(right) - -if __name__ == '__main__': - df = 
horribly_unconsolidated() - left = df.reindex(index=np.arange(0, N, 2), - columns=np.arange(K // 2)) - right = df.reindex(columns=np.arange(K // 2 + 1, K)) - bench_join_index(df) diff --git a/scripts/boxplot_test.py b/scripts/boxplot_test.py deleted file mode 100644 index 3704f7b60dc60..0000000000000 --- a/scripts/boxplot_test.py +++ /dev/null @@ -1,14 +0,0 @@ -import matplotlib.pyplot as plt - -import random -import pandas.util.testing as tm -tm.N = 1000 -df = tm.makeTimeDataFrame() -import string -foo = list(string.letters[:5]) * 200 -df['indic'] = list(string.letters[:5]) * 200 -random.shuffle(foo) -df['indic2'] = foo -df.boxplot(by=['indic', 'indic2'], fontsize=8, rot=90) - -plt.show() diff --git a/scripts/build_dist.sh b/scripts/build_dist.sh index c9c36c18bed9c..c3f849ce7a6eb 100755 --- a/scripts/build_dist.sh +++ b/scripts/build_dist.sh @@ -10,9 +10,7 @@ read -p "Ok to continue (y/n)? " answer case ${answer:0:1} in y|Y ) echo "Building distribution" - python setup.py clean - python setup.py build_ext --inplace - python setup.py sdist --formats=gztar + ./build_dist_for_release.sh ;; * ) echo "Not building distribution" diff --git a/scripts/build_dist_for_release.sh b/scripts/build_dist_for_release.sh new file mode 100755 index 0000000000000..bee0f23a68ec2 --- /dev/null +++ b/scripts/build_dist_for_release.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +# this requires cython to be installed + +# this builds the release cleanly & is building on the current checkout +rm -rf dist +git clean -xfd +python setup.py clean --quiet +python setup.py cython --quiet +python setup.py sdist --formats=gztar --quiet diff --git a/scripts/convert_deps.py b/scripts/convert_deps.py new file mode 100755 index 0000000000000..aabeb24a0c3c8 --- /dev/null +++ b/scripts/convert_deps.py @@ -0,0 +1,29 @@ +""" +Convert the conda environment.yaml to a pip requirements.txt +""" +import yaml + +exclude = {'python=3'} +rename = {'pytables': 'tables'} + +with open("ci/environment-dev.yaml") as f: + dev = 
yaml.load(f) + +with open("ci/requirements-optional-conda.txt") as f: + optional = [x.strip() for x in f.readlines()] + +required = dev['dependencies'] +required = [rename.get(dep, dep) for dep in required if dep not in exclude] +optional = [rename.get(dep, dep) for dep in optional if dep not in exclude] + + +with open("ci/requirements_dev.txt", 'wt') as f: + f.write("# This file was autogenerated by scripts/convert_deps.py\n") + f.write("# Do not modify directly\n") + f.write('\n'.join(required)) + + +with open("ci/requirements-optional-pip.txt", 'wt') as f: + f.write("# This file was autogenerated by scripts/convert_deps.py\n") + f.write("# Do not modify directly\n") + f.write("\n".join(optional)) diff --git a/scripts/count_code.sh b/scripts/count_code.sh deleted file mode 100755 index 991faf2e8711b..0000000000000 --- a/scripts/count_code.sh +++ /dev/null @@ -1 +0,0 @@ -cloc pandas --force-lang=Python,pyx --not-match-f="parser.c|lib.c|tslib.c|sandbox.c|hashtable.c|sparse.c|algos.c|index.c" \ No newline at end of file diff --git a/scripts/faster_xs.py b/scripts/faster_xs.py deleted file mode 100644 index 2bb6271124c4f..0000000000000 --- a/scripts/faster_xs.py +++ /dev/null @@ -1,15 +0,0 @@ -import numpy as np - -import pandas.util.testing as tm - -from pandas.core.internals import _interleaved_dtype - -df = tm.makeDataFrame() - -df['E'] = 'foo' -df['F'] = 'foo' -df['G'] = 2 -df['H'] = df['A'] > 0 - -blocks = df._data.blocks -items = df.columns diff --git a/scripts/file_sizes.py b/scripts/file_sizes.py deleted file mode 100644 index de03c72ffbd09..0000000000000 --- a/scripts/file_sizes.py +++ /dev/null @@ -1,208 +0,0 @@ -from __future__ import print_function -import os -import sys - -import numpy as np -import matplotlib.pyplot as plt - -from pandas import DataFrame -from pandas.util.testing import set_trace -from pandas import compat - -dirs = [] -names = [] -lengths = [] - -if len(sys.argv) > 1: - loc = sys.argv[1] -else: - loc = '.' 
-walked = os.walk(loc) - - -def _should_count_file(path): - return path.endswith('.py') or path.endswith('.pyx') - - -def _is_def_line(line): - """def/cdef/cpdef, but not `cdef class`""" - return (line.endswith(':') and not 'class' in line.split() and - (line.startswith('def ') or - line.startswith('cdef ') or - line.startswith('cpdef ') or - ' def ' in line or ' cdef ' in line or ' cpdef ' in line)) - - -class LengthCounter(object): - """ - should add option for subtracting nested function lengths?? - """ - def __init__(self, lines): - self.lines = lines - self.pos = 0 - self.counts = [] - self.n = len(lines) - - def get_counts(self): - self.pos = 0 - self.counts = [] - while self.pos < self.n: - line = self.lines[self.pos] - self.pos += 1 - if _is_def_line(line): - level = _get_indent_level(line) - self._count_function(indent_level=level) - return self.counts - - def _count_function(self, indent_level=1): - indent = ' ' * indent_level - - def _end_of_function(line): - return (line != '' and - not line.startswith(indent) and - not line.startswith('#')) - - start_pos = self.pos - while self.pos < self.n: - line = self.lines[self.pos] - if _end_of_function(line): - self._push_count(start_pos) - return - - self.pos += 1 - - if _is_def_line(line): - self._count_function(indent_level=indent_level + 1) - - # end of file - self._push_count(start_pos) - - def _push_count(self, start_pos): - func_lines = self.lines[start_pos:self.pos] - - if len(func_lines) > 300: - set_trace() - - # remove blank lines at end - while len(func_lines) > 0 and func_lines[-1] == '': - func_lines = func_lines[:-1] - - # remove docstrings and comments - clean_lines = [] - in_docstring = False - for line in func_lines: - line = line.strip() - if in_docstring and _is_triplequote(line): - in_docstring = False - continue - - if line.startswith('#'): - continue - - if _is_triplequote(line): - in_docstring = True - continue - - self.counts.append(len(func_lines)) - - -def _get_indent_level(line): - 
level = 0 - while line.startswith(' ' * level): - level += 1 - return level - - -def _is_triplequote(line): - return line.startswith('"""') or line.startswith("'''") - - -def _get_file_function_lengths(path): - lines = [x.rstrip() for x in open(path).readlines()] - counter = LengthCounter(lines) - return counter.get_counts() - -# def test_get_function_lengths(): -text = """ -class Foo: - -def foo(): - def bar(): - a = 1 - - b = 2 - - c = 3 - - foo = 'bar' - -def x(): - a = 1 - - b = 3 - - c = 7 - - pass -""" - -expected = [5, 8, 7] - -lines = [x.rstrip() for x in text.splitlines()] -counter = LengthCounter(lines) -result = counter.get_counts() -assert(result == expected) - - -def doit(): - for directory, _, files in walked: - print(directory) - for path in files: - if not _should_count_file(path): - continue - - full_path = os.path.join(directory, path) - print(full_path) - lines = len(open(full_path).readlines()) - - dirs.append(directory) - names.append(path) - lengths.append(lines) - - result = DataFrame({'dirs': dirs, 'names': names, - 'lengths': lengths}) - - -def doit2(): - counts = {} - for directory, _, files in walked: - print(directory) - for path in files: - if not _should_count_file(path) or path.startswith('test_'): - continue - - full_path = os.path.join(directory, path) - counts[full_path] = _get_file_function_lengths(full_path) - - return counts - -counts = doit2() - -# counts = _get_file_function_lengths('pandas/tests/test_series.py') - -all_counts = [] -for k, v in compat.iteritems(counts): - all_counts.extend(v) -all_counts = np.array(all_counts) - -fig = plt.figure(figsize=(10, 5)) -ax = fig.add_subplot(111) -ax.hist(all_counts, bins=100) -n = len(all_counts) -nmore = (all_counts > 50).sum() -ax.set_title('%s function lengths, n=%d' % ('pandas', n)) -ax.set_ylabel('N functions') -ax.set_xlabel('Function length') -ax.text(100, 300, '%.3f%% with > 50 lines' % ((n - nmore) / float(n)), - fontsize=18) -plt.show() diff --git 
a/scripts/find_commits_touching_func.py b/scripts/find_commits_touching_func.py index 099761f38bb44..29eb4161718ff 100755 --- a/scripts/find_commits_touching_func.py +++ b/scripts/find_commits_touching_func.py @@ -1,135 +1,148 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- - -# copryright 2013, y-p @ github - -from __future__ import print_function -from pandas.compat import range, lrange, map - -"""Search the git history for all commits touching a named method +# copyright 2013, y-p @ github +""" +Search the git history for all commits touching a named method You need the sh module to run this -WARNING: this script uses git clean -f, running it on a repo with untracked files -will probably erase them. +WARNING: this script uses git clean -f, running it on a repo with untracked +files will probably erase them. + +Usage:: + $ ./find_commits_touching_func.py (see arguments below) """ +from __future__ import print_function import logging import re import os +import argparse from collections import namedtuple -from pandas.compat import parse_date - +from pandas.compat import lrange, map, string_types, text_type, parse_date try: import sh except ImportError: - raise ImportError("The 'sh' package is required in order to run this script. ") + raise ImportError("The 'sh' package is required to run this script.") -import argparse desc = """ -Find all commits touching a sepcified function across the codebase. +Find all commits touching a specified function across the codebase. """.strip() argparser = argparse.ArgumentParser(description=desc) argparser.add_argument('funcname', metavar='FUNCNAME', - help='Name of function/method to search for changes on.') + help='Name of function/method to search for changes on') argparser.add_argument('-f', '--file-masks', metavar='f_re(,f_re)*', default=["\.py.?$"], - help='comma seperated list of regexes to match filenames against\n'+ - 'defaults all .py? 
files') + help='comma separated list of regexes to match ' + 'filenames against\ndefaults all .py? files') argparser.add_argument('-d', '--dir-masks', metavar='d_re(,d_re)*', default=[], - help='comma seperated list of regexes to match base path against') + help='comma separated list of regexes to match base ' + 'path against') argparser.add_argument('-p', '--path-masks', metavar='p_re(,p_re)*', default=[], - help='comma seperated list of regexes to match full file path against') + help='comma separated list of regexes to match full ' + 'file path against') argparser.add_argument('-y', '--saw-the-warning', - action='store_true',default=False, - help='must specify this to run, acknowledge you realize this will erase untracked files') + action='store_true', default=False, + help='must specify this to run, acknowledge you ' + 'realize this will erase untracked files') argparser.add_argument('--debug-level', default="CRITICAL", - help='debug level of messages (DEBUG,INFO,etc...)') - + help='debug level of messages (DEBUG, INFO, etc...)') args = argparser.parse_args() lfmt = logging.Formatter(fmt='%(levelname)-8s %(message)s', - datefmt='%m-%d %H:%M:%S' -) - + datefmt='%m-%d %H:%M:%S') shh = logging.StreamHandler() shh.setFormatter(lfmt) - -logger=logging.getLogger("findit") +logger = logging.getLogger("findit") logger.addHandler(shh) +Hit = namedtuple("Hit", "commit path") +HASH_LEN = 8 -Hit=namedtuple("Hit","commit path") -HASH_LEN=8 def clean_checkout(comm): - h,s,d = get_commit_vitals(comm) + h, s, d = get_commit_vitals(comm) if len(s) > 60: s = s[:60] + "..." 
- s=s.split("\n")[0] - logger.info("CO: %s %s" % (comm,s )) + s = s.split("\n")[0] + logger.info("CO: %s %s" % (comm, s)) - sh.git('checkout', comm ,_tty_out=False) + sh.git('checkout', comm, _tty_out=False) sh.git('clean', '-f') -def get_hits(defname,files=()): - cs=set() + +def get_hits(defname, files=()): + cs = set() for f in files: try: - r=sh.git('blame', '-L', '/def\s*{start}/,/def/'.format(start=defname),f,_tty_out=False) + r = sh.git('blame', + '-L', + '/def\s*{start}/,/def/'.format(start=defname), + f, + _tty_out=False) except sh.ErrorReturnCode_128: logger.debug("no matches in %s" % f) continue lines = r.strip().splitlines()[:-1] # remove comment lines - lines = [x for x in lines if not re.search("^\w+\s*\(.+\)\s*#",x)] - hits = set(map(lambda x: x.split(" ")[0],lines)) - cs.update(set([Hit(commit=c,path=f) for c in hits])) + lines = [x for x in lines if not re.search("^\w+\s*\(.+\)\s*#", x)] + hits = set(map(lambda x: x.split(" ")[0], lines)) + cs.update(set(Hit(commit=c, path=f) for c in hits)) return cs -def get_commit_info(c,fmt,sep='\t'): - r=sh.git('log', "--format={}".format(fmt), '{}^..{}'.format(c,c),"-n","1",_tty_out=False) - return compat.text_type(r).split(sep) -def get_commit_vitals(c,hlen=HASH_LEN): - h,s,d= get_commit_info(c,'%H\t%s\t%ci',"\t") - return h[:hlen],s,parse_date(d) +def get_commit_info(c, fmt, sep='\t'): + r = sh.git('log', + "--format={}".format(fmt), + '{}^..{}'.format(c, c), + "-n", + "1", + _tty_out=False) + return text_type(r).split(sep) + -def file_filter(state,dirname,fnames): - if args.dir_masks and not any([re.search(x,dirname) for x in args.dir_masks]): +def get_commit_vitals(c, hlen=HASH_LEN): + h, s, d = get_commit_info(c, '%H\t%s\t%ci', "\t") + return h[:hlen], s, parse_date(d) + + +def file_filter(state, dirname, fnames): + if (args.dir_masks and + not any(re.search(x, dirname) for x in args.dir_masks)): return for f in fnames: - p = os.path.abspath(os.path.join(os.path.realpath(dirname),f)) - if 
any([re.search(x,f) for x in args.file_masks])\ - or any([re.search(x,p) for x in args.path_masks]): + p = os.path.abspath(os.path.join(os.path.realpath(dirname), f)) + if (any(re.search(x, f) for x in args.file_masks) or + any(re.search(x, p) for x in args.path_masks)): if os.path.isfile(p): state['files'].append(p) -def search(defname,head_commit="HEAD"): - HEAD,s = get_commit_vitals("HEAD")[:2] - logger.info("HEAD at %s: %s" % (HEAD,s)) + +def search(defname, head_commit="HEAD"): + HEAD, s = get_commit_vitals("HEAD")[:2] + logger.info("HEAD at %s: %s" % (HEAD, s)) done_commits = set() # allhits = set() files = [] state = dict(files=files) - os.path.walk('.',file_filter,state) + os.walk('.', file_filter, state) # files now holds a list of paths to files # seed with hits from q - allhits= set(get_hits(defname, files = files)) + allhits = set(get_hits(defname, files=files)) q = set([HEAD]) try: while q: - h=q.pop() + h = q.pop() clean_checkout(h) - hits = get_hits(defname, files = files) + hits = get_hits(defname, files=files) for x in hits: - prevc = get_commit_vitals(x.commit+"^")[0] + prevc = get_commit_vitals(x.commit + "^")[0] if prevc not in done_commits: q.add(prevc) allhits.update(hits) @@ -141,61 +154,63 @@ def search(defname,head_commit="HEAD"): clean_checkout(HEAD) return allhits + def pprint_hits(hits): - SUBJ_LEN=50 + SUBJ_LEN = 50 PATH_LEN = 20 - hits=list(hits) + hits = list(hits) max_p = 0 for hit in hits: - p=hit.path.split(os.path.realpath(os.curdir)+os.path.sep)[-1] - max_p=max(max_p,len(p)) + p = hit.path.split(os.path.realpath(os.curdir) + os.path.sep)[-1] + max_p = max(max_p, len(p)) if max_p < PATH_LEN: SUBJ_LEN += PATH_LEN - max_p PATH_LEN = max_p def sorter(i): - h,s,d=get_commit_vitals(hits[i].commit) - return hits[i].path,d + h, s, d = get_commit_vitals(hits[i].commit) + return hits[i].path, d - print("\nThese commits touched the %s method in these files on these dates:\n" \ - % args.funcname) - for i in 
sorted(lrange(len(hits)),key=sorter): + print(('\nThese commits touched the %s method in these files ' + 'on these dates:\n') % args.funcname) + for i in sorted(lrange(len(hits)), key=sorter): hit = hits[i] - h,s,d=get_commit_vitals(hit.commit) - p=hit.path.split(os.path.realpath(os.curdir)+os.path.sep)[-1] + h, s, d = get_commit_vitals(hit.commit) + p = hit.path.split(os.path.realpath(os.curdir) + os.path.sep)[-1] fmt = "{:%d} {:10} {:<%d} {:<%d}" % (HASH_LEN, SUBJ_LEN, PATH_LEN) if len(s) > SUBJ_LEN: - s = s[:SUBJ_LEN-5] + " ..." - print(fmt.format(h[:HASH_LEN],d.isoformat()[:10],s,p[-20:]) ) + s = s[:SUBJ_LEN - 5] + " ..." + print(fmt.format(h[:HASH_LEN], d.isoformat()[:10], s, p[-20:])) print("\n") + def main(): if not args.saw_the_warning: argparser.print_help() print(""" !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -WARNING: this script uses git clean -f, running it on a repo with untracked files. +WARNING: +this script uses git clean -f, running it on a repo with untracked files. It's recommended that you make a fresh clone and run from its root directory. You must specify the -y argument to ignore this warning. !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
""") return - if isinstance(args.file_masks,compat.string_types): + if isinstance(args.file_masks, string_types): args.file_masks = args.file_masks.split(',') - if isinstance(args.path_masks,compat.string_types): + if isinstance(args.path_masks, string_types): args.path_masks = args.path_masks.split(',') - if isinstance(args.dir_masks,compat.string_types): + if isinstance(args.dir_masks, string_types): args.dir_masks = args.dir_masks.split(',') - logger.setLevel(getattr(logging,args.debug_level)) + logger.setLevel(getattr(logging, args.debug_level)) - hits=search(args.funcname) + hits = search(args.funcname) pprint_hits(hits) - pass if __name__ == "__main__": import sys diff --git a/scripts/find_undoc_args.py b/scripts/find_undoc_args.py index 49273bacccf98..a135c8e5171a1 100755 --- a/scripts/find_undoc_args.py +++ b/scripts/find_undoc_args.py @@ -1,126 +1,135 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +""" +Script that compares the signature arguments with the ones in the docsting +and returns the differences in plain text or GitHub task list format. +Usage:: + $ ./find_undoc_args.py (see arguments below) +""" from __future__ import print_function - +import sys from collections import namedtuple -from itertools import islice import types import os import re import argparse -#http://docs.python.org/2/library/argparse.html -# arg name is positional is not prefixed with - or -- +import inspect + parser = argparse.ArgumentParser(description='Program description.') parser.add_argument('-p', '--path', metavar='PATH', type=str, required=False, - default=None, - help='full path relative to which paths wills be reported',action='store') -parser.add_argument('-m', '--module', metavar='MODULE', type=str,required=True, - help='name of package to import and examine',action='store') -parser.add_argument('-G', '--github_repo', metavar='REPO', type=str,required=False, - help='github project where the the code lives, e.g. 
"pandas-dev/pandas"', - default=None,action='store') - + default=None, action='store', + help='full path relative to which paths wills be reported') +parser.add_argument('-m', '--module', metavar='MODULE', type=str, + required=True, action='store', + help='name of package to import and examine') +parser.add_argument('-G', '--github_repo', metavar='REPO', type=str, + required=False, default=None, action='store', + help='github project where the code lives, ' + 'e.g. "pandas-dev/pandas"') args = parser.parse_args() -Entry=namedtuple("Entry","func path lnum undoc_names missing_args nsig_names ndoc_names") +Entry = namedtuple('Entry', + 'func path lnum undoc_names missing_args ' + 'nsig_names ndoc_names') -def entry_gen(root_ns,module_name): - q=[root_ns] - seen=set() +def entry_gen(root_ns, module_name): + """Walk and yield all methods and functions in the module root_ns and + submodules.""" + q = [root_ns] + seen = set() while q: ns = q.pop() for x in dir(ns): - cand = getattr(ns,x) - if (isinstance(cand,types.ModuleType) - and cand.__name__ not in seen - and cand.__name__.startswith(module_name)): - # print(cand.__name__) + cand = getattr(ns, x) + if (isinstance(cand, types.ModuleType) and + cand.__name__ not in seen and + cand.__name__.startswith(module_name)): seen.add(cand.__name__) - q.insert(0,cand) - elif (isinstance(cand,(types.MethodType,types.FunctionType)) and + q.insert(0, cand) + elif (isinstance(cand, (types.MethodType, types.FunctionType)) and cand not in seen and cand.__doc__): seen.add(cand) yield cand + def cmp_docstring_sig(f): + """Return an `Entry` object describing the differences between the + arguments in the signature and the documented ones.""" def build_loc(f): - path=f.__code__.co_filename.split(args.path,1)[-1][1:] - return dict(path=path,lnum=f.__code__.co_firstlineno) + path = f.__code__.co_filename.split(args.path, 1)[-1][1:] + return dict(path=path, lnum=f.__code__.co_firstlineno) - import inspect - 
sig_names=set(inspect.getargspec(f).args) + sig_names = set(inspect.getargspec(f).args) + # XXX numpydoc can be used to get the list of parameters doc = f.__doc__.lower() - doc = re.split("^\s*parameters\s*",doc,1,re.M)[-1] - doc = re.split("^\s*returns*",doc,1,re.M)[0] - doc_names={x.split(":")[0].strip() for x in doc.split("\n") - if re.match("\s+[\w_]+\s*:",x)} - sig_names.discard("self") - doc_names.discard("kwds") - doc_names.discard("kwargs") - doc_names.discard("args") - return Entry(func=f,path=build_loc(f)['path'],lnum=build_loc(f)['lnum'], + doc = re.split('^\s*parameters\s*', doc, 1, re.M)[-1] + doc = re.split('^\s*returns*', doc, 1, re.M)[0] + doc_names = {x.split(":")[0].strip() for x in doc.split('\n') + if re.match('\s+[\w_]+\s*:', x)} + sig_names.discard('self') + doc_names.discard('kwds') + doc_names.discard('kwargs') + doc_names.discard('args') + return Entry(func=f, path=build_loc(f)['path'], lnum=build_loc(f)['lnum'], undoc_names=sig_names.difference(doc_names), - missing_args=doc_names.difference(sig_names),nsig_names=len(sig_names), - ndoc_names=len(doc_names)) + missing_args=doc_names.difference(sig_names), + nsig_names=len(sig_names), ndoc_names=len(doc_names)) + def format_id(i): return i -def format_item_as_github_task_list( i,item,repo): - tmpl = "- [ ] {id}) [{file}:{lnum} ({func_name}())]({link}) - __Missing__[{nmissing}/{total_args}]: {undoc_names}" +def format_item_as_github_task_list(i, item, repo): + tmpl = ('- [ ] {id_}) [{fname}:{lnum} ({func_name}())]({link}) - ' + '__Missing__[{nmissing}/{total_args}]: {undoc_names}') link_tmpl = "https://github.com/{repo}/blob/master/{file}#L{lnum}" - - link = link_tmpl.format(repo=repo,file=item.path ,lnum=item.lnum ) - - s = tmpl.format(id=i,file=item.path , - lnum=item.lnum, - func_name=item.func.__name__, - link=link, - nmissing=len(item.undoc_names), - total_args=item.nsig_names, - undoc_names=list(item.undoc_names)) - + link = link_tmpl.format(repo=repo, file=item.path, lnum=item.lnum) + 
s = tmpl.format(id_=i, fname=item.path, lnum=item.lnum, + func_name=item.func.__name__, link=link, + nmissing=len(item.undoc_names), + total_args=item.nsig_names, + undoc_names=list(item.undoc_names)) if item.missing_args: - s+= " __Extra__(?): {missing_args}".format(missing_args=list(item.missing_args)) - + s += ' __Extra__(?): %s' % list(item.missing_args) return s -def format_item_as_plain(i,item): - tmpl = "+{lnum} {path} {func_name}(): Missing[{nmissing}/{total_args}]={undoc_names}" - - s = tmpl.format(path=item.path , - lnum=item.lnum, - func_name=item.func.__name__, - nmissing=len(item.undoc_names), - total_args=item.nsig_names, - undoc_names=list(item.undoc_names)) +def format_item_as_plain(i, item): + tmpl = ('+{lnum} {path} {func_name}(): ' + 'Missing[{nmissing}/{total_args}]={undoc_names}') + s = tmpl.format(path=item.path, lnum=item.lnum, + func_name=item.func.__name__, + nmissing=len(item.undoc_names), + total_args=item.nsig_names, + undoc_names=list(item.undoc_names)) if item.missing_args: - s+= " Extra(?)={missing_args}".format(missing_args=list(item.missing_args)) - + s += ' Extra(?)=%s' % list(item.missing_args) return s + def main(): module = __import__(args.module) if not args.path: - args.path=os.path.dirname(module.__file__) - collect=[cmp_docstring_sig(e) for e in entry_gen(module,module.__name__)] - # only include if there are missing arguments in the docstring (fewer false positives) - # and there are at least some documented arguments - collect = [e for e in collect if e.undoc_names and len(e.undoc_names) != e.nsig_names] - collect.sort(key=lambda x:x.path) + args.path = os.path.dirname(module.__file__) + collect = [cmp_docstring_sig(e) + for e in entry_gen(module, module.__name__)] + # only include if there are missing arguments in the docstring + # (fewer false positives) and there are at least some documented arguments + collect = [e for e in collect + if e.undoc_names and len(e.undoc_names) != e.nsig_names] + collect.sort(key=lambda x: 
x.path) if args.github_repo: - for i,item in enumerate(collect,1): - print( format_item_as_github_task_list(i,item,args.github_repo)) + for i, item in enumerate(collect, 1): + print(format_item_as_github_task_list(i, item, args.github_repo)) else: - for i,item in enumerate(collect,1): - print( format_item_as_plain(i, item)) + for i, item in enumerate(collect, 1): + print(format_item_as_plain(i, item)) + -if __name__ == "__main__": - import sys +if __name__ == '__main__': sys.exit(main()) diff --git a/scripts/gen_release_notes.py b/scripts/gen_release_notes.py deleted file mode 100644 index 7e4ffca59a0ab..0000000000000 --- a/scripts/gen_release_notes.py +++ /dev/null @@ -1,95 +0,0 @@ -from __future__ import print_function -import sys -import json -from pandas.io.common import urlopen -from datetime import datetime - - -class Milestone(object): - - def __init__(self, title, number): - self.title = title - self.number = number - - def __eq__(self, other): - if isinstance(other, Milestone): - return self.number == other.number - return False - - -class Issue(object): - - def __init__(self, title, labels, number, milestone, body, state): - self.title = title - self.labels = set([x['name'] for x in labels]) - self.number = number - self.milestone = milestone - self.body = body - self.closed = state == 'closed' - - def __eq__(self, other): - if isinstance(other, Issue): - return self.number == other.number - return False - - -def get_issues(): - all_issues = [] - page_number = 1 - while True: - iss = _get_page(page_number) - if len(iss) == 0: - break - page_number += 1 - all_issues.extend(iss) - return all_issues - - -def _get_page(page_number): - gh_url = ('https://api.github.com/repos/pandas-dev/pandas/issues?' 
- 'milestone=*&state=closed&assignee=*&page=%d') % page_number - with urlopen(gh_url) as resp: - rs = resp.readlines()[0] - jsondata = json.loads(rs) - issues = [Issue(x['title'], x['labels'], x['number'], - get_milestone(x['milestone']), x['body'], x['state']) - for x in jsondata] - return issues - - -def get_milestone(data): - if data is None: - return None - return Milestone(data['title'], data['number']) - - -def collate_label(issues, label): - lines = [] - for x in issues: - if label in x.labels: - lines.append('\t- %s(#%d)' % (x.title, x.number)) - - return '\n'.join(lines) - - -def release_notes(milestone): - issues = get_issues() - - headers = ['New Features', 'Improvements to existing features', - 'API Changes', 'Bug fixes'] - labels = ['New', 'Enhancement', 'API-Change', 'Bug'] - - rs = 'pandas %s' % milestone - rs += '\n' + ('=' * len(rs)) - rs += '\n\n **Release date:** %s' % datetime.today().strftime('%B %d, %Y') - for i, h in enumerate(headers): - rs += '\n\n**%s**\n\n' % h - l = labels[i] - rs += collate_label(issues, l) - - return rs - -if __name__ == '__main__': - - rs = release_notes(sys.argv[1]) - print(rs) diff --git a/scripts/git-mrb b/scripts/git-mrb deleted file mode 100644 index c15e6dbf9f51a..0000000000000 --- a/scripts/git-mrb +++ /dev/null @@ -1,82 +0,0 @@ -#!/usr/bin/env python -"""git-mrb: merge remote branch. - -git mrb [remote:branch OR remote-branch] [onto] [upstream] - -remote must be locally available, and branch must exist in that remote. - -If 'onto' branch isn't given, default is 'master'. - -If 'upstream' repository isn't given, default is 'origin'. - -You can separate the remote and branch spec with either a : or a -. 
- -Taken from IPython project -""" -#----------------------------------------------------------------------------- -# Imports -#----------------------------------------------------------------------------- - -from subprocess import check_call -import sys - -#----------------------------------------------------------------------------- -# Functions -#----------------------------------------------------------------------------- - -def sh(cmd): - cmd = cmd.format(**shvars) - print('$', cmd) - check_call(cmd, shell=True) - -#----------------------------------------------------------------------------- -# Main Script -#----------------------------------------------------------------------------- - -argv = sys.argv[1:] -narg = len(argv) - -try: - branch_spec = argv[0] - sep = ':' if ':' in branch_spec else '-' - remote, branch = branch_spec.split(':', 1) - if not branch: - raise ValueError('Branch spec %s invalid, branch not found' % - branch_spec) -except: - import traceback as tb - tb.print_exc() - print(__doc__) - sys.exit(1) - -onto = argv[1] if narg >= 2 else 'master' -upstream = argv[1] if narg == 3 else 'origin' - -# Git doesn't like ':' in branch names. -if sep == ':': - branch_spec = branch_spec.replace(':', '-') - -# Global used by sh -shvars = dict(remote=remote, branch_spec=branch_spec, branch=branch, - onto=onto, upstream=upstream) - -# Start git calls. -sh('git fetch {remote}') -sh('git checkout -b {branch_spec} {onto}') -sh('git merge {remote}/{branch}') - -print(""" -************************************************************* - Run test suite. If tests pass, run the following to merge: - -git checkout {onto} -git merge {branch_spec} -git push {upstream} {onto} - -************************************************************* -""".format(**shvars)) - -ans = raw_input("Revert to master and delete temporary branch? 
[Y/n]: ") -if ans.strip().lower() in ('', 'y', 'yes'): - sh('git checkout {onto}') - sh('git branch -D {branch_spec}') \ No newline at end of file diff --git a/scripts/git_code_churn.py b/scripts/git_code_churn.py deleted file mode 100644 index 18c9b244a6ba0..0000000000000 --- a/scripts/git_code_churn.py +++ /dev/null @@ -1,34 +0,0 @@ -import subprocess -import os -import re -import sys - -import numpy as np - -from pandas import * - - -if __name__ == '__main__': - from vbench.git import GitRepo - repo = GitRepo('/Users/wesm/code/pandas') - churn = repo.get_churn_by_file() - - file_include = [] - for path in churn.major_axis: - if path.endswith('.pyx') or path.endswith('.py'): - file_include.append(path) - commits_include = [sha for sha in churn.minor_axis - if 'LF' not in repo.messages[sha]] - commits_include.remove('dcf3490') - - clean_churn = churn.reindex(major=file_include, minor=commits_include) - - by_commit = clean_churn.sum('major').sum(1) - - by_date = by_commit.groupby(repo.commit_date).sum() - - by_date = by_date.drop([datetime(2011, 6, 10)]) - - # clean out days where I touched Cython - - by_date = by_date[by_date < 5000] diff --git a/scripts/groupby_sample.py b/scripts/groupby_sample.py deleted file mode 100644 index 42008858d3cad..0000000000000 --- a/scripts/groupby_sample.py +++ /dev/null @@ -1,54 +0,0 @@ -from pandas import * -import numpy as np -import string -import pandas.compat as compat - -g1 = np.array(list(string.letters))[:-1] -g2 = np.arange(510) -df_small = DataFrame({'group1': ["a", "b", "a", "a", "b", "c", "c", "c", "c", - "c", "a", "a", "a", "b", "b", "b", "b"], - 'group2': [1, 2, 3, 4, 1, 3, 5, 6, 5, 4, 1, 2, 3, 4, 3, 2, 1], - 'value': ["apple", "pear", "orange", "apple", - "banana", "durian", "lemon", "lime", - "raspberry", "durian", "peach", "nectarine", - "banana", "lemon", "guava", "blackberry", - "grape"]}) -value = df_small['value'].values.repeat(3) -df = DataFrame({'group1': g1.repeat(4000 * 5), - 'group2': np.tile(g2, 400 * 
5), - 'value': value.repeat(4000 * 5)}) - - -def random_sample(): - grouped = df.groupby(['group1', 'group2'])['value'] - from random import choice - choose = lambda group: choice(group.index) - indices = grouped.apply(choose) - return df.reindex(indices) - - -def random_sample_v2(): - grouped = df.groupby(['group1', 'group2'])['value'] - from random import choice - choose = lambda group: choice(group.index) - indices = [choice(v) for k, v in compat.iteritems(grouped.groups)] - return df.reindex(indices) - - -def do_shuffle(arr): - from random import shuffle - result = arr.copy().values - shuffle(result) - return result - - -def shuffle_uri(df, grouped): - perm = np.r_[tuple([np.random.permutation( - idxs) for idxs in compat.itervalues(grouped.groups)])] - df['state_permuted'] = np.asarray(df.ix[perm]['value']) - -df2 = df.copy() -grouped = df2.groupby('group1') -shuffle_uri(df2, grouped) - -df2['state_perm'] = grouped['value'].transform(do_shuffle) diff --git a/scripts/groupby_speed.py b/scripts/groupby_speed.py deleted file mode 100644 index 3be9fac12418e..0000000000000 --- a/scripts/groupby_speed.py +++ /dev/null @@ -1,35 +0,0 @@ -from __future__ import print_function -from pandas import * - -rng = DatetimeIndex('1/3/2011', '11/30/2011', offset=offsets.Minute()) - -df = DataFrame(np.random.randn(len(rng), 5), index=rng, - columns=list('OHLCV')) - -rng5 = DatetimeIndex('1/3/2011', '11/30/2011', offset=offsets.Minute(5)) -gp = rng5.asof -grouped = df.groupby(gp) - - -def get1(dt): - k = gp(dt) - return grouped.get_group(k) - - -def get2(dt): - k = gp(dt) - return df.ix[grouped.groups[k]] - - -def f(): - for i, date in enumerate(df.index): - if i % 10000 == 0: - print(i) - get1(date) - - -def g(): - for i, date in enumerate(df.index): - if i % 10000 == 0: - print(i) - get2(date) diff --git a/scripts/groupby_test.py b/scripts/groupby_test.py deleted file mode 100644 index 5acf7da7534a3..0000000000000 --- a/scripts/groupby_test.py +++ /dev/null @@ -1,145 +0,0 @@ 
-from collections import defaultdict - -from numpy import nan -import numpy as np - -from pandas import * - -import pandas.lib as tseries -import pandas.core.groupby as gp -import pandas.util.testing as tm -from pandas.compat import range -reload(gp) - -""" - -k = 1000 -values = np.random.randn(8 * k) -key1 = np.array(['foo', 'bar', 'baz', 'bar', 'foo', 'baz', 'bar', 'baz'] * k, - dtype=object) -key2 = np.array(['b', 'b', 'b', 'b', 'a', 'a', 'a', 'a' ] * k, - dtype=object) -shape, labels, idicts = gp.labelize(key1, key2) - -print(tseries.group_labels(key1)) - -# print(shape) -# print(labels) -# print(idicts) - -result = tseries.group_aggregate(values, labels, shape) - -print(tseries.groupby_indices(key2)) - -df = DataFrame({'key1' : key1, - 'key2' : key2, - 'v1' : values, - 'v2' : values}) -k1 = df['key1'] -k2 = df['key2'] - -# del df['key1'] -# del df['key2'] - -# r2 = gp.multi_groupby(df, np.sum, k1, k2) - -# print(result) - -gen = gp.generate_groups(df['v1'], labels, shape, axis=1, - factory=DataFrame) - -res = defaultdict(dict) -for a, gen1 in gen: - for b, group in gen1: - print(a, b) - print(group) - # res[b][a] = group['values'].sum() - res[b][a] = group.sum() - -res = DataFrame(res) - -grouped = df.groupby(['key1', 'key2']) -""" - -# data = {'A' : [0, 0, 0, 0, 1, 1, 1, 1, 1, 1., nan, nan], -# 'B' : ['A', 'B'] * 6, -# 'C' : np.random.randn(12)} -# df = DataFrame(data) -# df['C'][2:10:2] = nan - -# single column -# grouped = df.drop(['B'], axis=1).groupby('A') -# exp = {} -# for cat, group in grouped: -# exp[cat] = group['C'].sum() -# exp = DataFrame({'C' : exp}) -# result = grouped.sum() - -# grouped = df.groupby(['A', 'B']) -# expd = {} -# for cat1, cat2, group in grouped: -# expd.setdefault(cat1, {})[cat2] = group['C'].sum() -# exp = DataFrame(expd).T.stack() -# result = grouped.sum()['C'] - -# print('wanted') -# print(exp) -# print('got') -# print(result) - -# tm.N = 10000 - -# mapping = {'A': 0, 'C': 1, 'B': 0, 'D': 1} -# tf = lambda x: x - x.mean() - -# 
df = tm.makeTimeDataFrame() -# ts = df['A'] - -# # grouped = df.groupby(lambda x: x.strftime('%m/%y')) -# grouped = df.groupby(mapping, axis=1) -# groupedT = df.T.groupby(mapping, axis=0) - -# r1 = groupedT.transform(tf).T -# r2 = grouped.transform(tf) - -# fillit = lambda x: x.fillna(method='pad') - -# f = lambda x: x - -# transformed = df.groupby(lambda x: x.strftime('%m/%y')).transform(lambda -# x: x) - -# def ohlc(group): -# return Series([group[0], group.max(), group.min(), group[-1]], -# index=['open', 'high', 'low', 'close']) -# grouper = [lambda x: x.year, lambda x: x.month] -# dr = DateRange('1/1/2000', '1/1/2002') -# ts = Series(np.random.randn(len(dr)), index=dr) - -# import string - -# k = 20 -# n = 1000 - -# keys = list(string.letters[:k]) - -# df = DataFrame({'A' : np.tile(keys, n), -# 'B' : np.repeat(keys[:k/2], n * 2), -# 'C' : np.random.randn(k * n)}) - -# def f(): -# for x in df.groupby(['A', 'B']): -# pass - -a = np.arange(100).repeat(100) -b = np.tile(np.arange(100), 100) -index = MultiIndex.from_arrays([a, b]) -s = Series(np.random.randn(len(index)), index) -df = DataFrame({'A': s}) -df['B'] = df.index.get_level_values(0) -df['C'] = df.index.get_level_values(1) - - -def f(): - for x in df.groupby(['B', 'B']): - pass diff --git a/scripts/hdfstore_panel_perf.py b/scripts/hdfstore_panel_perf.py deleted file mode 100644 index c66e9506fc4c5..0000000000000 --- a/scripts/hdfstore_panel_perf.py +++ /dev/null @@ -1,17 +0,0 @@ -from pandas import * -from pandas.util.testing import rands -from pandas.compat import range - -i, j, k = 7, 771, 5532 - -panel = Panel(np.random.randn(i, j, k), - items=[rands(10) for _ in range(i)], - major_axis=DatetimeIndex('1/1/2000', periods=j, - offset=offsets.Minute()), - minor_axis=[rands(10) for _ in range(k)]) - - -store = HDFStore('test.h5') -store.put('test_panel', panel, table=True) - -retrieved = store['test_panel'] diff --git a/scripts/json_manip.py b/scripts/json_manip.py deleted file mode 100644 index 
7ff4547825568..0000000000000 --- a/scripts/json_manip.py +++ /dev/null @@ -1,423 +0,0 @@ -""" - -Tasks -------- - -Search and transform jsonable structures, specifically to make it 'easy' to make tabular/csv output for other consumers. - -Example -~~~~~~~~~~~~~ - - *give me a list of all the fields called 'id' in this stupid, gnarly - thing* - - >>> Q('id',gnarly_data) - ['id1','id2','id3'] - - -Observations: ---------------------- - -1) 'simple data structures' exist and are common. They are tedious - to search. - -2) The DOM is another nested / treeish structure, and jQuery selector is - a good tool for that. - -3a) R, Numpy, Excel and other analysis tools want 'tabular' data. These - analyses are valuable and worth doing. - -3b) Dot/Graphviz, NetworkX, and some other analyses *like* treeish/dicty - things, and those analyses are also worth doing! - -3c) Some analyses are best done using 'one-off' and custom code in C, Python, - or another 'real' programming language. - -4) Arbitrary transforms are tedious and error prone. SQL is one solution, - XSLT is another, - -5) the XPATH/XML/XSLT family is.... not universally loved :) They are - very complete, and the completeness can make simple cases... gross. - -6) For really complicated data structures, we can write one-off code. Getting - 80% of the way is mostly okay. There will always have to be programmers - in the loop. - -7) Re-inventing SQL is probably a failure mode. So is reinventing XPATH, XSLT - and the like. Be wary of mission creep! Re-use when possible (e.g., can - we put the thing into a DOM using - -8) If the interface is good, people can improve performance later. - - -Simplifying ---------------- - - -1) Assuming 'jsonable' structures - -2) keys are strings or stringlike. Python allows any hashable to be a key. - for now, we pretend that doesn't happen. - -3) assumes most dicts are 'well behaved'. DAG, no cycles! 
- -4) assume that if people want really specialized transforms, they can do it - themselves. - -""" -from __future__ import print_function - -from collections import namedtuple -import csv -import itertools -from itertools import product -from operator import attrgetter as aget, itemgetter as iget -import operator -import sys -from pandas.compat import map, u, callable, Counter -import pandas.compat as compat - - -## note 'url' appears multiple places and not all extensions have same struct -ex1 = { - 'name': 'Gregg', - 'extensions': [ - {'id':'hello', - 'url':'url1'}, - {'id':'gbye', - 'url':'url2', - 'more': dict(url='url3')}, - ] -} - -## much longer example -ex2 = {u('metadata'): {u('accessibilities'): [{u('name'): u('accessibility.tabfocus'), - u('value'): 7}, - {u('name'): u('accessibility.mouse_focuses_formcontrol'), u('value'): False}, - {u('name'): u('accessibility.browsewithcaret'), u('value'): False}, - {u('name'): u('accessibility.win32.force_disabled'), u('value'): False}, - {u('name'): u('accessibility.typeaheadfind.startlinksonly'), u('value'): False}, - {u('name'): u('accessibility.usebrailledisplay'), u('value'): u('')}, - {u('name'): u('accessibility.typeaheadfind.timeout'), u('value'): 5000}, - {u('name'): u('accessibility.typeaheadfind.enabletimeout'), u('value'): True}, - {u('name'): u('accessibility.tabfocus_applies_to_xul'), u('value'): False}, - {u('name'): u('accessibility.typeaheadfind.flashBar'), u('value'): 1}, - {u('name'): u('accessibility.typeaheadfind.autostart'), u('value'): True}, - {u('name'): u('accessibility.blockautorefresh'), u('value'): False}, - {u('name'): u('accessibility.browsewithcaret_shortcut.enabled'), - u('value'): True}, - {u('name'): u('accessibility.typeaheadfind.enablesound'), u('value'): True}, - {u('name'): u('accessibility.typeaheadfind.prefillwithselection'), - u('value'): True}, - {u('name'): u('accessibility.typeaheadfind.soundURL'), u('value'): u('beep')}, - {u('name'): u('accessibility.typeaheadfind'), 
u('value'): False}, - {u('name'): u('accessibility.typeaheadfind.casesensitive'), u('value'): 0}, - {u('name'): u('accessibility.warn_on_browsewithcaret'), u('value'): True}, - {u('name'): u('accessibility.usetexttospeech'), u('value'): u('')}, - {u('name'): u('accessibility.accesskeycausesactivation'), u('value'): True}, - {u('name'): u('accessibility.typeaheadfind.linksonly'), u('value'): False}, - {u('name'): u('isInstantiated'), u('value'): True}], - u('extensions'): [{u('id'): u('216ee7f7f4a5b8175374cd62150664efe2433a31'), - u('isEnabled'): True}, - {u('id'): u('1aa53d3b720800c43c4ced5740a6e82bb0b3813e'), u('isEnabled'): False}, - {u('id'): u('01ecfac5a7bd8c9e27b7c5499e71c2d285084b37'), u('isEnabled'): True}, - {u('id'): u('1c01f5b22371b70b312ace94785f7b0b87c3dfb2'), u('isEnabled'): True}, - {u('id'): u('fb723781a2385055f7d024788b75e959ad8ea8c3'), u('isEnabled'): True}], - u('fxVersion'): u('9.0'), - u('location'): u('zh-CN'), - u('operatingSystem'): u('WINNT Windows NT 5.1'), - u('surveyAnswers'): u(''), - u('task_guid'): u('d69fbd15-2517-45b5-8a17-bb7354122a75'), - u('tpVersion'): u('1.2'), - u('updateChannel'): u('beta')}, - u('survey_data'): { - u('extensions'): [{u('appDisabled'): False, - u('id'): u('testpilot?labs.mozilla.com'), - u('isCompatible'): True, - u('isEnabled'): True, - u('isPlatformCompatible'): True, - u('name'): u('Test Pilot')}, - {u('appDisabled'): True, - u('id'): u('dict?www.youdao.com'), - u('isCompatible'): False, - u('isEnabled'): False, - u('isPlatformCompatible'): True, - u('name'): u('Youdao Word Capturer')}, - {u('appDisabled'): False, - u('id'): u('jqs?sun.com'), - u('isCompatible'): True, - u('isEnabled'): True, - u('isPlatformCompatible'): True, - u('name'): u('Java Quick Starter')}, - {u('appDisabled'): False, - u('id'): u('?20a82645-c095-46ed-80e3-08825760534b?'), - u('isCompatible'): True, - u('isEnabled'): True, - u('isPlatformCompatible'): True, - u('name'): u('Microsoft .NET Framework Assistant')}, - {u('appDisabled'): 
False, - u('id'): u('?a0d7ccb3-214d-498b-b4aa-0e8fda9a7bf7?'), - u('isCompatible'): True, - u('isEnabled'): True, - u('isPlatformCompatible'): True, - u('name'): u('WOT')}], - u('version_number'): 1}} - -# class SurveyResult(object): - -# def __init__(self, record): -# self.record = record -# self.metadata, self.survey_data = self._flatten_results() - -# def _flatten_results(self): -# survey_data = self.record['survey_data'] -# extensions = DataFrame(survey_data['extensions']) - -def denorm(queries,iterable_of_things,default=None): - """ - 'repeat', or 'stutter' to 'tableize' for downstream. - (I have no idea what a good word for this is!) - - Think ``kronecker`` products, or: - - ``SELECT single,multiple FROM table;`` - - single multiple - ------- --------- - id1 val1 - id1 val2 - - - Args: - - queries: iterable of ``Q`` queries. - iterable_of_things: to be queried. - - Returns: - - list of 'stuttered' output, where if a query returns - a 'single', it gets repeated appropriately. - - - """ - - def _denorm(queries,thing): - fields = [] - results = [] - for q in queries: - #print(q) - r = Ql(q,thing) - #print("-- result: ", r) - if not r: - r = [default] - if isinstance(r[0], type({})): - fields.append(sorted(r[0].keys())) # dicty answers - else: - fields.append([q]) # stringy answer - - results.append(r) - - #print(results) - #print(fields) - flist = list(flatten(*map(iter,fields))) - - prod = itertools.product(*results) - for p in prod: - U = dict() - for (ii,thing) in enumerate(p): - #print(ii,thing) - if isinstance(thing, type({})): - U.update(thing) - else: - U[fields[ii][0]] = thing - - yield U - - return list(flatten(*[_denorm(queries,thing) for thing in iterable_of_things])) - - -def default_iget(fields,default=None,): - """ itemgetter with 'default' handling, that *always* returns lists - - API CHANGES from ``operator.itemgetter`` - - Note: Sorry to break the iget api... (fields vs *fields) - Note: *always* returns a list... 
unlike itemgetter, - which can return tuples or 'singles' - """ - myiget = operator.itemgetter(*fields) - L = len(fields) - def f(thing): - try: - ans = list(myiget(thing)) - if L < 2: - ans = [ans,] - return ans - except KeyError: - # slower! - return [thing.get(x,default) for x in fields] - - f.__doc__ = "itemgetter with default %r for fields %r" %(default,fields) - f.__name__ = "default_itemgetter" - return f - - -def flatten(*stack): - """ - helper function for flattening iterables of generators in a - sensible way. - """ - stack = list(stack) - while stack: - try: x = next(stack[0]) - except StopIteration: - stack.pop(0) - continue - if hasattr(x,'next') and callable(getattr(x,'next')): - stack.insert(0, x) - - #if isinstance(x, (GeneratorType,listerator)): - else: yield x - - -def _Q(filter_, thing): - """ underlying machinery for Q function recursion """ - T = type(thing) - if isinstance({}, T): - for k,v in compat.iteritems(thing): - #print(k,v) - if filter_ == k: - if isinstance(v, type([])): - yield iter(v) - else: - yield v - - if type(v) in (type({}),type([])): - yield Q(filter_,v) - - elif isinstance([], T): - for k in thing: - #print(k) - yield Q(filter_,k) - - else: - # no recursion. - pass - -def Q(filter_,thing): - """ - type(filter): - - list: a flattened list of all searches (one list) - - dict: dict with vals each of which is that search - - Notes: - - [1] 'parent thing', with space, will do a descendent - [2] this will come back 'flattened' jQuery style - [3] returns a generator. Use ``Ql`` if you want a list. - - """ - if isinstance(filter_, type([])): - return flatten(*[_Q(x,thing) for x in filter_]) - elif isinstance(filter_, type({})): - d = dict.fromkeys(list(filter_.keys())) - #print(d) - for k in d: - #print(flatten(Q(k,thing))) - d[k] = Q(k,thing) - - return d - - else: - if " " in filter_: # i.e. 
"antecendent post" - parts = filter_.strip().split() - r = None - for p in parts: - r = Ql(p,thing) - thing = r - - return r - - else: # simple. - return flatten(_Q(filter_,thing)) - -def Ql(filter_,thing): - """ same as Q, but returns a list, not a generator """ - res = Q(filter_,thing) - - if isinstance(filter_, type({})): - for k in res: - res[k] = list(res[k]) - return res - - else: - return list(res) - - - -def countit(fields,iter_of_iter,default=None): - """ - note: robust to fields not being in i_of_i, using ``default`` - """ - C = Counter() # needs hashables - T = namedtuple("Thing",fields) - get = default_iget(*fields,default=default) - return Counter( - (T(*get(thing)) for thing in iter_of_iter) - ) - - -## right now this works for one row... -def printout(queries,things,default=None, f=sys.stdout, **kwargs): - """ will print header and objects - - **kwargs go to csv.DictWriter - - help(csv.DictWriter) for more. - """ - - results = denorm(queries,things,default=None) - fields = set(itertools.chain(*(x.keys() for x in results))) - - W = csv.DictWriter(f=f,fieldnames=fields,**kwargs) - #print("---prod---") - #print(list(prod)) - W.writeheader() - for r in results: - W.writerow(r) - - -def test_run(): - print("\n>>> print(list(Q('url',ex1)))") - print(list(Q('url',ex1))) - assert list(Q('url',ex1)) == ['url1','url2','url3'] - assert Ql('url',ex1) == ['url1','url2','url3'] - - print("\n>>> print(list(Q(['name','id'],ex1)))") - print(list(Q(['name','id'],ex1))) - assert Ql(['name','id'],ex1) == ['Gregg','hello','gbye'] - - - print("\n>>> print(Ql('more url',ex1))") - print(Ql('more url',ex1)) - - - print("\n>>> list(Q('extensions',ex1))") - print(list(Q('extensions',ex1))) - - print("\n>>> print(Ql('extensions',ex1))") - print(Ql('extensions',ex1)) - - print("\n>>> printout(['name','extensions'],[ex1,], extrasaction='ignore')") - printout(['name','extensions'],[ex1,], extrasaction='ignore') - - print("\n\n") - - from pprint import pprint as pp - - print("-- 
note that the extension fields are also flattened! (and N/A) -- ") - pp(denorm(['location','fxVersion','notthere','survey_data extensions'],[ex2,], default="N/A")[:2]) - - -if __name__ == "__main__": - pass diff --git a/scripts/leak.py b/scripts/leak.py deleted file mode 100644 index 47f74bf020597..0000000000000 --- a/scripts/leak.py +++ /dev/null @@ -1,13 +0,0 @@ -from pandas import * -from pandas.compat import range -import numpy as np -import pandas.util.testing as tm -import os -import psutil - -pid = os.getpid() -proc = psutil.Process(pid) - -df = DataFrame(index=np.arange(100)) -for i in range(5000): - df[i] = 5 diff --git a/scripts/list_future_warnings.sh b/scripts/list_future_warnings.sh new file mode 100755 index 0000000000000..0c4046bbb5f49 --- /dev/null +++ b/scripts/list_future_warnings.sh @@ -0,0 +1,46 @@ +#!/bin/bash + +# Check all future warnings in Python files, and report them with the version +# where the FutureWarning was added. +# +# This is useful to detect features that have been deprecated, and should be +# removed from the code. For example, if a line of code contains: +# +# warning.warn('Method deprecated', FutureWarning, stacklevel=2) +# +# Which is released in Pandas 0.20.0, then it is expected that the method +# is removed before releasing Pandas 0.24.0, including the warning. If it +# is not, this script will list this line, with the version 0.20.0, which +# will make it easy to detect that it had to be removed. +# +# In some cases this script can return false positives, for example in files +# where FutureWarning is used to detect deprecations, or similar. The EXCLUDE +# variable can be used to ignore files that use FutureWarning, but do not +# deprecate functionality. 
+# +# Usage: +# +# $ ./list_future_warnings.sh + +EXCLUDE="^pandas/tests/|" # tests validate that FutureWarnings are raised +EXCLUDE+="^pandas/util/_decorators.py$|" # generic deprecate function that raises warning +EXCLUDE+="^pandas/util/_depr_module.py$|" # generic deprecate module that raises warnings +EXCLUDE+="^pandas/util/testing.py$|" # contains function to evaluate if warning is raised +EXCLUDE+="^pandas/io/parsers.py$" # implements generic deprecation system in io reading + +BASE_DIR="$(dirname $0)/.." +cd $BASE_DIR +FILES=`grep -RIl "FutureWarning" pandas/* | grep -vE "$EXCLUDE"` +OUTPUT=() +IFS=$'\n' + +for FILE in $FILES; do + FILE_LINES=`git blame -sf $FILE | grep FutureWarning | tr -s " " | cut -d " " -f1,3` + for FILE_LINE in $FILE_LINES; do + TAG=$(git tag --contains $(echo $FILE_LINE | cut -d" " -f1) | head -n1) + OUTPUT_ROW=`printf "%-14s %-16s %s" ${TAG:-"(not released)"} $FILE_LINE $FILE` + OUTPUT+=($OUTPUT_ROW) + done +done + +printf "%s\n" "${OUTPUT[@]}" | sort -V diff --git a/scripts/merge-py.py b/scripts/merge-pr.py similarity index 80% rename from scripts/merge-py.py rename to scripts/merge-pr.py index b9350f8feceb8..31264cad52e4f 100755 --- a/scripts/merge-py.py +++ b/scripts/merge-pr.py @@ -22,7 +22,6 @@ # usage: ./apache-pr-merge.py (see config env vars below) # # Lightly modified from version of this script in incubator-parquet-format - from __future__ import print_function from subprocess import check_output @@ -99,6 +98,14 @@ def continue_maybe(prompt): fail("Okay, exiting") +def continue_maybe2(prompt): + result = input("\n%s (y/n): " % prompt) + if result.lower() != "y": + return False + else: + return True + + original_head = run_cmd("git rev-parse HEAD")[:8] @@ -152,7 +159,7 @@ def merge_pr(pr_num, target_ref): if body is not None: merge_message_flags += ["-m", '\n'.join(textwrap.wrap(body))] - authors = "\n".join(["Author: %s" % a for a in distinct_authors]) + authors = "\n".join("Author: %s" % a for a in distinct_authors) 
merge_message_flags += ["-m", authors] @@ -193,6 +200,40 @@ def merge_pr(pr_num, target_ref): return merge_hash +def update_pr(pr_num, user_login, base_ref): + + pr_branch_name = "%s_MERGE_PR_%s" % (BRANCH_PREFIX, pr_num) + + run_cmd("git fetch %s pull/%s/head:%s" % (PR_REMOTE_NAME, pr_num, + pr_branch_name)) + run_cmd("git checkout %s" % pr_branch_name) + + continue_maybe("Update ready (local ref %s)? Push to %s/%s?" % ( + pr_branch_name, user_login, base_ref)) + + push_user_remote = "https://github.com/%s/pandas.git" % user_login + + try: + run_cmd('git push %s %s:%s' % (push_user_remote, pr_branch_name, + base_ref)) + except Exception as e: + + if continue_maybe2("Force push?"): + try: + run_cmd( + 'git push -f %s %s:%s' % (push_user_remote, pr_branch_name, + base_ref)) + except Exception as e: + fail("Exception while pushing: %s" % e) + clean_up() + else: + fail("Exception while pushing: %s" % e) + clean_up() + + clean_up() + print("Pull request #%s updated!" % pr_num) + + def cherry_pick(pr_num, merge_hash, default_branch): pick_ref = input("Enter a branch name [%s]: " % default_branch) if pick_ref == "": @@ -233,6 +274,7 @@ def fix_version_from_branch(branch, versions): branch_ver = branch.replace("branch-", "") return filter(lambda x: x.name.startswith(branch_ver), versions)[-1] + pr_num = input("Which pull request would you like to merge? (e.g. 34): ") pr = get_json("%s/pulls/%s" % (GITHUB_API_BASE, pr_num)) @@ -255,10 +297,25 @@ def fix_version_from_branch(branch, versions): continue_maybe(msg) print("\n=== Pull Request #%s ===" % pr_num) -print("title\t%s\nsource\t%s\ntarget\t%s\nurl\t%s" - % (title, pr_repo_desc, target_ref, url)) -continue_maybe("Proceed with merging pull request #%s?" 
% pr_num) + +# we may have un-printable unicode in our title +try: + title = title.encode('raw_unicode_escape') +except Exception: + pass + +print("title\t{title}\nsource\t{source}\ntarget\t{target}\nurl\t{url}".format( + title=title, source=pr_repo_desc, target=target_ref, url=url)) + merged_refs = [target_ref] -merge_hash = merge_pr(pr_num, target_ref) +print("\nProceed with updating or merging pull request #%s?" % pr_num) +update = input("Update PR and push to remote (r), merge locally (l), " + "or do nothing (n) ?") +update = update.lower() + +if update == 'r': + merge_hash = update_pr(pr_num, user_login, base_ref) +elif update == 'l': + merge_hash = merge_pr(pr_num, target_ref) diff --git a/scripts/parser_magic.py b/scripts/parser_magic.py deleted file mode 100644 index 72fef39d8db65..0000000000000 --- a/scripts/parser_magic.py +++ /dev/null @@ -1,74 +0,0 @@ -from pandas.util.testing import set_trace -import pandas.util.testing as tm -import pandas.compat as compat - -from pandas import * -import ast -import inspect -import sys - - -def merge(a, b): - f, args, _ = parse_stmt(inspect.currentframe().f_back) - return DataFrame({args[0]: a, - args[1]: b}) - - -def parse_stmt(frame): - info = inspect.getframeinfo(frame) - call = info[-2][0] - mod = ast.parse(call) - body = mod.body[0] - if isinstance(body, (ast.Assign, ast.Expr)): - call = body.value - elif isinstance(body, ast.Call): - call = body - return _parse_call(call) - - -def _parse_call(call): - func = _maybe_format_attribute(call.func) - - str_args = [] - for arg in call.args: - if isinstance(arg, ast.Name): - str_args.append(arg.id) - elif isinstance(arg, ast.Call): - formatted = _format_call(arg) - str_args.append(formatted) - - return func, str_args, {} - - -def _format_call(call): - func, args, kwds = _parse_call(call) - content = '' - if args: - content += ', '.join(args) - if kwds: - fmt_kwds = ['%s=%s' % item for item in compat.iteritems(kwds)] - joined_kwds = ', '.join(fmt_kwds) - if args: - 
content = content + ', ' + joined_kwds - else: - content += joined_kwds - return '%s(%s)' % (func, content) - - -def _maybe_format_attribute(name): - if isinstance(name, ast.Attribute): - return _format_attribute(name) - return name.id - - -def _format_attribute(attr): - obj = attr.value - if isinstance(attr.value, ast.Attribute): - obj = _format_attribute(attr.value) - else: - obj = obj.id - return '.'.join((obj, attr.attr)) - -a = tm.makeTimeSeries() -b = tm.makeTimeSeries() -df = merge(a, b) diff --git a/scripts/preepoch_test.py b/scripts/preepoch_test.py deleted file mode 100644 index 36a3d768e671f..0000000000000 --- a/scripts/preepoch_test.py +++ /dev/null @@ -1,23 +0,0 @@ -import numpy as np -from pandas import * - - -def panda_test(): - - # generate some data - data = np.random.rand(50, 5) - # generate some dates - dates = DatetimeIndex('1/1/1969', periods=50) - # generate column headings - cols = ['A', 'B', 'C', 'D', 'E'] - - df = DataFrame(data, index=dates, columns=cols) - - # save to HDF5Store - store = HDFStore('bugzilla.h5', mode='w') - store['df'] = df # This gives: OverflowError: mktime argument out of range - store.close() - - -if __name__ == '__main__': - panda_test() diff --git a/scripts/pypistats.py b/scripts/pypistats.py deleted file mode 100644 index 41343f6d30c76..0000000000000 --- a/scripts/pypistats.py +++ /dev/null @@ -1,101 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -""" -Calculates the total number of downloads that a particular PyPI package has -received across all versions tracked by PyPI -""" - -from datetime import datetime -import locale -import sys -import xmlrpclib -import pandas as pd - -locale.setlocale(locale.LC_ALL, '') - - -class PyPIDownloadAggregator(object): - - def __init__(self, package_name, include_hidden=True): - self.package_name = package_name - self.include_hidden = include_hidden - self.proxy = xmlrpclib.Server('http://pypi.python.org/pypi') - self._downloads = {} - - @property - def releases(self): 
- """Retrieves the release number for each uploaded release""" - - result = self.proxy.package_releases(self.package_name, - self.include_hidden) - - if len(result) == 0: - # no matching package--search for possibles, and limit to 15 - # results - results = self.proxy.search({ - 'name': self.package_name, - 'description': self.package_name - }, 'or')[:15] - - # make sure we only get unique package names - matches = [] - for match in results: - name = match['name'] - if name not in matches: - matches.append(name) - - # if only one package was found, return it - if len(matches) == 1: - self.package_name = matches[0] - return self.releases - - error = """No such package found: %s - -Possible matches include: -%s -""" % (self.package_name, '\n'.join('\t- %s' % n for n in matches)) - - sys.exit(error) - - return result - - def get_downloads(self): - """Calculate the total number of downloads for the package""" - downloads = {} - for release in self.releases: - urls = self.proxy.release_urls(self.package_name, release) - urls = pd.DataFrame(urls) - urls['version'] = release - downloads[release] = urls - - return pd.concat(downloads, ignore_index=True) - -if __name__ == '__main__': - agg = PyPIDownloadAggregator('pandas') - - data = agg.get_downloads() - - to_omit = ['0.2b1', '0.2beta'] - - isostrings = data['upload_time'].map(lambda x: x.value) - data['upload_time'] = pd.to_datetime(isostrings) - - totals = data.groupby('version').downloads.sum() - rollup = {'0.8.0rc1': '0.8.0', - '0.8.0rc2': '0.8.0', - '0.3.0.beta': '0.3.0', - '0.3.0.beta2': '0.3.0'} - downloads = totals.groupby(lambda x: rollup.get(x, x)).sum() - - first_upload = data.groupby('version').upload_time.min() - - result = pd.DataFrame({'downloads': totals, - 'release_date': first_upload}) - result = result.sort('release_date') - result = result.drop(to_omit + list(rollup.keys())) - result.index.name = 'release' - - by_date = result.reset_index().set_index('release_date').downloads - dummy = 
pd.Series(index=pd.DatetimeIndex([datetime(2012, 12, 27)])) - by_date = by_date.append(dummy).shift(1).fillna(0) diff --git a/scripts/roll_median_leak.py b/scripts/roll_median_leak.py deleted file mode 100644 index 07161cc6499bf..0000000000000 --- a/scripts/roll_median_leak.py +++ /dev/null @@ -1,26 +0,0 @@ -from __future__ import print_function -from pandas import * - -import numpy as np -import os - -from vbench.api import Benchmark -from pandas.util.testing import rands -from pandas.compat import range -import pandas.lib as lib -import pandas._sandbox as sbx -import time - -import psutil - -pid = os.getpid() -proc = psutil.Process(pid) - -lst = SparseList() -lst.append([5] * 10000) -lst.append(np.repeat(np.nan, 1000000)) - -for _ in range(10000): - print(proc.get_memory_info()) - sdf = SparseDataFrame({'A': lst.to_array()}) - chunk = sdf[sdf['A'] == 5] diff --git a/scripts/runtests.py b/scripts/runtests.py deleted file mode 100644 index e14752b43116b..0000000000000 --- a/scripts/runtests.py +++ /dev/null @@ -1,5 +0,0 @@ -from __future__ import print_function -import os -print(os.getpid()) -import nose -nose.main('pandas.core') diff --git a/scripts/test_py27.bat b/scripts/test_py27.bat deleted file mode 100644 index 11e3056287e31..0000000000000 --- a/scripts/test_py27.bat +++ /dev/null @@ -1,6 +0,0 @@ -SET PATH=C:\MinGW\bin;C:\Python27;C:\Python27\Scripts;%PATH% - -python setup.py clean -python setup.py build_ext -c mingw32 --inplace - -nosetests pandas \ No newline at end of file diff --git a/scripts/testmed.py b/scripts/testmed.py deleted file mode 100644 index dd3b952d58c60..0000000000000 --- a/scripts/testmed.py +++ /dev/null @@ -1,171 +0,0 @@ -## {{{ Recipe 576930 (r10): Efficient Running Median using an Indexable Skiplist - -from random import random -from math import log, ceil -from pandas.compat import range -from numpy.random import randn -from pandas.lib.skiplist import rolling_median - - -class Node(object): - __slots__ = 'value', 'next', 'width' - - 
def __init__(self, value, next, width): - self.value, self.next, self.width = value, next, width - - -class End(object): - 'Sentinel object that always compares greater than another object' - def __cmp__(self, other): - return 1 - -NIL = Node(End(), [], []) # Singleton terminator node - - -class IndexableSkiplist: - 'Sorted collection supporting O(lg n) insertion, removal, and lookup by rank.' - - def __init__(self, expected_size=100): - self.size = 0 - self.maxlevels = int(1 + log(expected_size, 2)) - self.head = Node('HEAD', [NIL] * self.maxlevels, [1] * self.maxlevels) - - def __len__(self): - return self.size - - def __getitem__(self, i): - node = self.head - i += 1 - for level in reversed(range(self.maxlevels)): - while node.width[level] <= i: - i -= node.width[level] - node = node.next[level] - return node.value - - def insert(self, value): - # find first node on each level where node.next[levels].value > value - chain = [None] * self.maxlevels - steps_at_level = [0] * self.maxlevels - node = self.head - for level in reversed(range(self.maxlevels)): - while node.next[level].value <= value: - steps_at_level[level] += node.width[level] - node = node.next[level] - chain[level] = node - - # insert a link to the newnode at each level - d = min(self.maxlevels, 1 - int(log(random(), 2.0))) - newnode = Node(value, [None] * d, [None] * d) - steps = 0 - for level in range(d): - prevnode = chain[level] - newnode.next[level] = prevnode.next[level] - prevnode.next[level] = newnode - newnode.width[level] = prevnode.width[level] - steps - prevnode.width[level] = steps + 1 - steps += steps_at_level[level] - for level in range(d, self.maxlevels): - chain[level].width[level] += 1 - self.size += 1 - - def remove(self, value): - # find first node on each level where node.next[levels].value >= value - chain = [None] * self.maxlevels - node = self.head - for level in reversed(range(self.maxlevels)): - while node.next[level].value < value: - node = node.next[level] - chain[level] = 
node - if value != chain[0].next[0].value: - raise KeyError('Not Found') - - # remove one link at each level - d = len(chain[0].next[0].next) - for level in range(d): - prevnode = chain[level] - prevnode.width[level] += prevnode.next[level].width[level] - 1 - prevnode.next[level] = prevnode.next[level].next[level] - for level in range(d, self.maxlevels): - chain[level].width[level] -= 1 - self.size -= 1 - - def __iter__(self): - 'Iterate over values in sorted order' - node = self.head.next[0] - while node is not NIL: - yield node.value - node = node.next[0] - -from collections import deque -from itertools import islice - - -class RunningMedian: - 'Fast running median with O(lg n) updates where n is the window size' - - def __init__(self, n, iterable): - from pandas.lib.skiplist import IndexableSkiplist as skiplist - - self.it = iter(iterable) - self.queue = deque(islice(self.it, n)) - self.skiplist = IndexableSkiplist(n) - for elem in self.queue: - self.skiplist.insert(elem) - - def __iter__(self): - queue = self.queue - skiplist = self.skiplist - midpoint = len(queue) // 2 - yield skiplist[midpoint] - for newelem in self.it: - oldelem = queue.popleft() - skiplist.remove(oldelem) - queue.append(newelem) - skiplist.insert(newelem) - yield skiplist[midpoint] - -N = 100000 -K = 10000 - -import time - - -def test(): - from numpy.random import randn - - arr = randn(N) - - def _test(arr, k): - meds = RunningMedian(k, arr) - return list(meds) - - _test(arr, K) - - - -def test2(): - - arr = randn(N) - - return rolling_median(arr, K) - - -def runmany(f, arr, arglist): - timings = [] - - for arg in arglist: - tot = 0 - for i in range(5): - tot += _time(f, arr, arg) - timings.append(tot / 5) - - return timings - - -def _time(f, *args): - _start = time.clock() - result = f(*args) - return time.clock() - _start - -if __name__ == '__main__': - test2() diff --git a/scripts/touchup_gh_issues.py b/scripts/touchup_gh_issues.py deleted file mode 100755 index 
8aa6d426156f0..0000000000000 --- a/scripts/touchup_gh_issues.py +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -from __future__ import print_function -from collections import OrderedDict -import sys -import re - -""" -Reads in stdin, replace all occurences of '#num' or 'GH #num' with -links to github issue. dumps the issue anchors before the next -section header -""" - -pat = "((?:\s*GH\s*)?)#(\d{3,4})([^_]|$)?" -rep_pat = r"\1GH\2_\3" -anchor_pat = ".. _GH{id}: https://github.com/pandas-dev/pandas/issues/{id}" -section_pat = "^pandas\s[\d\.]+\s*$" - - -def main(): - issues = OrderedDict() - while True: - - line = sys.stdin.readline() - if not line: - break - - if re.search(section_pat, line): - for id in issues: - print(anchor_pat.format(id=id).rstrip()) - if issues: - print("\n") - issues = OrderedDict() - - for m in re.finditer(pat, line): - id = m.group(2) - if id not in issues: - issues[id] = True - print(re.sub(pat, rep_pat, line).rstrip()) - pass - -if __name__ == "__main__": - main() diff --git a/scripts/use_build_cache.py b/scripts/use_build_cache.py deleted file mode 100755 index f8c2df2a8a45d..0000000000000 --- a/scripts/use_build_cache.py +++ /dev/null @@ -1,354 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -import os - -""" -This script should be run from the repo root dir, it rewrites setup.py -to use the build cache directory specified in the envar BUILD_CACHE_DIR -or in a file named .build_cache_dir in the repo root directory. - -Artifacts included in the cache: -- gcc artifacts -- The .c files resulting from cythonizing pyx/d files -- 2to3 refactoring results (when run under python3) - -Tested on releases back to 0.7.0. - -""" - -try: - import argparse - argparser = argparse.ArgumentParser(description=""" - 'Program description. 
- """.strip()) - - argparser.add_argument('-f', '--force-overwrite', - default=False, - help='Setting this will overwrite any existing cache results for the current commit', - action='store_true') - argparser.add_argument('-d', '--debug', - default=False, - help='Report cache hits/misses', - action='store_true') - - args = argparser.parse_args() -except: - class Foo(object): - debug=False - force_overwrite=False - - args = Foo() # for 2.6, no argparse - -#print(args.accumulate(args.integers)) - -shim=""" -import os -import sys -import shutil -import warnings -import re -""" - -shim += ("BC_FORCE_OVERWRITE = %s\n" % args.force_overwrite) -shim += ("BC_DEBUG = %s\n" % args.debug) - -shim += """ -try: - if not ("develop" in sys.argv) and not ("install" in sys.argv): - 1/0 - basedir = os.path.dirname(__file__) - dotfile = os.path.join(basedir,".build_cache_dir") - BUILD_CACHE_DIR = "" - if os.path.exists(dotfile): - BUILD_CACHE_DIR = open(dotfile).readline().strip() - BUILD_CACHE_DIR = os.environ.get('BUILD_CACHE_DIR',BUILD_CACHE_DIR) - - if os.path.isdir(BUILD_CACHE_DIR): - print("--------------------------------------------------------") - print("BUILD CACHE ACTIVATED (V2). 
be careful, this is experimental.") - print("BUILD_CACHE_DIR: " + BUILD_CACHE_DIR ) - print("--------------------------------------------------------") - else: - BUILD_CACHE_DIR = None - - # retrieve 2to3 artifacts - if sys.version_info[0] >= 3: - from lib2to3 import refactor - from hashlib import sha1 - import shutil - import multiprocessing - pyver = "%d.%d" % (sys.version_info[:2]) - fileq = ["pandas"] - to_process = dict() - - # retrieve the hashes existing in the cache - orig_hashes=dict() - post_hashes=dict() - for path,dirs,files in os.walk(os.path.join(BUILD_CACHE_DIR,'pandas')): - for f in files: - s=f.split(".py-")[-1] - try: - prev_h,post_h,ver = s.split('-') - if ver == pyver: - orig_hashes[prev_h] = os.path.join(path,f) - post_hashes[post_h] = os.path.join(path,f) - except: - pass - - while fileq: - f = fileq.pop() - - if os.path.isdir(f): - fileq.extend([os.path.join(f,x) for x in os.listdir(f)]) - else: - if not f.endswith(".py"): - continue - else: - try: - h = sha1(open(f,"rb").read()).hexdigest() - except IOError: - to_process[h] = f - else: - if h in orig_hashes and not BC_FORCE_OVERWRITE: - src = orig_hashes[h] - if BC_DEBUG: - print("2to3 cache hit %s,%s" % (f,h)) - shutil.copyfile(src,f) - elif h not in post_hashes: - # we're not in a dev dir with already processed files - if BC_DEBUG: - print("2to3 cache miss (will process) %s,%s" % (f,h)) - to_process[h] = f - - avail_fixes = set(refactor.get_fixers_from_package("lib2to3.fixes")) - avail_fixes.discard('lib2to3.fixes.fix_next') - t=refactor.RefactoringTool(avail_fixes) - if to_process: - print("Starting 2to3 refactoring...") - for orig_h,f in to_process.items(): - if BC_DEBUG: - print("2to3 on %s" % f) - try: - t.refactor([f],True) - post_h = sha1(open(f, "rb").read()).hexdigest() - cached_fname = f + '-' + orig_h + '-' + post_h + '-' + pyver - path = os.path.join(BUILD_CACHE_DIR, cached_fname) - pathdir =os.path.dirname(path) - if BC_DEBUG: - print("cache put %s in %s" % (f, path)) - try: - 
os.makedirs(pathdir) - except OSError as exc: - import errno - if exc.errno == errno.EEXIST and os.path.isdir(pathdir): - pass - else: - raise - - shutil.copyfile(f, path) - - except Exception as e: - print("While processing %s 2to3 raised: %s" % (f,str(e))) - - pass - print("2to3 done refactoring.") - -except Exception as e: - if not isinstance(e,ZeroDivisionError): - print( "Exception: " + str(e)) - BUILD_CACHE_DIR = None - -class CompilationCacheMixin(object): - def __init__(self, *args, **kwds): - cache_dir = kwds.pop("cache_dir", BUILD_CACHE_DIR) - self.cache_dir = cache_dir - if not os.path.isdir(cache_dir): - raise Exception("Error: path to Cache directory (%s) is not a dir" % cache_dir) - - def _copy_from_cache(self, hash, target): - src = os.path.join(self.cache_dir, hash) - if os.path.exists(src) and not BC_FORCE_OVERWRITE: - if BC_DEBUG: - print("Cache HIT: asked to copy file %s in %s" % - (src,os.path.abspath(target))) - s = "." - for d in target.split(os.path.sep)[:-1]: - s = os.path.join(s, d) - if not os.path.exists(s): - os.mkdir(s) - shutil.copyfile(src, target) - - return True - - return False - - def _put_to_cache(self, hash, src): - target = os.path.join(self.cache_dir, hash) - if BC_DEBUG: - print( "Cache miss: asked to copy file from %s to %s" % (src,target)) - s = "." 
- for d in target.split(os.path.sep)[:-1]: - s = os.path.join(s, d) - if not os.path.exists(s): - os.mkdir(s) - shutil.copyfile(src, target) - - def _hash_obj(self, obj): - try: - return hash(obj) - except: - raise NotImplementedError("You must override this method") - -class CompilationCacheExtMixin(CompilationCacheMixin): - def _hash_file(self, fname): - from hashlib import sha1 - f= None - try: - hash = sha1() - hash.update(self.build_lib.encode('utf-8')) - try: - if sys.version_info[0] >= 3: - import io - f = io.open(fname, "rb") - else: - f = open(fname) - - first_line = f.readline() - # ignore cython generation timestamp header - if "Generated by Cython" not in first_line.decode('utf-8'): - hash.update(first_line) - hash.update(f.read()) - return hash.hexdigest() - - except: - raise - return None - finally: - if f: - f.close() - - except IOError: - return None - - def _hash_obj(self, ext): - from hashlib import sha1 - - sources = ext.sources - if (sources is None or - (not hasattr(sources, '__iter__')) or - isinstance(sources, str) or - sys.version[0] == 2 and isinstance(sources, unicode)): # argh - return False - - sources = list(sources) + ext.depends - hash = sha1() - try: - for fname in sources: - fhash = self._hash_file(fname) - if fhash: - hash.update(fhash.encode('utf-8')) - except: - return None - - return hash.hexdigest() - - -class CachingBuildExt(build_ext, CompilationCacheExtMixin): - def __init__(self, *args, **kwds): - CompilationCacheExtMixin.__init__(self, *args, **kwds) - kwds.pop("cache_dir", None) - build_ext.__init__(self, *args, **kwds) - - def build_extension(self, ext, *args, **kwds): - ext_path = self.get_ext_fullpath(ext.name) - build_path = os.path.join(self.build_lib, os.path.basename(ext_path)) - - hash = self._hash_obj(ext) - if hash and self._copy_from_cache(hash, ext_path): - return - - build_ext.build_extension(self, ext, *args, **kwds) - - hash = self._hash_obj(ext) - if os.path.exists(build_path): - self._put_to_cache(hash, 
build_path) # build_ext - if os.path.exists(ext_path): - self._put_to_cache(hash, ext_path) # develop - - def cython_sources(self, sources, extension): - import re - cplus = self.cython_cplus or getattr(extension, 'cython_cplus', 0) or \ - (extension.language and extension.language.lower() == 'c++') - target_ext = '.c' - if cplus: - target_ext = '.cpp' - - for i, s in enumerate(sources): - if not re.search("\.(pyx|pxi|pxd)$", s): - continue - ext_dir = os.path.dirname(s) - ext_basename = re.sub("\.[^\.]+$", "", os.path.basename(s)) - ext_basename += target_ext - target = os.path.join(ext_dir, ext_basename) - hash = self._hash_file(s) - sources[i] = target - if hash and self._copy_from_cache(hash, target): - continue - build_ext.cython_sources(self, [s], extension) - self._put_to_cache(hash, target) - - sources = [x for x in sources if x.startswith("pandas") or "lib." in x] - - return sources - -if BUILD_CACHE_DIR: # use the cache - cmdclass['build_ext'] = CachingBuildExt - -try: - # recent - setuptools_kwargs['use_2to3'] = True if BUILD_CACHE_DIR is None else False -except: - pass - -try: - # pre eb2234231 , ~ 0.7.0, - setuptools_args['use_2to3'] = True if BUILD_CACHE_DIR is None else False -except: - pass - -""" -def main(): - opd = os.path.dirname - opj = os.path.join - s= None - with open(opj(opd(__file__),"..","setup.py")) as f: - s = f.read() - if s: - if "BUILD CACHE ACTIVATED (V2)" in s: - print( "setup.py already wired with V2 build_cache, skipping..") - else: - SEP="\nsetup(" - before,after = s.split(SEP) - with open(opj(opd(__file__),"..","setup.py"),"wb") as f: - f.write((before + shim + SEP + after).encode('ascii')) - print(""" - setup.py was rewritten to use a build cache. 
- Make sure you've put the following in your .bashrc: - - export BUILD_CACHE_DIR= - echo $BUILD_CACHE_DIR > pandas_repo_rootdir/.build_cache_dir - - Once active, build results (compilation, cythonizations and 2to3 artifacts) - will be cached in "$BUILD_CACHE_DIR" and subsequent builds should be - sped up if no changes requiring recompilation were made. - - Go ahead and run: - - python setup.py clean - python setup.py develop - - """) - -if __name__ == '__main__': - import sys - sys.exit(main()) diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py new file mode 100755 index 0000000000000..8425882f07be1 --- /dev/null +++ b/scripts/validate_docstrings.py @@ -0,0 +1,499 @@ +#!/usr/bin/env python +""" +Analyze docstrings to detect errors. + +If no argument is provided, it does a quick check of docstrings and returns +a csv with all API functions and results of basic checks. + +If a function or method is provided in the form "pandas.function", +"pandas.module.class.method", etc. a list of all errors in the docstring for +the specified function or method. + +Usage:: + $ ./validate_docstrings.py + $ ./validate_docstrings.py pandas.DataFrame.head +""" +import os +import sys +import csv +import re +import functools +import collections +import argparse +import contextlib +import pydoc +import inspect +import importlib +import doctest +try: + from io import StringIO +except ImportError: + from cStringIO import StringIO +import numpy + +BASE_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + +sys.path.insert(0, os.path.join(BASE_PATH)) +import pandas + +sys.path.insert(1, os.path.join(BASE_PATH, 'doc', 'sphinxext')) +from numpydoc.docscrape import NumpyDocString + + +PRIVATE_CLASSES = ['NDFrame', 'IndexOpsMixin'] + + +def _load_obj(obj_name): + for maxsplit in range(1, obj_name.count('.') + 1): + # TODO when py3 only replace by: module, *func_parts = ... 
+ func_name_split = obj_name.rsplit('.', maxsplit=maxsplit) + module = func_name_split[0] + func_parts = func_name_split[1:] + try: + obj = importlib.import_module(module) + except ImportError: + pass + else: + continue + + if 'module' not in locals(): + raise ImportError('No module can be imported ' + 'from "{}"'.format(obj_name)) + + for part in func_parts: + obj = getattr(obj, part) + return obj + + +def _to_original_callable(obj): + while True: + if inspect.isfunction(obj) or inspect.isclass(obj): + f = inspect.getfile(obj) + if f.startswith('<') and f.endswith('>'): + return None + return obj + if inspect.ismethod(obj): + obj = obj.__func__ + elif isinstance(obj, functools.partial): + obj = obj.func + elif isinstance(obj, property): + obj = obj.fget + else: + return None + + +def _output_header(title, width=80, char='#'): + full_line = char * width + side_len = (width - len(title) - 2) // 2 + adj = '' if len(title) % 2 == 0 else ' ' + title_line = '{side} {title}{adj} {side}'.format(side=char * side_len, + title=title, + adj=adj) + + return '\n{full_line}\n{title_line}\n{full_line}\n\n'.format( + full_line=full_line, title_line=title_line) + + +class Docstring: + def __init__(self, method_name, method_obj): + self.method_name = method_name + self.method_obj = method_obj + self.raw_doc = method_obj.__doc__ or '' + self.clean_doc = pydoc.getdoc(self.method_obj) + self.doc = NumpyDocString(self.clean_doc) + + def __len__(self): + return len(self.raw_doc) + + @property + def is_function_or_method(self): + return inspect.isfunction(self.method_obj) + + @property + def source_file_name(self): + fname = inspect.getsourcefile(self.method_obj) + if fname: + fname = os.path.relpath(fname, BASE_PATH) + return fname + + @property + def source_file_def_line(self): + try: + return inspect.getsourcelines(self.method_obj)[-1] + except OSError: + pass + + @property + def github_url(self): + url = 'https://github.com/pandas-dev/pandas/blob/master/' + url += 
'{}#L{}'.format(self.source_file_name, + self.source_file_def_line) + return url + + @property + def start_blank_lines(self): + i = None + if self.raw_doc: + for i, row in enumerate(self.raw_doc.split('\n')): + if row.strip(): + break + return i + + @property + def end_blank_lines(self): + i = None + if self.raw_doc: + for i, row in enumerate(reversed(self.raw_doc.split('\n'))): + if row.strip(): + break + return i + + @property + def double_blank_lines(self): + prev = True + for row in self.raw_doc.split('\n'): + if not prev and not row.strip(): + return True + prev = row.strip() + return False + + @property + def summary(self): + if not self.doc['Extended Summary'] and len(self.doc['Summary']) > 1: + return '' + return ' '.join(self.doc['Summary']) + + @property + def extended_summary(self): + if not self.doc['Extended Summary'] and len(self.doc['Summary']) > 1: + return ' '.join(self.doc['Summary']) + return ' '.join(self.doc['Extended Summary']) + + @property + def needs_summary(self): + return not (bool(self.summary) and bool(self.extended_summary)) + + @property + def doc_parameters(self): + return collections.OrderedDict((name, (type_, ''.join(desc))) + for name, type_, desc + in self.doc['Parameters']) + + @property + def signature_parameters(self): + if (inspect.isclass(self.method_obj) + and self.method_name.split('.')[-1] in {'dt', 'str', 'cat'}): + # accessor classes have a signature, but don't want to show this + return tuple() + try: + signature = inspect.signature(self.method_obj) + except (TypeError, ValueError): + # Some objects, mainly in C extensions do not support introspection + # of the signature + return tuple() + params = tuple(signature.parameters.keys()) + if params and params[0] in ('self', 'cls'): + return params[1:] + return params + + @property + def parameter_mismatches(self): + errs = [] + signature_params = self.signature_parameters + doc_params = tuple(self.doc_parameters) + missing = set(signature_params) - set(doc_params) + if 
missing: + errs.append('Parameters {!r} not documented'.format(missing)) + extra = set(doc_params) - set(signature_params) + if extra: + errs.append('Unknown parameters {!r}'.format(extra)) + if (not missing and not extra and signature_params != doc_params + and not (not signature_params and not doc_params)): + errs.append('Wrong parameters order. ' + + 'Actual: {!r}. '.format(signature_params) + + 'Documented: {!r}'.format(doc_params)) + + return errs + + @property + def correct_parameters(self): + return not bool(self.parameter_mismatches) + + def parameter_type(self, param): + return self.doc_parameters[param][0] + + def parameter_desc(self, param): + return self.doc_parameters[param][1] + + @property + def see_also(self): + return collections.OrderedDict((name, ''.join(desc)) + for name, desc, _ + in self.doc['See Also']) + + @property + def examples(self): + return self.doc['Examples'] + + @property + def returns(self): + return self.doc['Returns'] + + @property + def first_line_ends_in_dot(self): + if self.doc: + return self.doc.split('\n')[0][-1] == '.' + + @property + def deprecated(self): + pattern = re.compile('.. 
deprecated:: ') + return (self.method_name.startswith('pandas.Panel') or + bool(pattern.search(self.summary)) or + bool(pattern.search(self.extended_summary))) + + @property + def mentioned_private_classes(self): + return [klass for klass in PRIVATE_CLASSES if klass in self.raw_doc] + + @property + def examples_errors(self): + flags = doctest.NORMALIZE_WHITESPACE | doctest.IGNORE_EXCEPTION_DETAIL + finder = doctest.DocTestFinder() + runner = doctest.DocTestRunner(optionflags=flags) + context = {'np': numpy, 'pd': pandas} + error_msgs = '' + for test in finder.find(self.raw_doc, self.method_name, globs=context): + f = StringIO() + with contextlib.redirect_stdout(f): + runner.run(test) + error_msgs += f.getvalue() + return error_msgs + + +def get_api_items(): + api_fname = os.path.join(BASE_PATH, 'doc', 'source', 'api.rst') + + previous_line = current_section = current_subsection = '' + position = None + with open(api_fname) as f: + for line in f: + line = line.strip() + if len(line) == len(previous_line): + if set(line) == set('-'): + current_section = previous_line + continue + if set(line) == set('~'): + current_subsection = previous_line + continue + + if line.startswith('.. currentmodule::'): + current_module = line.replace('.. currentmodule::', '').strip() + continue + + if line == '.. 
autosummary::': + position = 'autosummary' + continue + + if position == 'autosummary': + if line == '': + position = 'items' + continue + + if position == 'items': + if line == '': + position = None + continue + item = line.strip() + func = importlib.import_module(current_module) + for part in item.split('.'): + func = getattr(func, part) + + yield ('.'.join([current_module, item]), func, + current_section, current_subsection) + + previous_line = line + + +def _csv_row(func_name, func_obj, section, subsection, in_api, seen={}): + obj_type = type(func_obj).__name__ + original_callable = _to_original_callable(func_obj) + if original_callable is None: + return [func_name, obj_type] + [''] * 12, '' + else: + doc = Docstring(func_name, original_callable) + key = doc.source_file_name, doc.source_file_def_line + shared_code = seen.get(key, '') + return [func_name, + obj_type, + in_api, + int(doc.deprecated), + section, + subsection, + doc.source_file_name, + doc.source_file_def_line, + doc.github_url, + int(bool(doc.summary)), + int(bool(doc.extended_summary)), + int(doc.correct_parameters), + int(bool(doc.examples)), + shared_code], key + + +def validate_all(): + writer = csv.writer(sys.stdout) + cols = ('Function or method', + 'Type', + 'In API doc', + 'Is deprecated', + 'Section', + 'Subsection', + 'File', + 'Code line', + 'GitHub link', + 'Has summary', + 'Has extended summary', + 'Parameters ok', + 'Has examples', + 'Shared code with') + writer.writerow(cols) + seen = {} + api_items = list(get_api_items()) + for func_name, func, section, subsection in api_items: + row, key = _csv_row(func_name, func, section, subsection, + in_api=1, seen=seen) + seen[key] = func_name + writer.writerow(row) + + api_item_names = set(list(zip(*api_items))[0]) + for class_ in (pandas.Series, pandas.DataFrame, pandas.Panel): + for member in inspect.getmembers(class_): + func_name = 'pandas.{}.{}'.format(class_.__name__, member[0]) + if (not member[0].startswith('_') and + func_name not 
in api_item_names): + func = _load_obj(func_name) + row, key = _csv_row(func_name, func, section='', subsection='', + in_api=0) + writer.writerow(row) + + return 0 + + +def validate_one(func_name): + func_obj = _load_obj(func_name) + doc = Docstring(func_name, func_obj) + + sys.stderr.write(_output_header('Docstring ({})'.format(func_name))) + sys.stderr.write('{}\n'.format(doc.clean_doc)) + + errs = [] + if doc.start_blank_lines != 1: + errs.append('Docstring text (summary) should start in the line ' + 'immediately after the opening quotes (not in the same ' + 'line, or leaving a blank line in between)') + if doc.end_blank_lines != 1: + errs.append('Closing quotes should be placed in the line after ' + 'the last text in the docstring (do not close the ' + 'quotes in the same line as the text, or leave a ' + 'blank line between the last text and the quotes)') + if doc.double_blank_lines: + errs.append('Use only one blank line to separate sections or ' + 'paragraphs') + + if not doc.summary: + errs.append('No summary found (a short summary in a single line ' + 'should be present at the beginning of the docstring)') + else: + if not doc.summary[0].isupper(): + errs.append('Summary does not start with capital') + if doc.summary[-1] != '.': + errs.append('Summary does not end with dot') + if (doc.is_function_or_method and + doc.summary.split(' ')[0][-1] == 's'): + errs.append('Summary must start with infinitive verb, ' + 'not third person (e.g. 
use "Generate" instead of ' + '"Generates")') + if not doc.extended_summary: + errs.append('No extended summary found') + + param_errs = doc.parameter_mismatches + for param in doc.doc_parameters: + if not doc.parameter_type(param): + param_errs.append('Parameter "{}" has no type'.format(param)) + else: + if doc.parameter_type(param)[-1] == '.': + param_errs.append('Parameter "{}" type ' + 'should not finish with "."'.format(param)) + + if not doc.parameter_desc(param): + param_errs.append('Parameter "{}" ' + 'has no description'.format(param)) + else: + if not doc.parameter_desc(param)[0].isupper(): + param_errs.append('Parameter "{}" description ' + 'should start with ' + 'capital letter'.format(param)) + if doc.parameter_desc(param)[-1] != '.': + param_errs.append('Parameter "{}" description ' + 'should finish with "."'.format(param)) + if param_errs: + errs.append('Errors in parameters section') + for param_err in param_errs: + errs.append('\t{}'.format(param_err)) + + if not doc.returns: + errs.append('No returns section found') + + mentioned_errs = doc.mentioned_private_classes + if mentioned_errs: + errs.append('Private classes ({}) should not be mentioned in public ' + 'docstring.'.format(mentioned_errs)) + + if not doc.see_also: + errs.append('See Also section not found') + else: + for rel_name, rel_desc in doc.see_also.items(): + if not rel_desc: + errs.append('Missing description for ' + 'See Also "{}" reference'.format(rel_name)) + examples_errs = '' + if not doc.examples: + errs.append('No examples section found') + else: + examples_errs = doc.examples_errors + if examples_errs: + errs.append('Examples do not pass tests') + + sys.stderr.write(_output_header('Validation')) + if errs: + sys.stderr.write('Errors found:\n') + for err in errs: + sys.stderr.write('\t{}\n'.format(err)) + else: + sys.stderr.write('Docstring for "{}" correct. 
:)\n'.format(func_name)) + + if examples_errs: + sys.stderr.write(_output_header('Doctests')) + sys.stderr.write(examples_errs) + + return len(errs) + + +def main(function): + if function is None: + return validate_all() + else: + return validate_one(function) + + +if __name__ == '__main__': + argparser = argparse.ArgumentParser( + description='validate pandas docstrings') + argparser.add_argument('function', + nargs='?', + default=None, + help=('function or method to validate ' + '(e.g. pandas.DataFrame.head) ' + 'if not provided, all docstrings ' + 'are validated')) + args = argparser.parse_args() + sys.exit(main(args.function)) diff --git a/scripts/winbuild_py27.bat b/scripts/winbuild_py27.bat deleted file mode 100644 index bec67c7e527ed..0000000000000 --- a/scripts/winbuild_py27.bat +++ /dev/null @@ -1,2 +0,0 @@ -SET PATH=C:\MinGW\bin;C:\Python27;C:\Python27\Scripts;%PATH% -python setup.py build -c mingw32 bdist_wininst diff --git a/scripts/windows_builder/build_27-32.bat b/scripts/windows_builder/build_27-32.bat deleted file mode 100644 index 37eb4d436d567..0000000000000 --- a/scripts/windows_builder/build_27-32.bat +++ /dev/null @@ -1,25 +0,0 @@ -@echo off -echo "starting 27-32" - -setlocal EnableDelayedExpansion -set MSSdk=1 -CALL "C:\Program Files\Microsoft SDKs\Windows\v7.0\Bin\SetEnv.cmd" /x86 /release -set DISTUTILS_USE_SDK=1 - -title 27-32 build -echo "building" -cd "c:\users\Jeff Reback\documents\github\pandas" -C:\python27-32\python.exe setup.py build > build.27-32.log 2>&1 - -title "installing" -C:\python27-32\python.exe setup.py bdist --formats=wininst > install.27-32.log 2>&1 - -echo "testing" -C:\python27-32\scripts\nosetests -A "not slow" build\lib.win32-2.7\pandas > test.27-32.log 2>&1 - -echo "versions" -cd build\lib.win32-2.7 -C:\python27-32\python.exe ../../ci/print_versions.py > ../../versions.27-32.log 2>&1 - -exit - diff --git a/scripts/windows_builder/build_27-64.bat b/scripts/windows_builder/build_27-64.bat deleted file mode 100644 index 
e76e25d0ef39c..0000000000000 --- a/scripts/windows_builder/build_27-64.bat +++ /dev/null @@ -1,25 +0,0 @@ -@echo off -echo "starting 27-64" - -setlocal EnableDelayedExpansion -set MSSdk=1 -CALL "C:\Program Files\Microsoft SDKs\Windows\v7.0\Bin\SetEnv.cmd" /x64 /release -set DISTUTILS_USE_SDK=1 - -title 27-64 build -echo "building" -cd "c:\users\Jeff Reback\documents\github\pandas" -C:\python27-64\python.exe setup.py build > build.27-64.log 2>&1 - -echo "installing" -C:\python27-64\python.exe setup.py bdist --formats=wininst > install.27-64.log 2>&1 - -echo "testing" -C:\python27-64\scripts\nosetests -A "not slow" build\lib.win-amd64-2.7\pandas > test.27-64.log 2>&1 - -echo "versions" -cd build\lib.win-amd64-2.7 -C:\python27-64\python.exe ../../ci/print_versions.py > ../../versions.27-64.log 2>&1 - -exit - diff --git a/scripts/windows_builder/build_34-32.bat b/scripts/windows_builder/build_34-32.bat deleted file mode 100644 index 8e060e000bc8f..0000000000000 --- a/scripts/windows_builder/build_34-32.bat +++ /dev/null @@ -1,27 +0,0 @@ -@echo off -echo "starting 34-32" - -setlocal EnableDelayedExpansion -set MSSdk=1 -CALL "C:\Program Files\Microsoft SDKs\Windows\v7.1\Bin\SetEnv.cmd" /x86 /release -set DISTUTILS_USE_SDK=1 - -title 34-32 build -echo "building" -cd "c:\users\Jeff Reback\documents\github\pandas" -C:\python34-32\python.exe setup.py build > build.34-32.log 2>&1 - -echo "installing" -C:\python34-32\python.exe setup.py bdist --formats=wininst > install.34-32.log 2>&1 - -echo "testing" -C:\python34-32\scripts\nosetests -A "not slow" build\lib.win32-3.4\pandas > test.34-32.log 2>&1 - -echo "versions" -cd build\lib.win32-3.4 -C:\python34-32\python.exe ../../ci/print_versions.py > ../../versions.34-32.log 2>&1 - -exit - - - diff --git a/scripts/windows_builder/build_34-64.bat b/scripts/windows_builder/build_34-64.bat deleted file mode 100644 index 3a8512b730346..0000000000000 --- a/scripts/windows_builder/build_34-64.bat +++ /dev/null @@ -1,27 +0,0 @@ -@echo off 
-echo "starting 34-64" - -setlocal EnableDelayedExpansion -set MSSdk=1 -CALL "C:\Program Files\Microsoft SDKs\Windows\v7.1\Bin\SetEnv.cmd" /x64 /release -set DISTUTILS_USE_SDK=1 - -title 34-64 build -echo "building" -cd "c:\users\Jeff Reback\documents\github\pandas" -C:\python34-64\python.exe setup.py build > build.34-64.log 2>&1 - -echo "installing" -C:\python34-64\python.exe setup.py bdist --formats=wininst > install.34-64.log 2>&1 - -echo "testing" -C:\python34-64\scripts\nosetests -A "not slow" build\lib.win-amd64-3.4\pandas > test.34-64.log 2>&1 - -echo "versions" -cd build\lib.win-amd64-3.4 -C:\python34-64\python.exe ../../ci/print_versions.py > ../../versions.34-64.log 2>&1 - -exit - - - diff --git a/scripts/windows_builder/check_and_build.bat b/scripts/windows_builder/check_and_build.bat deleted file mode 100644 index 32be1bde1f7f3..0000000000000 --- a/scripts/windows_builder/check_and_build.bat +++ /dev/null @@ -1,2 +0,0 @@ -set PYTHONPATH=c:/python27-64/lib -c:/python27-64/python.exe c:/Builds/check_and_build.py %1 %2 %3 %4 %4 %6 %7 %8 %9 diff --git a/scripts/windows_builder/check_and_build.py b/scripts/windows_builder/check_and_build.py deleted file mode 100644 index 2eb32fb4265d9..0000000000000 --- a/scripts/windows_builder/check_and_build.py +++ /dev/null @@ -1,194 +0,0 @@ -import datetime -import git -import logging -import os, re, time -import subprocess -import argparse -import pysftp - -# parse the args -parser = argparse.ArgumentParser(description='build, test, and install updated versions of master pandas') -parser.add_argument('-b', '--build', - help='run just this build', - dest='build') -parser.add_argument('-u', '--update', - help='get a git update', - dest='update', - action='store_true', - default=False) -parser.add_argument('-t', '--test', - help='run the tests', - dest='test', - action='store_true', - default=False) -parser.add_argument('-c', '--compare', - help='show the last tests compare', - dest='compare', - action='store_true', - 
default=False) -parser.add_argument('-v', '--version', - help='show the last versions', - dest='version', - action='store_true', - default=False) -parser.add_argument('-i', '--install', - help='run the install', - dest='install', - action='store_true', - default=False) -parser.add_argument('--dry', - help='dry run', - dest='dry', - action='store_true', - default=False) - -args = parser.parse_args() -dry_run = args.dry - -builds = ['27-32','27-64','34-32','34-64'] -base_dir = "C:\Users\Jeff Reback\Documents\GitHub\pandas" -remote_host='pandas.pydata.org' -username='pandas' -password=############ - -# drop python from our environment to avoid -# passing this onto sub-processes -env = os.environ -del env['PYTHONPATH'] - -# the stdout logger -fmt = '%(asctime)s: %(message)s' -logger = logging.getLogger('check_and_build') -logger.setLevel(logging.DEBUG) -stream_handler = logging.StreamHandler() -stream_handler.setFormatter(logging.Formatter(fmt)) -logger.addHandler(stream_handler) - -def run_all(test=False,compare=False,install=False,version=False,build=None): - # run everything - - for b in builds: - if build is not None and build != b: - continue - if test: - do_rebuild(b) - if compare or test: - try: - do_compare(b) - except (Exception) as e: - logger.info("ERROR COMPARE {0} : {1}".format(b,e)) - if version: - try: - do_version(b) - except (Exception) as e: - logger.info("ERROR VERSION {0} : {1}".format(b,e)) - - if install: - run_install() - -def do_rebuild(build): - # trigger the rebuild - - cmd = "c:/Builds/build_{0}.bat".format(build) - logger.info("rebuild : {0}".format(cmd)) - p = subprocess.Popen("start /wait /min {0}".format(cmd),env=env,shell=True,close_fds=True) - ret = p.wait() - -def do_compare(build): - # print the test outputs - - f = os.path.join(base_dir,"test.{0}.log".format(build)) - with open(f,'r') as fh: - for l in fh: - l = l.rstrip() - if l.startswith('ERROR:'): - logger.info("{0} : {1}".format(build,l)) - if l.startswith('Ran') or 
l.startswith('OK') or l.startswith('FAIL'): - logger.info("{0} : {1}".format(build,l)) - -def do_version(build): - # print the version strings - - f = os.path.join(base_dir,"versions.{0}.log".format(build)) - with open(f,'r') as fh: - for l in fh: - l = l.rstrip() - logger.info("{0} : {1}".format(build,l)) - -def do_update(is_verbose=True): - # update git; return True if the commit has changed - - repo = git.Repo(base_dir) - master = repo.heads.master - origin = repo.remotes.origin - start_commit = master.commit - - if is_verbose: - logger.info("current commit : {0}".format(start_commit)) - - try: - origin.update() - except (Exception) as e: - logger.info("update exception : {0}".format(e)) - try: - origin.pull() - except (Exception) as e: - logger.info("pull exception : {0}".format(e)) - - result = start_commit != master.commit - if result: - if is_verbose: - logger.info("commits changed : {0} -> {1}".format(start_commit,master.commit)) - return result - -def run_install(): - # send the installation binaries - - repo = git.Repo(base_dir) - master = repo.heads.master - commit = master.commit - short_hash = str(commit)[:7] - - logger.info("sending files : {0}".format(commit)) - d = os.path.join(base_dir,"dist") - files = [ f for f in os.listdir(d) if re.search(short_hash,f) ] - srv = pysftp.Connection(host=remote_host,username=username,password=password) - srv.chdir("www/pandas-build/dev") - - # get current files - remote_files = set(srv.listdir(path='.')) - - for f in files: - if f not in remote_files: - logger.info("sending: {0}".format(f)) - local = os.path.join(d,f) - srv.put(localpath=local) - - srv.close() - logger.info("sending files: done") - -# just perform the action -if args.update or args.test or args.compare or args.install or args.version: - if args.update: - do_update() - run_all(test=args.test,compare=args.compare,install=args.install,version=args.version,build=args.build) - exit(0) - -# file logging -file_handler = 
logging.FileHandler("C:\Builds\logs\check_and_build.log") -file_handler.setFormatter(logging.Formatter(fmt)) -logger.addHandler(file_handler) - -logger.info("start") - -# main loop -while(True): - - if do_update(): - run_all(test=True,install=False) - - time.sleep(60*60) - -logger.info("exit") -file_handler.close() - diff --git a/scripts/windows_builder/readme.txt b/scripts/windows_builder/readme.txt deleted file mode 100644 index 789e2a9ee0c63..0000000000000 --- a/scripts/windows_builder/readme.txt +++ /dev/null @@ -1,17 +0,0 @@ -This is a collection of windows batch scripts (and a python script) -to rebuild the binaries, test, and upload the binaries for public distribution -upon a commit on github. - -Obviously requires that these be setup on windows -Requires an install of Windows SDK 3.5 and 4.0 -Full python installs for each version with the deps - -Currently supporting - -27-32,27-64,34-32,34-64 - -Note that 34 use the 4.0 SDK, while the other suse 3.5 SDK - -I installed these scripts in C:\Builds - -Installed libaries in C:\Installs diff --git a/setup.cfg b/setup.cfg index 45d98dd733f1f..942b2b0a1a0bf 100644 --- a/setup.cfg +++ b/setup.cfg @@ -12,7 +12,12 @@ tag_prefix = v parentdir_prefix = pandas- [flake8] -ignore = E731,E402 +ignore = + E402, # module level import not at top of file + E731, # do not assign a lambda expression, use a def + E741, # do not use variables named 'l', 'O', or 'I' + W503 # line break before binary operator +max-line-length = 79 [yapf] based_on_style = pep8 @@ -21,7 +26,9 @@ split_penalty_after_opening_bracket = 1000000 split_penalty_logical_operator = 30 [tool:pytest] -# TODO: Change all yield-based (nose-style) fixutures to pytest fixtures -# Silencing the warning until then -addopts = --disable-pytest-warnings testpaths = pandas +markers = + single: mark a test as single cpu only + slow: mark a test as slow + network: mark a test as network + high_memory: mark a test as a high-memory only diff --git a/setup.py b/setup.py index 
edec53e9cefb0..7fb5358d0950b 100755 --- a/setup.py +++ b/setup.py @@ -7,27 +7,32 @@ """ import os +from os.path import join as pjoin + +import pkg_resources import sys import shutil -import warnings -import re -import platform from distutils.version import LooseVersion +from setuptools import setup, Command, find_packages + +# versioning +import versioneer +cmdclass = versioneer.get_cmdclass() + def is_platform_windows(): return sys.platform == 'win32' or sys.platform == 'cygwin' + def is_platform_linux(): return sys.platform == 'linux2' + def is_platform_mac(): return sys.platform == 'darwin' -# versioning -import versioneer -cmdclass = versioneer.get_cmdclass() -min_cython_ver = '0.23' +min_cython_ver = '0.24' try: import Cython ver = Cython.__version__ @@ -35,58 +40,28 @@ def is_platform_mac(): except ImportError: _CYTHON_INSTALLED = False -try: - import pkg_resources - from setuptools import setup, Command - _have_setuptools = True -except ImportError: - # no setuptools installed - from distutils.core import setup, Command - _have_setuptools = False - -setuptools_kwargs = {} -min_numpy_ver = '1.7.0' -if sys.version_info[0] >= 3: - - setuptools_kwargs = { - 'zip_safe': False, - 'install_requires': ['python-dateutil >= 2', - 'pytz >= 2011k', - 'numpy >= %s' % min_numpy_ver], - 'setup_requires': ['numpy >= %s' % min_numpy_ver], - } - if not _have_setuptools: - sys.exit("need setuptools/distribute for Py3k" - "\n$ pip install distribute") -else: - setuptools_kwargs = { - 'install_requires': ['python-dateutil', - 'pytz >= 2011k', - 'numpy >= %s' % min_numpy_ver], - 'setup_requires': ['numpy >= %s' % min_numpy_ver], - 'zip_safe': False, - } +min_numpy_ver = '1.9.0' +setuptools_kwargs = { + 'install_requires': [ + 'python-dateutil >= 2.5.0', + 'pytz >= 2011k', + 'numpy >= {numpy_ver}'.format(numpy_ver=min_numpy_ver), + ], + 'setup_requires': ['numpy >= {numpy_ver}'.format(numpy_ver=min_numpy_ver)], + 'zip_safe': False, +} - if not _have_setuptools: - try: - import 
numpy - import dateutil - setuptools_kwargs = {} - except ImportError: - sys.exit("install requires: 'python-dateutil < 2','numpy'." - " use pip or easy_install." - "\n $ pip install 'python-dateutil < 2' 'numpy'") -from distutils.extension import Extension -from distutils.command.build import build -from distutils.command.build_ext import build_ext as _build_ext +from distutils.extension import Extension # noqa:E402 +from distutils.command.build import build # noqa:E402 +from distutils.command.build_ext import build_ext as _build_ext # noqa:E402 try: if not _CYTHON_INSTALLED: raise ImportError('No supported version of Cython installed.') try: - from Cython.Distutils.old_build_ext import old_build_ext as _build_ext + from Cython.Distutils.old_build_ext import old_build_ext as _build_ext # noqa:F811,E501 except ImportError: # Pre 0.25 from Cython.Distutils import build_ext as _build_ext @@ -106,23 +81,23 @@ def is_platform_mac(): 'pip install Tempita') -from os.path import join as pjoin - - -_pxipath = pjoin('pandas', 'src') _pxi_dep_template = { - 'algos': ['algos_common_helper.pxi.in', 'algos_groupby_helper.pxi.in', - 'algos_take_helper.pxi.in', 'algos_rank_helper.pxi.in'], - '_join': ['join_helper.pxi.in', 'joins_func_helper.pxi.in'], - 'hashtable': ['hashtable_class_helper.pxi.in', - 'hashtable_func_helper.pxi.in'], - 'index': ['index_class_helper.pxi.in'], - '_sparse': ['sparse_op_helper.pxi.in'] -} + 'algos': ['_libs/algos_common_helper.pxi.in', + '_libs/algos_take_helper.pxi.in', + '_libs/algos_rank_helper.pxi.in'], + 'groupby': ['_libs/groupby_helper.pxi.in'], + 'join': ['_libs/join_helper.pxi.in', '_libs/join_func_helper.pxi.in'], + 'reshape': ['_libs/reshape_helper.pxi.in'], + 'hashtable': ['_libs/hashtable_class_helper.pxi.in', + '_libs/hashtable_func_helper.pxi.in'], + 'index': ['_libs/index_class_helper.pxi.in'], + 'sparse': ['_libs/sparse_op_helper.pxi.in'], + 'interval': ['_libs/intervaltree.pxi.in']} + _pxifiles = [] _pxi_dep = {} for module, files 
in _pxi_dep_template.items(): - pxi_files = [pjoin(_pxipath, x) for x in files] + pxi_files = [pjoin('pandas', x) for x in files] _pxifiles.extend(pxi_files) _pxi_dep[module] = pxi_files @@ -134,12 +109,12 @@ def build_extensions(self): # generate template output if cython: for pxifile in _pxifiles: - # build pxifiles first, template extention must be .pxi.in + # build pxifiles first, template extension must be .pxi.in assert pxifile.endswith('.pxi.in') outfile = pxifile[:-3] if (os.path.exists(outfile) and - os.stat(pxifile).st_mtime < os.stat(outfile).st_mtime): + os.stat(pxifile).st_mtime < os.stat(outfile).st_mtime): # if .pxi.in is not updated, no need to output .pxi continue @@ -153,7 +128,8 @@ def build_extensions(self): numpy_incl = pkg_resources.resource_filename('numpy', 'core/include') for ext in self.extensions: - if hasattr(ext, 'include_dirs') and not numpy_incl in ext.include_dirs: + if (hasattr(ext, 'include_dirs') and + numpy_incl not in ext.include_dirs): ext.include_dirs.append(numpy_incl) _build_ext.build_extensions(self) @@ -222,10 +198,6 @@ def build_extensions(self): munging and cleaning data, analyzing / modeling it, then organizing the results of the analysis into a form suitable for plotting or tabular display. pandas is the ideal tool for all of these tasks. 
- -Note ----- -Windows binaries built against NumPy 1.8.1 """ DISTNAME = 'pandas' @@ -243,12 +215,11 @@ def build_extensions(self): 'Programming Language :: Python :: 2', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Cython', - 'Topic :: Scientific/Engineering', -] + 'Topic :: Scientific/Engineering'] + class CleanCommand(Command): """Custom distutils command to clean the .so and .pyc files.""" @@ -260,24 +231,24 @@ def initialize_options(self): self._clean_me = [] self._clean_trees = [] - base = pjoin('pandas','src') - dt = pjoin(base,'datetime') + base = pjoin('pandas', '_libs', 'src') + dt = pjoin(base, 'datetime') src = base - util = pjoin('pandas','util') - parser = pjoin(base,'parser') - ujson_python = pjoin(base,'ujson','python') - ujson_lib = pjoin(base,'ujson','lib') - self._clean_exclude = [pjoin(dt,'np_datetime.c'), - pjoin(dt,'np_datetime_strings.c'), - pjoin(src,'period_helper.c'), - pjoin(parser,'tokenizer.c'), - pjoin(parser,'io.c'), - pjoin(ujson_python,'ujson.c'), - pjoin(ujson_python,'objToJSON.c'), - pjoin(ujson_python,'JSONtoObj.c'), - pjoin(ujson_lib,'ultrajsonenc.c'), - pjoin(ujson_lib,'ultrajsondec.c'), - pjoin(util,'move.c'), + util = pjoin('pandas', 'util') + parser = pjoin(base, 'parser') + ujson_python = pjoin(base, 'ujson', 'python') + ujson_lib = pjoin(base, 'ujson', 'lib') + self._clean_exclude = [pjoin(dt, 'np_datetime.c'), + pjoin(dt, 'np_datetime_strings.c'), + pjoin(src, 'period_helper.c'), + pjoin(parser, 'tokenizer.c'), + pjoin(parser, 'io.c'), + pjoin(ujson_python, 'ujson.c'), + pjoin(ujson_python, 'objToJSON.c'), + pjoin(ujson_python, 'JSONtoObj.c'), + pjoin(ujson_lib, 'ultrajsonenc.c'), + pjoin(ujson_lib, 'ultrajsondec.c'), + pjoin(util, 'move.c'), ] for root, dirs, files in os.walk('pandas'): @@ -323,43 +294,65 @@ def run(self): # class as it 
encodes the version info sdist_class = cmdclass['sdist'] + class CheckSDist(sdist_class): """Custom sdist that ensures Cython has compiled all pyx files to c.""" - _pyxfiles = ['pandas/lib.pyx', - 'pandas/hashtable.pyx', - 'pandas/tslib.pyx', - 'pandas/index.pyx', - 'pandas/algos.pyx', - 'pandas/join.pyx', - 'pandas/window.pyx', - 'pandas/parser.pyx', - 'pandas/src/period.pyx', - 'pandas/src/sparse.pyx', - 'pandas/src/testing.pyx', - 'pandas/src/hash.pyx', - 'pandas/io/sas/saslib.pyx'] + _pyxfiles = ['pandas/_libs/lib.pyx', + 'pandas/_libs/hashtable.pyx', + 'pandas/_libs/tslib.pyx', + 'pandas/_libs/index.pyx', + 'pandas/_libs/internals.pyx', + 'pandas/_libs/algos.pyx', + 'pandas/_libs/join.pyx', + 'pandas/_libs/indexing.pyx', + 'pandas/_libs/interval.pyx', + 'pandas/_libs/hashing.pyx', + 'pandas/_libs/missing.pyx', + 'pandas/_libs/reduction.pyx', + 'pandas/_libs/testing.pyx', + 'pandas/_libs/skiplist.pyx', + 'pandas/_libs/sparse.pyx', + 'pandas/_libs/ops.pyx', + 'pandas/_libs/parsers.pyx', + 'pandas/_libs/tslibs/ccalendar.pyx', + 'pandas/_libs/tslibs/period.pyx', + 'pandas/_libs/tslibs/strptime.pyx', + 'pandas/_libs/tslibs/np_datetime.pyx', + 'pandas/_libs/tslibs/timedeltas.pyx', + 'pandas/_libs/tslibs/timestamps.pyx', + 'pandas/_libs/tslibs/timezones.pyx', + 'pandas/_libs/tslibs/conversion.pyx', + 'pandas/_libs/tslibs/fields.pyx', + 'pandas/_libs/tslibs/offsets.pyx', + 'pandas/_libs/tslibs/frequencies.pyx', + 'pandas/_libs/tslibs/resolution.pyx', + 'pandas/_libs/tslibs/parsing.pyx', + 'pandas/_libs/writers.pyx', + 'pandas/io/sas/sas.pyx'] + + _cpp_pyxfiles = ['pandas/_libs/window.pyx', + 'pandas/io/msgpack/_packer.pyx', + 'pandas/io/msgpack/_unpacker.pyx'] def initialize_options(self): sdist_class.initialize_options(self) - ''' - self._pyxfiles = [] - for root, dirs, files in os.walk('pandas'): - for f in files: - if f.endswith('.pyx'): - self._pyxfiles.append(pjoin(root, f)) - ''' - def run(self): if 'cython' in cmdclass: self.run_command('cython') else: - for 
pyxfile in self._pyxfiles: - cfile = pyxfile[:-3] + 'c' - msg = "C-source file '%s' not found." % (cfile) +\ - " Run 'setup.py cython' before sdist." - assert os.path.isfile(cfile), msg + # If we are not running cython then + # compile the extensions correctly + pyx_files = [(self._pyxfiles, 'c'), (self._cpp_pyxfiles, 'cpp')] + + for pyxfiles, extension in pyx_files: + for pyxfile in pyxfiles: + sourcefile = pyxfile[:-3] + extension + msg = ("{extension}-source file '{source}' not found.\n" + "Run 'setup.py cython' before sdist.".format( + source=sourcefile, extension=extension)) + assert os.path.isfile(sourcefile), msg sdist_class.run(self) @@ -373,10 +366,11 @@ def check_cython_extensions(self, extensions): for ext in extensions: for src in ext.sources: if not os.path.exists(src): - raise Exception("""Cython-generated file '%s' not found. + print("{}: -> [{}]".format(ext.name, ext.sources)) + raise Exception("""Cython-generated file '{src}' not found. Cython is required to compile pandas from a development branch. Please install Cython or download a release package of pandas. 
- """ % src) + """.format(src=src)) def build_extensions(self): self.check_cython_extensions(self.extensions) @@ -405,6 +399,7 @@ def finalize_options(self): def run(self): pass + cmdclass.update({'clean': CleanCommand, 'build': build}) @@ -431,99 +426,253 @@ def get_tag(self): cmdclass['build_src'] = DummyBuildSrc cmdclass['build_ext'] = CheckingBuildExt -lib_depends = ['reduce', 'inference', 'properties'] +if sys.byteorder == 'big': + endian_macro = [('__BIG_ENDIAN__', '1')] +else: + endian_macro = [('__LITTLE_ENDIAN__', '1')] + +lib_depends = ['inference'] def srcpath(name=None, suffix='.pyx', subdir='src'): return pjoin('pandas', subdir, name + suffix) + if suffix == '.pyx': - lib_depends = [srcpath(f, suffix='.pyx') for f in lib_depends] - lib_depends.append('pandas/src/util.pxd') + lib_depends = [srcpath(f, suffix='.pyx', subdir='_libs/src') + for f in lib_depends] + lib_depends.append('pandas/_libs/src/util.pxd') else: lib_depends = [] plib_depends = [] -common_include = ['pandas/src/klib', 'pandas/src'] +common_include = ['pandas/_libs/src/klib', 'pandas/_libs/src'] def pxd(name): return os.path.abspath(pjoin('pandas', name + '.pxd')) + # args to ignore warnings if is_platform_windows(): - extra_compile_args=[] + extra_compile_args = [] else: - extra_compile_args=['-Wno-unused-function'] + extra_compile_args = ['-Wno-unused-function'] -lib_depends = lib_depends + ['pandas/src/numpy_helper.h', - 'pandas/src/parse_helper.h'] +lib_depends = lib_depends + ['pandas/_libs/src/numpy_helper.h', + 'pandas/_libs/src/parse_helper.h', + 'pandas/_libs/src/compat_helper.h'] +np_datetime_headers = ['pandas/_libs/src/datetime/np_datetime.h', + 'pandas/_libs/src/datetime/np_datetime_strings.h'] +np_datetime_sources = ['pandas/_libs/src/datetime/np_datetime.c', + 'pandas/_libs/src/datetime/np_datetime_strings.c'] -tseries_depends = ['pandas/src/datetime/np_datetime.h', - 'pandas/src/datetime/np_datetime_strings.h', - 'pandas/src/datetime_helper.h', - 
'pandas/src/period_helper.h', - 'pandas/src/datetime.pxd'] - +tseries_depends = np_datetime_headers + ['pandas/_libs/tslibs/np_datetime.pxd'] # some linux distros require it libraries = ['m'] if not is_platform_windows() else [] -ext_data = dict( - lib={'pyxfile': 'lib', - 'pxdfiles': [], - 'depends': lib_depends}, - hashtable={'pyxfile': 'hashtable', - 'pxdfiles': ['hashtable'], - 'depends': (['pandas/src/klib/khash_python.h'] - + _pxi_dep['hashtable'])}, - tslib={'pyxfile': 'tslib', - 'depends': tseries_depends, - 'sources': ['pandas/src/datetime/np_datetime.c', - 'pandas/src/datetime/np_datetime_strings.c', - 'pandas/src/period_helper.c']}, - _period={'pyxfile': 'src/period', - 'depends': tseries_depends, - 'sources': ['pandas/src/datetime/np_datetime.c', - 'pandas/src/datetime/np_datetime_strings.c', - 'pandas/src/period_helper.c']}, - index={'pyxfile': 'index', - 'sources': ['pandas/src/datetime/np_datetime.c', - 'pandas/src/datetime/np_datetime_strings.c'], - 'pxdfiles': ['src/util', 'hashtable'], - 'depends': _pxi_dep['index']}, - algos={'pyxfile': 'algos', - 'pxdfiles': ['src/util', 'hashtable'], - 'depends': _pxi_dep['algos']}, - _join={'pyxfile': 'src/join', - 'pxdfiles': ['src/util', 'hashtable'], - 'depends': _pxi_dep['_join']}, - _window={'pyxfile': 'window', - 'pxdfiles': ['src/skiplist', 'src/util'], - 'depends': ['pandas/src/skiplist.pyx', - 'pandas/src/skiplist.h']}, - parser={'pyxfile': 'parser', - 'depends': ['pandas/src/parser/tokenizer.h', - 'pandas/src/parser/io.h', - 'pandas/src/numpy_helper.h'], - 'sources': ['pandas/src/parser/tokenizer.c', - 'pandas/src/parser/io.c']}, - _sparse={'pyxfile': 'src/sparse', - 'depends': ([srcpath('sparse', suffix='.pyx')] + - _pxi_dep['_sparse'])}, - _testing={'pyxfile': 'src/testing', - 'depends': [srcpath('testing', suffix='.pyx')]}, - _hash={'pyxfile': 'src/hash', - 'depends': [srcpath('hash', suffix='.pyx')]}, -) - -ext_data["io.sas.saslib"] = {'pyxfile': 'io/sas/saslib'} +ext_data = { + '_libs.algos': { 
+ 'pyxfile': '_libs/algos', + 'pxdfiles': ['_libs/src/util', '_libs/algos', '_libs/hashtable'], + 'depends': _pxi_dep['algos']}, + '_libs.groupby': { + 'pyxfile': '_libs/groupby', + 'pxdfiles': ['_libs/src/util', '_libs/algos'], + 'depends': _pxi_dep['groupby']}, + '_libs.hashing': { + 'pyxfile': '_libs/hashing'}, + '_libs.hashtable': { + 'pyxfile': '_libs/hashtable', + 'pxdfiles': ['_libs/hashtable', '_libs/missing', '_libs/khash'], + 'depends': (['pandas/_libs/src/klib/khash_python.h'] + + _pxi_dep['hashtable'])}, + '_libs.index': { + 'pyxfile': '_libs/index', + 'pxdfiles': ['_libs/src/util', '_libs/hashtable'], + 'depends': _pxi_dep['index'], + 'sources': np_datetime_sources}, + '_libs.indexing': { + 'pyxfile': '_libs/indexing'}, + '_libs.internals': { + 'pyxfile': '_libs/internals'}, + '_libs.interval': { + 'pyxfile': '_libs/interval', + 'pxdfiles': ['_libs/hashtable'], + 'depends': _pxi_dep['interval']}, + '_libs.join': { + 'pyxfile': '_libs/join', + 'pxdfiles': ['_libs/src/util', '_libs/hashtable'], + 'depends': _pxi_dep['join']}, + '_libs.lib': { + 'pyxfile': '_libs/lib', + 'pxdfiles': ['_libs/src/util', + '_libs/missing', + '_libs/tslibs/conversion'], + 'depends': lib_depends + tseries_depends}, + '_libs.missing': { + 'pyxfile': '_libs/missing', + 'pxdfiles': ['_libs/src/util'], + 'depends': tseries_depends}, + '_libs.parsers': { + 'pyxfile': '_libs/parsers', + 'depends': ['pandas/_libs/src/parser/tokenizer.h', + 'pandas/_libs/src/parser/io.h', + 'pandas/_libs/src/numpy_helper.h'], + 'sources': ['pandas/_libs/src/parser/tokenizer.c', + 'pandas/_libs/src/parser/io.c']}, + '_libs.reduction': { + 'pyxfile': '_libs/reduction', + 'pxdfiles': ['_libs/src/util']}, + '_libs.ops': { + 'pyxfile': '_libs/ops', + 'pxdfiles': ['_libs/src/util', + '_libs/missing']}, + '_libs.tslibs.period': { + 'pyxfile': '_libs/tslibs/period', + 'pxdfiles': ['_libs/src/util', + '_libs/missing', + '_libs/tslibs/ccalendar', + '_libs/tslibs/timedeltas', + '_libs/tslibs/timezones', + 
'_libs/tslibs/nattype'], + 'depends': tseries_depends + ['pandas/_libs/src/period_helper.h'], + 'sources': np_datetime_sources + ['pandas/_libs/src/period_helper.c']}, + '_libs.properties': { + 'pyxfile': '_libs/properties', + 'include': []}, + '_libs.reshape': { + 'pyxfile': '_libs/reshape', + 'depends': _pxi_dep['reshape']}, + '_libs.skiplist': { + 'pyxfile': '_libs/skiplist', + 'depends': ['pandas/_libs/src/skiplist.h']}, + '_libs.sparse': { + 'pyxfile': '_libs/sparse', + 'depends': _pxi_dep['sparse']}, + '_libs.tslib': { + 'pyxfile': '_libs/tslib', + 'pxdfiles': ['_libs/src/util', + '_libs/tslibs/conversion', + '_libs/tslibs/timedeltas', + '_libs/tslibs/timestamps', + '_libs/tslibs/timezones', + '_libs/tslibs/nattype'], + 'depends': tseries_depends, + 'sources': np_datetime_sources}, + '_libs.tslibs.ccalendar': { + 'pyxfile': '_libs/tslibs/ccalendar'}, + '_libs.tslibs.conversion': { + 'pyxfile': '_libs/tslibs/conversion', + 'pxdfiles': ['_libs/src/util', + '_libs/tslibs/nattype', + '_libs/tslibs/timezones', + '_libs/tslibs/timedeltas'], + 'depends': tseries_depends, + 'sources': np_datetime_sources}, + '_libs.tslibs.fields': { + 'pyxfile': '_libs/tslibs/fields', + 'pxdfiles': ['_libs/tslibs/ccalendar', + '_libs/tslibs/nattype'], + 'depends': tseries_depends, + 'sources': np_datetime_sources}, + '_libs.tslibs.frequencies': { + 'pyxfile': '_libs/tslibs/frequencies', + 'pxdfiles': ['_libs/src/util']}, + '_libs.tslibs.nattype': { + 'pyxfile': '_libs/tslibs/nattype', + 'pxdfiles': ['_libs/src/util']}, + '_libs.tslibs.np_datetime': { + 'pyxfile': '_libs/tslibs/np_datetime', + 'depends': np_datetime_headers, + 'sources': np_datetime_sources}, + '_libs.tslibs.offsets': { + 'pyxfile': '_libs/tslibs/offsets', + 'pxdfiles': ['_libs/src/util', + '_libs/tslibs/conversion', + '_libs/tslibs/frequencies', + '_libs/tslibs/nattype'], + 'depends': tseries_depends, + 'sources': np_datetime_sources}, + '_libs.tslibs.parsing': { + 'pyxfile': '_libs/tslibs/parsing', + 'pxdfiles': 
['_libs/src/util']}, + '_libs.tslibs.resolution': { + 'pyxfile': '_libs/tslibs/resolution', + 'pxdfiles': ['_libs/src/util', + '_libs/khash', + '_libs/tslibs/frequencies', + '_libs/tslibs/timezones'], + 'depends': tseries_depends, + 'sources': np_datetime_sources}, + '_libs.tslibs.strptime': { + 'pyxfile': '_libs/tslibs/strptime', + 'pxdfiles': ['_libs/src/util', + '_libs/tslibs/nattype'], + 'depends': tseries_depends, + 'sources': np_datetime_sources}, + '_libs.tslibs.timedeltas': { + 'pyxfile': '_libs/tslibs/timedeltas', + 'pxdfiles': ['_libs/src/util', + '_libs/tslibs/nattype'], + 'depends': np_datetime_headers, + 'sources': np_datetime_sources}, + '_libs.tslibs.timestamps': { + 'pyxfile': '_libs/tslibs/timestamps', + 'pxdfiles': ['_libs/src/util', + '_libs/tslibs/ccalendar', + '_libs/tslibs/conversion', + '_libs/tslibs/nattype', + '_libs/tslibs/timedeltas', + '_libs/tslibs/timezones'], + 'depends': tseries_depends, + 'sources': np_datetime_sources}, + '_libs.tslibs.timezones': { + 'pyxfile': '_libs/tslibs/timezones', + 'pxdfiles': ['_libs/src/util']}, + '_libs.testing': { + 'pyxfile': '_libs/testing'}, + '_libs.window': { + 'pyxfile': '_libs/window', + 'pxdfiles': ['_libs/skiplist', '_libs/src/util'], + 'language': 'c++', + 'suffix': '.cpp'}, + '_libs.writers': { + 'pyxfile': '_libs/writers', + 'pxdfiles': ['_libs/src/util']}, + 'io.sas._sas': { + 'pyxfile': 'io/sas/sas'}, + 'io.msgpack._packer': { + 'macros': endian_macro, + 'depends': ['pandas/_libs/src/msgpack/pack.h', + 'pandas/_libs/src/msgpack/pack_template.h'], + 'include': ['pandas/_libs/src/msgpack'] + common_include, + 'language': 'c++', + 'suffix': '.cpp', + 'pyxfile': 'io/msgpack/_packer', + 'subdir': 'io/msgpack'}, + 'io.msgpack._unpacker': { + 'depends': ['pandas/_libs/src/msgpack/unpack.h', + 'pandas/_libs/src/msgpack/unpack_define.h', + 'pandas/_libs/src/msgpack/unpack_template.h'], + 'macros': endian_macro, + 'include': ['pandas/_libs/src/msgpack'] + common_include, + 'language': 'c++', + 
'suffix': '.cpp', + 'pyxfile': 'io/msgpack/_unpacker', + 'subdir': 'io/msgpack' + } +} extensions = [] for name, data in ext_data.items(): - sources = [srcpath(data['pyxfile'], suffix=suffix, subdir='')] + source_suffix = suffix if suffix == '.pyx' else data.get('suffix', '.c') + + sources = [srcpath(data['pyxfile'], suffix=source_suffix, subdir='')] + pxds = [pxd(x) for x in data.get('pxdfiles', [])] if suffix == '.pyx' and pxds: sources.extend(pxds) @@ -532,77 +681,44 @@ def pxd(name): include = data.get('include', common_include) - obj = Extension('pandas.%s' % name, + obj = Extension('pandas.{name}'.format(name=name), sources=sources, depends=data.get('depends', []), include_dirs=include, + language=data.get('language', 'c'), + define_macros=data.get('macros', []), extra_compile_args=extra_compile_args) extensions.append(obj) - -#---------------------------------------------------------------------- -# msgpack - -if sys.byteorder == 'big': - macros = [('__BIG_ENDIAN__', '1')] -else: - macros = [('__LITTLE_ENDIAN__', '1')] - -packer_ext = Extension('pandas.msgpack._packer', - depends=['pandas/src/msgpack/pack.h', - 'pandas/src/msgpack/pack_template.h'], - sources = [srcpath('_packer', - suffix=suffix if suffix == '.pyx' else '.cpp', - subdir='msgpack')], - language='c++', - include_dirs=['pandas/src/msgpack'] + common_include, - define_macros=macros, - extra_compile_args=extra_compile_args) -unpacker_ext = Extension('pandas.msgpack._unpacker', - depends=['pandas/src/msgpack/unpack.h', - 'pandas/src/msgpack/unpack_define.h', - 'pandas/src/msgpack/unpack_template.h'], - sources = [srcpath('_unpacker', - suffix=suffix if suffix == '.pyx' else '.cpp', - subdir='msgpack')], - language='c++', - include_dirs=['pandas/src/msgpack'] + common_include, - define_macros=macros, - extra_compile_args=extra_compile_args) -extensions.append(packer_ext) -extensions.append(unpacker_ext) - -#---------------------------------------------------------------------- +# 
---------------------------------------------------------------------- # ujson -if suffix == '.pyx' and 'setuptools' in sys.modules: +if suffix == '.pyx': # undo dumb setuptools bug clobbering .pyx sources back to .c for ext in extensions: - if ext.sources[0].endswith(('.c','.cpp')): + if ext.sources[0].endswith(('.c', '.cpp')): root, _ = os.path.splitext(ext.sources[0]) ext.sources[0] = root + suffix -ujson_ext = Extension('pandas.json', - depends=['pandas/src/ujson/lib/ultrajson.h', - 'pandas/src/datetime_helper.h', - 'pandas/src/numpy_helper.h'], - sources=['pandas/src/ujson/python/ujson.c', - 'pandas/src/ujson/python/objToJSON.c', - 'pandas/src/ujson/python/JSONtoObj.c', - 'pandas/src/ujson/lib/ultrajsonenc.c', - 'pandas/src/ujson/lib/ultrajsondec.c', - 'pandas/src/datetime/np_datetime.c', - 'pandas/src/datetime/np_datetime_strings.c'], - include_dirs=['pandas/src/ujson/python', - 'pandas/src/ujson/lib', - 'pandas/src/datetime'] + common_include, - extra_compile_args=['-D_GNU_SOURCE'] + extra_compile_args) +ujson_ext = Extension('pandas._libs.json', + depends=['pandas/_libs/src/ujson/lib/ultrajson.h'], + sources=(['pandas/_libs/src/ujson/python/ujson.c', + 'pandas/_libs/src/ujson/python/objToJSON.c', + 'pandas/_libs/src/ujson/python/JSONtoObj.c', + 'pandas/_libs/src/ujson/lib/ultrajsonenc.c', + 'pandas/_libs/src/ujson/lib/ultrajsondec.c'] + + np_datetime_sources), + include_dirs=['pandas/_libs/src/ujson/python', + 'pandas/_libs/src/ujson/lib', + 'pandas/_libs/src/datetime'], + extra_compile_args=(['-D_GNU_SOURCE'] + + extra_compile_args)) extensions.append(ujson_ext) -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # util # extension for pseudo-safely moving bytes into mutable buffers _move_ext = Extension('pandas.util._move', @@ -610,86 +726,18 @@ def pxd(name): sources=['pandas/util/move.c']) extensions.append(_move_ext) - -if _have_setuptools: - 
setuptools_kwargs["test_suite"] = "nose.collector" - # The build cache system does string matching below this point. # if you change something, be careful. setup(name=DISTNAME, maintainer=AUTHOR, version=versioneer.get_version(), - packages=['pandas', - 'pandas.api', - 'pandas.api.tests', - 'pandas.api.types', - 'pandas.compat', - 'pandas.compat.numpy', - 'pandas.computation', - 'pandas.computation.tests', - 'pandas.core', - 'pandas.indexes', - 'pandas.io', - 'pandas.io.json', - 'pandas.io.sas', - 'pandas.formats', - 'pandas.sparse', - 'pandas.sparse.tests', - 'pandas.stats', - 'pandas.util', - 'pandas.tests', - 'pandas.tests.frame', - 'pandas.tests.indexes', - 'pandas.tests.indexes.datetimes', - 'pandas.tests.indexes.timedeltas', - 'pandas.tests.indexes.period', - 'pandas.tests.groupby', - 'pandas.tests.series', - 'pandas.tests.formats', - 'pandas.tests.scalar', - 'pandas.tests.tseries', - 'pandas.tests.types', - 'pandas.tests.test_msgpack', - 'pandas.tests.plotting', - 'pandas.tools', - 'pandas.tools.tests', - 'pandas.tseries', - 'pandas.types', - 'pandas.io.tests', - 'pandas.io.tests.json', - 'pandas.io.tests.parser', - 'pandas.io.tests.sas', - 'pandas.msgpack', - 'pandas.util.clipboard' - ], - package_data={'pandas.io': ['tests/data/legacy_hdf/*.h5', - 'tests/data/legacy_pickle/*/*.pickle', - 'tests/data/legacy_msgpack/*/*.msgpack', - 'tests/data/*.csv*', - 'tests/data/*.dta', - 'tests/data/*.pickle', - 'tests/data/*.txt', - 'tests/data/*.xls', - 'tests/data/*.xlsx', - 'tests/data/*.xlsm', - 'tests/data/*.table', - 'tests/parser/data/*.csv', - 'tests/parser/data/*.gz', - 'tests/parser/data/*.bz2', - 'tests/parser/data/*.txt', - 'tests/sas/data/*.csv', - 'tests/sas/data/*.xpt', - 'tests/sas/data/*.sas7bdat', - 'tests/data/*.html', - 'tests/data/html_encoding/*.html', - 'tests/json/data/*.json'], - 'pandas.tools': ['tests/data/*.csv'], - 'pandas.tests': ['data/*.csv'], - 'pandas.tests.formats': ['data/*.csv'], - 'pandas.tests.indexes': ['data/*.pickle'], - 
'pandas.tests.tseries': ['data/*.pickle'] - }, + packages=find_packages(include=['pandas', 'pandas.*']), + package_data={'': ['data/*', 'templates/*'], + 'pandas.tests.io': ['data/legacy_hdf/*.h5', + 'data/legacy_pickle/*/*.pickle', + 'data/legacy_msgpack/*/*.msgpack', + 'data/html_encoding/*.html']}, ext_modules=extensions, maintainer_email=EMAIL, description=DESCRIPTION, diff --git a/test.bat b/test.bat index 7f9244abb2bc8..e07c84f257a69 100644 --- a/test.bat +++ b/test.bat @@ -1,4 +1,3 @@ :: test on windows -:: nosetests --exe -A "not slow and not network and not disabled" pandas %* -pytest pandas +pytest --skip-slow --skip-network pandas -n 2 -r sxX --strict %* diff --git a/test.sh b/test.sh index 23c7ff52d2ce9..1255a39816f78 100755 --- a/test.sh +++ b/test.sh @@ -1,4 +1,4 @@ #!/bin/sh command -v coverage >/dev/null && coverage erase command -v python-coverage >/dev/null && python-coverage erase -pytest pandas --cov=pandas +pytest pandas --cov=pandas -r sxX --strict diff --git a/test_fast.bat b/test_fast.bat new file mode 100644 index 0000000000000..81f30dd310e28 --- /dev/null +++ b/test_fast.bat @@ -0,0 +1,3 @@ +:: test on windows +set PYTHONHASHSEED=314159265 +pytest --skip-slow --skip-network -m "not single" -n 4 -r sXX --strict pandas diff --git a/test_fast.sh b/test_fast.sh index 0b394cffa3d74..1fb55e581d292 100755 --- a/test_fast.sh +++ b/test_fast.sh @@ -1,2 +1,8 @@ -# nosetests -A "not slow and not network" pandas --with-id $* -pytest pandas --skip-slow +#!/bin/bash + +# Workaround for pytest-xdist flaky collection order +# https://github.com/pytest-dev/pytest/issues/920 +# https://github.com/pytest-dev/pytest/issues/1075 +export PYTHONHASHSEED=$(python -c 'import random; print(random.randint(1, 4294967295))') + +pytest pandas --skip-slow --skip-network -m "not single" -n 4 -r sxX --strict "$@" diff --git a/test_perf.sh b/test_perf.sh deleted file mode 100755 index 022de25bca8fc..0000000000000 --- a/test_perf.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sh 
- -CURDIR=$(pwd) -BASEDIR=$(cd "$(dirname "$0")"; pwd) -python "$BASEDIR"/vb_suite/test_perf.py $@ diff --git a/tox.ini b/tox.ini index 85c5d90fde7fb..f055251581a93 100644 --- a/tox.ini +++ b/tox.ini @@ -4,7 +4,7 @@ # and then run "tox" from this directory. [tox] -envlist = py27, py34, py35 +envlist = py27, py35, py36 [testenv] deps = @@ -19,6 +19,7 @@ deps = xlrd six sqlalchemy + moto # cd to anything but the default {toxinidir} which # contains the pandas subdirectory and confuses @@ -49,14 +50,14 @@ deps = bigquery {[testenv]deps} -[testenv:py34] +[testenv:py35] deps = - numpy==1.8.0 + numpy==1.10.0 {[testenv]deps} -[testenv:py35] +[testenv:py36] deps = - numpy==1.10.0 + numpy {[testenv]deps} [testenv:openpyxl1] diff --git a/vb_suite/.gitignore b/vb_suite/.gitignore deleted file mode 100644 index cc110f04e1225..0000000000000 --- a/vb_suite/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -benchmarks.db -build/* -source/vbench/* -source/*.rst \ No newline at end of file diff --git a/vb_suite/attrs_caching.py b/vb_suite/attrs_caching.py deleted file mode 100644 index a7e3ed7094ed6..0000000000000 --- a/vb_suite/attrs_caching.py +++ /dev/null @@ -1,20 +0,0 @@ -from vbench.benchmark import Benchmark - -common_setup = """from .pandas_vb_common import * -""" - -#---------------------------------------------------------------------- -# DataFrame.index / columns property lookup time - -setup = common_setup + """ -df = DataFrame(np.random.randn(10, 6)) -cur_index = df.index -""" -stmt = "foo = df.index" - -getattr_dataframe_index = Benchmark(stmt, setup, - name="getattr_dataframe_index") - -stmt = "df.index = cur_index" -setattr_dataframe_index = Benchmark(stmt, setup, - name="setattr_dataframe_index") diff --git a/vb_suite/binary_ops.py b/vb_suite/binary_ops.py deleted file mode 100644 index 7c821374a83ab..0000000000000 --- a/vb_suite/binary_ops.py +++ /dev/null @@ -1,199 +0,0 @@ -from vbench.benchmark import Benchmark -from datetime import datetime - -common_setup = """from 
.pandas_vb_common import * -""" - -SECTION = 'Binary ops' - -#---------------------------------------------------------------------- -# binary ops - -#---------------------------------------------------------------------- -# add - -setup = common_setup + """ -df = DataFrame(np.random.randn(20000, 100)) -df2 = DataFrame(np.random.randn(20000, 100)) -""" -frame_add = \ - Benchmark("df + df2", setup, name='frame_add', - start_date=datetime(2012, 1, 1)) - -setup = common_setup + """ -import pandas.computation.expressions as expr -df = DataFrame(np.random.randn(20000, 100)) -df2 = DataFrame(np.random.randn(20000, 100)) -expr.set_numexpr_threads(1) -""" - -frame_add_st = \ - Benchmark("df + df2", setup, name='frame_add_st',cleanup="expr.set_numexpr_threads()", - start_date=datetime(2013, 2, 26)) - -setup = common_setup + """ -import pandas.computation.expressions as expr -df = DataFrame(np.random.randn(20000, 100)) -df2 = DataFrame(np.random.randn(20000, 100)) -expr.set_use_numexpr(False) -""" -frame_add_no_ne = \ - Benchmark("df + df2", setup, name='frame_add_no_ne',cleanup="expr.set_use_numexpr(True)", - start_date=datetime(2013, 2, 26)) - -#---------------------------------------------------------------------- -# mult - -setup = common_setup + """ -df = DataFrame(np.random.randn(20000, 100)) -df2 = DataFrame(np.random.randn(20000, 100)) -""" -frame_mult = \ - Benchmark("df * df2", setup, name='frame_mult', - start_date=datetime(2012, 1, 1)) - -setup = common_setup + """ -import pandas.computation.expressions as expr -df = DataFrame(np.random.randn(20000, 100)) -df2 = DataFrame(np.random.randn(20000, 100)) -expr.set_numexpr_threads(1) -""" -frame_mult_st = \ - Benchmark("df * df2", setup, name='frame_mult_st',cleanup="expr.set_numexpr_threads()", - start_date=datetime(2013, 2, 26)) - -setup = common_setup + """ -import pandas.computation.expressions as expr -df = DataFrame(np.random.randn(20000, 100)) -df2 = DataFrame(np.random.randn(20000, 100)) 
-expr.set_use_numexpr(False) -""" -frame_mult_no_ne = \ - Benchmark("df * df2", setup, name='frame_mult_no_ne',cleanup="expr.set_use_numexpr(True)", - start_date=datetime(2013, 2, 26)) - -#---------------------------------------------------------------------- -# division - -setup = common_setup + """ -df = DataFrame(np.random.randn(1000, 1000)) -""" -frame_float_div_by_zero = \ - Benchmark("df / 0", setup, name='frame_float_div_by_zero') - -setup = common_setup + """ -df = DataFrame(np.random.randn(1000, 1000)) -""" -frame_float_floor_by_zero = \ - Benchmark("df // 0", setup, name='frame_float_floor_by_zero') - -setup = common_setup + """ -df = DataFrame(np.random.random_integers(np.iinfo(np.int16).min, np.iinfo(np.int16).max, size=(1000, 1000))) -""" -frame_int_div_by_zero = \ - Benchmark("df / 0", setup, name='frame_int_div_by_zero') - -setup = common_setup + """ -df = DataFrame(np.random.randn(1000, 1000)) -df2 = DataFrame(np.random.randn(1000, 1000)) -""" -frame_float_div = \ - Benchmark("df // df2", setup, name='frame_float_div') - -#---------------------------------------------------------------------- -# modulo - -setup = common_setup + """ -df = DataFrame(np.random.randn(1000, 1000)) -df2 = DataFrame(np.random.randn(1000, 1000)) -""" -frame_float_mod = \ - Benchmark("df / df2", setup, name='frame_float_mod') - -setup = common_setup + """ -df = DataFrame(np.random.random_integers(np.iinfo(np.int16).min, np.iinfo(np.int16).max, size=(1000, 1000))) -df2 = DataFrame(np.random.random_integers(np.iinfo(np.int16).min, np.iinfo(np.int16).max, size=(1000, 1000))) -""" -frame_int_mod = \ - Benchmark("df / df2", setup, name='frame_int_mod') - -#---------------------------------------------------------------------- -# multi and - -setup = common_setup + """ -df = DataFrame(np.random.randn(20000, 100)) -df2 = DataFrame(np.random.randn(20000, 100)) -""" -frame_multi_and = \ - Benchmark("df[(df>0) & (df2>0)]", setup, name='frame_multi_and', - start_date=datetime(2012, 1, 
1)) - -setup = common_setup + """ -import pandas.computation.expressions as expr -df = DataFrame(np.random.randn(20000, 100)) -df2 = DataFrame(np.random.randn(20000, 100)) -expr.set_numexpr_threads(1) -""" -frame_multi_and_st = \ - Benchmark("df[(df>0) & (df2>0)]", setup, name='frame_multi_and_st',cleanup="expr.set_numexpr_threads()", - start_date=datetime(2013, 2, 26)) - -setup = common_setup + """ -import pandas.computation.expressions as expr -df = DataFrame(np.random.randn(20000, 100)) -df2 = DataFrame(np.random.randn(20000, 100)) -expr.set_use_numexpr(False) -""" -frame_multi_and_no_ne = \ - Benchmark("df[(df>0) & (df2>0)]", setup, name='frame_multi_and_no_ne',cleanup="expr.set_use_numexpr(True)", - start_date=datetime(2013, 2, 26)) - -#---------------------------------------------------------------------- -# timeseries - -setup = common_setup + """ -N = 1000000 -halfway = N // 2 - 1 -s = Series(date_range('20010101', periods=N, freq='T')) -ts = s[halfway] -""" - -timestamp_series_compare = Benchmark("ts >= s", setup, - start_date=datetime(2013, 9, 27)) -series_timestamp_compare = Benchmark("s <= ts", setup, - start_date=datetime(2012, 2, 21)) - -setup = common_setup + """ -N = 1000000 -s = Series(date_range('20010101', periods=N, freq='s')) -""" - -timestamp_ops_diff1 = Benchmark("s.diff()", setup, - start_date=datetime(2013, 1, 1)) -timestamp_ops_diff2 = Benchmark("s-s.shift()", setup, - start_date=datetime(2013, 1, 1)) - -#---------------------------------------------------------------------- -# timeseries with tz - -setup = common_setup + """ -N = 10000 -halfway = N // 2 - 1 -s = Series(date_range('20010101', periods=N, freq='T', tz='US/Eastern')) -ts = s[halfway] -""" - -timestamp_tz_series_compare = Benchmark("ts >= s", setup, - start_date=datetime(2013, 9, 27)) -series_timestamp_tz_compare = Benchmark("s <= ts", setup, - start_date=datetime(2012, 2, 21)) - -setup = common_setup + """ -N = 10000 -s = Series(date_range('20010101', periods=N, freq='s', 
tz='US/Eastern')) -""" - -timestamp_tz_ops_diff1 = Benchmark("s.diff()", setup, - start_date=datetime(2013, 1, 1)) -timestamp_tz_ops_diff2 = Benchmark("s-s.shift()", setup, - start_date=datetime(2013, 1, 1)) diff --git a/vb_suite/categoricals.py b/vb_suite/categoricals.py deleted file mode 100644 index a08d479df20cb..0000000000000 --- a/vb_suite/categoricals.py +++ /dev/null @@ -1,16 +0,0 @@ -from vbench.benchmark import Benchmark -from datetime import datetime - -common_setup = """from .pandas_vb_common import * -""" - -#---------------------------------------------------------------------- -# Series constructors - -setup = common_setup + """ -s = pd.Series(list('aabbcd') * 1000000).astype('category') -""" - -concat_categorical = \ - Benchmark("concat([s, s])", setup=setup, name='concat_categorical', - start_date=datetime(year=2015, month=7, day=15)) diff --git a/vb_suite/ctors.py b/vb_suite/ctors.py deleted file mode 100644 index 8123322383f0a..0000000000000 --- a/vb_suite/ctors.py +++ /dev/null @@ -1,39 +0,0 @@ -from vbench.benchmark import Benchmark -from datetime import datetime - -common_setup = """from .pandas_vb_common import * -""" - -#---------------------------------------------------------------------- -# Series constructors - -setup = common_setup + """ -data = np.random.randn(100) -index = Index(np.arange(100)) -""" - -ctor_series_ndarray = \ - Benchmark("Series(data, index=index)", setup=setup, - name='series_constructor_ndarray') - -setup = common_setup + """ -arr = np.random.randn(100, 100) -""" - -ctor_frame_ndarray = \ - Benchmark("DataFrame(arr)", setup=setup, - name='frame_constructor_ndarray') - -setup = common_setup + """ -data = np.array(['foo', 'bar', 'baz'], dtype=object) -""" - -ctor_index_array_string = Benchmark('Index(data)', setup=setup) - -# index constructors -setup = common_setup + """ -s = Series([Timestamp('20110101'),Timestamp('20120101'),Timestamp('20130101')]*1000) -""" -index_from_series_ctor = Benchmark('Index(s)', 
setup=setup) - -dtindex_from_series_ctor = Benchmark('DatetimeIndex(s)', setup=setup) diff --git a/vb_suite/eval.py b/vb_suite/eval.py deleted file mode 100644 index bf80aad956184..0000000000000 --- a/vb_suite/eval.py +++ /dev/null @@ -1,150 +0,0 @@ -from vbench.benchmark import Benchmark -from datetime import datetime - -common_setup = """from .pandas_vb_common import * -import pandas as pd -df = DataFrame(np.random.randn(20000, 100)) -df2 = DataFrame(np.random.randn(20000, 100)) -df3 = DataFrame(np.random.randn(20000, 100)) -df4 = DataFrame(np.random.randn(20000, 100)) -""" - -setup = common_setup + """ -import pandas.computation.expressions as expr -expr.set_numexpr_threads(1) -""" - -SECTION = 'Eval' - -#---------------------------------------------------------------------- -# binary ops - -#---------------------------------------------------------------------- -# add -eval_frame_add_all_threads = \ - Benchmark("pd.eval('df + df2 + df3 + df4')", common_setup, - name='eval_frame_add_all_threads', - start_date=datetime(2013, 7, 21)) - - - -eval_frame_add_one_thread = \ - Benchmark("pd.eval('df + df2 + df3 + df4')", setup, - name='eval_frame_add_one_thread', - start_date=datetime(2013, 7, 26)) - -eval_frame_add_python = \ - Benchmark("pd.eval('df + df2 + df3 + df4', engine='python')", common_setup, - name='eval_frame_add_python', start_date=datetime(2013, 7, 21)) - -eval_frame_add_python_one_thread = \ - Benchmark("pd.eval('df + df2 + df3 + df4', engine='python')", setup, - name='eval_frame_add_python_one_thread', - start_date=datetime(2013, 7, 26)) -#---------------------------------------------------------------------- -# mult - -eval_frame_mult_all_threads = \ - Benchmark("pd.eval('df * df2 * df3 * df4')", common_setup, - name='eval_frame_mult_all_threads', - start_date=datetime(2013, 7, 21)) - -eval_frame_mult_one_thread = \ - Benchmark("pd.eval('df * df2 * df3 * df4')", setup, - name='eval_frame_mult_one_thread', - start_date=datetime(2013, 7, 26)) - 
-eval_frame_mult_python = \ - Benchmark("pd.eval('df * df2 * df3 * df4', engine='python')", - common_setup, - name='eval_frame_mult_python', start_date=datetime(2013, 7, 21)) - -eval_frame_mult_python_one_thread = \ - Benchmark("pd.eval('df * df2 * df3 * df4', engine='python')", setup, - name='eval_frame_mult_python_one_thread', - start_date=datetime(2013, 7, 26)) - -#---------------------------------------------------------------------- -# multi and - -eval_frame_and_all_threads = \ - Benchmark("pd.eval('(df > 0) & (df2 > 0) & (df3 > 0) & (df4 > 0)')", - common_setup, - name='eval_frame_and_all_threads', - start_date=datetime(2013, 7, 21)) - -eval_frame_and_one_thread = \ - Benchmark("pd.eval('(df > 0) & (df2 > 0) & (df3 > 0) & (df4 > 0)')", setup, - name='eval_frame_and_one_thread', - start_date=datetime(2013, 7, 26)) - -eval_frame_and_python = \ - Benchmark("pd.eval('(df > 0) & (df2 > 0) & (df3 > 0) & (df4 > 0)', engine='python')", - common_setup, name='eval_frame_and_python', - start_date=datetime(2013, 7, 21)) - -eval_frame_and_one_thread = \ - Benchmark("pd.eval('(df > 0) & (df2 > 0) & (df3 > 0) & (df4 > 0)', engine='python')", - setup, - name='eval_frame_and_python_one_thread', - start_date=datetime(2013, 7, 26)) - -#-------------------------------------------------------------------- -# chained comp -eval_frame_chained_cmp_all_threads = \ - Benchmark("pd.eval('df < df2 < df3 < df4')", common_setup, - name='eval_frame_chained_cmp_all_threads', - start_date=datetime(2013, 7, 21)) - -eval_frame_chained_cmp_one_thread = \ - Benchmark("pd.eval('df < df2 < df3 < df4')", setup, - name='eval_frame_chained_cmp_one_thread', - start_date=datetime(2013, 7, 26)) - -eval_frame_chained_cmp_python = \ - Benchmark("pd.eval('df < df2 < df3 < df4', engine='python')", - common_setup, name='eval_frame_chained_cmp_python', - start_date=datetime(2013, 7, 26)) - -eval_frame_chained_cmp_one_thread = \ - Benchmark("pd.eval('df < df2 < df3 < df4', engine='python')", setup, - 
name='eval_frame_chained_cmp_python_one_thread', - start_date=datetime(2013, 7, 26)) - - -common_setup = """from .pandas_vb_common import * -""" - -setup = common_setup + """ -N = 1000000 -halfway = N // 2 - 1 -index = date_range('20010101', periods=N, freq='T') -s = Series(index) -ts = s.iloc[halfway] -""" - -series_setup = setup + """ -df = DataFrame({'dates': s.values}) -""" - -query_datetime_series = Benchmark("df.query('dates < @ts')", - series_setup, - start_date=datetime(2013, 9, 27)) - -index_setup = setup + """ -df = DataFrame({'a': np.random.randn(N)}, index=index) -""" - -query_datetime_index = Benchmark("df.query('index < @ts')", - index_setup, start_date=datetime(2013, 9, 27)) - -setup = setup + """ -N = 1000000 -df = DataFrame({'a': np.random.randn(N)}) -min_val = df['a'].min() -max_val = df['a'].max() -""" - -query_with_boolean_selection = Benchmark("df.query('(a >= @min_val) & (a <= @max_val)')", - setup, start_date=datetime(2013, 9, 27)) - diff --git a/vb_suite/frame_ctor.py b/vb_suite/frame_ctor.py deleted file mode 100644 index 0d57da7b88d3b..0000000000000 --- a/vb_suite/frame_ctor.py +++ /dev/null @@ -1,123 +0,0 @@ -from vbench.benchmark import Benchmark -from datetime import datetime -try: - import pandas.tseries.offsets as offsets -except: - import pandas.core.datetools as offsets - -common_setup = """from .pandas_vb_common import * -try: - from pandas.tseries.offsets import * -except: - from pandas.core.datetools import * -""" - -#---------------------------------------------------------------------- -# Creation from nested dict - -setup = common_setup + """ -N, K = 5000, 50 -index = tm.makeStringIndex(N) -columns = tm.makeStringIndex(K) -frame = DataFrame(np.random.randn(N, K), index=index, columns=columns) - -try: - data = frame.to_dict() -except: - data = frame.toDict() - -some_dict = data.values()[0] -dict_list = [dict(zip(columns, row)) for row in frame.values] -""" - -frame_ctor_nested_dict = Benchmark("DataFrame(data)", setup) - -# 
From JSON-like stuff -frame_ctor_list_of_dict = Benchmark("DataFrame(dict_list)", setup, - start_date=datetime(2011, 12, 20)) - -series_ctor_from_dict = Benchmark("Series(some_dict)", setup) - -# nested dict, integer indexes, regression described in #621 -setup = common_setup + """ -data = dict((i,dict((j,float(j)) for j in range(100))) for i in xrange(2000)) -""" -frame_ctor_nested_dict_int64 = Benchmark("DataFrame(data)", setup) - -# dynamically generate benchmarks for every offset -# -# get_period_count & get_index_for_offset are there because blindly taking each -# offset times 1000 can easily go out of Timestamp bounds and raise errors. -dynamic_benchmarks = {} -n_steps = [1, 2] -offset_kwargs = {'WeekOfMonth': {'weekday': 1, 'week': 1}, - 'LastWeekOfMonth': {'weekday': 1, 'week': 1}, - 'FY5253': {'startingMonth': 1, 'weekday': 1}, - 'FY5253Quarter': {'qtr_with_extra_week': 1, 'startingMonth': 1, 'weekday': 1}} - -offset_extra_cases = {'FY5253': {'variation': ['nearest', 'last']}, - 'FY5253Quarter': {'variation': ['nearest', 'last']}} - -for offset in offsets.__all__: - for n in n_steps: - kwargs = {} - if offset in offset_kwargs: - kwargs = offset_kwargs[offset] - - if offset in offset_extra_cases: - extras = offset_extra_cases[offset] - else: - extras = {'': ['']} - - for extra_arg in extras: - for extra in extras[extra_arg]: - if extra: - kwargs[extra_arg] = extra - setup = common_setup + """ - -def get_period_count(start_date, off): - ten_offsets_in_days = ((start_date + off * 10) - start_date).days - if ten_offsets_in_days == 0: - return 1000 - else: - return min(9 * ((Timestamp.max - start_date).days // - ten_offsets_in_days), - 1000) - -def get_index_for_offset(off): - start_date = Timestamp('1/1/1900') - return date_range(start_date, - periods=min(1000, get_period_count(start_date, off)), - freq=off) - -idx = get_index_for_offset({}({}, **{})) -df = DataFrame(np.random.randn(len(idx),10), index=idx) -d = dict([ (col,df[col]) for col in df.columns ]) 
-""".format(offset, n, kwargs) - key = 'frame_ctor_dtindex_{}x{}'.format(offset, n) - if extra: - key += '__{}_{}'.format(extra_arg, extra) - dynamic_benchmarks[key] = Benchmark("DataFrame(d)", setup, name=key) - -# Have to stuff them in globals() so vbench detects them -globals().update(dynamic_benchmarks) - -# from a mi-series -setup = common_setup + """ -mi = MultiIndex.from_tuples([(x,y) for x in range(100) for y in range(100)]) -s = Series(randn(10000), index=mi) -""" -frame_from_series = Benchmark("DataFrame(s)", setup) - -#---------------------------------------------------------------------- -# get_numeric_data - -setup = common_setup + """ -df = DataFrame(randn(10000, 25)) -df['foo'] = 'bar' -df['bar'] = 'baz' -df = df.consolidate() -""" - -frame_get_numeric_data = Benchmark('df._get_numeric_data()', setup, - start_date=datetime(2011, 11, 1)) diff --git a/vb_suite/frame_methods.py b/vb_suite/frame_methods.py deleted file mode 100644 index 46343e9c607fd..0000000000000 --- a/vb_suite/frame_methods.py +++ /dev/null @@ -1,525 +0,0 @@ -from vbench.api import Benchmark -from datetime import datetime - -common_setup = """from .pandas_vb_common import * -""" - -#---------------------------------------------------------------------- -# lookup - -setup = common_setup + """ -df = DataFrame(np.random.randn(10000, 8), columns=list('abcdefgh')) -df['foo'] = 'bar' - -row_labels = list(df.index[::10])[:900] -col_labels = list(df.columns) * 100 -row_labels_all = np.array(list(df.index) * len(df.columns), dtype='object') -col_labels_all = np.array(list(df.columns) * len(df.index), dtype='object') -""" - -frame_fancy_lookup = Benchmark('df.lookup(row_labels, col_labels)', setup, - start_date=datetime(2012, 1, 12)) - -frame_fancy_lookup_all = Benchmark('df.lookup(row_labels_all, col_labels_all)', - setup, - start_date=datetime(2012, 1, 12)) - -#---------------------------------------------------------------------- -# fillna in place - -setup = common_setup + """ -df = 
DataFrame(randn(10000, 100)) -df.values[::2] = np.nan -""" - -frame_fillna_inplace = Benchmark('df.fillna(0, inplace=True)', setup, - start_date=datetime(2012, 4, 4)) - - -#---------------------------------------------------------------------- -# reindex both axes - -setup = common_setup + """ -df = DataFrame(randn(10000, 10000)) -idx = np.arange(4000, 7000) -""" - -frame_reindex_axis0 = Benchmark('df.reindex(idx)', setup) - -frame_reindex_axis1 = Benchmark('df.reindex(columns=idx)', setup) - -frame_reindex_both_axes = Benchmark('df.reindex(index=idx, columns=idx)', - setup, start_date=datetime(2011, 1, 1)) - -frame_reindex_both_axes_ix = Benchmark('df.ix[idx, idx]', setup, - start_date=datetime(2011, 1, 1)) - -#---------------------------------------------------------------------- -# reindex with upcasts -setup = common_setup + """ -df=DataFrame(dict([(c, { - 0: randint(0, 2, 1000).astype(np.bool_), - 1: randint(0, 1000, 1000).astype(np.int16), - 2: randint(0, 1000, 1000).astype(np.int32), - 3: randint(0, 1000, 1000).astype(np.int64) - }[randint(0, 4)]) for c in range(1000)])) -""" - -frame_reindex_upcast = Benchmark('df.reindex(permutation(range(1200)))', setup) - -#---------------------------------------------------------------------- -# boolean indexing - -setup = common_setup + """ -df = DataFrame(randn(10000, 100)) -bool_arr = np.zeros(10000, dtype=bool) -bool_arr[:1000] = True -""" - -frame_boolean_row_select = Benchmark('df[bool_arr]', setup, - start_date=datetime(2011, 1, 1)) - -#---------------------------------------------------------------------- -# iteritems (monitor no-copying behaviour) - -setup = common_setup + """ -df = DataFrame(randn(10000, 1000)) -df2 = DataFrame(randn(3000,1),columns=['A']) -df3 = DataFrame(randn(3000,1)) - -def f(): - if hasattr(df, '_item_cache'): - df._item_cache.clear() - for name, col in df.iteritems(): - pass - -def g(): - for name, col in df.iteritems(): - pass - -def h(): - for i in range(10000): - df2['A'] - -def j(): 
- for i in range(10000): - df3[0] - -""" - -# as far back as the earliest test currently in the suite -frame_iteritems = Benchmark('f()', setup, - start_date=datetime(2010, 6, 1)) - -frame_iteritems_cached = Benchmark('g()', setup, - start_date=datetime(2010, 6, 1)) - -frame_getitem_single_column = Benchmark('h()', setup, - start_date=datetime(2010, 6, 1)) - -frame_getitem_single_column2 = Benchmark('j()', setup, - start_date=datetime(2010, 6, 1)) - -#---------------------------------------------------------------------- -# assignment - -setup = common_setup + """ -idx = date_range('1/1/2000', periods=100000, freq='D') -df = DataFrame(randn(100000, 1),columns=['A'],index=idx) -def f(df): - x = df.copy() - x['date'] = x.index -""" - -frame_assign_timeseries_index = Benchmark('f(df)', setup, - start_date=datetime(2013, 10, 1)) - - -#---------------------------------------------------------------------- -# to_string - -setup = common_setup + """ -df = DataFrame(randn(100, 10)) -""" - -frame_to_string_floats = Benchmark('df.to_string()', setup, - start_date=datetime(2010, 6, 1)) - -#---------------------------------------------------------------------- -# to_html - -setup = common_setup + """ -nrows=500 -df = DataFrame(randn(nrows, 10)) -df[0]=period_range("2000","2010",nrows) -df[1]=range(nrows) - -""" - -frame_to_html_mixed = Benchmark('df.to_html()', setup, - start_date=datetime(2011, 11, 18)) - - -# truncated repr_html, single index - -setup = common_setup + """ -nrows=10000 -data=randn(nrows,10) -idx=MultiIndex.from_arrays(np.tile(randn(3,nrows/100),100)) -df=DataFrame(data,index=idx) - -""" - -frame_html_repr_trunc_mi = Benchmark('df._repr_html_()', setup, - start_date=datetime(2013, 11, 25)) - -# truncated repr_html, MultiIndex - -setup = common_setup + """ -nrows=10000 -data=randn(nrows,10) -idx=randn(nrows) -df=DataFrame(data,index=idx) - -""" - -frame_html_repr_trunc_si = Benchmark('df._repr_html_()', setup, - start_date=datetime(2013, 11, 25)) - - -# insert 
many columns - -setup = common_setup + """ -N = 1000 - -def f(K=500): - df = DataFrame(index=range(N)) - new_col = np.random.randn(N) - for i in range(K): - df[i] = new_col -""" - -frame_insert_500_columns_end = Benchmark('f()', setup, start_date=datetime(2011, 1, 1)) - -setup = common_setup + """ -N = 1000 - -def f(K=100): - df = DataFrame(index=range(N)) - new_col = np.random.randn(N) - for i in range(K): - df.insert(0,i,new_col) -""" - -frame_insert_100_columns_begin = Benchmark('f()', setup, start_date=datetime(2011, 1, 1)) - -#---------------------------------------------------------------------- -# strings methods, #2602 - -setup = common_setup + """ -s = Series(['abcdefg', np.nan]*500000) -""" - -series_string_vector_slice = Benchmark('s.str[:5]', setup, - start_date=datetime(2012, 8, 1)) - -#---------------------------------------------------------------------- -# df.info() and get_dtype_counts() # 2807 - -setup = common_setup + """ -df = pandas.DataFrame(np.random.randn(10,10000)) -""" - -frame_get_dtype_counts = Benchmark('df.get_dtype_counts()', setup, - start_date=datetime(2012, 8, 1)) - -## -setup = common_setup + """ -df = pandas.DataFrame(np.random.randn(10,10000)) -""" - -frame_repr_wide = Benchmark('repr(df)', setup, - start_date=datetime(2012, 8, 1)) - -## -setup = common_setup + """ -df = pandas.DataFrame(np.random.randn(10000, 10)) -""" - -frame_repr_tall = Benchmark('repr(df)', setup, - start_date=datetime(2012, 8, 1)) - -## -setup = common_setup + """ -df = DataFrame(randn(100000, 1)) -""" - -frame_xs_row = Benchmark('df.xs(50000)', setup) - -## -setup = common_setup + """ -df = DataFrame(randn(1,100000)) -""" - -frame_xs_col = Benchmark('df.xs(50000,axis = 1)', setup) - -#---------------------------------------------------------------------- -# nulls/masking - -## masking -setup = common_setup + """ -data = np.random.randn(1000, 500) -df = DataFrame(data) -df = df.where(df > 0) # create nans -bools = df > 0 -mask = isnull(df) -""" - 
-frame_mask_bools = Benchmark('bools.mask(mask)', setup, - start_date=datetime(2013,1,1)) - -frame_mask_floats = Benchmark('bools.astype(float).mask(mask)', setup, - start_date=datetime(2013,1,1)) - -## isnull -setup = common_setup + """ -data = np.random.randn(1000, 1000) -df = DataFrame(data) -""" -frame_isnull = Benchmark('isnull(df)', setup, - start_date=datetime(2012,1,1)) - -## dropna -dropna_setup = common_setup + """ -data = np.random.randn(10000, 1000) -df = DataFrame(data) -df.ix[50:1000,20:50] = np.nan -df.ix[2000:3000] = np.nan -df.ix[:,60:70] = np.nan -""" -frame_dropna_axis0_any = Benchmark('df.dropna(how="any",axis=0)', dropna_setup, - start_date=datetime(2012,1,1)) -frame_dropna_axis0_all = Benchmark('df.dropna(how="all",axis=0)', dropna_setup, - start_date=datetime(2012,1,1)) - -frame_dropna_axis1_any = Benchmark('df.dropna(how="any",axis=1)', dropna_setup, - start_date=datetime(2012,1,1)) - -frame_dropna_axis1_all = Benchmark('df.dropna(how="all",axis=1)', dropna_setup, - start_date=datetime(2012,1,1)) - -# dropna on mixed dtypes -dropna_mixed_setup = common_setup + """ -data = np.random.randn(10000, 1000) -df = DataFrame(data) -df.ix[50:1000,20:50] = np.nan -df.ix[2000:3000] = np.nan -df.ix[:,60:70] = np.nan -df['foo'] = 'bar' -""" -frame_dropna_axis0_any_mixed_dtypes = Benchmark('df.dropna(how="any",axis=0)', dropna_mixed_setup, - start_date=datetime(2012,1,1)) -frame_dropna_axis0_all_mixed_dtypes = Benchmark('df.dropna(how="all",axis=0)', dropna_mixed_setup, - start_date=datetime(2012,1,1)) - -frame_dropna_axis1_any_mixed_dtypes = Benchmark('df.dropna(how="any",axis=1)', dropna_mixed_setup, - start_date=datetime(2012,1,1)) - -frame_dropna_axis1_all_mixed_dtypes = Benchmark('df.dropna(how="all",axis=1)', dropna_mixed_setup, - start_date=datetime(2012,1,1)) - -## dropna multi -dropna_setup = common_setup + """ -data = np.random.randn(10000, 1000) -df = DataFrame(data) -df.ix[50:1000,20:50] = np.nan -df.ix[2000:3000] = np.nan -df.ix[:,60:70] = 
np.nan -df.index = MultiIndex.from_tuples(df.index.map(lambda x: (x, x))) -df.columns = MultiIndex.from_tuples(df.columns.map(lambda x: (x, x))) -""" -frame_count_level_axis0_multi = Benchmark('df.count(axis=0, level=1)', dropna_setup, - start_date=datetime(2012,1,1)) - -frame_count_level_axis1_multi = Benchmark('df.count(axis=1, level=1)', dropna_setup, - start_date=datetime(2012,1,1)) - -# dropna on mixed dtypes -dropna_mixed_setup = common_setup + """ -data = np.random.randn(10000, 1000) -df = DataFrame(data) -df.ix[50:1000,20:50] = np.nan -df.ix[2000:3000] = np.nan -df.ix[:,60:70] = np.nan -df['foo'] = 'bar' -df.index = MultiIndex.from_tuples(df.index.map(lambda x: (x, x))) -df.columns = MultiIndex.from_tuples(df.columns.map(lambda x: (x, x))) -""" -frame_count_level_axis0_mixed_dtypes_multi = Benchmark('df.count(axis=0, level=1)', dropna_mixed_setup, - start_date=datetime(2012,1,1)) - -frame_count_level_axis1_mixed_dtypes_multi = Benchmark('df.count(axis=1, level=1)', dropna_mixed_setup, - start_date=datetime(2012,1,1)) - -#---------------------------------------------------------------------- -# apply - -setup = common_setup + """ -s = Series(np.arange(1028.)) -df = DataFrame({ i:s for i in range(1028) }) -""" -frame_apply_user_func = Benchmark('df.apply(lambda x: np.corrcoef(x,s)[0,1])', setup, - name = 'frame_apply_user_func', - start_date=datetime(2012,1,1)) - -setup = common_setup + """ -df = DataFrame(np.random.randn(1000,100)) -""" -frame_apply_lambda_mean = Benchmark('df.apply(lambda x: x.sum())', setup, - name = 'frame_apply_lambda_mean', - start_date=datetime(2012,1,1)) -setup = common_setup + """ -df = DataFrame(np.random.randn(1000,100)) -""" -frame_apply_np_mean = Benchmark('df.apply(np.mean)', setup, - name = 'frame_apply_np_mean', - start_date=datetime(2012,1,1)) - -setup = common_setup + """ -df = DataFrame(np.random.randn(1000,100)) -""" -frame_apply_pass_thru = Benchmark('df.apply(lambda x: x)', setup, - name = 'frame_apply_pass_thru', - 
start_date=datetime(2012,1,1)) - -setup = common_setup + """ -df = DataFrame(np.random.randn(1000,100)) -""" -frame_apply_axis_1 = Benchmark('df.apply(lambda x: x+1,axis=1)', setup, - name = 'frame_apply_axis_1', - start_date=datetime(2012,1,1)) - -setup = common_setup + """ -df = DataFrame(np.random.randn(1000,3),columns=list('ABC')) -""" -frame_apply_ref_by_name = Benchmark('df.apply(lambda x: x["A"] + x["B"],axis=1)', setup, - name = 'frame_apply_ref_by_name', - start_date=datetime(2012,1,1)) - -#---------------------------------------------------------------------- -# dtypes - -setup = common_setup + """ -df = DataFrame(np.random.randn(1000,1000)) -""" -frame_dtypes = Benchmark('df.dtypes', setup, - start_date=datetime(2012,1,1)) - -#---------------------------------------------------------------------- -# equals -setup = common_setup + """ -def make_pair(frame): - df = frame - df2 = df.copy() - df2.ix[-1,-1] = np.nan - return df, df2 - -def test_equal(name): - df, df2 = pairs[name] - return df.equals(df) - -def test_unequal(name): - df, df2 = pairs[name] - return df.equals(df2) - -float_df = DataFrame(np.random.randn(1000, 1000)) -object_df = DataFrame([['foo']*1000]*1000) -nonunique_cols = object_df.copy() -nonunique_cols.columns = ['A']*len(nonunique_cols.columns) - -pairs = dict([(name, make_pair(frame)) - for name, frame in (('float_df', float_df), ('object_df', object_df), ('nonunique_cols', nonunique_cols))]) -""" -frame_float_equal = Benchmark('test_equal("float_df")', setup) -frame_object_equal = Benchmark('test_equal("object_df")', setup) -frame_nonunique_equal = Benchmark('test_equal("nonunique_cols")', setup) - -frame_float_unequal = Benchmark('test_unequal("float_df")', setup) -frame_object_unequal = Benchmark('test_unequal("object_df")', setup) -frame_nonunique_unequal = Benchmark('test_unequal("nonunique_cols")', setup) - -#----------------------------------------------------------------------------- -# interpolate -# this is the worst case, 
where every column has NaNs. -setup = common_setup + """ -df = DataFrame(randn(10000, 100)) -df.values[::2] = np.nan -""" - -frame_interpolate = Benchmark('df.interpolate()', setup, - start_date=datetime(2014, 2, 7)) - -setup = common_setup + """ -df = DataFrame({'A': np.arange(0, 10000), - 'B': np.random.randint(0, 100, 10000), - 'C': randn(10000), - 'D': randn(10000)}) -df.loc[1::5, 'A'] = np.nan -df.loc[1::5, 'C'] = np.nan -""" - -frame_interpolate_some_good = Benchmark('df.interpolate()', setup, - start_date=datetime(2014, 2, 7)) -frame_interpolate_some_good_infer = Benchmark('df.interpolate(downcast="infer")', - setup, - start_date=datetime(2014, 2, 7)) - - -#------------------------------------------------------------------------- -# frame shift speedup issue-5609 - -setup = common_setup + """ -df = DataFrame(np.random.rand(10000,500)) -# note: df._data.blocks are f_contigous -""" -frame_shift_axis0 = Benchmark('df.shift(1,axis=0)', setup, - start_date=datetime(2014,1,1)) -frame_shift_axis1 = Benchmark('df.shift(1,axis=1)', setup, - name = 'frame_shift_axis_1', - start_date=datetime(2014,1,1)) - - -#----------------------------------------------------------------------------- -# from_records issue-6700 - -setup = common_setup + """ -def get_data(n=100000): - return ((x, x*20, x*100) for x in range(n)) -""" - -frame_from_records_generator = Benchmark('df = DataFrame.from_records(get_data())', - setup, - name='frame_from_records_generator', - start_date=datetime(2013,10,4)) # issue-4911 - -frame_from_records_generator_nrows = Benchmark('df = DataFrame.from_records(get_data(), nrows=1000)', - setup, - name='frame_from_records_generator_nrows', - start_date=datetime(2013,10,04)) # issue-4911 - -#----------------------------------------------------------------------------- -# duplicated - -setup = common_setup + ''' -n = 1 << 20 - -t = date_range('2015-01-01', freq='S', periods=n // 64) -xs = np.random.randn(n // 64).round(2) - -df = 
DataFrame({'a':np.random.randint(- 1 << 8, 1 << 8, n), - 'b':np.random.choice(t, n), - 'c':np.random.choice(xs, n)}) -''' - -frame_duplicated = Benchmark('df.duplicated()', setup, - name='frame_duplicated') diff --git a/vb_suite/generate_rst_files.py b/vb_suite/generate_rst_files.py deleted file mode 100644 index 92e7cd4d59b71..0000000000000 --- a/vb_suite/generate_rst_files.py +++ /dev/null @@ -1,2 +0,0 @@ -from suite import benchmarks, generate_rst_files -generate_rst_files(benchmarks) diff --git a/vb_suite/gil.py b/vb_suite/gil.py deleted file mode 100644 index df2bd2dcd8db4..0000000000000 --- a/vb_suite/gil.py +++ /dev/null @@ -1,110 +0,0 @@ -from vbench.api import Benchmark -from datetime import datetime - -common_setup = """from .pandas_vb_common import * -""" - -basic = common_setup + """ -try: - from pandas.util.testing import test_parallel - have_real_test_parallel = True -except ImportError: - have_real_test_parallel = False - def test_parallel(num_threads=1): - def wrapper(fname): - return fname - - return wrapper - -N = 1000000 -ngroups = 1000 -np.random.seed(1234) - -df = DataFrame({'key' : np.random.randint(0,ngroups,size=N), - 'data' : np.random.randn(N) }) - -if not have_real_test_parallel: - raise NotImplementedError -""" - -setup = basic + """ - -def f(): - df.groupby('key')['data'].sum() - -# run consecutivily -def g2(): - for i in range(2): - f() -def g4(): - for i in range(4): - f() -def g8(): - for i in range(8): - f() - -# run in parallel -@test_parallel(num_threads=2) -def pg2(): - f() - -@test_parallel(num_threads=4) -def pg4(): - f() - -@test_parallel(num_threads=8) -def pg8(): - f() - -""" - -nogil_groupby_sum_4 = Benchmark( - 'pg4()', setup, - start_date=datetime(2015, 1, 1)) - -nogil_groupby_sum_8 = Benchmark( - 'pg8()', setup, - start_date=datetime(2015, 1, 1)) - - -#### test all groupby funcs #### - -setup = basic + """ - -@test_parallel(num_threads=2) -def pg2(): - df.groupby('key')['data'].func() - -""" - -for f in 
['sum','prod','var','count','min','max','mean','last']: - - name = "nogil_groupby_{f}_2".format(f=f) - bmark = Benchmark('pg2()', setup.replace('func',f), start_date=datetime(2015, 1, 1)) - bmark.name = name - globals()[name] = bmark - -del bmark - - -#### test take_1d #### -setup = basic + """ -from pandas.core import common as com - -N = 1e7 -df = DataFrame({'int64' : np.arange(N,dtype='int64'), - 'float64' : np.arange(N,dtype='float64')}) -indexer = np.arange(100,len(df)-100) - -@test_parallel(num_threads=2) -def take_1d_pg2_int64(): - com.take_1d(df.int64.values,indexer) - -@test_parallel(num_threads=2) -def take_1d_pg2_float64(): - com.take_1d(df.float64.values,indexer) - -""" - -nogil_take1d_float64 = Benchmark('take_1d_pg2_int64()', setup, start_date=datetime(2015, 1, 1)) -nogil_take1d_int64 = Benchmark('take_1d_pg2_float64()', setup, start_date=datetime(2015, 1, 1)) diff --git a/vb_suite/groupby.py b/vb_suite/groupby.py deleted file mode 100644 index 268d71f864823..0000000000000 --- a/vb_suite/groupby.py +++ /dev/null @@ -1,620 +0,0 @@ -from vbench.api import Benchmark -from datetime import datetime - -common_setup = """from .pandas_vb_common import * -""" - -setup = common_setup + """ -N = 100000 -ngroups = 100 - -def get_test_data(ngroups=100, n=100000): - unique_groups = range(ngroups) - arr = np.asarray(np.tile(unique_groups, n / ngroups), dtype=object) - - if len(arr) < n: - arr = np.asarray(list(arr) + unique_groups[:n - len(arr)], - dtype=object) - - random.shuffle(arr) - return arr - -# aggregate multiple columns -df = DataFrame({'key1' : get_test_data(ngroups=ngroups), - 'key2' : get_test_data(ngroups=ngroups), - 'data1' : np.random.randn(N), - 'data2' : np.random.randn(N)}) -def f(): - df.groupby(['key1', 'key2']).agg(lambda x: x.values.sum()) - -simple_series = Series(np.random.randn(N)) -key1 = df['key1'] -""" - -stmt1 = "df.groupby(['key1', 'key2'])['data1'].agg(lambda x: x.values.sum())" -groupby_multi_python = Benchmark(stmt1, setup, - 
start_date=datetime(2011, 7, 1)) - -stmt3 = "df.groupby(['key1', 'key2']).sum()" -groupby_multi_cython = Benchmark(stmt3, setup, - start_date=datetime(2011, 7, 1)) - -stmt = "df.groupby(['key1', 'key2'])['data1'].agg(np.std)" -groupby_multi_series_op = Benchmark(stmt, setup, - start_date=datetime(2011, 8, 1)) - -groupby_series_simple_cython = \ - Benchmark('simple_series.groupby(key1).sum()', setup, - start_date=datetime(2011, 3, 1)) - - -stmt4 = "df.groupby('key1').rank(pct=True)" -groupby_series_simple_cython = Benchmark(stmt4, setup, - start_date=datetime(2014, 1, 16)) - -#---------------------------------------------------------------------- -# 2d grouping, aggregate many columns - -setup = common_setup + """ -labels = np.random.randint(0, 100, size=1000) -df = DataFrame(randn(1000, 1000)) -""" - -groupby_frame_cython_many_columns = Benchmark( - 'df.groupby(labels).sum()', setup, - start_date=datetime(2011, 8, 1), - logy=True) - -#---------------------------------------------------------------------- -# single key, long, integer key - -setup = common_setup + """ -data = np.random.randn(100000, 1) -labels = np.random.randint(0, 1000, size=100000) -df = DataFrame(data) -""" - -groupby_frame_singlekey_integer = \ - Benchmark('df.groupby(labels).sum()', setup, - start_date=datetime(2011, 8, 1), logy=True) - -#---------------------------------------------------------------------- -# group with different functions per column - -setup = common_setup + """ -fac1 = np.array(['A', 'B', 'C'], dtype='O') -fac2 = np.array(['one', 'two'], dtype='O') - -df = DataFrame({'key1': fac1.take(np.random.randint(0, 3, size=100000)), - 'key2': fac2.take(np.random.randint(0, 2, size=100000)), - 'value1' : np.random.randn(100000), - 'value2' : np.random.randn(100000), - 'value3' : np.random.randn(100000)}) -""" - -groupby_multi_different_functions = \ - Benchmark("""df.groupby(['key1', 'key2']).agg({'value1' : 'mean', - 'value2' : 'var', - 'value3' : 'sum'})""", - setup, 
start_date=datetime(2011, 9, 1)) - -groupby_multi_different_numpy_functions = \ - Benchmark("""df.groupby(['key1', 'key2']).agg({'value1' : np.mean, - 'value2' : np.var, - 'value3' : np.sum})""", - setup, start_date=datetime(2011, 9, 1)) - -#---------------------------------------------------------------------- -# size() speed - -setup = common_setup + """ -n = 100000 -offsets = np.random.randint(n, size=n).astype('timedelta64[ns]') -dates = np.datetime64('now') + offsets -df = DataFrame({'key1': np.random.randint(0, 500, size=n), - 'key2': np.random.randint(0, 100, size=n), - 'value1' : np.random.randn(n), - 'value2' : np.random.randn(n), - 'value3' : np.random.randn(n), - 'dates' : dates}) -""" - -groupby_multi_size = Benchmark("df.groupby(['key1', 'key2']).size()", - setup, start_date=datetime(2011, 10, 1)) - -groupby_dt_size = Benchmark("df.groupby(['dates']).size()", - setup, start_date=datetime(2011, 10, 1)) - -groupby_dt_timegrouper_size = Benchmark("df.groupby(TimeGrouper(key='dates', freq='M')).size()", - setup, start_date=datetime(2011, 10, 1)) - -#---------------------------------------------------------------------- -# count() speed - -setup = common_setup + """ -n = 10000 -offsets = np.random.randint(n, size=n).astype('timedelta64[ns]') - -dates = np.datetime64('now') + offsets -dates[np.random.rand(n) > 0.5] = np.datetime64('nat') - -offsets[np.random.rand(n) > 0.5] = np.timedelta64('nat') - -value2 = np.random.randn(n) -value2[np.random.rand(n) > 0.5] = np.nan - -obj = np.random.choice(list('ab'), size=n).astype(object) -obj[np.random.randn(n) > 0.5] = np.nan - -df = DataFrame({'key1': np.random.randint(0, 500, size=n), - 'key2': np.random.randint(0, 100, size=n), - 'dates': dates, - 'value2' : value2, - 'value3' : np.random.randn(n), - 'ints': np.random.randint(0, 1000, size=n), - 'obj': obj, - 'offsets': offsets}) -""" - -groupby_multi_count = Benchmark("df.groupby(['key1', 'key2']).count()", - setup, name='groupby_multi_count', - 
start_date=datetime(2014, 5, 5)) - -setup = common_setup + """ -n = 10000 - -df = DataFrame({'key1': randint(0, 500, size=n), - 'key2': randint(0, 100, size=n), - 'ints': randint(0, 1000, size=n), - 'ints2': randint(0, 1000, size=n)}) -""" - -groupby_int_count = Benchmark("df.groupby(['key1', 'key2']).count()", - setup, name='groupby_int_count', - start_date=datetime(2014, 5, 6)) -#---------------------------------------------------------------------- -# Series.value_counts - -setup = common_setup + """ -s = Series(np.random.randint(0, 1000, size=100000)) -""" - -series_value_counts_int64 = Benchmark('s.value_counts()', setup, - start_date=datetime(2011, 10, 21)) - -# value_counts on lots of strings - -setup = common_setup + """ -K = 1000 -N = 100000 -uniques = tm.makeStringIndex(K).values -s = Series(np.tile(uniques, N // K)) -""" - -series_value_counts_strings = Benchmark('s.value_counts()', setup, - start_date=datetime(2011, 10, 21)) - -#value_counts on float dtype - -setup = common_setup + """ -s = Series(np.random.randint(0, 1000, size=100000)).astype(float) -""" - -series_value_counts_float64 = Benchmark('s.value_counts()', setup, - start_date=datetime(2015, 8, 17)) - -#---------------------------------------------------------------------- -# pivot_table - -setup = common_setup + """ -fac1 = np.array(['A', 'B', 'C'], dtype='O') -fac2 = np.array(['one', 'two'], dtype='O') - -ind1 = np.random.randint(0, 3, size=100000) -ind2 = np.random.randint(0, 2, size=100000) - -df = DataFrame({'key1': fac1.take(ind1), -'key2': fac2.take(ind2), -'key3': fac2.take(ind2), -'value1' : np.random.randn(100000), -'value2' : np.random.randn(100000), -'value3' : np.random.randn(100000)}) -""" - -stmt = "df.pivot_table(index='key1', columns=['key2', 'key3'])" -groupby_pivot_table = Benchmark(stmt, setup, start_date=datetime(2011, 12, 15)) - - -#---------------------------------------------------------------------- -# dict return values - -setup = common_setup + """ -labels = 
np.arange(1000).repeat(10) -data = Series(randn(len(labels))) -f = lambda x: {'first': x.values[0], 'last': x.values[-1]} -""" - -groupby_apply_dict_return = Benchmark('data.groupby(labels).apply(f)', - setup, start_date=datetime(2011, 12, 15)) - -#---------------------------------------------------------------------- -# First / last functions - -setup = common_setup + """ -labels = np.arange(10000).repeat(10) -data = Series(randn(len(labels))) -data[::3] = np.nan -data[1::3] = np.nan -data2 = Series(randn(len(labels)),dtype='float32') -data2[::3] = np.nan -data2[1::3] = np.nan -labels = labels.take(np.random.permutation(len(labels))) -""" - -groupby_first_float64 = Benchmark('data.groupby(labels).first()', setup, - start_date=datetime(2012, 5, 1)) - -groupby_first_float32 = Benchmark('data2.groupby(labels).first()', setup, - start_date=datetime(2013, 1, 1)) - -groupby_last_float64 = Benchmark('data.groupby(labels).last()', setup, - start_date=datetime(2012, 5, 1)) - -groupby_last_float32 = Benchmark('data2.groupby(labels).last()', setup, - start_date=datetime(2013, 1, 1)) - -groupby_nth_float64_none = Benchmark('data.groupby(labels).nth(0)', setup, - start_date=datetime(2012, 5, 1)) -groupby_nth_float32_none = Benchmark('data2.groupby(labels).nth(0)', setup, - start_date=datetime(2013, 1, 1)) -groupby_nth_float64_any = Benchmark('data.groupby(labels).nth(0,dropna="all")', setup, - start_date=datetime(2012, 5, 1)) -groupby_nth_float32_any = Benchmark('data2.groupby(labels).nth(0,dropna="all")', setup, - start_date=datetime(2013, 1, 1)) - -# with datetimes (GH7555) -setup = common_setup + """ -df = DataFrame({'a' : date_range('1/1/2011',periods=100000,freq='s'),'b' : range(100000)}) -""" - -groupby_first_datetimes = Benchmark('df.groupby("b").first()', setup, - start_date=datetime(2013, 5, 1)) -groupby_last_datetimes = Benchmark('df.groupby("b").last()', setup, - start_date=datetime(2013, 5, 1)) -groupby_nth_datetimes_none = Benchmark('df.groupby("b").nth(0)', 
setup, - start_date=datetime(2013, 5, 1)) -groupby_nth_datetimes_any = Benchmark('df.groupby("b").nth(0,dropna="all")', setup, - start_date=datetime(2013, 5, 1)) - -# with object -setup = common_setup + """ -df = DataFrame({'a' : ['foo']*100000,'b' : range(100000)}) -""" - -groupby_first_object = Benchmark('df.groupby("b").first()', setup, - start_date=datetime(2013, 5, 1)) -groupby_last_object = Benchmark('df.groupby("b").last()', setup, - start_date=datetime(2013, 5, 1)) -groupby_nth_object_none = Benchmark('df.groupby("b").nth(0)', setup, - start_date=datetime(2013, 5, 1)) -groupby_nth_object_any = Benchmark('df.groupby("b").nth(0,dropna="any")', setup, - start_date=datetime(2013, 5, 1)) - -#---------------------------------------------------------------------- -# groupby_indices replacement, chop up Series - -setup = common_setup + """ -try: - rng = date_range('1/1/2000', '12/31/2005', freq='H') - year, month, day = rng.year, rng.month, rng.day -except: - rng = date_range('1/1/2000', '12/31/2000', offset=datetools.Hour()) - year = rng.map(lambda x: x.year) - month = rng.map(lambda x: x.month) - day = rng.map(lambda x: x.day) - -ts = Series(np.random.randn(len(rng)), index=rng) -""" - -groupby_indices = Benchmark('len(ts.groupby([year, month, day]))', - setup, start_date=datetime(2012, 1, 1)) - -#---------------------------------------------------------------------- -# median - -#---------------------------------------------------------------------- -# single key, long, integer key - -setup = common_setup + """ -data = np.random.randn(100000, 2) -labels = np.random.randint(0, 1000, size=100000) -df = DataFrame(data) -""" - -groupby_frame_median = \ - Benchmark('df.groupby(labels).median()', setup, - start_date=datetime(2011, 8, 1), logy=True) - - -setup = common_setup + """ -data = np.random.randn(1000000, 2) -labels = np.random.randint(0, 1000, size=1000000) -df = DataFrame(data) -""" - -groupby_simple_compress_timing = \ - 
Benchmark('df.groupby(labels).mean()', setup, - start_date=datetime(2011, 8, 1)) - - -#---------------------------------------------------------------------- -# DataFrame Apply overhead - -setup = common_setup + """ -N = 10000 -labels = np.random.randint(0, 2000, size=N) -labels2 = np.random.randint(0, 3, size=N) -df = DataFrame({'key': labels, -'key2': labels2, -'value1': randn(N), -'value2': ['foo', 'bar', 'baz', 'qux'] * (N / 4)}) -def f(g): - return 1 -""" - -groupby_frame_apply_overhead = Benchmark("df.groupby('key').apply(f)", setup, - start_date=datetime(2011, 10, 1)) - -groupby_frame_apply = Benchmark("df.groupby(['key', 'key2']).apply(f)", setup, - start_date=datetime(2011, 10, 1)) - - -#---------------------------------------------------------------------- -# DataFrame nth - -setup = common_setup + """ -df = DataFrame(np.random.randint(1, 100, (10000, 2))) -""" - -# Not really a fair test as behaviour has changed! -groupby_frame_nth_none = Benchmark("df.groupby(0).nth(0)", setup, - start_date=datetime(2014, 3, 1)) - -groupby_series_nth_none = Benchmark("df[1].groupby(df[0]).nth(0)", setup, - start_date=datetime(2014, 3, 1)) -groupby_frame_nth_any= Benchmark("df.groupby(0).nth(0,dropna='any')", setup, - start_date=datetime(2014, 3, 1)) - -groupby_series_nth_any = Benchmark("df[1].groupby(df[0]).nth(0,dropna='any')", setup, - start_date=datetime(2014, 3, 1)) - - -#---------------------------------------------------------------------- -# Sum booleans #2692 - -setup = common_setup + """ -N = 500 -df = DataFrame({'ii':range(N),'bb':[True for x in range(N)]}) -""" - -groupby_sum_booleans = Benchmark("df.groupby('ii').sum()", setup) - - -#---------------------------------------------------------------------- -# multi-indexed group sum #9049 - -setup = common_setup + """ -N = 50 -df = DataFrame({'A': range(N) * 2, 'B': range(N*2), 'C': 1}).set_index(["A", "B"]) -""" - -groupby_sum_multiindex = Benchmark("df.groupby(level=[0, 1]).sum()", setup) - - 
-#---------------------------------------------------------------------- -# Transform testing - -setup = common_setup + """ -n_dates = 400 -n_securities = 250 -n_columns = 3 -share_na = 0.1 - -dates = date_range('1997-12-31', periods=n_dates, freq='B') -dates = Index(map(lambda x: x.year * 10000 + x.month * 100 + x.day, dates)) - -secid_min = int('10000000', 16) -secid_max = int('F0000000', 16) -step = (secid_max - secid_min) // (n_securities - 1) -security_ids = map(lambda x: hex(x)[2:10].upper(), range(secid_min, secid_max + 1, step)) - -data_index = MultiIndex(levels=[dates.values, security_ids], - labels=[[i for i in range(n_dates) for _ in xrange(n_securities)], range(n_securities) * n_dates], - names=['date', 'security_id']) -n_data = len(data_index) - -columns = Index(['factor{}'.format(i) for i in range(1, n_columns + 1)]) - -data = DataFrame(np.random.randn(n_data, n_columns), index=data_index, columns=columns) - -step = int(n_data * share_na) -for column_index in range(n_columns): - index = column_index - while index < n_data: - data.set_value(data_index[index], columns[column_index], np.nan) - index += step - -f_fillna = lambda x: x.fillna(method='pad') -""" - -groupby_transform = Benchmark("data.groupby(level='security_id').transform(f_fillna)", setup) -groupby_transform_ufunc = Benchmark("data.groupby(level='date').transform(np.max)", setup) - -setup = common_setup + """ -np.random.seed(0) - -N = 120000 -N_TRANSITIONS = 1400 - -# generate groups -transition_points = np.random.permutation(np.arange(N))[:N_TRANSITIONS] -transition_points.sort() -transitions = np.zeros((N,), dtype=np.bool) -transitions[transition_points] = True -g = transitions.cumsum() - -df = DataFrame({ 'signal' : np.random.rand(N)}) -""" -groupby_transform_series = Benchmark("df['signal'].groupby(g).transform(np.mean)", setup) - -setup = common_setup + """ -np.random.seed(0) - -df=DataFrame( { 'id' : np.arange( 100000 ) / 3, - 'val': np.random.randn( 100000) } ) -""" - 
-groupby_transform_series2 = Benchmark("df.groupby('id')['val'].transform(np.mean)", setup) - -setup = common_setup + ''' -np.random.seed(2718281) -n = 20000 -df = DataFrame(np.random.randint(1, n, (n, 3)), - columns=['jim', 'joe', 'jolie']) -''' - -stmt = "df.groupby(['jim', 'joe'])['jolie'].transform('max')"; -groupby_transform_multi_key1 = Benchmark(stmt, setup) -groupby_transform_multi_key2 = Benchmark(stmt, setup + "df['jim'] = df['joe']") - -setup = common_setup + ''' -np.random.seed(2718281) -n = 200000 -df = DataFrame(np.random.randint(1, n / 10, (n, 3)), - columns=['jim', 'joe', 'jolie']) -''' -groupby_transform_multi_key3 = Benchmark(stmt, setup) -groupby_transform_multi_key4 = Benchmark(stmt, setup + "df['jim'] = df['joe']") - -setup = common_setup + ''' -np.random.seed(27182) -n = 100000 -df = DataFrame(np.random.randint(1, n / 100, (n, 3)), - columns=['jim', 'joe', 'jolie']) -''' - -groupby_agg_builtins1 = Benchmark("df.groupby('jim').agg([sum, min, max])", setup) -groupby_agg_builtins2 = Benchmark("df.groupby(['jim', 'joe']).agg([sum, min, max])", setup) - - -setup = common_setup + ''' -arr = np.random.randint(- 1 << 12, 1 << 12, (1 << 17, 5)) -i = np.random.choice(len(arr), len(arr) * 5) -arr = np.vstack((arr, arr[i])) # add sume duplicate rows - -i = np.random.permutation(len(arr)) -arr = arr[i] # shuffle rows - -df = DataFrame(arr, columns=list('abcde')) -df['jim'], df['joe'] = np.random.randn(2, len(df)) * 10 -''' - -groupby_int64_overflow = Benchmark("df.groupby(list('abcde')).max()", setup, - name='groupby_int64_overflow') - - -setup = common_setup + ''' -from itertools import product -from string import ascii_letters, digits - -n = 5 * 7 * 11 * (1 << 9) -alpha = list(map(''.join, product(ascii_letters + digits, repeat=4))) -f = lambda k: np.repeat(np.random.choice(alpha, n // k), k) - -df = DataFrame({'a': f(11), 'b': f(7), 'c': f(5), 'd': f(1)}) -df['joe'] = (np.random.randn(len(df)) * 10).round(3) - -i = np.random.permutation(len(df)) -df = 
df.iloc[i].reset_index(drop=True).copy() -''' - -groupby_multi_index = Benchmark("df.groupby(list('abcd')).max()", setup, - name='groupby_multi_index') - -#---------------------------------------------------------------------- -# groupby with a variable value for ngroups - - -ngroups_list = [100, 10000] -no_arg_func_list = [ - 'all', - 'any', - 'count', - 'cumcount', - 'cummax', - 'cummin', - 'cumprod', - 'cumsum', - 'describe', - 'diff', - 'first', - 'head', - 'last', - 'mad', - 'max', - 'mean', - 'median', - 'min', - 'nunique', - 'pct_change', - 'prod', - 'rank', - 'sem', - 'size', - 'skew', - 'std', - 'sum', - 'tail', - 'unique', - 'var', - 'value_counts', -] - - -_stmt_template = "df.groupby('value')['timestamp'].%s" -_setup_template = common_setup + """ -np.random.seed(1234) -ngroups = %s -size = ngroups * 2 -rng = np.arange(ngroups) -df = DataFrame(dict( - timestamp=rng.take(np.random.randint(0, ngroups, size=size)), - value=np.random.randint(0, size, size=size) -)) -""" -START_DATE = datetime(2011, 7, 1) - - -def make_large_ngroups_bmark(ngroups, func_name, func_args=''): - bmark_name = 'groupby_ngroups_%s_%s' % (ngroups, func_name) - stmt = _stmt_template % ('%s(%s)' % (func_name, func_args)) - setup = _setup_template % ngroups - bmark = Benchmark(stmt, setup, start_date=START_DATE) - # MUST set name - bmark.name = bmark_name - return bmark - - -def inject_bmark_into_globals(bmark): - if not bmark.name: - raise AssertionError('benchmark must have a name') - globals()[bmark.name] = bmark - - -for ngroups in ngroups_list: - for func_name in no_arg_func_list: - bmark = make_large_ngroups_bmark(ngroups, func_name) - inject_bmark_into_globals(bmark) - -# avoid bmark to be collected as Benchmark object -del bmark diff --git a/vb_suite/hdfstore_bench.py b/vb_suite/hdfstore_bench.py deleted file mode 100644 index 393fd4cc77e66..0000000000000 --- a/vb_suite/hdfstore_bench.py +++ /dev/null @@ -1,278 +0,0 @@ -from vbench.api import Benchmark -from datetime import 
datetime - -start_date = datetime(2012, 7, 1) - -common_setup = """from .pandas_vb_common import * -import os - -f = '__test__.h5' -def remove(f): - try: - os.remove(f) - except: - pass - -""" - -#---------------------------------------------------------------------- -# get from a store - -setup1 = common_setup + """ -index = tm.makeStringIndex(25000) -df = DataFrame({'float1' : randn(25000), - 'float2' : randn(25000)}, - index=index) -remove(f) -store = HDFStore(f) -store.put('df1',df) -""" - -read_store = Benchmark("store.get('df1')", setup1, cleanup="store.close()", - start_date=start_date) - - -#---------------------------------------------------------------------- -# write to a store - -setup2 = common_setup + """ -index = tm.makeStringIndex(25000) -df = DataFrame({'float1' : randn(25000), - 'float2' : randn(25000)}, - index=index) -remove(f) -store = HDFStore(f) -""" - -write_store = Benchmark( - "store.put('df2',df)", setup2, cleanup="store.close()", - start_date=start_date) - -#---------------------------------------------------------------------- -# get from a store (mixed) - -setup3 = common_setup + """ -index = tm.makeStringIndex(25000) -df = DataFrame({'float1' : randn(25000), - 'float2' : randn(25000), - 'string1' : ['foo'] * 25000, - 'bool1' : [True] * 25000, - 'int1' : np.random.randint(0, 250000, size=25000)}, - index=index) -remove(f) -store = HDFStore(f) -store.put('df3',df) -""" - -read_store_mixed = Benchmark( - "store.get('df3')", setup3, cleanup="store.close()", - start_date=start_date) - - -#---------------------------------------------------------------------- -# write to a store (mixed) - -setup4 = common_setup + """ -index = tm.makeStringIndex(25000) -df = DataFrame({'float1' : randn(25000), - 'float2' : randn(25000), - 'string1' : ['foo'] * 25000, - 'bool1' : [True] * 25000, - 'int1' : np.random.randint(0, 250000, size=25000)}, - index=index) -remove(f) -store = HDFStore(f) -""" - -write_store_mixed = Benchmark( - "store.put('df4',df)", 
setup4, cleanup="store.close()", - start_date=start_date) - -#---------------------------------------------------------------------- -# get from a table (mixed) - -setup5 = common_setup + """ -N=10000 -index = tm.makeStringIndex(N) -df = DataFrame({'float1' : randn(N), - 'float2' : randn(N), - 'string1' : ['foo'] * N, - 'bool1' : [True] * N, - 'int1' : np.random.randint(0, N, size=N)}, - index=index) - -remove(f) -store = HDFStore(f) -store.append('df5',df) -""" - -read_store_table_mixed = Benchmark( - "store.select('df5')", setup5, cleanup="store.close()", - start_date=start_date) - - -#---------------------------------------------------------------------- -# write to a table (mixed) - -setup6 = common_setup + """ -index = tm.makeStringIndex(25000) -df = DataFrame({'float1' : randn(25000), - 'float2' : randn(25000), - 'string1' : ['foo'] * 25000, - 'bool1' : [True] * 25000, - 'int1' : np.random.randint(0, 25000, size=25000)}, - index=index) -remove(f) -store = HDFStore(f) -""" - -write_store_table_mixed = Benchmark( - "store.append('df6',df)", setup6, cleanup="store.close()", - start_date=start_date) - -#---------------------------------------------------------------------- -# select from a table - -setup7 = common_setup + """ -index = tm.makeStringIndex(25000) -df = DataFrame({'float1' : randn(25000), - 'float2' : randn(25000) }, - index=index) - -remove(f) -store = HDFStore(f) -store.append('df7',df) -""" - -read_store_table = Benchmark( - "store.select('df7')", setup7, cleanup="store.close()", - start_date=start_date) - - -#---------------------------------------------------------------------- -# write to a table - -setup8 = common_setup + """ -index = tm.makeStringIndex(25000) -df = DataFrame({'float1' : randn(25000), - 'float2' : randn(25000) }, - index=index) -remove(f) -store = HDFStore(f) -""" - -write_store_table = Benchmark( - "store.append('df8',df)", setup8, cleanup="store.close()", - start_date=start_date) - 
-#---------------------------------------------------------------------- -# get from a table (wide) - -setup9 = common_setup + """ -df = DataFrame(np.random.randn(25000,100)) - -remove(f) -store = HDFStore(f) -store.append('df9',df) -""" - -read_store_table_wide = Benchmark( - "store.select('df9')", setup9, cleanup="store.close()", - start_date=start_date) - - -#---------------------------------------------------------------------- -# write to a table (wide) - -setup10 = common_setup + """ -df = DataFrame(np.random.randn(25000,100)) - -remove(f) -store = HDFStore(f) -""" - -write_store_table_wide = Benchmark( - "store.append('df10',df)", setup10, cleanup="store.close()", - start_date=start_date) - -#---------------------------------------------------------------------- -# get from a table (wide) - -setup11 = common_setup + """ -index = date_range('1/1/2000', periods = 25000) -df = DataFrame(np.random.randn(25000,100), index = index) - -remove(f) -store = HDFStore(f) -store.append('df11',df) -""" - -query_store_table_wide = Benchmark( - "store.select('df11', [ ('index', '>', df.index[10000]), ('index', '<', df.index[15000]) ])", setup11, cleanup="store.close()", - start_date=start_date) - - -#---------------------------------------------------------------------- -# query from a table - -setup12 = common_setup + """ -index = date_range('1/1/2000', periods = 25000) -df = DataFrame({'float1' : randn(25000), - 'float2' : randn(25000) }, - index=index) - -remove(f) -store = HDFStore(f) -store.append('df12',df) -""" - -query_store_table = Benchmark( - "store.select('df12', [ ('index', '>', df.index[10000]), ('index', '<', df.index[15000]) ])", setup12, cleanup="store.close()", - start_date=start_date) - -#---------------------------------------------------------------------- -# select from a panel table - -setup13 = common_setup + """ -p = Panel(randn(20, 1000, 25), items= [ 'Item%03d' % i for i in range(20) ], - major_axis=date_range('1/1/2000', periods=1000), minor_axis 
= [ 'E%03d' % i for i in range(25) ]) - -remove(f) -store = HDFStore(f) -store.append('p1',p) -""" - -read_store_table_panel = Benchmark( - "store.select('p1')", setup13, cleanup="store.close()", - start_date=start_date) - - -#---------------------------------------------------------------------- -# write to a panel table - -setup14 = common_setup + """ -p = Panel(randn(20, 1000, 25), items= [ 'Item%03d' % i for i in range(20) ], - major_axis=date_range('1/1/2000', periods=1000), minor_axis = [ 'E%03d' % i for i in range(25) ]) - -remove(f) -store = HDFStore(f) -""" - -write_store_table_panel = Benchmark( - "store.append('p2',p)", setup14, cleanup="store.close()", - start_date=start_date) - -#---------------------------------------------------------------------- -# write to a table (data_columns) - -setup15 = common_setup + """ -df = DataFrame(np.random.randn(10000,10),columns = [ 'C%03d' % i for i in range(10) ]) - -remove(f) -store = HDFStore(f) -""" - -write_store_table_dc = Benchmark( - "store.append('df15',df,data_columns=True)", setup15, cleanup="store.close()", - start_date=start_date) - diff --git a/vb_suite/index_object.py b/vb_suite/index_object.py deleted file mode 100644 index 2ab2bc15f3853..0000000000000 --- a/vb_suite/index_object.py +++ /dev/null @@ -1,173 +0,0 @@ -from vbench.benchmark import Benchmark -from datetime import datetime - -SECTION = "Index / MultiIndex objects" - - -common_setup = """from .pandas_vb_common import * -""" - -#---------------------------------------------------------------------- -# intersection, union - -setup = common_setup + """ -rng = DatetimeIndex(start='1/1/2000', periods=10000, freq=datetools.Minute()) -if rng.dtype == object: - rng = rng.view(Index) -else: - rng = rng.asobject -rng2 = rng[:-1] -""" - -index_datetime_intersection = Benchmark("rng.intersection(rng2)", setup) -index_datetime_union = Benchmark("rng.union(rng2)", setup) - -setup = common_setup + """ -rng = date_range('1/1/2000', periods=10000, freq='T') 
-rng2 = rng[:-1] -""" - -datetime_index_intersection = Benchmark("rng.intersection(rng2)", setup, - start_date=datetime(2013, 9, 27)) -datetime_index_union = Benchmark("rng.union(rng2)", setup, - start_date=datetime(2013, 9, 27)) - -# integers -setup = common_setup + """ -N = 1000000 -options = np.arange(N) - -left = Index(options.take(np.random.permutation(N)[:N // 2])) -right = Index(options.take(np.random.permutation(N)[:N // 2])) -""" - -index_int64_union = Benchmark('left.union(right)', setup, - start_date=datetime(2011, 1, 1)) - -index_int64_intersection = Benchmark('left.intersection(right)', setup, - start_date=datetime(2011, 1, 1)) - -#---------------------------------------------------------------------- -# string index slicing -setup = common_setup + """ -idx = tm.makeStringIndex(1000000) - -mask = np.arange(1000000) % 3 == 0 -series_mask = Series(mask) -""" -index_str_slice_indexer_basic = Benchmark('idx[:-1]', setup) -index_str_slice_indexer_even = Benchmark('idx[::2]', setup) -index_str_boolean_indexer = Benchmark('idx[mask]', setup) -index_str_boolean_series_indexer = Benchmark('idx[series_mask]', setup) - -#---------------------------------------------------------------------- -# float64 index -#---------------------------------------------------------------------- -# construction -setup = common_setup + """ -baseidx = np.arange(1e6) -""" - -index_float64_construct = Benchmark('Index(baseidx)', setup, - name='index_float64_construct', - start_date=datetime(2014, 4, 13)) - -setup = common_setup + """ -idx = tm.makeFloatIndex(1000000) - -mask = np.arange(idx.size) % 3 == 0 -series_mask = Series(mask) -""" -#---------------------------------------------------------------------- -# getting -index_float64_get = Benchmark('idx[1]', setup, name='index_float64_get', - start_date=datetime(2014, 4, 13)) - - -#---------------------------------------------------------------------- -# slicing -index_float64_slice_indexer_basic = Benchmark('idx[:-1]', setup, - 
name='index_float64_slice_indexer_basic', - start_date=datetime(2014, 4, 13)) -index_float64_slice_indexer_even = Benchmark('idx[::2]', setup, - name='index_float64_slice_indexer_even', - start_date=datetime(2014, 4, 13)) -index_float64_boolean_indexer = Benchmark('idx[mask]', setup, - name='index_float64_boolean_indexer', - start_date=datetime(2014, 4, 13)) -index_float64_boolean_series_indexer = Benchmark('idx[series_mask]', setup, - name='index_float64_boolean_series_indexer', - start_date=datetime(2014, 4, 13)) - -#---------------------------------------------------------------------- -# arith ops -index_float64_mul = Benchmark('idx * 2', setup, name='index_float64_mul', - start_date=datetime(2014, 4, 13)) -index_float64_div = Benchmark('idx / 2', setup, name='index_float64_div', - start_date=datetime(2014, 4, 13)) - - -# Constructing MultiIndex from cartesian product of iterables -# - -setup = common_setup + """ -iterables = [tm.makeStringIndex(10000), range(20)] -""" - -multiindex_from_product = Benchmark('MultiIndex.from_product(iterables)', - setup, name='multiindex_from_product', - start_date=datetime(2014, 6, 30)) - -#---------------------------------------------------------------------- -# MultiIndex with DatetimeIndex level - -setup = common_setup + """ -level1 = range(1000) -level2 = date_range(start='1/1/2012', periods=100) -mi = MultiIndex.from_product([level1, level2]) -""" - -multiindex_with_datetime_level_full = \ - Benchmark("mi.copy().values", setup, - name='multiindex_with_datetime_level_full', - start_date=datetime(2014, 10, 11)) - - -multiindex_with_datetime_level_sliced = \ - Benchmark("mi[:10].values", setup, - name='multiindex_with_datetime_level_sliced', - start_date=datetime(2014, 10, 11)) - -# multi-index duplicated -setup = common_setup + """ -n, k = 200, 5000 -levels = [np.arange(n), tm.makeStringIndex(n).values, 1000 + np.arange(n)] -labels = [np.random.choice(n, k * n) for lev in levels] -mi = MultiIndex(levels=levels, 
labels=labels) -""" - -multiindex_duplicated = Benchmark('mi.duplicated()', setup, - name='multiindex_duplicated') - -#---------------------------------------------------------------------- -# repr - -setup = common_setup + """ -dr = pd.date_range('20000101', freq='D', periods=100000) -""" - -datetime_index_repr = \ - Benchmark("dr._is_dates_only", setup, - start_date=datetime(2012, 1, 11)) - -setup = common_setup + """ -n = 3 * 5 * 7 * 11 * (1 << 10) -low, high = - 1 << 12, 1 << 12 -f = lambda k: np.repeat(np.random.randint(low, high, n // k), k) - -i = np.random.permutation(n) -mi = MultiIndex.from_arrays([f(11), f(7), f(5), f(3), f(1)])[i] -""" - -multiindex_sortlevel_int64 = Benchmark('mi.sortlevel()', setup, - name='multiindex_sortlevel_int64') diff --git a/vb_suite/indexing.py b/vb_suite/indexing.py deleted file mode 100644 index 3d95d52dccd71..0000000000000 --- a/vb_suite/indexing.py +++ /dev/null @@ -1,292 +0,0 @@ -from vbench.benchmark import Benchmark -from datetime import datetime - -SECTION = 'Indexing and scalar value access' - -common_setup = """from .pandas_vb_common import * -""" - -#---------------------------------------------------------------------- -# Series.__getitem__, get_value, __getitem__(slice) - -setup = common_setup + """ -tm.N = 1000 -ts = tm.makeTimeSeries() -dt = ts.index[500] -""" -statement = "ts[dt]" -bm_getitem = Benchmark(statement, setup, ncalls=100000, - name='time_series_getitem_scalar') - -setup = common_setup + """ -index = tm.makeStringIndex(1000) -s = Series(np.random.rand(1000), index=index) -idx = index[100] -""" -statement = "s.get_value(idx)" -bm_get_value = Benchmark(statement, setup, - name='series_get_value', - start_date=datetime(2011, 11, 12)) - - -setup = common_setup + """ -index = tm.makeStringIndex(1000000) -s = Series(np.random.rand(1000000), index=index) -""" -series_getitem_pos_slice = Benchmark("s[:800000]", setup, - name="series_getitem_pos_slice") - - -setup = common_setup + """ -index = 
tm.makeStringIndex(1000000) -s = Series(np.random.rand(1000000), index=index) -lbl = s.index[800000] -""" -series_getitem_label_slice = Benchmark("s[:lbl]", setup, - name="series_getitem_label_slice") - - -#---------------------------------------------------------------------- -# DataFrame __getitem__ - -setup = common_setup + """ -index = tm.makeStringIndex(1000) -columns = tm.makeStringIndex(30) -df = DataFrame(np.random.rand(1000, 30), index=index, - columns=columns) -idx = index[100] -col = columns[10] -""" -statement = "df[col][idx]" -bm_df_getitem = Benchmark(statement, setup, - name='dataframe_getitem_scalar') - -setup = common_setup + """ -try: - klass = DataMatrix -except: - klass = DataFrame - -index = tm.makeStringIndex(1000) -columns = tm.makeStringIndex(30) -df = klass(np.random.rand(1000, 30), index=index, columns=columns) -idx = index[100] -col = columns[10] -""" -statement = "df[col][idx]" -bm_df_getitem2 = Benchmark(statement, setup, - name='datamatrix_getitem_scalar') - - -#---------------------------------------------------------------------- -# ix get scalar - -setup = common_setup + """ -index = tm.makeStringIndex(1000) -columns = tm.makeStringIndex(30) -df = DataFrame(np.random.randn(1000, 30), index=index, columns=columns) -idx = index[100] -col = columns[10] -""" - -indexing_frame_get_value_ix = Benchmark("df.ix[idx,col]", setup, - name='indexing_frame_get_value_ix', - start_date=datetime(2011, 11, 12)) - -indexing_frame_get_value = Benchmark("df.get_value(idx,col)", setup, - name='indexing_frame_get_value', - start_date=datetime(2011, 11, 12)) - -setup = common_setup + """ -mi = MultiIndex.from_tuples([(x,y) for x in range(1000) for y in range(1000)]) -s = Series(np.random.randn(1000000), index=mi) -""" - -series_xs_mi_ix = Benchmark("s.ix[999]", setup, - name='series_xs_mi_ix', - start_date=datetime(2013, 1, 1)) - -setup = common_setup + """ -mi = MultiIndex.from_tuples([(x,y) for x in range(1000) for y in range(1000)]) -s = 
Series(np.random.randn(1000000), index=mi) -df = DataFrame(s) -""" - -frame_xs_mi_ix = Benchmark("df.ix[999]", setup, - name='frame_xs_mi_ix', - start_date=datetime(2013, 1, 1)) - -#---------------------------------------------------------------------- -# Boolean DataFrame row selection - -setup = common_setup + """ -df = DataFrame(np.random.randn(10000, 4), columns=['A', 'B', 'C', 'D']) -indexer = df['B'] > 0 -obj_indexer = indexer.astype('O') -""" -indexing_dataframe_boolean_rows = \ - Benchmark("df[indexer]", setup, name='indexing_dataframe_boolean_rows') - -indexing_dataframe_boolean_rows_object = \ - Benchmark("df[obj_indexer]", setup, - name='indexing_dataframe_boolean_rows_object') - -setup = common_setup + """ -df = DataFrame(np.random.randn(50000, 100)) -df2 = DataFrame(np.random.randn(50000, 100)) -""" -indexing_dataframe_boolean = \ - Benchmark("df > df2", setup, name='indexing_dataframe_boolean', - start_date=datetime(2012, 1, 1)) - -setup = common_setup + """ -try: - import pandas.computation.expressions as expr -except: - expr = None - -if expr is None: - raise NotImplementedError -df = DataFrame(np.random.randn(50000, 100)) -df2 = DataFrame(np.random.randn(50000, 100)) -expr.set_numexpr_threads(1) -""" - -indexing_dataframe_boolean_st = \ - Benchmark("df > df2", setup, name='indexing_dataframe_boolean_st',cleanup="expr.set_numexpr_threads()", - start_date=datetime(2013, 2, 26)) - - -setup = common_setup + """ -try: - import pandas.computation.expressions as expr -except: - expr = None - -if expr is None: - raise NotImplementedError -df = DataFrame(np.random.randn(50000, 100)) -df2 = DataFrame(np.random.randn(50000, 100)) -expr.set_use_numexpr(False) -""" - -indexing_dataframe_boolean_no_ne = \ - Benchmark("df > df2", setup, name='indexing_dataframe_boolean_no_ne',cleanup="expr.set_use_numexpr(True)", - start_date=datetime(2013, 2, 26)) -#---------------------------------------------------------------------- -# MultiIndex sortlevel - -setup = 
common_setup + """ -a = np.repeat(np.arange(100), 1000) -b = np.tile(np.arange(1000), 100) -midx = MultiIndex.from_arrays([a, b]) -midx = midx.take(np.random.permutation(np.arange(100000))) -""" -sort_level_zero = Benchmark("midx.sortlevel(0)", setup, - start_date=datetime(2012, 1, 1)) -sort_level_one = Benchmark("midx.sortlevel(1)", setup, - start_date=datetime(2012, 1, 1)) - -#---------------------------------------------------------------------- -# Panel subset selection - -setup = common_setup + """ -p = Panel(np.random.randn(100, 100, 100)) -inds = range(0, 100, 10) -""" - -indexing_panel_subset = Benchmark('p.ix[inds, inds, inds]', setup, - start_date=datetime(2012, 1, 1)) - -#---------------------------------------------------------------------- -# Iloc - -setup = common_setup + """ -df = DataFrame({'A' : [0.1] * 3000, 'B' : [1] * 3000}) -idx = np.array(range(30)) * 99 -df2 = DataFrame({'A' : [0.1] * 1000, 'B' : [1] * 1000}) -df2 = concat([df2, 2*df2, 3*df2]) -""" - -frame_iloc_dups = Benchmark('df2.iloc[idx]', setup, - start_date=datetime(2013, 1, 1)) - -frame_loc_dups = Benchmark('df2.loc[idx]', setup, - start_date=datetime(2013, 1, 1)) - -setup = common_setup + """ -df = DataFrame(dict( A = [ 'foo'] * 1000000)) -""" - -frame_iloc_big = Benchmark('df.iloc[:100,0]', setup, - start_date=datetime(2013, 1, 1)) - -#---------------------------------------------------------------------- -# basic tests for [], .loc[], .iloc[] and .ix[] - -setup = common_setup + """ -s = Series(np.random.rand(1000000)) -""" - -series_getitem_scalar = Benchmark("s[800000]", setup) -series_getitem_slice = Benchmark("s[:800000]", setup) -series_getitem_list_like = Benchmark("s[[800000]]", setup) -series_getitem_array = Benchmark("s[np.arange(10000)]", setup) - -series_loc_scalar = Benchmark("s.loc[800000]", setup) -series_loc_slice = Benchmark("s.loc[:800000]", setup) -series_loc_list_like = Benchmark("s.loc[[800000]]", setup) -series_loc_array = Benchmark("s.loc[np.arange(10000)]", 
setup) - -series_iloc_scalar = Benchmark("s.iloc[800000]", setup) -series_iloc_slice = Benchmark("s.iloc[:800000]", setup) -series_iloc_list_like = Benchmark("s.iloc[[800000]]", setup) -series_iloc_array = Benchmark("s.iloc[np.arange(10000)]", setup) - -series_ix_scalar = Benchmark("s.ix[800000]", setup) -series_ix_slice = Benchmark("s.ix[:800000]", setup) -series_ix_list_like = Benchmark("s.ix[[800000]]", setup) -series_ix_array = Benchmark("s.ix[np.arange(10000)]", setup) - - -# multi-index slicing -setup = common_setup + """ -np.random.seed(1234) -idx=pd.IndexSlice -n=100000 -mdt = pandas.DataFrame() -mdt['A'] = np.random.choice(range(10000,45000,1000), n) -mdt['B'] = np.random.choice(range(10,400), n) -mdt['C'] = np.random.choice(range(1,150), n) -mdt['D'] = np.random.choice(range(10000,45000), n) -mdt['x'] = np.random.choice(range(400), n) -mdt['y'] = np.random.choice(range(25), n) - - -test_A = 25000 -test_B = 25 -test_C = 40 -test_D = 35000 - -eps_A = 5000 -eps_B = 5 -eps_C = 5 -eps_D = 5000 -mdt2 = mdt.set_index(['A','B','C','D']).sortlevel() -""" - -multiindex_slicers = Benchmark('mdt2.loc[idx[test_A-eps_A:test_A+eps_A,test_B-eps_B:test_B+eps_B,test_C-eps_C:test_C+eps_C,test_D-eps_D:test_D+eps_D],:]', setup, - start_date=datetime(2015, 1, 1)) - -#---------------------------------------------------------------------- -# take - -setup = common_setup + """ -s = Series(np.random.rand(100000)) -ts = Series(np.random.rand(100000), - index=date_range('2011-01-01', freq='S', periods=100000)) -indexer = [True, False, True, True, False] * 20000 -""" - -series_take_intindex = Benchmark("s.take(indexer)", setup) -series_take_dtindex = Benchmark("ts.take(indexer)", setup) diff --git a/vb_suite/inference.py b/vb_suite/inference.py deleted file mode 100644 index aaa51aa5163ce..0000000000000 --- a/vb_suite/inference.py +++ /dev/null @@ -1,36 +0,0 @@ -from vbench.api import Benchmark -from datetime import datetime -import sys - -# from GH 7332 - -setup = """from 
.pandas_vb_common import * -import pandas as pd -N = 500000 -df_int64 = DataFrame(dict(A = np.arange(N,dtype='int64'), B = np.arange(N,dtype='int64'))) -df_int32 = DataFrame(dict(A = np.arange(N,dtype='int32'), B = np.arange(N,dtype='int32'))) -df_uint32 = DataFrame(dict(A = np.arange(N,dtype='uint32'), B = np.arange(N,dtype='uint32'))) -df_float64 = DataFrame(dict(A = np.arange(N,dtype='float64'), B = np.arange(N,dtype='float64'))) -df_float32 = DataFrame(dict(A = np.arange(N,dtype='float32'), B = np.arange(N,dtype='float32'))) -df_datetime64 = DataFrame(dict(A = pd.to_datetime(np.arange(N,dtype='int64'),unit='ms'), - B = pd.to_datetime(np.arange(N,dtype='int64'),unit='ms'))) -df_timedelta64 = DataFrame(dict(A = df_datetime64['A']-df_datetime64['B'], - B = df_datetime64['B'])) -""" - -dtype_infer_int64 = Benchmark('df_int64["A"] + df_int64["B"]', setup, - start_date=datetime(2014, 1, 1)) -dtype_infer_int32 = Benchmark('df_int32["A"] + df_int32["B"]', setup, - start_date=datetime(2014, 1, 1)) -dtype_infer_uint32 = Benchmark('df_uint32["A"] + df_uint32["B"]', setup, - start_date=datetime(2014, 1, 1)) -dtype_infer_float64 = Benchmark('df_float64["A"] + df_float64["B"]', setup, - start_date=datetime(2014, 1, 1)) -dtype_infer_float32 = Benchmark('df_float32["A"] + df_float32["B"]', setup, - start_date=datetime(2014, 1, 1)) -dtype_infer_datetime64 = Benchmark('df_datetime64["A"] - df_datetime64["B"]', setup, - start_date=datetime(2014, 1, 1)) -dtype_infer_timedelta64_1 = Benchmark('df_timedelta64["A"] + df_timedelta64["B"]', setup, - start_date=datetime(2014, 1, 1)) -dtype_infer_timedelta64_2 = Benchmark('df_timedelta64["A"] + df_timedelta64["A"]', setup, - start_date=datetime(2014, 1, 1)) diff --git a/vb_suite/io_bench.py b/vb_suite/io_bench.py deleted file mode 100644 index af5f6076515cc..0000000000000 --- a/vb_suite/io_bench.py +++ /dev/null @@ -1,150 +0,0 @@ -from vbench.api import Benchmark -from datetime import datetime - -common_setup = """from .pandas_vb_common 
import * -from io import StringIO -""" - -#---------------------------------------------------------------------- -# read_csv - -setup1 = common_setup + """ -index = tm.makeStringIndex(10000) -df = DataFrame({'float1' : randn(10000), - 'float2' : randn(10000), - 'string1' : ['foo'] * 10000, - 'bool1' : [True] * 10000, - 'int1' : np.random.randint(0, 100000, size=10000)}, - index=index) -df.to_csv('__test__.csv') -""" - -read_csv_standard = Benchmark("read_csv('__test__.csv')", setup1, - start_date=datetime(2011, 9, 15)) - -#---------------------------------- -# skiprows - -setup1 = common_setup + """ -index = tm.makeStringIndex(20000) -df = DataFrame({'float1' : randn(20000), - 'float2' : randn(20000), - 'string1' : ['foo'] * 20000, - 'bool1' : [True] * 20000, - 'int1' : np.random.randint(0, 200000, size=20000)}, - index=index) -df.to_csv('__test__.csv') -""" - -read_csv_skiprows = Benchmark("read_csv('__test__.csv', skiprows=10000)", setup1, - start_date=datetime(2011, 9, 15)) - -#---------------------------------------------------------------------- -# write_csv - -setup2 = common_setup + """ -index = tm.makeStringIndex(10000) -df = DataFrame({'float1' : randn(10000), - 'float2' : randn(10000), - 'string1' : ['foo'] * 10000, - 'bool1' : [True] * 10000, - 'int1' : np.random.randint(0, 100000, size=10000)}, - index=index) -""" - -write_csv_standard = Benchmark("df.to_csv('__test__.csv')", setup2, - start_date=datetime(2011, 9, 15)) - -#---------------------------------- -setup = common_setup + """ -df = DataFrame(np.random.randn(3000, 30)) -""" -frame_to_csv = Benchmark("df.to_csv('__test__.csv')", setup, - start_date=datetime(2011, 1, 1)) -#---------------------------------- - -setup = common_setup + """ -df=DataFrame({'A':range(50000)}) -df['B'] = df.A + 1.0 -df['C'] = df.A + 2.0 -df['D'] = df.A + 3.0 -""" -frame_to_csv2 = Benchmark("df.to_csv('__test__.csv')", setup, - start_date=datetime(2011, 1, 1)) - -#---------------------------------- -setup = common_setup 
+ """ -from pandas import concat, Timestamp - -def create_cols(name): - return [ "%s%03d" % (name,i) for i in range(5) ] -df_float = DataFrame(np.random.randn(5000, 5),dtype='float64',columns=create_cols('float')) -df_int = DataFrame(np.random.randn(5000, 5),dtype='int64',columns=create_cols('int')) -df_bool = DataFrame(True,index=df_float.index,columns=create_cols('bool')) -df_object = DataFrame('foo',index=df_float.index,columns=create_cols('object')) -df_dt = DataFrame(Timestamp('20010101'),index=df_float.index,columns=create_cols('date')) - -# add in some nans -df_float.ix[30:500,1:3] = np.nan - -df = concat([ df_float, df_int, df_bool, df_object, df_dt ], axis=1) - -""" -frame_to_csv_mixed = Benchmark("df.to_csv('__test__.csv')", setup, - start_date=datetime(2012, 6, 1)) - -#---------------------------------------------------------------------- -# parse dates, ISO8601 format - -setup = common_setup + """ -rng = date_range('1/1/2000', periods=1000) -data = '\\n'.join(rng.map(lambda x: x.strftime("%Y-%m-%d %H:%M:%S"))) -""" - -stmt = ("read_csv(StringIO(data), header=None, names=['foo'], " - " parse_dates=['foo'])") -read_parse_dates_iso8601 = Benchmark(stmt, setup, - start_date=datetime(2012, 3, 1)) - -setup = common_setup + """ -rng = date_range('1/1/2000', periods=1000) -data = DataFrame(rng, index=rng) -""" - -stmt = ("data.to_csv('__test__.csv', date_format='%Y%m%d')") - -frame_to_csv_date_formatting = Benchmark(stmt, setup, - start_date=datetime(2013, 9, 1)) - -#---------------------------------------------------------------------- -# infer datetime format - -setup = common_setup + """ -rng = date_range('1/1/2000', periods=1000) -data = '\\n'.join(rng.map(lambda x: x.strftime("%Y-%m-%d %H:%M:%S"))) -""" - -stmt = ("read_csv(StringIO(data), header=None, names=['foo'], " - " parse_dates=['foo'], infer_datetime_format=True)") - -read_csv_infer_datetime_format_iso8601 = Benchmark(stmt, setup) - -setup = common_setup + """ -rng = date_range('1/1/2000', 
periods=1000) -data = '\\n'.join(rng.map(lambda x: x.strftime("%Y%m%d"))) -""" - -stmt = ("read_csv(StringIO(data), header=None, names=['foo'], " - " parse_dates=['foo'], infer_datetime_format=True)") - -read_csv_infer_datetime_format_ymd = Benchmark(stmt, setup) - -setup = common_setup + """ -rng = date_range('1/1/2000', periods=1000) -data = '\\n'.join(rng.map(lambda x: x.strftime("%m/%d/%Y %H:%M:%S.%f"))) -""" - -stmt = ("read_csv(StringIO(data), header=None, names=['foo'], " - " parse_dates=['foo'], infer_datetime_format=True)") - -read_csv_infer_datetime_format_custom = Benchmark(stmt, setup) diff --git a/vb_suite/io_sql.py b/vb_suite/io_sql.py deleted file mode 100644 index ba8367e7e356b..0000000000000 --- a/vb_suite/io_sql.py +++ /dev/null @@ -1,126 +0,0 @@ -from vbench.api import Benchmark -from datetime import datetime - -common_setup = """from .pandas_vb_common import * -import sqlite3 -import sqlalchemy -from sqlalchemy import create_engine - -engine = create_engine('sqlite:///:memory:') -con = sqlite3.connect(':memory:') -""" - -sdate = datetime(2014, 6, 1) - - -#------------------------------------------------------------------------------- -# to_sql - -setup = common_setup + """ -index = tm.makeStringIndex(10000) -df = DataFrame({'float1' : randn(10000), - 'float2' : randn(10000), - 'string1' : ['foo'] * 10000, - 'bool1' : [True] * 10000, - 'int1' : np.random.randint(0, 100000, size=10000)}, - index=index) -""" - -sql_write_sqlalchemy = Benchmark("df.to_sql('test1', engine, if_exists='replace')", - setup, start_date=sdate) - -sql_write_fallback = Benchmark("df.to_sql('test1', con, if_exists='replace')", - setup, start_date=sdate) - - -#------------------------------------------------------------------------------- -# read_sql - -setup = common_setup + """ -index = tm.makeStringIndex(10000) -df = DataFrame({'float1' : randn(10000), - 'float2' : randn(10000), - 'string1' : ['foo'] * 10000, - 'bool1' : [True] * 10000, - 'int1' : np.random.randint(0, 
100000, size=10000)}, - index=index) -df.to_sql('test2', engine, if_exists='replace') -df.to_sql('test2', con, if_exists='replace') -""" - -sql_read_query_sqlalchemy = Benchmark("read_sql_query('SELECT * FROM test2', engine)", - setup, start_date=sdate) - -sql_read_query_fallback = Benchmark("read_sql_query('SELECT * FROM test2', con)", - setup, start_date=sdate) - -sql_read_table_sqlalchemy = Benchmark("read_sql_table('test2', engine)", - setup, start_date=sdate) - - -#------------------------------------------------------------------------------- -# type specific write - -setup = common_setup + """ -df = DataFrame({'float' : randn(10000), - 'string' : ['foo'] * 10000, - 'bool' : [True] * 10000, - 'datetime' : date_range('2000-01-01', periods=10000, freq='s')}) -df.loc[1000:3000, 'float'] = np.nan -""" - -sql_float_write_sqlalchemy = \ - Benchmark("df[['float']].to_sql('test_float', engine, if_exists='replace')", - setup, start_date=sdate) - -sql_float_write_fallback = \ - Benchmark("df[['float']].to_sql('test_float', con, if_exists='replace')", - setup, start_date=sdate) - -sql_string_write_sqlalchemy = \ - Benchmark("df[['string']].to_sql('test_string', engine, if_exists='replace')", - setup, start_date=sdate) - -sql_string_write_fallback = \ - Benchmark("df[['string']].to_sql('test_string', con, if_exists='replace')", - setup, start_date=sdate) - -sql_datetime_write_sqlalchemy = \ - Benchmark("df[['datetime']].to_sql('test_datetime', engine, if_exists='replace')", - setup, start_date=sdate) - -#sql_datetime_write_fallback = \ -# Benchmark("df[['datetime']].to_sql('test_datetime', con, if_exists='replace')", -# setup3, start_date=sdate) - -#------------------------------------------------------------------------------- -# type specific read - -setup = common_setup + """ -df = DataFrame({'float' : randn(10000), - 'datetime' : date_range('2000-01-01', periods=10000, freq='s')}) -df['datetime_string'] = df['datetime'].map(str) - -df.to_sql('test_type', engine, 
if_exists='replace') -df[['float', 'datetime_string']].to_sql('test_type', con, if_exists='replace') -""" - -sql_float_read_query_sqlalchemy = \ - Benchmark("read_sql_query('SELECT float FROM test_type', engine)", - setup, start_date=sdate) - -sql_float_read_table_sqlalchemy = \ - Benchmark("read_sql_table('test_type', engine, columns=['float'])", - setup, start_date=sdate) - -sql_float_read_query_fallback = \ - Benchmark("read_sql_query('SELECT float FROM test_type', con)", - setup, start_date=sdate) - -sql_datetime_read_as_native_sqlalchemy = \ - Benchmark("read_sql_table('test_type', engine, columns=['datetime'])", - setup, start_date=sdate) - -sql_datetime_read_and_parse_sqlalchemy = \ - Benchmark("read_sql_table('test_type', engine, columns=['datetime_string'], parse_dates=['datetime_string'])", - setup, start_date=sdate) diff --git a/vb_suite/join_merge.py b/vb_suite/join_merge.py deleted file mode 100644 index 238a129552e90..0000000000000 --- a/vb_suite/join_merge.py +++ /dev/null @@ -1,270 +0,0 @@ -from vbench.benchmark import Benchmark -from datetime import datetime - -common_setup = """from .pandas_vb_common import * -""" - -setup = common_setup + """ -level1 = tm.makeStringIndex(10).values -level2 = tm.makeStringIndex(1000).values -label1 = np.arange(10).repeat(1000) -label2 = np.tile(np.arange(1000), 10) - -key1 = np.tile(level1.take(label1), 10) -key2 = np.tile(level2.take(label2), 10) - -shuf = np.arange(100000) -random.shuffle(shuf) -try: - index2 = MultiIndex(levels=[level1, level2], labels=[label1, label2]) - index3 = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)], - labels=[np.arange(10).repeat(10000), - np.tile(np.arange(100).repeat(100), 10), - np.tile(np.tile(np.arange(100), 100), 10)]) - df_multi = DataFrame(np.random.randn(len(index2), 4), index=index2, - columns=['A', 'B', 'C', 'D']) -except: # pre-MultiIndex - pass - -try: - DataFrame = DataMatrix -except: - pass - -df = pd.DataFrame({'data1' : np.random.randn(100000), - 
'data2' : np.random.randn(100000), - 'key1' : key1, - 'key2' : key2}) - - -df_key1 = pd.DataFrame(np.random.randn(len(level1), 4), index=level1, - columns=['A', 'B', 'C', 'D']) -df_key2 = pd.DataFrame(np.random.randn(len(level2), 4), index=level2, - columns=['A', 'B', 'C', 'D']) - -df_shuf = df.reindex(df.index[shuf]) -""" - -#---------------------------------------------------------------------- -# DataFrame joins on key - -join_dataframe_index_single_key_small = \ - Benchmark("df.join(df_key1, on='key1')", setup, - name='join_dataframe_index_single_key_small') - -join_dataframe_index_single_key_bigger = \ - Benchmark("df.join(df_key2, on='key2')", setup, - name='join_dataframe_index_single_key_bigger') - -join_dataframe_index_single_key_bigger_sort = \ - Benchmark("df_shuf.join(df_key2, on='key2', sort=True)", setup, - name='join_dataframe_index_single_key_bigger_sort', - start_date=datetime(2012, 2, 5)) - -join_dataframe_index_multi = \ - Benchmark("df.join(df_multi, on=['key1', 'key2'])", setup, - name='join_dataframe_index_multi', - start_date=datetime(2011, 10, 20)) - -#---------------------------------------------------------------------- -# Joins on integer keys -setup = common_setup + """ -df = pd.DataFrame({'key1': np.tile(np.arange(500).repeat(10), 2), - 'key2': np.tile(np.arange(250).repeat(10), 4), - 'value': np.random.randn(10000)}) -df2 = pd.DataFrame({'key1': np.arange(500), 'value2': randn(500)}) -df3 = df[:5000] -""" - - -join_dataframe_integer_key = Benchmark("merge(df, df2, on='key1')", setup, - start_date=datetime(2011, 10, 20)) -join_dataframe_integer_2key = Benchmark("merge(df, df3)", setup, - start_date=datetime(2011, 10, 20)) - -#---------------------------------------------------------------------- -# DataFrame joins on index - - -#---------------------------------------------------------------------- -# Merges -setup = common_setup + """ -N = 10000 - -indices = tm.makeStringIndex(N).values -indices2 = tm.makeStringIndex(N).values -key = 
np.tile(indices[:8000], 10) -key2 = np.tile(indices2[:8000], 10) - -left = pd.DataFrame({'key' : key, 'key2':key2, - 'value' : np.random.randn(80000)}) -right = pd.DataFrame({'key': indices[2000:], 'key2':indices2[2000:], - 'value2' : np.random.randn(8000)}) -""" - -merge_2intkey_nosort = Benchmark('merge(left, right, sort=False)', setup, - start_date=datetime(2011, 10, 20)) - -merge_2intkey_sort = Benchmark('merge(left, right, sort=True)', setup, - start_date=datetime(2011, 10, 20)) - -#---------------------------------------------------------------------- -# Appending DataFrames - -setup = common_setup + """ -df1 = pd.DataFrame(np.random.randn(10000, 4), columns=['A', 'B', 'C', 'D']) -df2 = df1.copy() -df2.index = np.arange(10000, 20000) -mdf1 = df1.copy() -mdf1['obj1'] = 'bar' -mdf1['obj2'] = 'bar' -mdf1['int1'] = 5 -try: - mdf1.consolidate(inplace=True) -except: - pass -mdf2 = mdf1.copy() -mdf2.index = df2.index -""" - -stmt = "df1.append(df2)" -append_frame_single_homogenous = \ - Benchmark(stmt, setup, name='append_frame_single_homogenous', - ncalls=500, repeat=1) - -stmt = "mdf1.append(mdf2)" -append_frame_single_mixed = Benchmark(stmt, setup, - name='append_frame_single_mixed', - ncalls=500, repeat=1) - -#---------------------------------------------------------------------- -# data alignment - -setup = common_setup + """n = 1000000 -# indices = tm.makeStringIndex(n) -def sample(values, k): - sampler = np.random.permutation(len(values)) - return values.take(sampler[:k]) -sz = 500000 -rng = np.arange(0, 10000000000000, 10000000) -stamps = np.datetime64(datetime.now()).view('i8') + rng -idx1 = np.sort(sample(stamps, sz)) -idx2 = np.sort(sample(stamps, sz)) -ts1 = Series(np.random.randn(sz), idx1) -ts2 = Series(np.random.randn(sz), idx2) -""" -stmt = "ts1 + ts2" -series_align_int64_index = \ - Benchmark(stmt, setup, - name="series_align_int64_index", - start_date=datetime(2010, 6, 1), logy=True) - -stmt = "ts1.align(ts2, join='left')" 
-series_align_left_monotonic = \ - Benchmark(stmt, setup, - name="series_align_left_monotonic", - start_date=datetime(2011, 12, 1), logy=True) - -#---------------------------------------------------------------------- -# Concat Series axis=1 - -setup = common_setup + """ -n = 1000 -indices = tm.makeStringIndex(1000) -s = Series(n, index=indices) -pieces = [s[i:-i] for i in range(1, 10)] -pieces = pieces * 50 -""" - -concat_series_axis1 = Benchmark('concat(pieces, axis=1)', setup, - start_date=datetime(2012, 2, 27)) - -setup = common_setup + """ -df = pd.DataFrame(randn(5, 4)) -""" - -concat_small_frames = Benchmark('concat([df] * 1000)', setup, - start_date=datetime(2012, 1, 1)) - - -#---------------------------------------------------------------------- -# Concat empty - -setup = common_setup + """ -df = pd.DataFrame(dict(A = range(10000)),index=date_range('20130101',periods=10000,freq='s')) -empty = pd.DataFrame() -""" - -concat_empty_frames1 = Benchmark('concat([df,empty])', setup, - start_date=datetime(2012, 1, 1)) -concat_empty_frames2 = Benchmark('concat([empty,df])', setup, - start_date=datetime(2012, 1, 1)) - - -#---------------------------------------------------------------------- -# Ordered merge - -setup = common_setup + """ -groups = tm.makeStringIndex(10).values - -left = pd.DataFrame({'group': groups.repeat(5000), - 'key' : np.tile(np.arange(0, 10000, 2), 10), - 'lvalue': np.random.randn(50000)}) - -right = pd.DataFrame({'key' : np.arange(10000), - 'rvalue' : np.random.randn(10000)}) - -""" - -stmt = "ordered_merge(left, right, on='key', left_by='group')" - -#---------------------------------------------------------------------- -# outer join of non-unique -# GH 6329 - -setup = common_setup + """ -date_index = date_range('01-Jan-2013', '23-Jan-2013', freq='T') -daily_dates = date_index.to_period('D').to_timestamp('S','S') -fracofday = date_index.view(np.ndarray) - daily_dates.view(np.ndarray) -fracofday = 
fracofday.astype('timedelta64[ns]').astype(np.float64)/864e11 -fracofday = TimeSeries(fracofday, daily_dates) -index = date_range(date_index.min().to_period('A').to_timestamp('D','S'), - date_index.max().to_period('A').to_timestamp('D','E'), - freq='D') -temp = TimeSeries(1.0, index) -""" - -join_non_unique_equal = Benchmark('fracofday * temp[fracofday.index]', setup, - start_date=datetime(2013, 1, 1)) - - -setup = common_setup + ''' -np.random.seed(2718281) -n = 50000 - -left = pd.DataFrame(np.random.randint(1, n/500, (n, 2)), - columns=['jim', 'joe']) - -right = pd.DataFrame(np.random.randint(1, n/500, (n, 2)), - columns=['jolie', 'jolia']).set_index('jolie') -''' - -left_outer_join_index = Benchmark("left.join(right, on='jim')", setup, - name='left_outer_join_index') - - -setup = common_setup + """ -low, high, n = -1 << 10, 1 << 10, 1 << 20 -left = pd.DataFrame(np.random.randint(low, high, (n, 7)), - columns=list('ABCDEFG')) -left['left'] = left.sum(axis=1) - -i = np.random.permutation(len(left)) -right = left.iloc[i].copy() -right.columns = right.columns[:-1].tolist() + ['right'] -right.index = np.arange(len(right)) -right['right'] *= -1 -""" - -i8merge = Benchmark("merge(left, right, how='outer')", setup, - name='i8merge') diff --git a/vb_suite/make.py b/vb_suite/make.py deleted file mode 100755 index 5a8a8215db9a4..0000000000000 --- a/vb_suite/make.py +++ /dev/null @@ -1,167 +0,0 @@ -#!/usr/bin/env python - -""" -Python script for building documentation. - -To build the docs you must have all optional dependencies for statsmodels -installed. See the installation instructions for a list of these. - -Note: currently latex builds do not work because of table formats that are not -supported in the latex generation. - -Usage ------ -python make.py clean -python make.py html -""" - -import glob -import os -import shutil -import sys -import sphinx - -os.environ['PYTHONPATH'] = '..' 
- -SPHINX_BUILD = 'sphinxbuild' - - -def upload(): - 'push a copy to the site' - os.system('cd build/html; rsync -avz . pandas@pandas.pydata.org' - ':/usr/share/nginx/pandas/pandas-docs/vbench/ -essh') - - -def clean(): - if os.path.exists('build'): - shutil.rmtree('build') - - if os.path.exists('source/generated'): - shutil.rmtree('source/generated') - - -def html(): - check_build() - if os.system('sphinx-build -P -b html -d build/doctrees ' - 'source build/html'): - raise SystemExit("Building HTML failed.") - - -def check_build(): - build_dirs = [ - 'build', 'build/doctrees', 'build/html', - 'build/plots', 'build/_static', - 'build/_templates'] - for d in build_dirs: - try: - os.mkdir(d) - except OSError: - pass - - -def all(): - clean() - html() - - -def auto_update(): - msg = '' - try: - clean() - html() - upload() - sendmail() - except (Exception, SystemExit), inst: - msg += str(inst) + '\n' - sendmail(msg) - - -def sendmail(err_msg=None): - from_name, to_name = _get_config() - - if err_msg is None: - msgstr = 'Daily vbench uploaded successfully' - subject = "VB: daily update successful" - else: - msgstr = err_msg - subject = "VB: daily update failed" - - import smtplib - from email.MIMEText import MIMEText - msg = MIMEText(msgstr) - msg['Subject'] = subject - msg['From'] = from_name - msg['To'] = to_name - - server_str, port, login, pwd = _get_credentials() - server = smtplib.SMTP(server_str, port) - server.ehlo() - server.starttls() - server.ehlo() - - server.login(login, pwd) - try: - server.sendmail(from_name, to_name, msg.as_string()) - finally: - server.close() - - -def _get_dir(subdir=None): - import getpass - USERNAME = getpass.getuser() - if sys.platform == 'darwin': - HOME = '/Users/%s' % USERNAME - else: - HOME = '/home/%s' % USERNAME - - if subdir is None: - subdir = '/code/scripts' - conf_dir = '%s%s' % (HOME, subdir) - return conf_dir - - -def _get_credentials(): - tmp_dir = _get_dir() - cred = '%s/credentials' % tmp_dir - with open(cred, 'r') as 
fh: - server, port, un, domain = fh.read().split(',') - port = int(port) - login = un + '@' + domain + '.com' - - import base64 - with open('%s/cron_email_pwd' % tmp_dir, 'r') as fh: - pwd = base64.b64decode(fh.read()) - - return server, port, login, pwd - - -def _get_config(): - tmp_dir = _get_dir() - with open('%s/addresses' % tmp_dir, 'r') as fh: - from_name, to_name = fh.read().split(',') - return from_name, to_name - -funcd = { - 'html': html, - 'clean': clean, - 'upload': upload, - 'auto_update': auto_update, - 'all': all, -} - -small_docs = False - -# current_dir = os.getcwd() -# os.chdir(os.path.dirname(os.path.join(current_dir, __file__))) - -if len(sys.argv) > 1: - for arg in sys.argv[1:]: - func = funcd.get(arg) - if func is None: - raise SystemExit('Do not know how to handle %s; valid args are %s' % ( - arg, funcd.keys())) - func() -else: - small_docs = False - all() -# os.chdir(current_dir) diff --git a/vb_suite/measure_memory_consumption.py b/vb_suite/measure_memory_consumption.py deleted file mode 100755 index bb73cf5da4302..0000000000000 --- a/vb_suite/measure_memory_consumption.py +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -from __future__ import print_function - -"""Short one-line summary - -long summary -""" - - -def main(): - import shutil - import tempfile - import warnings - - from pandas import Series - - from vbench.api import BenchmarkRunner - from suite import (REPO_PATH, BUILD, DB_PATH, PREPARE, - dependencies, benchmarks) - - from memory_profiler import memory_usage - - warnings.filterwarnings('ignore', category=FutureWarning) - - try: - TMP_DIR = tempfile.mkdtemp() - runner = BenchmarkRunner( - benchmarks, REPO_PATH, REPO_PATH, BUILD, DB_PATH, - TMP_DIR, PREPARE, always_clean=True, - # run_option='eod', start_date=START_DATE, - module_dependencies=dependencies) - results = {} - for b in runner.benchmarks: - k = b.name - try: - vs = memory_usage((b.run,)) - v = max(vs) - # print(k, v) - results[k] = 
v - except Exception as e: - print("Exception caught in %s\n" % k) - print(str(e)) - - s = Series(results) - s.sort() - print((s)) - - finally: - shutil.rmtree(TMP_DIR) - - -if __name__ == "__main__": - main() diff --git a/vb_suite/miscellaneous.py b/vb_suite/miscellaneous.py deleted file mode 100644 index da2c736e79ea7..0000000000000 --- a/vb_suite/miscellaneous.py +++ /dev/null @@ -1,32 +0,0 @@ -from vbench.benchmark import Benchmark -from datetime import datetime - -common_setup = """from .pandas_vb_common import * -""" - -#---------------------------------------------------------------------- -# cache_readonly - -setup = common_setup + """ -from pandas.util.decorators import cache_readonly - -class Foo: - - @cache_readonly - def prop(self): - return 5 -obj = Foo() -""" -misc_cache_readonly = Benchmark("obj.prop", setup, name="misc_cache_readonly", - ncalls=2000000) - -#---------------------------------------------------------------------- -# match - -setup = common_setup + """ -uniques = tm.makeStringIndex(1000).values -all = uniques.repeat(10) -""" - -match_strings = Benchmark("match(all, uniques)", setup, - start_date=datetime(2012, 5, 12)) diff --git a/vb_suite/packers.py b/vb_suite/packers.py deleted file mode 100644 index 69ec10822b392..0000000000000 --- a/vb_suite/packers.py +++ /dev/null @@ -1,252 +0,0 @@ -from vbench.api import Benchmark -from datetime import datetime - -start_date = datetime(2013, 5, 1) - -common_setup = """from .pandas_vb_common import * -import os -import pandas as pd -from pandas.core import common as com -from pandas.compat import BytesIO -from random import randrange - -f = '__test__.msg' -def remove(f): - try: - os.remove(f) - except: - pass - -N=100000 -C=5 -index = date_range('20000101',periods=N,freq='H') -df = DataFrame(dict([ ("float{0}".format(i),randn(N)) for i in range(C) ]), - index=index) - -N=100000 -C=5 -index = date_range('20000101',periods=N,freq='H') -df2 = DataFrame(dict([ ("float{0}".format(i),randn(N)) for i in 
range(C) ]), - index=index) -df2['object'] = ['%08x'%randrange(16**8) for _ in range(N)] -remove(f) -""" - -#---------------------------------------------------------------------- -# msgpack - -setup = common_setup + """ -df2.to_msgpack(f) -""" - -packers_read_pack = Benchmark("pd.read_msgpack(f)", setup, start_date=start_date) - -setup = common_setup + """ -""" - -packers_write_pack = Benchmark("df2.to_msgpack(f)", setup, cleanup="remove(f)", start_date=start_date) - -#---------------------------------------------------------------------- -# pickle - -setup = common_setup + """ -df2.to_pickle(f) -""" - -packers_read_pickle = Benchmark("pd.read_pickle(f)", setup, start_date=start_date) - -setup = common_setup + """ -""" - -packers_write_pickle = Benchmark("df2.to_pickle(f)", setup, cleanup="remove(f)", start_date=start_date) - -#---------------------------------------------------------------------- -# csv - -setup = common_setup + """ -df.to_csv(f) -""" - -packers_read_csv = Benchmark("pd.read_csv(f)", setup, start_date=start_date) - -setup = common_setup + """ -""" - -packers_write_csv = Benchmark("df.to_csv(f)", setup, cleanup="remove(f)", start_date=start_date) - -#---------------------------------------------------------------------- -# hdf store - -setup = common_setup + """ -df2.to_hdf(f,'df') -""" - -packers_read_hdf_store = Benchmark("pd.read_hdf(f,'df')", setup, start_date=start_date) - -setup = common_setup + """ -""" - -packers_write_hdf_store = Benchmark("df2.to_hdf(f,'df')", setup, cleanup="remove(f)", start_date=start_date) - -#---------------------------------------------------------------------- -# hdf table - -setup = common_setup + """ -df2.to_hdf(f,'df',format='table') -""" - -packers_read_hdf_table = Benchmark("pd.read_hdf(f,'df')", setup, start_date=start_date) - -setup = common_setup + """ -""" - -packers_write_hdf_table = Benchmark("df2.to_hdf(f,'df',table=True)", setup, cleanup="remove(f)", start_date=start_date) - 
-#---------------------------------------------------------------------- -# sql - -setup = common_setup + """ -import sqlite3 -from sqlalchemy import create_engine -engine = create_engine('sqlite:///:memory:') - -df2.to_sql('table', engine, if_exists='replace') -""" - -packers_read_sql= Benchmark("pd.read_sql_table('table', engine)", setup, start_date=start_date) - -setup = common_setup + """ -import sqlite3 -from sqlalchemy import create_engine -engine = create_engine('sqlite:///:memory:') -""" - -packers_write_sql = Benchmark("df2.to_sql('table', engine, if_exists='replace')", setup, start_date=start_date) - -#---------------------------------------------------------------------- -# json - -setup_int_index = """ -import numpy as np -df.index = np.arange(N) -""" - -setup = common_setup + """ -df.to_json(f,orient='split') -""" -packers_read_json_date_index = Benchmark("pd.read_json(f, orient='split')", setup, start_date=start_date) -setup = setup + setup_int_index -packers_read_json = Benchmark("pd.read_json(f, orient='split')", setup, start_date=start_date) - -setup = common_setup + """ -""" -packers_write_json_date_index = Benchmark("df.to_json(f,orient='split')", setup, cleanup="remove(f)", start_date=start_date) - -setup = setup + setup_int_index -packers_write_json = Benchmark("df.to_json(f,orient='split')", setup, cleanup="remove(f)", start_date=start_date) -packers_write_json_T = Benchmark("df.to_json(f,orient='columns')", setup, cleanup="remove(f)", start_date=start_date) - -setup = common_setup + """ -from numpy.random import randint -from collections import OrderedDict - -cols = [ - lambda i: ("{0}_timedelta".format(i), [pd.Timedelta('%d seconds' % randrange(1e6)) for _ in range(N)]), - lambda i: ("{0}_int".format(i), randint(1e8, size=N)), - lambda i: ("{0}_timestamp".format(i), [pd.Timestamp( 1418842918083256000 + randrange(1e9, 1e18, 200)) for _ in range(N)]) - ] -df_mixed = DataFrame(OrderedDict([cols[i % len(cols)](i) for i in range(C)]), - 
index=index) -""" -packers_write_json_mixed_delta_int_tstamp = Benchmark("df_mixed.to_json(f,orient='split')", setup, cleanup="remove(f)", start_date=start_date) - -setup = common_setup + """ -from numpy.random import randint -from collections import OrderedDict -cols = [ - lambda i: ("{0}_float".format(i), randn(N)), - lambda i: ("{0}_int".format(i), randint(1e8, size=N)) - ] -df_mixed = DataFrame(OrderedDict([cols[i % len(cols)](i) for i in range(C)]), - index=index) -""" -packers_write_json_mixed_float_int = Benchmark("df_mixed.to_json(f,orient='index')", setup, cleanup="remove(f)", start_date=start_date) -packers_write_json_mixed_float_int_T = Benchmark("df_mixed.to_json(f,orient='columns')", setup, cleanup="remove(f)", start_date=start_date) - -setup = common_setup + """ -from numpy.random import randint -from collections import OrderedDict -cols = [ - lambda i: ("{0}_float".format(i), randn(N)), - lambda i: ("{0}_int".format(i), randint(1e8, size=N)), - lambda i: ("{0}_str".format(i), ['%08x'%randrange(16**8) for _ in range(N)]) - ] -df_mixed = DataFrame(OrderedDict([cols[i % len(cols)](i) for i in range(C)]), - index=index) -""" -packers_write_json_mixed_float_int_str = Benchmark("df_mixed.to_json(f,orient='split')", setup, cleanup="remove(f)", start_date=start_date) - -#---------------------------------------------------------------------- -# stata - -setup = common_setup + """ -df.to_stata(f, {'index': 'tc'}) -""" -packers_read_stata = Benchmark("pd.read_stata(f)", setup, start_date=start_date) - -packers_write_stata = Benchmark("df.to_stata(f, {'index': 'tc'})", setup, cleanup="remove(f)", start_date=start_date) - -setup = common_setup + """ -df['int8_'] = [randint(np.iinfo(np.int8).min, np.iinfo(np.int8).max - 27) for _ in range(N)] -df['int16_'] = [randint(np.iinfo(np.int16).min, np.iinfo(np.int16).max - 27) for _ in range(N)] -df['int32_'] = [randint(np.iinfo(np.int32).min, np.iinfo(np.int32).max - 27) for _ in range(N)] -df['float32_'] = 
np.array(randn(N), dtype=np.float32) -df.to_stata(f, {'index': 'tc'}) -""" - -packers_read_stata_with_validation = Benchmark("pd.read_stata(f)", setup, start_date=start_date) - -packers_write_stata_with_validation = Benchmark("df.to_stata(f, {'index': 'tc'})", setup, cleanup="remove(f)", start_date=start_date) - -#---------------------------------------------------------------------- -# Excel - alternative writers -setup = common_setup + """ -bio = BytesIO() -""" - -excel_writer_bench = """ -bio.seek(0) -writer = pd.io.excel.ExcelWriter(bio, engine='{engine}') -df[:2000].to_excel(writer) -writer.save() -""" - -benchmark_xlsxwriter = excel_writer_bench.format(engine='xlsxwriter') - -packers_write_excel_xlsxwriter = Benchmark(benchmark_xlsxwriter, setup) - -benchmark_openpyxl = excel_writer_bench.format(engine='openpyxl') - -packers_write_excel_openpyxl = Benchmark(benchmark_openpyxl, setup) - -benchmark_xlwt = excel_writer_bench.format(engine='xlwt') - -packers_write_excel_xlwt = Benchmark(benchmark_xlwt, setup) - - -#---------------------------------------------------------------------- -# Excel - reader - -setup = common_setup + """ -bio = BytesIO() -writer = pd.io.excel.ExcelWriter(bio, engine='xlsxwriter') -df[:2000].to_excel(writer) -writer.save() -""" - -benchmark_read_excel=""" -bio.seek(0) -pd.read_excel(bio) -""" - -packers_read_excel = Benchmark(benchmark_read_excel, setup) diff --git a/vb_suite/pandas_vb_common.py b/vb_suite/pandas_vb_common.py deleted file mode 100644 index a1326d63a112a..0000000000000 --- a/vb_suite/pandas_vb_common.py +++ /dev/null @@ -1,30 +0,0 @@ -from pandas import * -import pandas as pd -from datetime import timedelta -from numpy.random import randn -from numpy.random import randint -from numpy.random import permutation -import pandas.util.testing as tm -import random -import numpy as np -try: - from pandas.compat import range -except ImportError: - pass - -np.random.seed(1234) -try: - import pandas._tseries as lib -except: - 
import pandas.lib as lib - -try: - Panel = WidePanel -except Exception: - pass - -# didn't add to namespace until later -try: - from pandas.core.index import MultiIndex -except ImportError: - pass diff --git a/vb_suite/panel_ctor.py b/vb_suite/panel_ctor.py deleted file mode 100644 index 9f497e7357a61..0000000000000 --- a/vb_suite/panel_ctor.py +++ /dev/null @@ -1,76 +0,0 @@ -from vbench.benchmark import Benchmark -from datetime import datetime - -common_setup = """from .pandas_vb_common import * -""" - -#---------------------------------------------------------------------- -# Panel.from_dict homogenization time - -START_DATE = datetime(2011, 6, 1) - -setup_same_index = common_setup + """ -# create 100 dataframes with the same index -dr = np.asarray(DatetimeIndex(start=datetime(1990,1,1), end=datetime(2012,1,1), - freq=datetools.Day(1))) -data_frames = {} -for x in range(100): - df = DataFrame({"a": [0]*len(dr), "b": [1]*len(dr), - "c": [2]*len(dr)}, index=dr) - data_frames[x] = df -""" - -panel_from_dict_same_index = \ - Benchmark("Panel.from_dict(data_frames)", - setup_same_index, name='panel_from_dict_same_index', - start_date=START_DATE, repeat=1, logy=True) - -setup_equiv_indexes = common_setup + """ -data_frames = {} -for x in range(100): - dr = np.asarray(DatetimeIndex(start=datetime(1990,1,1), end=datetime(2012,1,1), - freq=datetools.Day(1))) - df = DataFrame({"a": [0]*len(dr), "b": [1]*len(dr), - "c": [2]*len(dr)}, index=dr) - data_frames[x] = df -""" - -panel_from_dict_equiv_indexes = \ - Benchmark("Panel.from_dict(data_frames)", - setup_equiv_indexes, name='panel_from_dict_equiv_indexes', - start_date=START_DATE, repeat=1, logy=True) - -setup_all_different_indexes = common_setup + """ -data_frames = {} -start = datetime(1990,1,1) -end = datetime(2012,1,1) -for x in range(100): - end += timedelta(days=1) - dr = np.asarray(date_range(start, end)) - df = DataFrame({"a": [0]*len(dr), "b": [1]*len(dr), - "c": [2]*len(dr)}, index=dr) - data_frames[x] = df 
-""" -panel_from_dict_all_different_indexes = \ - Benchmark("Panel.from_dict(data_frames)", - setup_all_different_indexes, - name='panel_from_dict_all_different_indexes', - start_date=START_DATE, repeat=1, logy=True) - -setup_two_different_indexes = common_setup + """ -data_frames = {} -start = datetime(1990,1,1) -end = datetime(2012,1,1) -for x in range(100): - if x == 50: - end += timedelta(days=1) - dr = np.asarray(date_range(start, end)) - df = DataFrame({"a": [0]*len(dr), "b": [1]*len(dr), - "c": [2]*len(dr)}, index=dr) - data_frames[x] = df -""" -panel_from_dict_two_different_indexes = \ - Benchmark("Panel.from_dict(data_frames)", - setup_two_different_indexes, - name='panel_from_dict_two_different_indexes', - start_date=START_DATE, repeat=1, logy=True) diff --git a/vb_suite/panel_methods.py b/vb_suite/panel_methods.py deleted file mode 100644 index 28586422a66e3..0000000000000 --- a/vb_suite/panel_methods.py +++ /dev/null @@ -1,28 +0,0 @@ -from vbench.api import Benchmark -from datetime import datetime - -common_setup = """from .pandas_vb_common import * -""" - -#---------------------------------------------------------------------- -# shift - -setup = common_setup + """ -index = date_range(start="2000", freq="D", periods=1000) -panel = Panel(np.random.randn(100, len(index), 1000)) -""" - -panel_shift = Benchmark('panel.shift(1)', setup, - start_date=datetime(2012, 1, 12)) - -panel_shift_minor = Benchmark('panel.shift(1, axis="minor")', setup, - start_date=datetime(2012, 1, 12)) - -panel_pct_change_major = Benchmark('panel.pct_change(1, axis="major")', setup, - start_date=datetime(2014, 4, 19)) - -panel_pct_change_minor = Benchmark('panel.pct_change(1, axis="minor")', setup, - start_date=datetime(2014, 4, 19)) - -panel_pct_change_items = Benchmark('panel.pct_change(1, axis="items")', setup, - start_date=datetime(2014, 4, 19)) diff --git a/vb_suite/parser_vb.py b/vb_suite/parser_vb.py deleted file mode 100644 index bb9ccbdb5e854..0000000000000 --- 
a/vb_suite/parser_vb.py +++ /dev/null @@ -1,112 +0,0 @@ -from vbench.api import Benchmark -from datetime import datetime - -common_setup = """from .pandas_vb_common import * -from pandas import read_csv, read_table -""" - -setup = common_setup + """ -import os -N = 10000 -K = 8 -df = DataFrame(np.random.randn(N, K) * np.random.randint(100, 10000, (N, K))) -df.to_csv('test.csv', sep='|') -""" - -read_csv_vb = Benchmark("read_csv('test.csv', sep='|')", setup, - cleanup="os.remove('test.csv')", - start_date=datetime(2012, 5, 7)) - - -setup = common_setup + """ -import os -N = 10000 -K = 8 -format = lambda x: '{:,}'.format(x) -df = DataFrame(np.random.randn(N, K) * np.random.randint(100, 10000, (N, K))) -df = df.applymap(format) -df.to_csv('test.csv', sep='|') -""" - -read_csv_thou_vb = Benchmark("read_csv('test.csv', sep='|', thousands=',')", - setup, - cleanup="os.remove('test.csv')", - start_date=datetime(2012, 5, 7)) - -setup = common_setup + """ -data = ['A,B,C'] -data = data + ['1,2,3 # comment'] * 100000 -data = '\\n'.join(data) -""" - -stmt = "read_csv(StringIO(data), comment='#')" -read_csv_comment2 = Benchmark(stmt, setup, - start_date=datetime(2011, 11, 1)) - -setup = common_setup + """ -try: - from cStringIO import StringIO -except ImportError: - from io import StringIO - -import os -N = 10000 -K = 8 -data = '''\ -KORD,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000 -KORD,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000 -KORD,19990127, 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000 -KORD,19990127, 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000 -KORD,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000 -''' -data = data * 200 -""" -cmd = ("read_table(StringIO(data), sep=',', header=None, " - "parse_dates=[[1,2], [1,3]])") -sdate = datetime(2012, 5, 7) -read_table_multiple_date = Benchmark(cmd, setup, start_date=sdate) - -setup = common_setup + """ -try: - from 
cStringIO import StringIO -except ImportError: - from io import StringIO - -import os -N = 10000 -K = 8 -data = '''\ -KORD,19990127 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000 -KORD,19990127 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000 -KORD,19990127 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000 -KORD,19990127 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000 -KORD,19990127 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000 -''' -data = data * 200 -""" -cmd = "read_table(StringIO(data), sep=',', header=None, parse_dates=[1])" -sdate = datetime(2012, 5, 7) -read_table_multiple_date_baseline = Benchmark(cmd, setup, start_date=sdate) - -setup = common_setup + """ -try: - from cStringIO import StringIO -except ImportError: - from io import StringIO - -data = '''\ -0.1213700904466425978256438611,0.0525708283766902484401839501,0.4174092731488769913994474336 -0.4096341697147408700274695547,0.1587830198973579909349496119,0.1292545832485494372576795285 -0.8323255650024565799327547210,0.9694902427379478160318626578,0.6295047811546814475747169126 -0.4679375305798131323697930383,0.2963942381834381301075609371,0.5268936082160610157032465394 -0.6685382761849776311890991564,0.6721207066140679753374342908,0.6519975277021627935170045020 -''' -data = data * 200 -""" -cmd = "read_csv(StringIO(data), sep=',', header=None, float_precision=None)" -sdate = datetime(2014, 8, 20) -read_csv_default_converter = Benchmark(cmd, setup, start_date=sdate) -cmd = "read_csv(StringIO(data), sep=',', header=None, float_precision='high')" -read_csv_precise_converter = Benchmark(cmd, setup, start_date=sdate) -cmd = "read_csv(StringIO(data), sep=',', header=None, float_precision='round_trip')" -read_csv_roundtrip_converter = Benchmark(cmd, setup, start_date=sdate) diff --git a/vb_suite/perf_HEAD.py b/vb_suite/perf_HEAD.py deleted file mode 100755 index 143d943b9eadf..0000000000000 --- a/vb_suite/perf_HEAD.py +++ /dev/null @@ -1,243 
+0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -from __future__ import print_function - -"""Run all the vbenches in `suite`, and post the results as a json blob to gist - -""" - -import urllib2 -from contextlib import closing -from urllib2 import urlopen -import json - -import pandas as pd - -WEB_TIMEOUT = 10 - - -def get_travis_data(): - """figure out what worker we're running on, and the number of jobs it's running - """ - import os - jobid = os.environ.get("TRAVIS_JOB_ID") - if not jobid: - return None, None - - with closing(urlopen("https://api.travis-ci.org/workers/")) as resp: - workers = json.loads(resp.read()) - - host = njobs = None - for item in workers: - host = item.get("host") - id = ((item.get("payload") or {}).get("job") or {}).get("id") - if id and str(id) == str(jobid): - break - if host: - njobs = len( - [x for x in workers if host in x['host'] and x['payload']]) - - return host, njobs - - -def get_utcdatetime(): - try: - from datetime import datetime - return datetime.utcnow().isoformat(" ") - except: - pass - - -def dump_as_gist(data, desc="The Commit", njobs=None): - host, njobs2 = get_travis_data()[:2] - - if njobs: # be slightly more reliable - njobs = max(njobs, njobs2) - - content = dict(version="0.1.1", - timings=data, - datetime=get_utcdatetime(), # added in 0.1.1 - hostname=host, # added in 0.1.1 - njobs=njobs # added in 0.1.1, a measure of load on the travis box - ) - - payload = dict(description=desc, - public=True, - files={'results.json': dict(content=json.dumps(content))}) - try: - with closing(urlopen("https://api.github.com/gists", - json.dumps(payload), timeout=WEB_TIMEOUT)) as r: - if 200 <= r.getcode() < 300: - print("\n\n" + "-" * 80) - - gist = json.loads(r.read()) - file_raw_url = gist['files'].items()[0][1]['raw_url'] - print("[vbench-gist-raw_url] %s" % file_raw_url) - print("[vbench-html-url] %s" % gist['html_url']) - print("[vbench-api-url] %s" % gist['url']) - - print("-" * 80 + "\n\n") - else: - 
print("api.github.com returned status %d" % r.getcode()) - except: - print("Error occured while dumping to gist") - - -def main(): - import warnings - from suite import benchmarks - - exit_code = 0 - warnings.filterwarnings('ignore', category=FutureWarning) - - host, njobs = get_travis_data()[:2] - results = [] - for b in benchmarks: - try: - d = b.run() - d.update(dict(name=b.name)) - results.append(d) - msg = "{name:<40}: {timing:> 10.4f} [ms]" - print(msg.format(name=results[-1]['name'], - timing=results[-1]['timing'])) - - except Exception as e: - exit_code = 1 - if (type(e) == KeyboardInterrupt or - 'KeyboardInterrupt' in str(d)): - raise KeyboardInterrupt() - - msg = "{name:<40}: ERROR:\n<-------" - print(msg.format(name=b.name)) - if isinstance(d, dict): - if d['succeeded']: - print("\nException:\n%s\n" % str(e)) - else: - for k, v in sorted(d.iteritems()): - print("{k}: {v}".format(k=k, v=v)) - - print("------->\n") - - dump_as_gist(results, "testing", njobs=njobs) - - return exit_code - - -if __name__ == "__main__": - import sys - sys.exit(main()) - -##################################################### -# functions for retrieving and processing the results - - -def get_vbench_log(build_url): - with closing(urllib2.urlopen(build_url)) as r: - if not (200 <= r.getcode() < 300): - return - - s = json.loads(r.read()) - s = [x for x in s['matrix'] if "VBENCH" in ((x.get('config', {}) - or {}).get('env', {}) or {})] - # s=[x for x in s['matrix']] - if not s: - return - id = s[0]['id'] # should be just one for now - with closing(urllib2.urlopen("https://api.travis-ci.org/jobs/%s" % id)) as r2: - if not 200 <= r.getcode() < 300: - return - s2 = json.loads(r2.read()) - return s2.get('log') - - -def get_results_raw_url(build): - "Taks a Travis a build number, retrieves the build log and extracts the gist url" - import re - log = get_vbench_log("https://api.travis-ci.org/builds/%s" % build) - if not log: - return - l = [x.strip( - ) for x in log.split("\n") if 
re.match(".vbench-gist-raw_url", x)] - if l: - s = l[0] - m = re.search("(https://[^\s]+)", s) - if m: - return m.group(0) - - -def convert_json_to_df(results_url): - """retrieve json results file from url and return df - - df contains timings for all successful vbenchmarks - """ - - with closing(urlopen(results_url)) as resp: - res = json.loads(resp.read()) - timings = res.get("timings") - if not timings: - return - res = [x for x in timings if x.get('succeeded')] - df = pd.DataFrame(res) - df = df.set_index("name") - return df - - -def get_build_results(build): - "Returns a df with the results of the VBENCH job associated with the travis build" - r_url = get_results_raw_url(build) - if not r_url: - return - - return convert_json_to_df(r_url) - - -def get_all_results(repo_id=53976): # travis pandas-dev/pandas id - """Fetches the VBENCH results for all travis builds, and returns a list of result df - - unsuccesful individual vbenches are dropped. - """ - from collections import OrderedDict - - def get_results_from_builds(builds): - dfs = OrderedDict() - for build in builds: - build_id = build['id'] - build_number = build['number'] - print(build_number) - res = get_build_results(build_id) - if res is not None: - dfs[build_number] = res - return dfs - - base_url = 'https://api.travis-ci.org/builds?url=%2Fbuilds&repository_id={repo_id}' - url = base_url.format(repo_id=repo_id) - url_after = url + '&after_number={after}' - dfs = OrderedDict() - - while True: - with closing(urlopen(url)) as r: - if not (200 <= r.getcode() < 300): - break - builds = json.loads(r.read()) - res = get_results_from_builds(builds) - if not res: - break - last_build_number = min(res.keys()) - dfs.update(res) - url = url_after.format(after=last_build_number) - - return dfs - - -def get_all_results_joined(repo_id=53976): - def mk_unique(df): - for dupe in df.index.get_duplicates(): - df = df.ix[df.index != dupe] - return df - dfs = get_all_results(repo_id) - for k in dfs: - dfs[k] = 
mk_unique(dfs[k]) - ss = [pd.Series(v.timing, name=k) for k, v in dfs.iteritems()] - results = pd.concat(reversed(ss), 1) - return results diff --git a/vb_suite/plotting.py b/vb_suite/plotting.py deleted file mode 100644 index 79e81e9eea8f4..0000000000000 --- a/vb_suite/plotting.py +++ /dev/null @@ -1,25 +0,0 @@ -from vbench.benchmark import Benchmark -from datetime import datetime - -common_setup = """from .pandas_vb_common import * - -try: - from pandas import date_range -except ImportError: - def date_range(start=None, end=None, periods=None, freq=None): - return DatetimeIndex(start, end, periods=periods, offset=freq) - -""" - -#----------------------------------------------------------------------------- -# Timeseries plotting - -setup = common_setup + """ -N = 2000 -M = 5 -df = DataFrame(np.random.randn(N,M), index=date_range('1/1/1975', periods=N)) -""" - -plot_timeseries_period = Benchmark("df.plot()", setup=setup, - name='plot_timeseries_period') - diff --git a/vb_suite/reindex.py b/vb_suite/reindex.py deleted file mode 100644 index 443eb43835745..0000000000000 --- a/vb_suite/reindex.py +++ /dev/null @@ -1,225 +0,0 @@ -from vbench.benchmark import Benchmark -from datetime import datetime - -common_setup = """from .pandas_vb_common import * -""" - -#---------------------------------------------------------------------- -# DataFrame reindex columns - -setup = common_setup + """ -df = DataFrame(index=range(10000), data=np.random.rand(10000,30), - columns=range(30)) -""" -statement = "df.reindex(columns=df.columns[1:5])" - -frame_reindex_columns = Benchmark(statement, setup) - -#---------------------------------------------------------------------- - -setup = common_setup + """ -rng = DatetimeIndex(start='1/1/1970', periods=10000, freq=datetools.Minute()) -df = DataFrame(np.random.rand(10000, 10), index=rng, - columns=range(10)) -df['foo'] = 'bar' -rng2 = Index(rng[::2]) -""" -statement = "df.reindex(rng2)" -dataframe_reindex = Benchmark(statement, setup) - 
-#---------------------------------------------------------------------- -# multiindex reindexing - -setup = common_setup + """ -N = 1000 -K = 20 - -level1 = tm.makeStringIndex(N).values.repeat(K) -level2 = np.tile(tm.makeStringIndex(K).values, N) -index = MultiIndex.from_arrays([level1, level2]) - -s1 = Series(np.random.randn(N * K), index=index) -s2 = s1[::2] -""" -statement = "s1.reindex(s2.index)" -reindex_multi = Benchmark(statement, setup, - name='reindex_multiindex', - start_date=datetime(2011, 9, 1)) - -#---------------------------------------------------------------------- -# Pad / backfill - -def pad(source_series, target_index): - try: - source_series.reindex(target_index, method='pad') - except: - source_series.reindex(target_index, fillMethod='pad') - -def backfill(source_series, target_index): - try: - source_series.reindex(target_index, method='backfill') - except: - source_series.reindex(target_index, fillMethod='backfill') - -setup = common_setup + """ -rng = date_range('1/1/2000', periods=100000, freq=datetools.Minute()) - -ts = Series(np.random.randn(len(rng)), index=rng) -ts2 = ts[::2] -ts3 = ts2.reindex(ts.index) -ts4 = ts3.astype('float32') - -def pad(source_series, target_index): - try: - source_series.reindex(target_index, method='pad') - except: - source_series.reindex(target_index, fillMethod='pad') -def backfill(source_series, target_index): - try: - source_series.reindex(target_index, method='backfill') - except: - source_series.reindex(target_index, fillMethod='backfill') -""" - -statement = "pad(ts2, ts.index)" -reindex_daterange_pad = Benchmark(statement, setup, - name="reindex_daterange_pad") - -statement = "backfill(ts2, ts.index)" -reindex_daterange_backfill = Benchmark(statement, setup, - name="reindex_daterange_backfill") - -reindex_fillna_pad = Benchmark("ts3.fillna(method='pad')", setup, - name="reindex_fillna_pad", - start_date=datetime(2011, 3, 1)) - -reindex_fillna_pad_float32 = Benchmark("ts4.fillna(method='pad')", setup, - 
name="reindex_fillna_pad_float32", - start_date=datetime(2013, 1, 1)) - -reindex_fillna_backfill = Benchmark("ts3.fillna(method='backfill')", setup, - name="reindex_fillna_backfill", - start_date=datetime(2011, 3, 1)) -reindex_fillna_backfill_float32 = Benchmark("ts4.fillna(method='backfill')", setup, - name="reindex_fillna_backfill_float32", - start_date=datetime(2013, 1, 1)) - -#---------------------------------------------------------------------- -# align on level - -setup = common_setup + """ -index = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)], - labels=[np.arange(10).repeat(10000), - np.tile(np.arange(100).repeat(100), 10), - np.tile(np.tile(np.arange(100), 100), 10)]) -random.shuffle(index.values) -df = DataFrame(np.random.randn(len(index), 4), index=index) -df_level = DataFrame(np.random.randn(100, 4), index=index.levels[1]) -""" - -reindex_frame_level_align = \ - Benchmark("df.align(df_level, level=1, copy=False)", setup, - name='reindex_frame_level_align', - start_date=datetime(2011, 12, 27)) - -reindex_frame_level_reindex = \ - Benchmark("df_level.reindex(df.index, level=1)", setup, - name='reindex_frame_level_reindex', - start_date=datetime(2011, 12, 27)) - - -#---------------------------------------------------------------------- -# sort_index, drop_duplicates - -# pathological, but realistic -setup = common_setup + """ -N = 10000 -K = 10 - -key1 = tm.makeStringIndex(N).values.repeat(K) -key2 = tm.makeStringIndex(N).values.repeat(K) - -df = DataFrame({'key1' : key1, 'key2' : key2, - 'value' : np.random.randn(N * K)}) -col_array_list = list(df.values.T) -""" -statement = "df.sort_index(by=['key1', 'key2'])" -frame_sort_index_by_columns = Benchmark(statement, setup, - start_date=datetime(2011, 11, 1)) - -# drop_duplicates - -statement = "df.drop_duplicates(['key1', 'key2'])" -frame_drop_duplicates = Benchmark(statement, setup, - start_date=datetime(2011, 11, 15)) - -statement = "df.drop_duplicates(['key1', 'key2'], inplace=True)" 
-frame_drop_dup_inplace = Benchmark(statement, setup, - start_date=datetime(2012, 5, 16)) - -lib_fast_zip = Benchmark('lib.fast_zip(col_array_list)', setup, - name='lib_fast_zip', - start_date=datetime(2012, 1, 1)) - -setup = setup + """ -df.ix[:10000, :] = np.nan -""" -statement2 = "df.drop_duplicates(['key1', 'key2'])" -frame_drop_duplicates_na = Benchmark(statement2, setup, - start_date=datetime(2012, 5, 15)) - -lib_fast_zip_fillna = Benchmark('lib.fast_zip_fillna(col_array_list)', setup, - start_date=datetime(2012, 5, 15)) - -statement2 = "df.drop_duplicates(['key1', 'key2'], inplace=True)" -frame_drop_dup_na_inplace = Benchmark(statement2, setup, - start_date=datetime(2012, 5, 16)) - -setup = common_setup + """ -s = Series(np.random.randint(0, 1000, size=10000)) -s2 = Series(np.tile(tm.makeStringIndex(1000).values, 10)) -""" - -series_drop_duplicates_int = Benchmark('s.drop_duplicates()', setup, - start_date=datetime(2012, 11, 27)) - -series_drop_duplicates_string = \ - Benchmark('s2.drop_duplicates()', setup, - start_date=datetime(2012, 11, 27)) - -#---------------------------------------------------------------------- -# fillna, many columns - - -setup = common_setup + """ -values = np.random.randn(1000, 1000) -values[::2] = np.nan -df = DataFrame(values) -""" - -frame_fillna_many_columns_pad = Benchmark("df.fillna(method='pad')", - setup, - start_date=datetime(2011, 3, 1)) - -#---------------------------------------------------------------------- -# blog "pandas escaped the zoo" - -setup = common_setup + """ -n = 50000 -indices = tm.makeStringIndex(n) - -def sample(values, k): - from random import shuffle - sampler = np.arange(len(values)) - shuffle(sampler) - return values.take(sampler[:k]) - -subsample_size = 40000 - -x = Series(np.random.randn(50000), indices) -y = Series(np.random.randn(subsample_size), - index=sample(indices, subsample_size)) -""" - -series_align_irregular_string = Benchmark("x + y", setup, - start_date=datetime(2010, 6, 1)) diff --git 
a/vb_suite/replace.py b/vb_suite/replace.py deleted file mode 100644 index 9326aa5becca9..0000000000000 --- a/vb_suite/replace.py +++ /dev/null @@ -1,36 +0,0 @@ -from vbench.api import Benchmark -from datetime import datetime - -common_setup = """from .pandas_vb_common import * -from datetime import timedelta - -N = 1000000 - -try: - rng = date_range('1/1/2000', periods=N, freq='min') -except NameError: - rng = DatetimeIndex('1/1/2000', periods=N, offset=datetools.Minute()) - date_range = DateRange - -ts = Series(np.random.randn(N), index=rng) -""" - -large_dict_setup = """from .pandas_vb_common import * -from pandas.compat import range -n = 10 ** 6 -start_value = 10 ** 5 -to_rep = dict((i, start_value + i) for i in range(n)) -s = Series(np.random.randint(n, size=10 ** 3)) -""" - -replace_fillna = Benchmark('ts.fillna(0., inplace=True)', common_setup, - name='replace_fillna', - start_date=datetime(2012, 4, 4)) -replace_replacena = Benchmark('ts.replace(np.nan, 0., inplace=True)', - common_setup, - name='replace_replacena', - start_date=datetime(2012, 5, 15)) -replace_large_dict = Benchmark('s.replace(to_rep, inplace=True)', - large_dict_setup, - name='replace_large_dict', - start_date=datetime(2014, 4, 6)) diff --git a/vb_suite/reshape.py b/vb_suite/reshape.py deleted file mode 100644 index daab96103f2c5..0000000000000 --- a/vb_suite/reshape.py +++ /dev/null @@ -1,65 +0,0 @@ -from vbench.api import Benchmark -from datetime import datetime - -common_setup = """from .pandas_vb_common import * -index = MultiIndex.from_arrays([np.arange(100).repeat(100), - np.roll(np.tile(np.arange(100), 100), 25)]) -df = DataFrame(np.random.randn(10000, 4), index=index) -""" - -reshape_unstack_simple = Benchmark('df.unstack(1)', common_setup, - start_date=datetime(2011, 10, 1)) - -setup = common_setup + """ -udf = df.unstack(1) -""" - -reshape_stack_simple = Benchmark('udf.stack()', setup, - start_date=datetime(2011, 10, 1)) - -setup = common_setup + """ -def unpivot(frame): - N, K = 
frame.shape - data = {'value' : frame.values.ravel('F'), - 'variable' : np.asarray(frame.columns).repeat(N), - 'date' : np.tile(np.asarray(frame.index), K)} - return DataFrame(data, columns=['date', 'variable', 'value']) -index = date_range('1/1/2000', periods=10000, freq='h') -df = DataFrame(randn(10000, 50), index=index, columns=range(50)) -pdf = unpivot(df) -f = lambda: pdf.pivot('date', 'variable', 'value') -""" - -reshape_pivot_time_series = Benchmark('f()', setup, - start_date=datetime(2012, 5, 1)) - -# Sparse key space, re: #2278 - -setup = common_setup + """ -NUM_ROWS = 1000 -for iter in range(10): - df = DataFrame({'A' : np.random.randint(50, size=NUM_ROWS), - 'B' : np.random.randint(50, size=NUM_ROWS), - 'C' : np.random.randint(-10,10, size=NUM_ROWS), - 'D' : np.random.randint(-10,10, size=NUM_ROWS), - 'E' : np.random.randint(10, size=NUM_ROWS), - 'F' : np.random.randn(NUM_ROWS)}) - idf = df.set_index(['A', 'B', 'C', 'D', 'E']) - if len(idf.index.unique()) == NUM_ROWS: - break -""" - -unstack_sparse_keyspace = Benchmark('idf.unstack()', setup, - start_date=datetime(2011, 10, 1)) - -# Melt - -setup = common_setup + """ -from pandas.core.reshape import melt -df = DataFrame(np.random.randn(10000, 3), columns=['A', 'B', 'C']) -df['id1'] = np.random.randint(0, 10, 10000) -df['id2'] = np.random.randint(100, 1000, 10000) -""" - -melt_dataframe = Benchmark("melt(df, id_vars=['id1', 'id2'])", setup, - start_date=datetime(2012, 8, 1)) diff --git a/vb_suite/run_suite.py b/vb_suite/run_suite.py deleted file mode 100755 index 43bf24faae43a..0000000000000 --- a/vb_suite/run_suite.py +++ /dev/null @@ -1,15 +0,0 @@ -#!/usr/bin/env python -from vbench.api import BenchmarkRunner -from suite import * - - -def run_process(): - runner = BenchmarkRunner(benchmarks, REPO_PATH, REPO_URL, - BUILD, DB_PATH, TMP_DIR, PREPARE, - always_clean=True, - run_option='eod', start_date=START_DATE, - module_dependencies=dependencies) - runner.run() - -if __name__ == '__main__': - 
run_process() diff --git a/vb_suite/series_methods.py b/vb_suite/series_methods.py deleted file mode 100644 index cd8688495fa09..0000000000000 --- a/vb_suite/series_methods.py +++ /dev/null @@ -1,39 +0,0 @@ -from vbench.api import Benchmark -from datetime import datetime - -common_setup = """from .pandas_vb_common import * -""" - -setup = common_setup + """ -s1 = Series(np.random.randn(10000)) -s2 = Series(np.random.randint(1, 10, 10000)) -s3 = Series(np.random.randint(1, 10, 100000)).astype('int64') -values = [1,2] -s4 = s3.astype('object') -""" - -series_nlargest1 = Benchmark('s1.nlargest(3, take_last=True);' - 's1.nlargest(3, take_last=False)', - setup, - start_date=datetime(2014, 1, 25)) -series_nlargest2 = Benchmark('s2.nlargest(3, take_last=True);' - 's2.nlargest(3, take_last=False)', - setup, - start_date=datetime(2014, 1, 25)) - -series_nsmallest2 = Benchmark('s1.nsmallest(3, take_last=True);' - 's1.nsmallest(3, take_last=False)', - setup, - start_date=datetime(2014, 1, 25)) - -series_nsmallest2 = Benchmark('s2.nsmallest(3, take_last=True);' - 's2.nsmallest(3, take_last=False)', - setup, - start_date=datetime(2014, 1, 25)) - -series_isin_int64 = Benchmark('s3.isin(values)', - setup, - start_date=datetime(2014, 1, 25)) -series_isin_object = Benchmark('s4.isin(values)', - setup, - start_date=datetime(2014, 1, 25)) diff --git a/vb_suite/source/conf.py b/vb_suite/source/conf.py deleted file mode 100644 index d83448fd97d09..0000000000000 --- a/vb_suite/source/conf.py +++ /dev/null @@ -1,225 +0,0 @@ -# -*- coding: utf-8 -*- -# -# pandas documentation build configuration file, created by -# -# This file is execfile()d with the current directory set to its containing dir. -# -# Note that not all possible configuration values are present in this -# autogenerated file. -# -# All configuration values have a default; values that are commented out -# serve to show the default. 
- -import sys -import os - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# sys.path.append(os.path.abspath('.')) -sys.path.insert(0, os.path.abspath('../sphinxext')) - -sys.path.extend([ - - # numpy standard doc extensions - os.path.join(os.path.dirname(__file__), - '..', '../..', - 'sphinxext') - -]) - -# -- General configuration ----------------------------------------------- - -# Add any Sphinx extension module names here, as strings. They can be extensions -# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. sphinxext. - -extensions = ['sphinx.ext.autodoc', - 'sphinx.ext.doctest'] - -# Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates', '_templates/autosummary'] - -# The suffix of source filenames. -source_suffix = '.rst' - -# The encoding of source files. -# source_encoding = 'utf-8' - -# The master toctree document. -master_doc = 'index' - -# General information about the project. -project = u'pandas' -copyright = u'2008-2011, the pandas development team' - -# The version info for the project you're documenting, acts as replacement for -# |version| and |release|, also used in various other places throughout the -# built documents. -# -# The short X.Y version. -import pandas - -# version = '%s r%s' % (pandas.__version__, svn_version()) -version = '%s' % (pandas.__version__) - -# The full version, including alpha/beta/rc tags. -release = version - -# JP: added from sphinxdocs -autosummary_generate = True - -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. 
-# language = None - -# There are two options for replacing |today|: either, you set today to some -# non-false value, then it is used: -# today = '' -# Else, today_fmt is used as the format for a strftime call. -# today_fmt = '%B %d, %Y' - -# List of documents that shouldn't be included in the build. -# unused_docs = [] - -# List of directories, relative to source directory, that shouldn't be searched -# for source files. -exclude_trees = [] - -# The reST default role (used for this markup: `text`) to use for all documents. -# default_role = None - -# If true, '()' will be appended to :func: etc. cross-reference text. -# add_function_parentheses = True - -# If true, the current module name will be prepended to all description -# unit titles (such as .. function::). -# add_module_names = True - -# If true, sectionauthor and moduleauthor directives will be shown in the -# output. They are ignored by default. -# show_authors = False - -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' - -# A list of ignored prefixes for module index sorting. -# modindex_common_prefix = [] - - -# -- Options for HTML output --------------------------------------------- - -# The theme to use for HTML and HTML Help pages. Major themes that come with -# Sphinx are currently 'default' and 'sphinxdoc'. -html_theme = 'agogo' - -# The style sheet to use for HTML and HTML Help pages. A file of that name -# must exist either in Sphinx' static/ path, or in one of the custom paths -# given in html_static_path. -# html_style = 'statsmodels.css' - -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. -# html_theme_options = {} - -# Add any paths that contain custom themes here, relative to this directory. -html_theme_path = ['themes'] - -# The name for this set of Sphinx documents. If None, it defaults to -# " v documentation". 
-html_title = 'Vbench performance benchmarks for pandas' - -# A shorter title for the navigation bar. Default is the same as html_title. -# html_short_title = None - -# The name of an image file (relative to this directory) to place at the top -# of the sidebar. -# html_logo = None - -# The name of an image file (within the static path) to use as favicon of the -# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 -# pixels large. -# html_favicon = None - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] - -# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, -# using the given strftime format. -# html_last_updated_fmt = '%b %d, %Y' - -# If true, SmartyPants will be used to convert quotes and dashes to -# typographically correct entities. -# html_use_smartypants = True - -# Custom sidebar templates, maps document names to template names. -# html_sidebars = {} - -# Additional templates that should be rendered to pages, maps page names to -# template names. -# html_additional_pages = {} - -# If false, no module index is generated. -html_use_modindex = True - -# If false, no index is generated. -# html_use_index = True - -# If true, the index is split into individual pages for each letter. -# html_split_index = False - -# If true, links to the reST sources are added to the pages. -# html_show_sourcelink = True - -# If true, an OpenSearch description file will be output, and all pages will -# contain a tag referring to it. The value of this option must be the -# base URL from which the finished HTML is served. -# html_use_opensearch = '' - -# If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml"). -# html_file_suffix = '' - -# Output file base name for HTML help builder. 
-htmlhelp_basename = 'performance' - - -# -- Options for LaTeX output -------------------------------------------- - -# The paper size ('letter' or 'a4'). -# latex_paper_size = 'letter' - -# The font size ('10pt', '11pt' or '12pt'). -# latex_font_size = '10pt' - -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, author, documentclass [howto/manual]). -latex_documents = [ - ('index', 'performance.tex', - u'pandas vbench Performance Benchmarks', - u'Wes McKinney', 'manual'), -] - -# The name of an image file (relative to this directory) to place at the top of -# the title page. -# latex_logo = None - -# For "manual" documents, if this is true, then toplevel headings are parts, -# not chapters. -# latex_use_parts = False - -# Additional stuff for the LaTeX preamble. -# latex_preamble = '' - -# Documents to append as an appendix to all manuals. -# latex_appendices = [] - -# If false, no module index is generated. -# latex_use_modindex = True - - -# Example configuration for intersphinx: refer to the Python standard library. -# intersphinx_mapping = {'http://docs.scipy.org/': None} -import glob -autosummary_generate = glob.glob("*.rst") diff --git a/vb_suite/source/themes/agogo/layout.html b/vb_suite/source/themes/agogo/layout.html deleted file mode 100644 index cd0f3d7ffc9c7..0000000000000 --- a/vb_suite/source/themes/agogo/layout.html +++ /dev/null @@ -1,95 +0,0 @@ -{# - agogo/layout.html - ~~~~~~~~~~~~~~~~~ - - Sphinx layout template for the agogo theme, originally written - by Andi Albrecht. - - :copyright: Copyright 2007-2011 by the Sphinx team, see AUTHORS. - :license: BSD, see LICENSE for details. -#} -{% extends "basic/layout.html" %} - -{% block header %} -
-
- {%- if logo %} - - {%- endif %} - {%- block headertitle %} -

{{ shorttitle|e }}

- {%- endblock %} -
- {%- for rellink in rellinks|reverse %} - {{ rellink[3] }} - {%- if not loop.last %}{{ reldelim2 }}{% endif %} - {%- endfor %} -
-
-
-{% endblock %} - -{% block content %} -
-
- -
- {%- block document %} - {{ super() }} - {%- endblock %} -
-
-
-
-{% endblock %} - -{% block footer %} - -{% endblock %} - -{% block relbar1 %}{% endblock %} -{% block relbar2 %}{% endblock %} diff --git a/vb_suite/source/themes/agogo/static/agogo.css_t b/vb_suite/source/themes/agogo/static/agogo.css_t deleted file mode 100644 index ef909b72e20f6..0000000000000 --- a/vb_suite/source/themes/agogo/static/agogo.css_t +++ /dev/null @@ -1,476 +0,0 @@ -/* - * agogo.css_t - * ~~~~~~~~~~~ - * - * Sphinx stylesheet -- agogo theme. - * - * :copyright: Copyright 2007-2011 by the Sphinx team, see AUTHORS. - * :license: BSD, see LICENSE for details. - * - */ - -* { - margin: 0px; - padding: 0px; -} - -body { - font-family: {{ theme_bodyfont }}; - line-height: 1.4em; - color: black; - background-color: {{ theme_bgcolor }}; -} - - -/* Page layout */ - -div.header, div.content, div.footer { - max-width: {{ theme_pagewidth }}; - margin-left: auto; - margin-right: auto; -} - -div.header-wrapper { - background: {{ theme_headerbg }}; - padding: 1em 1em 0; - border-bottom: 3px solid #2e3436; - min-height: 0px; -} - - -/* Default body styles */ -a { - color: {{ theme_linkcolor }}; -} - -div.bodywrapper a, div.footer a { - text-decoration: underline; -} - -.clearer { - clear: both; -} - -.left { - float: left; -} - -.right { - float: right; -} - -.line-block { - display: block; - margin-top: 1em; - margin-bottom: 1em; -} - -.line-block .line-block { - margin-top: 0; - margin-bottom: 0; - margin-left: 1.5em; -} - -h1, h2, h3, h4 { - font-family: {{ theme_headerfont }}; - font-weight: normal; - color: {{ theme_headercolor2 }}; - margin-bottom: .8em; -} - -h1 { - color: {{ theme_headercolor1 }}; -} - -h2 { - padding-bottom: .5em; - border-bottom: 1px solid {{ theme_headercolor2 }}; -} - -a.headerlink { - visibility: hidden; - color: #dddddd; - padding-left: .3em; -} - -h1:hover > a.headerlink, -h2:hover > a.headerlink, -h3:hover > a.headerlink, -h4:hover > a.headerlink, -h5:hover > a.headerlink, -h6:hover > a.headerlink, -dt:hover > a.headerlink { - 
visibility: visible; -} - -img { - border: 0; -} - -pre { - background-color: #EEE; - padding: 0.5em; -} - -div.admonition { - margin-top: 10px; - margin-bottom: 10px; - padding: 2px 7px 1px 7px; - border-left: 0.2em solid black; -} - -p.admonition-title { - margin: 0px 10px 5px 0px; - font-weight: bold; -} - -dt:target, .highlighted { - background-color: #fbe54e; -} - -/* Header */ - -/* -div.header { - padding-top: 10px; - padding-bottom: 10px; -} -*/ - -div.header {} - -div.header h1 { - font-family: {{ theme_headerfont }}; - font-weight: normal; - font-size: 180%; - letter-spacing: .08em; -} - -div.header h1 a { - color: white; -} - -div.header div.rel { - text-decoration: none; -} -/* margin-top: 1em; */ - -div.header div.rel a { - margin-top: 1em; - color: {{ theme_headerlinkcolor }}; - letter-spacing: .1em; - text-transform: uppercase; - padding: 3px 1em; -} - -p.logo { - float: right; -} - -img.logo { - border: 0; -} - - -/* Content */ -div.content-wrapper { - background-color: white; - padding: 1em; -} -/* - padding-top: 20px; - padding-bottom: 20px; -*/ - -/* float: left; */ - -div.document { - max-width: {{ theme_documentwidth }}; -} - -div.body { - padding-right: 2em; - text-align: {{ theme_textalign }}; -} - -div.document ul { - margin: 1.5em; - list-style-type: square; -} - -div.document dd { - margin-left: 1.2em; - margin-top: .4em; - margin-bottom: 1em; -} - -div.document .section { - margin-top: 1.7em; -} -div.document .section:first-child { - margin-top: 0px; -} - -div.document div.highlight { - padding: 3px; - background-color: #eeeeec; - border-top: 2px solid #dddddd; - border-bottom: 2px solid #dddddd; - margin-top: .8em; - margin-bottom: .8em; -} - -div.document h2 { - margin-top: .7em; -} - -div.document p { - margin-bottom: .5em; -} - -div.document li.toctree-l1 { - margin-bottom: 1em; -} - -div.document .descname { - font-weight: bold; -} - -div.document .docutils.literal { - background-color: #eeeeec; - padding: 1px; -} - -div.document 
.docutils.xref.literal { - background-color: transparent; - padding: 0px; -} - -div.document blockquote { - margin: 1em; -} - -div.document ol { - margin: 1.5em; -} - - -/* Sidebar */ - - -div.sidebar { - width: {{ theme_sidebarwidth }}; - padding: 0 1em; - float: right; - font-size: .93em; -} - -div.sidebar a, div.header a { - text-decoration: none; -} - -div.sidebar a:hover, div.header a:hover { - text-decoration: underline; -} - -div.sidebar h3 { - color: #2e3436; - text-transform: uppercase; - font-size: 130%; - letter-spacing: .1em; -} - -div.sidebar ul { - list-style-type: none; -} - -div.sidebar li.toctree-l1 a { - display: block; - padding: 1px; - border: 1px solid #dddddd; - background-color: #eeeeec; - margin-bottom: .4em; - padding-left: 3px; - color: #2e3436; -} - -div.sidebar li.toctree-l2 a { - background-color: transparent; - border: none; - margin-left: 1em; - border-bottom: 1px solid #dddddd; -} - -div.sidebar li.toctree-l3 a { - background-color: transparent; - border: none; - margin-left: 2em; - border-bottom: 1px solid #dddddd; -} - -div.sidebar li.toctree-l2:last-child a { - border-bottom: none; -} - -div.sidebar li.toctree-l1.current a { - border-right: 5px solid {{ theme_headerlinkcolor }}; -} - -div.sidebar li.toctree-l1.current li.toctree-l2 a { - border-right: none; -} - - -/* Footer */ - -div.footer-wrapper { - background: {{ theme_footerbg }}; - border-top: 4px solid #babdb6; - padding-top: 10px; - padding-bottom: 10px; - min-height: 80px; -} - -div.footer, div.footer a { - color: #888a85; -} - -div.footer .right { - text-align: right; -} - -div.footer .left { - text-transform: uppercase; -} - - -/* Styles copied from basic theme */ - -img.align-left, .figure.align-left, object.align-left { - clear: left; - float: left; - margin-right: 1em; -} - -img.align-right, .figure.align-right, object.align-right { - clear: right; - float: right; - margin-left: 1em; -} - -img.align-center, .figure.align-center, object.align-center { - display: 
block; - margin-left: auto; - margin-right: auto; -} - -.align-left { - text-align: left; -} - -.align-center { - clear: both; - text-align: center; -} - -.align-right { - text-align: right; -} - -/* -- search page ----------------------------------------------------------- */ - -ul.search { - margin: 10px 0 0 20px; - padding: 0; -} - -ul.search li { - padding: 5px 0 5px 20px; - background-image: url(file.png); - background-repeat: no-repeat; - background-position: 0 7px; -} - -ul.search li a { - font-weight: bold; -} - -ul.search li div.context { - color: #888; - margin: 2px 0 0 30px; - text-align: left; -} - -ul.keywordmatches li.goodmatch a { - font-weight: bold; -} - -/* -- index page ------------------------------------------------------------ */ - -table.contentstable { - width: 90%; -} - -table.contentstable p.biglink { - line-height: 150%; -} - -a.biglink { - font-size: 1.3em; -} - -span.linkdescr { - font-style: italic; - padding-top: 5px; - font-size: 90%; -} - -/* -- general index --------------------------------------------------------- */ - -table.indextable td { - text-align: left; - vertical-align: top; -} - -table.indextable dl, table.indextable dd { - margin-top: 0; - margin-bottom: 0; -} - -table.indextable tr.pcap { - height: 10px; -} - -table.indextable tr.cap { - margin-top: 10px; - background-color: #f2f2f2; -} - -img.toggler { - margin-right: 3px; - margin-top: 3px; - cursor: pointer; -} - -/* -- viewcode extension ---------------------------------------------------- */ - -.viewcode-link { - float: right; -} - -.viewcode-back { - float: right; - font-family:: {{ theme_bodyfont }}; -} - -div.viewcode-block:target { - margin: -1px -3px; - padding: 0 3px; - background-color: #f4debf; - border-top: 1px solid #ac9; - border-bottom: 1px solid #ac9; -} - -th.field-name { - white-space: nowrap; -} diff --git a/vb_suite/source/themes/agogo/static/bgfooter.png b/vb_suite/source/themes/agogo/static/bgfooter.png deleted file mode 100644 index 
9ce5bdd902943fdf8b0c0ca6a545297e1e2cc665..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 434 zcmV;j0ZsmiP)Px#24YJ`L;%wO*8tD73qoQ5000SaNLh0L01FcU01FcV0GgZ_00007bV*G`2iXD> z2Q(2CT#42I000?uMObu0Z*6U5Zgc=ca%Ew3Wn>_CX>@2HM@dakSAh-}0003ENklR?sq9~H`=l5UI-{JW_f9!)=Hwush3JC}Y z1gFM&r>$lJNPt^*1k!w;l|obx>lr$2IOaI$n=(gBBaj^I0=y%@K5N&GIU&-%OE_~V zX=m=_j7d`hvubQRuF+xT63vIfWnC3%kKN*T3l7ob3nEC2R->wU1Y)4)(7_t^thiqb zj$CO7xBn9gg`*!MY$}SI|_*)!a*&V0w7h>cUb&$Grh37iJ=C%Yn c>}w1E0Z4f>1OEiDlmGw#07*qoM6N<$g4BwtIsgCw diff --git a/vb_suite/source/themes/agogo/static/bgtop.png b/vb_suite/source/themes/agogo/static/bgtop.png deleted file mode 100644 index a0d4709bac8f79943a817195c086461c8c4d5419..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 430 zcmV;f0a5;mP)Px#24YJ`L;zI)R{&FzA;Z4_000SaNLh0L01FcU01FcV0GgZ_00007bV*G`2iXD> z2Q3AZhV-)l000?uMObu0Z*6U5Zgc=ca%Ew3Wn>_CX>@2HM@dakSAh-}0003ANklMo8vqN`cM=KwSQV|n zk}naE+VzlN;kK@Ej${PSkI$-R6-Yfp`zA;^O$`)7`gRi{-0i?owGIbX{p>Nc##93U z;sA|ayOYkG%F9M0iEMUM*s3NDYSS=KN2ht8Rv|7nv77i{NTO47R)}V_+2H~mL-nTR z_8j}*%6Qm8?#7NU2kM$#gcP&kO?iw|n}ynz+r-~FA9nKcZnfixWvZ&d28Cc_6&_Pe zMpbjI>9r+<=}NIDz4mCd3U++H?rrHcYxH&eeB|)>mnv*N#44ILM2zL6yU!VVWSrgp Y0Yu&#qm)=by8r+H07*qoM6N<$f@HC)j{pDw diff --git a/vb_suite/source/themes/agogo/theme.conf b/vb_suite/source/themes/agogo/theme.conf deleted file mode 100644 index 3fc88580f1ab4..0000000000000 --- a/vb_suite/source/themes/agogo/theme.conf +++ /dev/null @@ -1,19 +0,0 @@ -[theme] -inherit = basic -stylesheet = agogo.css -pygments_style = tango - -[options] -bodyfont = "Verdana", Arial, sans-serif -headerfont = "Georgia", "Times New Roman", serif -pagewidth = 70em -documentwidth = 50em -sidebarwidth = 20em -bgcolor = #eeeeec -headerbg = url(bgtop.png) top left repeat-x -footerbg = url(bgfooter.png) top left repeat-x -linkcolor = #ce5c00 -headercolor1 = #204a87 -headercolor2 = #3465a4 -headerlinkcolor = #fcaf3e -textalign = justify \ No newline at end of 
file diff --git a/vb_suite/sparse.py b/vb_suite/sparse.py deleted file mode 100644 index 53e2778ee0865..0000000000000 --- a/vb_suite/sparse.py +++ /dev/null @@ -1,65 +0,0 @@ -from vbench.benchmark import Benchmark -from datetime import datetime - -common_setup = """from .pandas_vb_common import * -""" - -#---------------------------------------------------------------------- - -setup = common_setup + """ -from pandas.core.sparse import SparseSeries, SparseDataFrame - -K = 50 -N = 50000 -rng = np.asarray(date_range('1/1/2000', periods=N, - freq='T')) - -# rng2 = np.asarray(rng).astype('M8[ns]').astype('i8') - -series = {} -for i in range(1, K + 1): - data = np.random.randn(N)[:-i] - this_rng = rng[:-i] - data[100:] = np.nan - series[i] = SparseSeries(data, index=this_rng) -""" -stmt = "SparseDataFrame(series)" - -bm_sparse1 = Benchmark(stmt, setup, name="sparse_series_to_frame", - start_date=datetime(2011, 6, 1)) - - -setup = common_setup + """ -from pandas.core.sparse import SparseDataFrame -""" - -stmt = "SparseDataFrame(columns=np.arange(100), index=np.arange(1000))" - -sparse_constructor = Benchmark(stmt, setup, name="sparse_frame_constructor", - start_date=datetime(2012, 6, 1)) - - -setup = common_setup + """ -s = pd.Series([np.nan] * 10000) -s[0] = 3.0 -s[100] = -1.0 -s[999] = 12.1 -s.index = pd.MultiIndex.from_product((range(10), range(10), range(10), range(10))) -ss = s.to_sparse() -""" - -stmt = "ss.to_coo(row_levels=[0, 1], column_levels=[2, 3], sort_labels=True)" - -sparse_series_to_coo = Benchmark(stmt, setup, name="sparse_series_to_coo", - start_date=datetime(2015, 1, 3)) - -setup = common_setup + """ -import scipy.sparse -import pandas.sparse.series -A = scipy.sparse.coo_matrix(([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])), shape=(100, 100)) -""" - -stmt = "ss = pandas.sparse.series.SparseSeries.from_coo(A)" - -sparse_series_from_coo = Benchmark(stmt, setup, name="sparse_series_from_coo", - start_date=datetime(2015, 1, 3)) diff --git a/vb_suite/stat_ops.py 
b/vb_suite/stat_ops.py deleted file mode 100644 index 8d7c30dc9fdcf..0000000000000 --- a/vb_suite/stat_ops.py +++ /dev/null @@ -1,126 +0,0 @@ -from vbench.benchmark import Benchmark -from datetime import datetime - -common_setup = """from .pandas_vb_common import * -""" - -#---------------------------------------------------------------------- -# nanops - -setup = common_setup + """ -s = Series(np.random.randn(100000), index=np.arange(100000)) -s[::2] = np.nan -""" - -stat_ops_series_std = Benchmark("s.std()", setup) - -#---------------------------------------------------------------------- -# ops by level - -setup = common_setup + """ -index = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)], - labels=[np.arange(10).repeat(10000), - np.tile(np.arange(100).repeat(100), 10), - np.tile(np.tile(np.arange(100), 100), 10)]) -random.shuffle(index.values) -df = DataFrame(np.random.randn(len(index), 4), index=index) -df_level = DataFrame(np.random.randn(100, 4), index=index.levels[1]) -""" - -stat_ops_level_frame_sum = \ - Benchmark("df.sum(level=1)", setup, - start_date=datetime(2011, 11, 15)) - -stat_ops_level_frame_sum_multiple = \ - Benchmark("df.sum(level=[0, 1])", setup, repeat=1, - start_date=datetime(2011, 11, 15)) - -stat_ops_level_series_sum = \ - Benchmark("df[1].sum(level=1)", setup, - start_date=datetime(2011, 11, 15)) - -stat_ops_level_series_sum_multiple = \ - Benchmark("df[1].sum(level=[0, 1])", setup, repeat=1, - start_date=datetime(2011, 11, 15)) - -sum_setup = common_setup + """ -df = DataFrame(np.random.randn(100000, 4)) -dfi = DataFrame(np.random.randint(1000, size=df.shape)) -""" - -stat_ops_frame_sum_int_axis_0 = \ - Benchmark("dfi.sum()", sum_setup, start_date=datetime(2013, 7, 25)) - -stat_ops_frame_sum_float_axis_0 = \ - Benchmark("df.sum()", sum_setup, start_date=datetime(2013, 7, 25)) - -stat_ops_frame_mean_int_axis_0 = \ - Benchmark("dfi.mean()", sum_setup, start_date=datetime(2013, 7, 25)) - -stat_ops_frame_mean_float_axis_0 = 
\ - Benchmark("df.mean()", sum_setup, start_date=datetime(2013, 7, 25)) - -stat_ops_frame_sum_int_axis_1 = \ - Benchmark("dfi.sum(1)", sum_setup, start_date=datetime(2013, 7, 25)) - -stat_ops_frame_sum_float_axis_1 = \ - Benchmark("df.sum(1)", sum_setup, start_date=datetime(2013, 7, 25)) - -stat_ops_frame_mean_int_axis_1 = \ - Benchmark("dfi.mean(1)", sum_setup, start_date=datetime(2013, 7, 25)) - -stat_ops_frame_mean_float_axis_1 = \ - Benchmark("df.mean(1)", sum_setup, start_date=datetime(2013, 7, 25)) - -#---------------------------------------------------------------------- -# rank - -setup = common_setup + """ -values = np.concatenate([np.arange(100000), - np.random.randn(100000), - np.arange(100000)]) -s = Series(values) -""" - -stats_rank_average = Benchmark('s.rank()', setup, - start_date=datetime(2011, 12, 12)) - -stats_rank_pct_average = Benchmark('s.rank(pct=True)', setup, - start_date=datetime(2014, 1, 16)) -stats_rank_pct_average_old = Benchmark('s.rank() / len(s)', setup, - start_date=datetime(2014, 1, 16)) -setup = common_setup + """ -values = np.random.randint(0, 100000, size=200000) -s = Series(values) -""" - -stats_rank_average_int = Benchmark('s.rank()', setup, - start_date=datetime(2011, 12, 12)) - -setup = common_setup + """ -df = DataFrame(np.random.randn(5000, 50)) -""" - -stats_rank2d_axis1_average = Benchmark('df.rank(1)', setup, - start_date=datetime(2011, 12, 12)) - -stats_rank2d_axis0_average = Benchmark('df.rank()', setup, - start_date=datetime(2011, 12, 12)) - -# rolling functions - -setup = common_setup + """ -arr = np.random.randn(100000) -""" - -stats_rolling_mean = Benchmark('rolling_mean(arr, 100)', setup, - start_date=datetime(2011, 6, 1)) - -# spearman correlation - -setup = common_setup + """ -df = DataFrame(np.random.randn(1000, 30)) -""" - -stats_corr_spearman = Benchmark("df.corr(method='spearman')", setup, - start_date=datetime(2011, 12, 4)) diff --git a/vb_suite/strings.py b/vb_suite/strings.py deleted file mode 100644 
index 0948df5673a0d..0000000000000 --- a/vb_suite/strings.py +++ /dev/null @@ -1,59 +0,0 @@ -from vbench.api import Benchmark - -common_setup = """from .pandas_vb_common import * -""" - -setup = common_setup + """ -import string -import itertools as IT - -def make_series(letters, strlen, size): - return Series( - [str(x) for x in np.fromiter(IT.cycle(letters), count=size*strlen, dtype='|S1') - .view('|S{}'.format(strlen))]) - -many = make_series('matchthis'+string.ascii_uppercase, strlen=19, size=10000) # 31% matches -few = make_series('matchthis'+string.ascii_uppercase*42, strlen=19, size=10000) # 1% matches -""" - -strings_cat = Benchmark("many.str.cat(sep=',')", setup) -strings_title = Benchmark("many.str.title()", setup) -strings_count = Benchmark("many.str.count('matchthis')", setup) -strings_contains_many = Benchmark("many.str.contains('matchthis')", setup) -strings_contains_few = Benchmark("few.str.contains('matchthis')", setup) -strings_contains_many_noregex = Benchmark( - "many.str.contains('matchthis', regex=False)", setup) -strings_contains_few_noregex = Benchmark( - "few.str.contains('matchthis', regex=False)", setup) -strings_startswith = Benchmark("many.str.startswith('matchthis')", setup) -strings_endswith = Benchmark("many.str.endswith('matchthis')", setup) -strings_lower = Benchmark("many.str.lower()", setup) -strings_upper = Benchmark("many.str.upper()", setup) -strings_replace = Benchmark("many.str.replace(r'(matchthis)', r'\1\1')", setup) -strings_repeat = Benchmark( - "many.str.repeat(list(IT.islice(IT.cycle(range(1,4)),len(many))))", setup) -strings_match = Benchmark("many.str.match(r'mat..this')", setup) -strings_extract = Benchmark("many.str.extract(r'(\w*)matchthis(\w*)')", setup) -strings_join_split = Benchmark("many.str.join(r'--').str.split('--')", setup) -strings_join_split_expand = Benchmark("many.str.join(r'--').str.split('--',expand=True)", setup) -strings_len = Benchmark("many.str.len()", setup) -strings_findall = 
Benchmark("many.str.findall(r'[A-Z]+')", setup) -strings_pad = Benchmark("many.str.pad(100, side='both')", setup) -strings_center = Benchmark("many.str.center(100)", setup) -strings_slice = Benchmark("many.str.slice(5,15,2)", setup) -strings_strip = Benchmark("many.str.strip('matchthis')", setup) -strings_lstrip = Benchmark("many.str.lstrip('matchthis')", setup) -strings_rstrip = Benchmark("many.str.rstrip('matchthis')", setup) -strings_get = Benchmark("many.str.get(0)", setup) - -setup = setup + """ -s = make_series(string.ascii_uppercase, strlen=10, size=10000).str.join('|') -""" -strings_get_dummies = Benchmark("s.str.get_dummies('|')", setup) - -setup = common_setup + """ -import pandas.util.testing as testing -ser = Series(testing.makeUnicodeIndex()) -""" - -strings_encode_decode = Benchmark("ser.str.encode('utf-8').str.decode('utf-8')", setup) diff --git a/vb_suite/suite.py b/vb_suite/suite.py deleted file mode 100644 index 45053b6610896..0000000000000 --- a/vb_suite/suite.py +++ /dev/null @@ -1,164 +0,0 @@ -from vbench.api import Benchmark, GitRepo -from datetime import datetime - -import os - -modules = ['attrs_caching', - 'binary_ops', - 'ctors', - 'frame_ctor', - 'frame_methods', - 'groupby', - 'index_object', - 'indexing', - 'io_bench', - 'io_sql', - 'inference', - 'hdfstore_bench', - 'join_merge', - 'gil', - 'miscellaneous', - 'panel_ctor', - 'packers', - 'parser_vb', - 'panel_methods', - 'plotting', - 'reindex', - 'replace', - 'sparse', - 'strings', - 'reshape', - 'stat_ops', - 'timeseries', - 'timedelta', - 'eval'] - -by_module = {} -benchmarks = [] - -for modname in modules: - ref = __import__(modname) - by_module[modname] = [v for v in ref.__dict__.values() - if isinstance(v, Benchmark)] - benchmarks.extend(by_module[modname]) - -for bm in benchmarks: - assert(bm.name is not None) - -import getpass -import sys - -USERNAME = getpass.getuser() - -if sys.platform == 'darwin': - HOME = '/Users/%s' % USERNAME -else: - HOME = '/home/%s' % USERNAME - -try: 
- import ConfigParser - - config = ConfigParser.ConfigParser() - config.readfp(open(os.path.expanduser('~/.vbenchcfg'))) - - REPO_PATH = config.get('setup', 'repo_path') - REPO_URL = config.get('setup', 'repo_url') - DB_PATH = config.get('setup', 'db_path') - TMP_DIR = config.get('setup', 'tmp_dir') -except: - REPO_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), "../")) - REPO_URL = 'git@github.com:pandas-dev/pandas.git' - DB_PATH = os.path.join(REPO_PATH, 'vb_suite/benchmarks.db') - TMP_DIR = os.path.join(HOME, 'tmp/vb_pandas') - -PREPARE = """ -python setup.py clean -""" -BUILD = """ -python setup.py build_ext --inplace -""" -dependencies = ['pandas_vb_common.py'] - -START_DATE = datetime(2010, 6, 1) - -# repo = GitRepo(REPO_PATH) - -RST_BASE = 'source' - -# HACK! - -# timespan = [datetime(2011, 1, 1), datetime(2012, 1, 1)] - - -def generate_rst_files(benchmarks): - import matplotlib as mpl - mpl.use('Agg') - import matplotlib.pyplot as plt - - vb_path = os.path.join(RST_BASE, 'vbench') - fig_base_path = os.path.join(vb_path, 'figures') - - if not os.path.exists(vb_path): - print('creating %s' % vb_path) - os.makedirs(vb_path) - - if not os.path.exists(fig_base_path): - print('creating %s' % fig_base_path) - os.makedirs(fig_base_path) - - for bmk in benchmarks: - print('Generating rst file for %s' % bmk.name) - rst_path = os.path.join(RST_BASE, 'vbench/%s.txt' % bmk.name) - - fig_full_path = os.path.join(fig_base_path, '%s.png' % bmk.name) - - # make the figure - plt.figure(figsize=(10, 6)) - ax = plt.gca() - bmk.plot(DB_PATH, ax=ax) - - start, end = ax.get_xlim() - - plt.xlim([start - 30, end + 30]) - plt.savefig(fig_full_path, bbox_inches='tight') - plt.close('all') - - fig_rel_path = 'vbench/figures/%s.png' % bmk.name - rst_text = bmk.to_rst(image_path=fig_rel_path) - with open(rst_path, 'w') as f: - f.write(rst_text) - - with open(os.path.join(RST_BASE, 'index.rst'), 'w') as f: - print >> f, """ -Performance Benchmarks -====================== - 
-These historical benchmark graphs were produced with `vbench -`__. - -The ``.pandas_vb_common`` setup script can be found here_ - -.. _here: https://github.com/pandas-dev/pandas/tree/master/vb_suite - -Produced on a machine with - - - Intel Core i7 950 processor - - (K)ubuntu Linux 12.10 - - Python 2.7.2 64-bit (Enthought Python Distribution 7.1-2) - - NumPy 1.6.1 - -.. toctree:: - :hidden: - :maxdepth: 3 -""" - for modname, mod_bmks in sorted(by_module.items()): - print >> f, ' vb_%s' % modname - modpath = os.path.join(RST_BASE, 'vb_%s.rst' % modname) - with open(modpath, 'w') as mh: - header = '%s\n%s\n\n' % (modname, '=' * len(modname)) - print >> mh, header - - for bmk in mod_bmks: - print >> mh, bmk.name - print >> mh, '-' * len(bmk.name) - print >> mh, '.. include:: vbench/%s.txt\n' % bmk.name diff --git a/vb_suite/test.py b/vb_suite/test.py deleted file mode 100644 index da30c3e1a5f76..0000000000000 --- a/vb_suite/test.py +++ /dev/null @@ -1,67 +0,0 @@ -from pandas import * -import matplotlib.pyplot as plt - -import sqlite3 - -from vbench.git import GitRepo - - -REPO_PATH = '/home/adam/code/pandas' -repo = GitRepo(REPO_PATH) - -con = sqlite3.connect('vb_suite/benchmarks.db') - -bmk = '36900a889961162138c140ce4ae3c205' -# bmk = '9d7b8c04b532df6c2d55ef497039b0ce' -bmk = '4481aa4efa9926683002a673d2ed3dac' -bmk = '00593cd8c03d769669d7b46585161726' -bmk = '3725ab7cd0a0657d7ae70f171c877cea' -bmk = '3cd376d6d6ef802cdea49ac47a67be21' -bmk2 = '459225186023853494bc345fd180f395' -bmk = 'c22ca82e0cfba8dc42595103113c7da3' -bmk = 'e0e651a8e9fbf0270ab68137f8b9df5f' -bmk = '96bda4b9a60e17acf92a243580f2a0c3' - - -def get_results(bmk): - results = con.execute( - "select * from results where checksum='%s'" % bmk).fetchall() - x = Series(dict((t[1], t[3]) for t in results)) - x.index = x.index.map(repo.timestamps.get) - x = x.sort_index() - return x - -x = get_results(bmk) - - -def graph1(): - dm_getitem = get_results('459225186023853494bc345fd180f395') - dm_getvalue = 
get_results('c22ca82e0cfba8dc42595103113c7da3') - - plt.figure() - ax = plt.gca() - - dm_getitem.plot(label='df[col][idx]', ax=ax) - dm_getvalue.plot(label='df.get_value(idx, col)', ax=ax) - - plt.ylabel('ms') - plt.legend(loc='best') - - -def graph2(): - bm = get_results('96bda4b9a60e17acf92a243580f2a0c3') - plt.figure() - ax = plt.gca() - - bm.plot(ax=ax) - plt.ylabel('ms') - -bm = get_results('36900a889961162138c140ce4ae3c205') -fig = plt.figure() -ax = plt.gca() -bm.plot(ax=ax) -fig.autofmt_xdate() - -plt.xlim([bm.dropna().index[0] - datetools.MonthEnd(), - bm.dropna().index[-1] + datetools.MonthEnd()]) -plt.ylabel('ms') diff --git a/vb_suite/test_perf.py b/vb_suite/test_perf.py deleted file mode 100755 index be546b72f9465..0000000000000 --- a/vb_suite/test_perf.py +++ /dev/null @@ -1,616 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -""" -What ----- -vbench is a library which can be used to benchmark the performance -of a codebase over time. -Although vbench can collect data over many commites, generate plots -and other niceties, for Pull-Requests the important thing is the -performance of the HEAD commit against a known-good baseline. - -This script tries to automate the process of comparing these -two commits, and is meant to run out of the box on a fresh -clone. - -How ---- -These are the steps taken: -1) create a temp directory into which vbench will clone the temporary repo. -2) instantiate a vbench runner, using the local repo as the source repo. -3) perform a vbench run for the baseline commit, then the target commit. -4) pull the results for both commits from the db. use pandas to align -everything and calculate a ration for the timing information. -5) print the results to the log file and to stdout. - -""" - -# IMPORTANT NOTE -# -# This script should run on pandas versions at least as far back as 0.9.1. -# devs should be able to use the latest version of this script with -# any dusty old commit and expect it to "just work". 
-# One way in which this is useful is when collecting historical data, -# where writing some logic around this script may prove easier -# in some cases then running vbench directly (think perf bisection). -# -# *please*, when you modify this script for whatever reason, -# make sure you do not break its functionality when running under older -# pandas versions. -# Note that depreaction warnings are turned off in main(), so there's -# no need to change the actual code to supress such warnings. - -import shutil -import os -import sys -import argparse -import tempfile -import time -import re - -import random -import numpy as np - -import pandas as pd -from pandas import DataFrame, Series - -from suite import REPO_PATH -VB_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -DEFAULT_MIN_DURATION = 0.01 -HEAD_COL="head[ms]" -BASE_COL="base[ms]" - -try: - import git # gitpython -except Exception: - print("Error: Please install the `gitpython` package\n") - sys.exit(1) - -class RevParseAction(argparse.Action): - def __call__(self, parser, namespace, values, option_string=None): - import subprocess - cmd = 'git rev-parse --short -verify {0}^{{commit}}'.format(values) - rev_parse = subprocess.check_output(cmd, shell=True) - setattr(namespace, self.dest, rev_parse.strip()) - - -parser = argparse.ArgumentParser(description='Use vbench to measure and compare the performance of commits.') -parser.add_argument('-H', '--head', - help='Execute vbenches using the currently checked out copy.', - dest='head', - action='store_true', - default=False) -parser.add_argument('-b', '--base-commit', - help='The commit serving as performance baseline ', - type=str, action=RevParseAction) -parser.add_argument('-t', '--target-commit', - help='The commit to compare against the baseline (default: HEAD).', - type=str, action=RevParseAction) -parser.add_argument('--base-pickle', - help='name of pickle file with timings data generated by a former `-H -d FILE` run. 
'\ - 'filename must be of the form -*.* or specify --base-commit seperately', - type=str) -parser.add_argument('--target-pickle', - help='name of pickle file with timings data generated by a former `-H -d FILE` run '\ - 'filename must be of the form -*.* or specify --target-commit seperately', - type=str) -parser.add_argument('-m', '--min-duration', - help='Minimum duration (in ms) of baseline test for inclusion in report (default: %.3f).' % DEFAULT_MIN_DURATION, - type=float, - default=0.01) -parser.add_argument('-o', '--output', - metavar="", - dest='log_file', - help='Path of file in which to save the textual report (default: vb_suite.log).') -parser.add_argument('-d', '--outdf', - metavar="FNAME", - dest='outdf', - default=None, - help='Name of file to df.save() the result table into. Will overwrite') -parser.add_argument('-r', '--regex', - metavar="REGEX", - dest='regex', - default="", - help='Regex pat, only tests whose name matches the regext will be run.') -parser.add_argument('-s', '--seed', - metavar="SEED", - dest='seed', - default=1234, - type=int, - help='Integer value to seed PRNG with') -parser.add_argument('-n', '--repeats', - metavar="N", - dest='repeats', - default=3, - type=int, - help='Number of times to run each vbench, result value is the best of') -parser.add_argument('-c', '--ncalls', - metavar="N", - dest='ncalls', - default=3, - type=int, - help='Number of calls to in each repetition of a vbench') -parser.add_argument('-N', '--hrepeats', - metavar="N", - dest='hrepeats', - default=1, - type=int, - help='implies -H, number of times to run the vbench suite on the head commit.\n' - 'Each iteration will yield another column in the output' ) -parser.add_argument('-a', '--affinity', - metavar="a", - dest='affinity', - default=1, - type=int, - help='set processor affinity of process by default bind to cpu/core #1 only. 
' - 'Requires the "affinity" or "psutil" python module, will raise Warning otherwise') -parser.add_argument('-u', '--burnin', - metavar="u", - dest='burnin', - default=1, - type=int, - help='Number of extra iteration per benchmark to perform first, then throw away. ' ) - -parser.add_argument('-S', '--stats', - default=False, - action='store_true', - help='when specified with -N, prints the output of describe() per vbench results. ' ) - -parser.add_argument('--temp-dir', - metavar="PATH", - default=None, - help='Specify temp work dir to use. ccache depends on builds being invoked from consistent directory.' ) - -parser.add_argument('-q', '--quiet', - default=False, - action='store_true', - help='Suppress report output to stdout. ' ) - -def get_results_df(db, rev): - """Takes a git commit hash and returns a Dataframe of benchmark results - """ - bench = DataFrame(db.get_benchmarks()) - results = DataFrame(map(list,db.get_rev_results(rev).values())) - - # Sinch vbench.db._reg_rev_results returns an unlabeled dict, - # we have to break encapsulation a bit. - results.columns = db._results.c.keys() - results = results.join(bench['name'], on='checksum').set_index("checksum") - return results - - -def prprint(s): - print("*** %s" % s) - -def pre_hook(): - import gc - gc.disable() - -def post_hook(): - import gc - gc.enable() - -def profile_comparative(benchmarks): - - from vbench.api import BenchmarkRunner - from vbench.db import BenchmarkDB - from vbench.git import GitRepo - from suite import BUILD, DB_PATH, PREPARE, dependencies - - TMP_DIR = args.temp_dir or tempfile.mkdtemp() - - try: - - prprint("Opening DB at '%s'...\n" % DB_PATH) - db = BenchmarkDB(DB_PATH) - - prprint("Initializing Runner...") - - # all in a good cause... 
- GitRepo._parse_commit_log = _parse_wrapper(args.base_commit) - - runner = BenchmarkRunner( - benchmarks, REPO_PATH, REPO_PATH, BUILD, DB_PATH, - TMP_DIR, PREPARE, always_clean=True, - # run_option='eod', start_date=START_DATE, - module_dependencies=dependencies) - - repo = runner.repo # (steal the parsed git repo used by runner) - h_head = args.target_commit or repo.shas[-1] - h_baseline = args.base_commit - - # ARGH. reparse the repo, without discarding any commits, - # then overwrite the previous parse results - # prprint("Slaughtering kittens...") - (repo.shas, repo.messages, - repo.timestamps, repo.authors) = _parse_commit_log(None,REPO_PATH, - args.base_commit) - - prprint('Target [%s] : %s\n' % (h_head, repo.messages.get(h_head, ""))) - prprint('Baseline [%s] : %s\n' % (h_baseline, - repo.messages.get(h_baseline, ""))) - - prprint("Removing any previous measurements for the commits.") - db.delete_rev_results(h_baseline) - db.delete_rev_results(h_head) - - # TODO: we could skip this, but we need to make sure all - # results are in the DB, which is a little tricky with - # start dates and so on. - prprint("Running benchmarks for baseline [%s]" % h_baseline) - runner._run_and_write_results(h_baseline) - - prprint("Running benchmarks for target [%s]" % h_head) - runner._run_and_write_results(h_head) - - prprint('Processing results...') - - head_res = get_results_df(db, h_head) - baseline_res = get_results_df(db, h_baseline) - - report_comparative(head_res,baseline_res) - - finally: - # print("Disposing of TMP_DIR: %s" % TMP_DIR) - shutil.rmtree(TMP_DIR) - -def prep_pickle_for_total(df, agg_name='median'): - """ - accepts a datafram resulting from invocation with -H -d o.pickle - If multiple data columns are present (-N was used), the - `agg_name` attr of the datafram will be used to reduce - them to a single value per vbench, df.median is used by defa - ult. 
- - Returns a datadrame of the form expected by prep_totals - """ - def prep(df): - agg = getattr(df,agg_name) - df = DataFrame(agg(1)) - cols = list(df.columns) - cols[0]='timing' - df.columns=cols - df['name'] = list(df.index) - return df - - return prep(df) - -def prep_totals(head_res, baseline_res): - """ - Each argument should be a dataframe with 'timing' and 'name' columns - where name is the name of the vbench. - - returns a 'totals' dataframe, suitable as input for print_report. - """ - head_res, baseline_res = head_res.align(baseline_res) - ratio = head_res['timing'] / baseline_res['timing'] - totals = DataFrame({HEAD_COL:head_res['timing'], - BASE_COL:baseline_res['timing'], - 'ratio':ratio, - 'name':baseline_res.name}, - columns=[HEAD_COL, BASE_COL, "ratio", "name"]) - totals = totals.ix[totals[HEAD_COL] > args.min_duration] - # ignore below threshold - totals = totals.dropna( - ).sort("ratio").set_index('name') # sort in ascending order - return totals - -def report_comparative(head_res,baseline_res): - try: - r=git.Repo(VB_DIR) - except: - import pdb - pdb.set_trace() - - totals = prep_totals(head_res,baseline_res) - - h_head = args.target_commit - h_baseline = args.base_commit - h_msg = b_msg = "Unknown" - try: - h_msg = r.commit(h_head).message.strip() - except git.exc.BadObject: - pass - try: - b_msg = r.commit(h_baseline).message.strip() - except git.exc.BadObject: - pass - - - print_report(totals,h_head=h_head,h_msg=h_msg, - h_baseline=h_baseline,b_msg=b_msg) - - if args.outdf: - prprint("The results DataFrame was written to '%s'\n" % args.outdf) - totals.save(args.outdf) - -def profile_head_single(benchmark): - import gc - results = [] - - # just in case - gc.collect() - - try: - from ctypes import cdll, CDLL - cdll.LoadLibrary("libc.so.6") - libc = CDLL("libc.so.6") - libc.malloc_trim(0) - except: - pass - - - N = args.hrepeats + args.burnin - - results = [] - try: - for i in range(N): - gc.disable() - d=dict() - - try: - d = benchmark.run() - - 
except KeyboardInterrupt: - raise - except Exception as e: # if a single vbench bursts into flames, don't die. - err="" - try: - err = d.get("traceback") - if err is None: - err = str(e) - except: - pass - print("%s died with:\n%s\nSkipping...\n" % (benchmark.name, err)) - - results.append(d.get('timing',np.nan)) - gc.enable() - gc.collect() - - finally: - gc.enable() - - if results: - # throw away the burn_in - results = results[args.burnin:] - sys.stdout.write('.') - sys.stdout.flush() - return Series(results, name=benchmark.name) - - # df = DataFrame(results) - # df.columns = ["name",HEAD_COL] - # return df.set_index("name")[HEAD_COL] - -def profile_head(benchmarks): - print( "Performing %d benchmarks (%d runs each)" % ( len(benchmarks), args.hrepeats)) - - ss= [profile_head_single(b) for b in benchmarks] - print("\n") - - results = DataFrame(ss) - results.columns=[ "#%d" %i for i in range(args.hrepeats)] - # results.index = ["#%d" % i for i in range(len(ss))] - # results = results.T - - shas, messages, _,_ = _parse_commit_log(None,REPO_PATH,base_commit="HEAD^") - print_report(results,h_head=shas[-1],h_msg=messages[-1]) - - - if args.outdf: - prprint("The results DataFrame was written to '%s'\n" % args.outdf) - DataFrame(results).save(args.outdf) - -def print_report(df,h_head=None,h_msg="",h_baseline=None,b_msg=""): - - name_width=45 - col_width = 10 - - hdr = ("{:%s}" % name_width).format("Test name") - hdr += ("|{:^%d}" % col_width)* len(df.columns) - hdr += "|" - hdr = hdr.format(*df.columns) - hdr = "-"*len(hdr) + "\n" + hdr + "\n" + "-"*len(hdr) + "\n" - ftr=hdr - s = "\n" - s+= "Invoked with :\n" - s+= "--ncalls: %s\n" % (args.ncalls or 'Auto') - s+= "--repeats: %s\n" % (args.repeats) - s+= "\n\n" - - s += hdr - # import ipdb - # ipdb.set_trace() - for i in range(len(df)): - lfmt = ("{:%s}" % name_width) - lfmt += ("| {:%d.4f} " % (col_width-2))* len(df.columns) - lfmt += "|\n" - s += lfmt.format(df.index[i],*list(df.iloc[i].values)) - - s+= ftr + "\n" - - 
s += "Ratio < 1.0 means the target commit is faster then the baseline.\n" - s += "Seed used: %d\n\n" % args.seed - - if h_head: - s += 'Target [%s] : %s\n' % (h_head, h_msg) - if h_baseline: - s += 'Base [%s] : %s\n\n' % ( - h_baseline, b_msg) - - stats_footer = "\n" - if args.stats : - try: - pd.options.display.expand_frame_repr=False - except: - pass - stats_footer += str(df.T.describe().T) + "\n\n" - - s+= stats_footer - logfile = open(args.log_file, 'w') - logfile.write(s) - logfile.close() - - if not args.quiet: - prprint(s) - - if args.stats and args.quiet: - prprint(stats_footer) - - prprint("Results were also written to the logfile at '%s'" % - args.log_file) - - - -def main(): - from suite import benchmarks - - if not args.log_file: - args.log_file = os.path.abspath( - os.path.join(REPO_PATH, 'vb_suite.log')) - - saved_dir = os.path.curdir - if args.outdf: - # not bullet-proof but enough for us - args.outdf = os.path.realpath(args.outdf) - - if args.log_file: - # not bullet-proof but enough for us - args.log_file = os.path.realpath(args.log_file) - - random.seed(args.seed) - np.random.seed(args.seed) - - if args.base_pickle and args.target_pickle: - baseline_res = prep_pickle_for_total(pd.load(args.base_pickle)) - target_res = prep_pickle_for_total(pd.load(args.target_pickle)) - - report_comparative(target_res, baseline_res) - sys.exit(0) - - if args.affinity is not None: - try: # use psutil rather then stale affinity module. 
Thanks @yarikoptic - import psutil - if hasattr(psutil.Process, 'set_cpu_affinity'): - psutil.Process(os.getpid()).set_cpu_affinity([args.affinity]) - print("CPU affinity set to %d" % args.affinity) - except ImportError: - print("-a/--affinity specified, but the 'psutil' module is not available, aborting.\n") - sys.exit(1) - - print("\n") - prprint("LOG_FILE = %s" % args.log_file) - if args.outdf: - prprint("PICKE_FILE = %s" % args.outdf) - - print("\n") - - # move away from the pandas root dir, to avoid possible import - # surprises - os.chdir(os.path.dirname(os.path.abspath(__file__))) - - benchmarks = [x for x in benchmarks if re.search(args.regex,x.name)] - - for b in benchmarks: - b.repeat = args.repeats - if args.ncalls: - b.ncalls = args.ncalls - - if benchmarks: - if args.head: - profile_head(benchmarks) - else: - profile_comparative(benchmarks) - else: - print( "No matching benchmarks") - - os.chdir(saved_dir) - -# hack , vbench.git ignores some commits, but we -# need to be able to reference any commit. -# modified from vbench.git -def _parse_commit_log(this,repo_path,base_commit=None): - from vbench.git import _convert_timezones - from pandas import Series - from dateutil import parser as dparser - - git_cmd = 'git --git-dir=%s/.git --work-tree=%s ' % (repo_path, repo_path) - githist = git_cmd + ('log --graph --pretty=format:'+ - '\"::%h::%cd::%s::%an\"'+ - ('%s..' 
% base_commit)+ - '> githist.txt') - os.system(githist) - githist = open('githist.txt').read() - os.remove('githist.txt') - - shas = [] - timestamps = [] - messages = [] - authors = [] - for line in githist.split('\n'): - if '*' not in line.split("::")[0]: # skip non-commit lines - continue - - _, sha, stamp, message, author = line.split('::', 4) - - # parse timestamp into datetime object - stamp = dparser.parse(stamp) - - shas.append(sha) - timestamps.append(stamp) - messages.append(message) - authors.append(author) - - # to UTC for now - timestamps = _convert_timezones(timestamps) - - shas = Series(shas, timestamps) - messages = Series(messages, shas) - timestamps = Series(timestamps, shas) - authors = Series(authors, shas) - return shas[::-1], messages[::-1], timestamps[::-1], authors[::-1] - -# even worse, monkey patch vbench -def _parse_wrapper(base_commit): - def inner(repo_path): - return _parse_commit_log(repo_path,base_commit) - return inner - -if __name__ == '__main__': - args = parser.parse_args() - if (not args.head - and not (args.base_commit and args.target_commit) - and not (args.base_pickle and args.target_pickle)): - parser.print_help() - sys.exit(1) - elif ((args.base_pickle or args.target_pickle) and not - (args.base_pickle and args.target_pickle)): - print("Must specify Both --base-pickle and --target-pickle.") - sys.exit(1) - - if ((args.base_pickle or args.target_pickle) and not - (args.base_commit and args.target_commit)): - if not args.base_commit: - print("base_commit not specified, Assuming base_pickle is named -foo.*") - args.base_commit = args.base_pickle.split('-')[0] - if not args.target_commit: - print("target_commit not specified, Assuming target_pickle is named -foo.*") - args.target_commit = args.target_pickle.split('-')[0] - - import warnings - warnings.filterwarnings('ignore',category=FutureWarning) - warnings.filterwarnings('ignore',category=DeprecationWarning) - - if args.base_commit and args.target_commit: - print("Verifying 
specified commits exist in repo...") - r=git.Repo(VB_DIR) - for c in [ args.base_commit, args.target_commit ]: - try: - msg = r.commit(c).message.strip() - except git.BadObject: - print("The commit '%s' was not found, aborting..." % c) - sys.exit(1) - else: - print("%s: %s" % (c,msg)) - - main() diff --git a/vb_suite/timedelta.py b/vb_suite/timedelta.py deleted file mode 100644 index 378968ea1379a..0000000000000 --- a/vb_suite/timedelta.py +++ /dev/null @@ -1,32 +0,0 @@ -from vbench.api import Benchmark -from datetime import datetime - -common_setup = """from .pandas_vb_common import * -from pandas import to_timedelta -""" - -#---------------------------------------------------------------------- -# conversion - -setup = common_setup + """ -arr = np.random.randint(0,1000,size=10000) -""" - -stmt = "to_timedelta(arr,unit='s')" -timedelta_convert_int = Benchmark(stmt, setup, start_date=datetime(2014, 1, 1)) - -setup = common_setup + """ -arr = np.random.randint(0,1000,size=10000) -arr = [ '{0} days'.format(i) for i in arr ] -""" - -stmt = "to_timedelta(arr)" -timedelta_convert_string = Benchmark(stmt, setup, start_date=datetime(2014, 1, 1)) - -setup = common_setup + """ -arr = np.random.randint(0,60,size=10000) -arr = [ '00:00:{0:02d}'.format(i) for i in arr ] -""" - -stmt = "to_timedelta(arr)" -timedelta_convert_string_seconds = Benchmark(stmt, setup, start_date=datetime(2014, 1, 1)) diff --git a/vb_suite/timeseries.py b/vb_suite/timeseries.py deleted file mode 100644 index 15bc89d62305f..0000000000000 --- a/vb_suite/timeseries.py +++ /dev/null @@ -1,445 +0,0 @@ -from vbench.api import Benchmark -from datetime import datetime -from pandas import * - -N = 100000 -try: - rng = date_range(start='1/1/2000', periods=N, freq='min') -except NameError: - rng = DatetimeIndex(start='1/1/2000', periods=N, freq='T') - def date_range(start=None, end=None, periods=None, freq=None): - return DatetimeIndex(start=start, end=end, periods=periods, offset=freq) - - -common_setup = 
"""from .pandas_vb_common import * -from datetime import timedelta -N = 100000 - -rng = date_range(start='1/1/2000', periods=N, freq='T') - -if hasattr(Series, 'convert'): - Series.resample = Series.convert - -ts = Series(np.random.randn(N), index=rng) -""" - -#---------------------------------------------------------------------- -# Lookup value in large time series, hash map population - -setup = common_setup + """ -rng = date_range(start='1/1/2000', periods=1500000, freq='S') -ts = Series(1, index=rng) -""" - -stmt = "ts[ts.index[len(ts) // 2]]; ts.index._cleanup()" -timeseries_large_lookup_value = Benchmark(stmt, setup, - start_date=datetime(2012, 1, 1)) - -#---------------------------------------------------------------------- -# Test slice minutely series - -timeseries_slice_minutely = Benchmark('ts[:10000]', common_setup) - -#---------------------------------------------------------------------- -# Test conversion - -setup = common_setup + """ - -""" - -timeseries_1min_5min_ohlc = Benchmark( - "ts[:10000].resample('5min', how='ohlc')", - common_setup, - start_date=datetime(2012, 5, 1)) - -timeseries_1min_5min_mean = Benchmark( - "ts[:10000].resample('5min', how='mean')", - common_setup, - start_date=datetime(2012, 5, 1)) - -#---------------------------------------------------------------------- -# Irregular alignment - -setup = common_setup + """ -lindex = np.random.permutation(N)[:N // 2] -rindex = np.random.permutation(N)[:N // 2] -left = Series(ts.values.take(lindex), index=ts.index.take(lindex)) -right = Series(ts.values.take(rindex), index=ts.index.take(rindex)) -""" - -timeseries_add_irregular = Benchmark('left + right', setup) - -#---------------------------------------------------------------------- -# Sort large irregular time series - -setup = common_setup + """ -N = 100000 -rng = date_range(start='1/1/2000', periods=N, freq='s') -rng = rng.take(np.random.permutation(N)) -ts = Series(np.random.randn(N), index=rng) -""" - -timeseries_sort_index = 
Benchmark('ts.sort_index()', setup, - start_date=datetime(2012, 4, 1)) - -#---------------------------------------------------------------------- -# Shifting, add offset - -setup = common_setup + """ -rng = date_range(start='1/1/2000', periods=10000, freq='T') -""" - -datetimeindex_add_offset = Benchmark('rng + timedelta(minutes=2)', setup, - start_date=datetime(2012, 4, 1)) - -setup = common_setup + """ -N = 10000 -rng = date_range(start='1/1/1990', periods=N, freq='53s') -ts = Series(np.random.randn(N), index=rng) -dates = date_range(start='1/1/1990', periods=N * 10, freq='5s') -""" -timeseries_asof_single = Benchmark('ts.asof(dates[0])', setup, - start_date=datetime(2012, 4, 27)) - -timeseries_asof = Benchmark('ts.asof(dates)', setup, - start_date=datetime(2012, 4, 27)) - -setup = setup + 'ts[250:5000] = np.nan' - -timeseries_asof_nan = Benchmark('ts.asof(dates)', setup, - start_date=datetime(2012, 4, 27)) - -#---------------------------------------------------------------------- -# Time zone - -setup = common_setup + """ -rng = date_range(start='1/1/2000', end='3/1/2000', tz='US/Eastern') -""" - -timeseries_timestamp_tzinfo_cons = \ - Benchmark('rng[0]', setup, start_date=datetime(2012, 5, 5)) - -#---------------------------------------------------------------------- -# Resampling period - -setup = common_setup + """ -rng = period_range(start='1/1/2000', end='1/1/2001', freq='T') -ts = Series(np.random.randn(len(rng)), index=rng) -""" - -timeseries_period_downsample_mean = \ - Benchmark("ts.resample('D', how='mean')", setup, - start_date=datetime(2012, 4, 25)) - -setup = common_setup + """ -rng = date_range(start='1/1/2000', end='1/1/2001', freq='T') -ts = Series(np.random.randn(len(rng)), index=rng) -""" - -timeseries_timestamp_downsample_mean = \ - Benchmark("ts.resample('D', how='mean')", setup, - start_date=datetime(2012, 4, 25)) - -# GH 7754 -setup = common_setup + """ -rng = date_range(start='2000-01-01 00:00:00', - end='2000-01-01 10:00:00', 
freq='555000U') -int_ts = Series(5, rng, dtype='int64') -ts = int_ts.astype('datetime64[ns]') -""" - -timeseries_resample_datetime64 = Benchmark("ts.resample('1S', how='last')", setup) - -#---------------------------------------------------------------------- -# to_datetime - -setup = common_setup + """ -rng = date_range(start='1/1/2000', periods=20000, freq='H') -strings = [x.strftime('%Y-%m-%d %H:%M:%S') for x in rng] -""" - -timeseries_to_datetime_iso8601 = \ - Benchmark('to_datetime(strings)', setup, - start_date=datetime(2012, 7, 11)) - -timeseries_to_datetime_iso8601_format = \ - Benchmark("to_datetime(strings, format='%Y-%m-%d %H:%M:%S')", setup, - start_date=datetime(2012, 7, 11)) - -setup = common_setup + """ -rng = date_range(start='1/1/2000', periods=10000, freq='D') -strings = Series(rng.year*10000+rng.month*100+rng.day,dtype=np.int64).apply(str) -""" - -timeseries_to_datetime_YYYYMMDD = \ - Benchmark('to_datetime(strings,format="%Y%m%d")', setup, - start_date=datetime(2012, 7, 1)) - -setup = common_setup + """ -s = Series(['19MAY11','19MAY11:00:00:00']*100000) -""" -timeseries_with_format_no_exact = Benchmark("to_datetime(s,format='%d%b%y',exact=False)", \ - setup, start_date=datetime(2014, 11, 26)) -timeseries_with_format_replace = Benchmark("to_datetime(s.str.replace(':\S+$',''),format='%d%b%y')", \ - setup, start_date=datetime(2014, 11, 26)) - -# ---- infer_freq -# infer_freq - -setup = common_setup + """ -from pandas.tseries.frequencies import infer_freq -rng = date_range(start='1/1/1700', freq='D', periods=100000) -a = rng[:50000].append(rng[50002:]) -""" - -timeseries_infer_freq = \ - Benchmark('infer_freq(a)', setup, start_date=datetime(2012, 7, 1)) - -# setitem PeriodIndex - -setup = common_setup + """ -rng = period_range(start='1/1/1990', freq='S', periods=20000) -df = DataFrame(index=range(len(rng))) -""" - -period_setitem = \ - Benchmark("df['col'] = rng", setup, - start_date=datetime(2012, 8, 1)) - -setup = common_setup + """ -rng = 
date_range(start='1/1/2000 9:30', periods=10000, freq='S', tz='US/Eastern') -""" - -datetimeindex_normalize = \ - Benchmark('rng.normalize()', setup, - start_date=datetime(2012, 9, 1)) - -setup = common_setup + """ -from pandas.tseries.offsets import Second -s1 = date_range(start='1/1/2000', periods=100, freq='S') -curr = s1[-1] -slst = [] -for i in range(100): - slst.append(curr + Second()), periods=100, freq='S') - curr = slst[-1][-1] -""" - -# dti_append_tz = \ -# Benchmark('s1.append(slst)', setup, start_date=datetime(2012, 9, 1)) - - -setup = common_setup + """ -rng = date_range(start='1/1/2000', periods=1000, freq='H') -df = DataFrame(np.random.randn(len(rng), 2), rng) -""" - -dti_reset_index = \ - Benchmark('df.reset_index()', setup, start_date=datetime(2012, 9, 1)) - -setup = common_setup + """ -rng = date_range(start='1/1/2000', periods=1000, freq='H', - tz='US/Eastern') -df = DataFrame(np.random.randn(len(rng), 2), index=rng) -""" - -dti_reset_index_tz = \ - Benchmark('df.reset_index()', setup, start_date=datetime(2012, 9, 1)) - -setup = common_setup + """ -rng = date_range(start='1/1/2000', periods=1000, freq='T') -index = rng.repeat(10) -""" - -datetimeindex_unique = Benchmark('index.unique()', setup, - start_date=datetime(2012, 7, 1)) - -# tz_localize with infer argument. This is an attempt to emulate the results -# of read_csv with duplicated data. 
Not passing infer_dst will fail -setup = common_setup + """ -dst_rng = date_range(start='10/29/2000 1:00:00', - end='10/29/2000 1:59:59', freq='S') -index = date_range(start='10/29/2000', end='10/29/2000 00:59:59', freq='S') -index = index.append(dst_rng) -index = index.append(dst_rng) -index = index.append(date_range(start='10/29/2000 2:00:00', - end='10/29/2000 3:00:00', freq='S')) -""" - -datetimeindex_infer_dst = \ -Benchmark('index.tz_localize("US/Eastern", infer_dst=True)', - setup, start_date=datetime(2013, 9, 30)) - - -#---------------------------------------------------------------------- -# Resampling: fast-path various functions - -setup = common_setup + """ -rng = date_range(start='20130101',periods=100000,freq='50L') -df = DataFrame(np.random.randn(100000,2),index=rng) -""" - -dataframe_resample_mean_string = \ - Benchmark("df.resample('1s', how='mean')", setup) - -dataframe_resample_mean_numpy = \ - Benchmark("df.resample('1s', how=np.mean)", setup) - -dataframe_resample_min_string = \ - Benchmark("df.resample('1s', how='min')", setup) - -dataframe_resample_min_numpy = \ - Benchmark("df.resample('1s', how=np.min)", setup) - -dataframe_resample_max_string = \ - Benchmark("df.resample('1s', how='max')", setup) - -dataframe_resample_max_numpy = \ - Benchmark("df.resample('1s', how=np.max)", setup) - - -#---------------------------------------------------------------------- -# DatetimeConverter - -setup = common_setup + """ -from pandas.tseries.converter import DatetimeConverter -""" - -datetimeindex_converter = \ - Benchmark('DatetimeConverter.convert(rng, None, None)', - setup, start_date=datetime(2013, 1, 1)) - -# Adding custom business day -setup = common_setup + """ -import datetime as dt -import pandas as pd -try: - import pandas.tseries.holiday -except ImportError: - pass -import numpy as np - -date = dt.datetime(2011,1,1) -dt64 = np.datetime64('2011-01-01 09:00Z') -hcal = pd.tseries.holiday.USFederalHolidayCalendar() - -day = pd.offsets.Day() 
-year = pd.offsets.YearBegin() -cday = pd.offsets.CustomBusinessDay() -cmb = pd.offsets.CustomBusinessMonthBegin(calendar=hcal) -cme = pd.offsets.CustomBusinessMonthEnd(calendar=hcal) - -cdayh = pd.offsets.CustomBusinessDay(calendar=hcal) -""" -timeseries_day_incr = Benchmark("date + day",setup) - -timeseries_day_apply = Benchmark("day.apply(date)",setup) - -timeseries_year_incr = Benchmark("date + year",setup) - -timeseries_year_apply = Benchmark("year.apply(date)",setup) - -timeseries_custom_bday_incr = \ - Benchmark("date + cday",setup) - -timeseries_custom_bday_decr = \ - Benchmark("date - cday",setup) - -timeseries_custom_bday_apply = \ - Benchmark("cday.apply(date)",setup) - -timeseries_custom_bday_apply_dt64 = \ - Benchmark("cday.apply(dt64)",setup) - -timeseries_custom_bday_cal_incr = \ - Benchmark("date + 1 * cdayh",setup) - -timeseries_custom_bday_cal_decr = \ - Benchmark("date - 1 * cdayh",setup) - -timeseries_custom_bday_cal_incr_n = \ - Benchmark("date + 10 * cdayh",setup) - -timeseries_custom_bday_cal_incr_neg_n = \ - Benchmark("date - 10 * cdayh",setup) - -# Increment custom business month -timeseries_custom_bmonthend_incr = \ - Benchmark("date + cme",setup) - -timeseries_custom_bmonthend_incr_n = \ - Benchmark("date + 10 * cme",setup) - -timeseries_custom_bmonthend_decr_n = \ - Benchmark("date - 10 * cme",setup) - -timeseries_custom_bmonthbegin_incr_n = \ - Benchmark("date + 10 * cmb",setup) - -timeseries_custom_bmonthbegin_decr_n = \ - Benchmark("date - 10 * cmb",setup) - - -#---------------------------------------------------------------------- -# month/quarter/year start/end accessors - -setup = common_setup + """ -N = 10000 -rng = date_range(start='1/1/1', periods=N, freq='B') -""" - -timeseries_is_month_start = Benchmark('rng.is_month_start', setup, - start_date=datetime(2014, 4, 1)) - -#---------------------------------------------------------------------- -# iterate over DatetimeIndex/PeriodIndex -setup = common_setup + """ -N = 1000000 -M = 
10000 -idx1 = date_range(start='20140101', freq='T', periods=N) -idx2 = period_range(start='20140101', freq='T', periods=N) - -def iter_n(iterable, n=None): - i = 0 - for _ in iterable: - i += 1 - if n is not None and i > n: - break -""" - -timeseries_iter_datetimeindex = Benchmark('iter_n(idx1)', setup) - -timeseries_iter_periodindex = Benchmark('iter_n(idx2)', setup) - -timeseries_iter_datetimeindex_preexit = Benchmark('iter_n(idx1, M)', setup) - -timeseries_iter_periodindex_preexit = Benchmark('iter_n(idx2, M)', setup) - - -#---------------------------------------------------------------------- -# apply an Offset to a DatetimeIndex -setup = common_setup + """ -N = 100000 -idx1 = date_range(start='20140101', freq='T', periods=N) -delta_offset = pd.offsets.Day() -fast_offset = pd.offsets.DateOffset(months=2, days=2) -slow_offset = pd.offsets.BusinessDay() - -""" - -timeseries_datetimeindex_offset_delta = Benchmark('idx1 + delta_offset', setup) -timeseries_datetimeindex_offset_fast = Benchmark('idx1 + fast_offset', setup) -timeseries_datetimeindex_offset_slow = Benchmark('idx1 + slow_offset', setup) - -# apply an Offset to a Series containing datetime64 values -setup = common_setup + """ -N = 100000 -s = Series(date_range(start='20140101', freq='T', periods=N)) -delta_offset = pd.offsets.Day() -fast_offset = pd.offsets.DateOffset(months=2, days=2) -slow_offset = pd.offsets.BusinessDay() - -""" - -timeseries_series_offset_delta = Benchmark('s + delta_offset', setup) -timeseries_series_offset_fast = Benchmark('s + fast_offset', setup) -timeseries_series_offset_slow = Benchmark('s + slow_offset', setup) diff --git a/versioneer.py b/versioneer.py index c010f63e3ead8..b0ae4fa2dc8e8 100644 --- a/versioneer.py +++ b/versioneer.py @@ -606,11 +606,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in 
refnames.strip("()").split(",")]) + refs = {r.strip() for r in refnames.strip("()").split(",")} # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %%d @@ -619,7 +619,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r'\d', r)]) + tags = {r for r in refs if re.search(r'\d', r)} if verbose: print("discarding '%%s', no digits" %% ",".join(refs-tags)) if verbose: @@ -960,11 +960,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) + refs = {r.strip() for r in refnames.strip("()").split(",")} # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d @@ -973,7 +973,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". 
- tags = set([r for r in refs if re.search(r'\d', r)]) + tags = {r for r in refs if re.search(r'\d', r)} if verbose: print("discarding '%s', no digits" % ",".join(refs-tags)) if verbose: @@ -1130,7 +1130,9 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): # unpacked source archive. Distribution tarballs contain a pre-generated copy # of this file. -import json +from warnings import catch_warnings +with catch_warnings(record=True): + import json import sys version_json = '''